From b8fa21c286297ef329f521e645545b8e143f6a64 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 9 Mar 2018 12:42:20 +0000 Subject: [PATCH 01/29] Better naming of decomposition methods and related variables --- include/maths/CTimeSeriesDecomposition.h | 14 ++-- .../maths/CTimeSeriesDecompositionInterface.h | 14 ++-- include/maths/CTimeSeriesDecompositionStub.h | 10 +-- lib/maths/CTimeSeriesDecomposition.cc | 25 ++++--- lib/maths/CTimeSeriesDecompositionStub.cc | 10 +-- lib/maths/CTimeSeriesModel.cc | 65 +++++++++---------- lib/model/CInterimBucketCorrector.cc | 47 +++++--------- 7 files changed, 84 insertions(+), 101 deletions(-) diff --git a/include/maths/CTimeSeriesDecomposition.h b/include/maths/CTimeSeriesDecomposition.h index 06dc4361e6..19b5944d62 100644 --- a/include/maths/CTimeSeriesDecomposition.h +++ b/include/maths/CTimeSeriesDecomposition.h @@ -118,19 +118,19 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt //! Propagate the decomposition forwards to \p time. void propagateForwardsTo(core_t::TTime time); - //! Get the mean value of the baseline in the vicinity of \p time. - virtual double mean(core_t::TTime time) const; + //! Get the mean value of the time series in the vicinity of \p time. + virtual double meanValue(core_t::TTime time) const; - //! Get the value of the time series baseline at \p time. + //! Get the value of the time series at \p time. //! //! \param[in] time The time of interest. //! \param[in] confidence The symmetric confidence interval for the prediction //! the baseline as a percentage. //! \param[in] components The components to include in the baseline. - virtual maths_t::TDoubleDoublePr baseline(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - bool smooth = true) const; + virtual maths_t::TDoubleDoublePr value(core_t::TTime time, + double confidence = 0.0, + int components = E_All, + bool smooth = true) const; //! Forecast from \p start to \p end at \p dt intervals. //! diff --git a/include/maths/CTimeSeriesDecompositionInterface.h b/include/maths/CTimeSeriesDecompositionInterface.h index 05fda695cf..988bcd4cad 100644 --- a/include/maths/CTimeSeriesDecompositionInterface.h +++ b/include/maths/CTimeSeriesDecompositionInterface.h @@ -90,19 +90,19 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface //! Propagate the decomposition forwards to \p time. virtual void propagateForwardsTo(core_t::TTime time) = 0; - //! Get the mean value of the baseline in the vicinity of \p time. - virtual double mean(core_t::TTime time) const = 0; + //! Get the mean value of the time series in the vicinity of \p time. + virtual double meanValue(core_t::TTime time) const = 0; - //! Get the value of the time series baseline at \p time. + //! Get the value of the time series at \p time. //! //! \param[in] time The time of interest. //! \param[in] confidence The symmetric confidence interval for the prediction //! the baseline as a percentage. //! \param[in] components The components to include in the baseline. - virtual maths_t::TDoubleDoublePr baseline(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - bool smooth = true) const = 0; + virtual maths_t::TDoubleDoublePr value(core_t::TTime time, + double confidence = 0.0, + int components = E_All, + bool smooth = true) const = 0; //! Forecast from \p start to \p end at \p dt intervals. //! 
diff --git a/include/maths/CTimeSeriesDecompositionStub.h b/include/maths/CTimeSeriesDecompositionStub.h index b3d03c4152..04e89b55db 100644 --- a/include/maths/CTimeSeriesDecompositionStub.h +++ b/include/maths/CTimeSeriesDecompositionStub.h @@ -47,13 +47,13 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio virtual void propagateForwardsTo(core_t::TTime time); //! Returns 0. - virtual double mean(core_t::TTime time) const; + virtual double meanValue(core_t::TTime time) const; //! Returns (0.0, 0.0). - virtual maths_t::TDoubleDoublePr baseline(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - bool smooth = true) const; + virtual maths_t::TDoubleDoublePr value(core_t::TTime time, + double confidence = 0.0, + int components = E_All, + bool smooth = true) const; //! Clears \p result. virtual void forecast(core_t::TTime startTime, diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index 992d547350..0dd7b0014b 100644 --- a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -243,13 +243,12 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, this->propagateForwardsTo(time); SAddValue message{time, lastTime, value, weightStyles, weights, - CBasicStatistics::mean(this->baseline(time, 0.0, E_TrendForced)), - CBasicStatistics::mean(this->baseline(time, 0.0, E_Seasonal)), - CBasicStatistics::mean(this->baseline(time, 0.0, E_Calendar)), + CBasicStatistics::mean(this->value(time, 0.0, E_TrendForced)), + CBasicStatistics::mean(this->value(time, 0.0, E_Seasonal)), + CBasicStatistics::mean(this->value(time, 0.0, E_Calendar)), [this](core_t::TTime time_) { - return CBasicStatistics::mean(this->baseline( - time_, 0.0, E_Seasonal | E_Calendar)); + return CBasicStatistics::mean(this->value(time_, 0.0, E_Seasonal | E_Calendar)); }, m_Components.periodicityTestConfig()}; @@ -271,15 +270,15 @@ void CTimeSeriesDecomposition::propagateForwardsTo(core_t::TTime time) m_LastPropagationTime = std::max(m_LastPropagationTime, time); } -double CTimeSeriesDecomposition::mean(core_t::TTime time) const +double CTimeSeriesDecomposition::meanValue(core_t::TTime time) const { return m_Components.meanValue(time); } -TDoubleDoublePr CTimeSeriesDecomposition::baseline(core_t::TTime time, - double confidence, - int components, - bool smooth) const +TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, + double confidence, + int components, + bool smooth) const { TVector2x1 baseline{0.0}; @@ -320,7 +319,7 @@ TDoubleDoublePr CTimeSeriesDecomposition::baseline(core_t::TTime time, if (smooth) { baseline += vector2x1(this->smooth( - boost::bind(&CTimeSeriesDecomposition::baseline, + boost::bind(&CTimeSeriesDecomposition::value, this, _1, confidence, components & E_Seasonal, false), time, components)); } @@ -399,8 +398,8 @@ double CTimeSeriesDecomposition::detrend(core_t::TTime time, double value, doubl { return value; } - TDoubleDoublePr baseline{this->baseline(time, confidence)}; - return std::min(value - baseline.first, 0.0) + std::max(value - baseline.second, 0.0); + TDoubleDoublePr interval{this->value(time, confidence)}; + return std::min(value - interval.first, 0.0) + std::max(value - interval.second, 0.0); } double CTimeSeriesDecomposition::meanVariance(void) const diff --git a/lib/maths/CTimeSeriesDecompositionStub.cc b/lib/maths/CTimeSeriesDecompositionStub.cc index d8d2d3f15f..7fc413d987 100644 --- a/lib/maths/CTimeSeriesDecompositionStub.cc +++ 
b/lib/maths/CTimeSeriesDecompositionStub.cc @@ -48,15 +48,15 @@ void CTimeSeriesDecompositionStub::propagateForwardsTo(core_t::TTime /*time*/) { } -double CTimeSeriesDecompositionStub::mean(core_t::TTime /*time*/) const +double CTimeSeriesDecompositionStub::meanValue(core_t::TTime /*time*/) const { return 0.0; } -maths_t::TDoubleDoublePr CTimeSeriesDecompositionStub::baseline(core_t::TTime /*time*/, - double /*confidence*/, - int /*components*/, - bool /*smooth*/) const +maths_t::TDoubleDoublePr CTimeSeriesDecompositionStub::value(core_t::TTime /*time*/, + double /*confidence*/, + int /*components*/, + bool /*smooth*/) const { return {0.0, 0.0}; } diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 889a1324d5..7b2a08d177 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -764,8 +764,8 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params, { CDecayRateController &controller{(*m_Controllers)[E_TrendControl]}; core_t::TTime time{static_cast(CBasicStatistics::mean(averageTime))}; - TDouble1Vec prediction{m_Trend->mean(time)}; - multiplier = controller.multiplier(prediction, errors[E_TrendControl], + TDouble1Vec trendMean{m_Trend->meanValue(time)}; + multiplier = controller.multiplier(trendMean, errors[E_TrendControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate()); @@ -777,8 +777,8 @@ } { CDecayRateController &controller{(*m_Controllers)[E_PriorControl]}; - TDouble1Vec prediction{m_Prior->marginalLikelihoodMean()}; - multiplier = controller.multiplier(prediction, errors[E_PriorControl], + TDouble1Vec residualMean{m_Prior->marginalLikelihoodMean()}; + multiplier = controller.multiplier(residualMean, errors[E_PriorControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate()); @@ -820,7 +820,7 @@ CUnivariateTimeSeriesModel::mode(core_t::TTime time, weights.push_back(weight[0]); } return { m_Prior->marginalLikelihoodMode(weightStyles, weights) - + CBasicStatistics::mean(m_Trend->baseline(time))}; + + CBasicStatistics::mean(m_Trend->value(time))}; } CUnivariateTimeSeriesModel::TDouble2Vec1Vec @@ -840,11 +840,11 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, { result.resize(correlated.size(), TDouble10Vec(2)); - double baseline[2]; - baseline[0] = CBasicStatistics::mean(m_Trend->baseline(time)); + double trend[2]; + trend[0] = CBasicStatistics::mean(m_Trend->value(time)); for (std::size_t i = 0u; i < correlated.size(); ++i) { - baseline[1] = CBasicStatistics::mean(correlatedTimeSeriesModels[i]->m_Trend->baseline(time)); + trend[1] = CBasicStatistics::mean(correlatedTimeSeriesModels[i]->m_Trend->value(time)); TDouble10Vec4Vec weights; weights.resize(weights_[i].size(), TDouble10Vec(2)); for (std::size_t j = 0u; j < weights_[i].size(); ++j) { for (std::size_t d = 0u; d < 2; ++d) { weights[j][d] = weights_[i][j][d]; } } TDouble10Vec mode(correlationDistributionModels[i].first->marginalLikelihoodMode(weightStyles, weights)); - result[i][variables[i][0]] = baseline[0] + mode[variables[i][0]]; - result[i][variables[i][1]] = baseline[1] + mode[variables[i][1]]; + result[i][variables[i][0]] = trend[0] + mode[variables[i][0]]; + result[i][variables[i][1]] = trend[1] + mode[variables[i][1]]; } } @@ -953,10 +953,10 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time, double scale{1.0 - this->params().probabilityBucketEmpty()}; - double 
seasonalOffset{0.0}; + double trend{0.0}; if (m_Trend->initialized()) { - seasonalOffset = CBasicStatistics::mean(m_Trend->baseline(time)); + trend = CBasicStatistics::mean(m_Trend->value(time)); } if (hint.size() == 1) @@ -968,7 +968,7 @@ m_Prior->marginalLikelihoodMean() : (hint.empty() ? CBasicStatistics::mean(m_Prior->marginalLikelihoodConfidenceInterval(0.0)) : m_Prior->nearestMarginalLikelihoodMean(hint[0]))}; - double result{scale * (seasonalOffset + median + correlateCorrection)}; + double result{scale * (trend + median + correlateCorrection)}; return {m_IsNonNegative ? std::max(result, 0.0) : result}; } @@ -986,8 +986,8 @@ CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, double scale{1.0 - this->params().probabilityBucketEmpty()}; - double seasonalOffset{m_Trend->initialized() ? - CBasicStatistics::mean(m_Trend->baseline(time, confidenceInterval)) : 0.0}; + double trend{m_Trend->initialized() ? + CBasicStatistics::mean(m_Trend->value(time, confidenceInterval)) : 0.0}; TDouble4Vec weights; weights.reserve(weights_.size()); @@ -1001,9 +1001,9 @@ TDoubleDoublePr interval{ m_Prior->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; - double result[]{scale * (seasonalOffset + interval.first), - scale * (seasonalOffset + median), - scale * (seasonalOffset + interval.second)}; + double result[]{scale * (trend + interval.first), + scale * (trend + median), + scale * (trend + interval.second)}; return {{m_IsNonNegative ? std::max(result[0], 0.0) : result[0]}, {m_IsNonNegative ? std::max(result[1], 0.0) : result[1]}, @@ -2208,13 +2208,13 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params, } { CDecayRateController &controller{(*m_Controllers)[E_TrendControl]}; - TDouble1Vec prediction(dimension); + TDouble1Vec trendMean(dimension); core_t::TTime time{static_cast(CBasicStatistics::mean(averageTime))}; for (std::size_t d = 0u; d < dimension; ++d) { - prediction[d] = m_Trend[d]->mean(time); + trendMean[d] = m_Trend[d]->meanValue(time); } - double multiplier{controller.multiplier(prediction, errors[E_TrendControl], + double multiplier{controller.multiplier(trendMean, errors[E_TrendControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate())}; @@ -2229,8 +2229,8 @@ } { CDecayRateController &controller{(*m_Controllers)[E_PriorControl]}; - TDouble1Vec prediction(m_Prior->marginalLikelihoodMean()); - double multiplier{controller.multiplier(prediction, errors[E_PriorControl], + TDouble1Vec residualMean(m_Prior->marginalLikelihoodMean()); + double multiplier{controller.multiplier(residualMean, errors[E_PriorControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate())}; @@ -2280,7 +2280,7 @@ CMultivariateTimeSeriesModel::mode(core_t::TTime time, for (std::size_t d = 0u; d < dimension; ++d) { - result[d] = mode[d] + CBasicStatistics::mean(m_Trend[d]->baseline(time)); + result[d] = mode[d] + CBasicStatistics::mean(m_Trend[d]->value(time)); } return result; @@ -2353,10 +2353,10 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, TDouble10Vec mean(m_Prior->marginalLikelihoodMean()); for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { - double seasonalOffset{0.0}; + double trend{0.0}; if (m_Trend[d]->initialized()) { - 
seasonalOffset = CBasicStatistics::mean(m_Trend[d]->baseline(time)); + trend = CBasicStatistics::mean(m_Trend[d]->value(time)); } double median{mean[d]}; if (!m_Prior->isNonInformative()) @@ -2365,7 +2365,7 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, median = hint.empty() ? CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0)) : marginal->nearestMarginalLikelihoodMean(hint[d]); } - result[d] = scale * (seasonalOffset + median); + result[d] = scale * (trend + median); if (m_IsNonNegative) { result[d] = std::max(result[d], 0.0); @@ -2401,9 +2401,8 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, TDouble4Vec weights; for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { - double seasonalOffset{m_Trend[d]->initialized() ? - CBasicStatistics::mean( - m_Trend[d]->baseline(time, confidenceInterval)) : 0.0}; + double trend{m_Trend[d]->initialized() ? + CBasicStatistics::mean(m_Trend[d]->value(time, confidenceInterval)) : 0.0}; weights.clear(); weights.reserve(weights_.size()); @@ -2417,9 +2416,9 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, TDoubleDoublePr interval{ marginal->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; - result[0][d] = scale * (seasonalOffset + interval.first); - result[1][d] = scale * (seasonalOffset + median); - result[2][d] = scale * (seasonalOffset + interval.second); + result[0][d] = scale * (trend + interval.first); + result[1][d] = scale * (trend + median); + result[2][d] = scale * (trend + interval.second); if (m_IsNonNegative) { result[0][d] = std::max(result[0][d], 0.0); diff --git a/lib/model/CInterimBucketCorrector.cc b/lib/model/CInterimBucketCorrector.cc index a903eb1524..d8cc6936e0 100644 --- a/lib/model/CInterimBucketCorrector.cc +++ b/lib/model/CInterimBucketCorrector.cc @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -14,6 +15,8 @@ #include #include +#include + namespace ml { namespace model @@ -60,7 +63,7 @@ void CInterimBucketCorrector::update(core_t::TTime time, std::size_t bucketCount m_CountTrend.addPoint(bucketMidPoint, static_cast(bucketCount)); - double alpha = ::exp(-meanDecayRate(m_BucketLength)); + double alpha = std::exp(-meanDecayRate(m_BucketLength)); m_CountMean.age(alpha); m_CountMean.add(bucketCount); } @@ -68,22 +71,13 @@ void CInterimBucketCorrector::update(core_t::TTime time, std::size_t bucketCount double CInterimBucketCorrector::estimateBucketCompleteness(core_t::TTime time, std::size_t currentCount) const { - double baselineCount = 0.0; core_t::TTime bucketMidPoint = this->calcBucketMidPoint(time); - if (m_CountTrend.initialized()) - { - baselineCount = maths::CBasicStatistics::mean(m_CountTrend.baseline(bucketMidPoint)); - } - else - { - baselineCount = maths::CBasicStatistics::mean(m_CountMean); - } - - if (baselineCount == 0.0) - { - return 1.0; - } - return maths::CTools::truncate(static_cast(currentCount) / baselineCount, 0.0, 1.0); + double bucketCount = m_CountTrend.initialized() ? + maths::CBasicStatistics::mean(m_CountTrend.value(bucketMidPoint)) : + maths::CBasicStatistics::mean(m_CountMean); + return bucketCount > 0.0 ? 
+ maths::CTools::truncate( static_cast(currentCount) + / bucketCount, 0.0, 1.0) : 1.0; } double CInterimBucketCorrector::corrections(core_t::TTime time, @@ -138,21 +132,12 @@ bool CInterimBucketCorrector::acceptRestoreTraverser(core::CStateRestoreTraverse do { const std::string &name = traverser.name(); - if (name == COUNT_TREND_TAG) - { - maths::CTimeSeriesDecomposition restored(trendDecayRate(m_BucketLength), - m_BucketLength, COMPONENT_SIZE, - traverser); - m_CountTrend.swap(restored); - } - else if (name == COUNT_MEAN_TAG) - { - if (m_CountMean.fromDelimited(traverser.value()) == false) - { - LOG_ERROR("Invalid count mean in " << traverser.value()); - return false; - } - } + RESTORE_NO_ERROR(COUNT_TREND_TAG, + maths::CTimeSeriesDecomposition restored(trendDecayRate(m_BucketLength), + m_BucketLength, COMPONENT_SIZE, + traverser); + m_CountTrend.swap(restored)) + RESTORE(COUNT_MEAN_TAG, m_CountMean.fromDelimited(traverser.value())) } while (traverser.next()); return true; From 91c0057994184badf989447a24a9886337b19c58 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 9 Mar 2018 17:39:13 +0000 Subject: [PATCH 02/29] More consistent naming in CTimeSeriesModel and avoid lots of overloads of the name prior --- include/core/Constants.h | 27 +- include/maths/CTimeSeriesModel.h | 99 +-- lib/maths/CTimeSeriesModel.cc | 569 +++++++++--------- .../unittest/CTimeSeriesDecompositionTest.cc | 211 +++---- lib/maths/unittest/CTimeSeriesModelTest.cc | 84 +-- lib/model/CEventRatePopulationModel.cc | 6 +- lib/model/CIndividualModel.cc | 8 +- lib/model/CMetricPopulationModel.cc | 6 +- lib/model/unittest/CEventRateModelTest.cc | 62 +- .../unittest/CEventRatePopulationModelTest.cc | 22 +- lib/model/unittest/CMetricModelTest.cc | 10 +- 11 files changed, 546 insertions(+), 558 deletions(-) diff --git a/include/core/Constants.h b/include/core/Constants.h index 0147bc2b47..24a44bd2d3 100644 --- a/include/core/Constants.h +++ b/include/core/Constants.h @@ -9,8 +9,8 @@ #include +#include #include -#include namespace ml { @@ -19,38 +19,41 @@ namespace core namespace constants { +//! A minute in seconds. +const core_t::TTime MINUTE{60}; + //! An hour in seconds. -const core_t::TTime HOUR = 3600; +const core_t::TTime HOUR{3600}; //! A day in seconds. -const core_t::TTime DAY = 86400; +const core_t::TTime DAY{86400}; //! A (two day) weekend in seconds. -const core_t::TTime WEEKEND = 172800; +const core_t::TTime WEEKEND{172800}; //! Five weekdays in seconds. -const core_t::TTime WEEKDAYS = 432000; +const core_t::TTime WEEKDAYS{432000}; //! A week in seconds. -const core_t::TTime WEEK = 604800; +const core_t::TTime WEEK{604800}; //! A (364 day) year in seconds. -const core_t::TTime YEAR = 31449600; +const core_t::TTime YEAR{31449600}; //! Log of min double. -const double LOG_MIN_DOUBLE = ::log(std::numeric_limits::min()); +const double LOG_MIN_DOUBLE{std::log(std::numeric_limits::min())}; //! Log of max double. -const double LOG_MAX_DOUBLE = ::log(std::numeric_limits::max()); +const double LOG_MAX_DOUBLE{std::log(std::numeric_limits::max())}; //! Log of double epsilon. -const double LOG_DOUBLE_EPSILON = ::log(std::numeric_limits::epsilon()); +const double LOG_DOUBLE_EPSILON{std::log(std::numeric_limits::epsilon())}; //! Log of two. -const double LOG_TWO = 0.693147180559945; +const double LOG_TWO{0.693147180559945}; //! Log of two pi. 
-const double LOG_TWO_PI = 1.83787706640935; +const double LOG_TWO_PI{1.83787706640935}; #ifdef Windows const char PATH_SEPARATOR = '\\'; diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h index facf2574ba..239e330fb8 100644 --- a/include/maths/CTimeSeriesModel.h +++ b/include/maths/CTimeSeriesModel.h @@ -41,16 +41,16 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel public: //! \param[in] params The model parameters. //! \param[in] id The *unique* identifier for this time series. - //! \param[in] trend The time series trend decomposition. - //! \param[in] prior The time series residuals' prior. + //! \param[in] trendModel The time series trend decomposition. + //! \param[in] residualModel The prior for the time series residual model. //! \param[in] controllers Optional decay rate controllers for the trend - //! and prior. + //! and residual model. //! \param[in] modelAnomalies If true we use a separate model to capture //! the characteristics of anomalous time periods. CUnivariateTimeSeriesModel(const CModelParams &params, std::size_t id, - const CTimeSeriesDecompositionInterface &trend, - const CPrior &prior, + const CTimeSeriesDecompositionInterface &trendModel, + const CPrior &residualModel, const TDecayRateController2Ary *controllers = 0, bool modelAnomalies = true); CUnivariateTimeSeriesModel(const SModelRestoreParams &params, @@ -63,7 +63,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel //! Create a copy of this model passing ownership to the caller. virtual CUnivariateTimeSeriesModel *clone(std::size_t id) const; - //! Create a copy of the state we need to persist passing ownership to the caller. + //! Create a copy of the state we need to persist passing ownership + //! to the caller. virtual CUnivariateTimeSeriesModel *cloneForPersistence(void) const; //! Create a copy of the state we need to run forecasting. @@ -93,8 +94,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel const maths_t::TWeightStyleVec &weightStyles, const TDouble2Vec4Vec &weights) const; - //! Get the most likely value for each correlate time series at - //! \p time, if there are any. + //! Get the most likely value for each correlate time series + //! at \p time, if there are any. virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec &weightStyles, const TDouble2Vec4Vec1Vec &weights) const; @@ -173,10 +174,10 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel const TTimeDoublePrCBuf &slidingWindow(void) const; //! Get the trend. - const CTimeSeriesDecompositionInterface &trend(void) const; + const CTimeSeriesDecompositionInterface &trendModel(void) const; - //! Get the prior. - const CPrior &prior(void) const; + //! Get the residual model. + const CPrior &residualModel(void) const; //@} private: @@ -224,18 +225,18 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel //! A random number generator for sampling the sliding window. CPRNG::CXorOShiro128Plus m_Rng; - //! These control the trend and prior decay rates (see CDecayRateController - //! for more details). + //! These control the trend and residual model decay rates (see + //! CDecayRateController for more details). TDecayRateController2AryPtr m_Controllers; //! The time series trend decomposition. - TDecompositionPtr m_Trend; + TDecompositionPtr m_TrendModel; - //! The prior for the time series' residual model. - TPriorPtr m_Prior; + //! The time series' residual model. + TPriorPtr m_ResidualModel; - //! 
A model for time periods when the basic model can't predict the value - //! of the time series. + //! A model for time periods when the basic model can't predict the + //! value of the time series. TAnomalyModelPtr m_AnomalyModel; //! A sliding window of the recent samples (used to reinitialize the @@ -318,9 +319,9 @@ class MATHS_EXPORT CTimeSeriesCorrelations TSize1Vec s_Tags; //! The sample weights. TDouble4Vec1Vec s_Weights; - //! The interval by which to age the prior. + //! The interval by which to age the correlation model. double s_Interval; - //! The prior decay rate multiplier. + //! The decay rate multiplier. double s_Multiplier; }; @@ -333,7 +334,8 @@ //! Create a copy of this model passing ownership to the caller. CTimeSeriesCorrelations *clone(void) const; - //! Create a copy of the state we need to persist passing ownership to the caller. + //! Create a copy of the state we need to persist passing ownership + //! to the caller. CTimeSeriesCorrelations *cloneForPersistence(void) const; //! Process all samples added from individual time series models. @@ -350,7 +352,7 @@ void refresh(const CTimeSeriesCorrelateModelAllocator &allocator); //! Get the correlation joint distribution models. - const TSizeSizePrMultivariatePriorPtrDoublePrUMap &correlatePriors(void) const; + const TSizeSizePrMultivariatePriorPtrDoublePrUMap &correlationModels(void) const; //! Debug the memory used by this object. void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; @@ -377,20 +379,22 @@ CTimeSeriesCorrelations(const CTimeSeriesCorrelations &other, bool isForPersistence = false); - //! Restore the correlate priors reading state from \p traverser. - bool restoreCorrelatePriors(const SDistributionRestoreParams &params, - core::CStateRestoreTraverser &traverser); + //! Restore the correlation distribution models reading state from + //! \p traverser. + bool restoreCorrelationModels(const SDistributionRestoreParams &params, + core::CStateRestoreTraverser &traverser); - //! Persist the correlate priors passing information to \p inserter. - void persistCorrelatePriors(core::CStatePersistInserter &inserter) const; + //! Persist the correlation distribution models passing information + //! to \p inserter. + void persistCorrelationModels(core::CStatePersistInserter &inserter) const; - //! Restore the correlate priors reading state from \p traverser. + //! Restore the \p model reading state from \p traverser. static bool restore(const SDistributionRestoreParams &params, - TSizeSizePrMultivariatePriorPtrDoublePrPr &prior, + TSizeSizePrMultivariatePriorPtrDoublePrPr &model, core::CStateRestoreTraverser &traverser); - //! Persist the correlate priors passing information to \p inserter. + //! Persist the \p model passing information to \p inserter. static void persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr &model, core::CStatePersistInserter &inserter); //! Add the time series identified by \p id. @@ -459,15 +463,15 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel public: //! \param[in] params The model parameters. - //! \param[in] trend The time series trend decomposition. - //! \param[in] prior The time series residuals' prior. + //! \param[in] trendModel The time series trend decomposition. + //! \param[in] residualModel The prior for the time series residual model. //! 
\param[in] controllers Optional decay rate controllers for the trend - //! and prior. + //! and residual model. //! \param[in] modelAnomalies If true we use a separate model to capture //! the characteristics of anomalous time periods. CMultivariateTimeSeriesModel(const CModelParams &params, - const CTimeSeriesDecompositionInterface &trend, - const CMultivariatePrior &prior, + const CTimeSeriesDecompositionInterface &trendModel, + const CMultivariatePrior &residualModel, const TDecayRateController2Ary *controllers = 0, bool modelAnomalies = true); CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel &other); @@ -480,7 +484,8 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel //! Create a copy of this model passing ownership to the caller. virtual CMultivariateTimeSeriesModel *clone(std::size_t id) const; - //! Create a copy of the state we need to persist passing ownership to the caller. + //! Create a copy of the state we need to persist passing ownership + //! to the caller. virtual CMultivariateTimeSeriesModel *cloneForPersistence(void) const; //! Create a copy of the state we need to run forecasting. @@ -587,10 +592,10 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel const TTimeDouble2VecPrCBuf &slidingWindow(void) const; //! Get the trend. - const TDecompositionPtr10Vec &trend(void) const; + const TDecompositionPtr10Vec &trendModel(void) const; - //! Get the prior. - const CMultivariatePrior &prior(void) const; + //! Get the residual model. + const CMultivariatePrior &residualModel(void) const; //@} private: @@ -624,18 +629,18 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel //! A random number generator for sampling the sliding window. CPRNG::CXorOShiro128Plus m_Rng; - //! These control the trend and prior decay rates (see CDecayRateController - //! for more details). + //! These control the trend and residual model decay rates (see + //! CDecayRateController for more details). TDecayRateController2AryPtr m_Controllers; //! The time series trend decomposition. - TDecompositionPtr10Vec m_Trend; + TDecompositionPtr10Vec m_TrendModel; - //! The prior for the time series' residual model. - TMultivariatePriorPtr m_Prior; + //! The time series residual model. + TMultivariatePriorPtr m_ResidualModel; - //! A model for time periods when the basic model can't predict the value - //! of the time series. + //! A model for time periods when the basic model can't predict the + //! value of the time series. TAnomalyModelPtr m_AnomalyModel; //! A sliding window of the recent samples (used to reinitialize the diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 7b2a08d177..6f3962078c 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -58,6 +58,14 @@ using TSize1Vec = CTimeSeriesCorrelations::TSize1Vec; using TSize2Vec1Vec = CTimeSeriesCorrelations::TSize2Vec1Vec; using TMultivariatePriorCPtrSizePr1Vec = CTimeSeriesCorrelations::TMultivariatePriorCPtrSizePr1Vec; +//! The decay rate controllers we maintain. +enum EDecayRateController +{ + E_TrendControl = 0, + E_ResidualControl, + E_NumberControls +}; + //! Computes the Winsorisation weight for \p value. double computeWinsorisationWeight(const CPrior &prior, double derate, double scale, double value) { @@ -151,14 +159,6 @@ double computeWinsorisationWeight(const CMultivariatePrior &prior, return computeWinsorisationWeight(*conditional, derate, scale, value[dimension]); } -//! The decay rate controllers we maintain. 
-enum EDecayRateController -{ - E_TrendControl = 0, - E_PriorControl, - E_NumberControls -}; - // Models // Version 6.3 @@ -168,8 +168,8 @@ const std::string IS_NON_NEGATIVE_6_3_TAG{"b"}; const std::string IS_FORECASTABLE_6_3_TAG{"c"}; const std::string RNG_6_3_TAG{"d"}; const std::string CONTROLLER_6_3_TAG{"e"}; -const std::string TREND_6_3_TAG{"f"}; -const std::string PRIOR_6_3_TAG{"g"}; +const std::string TREND_MODEL_6_3_TAG{"f"}; +const std::string RESIDUAL_MODEL_6_3_TAG{"g"}; const std::string ANOMALY_MODEL_6_3_TAG{"h"}; const std::string SLIDING_WINDOW_6_3_TAG{"i"}; // Version < 6.3 @@ -184,7 +184,7 @@ const std::string IS_FORECASTABLE_OLD_TAG{"h"}; // Anomaly model const std::string MEAN_ERROR_TAG{"a"}; const std::string ANOMALIES_TAG{"b"}; -const std::string PRIOR_TAG{"d"}; +const std::string ANOMALY_FEATURE_MODEL_TAG{"d"}; // Anomaly model nested const std::string TAG_TAG{"a"}; const std::string OPEN_TIME_TAG{"b"}; @@ -194,11 +194,11 @@ const std::string MEAN_ERROR_NORM_TAG{"d"}; // Correlations const std::string K_MOST_CORRELATED_TAG{"a"}; const std::string CORRELATED_LOOKUP_TAG{"b"}; -const std::string CORRELATED_PRIORS_TAG{"c"}; +const std::string CORRELATION_MODELS_TAG{"c"}; // Correlations nested const std::string FIRST_CORRELATE_ID_TAG{"a"}; const std::string SECOND_CORRELATE_ID_TAG{"b"}; -const std::string CORRELATE_PRIOR_TAG{"c"}; +const std::string CORRELATION_MODEL_TAG{"c"}; const std::string CORRELATION_TAG{"d"}; const std::size_t MAXIMUM_CORRELATIONS{5000}; @@ -374,8 +374,8 @@ class CTimeSeriesAnomalyModel { std::size_t index(anomaly.positive() ? 0 : 1); TDouble10Vec1Vec features{anomaly.features(this->scale(time))}; - m_Priors[index].addSamples(CConstantWeights::COUNT, features, - {{TDouble10Vec(2, weight)}}); + m_AnomalyFeatureModels[index].addSamples(CConstantWeights::COUNT, features, + {{TDouble10Vec(2, weight)}}); } //! Get the scaled time. @@ -392,24 +392,28 @@ class CTimeSeriesAnomalyModel TAnomaly1Vec m_Anomalies; //! The model describing features of anomalous time periods. 
- TMultivariateNormalConjugateVec m_Priors; + TMultivariateNormalConjugateVec m_AnomalyFeatureModels; }; CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel(void) : m_BucketLength(0) { - m_Priors.reserve(2); - m_Priors.push_back(TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); - m_Priors.push_back(TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + m_AnomalyFeatureModels.reserve(2); + m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( + maths_t::E_ContinuousData)); + m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( + maths_t::E_ContinuousData)); } CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel(core_t::TTime bucketLength, double decayRate) : m_BucketLength(bucketLength) { - m_Priors.reserve(2); - m_Priors.push_back(TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, - 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); - m_Priors.push_back(TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, - 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); + m_AnomalyFeatureModels.reserve(2); + m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( + maths_t::E_ContinuousData, + 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); + m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( + maths_t::E_ContinuousData, + 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); } void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams ¶ms, @@ -466,9 +470,10 @@ void CTimeSeriesAnomalyModel::sampleAnomaly(const CModelProbabilityParams ¶m void CTimeSeriesAnomalyModel::reset(void) { m_MeanError = TMeanAccumulator(); - for (auto &prior : m_Priors) + for (auto &model : m_AnomalyFeatureModels) { - prior = TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, prior.decayRate()); + model = TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, + model.decayRate()); } } @@ -485,10 +490,10 @@ void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams ¶ms, double pl, pu; TTail10Vec tail; if ( probability < LARGEST_ANOMALOUS_PROBABILITY - && !m_Priors[index].isNonInformative() - && m_Priors[index].probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, - CConstantWeights::COUNT, features, UNIT, - pl, pu, tail)) + && !m_AnomalyFeatureModels[index].isNonInformative() + && m_AnomalyFeatureModels[index].probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, + CConstantWeights::COUNT, features, UNIT, + pl, pu, tail)) { double logp{CTools::fastLog(probability)}; double alpha{0.5 * std::min( (logp - LOG_LARGEST_ANOMALOUS_PROBABILITY) @@ -509,8 +514,8 @@ void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams ¶ms, void CTimeSeriesAnomalyModel::propagateForwardsByTime(double time) { - m_Priors[0].propagateForwardsByTime(time); - m_Priors[1].propagateForwardsByTime(time); + m_AnomalyFeatureModels[0].propagateForwardsByTime(time); + m_AnomalyFeatureModels[1].propagateForwardsByTime(time); } uint64_t CTimeSeriesAnomalyModel::checksum(uint64_t seed) const @@ -518,21 +523,21 @@ uint64_t CTimeSeriesAnomalyModel::checksum(uint64_t seed) const seed = CChecksum::calculate(seed, m_BucketLength); seed = CChecksum::calculate(seed, m_MeanError); seed = CChecksum::calculate(seed, m_Anomalies); - seed = CChecksum::calculate(seed, m_Priors[0]); - return CChecksum::calculate(seed, m_Priors[1]); + seed = CChecksum::calculate(seed, 
m_AnomalyFeatureModels[0]); + return CChecksum::calculate(seed, m_AnomalyFeatureModels[1]); } void CTimeSeriesAnomalyModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CTimeSeriesAnomalyModel"); core::CMemoryDebug::dynamicSize("m_Anomalies", m_Anomalies, mem); - core::CMemoryDebug::dynamicSize("m_Priors", m_Priors, mem); + core::CMemoryDebug::dynamicSize("m_AnomalyFeatureModels", m_AnomalyFeatureModels, mem); } std::size_t CTimeSeriesAnomalyModel::memoryUsage(void) const { return core::CMemory::dynamicSize(m_Anomalies) - + core::CMemory::dynamicSize(m_Priors); + + core::CMemory::dynamicSize(m_AnomalyFeatureModels); } bool CTimeSeriesAnomalyModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, @@ -545,9 +550,10 @@ bool CTimeSeriesAnomalyModel::acceptRestoreTraverser(const SModelRestoreParams & const std::string &name{traverser.name()}; RESTORE(MEAN_ERROR_TAG, m_MeanError.fromDelimited(traverser.value())); RESTORE(ANOMALIES_TAG, core::CPersistUtils::restore(ANOMALIES_TAG, m_Anomalies, traverser)); - RESTORE(PRIOR_TAG, traverser.traverseSubLevel( - boost::bind(&TMultivariateNormalConjugate::acceptRestoreTraverser, - &m_Priors[index++], _1))) + RESTORE(ANOMALY_FEATURE_MODEL_TAG, + traverser.traverseSubLevel( + boost::bind(&TMultivariateNormalConjugate::acceptRestoreTraverser, + &m_AnomalyFeatureModels[index++], _1))) } while (traverser.next()); return true; @@ -557,8 +563,12 @@ void CTimeSeriesAnomalyModel::acceptPersistInserter(core::CStatePersistInserter { inserter.insertValue(MEAN_ERROR_TAG, m_MeanError.toDelimited()); core::CPersistUtils::persist(ANOMALIES_TAG, m_Anomalies, inserter); - inserter.insertLevel(PRIOR_TAG, boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, &m_Priors[0], _1)); - inserter.insertLevel(PRIOR_TAG, boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, &m_Priors[1], _1)); + inserter.insertLevel(ANOMALY_FEATURE_MODEL_TAG, + boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, + &m_AnomalyFeatureModels[0], _1)); + inserter.insertLevel(ANOMALY_FEATURE_MODEL_TAG, + boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, + &m_AnomalyFeatureModels[1], _1)); } const double CTimeSeriesAnomalyModel::LARGEST_ANOMALOUS_PROBABILITY{0.1}; @@ -569,16 +579,16 @@ const TDouble10Vec4Vec1Vec CTimeSeriesAnomalyModel::UNIT{CConstantWeights::unit< CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams ¶ms, std::size_t id, - const CTimeSeriesDecompositionInterface &trend, - const CPrior &prior, + const CTimeSeriesDecompositionInterface &trendModel, + const CPrior &residualModel, const TDecayRateController2Ary *controllers, bool modelAnomalies) : CModel(params), m_Id(id), m_IsNonNegative(false), m_IsForecastable(true), - m_Trend(trend.clone()), - m_Prior(prior.clone()), + m_TrendModel(trendModel.clone()), + m_ResidualModel(residualModel.clone()), m_AnomalyModel(modelAnomalies ? 
boost::make_shared(params.bucketLength(), params.decayRate()) : @@ -638,7 +648,7 @@ CUnivariateTimeSeriesModel *CUnivariateTimeSeriesModel::cloneForForecast(void) c bool CUnivariateTimeSeriesModel::isForecastPossible(void) const { - return m_IsForecastable && !m_Prior->isNonInformative(); + return m_IsForecastable && !m_ResidualModel->isNonInformative(); } void CUnivariateTimeSeriesModel::modelCorrelations(CTimeSeriesCorrelations &model) @@ -652,11 +662,9 @@ TSize2Vec1Vec CUnivariateTimeSeriesModel::correlates(void) const TSize2Vec1Vec result; TSize1Vec correlated; TSize2Vec1Vec variables; - TMultivariatePriorCPtrSizePr1Vec correlationDistributionModels; + TMultivariatePriorCPtrSizePr1Vec correlationModels; TModelCPtr1Vec correlatedTimeSeriesModels; - this->correlationModels(correlated, variables, - correlationDistributionModels, - correlatedTimeSeriesModels); + this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels); result.resize(correlated.size(), TSize2Vec(2)); for (std::size_t i = 0u; i < correlated.size(); ++i) { @@ -670,9 +678,9 @@ void CUnivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec & { for (const auto &value : values) { - m_Prior->adjustOffset(CConstantWeights::COUNT, - {m_Trend->detrend(value.first, value.second[0], 0.0)}, - CConstantWeights::SINGLE_UNIT); + m_ResidualModel->adjustOffset(CConstantWeights::COUNT, + {m_TrendModel->detrend(value.first, value.second[0], 0.0)}, + CConstantWeights::SINGLE_UNIT); } } @@ -712,7 +720,7 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, for (auto &sample : samples) { - sample.second[0] = m_Trend->detrend(sample.first, sample.second[0], 0.0); + sample.second[0] = m_TrendModel->detrend(sample.first, sample.second[0], 0.0); } std::stable_sort(valueorder.begin(), valueorder.end(), @@ -722,7 +730,7 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, }); maths_t::EDataType type{params.type()}; - m_Prior->dataType(type); + m_ResidualModel->dataType(type); TDouble1Vec samples_; TDouble4Vec1Vec weights; @@ -743,8 +751,8 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, averageTime.add(static_cast(samples[i].first)); } - m_Prior->addSamples(params.weightStyles(), samples_, weights); - m_Prior->propagateForwardsByTime(params.propagationInterval()); + m_ResidualModel->addSamples(params.weightStyles(), samples_, weights); + m_ResidualModel->propagateForwardsByTime(params.propagationInterval()); if (m_AnomalyModel) { m_AnomalyModel->propagateForwardsByTime(params.propagationInterval()); @@ -764,28 +772,28 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, { CDecayRateController &controller{(*m_Controllers)[E_TrendControl]}; core_t::TTime time{static_cast(CBasicStatistics::mean(averageTime))}; - TDouble1Vec trendMean{m_Trend->meanValue(time)}; + TDouble1Vec trendMean{m_TrendModel->meanValue(time)}; multiplier = controller.multiplier(trendMean, errors[E_TrendControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate()); if (multiplier != 1.0) { - m_Trend->decayRate(multiplier * m_Trend->decayRate()); - LOG_TRACE("trend decay rate = " << m_Trend->decayRate()); + m_TrendModel->decayRate(multiplier * m_TrendModel->decayRate()); + LOG_TRACE("trend decay rate = " << m_TrendModel->decayRate()); } } { - CDecayRateController &controller{(*m_Controllers)[E_PriorControl]}; - TDouble1Vec residualMean{m_Prior->marginalLikelihoodMean()}; - multiplier = 
controller.multiplier(residualMean, errors[E_PriorControl], + CDecayRateController &controller{(*m_Controllers)[E_ResidualControl]}; + TDouble1Vec residualMean{m_ResidualModel->marginalLikelihoodMean()}; + multiplier = controller.multiplier(residualMean, errors[E_ResidualControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate()); if (multiplier != 1.0) { - m_Prior->decayRate(multiplier * m_Prior->decayRate()); - LOG_TRACE("prior decay rate = " << m_Prior->decayRate()); + m_ResidualModel->decayRate(multiplier * m_ResidualModel->decayRate()); + LOG_TRACE("prior decay rate = " << m_ResidualModel->decayRate()); } } } @@ -805,7 +813,7 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap) { - m_Trend->skipTime(gap); + m_TrendModel->skipTime(gap); } CUnivariateTimeSeriesModel::TDouble2Vec @@ -819,8 +827,8 @@ CUnivariateTimeSeriesModel::mode(core_t::TTime time, { weights.push_back(weight[0]); } - return { m_Prior->marginalLikelihoodMode(weightStyles, weights) - + CBasicStatistics::mean(m_Trend->value(time))}; + return { m_ResidualModel->marginalLikelihoodMode(weightStyles, weights) + + CBasicStatistics::mean(m_TrendModel->value(time))}; } CUnivariateTimeSeriesModel::TDouble2Vec1Vec @@ -832,19 +840,17 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, TSize1Vec correlated; TSize2Vec1Vec variables; - TMultivariatePriorCPtrSizePr1Vec correlationDistributionModels; + TMultivariatePriorCPtrSizePr1Vec correlationModels; TModelCPtr1Vec correlatedTimeSeriesModels; - if (this->correlationModels(correlated, variables, - correlationDistributionModels, - correlatedTimeSeriesModels)) + if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) { result.resize(correlated.size(), TDouble10Vec(2)); double trend[2]; - trend[0] = CBasicStatistics::mean(m_Trend->value(time)); + trend[0] = CBasicStatistics::mean(m_TrendModel->value(time)); for (std::size_t i = 0u; i < correlated.size(); ++i) { - trend[1] = CBasicStatistics::mean(correlatedTimeSeriesModels[i]->m_Trend->value(time)); + trend[1] = CBasicStatistics::mean(correlatedTimeSeriesModels[i]->m_TrendModel->value(time)); TDouble10Vec4Vec weights; weights.resize(weights_[i].size(), TDouble10Vec(2)); for (std::size_t j = 0u; j < weights_[i].size(); ++j) @@ -854,7 +860,7 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, weights[j][d] = weights_[i][j][d]; } } - TDouble10Vec mode(correlationDistributionModels[i].first->marginalLikelihoodMode(weightStyles, weights)); + TDouble10Vec mode(correlationModels[i].first->marginalLikelihoodMode(weightStyles, weights)); result[i][variables[i][0]] = trend[0] + mode[variables[i][0]]; result[i][variables[i][1]] = trend[1] + mode[variables[i][1]]; } @@ -876,7 +882,7 @@ CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec &weight weights.push_back(weight[0]); } - TDouble1Vec modes(m_Prior->marginalLikelihoodModes(weightStyles, weights)); + TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, weights)); result.reserve(modes.size()); for (auto mode : modes) { @@ -897,29 +903,27 @@ void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec &time, if (value[0].size() == 1) { - value[0][0] = m_Trend->detrend(time[0][0], value[0][0], confidenceInterval); + value[0][0] = m_TrendModel->detrend(time[0][0], value[0][0], confidenceInterval); } else { TSize1Vec correlated; TSize2Vec1Vec variables; - 
TMultivariatePriorCPtrSizePr1Vec correlationDistributionModels; + TMultivariatePriorCPtrSizePr1Vec correlationModels; TModelCPtr1Vec correlatedTimeSeriesModels; - if (this->correlationModels(correlated, variables, - correlationDistributionModels, - correlatedTimeSeriesModels)) + if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) { for (std::size_t i = 0u; i < variables.size(); ++i) { if (!value[i].empty()) { - value[i][variables[i][0]] = m_Trend->detrend(time[i][variables[i][0]], - value[i][variables[i][0]], - confidenceInterval); + value[i][variables[i][0]] = m_TrendModel->detrend(time[i][variables[i][0]], + value[i][variables[i][0]], + confidenceInterval); value[i][variables[i][1]] = - correlatedTimeSeriesModels[i]->m_Trend->detrend(time[i][variables[i][1]], - value[i][variables[i][1]], - confidenceInterval); + correlatedTimeSeriesModels[i]->m_TrendModel->detrend(time[i][variables[i][1]], + value[i][variables[i][1]], + confidenceInterval); } } } @@ -941,7 +945,7 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time, if (m_Correlations->correlationModels(m_Id, correlated, variables, correlationModel, correlatedModel)) { - double sample{correlatedModel[0]->m_Trend->detrend(time, correlatedValue[0].second, 0.0)}; + double sample{correlatedModel[0]->m_TrendModel->detrend(time, correlatedValue[0].second, 0.0)}; TSize10Vec marginalize{variables[0][1]}; TSizeDoublePr10Vec condition{{variables[0][1], sample}}; const CMultivariatePrior *joint{correlationModel[0].first}; @@ -954,20 +958,20 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time, double scale{1.0 - this->params().probabilityBucketEmpty()}; double trend{0.0}; - if (m_Trend->initialized()) + if (m_TrendModel->initialized()) { - trend = CBasicStatistics::mean(m_Trend->value(time)); + trend = CBasicStatistics::mean(m_TrendModel->value(time)); } if (hint.size() == 1) { - hint[0] = m_Trend->detrend(time, hint[0], 0.0); + hint[0] = m_TrendModel->detrend(time, hint[0], 0.0); } - double median{m_Prior->isNonInformative() ? - m_Prior->marginalLikelihoodMean() : - (hint.empty() ? CBasicStatistics::mean(m_Prior->marginalLikelihoodConfidenceInterval(0.0)) : - m_Prior->nearestMarginalLikelihoodMean(hint[0]))}; + double median{m_ResidualModel->isNonInformative() ? + m_ResidualModel->marginalLikelihoodMean() : + (hint.empty() ? CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0)) : + m_ResidualModel->nearestMarginalLikelihoodMean(hint[0]))}; double result{scale * (trend + median + correlateCorrection)}; return {m_IsNonNegative ? std::max(result, 0.0) : result}; @@ -979,15 +983,15 @@ CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, const maths_t::TWeightStyleVec &weightStyles, const TDouble2Vec4Vec &weights_) const { - if (m_Prior->isNonInformative()) + if (m_ResidualModel->isNonInformative()) { return TDouble2Vec3Vec(); } double scale{1.0 - this->params().probabilityBucketEmpty()}; - double trend{m_Trend->initialized() ? - CBasicStatistics::mean(m_Trend->value(time, confidenceInterval)) : 0.0}; + double trend{m_TrendModel->initialized() ? 
+ CBasicStatistics::mean(m_TrendModel->value(time, confidenceInterval)) : 0.0}; TDouble4Vec weights; weights.reserve(weights_.size()); @@ -997,9 +1001,9 @@ } double median{CBasicStatistics::mean( - m_Prior->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; + m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; TDoubleDoublePr interval{ - m_Prior->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; + m_ResidualModel->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; double result[]{scale * (trend + interval.first), scale * (trend + median), @@ -1018,7 +1022,7 @@ bool CUnivariateTimeSeriesModel::forecast(core_t::TTime startTime, const TForecastPushDatapointFunc &forecastPushDataPointFunc, std::string &messageOut) { - if (m_Prior->isNonInformative()) + if (m_ResidualModel->isNonInformative()) { messageOut = forecast::INFO_INSUFFICIENT_HISTORY; return true; @@ -1032,8 +1036,8 @@ double maximum{m_IsNonNegative ? std::max(maximum_[0], 0.0) : maximum_[0]}; TDouble3VecVec predictions; - m_Trend->forecast(startTime, endTime, bucketLength, confidenceInterval, - this->params().minimumSeasonalVarianceScale(), predictions); + m_TrendModel->forecast(startTime, endTime, bucketLength, confidenceInterval, + this->params().minimumSeasonalVarianceScale(), predictions); core_t::TTime time{startTime}; for (const auto &prediction : predictions) @@ -1076,7 +1080,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams &params, if (value[0].size() == 1) { core_t::TTime time{time_[0][0]}; - TDouble1Vec sample{m_Trend->detrend(time, value[0][0], params.seasonalConfidenceInterval())}; + TDouble1Vec sample{m_TrendModel->detrend(time, value[0][0], params.seasonalConfidenceInterval())}; TDouble4Vec1Vec weights(1); weights[0].reserve(params.weights()[0].size()); @@ -1087,9 +1091,9 @@ double pl, pu; maths_t::ETail tail_; - if (m_Prior->probabilityOfLessLikelySamples(params.calculation(0), - params.weightStyles(), - sample, weights, pl, pu, tail_)) + if (m_ResidualModel->probabilityOfLessLikelySamples(params.calculation(0), - params.weightStyles(), + params.weightStyles(), + sample, weights, pl, pu, tail_)) { LOG_TRACE("P(" << sample << " | weight = " << weights << ", time = " << time << ") = " << (pl + pu) / 2.0); @@ -1107,7 +1111,7 @@ if (m_AnomalyModel) { - TDouble2Vec residual{ (sample[0] - m_Prior->nearestMarginalLikelihoodMean(sample[0])) + TDouble2Vec residual{ (sample[0] - m_ResidualModel->nearestMarginalLikelihoodMean(sample[0])) / std::max(std::sqrt(this->seasonalWeight(0.0, time)[0]), 1.0)}; m_AnomalyModel->updateAnomaly(params, time, residual, probability); m_AnomalyModel->probability(params, time, probability); @@ -1119,11 +1123,9 @@ { TSize1Vec correlated; TSize2Vec1Vec variables; - TMultivariatePriorCPtrSizePr1Vec correlationDistributionModels; + TMultivariatePriorCPtrSizePr1Vec correlationModels; TModelCPtr1Vec correlatedTimeSeriesModels; - if (!this->correlationModels(correlated, variables, - correlationDistributionModels, - correlatedTimeSeriesModels)) + if (!this->correlationModels(correlated, variables, correlationModels, 
correlatedTimeSeriesModels)) { return false; } @@ -1141,20 +1143,20 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ TTail10Vec ti; core_t::TTime mostAnomalousTime{0}; double mostAnomalousSample{0.0}; - TPriorPtr mostAnomalousPrior; + TPriorPtr mostAnomalousCorrelationModel; for (std::size_t i = 0u; i < variables.size(); ++i) { if (!value[i].empty() || (!params.mostAnomalousCorrelate() || i == *params.mostAnomalousCorrelate())) { variable[0] = variables[i][0]; - sample[0][variables[i][0]] = m_Trend->detrend(time_[i][variables[i][0]], - value[i][variables[i][0]], - params.seasonalConfidenceInterval()); + sample[0][variables[i][0]] = m_TrendModel->detrend(time_[i][variables[i][0]], + value[i][variables[i][0]], + params.seasonalConfidenceInterval()); sample[0][variables[i][1]] = - correlatedTimeSeriesModels[i]->m_Trend->detrend(time_[i][variables[i][1]], - value[i][variables[i][1]], - params.seasonalConfidenceInterval()); + correlatedTimeSeriesModels[i]->m_TrendModel->detrend(time_[i][variables[i][1]], + value[i][variables[i][1]], + params.seasonalConfidenceInterval()); for (std::size_t j = 0u; j < params.weights()[i].size(); ++j) { for (std::size_t d = 0u; d < 2; ++d) @@ -1163,10 +1165,10 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ } } - if (correlationDistributionModels[i].first->probabilityOfLessLikelySamples(params.calculation(0), - params.weightStyles(), - sample, weights, - variable, pli, pui, ti)) + if (correlationModels[i].first->probabilityOfLessLikelySamples(params.calculation(0), + params.weightStyles(), + sample, weights, + variable, pli, pui, ti)) { LOG_TRACE("Marginal P(" << sample << " | weight = " << weights << ", coordinate = " << variable @@ -1203,12 +1205,11 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ conditional = ((pli[1][0] + pui[1][0]) < (pli[0][0] + pui[0][0])); mostAnomalousTime = time_[0][variables[i][0]]; mostAnomalousSample = sample[0][variables[i][0]]; - mostAnomalousPrior = - conditional ? - correlationDistributionModels[i].first->univariate({variables[i][1]}, CONDITION).first : - correlationDistributionModels[i].first->univariate(MARGINALIZE, - {{variables[i][1], - sample[0][variables[i][1]]}}).first; + mostAnomalousCorrelationModel = conditional ? 
+ correlationModels[i].first->univariate({variables[i][1]}, CONDITION).first : + correlationModels[i].first->univariate(MARGINALIZE, + {{variables[i][1], + sample[0][variables[i][1]]}}).first; } } else @@ -1221,7 +1222,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ if (m_AnomalyModel) { TDouble2Vec residual{ ( mostAnomalousSample - - mostAnomalousPrior->nearestMarginalLikelihoodMean(mostAnomalousSample)) + - mostAnomalousCorrelationModel->nearestMarginalLikelihoodMean(mostAnomalousSample)) / std::max(std::sqrt(this->seasonalWeight(0.0, mostAnomalousTime)[0]), 1.0)}; m_AnomalyModel->updateAnomaly(params, mostAnomalousTime, residual, probability); m_AnomalyModel->probability(params, mostAnomalousTime, probability); @@ -1238,14 +1239,14 @@ CUnivariateTimeSeriesModel::winsorisationWeight(double derate, const TDouble2Vec &value) const { double scale{this->seasonalWeight(0.0, time)[0]}; - double sample{m_Trend->detrend(time, value[0], 0.0)}; - return {computeWinsorisationWeight(*m_Prior, derate, scale, sample)}; + double sample{m_TrendModel->detrend(time, value[0], 0.0)}; + return {computeWinsorisationWeight(*m_ResidualModel, derate, scale, sample)}; } CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const { - double scale{m_Trend->scale(time, m_Prior->marginalLikelihoodVariance(), confidence).second}; + double scale{m_TrendModel->scale(time, m_ResidualModel->marginalLikelihoodVariance(), confidence).second}; return {std::max(scale, this->params().minimumSeasonalVarianceScale())}; } @@ -1253,8 +1254,8 @@ uint64_t CUnivariateTimeSeriesModel::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_IsNonNegative); seed = CChecksum::calculate(seed, m_Controllers); - seed = CChecksum::calculate(seed, m_Trend); - seed = CChecksum::calculate(seed, m_Prior); + seed = CChecksum::calculate(seed, m_TrendModel); + seed = CChecksum::calculate(seed, m_ResidualModel); seed = CChecksum::calculate(seed, m_AnomalyModel); seed = CChecksum::calculate(seed, m_SlidingWindow); return CChecksum::calculate(seed, m_Correlations != 0); @@ -1264,8 +1265,8 @@ void CUnivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsa { mem->setName("CUnivariateTimeSeriesModel"); core::CMemoryDebug::dynamicSize("m_Controllers", m_Controllers, mem); - core::CMemoryDebug::dynamicSize("m_Trend", m_Trend, mem); - core::CMemoryDebug::dynamicSize("m_Prior", m_Prior, mem); + core::CMemoryDebug::dynamicSize("m_TrendModel", m_TrendModel, mem); + core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem); core::CMemoryDebug::dynamicSize("m_AnomalyModel", m_AnomalyModel, mem); core::CMemoryDebug::dynamicSize("m_SlidingWindow", m_SlidingWindow, mem); } @@ -1273,8 +1274,8 @@ void CUnivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsa std::size_t CUnivariateTimeSeriesModel::memoryUsage(void) const { return core::CMemory::dynamicSize(m_Controllers) - + core::CMemory::dynamicSize(m_Trend) - + core::CMemory::dynamicSize(m_Prior) + + core::CMemory::dynamicSize(m_TrendModel) + + core::CMemory::dynamicSize(m_ResidualModel) + core::CMemory::dynamicSize(m_AnomalyModel) + core::CMemory::dynamicSize(m_SlidingWindow); } @@ -1295,14 +1296,14 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam m_Controllers = boost::make_shared(), core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser), /**/) - RESTORE(TREND_6_3_TAG, 
traverser.traverseSubLevel(boost::bind(
-                                              CTimeSeriesDecompositionStateSerialiser(),
-                                              boost::cref(params.s_DecompositionParams),
-                                              boost::ref(m_Trend), _1)))
-        RESTORE(PRIOR_6_3_TAG, traverser.traverseSubLevel(boost::bind(
-                                              CPriorStateSerialiser(),
-                                              boost::cref(params.s_DistributionParams),
-                                              boost::ref(m_Prior), _1)))
+        RESTORE(TREND_MODEL_6_3_TAG, traverser.traverseSubLevel(boost::bind(
+                                                    CTimeSeriesDecompositionStateSerialiser(),
+                                                    boost::cref(params.s_DecompositionParams),
+                                                    boost::ref(m_TrendModel), _1)))
+        RESTORE(RESIDUAL_MODEL_6_3_TAG, traverser.traverseSubLevel(boost::bind(
+                                                    CPriorStateSerialiser(),
+                                                    boost::cref(params.s_DistributionParams),
+                                                    boost::ref(m_ResidualModel), _1)))
         RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_6_3_TAG,
                                m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(),
                                traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser,
@@ -1328,11 +1329,11 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam
         RESTORE(TREND_OLD_TAG, traverser.traverseSubLevel(boost::bind(
                                               CTimeSeriesDecompositionStateSerialiser(),
                                               boost::cref(params.s_DecompositionParams),
-                                              boost::ref(m_Trend), _1)))
+                                              boost::ref(m_TrendModel), _1)))
         RESTORE(PRIOR_OLD_TAG, traverser.traverseSubLevel(boost::bind(
                                               CPriorStateSerialiser(),
                                               boost::cref(params.s_DistributionParams),
-                                              boost::ref(m_Prior), _1)))
+                                              boost::ref(m_ResidualModel), _1)))
         RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_OLD_TAG,
                                m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(),
                                traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser,
@@ -1357,10 +1358,10 @@ void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInsert
     {
         core::CPersistUtils::persist(CONTROLLER_6_3_TAG, *m_Controllers, inserter);
     }
-    inserter.insertLevel(TREND_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(),
-                                                    boost::cref(*m_Trend), _1));
-    inserter.insertLevel(PRIOR_6_3_TAG, boost::bind(CPriorStateSerialiser(),
-                                                    boost::cref(*m_Prior), _1));
+    inserter.insertLevel(TREND_MODEL_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(),
+                                                          boost::cref(*m_TrendModel), _1));
+    inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(),
+                                                             boost::cref(*m_ResidualModel), _1));
     if (m_AnomalyModel)
     {
         inserter.insertLevel(ANOMALY_MODEL_6_3_TAG,
@@ -1372,7 +1373,7 @@ void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInsert
 
 maths_t::EDataType CUnivariateTimeSeriesModel::dataType(void) const
 {
-    return m_Prior->dataType();
+    return m_ResidualModel->dataType();
 }
 
 const CUnivariateTimeSeriesModel::TTimeDoublePrCBuf &CUnivariateTimeSeriesModel::slidingWindow(void) const
@@ -1380,14 +1381,14 @@ const CUnivariateTimeSeriesModel::TTimeDoublePrCBuf &CUnivariateTimeSeriesModel:
     return m_SlidingWindow;
 }
 
-const CTimeSeriesDecompositionInterface &CUnivariateTimeSeriesModel::trend(void) const
+const CTimeSeriesDecompositionInterface &CUnivariateTimeSeriesModel::trendModel(void) const
 {
-    return *m_Trend;
+    return *m_TrendModel;
 }
 
-const CPrior &CUnivariateTimeSeriesModel::prior(void) const
+const CPrior &CUnivariateTimeSeriesModel::residualModel(void) const
 {
-    return *m_Prior;
+    return *m_ResidualModel;
 }
 
 CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel &other,
@@ -1397,8 +1398,8 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeri
         m_IsNonNegative(other.m_IsNonNegative),
         m_IsForecastable(other.m_IsForecastable),
         m_Rng(other.m_Rng),
-        m_Trend(other.m_Trend->clone()),
-        m_Prior(other.m_Prior->clone()),
+        m_TrendModel(other.m_TrendModel->clone()),
+        m_ResidualModel(other.m_ResidualModel->clone()),
         m_AnomalyModel(other.m_AnomalyModel ?
                        boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel) :
                        TAnomalyModelPtr()),
@@ -1449,7 +1450,7 @@ CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightSt
             {
                 weight[j] = weights[i][j][0];
             }
-            if (m_Trend->addPoint(time, value, weightStyles, weight))
+            if (m_TrendModel->addPoint(time, value, weightStyles, weight))
             {
                 result = E_Reset;
             }
@@ -1457,13 +1458,13 @@ CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightSt
     }
     if (result == E_Reset)
    {
-        m_Prior->setToNonInformative(0.0, m_Prior->decayRate());
+        m_ResidualModel->setToNonInformative(0.0, m_ResidualModel->decayRate());
         TDouble4Vec1Vec weight{{std::max(this->params().learnRate(),
                                          5.0 / static_cast<double>(SLIDING_WINDOW_SIZE))}};
         for (const auto &value : m_SlidingWindow)
         {
-            TDouble1Vec sample{m_Trend->detrend(value.first, value.second, 0.0)};
-            m_Prior->addSamples(CConstantWeights::COUNT, sample, weight);
+            TDouble1Vec sample{m_TrendModel->detrend(value.first, value.second, 0.0)};
+            m_ResidualModel->addSamples(CConstantWeights::COUNT, sample, weight);
         }
         if (m_Correlations)
         {
@@ -1471,10 +1472,10 @@ CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightSt
         }
         if (m_Controllers)
         {
-            m_Prior->decayRate(  m_Prior->decayRate()
-                               / (*m_Controllers)[E_PriorControl].multiplier());
-            m_Trend->decayRate(  m_Trend->decayRate()
-                               / (*m_Controllers)[E_TrendControl].multiplier());
+            m_ResidualModel->decayRate(  m_ResidualModel->decayRate()
+                                       / (*m_Controllers)[E_ResidualControl].multiplier());
+            m_TrendModel->decayRate(  m_TrendModel->decayRate()
+                                    / (*m_Controllers)[E_TrendControl].multiplier());
             for (auto &controller : *m_Controllers)
            {
                 controller.reset();
@@ -1495,10 +1496,10 @@ void CUnivariateTimeSeriesModel::appendPredictionErrors(double interval,
 {
     using TDecompositionPtr1Vec = core::CSmallVector<TDecompositionPtr, 1>;
     TDouble1Vec sample{sample_};
-    TDecompositionPtr1Vec trend{m_Trend};
-    if (auto error = predictionError(interval, m_Prior, sample))
+    TDecompositionPtr1Vec trend{m_TrendModel};
+    if (auto error = predictionError(interval, m_ResidualModel, sample))
     {
-        result[E_PriorControl].push_back(*error);
+        result[E_ResidualControl].push_back(*error);
     }
     if (auto error = predictionError(trend, sample))
     {
@@ -1508,14 +1509,14 @@
 bool CUnivariateTimeSeriesModel::correlationModels(TSize1Vec &correlated,
                                                    TSize2Vec1Vec &variables,
-                                                   TMultivariatePriorCPtrSizePr1Vec &correlationDistributionModels,
+                                                   TMultivariatePriorCPtrSizePr1Vec &correlationModels,
                                                    TModelCPtr1Vec &correlatedTimeSeriesModels) const
 {
     if (m_Correlations)
     {
         correlated = m_Correlations->correlated(m_Id);
         m_Correlations->correlationModels(m_Id, correlated, variables,
-                                          correlationDistributionModels,
+                                          correlationModels,
                                           correlatedTimeSeriesModels);
     }
     return correlated.size() > 0;
 }
@@ -1763,7 +1764,7 @@ void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator &
 }
 
 const CTimeSeriesCorrelations::TSizeSizePrMultivariatePriorPtrDoublePrUMap &
-CTimeSeriesCorrelations::correlatePriors(void) const
+CTimeSeriesCorrelations::correlationModels(void) const
 {
     return m_CorrelationDistributionModels;
 }
 
@@ -1796,8 +1797,8 @@ bool CTimeSeriesCorrelations::acceptRestoreTraverser(const SDistributionRestoreP
                                                 &m_Correlations, _1)))
         RESTORE(CORRELATED_LOOKUP_TAG,
                 core::CPersistUtils::restore(CORRELATED_LOOKUP_TAG, 
m_CorrelatedLookup, traverser)) - RESTORE(CORRELATED_PRIORS_TAG, - traverser.traverseSubLevel(boost::bind(&CTimeSeriesCorrelations::restoreCorrelatePriors, + RESTORE(CORRELATION_MODELS_TAG, + traverser.traverseSubLevel(boost::bind(&CTimeSeriesCorrelations::restoreCorrelationModels, this, boost::cref(params), _1))) } while (traverser.next()); @@ -1814,17 +1815,17 @@ void CTimeSeriesCorrelations::acceptPersistInserter(core::CStatePersistInserter inserter.insertLevel(K_MOST_CORRELATED_TAG, boost::bind(&CKMostCorrelated::acceptPersistInserter, &m_Correlations, _1)); core::CPersistUtils::persist(CORRELATED_LOOKUP_TAG, m_CorrelatedLookup, inserter); - inserter.insertLevel(CORRELATED_PRIORS_TAG, - boost::bind(&CTimeSeriesCorrelations::persistCorrelatePriors, this, _1)); + inserter.insertLevel(CORRELATION_MODELS_TAG, + boost::bind(&CTimeSeriesCorrelations::persistCorrelationModels, this, _1)); } -bool CTimeSeriesCorrelations::restoreCorrelatePriors(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) +bool CTimeSeriesCorrelations::restoreCorrelationModels(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) { do { const std::string &name{traverser.name()}; - RESTORE_SETUP_TEARDOWN(CORRELATE_PRIOR_TAG, + RESTORE_SETUP_TEARDOWN(CORRELATION_MODEL_TAG, TSizeSizePrMultivariatePriorPtrDoublePrPr prior, traverser.traverseSubLevel( boost::bind(&restore, boost::cref(params), boost::ref(prior), _1)), @@ -1834,7 +1835,7 @@ bool CTimeSeriesCorrelations::restoreCorrelatePriors(const SDistributionRestoreP return true; } -void CTimeSeriesCorrelations::persistCorrelatePriors(core::CStatePersistInserter &inserter) const +void CTimeSeriesCorrelations::persistCorrelationModels(core::CStatePersistInserter &inserter) const { using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = std::vector; @@ -1848,39 +1849,39 @@ void CTimeSeriesCorrelations::persistCorrelatePriors(core::CStatePersistInserter core::CFunctional::SDereference()); for (auto prior : ordered) { - inserter.insertLevel(CORRELATE_PRIOR_TAG, boost::bind(&persist, boost::cref(*prior), _1)); + inserter.insertLevel(CORRELATION_MODEL_TAG, boost::bind(&persist, boost::cref(*prior), _1)); } } bool CTimeSeriesCorrelations::restore(const SDistributionRestoreParams ¶ms, - TSizeSizePrMultivariatePriorPtrDoublePrPr &prior, + TSizeSizePrMultivariatePriorPtrDoublePrPr &model, core::CStateRestoreTraverser &traverser) { do { const std::string &name{traverser.name()}; - RESTORE_BUILT_IN(FIRST_CORRELATE_ID_TAG, prior.first.first) - RESTORE_BUILT_IN(SECOND_CORRELATE_ID_TAG, prior.first.second) - RESTORE(CORRELATE_PRIOR_TAG, + RESTORE_BUILT_IN(FIRST_CORRELATE_ID_TAG, model.first.first) + RESTORE_BUILT_IN(SECOND_CORRELATE_ID_TAG, model.first.second) + RESTORE(CORRELATION_MODEL_TAG, traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), - boost::ref(prior.second.first), _1))) - RESTORE_BUILT_IN(CORRELATION_TAG, prior.second.second) + boost::ref(model.second.first), _1))) + RESTORE_BUILT_IN(CORRELATION_TAG, model.second.second) } while (traverser.next()); return true; } -void CTimeSeriesCorrelations::persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr &prior, +void CTimeSeriesCorrelations::persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr &model, core::CStatePersistInserter &inserter) { - inserter.insertValue(FIRST_CORRELATE_ID_TAG, prior.first.first); - inserter.insertValue(SECOND_CORRELATE_ID_TAG, prior.first.second); - inserter.insertLevel(CORRELATE_PRIOR_TAG, 
boost::bind(CPriorStateSerialiser(), - boost::cref(*prior.second.first), _1)); + inserter.insertValue(FIRST_CORRELATE_ID_TAG, model.first.first); + inserter.insertValue(SECOND_CORRELATE_ID_TAG, model.first.second); + inserter.insertLevel(CORRELATION_MODEL_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(*model.second.first), _1)); inserter.insertValue(CORRELATION_TAG, - prior.second.second, + model.second.second, core::CIEEE754::E_SinglePrecision); } @@ -1941,11 +1942,11 @@ TSize1Vec CTimeSeriesCorrelations::correlated(std::size_t id) const bool CTimeSeriesCorrelations::correlationModels(std::size_t id, TSize1Vec &correlated, TSize2Vec1Vec &variables, - TMultivariatePriorCPtrSizePr1Vec &correlationDistributionModels, + TMultivariatePriorCPtrSizePr1Vec &correlationModels, TModelCPtr1Vec &correlatedTimeSeriesModels) const { variables.clear(); - correlationDistributionModels.clear(); + correlationModels.clear(); correlatedTimeSeriesModels.clear(); if (correlated.empty()) @@ -1954,7 +1955,7 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id, } variables.reserve(correlated.size()); - correlationDistributionModels.reserve(correlated.size()); + correlationModels.reserve(correlated.size()); correlatedTimeSeriesModels.reserve(correlated.size()); std::size_t end{0u}; for (auto correlate : correlated) @@ -1984,7 +1985,7 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id, } correlated[end] = correlate; variables.push_back(std::move(variable)); - correlationDistributionModels.push_back({i->second.first.get(), variable[0]}); + correlationModels.push_back({i->second.first.get(), variable[0]}); ++end; } @@ -1994,7 +1995,7 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id, correlatedTimeSeriesModels.push_back(m_TimeSeriesModels[correlate]); } - return correlationDistributionModels.size() > 0; + return correlationModels.size() > 0; } void CTimeSeriesCorrelations::refreshLookup(void) @@ -2021,7 +2022,7 @@ CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CModelParams &p bool modelAnomalies) : CModel(params), m_IsNonNegative(false), - m_Prior(prior.clone()), + m_ResidualModel(prior.clone()), m_AnomalyModel(modelAnomalies ? boost::make_shared(params.bucketLength(), params.decayRate()) : @@ -2034,14 +2035,14 @@ CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CModelParams &p } for (std::size_t d = 0u; d < this->dimension(); ++d) { - m_Trend.emplace_back(trend.clone()); + m_TrendModel.emplace_back(trend.clone()); } } CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel &other) : CModel(other.params()), m_IsNonNegative(other.m_IsNonNegative), - m_Prior(other.m_Prior->clone()), + m_ResidualModel(other.m_ResidualModel->clone()), m_AnomalyModel(other.m_AnomalyModel ? 
boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel) :
                        TAnomalyModelPtr()),
@@ -2051,10 +2052,10 @@ CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CMultivariateTi
     {
         m_Controllers = boost::make_shared<TDecayRateController2Ary>(*other.m_Controllers);
     }
-    m_Trend.reserve(other.m_Trend.size());
-    for (const auto &trend : other.m_Trend)
+    m_TrendModel.reserve(other.m_TrendModel.size());
+    for (const auto &trend : other.m_TrendModel)
     {
-        m_Trend.emplace_back(trend->clone());
+        m_TrendModel.emplace_back(trend->clone());
     }
 }
 
@@ -2155,7 +2156,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
         core_t::TTime time{sample.first};
         for (std::size_t d = 0u; d < sample.second.size(); ++d)
         {
-            sample.second[d] = m_Trend[d]->detrend(time, sample.second[d], 0.0);
+            sample.second[d] = m_TrendModel[d]->detrend(time, sample.second[d], 0.0);
         }
     }
 
@@ -2166,7 +2167,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
               });
 
     maths_t::EDataType type{params.type()};
-    m_Prior->dataType(type);
+    m_ResidualModel->dataType(type);
 
     TDouble10Vec1Vec samples_;
     TDouble10Vec4Vec1Vec weights;
@@ -2190,8 +2191,8 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
         averageTime.add(static_cast<double>(samples[i].first));
     }
 
-    m_Prior->addSamples(params.weightStyles(), samples_, weights);
-    m_Prior->propagateForwardsByTime(params.propagationInterval());
+    m_ResidualModel->addSamples(params.weightStyles(), samples_, weights);
+    m_ResidualModel->propagateForwardsByTime(params.propagationInterval());
     if (m_AnomalyModel)
     {
         m_AnomalyModel->propagateForwardsByTime(params.propagationInterval());
@@ -2212,7 +2213,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
             core_t::TTime time{static_cast<core_t::TTime>(CBasicStatistics::mean(averageTime))};
             for (std::size_t d = 0u; d < dimension; ++d)
             {
-                trendMean[d] = m_Trend[d]->meanValue(time);
+                trendMean[d] = m_TrendModel[d]->meanValue(time);
             }
             double multiplier{controller.multiplier(trendMean, errors[E_TrendControl],
                                                     this->params().bucketLength(),
                                                     this->params().decayRate())};
             if (multiplier != 1.0)
             {
-                for (const auto &trend : m_Trend)
+                for (const auto &trend : m_TrendModel)
                 {
                     trend->decayRate(multiplier * trend->decayRate());
                 }
-                LOG_TRACE("trend decay rate = " << m_Trend[0]->decayRate());
+                LOG_TRACE("trend decay rate = " << m_TrendModel[0]->decayRate());
             }
         }
         {
-            CDecayRateController &controller{(*m_Controllers)[E_PriorControl]};
-            TDouble1Vec residualMean(m_Prior->marginalLikelihoodMean());
-            double multiplier{controller.multiplier(residualMean, errors[E_PriorControl],
+            CDecayRateController &controller{(*m_Controllers)[E_ResidualControl]};
+            TDouble1Vec residualMean(m_ResidualModel->marginalLikelihoodMean());
+            double multiplier{controller.multiplier(residualMean, errors[E_ResidualControl],
                                                     this->params().bucketLength(),
                                                     this->params().learnRate(),
                                                     this->params().decayRate())};
             if (multiplier != 1.0)
             {
-                m_Prior->decayRate(multiplier * m_Prior->decayRate());
-                LOG_TRACE("prior decay rate = " << m_Prior->decayRate());
+                m_ResidualModel->decayRate(multiplier * m_ResidualModel->decayRate());
+                LOG_TRACE("prior decay rate = " << m_ResidualModel->decayRate());
             }
         }
     }
@@ -2252,7 +2253,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
 
 void CMultivariateTimeSeriesModel::skipTime(core_t::TTime gap)
 {
-    for (const auto &trend : m_Trend)
+    for (const auto &trend : m_TrendModel)
     {
         trend->skipTime(gap);
     }
@@ 
-2276,11 +2277,11 @@ CMultivariateTimeSeriesModel::mode(core_t::TTime time, } } - TDouble10Vec mode(m_Prior->marginalLikelihoodMode(weightStyles, weights)); + TDouble10Vec mode(m_ResidualModel->marginalLikelihoodMode(weightStyles, weights)); for (std::size_t d = 0u; d < dimension; ++d) { - result[d] = mode[d] + CBasicStatistics::mean(m_Trend[d]->value(time)); + result[d] = mode[d] + CBasicStatistics::mean(m_TrendModel[d]->value(time)); } return result; @@ -2304,7 +2305,7 @@ CMultivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec &weig { weights.emplace_back(weight[0]); } - TDouble10Vec1Vec modes(m_Prior->marginalLikelihoodModes(weightStyles, weights)); + TDouble10Vec1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, weights)); TDouble2Vec1Vec result; result.reserve(modes.size()); for (const auto &mode : modes) @@ -2322,7 +2323,7 @@ void CMultivariateTimeSeriesModel::detrend(const TTime2Vec1Vec &time_, core_t::TTime time{time_[0][0]}; for (std::size_t d = 0u; d < dimension; ++d) { - value[0][d] = m_Trend[d]->detrend(time, value[0][d], confidenceInterval); + value[0][d] = m_TrendModel[d]->detrend(time, value[0][d], confidenceInterval); } } @@ -2342,7 +2343,7 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, { for (std::size_t d = 0u; d < dimension; ++d) { - hint[d] = m_Trend[d]->detrend(time, hint[d], 0.0); + hint[d] = m_TrendModel[d]->detrend(time, hint[d], 0.0); } } @@ -2350,18 +2351,18 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, std::iota(marginalize.begin(), marginalize.end(), 1); TDouble2Vec result(dimension); - TDouble10Vec mean(m_Prior->marginalLikelihoodMean()); + TDouble10Vec mean(m_ResidualModel->marginalLikelihoodMean()); for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { double trend{0.0}; - if (m_Trend[d]->initialized()) + if (m_TrendModel[d]->initialized()) { - trend = CBasicStatistics::mean(m_Trend[d]->value(time)); + trend = CBasicStatistics::mean(m_TrendModel[d]->value(time)); } double median{mean[d]}; - if (!m_Prior->isNonInformative()) + if (!m_ResidualModel->isNonInformative()) { - TUnivariatePriorPtr marginal{m_Prior->univariate(marginalize, CONDITION).first}; + TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, CONDITION).first}; median = hint.empty() ? CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0)) : marginal->nearestMarginalLikelihoodMean(hint[d]); } @@ -2381,7 +2382,7 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, const maths_t::TWeightStyleVec &weightStyles, const TDouble2Vec4Vec &weights_) const { - if (m_Prior->isNonInformative()) + if (m_ResidualModel->isNonInformative()) { return TDouble2Vec3Vec(); } @@ -2401,8 +2402,8 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, TDouble4Vec weights; for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { - double trend{m_Trend[d]->initialized() ? - CBasicStatistics::mean(m_Trend[d]->value(time, confidenceInterval)) : 0.0}; + double trend{m_TrendModel[d]->initialized() ? 
+ CBasicStatistics::mean(m_TrendModel[d]->value(time, confidenceInterval)) : 0.0}; weights.clear(); weights.reserve(weights_.size()); @@ -2411,7 +2412,7 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, weights.push_back(weight[d]); } - TUnivariatePriorPtr marginal{m_Prior->univariate(marginalize, CONDITION).first}; + TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, CONDITION).first}; double median{CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0))}; TDoubleDoublePr interval{ marginal->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; @@ -2470,8 +2471,8 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &pa TDouble10Vec(dimension))}; for (std::size_t d = 0u; d < dimension; ++d) { - sample[0][d] = m_Trend[d]->detrend(time, value[0][d], - params.seasonalConfidenceInterval()); + sample[0][d] = m_TrendModel[d]->detrend(time, value[0][d], + params.seasonalConfidenceInterval()); } for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) { @@ -2494,10 +2495,10 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &pa { maths_t::EProbabilityCalculation calculation = params.calculation(i); coordinate[0] = coordinates[i]; - if (!m_Prior->probabilityOfLessLikelySamples(calculation, - params.weightStyles(), - sample, weights, coordinate, - pls, pus, tail_)) + if (!m_ResidualModel->probabilityOfLessLikelySamples(calculation, + params.weightStyles(), + sample, weights, coordinate, + pls, pus, tail_)) { LOG_ERROR("Failed to compute P(" << sample << " | weight = " << weights << ")"); return false; @@ -2528,7 +2529,7 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &pa if (m_AnomalyModel) { TDouble2Vec residual(dimension); - TDouble10Vec nearest(m_Prior->nearestMarginalLikelihoodMean(sample[0])); + TDouble10Vec nearest(m_ResidualModel->nearestMarginalLikelihoodMean(sample[0])); TDouble2Vec scale(this->seasonalWeight(0.0, time)); for (std::size_t i = 0u; i < dimension; ++i) { @@ -2554,12 +2555,12 @@ CMultivariateTimeSeriesModel::winsorisationWeight(double derate, TDouble10Vec sample(dimension); for (std::size_t d = 0u; d < dimension; ++d) { - sample[d] = m_Trend[d]->detrend(time, value[d], 0.0); + sample[d] = m_TrendModel[d]->detrend(time, value[d], 0.0); } for (std::size_t d = 0u; d < dimension; ++d) { - result[d] = computeWinsorisationWeight(*m_Prior, d, derate, scale[d], sample); + result[d] = computeWinsorisationWeight(*m_ResidualModel, d, derate, scale[d], sample); } return result; @@ -2569,10 +2570,10 @@ CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const { TDouble2Vec result(this->dimension()); - TDouble10Vec variances(m_Prior->marginalLikelihoodVariances()); + TDouble10Vec variances(m_ResidualModel->marginalLikelihoodVariances()); for (std::size_t d = 0u, dimension = this->dimension(); d < dimension; ++d) { - double scale{m_Trend[d]->scale(time, variances[d], confidence).second}; + double scale{m_TrendModel[d]->scale(time, variances[d], confidence).second}; result[d] = std::max(scale, this->params().minimumSeasonalVarianceScale()); } return result; @@ -2582,8 +2583,8 @@ uint64_t CMultivariateTimeSeriesModel::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_IsNonNegative); seed = CChecksum::calculate(seed, m_Controllers); - seed = CChecksum::calculate(seed, m_Trend); - seed = CChecksum::calculate(seed, m_Prior); + 
seed = CChecksum::calculate(seed, m_TrendModel); + seed = CChecksum::calculate(seed, m_ResidualModel); seed = CChecksum::calculate(seed, m_AnomalyModel); return CChecksum::calculate(seed, m_SlidingWindow); } @@ -2592,8 +2593,8 @@ void CMultivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryU { mem->setName("CUnivariateTimeSeriesModel"); core::CMemoryDebug::dynamicSize("m_Controllers", m_Controllers, mem); - core::CMemoryDebug::dynamicSize("m_Trend", m_Trend, mem); - core::CMemoryDebug::dynamicSize("m_Prior", m_Prior, mem); + core::CMemoryDebug::dynamicSize("m_TrendModel", m_TrendModel, mem); + core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem); core::CMemoryDebug::dynamicSize("m_AnomalyModel", m_AnomalyModel, mem); core::CMemoryDebug::dynamicSize("m_SlidingWindow", m_SlidingWindow, mem); } @@ -2601,8 +2602,8 @@ void CMultivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryU std::size_t CMultivariateTimeSeriesModel::memoryUsage(void) const { return core::CMemory::dynamicSize(m_Controllers) - + core::CMemory::dynamicSize(m_Trend) - + core::CMemory::dynamicSize(m_Prior) + + core::CMemory::dynamicSize(m_TrendModel) + + core::CMemory::dynamicSize(m_ResidualModel) + core::CMemory::dynamicSize(m_AnomalyModel) + core::CMemory::dynamicSize(m_SlidingWindow); } @@ -2621,17 +2622,17 @@ bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestorePar m_Controllers = boost::make_shared(), core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser), /**/) - RESTORE_SETUP_TEARDOWN(TREND_6_3_TAG, - m_Trend.push_back(TDecompositionPtr()), + RESTORE_SETUP_TEARDOWN(TREND_MODEL_6_3_TAG, + m_TrendModel.push_back(TDecompositionPtr()), traverser.traverseSubLevel(boost::bind( CTimeSeriesDecompositionStateSerialiser(), boost::cref(params.s_DecompositionParams), - boost::ref(m_Trend.back()), _1)), + boost::ref(m_TrendModel.back()), _1)), /**/) - RESTORE(PRIOR_6_3_TAG, traverser.traverseSubLevel(boost::bind( - CPriorStateSerialiser(), - boost::cref(params.s_DistributionParams), - boost::ref(m_Prior), _1))) + RESTORE(RESIDUAL_MODEL_6_3_TAG, traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), + boost::cref(params.s_DistributionParams), + boost::ref(m_ResidualModel), _1))) RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_6_3_TAG, m_AnomalyModel = boost::make_shared(), traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser, @@ -2652,16 +2653,16 @@ bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestorePar core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser), /**/) RESTORE_SETUP_TEARDOWN(TREND_OLD_TAG, - m_Trend.push_back(TDecompositionPtr()), + m_TrendModel.push_back(TDecompositionPtr()), traverser.traverseSubLevel(boost::bind( CTimeSeriesDecompositionStateSerialiser(), boost::cref(params.s_DecompositionParams), - boost::ref(m_Trend.back()), _1)), + boost::ref(m_TrendModel.back()), _1)), /**/) RESTORE(PRIOR_OLD_TAG, traverser.traverseSubLevel(boost::bind( CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), - boost::ref(m_Prior), _1))) + boost::ref(m_ResidualModel), _1))) RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_OLD_TAG, m_AnomalyModel = boost::make_shared(), traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser, @@ -2683,13 +2684,13 @@ void CMultivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInse { core::CPersistUtils::persist(CONTROLLER_6_3_TAG, *m_Controllers, inserter); } - for (const 
auto &trend : m_Trend) + for (const auto &trend : m_TrendModel) { - inserter.insertLevel(TREND_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(), - boost::cref(*trend), _1)); + inserter.insertLevel(TREND_MODEL_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(), + boost::cref(*trend), _1)); } - inserter.insertLevel(PRIOR_6_3_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(*m_Prior), _1)); + inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(*m_ResidualModel), _1)); if (m_AnomalyModel) { inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, @@ -2701,22 +2702,24 @@ void CMultivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInse maths_t::EDataType CMultivariateTimeSeriesModel::dataType(void) const { - return m_Prior->dataType(); + return m_ResidualModel->dataType(); } -const CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf &CMultivariateTimeSeriesModel::slidingWindow(void) const +const CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf & +CMultivariateTimeSeriesModel::slidingWindow(void) const { return m_SlidingWindow; } -const CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel::TDecompositionPtr10Vec &CMultivariateTimeSeriesModel::trend(void) const +const CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel::TDecompositionPtr10Vec & +CMultivariateTimeSeriesModel::trendModel(void) const { - return m_Trend; + return m_TrendModel; } -const CMultivariatePrior &CMultivariateTimeSeriesModel::prior(void) const +const CMultivariatePrior &CMultivariateTimeSeriesModel::residualModel(void) const { - return *m_Prior; + return *m_ResidualModel; } CMultivariateTimeSeriesModel::EUpdateResult @@ -2731,7 +2734,7 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weight if (sample.second.size() != dimension) { LOG_ERROR("Dimension mismatch: '" - << sample.second.size() << " != " << m_Trend.size() << "'"); + << sample.second.size() << " != " << m_TrendModel.size() << "'"); return E_Failure; } } @@ -2762,7 +2765,7 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weight { weight[j] = weights[i][j][d]; } - if (m_Trend[d]->addPoint(time, value[d], weightStyles, weight)) + if (m_TrendModel[d]->addPoint(time, value[d], weightStyles, weight)) { result = E_Reset; } @@ -2771,7 +2774,7 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weight } if (result == E_Reset) { - m_Prior->setToNonInformative(0.0, m_Prior->decayRate()); + m_ResidualModel->setToNonInformative(0.0, m_ResidualModel->decayRate()); TDouble10Vec4Vec1Vec weight{{TDouble10Vec( dimension, std::max(this->params().learnRate(), 5.0 / static_cast(SLIDING_WINDOW_SIZE)))}}; @@ -2780,15 +2783,15 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weight TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; for (std::size_t i = 0u; i < dimension; ++i) { - sample[0][i] = m_Trend[i]->detrend(value.first, value.second[i], 0.0); + sample[0][i] = m_TrendModel[i]->detrend(value.first, value.second[i], 0.0); } - m_Prior->addSamples(CConstantWeights::COUNT, sample, weight); + m_ResidualModel->addSamples(CConstantWeights::COUNT, sample, weight); } if (m_Controllers) { - m_Prior->decayRate( m_Prior->decayRate() - / (*m_Controllers)[E_PriorControl].multiplier()); - for (auto &trend : m_Trend) + m_ResidualModel->decayRate( m_ResidualModel->decayRate() + / (*m_Controllers)[E_ResidualControl].multiplier()); + for (auto &trend : m_TrendModel) { trend->decayRate( 
trend->decayRate() / (*m_Controllers)[E_TrendControl].multiplier()); @@ -2811,11 +2814,11 @@ void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, const TDouble2Vec &sample, TDouble1VecVec (&result)[2]) { - if (auto error = predictionError(interval, m_Prior, sample)) + if (auto error = predictionError(interval, m_ResidualModel, sample)) { - result[E_PriorControl].push_back(*error); + result[E_ResidualControl].push_back(*error); } - if (auto error = predictionError(m_Trend, sample)) + if (auto error = predictionError(m_TrendModel, sample)) { result[E_TrendControl].push_back(*error); } @@ -2823,7 +2826,7 @@ void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, std::size_t CMultivariateTimeSeriesModel::dimension(void) const { - return m_Prior->dimension(); + return m_ResidualModel->dimension(); } } diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc index 75579d833d..3e56cc1609 100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc @@ -118,16 +118,16 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines(void) for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { - TDoubleDoublePr baseline = decomposition.baseline(t, 70.0); - double residual = ::fabs(trend[t / HALF_HOUR] - mean(baseline)); + TDoubleDoublePr prediction = decomposition.value(t, 70.0); + double residual = ::fabs(trend[t / HALF_HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trend[t / HALF_HOUR]); maxValue = std::max(maxValue, ::fabs(trend[t / HALF_HOUR])); - percentileError += std::max(std::max(baseline.first - trend[t / HALF_HOUR], - trend[t / HALF_HOUR] - baseline.second), 0.0); - //f.push_back(mean(baseline)); - //r.push_back(mean(baseline) - trend[t / HALF_HOUR]); + percentileError += std::max(std::max(prediction.first - trend[t / HALF_HOUR], + trend[t / HALF_HOUR] - prediction.second), 0.0); + //f.push_back(mean(value)); + //r.push_back(mean(value) - trend[t / HALF_HOUR]); } LOG_DEBUG("'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -285,19 +285,19 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic(void) static_cast(tt / HOUR) < boost::size(timeseries); tt += HOUR) { - TDoubleDoublePr baseline = decomposition.baseline(tt, 70.0); + TDoubleDoublePr prediction = decomposition.value(tt, 70.0); - double residual = ::fabs(timeseries[tt / HOUR] - mean(baseline)); + double residual = ::fabs(timeseries[tt / HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(timeseries[tt / HOUR]); maxValue = std::max(maxValue, ::fabs(timeseries[tt / HOUR])); - percentileError += std::max(std::max(baseline.first - timeseries[tt / HOUR], - timeseries[tt / HOUR] - baseline.second), 0.0); + percentileError += std::max(std::max(prediction.first - timeseries[tt / HOUR], + timeseries[tt / HOUR] - prediction.second), 0.0); //t.push_back(tt); //f.push_back(timeseries[tt / HOUR]); - //fe.push_back(mean(baseline)); + //fe.push_back(mean(value)); } LOG_DEBUG("'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -399,17 +399,17 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents(void) for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { - TDoubleDoublePr baseline = decomposition.baseline(t, 70.0); + TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double 
residual = ::fabs(trend[t / HALF_HOUR] - mean(baseline)); + double residual = ::fabs(trend[t / HALF_HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trend[t / HALF_HOUR]); maxValue = std::max(maxValue, ::fabs(trend[t / HALF_HOUR])); - percentileError += std::max(std::max(baseline.first - trend[t / HALF_HOUR], - trend[t / HALF_HOUR] - baseline.second), 0.0); + percentileError += std::max(std::max(prediction.first - trend[t / HALF_HOUR], + trend[t / HALF_HOUR] - prediction.second), 0.0); - //f.push_back(mean(baseline)); + //f.push_back(mean(value)); //r.push_back(residual); } @@ -526,17 +526,17 @@ void CTimeSeriesDecompositionTest::testWeekend(void) for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { - TDoubleDoublePr baseline = decomposition.baseline(t, 70.0); + TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = ::fabs(trend[t / HALF_HOUR] - mean(baseline)); + double residual = ::fabs(trend[t / HALF_HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trend[t / HALF_HOUR]); maxValue = std::max(maxValue, ::fabs(trend[t / HALF_HOUR])); - percentileError += std::max(std::max(baseline.first - trend[t / HALF_HOUR], - trend[t / HALF_HOUR] - baseline.second), 0.0); + percentileError += std::max(std::max(prediction.first - trend[t / HALF_HOUR], + trend[t / HALF_HOUR] - prediction.second), 0.0); - //f.push_back(mean(baseline)); + //f.push_back(mean(value)); //r.push_back(residual); } @@ -637,17 +637,17 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity(void) t < lastWeek + WEEK; t += HALF_HOUR) { - TDoubleDoublePr baseline = decomposition.baseline(t, 70.0); + TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = ::fabs(trend[t / HALF_HOUR] + noiseMean - mean(baseline)); + double residual = ::fabs(trend[t / HALF_HOUR] + noiseMean - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trend[t / HALF_HOUR]); maxValue = std::max(maxValue, ::fabs(trend[t / HALF_HOUR])); - percentileError += std::max(std::max(baseline.first - (trend[t / HALF_HOUR] + noiseMean), - (trend[t / HALF_HOUR] + noiseMean) - baseline.second), 0.0); + percentileError += std::max(std::max(prediction.first - (trend[t / HALF_HOUR] + noiseMean), + (trend[t / HALF_HOUR] + noiseMean) - prediction.second), 0.0); - //f.push_back(mean(baseline)); + //f.push_back(mean(value)); //r.push_back(residual); } @@ -719,14 +719,14 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset(void) TDoubleVec trend; for (core_t::TTime time = 0; time < 150 * WEEK + 1; time += HOUR) { - double baseline = 0.0; + double value = 0.0; if (time > 10 * WEEK) { - baseline += daily[(time % DAY) / HOUR]; - baseline *= weekly[(time % WEEK) / DAY]; + value += daily[(time % DAY) / HOUR]; + value *= weekly[(time % WEEK) / DAY]; } times.push_back(time); - trend.push_back(baseline); + trend.push_back(value); } test::CRandomNumbers rng; @@ -769,16 +769,16 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset(void) double percentileError = 0.0; for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HOUR) { - TDoubleDoublePr baseline = decomposition.baseline(t, 70.0); + TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = ::fabs(trend[t / HOUR] - mean(baseline)); + double residual = ::fabs(trend[t / HOUR] - mean(prediction)); sumResidual += residual; maxResidual = 
std::max(maxResidual, residual); sumValue += ::fabs(trend[t / HOUR]); maxValue = std::max(maxValue, ::fabs(trend[t / HOUR])); - percentileError += std::max(std::max(baseline.first - trend[t / HOUR], - trend[t / HOUR] - baseline.second), 0.0); - //f.push_back(mean(baseline)); + percentileError += std::max(std::max(prediction.first - trend[t / HOUR], + trend[t / HOUR] - prediction.second), 0.0); + //f.push_back(mean(value)); //r.push_back(residual); } @@ -850,16 +850,16 @@ void CTimeSeriesDecompositionTest::testVarianceScale(void) { for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) { - double baseline = 1.0; + double value = 1.0; double variance = 1.0; if (t >= 3600 && t < 7200) { - baseline = 5.0; + value = 5.0; variance = 10.0; } - TDoubleVec value; - rng.generateNormalSamples(baseline, variance, 1, value); - decomposition.addPoint(time + t, value[0]); + TDoubleVec noise; + rng.generateNormalSamples(value, variance, 1, noise); + decomposition.addPoint(time + t, noise[0]); } time += DAY; } @@ -904,17 +904,17 @@ void CTimeSeriesDecompositionTest::testVarianceScale(void) { for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) { - double baseline = 5.0 * ::sin(boost::math::double_constants::two_pi - * static_cast(t) - / static_cast(DAY)); + double value = 5.0 * ::sin(boost::math::double_constants::two_pi + * static_cast(t) + / static_cast(DAY)); double variance = 1.0; if (t >= 3600 && t < 7200) { variance = 10.0; } - TDoubleVec value; - rng.generateNormalSamples(0.0, variance, 1, value); - decomposition.addPoint(time + t, baseline + value[0]); + TDoubleVec noise; + rng.generateNormalSamples(0.0, variance, 1, noise); + decomposition.addPoint(time + t, value + noise[0]); } time += DAY; } @@ -1041,15 +1041,15 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase(void) for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr baseline = decomposition.baseline(lastWeekTimeseries[j].first, 70.0); + TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = ::fabs(lastWeekTimeseries[j].second - mean(baseline)); + double residual = ::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, ::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max(std::max(baseline.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - baseline.second), 0.0); + percentileError += std::max(std::max(prediction.first - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction.second), 0.0); } LOG_DEBUG("'sum residual' / 'sum value' = " @@ -1096,7 +1096,7 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase(void) //file.open("results.m"); //TTimeVec times; //TDoubleVec raw; - //TDoubleVec baseline; + //TDoubleVec values; //TDoubleVec scales; //TDoubleVec probs; @@ -1121,7 +1121,7 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase(void) //times.push_back(time); //raw.push_back(value); - //baseline.push_back(mean(decomposition.baseline(time, 70.0))); + //values.push_back(mean(decomposition.value(time, 70.0))); //scales.push_back(mean(decomposition.scale(time, variance, 70.0))); //probs.push_back(-::log(pScaled)); @@ -1138,7 +1138,7 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase(void) //file << "hold on;\n"; //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; //file << "r = " << 
core::CContainerPrinter::print(raw) << ";\n"; - //file << "b = " << core::CContainerPrinter::print(baseline) << ";\n"; + //file << "b = " << core::CContainerPrinter::print(values) << ";\n"; //file << "s = " << core::CContainerPrinter::print(scales) << ";\n"; //file << "p = " << core::CContainerPrinter::print(probs) << ";\n"; //file << "subplot(3,1,1); hold on; plot(t, r, 'b'); plot(t, b, 'r');\n"; @@ -1204,19 +1204,19 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase(void) for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr baseline = decomposition.baseline(lastWeekTimeseries[j].first, 70.0); + TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = ::fabs(lastWeekTimeseries[j].second - mean(baseline)); + double residual = ::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, ::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max(std::max(baseline.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - baseline.second), 0.0); + percentileError += std::max(std::max(prediction.first - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction.second), 0.0); //times.push_back(lastWeekTimeseries[j].first); //values.push_back(lastWeekTimeseries[j].second); - //f.push_back(mean(baseline)); + //f.push_back(mean(value)); //r.push_back(residual); } @@ -1330,19 +1330,19 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase(void) for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr baseline = decomposition.baseline(lastWeekTimeseries[j].first, 70.0); + TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = ::fabs(lastWeekTimeseries[j].second - mean(baseline)); + double residual = ::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, ::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max(std::max(baseline.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - baseline.second), 0.0); + percentileError += std::max(std::max(prediction.first - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction.second), 0.0); //times.push_back(lastWeekTimeseries[j].first); //values.push_back(lastWeekTimeseries[j].second); - //f.push_back(mean(baseline)); + //f.push_back(mean(value)); //r.push_back(residual); } @@ -1425,12 +1425,12 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues(void) { error.add(::fabs( ( value + noise[0] - - maths::CBasicStatistics::mean(decomposition.baseline(time, 0.0)))) + - maths::CBasicStatistics::mean(decomposition.value(time, 0.0)))) / ::fabs(value + noise[0])); } //times.push_back(time); //values.push_back(value + noise[0]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.baseline(time, 0.0))); + //f.push_back(maths::CBasicStatistics::mean(decomposition.value(time, 0.0))); } time += HALF_HOUR; } @@ -1484,12 +1484,12 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues(void) { error.add(::fabs( ( value + noise[0] - - maths::CBasicStatistics::mean(decomposition.baseline(time, 0.0)))) + - maths::CBasicStatistics::mean(decomposition.value(time, 0.0)))) / 
::fabs(value + noise[0])); } //times.push_back(time); //values.push_back(value + noise[0]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.baseline(time, 0.0))); + //f.push_back(maths::CBasicStatistics::mean(decomposition.value(time, 0.0))); } time += HOUR; } @@ -1558,13 +1558,10 @@ void CTimeSeriesDecompositionTest::testLongTermTrend(void) double sumValue = 0.0; double maxValue = 0.0; - TDoubleVec baselines; - for (std::size_t j = i - 48; j < i; ++j) { - TDoubleDoublePr baseline = decomposition.baseline(times[j], 70.0); - baselines.push_back(mean(baseline)); - double residual = ::fabs(trend[j] - mean(baseline)); + TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); + double residual = ::fabs(trend[j] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trend[j]); @@ -1587,7 +1584,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrend(void) lastDay += DAY; } //values.push_back(trend[i] + noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.baseline(times[i]))); + //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG("total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); @@ -1649,13 +1646,10 @@ void CTimeSeriesDecompositionTest::testLongTermTrend(void) double sumValue = 0.0; double maxValue = 0.0; - TDoubleVec baselines; - for (std::size_t j = i - 48; j < i; ++j) { - TDoubleDoublePr baseline = decomposition.baseline(times[j], 70.0); - baselines.push_back(mean(baseline)); - double residual = ::fabs(trend[j] - mean(baseline)); + TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); + double residual = ::fabs(trend[j] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trend[j]); @@ -1675,7 +1669,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrend(void) lastDay += DAY; } //values.push_back(trend[i] + 0.3*noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.baseline(times[i]))); + //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG("total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); @@ -1748,13 +1742,10 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity(void) double sumValue = 0.0; double maxValue = 0.0; - TDoubleVec baselines; - for (std::size_t j = i - 48; j < i; ++j) { - TDoubleDoublePr baseline = decomposition.baseline(times[j], 70.0); - baselines.push_back(mean(baseline)); - double residual = ::fabs(trend[j] - mean(baseline)); + TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); + double residual = ::fabs(trend[j] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trend[j]); @@ -1777,7 +1768,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity(void) lastDay += DAY; } //values.push_back(trend[i] + 0.3 * noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.baseline(times[i]))); + //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG("total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); @@ -1852,13 +1843,10 @@ void CTimeSeriesDecompositionTest::testNonDiurnal(void) double sumValue = 0.0; double maxValue = 0.0; - TDoubleVec baselines; - for (std::size_t j = i - 12; j < i; ++j) { - TDoubleDoublePr baseline = decomposition.baseline(times[j], 70.0); - baselines.push_back(mean(baseline)); - 
double residual = ::fabs(trends[t][j] - mean(baseline)); + TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); + double residual = ::fabs(trends[t][j] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trends[t][j]); @@ -1881,7 +1869,7 @@ void CTimeSeriesDecompositionTest::testNonDiurnal(void) lastHour += HOUR; } //values.push_back(trends[t][i] + noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.baseline(times[i]))); + //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG("total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); @@ -1944,13 +1932,10 @@ void CTimeSeriesDecompositionTest::testNonDiurnal(void) double sumValue = 0.0; double maxValue = 0.0; - TDoubleVec baselines; - for (std::size_t j = i - 288; j < i; ++j) { - TDoubleDoublePr baseline = decomposition.baseline(times[j], 70.0); - baselines.push_back(mean(baseline)); - double residual = ::fabs(trend[j] - mean(baseline)); + TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); + double residual = ::fabs(trend[j] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += ::fabs(trend[j]); @@ -1973,7 +1958,7 @@ void CTimeSeriesDecompositionTest::testNonDiurnal(void) lastTwoDay += 2 * DAY; } //values.push_back(trend[i] + noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.baseline(times[i]))); + //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG("total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); @@ -2018,7 +2003,7 @@ void CTimeSeriesDecompositionTest::testYearly(void) decomposition.addPoint(time, trend + noise[0]); if (decomposition.initialized()) { - TDouble1Vec prediction{decomposition.mean(time)}; + TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{decomposition.detrend(time, trend, 0.0)}; double multiplier{controller.multiplier( prediction, {predictionError}, 4 * HOUR, 1.0, 0.0005)}; @@ -2026,11 +2011,11 @@ void CTimeSeriesDecompositionTest::testYearly(void) } } - std::ofstream file; - file.open("results.m"); - TDoubleVec f; - TTimeVec times; - TDoubleVec values; + //std::ofstream file; + //file.open("results.m"); + //TDoubleVec f; + //TTimeVec times; + //TDoubleVec values; // Predict over one year and check we get reasonable accuracy. 
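// editor's note: a minimal sketch (not part of the patch) of the decay rate
// control loop this test drives through the renamed interface: meanValue()
// supplies the prediction level and detrend() the prediction error, exactly
// as in the update loop above; feeding the multiplier back into the
// decomposition's decay rate is the assumed follow-up step.
//
//     TDouble1Vec prediction{decomposition.meanValue(time)};
//     TDouble1Vec predictionError{decomposition.detrend(time, trend, 0.0)};
//     double multiplier{controller.multiplier(prediction, {predictionError},
//                                             4 * HOUR, 1.0, 0.0005)};
//     decomposition.decayRate(multiplier * decomposition.decayRate());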
TMeanAccumulator meanError; @@ -2042,12 +2027,12 @@ void CTimeSeriesDecompositionTest::testYearly(void) + 7.5 * ::sin( boost::math::double_constants::two_pi * static_cast(time) / static_cast(DAY)); - double prediction = maths::CBasicStatistics::mean(decomposition.baseline(time, 0.0)); + double prediction = maths::CBasicStatistics::mean(decomposition.value(time, 0.0)); double error = ::fabs((prediction - trend) / trend); meanError.add(error); - times.push_back(time); - values.push_back(trend); - f.push_back(prediction); + //times.push_back(time); + //values.push_back(trend); + //f.push_back(prediction); if (time / HOUR % 40 == 0) { LOG_DEBUG("error = " << error); @@ -2124,7 +2109,7 @@ void CTimeSeriesDecompositionTest::testCalendar(void) for (core_t::TTime time_ = time - DAY; time_ < time; time_ += TEN_MINS) { - double prediction = maths::CBasicStatistics::mean(decomposition.baseline(time_)); + double prediction = maths::CBasicStatistics::mean(decomposition.value(time_)); double variance = 4.0 * maths::CBasicStatistics::mean(decomposition.scale(time_, 4.0, 0.0)); double actual = trend(time_); if (::fabs(prediction - actual) / ::sqrt(variance) > 3.0) @@ -2143,7 +2128,7 @@ void CTimeSeriesDecompositionTest::testCalendar(void) //times.push_back(time); //values.push_back(trend(time) + noise[0]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.baseline(time, 0.0))); + //f.push_back(maths::CBasicStatistics::mean(decomposition.value(time, 0.0))); } //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; @@ -2350,7 +2335,7 @@ void CTimeSeriesDecompositionTest::testUpgrade(void) CPPUNIT_ASSERT_EQUAL(0.01, decomposition.decayRate()); - double meanValue{decomposition.mean(60480000)}; + double meanValue{decomposition.meanValue(60480000)}; double meanVariance{decomposition.meanVariance()}; LOG_DEBUG("restored mean value = " << meanValue); LOG_DEBUG("restored mean variance = " << meanVariance); @@ -2363,7 +2348,7 @@ void CTimeSeriesDecompositionTest::testUpgrade(void) { TDoubleDoublePr expectedValue{stringToPair(expectedValues[i])}; TDoubleDoublePr expectedScale{stringToPair(expectedScales[i])}; - TDoubleDoublePr value{decomposition.baseline(time, 10.0)}; + TDoubleDoublePr value{decomposition.value(time, 10.0)}; TDoubleDoublePr scale{decomposition.scale(time, 286374.0, 10.0)}; CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, value.first, @@ -2415,7 +2400,7 @@ void CTimeSeriesDecompositionTest::testUpgrade(void) CPPUNIT_ASSERT_EQUAL(0.024, decomposition.decayRate()); - double meanValue{decomposition.mean(10366200)}; + double meanValue{decomposition.meanValue(10366200)}; double meanVariance{decomposition.meanVariance()}; LOG_DEBUG("restored mean value = " << meanValue); LOG_DEBUG("restored mean variance = " << meanVariance); @@ -2430,7 +2415,7 @@ void CTimeSeriesDecompositionTest::testUpgrade(void) { TDoubleDoublePr expectedValue{stringToPair(expectedValues[i])}; TDoubleDoublePr expectedScale{stringToPair(expectedScales[i])}; - TDoubleDoublePr value{decomposition.baseline(time, 10.0)}; + TDoubleDoublePr value{decomposition.value(time, 10.0)}; TDoubleDoublePr scale{decomposition.scale(time, 96.1654, 10.0)}; CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, value.first, diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index f5fd12320c..1de281ec2f 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -316,7 +316,7 @@ void CTimeSeriesModelTest::testMode(void) 
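// editor's note: a sketch (not part of the patch) of the identity the hunks
// below now express with the renamed methods: the expected mode of the time
// series is the trend model's predicted value plus the residual model's
// marginal likelihood mode, with `trend` and `prior` the test's reference
// objects as in the surrounding code.
//
//     double expectedMode{  maths::CBasicStatistics::mean(trend.value(time))
//                         + prior.marginalLikelihoodMode()};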
model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } - double expectedMode{ maths::CBasicStatistics::mean(trend.baseline(time)) + double expectedMode{ maths::CBasicStatistics::mean(trend.value(time)) + prior.marginalLikelihoodMode()}; TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight)); @@ -373,7 +373,7 @@ void CTimeSeriesModelTest::testMode(void) time += bucketLength; } - double expectedMode{ maths::CBasicStatistics::mean(trend.baseline(time)) + double expectedMode{ maths::CBasicStatistics::mean(trend.value(time)) + prior.marginalLikelihoodMode()}; TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight)); @@ -428,7 +428,7 @@ void CTimeSeriesModelTest::testMode(void) maths::CConstantWeights::unit(3))); for (std::size_t i = 0u; i < trends.size(); ++i) { - expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->baseline(time)); + expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->value(time)); } TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, @@ -504,7 +504,7 @@ void CTimeSeriesModelTest::testMode(void) maths::CConstantWeights::unit(3))); for (std::size_t i = 0u; i < trends.size(); ++i) { - expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->baseline(time)); + expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->value(time)); } TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, @@ -558,7 +558,7 @@ void CTimeSeriesModelTest::testAddBucketValue(void) model.addSamples(params, samples); model.addBucketValue({core::make_triple(core_t::TTime{20}, TDouble2Vec{-1.0}, TAG)}); - CPPUNIT_ASSERT_EQUAL(prior.checksum(), model.prior().checksum()); + CPPUNIT_ASSERT_EQUAL(prior.checksum(), model.residualModel().checksum()); } void CTimeSeriesModelTest::testAddSamples(void) @@ -604,11 +604,11 @@ void CTimeSeriesModelTest::testAddSamples(void) prior.propagateForwardsByTime(1.0); uint64_t checksum1{trend.checksum()}; - uint64_t checksum2{model.trend().checksum()}; + uint64_t checksum2{model.trendModel().checksum()}; LOG_DEBUG("checksum1 = " << checksum1 << " checksum2 = " << checksum2); CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); checksum1 = prior.checksum(); - checksum2 = model.prior().checksum(); + checksum2 = model.residualModel().checksum(); LOG_DEBUG("checksum1 = " << checksum1 << " checksum2 = " << checksum2); CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); } @@ -652,12 +652,12 @@ void CTimeSeriesModelTest::testAddSamples(void) for (std::size_t i = 0u; i < trends.size(); ++i) { uint64_t checksum1{trends[i]->checksum()}; - uint64_t checksum2{model.trend()[i]->checksum()}; + uint64_t checksum2{model.trendModel()[i]->checksum()}; LOG_DEBUG("checksum1 = " << checksum1 << " checksum2 = " << checksum2); CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); } uint64_t checksum1{prior.checksum()}; - uint64_t checksum2{model.prior().checksum()}; + uint64_t checksum2{model.residualModel().checksum()}; LOG_DEBUG("checksum1 = " << checksum1 << " checksum2 = " << checksum2); CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); } @@ -692,7 +692,7 @@ void CTimeSeriesModelTest::testAddSamples(void) prior.propagateForwardsByTime(interval[i]); uint64_t checksum1{prior.checksum()}; - uint64_t checksum2{model.prior().checksum()}; + uint64_t checksum2{model.residualModel().checksum()}; LOG_DEBUG("checksum1 = " << checksum1 << " checksum2 = " << checksum2); CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); @@ -740,7 +740,7 @@ void CTimeSeriesModelTest::testAddSamples(void) 
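// editor's note: the assertions below switch to the renamed accessors,
// prior() -> residualModel() and trend() -> trendModel(). A minimal sketch
// of the checksum pattern they use (not part of the patch; `prior` is the
// test's reference prior and `model` the model under test):
//
//     uint64_t expected{prior.checksum()};
//     uint64_t actual{model.residualModel().checksum()};
//     CPPUNIT_ASSERT_EQUAL(expected, actual);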
prior.propagateForwardsByTime(interval[i]); uint64_t checksum1{prior.checksum()}; - uint64_t checksum2{model.prior().checksum()}; + uint64_t checksum2{model.residualModel().checksum()}; LOG_DEBUG("checksum1 = " << checksum1 << " checksum2 = " << checksum2); CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); @@ -798,7 +798,7 @@ void CTimeSeriesModelTest::testAddSamples(void) if (trend.initialized()) { - double multiplier{controllers[0].multiplier({trend.mean(time)}, + double multiplier{controllers[0].multiplier({trend.meanValue(time)}, {{detrended}}, bucketLength, model.params().learnRate(), @@ -816,10 +816,10 @@ void CTimeSeriesModelTest::testAddSamples(void) } uint64_t checksum1{trend.checksum()}; - uint64_t checksum2{model.trend().checksum()}; + uint64_t checksum2{model.trendModel().checksum()}; CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); checksum1 = prior.checksum(); - checksum2 = model.prior().checksum(); + checksum2 = model.residualModel().checksum(); CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); time += bucketLength; @@ -862,7 +862,7 @@ void CTimeSeriesModelTest::testAddSamples(void) + (time / bucketLength > 1800 ? 10.0 : 0.0) + sample[i]; reinitialize |= trends[i]->addPoint(time, sample[i]); detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); - mean[i] = trends[i]->mean(time); + mean[i] = trends[i]->meanValue(time); hasTrend |= true; amplitude += 4.0; } @@ -913,11 +913,11 @@ void CTimeSeriesModelTest::testAddSamples(void) for (std::size_t i = 0u; i < trends.size(); ++i) { uint64_t checksum1{trends[i]->checksum()}; - uint64_t checksum2{model.trend()[i]->checksum()}; + uint64_t checksum2{model.trendModel()[i]->checksum()}; CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); } uint64_t checksum1{prior.checksum()}; - uint64_t checksum2{model.prior().checksum()}; + uint64_t checksum2{model.residualModel().checksum()}; CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); time += bucketLength; @@ -985,7 +985,7 @@ void CTimeSeriesModelTest::testPredict(void) { double trend_{10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi * static_cast(time_) / 86400.0)}; - double expected{ maths::CBasicStatistics::mean(trend.baseline(time_)) + double expected{ maths::CBasicStatistics::mean(trend.value(time_)) + maths::CBasicStatistics::mean(prior.marginalLikelihoodConfidenceInterval(0.0))}; double predicted{model.predict(time_)[0]}; LOG_DEBUG("expected = " << expected @@ -1106,7 +1106,7 @@ void CTimeSeriesModelTest::testPredict(void) double trend_{mean[i] + 10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi * static_cast(time_) / 86400.0)}; maths::CMultivariatePrior::TUnivariatePriorPtr margin{prior.univariate(marginalize, condition).first}; - double expected{ maths::CBasicStatistics::mean(trends[i]->baseline(time_)) + double expected{ maths::CBasicStatistics::mean(trends[i]->value(time_)) + maths::CBasicStatistics::mean(margin->marginalLikelihoodConfidenceInterval(0.0))}; double predicted{model.predict(time_)[i]}; --marginalize[std::min(i, marginalize.size() - 1)]; @@ -1265,17 +1265,17 @@ void CTimeSeriesModelTest::testProbability(void) weights_.push_back(weight_[0]); } double lb[2], ub[2]; - models[0].prior().probabilityOfLessLikelySamples( - calculation, - weightStyles[i], - sample, {weights_}, - lb[0], ub[0], expectedTail[0]); - models[1].prior().probabilityOfLessLikelySamples( - calculation, - weightStyles[i], - {models[1].trend().detrend(time, sample[0], confidence)}, - {weights_}, - lb[1], ub[1], expectedTail[1]); + models[0].residualModel().probabilityOfLessLikelySamples( + calculation, + 
weightStyles[i], + sample, {weights_}, + lb[0], ub[0], expectedTail[0]); + models[1].residualModel().probabilityOfLessLikelySamples( + calculation, + weightStyles[i], + {models[1].trendModel().detrend(time, sample[0], confidence)}, + {weights_}, + lb[1], ub[1], expectedTail[1]); expectedProbability[0] = (lb[0] + ub[0]) / 2.0; expectedProbability[1] = (lb[1] + ub[1]) / 2.0; } @@ -1385,21 +1385,23 @@ void CTimeSeriesModelTest::testProbability(void) weights_.push_back(weight_); } double lb[2], ub[2]; - models[0].prior().probabilityOfLessLikelySamples(calculation, - weightStyles[i], - {TDouble10Vec(sample)}, - {weights_}, - lb[0], ub[0], expectedTail[0]); + models[0].residualModel().probabilityOfLessLikelySamples( + calculation, + weightStyles[i], + {TDouble10Vec(sample)}, + {weights_}, + lb[0], ub[0], expectedTail[0]); TDouble10Vec detrended; for (std::size_t j = 0u; j < sample.size(); ++j) { - detrended.push_back(models[1].trend()[j]->detrend(time, sample[j], confidence)); + detrended.push_back(models[1].trendModel()[j]->detrend(time, sample[j], confidence)); } - models[1].prior().probabilityOfLessLikelySamples(calculation, - weightStyles[i], - {detrended}, - {weights_}, - lb[1], ub[1], expectedTail[1]); + models[1].residualModel().probabilityOfLessLikelySamples( + calculation, + weightStyles[i], + {detrended}, + {weights_}, + lb[1], ub[1], expectedTail[1]); expectedProbability[0] = (lb[0] + ub[0]) / 2.0; expectedProbability[1] = (lb[1] + ub[1]) / 2.0; } diff --git a/lib/model/CEventRatePopulationModel.cc b/lib/model/CEventRatePopulationModel.cc index b7f7e90513..181b6d73f0 100644 --- a/lib/model/CEventRatePopulationModel.cc +++ b/lib/model/CEventRatePopulationModel.cc @@ -840,14 +840,14 @@ uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) con for (const auto &feature : m_FeatureCorrelatesModels) { - for (const auto &prior : feature.s_Models->correlatePriors()) + for (const auto &model : feature.s_Models->correlationModels()) { - std::size_t cids[]{prior.first.first, prior.first.second}; + std::size_t cids[]{model.first.first, model.first.second}; if (gatherer.isAttributeActive(cids[0]) && gatherer.isAttributeActive(cids[1])) { uint64_t &hash = hashes[{boost::cref(gatherer.attributeName(cids[0])), boost::cref(gatherer.attributeName(cids[1]))}]; - hash = maths::CChecksum::calculate(hash, prior.second); + hash = maths::CChecksum::calculate(hash, model.second); } } } diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc index 48a5b23c3f..1c03cac715 100644 --- a/lib/model/CIndividualModel.cc +++ b/lib/model/CIndividualModel.cc @@ -312,14 +312,14 @@ uint64_t CIndividualModel::checksum(bool includeCurrentBucketStats) const for (const auto &feature : m_FeatureCorrelatesModels) { - for (const auto &prior : feature.s_Models->correlatePriors()) + for (const auto &model : feature.s_Models->correlationModels()) { - std::size_t pids[]{prior.first.first, prior.first.second}; + std::size_t pids[]{model.first.first, model.first.second}; if (gatherer.isPersonActive(pids[0]) && gatherer.isPersonActive(pids[1])) { uint64_t &hash = hashes2[{boost::cref(this->personName(pids[0])), boost::cref(this->personName(pids[1]))}]; - hash = maths::CChecksum::calculate(hash, prior.second); + hash = maths::CChecksum::calculate(hash, model.second); } } } @@ -685,7 +685,7 @@ std::size_t CIndividualModel::numberCorrelations(void) const std::size_t result = 0u; for (const auto &feature : m_FeatureCorrelatesModels) { - result += feature.s_Models->correlatePriors().size(); + result 
+= feature.s_Models->correlationModels().size(); } return result; } diff --git a/lib/model/CMetricPopulationModel.cc b/lib/model/CMetricPopulationModel.cc index 136fca71f7..02964b1cac 100644 --- a/lib/model/CMetricPopulationModel.cc +++ b/lib/model/CMetricPopulationModel.cc @@ -750,14 +750,14 @@ uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const for (const auto &feature : m_FeatureCorrelatesModels) { - for (const auto &prior : feature.s_Models->correlatePriors()) + for (const auto &model : feature.s_Models->correlationModels()) { - std::size_t cids[]{prior.first.first, prior.first.second}; + std::size_t cids[]{model.first.first, model.first.second}; if (gatherer.isAttributeActive(cids[0]) && gatherer.isAttributeActive(cids[1])) { uint64_t &hash = hashes[{boost::cref(gatherer.attributeName(cids[0])), boost::cref(gatherer.attributeName(cids[1]))}]; - hash = maths::CChecksum::calculate(hash, prior.second); + hash = maths::CChecksum::calculate(hash, model.second); } } } diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index 20a5a79848..2ae9ec8f83 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -95,21 +95,21 @@ void generateEvents(const core_t::TTime &startTime, // Generate an ordered collection of event arrival times. test::CRandomNumbers rng; double bucketStartTime = static_cast(startTime); - for (std::size_t i = 0u; i < eventCountsPerBucket.size(); ++i) + for (auto count : eventCountsPerBucket) { double bucketEndTime = bucketStartTime + static_cast(bucketLength); TDoubleVec bucketEventTimes; rng.generateUniformSamples(bucketStartTime, bucketEndTime - 1.0, - static_cast(eventCountsPerBucket[i]), + static_cast(count), bucketEventTimes); std::sort(bucketEventTimes.begin(), bucketEventTimes.end()); - for (std::size_t j = 0u; j < bucketEventTimes.size(); ++j) + for (auto time_ : bucketEventTimes) { - core_t::TTime time = static_cast(bucketEventTimes[j]); + core_t::TTime time = static_cast(time_); time = std::min(static_cast(bucketEndTime - 1.0), std::max(static_cast(bucketStartTime), time)); eventArrivalTimes.push_back(time); @@ -127,43 +127,32 @@ void generateSporadicEvents(const core_t::TTime &startTime, // Generate an ordered collection of event arrival times. 
test::CRandomNumbers rng; double bucketStartTime = static_cast(startTime); - for (std::size_t i = 0u; i < nonZeroEventCountsPerBucket.size(); ++i) + for (auto count : nonZeroEventCountsPerBucket) { double bucketEndTime = bucketStartTime + static_cast(bucketLength); TDoubleVec bucketEventTimes; rng.generateUniformSamples(bucketStartTime, bucketEndTime - 1.0, - static_cast(nonZeroEventCountsPerBucket[i]), + static_cast(count), bucketEventTimes); std::sort(bucketEventTimes.begin(), bucketEventTimes.end()); - for (std::size_t j = 0u; j < bucketEventTimes.size(); ++j) + for (auto time_ : bucketEventTimes) { - core_t::TTime time = static_cast(bucketEventTimes[j]); + core_t::TTime time = static_cast(time_); time = std::min(static_cast(bucketEndTime - 1.0), std::max(static_cast(bucketStartTime), time)); eventArrivalTimes.push_back(time); } TDoubleVec gap; - rng.generateUniformSamples(0.0, 10.0 * static_cast(bucketLength), 1u, gap); - bucketStartTime += static_cast(bucketLength) - * ::ceil(gap[0] / static_cast(bucketLength)); + rng.generateUniformSamples(0.0, 10.0, 1u, gap); + bucketStartTime += static_cast(bucketLength) * ::ceil(gap[0]); } } -class CTimeLess -{ - public: - bool operator()(const CEventData &lhs, - const CEventData &rhs) const - { - return lhs.time() < rhs.time(); - } -}; - std::size_t addPerson(const std::string &p, const CModelFactory::TDataGathererPtr &gatherer, CResourceMonitor &resourceMonitor) @@ -184,7 +173,7 @@ std::size_t addPersonWithInfluence(const std::string &p, std::string i("i"); CDataGatherer::TStrCPtrVec person; person.push_back(&p); - for (std::size_t j = 0; j < numInfluencers; j++) + for (std::size_t j = 0; j < numInfluencers; ++j) { person.push_back(&i); } @@ -1266,7 +1255,9 @@ void CEventRateModelTest::testPrune(void) } } } - std::sort(events.begin(), events.end(), CTimeLess()); + std::sort(events.begin(), events.end(), + [](const CEventData &lhs, + const CEventData &rhs) { return lhs.time() < rhs.time(); }); TEventDataVec expectedEvents; expectedEvents.reserve(events.size()); @@ -2377,14 +2368,14 @@ void CEventRateModelTest::testSkipSampling(void) // Check priors are the same CPPUNIT_ASSERT_EQUAL( static_cast( - modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->prior().checksum(), + modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum(), static_cast( - modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->prior().checksum()); + modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum()); CPPUNIT_ASSERT_EQUAL( static_cast( - modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->prior().checksum(), + modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->residualModel().checksum(), static_cast( - modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->prior().checksum()); + modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->residualModel().checksum()); // Confirm last seen times are only updated by gap duration by forcing p2 to be pruned modelWithGap->sample(1200, 1500, m_ResourceMonitor); @@ -2461,14 +2452,14 @@ void CEventRateModelTest::testExplicitNulls(void) // Check priors are the same CPPUNIT_ASSERT_EQUAL( static_cast( - modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->prior().checksum(), + modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 
0))->residualModel().checksum(), static_cast( - modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->prior().checksum()); + modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum()); CPPUNIT_ASSERT_EQUAL( static_cast( - modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->prior().checksum(), + modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->residualModel().checksum(), static_cast( - modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->prior().checksum()); + modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->residualModel().checksum()); } void CEventRateModelTest::testInterimCorrections(void) @@ -3014,7 +3005,6 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules(void) // At the end the checksums for the underlying models should // be the same. - // Create a rule to filter buckets where the count > 100 CRuleCondition condition; condition.type(CRuleCondition::E_NumericalActual); @@ -3104,9 +3094,9 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules(void) CAnomalyDetectorModel::CModelDetailsViewPtr modelNoSkipView = modelNoSkip->details(); uint64_t withSkipChecksum = static_cast( - modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))->prior().checksum(); + modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum(); uint64_t noSkipChecksum = static_cast( - modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))->prior().checksum(); + modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum(); CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); // Check the last value times of the underlying models are the same @@ -3114,12 +3104,12 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules(void) dynamic_cast(modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0)); CPPUNIT_ASSERT(timeSeriesModel != 0); - core_t::TTime time = timeSeriesModel->trend().lastValueTime(); + core_t::TTime time = timeSeriesModel->trendModel().lastValueTime(); CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_IndividualCountByBucketAndPerson, startTime, bucketLength), time); // The last times of model with a skip should be the same timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0)); - CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); } CppUnit::Test *CEventRateModelTest::suite(void) diff --git a/lib/model/unittest/CEventRatePopulationModelTest.cc b/lib/model/unittest/CEventRatePopulationModelTest.cc index a9dd57b821..cebec9ae9e 100644 --- a/lib/model/unittest/CEventRatePopulationModelTest.cc +++ b/lib/model/unittest/CEventRatePopulationModelTest.cc @@ -1302,14 +1302,14 @@ void CEventRatePopulationModelTest::testSkipSampling() // Check priors are the same CPPUNIT_ASSERT_EQUAL( static_cast( - modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0))->prior().checksum(), + modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0))->residualModel().checksum(), static_cast( - modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0))->prior().checksum()); + 
modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0))->residualModel().checksum()); CPPUNIT_ASSERT_EQUAL( static_cast( - modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1))->prior().checksum(), + modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1))->residualModel().checksum(), static_cast( - modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1))->prior().checksum()); + modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1))->residualModel().checksum()); // Confirm last seen times are only updated by gap duration by forcing p2 and a2 to be pruned modelWithGap->sample(1200, 1500, m_ResourceMonitor); @@ -1593,23 +1593,23 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules(void) dynamic_cast(modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)); CPPUNIT_ASSERT(timeSeriesModel != 0); - core_t::TTime time = timeSeriesModel->trend().lastValueTime(); + core_t::TTime time = timeSeriesModel->trendModel().lastValueTime(); CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_PopulationCountByBucketPersonAndAttribute, 200, bucketLength), time); // The last times of the underlying time series models should all be the same timeSeriesModel = dynamic_cast(modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)); - CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); timeSeriesModel = dynamic_cast(modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2)); - CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)); - CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)); - CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2)); - CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 3)); - CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); } CppUnit::Test *CEventRatePopulationModelTest::suite(void) diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index c8f3c153bd..d3cbf447f7 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -696,7 +696,7 @@ void CMetricModelTest::testMultivariateSample(void) CPPUNIT_ASSERT(latLong == multivariateFeatureData(model, model_t::E_IndividualMeanLatLongByPerson, 0, time)); CPPUNIT_ASSERT_EQUAL(expectedMeanPrior->checksum(), dynamic_cast( - model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0))->prior().checksum()); + 
model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0))->residualModel().checksum()); // Test persistence. (We check for idempotency.) std::string origXml; @@ -1758,9 +1758,9 @@ void CMetricModelTest::testSkipSampling(void) CPPUNIT_ASSERT_EQUAL( static_cast( - modelNoGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->prior().checksum(), + modelNoGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->residualModel().checksum(), static_cast( - modelWithGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->prior().checksum()); + modelWithGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->residualModel().checksum()); } void CMetricModelTest::testExplicitNulls(void) @@ -1833,9 +1833,9 @@ void CMetricModelTest::testExplicitNulls(void) CPPUNIT_ASSERT_EQUAL( static_cast( - modelSkipGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->prior().checksum(), + modelSkipGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->residualModel().checksum(), static_cast( - modelExNullGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->prior().checksum()); + modelExNullGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->residualModel().checksum()); } void CMetricModelTest::testVarp(void) From eac04e90b4e5733521a82b0565ab5eff8fd3c8d7 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 12 Mar 2018 10:54:45 +0000 Subject: [PATCH 03/29] [ML] First pass implementation of support functionality for change detection and modelling (#9) This implements 1) a naive Bayes classifier, using our distribution models, which will be used for modelling the probability of a change, and 2) a change detector framework, currently supporting detecting level shifts and time shifts, which works by comparing BIC of the various possible hypotheses against one another and a null hypothesis that there is no change. --- include/maths/CNaiveBayes.h | 228 ++++++++ include/maths/CTimeSeriesChangeDetector.h | 376 +++++++++++++ lib/maths/CNaiveBayes.cc | 355 ++++++++++++ lib/maths/CTimeSeriesChangeDetector.cc | 509 ++++++++++++++++++ lib/maths/CTimeSeriesModel.cc | 1 - lib/maths/CTrendComponent.cc | 2 +- lib/maths/Makefile | 2 + lib/maths/unittest/CNaiveBayesTest.cc | 374 +++++++++++++ lib/maths/unittest/CNaiveBayesTest.h | 23 + .../unittest/CNaturalBreaksClassifierTest.h | 1 - .../unittest/CTimeSeriesChangeDetectorTest.cc | 352 ++++++++++++ .../unittest/CTimeSeriesChangeDetectorTest.h | 39 ++ lib/maths/unittest/Main.cc | 4 + lib/maths/unittest/Makefile | 2 + 14 files changed, 2265 insertions(+), 3 deletions(-) create mode 100644 include/maths/CNaiveBayes.h create mode 100644 include/maths/CTimeSeriesChangeDetector.h create mode 100644 lib/maths/CNaiveBayes.cc create mode 100644 lib/maths/CTimeSeriesChangeDetector.cc create mode 100644 lib/maths/unittest/CNaiveBayesTest.cc create mode 100644 lib/maths/unittest/CNaiveBayesTest.h create mode 100644 lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc create mode 100644 lib/maths/unittest/CTimeSeriesChangeDetectorTest.h diff --git a/include/maths/CNaiveBayes.h b/include/maths/CNaiveBayes.h new file mode 100644 index 0000000000..2f1997cc2c --- /dev/null +++ b/include/maths/CNaiveBayes.h @@ -0,0 +1,228 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+
+#ifndef INCLUDED_ml_maths_CNaiveBayes_h
+#define INCLUDED_ml_maths_CNaiveBayes_h
+
+#include
+
+#include
+
+#include
+
+#include
+#include
+
+namespace ml
+{
+namespace core
+{
+class CStatePersistInserter;
+class CStateRestoreTraverser;
+}
+namespace maths
+{
+struct SDistributionRestoreParams;
+
+//! \brief The interface expected by CNaiveBayes for implementations
+//! of the class conditional density functions.
+class MATHS_EXPORT CNaiveBayesFeatureDensity
+{
+    public:
+        using TDouble1Vec = core::CSmallVector<double, 1>;
+
+    public:
+        virtual ~CNaiveBayesFeatureDensity() = default;
+
+        //! Create and return a clone.
+        //!
+        //! \note The caller owns this.
+        virtual CNaiveBayesFeatureDensity *clone() const = 0;
+
+        //! Initialize by reading state from \p traverser.
+        virtual bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                            core::CStateRestoreTraverser &traverser) = 0;
+
+        //! Persist state by passing information to \p inserter.
+        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0;
+
+        //! Add the value \p x.
+        virtual void add(const TDouble1Vec &x) = 0;
+
+        //! Compute the log value of the density function at \p x.
+        virtual double logValue(const TDouble1Vec &x) const = 0;
+
+        //! Age out old values from the density to account for \p time passing.
+        virtual void propagateForwardsByTime(double time) = 0;
+
+        //! Debug the memory used by this object.
+        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
+
+        //! Get the static size of this object.
+        virtual std::size_t staticSize() const = 0;
+
+        //! Get the memory used by this object.
+        virtual std::size_t memoryUsage() const = 0;
+
+        //! Get a checksum for this object.
+        virtual uint64_t checksum(uint64_t seed) const = 0;
+};
+
+//! \brief An implementation of the class conditional density function
+//! based on the CPrior hierarchy.
+class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayesFeatureDensity
+{
+    public:
+        CNaiveBayesFeatureDensityFromPrior() = default;
+        CNaiveBayesFeatureDensityFromPrior(CPrior &prior);
+
+        //! Create and return a clone.
+        //!
+        //! \note The caller owns this.
+        virtual CNaiveBayesFeatureDensityFromPrior *clone() const;
+
+        //! Initialize by reading state from \p traverser.
+        virtual bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                            core::CStateRestoreTraverser &traverser);
+
+        //! Persist state by passing information to \p inserter.
+        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+
+        //! Add the value \p x.
+        virtual void add(const TDouble1Vec &x);
+
+        //! Compute the log value of the density function at \p x.
+        virtual double logValue(const TDouble1Vec &x) const;
+
+        //! Age out old values from the density to account for \p time passing.
+        virtual void propagateForwardsByTime(double time);
+
+        //! Debug the memory used by this object.
+        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+        //! Get the static size of this object.
+        virtual std::size_t staticSize() const;
+
+        //! Get the memory used by this object.
+        virtual std::size_t memoryUsage() const;
+
+        //! Get a checksum for this object.
+        virtual uint64_t checksum(uint64_t seed) const;
+
+    private:
+        using TPriorPtr = boost::shared_ptr<CPrior>;
+
+    private:
+        //! The density model.
+        TPriorPtr m_Prior;
+};
+
+//! \brief Implements a Naive Bayes classifier.
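+//!
+//! A sketch of the intended usage (illustrative only, not part of this
+//! patch: the choice of CNormalMeanPrecConjugate for the feature
+//! densities and all of the values are hypothetical):
+//! \code
+//! CNormalMeanPrecConjugate prior{
+//!         CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)};
+//! CNaiveBayes classifier{CNaiveBayesFeatureDensityFromPrior{prior}};
+//! // Train with feature vectors comprising two features per class label.
+//! classifier.addTrainingDataPoint(0, {{1.0}, {1.5}});
+//! classifier.addTrainingDataPoint(1, {{5.0}, {0.5}});
+//! // Get the most probable class of a new feature vector and its probability.
+//! auto top = classifier.highestClassProbabilities(1, {{4.5}, {0.7}});
+//! \endcode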
+class MATHS_EXPORT CNaiveBayes
+{
+    public:
+        using TDoubleSizePr = std::pair<double, std::size_t>;
+        using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
+        using TDouble1Vec = core::CSmallVector<double, 1>;
+        using TDouble1VecVec = std::vector<TDouble1Vec>;
+
+    public:
+        explicit CNaiveBayes(const CNaiveBayesFeatureDensity &exemplar,
+                             double decayRate = 0.0);
+        CNaiveBayes(const SDistributionRestoreParams &params,
+                    core::CStateRestoreTraverser &traverser);
+
+        //! Persist state by passing information to \p inserter.
+        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+
+        //! This can be used to optionally seed the class counts
+        //! with \p counts. These are added on to the counts from the
+        //! training data to compute the class posterior probabilities.
+        void initialClassCounts(const TDoubleSizePrVec &counts);
+
+        //! Add a training data point comprising the pair \f$(x,l)\f$
+        //! for feature vector \f$x\f$ and class label \f$l\f$.
+        //!
+        //! \param[in] label The class label for \p x.
+        //! \param[in] x The feature values.
+        //! \note \p x size should be equal to the number of features.
+        //! A missing feature is indicated by passing an empty vector
+        //! for that feature.
+        void addTrainingDataPoint(std::size_t label, const TDouble1VecVec &x);
+
+        //! Age out old values from the class conditional densities
+        //! to account for \p time passing.
+        void propagateForwardsByTime(double time);
+
+        //! Get the top \p n class probabilities for the feature vector \p x.
+        //!
+        //! \param[in] n The number of class probabilities to estimate.
+        //! \param[in] x The feature values.
+        //! \note \p x size should be equal to the number of features.
+        //! A missing feature is indicated by passing an empty vector
+        //! for that feature.
+        TDoubleSizePrVec highestClassProbabilities(std::size_t n,
+                                                   const TDouble1VecVec &x) const;
+
+        //! Debug the memory used by this object.
+        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+        //! Get the memory used by this object.
+        std::size_t memoryUsage() const;
+
+        //! Get a checksum for this object.
+        uint64_t checksum(uint64_t seed = 0) const;
+
+    private:
+        using TFeatureDensityPtr = boost::shared_ptr<CNaiveBayesFeatureDensity>;
+        using TFeatureDensityPtrVec = std::vector<TFeatureDensityPtr>;
+
+        //! \brief The data associated with a class.
+        struct SClass
+        {
+            //! Initialize by reading state from \p traverser.
+            bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                        core::CStateRestoreTraverser &traverser);
+            //! Persist state by passing information to \p inserter.
+            void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+            //! Debug the memory used by this object.
+            void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+            //! Get the memory used by this object.
+            std::size_t memoryUsage() const;
+            //! Get a checksum for this object.
+            uint64_t checksum(uint64_t seed = 0) const;
+
+            //! The number of examples in this class.
+            double s_Count = 0.0;
+            //! The feature conditional densities for this class.
+            TFeatureDensityPtrVec s_ConditionalDensities;
+        };
+
+        using TSizeClassUMap = boost::unordered_map<std::size_t, SClass>;
+
+    private:
+        //! Initialize by reading state from \p traverser.
+        bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                    core::CStateRestoreTraverser &traverser);
+
+        //! Validate \p x.
+        bool validate(const TDouble1VecVec &x) const;
+
+    private:
+        //! Controls the rate at which data are aged out.
+        double m_DecayRate;
+
+        //! An exemplar for creating conditional densities.
+        TFeatureDensityPtr m_Exemplar;
+
+        //! The class conditional density estimates and weights.
+        TSizeClassUMap m_ClassConditionalDensities;
+};
+
+}
+}
+
+#endif // INCLUDED_ml_maths_CNaiveBayes_h
diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h
new file mode 100644
index 0000000000..68471affb2
--- /dev/null
+++ b/include/maths/CTimeSeriesChangeDetector.h
@@ -0,0 +1,376 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+#ifndef INCLUDED_ml_maths_CTimeSeriesChangeDetector_h
+#define INCLUDED_ml_maths_CTimeSeriesChangeDetector_h
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+
+namespace ml
+{
+namespace core
+{
+class CStatePersistInserter;
+class CStateRestoreTraverser;
+}
+namespace maths
+{
+class CModelAddSamplesParams;
+class CPrior;
+class CTimeSeriesDecompositionInterface;
+struct SDistributionRestoreParams;
+
+namespace time_series_change_detector_detail
+{
+class CUnivariateTimeSeriesChangeModel;
+}
+
+//! \brief A description of a time series change.
+struct MATHS_EXPORT SChangeDescription
+{
+    using TDouble2Vec = core::CSmallVector<double, 2>;
+
+    //! The types of change we can detect.
+    enum EDescription
+    {
+        E_LevelShift,
+        E_TimeShift
+    };
+
+    SChangeDescription(EDescription description, double value);
+
+    //! The type of change.
+    EDescription s_Description;
+
+    //! The change value.
+    TDouble2Vec s_Value;
+};
+
+//! \brief Tests a variety of possible changes which might have
+//! occurred in a time series and selects one if it provides a
+//! good explanation of the recent behaviour.
+class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector
+{
+    public:
+        using TTimeDoublePr = std::pair<core_t::TTime, double>;
+        using TTimeDoublePr1Vec = core::CSmallVector<TTimeDoublePr, 1>;
+        using TDouble4Vec = core::CSmallVector<double, 4>;
+        using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
+        using TWeightStyleVec = maths_t::TWeightStyleVec;
+        using TOptionalChangeDescription = boost::optional<SChangeDescription>;
+        using TPriorPtr = boost::shared_ptr<CPrior>;
+
+    public:
+        CUnivariateTimeSeriesChangeDetector(const CTimeSeriesDecompositionInterface &trendModel,
+                                            const TPriorPtr &residualModel,
+                                            core_t::TTime minimumTimeToDetect,
+                                            core_t::TTime maximumTimeToDetect,
+                                            double minimumDeltaBicToDetect);
+
+        //! Initialize by reading state from \p traverser.
+        bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                    core::CStateRestoreTraverser &traverser);
+
+        //! Persist state by passing information to \p inserter.
+        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+
+        //! Check if there has been a change and get a description
+        //! if there has been.
+        TOptionalChangeDescription change();
+
+        //! Add \p samples to the change detector.
+        void addSamples(maths_t::EDataType dataType,
+                        const TWeightStyleVec &weightStyles,
+                        const TTimeDoublePr1Vec &samples,
+                        const TDouble4Vec1Vec &weights,
+                        double propagationInterval = 1.0);
+
+        //! Check if we should stop testing.
+        bool stopTesting() const;
+
+        //! Debug the memory used by this object.
+        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+        //! Get the memory used by this object.
+        std::size_t memoryUsage() const;
+
+        //! Get a checksum for this object.
+        uint64_t checksum(uint64_t seed = 0) const;
+
+    private:
+        using TUnivariateTimeSeriesChangeModel =
+                  time_series_change_detector_detail::CUnivariateTimeSeriesChangeModel;
+        using TChangeModelPtr = boost::shared_ptr<TUnivariateTimeSeriesChangeModel>;
+        using TChangeModelPtr4Vec = core::CSmallVector<TChangeModelPtr, 4>;
+        using TMinMaxAccumulator = CBasicStatistics::CMinMax<core_t::TTime>;
+
+    private:
+        //! The minimum amount of time we need to observe before
+        //! selecting a change model.
+        core_t::TTime m_MinimumTimeToDetect;
+
+        //! The maximum amount of time to try to detect a change.
+        core_t::TTime m_MaximumTimeToDetect;
+
+        //! The minimum increase in BIC needed to select a change model.
+        double m_MinimumDeltaBicToDetect;
+
+        //! The time range of the samples added to the change models.
+        TMinMaxAccumulator m_TimeRange;
+
+        //! The count of samples added to the change models.
+        std::size_t m_SampleCount;
+
+        //! The current evidence of a change.
+        double m_CurrentEvidenceOfChange;
+
+        //! The change models.
+        TChangeModelPtr4Vec m_ChangeModels;
+};
+
+namespace time_series_change_detector_detail
+{
+
+//! \brief Helper interface for change detection. Implementations of
+//! this are used to model specific types of changes which can occur.
+class MATHS_EXPORT CUnivariateTimeSeriesChangeModel : private core::CNonCopyable
+{
+    public:
+        using TTimeDoublePr = std::pair<core_t::TTime, double>;
+        using TTimeDoublePr1Vec = core::CSmallVector<TTimeDoublePr, 1>;
+        using TDouble4Vec = core::CSmallVector<double, 4>;
+        using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
+        using TWeightStyleVec = maths_t::TWeightStyleVec;
+        using TPriorPtr = boost::shared_ptr<CPrior>;
+        using TOptionalChangeDescription = boost::optional<SChangeDescription>;
+
+    public:
+        CUnivariateTimeSeriesChangeModel(const CTimeSeriesDecompositionInterface &trendModel);
+        virtual ~CUnivariateTimeSeriesChangeModel() = default;
+
+        //! Initialize by reading state from \p traverser.
+        virtual bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                            core::CStateRestoreTraverser &traverser) = 0;
+
+        //! Persist state by passing information to \p inserter.
+        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0;
+
+        //! The BIC of applying the change.
+        virtual double bic() const = 0;
+
+        //! Get a description of the change.
+        virtual TOptionalChangeDescription change() const = 0;
+
+        //! Update the change model with \p samples.
+        virtual void addSamples(std::size_t count,
+                                maths_t::EDataType dataType,
+                                const TWeightStyleVec &weightStyles,
+                                const TTimeDoublePr1Vec &samples,
+                                const TDouble4Vec1Vec &weights,
+                                double propagationInterval = 1.0) = 0;
+
+        //! Debug the memory used by this object.
+        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
+
+        //! Get the static size of this object.
+        virtual std::size_t staticSize() const = 0;
+
+        //! Get the memory used by this object.
+        virtual std::size_t memoryUsage() const = 0;
+
+        //! Get a checksum for this object.
+        virtual uint64_t checksum(uint64_t seed) const = 0;
+
+    protected:
+        //! Get the log-likelihood.
+        double logLikelihood() const;
+
+        //! Update the data log-likelihood with \p logLikelihood.
+        void addLogLikelihood(double logLikelihood);
+
+        //! Get the time series trend model.
+        const CTimeSeriesDecompositionInterface &trendModel() const;
+
+    private:
+        //! The log-likelihood of the data under this model.
+        double m_LogLikelihood;
+
+        //! A model decomposing the time series trend.
+        const CTimeSeriesDecompositionInterface &m_TrendModel;
+};
+
+//! \brief Used to capture the likelihood of the data given no change.
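+//!
+//! This plays the role of the null hypothesis in the change test: a
+//! candidate change is only accepted if its BIC undercuts this model's
+//! BIC by the detector's minimum margin. Since this model has no free
+//! parameters its BIC is simply \f$-2 \log(L)\f$, i.e. the accumulated
+//! data log-likelihood scaled by -2. Schematically (this paraphrases
+//! the selection rule in CUnivariateTimeSeriesChangeDetector::change):
+//! \code
+//! deltaBic = bic(no change) - bic(best candidate)
+//! accept if   deltaBic > minimumDeltaBicToDetect
+//!       and   deltaBic > deltaBic(runner up) + minimumDeltaBicToDetect / 2
+//! \endcode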
+class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateTimeSeriesChangeModel
+{
+    public:
+        CUnivariateNoChangeModel(const CTimeSeriesDecompositionInterface &trendModel,
+                                 const TPriorPtr &residualModel);
+
+        //! Initialize by reading state from \p traverser.
+        virtual bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                            core::CStateRestoreTraverser &traverser);
+
+        //! Persist state by passing information to \p inserter.
+        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+
+        //! Returns the no change BIC.
+        virtual double bic() const;
+
+        //! Returns a null object.
+        virtual TOptionalChangeDescription change() const;
+
+        //! Update the data log-likelihood with \p samples.
+        virtual void addSamples(std::size_t count,
+                                maths_t::EDataType dataType,
+                                const TWeightStyleVec &weightStyles,
+                                const TTimeDoublePr1Vec &samples,
+                                const TDouble4Vec1Vec &weights,
+                                double propagationInterval = 1.0);
+
+        //! Debug the memory used by this object.
+        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+        //! Get the static size of this object.
+        virtual std::size_t staticSize() const;
+
+        //! Get the memory used by this object.
+        virtual std::size_t memoryUsage() const;
+
+        //! Get a checksum for this object.
+        virtual uint64_t checksum(uint64_t seed) const;
+
+    private:
+        //! A reference to the underlying prior.
+        TPriorPtr m_ResidualModel;
+};
+
+//! \brief Captures the likelihood of the data given an arbitrary
+//! level shift.
+class MATHS_EXPORT CUnivariateTimeSeriesLevelShiftModel final : public CUnivariateTimeSeriesChangeModel
+{
+    public:
+        CUnivariateTimeSeriesLevelShiftModel(const CTimeSeriesDecompositionInterface &trendModel,
+                                             const TPriorPtr &residualModel);
+
+        //! Initialize by reading state from \p traverser.
+        virtual bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                            core::CStateRestoreTraverser &traverser);
+
+        //! Persist state by passing information to \p inserter.
+        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+
+        //! The BIC of applying the level shift.
+        virtual double bic() const;
+
+        //! Get a description of the level shift.
+        virtual TOptionalChangeDescription change() const;
+
+        //! Update with \p samples.
+        virtual void addSamples(std::size_t count,
+                                maths_t::EDataType dataType,
+                                const TWeightStyleVec &weightStyles,
+                                const TTimeDoublePr1Vec &samples,
+                                const TDouble4Vec1Vec &weights,
+                                double propagationInterval = 1.0);
+
+        //! Debug the memory used by this object.
+        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+        //! Get the static size of this object.
+        virtual std::size_t staticSize() const;
+
+        //! Get the memory used by this object.
+        virtual std::size_t memoryUsage() const;
+
+        //! Get a checksum for this object.
+        virtual uint64_t checksum(uint64_t seed) const;
+
+    private:
+        using TDoubleVec = std::vector<double>;
+        using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
+
+    private:
+        //! The optimal shift.
+        TMeanAccumulator m_Shift;
+
+        //! The number of samples added so far.
+        double m_SampleCount;
+
+        //! The prior for the time series' residual model subject
+        //! to the shift.
+        TPriorPtr m_ResidualModel;
+
+        //! The initial residual model mode.
+        double m_ResidualModelMode;
+};
+
+//! \brief Captures the likelihood of the data given a specified
+//! time shift.
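+//!
+//! The hypothesis is evaluated by detrending each sample against the
+//! trend prediction at the shifted time, i.e. a sample at time t is
+//! compared with the trend value at t + shift. The shift is fixed when
+//! the model is constructed (the detector currently tries shifts of
+//! plus and minus one hour), so it contributes no free parameters and,
+//! as for the no change model, BIC is just \f$-2 \log(L)\f$.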
+class MATHS_EXPORT CUnivariateTimeSeriesTimeShiftModel final : public CUnivariateTimeSeriesChangeModel +{ + public: + CUnivariateTimeSeriesTimeShiftModel(const CTimeSeriesDecompositionInterface &trendModel, + const TPriorPtr &residualModel, + core_t::TTime shift); + + //! Initialize by reading state from \p traverser. + virtual bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser); + + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + + //! The BIC of applying the time shift. + virtual double bic() const; + + //! Get a description of the time shift. + virtual TOptionalChangeDescription change() const; + + //! Update with \p samples. + virtual void addSamples(std::size_t count, + maths_t::EDataType dataType, + const TWeightStyleVec &weightStyles, + const TTimeDoublePr1Vec &samples, + const TDouble4Vec1Vec &weights, + double propagationInterval = 1.0); + + //! Debug the memory used by this object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the static size of this object. + virtual std::size_t staticSize() const; + + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed) const; + + private: + //! The shift in time of the time series trend model. + core_t::TTime m_Shift; + + //! The prior for the time series' residual model subject + //! to the shift. + TPriorPtr m_ResidualModel; +}; + +} + +} +} + +#endif // INCLUDED_ml_maths_CTimeSeriesChangeDetector_h diff --git a/lib/maths/CNaiveBayes.cc b/lib/maths/CNaiveBayes.cc new file mode 100644 index 0000000000..68690fd90d --- /dev/null +++ b/lib/maths/CNaiveBayes.cc @@ -0,0 +1,355 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace ml +{ +namespace maths +{ +namespace +{ +const std::string PRIOR_TAG{"a"}; +const std::string CLASS_LABEL_TAG{"b"}; +const std::string CLASS_MODEL_TAG{"c"}; +const std::string COUNT_TAG{"d"}; +const std::string CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"e"}; +} + +CNaiveBayesFeatureDensityFromPrior::CNaiveBayesFeatureDensityFromPrior(CPrior &prior) : + m_Prior(prior.clone()) +{} + +void CNaiveBayesFeatureDensityFromPrior::add(const TDouble1Vec &x) +{ + m_Prior->addSamples(CConstantWeights::COUNT, x, CConstantWeights::SINGLE_UNIT); +} + +CNaiveBayesFeatureDensityFromPrior *CNaiveBayesFeatureDensityFromPrior::clone() const +{ + return new CNaiveBayesFeatureDensityFromPrior(*m_Prior); +} + +bool CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) +{ + do + { + const std::string &name{traverser.name()}; + RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), + boost::cref(params), boost::ref(m_Prior), _1))); + } + while (traverser.next()); + return true; +} + +void CNaiveBayesFeatureDensityFromPrior::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(*m_Prior), _1)); +} + +double CNaiveBayesFeatureDensityFromPrior::logValue(const TDouble1Vec &x) const +{ + double result; + if (m_Prior->jointLogMarginalLikelihood(CConstantWeights::COUNT, x, + CConstantWeights::SINGLE_UNIT, + result) != maths_t::E_FpNoErrors) + { + LOG_ERROR("Bad value density value for " << x); + return boost::numeric::bounds::lowest(); + } + return result; +} + +void CNaiveBayesFeatureDensityFromPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const +{ + return core::CMemoryDebug::dynamicSize("m_Prior", m_Prior, mem); +} + +std::size_t CNaiveBayesFeatureDensityFromPrior::staticSize() const +{ + return sizeof(*this); +} + +std::size_t CNaiveBayesFeatureDensityFromPrior::memoryUsage() const +{ + return core::CMemory::dynamicSize(m_Prior); +} + +void CNaiveBayesFeatureDensityFromPrior::propagateForwardsByTime(double time) +{ + m_Prior->propagateForwardsByTime(time); +} + +uint64_t CNaiveBayesFeatureDensityFromPrior::checksum(uint64_t seed) const +{ + return CChecksum::calculate(seed, m_Prior); +} + + +CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity &exemplar, double decayRate) : + m_DecayRate{decayRate}, + m_Exemplar{exemplar.clone()}, + m_ClassConditionalDensities{2} +{} + +CNaiveBayes::CNaiveBayes(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) : + m_DecayRate{params.s_DecayRate}, + m_ClassConditionalDensities{2} +{ + traverser.traverseSubLevel(boost::bind(&CNaiveBayes::acceptRestoreTraverser, + this, boost::cref(params), _1)); +} + +bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) +{ + std::size_t label; + do + { + const std::string &name{traverser.name()}; + RESTORE_BUILT_IN(CLASS_LABEL_TAG, label) + RESTORE_SETUP_TEARDOWN(CLASS_MODEL_TAG, + SClass class_, + traverser.traverseSubLevel(boost::bind( + &SClass::acceptRestoreTraverser, + boost::ref(class_), boost::cref(params), _1)), + m_ClassConditionalDensities.emplace(label, class_)) + } + while (traverser.next()); + return true; +} + +void 
CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter &inserter) const
+{
+    using TSizeClassUMapCItr = TSizeClassUMap::const_iterator;
+    using TSizeClassUMapCItrVec = std::vector<TSizeClassUMapCItr>;
+    TSizeClassUMapCItrVec classes;
+    classes.reserve(m_ClassConditionalDensities.size());
+    for (auto i = m_ClassConditionalDensities.begin(); i != m_ClassConditionalDensities.end(); ++i)
+    {
+        classes.push_back(i);
+    }
+    std::sort(classes.begin(), classes.end(),
+              core::CFunctional::SDereference<COrderings::SFirstLess>());
+    for (const auto &class_ : classes)
+    {
+        inserter.insertValue(CLASS_LABEL_TAG, class_->first);
+        inserter.insertLevel(CLASS_MODEL_TAG, boost::bind(&SClass::acceptPersistInserter,
+                                                          boost::ref(class_->second), _1));
+    }
+}
+
+void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec &counts)
+{
+    for (const auto &count : counts)
+    {
+        m_ClassConditionalDensities[count.second] = SClass{count.first, {}};
+    }
+}
+
+void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec &x)
+{
+    if (!this->validate(x))
+    {
+        return;
+    }
+
+    auto &class_ = m_ClassConditionalDensities[label];
+
+    if (class_.s_ConditionalDensities.empty())
+    {
+        class_.s_ConditionalDensities.reserve(x.size());
+        std::generate_n(std::back_inserter(class_.s_ConditionalDensities),
+                        x.size(),
+                        [this]() { return TFeatureDensityPtr{m_Exemplar->clone()}; });
+    }
+
+    bool updateCount{false};
+    for (std::size_t i = 0u; i < x.size(); ++i)
+    {
+        if (x[i].size() > 0)
+        {
+            class_.s_ConditionalDensities[i]->add(x[i]);
+            updateCount = true;
+        }
+    }
+
+    if (updateCount)
+    {
+        class_.s_Count += 1.0;
+    }
+    else
+    {
+        LOG_TRACE("Ignoring empty feature vector");
+    }
+}
+
+void CNaiveBayes::propagateForwardsByTime(double time)
+{
+    double factor{std::exp(-m_DecayRate * time)};
+    for (auto &class_ : m_ClassConditionalDensities)
+    {
+        class_.second.s_Count *= factor;
+        for (auto &density : class_.second.s_ConditionalDensities)
+        {
+            density->propagateForwardsByTime(time);
+        }
+    }
+}
+
+CNaiveBayes::TDoubleSizePrVec
+CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec &x) const
+{
+    if (!this->validate(x))
+    {
+        return {};
+    }
+    if (m_ClassConditionalDensities.empty())
+    {
+        LOG_ERROR("Trying to compute class probabilities without supplying training data");
+        return {};
+    }
+
+    TDoubleSizePrVec p;
+    p.reserve(m_ClassConditionalDensities.size());
+
+    for (const auto &class_ : m_ClassConditionalDensities)
+    {
+        double f{CTools::fastLog(class_.second.s_Count)};
+        for (std::size_t i = 0u; i < x.size(); ++i)
+        {
+            if (x[i].size() > 0)
+            {
+                f += class_.second.s_ConditionalDensities[i]->logValue(x[i]);
+            }
+        }
+        p.emplace_back(f, class_.first);
+    }
+
+    double scale{std::max_element(p.begin(), p.end())->first};
+    double Z{0.0};
+    for (auto &pc : p)
+    {
+        pc.first = std::exp(pc.first - scale);
+        Z += pc.first;
+    }
+    for (auto &pc : p)
+    {
+        pc.first /= Z;
+    }
+
+    n = std::min(n, p.size());
+    // Use a partial sort over the full collection so that the n entries
+    // returned really are the n most probable classes: sorting only the
+    // prefix [begin, begin + n) would select an arbitrary subset, since
+    // the order of p is determined by the unordered_map iteration.
+    std::partial_sort(p.begin(), p.begin() + n, p.end(),
+                      std::greater<TDoubleSizePr>());
+
+    return TDoubleSizePrVec{p.begin(), p.begin() + n};
+}
+
+void CNaiveBayes::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
+{
+    core::CMemoryDebug::dynamicSize("m_Exemplar", m_Exemplar, mem);
+    core::CMemoryDebug::dynamicSize("m_ClassConditionalDensities",
+                                    m_ClassConditionalDensities, mem);
+}
+
+std::size_t CNaiveBayes::memoryUsage() const
+{
+    return  core::CMemory::dynamicSize(m_Exemplar)
+          + core::CMemory::dynamicSize(m_ClassConditionalDensities);
+}
+
+uint64_t CNaiveBayes::checksum(uint64_t seed) const
+{
+    return CChecksum::calculate(seed,
m_ClassConditionalDensities); +} + +bool CNaiveBayes::validate(const TDouble1VecVec &x) const +{ + auto class_ = m_ClassConditionalDensities.begin(); + if ( class_ != m_ClassConditionalDensities.end() + && class_->second.s_ConditionalDensities.size() > 0 + && class_->second.s_ConditionalDensities.size() != x.size()) + { + LOG_ERROR("Unexpected feature vector: " << core::CContainerPrinter::print(x)); + return false; + } + return true; +} + +bool CNaiveBayes::SClass::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) +{ + do + { + const std::string &name{traverser.name()}; + RESTORE_BUILT_IN(COUNT_TAG, s_Count) + RESTORE_SETUP_TEARDOWN(CONDITIONAL_DENSITY_FROM_PRIOR_TAG, + CNaiveBayesFeatureDensityFromPrior tmp, + traverser.traverseSubLevel(boost::bind( + &CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser, + boost::ref(tmp), boost::cref(params), _1)), + s_ConditionalDensities.emplace_back(tmp.clone())) + // Add other implementation's restore code here. + } + while (traverser.next()); + return true; +} + +void CNaiveBayes::SClass::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + inserter.insertValue(COUNT_TAG, s_Count, core::CIEEE754::E_SinglePrecision); + for (const auto &density : s_ConditionalDensities) + { + if (dynamic_cast(density.get())) + { + inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG, + boost::bind(&CNaiveBayesFeatureDensity::acceptPersistInserter, + density.get(), _1)); + continue; + } + // Add other implementation's persist code here. + } +} + +void CNaiveBayes::SClass::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const +{ + core::CMemoryDebug::dynamicSize("s_ConditionalDensities", s_ConditionalDensities, mem); +} + +std::size_t CNaiveBayes::SClass::memoryUsage() const +{ + return core::CMemory::dynamicSize(s_ConditionalDensities); +} + +uint64_t CNaiveBayes::SClass::checksum(uint64_t seed) const +{ + seed = CChecksum::calculate(seed, s_Count); + return CChecksum::calculate(seed, s_ConditionalDensities); +} + +} +} diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc new file mode 100644 index 0000000000..0f9e5971d1 --- /dev/null +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -0,0 +1,509 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace ml +{ +namespace maths +{ +using namespace time_series_change_detector_detail; + +namespace +{ +using TDouble1Vec = core::CSmallVector; +using TDouble4Vec = core::CSmallVector; +using TDouble4Vec1Vec = core::CSmallVector; +using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription; + +const std::string SAMPLE_COUNT_TAG{"a"}; +const std::string MIN_TIME_TAG{"b"}; +const std::string MAX_TIME_TAG{"c"}; +const std::string CHANGE_MODEL_TAG{"d"}; +const std::string LOG_LIKELIHOOD_TAG{"e"}; +const std::string SHIFT_TAG{"f"}; +const std::string RESIDUAL_MODEL_TAG{"g"}; +} + +SChangeDescription::SChangeDescription(EDescription description, double value) : + s_Description{description}, s_Value{value} +{} + +CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector(const CTimeSeriesDecompositionInterface &trendModel, + const TPriorPtr &residualModel, + core_t::TTime minimumTimeToDetect, + core_t::TTime maximumTimeToDetect, + double minimumDeltaBicToDetect) : + m_MinimumTimeToDetect{minimumTimeToDetect}, + m_MaximumTimeToDetect{maximumTimeToDetect}, + m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, + m_SampleCount{0}, + m_CurrentEvidenceOfChange{0.0}, + m_ChangeModels{boost::make_shared(trendModel, residualModel), + boost::make_shared(trendModel, residualModel), + boost::make_shared(trendModel, residualModel, -core::constants::HOUR), + boost::make_shared(trendModel, residualModel, +core::constants::HOUR)} +{} + +bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) +{ + auto model = m_ChangeModels.begin(); + do + { + const std::string name{traverser.name()}; + RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) + RESTORE_SETUP_TEARDOWN(MIN_TIME_TAG, + core_t::TTime time, + core::CStringUtils::stringToType(traverser.value(), time), + m_TimeRange.add(time)) + RESTORE_SETUP_TEARDOWN(MAX_TIME_TAG, + core_t::TTime time, + core::CStringUtils::stringToType(traverser.value(), time), + m_TimeRange.add(time)) + RESTORE(CHANGE_MODEL_TAG, traverser.traverseSubLevel(boost::bind( + &CUnivariateTimeSeriesChangeModel::acceptRestoreTraverser, + (model++)->get(), boost::cref(params), _1))) + } + while (traverser.next()); + return true; +} + +void CUnivariateTimeSeriesChangeDetector::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); + inserter.insertValue(MIN_TIME_TAG, m_TimeRange.min()); + inserter.insertValue(MAX_TIME_TAG, m_TimeRange.max()); + for (const auto &model : m_ChangeModels) + { + inserter.insertLevel(CHANGE_MODEL_TAG, + boost::bind(&CUnivariateTimeSeriesChangeModel::acceptPersistInserter, + model.get(), _1)); + } +} + +TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() +{ + using TChangeModelPtr4VecCItr = TChangeModelPtr4Vec::const_iterator; + using TDoubleChangeModelPtr4VecCItrPr = std::pair; + using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; + + if (m_TimeRange.range() > m_MinimumTimeToDetect) + { + double noChangeBic{m_ChangeModels[0]->bic()}; + TMinAccumulator candidates; + for (auto i = m_ChangeModels.begin() + 1; i != m_ChangeModels.end(); ++i) + { + candidates.add({(*i)->bic(), i}); + } + candidates.sort(); + + double 
evidences[]{noChangeBic - candidates[0].first, + noChangeBic - candidates[1].first}; + m_CurrentEvidenceOfChange = evidences[0]; + if ( evidences[0] > m_MinimumDeltaBicToDetect + && evidences[0] > evidences[1] + m_MinimumDeltaBicToDetect / 2.0) + { + return (*candidates[0].second)->change(); + } + } + return TOptionalChangeDescription(); +} + +bool CUnivariateTimeSeriesChangeDetector::stopTesting() const +{ + core_t::TTime range{m_TimeRange.range()}; + if (range > m_MinimumTimeToDetect) + { + double scale{0.5 + CTools::smoothHeaviside(2.0 * m_CurrentEvidenceOfChange + / m_MinimumDeltaBicToDetect, 0.2)}; + return static_cast(range) + > m_MinimumTimeToDetect + scale * static_cast( + m_MaximumTimeToDetect - m_MinimumTimeToDetect); + } + return false; +} +void CUnivariateTimeSeriesChangeDetector::addSamples(maths_t::EDataType dataType, + const TWeightStyleVec &weightStyles, + const TTimeDoublePr1Vec &samples, + const TDouble4Vec1Vec &weights, + double propagationInterval) +{ + for (const auto &sample : samples) + { + m_TimeRange.add(sample.first); + } + + ++m_SampleCount; + + for (auto &model : m_ChangeModels) + { + model->addSamples(m_SampleCount, dataType, + weightStyles, samples, weights, + propagationInterval); + } +} + +void CUnivariateTimeSeriesChangeDetector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const +{ + core::CMemoryDebug::dynamicSize("m_ChangeModels", m_ChangeModels, mem); +} + +std::size_t CUnivariateTimeSeriesChangeDetector::memoryUsage() const +{ + return core::CMemory::dynamicSize(m_ChangeModels); +} + +uint64_t CUnivariateTimeSeriesChangeDetector::checksum(uint64_t seed) const +{ + seed = CChecksum::calculate(seed, m_TimeRange); + seed = CChecksum::calculate(seed, m_SampleCount); + return CChecksum::calculate(seed, m_ChangeModels); +} + +namespace time_series_change_detector_detail +{ + +CUnivariateTimeSeriesChangeModel::CUnivariateTimeSeriesChangeModel(const CTimeSeriesDecompositionInterface &trendModel) : + m_LogLikelihood{0.0}, m_TrendModel{trendModel} +{} + +double CUnivariateTimeSeriesChangeModel::logLikelihood() const +{ + return m_LogLikelihood; +} + +void CUnivariateTimeSeriesChangeModel::addLogLikelihood(double logLikelihood) +{ + m_LogLikelihood += logLikelihood; +} + +const CTimeSeriesDecompositionInterface &CUnivariateTimeSeriesChangeModel::trendModel() const +{ + return m_TrendModel; +} + +CUnivariateNoChangeModel::CUnivariateNoChangeModel(const CTimeSeriesDecompositionInterface &trendModel, + const TPriorPtr &residualModel) : + CUnivariateTimeSeriesChangeModel{trendModel}, + m_ResidualModel{residualModel} +{} + +bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SDistributionRestoreParams &/*params*/, + core::CStateRestoreTraverser &traverser) +{ + do + { + const std::string name{traverser.name()}; + RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, + double logLikelihood, + core::CStringUtils::stringToType(traverser.value(), logLikelihood), + this->addLogLikelihood(logLikelihood)) + } + while (traverser.next()); + return true; +} + +void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); +} + +double CUnivariateNoChangeModel::bic() const +{ + return -2.0 * this->logLikelihood(); +} + +TOptionalChangeDescription CUnivariateNoChangeModel::change() const +{ + return TOptionalChangeDescription(); +} + +void CUnivariateNoChangeModel::addSamples(std::size_t count, + maths_t::EDataType /*dataType*/, + const TWeightStyleVec &weightStyles, 
+ const TTimeDoublePr1Vec &samples_, + const TDouble4Vec1Vec &weights, + double /*propagationInterval*/) +{ + TDouble1Vec samples; + samples.reserve(samples_.size()); + for (const auto &sample : samples_) + { + samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0)); + } + + // See CUnivariateTimeSeriesLevelShiftModel for an explanation + // of the delay updating the log-likelihood. + + double logLikelihood; + if (count >= 5 && m_ResidualModel->jointLogMarginalLikelihood( + weightStyles, samples, weights, + logLikelihood) == maths_t::E_FpNoErrors) + { + this->addLogLikelihood(logLikelihood); + } +} + +void CUnivariateNoChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr /*mem*/) const +{ +} + +std::size_t CUnivariateNoChangeModel::staticSize() const +{ + return sizeof(*this); +} + +std::size_t CUnivariateNoChangeModel::memoryUsage() const +{ + return 0; +} + +uint64_t CUnivariateNoChangeModel::checksum(uint64_t seed) const +{ + seed = CChecksum::calculate(seed, this->logLikelihood()); + seed = CChecksum::calculate(seed, this->trendModel()); + return CChecksum::calculate(seed, m_ResidualModel); +} + +CUnivariateTimeSeriesLevelShiftModel::CUnivariateTimeSeriesLevelShiftModel(const CTimeSeriesDecompositionInterface &trendModel, + const TPriorPtr &residualModel) : + CUnivariateTimeSeriesChangeModel{trendModel}, + m_SampleCount{0.0}, + m_ResidualModel{residualModel->clone()}, + m_ResidualModelMode{residualModel->marginalLikelihoodMode()} +{} + +bool CUnivariateTimeSeriesLevelShiftModel::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) +{ + do + { + const std::string name{traverser.name()}; + RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, + double logLikelihood, + core::CStringUtils::stringToType(traverser.value(), logLikelihood), + this->addLogLikelihood(logLikelihood)) + RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value())) + RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) + RESTORE(RESIDUAL_MODEL_TAG, traverser.traverseSubLevel( + boost::bind(CPriorStateSerialiser(), + boost::cref(params), + boost::ref(m_ResidualModel), _1))) + } + while (traverser.next()); + return true; +} + +void CUnivariateTimeSeriesLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); + inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited()); + inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); + inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(*m_ResidualModel), _1)); +} + +double CUnivariateTimeSeriesLevelShiftModel::bic() const +{ + return -2.0 * this->logLikelihood() + std::log(m_SampleCount); +} + +TOptionalChangeDescription CUnivariateTimeSeriesLevelShiftModel::change() const +{ + return SChangeDescription{SChangeDescription::E_LevelShift, CBasicStatistics::mean(m_Shift)}; +} + +void CUnivariateTimeSeriesLevelShiftModel::addSamples(std::size_t count, + maths_t::EDataType dataType, + const TWeightStyleVec &weightStyles, + const TTimeDoublePr1Vec &samples_, + const TDouble4Vec1Vec &weights, + double propagationInterval) +{ + TDouble1Vec samples; + samples.reserve(samples_.size()); + for (const auto &sample : samples_) + { + double x{this->trendModel().detrend(sample.first, sample.second, 0.0)}; + samples.push_back(x); + m_Shift.add(x - m_ResidualModelMode); + } + for (auto &sample : samples) + { + sample -= CBasicStatistics::mean(m_Shift); + } + for (const auto &weight : 
weights) + { + m_SampleCount += maths_t::count(weightStyles, weight); + } + + m_ResidualModel->dataType(dataType); + m_ResidualModel->addSamples(weightStyles, samples, weights); + m_ResidualModel->propagateForwardsByTime(propagationInterval); + + // We delay updating the log-likelihood because early on the + // level can change giving us a better apparent fit to the + // data than a fixed step. Five updates was found to be the + // minimum to get empirically similar sum log-likelihood if + // there is no shift in the data. + + double logLikelihood; + if (count >= 5 && m_ResidualModel->jointLogMarginalLikelihood( + weightStyles, samples, weights, + logLikelihood) == maths_t::E_FpNoErrors) + { + this->addLogLikelihood(logLikelihood); + } +} + +void CUnivariateTimeSeriesLevelShiftModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const +{ + core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem); +} + +std::size_t CUnivariateTimeSeriesLevelShiftModel::staticSize() const +{ + return sizeof(*this); +} + +std::size_t CUnivariateTimeSeriesLevelShiftModel::memoryUsage() const +{ + return core::CMemory::dynamicSize(m_ResidualModel); +} + +uint64_t CUnivariateTimeSeriesLevelShiftModel::checksum(uint64_t seed) const +{ + seed = CChecksum::calculate(seed, this->logLikelihood()); + seed = CChecksum::calculate(seed, this->trendModel()); + seed = CChecksum::calculate(seed, m_Shift); + seed = CChecksum::calculate(seed, m_SampleCount); + return CChecksum::calculate(seed, m_ResidualModel); +} + +CUnivariateTimeSeriesTimeShiftModel::CUnivariateTimeSeriesTimeShiftModel(const CTimeSeriesDecompositionInterface &trendModel, + const TPriorPtr &residualModel, + core_t::TTime shift) : + CUnivariateTimeSeriesChangeModel{trendModel}, + m_Shift{shift}, + m_ResidualModel{residualModel->clone()} +{} + +bool CUnivariateTimeSeriesTimeShiftModel::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) +{ + do + { + const std::string name{traverser.name()}; + RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, + double logLikelihood, + core::CStringUtils::stringToType(traverser.value(), logLikelihood), + this->addLogLikelihood(logLikelihood)) + RESTORE(RESIDUAL_MODEL_TAG, traverser.traverseSubLevel( + boost::bind(CPriorStateSerialiser(), + boost::cref(params), + boost::ref(m_ResidualModel), _1))) + } + while (traverser.next()); + return true; +} + +void CUnivariateTimeSeriesTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); + inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(*m_ResidualModel), _1)); +} + +double CUnivariateTimeSeriesTimeShiftModel::bic() const +{ + return -2.0 * this->logLikelihood(); +} + +TOptionalChangeDescription CUnivariateTimeSeriesTimeShiftModel::change() const +{ + return SChangeDescription{SChangeDescription::E_TimeShift, static_cast(m_Shift)}; +} + +void CUnivariateTimeSeriesTimeShiftModel::addSamples(std::size_t count, + maths_t::EDataType dataType, + const TWeightStyleVec &weightStyles, + const TTimeDoublePr1Vec &samples_, + const TDouble4Vec1Vec &weights, + double propagationInterval) +{ + TDouble1Vec samples; + samples.reserve(samples_.size()); + for (const auto &sample : samples_) + { + samples.push_back(this->trendModel().detrend(sample.first + m_Shift, sample.second, 0.0)); + } + + m_ResidualModel->dataType(dataType); + m_ResidualModel->addSamples(weightStyles, samples, weights); 
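+
+    // Note that the candidate time shift was applied when detrending above,
+    // i.e. each sample is compared with the trend prediction at time
+    // sample.first + m_Shift. If the series really has shifted by m_Shift,
+    // say a daily pattern arriving one hour late, detrending at the shifted
+    // time re-aligns the samples with the trend, so the residuals are small
+    // and the log-likelihood accumulated below is correspondingly larger
+    // than for the no change and level shift hypotheses.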
+ m_ResidualModel->propagateForwardsByTime(propagationInterval); + + // See CUnivariateTimeSeriesLevelShiftModel for an explanation + // of the delay updating the log-likelihood. + + double logLikelihood; + if (count >= 5 && m_ResidualModel->jointLogMarginalLikelihood( + weightStyles, samples, weights, + logLikelihood) == maths_t::E_FpNoErrors) + { + this->addLogLikelihood(logLikelihood); + } +} + +void CUnivariateTimeSeriesTimeShiftModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const +{ + core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem); +} + +std::size_t CUnivariateTimeSeriesTimeShiftModel::staticSize() const +{ + return sizeof(*this); +} + +std::size_t CUnivariateTimeSeriesTimeShiftModel::memoryUsage() const +{ + return core::CMemory::dynamicSize(m_ResidualModel); +} + +uint64_t CUnivariateTimeSeriesTimeShiftModel::checksum(uint64_t seed) const +{ + seed = CChecksum::calculate(seed, this->logLikelihood()); + seed = CChecksum::calculate(seed, this->trendModel()); + seed = CChecksum::calculate(seed, m_Shift); + return CChecksum::calculate(seed, m_ResidualModel); +} + +} + +} +} diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 6f3962078c..333c872dd2 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -160,7 +160,6 @@ double computeWinsorisationWeight(const CMultivariatePrior &prior, } // Models - // Version 6.3 const std::string VERSION_6_3_TAG("6.3"); const std::string ID_6_3_TAG{"a"}; diff --git a/lib/maths/CTrendComponent.cc b/lib/maths/CTrendComponent.cc index 24608e730a..6fbbae5052 100644 --- a/lib/maths/CTrendComponent.cc +++ b/lib/maths/CTrendComponent.cc @@ -333,7 +333,6 @@ void CTrendComponent::forecast(core_t::TTime startTime, endTime = startTime + CIntegerTools::ceil(endTime - startTime, step); - core_t::TTime steps{(endTime - startTime) / step}; result.resize(steps, TDouble3Vec(3)); @@ -355,6 +354,7 @@ void CTrendComponent::forecast(core_t::TTime startTime, + CBasicStatistics::variance(m_Models[i].s_ResidualMoments); LOG_TRACE("params = " << core::CContainerPrinter::print(models[i])); LOG_TRACE("covariances = " << modelCovariances[i].toDelimited()) + LOG_TRACE("variances = " << residualVariances[i]); } LOG_TRACE("long time variance = " << CBasicStatistics::variance(m_ValueMoments)); diff --git a/lib/maths/Makefile b/lib/maths/Makefile index 20af85b3ce..22a5fd7a79 100644 --- a/lib/maths/Makefile +++ b/lib/maths/Makefile @@ -60,6 +60,7 @@ CMultivariateNormalConjugateFactory.cc \ CMultivariateOneOfNPrior.cc \ CMultivariateOneOfNPriorFactory.cc \ CMultivariatePrior.cc \ +CNaiveBayes.cc \ CNaturalBreaksClassifier.cc \ CNormalMeanPrecConjugate.cc \ COneOfNPrior.cc \ @@ -85,6 +86,7 @@ CSeasonalTime.cc \ CSignal.cc \ CSpline.cc \ CStatisticalTests.cc \ +CTimeSeriesChangeDetector.cc \ CTimeSeriesDecomposition.cc \ CTimeSeriesDecompositionDetail.cc \ CTimeSeriesDecompositionStateSerialiser.cc \ diff --git a/lib/maths/unittest/CNaiveBayesTest.cc b/lib/maths/unittest/CNaiveBayesTest.cc new file mode 100644 index 0000000000..7791ad1872 --- /dev/null +++ b/lib/maths/unittest/CNaiveBayesTest.cc @@ -0,0 +1,374 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+
+#include "CNaiveBayesTest.h"
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+
+using namespace ml;
+
+using TDoubleVec = std::vector<double>;
+using TDouble1Vec = core::CSmallVector<double, 1>;
+using TDouble1VecVec = std::vector<TDouble1Vec>;
+using TDoubleSizePr = std::pair<double, std::size_t>;
+using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
+using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
+using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
+
+void CNaiveBayesTest::testClassification()
+{
+    LOG_DEBUG("+---------------------------------------+");
+    LOG_DEBUG("|  CNaiveBayesTest::testClassification  |");
+    LOG_DEBUG("+---------------------------------------+");
+
+    // We'll test classification using Gaussian naive Bayes. We test:
+    //   - We get the probabilities we expect if the underlying
+    //     classes are consistent with the assumptions,
+    //   - Classification when some feature values are missing.
+
+    // We test two features whose true densities are
+    //   - x(1) ~ N(0,12)  | C(1),
+    //   - x(2) ~ N(10,16) | C(1),
+    //   - x(1) ~ N(3,14)  | C(2),
+    //   - x(2) ~ N(-5,24) | C(2).
+
+    test::CRandomNumbers rng;
+
+    TDoubleVec trainingData[4];
+    rng.generateNormalSamples( 0.0, 12.0, 100, trainingData[0]);
+    rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]);
+    rng.generateNormalSamples( 3.0, 14.0, 200, trainingData[2]);
+    rng.generateNormalSamples(-5.0, 24.0, 200, trainingData[3]);
+
+    TMeanAccumulator meanMeanError;
+
+    for (auto initialCount : {0.0, 100.0})
+    {
+        maths::CNormalMeanPrecConjugate normal{
+                maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)};
+        maths::CNaiveBayes nb{maths::CNaiveBayesFeatureDensityFromPrior(normal)};
+
+        if (initialCount > 0)
+        {
+            nb.initialClassCounts({{initialCount, 1}, {initialCount, 2}});
+        }
+
+        for (std::size_t i = 0u; i < 100; ++i)
+        {
+            nb.addTrainingDataPoint(1, {{trainingData[0][i]}, {trainingData[1][i]}});
+        }
+        for (std::size_t i = 0u; i < 200; ++i)
+        {
+            nb.addTrainingDataPoint(2, {{trainingData[2][i]}, {trainingData[3][i]}});
+        }
+
+        TMeanVarAccumulator moments[4];
+        moments[0].add(trainingData[0]);
+        moments[1].add(trainingData[1]);
+        moments[2].add(trainingData[2]);
+        moments[3].add(trainingData[3]);
+
+        // The training data sizes are 100 and 200 so we expect the
+        // class probabilities to be:
+        //   - P(1) = (initialCount + 100) / (2*initialCount + 300)
+        //   - P(2) = (initialCount + 200) / (2*initialCount + 300)
+
+        TDoubleSizePrVec probabilities(nb.highestClassProbabilities(2, {{}, {}}));
+
+        double P1{(initialCount + 100.0) / (2.0 * initialCount + 300.0)};
+        double P2{(initialCount + 200.0) / (2.0 * initialCount + 300.0)};
+
+        CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities.size());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(P1, probabilities[1].first, 1e-5);
+        CPPUNIT_ASSERT_EQUAL(std::size_t(1), probabilities[1].second);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(P2, probabilities[0].first, 1e-5);
+        CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities[0].second);
+
+        // If we supply feature values we should approximately
+        // get these modulated by the product of the true density
+        // ratios for those feature values.
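+        //
+        // Concretely, for a feature vector x = (x1, x2) Bayes' rule gives
+        //   P(C = c | x) = P(C = c) * f1(x1 | c) * f2(x2 | c) / Z
+        // with Z normalizing over the two classes. The checks below
+        // compute these products from the sample moments of the training
+        // data, using maths::CTools::safePdf for the normal densities,
+        // and compare them with the classifier's estimates.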
+ + boost::math::normal class1[]{ + boost::math::normal{maths::CBasicStatistics::mean(moments[0]), + std::sqrt(maths::CBasicStatistics::variance(moments[0]))}, + boost::math::normal{maths::CBasicStatistics::mean(moments[1]), + std::sqrt(maths::CBasicStatistics::variance(moments[1]))}}; + boost::math::normal class2[]{ + boost::math::normal{maths::CBasicStatistics::mean(moments[2]), + std::sqrt(maths::CBasicStatistics::variance(moments[2]))}, + boost::math::normal{maths::CBasicStatistics::mean(moments[3]), + std::sqrt(maths::CBasicStatistics::variance(moments[3]))}}; + + TDoubleVec xtest; + rng.generateNormalSamples(0.0, 64.0, 40, xtest); + + TMeanAccumulator meanErrors[3]; + + for (std::size_t i = 0u; i < xtest.size(); i += 2) + { + auto test = [i](double p1, double p2, const TDoubleSizePrVec &p, TMeanAccumulator &meanError) + { + double Z{p1 + p2}; + p1 /= Z; + p2 /= Z; + double p1_{p[0].second == 1 ? p[0].first : p[1].first}; + double p2_{p[0].second == 1 ? p[1].first : p[0].first}; + + if (i % 10 == 0) + { + LOG_DEBUG(i << ") expected P(1) = " << p1 << ", P(2) = " << p2 + << " got P(1) = " << p1_ << ", P(2) = " << p2_); + } + + CPPUNIT_ASSERT_EQUAL(std::size_t(2), p.size()); + CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p1_, 0.03); + CPPUNIT_ASSERT_DOUBLES_EQUAL(p2, p2_, 0.03); + if (p1 > 0.001) + { + meanError.add(std::fabs((p1 - p1_) / p1)); + } + if (p2 > 0.001) + { + meanError.add(std::fabs((p2 - p2_) / p2)); + } + }; + + // Supply both feature values. + double p1{P1 * maths::CTools::safePdf(class1[0], xtest[i]) + * maths::CTools::safePdf(class1[1], xtest[i+1])}; + double p2{P2 * maths::CTools::safePdf(class2[0], xtest[i]) + * maths::CTools::safePdf(class2[1], xtest[i+1])}; + probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {xtest[i+1]}}); + test(p1, p2, probabilities, meanErrors[0]); + + // Miss out the first feature value. + p1 = P1 * maths::CTools::safePdf(class1[1], xtest[i+1]); + p2 = P2 * maths::CTools::safePdf(class2[1], xtest[i+1]); + probabilities = nb.highestClassProbabilities(2, {{}, {xtest[i+1]}}); + test(p1, p2, probabilities, meanErrors[1]); + + // Miss out the second feature value. + p1 = P1 * maths::CTools::safePdf(class1[0], xtest[i]); + p2 = P2 * maths::CTools::safePdf(class2[0], xtest[i]); + probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {}}); + test(p1, p2, probabilities, meanErrors[2]); + } + + for (std::size_t i = 0u; i < 3; ++i) + { + LOG_DEBUG("Mean relative error = " + << maths::CBasicStatistics::mean(meanErrors[i])); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanErrors[i]) < 0.05); + meanMeanError += meanErrors[i]; + } + } +} + +void CNaiveBayesTest::testPropagationByTime() +{ + LOG_DEBUG("+------------------------------------------+"); + LOG_DEBUG("| CNaiveBayesTest::testPropagationByTime |"); + LOG_DEBUG("+------------------------------------------+"); + + // Make feature distributions drift over time and verify that + // the classifier adapts. 
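+    //
+    // Both classifiers see identical training data; the only difference
+    // is that nb[0] calls propagateForwardsByTime(1.0) after every bucket
+    // (the 0.05 passed to the constructors below is assumed to be the
+    // decay rate controlling this aging). Aging down-weights old samples
+    // exponentially, so nb[0] tracks the drifting means 0.02*t - 14 and
+    // -0.02*t + 14, while nb[1] stays anchored near the initial means.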
+
+    test::CRandomNumbers rng;
+
+    maths::CNormalMeanPrecConjugate normal{
+            maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.05)};
+    maths::CNaiveBayes nb[]{
+            maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05},
+            maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}};
+
+    TDoubleVec trainingData[4];
+    for (std::size_t i = 0u; i < 1000; ++i)
+    {
+        double x{static_cast<double>(i)};
+        rng.generateNormalSamples( 0.02 * x - 14.0, 16.0, 1, trainingData[0]);
+        rng.generateNormalSamples( 0.02 * x - 14.0, 16.0, 1, trainingData[1]);
+        rng.generateNormalSamples(-0.02 * x + 14.0, 16.0, 1, trainingData[2]);
+        rng.generateNormalSamples(-0.02 * x + 14.0, 16.0, 1, trainingData[3]);
+
+        nb[0].addTrainingDataPoint(1, {{trainingData[0][0]}, {trainingData[1][0]}});
+        nb[0].addTrainingDataPoint(2, {{trainingData[2][0]}, {trainingData[3][0]}});
+        nb[0].propagateForwardsByTime(1.0);
+
+        nb[1].addTrainingDataPoint(1, {{trainingData[0][0]}, {trainingData[1][0]}});
+        nb[1].addTrainingDataPoint(2, {{trainingData[2][0]}, {trainingData[3][0]}});
+    }
+
+    // Check that the value:
+    //   - (-10,-10) gets assigned to class 2
+    //   - ( 10, 10) gets assigned to class 1
+    // for the aged classifier and vice versa.
+
+    {
+        TDoubleSizePrVec probabilities[]{
+                nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}),
+                nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}})};
+        LOG_DEBUG("Aged class probabilities = "
+                  << core::CContainerPrinter::print(probabilities[0]));
+        LOG_DEBUG("Class probabilities = "
+                  << core::CContainerPrinter::print(probabilities[1]));
+        CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities[0][0].second);
+        CPPUNIT_ASSERT(probabilities[0][0].first > 0.99);
+        CPPUNIT_ASSERT_EQUAL(std::size_t(1), probabilities[1][0].second);
+        CPPUNIT_ASSERT(probabilities[1][0].first > 0.95);
+    }
+    {
+        TDoubleSizePrVec probabilities[]{
+                nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}),
+                nb[1].highestClassProbabilities(2, {{10.0}, {10.0}})};
+        LOG_DEBUG("Aged class probabilities = "
+                  << core::CContainerPrinter::print(probabilities[0]));
+        LOG_DEBUG("Class probabilities = "
+                  << core::CContainerPrinter::print(probabilities[1]));
+        CPPUNIT_ASSERT_EQUAL(std::size_t(1), probabilities[0][0].second);
+        CPPUNIT_ASSERT(probabilities[0][0].first > 0.99);
+        CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities[1][0].second);
+        CPPUNIT_ASSERT(probabilities[1][0].first > 0.95);
+    }
+}
+
+void CNaiveBayesTest::testMemoryUsage()
+{
+    LOG_DEBUG("+------------------------------------+");
+    LOG_DEBUG("|  CNaiveBayesTest::testMemoryUsage  |");
+    LOG_DEBUG("+------------------------------------+");
+
+    // Check invariants.
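+    //
+    // Specifically, as asserted below: the total reported by
+    // debugMemoryUsage() must equal memoryUsage(), and
+    // core::CMemory::dynamicSize(nb) for a shared pointer must equal
+    // memoryUsage() plus sizeof(maths::CNaiveBayes), since it also
+    // counts the pointed-to object itself.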
+
+    using TMemoryUsagePtr = boost::scoped_ptr<core::CMemoryUsage>;
+    using TNaiveBayesPtr = boost::shared_ptr<maths::CNaiveBayes>;
+
+    test::CRandomNumbers rng;
+
+    TDoubleVec trainingData[4];
+    rng.generateNormalSamples( 0.0, 12.0, 100, trainingData[0]);
+    rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]);
+    rng.generateNormalSamples( 3.0, 14.0, 200, trainingData[2]);
+    rng.generateNormalSamples(-5.0, 24.0, 200, trainingData[3]);
+
+    maths::CNormalMeanPrecConjugate normal{
+            maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1)};
+    TNaiveBayesPtr nb{new maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.1}};
+
+    for (std::size_t i = 0u; i < 100; ++i)
+    {
+        nb->addTrainingDataPoint(1, {{trainingData[0][i]}, {trainingData[1][i]}});
+    }
+    for (std::size_t i = 0u; i < 200; ++i)
+    {
+        nb->addTrainingDataPoint(2, {{trainingData[2][i]}, {trainingData[3][i]}});
+    }
+
+    std::size_t memoryUsage{nb->memoryUsage()};
+    TMemoryUsagePtr mem{new core::CMemoryUsage};
+    nb->debugMemoryUsage(mem.get());
+
+    LOG_DEBUG("Memory = " << memoryUsage);
+    CPPUNIT_ASSERT_EQUAL(memoryUsage, mem->usage());
+
+    LOG_DEBUG("Memory = " << core::CMemory::dynamicSize(nb));
+    CPPUNIT_ASSERT_EQUAL(memoryUsage + sizeof(maths::CNaiveBayes),
+                         core::CMemory::dynamicSize(nb));
+}
+
+void CNaiveBayesTest::testPersist()
+{
+    LOG_DEBUG("+--------------------------------+");
+    LOG_DEBUG("|  CNaiveBayesTest::testPersist  |");
+    LOG_DEBUG("+--------------------------------+");
+
+    test::CRandomNumbers rng;
+
+    TDoubleVec trainingData[4];
+    rng.generateNormalSamples( 0.0, 12.0, 100, trainingData[0]);
+    rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]);
+    rng.generateNormalSamples( 3.0, 14.0, 200, trainingData[2]);
+    rng.generateNormalSamples(-5.0, 24.0, 200, trainingData[3]);
+
+    maths::CNormalMeanPrecConjugate normal{
+            maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1)};
+    maths::CNaiveBayes origNb{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.1};
+
+    for (std::size_t i = 0u; i < 100; ++i)
+    {
+        origNb.addTrainingDataPoint(1, {{trainingData[0][i]}, {trainingData[1][i]}});
+    }
+    for (std::size_t i = 0u; i < 200; ++i)
+    {
+        origNb.addTrainingDataPoint(2, {{trainingData[2][i]}, {trainingData[3][i]}});
+    }
+
+    std::string origXml;
+    {
+        core::CRapidXmlStatePersistInserter inserter("root");
+        origNb.acceptPersistInserter(inserter);
+        inserter.toXml(origXml);
+    }
+
+    LOG_DEBUG("Naive Bayes XML representation:\n" << origXml);
+
+    core::CRapidXmlParser parser;
+    CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
+    core::CRapidXmlStateRestoreTraverser traverser(parser);
+
+    maths::SDistributionRestoreParams params{maths_t::E_ContinuousData, 0.1, 0.0, 0.0, 0.0};
+    maths::CNaiveBayes restoredNb{params, traverser};
+
+    CPPUNIT_ASSERT_EQUAL(origNb.checksum(), restoredNb.checksum());
+
+    // Note this must persist the restored object so that the round trip
+    // origNb -> XML -> restoredNb -> XML is actually checked.
+    std::string restoredXml;
+    {
+        core::CRapidXmlStatePersistInserter inserter("root");
+        restoredNb.acceptPersistInserter(inserter);
+        inserter.toXml(restoredXml);
+    }
+    CPPUNIT_ASSERT_EQUAL(origXml, restoredXml);
+}
+
+CppUnit::Test *CNaiveBayesTest::suite()
+{
+    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CNaiveBayesTest");
+
+    suiteOfTests->addTest( new CppUnit::TestCaller<CNaiveBayesTest>(
+                               "CNaiveBayesTest::testClassification",
+                               &CNaiveBayesTest::testClassification) );
+    suiteOfTests->addTest( new CppUnit::TestCaller<CNaiveBayesTest>(
+                               "CNaiveBayesTest::testPropagationByTime",
+                               &CNaiveBayesTest::testPropagationByTime) );
+
suiteOfTests->addTest( new CppUnit::TestCaller( + "CNaiveBayesTest::testMemoryUsage", + &CNaiveBayesTest::testMemoryUsage) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CNaiveBayesTest::testPersist", + &CNaiveBayesTest::testPersist) ); + + return suiteOfTests; +} diff --git a/lib/maths/unittest/CNaiveBayesTest.h b/lib/maths/unittest/CNaiveBayesTest.h new file mode 100644 index 0000000000..2efcf1daa0 --- /dev/null +++ b/lib/maths/unittest/CNaiveBayesTest.h @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +#ifndef INCLUDED_CNaiveBayesTest_h +#define INCLUDED_CNaiveBayesTest_h + +#include + +class CNaiveBayesTest : public CppUnit::TestFixture +{ + public: + void testClassification(); + void testPropagationByTime(); + void testMemoryUsage(); + void testPersist(); + + static CppUnit::Test *suite(); +}; + +#endif // INCLUDED_CNaiveBayesTest_h diff --git a/lib/maths/unittest/CNaturalBreaksClassifierTest.h b/lib/maths/unittest/CNaturalBreaksClassifierTest.h index 6cd3a0c26a..a145cea2a6 100644 --- a/lib/maths/unittest/CNaturalBreaksClassifierTest.h +++ b/lib/maths/unittest/CNaturalBreaksClassifierTest.h @@ -9,7 +9,6 @@ #include - class CNaturalBreaksClassifierTest : public CppUnit::TestFixture { public: diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc new file mode 100644 index 0000000000..2111b3d18c --- /dev/null +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc @@ -0,0 +1,352 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+
+#include "CTimeSeriesChangeDetectorTest.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "TestUtils.h"
+
+#include
+
+#include
+#include
+
+using namespace ml;
+
+namespace
+{
+
+using TDoubleVec = std::vector<double>;
+using TDouble2Vec = core::CSmallVector<double, 2>;
+using TPriorPtr = boost::shared_ptr<maths::CPrior>;
+using TPriorPtrVec = std::vector<TPriorPtr>;
+
+const core_t::TTime BUCKET_LENGTH{1800};
+const double DECAY_RATE{0.0002};
+
+TPriorPtr makeResidualModel()
+{
+    maths::CGammaRateConjugate gamma{
+            maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1, DECAY_RATE)};
+    maths::CLogNormalMeanPrecConjugate lognormal{
+            maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.0, DECAY_RATE)};
+    maths::CNormalMeanPrecConjugate normal{
+            maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE)};
+
+    TPriorPtrVec mode;
+    mode.reserve(3u);
+    mode.emplace_back(gamma.clone());
+    mode.emplace_back(lognormal.clone());
+    mode.emplace_back(normal.clone());
+    maths::COneOfNPrior modePrior{mode, maths_t::E_ContinuousData, DECAY_RATE};
+    maths::CXMeansOnline1d clusterer{maths_t::E_ContinuousData,
+                                     maths::CAvailableModeDistributions::ALL,
+                                     maths_t::E_ClustersFractionWeight,
+                                     DECAY_RATE, 0.05, 12.0, 1.0};
+    maths::CMultimodalPrior multimodal{maths_t::E_ContinuousData, clusterer, modePrior, DECAY_RATE};
+
+    // Note the one-of-n candidates are collected in a fresh vector:
+    // appending to "mode" here would hand the mode prior's components
+    // to the returned prior as well.
+    TPriorPtrVec models;
+    models.reserve(4u);
+    models.emplace_back(gamma.clone());
+    models.emplace_back(lognormal.clone());
+    models.emplace_back(normal.clone());
+    models.emplace_back(multimodal.clone());
+
+    return TPriorPtr{maths::COneOfNPrior{models, maths_t::E_ContinuousData, DECAY_RATE}.clone()};
+}
+
+}
+
+void CTimeSeriesChangeDetectorTest::testNoChange()
+{
+    LOG_DEBUG("+-----------------------------------------------+");
+    LOG_DEBUG("|  CTimeSeriesChangeDetectorTest::testNoChange  |");
+    LOG_DEBUG("+-----------------------------------------------+");
+
+    test::CRandomNumbers rng;
+
+    TDoubleVec variances{1.0, 10.0, 20.0, 30.0, 100.0, 1000.0};
+    TDoubleVec scales{0.1, 1.0, 2.0, 3.0, 5.0, 8.0};
+
+    TDoubleVec samples;
+    for (std::size_t t = 0u; t < 100; ++t)
+    {
+        if (t % 10 == 0)
+        {
+            LOG_DEBUG(t << "%");
+        }
+
+        switch (t % 3)
+        {
+        case 0: rng.generateNormalSamples(10.0, variances[(t/3) % variances.size()], 1000, samples); break;
+        case 1: rng.generateLogNormalSamples(1.0, scales[(t/3) % scales.size()], 1000, samples); break;
+        case 2: rng.generateGammaSamples(10.0, 10.0 * scales[(t/3) % scales.size()], 1000, samples); break;
+        }
+
+        maths::CTimeSeriesDecomposition trendModel(DECAY_RATE, BUCKET_LENGTH);
+        TPriorPtr residualModel(makeResidualModel());
+
+        auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x)
+            {
+                trendModel.addPoint(time, x);
+                double detrended{trendModel.detrend(time, x, 0.0)};
+                residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{1.0}});
+                residualModel->propagateForwardsByTime(1.0);
+            };
+
+        core_t::TTime time{0};
+        for (std::size_t i = 0u; i < 950; ++i)
+        {
+            addSampleToModel(time, samples[i]);
+            time += BUCKET_LENGTH;
+        }
+
+        maths::CUnivariateTimeSeriesChangeDetector detector{trendModel, residualModel,
+                                                            6 * core::constants::HOUR,
+                                                            24 * core::constants::HOUR,
+                                                            12.0};
+        for (std::size_t i = 950u; i < samples.size(); ++i)
+        {
+            addSampleToModel(time, samples[i]);
+            detector.addSamples(maths_t::E_ContinuousData,
+                                maths::CConstantWeights::COUNT,
+                                {{time, samples[i]}},
{{1.0}}); + if (detector.stopTesting()) + { + break; + } + + CPPUNIT_ASSERT(!detector.change()); + + time += BUCKET_LENGTH; + } + } +} + +void CTimeSeriesChangeDetectorTest::testLevelShift() +{ + LOG_DEBUG("+-------------------------------------------------+"); + LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testLevelShift |"); + LOG_DEBUG("+-------------------------------------------------+"); + + TGeneratorVec trends{constant, ramp, smoothDaily, weekends, spikeyDaily}; + + this->testChange(trends, + maths::SChangeDescription::E_LevelShift, + [](TGenerator trend, core_t::TTime time) + { + return trend(time) + 0.5; + }, 5.0, 15.0); +} + +void CTimeSeriesChangeDetectorTest::testTimeShift() +{ + LOG_DEBUG("+------------------------------------------------+"); + LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testTimeShift |"); + LOG_DEBUG("+------------------------------------------------+"); + + TGeneratorVec trends{smoothDaily, spikeyDaily}; + + this->testChange(trends, + maths::SChangeDescription::E_TimeShift, + [](TGenerator trend, core_t::TTime time) + { + return trend(time - core::constants::HOUR); + }, -static_cast(core::constants::HOUR), 24.0); + + this->testChange(trends, + maths::SChangeDescription::E_TimeShift, + [](TGenerator trend, core_t::TTime time) + { + return trend(time + core::constants::HOUR); + }, +static_cast(core::constants::HOUR), 24.0); +} + +void CTimeSeriesChangeDetectorTest::testPersist() +{ + LOG_DEBUG("+----------------------------------------------+"); + LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testPersist |"); + LOG_DEBUG("+----------------------------------------------+"); + + test::CRandomNumbers rng; + + TDoubleVec samples; + rng.generateNormalSamples(10.0, 10.0, 1000, samples); + + maths::CTimeSeriesDecomposition trendModel(DECAY_RATE, BUCKET_LENGTH); + TPriorPtr residualModel(makeResidualModel()); + + auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) + { + trendModel.addPoint(time, x); + double detrended{trendModel.detrend(time, x, 0.0)}; + residualModel->addSamples(maths::CConstantWeights::COUNT, + {detrended}, + maths::CConstantWeights::SINGLE_UNIT); + residualModel->propagateForwardsByTime(1.0); + }; + + core_t::TTime time{0}; + for (std::size_t i = 0u; i < 990; ++i) + { + addSampleToModel(time, samples[i]); + time += BUCKET_LENGTH; + } + + maths::CUnivariateTimeSeriesChangeDetector origDetector{trendModel, residualModel, + 6 * core::constants::HOUR, + 24 * core::constants::HOUR, + 12.0}; + + maths::SDistributionRestoreParams params{maths_t::E_ContinuousData, + DECAY_RATE, 0.05, 12.0, 1.0}; + for (std::size_t i = 990u; i < samples.size(); ++i) + { + addSampleToModel(time, samples[i]); + std::string origXml; + { + ml::core::CRapidXmlStatePersistInserter inserter{"root"}; + origDetector.acceptPersistInserter(inserter); + inserter.toXml(origXml); + } + + maths::CUnivariateTimeSeriesChangeDetector restoredDetector{trendModel, residualModel, + 6 * core::constants::HOUR, + 24 * core::constants::HOUR, + 12.0}; + core::CRapidXmlParser parser; + CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); + core::CRapidXmlStateRestoreTraverser traverser(parser); + traverser.traverseSubLevel(boost::bind( + &maths::CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, + &restoredDetector, boost::cref(params), _1)); + + LOG_DEBUG("expected " << origDetector.checksum() + << " got " << restoredDetector.checksum()); + CPPUNIT_ASSERT_EQUAL(origDetector.checksum(), restoredDetector.checksum()); + } +} + +CppUnit::Test 
*CTimeSeriesChangeDetectorTest::suite() +{ + CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTimeSeriesChangeDetectorTest"); + + suiteOfTests->addTest( new CppUnit::TestCaller( + "CTimeSeriesChangeDetectorTest::testNoChange", + &CTimeSeriesChangeDetectorTest::testNoChange) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CTimeSeriesChangeDetectorTest::testLevelShift", + &CTimeSeriesChangeDetectorTest::testLevelShift) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CTimeSeriesChangeDetectorTest::testTimeShift", + &CTimeSeriesChangeDetectorTest::testTimeShift) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CTimeSeriesChangeDetectorTest::testPersist", + &CTimeSeriesChangeDetectorTest::testPersist) ); + + return suiteOfTests; +} + +void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec &trends, + maths::SChangeDescription::EDescription description, + TChange applyChange, + double expectedChange, + double expectedMeanBucketsToDetectChange) +{ + using TOptionalSize = boost::optional; + using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; + + test::CRandomNumbers rng; + + TMeanAccumulator meanBucketsToDetect; + + TDoubleVec samples; + for (std::size_t t = 0u; t < 100; ++t) + { + if (t % 10 == 0) + { + LOG_DEBUG(t << "%"); + } + + rng.generateNormalSamples(0.0, 1.0, 1000, samples); + + maths::CTimeSeriesDecomposition trendModel(DECAY_RATE, BUCKET_LENGTH); + TPriorPtr residualModel(makeResidualModel()); + + auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x, double weight) + { + trendModel.addPoint(time, x, maths::CConstantWeights::COUNT, {weight}); + double detrended{trendModel.detrend(time, x, 0.0)}; + residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{weight}}); + residualModel->propagateForwardsByTime(1.0); + }; + + core_t::TTime time{0}; + for (std::size_t i = 0u; i < 950; ++i) + { + double x{10.0 * trends[t % trends.size()](time) + samples[i]}; + addSampleToModel(time, x, 1.0); + time += BUCKET_LENGTH; + } + + maths::CUnivariateTimeSeriesChangeDetector detector{trendModel, residualModel, + 6 * core::constants::HOUR, + 24 * core::constants::HOUR, + 12.0}; + + TOptionalSize bucketsToDetect; + for (std::size_t i = 950u; i < samples.size(); ++i) + { + double x{10.0 * applyChange(trends[t % trends.size()], time) + samples[i]}; + + addSampleToModel(time, x, 0.5); + detector.addSamples(maths_t::E_ContinuousData, + maths::CConstantWeights::COUNT, + {{time, x}}, {{1.0}}); + + auto change = detector.change(); + if (change) + { + if (!bucketsToDetect) + { + bucketsToDetect.reset(i - 949); + } + CPPUNIT_ASSERT_EQUAL(change->s_Description, description); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedChange, + change->s_Value[0], + 0.5 * std::fabs(expectedChange)); + break; + } + if (detector.stopTesting()) + { + break; + } + + time += BUCKET_LENGTH; + } + CPPUNIT_ASSERT(bucketsToDetect); + meanBucketsToDetect.add(static_cast(*bucketsToDetect)); + } + + LOG_DEBUG("buckets to detect = " << maths::CBasicStatistics::mean(meanBucketsToDetect)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanBucketsToDetect) < expectedMeanBucketsToDetectChange); +} diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h new file mode 100644 index 0000000000..c907b1050f --- /dev/null +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +#ifndef INCLUDED_CTimeSeriesChangeDetectorTest_h +#define INCLUDED_CTimeSeriesChangeDetectorTest_h + +#include + +#include + +#include + +class CTimeSeriesChangeDetectorTest : public CppUnit::TestFixture +{ + public: + void testNoChange(); + void testLevelShift(); + void testTimeShift(); + void testPersist(); + + static CppUnit::Test *suite(); + + private: + using TGenerator = std::function; + using TGeneratorVec = std::vector; + using TChange = std::function; + + private: + void testChange(const TGeneratorVec &trends, + ml::maths::SChangeDescription::EDescription description, + TChange applyChange, + double expectedChange, + double expectedMeanBucketsToDetectChange); +}; + +#endif // INCLUDED_CTimeSeriesChangeDetectorTest_h diff --git a/lib/maths/unittest/Main.cc b/lib/maths/unittest/Main.cc index dd87515175..851d04ac80 100644 --- a/lib/maths/unittest/Main.cc +++ b/lib/maths/unittest/Main.cc @@ -45,6 +45,7 @@ #include "CMultivariateMultimodalPriorTest.h" #include "CMultivariateNormalConjugateTest.h" #include "CMultivariateOneOfNPriorTest.h" +#include "CNaiveBayesTest.h" #include "CNaturalBreaksClassifierTest.h" #include "CNormalMeanPrecConjugateTest.h" #include "COneOfNPriorTest.h" @@ -70,6 +71,7 @@ #include "CSolversTest.h" #include "CSplineTest.h" #include "CStatisticalTestsTest.h" +#include "CTimeSeriesChangeDetectorTest.h" #include "CTimeSeriesDecompositionTest.h" #include "CTimeSeriesModelTest.h" #include "CToolsTest.h" @@ -122,6 +124,7 @@ int main(int argc, const char **argv) runner.addTest( CMultivariateMultimodalPriorTest::suite() ); runner.addTest( CMultivariateNormalConjugateTest::suite() ); runner.addTest( CMultivariateOneOfNPriorTest::suite() ); + runner.addTest( CNaiveBayesTest::suite() ); runner.addTest( CNaturalBreaksClassifierTest::suite() ); runner.addTest( CNormalMeanPrecConjugateTest::suite() ); runner.addTest( COneOfNPriorTest::suite() ); @@ -150,6 +153,7 @@ int main(int argc, const char **argv) runner.addTest( CTimeSeriesDecompositionTest::suite() ); runner.addTest( CTimeSeriesModelTest::suite() ); runner.addTest( CToolsTest::suite() ); + runner.addTest( CTimeSeriesChangeDetectorTest::suite() ); runner.addTest( CTrendComponentTest::suite() ); runner.addTest( CTrendTestsTest::suite() ); runner.addTest( CXMeansTest::suite() ); diff --git a/lib/maths/unittest/Makefile b/lib/maths/unittest/Makefile index 9e7c9c8798..566dcad738 100644 --- a/lib/maths/unittest/Makefile +++ b/lib/maths/unittest/Makefile @@ -56,6 +56,7 @@ SRCS=\ CMultivariateMultimodalPriorTest.cc \ CMultivariateNormalConjugateTest.cc \ CMultivariateOneOfNPriorTest.cc \ + CNaiveBayesTest.cc \ CNaturalBreaksClassifierTest.cc \ CNormalMeanPrecConjugateTest.cc \ COneOfNPriorTest.cc \ @@ -81,6 +82,7 @@ SRCS=\ CSolversTest.cc \ CSplineTest.cc \ CStatisticalTestsTest.cc \ + CTimeSeriesChangeDetectorTest.cc \ CTimeSeriesDecompositionTest.cc \ CTimeSeriesModelTest.cc \ CToolsTest.cc \ From 4052390921eb16f574c15f4e18be7b94f916f152 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 12 Mar 2018 18:07:02 +0000 Subject: [PATCH 04/29] Factor out some convenience functionality. 
Switch to using std versions of c library functions in maths CTools --- include/maths/CTools.h | 20 +++ lib/maths/CModel.cc | 16 +-- lib/maths/CTools.cc | 183 +++++++++++++++++++++++---- lib/maths/ProbabilityAggregators.cc | 182 ++------------------------ lib/model/CInterimBucketCorrector.cc | 6 +- 5 files changed, 199 insertions(+), 208 deletions(-) diff --git a/include/maths/CTools.h b/include/maths/CTools.h index 4d217fc134..f363fa8f43 100644 --- a/include/maths/CTools.h +++ b/include/maths/CTools.h @@ -717,6 +717,12 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable //! Shift \p x to the right by \p eps times \p x. static double shiftRight(double x, double eps = std::numeric_limits::epsilon()); + //! Compute \f$x^2\f$. + static double pow2(double x) + { + return x * x; + } + //! Sigmoid function of \p p. static double sigmoid(double p) { @@ -739,6 +745,20 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable return sigmoid(std::exp(sign * (x - 1.0) / width)) / sigmoid(std::exp(1.0 / width)); } + + //! A custom, numerically robust, implementation of \f$(1 - x) ^ p\f$. + //! + //! \note It is assumed that p is integer. + static double powOneMinusX(double x, double p); + + //! A custom, numerically robust, implementation of \f$1 - (1 - x) ^ p\f$. + //! + //! \note It is assumed that p is integer. + static double oneMinusPowOneMinusX(double x, double p); + + //! A custom implementation of \f$\log(1 - x)\f$ which handles the + //! cancellation error for small x. + static double logOneMinusX(double x); }; } diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc index db1eaac13e..750cd4e996 100644 --- a/lib/maths/CModel.cc +++ b/lib/maths/CModel.cc @@ -418,27 +418,27 @@ void CModelStub::modelCorrelations(CTimeSeriesCorrelations &/*model*/) CModelStub::TSize2Vec1Vec CModelStub::correlates(void) const { - return TSize2Vec1Vec(); + return {}; } CModelStub::TDouble2Vec CModelStub::mode(core_t::TTime /*time*/, const maths_t::TWeightStyleVec &/*weightStyles*/, const TDouble2Vec4Vec &/*weights*/) const { - return TDouble2Vec(); + return {}; } CModelStub::TDouble2Vec1Vec CModelStub::correlateModes(core_t::TTime /*time*/, const maths_t::TWeightStyleVec &/*weightStyles*/, const TDouble2Vec4Vec1Vec &/*weights*/) const { - return TDouble2Vec1Vec(); + return {}; } CModelStub::TDouble2Vec1Vec CModelStub::residualModes(const maths_t::TWeightStyleVec &/*weightStyles*/, const TDouble2Vec4Vec &/*weights*/) const { - return TDouble2Vec1Vec(); + return {}; } void CModelStub::addBucketValue(const TTimeDouble2VecSizeTrVec &/*value*/) @@ -465,7 +465,7 @@ CModelStub::TDouble2Vec CModelStub::predict(core_t::TTime /*time*/, const TSizeDoublePr1Vec &/*correlated*/, TDouble2Vec /*hint*/) const { - return TDouble2Vec(); + return {}; } CModelStub::TDouble2Vec3Vec CModelStub::confidenceInterval(core_t::TTime /*time*/, @@ -473,7 +473,7 @@ CModelStub::TDouble2Vec3Vec CModelStub::confidenceInterval(core_t::TTime /*time* const maths_t::TWeightStyleVec &/*weightStyles*/, const TDouble2Vec4Vec &/*weights*/) const { - return TDouble2Vec3Vec(); + return {}; } bool CModelStub::forecast(core_t::TTime /*startTime*/, @@ -506,13 +506,13 @@ CModelStub::TDouble2Vec CModelStub::winsorisationWeight(double /*derate*/, core_t::TTime /*time*/, const TDouble2Vec &/*value*/) const { - return TDouble2Vec(); + return {}; } CModelStub::TDouble2Vec CModelStub::seasonalWeight(double /*confidence*/, core_t::TTime /*time*/) const { - return TDouble2Vec(); + return {}; } std::uint64_t CModelStub::checksum(std::uint64_t seed) 
const diff --git a/lib/maths/CTools.cc b/lib/maths/CTools.cc index 2184628bb7..9bf2700290 100644 --- a/lib/maths/CTools.cc +++ b/lib/maths/CTools.cc @@ -96,12 +96,6 @@ inline TDoubleBoolPr stationaryPoint(const boost::math::beta_distribution<> &bet return {boost::math::mode(beta), true}; } -//! Compute \f$x^2\f$. -inline double square(double x) -{ - return x * x; -} - //! \brief p.d.f function adapter. //! //! DESCRIPTION:\n @@ -667,11 +661,11 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia } catch (const std::exception &e) { - if (::fabs(f1 - fx) < 10.0 * EPSILON * fx) + if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; } - else if (::fabs(f2 - fx) < 10.0 * EPSILON * fx) + else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; } @@ -731,9 +725,9 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const lognormal &logNo // + 2 * s^2 * (log(x) - m))^(1/2)) if x > mode double logx = std::log(x); - double squareScale = square(logNormal.scale()); + double squareScale = pow2(logNormal.scale()); double discriminant = - std::sqrt(square(squareScale) + std::sqrt(pow2(squareScale) + (logx - logNormal.location() + 2.0 * squareScale) * (logx - logNormal.location())); double m = boost::math::mode(logNormal); @@ -843,11 +837,11 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio } catch (const std::exception &e) { - if (::fabs(f1 - fx) < 10.0 * EPSILON * fx) + if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; } - else if (::fabs(f2 - fx) < 10.0 * EPSILON * fx) + else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; } @@ -991,11 +985,11 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio } catch (const std::exception &e) { - if (::fabs(f1 - fx) < 10.0 * EPSILON * fx) + if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; } - else if (::fabs(f2 - fx) < 10.0 * EPSILON * fx) + else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; } @@ -1085,7 +1079,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, y[(i + 1) % 2] = x + m * std::log(y[i % 2] / x); LOG_TRACE("y = " << y[(i + 1) % 2]); if (++i == MAX_ITERATIONS - || ::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) + || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) { break; } @@ -1116,7 +1110,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, y[(i + 1) % 2] = x * ::exp(-(x - y[i % 2]) / m); LOG_TRACE("y = " << y[(i + 1) % 2]); if (++i == MAX_ITERATIONS - || ::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) + || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) { break; } @@ -1127,7 +1121,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, double fy = safePdf(gamma_, y[i % 2]); LOG_TRACE("f(x) = " << fx << ", f(y) = " << fy); - if (::fabs(fx - fy) <= PDF_TOLERANCE * std::max(fx, fy)) + if (std::fabs(fx - fy) <= PDF_TOLERANCE * std::max(fx, fy)) { if (x > y[i % 2]) { @@ -1203,18 +1197,18 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, << ", iterations = " << maxIterations << ", f(candidate) = " << safePdf(gamma_, candidate) - fx); - if (::fabs(safePdf(gamma_, candidate) - fx) < ::fabs(fy - fx)) + if (std::fabs(safePdf(gamma_, candidate) - fx) < std::fabs(fy - fx)) { y[i % 2] = candidate; } } catch (const std::exception &e) { - if (::fabs(fa - fx) < 10.0 * EPSILON * fx) + if (std::fabs(fa - fx) < 10.0 * EPSILON 
* fx) { y[i % 2] = a; } - else if (::fabs(fb - fx) < 10.0 * EPSILON * fx) + else if (std::fabs(fb - fx) < 10.0 * EPSILON * fx) { y[i % 2] = b; } @@ -1346,7 +1340,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, { y[(i + 1) % 2] = 1.0 - ::exp(k * std::log(x / y[i % 2])) * (1.0 - x); if (++i == MAX_ITERATIONS - || ::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE) + || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE) { break; } @@ -1387,7 +1381,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, { y[(i + 1) % 2] = ::exp(k * std::log((1.0 - x) / (1.0 - y[i % 2]))) * x; if (++i == MAX_ITERATIONS - || ::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE) + || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE) { break; } @@ -1414,7 +1408,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, try { double error = sp.second ? fy - fx : fx - fy; - if (::fabs(error) <= PDF_TOLERANCE * std::max(fx, fy)) + if (std::fabs(error) <= PDF_TOLERANCE * std::max(fx, fy)) { if (x > y[i % 2]) { @@ -1484,18 +1478,18 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, << ", f(candidate) = " << safePdf(beta_, candidate) - fx << ", eps = " << eps); - if (::fabs(safePdf(beta_, candidate) - fx) < ::fabs(fy - fx)) + if (std::fabs(safePdf(beta_, candidate) - fx) < std::fabs(fy - fx)) { y[i % 2] = candidate; } } catch (const std::exception &e) { - if (::fabs(fBracket.first - fx) < 10.0 * EPSILON * fx) + if (std::fabs(fBracket.first - fx) < 10.0 * EPSILON * fx) { y[i % 2] = bracket.first; } - else if (::fabs(fBracket.second - fx) < 10.0 * EPSILON * fx) + else if (std::fabs(fBracket.second - fx) < 10.0 * EPSILON * fx) { y[i % 2] = bracket.second; } @@ -2225,7 +2219,7 @@ double CTools::differentialEntropy(const lognormal &logNormal) double scale = logNormal.scale(); return 0.5 * std::log( boost::math::double_constants::two_pi * boost::math::double_constants::e - * square(scale)) + location; + * pow2(scale)) + location; } double CTools::differentialEntropy(const gamma &gamma_) @@ -2290,6 +2284,23 @@ double CTools::CGroup::rightEndpoint(double separation) const const CTools::CLookupTableForFastLog CTools::FAST_LOG_TABLE; +//////// Miscellaneous Implementations //////// + +namespace +{ +const double EPS{0.1}; +const double COEFFS[] + { + -1.0, + +1.0 / 2.0, + -1.0 / 6.0, + +1.0 / 24.0, + -1.0 / 120.0, + +1.0 / 720.0 + }; +const std::size_t N{boost::size(COEFFS)}; +} + double CTools::shiftLeft(double x, double eps) { if (x == NEG_INF) @@ -2308,5 +2319,123 @@ double CTools::shiftRight(double x, double eps) return (x < 0.0 ? 1.0 - eps : 1.0 + eps) * x; } +double CTools::powOneMinusX(double x, double p) +{ + // For large p, + // (1 - x) ^ p ~= exp(-p * x). + // + // and this doesn't suffer from cancellation errors in the limit + // p -> inf and x -> 0. For p * x << 1 we get much better precision + // using the Taylor expansion: + // (1 - x) ^ p = 1 - p * x + p * (p - 1) * x^2 / 2! + ... + // + // and canceling the leading terms. + + if (x == 1.0) + { + return 0.0; + } + if (p == 1.0) + { + return 1.0 - x; + } + + double y = p * x; + if (std::fabs(y) < EPS) + { + double remainder = 0.0; + double ti = 1.0; + for (std::size_t i = 0u; i < N && p != 0.0; ++i, p -= 1.0) + { + ti *= p * x; + remainder += COEFFS[i] * ti; + } + return 1.0 + remainder; + } + else if (p > 1000.0) + { + return std::exp(-y); + } + + if (x > 1.0) + { + double sign = static_cast(p) % 2 ? 
-1.0 : 1.0; + return sign * std::exp(p * std::log(x - 1.0)); + } + + return std::exp(p * std::log(1.0 - x)); +} + +double CTools::oneMinusPowOneMinusX(double x, double p) +{ + // For large p, + // (1 - x) ^ p ~= exp(-p * x). + // + // and this doesn't suffer from cancellation errors in the limit + // p -> inf and x -> 0. For p * x << 1 we get much better precision + // using the Taylor expansion: + // (1 - x) ^ p = 1 - p * x + p * (p - 1) * x^2 / 2! + ... + // + // Note that this doesn't make use of powOneMinusX because we can + // avoid the cancellation errors by using: + // 1 - (1 - x) ^ p = p * x - p * (p - 1) * x^2 / 2 + ... + // + // when p * x is small. + + if (x == 1.0) + { + return 1.0; + } + if (p == 1.0) + { + return x; + } + + double y = p * x; + if (std::fabs(y) < EPS) + { + double result = 0.0; + double ti = 1.0; + for (std::size_t i = 0u; i < N && p != 0.0; ++i, p -= 1.0) + { + ti *= p * x; + result -= COEFFS[i] * ti; + } + return result; + } + else if (p > 1000.0) + { + return 1.0 - std::exp(-y); + } + + if (x > 1.0) + { + double sign = static_cast(p) % 2 ? -1.0 : 1.0; + return 1.0 - sign * std::exp(p * std::log(x - 1.0)); + } + + return 1.0 - std::exp(p * std::log(1.0 - x)); +} + +double CTools::logOneMinusX(double x) +{ + double result = 0.0; + + if (std::fabs(x) < EPS) + { + double xi = -x; + for (std::size_t i = 0u; i < 6; ++i, xi *= -x) + { + result += xi / static_cast(i + 1); + } + } + else + { + result = std::log(1.0 - x); + } + + return result; +} + } } diff --git a/lib/maths/ProbabilityAggregators.cc b/lib/maths/ProbabilityAggregators.cc index 0c935d1708..c5644af76f 100644 --- a/lib/maths/ProbabilityAggregators.cc +++ b/lib/maths/ProbabilityAggregators.cc @@ -28,14 +28,8 @@ namespace maths namespace { -typedef std::vector TDoubleVec; -typedef std::pair TDoubleDoublePr; - -//! Compute \f$x^2\f$. -inline double square(double x) -{ - return x * x; -} +using TDoubleVec = std::vector; +using TDoubleDoublePr = std::pair; //! Compute the deviation corresponding to a probability of less likely //! samples \p p. @@ -56,7 +50,7 @@ bool deviation(double p, double &result) try { boost::math::normal_distribution<> normal(0.0, 1.0); - result = square(boost::math::quantile(normal, p / 2.0)); + result = CTools::pow2(boost::math::quantile(normal, p / 2.0)); return true; } catch (const std::exception &e) @@ -67,158 +61,6 @@ bool deviation(double p, double &result) return false; } -const double EPS = 0.1; - -//! A custom, numerically robust, implementation of \f$(1 - x) ^ p\f$. -//! -//! \note It is assumed that p is integer. -double powOneMinusX(double x, double p) -{ - // For large p, - // (1 - x) ^ p ~= exp(-p * x). - // - // and this doesn't suffer from cancellation errors in the limit - // p -> inf and x -> 0. For p * x << 1 we get much better precision - // using the Taylor expansion: - // (1 - x) ^ p = 1 - p * x + p * (p - 1) * x^2 / 2! + ... - // - // and canceling the leading terms. 
- - if (x == 1.0) - { - return 0.0; - } - if (p == 1.0) - { - return 1.0 - x; - } - - double y = p * x; - if (::fabs(y) < EPS) - { - static const double COEFFS[] = - { - -1.0, - +1.0 / 2.0, - -1.0 / 6.0, - +1.0 / 24.0, - -1.0 / 120.0, - +1.0 / 720.0 - }; - static const std::size_t N = boost::size(COEFFS); - - double remainder = 0.0; - double ti = 1.0; - for (std::size_t i = 0u; i < N && p != 0.0; ++i, p -= 1.0) - { - ti *= p * x; - remainder += COEFFS[i] * ti; - } - return 1.0 + remainder; - } - else if (p > 1000.0) - { - return ::exp(-y); - } - - if (x > 1.0) - { - double sign = static_cast(p) % 2 ? -1.0 : 1.0; - return sign * ::exp(p * ::log(x - 1.0)); - } - - return ::exp(p * ::log(1.0 - x)); -} - -//! A custom, numerically robust, implementation of \f$1 - (1 - x) ^ p\f$. -//! -//! \note It is assumed that p is integer. -double oneMinusPowOneMinusX(double x, double p) -{ - // For large p, - // (1 - x) ^ p ~= exp(-p * x). - // - // and this doesn't suffer from cancellation errors in the limit - // p -> inf and x -> 0. For p * x << 1 we get much better precision - // using the Taylor expansion: - // (1 - x) ^ p = 1 - p * x + p * (p - 1) * x^2 / 2! + ... - // - // Note that this doesn't make use of powOneMinusX because we can - // avoid the cancellation errors by using: - // 1 - (1 - x) ^ p = p * x - p * (p - 1) * x^2 / 2 + ... - // - // when p * x is small. - - if (x == 1.0) - { - return 1.0; - } - if (p == 1.0) - { - return x; - } - - double y = p * x; - if (::fabs(y) < EPS) - { - static const double COEFFS[] = - { - +1.0, - -1.0 / 2.0, - +1.0 / 6.0, - -1.0 / 24.0, - +1.0 / 120.0, - -1.0 / 720.0 - }; - static const std::size_t N = boost::size(COEFFS); - - double result = 0.0; - - double ti = 1.0; - for (std::size_t i = 0u; i < N && p != 0.0; ++i, p -= 1.0) - { - ti *= p * x; - result += COEFFS[i] * ti; - } - - return result; - } - else if (p > 1000.0) - { - return 1.0 - ::exp(-y); - } - - if (x > 1.0) - { - double sign = static_cast(p) % 2 ? -1.0 : 1.0; - return 1.0 - sign * ::exp(p * ::log(x - 1.0)); - } - - return 1.0 - ::exp(p * ::log(1.0 - x)); -} - -//! A custom implementation of \f$\log(1 - x)\f$ which handles the -//! cancellation error for small x. -double logOneMinusX(double x) -{ - double result = 0.0; - - if (::fabs(x) < EPS) - { - double xi = -x; - for (std::size_t i = 0u; i < 6; ++i, xi *= -x) - { - result += xi / static_cast(i + 1); - } - } - else - { - result = ::log(1.0 - x); - } - - return result; -} - //! \brief Calculates the probability of the m most extreme samples. //! //! 
DESCRIPTION:\n @@ -272,7 +114,7 @@ class CNumericalLogProbabilityOfMFromNExtremeSamples { if (m_I == m_M) { - return static_cast(m_N - m_M) * logOneMinusX(x); + return static_cast(m_N - m_M) * CTools::logOneMinusX(x); } double result; CLogIntegrand f(*m_Limits, *m_Corrections, m_N, m_M, m_I + 1u); @@ -750,7 +592,7 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result else if (E * x / m != 1.0) { double r = 1.0 - E * x / m; - b1 = -1.0 - 0.5 * logm + ::log(oneMinusPowOneMinusX(r, m + 1.0) / r); + b1 = -1.0 - 0.5 * logm + ::log(CTools::oneMinusPowOneMinusX(r, m + 1.0) / r); } else { @@ -775,7 +617,7 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result { double r = 1.0 - E * x / p; t = m + (m + 1.0) * logx - (m + 1.5) * logp - + ::log(oneMinusPowOneMinusX(r, p - m) / r); + + ::log(CTools::oneMinusPowOneMinusX(r, p - m) / r); } else { @@ -910,7 +752,7 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double &result else if (p != x) { double r = 1.0 - p / x; - b1 = ::log(oneMinusPowOneMinusX(r, p + 1.0) / r); + b1 = ::log(CTools::oneMinusPowOneMinusX(r, p + 1.0) / r); } else { @@ -989,7 +831,7 @@ bool CProbabilityOfExtremeSample::calculate(double &result) const result = 1.0; if (m_NumberSamples > 0) { - result = CTools::truncate(oneMinusPowOneMinusX(m_MinValue[0], m_NumberSamples), 0.0, 1.0); + result = CTools::truncate(CTools::oneMinusPowOneMinusX(m_MinValue[0], m_NumberSamples), 0.0, 1.0); } return true; } @@ -1116,7 +958,7 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result) { double index = static_cast(coeffs.size() - i); coeffs[i] /= index; - sum += coeffs[i] * powOneMinusX(p / 2.0, index); + sum += coeffs[i] * CTools::powOneMinusX(p / 2.0, index); } LOG_TRACE("sum = " << sum); @@ -1126,7 +968,7 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result) // that the following calculation can't use the re-normalized // "c" directly because it might be infinite. Instead, we make // use the fact that c * (1 - p)^(N - M + m) won't overflow. 
@@ -1126,7 +968,7 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result)
        // that the following calculation can't use the re-normalized
        // "c" directly because it might be infinite. Instead, we make
        // use of the fact that c * (1 - p)^(N - M + m) won't overflow.
-        double q = CTools::truncate(powOneMinusX(p, static_cast<double>(N - M + m)), 0.0, 1.0);
+        double q = CTools::truncate(CTools::powOneMinusX(p, static_cast<double>(N - M + m)), 0.0, 1.0);

        coeffs.push_back(-sum - q * ::exp(logc - logLargestCoeff));
        LOG_TRACE("c(0) = " << coeffs.back());
@@ -1171,7 +1013,7 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result)
    double pM = m_MinValues[0];
    LOG_TRACE("p(" << M << ") = " << pM);

-    double pMin = oneMinusPowOneMinusX(pM, static_cast<double>(N));
+    double pMin = CTools::oneMinusPowOneMinusX(pM, static_cast<double>(N));
    LOG_TRACE("1 - (1 - p(" << M << "))^" << N << " = " << pMin);

    if (M > 1)
@@ -1191,7 +1033,7 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result)
            {
                double index = static_cast<double>(coeffs.size() - i);
                double c = coeffs[i] / index;
-                double p = oneMinusPowOneMinusX(pM / 2.0, index);
+                double p = CTools::oneMinusPowOneMinusX(pM / 2.0, index);
                LOG_TRACE("term(" << index << ") = " << (c * p)
                          << " (c(" << index << ") = " << c
                          << ", 1 - (1 - p(M)/2)^" << index << " = " << p << ")");
diff --git a/lib/model/CInterimBucketCorrector.cc b/lib/model/CInterimBucketCorrector.cc
index d8cc6936e0..c1097be60b 100644
--- a/lib/model/CInterimBucketCorrector.cc
+++ b/lib/model/CInterimBucketCorrector.cc
@@ -27,7 +27,7 @@ const std::size_t COMPONENT_SIZE(24);
 const std::string COUNT_TREND_TAG("a");
 const std::string COUNT_MEAN_TAG("b");

-double meanDecayRate(core_t::TTime bucketLength)
+double decayRate(core_t::TTime bucketLength)
 {
     return  CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE
           * CAnomalyDetectorModelConfig::bucketNormalizationFactor(bucketLength);
@@ -35,7 +35,7 @@ double trendDecayRate(core_t::TTime bucketLength)
 {
-    return CAnomalyDetectorModelConfig::trendDecayRate(meanDecayRate(bucketLength), bucketLength);
+    return CAnomalyDetectorModelConfig::trendDecayRate(decayRate(bucketLength), bucketLength);
 }
 }
@@ -63,7 +63,7 @@ void CInterimBucketCorrector::update(core_t::TTime time, std::size_t bucketCount

     m_CountTrend.addPoint(bucketMidPoint, static_cast<double>(bucketCount));

-    double alpha = std::exp(-meanDecayRate(m_BucketLength));
+    double alpha = std::exp(-decayRate(m_BucketLength));
     m_CountMean.age(alpha);
     m_CountMean.add(bucketCount);
 }

From 10d4912f439807457a09d0b796f82f75ab044579 Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Thu, 15 Mar 2018 10:25:12 +0000
Subject: [PATCH 05/29] [ML] Wire in change detection/modelling to our
 univariate time series model (#11)

This wires in change detection and starts some unit testing of
CTimeSeriesModel with change points. There is some more work to be done
to avoid using level shifts to try to fit other types of change points,
such as scaling.
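A note on the acceptance rule this patch introduces: the change detector declared in CTimeSeriesChangeDetector.h below maintains a small collection of candidate change models alongside a no-change model, and its constructor takes a minimumDeltaBicToDetect margin (12.0 by default). The following is a minimal sketch of the decision that margin implies, using an illustrative struct and free function rather than the patch's actual types:

#include <algorithm>
#include <vector>

// A candidate change hypothesis and its Bayesian Information Criterion;
// lower BIC means the hypothesis explains the data more economically.
struct SCandidateChange
{
    double s_Bic;
};

// Accept the best candidate only when its BIC undercuts the no-change
// model's by at least the configured margin, i.e. when the evidence for
// a change is decisive rather than marginal.
bool acceptChange(double noChangeBic,
                  const std::vector<SCandidateChange> &candidates,
                  double minimumDeltaBicToDetect = 12.0)
{
    auto best = std::min_element(candidates.begin(), candidates.end(),
                                 [](const SCandidateChange &lhs,
                                    const SCandidateChange &rhs)
                                 { return lhs.s_Bic < rhs.s_Bic; });
    return best != candidates.end()
        && noChangeBic - best->s_Bic >= minimumDeltaBicToDetect;
}

In the patch itself the candidates are the level shift and time shift models declared in CTimeSeriesChangeDetector.h, and detection is additionally gated by the minimum and maximum test times wired through CModelParams below.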
---
 include/maths/CBasicStatistics.h              |   10 +
 include/maths/CModel.h                        |   28 +-
 include/maths/CNaiveBayes.h                   |   64 +-
 include/maths/CRestoreParams.h                |   52 +-
 include/maths/CTimeSeriesChangeDetector.h     |  167 ++-
 include/maths/CTimeSeriesDecomposition.h      |   44 +-
 .../maths/CTimeSeriesDecompositionDetail.h    |   18 +-
 .../maths/CTimeSeriesDecompositionInterface.h |   26 +-
 include/maths/CTimeSeriesDecompositionStub.h  |   18 +-
 include/maths/CTimeSeriesModel.h              |   73 +-
 include/maths/CTools.h                        |    7 +-
 include/maths/CTrendComponent.h               |   79 +-
 include/maths/Constants.h                     |   22 +-
 include/maths/MathsTypes.h                    |   10 +
 include/model/CAnomalyDetectorModelConfig.h   |   52 +-
 include/model/CModelFactory.h                 |    5 -
 include/model/CModelParams.h                  |   19 +-
 lib/maths/CDecayRateController.cc             |    2 +-
 lib/maths/CModel.cc                           |   67 +-
 lib/maths/CNaiveBayes.cc                      |  176 ++-
 lib/maths/CRestoreParams.cc                   |   27 +-
 lib/maths/CSeasonalComponent.cc               |    2 +-
 .../CSeasonalComponentAdaptiveBucketing.cc    |    2 +-
 lib/maths/CTimeSeriesChangeDetector.cc        |  421 ++++---
 lib/maths/CTimeSeriesDecomposition.cc         |  113 +-
 lib/maths/CTimeSeriesDecompositionDetail.cc   |   58 +-
 ...CTimeSeriesDecompositionStateSerialiser.cc |    5 +-
 lib/maths/CTimeSeriesDecompositionStub.cc     |   18 +-
 lib/maths/CTimeSeriesModel.cc                 |  734 +++++++-----
 lib/maths/CTrendComponent.cc                  |  296 +++--
 lib/maths/MathsTypes.cc                       |  208 ++--
 lib/maths/ProbabilityAggregators.cc           |    4 +-
 lib/maths/unittest/CForecastTest.cc           |    4 +-
 lib/maths/unittest/CModelTest.cc              |    8 +-
 .../CMultivariateMultimodalPriorTest.cc       |    4 +-
 .../unittest/CTimeSeriesChangeDetectorTest.cc |   51 +-
 .../unittest/CTimeSeriesDecompositionTest.cc  |   36 +-
 lib/maths/unittest/CTimeSeriesModelTest.cc    | 1007 +++++++++--------
 lib/maths/unittest/CTimeSeriesModelTest.h     |    2 +
 lib/maths/unittest/CTrendComponentTest.cc     |   13 +-
 lib/model/CAnomalyDetectorModel.cc            |    8 +-
 lib/model/CAnomalyDetectorModelConfig.cc      |   55 +-
 lib/model/CInterimBucketCorrector.cc          |   25 +-
 lib/model/CModelFactory.cc                    |    9 +-
 lib/model/CModelParams.cc                     |   28 +-
 lib/model/unittest/CModelDetailsViewTest.cc   |    5 +-
 lib/model/unittest/CModelToolsTest.cc         |    5 +-
 .../CProbabilityAndInfluenceCalculatorTest.cc |    4 +-
 48 files changed, 2535 insertions(+), 1556 deletions(-)

diff --git a/include/maths/CBasicStatistics.h b/include/maths/CBasicStatistics.h
index 092189b46e..bc1e814e07 100644
--- a/include/maths/CBasicStatistics.h
+++ b/include/maths/CBasicStatistics.h
@@ -221,6 +221,16 @@ class MATHS_EXPORT CBasicStatistics
             }
         }

+        //! Update the moments with the collection \p x.
+        template<typename U, std::size_t N>
+        void add(const core::CSmallVector<U, N> &x)
+        {
+            for (const auto &xi : x)
+            {
+                this->add(xi);
+            }
+        }
+
         //! Update the moments with the collection \p x.
         template<typename U>
         void add(const std::vector<SSampleCentralMoments<U, ORDER>> &x)
diff --git a/include/maths/CModel.h b/include/maths/CModel.h
index cf49a9ee6e..9c9a4186d1 100644
--- a/include/maths/CModel.h
+++ b/include/maths/CModel.h
@@ -49,9 +49,11 @@ class MATHS_EXPORT CModelParams
 {
     public:
         CModelParams(core_t::TTime bucketLength,
-                     const double &learnRate,
-                     const double &decayRate,
-                     double minimumSeasonalVarianceScale);
+                     double learnRate,
+                     double decayRate,
+                     double minimumSeasonalVarianceScale,
+                     core_t::TTime minimumTimeToDetectChange,
+                     core_t::TTime maximumTimeToTestForChange);

         //! Get the bucket length.
         core_t::TTime bucketLength(void) const;
@@ -68,6 +70,15 @@ class MATHS_EXPORT CModelParams
         //! Get the minimum seasonal variance scale.
         double minimumSeasonalVarianceScale(void) const;

+        //! Check if we should start testing for a change point in the model.
+        bool testForChange(core_t::TTime changeInterval) const;
+
+        //!
Get the minimum time to detect a change point in the model. + core_t::TTime minimumTimeToDetectChange(core_t::TTime timeSinceLastChangePoint) const; + + //! Get the maximum time to test for a change point in the model. + core_t::TTime maximumTimeToTestForChange(void) const; + //! Set the probability that the bucket will be empty for the model. void probabilityBucketEmpty(double probability); @@ -83,6 +94,10 @@ class MATHS_EXPORT CModelParams double m_DecayRate; //! The minimum seasonal variance scale. double m_MinimumSeasonalVarianceScale; + //! The minimum time permitted to detect a change in the model. + core_t::TTime m_MinimumTimeToDetectChange; + //! The maximum time permitted to test for a change in the model. + core_t::TTime m_MaximumTimeToTestForChange; //! The probability that a bucket will be empty for the model. double m_ProbabilityBucketEmpty; }; @@ -97,8 +112,6 @@ class MATHS_EXPORT CModelAddSamplesParams public: CModelAddSamplesParams(void); - CModelAddSamplesParams(const CModelAddSamplesParams &) = delete; - const CModelAddSamplesParams &operator=(const CModelAddSamplesParams &) = delete; //! Set whether or not the data are integer valued. CModelAddSamplesParams &integer(bool integer); @@ -160,8 +173,6 @@ class MATHS_EXPORT CModelProbabilityParams public: CModelProbabilityParams(void); - CModelProbabilityParams(const CModelAddSamplesParams &) = delete; - const CModelProbabilityParams &operator=(const CModelAddSamplesParams &) = delete; //! Set the tag for the entity for which to compute the probability. CModelProbabilityParams &tag(std::size_t tag); @@ -278,6 +289,9 @@ class MATHS_EXPORT CModel E_Reset //!< Model reset. }; + //! Combine the results \p lhs and \p rhs. + static EUpdateResult combine(EUpdateResult lhs, EUpdateResult rhs); + public: CModel(const CModelParams ¶ms); virtual ~CModel(void) = default; diff --git a/include/maths/CNaiveBayes.h b/include/maths/CNaiveBayes.h index 2f1997cc2c..7c20eb6194 100644 --- a/include/maths/CNaiveBayes.h +++ b/include/maths/CNaiveBayes.h @@ -11,9 +11,11 @@ #include +#include #include #include +#include #include namespace ml @@ -49,12 +51,18 @@ class MATHS_EXPORT CNaiveBayesFeatureDensity //! Persist state by passing information to \p inserter. virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0; + //! Set the data type. + virtual void dataType(maths_t::EDataType dataType) = 0; + //! Add the value \p x. virtual void add(const TDouble1Vec &x) = 0; //! Compute the log value of the density function at \p x. virtual double logValue(const TDouble1Vec &x) const = 0; + //! Compute the density at the mode. + virtual double logMaximumValue() const = 0; + //! Age out old values density to account for \p time passing. virtual void propagateForwardsByTime(double time) = 0; @@ -69,6 +77,9 @@ class MATHS_EXPORT CNaiveBayesFeatureDensity //! Get a checksum for this object. virtual uint64_t checksum(uint64_t seed) const = 0; + + //! Get a human readable description of the class density function. + virtual std::string print() const = 0; }; //! \brief An implementation of the class conditional density function @@ -77,7 +88,7 @@ class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayes { public: CNaiveBayesFeatureDensityFromPrior() = default; - CNaiveBayesFeatureDensityFromPrior(CPrior &prior); + CNaiveBayesFeatureDensityFromPrior(const CPrior &prior); //! Create and return a clone. //! @@ -97,6 +108,12 @@ class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayes //! 
Compute the log value of the density function at \p x. virtual double logValue(const TDouble1Vec &x) const; + //! Compute the density at the mode. + virtual double logMaximumValue() const; + + //! Set the data type. + virtual void dataType(maths_t::EDataType dataType); + //! Age out old values density to account for \p time passing. virtual void propagateForwardsByTime(double time); @@ -112,6 +129,9 @@ class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayes //! Get a checksum for this object. virtual uint64_t checksum(uint64_t seed) const; + //! Get a human readable description of the class density function. + virtual std::string print() const; + private: using TPriorPtr = boost::shared_ptr; @@ -128,16 +148,24 @@ class MATHS_EXPORT CNaiveBayes using TDoubleSizePrVec = std::vector; using TDouble1Vec = core::CSmallVector; using TDouble1VecVec = std::vector; + using TOptionalDouble = boost::optional; public: explicit CNaiveBayes(const CNaiveBayesFeatureDensity &exemplar, - double decayRate = 0.0); + double decayRate = 0.0, + TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble()); CNaiveBayes(const SDistributionRestoreParams ¶ms, core::CStateRestoreTraverser &traverser); //! Persist state by passing information to \p inserter. void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Efficiently swap the contents of this and \p other. + void swap(CNaiveBayes &other); + + //! Check if any training data has been added initialized. + bool initialized() const; + //! This can be used to optionally seed the class counts //! with \p counts. These are added on to data class counts //! to compute the class posterior probabilities. @@ -153,11 +181,14 @@ class MATHS_EXPORT CNaiveBayes //! for that feature. void addTrainingDataPoint(std::size_t label, const TDouble1VecVec &x); + //! Set the data type. + void dataType(maths_t::EDataType dataType); + //! Age out old values from the class conditional densities //! to account for \p time passing. void propagateForwardsByTime(double time); - //! Get the top \p n class probabilities for \p features. + //! Get the top \p n class probabilities for \p x. //! //! \param[in] n The number of class probabilities to estimate. //! \param[in] x The feature values. @@ -167,6 +198,23 @@ class MATHS_EXPORT CNaiveBayes TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec &x) const; + //! Get the probability of the class labeled \p label for \p x. + //! + //! \param[in] label The label of the class of interest. + //! \param[in] x The feature values. + //! \note \p x size should be equal to the number of features. + //! A feature is missing is indicated by passing an empty vector + //! for that feature. + double classProbability(std::size_t label, const TDouble1VecVec &x) const; + + //! Get the probabilities of all the classes for \p x. + //! + //! \param[in] x The feature values. + //! \note \p x size should be equal to the number of features. + //! A feature is missing is indicated by passing an empty vector + //! for that feature. + TDoubleSizePrVec classProbabilities(const TDouble1VecVec &x) const; + //! Debug the memory used by this object. void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; @@ -176,6 +224,9 @@ class MATHS_EXPORT CNaiveBayes //! Get a checksum for this object. uint64_t checksum(uint64_t seed = 0) const; + //! Get a human readable description of the classifier. 
+ std::string print() const; + private: using TFeatureDensityPtr = boost::shared_ptr; using TFeatureDensityPtrVec = std::vector; @@ -212,6 +263,13 @@ class MATHS_EXPORT CNaiveBayes bool validate(const TDouble1VecVec &x) const; private: + //! It is not always appropriate to use features with very low + //! probability in all classes to discriminate: the class choice + //! will be very sensitive to the underlying conditional density + //! model. This is a cutoff (for the minimum maximum class log + //! likelihood) in order to use a feature. + TOptionalDouble m_MinMaxLogLikelihoodToUseFeature; + //! Controls the rate at which data are aged out. double m_DecayRate; diff --git a/include/maths/CRestoreParams.h b/include/maths/CRestoreParams.h index 1f5abea443..8f8da3c697 100644 --- a/include/maths/CRestoreParams.h +++ b/include/maths/CRestoreParams.h @@ -9,6 +9,7 @@ #include +#include #include #include @@ -20,33 +21,15 @@ namespace maths { class CModelParams; -//! \brief Gatherers up extra parameters supplied when restoring -//! time series decompositions. -struct MATHS_EXPORT STimeSeriesDecompositionRestoreParams -{ - STimeSeriesDecompositionRestoreParams(double decayRate, - core_t::TTime minimumBucketLength, - std::size_t componentSize); - - //! The rate at which decomposition loses information. - double s_DecayRate; - - //! The data bucket length. - core_t::TTime s_MinimumBucketLength; - - //! The decomposition seasonal component size. - std::size_t s_ComponentSize; -}; - //! \brief Gatherers up extra parameters supplied when restoring //! distribution models. struct MATHS_EXPORT SDistributionRestoreParams { SDistributionRestoreParams(maths_t::EDataType dataType, double decayRate, - double minimumClusterFraction, - double minimumClusterCount, - double minimumCategoryCount); + double minimumClusterFraction = MINIMUM_CLUSTER_SPLIT_FRACTION, + double minimumClusterCount = MINIMUM_CLUSTER_SPLIT_COUNT, + double minimumCategoryCount = MINIMUM_CATEGORY_COUNT); //! The type of data being clustered. maths_t::EDataType s_DataType; @@ -66,6 +49,31 @@ struct MATHS_EXPORT SDistributionRestoreParams //! \brief Gatherers up extra parameters supplied when restoring //! time series decompositions. +struct MATHS_EXPORT STimeSeriesDecompositionRestoreParams +{ + STimeSeriesDecompositionRestoreParams(double decayRate, + core_t::TTime minimumBucketLength, + std::size_t componentSize, + const SDistributionRestoreParams &changeModelParams); + STimeSeriesDecompositionRestoreParams(double decayRate, + core_t::TTime minimumBucketLength, + const SDistributionRestoreParams &changeModelParams); + + //! The rate at which decomposition loses information. + double s_DecayRate; + + //! The data bucket length. + core_t::TTime s_MinimumBucketLength; + + //! The decomposition seasonal component size. + std::size_t s_ComponentSize; + + //! The change model distributions' restore parameters. + SDistributionRestoreParams s_ChangeModelParams; +}; + +//! \brief Gatherers up extra parameters supplied when restoring +//! time series models. struct MATHS_EXPORT SModelRestoreParams { using TModelParamsCRef = boost::reference_wrapper; @@ -80,7 +88,7 @@ struct MATHS_EXPORT SModelRestoreParams //! The time series decomposition restore parameters. STimeSeriesDecompositionRestoreParams s_DecompositionParams; - //! The time series decomposition restore parameters. + //! The time series residual distribution restore parameters. 
SDistributionRestoreParams s_DistributionParams; }; diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h index 68471affb2..55659e361d 100644 --- a/include/maths/CTimeSeriesChangeDetector.h +++ b/include/maths/CTimeSeriesChangeDetector.h @@ -7,8 +7,9 @@ #ifndef INCLUDED_ml_maths_CTimeSeriesChangeDetector_h #define INCLUDED_ml_maths_CTimeSeriesChangeDetector_h -#include #include +#include +#include #include #include @@ -32,16 +33,18 @@ class CModelAddSamplesParams; class CPrior; class CTimeSeriesDecompositionInterface; struct SDistributionRestoreParams; +struct SModelRestoreParams; namespace time_series_change_detector_detail { -class CUnivariateTimeSeriesChangeModel; +class CUnivariateChangeModel; } //! \brief A description of a time series change. struct MATHS_EXPORT SChangeDescription { using TDouble2Vec = core::CSmallVector; + using TPriorPtr = boost::shared_ptr; //! The types of change we can detect. enum EDescription @@ -50,13 +53,21 @@ struct MATHS_EXPORT SChangeDescription E_TimeShift }; - SChangeDescription(EDescription decription, double value); + SChangeDescription(EDescription decription, + double value, + const TPriorPtr &residualModel); + + //! Get a description of this change. + std::string print() const; //! The type of change. EDescription s_Description; //! The change value. TDouble2Vec s_Value; + + //! The residual model to use after the change. + TPriorPtr s_ResidualModel; }; //! \brief Tests a variety of possible changes which might have @@ -65,23 +76,24 @@ struct MATHS_EXPORT SChangeDescription class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector { public: - using TTimeDoublePr = std::pair; - using TTimeDoublePr1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; using TDouble4Vec1Vec = core::CSmallVector; + using TTimeDoublePr = std::pair; + using TTimeDoublePr1Vec = core::CSmallVector; using TWeightStyleVec = maths_t::TWeightStyleVec; - using TOptionalChangeDescription = boost::optional; + using TDecompositionPtr = boost::shared_ptr; using TPriorPtr = boost::shared_ptr; + using TOptionalChangeDescription = boost::optional; public: - CUnivariateTimeSeriesChangeDetector(const CTimeSeriesDecompositionInterface &trendModel, + CUnivariateTimeSeriesChangeDetector(const TDecompositionPtr &trendModel, const TPriorPtr &residualModel, - core_t::TTime minimumTimeToDetect, - core_t::TTime maximumTimeToDetect, - double minimumDeltaBicToDetect); + core_t::TTime minimumTimeToDetect = 6 * core::constants::HOUR, + core_t::TTime maximumTimeToDetect = core::constants::DAY, + double minimumDeltaBicToDetect = 12.0); //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser); //! Persist state by passing information to \p inserter. @@ -92,11 +104,9 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector TOptionalChangeDescription change(); //! Add \p samples to the change detector. - void addSamples(maths_t::EDataType dataType, - const TWeightStyleVec &weightStyles, + void addSamples(const TWeightStyleVec &weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights, - double propagationInterval = 1.0); + const TDouble4Vec1Vec &weights); //! Check if we should stop testing. 
bool stopTesting() const; @@ -111,9 +121,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector uint64_t checksum(uint64_t seed = 0) const; private: - using TUnivariateTimeSeriesChangeModel = - time_series_change_detector_detail::CUnivariateTimeSeriesChangeModel; - using TChangeModelPtr = boost::shared_ptr; + using TChangeModel = time_series_change_detector_detail::CUnivariateChangeModel; + using TChangeModelPtr = boost::shared_ptr; using TChangeModelPtr4Vec = core::CSmallVector; using TMinMaxAccumulator = CBasicStatistics::CMinMax; @@ -146,23 +155,25 @@ namespace time_series_change_detector_detail //! \brief Helper interface for change detection. Implementations of //! this are used to model specific types of changes which can occur. -class MATHS_EXPORT CUnivariateTimeSeriesChangeModel : private core::CNonCopyable +class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable { public: - using TTimeDoublePr = std::pair; - using TTimeDoublePr1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; using TDouble4Vec1Vec = core::CSmallVector; + using TTimeDoublePr = std::pair; + using TTimeDoublePr1Vec = core::CSmallVector; using TWeightStyleVec = maths_t::TWeightStyleVec; + using TDecompositionPtr = boost::shared_ptr; using TPriorPtr = boost::shared_ptr; using TOptionalChangeDescription = boost::optional; public: - CUnivariateTimeSeriesChangeModel(const CTimeSeriesDecompositionInterface &trendModel); - virtual ~CUnivariateTimeSeriesChangeModel() = default; + CUnivariateChangeModel(const TDecompositionPtr &trendModel, + const TPriorPtr &residualModel); + virtual ~CUnivariateChangeModel() = default; //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) = 0; //! Persist state by passing information to \p inserter. @@ -176,25 +187,31 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeModel : private core::CNonCopyable //! Update the change model with \p samples. virtual void addSamples(std::size_t count, - maths_t::EDataType dataType, const TWeightStyleVec &weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights, - double propagationInterval = 1.0) = 0; + const TDouble4Vec1Vec &weights) = 0; //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + std::size_t memoryUsage() const; //! Get the static size of this object. virtual std::size_t staticSize() const = 0; - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const = 0; - //! Get a checksum for this object. virtual uint64_t checksum(uint64_t seed) const = 0; protected: + //! The sample count to initialize a change model. + static const std::size_t COUNT_TO_INITIALIZE{5u}; + + protected: + //! Restore the residual model reading state from \p traverser. + bool restoreResidualModel(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser); + //! Get the log-likelihood. double logLikelihood() const; @@ -203,24 +220,36 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeModel : private core::CNonCopyable //! Get the time series trend model. const CTimeSeriesDecompositionInterface &trendModel() const; + //! Get the time series trend model. 
+ CTimeSeriesDecompositionInterface &trendModel(); + + //! Get the time series residual model. + const CPrior &residualModel() const; + //! Get the time series residual model. + CPrior &residualModel(); + //! Get the time series residual model member variable. + TPriorPtr residualModelPtr() const; private: //! The likelihood of the data under this model. double m_LogLikelihood; //! A model decomposing the time series trend. - const CTimeSeriesDecompositionInterface &m_TrendModel; + TDecompositionPtr m_TrendModel; + + //! A reference to the underlying prior. + TPriorPtr m_ResidualModel; }; //! \brief Used to capture the likelihood of the data given no change. -class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateTimeSeriesChangeModel +class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeModel { public: - CUnivariateNoChangeModel(const CTimeSeriesDecompositionInterface &trendModel, + CUnivariateNoChangeModel(const TDecompositionPtr &trendModel, const TPriorPtr &residualModel); //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser); //! Persist state by passing information to \p inserter. @@ -234,39 +263,27 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateTimeSeries //! Get the log likelihood of \p samples. virtual void addSamples(std::size_t count, - maths_t::EDataType dataType, const TWeightStyleVec &weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights, - double propagationInterval = 1.0); - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + const TDouble4Vec1Vec &weights); //! Get the static size of this object. virtual std::size_t staticSize() const; - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; - //! Get a checksum for this object. virtual uint64_t checksum(uint64_t seed) const; - - private: - //! A reference to the underlying prior. - TPriorPtr m_ResidualModel; }; //! \brief Captures the likelihood of the data given an arbitrary //! level shift. -class MATHS_EXPORT CUnivariateTimeSeriesLevelShiftModel final : public CUnivariateTimeSeriesChangeModel +class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeModel { public: - CUnivariateTimeSeriesLevelShiftModel(const CTimeSeriesDecompositionInterface &trendModel, - const TPriorPtr &residualModel); + CUnivariateLevelShiftModel(const TDecompositionPtr &trendModel, + const TPriorPtr &residualModel); //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser); //! Persist state by passing information to \p inserter. @@ -280,21 +297,13 @@ class MATHS_EXPORT CUnivariateTimeSeriesLevelShiftModel final : public CUnivaria //! Update with \p samples. virtual void addSamples(std::size_t count, - maths_t::EDataType dataType, const TWeightStyleVec &weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights, - double propagationInterval = 1.0); - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + const TDouble4Vec1Vec &weights); //! Get the static size of this object. 
virtual std::size_t staticSize() const; - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; - //! Get a checksum for this object. virtual uint64_t checksum(uint64_t seed) const; @@ -306,28 +315,24 @@ class MATHS_EXPORT CUnivariateTimeSeriesLevelShiftModel final : public CUnivaria //! The optimal shift. TMeanAccumulator m_Shift; - //! Get the number of samples. - double m_SampleCount; - - //! The prior for the time series' residual model subject - //! to the shift. - TPriorPtr m_ResidualModel; - - //! The initial residual model mode. + //! The mode of the initial residual distribution model. double m_ResidualModelMode; + + //! The number of samples added so far. + double m_SampleCount; }; //! \brief Captures the likelihood of the data given a specified //! time shift. -class MATHS_EXPORT CUnivariateTimeSeriesTimeShiftModel final : public CUnivariateTimeSeriesChangeModel +class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeModel { public: - CUnivariateTimeSeriesTimeShiftModel(const CTimeSeriesDecompositionInterface &trendModel, - const TPriorPtr &residualModel, - core_t::TTime shift); + CUnivariateTimeShiftModel(const TDecompositionPtr &trendModel, + const TPriorPtr &residualModel, + core_t::TTime shift); //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser); //! Persist state by passing information to \p inserter. @@ -341,31 +346,19 @@ class MATHS_EXPORT CUnivariateTimeSeriesTimeShiftModel final : public CUnivariat //! Update with \p samples. virtual void addSamples(std::size_t count, - maths_t::EDataType dataType, const TWeightStyleVec &weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights, - double propagationInterval = 1.0); - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + const TDouble4Vec1Vec &weights); //! Get the static size of this object. virtual std::size_t staticSize() const; - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; - //! Get a checksum for this object. virtual uint64_t checksum(uint64_t seed) const; private: //! The shift in time of the time series trend model. core_t::TTime m_Shift; - - //! The prior for the time series' residual model subject - //! to the shift. - TPriorPtr m_ResidualModel; }; } diff --git a/include/maths/CTimeSeriesDecomposition.h b/include/maths/CTimeSeriesDecomposition.h index 19b5944d62..022b981e96 100644 --- a/include/maths/CTimeSeriesDecomposition.h +++ b/include/maths/CTimeSeriesDecomposition.h @@ -7,6 +7,7 @@ #ifndef INCLUDED_ml_maths_CTimeSeriesDecomposition_h #define INCLUDED_ml_maths_CTimeSeriesDecomposition_h +#include #include #include #include @@ -25,6 +26,7 @@ class CStateRestoreTraverser; namespace maths { class CPrior; +struct STimeSeriesDecompositionRestoreParams; //! \brief Decomposes a time series into a linear combination //! of periodic functions and a stationary random process. @@ -55,10 +57,6 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt public: using TSizeVec = std::vector; - public: - //! The default size to use for the seasonal components. - static const std::size_t DEFAULT_COMPONENT_SIZE; - public: //! \param[in] decayRate The rate at which information is lost. //! 
\param[in] bucketLength The data bucketing length. @@ -66,16 +64,14 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt //! use estimate a seasonal component. explicit CTimeSeriesDecomposition(double decayRate = 0.0, core_t::TTime bucketLength = 0, - std::size_t seasonalComponentSize = DEFAULT_COMPONENT_SIZE); + std::size_t seasonalComponentSize = DECOMPOSITION_COMPONENT_SIZE); //! Construct from part of a state document. - CTimeSeriesDecomposition(double decayRate, - core_t::TTime bucketLength, - std::size_t seasonalComponentSize, + CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams ¶ms, core::CStateRestoreTraverser &traverser); //! Deep copy. - CTimeSeriesDecomposition(const CTimeSeriesDecomposition &other); + CTimeSeriesDecomposition(const CTimeSeriesDecomposition &other, bool isForForecast = false); //! An efficient swap of the state of this and \p other. void swap(CTimeSeriesDecomposition &other); @@ -87,7 +83,10 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt void acceptPersistInserter(core::CStatePersistInserter &inserter) const; //! Clone this decomposition. - virtual CTimeSeriesDecomposition *clone(void) const; + virtual CTimeSeriesDecomposition *clone(bool isForForecast = false) const; + + //! Set the data type. + virtual void dataType(maths_t::EDataType dataType); //! Set the decay rate. virtual void decayRate(double decayRate); @@ -115,6 +114,15 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt const maths_t::TWeightStyleVec &weightStyles = TWeights::COUNT, const maths_t::TDouble4Vec &weights = TWeights::UNIT); + //! Apply \p change at \p time. + //! + //! \param[in] time The time of the change point. + //! \param[in] value The value immediately before the change + //! point. + //! \param[in] change A description of the change to apply. + virtual void applyChange(core_t::TTime time, double value, + const SChangeDescription &change); + //! Propagate the decomposition forwards to \p time. void propagateForwardsTo(core_t::TTime time); @@ -139,18 +147,20 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt //! \param[in] step The time increment. //! \param[in] confidence The forecast confidence interval. //! \param[in] minimumScale The minimum permitted seasonal scale. - //! \param[in] result Filled in with the forecast lower bound, prediction - //! and upper bound. + //! \param[in] writer Forecast results are passed to this callback. virtual void forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, double minimumScale, - TDouble3VecVec &result); + const TWriteForecastResult &writer); //! Detrend \p value from the time series being modeled by removing //! any trend and periodic component at \p time. - virtual double detrend(core_t::TTime time, double value, double confidence) const; + virtual double detrend(core_t::TTime time, + double value, + double confidence, + int components = E_All) const; //! Get the mean variance of the baseline. virtual double meanVariance(void) const; @@ -196,7 +206,8 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt void initializeMediator(void); //! Create from part of a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser); //! 
The correction to produce a smooth join between periodic //! repeats and partitions. @@ -219,6 +230,9 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt static const core_t::TTime SMOOTHING_INTERVAL; private: + //! Any time shift to supplied times. + core_t::TTime m_TimeShift; + //! The time of the latest value added. core_t::TTime m_LastValueTime; diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h index d60a4a2886..401d39e233 100644 --- a/include/maths/CTimeSeriesDecompositionDetail.h +++ b/include/maths/CTimeSeriesDecompositionDetail.h @@ -198,7 +198,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { public: CPeriodicityTest(double decayRate, core_t::TTime bucketLength); - CPeriodicityTest(const CPeriodicityTest &other); + CPeriodicityTest(const CPeriodicityTest &other, bool isForForecast = false); //! Initialize by reading state from \p traverser. bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); @@ -280,7 +280,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { public: CCalendarTest(double decayRate, core_t::TTime bucketLength); - CCalendarTest(const CCalendarTest &other); + CCalendarTest(const CCalendarTest &other, bool isForForecast = false); //! Initialize by reading state from \p traverser. bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); @@ -371,7 +371,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail }; //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser); //! Persist state by passing information to \p inserter. void acceptPersistInserter(core::CStatePersistInserter &inserter) const; @@ -388,8 +389,17 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail //! Create a new calendar component. virtual void handle(const SDetectedCalendar &message); + //! Apply \p change at \p time. + void shiftLevel(core_t::TTime time, double value, double shift); + //! Maybe re-interpolate the components. - void interpolate(const SMessage &message, bool refine = true); + void interpolate(const SMessage &message); + + //! Maybe re-interpolate the components. + void interpolateForForecast(core_t::TTime time); + + //! Set the data type. + void dataType(maths_t::EDataType dataType); //! Set the decay rate. void decayRate(double decayRate); diff --git a/include/maths/CTimeSeriesDecompositionInterface.h b/include/maths/CTimeSeriesDecompositionInterface.h index 988bcd4cad..5caf75d2d3 100644 --- a/include/maths/CTimeSeriesDecompositionInterface.h +++ b/include/maths/CTimeSeriesDecompositionInterface.h @@ -30,6 +30,7 @@ namespace maths class CMultivariatePrior; class CPrior; class CSeasonalComponent; +struct SChangeDescription; //! \brief The interface for decomposing times series into periodic, //! calendar periodic and trend components. @@ -40,6 +41,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface using TDouble3VecVec = std::vector; using TDoubleAry = boost::array; using TWeights = CConstantWeights; + using TWriteForecastResult = std::function; //! The components of the decomposition. enum EComponents @@ -59,7 +61,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface virtual ~CTimeSeriesDecompositionInterface(void) = default; //! Clone this decomposition. 
- virtual CTimeSeriesDecompositionInterface *clone(void) const = 0; + virtual CTimeSeriesDecompositionInterface *clone(bool isForForecast = false) const = 0; + + //! Set the data type. + virtual void dataType(maths_t::EDataType dataType) = 0; //! Set the decay rate. virtual void decayRate(double decayRate) = 0; @@ -87,6 +92,15 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface const maths_t::TWeightStyleVec &weightStyles = TWeights::COUNT, const maths_t::TDouble4Vec &weights = TWeights::UNIT) = 0; + //! Apply \p change at \p time. + //! + //! \param[in] time The time of the change point. + //! \param[in] value The value immediately before the change + //! point. + //! \param[in] change A description of the change to apply. + virtual void applyChange(core_t::TTime time, double value, + const SChangeDescription &change) = 0; + //! Propagate the decomposition forwards to \p time. virtual void propagateForwardsTo(core_t::TTime time) = 0; @@ -111,20 +125,22 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface //! \param[in] step The time increment. //! \param[in] confidence The forecast confidence interval. //! \param[in] minimumScale The minimum permitted seasonal scale. - //! \param[in] result Filled in with the forecast lower bound, prediction - //! and upper bound. + //! \param[in] writer Forecast results are passed to this callback. virtual void forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, double minimumScale, - TDouble3VecVec &result) = 0; + const TWriteForecastResult &writer) = 0; //! Detrend \p value from the time series being modeled by removing //! any periodic component at \p time. //! //! \note That detrending preserves the time series mean. - virtual double detrend(core_t::TTime time, double value, double confidence) const = 0; + virtual double detrend(core_t::TTime time, + double value, + double confidence, + int components = E_All) const = 0; //! Get the mean variance of the baseline. virtual double meanVariance(void) const = 0; diff --git a/include/maths/CTimeSeriesDecompositionStub.h b/include/maths/CTimeSeriesDecompositionStub.h index 04e89b55db..42502734e1 100644 --- a/include/maths/CTimeSeriesDecompositionStub.h +++ b/include/maths/CTimeSeriesDecompositionStub.h @@ -26,7 +26,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio { public: //! Clone this decomposition. - virtual CTimeSeriesDecompositionStub *clone(void) const; + virtual CTimeSeriesDecompositionStub *clone(bool isForForecast = false) const; + + //! No-op. + virtual void dataType(maths_t::EDataType dataType); //! No-op. virtual void decayRate(double decayRate); @@ -43,6 +46,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio const maths_t::TWeightStyleVec &weightStyles = TWeights::COUNT, const maths_t::TDouble4Vec &weights = TWeights::UNIT); + //! No-op. + virtual void applyChange(core_t::TTime time, double value, + const SChangeDescription &change); + //! No-op. virtual void propagateForwardsTo(core_t::TTime time); @@ -55,16 +62,19 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio int components = E_All, bool smooth = true) const; - //! Clears \p result. + //! No-op. virtual void forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, double minimumScale, - TDouble3VecVec &result); + const TWriteForecastResult &writer); //! Returns \p value. 
- virtual double detrend(core_t::TTime time, double value, double confidence) const; + virtual double detrend(core_t::TTime time, + double value, + double confidence, + int components = E_All) const; //! Returns 0.0. virtual double meanVariance(void) const; diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h index 239e330fb8..919b897d85 100644 --- a/include/maths/CTimeSeriesModel.h +++ b/include/maths/CTimeSeriesModel.h @@ -27,6 +27,8 @@ class CMultivariatePrior; class CPrior; class CTimeSeriesDecompositionInterface; class CTimeSeriesAnomalyModel; +class CUnivariateTimeSeriesChangeDetector; +struct SChangeDescription; struct SDistributionRestoreParams; struct SModelRestoreParams; @@ -34,8 +36,10 @@ struct SModelRestoreParams; class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { public: + using TDouble4Vec = core::CSmallVector; using TTimeDoublePr = std::pair; using TTimeDoublePrCBuf = boost::circular_buffer; + using TDecompositionPtr = boost::shared_ptr; using TDecayRateController2Ary = boost::array; public: @@ -168,6 +172,19 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel //! Get the type of data being modeled. virtual maths_t::EDataType dataType(void) const; + //! \name Helpers + //@{ + //! Unpack the weights in \p weights. + static TDouble4Vec unpack(const TDouble2Vec4Vec &weights); + + //! Reinitialize \p residualModel using the detrended values + //! from \p slidingWindow. + static void reinitializeResidualModel(double learnRate, + const TDecompositionPtr &trend, + const TTimeDoublePrCBuf &slidingWindow, + CPrior &residualModel); + //@} + //! \name Test Functions //@{ //! Get the sliding window of recent values. @@ -181,30 +198,46 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel //@} private: + using TSizeVec = std::vector; using TDouble1Vec = core::CSmallVector; using TDouble1VecVec = std::vector; using TDouble2Vec4VecVec = std::vector; using TVector = CVectorNx1; using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TDecayRateController2AryPtr = boost::shared_ptr; - using TDecompositionPtr = boost::shared_ptr; using TPriorPtr = boost::shared_ptr; using TAnomalyModelPtr = boost::shared_ptr; using TMultivariatePriorCPtrSizePr = std::pair; using TMultivariatePriorCPtrSizePr1Vec = core::CSmallVector; using TModelCPtr1Vec = core::CSmallVector; + using TChangeDetectorPtr = boost::shared_ptr; private: - CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel &other, std::size_t id); + CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel &other, + std::size_t id, + bool isForForecast = false); + + //! Test for and apply any change we find. + EUpdateResult testAndApplyChange(const CModelAddSamplesParams ¶ms, + const TSizeVec &order, + const TTimeDouble2VecSizeTrVec &samples); + + //! Apply \p change to this model. + EUpdateResult applyChange(const SChangeDescription &change); //! Update the trend with \p samples. EUpdateResult updateTrend(const maths_t::TWeightStyleVec &trendStyles, const TTimeDouble2VecSizeTrVec &samples, const TDouble2Vec4VecVec &trendWeights); + //! Compute the prediction errors for \p sample. void appendPredictionErrors(double interval, double sample, TDouble1VecVec (&result)[2]); + //! Reinitialize state after detecting a new component of the trend + //! decomposition. + void reinitializeStateGivenNewComponent(void); + //! Get the models for the correlations and the models of the correlated //! time series. 
bool correlationModels(TSize1Vec &correlated, @@ -239,6 +272,19 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel //! value of the time series. TAnomalyModelPtr m_AnomalyModel; + //! The last "normal" time and median value. + TTimeDoublePr m_CandidateChangePoint; + + //! If the time series appears to be undergoing change, the contiguous + //! interval of unpredictable values. + core_t::TTime m_CurrentChangeInterval; + + //! The time of the last change point. + core_t::TTime m_TimeOfLastChangePoint; + + //! Used to test for changes in the time series. + TChangeDetectorPtr m_ChangeDetector; + //! A sliding window of the recent samples (used to reinitialize the //! residual model when a new trend component is detected). TTimeDoublePrCBuf m_SlidingWindow; @@ -405,10 +451,8 @@ class MATHS_EXPORT CTimeSeriesCorrelations //! Add a sample for the time series identified by \p id. void addSamples(std::size_t id, - maths_t::EDataType type, + const CModelAddSamplesParams ¶ms, const TTimeDouble2VecSizeTrVec &samples, - const TDouble4Vec1Vec &weights, - double interval, double multiplier); //! Get the ids of the time series correlated with \p id. @@ -455,6 +499,8 @@ class MATHS_EXPORT CTimeSeriesCorrelations class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { public: + using TDouble10Vec = core::CSmallVector; + using TDouble10Vec4Vec = core::CSmallVector; using TTimeDouble2VecPr = std::pair; using TTimeDouble2VecPrCBuf = boost::circular_buffer; using TDecompositionPtr = boost::shared_ptr; @@ -586,6 +632,19 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel //! Get the type of data being modeled. virtual maths_t::EDataType dataType(void) const; + //! \name Helpers + //@{ + //! Unpack the weights in \p weights. + static TDouble10Vec4Vec unpack(const TDouble2Vec4Vec &weights); + + //! Reinitialize \p residualModel using the detrended values + //! from \p slidingWindow. + static void reinitializeResidualModel(double learnRate, + const TDecompositionPtr10Vec &trend, + const TTimeDouble2VecPrCBuf &slidingWindow, + CMultivariatePrior &residualModel); + //@} + //! \name Test Functions //@{ //! Get the sliding window of recent values. @@ -619,6 +678,10 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel const TDouble2Vec &sample, TDouble1VecVec (&result)[2]); + //! Reinitialize state after detecting a new component of the trend + //! decomposition. + void reinitializeStateGivenNewComponent(void); + //! Get the model dimension. std::size_t dimension(void) const; diff --git a/include/maths/CTools.h b/include/maths/CTools.h index f363fa8f43..4b9f3bdb85 100644 --- a/include/maths/CTools.h +++ b/include/maths/CTools.h @@ -729,7 +729,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable return 1.0 / (1.0 + 1.0 / p); } - //! A smooth Heaviside function centred at one. + //! A smooth Heaviside function. //! //! This is a smooth version of the Heaviside function implemented //! as \f$sigmoid\left(\frac{sign (x - 1)}{wb}\right)\f$ normalized @@ -739,10 +739,11 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable //! //! \param[in] x The argument. //! \param[in] width The step width. + //! \param[in] x0 The centre of the step. //! \param[in] sign Determines whether it's a step up or down. 
- static double smoothHeaviside(double x, double width, double sign = 1.0) + static double smoothHeaviside(double x, double width, double x0 = 0.0, double sign = 1.0) { - return sigmoid(std::exp(sign * (x - 1.0) / width)) + return sigmoid(std::exp(sign * (x - x0) / width)) / sigmoid(std::exp(1.0 / width)); } diff --git a/include/maths/CTrendComponent.h b/include/maths/CTrendComponent.h index 119e18c514..5b87f115c5 100644 --- a/include/maths/CTrendComponent.h +++ b/include/maths/CTrendComponent.h @@ -11,6 +11,9 @@ #include #include +#include +#include +#include #include #include #include @@ -21,6 +24,7 @@ namespace ml { namespace maths { +struct SDistributionRestoreParams; //! \brief Models the trend component of a time series. //! @@ -53,6 +57,8 @@ class MATHS_EXPORT CTrendComponent using TVectorVecVec = std::vector; using TMatrix = CSymmetricMatrixNxN; using TMatrixVec = std::vector; + using TSeasonalForecast = std::function; + using TWriteForecastResult = std::function; public: CTrendComponent(double decayRate); @@ -64,7 +70,8 @@ class MATHS_EXPORT CTrendComponent void acceptPersistInserter(core::CStatePersistInserter &inserter) const; //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser); //! Check if the trend has been estimated. bool initialized() const; @@ -79,6 +86,14 @@ class MATHS_EXPORT CTrendComponent //! greater than \p decayRate. void shiftSlope(double decayRate, double shift); + //! Apply a level shift of \p value at \p time and \p value. + void shiftLevel(core_t::TTime time, double value, double shift); + + //! Apply no level shift at \p time and \p value. + //! + //! This updates the model for the probability of a level shift. + void dontShiftLevel(core_t::TTime time, double value); + //! Adds a value \f$(t, f(t))\f$ to this component. //! //! \param[in] time The time of the point. @@ -87,6 +102,9 @@ class MATHS_EXPORT CTrendComponent //! less influence it has on the component. void add(core_t::TTime time, double value, double weight = 1.0); + //! Set the data type. + void dataType(maths_t::EDataType dataType); + //! Get the base rate at which models lose information. double defaultDecayRate() const; @@ -109,12 +127,20 @@ class MATHS_EXPORT CTrendComponent //! variance as a percentage. TDoubleDoublePr variance(double confidence) const; - //! Create \p n sample forecast paths. + //! Forecast the trend model from \p startTime to \p endTime. + //! + //! \param[in] startTime The start time of the forecast interval. + //! \param[in] endTime The end time of the forecast interval. + //! \param[in] step The time step. + //! \param[in] confidence The confidence interval to calculate. + //! \param[in] seasonal Forecasts seasonal components. + //! \param[in] writer Writes out forecast results. void forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, - TDouble3VecVec &result) const; + const TSeasonalForecast &seasonal, + const TWriteForecastResult &writer) const; //! Get the interval which has been observed so far. core_t::TTime observedInterval() const; @@ -149,6 +175,44 @@ class MATHS_EXPORT CTrendComponent }; using TModelVec = std::vector; + //! \brief Forecasts the level model by path roll out. + class CForecastLevel : private core::CNonCopyable + { + public: + //! The default number of roll out paths to use. 
+        static const std::size_t DEFAULT_NUMBER_PATHS{100u};
+
+    public:
+        CForecastLevel(const CNaiveBayes &probability,
+                       const CNormalMeanPrecConjugate &magnitude,
+                       core_t::TTime timeOfLastChange,
+                       std::size_t numberPaths = DEFAULT_NUMBER_PATHS);
+
+        //! Forecast the time series level at \p time.
+        TDouble3Vec forecast(core_t::TTime time,
+                             double prediction,
+                             double confidence);
+
+    private:
+        using TTimeVec = std::vector<core_t::TTime>;
+
+    private:
+        //! The model of the change probability.
+        const CNaiveBayes &m_Probability;
+        //! The model of the change magnitude.
+        const CNormalMeanPrecConjugate &m_Magnitude;
+        //! A random number generator for generating roll-outs.
+        CPRNG::CXorOShiro128Plus m_Rng;
+        //! The roll-outs' current forecasted levels.
+        TDoubleVec m_Levels;
+        //! The roll-outs' current times of last change.
+        TTimeVec m_TimesOfLastChange;
+        //! Maintains the current bucket probability of change.
+        TDoubleVec m_ProbabilitiesOfChange;
+        //! Placeholder for sampling.
+        TDoubleVec m_Uniform01;
+    };
+
     private:
         //! Get the factors by which to age the different regression models.
         TDoubleVec factors(core_t::TTime interval) const;
@@ -185,11 +249,18 @@ class MATHS_EXPORT CTrendComponent
         //! The start time of the regression models.
         core_t::TTime m_RegressionOrigin;
         //! The regression models (we have them for multiple time scales).
-        TModelVec m_Models;
+        TModelVec m_TrendModels;
         //! The variance of the prediction errors.
         double m_PredictionErrorVariance;
         //! The mean and variance of the values added to the trend component.
         TMeanVarAccumulator m_ValueMoments;
+
+        //! The time of the last level change.
+        core_t::TTime m_TimeOfLastLevelChange;
+        //! A model of the probability of level changes for the trend.
+        CNaiveBayes m_ProbabilityOfLevelChangeModel;
+        //! A model of the magnitude of level changes for the trend.
+        CNormalMeanPrecConjugate m_MagnitudeOfLevelChangeModel;
 };
 
 }
diff --git a/include/maths/Constants.h b/include/maths/Constants.h
index 5d40bec369..3615c40831 100644
--- a/include/maths/Constants.h
+++ b/include/maths/Constants.h
@@ -79,6 +79,10 @@ const double MINIMUM_ACCURATE_VARIANCE_SCALE{0.5};
 //! introduced for some priors.
 const double MAXIMUM_ACCURATE_VARIANCE_SCALE{2.0};
 
+//! The default number of regression models used in periodic and
+//! calendar cyclic components of the trend decomposition.
+const std::size_t DECOMPOSITION_COMPONENT_SIZE{36u};
+
 //! The confidence interval to use for the seasonal trend and
 //! variation. We detrend to the nearest point in the confidence
 //! interval and use the upper confidence interval variance when
@@ -87,6 +91,15 @@ const double MAXIMUM_ACCURATE_VARIANCE_SCALE{2.0};
 //! can be in significant error).
 const double DEFAULT_SEASONAL_CONFIDENCE_INTERVAL{50.0};
 
+//! The minimum fractional count of points in a cluster.
+const double MINIMUM_CLUSTER_SPLIT_FRACTION{0.0};
+
+//! The default minimum count of points in a cluster.
+const double MINIMUM_CLUSTER_SPLIT_COUNT{24.0};
+
+//! The minimum count of a category in the sketch to cluster.
+const double MINIMUM_CATEGORY_COUNT{0.5};
+
 //! \brief A collection of weight styles and weights.
 class MATHS_EXPORT CConstantWeights
 {
@@ -123,15 +136,6 @@ class MATHS_EXPORT CConstantWeights
         }
 };
 
-//! The minimum fractional count of points in a cluster.
-const double MINIMUM_CLUSTER_SPLIT_FRACTION{0.0};
-
-//! The default minimum count of points in a cluster.
-const double MINIMUM_CLUSTER_SPLIT_COUNT{24.0};
-
-//! The minimum count of a category in the sketch to cluster.
-const double MINIMUM_CATEGORY_COUNT{0.5};
-
-//! Get the maximum amount we'll penalize a model in addSamples.
 MATHS_EXPORT
 double maxModelPenalty(double numberSamples);
 
diff --git a/include/maths/MathsTypes.h b/include/maths/MathsTypes.h
index 46ddb35066..5075a4b1bb 100644
--- a/include/maths/MathsTypes.h
+++ b/include/maths/MathsTypes.h
@@ -107,6 +107,16 @@ TDouble10Vec countForUpdate(std::size_t dimension,
                             const TWeightStyleVec &weightStyles,
                             const TDouble10Vec4Vec &weights);
 
+//! Extract the winsorisation weight from a collection of weights.
+MATHS_EXPORT
+double winsorisationWeight(const TWeightStyleVec &weightStyles,
+                           const TDouble4Vec &weights);
+
+//! Extract the winsorisation weight from a collection of weights.
+MATHS_EXPORT
+TDouble10Vec winsorisationWeight(const TWeightStyleVec &weightStyles,
+                                 const TDouble10Vec4Vec &weights);
+
 //! Extract the variance scale from a collection of weights.
 MATHS_EXPORT
 double seasonalVarianceScale(const TWeightStyleVec &weightStyles,
diff --git a/include/model/CAnomalyDetectorModelConfig.h b/include/model/CAnomalyDetectorModelConfig.h
index ff05b4698f..8f8c0728bb 100644
--- a/include/model/CAnomalyDetectorModelConfig.h
+++ b/include/model/CAnomalyDetectorModelConfig.h
@@ -35,20 +35,18 @@ class CSearchKey;
 class CModelAutoConfigurer;
 class CModelFactory;
 
-//! \brief Holds configuration for the anomaly detection models.
+//! \brief Responsible for configuring anomaly detection models.
 //!
 //! DESCRIPTION:\n
-//! Holds configuration state for anomaly detection models.
+//! Responsible for configuring classes for performing anomaly detection.
+//! It also defines all parameter defaults.
 //!
 //! IMPLEMENTATION:\n
-//! This wraps up the configuration of the models to encapsulate
-//! the details from the calling code. It is intended that at least
-//! some of this information will be exposed to the user via a
-//! configuration file.
-//!
-//! Default settings for various modes of operation are provided
-//! by the default* factory methods.
-
+//! This wraps up the configuration of anomaly detection to encapsulate
+//! the details from calling code. It is anticipated that:
+//!   -# Some of this information will be exposed to the user via a
+//!      configuration file,
+//!   -# Some may be calculated from data characteristics and so on.
 class MODEL_EXPORT CAnomalyDetectorModelConfig
 {
     public:
@@ -93,6 +91,8 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig
         typedef boost::reference_wrapper<const TStrDetectionRulePrVec> TStrDetectionRulePrVecCRef;
 
     public:
+        //! \name Data Gathering
+        //@{
         //! The default value used to separate components of a multivariate feature
         //! in its string value.
         static const std::string DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER;
@@ -117,6 +117,13 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig
         //! Bucket length corresponding to the default decay and learn rates.
         static const core_t::TTime STANDARD_BUCKET_LENGTH;
 
+        //! The default number of half buckets to store before choosing which
+        //! overlapping bucket has the biggest anomaly.
+        static const std::size_t DEFAULT_BUCKET_RESULTS_DELAY;
+        //@}
+
+        //! \name Modelling
+        //@{
         //! The default rate at which the model priors decay to non-informative
         //! per standard bucket length.
         static const double DEFAULT_DECAY_RATE;
@@ -140,20 +147,22 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig
         //! The default minimum count we'll permit in a cluster.
         static const double DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT;
 
-        //! The default minimum frequency of non-empty buckets at which we model
-        //! all buckets.
-        static const double DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS;
-
         //! The default proportion of initial count at which we'll delete a
         //! category from the sketch to cluster.
         static const double DEFAULT_CATEGORY_DELETE_FRACTION;
 
+        //! The default minimum frequency of non-empty buckets at which we model
+        //! all buckets.
+        static const double DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS;
+
         //! The default size of the seasonal components we will model.
         static const std::size_t DEFAULT_COMPONENT_SIZE;
 
-        //! The default number of times to sample a person model when computing
-        //! total probabilities for population models.
-        static const std::size_t DEFAULT_TOTAL_PROBABILITY_CALC_SAMPLING_SIZE;
+        //! The default minimum time to detect a change point in a time series.
+        static const core_t::TTime DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE;
+
+        //! The default maximum time to test for a change point in a time series.
+        static const core_t::TTime DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE;
 
         //! The maximum number of times we'll update a model in a bucketing
         //! interval. This only applies to our metric statistics, which are
@@ -181,10 +190,7 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig
         //! The default threshold for the Pearson correlation coefficient at
         //! which a correlate will be modeled.
         static const double DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION;
-
-        //! The default number of half buckets to store before choosing which
-        //! overlapping bucket has the biggest anomaly
-        static const std::size_t DEFAULT_BUCKET_RESULTS_DELAY;
+        //@}
 
         //! \name Anomaly Score Calculation
         //@{
@@ -218,9 +224,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig
         static const TDoubleDoublePr DEFAULT_NORMALIZED_SCORE_KNOT_POINTS[9];
         //@}
 
-        //! The maximum number of samples we use when re-sampling a prior.
-        static const std::size_t DEFAULT_RESAMPLING_MAX_SAMPLES;
-
     public:
         //! Create the default configuration.
         //!
@@ -444,6 +447,7 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig
         //! The time window during which samples are accepted.
         core_t::TTime samplingAgeCutoff(void) const;
 
+    private:
         //! Bucket length.
         core_t::TTime m_BucketLength;
 
diff --git a/include/model/CModelFactory.h b/include/model/CModelFactory.h
index 975d51cdac..4e16188001 100644
--- a/include/model/CModelFactory.h
+++ b/include/model/CModelFactory.h
@@ -318,11 +318,6 @@ class MODEL_EXPORT CModelFactory
         //! Set the prune window scale factor maximum
         void pruneWindowScaleMaximum(double factor);
 
-        //! Set the number of times we sample the people's attribute
-        //! distributions to compute raw total probabilities for population
-        //! models.
-        void totalProbabilityCalcSamplingSize(std::size_t samplingSize);
-
         //! Set whether multivariate analysis of correlated 'by' fields should
         //! be performed.
         void multivariateByFields(bool enabled);
diff --git a/include/model/CModelParams.h b/include/model/CModelParams.h
index 140db0a526..437bf68081 100644
--- a/include/model/CModelParams.h
+++ b/include/model/CModelParams.h
@@ -27,6 +27,7 @@ namespace ml
 namespace maths
 {
 struct SDistributionRestoreParams;
+struct STimeSeriesDecompositionRestoreParams;
 }
 namespace model
 {
@@ -34,8 +35,8 @@ namespace model
 //!
 //! DESCRIPTION:\n
 //! The idea of this class is to encapsulate global model configuration
-//! to avoid the need of updating the constructor signatures of all the
-//! classes in the CModel hierarchy when new parameters added.
+//! parameters to avoid the need to update the constructor signatures
+//! of all the classes in the CModel hierarchy when new parameters are added.
//! IMPLEMENTATION:\n //! This is purposely not implemented as a nested class so that it can @@ -47,7 +48,6 @@ struct MODEL_EXPORT SModelParams using TStrDetectionRulePr = std::pair; using TStrDetectionRulePrVec = std::vector; using TStrDetectionRulePrVecCRef = boost::reference_wrapper; - using TTimeVec = std::vector; explicit SModelParams(core_t::TTime bucketLength); @@ -58,6 +58,9 @@ struct MODEL_EXPORT SModelParams //! Get the minimum permitted number of points in a sketched point. double minimumCategoryCount(void) const; + //! Get the parameters supplied when restoring time series decompositions. + maths::STimeSeriesDecompositionRestoreParams decompositionRestoreParams(maths_t::EDataType dataType) const; + //! Get the parameters supplied when restoring distribution models. maths::SDistributionRestoreParams distributionRestoreParams(maths_t::EDataType dataType) const; @@ -96,6 +99,12 @@ struct MODEL_EXPORT SModelParams //! The number of points to use for approximating each seasonal component. std::size_t s_ComponentSize; + //! The minimum time to detect a change point in a time series. + core_t::TTime s_MinimumTimeToDetectChange; + + //! The maximum time to test for a change point in a time series. + core_t::TTime s_MaximumTimeToTestForChange; + //! Controls whether to exclude heavy hitters. model_t::EExcludeFrequent s_ExcludeFrequent; @@ -108,10 +117,6 @@ struct MODEL_EXPORT SModelParams //! The maximum number of times we'll update a metric model in a bucket. double s_MaximumUpdatesPerBucket; - //! The number of times we sample the people's attribute distributions - //! to compute raw total probabilities for population models. - std::size_t s_TotalProbabilityCalcSamplingSize; - //! The minimum value for the influence for which an influencing field //! value is judged to have any influence on a feature value. double s_InfluenceCutoff; diff --git a/lib/maths/CDecayRateController.cc b/lib/maths/CDecayRateController.cc index b594d74593..7713eeec52 100644 --- a/lib/maths/CDecayRateController.cc +++ b/lib/maths/CDecayRateController.cc @@ -123,7 +123,7 @@ void CDecayRateController::reset(void) m_PredictionMean = TMeanAccumulator1Vec(m_PredictionMean.size()); m_Bias = TMeanAccumulator1Vec(m_Bias.size()); m_RecentAbsError = TMeanAccumulator1Vec(m_RecentAbsError.size()); - m_HistoricalAbsError = TMeanAccumulator1Vec(m_HistoricalAbsError.size()); + m_HistoricalAbsError = TMeanAccumulator1Vec(m_HistoricalAbsError.size()); m_Multiplier.add(m_Target); } diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc index 750cd4e996..a6b7af772b 100644 --- a/lib/maths/CModel.cc +++ b/lib/maths/CModel.cc @@ -8,6 +8,9 @@ #include #include +#include + +#include #include @@ -60,26 +63,28 @@ double oneSidedEmptyBucketCorrection(maths_t::EProbabilityCalculation calculatio return 0.0; } -const double EFFECTIVE_COUNT[] = { 1.0, 0.8, 0.7, 0.65, 0.6, 0.57, 0.54, 0.52, 0.51 }; -const double LEARN_RATE = 1.0; -const double DECAY_RATE = 0.0; +const double EFFECTIVE_COUNT[]{ 1.0, 0.8, 0.7, 0.65, 0.6, 0.57, 0.54, 0.52, 0.51 }; //! Get the parameters for the stub model. 
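// A minimal standalone sketch of the change-detection timing logic that
// follows: a logistic "smooth Heaviside" maps the time since the last
// change point onto [0, 1], and the minimum time required to detect a
// new change shrinks to 30% of its configured value immediately after a
// change, when a reversion is most likely. The logistic form below is an
// assumption standing in for maths::CTools::smoothHeaviside, whose exact
// shape and signature may differ; all names here are illustrative only.

#include <cmath>
#include <cstdint>

namespace example
{
using TTime = std::int64_t;

// A smooth approximation to a unit step centred at "knot": ~0 for
// x << knot and ~1 for x >> knot, with transition width "width".
double smoothHeaviside(double x, double width, double knot)
{
    return 1.0 / (1.0 + std::exp(-(x - knot) / width));
}

// Hypothetical mirror of CModelParams::minimumTimeToDetectChange.
TTime minimumTimeToDetectChange(TTime timeSinceLastChangePoint,
                                TTime minimumTimeToDetect,
                                TTime maximumTimeToTestForChange)
{
    double revertFactor{smoothHeaviside(  static_cast<double>(timeSinceLastChangePoint)
                                        / static_cast<double>(maximumTimeToTestForChange),
                                        0.1, 1.0)};
    // Right after a change only 30% of the usual observation time is
    // required; the requirement relaxes back to 100% as the last
    // change recedes into the past.
    return static_cast<TTime>(std::ceil(  (0.3 + 0.7 * revertFactor)
                                        * static_cast<double>(minimumTimeToDetect)));
}
}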
CModelParams stubParameters(void)
{
-    return CModelParams(0, LEARN_RATE, DECAY_RATE, 0.0);
+    return CModelParams{0, 1.0, 0.0, 0.0, 6 * core::constants::HOUR, core::constants::DAY};
 }
 }
 
 CModelParams::CModelParams(core_t::TTime bucketLength,
-                           const double &learnRate,
-                           const double &decayRate,
-                           double minimumSeasonalVarianceScale) :
+                           double learnRate,
+                           double decayRate,
+                           double minimumSeasonalVarianceScale,
+                           core_t::TTime minimumTimeToDetectChange,
+                           core_t::TTime maximumTimeToTestForChange) :
         m_BucketLength(bucketLength),
         m_LearnRate(learnRate),
         m_DecayRate(decayRate),
         m_MinimumSeasonalVarianceScale(minimumSeasonalVarianceScale),
+        m_MinimumTimeToDetectChange(std::max(minimumTimeToDetectChange, 12 * bucketLength)),
+        m_MaximumTimeToTestForChange(std::max(maximumTimeToTestForChange, 48 * bucketLength)),
         m_ProbabilityBucketEmpty(0.0)
 {}
 
@@ -108,6 +113,27 @@ double CModelParams::minimumSeasonalVarianceScale(void) const
     return m_MinimumSeasonalVarianceScale;
 }
 
+bool CModelParams::testForChange(core_t::TTime changeInterval) const
+{
+    return changeInterval >= std::max(3 * m_BucketLength, 10 * core::constants::MINUTE);
+}
+
+core_t::TTime CModelParams::minimumTimeToDetectChange(core_t::TTime timeSinceLastChangePoint) const
+{
+    // If there was a recent change then there is a chance that this is
+    // a reversion of the previous change. We allow reversions to occur faster.
+    double revertFactor{CTools::smoothHeaviside(  static_cast<double>(timeSinceLastChangePoint)
+                                                / static_cast<double>(m_MaximumTimeToTestForChange),
+                                                0.1, 1.0)};
+    return static_cast<core_t::TTime>(std::ceil(  (0.3 + 0.7 * revertFactor)
+                                                * static_cast<double>(m_MinimumTimeToDetectChange)));
+}
+
+core_t::TTime CModelParams::maximumTimeToTestForChange(void) const
+{
+    return m_MaximumTimeToTestForChange;
+}
+
 void CModelParams::probabilityBucketEmpty(double probability)
 {
     m_ProbabilityBucketEmpty = probability;
@@ -318,6 +344,17 @@ bool CModelProbabilityParams::updateAnomalyModel(void) const
 }
 
+CModel::EUpdateResult CModel::combine(EUpdateResult lhs, EUpdateResult rhs)
+{
+    switch (lhs)
+    {
+    case E_Success: return rhs;
+    case E_Reset:   return rhs == E_Failure ? E_Failure : E_Reset;
+    case E_Failure: return E_Failure;
+    }
+    return E_Failure;
+}
+
 CModel::CModel(const CModelParams &params) : m_Params(params) {}
 
 double CModel::effectiveCount(std::size_t n)
@@ -341,11 +378,11 @@ double CModel::correctForEmptyBucket(maths_t::EProbabilityCalculation calculatio
                                      double probabilityBucketEmpty,
                                      double probability)
 {
-    double pCorrected = (1.0 - probabilityBucketEmpty) * probability;
+    double pCorrected{(1.0 - probabilityBucketEmpty) * probability};
 
     if (!bucketEmpty)
     {
-        double pOneSided = oneSidedEmptyBucketCorrection(calculation, value, probabilityBucketEmpty);
+        double pOneSided{oneSidedEmptyBucketCorrection(calculation, value, probabilityBucketEmpty)};
         return std::min(pOneSided + pCorrected, 1.0);
     }
 
@@ -360,22 +397,22 @@ double CModel::correctForEmptyBucket(maths_t::EProbabilityCalculation calculatio
 {
     if (!bucketEmpty[0] && !bucketEmpty[1])
     {
-        double pState = (1.0 - probabilityEmptyBucket[0]) * (1.0 - probabilityEmptyBucket[1]);
-        double pOneSided = oneSidedEmptyBucketCorrection(calculation, TDouble2Vec{value}, 1.0 - pState);
+        double pState{(1.0 - probabilityEmptyBucket[0]) * (1.0 - probabilityEmptyBucket[1])};
+        double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, 1.0 - pState)};
         return std::min(pOneSided + pState * probability, 1.0);
     }
     if (!bucketEmpty[0])
     {
-        double pState = (1.0 - probabilityEmptyBucket[0]) * probabilityEmptyBucket[1];
-        double pOneSided = oneSidedEmptyBucketCorrection(calculation, TDouble2Vec{value}, probabilityEmptyBucket[0]);
+        double pState{(1.0 - probabilityEmptyBucket[0]) * probabilityEmptyBucket[1]};
+        double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, probabilityEmptyBucket[0])};
         return std::min(pOneSided + pState + (1.0 - pState) * probability, 1.0);
     }
     if (!bucketEmpty[1])
     {
-        double pState = probabilityEmptyBucket[0] * (1.0 - probabilityEmptyBucket[1]);
-        double pOneSided = oneSidedEmptyBucketCorrection(calculation, TDouble2Vec{value}, probabilityEmptyBucket[1]);
+        double pState{probabilityEmptyBucket[0] * (1.0 - probabilityEmptyBucket[1])};
+        double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, probabilityEmptyBucket[1])};
         return std::min(pOneSided + pState + (1.0 - pState) * probability, 1.0);
     }
 
diff --git a/lib/maths/CNaiveBayes.cc b/lib/maths/CNaiveBayes.cc
index 68690fd90d..4e38fdadd8 100644
--- a/lib/maths/CNaiveBayes.cc
+++ b/lib/maths/CNaiveBayes.cc
@@ -34,11 +34,12 @@ namespace
 const std::string PRIOR_TAG{"a"};
 const std::string CLASS_LABEL_TAG{"b"};
 const std::string CLASS_MODEL_TAG{"c"};
-const std::string COUNT_TAG{"d"};
-const std::string CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"e"};
+const std::string MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG{"d"};
+const std::string COUNT_TAG{"e"};
+const std::string CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f"};
 }
 
-CNaiveBayesFeatureDensityFromPrior::CNaiveBayesFeatureDensityFromPrior(CPrior &prior) :
+CNaiveBayesFeatureDensityFromPrior::CNaiveBayesFeatureDensityFromPrior(const CPrior &prior) :
         m_Prior(prior.clone())
 {}
 
@@ -59,8 +60,8 @@ bool CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser(const SDistribut
     {
         const std::string &name{traverser.name()};
         RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind(
-                               CPriorStateSerialiser(),
-                               boost::cref(params), boost::ref(m_Prior), _1)));
+                           CPriorStateSerialiser(),
+                           boost::cref(params), boost::ref(m_Prior), _1)));
     }
     while (traverser.next());
 
     return true;
@@ -79,12 +80,36 @@ double CNaiveBayesFeatureDensityFromPrior::logValue(const TDouble1Vec &x) const
                                             CConstantWeights::SINGLE_UNIT,
                                             result) != maths_t::E_FpNoErrors)
     {
-        LOG_ERROR("Bad value density value for " << x);
+        LOG_ERROR("Bad density value at " << x << " for " << m_Prior->print());
         return boost::numeric::bounds<double>::lowest();
     }
     return result;
 }
 
+double CNaiveBayesFeatureDensityFromPrior::logMaximumValue() const
+{
+    double result;
+    if (m_Prior->jointLogMarginalLikelihood(CConstantWeights::COUNT,
+                                            {m_Prior->marginalLikelihoodMode()},
+                                            CConstantWeights::SINGLE_UNIT,
+                                            result) != maths_t::E_FpNoErrors)
+    {
+        LOG_ERROR("Bad density value for " << m_Prior->print());
+        return boost::numeric::bounds<double>::lowest();
+    }
+    return result;
+}
+
+void CNaiveBayesFeatureDensityFromPrior::dataType(maths_t::EDataType dataType)
+{
+    m_Prior->dataType(dataType);
+}
+
+void CNaiveBayesFeatureDensityFromPrior::propagateForwardsByTime(double time)
+{
+    m_Prior->propagateForwardsByTime(time);
+}
+
 void CNaiveBayesFeatureDensityFromPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
 {
     return core::CMemoryDebug::dynamicSize("m_Prior", m_Prior, mem);
@@ -100,18 +125,22 @@ std::size_t CNaiveBayesFeatureDensityFromPrior::memoryUsage() const
     return core::CMemory::dynamicSize(m_Prior);
 }
 
-void CNaiveBayesFeatureDensityFromPrior::propagateForwardsByTime(double time)
-{
-    m_Prior->propagateForwardsByTime(time);
-}
-
 uint64_t CNaiveBayesFeatureDensityFromPrior::checksum(uint64_t seed) const
 {
     return CChecksum::calculate(seed, m_Prior);
 }
 
+std::string CNaiveBayesFeatureDensityFromPrior::print() const
+{
+    std::string result;
+    m_Prior->print(" ", result);
+    return result;
+}
 
-CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity &exemplar, double decayRate) :
+CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity &exemplar,
+                         double decayRate,
+                         TOptionalDouble minMaxLogLikelihoodToUseFeature) :
+        m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature},
         m_DecayRate{decayRate},
         m_Exemplar{exemplar.clone()},
         m_ClassConditionalDensities{2}
@@ -137,9 +166,13 @@ bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams &param
         RESTORE_SETUP_TEARDOWN(CLASS_MODEL_TAG,
                                SClass class_,
                                traverser.traverseSubLevel(boost::bind(
-                                       &SClass::acceptRestoreTraverser,
-                                       boost::ref(class_), boost::cref(params), _1)),
+                                   &SClass::acceptRestoreTraverser,
+                                   boost::ref(class_), boost::cref(params), _1)),
                                m_ClassConditionalDensities.emplace(label, class_))
+        RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG,
+                               double value,
+                               core::CStringUtils::stringToType(traverser.value(), value),
+                               m_MinMaxLogLikelihoodToUseFeature.reset(value))
     }
     while (traverser.next());
 
     return true;
@@ -155,13 +188,33 @@ void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter &inserter) c
     {
         classes.push_back(i);
     }
-    std::sort(classes.begin(), classes.end(), core::CFunctional::SDereference());
+    std::sort(classes.begin(), classes.end(),
+              core::CFunctional::SDereference());
     for (const auto &class_ : classes)
     {
         inserter.insertValue(CLASS_LABEL_TAG, class_->first);
         inserter.insertLevel(CLASS_MODEL_TAG, boost::bind(&SClass::acceptPersistInserter,
                                                           boost::ref(class_->second), _1));
     }
+    if (m_MinMaxLogLikelihoodToUseFeature)
+    {
+        inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG,
+                             *m_MinMaxLogLikelihoodToUseFeature,
+                             core::CIEEE754::E_SinglePrecision);
+    }
+}
+
+void CNaiveBayes::swap(CNaiveBayes &other)
+{
+    std::swap(m_DecayRate, other.m_DecayRate);
+    m_Exemplar.swap(other.m_Exemplar);
+    m_ClassConditionalDensities.swap(other.m_ClassConditionalDensities);
+    std::swap(m_MinMaxLogLikelihoodToUseFeature, other.m_MinMaxLogLikelihoodToUseFeature);
+}
+
+bool CNaiveBayes::initialized() const
+{
+    return m_ClassConditionalDensities.size() > 0;
 }
 
 void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec &counts)
@@ -172,7 +225,8 @@ void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec &counts)
     }
 }
 
-void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec &x)
+void CNaiveBayes::addTrainingDataPoint(std::size_t label,
+                                       const TDouble1VecVec &x)
 {
     if (!this->validate(x))
     {
@@ -184,8 +238,7 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec &
     if (class_.s_ConditionalDensities.empty())
     {
         class_.s_ConditionalDensities.reserve(x.size());
-        std::generate_n(std::back_inserter(class_.s_ConditionalDensities),
-                        x.size(),
+        std::generate_n(std::back_inserter(class_.s_ConditionalDensities), x.size(),
                         [this]() { return TFeatureDensityPtr{m_Exemplar->clone()}; });
     }
 
@@ -209,6 +262,17 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec &
     }
 }
 
+void CNaiveBayes::dataType(maths_t::EDataType dataType)
+{
+    for (auto &class_ : m_ClassConditionalDensities)
+    {
+        for (auto &density : class_.second.s_ConditionalDensities)
+        {
+            density->dataType(dataType);
+        }
+    }
+}
+
 void CNaiveBayes::propagateForwardsByTime(double time)
 {
     double factor{std::exp(-m_DecayRate * time)};
@@ -224,6 +288,22 @@ void CNaiveBayes::propagateForwardsByTime(double time)
 
 CNaiveBayes::TDoubleSizePrVec
 CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec &x) const
+{
+    TDoubleSizePrVec p(this->classProbabilities(x));
+    n = std::min(n, p.size());
+    std::sort(p.begin(), p.begin() + n, std::greater<TDoubleSizePr>());
+    return TDoubleSizePrVec{p.begin(), p.begin() + n};
+}
+
+double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec &x) const
+{
+    TDoubleSizePrVec p(this->classProbabilities(x));
+    auto i = std::find_if(p.begin(), p.end(),
+                          [label](const TDoubleSizePr &p_) { return p_.second == label; });
+    return i == p.end() ? 0.0 : i->first;
+}
+
+CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec &x) const
 {
     if (!this->validate(x))
     {
@@ -235,20 +315,43 @@ CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec &x) c
         return {};
     }
 
+    using TDoubleVec = std::vector<double>;
+    using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
+
     TDoubleSizePrVec p;
     p.reserve(m_ClassConditionalDensities.size());
     for (const auto &class_ : m_ClassConditionalDensities)
     {
-        double f{CTools::fastLog(class_.second.s_Count)};
-        for (std::size_t i = 0u; i < x.size(); ++i)
+        p.emplace_back(CTools::fastLog(class_.second.s_Count), class_.first);
+    }
+
+    TDoubleVec logLikelihoods;
+    for (std::size_t i = 0u; i < x.size(); ++i)
+    {
+        if (x[i].size() > 0)
         {
-            if (x[i].size() > 0)
+            TMaxAccumulator maxLogLikelihood;
+            logLikelihoods.clear();
+            for (const auto &class_ : m_ClassConditionalDensities)
             {
-                f += class_.second.s_ConditionalDensities[i]->logValue(x[i]);
+                const auto &density = class_.second.s_ConditionalDensities[i];
+                double logLikelihood{density->logValue(x[i])};
+                double logMaximumLikelihood{density->logMaximumValue()};
+                maxLogLikelihood.add(logLikelihood - logMaximumLikelihood);
+                logLikelihoods.push_back(logLikelihood);
+            }
+            double weight{1.0};
+            if (m_MinMaxLogLikelihoodToUseFeature)
+            {
+                weight = CTools::smoothHeaviside(
+                             (maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature)
+                             / std::fabs(*m_MinMaxLogLikelihoodToUseFeature), 0.1);
+            }
+            for (std::size_t j = 0u; j < logLikelihoods.size(); ++j)
+            {
+                p[j].first += weight * logLikelihoods[j];
             }
         }
-        p.emplace_back(f, class_.first);
     }
 
     double scale{std::max_element(p.begin(), p.end())->first};
@@ -263,10 +366,7 @@ CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec &x) c
         pc.first /= Z;
     }
 
-    n = std::min(n, p.size());
-    std::sort(p.begin(), p.begin() + n, std::greater<TDoubleSizePr>());
-
-    return TDoubleSizePrVec{p.begin(), p.begin() + n};
+    return p;
 }
 
 void CNaiveBayes::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
@@ -287,6 +387,22 @@ uint64_t CNaiveBayes::checksum(uint64_t seed) const
     return CChecksum::calculate(seed, m_ClassConditionalDensities);
 }
 
+std::string CNaiveBayes::print() const
+{
+    std::ostringstream result;
+    result << "\n";
+    for (const auto &class_ : m_ClassConditionalDensities)
+    {
+        result << "CLASS(" << class_.first << ")\n";
+        for (const auto &density : class_.second.s_ConditionalDensities)
+        {
+            result << "---";
+            result << density->print() << "\n";
+        }
+    }
+    return result.str();
+}
+
 bool CNaiveBayes::validate(const TDouble1VecVec &x) const
 {
     auto class_ = m_ClassConditionalDensities.begin();
@@ -313,7 +429,7 @@ bool CNaiveBayes::SClass::acceptRestoreTraverser(const SDistributionRestoreParam
                                    &CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser,
                                    boost::ref(tmp), boost::cref(params), _1)),
                                s_ConditionalDensities.emplace_back(tmp.clone()))
-        // Add other implementation's restore code here.
+        // Add other implementations' restore code here.
     }
     while (traverser.next());
 
     return true;
@@ -331,7 +447,7 @@ void CNaiveBayes::SClass::acceptPersistInserter(core::CStatePersistInserter &ins
                                               density.get(), _1));
             continue;
         }
-        // Add other implementation's persist code here.
+        // Add other implementations' persist code here. 
}
 }
 
diff --git a/lib/maths/CRestoreParams.cc b/lib/maths/CRestoreParams.cc
index 87c1352677..b1d656ce4a 100644
--- a/lib/maths/CRestoreParams.cc
+++ b/lib/maths/CRestoreParams.cc
@@ -13,14 +13,6 @@ namespace ml
 namespace maths
 {
 
-STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams(double decayRate,
-                                                                             core_t::TTime minimumBucketLength,
-                                                                             std::size_t componentSize) :
-        s_DecayRate{decayRate},
-        s_MinimumBucketLength{minimumBucketLength},
-        s_ComponentSize{componentSize}
-{}
-
 SDistributionRestoreParams::SDistributionRestoreParams(maths_t::EDataType dataType,
                                                        double decayRate,
                                                        double minimumClusterFraction,
@@ -33,6 +25,25 @@ SDistributionRestoreParams::SDistributionRestoreParams(maths_t::EDataType dataTy
         s_MinimumCategoryCount{minimumCategoryCount}
 {}
 
+STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams(double decayRate,
+                                                                             core_t::TTime minimumBucketLength,
+                                                                             std::size_t componentSize,
+                                                                             const SDistributionRestoreParams &changeModelParams) :
+        s_DecayRate{decayRate},
+        s_MinimumBucketLength{minimumBucketLength},
+        s_ComponentSize{componentSize},
+        s_ChangeModelParams{changeModelParams}
+{}
+
+STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams(double decayRate,
+                                                                             core_t::TTime minimumBucketLength,
+                                                                             const SDistributionRestoreParams &changeModelParams) :
+        s_DecayRate{decayRate},
+        s_MinimumBucketLength{minimumBucketLength},
+        s_ComponentSize{DECOMPOSITION_COMPONENT_SIZE},
+        s_ChangeModelParams{changeModelParams}
+{}
+
 SModelRestoreParams::SModelRestoreParams(const CModelParams &params,
                                          const STimeSeriesDecompositionRestoreParams &decompositionParams,
                                          const SDistributionRestoreParams &distributionParams) :
diff --git a/lib/maths/CSeasonalComponent.cc b/lib/maths/CSeasonalComponent.cc
index 3a1b2feb46..f580269de9 100644
--- a/lib/maths/CSeasonalComponent.cc
+++ b/lib/maths/CSeasonalComponent.cc
@@ -259,7 +259,7 @@ double CSeasonalComponent::delta(core_t::TTime time,
     // a delta for the case that the difference from the mean
     // is 1/3 of the range. We force the delta to zero for values
     // significantly smaller than this.
-    double scale{CTools::smoothHeaviside(3.0 * min[0] / minmax.range(), 1.0 / 12.0)};
+    double scale{CTools::smoothHeaviside(3.0 * min[0] / minmax.range(), 0.1, 1.0)};
     scale = CTools::truncate(1.002 * scale - 0.001, 0.0, 1.0);
 
     return -scale * min[0] * CTools::sign(shortPeriodValue);
diff --git a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc
index 66d5bc92b7..afe1a2dc0f 100644
--- a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc
+++ b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc
@@ -646,7 +646,7 @@ double CSeasonalComponentAdaptiveBucketing::predict(std::size_t bucket, core_t::
 
     // We mean revert our predictions if trying to predict much further
     // ahead than the observed interval for the data. 
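// The blend below can be pictured in isolation: alpha falls smoothly from
// one towards zero as the extrapolation interval exceeds the observed
// interval, so predictions revert to the regression's long-run mean when
// extrapolating far beyond the data. The falling logistic used here is an
// assumption standing in for maths::CTools::smoothHeaviside with its
// negative sign argument; names and the exact functional form are
// illustrative.

#include <cmath>

namespace example
{
// Falling smooth step: ~1 for x << knot, ~0 for x >> knot.
double fallingSmoothHeaviside(double x, double width, double knot)
{
    return 1.0 / (1.0 + std::exp((x - knot) / width));
}

double meanRevertingPrediction(double regressionPrediction,
                               double regressionMean,
                               double extrapolateInterval,
                               double observedInterval)
{
    double alpha{fallingSmoothHeaviside(extrapolateInterval / observedInterval, 0.1, 1.0)};
    double beta{1.0 - alpha};
    return alpha * regressionPrediction + beta * regressionMean;
}
}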
- double alpha{CTools::smoothHeaviside(extrapolateInterval / interval, 1.0 / 12.0, -1.0)}; + double alpha{CTools::smoothHeaviside(extrapolateInterval / interval, 0.1, 1.0, -1.0)}; double beta{1.0 - alpha}; return alpha * regression.predict(t) + beta * regression.mean(); } diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index 0f9e5971d1..1a2f6282c8 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -41,20 +42,41 @@ using TDouble4Vec = core::CSmallVector; using TDouble4Vec1Vec = core::CSmallVector; using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription; -const std::string SAMPLE_COUNT_TAG{"a"}; -const std::string MIN_TIME_TAG{"b"}; -const std::string MAX_TIME_TAG{"c"}; -const std::string CHANGE_MODEL_TAG{"d"}; -const std::string LOG_LIKELIHOOD_TAG{"e"}; -const std::string SHIFT_TAG{"f"}; -const std::string RESIDUAL_MODEL_TAG{"g"}; -} - -SChangeDescription::SChangeDescription(EDescription description, double value) : - s_Description{description}, s_Value{value} +const std::string MINIMUM_TIME_TO_DETECT{"a"}; +const std::string MAXIMUM_TIME_TO_DETECT{"b"}; +const std::string MINIMUM_DELTA_BIC_TO_DETECT{"c"}; +const std::string RESIDUAL_MODEL_MODE_TAG{"d"}; +const std::string SAMPLE_COUNT_TAG{"e"}; +const std::string CURRENT_EVIDENCE_OF_CHANGE{"f"}; +const std::string MIN_TIME_TAG{"g"}; +const std::string MAX_TIME_TAG{"h"}; +const std::string CHANGE_MODEL_TAG{"i"}; +const std::string LOG_LIKELIHOOD_TAG{"j"}; +const std::string SHIFT_TAG{"k"}; +const std::string TREND_MODEL_TAG{"l"}; +const std::string RESIDUAL_MODEL_TAG{"m"}; +} + +SChangeDescription::SChangeDescription(EDescription description, + double value, + const TPriorPtr &residualModel) : + s_Description{description}, + s_Value{value}, + s_ResidualModel{residualModel} {} -CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector(const CTimeSeriesDecompositionInterface &trendModel, +std::string SChangeDescription::print() const +{ + std::string result; + switch (s_Description) + { + case E_LevelShift: result += "level shift by "; break; + case E_TimeShift: result += "time shift by "; break; + } + return result + core::CStringUtils::typeToString(s_Value[0]); +} + +CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector(const TDecompositionPtr &trendModel, const TPriorPtr &residualModel, core_t::TTime minimumTimeToDetect, core_t::TTime maximumTimeToDetect, @@ -65,19 +87,23 @@ CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector(const C m_SampleCount{0}, m_CurrentEvidenceOfChange{0.0}, m_ChangeModels{boost::make_shared(trendModel, residualModel), - boost::make_shared(trendModel, residualModel), - boost::make_shared(trendModel, residualModel, -core::constants::HOUR), - boost::make_shared(trendModel, residualModel, +core::constants::HOUR)} + boost::make_shared(trendModel, residualModel), + boost::make_shared(trendModel, residualModel, -core::constants::HOUR), + boost::make_shared(trendModel, residualModel, +core::constants::HOUR)} {} -bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, +bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) { auto model = m_ChangeModels.begin(); do { const std::string name{traverser.name()}; + 
RESTORE_BUILT_IN(MINIMUM_TIME_TO_DETECT, m_MinimumTimeToDetect) + RESTORE_BUILT_IN(MAXIMUM_TIME_TO_DETECT, m_MaximumTimeToDetect) + RESTORE_BUILT_IN(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) + RESTORE_BUILT_IN(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange) RESTORE_SETUP_TEARDOWN(MIN_TIME_TAG, core_t::TTime time, core::CStringUtils::stringToType(traverser.value(), time), @@ -87,8 +113,8 @@ bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SDistribu core::CStringUtils::stringToType(traverser.value(), time), m_TimeRange.add(time)) RESTORE(CHANGE_MODEL_TAG, traverser.traverseSubLevel(boost::bind( - &CUnivariateTimeSeriesChangeModel::acceptRestoreTraverser, - (model++)->get(), boost::cref(params), _1))) + &CUnivariateChangeModel::acceptRestoreTraverser, + (model++)->get(), boost::cref(params), _1))) } while (traverser.next()); return true; @@ -96,13 +122,22 @@ bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SDistribu void CUnivariateTimeSeriesChangeDetector::acceptPersistInserter(core::CStatePersistInserter &inserter) const { + inserter.insertValue(MINIMUM_TIME_TO_DETECT, m_MinimumTimeToDetect); + inserter.insertValue(MAXIMUM_TIME_TO_DETECT, m_MaximumTimeToDetect); + inserter.insertValue(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect, + core::CIEEE754::E_SinglePrecision); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); - inserter.insertValue(MIN_TIME_TAG, m_TimeRange.min()); - inserter.insertValue(MAX_TIME_TAG, m_TimeRange.max()); + inserter.insertValue(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange, + core::CIEEE754::E_SinglePrecision); + if (m_TimeRange.initialized()) + { + inserter.insertValue(MIN_TIME_TAG, m_TimeRange.min()); + inserter.insertValue(MAX_TIME_TAG, m_TimeRange.max()); + } for (const auto &model : m_ChangeModels) { inserter.insertLevel(CHANGE_MODEL_TAG, - boost::bind(&CUnivariateTimeSeriesChangeModel::acceptPersistInserter, + boost::bind(&CUnivariateChangeModel::acceptPersistInserter, model.get(), _1)); } } @@ -141,18 +176,17 @@ bool CUnivariateTimeSeriesChangeDetector::stopTesting() const if (range > m_MinimumTimeToDetect) { double scale{0.5 + CTools::smoothHeaviside(2.0 * m_CurrentEvidenceOfChange - / m_MinimumDeltaBicToDetect, 0.2)}; + / m_MinimumDeltaBicToDetect, 0.2, 1.0)}; return static_cast(range) > m_MinimumTimeToDetect + scale * static_cast( m_MaximumTimeToDetect - m_MinimumTimeToDetect); } return false; } -void CUnivariateTimeSeriesChangeDetector::addSamples(maths_t::EDataType dataType, - const TWeightStyleVec &weightStyles, + +void CUnivariateTimeSeriesChangeDetector::addSamples(const TWeightStyleVec &weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights, - double propagationInterval) + const TDouble4Vec1Vec &weights) { for (const auto &sample : samples) { @@ -163,9 +197,7 @@ void CUnivariateTimeSeriesChangeDetector::addSamples(maths_t::EDataType dataType for (auto &model : m_ChangeModels) { - model->addSamples(m_SampleCount, dataType, - weightStyles, samples, weights, - propagationInterval); + model->addSamples(m_SampleCount, weightStyles, samples, weights); } } @@ -181,40 +213,95 @@ std::size_t CUnivariateTimeSeriesChangeDetector::memoryUsage() const uint64_t CUnivariateTimeSeriesChangeDetector::checksum(uint64_t seed) const { + seed = CChecksum::calculate(seed, m_MinimumTimeToDetect); + seed = CChecksum::calculate(seed, m_MaximumTimeToDetect); + seed = CChecksum::calculate(seed, 
m_MinimumDeltaBicToDetect);
     seed = CChecksum::calculate(seed, m_TimeRange);
     seed = CChecksum::calculate(seed, m_SampleCount);
+    seed = CChecksum::calculate(seed, m_CurrentEvidenceOfChange);
     return CChecksum::calculate(seed, m_ChangeModels);
 }
 
 namespace time_series_change_detector_detail
 {
 
-CUnivariateTimeSeriesChangeModel::CUnivariateTimeSeriesChangeModel(const CTimeSeriesDecompositionInterface &trendModel) :
-        m_LogLikelihood{0.0}, m_TrendModel{trendModel}
+CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr &trendModel,
+                                               const TPriorPtr &residualModel) :
+        m_LogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel}
 {}
 
-double CUnivariateTimeSeriesChangeModel::logLikelihood() const
+void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
+{
+    // Note that if the trend and residual models are shallow copied their
+    // reference counts will be updated so core::CMemory::dynamicSize
+    // will give the correct contribution for these references.
+    core::CMemoryDebug::dynamicSize("m_TrendModel", m_TrendModel, mem);
+    core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem);
+}
+
+std::size_t CUnivariateChangeModel::memoryUsage() const
+{
+    // See above.
+    return  core::CMemory::dynamicSize(m_TrendModel)
+          + core::CMemory::dynamicSize(m_ResidualModel);
+}
+
+uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const
+{
+    seed = CChecksum::calculate(seed, m_LogLikelihood);
+    seed = CChecksum::calculate(seed, m_TrendModel);
+    return CChecksum::calculate(seed, m_ResidualModel);
+}
+
+bool CUnivariateChangeModel::restoreResidualModel(const SDistributionRestoreParams &params,
+                                                  core::CStateRestoreTraverser &traverser)
+{
+    return traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(),
+                                                  boost::cref(params),
+                                                  boost::ref(m_ResidualModel), _1));
+}
+
+double CUnivariateChangeModel::logLikelihood() const
 {
     return m_LogLikelihood;
 }
 
-void CUnivariateTimeSeriesChangeModel::addLogLikelihood(double logLikelihood)
+void CUnivariateChangeModel::addLogLikelihood(double logLikelihood)
 {
     m_LogLikelihood += logLikelihood;
 }
 
-const CTimeSeriesDecompositionInterface &CUnivariateTimeSeriesChangeModel::trendModel() const
+const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const
 {
-    return m_TrendModel;
+    return *m_TrendModel;
 }
 
-CUnivariateNoChangeModel::CUnivariateNoChangeModel(const CTimeSeriesDecompositionInterface &trendModel,
+CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel()
+{
+    return *m_TrendModel;
+}
+
+const CPrior &CUnivariateChangeModel::residualModel() const
+{
+    return *m_ResidualModel;
+}
+
+CPrior &CUnivariateChangeModel::residualModel()
+{
+    return *m_ResidualModel;
+}
+
+CUnivariateChangeModel::TPriorPtr CUnivariateChangeModel::residualModelPtr() const
+{
+    return m_ResidualModel;
+}
+
+CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr &trendModel,
                                                    const TPriorPtr &residualModel) :
-        CUnivariateTimeSeriesChangeModel{trendModel},
-        m_ResidualModel{residualModel}
+        CUnivariateChangeModel{trendModel, residualModel}
 {}
 
-bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SDistributionRestoreParams &/*params*/,
+bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
                                                       core::CStateRestoreTraverser &traverser)
 {
     do
@@ -245,33 +332,29 @@ TOptionalChangeDescription CUnivariateNoChangeModel::change() const
 }
 
 void CUnivariateNoChangeModel::addSamples(std::size_t count,
-                                          maths_t::EDataType /*dataType*/, 
const TWeightStyleVec &weightStyles, const TTimeDoublePr1Vec &samples_, - const TDouble4Vec1Vec &weights, - double /*propagationInterval*/) + const TDouble4Vec1Vec &weights) { - TDouble1Vec samples; - samples.reserve(samples_.size()); - for (const auto &sample : samples_) - { - samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0)); - } - // See CUnivariateTimeSeriesLevelShiftModel for an explanation // of the delay updating the log-likelihood. - double logLikelihood; - if (count >= 5 && m_ResidualModel->jointLogMarginalLikelihood( - weightStyles, samples, weights, - logLikelihood) == maths_t::E_FpNoErrors) + if (count >= COUNT_TO_INITIALIZE) { - this->addLogLikelihood(logLikelihood); - } -} + TDouble1Vec samples; + samples.reserve(samples_.size()); + for (const auto &sample : samples_) + { + samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0)); + } -void CUnivariateNoChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr /*mem*/) const -{ + double logLikelihood; + if (this->residualModel().jointLogMarginalLikelihood(weightStyles, samples, weights, + logLikelihood) == maths_t::E_FpNoErrors) + { + this->addLogLikelihood(logLikelihood); + } + } } std::size_t CUnivariateNoChangeModel::staticSize() const @@ -279,28 +362,20 @@ std::size_t CUnivariateNoChangeModel::staticSize() const return sizeof(*this); } -std::size_t CUnivariateNoChangeModel::memoryUsage() const -{ - return 0; -} - uint64_t CUnivariateNoChangeModel::checksum(uint64_t seed) const { - seed = CChecksum::calculate(seed, this->logLikelihood()); - seed = CChecksum::calculate(seed, this->trendModel()); - return CChecksum::calculate(seed, m_ResidualModel); + return this->CUnivariateChangeModel::checksum(seed); } -CUnivariateTimeSeriesLevelShiftModel::CUnivariateTimeSeriesLevelShiftModel(const CTimeSeriesDecompositionInterface &trendModel, - const TPriorPtr &residualModel) : - CUnivariateTimeSeriesChangeModel{trendModel}, - m_SampleCount{0.0}, - m_ResidualModel{residualModel->clone()}, - m_ResidualModelMode{residualModel->marginalLikelihoodMode()} +CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr &trendModel, + const TPriorPtr &residualModel) : + CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, + m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, + m_SampleCount{0.0} {} -bool CUnivariateTimeSeriesLevelShiftModel::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) +bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) { do { @@ -310,112 +385,107 @@ bool CUnivariateTimeSeriesLevelShiftModel::acceptRestoreTraverser(const SDistrib core::CStringUtils::stringToType(traverser.value(), logLikelihood), this->addLogLikelihood(logLikelihood)) RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value())) + RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) - RESTORE(RESIDUAL_MODEL_TAG, traverser.traverseSubLevel( - boost::bind(CPriorStateSerialiser(), - boost::cref(params), - boost::ref(m_ResidualModel), _1))) + RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) } while (traverser.next()); return true; } -void CUnivariateTimeSeriesLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const +void 
CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const { inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited()); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(*m_ResidualModel), _1)); + boost::cref(this->residualModel()), _1)); } -double CUnivariateTimeSeriesLevelShiftModel::bic() const +double CUnivariateLevelShiftModel::bic() const { return -2.0 * this->logLikelihood() + std::log(m_SampleCount); } -TOptionalChangeDescription CUnivariateTimeSeriesLevelShiftModel::change() const +TOptionalChangeDescription CUnivariateLevelShiftModel::change() const { - return SChangeDescription{SChangeDescription::E_LevelShift, CBasicStatistics::mean(m_Shift)}; + // The "magic" 0.9 is due to the fact that the trend is updated + // with new values during change detection. As a result, the + // estimate is biased (by early values) and too large. This was + // an empirical estimate of the degree of bias across a range of + // step changes. + return SChangeDescription{SChangeDescription::E_LevelShift, + 0.9 * CBasicStatistics::mean(m_Shift), + this->residualModelPtr()}; } -void CUnivariateTimeSeriesLevelShiftModel::addSamples(std::size_t count, - maths_t::EDataType dataType, - const TWeightStyleVec &weightStyles, - const TTimeDoublePr1Vec &samples_, - const TDouble4Vec1Vec &weights, - double propagationInterval) +void CUnivariateLevelShiftModel::addSamples(std::size_t count, + const TWeightStyleVec &weightStyles, + const TTimeDoublePr1Vec &samples_, + const TDouble4Vec1Vec &weights) { - TDouble1Vec samples; - samples.reserve(samples_.size()); + const CTimeSeriesDecompositionInterface &trendModel{this->trendModel()}; + for (const auto &sample : samples_) { - double x{this->trendModel().detrend(sample.first, sample.second, 0.0)}; - samples.push_back(x); - m_Shift.add(x - m_ResidualModelMode); - } - for (auto &sample : samples) - { - sample -= CBasicStatistics::mean(m_Shift); - } - for (const auto &weight : weights) - { - m_SampleCount += maths_t::count(weightStyles, weight); + double x{trendModel.detrend(sample.first, sample.second, 0.0) - m_ResidualModelMode}; + m_Shift.add(x); } - m_ResidualModel->dataType(dataType); - m_ResidualModel->addSamples(weightStyles, samples, weights); - m_ResidualModel->propagateForwardsByTime(propagationInterval); - // We delay updating the log-likelihood because early on the // level can change giving us a better apparent fit to the // data than a fixed step. Five updates was found to be the // minimum to get empirically similar sum log-likelihood if // there is no shift in the data. 
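// A compact sketch of how the per-model quantities above combine: each
// candidate change model accumulates the log-likelihood of the same
// samples, BIC penalises the level-shift model for its single fitted
// parameter, and a change is only accepted once the BIC improvement over
// the no-change model is decisive. The threshold and structure names are
// illustrative; the real detector additionally waits for the minimum
// detection time to elapse before deciding.

#include <cmath>

namespace example
{
struct SCandidateModel
{
    double s_LogLikelihood;  // sum of per-sample log-likelihoods
    double s_ParameterCount; // parameters fitted by this model
};

double bic(const SCandidateModel &model, double sampleCount)
{
    return -2.0 * model.s_LogLikelihood + model.s_ParameterCount * std::log(sampleCount);
}

bool acceptLevelShift(const SCandidateModel &noChange,
                      const SCandidateModel &levelShift,
                      double sampleCount,
                      double minimumDeltaBicToDetect)
{
    double evidence{bic(noChange, sampleCount) - bic(levelShift, sampleCount)};
    return evidence >= minimumDeltaBicToDetect;
}
}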
- double logLikelihood; - if (count >= 5 && m_ResidualModel->jointLogMarginalLikelihood( - weightStyles, samples, weights, - logLikelihood) == maths_t::E_FpNoErrors) + if (count >= COUNT_TO_INITIALIZE) { - this->addLogLikelihood(logLikelihood); - } -} + TDouble1Vec samples; + samples.reserve(samples_.size()); + for (const auto &sample : samples_) + { + double shift{CBasicStatistics::mean(m_Shift)}; + samples.push_back(trendModel.detrend(sample.first, sample.second, 0.0) - shift); + } + for (const auto &weight : weights) + { + m_SampleCount += maths_t::count(weightStyles, weight); + } -void CUnivariateTimeSeriesLevelShiftModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ - core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem); -} + CPrior &residualModel{this->residualModel()}; + residualModel.addSamples(weightStyles, samples, weights); + residualModel.propagateForwardsByTime(1.0); -std::size_t CUnivariateTimeSeriesLevelShiftModel::staticSize() const -{ - return sizeof(*this); + double logLikelihood; + if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights, + logLikelihood) == maths_t::E_FpNoErrors) + { + this->addLogLikelihood(logLikelihood); + } + } } -std::size_t CUnivariateTimeSeriesLevelShiftModel::memoryUsage() const +std::size_t CUnivariateLevelShiftModel::staticSize() const { - return core::CMemory::dynamicSize(m_ResidualModel); + return sizeof(*this); } -uint64_t CUnivariateTimeSeriesLevelShiftModel::checksum(uint64_t seed) const +uint64_t CUnivariateLevelShiftModel::checksum(uint64_t seed) const { - seed = CChecksum::calculate(seed, this->logLikelihood()); - seed = CChecksum::calculate(seed, this->trendModel()); + seed = this->CUnivariateChangeModel::checksum(seed); seed = CChecksum::calculate(seed, m_Shift); - seed = CChecksum::calculate(seed, m_SampleCount); - return CChecksum::calculate(seed, m_ResidualModel); + return CChecksum::calculate(seed, m_SampleCount); } -CUnivariateTimeSeriesTimeShiftModel::CUnivariateTimeSeriesTimeShiftModel(const CTimeSeriesDecompositionInterface &trendModel, - const TPriorPtr &residualModel, - core_t::TTime shift) : - CUnivariateTimeSeriesChangeModel{trendModel}, - m_Shift{shift}, - m_ResidualModel{residualModel->clone()} +CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &trendModel, + const TPriorPtr &residualModel, + core_t::TTime shift) : + CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, + m_Shift{shift} {} -bool CUnivariateTimeSeriesTimeShiftModel::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) +bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) { do { @@ -424,83 +494,70 @@ bool CUnivariateTimeSeriesTimeShiftModel::acceptRestoreTraverser(const SDistribu double logLikelihood, core::CStringUtils::stringToType(traverser.value(), logLikelihood), this->addLogLikelihood(logLikelihood)) - RESTORE(RESIDUAL_MODEL_TAG, traverser.traverseSubLevel( - boost::bind(CPriorStateSerialiser(), - boost::cref(params), - boost::ref(m_ResidualModel), _1))) + RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) } while (traverser.next()); return true; } -void CUnivariateTimeSeriesTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const +void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const { 
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(*m_ResidualModel), _1)); + boost::cref(this->residualModel()), _1)); } -double CUnivariateTimeSeriesTimeShiftModel::bic() const +double CUnivariateTimeShiftModel::bic() const { return -2.0 * this->logLikelihood(); } -TOptionalChangeDescription CUnivariateTimeSeriesTimeShiftModel::change() const +TOptionalChangeDescription CUnivariateTimeShiftModel::change() const { - return SChangeDescription{SChangeDescription::E_TimeShift, static_cast(m_Shift)}; + return SChangeDescription{SChangeDescription::E_TimeShift, + static_cast(m_Shift), + this->residualModelPtr()}; } -void CUnivariateTimeSeriesTimeShiftModel::addSamples(std::size_t count, - maths_t::EDataType dataType, - const TWeightStyleVec &weightStyles, - const TTimeDoublePr1Vec &samples_, - const TDouble4Vec1Vec &weights, - double propagationInterval) +void CUnivariateTimeShiftModel::addSamples(std::size_t count, + const TWeightStyleVec &weightStyles, + const TTimeDoublePr1Vec &samples_, + const TDouble4Vec1Vec &weights) { - TDouble1Vec samples; - samples.reserve(samples_.size()); - for (const auto &sample : samples_) - { - samples.push_back(this->trendModel().detrend(sample.first + m_Shift, sample.second, 0.0)); - } - - m_ResidualModel->dataType(dataType); - m_ResidualModel->addSamples(weightStyles, samples, weights); - m_ResidualModel->propagateForwardsByTime(propagationInterval); - // See CUnivariateTimeSeriesLevelShiftModel for an explanation // of the delay updating the log-likelihood. - double logLikelihood; - if (count >= 5 && m_ResidualModel->jointLogMarginalLikelihood( - weightStyles, samples, weights, - logLikelihood) == maths_t::E_FpNoErrors) + if (count >= COUNT_TO_INITIALIZE) { - this->addLogLikelihood(logLikelihood); - } -} + TDouble1Vec samples; + samples.reserve(samples_.size()); + for (const auto &sample : samples_) + { + samples.push_back(this->trendModel().detrend(sample.first + m_Shift, sample.second, 0.0)); + } -void CUnivariateTimeSeriesTimeShiftModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ - core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem); -} + CPrior &residualModel{this->residualModel()}; + residualModel.addSamples(weightStyles, samples, weights); + residualModel.propagateForwardsByTime(1.0); -std::size_t CUnivariateTimeSeriesTimeShiftModel::staticSize() const -{ - return sizeof(*this); + double logLikelihood; + if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights, + logLikelihood) == maths_t::E_FpNoErrors) + { + this->addLogLikelihood(logLikelihood); + } + } } -std::size_t CUnivariateTimeSeriesTimeShiftModel::memoryUsage() const +std::size_t CUnivariateTimeShiftModel::staticSize() const { - return core::CMemory::dynamicSize(m_ResidualModel); + return sizeof(*this); } -uint64_t CUnivariateTimeSeriesTimeShiftModel::checksum(uint64_t seed) const +uint64_t CUnivariateTimeShiftModel::checksum(uint64_t seed) const { - seed = CChecksum::calculate(seed, this->logLikelihood()); - seed = CChecksum::calculate(seed, this->trendModel()); - seed = CChecksum::calculate(seed, m_Shift); - return CChecksum::calculate(seed, m_ResidualModel); + seed = this->CUnivariateChangeModel::checksum(seed); + return CChecksum::calculate(seed, m_Shift); } } diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index 0dd7b0014b..411f5a9169 100644 --- 
a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #include @@ -87,6 +89,7 @@ const std::string LAST_PROPAGATION_TIME_6_3_TAG{"b"}; const std::string PERIODICITY_TEST_6_3_TAG{"c"}; const std::string CALENDAR_CYCLIC_TEST_6_3_TAG{"d"}; const std::string COMPONENTS_6_3_TAG{"e"}; +const std::string TIME_SHIFT_6_3_TAG{"f"}; // Version < 6.3 const std::string DECAY_RATE_OLD_TAG{"a"}; const std::string LAST_VALUE_TIME_OLD_TAG{"b"}; @@ -101,6 +104,7 @@ const std::string EMPTY_STRING; CTimeSeriesDecomposition::CTimeSeriesDecomposition(double decayRate, core_t::TTime bucketLength, std::size_t seasonalComponentSize) : + m_TimeShift{0}, m_LastValueTime{0}, m_LastPropagationTime{0}, m_PeriodicityTest{decayRate, bucketLength}, @@ -110,37 +114,41 @@ CTimeSeriesDecomposition::CTimeSeriesDecomposition(double decayRate, this->initializeMediator(); } -CTimeSeriesDecomposition::CTimeSeriesDecomposition(double decayRate, - core_t::TTime bucketLength, - std::size_t seasonalComponentSize, +CTimeSeriesDecomposition::CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) : + m_TimeShift{0}, m_LastValueTime{0}, m_LastPropagationTime{0}, - m_PeriodicityTest{decayRate, bucketLength}, - m_CalendarCyclicTest{decayRate, bucketLength}, - m_Components{decayRate, bucketLength, seasonalComponentSize} + m_PeriodicityTest{params.s_DecayRate, params.s_MinimumBucketLength}, + m_CalendarCyclicTest{params.s_DecayRate, params.s_MinimumBucketLength}, + m_Components{params.s_DecayRate, params.s_MinimumBucketLength, params.s_ComponentSize} { - traverser.traverseSubLevel(boost::bind(&CTimeSeriesDecomposition::acceptRestoreTraverser, this, _1)); + traverser.traverseSubLevel(boost::bind(&CTimeSeriesDecomposition::acceptRestoreTraverser, + this, boost::cref(params.s_ChangeModelParams), _1)); this->initializeMediator(); } -CTimeSeriesDecomposition::CTimeSeriesDecomposition(const CTimeSeriesDecomposition &other) : +CTimeSeriesDecomposition::CTimeSeriesDecomposition(const CTimeSeriesDecomposition &other, + bool isForForecast) : + m_TimeShift{other.m_TimeShift}, m_LastValueTime{other.m_LastValueTime}, m_LastPropagationTime{other.m_LastPropagationTime}, - m_PeriodicityTest{other.m_PeriodicityTest}, - m_CalendarCyclicTest{other.m_CalendarCyclicTest}, + m_PeriodicityTest{other.m_PeriodicityTest, isForForecast}, + m_CalendarCyclicTest{other.m_CalendarCyclicTest, isForForecast}, m_Components{other.m_Components} { this->initializeMediator(); } -bool CTimeSeriesDecomposition::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) +bool CTimeSeriesDecomposition::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) { if (traverser.name() == VERSION_6_3_TAG) { while (traverser.next()) { const std::string &name{traverser.name()}; + RESTORE_BUILT_IN(TIME_SHIFT_6_3_TAG, m_TimeShift) RESTORE_BUILT_IN(LAST_VALUE_TIME_6_3_TAG, m_LastValueTime) RESTORE_BUILT_IN(LAST_PROPAGATION_TIME_6_3_TAG, m_LastPropagationTime) RESTORE(PERIODICITY_TEST_6_3_TAG, traverser.traverseSubLevel( @@ -151,7 +159,7 @@ bool CTimeSeriesDecomposition::acceptRestoreTraverser(core::CStateRestoreTravers &m_CalendarCyclicTest, _1))) RESTORE(COMPONENTS_6_3_TAG, traverser.traverseSubLevel( boost::bind(&CComponents::acceptRestoreTraverser, - &m_Components, _1))) + &m_Components, boost::cref(params), _1))) } } else @@ -169,7 +177,7 @@ bool 
CTimeSeriesDecomposition::acceptRestoreTraverser(core::CStateRestoreTravers &m_CalendarCyclicTest, _1))) RESTORE(COMPONENTS_OLD_TAG, traverser.traverseSubLevel( boost::bind(&CComponents::acceptRestoreTraverser, - &m_Components, _1))) + &m_Components, boost::cref(params), _1))) } while (traverser.next()); this->decayRate(decayRate); @@ -179,6 +187,7 @@ bool CTimeSeriesDecomposition::acceptRestoreTraverser(core::CStateRestoreTravers void CTimeSeriesDecomposition::swap(CTimeSeriesDecomposition &other) { + std::swap(m_TimeShift, other.m_TimeShift); std::swap(m_LastValueTime, other.m_LastValueTime); std::swap(m_LastPropagationTime, other.m_LastPropagationTime); m_PeriodicityTest.swap(other.m_PeriodicityTest); @@ -199,6 +208,7 @@ CTimeSeriesDecomposition &CTimeSeriesDecomposition::operator=(const CTimeSeriesD void CTimeSeriesDecomposition::acceptPersistInserter(core::CStatePersistInserter &inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); + inserter.insertValue(TIME_SHIFT_6_3_TAG, m_TimeShift); inserter.insertValue(LAST_VALUE_TIME_6_3_TAG, m_LastValueTime); inserter.insertValue(LAST_PROPAGATION_TIME_6_3_TAG, m_LastPropagationTime); inserter.insertLevel(PERIODICITY_TEST_6_3_TAG, boost::bind(&CPeriodicityTest::acceptPersistInserter, @@ -209,9 +219,14 @@ void CTimeSeriesDecomposition::acceptPersistInserter(core::CStatePersistInserter &m_Components, _1)); } -CTimeSeriesDecomposition *CTimeSeriesDecomposition::clone(void) const +CTimeSeriesDecomposition *CTimeSeriesDecomposition::clone(bool isForForecast) const +{ + return new CTimeSeriesDecomposition{*this, isForForecast}; +} + +void CTimeSeriesDecomposition::dataType(maths_t::EDataType dataType) { - return new CTimeSeriesDecomposition{*this}; + m_Components.dataType(dataType); } void CTimeSeriesDecomposition::decayRate(double decayRate) @@ -237,6 +252,8 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, { CComponents::CScopeNotifyOnStateChange result{m_Components}; + time -= m_TimeShift; + core_t::TTime lastTime{std::max(m_LastValueTime, m_LastPropagationTime)}; m_LastValueTime = std::max(m_LastValueTime, time); @@ -259,6 +276,23 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, return result.changed(); } +void CTimeSeriesDecomposition::applyChange(core_t::TTime time, + double value, + const SChangeDescription &change) +{ + switch (change.s_Description) + { + case SChangeDescription::E_LevelShift: + { + m_Components.shiftLevel(time, value, change.s_Value[0]); + break; + } + case SChangeDescription::E_TimeShift: + m_TimeShift += static_cast(change.s_Value[0]); + break; + } +} + void CTimeSeriesDecomposition::propagateForwardsTo(core_t::TTime time) { if (time > m_LastPropagationTime) @@ -282,6 +316,8 @@ TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, { TVector2x1 baseline{0.0}; + time += m_TimeShift; + if (components & E_TrendForced) { baseline += vector2x1(m_Components.trend().value(time, confidence)); @@ -332,7 +368,7 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime, core_t::TTime step, double confidence, double minimumScale, - TDouble3VecVec &result) + const TWriteForecastResult &writer) { if (endTime < startTime) { @@ -345,7 +381,7 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime, return; } - auto predictor = [this, confidence](core_t::TTime time) + auto seasonal = [this, confidence](core_t::TTime time) { TVector2x1 prediction(0.0); for (const auto &component : m_Components.seasonal()) @@ -365,40 +401,42 @@ void 
CTimeSeriesDecomposition::forecast(core_t::TTime startTime, return pair(prediction); }; - endTime = startTime + CIntegerTools::ceil(endTime - startTime, step); + startTime += m_TimeShift; + endTime += m_TimeShift; + endTime = startTime + CIntegerTools::ceil(endTime - startTime, step); double trendVariance{CBasicStatistics::mean(m_Components.trend().variance(0.0))}; double seasonalVariance{m_Components.meanVariance() - trendVariance}; double variance{this->meanVariance()}; - double scale0{std::sqrt(std::max(CBasicStatistics::mean( this->scale(startTime, variance, 0.0)), minimumScale))}; TVector2x1 i0{vector2x1(confidenceInterval(confidence, seasonalVariance))}; - m_Components.trend().forecast(startTime, endTime, step, confidence, result); - for (core_t::TTime time = startTime; time < endTime; time += step) - { - double scale{std::sqrt(std::max(CBasicStatistics::mean( - this->scale(time, variance, 0.0)), minimumScale))}; - TVector2x1 prediction{ vector2x1(predictor(time)) - + vector2x1(this->smooth(predictor, time, E_Seasonal)) - + (scale - scale0) * i0}; - - core_t::TTime index{(time - startTime) / step}; - result[index][0] += prediction(0); - result[index][1] += (prediction(0) + prediction(1)) / 2.0; - result[index][2] += prediction(1); - m_Components.interpolate(SMessage{time, time - step}, false); - } + auto forecastSeasonal = [&](core_t::TTime time) + { + m_Components.interpolateForForecast(time); + double scale{std::sqrt(std::max(CBasicStatistics::mean( + this->scale(time, variance, 0.0)), minimumScale))}; + TVector2x1 prediction{ vector2x1(seasonal(time)) + + vector2x1(this->smooth(seasonal, time, E_Seasonal)) + + (scale - scale0) * i0}; + return TDouble3Vec{prediction(0), (prediction(0) + prediction(1)) / 2.0, prediction(1)}; + }; + + m_Components.trend().forecast(startTime, endTime, step, confidence, forecastSeasonal, writer); } -double CTimeSeriesDecomposition::detrend(core_t::TTime time, double value, double confidence) const +double CTimeSeriesDecomposition::detrend(core_t::TTime time, + double value, + double confidence, + int components) const { if (!this->initialized()) { return value; } - TDoubleDoublePr interval{this->value(time, confidence)}; + time += m_TimeShift; + TDoubleDoublePr interval{this->value(time, confidence, components)}; return std::min(value - interval.first, 0.0) + std::max(value - interval.second, 0.0); } @@ -423,6 +461,8 @@ TDoubleDoublePr CTimeSeriesDecomposition::scale(core_t::TTime time, return {1.0, 1.0}; } + time += m_TimeShift; + double components{0.0}; TVector2x1 scale(0.0); if (m_Components.usingTrendForPrediction()) @@ -595,7 +635,6 @@ core_t::TTime CTimeSeriesDecomposition::lastValueTime(void) const } const core_t::TTime CTimeSeriesDecomposition::SMOOTHING_INTERVAL{7200}; -const std::size_t CTimeSeriesDecomposition::DEFAULT_COMPONENT_SIZE{36u}; } } diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index 9bf2eb5ec0..667d28587f 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -512,13 +512,14 @@ CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(double decayR m_BucketLength{bucketLength} {} -CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(const CPeriodicityTest &other) : +CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(const CPeriodicityTest &other, + bool isForForecast) : m_Machine{other.m_Machine}, m_DecayRate{other.m_DecayRate}, m_BucketLength{other.m_BucketLength} { // Note that 
m_Windows is an array.
-    for (std::size_t i = 0u; i < other.m_Windows.size(); ++i)
+    for (std::size_t i = 0u; !isForForecast && i < other.m_Windows.size(); ++i)
     {
         if (other.m_Windows[i])
         {
@@ -814,11 +815,13 @@ CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(double decayRate,
         m_LastMonth{}
 {}
 
-CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(const CCalendarTest &other) :
+CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(const CCalendarTest &other,
+                                                             bool isForForecast) :
         m_Machine{other.m_Machine},
         m_DecayRate{other.m_DecayRate},
         m_LastMonth{other.m_LastMonth},
-        m_Test{other.m_Test ? new CCalendarCyclicTest(*other.m_Test) : 0}
+        m_Test{!isForForecast && other.m_Test ?
+               boost::make_shared<CCalendarCyclicTest>(*other.m_Test) : 0}
 {}
 
 bool CTimeSeriesDecompositionDetail::CCalendarTest::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
@@ -830,7 +833,7 @@ bool CTimeSeriesDecompositionDetail::CCalendarTest::acceptRestoreTraverser(core:
                 boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1)))
         RESTORE_BUILT_IN(LAST_MONTH_6_3_TAG, m_LastMonth);
         RESTORE_SETUP_TEARDOWN(CALENDAR_TEST_6_3_TAG,
-                               m_Test.reset(new CCalendarCyclicTest(m_DecayRate)),
+                               m_Test = boost::make_shared<CCalendarCyclicTest>(m_DecayRate),
                                traverser.traverseSubLevel(
                                    boost::bind(&CCalendarCyclicTest::acceptRestoreTraverser, m_Test.get(), _1)),
                                /**/)
@@ -988,7 +991,7 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::apply(std::size_t symbol, co
         case CC_TEST:
             if (!m_Test)
             {
-                m_Test.reset(new CCalendarCyclicTest(m_DecayRate));
+                m_Test = boost::make_shared<CCalendarCyclicTest>(m_DecayRate);
                 m_LastMonth = this->month(time) + 2;
             }
             break;
@@ -1055,7 +1058,8 @@ CTimeSeriesDecompositionDetail::CComponents::CComponents(const CComponents &othe
         m_Watcher{0}
 {}
 
-bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
+bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(const SDistributionRestoreParams &params,
+                                                                         core::CStateRestoreTraverser &traverser)
 {
     if (traverser.name() == VERSION_6_3_TAG)
     {
@@ -1066,7 +1070,8 @@ bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(core::C
                     boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1)));
             RESTORE_BUILT_IN(DECAY_RATE_6_3_TAG, m_DecayRate);
             RESTORE(TREND_6_3_TAG, traverser.traverseSubLevel(boost::bind(
-                                       &CTrendComponent::acceptRestoreTraverser, &m_Trend, _1)))
+                                       &CTrendComponent::acceptRestoreTraverser, &m_Trend,
+                                       boost::cref(params), _1)))
             RESTORE_SETUP_TEARDOWN(SEASONAL_6_3_TAG,
                                    m_Seasonal.reset(new SSeasonal),
                                    traverser.traverseSubLevel(boost::bind(
@@ -1204,6 +1209,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue &messag
         core_t::TTime observedInterval{m_Trend.observedInterval()};
 
         m_Trend.add(time, values[0], weight);
+        m_Trend.dontShiftLevel(time, value);
         for (std::size_t i = 1u; i <= m; ++i)
         {
             CSeasonalComponent *component{seasonalComponents[i - 1]};
@@ -1234,6 +1240,10 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue &messag
                                        v1 < SIGNIFICANT_VARIANCE_REDUCTION[0] * v0 &&
                                        df0 > 0.0 && df1 > 0.0 &&
                                        CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE;
+            if (m_UsingTrendForPrediction)
+            {
+                LOG_DEBUG("Detected trend at " << time);
+            }
             *m_Watcher = m_UsingTrendForPrediction;
         }
     }
@@ -1341,7 +1351,12 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar
     }
 }
 
-void
CTimeSeriesDecompositionDetail::CComponents::shiftLevel(core_t::TTime time, double value, double shift) +{ + m_Trend.shiftLevel(time, value, shift); +} + +void CTimeSeriesDecompositionDetail::CComponents::interpolate(const SMessage &message) { core_t::TTime time{message.s_Time}; core_t::TTime lastTime{message.s_LastTime}; @@ -1359,11 +1374,11 @@ void CTimeSeriesDecompositionDetail::CComponents::interpolate(const SMessage &me if (m_Seasonal) { - m_Seasonal->interpolate(time, lastTime, refine); + m_Seasonal->interpolate(time, lastTime, true); } if (m_Calendar) { - m_Calendar->interpolate(time, lastTime, refine); + m_Calendar->interpolate(time, lastTime, true); } this->apply(SC_INTERPOLATED, message); @@ -1378,6 +1393,27 @@ void CTimeSeriesDecompositionDetail::CComponents::interpolate(const SMessage &me } } +void CTimeSeriesDecompositionDetail::CComponents::interpolateForForecast(core_t::TTime time) +{ + if (this->shouldInterpolate(time, time - m_BucketLength)) + { + if (m_Seasonal) + { + m_Seasonal->interpolate(time, time - m_BucketLength, false); + } + if (m_Calendar) + { + m_Calendar->interpolate(time, time - m_BucketLength, true); + } + } +} + + +void CTimeSeriesDecompositionDetail::CComponents::dataType(maths_t::EDataType dataType) +{ + m_Trend.dataType(dataType); +} + void CTimeSeriesDecompositionDetail::CComponents::decayRate(double decayRate) { m_DecayRate = decayRate; diff --git a/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc b/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc index 1095a19314..b6a4a07962 100644 --- a/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc +++ b/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc @@ -50,10 +50,7 @@ bool CTimeSeriesDecompositionStateSerialiser::operator()(const STimeSeriesDecomp const std::string &name = traverser.name(); if (name == TIME_SERIES_DECOMPOSITION_TAG) { - result.reset(new CTimeSeriesDecomposition(params.s_DecayRate, - params.s_MinimumBucketLength, - params.s_ComponentSize, - traverser)); + result.reset(new CTimeSeriesDecomposition(params, traverser)); ++numResults; } else if (name == TIME_SERIES_DECOMPOSITION_STUB_TAG) diff --git a/lib/maths/CTimeSeriesDecompositionStub.cc b/lib/maths/CTimeSeriesDecompositionStub.cc index 7fc413d987..508efbbf12 100644 --- a/lib/maths/CTimeSeriesDecompositionStub.cc +++ b/lib/maths/CTimeSeriesDecompositionStub.cc @@ -17,11 +17,15 @@ namespace const maths_t::TSeasonalComponentVec NO_COMPONENTS; } -CTimeSeriesDecompositionStub *CTimeSeriesDecompositionStub::clone(void) const +CTimeSeriesDecompositionStub *CTimeSeriesDecompositionStub::clone(bool /*isForForecast*/) const { return new CTimeSeriesDecompositionStub(*this); } +void CTimeSeriesDecompositionStub::dataType(maths_t::EDataType /*dataType*/) +{ +} + void CTimeSeriesDecompositionStub::decayRate(double /*decayRate*/) { } @@ -44,6 +48,12 @@ bool CTimeSeriesDecompositionStub::addPoint(core_t::TTime /*time*/, return false; } +void CTimeSeriesDecompositionStub::applyChange(core_t::TTime /*time*/, + double /*value*/, + const SChangeDescription &/*change*/) +{ +} + void CTimeSeriesDecompositionStub::propagateForwardsTo(core_t::TTime /*time*/) { } @@ -66,14 +76,14 @@ void CTimeSeriesDecompositionStub::forecast(core_t::TTime /*startTime*/, core_t::TTime /*step*/, double /*confidence*/, double /*minimumScale*/, - TDouble3VecVec &result) + const TWriteForecastResult &/*writer*/) { - result.clear(); } double CTimeSeriesDecompositionStub::detrend(core_t::TTime /*time*/, double value, - double /*confidence*/) const + double /*confidence*/, + 
int /*components*/) const
 {
     return value;
 }
diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc
index 333c872dd2..25efa87f7f 100644
--- a/lib/maths/CTimeSeriesModel.cc
+++ b/lib/maths/CTimeSeriesModel.cc
@@ -23,9 +23,11 @@
 #include
 #include
 #include
+#include
 #include
 #include
+#include
 #include
 #include
@@ -38,25 +40,30 @@ namespace maths
 {
 namespace
 {
+
 using TDoubleDoublePr = std::pair<double, double>;
+using TSizeDoublePr = std::pair<std::size_t, double>;
+using TTimeDoublePr = std::pair<core_t::TTime, double>;
+using TDouble1Vec = core::CSmallVector<double, 1>;
 using TDouble2Vec = core::CSmallVector<double, 2>;
+using TDouble4Vec = core::CSmallVector<double, 4>;
 using TDouble10Vec = core::CSmallVector<double, 10>;
+using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
 using TDouble10Vec1Vec = core::CSmallVector<TDouble10Vec, 1>;
 using TDouble10Vec2Vec = core::CSmallVector<TDouble10Vec, 2>;
 using TDouble10Vec4Vec = core::CSmallVector<TDouble10Vec, 4>;
 using TDouble10Vec4Vec1Vec = core::CSmallVector<TDouble10Vec4Vec, 1>;
 using TSizeVec = std::vector<std::size_t>;
+using TSize1Vec = core::CSmallVector<std::size_t, 1>;
+using TSize2Vec = core::CSmallVector<std::size_t, 2>;
+using TSize2Vec1Vec = core::CSmallVector<TSize2Vec, 1>;
+using TTime1Vec = core::CSmallVector<core_t::TTime, 1>;
 using TSize10Vec = core::CSmallVector<std::size_t, 10>;
-using TSizeDoublePr = std::pair<std::size_t, double>;
 using TSizeDoublePr10Vec = core::CSmallVector<TSizeDoublePr, 10>;
 using TTail10Vec = core::CSmallVector<maths_t::ETail, 10>;
-using TTime1Vec = CTimeSeriesCorrelations::TTime1Vec;
-using TDouble1Vec = CTimeSeriesCorrelations::TDouble1Vec;
-using TDouble4Vec = CTimeSeriesCorrelations::TDouble4Vec;
-using TDouble4Vec1Vec = CTimeSeriesCorrelations::TDouble4Vec1Vec;
-using TSize1Vec = CTimeSeriesCorrelations::TSize1Vec;
-using TSize2Vec1Vec = CTimeSeriesCorrelations::TSize2Vec1Vec;
 using TMultivariatePriorCPtrSizePr1Vec = CTimeSeriesCorrelations::TMultivariatePriorCPtrSizePr1Vec;
+using TOptionalSize = boost::optional<std::size_t>;
+using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
 
 //! The decay rate controllers we maintain.
 enum EDecayRateController
 {
     E_TrendControl = 0,
     E_ResidualControl,
     E_NumberControls
 };
 
+const std::size_t MAXIMUM_CORRELATIONS{5000};
+const double MINIMUM_CORRELATE_PRIOR_SAMPLE_COUNT{24.0};
+const std::size_t SLIDING_WINDOW_SIZE{12u};
+const TSize10Vec NOTHING_TO_MARGINALIZE;
+const TSizeDoublePr10Vec NOTHING_TO_CONDITION;
+const double WINSORISED_FRACTION{1e-2};
+const double MINIMUM_WINSORISATION_WEIGHT_FRACTION{1e-10};
+const double MINIMUM_WINSORISATION_WEIGHT{0.01};
+const double LOG_WINSORISED_FRACTION{std::log(WINSORISED_FRACTION)};
+const double LOG_MINIMUM_WEIGHT_FRACTION{std::log(MINIMUM_WINSORISATION_WEIGHT_FRACTION)};
+const double MINUS_LOG_TOLERANCE{-std::log(1.0 - 100.0 * std::numeric_limits<double>::epsilon())};
+
 //! Computes the Winsorisation weight for \p value.
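// For intuition, a self-contained sketch (not the library code; the helper
// name and the standalone form are ours) of the weight curve the function
// below implements: for a two-sided tail fraction f the weight is
//     w(f) = exp(-c * log(f) * (log(f) - log(WF)))
// where c is chosen so that w(WF) = 1 and w(MWF) = MW, matching the
// WINSORISED_FRACTION, MINIMUM_WINSORISATION_WEIGHT_FRACTION and
// MINIMUM_WINSORISATION_WEIGHT constants above with derate = 0.

#include <cmath>
#include <iostream>

double winsorisationWeightSketch(double f)
{
    const double WF = 1e-2;    // WINSORISED_FRACTION
    const double MWF = 1e-10;  // MINIMUM_WINSORISATION_WEIGHT_FRACTION
    const double MW = 0.01;    // MINIMUM_WINSORISATION_WEIGHT
    if (f >= WF)  { return 1.0; }
    if (f <= MWF) { return MW; }
    // Solving MW = (MWF / WF)^(-c log(MWF)) for c gives the exponent below.
    double c = -std::log(MW) / std::log(MWF) / (std::log(MWF) - std::log(WF));
    double logf = std::log(f);
    return std::exp(-c * logf * (logf - std::log(WF)));
}

int main()
{
    // The weight decays smoothly from 1 at f = 1e-2 to 0.01 at f = 1e-10.
    for (double f : {1e-2, 1e-4, 1e-6, 1e-8, 1e-10})
    {
        std::cout << "f = " << f << ", w = " << winsorisationWeightSketch(f) << "\n";
    }
    return 0;
}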
double computeWinsorisationWeight(const CPrior &prior, double derate, double scale, double value) { - static const double WINSORISED_FRACTION = 1e-4; - static const double MINIMUM_WEIGHT_FRACTION = 1e-12; - static const double MINIMUM_WEIGHT = 0.05; - static const double MINUS_LOG_TOLERANCE = -std::log(1.0 - 100.0 * std::numeric_limits::epsilon()); - - double deratedMinimumWeight = MINIMUM_WEIGHT - + (0.5 - MINIMUM_WEIGHT) + double deratedMinimumWeight = MINIMUM_WINSORISATION_WEIGHT + + (0.5 - MINIMUM_WINSORISATION_WEIGHT) * CTools::truncate(derate, 0.0, 1.0); double lowerBound; @@ -92,13 +106,13 @@ double computeWinsorisationWeight(const CPrior &prior, double derate, double sca return 1.0; } - double f = std::exp(-(lowerBound + upperBound) / 2.0); + double f{std::exp(-(lowerBound + upperBound) / 2.0)}; f = std::min(f, 1.0 - f); if (f >= WINSORISED_FRACTION) { return 1.0; } - if (f <= MINIMUM_WEIGHT_FRACTION) + if (f <= MINIMUM_WINSORISATION_WEIGHT_FRACTION) { return deratedMinimumWeight; } @@ -109,21 +123,11 @@ double computeWinsorisationWeight(const CPrior &prior, double derate, double sca // fraction and c is determined by solving: // MW = (MWF / WF)^(-c log(MWF)) - static const double EXPONENT = -std::log(MINIMUM_WEIGHT) - / std::log(MINIMUM_WEIGHT_FRACTION) - / std::log(MINIMUM_WEIGHT_FRACTION / WINSORISED_FRACTION); - static const double LOG_WINSORISED_FRACTION = std::log(WINSORISED_FRACTION); - - double deratedExponent = EXPONENT; - if (deratedMinimumWeight != MINIMUM_WEIGHT) - { - deratedExponent = -std::log(deratedMinimumWeight) - / std::log(MINIMUM_WEIGHT_FRACTION) - / std::log(MINIMUM_WEIGHT_FRACTION / WINSORISED_FRACTION); - } - - double logf = std::log(f); - double result = std::exp(-deratedExponent * logf * (logf - LOG_WINSORISED_FRACTION)); + double deratedExponent{ -std::log(deratedMinimumWeight) + / LOG_MINIMUM_WEIGHT_FRACTION + / (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)}; + double logf{std::log(f)}; + double result{std::exp(-deratedExponent * logf * (logf - LOG_WINSORISED_FRACTION))}; if (CMathsFuncs::isNan(result)) { @@ -142,23 +146,77 @@ double computeWinsorisationWeight(const CMultivariatePrior &prior, double scale, const TDouble10Vec &value) { - static const TSize10Vec MARGINALIZE; - - std::size_t d = prior.dimension(); - - TSizeDoublePr10Vec condition(d - 1); - for (std::size_t i = 0u, j = 0u; i < d; ++i) + std::size_t dimensions = prior.dimension(); + TSizeDoublePr10Vec condition(dimensions - 1); + for (std::size_t i = 0u, j = 0u; i < dimensions; ++i) { if (i != dimension) { condition[j++] = std::make_pair(i, value[i]); } } - - boost::shared_ptr conditional(prior.univariate(MARGINALIZE, condition).first); + boost::shared_ptr conditional( + prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); return computeWinsorisationWeight(*conditional, derate, scale, value[dimension]); } +//! Optionally randomly sample from \p indices. 
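// Sketch of the acceptance rule the function below applies (the helper name
// and choice of RNG are ours): a bucket value is retained for the sliding
// window with probability
//     p = SLIDING_WINDOW_SIZE * bucketLength / DAY * meanWinsorisationWeight
// so that on the order of SLIDING_WINDOW_SIZE values survive per day, and
// heavily Winsorised (likely anomalous) buckets are less likely to be kept.

#include <cstddef>
#include <cstdint>
#include <random>

bool acceptForSlidingWindow(std::mt19937_64 &rng,
                            std::int64_t bucketLength,       // seconds
                            double meanWinsorisationWeight,  // in [0, 1]
                            std::size_t windowSize = 12)     // SLIDING_WINDOW_SIZE
{
    const double DAY = 86400.0;
    double p = static_cast<double>(windowSize)
               * static_cast<double>(bucketLength) / DAY
               * meanWinsorisationWeight;
    return p >= 1.0 || std::uniform_real_distribution<double>{0.0, 1.0}(rng) < p;
}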
+TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus &rng, + const CModelAddSamplesParams ¶ms, + core_t::TTime bucketLength, + const TSizeVec &indices) +{ + using TDouble2Vec4Vec = core::CSmallVector; + + double weight{1.0}; + { + auto i = std::find(params.weightStyles().begin(), + params.weightStyles().end(), + maths_t::E_SampleWinsorisationWeight); + if (i != params.weightStyles().end()) + { + std::ptrdiff_t index{i - params.weightStyles().begin()}; + auto addWeight = [index](TMeanAccumulator mean, const TDouble2Vec4Vec &weight_) + { + mean.add(weight_[index]); + return mean; + }; + TMeanAccumulator mean{std::accumulate(params.trendWeights().begin(), + params.trendWeights().end(), + TMeanAccumulator{}, addWeight)}; + weight = CBasicStatistics::mean(mean); + } + } + + double p{SLIDING_WINDOW_SIZE * static_cast(bucketLength) + / static_cast(core::constants::DAY) + * weight}; + if (p >= 1.0 || CSampling::uniformSample(rng, 0.0, 1.0) < p) + { + std::size_t i{CSampling::uniformSample(rng, 0, indices.size())}; + return indices[i]; + } + + return TOptionalSize{}; +} + +//! Convert \p value to comma separated string. +std::string toDelimited(const TTimeDoublePr &value) +{ + return core::CStringUtils::typeToString(value.first) + ',' + + core::CStringUtils::typeToStringPrecise( + value.second, core::CIEEE754::E_SinglePrecision); +} + +//! Extract \p value from comma separated string. +bool fromDelimited(const std::string &str, TTimeDoublePr &value) +{ + std::size_t pos{str.find(',')}; + return pos != std::string::npos + && core::CStringUtils::stringToType(str.substr(0, pos), value.first) + && core::CStringUtils::stringToType(str.substr(pos + 1), value.second); +} + // Models // Version 6.3 const std::string VERSION_6_3_TAG("6.3"); @@ -171,6 +229,10 @@ const std::string TREND_MODEL_6_3_TAG{"f"}; const std::string RESIDUAL_MODEL_6_3_TAG{"g"}; const std::string ANOMALY_MODEL_6_3_TAG{"h"}; const std::string SLIDING_WINDOW_6_3_TAG{"i"}; +const std::string CANDIDATE_CHANGE_POINT_6_3_TAG{"j"}; +const std::string CURRENT_CHANGE_INTERVAL_6_3_TAG{"k"}; +const std::string TIME_OF_LAST_CHANGE_POINT_6_3_TAG{"l"}; +const std::string CHANGE_DETECTOR_6_3_TAG{"m"}; // Version < 6.3 const std::string ID_OLD_TAG{"a"}; const std::string CONTROLLER_OLD_TAG{"b"}; @@ -200,12 +262,6 @@ const std::string SECOND_CORRELATE_ID_TAG{"b"}; const std::string CORRELATION_MODEL_TAG{"c"}; const std::string CORRELATION_TAG{"d"}; -const std::size_t MAXIMUM_CORRELATIONS{5000}; -const double MINIMUM_CORRELATE_PRIOR_SAMPLE_COUNT{24.0}; -const std::size_t SLIDING_WINDOW_SIZE{12}; -const TSize10Vec NOTHING_TO_MARGINALIZE; -const TSizeDoublePr10Vec NOTHING_TO_CONDITION; - namespace forecast { const std::string INFO_INSUFFICIENT_HISTORY("Insufficient history to forecast"); @@ -592,6 +648,8 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams ¶m boost::make_shared(params.bucketLength(), params.decayRate()) : TAnomalyModelPtr()), + m_CurrentChangeInterval(0), + m_TimeOfLastChangePoint(0), m_SlidingWindow(SLIDING_WINDOW_SIZE), m_Correlations(0) { @@ -614,7 +672,7 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const SModelRestoreParams CUnivariateTimeSeriesModel::~CUnivariateTimeSeriesModel(void) { - if (m_Correlations) + if (m_Correlations != nullptr) { m_Correlations->removeTimeSeries(m_Id); } @@ -628,7 +686,7 @@ std::size_t CUnivariateTimeSeriesModel::identifier(void) const CUnivariateTimeSeriesModel *CUnivariateTimeSeriesModel::clone(std::size_t id) const { CUnivariateTimeSeriesModel *result{new 
CUnivariateTimeSeriesModel{*this, id}}; - if (m_Correlations) + if (m_Correlations != nullptr) { result->modelCorrelations(*m_Correlations); } @@ -642,7 +700,7 @@ CUnivariateTimeSeriesModel *CUnivariateTimeSeriesModel::cloneForPersistence(void CUnivariateTimeSeriesModel *CUnivariateTimeSeriesModel::cloneForForecast(void) const { - return new CUnivariateTimeSeriesModel{*this, m_Id}; + return new CUnivariateTimeSeriesModel{*this, m_Id, true}; } bool CUnivariateTimeSeriesModel::isForecastPossible(void) const @@ -692,7 +750,6 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, return E_Success; } - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TOptionalTimeDoublePr = boost::optional; TSizeVec valueorder(samples.size()); @@ -704,18 +761,19 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, }); TOptionalTimeDoublePr randomSample; - - double p{SLIDING_WINDOW_SIZE * static_cast(this->params().bucketLength()) - / static_cast(core::constants::DAY)}; - if (p >= 1.0 || CSampling::uniformSample(m_Rng, 0.0, 1.0) < p) + if (TOptionalSize index = randomlySample(m_Rng, params, + this->params().bucketLength(), + valueorder)) { - std::size_t i{CSampling::uniformSample(m_Rng, 0, samples.size())}; - randomSample.reset({samples[valueorder[i]].first, samples[valueorder[i]].second[0]}); + randomSample.reset({samples[*index].first, samples[*index].second[0]}); } + EUpdateResult result{this->testAndApplyChange(params, valueorder, samples)}; + m_IsNonNegative = params.isNonNegative(); - EUpdateResult result{this->updateTrend(params.weightStyles(), samples, params.trendWeights())}; + result = CModel::combine(result, this->updateTrend(params.weightStyles(), + samples, params.trendWeights())); for (auto &sample : samples) { @@ -732,41 +790,34 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, m_ResidualModel->dataType(type); TDouble1Vec samples_; - TDouble4Vec1Vec weights; + TDouble4Vec1Vec weights_; samples_.reserve(samples.size()); - weights.reserve(samples.size()); + weights_.reserve(samples.size()); TMeanAccumulator averageTime; for (auto i : valueorder) { samples_.push_back(samples[i].second[0]); - TDouble4Vec1Vec wi(1); - wi[0].reserve(params.priorWeights()[i].size()); - for (const auto &weight : params.priorWeights()[i]) - { - wi[0].push_back(weight[0]); - } - weights.push_back(wi[0]); + weights_.push_back(unpack(params.priorWeights()[i])); averageTime.add(static_cast(samples[i].first)); } - m_ResidualModel->addSamples(params.weightStyles(), samples_, weights); + m_ResidualModel->addSamples(params.weightStyles(), samples_, weights_); m_ResidualModel->propagateForwardsByTime(params.propagationInterval()); - if (m_AnomalyModel) + if (m_AnomalyModel != nullptr) { m_AnomalyModel->propagateForwardsByTime(params.propagationInterval()); } double multiplier{1.0}; - if (m_Controllers) + if (m_Controllers != nullptr) { TDouble1VecVec errors[2]; errors[0].reserve(samples.size()); errors[1].reserve(samples.size()); - for (auto i : valueorder) + for (auto sample : samples_) { - this->appendPredictionErrors(params.propagationInterval(), - samples[i].second[0], errors); + this->appendPredictionErrors(params.propagationInterval(), sample, errors); } { CDecayRateController &controller{(*m_Controllers)[E_TrendControl]}; @@ -797,9 +848,9 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, } } - if (m_Correlations) + if (m_Correlations != nullptr) { - m_Correlations->addSamples(m_Id, type, samples, 
weights, params.propagationInterval(), multiplier); + m_Correlations->addSamples(m_Id, params, samples, multiplier); } if (randomSample) @@ -818,22 +869,16 @@ void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap) CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::mode(core_t::TTime time, const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights_) const + const TDouble2Vec4Vec &weights) const { - TDouble4Vec weights; - weights.reserve(weights_.size()); - for (const auto &weight : weights_) - { - weights.push_back(weight[0]); - } - return { m_ResidualModel->marginalLikelihoodMode(weightStyles, weights) + return { m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights)) + CBasicStatistics::mean(m_TrendModel->value(time))}; } CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec1Vec &weights_) const + const TDouble2Vec4Vec1Vec &weights) const { TDouble2Vec1Vec result; @@ -845,23 +890,15 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, { result.resize(correlated.size(), TDouble10Vec(2)); - double trend[2]; - trend[0] = CBasicStatistics::mean(m_TrendModel->value(time)); + double baseline[2]; + baseline[0] = CBasicStatistics::mean(m_TrendModel->value(time)); for (std::size_t i = 0u; i < correlated.size(); ++i) { - trend[1] = CBasicStatistics::mean(correlatedTimeSeriesModels[i]->m_TrendModel->value(time)); - TDouble10Vec4Vec weights; - weights.resize(weights_[i].size(), TDouble10Vec(2)); - for (std::size_t j = 0u; j < weights_[i].size(); ++j) - { - for (std::size_t d = 0u; d < 2; ++d) - { - weights[j][d] = weights_[i][j][d]; - } - } - TDouble10Vec mode(correlationModels[i].first->marginalLikelihoodMode(weightStyles, weights)); - result[i][variables[i][0]] = trend[0] + mode[variables[i][0]]; - result[i][variables[i][1]] = trend[1] + mode[variables[i][1]]; + baseline[1] = CBasicStatistics::mean(correlatedTimeSeriesModels[i]->m_TrendModel->value(time)); + TDouble10Vec mode(correlationModels[i].first->marginalLikelihoodMode( + weightStyles, CMultivariateTimeSeriesModel::unpack(weights[i]))); + result[i][variables[i][0]] = baseline[0] + mode[variables[i][0]]; + result[i][variables[i][1]] = baseline[1] + mode[variables[i][1]]; } } @@ -870,18 +907,11 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights_) const + const TDouble2Vec4Vec &weights) const { - TDouble2Vec1Vec result; + TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights))); - TDouble4Vec weights; - weights.reserve(weights_.size()); - for (const auto &weight : weights_) - { - weights.push_back(weight[0]); - } - - TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, weights)); + TDouble2Vec1Vec result; result.reserve(modes.size()); for (auto mode : modes) { @@ -992,13 +1022,7 @@ CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, double trend{m_TrendModel->initialized() ? 
CBasicStatistics::mean(m_TrendModel->value(time, confidenceInterval)) : 0.0}; - TDouble4Vec weights; - weights.reserve(weights_.size()); - for (const auto &weight : weights_) - { - weights.push_back(weight[0]); - } - + TDouble4Vec weights(unpack(weights_)); double median{CBasicStatistics::mean( m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; TDoubleDoublePr interval{ @@ -1028,32 +1052,26 @@ bool CUnivariateTimeSeriesModel::forecast(core_t::TTime startTime, } using TDouble3Vec = core::CSmallVector; - using TDouble3VecVec = std::vector; core_t::TTime bucketLength{this->params().bucketLength()}; double minimum{m_IsNonNegative ? std::max(minimum_[0], 0.0) : minimum_[0]}; double maximum{m_IsNonNegative ? std::max(maximum_[0], 0.0) : maximum_[0]}; - TDouble3VecVec predictions; - m_TrendModel->forecast(startTime, endTime, bucketLength, confidenceInterval, - this->params().minimumSeasonalVarianceScale(), predictions); + auto writer = [&](core_t::TTime time, const TDouble3Vec &prediction) + { + SErrorBar errorBar{time, bucketLength, + CTools::truncate(prediction[0], + minimum, + maximum + prediction[0] - prediction[1]), + CTools::truncate(prediction[1], minimum, maximum), + CTools::truncate(prediction[2], + minimum + prediction[2] - prediction[1], + maximum)}; + forecastPushDataPointFunc(errorBar); + }; - core_t::TTime time{startTime}; - for (const auto &prediction : predictions) - { - SErrorBar errorBar; - errorBar.s_Time = time; - errorBar.s_BucketLength = bucketLength; - errorBar.s_LowerBound = CTools::truncate(prediction[0], - minimum, - maximum + prediction[0] - prediction[1]); - errorBar.s_Predicted = CTools::truncate(prediction[1], minimum, maximum); - errorBar.s_UpperBound = CTools::truncate(prediction[2], - minimum + prediction[2] - prediction[1], - maximum); - forecastPushDataPointFunc(errorBar); - time += bucketLength; - } + m_TrendModel->forecast(startTime, endTime, bucketLength, confidenceInterval, + this->params().minimumSeasonalVarianceScale(), writer); return true; } @@ -1080,13 +1098,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ { core_t::TTime time{time_[0][0]}; TDouble1Vec sample{m_TrendModel->detrend(time, value[0][0], params.seasonalConfidenceInterval())}; - - TDouble4Vec1Vec weights(1); - weights[0].reserve(params.weights()[0].size()); - for (const auto &weight : params.weights()[0]) - { - weights[0].push_back(weight[0]); - } + TDouble4Vec1Vec weights{unpack(params.weights()[0])}; double pl, pu; maths_t::ETail tail_; @@ -1108,7 +1120,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ this->params().probabilityBucketEmpty(), (pl + pu) / 2.0); - if (m_AnomalyModel) + if (m_AnomalyModel != nullptr) { TDouble2Vec residual{ (sample[0] - m_ResidualModel->nearestMarginalLikelihoodMean(sample[0])) / std::max(std::sqrt(this->seasonalWeight(0.0, time)[0]), 1.0)}; @@ -1136,7 +1148,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ // Declared outside the loop to minimize the number of times they are created. 
TSize10Vec variable(1); TDouble10Vec1Vec sample{TDouble10Vec(2)}; - TDouble10Vec4Vec1Vec weights{TDouble10Vec4Vec(params.weightStyles().size(), TDouble10Vec(2))}; + TDouble10Vec4Vec1Vec weights(1); TDouble2Vec probabilityBucketEmpty(2); TDouble10Vec2Vec pli, pui; TTail10Vec ti; @@ -1156,13 +1168,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ correlatedTimeSeriesModels[i]->m_TrendModel->detrend(time_[i][variables[i][1]], value[i][variables[i][1]], params.seasonalConfidenceInterval()); - for (std::size_t j = 0u; j < params.weights()[i].size(); ++j) - { - for (std::size_t d = 0u; d < 2; ++d) - { - weights[0][j][d] = params.weights()[i][j][d]; - } - } + weights[0] = CMultivariateTimeSeriesModel::unpack(params.weights()[i]); if (correlationModels[i].first->probabilityOfLessLikelySamples(params.calculation(0), params.weightStyles(), @@ -1196,17 +1202,15 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ aggregator.add(p, neff); if (minProbability.add(p)) { - static TSizeDoublePr10Vec CONDITION; - static TSize10Vec MARGINALIZE; - tail[0] = ti[0]; mostAnomalousCorrelate.assign(1, i); conditional = ((pli[1][0] + pui[1][0]) < (pli[0][0] + pui[0][0])); mostAnomalousTime = time_[0][variables[i][0]]; mostAnomalousSample = sample[0][variables[i][0]]; mostAnomalousCorrelationModel = conditional ? - correlationModels[i].first->univariate({variables[i][1]}, CONDITION).first : - correlationModels[i].first->univariate(MARGINALIZE, + correlationModels[i].first->univariate({variables[i][1]}, + NOTHING_TO_CONDITION).first : + correlationModels[i].first->univariate(NOTHING_TO_MARGINALIZE, {{variables[i][1], sample[0][variables[i][1]]}}).first; } @@ -1218,7 +1222,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams ¶ } aggregator.calculate(probability); - if (m_AnomalyModel) + if (m_AnomalyModel != nullptr) { TDouble2Vec residual{ ( mostAnomalousSample - mostAnomalousCorrelationModel->nearestMarginalLikelihoodMean(mostAnomalousSample)) @@ -1255,6 +1259,10 @@ uint64_t CUnivariateTimeSeriesModel::checksum(uint64_t seed) const seed = CChecksum::calculate(seed, m_Controllers); seed = CChecksum::calculate(seed, m_TrendModel); seed = CChecksum::calculate(seed, m_ResidualModel); + seed = CChecksum::calculate(seed, m_CandidateChangePoint); + seed = CChecksum::calculate(seed, m_CurrentChangeInterval); + seed = CChecksum::calculate(seed, m_TimeOfLastChangePoint); + seed = CChecksum::calculate(seed, m_ChangeDetector); seed = CChecksum::calculate(seed, m_AnomalyModel); seed = CChecksum::calculate(seed, m_SlidingWindow); return CChecksum::calculate(seed, m_Correlations != 0); @@ -1267,6 +1275,7 @@ void CUnivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsa core::CMemoryDebug::dynamicSize("m_TrendModel", m_TrendModel, mem); core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem); core::CMemoryDebug::dynamicSize("m_AnomalyModel", m_AnomalyModel, mem); + core::CMemoryDebug::dynamicSize("m_ChangeDetector", m_ChangeDetector, mem); core::CMemoryDebug::dynamicSize("m_SlidingWindow", m_SlidingWindow, mem); } @@ -1276,6 +1285,7 @@ std::size_t CUnivariateTimeSeriesModel::memoryUsage(void) const + core::CMemory::dynamicSize(m_TrendModel) + core::CMemory::dynamicSize(m_ResidualModel) + core::CMemory::dynamicSize(m_AnomalyModel) + + core::CMemory::dynamicSize(m_ChangeDetector) + core::CMemory::dynamicSize(m_SlidingWindow); } @@ -1308,6 +1318,16 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const 
SModelRestoreParam traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)), /**/) + RESTORE(CANDIDATE_CHANGE_POINT_6_3_TAG, fromDelimited(traverser.value(), m_CandidateChangePoint)) + RESTORE_BUILT_IN(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval) + RESTORE_BUILT_IN(TIME_OF_LAST_CHANGE_POINT_6_3_TAG, m_TimeOfLastChangePoint) + RESTORE_SETUP_TEARDOWN(CHANGE_DETECTOR_6_3_TAG, + m_ChangeDetector = boost::make_shared( + m_TrendModel, m_ResidualModel), + traverser.traverseSubLevel(boost::bind( + &CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, + m_ChangeDetector.get(), boost::cref(params), _1)), + /**/) RESTORE(SLIDING_WINDOW_6_3_TAG, core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, traverser)) } @@ -1361,7 +1381,15 @@ void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInsert boost::cref(*m_TrendModel), _1)); inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_ResidualModel), _1)); - if (m_AnomalyModel) + inserter.insertValue(CANDIDATE_CHANGE_POINT_6_3_TAG, toDelimited(m_CandidateChangePoint)); + inserter.insertValue(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval); + inserter.insertValue(TIME_OF_LAST_CHANGE_POINT_6_3_TAG, m_TimeOfLastChangePoint); + if (m_ChangeDetector != nullptr) + { + inserter.insertLevel(CHANGE_DETECTOR_6_3_TAG, boost::bind( + &CUnivariateTimeSeriesChangeDetector::acceptPersistInserter, m_ChangeDetector.get(), _1)); + } + if (m_AnomalyModel != nullptr) { inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter, @@ -1375,6 +1403,35 @@ maths_t::EDataType CUnivariateTimeSeriesModel::dataType(void) const return m_ResidualModel->dataType(); } +CUnivariateTimeSeriesModel::TDouble4Vec CUnivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec &weights) +{ + TDouble4Vec result; + result.reserve(weights.size()); + for (const auto &weight : weights) + { + result.push_back(weight[0]); + } + return result; +} + +void CUnivariateTimeSeriesModel::reinitializeResidualModel(double learnRate, + const TDecompositionPtr &trend, + const TTimeDoublePrCBuf &slidingWindow, + CPrior &residualModel) +{ + residualModel.setToNonInformative(0.0, residualModel.decayRate()); + if (!slidingWindow.empty()) + { + double slidingWindowLength{static_cast(slidingWindow.size())}; + TDouble4Vec1Vec weight{{std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0))}}; + for (const auto &value : slidingWindow) + { + TDouble1Vec sample{trend->detrend(value.first, value.second, 0.0)}; + residualModel.addSamples(CConstantWeights::COUNT, sample, weight); + } + } +} + const CUnivariateTimeSeriesModel::TTimeDoublePrCBuf &CUnivariateTimeSeriesModel::slidingWindow(void) const { return m_SlidingWindow; @@ -1391,7 +1448,8 @@ const CPrior &CUnivariateTimeSeriesModel::residualModel(void) const } CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel &other, - std::size_t id) : + std::size_t id, + bool isForForecast) : CModel(other.params()), m_Id(id), m_IsNonNegative(other.m_IsNonNegative), @@ -1399,18 +1457,98 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeri m_Rng(other.m_Rng), m_TrendModel(other.m_TrendModel->clone()), m_ResidualModel(other.m_ResidualModel->clone()), - m_AnomalyModel(other.m_AnomalyModel ? + m_AnomalyModel(!isForForecast && other.m_AnomalyModel ? 
boost::make_shared(*other.m_AnomalyModel) : TAnomalyModelPtr()), - m_SlidingWindow(other.m_SlidingWindow), + m_CandidateChangePoint(other.m_CandidateChangePoint), + m_CurrentChangeInterval(other.m_CurrentChangeInterval), + m_TimeOfLastChangePoint(other.m_TimeOfLastChangePoint), + m_ChangeDetector(!isForForecast && other.m_ChangeDetector ? + boost::make_shared(*other.m_ChangeDetector) : + TChangeDetectorPtr()), + m_SlidingWindow(!isForForecast ? other.m_SlidingWindow : TTimeDoublePrCBuf{}), m_Correlations(0) { - if (other.m_Controllers) + if (!isForForecast && other.m_Controllers != nullptr) { m_Controllers = boost::make_shared(*other.m_Controllers); } } +CUnivariateTimeSeriesModel::EUpdateResult +CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams ¶ms, + const TSizeVec &order, + const TTimeDouble2VecSizeTrVec &values) +{ + std::size_t median{order[order.size() / 2]}; + TDouble4Vec weights{unpack(params.priorWeights()[median])}; + core_t::TTime time{values[median].first}; + + if (m_ChangeDetector == nullptr) + { + if (maths_t::winsorisationWeight(params.weightStyles(), {weights}) < 1.0) + { + m_CurrentChangeInterval += this->params().bucketLength(); + if (this->params().testForChange(m_CurrentChangeInterval)) + { + m_ChangeDetector = boost::make_shared( + m_TrendModel, m_ResidualModel, + this->params().minimumTimeToDetectChange(time - m_TimeOfLastChangePoint), + this->params().maximumTimeToTestForChange()); + m_CurrentChangeInterval = 0; + } + } + else + { + m_CandidateChangePoint = {time, values[median].second[0]}; + m_CurrentChangeInterval = 0; + } + } + + if (m_ChangeDetector != nullptr) + { + m_ChangeDetector->addSamples(params.weightStyles(), + {std::make_pair(time, values[median].second[0])}, {weights}); + + if (m_ChangeDetector->stopTesting()) + { + m_ChangeDetector.reset(); + } + else if (auto change = m_ChangeDetector->change()) + { + LOG_DEBUG("Detected " << change->print() << " at " << values[median].first); + m_ChangeDetector.reset(); + m_TimeOfLastChangePoint = time; + return this->applyChange(*change); + } + } + + return E_Success; +} + +CUnivariateTimeSeriesModel::EUpdateResult +CUnivariateTimeSeriesModel::applyChange(const SChangeDescription &change) +{ + for (auto &value : m_SlidingWindow) + { + switch (change.s_Description) + { + case SChangeDescription::E_LevelShift: + value.second += change.s_Value[0]; + break; + case SChangeDescription::E_TimeShift: + value.first += static_cast(change.s_Value[0]); + break; + } + } + + m_TrendModel->applyChange(m_CandidateChangePoint.first, m_CandidateChangePoint.second, change); + change.s_ResidualModel->decayRate(m_ResidualModel->decayRate()); + m_ResidualModel = change.s_ResidualModel; + + return E_Success; +} + CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightStyles, const TTimeDouble2VecSizeTrVec &samples, @@ -1425,6 +1563,8 @@ CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightSt } } + EUpdateResult result = E_Success; + // Time order is not reliable, for example if the data are polled // or for count feature, the times of all samples will be the same. 
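// A minimal sketch (names ours) of the index sort performed below: order the
// samples by time, breaking ties on value, through an index permutation so
// the samples themselves stay put. A deterministic, stable tie-break matters
// here because polled data can give every sample in a bucket the same
// timestamp, and the trend must be updated in a reproducible order.

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

using TTimeDoublePr = std::pair<long long, double>;

std::vector<std::size_t> timeOrder(const std::vector<TTimeDoublePr> &samples)
{
    std::vector<std::size_t> order(samples.size());
    std::iota(order.begin(), order.end(), 0);
    std::stable_sort(order.begin(), order.end(),
                     [&samples](std::size_t lhs, std::size_t rhs)
                     { return samples[lhs] < samples[rhs]; });
    return order;
}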
TSizeVec timeorder(samples.size()); @@ -1438,52 +1578,20 @@ CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightSt samples[rhs].second); }); - EUpdateResult result = E_Success; + for (auto i : timeorder) { - TDouble4Vec weight(weightStyles.size()); - for (auto i : timeorder) + core_t::TTime time{samples[i].first}; + double value{samples[i].second[0]}; + TDouble4Vec weight(unpack(weights[i])); + if (m_TrendModel->addPoint(time, value, weightStyles, weight)) { - core_t::TTime time{samples[i].first}; - double value{samples[i].second[0]}; - for (std::size_t j = 0u; j < weights[i].size(); ++j) - { - weight[j] = weights[i][j][0]; - } - if (m_TrendModel->addPoint(time, value, weightStyles, weight)) - { - result = E_Reset; - } + result = E_Reset; } } + if (result == E_Reset) { - m_ResidualModel->setToNonInformative(0.0, m_ResidualModel->decayRate()); - TDouble4Vec1Vec weight{{std::max(this->params().learnRate(), - 5.0 / static_cast(SLIDING_WINDOW_SIZE))}}; - for (const auto &value : m_SlidingWindow) - { - TDouble1Vec sample{m_TrendModel->detrend(value.first, value.second, 0.0)}; - m_ResidualModel->addSamples(CConstantWeights::COUNT, sample, weight); - } - if (m_Correlations) - { - m_Correlations->removeTimeSeries(m_Id); - } - if (m_Controllers) - { - m_ResidualModel->decayRate( m_ResidualModel->decayRate() - / (*m_Controllers)[E_ResidualControl].multiplier()); - m_TrendModel->decayRate( m_TrendModel->decayRate() - / (*m_Controllers)[E_TrendControl].multiplier()); - for (auto &controller : *m_Controllers) - { - controller.reset(); - } - } - if (m_AnomalyModel) - { - m_AnomalyModel->reset(); - } + this->reinitializeStateGivenNewComponent(); } return result; @@ -1506,6 +1614,32 @@ void CUnivariateTimeSeriesModel::appendPredictionErrors(double interval, } } +void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent(void) +{ + reinitializeResidualModel(this->params().learnRate(), + m_TrendModel, m_SlidingWindow, *m_ResidualModel); + if (m_Correlations != nullptr) + { + m_Correlations->removeTimeSeries(m_Id); + } + if (m_Controllers != nullptr) + { + m_ResidualModel->decayRate( m_ResidualModel->decayRate() + / (*m_Controllers)[E_ResidualControl].multiplier()); + m_TrendModel->decayRate( m_TrendModel->decayRate() + / (*m_Controllers)[E_TrendControl].multiplier()); + for (auto &controller : *m_Controllers) + { + controller.reset(); + } + } + if (m_AnomalyModel != nullptr) + { + m_AnomalyModel->reset(); + } + m_ChangeDetector.reset(); +} + bool CUnivariateTimeSeriesModel::correlationModels(TSize1Vec &correlated, TSize2Vec1Vec &variables, TMultivariatePriorCPtrSizePr1Vec &correlationModels, @@ -1909,25 +2043,24 @@ void CTimeSeriesCorrelations::removeTimeSeries(std::size_t id) } void CTimeSeriesCorrelations::addSamples(std::size_t id, - maths_t::EDataType type, + const CModelAddSamplesParams ¶ms, const TTimeDouble2VecSizeTrVec &samples, - const TDouble4Vec1Vec &weights, - double interval, double multiplier) { SSampleData &data{m_SampleData[id]}; - data.s_Type = type; + data.s_Type = params.type(); data.s_Times.reserve(samples.size()); data.s_Samples.reserve(samples.size()); data.s_Tags.reserve(samples.size()); - for (const auto &sample : samples) + for (std::size_t i = 0u; i < samples.size(); ++i) { - data.s_Times.push_back(sample.first); - data.s_Samples.push_back(sample.second[0]); - data.s_Tags.push_back(sample.third); + data.s_Times.push_back(samples[i].first); + data.s_Samples.push_back(samples[i].second[0]); + data.s_Tags.push_back(samples[i].third); + 
data.s_Weights.push_back( + CUnivariateTimeSeriesModel::unpack(params.priorWeights()[i])); } - data.s_Weights = weights; - data.s_Interval = interval; + data.s_Interval = params.propagationInterval(); data.s_Multiplier = multiplier; m_Correlations.add(id, CBasicStatistics::median(data.s_Samples)); } @@ -2016,12 +2149,12 @@ void CTimeSeriesCorrelations::refreshLookup(void) CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CModelParams ¶ms, const CTimeSeriesDecompositionInterface &trend, - const CMultivariatePrior &prior, + const CMultivariatePrior &residualModel, const TDecayRateController2Ary *controllers, bool modelAnomalies) : CModel(params), m_IsNonNegative(false), - m_ResidualModel(prior.clone()), + m_ResidualModel(residualModel.clone()), m_AnomalyModel(modelAnomalies ? boost::make_shared(params.bucketLength(), params.decayRate()) : @@ -2117,7 +2250,6 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, return E_Success; } - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TOptionalTimeDouble2VecPr = boost::optional; TSizeVec valueorder(samples.size()); @@ -2129,13 +2261,11 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, }); TOptionalTimeDouble2VecPr randomSample; - - double p{SLIDING_WINDOW_SIZE * static_cast(this->params().bucketLength()) - / static_cast(core::constants::DAY)}; - if (p >= 1.0 || CSampling::uniformSample(m_Rng, 0.0, 1.0) < p) + if (TOptionalSize index = randomlySample(m_Rng, params, + this->params().bucketLength(), + valueorder)) { - std::size_t i{CSampling::uniformSample(m_Rng, 0, samples.size())}; - randomSample.reset({samples[valueorder[i]].first, samples[valueorder[i]].second}); + randomSample.reset({samples[*index].first, samples[*index].second}); } m_IsNonNegative = params.isNonNegative(); @@ -2169,35 +2299,26 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, m_ResidualModel->dataType(type); TDouble10Vec1Vec samples_; - TDouble10Vec4Vec1Vec weights; + TDouble10Vec4Vec1Vec weights_; samples_.reserve(samples.size()); - weights.reserve(samples.size()); + weights_.reserve(samples.size()); TMeanAccumulator averageTime; for (auto i : valueorder) { samples_.push_back(samples[i].second); - TDouble10Vec4Vec wi(params.weightStyles().size(), TDouble10Vec(dimension)); - for (std::size_t j = 0u; j < params.priorWeights()[i].size(); ++j) - { - const TDouble2Vec &weight{params.priorWeights()[i][j]}; - for (std::size_t d = 0u; d < dimension; ++d) - { - wi[j][d] = weight[d]; - } - } - weights.push_back(wi); + weights_.push_back(unpack(params.priorWeights()[i])); averageTime.add(static_cast(samples[i].first)); } - m_ResidualModel->addSamples(params.weightStyles(), samples_, weights); + m_ResidualModel->addSamples(params.weightStyles(), samples_, weights_); m_ResidualModel->propagateForwardsByTime(params.propagationInterval()); - if (m_AnomalyModel) + if (m_AnomalyModel != nullptr) { m_AnomalyModel->propagateForwardsByTime(params.propagationInterval()); } - if (m_Controllers) + if (m_Controllers != nullptr) { TDouble1VecVec errors[2]; errors[0].reserve(samples.size()); @@ -2261,28 +2382,15 @@ void CMultivariateTimeSeriesModel::skipTime(core_t::TTime gap) CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::mode(core_t::TTime time, const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights_) const + const TDouble2Vec4Vec &weights) const { std::size_t dimension = this->dimension(); - TDouble2Vec result(dimension); - - 
TDouble10Vec4Vec weights(weights_.size()); - for (std::size_t i = 0u; i < weights_.size(); ++i) - { - for (std::size_t d = 0u; d < dimension; ++d) - { - weights[i].push_back(weights_[i][d]); - } - } - - TDouble10Vec mode(m_ResidualModel->marginalLikelihoodMode(weightStyles, weights)); - + TDouble10Vec mode(m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights))); for (std::size_t d = 0u; d < dimension; ++d) { result[d] = mode[d] + CBasicStatistics::mean(m_TrendModel[d]->value(time)); } - return result; } @@ -2296,15 +2404,9 @@ CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights_) const + const TDouble2Vec4Vec &weights) const { - TDouble10Vec4Vec weights; - weights.reserve(weights_.size()); - for (const auto &weight : weights_) - { - weights.emplace_back(weight[0]); - } - TDouble10Vec1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, weights)); + TDouble10Vec1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights))); TDouble2Vec1Vec result; result.reserve(modes.size()); for (const auto &mode : modes) @@ -2333,8 +2435,6 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, { using TUnivariatePriorPtr = boost::shared_ptr; - static const TSizeDoublePr10Vec CONDITION; - std::size_t dimension{this->dimension()}; double scale{1.0 - this->params().probabilityBucketEmpty()}; @@ -2361,7 +2461,7 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, double median{mean[d]}; if (!m_ResidualModel->isNonInformative()) { - TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, CONDITION).first}; + TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; median = hint.empty() ? 
CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0)) : marginal->nearestMarginalLikelihoodMean(hint[d]); } @@ -2388,8 +2488,6 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, using TUnivariatePriorPtr = boost::shared_ptr; - static const TSizeDoublePr10Vec CONDITION; - std::size_t dimension{this->dimension()}; double scale{1.0 - this->params().probabilityBucketEmpty()}; @@ -2411,8 +2509,9 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, weights.push_back(weight[d]); } - TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, CONDITION).first}; - double median{CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0))}; + TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; + double median{CBasicStatistics::mean( + marginal->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; TDoubleDoublePr interval{ marginal->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; @@ -2466,20 +2565,12 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &pa std::size_t dimension{this->dimension()}; core_t::TTime time{time_[0][0]}; TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; - TDouble10Vec4Vec1Vec weights{TDouble10Vec4Vec(params.weightStyles().size(), - TDouble10Vec(dimension))}; for (std::size_t d = 0u; d < dimension; ++d) { sample[0][d] = m_TrendModel[d]->detrend(time, value[0][d], params.seasonalConfidenceInterval()); } - for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) - { - for (std::size_t d = 0u; d < dimension; ++d) - { - weights[0][i][d] = params.weights()[0][i][d]; - } - } + TDouble10Vec4Vec1Vec weights{unpack(params.weights()[0])}; bool bucketEmpty{params.bucketEmpty()[0][0]}; double probabilityBucketEmpty{this->params().probabilityBucketEmpty()}; @@ -2525,7 +2616,7 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &pa probability = (std::sqrt(pl[0] * pl[1]) + std::sqrt(pu[0] * pu[1])) / 2.0; - if (m_AnomalyModel) + if (m_AnomalyModel != nullptr) { TDouble2Vec residual(dimension); TDouble10Vec nearest(m_ResidualModel->nearestMarginalLikelihoodMean(sample[0])); @@ -2690,7 +2781,7 @@ void CMultivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInse } inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_ResidualModel), _1)); - if (m_AnomalyModel) + if (m_AnomalyModel != nullptr) { inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter, @@ -2704,6 +2795,41 @@ maths_t::EDataType CMultivariateTimeSeriesModel::dataType(void) const return m_ResidualModel->dataType(); } +CMultivariateTimeSeriesModel::TDouble10Vec4Vec CMultivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec &weights) +{ + TDouble10Vec4Vec result; + result.reserve(weights.size()); + for (const auto &weight : weights) + { + result.emplace_back(weight); + } + return result; +} + +void CMultivariateTimeSeriesModel::reinitializeResidualModel(double learnRate, + const TDecompositionPtr10Vec &trend, + const TTimeDouble2VecPrCBuf &slidingWindow, + CMultivariatePrior &residualModel) +{ + residualModel.setToNonInformative(0.0, residualModel.decayRate()); + if (!slidingWindow.empty()) + { + std::size_t dimension{residualModel.dimension()}; + double slidingWindowLength{static_cast(slidingWindow.size())}; + TDouble10Vec4Vec1Vec weight{{TDouble10Vec( + dimension, std::max(learnRate, 
std::min(5.0 / slidingWindowLength, 1.0)))}}; + for (const auto &value : slidingWindow) + { + TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; + for (std::size_t i = 0u; i < dimension; ++i) + { + sample[0][i] = trend[i]->detrend(value.first, value.second[i], 0.0); + } + residualModel.addSamples(CConstantWeights::COUNT, sample, weight); + } + } +} + const CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf & CMultivariateTimeSeriesModel::slidingWindow(void) const { @@ -2773,37 +2899,7 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weight } if (result == E_Reset) { - m_ResidualModel->setToNonInformative(0.0, m_ResidualModel->decayRate()); - TDouble10Vec4Vec1Vec weight{{TDouble10Vec( - dimension, std::max(this->params().learnRate(), - 5.0 / static_cast(SLIDING_WINDOW_SIZE)))}}; - for (const auto &value : m_SlidingWindow) - { - TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; - for (std::size_t i = 0u; i < dimension; ++i) - { - sample[0][i] = m_TrendModel[i]->detrend(value.first, value.second[i], 0.0); - } - m_ResidualModel->addSamples(CConstantWeights::COUNT, sample, weight); - } - if (m_Controllers) - { - m_ResidualModel->decayRate( m_ResidualModel->decayRate() - / (*m_Controllers)[E_ResidualControl].multiplier()); - for (auto &trend : m_TrendModel) - { - trend->decayRate( trend->decayRate() - / (*m_Controllers)[E_TrendControl].multiplier()); - } - for (auto &controller : *m_Controllers) - { - controller.reset(); - } - } - if (m_AnomalyModel) - { - m_AnomalyModel->reset(); - } + this->reinitializeStateGivenNewComponent(); } return result; @@ -2823,6 +2919,30 @@ void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, } } +void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent(void) +{ + reinitializeResidualModel(this->params().learnRate(), + m_TrendModel, m_SlidingWindow, *m_ResidualModel); + if (m_Controllers != nullptr) + { + m_ResidualModel->decayRate( m_ResidualModel->decayRate() + / (*m_Controllers)[E_ResidualControl].multiplier()); + for (auto &trend : m_TrendModel) + { + trend->decayRate( trend->decayRate() + / (*m_Controllers)[E_TrendControl].multiplier()); + } + for (auto &controller : *m_Controllers) + { + controller.reset(); + } + } + if (m_AnomalyModel != nullptr) + { + m_AnomalyModel->reset(); + } +} + std::size_t CMultivariateTimeSeriesModel::dimension(void) const { return m_ResidualModel->dimension(); diff --git a/lib/maths/CTrendComponent.cc b/lib/maths/CTrendComponent.cc index 6fbbae5052..e522eca736 100644 --- a/lib/maths/CTrendComponent.cc +++ b/lib/maths/CTrendComponent.cc @@ -33,6 +33,14 @@ namespace maths { namespace { +using TOptionalDoubleDoublePr = boost::optional>; + +const double TIME_SCALES[]{144.0, 72.0, 36.0, 12.0, 4.0, 1.0, 0.25, 0.05}; +const std::size_t NUMBER_MODELS{boost::size(TIME_SCALES)}; +const double MAX_CONDITION{1e12}; +const core_t::TTime UNSET_TIME{0}; +const std::size_t NO_CHANGE_LABEL{0}; +const std::size_t LEVEL_CHANGE_LABEL{1}; //! Get the desired weight for the regression model. double modelWeight(double targetDecayRate, double modelDecayRate) @@ -49,6 +57,41 @@ double scaleTime(core_t::TTime time, core_t::TTime origin) return static_cast(time - origin) / static_cast(core::constants::WEEK); } +//! Get the \p confidence interval for \p prediction and \p variance. 
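// Usage sketch for the helper declared below: it returns the symmetric
// Gaussian interval at the requested confidence percentage, taking the
// quantiles at (100 - confidence) / 200 and (100 + confidence) / 200. For
// prediction 10.0, variance 4.0 and confidence 95.0 this is roughly
// (6.08, 13.92), i.e. 10 +/- 1.96 * 2. This standalone snippet (ours)
// reproduces the calculation with the same Boost.Math calls.

#include <boost/math/distributions/normal.hpp>
#include <cmath>
#include <iostream>

int main()
{
    double prediction{10.0}, variance{4.0}, confidence{95.0};
    boost::math::normal normal{prediction, std::sqrt(variance)};
    double ql{boost::math::quantile(normal, (100.0 - confidence) / 200.0)};
    double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)};
    std::cout << "[" << ql << ", " << qu << "]\n";  // prints [6.08007, 13.9199]
    return 0;
}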
+TOptionalDoubleDoublePr confidenceInterval(double prediction, + double variance, + double confidence) +{ + try + { + boost::math::normal normal{prediction, std::sqrt(variance)}; + double ql{boost::math::quantile(normal, (100.0 - confidence) / 200.0)}; + double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; + return std::make_pair(ql, qu); + } + catch (const std::exception &e) + { + LOG_ERROR("Failed calculating confidence interval: " << e.what() + << ", prediction = " << prediction + << ", variance = " << variance + << ", confidence = " << confidence); + } + return TOptionalDoubleDoublePr{}; +} + +CNaiveBayes initialProbabilityOfChangeModel(double decayRate) +{ + decayRate *= TIME_SCALES[NUMBER_MODELS - 1]; + return CNaiveBayes{CNaiveBayesFeatureDensityFromPrior{ + CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, + decayRate)}, decayRate, -20.0}; +} + +CNormalMeanPrecConjugate initialMagnitudeOfChangeModel(double decayRate) +{ + return CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate); +} + const std::string TARGET_DECAY_RATE_TAG{"a"}; const std::string FIRST_UPDATE_TAG{"b"}; const std::string LAST_UPDATE_TAG{"c"}; @@ -56,15 +99,13 @@ const std::string REGRESSION_ORIGIN_TAG{"d"}; const std::string MODEL_TAG{"e"}; const std::string PREDICTION_ERROR_VARIANCE_TAG{"f"}; const std::string VALUE_MOMENTS_TAG{"g"}; +const std::string TIME_OF_LAST_LEVEL_CHANGE_TAG{"h"}; +const std::string PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG{"i"}; +const std::string MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG{"j"}; const std::string WEIGHT_TAG{"a"}; const std::string REGRESSION_TAG{"b"}; const std::string RESIDUAL_MOMENTS_TAG{"c"}; -const double TIME_SCALES[]{144.0, 72.0, 36.0, 12.0, 4.0, 1.0, 0.25, 0.05}; -const std::size_t NUMBER_MODELS{boost::size(TIME_SCALES)}; -const double MAX_CONDITION{1e12}; -const core_t::TTime UNSET_TIME{0}; - } CTrendComponent::CTrendComponent(double decayRate) : @@ -73,11 +114,14 @@ CTrendComponent::CTrendComponent(double decayRate) : m_FirstUpdate(UNSET_TIME), m_LastUpdate(UNSET_TIME), m_RegressionOrigin(UNSET_TIME), - m_PredictionErrorVariance(0.0) + m_PredictionErrorVariance(0.0), + m_TimeOfLastLevelChange(UNSET_TIME), + m_ProbabilityOfLevelChangeModel(initialProbabilityOfChangeModel(decayRate)), + m_MagnitudeOfLevelChangeModel(initialMagnitudeOfChangeModel(decayRate)) { for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - m_Models.emplace_back(modelWeight(1.0, TIME_SCALES[i])); + m_TrendModels.emplace_back(modelWeight(1.0, TIME_SCALES[i])); } } @@ -88,9 +132,12 @@ void CTrendComponent::swap(CTrendComponent &other) std::swap(m_FirstUpdate, other.m_FirstUpdate); std::swap(m_LastUpdate, other.m_LastUpdate); std::swap(m_RegressionOrigin, other.m_RegressionOrigin); - m_Models.swap(other.m_Models); + m_TrendModels.swap(other.m_TrendModels); std::swap(m_PredictionErrorVariance, other.m_PredictionErrorVariance); std::swap(m_ValueMoments, other.m_ValueMoments); + std::swap(m_TimeOfLastLevelChange, other.m_TimeOfLastLevelChange); + m_ProbabilityOfLevelChangeModel.swap(other.m_ProbabilityOfLevelChangeModel); + m_MagnitudeOfLevelChangeModel.swap(other.m_MagnitudeOfLevelChangeModel); } void CTrendComponent::acceptPersistInserter(core::CStatePersistInserter &inserter) const @@ -99,7 +146,7 @@ void CTrendComponent::acceptPersistInserter(core::CStatePersistInserter &inserte inserter.insertValue(FIRST_UPDATE_TAG, m_FirstUpdate); inserter.insertValue(LAST_UPDATE_TAG, m_LastUpdate); inserter.insertValue(REGRESSION_ORIGIN_TAG, 
m_RegressionOrigin); - for (const auto &model : m_Models) + for (const auto &model : m_TrendModels) { inserter.insertLevel(MODEL_TAG, boost::bind(&SModel::acceptPersistInserter, &model, _1)); } @@ -107,9 +154,17 @@ void CTrendComponent::acceptPersistInserter(core::CStatePersistInserter &inserte m_PredictionErrorVariance, core::CIEEE754::E_DoublePrecision); inserter.insertValue(VALUE_MOMENTS_TAG, m_ValueMoments.toDelimited()); + inserter.insertValue(TIME_OF_LAST_LEVEL_CHANGE_TAG, m_TimeOfLastLevelChange); + inserter.insertLevel(PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG, + boost::bind(&CNaiveBayes::acceptPersistInserter, + &m_ProbabilityOfLevelChangeModel, _1)); + inserter.insertLevel(MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG, + boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, + &m_MagnitudeOfLevelChangeModel, _1)); } -bool CTrendComponent::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) +bool CTrendComponent::acceptRestoreTraverser(const SDistributionRestoreParams &params, + core::CStateRestoreTraverser &traverser) { std::size_t i{0}; do @@ -120,9 +175,14 @@ bool CTrendComponent::acceptRestoreTraverser(core::CStateRestoreTraverser &trave RESTORE_BUILT_IN(LAST_UPDATE_TAG, m_LastUpdate) RESTORE_BUILT_IN(REGRESSION_ORIGIN_TAG, m_RegressionOrigin) RESTORE(MODEL_TAG, traverser.traverseSubLevel(boost::bind( - &SModel::acceptRestoreTraverser, &m_Models[i++], _1))) + &SModel::acceptRestoreTraverser, &m_TrendModels[i++], _1))) RESTORE_BUILT_IN(PREDICTION_ERROR_VARIANCE_TAG, m_PredictionErrorVariance) RESTORE(VALUE_MOMENTS_TAG, m_ValueMoments.fromDelimited(traverser.value())) + RESTORE_BUILT_IN(TIME_OF_LAST_LEVEL_CHANGE_TAG, m_TimeOfLastLevelChange) + RESTORE_NO_ERROR(PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG, + m_ProbabilityOfLevelChangeModel = CNaiveBayes(params, traverser)) + RESTORE_NO_ERROR(MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG, + m_MagnitudeOfLevelChangeModel = CNormalMeanPrecConjugate(params, traverser)) } while (traverser.next()); return true; @@ -140,10 +200,13 @@ void CTrendComponent::clear() m_RegressionOrigin = UNSET_TIME; for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - m_Models[i] = SModel(modelWeight(1.0, TIME_SCALES[i])); + m_TrendModels[i] = SModel(modelWeight(1.0, TIME_SCALES[i])); } m_PredictionErrorVariance = 0.0; m_ValueMoments = TMeanVarAccumulator(); + m_TimeOfLastLevelChange = UNSET_TIME; + m_ProbabilityOfLevelChangeModel = initialProbabilityOfChangeModel(m_DefaultDecayRate); + m_MagnitudeOfLevelChangeModel = initialMagnitudeOfChangeModel(m_DefaultDecayRate); } void CTrendComponent::shiftOrigin(core_t::TTime time) @@ -152,7 +215,7 @@ { double scaledShift{scaleTime(time, m_RegressionOrigin)}; if (scaledShift > 0.0) { - for (auto &model : m_Models) + for (auto &model : m_TrendModels) { model.s_Regression.shiftAbscissa(-scaledShift); } @@ -166,7 +229,39 @@ void CTrendComponent::shiftSlope(double decayRate, double shift) { double shift_{std::min(m_DefaultDecayRate * TIME_SCALES[i] / decayRate, 1.0) * shift}; - m_Models[i].s_Regression.shiftGradient(shift_); + m_TrendModels[i].s_Regression.shiftGradient(shift_); + } +} + +void CTrendComponent::shiftLevel(core_t::TTime time, double value, double shift) +{ + for (auto &model : m_TrendModels) + { + model.s_Regression.shiftOrdinate(shift); + } + if (m_TimeOfLastLevelChange != UNSET_TIME) + { + double dt{static_cast<double>(time - m_TimeOfLastLevelChange)}; + m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, {{dt}, {value}}); + } + // The magic 1.2 corrects for the fact that the trend is updated + // with new values during change detection: the trend absorbs part + // of the step before the shift is applied, so the applied shift is + // biased low. For the purpose of modeling step changes we want an + // unbiased estimate of the step size, including any adaption in + // the trend during the detection period. The factor is an + // empirical estimate of the degree of bias across a range of step + // changes. + m_MagnitudeOfLevelChangeModel.addSamples({maths_t::E_SampleCountWeight}, {1.2 * shift}, {{1.0}}); + m_TimeOfLastLevelChange = time; +} + +void CTrendComponent::dontShiftLevel(core_t::TTime time, double value) +{ + if (m_TimeOfLastLevelChange != UNSET_TIME) + { + double dt{static_cast<double>(time - m_TimeOfLastLevelChange)}; + m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(NO_CHANGE_LABEL, {{dt}, {value}}); } } @@ -177,8 +272,8 @@ void CTrendComponent::add(core_t::TTime time, double value, double weight) for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - m_Models[i].s_Weight.add(modelWeight(m_TargetDecayRate, - m_DefaultDecayRate * TIME_SCALES[i])); + m_TrendModels[i].s_Weight.add(modelWeight(m_TargetDecayRate, + m_DefaultDecayRate * TIME_SCALES[i])); } // Update the models. @@ -200,10 +295,11 @@ void CTrendComponent::add(core_t::TTime time, double value, double weight) } double scaledTime{scaleTime(time, m_RegressionOrigin)}; - for (auto &model : m_Models) + for (auto &model : m_TrendModels) { model.s_Regression.add(scaledTime, value, weight); - model.s_ResidualMoments.add(value - model.s_Regression.predict(scaledTime, MAX_CONDITION)); + model.s_ResidualMoments.add( + value - model.s_Regression.predict(scaledTime, MAX_CONDITION)); } m_ValueMoments.add(value); @@ -211,6 +307,11 @@ m_LastUpdate = std::max(m_LastUpdate, time); } +void CTrendComponent::dataType(maths_t::EDataType dataType) +{ + m_ProbabilityOfLevelChangeModel.dataType(dataType); +} + double CTrendComponent::defaultDecayRate() const { return m_DefaultDecayRate; @@ -227,10 +328,14 @@ void CTrendComponent::propagateForwardsByTime(core_t::TTime interval) double median{CBasicStatistics::median(factors)}; for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - m_Models[i].s_Weight.age(median); - m_Models[i].s_Regression.age(factors[i]); - m_Models[i].s_ResidualMoments.age(std::sqrt(factors[i])); + m_TrendModels[i].s_Weight.age(median); + m_TrendModels[i].s_Regression.age(factors[i]); + m_TrendModels[i].s_ResidualMoments.age(std::sqrt(factors[i])); } + double interval_{ static_cast<double>(interval) + / static_cast<double>(core::constants::DAY)}; + m_ProbabilityOfLevelChangeModel.propagateForwardsByTime(interval_); + m_MagnitudeOfLevelChangeModel.propagateForwardsByTime(interval_); } CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, double confidence) const @@ -249,8 +354,8 @@ CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, doub TDoubleVec factors(this->factors(std::abs(time - m_LastUpdate))); for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - prediction_.add(m_Models[i].s_Regression.predict(scaledTime, MAX_CONDITION), - factors[i] * CBasicStatistics::mean(m_Models[i].s_Weight)); + prediction_.add(m_TrendModels[i].s_Regression.predict(scaledTime, MAX_CONDITION), + factors[i] * CBasicStatistics::mean(m_TrendModels[i].s_Weight)); } } @@ -263,19 +368,9 @@ CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, doub /
std::max(this->count(), 1.0) + b * CBasicStatistics::variance(m_ValueMoments) / std::max(CBasicStatistics::count(m_ValueMoments), 1.0)}; - try + if (auto interval = confidenceInterval(prediction, variance, confidence)) { - boost::math::normal normal{prediction, std::sqrt(variance)}; - double ql{boost::math::quantile(normal, (100.0 - confidence) / 200.0)}; - double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; - return {ql, qu}; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed calculating confidence interval: " << e.what() - << ", prediction = " << prediction - << ", variance = " << variance - << ", confidence = " << confidence); + return *interval; } } @@ -316,10 +411,9 @@ void CTrendComponent::forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, - TDouble3VecVec &result) const + const TSeasonalForecast &seasonal, + const TWriteForecastResult &writer) const { - result.clear(); - if (endTime < startTime) { LOG_ERROR("Bad forecast range: [" << startTime << "," << endTime << "]"); @@ -333,13 +427,9 @@ void CTrendComponent::forecast(core_t::TTime startTime, endTime = startTime + CIntegerTools::ceil(endTime - startTime, step); - core_t::TTime steps{(endTime - startTime) / step}; - result.resize(steps, TDouble3Vec(3)); - LOG_TRACE("forecasting = " << this->print()); TDoubleVec factors(this->factors(step)); - TDoubleVec modelWeights(this->initialForecastModelWeights()); TDoubleVec errorWeights(this->initialForecastErrorWeights()); TRegressionArrayVec models(NUMBER_MODELS); @@ -347,21 +437,26 @@ void CTrendComponent::forecast(core_t::TTime startTime, TDoubleVec residualVariances(NUMBER_MODELS); for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - m_Models[i].s_Regression.parameters(models[i], MAX_CONDITION); - m_Models[i].s_Regression.covariances(m_PredictionErrorVariance, modelCovariances[i], MAX_CONDITION); - modelCovariances[i] /= std::max(m_Models[i].s_Regression.count(), 1.0); - residualVariances[i] = std::pow(CBasicStatistics::mean(m_Models[i].s_ResidualMoments), 2.0) - + CBasicStatistics::variance(m_Models[i].s_ResidualMoments); + const SModel &model{m_TrendModels[i]}; + model.s_Regression.parameters(models[i], MAX_CONDITION); + model.s_Regression.covariances(m_PredictionErrorVariance, + modelCovariances[i], MAX_CONDITION); + modelCovariances[i] /= std::max(model.s_Regression.count(), 1.0); + residualVariances[i] = CTools::pow2(CBasicStatistics::mean(model.s_ResidualMoments)) + + CBasicStatistics::variance(model.s_ResidualMoments); LOG_TRACE("params = " << core::CContainerPrinter::print(models[i])); LOG_TRACE("covariances = " << modelCovariances[i].toDelimited()) LOG_TRACE("variances = " << residualVariances[i]); } LOG_TRACE("long time variance = " << CBasicStatistics::variance(m_ValueMoments)); + CForecastLevel level{m_ProbabilityOfLevelChangeModel, + m_MagnitudeOfLevelChangeModel, + m_TimeOfLastLevelChange}; + TDoubleVec variances(NUMBER_MODELS + 1); for (core_t::TTime time = startTime; time < endTime; time += step) { - core_t::TTime pillar{(time - startTime) / step}; double scaledDt{scaleTime(time, startTime)}; TVector times({0.0, scaledDt, scaledDt * scaledDt}); @@ -371,7 +466,7 @@ void CTrendComponent::forecast(core_t::TTime startTime, for (std::size_t j = 0u; j < NUMBER_MODELS; ++j) { modelWeights[j] *= factors[j]; - errorWeights[j] *= std::pow(factors[j], 2.0); + errorWeights[j] *= CTools::pow2(factors[j]); } for (std::size_t j = 0u; j < NUMBER_MODELS; ++j) @@ -390,26 +485,21 @@ void 
CTrendComponent::forecast(core_t::TTime startTime, } double prediction{this->value(modelWeights, models, scaleTime(time, m_RegressionOrigin))}; + TDouble3Vec seasonal_(seasonal(time)); + TDouble3Vec level_(level.forecast(time, seasonal_[1] + prediction, confidence)); + double ql{0.0}; double qu{0.0}; double variance{ a * CBasicStatistics::mean(variance_) + b * CBasicStatistics::variance(m_ValueMoments)}; - try + if (auto interval = confidenceInterval(0.0, variance, confidence)) { - boost::math::normal normal{0.0, std::sqrt(variance)}; - ql = boost::math::quantile(normal, (100.0 - confidence) / 200.0); - qu = boost::math::quantile(normal, (100.0 + confidence) / 200.0); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed calculating confidence interval: " << e.what() - << ", variance = " << variance - << ", confidence = " << confidence); + boost::tie(ql, qu) = *interval; } - result[pillar][0] = prediction + ql; - result[pillar][1] = prediction; - result[pillar][2] = prediction + qu; + writer(time, {level_[0] + seasonal_[0] + prediction + ql, + level_[1] + seasonal_[1] + prediction, + level_[2] + seasonal_[2] + prediction + qu}); } } @@ -428,18 +518,29 @@ uint64_t CTrendComponent::checksum(uint64_t seed) const seed = CChecksum::calculate(seed, m_TargetDecayRate); seed = CChecksum::calculate(seed, m_FirstUpdate); seed = CChecksum::calculate(seed, m_LastUpdate); - seed = CChecksum::calculate(seed, m_Models); + seed = CChecksum::calculate(seed, m_TrendModels); seed = CChecksum::calculate(seed, m_PredictionErrorVariance); - return CChecksum::calculate(seed, m_ValueMoments); + seed = CChecksum::calculate(seed, m_ValueMoments); + seed = CChecksum::calculate(seed, m_TimeOfLastLevelChange); + seed = CChecksum::calculate(seed, m_ProbabilityOfLevelChangeModel); + return CChecksum::calculate(seed, m_MagnitudeOfLevelChangeModel); } std::string CTrendComponent::print() const { std::ostringstream result; - for (const auto &model : m_Models) + result << "\n===\n"; + result << "Trend Models:"; + for (const auto &model : m_TrendModels) { - result << model.s_Regression.print() << "\n"; + result << "\n" << model.s_Regression.print(); } + result << "\n===\n"; + result << "Probability of Change Model:"; + result << m_ProbabilityOfLevelChangeModel.print(); + result << "===\n"; + result << "Magnitude of Change Model:"; + result << m_MagnitudeOfLevelChangeModel.print(); return result.str(); } @@ -482,7 +583,7 @@ CTrendComponent::TDoubleVec CTrendComponent::initialForecastErrorWeights() const double CTrendComponent::count() const { TMeanAccumulator result; - for (const auto &model : m_Models) + for (const auto &model : m_TrendModels) { result.add(CTools::fastLog(model.s_Regression.count()), CBasicStatistics::mean(model.s_Weight)); @@ -517,7 +618,7 @@ double CTrendComponent::weightOfPrediction(core_t::TTime time) const return 1.0; } - return CTools::smoothHeaviside(extrapolateInterval / interval, 1.0 / 12.0, -1.0); + return CTools::smoothHeaviside(extrapolateInterval / interval, 0.1, 1.0, -1.0); } CTrendComponent::SModel::SModel(double weight) @@ -554,5 +655,64 @@ uint64_t CTrendComponent::SModel::checksum(uint64_t seed) const return CChecksum::calculate(seed, s_ResidualMoments); } +CTrendComponent::CForecastLevel::CForecastLevel(const CNaiveBayes &probability, + const CNormalMeanPrecConjugate &magnitude, + core_t::TTime timeOfLastChange, + std::size_t numberPaths) : + m_Probability(probability), + m_Magnitude(magnitude), + m_Levels(numberPaths), + m_TimesOfLastChange(numberPaths, timeOfLastChange), + 
m_ProbabilitiesOfChange(numberPaths, 0.0) +{ + m_Uniform01.reserve(numberPaths); +} + +CTrendComponent::TDouble3Vec +CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, double confidence) +{ + TDouble3Vec result{0.0, 0.0, 0.0}; + + if (m_Probability.initialized()) + { + CSampling::uniformSample(0.0, 1.0, m_Levels.size(), m_Uniform01); + bool reorder{false}; + for (std::size_t i = 0u; i < m_Levels.size(); ++i) + { + double dt{static_cast<double>(time - m_TimesOfLastChange[i])}; + double x{m_Levels[i] + prediction}; + double p{m_Probability.classProbability(LEVEL_CHANGE_LABEL, {{dt}, {x}})}; + m_ProbabilitiesOfChange[i] = std::max(m_ProbabilitiesOfChange[i], p); + if (m_Uniform01[i] < m_ProbabilitiesOfChange[i]) + { + double stepMean{m_Magnitude.marginalLikelihoodMean()}; + double stepVariance{m_Magnitude.marginalLikelihoodVariance()}; + m_Levels[i] += CSampling::normalSample(m_Rng, stepMean, stepVariance); + m_TimesOfLastChange[i] = time; + m_ProbabilitiesOfChange[i] = 0.0; + reorder = true; + } + } + if (reorder) + { + COrderings::simultaneousSort(m_Levels, m_TimesOfLastChange, m_ProbabilitiesOfChange); + } + + double rollouts{static_cast<double>(m_Levels.size())}; + std::size_t lower{std::min(static_cast<std::size_t>( + (100.0 - confidence) / 200.0 * rollouts + 0.5), + m_Levels.size() - 1)}; + std::size_t upper{std::min(static_cast<std::size_t>( + (100.0 + confidence) / 200.0 * rollouts + 0.5), + m_Levels.size() - 1)}; + + result[0] = m_Levels[lower]; + result[1] = CBasicStatistics::median(m_Levels); + result[2] = m_Levels[upper]; + } + + return result; +} + } } diff --git a/lib/maths/MathsTypes.cc b/lib/maths/MathsTypes.cc index 4423feaff6..9c20be452b 100644 --- a/lib/maths/MathsTypes.cc +++ b/lib/maths/MathsTypes.cc @@ -56,6 +56,44 @@ inline void multiplyEquals(const TDouble10Vec &rhs, TDouble10Vec &lhs) } } +//! Check if less than zero. +inline bool isNegative(double value) +{ + return value < 0.0; +} + +//! Elementwise check if less than zero. +inline bool isNegative(const TDouble10Vec &values) +{ + for (auto value : values) + { + if (value < 0.0) + { + return true; + } + } + return false; +} + +//! Check if less than or equal to zero. +inline bool isNonPositive(double value) +{ + return value <= 0.0; +} + +//! Elementwise check if less than or equal to zero. +inline bool isNonPositive(const TDouble10Vec &values) +{ + for (auto value : values) + { + if (value <= 0.0) + { + return true; + } + } + return false; +} +
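The helpers above feed a common validate-then-commit pattern used by every weight extraction function that follows: the product of the applicable weights is accumulated into a scratch candidate and only assigned to the result once it has been checked for finiteness and sign, so a bad weight now degrades to the neutral value 1.0 with an error log instead of throwing as the old public accessors did. A minimal standalone sketch of the pattern, with the extractWeight name and plain std::vector types purely illustrative, not part of this patch:

    #include <cmath>
    #include <iostream>
    #include <vector>

    // Multiply the applicable weights into a scratch candidate and commit it
    // to 'result' only if it is finite and non-negative; otherwise keep the
    // neutral value already in 'result' and report the bad input.
    void extractWeight(const std::vector<double> &weights, double &result)
    {
        double candidate{result};
        for (double weight : weights)
        {
            candidate *= weight;
        }
        if (!std::isfinite(candidate) || candidate < 0.0)
        {
            std::cerr << "Ignoring bad weight: " << candidate << '\n';
        }
        else
        {
            result = candidate;
        }
    }

This is why the public count and variance scale accessors later in the file lose their throw statements: validation now happens exactly once, inside the detail functions.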
//! Extract the effective sample count from a collection of weights. template<typename T> void count(const TWeightStyleVec &weightStyles, @@ -64,16 +102,25 @@ { if (check(weightStyles, weights)) { + T candidate(result); for (std::size_t i = 0u; i < weightStyles.size(); ++i) { switch (weightStyles[i]) { - case E_SampleCountWeight: multiplyEquals(weights[i], result); return; + case E_SampleCountWeight: multiplyEquals(weights[i], candidate); break; case E_SampleSeasonalVarianceScaleWeight: break; case E_SampleCountVarianceScaleWeight: break; case E_SampleWinsorisationWeight: break; } } + if (!maths::CMathsFuncs::isFinite(candidate) || isNegative(candidate)) + { + LOG_ERROR("Ignoring bad count weight: " << candidate); + } + else + { + result = std::move(candidate); + } } } @@ -86,20 +133,59 @@ void countForUpdate(const TWeightStyleVec &weightStyles, { if (check(weightStyles, weights)) { + T candidate(result); for (std::size_t i = 0u; i < weightStyles.size(); ++i) { switch (weightStyles[i]) { - case E_SampleCountWeight: multiplyEquals(weights[i], result); break; + case E_SampleCountWeight: multiplyEquals(weights[i], candidate); break; case E_SampleSeasonalVarianceScaleWeight: break; case E_SampleCountVarianceScaleWeight: break; - case E_SampleWinsorisationWeight: multiplyEquals(weights[i], result); break; + case E_SampleWinsorisationWeight: multiplyEquals(weights[i], candidate); break; } } + if (!maths::CMathsFuncs::isFinite(candidate) || isNegative(candidate)) + { + LOG_ERROR("Ignoring bad count weight: " << candidate); + } + else + { + result = std::move(candidate); + } + } +} + +//! Extract the Winsorisation weight from a collection of weights. +template<typename T> +void winsorisationWeight(const TWeightStyleVec &weightStyles, + const core::CSmallVector<T, 4> &weights, + T &result) +{ + if (check(weightStyles, weights)) + { + T candidate(result); + for (std::size_t i = 0u; i < weightStyles.size(); ++i) + { + switch (weightStyles[i]) + { + case E_SampleCountWeight: break; + case E_SampleSeasonalVarianceScaleWeight: break; + case E_SampleCountVarianceScaleWeight: break; + case E_SampleWinsorisationWeight: multiplyEquals(weights[i], candidate); break; + } + } + if (!maths::CMathsFuncs::isFinite(candidate) || isNegative(candidate)) + { + LOG_ERROR("Ignoring bad Winsorisation weight: " << candidate); + } + else + { + result = std::move(candidate); + } } } -//! Extract the variance scale from a collection of weights. +//! Extract the seasonal variance scale from a collection of weights. template<typename T> void seasonalVarianceScale(const TWeightStyleVec &weightStyles, const core::CSmallVector<T, 4> &weights, @@ -107,20 +193,29 @@ { if (check(weightStyles, weights)) { + T candidate(result); for (std::size_t i = 0u; i < weightStyles.size(); ++i) { switch (weightStyles[i]) { case E_SampleCountWeight: break; - case E_SampleSeasonalVarianceScaleWeight: multiplyEquals(weights[i], result); return; + case E_SampleSeasonalVarianceScaleWeight: multiplyEquals(weights[i], candidate); break; case E_SampleCountVarianceScaleWeight: break; case E_SampleWinsorisationWeight: break; } } + if (!maths::CMathsFuncs::isFinite(candidate) || isNonPositive(candidate)) + { + LOG_ERROR("Ignoring bad variance scale: " << candidate); + } + else + { + result = std::move(candidate); + } } } -//! Extract the variance scale from a collection of weights. +//! Extract the count variance scale from a collection of weights.
template<typename T> void countVarianceScale(const TWeightStyleVec &weightStyles, const core::CSmallVector<T, 4> &weights, @@ -128,16 +223,25 @@ { if (check(weightStyles, weights)) { + T candidate(result); for (std::size_t i = 0u; i < weightStyles.size(); ++i) { switch (weightStyles[i]) { case E_SampleCountWeight: break; case E_SampleSeasonalVarianceScaleWeight: break; - case E_SampleCountVarianceScaleWeight: multiplyEquals(weights[i], result); return; + case E_SampleCountVarianceScaleWeight: multiplyEquals(weights[i], candidate); break; case E_SampleWinsorisationWeight: break; } } + if (!maths::CMathsFuncs::isFinite(candidate) || isNonPositive(candidate)) + { + LOG_ERROR("Ignoring bad variance scale: " << candidate); + } + else + { + result = std::move(candidate); + } } } @@ -147,12 +251,8 @@ void countVarianceScale(const TWeightStyleVec &weightStyles, double count(const TWeightStyleVec &weightStyles, const TDouble4Vec &weights) { - double result = 1.0; + double result{1.0}; detail::count(weightStyles, weights, result); - if (!maths::CMathsFuncs::isFinite(result) || result < 0.0) - { - throw std::runtime_error("Bad count weight " + core::CStringUtils::typeToString(result)); - } return result; } @@ -162,13 +262,6 @@ TDouble10Vec count(std::size_t dimension, { TDouble10Vec result(dimension, 1.0); detail::count(weightStyles, weights, result); - for (std::size_t i = 0u; i < dimension; ++i) - { - if (!maths::CMathsFuncs::isFinite(result[i]) || result[i] < 0.0) - { - throw std::runtime_error("Bad count weight: [" + core::CContainerPrinter::print(result) + "]"); - } - } return result; } @@ -176,13 +269,8 @@ double countForUpdate(const TWeightStyleVec &weightStyles, const TDouble4Vec &weights) { - double result = 1.0; + double result{1.0}; detail::countForUpdate(weightStyles, weights, result); - if (!maths::CMathsFuncs::isFinite(result) || result < 0.0) - { - throw std::runtime_error("Bad count weight " - + core::CStringUtils::typeToString(result)); - } return result; } @@ -192,26 +280,31 @@ TDouble10Vec countForUpdate(std::size_t dimension, { TDouble10Vec result(dimension, 1.0); detail::countForUpdate(weightStyles, weights, result); - for (std::size_t i = 0u; i < dimension; ++i) - { - if (!maths::CMathsFuncs::isFinite(result[i]) || result[i] < 0.0) - { - throw std::runtime_error("Bad count weight: [" + core::CContainerPrinter::print(result) + "]"); - } - } + return result; +} + +double winsorisationWeight(const TWeightStyleVec &weightStyles, + const TDouble4Vec &weights) +{ + double result{1.0}; + detail::winsorisationWeight(weightStyles, weights, result); + return result; +} + +TDouble10Vec winsorisationWeight(std::size_t dimension, + const TWeightStyleVec &weightStyles, + const TDouble10Vec4Vec &weights) +{ + TDouble10Vec result(dimension, 1.0); + detail::winsorisationWeight(weightStyles, weights, result); return result; } double seasonalVarianceScale(const TWeightStyleVec &weightStyles, const TDouble4Vec &weights) { - double result = 1.0; + double result{1.0}; detail::seasonalVarianceScale(weightStyles, weights, result); - if (!maths::CMathsFuncs::isFinite(result) || result <= 0.0) - { - throw std::runtime_error("Bad variance scale " - + core::CStringUtils::typeToString(result)); - } return result; } @@ -221,26 +314,14 @@ TDouble10Vec seasonalVarianceScale(std::size_t dimension, { TDouble10Vec result(dimension, 1.0); detail::seasonalVarianceScale(weightStyles, weights, result); - for (std::size_t i = 0u; i < dimension;
++i) - { - if (!maths::CMathsFuncs::isFinite(result[i]) || result[i] <= 0.0) - { - throw std::runtime_error("Bad count weight: [" + core::CContainerPrinter::print(result) + "]"); - } - } return result; } double countVarianceScale(const TWeightStyleVec &weightStyles, const TDouble4Vec &weights) { - double result = 1.0; + double result{1.0}; detail::countVarianceScale(weightStyles, weights, result); - if (!maths::CMathsFuncs::isFinite(result) || result <= 0.0) - { - throw std::runtime_error("Bad variance scale " - + core::CStringUtils::typeToString(result)); - } return result; } @@ -250,27 +331,13 @@ TDouble10Vec countVarianceScale(std::size_t dimension, { TDouble10Vec result(dimension, 1.0); detail::countVarianceScale(weightStyles, weights, result); - for (std::size_t i = 0u; i < dimension; ++i) - { - if (!maths::CMathsFuncs::isFinite(result[i]) || result[i] <= 0.0) - { - throw std::runtime_error("Bad count weight: [" + core::CContainerPrinter::print(result) + "]"); - } - } return result; } bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, const TDouble4Vec &weights) { - try - { - return seasonalVarianceScale(weightStyles, weights) != 1.0; - } - catch (const std::exception &) - { - } - return true; + return seasonalVarianceScale(weightStyles, weights) != 1.0; } bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, @@ -333,14 +400,7 @@ bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, const TDouble4Vec &weights) { - try - { - return countVarianceScale(weightStyles, weights) != 1.0; - } - catch (const std::exception &) - { - } - return true; + return countVarianceScale(weightStyles, weights) != 1.0; } bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, diff --git a/lib/maths/ProbabilityAggregators.cc b/lib/maths/ProbabilityAggregators.cc index c5644af76f..1101b5f031 100644 --- a/lib/maths/ProbabilityAggregators.cc +++ b/lib/maths/ProbabilityAggregators.cc @@ -25,11 +25,11 @@ namespace ml { namespace maths { - namespace { -using TDoubleVec = std::vector<double>; + using TDoubleDoublePr = std::pair<double, double>; +using TDoubleVec = std::vector<double>; //! Compute the deviation corresponding to a probability of less likely //! samples \p p.
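For orientation before the test changes: CTrendComponent::CForecastLevel above is a small Monte Carlo scheme. Each rollout carries its own level, time of last change and accumulated change probability; at each forecast step a change fires when a uniform draw falls below that probability, the level jumps by a step drawn from the magnitude model, and the confidence band is read off the sorted levels. A condensed standalone sketch under simplifying assumptions, where a fixed per-step change probability and a fixed normal step distribution stand in for the CNaiveBayes and CNormalMeanPrecConjugate models:

    #include <algorithm>
    #include <random>
    #include <vector>

    // Illustrative only: forecast a level subject to random step changes by
    // simulating many paths and reading a confidence band off their quantiles.
    struct SForecastLevelSketch
    {
        SForecastLevelSketch(std::size_t paths, double pChangePerStep,
                             double stepMean, double stepSd)
            : m_Levels(paths, 0.0), m_PChange(pChangePerStep),
              m_Step(stepMean, stepSd) {}

        // Advance one step and return {lower, median, upper} for 'confidence'%.
        std::vector<double> forecast(double confidence)
        {
            for (double &level : m_Levels)
            {
                if (m_U01(m_Rng) < m_PChange)
                {
                    level += m_Step(m_Rng);
                }
            }
            std::sort(m_Levels.begin(), m_Levels.end());
            double n{static_cast<double>(m_Levels.size())};
            std::size_t lower{static_cast<std::size_t>((100.0 - confidence) / 200.0 * n)};
            std::size_t upper{std::min(static_cast<std::size_t>((100.0 + confidence) / 200.0 * n),
                                       m_Levels.size() - 1)};
            return {m_Levels[lower], m_Levels[m_Levels.size() / 2], m_Levels[upper]};
        }

        std::vector<double> m_Levels;
        double m_PChange;
        std::normal_distribution<double> m_Step;
        std::uniform_real_distribution<double> m_U01{0.0, 1.0};
        std::mt19937 m_Rng;
    };

Sampling paths rather than propagating a closed form is what lets the band widen, and widen asymmetrically, once level changes become likely, which a single normal interval cannot express.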
diff --git a/lib/maths/unittest/CForecastTest.cc b/lib/maths/unittest/CForecastTest.cc index 2f8ad11b1a..cb2264d50b 100644 --- a/lib/maths/unittest/CForecastTest.cc +++ b/lib/maths/unittest/CForecastTest.cc @@ -58,7 +58,9 @@ maths::CModelParams params(core_t::TTime bucketLength) static TTimeDoubleMap learnRates; learnRates[bucketLength] = static_cast<double>(bucketLength) / 1800.0; double minimumSeasonalVarianceScale{0.25}; - return maths::CModelParams{bucketLength, learnRates[bucketLength], DECAY_RATE, minimumSeasonalVarianceScale}; + return maths::CModelParams{bucketLength, learnRates[bucketLength], + DECAY_RATE, minimumSeasonalVarianceScale, + 6 * core::constants::HOUR, core::constants::DAY}; } maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary decayRateControllers(void) diff --git a/lib/maths/unittest/CModelTest.cc b/lib/maths/unittest/CModelTest.cc index 9bddc9e2a9..3e4ba44b7c 100644 --- a/lib/maths/unittest/CModelTest.cc +++ b/lib/maths/unittest/CModelTest.cc @@ -7,6 +7,7 @@ #include "CModelTest.h" #include +#include <core/Constants.h> #include #include @@ -26,7 +27,9 @@ void CModelTest::testAll(void) double learnRate{0.5}; double decayRate{0.001}; double minimumSeasonalVarianceScale{0.3}; - maths::CModelParams params(bucketLength, learnRate, decayRate, minimumSeasonalVarianceScale); + maths::CModelParams params(bucketLength, learnRate, decayRate, + minimumSeasonalVarianceScale, + 6 * core::constants::HOUR, core::constants::DAY); CPPUNIT_ASSERT_EQUAL(bucketLength, params.bucketLength()); CPPUNIT_ASSERT_EQUAL(learnRate, params.learnRate()); CPPUNIT_ASSERT_EQUAL(decayRate, params.decayRate()); @@ -34,6 +37,9 @@ void CModelTest::testAll(void) CPPUNIT_ASSERT_EQUAL(0.0, params.probabilityBucketEmpty()); params.probabilityBucketEmpty(0.2); CPPUNIT_ASSERT_EQUAL(0.2, params.probabilityBucketEmpty()); + CPPUNIT_ASSERT_EQUAL(6 * core::constants::HOUR, + params.minimumTimeToDetectChange(2 * core::constants::DAY)); + CPPUNIT_ASSERT_EQUAL(core::constants::DAY, params.maximumTimeToTestForChange()); } { maths::CModelAddSamplesParams::TDouble2Vec weight1(2, 0.4); diff --git a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc index df34dc0ab0..a788a2d2a3 100644 --- a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc +++ b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc @@ -790,8 +790,8 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean(void) for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); - expectedMean.add(samples[i]); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec{samples[i]}, SINGLE_UNIT_WEIGHT_2); + expectedMean.add(TVector2(samples[i])); if (i % 10 == 0) { diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc index 2111b3d18c..83d116aed0 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc @@ -14,6 +14,7 @@ #include #include +#include <boost/circular_buffer.hpp> #include #include #include @@ -38,6 +39,9 @@ namespace using TDoubleVec = std::vector<double>; using TDouble2Vec = core::CSmallVector<double, 2>; +using TTimeDoublePr = std::pair<core_t::TTime, double>; +using TTimeDoublePrCBuf = boost::circular_buffer<TTimeDoublePr>; +using TDecompositionPtr = boost::shared_ptr<maths::CTimeSeriesDecomposition>; using TPriorPtr = boost::shared_ptr<maths::CPrior>; using TPriorPtrVec = std::vector<TPriorPtr>; @@ -102,13 +106,13 @@ void CTimeSeriesChangeDetectorTest::testNoChange() case 2: rng.generateGammaSamples(10.0, 10.0 * scales[(t/3) %
scales.size()], 1000, samples); break; } - maths::CTimeSeriesDecomposition trendModel(DECAY_RATE, BUCKET_LENGTH); + TDecompositionPtr trendModel(new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH}); TPriorPtr residualModel(makeResidualModel()); auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) { - trendModel.addPoint(time, x); - double detrended{trendModel.detrend(time, x, 0.0)}; + trendModel->addPoint(time, x); + double detrended{trendModel->detrend(time, x, 0.0)}; residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{1.0}}); residualModel->propagateForwardsByTime(1.0); }; @@ -122,14 +126,11 @@ void CTimeSeriesChangeDetectorTest::testNoChange() maths::CUnivariateTimeSeriesChangeDetector detector{trendModel, residualModel, 6 * core::constants::HOUR, - 24 * core::constants::HOUR, - 12.0}; + 24 * core::constants::HOUR, 12.0}; for (std::size_t i = 950u; i < samples.size(); ++i) { addSampleToModel(time, samples[i]); - detector.addSamples(maths_t::E_ContinuousData, - maths::CConstantWeights::COUNT, - {{time, samples[i]}}, {{1.0}}); + detector.addSamples(maths::CConstantWeights::COUNT, {{time, samples[i]}}, {{1.0}}); if (detector.stopTesting()) { break; @@ -192,13 +193,13 @@ void CTimeSeriesChangeDetectorTest::testPersist() TDoubleVec samples; rng.generateNormalSamples(10.0, 10.0, 1000, samples); - maths::CTimeSeriesDecomposition trendModel(DECAY_RATE, BUCKET_LENGTH); + TDecompositionPtr trendModel(new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH}); TPriorPtr residualModel(makeResidualModel()); auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) { - trendModel.addPoint(time, x); - double detrended{trendModel.detrend(time, x, 0.0)}; + trendModel->addPoint(time, x); + double detrended{trendModel->detrend(time, x, 0.0)}; residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, maths::CConstantWeights::SINGLE_UNIT); @@ -214,11 +215,15 @@ void CTimeSeriesChangeDetectorTest::testPersist() maths::CUnivariateTimeSeriesChangeDetector origDetector{trendModel, residualModel, 6 * core::constants::HOUR, - 24 * core::constants::HOUR, - 12.0}; + 24 * core::constants::HOUR, 12.0}; + + maths::CModelParams modelParams{BUCKET_LENGTH, 1.0, 0.0, 1.0, + 6 * core::constants::HOUR, + 24 * core::constants::HOUR}; + maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE}; + maths::STimeSeriesDecompositionRestoreParams decompositionParams{DECAY_RATE, BUCKET_LENGTH, distributionParams}; + maths::SModelRestoreParams params{modelParams, decompositionParams, distributionParams}; - maths::SDistributionRestoreParams params{maths_t::E_ContinuousData, - DECAY_RATE, 0.05, 12.0, 1.0}; for (std::size_t i = 990u; i < samples.size(); ++i) { addSampleToModel(time, samples[i]); @@ -231,8 +236,7 @@ void CTimeSeriesChangeDetectorTest::testPersist() maths::CUnivariateTimeSeriesChangeDetector restoredDetector{trendModel, residualModel, 6 * core::constants::HOUR, - 24 * core::constants::HOUR, - 12.0}; + 24 * core::constants::HOUR, 12.0}; core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); @@ -289,13 +293,13 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec &trends, rng.generateNormalSamples(0.0, 1.0, 1000, samples); - maths::CTimeSeriesDecomposition trendModel(DECAY_RATE, BUCKET_LENGTH); + TDecompositionPtr trendModel(new maths::CTimeSeriesDecomposition{DECAY_RATE, 
BUCKET_LENGTH}); TPriorPtr residualModel(makeResidualModel()); auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x, double weight) { - trendModel.addPoint(time, x, maths::CConstantWeights::COUNT, {weight}); - double detrended{trendModel.detrend(time, x, 0.0)}; + trendModel->addPoint(time, x, maths::CConstantWeights::COUNT, {weight}); + double detrended{trendModel->detrend(time, x, 0.0)}; residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{weight}}); residualModel->propagateForwardsByTime(1.0); }; @@ -310,8 +314,7 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec &trends, maths::CUnivariateTimeSeriesChangeDetector detector{trendModel, residualModel, 6 * core::constants::HOUR, - 24 * core::constants::HOUR, - 12.0}; + 24 * core::constants::HOUR, 12.0}; TOptionalSize bucketsToDetect; for (std::size_t i = 950u; i < samples.size(); ++i) @@ -319,9 +322,7 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec &trends, double x{10.0 * applyChange(trends[t % trends.size()], time) + samples[i]}; addSampleToModel(time, x, 0.5); - detector.addSamples(maths_t::E_ContinuousData, - maths::CConstantWeights::COUNT, - {{time, x}}, {{1.0}}); + detector.addSamples(maths::CConstantWeights::COUNT, {{time, x}}, {{1.0}}); auto change = detector.change(); if (change) diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc index 3e56cc1609..63b5636b54 100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -286,7 +288,6 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic(void) tt += HOUR) { TDoubleDoublePr prediction = decomposition.value(tt, 70.0); - double residual = ::fabs(timeseries[tt / HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -294,7 +295,6 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic(void) maxValue = std::max(maxValue, ::fabs(timeseries[tt / HOUR])); percentileError += std::max(std::max(prediction.first - timeseries[tt / HOUR], timeseries[tt / HOUR] - prediction.second), 0.0); - //t.push_back(tt); //f.push_back(timeseries[tt / HOUR]); //fe.push_back(mean(value)); @@ -400,7 +400,6 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents(void) for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = ::fabs(trend[t / HALF_HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -408,7 +407,6 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents(void) maxValue = std::max(maxValue, ::fabs(trend[t / HALF_HOUR])); percentileError += std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0); - //f.push_back(mean(value)); //r.push_back(residual); } @@ -457,7 +455,7 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents(void) //file << "plot(t(1:length(r)), r, 'k');\n"; CPPUNIT_ASSERT(totalSumResidual < 0.06 * totalSumValue); - CPPUNIT_ASSERT(totalMaxResidual < 0.27 * totalMaxValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.28 * totalMaxValue); CPPUNIT_ASSERT(totalPercentileError < 0.03 * totalSumValue); meanSlope /= refinements; @@ -527,7 +525,6 @@ void CTimeSeriesDecompositionTest::testWeekend(void) for (core_t::TTime t = lastWeek; 
t < lastWeek + WEEK; t += HALF_HOUR) { TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = ::fabs(trend[t / HALF_HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -535,7 +532,6 @@ void CTimeSeriesDecompositionTest::testWeekend(void) maxValue = std::max(maxValue, ::fabs(trend[t / HALF_HOUR])); percentileError += std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0); - //f.push_back(mean(value)); //r.push_back(residual); } @@ -638,7 +634,6 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity(void) t += HALF_HOUR) { TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = ::fabs(trend[t / HALF_HOUR] + noiseMean - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -646,7 +641,6 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity(void) maxValue = std::max(maxValue, ::fabs(trend[t / HALF_HOUR])); percentileError += std::max(std::max(prediction.first - (trend[t / HALF_HOUR] + noiseMean), (trend[t / HALF_HOUR] + noiseMean) - prediction.second), 0.0); - //f.push_back(mean(value)); //r.push_back(residual); } @@ -770,7 +764,6 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset(void) for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HOUR) { TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = ::fabs(trend[t / HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -1042,7 +1035,6 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase(void) for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = ::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -1205,7 +1197,6 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase(void) for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = ::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -1213,7 +1204,6 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase(void) maxValue = std::max(maxValue, ::fabs(lastWeekTimeseries[j].second)); percentileError += std::max(std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); - //times.push_back(lastWeekTimeseries[j].first); //values.push_back(lastWeekTimeseries[j].second); //f.push_back(mean(value)); @@ -1331,7 +1321,6 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase(void) for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = ::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -1339,7 +1328,6 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase(void) maxValue = std::max(maxValue, ::fabs(lastWeekTimeseries[j].second)); percentileError += std::max(std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); - //times.push_back(lastWeekTimeseries[j].first); 
//values.push_back(lastWeekTimeseries[j].second); //f.push_back(mean(value)); @@ -2257,11 +2245,11 @@ void CTimeSeriesDecompositionTest::testPersist(void) core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); + maths::STimeSeriesDecompositionRestoreParams params{ + decayRate + 0.1, bucketLength, + maths::SDistributionRestoreParams{maths_t::E_ContinuousData, decayRate + 0.1}}; - maths::CTimeSeriesDecomposition restoredDecomposition(decayRate + 0.1, - bucketLength, - maths::CTimeSeriesDecomposition::DEFAULT_COMPONENT_SIZE, - traverser); + maths::CTimeSeriesDecomposition restoredDecomposition(params, traverser); std::string newXml; { @@ -2300,6 +2288,8 @@ void CTimeSeriesDecompositionTest::testUpgrade(void) return TDoubleDoublePr{first, second}; }; + maths::STimeSeriesDecompositionRestoreParams params{ + 0.1, HALF_HOUR, maths::SDistributionRestoreParams{maths_t::E_ContinuousData, 0.1}}; std::string empty; LOG_DEBUG("*** Seasonal and Calendar Components ***"); @@ -2326,9 +2316,7 @@ void CTimeSeriesDecompositionTest::testUpgrade(void) CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - maths::CTimeSeriesDecomposition decomposition(0.1, HALF_HOUR, - maths::CTimeSeriesDecomposition::DEFAULT_COMPONENT_SIZE, - traverser); + maths::CTimeSeriesDecomposition decomposition(params, traverser); // Check that the decay rates match and the values and variances // predictions match the values obtained from 6.2. @@ -2389,9 +2377,7 @@ void CTimeSeriesDecompositionTest::testUpgrade(void) CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - maths::CTimeSeriesDecomposition decomposition(0.1, HALF_HOUR, - maths::CTimeSeriesDecomposition::DEFAULT_COMPONENT_SIZE, - traverser); + maths::CTimeSeriesDecomposition decomposition(params, traverser); // Check that the decay rates match and the values and variances // predictions are close to the values obtained from 6.2. 
We can't diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index 1de281ec2f..8c7f7f088c 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -102,13 +102,45 @@ class CTimeSeriesCorrelateModelAllocator : public maths::CTimeSeriesCorrelateMod } }; -maths::CModelParams params(core_t::TTime bucketLength) +maths::CModelParams modelParams(core_t::TTime bucketLength) { using TTimeDoubleMap = std::map<core_t::TTime, double>; static TTimeDoubleMap learnRates; learnRates[bucketLength] = static_cast<double>(bucketLength) / 1800.0; double minimumSeasonalVarianceScale{MINIMUM_SEASONAL_SCALE}; - return maths::CModelParams{bucketLength, learnRates[bucketLength], DECAY_RATE, minimumSeasonalVarianceScale}; + return maths::CModelParams{bucketLength, + learnRates[bucketLength], DECAY_RATE, + minimumSeasonalVarianceScale, + 6 * core::constants::HOUR, core::constants::DAY}; +} + +maths::CModelAddSamplesParams addSampleParams(double interval, + maths_t::TWeightStyleVec weightStyles, + const TDouble2Vec4VecVec &weights) +{ + maths::CModelAddSamplesParams params; + params.integer(false) + .propagationInterval(interval) + .weightStyles(weightStyles) + .trendWeights(weights) + .priorWeights(weights); + return params; +} + +maths::CModelAddSamplesParams addSampleParams(const TDouble2Vec4VecVec &weights) +{ + return addSampleParams(1.0, maths::CConstantWeights::COUNT, weights); +} + +maths::CModelProbabilityParams computeProbabilityParams(const TDouble2Vec4Vec &weight) +{ + maths::CModelProbabilityParams params; + params.addCalculation(maths_t::E_TwoSided) + .seasonalConfidenceInterval(50.0) + .addBucketEmpty({false}) + .weightStyles(maths::CConstantWeights::COUNT) + .addWeights(weight); + return params; } maths::CNormalMeanPrecConjugate univariateNormal(void) @@ -178,7 +210,7 @@ void reinitializePrior(double learnRate, } if (controllers) { - for (auto &&trend : trends) + for (auto &trend : trends) { trend->decayRate(trend->decayRate() / (*controllers)[0].multiplier()); } @@ -205,25 +237,19 @@ void CTimeSeriesModelTest::testClone(void) maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(1); maths::CTimeSeriesCorrelations correlations{MINIMUM_SIGNIFICANT_CORRELATION, DECAY_RATE}; - maths::CUnivariateTimeSeriesModel model(params(bucketLength), 1, + maths::CUnivariateTimeSeriesModel model(modelParams(bucketLength), 1, trend, univariateNormal(), &controllers); model.modelCorrelations(correlations); TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); - TDouble2Vec4Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2Vec4VecVec weights{{{1.0}}}; core_t::TTime time{0}; for (auto sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } @@ -237,7 +263,7 @@ { maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(3); - maths::CMultivariateTimeSeriesModel model(params(bucketLength), + maths::CMultivariateTimeSeriesModel model(modelParams(bucketLength), trend, multivariateNormal(), &controllers); TDoubleVec mean{13.0, 9.0,
10.0}; @@ -249,13 +275,8 @@ void CTimeSeriesModelTest::testClone(void) core_t::TTime time{0}; for (const auto &sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } @@ -290,7 +311,7 @@ void CTimeSeriesModelTest::testMode(void) maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; core_t::TTime time{0}; for (auto sample : samples) @@ -307,13 +328,8 @@ time = 0; for (auto sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } double expectedMode{ maths::CBasicStatistics::mean(trend.value(time)) @@ -331,17 +347,16 @@ TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); - double learnRate{params(bucketLength).learnRate()}; + double learnRate{modelParams(bucketLength).learnRate()}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; core_t::TTime time{0}; - for (auto &&sample : samples) + for (auto &sample : samples) { - sample += 20.0 + 10.0 * ::sin( boost::math::double_constants::two_pi - * static_cast<double>(time) - / static_cast<double>(core::constants::DAY)); + sample += 20.0 + 10.0 * std::sin( boost::math::double_constants::two_pi + * static_cast<double>(time) / 86400.0); time += bucketLength; } @@ -350,13 +365,8 @@ time = 0; for (auto sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); if (trend.addPoint(time, sample)) { prior.setToNonInformative(0.0, DECAY_RATE); @@ -390,11 +400,11 @@ TDoubleVecVec samples; rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); - TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}, - TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}, - TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}}; + TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}, + TDecompositionPtr{new
maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}, + TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; - maths::CMultivariateTimeSeriesModel model{params(bucketLength), *trends[0], prior}; + maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior}; core_t::TTime time{0}; for (const auto &sample : samples) @@ -415,13 +425,8 @@ void CTimeSeriesModelTest::testMode(void) time = 0; for (const auto &sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } TDouble2Vec expectedMode(prior.marginalLikelihoodMode(maths::CConstantWeights::COUNT, @@ -449,22 +454,21 @@ void CTimeSeriesModelTest::testMode(void) TDoubleVecVec samples; rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); - double learnRate{params(bucketLength).learnRate()}; - TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}, - TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}, - TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}}; + double learnRate{modelParams(bucketLength).learnRate()}; + TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}, + TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}, + TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; - maths::CMultivariateTimeSeriesModel model{params(bucketLength), *trends[0], prior}; + maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior}; core_t::TTime time{0}; - for (auto &&sample : samples) + for (auto &sample : samples) { double amplitude{10.0}; for (std::size_t i = 0u; i < sample.size(); ++i) { - sample[i] += 30.0 + amplitude * ::sin( boost::math::double_constants::two_pi - * static_cast(time) - / static_cast(core::constants::DAY)); + sample[i] += 30.0 + amplitude * std::sin( boost::math::double_constants::two_pi + * static_cast(time) / 86400.0); amplitude += 4.0; } time += bucketLength; @@ -474,13 +478,8 @@ void CTimeSeriesModelTest::testMode(void) time = 0; for (const auto &sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec(sample), TAG)}); bool reinitialize{false}; TDouble10Vec1Vec detrended{TDouble10Vec(3)}; @@ -531,7 +530,7 @@ void CTimeSeriesModelTest::testAddBucketValue(void) core_t::TTime bucketLength{600}; maths::CTimeSeriesDecompositionStub trend; maths::CLogNormalMeanPrecConjugate prior{univariateLogNormal()}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, 
prior}; TTimeDouble2VecSizeTrVec samples{core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG), @@ -549,13 +548,7 @@ void CTimeSeriesModelTest::testAddBucketValue(void) prior.adjustOffset(maths::CConstantWeights::COUNT, {-1.0}, maths::CConstantWeights::SINGLE_UNIT); - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, samples); + model.addSamples(addSampleParams(weights), samples); model.addBucketValue({core::make_triple(core_t::TTime{20}, TDouble2Vec{-1.0}, TAG)}); CPPUNIT_ASSERT_EQUAL(prior.checksum(), model.residualModel().checksum()); @@ -579,21 +572,14 @@ void CTimeSeriesModelTest::testAddSamples(void) { maths::CTimeSeriesDecompositionStub trend; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; TTimeDouble2VecSizeTrVec samples{core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG), core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG)}; TDouble2Vec4VecVec weights{{{1.0}}, {{1.5}}, {{0.9}}}; - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - - model.addSamples(params, samples); + model.addSamples(addSampleParams(weights), samples); trend.addPoint(samples[1].first, samples[1].second[0], maths::CConstantWeights::COUNT, weights[1][0]); trend.addPoint(samples[2].first, samples[2].second[0], maths::CConstantWeights::COUNT, weights[2][0]); @@ -619,21 +605,14 @@ void CTimeSeriesModelTest::testAddSamples(void) TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}, TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; - maths::CMultivariateTimeSeriesModel model{params(bucketLength), *trends[0], prior}; + maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior}; TTimeDouble2VecSizeTrVec samples{core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5, 3.4, 3.3}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9, 3.8, 3.7}, TAG), core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1, 2.0, 1.9}, TAG)}; TDouble2Vec4VecVec weights{{{1.0, 1.1, 1.2}}, {{1.5, 1.6, 1.7}}, {{0.9, 1.0, 1.1}}}; - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - - model.addSamples(params, samples); + model.addSamples(addSampleParams(weights), samples); for (std::size_t i = 0u; i < trends.size(); ++i) { @@ -662,14 +641,15 @@ void CTimeSeriesModelTest::testAddSamples(void) CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); } + maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleWinsorisationWeight, + maths_t::E_SampleCountWeight, + maths_t::E_SampleCountVarianceScaleWeight}; + LOG_DEBUG("Propagation interval univariate"); { - maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight, - maths_t::E_SampleCountVarianceScaleWeight}; maths::CTimeSeriesDecompositionStub trend; maths::CNormalMeanPrecConjugate 
prior{univariateNormal()}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; double interval[]{1.0, 1.1, 0.4}; TDouble2Vec samples[]{{10.0}, {13.9}, {27.1}}; @@ -679,13 +659,7 @@ for (std::size_t i = 0u; i < 3; ++i) { TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)}; - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(interval[i]) - .weightStyles(weightStyles) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, sample); + model.addSamples(addSampleParams(interval[i], weightStyles, weights), sample); TDouble4Vec weight{weights[0][0][0], weights[0][1][0], weights[0][2][0]}; prior.addSamples(weightStyles, samples[i], {weight}); @@ -706,11 +680,8 @@ TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}, TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; - maths::CMultivariateTimeSeriesModel model{params(bucketLength), *trends[0], prior}; + maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior}; - maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight, - maths_t::E_SampleCountVarianceScaleWeight}; double interval[]{1.0, 1.1, 0.4}; TDouble2Vec samples[]{{13.5, 13.4, 13.3}, {13.9, 13.8, 13.7}, @@ -723,13 +694,7 @@ for (std::size_t i = 0u; i < 3; ++i) { TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)}; - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(interval[i]) - .weightStyles(weightStyles) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, sample); + model.addSamples(addSampleParams(interval[i], weightStyles, weights), sample); TDouble10Vec4Vec weight{TDouble10Vec(weights[0][0]), TDouble10Vec(weights[0][1]), @@ -750,11 +715,11 @@ LOG_DEBUG("Decay rate control univariate"); { - double learnRate{params(bucketLength).learnRate()}; + double learnRate{modelParams(bucketLength).learnRate()}; maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; auto controllers = decayRateControllers(1); - maths::CUnivariateTimeSeriesModel model(params(bucketLength), 1, trend, prior, &controllers); + maths::CUnivariateTimeSeriesModel model(modelParams(bucketLength), 1, trend, prior, &controllers); TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 2000, samples); @@ -765,18 +730,12 @@ core_t::TTime time{0}; for (auto noise : samples) { - double sample{20.0 + 4.0 * ::sin( boost::math::double_constants::two_pi - * static_cast<double>(time) / 86400.0) + double sample{20.0 + 4.0 * std::sin( boost::math::double_constants::two_pi + * static_cast<double>(time) / 86400.0) + (time / bucketLength > 1800 ?
10.0 : 0.0) + noise}; - TTimeDouble2VecSizeTrVec sample_{core::make_triple(time, TDouble2Vec{sample}, TAG)}; - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, sample_); + + model.addSamples(addSampleParams(weights), sample_); if (trend.addPoint(time, sample)) { @@ -828,13 +787,13 @@ LOG_DEBUG("Decay rate control multivariate"); { - double learnRate{params(bucketLength).learnRate()}; - TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}, - TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}, - TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}}; + double learnRate{modelParams(bucketLength).learnRate()}; + TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}, + TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}, + TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; auto controllers = decayRateControllers(3); - maths::CMultivariateTimeSeriesModel model{params(bucketLength), *trends[0], prior, &controllers}; + maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior, &controllers}; TDoubleVecVec samples; { @@ -847,7 +806,7 @@ TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}}; core_t::TTime time{0}; - for (auto &&sample : samples) + for (auto &sample : samples) { bool reinitialize{false}; bool hasTrend{false}; @@ -857,8 +816,8 @@ double amplitude{10.0}; for (std::size_t i = 0u; i < sample.size(); ++i) { - sample[i] = 30.0 + amplitude * ::sin( boost::math::double_constants::two_pi - * static_cast<double>(time) / 86400.0) + sample[i] = 30.0 + amplitude * std::sin( boost::math::double_constants::two_pi + * static_cast<double>(time) / 86400.0) + (time / bucketLength > 1800 ?
10.0 : 0.0) + sample[i]; reinitialize |= trends[i]->addPoint(time, sample[i]); detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); @@ -868,13 +827,8 @@ void CTimeSeriesModelTest::testAddSamples(void) } TTimeDouble2VecSizeTrVec sample_{core::make_triple(time, TDouble2Vec(sample), TAG)}; - maths::CModelAddSamplesParams params_; - params_.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params_, sample_); + + model.addSamples(addSampleParams(weights), sample_); if (reinitialize) { @@ -939,11 +893,11 @@ void CTimeSeriesModelTest::testPredict(void) LOG_DEBUG("Univariate seasonal"); { - double learnRate{params(bucketLength).learnRate()}; + double learnRate{modelParams(bucketLength).learnRate()}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; auto controllers = decayRateControllers(1); - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior, &controllers}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior, &controllers}; TDoubleVec samples; rng.generateNormalSamples(0.0, 4.0, 1008, samples); @@ -951,16 +905,11 @@ void CTimeSeriesModelTest::testPredict(void) core_t::TTime time{0}; for (auto sample : samples) { - sample += 10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time) / 86400.0); + sample += 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time) / 86400.0); - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); if (trend.addPoint(time, sample)) { @@ -983,8 +932,8 @@ void CTimeSeriesModelTest::testPredict(void) TMeanAccumulator meanError; for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) { - double trend_{10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time_) / 86400.0)}; + double trend_{10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time_) / 86400.0)}; double expected{ maths::CBasicStatistics::mean(trend.value(time_)) + maths::CBasicStatistics::mean(prior.marginalLikelihoodConfidenceInterval(0.0))}; double predicted{model.predict(time_)[0]}; @@ -1004,7 +953,7 @@ void CTimeSeriesModelTest::testPredict(void) { maths::CTimeSeriesDecompositionStub trend; maths::CMultimodalPrior prior{univariateMultimodal()}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; TMeanAccumulator modes[2]; TDoubleVec samples, samples_; @@ -1019,13 +968,8 @@ void CTimeSeriesModelTest::testPredict(void) core_t::TTime time{0}; for (auto sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } @@ -1047,28 +991,29 @@ void 
CTimeSeriesModelTest::testPredict(void) LOG_DEBUG("Multivariate Seasonal"); { - double learnRate{params(bucketLength).learnRate()}; + double learnRate{modelParams(bucketLength).learnRate()}; TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}, TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}, TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; - maths::CMultivariateTimeSeriesModel model{maths::CMultivariateTimeSeriesModel{params(bucketLength), *trends[0], prior}}; + maths::CMultivariateTimeSeriesModel model{maths::CMultivariateTimeSeriesModel{modelParams(bucketLength), *trends[0], prior}}; TDoubleVecVec samples; TDoubleVec mean{0.0, 2.0, 1.0}; - { - TDoubleVecVec covariance{{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}; - rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); - } + rng.generateMultivariateNormalSamples(mean, + {{3.0, 2.9, 0.5}, + {2.9, 2.6, 0.1}, + {0.5, 0.1, 2.0}}, + 1000, samples); TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; core_t::TTime time{0}; - for (auto &&sample : samples) + for (auto &sample : samples) { - for (auto &&coordinate : sample) + for (auto &coordinate : sample) { - coordinate += 10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time) / 86400.0); + coordinate += 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time) / 86400.0); } bool reinitialize{false}; TDouble10Vec detrended; @@ -1086,13 +1031,8 @@ void CTimeSeriesModelTest::testPredict(void) maths::CConstantWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } @@ -1103,8 +1043,8 @@ void CTimeSeriesModelTest::testPredict(void) maths::CMultivariatePrior::TSizeDoublePr10Vec condition; for (std::size_t i = 0u; i < mean.size(); ++i) { - double trend_{mean[i] + 10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time_) / 86400.0)}; + double trend_{mean[i] + 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time_) / 86400.0)}; maths::CMultivariatePrior::TUnivariatePriorPtr margin{prior.univariate(marginalize, condition).first}; double expected{ maths::CBasicStatistics::mean(trends[i]->value(time_)) + maths::CBasicStatistics::mean(margin->marginalLikelihoodConfidenceInterval(0.0))}; @@ -1125,7 +1065,7 @@ void CTimeSeriesModelTest::testPredict(void) TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}, TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}}; maths::CMultivariateMultimodalPrior<3> prior{multivariateMultimodal()}; - maths::CMultivariateTimeSeriesModel model{maths::CMultivariateTimeSeriesModel{params(bucketLength), *trends[0], prior}}; + maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior}; TMeanAccumulator2Vec modes[2]{TMeanAccumulator2Vec(3), TMeanAccumulator2Vec(3)}; TDoubleVecVec samples; @@ -1157,13 +1097,8 @@ void CTimeSeriesModelTest::testPredict(void) core_t::TTime time{0}; for (const auto &sample 
: samples)
         {
-            maths::CModelAddSamplesParams params;
-            params.integer(false)
-                  .propagationInterval(1.0)
-                  .weightStyles(maths::CConstantWeights::COUNT)
-                  .trendWeights(weights)
-                  .priorWeights(weights);
-            model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
             time += bucketLength;
         }
 
@@ -1188,9 +1123,11 @@ void CTimeSeriesModelTest::testProbability(void)
     LOG_DEBUG("| CTimeSeriesModelTest::testProbability |");
     LOG_DEBUG("+-----------------------------------------+");
 
-    // Test: 1) Calculation, seasonal confidence interval, weights, etc.
-    //       2) Test with and without trend.
-    //       3) Test with anomalies.
+    // Test: 1) The calculation matches the expected values given the trend
+    //          decomposition and residual model for the different calculations,
+    //          seasonal confidence intervals, weights and so on.
+    //       2) Test the calculation with and without trend.
+    //       3) Test manually injected anomalies have low probabilities.
 
     using TDoubleSizePr = std::pair<double, std::size_t>;
     using TSizeVec = std::vector<std::size_t>;
@@ -1202,10 +1139,10 @@ void CTimeSeriesModelTest::testProbability(void)
     LOG_DEBUG("Univariate");
     {
         maths::CUnivariateTimeSeriesModel models[]{
-                maths::CUnivariateTimeSeriesModel{params(bucketLength), 1,
+                maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 1,
                                                   maths::CTimeSeriesDecompositionStub{},
                                                   univariateNormal(), 0, false},
-                maths::CUnivariateTimeSeriesModel{params(bucketLength), 1,
+                maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 1,
                                                   maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
                                                   univariateNormal(), 0, false}};
@@ -1216,19 +1153,12 @@ void CTimeSeriesModelTest::testProbability(void)
         const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(1)};
         for (auto sample : samples)
         {
-            maths::CModelAddSamplesParams params;
-            params.integer(false)
-                  .propagationInterval(1.0)
-                  .weightStyles(maths::CConstantWeights::COUNT)
-                  .trendWeights(weight)
-                  .priorWeights(weight);
-
-            double trend{5.0 + 5.0 * ::sin(  boost::math::double_constants::two_pi
-                                           * static_cast<double>(time) / 86400.0)};
-
-            models[0].addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)});
-            models[1].addSamples(params, {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
-
+            double trend{5.0 + 5.0 * std::sin(  boost::math::double_constants::two_pi
+                                              * static_cast<double>(time) / 86400.0)};
+            models[0].addSamples(addSampleParams(weight),
+                                 {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            models[1].addSamples(addSampleParams(weight),
+                                 {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
             time += bucketLength;
         }
@@ -1266,16 +1196,14 @@
                 }
                 double lb[2], ub[2];
                 models[0].residualModel().probabilityOfLessLikelySamples(
-                        calculation,
-                        weightStyles[i],
-                        sample, {weights_},
-                        lb[0], ub[0], expectedTail[0]);
+                        calculation, weightStyles[i],
+                        sample, {weights_},
+                        lb[0], ub[0], expectedTail[0]);
                 models[1].residualModel().probabilityOfLessLikelySamples(
-                        calculation,
-                        weightStyles[i],
-                        {models[1].trendModel().detrend(time, sample[0], confidence)},
-                        {weights_},
-                        lb[1], ub[1], expectedTail[1]);
+                        calculation, weightStyles[i],
+                        {models[1].trendModel().detrend(time, sample[0], confidence)},
+                        {weights_},
+                        lb[1], ub[1], expectedTail[1]);
                 expectedProbability[0] = (lb[0] + ub[0]) / 2.0;
                 expectedProbability[1] = (lb[1] + ub[1]) / 2.0;
             }
@@ -1312,42 +1240,37 @@ void CTimeSeriesModelTest::testProbability(void)
     LOG_DEBUG("Multivariate");
{ maths::CMultivariateTimeSeriesModel models[]{ - maths::CMultivariateTimeSeriesModel{params(bucketLength), + maths::CMultivariateTimeSeriesModel{modelParams(bucketLength), maths::CTimeSeriesDecompositionStub{}, multivariateNormal(), 0, false}, - maths::CMultivariateTimeSeriesModel{params(bucketLength), + maths::CMultivariateTimeSeriesModel{modelParams(bucketLength), maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}, multivariateNormal(), 0, false}}; TDoubleVecVec samples; - { - TDoubleVec mean{10.0, 15.0, 11.0}; - TDoubleVecVec covariance{{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}; - rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); - } + rng.generateMultivariateNormalSamples({10.0, 15.0, 11.0}, + {{3.0, 2.9, 0.5}, + {2.9, 2.6, 0.1}, + {0.5, 0.1, 2.0}}, + 1000, samples); core_t::TTime time{0}; const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(3)}; - for (auto &&sample : samples) + for (const auto &sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weight) - .priorWeights(weight); - TDouble2Vec sample_(sample); - models[0].addSamples(params, {core::make_triple(time, sample_, TAG)}); + models[0].addSamples(addSampleParams(weight), + {core::make_triple(time, sample_, TAG)}); - double trend{5.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time) / 86400.0)}; - for (auto &&component : sample_) + double trend{5.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time) / 86400.0)}; + for (auto &component : sample_) { component += trend; } - models[1].addSamples(params, {core::make_triple(time, sample_, TAG)}); + models[1].addSamples(addSampleParams(weight), + {core::make_triple(time, sample_, TAG)}); time += bucketLength; } @@ -1386,22 +1309,18 @@ void CTimeSeriesModelTest::testProbability(void) } double lb[2], ub[2]; models[0].residualModel().probabilityOfLessLikelySamples( - calculation, - weightStyles[i], - {TDouble10Vec(sample)}, - {weights_}, - lb[0], ub[0], expectedTail[0]); + calculation, weightStyles[i], + {TDouble10Vec(sample)}, {weights_}, + lb[0], ub[0], expectedTail[0]); TDouble10Vec detrended; for (std::size_t j = 0u; j < sample.size(); ++j) { detrended.push_back(models[1].trendModel()[j]->detrend(time, sample[j], confidence)); } models[1].residualModel().probabilityOfLessLikelySamples( - calculation, - weightStyles[i], - {detrended}, - {weights_}, - lb[1], ub[1], expectedTail[1]); + calculation, weightStyles[i], + {detrended}, {weights_}, + lb[1], ub[1], expectedTail[1]); expectedProbability[0] = (lb[0] + ub[0]) / 2.0; expectedProbability[1] = (lb[1] + ub[1]) / 2.0; } @@ -1441,7 +1360,7 @@ void CTimeSeriesModelTest::testProbability(void) LOG_DEBUG("Anomalies"); { maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 1, trend, univariateNormal()}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 1, trend, univariateNormal()}; TSizeVec anomalies; rng.generateUniformSamples(100, 1000, 10, anomalies); @@ -1455,37 +1374,23 @@ void CTimeSeriesModelTest::testProbability(void) TDouble2Vec4VecVec weights{weight}; std::size_t bucket{0}; core_t::TTime time{0}; - for (auto &&sample : samples) + for (auto sample : samples) { if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++)) { sample += 10.0; } - { - maths::CModelAddSamplesParams params; - 
params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); - } - { - maths::CModelProbabilityParams params; - params.addCalculation(maths_t::E_TwoSided) - .seasonalConfidenceInterval(50.0) - .addBucketEmpty({false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); - TTail2Vec tail; - double probability; - bool conditional; - TSize1Vec mostAnomalousCorrelate; - model.probability(params, {{time}}, {{sample}}, - probability, tail, - conditional, mostAnomalousCorrelate); - smallest.add({probability, bucket - 1}); - } + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + + TTail2Vec tail; + double probability; + bool conditional; + TSize1Vec mostAnomalousCorrelate; + model.probability(computeProbabilityParams(weight), {{time}}, {{sample}}, + probability, tail, conditional, mostAnomalousCorrelate); + smallest.add({probability, bucket - 1}); + time += bucketLength; } @@ -1508,6 +1413,13 @@ void CTimeSeriesModelTest::testWeights(void) LOG_DEBUG("| CTimeSeriesModelTest::testWeights |"); LOG_DEBUG("+-------------------------------------+"); + // Check that the seasonal weight matches the value we expect given + // 1) the trend and residual model + // 2) the variation in the input data + // + // And that the Winsorisation weight is monotonic decreasing with + // increasing distance from the expected value. + core_t::TTime bucketLength{1800}; test::CRandomNumbers rng; @@ -1516,7 +1428,7 @@ void CTimeSeriesModelTest::testWeights(void) { maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; TDoubleVec samples; rng.generateNormalSamples(0.0, 4.0, 1008, samples); @@ -1524,32 +1436,11 @@ void CTimeSeriesModelTest::testWeights(void) core_t::TTime time{0}; for (auto sample : samples) { - double scale{10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time) / 86400.0)}; + double scale{10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time) / 86400.0)}; sample = scale * (1.0 + 0.1 * sample); - - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); - - if (trend.addPoint(time, sample)) - { - prior.setToNonInformative(0.0, DECAY_RATE); - for (const auto &value : model.slidingWindow()) - { - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(value.first, value.second, 0.0)}, - maths::CConstantWeights::SINGLE_UNIT); - } - } - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(time, sample, 0.0)}, - maths::CConstantWeights::SINGLE_UNIT); - + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } @@ -1557,10 +1448,11 @@ void CTimeSeriesModelTest::testWeights(void) TMeanAccumulator error; for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) { - double dataScale{::pow(1.0 + 0.5 * ::sin( boost::math::double_constants::two_pi - * static_cast(time_) / 86400.0), 2.0)}; 
+ double dataScale{std::pow(1.0 + 0.5 * std::sin( boost::math::double_constants::two_pi + * static_cast(time_) / 86400.0), 2.0)}; - double expectedScale{trend.scale(time_, prior.marginalLikelihoodVariance(), 0.0).second}; + double expectedScale{model.trendModel().scale( + time_, model.residualModel().marginalLikelihoodVariance(), 0.0).second}; double scale{model.seasonalWeight(0.0, time_)[0]}; LOG_DEBUG("expected weight = " << expectedScale @@ -1588,50 +1480,29 @@ void CTimeSeriesModelTest::testWeights(void) LOG_DEBUG("Multivariate"); { - double learnRate{params(bucketLength).learnRate()}; - TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}, - TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}, - TDecompositionPtr{new maths::CTimeSeriesDecomposition{DECAY_RATE, bucketLength}}}; + maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; - maths::CMultivariateTimeSeriesModel model{params(bucketLength), *trends[0], prior}; + maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), trend, prior}; TDoubleVecVec samples; - { - TDoubleVec mean{10.0, 15.0, 11.0}; - TDoubleVecVec covariance{{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}; - rng.generateMultivariateNormalSamples(mean, covariance, 1008, samples); - } + rng.generateMultivariateNormalSamples({10.0, 15.0, 11.0}, + {{3.0, 2.9, 0.5}, + {2.9, 2.6, 0.1}, + {0.5, 0.1, 2.0}}, + 1008, samples); - TDouble10Vec4Vec1Vec weight{{{1.0, 1.0, 1.0}}}; TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}}; core_t::TTime time{0}; - for (auto &&sample : samples) + for (auto &sample : samples) { - double scale{10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time) / 86400.0)}; - - bool reinitialize{false}; - TDouble10Vec1Vec detrended{TDouble10Vec(3)}; - for (std::size_t i = 0u; i < sample.size(); ++i) + double scale{10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time) / 86400.0)}; + for (auto &component : sample) { - sample[i] = scale * (1.0 + 0.1 * sample[i]); - reinitialize |= trends[i]->addPoint(time, sample[i]); - detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); - } - if (reinitialize) - { - reinitializePrior(learnRate, model, trends, prior); + component = scale * (1.0 + 0.1 * component); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended, weight); - - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); - + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } @@ -1639,12 +1510,13 @@ void CTimeSeriesModelTest::testWeights(void) TMeanAccumulator error; for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) { - double dataScale{::pow(1.0 + 0.5 * ::sin( boost::math::double_constants::two_pi - * static_cast(time_) / 86400.0), 2.0)}; + double dataScale{std::pow(1.0 + 0.5 * std::sin( boost::math::double_constants::two_pi + * static_cast(time_) / 86400.0), 2.0)}; for (std::size_t i = 0u; i < 3; ++i) { - double expectedScale{trends[i]->scale(time_, prior.marginalLikelihoodVariances()[i], 0.0).second}; + double expectedScale{model.trendModel()[i]->scale( + time_, 
model.residualModel().marginalLikelihoodVariances()[i], 0.0).second}; double scale{model.seasonalWeight(0.0, time_)[i]}; LOG_DEBUG("expected weight = " << expectedScale << ", weight = " << scale @@ -1687,27 +1559,21 @@ void CTimeSeriesModelTest::testMemoryUsage(void) maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(1); boost::scoped_ptr model{ - new maths::CUnivariateTimeSeriesModel{params(bucketLength), 0, + new maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 0, trend, univariateNormal(), &controllers}}; TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); - TDouble2Vec4Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2Vec4VecVec weights{{{1.0}}}; core_t::TTime time{0}; for (auto sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - sample += 10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time) / 86400.0); + sample += 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time) / 86400.0); trend.addPoint(time, sample); - model->addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + model->addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } @@ -1732,25 +1598,20 @@ void CTimeSeriesModelTest::testMemoryUsage(void) maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; auto controllers = decayRateControllers(3); boost::scoped_ptr model{ - new maths::CMultivariateTimeSeriesModel{params(bucketLength), trend, prior, &controllers}}; + new maths::CMultivariateTimeSeriesModel{modelParams(bucketLength), trend, prior, &controllers}}; TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; core_t::TTime time{0}; for (auto &sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - for (auto &&coordinate : sample) + for (auto &coordinate : sample) { - coordinate += 10.0 + 5.0 * ::sin( boost::math::double_constants::two_pi - * static_cast(time) / 86400.0); + coordinate += 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi + * static_cast(time) / 86400.0); } trend.addPoint(time, sample[0]); - model->addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); + model->addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } @@ -1773,10 +1634,10 @@ void CTimeSeriesModelTest::testPersist(void) LOG_DEBUG("| CTimeSeriesModelTest::testPersist |"); LOG_DEBUG("+-------------------------------------+"); - // Test persist then restore is idempotent. + // Test the restored model checksum matches the persisted model. 
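For reference, the restore-parameter plumbing this patch introduces nests as follows; a minimal sketch assembled from the restore call sites in this test, with types and argument order exactly as used there (nothing here is new API):

    // The distribution restore parameters are now embedded in the
    // decomposition restore parameters and also passed to the model
    // restore parameters directly.
    maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
    maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE,
                                                                     bucketLength,
                                                                     distributionParams};
    maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};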
core_t::TTime bucketLength{600}; - maths::CModelParams params_{params(bucketLength)}; + maths::CModelParams params{modelParams(bucketLength)}; test::CRandomNumbers rng; @@ -1784,24 +1645,18 @@ void CTimeSeriesModelTest::testPersist(void) { maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(1); - maths::CUnivariateTimeSeriesModel origModel{params_, 1, + maths::CUnivariateTimeSeriesModel origModel{params, 1, trend, univariateNormal(), &controllers}; TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); - TDouble2Vec4Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2Vec4VecVec weights{{{1.0}}}; core_t::TTime time{0}; for (auto sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - origModel.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + origModel.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } @@ -1812,18 +1667,19 @@ void CTimeSeriesModelTest::testPersist(void) inserter.toXml(origXml); } - //LOG_DEBUG("model XML representation:\n" << origXml); + LOG_TRACE("model XML representation:\n" << origXml); + LOG_DEBUG("model XML size: " << origXml.size()); // Restore the XML into a new filter core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); + maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE}; maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, - maths::CTimeSeriesDecomposition::DEFAULT_COMPONENT_SIZE}; - maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE, 0.5, 24.0, 12}; - maths::SModelRestoreParams restoreParams{params_, decompositionParams, distributionParams}; + distributionParams}; + maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams}; maths::CUnivariateTimeSeriesModel restoredModel{restoreParams, traverser}; CPPUNIT_ASSERT_EQUAL(origModel.checksum(), restoredModel.checksum()); @@ -1831,27 +1687,25 @@ void CTimeSeriesModelTest::testPersist(void) LOG_DEBUG("Multivariate"); { - TDoubleVec mean{11.0, 10.0, 12.0}; - TDoubleVecVec covariance{{4.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}; TDoubleVecVec samples; - rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); + rng.generateMultivariateNormalSamples({11.0, 10.0, 12.0}, + {{4.0, 2.9, 0.5}, + {2.9, 2.6, 0.1}, + {0.5, 0.1, 2.0}}, + 1000, samples); maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; auto controllers = decayRateControllers(3); - maths::CMultivariateTimeSeriesModel origModel{params(bucketLength), trend, prior, &controllers}; + maths::CMultivariateTimeSeriesModel origModel{modelParams(bucketLength), + trend, prior, &controllers}; TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; core_t::TTime time{0}; for (const auto &sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - origModel.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); + 
origModel.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } @@ -1862,18 +1716,19 @@ void CTimeSeriesModelTest::testPersist(void) inserter.toXml(origXml); } - //LOG_DEBUG("model XML representation:\n" << origXml); + LOG_TRACE("model XML representation:\n" << origXml); + LOG_DEBUG("model XML size: " << origXml.size()); // Restore the XML into a new filter core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); + maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE}; maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, - maths::CTimeSeriesDecomposition::DEFAULT_COMPONENT_SIZE}; - maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE, 0.5, 24.0, 12}; - maths::SModelRestoreParams restoreParams{params_, decompositionParams, distributionParams}; + distributionParams}; + maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams}; maths::CMultivariateTimeSeriesModel restoredModel{restoreParams, traverser}; CPPUNIT_ASSERT_EQUAL(origModel.checksum(), restoredModel.checksum()); @@ -1888,6 +1743,12 @@ void CTimeSeriesModelTest::testUpgrade(void) LOG_DEBUG("| CTimeSeriesModelTest::testUpgrade |"); LOG_DEBUG("+-------------------------------------+"); + // Test upgrade is minimally disruptive. We test the upgraded model + // predicted confidence intervals verses the values we obtain from + // the previous model. Note the confidence interval depends on both + // trend and residual model so this test is sensitive to problems + // restoring either. + using TStrVec = std::vector; auto load = [](const std::string &name, std::string &result) { @@ -1900,7 +1761,7 @@ void CTimeSeriesModelTest::testUpgrade(void) core_t::TTime bucketLength{600}; core_t::TTime halfHour{1800}; - maths::CModelParams params_{params(bucketLength)}; + maths::CModelParams params{modelParams(bucketLength)}; std::string empty; LOG_DEBUG("Univariate"); @@ -1919,11 +1780,11 @@ void CTimeSeriesModelTest::testUpgrade(void) CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); core::CRapidXmlStateRestoreTraverser traverser(parser); + maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE}; maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, - maths::CTimeSeriesDecomposition::DEFAULT_COMPONENT_SIZE}; - maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE, 0.5, 24.0, 12}; - maths::SModelRestoreParams restoreParams{params_, decompositionParams, distributionParams}; + distributionParams}; + maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams}; maths::CUnivariateTimeSeriesModel restoredModel{restoreParams, traverser}; TStrVec expectedInterval; @@ -1972,11 +1833,11 @@ void CTimeSeriesModelTest::testUpgrade(void) CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); core::CRapidXmlStateRestoreTraverser traverser(parser); + maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE}; maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, - maths::CTimeSeriesDecomposition::DEFAULT_COMPONENT_SIZE}; - maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE, 0.5, 24.0, 12}; - 
maths::SModelRestoreParams restoreParams{params_, decompositionParams, distributionParams}; + distributionParams}; + maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams}; maths::CMultivariateTimeSeriesModel restoredModel{restoreParams, traverser}; TStrVec expectedInterval; @@ -2023,16 +1884,14 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations(void) test::CRandomNumbers rng; { - TDoubleVec mean{10.0, 15.0}; - TDoubleVecVec covariance{{3.0, 2.9}, {2.9, 2.6}}; TDoubleVecVec samples; - rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); + rng.generateMultivariateNormalSamples({10.0, 15.0}, {{3.0, 2.9}, {2.9, 2.6}}, 1000, samples); maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; maths::CTimeSeriesCorrelations correlations{MINIMUM_SIGNIFICANT_CORRELATION, DECAY_RATE}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; - maths::CUnivariateTimeSeriesModel models[]{{params(bucketLength), 0, trend, prior, 0}, - {params(bucketLength), 1, trend, prior, 0}}; + maths::CUnivariateTimeSeriesModel models[]{{modelParams(bucketLength), 0, trend, prior, 0}, + {modelParams(bucketLength), 1, trend, prior, 0}}; models[0].modelCorrelations(correlations); models[1].modelCorrelations(correlations); CTimeSeriesCorrelateModelAllocator allocator; @@ -2042,14 +1901,10 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations(void) for (auto sample : samples) { correlations.refresh(allocator); - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - models[0].addSamples(params, {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)}); - models[1].addSamples(params, {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)}); + models[0].addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)}); + models[1].addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)}); correlations.processSamples(maths::CConstantWeights::COUNT); time += bucketLength; } @@ -2072,15 +1927,17 @@ void CTimeSeriesModelTest::testAnomalyModel(void) LOG_DEBUG("| CTimeSeriesModelTest::testAnomalyModel |"); LOG_DEBUG("+------------------------------------------+"); + // We test we can find the "odd anomaly out". 
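The check below keeps the smallest (log-probability, bucket) pairs seen while streaming the data and then compares the surviving buckets with the injected anomalies. A self-contained sketch of that bookkeeping in plain C++, with std::partial_sort standing in for the order-statistics accumulator the test uses:

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    using TDoubleSizePr = std::pair<double, std::size_t>;

    // Return the n smallest (log-probability, bucket) pairs, i.e. the n most
    // anomalous buckets; pairs compare lexicographically, so ties on the
    // probability fall back to the bucket index.
    std::vector<TDoubleSizePr> nMostAnomalous(std::vector<TDoubleSizePr> scores, std::size_t n)
    {
        n = std::min(n, scores.size());
        std::partial_sort(scores.begin(), scores.begin() + n, scores.end());
        scores.resize(n);
        return scores;
    }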
+ using TSizeVec = std::vector; using TDoubleSizePr = std::pair; test::CRandomNumbers rng; + std::size_t length = 2000; + LOG_DEBUG("Univariate") { - std::size_t length = 2000; - TSizeVec anomalies; rng.generateUniformSamples(0, length, 30, anomalies); std::sort(anomalies.begin(), anomalies.end()); @@ -2090,7 +1947,7 @@ void CTimeSeriesModelTest::testAnomalyModel(void) core_t::TTime bucketLength{600}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 1, trend, univariateNormal()}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 1, trend, univariateNormal()}; //std::ofstream file; //file.open("results.m"); @@ -2101,7 +1958,7 @@ void CTimeSeriesModelTest::testAnomalyModel(void) TDouble2Vec4VecVec weights{weight}; std::size_t bucket{0}; core_t::TTime time{0}; - for (auto &&sample : samples) + for (auto &sample : samples) { if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++)) { @@ -2111,31 +1968,18 @@ void CTimeSeriesModelTest::testAnomalyModel(void) { sample += 8.0; } - { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); - } - { - maths::CModelProbabilityParams params; - params.addCalculation(maths_t::E_TwoSided) - .seasonalConfidenceInterval(50.0) - .addBucketEmpty({false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); - TTail2Vec tail; - double probability; - bool conditional; - TSize1Vec mostAnomalousCorrelate; - model.probability(params, {{time}}, {{sample}}, - probability, tail, conditional, mostAnomalousCorrelate); - mostAnomalous.add({::log(probability), bucket}); - //scores.push_back(maths::CTools::deviation(probability)); - } + model.addSamples(addSampleParams(weights), + {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + + TTail2Vec tail; + double probability; + bool conditional; + TSize1Vec mostAnomalousCorrelate; + model.probability(computeProbabilityParams(weight), {{time}}, {{sample}}, + probability, tail, conditional, mostAnomalousCorrelate); + mostAnomalous.add({std::log(probability), bucket}); + //scores.push_back(maths::CTools::deviation(probability)); + time += bucketLength; } @@ -2168,20 +2012,20 @@ void CTimeSeriesModelTest::testAnomalyModel(void) LOG_DEBUG("Multivariate") { - std::size_t length = 2000; - TSizeVec anomalies; rng.generateUniformSamples(0, length, 30, anomalies); std::sort(anomalies.begin(), anomalies.end()); core_t::TTime bucketLength{600}; - TDoubleVec mean{10.0, 10.0, 10.0}; - TDoubleVecVec covariance{{4.0, 0.9, 0.5}, {0.9, 2.6, 0.1}, {0.5, 0.1, 3.0}}; TDoubleVecVec samples; - rng.generateMultivariateNormalSamples(mean, covariance, length, samples); + rng.generateMultivariateNormalSamples({10.0, 10.0, 10.0}, + {{4.0, 0.9, 0.5}, + {0.9, 2.6, 0.1}, + {0.5, 0.1, 3.0}}, + length, samples); maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; - maths::CMultivariateTimeSeriesModel model{params(bucketLength), trend, prior}; + maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), trend, prior}; //std::ofstream file; //file.open("results.m"); @@ -2192,9 +2036,9 @@ void CTimeSeriesModelTest::testAnomalyModel(void) TDouble2Vec4VecVec weights{weight}; core_t::TTime time{0}; std::size_t bucket{0}; 
-        for (auto &&sample : samples)
+        for (auto &sample : samples)
         {
-            for (auto &&coordinate : sample)
+            for (auto &coordinate : sample)
             {
                 if (std::binary_search(anomalies.begin(), anomalies.end(), bucket))
                 {
@@ -2206,34 +2050,18 @@
                 }
             }
             ++bucket;
-            {
-                maths::CModelAddSamplesParams params;
-                params.integer(false)
-                      .propagationInterval(1.0)
-                      .weightStyles(maths::CConstantWeights::COUNT)
-                      .trendWeights(weights)
-                      .priorWeights(weights);
-                model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)});
-            }
-            {
-                maths::CModelProbabilityParams params;
-                params.addCalculation(maths_t::E_TwoSided)
-                      .seasonalConfidenceInterval(50.0)
-                      .addBucketEmpty({false})
-                      .weightStyles(maths::CConstantWeights::COUNT)
-                      .addWeights(weight);
-                TTail2Vec tail;
-                double probability;
-                bool conditional;
-                TSize1Vec mostAnomalousCorrelate;
-                model.probability(params,
-                                  {{time}},
-                                  {(sample)},
-                                  probability, tail,
-                                  conditional, mostAnomalousCorrelate);
-                mostAnomalous.add({::log(probability), bucket});
-                //scores.push_back(maths::CTools::deviation(probability));
-            }
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
+
+            TTail2Vec tail;
+            double probability;
+            bool conditional;
+            TSize1Vec mostAnomalousCorrelate;
+            model.probability(computeProbabilityParams(weight), {{time}}, {(sample)},
+                              probability, tail, conditional, mostAnomalousCorrelate);
+            mostAnomalous.add({std::log(probability), bucket});
+            //scores.push_back(maths::CTools::deviation(probability));
+
             time += bucketLength;
         }
@@ -2270,6 +2098,218 @@
     }
 }
 
+void CTimeSeriesModelTest::testStepChangeDiscontinuities(void)
+{
+    LOG_DEBUG("+-------------------------------------------------------+");
+    LOG_DEBUG("|  CTimeSeriesModelTest::testStepChangeDiscontinuities  |");
+    LOG_DEBUG("+-------------------------------------------------------+");
+
+    // Check detection and modelling of step changes in data which are:
+    //   1) Piecewise constant
+    //   2) Saw tooth
+
+    using TDouble3Vec = core::CSmallVector<double, 3>;
+    using TDouble3VecVec = std::vector<TDouble3Vec>;
+
+    TDouble2Vec4VecVec weight{{{1.0}}};
+    auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel &model)
+        {
+            weight[0][0] = model.winsorisationWeight(1.0, time, {value});
+            model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight),
+                             {core::make_triple(time, TDouble2Vec{value}, TAG)});
+        };
+
+    //std::ostringstream actual, modelBounds;
+    //actual << "r = [";
+    //modelBounds << "x = [";
+    //auto updateTestDebug = [&](core_t::TTime time, double value, const maths::CUnivariateTimeSeriesModel &model)
+    //    {
+    //        actual << value << std::endl;
+    //        auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}});
+    //        if (x.size() == 3)
+    //        {
+    //            modelBounds << x[0][0] << "," << x[1][0] << "," << x[2][0] << std::endl;
+    //        }
+    //    };
+
+    test::CRandomNumbers rng;
+
+    LOG_DEBUG("Univariate: Piecewise Constant");
+    {
+        core_t::TTime bucketLength{600};
+        maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
+        auto controllers = decayRateControllers(1);
+        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0,
+                                                trend, univariateNormal(), &controllers};
+
+        // Add some data to the model.
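The data fed to the model here are piecewise constant: after each jump the level holds, with Gaussian noise, for several hundred buckets. A standalone sketch of the generator, using standard <random> in place of test::CRandomNumbers (elsewhere in these tests the 2.0 passed to generateNormalSamples is a variance, hence sqrt(2.0) for the standard deviation below):

    #include <cmath>
    #include <random>
    #include <vector>

    // Step the level by each jump in turn and emit ~300 noisy samples
    // at that level, mirroring the loop which follows.
    std::vector<double> piecewiseConstant(const std::vector<double> &jumps,
                                          double level,
                                          std::mt19937 &rng)
    {
        std::vector<double> result;
        for (auto dl : jumps)
        {
            level += dl;
            std::normal_distribution<> noise{level, std::sqrt(2.0)};
            for (int i = 0; i < 300; ++i)
            {
                result.push_back(noise(rng));
            }
        }
        return result;
    }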
+
+        core_t::TTime time{0};
+        TDoubleVec samples;
+        double level{20.0};
+        for (auto dl : {10.0, 20.0, 15.0, 50.0, 30.0, 40.0, 15.0, 40.0, 25.0})
+        {
+            level += dl;
+            rng.generateNormalSamples(level, 2.0, 300 + static_cast<std::size_t>(2.0 * dl), samples);
+            for (auto sample : samples)
+            {
+                updateModel(time, sample, model);
+                //updateTestDebug(time, sample, model);
+                time += bucketLength;
+            }
+        }
+        level += 30.0;
+        rng.generateNormalSamples(level, 2.0, 100, samples);
+        for (auto sample : samples)
+        {
+            updateModel(time, sample, model);
+            //updateTestDebug(time, sample, model);
+            time += bucketLength;
+        }
+
+        // Generate expected values from the same process.
+
+        TDoubleVec expected;
+        rng.generateNormalSamples(level, 2.0, 260, expected);
+        for (auto dl : {25.0, 40.0})
+        {
+            level += dl;
+            rng.generateNormalSamples(level, 2.0, 300 + static_cast<std::size_t>(2.0 * dl), samples);
+            expected.insert(expected.end(), samples.begin(), samples.end());
+        }
+        //std::for_each(expected.begin(), expected.end(),
+        //              [&actual](double sample) { actual << sample << std::endl; });
+
+        //std::ofstream file;
+        //file.open("forecast.m");
+        //file << actual.str() << "];";
+        //file << modelBounds.str() << "];";
+        //file << "y = [";
+        TDouble3VecVec forecast;
+        auto pushErrorBar = [&](const maths::SErrorBar &errorBar)
+            {
+                forecast.push_back({errorBar.s_LowerBound,
+                                    errorBar.s_Predicted,
+                                    errorBar.s_UpperBound});
+                //file << errorBar.s_LowerBound << ","
+                //     << errorBar.s_Predicted << ","
+                //     << errorBar.s_UpperBound << std::endl;
+            };
+
+        std::string m;
+        model.forecast(time, time + 800 * bucketLength, 90.0, {-1000.0}, {1000.0}, pushErrorBar, m);
+
+        //file << "];";
+
+        double outOfBounds{0.0};
+        for (std::size_t i = 0u; i < forecast.size(); ++i)
+        {
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[i], forecast[i][1], 0.1 * expected[i]);
+            outOfBounds += static_cast<double>(   expected[i] < forecast[i][0]
+                                               || expected[i] > forecast[i][2]);
+        }
+        double percentageOutOfBounds{100.0 * outOfBounds / static_cast<double>(forecast.size())};
+        LOG_DEBUG("% out-of-bounds = " << percentageOutOfBounds);
+        CPPUNIT_ASSERT(percentageOutOfBounds < 1.0);
+    }
+
+    LOG_DEBUG("Univariate: Saw Tooth");
+    {
+        core_t::TTime bucketLength{1800};
+        maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
+        auto controllers = decayRateControllers(1);
+        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0,
+                                                trend, univariateNormal(), &controllers};
+
+        // Add some data to the model.
+
+        core_t::TTime time{0};
+        double value{10.0};
+        TDoubleVec noise;
+        for (auto slope : {0.08, 0.056, 0.028, 0.044, 0.06, 0.03})
+        {
+            value = 5.0;
+            while (value < 95.0)
+            {
+                rng.generateNormalSamples(0.0, 2.0, 1, noise);
+                updateModel(time, value + noise[0], model);
+                //updateTestDebug(time, value + noise[0], model);
+                time += bucketLength;
+                value += slope;
+            }
+        }
+        for (auto slope : {0.042})
+        {
+            value = 5.0;
+            for (std::size_t i = 0u; i < 1500; ++i)
+            {
+                rng.generateNormalSamples(0.0, 2.0, 1, noise);
+                updateModel(time, value + noise[0], model);
+                //updateTestDebug(time, value + noise[0], model);
+                time += bucketLength;
+                value += slope;
+            }
+        }
+
+        // Generate expected values from the same process.
+
+        TDoubleVec expected;
+        for (auto slope : {0.05, 0.04})
+        {
+            while (expected.size() < 2000 && value < 95.0)
+            {
+                rng.generateNormalSamples(0.0, 2.0, 1, noise);
+                expected.push_back(value + noise[0]);
+                //actual << value + noise[0] << std::endl;
+                value += slope;
+            }
+            value = 5.0;
+        }
+
+        // Test forecasting.
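Forecast accuracy is scored as the percentage of expected values which fall outside the forecast confidence interval. A standalone sketch of that metric, treating each error bar as the (lower, predicted, upper) triple pushed by pushErrorBar:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <vector>

    // Percentage of actual values outside the corresponding forecast
    // interval [lower, upper].
    double percentageOutOfBounds(const std::vector<double> &actual,
                                 const std::vector<std::array<double, 3>> &forecast)
    {
        std::size_t n{std::min(actual.size(), forecast.size())};
        double outOfBounds{0.0};
        for (std::size_t i = 0u; i < n; ++i)
        {
            outOfBounds += static_cast<double>(   actual[i] < forecast[i][0]
                                               || actual[i] > forecast[i][2]);
        }
        return n == 0 ? 0.0 : 100.0 * outOfBounds / static_cast<double>(n);
    }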
+ + //std::ofstream file; + //file.open("forecast.m"); + //file << actual.str() << "];"; + //file << modelBounds.str() << "];"; + //file << "y = ["; + TDouble3VecVec forecast; + auto pushErrorBar = [&](const maths::SErrorBar &errorBar) + { + forecast.push_back({errorBar.s_LowerBound, + errorBar.s_Predicted, + errorBar.s_UpperBound}); + //file << errorBar.s_LowerBound << "," + // << errorBar.s_Predicted << "," + // << errorBar.s_UpperBound << std::endl; + }; + + std::string m; + model.forecast(time, time + 2000 * bucketLength, 90.0, {-1000.0}, {1000.0}, pushErrorBar, m); + + //file << "];"; + + double outOfBounds{0.0}; + for (std::size_t i = 0u; i < forecast.size(); ++i) + { + outOfBounds += static_cast( expected[i] < forecast[i][0] + || expected[i] > forecast[i][2]); + } + double percentageOutOfBounds{100.0 * outOfBounds / static_cast(forecast.size())}; + LOG_DEBUG("% out-of-bounds = " << percentageOutOfBounds); + CPPUNIT_ASSERT(percentageOutOfBounds < 10.0); + } +} + +void CTimeSeriesModelTest::daylightSaving(void) +{ + LOG_DEBUG("+----------------------------------------+"); + LOG_DEBUG("| CTimeSeriesModelTest::daylightSaving |"); + LOG_DEBUG("+----------------------------------------+"); + + // TODO +} + CppUnit::Test *CTimeSeriesModelTest::suite(void) { CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTimeSeriesModelTest"); @@ -2313,6 +2353,9 @@ CppUnit::Test *CTimeSeriesModelTest::suite(void) suiteOfTests->addTest( new CppUnit::TestCaller( "CTimeSeriesModelTest::testAnomalyModel", &CTimeSeriesModelTest::testAnomalyModel) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CTimeSeriesModelTest::testStepChangeDiscontinuities", + &CTimeSeriesModelTest::testStepChangeDiscontinuities) ); return suiteOfTests; } diff --git a/lib/maths/unittest/CTimeSeriesModelTest.h b/lib/maths/unittest/CTimeSeriesModelTest.h index ac92a39b99..5a3568e878 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.h +++ b/lib/maths/unittest/CTimeSeriesModelTest.h @@ -25,6 +25,8 @@ class CTimeSeriesModelTest : public CppUnit::TestFixture void testAddSamplesWithCorrelations(void); void testProbabilityWithCorrelations(void); void testAnomalyModel(void); + void testStepChangeDiscontinuities(void); + void daylightSaving(void); static CppUnit::Test *suite(void); }; diff --git a/lib/maths/unittest/CTrendComponentTest.cc b/lib/maths/unittest/CTrendComponentTest.cc index 54cfcd9621..50572d87e0 100644 --- a/lib/maths/unittest/CTrendComponentTest.cc +++ b/lib/maths/unittest/CTrendComponentTest.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -331,11 +332,16 @@ void CTrendComponentTest::testForecast() component.shiftOrigin(time); TDouble3VecVec forecast; - component.forecast(time, time + 1000 * bucketLength, 3600, 95.0, forecast); + component.forecast(time, time + 1000 * bucketLength, 3600, 95.0, + [](core_t::TTime) { return TDouble3Vec(3, 0.0); }, + [&forecast](core_t::TTime, const TDouble3Vec &value) + { + forecast.push_back(value); + }); TMeanAccumulator meanError; TMeanAccumulator meanErrorAt95; - for (auto &&errorbar : forecast) + for (auto &errorbar : forecast) { core_t::TTime bucket{(time - start) / bucketLength}; meanError.add( std::fabs((values[bucket] - errorbar[1]) @@ -431,10 +437,11 @@ void CTrendComponentTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); + maths::SDistributionRestoreParams params{maths_t::E_ContinuousData, 0.1}; maths::CTrendComponent 
restoredComponent{0.1}; traverser.traverseSubLevel(boost::bind(&maths::CTrendComponent::acceptRestoreTraverser, - &restoredComponent, _1)); + &restoredComponent, boost::cref(params), _1)); CPPUNIT_ASSERT_EQUAL(origComponent.checksum(), restoredComponent.checksum()); diff --git a/lib/model/CAnomalyDetectorModel.cc b/lib/model/CAnomalyDetectorModel.cc index 54789b0293..a512b962a7 100644 --- a/lib/model/CAnomalyDetectorModel.cc +++ b/lib/model/CAnomalyDetectorModel.cc @@ -610,15 +610,11 @@ CAnomalyDetectorModel::SFeatureModels::SFeatureModels(model_t::EFeature feature, {} bool CAnomalyDetectorModel::SFeatureModels::acceptRestoreTraverser(const SModelParams ¶ms_, - core::CStateRestoreTraverser &traverser) + core::CStateRestoreTraverser &traverser) { maths_t::EDataType dataType{s_NewModel->dataType()}; maths::SModelRestoreParams params{s_NewModel->params(), - maths::STimeSeriesDecompositionRestoreParams{ - CAnomalyDetectorModelConfig::trendDecayRate(params_.s_DecayRate, - params_.s_BucketLength), - params_.s_BucketLength, - params_.s_ComponentSize}, + params_.decompositionRestoreParams(dataType), params_.distributionRestoreParams(dataType)}; do { diff --git a/lib/model/CAnomalyDetectorModelConfig.cc b/lib/model/CAnomalyDetectorModelConfig.cc index b116195d50..2da6321f18 100644 --- a/lib/model/CAnomalyDetectorModelConfig.cc +++ b/lib/model/CAnomalyDetectorModelConfig.cc @@ -38,9 +38,6 @@ namespace model namespace { -typedef std::vector TSizeVec; -typedef std::vector TTimeVec; - const CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMap EMPTY_RULES_MAP; const CAnomalyDetectorModelConfig::TStrDetectionRulePrVec EMPTY_EVENTS; @@ -66,18 +63,24 @@ const std::size_t CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LA const std::size_t CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY(10); const double CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR(0.1); const core_t::TTime CAnomalyDetectorModelConfig::STANDARD_BUCKET_LENGTH(1800); +const std::size_t CAnomalyDetectorModelConfig::DEFAULT_BUCKET_RESULTS_DELAY(0); const double CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE(0.0005); const double CAnomalyDetectorModelConfig::DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER(4.0); const double CAnomalyDetectorModelConfig::DEFAULT_LEARN_RATE(1.0); const double CAnomalyDetectorModelConfig::DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION(0.05); const double CAnomalyDetectorModelConfig::DEFAULT_POPULATION_MINIMUM_MODE_FRACTION(0.05); const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT(12.0); -const double CAnomalyDetectorModelConfig::DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS(0.2); const double CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION(0.8); +const double CAnomalyDetectorModelConfig::DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS(0.2); const std::size_t CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE(36u); -const std::size_t CAnomalyDetectorModelConfig::DEFAULT_TOTAL_PROBABILITY_CALC_SAMPLING_SIZE(10u); +const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE(6 * core::constants::HOUR); +const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE(core::constants::DAY); const double CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET(1.0); const double CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF(0.5); +const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM(0.25); +const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM(4.0); 
+const double CAnomalyDetectorModelConfig::DEFAULT_CORRELATION_MODELS_OVERHEAD(3.0); +const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION(0.3); const double CAnomalyDetectorModelConfig::DEFAULT_AGGREGATION_STYLE_PARAMS[][model_t::NUMBER_AGGREGATION_PARAMS] = { { 0.0, 1.0, 1.0, 1.0 }, @@ -90,7 +93,6 @@ const double CAnomalyDetectorModelConfig::DEFAULT_AGGREGATION_STYLE_PARAMS[][mod const double CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY(0.035); const double CAnomalyDetectorModelConfig::DEFAULT_NOISE_PERCENTILE(50.0); const double CAnomalyDetectorModelConfig::DEFAULT_NOISE_MULTIPLIER(1.0); -const std::size_t CAnomalyDetectorModelConfig::DEFAULT_BUCKET_RESULTS_DELAY(0); const CAnomalyDetectorModelConfig::TDoubleDoublePr CAnomalyDetectorModelConfig::DEFAULT_NORMALIZED_SCORE_KNOT_POINTS[9] = { CAnomalyDetectorModelConfig::TDoubleDoublePr(0.0, 0.0), @@ -103,11 +105,6 @@ const CAnomalyDetectorModelConfig::TDoubleDoublePr CAnomalyDetectorModelConfig:: CAnomalyDetectorModelConfig::TDoubleDoublePr(99.9, 90.0), CAnomalyDetectorModelConfig::TDoubleDoublePr(100.0, 100.0) }; -const std::size_t CAnomalyDetectorModelConfig::DEFAULT_RESAMPLING_MAX_SAMPLES(40u); -const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM(0.25); -const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM(4.0); -const double CAnomalyDetectorModelConfig::DEFAULT_CORRELATION_MODELS_OVERHEAD(3.0); -const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION(0.3); CAnomalyDetectorModelConfig CAnomalyDetectorModelConfig::defaultConfig(core_t::TTime bucketLength, model_t::ESummaryMode summaryMode, @@ -197,7 +194,7 @@ CAnomalyDetectorModelConfig::CAnomalyDetectorModelConfig(void) : void CAnomalyDetectorModelConfig::bucketLength(core_t::TTime length) { m_BucketLength = length; - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->updateBucketLength(length); } @@ -754,7 +751,7 @@ CAnomalyDetectorModelConfig::factory(int identifier, void CAnomalyDetectorModelConfig::decayRate(double value) { - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->decayRate(value); } @@ -873,7 +870,6 @@ const std::string ONLINE_LEARN_RATE_PROPERTY("learnrate"); const std::string DECAY_RATE_PROPERTY("decayrate"); const std::string INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY("initialdecayratemultiplier"); const std::string MAXIMUM_UPDATES_PER_BUCKET_PROPERTY("maximumupdatesperbucket"); -const std::string TOTAL_PROBABILITY_CALC_SAMPLING_SIZE_PROPERTY("totalprobabilitycalcsamplingsize"); const std::string INDIVIDUAL_MODE_FRACTION_PROPERTY("individualmodefraction"); const std::string POPULATION_MODE_FRACTION_PROPERTY("populationmodefraction"); const std::string PEERS_MODE_FRACTION_PROPERTY("peersmodefraction"); @@ -912,7 +908,7 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } learnRate *= bucketNormalizationFactor(this->bucketLength()); - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->learnRate(learnRate); } @@ -928,7 +924,7 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } decayRate *= bucketNormalizationFactor(this->bucketLength()); - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->decayRate(decayRate); } @@ -943,7 +939,7 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre continue; } - for (auto 
&&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->initialDecayRateMultiplier(multiplier); } @@ -959,26 +955,11 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre continue; } - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->maximumUpdatesPerBucket(maximumUpdatesPerBucket); } } - else if (propName == TOTAL_PROBABILITY_CALC_SAMPLING_SIZE_PROPERTY) - { - int totalProbabilityCalcSamplingSize; - if ( core::CStringUtils::stringToType(propValue, totalProbabilityCalcSamplingSize) == false - || totalProbabilityCalcSamplingSize <= 0) - { - LOG_ERROR("Invalid value for property " << propName << " : " << propValue); - result = false; - continue; - } - for (auto &&factory : m_Factories) - { - factory.second->totalProbabilityCalcSamplingSize(totalProbabilityCalcSamplingSize); - } - } else if (propName == INDIVIDUAL_MODE_FRACTION_PROPERTY) { double fraction; @@ -1048,7 +1029,7 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre result = false; continue; } - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->componentSize(componentSize); } @@ -1062,7 +1043,7 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre result = false; continue; } - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->sampleCountFactor(factor); } @@ -1076,7 +1057,7 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre result = false; continue; } - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->pruneWindowScaleMinimum(factor); } @@ -1090,7 +1071,7 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre result = false; continue; } - for (auto &&factory : m_Factories) + for (auto &factory : m_Factories) { factory.second->pruneWindowScaleMaximum(factor); } diff --git a/lib/model/CInterimBucketCorrector.cc b/lib/model/CInterimBucketCorrector.cc index c1097be60b..a0b833c772 100644 --- a/lib/model/CInterimBucketCorrector.cc +++ b/lib/model/CInterimBucketCorrector.cc @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -42,15 +43,13 @@ double trendDecayRate(core_t::TTime bucketLength) CInterimBucketCorrector::CInterimBucketCorrector(core_t::TTime bucketLength) : m_BucketLength(bucketLength), m_CountTrend(trendDecayRate(bucketLength), bucketLength, COMPONENT_SIZE) -{ -} +{} CInterimBucketCorrector::CInterimBucketCorrector(const CInterimBucketCorrector &other) : m_BucketLength(other.m_BucketLength), m_CountTrend(other.m_CountTrend), m_CountMean(other.m_CountMean) -{ -} +{} core_t::TTime CInterimBucketCorrector::calcBucketMidPoint(core_t::TTime time) const { @@ -132,15 +131,23 @@ bool CInterimBucketCorrector::acceptRestoreTraverser(core::CStateRestoreTraverse do { const std::string &name = traverser.name(); - RESTORE_NO_ERROR(COUNT_TREND_TAG, - maths::CTimeSeriesDecomposition restored(trendDecayRate(m_BucketLength), - m_BucketLength, COMPONENT_SIZE, - traverser); - m_CountTrend.swap(restored)) + if (name == COUNT_TREND_TAG) + { + maths::SDistributionRestoreParams changeModelParams{maths_t::E_ContinuousData, + decayRate(m_BucketLength)}; + maths::STimeSeriesDecompositionRestoreParams params{trendDecayRate(m_BucketLength), + m_BucketLength, + COMPONENT_SIZE, + changeModelParams}; + maths::CTimeSeriesDecomposition restored(params, traverser); + m_CountTrend.swap(restored); + continue; + } 
RESTORE(COUNT_MEAN_TAG, m_CountMean.fromDelimited(traverser.value())) } while (traverser.next()); return true; } + } } diff --git a/lib/model/CModelFactory.cc b/lib/model/CModelFactory.cc index 0c97096b9a..d3af426b0b 100644 --- a/lib/model/CModelFactory.cc +++ b/lib/model/CModelFactory.cc @@ -82,7 +82,9 @@ CModelFactory::TMathsModelPtr maths::CModelParams params{bucketLength, m_ModelParams.s_LearnRate, m_ModelParams.s_DecayRate, - minimumSeasonalVarianceScale}; + minimumSeasonalVarianceScale, + m_ModelParams.s_MinimumTimeToDetectChange, + m_ModelParams.s_MaximumTimeToTestForChange}; std::size_t dimension{model_t::dimension(feature)}; @@ -284,11 +286,6 @@ void CModelFactory::pruneWindowScaleMaximum(double factor) m_ModelParams.s_PruneWindowScaleMaximum = factor; } -void CModelFactory::totalProbabilityCalcSamplingSize(std::size_t samplingSize) -{ - m_ModelParams.s_TotalProbabilityCalcSamplingSize = samplingSize; -} - void CModelFactory::multivariateByFields(bool enabled) { m_ModelParams.s_MultivariateByFields = enabled; diff --git a/lib/model/CModelParams.cc b/lib/model/CModelParams.cc index f23ca37b0b..6db4cd6836 100644 --- a/lib/model/CModelParams.cc +++ b/lib/model/CModelParams.cc @@ -39,11 +39,12 @@ SModelParams::SModelParams(core_t::TTime bucketLength) : s_MinimumModeCount(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT), s_CutoffToModelEmptyBuckets(CAnomalyDetectorModelConfig::DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS), s_ComponentSize(CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE), + s_MinimumTimeToDetectChange(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE), + s_MaximumTimeToTestForChange(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE), s_ExcludeFrequent(model_t::E_XF_None), s_ExcludePersonFrequency(0.1), s_ExcludeAttributeFrequency(0.1), s_MaximumUpdatesPerBucket(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET), - s_TotalProbabilityCalcSamplingSize(CAnomalyDetectorModelConfig::DEFAULT_TOTAL_PROBABILITY_CALC_SAMPLING_SIZE), s_InfluenceCutoff(CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF), s_LatencyBuckets(CAnomalyDetectorModelConfig::DEFAULT_LATENCY_BUCKETS), s_SampleCountFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY), @@ -80,12 +81,15 @@ double SModelParams::minimumCategoryCount(void) const return s_LearnRate * CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION; } +maths::STimeSeriesDecompositionRestoreParams SModelParams::decompositionRestoreParams(maths_t::EDataType dataType) const +{ + double decayRate{CAnomalyDetectorModelConfig::trendDecayRate(s_DecayRate, s_BucketLength)}; + return {decayRate, s_BucketLength, s_ComponentSize, this->distributionRestoreParams(dataType)}; +} + maths::SDistributionRestoreParams SModelParams::distributionRestoreParams(maths_t::EDataType dataType) const { - return maths::SDistributionRestoreParams(dataType, s_DecayRate, - s_MinimumModeFraction, - s_MinimumModeCount, - this->minimumCategoryCount()); + return {dataType, s_DecayRate, s_MinimumModeFraction, s_MinimumModeCount, this->minimumCategoryCount()}; } uint64_t SModelParams::checksum(uint64_t seed) const @@ -93,9 +97,16 @@ uint64_t SModelParams::checksum(uint64_t seed) const seed = maths::CChecksum::calculate(seed, s_LearnRate); seed = maths::CChecksum::calculate(seed, s_DecayRate); seed = maths::CChecksum::calculate(seed, s_InitialDecayRateMultiplier); + seed = maths::CChecksum::calculate(seed, s_MinimumModeFraction); + seed = maths::CChecksum::calculate(seed, 
s_MinimumModeCount); + seed = maths::CChecksum::calculate(seed, s_CutoffToModelEmptyBuckets); + seed = maths::CChecksum::calculate(seed, s_ComponentSize); + seed = maths::CChecksum::calculate(seed, s_MinimumTimeToDetectChange); + seed = maths::CChecksum::calculate(seed, s_MaximumTimeToTestForChange); seed = maths::CChecksum::calculate(seed, s_ExcludeFrequent); + seed = maths::CChecksum::calculate(seed, s_ExcludePersonFrequency); + seed = maths::CChecksum::calculate(seed, s_ExcludeAttributeFrequency); seed = maths::CChecksum::calculate(seed, s_MaximumUpdatesPerBucket); - seed = maths::CChecksum::calculate(seed, s_TotalProbabilityCalcSamplingSize); seed = maths::CChecksum::calculate(seed, s_InfluenceCutoff); seed = maths::CChecksum::calculate(seed, s_LatencyBuckets); seed = maths::CChecksum::calculate(seed, s_SampleCountFactor); @@ -105,7 +116,10 @@ uint64_t SModelParams::checksum(uint64_t seed) const seed = maths::CChecksum::calculate(seed, s_CorrelationModelsOverhead); seed = maths::CChecksum::calculate(seed, s_MultivariateByFields); seed = maths::CChecksum::calculate(seed, s_MinimumSignificantCorrelation); - return maths::CChecksum::calculate(seed, s_MinimumToDeduplicate); + //seed = maths::CChecksum::calculate(seed, s_DetectionRules); + //seed = maths::CChecksum::calculate(seed, s_ScheduledEvents); + seed = maths::CChecksum::calculate(seed, s_MinimumToDeduplicate); + return maths::CChecksum::calculate(seed, s_SamplingAgeCutoff); } } diff --git a/lib/model/unittest/CModelDetailsViewTest.cc b/lib/model/unittest/CModelDetailsViewTest.cc index cfe1fe2ec5..0926226e34 100644 --- a/lib/model/unittest/CModelDetailsViewTest.cc +++ b/lib/model/unittest/CModelDetailsViewTest.cc @@ -7,6 +7,7 @@ #include "CModelDetailsViewTest.h" #include +#include #include #include @@ -68,7 +69,9 @@ void CModelDetailsViewTest::testModelPlot() maths::CTimeSeriesDecomposition trend; maths::CNormalMeanPrecConjugate prior{ maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; - maths::CModelParams timeSeriesModelParams{bucketLength, 1.0, 0.001, 0.2}; + maths::CModelParams timeSeriesModelParams{bucketLength, 1.0, 0.001, 0.2, + 6 * core::constants::HOUR, + 24 * core::constants::HOUR}; maths::CUnivariateTimeSeriesModel timeSeriesModel{timeSeriesModelParams, 0, trend, prior}; model->mockTimeSeriesModels({model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(0)), model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(1)), diff --git a/lib/model/unittest/CModelToolsTest.cc b/lib/model/unittest/CModelToolsTest.cc index e22da367a6..f7e8622c84 100644 --- a/lib/model/unittest/CModelToolsTest.cc +++ b/lib/model/unittest/CModelToolsTest.cc @@ -7,6 +7,7 @@ #include "CModelToolsTest.h" #include +#include #include #include @@ -41,7 +42,9 @@ maths::CModelParams params(core_t::TTime bucketLength) static TTimeDoubleMap learnRates; learnRates[bucketLength] = static_cast(bucketLength) / 1800.0; double minimumSeasonalVarianceScale{MINIMUM_SEASONAL_SCALE}; - return maths::CModelParams{bucketLength, learnRates[bucketLength], DECAY_RATE, minimumSeasonalVarianceScale}; + return maths::CModelParams{bucketLength, learnRates[bucketLength], + DECAY_RATE, minimumSeasonalVarianceScale, + 6 * core::constants::HOUR, 24 * core::constants::HOUR}; } maths::CNormalMeanPrecConjugate normal(void) diff --git a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc index 4e873d0f54..ead1f7fe68 100644 --- 
a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc +++ b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc @@ -7,6 +7,7 @@ #include "CProbabilityAndInfluenceCalculatorTest.h" #include +#include #include #include @@ -82,7 +83,8 @@ maths::CModelParams params(core_t::TTime bucketLength) { double learnRate{static_cast(bucketLength) / 1800.0}; double minimumSeasonalVarianceScale{0.4}; - return maths::CModelParams{bucketLength, learnRate, 0.0, minimumSeasonalVarianceScale}; + return maths::CModelParams{bucketLength, learnRate, 0.0, minimumSeasonalVarianceScale, + 6 * core::constants::HOUR, 24 * core::constants::HOUR}; } std::size_t dimension(double) { return 1; } From 7202a17d128e4620b5cae75fcc0aad3953b770d0 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 15 Mar 2018 16:58:20 +0000 Subject: [PATCH 06/29] Less confusing naming of the anomaly score calculation --- include/maths/CTools.h | 12 +++--- lib/api/CResultNormalizer.cc | 2 +- lib/maths/CTimeSeriesModel.cc | 8 ++-- lib/maths/CTools.cc | 38 +++++++++---------- lib/maths/unittest/CToolsTest.cc | 12 +++--- lib/maths/unittest/CToolsTest.h | 2 +- lib/maths/unittest/TestUtils.cc | 2 +- lib/model/CAnomalyScore.cc | 4 +- lib/model/CHierarchicalResultsAggregator.cc | 2 +- lib/model/CHierarchicalResultsNormalizer.cc | 2 +- ...HierarchicalResultsProbabilityFinalizer.cc | 2 +- 11 files changed, 43 insertions(+), 43 deletions(-) diff --git a/include/maths/CTools.h b/include/maths/CTools.h index 4b9f3bdb85..77dd73d8f8 100644 --- a/include/maths/CTools.h +++ b/include/maths/CTools.h @@ -371,9 +371,9 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable static double safeCdfComplement(const chi_squared &chi2, double x); //@} - //! Compute the deviation from the probability of seeing a more - //! extreme event for a distribution, i.e. for a sample \f$x\f$ - //! from a R.V. the probability \f$P(R)\f$ of the set: + //! Compute the anomalousness from the probability of seeing a + //! more extreme event for a distribution, i.e. for a sample + //! \f$x\f$ from a R.V. the probability \f$P(R)\f$ of the set: //!
         //!   \f$ R = \{y\ |\ f(y) \leq f(x)\} \f$
         //! 
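For reference, the probability-to-score mapping behind these renames is piecewise in the probability p: zero above a largest significant probability, reciprocal-linear into (0, 1] down to a small probability, then log-linear into (1, 50] and (50, 100] as p shrinks further. A condensed, self-contained sketch follows; the threshold values are illustrative stand-ins for the library's constants and the edge handling is simplified:

    #include <algorithm>
    #include <cmath>

    // Illustrative stand-in thresholds, not the library's values.
    const double LARGEST_SIGNIFICANT = 0.05;
    const double SMALL = 1e-4;
    const double MINUSCULE = 1e-50;
    const double SMALLEST = 1e-300;

    double anomalyScoreSketch(double p)
    {
        p = std::max(p, SMALLEST);
        if (p >= LARGEST_SIGNIFICANT)
        {
            // Not significant: no anomaly.
            return 0.0;
        }
        if (p > SMALL)
        {
            // Linear in 1/p, rising towards 1 as p falls to SMALL.
            return (1.0 / p - 1.0 / LARGEST_SIGNIFICANT)
                 / (1.0 / SMALL - 1.0 / LARGEST_SIGNIFICANT);
        }
        if (p > MINUSCULE)
        {
            // Linear in -log(p), mapping (MINUSCULE, SMALL] to (1, 50].
            return 1.0 + 49.0 * std::log(SMALL / p) / std::log(SMALL / MINUSCULE);
        }
        // Linear in -log(p), mapping [SMALLEST, MINUSCULE] to (50, 100].
        return 50.0 + 50.0 * std::log(MINUSCULE / p) / std::log(MINUSCULE / SMALLEST);
    }

The inverse function simply inverts each piece in turn, which is why the round trip from probability to score and back in the updated CToolsTest below is exact to within the stated tolerance.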
@@ -382,10 +382,10 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable //! This is a monotonically decreasing function of \f$P(R)\f$ and //! is chosen so that for \f$P(R)\f$ near one it is zero and as //! \f$P(R) \rightarrow 0\f$ it saturates at 100. - static double deviation(double p); + static double anomalyScore(double p); - //! The inverse of the deviation function. - static double inverseDeviation(double deviation); + //! The inverse of the anomalyScore function. + static double inverseAnomalyScore(double deviation); //! \name Differential Entropy //! Compute the differential entropy of the specified distribution.\n\n diff --git a/lib/api/CResultNormalizer.cc b/lib/api/CResultNormalizer.cc index 5e5ffba5f8..915df63623 100644 --- a/lib/api/CResultNormalizer.cc +++ b/lib/api/CResultNormalizer.cc @@ -107,7 +107,7 @@ bool CResultNormalizer::handleRecord(const TStrStrUMap &dataRowFields) { const model::CAnomalyScore::CNormalizer *levelNormalizer = 0; double score = probability > m_ModelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::deviation(probability); + 0.0 : maths::CTools::anomalyScore(probability); if (level == ROOT_LEVEL) { levelNormalizer = &m_Normalizer.bucketNormalizer(); diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 25efa87f7f..e20aa75a9c 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -554,14 +554,14 @@ void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams ¶ms, double alpha{0.5 * std::min( (logp - LOG_LARGEST_ANOMALOUS_PROBABILITY) / (LOG_SMALL_PROBABILITY - LOG_LARGEST_ANOMALOUS_PROBABILITY), 1.0)}; double pGivenAnomalous{(pl + pu) / 2.0}; - double pScore{CTools::deviation(probability)}; - double pScoreGivenAnomalous{CTools::deviation(pGivenAnomalous)}; + double pScore{CTools::anomalyScore(probability)}; + double pScoreGivenAnomalous{CTools::anomalyScore(pGivenAnomalous)}; LOG_TRACE("features = " << features << " score(.) = " << pScore << " score(.|anomalous) = " << pScoreGivenAnomalous << " p = " << probability); - probability = std::min(CTools::inverseDeviation( (1.0 - alpha) * pScore - + alpha * pScoreGivenAnomalous), + probability = std::min(CTools::inverseAnomalyScore( (1.0 - alpha) * pScore + + alpha * pScoreGivenAnomalous), LARGEST_ANOMALOUS_PROBABILITY); } } diff --git a/lib/maths/CTools.cc b/lib/maths/CTools.cc index 9bf2700290..4d66a07080 100644 --- a/lib/maths/CTools.cc +++ b/lib/maths/CTools.cc @@ -2068,16 +2068,16 @@ double CTools::safeCdfComplement(const chi_squared &chi2, double x) namespace { -const double SMALL_PROBABILITY_DEVIATION = 1.0; -const double MINUSCULE_PROBABILITY_DEVIATION = 50.0; -const double MAX_DEVIATION = 100.0; +const double SMALL_PROBABILITY_ANOMALY_SCORE = 1.0; +const double MINUSCULE_PROBABILITY_ANOMALY_SCORE = 50.0; +const double MAX_ANOMALY_SCORE = 100.0; const double INV_LARGEST_SIGNIFICANT_PROBABILITY = 1.0 / LARGEST_SIGNIFICANT_PROBABILITY; const double INV_SMALL_PROBABILITY = 1.0 / SMALL_PROBABILITY; const double MINUS_LOG_SMALL_PROBABILITY = -std::log(SMALL_PROBABILITY); const double MINUS_LOG_MINUSCULE_PROBABILITY = -std::log(MINUSCULE_PROBABILITY); } -double CTools::deviation(double p) +double CTools::anomalyScore(double p) { const double MINUS_LOG_SMALLEST_PROBABILITY = -std::log(smallestProbability()); @@ -2090,7 +2090,7 @@ double CTools::deviation(double p) { // We use a linear scaling based on the inverse probability // into the range (0.0, 1.0]. 
- result = SMALL_PROBABILITY_DEVIATION + result = SMALL_PROBABILITY_ANOMALY_SCORE * (1.0 / adjP - INV_LARGEST_SIGNIFICANT_PROBABILITY) / (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY); } @@ -2098,8 +2098,8 @@ double CTools::deviation(double p) { // We use a linear scaling based on the log probability into // the range (1.0, 50.0]. - result = SMALL_PROBABILITY_DEVIATION - + (MINUSCULE_PROBABILITY_DEVIATION - SMALL_PROBABILITY_DEVIATION) + result = SMALL_PROBABILITY_ANOMALY_SCORE + + (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE) * (-std::log(adjP) - MINUS_LOG_SMALL_PROBABILITY) / (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY); } @@ -2107,14 +2107,14 @@ double CTools::deviation(double p) { // We use a linear scaling based on the log probability into // the range (50.0, 100.0]. - result = MINUSCULE_PROBABILITY_DEVIATION - + (MAX_DEVIATION - MINUSCULE_PROBABILITY_DEVIATION) + result = MINUSCULE_PROBABILITY_ANOMALY_SCORE + + (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE) * (-std::log(adjP) - MINUS_LOG_MINUSCULE_PROBABILITY) / (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY); } } - if (!(result >= 0.0 && result <= MAX_DEVIATION)) + if (!(result >= 0.0 && result <= MAX_ANOMALY_SCORE)) { LOG_ERROR("Deviation " << result << " out of range, p =" << p); } @@ -2122,34 +2122,34 @@ double CTools::deviation(double p) return result; } -double CTools::inverseDeviation(double deviation) +double CTools::inverseAnomalyScore(double deviation) { const double MINUS_LOG_SMALLEST_PROBABILITY = -std::log(smallestProbability()); double result = 0.0; - double adjDeviation = truncate(deviation, 0.0, MAX_DEVIATION); + double adjDeviation = truncate(deviation, 0.0, MAX_ANOMALY_SCORE); if (adjDeviation == 0.0) { result = (1.0 + LARGEST_SIGNIFICANT_PROBABILITY) / 2.0; } - else if (adjDeviation <= SMALL_PROBABILITY_DEVIATION) + else if (adjDeviation <= SMALL_PROBABILITY_ANOMALY_SCORE) { // We invert the linear scaling of the inverse probability // into the range (0.0, 1.0]. result = 1.0 / (INV_LARGEST_SIGNIFICANT_PROBABILITY + (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY) * deviation - / SMALL_PROBABILITY_DEVIATION); + / SMALL_PROBABILITY_ANOMALY_SCORE); } - else if (adjDeviation <= MINUSCULE_PROBABILITY_DEVIATION) + else if (adjDeviation <= MINUSCULE_PROBABILITY_ANOMALY_SCORE) { // We invert the linear scaling of the log probability // into the range (1.0, 50.0]. result = ::exp(-(MINUS_LOG_SMALL_PROBABILITY + (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY) - * (deviation - SMALL_PROBABILITY_DEVIATION) - / (MINUSCULE_PROBABILITY_DEVIATION - SMALL_PROBABILITY_DEVIATION))); + * (deviation - SMALL_PROBABILITY_ANOMALY_SCORE) + / (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE))); } else { @@ -2157,8 +2157,8 @@ double CTools::inverseDeviation(double deviation) // into the range (50.0, 100.0]. 
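// For example, solving this piece in closed form: a score s in (50.0, 100.0]
// maps back to p = exp(-(Lm + (Ls - Lm) * (s - 50.0) / 50.0)), where
// Lm = -log(MINUSCULE_PROBABILITY) and Ls = -log(smallestProbability()),
// which is the exact inverse of the corresponding forward map above.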
result = ::exp(-(MINUS_LOG_MINUSCULE_PROBABILITY + (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY) - * (deviation - MINUSCULE_PROBABILITY_DEVIATION) - / (MAX_DEVIATION - MINUSCULE_PROBABILITY_DEVIATION))); + * (deviation - MINUSCULE_PROBABILITY_ANOMALY_SCORE) + / (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE))); } if (!(result >= 0.0 && result <= 1.0)) diff --git a/lib/maths/unittest/CToolsTest.cc b/lib/maths/unittest/CToolsTest.cc index 87395c1db1..f7e0e16b76 100644 --- a/lib/maths/unittest/CToolsTest.cc +++ b/lib/maths/unittest/CToolsTest.cc @@ -1130,7 +1130,7 @@ void CToolsTest::testMixtureProbabilityOfLessLikelySample(void) } } -void CToolsTest::testDeviation(void) +void CToolsTest::testAnomalyScore(void) { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| CToolsTest::testDeviation |"); @@ -1141,9 +1141,9 @@ void CToolsTest::testDeviation(void) double p = 0.04; for (std::size_t i = 0u; i < 305; ++i, p *= 0.1) { - double deviation = CTools::deviation(p); - LOG_DEBUG("p = " << p << ", deviation = " << deviation); - CPPUNIT_ASSERT_DOUBLES_EQUAL(p, CTools::inverseDeviation(deviation), 1e-3 * p); + double anomalyScore = CTools::anomalyScore(p); + LOG_DEBUG("p = " << p << ", anomalyScore = " << anomalyScore); + CPPUNIT_ASSERT_DOUBLES_EQUAL(p, CTools::inverseAnomalyScore(anomalyScore), 1e-3 * p); } } @@ -1328,8 +1328,8 @@ CppUnit::Test *CToolsTest::suite(void) "CToolsTest::testMixtureProbabilityOfLessLikelySample", &CToolsTest::testMixtureProbabilityOfLessLikelySample) ); suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testDeviation", - &CToolsTest::testDeviation) ); + "CToolsTest::testAnomalyScore", + &CToolsTest::testAnomalyScore) ); suiteOfTests->addTest( new CppUnit::TestCaller( "CToolsTest::testSpread", &CToolsTest::testSpread) ); diff --git a/lib/maths/unittest/CToolsTest.h b/lib/maths/unittest/CToolsTest.h index 1c4dd3b410..8b877c0ac8 100644 --- a/lib/maths/unittest/CToolsTest.h +++ b/lib/maths/unittest/CToolsTest.h @@ -15,7 +15,7 @@ class CToolsTest : public CppUnit::TestFixture void testProbabilityOfLessLikelySample(void); void testIntervalExpectation(void); void testMixtureProbabilityOfLessLikelySample(void); - void testDeviation(void); + void testAnomalyScore(void); void testSpread(void); void testFastLog(void); void testMiscellaneous(void); diff --git a/lib/maths/unittest/TestUtils.cc b/lib/maths/unittest/TestUtils.cc index 46f17301ce..f046fbb978 100644 --- a/lib/maths/unittest/TestUtils.cc +++ b/lib/maths/unittest/TestUtils.cc @@ -203,7 +203,7 @@ bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculat return false; } - result = CTools::deviation((lowerBound + upperBound) / 2.0); + result = CTools::anomalyScore((lowerBound + upperBound) / 2.0); return true; } diff --git a/lib/model/CAnomalyScore.cc b/lib/model/CAnomalyScore.cc index bab954cc5f..3597263600 100644 --- a/lib/model/CAnomalyScore.cc +++ b/lib/model/CAnomalyScore.cc @@ -66,13 +66,13 @@ std::size_t addProbabilities(const TDoubleVec &probabilities, //! The function to convert probabilities to *raw* scores. double probabilityToScore(double probability) { - return maths::CTools::deviation(probability); + return maths::CTools::anomalyScore(probability); } //! The function to convert *raw* scores to probabilities. 
double scoreToProbability(double score) { - return maths::CTools::inverseDeviation(score); + return maths::CTools::inverseAnomalyScore(score); } // We use short field names to reduce the state size diff --git a/lib/model/CHierarchicalResultsAggregator.cc b/lib/model/CHierarchicalResultsAggregator.cc index 0cee80c669..eafdf0507f 100644 --- a/lib/model/CHierarchicalResultsAggregator.cc +++ b/lib/model/CHierarchicalResultsAggregator.cc @@ -227,7 +227,7 @@ void CHierarchicalResultsAggregator::aggregateLeaf(const TNode &node) node.s_AggregationStyle = style; node.s_SmallestChildProbability = probability; node.s_SmallestDescendantProbability = probability; - node.s_RawAnomalyScore = maths::CTools::deviation(probability); + node.s_RawAnomalyScore = maths::CTools::anomalyScore(probability); } void CHierarchicalResultsAggregator::aggregateNode(const TNode &node, bool pivot) diff --git a/lib/model/CHierarchicalResultsNormalizer.cc b/lib/model/CHierarchicalResultsNormalizer.cc index c50b2d7cf3..7df7380433 100644 --- a/lib/model/CHierarchicalResultsNormalizer.cc +++ b/lib/model/CHierarchicalResultsNormalizer.cc @@ -143,7 +143,7 @@ void CHierarchicalResultsNormalizer::visit(const CHierarchicalResults &/*results // scaled so that it sums to the bucket anomaly score. double score = node.probability() > m_ModelConfig.maximumAnomalousProbability() ? 0.0 : - maths::CTools::deviation(node.probability()); + maths::CTools::anomalyScore(node.probability()); switch (m_Job) { diff --git a/lib/model/CHierarchicalResultsProbabilityFinalizer.cc b/lib/model/CHierarchicalResultsProbabilityFinalizer.cc index de5e833d47..540e9fe292 100644 --- a/lib/model/CHierarchicalResultsProbabilityFinalizer.cc +++ b/lib/model/CHierarchicalResultsProbabilityFinalizer.cc @@ -19,7 +19,7 @@ void CHierarchicalResultsProbabilityFinalizer::visit(const CHierarchicalResults { if (node.s_RawAnomalyScore > 0.0) { - node.s_AnnotatedProbability.s_Probability = maths::CTools::inverseDeviation(node.s_RawAnomalyScore); + node.s_AnnotatedProbability.s_Probability = maths::CTools::inverseAnomalyScore(node.s_RawAnomalyScore); } } From 6b420dccde935ba183d060c8a816680df6076cf5 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 20 Mar 2018 09:50:41 +0000 Subject: [PATCH 07/29] Fix name change fallout --- lib/model/unittest/CHierarchicalResultsTest.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/model/unittest/CHierarchicalResultsTest.cc b/lib/model/unittest/CHierarchicalResultsTest.cc index 7262ec25f2..3fe42697c6 100644 --- a/lib/model/unittest/CHierarchicalResultsTest.cc +++ b/lib/model/unittest/CHierarchicalResultsTest.cc @@ -293,8 +293,8 @@ class CCheckScores : public model::CHierarchicalResultsVisitor { LOG_DEBUG(node.s_Spec.print() << " score = " << node.s_RawAnomalyScore - << ", expected score = " << maths::CTools::deviation(node.probability())); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CTools::deviation(node.probability()), + << ", expected score = " << maths::CTools::anomalyScore(node.probability())); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CTools::anomalyScore(node.probability()), node.s_RawAnomalyScore, 1e-10); } @@ -1609,7 +1609,7 @@ void CHierarchicalResultsTest::testNormalizer(void) double probability = extract.leafNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 
- 0.0 : maths::CTools::deviation(probability); + 0.0 : maths::CTools::anomalyScore(probability); itr->second->updateQuantiles(score); } for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) @@ -1622,7 +1622,7 @@ void CHierarchicalResultsTest::testNormalizer(void) double probability = extract.leafNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::deviation(probability); + 0.0 : maths::CTools::anomalyScore(probability); normalized.push_back(extract.leafNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); @@ -1650,7 +1650,7 @@ void CHierarchicalResultsTest::testNormalizer(void) double probability = extract.personNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::deviation(probability); + 0.0 : maths::CTools::anomalyScore(probability); itr->second->updateQuantiles(score); } for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) @@ -1663,7 +1663,7 @@ void CHierarchicalResultsTest::testNormalizer(void) double probability = extract.personNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::deviation(probability); + 0.0 : maths::CTools::anomalyScore(probability); normalized.push_back(extract.personNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); @@ -1690,7 +1690,7 @@ void CHierarchicalResultsTest::testNormalizer(void) double probability = extract.partitionNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::deviation(probability); + 0.0 : maths::CTools::anomalyScore(probability); itr->second->updateQuantiles(score); } for (std::size_t j = 0u; j < extract.partitionNodes().size(); ++j) @@ -1702,7 +1702,7 @@ void CHierarchicalResultsTest::testNormalizer(void) double probability = extract.partitionNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::deviation(probability); + 0.0 : maths::CTools::anomalyScore(probability); normalized.push_back(extract.partitionNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); @@ -1717,7 +1717,7 @@ void CHierarchicalResultsTest::testNormalizer(void) double probability = results.root()->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 
- 0.0 : maths::CTools::deviation(probability); + 0.0 : maths::CTools::anomalyScore(probability); expectedNormalizers.find(std::string("r"))->second->updateQuantiles(score); expectedNormalizers.find(std::string("r"))->second->normalize(score); From 69a96ef7ba26175dbf861965ae5347ffffd77324 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 20 Mar 2018 11:08:49 +0000 Subject: [PATCH 08/29] Switch to (more standard) logistic function --- include/maths/CTools.h | 13 ++++--------- lib/maths/CModel.cc | 6 +++--- lib/maths/CNaiveBayes.cc | 2 +- lib/maths/CSeasonalComponent.cc | 2 +- lib/maths/CSeasonalComponentAdaptiveBucketing.cc | 2 +- lib/maths/CTimeSeriesChangeDetector.cc | 4 ++-- lib/maths/CTrendComponent.cc | 2 +- lib/maths/unittest/CTimeSeriesDecompositionTest.cc | 10 +++++----- lib/maths/unittest/CTimeSeriesModelTest.cc | 2 +- lib/maths/unittest/Main.cc | 2 +- 10 files changed, 20 insertions(+), 25 deletions(-) diff --git a/include/maths/CTools.h b/include/maths/CTools.h index 77dd73d8f8..6b7b03d829 100644 --- a/include/maths/CTools.h +++ b/include/maths/CTools.h @@ -729,22 +729,17 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable return 1.0 / (1.0 + 1.0 / p); } - //! A smooth Heaviside function. + //! The logistic function. //! - //! This is a smooth version of the Heaviside function implemented - //! as \f$sigmoid\left(\frac{sign (x - 1)}{wb}\right)\f$ normalized - //! to the range [0, 1], where \f$b\f$ is \p boundary and \f$w\f$ - //! is \p width. Note, if \p sign is one this is a step up and if - //! it is -1 it is a step down. + //! i.e. \f$sigmoid\left(e^{sign (x - x0) / width}\right)\f$. //! //! \param[in] x The argument. //! \param[in] width The step width. //! \param[in] x0 The centre of the step. //! \param[in] sign Determines whether it's a step up or down. - static double smoothHeaviside(double x, double width, double x0 = 0.0, double sign = 1.0) + static double logisticFunction(double x, double width, double x0 = 0.0, double sign = 1.0) { - return sigmoid(std::exp(sign * (x - x0) / width)) - / sigmoid(std::exp(1.0 / width)); + return sigmoid(std::exp(sign / std::fabs(sign) * (x - x0) / width)); } //! A custom, numerically robust, implementation of \f$(1 - x) ^ p\f$. diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc index a6b7af772b..9a8b09bfe6 100644 --- a/lib/maths/CModel.cc +++ b/lib/maths/CModel.cc @@ -122,9 +122,9 @@ core_t::TTime CModelParams::minimumTimeToDetectChange(core_t::TTime timeSinceLas { // If there was a recent change then there is a chance that this is // a reversion of the previous change. We allow reversions to occur faster. 
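// (For reference: with sigmoid(p) = 1.0 / (1.0 + 1.0 / p) as defined above,
// the renamed logisticFunction computes sigmoid(exp(z)) = 1 / (1 + exp(-z))
// for z = (x - x0) / width when sign > 0, i.e. the standard logistic curve
// centred at x0 with steepness set by 1 / width; it is exactly 0.5 at
// x = x0, and a negative sign mirrors the step downwards.)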
- double revertFactor{CTools::smoothHeaviside( static_cast(timeSinceLastChangePoint) - / static_cast(m_MaximumTimeToTestForChange), - 0.1, 1.0)}; + double revertFactor{CTools::logisticFunction( static_cast(timeSinceLastChangePoint) + / static_cast(m_MaximumTimeToTestForChange), + 0.1, 1.0)}; return static_cast(std::ceil( (0.3 + 0.7 * revertFactor) * static_cast(m_MinimumTimeToDetectChange))); } diff --git a/lib/maths/CNaiveBayes.cc b/lib/maths/CNaiveBayes.cc index 4e38fdadd8..2ae52c8442 100644 --- a/lib/maths/CNaiveBayes.cc +++ b/lib/maths/CNaiveBayes.cc @@ -343,7 +343,7 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV double weight{1.0}; if (m_MinMaxLogLikelihoodToUseFeature) { - weight = CTools::smoothHeaviside( + weight = CTools::logisticFunction( (maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) / std::fabs(*m_MinMaxLogLikelihoodToUseFeature), 0.1); } diff --git a/lib/maths/CSeasonalComponent.cc b/lib/maths/CSeasonalComponent.cc index f580269de9..091cfbb273 100644 --- a/lib/maths/CSeasonalComponent.cc +++ b/lib/maths/CSeasonalComponent.cc @@ -259,7 +259,7 @@ double CSeasonalComponent::delta(core_t::TTime time, // a delta for the case that the difference from the mean // is 1/3 of the range. We force the delta to zero for values // significantly smaller than this. - double scale{CTools::smoothHeaviside(3.0 * min[0] / minmax.range(), 0.1, 1.0)}; + double scale{CTools::logisticFunction(3.0 * min[0] / minmax.range(), 0.1, 1.0)}; scale = CTools::truncate(1.002 * scale - 0.001, 0.0, 1.0); return -scale * min[0] * CTools::sign(shortPeriodValue); diff --git a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc index afe1a2dc0f..73265d0f3b 100644 --- a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc +++ b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc @@ -646,7 +646,7 @@ double CSeasonalComponentAdaptiveBucketing::predict(std::size_t bucket, core_t:: // We mean revert our predictions if trying to predict much further // ahead than the observed interval for the data. 
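// Concretely: writing x = extrapolateInterval / interval, the down-step
// logisticFunction(x, 0.1, 1.0, -1.0) below is close to one while we stay
// inside the observed window (x << 1) and decays to zero as we extrapolate
// well beyond it (x >> 1), smoothly handing over from the regression's
// prediction to its long-run mean.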
- double alpha{CTools::smoothHeaviside(extrapolateInterval / interval, 0.1, 1.0, -1.0)}; + double alpha{CTools::logisticFunction(extrapolateInterval / interval, 0.1, 1.0, -1.0)}; double beta{1.0 - alpha}; return alpha * regression.predict(t) + beta * regression.mean(); } diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index 1a2f6282c8..fc2b067a06 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -175,8 +175,8 @@ bool CUnivariateTimeSeriesChangeDetector::stopTesting() const core_t::TTime range{m_TimeRange.range()}; if (range > m_MinimumTimeToDetect) { - double scale{0.5 + CTools::smoothHeaviside(2.0 * m_CurrentEvidenceOfChange - / m_MinimumDeltaBicToDetect, 0.2, 1.0)}; + double scale{0.5 + CTools::logisticFunction(2.0 * m_CurrentEvidenceOfChange + / m_MinimumDeltaBicToDetect, 0.2, 1.0)}; return static_cast(range) > m_MinimumTimeToDetect + scale * static_cast( m_MaximumTimeToDetect - m_MinimumTimeToDetect); diff --git a/lib/maths/CTrendComponent.cc b/lib/maths/CTrendComponent.cc index e522eca736..70cb6825be 100644 --- a/lib/maths/CTrendComponent.cc +++ b/lib/maths/CTrendComponent.cc @@ -618,7 +618,7 @@ double CTrendComponent::weightOfPrediction(core_t::TTime time) const return 1.0; } - return CTools::smoothHeaviside(extrapolateInterval / interval, 0.1, 1.0, -1.0); + return CTools::logisticFunction(extrapolateInterval / interval, 0.1, 1.0, -1.0); } CTrendComponent::SModel::SModel(double weight) diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc index 63b5636b54..4bf9c88fc4 100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc @@ -161,8 +161,8 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines(void) //file << "plot(t(1:length(fe)), fe, 'r');\n"; //file << "plot(t(1:length(r)), r, 'k');\n"; - CPPUNIT_ASSERT(totalSumResidual < 0.018 * totalSumValue); - CPPUNIT_ASSERT(totalMaxResidual < 0.021 * totalMaxValue); + CPPUNIT_ASSERT(totalSumResidual < 0.019 * totalSumValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.020 * totalMaxValue); CPPUNIT_ASSERT(totalPercentileError < 0.01 * totalSumValue); } @@ -1751,7 +1751,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity(void) totalMaxValue += maxValue; CPPUNIT_ASSERT(sumResidual / sumValue < 0.4); - CPPUNIT_ASSERT(maxResidual / maxValue < 0.4); + CPPUNIT_ASSERT(maxResidual / maxValue < 0.45); } lastDay += DAY; } @@ -2143,9 +2143,9 @@ void CTimeSeriesDecompositionTest::testConditionOfTrend(void) maths::CTimeSeriesDecomposition decomposition(0.0005, bucketLength); TDoubleVec noise; - for (core_t::TTime time = 0; time < 10 * YEAR; time += 6 * HOUR) + for (core_t::TTime time = 0; time < 9 * YEAR; time += 6 * HOUR) { - rng.generateNormalSamples(0.0, 3.0, 1, noise); + rng.generateNormalSamples(0.0, 4.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); if (time > 10 * WEEK) { diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index 8c7f7f088c..b4c1353eb9 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -2297,7 +2297,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities(void) } double percentageOutOfBounds{100.0 * outOfBounds / static_cast(forecast.size())}; LOG_DEBUG("% out-of-bounds = " << percentageOutOfBounds); - CPPUNIT_ASSERT(percentageOutOfBounds < 10.0); + 
CPPUNIT_ASSERT(percentageOutOfBounds < 11.0); } } diff --git a/lib/maths/unittest/Main.cc b/lib/maths/unittest/Main.cc index 851d04ac80..25fe8c5365 100644 --- a/lib/maths/unittest/Main.cc +++ b/lib/maths/unittest/Main.cc @@ -150,10 +150,10 @@ int main(int argc, const char **argv) runner.addTest( CSolversTest::suite() ); runner.addTest( CSplineTest::suite() ); runner.addTest( CStatisticalTestsTest::suite() ); + runner.addTest( CTimeSeriesChangeDetectorTest::suite() ); runner.addTest( CTimeSeriesDecompositionTest::suite() ); runner.addTest( CTimeSeriesModelTest::suite() ); runner.addTest( CToolsTest::suite() ); - runner.addTest( CTimeSeriesChangeDetectorTest::suite() ); runner.addTest( CTrendComponentTest::suite() ); runner.addTest( CTrendTestsTest::suite() ); runner.addTest( CXMeansTest::suite() ); From 4bc1e53c4185c900b46931c2489bb724b1d7abda Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 21 Mar 2018 09:40:51 +0000 Subject: [PATCH 09/29] Tidy up expectation w.r.t. marginal likelihood --- include/maths/CPriorDetail.h | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/include/maths/CPriorDetail.h b/include/maths/CPriorDetail.h index a1cbce2d7b..d8cd144ced 100644 --- a/include/maths/CPriorDetail.h +++ b/include/maths/CPriorDetail.h @@ -13,21 +13,6 @@ namespace ml namespace maths { -//! Compute the expectation of the specified function w.r.t. to the marginal -//! likelihood. -//! -//! This computes the expectation using order three Gauss-Legendre quadrature -//! in \p numberIntervals subdivisions of a high confidence interval for the -//! marginal likelihood. -//! -//! \param f The function to integrate. -//! \param numberIntervals The number intervals to use for integration. -//! \param result Filled in with the result if the expectation could be calculated. -//! -//! \tparam F This must conform to the function type expected by -//! CIntegration::gaussLegendre. -//! \tparam T The return type of the function F which must conform to the type -//! expected by CIntegration::gaussLegendre. 
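// For intuition, the expectation being tidied here integrates f against the
// marginal likelihood over numberIntervals subdivisions of a high-confidence
// interval, using order-three Gauss-Legendre quadrature on each subdivision.
// A free-standing sketch of that rule (it mirrors, but is not, the library's
// CIntegration::gaussLegendre):
//
//     #include <cmath>
//     #include <cstddef>
//     #include <functional>
//
//     double gaussLegendre3(const std::function<double (double)> &f,
//                           double a, double b, std::size_t n)
//     {
//         // Order-three abscissas and weights on [-1, 1].
//         const double abscissas[]{-std::sqrt(0.6), 0.0, std::sqrt(0.6)};
//         const double weights[]{5.0 / 9.0, 8.0 / 9.0, 5.0 / 9.0};
//         double dx{(b - a) / static_cast<double>(n)};
//         double result{0.0};
//         for (std::size_t i = 0u; i < n; ++i)
//         {
//             // Map the rule onto the i'th subinterval.
//             double centre{a + (static_cast<double>(i) + 0.5) * dx};
//             for (std::size_t j = 0u; j < 3; ++j)
//             {
//                 result += weights[j] * f(centre + 0.5 * dx * abscissas[j]);
//             }
//         }
//         return 0.5 * dx * result;
//     }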
template bool CPrior::expectation(const F &f, std::size_t numberIntervals, @@ -43,16 +28,16 @@ bool CPrior::expectation(const F &f, result = T(); - double n = static_cast(numberIntervals); - TDoubleDoublePr interval = + double n{static_cast(numberIntervals)}; + TDoubleDoublePr interval{ this->marginalLikelihoodConfidenceInterval(100.0 - 1.0 / (100.0 * n), weightStyles, - weight); - double x = interval.first; - double dx = (interval.second - interval.first) / n; + weight)}; + double x{interval.first}; + double dx{(interval.second - interval.first) / n}; - double normalizationFactor = 0.0; - TDouble4Vec1Vec weights(1, weight); + double normalizationFactor{0.0}; + TDouble4Vec1Vec weights{weight}; CPrior::CLogMarginalLikelihood logLikelihood(*this, weightStyles, weights); CCompositeFunctions::CExp likelihood(logLikelihood); for (std::size_t i = 0u; i < numberIntervals; ++i, x += dx) From 893f0b26f8e1abe1a815c12d300de14478910050 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 23 Mar 2018 15:34:01 +0000 Subject: [PATCH 10/29] Bad merge --- lib/maths/CTimeSeriesModel.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index c6b8af3da5..65042756ae 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -2835,7 +2835,7 @@ const CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf &CMultivariateTimeSeri return m_SlidingWindow; } -const CMultivariateTimeSeriesModel::TDecompositionPtr10Vec &CMultivariateTimeSeriesModel::trend(void) const +const CMultivariateTimeSeriesModel::TDecompositionPtr10Vec &CMultivariateTimeSeriesModel::trendModel(void) const { return m_TrendModel; } From 447f31ab10fa3965166fd55adb27b179f2fccbc4 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 26 Mar 2018 14:58:57 +0100 Subject: [PATCH 11/29] [ML] Implements an absolute goodness-of-fit test to accept a change (#21) This implements an absolute "goodness-of-fit" test for each change, by additionally testing a change versus its expected BIC given the residual distribution. It means we will only accept changes if they are a reasonably accurate description of the change currently occurring in the time series. --- include/maths/CTimeSeriesChangeDetector.h | 23 +++- lib/maths/CTimeSeriesChangeDetector.cc | 148 +++++++++++++++++----- 2 files changed, 136 insertions(+), 35 deletions(-) diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h index 55659e361d..6d3cac3be0 100644 --- a/include/maths/CTimeSeriesChangeDetector.h +++ b/include/maths/CTimeSeriesChangeDetector.h @@ -90,7 +90,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector const TPriorPtr &residualModel, core_t::TTime minimumTimeToDetect = 6 * core::constants::HOUR, core_t::TTime maximumTimeToDetect = core::constants::DAY, - double minimumDeltaBicToDetect = 12.0); + double minimumDeltaBicToDetect = 14.0); //! Initialize by reading state from \p traverser. bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, @@ -182,6 +182,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable //! The BIC of applying the change. virtual double bic() const = 0; + //! The expected BIC of applying the change. + virtual double expectedBic() const = 0; + //! Get a description of the change. virtual TOptionalChangeDescription change() const = 0; @@ -214,10 +217,14 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable //! Get the log-likelihood. double logLikelihood() const; - //! 
Update the data log-likelihood with \p logLikelihood. void addLogLikelihood(double logLikelihood); + //! Get the expected log-likelihood. + double expectedLogLikelihood() const; + //! Update the expected data log-likelihood with \p logLikelihood. + void addExpectedLogLikelihood(double logLikelihood); + //! Get the time series trend model. const CTimeSeriesDecompositionInterface &trendModel() const; //! Get the time series trend model. @@ -234,6 +241,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable //! The likelihood of the data under this model. double m_LogLikelihood; + //! The expected log-likelihood of the data under this model. + double m_ExpectedLogLikelihood; + //! A model decomposing the time series trend. TDecompositionPtr m_TrendModel; @@ -258,6 +268,9 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode //! Returns the no change BIC. virtual double bic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; + //! Returns a null object. virtual TOptionalChangeDescription change() const; @@ -292,6 +305,9 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo //! The BIC of applying the level shift. virtual double bic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; + //! Get a description of the level shift. virtual TOptionalChangeDescription change() const; @@ -341,6 +357,9 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod //! The BIC of applying the time shift. virtual double bic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; + //! Get a description of the time shift. virtual TOptionalChangeDescription change() const; diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index fc2b067a06..e0a957c0b3 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,6 @@ using TDouble1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; using TDouble4Vec1Vec = core::CSmallVector; using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription; - const std::string MINIMUM_TIME_TO_DETECT{"a"}; const std::string MAXIMUM_TIME_TO_DETECT{"b"}; const std::string MINIMUM_DELTA_BIC_TO_DETECT{"c"}; @@ -52,9 +52,12 @@ const std::string MIN_TIME_TAG{"g"}; const std::string MAX_TIME_TAG{"h"}; const std::string CHANGE_MODEL_TAG{"i"}; const std::string LOG_LIKELIHOOD_TAG{"j"}; -const std::string SHIFT_TAG{"k"}; -const std::string TREND_MODEL_TAG{"l"}; -const std::string RESIDUAL_MODEL_TAG{"m"}; +const std::string EXPECTED_LOG_LIKELIHOOD_TAG{"k"}; +const std::string SHIFT_TAG{"l"}; +const std::string TREND_MODEL_TAG{"m"}; +const std::string RESIDUAL_MODEL_TAG{"n"}; +const std::size_t EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS{4u}; +const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9}; } SChangeDescription::SChangeDescription(EDescription description, @@ -160,12 +163,25 @@ TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() double evidences[]{noChangeBic - candidates[0].first, noChangeBic - candidates[1].first}; - m_CurrentEvidenceOfChange = evidences[0]; - if ( evidences[0] > m_MinimumDeltaBicToDetect - && evidences[0] > evidences[1] + m_MinimumDeltaBicToDetect / 2.0) + double expectedEvidence{noChangeBic - 
(*candidates[0].second)->expectedBic()}; + + double x[]{evidences[0] / m_MinimumDeltaBicToDetect, + 2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect, + evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence, + static_cast(m_TimeRange.range() - m_MinimumTimeToDetect) + / static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; + double p{ CTools::logisticFunction(x[0], 0.05, 1.0) + * CTools::logisticFunction(x[1], 0.1, 1.0) + * (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) + * (0.5 + CTools::logisticFunction(x[3], 0.2, 0.5))}; + LOG_TRACE("p = " << p); + + if (p > 0.0625/*= std::pow(0.5, 4.0)*/) { return (*candidates[0].second)->change(); } + + m_CurrentEvidenceOfChange = evidences[0]; } return TOptionalChangeDescription(); } @@ -227,9 +243,34 @@ namespace time_series_change_detector_detail CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr &trendModel, const TPriorPtr &residualModel) : - m_LogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel} + m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0}, + m_TrendModel{trendModel}, m_ResidualModel{residualModel} {} +bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/, + core::CStateRestoreTraverser &traverser) +{ + do + { + const std::string name{traverser.name()}; + RESTORE_BUILT_IN(LOG_LIKELIHOOD_TAG, m_LogLikelihood); + RESTORE_BUILT_IN(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood); + return true; + } + while (traverser.next()); + return true; +} + +void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + inserter.insertValue(LOG_LIKELIHOOD_TAG, + m_LogLikelihood, + core::CIEEE754::E_SinglePrecision); + inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG, + m_ExpectedLogLikelihood, + core::CIEEE754::E_SinglePrecision); +} + void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { // Note if the trend and residual models are shallow copied their @@ -249,6 +290,7 @@ std::size_t CUnivariateChangeModel::memoryUsage() const uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_LogLikelihood); + seed = CChecksum::calculate(seed, m_ExpectedLogLikelihood); seed = CChecksum::calculate(seed, m_TrendModel); return CChecksum::calculate(seed, m_ResidualModel); } @@ -271,6 +313,16 @@ void CUnivariateChangeModel::addLogLikelihood(double logLikelihood) m_LogLikelihood += logLikelihood; } +double CUnivariateChangeModel::expectedLogLikelihood() const +{ + return m_ExpectedLogLikelihood; +} + +void CUnivariateChangeModel::addExpectedLogLikelihood(double logLikelihood) +{ + m_ExpectedLogLikelihood += logLikelihood; +} + const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const { return *m_TrendModel; @@ -301,24 +353,15 @@ CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr &tren CUnivariateChangeModel{trendModel, residualModel} {} -bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/, +bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) { - do - { - const std::string name{traverser.name()}; - RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, - double logLikelihood, - core::CStringUtils::stringToType(traverser.value(), logLikelihood), - this->addLogLikelihood(logLikelihood)) - } - while (traverser.next()); - return true; + return 
this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser); } void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const { - inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); + this->CUnivariateChangeModel::acceptPersistInserter(inserter); } double CUnivariateNoChangeModel::bic() const @@ -326,6 +369,13 @@ double CUnivariateNoChangeModel::bic() const return -2.0 * this->logLikelihood(); } +double CUnivariateNoChangeModel::expectedBic() const +{ + // This is irrelevant since this is only used for deciding + // whether to accept a change. + return this->bic(); +} + TOptionalChangeDescription CUnivariateNoChangeModel::change() const { return TOptionalChangeDescription(); @@ -348,7 +398,7 @@ void CUnivariateNoChangeModel::addSamples(std::size_t count, samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0)); } - double logLikelihood; + double logLikelihood{0.0}; if (this->residualModel().jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { @@ -377,13 +427,13 @@ CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr & bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) { + if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) + { + return false; + } do { const std::string name{traverser.name()}; - RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, - double logLikelihood, - core::CStringUtils::stringToType(traverser.value(), logLikelihood), - this->addLogLikelihood(logLikelihood)) RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value())) RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) @@ -395,7 +445,7 @@ bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParam void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const { - inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); + this->CUnivariateChangeModel::acceptPersistInserter(inserter); inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited()); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), @@ -407,6 +457,11 @@ double CUnivariateLevelShiftModel::bic() const return -2.0 * this->logLikelihood() + std::log(m_SampleCount); } +double CUnivariateLevelShiftModel::expectedBic() const +{ + return -2.0 * this->expectedLogLikelihood() + std::log(m_SampleCount); +} + TOptionalChangeDescription CUnivariateLevelShiftModel::change() const { // The "magic" 0.9 is due to the fact that the trend is updated @@ -456,12 +511,24 @@ void CUnivariateLevelShiftModel::addSamples(std::size_t count, residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); - double logLikelihood; + double logLikelihood{0.0}; if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { this->addLogLikelihood(logLikelihood); } + for (const auto &weight : weights) + { + double expectedLogLikelihood{0.0}; + TDouble4Vec1Vec weight_{weight}; + if (residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{ + residualModel, weightStyles, weight_}, + EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, + expectedLogLikelihood, weightStyles, weight)) + { + this->addExpectedLogLikelihood(expectedLogLikelihood); + } + } } 
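// Taken together, the detector above now accepts a change only when several
// soft conditions hold simultaneously: the best candidate's evidence clears
// the delta-BIC threshold, it is well separated from the runner-up, it is
// comparable with its expected value (the goodness-of-fit test this patch
// adds), and enough of the test window has elapsed. The first three
// conditions each contribute a logistic factor passing through one half at
// its threshold, the elapsed-time factor sweeps from one half to three
// halves, and the product must exceed 0.0625 = 0.5^4.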
} @@ -487,13 +554,13 @@ CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &tr bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) { + if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) + { + return false; + } do { const std::string name{traverser.name()}; - RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG, - double logLikelihood, - core::CStringUtils::stringToType(traverser.value(), logLikelihood), - this->addLogLikelihood(logLikelihood)) RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) } while (traverser.next()); @@ -502,7 +569,7 @@ bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const { - inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood()); + this->CUnivariateChangeModel::acceptPersistInserter(inserter); inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(this->residualModel()), _1)); } @@ -512,6 +579,11 @@ double CUnivariateTimeShiftModel::bic() const return -2.0 * this->logLikelihood(); } +double CUnivariateTimeShiftModel::expectedBic() const +{ + return -2.0 * this->expectedLogLikelihood(); +} + TOptionalChangeDescription CUnivariateTimeShiftModel::change() const { return SChangeDescription{SChangeDescription::E_TimeShift, @@ -540,12 +612,22 @@ void CUnivariateTimeShiftModel::addSamples(std::size_t count, residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); - double logLikelihood; + double logLikelihood{0.0}; if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { this->addLogLikelihood(logLikelihood); } + for (const auto &weight : weights) + { + double expectedLogLikelihood{0.0}; + TDouble4Vec1Vec weight_{weight}; + residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{ + residualModel, weightStyles, weight_}, + EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, + expectedLogLikelihood, weightStyles, weight); + this->addExpectedLogLikelihood(expectedLogLikelihood); + } } } From e763bc39bccdd5c317edc2aa4fc2cf92963f708e Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 4 Apr 2018 12:25:39 +0100 Subject: [PATCH 12/29] [ML] Linear scaling change detection (#25) This implements detection of linear scaling events. It also finishes up the unit testing of change detection and fixes some issues these turned up: specifically, 1) the behaviour when a change is detected but the trend model has no components, 2) the handling of time shifts in the trend model and 3) the handling of data types in the trend component change model. Finally, we are now more careful with the weights we apply to samples added to both the standard and change models. This has meant I've been able to revert scaling the changes, since the trend is less influenced by values during the change detection period if we're likely to detect a change. 
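To make the new linear-scale hypothesis concrete, here is a stripped-down sketch of how such a candidate can be scored against "no change": estimate the scale, accumulate the residual log-likelihoods under each hypothesis, and charge the one fitted parameter log(n) in the BIC. This is an illustration only; the production models accumulate these quantities online through addSamples, and logLikelihood below is a hypothetical stand-in for the residual model's jointLogMarginalLikelihood:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Delta BIC evidence that the trend's predictions have been linearly
    // scaled. Assumes predictions are bounded away from zero.
    double linearScaleEvidence(const std::vector<double> &values,
                               const std::vector<double> &predictions,
                               double (*logLikelihood)(double residual))
    {
        // Estimate the scale as the mean ratio of value to prediction.
        double scale{0.0};
        for (std::size_t i = 0u; i < values.size(); ++i)
        {
            scale += values[i] / predictions[i];
        }
        scale /= static_cast<double>(values.size());

        double noChange{0.0};
        double withScale{0.0};
        for (std::size_t i = 0u; i < values.size(); ++i)
        {
            noChange  += logLikelihood(values[i] - predictions[i]);
            withScale += logLikelihood(values[i] - scale * predictions[i]);
        }

        // The scale is one fitted parameter, costing log(n) in the BIC.
        double n{static_cast<double>(values.size())};
        return -2.0 * noChange - (-2.0 * withScale + std::log(n));
    }

The detector accepts the strongest candidate only if evidence of this kind clears the delta-BIC threshold and survives the expected-BIC goodness-of-fit comparison introduced in the previous patch.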
--- include/maths/CCalendarComponent.h | 3 + .../CCalendarComponentAdaptiveBucketing.h | 3 + include/maths/CModel.h | 2 +- include/maths/CRegression.h | 20 +- include/maths/CSeasonalComponent.h | 3 + .../CSeasonalComponentAdaptiveBucketing.h | 3 + include/maths/CTimeSeriesChangeDetector.h | 109 +++-- include/maths/CTimeSeriesDecomposition.h | 11 +- .../maths/CTimeSeriesDecompositionDetail.h | 14 +- .../maths/CTimeSeriesDecompositionInterface.h | 9 +- include/maths/CTimeSeriesDecompositionStub.h | 9 +- include/maths/CTimeSeriesModel.h | 20 +- include/maths/CTrendComponent.h | 3 + include/maths/MathsTypes.h | 15 + lib/maths/CCalendarComponent.cc | 6 + .../CCalendarComponentAdaptiveBucketing.cc | 10 +- lib/maths/CModel.cc | 12 +- lib/maths/CSeasonalComponent.cc | 6 + .../CSeasonalComponentAdaptiveBucketing.cc | 8 + lib/maths/CTimeSeriesChangeDetector.cc | 402 ++++++++++++------ lib/maths/CTimeSeriesDecomposition.cc | 22 +- lib/maths/CTimeSeriesDecompositionDetail.cc | 34 ++ lib/maths/CTimeSeriesDecompositionStub.cc | 8 +- lib/maths/CTimeSeriesModel.cc | 234 ++++++---- lib/maths/CTrendComponent.cc | 19 +- lib/maths/MathsTypes.cc | 38 +- lib/maths/unittest/CModelTest.cc | 3 +- lib/maths/unittest/CRegressionTest.cc | 47 ++ lib/maths/unittest/CRegressionTest.h | 1 + .../unittest/CTimeSeriesChangeDetectorTest.cc | 23 +- .../unittest/CTimeSeriesChangeDetectorTest.h | 1 + lib/maths/unittest/CTimeSeriesModelTest.cc | 272 ++++++++++-- lib/maths/unittest/CTimeSeriesModelTest.h | 3 +- lib/model/CAnomalyDetectorModelConfig.cc | 2 +- lib/model/unittest/CEventRateModelTest.cc | 17 +- .../unittest/CMetricAnomalyDetectorTest.cc | 6 +- lib/model/unittest/CMetricModelTest.cc | 23 +- lib/test/CTimeSeriesTestData.cc | 2 +- 38 files changed, 1081 insertions(+), 342 deletions(-) diff --git a/include/maths/CCalendarComponent.h b/include/maths/CCalendarComponent.h index 8910367bf9..e98e39590c 100644 --- a/include/maths/CCalendarComponent.h +++ b/include/maths/CCalendarComponent.h @@ -86,6 +86,9 @@ class MATHS_EXPORT CCalendarComponent : private CDecompositionComponent //! Clear all data. void clear(void); + //! Linearly scale the component's by \p scale. + void linearScale(core_t::TTime time, double scale); + //! Adds a value \f$(t, f(t))\f$ to this component. //! //! \param[in] time The time of the point. diff --git a/include/maths/CCalendarComponentAdaptiveBucketing.h b/include/maths/CCalendarComponentAdaptiveBucketing.h index e80ff09cef..5941babc9e 100644 --- a/include/maths/CCalendarComponentAdaptiveBucketing.h +++ b/include/maths/CCalendarComponentAdaptiveBucketing.h @@ -71,6 +71,9 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket //! allocated memory. void clear(void); + //! Linearly scale the bucket values by \p scale. + void linearScale(double scale); + //! Add the function value at \p time. //! //! \param[in] time The time of \p value. diff --git a/include/maths/CModel.h b/include/maths/CModel.h index 9c9a4186d1..03e5745c9d 100644 --- a/include/maths/CModel.h +++ b/include/maths/CModel.h @@ -74,7 +74,7 @@ class MATHS_EXPORT CModelParams bool testForChange(core_t::TTime changeInterval) const; //! Get the minimum time to detect a change point in the model. - core_t::TTime minimumTimeToDetectChange(core_t::TTime timeSinceLastChangePoint) const; + core_t::TTime minimumTimeToDetectChange(void) const; //! Get the maximum time to test for a change point in the model. 
core_t::TTime maximumTimeToTestForChange(void) const; diff --git a/include/maths/CRegression.h b/include/maths/CRegression.h index bf88a563e3..be5ae9bc11 100644 --- a/include/maths/CRegression.h +++ b/include/maths/CRegression.h @@ -235,6 +235,23 @@ class MATHS_EXPORT CRegression } } + //! Linearly scale the regression model. + //! + //! i.e. apply a transform such that each regression parameter maps + //! to \p scale times its current value. + //! + //! \param[in] scale The scale to apply to the regression parameters. + void linearScale(double scale) + { + if (CBasicStatistics::count(m_S) > 0.0) + { + for (std::size_t i = 0u; i < N; ++i) + { + CBasicStatistics::moment<0>(m_S)(i+2*N-1) *= scale; + } + } + } + //! Multiply the statistics' count by \p scale. CLeastSquaresOnline scaled(double scale) const { @@ -272,14 +289,13 @@ class MATHS_EXPORT CRegression if (this->parameters(params, maxCondition)) { std::ptrdiff_t n = static_cast(params.size()); - double xi = x; for (std::ptrdiff_t i = n - 1; i >= 0; --i) { result[i] = params[i]; for (std::ptrdiff_t j = i + 1; j < n; ++j) { params[j] *= static_cast(i + 1) - / static_cast(j - i) * xi; + / static_cast(j - i) * x; result[i] += params[j]; } } diff --git a/include/maths/CSeasonalComponent.h b/include/maths/CSeasonalComponent.h index 9a50e562c2..cde002cace 100644 --- a/include/maths/CSeasonalComponent.h +++ b/include/maths/CSeasonalComponent.h @@ -104,6 +104,9 @@ class MATHS_EXPORT CSeasonalComponent : private CDecompositionComponent //! Shift the component's slope by \p shift. void shiftSlope(double shift); + //! Linearly scale the component's by \p scale. + void linearScale(core_t::TTime time, double scale); + //! Adds a value \f$(t, f(t))\f$ to this component. //! //! \param[in] time The time of the point. diff --git a/include/maths/CSeasonalComponentAdaptiveBucketing.h b/include/maths/CSeasonalComponentAdaptiveBucketing.h index 2d0ae51f95..89a4cbe6c1 100644 --- a/include/maths/CSeasonalComponentAdaptiveBucketing.h +++ b/include/maths/CSeasonalComponentAdaptiveBucketing.h @@ -97,6 +97,9 @@ class MATHS_EXPORT CSeasonalComponentAdaptiveBucketing : private CAdaptiveBucket //! Shift the regressions' gradients by \p shift. void shiftSlope(double shift); + //! Linearly scale the regressions by \p scale. + void linearScale(double scale); + //! Add the function value at \p time. //! //! \param[in] time The time of \p value. diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h index 6d3cac3be0..873ea1cd72 100644 --- a/include/maths/CTimeSeriesChangeDetector.h +++ b/include/maths/CTimeSeriesChangeDetector.h @@ -50,6 +50,7 @@ struct MATHS_EXPORT SChangeDescription enum EDescription { E_LevelShift, + E_LinearScale, E_TimeShift }; @@ -88,7 +89,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector public: CUnivariateTimeSeriesChangeDetector(const TDecompositionPtr &trendModel, const TPriorPtr &residualModel, - core_t::TTime minimumTimeToDetect = 6 * core::constants::HOUR, + core_t::TTime minimumTimeToDetect = 12 * core::constants::HOUR, core_t::TTime maximumTimeToDetect = core::constants::DAY, double minimumDeltaBicToDetect = 14.0); @@ -103,6 +104,13 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector //! if there has been. TOptionalChangeDescription change(); + //! The function used to decide whether to accept a change. + //! A change is accepted at a value of 1.0 for this function. + //! + //! \param[out] change Filled in with the index of the change + //! the most likely change. 
+ double decisionFunction(std::size_t &change) const; + //! Add \p samples to the change detector. void addSamples(const TWeightStyleVec &weightStyles, const TTimeDoublePr1Vec &samples, @@ -123,7 +131,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector private: using TChangeModel = time_series_change_detector_detail::CUnivariateChangeModel; using TChangeModelPtr = boost::shared_ptr; - using TChangeModelPtr4Vec = core::CSmallVector; + using TChangeModelPtr5Vec = core::CSmallVector; using TMinMaxAccumulator = CBasicStatistics::CMinMax; private: @@ -147,7 +155,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector double m_CurrentEvidenceOfChange; //! The change models. - TChangeModelPtr4Vec m_ChangeModels; + TChangeModelPtr5Vec m_ChangeModels; }; namespace time_series_change_detector_detail @@ -158,6 +166,7 @@ namespace time_series_change_detector_detail class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable { public: + using TDouble1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; using TDouble4Vec1Vec = core::CSmallVector; using TTimeDoublePr = std::pair; @@ -189,10 +198,10 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable virtual TOptionalChangeDescription change() const = 0; //! Update the change model with \p samples. - virtual void addSamples(std::size_t count, - const TWeightStyleVec &weightStyles, + virtual void addSamples(const std::size_t count, + TWeightStyleVec weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights) = 0; + TDouble4Vec1Vec weights) = 0; //! Debug the memory used by this object. void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; @@ -206,10 +215,6 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable //! Get a checksum for this object. virtual uint64_t checksum(uint64_t seed) const = 0; - protected: - //! The sample count to initialize a change model. - static const std::size_t COUNT_TO_INITIALIZE{5u}; - protected: //! Restore the residual model reading state from \p traverser. bool restoreResidualModel(const SDistributionRestoreParams ¶ms, @@ -217,18 +222,21 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable //! Get the log-likelihood. double logLikelihood() const; - //! Update the data log-likelihood with \p logLikelihood. - void addLogLikelihood(double logLikelihood); //! Get the expected log-likelihood. double expectedLogLikelihood() const; - //! Update the expected data log-likelihood with \p logLikelihood. - void addExpectedLogLikelihood(double logLikelihood); + + //! Update the log-likelihood with \p samples. + void updateLogLikelihood(const TWeightStyleVec &weightStyles, + const TDouble1Vec &samples, + const TDouble4Vec1Vec &weights); + + //! Update the expected log-likelihoods. + void updateExpectedLogLikelihood(const TWeightStyleVec &weightStyles, + const TDouble4Vec1Vec &weights); //! Get the time series trend model. const CTimeSeriesDecompositionInterface &trendModel() const; - //! Get the time series trend model. - CTimeSeriesDecompositionInterface &trendModel(); //! Get the time series residual model. const CPrior &residualModel() const; @@ -275,10 +283,10 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode virtual TOptionalChangeDescription change() const; //! Get the log likelihood of \p samples. 
- virtual void addSamples(std::size_t count, - const TWeightStyleVec &weightStyles, + virtual void addSamples(const std::size_t count, + TWeightStyleVec weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights); + TDouble4Vec1Vec weights); //! Get the static size of this object. virtual std::size_t staticSize() const; @@ -312,10 +320,10 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo virtual TOptionalChangeDescription change() const; //! Update with \p samples. - virtual void addSamples(std::size_t count, - const TWeightStyleVec &weightStyles, + virtual void addSamples(const std::size_t count, + TWeightStyleVec weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights); + TDouble4Vec1Vec weights); //! Get the static size of this object. virtual std::size_t staticSize() const; @@ -324,7 +332,6 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo virtual uint64_t checksum(uint64_t seed) const; private: - using TDoubleVec = std::vector; using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; private: @@ -338,6 +345,56 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo double m_SampleCount; }; +//! \brief Captures the likelihood of the data given an arbitrary +//! linear scaling. +class MATHS_EXPORT CUnivariateLinearScaleModel final : public CUnivariateChangeModel +{ + public: + CUnivariateLinearScaleModel(const TDecompositionPtr &trendModel, + const TPriorPtr &residualModel); + + //! Initialize by reading state from \p traverser. + virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser); + + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + + //! The BIC of applying the level shift. + virtual double bic() const; + + //! The expected BIC of applying the change. + virtual double expectedBic() const; + + //! Get a description of the level shift. + virtual TOptionalChangeDescription change() const; + + //! Update with \p samples. + virtual void addSamples(const std::size_t count, + TWeightStyleVec weightStyles, + const TTimeDoublePr1Vec &samples, + TDouble4Vec1Vec weights); + + //! Get the static size of this object. + virtual std::size_t staticSize() const; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed) const; + + private: + using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + + private: + //! The optimal shift. + TMeanAccumulator m_Scale; + + //! The mode of the initial residual distribution model. + double m_ResidualModelMode; + + //! The number of samples added so far. + double m_SampleCount; +}; + //! \brief Captures the likelihood of the data given a specified //! time shift. class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeModel @@ -364,10 +421,10 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod virtual TOptionalChangeDescription change() const; //! Update with \p samples. - virtual void addSamples(std::size_t count, - const TWeightStyleVec &weightStyles, + virtual void addSamples(const std::size_t count, + TWeightStyleVec weightStyles, const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights); + TDouble4Vec1Vec weights); //! Get the static size of this object. 
virtual std::size_t staticSize() const;
diff --git a/include/maths/CTimeSeriesDecomposition.h b/include/maths/CTimeSeriesDecomposition.h
index 022b981e96..9ef36cd957 100644
--- a/include/maths/CTimeSeriesDecomposition.h
+++ b/include/maths/CTimeSeriesDecomposition.h
@@ -120,11 +120,12 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt
         //! \param[in] value The value immediately before the change
         //! point.
         //! \param[in] change A description of the change to apply.
-        virtual void applyChange(core_t::TTime time, double value,
+        //! \return True if a new component was detected.
+        virtual bool applyChange(core_t::TTime time, double value,
                                  const SChangeDescription &change);

         //! Propagate the decomposition forwards to \p time.
-        void propagateForwardsTo(core_t::TTime time);
+        virtual void propagateForwardsTo(core_t::TTime time);

         //! Get the mean value of the time series in the vicinity of \p time.
         virtual double meanValue(core_t::TTime time) const;
@@ -192,10 +193,14 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt
         //! Get the static size of this object.
         virtual std::size_t staticSize(void) const;

+        //! Get the time shift which is being applied.
+        virtual core_t::TTime timeShift(void) const;
+
         //! Get the seasonal components.
         virtual const maths_t::TSeasonalComponentVec &seasonalComponents(void) const;

-        //! This is the latest time of any point added to this object or the time skipped to.
+        //! This is the latest time of any point added to this object or
+        //! the time skipped to.
         virtual core_t::TTime lastValueTime(void) const;

     private:
diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h
index 401d39e233..3f3c22bba1 100644
--- a/include/maths/CTimeSeriesDecompositionDetail.h
+++ b/include/maths/CTimeSeriesDecompositionDetail.h
@@ -389,9 +389,15 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail
                 //! Create a new calendar component.
                 virtual void handle(const SDetectedCalendar &message);

-                //! Apply \p change at \p time.
+                //! Start using the trend for prediction.
+                void useTrendForPrediction(void);
+
+                //! Apply \p shift to the level at \p time and \p value.
                 void shiftLevel(core_t::TTime time, double value, double shift);

+                //! Apply a linear scale of \p scale.
+                void linearScale(core_t::TTime time, double scale);
+
                 //! Maybe re-interpolate the components.
                 void interpolate(const SMessage &message);

@@ -549,6 +555,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail
                         //! Shift the components' time origin to \p time.
                         void shiftOrigin(core_t::TTime time);

+                        //! Linearly scale the components by \p scale.
+                        void linearScale(core_t::TTime time, double scale);
+
                         //! Get a checksum for this object.
                         uint64_t checksum(uint64_t seed = 0) const;

@@ -608,6 +617,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail
                         //! Remove low value components.
                         bool prune(core_t::TTime time, core_t::TTime bucketLength);

+                        //! Linearly scale the components by \p scale.
+                        void linearScale(core_t::TTime time, double scale);
+
                         //! Get a checksum for this object.
                         uint64_t checksum(uint64_t seed = 0) const;

diff --git a/include/maths/CTimeSeriesDecompositionInterface.h b/include/maths/CTimeSeriesDecompositionInterface.h
index 5caf75d2d3..14588a559d 100644
--- a/include/maths/CTimeSeriesDecompositionInterface.h
+++ b/include/maths/CTimeSeriesDecompositionInterface.h
@@ -98,7 +98,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface
         //! \param[in] value The value immediately before the change
        //!
point. //! \param[in] change A description of the change to apply. - virtual void applyChange(core_t::TTime time, double value, + //! \return True if a new component was detected. + virtual bool applyChange(core_t::TTime time, double value, const SChangeDescription &change) = 0; //! Propagate the decomposition forwards to \p time. @@ -171,10 +172,14 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface //! Get the static size of this object. virtual std::size_t staticSize(void) const = 0; + //! Get the time shift which is being applied. + virtual core_t::TTime timeShift(void) const = 0; + //! Get the seasonal components. virtual const maths_t::TSeasonalComponentVec &seasonalComponents(void) const = 0; - //! This is the latest time of any point added to this object or the time skipped to. + //! This is the latest time of any point added to this object or + //! the time skipped to. virtual core_t::TTime lastValueTime(void) const = 0; }; diff --git a/include/maths/CTimeSeriesDecompositionStub.h b/include/maths/CTimeSeriesDecompositionStub.h index 42502734e1..6674bf8f52 100644 --- a/include/maths/CTimeSeriesDecompositionStub.h +++ b/include/maths/CTimeSeriesDecompositionStub.h @@ -46,8 +46,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio const maths_t::TWeightStyleVec &weightStyles = TWeights::COUNT, const maths_t::TDouble4Vec &weights = TWeights::UNIT); - //! No-op. - virtual void applyChange(core_t::TTime time, double value, + //! No-op returning false. + virtual bool applyChange(core_t::TTime time, double value, const SChangeDescription &change); //! No-op. @@ -100,7 +100,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio //! Get the static size of this object. virtual std::size_t staticSize(void) const; - //! Get the seasonal components. + //! Returns zero. + virtual core_t::TTime timeShift(void) const; + + //! Returns an empty vector. virtual const maths_t::TSeasonalComponentVec &seasonalComponents(void) const; //! Returns 0. diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h index 919b897d85..43b1ef111d 100644 --- a/include/maths/CTimeSeriesModel.h +++ b/include/maths/CTimeSeriesModel.h @@ -32,6 +32,23 @@ struct SChangeDescription; struct SDistributionRestoreParams; struct SModelRestoreParams; +//! Computes a Winsorisation weight for \p value based on its +//! one tail p-value. +MATHS_EXPORT +double tailWinsorisationWeight(const CPrior &prior, + double derate, + double scale, + double value); + +//! Computes a Winsorisation weight for \p value based on its +//! marginal for \p dimension one tail p-value. +MATHS_EXPORT +double tailWinsorisationWeight(const CMultivariatePrior &prior, + std::size_t dimension, + double derate, + double scale, + const core::CSmallVector &value); + //! \brief A CModel implementation for modeling a univariate time series. class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { @@ -279,9 +296,6 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel //! interval of unpredictable values. core_t::TTime m_CurrentChangeInterval; - //! The time of the last change point. - core_t::TTime m_TimeOfLastChangePoint; - //! Used to test for changes in the time series. TChangeDetectorPtr m_ChangeDetector; diff --git a/include/maths/CTrendComponent.h b/include/maths/CTrendComponent.h index 5b87f115c5..d1bd7aaa4d 100644 --- a/include/maths/CTrendComponent.h +++ b/include/maths/CTrendComponent.h @@ -94,6 +94,9 @@ class MATHS_EXPORT CTrendComponent //! 
This updates the model for the probability of a level shift. void dontShiftLevel(core_t::TTime time, double value); + //! Apply a linear scale by \p scale. + void linearScale(double scale); + //! Adds a value \f$(t, f(t))\f$ to this component. //! //! \param[in] time The time of the point. diff --git a/include/maths/MathsTypes.h b/include/maths/MathsTypes.h index 5075a4b1bb..a6847a0921 100644 --- a/include/maths/MathsTypes.h +++ b/include/maths/MathsTypes.h @@ -179,6 +179,21 @@ MATHS_EXPORT bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, const TDouble10Vec4Vec1Vec &weights); +//! Set \p style to weight or append if it isn't in \p weightStyles. +MATHS_EXPORT +void setWeight(ESampleWeightStyle style, + double weight, + TWeightStyleVec &weightStyles, + TDouble4Vec &weights); + +//! Set \p style to weight or append if it isn't in \p weightStyles. +MATHS_EXPORT +void setWeight(ESampleWeightStyle style, + double weight, + std::size_t dimension, + TWeightStyleVec &weightStyles, + TDouble10Vec4Vec &weights); + //! Enumerates the possible probability of less likely sample calculations. //! //! The possible calculations are: diff --git a/lib/maths/CCalendarComponent.cc b/lib/maths/CCalendarComponent.cc index ea35a5b76b..1e27e11f6c 100644 --- a/lib/maths/CCalendarComponent.cc +++ b/lib/maths/CCalendarComponent.cc @@ -119,6 +119,12 @@ void CCalendarComponent::clear(void) } } +void CCalendarComponent::linearScale(core_t::TTime time, double scale) +{ + m_Bucketing.linearScale(scale); + this->interpolate(time, false); +} + void CCalendarComponent::add(core_t::TTime time, double value, double weight) { m_Bucketing.add(time, value, weight); diff --git a/lib/maths/CCalendarComponentAdaptiveBucketing.cc b/lib/maths/CCalendarComponentAdaptiveBucketing.cc index 6b14676f48..a5cb8b02ff 100644 --- a/lib/maths/CCalendarComponentAdaptiveBucketing.cc +++ b/lib/maths/CCalendarComponentAdaptiveBucketing.cc @@ -114,6 +114,14 @@ void CCalendarComponentAdaptiveBucketing::clear(void) clearAndShrink(m_Values); } +void CCalendarComponentAdaptiveBucketing::linearScale(double scale) +{ + for (auto &value : m_Values) + { + CBasicStatistics::moment<0>(value) *= scale; + } +} + void CCalendarComponentAdaptiveBucketing::add(core_t::TTime time, double value, double weight) { std::size_t bucket{0}; @@ -153,7 +161,7 @@ void CCalendarComponentAdaptiveBucketing::propagateForwardsByTime(double time) { double factor{::exp(-this->CAdaptiveBucketing::decayRate() * time)}; this->CAdaptiveBucketing::age(factor); - for (auto &&value : m_Values) + for (auto &value : m_Values) { value.age(factor); } diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc index 9a8b09bfe6..1bff3fc7cd 100644 --- a/lib/maths/CModel.cc +++ b/lib/maths/CModel.cc @@ -118,15 +118,9 @@ bool CModelParams::testForChange(core_t::TTime changeInterval) const return changeInterval >= std::max(3 * m_BucketLength, 10 * core::constants::MINUTE); } -core_t::TTime CModelParams::minimumTimeToDetectChange(core_t::TTime timeSinceLastChangePoint) const -{ - // If there was a recent change then there is a chance that this is - // a reversion of the previous change. We reversions to occur faster. 
- double revertFactor{CTools::logisticFunction( static_cast(timeSinceLastChangePoint) - / static_cast(m_MaximumTimeToTestForChange), - 0.1, 1.0)}; - return static_cast(std::ceil( (0.3 + 0.7 * revertFactor) - * static_cast(m_MinimumTimeToDetectChange))); +core_t::TTime CModelParams::minimumTimeToDetectChange(void) const +{ + return m_MinimumTimeToDetectChange; } core_t::TTime CModelParams::maximumTimeToTestForChange(void) const diff --git a/lib/maths/CSeasonalComponent.cc b/lib/maths/CSeasonalComponent.cc index 091cfbb273..f67676a682 100644 --- a/lib/maths/CSeasonalComponent.cc +++ b/lib/maths/CSeasonalComponent.cc @@ -154,6 +154,12 @@ void CSeasonalComponent::shiftSlope(double shift) m_Bucketing.shiftSlope(shift); } +void CSeasonalComponent::linearScale(core_t::TTime time, double scale) +{ + m_Bucketing.linearScale(scale); + this->interpolate(time, false); +} + void CSeasonalComponent::add(core_t::TTime time, double value, double weight) { double predicted{CBasicStatistics::mean(this->value(this->jitter(time), 0.0))}; diff --git a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc index 73265d0f3b..209eaddfe0 100644 --- a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc +++ b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc @@ -212,6 +212,14 @@ void CSeasonalComponentAdaptiveBucketing::shiftSlope(double shift) } } +void CSeasonalComponentAdaptiveBucketing::linearScale(double scale) +{ + for (auto &bucket : m_Buckets) + { + bucket.s_Regression.linearScale(scale); + } +} + void CSeasonalComponentAdaptiveBucketing::add(core_t::TTime time, double value, double prediction, diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index e0a957c0b3..12f66787a5 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -54,10 +55,13 @@ const std::string CHANGE_MODEL_TAG{"i"}; const std::string LOG_LIKELIHOOD_TAG{"j"}; const std::string EXPECTED_LOG_LIKELIHOOD_TAG{"k"}; const std::string SHIFT_TAG{"l"}; -const std::string TREND_MODEL_TAG{"m"}; +const std::string SCALE_TAG{"m"}; const std::string RESIDUAL_MODEL_TAG{"n"}; const std::size_t EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS{4u}; const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9}; +const std::size_t COUNT_TO_INITIALIZE{5u}; +const double MINIMUM_SCALE{0.1}; +const double MAXIMUM_SCALE{10.0}; } SChangeDescription::SChangeDescription(EDescription description, @@ -73,8 +77,9 @@ std::string SChangeDescription::print() const std::string result; switch (s_Description) { - case E_LevelShift: result += "level shift by "; break; - case E_TimeShift: result += "time shift by "; break; + case E_LevelShift: result += "level shift by "; break; + case E_LinearScale: result += "linear scale by "; break; + case E_TimeShift: result += "time shift by "; break; } return result + core::CStringUtils::typeToString(s_Value[0]); } @@ -87,13 +92,17 @@ CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector(const T m_MinimumTimeToDetect{minimumTimeToDetect}, m_MaximumTimeToDetect{maximumTimeToDetect}, m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, - m_SampleCount{0}, - m_CurrentEvidenceOfChange{0.0}, + m_SampleCount{0}, m_CurrentEvidenceOfChange{0.0}, m_ChangeModels{boost::make_shared(trendModel, residualModel), boost::make_shared(trendModel, residualModel), boost::make_shared(trendModel, residualModel, -core::constants::HOUR), 
boost::make_shared(trendModel, residualModel, +core::constants::HOUR)} -{} +{ + if (trendModel->seasonalComponents().size() > 0) + { + m_ChangeModels.push_back(boost::make_shared(trendModel, residualModel)); + } +} bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SModelRestoreParams ¶ms, core::CStateRestoreTraverser &traverser) @@ -147,45 +156,69 @@ void CUnivariateTimeSeriesChangeDetector::acceptPersistInserter(core::CStatePers TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() { - using TChangeModelPtr4VecCItr = TChangeModelPtr4Vec::const_iterator; - using TDoubleChangeModelPtr4VecCItrPr = std::pair; - using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; - if (m_TimeRange.range() > m_MinimumTimeToDetect) { - double noChangeBic{m_ChangeModels[0]->bic()}; - TMinAccumulator candidates; - for (auto i = m_ChangeModels.begin() + 1; i != m_ChangeModels.end(); ++i) - { - candidates.add({(*i)->bic(), i}); - } - candidates.sort(); - - double evidences[]{noChangeBic - candidates[0].first, - noChangeBic - candidates[1].first}; - double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()}; - - double x[]{evidences[0] / m_MinimumDeltaBicToDetect, - 2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect, - evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence, - static_cast(m_TimeRange.range() - m_MinimumTimeToDetect) - / static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; - double p{ CTools::logisticFunction(x[0], 0.05, 1.0) - * CTools::logisticFunction(x[1], 0.1, 1.0) - * (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) - * (0.5 + CTools::logisticFunction(x[3], 0.2, 0.5))}; - LOG_TRACE("p = " << p); - - if (p > 0.0625/*= std::pow(0.5, 4.0)*/) + std::size_t candidate{}; + double p{this->decisionFunction(candidate)}; + + if (p > 1.0) { - return (*candidates[0].second)->change(); + return m_ChangeModels[candidate]->change(); } - m_CurrentEvidenceOfChange = evidences[0]; + m_CurrentEvidenceOfChange = + m_ChangeModels[0]->bic() - m_ChangeModels[candidate]->bic(); } return TOptionalChangeDescription(); } +double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t &change) const +{ + using TChangeModelPtr5VecCItr = TChangeModelPtr5Vec::const_iterator; + using TDoubleChangeModelPtr5VecCItrPr = std::pair; + using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; + + if (m_SampleCount <= COUNT_TO_INITIALIZE) + { + return 0.0; + } + + double noChangeBic{m_ChangeModels[0]->bic()}; + TMinAccumulator candidates; + for (auto i = m_ChangeModels.begin() + 1; i != m_ChangeModels.end(); ++i) + { + candidates.add({(*i)->bic(), i}); + } + candidates.sort(); + + double evidences[]{noChangeBic - candidates[0].first, + noChangeBic - candidates[1].first}; + double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()}; + + double x[]{evidences[0] / m_MinimumDeltaBicToDetect, + 2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect, + evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence, + static_cast(m_TimeRange.range() - m_MinimumTimeToDetect) + / static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; + double p{ CTools::logisticFunction(x[0], 0.05, 1.0) + * CTools::logisticFunction(x[1], 0.1, 1.0) + * (x[2] < 0.0 ? 
1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) + * CTools::logisticFunction(x[3], 0.2, 0.5)}; + LOG_TRACE("p(" << (*candidates[0].second)->change()->print() << ") = " << p + << " | x = " << core::CContainerPrinter::print(x)); + + change = candidates[0].second - m_ChangeModels.begin(); + + // Note 0.03125 = 0.5^5. This is chosen so that this function + // is equal to one when each of the decision criteria are at + // the centre of the sigmoid functions and the time range is + // equal to "minimum time to detect". This means we'll (just) + // accept the change if all of the individual hard decision + // criteria are satisfied. + + return p / 0.03125; +} + bool CUnivariateTimeSeriesChangeDetector::stopTesting() const { core_t::TTime range{m_TimeRange.range()}; @@ -308,27 +341,41 @@ double CUnivariateChangeModel::logLikelihood() const return m_LogLikelihood; } -void CUnivariateChangeModel::addLogLikelihood(double logLikelihood) -{ - m_LogLikelihood += logLikelihood; -} - double CUnivariateChangeModel::expectedLogLikelihood() const { return m_ExpectedLogLikelihood; } -void CUnivariateChangeModel::addExpectedLogLikelihood(double logLikelihood) +void CUnivariateChangeModel::updateLogLikelihood(const TWeightStyleVec &weightStyles, + const TDouble1Vec &samples, + const TDouble4Vec1Vec &weights) { - m_ExpectedLogLikelihood += logLikelihood; + double logLikelihood{}; + if (m_ResidualModel->jointLogMarginalLikelihood(weightStyles, samples, weights, + logLikelihood) == maths_t::E_FpNoErrors) + { + m_LogLikelihood += logLikelihood; + } } -const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const +void CUnivariateChangeModel::updateExpectedLogLikelihood(const TWeightStyleVec &weightStyles, + const TDouble4Vec1Vec &weights) { - return *m_TrendModel; + for (const auto &weight : weights) + { + double expectedLogLikelihood{}; + TDouble4Vec1Vec weight_{weight}; + if (m_ResidualModel->expectation(maths::CPrior::CLogMarginalLikelihood{ + *m_ResidualModel, weightStyles, weight_}, + EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, + expectedLogLikelihood, weightStyles, weight)) + { + m_ExpectedLogLikelihood += expectedLogLikelihood; + } + } } -CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() +const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const { return *m_TrendModel; } @@ -381,29 +428,36 @@ TOptionalChangeDescription CUnivariateNoChangeModel::change() const return TOptionalChangeDescription(); } -void CUnivariateNoChangeModel::addSamples(std::size_t count, - const TWeightStyleVec &weightStyles, +void CUnivariateNoChangeModel::addSamples(const std::size_t count, + TWeightStyleVec weightStyles, const TTimeDoublePr1Vec &samples_, - const TDouble4Vec1Vec &weights) + TDouble4Vec1Vec weights) { - // See CUnivariateTimeSeriesLevelShiftModel for an explanation - // of the delay updating the log-likelihood. + // See, for example, CUnivariateLevelShiftModel::addSamples + // for an explanation of the delay updating the log-likelihood. 
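+    // A note on the shared pattern in the change models' addSamples
+    // overrides: each detrended sample is given a tail Winsorisation
+    // weight while a model's residual model is updated, and the weights
+    // are then reset to one before the log-likelihood is accumulated,
+    // so the BICs of the competing change models are compared on
+    // equally weighted samples.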
if (count >= COUNT_TO_INITIALIZE) { + CPrior &residualModel{this->residualModel()}; + TDouble1Vec samples; samples.reserve(samples_.size()); - for (const auto &sample : samples_) + for (std::size_t i = 0u; i < samples_.size(); ++i) { - samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0)); + core_t::TTime time{samples_[i].first}; + double value{samples_[i].second}; + double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; + double sample{this->trendModel().detrend(time, value, 0.0)}; + double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; + samples.push_back(sample); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); } - double logLikelihood{0.0}; - if (this->residualModel().jointLogMarginalLikelihood(weightStyles, samples, weights, - logLikelihood) == maths_t::E_FpNoErrors) + for (auto &weight : weights) { - this->addLogLikelihood(logLikelihood); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); } + this->updateLogLikelihood(weightStyles, samples, weights); } } @@ -454,81 +508,70 @@ void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInsert double CUnivariateLevelShiftModel::bic() const { - return -2.0 * this->logLikelihood() + std::log(m_SampleCount); + return -2.0 * this->logLikelihood() + CTools::fastLog(m_SampleCount); } double CUnivariateLevelShiftModel::expectedBic() const { - return -2.0 * this->expectedLogLikelihood() + std::log(m_SampleCount); + return -2.0 * this->expectedLogLikelihood() + CTools::fastLog(m_SampleCount); } TOptionalChangeDescription CUnivariateLevelShiftModel::change() const { - // The "magic" 0.9 is due to the fact that the trend is updated - // with new values during change detection. As a result, the - // estimate is biased (by early values) and too large. This was - // an empirical estimate of the degree of bias across a range of - // step changes. return SChangeDescription{SChangeDescription::E_LevelShift, - 0.9 * CBasicStatistics::mean(m_Shift), + CBasicStatistics::mean(m_Shift), this->residualModelPtr()}; } -void CUnivariateLevelShiftModel::addSamples(std::size_t count, - const TWeightStyleVec &weightStyles, +void CUnivariateLevelShiftModel::addSamples(const std::size_t count, + TWeightStyleVec weightStyles, const TTimeDoublePr1Vec &samples_, - const TDouble4Vec1Vec &weights) + TDouble4Vec1Vec weights) { const CTimeSeriesDecompositionInterface &trendModel{this->trendModel()}; - for (const auto &sample : samples_) - { - double x{trendModel.detrend(sample.first, sample.second, 0.0) - m_ResidualModelMode}; - m_Shift.add(x); - } - // We delay updating the log-likelihood because early on the // level can change giving us a better apparent fit to the // data than a fixed step. Five updates was found to be the // minimum to get empirically similar sum log-likelihood if - // there is no shift in the data. + // there is no change in the data. 
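+    // Concretely, no log-likelihood is accumulated until the count-th
+    // update, so every change model starts scoring from the same sample
+    // onwards; the quantity ultimately compared (see bic() above) is
+    //     BIC = -2.0 * logLikelihood + log(sampleCount)
+    // and a smaller value indicates a better fit.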
if (count >= COUNT_TO_INITIALIZE) { + CPrior &residualModel{this->residualModel()}; + TDouble1Vec samples; samples.reserve(samples_.size()); - for (const auto &sample : samples_) - { - double shift{CBasicStatistics::mean(m_Shift)}; - samples.push_back(trendModel.detrend(sample.first, sample.second, 0.0) - shift); - } - for (const auto &weight : weights) + double shift{CBasicStatistics::mean(m_Shift)}; + for (std::size_t i = 0u; i < samples_.size(); ++i) { - m_SampleCount += maths_t::count(weightStyles, weight); + core_t::TTime time{samples_[i].first}; + double value{samples_[i].second}; + double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; + double sample{trendModel.detrend(time, value, 0.0) - shift}; + double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; + samples.push_back(sample); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); + m_SampleCount += maths_t::count(weightStyles, weights[i]); } - CPrior &residualModel{this->residualModel()}; residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); - double logLikelihood{0.0}; - if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights, - logLikelihood) == maths_t::E_FpNoErrors) - { - this->addLogLikelihood(logLikelihood); - } - for (const auto &weight : weights) + for (auto &weight : weights) { - double expectedLogLikelihood{0.0}; - TDouble4Vec1Vec weight_{weight}; - if (residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{ - residualModel, weightStyles, weight_}, - EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, - expectedLogLikelihood, weightStyles, weight)) - { - this->addExpectedLogLikelihood(expectedLogLikelihood); - } + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); } + this->updateLogLikelihood(weightStyles, samples, weights); + this->updateExpectedLogLikelihood(weightStyles, weights); + } + + for (std::size_t i = 0u; i < samples_.size(); ++i) + { + core_t::TTime time{samples_[i].first}; + double value{samples_[i].second}; + double shift{trendModel.detrend(time, value, 0.0) - m_ResidualModelMode}; + m_Shift.add(shift); } } @@ -544,6 +587,126 @@ uint64_t CUnivariateLevelShiftModel::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_SampleCount); } +CUnivariateLinearScaleModel::CUnivariateLinearScaleModel(const TDecompositionPtr &trendModel, + const TPriorPtr &residualModel) : + CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, + m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, + m_SampleCount{0.0} +{} + +bool CUnivariateLinearScaleModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, + core::CStateRestoreTraverser &traverser) +{ + if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) + { + return false; + } + do + { + const std::string name{traverser.name()}; + RESTORE(SCALE_TAG, m_Scale.fromDelimited(traverser.value())) + RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode) + RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) + RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) + } + while (traverser.next()); + return true; +} + +void CUnivariateLinearScaleModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const +{ + this->CUnivariateChangeModel::acceptPersistInserter(inserter); + inserter.insertValue(SCALE_TAG, m_Scale.toDelimited()); + 
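+    // m_ResidualModelMode is read back via RESIDUAL_MODEL_MODE_TAG in
+    // acceptRestoreTraverser above but was not written here; persist it
+    // so that the persist/restore round trip is symmetric.
+    inserter.insertValue(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode);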
inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); + inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(this->residualModel()), _1)); +} + +double CUnivariateLinearScaleModel::bic() const +{ + return -2.0 * this->logLikelihood() + CTools::fastLog(m_SampleCount); +} + +double CUnivariateLinearScaleModel::expectedBic() const +{ + return -2.0 * this->expectedLogLikelihood() + CTools::fastLog(m_SampleCount); +} + +CUnivariateLinearScaleModel::TOptionalChangeDescription CUnivariateLinearScaleModel::change() const +{ + return SChangeDescription{SChangeDescription::E_LinearScale, + CBasicStatistics::mean(m_Scale), + this->residualModelPtr()}; +} + +void CUnivariateLinearScaleModel::addSamples(const std::size_t count, + TWeightStyleVec weightStyles, + const TTimeDoublePr1Vec &samples_, + TDouble4Vec1Vec weights) +{ + const CTimeSeriesDecompositionInterface &trendModel{this->trendModel()}; + + // We delay updating the log-likelihood because early on the + // scale can change giving us a better apparent fit to the + // data than a fixed scale. Five updates was found to be the + // minimum to get empirically similar sum log-likelihood if + // there is no change in the data. + + for (std::size_t i = 0u; i < samples_.size(); ++i) + { + core_t::TTime time{samples_[i].first}; + double value{samples_[i].second - m_ResidualModelMode}; + double prediction{CBasicStatistics::mean(trendModel.value(time, 0.0))}; + double scale{std::fabs(value) / std::fabs(prediction)}; + m_Scale.add(value * prediction < 0.0 ? + MINIMUM_SCALE : CTools::truncate(scale, MINIMUM_SCALE, MAXIMUM_SCALE), + std::fabs(prediction)); + } + + if (count >= COUNT_TO_INITIALIZE) + { + CPrior &residualModel{this->residualModel()}; + + TDouble1Vec samples; + samples.reserve(samples_.size()); + double scale{CBasicStatistics::mean(m_Scale)}; + for (std::size_t i = 0u; i < samples_.size(); ++i) + { + core_t::TTime time{samples_[i].first}; + double value{samples_[i].second}; + double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; + double prediction{CBasicStatistics::mean(trendModel.value(time, 0.0))}; + double sample{value - scale * prediction}; + double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; + samples.push_back(sample); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); + m_SampleCount += maths_t::count(weightStyles, weights[i]); + } + + residualModel.addSamples(weightStyles, samples, weights); + residualModel.propagateForwardsByTime(1.0); + + for (auto &weight : weights) + { + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); + } + this->updateLogLikelihood(weightStyles, samples, weights); + this->updateExpectedLogLikelihood(weightStyles, weights); + } +} + +std::size_t CUnivariateLinearScaleModel::staticSize() const +{ + return sizeof(*this); +} + +uint64_t CUnivariateLinearScaleModel::checksum(uint64_t seed) const +{ + seed = this->CUnivariateChangeModel::checksum(seed); + seed = CChecksum::calculate(seed, m_Scale); + return CChecksum::calculate(seed, m_SampleCount); +} + CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &trendModel, const TPriorPtr &residualModel, core_t::TTime shift) : @@ -591,43 +754,40 @@ TOptionalChangeDescription CUnivariateTimeShiftModel::change() const this->residualModelPtr()}; } -void CUnivariateTimeShiftModel::addSamples(std::size_t count, - const TWeightStyleVec &weightStyles, +void 
CUnivariateTimeShiftModel::addSamples(const std::size_t count, + TWeightStyleVec weightStyles, const TTimeDoublePr1Vec &samples_, - const TDouble4Vec1Vec &weights) + TDouble4Vec1Vec weights) { - // See CUnivariateTimeSeriesLevelShiftModel for an explanation - // of the delay updating the log-likelihood. + // See, for example, CUnivariateLevelShiftModel::addSamples + // for an explanation of the delay updating the log-likelihood. if (count >= COUNT_TO_INITIALIZE) { + CPrior &residualModel{this->residualModel()}; + TDouble1Vec samples; samples.reserve(samples_.size()); - for (const auto &sample : samples_) + for (std::size_t i = 0u; i < samples_.size(); ++i) { - samples.push_back(this->trendModel().detrend(sample.first + m_Shift, sample.second, 0.0)); + core_t::TTime time{samples_[i].first}; + double value{samples_[i].second}; + double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; + double sample{this->trendModel().detrend(time + m_Shift, value, 0.0)}; + double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; + samples.push_back(sample); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); } - CPrior &residualModel{this->residualModel()}; residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); - double logLikelihood{0.0}; - if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights, - logLikelihood) == maths_t::E_FpNoErrors) - { - this->addLogLikelihood(logLikelihood); - } - for (const auto &weight : weights) + for (auto &weight : weights) { - double expectedLogLikelihood{0.0}; - TDouble4Vec1Vec weight_{weight}; - residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{ - residualModel, weightStyles, weight_}, - EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, - expectedLogLikelihood, weightStyles, weight); - this->addExpectedLogLikelihood(expectedLogLikelihood); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); } + this->updateLogLikelihood(weightStyles, samples, weights); + this->updateExpectedLogLikelihood(weightStyles, weights); } } diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index 411f5a9169..7b22e6908e 100644 --- a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -252,7 +252,7 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, { CComponents::CScopeNotifyOnStateChange result{m_Components}; - time -= m_TimeShift; + time += m_TimeShift; core_t::TTime lastTime{std::max(m_LastValueTime, m_LastPropagationTime)}; @@ -276,21 +276,27 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, return result.changed(); } -void CTimeSeriesDecomposition::applyChange(core_t::TTime time, +bool CTimeSeriesDecomposition::applyChange(core_t::TTime time, double value, const SChangeDescription &change) { + bool result{m_Components.usingTrendForPrediction() == false}; + m_Components.useTrendForPrediction(); + switch (change.s_Description) { case SChangeDescription::E_LevelShift: - { m_Components.shiftLevel(time, value, change.s_Value[0]); break; - } + case SChangeDescription::E_LinearScale: + m_Components.linearScale(time, change.s_Value[0]); + break; case SChangeDescription::E_TimeShift: m_TimeShift += static_cast(change.s_Value[0]); break; } + + return result; } void CTimeSeriesDecomposition::propagateForwardsTo(core_t::TTime time) @@ -357,7 +363,7 @@ TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime 
time, baseline += vector2x1(this->smooth( boost::bind(&CTimeSeriesDecomposition::value, this, _1, confidence, components & E_Seasonal, false), - time, components)); + time - m_TimeShift, components)); } return pair(baseline); @@ -435,7 +441,6 @@ double CTimeSeriesDecomposition::detrend(core_t::TTime time, { return value; } - time += m_TimeShift; TDoubleDoublePr interval{this->value(time, confidence, components)}; return std::min(value - interval.first, 0.0) + std::max(value - interval.second, 0.0); } @@ -546,6 +551,11 @@ std::size_t CTimeSeriesDecomposition::staticSize(void) const return sizeof(*this); } +core_t::TTime CTimeSeriesDecomposition::timeShift(void) const +{ + return m_TimeShift; +} + const maths_t::TSeasonalComponentVec &CTimeSeriesDecomposition::seasonalComponents(void) const { return m_Components.seasonal(); diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index 667d28587f..617da1e983 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -1351,11 +1351,29 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar } } +void CTimeSeriesDecompositionDetail::CComponents::useTrendForPrediction(void) +{ + m_UsingTrendForPrediction = true; +} + void CTimeSeriesDecompositionDetail::CComponents::shiftLevel(core_t::TTime time, double value, double shift) { m_Trend.shiftLevel(time, value, shift); } +void CTimeSeriesDecompositionDetail::CComponents::linearScale(core_t::TTime time, double scale) +{ + m_Trend.linearScale(scale); + if (m_Seasonal) + { + m_Seasonal->linearScale(time, scale); + } + if (m_Calendar) + { + m_Calendar->linearScale(time, scale); + } +} + void CTimeSeriesDecompositionDetail::CComponents::interpolate(const SMessage &message) { core_t::TTime time{message.s_Time}; @@ -2124,6 +2142,14 @@ void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shiftOrigin(core_t: } } +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::linearScale(core_t::TTime time, double scale) +{ + for (auto &component : s_Components) + { + component.linearScale(time, scale); + } +} + uint64_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, s_Components); @@ -2308,6 +2334,14 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::prune(core_t::TTime return s_Components.empty(); } +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::linearScale(core_t::TTime time, double scale) +{ + for (auto &component : s_Components) + { + component.linearScale(time, scale); + } +} + uint64_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, s_Components); diff --git a/lib/maths/CTimeSeriesDecompositionStub.cc b/lib/maths/CTimeSeriesDecompositionStub.cc index 508efbbf12..6855c907d1 100644 --- a/lib/maths/CTimeSeriesDecompositionStub.cc +++ b/lib/maths/CTimeSeriesDecompositionStub.cc @@ -48,10 +48,11 @@ bool CTimeSeriesDecompositionStub::addPoint(core_t::TTime /*time*/, return false; } -void CTimeSeriesDecompositionStub::applyChange(core_t::TTime /*time*/, +bool CTimeSeriesDecompositionStub::applyChange(core_t::TTime /*time*/, double /*value*/, const SChangeDescription &/*change*/) { + return false; } void CTimeSeriesDecompositionStub::propagateForwardsTo(core_t::TTime /*time*/) @@ -125,6 +126,11 @@ std::size_t CTimeSeriesDecompositionStub::staticSize(void) const return sizeof(*this); } +core_t::TTime 
CTimeSeriesDecompositionStub::timeShift(void) const +{ + return 0; +} + const maths_t::TSeasonalComponentVec &CTimeSeriesDecompositionStub::seasonalComponents(void) const { return NO_COMPONENTS; diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 65042756ae..502c2af57c 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -61,9 +61,10 @@ using TTime1Vec = core::CSmallVector; using TSize10Vec = core::CSmallVector; using TSizeDoublePr10Vec = core::CSmallVector; using TTail10Vec = core::CSmallVector; -using TMultivariatePriorCPtrSizePr1Vec = CTimeSeriesCorrelations::TMultivariatePriorCPtrSizePr1Vec; using TOptionalSize = boost::optional; using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; +using TChangeDetectorPtr = boost::shared_ptr; +using TMultivariatePriorCPtrSizePr1Vec = CTimeSeriesCorrelations::TMultivariatePriorCPtrSizePr1Vec; //! The decay rate controllers we maintain. enum EDecayRateController @@ -80,84 +81,34 @@ const TSize10Vec NOTHING_TO_MARGINALIZE; const TSizeDoublePr10Vec NOTHING_TO_CONDITION; const double WINSORISED_FRACTION{1e-2}; const double MINIMUM_WINSORISATION_WEIGHT_FRACTION{1e-10}; -const double MINIMUM_WINSORISATION_WEIGHT{0.01}; +const double MINIMUM_TAIL_WINSORISATION_WEIGHT{1e-2}; +const double MINIMUM_CHANGE_WINSORISATION_WEIGHT{1e-1}; const double LOG_WINSORISED_FRACTION{std::log(WINSORISED_FRACTION)}; const double LOG_MINIMUM_WEIGHT_FRACTION{std::log(MINIMUM_WINSORISATION_WEIGHT_FRACTION)}; +const double LOG_MINIMUM_TAIL_WINSORISATION_WEIGHT{std::log(MINIMUM_TAIL_WINSORISATION_WEIGHT)}; const double MINUS_LOG_TOLERANCE{-std::log(1.0 - 100.0 * std::numeric_limits::epsilon())}; -//! Computes the Winsorisation weight for \p value. -double computeWinsorisationWeight(const CPrior &prior, double derate, double scale, double value) +//! Derate the minimum Winsorisation weight. 
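+//! For example, derate = 0 leaves the floor at
+//! MINIMUM_TAIL_WINSORISATION_WEIGHT, i.e. 0.01, while derate = 1
+//! lifts it to 0.5, so a fully derated sample is at worst half-weighted.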
+double deratedMinimumWinsorisationWeight(double derate) { - double deratedMinimumWeight = MINIMUM_WINSORISATION_WEIGHT - + (0.5 - MINIMUM_WINSORISATION_WEIGHT) - * CTools::truncate(derate, 0.0, 1.0); - - double lowerBound; - double upperBound; - if (!prior.minusLogJointCdf(CConstantWeights::SEASONAL_VARIANCE, - {value}, {{scale}}, lowerBound, upperBound)) - { - return 1.0; - } - if ( upperBound < MINUS_LOG_TOLERANCE - && !prior.minusLogJointCdfComplement(CConstantWeights::SEASONAL_VARIANCE, - {value}, {{scale}}, lowerBound, upperBound)) - { - return 1.0; - } - - double f{std::exp(-(lowerBound + upperBound) / 2.0)}; - f = std::min(f, 1.0 - f); - if (f >= WINSORISED_FRACTION) - { - return 1.0; - } - if (f <= MINIMUM_WINSORISATION_WEIGHT_FRACTION) - { - return deratedMinimumWeight; - } - - // We interpolate between 1.0 and the minimum weight on the - // interval [WINSORISED_FRACTION, MINIMUM_WEIGHT_FRACTION] - // by fitting (f / WF)^(-c log(f)) where WF is the Winsorised - // fraction and c is determined by solving: - // MW = (MWF / WF)^(-c log(MWF)) - - double deratedExponent{ -std::log(deratedMinimumWeight) - / LOG_MINIMUM_WEIGHT_FRACTION - / (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)}; - double logf{std::log(f)}; - double result{std::exp(-deratedExponent * logf * (logf - LOG_WINSORISED_FRACTION))}; - - if (CMathsFuncs::isNan(result)) - { - return 1.0; - } - - LOG_TRACE("sample = " << value << " min(F, 1-F) = " << f << ", weight = " << result); - - return result; + derate = CTools::truncate(derate, 0.0, 1.0); + return MINIMUM_TAIL_WINSORISATION_WEIGHT + (0.5 - MINIMUM_TAIL_WINSORISATION_WEIGHT) * derate; } -//! Computes the Winsorisation weight for \p value. -double computeWinsorisationWeight(const CMultivariatePrior &prior, - std::size_t dimension, - double derate, - double scale, - const TDouble10Vec &value) +//! Get the one tail p-value from a specified Winsorisation weight. +double pValueFromTailWinsorisationWeight(double weight) { - std::size_t dimensions = prior.dimension(); - TSizeDoublePr10Vec condition(dimensions - 1); - for (std::size_t i = 0u, j = 0u; i < dimensions; ++i) + if (weight >= 1.0) { - if (i != dimension) - { - condition[j++] = std::make_pair(i, value[i]); - } + return 1.0; } - boost::shared_ptr conditional( - prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); - return computeWinsorisationWeight(*conditional, derate, scale, value[dimension]); + + double logw{std::log(std::max(weight, MINIMUM_TAIL_WINSORISATION_WEIGHT))}; + return std::exp(0.5 * ( LOG_WINSORISED_FRACTION + - std::sqrt( CTools::pow2(LOG_WINSORISED_FRACTION) + + 4.0 * logw / LOG_MINIMUM_TAIL_WINSORISATION_WEIGHT + * LOG_MINIMUM_WEIGHT_FRACTION + * (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)))); } //! Optionally randomly sample from \p indices. @@ -200,6 +151,20 @@ TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus &rng, return TOptionalSize{}; } +//! Computes a Winsorisation weight based on the chance that the +//! time series is currently undergoing a change. +double changeWinsorisationWeight(const TChangeDetectorPtr &detector) +{ + if (detector != nullptr) + { + std::size_t dummy; + return std::max(CTools::logisticFunction( + detector->decisionFunction(dummy), 0.1, 1.0, -1.0), + MINIMUM_CHANGE_WINSORISATION_WEIGHT); + } + return 1.0; +} + //! Convert \p value to comma separated string. 
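//! (For example, a pair {1500000000, 2.5} would presumably become
//! "1500000000,2.5"; the exact formatting is not shown in this hunk.)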
std::string toDelimited(const TTimeDoublePr &value) { @@ -231,8 +196,7 @@ const std::string ANOMALY_MODEL_6_3_TAG{"h"}; const std::string SLIDING_WINDOW_6_3_TAG{"i"}; const std::string CANDIDATE_CHANGE_POINT_6_3_TAG{"j"}; const std::string CURRENT_CHANGE_INTERVAL_6_3_TAG{"k"}; -const std::string TIME_OF_LAST_CHANGE_POINT_6_3_TAG{"l"}; -const std::string CHANGE_DETECTOR_6_3_TAG{"m"}; +const std::string CHANGE_DETECTOR_6_3_TAG{"l"}; // Version < 6.3 const std::string ID_OLD_TAG{"a"}; const std::string CONTROLLER_OLD_TAG{"b"}; @@ -267,6 +231,81 @@ namespace forecast const std::string INFO_INSUFFICIENT_HISTORY("Insufficient history to forecast"); const std::string ERROR_MULTIVARIATE("Forecast not supported for multivariate features"); } + +} + +double tailWinsorisationWeight(const CPrior &prior, + double derate, + double scale, + double value) +{ + double deratedMinimumWeight{deratedMinimumWinsorisationWeight(derate)}; + + double lowerBound; + double upperBound; + if (!prior.minusLogJointCdf(CConstantWeights::SEASONAL_VARIANCE, + {value}, {{scale}}, lowerBound, upperBound)) + { + return 1.0; + } + if ( upperBound < MINUS_LOG_TOLERANCE + && !prior.minusLogJointCdfComplement(CConstantWeights::SEASONAL_VARIANCE, + {value}, {{scale}}, lowerBound, upperBound)) + { + return 1.0; + } + + double f{std::exp(-(lowerBound + upperBound) / 2.0)}; + f = std::min(f, 1.0 - f); + if (f >= WINSORISED_FRACTION) + { + return 1.0; + } + if (f <= MINIMUM_WINSORISATION_WEIGHT_FRACTION) + { + return deratedMinimumWeight; + } + + // We interpolate between 1.0 and the minimum weight on the + // interval [WINSORISED_FRACTION, MINIMUM_WEIGHT_FRACTION] + // by fitting (f / WF)^(-c log(f)) where WF is the Winsorised + // fraction and c is determined by solving: + // MW = (MWF / WF)^(-c log(MWF)) + + double deratedExponent{ -std::log(deratedMinimumWeight) + / LOG_MINIMUM_WEIGHT_FRACTION + / (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)}; + double logf{std::log(f)}; + double result{std::exp(-deratedExponent * logf * (logf - LOG_WINSORISED_FRACTION))}; + + if (CMathsFuncs::isNan(result)) + { + return 1.0; + } + + LOG_TRACE("sample = " << value << " min(F, 1-F) = " << f << ", weight = " << result); + + return result; +} + +double tailWinsorisationWeight(const CMultivariatePrior &prior, + std::size_t dimension, + double derate, + double scale, + const core::CSmallVector &value) +{ + std::size_t dimensions = prior.dimension(); + TSizeDoublePr10Vec condition(dimensions - 1); + for (std::size_t i = 0u, j = 0u; i < dimensions; ++i) + { + if (i != dimension) + { + condition[j++] = std::make_pair(i, value[i]); + } + } + boost::shared_ptr conditional( + prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); + return tailWinsorisationWeight(*conditional, derate, scale, value[dimension]); } //! \brief A model of anomalous sections of a time series. 
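// A worked note on the Winsorisation interpolation above (restating
// the code comment, with MW the derated minimum weight and WF, MWF the
// Winsorised and minimum weight fractions): fitting
//     w(f) = (f / WF)^(-c log(f))
// and requiring w(MWF) = MW gives
//     log(w) = -c log(f) (log(f) - log(WF))
//     c      = -log(MW) / (log(MWF) (log(MWF) - log(WF)))
// which is the "deratedExponent" in tailWinsorisationWeight, while
// pValueFromTailWinsorisationWeight inverts the same relationship
// (taking the un-derated minimum weight) by solving the quadratic in
// log(f) and keeping the root below WF. A hypothetical round trip:
//     double w{tailWinsorisationWeight(prior, 0.0, 1.0, x)};
//     double f{pValueFromTailWinsorisationWeight(w)};
// recovers f ~ min(F(x), 1 - F(x)) for weights strictly between the
// minimum weight and one.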
@@ -649,7 +688,6 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams ¶m params.decayRate()) : TAnomalyModelPtr()), m_CurrentChangeInterval(0), - m_TimeOfLastChangePoint(0), m_SlidingWindow(SLIDING_WINDOW_SIZE), m_Correlations(0) { @@ -772,6 +810,10 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, m_IsNonNegative = params.isNonNegative(); + maths_t::EDataType type{params.type()}; + m_ResidualModel->dataType(type); + m_TrendModel->dataType(type); + result = CModel::combine(result, this->updateTrend(params.weightStyles(), samples, params.trendWeights())); @@ -786,9 +828,6 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, return samples[lhs].second < samples[rhs].second; }); - maths_t::EDataType type{params.type()}; - m_ResidualModel->dataType(type); - TDouble1Vec samples_; TDouble4Vec1Vec weights_; samples_.reserve(samples.size()); @@ -1243,7 +1282,8 @@ CUnivariateTimeSeriesModel::winsorisationWeight(double derate, { double scale{this->seasonalWeight(0.0, time)[0]}; double sample{m_TrendModel->detrend(time, value[0], 0.0)}; - return {computeWinsorisationWeight(*m_ResidualModel, derate, scale, sample)}; + return { tailWinsorisationWeight(*m_ResidualModel, derate, scale, sample) + * changeWinsorisationWeight(m_ChangeDetector)}; } CUnivariateTimeSeriesModel::TDouble2Vec @@ -1261,7 +1301,6 @@ uint64_t CUnivariateTimeSeriesModel::checksum(uint64_t seed) const seed = CChecksum::calculate(seed, m_ResidualModel); seed = CChecksum::calculate(seed, m_CandidateChangePoint); seed = CChecksum::calculate(seed, m_CurrentChangeInterval); - seed = CChecksum::calculate(seed, m_TimeOfLastChangePoint); seed = CChecksum::calculate(seed, m_ChangeDetector); seed = CChecksum::calculate(seed, m_AnomalyModel); seed = CChecksum::calculate(seed, m_SlidingWindow); @@ -1320,7 +1359,6 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam /**/) RESTORE(CANDIDATE_CHANGE_POINT_6_3_TAG, fromDelimited(traverser.value(), m_CandidateChangePoint)) RESTORE_BUILT_IN(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval) - RESTORE_BUILT_IN(TIME_OF_LAST_CHANGE_POINT_6_3_TAG, m_TimeOfLastChangePoint) RESTORE_SETUP_TEARDOWN(CHANGE_DETECTOR_6_3_TAG, m_ChangeDetector = boost::make_shared( m_TrendModel, m_ResidualModel), @@ -1383,7 +1421,6 @@ void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInsert boost::cref(*m_ResidualModel), _1)); inserter.insertValue(CANDIDATE_CHANGE_POINT_6_3_TAG, toDelimited(m_CandidateChangePoint)); inserter.insertValue(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval); - inserter.insertValue(TIME_OF_LAST_CHANGE_POINT_6_3_TAG, m_TimeOfLastChangePoint); if (m_ChangeDetector != nullptr) { inserter.insertLevel(CHANGE_DETECTOR_6_3_TAG, boost::bind( @@ -1462,7 +1499,6 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeri TAnomalyModelPtr()), m_CandidateChangePoint(other.m_CandidateChangePoint), m_CurrentChangeInterval(other.m_CurrentChangeInterval), - m_TimeOfLastChangePoint(other.m_TimeOfLastChangePoint), m_ChangeDetector(!isForForecast && other.m_ChangeDetector ? 
boost::make_shared(*other.m_ChangeDetector) : TChangeDetectorPtr()), @@ -1486,15 +1522,18 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams &par if (m_ChangeDetector == nullptr) { - if (maths_t::winsorisationWeight(params.weightStyles(), {weights}) < 1.0) + core_t::TTime minimumTimeToDetect{this->params().minimumTimeToDetectChange()}; + core_t::TTime maximumTimeToTest{this->params().maximumTimeToTestForChange()}; + double weight{maths_t::winsorisationWeight(params.weightStyles(), {weights})}; + if ( minimumTimeToDetect < maximumTimeToTest + && pValueFromTailWinsorisationWeight(weight) <= 1e-5) { m_CurrentChangeInterval += this->params().bucketLength(); if (this->params().testForChange(m_CurrentChangeInterval)) { m_ChangeDetector = boost::make_shared( m_TrendModel, m_ResidualModel, - this->params().minimumTimeToDetectChange(time - m_TimeOfLastChangePoint), - this->params().maximumTimeToTestForChange()); + minimumTimeToDetect, maximumTimeToTest); m_CurrentChangeInterval = 0; } } @@ -1518,7 +1557,6 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams &par { LOG_DEBUG("Detected " << change->print() << " at " << values[median].first); m_ChangeDetector.reset(); - m_TimeOfLastChangePoint = time; return this->applyChange(*change); } } @@ -1536,15 +1574,25 @@ CUnivariateTimeSeriesModel::applyChange(const SChangeDescription &change) case SChangeDescription::E_LevelShift: value.second += change.s_Value[0]; break; + case SChangeDescription::E_LinearScale: + value.second *= change.s_Value[0]; + break; case SChangeDescription::E_TimeShift: value.first += static_cast(change.s_Value[0]); break; } } - m_TrendModel->applyChange(m_CandidateChangePoint.first, m_CandidateChangePoint.second, change); - change.s_ResidualModel->decayRate(m_ResidualModel->decayRate()); - m_ResidualModel = change.s_ResidualModel; + if (m_TrendModel->applyChange(m_CandidateChangePoint.first, + m_CandidateChangePoint.second, change)) + { + this->reinitializeStateGivenNewComponent(); + } + else + { + change.s_ResidualModel->decayRate(m_ResidualModel->decayRate()); + m_ResidualModel = change.s_ResidualModel; + } return E_Success; } @@ -2270,6 +2318,13 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, m_IsNonNegative = params.isNonNegative(); + maths_t::EDataType type{params.type()}; + m_ResidualModel->dataType(type); + for (auto &trendModel : m_TrendModel) + { + trendModel->dataType(type); + } + std::size_t dimension{this->dimension()}; EUpdateResult result{this->updateTrend(params.weightStyles(), samples, params.trendWeights())}; @@ -2295,9 +2350,6 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams ¶ms, return samples[lhs].second < samples[rhs].second; }); - maths_t::EDataType type{params.type()}; - m_ResidualModel->dataType(type); - TDouble10Vec1Vec samples_; TDouble10Vec4Vec1Vec weights_; samples_.reserve(samples.size()); @@ -2650,7 +2702,7 @@ CMultivariateTimeSeriesModel::winsorisationWeight(double derate, for (std::size_t d = 0u; d < dimension; ++d) { - result[d] = computeWinsorisationWeight(*m_ResidualModel, d, derate, scale[d], sample); + result[d] = tailWinsorisationWeight(*m_ResidualModel, d, derate, scale[d], sample); } return result; diff --git a/lib/maths/CTrendComponent.cc b/lib/maths/CTrendComponent.cc index 70cb6825be..a661cb61a8 100644 --- a/lib/maths/CTrendComponent.cc +++ b/lib/maths/CTrendComponent.cc @@ -244,15 +244,7 @@ void CTrendComponent::shiftLevel(core_t::TTime time, double value, double shift) double 
dt{static_cast(time - m_TimeOfLastLevelChange)}; m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, {{dt}, {value}}); } - // The magic 1.2 is due to the fact that the trend is updated - // with new values during change detection. As a result, we - // purposely reduce the step size since the shift applied after - // change detection is biased and otherwise too large. For the - // purpose of modeling step changes we want an unbiased estimate - // of the step size, including any adaption in the trend during - // the detection period. This is an empirical estimate of the - // degree of bias across a range of step changes. - m_MagnitudeOfLevelChangeModel.addSamples({maths_t::E_SampleCountWeight}, {1.2 * shift}, {{1.0}}); + m_MagnitudeOfLevelChangeModel.addSamples({maths_t::E_SampleCountWeight}, {shift}, {{1.0}}); m_TimeOfLastLevelChange = time; } @@ -265,6 +257,14 @@ void CTrendComponent::dontShiftLevel(core_t::TTime time, double value) } } +void CTrendComponent::linearScale(double scale) +{ + for (auto &model : m_TrendModels) + { + model.s_Regression.linearScale(scale); + } +} + void CTrendComponent::add(core_t::TTime time, double value, double weight) { // Update the model weights: we weight the components based on the @@ -310,6 +310,7 @@ void CTrendComponent::add(core_t::TTime time, double value, double weight) void CTrendComponent::dataType(maths_t::EDataType dataType) { m_ProbabilityOfLevelChangeModel.dataType(dataType); + m_MagnitudeOfLevelChangeModel.dataType(dataType); } double CTrendComponent::defaultDecayRate() const diff --git a/lib/maths/MathsTypes.cc b/lib/maths/MathsTypes.cc index 9c20be452b..3403164cd3 100644 --- a/lib/maths/MathsTypes.cc +++ b/lib/maths/MathsTypes.cc @@ -460,8 +460,42 @@ bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, return false; } +void setWeight(ESampleWeightStyle style, + double weight, + TWeightStyleVec &weightStyles, + TDouble4Vec &weights) +{ + std::ptrdiff_t i{std::find(weightStyles.begin(), + weightStyles.end(), style) - weightStyles.begin()}; + if (static_cast(i) < weightStyles.size()) + { + weights[i] = weight; + } + else + { + weightStyles.push_back(style); + weights.push_back(weight); + } } -} - +void setWeight(ESampleWeightStyle style, + double weight, + std::size_t dimension, + TWeightStyleVec &weightStyles, + TDouble10Vec4Vec &weights) +{ + std::ptrdiff_t i{std::find(weightStyles.begin(), + weightStyles.end(), style) - weightStyles.begin()}; + if (static_cast(i) < weightStyles.size()) + { + weights[i].assign(dimension, weight); + } + else + { + weightStyles.push_back(style); + weights.push_back(TDouble10Vec(dimension, weight)); + } +} +} +} diff --git a/lib/maths/unittest/CModelTest.cc b/lib/maths/unittest/CModelTest.cc index 3e4ba44b7c..de52efe397 100644 --- a/lib/maths/unittest/CModelTest.cc +++ b/lib/maths/unittest/CModelTest.cc @@ -37,8 +37,7 @@ void CModelTest::testAll(void) CPPUNIT_ASSERT_EQUAL(0.0, params.probabilityBucketEmpty()); params.probabilityBucketEmpty(0.2); CPPUNIT_ASSERT_EQUAL(0.2, params.probabilityBucketEmpty()); - CPPUNIT_ASSERT_EQUAL(6 * core::constants::HOUR, - params.minimumTimeToDetectChange(2 * core::constants::DAY)); + CPPUNIT_ASSERT_EQUAL(6 * core::constants::HOUR, params.minimumTimeToDetectChange()); CPPUNIT_ASSERT_EQUAL(core::constants::DAY, params.maximumTimeToTestForChange()); } { diff --git a/lib/maths/unittest/CRegressionTest.cc b/lib/maths/unittest/CRegressionTest.cc index cf2790139a..40b00b82ea 100644 --- a/lib/maths/unittest/CRegressionTest.cc +++ 
b/lib/maths/unittest/CRegressionTest.cc @@ -370,6 +370,50 @@ void CRegressionTest::testShiftGradient(void) CPPUNIT_ASSERT_DOUBLES_EQUAL( params1[3], params2[3], 1e-6 * ::fabs(params1[3])); } +void CRegressionTest::testLinearScale(void) +{ + LOG_DEBUG("+------------------------------------+"); + LOG_DEBUG("| CRegressionTest::testLinearScale |"); + LOG_DEBUG("+------------------------------------+"); + + // Test that linearly scaling a regression linearly + // scales all the parameters. + + maths::CRegression::CLeastSquaresOnline<3, double> regression; + for (double x = 0.0; x < 100.0; x += 1.0) + { + regression.add(x, 0.01 * x * x * x - 0.2 * x * x + 1.0 * x + 10.0); + } + + TDoubleArray4 params1; + regression.parameters(params1); + + regression.linearScale(0.1); + + TDoubleArray4 params2; + regression.parameters(params2); + + LOG_DEBUG("parameters 1 = " << core::CContainerPrinter::print(params1)); + LOG_DEBUG("parameters 2 = " << core::CContainerPrinter::print(params2)); + + for (std::size_t i = 0u; i < 4; ++i) + { + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.1 * params1[i], params2[i], 1e-6); + } + + regression.linearScale(100.0); + + regression.parameters(params2); + + LOG_DEBUG("parameters 1 = " << core::CContainerPrinter::print(params1)); + LOG_DEBUG("parameters 2 = " << core::CContainerPrinter::print(params2)); + + for (std::size_t i = 0u; i < 4; ++i) + { + CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0 * params1[i], params2[i], 1e-6); + } +} + void CRegressionTest::testAge(void) { LOG_DEBUG("+----------------------------+"); @@ -1178,6 +1222,9 @@ CppUnit::Test *CRegressionTest::suite(void) suiteOfTests->addTest( new CppUnit::TestCaller( "CRegressionTest::testShiftGradient", &CRegressionTest::testShiftGradient) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CRegressionTest::testLinearScale", + &CRegressionTest::testLinearScale) ); suiteOfTests->addTest( new CppUnit::TestCaller( "CRegressionTest::testAge", &CRegressionTest::testAge) ); diff --git a/lib/maths/unittest/CRegressionTest.h b/lib/maths/unittest/CRegressionTest.h index 4ad012470e..6f26d2e42b 100644 --- a/lib/maths/unittest/CRegressionTest.h +++ b/lib/maths/unittest/CRegressionTest.h @@ -17,6 +17,7 @@ class CRegressionTest : public CppUnit::TestFixture void testShiftAbscissa(void); void testShiftOrdinate(void); void testShiftGradient(void); + void testLinearScale(void); void testAge(void); void testPrediction(void); void testCombination(void); diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc index 83d116aed0..93c852be9b 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc @@ -126,7 +126,7 @@ void CTimeSeriesChangeDetectorTest::testNoChange() maths::CUnivariateTimeSeriesChangeDetector detector{trendModel, residualModel, 6 * core::constants::HOUR, - 24 * core::constants::HOUR, 12.0}; + 24 * core::constants::HOUR, 14.0}; for (std::size_t i = 950u; i < samples.size(); ++i) { addSampleToModel(time, samples[i]); @@ -159,6 +159,22 @@ void CTimeSeriesChangeDetectorTest::testLevelShift() }, 5.0, 15.0); } +void CTimeSeriesChangeDetectorTest::testLinearScale() +{ + LOG_DEBUG("+--------------------------------------------------+"); + LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testLinearScale |"); + LOG_DEBUG("+--------------------------------------------------+"); + + TGeneratorVec trends{smoothDaily, spikeyDaily}; + + this->testChange(trends, + maths::SChangeDescription::E_LinearScale, + [](TGenerator trend, 
core_t::TTime time) + { + return 3.0 * trend(time); + }, 3.0, 15.0); +} + void CTimeSeriesChangeDetectorTest::testTimeShift() { LOG_DEBUG("+------------------------------------------------+"); @@ -260,6 +276,9 @@ CppUnit::Test *CTimeSeriesChangeDetectorTest::suite() suiteOfTests->addTest( new CppUnit::TestCaller( "CTimeSeriesChangeDetectorTest::testLevelShift", &CTimeSeriesChangeDetectorTest::testLevelShift) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CTimeSeriesChangeDetectorTest::testLinearScale", + &CTimeSeriesChangeDetectorTest::testLinearScale) ); suiteOfTests->addTest( new CppUnit::TestCaller( "CTimeSeriesChangeDetectorTest::testTimeShift", &CTimeSeriesChangeDetectorTest::testTimeShift) ); @@ -314,7 +333,7 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec &trends, maths::CUnivariateTimeSeriesChangeDetector detector{trendModel, residualModel, 6 * core::constants::HOUR, - 24 * core::constants::HOUR, 12.0}; + 24 * core::constants::HOUR, 14.0}; TOptionalSize bucketsToDetect; for (std::size_t i = 950u; i < samples.size(); ++i) diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h index c907b1050f..c3fc9acb02 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h @@ -18,6 +18,7 @@ class CTimeSeriesChangeDetectorTest : public CppUnit::TestFixture public: void testNoChange(); void testLevelShift(); + void testLinearScale(); void testTimeShift(); void testPersist(); diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index b4c1353eb9..e688fc092d 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -25,6 +25,9 @@ #include #include +#include + +#include "TestUtils.h" #include @@ -34,20 +37,14 @@ using namespace ml; namespace { +using namespace handy_typedefs; using TBool2Vec = core::CSmallVector; using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; -using TDouble1Vec = core::CSmallVector; using TDouble2Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble10Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; using TDouble2Vec4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; -using TDouble10Vec1Vec = core::CSmallVector; -using TDouble10Vec4Vec = core::CSmallVector; using TDouble2Vec4VecVec = std::vector; -using TDouble10Vec4Vec1Vec = core::CSmallVector; using TSize1Vec = core::CSmallVector; using TTime2Vec = core::CSmallVector; using TTime2Vec1Vec = core::CSmallVector; @@ -111,7 +108,7 @@ maths::CModelParams modelParams(core_t::TTime bucketLength) return maths::CModelParams{bucketLength, learnRates[bucketLength], DECAY_RATE, minimumSeasonalVarianceScale, - 6 * core::constants::HOUR, core::constants::DAY}; + 12 * core::constants::HOUR, core::constants::DAY}; } maths::CModelAddSamplesParams addSampleParams(double interval, @@ -143,40 +140,40 @@ maths::CModelProbabilityParams computeProbabilityParams(const TDouble2Vec4Vec &w return params; } -maths::CNormalMeanPrecConjugate univariateNormal(void) +maths::CNormalMeanPrecConjugate univariateNormal(double decayRate = DECAY_RATE) { - return maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); + return maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate); } -maths::CLogNormalMeanPrecConjugate univariateLogNormal(void) 
+maths::CLogNormalMeanPrecConjugate univariateLogNormal(double decayRate = DECAY_RATE) { - return maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, DECAY_RATE); + return maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, decayRate); } -maths::CMultimodalPrior univariateMultimodal(void) +maths::CMultimodalPrior univariateMultimodal(double decayRate = DECAY_RATE) { maths::CXMeansOnline1d clusterer{maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, - DECAY_RATE}; - return maths::CMultimodalPrior{maths_t::E_ContinuousData, clusterer, univariateNormal(), DECAY_RATE}; + decayRate}; + return maths::CMultimodalPrior{maths_t::E_ContinuousData, clusterer, univariateNormal(), decayRate}; } -maths::CMultivariateNormalConjugate<3> multivariateNormal(void) +maths::CMultivariateNormalConjugate<3> multivariateNormal(double decayRate = DECAY_RATE) { - return maths::CMultivariateNormalConjugate<3>::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); + return maths::CMultivariateNormalConjugate<3>::nonInformativePrior(maths_t::E_ContinuousData, decayRate); } -maths::CMultivariateMultimodalPrior<3> multivariateMultimodal(void) +maths::CMultivariateMultimodalPrior<3> multivariateMultimodal(double decayRate = DECAY_RATE) { maths::CXMeansOnline clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, - DECAY_RATE); + decayRate); return maths::CMultivariateMultimodalPrior<3>( maths_t::E_ContinuousData, clusterer, - maths::CMultivariateNormalConjugate<3>::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE), - DECAY_RATE); + maths::CMultivariateNormalConjugate<3>::nonInformativePrior(maths_t::E_ContinuousData, decayRate), + decayRate); } maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary decayRateControllers(std::size_t dimension) @@ -1470,7 +1467,7 @@ void CTimeSeriesModelTest::testWeights(void) double lastWeight = 1.0; for (std::size_t i = 0u; i < 10; ++i) { - double weight_{model.winsorisationWeight(1.0, time, prediction)[0]}; + double weight_{model.winsorisationWeight(0.0, time, prediction)[0]}; LOG_DEBUG("weight = " << weight_); CPPUNIT_ASSERT(weight_ <= lastWeight); lastWeight = weight_; @@ -1533,7 +1530,7 @@ void CTimeSeriesModelTest::testWeights(void) double lastWeight = 1.0; for (std::size_t i = 0u; i < 10; ++i) { - double weight_{model.winsorisationWeight(1.0, time, prediction)[0]}; + double weight_{model.winsorisationWeight(0.0, time, prediction)[0]}; LOG_DEBUG("weight = " << weight_); CPPUNIT_ASSERT(weight_ <= lastWeight); lastWeight = weight_; @@ -2105,8 +2102,8 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities(void) LOG_DEBUG("+-------------------------------------------------------+"); // Check detection and modelling of step changes in data with - // 1) Piecewise constant - // 2) Saw tooth + // 1) Piecewise constant, + // 2) Saw tooth. 
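+    //
+    // For orientation (an illustrative sketch; the exact levels, slopes
+    // and change points are set in the test body below):
+    //    piecewise constant: value(t) = level[i] + noise(t), with the
+    //    level stepping to a new value at each change point;
+    //    saw tooth: value(t) = slope * (t mod period) + noise(t), a
+    //    linear ramp which periodically resets to its starting value.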
using TDouble3Vec = core::CSmallVector<double, 3>;
using TDouble3VecVec = std::vector<TDouble3Vec>;

@@ -2114,7 +2111,7 @@
    TDouble2Vec4VecVec weight{{{1.0}}};
    auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel &model)
        {
-            weight[0][0] = model.winsorisationWeight(1.0, time, {value});
+            weight[0][0] = model.winsorisationWeight(0.0, time, {value});
            model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight),
                             {core::make_triple(time, TDouble2Vec{value}, TAG)});
        };
@@ -2122,7 +2119,8 @@
    //std::ostringstream actual, modelBounds;
    //actual << "r = [";
    //modelBounds << "x = [";
-    //auto updateTestDebug = [&](core_t::TTime time, double value, const maths::CUnivariateTimeSeriesModel &model)
+    //auto updateTestDebug = [&](core_t::TTime time, double value,
+    //                           const maths::CUnivariateTimeSeriesModel &model)
    //    {
    //        actual << value << std::endl;
    //        auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}});
@@ -2140,7 +2138,8 @@
    maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
    auto controllers = decayRateControllers(1);
    maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0,
-                                            trend, univariateNormal(), &controllers};
+                                            trend, univariateNormal(DECAY_RATE / 3.0),
+                                            &controllers};

    // Add some data to the model.

@@ -2297,17 +2296,216 @@
        }
        double percentageOutOfBounds{100.0 * outOfBounds / static_cast<double>(forecast.size())};
        LOG_DEBUG("% out-of-bounds = " << percentageOutOfBounds);
-        CPPUNIT_ASSERT(percentageOutOfBounds < 11.0);
+        CPPUNIT_ASSERT(percentageOutOfBounds < 5.0);
    }
}

-void CTimeSeriesModelTest::daylightSaving(void)
+void CTimeSeriesModelTest::testLinearScaling(void)
{
-    LOG_DEBUG("+----------------------------------------+");
-    LOG_DEBUG("| CTimeSeriesModelTest::daylightSaving |");
-    LOG_DEBUG("+----------------------------------------+");
+    LOG_DEBUG("+-------------------------------------------+");
+    LOG_DEBUG("| CTimeSeriesModelTest::testLinearScaling |");
+    LOG_DEBUG("+-------------------------------------------+");
+
+    // We test that the predictions are good and the bounds do not
+    // blow up after we:
+    //    1) linearly scale down a periodic pattern,
+    //    2) linearly scale up the same periodic pattern.
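+    //
+    // For reference, when the change detector reports a linear scale
+    // change of size s, applyChange above multiplies the values in the
+    // sliding window by s and linearly scales the trend regression
+    // parameters, so the prediction p(t) becomes approximately s * p(t)
+    // and the periodic pattern does not have to be relearned.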
+ + TDouble2Vec4VecVec weight{{{1.0}}}; + auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel &model) + { + weight[0][0] = model.winsorisationWeight(0.0, time, {value}); + model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), + {core::make_triple(time, TDouble2Vec{value}, TAG)}); + }; - // TODO + //std::ostringstream actual, modelBounds; + //actual << "r = ["; + //modelBounds << "x = ["; + //auto updateTestDebug = [&](core_t::TTime time, double value, + // const maths::CUnivariateTimeSeriesModel &model) + // { + // actual << value << std::endl; + // auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); + // if (x.size() == 3) + // { + // modelBounds << x[0][0] << "," << x[1][0] << "," << x[2][0] << std::endl; + // } + // }; + + test::CRandomNumbers rng; + + double noiseVariance{3.0}; + + core_t::TTime bucketLength{600}; + maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; + auto controllers = decayRateControllers(1); + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, + trend, univariateNormal(DECAY_RATE / 3.0), + &controllers}; + + core_t::TTime time{0}; + TDoubleVec samples; + rng.generateNormalSamples(0.0, noiseVariance, 1000, samples); + for (auto sample : samples) + { + sample += 12.0 + 10.0 * smoothDaily(time); + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + time += bucketLength; + } + + // Scale by 0.3 + + rng.generateNormalSamples(0.0, noiseVariance, 200, samples); + for (auto sample : samples) + { + sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample); + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + time += bucketLength; + } + rng.generateNormalSamples(0.0, noiseVariance, 1500, samples); + for (auto sample : samples) + { + sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample); + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); + CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 1.2 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); + time += bucketLength; + } + + // Scale by 2 / 0.3 + + rng.generateNormalSamples(0.0, noiseVariance, 200, samples); + for (auto sample : samples) + { + sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample; + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + time += bucketLength; + } + rng.generateNormalSamples(0.0, noiseVariance, 400, samples); + for (auto sample : samples) + { + sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample; + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); + CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.1 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); + time += bucketLength; + } + + //std::ofstream file; + //file.open("bounds.m"); + //file << actual.str() << "];"; + //file << modelBounds.str() << "];"; +} + +void CTimeSeriesModelTest::testDaylightSaving(void) +{ + LOG_DEBUG("+--------------------------------------------+"); + LOG_DEBUG("| CTimeSeriesModelTest::testDaylightSaving |"); + LOG_DEBUG("+--------------------------------------------+"); + + TDouble2Vec4VecVec weight{{{1.0}}}; + auto updateModel = [&](core_t::TTime time, double 
value, maths::CUnivariateTimeSeriesModel &model) + { + weight[0][0] = model.winsorisationWeight(0.0, time, {value}); + model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), + {core::make_triple(time, TDouble2Vec{value}, TAG)}); + }; + + //std::ostringstream actual, modelBounds; + //actual << "r = ["; + //modelBounds << "x = ["; + //auto updateTestDebug = [&](core_t::TTime time, double value, + // const maths::CUnivariateTimeSeriesModel &model) + // { + // actual << value << std::endl; + // auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); + // if (x.size() == 3) + // { + // modelBounds << x[0][0] << "," << x[1][0] << "," << x[2][0] << std::endl; + // } + // }; + + test::CRandomNumbers rng; + + core_t::TTime hour{core::constants::HOUR}; + double noiseVariance{0.36}; + + core_t::TTime bucketLength{600}; + maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; + auto controllers = decayRateControllers(1); + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, + trend, univariateNormal(DECAY_RATE / 3.0), + &controllers}; + + core_t::TTime time{0}; + TDoubleVec samples; + rng.generateNormalSamples(0.0, noiseVariance, 1000, samples); + for (auto sample : samples) + { + sample += 12.0 + 10.0 * smoothDaily(time); + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + time += bucketLength; + } + + // Shift by +1 hr. + + rng.generateNormalSamples(0.0, noiseVariance, 200, samples); + for (auto sample : samples) + { + sample += 12.0 + 10.0 * smoothDaily(time + hour); + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + time += bucketLength; + } + rng.generateNormalSamples(0.0, noiseVariance, 1500, samples); + for (auto sample : samples) + { + sample += 12.0 + 10.0 * smoothDaily(time + hour); + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + CPPUNIT_ASSERT_EQUAL(hour, model.trendModel().timeShift()); + auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); + CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.6 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.6 * std::sqrt(noiseVariance)); + time += bucketLength; + } + + // Shift by -1 hr. 
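+    //
+    // This mirrors the +1 hr case: after enough buckets at the original
+    // phase the detector should apply a compensating time shift, so the
+    // second loop below asserts that trendModel().timeShift() is back
+    // to zero and that the bounds stay tight.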
+ + rng.generateNormalSamples(0.0, noiseVariance, 200, samples); + for (auto sample : samples) + { + sample += 12.0 + 10.0 * smoothDaily(time); + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + time += bucketLength; + } + rng.generateNormalSamples(0.0, noiseVariance, 400, samples); + for (auto sample : samples) + { + sample += 12.0 + 10.0 * smoothDaily(time); + updateModel(time, sample, model); + //updateTestDebug(time, sample, model); + CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), model.trendModel().timeShift()); + auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); + CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 4.1 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.8 * std::sqrt(noiseVariance)); + time += bucketLength; + } + + //std::ofstream file; + //file.open("bounds.m"); + //file << actual.str() << "];"; + //file << modelBounds.str() << "];"; } CppUnit::Test *CTimeSeriesModelTest::suite(void) @@ -2356,6 +2554,12 @@ CppUnit::Test *CTimeSeriesModelTest::suite(void) suiteOfTests->addTest( new CppUnit::TestCaller( "CTimeSeriesModelTest::testStepChangeDiscontinuities", &CTimeSeriesModelTest::testStepChangeDiscontinuities) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CTimeSeriesModelTest::testLinearScaling", + &CTimeSeriesModelTest::testLinearScaling) ); + suiteOfTests->addTest( new CppUnit::TestCaller( + "CTimeSeriesModelTest::testDaylightSaving", + &CTimeSeriesModelTest::testDaylightSaving) ); return suiteOfTests; } diff --git a/lib/maths/unittest/CTimeSeriesModelTest.h b/lib/maths/unittest/CTimeSeriesModelTest.h index 5a3568e878..a8543aad0c 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.h +++ b/lib/maths/unittest/CTimeSeriesModelTest.h @@ -26,7 +26,8 @@ class CTimeSeriesModelTest : public CppUnit::TestFixture void testProbabilityWithCorrelations(void); void testAnomalyModel(void); void testStepChangeDiscontinuities(void); - void daylightSaving(void); + void testLinearScaling(void); + void testDaylightSaving(void); static CppUnit::Test *suite(void); }; diff --git a/lib/model/CAnomalyDetectorModelConfig.cc b/lib/model/CAnomalyDetectorModelConfig.cc index 2da6321f18..448e19e86d 100644 --- a/lib/model/CAnomalyDetectorModelConfig.cc +++ b/lib/model/CAnomalyDetectorModelConfig.cc @@ -73,7 +73,7 @@ const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT(12 const double CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION(0.8); const double CAnomalyDetectorModelConfig::DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS(0.2); const std::size_t CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE(36u); -const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE(6 * core::constants::HOUR); +const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE(12 * core::constants::HOUR); const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE(core::constants::DAY); const double CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET(1.0); const double CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF(0.5); diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index 2ae9ec8f83..fa65703346 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -585,7 +585,7 @@ void CEventRateModelTest::testOnlineRare(void) LOG_TRACE("origXml = " << origXml); LOG_DEBUG("size = " << origXml.size()); - 
CPPUNIT_ASSERT(origXml.size() < 21000); + CPPUNIT_ASSERT(origXml.size() < 22000); // Restore the XML into a new filter core::CRapidXmlParser parser; @@ -2876,10 +2876,11 @@ void CEventRateModelTest::testDecayRateControl(void) 0.05); } - LOG_DEBUG("*** Test step change ***"); + LOG_DEBUG("*** Test linear scaling ***"); { - // Test a step change in a stable signal is detected and we get a - // significant reduction in the prediction error. + // This change point is amongst those we explicitly detect so + // check we get similar detection performance with and without + // decay rate control. params.s_ControlDecayRate = true; params.s_DecayRate = 0.001; @@ -2929,8 +2930,9 @@ void CEventRateModelTest::testDecayRateControl(void) } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanPredictionError) - < 0.94 * maths::CBasicStatistics::mean(meanReferencePredictionError)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanReferencePredictionError), + maths::CBasicStatistics::mean(meanPredictionError), + 0.05); } LOG_DEBUG("*** Test unmodelled cyclic component ***"); @@ -2938,7 +2940,8 @@ void CEventRateModelTest::testDecayRateControl(void) // This modulates the event rate using a sine with period 10 weeks // effectively there are significant "manoeuvres" in the event rate // every 5 weeks at the function turning points. We check we get a - // significant reduction in the prediction error. + // significant reduction in the prediction error with decay rate + // control. params.s_ControlDecayRate = true; params.s_DecayRate = 0.001; diff --git a/lib/model/unittest/CMetricAnomalyDetectorTest.cc b/lib/model/unittest/CMetricAnomalyDetectorTest.cc index 483220eaba..2ef6649d29 100644 --- a/lib/model/unittest/CMetricAnomalyDetectorTest.cc +++ b/lib/model/unittest/CMetricAnomalyDetectorTest.cc @@ -362,7 +362,7 @@ void CMetricAnomalyDetectorTest::testAnomalies(void) double noise = std::accumulate(anomalyFactors.begin(), anomalyFactors.end(), 0.0); LOG_DEBUG("S/N = " << (signal / noise)); - CPPUNIT_ASSERT(signal / noise > 100.0); + CPPUNIT_ASSERT(signal / noise > 90.0); } // Find the high/low rate partition point. 
@@ -499,7 +499,7 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent(void) // expect there to be 2 anomalies CPPUNIT_ASSERT_EQUAL(std::size_t(2), highAnomalyTimes.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(92.0, highAnomalyFactors[1], 0.5); + CPPUNIT_ASSERT_DOUBLES_EQUAL(99.0, highAnomalyFactors[1], 0.5); } { model::CAnomalyDetectorModelConfig modelConfig = @@ -532,7 +532,7 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent(void) // expect there to be 1 anomaly CPPUNIT_ASSERT_EQUAL(std::size_t(1), highAnomalyTimes.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(23.0, highAnomalyFactors[0], 0.4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(24.0, highAnomalyFactors[0], 0.5); } } diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index d3cbf447f7..ca8bf4fd17 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -330,11 +330,11 @@ void CMetricModelTest::testSample(void) TTimeDoublePr(66, 1.33), TTimeDoublePr(68, 1.5), TTimeDoublePr(84, 1.58), - TTimeDoublePr(87, 1.99), + TTimeDoublePr(87, 1.69), TTimeDoublePr(157, 1.6), TTimeDoublePr(164, 1.66), TTimeDoublePr(199, 1.28), - TTimeDoublePr(202, 1.0), + TTimeDoublePr(202, 1.2), TTimeDoublePr(204, 1.5) }; @@ -582,11 +582,11 @@ void CMetricModelTest::testMultivariateSample(void) { 66, 1.33, 1.6 }, { 68, 1.5, 1.37}, { 84, 1.58, 1.42}, - { 87, 1.99, 2.2 }, + { 87, 1.6, 1.6 }, { 157, 1.6, 1.6 }, { 164, 1.66, 1.55}, { 199, 1.28, 1.4 }, - { 202, 1.0, 0.7 }, + { 202, 1.3, 1.1 }, { 204, 1.5, 1.8 } }; TTimeDouble2AryPrVec data; @@ -2380,13 +2380,14 @@ void CMetricModelTest::testDecayRateControl(void) LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), - 0.06); + 0.05); } LOG_DEBUG("*** Test step change ***"); { - // Test a step change in a stable signal is detected and we get a - // significant reduction in the prediction error. + // This change point is amongst those we explicitly detect so + // check we get similar detection performance with and without + // decay rate control. params.s_ControlDecayRate = true; params.s_DecayRate = 0.001; @@ -2433,8 +2434,9 @@ void CMetricModelTest::testDecayRateControl(void) } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanPredictionError) - < 0.94 * maths::CBasicStatistics::mean(meanReferencePredictionError)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanReferencePredictionError), + maths::CBasicStatistics::mean(meanPredictionError), + 0.05); } LOG_DEBUG("*** Test unmodelled cyclic component ***"); @@ -2442,7 +2444,8 @@ void CMetricModelTest::testDecayRateControl(void) // This modulates the event rate using a sine with period 10 weeks // effectively there are significant "manoeuvres" in the event rate // every 5 weeks at the function turning points. We check we get a - // significant reduction in the prediction error. + // significant reduction in the prediction error with decay rate + // control. 
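+    //
+    // Unlike the step change case above, which explicit change detection
+    // now handles, these slow manoeuvres are not detected as changes, so
+    // adapting the decay rate is the mechanism for tracking them and we
+    // expect a clear win over the fixed decay rate reference.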
params.s_ControlDecayRate = true; params.s_DecayRate = 0.001; diff --git a/lib/test/CTimeSeriesTestData.cc b/lib/test/CTimeSeriesTestData.cc index 2769c04d95..c7ac0fdfaf 100644 --- a/lib/test/CTimeSeriesTestData.cc +++ b/lib/test/CTimeSeriesTestData.cc @@ -267,7 +267,7 @@ bool CTimeSeriesTestData::parseLine(const core::CRegex &tokenRegex, core::CRegex::TStrVec tokens; if (tokenRegex.tokenise(line, tokens) == false) { - LOG_ERROR("Regex error '" << tokenRegex.str() << "' " << line); + LOG_ERROR("Regex error '" << tokenRegex.str() << "' '" << line << "'"); return false; } From 96bcb62590b02740c6f649ddf374a39cf1d7053f Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 4 Apr 2018 16:07:42 +0100 Subject: [PATCH 13/29] Fix windows build issue --- lib/maths/CTimeSeriesModel.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 502c2af57c..e70c60d75c 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -1517,7 +1517,7 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams &par const TTimeDouble2VecSizeTrVec &values) { std::size_t median{order[order.size() / 2]}; - TDouble4Vec weights{unpack(params.priorWeights()[median])}; + TDouble4Vec weights(unpack(params.priorWeights()[median])); core_t::TTime time{values[median].first}; if (m_ChangeDetector == nullptr) From 3ec6deb75b122bf65c264851d6945d2aa17b26c7 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 4 Apr 2018 16:42:17 +0100 Subject: [PATCH 14/29] Reference to weight styles outlives the object --- lib/maths/unittest/CLinearAlgebraTest.cc | 3 ++- lib/maths/unittest/CTimeSeriesModelTest.cc | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/maths/unittest/CLinearAlgebraTest.cc b/lib/maths/unittest/CLinearAlgebraTest.cc index de870e9696..c9caf9b6fd 100644 --- a/lib/maths/unittest/CLinearAlgebraTest.cc +++ b/lib/maths/unittest/CLinearAlgebraTest.cc @@ -1330,7 +1330,8 @@ void CLinearAlgebraTest::testPersist(void) LOG_DEBUG("| CLinearAlgebraTest::testPersist |"); LOG_DEBUG("+-----------------------------------+"); - // Check conversion to and from delimited is idempotent. + // Check conversion to and from delimited is idempotent and parsing + // bad input produces an error. 
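+    //
+    // That is, writing a matrix or vector to its delimited representation
+    // and restoring it should recover the original object exactly, while
+    // parsing a malformed delimited string should fail rather than
+    // silently produce a value.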
{ double matrix_[][4] = diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index e688fc092d..91835257fb 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -112,7 +113,7 @@ maths::CModelParams modelParams(core_t::TTime bucketLength) } maths::CModelAddSamplesParams addSampleParams(double interval, - maths_t::TWeightStyleVec weightStyles, + const maths_t::TWeightStyleVec &weightStyles, const TDouble2Vec4VecVec &weights) { maths::CModelAddSamplesParams params; From 57b51c2c59c71b0d67c32b32c5b9f8e456d0ced5 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 13 Apr 2018 18:42:20 +0100 Subject: [PATCH 15/29] Reformat --- bin/autoconfig/CCmdLineParser.cc | 140 +- bin/autoconfig/CCmdLineParser.h | 57 +- bin/autoconfig/Main.cc | 42 +- bin/autodetect/CCmdLineParser.cc | 322 +- bin/autodetect/CCmdLineParser.h | 99 +- bin/autodetect/Main.cc | 184 +- bin/categorize/CCmdLineParser.cc | 166 +- bin/categorize/CCmdLineParser.h | 63 +- bin/categorize/Main.cc | 113 +- .../CBlockingCallCancellerThread.cc | 43 +- bin/controller/CBlockingCallCancellerThread.h | 42 +- bin/controller/CCmdLineParser.cc | 59 +- bin/controller/CCmdLineParser.h | 29 +- bin/controller/CCommandProcessor.cc | 67 +- bin/controller/CCommandProcessor.h | 64 +- bin/controller/Main.cc | 49 +- .../CBlockingCallCancellerThreadTest.cc | 53 +- .../CBlockingCallCancellerThreadTest.h | 10 +- .../unittest/CCommandProcessorTest.cc | 70 +- .../unittest/CCommandProcessorTest.h | 18 +- bin/controller/unittest/Main.cc | 9 +- bin/normalize/CCmdLineParser.cc | 141 +- bin/normalize/CCmdLineParser.h | 60 +- bin/normalize/Main.cc | 92 +- devbin/analyze_test/Main.cc | 26 +- devbin/move_copy_swap/Main.cc | 80 +- devbin/unixtime_to_string/CCmdLineParser.cc | 61 +- devbin/unixtime_to_string/CCmdLineParser.h | 31 +- devbin/unixtime_to_string/Main.cc | 13 +- devbin/vfprog/CIncrementer.cc | 20 +- devbin/vfprog/CIncrementer.h | 35 +- devbin/vfprog/CLooper.cc | 59 +- devbin/vfprog/CLooper.h | 55 +- devbin/vfprog/Main.cc | 116 +- devbin/vsbug/Main.cc | 17 +- devinclude/vflib/CIncrementer.h | 39 +- devinclude/vflib/CLooper.h | 34 +- devinclude/vflib/ImportExport.h | 1 - devlib/vflib/CIncrementer.cc | 20 +- devlib/vflib/CLooper.cc | 35 +- include/api/CAnomalyJob.h | 768 ++--- include/api/CBackgroundPersister.h | 216 +- include/api/CBaseTokenListDataTyper.h | 456 ++- include/api/CBenchMarker.h | 71 +- include/api/CCategoryExamplesCollector.h | 82 +- include/api/CCmdSkeleton.h | 53 +- include/api/CConfigUpdater.h | 45 +- include/api/CCsvInputParser.h | 299 +- include/api/CCsvOutputWriter.h | 216 +- include/api/CDataProcessor.h | 83 +- include/api/CDataTyper.h | 155 +- include/api/CDetectionRulesJsonParser.h | 72 +- include/api/CFieldConfig.h | 997 +++--- include/api/CFieldDataTyper.h | 262 +- include/api/CForecastRunner.h | 316 +- include/api/CHierarchicalResultsWriter.h | 325 +- include/api/CInputParser.h | 140 +- include/api/CIoManager.h | 167 +- include/api/CJsonOutputWriter.h | 485 ++- include/api/CLengthEncodedInputParser.h | 145 +- include/api/CLineifiedInputParser.h | 117 +- include/api/CLineifiedJsonInputParser.h | 67 +- include/api/CLineifiedJsonOutputWriter.h | 107 +- include/api/CLineifiedXmlInputParser.h | 67 +- include/api/CLineifiedXmlOutputWriter.h | 82 +- include/api/CModelPlotDataJsonWriter.h | 114 +- include/api/CModelSizeStatsJsonWriter.h | 19 +- 
include/api/CModelSnapshotJsonWriter.h | 69 +- include/api/CNullOutput.h | 37 +- include/api/COutputChainer.h | 161 +- include/api/COutputHandler.h | 142 +- include/api/CResultNormalizer.h | 170 +- include/api/CSingleStreamDataAdder.h | 63 +- include/api/CSingleStreamSearcher.h | 43 +- include/api/CStateRestoreStreamFilter.h | 32 +- include/api/CTokenListDataTyper.h | 384 +-- include/api/CTokenListReverseSearchCreator.h | 92 +- .../api/CTokenListReverseSearchCreatorIntf.h | 136 +- include/api/CTokenListType.h | 256 +- include/api/ImportExport.h | 1 - include/config/CAutoconfigurer.h | 54 +- .../config/CAutoconfigurerDetectorPenalties.h | 59 +- .../CAutoconfigurerFieldRolePenalties.h | 48 +- include/config/CAutoconfigurerParams.h | 629 ++-- include/config/CDataCountStatistics.h | 497 ++- include/config/CDataSemantics.h | 175 +- include/config/CDataSummaryStatistics.h | 274 +- include/config/CDetectorEnumerator.h | 195 +- include/config/CDetectorFieldRolePenalty.h | 44 +- include/config/CDetectorRecord.h | 193 +- include/config/CDetectorSpecification.h | 385 +-- include/config/CFieldRolePenalty.h | 147 +- include/config/CFieldStatistics.h | 105 +- include/config/CLongTailPenalty.h | 56 +- .../config/CLowInformationContentPenalty.h | 28 +- include/config/CLowVariationPenalty.h | 73 +- include/config/CNotEnoughDataPenalty.h | 57 +- include/config/CPenalty.h | 168 +- include/config/CPolledDataPenalty.h | 36 +- include/config/CReportWriter.h | 248 +- .../CSpanTooSmallForBucketLengthPenalty.h | 28 +- include/config/CSparseCountPenalty.h | 28 +- include/config/CTooMuchDataPenalty.h | 57 +- include/config/CTools.h | 52 +- include/config/ConfigTypes.h | 66 +- include/config/Constants.h | 27 +- include/config/ImportExport.h | 1 - include/core/BoostMultiIndex.h | 9 +- include/core/CAllocationStrategy.h | 74 +- include/core/CBase64Filter.h | 470 ++- include/core/CBlockingMessageQueue.h | 351 +- include/core/CBufferFlushTimer.h | 29 +- include/core/CByteSwapper.h | 37 +- include/core/CCTimeR.h | 18 +- include/core/CCompressOStream.h | 76 +- include/core/CCompressUtils.h | 116 +- include/core/CCompressedDictionary.h | 360 +- include/core/CConcurrentQueue.h | 170 +- include/core/CConcurrentWrapper.h | 126 +- include/core/CCondition.h | 76 +- include/core/CContainerPrinter.h | 452 ++- include/core/CCrashHandler.h | 14 +- include/core/CDataAdder.h | 88 +- include/core/CDataSearcher.h | 54 +- include/core/CDelimiter.h | 181 +- include/core/CDetachedProcessSpawner.h | 103 +- include/core/CDualThreadStreamBuf.h | 254 +- include/core/CFastMutex.h | 32 +- include/core/CFileDeleter.h | 27 +- include/core/CFlatPrefixTree.h | 193 +- include/core/CFloatStorage.h | 199 +- include/core/CFunctional.h | 95 +- include/core/CGmTimeR.h | 19 +- include/core/CHashing.h | 866 +++-- include/core/CHexUtils.h | 63 +- include/core/CIEEE754.h | 100 +- include/core/CJsonLogLayout.h | 97 +- include/core/CJsonOutputStreamWrapper.h | 104 +- include/core/CJsonStatePersistInserter.h | 64 +- include/core/CJsonStateRestoreTraverser.h | 268 +- include/core/CLocalTimeR.h | 19 +- include/core/CLogger.h | 203 +- include/core/CMaskIterator.h | 206 +- include/core/CMemory.h | 1504 ++++----- include/core/CMemoryUsage.h | 145 +- include/core/CMemoryUsageJsonWriter.h | 61 +- include/core/CMessageBuffer.h | 171 +- include/core/CMessageQueue.h | 411 +-- include/core/CMonotonicTime.h | 37 +- include/core/CMutex.h | 30 +- include/core/CNamedPipeFactory.h | 103 +- include/core/CNonCopyable.h | 40 +- include/core/CNonInstantiatable.h | 22 +- 
include/core/COsFileFuncs.h | 169 +- include/core/CPOpen.h | 21 +- include/core/CPatternSet.h | 59 +- include/core/CPersistUtils.h | 1625 ++++----- include/core/CPolymorphicStackObjectCPtr.h | 162 +- include/core/CProcess.h | 170 +- include/core/CProcessPriority.h | 22 +- include/core/CProgName.h | 30 +- include/core/CRapidJsonConcurrentLineWriter.h | 67 +- include/core/CRapidJsonLineWriter.h | 72 +- include/core/CRapidJsonPoolAllocator.h | 92 +- include/core/CRapidJsonPrettyWriter.h | 20 +- include/core/CRapidJsonWriterBase.h | 956 +++--- include/core/CRapidXmlParser.h | 236 +- include/core/CRapidXmlStatePersistInserter.h | 92 +- include/core/CRapidXmlStateRestoreTraverser.h | 128 +- include/core/CReadWriteLock.h | 34 +- include/core/CRegex.h | 103 +- include/core/CRegexFilter.h | 48 +- include/core/CResourceLocator.h | 36 +- include/core/CScopedFastLock.h | 27 +- include/core/CScopedLock.h | 27 +- include/core/CScopedRapidJsonPoolAllocator.h | 31 +- include/core/CScopedReadLock.h | 27 +- include/core/CScopedWriteLock.h | 27 +- include/core/CSetEnv.h | 20 +- include/core/CSetMode.h | 27 +- include/core/CShellArgQuoter.h | 20 +- include/core/CSleep.h | 46 +- include/core/CSmallVector.h | 170 +- include/core/CStat.h | 43 +- include/core/CStateCompressor.h | 176 +- include/core/CStateDecompressor.h | 220 +- include/core/CStateMachine.h | 306 +- include/core/CStatePersistInserter.h | 100 +- include/core/CStateRestoreTraverser.h | 168 +- include/core/CStatistics.h | 66 +- include/core/CStopWatch.h | 76 +- include/core/CStoredStringPtr.h | 106 +- include/core/CStrCaseCmp.h | 20 +- include/core/CStrFTime.h | 21 +- include/core/CStrPTime.h | 20 +- include/core/CStrPairFirstElementEqual.h | 31 +- include/core/CStrPairFirstElementLess.h | 31 +- include/core/CStrTokR.h | 18 +- include/core/CStringCache.h | 133 +- include/core/CStringSimilarityTester.h | 739 ++-- include/core/CStringUtils.h | 464 ++- include/core/CThread.h | 113 +- include/core/CThreadFarm.h | 316 +- include/core/CThreadFarmReceiver.h | 52 +- include/core/CTicker.h | 93 +- include/core/CTimeGm.h | 18 +- include/core/CTimeUtils.h | 147 +- include/core/CTimezone.h | 140 +- include/core/CTriple.h | 132 +- include/core/CUnSetEnv.h | 18 +- include/core/CUname.h | 56 +- include/core/CVectorRange.h | 443 +-- include/core/CWindowsError.h | 45 +- include/core/CWordDictionary.h | 272 +- include/core/CWordExtractor.h | 57 +- include/core/CXmlNode.h | 172 +- include/core/CXmlNodeWithChildren.h | 75 +- include/core/CXmlNodeWithChildrenPool.h | 95 +- include/core/CXmlParser.h | 412 ++- include/core/CXmlParserIntf.h | 132 +- include/core/Constants.h | 10 +- include/core/CoreTypes.h | 18 +- include/core/ImportExport.h | 1 - include/core/LogMacros.h | 10 +- include/core/MainForServices.h | 12 +- include/core/RestoreMacros.h | 108 +- include/core/WindowsSafe.h | 1 - include/maths/CAdaptiveBucketing.h | 333 +- include/maths/CAgglomerativeClusterer.h | 172 +- include/maths/CAnnotatedVector.h | 79 +- include/maths/CAssignment.h | 57 +- include/maths/CBasicStatistics.h | 2684 +++++++-------- include/maths/CBasicStatisticsPersist.h | 166 +- include/maths/CBjkstUniqueValues.h | 171 +- include/maths/CBootstrapClusterer.h | 1873 +++++------ include/maths/CBoundingBox.h | 183 +- include/maths/CCalendarComponent.h | 246 +- .../CCalendarComponentAdaptiveBucketing.h | 297 +- include/maths/CCalendarFeature.h | 141 +- include/maths/CCategoricalTools.h | 263 +- include/maths/CChecksum.h | 435 ++- include/maths/CClusterer.h | 426 ++- 
include/maths/CClustererStateSerialiser.h | 165 +- include/maths/CCompositeFunctions.h | 325 +- include/maths/CConstantPrior.h | 298 +- include/maths/CCooccurrences.h | 116 +- include/maths/CCountMinSketch.h | 169 +- include/maths/CDecayRateController.h | 138 +- include/maths/CDecompositionComponent.h | 341 +- include/maths/CDoublePrecisionStorage.h | 63 +- include/maths/CEntropySketch.h | 46 +- include/maths/CEqualWithTolerance.h | 164 +- include/maths/CExpandingWindow.h | 122 +- include/maths/CGammaRateConjugate.h | 719 ++-- include/maths/CGradientDescent.h | 145 +- include/maths/CGramSchmidt.h | 445 ++- include/maths/CInformationCriteria.h | 389 +-- include/maths/CIntegerTools.h | 217 +- include/maths/CIntegration.h | 1176 +++---- include/maths/CKMeansFast.h | 934 +++--- include/maths/CKMeansOnline.h | 1021 +++--- include/maths/CKMeansOnline1d.h | 228 +- include/maths/CKMostCorrelated.h | 358 +- include/maths/CKdTree.h | 557 ++-- include/maths/CLassoLogisticRegression.h | 576 ++-- include/maths/CLinearAlgebra.h | 2067 +++++------- include/maths/CLinearAlgebraEigen.h | 278 +- include/maths/CLinearAlgebraFwd.h | 56 +- include/maths/CLinearAlgebraPersist.h | 57 +- include/maths/CLinearAlgebraTools.h | 562 ++-- include/maths/CLogNormalMeanPrecConjugate.h | 783 +++-- include/maths/CLogTDistribution.h | 58 +- include/maths/CMathsFuncs.h | 311 +- .../CMathsFuncsForMatrixAndVectorTypes.h | 67 +- include/maths/CMixtureDistribution.h | 503 +-- include/maths/CModel.h | 976 +++--- include/maths/CModelDetail.h | 40 +- include/maths/CModelStateSerialiser.h | 35 +- include/maths/CModelWeight.h | 107 +- include/maths/CMultimodalPrior.h | 627 ++-- include/maths/CMultimodalPriorMode.h | 67 +- include/maths/CMultimodalPriorUtils.h | 1692 +++++----- include/maths/CMultinomialConjugate.h | 711 ++-- include/maths/CMultivariateConstantPrior.h | 236 +- include/maths/CMultivariateMultimodalPrior.h | 1877 +++++------ .../CMultivariateMultimodalPriorFactory.h | 49 +- include/maths/CMultivariateNormalConjugate.h | 2039 +++++------ .../CMultivariateNormalConjugateFactory.h | 37 +- include/maths/CMultivariateOneOfNPrior.h | 513 ++- .../maths/CMultivariateOneOfNPriorFactory.h | 40 +- include/maths/CMultivariatePrior.h | 778 +++-- include/maths/CNaiveBayes.h | 396 ++- include/maths/CNaturalBreaksClassifier.h | 467 ++- include/maths/CNormalMeanPrecConjugate.h | 652 ++-- include/maths/COneOfNPrior.h | 658 ++-- include/maths/COrderings.h | 1537 ++++----- include/maths/COrdinal.h | 115 +- include/maths/CPRNG.h | 495 ++- include/maths/CPackedBitVector.h | 204 +- include/maths/CPeriodicityHypothesisTests.h | 685 ++-- include/maths/CPoissonMeanConjugate.h | 560 ++-- include/maths/CPrior.h | 1028 +++--- include/maths/CPriorDetail.h | 33 +- include/maths/CPriorStateSerialiser.h | 58 +- include/maths/CProbabilityCalibrator.h | 103 +- include/maths/CQDigest.h | 638 ++-- include/maths/CQuantileSketch.h | 197 +- include/maths/CRadialBasisFunction.h | 293 +- include/maths/CRandomProjectionClusterer.h | 1275 ++++--- include/maths/CRegression.h | 855 +++-- include/maths/CRegressionDetail.h | 203 +- include/maths/CRestoreParams.h | 26 +- include/maths/CSampling.h | 770 ++--- include/maths/CSeasonalComponent.h | 349 +- .../CSeasonalComponentAdaptiveBucketing.h | 423 ++- include/maths/CSeasonalTime.h | 417 ++- include/maths/CSetTools.h | 408 +-- include/maths/CSignal.h | 98 +- include/maths/CSolvers.h | 1773 +++++----- include/maths/CSphericalCluster.h | 154 +- include/maths/CSpline.h | 1089 +++--- include/maths/CStatisticalTests.h | 
220 +- include/maths/CTimeSeriesChangeDetector.h | 597 ++-- include/maths/CTimeSeriesDecomposition.h | 402 ++- .../maths/CTimeSeriesDecompositionDetail.h | 1043 +++--- .../maths/CTimeSeriesDecompositionInterface.h | 294 +- .../CTimeSeriesDecompositionStateSerialiser.h | 45 +- include/maths/CTimeSeriesDecompositionStub.h | 126 +- include/maths/CTimeSeriesModel.h | 1260 ++++--- include/maths/CTools.h | 1258 ++++--- include/maths/CToolsDetail.h | 197 +- include/maths/CTrendComponent.h | 425 ++- include/maths/CTrendTests.h | 316 +- include/maths/CTypeConversions.h | 152 +- include/maths/CXMeans.h | 703 ++-- include/maths/CXMeansOnline.h | 2271 ++++++------- include/maths/CXMeansOnline1d.h | 695 ++-- include/maths/CXMeansOnlineFactory.h | 115 +- include/maths/Constants.h | 69 +- include/maths/ImportExport.h | 1 - include/maths/MathsTypes.h | 120 +- include/maths/ProbabilityAggregators.h | 224 +- include/model/CAnnotatedProbability.h | 42 +- include/model/CAnnotatedProbabilityBuilder.h | 116 +- include/model/CAnomalyDetector.h | 580 ++-- include/model/CAnomalyDetectorModel.h | 1129 +++---- include/model/CAnomalyDetectorModelConfig.h | 909 +++-- include/model/CAnomalyScore.h | 504 ++- include/model/CBucketGatherer.h | 817 +++-- include/model/CBucketQueue.h | 541 ++- include/model/CCountingModel.h | 500 ++- include/model/CCountingModelFactory.h | 272 +- include/model/CDataClassifier.h | 89 +- include/model/CDataGatherer.h | 1380 ++++---- include/model/CDetectionRule.h | 156 +- include/model/CDetectorEqualizer.h | 82 +- include/model/CDynamicStringIdRegistry.h | 274 +- include/model/CEventData.h | 222 +- include/model/CEventRateBucketGatherer.h | 905 +++-- include/model/CEventRateModel.h | 570 ++-- include/model/CEventRateModelFactory.h | 308 +- include/model/CEventRatePopulationModel.h | 651 ++-- .../model/CEventRatePopulationModelFactory.h | 324 +- include/model/CFeatureData.h | 44 +- include/model/CForecastDataSink.h | 307 +- include/model/CGathererTools.h | 487 ++- include/model/CHierarchicalResults.h | 445 ++- .../model/CHierarchicalResultsAggregator.h | 194 +- include/model/CHierarchicalResultsLevelSet.h | 510 ++- .../model/CHierarchicalResultsNormalizer.h | 259 +- include/model/CHierarchicalResultsPopulator.h | 25 +- ...CHierarchicalResultsProbabilityFinalizer.h | 16 +- include/model/CIndividualModel.h | 543 ++- include/model/CIndividualModelDetail.h | 110 +- include/model/CInterimBucketCorrector.h | 139 +- include/model/CLimits.h | 189 +- include/model/CMemoryUsageEstimator.h | 118 +- include/model/CMetricBucketGatherer.h | 564 ++-- include/model/CMetricModel.h | 556 ++- include/model/CMetricModelFactory.h | 322 +- include/model/CMetricMultivariateStatistic.h | 236 +- include/model/CMetricPartialStatistic.h | 217 +- include/model/CMetricPopulationModel.h | 625 ++-- include/model/CMetricPopulationModelFactory.h | 336 +- include/model/CMetricStatisticWrappers.h | 170 +- include/model/CModelDetailsView.h | 263 +- include/model/CModelFactory.h | 716 ++-- include/model/CModelParams.h | 15 +- include/model/CModelPlotData.h | 136 +- include/model/CModelTools.h | 504 ++- include/model/CPartitioningFields.h | 58 +- include/model/CPopulationModel.h | 465 ++- include/model/CPopulationModelDetail.h | 67 +- .../CProbabilityAndInfluenceCalculator.h | 681 ++-- include/model/CResourceMonitor.h | 278 +- include/model/CResultsQueue.h | 83 +- include/model/CRuleCondition.h | 185 +- include/model/CSample.h | 94 +- include/model/CSampleCounts.h | 142 +- include/model/CSampleGatherer.h | 684 ++-- 
include/model/CSampleQueue.h | 845 ++--- include/model/CSearchKey.h | 362 +- include/model/CSimpleCountDetector.h | 60 +- include/model/CStringStore.h | 178 +- include/model/FrequencyPredicates.h | 86 +- include/model/FunctionTypes.h | 22 +- include/model/ImportExport.h | 1 - include/model/ModelTypes.h | 404 +-- include/test/CMultiFileDataAdder.h | 58 +- include/test/CMultiFileSearcher.h | 47 +- include/test/CRandomNumbers.h | 268 +- include/test/CRandomNumbersDetail.h | 83 +- include/test/CShellCmdEscape.h | 22 +- include/test/CTestRunner.h | 72 +- include/test/CTestTimer.h | 49 +- include/test/CTestTmpDir.h | 20 +- include/test/CTimeSeriesTestData.h | 211 +- include/test/CTimingXmlOutputterHook.h | 52 +- include/test/ImportExport.h | 1 - include/ver/CBuildInfo.h | 42 +- lib/api/CAnomalyJob.cc | 1219 +++---- lib/api/CBackgroundPersister.cc | 135 +- lib/api/CBaseTokenListDataTyper.cc | 456 +-- lib/api/CBenchMarker.cc | 146 +- lib/api/CCategoryExamplesCollector.cc | 134 +- lib/api/CCmdSkeleton.cc | 57 +- lib/api/CConfigUpdater.cc | 66 +- lib/api/CCsvInputParser.cc | 246 +- lib/api/CCsvOutputWriter.cc | 176 +- lib/api/CDataProcessor.cc | 32 +- lib/api/CDataTyper.cc | 34 +- lib/api/CDetectionRulesJsonParser.cc | 243 +- lib/api/CFieldConfig.cc | 1942 ++++------- lib/api/CFieldDataTyper.cc | 377 +-- lib/api/CForecastRunner.cc | 265 +- lib/api/CHierarchicalResultsWriter.cc | 405 +-- lib/api/CInputParser.cc | 37 +- lib/api/CIoManager.cc | 90 +- lib/api/CJsonOutputWriter.cc | 503 +-- lib/api/CLengthEncodedInputParser.cc | 164 +- lib/api/CLineifiedInputParser.cc | 68 +- lib/api/CLineifiedJsonInputParser.cc | 197 +- lib/api/CLineifiedJsonOutputWriter.cc | 90 +- lib/api/CLineifiedXmlInputParser.cc | 91 +- lib/api/CLineifiedXmlOutputWriter.cc | 65 +- lib/api/CModelPlotDataJsonWriter.cc | 106 +- lib/api/CModelSizeStatsJsonWriter.cc | 18 +- lib/api/CModelSnapshotJsonWriter.cc | 37 +- lib/api/CNullOutput.cc | 20 +- lib/api/COutputChainer.cc | 104 +- lib/api/COutputHandler.cc | 51 +- lib/api/CResultNormalizer.cc | 166 +- lib/api/CSingleStreamDataAdder.cc | 38 +- lib/api/CSingleStreamSearcher.cc | 20 +- lib/api/CStateRestoreStreamFilter.cc | 48 +- lib/api/CTokenListReverseSearchCreator.cc | 72 +- lib/api/CTokenListReverseSearchCreatorIntf.cc | 25 +- lib/api/CTokenListType.cc | 318 +- lib/api/dump_state/Main.cc | 161 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 143 +- lib/api/unittest/CAnomalyJobLimitTest.h | 14 +- lib/api/unittest/CAnomalyJobTest.cc | 498 ++- lib/api/unittest/CAnomalyJobTest.h | 29 +- lib/api/unittest/CBackgroundPersisterTest.cc | 100 +- lib/api/unittest/CBackgroundPersisterTest.h | 21 +- .../unittest/CBaseTokenListDataTyperTest.cc | 23 +- .../unittest/CBaseTokenListDataTyperTest.h | 13 +- .../CCategoryExamplesCollectorTest.cc | 66 +- .../unittest/CCategoryExamplesCollectorTest.h | 23 +- lib/api/unittest/CConfigUpdaterTest.cc | 107 +- lib/api/unittest/CConfigUpdaterTest.h | 23 +- lib/api/unittest/CCsvInputParserTest.cc | 388 +-- lib/api/unittest/CCsvInputParserTest.h | 21 +- lib/api/unittest/CCsvOutputWriterTest.cc | 175 +- lib/api/unittest/CCsvOutputWriterTest.h | 19 +- .../unittest/CDetectionRulesJsonParserTest.cc | 150 +- .../unittest/CDetectionRulesJsonParserTest.h | 43 +- lib/api/unittest/CFieldConfigTest.cc | 445 +-- lib/api/unittest/CFieldConfigTest.h | 87 +- lib/api/unittest/CFieldDataTyperTest.cc | 237 +- lib/api/unittest/CFieldDataTyperTest.h | 18 +- lib/api/unittest/CForecastRunnerTest.cc | 236 +- lib/api/unittest/CForecastRunnerTest.h | 28 +- lib/api/unittest/CIoManagerTest.cc 
| 303 +- lib/api/unittest/CIoManagerTest.h | 30 +- lib/api/unittest/CJsonOutputWriterTest.cc | 2705 +++++++-------- lib/api/unittest/CJsonOutputWriterTest.h | 51 +- .../unittest/CLengthEncodedInputParserTest.cc | 250 +- .../unittest/CLengthEncodedInputParserTest.h | 15 +- .../unittest/CLineifiedJsonInputParserTest.cc | 134 +- .../unittest/CLineifiedJsonInputParserTest.h | 17 +- .../CLineifiedJsonOutputWriterTest.cc | 29 +- .../unittest/CLineifiedJsonOutputWriterTest.h | 13 +- .../unittest/CLineifiedXmlInputParserTest.cc | 155 +- .../unittest/CLineifiedXmlInputParserTest.h | 23 +- lib/api/unittest/CMockDataAdder.cc | 35 +- lib/api/unittest/CMockDataAdder.h | 79 +- lib/api/unittest/CMockDataProcessor.cc | 53 +- lib/api/unittest/CMockDataProcessor.h | 53 +- lib/api/unittest/CMockSearcher.cc | 25 +- lib/api/unittest/CMockSearcher.h | 21 +- .../unittest/CModelPlotDataJsonWriterTest.cc | 24 +- .../unittest/CModelPlotDataJsonWriterTest.h | 10 +- .../unittest/CModelSnapshotJsonWriterTest.cc | 35 +- .../unittest/CModelSnapshotJsonWriterTest.h | 11 +- lib/api/unittest/CMultiFileDataAdderTest.cc | 140 +- lib/api/unittest/CMultiFileDataAdderTest.h | 31 +- lib/api/unittest/COutputChainerTest.cc | 27 +- lib/api/unittest/COutputChainerTest.h | 11 +- lib/api/unittest/CRestorePreviousStateTest.cc | 187 +- lib/api/unittest/CRestorePreviousStateTest.h | 35 +- lib/api/unittest/CResultNormalizerTest.cc | 41 +- lib/api/unittest/CResultNormalizerTest.h | 11 +- .../unittest/CSingleStreamDataAdderTest.cc | 155 +- lib/api/unittest/CSingleStreamDataAdderTest.h | 31 +- .../unittest/CStateRestoreStreamFilterTest.cc | 40 +- .../unittest/CStateRestoreStreamFilterTest.h | 12 +- lib/api/unittest/CStringStoreTest.cc | 194 +- lib/api/unittest/CStringStoreTest.h | 20 +- lib/api/unittest/CTokenListDataTyperTest.cc | 350 +- lib/api/unittest/CTokenListDataTyperTest.h | 39 +- .../CTokenListReverseSearchCreatorTest.cc | 62 +- .../CTokenListReverseSearchCreatorTest.h | 23 +- lib/api/unittest/Main.cc | 61 +- lib/config/CAutoconfigurer.cc | 431 +-- .../CAutoconfigurerDetectorPenalties.cc | 80 +- .../CAutoconfigurerFieldRolePenalties.cc | 85 +- lib/config/CAutoconfigurerParams.cc | 1391 ++++---- lib/config/CDataCountStatistics.cc | 358 +- lib/config/CDataSemantics.cc | 312 +- lib/config/CDataSummaryStatistics.cc | 340 +- lib/config/CDetectorEnumerator.cc | 160 +- lib/config/CDetectorFieldRolePenalty.cc | 61 +- lib/config/CDetectorRecord.cc | 151 +- lib/config/CDetectorSpecification.cc | 461 +-- lib/config/CFieldRolePenalty.cc | 131 +- lib/config/CFieldStatistics.cc | 108 +- lib/config/CLongTailPenalty.cc | 110 +- lib/config/CLowInformationContentPenalty.cc | 78 +- lib/config/CLowVariationPenalty.cc | 277 +- lib/config/CNotEnoughDataPenalty.cc | 155 +- lib/config/CPenalty.cc | 111 +- lib/config/CPolledDataPenalty.cc | 84 +- lib/config/CReportWriter.cc | 287 +- .../CSpanTooSmallForBucketLengthPenalty.cc | 36 +- lib/config/CSparseCountPenalty.cc | 126 +- lib/config/CTooMuchDataPenalty.cc | 155 +- lib/config/CTools.cc | 98 +- lib/config/ConfigTypes.cc | 172 +- lib/config/Constants.cc | 40 +- .../unittest/CAutoconfigurerParamsTest.cc | 108 +- .../unittest/CAutoconfigurerParamsTest.h | 11 +- lib/config/unittest/CDataSemanticsTest.cc | 153 +- lib/config/unittest/CDataSemanticsTest.h | 17 +- .../unittest/CDataSummaryStatisticsTest.cc | 159 +- .../unittest/CDataSummaryStatisticsTest.h | 17 +- .../unittest/CDetectorEnumeratorTest.cc | 23 +- lib/config/unittest/CDetectorEnumeratorTest.h | 9 +- lib/config/unittest/CReportWriterTest.cc | 130 +- 
lib/config/unittest/CReportWriterTest.h | 11 +- lib/config/unittest/Main.cc | 13 +- lib/core/CBase64Filter.cc | 18 +- lib/core/CBufferFlushTimer.cc | 32 +- lib/core/CCTimeR.cc | 14 +- lib/core/CCTimeR_Windows.cc | 14 +- lib/core/CCompressOStream.cc | 61 +- lib/core/CCompressUtils.cc | 84 +- lib/core/CCompressedDictionary.cc | 8 +- lib/core/CCondition.cc | 67 +- lib/core/CCondition_Windows.cc | 47 +- lib/core/CContainerPrinter.cc | 7 +- lib/core/CCrashHandler.cc | 11 +- lib/core/CCrashHandler_Linux.cc | 31 +- lib/core/CDataAdder.cc | 25 +- lib/core/CDataSearcher.cc | 24 +- lib/core/CDelimiter.cc | 239 +- lib/core/CDetachedProcessSpawner.cc | 367 +- lib/core/CDetachedProcessSpawner_Windows.cc | 267 +- lib/core/CDualThreadStreamBuf.cc | 213 +- lib/core/CFastMutex.cc | 35 +- lib/core/CFastMutex_MacOSX.cc | 23 +- lib/core/CFastMutex_Windows.cc | 23 +- lib/core/CFileDeleter.cc | 27 +- lib/core/CFlatPrefixTree.cc | 142 +- lib/core/CGmTimeR.cc | 15 +- lib/core/CGmTimeR_Windows.cc | 15 +- lib/core/CHashing.cc | 287 +- lib/core/CHexUtils.cc | 77 +- lib/core/CIEEE754.cc | 28 +- lib/core/CJsonLogLayout.cc | 104 +- lib/core/CJsonOutputStreamWrapper.cc | 104 +- lib/core/CJsonStatePersistInserter.cc | 35 +- lib/core/CJsonStateRestoreTraverser.cc | 301 +- lib/core/CLogger.cc | 295 +- lib/core/CMemory.cc | 10 +- lib/core/CMemoryUsage.cc | 137 +- lib/core/CMemoryUsageJsonWriter.cc | 42 +- lib/core/CMonotonicTime.cc | 36 +- lib/core/CMonotonicTime_MacOSX.cc | 30 +- lib/core/CMonotonicTime_Windows.cc | 33 +- lib/core/CMutex.cc | 44 +- lib/core/CMutex_Windows.cc | 26 +- lib/core/CNamedPipeFactory.cc | 206 +- lib/core/CNamedPipeFactory_Windows.cc | 104 +- lib/core/COsFileFuncs.cc | 59 +- lib/core/COsFileFuncs_Windows.cc | 223 +- lib/core/CPOpen.cc | 18 +- lib/core/CPOpen_Windows.cc | 21 +- lib/core/CPatternSet.cc | 88 +- lib/core/CPersistUtils.cc | 7 +- lib/core/CProcess.cc | 85 +- lib/core/CProcessPriority.cc | 14 +- lib/core/CProcessPriority_Linux.cc | 37 +- lib/core/CProcess_Windows.cc | 239 +- lib/core/CProgName_Linux.cc | 28 +- lib/core/CProgName_MacOSX.cc | 28 +- lib/core/CProgName_Windows.cc | 46 +- lib/core/CRapidJsonConcurrentLineWriter.cc | 31 +- lib/core/CRapidXmlParser.cc | 355 +- lib/core/CRapidXmlStatePersistInserter.cc | 93 +- lib/core/CRapidXmlStateRestoreTraverser.cc | 116 +- lib/core/CReadWriteLock.cc | 47 +- lib/core/CReadWriteLock_Windows.cc | 29 +- lib/core/CRegex.cc | 531 ++- lib/core/CRegexFilter.cc | 37 +- lib/core/CResourceLocator.cc | 44 +- lib/core/CScopedFastLock.cc | 18 +- lib/core/CScopedLock.cc | 18 +- lib/core/CScopedReadLock.cc | 18 +- lib/core/CScopedWriteLock.cc | 18 +- lib/core/CSetEnv.cc | 16 +- lib/core/CSetEnv_Windows.cc | 19 +- lib/core/CSetMode.cc | 16 +- lib/core/CSetMode_Windows.cc | 18 +- lib/core/CShellArgQuoter.cc | 68 +- lib/core/CShellArgQuoter_Windows.cc | 57 +- lib/core/CSleep.cc | 26 +- lib/core/CSleep_Windows.cc | 21 +- lib/core/CStat.cc | 25 +- lib/core/CStateCompressor.cc | 86 +- lib/core/CStateDecompressor.cc | 188 +- lib/core/CStateMachine.cc | 193 +- lib/core/CStatePersistInserter.cc | 25 +- lib/core/CStateRestoreTraverser.cc | 43 +- lib/core/CStatistics.cc | 87 +- lib/core/CStopWatch.cc | 53 +- lib/core/CStoredStringPtr.cc | 71 +- lib/core/CStrCaseCmp.cc | 17 +- lib/core/CStrCaseCmp_Windows.cc | 17 +- lib/core/CStrFTime.cc | 17 +- lib/core/CStrFTime_Windows.cc | 42 +- lib/core/CStrPTime.cc | 15 +- lib/core/CStrPTime_Linux.cc | 100 +- lib/core/CStrPTime_Windows.cc | 116 +- lib/core/CStrTokR.cc | 14 +- lib/core/CStrTokR_Windows.cc | 14 +- 
lib/core/CStringCache.cc | 71 +- lib/core/CStringSimilarityTester.cc | 86 +- lib/core/CStringUtils.cc | 846 ++--- lib/core/CThread.cc | 111 +- lib/core/CThread_Windows.cc | 125 +- lib/core/CTimeGm.cc | 13 +- lib/core/CTimeGm_Windows.cc | 14 +- lib/core/CTimeUtils.cc | 139 +- lib/core/CTimezone.cc | 71 +- lib/core/CTimezone_Windows.cc | 123 +- lib/core/CUnSetEnv.cc | 14 +- lib/core/CUnSetEnv_Windows.cc | 14 +- lib/core/CUname.cc | 52 +- lib/core/CUname_Windows.cc | 174 +- lib/core/CWindowsError.cc | 29 +- lib/core/CWindowsError_Windows.cc | 46 +- lib/core/CWordDictionary.cc | 177 +- lib/core/CWordExtractor.cc | 104 +- lib/core/CXmlNode.cc | 57 +- lib/core/CXmlNodeWithChildren.cc | 73 +- lib/core/CXmlNodeWithChildrenPool.cc | 50 +- lib/core/CXmlParser.cc | 725 ++-- lib/core/CXmlParserIntf.cc | 38 +- lib/core/unittest/CAllocationStrategyTest.cc | 28 +- lib/core/unittest/CAllocationStrategyTest.h | 10 +- lib/core/unittest/CBase64FilterTest.cc | 147 +- lib/core/unittest/CBase64FilterTest.h | 14 +- .../unittest/CBlockingMessageQueueTest.cc | 61 +- lib/core/unittest/CBlockingMessageQueueTest.h | 11 +- lib/core/unittest/CByteSwapperTest.cc | 14 +- lib/core/unittest/CByteSwapperTest.h | 10 +- lib/core/unittest/CCompressUtilsTest.cc | 46 +- lib/core/unittest/CCompressUtilsTest.h | 17 +- .../unittest/CCompressedDictionaryTest.cc | 27 +- lib/core/unittest/CCompressedDictionaryTest.h | 12 +- lib/core/unittest/CConcurrentWrapperTest.cc | 144 +- lib/core/unittest/CConcurrentWrapperTest.h | 20 +- lib/core/unittest/CContainerPrinterTest.cc | 42 +- lib/core/unittest/CContainerPrinterTest.h | 9 +- lib/core/unittest/CContainerThroughputTest.cc | 176 +- lib/core/unittest/CContainerThroughputTest.h | 54 +- lib/core/unittest/CDelimiterTest.cc | 123 +- lib/core/unittest/CDelimiterTest.h | 29 +- .../unittest/CDetachedProcessSpawnerTest.cc | 73 +- .../unittest/CDetachedProcessSpawnerTest.h | 17 +- lib/core/unittest/CDualThreadStreamBufTest.cc | 223 +- lib/core/unittest/CDualThreadStreamBufTest.h | 17 +- lib/core/unittest/CFileDeleterTest.cc | 20 +- lib/core/unittest/CFileDeleterTest.h | 11 +- lib/core/unittest/CFlatPrefixTreeTest.cc | 94 +- lib/core/unittest/CFlatPrefixTreeTest.h | 24 +- lib/core/unittest/CFunctionalTest.cc | 41 +- lib/core/unittest/CFunctionalTest.h | 11 +- lib/core/unittest/CHashingTest.cc | 171 +- lib/core/unittest/CHashingTest.h | 15 +- lib/core/unittest/CHexUtilsTest.cc | 19 +- lib/core/unittest/CHexUtilsTest.h | 10 +- lib/core/unittest/CIEEE754Test.cc | 16 +- lib/core/unittest/CIEEE754Test.h | 9 +- lib/core/unittest/CJsonLogLayoutTest.cc | 29 +- lib/core/unittest/CJsonLogLayoutTest.h | 11 +- .../unittest/CJsonOutputStreamWrapperTest.cc | 44 +- .../unittest/CJsonOutputStreamWrapperTest.h | 12 +- .../unittest/CJsonStatePersistInserterTest.cc | 27 +- .../unittest/CJsonStatePersistInserterTest.h | 11 +- .../CJsonStateRestoreTraverserTest.cc | 98 +- .../unittest/CJsonStateRestoreTraverserTest.h | 23 +- lib/core/unittest/CLoggerTest.cc | 96 +- lib/core/unittest/CLoggerTest.h | 19 +- lib/core/unittest/CMapPopulationTest.cc | 202 +- lib/core/unittest/CMapPopulationTest.h | 100 +- .../unittest/CMemoryUsageJsonWriterTest.cc | 22 +- .../unittest/CMemoryUsageJsonWriterTest.h | 10 +- lib/core/unittest/CMemoryUsageTest.cc | 708 ++-- lib/core/unittest/CMemoryUsageTest.h | 27 +- lib/core/unittest/CMessageBufferTest.cc | 129 +- lib/core/unittest/CMessageBufferTest.h | 9 +- lib/core/unittest/CMessageQueueTest.cc | 100 +- lib/core/unittest/CMessageQueueTest.h | 12 +- lib/core/unittest/CMonotonicTimeTest.cc | 29 
+- lib/core/unittest/CMonotonicTimeTest.h | 13 +- lib/core/unittest/CMutexTest.cc | 14 +- lib/core/unittest/CMutexTest.h | 9 +- lib/core/unittest/CNamedPipeFactoryTest.cc | 284 +- lib/core/unittest/CNamedPipeFactoryTest.h | 22 +- lib/core/unittest/COsFileFuncsTest.cc | 64 +- lib/core/unittest/COsFileFuncsTest.h | 13 +- lib/core/unittest/CPatternSetTest.cc | 87 +- lib/core/unittest/CPatternSetTest.h | 27 +- lib/core/unittest/CPersistUtilsTest.cc | 231 +- lib/core/unittest/CPersistUtilsTest.h | 13 +- .../CPolymorphicStackObjectCPtrTest.cc | 56 +- .../CPolymorphicStackObjectCPtrTest.h | 9 +- lib/core/unittest/CProcessPriorityTest.cc | 15 +- lib/core/unittest/CProcessPriorityTest.h | 11 +- .../unittest/CProcessPriorityTest_Linux.cc | 50 +- lib/core/unittest/CProcessTest.cc | 16 +- lib/core/unittest/CProcessTest.h | 11 +- lib/core/unittest/CProgNameTest.cc | 21 +- lib/core/unittest/CProgNameTest.h | 13 +- lib/core/unittest/CRapidJsonLineWriterTest.cc | 43 +- lib/core/unittest/CRapidJsonLineWriterTest.h | 15 +- lib/core/unittest/CRapidJsonWriterBaseTest.cc | 73 +- lib/core/unittest/CRapidJsonWriterBaseTest.h | 12 +- lib/core/unittest/CRapidXmlParserTest.cc | 141 +- lib/core/unittest/CRapidXmlParserTest.h | 40 +- .../CRapidXmlStatePersistInserterTest.cc | 26 +- .../CRapidXmlStatePersistInserterTest.h | 11 +- .../CRapidXmlStateRestoreTraverserTest.cc | 29 +- .../CRapidXmlStateRestoreTraverserTest.h | 11 +- lib/core/unittest/CReadWriteLockTest.cc | 423 +-- lib/core/unittest/CReadWriteLockTest.h | 15 +- lib/core/unittest/CRegexFilterTest.cc | 48 +- lib/core/unittest/CRegexFilterTest.h | 17 +- lib/core/unittest/CRegexTest.cc | 83 +- lib/core/unittest/CRegexTest.h | 21 +- lib/core/unittest/CResourceLocatorTest.cc | 31 +- lib/core/unittest/CResourceLocatorTest.h | 15 +- lib/core/unittest/CShellArgQuoterTest.cc | 65 +- lib/core/unittest/CShellArgQuoterTest.h | 11 +- lib/core/unittest/CSleepTest.cc | 19 +- lib/core/unittest/CSleepTest.h | 11 +- lib/core/unittest/CSmallVectorTest.cc | 25 +- lib/core/unittest/CSmallVectorTest.h | 9 +- lib/core/unittest/CStateCompressorTest.cc | 219 +- lib/core/unittest/CStateCompressorTest.h | 16 +- lib/core/unittest/CStateMachineTest.cc | 192 +- lib/core/unittest/CStateMachineTest.h | 13 +- lib/core/unittest/CStatisticsTest.cc | 105 +- lib/core/unittest/CStatisticsTest.h | 6 +- lib/core/unittest/CStopWatchTest.cc | 23 +- lib/core/unittest/CStopWatchTest.h | 11 +- lib/core/unittest/CStoredStringPtrTest.cc | 68 +- lib/core/unittest/CStoredStringPtrTest.h | 14 +- .../unittest/CStringSimilarityTesterTest.cc | 145 +- .../unittest/CStringSimilarityTesterTest.h | 22 +- lib/core/unittest/CStringUtilsTest.cc | 321 +- lib/core/unittest/CStringUtilsTest.h | 56 +- lib/core/unittest/CThreadFarmTest.cc | 193 +- lib/core/unittest/CThreadFarmTest.h | 13 +- .../unittest/CThreadMutexConditionTest.cc | 201 +- lib/core/unittest/CThreadMutexConditionTest.h | 11 +- lib/core/unittest/CThreadPoolTest.cc | 28 +- lib/core/unittest/CThreadPoolTest.h | 11 +- lib/core/unittest/CTickerTest.cc | 43 +- lib/core/unittest/CTickerTest.h | 9 +- lib/core/unittest/CTimeUtilsTest.cc | 291 +- lib/core/unittest/CTimeUtilsTest.h | 23 +- lib/core/unittest/CTripleTest.cc | 21 +- lib/core/unittest/CTripleTest.h | 13 +- lib/core/unittest/CUnameTest.cc | 14 +- lib/core/unittest/CUnameTest.h | 11 +- lib/core/unittest/CVectorRangeTest.cc | 85 +- lib/core/unittest/CVectorRangeTest.h | 19 +- lib/core/unittest/CWindowsErrorTest.cc | 16 +- lib/core/unittest/CWindowsErrorTest.h | 11 +- 
lib/core/unittest/CWordDictionaryTest.cc | 178 +- lib/core/unittest/CWordDictionaryTest.h | 17 +- lib/core/unittest/CWordExtractorTest.cc | 29 +- lib/core/unittest/CWordExtractorTest.h | 13 +- lib/core/unittest/CXmlNodeWithChildrenTest.cc | 65 +- lib/core/unittest/CXmlNodeWithChildrenTest.h | 16 +- lib/core/unittest/CXmlParserTest.cc | 380 +-- lib/core/unittest/CXmlParserTest.h | 76 +- lib/core/unittest/Main.cc | 151 +- lib/maths/CAdaptiveBucketing.cc | 281 +- lib/maths/CAgglomerativeClusterer.cc | 328 +- lib/maths/CAssignment.cc | 197 +- lib/maths/CBasicStatistics.cc | 33 +- lib/maths/CBjkstUniqueValues.cc | 688 ++-- lib/maths/CCalendarComponent.cc | 130 +- .../CCalendarComponentAdaptiveBucketing.cc | 251 +- lib/maths/CCalendarFeature.cc | 179 +- lib/maths/CCategoricalTools.cc | 362 +- lib/maths/CClusterer.cc | 53 +- lib/maths/CClustererStateSerialiser.cc | 58 +- lib/maths/CConstantPrior.cc | 303 +- lib/maths/CCooccurrences.cc | 285 +- lib/maths/CCountMinSketch.cc | 458 +-- lib/maths/CDecayRateController.cc | 161 +- lib/maths/CDecompositionComponent.cc | 221 +- lib/maths/CEntropySketch.cc | 34 +- lib/maths/CExpandingWindow.cc | 124 +- lib/maths/CGammaRateConjugate.cc | 1456 +++----- lib/maths/CGradientDescent.cc | 67 +- lib/maths/CGramSchmidt.cc | 95 +- lib/maths/CInformationCriteria.cc | 36 +- lib/maths/CIntegerTools.cc | 45 +- lib/maths/CIntegration.cc | 341 +- lib/maths/CKMeansOnline1d.cc | 222 +- lib/maths/CKMostCorrelated.cc | 573 ++-- lib/maths/CLassoLogisticRegression.cc | 640 ++-- lib/maths/CLinearAlgebraTools.cc | 515 ++- lib/maths/CLogNormalMeanPrecConjugate.cc | 1579 ++++----- lib/maths/CLogTDistribution.cc | 89 +- lib/maths/CMathsFuncs.cc | 71 +- lib/maths/CMixtureDistribution.cc | 162 +- lib/maths/CModel.cc | 384 +-- lib/maths/CModelStateSerialiser.cc | 68 +- lib/maths/CModelWeight.cc | 57 +- lib/maths/CMultimodalPrior.cc | 578 ++-- lib/maths/CMultinomialConjugate.cc | 1581 ++++----- lib/maths/CMultivariateConstantPrior.cc | 260 +- lib/maths/CMultivariateMultimodalPrior.cc | 172 +- .../CMultivariateMultimodalPriorFactory.cc | 110 +- .../CMultivariateNormalConjugateFactory.cc | 71 +- lib/maths/CMultivariateOneOfNPrior.cc | 575 ++-- lib/maths/CMultivariateOneOfNPriorFactory.cc | 24 +- lib/maths/CMultivariatePrior.cc | 327 +- lib/maths/CNaiveBayes.cc | 334 +- lib/maths/CNaturalBreaksClassifier.cc | 446 +-- lib/maths/CNormalMeanPrecConjugate.cc | 1166 +++---- lib/maths/COneOfNPrior.cc | 697 ++-- lib/maths/COrdinal.cc | 219 +- lib/maths/CPRNG.cc | 201 +- lib/maths/CPackedBitVector.cc | 300 +- lib/maths/CPeriodicityHypothesisTests.cc | 1604 ++++----- lib/maths/CPoissonMeanConjugate.cc | 625 ++-- lib/maths/CPrior.cc | 339 +- lib/maths/CPriorStateSerialiser.cc | 161 +- lib/maths/CProbabilityCalibrator.cc | 95 +- lib/maths/CQDigest.cc | 784 ++--- lib/maths/CQuantileSketch.cc | 401 +-- lib/maths/CRadialBasisFunction.cc | 230 +- lib/maths/CRegression.cc | 9 +- lib/maths/CRestoreParams.cc | 57 +- lib/maths/CSampling.cc | 612 ++-- lib/maths/CSeasonalComponent.cc | 193 +- .../CSeasonalComponentAdaptiveBucketing.cc | 473 +-- lib/maths/CSeasonalTime.cc | 221 +- lib/maths/CSignal.cc | 135 +- lib/maths/CSpline.cc | 138 +- lib/maths/CStatisticalTests.cc | 412 ++- lib/maths/CTimeSeriesChangeDetector.cc | 544 ++- lib/maths/CTimeSeriesDecomposition.cc | 467 +-- lib/maths/CTimeSeriesDecompositionDetail.cc | 1722 ++++------ ...CTimeSeriesDecompositionStateSerialiser.cc | 63 +- lib/maths/CTimeSeriesDecompositionStub.cc | 91 +- lib/maths/CTimeSeriesModel.cc | 2346 +++++-------- lib/maths/CTools.cc | 
1349 +++----- lib/maths/CTrendComponent.cc | 454 +-- lib/maths/CTrendTests.cc | 367 +- lib/maths/CXMeansOnline1d.cc | 1088 +++--- lib/maths/CXMeansOnlineFactory.cc | 44 +- lib/maths/Constants.cc | 12 +- lib/maths/MathsTypes.cc | 396 +-- lib/maths/ProbabilityAggregators.cc | 665 ++-- .../unittest/CAgglomerativeClustererTest.cc | 447 +-- .../unittest/CAgglomerativeClustererTest.h | 15 +- lib/maths/unittest/CAssignmentTest.cc | 181 +- lib/maths/unittest/CAssignmentTest.h | 9 +- lib/maths/unittest/CBasicStatisticsTest.cc | 684 ++-- lib/maths/unittest/CBasicStatisticsTest.h | 25 +- lib/maths/unittest/CBjkstUniqueValuesTest.cc | 185 +- lib/maths/unittest/CBjkstUniqueValuesTest.h | 19 +- lib/maths/unittest/CBootstrapClustererTest.cc | 675 ++-- lib/maths/unittest/CBootstrapClustererTest.h | 21 +- lib/maths/unittest/CBoundingBoxTest.cc | 108 +- lib/maths/unittest/CBoundingBoxTest.h | 11 +- ...CCalendarComponentAdaptiveBucketingTest.cc | 260 +- .../CCalendarComponentAdaptiveBucketingTest.h | 31 +- lib/maths/unittest/CCalendarFeatureTest.cc | 139 +- lib/maths/unittest/CCalendarFeatureTest.h | 15 +- lib/maths/unittest/CCategoricalToolsTest.cc | 365 +- lib/maths/unittest/CCategoricalToolsTest.h | 17 +- lib/maths/unittest/CChecksumTest.cc | 228 +- lib/maths/unittest/CChecksumTest.h | 21 +- lib/maths/unittest/CClustererTest.cc | 35 +- lib/maths/unittest/CClustererTest.h | 9 +- lib/maths/unittest/CCountMinSketchTest.cc | 99 +- lib/maths/unittest/CCountMinSketchTest.h | 13 +- .../unittest/CDecayRateControllerTest.cc | 60 +- lib/maths/unittest/CDecayRateControllerTest.h | 13 +- lib/maths/unittest/CEntropySketchTest.cc | 70 +- lib/maths/unittest/CEntropySketchTest.h | 9 +- lib/maths/unittest/CEqualWithToleranceTest.cc | 159 +- lib/maths/unittest/CEqualWithToleranceTest.h | 13 +- lib/maths/unittest/CForecastTest.cc | 338 +- lib/maths/unittest/CForecastTest.h | 47 +- lib/maths/unittest/CGammaRateConjugateTest.cc | 857 ++--- lib/maths/unittest/CGammaRateConjugateTest.h | 43 +- lib/maths/unittest/CGramSchmidtTest.cc | 221 +- lib/maths/unittest/CGramSchmidtTest.h | 15 +- .../unittest/CInformationCriteriaTest.cc | 255 +- lib/maths/unittest/CInformationCriteriaTest.h | 15 +- lib/maths/unittest/CIntegerToolsTest.cc | 143 +- lib/maths/unittest/CIntegerToolsTest.h | 15 +- lib/maths/unittest/CIntegrationTest.cc | 1154 +++---- lib/maths/unittest/CIntegrationTest.h | 15 +- lib/maths/unittest/CKMeansFastTest.cc | 395 +-- lib/maths/unittest/CKMeansFastTest.h | 21 +- lib/maths/unittest/CKMeansOnlineTest.cc | 471 +-- lib/maths/unittest/CKMeansOnlineTest.h | 25 +- lib/maths/unittest/CKMostCorrelatedTest.cc | 615 ++-- lib/maths/unittest/CKMostCorrelatedTest.h | 27 +- lib/maths/unittest/CKdTreeTest.cc | 72 +- lib/maths/unittest/CKdTreeTest.h | 12 +- .../unittest/CLassoLogisticRegressionTest.cc | 168 +- .../unittest/CLassoLogisticRegressionTest.h | 17 +- lib/maths/unittest/CLinearAlgebraTest.cc | 1005 ++---- lib/maths/unittest/CLinearAlgebraTest.h | 29 +- .../CLogNormalMeanPrecConjugateTest.cc | 1011 ++---- .../CLogNormalMeanPrecConjugateTest.h | 44 +- lib/maths/unittest/CLogTDistributionTest.cc | 125 +- lib/maths/unittest/CLogTDistributionTest.h | 15 +- lib/maths/unittest/CMathsFuncsTest.cc | 63 +- lib/maths/unittest/CMathsFuncsTest.h | 15 +- lib/maths/unittest/CMathsMemoryTest.cc | 70 +- lib/maths/unittest/CMathsMemoryTest.h | 6 +- .../unittest/CMixtureDistributionTest.cc | 223 +- lib/maths/unittest/CMixtureDistributionTest.h | 17 +- lib/maths/unittest/CModelTest.cc | 57 +- lib/maths/unittest/CModelTest.h | 9 +- 
lib/maths/unittest/CMultimodalPriorTest.cc | 1146 +++---- lib/maths/unittest/CMultimodalPriorTest.h | 33 +- .../unittest/CMultinomialConjugateTest.cc | 596 ++-- .../unittest/CMultinomialConjugateTest.h | 29 +- .../CMultivariateConstantPriorTest.cc | 226 +- .../unittest/CMultivariateConstantPriorTest.h | 23 +- .../CMultivariateMultimodalPriorTest.cc | 746 ++--- .../CMultivariateMultimodalPriorTest.h | 35 +- .../CMultivariateNormalConjugateTest.cc | 805 ++--- .../CMultivariateNormalConjugateTest.h | 33 +- .../unittest/CMultivariateOneOfNPriorTest.cc | 666 ++-- .../unittest/CMultivariateOneOfNPriorTest.h | 29 +- lib/maths/unittest/CNaiveBayesTest.cc | 208 +- lib/maths/unittest/CNaiveBayesTest.h | 15 +- .../unittest/CNaturalBreaksClassifierTest.cc | 298 +- .../unittest/CNaturalBreaksClassifierTest.h | 15 +- .../unittest/CNormalMeanPrecConjugateTest.cc | 881 ++--- .../unittest/CNormalMeanPrecConjugateTest.h | 42 +- lib/maths/unittest/COneOfNPriorTest.cc | 597 ++-- lib/maths/unittest/COneOfNPriorTest.h | 34 +- lib/maths/unittest/COrderingsTest.cc | 440 +-- lib/maths/unittest/COrderingsTest.h | 27 +- lib/maths/unittest/COrdinalTest.cc | 182 +- lib/maths/unittest/COrdinalTest.h | 17 +- lib/maths/unittest/CPRNGTest.cc | 179 +- lib/maths/unittest/CPRNGTest.h | 13 +- lib/maths/unittest/CPackedBitVectorTest.cc | 253 +- lib/maths/unittest/CPackedBitVectorTest.h | 21 +- .../CPeriodicityHypothesisTestsTest.cc | 418 +-- .../CPeriodicityHypothesisTestsTest.h | 17 +- .../unittest/CPoissonMeanConjugateTest.cc | 503 +-- .../unittest/CPoissonMeanConjugateTest.h | 34 +- lib/maths/unittest/CPriorTest.cc | 140 +- lib/maths/unittest/CPriorTest.h | 9 +- .../unittest/CProbabilityAggregatorsTest.cc | 534 ++- .../unittest/CProbabilityAggregatorsTest.h | 15 +- .../unittest/CProbabilityCalibratorTest.cc | 93 +- .../unittest/CProbabilityCalibratorTest.h | 10 +- lib/maths/unittest/CQDigestTest.cc | 234 +- lib/maths/unittest/CQDigestTest.h | 22 +- lib/maths/unittest/CQuantileSketchTest.cc | 305 +- lib/maths/unittest/CQuantileSketchTest.h | 23 +- .../unittest/CRadialBasisFunctionTest.cc | 276 +- lib/maths/unittest/CRadialBasisFunctionTest.h | 16 +- .../CRandomProjectionClustererTest.cc | 383 +-- .../unittest/CRandomProjectionClustererTest.h | 19 +- lib/maths/unittest/CRegressionTest.cc | 422 +-- lib/maths/unittest/CRegressionTest.h | 39 +- lib/maths/unittest/CSamplingTest.cc | 139 +- lib/maths/unittest/CSamplingTest.h | 11 +- ...CSeasonalComponentAdaptiveBucketingTest.cc | 442 +-- .../CSeasonalComponentAdaptiveBucketingTest.h | 31 +- lib/maths/unittest/CSeasonalComponentTest.cc | 499 +-- lib/maths/unittest/CSeasonalComponentTest.h | 19 +- lib/maths/unittest/CSetToolsTest.cc | 219 +- lib/maths/unittest/CSetToolsTest.h | 15 +- lib/maths/unittest/CSignalTest.cc | 382 +-- lib/maths/unittest/CSignalTest.h | 19 +- lib/maths/unittest/CSolversTest.cc | 230 +- lib/maths/unittest/CSolversTest.h | 15 +- lib/maths/unittest/CSplineTest.cc | 405 +-- lib/maths/unittest/CSplineTest.h | 21 +- lib/maths/unittest/CStatisticalTestsTest.cc | 78 +- lib/maths/unittest/CStatisticalTestsTest.h | 11 +- .../unittest/CTimeSeriesChangeDetectorTest.cc | 238 +- .../unittest/CTimeSeriesChangeDetectorTest.h | 43 +- .../unittest/CTimeSeriesDecompositionTest.cc | 1143 +++---- .../unittest/CTimeSeriesDecompositionTest.h | 47 +- lib/maths/unittest/CTimeSeriesModelTest.cc | 1352 +++----- lib/maths/unittest/CTimeSeriesModelTest.h | 39 +- lib/maths/unittest/CToolsTest.cc | 816 ++--- lib/maths/unittest/CToolsTest.h | 21 +- lib/maths/unittest/CTrendComponentTest.cc 
| 262 +- lib/maths/unittest/CTrendComponentTest.h | 15 +- lib/maths/unittest/CTrendTestsTest.cc | 238 +- lib/maths/unittest/CTrendTestsTest.h | 13 +- lib/maths/unittest/CXMeansOnline1dTest.cc | 450 +-- lib/maths/unittest/CXMeansOnline1dTest.h | 29 +- lib/maths/unittest/CXMeansOnlineTest.cc | 580 ++-- lib/maths/unittest/CXMeansOnlineTest.h | 23 +- lib/maths/unittest/CXMeansTest.cc | 402 +-- lib/maths/unittest/CXMeansTest.h | 21 +- lib/maths/unittest/Main.cc | 168 +- lib/maths/unittest/TestUtils.cc | 355 +- lib/maths/unittest/TestUtils.h | 432 ++- lib/model/CAnnotatedProbability.cc | 206 +- lib/model/CAnnotatedProbabilityBuilder.cc | 138 +- lib/model/CAnomalyDetector.cc | 562 +--- lib/model/CAnomalyDetectorModel.cc | 575 ++-- lib/model/CAnomalyDetectorModelConfig.cc | 747 ++--- lib/model/CAnomalyScore.cc | 819 ++--- lib/model/CBucketGatherer.cc | 532 ++- lib/model/CCountingModel.cc | 318 +- lib/model/CCountingModelFactory.cc | 123 +- lib/model/CDataClassifier.cc | 49 +- lib/model/CDataGatherer.cc | 938 ++---- lib/model/CDetectionRule.cc | 150 +- lib/model/CDetectorEqualizer.cc | 105 +- lib/model/CDynamicStringIdRegistry.cc | 223 +- lib/model/CEventData.cc | 125 +- lib/model/CEventRateBucketGatherer.cc | 1616 +++------ lib/model/CEventRateModel.cc | 539 ++- lib/model/CEventRateModelFactory.cc | 157 +- lib/model/CEventRatePopulationModel.cc | 835 ++--- lib/model/CEventRatePopulationModelFactory.cc | 158 +- lib/model/CFeatureData.cc | 88 +- lib/model/CForecastDataSink.cc | 129 +- lib/model/CGathererTools.cc | 274 +- lib/model/CHierarchicalResults.cc | 878 ++--- lib/model/CHierarchicalResultsAggregator.cc | 315 +- lib/model/CHierarchicalResultsNormalizer.cc | 401 +-- lib/model/CHierarchicalResultsPopulator.cc | 44 +- ...HierarchicalResultsProbabilityFinalizer.cc | 15 +- lib/model/CIndividualModel.cc | 507 +-- lib/model/CInterimBucketCorrector.cc | 118 +- lib/model/CLimits.cc | 99 +- lib/model/CMemoryUsageEstimator.cc | 99 +- lib/model/CMetricBucketGatherer.cc | 1879 +++++------ lib/model/CMetricModel.cc | 537 ++- lib/model/CMetricModelFactory.cc | 157 +- lib/model/CMetricPopulationModel.cc | 792 ++--- lib/model/CMetricPopulationModelFactory.cc | 153 +- lib/model/CModelDetailsView.cc | 264 +- lib/model/CModelFactory.cc | 325 +- lib/model/CModelParams.cc | 97 +- lib/model/CModelPlotData.cc | 195 +- lib/model/CModelTools.cc | 320 +- lib/model/CPartitioningFields.cc | 34 +- lib/model/CPopulationModel.cc | 481 +-- .../CProbabilityAndInfluenceCalculator.cc | 1288 ++++--- lib/model/CResourceMonitor.cc | 226 +- lib/model/CResultsQueue.cc | 126 +- lib/model/CRuleCondition.cc | 277 +- lib/model/CSample.cc | 98 +- lib/model/CSampleCounts.cc | 258 +- lib/model/CSearchKey.cc | 286 +- lib/model/CSimpleCountDetector.cc | 47 +- lib/model/CStringStore.cc | 121 +- lib/model/FrequencyPredicates.cc | 21 +- lib/model/FunctionTypes.cc | 2966 ++++++++--------- lib/model/ModelTypes.cc | 425 +-- .../CAnnotatedProbabilityBuilderTest.cc | 171 +- .../CAnnotatedProbabilityBuilderTest.h | 25 +- .../CAnomalyDetectorModelConfigTest.cc | 63 +- .../CAnomalyDetectorModelConfigTest.h | 12 +- lib/model/unittest/CAnomalyScoreTest.cc | 304 +- lib/model/unittest/CAnomalyScoreTest.h | 33 +- lib/model/unittest/CBucketQueueTest.cc | 89 +- lib/model/unittest/CBucketQueueTest.h | 23 +- lib/model/unittest/CCountingModelTest.cc | 52 +- lib/model/unittest/CCountingModelTest.h | 17 +- lib/model/unittest/CDetectionRuleTest.cc | 712 ++-- lib/model/unittest/CDetectionRuleTest.h | 39 +- lib/model/unittest/CDetectorEqualizerTest.cc | 98 +- 
lib/model/unittest/CDetectorEqualizerTest.h | 13 +- .../unittest/CDynamicStringIdRegistryTest.cc | 44 +- .../unittest/CDynamicStringIdRegistryTest.h | 12 +- .../unittest/CEventRateAnomalyDetectorTest.cc | 242 +- .../unittest/CEventRateAnomalyDetectorTest.h | 12 +- .../unittest/CEventRateDataGathererTest.cc | 1571 +++++---- .../unittest/CEventRateDataGathererTest.h | 39 +- lib/model/unittest/CEventRateModelTest.cc | 1516 +++------ lib/model/unittest/CEventRateModelTest.h | 56 +- .../CEventRatePopulationDataGathererTest.cc | 580 ++-- .../CEventRatePopulationDataGathererTest.h | 30 +- .../unittest/CEventRatePopulationModelTest.cc | 837 ++--- .../unittest/CEventRatePopulationModelTest.h | 35 +- lib/model/unittest/CFunctionTypesTest.cc | 15 +- lib/model/unittest/CFunctionTypesTest.h | 9 +- lib/model/unittest/CGathererToolsTest.cc | 16 +- lib/model/unittest/CGathererToolsTest.h | 11 +- .../CHierarchicalResultsLevelSetTest.cc | 79 +- .../CHierarchicalResultsLevelSetTest.h | 9 +- .../unittest/CHierarchicalResultsTest.cc | 1178 +++---- lib/model/unittest/CHierarchicalResultsTest.h | 31 +- .../unittest/CInterimBucketCorrectorTest.cc | 53 +- .../unittest/CInterimBucketCorrectorTest.h | 17 +- lib/model/unittest/CLimitsTest.cc | 34 +- lib/model/unittest/CLimitsTest.h | 15 +- .../unittest/CMemoryUsageEstimatorTest.cc | 84 +- .../unittest/CMemoryUsageEstimatorTest.h | 13 +- .../unittest/CMetricAnomalyDetectorTest.cc | 356 +- .../unittest/CMetricAnomalyDetectorTest.h | 14 +- lib/model/unittest/CMetricDataGathererTest.cc | 1400 ++++---- lib/model/unittest/CMetricDataGathererTest.h | 36 +- lib/model/unittest/CMetricModelTest.cc | 1618 ++++----- lib/model/unittest/CMetricModelTest.h | 59 +- .../CMetricPopulationDataGathererTest.cc | 862 ++--- .../CMetricPopulationDataGathererTest.h | 38 +- .../unittest/CMetricPopulationModelTest.cc | 971 ++---- .../unittest/CMetricPopulationModelTest.h | 34 +- lib/model/unittest/CModelDetailsViewTest.cc | 107 +- lib/model/unittest/CModelDetailsViewTest.h | 13 +- lib/model/unittest/CModelMemoryTest.cc | 75 +- lib/model/unittest/CModelMemoryTest.h | 14 +- lib/model/unittest/CModelToolsTest.cc | 207 +- lib/model/unittest/CModelToolsTest.h | 12 +- lib/model/unittest/CModelTypesTest.cc | 45 +- lib/model/unittest/CModelTypesTest.h | 9 +- .../CProbabilityAndInfluenceCalculatorTest.cc | 1080 +++--- .../CProbabilityAndInfluenceCalculatorTest.h | 19 +- lib/model/unittest/CResourceLimitTest.cc | 435 +-- lib/model/unittest/CResourceLimitTest.h | 40 +- lib/model/unittest/CResourceMonitorTest.cc | 101 +- lib/model/unittest/CResourceMonitorTest.h | 40 +- lib/model/unittest/CRuleConditionTest.cc | 63 +- lib/model/unittest/CRuleConditionTest.h | 10 +- lib/model/unittest/CSampleQueueTest.cc | 447 +-- lib/model/unittest/CSampleQueueTest.h | 95 +- lib/model/unittest/CStringStoreTest.cc | 161 +- lib/model/unittest/CStringStoreTest.h | 13 +- lib/model/unittest/CToolsTest.cc | 36 +- lib/model/unittest/CToolsTest.h | 11 +- lib/model/unittest/Main.cc | 85 +- lib/model/unittest/Mocks.cc | 172 +- lib/model/unittest/Mocks.h | 196 +- lib/test/CMultiFileDataAdder.cc | 51 +- lib/test/CMultiFileSearcher.cc | 31 +- lib/test/CRandomNumbers.cc | 167 +- lib/test/CShellCmdEscape.cc | 14 +- lib/test/CShellCmdEscape_Windows.cc | 14 +- lib/test/CTestRunner.cc | 185 +- lib/test/CTestTimer.cc | 45 +- lib/test/CTestTmpDir.cc | 37 +- lib/test/CTestTmpDir_Windows.cc | 19 +- lib/test/CTimeSeriesTestData.cc | 203 +- lib/test/CTimingXmlOutputterHook.cc | 93 +- lib/ver/unittest/CBuildInfoTest.cc | 14 +- 
lib/ver/unittest/CBuildInfoTest.h | 11 +- lib/ver/unittest/Main.cc | 7 +- 1199 files changed, 106877 insertions(+), 156118 deletions(-) diff --git a/bin/autoconfig/CCmdLineParser.cc b/bin/autoconfig/CCmdLineParser.cc index c2b9b15ea1..c79526d873 100644 --- a/bin/autoconfig/CCmdLineParser.cc +++ b/bin/autoconfig/CCmdLineParser.cc @@ -12,137 +12,107 @@ #include -namespace ml -{ -namespace autoconfig -{ +namespace ml { +namespace autoconfig { -const std::string CCmdLineParser::DESCRIPTION = -"Usage: autoconfig [options]\n" -"Options"; +const std::string CCmdLineParser::DESCRIPTION = "Usage: autoconfig [options]\n" + "Options"; bool CCmdLineParser::parse(int argc, - const char * const *argv, - std::string &logProperties, - std::string &logPipe, - char &delimiter, - bool &lengthEncodedInput, - std::string &timeField, - std::string &timeFormat, - std::string &configFile, - std::string &inputFileName, - bool &isInputFileNamedPipe, - std::string &outputFileName, - bool &isOutputFileNamedPipe, - bool &verbose, - bool &writeDetectorConfigs) -{ - try - { + const char* const* argv, + std::string& logProperties, + std::string& logPipe, + char& delimiter, + bool& lengthEncodedInput, + std::string& timeField, + std::string& timeFormat, + std::string& configFile, + std::string& inputFileName, + bool& isInputFileNamedPipe, + std::string& outputFileName, + bool& isOutputFileNamedPipe, + bool& verbose, + bool& writeDetectorConfigs) { + try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options() - ("help", "Display this information and exit") - ("version", "Display version information and exit") - ("logProperties", boost::program_options::value(), - "Optional logger properties file") - ("logPipe", boost::program_options::value(), - "Optional log to named pipe") - ("delimiter", boost::program_options::value(), - "Optional delimiter character for delimited data formats - default is ',' (comma separated)") - ("lengthEncodedInput", - "Take input in length encoded binary format - default is delimited") - ("timefield", boost::program_options::value(), - "Optional name of the field containing the timestamp - default is 'time'") - ("timeformat", boost::program_options::value(), - "Optional format of the date in the time field in strptime code - default is the epoch time in seconds") - ("config", boost::program_options::value(), - "Optional configuration file") - ("input", boost::program_options::value(), - "Optional file to read input from - not present means read from STDIN") - ("inputIsPipe", "Specified input file is a named pipe") - ("output", boost::program_options::value(), - "Optional file to write output to - not present means write to STDOUT") - ("outputIsPipe", "Specified output file is a named pipe") - ("verbose", "Output information about all detectors including those that have been discarded") - ("writeDetectorConfigs", - "Output the detector configurations in JSON format") - ; + desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( + "logProperties", boost::program_options::value(), "Optional logger properties file")( + "logPipe", boost::program_options::value(), "Optional log to named pipe")( + "delimiter", + boost::program_options::value(), + "Optional delimiter character for delimited data formats - default is ',' (comma separated)")( + "lengthEncodedInput", "Take input in length encoded binary format - default is delimited")( + "timefield", + boost::program_options::value(), + "Optional name of the field 
containing the timestamp - default is 'time'")( + "timeformat", + boost::program_options::value(), + "Optional format of the date in the time field in strptime code - default is the epoch time in seconds")( + "config", boost::program_options::value(), "Optional configuration file")( + "input", boost::program_options::value(), "Optional file to read input from - not present means read from STDIN")( + "inputIsPipe", "Specified input file is a named pipe")( + "output", boost::program_options::value(), "Optional file to write output to - not present means write to STDOUT")( + "outputIsPipe", "Specified output file is a named pipe")( + "verbose", "Output information about all detectors including those that have been discarded")( + "writeDetectorConfigs", "Output the detector configurations in JSON format"); boost::program_options::variables_map vm; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); - if (vm.count("help") > 0) - { + if (vm.count("help") > 0) { std::cerr << desc << std::endl; return false; } - if (vm.count("version") > 0) - { + if (vm.count("version") > 0) { std::cerr << ver::CBuildInfo::fullInfo() << std::endl; return false; } - if (vm.count("logProperties") > 0) - { + if (vm.count("logProperties") > 0) { logProperties = vm["logProperties"].as(); } - if (vm.count("logPipe") > 0) - { + if (vm.count("logPipe") > 0) { logPipe = vm["logPipe"].as(); } - if (vm.count("delimiter") > 0) - { + if (vm.count("delimiter") > 0) { delimiter = vm["delimiter"].as(); } - if (vm.count("lengthEncodedInput") > 0) - { + if (vm.count("lengthEncodedInput") > 0) { lengthEncodedInput = true; } - if (vm.count("timefield") > 0) - { + if (vm.count("timefield") > 0) { timeField = vm["timefield"].as(); } - if (vm.count("timeformat") > 0) - { + if (vm.count("timeformat") > 0) { timeFormat = vm["timeformat"].as(); } - if (vm.count("config") > 0) - { + if (vm.count("config") > 0) { configFile = vm["config"].as(); } - if (vm.count("input") > 0) - { + if (vm.count("input") > 0) { inputFileName = vm["input"].as(); } - if (vm.count("inputIsPipe") > 0) - { + if (vm.count("inputIsPipe") > 0) { isInputFileNamedPipe = true; } - if (vm.count("output") > 0) - { + if (vm.count("output") > 0) { outputFileName = vm["output"].as(); } - if (vm.count("outputIsPipe") > 0) - { + if (vm.count("outputIsPipe") > 0) { isOutputFileNamedPipe = true; } - if (vm.count("verbose") > 0) - { + if (vm.count("verbose") > 0) { verbose = true; } - if (vm.count("writeDetectorConfigs") > 0) - { + if (vm.count("writeDetectorConfigs") > 0) { writeDetectorConfigs = true; } - } - catch (std::exception &e) - { + } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; return false; } return true; } - } } diff --git a/bin/autoconfig/CCmdLineParser.h b/bin/autoconfig/CCmdLineParser.h index dd6b7cae28..cb7213973f 100644 --- a/bin/autoconfig/CCmdLineParser.h +++ b/bin/autoconfig/CCmdLineParser.h @@ -11,10 +11,8 @@ #include #include -namespace ml -{ -namespace autoconfig -{ +namespace ml { +namespace autoconfig { //! \brief Very simple command line parser. //! @@ -24,34 +22,31 @@ namespace autoconfig //! IMPLEMENTATION DECISIONS:\n //! Put in a class rather than main to allow testing. //! -class CCmdLineParser -{ - public: - using TStrVec = std::vector; - - public: - //! Parse the arguments and return options if appropriate. 
- static bool parse(int argc, - const char * const *argv, - std::string &logProperties, - std::string &logPipe, - char &delimiter, - bool &lengthEncodedInput, - std::string &timeField, - std::string &timeFormat, - std::string &configFile, - std::string &inputFileName, - bool &isInputFileNamedPipe, - std::string &outputFileName, - bool &isOutputFileNamedPipe, - bool &verbose, - bool &writeDetectorConfigs); - - private: - static const std::string DESCRIPTION; +class CCmdLineParser { +public: + using TStrVec = std::vector; + +public: + //! Parse the arguments and return options if appropriate. + static bool parse(int argc, + const char* const* argv, + std::string& logProperties, + std::string& logPipe, + char& delimiter, + bool& lengthEncodedInput, + std::string& timeField, + std::string& timeFormat, + std::string& configFile, + std::string& inputFileName, + bool& isInputFileNamedPipe, + std::string& outputFileName, + bool& isOutputFileNamedPipe, + bool& verbose, + bool& writeDetectorConfigs); + +private: + static const std::string DESCRIPTION; }; - - } } diff --git a/bin/autoconfig/Main.cc b/bin/autoconfig/Main.cc index ace7cdbb64..78fbb4b994 100644 --- a/bin/autoconfig/Main.cc +++ b/bin/autoconfig/Main.cc @@ -14,8 +14,8 @@ //! Standalone program. //! #include -#include #include +#include #include @@ -37,23 +37,21 @@ #include - -int main(int argc, char **argv) -{ +int main(int argc, char** argv) { // Read command line options std::string logProperties; std::string logPipe; - char delimiter(','); - bool lengthEncodedInput(false); + char delimiter(','); + bool lengthEncodedInput(false); std::string timeField("time"); std::string timeFormat; std::string configFile; std::string inputFileName; - bool isInputFileNamedPipe(false); + bool isInputFileNamedPipe(false); std::string outputFileName; - bool isOutputFileNamedPipe(false); - bool verbose(false); - bool writeDetectorConfigs(false); + bool isOutputFileNamedPipe(false); + bool verbose(false); + bool writeDetectorConfigs(false); if (ml::autoconfig::CCmdLineParser::parse(argc, argv, logProperties, @@ -68,20 +66,15 @@ int main(int argc, char **argv) outputFileName, isOutputFileNamedPipe, verbose, - writeDetectorConfigs) == false) - { + writeDetectorConfigs) == false) { return EXIT_FAILURE; } // Construct the IO manager before reconfiguring the logger, as it performs // std::ios actions that only work before first use - ml::api::CIoManager ioMgr(inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe); + ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe); - if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) - { + if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) { LOG_FATAL("Could not reconfigure logging"); return EXIT_FAILURE; } @@ -93,20 +86,16 @@ int main(int argc, char **argv) ml::core::CProcessPriority::reducePriority(); - if (ioMgr.initIo() == false) - { + if (ioMgr.initIo() == false) { LOG_FATAL("Failed to initialise IO"); return EXIT_FAILURE; } typedef boost::scoped_ptr TScopedInputParserP; TScopedInputParserP inputParser; - if (lengthEncodedInput) - { + if (lengthEncodedInput) { inputParser.reset(new ml::api::CLengthEncodedInputParser(ioMgr.inputStream())); - } - else - { + } else { inputParser.reset(new ml::api::CCsvInputParser(ioMgr.inputStream(), delimiter)); } @@ -125,8 +114,7 @@ int main(int argc, char **argv) 0, // no persistence at present *inputParser, configurer); - if (skeleton.ioLoop() == false) 
- { + if (skeleton.ioLoop() == false) { LOG_FATAL("Ml autoconfig failed"); return EXIT_FAILURE; } diff --git a/bin/autodetect/CCmdLineParser.cc b/bin/autodetect/CCmdLineParser.cc index 0d6d7eeefd..9284706c69 100644 --- a/bin/autodetect/CCmdLineParser.cc +++ b/bin/autodetect/CCmdLineParser.cc @@ -14,280 +14,228 @@ #include +namespace ml { +namespace autodetect { -namespace ml -{ -namespace autodetect -{ - - -const std::string CCmdLineParser::DESCRIPTION = -"Usage: autodetect [options] [+ [by ]]\n" -"Options:"; - +const std::string CCmdLineParser::DESCRIPTION = "Usage: autodetect [options] [+ [by ]]\n" + "Options:"; bool CCmdLineParser::parse(int argc, - const char * const *argv, - std::string &limitConfigFile, - std::string &modelConfigFile, - std::string &fieldConfigFile, - std::string &modelPlotConfigFile, - std::string &jobId, - std::string &logProperties, - std::string &logPipe, - core_t::TTime &bucketSpan, - core_t::TTime &latency, - std::string &summaryCountFieldName, - char &delimiter, - bool &lengthEncodedInput, - std::string &timeField, - std::string &timeFormat, - std::string &quantilesState, - bool &deleteStateFiles, - core_t::TTime &persistInterval, - core_t::TTime &maxQuantileInterval, - std::string &inputFileName, - bool &isInputFileNamedPipe, - std::string &outputFileName, - bool &isOutputFileNamedPipe, - std::string &restoreFileName, - bool &isRestoreFileNamedPipe, - std::string &persistFileName, - bool &isPersistFileNamedPipe, - size_t &maxAnomalyRecords, - bool &memoryUsage, - std::size_t &bucketResultsDelay, - bool &multivariateByFields, - std::string &multipleBucketspans, - bool &perPartitionNormalization, - TStrVec &clauseTokens) -{ - try - { + const char* const* argv, + std::string& limitConfigFile, + std::string& modelConfigFile, + std::string& fieldConfigFile, + std::string& modelPlotConfigFile, + std::string& jobId, + std::string& logProperties, + std::string& logPipe, + core_t::TTime& bucketSpan, + core_t::TTime& latency, + std::string& summaryCountFieldName, + char& delimiter, + bool& lengthEncodedInput, + std::string& timeField, + std::string& timeFormat, + std::string& quantilesState, + bool& deleteStateFiles, + core_t::TTime& persistInterval, + core_t::TTime& maxQuantileInterval, + std::string& inputFileName, + bool& isInputFileNamedPipe, + std::string& outputFileName, + bool& isOutputFileNamedPipe, + std::string& restoreFileName, + bool& isRestoreFileNamedPipe, + std::string& persistFileName, + bool& isPersistFileNamedPipe, + size_t& maxAnomalyRecords, + bool& memoryUsage, + std::size_t& bucketResultsDelay, + bool& multivariateByFields, + std::string& multipleBucketspans, + bool& perPartitionNormalization, + TStrVec& clauseTokens) { + try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options() - ("help", "Display this information and exit") - ("version", "Display version information and exit") - ("limitconfig", boost::program_options::value(), - "Optional limit config file") - ("modelconfig", boost::program_options::value(), - "Optional model config file") - ("fieldconfig", boost::program_options::value(), - "Optional field config file") - ("modelplotconfig", boost::program_options::value(), - "Optional model plot config file") - ("jobid", boost::program_options::value(), - "ID of the job this process is associated with") - ("logProperties", boost::program_options::value(), - "Optional logger properties file") - ("logPipe", boost::program_options::value(), - "Optional log to named pipe") - ("bucketspan", 
boost::program_options::value<core_t::TTime>(),
-             "Optional aggregation bucket span (in seconds) - default is 300")
-            ("latency", boost::program_options::value<core_t::TTime>(),
-             "Optional maximum delay for out-of-order records (in seconds) - default is 0")
-            ("summarycountfield", boost::program_options::value<std::string>(),
-             "Optional field that contains counts for pre-summarized input - default is none")
-            ("delimiter", boost::program_options::value<char>(),
-             "Optional delimiter character for delimited data formats - default is '\t' (tab separated)")
-            ("lengthEncodedInput",
-             "Take input in length encoded binary format - default is delimited")
-            ("timefield", boost::program_options::value<std::string>(),
-             "Optional name of the field containing the timestamp - default is 'time'")
-            ("timeformat", boost::program_options::value<std::string>(),
-             "Optional format of the date in the time field in strptime code - default is the epoch time in seconds")
-            ("quantilesState", boost::program_options::value<std::string>(),
-             "Optional file containing quantiles for normalization")
-            ("deleteStateFiles",
-             "If the 'quantilesState' option is used and this flag is set then delete the model state files once they have been read")
-            ("input", boost::program_options::value<std::string>(),
-             "Optional file to read input from - not present means read from STDIN")
-            ("inputIsPipe", "Specified input file is a named pipe")
-            ("output", boost::program_options::value<std::string>(),
-             "Optional file to write output to - not present means write to STDOUT")
-            ("outputIsPipe", "Specified output file is a named pipe")
-            ("restore", boost::program_options::value<std::string>(),
-             "Optional file to restore state from - not present means no state restoration")
-            ("restoreIsPipe", "Specified restore file is a named pipe")
-            ("persist", boost::program_options::value<std::string>(),
-             "Optional file to persist state to - not present means no state persistence")
-            ("persistIsPipe", "Specified persist file is a named pipe")
-            ("persistInterval", boost::program_options::value<core_t::TTime>(),
-             "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at program exit")
-            ("maxQuantileInterval", boost::program_options::value<core_t::TTime>(),
-             "Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - if not specified then quantiles will only be output following a big anomaly")
-            ("maxAnomalyRecords", boost::program_options::value<size_t>(),
-             "The maximum number of records to be outputted for each bucket. Defaults to 100, a value 0 removes the limit.")
-            ("memoryUsage",
-             "Log the model memory usage at the end of the job")
-            ("resultFinalizationWindow", boost::program_options::value<std::size_t>(),
-             "The number of half buckets to store before choosing which overlapping bucket has the biggest anomaly")
-            ("multivariateByFields",
-             "Optional flag to enable multi-variate analysis of correlated by fields")
-            ("multipleBucketspans", boost::program_options::value<std::string>(),
-             "Optional comma-separated list of additional bucketspans - must be direct multiples of the main bucketspan")
-            ("perPartitionNormalization",
-             "Optional flag to enable per partition normalization")
-            ;
+        desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")(
+            "limitconfig", boost::program_options::value<std::string>(), "Optional limit config file")(
+            "modelconfig", boost::program_options::value<std::string>(), "Optional model config file")(
+            "fieldconfig", boost::program_options::value<std::string>(), "Optional field config file")(
+            "modelplotconfig", boost::program_options::value<std::string>(), "Optional model plot config file")(
+            "jobid", boost::program_options::value<std::string>(), "ID of the job this process is associated with")(
+            "logProperties", boost::program_options::value<std::string>(), "Optional logger properties file")(
+            "logPipe", boost::program_options::value<std::string>(), "Optional log to named pipe")(
+            "bucketspan", boost::program_options::value<core_t::TTime>(), "Optional aggregation bucket span (in seconds) - default is 300")(
+            "latency",
+            boost::program_options::value<core_t::TTime>(),
+            "Optional maximum delay for out-of-order records (in seconds) - default is 0")(
+            "summarycountfield",
+            boost::program_options::value<std::string>(),
+            "Optional field that contains counts for pre-summarized input - default is none")(
+            "delimiter",
+            boost::program_options::value<char>(),
+            "Optional delimiter character for delimited data formats - default is '\t' (tab separated)")(
+            "lengthEncodedInput", "Take input in length encoded binary format - default is delimited")(
+            "timefield",
+            boost::program_options::value<std::string>(),
+            "Optional name of the field containing the timestamp - default is 'time'")(
+            "timeformat",
+            boost::program_options::value<std::string>(),
+            "Optional format of the date in the time field in strptime code - default is the epoch time in seconds")(
+            "quantilesState", boost::program_options::value<std::string>(), "Optional file containing quantiles for normalization")(
+            "deleteStateFiles",
+            "If the 'quantilesState' option is used and this flag is set then delete the model state files once they have been read")(
+            "input", boost::program_options::value<std::string>(), "Optional file to read input from - not present means read from STDIN")(
+            "inputIsPipe", "Specified input file is a named pipe")(
+            "output", boost::program_options::value<std::string>(), "Optional file to write output to - not present means write to STDOUT")(
+            "outputIsPipe", "Specified output file is a named pipe")(
+            "restore",
+            boost::program_options::value<std::string>(),
+            "Optional file to restore state from - not present means no state restoration")("restoreIsPipe",
+                                                                                            "Specified restore file is a named pipe")(
+            "persist",
+            boost::program_options::value<std::string>(),
+            "Optional file to persist state to - not present means no state persistence")("persistIsPipe",
+                                                                                          "Specified persist file is a named pipe")(
+            "persistInterval",
+            boost::program_options::value<core_t::TTime>(),
+            "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at "
+            "program exit")("maxQuantileInterval",
+                            boost::program_options::value<core_t::TTime>(),
+                            "Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - "
+                            "if not specified then quantiles will only be output following a big anomaly")(
+            "maxAnomalyRecords",
+            boost::program_options::value<size_t>(),
+            "The maximum number of records to be outputted for each bucket. Defaults to 100, a value 0 removes the limit.")(
+            "memoryUsage", "Log the model memory usage at the end of the job")(
+            "resultFinalizationWindow",
+            boost::program_options::value<std::size_t>(),
+            "The number of half buckets to store before choosing which overlapping bucket has the biggest anomaly")(
+            "multivariateByFields", "Optional flag to enable multi-variate analysis of correlated by fields")(
+            "multipleBucketspans",
+            boost::program_options::value<std::string>(),
+            "Optional comma-separated list of additional bucketspans - must be direct multiples of the main bucketspan")(
+            "perPartitionNormalization", "Optional flag to enable per partition normalization");

         boost::program_options::variables_map vm;
         boost::program_options::parsed_options parsed =
-                boost::program_options::command_line_parser(argc, argv).options(desc).allow_unregistered().run();
+            boost::program_options::command_line_parser(argc, argv).options(desc).allow_unregistered().run();
         boost::program_options::store(parsed, vm);

-        if (vm.count("help") > 0)
-        {
+        if (vm.count("help") > 0) {
             std::cerr << desc << std::endl;
             return false;
         }
-        if (vm.count("version") > 0)
-        {
+        if (vm.count("version") > 0) {
             std::cerr << "Model State Version " << model::CAnomalyDetector::STATE_VERSION << std::endl
                       << "Quantile State Version " << model::CAnomalyScore::CURRENT_FORMAT_VERSION << std::endl
                       << ver::CBuildInfo::fullInfo() << std::endl;
             return false;
         }
-        if (vm.count("limitconfig") > 0)
-        {
+        if (vm.count("limitconfig") > 0) {
             limitConfigFile = vm["limitconfig"].as<std::string>();
         }
-        if (vm.count("modelconfig") > 0)
-        {
+        if (vm.count("modelconfig") > 0) {
             modelConfigFile = vm["modelconfig"].as<std::string>();
         }
-        if (vm.count("fieldconfig") > 0)
-        {
+        if (vm.count("fieldconfig") > 0) {
             fieldConfigFile = vm["fieldconfig"].as<std::string>();
         }
-        if (vm.count("modelplotconfig") > 0)
-        {
+        if (vm.count("modelplotconfig") > 0) {
             modelPlotConfigFile = vm["modelplotconfig"].as<std::string>();
         }
-        if (vm.count("jobid") > 0)
-        {
+        if (vm.count("jobid") > 0) {
             jobId = vm["jobid"].as<std::string>();
         }
-        if (vm.count("logProperties") > 0)
-        {
+        if (vm.count("logProperties") > 0) {
             logProperties = vm["logProperties"].as<std::string>();
         }
-        if (vm.count("logPipe") > 0)
-        {
+        if (vm.count("logPipe") > 0) {
             logPipe = vm["logPipe"].as<std::string>();
         }
-        if (vm.count("bucketspan") > 0)
-        {
+        if (vm.count("bucketspan") > 0) {
             bucketSpan = vm["bucketspan"].as<core_t::TTime>();
         }
-        if (vm.count("latency") > 0)
-        {
+        if (vm.count("latency") > 0) {
             latency = vm["latency"].as<core_t::TTime>();
         }
-        if (vm.count("summarycountfield") > 0)
-        {
+        if (vm.count("summarycountfield") > 0) {
             summaryCountFieldName = vm["summarycountfield"].as<std::string>();
         }
-        if (vm.count("delimiter") > 0)
-        {
+        if (vm.count("delimiter") > 0) {
             delimiter = vm["delimiter"].as<char>();
         }
-        if (vm.count("lengthEncodedInput") > 0)
-        {
+        if (vm.count("lengthEncodedInput") > 0) {
             lengthEncodedInput = true;
         }
-        if (vm.count("timefield") > 0)
-        {
+        if (vm.count("timefield") > 0) {
             timeField = vm["timefield"].as<std::string>();
         }
-        if (vm.count("timeformat") > 0)
-        {
+        if (vm.count("timeformat") > 0) {
             timeFormat = vm["timeformat"].as<std::string>();
         }
-        if (vm.count("quantilesState") > 0)
-        {
+        if (vm.count("quantilesState") > 0) {
             quantilesState = vm["quantilesState"].as<std::string>();
         }
-        if (vm.count("deleteStateFiles") > 0)
-        {
+        if (vm.count("deleteStateFiles") > 0) {
             deleteStateFiles = true;
         }
-        if (vm.count("persistInterval") > 0)
-        {
+        if (vm.count("persistInterval") > 0) {
             persistInterval = vm["persistInterval"].as<core_t::TTime>();
         }
-        if (vm.count("maxQuantileInterval") > 0)
-        {
+        if (vm.count("maxQuantileInterval") > 0) {
             maxQuantileInterval = vm["maxQuantileInterval"].as<core_t::TTime>();
         }
-        if (vm.count("input") > 0)
-        {
+        if (vm.count("input") > 0) {
             inputFileName = vm["input"].as<std::string>();
         }
-        if (vm.count("inputIsPipe") > 0)
-        {
+        if (vm.count("inputIsPipe") > 0) {
             isInputFileNamedPipe = true;
         }
-        if (vm.count("output") > 0)
-        {
+        if (vm.count("output") > 0) {
             outputFileName = vm["output"].as<std::string>();
         }
-        if (vm.count("outputIsPipe") > 0)
-        {
+        if (vm.count("outputIsPipe") > 0) {
             isOutputFileNamedPipe = true;
         }
-        if (vm.count("restore") > 0)
-        {
+        if (vm.count("restore") > 0) {
             restoreFileName = vm["restore"].as<std::string>();
         }
-        if (vm.count("restoreIsPipe") > 0)
-        {
+        if (vm.count("restoreIsPipe") > 0) {
             isRestoreFileNamedPipe = true;
         }
-        if (vm.count("persist") > 0)
-        {
+        if (vm.count("persist") > 0) {
             persistFileName = vm["persist"].as<std::string>();
         }
-        if (vm.count("persistIsPipe") > 0)
-        {
+        if (vm.count("persistIsPipe") > 0) {
             isPersistFileNamedPipe = true;
         }
-        if (vm.count("maxAnomalyRecords") > 0)
-        {
+        if (vm.count("maxAnomalyRecords") > 0) {
             maxAnomalyRecords = vm["maxAnomalyRecords"].as<size_t>();
         }
-        if (vm.count("memoryUsage") > 0)
-        {
+        if (vm.count("memoryUsage") > 0) {
             memoryUsage = true;
         }
-        if (vm.count("resultFinalizationWindow") > 0)
-        {
+        if (vm.count("resultFinalizationWindow") > 0) {
             bucketResultsDelay = vm["resultFinalizationWindow"].as<std::size_t>();
         }
-        if (vm.count("multivariateByFields") > 0)
-        {
+        if (vm.count("multivariateByFields") > 0) {
             multivariateByFields = true;
         }
-        if (vm.count("multipleBucketspans") > 0)
-        {
+        if (vm.count("multipleBucketspans") > 0) {
             multipleBucketspans = vm["multipleBucketspans"].as<std::string>();
         }
-        if (vm.count("perPartitionNormalization") > 0)
-        {
+        if (vm.count("perPartitionNormalization") > 0) {
             perPartitionNormalization = true;
         }

-        boost::program_options::collect_unrecognized(parsed.options,
-                                                     boost::program_options::include_positional).swap(clauseTokens);
-    }
-    catch (std::exception &e)
-    {
+        boost::program_options::collect_unrecognized(parsed.options, boost::program_options::include_positional).swap(clauseTokens);
+    } catch (std::exception& e) {
         std::cerr << "Error processing command line: " << e.what() << std::endl;
         return false;
     }

     return true;
 }
-
-
 }
 }
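A note for readers on the chained style clang-format settles on above: the dense ")(" sequence is not new syntax. add_options() returns a small helper object (boost::program_options::options_description_easy_init) whose operator() registers one option and returns the helper again, so the whole option table is a single chained expression that the formatter simply folds at the column limit. A minimal, self-contained sketch of the idiom - the option names here are illustrative, not the real ones:

#include <boost/program_options.hpp>

#include <iostream>
#include <string>

int main(int argc, char** argv) {
    namespace po = boost::program_options;

    po::options_description desc("Options");
    // Each (...) group is one operator() call on the object returned by
    // add_options(); the chain below is a single C++ statement.
    desc.add_options()
        ("help", "Display this information and exit")
        ("jobid", po::value<std::string>(), "ID of the job")
        ("bucketspan", po::value<long>()->default_value(300), "Bucket span in seconds");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm);

    if (vm.count("help") > 0) {
        std::cerr << desc << std::endl;
        return 1;
    }
    if (vm.count("jobid") > 0) {
        // as<T>() must name the same T that was passed to po::value<T>().
        std::cout << "job ID: " << vm["jobid"].as<std::string>() << '\n';
    }
    return 0;
}

The long vm.count(...) > 0 cascade in the parser above works the same way: an option left off the command line is simply absent from the variables_map, so the caller's default value survives untouched.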
(vm.count("persistInterval") > 0) - { + if (vm.count("persistInterval") > 0) { persistInterval = vm["persistInterval"].as(); } - if (vm.count("maxQuantileInterval") > 0) - { + if (vm.count("maxQuantileInterval") > 0) { maxQuantileInterval = vm["maxQuantileInterval"].as(); } - if (vm.count("input") > 0) - { + if (vm.count("input") > 0) { inputFileName = vm["input"].as(); } - if (vm.count("inputIsPipe") > 0) - { + if (vm.count("inputIsPipe") > 0) { isInputFileNamedPipe = true; } - if (vm.count("output") > 0) - { + if (vm.count("output") > 0) { outputFileName = vm["output"].as(); } - if (vm.count("outputIsPipe") > 0) - { + if (vm.count("outputIsPipe") > 0) { isOutputFileNamedPipe = true; } - if (vm.count("restore") > 0) - { + if (vm.count("restore") > 0) { restoreFileName = vm["restore"].as(); } - if (vm.count("restoreIsPipe") > 0) - { + if (vm.count("restoreIsPipe") > 0) { isRestoreFileNamedPipe = true; } - if (vm.count("persist") > 0) - { + if (vm.count("persist") > 0) { persistFileName = vm["persist"].as(); } - if (vm.count("persistIsPipe") > 0) - { + if (vm.count("persistIsPipe") > 0) { isPersistFileNamedPipe = true; } - if (vm.count("maxAnomalyRecords") > 0) - { + if (vm.count("maxAnomalyRecords") > 0) { maxAnomalyRecords = vm["maxAnomalyRecords"].as(); } - if (vm.count("memoryUsage") > 0) - { + if (vm.count("memoryUsage") > 0) { memoryUsage = true; } - if (vm.count("resultFinalizationWindow") > 0) - { + if (vm.count("resultFinalizationWindow") > 0) { bucketResultsDelay = vm["resultFinalizationWindow"].as(); } - if (vm.count("multivariateByFields") > 0) - { + if (vm.count("multivariateByFields") > 0) { multivariateByFields = true; } - if (vm.count("multipleBucketspans") > 0) - { + if (vm.count("multipleBucketspans") > 0) { multipleBucketspans = vm["multipleBucketspans"].as(); } - if (vm.count("perPartitionNormalization") > 0) - { + if (vm.count("perPartitionNormalization") > 0) { perPartitionNormalization = true; } - boost::program_options::collect_unrecognized(parsed.options, - boost::program_options::include_positional).swap(clauseTokens); - } - catch (std::exception &e) - { + boost::program_options::collect_unrecognized(parsed.options, boost::program_options::include_positional).swap(clauseTokens); + } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; return false; } return true; } - - } } diff --git a/bin/autodetect/CCmdLineParser.h b/bin/autodetect/CCmdLineParser.h index 28c5e3a202..ad4f60fc0d 100644 --- a/bin/autodetect/CCmdLineParser.h +++ b/bin/autodetect/CCmdLineParser.h @@ -11,12 +11,8 @@ #include #include - -namespace ml -{ -namespace autodetect -{ - +namespace ml { +namespace autodetect { //! \brief //! Very simple command line parser. @@ -28,56 +24,53 @@ namespace autodetect //! Put in a class rather than main to allow testing. //! TODO make this generic. //! -class CCmdLineParser -{ - public: - typedef std::vector TStrVec; +class CCmdLineParser { +public: + typedef std::vector TStrVec; - public: - //! Parse the arguments and return options if appropriate. Unamed - //! options are placed in a vector for further processing/validation - //! later on by the api::CFieldConfig class. 
- static bool parse(int argc, - const char * const *argv, - std::string &limitConfigFile, - std::string &modelConfigFile, - std::string &fieldConfigFile, - std::string &modelPlotConfigFile, - std::string &jobId, - std::string &logProperties, - std::string &logPipe, - core_t::TTime &bucketSpan, - core_t::TTime &latency, - std::string &summaryCountFieldName, - char &delimiter, - bool &lengthEncodedInput, - std::string &timeField, - std::string &timeFormat, - std::string &quantilesState, - bool &deleteStateFiles, - core_t::TTime &persistInterval, - core_t::TTime &maxQuantileInterval, - std::string &inputFileName, - bool &isInputFileNamedPipe, - std::string &outputFileName, - bool &isOutputFileNamedPipe, - std::string &restoreFileName, - bool &isRestoreFileNamedPipe, - std::string &persistFileName, - bool &isPersistFileNamedPipe, - size_t &maxAnomalyRecords, - bool &memoryUsage, - std::size_t &bucketResultsDelay, - bool &multivariateByFields, - std::string &multipleBucketspans, - bool &perPartitionNormalization, - TStrVec &clauseTokens); +public: + //! Parse the arguments and return options if appropriate. Unamed + //! options are placed in a vector for further processing/validation + //! later on by the api::CFieldConfig class. + static bool parse(int argc, + const char* const* argv, + std::string& limitConfigFile, + std::string& modelConfigFile, + std::string& fieldConfigFile, + std::string& modelPlotConfigFile, + std::string& jobId, + std::string& logProperties, + std::string& logPipe, + core_t::TTime& bucketSpan, + core_t::TTime& latency, + std::string& summaryCountFieldName, + char& delimiter, + bool& lengthEncodedInput, + std::string& timeField, + std::string& timeFormat, + std::string& quantilesState, + bool& deleteStateFiles, + core_t::TTime& persistInterval, + core_t::TTime& maxQuantileInterval, + std::string& inputFileName, + bool& isInputFileNamedPipe, + std::string& outputFileName, + bool& isOutputFileNamedPipe, + std::string& restoreFileName, + bool& isRestoreFileNamedPipe, + std::string& persistFileName, + bool& isPersistFileNamedPipe, + size_t& maxAnomalyRecords, + bool& memoryUsage, + std::size_t& bucketResultsDelay, + bool& multivariateByFields, + std::string& multipleBucketspans, + bool& perPartitionNormalization, + TStrVec& clauseTokens); - private: - static const std::string DESCRIPTION; +private: + static const std::string DESCRIPTION; }; - - } } diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 67c0ab01f2..5038ac99ad 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -17,9 +17,9 @@ #include #include #include -#include #include #include +#include #include @@ -53,45 +53,43 @@ #include #include - -int main(int argc, char **argv) -{ +int main(int argc, char** argv) { using TStrVec = ml::autodetect::CCmdLineParser::TStrVec; // Read command line options - std::string limitConfigFile; - std::string modelConfigFile; - std::string fieldConfigFile; - std::string modelPlotConfigFile; - std::string jobId; - std::string logProperties; - std::string logPipe; + std::string limitConfigFile; + std::string modelConfigFile; + std::string fieldConfigFile; + std::string modelPlotConfigFile; + std::string jobId; + std::string logProperties; + std::string logPipe; ml::core_t::TTime bucketSpan(0); ml::core_t::TTime latency(0); - std::string summaryCountFieldName; - char delimiter('\t'); - bool lengthEncodedInput(false); - std::string timeField(ml::api::CAnomalyJob::DEFAULT_TIME_FIELD_NAME); - std::string timeFormat; - std::string quantilesStateFile; - bool 
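The "Unnamed options" comment above explains the trailing clauseTokens parameter: anything on the command line that is not a registered option (the detector clause, for example a "count by status" expression) must survive parsing so that api::CFieldConfig can interpret it later. CCmdLineParser.cc achieves this with allow_unregistered() plus collect_unrecognized(), as in this minimal sketch of the mechanism (the registered option name is illustrative):

#include <boost/program_options.hpp>

#include <iostream>
#include <string>
#include <vector>

int main(int argc, char** argv) {
    namespace po = boost::program_options;

    po::options_description desc("Options");
    desc.add_options()("jobid", po::value<std::string>(), "ID of the job");

    // allow_unregistered() keeps tokens that match no declared option
    // instead of throwing; collect_unrecognized() then extracts them,
    // including positional arguments, in their original order.
    po::parsed_options parsed =
        po::command_line_parser(argc, argv).options(desc).allow_unregistered().run();
    po::variables_map vm;
    po::store(parsed, vm);
    po::notify(vm);

    std::vector<std::string> clauseTokens =
        po::collect_unrecognized(parsed.options, po::include_positional);
    for (const std::string& token : clauseTokens) {
        std::cout << "clause token: " << token << '\n';
    }
    return 0;
}

Run with, say, "--jobid 42 count by status", this consumes "--jobid" as a normal option and prints the three clause tokens, which is exactly the split the parse() contract describes.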
deleteStateFiles(false); + std::string summaryCountFieldName; + char delimiter('\t'); + bool lengthEncodedInput(false); + std::string timeField(ml::api::CAnomalyJob::DEFAULT_TIME_FIELD_NAME); + std::string timeFormat; + std::string quantilesStateFile; + bool deleteStateFiles(false); ml::core_t::TTime persistInterval(-1); ml::core_t::TTime maxQuantileInterval(-1); - std::string inputFileName; - bool isInputFileNamedPipe(false); - std::string outputFileName; - bool isOutputFileNamedPipe(false); - std::string restoreFileName; - bool isRestoreFileNamedPipe(false); - std::string persistFileName; - bool isPersistFileNamedPipe(false); - size_t maxAnomalyRecords(100u); - bool memoryUsage(false); - std::size_t bucketResultsDelay(0); - bool multivariateByFields(false); - std::string multipleBucketspans; - bool perPartitionNormalization(false); - TStrVec clauseTokens; + std::string inputFileName; + bool isInputFileNamedPipe(false); + std::string outputFileName; + bool isOutputFileNamedPipe(false); + std::string restoreFileName; + bool isRestoreFileNamedPipe(false); + std::string persistFileName; + bool isPersistFileNamedPipe(false); + size_t maxAnomalyRecords(100u); + bool memoryUsage(false); + std::size_t bucketResultsDelay(0); + bool multivariateByFields(false); + std::string multipleBucketspans; + bool perPartitionNormalization(false); + TStrVec clauseTokens; if (ml::autodetect::CCmdLineParser::parse(argc, argv, limitConfigFile, @@ -126,8 +124,7 @@ int main(int argc, char **argv) multivariateByFields, multipleBucketspans, perPartitionNormalization, - clauseTokens) == false) - { + clauseTokens) == false) { return EXIT_FAILURE; } @@ -142,8 +139,7 @@ int main(int argc, char **argv) persistFileName, isPersistFileNamedPipe); - if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) - { + if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) { LOG_FATAL("Could not reconfigure logging"); return EXIT_FAILURE; } @@ -155,118 +151,87 @@ int main(int argc, char **argv) ml::core::CProcessPriority::reducePriority(); - if (ioMgr.initIo() == false) - { + if (ioMgr.initIo() == false) { LOG_FATAL("Failed to initialise IO"); return EXIT_FAILURE; } - if (jobId.empty()) - { + if (jobId.empty()) { LOG_FATAL("No job ID specified"); return EXIT_FAILURE; } ml::model::CLimits limits; - if (!limitConfigFile.empty() && limits.init(limitConfigFile) == false) - { - LOG_FATAL("Ml limit config file '" << limitConfigFile << - "' could not be loaded"); + if (!limitConfigFile.empty() && limits.init(limitConfigFile) == false) { + LOG_FATAL("Ml limit config file '" << limitConfigFile << "' could not be loaded"); return EXIT_FAILURE; } ml::api::CFieldConfig fieldConfig; - ml::model_t::ESummaryMode summaryMode(summaryCountFieldName.empty() ? ml::model_t::E_None - : ml::model_t::E_Manual); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(bucketSpan, - summaryMode, - summaryCountFieldName, - latency, - bucketResultsDelay, - multivariateByFields, - multipleBucketspans); + ml::model_t::ESummaryMode summaryMode(summaryCountFieldName.empty() ? 
ml::model_t::E_None : ml::model_t::E_Manual); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( + bucketSpan, summaryMode, summaryCountFieldName, latency, bucketResultsDelay, multivariateByFields, multipleBucketspans); modelConfig.perPartitionNormalization(perPartitionNormalization); - modelConfig.detectionRules( - ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef(fieldConfig.detectionRules())); - modelConfig.scheduledEvents( - ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef(fieldConfig.scheduledEvents())); - - if (!modelConfigFile.empty() && modelConfig.init(modelConfigFile) == false) - { - LOG_FATAL("Ml model config file '" << modelConfigFile << - "' could not be loaded"); + modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef(fieldConfig.detectionRules())); + modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef(fieldConfig.scheduledEvents())); + + if (!modelConfigFile.empty() && modelConfig.init(modelConfigFile) == false) { + LOG_FATAL("Ml model config file '" << modelConfigFile << "' could not be loaded"); return EXIT_FAILURE; } - if (!modelPlotConfigFile.empty() && modelConfig.configureModelPlot(modelPlotConfigFile) == false) - { - LOG_FATAL("Ml model plot config file '" << modelPlotConfigFile << - "' could not be loaded"); + if (!modelPlotConfigFile.empty() && modelConfig.configureModelPlot(modelPlotConfigFile) == false) { + LOG_FATAL("Ml model plot config file '" << modelPlotConfigFile << "' could not be loaded"); return EXIT_FAILURE; } using TScopedDataSearcherP = boost::scoped_ptr; TScopedDataSearcherP restoreSearcher; - if (ioMgr.restoreStream() != 0) - { + if (ioMgr.restoreStream() != 0) { // Check whether state is restored from a file, if so we assume that this is a debugging case // and therefore does not originate from X-Pack. - if (!isRestoreFileNamedPipe) - { + if (!isRestoreFileNamedPipe) { // apply a filter to overcome differences in the way persistence vs. 
restore works auto strm = boost::make_shared(); strm->push(ml::api::CStateRestoreStreamFilter()); strm->push(*ioMgr.restoreStream()); restoreSearcher.reset(new ml::api::CSingleStreamSearcher(strm)); - } - else - { + } else { restoreSearcher.reset(new ml::api::CSingleStreamSearcher(ioMgr.restoreStream())); } } using TScopedDataAdderP = boost::scoped_ptr; TScopedDataAdderP persister; - if (ioMgr.persistStream() != 0) - { + if (ioMgr.persistStream() != 0) { persister.reset(new ml::api::CSingleStreamDataAdder(ioMgr.persistStream())); } using TScopedBackgroundPersisterP = boost::scoped_ptr; TScopedBackgroundPersisterP periodicPersister; - if (persistInterval >= 0) - { - if (persister == 0) - { + if (persistInterval >= 0) { + if (persister == 0) { LOG_FATAL("Periodic persistence cannot be enabled using the 'persistInterval' argument " "unless a place to persist to has been specified using the 'persist' argument"); return EXIT_FAILURE; } - periodicPersister.reset(new ml::api::CBackgroundPersister(persistInterval, - *persister)); + periodicPersister.reset(new ml::api::CBackgroundPersister(persistInterval, *persister)); } using TScopedInputParserP = boost::scoped_ptr; TScopedInputParserP inputParser; - if (lengthEncodedInput) - { + if (lengthEncodedInput) { inputParser.reset(new ml::api::CLengthEncodedInputParser(ioMgr.inputStream())); - } - else - { - inputParser.reset(new ml::api::CCsvInputParser(ioMgr.inputStream(), - delimiter)); + } else { + inputParser.reset(new ml::api::CCsvInputParser(ioMgr.inputStream(), delimiter)); } ml::core::CJsonOutputStreamWrapper wrappedOutputStream(ioMgr.outputStream()); ml::api::CModelSnapshotJsonWriter modelSnapshotWriter(jobId, wrappedOutputStream); - if (fieldConfig.initFromCmdLine(fieldConfigFile, - clauseTokens) == false) - { + if (fieldConfig.initFromCmdLine(fieldConfigFile, clauseTokens) == false) { LOG_FATAL("Field config could not be interpreted"); return EXIT_FAILURE; } @@ -284,20 +249,17 @@ int main(int argc, char **argv) timeFormat, maxAnomalyRecords); - if (!quantilesStateFile.empty()) - { - if (job.initNormalizer(quantilesStateFile) == false) - { + if (!quantilesStateFile.empty()) { + if (job.initNormalizer(quantilesStateFile) == false) { LOG_FATAL("Failed to restore quantiles and initialize normalizer"); return EXIT_FAILURE; } - if (deleteStateFiles) - { + if (deleteStateFiles) { ::remove(quantilesStateFile.c_str()); } } - ml::api::CDataProcessor *firstProcessor(&job); + ml::api::CDataProcessor* firstProcessor(&job); // Chain the categorizer's output to the anomaly detector's input ml::api::COutputChainer outputChainer(job); @@ -307,24 +269,18 @@ int main(int argc, char **argv) // The typer knows how to assign categories to records ml::api::CFieldDataTyper typer(jobId, fieldConfig, limits, outputChainer, fieldDataTyperOutputWriter); - if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0) - { + if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0) { LOG_DEBUG("Applying the categorization typer for anomaly detection"); firstProcessor = &typer; } - if (periodicPersister != nullptr) - { - periodicPersister->firstProcessorPeriodicPersistFunc(boost::bind(&ml::api::CDataProcessor::periodicPersistState, - firstProcessor, - _1)); + if (periodicPersister != nullptr) { + periodicPersister->firstProcessorPeriodicPersistFunc( + boost::bind(&ml::api::CDataProcessor::periodicPersistState, firstProcessor, _1)); } // The skeleton avoids the need to duplicate a lot of boilerplate code - 
ml::api::CCmdSkeleton skeleton(restoreSearcher.get(), - persister.get(), - *inputParser, - *firstProcessor); + ml::api::CCmdSkeleton skeleton(restoreSearcher.get(), persister.get(), *inputParser, *firstProcessor); bool ioLoopSucceeded(skeleton.ioLoop()); // Unfortunately we cannot rely on destruction to finalise the output writer @@ -333,14 +289,12 @@ int main(int argc, char **argv) // writer as it was constructed last. fieldDataTyperOutputWriter.finalise(); - if (!ioLoopSucceeded) - { + if (!ioLoopSucceeded) { LOG_FATAL("Ml anomaly detector job failed"); return EXIT_FAILURE; } - if (memoryUsage) - { + if (memoryUsage) { job.descriptionAndDebugMemoryUsage(); } diff --git a/bin/categorize/CCmdLineParser.cc b/bin/categorize/CCmdLineParser.cc index cf1de1f609..dfdba5fb06 100644 --- a/bin/categorize/CCmdLineParser.cc +++ b/bin/categorize/CCmdLineParser.cc @@ -11,158 +11,124 @@ #include +namespace ml { +namespace categorize { -namespace ml -{ -namespace categorize -{ - -const std::string CCmdLineParser::DESCRIPTION = -"Usage: categorize [options]\n" -"Options:"; +const std::string CCmdLineParser::DESCRIPTION = "Usage: categorize [options]\n" + "Options:"; bool CCmdLineParser::parse(int argc, - const char * const *argv, - std::string &limitConfigFile, - std::string &jobId, - std::string &logProperties, - std::string &logPipe, - char &delimiter, - bool &lengthEncodedInput, - core_t::TTime &persistInterval, - std::string &inputFileName, - bool &isInputFileNamedPipe, - std::string &outputFileName, - bool &isOutputFileNamedPipe, - std::string &restoreFileName, - bool &isRestoreFileNamedPipe, - std::string &persistFileName, - bool &isPersistFileNamedPipe, - std::string &categorizationFieldName) -{ - try - { + const char* const* argv, + std::string& limitConfigFile, + std::string& jobId, + std::string& logProperties, + std::string& logPipe, + char& delimiter, + bool& lengthEncodedInput, + core_t::TTime& persistInterval, + std::string& inputFileName, + bool& isInputFileNamedPipe, + std::string& outputFileName, + bool& isOutputFileNamedPipe, + std::string& restoreFileName, + bool& isRestoreFileNamedPipe, + std::string& persistFileName, + bool& isPersistFileNamedPipe, + std::string& categorizationFieldName) { + try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options() - ("help", "Display this information and exit") - ("version", "Display version information and exit") - ("limitconfig", boost::program_options::value(), - "Optional limit config file") - ("jobid", boost::program_options::value(), - "ID of the job this process is associated with") - ("logProperties", boost::program_options::value(), - "Optional logger properties file") - ("logPipe", boost::program_options::value(), - "Optional log to named pipe") - ("delimiter", boost::program_options::value(), - "Optional delimiter character for delimited data formats - default is '\t' (tab separated)") - ("lengthEncodedInput", - "Take input in length encoded binary format - default is delimited") - ("input", boost::program_options::value(), - "Optional file to read input from - not present means read from STDIN") - ("inputIsPipe", "Specified input file is a named pipe") - ("output", boost::program_options::value(), - "Optional file to write output to - not present means write to STDOUT") - ("outputIsPipe", "Specified output file is a named pipe") - ("restore", boost::program_options::value(), - "Optional file to restore state from - not present means no state restoration") - ("restoreIsPipe", "Specified restore file is a 
named pipe") - ("persist", boost::program_options::value(), - "Optional file to persist state to - not present means no state persistence") - ("persistIsPipe", "Specified persist file is a named pipe") - ("persistInterval", boost::program_options::value(), - "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at program exit") - ("categorizationfield", boost::program_options::value(), - "Field to compute mlcategory from") - ; + desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( + "limitconfig", boost::program_options::value(), "Optional limit config file")( + "jobid", boost::program_options::value(), "ID of the job this process is associated with")( + "logProperties", boost::program_options::value(), "Optional logger properties file")( + "logPipe", boost::program_options::value(), "Optional log to named pipe")( + "delimiter", + boost::program_options::value(), + "Optional delimiter character for delimited data formats - default is '\t' (tab separated)")( + "lengthEncodedInput", "Take input in length encoded binary format - default is delimited")( + "input", boost::program_options::value(), "Optional file to read input from - not present means read from STDIN")( + "inputIsPipe", "Specified input file is a named pipe")( + "output", boost::program_options::value(), "Optional file to write output to - not present means write to STDOUT")( + "outputIsPipe", "Specified output file is a named pipe")( + "restore", + boost::program_options::value(), + "Optional file to restore state from - not present means no state restoration")("restoreIsPipe", + "Specified restore file is a named pipe")( + "persist", + boost::program_options::value(), + "Optional file to persist state to - not present means no state persistence")("persistIsPipe", + "Specified persist file is a named pipe")( + "persistInterval", + boost::program_options::value(), + "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at " + "program exit")("categorizationfield", boost::program_options::value(), "Field to compute mlcategory from"); boost::program_options::variables_map vm; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); - if (vm.count("help") > 0) - { + if (vm.count("help") > 0) { std::cerr << desc << std::endl; return false; } - if (vm.count("version") > 0) - { + if (vm.count("version") > 0) { std::cerr << ver::CBuildInfo::fullInfo() << std::endl; return false; } - if (vm.count("limitconfig") > 0) - { + if (vm.count("limitconfig") > 0) { limitConfigFile = vm["limitconfig"].as(); } - if (vm.count("jobid") > 0) - { + if (vm.count("jobid") > 0) { jobId = vm["jobid"].as(); } - if (vm.count("logProperties") > 0) - { + if (vm.count("logProperties") > 0) { logProperties = vm["logProperties"].as(); } - if (vm.count("logPipe") > 0) - { + if (vm.count("logPipe") > 0) { logPipe = vm["logPipe"].as(); } - if (vm.count("delimiter") > 0) - { + if (vm.count("delimiter") > 0) { delimiter = vm["delimiter"].as(); } - if (vm.count("lengthEncodedInput") > 0) - { + if (vm.count("lengthEncodedInput") > 0) { lengthEncodedInput = true; } - if (vm.count("persistInterval") > 0) - { + if (vm.count("persistInterval") > 0) { persistInterval = vm["persistInterval"].as(); } - if (vm.count("input") > 0) - { + if (vm.count("input") > 0) { inputFileName = vm["input"].as(); } - if 
(vm.count("inputIsPipe") > 0) - { + if (vm.count("inputIsPipe") > 0) { isInputFileNamedPipe = true; } - if (vm.count("output") > 0) - { + if (vm.count("output") > 0) { outputFileName = vm["output"].as(); } - if (vm.count("outputIsPipe") > 0) - { + if (vm.count("outputIsPipe") > 0) { isOutputFileNamedPipe = true; } - if (vm.count("restore") > 0) - { + if (vm.count("restore") > 0) { restoreFileName = vm["restore"].as(); } - if (vm.count("restoreIsPipe") > 0) - { + if (vm.count("restoreIsPipe") > 0) { isRestoreFileNamedPipe = true; } - if (vm.count("persist") > 0) - { + if (vm.count("persist") > 0) { persistFileName = vm["persist"].as(); } - if (vm.count("persistIsPipe") > 0) - { + if (vm.count("persistIsPipe") > 0) { isPersistFileNamedPipe = true; } - if (vm.count("categorizationfield") > 0) - { + if (vm.count("categorizationfield") > 0) { categorizationFieldName = vm["categorizationfield"].as(); } - } - catch (std::exception &e) - { + } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; return false; } return true; } - } } diff --git a/bin/categorize/CCmdLineParser.h b/bin/categorize/CCmdLineParser.h index c7aa92f3de..2effef35d2 100644 --- a/bin/categorize/CCmdLineParser.h +++ b/bin/categorize/CCmdLineParser.h @@ -10,12 +10,8 @@ #include - -namespace ml -{ -namespace categorize -{ - +namespace ml { +namespace categorize { //! \brief //! Very simple command line parser. @@ -27,36 +23,33 @@ namespace categorize //! Put in a class rather than main to allow testing. //! TODO make this generic. //! -class CCmdLineParser -{ - public: - //! Parse the arguments and return options if appropriate. Unamed - //! options are placed in a vector for further processing/validation - //! later on by the api::CFieldConfig class. - static bool parse(int argc, - const char * const *argv, - std::string &limitConfigFile, - std::string &jobId, - std::string &logProperties, - std::string &logPipe, - char &delimiter, - bool &lengthEncodedInput, - core_t::TTime &persistInterval, - std::string &inputFileName, - bool &isInputFileNamedPipe, - std::string &outputFileName, - bool &isOutputFileNamedPipe, - std::string &restoreFileName, - bool &isRestoreFileNamedPipe, - std::string &persistFileName, - bool &isPersistFileNamedPipe, - std::string &categorizationFieldName); - - private: - static const std::string DESCRIPTION; +class CCmdLineParser { +public: + //! Parse the arguments and return options if appropriate. Unamed + //! options are placed in a vector for further processing/validation + //! later on by the api::CFieldConfig class. 
+ static bool parse(int argc, + const char* const* argv, + std::string& limitConfigFile, + std::string& jobId, + std::string& logProperties, + std::string& logPipe, + char& delimiter, + bool& lengthEncodedInput, + core_t::TTime& persistInterval, + std::string& inputFileName, + bool& isInputFileNamedPipe, + std::string& outputFileName, + bool& isOutputFileNamedPipe, + std::string& restoreFileName, + bool& isRestoreFileNamedPipe, + std::string& persistFileName, + bool& isPersistFileNamedPipe, + std::string& categorizationFieldName); + +private: + static const std::string DESCRIPTION; }; - - } } diff --git a/bin/categorize/Main.cc b/bin/categorize/Main.cc index ff70aeb8ed..2bfd3e0058 100644 --- a/bin/categorize/Main.cc +++ b/bin/categorize/Main.cc @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include @@ -48,26 +48,24 @@ #include - -int main(int argc, char **argv) -{ +int main(int argc, char** argv) { // Read command line options - std::string limitConfigFile; - std::string jobId; - std::string logProperties; - std::string logPipe; - char delimiter('\t'); - bool lengthEncodedInput(false); + std::string limitConfigFile; + std::string jobId; + std::string logProperties; + std::string logPipe; + char delimiter('\t'); + bool lengthEncodedInput(false); ml::core_t::TTime persistInterval(-1); - std::string inputFileName; - bool isInputFileNamedPipe(false); - std::string outputFileName; - bool isOutputFileNamedPipe(false); - std::string restoreFileName; - bool isRestoreFileNamedPipe(false); - std::string persistFileName; - bool isPersistFileNamedPipe(false); - std::string categorizationFieldName; + std::string inputFileName; + bool isInputFileNamedPipe(false); + std::string outputFileName; + bool isOutputFileNamedPipe(false); + std::string restoreFileName; + bool isRestoreFileNamedPipe(false); + std::string persistFileName; + bool isPersistFileNamedPipe(false); + std::string categorizationFieldName; if (ml::categorize::CCmdLineParser::parse(argc, argv, limitConfigFile, @@ -85,8 +83,7 @@ int main(int argc, char **argv) isRestoreFileNamedPipe, persistFileName, isPersistFileNamedPipe, - categorizationFieldName) == false) - { + categorizationFieldName) == false) { return EXIT_FAILURE; } @@ -101,8 +98,7 @@ int main(int argc, char **argv) persistFileName, isPersistFileNamedPipe); - if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) - { + if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) { LOG_FATAL("Could not reconfigure logging"); return EXIT_FAILURE; } @@ -114,28 +110,23 @@ int main(int argc, char **argv) ml::core::CProcessPriority::reducePriority(); - if (ioMgr.initIo() == false) - { + if (ioMgr.initIo() == false) { LOG_FATAL("Failed to initialise IO"); return EXIT_FAILURE; } - if (jobId.empty()) - { + if (jobId.empty()) { LOG_FATAL("No job ID specified"); return EXIT_FAILURE; } ml::model::CLimits limits; - if (!limitConfigFile.empty() && limits.init(limitConfigFile) == false) - { - LOG_FATAL("Ml limit config file '" << limitConfigFile << - "' could not be loaded"); + if (!limitConfigFile.empty() && limits.init(limitConfigFile) == false) { + LOG_FATAL("Ml limit config file '" << limitConfigFile << "' could not be loaded"); return EXIT_FAILURE; } - if (categorizationFieldName.empty()) - { + if (categorizationFieldName.empty()) { LOG_FATAL("No categorization field name specified"); return EXIT_FAILURE; } @@ -143,56 +134,44 @@ int main(int argc, char **argv) using TScopedDataSearcherP = boost::scoped_ptr; 
TScopedDataSearcherP restoreSearcher; - if (ioMgr.restoreStream() != 0) - { + if (ioMgr.restoreStream() != 0) { // Check whether state is restored from a file, if so we assume that this is a debugging case // and therefore does not originate from X-Pack. - if (!isRestoreFileNamedPipe) - { + if (!isRestoreFileNamedPipe) { // apply a filter to overcome differences in the way persistence vs. restore works auto strm = boost::make_shared(); strm->push(ml::api::CStateRestoreStreamFilter()); strm->push(*ioMgr.restoreStream()); restoreSearcher.reset(new ml::api::CSingleStreamSearcher(strm)); - } - else - { + } else { restoreSearcher.reset(new ml::api::CSingleStreamSearcher(ioMgr.restoreStream())); } } using TScopedDataAdderP = boost::scoped_ptr; TScopedDataAdderP persister; - if (ioMgr.persistStream() != 0) - { + if (ioMgr.persistStream() != 0) { persister.reset(new ml::api::CSingleStreamDataAdder(ioMgr.persistStream())); } using TScopedBackgroundPersisterP = boost::scoped_ptr; TScopedBackgroundPersisterP periodicPersister; - if (persistInterval >= 0) - { - if (persister == 0) - { + if (persistInterval >= 0) { + if (persister == 0) { LOG_FATAL("Periodic persistence cannot be enabled using the 'persistInterval' argument " "unless a place to persist to has been specified using the 'persist' argument"); return EXIT_FAILURE; } - periodicPersister.reset(new ml::api::CBackgroundPersister(persistInterval, - *persister)); + periodicPersister.reset(new ml::api::CBackgroundPersister(persistInterval, *persister)); } using TScopedInputParserP = boost::scoped_ptr; TScopedInputParserP inputParser; - if (lengthEncodedInput) - { + if (lengthEncodedInput) { inputParser.reset(new ml::api::CLengthEncodedInputParser(ioMgr.inputStream())); - } - else - { - inputParser.reset(new ml::api::CCsvInputParser(ioMgr.inputStream(), - delimiter)); + } else { + inputParser.reset(new ml::api::CCsvInputParser(ioMgr.inputStream(), delimiter)); } ml::core::CJsonOutputStreamWrapper wrappedOutputStream(ioMgr.outputStream()); @@ -205,25 +184,14 @@ int main(int argc, char **argv) ml::api::CJsonOutputWriter outputWriter(jobId, wrappedOutputStream); // The typer knows how to assign categories to records - ml::api::CFieldDataTyper typer(jobId, - fieldConfig, - limits, - nullOutput, - outputWriter, - periodicPersister.get()); - - if (periodicPersister != nullptr) - { - periodicPersister->firstProcessorPeriodicPersistFunc(boost::bind(&ml::api::CFieldDataTyper::periodicPersistState, - &typer, - _1)); + ml::api::CFieldDataTyper typer(jobId, fieldConfig, limits, nullOutput, outputWriter, periodicPersister.get()); + + if (periodicPersister != nullptr) { + periodicPersister->firstProcessorPeriodicPersistFunc(boost::bind(&ml::api::CFieldDataTyper::periodicPersistState, &typer, _1)); } // The skeleton avoids the need to duplicate a lot of boilerplate code - ml::api::CCmdSkeleton skeleton(restoreSearcher.get(), - persister.get(), - *inputParser, - typer); + ml::api::CCmdSkeleton skeleton(restoreSearcher.get(), persister.get(), *inputParser, typer); bool ioLoopSucceeded(skeleton.ioLoop()); // Unfortunately we cannot rely on destruction to finalise the output writer @@ -232,8 +200,7 @@ int main(int argc, char **argv) // writer as it was constructed last. 
outputWriter.finalise(); - if (!ioLoopSucceeded) - { + if (!ioLoopSucceeded) { LOG_FATAL("Ml categorization job failed"); return EXIT_FAILURE; } diff --git a/bin/controller/CBlockingCallCancellerThread.cc b/bin/controller/CBlockingCallCancellerThread.cc index a1b84ffa2d..cd2bc5f232 100644 --- a/bin/controller/CBlockingCallCancellerThread.cc +++ b/bin/controller/CBlockingCallCancellerThread.cc @@ -9,54 +9,35 @@ #include +namespace ml { +namespace controller { -namespace ml -{ -namespace controller -{ - - -CBlockingCallCancellerThread::CBlockingCallCancellerThread(core::CThread::TThreadId potentiallyBlockedThreadId, - std::istream &monitorStream) - : m_PotentiallyBlockedThreadId(potentiallyBlockedThreadId), - m_MonitorStream(monitorStream), - m_Shutdown(false) -{ +CBlockingCallCancellerThread::CBlockingCallCancellerThread(core::CThread::TThreadId potentiallyBlockedThreadId, std::istream& monitorStream) + : m_PotentiallyBlockedThreadId(potentiallyBlockedThreadId), m_MonitorStream(monitorStream), m_Shutdown(false) { } -void CBlockingCallCancellerThread::run() -{ +void CBlockingCallCancellerThread::run() { char c; - while (m_MonitorStream >> c) - { - if (m_Shutdown) - { + while (m_MonitorStream >> c) { + if (m_Shutdown) { return; } } - if (core::CThread::cancelBlockedIo(m_PotentiallyBlockedThreadId) == false) - { - LOG_WARN("Failed to cancel blocked IO in thread " << - m_PotentiallyBlockedThreadId); + if (core::CThread::cancelBlockedIo(m_PotentiallyBlockedThreadId) == false) { + LOG_WARN("Failed to cancel blocked IO in thread " << m_PotentiallyBlockedThreadId); } } -void CBlockingCallCancellerThread::shutdown() -{ +void CBlockingCallCancellerThread::shutdown() { m_Shutdown = true; // This is to wake up the stream reading in the run() method of this object. // If this has an effect then the assumption is that the program is exiting // due to a reason other than the stream this object is monitoring ending. - if (this->cancelBlockedIo() == false) - { - LOG_WARN("Failed to cancel blocked IO in thread " << - this->currentThreadId()); + if (this->cancelBlockedIo() == false) { + LOG_WARN("Failed to cancel blocked IO in thread " << this->currentThreadId()); } } - - } } - diff --git a/bin/controller/CBlockingCallCancellerThread.h b/bin/controller/CBlockingCallCancellerThread.h index a2c4ec0b3a..81c53bbcd9 100644 --- a/bin/controller/CBlockingCallCancellerThread.h +++ b/bin/controller/CBlockingCallCancellerThread.h @@ -10,10 +10,8 @@ #include -namespace ml -{ -namespace controller -{ +namespace ml { +namespace controller { //! \brief //! Cancels blocking IO in one thread if a stream reaches end-of-file. @@ -36,32 +34,28 @@ namespace controller //! but will be blocked opening one of the named pipes. The blocking call //! needs to be cancelled to allow this process to exit gracefully. //! -class CBlockingCallCancellerThread : public core::CThread -{ - public: - CBlockingCallCancellerThread(core::CThread::TThreadId potentiallyBlockedThreadId, - std::istream &monitorStream); +class CBlockingCallCancellerThread : public core::CThread { +public: + CBlockingCallCancellerThread(core::CThread::TThreadId potentiallyBlockedThreadId, std::istream& monitorStream); - protected: - //! Called when the thread is started. - virtual void run(); +protected: + //! Called when the thread is started. + virtual void run(); - //! Called when the thread is stopped. - virtual void shutdown(); + //! Called when the thread is stopped. + virtual void shutdown(); - private: - //! 
Thread ID of the thread that this object will cancel blocking IO in - //! if it detects end-of-file on its input stream. - core::CThread::TThreadId m_PotentiallyBlockedThreadId; +private: + //! Thread ID of the thread that this object will cancel blocking IO in + //! if it detects end-of-file on its input stream. + core::CThread::TThreadId m_PotentiallyBlockedThreadId; - //! Stream to monitor for end-of-file. - std::istream &m_MonitorStream; + //! Stream to monitor for end-of-file. + std::istream& m_MonitorStream; - //! Flag to indicate the thread should shut down - volatile bool m_Shutdown; + //! Flag to indicate the thread should shut down + volatile bool m_Shutdown; }; - - } } diff --git a/bin/controller/CCmdLineParser.cc b/bin/controller/CCmdLineParser.cc index a90788d1ec..06e5ca51e6 100644 --- a/bin/controller/CCmdLineParser.cc +++ b/bin/controller/CCmdLineParser.cc @@ -11,70 +11,51 @@ #include -namespace ml -{ -namespace controller -{ +namespace ml { +namespace controller { -const std::string CCmdLineParser::DESCRIPTION = -"Usage: controller [options]\n" -"Options"; +const std::string CCmdLineParser::DESCRIPTION = "Usage: controller [options]\n" + "Options"; -bool CCmdLineParser::parse(int argc, - const char * const *argv, - std::string &jvmPidStr, - std::string &logPipe, - std::string &commandPipe) -{ - try - { +bool CCmdLineParser::parse(int argc, const char* const* argv, std::string& jvmPidStr, std::string& logPipe, std::string& commandPipe) { + try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options() - ("help", "Display this information and exit") - ("version", "Display version information and exit") - ("jvmPid", boost::program_options::value(), - "Process ID of the JVM to communicate with - default is parent process PID") - ("logPipe", boost::program_options::value(), - "Named pipe to log to - default is controller_log_") - ("commandPipe", boost::program_options::value(), - "Named pipe to accept commands from - default is controller_command_") - ; + desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( + "jvmPid", + boost::program_options::value(), + "Process ID of the JVM to communicate with - default is parent process PID")( + "logPipe", boost::program_options::value(), "Named pipe to log to - default is controller_log_")( + "commandPipe", + boost::program_options::value(), + "Named pipe to accept commands from - default is controller_command_"); boost::program_options::variables_map vm; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); - if (vm.count("help") > 0) - { + if (vm.count("help") > 0) { std::cerr << desc << std::endl; return false; } - if (vm.count("version") > 0) - { + if (vm.count("version") > 0) { std::cerr << ver::CBuildInfo::fullInfo() << std::endl; return false; } - if (vm.count("jvmPid") > 0) - { + if (vm.count("jvmPid") > 0) { jvmPidStr = vm["jvmPid"].as(); } - if (vm.count("logPipe") > 0) - { + if (vm.count("logPipe") > 0) { logPipe = vm["logPipe"].as(); } - if (vm.count("commandPipe") > 0) - { + if (vm.count("commandPipe") > 0) { commandPipe = vm["commandPipe"].as(); } - } - catch (std::exception &e) - { + } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; return false; } return true; } - } } diff --git a/bin/controller/CCmdLineParser.h b/bin/controller/CCmdLineParser.h index 5dfd7e75e4..77891532a1 100644 --- 
a/bin/controller/CCmdLineParser.h +++ b/bin/controller/CCmdLineParser.h @@ -11,10 +11,8 @@ #include #include -namespace ml -{ -namespace controller -{ +namespace ml { +namespace controller { //! \brief //! Very simple command line parser. @@ -25,24 +23,17 @@ namespace controller //! IMPLEMENTATION DECISIONS:\n //! Put in a class rather than main to allow testing. //! -class CCmdLineParser -{ - public: - typedef std::vector TStrVec; +class CCmdLineParser { +public: + typedef std::vector TStrVec; - public: - //! Parse the arguments and return options if appropriate. - static bool parse(int argc, - const char * const *argv, - std::string &jvmPidStr, - std::string &logPipe, - std::string &commandPipe); +public: + //! Parse the arguments and return options if appropriate. + static bool parse(int argc, const char* const* argv, std::string& jvmPidStr, std::string& logPipe, std::string& commandPipe); - private: - static const std::string DESCRIPTION; +private: + static const std::string DESCRIPTION; }; - - } } diff --git a/bin/controller/CCommandProcessor.cc b/bin/controller/CCommandProcessor.cc index 0554a4671c..44f71719a2 100644 --- a/bin/controller/CCommandProcessor.cc +++ b/bin/controller/CCommandProcessor.cc @@ -14,58 +14,43 @@ #include #include - -namespace -{ +namespace { const std::string TAB(1, '\t'); const std::string EMPTY_STRING; } -namespace ml -{ -namespace controller -{ - +namespace ml { +namespace controller { // Initialise statics const std::string CCommandProcessor::START("start"); const std::string CCommandProcessor::KILL("kill"); - -CCommandProcessor::CCommandProcessor(const TStrVec &permittedProcessPaths) - : m_Spawner(permittedProcessPaths) -{ +CCommandProcessor::CCommandProcessor(const TStrVec& permittedProcessPaths) : m_Spawner(permittedProcessPaths) { } -void CCommandProcessor::processCommands(std::istream &stream) -{ +void CCommandProcessor::processCommands(std::istream& stream) { std::string command; - while (std::getline(stream, command)) - { - if (!command.empty()) - { + while (std::getline(stream, command)) { + if (!command.empty()) { this->handleCommand(command); } } } -bool CCommandProcessor::handleCommand(const std::string &command) -{ +bool CCommandProcessor::handleCommand(const std::string& command) { // Command lines must be tab-separated - TStrVec tokens; + TStrVec tokens; std::string remainder; core::CStringUtils::tokenise(TAB, command, tokens, remainder); - if (!remainder.empty()) - { + if (!remainder.empty()) { tokens.push_back(remainder); } // Multiple consecutive tabs might have caused empty tokens - tokens.erase(std::remove(tokens.begin(), tokens.end(), EMPTY_STRING), - tokens.end()); + tokens.erase(std::remove(tokens.begin(), tokens.end(), EMPTY_STRING), tokens.end()); - if (tokens.empty()) - { + if (tokens.empty()) { LOG_DEBUG("Ignoring empty command"); return false; } @@ -74,12 +59,10 @@ bool CCommandProcessor::handleCommand(const std::string &command) std::string verb(tokens[0]); tokens.erase(tokens.begin()); - if (verb == START) - { + if (verb == START) { return this->handleStart(tokens); } - if (verb == KILL) - { + if (verb == KILL) { return this->handleKill(tokens); } @@ -87,14 +70,12 @@ bool CCommandProcessor::handleCommand(const std::string &command) return false; } -bool CCommandProcessor::handleStart(TStrVec &tokens) -{ +bool CCommandProcessor::handleStart(TStrVec& tokens) { std::string processPath; processPath.swap(tokens[0]); tokens.erase(tokens.begin()); - if (m_Spawner.spawn(processPath, tokens) == false) - { + if 
(m_Spawner.spawn(processPath, tokens) == false) { LOG_ERROR("Failed to start process '" << processPath << '\''); return false; } @@ -102,27 +83,19 @@ bool CCommandProcessor::handleStart(TStrVec &tokens) return true; } -bool CCommandProcessor::handleKill(TStrVec &tokens) -{ +bool CCommandProcessor::handleKill(TStrVec& tokens) { core::CProcess::TPid pid = 0; - if (tokens.size() != 1 || - core::CStringUtils::stringToType(tokens[0], pid) == false) - { - LOG_ERROR("Unexpected arguments for kill command: " << - core::CContainerPrinter::print(tokens)); + if (tokens.size() != 1 || core::CStringUtils::stringToType(tokens[0], pid) == false) { + LOG_ERROR("Unexpected arguments for kill command: " << core::CContainerPrinter::print(tokens)); return false; } - if (m_Spawner.terminateChild(pid) == false) - { + if (m_Spawner.terminateChild(pid) == false) { LOG_ERROR("Failed to kill process with PID " << pid); return false; } return true; } - - } } - diff --git a/bin/controller/CCommandProcessor.h b/bin/controller/CCommandProcessor.h index 2320f2c4db..00a24b5f1e 100644 --- a/bin/controller/CCommandProcessor.h +++ b/bin/controller/CCommandProcessor.h @@ -12,11 +12,8 @@ #include #include - -namespace ml -{ -namespace controller -{ +namespace ml { +namespace controller { //! \brief //! Processes commands received on a C++ stream. @@ -44,44 +41,41 @@ namespace controller //! Only processes started by this controller may be killed; requests to //! kill other processes are ignored. //! -class CCommandProcessor -{ - public: - typedef std::vector TStrVec; +class CCommandProcessor { +public: + typedef std::vector TStrVec; - public: - //! Possible commands - static const std::string START; - static const std::string KILL; +public: + //! Possible commands + static const std::string START; + static const std::string KILL; - public: - CCommandProcessor(const TStrVec &permittedProcessPaths); +public: + CCommandProcessor(const TStrVec& permittedProcessPaths); - //! Action commands read from the supplied \p stream until end-of-file - //! is reached. - void processCommands(std::istream &stream); + //! Action commands read from the supplied \p stream until end-of-file + //! is reached. + void processCommands(std::istream& stream); - //! Parse and handle a single command. - bool handleCommand(const std::string &command); + //! Parse and handle a single command. + bool handleCommand(const std::string& command); - private: - //! Handle a start command. - //! \param tokens Tokens to the command excluding the verb. Passed - //! non-const so that this method can manipulate the - //! tokens without having to copy. - bool handleStart(TStrVec &tokens); +private: + //! Handle a start command. + //! \param tokens Tokens to the command excluding the verb. Passed + //! non-const so that this method can manipulate the + //! tokens without having to copy. + bool handleStart(TStrVec& tokens); - //! Handle a kill command. - //! \param tokens Expected to contain one element, namely the process - //! ID of the process to be killed. - bool handleKill(TStrVec &tokens); + //! Handle a kill command. + //! \param tokens Expected to contain one element, namely the process + //! ID of the process to be killed. + bool handleKill(TStrVec& tokens); - private: - //! Used to spawn/kill the requested processes. - core::CDetachedProcessSpawner m_Spawner; +private: + //! Used to spawn/kill the requested processes. 
+ core::CDetachedProcessSpawner m_Spawner; }; - - } } diff --git a/bin/controller/Main.cc b/bin/controller/Main.cc index e148cabd38..382b469449 100644 --- a/bin/controller/Main.cc +++ b/bin/controller/Main.cc @@ -58,33 +58,22 @@ #include #include - -int main(int argc, char **argv) -{ - const std::string &defaultNamedPipePath = - ml::core::CNamedPipeFactory::defaultPath(); - const std::string &progName = ml::core::CProgName::progName(); +int main(int argc, char** argv) { + const std::string& defaultNamedPipePath = ml::core::CNamedPipeFactory::defaultPath(); + const std::string& progName = ml::core::CProgName::progName(); // Read command line options - std::string jvmPidStr = - ml::core::CStringUtils::typeToString(ml::core::CProcess::instance().parentId()); + std::string jvmPidStr = ml::core::CStringUtils::typeToString(ml::core::CProcess::instance().parentId()); std::string logPipe; std::string commandPipe; - if (ml::controller::CCmdLineParser::parse(argc, - argv, - jvmPidStr, - logPipe, - commandPipe) == false) - { + if (ml::controller::CCmdLineParser::parse(argc, argv, jvmPidStr, logPipe, commandPipe) == false) { return EXIT_FAILURE; } - if (logPipe.empty()) - { + if (logPipe.empty()) { logPipe = defaultNamedPipePath + progName + "_log_" + jvmPidStr; } - if (commandPipe.empty()) - { + if (commandPipe.empty()) { commandPipe = defaultNamedPipePath + progName + "_command_" + jvmPidStr; } @@ -97,18 +86,15 @@ int main(int argc, char **argv) // 4) No plugin code ever runs // This thread will detect the death of the parent process because this // process's STDIN will be closed. - ml::controller::CBlockingCallCancellerThread cancellerThread(ml::core::CThread::currentThreadId(), - std::cin); - if (cancellerThread.start() == false) - { + ml::controller::CBlockingCallCancellerThread cancellerThread(ml::core::CThread::currentThreadId(), std::cin); + if (cancellerThread.start() == false) { // This log message will probably never been seen as it will go to the // real stderr of this process rather than the log pipe... LOG_FATAL("Could not start blocking call canceller thread"); return EXIT_FAILURE; } - if (ml::core::CLogger::instance().reconfigureLogToNamedPipe(logPipe) == false) - { + if (ml::core::CLogger::instance().reconfigureLogToNamedPipe(logPipe) == false) { LOG_FATAL("Could not reconfigure logging"); cancellerThread.stop(); return EXIT_FAILURE; @@ -123,10 +109,8 @@ int main(int argc, char **argv) // the controller is critical to the overall system. Also its resource // requirements should always be very low. 
- ml::core::CNamedPipeFactory::TIStreamP commandStream = - ml::core::CNamedPipeFactory::openPipeStreamRead(commandPipe); - if (commandStream == 0) - { + ml::core::CNamedPipeFactory::TIStreamP commandStream = ml::core::CNamedPipeFactory::openPipeStreamRead(commandPipe); + if (commandStream == 0) { LOG_FATAL("Could not open command pipe"); cancellerThread.stop(); return EXIT_FAILURE; @@ -135,11 +119,9 @@ int main(int argc, char **argv) // Change directory to the directory containing this program, because the // permitted paths all assume the current working directory contains the // permitted programs - const std::string &progDir = ml::core::CProgName::progDir(); - if (ml::core::COsFileFuncs::chdir(progDir.c_str()) == -1) - { - LOG_FATAL("Could not change directory to '" << progDir << "': " << - ::strerror(errno)); + const std::string& progDir = ml::core::CProgName::progDir(); + if (ml::core::COsFileFuncs::chdir(progDir.c_str()) == -1) { + LOG_FATAL("Could not change directory to '" << progDir << "': " << ::strerror(errno)); cancellerThread.stop(); return EXIT_FAILURE; } @@ -162,4 +144,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc b/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc index ee2837051b..3637c4b46f 100644 --- a/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc +++ b/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc @@ -14,54 +14,40 @@ #include +namespace { -namespace -{ +class CEofThread : public ml::core::CThread { +public: + CEofThread(ml::core::CDualThreadStreamBuf& buf) : m_Buf(buf) {} -class CEofThread : public ml::core::CThread -{ - public: - CEofThread(ml::core::CDualThreadStreamBuf &buf) - : m_Buf(buf) - { - } +protected: + virtual void run() { + ml::core::CSleep::sleep(200); - protected: - virtual void run() - { - ml::core::CSleep::sleep(200); + m_Buf.signalEndOfFile(); + } - m_Buf.signalEndOfFile(); - } + virtual void shutdown() {} - virtual void shutdown() - { - } - - private: - ml::core::CDualThreadStreamBuf &m_Buf; +private: + ml::core::CDualThreadStreamBuf& m_Buf; }; - } -CppUnit::Test *CBlockingCallCancellerThreadTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBlockingCallCancellerThreadTest"); +CppUnit::Test* CBlockingCallCancellerThreadTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBlockingCallCancellerThreadTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBlockingCallCancellerThreadTest::testCancelBlock", - &CBlockingCallCancellerThreadTest::testCancelBlock) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CBlockingCallCancellerThreadTest::testCancelBlock", + &CBlockingCallCancellerThreadTest::testCancelBlock)); return suiteOfTests; } -void CBlockingCallCancellerThreadTest::testCancelBlock() -{ +void CBlockingCallCancellerThreadTest::testCancelBlock() { ml::core::CDualThreadStreamBuf buf; std::istream monStrm(&buf); - ml::controller::CBlockingCallCancellerThread cancellerThread(ml::core::CThread::currentThreadId(), - monStrm); + ml::controller::CBlockingCallCancellerThread cancellerThread(ml::core::CThread::currentThreadId(), monStrm); CPPUNIT_ASSERT(cancellerThread.start()); // The CBlockingCallCancellerThread should wake up the blocking open of the @@ -75,11 +61,10 @@ void CBlockingCallCancellerThreadTest::testCancelBlock() CPPUNIT_ASSERT(eofThread.start()); ml::core::CNamedPipeFactory::TIStreamP pipeStrm = - 
ml::core::CNamedPipeFactory::openPipeStreamRead(ml::core::CNamedPipeFactory::defaultPath() + "test_pipe"); + ml::core::CNamedPipeFactory::openPipeStreamRead(ml::core::CNamedPipeFactory::defaultPath() + "test_pipe"); CPPUNIT_ASSERT(pipeStrm == 0); CPPUNIT_ASSERT(cancellerThread.stop()); CPPUNIT_ASSERT(eofThread.stop()); } - diff --git a/bin/controller/unittest/CBlockingCallCancellerThreadTest.h b/bin/controller/unittest/CBlockingCallCancellerThreadTest.h index 37dc838f93..ba0c7ccdf1 100644 --- a/bin/controller/unittest/CBlockingCallCancellerThreadTest.h +++ b/bin/controller/unittest/CBlockingCallCancellerThreadTest.h @@ -8,13 +8,11 @@ #include -class CBlockingCallCancellerThreadTest : public CppUnit::TestFixture -{ - public: - void testCancelBlock(); +class CBlockingCallCancellerThreadTest : public CppUnit::TestFixture { +public: + void testCancelBlock(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CBlockingCallCancellerThreadTest_h - diff --git a/bin/controller/unittest/CCommandProcessorTest.cc b/bin/controller/unittest/CCommandProcessorTest.cc index 86ec4678ee..25ace4b054 100644 --- a/bin/controller/unittest/CCommandProcessorTest.cc +++ b/bin/controller/unittest/CCommandProcessorTest.cc @@ -19,57 +19,46 @@ #include - -namespace -{ +namespace { const std::string OUTPUT_FILE("slogan1.txt"); #ifdef Windows // Unlike Windows NT system calls, copy's command line cannot cope with // forward slash path separators const std::string INPUT_FILE1("testfiles\\slogan1.txt"); const std::string INPUT_FILE2("testfiles\\slogan2.txt"); -const char *winDir(::getenv("windir")); -const std::string PROCESS_PATH(winDir != 0 ? std::string(winDir) + "\\System32\\cmd" - : std::string("C:\\Windows\\System32\\cmd")); -const std::string PROCESS_ARGS1[] = { "/C", "copy " + INPUT_FILE1 + " ." }; -const std::string PROCESS_ARGS2[] = { "/C", "del " + INPUT_FILE2 }; +const char* winDir(::getenv("windir")); +const std::string PROCESS_PATH(winDir != 0 ? std::string(winDir) + "\\System32\\cmd" : std::string("C:\\Windows\\System32\\cmd")); +const std::string PROCESS_ARGS1[] = {"/C", "copy " + INPUT_FILE1 + " ."}; +const std::string PROCESS_ARGS2[] = {"/C", "del " + INPUT_FILE2}; #else const std::string INPUT_FILE1("testfiles/slogan1.txt"); const std::string INPUT_FILE2("testfiles/slogan2.txt"); const std::string PROCESS_PATH("/bin/sh"); -const std::string PROCESS_ARGS1[] = { "-c", "cp " + INPUT_FILE1 + " ." 
};
-const std::string PROCESS_ARGS2[] = { "-c", "rm " + INPUT_FILE2 };
+const std::string PROCESS_ARGS1[] = {"-c", "cp " + INPUT_FILE1 + " ."};
+const std::string PROCESS_ARGS2[] = {"-c", "rm " + INPUT_FILE2};
 #endif
 const std::string SLOGAN1("Elastic is great!");
 const std::string SLOGAN2("You know, for search!");
 }
+CppUnit::Test* CCommandProcessorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCommandProcessorTest");
-CppUnit::Test *CCommandProcessorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCommandProcessorTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCommandProcessorTest>(
-        "CCommandProcessorTest::testStartPermitted",
-        &CCommandProcessorTest::testStartPermitted) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCommandProcessorTest>(
-        "CCommandProcessorTest::testStartNonPermitted",
-        &CCommandProcessorTest::testStartNonPermitted) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCommandProcessorTest>(
-        "CCommandProcessorTest::testStartNonExistent",
-        &CCommandProcessorTest::testStartNonExistent) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCommandProcessorTest>(
-        "CCommandProcessorTest::testKillDisallowed",
-        &CCommandProcessorTest::testKillDisallowed) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCommandProcessorTest>(
-        "CCommandProcessorTest::testInvalidVerb",
-        &CCommandProcessorTest::testInvalidVerb) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCommandProcessorTest>("CCommandProcessorTest::testStartPermitted",
+                                                                         &CCommandProcessorTest::testStartPermitted));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCommandProcessorTest>("CCommandProcessorTest::testStartNonPermitted",
+                                                                         &CCommandProcessorTest::testStartNonPermitted));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCommandProcessorTest>("CCommandProcessorTest::testStartNonExistent",
+                                                                         &CCommandProcessorTest::testStartNonExistent));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCommandProcessorTest>("CCommandProcessorTest::testKillDisallowed",
+                                                                         &CCommandProcessorTest::testKillDisallowed));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCommandProcessorTest>("CCommandProcessorTest::testInvalidVerb", &CCommandProcessorTest::testInvalidVerb));
 return suiteOfTests;
 }
-void CCommandProcessorTest::testStartPermitted()
-{
+void CCommandProcessorTest::testStartPermitted() {
 // Remove any output file left behind by a previous failed test, but don't
 // check the return code as this will usually fail
 ::remove(OUTPUT_FILE.c_str());
@@ -80,8 +69,7 @@ void CCommandProcessorTest::testStartPermitted()
 std::string command(ml::controller::CCommandProcessor::START);
 command += '\t';
 command += PROCESS_PATH;
-    for (size_t index = 0; index < boost::size(PROCESS_ARGS1); ++index)
-    {
+    for (size_t index = 0; index < boost::size(PROCESS_ARGS1); ++index) {
 command += '\t';
 command += PROCESS_ARGS1[index];
 }
@@ -103,16 +91,14 @@ void CCommandProcessorTest::testStartPermitted()
 CPPUNIT_ASSERT_EQUAL(0, ::remove(OUTPUT_FILE.c_str()));
 }
-void CCommandProcessorTest::testStartNonPermitted()
-{
+void CCommandProcessorTest::testStartNonPermitted() {
 ml::controller::CCommandProcessor::TStrVec permittedPaths(1, "some other process");
 ml::controller::CCommandProcessor processor(permittedPaths);
 std::string command(ml::controller::CCommandProcessor::START);
 command += '\t';
 command += PROCESS_PATH;
-    for (size_t index = 0; index < boost::size(PROCESS_ARGS2); ++index)
-    {
+    for (size_t index = 0; index < boost::size(PROCESS_ARGS2); ++index) {
 command += '\t';
 command += PROCESS_ARGS2[index];
 }
@@ -132,8 +118,7 @@
 CPPUNIT_ASSERT_EQUAL(SLOGAN2, content);
 }
-void CCommandProcessorTest::testStartNonExistent()
-{
+void
CCommandProcessorTest::testStartNonExistent() { ml::controller::CCommandProcessor::TStrVec permittedPaths(1, "some other process"); ml::controller::CCommandProcessor processor(permittedPaths); @@ -143,8 +128,7 @@ void CCommandProcessorTest::testStartNonExistent() CPPUNIT_ASSERT(!processor.handleCommand(command)); } -void CCommandProcessorTest::testKillDisallowed() -{ +void CCommandProcessorTest::testKillDisallowed() { // Attempt to kill a process that exists but isn't allowed to be killed, // namely the unit test program @@ -158,8 +142,7 @@ void CCommandProcessorTest::testKillDisallowed() CPPUNIT_ASSERT(!processor.handleCommand(command)); } -void CCommandProcessorTest::testInvalidVerb() -{ +void CCommandProcessorTest::testInvalidVerb() { ml::controller::CCommandProcessor::TStrVec permittedPaths(1, "some other process"); ml::controller::CCommandProcessor processor(permittedPaths); @@ -168,4 +151,3 @@ void CCommandProcessorTest::testInvalidVerb() CPPUNIT_ASSERT(!processor.handleCommand(command)); } - diff --git a/bin/controller/unittest/CCommandProcessorTest.h b/bin/controller/unittest/CCommandProcessorTest.h index f25bef93d5..39b62e9b4f 100644 --- a/bin/controller/unittest/CCommandProcessorTest.h +++ b/bin/controller/unittest/CCommandProcessorTest.h @@ -8,17 +8,15 @@ #include -class CCommandProcessorTest : public CppUnit::TestFixture -{ - public: - void testStartPermitted(); - void testStartNonPermitted(); - void testStartNonExistent(); - void testKillDisallowed(); - void testInvalidVerb(); +class CCommandProcessorTest : public CppUnit::TestFixture { +public: + void testStartPermitted(); + void testStartNonPermitted(); + void testStartNonExistent(); + void testKillDisallowed(); + void testInvalidVerb(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CCommandProcessorTest_h - diff --git a/bin/controller/unittest/Main.cc b/bin/controller/unittest/Main.cc index 60ddd5fbf7..27702049a5 100644 --- a/bin/controller/unittest/Main.cc +++ b/bin/controller/unittest/Main.cc @@ -8,14 +8,11 @@ #include "CBlockingCallCancellerThreadTest.h" #include "CCommandProcessorTest.h" - -int main(int argc, const char **argv) -{ +int main(int argc, const char** argv) { ml::test::CTestRunner runner(argc, argv); - runner.addTest( CBlockingCallCancellerThreadTest::suite() ); - runner.addTest( CCommandProcessorTest::suite() ); + runner.addTest(CBlockingCallCancellerThreadTest::suite()); + runner.addTest(CCommandProcessorTest::suite()); return !runner.runTests(); } - diff --git a/bin/normalize/CCmdLineParser.cc b/bin/normalize/CCmdLineParser.cc index e14d875a2b..64ebda4cae 100644 --- a/bin/normalize/CCmdLineParser.cc +++ b/bin/normalize/CCmdLineParser.cc @@ -11,143 +11,102 @@ #include +namespace ml { +namespace normalize { -namespace ml -{ -namespace normalize -{ - - -const std::string CCmdLineParser::DESCRIPTION = -"Usage: normalize [options]\n" -"Options:"; - +const std::string CCmdLineParser::DESCRIPTION = "Usage: normalize [options]\n" + "Options:"; bool CCmdLineParser::parse(int argc, - const char * const *argv, - std::string &modelConfigFile, - std::string &logProperties, - std::string &logPipe, - core_t::TTime &bucketSpan, - bool &lengthEncodedInput, - std::string &inputFileName, - bool &isInputFileNamedPipe, - std::string &outputFileName, - bool &isOutputFileNamedPipe, - std::string &quantilesState, - bool &deleteStateFiles, - bool &writeCsv, - bool &perPartitionNormalization) -{ - try - { + const char* const* argv, + std::string& modelConfigFile, + std::string& 
logProperties, + std::string& logPipe, + core_t::TTime& bucketSpan, + bool& lengthEncodedInput, + std::string& inputFileName, + bool& isInputFileNamedPipe, + std::string& outputFileName, + bool& isOutputFileNamedPipe, + std::string& quantilesState, + bool& deleteStateFiles, + bool& writeCsv, + bool& perPartitionNormalization) { + try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options() - ("help", "Display this information and exit") - ("version", "Display version information and exit") - ("modelconfig", boost::program_options::value(), - "Optional model config file") - ("logProperties", boost::program_options::value(), - "Optional logger properties file") - ("logPipe", boost::program_options::value(), - "Optional log to named pipe") - ("bucketspan", boost::program_options::value(), - "Optional aggregation bucket span (in seconds) - default is 300") - ("lengthEncodedInput", - "Take input in length encoded binary format - default is CSV") - ("input", boost::program_options::value(), - "Optional file to read input from - not present means read from STDIN") - ("inputIsPipe", "Specified input file is a named pipe") - ("output", boost::program_options::value(), - "Optional file to write output to - not present means write to STDOUT") - ("outputIsPipe", "Specified output file is a named pipe") - ("quantilesState", boost::program_options::value(), - "Optional file to initialization data for normalization (in JSON)") - ("deleteStateFiles", - "If this flag is set then delete the normalizer state files once they have been read") - ("writeCsv", - "Write the results in CSV format (default is lineified JSON)") - ("perPartitionNormalization", - "Optional flag to enable per partition normalization") - ; + desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( + "modelconfig", boost::program_options::value(), "Optional model config file")( + "logProperties", boost::program_options::value(), "Optional logger properties file")( + "logPipe", boost::program_options::value(), "Optional log to named pipe")( + "bucketspan", boost::program_options::value(), "Optional aggregation bucket span (in seconds) - default is 300")( + "lengthEncodedInput", "Take input in length encoded binary format - default is CSV")( + "input", boost::program_options::value(), "Optional file to read input from - not present means read from STDIN")( + "inputIsPipe", "Specified input file is a named pipe")( + "output", boost::program_options::value(), "Optional file to write output to - not present means write to STDOUT")( + "outputIsPipe", "Specified output file is a named pipe")("quantilesState", + boost::program_options::value(), + "Optional file to initialization data for normalization (in JSON)")( + "deleteStateFiles", "If this flag is set then delete the normalizer state files once they have been read")( + "writeCsv", "Write the results in CSV format (default is lineified JSON)")( + "perPartitionNormalization", "Optional flag to enable per partition normalization"); boost::program_options::variables_map vm; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); - if (vm.count("help") > 0) - { + if (vm.count("help") > 0) { std::cerr << desc << std::endl; return false; } - if (vm.count("version") > 0) - { + if (vm.count("version") > 0) { std::cerr << ver::CBuildInfo::fullInfo() << std::endl; return false; } - if (vm.count("modelconfig") > 0) - { + if 
(vm.count("modelconfig") > 0) { modelConfigFile = vm["modelconfig"].as(); } - if (vm.count("logProperties") > 0) - { + if (vm.count("logProperties") > 0) { logProperties = vm["logProperties"].as(); } - if (vm.count("logPipe") > 0) - { + if (vm.count("logPipe") > 0) { logPipe = vm["logPipe"].as(); } - if (vm.count("bucketspan") > 0) - { + if (vm.count("bucketspan") > 0) { bucketSpan = vm["bucketspan"].as(); } - if (vm.count("lengthEncodedInput") > 0) - { + if (vm.count("lengthEncodedInput") > 0) { lengthEncodedInput = true; } - if (vm.count("input") > 0) - { + if (vm.count("input") > 0) { inputFileName = vm["input"].as(); } - if (vm.count("inputIsPipe") > 0) - { + if (vm.count("inputIsPipe") > 0) { isInputFileNamedPipe = true; } - if (vm.count("output") > 0) - { + if (vm.count("output") > 0) { outputFileName = vm["output"].as(); } - if (vm.count("outputIsPipe") > 0) - { + if (vm.count("outputIsPipe") > 0) { isOutputFileNamedPipe = true; } - if (vm.count("quantilesState") > 0) - { + if (vm.count("quantilesState") > 0) { quantilesState = vm["quantilesState"].as(); } - if (vm.count("deleteStateFiles") > 0) - { + if (vm.count("deleteStateFiles") > 0) { deleteStateFiles = true; } - if (vm.count("writeCsv") > 0) - { + if (vm.count("writeCsv") > 0) { writeCsv = true; } - if (vm.count("perPartitionNormalization") > 0) - { + if (vm.count("perPartitionNormalization") > 0) { perPartitionNormalization = true; } - } - catch (std::exception &e) - { + } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; return false; } return true; } - - } } - diff --git a/bin/normalize/CCmdLineParser.h b/bin/normalize/CCmdLineParser.h index caba89aca3..9310ba2e83 100644 --- a/bin/normalize/CCmdLineParser.h +++ b/bin/normalize/CCmdLineParser.h @@ -11,12 +11,8 @@ #include #include - -namespace ml -{ -namespace normalize -{ - +namespace ml { +namespace normalize { //! \brief //! Very simple command line parser. @@ -28,36 +24,32 @@ namespace normalize //! Put in a class rather than main to allow testing. //! TODO make this generic. //! -class CCmdLineParser -{ - public: - typedef std::vector TStrVec; - - public: - //! Parse the arguments and return options if appropriate. - static bool parse(int argc, - const char * const *argv, - std::string &modelConfigFile, - std::string &logProperties, - std::string &logPipe, - core_t::TTime &bucketSpan, - bool &lengthEncodedInput, - std::string &inputFileName, - bool &isInputFileNamedPipe, - std::string &outputFileName, - bool &isOutputFileNamedPipe, - std::string &quantilesState, - bool &deleteStateFiles, - bool &writeCsv, - bool &perPartitionNormalization); - - private: - static const std::string DESCRIPTION; +class CCmdLineParser { +public: + typedef std::vector TStrVec; + +public: + //! Parse the arguments and return options if appropriate. 
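+    //!
+    //! A hypothetical caller, shown here only to make the out-parameter
+    //! contract concrete (it mirrors the call in bin/normalize/Main.cc):
+    //! \code
+    //! std::string modelConfigFile, logProperties, logPipe;
+    //! core_t::TTime bucketSpan(0);
+    //! bool lengthEncodedInput(false), isInputFileNamedPipe(false);
+    //! bool isOutputFileNamedPipe(false), deleteStateFiles(false);
+    //! bool writeCsv(false), perPartitionNormalization(false);
+    //! std::string inputFileName, outputFileName, quantilesState;
+    //! if (CCmdLineParser::parse(argc, argv, modelConfigFile, logProperties,
+    //!                           logPipe, bucketSpan, lengthEncodedInput,
+    //!                           inputFileName, isInputFileNamedPipe,
+    //!                           outputFileName, isOutputFileNamedPipe,
+    //!                           quantilesState, deleteStateFiles, writeCsv,
+    //!                           perPartitionNormalization) == false) {
+    //!     return EXIT_FAILURE;
+    //! }
+    //! \endcode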
+ static bool parse(int argc, + const char* const* argv, + std::string& modelConfigFile, + std::string& logProperties, + std::string& logPipe, + core_t::TTime& bucketSpan, + bool& lengthEncodedInput, + std::string& inputFileName, + bool& isInputFileNamedPipe, + std::string& outputFileName, + bool& isOutputFileNamedPipe, + std::string& quantilesState, + bool& deleteStateFiles, + bool& writeCsv, + bool& perPartitionNormalization); + +private: + static const std::string DESCRIPTION; }; - - } } #endif // INCLUDED_ml_normalize_CCmdLineParser_h - diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index 40336f3326..4ac1186d36 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -14,8 +14,8 @@ //! Standalone program. //! #include -#include #include +#include #include @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include #include "CCmdLineParser.h" @@ -38,23 +38,21 @@ #include #include - -int main(int argc, char **argv) -{ +int main(int argc, char** argv) { // Read command line options - std::string modelConfigFile; - std::string logProperties; - std::string logPipe; + std::string modelConfigFile; + std::string logProperties; + std::string logPipe; ml::core_t::TTime bucketSpan(0); - bool lengthEncodedInput(false); - std::string inputFileName; - bool isInputFileNamedPipe(false); - std::string outputFileName; - bool isOutputFileNamedPipe(false); - std::string quantilesStateFile; - bool deleteStateFiles(false); - bool writeCsv(false); - bool perPartitionNormalization(false); + bool lengthEncodedInput(false); + std::string inputFileName; + bool isInputFileNamedPipe(false); + std::string outputFileName; + bool isOutputFileNamedPipe(false); + std::string quantilesStateFile; + bool deleteStateFiles(false); + bool writeCsv(false); + bool perPartitionNormalization(false); if (ml::normalize::CCmdLineParser::parse(argc, argv, modelConfigFile, @@ -69,20 +67,15 @@ int main(int argc, char **argv) quantilesStateFile, deleteStateFiles, writeCsv, - perPartitionNormalization) == false) - { + perPartitionNormalization) == false) { return EXIT_FAILURE; } // Construct the IO manager before reconfiguring the logger, as it performs // std::ios actions that only work before first use - ml::api::CIoManager ioMgr(inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe); + ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe); - if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) - { + if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) { LOG_FATAL("Could not reconfigure logging"); return EXIT_FAILURE; } @@ -94,18 +87,14 @@ int main(int argc, char **argv) ml::core::CProcessPriority::reducePriority(); - if (ioMgr.initIo() == false) - { + if (ioMgr.initIo() == false) { LOG_FATAL("Failed to initialise IO"); return EXIT_FAILURE; } - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(bucketSpan); - if (!modelConfigFile.empty() && modelConfig.init(modelConfigFile) == false) - { - LOG_FATAL("Ml model config file '" << modelConfigFile << - "' could not be loaded"); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(bucketSpan); + if (!modelConfigFile.empty() && modelConfig.init(modelConfigFile) == false) { + LOG_FATAL("Ml model config file '" << modelConfigFile << "' could not be loaded"); return EXIT_FAILURE; } 
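    // The CIoManager comment above notes that some std::ios actions only work
    // before first use of a stream. A minimal, self-contained illustration of
    // that class of constraint, assuming behaviour like
    // std::ios_base::sync_with_stdio (the patch does not say which std::ios
    // action CIoManager actually performs):
    //
    //     #include <iostream>
    //
    //     int main() {
    //         // Reliable only before the first read or write on the standard
    //         // streams; calling it after I/O has occurred gives
    //         // implementation-defined results.
    //         std::ios_base::sync_with_stdio(false);
    //         std::cout << "unsynchronised stdio from here on\n";
    //         return 0;
    //     }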
modelConfig.perPartitionNormalization(perPartitionNormalization); @@ -113,51 +102,37 @@ int main(int argc, char **argv) // There's a choice of input and output formats for the numbers to be normalised using TScopedInputParserP = boost::scoped_ptr; TScopedInputParserP inputParser; - if (lengthEncodedInput) - { + if (lengthEncodedInput) { inputParser.reset(new ml::api::CLengthEncodedInputParser(ioMgr.inputStream())); - } - else - { - inputParser.reset(new ml::api::CCsvInputParser(ioMgr.inputStream(), - ml::api::CCsvInputParser::COMMA)); + } else { + inputParser.reset(new ml::api::CCsvInputParser(ioMgr.inputStream(), ml::api::CCsvInputParser::COMMA)); } using TScopedOutputHandlerP = boost::scoped_ptr; TScopedOutputHandlerP outputWriter; - if (writeCsv) - { + if (writeCsv) { outputWriter.reset(new ml::api::CCsvOutputWriter(ioMgr.outputStream())); - } - else - { - outputWriter.reset(new ml::api::CLineifiedJsonOutputWriter({ ml::api::CResultNormalizer::PROBABILITY_NAME, - ml::api::CResultNormalizer::NORMALIZED_SCORE_NAME }, - ioMgr.outputStream())); + } else { + outputWriter.reset(new ml::api::CLineifiedJsonOutputWriter( + {ml::api::CResultNormalizer::PROBABILITY_NAME, ml::api::CResultNormalizer::NORMALIZED_SCORE_NAME}, ioMgr.outputStream())); } // This object will do the work ml::api::CResultNormalizer normalizer(modelConfig, *outputWriter); // Restore state - if (!quantilesStateFile.empty()) - { - if (normalizer.initNormalizer(quantilesStateFile) == false) - { + if (!quantilesStateFile.empty()) { + if (normalizer.initNormalizer(quantilesStateFile) == false) { LOG_FATAL("Failed to initialize normalizer"); return EXIT_FAILURE; } - if (deleteStateFiles) - { + if (deleteStateFiles) { ::remove(quantilesStateFile.c_str()); } } // Now handle the numbers to be normalised from stdin - if (inputParser->readStream(boost::bind(&ml::api::CResultNormalizer::handleRecord, - &normalizer, - _1)) == false) - { + if (inputParser->readStream(boost::bind(&ml::api::CResultNormalizer::handleRecord, &normalizer, _1)) == false) { LOG_FATAL("Failed to handle input to be normalized"); return EXIT_FAILURE; } @@ -169,4 +144,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/devbin/analyze_test/Main.cc b/devbin/analyze_test/Main.cc index 12f460d8f1..1aae08ce6e 100644 --- a/devbin/analyze_test/Main.cc +++ b/devbin/analyze_test/Main.cc @@ -7,31 +7,22 @@ #include #include -template -struct STemplated -{ - void printFirst(void) - { - std::cout << s_First << std::endl; - } +template +struct STemplated { + void printFirst(void) { std::cout << s_First << std::endl; } - T s_First; + T s_First; std::vector s_Second; }; -struct SSimple -{ - void printFirst(void) - { - std::cout << s_First << std::endl; - } +struct SSimple { + void printFirst(void) { std::cout << s_First << std::endl; } - int s_First; + int s_First; std::vector s_Second; }; -int main(int, char **) -{ +int main(int, char**) { { SSimple obj; obj.printFirst(); @@ -44,4 +35,3 @@ int main(int, char **) return 0; } - diff --git a/devbin/move_copy_swap/Main.cc b/devbin/move_copy_swap/Main.cc index d01238f617..aa16f2a3b3 100644 --- a/devbin/move_copy_swap/Main.cc +++ b/devbin/move_copy_swap/Main.cc @@ -27,70 +27,51 @@ std::string s2; std::string s3; size_t totalLength = 0; -template -void transfer(std::string &&from, std::string &to) -{ - if (OP == 'm') - { +template +void transfer(std::string&& from, std::string& to) { + if (OP == 'm') { to = std::move(from); - } - else if (OP == 'c') - { + } else if (OP == 'c') { to = from; - } - else if (OP == 'd') 
- { + } else if (OP == 'd') { to.assign(from, 0, from.length()); - } - else - { + } else { from.swap(to); } } -template -DONT_INLINE_THIS_FUNCTION -void func3(std::string &&s) -{ +template +DONT_INLINE_THIS_FUNCTION void func3(std::string&& s) { transfer(std::move(s), s3); s3[0] = '3'; totalLength += s3.length(); } -template -DONT_INLINE_THIS_FUNCTION -void func2(std::string &&s) -{ +template +DONT_INLINE_THIS_FUNCTION void func2(std::string&& s) { transfer(std::move(s), s2); s2[0] = '2'; func3(std::move(s2)); } -template -DONT_INLINE_THIS_FUNCTION -void func1(std::string &&s) -{ +template +DONT_INLINE_THIS_FUNCTION void func1(std::string&& s) { transfer(std::move(s), s1); s1[0] = '1'; func2(std::move(s1)); } -template -void generate(size_t minSize, size_t iterations) -{ - for (size_t count = 0; count < iterations; ++count) - { +template +void generate(size_t minSize, size_t iterations) { + for (size_t count = 0; count < iterations; ++count) { s0.assign(minSize + count % 15, char('A' + count % 26)); func1(std::move(s0)); } } -int main(int argc, char **argv) -{ - if (argc != 4) - { - std::cerr << "Usage: " << argv[0] - << " " << std::endl +int main(int argc, char** argv) { + if (argc != 4) { + std::cerr << "Usage: " << argv[0] << " " << std::endl << "Where: m = move" << std::endl << " c = copy" << std::endl << " d = copy defeating copy-on-write" << std::endl @@ -108,36 +89,29 @@ int main(int argc, char **argv) std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now(); #endif - if (argv[1][0] == 'm') - { + if (argv[1][0] == 'm') { generate<'m'>(minSize, iterations); - } - else if (argv[1][0] == 'c') - { + } else if (argv[1][0] == 'c') { generate<'c'>(minSize, iterations); - } - else if (argv[1][0] == 'd') - { + } else if (argv[1][0] == 'd') { generate<'d'>(minSize, iterations); - } - else - { + } else { generate<'s'>(minSize, iterations); } #ifdef NO_STD_CHRONO uint64_t endTimeNs = clock.nanoseconds(); uint64_t durationTenthMs = (endTimeNs - startTimeNs) / 100000; - uint64_t durationMs = (durationTenthMs / 10) + - ((durationTenthMs % 10 >= 5) ? 1 : 0); + uint64_t durationMs = (durationTenthMs / 10) + ((durationTenthMs % 10 >= 5) ? 1 : 0); #else std::chrono::steady_clock::time_point endTime = std::chrono::steady_clock::now(); size_t durationMs = std::chrono::duration_cast(endTime - startTime).count(); #endif - std::cout << "Time " << durationMs << "ms, " - "Total length: " << totalLength << std::endl; + std::cout << "Time " << durationMs + << "ms, " + "Total length: " + << totalLength << std::endl; return EXIT_SUCCESS; } - diff --git a/devbin/unixtime_to_string/CCmdLineParser.cc b/devbin/unixtime_to_string/CCmdLineParser.cc index fa996268c0..4a2141d4d3 100644 --- a/devbin/unixtime_to_string/CCmdLineParser.cc +++ b/devbin/unixtime_to_string/CCmdLineParser.cc @@ -11,57 +11,40 @@ #include - -namespace ml -{ -namespace syslogparsertester -{ - +namespace ml { +namespace syslogparsertester { const std::string CCmdLineParser::DESCRIPTION = -"Usage: syslog_parser_tester [options]\n" -"Development tool to verify format of syslog parsing config XML files\n" -"E.g. ./syslog_parser_tester --config syslog_parser.xml --syslogline 'ml1234.log: Transport node error on node 0x9876 '\n" -"Options:"; - - -bool CCmdLineParser::parse(int argc, - const char * const *argv, - std::string &configFile, - std::string &syslogLine) -{ - try - { + "Usage: syslog_parser_tester [options]\n" + "Development tool to verify format of syslog parsing config XML files\n" + "E.g. 
./syslog_parser_tester --config syslog_parser.xml --syslogline 'ml1234.log: " + "Transport node error on node 0x9876 '\n" + "Options:"; + +bool CCmdLineParser::parse(int argc, const char* const* argv, std::string& configFile, std::string& syslogLine) { + try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options() - ("help", "Display this information and exit") - ("version", "Display version information and exit") - ("config", boost::program_options::value(), "Read configuration from ") - ("syslogline", boost::program_options::value(), "Optional line of syslog") - ; + desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( + "config", boost::program_options::value(), "Read configuration from ")( + "syslogline", boost::program_options::value(), "Optional line of syslog"); boost::program_options::variables_map vm; - boost::program_options::store( - boost::program_options::parse_command_line(argc, argv, desc), vm); + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); - if (vm.count("help") > 0) - { + if (vm.count("help") > 0) { std::cerr << desc << std::endl; return false; } - if (vm.count("version") > 0) - { + if (vm.count("version") > 0) { std::cerr << ver::CBuildInfo::fullInfo() << std::endl; return false; } - if (vm.count("config") > 0) - { + if (vm.count("config") > 0) { configFile = vm["config"].as(); - if (vm.count("syslogline") > 0) - { + if (vm.count("syslogline") > 0) { syslogLine = vm["syslogline"].as(); } @@ -71,15 +54,9 @@ bool CCmdLineParser::parse(int argc, // Raise error std::cerr << "Error: Invalid command line options" << std::endl; std::cerr << desc << std::endl; - } - catch (std::exception &e) - { - std::cerr << "Error processing command line: " << e.what() << std::endl; - } + } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; } return false; } - - } } diff --git a/devbin/unixtime_to_string/CCmdLineParser.h b/devbin/unixtime_to_string/CCmdLineParser.h index 5ef1a5f45f..b44ca252de 100644 --- a/devbin/unixtime_to_string/CCmdLineParser.h +++ b/devbin/unixtime_to_string/CCmdLineParser.h @@ -8,12 +8,8 @@ #include - -namespace ml -{ -namespace syslogparsertester -{ - +namespace ml { +namespace syslogparsertester { //! \brief //! Very simple command line parser. @@ -25,23 +21,16 @@ namespace syslogparsertester //! Put in a class rather than main to allow testing. //! TODO make this generic. //! -class CCmdLineParser -{ - public: - //! Parse the arguments. ONLY return true if configFile and dateTime - //! are defined. - static bool parse(int argc, - const char * const *argv, - std::string &configFile, - std::string &syslogLine); - - private: - static const std::string DESCRIPTION; +class CCmdLineParser { +public: + //! Parse the arguments. ONLY return true if configFile and dateTime + //! are defined. 
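+    //!
+    //! A hypothetical caller (note the second output parameter is the raw
+    //! syslog line, not a date/time, and only --config is required for
+    //! parse() to succeed):
+    //! \code
+    //! std::string configFile;
+    //! std::string syslogLine;
+    //! if (CCmdLineParser::parse(argc, argv, configFile, syslogLine) == false) {
+    //!     return EXIT_FAILURE;
+    //! }
+    //! // configFile is set here; syslogLine may legitimately be empty.
+    //! \endcode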
+ static bool parse(int argc, const char* const* argv, std::string& configFile, std::string& syslogLine); + +private: + static const std::string DESCRIPTION; }; - - } } #endif // INCLUDED_ml_date_time_tester_CCmdLineParser_h - diff --git a/devbin/unixtime_to_string/Main.cc b/devbin/unixtime_to_string/Main.cc index 7240d08183..8ad5579e25 100644 --- a/devbin/unixtime_to_string/Main.cc +++ b/devbin/unixtime_to_string/Main.cc @@ -6,9 +6,9 @@ #include "CCmdLineParser.h" #include -#include #include #include +#include #include @@ -16,19 +16,15 @@ using namespace ml; - -int main(int argc, char **argv) -{ - if (argc != 2) - { +int main(int argc, char** argv) { + if (argc != 2) { std::cerr << "Utility to convert a Unix time to a string" << std::endl; std::cerr << "Usage: " << argv[0] << " " << std::endl; return EXIT_FAILURE; } core_t::TTime t(0); - if (core::CStringUtils::stringToType(argv[1], t) == false) - { + if (core::CStringUtils::stringToType(argv[1], t) == false) { LOG_FATAL("Unable to convert " << argv[1] << " to integer"); return EXIT_FAILURE; } @@ -37,4 +33,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/devbin/vfprog/CIncrementer.cc b/devbin/vfprog/CIncrementer.cc index 42be16d736..84fc46dce8 100644 --- a/devbin/vfprog/CIncrementer.cc +++ b/devbin/vfprog/CIncrementer.cc @@ -5,28 +5,18 @@ */ #include "CIncrementer.h" +namespace ml { +namespace vfprog { -namespace ml -{ -namespace vfprog -{ - - -CIncrementer::~CIncrementer(void) -{ +CIncrementer::~CIncrementer(void) { } -size_t CIncrementer::nonVirtualIncrement(size_t val) -{ +size_t CIncrementer::nonVirtualIncrement(size_t val) { return val + 1; } -size_t CIncrementer::virtualIncrement(size_t val) -{ +size_t CIncrementer::virtualIncrement(size_t val) { return val + 1; } - - } } - diff --git a/devbin/vfprog/CIncrementer.h b/devbin/vfprog/CIncrementer.h index b3ef6b3633..f5acecaa8c 100644 --- a/devbin/vfprog/CIncrementer.h +++ b/devbin/vfprog/CIncrementer.h @@ -8,12 +8,8 @@ #include - -namespace ml -{ -namespace vfprog -{ - +namespace ml { +namespace vfprog { //! \brief //! Class for measuring function call overhead. @@ -27,28 +23,21 @@ namespace vfprog //! that passes arguments in registers no memory access should //! be required. //! -class CIncrementer -{ - public: - //! Best practice, though not really required in this case - virtual ~CIncrementer(void); +class CIncrementer { +public: + //! Best practice, though not really required in this case + virtual ~CIncrementer(void); - //! Inlined incrementer - size_t inlinedIncrement(size_t val) - { - return val + 1; - } + //! Inlined incrementer + size_t inlinedIncrement(size_t val) { return val + 1; } - //! Non-virtual incrementer - size_t nonVirtualIncrement(size_t val); + //! Non-virtual incrementer + size_t nonVirtualIncrement(size_t val); - //! Virtual incrementer - virtual size_t virtualIncrement(size_t val); + //! 
Virtual incrementer + virtual size_t virtualIncrement(size_t val); }; - - } } #endif // INCLUDED_ml_vfprog_CIncrementer_h - diff --git a/devbin/vfprog/CLooper.cc b/devbin/vfprog/CLooper.cc index a644e06812..4be6c696ef 100644 --- a/devbin/vfprog/CLooper.cc +++ b/devbin/vfprog/CLooper.cc @@ -9,80 +9,49 @@ #include "CIncrementer.h" +namespace ml { +namespace vfprog { -namespace ml -{ -namespace vfprog -{ - - -size_t CLooper::inlinedProgramCallLoop(CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::inlinedProgramCallLoop(CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.inlinedIncrement(val); } return val; } -size_t CLooper::nonVirtualProgramCallLoop(CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::nonVirtualProgramCallLoop(CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.nonVirtualIncrement(val); } return val; } -size_t CLooper::virtualProgramCallLoop(CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::virtualProgramCallLoop(CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.virtualIncrement(val); } return val; } -size_t CLooper::inlinedLibraryCallLoop(vflib::CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::inlinedLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.inlinedIncrement(val); } return val; } -size_t CLooper::nonVirtualLibraryCallLoop(vflib::CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::nonVirtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.nonVirtualIncrement(val); } return val; } -size_t CLooper::virtualLibraryCallLoop(vflib::CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::virtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.virtualIncrement(val); } return val; } - - } } - diff --git a/devbin/vfprog/CLooper.h b/devbin/vfprog/CLooper.h index 65c3f49641..95bd623732 100644 --- a/devbin/vfprog/CLooper.h +++ b/devbin/vfprog/CLooper.h @@ -8,18 +8,13 @@ #include - -namespace ml -{ -namespace vflib -{ +namespace ml { +namespace vflib { class CIncrementer; } -namespace vfprog -{ +namespace vfprog { class CIncrementer; - //! \brief //! Class for measuring function call overhead. //! @@ -33,43 +28,27 @@ class CIncrementer; //! b) To ensure that the object code of these loops is in the //! intended program //! -class CLooper -{ - public: - //! Loop calling the inlined incrementer - static size_t inlinedProgramCallLoop(CIncrementer &incrementer, - size_t count, - size_t val); +class CLooper { +public: + //! Loop calling the inlined incrementer + static size_t inlinedProgramCallLoop(CIncrementer& incrementer, size_t count, size_t val); - //! Loop calling the non-virtual incrementer - static size_t nonVirtualProgramCallLoop(CIncrementer &incrementer, - size_t count, - size_t val); + //! 
Loop calling the non-virtual incrementer + static size_t nonVirtualProgramCallLoop(CIncrementer& incrementer, size_t count, size_t val); - //! Loop calling the virtual incrementer - static size_t virtualProgramCallLoop(CIncrementer &incrementer, - size_t count, - size_t val); + //! Loop calling the virtual incrementer + static size_t virtualProgramCallLoop(CIncrementer& incrementer, size_t count, size_t val); - //! Loop calling the inlined incrementer - static size_t inlinedLibraryCallLoop(vflib::CIncrementer &incrementer, - size_t count, - size_t val); + //! Loop calling the inlined incrementer + static size_t inlinedLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); - //! Loop calling the non-virtual incrementer - static size_t nonVirtualLibraryCallLoop(vflib::CIncrementer &incrementer, - size_t count, - size_t val); + //! Loop calling the non-virtual incrementer + static size_t nonVirtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); - //! Loop calling the virtual incrementer - static size_t virtualLibraryCallLoop(vflib::CIncrementer &incrementer, - size_t count, - size_t val); + //! Loop calling the virtual incrementer + static size_t virtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); }; - - } } #endif // INCLUDED_ml_vfprog_CLooper_h - diff --git a/devbin/vfprog/Main.cc b/devbin/vfprog/Main.cc index 116ad6b58c..731f2d7182 100644 --- a/devbin/vfprog/Main.cc +++ b/devbin/vfprog/Main.cc @@ -17,88 +17,58 @@ #include #include - -namespace -{ +namespace { const size_t WARMUP_COUNT(100); const size_t TEST_COUNT(1000000000); typedef std::pair TSizeUInt64Pr; -size_t benchmark(char testId, - ml::vfprog::CIncrementer &incrementer, - size_t count) -{ +size_t benchmark(char testId, ml::vfprog::CIncrementer& incrementer, size_t count) { size_t val(0); - switch (testId) - { - case '1': - val = ml::vfprog::CLooper::inlinedProgramCallLoop(incrementer, - count, - val); - break; - case '2': - val = ml::vfprog::CLooper::nonVirtualProgramCallLoop(incrementer, - count, - val); - break; - case '3': - val = ml::vfprog::CLooper::virtualProgramCallLoop(incrementer, - count, - val); - break; + switch (testId) { + case '1': + val = ml::vfprog::CLooper::inlinedProgramCallLoop(incrementer, count, val); + break; + case '2': + val = ml::vfprog::CLooper::nonVirtualProgramCallLoop(incrementer, count, val); + break; + case '3': + val = ml::vfprog::CLooper::virtualProgramCallLoop(incrementer, count, val); + break; } return val; } -size_t benchmark(char testId, - ml::vflib::CIncrementer &incrementer, - size_t count) -{ +size_t benchmark(char testId, ml::vflib::CIncrementer& incrementer, size_t count) { size_t val(0); - switch (testId) - { - case '4': - val = ml::vfprog::CLooper::inlinedLibraryCallLoop(incrementer, - count, - val); - break; - case '5': - val = ml::vfprog::CLooper::nonVirtualLibraryCallLoop(incrementer, - count, - val); - break; - case '6': - val = ml::vfprog::CLooper::virtualLibraryCallLoop(incrementer, - count, - val); - break; - case '7': - val = ml::vflib::CLooper::inlinedLibraryCallLoop(incrementer, - count, - val); - break; - case '8': - val = ml::vflib::CLooper::nonVirtualLibraryCallLoop(incrementer, - count, - val); - break; - case '9': - val = ml::vflib::CLooper::virtualLibraryCallLoop(incrementer, - count, - val); - break; + switch (testId) { + case '4': + val = ml::vfprog::CLooper::inlinedLibraryCallLoop(incrementer, count, val); + break; + case '5': + val = 
ml::vfprog::CLooper::nonVirtualLibraryCallLoop(incrementer, count, val); + break; + case '6': + val = ml::vfprog::CLooper::virtualLibraryCallLoop(incrementer, count, val); + break; + case '7': + val = ml::vflib::CLooper::inlinedLibraryCallLoop(incrementer, count, val); + break; + case '8': + val = ml::vflib::CLooper::nonVirtualLibraryCallLoop(incrementer, count, val); + break; + case '9': + val = ml::vflib::CLooper::virtualLibraryCallLoop(incrementer, count, val); + break; } return val; } -template -TSizeUInt64Pr benchmark(char testId, - INCREMENTER &incrementer) -{ +template +TSizeUInt64Pr benchmark(char testId, INCREMENTER& incrementer) { ml::core::CMonotonicTime clock; TSizeUInt64Pr result; @@ -114,10 +84,8 @@ TSizeUInt64Pr benchmark(char testId, return result; } -TSizeUInt64Pr benchmark(char testId) -{ - if (testId < '4') - { +TSizeUInt64Pr benchmark(char testId) { + if (testId < '4') { ml::vfprog::CIncrementer incrementer; return benchmark(testId, incrementer); } @@ -125,17 +93,10 @@ TSizeUInt64Pr benchmark(char testId) ml::vflib::CIncrementer incrementer; return benchmark(testId, incrementer); } - } - -int main(int argc, char **argv) -{ - if (argc != 2 || - ::strlen(argv[1]) != 1 || - argv[1][0] < '1' || - argv[1][0] > '9') - { +int main(int argc, char** argv) { + if (argc != 2 || ::strlen(argv[1]) != 1 || argv[1][0] < '1' || argv[1][0] > '9') { std::cerr << "Usage: " << argv[0] << " <1-9>" << std::endl; return EXIT_FAILURE; } @@ -147,4 +108,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/devbin/vsbug/Main.cc b/devbin/vsbug/Main.cc index 668d964baf..9b67ddfec4 100644 --- a/devbin/vsbug/Main.cc +++ b/devbin/vsbug/Main.cc @@ -8,9 +8,7 @@ #include #include - -int main(int, char **) -{ +int main(int, char**) { const std::time_t startTime = 1346968800; const std::time_t bucketLength = 3600; @@ -20,20 +18,14 @@ int main(int, char **) eventTimes.push_back(1347019162); std::time_t endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - std::cout << "startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size() << std::endl; + std::cout << "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size() << std::endl; { std::time_t offset = endTime - startTime; unsigned long i = 0; - for (std::time_t bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength) - { + for (std::time_t bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength) { std::time_t bucketEndTime = bucketStartTime + bucketLength; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { std::vector temp; // Comment out the next line to get the correct result temp.push_back(eventTimes[i] + offset); @@ -50,4 +42,3 @@ int main(int, char **) return 0; } - diff --git a/devinclude/vflib/CIncrementer.h b/devinclude/vflib/CIncrementer.h index 57d2b5233a..0150935aea 100644 --- a/devinclude/vflib/CIncrementer.h +++ b/devinclude/vflib/CIncrementer.h @@ -10,12 +10,8 @@ #include - -namespace ml -{ -namespace vflib -{ - +namespace ml { +namespace vflib { //! \brief //! Class for measuring function call overhead. @@ -29,28 +25,21 @@ namespace vflib //! that passes arguments in registers no memory access should //! be required. //! -class VFLIB_EXPORT CIncrementer -{ - public: - //! Best practice, though not really required in this case - virtual ~CIncrementer(void); - - //! 
Inlined incrementer - size_t inlinedIncrement(size_t val) - { - return val + 1; - } - - //! Non-virtual incrementer - size_t nonVirtualIncrement(size_t val); - - //! Virtual incrementer - virtual size_t virtualIncrement(size_t val); -}; +class VFLIB_EXPORT CIncrementer { +public: + //! Best practice, though not really required in this case + virtual ~CIncrementer(void); + //! Inlined incrementer + size_t inlinedIncrement(size_t val) { return val + 1; } + //! Non-virtual incrementer + size_t nonVirtualIncrement(size_t val); + + //! Virtual incrementer + virtual size_t virtualIncrement(size_t val); +}; } } #endif // INCLUDED_ml_vflib_CIncrementer_h - diff --git a/devinclude/vflib/CLooper.h b/devinclude/vflib/CLooper.h index 5d6b96ecb5..b1c5343856 100644 --- a/devinclude/vflib/CLooper.h +++ b/devinclude/vflib/CLooper.h @@ -10,14 +10,10 @@ #include - -namespace ml -{ -namespace vflib -{ +namespace ml { +namespace vflib { class CIncrementer; - //! \brief //! Class for measuring function call overhead. //! @@ -32,28 +28,18 @@ class CIncrementer; //! b) To ensure that the object code of these loops is in the //! intended library //! -class VFLIB_EXPORT CLooper -{ - public: - //! Loop calling the inlined incrementer - static size_t inlinedLibraryCallLoop(CIncrementer &incrementer, - size_t count, - size_t val); +class VFLIB_EXPORT CLooper { +public: + //! Loop calling the inlined incrementer + static size_t inlinedLibraryCallLoop(CIncrementer& incrementer, size_t count, size_t val); - //! Loop calling the non-virtual incrementer - static size_t nonVirtualLibraryCallLoop(CIncrementer &incrementer, - size_t count, - size_t val); + //! Loop calling the non-virtual incrementer + static size_t nonVirtualLibraryCallLoop(CIncrementer& incrementer, size_t count, size_t val); - //! Loop calling the virtual incrementer - static size_t virtualLibraryCallLoop(CIncrementer &incrementer, - size_t count, - size_t val); + //! 
Loop calling the virtual incrementer + static size_t virtualLibraryCallLoop(CIncrementer& incrementer, size_t count, size_t val); }; - - } } #endif // INCLUDED_ml_vflib_CLooper_h - diff --git a/devinclude/vflib/ImportExport.h b/devinclude/vflib/ImportExport.h index fddbda7d7e..db545832ff 100644 --- a/devinclude/vflib/ImportExport.h +++ b/devinclude/vflib/ImportExport.h @@ -36,4 +36,3 @@ #endif #endif // INCLUDED_ml_vflib_ImportExport_h - diff --git a/devlib/vflib/CIncrementer.cc b/devlib/vflib/CIncrementer.cc index cad2da86c5..46d217fe4c 100644 --- a/devlib/vflib/CIncrementer.cc +++ b/devlib/vflib/CIncrementer.cc @@ -5,28 +5,18 @@ */ #include +namespace ml { +namespace vflib { -namespace ml -{ -namespace vflib -{ - - -CIncrementer::~CIncrementer(void) -{ +CIncrementer::~CIncrementer(void) { } -size_t CIncrementer::nonVirtualIncrement(size_t val) -{ +size_t CIncrementer::nonVirtualIncrement(size_t val) { return val + 1; } -size_t CIncrementer::virtualIncrement(size_t val) -{ +size_t CIncrementer::virtualIncrement(size_t val) { return val + 1; } - - } } - diff --git a/devlib/vflib/CLooper.cc b/devlib/vflib/CLooper.cc index 663012abec..d840007705 100644 --- a/devlib/vflib/CLooper.cc +++ b/devlib/vflib/CLooper.cc @@ -7,47 +7,28 @@ #include +namespace ml { +namespace vflib { -namespace ml -{ -namespace vflib -{ - - -size_t CLooper::inlinedLibraryCallLoop(CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::inlinedLibraryCallLoop(CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.inlinedIncrement(val); } return val; } -size_t CLooper::nonVirtualLibraryCallLoop(CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::nonVirtualLibraryCallLoop(CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.nonVirtualIncrement(val); } return val; } -size_t CLooper::virtualLibraryCallLoop(CIncrementer &incrementer, - size_t count, - size_t val) -{ - for (size_t i = 0; i < count; ++i) - { +size_t CLooper::virtualLibraryCallLoop(CIncrementer& incrementer, size_t count, size_t val) { + for (size_t i = 0; i < count; ++i) { val = incrementer.virtualIncrement(val); } return val; } - - } } - diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index 202fbb411a..9297984ed2 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -7,8 +7,8 @@ #define INCLUDED_ml_api_CAnomalyJob_h #include -#include #include +#include #include #include @@ -16,9 +16,9 @@ #include #include #include -#include #include #include +#include #include #include @@ -40,21 +40,17 @@ class CBackgroundPersisterTest; class CAnomalyJobTest; -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CDataAdder; class CDataSearcher; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CHierarchicalResults; class CLimits; } -namespace api -{ +namespace api { class CBackgroundPersister; class CModelPlotDataJsonWriter; class CFieldConfig; @@ -73,438 +69,410 @@ class CFieldConfig; //! handler to be a CJsonOutputWriter rather than a writer for an //! arbitrary format //! -class API_EXPORT CAnomalyJob : public CDataProcessor -{ - public: - //! Elasticsearch index for state - static const std::string ML_STATE_INDEX; - - //! Discriminant for Elasticsearch IDs - static const std::string STATE_TYPE; - - //! 
Input field names - static const std::string EMPTY_STRING; - static const std::string DEFAULT_TIME_FIELD_NAME; - - public: - //! Enum represents the result of persisted Model restoration - //! Possible states are: - //! -# IncorrectVersion: The version of the stored model state - //! does not match the anomaly detector version. - //! -# UnexpectedTag: State is malformed or could not be parsed - //! correctly - //! -# MemoryLimitReached: The detector could not be allocated - //! becasuse it would violate the memory usage restrictions - //! -# NotRestoredToTime: The detector was not restored to the - //! requested time - //! -# Success: - //! -# Failure: - enum ERestoreStateStatus - { - E_IncorrectVersion, - E_UnexpectedTag, - E_MemoryLimitReached, - E_NotRestoredToTime, - E_NoDetectorsRecovered, - E_Success, - E_Failure - }; - - - public: - using TPersistCompleteFunc = std::function; - using TAnomalyDetectorPtr = model::CAnomalyDetector::TAnomalyDetectorPtr; - using TAnomalyDetectorPtrVec = std::vector; - using TAnomalyDetectorPtrVecItr = std::vector::iterator; - using TAnomalyDetectorPtrVecCItr = std::vector::const_iterator; - using TKeyVec = std::vector; - using TKeyAnomalyDetectorPtrUMap = - boost::unordered_map; - using TKeyCRefAnomalyDetectorPtrPr = std::pair; - using TKeyCRefAnomalyDetectorPtrPrVec = std::vector; - using TModelPlotDataVec = model::CAnomalyDetector::TModelPlotDataVec; - using TModelPlotDataVecCItr = TModelPlotDataVec::const_iterator; - using TModelPlotDataVecQueue = model::CBucketQueue; - - struct API_EXPORT SRestoredStateDetail - { - ERestoreStateStatus s_RestoredStateStatus; - boost::optional s_Extra; - }; - - struct SBackgroundPersistArgs - { - SBackgroundPersistArgs(const model::CResultsQueue &resultsQueue, - const TModelPlotDataVecQueue &modelPlotQueue, - core_t::TTime time, - const model::CResourceMonitor::SResults &modelSizeStats, - const model::CHierarchicalResultsAggregator &aggregator, - core_t::TTime latestRecordTime, - core_t::TTime lastResultsTime); - - model::CResultsQueue s_ResultsQueue; - TModelPlotDataVecQueue s_ModelPlotQueue; - core_t::TTime s_Time; - model::CResourceMonitor::SResults s_ModelSizeStats; - model::CHierarchicalResultsAggregator s_Aggregator; - std::string s_NormalizerState; - core_t::TTime s_LatestRecordTime; - core_t::TTime s_LastResultsTime; - TKeyCRefAnomalyDetectorPtrPrVec s_Detectors; - }; - - using TBackgroundPersistArgsPtr = boost::shared_ptr; - - public: - CAnomalyJob(const std::string &jobId, - model::CLimits &limits, - CFieldConfig &fieldConfig, - model::CAnomalyDetectorModelConfig &modelConfig, - core::CJsonOutputStreamWrapper &outputBuffer, - const TPersistCompleteFunc &persistCompleteFunc = TPersistCompleteFunc(), - CBackgroundPersister *periodicPersister = nullptr, - core_t::TTime maxQuantileInterval = -1, - const std::string &timeFieldName = DEFAULT_TIME_FIELD_NAME, - const std::string &timeFieldFormat = EMPTY_STRING, - size_t maxAnomalyRecords = 0u); - - virtual ~CAnomalyJob(); - - //! We're going to be writing to a new output stream - virtual void newOutputStream(); - - //! Access the output handler - virtual COutputHandler &outputHandler(); - - //! Receive a single record to be processed, and produce output - //! with any required modifications - virtual bool handleRecord(const TStrStrUMap &dataRowFields); - - //! Perform any final processing once all input data has been seen. - virtual void finalise(); - - //! 
Restore previously saved state - virtual bool restoreState(core::CDataSearcher &restoreSearcher, - core_t::TTime &completeToTime); - - //! Persist current state - virtual bool persistState(core::CDataAdder &persister); - - //! Initialise normalizer from quantiles state - virtual bool initNormalizer(const std::string &quantilesStateFile); - - //! How many records did we handle? - virtual uint64_t numRecordsHandled() const; - - //! Log a list of the detectors and keys - void description() const; - - //! Log a list of the detectors, keys and their memory usage - void descriptionAndDebugMemoryUsage() const; - - //! Extra information on the success/failure of restoring the model state. - //! In certain situations such as no data being loaded from the restorer - //! or the stored state version is wrong the restoreState function will - //! still return true. If interested in these kinds of errors check them - //! here. - const SRestoredStateDetail &restoreStateStatus() const; - - private: - //! NULL pointer that we can take a long-lived const reference to - static const TAnomalyDetectorPtr NULL_DETECTOR; - - private: - //! Handle a control message. The first character of the control - //! message indicates its type. Currently defined types are: - //! ' ' => Dummy message to force all previously uploaded data through - //! buffers - //! 'f' => Echo a flush ID so that the attached process knows that data - //! sent previously has all been processed - //! 'i' => Generate interim results - bool handleControlMessage(const std::string &controlMessage); - - //! Write out the results for the bucket starting at \p bucketStartTime. - void outputResults(core_t::TTime bucketStartTime); - - //! Write out interim results for the bucket starting at \p bucketStartTime. - void outputInterimResults(core_t::TTime bucketStartTime); - - //! Helper function for outputResults. - //! \p processingTimer is the processing time can be written to the bucket - //! \p sumPastProcessingTime is the total time previously spent processing - //! but resulted in no bucket being outputted. - void writeOutResults(bool interim, model::CHierarchicalResults &results, - core_t::TTime bucketTime, uint64_t processingTime, - uint64_t sumPastProcessingTime); - - //! Reset buckets in the range specified by the control message. - void resetBuckets(const std::string &controlMessage); - - //! Attempt to restore the detectors - bool restoreState(core::CStateRestoreTraverser &traverser, - core_t::TTime &completeToTime, - std::size_t &numDetectors); - - //! Attempt to restore one detector from an already-created traverser. - bool restoreSingleDetector(core::CStateRestoreTraverser &traverser); - - //! Restore the detector identified by \p key and \p partitionFieldValue - //! from \p traverser. - bool restoreDetectorState(const model::CSearchKey &key, - const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser); - - //! Persist current state in the background - bool backgroundPersistState(CBackgroundPersister &backgroundPersister); - - //! This is the function that is called in a different thread to the - //! main processing when background persistence is triggered. - bool runBackgroundPersist(TBackgroundPersistArgsPtr args, - core::CDataAdder &persister); - - //! Persist the detectors to a stream. 
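        //!
        //! A sketch of how these arguments come together, inferred from the
        //! declarations in this header rather than quoted from the
        //! implementation: the mutable state is first copied into an
        //! SBackgroundPersistArgs snapshot, so a background call only ever
        //! reads the copy.
        //! \code
        //! TBackgroundPersistArgsPtr args = boost::make_shared<SBackgroundPersistArgs>(
        //!     m_ResultsQueue, m_ModelPlotQueue, time, modelSizeStats,
        //!     m_Aggregator, m_LatestRecordTime, m_LastResultsTime);
        //! // runBackgroundPersist(args, persister) then forwards the copied
        //! // state to this overload of persistState().
        //! \endcode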
- bool persistState(const std::string &descriptionPrefix, - const model::CResultsQueue &resultsQueue, - const TModelPlotDataVecQueue &modelPlotQueue, - core_t::TTime time, - const TKeyCRefAnomalyDetectorPtrPrVec &detectors, - const model::CResourceMonitor::SResults &modelSizeStats, - const model::CHierarchicalResultsAggregator &aggregator, - const std::string &normalizerState, - core_t::TTime latestRecordTime, - core_t::TTime lastResultsTime, - core::CDataAdder &persister); - - //! Persist current state due to the periodic persistence being triggered. - virtual bool periodicPersistState(CBackgroundPersister &persister); - - //! Acknowledge a flush request - void acknowledgeFlush(const std::string &flushId); - - //! Advance time until \p time, if it can be parsed. - //! - //! This also calls outputBucketResultsUntil, so may generate results if - //! a bucket boundary is crossed and updates time in *all* the detector - //! models. - void advanceTime(const std::string &time); - - //! Output any results new results which are available at \p time. - void outputBucketResultsUntil(core_t::TTime time); - - //! Skip time to the bucket end of \p time, if it can be parsed. - void skipTime(const std::string &time); - - //! Rolls time to \p endTime while skipping sampling the models for buckets - //! within the gap. - //! - //! \param[in] endTime The end of the time interval to skip sampling. - void skipSampling(core_t::TTime endTime); - - //! Outputs queued results and resets the queue to the given \p startTime - void flushAndResetResultsQueue(core_t::TTime startTime); - - //! Roll time forward to \p time - void timeNow(core_t::TTime time); - - //! Get the bucketLength, or half the bucketLength if - //! out-of-phase buckets are active - core_t::TTime effectiveBucketLength() const; - - //! Update configuration - void updateConfig(const std::string &config); - - //! Generate interim results. - void generateInterimResults(const std::string &controlMessage); - - //! Parses the time range in a control message assuming the time range follows after a - //! single character code (e.g. starts with 'i10 20'). - bool parseTimeRangeInControlMessage(const std::string &controlMessage, - core_t::TTime &start, - core_t::TTime &end); - - //! Update equalizers if not interim and aggregate. - void updateAggregatorAndAggregate(bool isInterim, - model::CHierarchicalResults &results); - - //! Update quantiles if not interim and normalize. - void updateQuantilesAndNormalize(bool isInterim, - model::CHierarchicalResults &results); - - //! Outputs results for the buckets that are within the specified range. - //! The range includes the start but does not include the end. - void outputResultsWithinRange(bool isInterim, - core_t::TTime start, - core_t::TTime end); - - //! Generate the model plot for the models of the specified detector in the - //! specified time range. - void generateModelPlot(core_t::TTime startTime, - core_t::TTime endTime, - const model::CAnomalyDetector &detector); - - //! Write the pre-generated model plot to the output stream of the user's - //! choosing: either file or streamed to the API - void writeOutModelPlot(core_t::TTime resultsTime); - - //! Write the pre-generated model plot to the output stream of the user's - //! choosing: either file or streamed to the API - void writeOutModelPlot(core_t::TTime, CModelPlotDataJsonWriter &writer); - - //! Persist one detector to a stream. - //! This method is static so that there is no danger of it accessing - //! the member variables of an object. 
This makes it safer to call
-        //! from within a persistence thread that's working off a cloned
-        //! anomaly detector.
-        static void persistIndividualDetector(const model::CAnomalyDetector &detector,
-                                              core::CStatePersistInserter &inserter);

-        //! Iterate over the models, refresh their memory status, and send a report
-        //! to the API
-        void refreshMemoryAndReport();
+class API_EXPORT CAnomalyJob : public CDataProcessor {
+public:
+    //! Elasticsearch index for state
+    static const std::string ML_STATE_INDEX;
+
+    //! Discriminant for Elasticsearch IDs
+    static const std::string STATE_TYPE;
+
+    //! Input field names
+    static const std::string EMPTY_STRING;
+    static const std::string DEFAULT_TIME_FIELD_NAME;
+
+public:
+    //! Enum represents the result of persisted Model restoration
+    //! Possible states are:
+    //! -# IncorrectVersion: The version of the stored model state
+    //!    does not match the anomaly detector version.
+    //! -# UnexpectedTag: State is malformed or could not be parsed
+    //!    correctly
+    //! -# MemoryLimitReached: The detector could not be allocated
+    //!    because it would violate the memory usage restrictions
+    //! -# NotRestoredToTime: The detector was not restored to the
+    //!    requested time
+    //! -# Success:
+    //! -# Failure:
+    enum ERestoreStateStatus {
+        E_IncorrectVersion,
+        E_UnexpectedTag,
+        E_MemoryLimitReached,
+        E_NotRestoredToTime,
+        E_NoDetectorsRecovered,
+        E_Success,
+        E_Failure
+    };
+
+public:
+    using TPersistCompleteFunc = std::function;
+    using TAnomalyDetectorPtr = model::CAnomalyDetector::TAnomalyDetectorPtr;
+    using TAnomalyDetectorPtrVec = std::vector;
+    using TAnomalyDetectorPtrVecItr = std::vector::iterator;
+    using TAnomalyDetectorPtrVecCItr = std::vector::const_iterator;
+    using TKeyVec = std::vector;
+    using TKeyAnomalyDetectorPtrUMap =
+        boost::unordered_map;
+    using TKeyCRefAnomalyDetectorPtrPr = std::pair;
+    using TKeyCRefAnomalyDetectorPtrPrVec = std::vector;
+    using TModelPlotDataVec = model::CAnomalyDetector::TModelPlotDataVec;
+    using TModelPlotDataVecCItr = TModelPlotDataVec::const_iterator;
+    using TModelPlotDataVecQueue = model::CBucketQueue;
+
+    struct API_EXPORT SRestoredStateDetail {
+        ERestoreStateStatus s_RestoredStateStatus;
+        boost::optional s_Extra;
+    };
+
+    struct SBackgroundPersistArgs {
+        SBackgroundPersistArgs(const model::CResultsQueue& resultsQueue,
+                               const TModelPlotDataVecQueue& modelPlotQueue,
+                               core_t::TTime time,
+                               const model::CResourceMonitor::SResults& modelSizeStats,
+                               const model::CHierarchicalResultsAggregator& aggregator,
+                               core_t::TTime latestRecordTime,
+                               core_t::TTime lastResultsTime);
+
+        model::CResultsQueue s_ResultsQueue;
+        TModelPlotDataVecQueue s_ModelPlotQueue;
+        core_t::TTime s_Time;
+        model::CResourceMonitor::SResults s_ModelSizeStats;
+        model::CHierarchicalResultsAggregator s_Aggregator;
+        std::string s_NormalizerState;
+        core_t::TTime s_LatestRecordTime;
+        core_t::TTime s_LastResultsTime;
+        TKeyCRefAnomalyDetectorPtrPrVec s_Detectors;
+    };
+
+    using TBackgroundPersistArgsPtr = boost::shared_ptr;
+
+public:
+    CAnomalyJob(const std::string& jobId,
+                model::CLimits& limits,
+                CFieldConfig& fieldConfig,
+                model::CAnomalyDetectorModelConfig& modelConfig,
+                core::CJsonOutputStreamWrapper& outputBuffer,
+                const TPersistCompleteFunc& persistCompleteFunc = TPersistCompleteFunc(),
+                CBackgroundPersister* periodicPersister = nullptr,
+                core_t::TTime maxQuantileInterval = -1,
+                const std::string& timeFieldName = DEFAULT_TIME_FIELD_NAME,
+                const std::string& timeFieldFormat = EMPTY_STRING,
+                size_t maxAnomalyRecords = 0u);
+
+    virtual ~CAnomalyJob();
+
+    //! We're going to be writing to a new output stream
+    virtual void newOutputStream();
+
+    //! Access the output handler
+    virtual COutputHandler& outputHandler();
+
+    //! Receive a single record to be processed, and produce output
+    //! with any required modifications
+    virtual bool handleRecord(const TStrStrUMap& dataRowFields);
+
+    //! Perform any final processing once all input data has been seen.
+    virtual void finalise();
+
+    //! Restore previously saved state
+    virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime);
+
+    //! Persist current state
+    virtual bool persistState(core::CDataAdder& persister);
+
+    //! Initialise normalizer from quantiles state
+    virtual bool initNormalizer(const std::string& quantilesStateFile);
+
+    //! How many records did we handle?
+    virtual uint64_t numRecordsHandled() const;
+
+    //! Log a list of the detectors and keys
+    void description() const;
+
+    //! Log a list of the detectors, keys and their memory usage
+    void descriptionAndDebugMemoryUsage() const;
+
+    //! Extra information on the success/failure of restoring the model state.
+    //! In certain situations such as no data being loaded from the restorer
+    //! or the stored state version is wrong the restoreState function will
+    //! still return true. If interested in these kinds of errors check them
+    //! here.
+    const SRestoredStateDetail& restoreStateStatus() const;
+
+private:
+    //! NULL pointer that we can take a long-lived const reference to
+    static const TAnomalyDetectorPtr NULL_DETECTOR;
+
+private:
+    //! Handle a control message. The first character of the control
+    //! message indicates its type. Currently defined types are:
+    //! ' ' => Dummy message to force all previously uploaded data through
+    //!        buffers
+    //! 'f' => Echo a flush ID so that the attached process knows that data
+    //!        sent previously has all been processed
+    //! 'i' => Generate interim results
+    bool handleControlMessage(const std::string& controlMessage);
+
+    //! Write out the results for the bucket starting at \p bucketStartTime.
+    void outputResults(core_t::TTime bucketStartTime);
+
+    //! Write out interim results for the bucket starting at \p bucketStartTime.
+    void outputInterimResults(core_t::TTime bucketStartTime);
+
+    //! Helper function for outputResults.
+    //! \p processingTime is the processing time that can be written to the bucket
+    //! \p sumPastProcessingTime is the total time previously spent processing
+    //! but resulted in no bucket being outputted.
+    void writeOutResults(bool interim,
+                         model::CHierarchicalResults& results,
+                         core_t::TTime bucketTime,
+                         uint64_t processingTime,
+                         uint64_t sumPastProcessingTime);
+
+    //! Reset buckets in the range specified by the control message.
+    void resetBuckets(const std::string& controlMessage);
+
+    //! Attempt to restore the detectors
+    bool restoreState(core::CStateRestoreTraverser& traverser, core_t::TTime& completeToTime, std::size_t& numDetectors);
+
+    //! Attempt to restore one detector from an already-created traverser.
+    bool restoreSingleDetector(core::CStateRestoreTraverser& traverser);
+
+    //! Restore the detector identified by \p key and \p partitionFieldValue
+    //! from \p traverser.
+    bool
+    restoreDetectorState(const model::CSearchKey& key, const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser);
+
+    //! Persist current state in the background
+    bool backgroundPersistState(CBackgroundPersister& backgroundPersister);
+
+    //! This is the function that is called in a different thread to the
+    //! main processing when background persistence is triggered.
+    bool runBackgroundPersist(TBackgroundPersistArgsPtr args, core::CDataAdder& persister);
+
+    //! Persist the detectors to a stream.
+    bool persistState(const std::string& descriptionPrefix,
+                      const model::CResultsQueue& resultsQueue,
+                      const TModelPlotDataVecQueue& modelPlotQueue,
+                      core_t::TTime time,
+                      const TKeyCRefAnomalyDetectorPtrPrVec& detectors,
+                      const model::CResourceMonitor::SResults& modelSizeStats,
+                      const model::CHierarchicalResultsAggregator& aggregator,
+                      const std::string& normalizerState,
+                      core_t::TTime latestRecordTime,
+                      core_t::TTime lastResultsTime,
+                      core::CDataAdder& persister);
+
+    //! Persist current state due to the periodic persistence being triggered.
+    virtual bool periodicPersistState(CBackgroundPersister& persister);
+
+    //! Acknowledge a flush request
+    void acknowledgeFlush(const std::string& flushId);
+
+    //! Advance time until \p time, if it can be parsed.
+    //!
+    //! This also calls outputBucketResultsUntil, so may generate results if
+    //! a bucket boundary is crossed and updates time in *all* the detector
+    //! models.
+    void advanceTime(const std::string& time);
+
+    //! Output any new results which are available at \p time.
+    void outputBucketResultsUntil(core_t::TTime time);
+
+    //! Skip time to the bucket end of \p time, if it can be parsed.
+    void skipTime(const std::string& time);
+
+    //! Rolls time to \p endTime while skipping sampling the models for buckets
+    //! within the gap.
+    //!
+    //! \param[in] endTime The end of the time interval to skip sampling.
+    void skipSampling(core_t::TTime endTime);
+
+    //! Outputs queued results and resets the queue to the given \p startTime
+    void flushAndResetResultsQueue(core_t::TTime startTime);
+
+    //! Roll time forward to \p time
+    void timeNow(core_t::TTime time);
+
+    //! Get the bucketLength, or half the bucketLength if
+    //! out-of-phase buckets are active
+    core_t::TTime effectiveBucketLength() const;
+
+    //! Update configuration
+    void updateConfig(const std::string& config);
+
+    //! Generate interim results.
+    void generateInterimResults(const std::string& controlMessage);
+
+    //! Parses the time range in a control message assuming the time range follows after a
+    //! single character code (e.g. starts with 'i10 20').
+    bool parseTimeRangeInControlMessage(const std::string& controlMessage, core_t::TTime& start, core_t::TTime& end);
+
+    //! Update equalizers if not interim and aggregate.
+    void updateAggregatorAndAggregate(bool isInterim, model::CHierarchicalResults& results);
+
+    //! Update quantiles if not interim and normalize.
+    void updateQuantilesAndNormalize(bool isInterim, model::CHierarchicalResults& results);
+
+    //! Outputs results for the buckets that are within the specified range.
+    //! The range includes the start but does not include the end.
+    void outputResultsWithinRange(bool isInterim, core_t::TTime start, core_t::TTime end);

-        //! Update configuration
-        void doForecast(const std::string &controlMessage);

+    //! Generate the model plot for the models of the specified detector in the
+    //! specified time range.
+    void generateModelPlot(core_t::TTime startTime, core_t::TTime endTime, const model::CAnomalyDetector& detector);

+    //! Write the pre-generated model plot to the output stream of the user's
+    //! 
choosing: either file or streamed to the API + void writeOutModelPlot(core_t::TTime resultsTime); - model::CAnomalyDetector::TAnomalyDetectorPtr - makeDetector(int identifier, - const model::CAnomalyDetectorModelConfig &modelConfig, - model::CLimits &limits, - const std::string &partitionFieldValue, - core_t::TTime firstTime, - const model::CAnomalyDetector::TModelFactoryCPtr &modelFactory); + //! Write the pre-generated model plot to the output stream of the user's + //! choosing: either file or streamed to the API + void writeOutModelPlot(core_t::TTime, CModelPlotDataJsonWriter& writer); - //! Populate detector keys from the field config. - void populateDetectorKeys(const CFieldConfig &fieldConfig, TKeyVec &keys); + //! Persist one detector to a stream. + //! This method is static so that there is no danger of it accessing + //! the member variables of an object. This makes it safer to call + //! from within a persistence thread that's working off a cloned + //! anomaly detector. + static void persistIndividualDetector(const model::CAnomalyDetector& detector, core::CStatePersistInserter& inserter); - //! Extract the field called \p fieldName from \p dataRowFields. - const std::string *fieldValue(const std::string &fieldName, - const TStrStrUMap &dataRowFields); + //! Iterate over the models, refresh their memory status, and send a report + //! to the API + void refreshMemoryAndReport(); + //! Update configuration + void doForecast(const std::string& controlMessage); - //! Extract the required fields from \p dataRowFields - //! and add the new record to \p detector - void addRecord(const TAnomalyDetectorPtr detector, - core_t::TTime time, - const TStrStrUMap &dataRowFields); + model::CAnomalyDetector::TAnomalyDetectorPtr makeDetector(int identifier, + const model::CAnomalyDetectorModelConfig& modelConfig, + model::CLimits& limits, + const std::string& partitionFieldValue, + core_t::TTime firstTime, + const model::CAnomalyDetector::TModelFactoryCPtr& modelFactory); - protected: - //! Get all the detectors. - void detectors(TAnomalyDetectorPtrVec &detectors) const; + //! Populate detector keys from the field config. + void populateDetectorKeys(const CFieldConfig& fieldConfig, TKeyVec& keys); - //! Get the detectors by parition - const TKeyAnomalyDetectorPtrUMap & detectorPartitionMap() const; + //! Extract the field called \p fieldName from \p dataRowFields. + const std::string* fieldValue(const std::string& fieldName, const TStrStrUMap& dataRowFields); - //! Get all sorted references to the detectors. - void sortedDetectors(TKeyCRefAnomalyDetectorPtrPrVec &detectors) const; + //! Extract the required fields from \p dataRowFields + //! and add the new record to \p detector + void addRecord(const TAnomalyDetectorPtr detector, core_t::TTime time, const TStrStrUMap& dataRowFields); - //! Get a reference to the detector for a given key - const TAnomalyDetectorPtr &detectorForKey(bool isRestoring, - core_t::TTime time, - const model::CSearchKey &key, - const std::string &partitionFieldValue, - model::CResourceMonitor &resourceMonitor); +protected: + //! Get all the detectors. + void detectors(TAnomalyDetectorPtrVec& detectors) const; - //! Prune all the models - void pruneAllModels(); + //! Get the detectors by parition + const TKeyAnomalyDetectorPtrUMap& detectorPartitionMap() const; - private: - //! The job ID - std::string m_JobId; + //! Get all sorted references to the detectors. + void sortedDetectors(TKeyCRefAnomalyDetectorPtrPrVec& detectors) const; - //! 
Configurable limits - model::CLimits &m_Limits; + //! Get the detectors by partition + const TKeyAnomalyDetectorPtrUMap& detectorPartitionMap() const; - //! Stream used by the output writer - core::CJsonOutputStreamWrapper &m_OutputStream; + //! Get all sorted references to the detectors. + void sortedDetectors(TKeyCRefAnomalyDetectorPtrPrVec& detectors) const; - //! Responsible for performing forecasts - CForecastRunner m_ForecastRunner; + //! Get a reference to the detector for a given key + const TAnomalyDetectorPtr& detectorForKey(bool isRestoring, + core_t::TTime time, + const model::CSearchKey& key, + const std::string& partitionFieldValue, + model::CResourceMonitor& resourceMonitor); - //! Object to which the output is passed - CJsonOutputWriter m_JsonOutputWriter; + //! Prune all the models + void pruneAllModels(); - //! Field names to use for the analysis - CFieldConfig &m_FieldConfig; +private: + //! The job ID + std::string m_JobId; - //! The model configuration - model::CAnomalyDetectorModelConfig &m_ModelConfig; + //! Configurable limits + model::CLimits& m_Limits; - //! Keep count of how many records we've handled - uint64_t m_NumRecordsHandled; + //! Stream used by the output writer + core::CJsonOutputStreamWrapper& m_OutputStream; - //! Detector keys. - TKeyVec m_DetectorKeys; + //! Responsible for performing forecasts + CForecastRunner m_ForecastRunner; - //! Map of objects to provide the inner workings - TKeyAnomalyDetectorPtrUMap m_Detectors; + //! Object to which the output is passed + CJsonOutputWriter m_JsonOutputWriter; - //! The end time of the last bucket out of latency window we've seen - core_t::TTime m_LastFinalisedBucketEndTime; + //! Field names to use for the analysis + CFieldConfig& m_FieldConfig; - //! Optional function to be called when persistence is complete - TPersistCompleteFunc m_PersistCompleteFunc; + //! The model configuration + model::CAnomalyDetectorModelConfig& m_ModelConfig; - //! Name of field holding the time - std::string m_TimeFieldName; + //! Keep count of how many records we've handled + uint64_t m_NumRecordsHandled; - //! Time field format. Blank means seconds since the epoch, i.e. the - //! time field can be converted to a time_t by simply converting the - //! string to a number. - std::string m_TimeFieldFormat; + //! Detector keys. + TKeyVec m_DetectorKeys; - //! License restriction on the number of detectors allowed - size_t m_MaxDetectors; + //! Map of objects to provide the inner workings + TKeyAnomalyDetectorPtrUMap m_Detectors; - //! Pointer to periodic persister that works in the background. May be - //! nullptr if this object is not responsible for starting periodic - //! persistence. - CBackgroundPersister *m_PeriodicPersister; + //! The end time of the last bucket out of the latency window we've seen + core_t::TTime m_LastFinalisedBucketEndTime; - //! If we haven't output quantiles for this long due to a big anomaly - //! we'll output them to reflect decay. Non-positive values mean never. - core_t::TTime m_MaxQuantileInterval; + //! Optional function to be called when persistence is complete + TPersistCompleteFunc m_PersistCompleteFunc; - //! What was the wall clock time when we last persisted the - //! normalizer? The normalizer is persisted for two reasons: - //! either there was a significant change or more than a - //! certain period of time has passed since last time it was persisted. - core_t::TTime m_LastNormalizerPersistTime; + //! Name of field holding the time + std::string m_TimeFieldName; - //! Latest record time seen. - core_t::TTime m_LatestRecordTime; + //! Time field format. Blank means seconds since the epoch, i.e. the + //! time field can be converted to a time_t by simply converting the + //! string to a number. + std::string m_TimeFieldFormat; - //! Last time we sent a finalised result to the API. - core_t::TTime m_LastResultsTime; + //!
License restriction on the number of detectors allowed + size_t m_MaxDetectors; - //! Pointer to periodic persister that works in the background. May be - //! nullptr if this object is not responsible for starting periodic - //! persistence. - CBackgroundPersister* m_PeriodicPersister; + //! Pointer to periodic persister that works in the background. May be + //! nullptr if this object is not responsible for starting periodic + //! persistence. + CBackgroundPersister* m_PeriodicPersister; - //! If we haven't output quantiles for this long due to a big anomaly - //! we'll output them to reflect decay. Non-positive values mean never. - core_t::TTime m_MaxQuantileInterval; + //! If we haven't output quantiles for this long due to a big anomaly + //! we'll output them to reflect decay. Non-positive values mean never. + core_t::TTime m_MaxQuantileInterval; - //! What was the wall clock time when we last persisted the - //! normalizer? The normalizer is persisted for two reasons: - //! either there was a significant change or more than a - //! certain period of time has passed since last time it was persisted. - core_t::TTime m_LastNormalizerPersistTime; + //! What was the wall clock time when we last persisted the + //! normalizer? The normalizer is persisted for two reasons: + //! either there was a significant change or more than a + //! certain period of time has passed since last time it was persisted. + core_t::TTime m_LastNormalizerPersistTime; - //! Latest record time seen. - core_t::TTime m_LatestRecordTime; + //! Latest record time seen. + core_t::TTime m_LatestRecordTime; - //! Last time we sent a finalised result to the API. - core_t::TTime m_LastResultsTime; + //! Last time we sent a finalised result to the API. + core_t::TTime m_LastResultsTime; - //! When the model state was restored was it entirely successful. - //! Extra information about any errors that may have occurred - SRestoredStateDetail m_RestoredStateDetail; + //! Whether the model state was restored entirely successfully, plus + //! extra information about any errors that may have occurred + SRestoredStateDetail m_RestoredStateDetail; - //! The hierarchical results aggregator. - model::CHierarchicalResultsAggregator m_Aggregator; + //! The hierarchical results aggregator. + model::CHierarchicalResultsAggregator m_Aggregator; - //! The hierarchical results normalizer. - model::CHierarchicalResultsNormalizer m_Normalizer; + //! The hierarchical results normalizer. + model::CHierarchicalResultsNormalizer m_Normalizer; - //! Store the last N half-buckets' results in order - //! to choose the best result - model::CResultsQueue m_ResultsQueue; + //! Store the last N half-buckets' results in order + //! to choose the best result + model::CResultsQueue m_ResultsQueue; - //! Also store the model plot for the buckets for each - //! result time - these will be output when the corresponding - //! result is output - TModelPlotDataVecQueue m_ModelPlotQueue; + //! Also store the model plot for the buckets for each + //! result time - these will be output when the corresponding + //! result is output + TModelPlotDataVecQueue m_ModelPlotQueue; friend class ::CBackgroundPersisterTest; friend class ::CAnomalyJobTest; }; - } } #endif // INCLUDED_ml_api_CAnomalyJob_h - diff --git a/include/api/CBackgroundPersister.h b/include/api/CBackgroundPersister.h index 34018b628f..eb0577616f 100644 --- a/include/api/CBackgroundPersister.h +++ b/include/api/CBackgroundPersister.h @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include #include @@ -20,10 +20,8 @@ class CBackgroundPersisterTest; -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Enables a data adder to run in a different thread. @@ -61,127 +59,124 @@ namespace api //! the data adder is not thread safe then it may not be used by //! any other object until after this object is destroyed. //! -class API_EXPORT CBackgroundPersister : private core::CNonCopyable -{ +class API_EXPORT CBackgroundPersister : private core::CNonCopyable { +public: + using TFirstProcessorPeriodicPersistFunc = std::function<bool(CBackgroundPersister&)>; + +public: + //! The supplied data adder must outlive this object. If the data + //!
adder is not thread safe then it may not be used by any other + //! object until after this object is destroyed. When using this + //! constructor the first processor persistence function must be + //! set before the object is used. + CBackgroundPersister(core_t::TTime periodicPersistInterval, core::CDataAdder& dataAdder); + + //! As above, but also supply the first processor persistence + //! function at construction time. + CBackgroundPersister(core_t::TTime periodicPersistInterval, + const TFirstProcessorPeriodicPersistFunc& firstProcessorPeriodicPersistFunc, + core::CDataAdder& dataAdder); + + ~CBackgroundPersister(); + + //! Is background persistence currently in progress? + bool isBusy() const; + + //! Wait for any background persistence currently in progress to + //! complete + bool waitForIdle(); + + //! Add a function to be called when the background persist is started. + //! This will be rejected if a background persistence is currently in + //! progress. It is likely that the supplied \p persistFunc will have + //! data bound into it that will be used by the function it calls, i.e. the + //! called function will take more arguments than just the data adder. + //! \return true if the function was added; false if not. + bool addPersistFunc(core::CDataAdder::TPersistFunc persistFunc); + + //! Set the first processor persist function, which is used to start the + //! chain of background persistence. This will be rejected if a + //! background persistence is currently in progress. + //! This should be set once before startBackgroundPersistIfAppropriate is + //! called. + bool firstProcessorPeriodicPersistFunc(const TFirstProcessorPeriodicPersistFunc& firstProcessorPeriodicPersistFunc); + + //! Start a background persist if one is not running. + //! Calls the first processor periodic persist function first. + //! Concurrent calls to this method are not threadsafe. + bool startBackgroundPersist(); + + //! If the periodic persist interval has passed since the last persist + //! then it is appropriate to persist now. Start it by calling the + //! first processor periodic persist function. + //! Concurrent calls to this method are not threadsafe. + bool startBackgroundPersistIfAppropriate(); + +private: + //! Implementation of the background thread + class CBackgroundThread : public core::CThread { public: - using TFirstProcessorPeriodicPersistFunc = std::function; + CBackgroundThread(CBackgroundPersister& owner); - public: - //! The supplied data adder must outlive this object. If the data - //! adder is not thread safe then it may not be used by any other - //! object until after this object is destroyed. When using this - //! constructor the first processor persistence function must be - //! set before the object is used. - CBackgroundPersister(core_t::TTime periodicPersistInterval, - core::CDataAdder &dataAdder); - - //! As above, but also supply the first processor persistence - //! function at construction time. - CBackgroundPersister(core_t::TTime periodicPersistInterval, - const TFirstProcessorPeriodicPersistFunc &firstProcessorPeriodicPersistFunc, - core::CDataAdder &dataAdder); - - ~CBackgroundPersister(); - - //! Is background persistence currently in progress? - bool isBusy() const; - - //! Wait for any background persistence currently in progress to - //! complete - bool waitForIdle(); - - //! Add a function to be called when the background persist is started. - //! This will be rejected if a background persistence is currently in - //! progress.
It is likely that the supplied \p persistFunc will have - //! data bound into it that will be used by the function it calls, i.e. the - //! called function will take more arguments than just the data adder. - //! \return true if the function was added; false if not. - bool addPersistFunc(core::CDataAdder::TPersistFunc persistFunc); - - //! Set the first processor persist function, which is used to start the - //! chain of background persistence. This will be rejected if a - //! background persistence is currently in progress. - //! This should be set once before startBackgroundPersistIfAppropriate is - //! called. - bool firstProcessorPeriodicPersistFunc(const TFirstProcessorPeriodicPersistFunc &firstProcessorPeriodicPersistFunc); - - //! Start a background persist is one is not running. - //! Calls the first processor periodic persist function first. - //! Concurrent calls to this method are not threadsafe. - bool startBackgroundPersist(); - - //! If the periodic persist interval has passed since the last persist - //! then it is appropriate to persist now. Start it by calling the - //! first processor periodic persist function. - //! Concurrent calls to this method are not threadsafe. - bool startBackgroundPersistIfAppropriate(); + protected: + //! Inherited virtual interface + virtual void run(); + virtual void shutdown(); private: - //! Implementation of the background thread - class CBackgroundThread : public core::CThread - { - public: - CBackgroundThread(CBackgroundPersister &owner); - - protected: - //! Inherited virtual interface - virtual void run(); - virtual void shutdown(); - - private: - //! Reference to the owning background persister - CBackgroundPersister &m_Owner; - }; + private: + //! Reference to the owning background persister + CBackgroundPersister& m_Owner; + }; - private: - //! Persist in the background setting the last persist time - //! to timeOfPersistence - bool startBackgroundPersist(core_t::TTime timeOfPersistence); +private: + //! Persist in the background setting the last persist time + //! to timeOfPersistence + bool startBackgroundPersist(core_t::TTime timeOfPersistence); - //! When this function is called a background persistence will be - //! triggered unless there is already one in progress. - bool startPersist(); + //! When this function is called a background persistence will be + //! triggered unless there is already one in progress. + bool startPersist(); - //! Clear any persistence functions that have been added but not yet - //! invoked. This will be rejected if a background persistence is - //! currently in progress. - //! \return true if the list of functions is clear; false if not. - bool clear(); + //! Clear any persistence functions that have been added but not yet + //! invoked. This will be rejected if a background persistence is + //! currently in progress. + //! \return true if the list of functions is clear; false if not. + bool clear(); - private: - //! How frequently should background persistence be attempted? - core_t::TTime m_PeriodicPersistInterval; +private: + //! How frequently should background persistence be attempted? + core_t::TTime m_PeriodicPersistInterval; - //! What was the wall clock time when we started our last periodic - //! persistence? - core_t::TTime m_LastPeriodicPersistTime; + //! What was the wall clock time when we started our last periodic + //! persistence? + core_t::TTime m_LastPeriodicPersistTime; - //! The function that will be called to start the chain of background - //! persistence. - TFirstProcessorPeriodicPersistFunc m_FirstProcessorPeriodicPersistFunc; + //! The function that will be called to start the chain of background + //! persistence. + TFirstProcessorPeriodicPersistFunc m_FirstProcessorPeriodicPersistFunc;
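The wiring implied by the declarations above can be sketched briefly. The std::function signature of TFirstProcessorPeriodicPersistFunc is elided in this header, so the sketch assumes it is bool(CBackgroundPersister&), matching periodicPersistState() declared in CAnomalyJob; the interval, function name and include paths are likewise illustrative, not part of this patch.

#include <api/CAnomalyJob.h>
#include <api/CBackgroundPersister.h>

// Illustrative wiring only, under the assumptions stated above.
void wirePeriodicPersistence(ml::api::CAnomalyJob& job, ml::core::CDataAdder& dataAdder) {
    // Attempt a background persist at most once per hour (assumed interval).
    ml::api::CBackgroundPersister persister(3600, dataAdder);

    // Start the chain from the anomaly job, mirroring
    // periodicPersistState(CBackgroundPersister&) declared earlier.
    persister.firstProcessorPeriodicPersistFunc(
        [&job](ml::api::CBackgroundPersister& p) { return job.periodicPersistState(p); });

    // Called on the data path; persists only once the interval has elapsed.
    persister.startBackgroundPersistIfAppropriate();
}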
- //! Reference to the data adder to be used by the background thread. - //! The data adder refered to must outlive this object. If the data - //! adder is not thread safe then it may not be used by any other - //! object until after this object is destroyed. - core::CDataAdder &m_DataAdder; + //! Reference to the data adder to be used by the background thread. + //! The data adder referred to must outlive this object. If the data + //! adder is not thread safe then it may not be used by any other + //! object until after this object is destroyed. + core::CDataAdder& m_DataAdder; - //! Mutex to ensure atomicity of operations where required. - core::CFastMutex m_Mutex; + //! Mutex to ensure atomicity of operations where required. + core::CFastMutex m_Mutex; - //! Is the background thread currently busy persisting data? - std::atomic_bool m_IsBusy; + //! Is the background thread currently busy persisting data? + std::atomic_bool m_IsBusy; - //! Have we been told to shut down? - std::atomic_bool m_IsShutdown; + //! Have we been told to shut down? + std::atomic_bool m_IsShutdown; - using TPersistFuncList = std::list; + using TPersistFuncList = std::list<core::CDataAdder::TPersistFunc>; - //! Function to call in the background thread to do persistence. - TPersistFuncList m_PersistFuncs; + //! Function to call in the background thread to do persistence. + TPersistFuncList m_PersistFuncs; - //! Thread used to do the background work - CBackgroundThread m_BackgroundThread; + //! Thread used to do the background work + CBackgroundThread m_BackgroundThread; // Allow the background thread to access the member variables of the owning // object @@ -190,10 +185,7 @@ class API_EXPORT CBackgroundPersister : private core::CNonCopyable // For testing friend class ::CBackgroundPersisterTest; }; - - } } #endif // INCLUDED_ml_api_CBackgroundPersister_h - diff --git a/include/api/CBaseTokenListDataTyper.h b/include/api/CBaseTokenListDataTyper.h index f003cdbc86..3a055f2951 100644 --- a/include/api/CBaseTokenListDataTyper.h +++ b/include/api/CBaseTokenListDataTyper.h @@ -24,16 +24,12 @@ class CBaseTokenListDataTyperTest; - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace api -{ +namespace api { class CTokenListReverseSearchCreatorIntf; //! \brief @@ -60,272 +56,236 @@ class CTokenListReverseSearchCreatorIntf; //! correct setting of reverse search creator state needs to be added to //! the copy constructor and assignment operator of this class.) //! -class API_EXPORT CBaseTokenListDataTyper : public CDataTyper -{ +class API_EXPORT CBaseTokenListDataTyper : public CDataTyper { +public: + //! Name of the field that contains pre-tokenised tokens (in CSV format) + //! if available + static const std::string PRETOKENISED_TOKEN_FIELD; + +public: + //! Shared pointer to the reverse search creator so that we'll still + //! function after being shallow copied + using TTokenListReverseSearchCreatorIntfCPtr = boost::shared_ptr<const CTokenListReverseSearchCreatorIntf>; + + //! Used to associate tokens with weightings: + //! first -> token ID + //! second -> weighting + using TSizeSizePr = std::pair<size_t, size_t>; + + //! Used for storing token ID sequences + using TSizeSizePrVec = std::vector<TSizeSizePr>; + + //! Used for storing distinct token IDs + using TSizeSizeMap = std::map<size_t, size_t>; + + //!
Used for stream output of token IDs translated back to the original + //! tokens + struct API_EXPORT SIdTranslater { + SIdTranslater(const CBaseTokenListDataTyper& typer, const TSizeSizePrVec& tokenIds, char separator); + + const CBaseTokenListDataTyper& s_Typer; + const TSizeSizePrVec& s_TokenIds; + char s_Separator; + }; + +public: + //! Create a data typer with a threshold for how comparable types are: + //! 0.0 means everything is the same type + //! 1.0 means things have to match exactly to be the same type + CBaseTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr& reverseSearchCreator, + double threshold, + const std::string& fieldName); + + //! Dump stats + virtual void dumpStats() const; + + //! Compute a type from a string. The raw string length may be longer + //! than the length of the passed string, because the passed string may + //! have the date stripped out of it. Field names/values are available + //! to the type computation. + virtual int computeType(bool dryRun, const TStrStrUMap& fields, const std::string& str, size_t rawStringLen); + + // Bring the other overload of computeType() into scope + using CDataTyper::computeType; + + //! Create a search that will (more or less) just select the records + //! that are classified as the given type. Note that the reverse search + //! is only approximate - it may select more records than have actually + //! been classified as the returned type. + virtual bool createReverseSearch(int type, std::string& part1, std::string& part2, size_t& maxMatchingLength, bool& wasCached); + + //! Has the data typer's state changed? + virtual bool hasChanged() const; + + //! Populate the object from part of a state document + virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Make a function that can be called later to persist state + virtual TPersistFunc makePersistFunc() const; + +protected: + //! Split the string into a list of tokens. The result of the + //! tokenisation is returned in \p tokenIds, \p tokenUniqueIds and + //! \p totalWeight. Any previous content of these variables is wiped. + virtual void tokeniseString(const TStrStrUMap& fields, + const std::string& str, + TSizeSizePrVec& tokenIds, + TSizeSizeMap& tokenUniqueIds, + size_t& totalWeight) = 0; + + //! Take a string token, convert it to a numeric ID and a weighting and + //! add these to the provided data structures. + virtual void + tokenToIdAndWeight(const std::string& token, TSizeSizePrVec& tokenIds, TSizeSizeMap& tokenUniqueIds, size_t& totalWeight) = 0; + + //! Compute similarity between two vectors + virtual double similarity(const TSizeSizePrVec& left, size_t leftWeight, const TSizeSizePrVec& right, size_t rightWeight) const = 0; + + //! Used to hold statistics about the types we compute: + //! first -> count of matches + //! second -> type vector index + using TSizeSizePrList = std::list<TSizeSizePr>; + using TSizeSizePrListItr = TSizeSizePrList::iterator; + + //! Add a match to an existing type + void addTypeMatch(bool isDryRun, + const std::string& str, + size_t rawStringLen, + const TSizeSizePrVec& tokenIds, + const TSizeSizeMap& tokenUniqueIds, + double similarity, + TSizeSizePrListItr& iter); + + //! Given the total token weight in a vector and a threshold, what is + //! the minimum possible token weight in a different vector that could + //! possibly be considered to match? + static size_t minMatchingWeight(size_t weight, double threshold); + + //! Given the total token weight in a vector and a threshold, what is + //! the maximum possible token weight in a different vector that could + //! possibly be considered to match? + static size_t maxMatchingWeight(size_t weight, double threshold);
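The two weight bounds declared above lend themselves to a small worked sketch. The formulas below are one plausible reading of the comments, namely that with similarity threshold t in (0, 1] a candidate whose total weight falls below t * weight or above weight / t cannot reach the threshold; they are not lifted from the actual implementation.

#include <cmath>
#include <cstddef>

// Sketch only: plausible bounds, not CBaseTokenListDataTyper's formulas.
// With weight = 10 and threshold = 0.7 these give 7 and 14 respectively.
std::size_t minMatchingWeightSketch(std::size_t weight, double threshold) {
    return static_cast<std::size_t>(std::ceil(static_cast<double>(weight) * threshold));
}

std::size_t maxMatchingWeightSketch(std::size_t weight, double threshold) {
    return static_cast<std::size_t>(std::floor(static_cast<double>(weight) / threshold));
}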
+ + //! Get the unique token ID for a given token (assigning one if it's + //! being seen for the first time) + size_t idForToken(const std::string& token); + +private: + //! Value type for the TTokenMIndex below + class CTokenInfoItem { public: - //! Name of the field that contains pre-tokenised tokens (in CSV format) - //! if available - static const std::string PRETOKENISED_TOKEN_FIELD; + CTokenInfoItem(const std::string& str, size_t index); - public: - //! Shared pointer to reverse search creator that we're will function - //! after being shallow copied - using TTokenListReverseSearchCreatorIntfCPtr = boost::shared_ptr; - - //! Used to associate tokens with weightings: - //! first -> token ID - //! second -> weighting - using TSizeSizePr = std::pair; - - //! Used for storing token ID sequences - using TSizeSizePrVec = std::vector; - - //! Used for storing distinct token IDs - using TSizeSizeMap = std::map; - - //! Used for stream output of token IDs translated back to the original - //! tokens - struct API_EXPORT SIdTranslater - { - SIdTranslater(const CBaseTokenListDataTyper &typer, - const TSizeSizePrVec &tokenIds, - char separator); - - const CBaseTokenListDataTyper &s_Typer; - const TSizeSizePrVec &s_TokenIds; - char s_Separator; - }; + //! Accessors + const std::string& str() const; + size_t index() const; + size_t typeCount() const; + void typeCount(size_t typeCount); - public: - //! Create a data typer with threshold for how comparable types are - //! 0.0 means everything is the same type - //! 1.0 means things have to match exactly to be the same type - CBaseTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr &reverseSearchCreator, - double threshold, - const std::string &fieldName); - - //! Dump stats - virtual void dumpStats() const; - - //! Compute a type from a string. The raw string length may be longer - //! than the length of the passed string, because the passed string may - //! have the date stripped out of it. Field names/values are available - //! to the type computation. - virtual int computeType(bool dryRun, - const TStrStrUMap &fields, - const std::string &str, - size_t rawStringLen); - - // Bring the other overload of computeType() into scope - using CDataTyper::computeType; - - //! Create a search that will (more or less) just select the records - //! that are classified as the given type. Note that the reverse search - //! is only approximate - it may select more records than have actually - //! been classified as the returned type. - virtual bool createReverseSearch(int type, - std::string &part1, - std::string &part2, - size_t &maxMatchingLength, - bool &wasCached); - - //! Has the data typer's state changed? - virtual bool hasChanged() const; - - //! Populate the object from part of a state document - virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Make a function that can be called later to persist state - virtual TPersistFunc makePersistFunc() const; - - protected: - //! Split the string into a list of tokens. The result of the - //!
tokenisation is returned in \p tokenIds, \p tokenUniqueIds and - //! \p totalWeight. Any previous content of these variables is wiped. - virtual void tokeniseString(const TStrStrUMap &fields, - const std::string &str, - TSizeSizePrVec &tokenIds, - TSizeSizeMap &tokenUniqueIds, - size_t &totalWeight) = 0; - - //! Take a string token, convert it to a numeric ID and a weighting and - //! add these to the provided data structures. - virtual void tokenToIdAndWeight(const std::string &token, - TSizeSizePrVec &tokenIds, - TSizeSizeMap &tokenUniqueIds, - size_t &totalWeight) = 0; - - //! Compute similarity between two vectors - virtual double similarity(const TSizeSizePrVec &left, - size_t leftWeight, - const TSizeSizePrVec &right, - size_t rightWeight) const = 0; - - //! Used to hold statistics about the types we compute: - //! first -> count of matches - //! second -> type vector index - using TSizeSizePrList = std::list; - using TSizeSizePrListItr = TSizeSizePrList::iterator; - - //! Add a match to an existing type - void addTypeMatch(bool isDryRun, - const std::string &str, - size_t rawStringLen, - const TSizeSizePrVec &tokenIds, - const TSizeSizeMap &tokenUniqueIds, - double similarity, - TSizeSizePrListItr &iter); - - //! Given the total token weight in a vector and a threshold, what is - //! the minimum possible token weight in a different vector that could - //! possibly be considered to match? - static size_t minMatchingWeight(size_t weight, double threshold); - - //! Given the total token weight in a vector and a threshold, what is - //! maximum possible token weight in a different vector that could - //! possibly be considered to match? - static size_t maxMatchingWeight(size_t weight, double threshold); - - //! Get the unique token ID for a given token (assigning one if it's - //! being seen for the first time) - size_t idForToken(const std::string &token); + //! Increment the type count + void incTypeCount(); private: - //! Value type for the TTokenMIndex below - class CTokenInfoItem - { - public: - CTokenInfoItem(const std::string &str, - size_t index); - - //! Accessors - const std::string &str() const; - size_t index() const; - size_t typeCount() const; - void typeCount(size_t typeCount); - - //! Increment the type count - void incTypeCount(); - - private: - //! String value of the token - std::string m_Str; - - //! Index of the token - size_t m_Index; - - //! How many types use this token? - size_t m_TypeCount; - }; - - //! Compute equality based on the first element of a pair only - class CSizePairFirstElementEquals - { - public: - CSizePairFirstElementEquals(size_t value); - - //! PAIRTYPE can be any struct with a data member named "first" - //! that can be checked for equality to a size_t - template - bool operator()(const PAIRTYPE &lhs) const - { - return lhs.first == m_Value; - } - - private: - size_t m_Value; - }; - - //! Used to hold the distinct types we compute (vector reallocations are - //! not expensive because CTokenListType is movable) - using TTokenListTypeVec = std::vector; - - //! Tag for the token index - struct SToken - { - }; - - using TTokenMIndex = boost::multi_index::multi_index_container< - CTokenInfoItem, - boost::multi_index::indexed_by< - boost::multi_index::random_access<>, - boost::multi_index::hashed_unique< - boost::multi_index::tag, - BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CTokenInfoItem, std::string, str) - > - > - >; + //! String value of the token + std::string m_Str; - private: - //! 
Used by deferred persistence functions - static void acceptPersistInserter(const TTokenMIndex &tokenIdLookup, - const TTokenListTypeVec &types, - core::CStatePersistInserter &inserter); - - //! Given a string containing comma separated pre-tokenised input, add - //! the tokens to the working data structures in the same way as if they - //! had been determined by the tokeniseString() method. The result of - //! the tokenisation is returned in \p tokenIds, \p tokenUniqueIds and - //! \p totalWeight. Any previous content of these variables is wiped. - bool addPretokenisedTokens(const std::string &tokensCsv, - TSizeSizePrVec &tokenIds, - TSizeSizeMap &tokenUniqueIds, - size_t &totalWeight); + //! Index of the token + size_t m_Index; + + //! How many types use this token? + size_t m_TypeCount; + }; + + //! Compute equality based on the first element of a pair only + class CSizePairFirstElementEquals { + public: + CSizePairFirstElementEquals(size_t value); + + //! PAIRTYPE can be any struct with a data member named "first" + //! that can be checked for equality to a size_t + template + bool operator()(const PAIRTYPE& lhs) const { + return lhs.first == m_Value; + } private: - //! Reference to the object we'll use to create reverse searches - const TTokenListReverseSearchCreatorIntfCPtr m_ReverseSearchCreator; + size_t m_Value; + }; + + //! Used to hold the distinct types we compute (vector reallocations are + //! not expensive because CTokenListType is movable) + using TTokenListTypeVec = std::vector; - //! The lower threshold for comparison. If another type matches this - //! closely, we'll take it providing there's no other better match. - double m_LowerThreshold; + //! Tag for the token index + struct SToken {}; - //! The upper threshold for comparison. If another type matches this - //! closely, we accept it immediately (i.e. don't look for a better one). - double m_UpperThreshold; + using TTokenMIndex = boost::multi_index::multi_index_container< + CTokenInfoItem, + boost::multi_index::indexed_by< + boost::multi_index::random_access<>, + boost::multi_index::hashed_unique, + BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CTokenInfoItem, std::string, str)>>>; - //! Has the data typer's state changed? - bool m_HasChanged; +private: + //! Used by deferred persistence functions + static void + acceptPersistInserter(const TTokenMIndex& tokenIdLookup, const TTokenListTypeVec& types, core::CStatePersistInserter& inserter); - //! The types - TTokenListTypeVec m_Types; + //! Given a string containing comma separated pre-tokenised input, add + //! the tokens to the working data structures in the same way as if they + //! had been determined by the tokeniseString() method. The result of + //! the tokenisation is returned in \p tokenIds, \p tokenUniqueIds and + //! \p totalWeight. Any previous content of these variables is wiped. + bool addPretokenisedTokens(const std::string& tokensCsv, TSizeSizePrVec& tokenIds, TSizeSizeMap& tokenUniqueIds, size_t& totalWeight); - //! List of match count/index into type vector in descending order of - //! match count - TSizeSizePrList m_TypesByCount; +private: + //! Reference to the object we'll use to create reverse searches + const TTokenListReverseSearchCreatorIntfCPtr m_ReverseSearchCreator; - //! Used for looking up tokens to a unique ID - TTokenMIndex m_TokenIdLookup; + //! The lower threshold for comparison. If another type matches this + //! closely, we'll take it providing there's no other better match. + double m_LowerThreshold; - //! 
Vector to use to build up sequences of token IDs. This is a member - //! to save repeated reallocations for different strings. - TSizeSizePrVec m_WorkTokenIds; + //! The upper threshold for comparison. If another type matches this + //! closely, we accept it immediately (i.e. don't look for a better one). + double m_UpperThreshold; - //! Set to use to build up unique token IDs. This is a member to save - //! repeated reallocations for different strings. - TSizeSizeMap m_WorkTokenUniqueIds; + //! Has the data typer's state changed? + bool m_HasChanged; - //! Used to parse pre-tokenised input supplied as CSV. - CCsvInputParser::CCsvLineParser m_CsvLineParser; + //! The types + TTokenListTypeVec m_Types; + + //! List of match count/index into type vector in descending order of + //! match count + TSizeSizePrList m_TypesByCount; + + //! Used for looking up tokens to a unique ID + TTokenMIndex m_TokenIdLookup; + + //! Vector to use to build up sequences of token IDs. This is a member + //! to save repeated reallocations for different strings. + TSizeSizePrVec m_WorkTokenIds; + + //! Set to use to build up unique token IDs. This is a member to save + //! repeated reallocations for different strings. + TSizeSizeMap m_WorkTokenUniqueIds; + + //! Used to parse pre-tokenised input supplied as CSV. + CCsvInputParser::CCsvLineParser m_CsvLineParser; // For unit testing friend class ::CBaseTokenListDataTyperTest; // For ostream output - friend API_EXPORT std::ostream &operator<<(std::ostream &, - const SIdTranslater &); + friend API_EXPORT std::ostream& operator<<(std::ostream&, const SIdTranslater&); }; - -API_EXPORT std::ostream &operator<<(std::ostream &strm, - const CBaseTokenListDataTyper::SIdTranslater &translator); - - +API_EXPORT std::ostream& operator<<(std::ostream& strm, const CBaseTokenListDataTyper::SIdTranslater& translator); } } #endif // INCLUDED_ml_api_CBaseTokenListDataTyper_h - diff --git a/include/api/CBenchMarker.h b/include/api/CBenchMarker.h index 4c4bacc44f..421c599c53 100644 --- a/include/api/CBenchMarker.h +++ b/include/api/CBenchMarker.h @@ -15,12 +15,8 @@ #include #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { //! \brief //! Benchmark Ml categorisation using regexes. @@ -36,52 +32,47 @@ namespace api //! to be hard to categorise (even by a human being) and are hence //! ignored for benchmarking purposes. //! -class API_EXPORT CBenchMarker -{ - public: - //! A count and and example string - using TSizeStrPr = std::pair; +class API_EXPORT CBenchMarker { +public: + //! A count and an example string + using TSizeStrPr = std::pair<size_t, std::string>; - //! Used for mapping Ml type to count and example - using TIntSizeStrPrMap = std::map; - using TIntSizeStrPrMapItr = TIntSizeStrPrMap::iterator; - using TIntSizeStrPrMapCItr = TIntSizeStrPrMap::const_iterator; + //! Used for mapping Ml type to count and example + using TIntSizeStrPrMap = std::map<int, TSizeStrPr>; + using TIntSizeStrPrMapItr = TIntSizeStrPrMap::iterator; + using TIntSizeStrPrMapCItr = TIntSizeStrPrMap::const_iterator; - //! A regex and its corresponding type count map - using TRegexIntSizeStrPrMapPr = std::pair; + //! A regex and its corresponding type count map + using TRegexIntSizeStrPrMapPr = std::pair; - //! Vector of regexes with corresponding type count maps - using TRegexIntSizeStrPrMapPrVec = std::vector; - using TRegexIntSizeStrPrMapPrVecItr = TRegexIntSizeStrPrMapPrVec::iterator; - using TRegexIntSizeStrPrMapPrVecCItr = TRegexIntSizeStrPrMapPrVec::const_iterator; + //!
Vector of regexes with corresponding type count maps + using TRegexIntSizeStrPrMapPrVec = std::vector; + using TRegexIntSizeStrPrMapPrVecItr = TRegexIntSizeStrPrMapPrVec::iterator; + using TRegexIntSizeStrPrMapPrVecCItr = TRegexIntSizeStrPrMapPrVec::const_iterator; - public: - CBenchMarker(); +public: + CBenchMarker(); - //! Initialise from a file - bool init(const std::string ®exFilename); + //! Initialise from a file + bool init(const std::string& regexFilename); - //! Add a message together with the type Ml assigned to it - void addResult(const std::string &message, - int type); + //! Add a message together with the type Ml assigned to it + void addResult(const std::string& message, int type); - void dumpResults() const; + void dumpResults() const; - private: - //! Number of messages passed to the benchmarker - size_t m_TotalMessages; +private: + //! Number of messages passed to the benchmarker + size_t m_TotalMessages; - //! Number of messages that matched one of the regexes, and hence - //! contribute to the scoring - size_t m_ScoredMessages; + //! Number of messages that matched one of the regexes, and hence + //! contribute to the scoring + size_t m_ScoredMessages; - //! The string and tokens we base this type on - TRegexIntSizeStrPrMapPrVec m_Measures; + //! The string and tokens we base this type on + TRegexIntSizeStrPrMapPrVec m_Measures; }; - - } } #endif // INCLUDED_ml_api_CBenchMarker_h - diff --git a/include/api/CCategoryExamplesCollector.h b/include/api/CCategoryExamplesCollector.h index ba59f756d7..7394de2fe6 100644 --- a/include/api/CCategoryExamplesCollector.h +++ b/include/api/CCategoryExamplesCollector.h @@ -16,10 +16,8 @@ #include #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Collects up to a configurable number of distinct examples per category @@ -30,60 +28,56 @@ namespace api //! small number of expected examples should be more performant than a //! traditional set. //! -class API_EXPORT CCategoryExamplesCollector -{ - public: - using TStrSet = std::set; - using TStrSetCItr = TStrSet::const_iterator; +class API_EXPORT CCategoryExamplesCollector { +public: + using TStrSet = std::set; + using TStrSetCItr = TStrSet::const_iterator; - //! Truncate examples to be no longer than this - static const size_t MAX_EXAMPLE_LENGTH; + //! Truncate examples to be no longer than this + static const size_t MAX_EXAMPLE_LENGTH; - public: - CCategoryExamplesCollector(std::size_t maxExamples); - CCategoryExamplesCollector(std::size_t maxExamples, core::CStateRestoreTraverser &traverser); +public: + CCategoryExamplesCollector(std::size_t maxExamples); + CCategoryExamplesCollector(std::size_t maxExamples, core::CStateRestoreTraverser& traverser); - //! Adds the example to the category if the example is a new - //! distinct example and if there are less than the maximum - //! number of examples for the given category. - //! Returns true if the example was added or false otherwise. - bool add(std::size_t category, const std::string &example); + //! Adds the example to the category if the example is a new + //! distinct example and if there are less than the maximum + //! number of examples for the given category. + //! Returns true if the example was added or false otherwise. + bool add(std::size_t category, const std::string& example); - //! Returns the number of examples currently stored for a given category. - std::size_t numberOfExamplesForCategory(std::size_t category) const; + //! 
Returns the number of examples currently stored for a given category. + std::size_t numberOfExamplesForCategory(std::size_t category) const; - const TStrSet &examples(std::size_t category) const; + const TStrSet& examples(std::size_t category) const; - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Populate the object from part of a state document - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Populate the object from part of a state document + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Clear all examples - void clear(); + //! Clear all examples + void clear(); - private: - using TSizeStrSetUMap = boost::unordered_map; +private: + using TSizeStrSetUMap = boost::unordered_map; - private: - void persistExamples(std::size_t category, - const TStrSet &examples, - core::CStatePersistInserter &inserter) const; - bool restoreExamples(core::CStateRestoreTraverser &traverser); +private: + void persistExamples(std::size_t category, const TStrSet& examples, core::CStatePersistInserter& inserter) const; + bool restoreExamples(core::CStateRestoreTraverser& traverser); - //! Truncate long examples to MAX_EXAMPLE_LENGTH bytes, appending an - //! ellipsis to those that are truncated. - std::string truncateExample(std::string example); + //! Truncate long examples to MAX_EXAMPLE_LENGTH bytes, appending an + //! ellipsis to those that are truncated. + std::string truncateExample(std::string example); - private: - //! The max number of examples that will be collected per category - std::size_t m_MaxExamples; +private: + //! The max number of examples that will be collected per category + std::size_t m_MaxExamples; - //! A map from categories to the set that contains the examples - TSizeStrSetUMap m_ExamplesByCategory; + //! A map from categories to the set that contains the examples + TSizeStrSetUMap m_ExamplesByCategory; }; - } } diff --git a/include/api/CCmdSkeleton.h b/include/api/CCmdSkeleton.h index eddd5daae4..4b53d5de5d 100644 --- a/include/api/CCmdSkeleton.h +++ b/include/api/CCmdSkeleton.h @@ -12,16 +12,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CDataAdder; class CDataSearcher; } -namespace api -{ +namespace api { class CDataProcessor; class CInputParser; @@ -36,39 +32,32 @@ class CInputParser; //! this class, which, in practice, means that the CIoManager object managing //! them must outlive this object. //! -class API_EXPORT CCmdSkeleton : private core::CNonCopyable -{ - public: - CCmdSkeleton(core::CDataSearcher *restoreSearcher, - core::CDataAdder *persister, - CInputParser &inputParser, - CDataProcessor &processor); +class API_EXPORT CCmdSkeleton : private core::CNonCopyable { +public: + CCmdSkeleton(core::CDataSearcher* restoreSearcher, core::CDataAdder* persister, CInputParser& inputParser, CDataProcessor& processor); - //! Pass input to the processor until it's consumed as much as it can. - bool ioLoop(); + //! Pass input to the processor until it's consumed as much as it can. + bool ioLoop(); - private: - //! Persists the state of the models - bool persistState(); +private: + //! Persists the state of the models + bool persistState(); - private: - //! NULL if state restoration is not required. 
- core::CDataSearcher *m_RestoreSearcher; +private: + //! NULL if state restoration is not required. + core::CDataSearcher* m_RestoreSearcher; - //! NULL if state persistence is not required. - core::CDataAdder *m_Persister; + //! NULL if state persistence is not required. + core::CDataAdder* m_Persister; - //! Input data parser. - CInputParser &m_InputParser; + //! Input data parser. + CInputParser& m_InputParser; - //! Reference to the object that's going to do the command-specific - //! processing of the data. - CDataProcessor &m_Processor; + //! Reference to the object that's going to do the command-specific + //! processing of the data. + CDataProcessor& m_Processor; }; - - } } #endif // INCLUDED_ml_api_CCmdSkeleton_h - diff --git a/include/api/CConfigUpdater.h b/include/api/CConfigUpdater.h index 56f3c9c4e2..c3a4573983 100644 --- a/include/api/CConfigUpdater.h +++ b/include/api/CConfigUpdater.h @@ -13,10 +13,8 @@ #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Parse a configuration and apply requested configuration updates. @@ -34,26 +32,25 @@ namespace api //! changes as possible even if it fails on a particular //! change (e.g. unknown stanza name). //! -class API_EXPORT CConfigUpdater -{ - public: - CConfigUpdater(CFieldConfig &fieldConfig, model::CAnomalyDetectorModelConfig &modelConfig); - - //! Update from given config changes - //! \param config the requested changes in an ini syntax - bool update(const std::string &config); - - private: - static const std::string MODEL_DEBUG_CONFIG; - static const std::string DETECTOR_RULES; - static const std::string DETECTOR_INDEX; - static const std::string RULES_JSON; - static const std::string FILTERS; - static const std::string SCHEDULED_EVENTS; - - private: - CFieldConfig &m_FieldConfig; - model::CAnomalyDetectorModelConfig &m_ModelConfig; +class API_EXPORT CConfigUpdater { +public: + CConfigUpdater(CFieldConfig& fieldConfig, model::CAnomalyDetectorModelConfig& modelConfig); + + //! Update from given config changes + //! \param config the requested changes in an ini syntax + bool update(const std::string& config); + +private: + static const std::string MODEL_DEBUG_CONFIG; + static const std::string DETECTOR_RULES; + static const std::string DETECTOR_INDEX; + static const std::string RULES_JSON; + static const std::string FILTERS; + static const std::string SCHEDULED_EVENTS; + +private: + CFieldConfig& m_FieldConfig; + model::CAnomalyDetectorModelConfig& m_ModelConfig; }; } } diff --git a/include/api/CCsvInputParser.h b/include/api/CCsvInputParser.h index 1420c36525..8e040281c4 100644 --- a/include/api/CCsvInputParser.h +++ b/include/api/CCsvInputParser.h @@ -15,11 +15,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Parse the CSV formatted input data @@ -50,169 +47,163 @@ namespace api //! up, it expects fields containing quotes to be quoted, whereas Excel format //! only quotes fields that contain commas or new lines //! -class API_EXPORT CCsvInputParser : public CInputParser -{ - public: - //! Default CSV separator - static const char COMMA; +class API_EXPORT CCsvInputParser : public CInputParser { +public: + //! Default CSV separator + static const char COMMA; - //! CSV quote character - static const char QUOTE; + //! CSV quote character + static const char QUOTE; - //! CSV record end character - static const char RECORD_END; + //! CSV record end character + static const char RECORD_END; - //! 
Character to ignore at the end of lines - static const char STRIP_BEFORE_END; + //! Character to ignore at the end of lines + static const char STRIP_BEFORE_END; - private: - using TScopedCharArray = boost::scoped_array; +private: + using TScopedCharArray = boost::scoped_array; +public: + //! A class for parsing individual lines of CSV data. + //! Used in the implementation of the overall CSV input + //! parser, but also publicly available for use in other + //! situations. + class API_EXPORT CCsvLineParser { public: - //! A class for parsing individual lines of CSV data. - //! Used in the implementation of the overall CSV input - //! parser, but also publicly available for use in other - //! situations. - class API_EXPORT CCsvLineParser - { - public: - //! Construct, optionally supplying a non-standard separator. - //! The string to be parsed must be supplied by calling the - //! reset() method. - CCsvLineParser(char separator = COMMA); - - //! Supply a new CSV string to be parsed. - void reset(const std::string &line); - - //! Parse the next token from the current line. - bool parseNext(std::string &value); - - //! Are we at the end of the current line? - bool atEnd() const; - - private: - //! Attempt to parse the next token from the working record - //! into the working field. - bool parseNextToken(const char *end, const char *¤t); - - private: - //! Input field separator by default this is ',' but can be - //! overridden in the constructor. - const char m_Separator; - - //! Did the separator character appear after the last CSV field - //! we parsed? - bool m_SeparatorAfterLastField; - - //! The line to be parsed. Held as a pointer that must outlive - //! use of this class to avoid copying. - const std::string *m_Line; - - //! Pointers to the current position and end of the line being - //! parsed. - const char *m_LineCurrent; - const char *m_LineEnd; - - //! The working field is a raw character array rather than a - //! string because it is built up one character at a time, and - //! when you append a character to a string the following - //! character has to be set to the zero terminator. The array - //! of characters is NOT zero terminated and hence avoids this - //! overhead. This is something to be aware of when accessing - //! it, but improves performance of the parsing by about 20%. - //! The character array is always big enough to hold the entire - //! current row string such that the code that pulls out - //! individual fields doesn't need to check the capacity - even - //! if the current row has just one field, the working field - //! array will be big enough to hold it. - TScopedCharArray m_WorkField; - char *m_WorkFieldEnd; - size_t m_WorkFieldCapacity; - }; + //! Construct, optionally supplying a non-standard separator. + //! The string to be parsed must be supplied by calling the + //! reset() method. + CCsvLineParser(char separator = COMMA); - public: - //! Construct with a string to be parsed - CCsvInputParser(const std::string &input, - char separator = COMMA); - - //! Construct with an input stream to be parsed. Once a stream is - //! passed to this constructor, no other object should read from it. - //! For example, if std::cin is passed, no other object should read from - //! std::cin, otherwise unpredictable and incorrect results will be - //! generated. - CCsvInputParser(std::istream &strmIn, - char separator = COMMA); - - //! Get field name row exactly as it was in the input - const std::string &fieldNameStr() const; - - //! Read records from the stream. 
The supplied reader function is called - //! once per record. If the supplied reader function returns false, - //! reading will stop. This method keeps reading until it reaches the - //! end of the stream or an error occurs. If it successfully reaches - //! the end of the stream it returns true, otherwise it returns false. - virtual bool readStream(const TReaderFunc &readerFunc); + //! Supply a new CSV string to be parsed. + void reset(const std::string& line); - private: - //! Attempt to parse a single CSV record from the stream into the - //! working record. The CSV is assumed to be in the Excel style. - bool parseCsvRecordFromStream(); + //! Parse the next token from the current line. + bool parseNext(std::string& value); - //! Attempt to parse the field names from the working record. - bool parseFieldNames(); + //! Are we at the end of the current line? + bool atEnd() const; - //! Attempt to parse the current working record into data fields. - bool parseDataRecord(const TStrRefVec &fieldValRefs); - - //! Wrapper around std::getline() that removes carriage returns - //! preceding the linefeed that breaks the line. This means that we - //! never get confused by carriage returns in field values, whether - //! we're running on Unix or Windows. - std::istream &getline(std::string &str); + private: + //! Attempt to parse the next token from the working record + //! into the working field. + bool parseNextToken(const char* end, const char*& current); private: - //! Allocate this much memory for the working buffer - static const size_t WORK_BUFFER_SIZE; - - //! If we've been initialised with a string, this object is used to read - //! the string - std::istringstream m_StringInputBuf; - - //! Reference to the stream we're going to read from - std::istream &m_StrmIn; - - //! Hold this as a member, so that its capacity adjusts to a reasonable - //! size for the input rather than repeatedly having to allocate new - //! string buffers. - std::string m_CurrentRowStr; - - //! Similar to the current row string, the working buffer is also held - //! as a member to avoid constantly reallocating it. However, the - //! working buffer is a raw character array rather than a string to - //! facilitate the use of std::istream::read() to obtain input rather - //! than std::getline(). std::getline() is efficient in the GNU STL but - //! sadly not in the Microsoft or Apache STLs, where it copies one - //! character at a time. std::istream::read() uses memcpy() to shuffle - //! data around on all platforms, and is hence an order of magnitude - //! faster. (This is the sort of optimisation to be used ONLY after - //! careful profiling in the rare cases where the reduction in code - //! clarity yields a large performance benefit.) The array of - //! characters is NOT zero terminated, which is something to be aware of - //! when accessing it. - TScopedCharArray m_WorkBuffer; - const char *m_WorkBufferPtr; - const char *m_WorkBufferEnd; - bool m_NoMoreRecords; - - //! Field name row exactly as it appears in the input - std::string m_FieldNameStr; - - //! Parser used to parse the individual lines - CCsvLineParser m_LineParser; + private: + //! Input field separator; by default this is ',' but can be + //! overridden in the constructor. + const char m_Separator; + + //! Did the separator character appear after the last CSV field + //! we parsed? + bool m_SeparatorAfterLastField; + + //! The line to be parsed. Held as a pointer that must outlive + //! use of this class to avoid copying. + const std::string* m_Line;
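Because CCsvLineParser is also publicly available for standalone use, a short usage sketch may help. The input line is invented and error handling is minimal, but the reset()/parseNext()/atEnd() sequence mirrors the interface declared above, including the requirement that the string passed to reset() outlives the parse.

#include <api/CCsvInputParser.h>
#include <iostream>
#include <string>

int main() {
    // Default ',' separator; a different one could be passed here.
    ml::api::CCsvInputParser::CCsvLineParser lineParser;

    // The line must outlive the parse: reset() stores a pointer to it.
    const std::string line = "AAL,132.4,\"quoted,field\"";
    lineParser.reset(line);

    std::string value;
    while (!lineParser.atEnd() && lineParser.parseNext(value)) {
        std::cout << value << '\n';
    }
    return 0;
}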
+ //! Pointers to the current position and end of the line being + //! parsed. + const char* m_LineCurrent; + const char* m_LineEnd; + + //! The working field is a raw character array rather than a + //! string because it is built up one character at a time, and + //! when you append a character to a string the following + //! character has to be set to the zero terminator. The array + //! of characters is NOT zero terminated and hence avoids this + //! overhead. This is something to be aware of when accessing + //! it, but improves performance of the parsing by about 20%. + //! The character array is always big enough to hold the entire + //! current row string such that the code that pulls out + //! individual fields doesn't need to check the capacity - even + //! if the current row has just one field, the working field + //! array will be big enough to hold it. + TScopedCharArray m_WorkField; + char* m_WorkFieldEnd; + size_t m_WorkFieldCapacity; + }; + +public: + //! Construct with a string to be parsed + CCsvInputParser(const std::string& input, char separator = COMMA); + + //! Construct with an input stream to be parsed. Once a stream is + //! passed to this constructor, no other object should read from it. + //! For example, if std::cin is passed, no other object should read from + //! std::cin, otherwise unpredictable and incorrect results will be + //! generated. + CCsvInputParser(std::istream& strmIn, char separator = COMMA); + + //! Get field name row exactly as it was in the input + const std::string& fieldNameStr() const; + + //! Read records from the stream. The supplied reader function is called + //! once per record. If the supplied reader function returns false, + //! reading will stop. This method keeps reading until it reaches the + //! end of the stream or an error occurs. If it successfully reaches + //! the end of the stream it returns true, otherwise it returns false. + virtual bool readStream(const TReaderFunc& readerFunc); + +private: + //! Attempt to parse a single CSV record from the stream into the + //! working record. The CSV is assumed to be in the Excel style. + bool parseCsvRecordFromStream(); + + //! Attempt to parse the field names from the working record. + bool parseFieldNames(); + + //! Attempt to parse the current working record into data fields. + bool parseDataRecord(const TStrRefVec& fieldValRefs); + + //! Wrapper around std::getline() that removes carriage returns + //! preceding the linefeed that breaks the line. This means that we + //! never get confused by carriage returns in field values, whether + //! we're running on Unix or Windows. + std::istream& getline(std::string& str); + +private: + //! Allocate this much memory for the working buffer + static const size_t WORK_BUFFER_SIZE; + + //! If we've been initialised with a string, this object is used to read + //! the string + std::istringstream m_StringInputBuf; + + //! Reference to the stream we're going to read from + std::istream& m_StrmIn; + + //! Hold this as a member, so that its capacity adjusts to a reasonable + //! size for the input rather than repeatedly having to allocate new + //! string buffers. + std::string m_CurrentRowStr; + + //! Similar to the current row string, the working buffer is also held + //! as a member to avoid constantly reallocating it. However, the + //! working buffer is a raw character array rather than a string to + //! facilitate the use of std::istream::read() to obtain input rather + //! than std::getline(). std::getline() is efficient in the GNU STL but + //!
sadly not in the Microsoft or Apache STLs, where it copies one + //! character at a time. std::istream::read() uses memcpy() to shuffle + //! data around on all platforms, and is hence an order of magnitude + //! faster. (This is the sort of optimisation to be used ONLY after + //! careful profiling in the rare cases where the reduction in code + //! clarity yields a large performance benefit.) The array of + //! characters is NOT zero terminated, which is something to be aware of + //! when accessing it. + TScopedCharArray m_WorkBuffer; + const char* m_WorkBufferPtr; + const char* m_WorkBufferEnd; + bool m_NoMoreRecords; + + //! Field name row exactly as it appears in the input + std::string m_FieldNameStr; + + //! Parser used to parse the individual lines + CCsvLineParser m_LineParser; }; - } } #endif // INCLUDED_ml_api_CCsvInputParser_h - diff --git a/include/api/CCsvOutputWriter.h b/include/api/CCsvOutputWriter.h index 4dc6048564..659bf00c12 100644 --- a/include/api/CCsvOutputWriter.h +++ b/include/api/CCsvOutputWriter.h @@ -15,11 +15,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Write output data in CSV format @@ -44,117 +41,108 @@ namespace api //! It is not acceptable to have the separator character be the same as the //! escape character, the quote character or the record end character. //! -class API_EXPORT CCsvOutputWriter : public COutputHandler -{ - public: - //! CSV separator - static const char COMMA; - - //! CSV quote character - static const char QUOTE; - - //! CSV record end character - static const char RECORD_END; - - public: - //! Constructor that causes output to be written to the internal string - //! stream - CCsvOutputWriter(bool outputMessages = false, - bool outputHeader = true, - char escape = QUOTE, - char separator = COMMA); - - //! Constructor that causes output to be written to the specified stream - CCsvOutputWriter(std::ostream &strmOut, - bool outputMessages = false, - bool outputHeader = true, - char escape = QUOTE, - char separator = COMMA); - - //! Destructor flushes the stream - virtual ~CCsvOutputWriter(); - - //! Set field names, adding extra field names if they're not already - //! present - this is only allowed once - virtual bool fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames); - - //! Get field names - virtual const TStrVec &fieldNames() const; - - // Bring the other overload of fieldNames() into scope - using COutputHandler::fieldNames; - - //! Write a row to the stream, optionally overriding some of the - //! original field values. Where the same field is present in both - //! overrideDataRowFields and dataRowFields, the value in - //! overrideDataRowFields will be written. - virtual bool writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields); - - // Bring the other overload of writeRow() into scope - using COutputHandler::writeRow; - - //! Get the contents of the internal string stream - for use with the - //! zero argument constructor - std::string internalString() const; - - protected: - //! Output stream accessor - std::ostream &outputStream(); - - private: - //! Append a field to the work record, quoting it if required, and - //! escaping embedded quotes - void appendField(const std::string &field); - - private: - //! If we've been initialised without a specific stream, output is - //! written to this string stream - std::ostringstream m_StringOutputBuf; - - //! 
Reference to the stream we're going to write to - std::ostream &m_StrmOut; - - //! Should we output a messages section before the CSV? - bool m_OutputMessages; - - //! Should we output a row containing the CSV column names? - bool m_OutputHeader; - - //! CSV field names in the order they are to be written to the output - TStrVec m_FieldNames; - - //! Pre-computed hashes for each field name. The pre-computed hashes - //! are at the same index in this vector as the corresponding field name - //! in the m_FieldNames vector. - TPreComputedHashVec m_Hashes; - - //! Used to build up output records before writing them to the output - //! stream, so that invalid write requests can have no effect on the - //! output stream. Held as a member so that the capacity adjusts to - //! an appropriate level, avoiding regular memory allocations. - std::string m_WorkRecord; - - using TStrStrPr = std::pair; - using TStrStrPrSet = std::set; - using TStrStrPrSetCItr = TStrStrPrSet::const_iterator; - - //! Messages to be printed before the next lot of output - TStrStrPrSet m_Messages; - - //! Character to use for escaping quotes (const to allow compiler - //! optimisations, since the value can't be changed after construction) - const char m_Escape; - - //! Output field separator by default this is ',' but can be - //! overridden in the constructor - const char m_Separator; +class API_EXPORT CCsvOutputWriter : public COutputHandler { +public: + //! CSV separator + static const char COMMA; + + //! CSV quote character + static const char QUOTE; + + //! CSV record end character + static const char RECORD_END; + +public: + //! Constructor that causes output to be written to the internal string + //! stream + CCsvOutputWriter(bool outputMessages = false, bool outputHeader = true, char escape = QUOTE, char separator = COMMA); + + //! Constructor that causes output to be written to the specified stream + CCsvOutputWriter(std::ostream& strmOut, + bool outputMessages = false, + bool outputHeader = true, + char escape = QUOTE, + char separator = COMMA); + + //! Destructor flushes the stream + virtual ~CCsvOutputWriter(); + + //! Set field names, adding extra field names if they're not already + //! present - this is only allowed once + virtual bool fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames); + + //! Get field names + virtual const TStrVec& fieldNames() const; + + // Bring the other overload of fieldNames() into scope + using COutputHandler::fieldNames; + + //! Write a row to the stream, optionally overriding some of the + //! original field values. Where the same field is present in both + //! overrideDataRowFields and dataRowFields, the value in + //! overrideDataRowFields will be written. + virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + + // Bring the other overload of writeRow() into scope + using COutputHandler::writeRow; + + //! Get the contents of the internal string stream - for use with the + //! zero argument constructor + std::string internalString() const; + +protected: + //! Output stream accessor + std::ostream& outputStream(); + +private: + //! Append a field to the work record, quoting it if required, and + //! escaping embedded quotes + void appendField(const std::string& field); + +private: + //! If we've been initialised without a specific stream, output is + //! written to this string stream + std::ostringstream m_StringOutputBuf; + + //! Reference to the stream we're going to write to + std::ostream& m_StrmOut; + + //! 
Should we output a messages section before the CSV?
+    bool m_OutputMessages;
+
+    //! Should we output a row containing the CSV column names?
+    bool m_OutputHeader;
+
+    //! CSV field names in the order they are to be written to the output
+    TStrVec m_FieldNames;
+
+    //! Pre-computed hashes for each field name. The pre-computed hashes
+    //! are at the same index in this vector as the corresponding field name
+    //! in the m_FieldNames vector.
+    TPreComputedHashVec m_Hashes;
+
+    //! Used to build up output records before writing them to the output
+    //! stream, so that invalid write requests can have no effect on the
+    //! output stream. Held as a member so that the capacity adjusts to
+    //! an appropriate level, avoiding regular memory allocations.
+    std::string m_WorkRecord;
+
+    using TStrStrPr = std::pair;
+    using TStrStrPrSet = std::set;
+    using TStrStrPrSetCItr = TStrStrPrSet::const_iterator;
+
+    //! Messages to be printed before the next lot of output
+    TStrStrPrSet m_Messages;
+
+    //! Character to use for escaping quotes (const to allow compiler
+    //! optimisations, since the value can't be changed after construction)
+    const char m_Escape;
+
+    //! Output field separator; by default this is ',' but can be
+    //! overridden in the constructor
+    const char m_Separator;
 };
-
-
 }
 }

 #endif // INCLUDED_ml_api_CCsvOutputWriter_h
-
diff --git a/include/api/CDataProcessor.h b/include/api/CDataProcessor.h
index d7876d88cd..2d909fd853 100644
--- a/include/api/CDataProcessor.h
+++ b/include/api/CDataProcessor.h
@@ -18,17 +18,13 @@

 #include
-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CDataAdder;
 class CDataSearcher;
 }
-namespace api
-{
+namespace api {
 class CBackgroundPersister;
 class COutputHandler;

@@ -42,59 +38,54 @@ class COutputHandler;
 //!
 //! IMPLEMENTATION DECISIONS:\n
 //!
-class API_EXPORT CDataProcessor : private core::CNonCopyable
-{
-    public:
-        static const char CONTROL_FIELD_NAME_CHAR = '.';
-        static const std::string CONTROL_FIELD_NAME;
+class API_EXPORT CDataProcessor : private core::CNonCopyable {
+public:
+    static const char CONTROL_FIELD_NAME_CHAR = '.';
+    static const std::string CONTROL_FIELD_NAME;

-    public:
-        using TStrVec = std::vector;
-        using TStrVecItr = TStrVec::iterator;
-        using TStrVecCItr = TStrVec::const_iterator;
+public:
+    using TStrVec = std::vector;
+    using TStrVecItr = TStrVec::iterator;
+    using TStrVecCItr = TStrVec::const_iterator;

-        using TStrStrUMap = boost::unordered_map;
-        using TStrStrUMapItr = TStrStrUMap::iterator;
-        using TStrStrUMapCItr = TStrStrUMap::const_iterator;
+    using TStrStrUMap = boost::unordered_map;
+    using TStrStrUMapItr = TStrStrUMap::iterator;
+    using TStrStrUMapCItr = TStrStrUMap::const_iterator;

-    public:
-        CDataProcessor();
-        virtual ~CDataProcessor();
+public:
+    CDataProcessor();
+    virtual ~CDataProcessor();

-        //! We're going to be writing to a new output stream
-        virtual void newOutputStream() = 0;
+    //! We're going to be writing to a new output stream
+    virtual void newOutputStream() = 0;

-        //! Receive a single record to be processed, and produce output
-        //! with any required modifications
-        virtual bool handleRecord(const TStrStrUMap &dataRowFields) = 0;
+    //! Receive a single record to be processed, and produce output
+    //! with any required modifications
+    virtual bool handleRecord(const TStrStrUMap& dataRowFields) = 0;

-        //! Perform any final processing once all input data has been seen.
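The quoting behaviour documented for CCsvOutputWriter::appendField() above can be made concrete with a short standalone sketch. The constant values assumed here (',' for COMMA, '"' for QUOTE, '\n' for RECORD_END), the helper name and the quote-doubling convention are inferred from the comments and the default escape character being the quote itself; this is not code from the patch.

#include <string>

// Quote a CSV field if it contains the separator, the quote character or
// the record end character, escaping embedded quotes by doubling them
// (the behaviour when the escape character defaults to the quote).
std::string quoteCsvField(const std::string& field,
                          char separator = ',',    // assumed value of COMMA
                          char quote = '"',        // assumed value of QUOTE
                          char recordEnd = '\n') { // assumed value of RECORD_END
    if (field.find_first_of(std::string{separator, quote, recordEnd}) == std::string::npos) {
        return field; // no quoting required
    }
    std::string quoted(1, quote);
    for (char c : field) {
        if (c == quote) {
            quoted += quote; // escape embedded quotes by doubling
        }
        quoted += c;
    }
    quoted += quote;
    return quoted;
}

For example, the field say "hi", bye would be written as "say ""hi"", bye", which round-trips cleanly through an Excel-style CSV reader.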
+ virtual void finalise() = 0; - //! Restore previously saved state - virtual bool restoreState(core::CDataSearcher &restoreSearcher, - core_t::TTime &completeToTime) = 0; + //! Restore previously saved state + virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime) = 0; - //! Persist current state - virtual bool persistState(core::CDataAdder &persister) = 0; + //! Persist current state + virtual bool persistState(core::CDataAdder& persister) = 0; - //! Persist current state due to the periodic persistence being triggered. - virtual bool periodicPersistState(CBackgroundPersister &persister); + //! Persist current state due to the periodic persistence being triggered. + virtual bool periodicPersistState(CBackgroundPersister& persister); - //! How many records did we handle? - virtual uint64_t numRecordsHandled() const = 0; + //! How many records did we handle? + virtual uint64_t numRecordsHandled() const = 0; - //! Access the output handler - virtual COutputHandler &outputHandler() = 0; + //! Access the output handler + virtual COutputHandler& outputHandler() = 0; - //! Create debug for a record. This is expensive so should NOT be - //! called for every record as a matter of course. - static std::string debugPrintRecord(const TStrStrUMap &dataRowFields); + //! Create debug for a record. This is expensive so should NOT be + //! called for every record as a matter of course. + static std::string debugPrintRecord(const TStrStrUMap& dataRowFields); }; - - } } #endif // INCLUDED_ml_api_CDataProcessor_h - diff --git a/include/api/CDataTyper.h b/include/api/CDataTyper.h index 744d295b9b..a16b151860 100644 --- a/include/api/CDataTyper.h +++ b/include/api/CDataTyper.h @@ -16,16 +16,12 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace api -{ +namespace api { //! \brief //! Interface for classes that convert a raw event string to a type. @@ -40,88 +36,75 @@ namespace api //! there are specialist data typers for XML, JSON or delimited files, //! so it is good to have an abstract interface that they can all use. //! -class API_EXPORT CDataTyper -{ - public: - //! Used for storing distinct token IDs - using TStrStrUMap = boost::unordered_map; - using TStrStrUMapCItr = TStrStrUMap::const_iterator; - - //! Shared pointer to an instance of this class - using TDataTyperP = boost::shared_ptr; - - //! Shared pointer to an instance of this class - using TPersistFunc = std::function; - - public: - CDataTyper(const std::string &fieldName); - - //! Virtual destructor for an abstract base class - virtual ~CDataTyper(); - - //! Dump stats - virtual void dumpStats() const = 0; - - //! Compute a type from a string. The raw string length may be longer - //! than the length of the passed string, because the passed string may - //! have the date stripped out of it. - int computeType(bool isDryRun, - const std::string &str, - size_t rawStringLen); - - //! As above, but also take into account field names/values. - virtual int computeType(bool isDryRun, - const TStrStrUMap &fields, - const std::string &str, - size_t rawStringLen) = 0; - - //! Create reverse search commands that will (more or less) just - //! select the records that are classified as the given type when - //! combined with the original search. Note that the reverse search is - //! only approximate - it may select more records than have actually - //! been classified as the returned type. 
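Before the reverse search API is restated below, the "only approximate" caveat is worth illustrating. The toy matcher here is not the library's algorithm; it merely shows how selecting on a category's distinctive tokens in order, which is roughly what a reverse search does, can also match records the categoriser itself would never have assigned to that category. Everything in it is invented for the illustration.

#include <string>
#include <vector>

// Returns true if all the category's tokens occur in order in the message.
// Such a match is necessary but not sufficient for category membership, so
// a search built this way may select more records than the categoriser did.
bool reverseSearchMatches(const std::string& message, const std::vector<std::string>& tokens) {
    std::size_t pos = 0;
    for (const auto& token : tokens) {
        pos = message.find(token, pos);
        if (pos == std::string::npos) {
            return false; // a required token is missing
        }
        pos += token.length();
    }
    return true; // may over-select relative to the real categoriser
}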
-        virtual bool createReverseSearch(int type,
-                                         std::string &part1,
-                                         std::string &part2,
-                                         size_t &maxMatchingLength,
-                                         bool &wasCached) = 0;
-
-        //! Has the data typer's state changed?
-        virtual bool hasChanged() const = 0;
-
-        //! Populate the object from part of a state document
-        virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) = 0;
-
-        //! Persist state by passing information to the supplied inserter
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0;
-
-        //! Make a function that can be called later to persist state
-        virtual TPersistFunc makePersistFunc() const = 0;
-
-        //! Access to the field name
-        const std::string &fieldName() const;
-
-        //! Access to last persistence time
-        core_t::TTime lastPersistTime() const;
-
-        //! Set last persistence time
-        void lastPersistTime(core_t::TTime lastPersistTime);
-
-    protected:
-        //! Used if no fields are supplied to the computeType() method.
-        static const TStrStrUMap EMPTY_FIELDS;
-
-    private:
-        //! Which field name are we working on?
-        std::string m_FieldName;
-
-        //! When was data last persisted for this typer? (0 means never.)
-        core_t::TTime m_LastPersistTime;
-};
+class API_EXPORT CDataTyper {
+public:
+    //! Used for storing distinct token IDs
+    using TStrStrUMap = boost::unordered_map;
+    using TStrStrUMapCItr = TStrStrUMap::const_iterator;
+
+    //! Shared pointer to an instance of this class
+    using TDataTyperP = boost::shared_ptr;
+
+    //! Function that can be called later to persist state
+    using TPersistFunc = std::function;
+
+public:
+    CDataTyper(const std::string& fieldName);
+
+    //! Virtual destructor for an abstract base class
+    virtual ~CDataTyper();
+
+    //! Dump stats
+    virtual void dumpStats() const = 0;
+
+    //! Compute a type from a string. The raw string length may be longer
+    //! than the length of the passed string, because the passed string may
+    //! have the date stripped out of it.
+    int computeType(bool isDryRun, const std::string& str, size_t rawStringLen);
+
+    //! As above, but also take into account field names/values.
+    virtual int computeType(bool isDryRun, const TStrStrUMap& fields, const std::string& str, size_t rawStringLen) = 0;
+
+    //! Create reverse search commands that will (more or less) just
+    //! select the records that are classified as the given type when
+    //! combined with the original search. Note that the reverse search is
+    //! only approximate - it may select more records than have actually
+    //! been classified as the returned type.
+    virtual bool createReverseSearch(int type, std::string& part1, std::string& part2, size_t& maxMatchingLength, bool& wasCached) = 0;
+    //! Has the data typer's state changed?
+    virtual bool hasChanged() const = 0;
+    //! Populate the object from part of a state document
+    virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) = 0;
+
+    //! Persist state by passing information to the supplied inserter
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0;
+
+    //! Make a function that can be called later to persist state
+    virtual TPersistFunc makePersistFunc() const = 0;
+
+    //! Access to the field name
+    const std::string& fieldName() const;
+
+    //! Access to last persistence time
+    core_t::TTime lastPersistTime() const;
+
+    //! Set last persistence time
+    void lastPersistTime(core_t::TTime lastPersistTime);
+
+protected:
+    //! Used if no fields are supplied to the computeType() method.
+    static const TStrStrUMap EMPTY_FIELDS;
+
+private:
+    //! Which field name are we working on?
+    std::string m_FieldName;
+
+    //! When was data last persisted for this typer? (0 means never.)
+    core_t::TTime m_LastPersistTime;
+};
 }
 }

 #endif // INCLUDED_ml_api_CDataTyper_h
-
diff --git a/include/api/CDetectionRulesJsonParser.h b/include/api/CDetectionRulesJsonParser.h
index 4b8cd2cf46..28092c5154 100644
--- a/include/api/CDetectionRulesJsonParser.h
+++ b/include/api/CDetectionRulesJsonParser.h
@@ -20,49 +20,39 @@

 #include
 #include

-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {

 //! \brief A parser to convert JSON detection rules into objects
-class API_EXPORT CDetectionRulesJsonParser
-{
-    public:
-        using TDetectionRuleVec = std::vector;
-        using TStrPatternSetUMap = boost::unordered_map;
-
-    public:
-        //! Default constructor
-        CDetectionRulesJsonParser(TStrPatternSetUMap &filtersByIdMap);
-
-        //! Parses a string expected to contain a JSON array with
-        //! detection rules and adds the rule objects into the given vector.
-        bool parseRules(const std::string &json, TDetectionRuleVec &rules);
-
-    private:
-        bool parseRuleConditions(const rapidjson::Value &ruleObject,
-                                 model::CDetectionRule &rule);
-        bool parseFilterId(const rapidjson::Value &conditionObject,
-                           model::CRuleCondition &ruleCondition);
-
-        static bool hasStringMember(const rapidjson::Value &object, const std::string &name);
-        static bool hasArrayMember(const rapidjson::Value &object, const std::string &name);
-        static bool parseRuleActions(const rapidjson::Value &ruleObject, model::CDetectionRule &rule);
-        static bool parseConditionsConnective(const rapidjson::Value &ruleObject,
-                                              model::CDetectionRule &rule);
-        static bool parseRuleConditionType(const rapidjson::Value &ruleConditionObject,
-                                           model::CRuleCondition &ruleCondition);
-        static bool parseCondition(const rapidjson::Value &ruleConditionObject,
-                                   model::CRuleCondition &ruleCondition);
-        static bool parseConditionOperator(const rapidjson::Value &conditionObject,
-                                           model::CRuleCondition &ruleCondition);
-        static bool parseConditionThreshold(const rapidjson::Value &conditionObject,
-                                            model::CRuleCondition &ruleCondition);
-
-    private:
-        //! The filters per id used by categorical rule conditions.
-        TStrPatternSetUMap &m_FiltersByIdMap;
+class API_EXPORT CDetectionRulesJsonParser {
+public:
+    using TDetectionRuleVec = std::vector;
+    using TStrPatternSetUMap = boost::unordered_map;
+
+public:
+    //! Construct with the filters used by categorical rule conditions
+    CDetectionRulesJsonParser(TStrPatternSetUMap& filtersByIdMap);
+
+    //! Parses a string expected to contain a JSON array with
+    //! detection rules and adds the rule objects into the given vector.
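A minimal call sequence for this parser, using the parseRules() method declared next, might look as follows. The include path is inferred from the header guard convention and the empty JSON array is deliberate: the concrete rule schema is defined by the implementation (parseRuleActions(), parseCondition() and friends), not by this header, so no rule document is assumed here.

#include <api/CDetectionRulesJsonParser.h>

#include <string>

void parseRulesExample() {
    // The filter map is consulted when rules refer to filters by id.
    ml::api::CDetectionRulesJsonParser::TStrPatternSetUMap filtersById;
    ml::api::CDetectionRulesJsonParser parser(filtersById);

    ml::api::CDetectionRulesJsonParser::TDetectionRuleVec rules;
    if (parser.parseRules("[]", rules) == false) {
        // report the malformed rule JSON
    }
}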
+ bool parseRules(const std::string& json, TDetectionRuleVec& rules); + +private: + bool parseRuleConditions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule); + bool parseFilterId(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition); + + static bool hasStringMember(const rapidjson::Value& object, const std::string& name); + static bool hasArrayMember(const rapidjson::Value& object, const std::string& name); + static bool parseRuleActions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule); + static bool parseConditionsConnective(const rapidjson::Value& ruleObject, model::CDetectionRule& rule); + static bool parseRuleConditionType(const rapidjson::Value& ruleConditionObject, model::CRuleCondition& ruleCondition); + static bool parseCondition(const rapidjson::Value& ruleConditionObject, model::CRuleCondition& ruleCondition); + static bool parseConditionOperator(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition); + static bool parseConditionThreshold(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition); + +private: + //! The filters per id used by categorical rule conditions. + TStrPatternSetUMap& m_FiltersByIdMap; }; } } diff --git a/include/api/CFieldConfig.h b/include/api/CFieldConfig.h index 22b8c68fa3..2e0372f9f5 100644 --- a/include/api/CFieldConfig.h +++ b/include/api/CFieldConfig.h @@ -26,12 +26,8 @@ class CFieldConfigTest; - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { //! \brief //! Holds field configuration options. @@ -80,550 +76,515 @@ namespace api //! models. For population models the "by field" is referred to as //! the "attribute field" in model library code. //! -class API_EXPORT CFieldConfig -{ - public: - //! Prefix for detector settings - static const std::string DETECTOR_PREFIX; - - //! Prefix for categorization filter settings - static const std::string CATEGORIZATION_FILTER_PREFIX; - - //! Prefix for influencer settings - static const std::string INFLUENCER_PREFIX; - - //! Prefix for filter setting - static const std::string FILTER_PREFIX; - - //! Prefix for scheduled events - static const std::string SCHEDULED_EVENT_PREFIX; - - //! Suffix for clause settings - static const std::string CLAUSE_SUFFIX; - - //! Suffix for description settings - static const std::string DESCRIPTION_SUFFIX; - - //! Suffix for detector rules - static const std::string RULES_SUFFIX; - - //! Name of the "categorizationfield" option - static const std::string CATEGORIZATION_FIELD_OPTION; - - //! Name of the "summarycountfield" option - static const std::string SUMMARY_COUNT_FIELD_OPTION; - - //! Character to look for to distinguish setting names - static const char SUFFIX_SEPARATOR; - - //! Character to look for to split field names out of complete config keys - static const char FIELDNAME_SEPARATOR; - - //! Suffix applied to field names for the setting that indicates whether - //! they're enabled - static const std::string IS_ENABLED_SUFFIX; - - //! Suffix applied to field names for the setting that indicates whether - //! they're metrics - static const std::string BY_SUFFIX; - - //! Suffix applied to field names for the setting that indicates whether - //! they're metrics - static const std::string OVER_SUFFIX; - - //! Suffix applied to field names for the setting that indicates whether - //! they're metrics - static const std::string PARTITION_SUFFIX; - - //! Option to look for in the command line clause to indicate that the - //! 
"partitionfield" parameter is specified (case-insensitive) - static const std::string PARTITION_FIELD_OPTION; - - //! Suffix applied to field names for the setting that indicates whether - //! empty/missing values of the "by" field should be ignored - static const std::string USE_NULL_SUFFIX; - - //! Option to look for in the command line clause to indicate that the - //! "usenull" parameter is specified (case-insensitive) - static const std::string USE_NULL_OPTION; - - //! Token to look for in the command line clause to indicate that the - //! "by" field follows - static const std::string BY_TOKEN; - - //! Token to look for in the command line clause to indicate that the - //! "over" field follows - static const std::string OVER_TOKEN; - - //! Magic field name used to indicate that event rate should be - //! analysed rather than a field value - static const std::string COUNT_NAME; - - //! A default string value that our string utilities will convert to - //! boolean false - static const std::string FALSE_VALUE; - - //! A default string value that our string utilities will convert to - //! boolean true - static const std::string TRUE_VALUE; - - //! Token to look in the config file to indicate a list of field - //! names that are used to indicate an influence pivot relationship - static const std::string INFLUENCER_FIELD_NAMES_OPTION; - - //! Option to specify an influencer field - static const std::string INFLUENCER_FIELD_OPTION; - - //! Strings that define the type of analysis to run - static const std::string FUNCTION_COUNT; - static const std::string FUNCTION_COUNT_ABBREV; - static const std::string FUNCTION_LOW_COUNT; - static const std::string FUNCTION_LOW_COUNT_ABBREV; - static const std::string FUNCTION_HIGH_COUNT; - static const std::string FUNCTION_HIGH_COUNT_ABBREV; - static const std::string FUNCTION_DISTINCT_COUNT; - static const std::string FUNCTION_DISTINCT_COUNT_ABBREV; - static const std::string FUNCTION_LOW_DISTINCT_COUNT; - static const std::string FUNCTION_LOW_DISTINCT_COUNT_ABBREV; - static const std::string FUNCTION_HIGH_DISTINCT_COUNT; - static const std::string FUNCTION_HIGH_DISTINCT_COUNT_ABBREV; - static const std::string FUNCTION_NON_ZERO_COUNT; - static const std::string FUNCTION_NON_ZERO_COUNT_ABBREV; - static const std::string FUNCTION_RARE_NON_ZERO_COUNT; - static const std::string FUNCTION_RARE_NON_ZERO_COUNT_ABBREV; - static const std::string FUNCTION_RARE; - static const std::string FUNCTION_RARE_COUNT; - static const std::string FUNCTION_FREQ_RARE; - static const std::string FUNCTION_FREQ_RARE_ABBREV; - static const std::string FUNCTION_FREQ_RARE_COUNT; - static const std::string FUNCTION_FREQ_RARE_COUNT_ABBREV; - static const std::string FUNCTION_LOW_NON_ZERO_COUNT; - static const std::string FUNCTION_LOW_NON_ZERO_COUNT_ABBREV; - static const std::string FUNCTION_HIGH_NON_ZERO_COUNT; - static const std::string FUNCTION_HIGH_NON_ZERO_COUNT_ABBREV; - static const std::string FUNCTION_INFO_CONTENT; - static const std::string FUNCTION_LOW_INFO_CONTENT; - static const std::string FUNCTION_HIGH_INFO_CONTENT; - static const std::string FUNCTION_METRIC; - static const std::string FUNCTION_AVERAGE; - static const std::string FUNCTION_MEAN; - static const std::string FUNCTION_LOW_MEAN; - static const std::string FUNCTION_HIGH_MEAN; - static const std::string FUNCTION_LOW_AVERAGE; - static const std::string FUNCTION_HIGH_AVERAGE; - static const std::string FUNCTION_MEDIAN; - static const std::string FUNCTION_LOW_MEDIAN; - static const std::string 
FUNCTION_HIGH_MEDIAN; - static const std::string FUNCTION_MIN; - static const std::string FUNCTION_MAX; - static const std::string FUNCTION_VARIANCE; - static const std::string FUNCTION_LOW_VARIANCE; - static const std::string FUNCTION_HIGH_VARIANCE; - static const std::string FUNCTION_SUM; - static const std::string FUNCTION_LOW_SUM; - static const std::string FUNCTION_HIGH_SUM; - static const std::string FUNCTION_NON_NULL_SUM; - static const std::string FUNCTION_NON_NULL_SUM_ABBREV; - static const std::string FUNCTION_LOW_NON_NULL_SUM; - static const std::string FUNCTION_LOW_NON_NULL_SUM_ABBREV; - static const std::string FUNCTION_HIGH_NON_NULL_SUM; - static const std::string FUNCTION_HIGH_NON_NULL_SUM_ABBREV; - static const std::string FUNCTION_TIME_OF_DAY; - static const std::string FUNCTION_TIME_OF_WEEK; - static const std::string FUNCTION_LAT_LONG; - static const std::string FUNCTION_MAX_VELOCITY; - static const std::string FUNCTION_MIN_VELOCITY; - static const std::string FUNCTION_MEAN_VELOCITY; - static const std::string FUNCTION_SUM_VELOCITY; - - //! String that defines whether to exclude frequent results - static const std::string EXCLUDE_FREQUENT_SUFFIX; - static const std::string EXCLUDE_FREQUENT_OPTION; - static const std::string ALL_TOKEN; - static const std::string NONE_TOKEN; - - static const std::string CLEAR; - static const std::string EMPTY_STRING; - - public: - //! Class representing all the options associated with a field config. - class API_EXPORT CFieldOptions - { - public: - //! Construct with no "by" field nor "partition" field, deducing - //! the function from the fieldName - CFieldOptions(const std::string &fieldName, - int configKey); - - //! Deduce the function from the fieldName - CFieldOptions(const std::string &fieldName, - int configKey, - const std::string &byFieldName, - bool byHasExcludeFrequent, - bool useNull); - - //! Deduce the function from the fieldName - CFieldOptions(const std::string &fieldName, - int configKey, - const std::string &byFieldName, - const std::string &partitionFieldName, - bool byHasExcludeFrequent, - bool overHasExcludeFrequent, - bool useNull); - - //! Specify everything - CFieldOptions(model::function_t::EFunction function, - const std::string &fieldName, - int configKey, - const std::string &byFieldName, - const std::string &overFieldName, - const std::string &partitionFieldName, - bool byHasExcludeFrequent, - bool overHasExcludeFrequent, - bool useNull); - - //! Set description - void description(std::string description); - - //! Accessors - const std::string &description() const; - model::function_t::EFunction function() const; - const std::string &fieldName() const; - int configKey() const; - const std::string &byFieldName() const; - const std::string &overFieldName() const; - const std::string &partitionFieldName() const; - bool useNull() const; - ml::model_t::EExcludeFrequent excludeFrequent() const; - - //! Map back from the function enum to the shortest possible - //! function name that could be used to specify the function - const std::string &terseFunctionName() const; - - //! Map back from the function enum to the longest possible - //! function name that could be used to specify the function - const std::string &verboseFunctionName() const; - - //! Write the detector-specific parts of the configuration - //! clause. Note that this cannot include summarycountfield, - //! influencerfield or categorizationfield as these are not - //! detector-specific. - std::ostream &debugPrintClause(std::ostream &strm) const; - - //! 
Efficient swap
-            void swap(CFieldOptions &other);
-
-        private:
-            std::string m_Description;
-            model::function_t::EFunction m_Function;
-            std::string m_FieldName;
-            int m_ConfigKey;
-            std::string m_ByFieldName;
-            std::string m_OverFieldName;
-            std::string m_PartitionFieldName;
-            bool m_ByHasExcludeFrequent;
-            bool m_OverHasExcludeFrequent;
-            bool m_UseNull;
-
-            friend std::ostream &operator<<(std::ostream &,
-                                            const CFieldOptions &);
-    };
+class API_EXPORT CFieldConfig {
+public:
+    //! Prefix for detector settings
+    static const std::string DETECTOR_PREFIX;
+    //! Prefix for categorization filter settings
+    static const std::string CATEGORIZATION_FILTER_PREFIX;
+
+    //! Prefix for influencer settings
+    static const std::string INFLUENCER_PREFIX;
+
+    //! Prefix for filter setting
+    static const std::string FILTER_PREFIX;
+
+    //! Prefix for scheduled events
+    static const std::string SCHEDULED_EVENT_PREFIX;
+
+    //! Suffix for clause settings
+    static const std::string CLAUSE_SUFFIX;
+
+    //! Suffix for description settings
+    static const std::string DESCRIPTION_SUFFIX;
+
+    //! Suffix for detector rules
+    static const std::string RULES_SUFFIX;
+
+    //! Name of the "categorizationfield" option
+    static const std::string CATEGORIZATION_FIELD_OPTION;
+
+    //! Name of the "summarycountfield" option
+    static const std::string SUMMARY_COUNT_FIELD_OPTION;
+
+    //! Character to look for to distinguish setting names
+    static const char SUFFIX_SEPARATOR;
+
+    //! Character to look for to split field names out of complete config keys
+    static const char FIELDNAME_SEPARATOR;
+
+    //! Suffix applied to field names for the setting that indicates whether
+    //! they're enabled
+    static const std::string IS_ENABLED_SUFFIX;
+
+    //! Suffix applied to field names for the setting that specifies the
+    //! "by" field
+    static const std::string BY_SUFFIX;
+
+    //! Suffix applied to field names for the setting that specifies the
+    //! "over" field
+    static const std::string OVER_SUFFIX;
+
+    //! Suffix applied to field names for the setting that specifies the
+    //! "partition" field
+    static const std::string PARTITION_SUFFIX;
+
+    //! Option to look for in the command line clause to indicate that the
+    //! "partitionfield" parameter is specified (case-insensitive)
+    static const std::string PARTITION_FIELD_OPTION;
+
+    //! Suffix applied to field names for the setting that indicates whether
+    //! empty/missing values of the "by" field should be ignored
+    static const std::string USE_NULL_SUFFIX;
+
+    //! Option to look for in the command line clause to indicate that the
+    //! "usenull" parameter is specified (case-insensitive)
+    static const std::string USE_NULL_OPTION;
+
+    //! Token to look for in the command line clause to indicate that the
+    //! "by" field follows
+    static const std::string BY_TOKEN;
+
+    //! Token to look for in the command line clause to indicate that the
+    //! "over" field follows
+    static const std::string OVER_TOKEN;
+
+    //! Magic field name used to indicate that event rate should be
+    //! analysed rather than a field value
+    static const std::string COUNT_NAME;
+
+    //! A default string value that our string utilities will convert to
+    //! boolean false
+    static const std::string FALSE_VALUE;
+
+    //! A default string value that our string utilities will convert to
+    //! boolean true
+    static const std::string TRUE_VALUE;
+
+    //! Token to look in the config file to indicate a list of field
+    //!
names that are used to indicate an influence pivot relationship + static const std::string INFLUENCER_FIELD_NAMES_OPTION; + + //! Option to specify an influencer field + static const std::string INFLUENCER_FIELD_OPTION; + + //! Strings that define the type of analysis to run + static const std::string FUNCTION_COUNT; + static const std::string FUNCTION_COUNT_ABBREV; + static const std::string FUNCTION_LOW_COUNT; + static const std::string FUNCTION_LOW_COUNT_ABBREV; + static const std::string FUNCTION_HIGH_COUNT; + static const std::string FUNCTION_HIGH_COUNT_ABBREV; + static const std::string FUNCTION_DISTINCT_COUNT; + static const std::string FUNCTION_DISTINCT_COUNT_ABBREV; + static const std::string FUNCTION_LOW_DISTINCT_COUNT; + static const std::string FUNCTION_LOW_DISTINCT_COUNT_ABBREV; + static const std::string FUNCTION_HIGH_DISTINCT_COUNT; + static const std::string FUNCTION_HIGH_DISTINCT_COUNT_ABBREV; + static const std::string FUNCTION_NON_ZERO_COUNT; + static const std::string FUNCTION_NON_ZERO_COUNT_ABBREV; + static const std::string FUNCTION_RARE_NON_ZERO_COUNT; + static const std::string FUNCTION_RARE_NON_ZERO_COUNT_ABBREV; + static const std::string FUNCTION_RARE; + static const std::string FUNCTION_RARE_COUNT; + static const std::string FUNCTION_FREQ_RARE; + static const std::string FUNCTION_FREQ_RARE_ABBREV; + static const std::string FUNCTION_FREQ_RARE_COUNT; + static const std::string FUNCTION_FREQ_RARE_COUNT_ABBREV; + static const std::string FUNCTION_LOW_NON_ZERO_COUNT; + static const std::string FUNCTION_LOW_NON_ZERO_COUNT_ABBREV; + static const std::string FUNCTION_HIGH_NON_ZERO_COUNT; + static const std::string FUNCTION_HIGH_NON_ZERO_COUNT_ABBREV; + static const std::string FUNCTION_INFO_CONTENT; + static const std::string FUNCTION_LOW_INFO_CONTENT; + static const std::string FUNCTION_HIGH_INFO_CONTENT; + static const std::string FUNCTION_METRIC; + static const std::string FUNCTION_AVERAGE; + static const std::string FUNCTION_MEAN; + static const std::string FUNCTION_LOW_MEAN; + static const std::string FUNCTION_HIGH_MEAN; + static const std::string FUNCTION_LOW_AVERAGE; + static const std::string FUNCTION_HIGH_AVERAGE; + static const std::string FUNCTION_MEDIAN; + static const std::string FUNCTION_LOW_MEDIAN; + static const std::string FUNCTION_HIGH_MEDIAN; + static const std::string FUNCTION_MIN; + static const std::string FUNCTION_MAX; + static const std::string FUNCTION_VARIANCE; + static const std::string FUNCTION_LOW_VARIANCE; + static const std::string FUNCTION_HIGH_VARIANCE; + static const std::string FUNCTION_SUM; + static const std::string FUNCTION_LOW_SUM; + static const std::string FUNCTION_HIGH_SUM; + static const std::string FUNCTION_NON_NULL_SUM; + static const std::string FUNCTION_NON_NULL_SUM_ABBREV; + static const std::string FUNCTION_LOW_NON_NULL_SUM; + static const std::string FUNCTION_LOW_NON_NULL_SUM_ABBREV; + static const std::string FUNCTION_HIGH_NON_NULL_SUM; + static const std::string FUNCTION_HIGH_NON_NULL_SUM_ABBREV; + static const std::string FUNCTION_TIME_OF_DAY; + static const std::string FUNCTION_TIME_OF_WEEK; + static const std::string FUNCTION_LAT_LONG; + static const std::string FUNCTION_MAX_VELOCITY; + static const std::string FUNCTION_MIN_VELOCITY; + static const std::string FUNCTION_MEAN_VELOCITY; + static const std::string FUNCTION_SUM_VELOCITY; + + //! 
String that defines whether to exclude frequent results + static const std::string EXCLUDE_FREQUENT_SUFFIX; + static const std::string EXCLUDE_FREQUENT_OPTION; + static const std::string ALL_TOKEN; + static const std::string NONE_TOKEN; + + static const std::string CLEAR; + static const std::string EMPTY_STRING; + +public: + //! Class representing all the options associated with a field config. + class API_EXPORT CFieldOptions { public: - //! Key specifiers for the multi-index - struct SUniqueKey - { - }; - struct SConfigKey - { - }; - - //! Index of field names to field options. - //! Uniqueness is enforced by config key and also by the combination of - //! function, field name, by field name, over field name and - //! partition field name. - using TFieldOptionsMIndex = boost::multi_index::multi_index_container< - CFieldOptions, - boost::multi_index::indexed_by< - boost::multi_index::ordered_unique< - boost::multi_index::tag, - BOOST_MULTI_INDEX_CONST_MEM_FUN(CFieldOptions, int, configKey) - >, - boost::multi_index::ordered_unique< - boost::multi_index::tag, - boost::multi_index::composite_key< - CFieldOptions, - BOOST_MULTI_INDEX_CONST_MEM_FUN(CFieldOptions, model::function_t::EFunction, function), - BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CFieldOptions, std::string, fieldName), - BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CFieldOptions, std::string, byFieldName), - BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CFieldOptions, std::string, overFieldName), - BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CFieldOptions, std::string, partitionFieldName) - > - > - > - >; - - using TFieldOptionsMIndexItr = TFieldOptionsMIndex::iterator; - using TFieldOptionsMIndexCItr = TFieldOptionsMIndex::const_iterator; - - //! Used to maintain a list of all unique config keys - using TIntSet = std::set; - - //! Used to return the superset of enabled field names - using TStrSet = std::set; - - //! Used to obtain command line clause tokens - using TStrVec = std::vector; - using TStrVecItr = TStrVec::iterator; - - using TDetectionRuleVec = std::vector; - using TIntDetectionRuleVecUMap = boost::unordered_map; - - using TStrPatternSetUMap = boost::unordered_map; - - using TStrDetectionRulePr = std::pair; - using TStrDetectionRulePrVec = std::vector; - - public: - //! Construct empty. This call should generally be followed by a call to - //! one of the init...() methods. - CFieldConfig(); - - //! Construct with just a categorization field. (In the case of a - //! categorization job, this is all that is needed for this config.) - CFieldConfig(const std::string &categorizationFieldName); - - //! Construct with a single field. (This constructor is largely for - //! unit testing and backwards compatibility.) - CFieldConfig(const std::string &fieldName, - const std::string &byFieldName, - bool useNull = false, - const std::string &summaryCountFieldName = EMPTY_STRING); - - //! Construct with a single field and a partition field. (This - //! constructor is only used for unit testing.) - CFieldConfig(const std::string &fieldName, - const std::string &byFieldName, - const std::string &partitionFieldName, - bool useNull); - - //! Initialise from command line options. This method expects that only - //! one of the config file and the tokens will have been specified. If - //! neither or both have been specified, this is reported as an error. - bool initFromCmdLine(const std::string &configFile, - const TStrVec &tokens); + //! Construct with no "by" field nor "partition" field, deducing + //! 
the function from the fieldName + CFieldOptions(const std::string& fieldName, int configKey); + + //! Deduce the function from the fieldName + CFieldOptions(const std::string& fieldName, int configKey, const std::string& byFieldName, bool byHasExcludeFrequent, bool useNull); + + //! Deduce the function from the fieldName + CFieldOptions(const std::string& fieldName, + int configKey, + const std::string& byFieldName, + const std::string& partitionFieldName, + bool byHasExcludeFrequent, + bool overHasExcludeFrequent, + bool useNull); + + //! Specify everything + CFieldOptions(model::function_t::EFunction function, + const std::string& fieldName, + int configKey, + const std::string& byFieldName, + const std::string& overFieldName, + const std::string& partitionFieldName, + bool byHasExcludeFrequent, + bool overHasExcludeFrequent, + bool useNull); + + //! Set description + void description(std::string description); + + //! Accessors + const std::string& description() const; + model::function_t::EFunction function() const; + const std::string& fieldName() const; + int configKey() const; + const std::string& byFieldName() const; + const std::string& overFieldName() const; + const std::string& partitionFieldName() const; + bool useNull() const; + ml::model_t::EExcludeFrequent excludeFrequent() const; + + //! Map back from the function enum to the shortest possible + //! function name that could be used to specify the function + const std::string& terseFunctionName() const; + + //! Map back from the function enum to the longest possible + //! function name that could be used to specify the function + const std::string& verboseFunctionName() const; + + //! Write the detector-specific parts of the configuration + //! clause. Note that this cannot include summarycountfield, + //! influencerfield or categorizationfield as these are not + //! detector-specific. + std::ostream& debugPrintClause(std::ostream& strm) const; + + //! Efficient swap + void swap(CFieldOptions& other); - //! Initialise from a config file. - bool initFromFile(const std::string &configFile); - - //! Initialise from a command line clause that has been tokenised by the - //! command line parser (i.e. using whitespace). The clause may have at - //! most one "by" token. If there is a "by" token, there must be - //! exactly one token following it, and one or more tokens before it. - bool initFromClause(const TStrVec &tokens); - - //! Add an extra set of field config options to the configuration. It - //! is not expected that this will be done once analysis has started. - //! If options are added during the analysis then this class will remain - //! consistent but it is likely to cause problems for other classes that - //! do not expect this. The likely use case for this function is for - //! building config migration programs. - bool addOptions(const CFieldOptions &options); - - //! Get the list of categorization filters - const TStrVec &categorizationFilters() const; - - //! Get the field to use for summary counts. If the returned string is - //! empty then this implies that input has not been manually summarised. - const std::string &summaryCountFieldName() const; - - //! Does any config have a non-empty partition field configured? - //! (This is used by licensing.) - bool havePartitionFields() const; - - //! Access the superset of all field names that are used by any detector. - const TStrSet &fieldNameSuperset() const; - - //! Debug dump of fields - std::string debug() const; - - //! 
Add influencer fields for all the by/over/partition fields of all - //! existing configurations - void addInfluencerFieldsFromByOverPartitionFields(); + private: + std::string m_Description; + model::function_t::EFunction m_Function; + std::string m_FieldName; + int m_ConfigKey; + std::string m_ByFieldName; + std::string m_OverFieldName; + std::string m_PartitionFieldName; + bool m_ByHasExcludeFrequent; + bool m_OverHasExcludeFrequent; + bool m_UseNull; + + friend std::ostream& operator<<(std::ostream&, const CFieldOptions&); + }; + +public: + //! Key specifiers for the multi-index + struct SUniqueKey {}; + struct SConfigKey {}; + + //! Index of field names to field options. + //! Uniqueness is enforced by config key and also by the combination of + //! function, field name, by field name, over field name and + //! partition field name. + using TFieldOptionsMIndex = boost::multi_index::multi_index_container< + CFieldOptions, + boost::multi_index::indexed_by< + boost::multi_index::ordered_unique, + BOOST_MULTI_INDEX_CONST_MEM_FUN(CFieldOptions, int, configKey)>, + boost::multi_index::ordered_unique< + boost::multi_index::tag, + boost::multi_index::composite_key< + CFieldOptions, + BOOST_MULTI_INDEX_CONST_MEM_FUN(CFieldOptions, model::function_t::EFunction, function), + BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CFieldOptions, std::string, fieldName), + BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CFieldOptions, std::string, byFieldName), + BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CFieldOptions, std::string, overFieldName), + BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CFieldOptions, std::string, partitionFieldName)>>>>; + + using TFieldOptionsMIndexItr = TFieldOptionsMIndex::iterator; + using TFieldOptionsMIndexCItr = TFieldOptionsMIndex::const_iterator; + + //! Used to maintain a list of all unique config keys + using TIntSet = std::set; + + //! Used to return the superset of enabled field names + using TStrSet = std::set; + + //! Used to obtain command line clause tokens + using TStrVec = std::vector; + using TStrVecItr = TStrVec::iterator; + + using TDetectionRuleVec = std::vector; + using TIntDetectionRuleVecUMap = boost::unordered_map; + + using TStrPatternSetUMap = boost::unordered_map; + + using TStrDetectionRulePr = std::pair; + using TStrDetectionRulePrVec = std::vector; + +public: + //! Construct empty. This call should generally be followed by a call to + //! one of the init...() methods. + CFieldConfig(); + + //! Construct with just a categorization field. (In the case of a + //! categorization job, this is all that is needed for this config.) + CFieldConfig(const std::string& categorizationFieldName); + + //! Construct with a single field. (This constructor is largely for + //! unit testing and backwards compatibility.) + CFieldConfig(const std::string& fieldName, + const std::string& byFieldName, + bool useNull = false, + const std::string& summaryCountFieldName = EMPTY_STRING); + + //! Construct with a single field and a partition field. (This + //! constructor is only used for unit testing.) + CFieldConfig(const std::string& fieldName, const std::string& byFieldName, const std::string& partitionFieldName, bool useNull); + + //! Initialise from command line options. This method expects that only + //! one of the config file and the tokens will have been specified. If + //! neither or both have been specified, this is reported as an error. + bool initFromCmdLine(const std::string& configFile, const TStrVec& tokens); + + //! Initialise from a config file. 
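A sketch of the intended call pattern for these init methods, including the initFromFile() declared next: initFromCmdLine() treats the config file and the clause tokens as mutually exclusive, so exactly one should be supplied. Representing "not specified" as an empty string/vector, the include path, and the example clause are all assumptions of this sketch rather than documented guarantees.

#include <api/CFieldConfig.h>

void initFieldConfigExample() {
    // Initialise from a command line clause: no config file supplied.
    ml::api::CFieldConfig fromClause;
    ml::api::CFieldConfig::TStrVec tokens{"count", "by", "status"};
    bool okClause = fromClause.initFromCmdLine("", tokens);

    // Initialise from a config file: no clause tokens supplied.
    ml::api::CFieldConfig fromFile;
    bool okFile = fromFile.initFromCmdLine("fields.conf", ml::api::CFieldConfig::TStrVec());

    // Supplying both, or neither, is reported as an error by the method.
    (void)okClause;
    (void)okFile;
}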
+ bool initFromFile(const std::string& configFile); + + //! Initialise from a command line clause that has been tokenised by the + //! command line parser (i.e. using whitespace). The clause may have at + //! most one "by" token. If there is a "by" token, there must be + //! exactly one token following it, and one or more tokens before it. + bool initFromClause(const TStrVec& tokens); + + //! Add an extra set of field config options to the configuration. It + //! is not expected that this will be done once analysis has started. + //! If options are added during the analysis then this class will remain + //! consistent but it is likely to cause problems for other classes that + //! do not expect this. The likely use case for this function is for + //! building config migration programs. + bool addOptions(const CFieldOptions& options); + + //! Get the list of categorization filters + const TStrVec& categorizationFilters() const; + + //! Get the field to use for summary counts. If the returned string is + //! empty then this implies that input has not been manually summarised. + const std::string& summaryCountFieldName() const; + + //! Does any config have a non-empty partition field configured? + //! (This is used by licensing.) + bool havePartitionFields() const; + + //! Access the superset of all field names that are used by any detector. + const TStrSet& fieldNameSuperset() const; + + //! Debug dump of fields + std::string debug() const; + + //! Add influencer fields for all the by/over/partition fields of all + //! existing configurations + void addInfluencerFieldsFromByOverPartitionFields(); + + //! Get the list of field names for pivoting the anomaly results + const TStrVec& influencerFieldNames() const; + + //! Get the detector key to detection rules map + const TIntDetectionRuleVecUMap& detectionRules() const; + + //! Get the scheduled events + const TStrDetectionRulePrVec& scheduledEvents() const; + + //! Attempt to parse a detector's rules. + bool parseRules(int detectorIndex, const std::string& rules); + + //! Process and store a rule filter + bool processFilter(const std::string& key, const std::string& value); + + // //! Replaces filters with the ones in the given property tree + bool updateFilters(const boost::property_tree::ptree& propTree); + + //! Replaces scheduled events with the ones in the given property tree + bool updateScheduledEvents(const boost::property_tree::ptree& propTree); + + const TFieldOptionsMIndex& fieldOptions() const; + + const std::string& categorizationFieldName() const; + + const TStrPatternSetUMap& ruleFilters() const; + +private: + //! Parse detection rules into detectionRules + bool parseRules(TDetectionRuleVec& detectionRules, const std::string& rules); + + //! Attempt to parse a single analysis clause. This could have come + //! from either the command line of a custom command or from one entry + //! in a config file. The supplied tokens must have been + //! split using both whitespace and commas. + bool parseClause(bool allowMultipleFunctions, + int configKey, + const std::string& description, + TStrVec& copyTokens, + TFieldOptionsMIndex& optionsIndex, + std::string& categorizationFieldName, + std::string& summaryCountFieldName); + + //! Helper method for initFromFile(). Because multiple config + //! file settings are required to specify a single configuration, the + //! config file is read using a mix of iteration and search. We iterate + //! to find the unique config keys, then search for all the settings + //! 
that correspond to each particular config key. Doing this + //! simplifies the error reporting. + bool + processDetector(const boost::property_tree::ptree& propTree, const std::string& key, const std::string& value, TIntSet& handledConfigs); - //! Get the list of field names for pivoting the anomaly results - const TStrVec &influencerFieldNames() const; + //! Add data structures relating to an active detector. + bool addActiveDetector(int configKey, const std::string& description, const std::string& rules, TStrVec& copyTokens); - //! Get the detector key to detection rules map - const TIntDetectionRuleVecUMap &detectionRules() const; + //! Get a function name and field name from a field string + static bool parseFieldString(bool haveSummaryCountField, + bool isPopulation, + bool hasByField, + const std::string& str, + model::function_t::EFunction& function, + std::string& fieldName); - //! Get the scheduled events - const TStrDetectionRulePrVec &scheduledEvents() const; + //! Used to keep the field superset up-to-date + void seenField(const std::string& fieldName); - //! Attempt to parse a detector's rules. - bool parseRules(int detectorIndex, const std::string &rules); + //! Split a config clause on whitespace and commas. + bool tokenise(const std::string& clause, TStrVec& copyTokens); - //! Process and store a rule filter - bool processFilter(const std::string &key, const std::string &value); + //! Split a config clause that has already been tokenised by a + //! command-line processor to additionally be split at commas + void retokenise(const TStrVec& tokens, TStrVec& copyTokens); - // //! Replaces filters with the ones in the given property tree - bool updateFilters(const boost::property_tree::ptree &propTree); + //! Check that we have at most one "by" and one "over" token + //! and report their positions in the token list + bool findLastByOverTokens(const TStrVec& copyTokens, std::size_t& lastByTokenIndex, std::size_t& lastOverTokenIndex); - //! Replaces scheduled events with the ones in the given property tree - bool updateScheduledEvents(const boost::property_tree::ptree &propTree); + //! Check that the "by" or "over" field is valid + bool validateByOverField(const TStrVec& copyTokens, + const std::size_t thisIndex, + const std::size_t otherIndex, + const TStrVec& clashingNames, + std::string& fieldName); - const TFieldOptionsMIndex &fieldOptions() const; + std::string findParameter(const std::string& parameter, TStrVec& copyTokens); - const std::string &categorizationFieldName() const; + //! How does a setting for excludefrequent map to the underlying boolean + //! flags? + static bool decipherExcludeFrequentSetting(const std::string& excludeFrequentString, + bool hasByField, + bool isPopulation, + bool& byExcludeFrequent, + bool& overExcludeFrequent); - const TStrPatternSetUMap &ruleFilters() const; + //! Store the list of influencer field names, if any + void influencerFieldNames(TStrVec influencers); - private: - //! Parse detection rules into detectionRules - bool parseRules(TDetectionRuleVec &detectionRules, const std::string &rules); - - //! Attempt to parse a single analysis clause. This could have come - //! from either the command line of a custom command or from one entry - //! in a config file. The supplied tokens must have been - //! split using both whitespace and commas. 
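The two-stage splitting contract described above for parseClause() - tokens split on whitespace first, then additionally at commas - can be illustrated with a small standalone helper. This mirrors the documented behaviour of tokenise()/retokenise(); it is not the class's implementation, and the function name is invented.

#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> splitClause(const std::string& clause) {
    std::vector<std::string> tokens;
    std::istringstream strm(clause);
    std::string word;
    while (strm >> word) { // whitespace split
        std::size_t start = 0;
        std::size_t comma = 0;
        while ((comma = word.find(',', start)) != std::string::npos) {
            if (comma > start) {
                tokens.push_back(word.substr(start, comma - start)); // comma split
            }
            start = comma + 1;
        }
        if (start < word.length()) {
            tokens.push_back(word.substr(start));
        }
    }
    return tokens;
}

So a clause such as "metric(responsetime),count by airline" yields the tokens "metric(responsetime)", "count", "by" and "airline", after which the "by" token and its following field name are picked out.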
- bool parseClause(bool allowMultipleFunctions, - int configKey, - const std::string &description, - TStrVec ©Tokens, - TFieldOptionsMIndex &optionsIndex, - std::string &categorizationFieldName, - std::string &summaryCountFieldName); - - //! Helper method for initFromFile(). Because multiple config - //! file settings are required to specify a single configuration, the - //! config file is read using a mix of iteration and search. We iterate - //! to find the unique config keys, then search for all the settings - //! that correspond to each particular config key. Doing this - //! simplifies the error reporting. - bool processDetector(const boost::property_tree::ptree &propTree, - const std::string &key, - const std::string &value, - TIntSet &handledConfigs); - - //! Add data structures relating to an active detector. - bool addActiveDetector(int configKey, - const std::string &description, - const std::string &rules, - TStrVec ©Tokens); - - //! Get a function name and field name from a field string - static bool parseFieldString(bool haveSummaryCountField, - bool isPopulation, - bool hasByField, - const std::string &str, - model::function_t::EFunction &function, - std::string &fieldName); - - //! Used to keep the field superset up-to-date - void seenField(const std::string &fieldName); - - //! Split a config clause on whitespace and commas. - bool tokenise(const std::string &clause, TStrVec ©Tokens); - - //! Split a config clause that has already been tokenised by a - //! command-line processor to additionally be split at commas - void retokenise(const TStrVec &tokens, TStrVec ©Tokens); - - //! Check that we have at most one "by" and one "over" token - //! and report their positions in the token list - bool findLastByOverTokens(const TStrVec ©Tokens, - std::size_t &lastByTokenIndex, - std::size_t &lastOverTokenIndex); - - //! Check that the "by" or "over" field is valid - bool validateByOverField(const TStrVec ©Tokens, - const std::size_t thisIndex, - const std::size_t otherIndex, - const TStrVec &clashingNames, - std::string &fieldName); - - std::string findParameter(const std::string ¶meter, - TStrVec ©Tokens); - - //! How does a setting for excludefrequent map to the underlying boolean - //! flags? - static bool decipherExcludeFrequentSetting(const std::string &excludeFrequentString, - bool hasByField, - bool isPopulation, - bool &byExcludeFrequent, - bool &overExcludeFrequent); - - //! Store the list of influencer field names, if any - void influencerFieldNames(TStrVec influencers); - - //! Add influencer fields for all the by/over/partition fields of all - //! detectors. - void addInfluencerFieldsFromByOverPartitionFields(const TFieldOptionsMIndex &fieldOptions); - - //! Store one influencer field name - void addInfluencerFieldName(const std::string &influencer, - bool quiet = false); - - //! Sort the influencers (so that downstream code doesn't have to worry - //! about ordering changes when the overall set is unchanged) - void sortInfluencers(); - - //! Store one categorization filter - void addCategorizationFilter(const std::string &filter); - - //! Process and store a scheduled event - bool processScheduledEvent(const boost::property_tree::ptree &propTree, - const std::string &key, - const std::string &value, - TIntSet &handledScheduledEvents); - - //! Debug dump of field options - void debug(const TFieldOptionsMIndex &fieldOptions, - std::ostream &strm) const; + //! Add influencer fields for all the by/over/partition fields of all + //! detectors. 
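The derivation performed by addInfluencerFieldsFromByOverPartitionFields() and sortInfluencers() amounts to the following, sketched here with invented detector data: every non-empty by/over/partition field name becomes an influencer, duplicates are dropped, and the result is sorted so that downstream code sees a stable order regardless of how the set was accumulated.

#include <algorithm>
#include <string>
#include <vector>

std::vector<std::string> deriveInfluencers() {
    std::vector<std::string> influencers;
    // One entry per detector: {byFieldName, overFieldName, partitionFieldName}.
    const std::vector<std::vector<std::string>> detectors{{"user", "", "host"},
                                                          {"user", "clientip", ""}};
    for (const auto& fields : detectors) {
        for (const auto& name : fields) {
            // Skip empty fields and names we have already collected.
            if (!name.empty() &&
                std::find(influencers.begin(), influencers.end(), name) == influencers.end()) {
                influencers.push_back(name);
            }
        }
    }
    std::sort(influencers.begin(), influencers.end());
    return influencers; // {"clientip", "host", "user"}
}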
+ void addInfluencerFieldsFromByOverPartitionFields(const TFieldOptionsMIndex& fieldOptions); - private: - //! The fields options. - TFieldOptionsMIndex m_FieldOptions; + //! Store one influencer field name + void addInfluencerFieldName(const std::string& influencer, bool quiet = false); - //! The superset of all field names that are used in the config. - TStrSet m_FieldNameSuperset; + //! Sort the influencers (so that downstream code doesn't have to worry + //! about ordering changes when the overall set is unchanged) + void sortInfluencers(); - //! The categorization field name. - std::string m_CategorizationFieldName; + //! Store one categorization filter + void addCategorizationFilter(const std::string& filter); - //! The filters to be applied to values of the categorization field. - TStrVec m_CategorizationFilters; + //! Process and store a scheduled event + bool processScheduledEvent(const boost::property_tree::ptree& propTree, + const std::string& key, + const std::string& value, + TIntSet& handledScheduledEvents); - //! The field names specified for "influence", if any have been provided - TStrVec m_Influencers; + //! Debug dump of field options + void debug(const TFieldOptionsMIndex& fieldOptions, std::ostream& strm) const; - //! The summary count field name. If this is empty then it implies - //! that input has not been manually summarised. - std::string m_SummaryCountFieldName; +private: + //! The fields options. + TFieldOptionsMIndex m_FieldOptions; + + //! The superset of all field names that are used in the config. + TStrSet m_FieldNameSuperset; + + //! The categorization field name. + std::string m_CategorizationFieldName; + + //! The filters to be applied to values of the categorization field. + TStrVec m_CategorizationFilters; + + //! The field names specified for "influence", if any have been provided + TStrVec m_Influencers; + + //! The summary count field name. If this is empty then it implies + //! that input has not been manually summarised. + std::string m_SummaryCountFieldName; - //! The detection rules per detector index. - TIntDetectionRuleVecUMap m_DetectorRules; + //! The detection rules per detector index. + TIntDetectionRuleVecUMap m_DetectorRules; - //! The filters per id used by categorical rule conditions. - TStrPatternSetUMap m_RuleFilters; + //! The filters per id used by categorical rule conditions. + TStrPatternSetUMap m_RuleFilters; - //! The scheduled events (events apply to all detectors). - //! Events consist of a description and a detection rule - TStrDetectionRulePrVec m_ScheduledEvents; + //! The scheduled events (events apply to all detectors). + //! Events consist of a description and a detection rule + TStrDetectionRulePrVec m_ScheduledEvents; // For unit testing friend class ::CFieldConfigTest; }; //! Efficient swap for field options -void swap(CFieldConfig::CFieldOptions &lhs, CFieldConfig::CFieldOptions &rhs); +void swap(CFieldConfig::CFieldOptions& lhs, CFieldConfig::CFieldOptions& rhs); //! 
Print field options -std::ostream &operator<<(std::ostream &strm, - const CFieldConfig::CFieldOptions &options); - +std::ostream& operator<<(std::ostream& strm, const CFieldConfig::CFieldOptions& options); } } #endif // INCLUDED_ml_api_CFieldConfig_h - diff --git a/include/api/CFieldDataTyper.h b/include/api/CFieldDataTyper.h index 53c212c924..72bfd46b7b 100644 --- a/include/api/CFieldDataTyper.h +++ b/include/api/CFieldDataTyper.h @@ -6,9 +6,9 @@ #ifndef INCLUDED_ml_api_CFieldDataTyper_h #define INCLUDED_ml_api_CFieldDataTyper_h -#include #include #include +#include #include #include @@ -20,28 +20,22 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CDataAdder; class CDataSearcher; class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CLimits; } -namespace api -{ +namespace api { class CBackgroundPersister; class CFieldConfig; class CJsonOutputWriter; class COutputHandler; - //! \brief //! Assign categorisation fields to input records //! @@ -49,166 +43,160 @@ class COutputHandler; //! Adds a new field called mlcategory and assigns to it //! integers that correspond to the various cateogories //! -class API_EXPORT CFieldDataTyper : public CDataProcessor -{ - public: - //! The index where state is stored - static const std::string ML_STATE_INDEX; - //! The name of the field where the category is going to be written - static const std::string MLCATEGORY_NAME; - - static const double SIMILARITY_THRESHOLD; - - //! Discriminant for Elasticsearch IDs - static const std::string STATE_TYPE; - - //! The current state version - static const std::string STATE_VERSION; - - public: - // A type of token list data typer that DOESN'T exclude fields from its - // analysis - using TTokenListDataTyperKeepsFields = - CTokenListDataTyper; - - public: - //! Construct without persistence capability - CFieldDataTyper(const std::string &jobId, - const CFieldConfig &config, - const model::CLimits &limits, - COutputHandler &outputHandler, - CJsonOutputWriter &jsonOutputWriter, - CBackgroundPersister *periodicPersister = nullptr); - - virtual ~CFieldDataTyper(); +class API_EXPORT CFieldDataTyper : public CDataProcessor { +public: + //! The index where state is stored + static const std::string ML_STATE_INDEX; + //! The name of the field where the category is going to be written + static const std::string MLCATEGORY_NAME; + + static const double SIMILARITY_THRESHOLD; + + //! Discriminant for Elasticsearch IDs + static const std::string STATE_TYPE; + + //! The current state version + static const std::string STATE_VERSION; + +public: + // A type of token list data typer that DOESN'T exclude fields from its + // analysis + using TTokenListDataTyperKeepsFields = CTokenListDataTyper; + +public: + //! Construct without persistence capability + CFieldDataTyper(const std::string& jobId, + const CFieldConfig& config, + const model::CLimits& limits, + COutputHandler& outputHandler, + CJsonOutputWriter& jsonOutputWriter, + CBackgroundPersister* periodicPersister = nullptr); - //! We're going to be writing to a new output stream - virtual void newOutputStream(); + virtual ~CFieldDataTyper(); - //! Receive a single record to be typed, and output that record to - //! STDOUT with its type field added - virtual bool handleRecord(const TStrStrUMap &dataRowFields); + //! We're going to be writing to a new output stream + virtual void newOutputStream(); - //! Perform any final processing once all input data has been seen. 
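[Editorial aside] A minimal sketch of how a CDataProcessor-style consumer such as this class is typically driven via handleRecord()/finalise(). The RecordSource interface is hypothetical, and std::map stands in for the real TStrStrUMap (a boost::unordered_map in the original).

#include <map>
#include <string>

// Hypothetical driver: forward every parsed record to the processor and
// finalise once the input is exhausted. 'Processor' stands in for any
// CDataProcessor subclass, e.g. CFieldDataTyper.
template<typename Processor, typename RecordSource>
bool driveProcessorSketch(Processor& processor, RecordSource& source) {
    std::map<std::string, std::string> dataRowFields;
    while (source.next(dataRowFields)) { // hypothetical record source
        if (processor.handleRecord(dataRowFields) == false) {
            return false; // stop on the first record the processor rejects
        }
    }
    processor.finalise();
    return true;
}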
- virtual void finalise(); + //! Receive a single record to be typed, and output that record to + //! STDOUT with its type field added + virtual bool handleRecord(const TStrStrUMap& dataRowFields); - //! Restore previously saved state - virtual bool restoreState(core::CDataSearcher &restoreSearcher, - core_t::TTime &completeToTime); + //! Perform any final processing once all input data has been seen. + virtual void finalise(); - //! Persist current state - virtual bool persistState(core::CDataAdder &persister); + //! Restore previously saved state + virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); - //! Persist current state due to the periodic persistence being triggered. - virtual bool periodicPersistState(CBackgroundPersister &persister); + //! Persist current state + virtual bool persistState(core::CDataAdder& persister); - //! How many records did we handle? - virtual uint64_t numRecordsHandled() const; + //! Persist current state due to the periodic persistence being triggered. + virtual bool periodicPersistState(CBackgroundPersister& persister); - //! Access the output handler - virtual COutputHandler &outputHandler(); + //! How many records did we handle? + virtual uint64_t numRecordsHandled() const; - private: - //! Create the typer to operate on the categorization field - void createTyper(const std::string &fieldName); + //! Access the output handler + virtual COutputHandler& outputHandler(); - //! Compute the type for a given record. - int computeType(const TStrStrUMap &dataRowFields); +private: + //! Create the typer to operate on the categorization field + void createTyper(const std::string& fieldName); - //! Create the reverse search and return true if it has changed or false otherwise - bool createReverseSearch(int type); + //! Compute the type for a given record. + int computeType(const TStrStrUMap& dataRowFields); - bool doPersistState(const CDataTyper::TPersistFunc &dataTyperPersistFunc, - const CCategoryExamplesCollector &examplesCollector, - core::CDataAdder &persister); - void acceptPersistInserter(const CDataTyper::TPersistFunc &dataTyperPersistFunc, - const CCategoryExamplesCollector &examplesCollector, - core::CStatePersistInserter &inserter) const; - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Create the reverse search and return true if it has changed or false otherwise + bool createReverseSearch(int type); - //! Respond to an attempt to restore corrupt categorizer state by - //! resetting the categorizer and re-categorizing from scratch. - void resetAfterCorruptRestore(); + bool doPersistState(const CDataTyper::TPersistFunc& dataTyperPersistFunc, + const CCategoryExamplesCollector& examplesCollector, + core::CDataAdder& persister); + void acceptPersistInserter(const CDataTyper::TPersistFunc& dataTyperPersistFunc, + const CCategoryExamplesCollector& examplesCollector, + core::CStatePersistInserter& inserter) const; + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Handle a control message. The first character of the control - //! message indicates its type. Currently defined types are: - //! ' ' => Dummy message to force all previously uploaded data through - //! buffers - //! 'f' => Echo a flush ID so that the attached process knows that data - //! sent previously has all been processed - bool handleControlMessage(const std::string &controlMessage); + //! Respond to an attempt to restore corrupt categorizer state by + //! 
resetting the categorizer and re-categorizing from scratch. + void resetAfterCorruptRestore(); - //! Acknowledge a flush request - void acknowledgeFlush(const std::string &flushId); + //! Handle a control message. The first character of the control + //! message indicates its type. Currently defined types are: + //! ' ' => Dummy message to force all previously uploaded data through + //! buffers + //! 'f' => Echo a flush ID so that the attached process knows that data + //! sent previously has all been processed + bool handleControlMessage(const std::string& controlMessage); - private: - using TStrSet = CCategoryExamplesCollector::TStrSet; + //! Acknowledge a flush request + void acknowledgeFlush(const std::string& flushId); - private: - //! The job ID - std::string m_JobId; +private: + using TStrSet = CCategoryExamplesCollector::TStrSet; - //! Object to which the output is passed - COutputHandler &m_OutputHandler; +private: + //! The job ID + std::string m_JobId; - //! Cache extra field names to be added - TStrVec m_ExtraFieldNames; + //! Object to which the output is passed + COutputHandler& m_OutputHandler; - //! Should we write the field names before the next output? - bool m_WriteFieldNames; + //! Cache extra field names to be added + TStrVec m_ExtraFieldNames; - //! Keep count of how many records we've handled - uint64_t m_NumRecordsHandled; + //! Should we write the field names before the next output? + bool m_WriteFieldNames; - //! Map holding fields to add/change in the output compared to the input - TStrStrUMap m_Overrides; + //! Keep count of how many records we've handled + uint64_t m_NumRecordsHandled; - //! References to specific entries in the overrides map to save - //! repeatedly searching for them - std::string &m_OutputFieldCategory; + //! Map holding fields to add/change in the output compared to the input + TStrStrUMap m_Overrides; - //! Space separated list of search terms for the current category - std::string m_SearchTerms; + //! References to specific entries in the overrides map to save + //! repeatedly searching for them + std::string& m_OutputFieldCategory; - //! Regex to match values of the current category - std::string m_SearchTermsRegex; + //! Space separated list of search terms for the current category + std::string m_SearchTerms; - //! The max matching length of the current category - std::size_t m_MaxMatchingLength; + //! Regex to match values of the current category + std::string m_SearchTermsRegex; - //! Pointer to the actual typer - CDataTyper::TDataTyperP m_DataTyper; + //! The max matching length of the current category + std::size_t m_MaxMatchingLength; - //! Reference to the json output writer so that examples can be written - CJsonOutputWriter &m_JsonOutputWriter; + //! Pointer to the actual typer + CDataTyper::TDataTyperP m_DataTyper; - //! Collects up to a configurable number of examples per category - CCategoryExamplesCollector m_ExamplesCollector; + //! Reference to the json output writer so that examples can be written + CJsonOutputWriter& m_JsonOutputWriter; - //! Which field name are we categorizing? - std::string m_CategorizationFieldName; + //! Collects up to a configurable number of examples per category + CCategoryExamplesCollector m_ExamplesCollector; - //! The categorization filter - core::CRegexFilter m_CategorizationFilter; - - //! Pointer to periodic persister that works in the background. May be - //! nullptr if this object is not responsible for starting periodic - //! persistence. 
- CBackgroundPersister *m_PeriodicPersister; -}; + //! Which field name are we categorizing? + std::string m_CategorizationFieldName; + //! The categorization filter + core::CRegexFilter m_CategorizationFilter; + //! Pointer to periodic persister that works in the background. May be + //! nullptr if this object is not responsible for starting periodic + //! persistence. + CBackgroundPersister* m_PeriodicPersister; +}; } } #endif // INCLUDED_ml_api_CFieldDataTyper_h - diff --git a/include/api/CForecastRunner.h b/include/api/CForecastRunner.h index 182924de67..a45e108951 100644 --- a/include/api/CForecastRunner.h +++ b/include/api/CForecastRunner.h @@ -26,20 +26,18 @@ #include #include +#include #include #include #include -#include #include #include class CForecastRunnerTest; -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Forecast Worker to create forecasts of timeseries/ml models. @@ -60,204 +58,198 @@ namespace api //! pruned in the main thread. //! Cloning also happens beforehand as the forecast job might hang in //! the queue for a while -class API_EXPORT CForecastRunner final: private core::CNonCopyable -{ - public: - //! max open forecast requests - //! if you change this, also change the ERROR_TOO_MANY_JOBS message accordingly - static const size_t MAX_FORECAST_JOBS_IN_QUEUE = 3; - - //! default expiry time - static const size_t DEFAULT_EXPIRY_TIME = 14 * core::constants::DAY; - - //! max memory allowed to use for forecast models - static const size_t MAX_FORECAST_MODEL_MEMORY = 20971520; // 20MB - - //! minimum time between stat updates to prevent to many updates in a short time - static const uint64_t MINIMUM_TIME_ELAPSED_FOR_STATS_UPDATE = 3000; // 3s - - private: - static const std::string ERROR_FORECAST_REQUEST_FAILED_TO_PARSE; - static const std::string ERROR_NO_FORECAST_ID; - static const std::string ERROR_TOO_MANY_JOBS; - static const std::string ERROR_NO_MODELS; - static const std::string ERROR_NO_DATA_PROCESSED; - static const std::string ERROR_NO_CREATE_TIME; - static const std::string ERROR_BAD_MEMORY_STATUS; - static const std::string ERROR_MEMORY_LIMIT; - static const std::string ERROR_NOT_SUPPORTED_FOR_POPULATION_MODELS; - static const std::string ERROR_NO_SUPPORTED_FUNCTIONS; - static const std::string WARNING_DURATION_LIMIT; - static const std::string WARNING_INVALID_EXPIRY; - static const std::string INFO_DEFAULT_DURATION; - static const std::string INFO_DEFAULT_EXPIRY; - static const std::string INFO_NO_MODELS_CAN_CURRENTLY_BE_FORECAST; - - public: - using TOStreamConcurrentWrapper = core::CConcurrentWrapper; - using TOStreamConcurrentWrapperPtr = boost::shared_ptr; - - using TAnomalyDetectorPtr = model::CAnomalyDetector::TAnomalyDetectorPtr; - using TAnomalyDetectorPtrVec = std::vector; - - using TForecastModelWrapper = model::CForecastDataSink::SForecastModelWrapper; - using TForecastResultSeries = model::CForecastDataSink::SForecastResultSeries; - using TForecastResultSeriesVec = std::vector; - - using TStrUSet = boost::unordered_set; - - public: - - //! Initialize and start the forecast runner thread - //! \p jobId The job ID - //! \p strmOut The output stream to write forecast results to - CForecastRunner(const std::string &jobId, core::CJsonOutputStreamWrapper &strmOut, model::CResourceMonitor &resourceMonitor); +class API_EXPORT CForecastRunner final : private core::CNonCopyable { +public: + //! max open forecast requests + //! 
if you change this, also change the ERROR_TOO_MANY_JOBS message accordingly
+    static const size_t MAX_FORECAST_JOBS_IN_QUEUE = 3;
+
+    //! default expiry time
+    static const size_t DEFAULT_EXPIRY_TIME = 14 * core::constants::DAY;
+
+    //! max memory allowed to use for forecast models
+    static const size_t MAX_FORECAST_MODEL_MEMORY = 20971520; // 20MB
+
+    //! minimum time between stat updates to prevent too many updates in a short time
+    static const uint64_t MINIMUM_TIME_ELAPSED_FOR_STATS_UPDATE = 3000; // 3s
+
+private:
+    static const std::string ERROR_FORECAST_REQUEST_FAILED_TO_PARSE;
+    static const std::string ERROR_NO_FORECAST_ID;
+    static const std::string ERROR_TOO_MANY_JOBS;
+    static const std::string ERROR_NO_MODELS;
+    static const std::string ERROR_NO_DATA_PROCESSED;
+    static const std::string ERROR_NO_CREATE_TIME;
+    static const std::string ERROR_BAD_MEMORY_STATUS;
+    static const std::string ERROR_MEMORY_LIMIT;
+    static const std::string ERROR_NOT_SUPPORTED_FOR_POPULATION_MODELS;
+    static const std::string ERROR_NO_SUPPORTED_FUNCTIONS;
+    static const std::string WARNING_DURATION_LIMIT;
+    static const std::string WARNING_INVALID_EXPIRY;
+    static const std::string INFO_DEFAULT_DURATION;
+    static const std::string INFO_DEFAULT_EXPIRY;
+    static const std::string INFO_NO_MODELS_CAN_CURRENTLY_BE_FORECAST;
+
+public:
+    using TOStreamConcurrentWrapper = core::CConcurrentWrapper;
+    using TOStreamConcurrentWrapperPtr = boost::shared_ptr;
+
+    using TAnomalyDetectorPtr = model::CAnomalyDetector::TAnomalyDetectorPtr;
+    using TAnomalyDetectorPtrVec = std::vector;
-    //! Destructor, cancels all queued forecast requests, finishes a running forecast.
-    //! To finish all remaining forecasts call finishForecasts() first.
-    ~CForecastRunner();
+    using TForecastModelWrapper = model::CForecastDataSink::SForecastModelWrapper;
+    using TForecastResultSeries = model::CForecastDataSink::SForecastResultSeries;
+    using TForecastResultSeriesVec = std::vector;
-    //! Enqueue a forecast job that will execute the requested forecast
-    //!
-    //! Parses and verifies the controlMessage and creates an internal job object which
-    //! contains the required detectors (reference) as well as start and end date.
-    //! The forecast itself isn't executed but might start later depending on the workers
-    //! load.
-    //!
-    //! Validation fails if the message is invalid and/or the too many jobs are in the
-    //! queue.
-    //!
-    //! \param controlMessage The control message retrieved.
-    //! \param detectors vector of detectors (shallow copy)
-    //! \return true if the forecast request passed validation
-    bool pushForecastJob(const std::string &controlMessage,
-                         const TAnomalyDetectorPtrVec &detectors,
-                         const core_t::TTime lastResultsTime);
+    using TStrUSet = boost::unordered_set;
-    //! Blocks and waits until all queued forecasts are done
-    void finishForecasts();
+public:
+    //! Initialize and start the forecast runner thread
+    //! \p jobId The job ID
+    //! \p strmOut The output stream to write forecast results to
+    CForecastRunner(const std::string& jobId, core::CJsonOutputStreamWrapper& strmOut, model::CResourceMonitor& resourceMonitor);
-    //! Deletes all pending forecast requests
-    void deleteAllForecastJobs();
+    //! Destructor, cancels all queued forecast requests, finishes a running forecast.
+    //! To finish all remaining forecasts call finishForecasts() first.
+    ~CForecastRunner();
-    private:
-    struct API_EXPORT SForecast
-    {
-        SForecast();
+    //! Enqueue a forecast job that will execute the requested forecast
+    //!
+    //!
Parses and verifies the controlMessage and creates an internal job object which
+    //! contains the required detectors (reference) as well as start and end date.
+    //! The forecast itself isn't executed but might start later depending on the workers'
+    //! load.
+    //!
+    //! Validation fails if the message is invalid and/or too many jobs are in the
+    //! queue.
+    //!
+    //! \param controlMessage The control message retrieved.
+    //! \param detectors vector of detectors (shallow copy)
+    //! \return true if the forecast request passed validation
+    bool pushForecastJob(const std::string& controlMessage, const TAnomalyDetectorPtrVec& detectors, const core_t::TTime lastResultsTime);
-        SForecast(SForecast &&other);
-        SForecast &operator=(SForecast &&other);
+    //! Blocks and waits until all queued forecasts are done
+    void finishForecasts();
-        SForecast(const SForecast &that) = delete;
-        SForecast &operator=(const SForecast &) = delete;
+    //! Deletes all pending forecast requests
+    void deleteAllForecastJobs();
-        //! reset the struct, important to e.g. clean up reference counts
-        void reset();
+private:
+    struct API_EXPORT SForecast {
+        SForecast();
-        //! get the the end time
-        core_t::TTime forecastEnd() const;
+        SForecast(SForecast&& other);
+        SForecast& operator=(SForecast&& other);
-        //! The forecast ID
-        std::string s_ForecastId;
+        SForecast(const SForecast& that) = delete;
+        SForecast& operator=(const SForecast&) = delete;
-        //! The forecast alias
-        std::string s_ForecastAlias;
+        //! reset the struct, important to e.g. clean up reference counts
+        void reset();
-        //! Vector of models/series selected for forecasting (cloned for forecasting)
-        TForecastResultSeriesVec s_ForecastSeries;
+        //! get the end time
+        core_t::TTime forecastEnd() const;
-        //! Forecast create time
-        core_t::TTime s_CreateTime;
+        //! The forecast ID
+        std::string s_ForecastId;
-        //! Forecast start time
-        core_t::TTime s_StartTime;
+        //! The forecast alias
+        std::string s_ForecastAlias;
-        //! Forecast duration
-        core_t::TTime s_Duration;
+        //! Vector of models/series selected for forecasting (cloned for forecasting)
+        TForecastResultSeriesVec s_ForecastSeries;
-        //! Expiration of the forecast (for automatic deletion)
-        core_t::TTime s_ExpiryTime;
+        //! Forecast create time
+        core_t::TTime s_CreateTime;
-        //! Forecast bounds
-        double s_BoundsPercentile;
+        //! Forecast start time
+        core_t::TTime s_StartTime;
-        //! total number of models
-        size_t s_NumberOfModels;
+        //! Forecast duration
+        core_t::TTime s_Duration;
-        //! total number of models able to forecast
-        size_t s_NumberOfForecastableModels;
+        //! Expiration of the forecast (for automatic deletion)
+        core_t::TTime s_ExpiryTime;
-        //! total memory required for this forecasting job (only the models)
-        size_t s_MemoryUsage;
+        //! Forecast bounds
+        double s_BoundsPercentile;
-        //! A collection storing important messages from forecasting
-        TStrUSet s_Messages;
-    };
+        //! total number of models
+        size_t s_NumberOfModels;
-    private:
-    using TErrorFunc = std::function;
+        //! total number of models able to forecast
+        size_t s_NumberOfForecastableModels;
-    private:
-    //! The worker loop
-    void forecastWorker();
+        //! total memory required for this forecasting job (only the models)
+        size_t s_MemoryUsage;
-    //! Check for new jobs, blocks while waiting
-    bool tryGetJob(SForecast &forecastJob);
+        //! A collection storing important messages from forecasting
+        TStrUSet s_Messages;
+    };
-    //!
pushes new jobs into the internal 'queue' (thread boundary) - bool push(SForecast &forecastJob); +private: + using TErrorFunc = std::function; - //! send a scheduled message - void sendScheduledMessage(const SForecast &forecastJob) const; +private: + //! The worker loop + void forecastWorker(); - //! send an error message - void sendErrorMessage(const SForecast &forecastJob, const std::string &message) const; + //! Check for new jobs, blocks while waiting + bool tryGetJob(SForecast& forecastJob); - //! send a final message - void sendFinalMessage(const SForecast &forecastJob, const std::string &message) const; + //! pushes new jobs into the internal 'queue' (thread boundary) + bool push(SForecast& forecastJob); - //! send a message using \p write - template - void sendMessage(WRITE write, const SForecast &forecastJob, const std::string &message) const; + //! send a scheduled message + void sendScheduledMessage(const SForecast& forecastJob) const; - //! parse and validate a forecast request and turn it into a forecast job - static bool parseAndValidateForecastRequest(const std::string &controlMessage, - SForecast &forecastJob, - const core_t::TTime lastResultsTime, - const TErrorFunc &errorFunction = TErrorFunc()); + //! send an error message + void sendErrorMessage(const SForecast& forecastJob, const std::string& message) const; - private: - //! This job ID - std::string m_JobId; + //! send a final message + void sendFinalMessage(const SForecast& forecastJob, const std::string& message) const; - //! the output stream to write results to - core::CJsonOutputStreamWrapper &m_ConcurrentOutputStream; + //! send a message using \p write + template + void sendMessage(WRITE write, const SForecast& forecastJob, const std::string& message) const; - //! The resource monitor by reference (owned by CAnomalyJob) - //! note: we use the resource monitor only for checks at the moment - model::CResourceMonitor &m_ResourceMonitor; + //! parse and validate a forecast request and turn it into a forecast job + static bool parseAndValidateForecastRequest(const std::string& controlMessage, + SForecast& forecastJob, + const core_t::TTime lastResultsTime, + const TErrorFunc& errorFunction = TErrorFunc()); - //! thread for the worker - std::thread m_Worker; +private: + //! This job ID + std::string m_JobId; - //! indicator for worker - volatile bool m_Shutdown; + //! the output stream to write results to + core::CJsonOutputStreamWrapper& m_ConcurrentOutputStream; - //! The 'queue' of forecast jobs to be executed - std::list m_ForecastJobs; + //! The resource monitor by reference (owned by CAnomalyJob) + //! note: we use the resource monitor only for checks at the moment + model::CResourceMonitor& m_ResourceMonitor; - //! Mutex - std::mutex m_Mutex; + //! thread for the worker + std::thread m_Worker; - //! Condition variable for the requests queue - std::condition_variable m_WorkAvailableCondition; + //! indicator for worker + volatile bool m_Shutdown; - //! Condition variable for notifications on done requests - std::condition_variable m_WorkCompleteCondition; + //! The 'queue' of forecast jobs to be executed + std::list m_ForecastJobs; - friend class ::CForecastRunnerTest; -}; + //! Mutex + std::mutex m_Mutex; + //! Condition variable for the requests queue + std::condition_variable m_WorkAvailableCondition; + + //! 
Condition variable for notifications on done requests + std::condition_variable m_WorkCompleteCondition; + + friend class ::CForecastRunnerTest; +}; } } diff --git a/include/api/CHierarchicalResultsWriter.h b/include/api/CHierarchicalResultsWriter.h index 48477ba36f..d08683e658 100644 --- a/include/api/CHierarchicalResultsWriter.h +++ b/include/api/CHierarchicalResultsWriter.h @@ -20,10 +20,8 @@ #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief Writes out hierarchical results using a callback to write //! individual results. @@ -34,173 +32,160 @@ namespace api //! //! For each node one or more CAnomalyDetector::SResults objects are //! constructed and written by the callback supplied to the constructor. -class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResultsVisitor, - private core::CNonCopyable -{ - public: - using TDouble1Vec = core::CSmallVector; - using TOptionalDouble = boost::optional; - using TOptionalUInt64 = boost::optional; - - // Influencers - using TStoredStringPtrVec = std::vector; - using TStoredStringPtrStoredStringPtrPr = std::pair; - using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair; - using TStoredStringPtrStoredStringPtrPrDoublePrVec = std::vector; - - using TStr1Vec = core::CSmallVector; - - public: - - enum EResultType {E_SimpleCountResult, E_PopulationResult, E_PartitionResult, E_Result}; - //! Type which wraps up the results of anomaly detection. - struct API_EXPORT SResults - { - //! Construct for population results - SResults(bool isAllTimeResult, - bool isOverallResult, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &overFieldName, - const std::string &overFieldValue, - const std::string &byFieldName, - const std::string &byFieldValue, - const std::string &correlatedByFieldValue, - core_t::TTime bucketStartTime, - const std::string &functionName, - const std::string &functionDescription, - const TDouble1Vec &functionValue, - const TDouble1Vec &populationAverage, - double rawAnomalyScore, - double normalizedAnomalyScore, - double probability, - const TOptionalUInt64 ¤tRate, - const std::string &metricValueField, - const TStoredStringPtrStoredStringPtrPrDoublePrVec &influences, - bool useNull, - bool metric, - int identifier, - core_t::TTime bucketSpan); - - //! 
Construct for other results - SResults(EResultType resultType, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &byFieldName, - const std::string &byFieldValue, - const std::string &correlatedByFieldValue, - core_t::TTime bucketStartTime, - const std::string &functionName, - const std::string &functionDescription, - const TOptionalDouble &baselineRate, - const TOptionalUInt64 ¤tRate, - const TDouble1Vec &baselineMean, - const TDouble1Vec ¤tMean, - double rawAnomalyScore, - double normalizedAnomalyScore, - double probability, - const std::string &metricValueField, - const TStoredStringPtrStoredStringPtrPrDoublePrVec &influences, - bool useNull, - bool metric, - int identifier, - core_t::TTime bucketSpan, - TStr1Vec scheduledEventDescriptions); - - EResultType s_ResultType; - bool s_IsAllTimeResult; - bool s_IsOverallResult; - bool s_UseNull; - bool s_IsMetric; - const std::string &s_PartitionFieldName; - const std::string &s_PartitionFieldValue; - const std::string &s_ByFieldName; - const std::string &s_ByFieldValue; - const std::string &s_CorrelatedByFieldValue; - const std::string &s_OverFieldName; - const std::string &s_OverFieldValue; - const std::string &s_MetricValueField; - core_t::TTime s_BucketStartTime; - core_t::TTime s_BucketSpan; - const std::string &s_FunctionName; - const std::string &s_FunctionDescription; - TDouble1Vec s_FunctionValue; - TDouble1Vec s_PopulationAverage; - TOptionalDouble s_BaselineRate; - TOptionalUInt64 s_CurrentRate; - TDouble1Vec s_BaselineMean; - TDouble1Vec s_CurrentMean; - double s_RawAnomalyScore; - double s_NormalizedAnomalyScore; - double s_Probability; - const TStoredStringPtrStoredStringPtrPrDoublePrVec &s_Influences; - int s_Identifier; - TStr1Vec s_ScheduledEventDescriptions; - }; - - public: - using TResults = SResults; - using TResultWriterFunc = std::function; - using TPivotWriterFunc = std::function; - - public: - CHierarchicalResultsWriter(const model::CLimits &limits, - const model::CAnomalyDetectorModelConfig &modelConfig, - const TResultWriterFunc &resultWriter, - const TPivotWriterFunc &pivotsWriterFunc); - - //! Write \p node. - virtual void visit(const model::CHierarchicalResults &results, - const TNode &node, - bool pivot); - - private: - //! Write out a population person result if \p node is a - //! member a population. - void writePopulationResult(const model::CHierarchicalResults &results, - const TNode &node); - - //! Write out an individual person result if \p node is - //! an individual time series result. - void writeIndividualResult(const model::CHierarchicalResults &results, - const TNode &node); - - //! Write out the pivot (influencer) result if \p node is a - //! pivot. - void writePivotResult(const model::CHierarchicalResults &results, - const TNode &node); - - //! Write partition result if \p node is a partition level result - void writePartitionResult(const model::CHierarchicalResults &results, - const TNode &node); - - //! Write out a simple count result if \p node is simple - //! count. - void writeSimpleCountResult(const TNode &node); - - //! Given a leaf node, search upwards to find the most appropriate - //! values for person and partition probability results. - static void findParentProbabilities(const TNode &node, - double &personProbability, - double &partitionProbability); - - private: - //! The various limits. - const model::CLimits &m_Limits; - - //! Config options - const model::CAnomalyDetectorModelConfig &m_ModelConfig; - - //! The results writer. 
- TResultWriterFunc m_ResultWriterFunc; - - //! The influencers/pivots writer - TPivotWriterFunc m_PivotWriterFunc; - - //! Remember the current bucket time for writing pivots - core_t::TTime m_BucketTime; +class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResultsVisitor, private core::CNonCopyable { +public: + using TDouble1Vec = core::CSmallVector; + using TOptionalDouble = boost::optional; + using TOptionalUInt64 = boost::optional; + + // Influencers + using TStoredStringPtrVec = std::vector; + using TStoredStringPtrStoredStringPtrPr = std::pair; + using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair; + using TStoredStringPtrStoredStringPtrPrDoublePrVec = std::vector; + + using TStr1Vec = core::CSmallVector; + +public: + enum EResultType { E_SimpleCountResult, E_PopulationResult, E_PartitionResult, E_Result }; + //! Type which wraps up the results of anomaly detection. + struct API_EXPORT SResults { + //! Construct for population results + SResults(bool isAllTimeResult, + bool isOverallResult, + const std::string& partitionFieldName, + const std::string& partitionFieldValue, + const std::string& overFieldName, + const std::string& overFieldValue, + const std::string& byFieldName, + const std::string& byFieldValue, + const std::string& correlatedByFieldValue, + core_t::TTime bucketStartTime, + const std::string& functionName, + const std::string& functionDescription, + const TDouble1Vec& functionValue, + const TDouble1Vec& populationAverage, + double rawAnomalyScore, + double normalizedAnomalyScore, + double probability, + const TOptionalUInt64& currentRate, + const std::string& metricValueField, + const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences, + bool useNull, + bool metric, + int identifier, + core_t::TTime bucketSpan); + + //! 
Construct for other results
+        SResults(EResultType resultType,
+                 const std::string& partitionFieldName,
+                 const std::string& partitionFieldValue,
+                 const std::string& byFieldName,
+                 const std::string& byFieldValue,
+                 const std::string& correlatedByFieldValue,
+                 core_t::TTime bucketStartTime,
+                 const std::string& functionName,
+                 const std::string& functionDescription,
+                 const TOptionalDouble& baselineRate,
+                 const TOptionalUInt64& currentRate,
+                 const TDouble1Vec& baselineMean,
+                 const TDouble1Vec& currentMean,
+                 double rawAnomalyScore,
+                 double normalizedAnomalyScore,
+                 double probability,
+                 const std::string& metricValueField,
+                 const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences,
+                 bool useNull,
+                 bool metric,
+                 int identifier,
+                 core_t::TTime bucketSpan,
+                 TStr1Vec scheduledEventDescriptions);
+
+        EResultType s_ResultType;
+        bool s_IsAllTimeResult;
+        bool s_IsOverallResult;
+        bool s_UseNull;
+        bool s_IsMetric;
+        const std::string& s_PartitionFieldName;
+        const std::string& s_PartitionFieldValue;
+        const std::string& s_ByFieldName;
+        const std::string& s_ByFieldValue;
+        const std::string& s_CorrelatedByFieldValue;
+        const std::string& s_OverFieldName;
+        const std::string& s_OverFieldValue;
+        const std::string& s_MetricValueField;
+        core_t::TTime s_BucketStartTime;
+        core_t::TTime s_BucketSpan;
+        const std::string& s_FunctionName;
+        const std::string& s_FunctionDescription;
+        TDouble1Vec s_FunctionValue;
+        TDouble1Vec s_PopulationAverage;
+        TOptionalDouble s_BaselineRate;
+        TOptionalUInt64 s_CurrentRate;
+        TDouble1Vec s_BaselineMean;
+        TDouble1Vec s_CurrentMean;
+        double s_RawAnomalyScore;
+        double s_NormalizedAnomalyScore;
+        double s_Probability;
+        const TStoredStringPtrStoredStringPtrPrDoublePrVec& s_Influences;
+        int s_Identifier;
+        TStr1Vec s_ScheduledEventDescriptions;
+    };
+
+public:
+    using TResults = SResults;
+    using TResultWriterFunc = std::function;
+    using TPivotWriterFunc = std::function;
+
+public:
+    CHierarchicalResultsWriter(const model::CLimits& limits,
+                               const model::CAnomalyDetectorModelConfig& modelConfig,
+                               const TResultWriterFunc& resultWriter,
+                               const TPivotWriterFunc& pivotsWriterFunc);
+
+    //! Write \p node.
+    virtual void visit(const model::CHierarchicalResults& results, const TNode& node, bool pivot);
+
+private:
+    //! Write out a population person result if \p node is a
+    //! member of a population.
+    void writePopulationResult(const model::CHierarchicalResults& results, const TNode& node);
+
+    //! Write out an individual person result if \p node is
+    //! an individual time series result.
+    void writeIndividualResult(const model::CHierarchicalResults& results, const TNode& node);
+
+    //! Write out the pivot (influencer) result if \p node is a
+    //! pivot.
+    void writePivotResult(const model::CHierarchicalResults& results, const TNode& node);
+
+    //! Write partition result if \p node is a partition level result
+    void writePartitionResult(const model::CHierarchicalResults& results, const TNode& node);
+
+    //! Write out a simple count result if \p node is a simple
+    //! count.
+    void writeSimpleCountResult(const TNode& node);
+
+    //! Given a leaf node, search upwards to find the most appropriate
+    //! values for person and partition probability results.
+    static void findParentProbabilities(const TNode& node, double& personProbability, double& partitionProbability);
+
+private:
+    //! The various limits.
+    const model::CLimits& m_Limits;
+
+    //! Config options
+    const model::CAnomalyDetectorModelConfig& m_ModelConfig;
+
+    //! The results writer.
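[Editorial aside] The writer callbacks above are plain std::function objects whose template arguments have been stripped from this copy of the patch, so a lambda is the natural way to supply one. A sketch under that assumption, with a stand-in results type:

#include <functional>
#include <iostream>

struct SResultsSketch { // stand-in for CHierarchicalResultsWriter::SResults
    double s_Probability = 1.0;
};

// Assumed shape of TResultWriterFunc: consume one result, return true to
// continue writing.
using TResultWriterFuncSketch = std::function<bool(const SResultsSketch&)>;

int main() {
    TResultWriterFuncSketch resultWriter = [](const SResultsSketch& results) {
        std::cout << "probability = " << results.s_Probability << '\n';
        return true;
    };
    return resultWriter(SResultsSketch{}) ? 0 : 1;
}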
+ TResultWriterFunc m_ResultWriterFunc; + + //! The influencers/pivots writer + TPivotWriterFunc m_PivotWriterFunc; + + //! Remember the current bucket time for writing pivots + core_t::TTime m_BucketTime; }; - } } diff --git a/include/api/CInputParser.h b/include/api/CInputParser.h index f7424c7591..2c5c892f53 100644 --- a/include/api/CInputParser.h +++ b/include/api/CInputParser.h @@ -13,17 +13,13 @@ #include #include - #include #include #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Input parser interface @@ -35,76 +31,72 @@ namespace api //! Abstract interface declares the readStream method that must be //! implemented in sub-classes. //! -class API_EXPORT CInputParser : private core::CNonCopyable -{ - public: - using TStrVec = std::vector; - using TStrVecItr = TStrVec::iterator; - using TStrVecCItr = TStrVec::const_iterator; - - using TStrStrUMap = boost::unordered_map; - using TStrStrUMapItr = TStrStrUMap::iterator; - using TStrStrUMapCItr = TStrStrUMap::const_iterator; - - //! For fast access to the field values without repeatedly computing the - //! hash, we maintain references to the values in the hash map - using TStrRef = boost::reference_wrapper; - using TStrRefVec = std::vector; - using TStrRefVecItr = TStrRefVec::iterator; - using TStrRefVecCItr = TStrRefVec::const_iterator; - - //! Callback function prototype that gets called for each record - //! read from the input stream. Return false to exit reader loop. - //! Arguments are: - //! 1) Header row fields - //! 2) Data row fields - using TReaderFunc = std::function; - - public: - CInputParser(); - virtual ~CInputParser(); - - //! Did we find the input field names? - bool gotFieldNames() const; - - //! Did we find any data in the input? - bool gotData() const; - - //! Get field names - const TStrVec &fieldNames() const; - - //! Read records from the stream. The supplied settings function is - //! called only once. The supplied reader function is called once per - //! record. If the supplied reader function returns false, reading will - //! stop. This method keeps reading until it reaches the end of the - //! stream or an error occurs. If it successfully reaches the end of - //! the stream it returns true, otherwise it returns false. If - virtual bool readStream(const TReaderFunc &readerFunc) = 0; - - protected: - //! Set the "got field names" flag - void gotFieldNames(bool gotFieldNames); - - //! Set the "got data" flag - void gotData(bool gotData); - - //! Writable access to the field names for derived classes only - TStrVec &fieldNames(); - - private: - //! Have we got the field names? - bool m_GotFieldNames; - - //! Have we found any data? - bool m_GotData; - - //! Field names parsed from the input - TStrVec m_FieldNames; +class API_EXPORT CInputParser : private core::CNonCopyable { +public: + using TStrVec = std::vector; + using TStrVecItr = TStrVec::iterator; + using TStrVecCItr = TStrVec::const_iterator; + + using TStrStrUMap = boost::unordered_map; + using TStrStrUMapItr = TStrStrUMap::iterator; + using TStrStrUMapCItr = TStrStrUMap::const_iterator; + + //! For fast access to the field values without repeatedly computing the + //! hash, we maintain references to the values in the hash map + using TStrRef = boost::reference_wrapper; + using TStrRefVec = std::vector; + using TStrRefVecItr = TStrRefVec::iterator; + using TStrRefVecCItr = TStrRefVec::const_iterator; + + //! Callback function prototype that gets called for each record + //! read from the input stream. 
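[Editorial aside] The template arguments of TReaderFunc are likewise stripped in this copy; judging by the comment around this point, the callback receives the header row fields and the data row fields and returns a bool. A counting reader under that assumption:

#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>

// Stand-in aliases; the real TStrVec/TStrStrUMap are declared above
// (TStrStrUMap is a boost::unordered_map in the original).
using TStrVecSketch = std::vector<std::string>;
using TStrStrUMapSketch = std::unordered_map<std::string, std::string>;
using TReaderFuncSketch = std::function<bool(const TStrVecSketch&, const TStrStrUMapSketch&)>;

TReaderFuncSketch makeCountingReaderSketch(std::size_t& recordCount) {
    return [&recordCount](const TStrVecSketch& /*fieldNames*/,
                          const TStrStrUMapSketch& /*dataRowFields*/) {
        ++recordCount;
        return true; // keep reading until the end of the stream
    };
}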
Return false to exit reader loop. + //! Arguments are: + //! 1) Header row fields + //! 2) Data row fields + using TReaderFunc = std::function; + +public: + CInputParser(); + virtual ~CInputParser(); + + //! Did we find the input field names? + bool gotFieldNames() const; + + //! Did we find any data in the input? + bool gotData() const; + + //! Get field names + const TStrVec& fieldNames() const; + + //! Read records from the stream. The supplied settings function is + //! called only once. The supplied reader function is called once per + //! record. If the supplied reader function returns false, reading will + //! stop. This method keeps reading until it reaches the end of the + //! stream or an error occurs. If it successfully reaches the end of + //! the stream it returns true, otherwise it returns false. If + virtual bool readStream(const TReaderFunc& readerFunc) = 0; + +protected: + //! Set the "got field names" flag + void gotFieldNames(bool gotFieldNames); + + //! Set the "got data" flag + void gotData(bool gotData); + + //! Writable access to the field names for derived classes only + TStrVec& fieldNames(); + +private: + //! Have we got the field names? + bool m_GotFieldNames; + + //! Have we found any data? + bool m_GotData; + + //! Field names parsed from the input + TStrVec m_FieldNames; }; - - } } #endif // INCLUDED_ml_api_CInputParser_h - diff --git a/include/api/CIoManager.h b/include/api/CIoManager.h index b4ed490180..fcefaf899d 100644 --- a/include/api/CIoManager.h +++ b/include/api/CIoManager.h @@ -14,11 +14,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Manages the various IO streams of an API command. @@ -46,91 +43,87 @@ namespace api //! always required. Persist/restore streams are returned as pointers //! because some processes may not require both. //! -class API_EXPORT CIoManager : private core::CNonCopyable -{ - public: - //! Leave \p inputFileName/\p outputFileName empty to indicate - //! STDIN/STDOUT. Leave \p restoreFileName/\p persistFileName empty to - //! indicate no state restore or persist. - CIoManager(const std::string &inputFileName, - bool isInputFileNamedPipe, - const std::string &outputFileName, - bool isOutputFileNamedPipe, - const std::string &restoreFileName = std::string(), - bool isRestoreFileNamedPipe = true, - const std::string &persistFileName = std::string(), - bool isPersistFileNamedPipe = true); - - //! This will close any streams and unlink named pipes. All - //! input/output/restore/persist operations must be complete at the time - //! this object is destroyed. - ~CIoManager(); - - //! Set up the necessary streams given the constructor arguments. - bool initIo(); - - //! Get the stream to get input data from. - std::istream &inputStream(); - - //! Get the stream to write output to. - std::ostream &outputStream(); - - //! Get the stream to restore state from. If NULL then don't restore state. - core::CNamedPipeFactory::TIStreamP restoreStream(); - - //! Get the stream to persist state to. If NULL then don't persist state. - core::CNamedPipeFactory::TOStreamP persistStream(); - - private: - //! Have the streams been successfully initialised? - bool m_IoInitialised; - - //! Name of file/pipe to get input from. Empty implies STDIN. - std::string m_InputFileName; - - //! Is the input file a named pipe? - bool m_IsInputFileNamedPipe; - - //! If this object owns the input stream then a pointer to it. If - //! std::cin is being used then this will be NULL. 
- core::CNamedPipeFactory::TIStreamP m_InputStream; - - //! Name of file/pipe to write output to. Empty implies STDOUT. - std::string m_OutputFileName; - - //! Is the input file a named pipe? - bool m_IsOutputFileNamedPipe; - - //! If this object owns the output stream then a pointer to it. If - //! std::cout is being used then this will be NULL. - core::CNamedPipeFactory::TOStreamP m_OutputStream; - - //! Name of file/pipe to restore state from. Empty implies don't - //! restore state. - std::string m_RestoreFileName; - - //! Is the restore file a named pipe? - bool m_IsRestoreFileNamedPipe; - - //! If this object owns the restore stream then a pointer to it. A - //! NULL pointer implies state is not being restored. - core::CNamedPipeFactory::TIStreamP m_RestoreStream; - - //! Name of file/pipe to persist state to. Empty implies don't persist - //! state. - std::string m_PersistFileName; - - //! Is the persist file a named pipe? - bool m_IsPersistFileNamedPipe; - - //! If this object owns the persist stream then a pointer to it. A - //! NULL pointer implies state is not being persisted. - core::CNamedPipeFactory::TOStreamP m_PersistStream; -}; +class API_EXPORT CIoManager : private core::CNonCopyable { +public: + //! Leave \p inputFileName/\p outputFileName empty to indicate + //! STDIN/STDOUT. Leave \p restoreFileName/\p persistFileName empty to + //! indicate no state restore or persist. + CIoManager(const std::string& inputFileName, + bool isInputFileNamedPipe, + const std::string& outputFileName, + bool isOutputFileNamedPipe, + const std::string& restoreFileName = std::string(), + bool isRestoreFileNamedPipe = true, + const std::string& persistFileName = std::string(), + bool isPersistFileNamedPipe = true); + + //! This will close any streams and unlink named pipes. All + //! input/output/restore/persist operations must be complete at the time + //! this object is destroyed. + ~CIoManager(); + + //! Set up the necessary streams given the constructor arguments. + bool initIo(); + + //! Get the stream to get input data from. + std::istream& inputStream(); + + //! Get the stream to write output to. + std::ostream& outputStream(); + + //! Get the stream to restore state from. If NULL then don't restore state. + core::CNamedPipeFactory::TIStreamP restoreStream(); + + //! Get the stream to persist state to. If NULL then don't persist state. + core::CNamedPipeFactory::TOStreamP persistStream(); + +private: + //! Have the streams been successfully initialised? + bool m_IoInitialised; + + //! Name of file/pipe to get input from. Empty implies STDIN. + std::string m_InputFileName; + + //! Is the input file a named pipe? + bool m_IsInputFileNamedPipe; + //! If this object owns the input stream then a pointer to it. If + //! std::cin is being used then this will be NULL. + core::CNamedPipeFactory::TIStreamP m_InputStream; + //! Name of file/pipe to write output to. Empty implies STDOUT. + std::string m_OutputFileName; + + //! Is the input file a named pipe? + bool m_IsOutputFileNamedPipe; + + //! If this object owns the output stream then a pointer to it. If + //! std::cout is being used then this will be NULL. + core::CNamedPipeFactory::TOStreamP m_OutputStream; + + //! Name of file/pipe to restore state from. Empty implies don't + //! restore state. + std::string m_RestoreFileName; + + //! Is the restore file a named pipe? + bool m_IsRestoreFileNamedPipe; + + //! If this object owns the restore stream then a pointer to it. A + //! NULL pointer implies state is not being restored. 
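[Editorial aside] A sketch of the intended call sequence for this class, using only the methods documented here; the file names and error handling are illustrative.

#include <iostream>
#include <string>
// Assumes the header declared here is on the include path:
// #include <api/CIoManager.h>

bool runWithIoSketch(ml::api::CIoManager& ioMgr) {
    if (ioMgr.initIo() == false) { // must succeed before any stream access
        std::cerr << "Failed to set up IO streams" << std::endl;
        return false;
    }
    std::istream& input = ioMgr.inputStream();
    std::ostream& output = ioMgr.outputStream();
    // restoreStream()/persistStream() return null pointers when no state
    // restore/persist was requested, so check them before use.
    std::string line;
    while (std::getline(input, line)) {
        output << line << '\n'; // echo input to output, purely for illustration
    }
    return true;
}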
+ core::CNamedPipeFactory::TIStreamP m_RestoreStream; + + //! Name of file/pipe to persist state to. Empty implies don't persist + //! state. + std::string m_PersistFileName; + + //! Is the persist file a named pipe? + bool m_IsPersistFileNamedPipe; + + //! If this object owns the persist stream then a pointer to it. A + //! NULL pointer implies state is not being persisted. + core::CNamedPipeFactory::TOStreamP m_PersistStream; +}; } } #endif // INCLUDED_ml_api_CIoManager_h - diff --git a/include/api/CJsonOutputWriter.h b/include/api/CJsonOutputWriter.h index e5e3b14338..333a8078b3 100644 --- a/include/api/CJsonOutputWriter.h +++ b/include/api/CJsonOutputWriter.h @@ -7,9 +7,9 @@ #define INCLUDED_ml_api_CJsonOutputWriter_h #include -#include #include #include +#include #include #include @@ -31,19 +31,15 @@ #include #include - -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { class CHierarchicalResultsNormalizer; } -namespace core -{ -template class CScopedRapidJsonPoolAllocator; +namespace core { +template +class CScopedRapidJsonPoolAllocator; } -namespace api -{ +namespace api { //! \brief //! Write output data in JSON format @@ -99,254 +95,231 @@ namespace api //! re-normalisation of previous results using the normalize //! process, so it's best that this doesn't happen too often.) //! -class API_EXPORT CJsonOutputWriter : public COutputHandler -{ - public: - using TDocumentPtr = boost::shared_ptr; - using TDocumentWeakPtr = boost::weak_ptr; - using TDocumentWeakPtrVec = std::vector; - using TDocumentWeakPtrVecItr = TDocumentWeakPtrVec::iterator; - using TDocumentWeakPtrVecCItr = TDocumentWeakPtrVec::const_iterator; - - using TDocumentWeakPtrIntPr = std::pair; - using TDocumentWeakPtrIntPrVec = std::vector; - using TDocumentWeakPtrIntPrVecItr = TDocumentWeakPtrIntPrVec::iterator; - using TStrDocumentPtrVecMap = std::map; - - using TStrVec = std::vector; - using TStr1Vec = core::CSmallVector; - using TTimeVec = std::vector; - using TDoubleVec = std::vector; - using TDoubleDoublePr = std::pair; - using TDoubleDoublePrVec = std::vector; - using TDoubleDoubleDoublePrPr = std::pair; - using TDoubleDoubleDoublePrPrVec = std::vector; - using TStringDoublePr = std::pair; - using TStringDoublePrVec = std::vector; - - using TValuePtr = boost::shared_ptr; - - //! Structure to buffer up information about each bucket that we have - //! unwritten results for - struct SBucketData - { - SBucketData(); - - //! The max normalized anomaly score of the bucket influencers - double s_MaxBucketInfluencerNormalizedAnomalyScore; - - //! Count of input events for the bucket - size_t s_InputEventCount; - - //! Count of result records in the bucket for which results are - //! being built up - size_t s_RecordCount; - - //! The bucketspan of this bucket - core_t::TTime s_BucketSpan; - - //! The result record documents to be written, in a vector keyed on - //! detector index - TDocumentWeakPtrIntPrVec s_DocumentsToWrite; - - //! Bucket Influencer documents - TDocumentWeakPtrVec s_BucketInfluencerDocuments; - - //! Influencer documents - TDocumentWeakPtrVec s_InfluencerDocuments; - - // The highest probability of all the records stored - // in the s_DocumentsToWrite array. 
Used for filtering - // new records with a higher probability - double s_HighestProbability; - - // Used for filtering new influencers - // when the number to write is limited - double s_LowestInfluencerScore; - - // Used for filtering new bucket influencers - // when the number to write is limited - double s_LowestBucketInfluencerScore; - - //! Partition scores - TDocumentWeakPtrVec s_PartitionScoreDocuments; - - //! scheduled event descriptions - TStr1Vec s_ScheduledEventDescriptions; - }; - - using TTimeBucketDataMap = std::map; - using TTimeBucketDataMapItr = TTimeBucketDataMap::iterator; - using TTimeBucketDataMapCItr = TTimeBucketDataMap::const_iterator; - - private: - using TStrSet = CCategoryExamplesCollector::TStrSet; - using TStrSetCItr = TStrSet::const_iterator; - - public: - //! Constructor that causes output to be written to the specified wrapped stream - CJsonOutputWriter(const std::string &jobId, - core::CJsonOutputStreamWrapper &strmOut); - - //! Destructor flushes the stream - virtual ~CJsonOutputWriter(); - - //! Set field names. In this class this function has no effect and it - //! always returns true - virtual bool fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames); - - // Bring the other overload of fieldNames() into scope - using COutputHandler::fieldNames; - - //! Returns an empty vector - virtual const TStrVec &fieldNames() const; - - //! Write the data row fields as a JSON object - virtual bool writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields); - - //! Limit the output to the top count anomalous records and influencers. - //! Each detector will write no more than count records and influencers - //! per bucket (i.e. a max of N records, N influencers and N bucket - //! influencers). - //! The bucket time influencer does not add to this count but only - //! if it is added after all the other bucket influencers - void limitNumberRecords(size_t count); - - //! A value of 0 indicates no limit has been set - size_t limitNumberRecords() const; - - //! Close the JSON structures and flush output. - //! This method should only be called once and will have no affect - //! on subsequent invocations - virtual void finalise(); - - //! Receive a count of possible results - void possibleResultCount(core_t::TTime time, size_t count); - - //! Accept a result from the anomaly detector - //! Virtual for testing mocks - virtual bool acceptResult(const CHierarchicalResultsWriter::TResults &results); - - //! Accept the influencer - bool acceptInfluencer(core_t::TTime time, - const model::CHierarchicalResults::TNode &node, - bool isBucketInfluencer); - - //! Creates a time bucket influencer. - //! If limitNumberRecords is set add this influencer after all other influencers - //! have been added otherwise it may be filtered out if its anomaly score is lower - //! than the others. - //! Only one per bucket is expected, this does not add to the influencer - //! count if limitNumberRecords is used - virtual void acceptBucketTimeInfluencer(core_t::TTime time, - double probability, - double rawAnomalyScore, - double normalizedAnomalyScore); - - //! This method must be called after all the results for a given bucket - //! are available. It triggers the writing of the results. - bool endOutputBatch(bool isInterim, uint64_t bucketProcessingTime); - - //! Report the current levels of resource usage, as given to us - //! 
from the CResourceMonitor via a callback - void reportMemoryUsage(const model::CResourceMonitor::SResults &results); - - //! Acknowledge a flush request by echoing back the flush ID - void acknowledgeFlush(const std::string &flushId, core_t::TTime lastFinalizedBucketEnd); - - //! Write a category definition - void writeCategoryDefinition(int categoryId, - const std::string &terms, - const std::string ®ex, - std::size_t maxMatchingFieldLength, - const TStrSet &examples); - - //! Persist a normalizer by writing its state to the output - void persistNormalizer(const model::CHierarchicalResultsNormalizer &normalizer, - core_t::TTime &persistTime); - - private: - template friend class core::CScopedRapidJsonPoolAllocator; - // hooks for the CScopedRapidJsonPoolAllocator interface - - //! use a new allocator for JSON output processing - //! \p allocatorName A unique identifier for the allocator - void pushAllocator(const std::string &allocatorName); - - //! revert to using the previous allocator for JSON output processing - void popAllocator(); - - private: - //! Write out all the JSON documents that have been built up for - //! a particular bucket - void writeBucket(bool isInterim, - core_t::TTime bucketTime, - SBucketData &bucketData, - uint64_t bucketProcessingTime); - - //! Add the fields for a metric detector - void addMetricFields(const CHierarchicalResultsWriter::TResults &results, - TDocumentWeakPtr weakDoc); - - //! Write the fields for a population detector - void addPopulationFields(const CHierarchicalResultsWriter::TResults &results, - TDocumentWeakPtr weakDoc); - - //! Write the fields for a population detector cause - void addPopulationCauseFields(const CHierarchicalResultsWriter::TResults &results, - TDocumentWeakPtr weakDoc); - - //! Write the fields for an event rate detector - void addEventRateFields(const CHierarchicalResultsWriter::TResults &results, - TDocumentWeakPtr weakDoc); - - //! Add the influencer fields to the doc - void addInfluencerFields(bool isBucketInfluencer, - const model::CHierarchicalResults::TNode &node, - TDocumentWeakPtr weakDoc); - - //! Write the influence results. - void addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec &influenceResults, - TDocumentWeakPtr weakDoc); - - //! Write partition score & probability - void addPartitionScores(const CHierarchicalResultsWriter::TResults &results, - TDocumentWeakPtr weakDoc); - - private: - //! The job ID - std::string m_JobId; - - //! JSON line writer - core::CRapidJsonConcurrentLineWriter m_Writer; - - //! Time of last non-interim bucket written to output - core_t::TTime m_LastNonInterimBucketTime; - - //! Has the output been finalised? - bool m_Finalised; - - //! Max number of records to write for each bucket/detector - size_t m_RecordOutputLimit; - - //! Vector for building up documents representing nested sub-results. - //! The documents in this vector will reference memory owned by - //! m_JsonPoolAllocator. (Hence this is declared after the memory pool - //! so that it's destroyed first when the destructor runs.) - TDocumentWeakPtrVec m_NestedDocs; - - //! Bucket data waiting to be written. The map is keyed on bucket time. - //! The documents in this map will reference memory owned by - //! m_JsonPoolAllocator. (Hence this is declared after the memory pool - //! so that it's destroyed first when the destructor runs.) 
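[Editorial aside] The buffering pattern described by these members is easy to illustrate: results accumulate in a map keyed on bucket start time and are written together when the bucket completes. A toy version with stand-in types (not the real SBucketData):

#include <cstddef>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct SBucketSketch { // stand-in for SBucketData
    std::size_t s_RecordCount = 0;
    std::vector<std::string> s_Documents; // pretend-serialised result docs
};

using TTimeBucketMapSketch = std::map<std::int64_t, SBucketSketch>;

void bufferResultSketch(TTimeBucketMapSketch& buckets, std::int64_t bucketTime, std::string doc) {
    SBucketSketch& bucket = buckets[bucketTime]; // created on first use
    bucket.s_Documents.push_back(std::move(doc));
    ++bucket.s_RecordCount;
}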
- TTimeBucketDataMap m_BucketDataByTime; -}; +class API_EXPORT CJsonOutputWriter : public COutputHandler { +public: + using TDocumentPtr = boost::shared_ptr; + using TDocumentWeakPtr = boost::weak_ptr; + using TDocumentWeakPtrVec = std::vector; + using TDocumentWeakPtrVecItr = TDocumentWeakPtrVec::iterator; + using TDocumentWeakPtrVecCItr = TDocumentWeakPtrVec::const_iterator; + + using TDocumentWeakPtrIntPr = std::pair; + using TDocumentWeakPtrIntPrVec = std::vector; + using TDocumentWeakPtrIntPrVecItr = TDocumentWeakPtrIntPrVec::iterator; + using TStrDocumentPtrVecMap = std::map; + + using TStrVec = std::vector; + using TStr1Vec = core::CSmallVector; + using TTimeVec = std::vector; + using TDoubleVec = std::vector; + using TDoubleDoublePr = std::pair; + using TDoubleDoublePrVec = std::vector; + using TDoubleDoubleDoublePrPr = std::pair; + using TDoubleDoubleDoublePrPrVec = std::vector; + using TStringDoublePr = std::pair; + using TStringDoublePrVec = std::vector; + + using TValuePtr = boost::shared_ptr; + + //! Structure to buffer up information about each bucket that we have + //! unwritten results for + struct SBucketData { + SBucketData(); + + //! The max normalized anomaly score of the bucket influencers + double s_MaxBucketInfluencerNormalizedAnomalyScore; + + //! Count of input events for the bucket + size_t s_InputEventCount; + + //! Count of result records in the bucket for which results are + //! being built up + size_t s_RecordCount; + + //! The bucketspan of this bucket + core_t::TTime s_BucketSpan; + + //! The result record documents to be written, in a vector keyed on + //! detector index + TDocumentWeakPtrIntPrVec s_DocumentsToWrite; + + //! Bucket Influencer documents + TDocumentWeakPtrVec s_BucketInfluencerDocuments; + + //! Influencer documents + TDocumentWeakPtrVec s_InfluencerDocuments; + + // The highest probability of all the records stored + // in the s_DocumentsToWrite array. Used for filtering + // new records with a higher probability + double s_HighestProbability; + + // Used for filtering new influencers + // when the number to write is limited + double s_LowestInfluencerScore; + + // Used for filtering new bucket influencers + // when the number to write is limited + double s_LowestBucketInfluencerScore; + + //! Partition scores + TDocumentWeakPtrVec s_PartitionScoreDocuments; + + //! scheduled event descriptions + TStr1Vec s_ScheduledEventDescriptions; + }; + + using TTimeBucketDataMap = std::map; + using TTimeBucketDataMapItr = TTimeBucketDataMap::iterator; + using TTimeBucketDataMapCItr = TTimeBucketDataMap::const_iterator; + +private: + using TStrSet = CCategoryExamplesCollector::TStrSet; + using TStrSetCItr = TStrSet::const_iterator; + +public: + //! Constructor that causes output to be written to the specified wrapped stream + CJsonOutputWriter(const std::string& jobId, core::CJsonOutputStreamWrapper& strmOut); + + //! Destructor flushes the stream + virtual ~CJsonOutputWriter(); + + //! Set field names. In this class this function has no effect and it + //! always returns true + virtual bool fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames); + + // Bring the other overload of fieldNames() into scope + using COutputHandler::fieldNames; + + //! Returns an empty vector + virtual const TStrVec& fieldNames() const; + + //! Write the data row fields as a JSON object + virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + + //! 
+ //! Limit the output to the top count anomalous records and influencers. + //! Each detector will write no more than count records and influencers + //! per bucket (i.e. a max of N records, N influencers and N bucket + //! influencers). + //! The bucket time influencer does not add to this count, provided it + //! is added after all the other bucket influencers + void limitNumberRecords(size_t count); + + //! A value of 0 indicates no limit has been set + size_t limitNumberRecords() const; + + //! Close the JSON structures and flush output. + //! This method should only be called once and will have no effect + //! on subsequent invocations + virtual void finalise(); + + //! Receive a count of possible results + void possibleResultCount(core_t::TTime time, size_t count); + + //! Accept a result from the anomaly detector + //! Virtual for testing mocks + virtual bool acceptResult(const CHierarchicalResultsWriter::TResults& results); + + //! Accept the influencer + bool acceptInfluencer(core_t::TTime time, const model::CHierarchicalResults::TNode& node, bool isBucketInfluencer); + + //! Creates a time bucket influencer. + //! If limitNumberRecords is set, add this influencer after all other influencers + //! have been added, otherwise it may be filtered out if its anomaly score is lower + //! than the others. + //! Only one per bucket is expected; this does not add to the influencer + //! count if limitNumberRecords is used + virtual void acceptBucketTimeInfluencer(core_t::TTime time, double probability, double rawAnomalyScore, double normalizedAnomalyScore); + + //! This method must be called after all the results for a given bucket + //! are available. It triggers the writing of the results. + bool endOutputBatch(bool isInterim, uint64_t bucketProcessingTime); + + //! Report the current levels of resource usage, as given to us + //! from the CResourceMonitor via a callback + void reportMemoryUsage(const model::CResourceMonitor::SResults& results); + + //! Acknowledge a flush request by echoing back the flush ID + void acknowledgeFlush(const std::string& flushId, core_t::TTime lastFinalizedBucketEnd); + + //! Write a category definition + void writeCategoryDefinition(int categoryId, + const std::string& terms, + const std::string& regex, + std::size_t maxMatchingFieldLength, + const TStrSet& examples); + + //! Persist a normalizer by writing its state to the output + void persistNormalizer(const model::CHierarchicalResultsNormalizer& normalizer, core_t::TTime& persistTime); + +private: + template<typename T> + friend class core::CScopedRapidJsonPoolAllocator; + // hooks for the CScopedRapidJsonPoolAllocator interface + //! use a new allocator for JSON output processing + //! \p allocatorName A unique identifier for the allocator + void pushAllocator(const std::string& allocatorName); + //! revert to using the previous allocator for JSON output processing + void popAllocator(); + +private: + //! Write out all the JSON documents that have been built up for + //! a particular bucket + void writeBucket(bool isInterim, core_t::TTime bucketTime, SBucketData& bucketData, uint64_t bucketProcessingTime); + + //! Add the fields for a metric detector + void addMetricFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + + //! Write the fields for a population detector + void addPopulationFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + + //!
Write the fields for a population detector cause + void addPopulationCauseFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + + //! Write the fields for an event rate detector + void addEventRateFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + + //! Add the influencer fields to the doc + void addInfluencerFields(bool isBucketInfluencer, const model::CHierarchicalResults::TNode& node, TDocumentWeakPtr weakDoc); + + //! Write the influence results. + void addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec& influenceResults, + TDocumentWeakPtr weakDoc); + + //! Write partition score & probability + void addPartitionScores(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + +private: + //! The job ID + std::string m_JobId; + + //! JSON line writer + core::CRapidJsonConcurrentLineWriter m_Writer; + + //! Time of last non-interim bucket written to output + core_t::TTime m_LastNonInterimBucketTime; + + //! Has the output been finalised? + bool m_Finalised; + + //! Max number of records to write for each bucket/detector + size_t m_RecordOutputLimit; + + //! Vector for building up documents representing nested sub-results. + //! The documents in this vector will reference memory owned by + //! m_JsonPoolAllocator. (Hence this is declared after the memory pool + //! so that it's destroyed first when the destructor runs.) + TDocumentWeakPtrVec m_NestedDocs; + + //! Bucket data waiting to be written. The map is keyed on bucket time. + //! The documents in this map will reference memory owned by + //! m_JsonPoolAllocator. (Hence this is declared after the memory pool + //! so that it's destroyed first when the destructor runs.) + TTimeBucketDataMap m_BucketDataByTime; +}; } } #endif // INCLUDED_ml_api_CJsonOutputWriter_h - diff --git a/include/api/CLengthEncodedInputParser.h b/include/api/CLengthEncodedInputParser.h index bfb7f8d3d3..59812e35ea 100644 --- a/include/api/CLengthEncodedInputParser.h +++ b/include/api/CLengthEncodedInputParser.h @@ -6,7 +6,6 @@ #ifndef INCLUDED_ml_api_CLengthEncodedInputParser_h #define INCLUDED_ml_api_CLengthEncodedInputParser_h - #include #include @@ -17,11 +16,8 @@ #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Parse the length encoded input data @@ -75,79 +71,74 @@ namespace api //! interfacing with Java (which doesn't have built-in unsigned //! types) easier. //! -class API_EXPORT CLengthEncodedInputParser : public CInputParser -{ - public: - //! Construct with an input stream to be parsed. Once a stream is - //! passed to this constructor, no other object should read from it. - //! For example, if std::cin is passed, no other object should read from - //! std::cin, otherwise unpredictable and incorrect results will be - //! generated. - //! - //! The stream passed should have been created in binary mode, i.e. with - //! the std::ios::binary flag as part of its constructor's openmode - //! argument. Otherwise, on Windows, a CTRL+Z in the input stream will - //! be considered as end-of-file. The exception is when std::cin is the - //! input stream, in which case this constructor will set the standard - //! input of the whole process to binary mode (because it's not possible - //! to do this for an already opened stream and std::cin will be open - //! before main() runs). - CLengthEncodedInputParser(std::istream &strmIn); - - //! Read records from the stream. 
The supplied reader function is called - //! once per record. If the supplied reader function returns false, - //! reading will stop. This method keeps reading until it reaches the - //! end of the stream or an error occurs. If it successfully reaches - //! the end of the stream it returns true, otherwise it returns false. - virtual bool readStream(const TReaderFunc &readerFunc); - - private: - //! Attempt to parse a single length encoded record from the stream into - //! the strings in the vector provided. The vector is a template - //! argument so that it may be a vector of boost::reference_wrappers - //! of std::strings instead of std::strings. The first template - //! argument indicates whether the vector must have the correct size - //! when the function is called or whether the function is allowed to - //! resize it. - template<bool RESIZE_ALLOWED, typename STR_VEC> - bool parseRecordFromStream(STR_VEC &results); - - //! Parse a 32 bit unsigned integer from the input stream. - bool parseUInt32FromStream(uint32_t &num); - - //! Parse a string of given length from the input stream. - bool parseStringFromStream(size_t length, - std::string &str); - - //! Refill the working buffer from the stream - size_t refillBuffer(); - - private: - //! Allocate this much memory for the working buffer - static const size_t WORK_BUFFER_SIZE; - - //! Reference to the stream we're going to read from - std::istream &m_StrmIn; - - using TScopedCharArray = boost::scoped_array<char>; - - //! The working buffer is also held as a member to avoid constantly - //! reallocating it. It is a raw character array rather than a string - //! to facilitate the use of std::istream::read() to obtain input. - //! std::istream::read() uses memcpy() to shuffle data around on all - //! platforms, and is hence an order of magnitude faster than reading - //! small chunks of data from the stream repeatedly. The array of - //! characters is NOT zero terminated, which is something to be aware of - //! when accessing it. - TScopedCharArray m_WorkBuffer; - const char *m_WorkBufferPtr; - const char *m_WorkBufferEnd; - bool m_NoMoreRecords; +class API_EXPORT CLengthEncodedInputParser : public CInputParser { +public: + //! Construct with an input stream to be parsed. Once a stream is + //! passed to this constructor, no other object should read from it. + //! For example, if std::cin is passed, no other object should read from + //! std::cin, otherwise unpredictable and incorrect results will be + //! generated. + //! + //! The stream passed should have been created in binary mode, i.e. with + //! the std::ios::binary flag as part of its constructor's openmode + //! argument. Otherwise, on Windows, a CTRL+Z in the input stream will + //! be considered as end-of-file. The exception is when std::cin is the + //! input stream, in which case this constructor will set the standard + //! input of the whole process to binary mode (because it's not possible + //! to do this for an already opened stream and std::cin will be open + //! before main() runs). + CLengthEncodedInputParser(std::istream& strmIn); + + //! Read records from the stream. The supplied reader function is called + //! once per record. If the supplied reader function returns false, + //! reading will stop. This method keeps reading until it reaches the + //! end of the stream or an error occurs. If it successfully reaches + //! the end of the stream it returns true, otherwise it returns false. + virtual bool readStream(const TReaderFunc& readerFunc); +
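// For illustration only: one plausible way to drive readStream(), assuming
// the reader callable receives each parsed record as a field-name to value
// map (the TStrStrUMap from CInputParser); not part of the changeset.
//
//     ml::api::CLengthEncodedInputParser parser(std::cin); // std::cin in binary mode
//     bool ok = parser.readStream([](const auto& record) {
//         // Process one record here; returning false stops reading early.
//         return record.count("timestamp") > 0;
//     });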
+private: + //! Attempt to parse a single length encoded record from the stream into + //! the strings in the vector provided. The vector is a template + //! argument so that it may be a vector of boost::reference_wrappers + //! of std::strings instead of std::strings. The first template + //! argument indicates whether the vector must have the correct size + //! when the function is called or whether the function is allowed to + //! resize it. + template<bool RESIZE_ALLOWED, typename STR_VEC> + bool parseRecordFromStream(STR_VEC& results); + + //! Parse a 32 bit unsigned integer from the input stream. + bool parseUInt32FromStream(uint32_t& num); + + //! Parse a string of given length from the input stream. + bool parseStringFromStream(size_t length, std::string& str); + + //! Refill the working buffer from the stream + size_t refillBuffer(); + +private: + //! Allocate this much memory for the working buffer + static const size_t WORK_BUFFER_SIZE; + + //! Reference to the stream we're going to read from + std::istream& m_StrmIn; + + using TScopedCharArray = boost::scoped_array<char>; + + //! The working buffer is also held as a member to avoid constantly + //! reallocating it. It is a raw character array rather than a string + //! to facilitate the use of std::istream::read() to obtain input. + //! std::istream::read() uses memcpy() to shuffle data around on all + //! platforms, and is hence an order of magnitude faster than reading + //! small chunks of data from the stream repeatedly. The array of + //! characters is NOT zero terminated, which is something to be aware of + //! when accessing it. + TScopedCharArray m_WorkBuffer; + const char* m_WorkBufferPtr; + const char* m_WorkBufferEnd; + bool m_NoMoreRecords; }; - - } } #endif // INCLUDED_ml_api_CLengthEncodedInputParser_h - diff --git a/include/api/CLineifiedInputParser.h b/include/api/CLineifiedInputParser.h index 9cf3fd2a50..710e4d3d26 100644 --- a/include/api/CLineifiedInputParser.h +++ b/include/api/CLineifiedInputParser.h @@ -14,11 +14,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Base class to parse lines containing nested documents from a stream. @@ -34,65 +31,61 @@ namespace api //! The original use case was to factor out commonality from lineified //! JSON and XML parsers. //! -class API_EXPORT CLineifiedInputParser : public CInputParser -{ - public: - //! Construct with an input stream to be parsed. Once a stream is - //! passed to this constructor, no other object should read from it. - //! For example, if std::cin is passed, no other object should read from - //! std::cin, otherwise unpredictable and incorrect results will be - //! generated. - CLineifiedInputParser(std::istream &strmIn); - - protected: - //! Line end character - static const char LINE_END; - - using TCharPSizePr = std::pair<char *, size_t>; - - protected: - //! Return a pointer to the start of the next line and its length, - //! reading extra data from the stream if required. The pair (NULL, 0) - //! will be returned if no further data is available. The newline - //! character at the end of the line is replaced with a zero terminator - //! byte so that the line can later be parsed in-situ by a library - //! that expects a zero-terminated string. - TCharPSizePr parseLine(); - - //! Reset the work buffer to empty. This should be called if the stream - //! that data is being read from might have had its stream buffer - //! changed. - void resetBuffer(); - - private: - //! Allocate this much memory for the working buffer - static const size_t WORK_BUFFER_SIZE; - - //!
Reference to the stream we're going to read from - std::istream &m_StrmIn; - - using TScopedCharArray = boost::scoped_array<char>; - - //! The working buffer is a raw character array rather than a string to - //! facilitate the use of std::istream::read() to obtain input rather - //! than std::getline(). std::getline() is efficient in the GNU STL but - //! sadly not in the Microsoft or Apache STLs, where it copies one - //! character at a time. std::istream::read() uses memcpy() to shuffle - //! data around on all platforms, and is hence an order of magnitude - //! faster. (This is the sort of optimisation to be used ONLY after - //! careful profiling in the rare cases where the reduction in code - //! clarity yields a large performance benefit.) The array of - //! characters is NOT zero terminated, which is something to be aware of - //! when accessing it. - TScopedCharArray m_WorkBuffer; - size_t m_WorkBufferCapacity; - char *m_WorkBufferPtr; - char *m_WorkBufferEnd; +class API_EXPORT CLineifiedInputParser : public CInputParser { +public: + //! Construct with an input stream to be parsed. Once a stream is + //! passed to this constructor, no other object should read from it. + //! For example, if std::cin is passed, no other object should read from + //! std::cin, otherwise unpredictable and incorrect results will be + //! generated. + CLineifiedInputParser(std::istream& strmIn); + +protected: + //! Line end character + static const char LINE_END; + + using TCharPSizePr = std::pair<char*, size_t>; + +protected: + //! Return a pointer to the start of the next line and its length, + //! reading extra data from the stream if required. The pair (NULL, 0) + //! will be returned if no further data is available. The newline + //! character at the end of the line is replaced with a zero terminator + //! byte so that the line can later be parsed in-situ by a library + //! that expects a zero-terminated string. + TCharPSizePr parseLine(); + + //! Reset the work buffer to empty. This should be called if the stream + //! that data is being read from might have had its stream buffer + //! changed. + void resetBuffer(); + +private: + //! Allocate this much memory for the working buffer + static const size_t WORK_BUFFER_SIZE; + + //! Reference to the stream we're going to read from + std::istream& m_StrmIn; + + using TScopedCharArray = boost::scoped_array<char>; + + //! The working buffer is a raw character array rather than a string to + //! facilitate the use of std::istream::read() to obtain input rather + //! than std::getline(). std::getline() is efficient in the GNU STL but + //! sadly not in the Microsoft or Apache STLs, where it copies one + //! character at a time. std::istream::read() uses memcpy() to shuffle + //! data around on all platforms, and is hence an order of magnitude + //! faster. (This is the sort of optimisation to be used ONLY after + //! careful profiling in the rare cases where the reduction in code + //! clarity yields a large performance benefit.) The array of + //! characters is NOT zero terminated, which is something to be aware of + //! when accessing it.
+ TScopedCharArray m_WorkBuffer; + size_t m_WorkBufferCapacity; + char* m_WorkBufferPtr; + char* m_WorkBufferEnd; }; - - } } #endif // INCLUDED_ml_api_CLineifiedJsonInputParser_h - diff --git a/include/api/CLineifiedJsonInputParser.h b/include/api/CLineifiedJsonInputParser.h index baab9b3095..bf9808a89e 100644 --- a/include/api/CLineifiedJsonInputParser.h +++ b/include/api/CLineifiedJsonInputParser.h @@ -14,11 +14,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Parse JSON input where each line is a separate JSON document @@ -37,47 +34,39 @@ namespace api //! Using the RapidJson library to do the heavy lifting, but copying output //! to standard STL/Boost data structures. //! -class API_EXPORT CLineifiedJsonInputParser : public CLineifiedInputParser -{ - public: - //! Construct with an input stream to be parsed. Once a stream is - //! passed to this constructor, no other object should read from it. - //! For example, if std::cin is passed, no other object should read from - //! std::cin, otherwise unpredictable and incorrect results will be - //! generated. - CLineifiedJsonInputParser(std::istream &strmIn, - bool allDocsSameStructure = false); +class API_EXPORT CLineifiedJsonInputParser : public CLineifiedInputParser { +public: + //! Construct with an input stream to be parsed. Once a stream is + //! passed to this constructor, no other object should read from it. + //! For example, if std::cin is passed, no other object should read from + //! std::cin, otherwise unpredictable and incorrect results will be + //! generated. + CLineifiedJsonInputParser(std::istream& strmIn, bool allDocsSameStructure = false); - //! Read records from the stream. The supplied reader function is called - //! once per record. If the supplied reader function returns false, - //! reading will stop. This method keeps reading until it reaches the - //! end of the stream or an error occurs. If it successfully reaches - //! the end of the stream it returns true, otherwise it returns false. - virtual bool readStream(const TReaderFunc &readerFunc); + //! Read records from the stream. The supplied reader function is called + //! once per record. If the supplied reader function returns false, + //! reading will stop. This method keeps reading until it reaches the + //! end of the stream or an error occurs. If it successfully reaches + //! the end of the stream it returns true, otherwise it returns false. + virtual bool readStream(const TReaderFunc& readerFunc); - private: - //! Attempt to parse the current working record into data fields. - bool parseDocument(char *begin, - rapidjson::Document &document); +private: + //! Attempt to parse the current working record into data fields. + bool parseDocument(char* begin, rapidjson::Document& document); - bool decodeDocumentWithCommonFields(const rapidjson::Document &document, - TStrVec &fieldNames, - TStrRefVec &fieldValRefs, - TStrStrUMap &recordFields); + bool decodeDocumentWithCommonFields(const rapidjson::Document& document, + TStrVec& fieldNames, + TStrRefVec& fieldValRefs, + TStrStrUMap& recordFields); - bool decodeDocumentWithArbitraryFields(const rapidjson::Document &document, - TStrVec &fieldNames, - TStrStrUMap &recordFields); + bool decodeDocumentWithArbitraryFields(const rapidjson::Document& document, TStrVec& fieldNames, TStrStrUMap& recordFields); - private: - //! Are all JSON documents expected to contain the same fields in the - //! same order? - bool m_AllDocsSameStructure; +private: + //! 
Are all JSON documents expected to contain the same fields in the + //! same order? + bool m_AllDocsSameStructure; }; - - } } #endif // INCLUDED_ml_api_CLineifiedJsonInputParser_h - diff --git a/include/api/CLineifiedJsonOutputWriter.h b/include/api/CLineifiedJsonOutputWriter.h index 96e9c1eee2..87ca6e8732 100644 --- a/include/api/CLineifiedJsonOutputWriter.h +++ b/include/api/CLineifiedJsonOutputWriter.h @@ -19,11 +19,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Write output data in JSON format, one document per line @@ -37,80 +34,72 @@ namespace api //! IMPLEMENTATION:\n //! Using RapidJson to do the heavy lifting. //! -class API_EXPORT CLineifiedJsonOutputWriter : public COutputHandler -{ - public: - using TStrSet = std::set<std::string>; +class API_EXPORT CLineifiedJsonOutputWriter : public COutputHandler { +public: + using TStrSet = std::set<std::string>; - public: - //! Constructor that causes output to be written to the internal string - //! stream - CLineifiedJsonOutputWriter(); +public: + //! Constructor that causes output to be written to the internal string + //! stream + CLineifiedJsonOutputWriter(); - //! Constructor that causes output to be written to the internal string - //! stream, with some numeric fields - CLineifiedJsonOutputWriter(const TStrSet &numericFields); + //! Constructor that causes output to be written to the internal string + //! stream, with some numeric fields + CLineifiedJsonOutputWriter(const TStrSet& numericFields); - //! Constructor that causes output to be written to the specified stream - CLineifiedJsonOutputWriter(std::ostream &strmOut); + //! Constructor that causes output to be written to the specified stream + CLineifiedJsonOutputWriter(std::ostream& strmOut); - //! Constructor that causes output to be written to the specified stream - CLineifiedJsonOutputWriter(const TStrSet &numericFields, std::ostream &strmOut); + //! Constructor that causes output to be written to the specified stream + CLineifiedJsonOutputWriter(const TStrSet& numericFields, std::ostream& strmOut); - //! Destructor flushes the stream - virtual ~CLineifiedJsonOutputWriter(); + //! Destructor flushes the stream + virtual ~CLineifiedJsonOutputWriter(); - //! Set field names - this function has no affect it always - //! returns true - virtual bool fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames); + //! Set field names - this function has no effect; it always + //! returns true + virtual bool fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames); - //! Returns an empty vector - virtual const TStrVec &fieldNames() const; + //! Returns an empty vector + virtual const TStrVec& fieldNames() const; - // Bring the other overload of fieldNames() into scope - using COutputHandler::fieldNames; + // Bring the other overload of fieldNames() into scope + using COutputHandler::fieldNames; - //! Write the data row fields as a JSON object - virtual bool writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields); + //! Write the data row fields as a JSON object + virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); - // Bring the other overload of writeRow() into scope - using COutputHandler::writeRow; + // Bring the other overload of writeRow() into scope + using COutputHandler::writeRow; - //!
Get the contents of the internal string stream - for use with the + //! zero argument constructor + std::string internalString() const; - private: - //! Write a single field to the document - void writeField(const std::string &name, - const std::string &value, - rapidjson::Document &doc) const; +private: + //! Write a single field to the document + void writeField(const std::string& name, const std::string& value, rapidjson::Document& doc) const; - private: - //! Which output fields are numeric? - TStrSet m_NumericFields; +private: + //! Which output fields are numeric? + TStrSet m_NumericFields; - //! If we've been initialised without a specific stream, output is - //! written to this string stream - std::ostringstream m_StringOutputBuf; + //! If we've been initialised without a specific stream, output is + //! written to this string stream + std::ostringstream m_StringOutputBuf; - //! Reference to the stream we're going to write to - std::ostream &m_OutStream; + //! Reference to the stream we're going to write to + std::ostream& m_OutStream; - //! JSON writer ostream wrapper - rapidjson::OStreamWrapper m_WriteStream; + //! JSON writer ostream wrapper + rapidjson::OStreamWrapper m_WriteStream; - using TGenericLineWriter = core::CRapidJsonLineWriter<rapidjson::OStreamWrapper>; + using TGenericLineWriter = core::CRapidJsonLineWriter<rapidjson::OStreamWrapper>; - //! JSON writer - TGenericLineWriter m_Writer; + //! JSON writer + TGenericLineWriter m_Writer; }; - - } } #endif // INCLUDED_ml_api_CLineifiedJsonOutputWriter_h - diff --git a/include/api/CLineifiedXmlInputParser.h b/include/api/CLineifiedXmlInputParser.h index 452d72abcc..e4a3e753b1 100644 --- a/include/api/CLineifiedXmlInputParser.h +++ b/include/api/CLineifiedXmlInputParser.h @@ -11,15 +11,11 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CXmlParserIntf; } -namespace api -{ +namespace api { //! \brief //! Parse XML input where each line is a separate XML document. @@ -38,46 +34,37 @@ namespace api //! Takes an interface to the XML parser as a constructor argument to allow //! performance comparisons between different XML parsers. //! -class API_EXPORT CLineifiedXmlInputParser : public CLineifiedInputParser -{ - public: - //! Construct with an XML parser interface and an input stream to be - //! parsed. Once a stream is passed to this constructor, no other - //! object should read from it. For example, if std::cin is passed, no - //! other object should read from std::cin, otherwise unpredictable and - //! incorrect results will be generated. - CLineifiedXmlInputParser(core::CXmlParserIntf &parser, - std::istream &strmIn, - bool allDocsSameStructure = false); +class API_EXPORT CLineifiedXmlInputParser : public CLineifiedInputParser { +public: + //! Construct with an XML parser interface and an input stream to be + //! parsed. Once a stream is passed to this constructor, no other + //! object should read from it. For example, if std::cin is passed, no + //! other object should read from std::cin, otherwise unpredictable and + //! incorrect results will be generated. + CLineifiedXmlInputParser(core::CXmlParserIntf& parser, std::istream& strmIn, bool allDocsSameStructure = false); - //! Read records from the stream. The supplied reader function is called - //! once per record. If the supplied reader function returns false, - //! reading will stop. This method keeps reading until it reaches the - //! end of the stream or an error occurs. If it successfully reaches - //! the end of the stream it returns true, otherwise it returns false.
- virtual bool readStream(const TReaderFunc &readerFunc); + //! Read records from the stream. The supplied reader function is called + //! once per record. If the supplied reader function returns false, + //! reading will stop. This method keeps reading until it reaches the + //! end of the stream or an error occurs. If it successfully reaches + //! the end of the stream it returns true, otherwise it returns false. + virtual bool readStream(const TReaderFunc& readerFunc); - private: - //! Attempt to parse the current working record into data fields. - bool decodeDocumentWithCommonFields(TStrVec &fieldNames, - TStrRefVec &fieldValRefs, - TStrStrUMap &recordFields); +private: + //! Attempt to parse the current working record into data fields. + bool decodeDocumentWithCommonFields(TStrVec& fieldNames, TStrRefVec& fieldValRefs, TStrStrUMap& recordFields); - void decodeDocumentWithArbitraryFields(TStrVec &fieldNames, - TStrStrUMap &recordFields); + void decodeDocumentWithArbitraryFields(TStrVec& fieldNames, TStrStrUMap& recordFields); - private: - //! Reference to the parser we're going to use - core::CXmlParserIntf &m_Parser; +private: + //! Reference to the parser we're going to use + core::CXmlParserIntf& m_Parser; - //! Are all XML documents expected to contain the same fields in the - //! same order? - bool m_AllDocsSameStructure; + //! Are all XML documents expected to contain the same fields in the + //! same order? + bool m_AllDocsSameStructure; }; - - } } #endif // INCLUDED_ml_api_CLineifiedXmlInputParser_h - diff --git a/include/api/CLineifiedXmlOutputWriter.h b/include/api/CLineifiedXmlOutputWriter.h index ac5f89695e..5c4204c479 100644 --- a/include/api/CLineifiedXmlOutputWriter.h +++ b/include/api/CLineifiedXmlOutputWriter.h @@ -15,11 +15,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Write output data in XML format, one document per line @@ -33,61 +30,54 @@ namespace api //! IMPLEMENTATION:\n //! Using RapidXml to do the heavy lifting. //! -class API_EXPORT CLineifiedXmlOutputWriter : public COutputHandler -{ - public: - //! Constructor that causes output to be written to the internal string - //! stream - CLineifiedXmlOutputWriter(const std::string &rootName); +class API_EXPORT CLineifiedXmlOutputWriter : public COutputHandler { +public: + //! Constructor that causes output to be written to the internal string + //! stream + CLineifiedXmlOutputWriter(const std::string& rootName); - //! Constructor that causes output to be written to the specified stream - CLineifiedXmlOutputWriter(const std::string &rootName, - std::ostream &strmOut); + //! Constructor that causes output to be written to the specified stream + CLineifiedXmlOutputWriter(const std::string& rootName, std::ostream& strmOut); - //! Destructor flushes the stream - virtual ~CLineifiedXmlOutputWriter(); + //! Destructor flushes the stream + virtual ~CLineifiedXmlOutputWriter(); - //! Set field names - this function has no affect it always - //! returns true - virtual bool fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames); + //! Set field names - this function has no effect; it always + //! returns true + virtual bool fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames); - //!
Returns an empty vector + virtual const TStrVec& fieldNames() const; - // Bring the other overload of fieldNames() into scope - using COutputHandler::fieldNames; + // Bring the other overload of fieldNames() into scope + using COutputHandler::fieldNames; - //! Write the data row fields as an XML document - virtual bool writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields); + //! Write the data row fields as an XML document + virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); - // Bring the other overload of writeRow() into scope - using COutputHandler::writeRow; + // Bring the other overload of writeRow() into scope + using COutputHandler::writeRow; - //! Get the contents of the internal string stream - for use with the - //! zero argument constructor - std::string internalString() const; + //! Get the contents of the internal string stream - for use with the + //! zero argument constructor + std::string internalString() const; - private: - //! Name of the root element in which the fields to be output will be - //! nested - std::string m_RootName; +private: + //! Name of the root element in which the fields to be output will be + //! nested + std::string m_RootName; - //! If we've been initialised without a specific stream, output is - //! written to this string stream - std::ostringstream m_StringOutputBuf; + //! If we've been initialised without a specific stream, output is + //! written to this string stream + std::ostringstream m_StringOutputBuf; - //! Reference to the stream we're going to write to - std::ostream &m_OutStream; + //! Reference to the stream we're going to write to + std::ostream& m_OutStream; - //! XML node pool for efficiency - core::CXmlNodeWithChildrenPool m_Pool; + //! XML node pool for efficiency + core::CXmlNodeWithChildrenPool m_Pool; }; - - } } #endif // INCLUDED_ml_api_CLineifiedXmlOutputWriter_h - diff --git a/include/api/CModelPlotDataJsonWriter.h b/include/api/CModelPlotDataJsonWriter.h index ab2b68f017..a95fb43125 100644 --- a/include/api/CModelPlotDataJsonWriter.h +++ b/include/api/CModelPlotDataJsonWriter.h @@ -7,9 +7,9 @@ #define INCLUDED_ml_api_CModelPlotDataJsonWriter_h #include -#include #include #include +#include #include @@ -25,11 +25,8 @@ #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Write visualisation data as a JSON document @@ -46,63 +43,58 @@ namespace api //! The stream is flushed at the end of each of the public //! write.... functions. //!
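// For illustration only: given a populated model::CModelPlotData object
// `data` (population not shown), writing it is a two-liner. The job ID is
// invented and this sketch is not part of the changeset.
//
//     ml::core::CJsonOutputStreamWrapper wrappedStream(std::cout);
//     ml::api::CModelPlotDataJsonWriter plotWriter(wrappedStream);
//     plotWriter.writeFlat("job-1", data); // flushes the stream before returning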
-class API_EXPORT CModelPlotDataJsonWriter final: private core::CNonCopyable -{ - private: - - static const std::string JOB_ID; - static const std::string MODEL_PLOT; - static const std::string DETECTOR_INDEX; - static const std::string PARTITION_FIELD_NAME; - static const std::string PARTITION_FIELD_VALUE; - static const std::string TIME; - static const std::string FEATURE; - static const std::string BY; - static const std::string BY_FIELD_NAME; - static const std::string BY_FIELD_VALUE; - static const std::string OVER_FIELD_NAME; - static const std::string OVER_FIELD_VALUE; - static const std::string LOWER; - static const std::string UPPER; - static const std::string MEDIAN; - static const std::string ACTUAL; - static const std::string BUCKET_SPAN; - - public: - using TStrDoublePrVec = model::CModelPlotData::TStrDoublePrVec; - using TByFieldData = model::CModelPlotData::SByFieldData; - using TStrByFieldDataUMap = model::CModelPlotData::TStrByFieldDataUMap; - using TStrByFieldDataUMapCItr = TStrByFieldDataUMap::const_iterator; - using TFeatureStrByFieldDataUMapUMapCItr = model::CModelPlotData::TFeatureStrByFieldDataUMapUMapCItr; - using TStrDoublePr = model::CModelPlotData::TStrDoublePr; - - public: - //! Constructor that causes to be written to the specified stream - explicit CModelPlotDataJsonWriter(core::CJsonOutputStreamWrapper &outStream); - - void writeFlat(const std::string &jobId, const model::CModelPlotData &data); - - private: - void writeFlatRow(core_t::TTime time, - const std::string &jobId, - int detectorIndex, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &feature, - const std::string &byFieldName, - const std::string &byFieldValue, - const TByFieldData &byData, - core_t::TTime bucketSpan, - rapidjson::Value &doc); - - private: - //! JSON line writer - core::CRapidJsonConcurrentLineWriter m_Writer; +class API_EXPORT CModelPlotDataJsonWriter final : private core::CNonCopyable { +private: + static const std::string JOB_ID; + static const std::string MODEL_PLOT; + static const std::string DETECTOR_INDEX; + static const std::string PARTITION_FIELD_NAME; + static const std::string PARTITION_FIELD_VALUE; + static const std::string TIME; + static const std::string FEATURE; + static const std::string BY; + static const std::string BY_FIELD_NAME; + static const std::string BY_FIELD_VALUE; + static const std::string OVER_FIELD_NAME; + static const std::string OVER_FIELD_VALUE; + static const std::string LOWER; + static const std::string UPPER; + static const std::string MEDIAN; + static const std::string ACTUAL; + static const std::string BUCKET_SPAN; + +public: + using TStrDoublePrVec = model::CModelPlotData::TStrDoublePrVec; + using TByFieldData = model::CModelPlotData::SByFieldData; + using TStrByFieldDataUMap = model::CModelPlotData::TStrByFieldDataUMap; + using TStrByFieldDataUMapCItr = TStrByFieldDataUMap::const_iterator; + using TFeatureStrByFieldDataUMapUMapCItr = model::CModelPlotData::TFeatureStrByFieldDataUMapUMapCItr; + using TStrDoublePr = model::CModelPlotData::TStrDoublePr; + +public: + //! 
Constructor that causes output to be written to the specified stream + explicit CModelPlotDataJsonWriter(core::CJsonOutputStreamWrapper& outStream); + + void writeFlat(const std::string& jobId, const model::CModelPlotData& data); + +private: + void writeFlatRow(core_t::TTime time, + const std::string& jobId, + int detectorIndex, + const std::string& partitionFieldName, + const std::string& partitionFieldValue, + const std::string& feature, + const std::string& byFieldName, + const std::string& byFieldValue, + const TByFieldData& byData, + core_t::TTime bucketSpan, + rapidjson::Value& doc); + +private: + //! JSON line writer + core::CRapidJsonConcurrentLineWriter m_Writer; }; } } #endif // INCLUDED_ml_api_CModelPlotDataJsonWriter_h - diff --git a/include/api/CModelSizeStatsJsonWriter.h b/include/api/CModelSizeStatsJsonWriter.h index e4d17f8eef..4967baf614 100644 --- a/include/api/CModelSizeStatsJsonWriter.h +++ b/include/api/CModelSizeStatsJsonWriter.h @@ -15,22 +15,17 @@ #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! A static utility for writing the model_size_stats document in JSON. -class API_EXPORT CModelSizeStatsJsonWriter : private core::CNonInstantiatable -{ - public: - //! Writes the model size stats in the \p results in JSON format. - static void write(const std::string &jobId, - const model::CResourceMonitor::SResults &results, - core::CRapidJsonConcurrentLineWriter &writer); +class API_EXPORT CModelSizeStatsJsonWriter : private core::CNonInstantiatable { +public: + //! Writes the model size stats in the \p results in JSON format. + static void + write(const std::string& jobId, const model::CResourceMonitor::SResults& results, core::CRapidJsonConcurrentLineWriter& writer); }; } } diff --git a/include/api/CModelSnapshotJsonWriter.h b/include/api/CModelSnapshotJsonWriter.h index a07e4377ee..bf348a1a2c 100644 --- a/include/api/CModelSnapshotJsonWriter.h +++ b/include/api/CModelSnapshotJsonWriter.h @@ -15,10 +15,8 @@ #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Write model snapshots in JSON format //! //! DESCRIPTION:\n //! Outputs the model snapshot documents that accompany each state persist. //! -class API_EXPORT CModelSnapshotJsonWriter -{ - public: - //! Structure to store the model snapshot metadata - struct SModelSnapshotReport - { - std::string s_MinVersion; - core_t::TTime s_SnapshotTimestamp; - std::string s_Description; - std::string s_SnapshotId; - size_t s_NumDocs; - model::CResourceMonitor::SResults s_ModelSizeStats; - std::string s_NormalizerState; - core_t::TTime s_LatestRecordTime; - core_t::TTime s_LatestFinalResultTime; - }; +class API_EXPORT CModelSnapshotJsonWriter { +public: + //! Structure to store the model snapshot metadata + struct SModelSnapshotReport { + std::string s_MinVersion; + core_t::TTime s_SnapshotTimestamp; + std::string s_Description; + std::string s_SnapshotId; + size_t s_NumDocs; + model::CResourceMonitor::SResults s_ModelSizeStats; + std::string s_NormalizerState; + core_t::TTime s_LatestRecordTime; + core_t::TTime s_LatestFinalResultTime; + }; - public: - //! Constructor that causes output to be written to the specified wrapped stream - CModelSnapshotJsonWriter(const std::string &jobId, - core::CJsonOutputStreamWrapper &strmOut); +public: + //! Constructor that causes output to be written to the specified wrapped stream + CModelSnapshotJsonWriter(const std::string& jobId, core::CJsonOutputStreamWrapper& strmOut); - //!
Writes the given model snapshot in JSON format. - void write(const SModelSnapshotReport &report); + //! Writes the given model snapshot in JSON format. + void write(const SModelSnapshotReport& report); - //! Write the quantile's state - static void writeQuantileState(const std::string &jobId, - const std::string &state, - core_t::TTime timestamp, - core::CRapidJsonConcurrentLineWriter &writer); + //! Write the quantile's state + static void writeQuantileState(const std::string& jobId, + const std::string& state, + core_t::TTime timestamp, + core::CRapidJsonConcurrentLineWriter& writer); - private: - //! The job ID - std::string m_JobId; +private: + //! The job ID + std::string m_JobId; - //! JSON line writer - core::CRapidJsonConcurrentLineWriter m_Writer; + //! JSON line writer + core::CRapidJsonConcurrentLineWriter m_Writer; }; - - } } diff --git a/include/api/CNullOutput.h b/include/api/CNullOutput.h index 297a1152dc..81be9be7ac 100644 --- a/include/api/CNullOutput.h +++ b/include/api/CNullOutput.h @@ -9,11 +9,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Output handler that ignores all output. @@ -30,30 +27,24 @@ namespace api //! that is separate to the output handler that all data //! processors have. //! -class API_EXPORT CNullOutput : public COutputHandler -{ - public: - //! Does nothing with the field names provided. - virtual bool fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames); +class API_EXPORT CNullOutput : public COutputHandler { +public: + //! Does nothing with the field names provided. + virtual bool fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames); - //! Get field names - always empty. - virtual const TStrVec &fieldNames() const; + //! Get field names - always empty. + virtual const TStrVec& fieldNames() const; - // Bring the other overload of fieldNames() into scope - using COutputHandler::fieldNames; + // Bring the other overload of fieldNames() into scope + using COutputHandler::fieldNames; - //! Does nothing with the row provided. - virtual bool writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields); + //! Does nothing with the row provided. + virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); - // Bring the other overload of writeRow() into scope - using COutputHandler::writeRow; + // Bring the other overload of writeRow() into scope + using COutputHandler::writeRow; }; - - } } #endif // INCLUDED_ml_api_CNullOutput_h - diff --git a/include/api/COutputChainer.h b/include/api/COutputChainer.h index dde31725fe..5a12245bd3 100644 --- a/include/api/COutputChainer.h +++ b/include/api/COutputChainer.h @@ -13,16 +13,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CDataAdder; class CDataSearcher; } -namespace api -{ +namespace api { class CBackgroundPersister; class CDataProcessor; @@ -40,87 +36,80 @@ class CDataProcessor; //! The function to be called for each output record is encapsulated //! in a std::function to reduce coupling. //! -class API_EXPORT COutputChainer : public COutputHandler -{ - public: - //! Construct with a reference to the next data processor in the chain - COutputChainer(CDataProcessor &dataProcessor); - - //! We're going to be writing to a new output stream - virtual void newOutputStream(); - - //! Set field names, adding extra field names if they're not already - //! 
present - this is only allowed once - virtual bool fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames); - - //! Get field names - virtual const TStrVec &fieldNames() const; - - // Bring the other overload of fieldNames() into scope - using COutputHandler::fieldNames; - - //! Call the next data processor's input function with some output - //! values, optionally overriding some of the original field values. - //! Where the same field is present in both overrideDataRowFields and - //! dataRowFields, the value in overrideDataRowFields will be written. - virtual bool writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields); - - // Bring the other overload of writeRow() into scope - using COutputHandler::writeRow; - - //! Perform any final processing once all data for the current search - //! has been seen. Chained classes should NOT rely on this method being - //! called - they should do the best they can on the assumption that - //! this method will not be called, but may be able to improve their - //! output if this method is called. - virtual void finalise(); - - //! Restore previously saved state - virtual bool restoreState(core::CDataSearcher &restoreSearcher, - core_t::TTime &completeToTime); - - //! Persist current state - virtual bool persistState(core::CDataAdder &persister); - - //! Persist current state due to the periodic persistence being triggered. - virtual bool periodicPersistState(CBackgroundPersister &persister); - - //! The chainer does consume control messages, because it passes them on - //! to whatever processor it's chained to. - virtual bool consumesControlMessages(); - - private: - //! The function that will be called for every record output via this - //! object - CDataProcessor &m_DataProcessor; - - //! Field names in the order they are to be written to the output - TStrVec m_FieldNames; - - //! Pre-computed hashes for each field name. The pre-computed hashes - //! are at the same index in this vector as the corresponding field name - //! in the m_FieldNames vector. - TPreComputedHashVec m_Hashes; - - //! Used to build up the full set of fields to pass on to the next data - //! processor - TStrStrUMap m_WorkRecordFields; - - using TStrRef = boost::reference_wrapper<std::string>; - using TStrRefVec = std::vector<TStrRef>; - using TStrRefVecCItr = TStrRefVec::const_iterator; - - //! References to the strings within m_WorkRecordFields in the same - //! order as the field names in m_FieldNames. This avoids the need to - //! do hash lookups when populating m_WorkRecordFields. - TStrRefVec m_WorkRecordFieldRefs; +class API_EXPORT COutputChainer : public COutputHandler { +public: + //! Construct with a reference to the next data processor in the chain + COutputChainer(CDataProcessor& dataProcessor); + + //! We're going to be writing to a new output stream + virtual void newOutputStream(); + + //! Set field names, adding extra field names if they're not already + //! present - this is only allowed once + virtual bool fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames); + + //! Get field names + virtual const TStrVec& fieldNames() const; + + // Bring the other overload of fieldNames() into scope + using COutputHandler::fieldNames; + + //! Call the next data processor's input function with some output + //! values, optionally overriding some of the original field values. + //! Where the same field is present in both overrideDataRowFields and + //! dataRowFields, the value in overrideDataRowFields will be written.
+ virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + + // Bring the other overload of writeRow() into scope + using COutputHandler::writeRow; + + //! Perform any final processing once all data for the current search + //! has been seen. Chained classes should NOT rely on this method being + //! called - they should do the best they can on the assumption that + //! this method will not be called, but may be able to improve their + //! output if this method is called. + virtual void finalise(); + + //! Restore previously saved state + virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); + + //! Persist current state + virtual bool persistState(core::CDataAdder& persister); + + //! Persist current state due to the periodic persistence being triggered. + virtual bool periodicPersistState(CBackgroundPersister& persister); + + //! The chainer does consume control messages, because it passes them on + //! to whatever processor it's chained to. + virtual bool consumesControlMessages(); + +private: + //! The function that will be called for every record output via this + //! object + CDataProcessor& m_DataProcessor; + + //! Field names in the order they are to be written to the output + TStrVec m_FieldNames; + + //! Pre-computed hashes for each field name. The pre-computed hashes + //! are at the same index in this vector as the corresponding field name + //! in the m_FieldNames vector. + TPreComputedHashVec m_Hashes; + + //! Used to build up the full set of fields to pass on to the next data + //! processor + TStrStrUMap m_WorkRecordFields; + + using TStrRef = boost::reference_wrapper<std::string>; + using TStrRefVec = std::vector<TStrRef>; + using TStrRefVecCItr = TStrRefVec::const_iterator; + + //! References to the strings within m_WorkRecordFields in the same + //! order as the field names in m_FieldNames. This avoids the need to + //! do hash lookups when populating m_WorkRecordFields. + TStrRefVec m_WorkRecordFieldRefs; }; } } #endif // INCLUDED_ml_api_COutputChainer_h - diff --git a/include/api/COutputHandler.h b/include/api/COutputHandler.h index d1a05aab92..f704a6ec17 100644 --- a/include/api/COutputHandler.h +++ b/include/api/COutputHandler.h @@ -6,10 +6,10 @@ #ifndef INCLUDED_ml_api_COutputHandler_h #define INCLUDED_ml_api_COutputHandler_h -#include -#include #include #include +#include +#include #include @@ -19,16 +19,12 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CDataAdder; class CDataSearcher; } -namespace api -{ +namespace api { class CBackgroundPersister; //! \brief //! for the strings that hold the field names. The nested CPreComputedHash //! class and TPreComputedHashVec typedef can be used to implement this. //!
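// For illustration only: the minimal surface a concrete handler overrides.
// The class name and behaviour below are invented, not part of the changeset.
//
//     class CStdErrOutputHandler : public ml::api::COutputHandler {
//     public:
//         virtual bool fieldNames(const TStrVec& names, const TStrVec& /*extra*/) {
//             m_FieldNames = names; // only allowed once per output
//             return true;
//         }
//         virtual const TStrVec& fieldNames() const { return m_FieldNames; }
//         virtual bool writeRow(const TStrStrUMap& dataRowFields,
//                               const TStrStrUMap& overrideDataRowFields) {
//             for (const auto& name : m_FieldNames) {
//                 // A value in overrideDataRowFields wins over dataRowFields.
//                 TStrStrUMapCItr i = overrideDataRowFields.find(name);
//                 if (i == overrideDataRowFields.end()) {
//                     i = dataRowFields.find(name);
//                     if (i == dataRowFields.end()) { continue; }
//                 }
//                 std::cerr << name << '=' << i->second << ' ';
//             }
//             std::cerr << std::endl;
//             return true;
//         }
//     private:
//         TStrVec m_FieldNames;
//     };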
-class API_EXPORT COutputHandler : private core::CNonCopyable -{ - public: - using TStrVec = std::vector<std::string>; - using TStrVecItr = TStrVec::iterator; - using TStrVecCItr = TStrVec::const_iterator; +class API_EXPORT COutputHandler : private core::CNonCopyable { +public: + using TStrVec = std::vector<std::string>; + using TStrVecItr = TStrVec::iterator; + using TStrVecCItr = TStrVec::const_iterator; - using TStrStrUMap = boost::unordered_map<std::string, std::string>; - using TStrStrUMapItr = TStrStrUMap::iterator; - using TStrStrUMapCItr = TStrStrUMap::const_iterator; + using TStrStrUMap = boost::unordered_map<std::string, std::string>; + using TStrStrUMapItr = TStrStrUMap::iterator; + using TStrStrUMapCItr = TStrStrUMap::const_iterator; - public: - COutputHandler(); +public: + COutputHandler(); - //! Virtual destructor for abstract base class - virtual ~COutputHandler(); + //! Virtual destructor for abstract base class + virtual ~COutputHandler(); - //! We're going to be writing to a new output stream - virtual void newOutputStream(); + //! We're going to be writing to a new output stream + virtual void newOutputStream(); - //! Set field names - this must only be called once per output file - bool fieldNames(const TStrVec &fieldNames); + //! Set field names - this must only be called once per output file + bool fieldNames(const TStrVec& fieldNames); - //! Set field names, adding extra field names if they're not already - //! present - this is only allowed once - virtual bool fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames) = 0; + //! Set field names, adding extra field names if they're not already + //! present - this is only allowed once + virtual bool fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames) = 0; - //! Get field names - virtual const TStrVec &fieldNames() const = 0; + //! Get field names + virtual const TStrVec& fieldNames() const = 0; - //! Write a row to the stream. The supplied map must contain every - //! field value. - bool writeRow(const TStrStrUMap &dataRowFields); + //! Write a row to the stream. The supplied map must contain every + //! field value. + bool writeRow(const TStrStrUMap& dataRowFields); - //! Write a row to the stream, optionally overriding some of the - //! original field values. Where the same field is present in both - //! overrideDataRowFields and dataRowFields, the value in - //! overrideDataRowFields will be written. - virtual bool writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields) = 0; + //! Write a row to the stream, optionally overriding some of the + //! original field values. Where the same field is present in both + //! overrideDataRowFields and dataRowFields, the value in + //! overrideDataRowFields will be written. + virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) = 0; - //! Perform any final processing once all input data has been seen. - virtual void finalise(); + //! Perform any final processing once all input data has been seen. + virtual void finalise(); - //! Restore previously saved state - virtual bool restoreState(core::CDataSearcher &restoreSearcher, - core_t::TTime &completeToTime); + //! Restore previously saved state + virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); - //! Persist current state - virtual bool persistState(core::CDataAdder &persister); + //! Persist current state + virtual bool persistState(core::CDataAdder& persister); - //! Persist current state due to the periodic persistence being triggered.
- virtual bool periodicPersistState(CBackgroundPersister &persister); + //! Persist current state due to the periodic persistence being triggered. + virtual bool periodicPersistState(CBackgroundPersister& persister); - //! Does this handler deal with control messages? - virtual bool consumesControlMessages(); + //! Does this handler deal with control messages? + virtual bool consumesControlMessages(); - protected: - //! Class to cache a hash value so that it doesn't have to be repeatedly - //! recomputed - class API_EXPORT CPreComputedHash : public std::unary_function<std::string, size_t> - { - public: - //! Store the given hash - CPreComputedHash(size_t hash); +protected: + //! Class to cache a hash value so that it doesn't have to be repeatedly + //! recomputed + class API_EXPORT CPreComputedHash : public std::unary_function<std::string, size_t> { + public: + //! Store the given hash + CPreComputedHash(size_t hash); - //! Return the hash regardless of what string is passed. Use - //! with care! - size_t operator()(const std::string &) const; + //! Return the hash regardless of what string is passed. Use + //! with care! + size_t operator()(const std::string&) const; - private: - size_t m_Hash; - }; + private: + size_t m_Hash; + }; - protected: - //! Used when there are no extra fields - static const TStrVec EMPTY_FIELD_NAMES; +protected: + //! Used when there are no extra fields + static const TStrVec EMPTY_FIELD_NAMES; - //! Used when there are no field overrides - static const TStrStrUMap EMPTY_FIELD_OVERRIDES; + //! Used when there are no field overrides + static const TStrStrUMap EMPTY_FIELD_OVERRIDES; - using TPreComputedHashVec = std::vector<CPreComputedHash>; - using TPreComputedHashVecItr = TPreComputedHashVec::iterator; - using TPreComputedHashVecCItr = TPreComputedHashVec::const_iterator; + using TPreComputedHashVec = std::vector<CPreComputedHash>; + using TPreComputedHashVecItr = TPreComputedHashVec::iterator; + using TPreComputedHashVecCItr = TPreComputedHashVec::const_iterator; }; - - } } #endif // INCLUDED_ml_api_COutputHandler_h - diff --git a/include/api/CResultNormalizer.h b/include/api/CResultNormalizer.h index 044ce7ecb2..79a07fb0ab 100644 --- a/include/api/CResultNormalizer.h +++ b/include/api/CResultNormalizer.h @@ -21,11 +21,8 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Create normalized anomaly scores @@ -47,99 +44,94 @@ namespace api //! Does not support processor chaining functionality as it is unlikely //! that this class would ever be chained to another data processor. //! -class API_EXPORT CResultNormalizer -{ - public: - //! Field names used in records to be normalised - static const std::string LEVEL; - static const std::string PARTITION_FIELD_NAME; - static const std::string PARTITION_FIELD_VALUE; - static const std::string PERSON_FIELD_NAME; - static const std::string FUNCTION_NAME; - static const std::string VALUE_FIELD_NAME; - static const std::string PROBABILITY_NAME; - static const std::string NORMALIZED_SCORE_NAME; - - //!
Normalisation level values - static const std::string ROOT_LEVEL; - static const std::string PARTITION_LEVEL; - static const std::string LEAF_LEVEL; - static const std::string BUCKET_INFLUENCER_LEVEL; - static const std::string INFLUENCER_LEVEL; - - static const std::string ZERO; - - public: - using TStrVec = std::vector<std::string>; - using TStrVecItr = TStrVec::iterator; - using TStrVecCItr = TStrVec::const_iterator; - - using TStrStrUMap = boost::unordered_map<std::string, std::string>; - using TStrStrUMapItr = TStrStrUMap::iterator; - using TStrStrUMapCItr = TStrStrUMap::const_iterator; - - public: - CResultNormalizer(const model::CAnomalyDetectorModelConfig &modelConfig, - COutputHandler &outputHandler); - - //! Initialise the system change normalizer - bool initNormalizer(const std::string &stateFileName); - - //! Handle a record to be normalized - bool handleRecord(const TStrStrUMap &dataRowFields); - - private: - bool parseDataFields(const TStrStrUMap &dataRowFields, - std::string &level, std::string &partition, - std::string &person, std::string &function, - std::string &valueFieldName, double &probability); - - bool parseDataFields(const TStrStrUMap &dataRowFields, - std::string &level, std::string &partition, - std::string &partitionValue, std::string &person, - std::string &function, std::string &valueFieldName, - double &probability); - - template<typename T> - bool parseDataField(const TStrStrUMap &dataRowFields, - const std::string &fieldName, - T &result) const - { - TStrStrUMapCItr iter = dataRowFields.find(fieldName); - if (iter == dataRowFields.end() || - core::CStringUtils::stringToType(iter->second, result) == false) - { - LOG_ERROR("Cannot interpret " << fieldName << " field in record:\n" << - CDataProcessor::debugPrintRecord(dataRowFields)); - return false; - } - return true; +class API_EXPORT CResultNormalizer { +public: + //! Field names used in records to be normalised + static const std::string LEVEL; + static const std::string PARTITION_FIELD_NAME; + static const std::string PARTITION_FIELD_VALUE; + static const std::string PERSON_FIELD_NAME; + static const std::string FUNCTION_NAME; + static const std::string VALUE_FIELD_NAME; + static const std::string PROBABILITY_NAME; + static const std::string NORMALIZED_SCORE_NAME; + + //! Normalisation level values + static const std::string ROOT_LEVEL; + static const std::string PARTITION_LEVEL; + static const std::string LEAF_LEVEL; + static const std::string BUCKET_INFLUENCER_LEVEL; + static const std::string INFLUENCER_LEVEL; + + static const std::string ZERO; + +public: + using TStrVec = std::vector<std::string>; + using TStrVecItr = TStrVec::iterator; + using TStrVecCItr = TStrVec::const_iterator; + + using TStrStrUMap = boost::unordered_map<std::string, std::string>; + using TStrStrUMapItr = TStrStrUMap::iterator; + using TStrStrUMapCItr = TStrStrUMap::const_iterator; + +public: + CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig, COutputHandler& outputHandler); + + //! Initialise the system change normalizer + bool initNormalizer(const std::string& stateFileName); +
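// For illustration only: a plausible normalization pass using only names
// declared in this header. The model config, output handler and the state
// file name are assumed to exist; this sketch is not part of the changeset.
//
//     using TNorm = ml::api::CResultNormalizer;
//     TNorm normalizer(modelConfig, outputHandler);
//     if (normalizer.initNormalizer("quantiles_state_file")) {
//         TNorm::TStrStrUMap record;
//         record[TNorm::LEVEL] = TNorm::LEAF_LEVEL;
//         record[TNorm::PROBABILITY_NAME] = "0.00001";
//         normalizer.handleRecord(record); // writes the normalized score row
//     }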
Handle a record to be normalized + bool handleRecord(const TStrStrUMap& dataRowFields); + +private: + bool parseDataFields(const TStrStrUMap& dataRowFields, + std::string& level, + std::string& partition, + std::string& person, + std::string& function, + std::string& valueFieldName, + double& probability); + + bool parseDataFields(const TStrStrUMap& dataRowFields, + std::string& level, + std::string& partition, + std::string& partitionValue, + std::string& person, + std::string& function, + std::string& valueFieldName, + double& probability); + + template + bool parseDataField(const TStrStrUMap& dataRowFields, const std::string& fieldName, T& result) const { + TStrStrUMapCItr iter = dataRowFields.find(fieldName); + if (iter == dataRowFields.end() || core::CStringUtils::stringToType(iter->second, result) == false) { + LOG_ERROR("Cannot interpret " << fieldName << " field in record:\n" << CDataProcessor::debugPrintRecord(dataRowFields)); + return false; } + return true; + } - private: - //! Reference to model config - const model::CAnomalyDetectorModelConfig &m_ModelConfig; +private: + //! Reference to model config + const model::CAnomalyDetectorModelConfig& m_ModelConfig; - //! Object to which the output is passed - COutputHandler &m_OutputHandler; + //! Object to which the output is passed + COutputHandler& m_OutputHandler; - //! Do we need to tell the output handler what our fieldnames are? - bool m_WriteFieldNames; + //! Do we need to tell the output handler what our fieldnames are? + bool m_WriteFieldNames; - //! Map holding fields to write to the output - TStrStrUMap m_OutputFields; + //! Map holding fields to write to the output + TStrStrUMap m_OutputFields; - //! References to specific entries in the map to save repeatedly - //! searching for them - std::string &m_OutputFieldNormalizedScore; + //! References to specific entries in the map to save repeatedly + //! searching for them + std::string& m_OutputFieldNormalizedScore; - //! The hierarchical results normalizer - model::CHierarchicalResultsNormalizer m_Normalizer; + //! The hierarchical results normalizer + model::CHierarchicalResultsNormalizer m_Normalizer; }; - - } } #endif // INCLUDED_ml_api_CResultNormalizer_h - diff --git a/include/api/CSingleStreamDataAdder.h b/include/api/CSingleStreamDataAdder.h index 63e9200d76..872a373de5 100644 --- a/include/api/CSingleStreamDataAdder.h +++ b/include/api/CSingleStreamDataAdder.h @@ -12,11 +12,8 @@ #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Persists data to a single C++ stream. @@ -37,44 +34,38 @@ namespace api //! //! The single stream must be already open when passed to the constructor. //! -class API_EXPORT CSingleStreamDataAdder : public core::CDataAdder -{ - public: - //! The \p stream must already be open when the constructor is - //! called. - CSingleStreamDataAdder(const TOStreamP &stream); +class API_EXPORT CSingleStreamDataAdder : public core::CDataAdder { +public: + //! The \p stream must already be open when the constructor is + //! called. + CSingleStreamDataAdder(const TOStreamP& stream); - //! Returns a stream that can be used to persist data to a C++ - //! stream, or NULL if this is not possible. Many errors cannot - //! be detected by this method, so the stream will go into the - //! "bad" state if an error occurs during upload. The caller - //! must check for this. - //! \param index Index to add to metadata document - //! 
\param id ID to add to metadata document
-        virtual TOStreamP addStreamed(const std::string &index,
-                                      const std::string &id);
+    //! Returns a stream that can be used to persist data to a C++
+    //! stream, or NULL if this is not possible. Many errors cannot
+    //! be detected by this method, so the stream will go into the
+    //! "bad" state if an error occurs during upload. The caller
+    //! must check for this.
+    //! \param index Index to add to metadata document
+    //! \param id ID to add to metadata document
+    virtual TOStreamP addStreamed(const std::string& index, const std::string& id);

-        //! Clients that get a stream using addStreamed() must call this
-        //! method once they've finished sending data to the stream.
-        //! \param stream The completed data stream
-        //! \param force If true the stream is flushed
-        virtual bool streamComplete(TOStreamP &stream,
-                                    bool force);
+    //! Clients that get a stream using addStreamed() must call this
+    //! method once they've finished sending data to the stream.
+    //! \param stream The completed data stream
+    //! \param force If true the stream is flushed
+    virtual bool streamComplete(TOStreamP& stream, bool force);

-        virtual std::size_t maxDocumentSize() const;
+    virtual std::size_t maxDocumentSize() const;

-    private:
-        //! Recommended maximum Elasticsearch document size
-        static const size_t MAX_DOCUMENT_SIZE;
+private:
+    //! Recommended maximum Elasticsearch document size
+    static const size_t MAX_DOCUMENT_SIZE;

-    private:
-        //! The stream we're writing to.
-        TOStreamP m_Stream;
+private:
+    //! The stream we're writing to.
+    TOStreamP m_Stream;
 };
-
-
 }
 }

 #endif // INCLUDED_ml_api_CSingleStreamDataAdder_h
-
diff --git a/include/api/CSingleStreamSearcher.h b/include/api/CSingleStreamSearcher.h
index 3598c7ebc9..4d3c470515 100644
--- a/include/api/CSingleStreamSearcher.h
+++ b/include/api/CSingleStreamSearcher.h
@@ -10,11 +10,8 @@

 #include

-
-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {

 //! \brief
 //! Retrieves data from a single C++ stream.
@@ -35,28 +32,24 @@
 //! again, but doing this enables the interface to be used in cases
 //! where different streams are returned for each request.
 //!
-class API_EXPORT CSingleStreamSearcher : public core::CDataSearcher
-{
-    public:
-        //! The \p stream must already be open when the constructor is
-        //! called.
-        CSingleStreamSearcher(const TIStreamP &stream);
-
-        //! Get the stream to retrieve data from.
-        //! \return Pointer to the input stream.
-        //! Some errors cannot be detected by this call itself, and are
-        //! indicated by the stream going into the "bad" state as it is
-        //! read from.
-        virtual TIStreamP search(size_t currentDocNum, size_t limit);
-
-    private:
-        //! The stream we're reading from.
-        TIStreamP m_Stream;
+class API_EXPORT CSingleStreamSearcher : public core::CDataSearcher {
+public:
+    //! The \p stream must already be open when the constructor is
+    //! called.
+    CSingleStreamSearcher(const TIStreamP& stream);
+
+    //! Get the stream to retrieve data from.
+    //! \return Pointer to the input stream.
+    //! Some errors cannot be detected by this call itself, and are
+    //! indicated by the stream going into the "bad" state as it is
+    //! read from.
+    virtual TIStreamP search(size_t currentDocNum, size_t limit);
+
+private:
+    //! The stream we're reading from.
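
For orientation, a minimal usage sketch of the adder documented above, ahead of the m_Stream member that follows. This is illustrative only, not code from this patch: it assumes core::CDataAdder::TOStreamP is a boost::shared_ptr to std::ostream, which is how the signatures above read, and the index/id values are made up.

    #include <api/CSingleStreamDataAdder.h>

    #include <boost/shared_ptr.hpp>

    #include <iostream>
    #include <sstream>

    int main() {
        // Back the adder with a string stream so the "persisted" document
        // can be inspected afterwards; any open std::ostream would do.
        boost::shared_ptr<std::ostringstream> buffer(new std::ostringstream);
        ml::api::CSingleStreamDataAdder adder(buffer);

        // Ask for a stream for one document, write it, then signal completion.
        ml::core::CDataAdder::TOStreamP strm(adder.addStreamed("ml-state", "doc-1"));
        if (strm) {
            (*strm) << "{\"example\":\"state\"}";
            adder.streamComplete(strm, true); // must be called once writing is done
        }

        // Errors surface as the stream going "bad", so check explicitly.
        if (!strm || strm->bad()) {
            std::cerr << "persist failed" << std::endl;
            return 1;
        }
        std::cout << buffer->str() << std::endl;
        return 0;
    }
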
+    TIStreamP m_Stream;
 };

 }
 }

 #endif // INCLUDED_ml_api_CSingleStreamSearcher_h
-
diff --git a/include/api/CStateRestoreStreamFilter.h b/include/api/CStateRestoreStreamFilter.h
index fe23cf0e3a..b30f78c367 100644
--- a/include/api/CStateRestoreStreamFilter.h
+++ b/include/api/CStateRestoreStreamFilter.h
@@ -15,10 +15,8 @@

 #include

-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {

 //! \brief
 //! A streaming filter that maps persistence to restore format, specific to
@@ -45,26 +43,24 @@
 //! When using it with boost::iostreams::filtering_ostream note that the filter gets
 //! copied once pushed to the ostream instance.
 //!
-class API_EXPORT CStateRestoreStreamFilter : public boost::iostreams::basic_line_filter<char>
-{
-    public:
-        using boost::iostreams::basic_line_filter<char>::string_type;
-
-        CStateRestoreStreamFilter();
-
-        size_t getDocCount() const;
-    private:
-        //! number of documents found in the stream
-        size_t m_DocCount;
-
-        //! whether the previous line has been rewritten
-        bool m_RewrotePreviousLine;
-
-        string_type do_filter(const string_type &line) override;
-};
+class API_EXPORT CStateRestoreStreamFilter : public boost::iostreams::basic_line_filter<char> {
+public:
+    using boost::iostreams::basic_line_filter<char>::string_type;
+
+    CStateRestoreStreamFilter();
+
+    size_t getDocCount() const;
+
+private:
+    //! number of documents found in the stream
+    size_t m_DocCount;
+
+    //! whether the previous line has been rewritten
+    bool m_RewrotePreviousLine;
+
+    string_type do_filter(const string_type& line) override;
+};
 }
-
 }

 #endif /* INCLUDED_ml_api_CRestoreStreamFilter_h */
diff --git a/include/api/CTokenListDataTyper.h b/include/api/CTokenListDataTyper.h
index 1e594a3672..33600cd96f 100644
--- a/include/api/CTokenListDataTyper.h
+++ b/include/api/CTokenListDataTyper.h
@@ -18,11 +18,8 @@

 #include

-
-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {

 //! \brief
 //! Concrete implementation class to categorise strings.
@@ -40,265 +37,194 @@
 //! more than 10% compared to having lots of flags being constantly
 //! checked at runtime.)
 //!
-template
-class CTokenListDataTyper : public CBaseTokenListDataTyper
-{
-    public:
-        //! Create a data typer with threshold for how comparable types are
-        //! 0.0 means everything is the same type
-        //! 1.0 means things have to match exactly to be the same type
-        CTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr &reverseSearchCreator,
-                            double threshold,
-                            const std::string &fieldName)
-            : CBaseTokenListDataTyper(reverseSearchCreator,
-                                      threshold,
-                                      fieldName),
-              m_Dict(core::CWordDictionary::instance())
-        {
-        }
-
-    protected:
-        //! Split the string into a list of tokens. The result of the
-        //! tokenisation is returned in \p tokenIds, \p tokenUniqueIds and
-        //! \p totalWeight. Any previous content of these variables is wiped.
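
Before the (pre-format) implementation that follows, the tokenisation rule is easier to digest on a concrete input. A self-contained simplification, hard-coding the underscore/dot/dash behaviour that the real class takes as template arguments and omitting its hex and weighting logic:

    #include <cctype>
    #include <iostream>
    #include <string>
    #include <vector>

    // Tokenise into [a-zA-Z0-9]+ runs; '_', '.' and '-' may only join a
    // token that has already started, mirroring the rule described above.
    std::vector<std::string> tokenise(const std::string& str) {
        std::vector<std::string> tokens;
        std::string temp;
        for (std::string::size_type i = 0; i < str.size(); ++i) {
            char curChar = str[i];
            if (::isalnum(static_cast<unsigned char>(curChar)) ||
                (!temp.empty() && (curChar == '_' || curChar == '.' || curChar == '-'))) {
                temp += curChar;
            } else if (!temp.empty()) {
                tokens.push_back(temp);
                temp.clear();
            }
        }
        if (!temp.empty()) {
            tokens.push_back(temp);
        }
        return tokens;
    }

    int main() {
        // "Failed to connect to host-01.example.com: timeout" splits into
        // Failed | to | connect | to | host-01.example.com | timeout
        for (const std::string& token : tokenise("Failed to connect to host-01.example.com: timeout")) {
            std::cout << token << '\n';
        }
        return 0;
    }
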
- virtual void tokeniseString(const TStrStrUMap &fields, - const std::string &str, - TSizeSizePrVec &tokenIds, - TSizeSizeMap &tokenUniqueIds, - size_t &totalWeight) - { - tokenIds.clear(); - tokenUniqueIds.clear(); - totalWeight = 0; - - std::string temp; - - // TODO - make more efficient - std::string::size_type nonHexPos(std::string::npos); - for (std::string::size_type i = 0; i < str.size(); ++i) - { - const char curChar(str[i]); - - // Basically tokenise into [a-zA-Z0-9]+ strings, possibly - // allowing underscores, dots and dashes in the middle - if (::isalnum(static_cast(curChar)) || - (!temp.empty() && - ( - (ALLOW_UNDERSCORE && curChar == '_') || - (ALLOW_DOT && curChar == '.') || - (ALLOW_DASH && curChar == '-') - ) - ) - ) - { - temp += curChar; - if (IGNORE_HEX) - { - // Count dots and dashes as numeric - if (!::isxdigit(static_cast(curChar)) && curChar != '.' && curChar != '-') - { - nonHexPos = temp.length() - 1; - } +template +class CTokenListDataTyper : public CBaseTokenListDataTyper { +public: + //! Create a data typer with threshold for how comparable types are + //! 0.0 means everything is the same type + //! 1.0 means things have to match exactly to be the same type + CTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr& reverseSearchCreator, double threshold, const std::string& fieldName) + : CBaseTokenListDataTyper(reverseSearchCreator, threshold, fieldName), m_Dict(core::CWordDictionary::instance()) {} + +protected: + //! Split the string into a list of tokens. The result of the + //! tokenisation is returned in \p tokenIds, \p tokenUniqueIds and + //! \p totalWeight. Any previous content of these variables is wiped. + virtual void tokeniseString(const TStrStrUMap& fields, + const std::string& str, + TSizeSizePrVec& tokenIds, + TSizeSizeMap& tokenUniqueIds, + size_t& totalWeight) { + tokenIds.clear(); + tokenUniqueIds.clear(); + totalWeight = 0; + + std::string temp; + + // TODO - make more efficient + std::string::size_type nonHexPos(std::string::npos); + for (std::string::size_type i = 0; i < str.size(); ++i) { + const char curChar(str[i]); + + // Basically tokenise into [a-zA-Z0-9]+ strings, possibly + // allowing underscores, dots and dashes in the middle + if (::isalnum(static_cast(curChar)) || + (!temp.empty() && + ((ALLOW_UNDERSCORE && curChar == '_') || (ALLOW_DOT && curChar == '.') || (ALLOW_DASH && curChar == '-')))) { + temp += curChar; + if (IGNORE_HEX) { + // Count dots and dashes as numeric + if (!::isxdigit(static_cast(curChar)) && curChar != '.' && curChar != '-') { + nonHexPos = temp.length() - 1; } } - else - { - if (!temp.empty()) - { - this->considerToken(fields, - nonHexPos, - temp, - tokenIds, - tokenUniqueIds, - totalWeight); - temp.clear(); - } - - if (IGNORE_HEX) - { - nonHexPos = std::string::npos; - } + } else { + if (!temp.empty()) { + this->considerToken(fields, nonHexPos, temp, tokenIds, tokenUniqueIds, totalWeight); + temp.clear(); } - } - if (!temp.empty()) - { - this->considerToken(fields, - nonHexPos, - temp, - tokenIds, - tokenUniqueIds, - totalWeight); + if (IGNORE_HEX) { + nonHexPos = std::string::npos; + } } - - LOG_TRACE(str << " tokenised to " << tokenIds.size() << - " tokens with total weight " << totalWeight << ": " << - SIdTranslater(*this, tokenIds, ' ')); } - //! Take a string token, convert it to a numeric ID and a weighting and - //! add these to the provided data structures. 
- virtual void tokenToIdAndWeight(const std::string &token, - TSizeSizePrVec &tokenIds, - TSizeSizeMap &tokenUniqueIds, - size_t &totalWeight) - { - TSizeSizePr idWithWeight(this->idForToken(token), 1); - - if (token.length() >= MIN_DICTIONARY_LENGTH) - { - // Give more weighting to tokens that are dictionary words. - idWithWeight.second += m_DictionaryWeightFunc(m_Dict.partOfSpeech(token)); - } - tokenIds.push_back(idWithWeight); - tokenUniqueIds[idWithWeight.first] += idWithWeight.second; - totalWeight += idWithWeight.second; + if (!temp.empty()) { + this->considerToken(fields, nonHexPos, temp, tokenIds, tokenUniqueIds, totalWeight); } - //! Compute similarity between two vectors - virtual double similarity(const TSizeSizePrVec &left, - size_t leftWeight, - const TSizeSizePrVec &right, - size_t rightWeight) const - { - double similarity(1.0); - - size_t maxWeight(std::max(leftWeight, rightWeight)); - if (maxWeight > 0) - { - size_t diff(DO_WARPING ? - m_SimilarityTester.weightedEditDistance(left, right) : - this->compareNoWarp(left, right)); - - similarity = 1.0 - double(diff) / double(maxWeight); - } + LOG_TRACE(str << " tokenised to " << tokenIds.size() << " tokens with total weight " << totalWeight << ": " + << SIdTranslater(*this, tokenIds, ' ')); + } + + //! Take a string token, convert it to a numeric ID and a weighting and + //! add these to the provided data structures. + virtual void tokenToIdAndWeight(const std::string& token, TSizeSizePrVec& tokenIds, TSizeSizeMap& tokenUniqueIds, size_t& totalWeight) { + TSizeSizePr idWithWeight(this->idForToken(token), 1); - return similarity; + if (token.length() >= MIN_DICTIONARY_LENGTH) { + // Give more weighting to tokens that are dictionary words. + idWithWeight.second += m_DictionaryWeightFunc(m_Dict.partOfSpeech(token)); } + tokenIds.push_back(idWithWeight); + tokenUniqueIds[idWithWeight.first] += idWithWeight.second; + totalWeight += idWithWeight.second; + } - private: - //! Compare two vectors of tokens without doing any warping (this is an - //! alternative to using the Levenshtein distance, which is a form of - //! warping) - size_t compareNoWarp(const TSizeSizePrVec &left, - const TSizeSizePrVec &right) const - { - size_t minSize(std::min(left.size(), right.size())); - size_t maxSize(std::max(left.size(), right.size())); - - size_t diff(0); - - for (size_t index = 0; index < minSize; ++index) - { - if (left[index].first != right[index].first) - { - diff += std::max(left[index].second, - right[index].second); - } - } + //! Compute similarity between two vectors + virtual double similarity(const TSizeSizePrVec& left, size_t leftWeight, const TSizeSizePrVec& right, size_t rightWeight) const { + double similarity(1.0); - // Account for different length vector instances - if (left.size() < right.size()) - { - for (size_t index = minSize; index < maxSize; ++index) - { - diff += right[index].second; - } - } - else if (left.size() > right.size()) - { - for (size_t index = minSize; index < maxSize; ++index) - { - diff += left[index].second; - } - } + size_t maxWeight(std::max(leftWeight, rightWeight)); + if (maxWeight > 0) { + size_t diff(DO_WARPING ? m_SimilarityTester.weightedEditDistance(left, right) : this->compareNoWarp(left, right)); - return diff; + similarity = 1.0 - double(diff) / double(maxWeight); } - //! Consider adding a token to the data structures that will be used in - //! the comparison. The \p token argument must not be empty when this - //! method is called. This method may modify \p token. 
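
A quick numeric check of the similarity formula above: with left = [(7,1), (9,2)] and right = [(7,1), (8,2)], both total weights are 3; the no-warp comparison charges max(2,2) = 2 for the mismatched second position, giving similarity 1 - 2/3 ≈ 0.33. A compilable sketch of just that no-warp path:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    using TSizeSizePr = std::pair<std::size_t, std::size_t>;
    using TSizeSizePrVec = std::vector<TSizeSizePr>;

    // Positional comparison with no warping: a mismatch costs the larger of
    // the two weights; surplus elements of the longer vector cost their weight.
    std::size_t compareNoWarp(const TSizeSizePrVec& left, const TSizeSizePrVec& right) {
        std::size_t minSize = std::min(left.size(), right.size());
        std::size_t diff = 0;
        for (std::size_t i = 0; i < minSize; ++i) {
            if (left[i].first != right[i].first) {
                diff += std::max(left[i].second, right[i].second);
            }
        }
        const TSizeSizePrVec& longer = left.size() > right.size() ? left : right;
        for (std::size_t i = minSize; i < longer.size(); ++i) {
            diff += longer[i].second;
        }
        return diff;
    }

    int main() {
        TSizeSizePrVec left, right;
        left.push_back(TSizeSizePr(7, 1));
        left.push_back(TSizeSizePr(9, 2));
        right.push_back(TSizeSizePr(7, 1));
        right.push_back(TSizeSizePr(8, 2));
        std::size_t maxWeight = 3;
        double similarity = 1.0 - double(compareNoWarp(left, right)) / double(maxWeight);
        std::cout << similarity << std::endl; // prints 0.333...
        return 0;
    }
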
- void considerToken(const TStrStrUMap &fields, - std::string::size_type nonHexPos, - std::string &token, - TSizeSizePrVec &tokenIds, - TSizeSizeMap &tokenUniqueIds, - size_t &totalWeight) - { - if (IGNORE_LEADING_DIGIT && ::isdigit(static_cast(token[0]))) - { - return; - } + return similarity; + } - // If configured, ignore pure hex numbers, with or without a 0x - // prefix. - if (IGNORE_HEX) - { - if (nonHexPos == std::string::npos) - { - // Implies hex without 0x prefix. - return; - } +private: + //! Compare two vectors of tokens without doing any warping (this is an + //! alternative to using the Levenshtein distance, which is a form of + //! warping) + size_t compareNoWarp(const TSizeSizePrVec& left, const TSizeSizePrVec& right) const { + size_t minSize(std::min(left.size(), right.size())); + size_t maxSize(std::max(left.size(), right.size())); - // This second hex test is redundant if we're ignoring tokens - // with leading digits, and checking this first will cause the - // check to be completely compiled away as IGNORE_LEADING_DIGIT - // is a template argument - if (!IGNORE_LEADING_DIGIT && - nonHexPos == 1 && - token.compare(0, 2, "0x") == 0 && - token.length() != 2) - { - // Implies hex with 0x prefix. - return; - } + size_t diff(0); + + for (size_t index = 0; index < minSize; ++index) { + if (left[index].first != right[index].first) { + diff += std::max(left[index].second, right[index].second); } + } - // If the last character is not alphanumeric, strip it. - while (!::isalnum(static_cast(token[token.length() - 1]))) - { - token.erase(token.length() - 1); + // Account for different length vector instances + if (left.size() < right.size()) { + for (size_t index = minSize; index < maxSize; ++index) { + diff += right[index].second; + } + } else if (left.size() > right.size()) { + for (size_t index = minSize; index < maxSize; ++index) { + diff += left[index].second; } + } - if (IGNORE_DATE_WORDS && core::CTimeUtils::isDateWord(token)) - { + return diff; + } + + //! Consider adding a token to the data structures that will be used in + //! the comparison. The \p token argument must not be empty when this + //! method is called. This method may modify \p token. + void considerToken(const TStrStrUMap& fields, + std::string::size_type nonHexPos, + std::string& token, + TSizeSizePrVec& tokenIds, + TSizeSizeMap& tokenUniqueIds, + size_t& totalWeight) { + if (IGNORE_LEADING_DIGIT && ::isdigit(static_cast(token[0]))) { + return; + } + + // If configured, ignore pure hex numbers, with or without a 0x + // prefix. + if (IGNORE_HEX) { + if (nonHexPos == std::string::npos) { + // Implies hex without 0x prefix. return; } - if (IGNORE_FIELD_NAMES && fields.find(token) != fields.end()) - { + // This second hex test is redundant if we're ignoring tokens + // with leading digits, and checking this first will cause the + // check to be completely compiled away as IGNORE_LEADING_DIGIT + // is a template argument + if (!IGNORE_LEADING_DIGIT && nonHexPos == 1 && token.compare(0, 2, "0x") == 0 && token.length() != 2) { + // Implies hex with 0x prefix. return; } + } + + // If the last character is not alphanumeric, strip it. + while (!::isalnum(static_cast(token[token.length() - 1]))) { + token.erase(token.length() - 1); + } - this->tokenToIdAndWeight(token, tokenIds, tokenUniqueIds, totalWeight); + if (IGNORE_DATE_WORDS && core::CTimeUtils::isDateWord(token)) { + return; } - private: - //! Reference to a part-of-speech dictionary. 
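
For intuition about the dictionary weighting that feeds off the m_Dict member below: the DICTIONARY_WEIGHT_FUNC template argument maps a token's part of speech to a weight bonus. A hypothetical functor in that spirit (the enum values and numbers here are assumptions for illustration, not the library's own definitions):

    #include <cstddef>

    // Hypothetical part-of-speech tags; the real ones live in core::CWordDictionary.
    enum EPartOfSpeech { E_NotInDictionary, E_Noun, E_Verb, E_Other };

    // Example weight functor: dictionary words add to the base token weight
    // of 1, with verbs weighted more heavily than other word classes.
    struct SExampleDictionaryWeight {
        std::size_t operator()(EPartOfSpeech partOfSpeech) const {
            switch (partOfSpeech) {
            case E_Verb:
                return 5;
            case E_Noun:
            case E_Other:
                return 2;
            case E_NotInDictionary:
            default:
                return 0;
            }
        }
    };

Under this illustrative weighting, a verb such as "connect" (of at least MIN_DICTIONARY_LENGTH characters) would carry weight 1 + 5 = 6 in the unique token map, while an arbitrary identifier carries weight 1.
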
- const core::CWordDictionary &m_Dict; + if (IGNORE_FIELD_NAMES && fields.find(token) != fields.end()) { + return; + } - //! Used for determining the edit distance between two vectors of - //! strings, i.e. how many insertions, deletions or changes would it - //! take to convert one to the other - core::CStringSimilarityTester m_SimilarityTester; + this->tokenToIdAndWeight(token, tokenIds, tokenUniqueIds, totalWeight); + } - //! Function used to increase weighting for dictionary words - DICTIONARY_WEIGHT_FUNC m_DictionaryWeightFunc; -}; +private: + //! Reference to a part-of-speech dictionary. + const core::CWordDictionary& m_Dict; + //! Used for determining the edit distance between two vectors of + //! strings, i.e. how many insertions, deletions or changes would it + //! take to convert one to the other + core::CStringSimilarityTester m_SimilarityTester; + //! Function used to increase weighting for dictionary words + DICTIONARY_WEIGHT_FUNC m_DictionaryWeightFunc; +}; } } #endif // INCLUDED_ml_api_CTokenListDataTyper_h - diff --git a/include/api/CTokenListReverseSearchCreator.h b/include/api/CTokenListReverseSearchCreator.h index d34d5b23d8..050dd3ebc2 100644 --- a/include/api/CTokenListReverseSearchCreator.h +++ b/include/api/CTokenListReverseSearchCreator.h @@ -8,10 +8,8 @@ #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Create Engine API reverse searches for categories of events. @@ -27,66 +25,52 @@ namespace api //! The Engine API reverse search has the space separated list of the tokens //! and the regex because most modern index-based storages accept such searches. //! -class API_EXPORT CTokenListReverseSearchCreator : public CTokenListReverseSearchCreatorIntf -{ - public: - CTokenListReverseSearchCreator(const std::string &fieldName); +class API_EXPORT CTokenListReverseSearchCreator : public CTokenListReverseSearchCreatorIntf { +public: + CTokenListReverseSearchCreator(const std::string& fieldName); - //! What's the maximum cost of tokens we can include in the reverse - //! search? This cost is loosely based on the maximum length of an - //! Internet Explorer URL. - virtual size_t availableCost() const; + //! What's the maximum cost of tokens we can include in the reverse + //! search? This cost is loosely based on the maximum length of an + //! Internet Explorer URL. + virtual size_t availableCost() const; - //! What would be the cost of adding the specified token occurring the - //! specified number of times to the reverse search? - virtual size_t costOfToken(const std::string &token, - size_t numOccurrences) const; + //! What would be the cost of adding the specified token occurring the + //! specified number of times to the reverse search? + virtual size_t costOfToken(const std::string& token, size_t numOccurrences) const; - //! Create a reverse search for a NULL field value. - virtual bool createNullSearch(std::string &part1, - std::string &part2) const; + //! Create a reverse search for a NULL field value. + virtual bool createNullSearch(std::string& part1, std::string& part2) const; - //! If possible, create a reverse search for the case where there are no - //! unique tokens identifying the type. (If this is not possible return - //! false.) - virtual bool createNoUniqueTokenSearch(int type, - const std::string &example, - size_t maxMatchingStringLen, - std::string &part1, - std::string &part2) const; + //! If possible, create a reverse search for the case where there are no + //! unique tokens identifying the type. 
(If this is not possible return + //! false.) + virtual bool createNoUniqueTokenSearch(int type, + const std::string& example, + size_t maxMatchingStringLen, + std::string& part1, + std::string& part2) const; - //! Initialise the two strings that form a reverse search. For example, - //! this could be as simple as clearing the strings or setting them to - //! some sort of one-off preamble. - virtual void initStandardSearch(int type, - const std::string &example, - size_t maxMatchingStringLen, - std::string &part1, - std::string &part2) const; + //! Initialise the two strings that form a reverse search. For example, + //! this could be as simple as clearing the strings or setting them to + //! some sort of one-off preamble. + virtual void + initStandardSearch(int type, const std::string& example, size_t maxMatchingStringLen, std::string& part1, std::string& part2) const; - //! Modify the two strings that form a reverse search to account for the - //! specified token, which may occur anywhere within the original - //! message, but has been determined to be a good thing to distinguish - //! this type of messages from other types. - virtual void addCommonUniqueToken(const std::string &token, - std::string &part1, - std::string &part2) const; + //! Modify the two strings that form a reverse search to account for the + //! specified token, which may occur anywhere within the original + //! message, but has been determined to be a good thing to distinguish + //! this type of messages from other types. + virtual void addCommonUniqueToken(const std::string& token, std::string& part1, std::string& part2) const; - //! Modify the two strings that form a reverse search to account for the - //! specified token. - virtual void addInOrderCommonToken(const std::string &token, - bool first, - std::string &part1, - std::string &part2) const; + //! Modify the two strings that form a reverse search to account for the + //! specified token. + virtual void addInOrderCommonToken(const std::string& token, bool first, std::string& part1, std::string& part2) const; - //! Close off the two strings that form a reverse search. For example, - //! this may be when closing brackets need to be appended. - virtual void closeStandardSearch(std::string &part1, - std::string &part2) const; + //! Close off the two strings that form a reverse search. For example, + //! this may be when closing brackets need to be appended. + virtual void closeStandardSearch(std::string& part1, std::string& part2) const; }; - } } #endif // INCLUDED_ml_api_CTokenListReverseSearchCreator_h - diff --git a/include/api/CTokenListReverseSearchCreatorIntf.h b/include/api/CTokenListReverseSearchCreatorIntf.h index 679d7ccc2e..13dbe92009 100644 --- a/include/api/CTokenListReverseSearchCreatorIntf.h +++ b/include/api/CTokenListReverseSearchCreatorIntf.h @@ -10,11 +10,8 @@ #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { //! \brief //! Interface for classes that create reverse searches for the token @@ -32,80 +29,65 @@ namespace api //! typer, because at present duplicating a const pointer is deemed //! adequate.) //! -class API_EXPORT CTokenListReverseSearchCreatorIntf -{ - public: - CTokenListReverseSearchCreatorIntf(const std::string &fieldName); - - //! Virtual destructor for an abstract base class - virtual ~CTokenListReverseSearchCreatorIntf(); - - //! What's the maximum cost of tokens we can include in the reverse - //! search? Derived classes can decide what they mean by cost, as they - //! 
also decide the cost of each token.
-        virtual size_t availableCost() const = 0;
-
-        //! What would be the cost of adding the specified token occurring the
-        //! specified number of times to the reverse search? Derived classes
-        //! can decide what they mean by cost, as they also decide what the
-        //! maximum permitted total cost is.
-        virtual size_t costOfToken(const std::string &token,
-                                   size_t numOccurrences) const = 0;
-
-        //! If possible, create a reverse search for a NULL field value. (If
-        //! this is not possible return false.)
-        virtual bool createNullSearch(std::string &part1,
-                                      std::string &part2) const = 0;
-
-        //! If possible, create a reverse search for the case where there are no
-        //! unique tokens identifying the type. (If this is not possible return
-        //! false.)
-        virtual bool createNoUniqueTokenSearch(int type,
-                                               const std::string &example,
-                                               size_t maxMatchingStringLen,
-                                               std::string &part1,
-                                               std::string &part2) const = 0;
-
-        //! Initialise the two strings that form a reverse search. For example,
-        //! this could be as simple as clearing the strings or setting them to
-        //! some sort of one-off preamble.
-        virtual void initStandardSearch(int type,
-                                        const std::string &example,
-                                        size_t maxMatchingStringLen,
-                                        std::string &part1,
-                                        std::string &part2) const = 0;
-
-        //! Modify the two strings that form a reverse search to account for the
-        //! specified token, which may occur anywhere within the original
-        //! message, but has been determined to be a good thing to distinguish
-        //! this type of messages from other types.
-        virtual void addCommonUniqueToken(const std::string &token,
-                                          std::string &part1,
-                                          std::string &part2) const = 0;
-
-        //! Modify the two strings that form a reverse search to account for the
-        //! specified token.
-        virtual void addInOrderCommonToken(const std::string &token,
-                                           bool first,
-                                           std::string &part1,
-                                           std::string &part2) const = 0;
-
-        //! Close off the two strings that form a reverse search. For example,
-        //! this may be when closing brackets need to be appended.
-        virtual void closeStandardSearch(std::string &part1,
-                                         std::string &part2) const;
-
-        //! Access to the field name
-        const std::string &fieldName() const;
-
-    private:
-        //! Which field name is being used for categorisation?
-        std::string m_FieldName;
+class API_EXPORT CTokenListReverseSearchCreatorIntf {
+public:
+    CTokenListReverseSearchCreatorIntf(const std::string& fieldName);
+
+    //! Virtual destructor for an abstract base class
+    virtual ~CTokenListReverseSearchCreatorIntf();
+
+    //! What's the maximum cost of tokens we can include in the reverse
+    //! search? Derived classes can decide what they mean by cost, as they
+    //! also decide the cost of each token.
+    virtual size_t availableCost() const = 0;
+
+    //! What would be the cost of adding the specified token occurring the
+    //! specified number of times to the reverse search? Derived classes
+    //! can decide what they mean by cost, as they also decide what the
+    //! maximum permitted total cost is.
+    virtual size_t costOfToken(const std::string& token, size_t numOccurrences) const = 0;
+
+    //! If possible, create a reverse search for a NULL field value. (If
+    //! this is not possible return false.)
+    virtual bool createNullSearch(std::string& part1, std::string& part2) const = 0;
+
+    //! If possible, create a reverse search for the case where there are no
+    //! unique tokens identifying the type. (If this is not possible return
+    //! false.)
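
Taken together, the creation methods above and below are driven in a fixed order when a reverse search is built for a category. A sketch of that calling sequence against the concrete CTokenListReverseSearchCreator from earlier in this patch (the token lists are illustrative):

    #include <api/CTokenListReverseSearchCreator.h>

    #include <cstddef>
    #include <string>
    #include <vector>

    // Build the two-part reverse search for one category, mirroring the
    // interface contract documented above.
    void buildReverseSearch(const ml::api::CTokenListReverseSearchCreator& creator,
                            int type,
                            const std::string& example,
                            std::size_t maxMatchingStringLen,
                            const std::vector<std::string>& uniqueTokens,
                            const std::vector<std::string>& inOrderTokens,
                            std::string& part1,
                            std::string& part2) {
        creator.initStandardSearch(type, example, maxMatchingStringLen, part1, part2);
        for (std::size_t i = 0; i < uniqueTokens.size(); ++i) {
            creator.addCommonUniqueToken(uniqueTokens[i], part1, part2);
        }
        for (std::size_t i = 0; i < inOrderTokens.size(); ++i) {
            creator.addInOrderCommonToken(inOrderTokens[i], i == 0, part1, part2);
        }
        creator.closeStandardSearch(part1, part2);
    }

In a real caller the budget would also be respected, checking availableCost() against the accumulated costOfToken() of each candidate before adding it.
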
+ virtual bool createNoUniqueTokenSearch(int type, + const std::string& example, + size_t maxMatchingStringLen, + std::string& part1, + std::string& part2) const = 0; + + //! Initialise the two strings that form a reverse search. For example, + //! this could be as simple as clearing the strings or setting them to + //! some sort of one-off preamble. + virtual void + initStandardSearch(int type, const std::string& example, size_t maxMatchingStringLen, std::string& part1, std::string& part2) const = 0; + + //! Modify the two strings that form a reverse search to account for the + //! specified token, which may occur anywhere within the original + //! message, but has been determined to be a good thing to distinguish + //! this type of messages from other types. + virtual void addCommonUniqueToken(const std::string& token, std::string& part1, std::string& part2) const = 0; + + //! Modify the two strings that form a reverse search to account for the + //! specified token. + virtual void addInOrderCommonToken(const std::string& token, bool first, std::string& part1, std::string& part2) const = 0; + + //! Close off the two strings that form a reverse search. For example, + //! this may be when closing brackets need to be appended. + virtual void closeStandardSearch(std::string& part1, std::string& part2) const; + + //! Access to the field name + const std::string& fieldName() const; + +private: + //! Which field name is being used for categorisation? + std::string m_FieldName; }; - - } } #endif // INCLUDED_ml_api_CTokenListReverseSearchCreatorIntf_h - diff --git a/include/api/CTokenListType.h b/include/api/CTokenListType.h index a16e62014a..e579d6acd4 100644 --- a/include/api/CTokenListType.h +++ b/include/api/CTokenListType.h @@ -13,16 +13,12 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace api -{ +namespace api { //! \brief //! The data associated with this Ml 'type' @@ -38,134 +34,128 @@ namespace api //! that created this object knows the mappings between the //! token IDs and string tokens. //! -class API_EXPORT CTokenListType -{ - public: - //! Used to associate tokens with weightings: - //! first -> token ID - //! second -> weighting - using TSizeSizePr = std::pair; - - //! Used for storing token ID sequences - using TSizeSizePrVec = std::vector; - using TSizeSizePrVecItr = TSizeSizePrVec::iterator; - using TSizeSizePrVecCItr = TSizeSizePrVec::const_iterator; - - //! Used for storing distinct token IDs mapped to weightings - using TSizeSizeMap = std::map; - using TSizeSizeMapItr = TSizeSizeMap::iterator; - using TSizeSizeMapCItr = TSizeSizeMap::const_iterator; - - public: - //! Create a new type - CTokenListType(bool isDryRun, - const std::string &baseString, - size_t rawStringLen, - const TSizeSizePrVec &baseTokenIds, - size_t baseWeight, - const TSizeSizeMap &uniqueTokenIds); - - //! Constructor used when restoring from XML - CTokenListType(core::CStateRestoreTraverser &traverser); - - //! Add string to this type with a double indicating - //! how well matched the string is - bool addString(bool isDryRun, - const std::string &str, - size_t rawStringLen, - const TSizeSizePrVec &tokenIds, - const TSizeSizeMap &uniqueTokenIds, - double similarity); - - //! 
Accessors - const std::string &baseString() const; - const TSizeSizePrVec &baseTokenIds() const; - size_t baseWeight() const; - const TSizeSizePrVec &commonUniqueTokenIds() const; - size_t commonUniqueTokenWeight() const; - size_t origUniqueTokenWeight() const; - size_t maxStringLen() const; - size_t outOfOrderCommonTokenIndex() const; - - //! What's the longest string we'll consider a match for this type? - //! Currently simply 10% longer than the longest string we've seen. - size_t maxMatchingStringLen() const; - - //! What is the weight of tokens in a given map that are missing from - //! this type's common unique tokens? - size_t missingCommonTokenWeight(const TSizeSizeMap &uniqueTokenIds) const; - - //! Is the weight of tokens in a given map that are missing from this - //! type's common unique tokens equal to zero? It is possible to test: - //! if (type.missingCommonTokenWeight(uniqueTokenIds) == 0) - //! instead of calling this method. However, this method is much faster - //! as it can return false as soon as a mismatch occurs. - bool isMissingCommonTokenWeightZero(const TSizeSizeMap &uniqueTokenIds) const; - - //! Does the supplied token vector contain all our common tokens in the - //! same order as our base token vector? - bool containsCommonTokensInOrder(const TSizeSizePrVec &tokenIds) const; - - //! How many matching strings are there? - size_t numMatches() const; - - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Attempt to get cached reverse search - bool cachedReverseSearch(std::string &part1, - std::string &part2) const; - - //! Set the cached reverse search - void cacheReverseSearch(const std::string &part1, - const std::string &part2); - - private: - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - private: - //! The string and tokens we base this type on - std::string m_BaseString; - TSizeSizePrVec m_BaseTokenIds; - - //! Cache the total weight of the base tokens - size_t m_BaseWeight; - - //! The maximum original length of all the strings that have been - //! classified as this type. The original length may be longer than the - //! length of the strings in passed to the addString() method, because - //! it will include the date. - size_t m_MaxStringLen; - - //! The index into the base token IDs that we should stop at when - //! generating an ordered regex, because subsequent common token IDs are - //! not in the same order for all strings of this type. - size_t m_OutOfOrderCommonTokenIndex; - - //! The unique token IDs that all strings classified to be this type - //! contain. This vector must always be sorted into ascending order. - TSizeSizePrVec m_CommonUniqueTokenIds; - - //! Cache the weight of the common unique tokens - size_t m_CommonUniqueTokenWeight; - - //! What was the weight of the original unique tokens (i.e. when the type - //! only represented one string)? Remembering this means we can ensure - //! that the degree of commonality doesn't fall below a certain level as - //! the number of strings classified as this type grows. - size_t m_OrigUniqueTokenWeight; - - //! Number of matched strings - size_t m_NumMatches; - - //! Cache reverse searches to save repeated recalculations - std::string m_ReverseSearchPart1; - std::string m_ReverseSearchPart2; +class API_EXPORT CTokenListType { +public: + //! Used to associate tokens with weightings: + //! first -> token ID + //! second -> weighting + using TSizeSizePr = std::pair; + + //! 
Used for storing token ID sequences + using TSizeSizePrVec = std::vector; + using TSizeSizePrVecItr = TSizeSizePrVec::iterator; + using TSizeSizePrVecCItr = TSizeSizePrVec::const_iterator; + + //! Used for storing distinct token IDs mapped to weightings + using TSizeSizeMap = std::map; + using TSizeSizeMapItr = TSizeSizeMap::iterator; + using TSizeSizeMapCItr = TSizeSizeMap::const_iterator; + +public: + //! Create a new type + CTokenListType(bool isDryRun, + const std::string& baseString, + size_t rawStringLen, + const TSizeSizePrVec& baseTokenIds, + size_t baseWeight, + const TSizeSizeMap& uniqueTokenIds); + + //! Constructor used when restoring from XML + CTokenListType(core::CStateRestoreTraverser& traverser); + + //! Add string to this type with a double indicating + //! how well matched the string is + bool addString(bool isDryRun, + const std::string& str, + size_t rawStringLen, + const TSizeSizePrVec& tokenIds, + const TSizeSizeMap& uniqueTokenIds, + double similarity); + + //! Accessors + const std::string& baseString() const; + const TSizeSizePrVec& baseTokenIds() const; + size_t baseWeight() const; + const TSizeSizePrVec& commonUniqueTokenIds() const; + size_t commonUniqueTokenWeight() const; + size_t origUniqueTokenWeight() const; + size_t maxStringLen() const; + size_t outOfOrderCommonTokenIndex() const; + + //! What's the longest string we'll consider a match for this type? + //! Currently simply 10% longer than the longest string we've seen. + size_t maxMatchingStringLen() const; + + //! What is the weight of tokens in a given map that are missing from + //! this type's common unique tokens? + size_t missingCommonTokenWeight(const TSizeSizeMap& uniqueTokenIds) const; + + //! Is the weight of tokens in a given map that are missing from this + //! type's common unique tokens equal to zero? It is possible to test: + //! if (type.missingCommonTokenWeight(uniqueTokenIds) == 0) + //! instead of calling this method. However, this method is much faster + //! as it can return false as soon as a mismatch occurs. + bool isMissingCommonTokenWeightZero(const TSizeSizeMap& uniqueTokenIds) const; + + //! Does the supplied token vector contain all our common tokens in the + //! same order as our base token vector? + bool containsCommonTokensInOrder(const TSizeSizePrVec& tokenIds) const; + + //! How many matching strings are there? + size_t numMatches() const; + + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Attempt to get cached reverse search + bool cachedReverseSearch(std::string& part1, std::string& part2) const; + + //! Set the cached reverse search + void cacheReverseSearch(const std::string& part1, const std::string& part2); + +private: + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + +private: + //! The string and tokens we base this type on + std::string m_BaseString; + TSizeSizePrVec m_BaseTokenIds; + + //! Cache the total weight of the base tokens + size_t m_BaseWeight; + + //! The maximum original length of all the strings that have been + //! classified as this type. The original length may be longer than the + //! length of the strings in passed to the addString() method, because + //! it will include the date. + size_t m_MaxStringLen; + + //! The index into the base token IDs that we should stop at when + //! generating an ordered regex, because subsequent common token IDs are + //! not in the same order for all strings of this type. 
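
One detail above is worth pinning down with numbers: maxMatchingStringLen() is documented as simply 10% longer than the longest string seen. A sketch, assuming plain truncating integer arithmetic:

    #include <cstddef>
    #include <iostream>

    // Rule of thumb documented above: a candidate string may be at most
    // 10% longer than the longest string so far classified as this type.
    std::size_t maxMatchingStringLen(std::size_t maxStringLen) {
        return maxStringLen + maxStringLen / 10;
    }

    int main() {
        // With a longest-seen length of 120 the cap is 132: a 130 character
        // message can still match this type, a 140 character one cannot.
        std::cout << maxMatchingStringLen(120) << std::endl; // prints 132
        return 0;
    }
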
+ size_t m_OutOfOrderCommonTokenIndex; + + //! The unique token IDs that all strings classified to be this type + //! contain. This vector must always be sorted into ascending order. + TSizeSizePrVec m_CommonUniqueTokenIds; + + //! Cache the weight of the common unique tokens + size_t m_CommonUniqueTokenWeight; + + //! What was the weight of the original unique tokens (i.e. when the type + //! only represented one string)? Remembering this means we can ensure + //! that the degree of commonality doesn't fall below a certain level as + //! the number of strings classified as this type grows. + size_t m_OrigUniqueTokenWeight; + + //! Number of matched strings + size_t m_NumMatches; + + //! Cache reverse searches to save repeated recalculations + std::string m_ReverseSearchPart1; + std::string m_ReverseSearchPart2; }; - - } } #endif // INCLUDED_ml_api_CTokenListType_h - diff --git a/include/api/ImportExport.h b/include/api/ImportExport.h index a4dd379ff0..df219a9648 100644 --- a/include/api/ImportExport.h +++ b/include/api/ImportExport.h @@ -36,4 +36,3 @@ #endif #endif // INCLUDED_ml_api_ImportExport_h - diff --git a/include/config/CAutoconfigurer.h b/include/config/CAutoconfigurer.h index 9fe6097a3f..b95362b82d 100644 --- a/include/config/CAutoconfigurer.h +++ b/include/config/CAutoconfigurer.h @@ -13,10 +13,8 @@ #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CAutoconfigurerImpl; class CAutoconfigurerParams; class CReportWriter; @@ -35,42 +33,38 @@ class CReportWriter; //! //! We use the pimpl idiom to isolate the internals of this library from the //! automatic configuration commands. -class CONFIG_EXPORT CAutoconfigurer : public api::CDataProcessor -{ - public: - CAutoconfigurer(const CAutoconfigurerParams ¶ms, - CReportWriter &reportWriter); +class CONFIG_EXPORT CAutoconfigurer : public api::CDataProcessor { +public: + CAutoconfigurer(const CAutoconfigurerParams& params, CReportWriter& reportWriter); - //! We're going to be writing to a new output stream. - virtual void newOutputStream(); + //! We're going to be writing to a new output stream. + virtual void newOutputStream(); - //! Receive a single record to be processed. - virtual bool handleRecord(const TStrStrUMap &fieldValues); + //! Receive a single record to be processed. + virtual bool handleRecord(const TStrStrUMap& fieldValues); - //! Generate the report. - virtual void finalise(); + //! Generate the report. + virtual void finalise(); - //! No-op. - virtual bool restoreState(core::CDataSearcher &restoreSearcher, - core_t::TTime &completeToTime); + //! No-op. + virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); - //! No-op. - virtual bool persistState(core::CDataAdder &persister); + //! No-op. + virtual bool persistState(core::CDataAdder& persister); - //! How many records did we handle? - virtual uint64_t numRecordsHandled() const; + //! How many records did we handle? + virtual uint64_t numRecordsHandled() const; - //! Access the output handler. - virtual api::COutputHandler &outputHandler(); + //! Access the output handler. + virtual api::COutputHandler& outputHandler(); - private: - using TImplPtr = boost::shared_ptr; +private: + using TImplPtr = boost::shared_ptr; - private: - //! The pointer to the actual implementation. - TImplPtr m_Impl; +private: + //! The pointer to the actual implementation. 
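
The pimpl arrangement described above (the m_Impl member declared just below is the whole of CAutoconfigurer's state) follows the standard shape. The pattern in miniature, with illustrative names rather than the library's:

    #include <boost/shared_ptr.hpp>

    // Header side: only a forward declaration of the implementation escapes,
    // so clients never see its dependencies.
    class CExampleImpl;

    class CExample {
    public:
        CExample();
        void doWork();

    private:
        using TImplPtr = boost::shared_ptr<CExampleImpl>;
        TImplPtr m_Impl; // all state lives behind this pointer
    };

    // Source side: the implementation may pull in heavy headers freely.
    class CExampleImpl {
    public:
        void doWork() { /* real logic here */ }
    };

    CExample::CExample() : m_Impl(new CExampleImpl) {}
    void CExample::doWork() { m_Impl->doWork(); }
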
+ TImplPtr m_Impl; }; - } } diff --git a/include/config/CAutoconfigurerDetectorPenalties.h b/include/config/CAutoconfigurerDetectorPenalties.h index f9dd20440c..379d682e0e 100644 --- a/include/config/CAutoconfigurerDetectorPenalties.h +++ b/include/config/CAutoconfigurerDetectorPenalties.h @@ -14,10 +14,8 @@ #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CAutoconfigurerParams; class CAutoconfigurerFieldRolePenalties; class CDetectorSpecification; @@ -33,44 +31,41 @@ class CPenalty; //! This provides a single definition point for a logical group of penalties //! and has been factored into its own class to avoid CAutoconfigurer becoming //! monolithic. -class CONFIG_EXPORT CAutoconfigurerDetectorPenalties -{ - public: - using TPenaltyPtr = boost::shared_ptr; +class CONFIG_EXPORT CAutoconfigurerDetectorPenalties { +public: + using TPenaltyPtr = boost::shared_ptr; - public: - CAutoconfigurerDetectorPenalties(const CAutoconfigurerParams ¶ms, - const CAutoconfigurerFieldRolePenalties &fieldRolePenalties); +public: + CAutoconfigurerDetectorPenalties(const CAutoconfigurerParams& params, const CAutoconfigurerFieldRolePenalties& fieldRolePenalties); - //! Get the penalty for the detector \p spec. - TPenaltyPtr penaltyFor(const CDetectorSpecification &spec); + //! Get the penalty for the detector \p spec. + TPenaltyPtr penaltyFor(const CDetectorSpecification& spec); - private: - using TAutoconfigurerParamsCRef = boost::reference_wrapper; - using TAutoconfigurerFieldRolePenaltiesCRef = boost::reference_wrapper; - using TPenaltyPtrVec = std::vector; +private: + using TAutoconfigurerParamsCRef = boost::reference_wrapper; + using TAutoconfigurerFieldRolePenaltiesCRef = boost::reference_wrapper; + using TPenaltyPtrVec = std::vector; - private: - //! Get the penalty for the detector \p spec based on its field roles. - const CPenalty &fieldRolePenalty(const CDetectorSpecification &spec); +private: + //! Get the penalty for the detector \p spec based on its field roles. + const CPenalty& fieldRolePenalty(const CDetectorSpecification& spec); - private: - //! The parameters. - TAutoconfigurerParamsCRef m_Params; +private: + //! The parameters. + TAutoconfigurerParamsCRef m_Params; - //! The field role penalties. - TAutoconfigurerFieldRolePenaltiesCRef m_FieldRolePenalties; + //! The field role penalties. + TAutoconfigurerFieldRolePenaltiesCRef m_FieldRolePenalties; - //! The detector penalties based on their fields and roles. - TPenaltyPtrVec m_DetectorFieldRolePenalties; + //! The detector penalties based on their fields and roles. + TPenaltyPtrVec m_DetectorFieldRolePenalties; - //! The bucket length penalties. - TPenaltyPtrVec m_BucketLengthPenalties; + //! The bucket length penalties. + TPenaltyPtrVec m_BucketLengthPenalties; - //! The function specific penalties. - TPenaltyPtrVec m_FunctionSpecificPenalties; + //! The function specific penalties. + TPenaltyPtrVec m_FunctionSpecificPenalties; }; - } } diff --git a/include/config/CAutoconfigurerFieldRolePenalties.h b/include/config/CAutoconfigurerFieldRolePenalties.h index 483eb5fd53..d07643f89a 100644 --- a/include/config/CAutoconfigurerFieldRolePenalties.h +++ b/include/config/CAutoconfigurerFieldRolePenalties.h @@ -13,10 +13,8 @@ #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CAutoconfigurerParams; class CPenalty; @@ -30,37 +28,35 @@ class CPenalty; //! This provides a single definition point for a logical group of penalties //! 
and has been factored into its own class to avoid CAutoconfigurer becoming //! monolithic. -class CONFIG_EXPORT CAutoconfigurerFieldRolePenalties : core::CNonCopyable -{ - public: - CAutoconfigurerFieldRolePenalties(const CAutoconfigurerParams ¶ms); +class CONFIG_EXPORT CAutoconfigurerFieldRolePenalties : core::CNonCopyable { +public: + CAutoconfigurerFieldRolePenalties(const CAutoconfigurerParams& params); - //! Get the penalty for categorical function arguments. - const CPenalty &categoricalFunctionArgumentPenalty() const; + //! Get the penalty for categorical function arguments. + const CPenalty& categoricalFunctionArgumentPenalty() const; - //! Get the penalty for metric function arguments. - const CPenalty &metricFunctionArgumentPenalty() const; + //! Get the penalty for metric function arguments. + const CPenalty& metricFunctionArgumentPenalty() const; - //! Get the penalty for "by" fields. - const CPenalty &byPenalty() const; + //! Get the penalty for "by" fields. + const CPenalty& byPenalty() const; - //! Get the penalty for "by" fields of rare commands. - const CPenalty &rareByPenalty() const; + //! Get the penalty for "by" fields of rare commands. + const CPenalty& rareByPenalty() const; - //! Get the penalty for "over" fields. - const CPenalty &overPenalty() const; + //! Get the penalty for "over" fields. + const CPenalty& overPenalty() const; - //! Get the penalty for "partition" fields. - const CPenalty &partitionPenalty() const; + //! Get the penalty for "partition" fields. + const CPenalty& partitionPenalty() const; - private: - using TPenaltyCPtr = boost::shared_ptr; +private: + using TPenaltyCPtr = boost::shared_ptr; - private: - //! The penalties. - TPenaltyCPtr m_Penalties[6]; +private: + //! The penalties. + TPenaltyCPtr m_Penalties[6]; }; - } } diff --git a/include/config/CAutoconfigurerParams.h b/include/config/CAutoconfigurerParams.h index befaa0c6c5..a74738ccd7 100644 --- a/include/config/CAutoconfigurerParams.h +++ b/include/config/CAutoconfigurerParams.h @@ -21,10 +21,8 @@ #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { //! \brief The parameters which control auto-configuration. //! @@ -46,408 +44,403 @@ namespace config //! //! Because supplying parameters via the config file is optional the //! boost property_tree is copied into separate member variables. -class CONFIG_EXPORT CAutoconfigurerParams -{ - public: - using TTimeVec = std::vector; - using TSizeVec = std::vector; - using TStrVec = std::vector; - using TOptionalStrVec = boost::optional; - using TStrUserDataTypePr = std::pair; - using TStrUserDataTypePrVec = std::vector; - using TOptionalUserDataType = boost::optional; - using TFunctionCategoryVec = std::vector; - - public: - CAutoconfigurerParams(const std::string &timeFieldName, - const std::string &timeFieldFormat, - bool verbose, - bool writeDetectorConfigs); - - //! Initialize from the specified file. - bool init(const std::string &file); - - //! Get the name of field holding the time. - const std::string &timeFieldName() const; - - //! Get the time field format. Blank means seconds since the epoch, i.e. - //! the time field can be converted to a time_t by simply converting the - //! string to a number. Otherwise, it is assumed to be suitable for passing - //! to strptime. - const std::string &timeFieldFormat() const; - - //! Check if we should be outputting all detectors including those that - //! have been discarded. - bool verbose() const; - - //! Check if we should output the top detectors in JSON format. 
- bool writeDetectorConfigs() const; - - //! Get the line ending to use when writing detectors in JSON format. - const std::string &detectorConfigLineEnding() const; - - //! \name Scoping - //@{ - //! Check that \p field is not one of the fields in the data set we've - //! been told to ignore. - bool fieldOfInterest(const std::string &field) const; +class CONFIG_EXPORT CAutoconfigurerParams { +public: + using TTimeVec = std::vector; + using TSizeVec = std::vector; + using TStrVec = std::vector; + using TOptionalStrVec = boost::optional; + using TStrUserDataTypePr = std::pair; + using TStrUserDataTypePrVec = std::vector; + using TOptionalUserDataType = boost::optional; + using TFunctionCategoryVec = std::vector; - //! Check if we can use \p argument for the argument of a function. - bool canUseForFunctionArgument(const std::string &argument) const; +public: + CAutoconfigurerParams(const std::string& timeFieldName, const std::string& timeFieldFormat, bool verbose, bool writeDetectorConfigs); - //! Check if we can use \p by as a by field. - bool canUseForByField(const std::string &by) const; - - //! Check if we can use \p over as an over field. - bool canUseForOverField(const std::string &over) const; + //! Initialize from the specified file. + bool init(const std::string& file); - //! Check if we can use \p partition as a partition field. - bool canUseForPartitionField(const std::string &partition) const; + //! Get the name of field holding the time. + const std::string& timeFieldName() const; - //! Get the function categories to configure. - const TFunctionCategoryVec &functionsCategoriesToConfigure() const; - //@} + //! Get the time field format. Blank means seconds since the epoch, i.e. + //! the time field can be converted to a time_t by simply converting the + //! string to a number. Otherwise, it is assumed to be suitable for passing + //! to strptime. + const std::string& timeFieldFormat() const; - //! \name Statistics - //@{ - //! The user specified field data types. - TOptionalUserDataType dataType(const std::string &field) const; + //! Check if we should be outputting all detectors including those that + //! have been discarded. + bool verbose() const; - //! The minimum number of records to classify a field. - uint64_t minimumExamplesToClassify() const; + //! Check if we should output the top detectors in JSON format. + bool writeDetectorConfigs() const; - //! The minimum number of records to classify a field. - std::size_t numberOfMostFrequentFieldsCounts() const; - //@} + //! Get the line ending to use when writing detectors in JSON format. + const std::string& detectorConfigLineEnding() const; - //! \name General Configuration - //@{ - //! The minimum number of records to classify a field. - uint64_t minimumRecordsToAttemptConfig() const; + //! \name Scoping + //@{ + //! Check that \p field is not one of the fields in the data set we've + //! been told to ignore. + bool fieldOfInterest(const std::string& field) const; - //! The minimum permitted detector score. - double minimumDetectorScore() const; - //@} + //! Check if we can use \p argument for the argument of a function. + bool canUseForFunctionArgument(const std::string& argument) const; - //! A number of by field values which is considered high so - //! larger numbers will be penalized. - std::size_t highNumberByFieldValues() const; + //! Check if we can use \p by as a by field. + bool canUseForByField(const std::string& by) const; - //! The highest permitted number of by field values. 
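
These paired accessors read as a soft threshold ("high", where penalisation starts) followed by a hard cap ("maximum", beyond which a configuration is ruled out). Purely as a sketch of one plausible penalty shape between the two marks (the real CPenalty curves are defined elsewhere in the config library and may differ):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <iostream>

    // Multiplicative penalty in [0, 1] for a by field cardinality, given the
    // soft and hard thresholds described above, interpolating on a log scale.
    double byFieldCardinalityPenalty(std::size_t distinctCount,
                                     std::size_t highCount,
                                     std::size_t maximumCount) {
        if (distinctCount <= highCount) {
            return 1.0; // unpenalised
        }
        if (distinctCount > maximumCount) {
            return 0.0; // disqualified outright
        }
        double x = (std::log(double(distinctCount)) - std::log(double(highCount))) /
                   (std::log(double(maximumCount)) - std::log(double(highCount)));
        return std::max(0.0, 1.0 - x);
    }

    int main() {
        std::cout << byFieldCardinalityPenalty(50, 100, 10000) << '\n';    // 1
        std::cout << byFieldCardinalityPenalty(1000, 100, 10000) << '\n';  // 0.5
        std::cout << byFieldCardinalityPenalty(20000, 100, 10000) << '\n'; // 0
        return 0;
    }
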
- std::size_t maximumNumberByFieldValues() const; - - //! A number of by field values for rare commands which is considered - //! high so larger numbers will be penalized. - std::size_t highNumberRareByFieldValues() const; - - //! The highest permitted number of by field values for rare commands. - std::size_t maximumNumberRareByFieldValues() const; - - //! A number of partition field values which is considered high - //! so larger numbers will be penalized. - std::size_t highNumberPartitionFieldValues() const; - - //! The highest permitted number of partition field values. - std::size_t maximumNumberPartitionFieldValues() const; - - //! A number of over field values which is considered small so - //! that smaller numbers will be penalized. - std::size_t lowNumberOverFieldValues() const; + //! Check if we can use \p over as an over field. + bool canUseForOverField(const std::string& over) const; - //! The lowest permitted number of over field values. - std::size_t minimumNumberOverFieldValues() const; + //! Check if we can use \p partition as a partition field. + bool canUseForPartitionField(const std::string& partition) const; - //! The factor, as a multiple of the lowest field value count, which is - //! an upper bound for a field value being in the low frequency tail. - double highCardinalityInTailFactor() const; + //! Get the function categories to configure. + const TFunctionCategoryVec& functionsCategoriesToConfigure() const; + //@} - //! The margin, as an increment on the lowest field value count, which - //! is an upper bound for a field value being in the low frequency tail. - uint64_t highCardinalityInTailIncrement() const; + //! \name Statistics + //@{ + //! The user specified field data types. + TOptionalUserDataType dataType(const std::string& field) const; - //! A proportion of records in the low frequency tail for rare analysis - //! which is considered large so that larger proportions will be penalized. - double highCardinalityHighTailFraction() const; + //! The minimum number of records to classify a field. + uint64_t minimumExamplesToClassify() const; - //! The highest permitted proportion of records in the low frequency - //! tail for rare analysis. - double highCardinalityMaximumTailFraction() const; + //! The minimum number of records to classify a field. + std::size_t numberOfMostFrequentFieldsCounts() const; + //@} - //! A fraction of populated buckets that is considered small for \p function - //! and \p ignoreEmpty so that smaller proportions will be penalized. - double lowPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const; + //! \name General Configuration + //@{ + //! The minimum number of records to classify a field. + uint64_t minimumRecordsToAttemptConfig() const; - //! The smallest permitted fraction of populated buckets for \p function and - //! \p ignoreEmpty. - double minimumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const; + //! The minimum permitted detector score. + double minimumDetectorScore() const; + //@} - //! A fraction of populated buckets that is considered high for \p function - //! and \p ignoreEmpty so that higher fractions will be penalized. - double highPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const; + //! A number of by field values which is considered high so + //! larger numbers will be penalized. + std::size_t highNumberByFieldValues() const; - //! The maximum permitted fraction of populated buckets for \p function and - //! 
-        //! \p ignoreEmpty.
-        double maximumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const;
+    //! The highest permitted number of by field values.
+    std::size_t maximumNumberByFieldValues() const;
 
-        //! Get the candidate bucket lengths to test for each detector.
-        const TTimeVec &candidateBucketLengths() const;
+    //! A number of by field values for rare commands which is considered
+    //! high so larger numbers will be penalized.
+    std::size_t highNumberRareByFieldValues() const;
 
-        //! Get a number of buckets considered small to configure a detector.
-        double lowNumberOfBucketsForConfig() const;
+    //! The highest permitted number of by field values for rare commands.
+    std::size_t maximumNumberRareByFieldValues() const;
+
+    //! A number of partition field values which is considered high
+    //! so larger numbers will be penalized.
+    std::size_t highNumberPartitionFieldValues() const;
+
+    //! The highest permitted number of partition field values.
+    std::size_t maximumNumberPartitionFieldValues() const;
+
+    //! A number of over field values which is considered small so
+    //! that smaller numbers will be penalized.
+    std::size_t lowNumberOverFieldValues() const;
 
-        //! Get the lowest permitted number of buckets we'll use to configure
-        //! a detector.
-        double minimumNumberOfBucketsForConfig() const;
+    //! The lowest permitted number of over field values.
+    std::size_t minimumNumberOverFieldValues() const;
 
-        //! Get the minimum possible proportion of values in the jitter interval
-        //! surrounding the polling interval to classify data as polled.
-        double polledDataMinimumMassAtInterval() const;
+    //! The factor, as a multiple of the lowest field value count, which is
+    //! an upper bound for a field value being in the low frequency tail.
+    double highCardinalityInTailFactor() const;
 
-        //! Get the maximum amount that polled data times can jitter about the
-        //! polling interval.
-        double polledDataJitter() const;
+    //! The margin, as an increment on the lowest field value count, which
+    //! is an upper bound for a field value being in the low frequency tail.
+    uint64_t highCardinalityInTailIncrement() const;
 
-        //! Get a coefficient of variation for a bucketed statistic which is
-        //! considered low such that lower values are penalized.
-        double lowCoefficientOfVariation() const;
+    //! A proportion of records in the low frequency tail for rare analysis
+    //! which is considered large so that larger proportions will be penalized.
+    double highCardinalityHighTailFraction() const;
 
-        //! Get the minimum coefficient of variation for a bucketed statistic
-        //! to be worthwhile modeling.
-        double minimumCoefficientOfVariation() const;
+    //! The highest permitted proportion of records in the low frequency
+    //! tail for rare analysis.
+    double highCardinalityMaximumTailFraction() const;
 
-        //! Get a low range for the category lengths to be a suitable argument
-        //! for information content such that lower values are penalized.
-        double lowLengthRangeForInfoContent() const;
+    //! A fraction of populated buckets that is considered small for \p function
+    //! and \p ignoreEmpty so that smaller proportions will be penalized.
+    double lowPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const;
 
-        //! Get the minimum range for the category lengths to be a suitable
-        //! argument for information content.
-        double minimumLengthRangeForInfoContent() const;
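The two "in tail" parameters above combine a multiplicative and an additive bound on the lowest observed field value count. A sketch of how a caller might use them; taking the larger of the two bounds is an assumption for illustration, not something this header specifies:

    #include <algorithm>
    #include <cstdint>

    // A category count is treated as lying in the low frequency tail if it
    // does not exceed the looser of the two bounds derived from the lowest
    // count seen: factor corresponds to highCardinalityInTailFactor() and
    // increment to highCardinalityInTailIncrement().
    bool inLowFrequencyTail(std::uint64_t count, std::uint64_t lowestCount,
                            double factor, std::uint64_t increment) {
        double bound = std::max(factor * static_cast<double>(lowestCount),
                                static_cast<double>(lowestCount + increment));
        return static_cast<double>(count) <= bound;
    }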
+    //! The smallest permitted fraction of populated buckets for \p function and
+    //! \p ignoreEmpty.
+    double minimumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const;
 
-        //! Get a low maximum category length for it to be a suitable argument
-        //! for information content such that lower values are penalized.
-        double lowMaximumLengthForInfoContent() const;
+    //! A fraction of populated buckets that is considered high for \p function
+    //! and \p ignoreEmpty so that higher fractions will be penalized.
+    double highPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const;
 
-        //! Get the minimum category length for it to be a suitable argument
-        //! for information content.
-        double minimumMaximumLengthForInfoContent() const;
+    //! The maximum permitted fraction of populated buckets for \p function and
+    //! \p ignoreEmpty.
+    double maximumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const;
 
-        //! Get a low empirical entropy for a field to be a suitable argument
-        //! for information content such that lower values will be penalized.
-        //!
-        //! \note This is as a portion of the maximum possible entropy based
-        //! on the distinct count.
-        double lowEntropyForInfoContent() const;
+    //! Get the candidate bucket lengths to test for each detector.
+    const TTimeVec& candidateBucketLengths() const;
 
-        //! Get the minimum empirical entropy for a field to be a suitable
-        //! argument for information content.
-        //!
-        //! \note This is as a portion of the maximum possible entropy based
-        //! on the distinct count.
-        double minimumEntropyForInfoContent() const;
+    //! Get a number of buckets considered small to configure a detector.
+    double lowNumberOfBucketsForConfig() const;
 
-        //! Get a low distinct count for a field to be a suitable argument
-        //! for information content such that lower values will be penalized.
-        double lowDistinctCountForInfoContent() const;
+    //! Get the lowest permitted number of buckets we'll use to configure
+    //! a detector.
+    double minimumNumberOfBucketsForConfig() const;
 
-        //! Get the minimum distinct count for a field to be a suitable
-        //! argument for information content.
-        double minimumDistinctCountForInfoContent() const;
+    //! Get the minimum possible proportion of values in the jitter interval
+    //! surrounding the polling interval to classify data as polled.
+    double polledDataMinimumMassAtInterval() const;
 
-        //! Get the penalty indices for the candidate bucket length identified
-        //! by \p bid.
-        const TSizeVec &penaltyIndicesFor(std::size_t bid) const;
+    //! Get the maximum amount that polled data times can jitter about the
+    //! polling interval.
+    double polledDataJitter() const;
 
-        //! Get the penalty indices for the function version which ignores empty
-        //! value if \p value is true and considers them otherwise.
-        const TSizeVec &penaltyIndicesFor(bool ignoreEmpty) const;
+    //! Get a coefficient of variation for a bucketed statistic which is
+    //! considered low such that lower values are penalized.
+    double lowCoefficientOfVariation() const;
 
-        //! Get the penalty index for the candidate bucket length identified
-        //! by \p bid and the function version which ignores empty.
-        std::size_t penaltyIndexFor(std::size_t bid, bool ignoreEmpty) const;
+    //! Get the minimum coefficient of variation for a bucketed statistic
+    //! to be worthwhile modeling.
+    double minimumCoefficientOfVariation() const;
 
-        //! Get a string describing all the parameters.
-        std::string print() const;
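Note that these accessors come in soft/hard pairs: a "low"/"high" threshold where penalization begins and a "minimum"/"maximum" threshold at which a candidate detector is ruled out entirely. A sketch of how such a pair might feed a multiplicative score; the linear ramp between the two thresholds is an assumption for illustration:

    // Map a statistic to a penalty in [0, 1]: 1 above the soft bound (for
    // example lowNumberOfBucketsForConfig()), 0 at or below the hard bound
    // (minimumNumberOfBucketsForConfig()), linear in between.
    double softPenalty(double value, double soft, double hard) {
        if (value >= soft) {
            return 1.0;
        }
        if (value <= hard) {
            return 0.0;
        }
        return (value - hard) / (soft - hard);
    }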
+    //! Get a low range for the category lengths to be a suitable argument
+    //! for information content such that lower values are penalized.
+    double lowLengthRangeForInfoContent() const;
 
-    private:
-        using TDoubleVec = std::vector<double>;
-        using TSizeVecVec = std::vector<TSizeVec>;
+    //! Get the minimum range for the category lengths to be a suitable
+    //! argument for information content.
+    double minimumLengthRangeForInfoContent() const;
 
-    private:
-        //! Refresh the penalty indices.
-        void refreshPenaltyIndices();
+    //! Get a low maximum category length for it to be a suitable argument
+    //! for information content such that lower values are penalized.
+    double lowMaximumLengthForInfoContent() const;
 
-    private:
-        //! The name of field holding the time.
-        std::string m_TimeFieldName;
+    //! Get the minimum category length for it to be a suitable argument
+    //! for information content.
+    double minimumMaximumLengthForInfoContent() const;
 
-        //! The time field format.
-        std::string m_TimeFieldFormat;
+    //! Get a low empirical entropy for a field to be a suitable argument
+    //! for information content such that lower values will be penalized.
+    //!
+    //! \note This is as a portion of the maximum possible entropy based
+    //! on the distinct count.
+    double lowEntropyForInfoContent() const;
 
-        //! If true then output information about all possible detectors including
-        //! those that have been discarded.
-        bool m_Verbose;
+    //! Get the minimum empirical entropy for a field to be a suitable
+    //! argument for information content.
+    //!
+    //! \note This is as a portion of the maximum possible entropy based
+    //! on the distinct count.
+    double minimumEntropyForInfoContent() const;
 
-        //! If true then output the top detector for each candidate in JSON format.
-        bool m_WriteDetectorConfigs;
+    //! Get a low distinct count for a field to be a suitable argument
+    //! for information content such that lower values will be penalized.
+    double lowDistinctCountForInfoContent() const;
 
-        //! The line ending to use when writing detectors in JSON format.
-        std::string m_DetectorConfigLineEnding;
+    //! Get the minimum distinct count for a field to be a suitable
+    //! argument for information content.
+    double minimumDistinctCountForInfoContent() const;
 
-        //! \name Scoping
-        //@{
-        //! The only fields to use in auto-configuration.
-        TOptionalStrVec m_FieldsOfInterest;
+    //! Get the penalty indices for the candidate bucket length identified
+    //! by \p bid.
+    const TSizeVec& penaltyIndicesFor(std::size_t bid) const;
 
-        //! The only argument or partition fields to use in auto-configuration.
-        TOptionalStrVec m_FieldsToUseInAutoconfigureByRole[constants::NUMBER_FIELD_INDICES];
+    //! Get the penalty indices for the function version which ignores empty
+    //! buckets if \p ignoreEmpty is true and considers them otherwise.
+    const TSizeVec& penaltyIndicesFor(bool ignoreEmpty) const;
 
-        //! The function categories to consider configuring.
-        TFunctionCategoryVec m_FunctionCategoriesToConfigure;
-        //@}
+    //! Get the penalty index for the candidate bucket length identified
+    //! by \p bid and the function version which ignores empty.
+    std::size_t penaltyIndexFor(std::size_t bid, bool ignoreEmpty) const;
 
-        //! \name Statistics
-        //@{
-        //! The type of data in each field (numeric or categorical).
-        TStrUserDataTypePrVec m_FieldDataTypes;
+    //! Get a string describing all the parameters.
+    std::string print() const;
 
-        //! The minimum number of records to use to classify a field.
-        uint64_t m_MinimumExamplesToClassify;
+private:
+    using TDoubleVec = std::vector<double>;
+    using TSizeVecVec = std::vector<TSizeVec>;
 
-        //! 
The number of field values to count occurrences for in the categorical - //! field statistics. - std::size_t m_NumberOfMostFrequentFieldsCounts; - //@} +private: + //! Refresh the penalty indices. + void refreshPenaltyIndices(); - //! \name General Configuration - //@{ - //! The minimum number of records needed to attempt generating search - //! configurations. - uint64_t m_MinimumRecordsToAttemptConfig; +private: + //! The name of field holding the time. + std::string m_TimeFieldName; - //! The minimum permitted detector score. - double m_MinimumDetectorScore; - //@} + //! The time field format. + std::string m_TimeFieldFormat; - //! \name Field Role Scoring - //@{ - //! A number of by field values which is considered high. - std::size_t m_HighNumberByFieldValues; + //! If true then output information about all possible detectors including + //! those that have been discarded. + bool m_Verbose; - //! The highest permitted number of by field values. - std::size_t m_MaximumNumberByFieldValues; + //! If true then output the top detector for each candidate in JSON format. + bool m_WriteDetectorConfigs; - //! A number of by field values which is considered high for rare commands. - std::size_t m_HighNumberRareByFieldValues; + //! The line ending to use when writing detectors in JSON format. + std::string m_DetectorConfigLineEnding; - //! The highest permitted number of by field values for rare commands. - std::size_t m_MaximumNumberRareByFieldValues; + //! \name Scoping + //@{ + //! The only fields to use in auto-configuration. + TOptionalStrVec m_FieldsOfInterest; - //! A number of partition field values which is considered high. - std::size_t m_HighNumberPartitionFieldValues; + //! The only argument or partition fields to use in auto-configuration. + TOptionalStrVec m_FieldsToUseInAutoconfigureByRole[constants::NUMBER_FIELD_INDICES]; - //! The highest permitted number of partition field values. - std::size_t m_MaximumNumberPartitionFieldValues; + //! The function categories to consider configuring. + TFunctionCategoryVec m_FunctionCategoriesToConfigure; + //@} - //! A number of over field values which is considered small. - std::size_t m_LowNumberOverFieldValues; + //! \name Statistics + //@{ + //! The type of data in each field (numeric or categorical). + TStrUserDataTypePrVec m_FieldDataTypes; - //! The lowest permitted number of over field values. - std::size_t m_MinimumNumberOverFieldValues; - //@} + //! The minimum number of records to use to classify a field. + uint64_t m_MinimumExamplesToClassify; - //! \name Detector Scoring - //@{ - //! The factor for a field value being in the low frequency tail. - double m_HighCardinalityInTailFactor; + //! The number of field values to count occurrences for in the categorical + //! field statistics. + std::size_t m_NumberOfMostFrequentFieldsCounts; + //@} - //! The margin for a field value being in the low frequency tail. - uint64_t m_HighCardinalityInTailIncrement; + //! \name General Configuration + //@{ + //! The minimum number of records needed to attempt generating search + //! configurations. + uint64_t m_MinimumRecordsToAttemptConfig; - //! A high proportion of records in the low frequency tail for rare - //! analysis to be effective. - double m_HighCardinalityHighTailFraction; + //! The minimum permitted detector score. + double m_MinimumDetectorScore; + //@} - //! The maximum proportion of records in the low frequency tail - //! for rare analysis. - double m_HighCardinalityMaximumTailFraction; + //! \name Field Role Scoring + //@{ + //! 
A number of by field values which is considered high. + std::size_t m_HighNumberByFieldValues; - //! The lower fractions for populated buckets which trigger a penalty. - TDoubleVec m_LowPopulatedBucketFractions; + //! The highest permitted number of by field values. + std::size_t m_MaximumNumberByFieldValues; - //! The minimum permitted fractions of populated buckets. - TDoubleVec m_MinimumPopulatedBucketFractions; + //! A number of by field values which is considered high for rare commands. + std::size_t m_HighNumberRareByFieldValues; - //! The upper fractions for populated buckets which trigger a penalty. - TDoubleVec m_HighPopulatedBucketFractions; + //! The highest permitted number of by field values for rare commands. + std::size_t m_MaximumNumberRareByFieldValues; - //! The maximum permitted fractions of populated buckets. - TDoubleVec m_MaximumPopulatedBucketFractions; + //! A number of partition field values which is considered high. + std::size_t m_HighNumberPartitionFieldValues; - //! The bucket lengths that can be selected in seconds. - TTimeVec m_CandidateBucketLengths; + //! The highest permitted number of partition field values. + std::size_t m_MaximumNumberPartitionFieldValues; - //! A low number of buckets for configuration. - double m_LowNumberOfBucketsForConfig; + //! A number of over field values which is considered small. + std::size_t m_LowNumberOverFieldValues; - //! The lowest permitted number of buckets for configuration. - double m_MinimumNumberOfBucketsForConfig; + //! The lowest permitted number of over field values. + std::size_t m_MinimumNumberOverFieldValues; + //@} - //! The minimum proportion of regular data to classify a data set - //! as polled. - double m_PolledDataMinimumMassAtInterval; + //! \name Detector Scoring + //@{ + //! The factor for a field value being in the low frequency tail. + double m_HighCardinalityInTailFactor; - //! The maximum amount that polled data times can jitter about the - //! polling interval. - double m_PolledDataJitter; + //! The margin for a field value being in the low frequency tail. + uint64_t m_HighCardinalityInTailIncrement; - //! A coefficient of variation for a bucketed statistic which is - //! considered low. - double m_LowCoefficientOfVariation; + //! A high proportion of records in the low frequency tail for rare + //! analysis to be effective. + double m_HighCardinalityHighTailFraction; - //! The minimum coefficient of variation for a bucketed statistic. - double m_MinimumCoefficientOfVariation; + //! The maximum proportion of records in the low frequency tail + //! for rare analysis. + double m_HighCardinalityMaximumTailFraction; - //! A low range for the category lengths to be a suitable argument - //! for information content. - double m_LowLengthRangeForInfoContent; + //! The lower fractions for populated buckets which trigger a penalty. + TDoubleVec m_LowPopulatedBucketFractions; - //! The minimum range for the category lengths to be a suitable - //! argument for information content. - double m_MinimumLengthRangeForInfoContent; + //! The minimum permitted fractions of populated buckets. + TDoubleVec m_MinimumPopulatedBucketFractions; - //! A low maximum category length for it to be a suitable argument - //! for information content. - double m_LowMaximumLengthForInfoContent; + //! The upper fractions for populated buckets which trigger a penalty. + TDoubleVec m_HighPopulatedBucketFractions; - //! The minimum category length for it to be a suitable argument - //! for information content. 
- double m_MinimumMaximumLengthForInfoContent; + //! The maximum permitted fractions of populated buckets. + TDoubleVec m_MaximumPopulatedBucketFractions; - //! A low empirical entropy for a field to be a suitable argument - //! for information content. - double m_LowEntropyForInfoContent; + //! The bucket lengths that can be selected in seconds. + TTimeVec m_CandidateBucketLengths; - //! The minimum empirical entropy for a field to be a suitable - //! argument for information content. - double m_MinimumEntropyForInfoContent; + //! A low number of buckets for configuration. + double m_LowNumberOfBucketsForConfig; - //! A low distinct count for a field to be a suitable argument - //! for information content. - double m_LowDistinctCountForInfoContent; + //! The lowest permitted number of buckets for configuration. + double m_MinimumNumberOfBucketsForConfig; - //! The minimum distinct count for a field to be a suitable - //! argument for information content. - double m_MinimumDistinctCountForInfoContent; - //@} + //! The minimum proportion of regular data to classify a data set + //! as polled. + double m_PolledDataMinimumMassAtInterval; - //! The penalty indices for each bucket length. - TSizeVecVec m_BucketLengthPenaltyIndices; + //! The maximum amount that polled data times can jitter about the + //! polling interval. + double m_PolledDataJitter; - //! The penalty indices for function versions which do and don't - //! consider empty buckets. - TSizeVecVec m_IgnoreEmptyPenaltyIndices; -}; + //! A coefficient of variation for a bucketed statistic which is + //! considered low. + double m_LowCoefficientOfVariation; + + //! The minimum coefficient of variation for a bucketed statistic. + double m_MinimumCoefficientOfVariation; + + //! A low range for the category lengths to be a suitable argument + //! for information content. + double m_LowLengthRangeForInfoContent; + + //! The minimum range for the category lengths to be a suitable + //! argument for information content. + double m_MinimumLengthRangeForInfoContent; + + //! A low maximum category length for it to be a suitable argument + //! for information content. + double m_LowMaximumLengthForInfoContent; + + //! The minimum category length for it to be a suitable argument + //! for information content. + double m_MinimumMaximumLengthForInfoContent; + //! A low empirical entropy for a field to be a suitable argument + //! for information content. + double m_LowEntropyForInfoContent; + + //! The minimum empirical entropy for a field to be a suitable + //! argument for information content. + double m_MinimumEntropyForInfoContent; + + //! A low distinct count for a field to be a suitable argument + //! for information content. + double m_LowDistinctCountForInfoContent; + + //! The minimum distinct count for a field to be a suitable + //! argument for information content. + double m_MinimumDistinctCountForInfoContent; + //@} + + //! The penalty indices for each bucket length. + TSizeVecVec m_BucketLengthPenaltyIndices; + + //! The penalty indices for function versions which do and don't + //! consider empty buckets. 
+ TSizeVecVec m_IgnoreEmptyPenaltyIndices; +}; } } diff --git a/include/config/CDataCountStatistics.h b/include/config/CDataCountStatistics.h index c16009512d..27b7a26027 100644 --- a/include/config/CDataCountStatistics.h +++ b/include/config/CDataCountStatistics.h @@ -8,8 +8,8 @@ #define INCLUDED_ml_config_CDataCountStatistics_h #include -#include #include +#include #include #include @@ -18,112 +18,103 @@ #include -#include #include +#include #include #include #include #include -#include #include +#include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CAutoconfigurerParams; class CDetectorRecord; class CDetectorSpecification; //! \brief Statistics for the bucketed data. -class CONFIG_EXPORT CBucketCountStatistics -{ - public: - using TSizeSizePr = std::pair; - using TSizeSizeSizeTr = core::CTriple; - using TSizeSizePrUInt64UMap = boost::unordered_map; - using TSizeSizeSizeTrUInt64UMap = boost::unordered_map; - using TDetectorRecordVec = std::vector; - using TDetectorRecordCItr = core::CMaskIterator; - using TMoments = maths::CBasicStatistics::SSampleMeanVarSkew::TAccumulator; - - //! \brief The moments of a categorical function argument field. - struct CONFIG_EXPORT SArgumentMoments - { - //! The distinct count moments. - TMoments s_DistinctCount; - //! The information content moments. - TMoments s_InfoContent; - }; - - using TSizeSizePrMomentsUMap = boost::unordered_map; - using TSizeSizePrArgumentMomentsUMap = boost::unordered_map; - using TStrCPtrSizeSizePrArgumentMomentsUMapPr = std::pair; - using TStrCPtrSizeSizePrArgumentMomentsUMapPrVec = std::vector; - using TSizeSizePrQuantileUMap = boost::unordered_map; - - public: - //! Add the record for \p partition. - void add(const TSizeSizeSizeTr &partition, - TDetectorRecordCItr beginRecords, - TDetectorRecordCItr endRecords); - - //! Capture the current bucket statistics. - void capture(); - - //! Get the total count of distinct partitions and buckets seen to date. - uint64_t bucketPartitionCount() const; - - //! Get the moments of the count distribution per partition. - const TSizeSizePrMomentsUMap &countMomentsPerPartition() const; - - //! Get the quantile summary for the count distribution per partition. - const TSizeSizePrQuantileUMap &countQuantilesPerPartition() const; - - //! Get the moments of the distribution of the distinct count of argument - //! field values for \p name. - const TSizeSizePrArgumentMomentsUMap &argumentMomentsPerPartition(const std::string &name) const; - - private: - using TMean = maths::CBasicStatistics::SSampleMean::TAccumulator; - - //! \brief Bucket data stored about argument field. - struct CONFIG_EXPORT SBucketArgumentData - { - SBucketArgumentData(const maths::CBjkstUniqueValues distinctValues) : - s_DistinctValues(distinctValues) - {} - //! The approximate distinct values. - maths::CBjkstUniqueValues s_DistinctValues; - //! A sample of the unique strings in the bucket. - TMean s_MeanStringLength; - }; - - using TSizeSizeSizeTrArgumentDataUMap = boost::unordered_map; - using TStrCPtrSizeSizeSizeTrBjkstArgumentDataUMapPr = std::pair; - using TStrCPtrSizeSizeSizeTrArgumentDataUMapPrVec = std::vector; - - private: - //! The distinct partitions seen this bucket. - TSizeSizeSizeTrUInt64UMap m_CurrentBucketPartitionCounts; - - //! The distinct counts of the argument fields for each partition - //! for the current bucket. - TStrCPtrSizeSizeSizeTrArgumentDataUMapPrVec m_CurrentBucketArgumentDataPerPartition; - - //! 
The total count of distinct partitions and buckets seen to date. - uint64_t m_BucketPartitionCount; - - //! The moments of the distribution of partition counts. - TSizeSizePrMomentsUMap m_CountMomentsPerPartition; - - //! The count quantiles. - TSizeSizePrQuantileUMap m_CountQuantiles; - - //! The moments of the distribution of distinct counts of the argument - //! fields per partition and bucket length. - TStrCPtrSizeSizePrArgumentMomentsUMapPrVec m_ArgumentMomentsPerPartition; +class CONFIG_EXPORT CBucketCountStatistics { +public: + using TSizeSizePr = std::pair; + using TSizeSizeSizeTr = core::CTriple; + using TSizeSizePrUInt64UMap = boost::unordered_map; + using TSizeSizeSizeTrUInt64UMap = boost::unordered_map; + using TDetectorRecordVec = std::vector; + using TDetectorRecordCItr = core::CMaskIterator; + using TMoments = maths::CBasicStatistics::SSampleMeanVarSkew::TAccumulator; + + //! \brief The moments of a categorical function argument field. + struct CONFIG_EXPORT SArgumentMoments { + //! The distinct count moments. + TMoments s_DistinctCount; + //! The information content moments. + TMoments s_InfoContent; + }; + + using TSizeSizePrMomentsUMap = boost::unordered_map; + using TSizeSizePrArgumentMomentsUMap = boost::unordered_map; + using TStrCPtrSizeSizePrArgumentMomentsUMapPr = std::pair; + using TStrCPtrSizeSizePrArgumentMomentsUMapPrVec = std::vector; + using TSizeSizePrQuantileUMap = boost::unordered_map; + +public: + //! Add the record for \p partition. + void add(const TSizeSizeSizeTr& partition, TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords); + + //! Capture the current bucket statistics. + void capture(); + + //! Get the total count of distinct partitions and buckets seen to date. + uint64_t bucketPartitionCount() const; + + //! Get the moments of the count distribution per partition. + const TSizeSizePrMomentsUMap& countMomentsPerPartition() const; + + //! Get the quantile summary for the count distribution per partition. + const TSizeSizePrQuantileUMap& countQuantilesPerPartition() const; + + //! Get the moments of the distribution of the distinct count of argument + //! field values for \p name. + const TSizeSizePrArgumentMomentsUMap& argumentMomentsPerPartition(const std::string& name) const; + +private: + using TMean = maths::CBasicStatistics::SSampleMean::TAccumulator; + + //! \brief Bucket data stored about argument field. + struct CONFIG_EXPORT SBucketArgumentData { + SBucketArgumentData(const maths::CBjkstUniqueValues distinctValues) : s_DistinctValues(distinctValues) {} + //! The approximate distinct values. + maths::CBjkstUniqueValues s_DistinctValues; + //! A sample of the unique strings in the bucket. + TMean s_MeanStringLength; + }; + + using TSizeSizeSizeTrArgumentDataUMap = boost::unordered_map; + using TStrCPtrSizeSizeSizeTrBjkstArgumentDataUMapPr = std::pair; + using TStrCPtrSizeSizeSizeTrArgumentDataUMapPrVec = std::vector; + +private: + //! The distinct partitions seen this bucket. + TSizeSizeSizeTrUInt64UMap m_CurrentBucketPartitionCounts; + + //! The distinct counts of the argument fields for each partition + //! for the current bucket. + TStrCPtrSizeSizeSizeTrArgumentDataUMapPrVec m_CurrentBucketArgumentDataPerPartition; + + //! The total count of distinct partitions and buckets seen to date. + uint64_t m_BucketPartitionCount; + + //! The moments of the distribution of partition counts. + TSizeSizePrMomentsUMap m_CountMomentsPerPartition; + + //! The count quantiles. + TSizeSizePrQuantileUMap m_CountQuantiles; + + //! 
The moments of the distribution of distinct counts of the argument + //! fields per partition and bucket length. + TStrCPtrSizeSizePrArgumentMomentsUMapPrVec m_ArgumentMomentsPerPartition; }; //! \brief The root of the class hierarchy for useful count statistics. @@ -139,174 +130,168 @@ class CONFIG_EXPORT CBucketCountStatistics //! Each logical set of data statistics has its own hierarchy and is managed //! by a direct address table which enumerates unique combinations and maps //! detectors to their appropriate statistics. -class CONFIG_EXPORT CDataCountStatistics -{ - public: - using TUInt64Vec = std::vector; - using TDetectorRecordVec = std::vector; - using TDetectorRecordCItr = core::CMaskIterator; - using TBucketStatisticsVec = std::vector; +class CONFIG_EXPORT CDataCountStatistics { +public: + using TUInt64Vec = std::vector; + using TDetectorRecordVec = std::vector; + using TDetectorRecordCItr = core::CMaskIterator; + using TBucketStatisticsVec = std::vector; - public: - CDataCountStatistics(const CAutoconfigurerParams ¶ms); - virtual ~CDataCountStatistics(); +public: + CDataCountStatistics(const CAutoconfigurerParams& params); + virtual ~CDataCountStatistics(); - //! Update the statistics with [\p beginRecords, \p endRecords). - virtual void add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) = 0; + //! Update the statistics with [\p beginRecords, \p endRecords). + virtual void add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) = 0; - //! Get the total count of records added. - uint64_t recordCount() const; + //! Get the total count of records added. + uint64_t recordCount() const; - //! Get the total count of each bucket length. - const TUInt64Vec &bucketCounts() const; + //! Get the total count of each bucket length. + const TUInt64Vec& bucketCounts() const; - //! Get the arrival time distribution - const maths::CQuantileSketch &arrivalTimeDistribution() const; + //! Get the arrival time distribution + const maths::CQuantileSketch& arrivalTimeDistribution() const; - //! Get the total time range. - core_t::TTime timeRange() const; + //! Get the total time range. + core_t::TTime timeRange() const; - //! Get the number of time series. - std::size_t numberSampledTimeSeries() const; + //! Get the number of time series. + std::size_t numberSampledTimeSeries() const; - //! Get the counts of distinct (bucket, by, partition) triples - //! per bucket length seen to date. - const TBucketStatisticsVec &bucketStatistics() const; + //! Get the counts of distinct (bucket, by, partition) triples + //! per bucket length seen to date. + const TBucketStatisticsVec& bucketStatistics() const; - //! Extract the by field value. - template - static std::size_t by(const std::pair, T> &p) - { - return p.first.first; - } + //! Extract the by field value. + template + static std::size_t by(const std::pair, T>& p) { + return p.first.first; + } - //! Extract the partition field value. - template - static std::size_t partition(const std::pair, T> &p) - { - return p.first.second; - } + //! Extract the partition field value. + template + static std::size_t partition(const std::pair, T>& p) { + return p.first.second; + } - protected: - using TTimeVec = std::vector; - using TSizeUSet = boost::unordered_set; +protected: + using TTimeVec = std::vector; + using TSizeUSet = boost::unordered_set; - protected: - //! Get the parameters. - const CAutoconfigurerParams ¶ms() const; +protected: + //! Get the parameters. + const CAutoconfigurerParams& params() const; - //! 
Check if we should sample the partition. - bool samplePartition(std::size_t partition) const; + //! Check if we should sample the partition. + bool samplePartition(std::size_t partition) const; - private: - using TBoolVec = std::vector; - using TBoolVecVec = std::vector; - using TSizeVec = std::vector; - using TSizeSizePr = std::pair; - using TSizeSizePrUSet = boost::unordered_set; - using TOptionalTime = boost::optional; - using TAutoconfigurerParamsCRef = boost::reference_wrapper; - using TMinTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack >; +private: + using TBoolVec = std::vector; + using TBoolVecVec = std::vector; + using TSizeVec = std::vector; + using TSizeSizePr = std::pair; + using TSizeSizePrUSet = boost::unordered_set; + using TOptionalTime = boost::optional; + using TAutoconfigurerParamsCRef = boost::reference_wrapper; + using TMinTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMaxTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; - private: - //! Fill in the last bucket end times if they are empty. - void fillLastBucketEndTimes(core_t::TTime time); +private: + //! Fill in the last bucket end times if they are empty. + void fillLastBucketEndTimes(core_t::TTime time); - private: - //! The parameters. - TAutoconfigurerParamsCRef m_Params; +private: + //! The parameters. + TAutoconfigurerParamsCRef m_Params; - //! The total count of records added. - uint64_t m_RecordCount; + //! The total count of records added. + uint64_t m_RecordCount; - //! The last record time. - TOptionalTime m_LastRecordTime; + //! The last record time. + TOptionalTime m_LastRecordTime; - //! The approximate distribution function of arrival times. - maths::CQuantileSketch m_ArrivalTimeDistribution; + //! The approximate distribution function of arrival times. + maths::CQuantileSketch m_ArrivalTimeDistribution; - //! The earliest example time. - TMinTimeAccumulator m_Earliest; + //! The earliest example time. + TMinTimeAccumulator m_Earliest; - //! The latest example time. - TMaxTimeAccumulator m_Latest; + //! The latest example time. + TMaxTimeAccumulator m_Latest; - //! The times of the ends of the last complete buckets. - TTimeVec m_LastBucketEndTimes; + //! The times of the ends of the last complete buckets. + TTimeVec m_LastBucketEndTimes; - //! The set of all partitions. - TSizeUSet m_Partitions; + //! The set of all partitions. + TSizeUSet m_Partitions; - //! The partitions which are being sampled. - TSizeUSet m_SampledPartitions; + //! The partitions which are being sampled. + TSizeUSet m_SampledPartitions; - //! The sampled distinct time series. - TSizeSizePrUSet m_SampledTimeSeries; + //! The sampled distinct time series. + TSizeSizePrUSet m_SampledTimeSeries; - //! The pseudo r.n.g. for generating permutations of the masks. - maths::CPRNG::CXorOShiro128Plus m_Rng; + //! The pseudo r.n.g. for generating permutations of the masks. + maths::CPRNG::CXorOShiro128Plus m_Rng; - //! The current index into the masks. - TSizeVec m_BucketIndices; + //! The current index into the masks. + TSizeVec m_BucketIndices; - //! The bucket sampling masks. - TBoolVecVec m_BucketMasks; + //! The bucket sampling masks. + TBoolVecVec m_BucketMasks; - //! The total count of complete buckets seen. - TUInt64Vec m_BucketCounts; + //! The total count of complete buckets seen. + TUInt64Vec m_BucketCounts; - //! The bucket statistics. - TBucketStatisticsVec m_BucketStatistics; + //! 
The bucket statistics. + TBucketStatisticsVec m_BucketStatistics; }; //! \brief The count statistics for detectors with no "by" or "over" field. -class CONFIG_EXPORT CPartitionDataCountStatistics : public CDataCountStatistics -{ - public: - CPartitionDataCountStatistics(const CAutoconfigurerParams ¶ms); +class CONFIG_EXPORT CPartitionDataCountStatistics : public CDataCountStatistics { +public: + CPartitionDataCountStatistics(const CAutoconfigurerParams& params); - //! Update the statistics with [\p beginRecords, \p endRecords). - virtual void add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords); + //! Update the statistics with [\p beginRecords, \p endRecords). + virtual void add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords); }; //! \brief The count statistics for detectors with no "over" field. -class CONFIG_EXPORT CByAndPartitionDataCountStatistics : public CDataCountStatistics -{ - public: - using TSizeSizePr = std::pair; - using TSizeSizePrUSet = boost::unordered_set; - using TSizeSizePrUInt64UMap = boost::unordered_map; - using TSizeSizePrUInt64UMapCItr = TSizeSizePrUInt64UMap::const_iterator; - - public: - CByAndPartitionDataCountStatistics(const CAutoconfigurerParams ¶ms); - - //! Update the statistics with [\p beginRecords, \p endRecords). - virtual void add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords); +class CONFIG_EXPORT CByAndPartitionDataCountStatistics : public CDataCountStatistics { +public: + using TSizeSizePr = std::pair; + using TSizeSizePrUSet = boost::unordered_set; + using TSizeSizePrUInt64UMap = boost::unordered_map; + using TSizeSizePrUInt64UMapCItr = TSizeSizePrUInt64UMap::const_iterator; + +public: + CByAndPartitionDataCountStatistics(const CAutoconfigurerParams& params); + + //! Update the statistics with [\p beginRecords, \p endRecords). + virtual void add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords); }; //! \brief The count statistics for detectors with a "by" and an "over" field. -class CONFIG_EXPORT CByOverAndPartitionDataCountStatistics : public CDataCountStatistics -{ - public: - using TSizeUInt64UMap = boost::unordered_map; - using TSizeSizePr = std::pair; - using TSizeSizePrCBjkstUMap = boost::unordered_map; - using TSizeSizePrCBjkstUMapCItr = TSizeSizePrCBjkstUMap::const_iterator; - - public: - CByOverAndPartitionDataCountStatistics(const CAutoconfigurerParams ¶ms); - - //! Update the statistics with [\p beginRecords, \p endRecords). - virtual void add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords); - - //! Get the distinct count of over field values per (by, partition) pair. - const TSizeSizePrCBjkstUMap &sampledByAndPartitionDistinctOverCounts() const; - - private: - //! The distinct count of over values per (by, partition) pair. - TSizeSizePrCBjkstUMap m_DistinctOverValues; +class CONFIG_EXPORT CByOverAndPartitionDataCountStatistics : public CDataCountStatistics { +public: + using TSizeUInt64UMap = boost::unordered_map; + using TSizeSizePr = std::pair; + using TSizeSizePrCBjkstUMap = boost::unordered_map; + using TSizeSizePrCBjkstUMapCItr = TSizeSizePrCBjkstUMap::const_iterator; + +public: + CByOverAndPartitionDataCountStatistics(const CAutoconfigurerParams& params); + + //! Update the statistics with [\p beginRecords, \p endRecords). + virtual void add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords); + + //! Get the distinct count of over field values per (by, partition) pair. 
+ const TSizeSizePrCBjkstUMap& sampledByAndPartitionDistinctOverCounts() const; + +private: + //! The distinct count of over values per (by, partition) pair. + TSizeSizePrCBjkstUMap m_DistinctOverValues; }; //! \brief Responsible for creating unique data count statistics and providing @@ -319,53 +304,51 @@ class CONFIG_EXPORT CByOverAndPartitionDataCountStatistics : public CDataCountSt //! maintain a direct address table from every detector, built once up front //! on the set of initial candidate detectors, to a corresponding collection //! unique data count statistics. -class CONFIG_EXPORT CDataCountStatisticsDirectAddressTable -{ - public: - using TDetectorRecordVec = std::vector; - using TDetectorSpecificationVec = std::vector; +class CONFIG_EXPORT CDataCountStatisticsDirectAddressTable { +public: + using TDetectorRecordVec = std::vector; + using TDetectorSpecificationVec = std::vector; - public: - CDataCountStatisticsDirectAddressTable(const CAutoconfigurerParams ¶ms); +public: + CDataCountStatisticsDirectAddressTable(const CAutoconfigurerParams& params); - //! Build the table from \p specs. - void build(const TDetectorSpecificationVec &specs); + //! Build the table from \p specs. + void build(const TDetectorSpecificationVec& specs); - //! Clear the state (as a precursor to build). - void pruneUnsed(const TDetectorSpecificationVec &specs); + //! Clear the state (as a precursor to build). + void pruneUnsed(const TDetectorSpecificationVec& specs); - //! Update the statistics with \p records. - void add(const TDetectorRecordVec &records); + //! Update the statistics with \p records. + void add(const TDetectorRecordVec& records); - //! Get the detector \p spec's statistics. - const CDataCountStatistics &statistics(const CDetectorSpecification &spec) const; + //! Get the detector \p spec's statistics. + const CDataCountStatistics& statistics(const CDetectorSpecification& spec) const; - private: - using TSizeVec = std::vector; - using TPtrDiffVec = std::vector; - using TPtrDiffVecVec = std::vector; - using TAutoconfigurerParamsCRef = boost::reference_wrapper; - using TDataCountStatisticsPtr = std::shared_ptr; - using TDataCountStatisticsPtrVec = std::vector; +private: + using TSizeVec = std::vector; + using TPtrDiffVec = std::vector; + using TPtrDiffVecVec = std::vector; + using TAutoconfigurerParamsCRef = boost::reference_wrapper; + using TDataCountStatisticsPtr = std::shared_ptr; + using TDataCountStatisticsPtrVec = std::vector; - private: - //! Get the statistics for \p spec. - TDataCountStatisticsPtr stats(const CDetectorSpecification &spec) const; +private: + //! Get the statistics for \p spec. + TDataCountStatisticsPtr stats(const CDetectorSpecification& spec) const; - private: - //! The parameters. - TAutoconfigurerParamsCRef m_Params; +private: + //! The parameters. + TAutoconfigurerParamsCRef m_Params; - //! The many-to-one map from detector to data count statistic. - TSizeVec m_DetectorSchema; + //! The many-to-one map from detector to data count statistic. + TSizeVec m_DetectorSchema; - //! The one-to-one map from data count statistic to first detector. - TPtrDiffVecVec m_RecordSchema; + //! The one-to-one map from data count statistic to first detector. + TPtrDiffVecVec m_RecordSchema; - //! The actual count statistics. - TDataCountStatisticsPtrVec m_DataCountStatistics; + //! The actual count statistics. 
+ TDataCountStatisticsPtrVec m_DataCountStatistics; }; - } } diff --git a/include/config/CDataSemantics.h b/include/config/CDataSemantics.h index 76c3ed7755..23b3af5fc9 100644 --- a/include/config/CDataSemantics.h +++ b/include/config/CDataSemantics.h @@ -20,10 +20,8 @@ #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { //! \brief Determines the semantics of some data from examples. //! @@ -42,122 +40,119 @@ namespace config //! examples from a single data type, to be identified, are //! supplied. If multiple data types need to be identified then //! a different object should be used for each. -class CONFIG_EXPORT CDataSemantics -{ - public: - using TOptionalUserDataType = boost::optional; +class CONFIG_EXPORT CDataSemantics { +public: + using TOptionalUserDataType = boost::optional; - public: - //! The proportion of values which must be numeric for the - //! data to be a candidate metric. - static const double NUMERIC_PROPORTION_FOR_METRIC_STRICT; +public: + //! The proportion of values which must be numeric for the + //! data to be a candidate metric. + static const double NUMERIC_PROPORTION_FOR_METRIC_STRICT; - //! The proportion of values which must be numeric for the - //! data to be a candidate metric if there are only a small - //! number of distinct non-numeric strings. - static const double NUMERIC_PROPORTION_FOR_METRIC_WITH_SUSPECTED_MISSING_VALUES; + //! The proportion of values which must be numeric for the + //! data to be a candidate metric if there are only a small + //! number of distinct non-numeric strings. + static const double NUMERIC_PROPORTION_FOR_METRIC_WITH_SUSPECTED_MISSING_VALUES; - //! The proportion of values which must be integer for the - //! data to be a candidate integer. - static const double INTEGER_PRORORTION_FOR_INTEGER; + //! The proportion of values which must be integer for the + //! data to be a candidate integer. + static const double INTEGER_PRORORTION_FOR_INTEGER; - public: - explicit CDataSemantics(TOptionalUserDataType override = TOptionalUserDataType()); +public: + explicit CDataSemantics(TOptionalUserDataType override = TOptionalUserDataType()); - //! Add an example from the data set. - void add(const std::string &example); + //! Add an example from the data set. + void add(const std::string& example); - //! Compute the type of the data based on the examples added so far. - void computeType(); + //! Compute the type of the data based on the examples added so far. + void computeType(); - //! Get the last inferred data type set by computeType. - config_t::EDataType type() const; + //! Get the last inferred data type set by computeType. + config_t::EDataType type() const; - private: - //! \brief Hashes an ordinal type. - class CONFIG_EXPORT CHashOrdinal - { - public: - std::size_t operator()(maths::COrdinal value) const - { - return value.hash(); - } - }; - using TStrVec = std::vector; - using TOrdinalSizeUMap = boost::unordered_map; - using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack >; +private: + //! \brief Hashes an ordinal type. + class CONFIG_EXPORT CHashOrdinal { + public: + std::size_t operator()(maths::COrdinal value) const { return value.hash(); } + }; + using TStrVec = std::vector; + using TOrdinalSizeUMap = boost::unordered_map; + using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; - private: - //! 
The maximum number of values we'll hold in the empirical - //! distribution. - static const std::size_t MAXIMUM_EMPIRICAL_DISTRIBUTION_SIZE; +private: + //! The maximum number of values we'll hold in the empirical + //! distribution. + static const std::size_t MAXIMUM_EMPIRICAL_DISTRIBUTION_SIZE; - private: - //! Get the categorical type. - config_t::EDataType categoricalType() const; +private: + //! Get the categorical type. + config_t::EDataType categoricalType() const; - //! Get the real type. - config_t::EDataType realType() const; + //! Get the real type. + config_t::EDataType realType() const; - //! Get the integer type. - config_t::EDataType integerType() const; + //! Get the integer type. + config_t::EDataType integerType() const; - //! Check if the field is numeric. - bool isNumeric() const; + //! Check if the field is numeric. + bool isNumeric() const; - //! Check if the field is integer. - bool isInteger() const; + //! Check if the field is integer. + bool isInteger() const; - //! Check how well the data is approximated by a Gaussian - //! mixture model. - bool GMMGoodFit() const; + //! Check how well the data is approximated by a Gaussian + //! mixture model. + bool GMMGoodFit() const; - //! Add an integer value. - template maths::COrdinal addInteger(INT value); + //! Add an integer value. + template + maths::COrdinal addInteger(INT value); - //! Add a positive integer value. - template maths::COrdinal addPositiveInteger(UINT value); + //! Add a positive integer value. + template + maths::COrdinal addPositiveInteger(UINT value); - //! Add a real value. - template maths::COrdinal addReal(REAL value); + //! Add a real value. + template + maths::COrdinal addReal(REAL value); - private: - //! The last computed type. - config_t::EDataType m_Type; +private: + //! The last computed type. + config_t::EDataType m_Type; - //! Get a user specified override for the field type. - TOptionalUserDataType m_Override; + //! Get a user specified override for the field type. + TOptionalUserDataType m_Override; - //! The total number of examples. - double m_Count; + //! The total number of examples. + double m_Count; - //! True if the values are numeric. - double m_NumericProportion; + //! True if the values are numeric. + double m_NumericProportion; - //! The proportion of values which are integer. - double m_IntegerProportion; + //! The proportion of values which are integer. + double m_IntegerProportion; - //! The smallest numerical value received. - TMinAccumulator m_Smallest; + //! The smallest numerical value received. + TMinAccumulator m_Smallest; - //! The largest numerical value received. - TMaxAccumulator m_Largest; + //! The largest numerical value received. + TMaxAccumulator m_Largest; - //! The no more than three of the distinct values. - TStrVec m_DistinctValues; + //! The no more than three of the distinct values. + TStrVec m_DistinctValues; - //! Examples of non-numeric strings. - TStrVec m_NonNumericValues; + //! Examples of non-numeric strings. + TStrVec m_NonNumericValues; - //! Set to true if there are too many distinct values to maintain - //! the empirical distribution. - bool m_EmpiricalDistributionOverflowed; + //! Set to true if there are too many distinct values to maintain + //! the empirical distribution. + bool m_EmpiricalDistributionOverflowed; - //! The empirical distribution. - TOrdinalSizeUMap m_EmpiricalDistribution; + //! The empirical distribution. 
+ TOrdinalSizeUMap m_EmpiricalDistribution; }; - } } diff --git a/include/config/CDataSummaryStatistics.h b/include/config/CDataSummaryStatistics.h index 415ff20f83..3f363fb3e2 100644 --- a/include/config/CDataSummaryStatistics.h +++ b/include/config/CDataSummaryStatistics.h @@ -23,10 +23,8 @@ #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { //! \brief Basic summary statistics for both categorical and numerical //! data. @@ -34,37 +32,36 @@ namespace config //! DESCRIPTION:\n //! This gets the time range of the data set and computes the mean rate //! of records in the data set. -class CONFIG_EXPORT CDataSummaryStatistics -{ - public: - CDataSummaryStatistics(); +class CONFIG_EXPORT CDataSummaryStatistics { +public: + CDataSummaryStatistics(); - //! Add an example arriving at \p time. - void add(core_t::TTime time); + //! Add an example arriving at \p time. + void add(core_t::TTime time); - //! Get the total count of examples. - uint64_t count() const; + //! Get the total count of examples. + uint64_t count() const; - //! Get the earliest time of any example. - core_t::TTime earliest() const; + //! Get the earliest time of any example. + core_t::TTime earliest() const; - //! Get the latest time of any example. - core_t::TTime latest() const; + //! Get the latest time of any example. + core_t::TTime latest() const; - //! The mean rate of examples in the data set. - double meanRate() const; + //! The mean rate of examples in the data set. + double meanRate() const; - protected: - using TMinTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack >; +protected: + using TMinTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMaxTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; - private: - //! The earliest example time. - TMinTimeAccumulator m_Earliest; - //! The latest example time. - TMaxTimeAccumulator m_Latest; - //! The total count of examples. - uint64_t m_Count; +private: + //! The earliest example time. + TMinTimeAccumulator m_Earliest; + //! The latest example time. + TMaxTimeAccumulator m_Latest; + //! The total count of examples. + uint64_t m_Count; }; //! \brief Computes simple summary statistics for a categorical data set. @@ -78,124 +75,121 @@ class CONFIG_EXPORT CDataSummaryStatistics //! The estimates of distinct count and most frequent category counts //! are exact for small data sets and then switch seemlessly to using //! appropriate sketch data structures for very high distinct counts. -class CONFIG_EXPORT CCategoricalDataSummaryStatistics : public CDataSummaryStatistics -{ - public: - using TStrSizePr = std::pair; - using TStrSizePrVec = std::vector; +class CONFIG_EXPORT CCategoricalDataSummaryStatistics : public CDataSummaryStatistics { +public: + using TStrSizePr = std::pair; + using TStrSizePrVec = std::vector; - //! The smallest cardinality at which we'll approximate the statistics. - static const std::size_t TO_APPROXIMATE = 5000000; + //! The smallest cardinality at which we'll approximate the statistics. 
+ static const std::size_t TO_APPROXIMATE = 5000000; - public: - explicit CCategoricalDataSummaryStatistics(std::size_t n, std::size_t toApproximate = TO_APPROXIMATE); - CCategoricalDataSummaryStatistics(const CDataSummaryStatistics &other, - std::size_t n, - std::size_t toApproximate = TO_APPROXIMATE); +public: + explicit CCategoricalDataSummaryStatistics(std::size_t n, std::size_t toApproximate = TO_APPROXIMATE); + CCategoricalDataSummaryStatistics(const CDataSummaryStatistics& other, std::size_t n, std::size_t toApproximate = TO_APPROXIMATE); - //! Add an example at \p time. - void add(core_t::TTime time, const std::string &example); + //! Add an example at \p time. + void add(core_t::TTime time, const std::string& example); - //! Get the distinct count of categories. - std::size_t distinctCount() const; + //! Get the distinct count of categories. + std::size_t distinctCount() const; - //! Get the minimum length of any category. - std::size_t minimumLength() const; + //! Get the minimum length of any category. + std::size_t minimumLength() const; - //! Get the maximum length of any category. - std::size_t maximumLength() const; + //! Get the maximum length of any category. + std::size_t maximumLength() const; - //! Get the estimated empirical entropy of the categories. - double entropy() const; + //! Get the estimated empirical entropy of the categories. + double entropy() const; - //! Get the top-n most frequent categories and their counts. - void topN(TStrSizePrVec &result) const; + //! Get the top-n most frequent categories and their counts. + void topN(TStrSizePrVec& result) const; - //! Get the mean count in the remaining categories. - double meanCountInRemainders() const; + //! Get the mean count in the remaining categories. + double meanCountInRemainders() const; - private: - //! The number of n-grams on which we maintain statistics. - static const std::size_t NUMBER_N_GRAMS = 5; +private: + //! The number of n-grams on which we maintain statistics. + static const std::size_t NUMBER_N_GRAMS = 5; - private: - using TUInt32UInt64Pr = std::pair; - using TUInt32UInt64PrVec = std::vector; - using TSizeUInt64UMap = boost::unordered_map; - using TStrUInt64UMap = boost::unordered_map; - using TStrUInt64UMapItr = TStrUInt64UMap::iterator; - using TStrUInt64UMapCItr = TStrUInt64UMap::const_iterator; - using TStrUInt64UMapCItrVec = std::vector; - using TMinSizeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxSizeAccumulator = maths::CBasicStatistics::COrderStatisticsStack >; - using TBjkstUniqueValuesVec = std::vector; - using TEntropySketchVec = std::vector; +private: + using TUInt32UInt64Pr = std::pair; + using TUInt32UInt64PrVec = std::vector; + using TSizeUInt64UMap = boost::unordered_map; + using TStrUInt64UMap = boost::unordered_map; + using TStrUInt64UMapItr = TStrUInt64UMap::iterator; + using TStrUInt64UMapCItr = TStrUInt64UMap::const_iterator; + using TStrUInt64UMapCItrVec = std::vector; + using TMinSizeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMaxSizeAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; + using TBjkstUniqueValuesVec = std::vector; + using TEntropySketchVec = std::vector; - private: - //! Extract the \p n grams and update the relevant statistics. - void addNGrams(std::size_t n, const std::string &example); +private: + //! Extract the \p n grams and update the relevant statistics. + void addNGrams(std::size_t n, const std::string& example); - //! If the cardinality is too high approximate the statistics. 
- void approximateIfCardinalityTooHigh(); + //! If the cardinality is too high approximate the statistics. + void approximateIfCardinalityTooHigh(); - //! Update the counts of the calibrators. - void updateCalibrators(std::size_t category); + //! Update the counts of the calibrators. + void updateCalibrators(std::size_t category); - //! Get the calibrated estimate of the count of \p category. - double calibratedCount(std::size_t category) const; + //! Get the calibrated estimate of the count of \p category. + double calibratedCount(std::size_t category) const; - //! Fill in the lowest top-n vector. - void findLowestTopN(); + //! Fill in the lowest top-n vector. + void findLowestTopN(); - //! Get the top-n most frequent categories. - void topN(TStrUInt64UMapCItrVec &result) const; + //! Get the top-n most frequent categories. + void topN(TStrUInt64UMapCItrVec& result) const; - private: - //! A pseudo r.n.g. for deciding whether to sample the n-grams. - maths::CPRNG::CXorOShiro128Plus m_Rng; +private: + //! A pseudo r.n.g. for deciding whether to sample the n-grams. + maths::CPRNG::CXorOShiro128Plus m_Rng; - //! The smallest cardinality at which we'll approximate the statistics. - std::size_t m_ToApproximate; + //! The smallest cardinality at which we'll approximate the statistics. + std::size_t m_ToApproximate; - //! Set to true if we are approximating the statistics. - bool m_Approximating; + //! Set to true if we are approximating the statistics. + bool m_Approximating; - //! The distinct field values and their counts in the data set. - TSizeUInt64UMap m_ValueCounts; + //! The distinct field values and their counts in the data set. + TSizeUInt64UMap m_ValueCounts; - //! The approximate distinct count of values. - maths::CBjkstUniqueValues m_DistinctValues; + //! The approximate distinct count of values. + maths::CBjkstUniqueValues m_DistinctValues; - //! A set of exact counts for a small number of categories - //! which are used to calibrate the count sketch counts. - TUInt32UInt64PrVec m_Calibrators; + //! A set of exact counts for a small number of categories + //! which are used to calibrate the count sketch counts. + TUInt32UInt64PrVec m_Calibrators; - //! A min-sketch of the category counts. - maths::CCountMinSketch m_CountSketch; + //! A min-sketch of the category counts. + maths::CCountMinSketch m_CountSketch; - //! The number of top-n distinct categories to count. - std::size_t m_N; + //! The number of top-n distinct categories to count. + std::size_t m_N; - //! The top n categories by count and their counts. - TStrUInt64UMap m_TopN; + //! The top n categories by count and their counts. + TStrUInt64UMap m_TopN; - //! The smallest count in the top n category counts collection. - TStrUInt64UMapCItr m_LowestTopN; + //! The smallest count in the top n category counts collection. + TStrUInt64UMapCItr m_LowestTopN; - //! The minimum category length. - TMinSizeAccumulator m_MinLength; + //! The minimum category length. + TMinSizeAccumulator m_MinLength; - //! The maximum category length. - TMaxSizeAccumulator m_MaxLength; + //! The maximum category length. + TMaxSizeAccumulator m_MaxLength; - //! The approximate empirical entropy of the categories. - maths::CEntropySketch m_EmpiricalEntropy; + //! The approximate empirical entropy of the categories. + maths::CEntropySketch m_EmpiricalEntropy; - //! The count of distinct n-grams in the categories. - TBjkstUniqueValuesVec m_DistinctNGrams; + //! The count of distinct n-grams in the categories. 
+    TBjkstUniqueValuesVec m_DistinctNGrams;

-        //! The approximate empirical entropy of the n-grams in the categories.
-        TEntropySketchVec m_NGramEmpricalEntropy;
+    //! The approximate empirical entropy of the n-grams in the categories.
+    TEntropySketchVec m_NGramEmpricalEntropy;
 };

 //! \brief Computes simple summary statistics of a metric data set.
@@ -210,43 +204,41 @@ class CONFIG_EXPORT CCategoricalDataSummaryStatistics : public CDataSummaryStati
 //! it to use only Gaussian modes and allow many more clusters since
 //! we want an accurate description of the bulk of the distribution
 //! and don't care about overfitting as we do for anomaly detection.
-class CONFIG_EXPORT CNumericDataSummaryStatistics : public CDataSummaryStatistics
-{
-    public:
-        using TDoubleDoublePr = std::pair;
-        using TDoubleDoublePrVec = std::vector;
+class CONFIG_EXPORT CNumericDataSummaryStatistics : public CDataSummaryStatistics {
+public:
+    using TDoubleDoublePr = std::pair;
+    using TDoubleDoublePrVec = std::vector;

-    public:
-        CNumericDataSummaryStatistics(bool integer);
-        CNumericDataSummaryStatistics(const CDataSummaryStatistics &other, bool integer);
+public:
+    CNumericDataSummaryStatistics(bool integer);
+    CNumericDataSummaryStatistics(const CDataSummaryStatistics& other, bool integer);

-        //! Add an example at \p time.
-        void add(core_t::TTime time, const std::string &example);
+    //! Add an example at \p time.
+    void add(core_t::TTime time, const std::string& example);

-        //! Get the minimum value.
-        double minimum() const;
+    //! Get the minimum value.
+    double minimum() const;

-        //! Get the approximate median of the values.
-        double median() const;
+    //! Get the approximate median of the values.
+    double median() const;

-        //! Get the maximum value.
-        double maximum() const;
+    //! Get the maximum value.
+    double maximum() const;

-        //! Get a chart of the density function we have estimated.
-        bool densityChart(TDoubleDoublePrVec &result) const;
+    //! Get a chart of the density function we have estimated.
+    bool densityChart(TDoubleDoublePrVec& result) const;

-    private:
-        //! The count of non-numeric values.
-        uint64_t m_NonNumericCount;
+private:
+    //! The count of non-numeric values.
+    uint64_t m_NonNumericCount;

-        //! A quantile sketch used for estimating the median of the
-        //! data in a space efficient manner.
-        maths::CQuantileSketch m_QuantileSketch;
+    //! A quantile sketch used for estimating the median of the
+    //! data in a space efficient manner.
+    maths::CQuantileSketch m_QuantileSketch;

-        //! The principle clusters present in the data.
-        maths::CXMeansOnline1d m_Clusters;
+    //! The principal clusters present in the data.
+    maths::CXMeansOnline1d m_Clusters;
 };
-
 }
 }
diff --git a/include/config/CDetectorEnumerator.h b/include/config/CDetectorEnumerator.h
index 12aec9a25a..75036f4b90 100644
--- a/include/config/CDetectorEnumerator.h
+++ b/include/config/CDetectorEnumerator.h
@@ -16,10 +16,8 @@
 #include
 #include

-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 class CAutoconfigurerParams;
 class CDetectorSpecification;

@@ -33,104 +31,99 @@ class CDetectorSpecification;
 //! This is essentially the builder pattern. The intention is that suitable
 //! fields are added one at a time based on their statistical properties
 //! and once all fields have been added the configurations are generated.
-class CONFIG_EXPORT CDetectorEnumerator
-{
-    public:
-        using TDetectorSpecificationVec = std::vector;
-
-    public:
-        CDetectorEnumerator(const CAutoconfigurerParams &params);
-
-        //!
\name Builder Interface - //@{ - //! Add a function. - void addFunction(config_t::EFunctionCategory category); - - //! Add a candidate for a categorical function argument. - void addCategoricalFunctionArgument(const std::string &argument); - - //! Add a candidate for a metric function argument. - void addMetricFunctionArgument(const std::string &argument); - - //! Add a candidate by field. - void addByField(const std::string &by); - - //! Add a candidate by field for a rare detector. - void addRareByField(const std::string &by); - - //! Add a candidate over field. - void addOverField(const std::string &over); - - //! Add a candidate partition field. - void addPartitionField(const std::string &partition); - //@} - - //! Generate all the detectors. - //! - //! All detectors which can be built out of the functions and argument, - //! by, over and partition fields added so far are appended to \p result. - void generate(TDetectorSpecificationVec &result); - - private: - using TStrVec = std::vector; - using TOptionalStr = boost::optional; - using TFunctionCategoryVec = std::vector; - using TAutoconfigurerParamsCRef = boost::reference_wrapper; - - private: - //! Add the detectors with no partitioning fields. - void generateNoPartitioning(TDetectorSpecificationVec &result) const; - - //! Add the detectors with one partitioning field. - //! - //! The indices \p a and \p b define the start and end of the seed - //! detectors in \p result used to generate detectors with one - //! partitioning field. - void addOnePartitioning(std::size_t a, std::size_t b, - TDetectorSpecificationVec &result) const; - - //! Add the detectors with two partitioning fields. - //! - //! The indices \p a and \p b define the start and end of the seed - //! detectors in \p result used to generate detectors with two - //! partitioning fields. - void addTwoPartitioning(std::size_t a, std::size_t b, - TDetectorSpecificationVec &result) const; - - //! Add the detectors with three partitioning fields. - //! - //! The indices \p a and \p b define the start and end of the seed - //! detectors in \p result used to generate detectors with three - //! partitioning fields. - void addThreePartitioning(std::size_t a, std::size_t b, - TDetectorSpecificationVec &result) const; - - private: - //! The parameters. - TAutoconfigurerParamsCRef m_Params; - - //! The list of functions to be considered. - TFunctionCategoryVec m_Functions; - - //! Candidate field names for arguments categorical functions. - TStrVec m_CandidateCategoricalFunctionArguments; - - //! Candidate arguments for metric functions. - TStrVec m_CandidateMetricFunctionArguments; - - //! Candidate by fields. - TStrVec m_CandidateByFields; - - //! Candidate by fields for rare commands. - TStrVec m_CandidateRareByFields; - - //! Candidate over fields. - TStrVec m_CandidateOverFields; - - //! Candidate partition fields. - TStrVec m_CandidatePartitionFields; +class CONFIG_EXPORT CDetectorEnumerator { +public: + using TDetectorSpecificationVec = std::vector; + +public: + CDetectorEnumerator(const CAutoconfigurerParams& params); + + //! \name Builder Interface + //@{ + //! Add a function. + void addFunction(config_t::EFunctionCategory category); + + //! Add a candidate for a categorical function argument. + void addCategoricalFunctionArgument(const std::string& argument); + + //! Add a candidate for a metric function argument. + void addMetricFunctionArgument(const std::string& argument); + + //! Add a candidate by field. + void addByField(const std::string& by); + + //! 
Add a candidate by field for a rare detector.
+    void addRareByField(const std::string& by);
+
+    //! Add a candidate over field.
+    void addOverField(const std::string& over);
+
+    //! Add a candidate partition field.
+    void addPartitionField(const std::string& partition);
+    //@}
+
+    //! Generate all the detectors.
+    //!
+    //! All detectors which can be built out of the functions and argument,
+    //! by, over and partition fields added so far are appended to \p result.
+    void generate(TDetectorSpecificationVec& result);
+
+private:
+    using TStrVec = std::vector;
+    using TOptionalStr = boost::optional;
+    using TFunctionCategoryVec = std::vector;
+    using TAutoconfigurerParamsCRef = boost::reference_wrapper;
+
+private:
+    //! Add the detectors with no partitioning fields.
+    void generateNoPartitioning(TDetectorSpecificationVec& result) const;
+
+    //! Add the detectors with one partitioning field.
+    //!
+    //! The indices \p a and \p b define the start and end of the seed
+    //! detectors in \p result used to generate detectors with one
+    //! partitioning field.
+    void addOnePartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const;
+
+    //! Add the detectors with two partitioning fields.
+    //!
+    //! The indices \p a and \p b define the start and end of the seed
+    //! detectors in \p result used to generate detectors with two
+    //! partitioning fields.
+    void addTwoPartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const;
+
+    //! Add the detectors with three partitioning fields.
+    //!
+    //! The indices \p a and \p b define the start and end of the seed
+    //! detectors in \p result used to generate detectors with three
+    //! partitioning fields.
+    void addThreePartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const;
+
+private:
+    //! The parameters.
+    TAutoconfigurerParamsCRef m_Params;
+
+    //! The list of functions to be considered.
+    TFunctionCategoryVec m_Functions;
+
+    //! Candidate field names for the arguments of categorical functions.
+    TStrVec m_CandidateCategoricalFunctionArguments;
+
+    //! Candidate arguments for metric functions.
+    TStrVec m_CandidateMetricFunctionArguments;
+
+    //! Candidate by fields.
+    TStrVec m_CandidateByFields;
+
+    //! Candidate by fields for rare detectors.
+    TStrVec m_CandidateRareByFields;
+
+    //! Candidate over fields.
+    TStrVec m_CandidateOverFields;
+
+    //! Candidate partition fields.
+    TStrVec m_CandidatePartitionFields;
 };
-
 }
 }
diff --git a/include/config/CDetectorFieldRolePenalty.h b/include/config/CDetectorFieldRolePenalty.h
index d3b97bc4c4..e1f7ad63d3 100644
--- a/include/config/CDetectorFieldRolePenalty.h
+++ b/include/config/CDetectorFieldRolePenalty.h
@@ -7,15 +7,13 @@
 #ifndef INCLUDED_ml_config_CDetectorFieldRolePenalty_h
 #define INCLUDED_ml_config_CDetectorFieldRolePenalty_h

-#include
 #include
+#include

 #include

-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 class CAutoconfigurerParams;

 //! \brief A penalty for a detector based on its field roles.
 //!
 //! DESCRIPTION:\n
 //! This wraps up a collection of field role penalties and assigns
 //! a penalty to a detector based on the product of all its argument
 //! and partitioning field penalties.
-class CDetectorFieldRolePenalty : public CPenalty
-{
-    public:
-        CDetectorFieldRolePenalty(const CAutoconfigurerParams &params);
+class CDetectorFieldRolePenalty : public CPenalty {
+public:
+    CDetectorFieldRolePenalty(const CAutoconfigurerParams& params);

-        //! Create a copy on the heap.
-        virtual CDetectorFieldRolePenalty *clone() const;
+    //! Create a copy on the heap.
+    virtual CDetectorFieldRolePenalty* clone() const;

-        //! Get the name of this penalty.
-        virtual std::string name() const;
+    //! Get the name of this penalty.
+    virtual std::string name() const;

-        //! Set the field penalty for the field identified by \p index
-        //! which must be one of ARGUMENT_INDEX, BY_INDEX, OVER_INDEX
-        //! or PARTITION_INDEX.
-        void addPenalty(std::size_t index, const CPenalty &penalty);
+    //! Set the field penalty for the field identified by \p index
+    //! which must be one of ARGUMENT_INDEX, BY_INDEX, OVER_INDEX
+    //! or PARTITION_INDEX.
+    void addPenalty(std::size_t index, const CPenalty& penalty);

-    private:
-        //! Compute the penalty based on the detector's fields.
-        virtual void penaltyFromMe(CDetectorSpecification &spec) const;
+private:
+    //! Compute the penalty based on the detector's fields.
+    virtual void penaltyFromMe(CDetectorSpecification& spec) const;

-    private:
-        //! The penalties to apply for each field.
-        const CPenalty *m_FieldRolePenalties[constants::NUMBER_FIELD_INDICES];
+private:
+    //! The penalties to apply for each field.
+    const CPenalty* m_FieldRolePenalties[constants::NUMBER_FIELD_INDICES];
 };
-
 }
 }
diff --git a/include/config/CDetectorRecord.h b/include/config/CDetectorRecord.h
index ae12323c07..fac75acedb 100644
--- a/include/config/CDetectorRecord.h
+++ b/include/config/CDetectorRecord.h
@@ -20,10 +20,8 @@
 #include
 #include

-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 class CDetectorSpecification;

 //! \brief Holds the field values needed to update a detector.
 //!
 //! DESCRIPTION:\n
 //! The state from a record needed to update a detector's penalty functions.
 //! This is its time and its field values corresponding to the argument
 //! and partitioning field(s) used by the detector.
-class CONFIG_EXPORT CDetectorRecord
-{
-    public:
-        using TSizeAry = boost::array;
-        using TStrCPtrAry = boost::array;
-        using TStrStrUMap = boost::unordered_map;
+class CONFIG_EXPORT CDetectorRecord {
+public:
+    using TSizeAry = boost::array;
+    using TStrCPtrAry = boost::array;
+    using TStrStrUMap = boost::unordered_map;

-    public:
-        CDetectorRecord(core_t::TTime time,
-                        config_t::EFunctionCategory function,
-                        const TStrCPtrAry &fieldNames,
-                        const TStrCPtrAry &fieldValues,
-                        const TSizeAry &hashedFieldValues);
+public:
+    CDetectorRecord(core_t::TTime time,
+                    config_t::EFunctionCategory function,
+                    const TStrCPtrAry& fieldNames,
+                    const TStrCPtrAry& fieldValues,
+                    const TSizeAry& hashedFieldValues);

-        //! Get the record time.
-        core_t::TTime time() const;
+    //! Get the record time.
+    core_t::TTime time() const;

-        //! Get the function of the record detector.
-        config_t::EFunctionCategory function() const;
+    //! Get the function of the record's detector.
+    config_t::EFunctionCategory function() const;

-        //! Get the name of the argument field.
-        const std::string *argumentFieldName() const;
+    //! Get the name of the argument field.
+    const std::string* argumentFieldName() const;

-        //! Get the name of the by field.
-        const std::string *byFieldName() const;
+    //! Get the name of the by field.
+    const std::string* byFieldName() const;

-        //! Get the name of the over field.
-        const std::string *overFieldName() const;
+    //! Get the name of the over field.
+    const std::string* overFieldName() const;

-        //! Get the name of the partition field.
-        const std::string *partitionFieldName() const;
+    //! Get the name of the partition field.
+ const std::string* partitionFieldName() const; - //! Get the argument field value if there is one or null. - const std::string *argumentFieldValue() const; + //! Get the argument field value if there is one or null. + const std::string* argumentFieldValue() const; - //! Get the by field value if there is one or null. - const std::string *byFieldValue() const; + //! Get the by field value if there is one or null. + const std::string* byFieldValue() const; - //! Get the over field value if there is one or null. - const std::string *overFieldValue() const; + //! Get the over field value if there is one or null. + const std::string* overFieldValue() const; - //! Get the partition field value if there is one or null. - const std::string *partitionFieldValue() const; + //! Get the partition field value if there is one or null. + const std::string* partitionFieldValue() const; - //! Get the argument field value hash. - std::size_t argumentFieldValueHash() const; + //! Get the argument field value hash. + std::size_t argumentFieldValueHash() const; - //! Get the by field value hash. - std::size_t byFieldValueHash() const; + //! Get the by field value hash. + std::size_t byFieldValueHash() const; - //! Get the over field value hash. - std::size_t overFieldValueHash() const; + //! Get the over field value hash. + std::size_t overFieldValueHash() const; - //! Get the partition field value hash. - std::size_t partitionFieldValueHash() const; + //! Get the partition field value hash. + std::size_t partitionFieldValueHash() const; - //! Print a description of this record for debug. - std::string print() const; + //! Print a description of this record for debug. + std::string print() const; - private: - //! The record time. - core_t::TTime m_Time; +private: + //! The record time. + core_t::TTime m_Time; - //! The function of the record's detector. - config_t::EFunctionCategory m_Function; + //! The function of the record's detector. + config_t::EFunctionCategory m_Function; - //! The relevant field names. - TStrCPtrAry m_FieldNames; + //! The relevant field names. + TStrCPtrAry m_FieldNames; - //! The relevant field values. - TStrCPtrAry m_FieldValues; + //! The relevant field values. + TStrCPtrAry m_FieldValues; - //! Hashes of the field values. - TSizeAry m_HashedFieldValues; + //! Hashes of the field values. + TSizeAry m_HashedFieldValues; }; //! \brief Defines a fast scheme, which minimizes lookups in the field values @@ -117,50 +114,46 @@ class CONFIG_EXPORT CDetectorRecord //! address table from every detector, built once up front on the set of initial //! candidate detectors, to a corresponding collection of entries in a field //! value vector which we populate once per record. -class CONFIG_EXPORT CDetectorRecordDirectAddressTable -{ - public: - using TStrStrUMap = boost::unordered_map; - using TDetectorSpecificationVec = std::vector; - using TDetectorRecordVec = std::vector; - - public: - //! Build the table from \p specs. - void build(const TDetectorSpecificationVec &specs); - - //! Get the unique records from \p time and \p fieldValues for \p specs. - void detectorRecords(core_t::TTime time, - const TStrStrUMap &fieldValues, - const TDetectorSpecificationVec &specs, - TDetectorRecordVec &result); - - private: - //! Clear the state (as a precursor to build). 
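// The direct addressing scheme documented above, as a self-contained sketch:
// resolve every distinct field name to a slot once, at build time, then per
// record do one hash lookup per distinct field name and let every detector
// read its values straight out of the slot table. The type and member names
// are illustrative, not the real CDetectorRecordDirectAddressTable internals.
#include <cstddef>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

struct SDirectAddressSchema {
    // One entry per distinct field name: (name, slot in the value table).
    std::vector<std::pair<std::string, std::size_t>> s_FieldSchema;
    // For each detector, the slots of its argument and partitioning fields.
    std::vector<std::vector<std::size_t>> s_DetectorSlots;
};

void fillValueTable(const SDirectAddressSchema& schema,
                    const std::unordered_map<std::string, std::string>& fieldValues,
                    std::vector<const std::string*>& valueTable) {
    valueTable.assign(schema.s_FieldSchema.size(), nullptr);
    for (const auto& field : schema.s_FieldSchema) {
        auto value = fieldValues.find(field.first);
        if (value != fieldValues.end()) {
            // One lookup per field name, however many detectors reference it.
            valueTable[field.second] = &value->second;
        }
    }
}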
-        void clear();

-    private:
-        using TSizeVec = std::vector;
-        using TStrSizePr = std::pair;
-        using TStrSizePrVec = std::vector;
-        using TSizeAry = boost::array;
-        using TSizeAryVec = std::vector;
-        using TStrCPtrVec = std::vector;

-    private:
-        //! A map from field to its value entry in the field value table.
-        TStrSizePrVec m_FieldSchema;

-        //! A map from detectors to their field value entries in the field
-        //! value table.
-        TSizeAryVec m_DetectorFieldSchema;

-        //! The table of field values populated once per record.
-        TStrCPtrVec m_FieldValueTable;

-        //! The table of field value hashes populated once per record.
-        TSizeVec m_HashedFieldValueTable;
+class CONFIG_EXPORT CDetectorRecordDirectAddressTable {
+public:
+    using TStrStrUMap = boost::unordered_map;
+    using TDetectorSpecificationVec = std::vector;
+    using TDetectorRecordVec = std::vector;
+
+public:
+    //! Build the table from \p specs.
+    void build(const TDetectorSpecificationVec& specs);
+
+    //! Get the unique records from \p time and \p fieldValues for \p specs.
+    void
+    detectorRecords(core_t::TTime time, const TStrStrUMap& fieldValues, const TDetectorSpecificationVec& specs, TDetectorRecordVec& result);
+
+private:
+    //! Clear the state (as a precursor to build).
+    void clear();
+
+private:
+    using TSizeVec = std::vector;
+    using TStrSizePr = std::pair;
+    using TStrSizePrVec = std::vector;
+    using TSizeAry = boost::array;
+    using TSizeAryVec = std::vector;
+    using TStrCPtrVec = std::vector;
+
+private:
+    //! A map from field to its value entry in the field value table.
+    TStrSizePrVec m_FieldSchema;
+
+    //! A map from detectors to their field value entries in the field
+    //! value table.
+    TSizeAryVec m_DetectorFieldSchema;
+
+    //! The table of field values populated once per record.
+    TStrCPtrVec m_FieldValueTable;
+
+    //! The table of field value hashes populated once per record.
+    TSizeVec m_HashedFieldValueTable;
 };
-
 }
 }
diff --git a/include/config/CDetectorSpecification.h b/include/config/CDetectorSpecification.h
index f0e01e747f..8caf02b240 100644
--- a/include/config/CDetectorSpecification.h
+++ b/include/config/CDetectorSpecification.h
@@ -16,16 +16,14 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include

 #include

-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 class CAutoconfigurerParams;
 class CDataCountStatistics;
 class CFieldStatistics;
@@ -43,258 +41,243 @@ class CPenalty;
 //! This is essentially a builder pattern where the final output
 //! is a JSON document which can be used to configure our
 //! autodetect API or Splunk.
-class CONFIG_EXPORT CDetectorSpecification : boost::equality_comparable< CDetectorSpecification,
-                                             boost::less_than_comparable< CDetectorSpecification > >
-{
-    public:
-        using TDoubleVec = std::vector;
-        using TDoubleVecVec = std::vector;
-        using TSizeVec = std::vector;
-        using TTimeVec = std::vector;
-        using TStrVec = std::vector;
-        using TOptionalStr = boost::optional;
-        using TFieldStatisticsVec = std::vector;
-        using TPenaltyPtr = boost::shared_ptr;
-
-        //! Ternary boolean type which supports unknown.
-        enum EFuzzyBool
-        {
-            E_True,
-            E_False,
-            E_Maybe
-        };
-
-        //! \brief The score for a given set of parameters.
-        struct CONFIG_EXPORT SParamScores
-        {
-            SParamScores(core_t::TTime bucketLength,
-                         const std::string &ignoreEmpty,
-                         double score,
-                         const TStrVec &descriptions);
-
-            //! The bucket length.
-            core_t::TTime s_BucketLength;
-
-            //! The name of the ignore empty version.
-            std::string s_IgnoreEmpty;
-
-            //! The parameters score.
-            double s_Score;
-
-            //! The descriptions associated with this score.
-            TStrVec s_Descriptions;
-        };
-
-        using TParamScoresVec = std::vector;
-
-    public:
-        CDetectorSpecification(const CAutoconfigurerParams &params,
-                               config_t::EFunctionCategory function,
-                               std::size_t id);
-        CDetectorSpecification(const CAutoconfigurerParams &params,
-                               config_t::EFunctionCategory function,
-                               const std::string &argument,
-                               std::size_t id);
+class CONFIG_EXPORT CDetectorSpecification
+    : boost::equality_comparable<CDetectorSpecification, boost::less_than_comparable<CDetectorSpecification>> {
+public:
+    using TDoubleVec = std::vector;
+    using TDoubleVecVec = std::vector;
+    using TSizeVec = std::vector;
+    using TTimeVec = std::vector;
+    using TStrVec = std::vector;
+    using TOptionalStr = boost::optional;
+    using TFieldStatisticsVec = std::vector;
+    using TPenaltyPtr = boost::shared_ptr;

-        //! Efficiently exchange the contents of two detectors.
-        void swap(CDetectorSpecification &other);
+    //! Ternary boolean type which supports unknown.
+    enum EFuzzyBool { E_True, E_False, E_Maybe };

-        //! \name Builder Interface
-        //@{
-        //! Set the side the detector is sensitive to.
-        void side(config_t::ESide side);
+    //! \brief The score for a given set of parameters.
+    struct CONFIG_EXPORT SParamScores {
+        SParamScores(core_t::TTime bucketLength, const std::string& ignoreEmpty, double score, const TStrVec& descriptions);

-        //! Set whether the detector ignore empty buckets.
-        void ignoreEmpty(bool ignoreEmpty);
+        //! The bucket length.
+        core_t::TTime s_BucketLength;

-        //! Check if we can add the partitioning field \p value.
-        bool canAddPartitioning(std::size_t index, const std::string &value) const;
+        //! The name of the ignore empty version.
+        std::string s_IgnoreEmpty;

-        //! Set the field identified by \p index which must be one
-        //! of ARGUMENT_INDEX, BY_INDEX, OVER_INDEX or PARTITION_INDEX.
-        void addPartitioning(std::size_t index, const std::string &value);
+        //! The parameters score.
+        double s_Score;

-        //! Add \p influence as an influencer.
-        void addInfluencer(const std::string &influence);
+        //! The descriptions associated with this score.
+        TStrVec s_Descriptions;
+    };

-        //! Set the shortest bucket length.
-        void bucketLength(core_t::TTime bucketLength);
+    using TParamScoresVec = std::vector;

-        //! Add the statistics for the detector's fields.
-        void addFieldStatistics(const TFieldStatisticsVec &stats);
+public:
+    CDetectorSpecification(const CAutoconfigurerParams& params, config_t::EFunctionCategory function, std::size_t id);
+    CDetectorSpecification(const CAutoconfigurerParams& params,
+                           config_t::EFunctionCategory function,
+                           const std::string& argument,
+                           std::size_t id);

-        //! Set the detector's count statistics.
-        void setCountStatistics(const CDataCountStatistics &stats);
+    //! Efficiently exchange the contents of two detectors.
+    void swap(CDetectorSpecification& other);

-        //! Set the penalty function for this detector.
-        void setPenalty(const TPenaltyPtr &penalty);
-        //@}
+    //! \name Builder Interface
+    //@{
+    //! Set the side the detector is sensitive to.
+    void side(config_t::ESide side);

-        //! \name Detector Scoring
-        //@{
-        //! Get the current detector score.
-        double score() const;
+    //! Set whether the detector ignores empty buckets.
+    void ignoreEmpty(bool ignoreEmpty);

-        //! The penalties that apply to the various
-        void scores(TParamScoresVec &result) const;
+    //! Check if we can add the partitioning field \p value.
+    bool canAddPartitioning(std::size_t index, const std::string& value) const;

-        //! Apply the penalty \p penalty.
-        void applyPenalty(double penalty, const std::string &description);
+    //! Set the field identified by \p index which must be one
+    //! of ARGUMENT_INDEX, BY_INDEX, OVER_INDEX or PARTITION_INDEX.
+    void addPartitioning(std::size_t index, const std::string& value);

-        //! Apply the penalty for the bucket length \p bucketLength.
-        void applyPenalties(const TSizeVec &indices,
-                            const TDoubleVec &penalty,
-                            const TStrVec &description);
+    //! Add \p influence as an influencer.
+    void addInfluencer(const std::string& influence);

-        //! Refresh all scores.
-        void refreshScores();
-        //@}
+    //! Set the shortest bucket length.
+    void bucketLength(core_t::TTime bucketLength);

-        //! \name Detector Attributes
-        //@{
-        //! Is the detector one of the count functions?
-        config_t::EFunctionCategory function() const;
+    //! Add the statistics for the detector's fields.
+    void addFieldStatistics(const TFieldStatisticsVec& stats);

-        //! Get the field which is the argument of the function.
-        const TOptionalStr &argumentField() const;
+    //! Set the detector's count statistics.
+    void setCountStatistics(const CDataCountStatistics& stats);

-        //! Get the by field name. Null if there isn't one.
-        const TOptionalStr &byField() const;
+    //! Set the penalty function for this detector.
+    void setPenalty(const TPenaltyPtr& penalty);
+    //@}

-        //! Get the over field name. Null if there isn't one.
-        const TOptionalStr &overField() const;
+    //! \name Detector Scoring
+    //@{
+    //! Get the current detector score.
+    double score() const;

-        //! Get the partition field name. Null if there isn't one.
-        const TOptionalStr &partitionField() const;
+    //! Get the penalties that apply to the various candidate parameters.
+    void scores(TParamScoresVec& result) const;

-        //! Get the influences which have been configured.
-        const TStrVec &influences() const;
+    //! Apply the penalty \p penalty.
+    void applyPenalty(double penalty, const std::string& description);

-        //! Get the bucket lengths.
-        void candidateBucketLengths(TTimeVec &result) const;
+    //! Apply the penalties \p penalty with descriptions \p description
+    //! at the parameter indices \p indices.
+    void applyPenalties(const TSizeVec& indices, const TDoubleVec& penalty, const TStrVec& description);

-        //! Check if this detector is for population analysis.
-        bool isPopulation() const;
-        //@}
+    //! Refresh all scores.
+    void refreshScores();
+    //@}

-        //! A total order of two detector specifications.
-        bool operator<(const CDetectorSpecification &rhs) const;
+    //! \name Detector Attributes
+    //@{
+    //! Is the detector one of the count functions?
+    config_t::EFunctionCategory function() const;

-        //! Equality comparison for two detector specifications.
-        bool operator==(const CDetectorSpecification &rhs) const;
+    //! Get the field which is the argument of the function.
+    const TOptionalStr& argumentField() const;

-        //! Get the identifier.
-        std::size_t id() const;
+    //! Get the by field name. Null if there isn't one.
+    const TOptionalStr& byField() const;

-        //! Set the identifier.
-        void id(std::size_t id);
+    //! Get the over field name. Null if there isn't one.
+    const TOptionalStr& overField() const;

-        //! Get the argument field statistics if there is one.
-        const CFieldStatistics *argumentFieldStatistics() const;
+    //! Get the partition field name. Null if there isn't one.
+    const TOptionalStr& partitionField() const;

-        //! Get the by field statistics if there is one.
-        const CFieldStatistics *byFieldStatistics() const;
+    //! Get the influences which have been configured.
+    const TStrVec& influences() const;

-        //! Get the over field statistics if there is one.
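// A sketch of the scoring bookkeeping behind score(), applyPenalty() and
// refreshScores(): each candidate parameter set carries a penalty in [0, 1],
// new penalties multiply into it, and descriptions accumulate for reporting.
// This is a simplified standalone analogue of the documented scheme, not the
// real class internals (m_Penalties, m_PenaltyDescriptions).
#include <string>
#include <vector>

struct SCandidateScore {
    double s_Penalty = 1.0; // 1.0 means "not penalized at all"
    std::vector<std::string> s_Descriptions;
};

void applyPenalty(SCandidateScore& candidate, double penalty, const std::string& description) {
    candidate.s_Penalty *= penalty; // penalties compose multiplicatively
    if (penalty < 1.0) {
        candidate.s_Descriptions.push_back(description); // remember why
    }
}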
- const CFieldStatistics *overFieldStatistics() const; + //! Get the bucket lengths. + void candidateBucketLengths(TTimeVec& result) const; - //! Get the partition field statistics if there is one. - const CFieldStatistics *partitionFieldStatistics() const; + //! Check if this detector is for population analysis. + bool isPopulation() const; + //@} - //! Get the count statistics for this detector. - const CDataCountStatistics *countStatistics() const; + //! A total order of two detector specifications. + bool operator<(const CDetectorSpecification& rhs) const; - //! Write a configuration which can be interpreted by our - //! autodetect processes. - std::string detectorConfig() const; + //! Equality comparison for two detector specifications. + bool operator==(const CDetectorSpecification& rhs) const; - //! Get a description of the detector. - std::string description() const; + //! Get the identifier. + std::size_t id() const; - private: - using TStrVecVec = std::vector; - using TOptionalTime = boost::optional; - using TSizeVecCPtrAry = boost::array; - using TAutoconfigurerParamsCRef = boost::reference_wrapper; + //! Set the identifier. + void id(std::size_t id); - private: - //! Get the parameters. - const CAutoconfigurerParams ¶ms() const; + //! Get the argument field statistics if there is one. + const CFieldStatistics* argumentFieldStatistics() const; - //! Get the highest index of any non-null function field. - int highestFieldIndex() const; + //! Get the by field statistics if there is one. + const CFieldStatistics* byFieldStatistics() const; - //! Get the indices of the penalties used for this detector. - TSizeVecCPtrAry penaltyIndicesInUse() const; + //! Get the over field statistics if there is one. + const CFieldStatistics* overFieldStatistics() const; - //! Initialize the penalties. - void initializePenalties(); + //! Get the partition field statistics if there is one. + const CFieldStatistics* partitionFieldStatistics() const; - //! Update the value of the ignore empty flag based on the - //! current scores. - void refreshIgnoreEmpty(); + //! Get the count statistics for this detector. + const CDataCountStatistics* countStatistics() const; - private: - //! The parameters. - TAutoconfigurerParamsCRef m_Params; + //! Write a configuration which can be interpreted by our + //! autodetect processes. + std::string detectorConfig() const; - //! \name Specification - //@{ - //! The detector function category. - config_t::EFunctionCategory m_Function; + //! Get a description of the detector. + std::string description() const; - //! The side of the anomaly the detector is sensitive, i.e. - //! high, low or both. - config_t::ESide m_Side; +private: + using TStrVecVec = std::vector; + using TOptionalTime = boost::optional; + using TSizeVecCPtrAry = boost::array; + using TAutoconfigurerParamsCRef = boost::reference_wrapper; - //! Set to true if the function can ignore empty buckets and - //! should, false if it can ignore empty buckets and shouldn't - //! and left as maybe otherwise. - EFuzzyBool m_IgnoreEmpty; +private: + //! Get the parameters. + const CAutoconfigurerParams& params() const; - //! The argument and partitioning fields (i.e. by, over and - //! partition) for the detector. A null value means the it - //! doesn't have one. - TOptionalStr m_FunctionFields[constants::NUMBER_FIELD_INDICES]; + //! Get the highest index of any non-null function field. + int highestFieldIndex() const; - //! The influences configured for the detector. - TStrVec m_Influencers; + //! 
Get the indices of the penalties used for this detector.
+    TSizeVecCPtrAry penaltyIndicesInUse() const;

-        //! The shortest (in the context of multi-) bucket length to
-        //! use for this detector.
-        TOptionalTime m_BucketLength;
+    //! Initialize the penalties.
+    void initializePenalties();

-        // TODO exclude frequent + frequency
+    //! Update the value of the ignore empty flag based on the
+    //! current scores.
+    void refreshIgnoreEmpty();

-        // TODO include/exclude list
-        //@}
+private:
+    //! The parameters.
+    TAutoconfigurerParamsCRef m_Params;

-        //! \name Scoring
-        //@{
-        //! The current assessment of the quality of the detector for its
-        //! candidate parameters, such as bucket length, ignore empty, etc.
-        //! These are in the range [0, 1.0] and are updated during the
-        //! configuration process. A higher penalty indicates a better
-        //! detector.
-        TDoubleVec m_Penalties;
+    //! \name Specification
+    //@{
+    //! The detector function category.
+    config_t::EFunctionCategory m_Function;

-        //! The function for computing the penalties to apply to this detector.
-        TPenaltyPtr m_Penalty;
+    //! The side of the anomaly the detector is sensitive to, i.e.
+    //! high, low or both.
+    config_t::ESide m_Side;

-        //! Descriptions of the penalties that apply.
-        TStrVecVec m_PenaltyDescriptions;
+    //! Set to true if the function can ignore empty buckets and
+    //! should, false if it can ignore empty buckets and shouldn't
+    //! and left as maybe otherwise.
+    EFuzzyBool m_IgnoreEmpty;

-        //! A unique identifier of this detector.
-        std::size_t m_Id;
+    //! The argument and partitioning fields (i.e. by, over and
+    //! partition) for the detector. A null value means it
+    //! doesn't have one.
+    TOptionalStr m_FunctionFields[constants::NUMBER_FIELD_INDICES];

-        //! The statistics for each of the detectors fields.
-        const CFieldStatistics *m_FieldStatistics[constants::NUMBER_FIELD_INDICES];
+    //! The influences configured for the detector.
+    TStrVec m_Influencers;

-        //! The count statistics for the detector.
-        const CDataCountStatistics *m_CountStatistics;
-        //@}
-};
+    //! The shortest (in the context of multi-) bucket length to
+    //! use for this detector.
+    TOptionalTime m_BucketLength;
+
+    // TODO exclude frequent + frequency
+
+    // TODO include/exclude list
+    //@}
+
+    //! \name Scoring
+    //@{
+    //! The current assessment of the quality of the detector for its
+    //! candidate parameters, such as bucket length, ignore empty, etc.
+    //! These are in the range [0, 1.0] and are updated during the
+    //! configuration process. A higher penalty indicates a better
+    //! detector.
+    TDoubleVec m_Penalties;
+    //! The function for computing the penalties to apply to this detector.
+    TPenaltyPtr m_Penalty;
+
+    //! Descriptions of the penalties that apply.
+    TStrVecVec m_PenaltyDescriptions;
+
+    //! A unique identifier of this detector.
+    std::size_t m_Id;
+
+    //! The statistics for each of the detector's fields.
+    const CFieldStatistics* m_FieldStatistics[constants::NUMBER_FIELD_INDICES];
+
+    //! The count statistics for the detector.
+ const CDataCountStatistics* m_CountStatistics; + //@} +}; } } diff --git a/include/config/CFieldRolePenalty.h b/include/config/CFieldRolePenalty.h index 77110f1f71..60bc503b1c 100644 --- a/include/config/CFieldRolePenalty.h +++ b/include/config/CFieldRolePenalty.h @@ -9,8 +9,8 @@ #include -#include #include +#include #include #include @@ -18,10 +18,8 @@ #include #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CCategoricalDataSummaryStatistics; class CNumericDataSummaryStatistics; @@ -33,22 +31,19 @@ class CNumericDataSummaryStatistics; //! share a single field for a given role then objects of this hierarchy //! are penalty functions which are constant on the set of detectors for //! which a given field and its role are fixed. -class CONFIG_EXPORT CCantBeNumeric : public CPenalty -{ - public: - CCantBeNumeric(const CAutoconfigurerParams ¶ms); - - //! Create a copy on the heap. - virtual CCantBeNumeric *clone() const; - - //! Get the name of this penalty. - virtual std::string name() const; - - private: - //! Sets \p penalty to 0.0 for numerics and a no-op otherwise. - virtual void penaltyFromMe(const CFieldStatistics &stats, - double &penalty, - std::string &description) const; +class CONFIG_EXPORT CCantBeNumeric : public CPenalty { +public: + CCantBeNumeric(const CAutoconfigurerParams& params); + + //! Create a copy on the heap. + virtual CCantBeNumeric* clone() const; + + //! Get the name of this penalty. + virtual std::string name() const; + +private: + //! Sets \p penalty to 0.0 for numerics and a no-op otherwise. + virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const; }; //! \brief Encapsulates the fact that categorical fields can't be used @@ -59,22 +54,19 @@ class CONFIG_EXPORT CCantBeNumeric : public CPenalty //! share a single field for a given role then objects of this hierarchy //! are penalty functions which are constant on the set of detectors for //! which a given field and its role are fixed. -class CONFIG_EXPORT CCantBeCategorical : public CPenalty -{ - public: - CCantBeCategorical(const CAutoconfigurerParams ¶ms); - - //! Create a copy on the heap. - virtual CCantBeCategorical *clone() const; - - //! Get the name of this penalty. - virtual std::string name() const; - - private: - //! Sets \p penalty to 0.0 for categorical and a no-op otherwise. - virtual void penaltyFromMe(const CFieldStatistics &stats, - double &penalty, - std::string &description) const; +class CONFIG_EXPORT CCantBeCategorical : public CPenalty { +public: + CCantBeCategorical(const CAutoconfigurerParams& params); + + //! Create a copy on the heap. + virtual CCantBeCategorical* clone() const; + + //! Get the name of this penalty. + virtual std::string name() const; + +private: + //! Sets \p penalty to 0.0 for categorical and a no-op otherwise. + virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const; }; //! \brief A penalty which stops unary categorical fields being used @@ -85,23 +77,20 @@ class CONFIG_EXPORT CCantBeCategorical : public CPenalty //! share a single field for a given role then objects of this hierarchy //! are penalty functions which are constant on the set of detectors for //! which a given field and its role are fixed. -class CONFIG_EXPORT CDontUseUnaryField : public CPenalty -{ - public: - CDontUseUnaryField(const CAutoconfigurerParams ¶ms); - - //! Create a copy on the heap. - virtual CDontUseUnaryField *clone() const; - - //! 
Get the name of this penalty.
-        virtual std::string name() const;

-    private:
-        //! Sets \p penalty to 0.0 for categorical with a single category
-        //! and a no-op otherwise.
-        virtual void penaltyFromMe(const CFieldStatistics &stats,
-                                   double &penalty,
-                                   std::string &description) const;
+class CONFIG_EXPORT CDontUseUnaryField : public CPenalty {
+public:
+    CDontUseUnaryField(const CAutoconfigurerParams& params);
+
+    //! Create a copy on the heap.
+    virtual CDontUseUnaryField* clone() const;
+
+    //! Get the name of this penalty.
+    virtual std::string name() const;
+
+private:
+    //! Sets \p penalty to 0.0 for categorical with a single category
+    //! and a no-op otherwise.
+    virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const;
 };

 //! \brief A penalty based on a specified range of penalized distinct
 //!
 //! DESCRIPTION:\n
 //! If one partitions the set of all detectors into equivalence classes which
 //! share a single field for a given role then objects of this hierarchy
 //! are penalty functions which are constant on the set of detectors for
 //! which a given field and its role are fixed.
-class CONFIG_EXPORT CDistinctCountThresholdPenalty : public CPenalty
-{
-    public:
-        CDistinctCountThresholdPenalty(const CAutoconfigurerParams &params,
-                                       std::size_t distinctCountForPenaltyOfOne,
-                                       std::size_t distinctCountForPenaltyOfZero);
-
-        //! Create a copy on the heap.
-        virtual CDistinctCountThresholdPenalty *clone() const;
-
-        //! Get the name of this penalty.
-        virtual std::string name() const;
-
-    private:
-        //! The penalty is a piecewise continuous linear function which
-        //! is constant outside interval \f$[dc_0, dc_1]\f$ and linear
-        //! decreasing from 1 at \f$dc_1\f$ to 0 at \f$dc_0\f$.
-        virtual void penaltyFromMe(const CFieldStatistics &stats,
-                                   double &penalty,
-                                   std::string &description) const;
-
-    private:
-        //! The distinct count for which the penalty is one.
-        double m_DistinctCountForPenaltyOfOne;
-        //! The distinct count for which the penalty is zero.
-        double m_DistinctCountForPenaltyOfZero;
+class CONFIG_EXPORT CDistinctCountThresholdPenalty : public CPenalty {
+public:
+    CDistinctCountThresholdPenalty(const CAutoconfigurerParams& params,
+                                   std::size_t distinctCountForPenaltyOfOne,
+                                   std::size_t distinctCountForPenaltyOfZero);
+
+    //! Create a copy on the heap.
+    virtual CDistinctCountThresholdPenalty* clone() const;
+
+    //! Get the name of this penalty.
+    virtual std::string name() const;
+
+private:
+    //! The penalty is a piecewise continuous linear function which
+    //! is constant outside the interval \f$[dc_0, dc_1]\f$ and decreases
+    //! linearly from 1 at \f$dc_1\f$ to 0 at \f$dc_0\f$.
+    virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const;
+
+private:
+    //! The distinct count for which the penalty is one.
+    double m_DistinctCountForPenaltyOfOne;
+    //! The distinct count for which the penalty is zero.
+    double m_DistinctCountForPenaltyOfZero;
 };
-
 }
 }
diff --git a/include/config/CFieldStatistics.h b/include/config/CFieldStatistics.h
index 73636dd0b5..65032abcd3 100644
--- a/include/config/CFieldStatistics.h
+++ b/include/config/CFieldStatistics.h
@@ -18,10 +18,8 @@
 #include
 #include

-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 class CAutoconfigurerParams;
 class CPenalty;

@@ -30,77 +28,72 @@ class CPenalty;
 //! DESCRIPTION:\n
 //! This wraps up the functionality to discover data semantics
 //! and gather the appropriate summary statistics.
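// The CDistinctCountThresholdPenalty function declared above, written out
// explicitly: a piecewise linear ramp which is 0 at dc0, 1 at dc1 and
// constant outside the interval. A minimal sketch assuming dc1 > dc0; the
// real class reads the two thresholds from its members.
#include <algorithm>

double distinctCountPenalty(double distinctCount, double dc0, double dc1) {
    if (dc0 == dc1) {
        return distinctCount >= dc1 ? 1.0 : 0.0; // degenerate step
    }
    // Linear between the thresholds, clamped so it is constant outside them.
    return std::clamp((distinctCount - dc0) / (dc1 - dc0), 0.0, 1.0);
}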
-class CONFIG_EXPORT CFieldStatistics -{ - public: - using TAutoconfigurerParamsCRef = boost::reference_wrapper; +class CONFIG_EXPORT CFieldStatistics { +public: + using TAutoconfigurerParamsCRef = boost::reference_wrapper; - public: - CFieldStatistics(const std::string &fieldName, const CAutoconfigurerParams ¶ms); +public: + CFieldStatistics(const std::string& fieldName, const CAutoconfigurerParams& params); - //! Get the name of the field. - const std::string &name() const; + //! Get the name of the field. + const std::string& name() const; - //! If we have been able to determine the data type start - //! capturing the appropriate statistics. - void maybeStartCapturingTypeStatistics(); + //! If we have been able to determine the data type start + //! capturing the appropriate statistics. + void maybeStartCapturingTypeStatistics(); - //! Add an example value for the field. - void add(core_t::TTime time, const std::string &example); + //! Add an example value for the field. + void add(core_t::TTime time, const std::string& example); - //! Get the type of data we think we have. - config_t::EDataType type() const; + //! Get the type of data we think we have. + config_t::EDataType type() const; - //! Get the data summary statistics if no more specific - //! ones are available. - const CDataSummaryStatistics *summary() const; + //! Get the data summary statistics if no more specific + //! ones are available. + const CDataSummaryStatistics* summary() const; - //! Get the categorical summary statistics if we think - //! the data are categorical. - const CCategoricalDataSummaryStatistics *categoricalSummary() const; + //! Get the categorical summary statistics if we think + //! the data are categorical. + const CCategoricalDataSummaryStatistics* categoricalSummary() const; - //! Get the numeric summary statistics if we think the - //! data are numeric. - const CNumericDataSummaryStatistics *numericSummary() const; + //! Get the numeric summary statistics if we think the + //! data are numeric. + const CNumericDataSummaryStatistics* numericSummary() const; - //! Get the score for this field based on \p penalty. - double score(const CPenalty &penalty) const; + //! Get the score for this field based on \p penalty. + double score(const CPenalty& penalty) const; - private: - using TTimeStrPr = std::pair; - using TTimeStrPrVec = std::vector; - using TDataSummaryStatistics = - boost::variant; +private: + using TTimeStrPr = std::pair; + using TTimeStrPrVec = std::vector; + using TDataSummaryStatistics = boost::variant; - private: - //! The auto-configuration parameters. - const CAutoconfigurerParams ¶ms() const; +private: + //! The auto-configuration parameters. + const CAutoconfigurerParams& params() const; - //! Add the records in the buffer to the statistics. - void replayBuffer(); + //! Add the records in the buffer to the statistics. + void replayBuffer(); - private: - //! A reference to the auto-configuration parameters. - TAutoconfigurerParamsCRef m_Params; +private: + //! A reference to the auto-configuration parameters. + TAutoconfigurerParamsCRef m_Params; - //! The field name. - std::string m_FieldName; + //! The field name. + std::string m_FieldName; - //! The number of examples added. - uint64_t m_NumberExamples; + //! The number of examples added. + uint64_t m_NumberExamples; - //! A buffer of the records before the field has been classified. - TTimeStrPrVec m_Buffer; + //! A buffer of the records before the field has been classified. + TTimeStrPrVec m_Buffer; - //! 
Deduces the data semantics. - CDataSemantics m_Semantics; + //! Deduces the data semantics. + CDataSemantics m_Semantics; - //! Computes the summary statistics. - TDataSummaryStatistics m_SummaryStatistics; + //! Computes the summary statistics. + TDataSummaryStatistics m_SummaryStatistics; }; - } } diff --git a/include/config/CLongTailPenalty.h b/include/config/CLongTailPenalty.h index c8ecd8081e..b075cdd55b 100644 --- a/include/config/CLongTailPenalty.h +++ b/include/config/CLongTailPenalty.h @@ -13,10 +13,8 @@ #include #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CAutoconfigurerParams; class CByAndPartitionDataCountStatistics; class CByOverAndPartitionDataCountStatistics; @@ -31,43 +29,37 @@ class CByOverAndPartitionDataCountStatistics; //! detectors will fail to detect any anomalies. As such we penalize detectors //! based on the proportion of count in a tail which is defined as a threshold //! on the difference between the by field count and the minimum count. -class CONFIG_EXPORT CLongTailPenalty : public CPenalty -{ - public: - CLongTailPenalty(const CAutoconfigurerParams ¶ms); +class CONFIG_EXPORT CLongTailPenalty : public CPenalty { +public: + CLongTailPenalty(const CAutoconfigurerParams& params); - //! Create a copy on the heap. - virtual CLongTailPenalty *clone() const; + //! Create a copy on the heap. + virtual CLongTailPenalty* clone() const; - //! Get the name of this penalty. - virtual std::string name() const; + //! Get the name of this penalty. + virtual std::string name() const; - private: - using TSizeUInt64UMap = boost::unordered_map; +private: + using TSizeUInt64UMap = boost::unordered_map; - private: - //! Compute a penalty for rare detectors. - virtual void penaltyFromMe(CDetectorSpecification &spec) const; +private: + //! Compute a penalty for rare detectors. + virtual void penaltyFromMe(CDetectorSpecification& spec) const; - //! Compute the penalty for a by field and optionally a partition. - void penaltyFor(const CByAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Compute the penalty for a by field and optionally a partition. + void penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Compute the penalty for a by, over and optionally a partition field. - void penaltyFor(const CByOverAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Compute the penalty for a by, over and optionally a partition field. + void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Extract the tail and total counts from \p counts. - template - void extractTailCounts(const MAP &counts, - TSizeUInt64UMap &totals, - TSizeUInt64UMap &tail) const; + //! Extract the tail and total counts from \p counts. + template + void extractTailCounts(const MAP& counts, TSizeUInt64UMap& totals, TSizeUInt64UMap& tail) const; - //! Compute the penalty for the rare counts and total counts \p rares - //! and \p totals, respectively. - double penaltyFor(TSizeUInt64UMap &rares, TSizeUInt64UMap &totals) const; + //! Compute the penalty for the rare counts and total counts \p rares + //! and \p totals, respectively. 
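// The ratio the long-tail penalty above is built on, in isolation: the share
// of the total count which falls in "tail" categories, aggregated per
// partition. A minimal sketch with assumed container shapes, not the real
// CLongTailPenalty::penaltyFor() implementation.
#include <cstddef>
#include <cstdint>
#include <unordered_map>

double longTailPenalty(const std::unordered_map<std::size_t, std::uint64_t>& tail,
                       const std::unordered_map<std::size_t, std::uint64_t>& totals) {
    std::uint64_t tailCount{0};
    std::uint64_t totalCount{0};
    for (const auto& total : totals) {
        totalCount += total.second;
        auto entry = tail.find(total.first);
        if (entry != tail.end()) {
            tailCount += entry->second;
        }
    }
    // The larger the share of counts in the tail, the smaller the penalty,
    // i.e. the more the detector is penalized.
    return totalCount > 0 ? 1.0 - static_cast<double>(tailCount) / static_cast<double>(totalCount) : 1.0;
}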
+ double penaltyFor(TSizeUInt64UMap& rares, TSizeUInt64UMap& totals) const; }; - } } diff --git a/include/config/CLowInformationContentPenalty.h b/include/config/CLowInformationContentPenalty.h index 9f764749db..e5deef6a24 100644 --- a/include/config/CLowInformationContentPenalty.h +++ b/include/config/CLowInformationContentPenalty.h @@ -10,10 +10,8 @@ #include #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { //! \brief A penalty for the information content command if there is //! little evidence that the categories are carrying any information. @@ -26,22 +24,20 @@ namespace config //! -# Any long categories. //! -# Significant empirical entropy in the categories relative to //! their distinct count (which bounds the entropy). -class CONFIG_EXPORT CLowInformationContentPenalty : public CPenalty -{ - public: - CLowInformationContentPenalty(const CAutoconfigurerParams ¶ms); +class CONFIG_EXPORT CLowInformationContentPenalty : public CPenalty { +public: + CLowInformationContentPenalty(const CAutoconfigurerParams& params); - //! Create a copy on the heap. - virtual CLowInformationContentPenalty *clone() const; + //! Create a copy on the heap. + virtual CLowInformationContentPenalty* clone() const; - //! Get the name of this penalty. - virtual std::string name() const; + //! Get the name of this penalty. + virtual std::string name() const; - private: - //! Compute a penalty for rare detectors. - virtual void penaltyFromMe(CDetectorSpecification &spec) const; +private: + //! Compute a penalty for rare detectors. + virtual void penaltyFromMe(CDetectorSpecification& spec) const; }; - } } diff --git a/include/config/CLowVariationPenalty.h b/include/config/CLowVariationPenalty.h index 68c359173f..94f1c169ef 100644 --- a/include/config/CLowVariationPenalty.h +++ b/include/config/CLowVariationPenalty.h @@ -11,10 +11,8 @@ #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CPartitionDataCountStatistics; class CByAndPartitionDataCountStatistics; class CByOverAndPartitionDataCountStatistics; @@ -28,58 +26,47 @@ class CByOverAndPartitionDataCountStatistics; //! every category of a distinct count function argument appears in every bucket, //! say because it labels a monitor, and the case that polled data is analyzed for //! count anomalies. -class CONFIG_EXPORT CLowVariationPenalty : public CPenalty -{ - public: - CLowVariationPenalty(const CAutoconfigurerParams ¶ms); +class CONFIG_EXPORT CLowVariationPenalty : public CPenalty { +public: + CLowVariationPenalty(const CAutoconfigurerParams& params); - //! Create a copy on the heap. - virtual CLowVariationPenalty *clone() const; + //! Create a copy on the heap. + virtual CLowVariationPenalty* clone() const; - //! Get the name of this penalty. - virtual std::string name() const; + //! Get the name of this penalty. + virtual std::string name() const; - private: - //! Apply a penalty for features with very little variation. - virtual void penaltyFromMe(CDetectorSpecification &spec) const; +private: + //! Apply a penalty for features with very little variation. + virtual void penaltyFromMe(CDetectorSpecification& spec) const; - //! Apply the penalty for count with optionally a partition. - void penaltiesForCount(const CPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for count with optionally a partition. + void penaltiesForCount(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! 
Apply the penalty for count with a by field and optionally a partition. - void penaltiesForCount(const CByAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for count with a by field and optionally a partition. + void penaltiesForCount(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Apply the penalty for count with a by, over and optionally a partition field. - void penaltiesForCount(const CByOverAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for count with a by, over and optionally a partition field. + void penaltiesForCount(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Apply the penalty for distinct count with optionally a partition. - void penaltyForDistinctCount(const CPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for distinct count with optionally a partition. + void penaltyForDistinctCount(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Apply the penalty for distinct count with by and optionally a partition. - void penaltyForDistinctCount(const CByAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for distinct count with by and optionally a partition. + void penaltyForDistinctCount(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Apply the penalty for distinct count with by, over and optionally a partition. - void penaltyForDistinctCount(const CByOverAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for distinct count with by, over and optionally a partition. + void penaltyForDistinctCount(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Apply the penalty for info content with optionally a partition. - void penaltyForInfoContent(const CPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for info content with optionally a partition. + void penaltyForInfoContent(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Apply the penalty for info content with a by field and optionally a partition. - void penaltyForInfoContent(const CByAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for info content with a by field and optionally a partition. + void penaltyForInfoContent(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; - //! Apply the penalty for info content with a by, over and optionally a partition field. - void penaltyForInfoContent(const CByOverAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const; + //! Apply the penalty for info content with a by, over and optionally a partition field. 
+    void penaltyForInfoContent(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;
 };
-
 }
 }
diff --git a/include/config/CNotEnoughDataPenalty.h b/include/config/CNotEnoughDataPenalty.h
index ec14b73f5d..37182b0d27 100644
--- a/include/config/CNotEnoughDataPenalty.h
+++ b/include/config/CNotEnoughDataPenalty.h
@@ -12,10 +12,8 @@
 #include

-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 class CAutoconfigurerParams;
 class CBucketCountStatistics;
 class CPartitionDataCountStatistics;
 class CByAndPartitionDataCountStatistics;
 class CByOverAndPartitionDataCountStatistics;

 //! DESCRIPTION:\n
 //! The important factor is the number of populated buckets for each distinct
 //! (by, partition) field value pair. This applies a bucket length specific
 //! penalty based on the proportion of populated buckets versus total buckets.
-class CONFIG_EXPORT CNotEnoughDataPenalty : public CPenalty
-{
-    public:
-        CNotEnoughDataPenalty(const CAutoconfigurerParams &params);
+class CONFIG_EXPORT CNotEnoughDataPenalty : public CPenalty {
+public:
+    CNotEnoughDataPenalty(const CAutoconfigurerParams& params);

-        //! Create a copy on the heap.
-        virtual CNotEnoughDataPenalty *clone() const;
+    //! Create a copy on the heap.
+    virtual CNotEnoughDataPenalty* clone() const;

-        //! Get the name of this penalty.
-        virtual std::string name() const;
+    //! Get the name of this penalty.
+    virtual std::string name() const;

-    private:
-        using TUInt64Vec = std::vector;
-        using TBucketCountStatisticsVec = std::vector;
+private:
+    using TUInt64Vec = std::vector;
+    using TBucketCountStatisticsVec = std::vector;

-    private:
-        //! Compute a penalty for rare detectors.
-        virtual void penaltyFromMe(CDetectorSpecification &spec) const;
+private:
+    //! Compute a penalty for rare detectors.
+    virtual void penaltyFromMe(CDetectorSpecification& spec) const;

-        //! Compute the penalty for optionally a partition.
-        void penaltyFor(const CPartitionDataCountStatistics &stats,
-                        CDetectorSpecification &spec) const;
+    //! Compute the penalty for optionally a partition.
+    void penaltyFor(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;

-        //! Compute the penalty for a by field and optionally a partition.
-        void penaltyFor(const CByAndPartitionDataCountStatistics &stats,
-                        CDetectorSpecification &spec) const;
+    //! Compute the penalty for a by field and optionally a partition.
+    void penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;

-        //! Compute the penalty for a by, over and optionally a partition field.
-        void penaltyFor(const CByOverAndPartitionDataCountStatistics &stats,
-                        CDetectorSpecification &spec) const;
+    //! Compute the penalty for a by, over and optionally a partition field.
+    void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;

-        //! The common penalty calculation.
-        void penaltyFor(const TUInt64Vec &bucketCounts,
-                        const TBucketCountStatisticsVec &bucketDistinctTupleCounts,
-                        CDetectorSpecification &spec) const;
+    //! The common penalty calculation.
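// The core quantity behind CNotEnoughDataPenalty, sketched: the proportion
// of buckets which are actually populated for a (by, partition) field value
// pair. Mapping this proportion straight to a penalty is purely illustrative;
// the real class applies a bucket length specific curve to it.
#include <cstdint>

double populatedBucketProportion(std::uint64_t populatedBuckets, std::uint64_t totalBuckets) {
    if (totalBuckets == 0) {
        return 0.0; // no buckets at all: fully penalized
    }
    return static_cast<double>(populatedBuckets) / static_cast<double>(totalBuckets);
}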
+    void penaltyFor(const TUInt64Vec& bucketCounts,
+                    const TBucketCountStatisticsVec& bucketDistinctTupleCounts,
+                    CDetectorSpecification& spec) const;
 };
-
 }
 }
diff --git a/include/config/CPenalty.h b/include/config/CPenalty.h
index 0affa11dce..9a9e0ce460 100644
--- a/include/config/CPenalty.h
+++ b/include/config/CPenalty.h
@@ -19,10 +19,8 @@
 #include
 #include

-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 class CAutoconfigurerParams;
 class CCategoricalDataSummaryStatistics;
 class CFieldStatistics;
@@ -61,117 +59,109 @@ class CNumericDataSummaryStatistics;
 //! is a closure, i.e. (MyPenaltyA * MyPenaltyB) is a new CPenalty object
 //! whose penalty function is the product of the penalty functions of
 //! MyPenaltyA and MyPenaltyB.
-class CONFIG_EXPORT CPenalty
-{
+class CONFIG_EXPORT CPenalty {
+public:
+    using TDoubleVec = std::vector<double>;
+    using TSizeVec = std::vector<std::size_t>;
+    using TTimeVec = std::vector<core_t::TTime>;
+    using TStrVec = std::vector<std::string>;
+    using TPenaltyPtr = boost::shared_ptr<CPenalty>;
+    using TPenaltyCPtr = boost::shared_ptr<const CPenalty>;
+    using TPenaltyCPtrVec = std::vector<TPenaltyCPtr>;
+
+    //! \brief Represents the result of multiplying penalties.
+    class CClosure {
     public:
-        using TDoubleVec = std::vector<double>;
-        using TSizeVec = std::vector<std::size_t>;
-        using TTimeVec = std::vector<core_t::TTime>;
-        using TStrVec = std::vector<std::string>;
-        using TPenaltyPtr = boost::shared_ptr<CPenalty>;
-        using TPenaltyCPtr = boost::shared_ptr<const CPenalty>;
-        using TPenaltyCPtrVec = std::vector<TPenaltyCPtr>;
-
-        //! \brief Represents the result of multiplying penalties.
-        class CClosure
-        {
-            public:
-                CClosure(const CPenalty &penalty);
-
-                //! Create a penalty on the heap from this closure.
-                CPenalty *clone() const;
-
-                //! Add a penalty to the closure.
-                CClosure &add(const CPenalty &penalty);
-
-                //! Get the closure's penalties.
-                TPenaltyCPtrVec &penalties();
-
-            private:
-                //! The penalties in the closure.
-                TPenaltyCPtrVec m_Penalties;
-        };
+        CClosure(const CPenalty& penalty);

-    public:
-        CPenalty(const CAutoconfigurerParams &params);
-        CPenalty(const CPenalty &other);
-        explicit CPenalty(CClosure other);
-        virtual ~CPenalty();
+        //! Create a penalty on the heap from this closure.
+        CPenalty* clone() const;

-        //! Create a copy on the heap.
-        virtual CPenalty *clone() const;
+        //! Add a penalty to the closure.
+        CClosure& add(const CPenalty& penalty);

-        //! Get the name of this penalty.
-        virtual std::string name() const;
+        //! Get the closure's penalties.
+        TPenaltyCPtrVec& penalties();

-        //! Get the product penalty of this and \p rhs.
-        const CPenalty &operator*=(const CPenalty &rhs);
+    private:
+        //! The penalties in the closure.
+        TPenaltyCPtrVec m_Penalties;
+    };

-        //! Get the product of this and the closure \p rhs.
-        const CPenalty &operator*=(CClosure rhs);
+public:
+    CPenalty(const CAutoconfigurerParams& params);
+    CPenalty(const CPenalty& other);
+    explicit CPenalty(CClosure other);
+    virtual ~CPenalty();

-        //! Compute the penalty to apply for the first property.
-        void penalty(const CFieldStatistics &stats, double &penalty) const
-        {
-            std::string ignore;
-            this->penalty(stats, penalty, ignore);
-        }
+    //! Create a copy on the heap.
+    virtual CPenalty* clone() const;

-        //! Compute the penalty to apply for the first property.
-        void penalty(const CFieldStatistics &stats,
-                     double &penalty,
-                     std::string &description) const;
+    //! Get the name of this penalty.
+    virtual std::string name() const;

-        //! Update the penalties of \p detector.
-        void penalize(CDetectorSpecification &spec) const;
+    //! Get the product penalty of this and \p rhs.
+    const CPenalty& operator*=(const CPenalty& rhs);

-        //! Compute the score for \p penalty.
-        static double score(double penalty);
+    //! Get the product of this and the closure \p rhs.
+    const CPenalty& operator*=(CClosure rhs);

-        //! True if \p penalty forces the score to zero.
-        static bool scoreIsZeroFor(double penalty);
+    //! Compute the penalty to apply for the first property.
+    void penalty(const CFieldStatistics& stats, double& penalty) const {
+        std::string ignore;
+        this->penalty(stats, penalty, ignore);
+    }

-    protected:
-        using TAutoconfigurerParamsCRef = boost::reference_wrapper<const CAutoconfigurerParams>;
+    //! Compute the penalty to apply for the first property.
+    void penalty(const CFieldStatistics& stats, double& penalty, std::string& description) const;

-    protected:
-        //! Get the parameters.
-        const CAutoconfigurerParams &params() const;
+    //! Update the penalties of \p detector.
+    void penalize(CDetectorSpecification& spec) const;

-    private:
-        //! Not assignable.
-        const CPenalty &operator=(const CPenalty &other);
+    //! Compute the score for \p penalty.
+    static double score(double penalty);

-        //! Compute the penalty based on a detector's field's statistics.
-        //!
-        //! \note No-op unless a derived class overrides it.
-        virtual void penaltyFromMe(const CFieldStatistics &stats,
-                                   double &penalty,
-                                   std::string &description) const;
+    //! True if \p penalty forces the score to zero.
+    static bool scoreIsZeroFor(double penalty);

-        //! Compute a penalty based on a complete detector specification.
-        //!
-        //! \note No-op unless a derived class overrides it.
-        virtual void penaltyFromMe(CDetectorSpecification &spec) const;
+protected:
+    using TAutoconfigurerParamsCRef = boost::reference_wrapper<const CAutoconfigurerParams>;

-    private:
-        //! The parameters.
-        TAutoconfigurerParamsCRef m_Params;
+protected:
+    //! Get the parameters.
+    const CAutoconfigurerParams& params() const;

-        //! The penalties.
-        TPenaltyCPtrVec m_Penalties;
+private:
+    //! Not assignable.
+    const CPenalty& operator=(const CPenalty& other);
+
+    //! Compute the penalty based on a detector's field's statistics.
+    //!
+    //! \note No-op unless a derived class overrides it.
+    virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const;
+
+    //! Compute a penalty based on a complete detector specification.
+    //!
+    //! \note No-op unless a derived class overrides it.
+    virtual void penaltyFromMe(CDetectorSpecification& spec) const;
+
+private:
+    //! The parameters.
+    TAutoconfigurerParamsCRef m_Params;
+
+    //! The penalties.
+    TPenaltyCPtrVec m_Penalties;
 };

 //! Multiply two penalties.
 CONFIG_EXPORT
-CPenalty::CClosure operator*(const CPenalty &lhs, const CPenalty &rhs);
+CPenalty::CClosure operator*(const CPenalty& lhs, const CPenalty& rhs);

 //! Multiply a closure by a penalty.
 CONFIG_EXPORT
-CPenalty::CClosure operator*(CPenalty::CClosure lhs, const CPenalty &rhs);
+CPenalty::CClosure operator*(CPenalty::CClosure lhs, const CPenalty& rhs);

 //! Multiply a penalty by a closure.
CONFIG_EXPORT -CPenalty::CClosure operator*(const CPenalty &lhs, CPenalty::CClosure rhs); - +CPenalty::CClosure operator*(const CPenalty& lhs, CPenalty::CClosure rhs); } } diff --git a/include/config/CPolledDataPenalty.h b/include/config/CPolledDataPenalty.h index 217c313c03..a95efc58d5 100644 --- a/include/config/CPolledDataPenalty.h +++ b/include/config/CPolledDataPenalty.h @@ -14,10 +14,8 @@ #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CAutoconfigurerParams; class CDataCountStatistics; @@ -29,28 +27,26 @@ class CDataCountStatistics; //! having bucket lengths less than the arrival interval. This tests this //! condition and applies a decreasing penalty based on the number of intervals //! for which this behavior has been observed. -class CONFIG_EXPORT CPolledDataPenalty : public CPenalty -{ - public: - CPolledDataPenalty(const CAutoconfigurerParams ¶ms); +class CONFIG_EXPORT CPolledDataPenalty : public CPenalty { +public: + CPolledDataPenalty(const CAutoconfigurerParams& params); - //! Create a copy on the heap. - virtual CPolledDataPenalty *clone() const; + //! Create a copy on the heap. + virtual CPolledDataPenalty* clone() const; - //! Get the name of this penalty. - virtual std::string name() const; + //! Get the name of this penalty. + virtual std::string name() const; - private: - using TOptionalTime = boost::optional; +private: + using TOptionalTime = boost::optional; - private: - //! Compute a penalty for rare detectors. - virtual void penaltyFromMe(CDetectorSpecification &spec) const; +private: + //! Compute a penalty for rare detectors. + virtual void penaltyFromMe(CDetectorSpecification& spec) const; - //! Get the interval at which the data are polled if there is one. - TOptionalTime pollingInterval(const CDataCountStatistics &stats) const; + //! Get the interval at which the data are polled if there is one. + TOptionalTime pollingInterval(const CDataCountStatistics& stats) const; }; - } } diff --git a/include/config/CReportWriter.h b/include/config/CReportWriter.h index 194264eefd..cd3ef2398e 100644 --- a/include/config/CReportWriter.h +++ b/include/config/CReportWriter.h @@ -15,10 +15,8 @@ #include #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { class CCategoricalDataSummaryStatistics; class CDataSummaryStatistics; class CDetectorSpecification; @@ -35,138 +33,128 @@ class CNumericDataSummaryStatistics; //! IMPLEMENTATION:\n //! This uses the builder pattern accepting different objects from //! which to create the report. -class CONFIG_EXPORT CReportWriter : public api::COutputHandler -{ - public: - using TStrVec = std::vector; - using TStrVecVec = std::vector; - using TStrVecVecVec = std::vector; - using TStrVecVecVecVec = std::vector; - - //! \name Summary Statistics. - //@{ - static const std::size_t FIELD_NAME = 0; - static const std::size_t DATA_TYPE = 1; - static const std::size_t EARLIEST_TIME = 2; - static const std::size_t LATEST_TIME = 3; - static const std::size_t MEAN_RATE = 4; - static const std::size_t CATEGORICAL_DISTINCT_COUNT = 5; - static const std::size_t CATEGORICAL_TOP_N_COUNTS = 6; - static const std::size_t NUMERIC_MINIMUM = 7; - static const std::size_t NUMERIC_MEDIAN = 8; - static const std::size_t NUMERIC_MAXIMUM = 9; - static const std::size_t NUMERIC_DENSITY_CHART = 10; - static const std::size_t NUMBER_STATISTICS = 11; - //@} - - //! \name Detector Attributes. 
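The operator* overloads above make penalty composition cheap: multiplying penalties builds a CClosure (a flat list of factors) rather than a nested object tree, and the explicit CPenalty(CClosure) constructor materialises the product. A usage sketch, assuming params and spec are already in scope:

    CPolledDataPenalty polled(params);
    CSpanTooSmallForBucketLengthPenalty span(params);
    CNotEnoughDataPenalty enoughData(params);

    // (polled * span) yields a CClosure; multiplying by a further penalty
    // just appends to the closure's factor list.
    CPenalty combined(polled * span * enoughData);

    // Applies the product of all three penalty functions to the detector.
    combined.penalize(spec);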
- //@{ - static const std::size_t DESCRIPTION = 0; - static const std::size_t OVERALL_SCORE = 1; - static const std::size_t PARAMETER_SCORES = 2; - static const std::size_t DETECTOR_CONFIG = 3; - static const std::size_t NUMBER_ATTRIBUTES = 4; - //@} - - //! \name Detector Parameter Labels - //@{ - static const std::size_t BUCKET_LENGTH_PARAMETER = 0; - static const std::size_t IGNORE_EMPTY_PARAMETER = 1; - static const std::size_t SCORE_PARAMETER = 2; - static const std::size_t DESCRIPTION_PARAMETER = 3; - static const std::size_t NUMBER_PARAMETERS = 4; - //@} - - //! The summary statistic labels. - static const std::string STATISTIC_LABELS[NUMBER_STATISTICS]; - - //! The detector parameter labels. - static const std::string PARAMETER_LABELS[NUMBER_PARAMETERS]; - - public: - explicit CReportWriter(std::ostream &writeStream); - - //! No-op. - virtual bool fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames); - - //! Return an empty string vector. - virtual const TStrVec &fieldNames() const; - - // Bring the other overload of fieldNames() into scope. - using api::COutputHandler::fieldNames; - - //! No-op. - virtual bool writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields); - - // Bring the other overload of writeRow() into scope. - using api::COutputHandler::writeRow; - - //! Add the total number of records processed. - void addTotalRecords(uint64_t n); - - //! Add the total number of invalid records processed. - void addInvalidRecords(uint64_t n); - - //! Add the summary for \p field. - void addFieldStatistics(const std::string &field, - config_t::EDataType type, - const CDataSummaryStatistics &summary); - - //! Add the summary for the categorical field \p field. - void addFieldStatistics(const std::string &field, - config_t::EDataType type, - const CCategoricalDataSummaryStatistics &summary); - - //! Add the summary for the numeric field \p field. - void addFieldStatistics(const std::string &field, - config_t::EDataType type, - const CNumericDataSummaryStatistics &summary); - - //! Add a summary of the detector \p detector. - void addDetector(const CDetectorSpecification &spec); - - //! Write the report. - //virtual void write() const = 0; - void write() const; - - protected: - //! The statistics in the summary table for unclassified fields. - static const std::size_t UNCLASSIFIED_STATISTICS[5]; - - //! The statistics in the summary table for categorical fields. - static const std::size_t CATEGORICAL_STATISTICS[6]; - - //! The statistics in the summary table for numeric fields. - static const std::size_t NUMERIC_STATISTICS[8]; - - //! The detector parameters in score table. - static const std::size_t DETECTOR_PARAMETERS[4]; - - private: - //! The stream to which to write the report. - std::ostream &m_WriteStream; - - //! The total number of records processed. - std::string m_TotalRecords; +class CONFIG_EXPORT CReportWriter : public api::COutputHandler { +public: + using TStrVec = std::vector; + using TStrVecVec = std::vector; + using TStrVecVecVec = std::vector; + using TStrVecVecVecVec = std::vector; + + //! \name Summary Statistics. 
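For orientation, the builder-style usage the class documentation describes looks roughly like this (a sketch only; the summary object and detector specification are assumed to have been computed elsewhere):

    CReportWriter writer(std::cout);
    writer.addTotalRecords(100000);
    writer.addInvalidRecords(42);
    writer.addFieldStatistics("status", config_t::E_Categorical, categoricalSummary);
    writer.addDetector(spec);
    writer.write(); // emit the assembled report to the stream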
+ //@{ + static const std::size_t FIELD_NAME = 0; + static const std::size_t DATA_TYPE = 1; + static const std::size_t EARLIEST_TIME = 2; + static const std::size_t LATEST_TIME = 3; + static const std::size_t MEAN_RATE = 4; + static const std::size_t CATEGORICAL_DISTINCT_COUNT = 5; + static const std::size_t CATEGORICAL_TOP_N_COUNTS = 6; + static const std::size_t NUMERIC_MINIMUM = 7; + static const std::size_t NUMERIC_MEDIAN = 8; + static const std::size_t NUMERIC_MAXIMUM = 9; + static const std::size_t NUMERIC_DENSITY_CHART = 10; + static const std::size_t NUMBER_STATISTICS = 11; + //@} + + //! \name Detector Attributes. + //@{ + static const std::size_t DESCRIPTION = 0; + static const std::size_t OVERALL_SCORE = 1; + static const std::size_t PARAMETER_SCORES = 2; + static const std::size_t DETECTOR_CONFIG = 3; + static const std::size_t NUMBER_ATTRIBUTES = 4; + //@} + + //! \name Detector Parameter Labels + //@{ + static const std::size_t BUCKET_LENGTH_PARAMETER = 0; + static const std::size_t IGNORE_EMPTY_PARAMETER = 1; + static const std::size_t SCORE_PARAMETER = 2; + static const std::size_t DESCRIPTION_PARAMETER = 3; + static const std::size_t NUMBER_PARAMETERS = 4; + //@} + + //! The summary statistic labels. + static const std::string STATISTIC_LABELS[NUMBER_STATISTICS]; + + //! The detector parameter labels. + static const std::string PARAMETER_LABELS[NUMBER_PARAMETERS]; + +public: + explicit CReportWriter(std::ostream& writeStream); + + //! No-op. + virtual bool fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames); + + //! Return an empty string vector. + virtual const TStrVec& fieldNames() const; + + // Bring the other overload of fieldNames() into scope. + using api::COutputHandler::fieldNames; + + //! No-op. + virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + + // Bring the other overload of writeRow() into scope. + using api::COutputHandler::writeRow; + + //! Add the total number of records processed. + void addTotalRecords(uint64_t n); + + //! Add the total number of invalid records processed. + void addInvalidRecords(uint64_t n); + + //! Add the summary for \p field. + void addFieldStatistics(const std::string& field, config_t::EDataType type, const CDataSummaryStatistics& summary); + + //! Add the summary for the categorical field \p field. + void addFieldStatistics(const std::string& field, config_t::EDataType type, const CCategoricalDataSummaryStatistics& summary); + + //! Add the summary for the numeric field \p field. + void addFieldStatistics(const std::string& field, config_t::EDataType type, const CNumericDataSummaryStatistics& summary); + + //! Add a summary of the detector \p detector. + void addDetector(const CDetectorSpecification& spec); + + //! Write the report. + //virtual void write() const = 0; + void write() const; + +protected: + //! The statistics in the summary table for unclassified fields. + static const std::size_t UNCLASSIFIED_STATISTICS[5]; + + //! The statistics in the summary table for categorical fields. + static const std::size_t CATEGORICAL_STATISTICS[6]; + + //! The statistics in the summary table for numeric fields. + static const std::size_t NUMERIC_STATISTICS[8]; + + //! The detector parameters in score table. + static const std::size_t DETECTOR_PARAMETERS[4]; + +private: + //! The stream to which to write the report. + std::ostream& m_WriteStream; - //! The total number of invalid records processed. - std::string m_InvalidRecords; + //! 
The total number of records processed. + std::string m_TotalRecords; - //! The summary statistics. - TStrVecVec m_UnclassifiedFields; + //! The total number of invalid records processed. + std::string m_InvalidRecords; - //! The summary statistics for categorical fields. - TStrVecVec m_CategoricalFields; + //! The summary statistics. + TStrVecVec m_UnclassifiedFields; - //! The summary statistics for numeric fields. - TStrVecVec m_NumericFields; + //! The summary statistics for categorical fields. + TStrVecVec m_CategoricalFields; - //! The summary of a candidate detector. - TStrVecVecVecVec m_Detectors; -}; + //! The summary statistics for numeric fields. + TStrVecVec m_NumericFields; + //! The summary of a candidate detector. + TStrVecVecVecVec m_Detectors; +}; } } diff --git a/include/config/CSpanTooSmallForBucketLengthPenalty.h b/include/config/CSpanTooSmallForBucketLengthPenalty.h index 2bd56ac57a..d7ed2f583a 100644 --- a/include/config/CSpanTooSmallForBucketLengthPenalty.h +++ b/include/config/CSpanTooSmallForBucketLengthPenalty.h @@ -10,10 +10,8 @@ #include #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { //! \brief Penalty for the case that the total data range is small w.r.t. //! the candidate bucket length. @@ -22,22 +20,20 @@ namespace config //! If we only see a small number of buckets it is difficult to be confident //! in that choice of bucket length. This penalizes bucket lengths which are //! large w.r.t. the observed data span. -class CONFIG_EXPORT CSpanTooSmallForBucketLengthPenalty : public CPenalty -{ - public: - CSpanTooSmallForBucketLengthPenalty(const CAutoconfigurerParams ¶ms); +class CONFIG_EXPORT CSpanTooSmallForBucketLengthPenalty : public CPenalty { +public: + CSpanTooSmallForBucketLengthPenalty(const CAutoconfigurerParams& params); - //! Create a copy on the heap. - virtual CSpanTooSmallForBucketLengthPenalty *clone() const; + //! Create a copy on the heap. + virtual CSpanTooSmallForBucketLengthPenalty* clone() const; - //! Get the name of this penalty. - virtual std::string name() const; + //! Get the name of this penalty. + virtual std::string name() const; - private: - //! Compute a penalty for rare detectors. - virtual void penaltyFromMe(CDetectorSpecification &spec) const; +private: + //! Compute a penalty for rare detectors. + virtual void penaltyFromMe(CDetectorSpecification& spec) const; }; - } } diff --git a/include/config/CSparseCountPenalty.h b/include/config/CSparseCountPenalty.h index c50667485b..3ca1e97209 100644 --- a/include/config/CSparseCountPenalty.h +++ b/include/config/CSparseCountPenalty.h @@ -10,10 +10,8 @@ #include #include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { //! \brief Penalty for the case that counting functions are viewed at a //! bucket length which is too short relative the data rate. @@ -24,22 +22,20 @@ namespace config //! not properly capture the variation in arrival times. This penalizes //! bucket lengths which are less than the shortest bucket length which //! captures the count distribution at longer bucket lengths. -class CONFIG_EXPORT CSparseCountPenalty : public CPenalty -{ - public: - CSparseCountPenalty(const CAutoconfigurerParams ¶ms); +class CONFIG_EXPORT CSparseCountPenalty : public CPenalty { +public: + CSparseCountPenalty(const CAutoconfigurerParams& params); - //! Create a copy on the heap. - virtual CSparseCountPenalty *clone() const; + //! Create a copy on the heap. + virtual CSparseCountPenalty* clone() const; - //! 
Get the name of this penalty.
-        virtual std::string name() const;
+    //! Get the name of this penalty.
+    virtual std::string name() const;

-    private:
-        //! Compute a penalty for rare detectors.
-        virtual void penaltyFromMe(CDetectorSpecification &spec) const;
+private:
+    //! Compute a penalty for rare detectors.
+    virtual void penaltyFromMe(CDetectorSpecification& spec) const;
 };
-
 }
 }
diff --git a/include/config/CTooMuchDataPenalty.h b/include/config/CTooMuchDataPenalty.h
index 32fd6c0cf8..6aeb9fa2ca 100644
--- a/include/config/CTooMuchDataPenalty.h
+++ b/include/config/CTooMuchDataPenalty.h
@@ -12,10 +12,8 @@
 #include

-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 class CAutoconfigurerParams;
 class CBucketCountStatistics;
 class CPartitionDataCountStatistics;
@@ -32,43 +30,38 @@ class CByOverAndPartitionDataCountStatistics;
 //! distinct (by, partition) field value pair. This applies a bucket length
 //! specific penalty based on the proportion of populated buckets versus total
 //! buckets.
-class CONFIG_EXPORT CTooMuchDataPenalty : public CPenalty
-{
-    public:
-        CTooMuchDataPenalty(const CAutoconfigurerParams &params);
+class CONFIG_EXPORT CTooMuchDataPenalty : public CPenalty {
+public:
+    CTooMuchDataPenalty(const CAutoconfigurerParams& params);

-        //! Create a copy on the heap.
-        virtual CTooMuchDataPenalty *clone() const;
+    //! Create a copy on the heap.
+    virtual CTooMuchDataPenalty* clone() const;

-        //! Get the name of this penalty.
-        virtual std::string name() const;
+    //! Get the name of this penalty.
+    virtual std::string name() const;

-    private:
-        using TUInt64Vec = std::vector<uint64_t>;
-        using TBucketCountStatisticsVec = std::vector<CBucketCountStatistics>;
+private:
+    using TUInt64Vec = std::vector<uint64_t>;
+    using TBucketCountStatisticsVec = std::vector<CBucketCountStatistics>;

-    private:
-        //! Compute a penalty for rare detectors.
-        virtual void penaltyFromMe(CDetectorSpecification &spec) const;
+private:
+    //! Compute a penalty for rare detectors.
+    virtual void penaltyFromMe(CDetectorSpecification& spec) const;

-        //! Compute the penalty for optionally a partition.
-        void penaltyFor(const CPartitionDataCountStatistics &stats,
-                        CDetectorSpecification &spec) const;
+    //! Compute the penalty for optionally a partition.
+    void penaltyFor(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;

-        //! Compute the penalty for a by field and optionally a partition.
-        void penaltyFor(const CByAndPartitionDataCountStatistics &stats,
-                        CDetectorSpecification &spec) const;
+    //! Compute the penalty for a by field and optionally a partition.
+    void penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;

-        //! Compute the penalty for a by, over and optionally a partition field.
-        void penaltyFor(const CByOverAndPartitionDataCountStatistics &stats,
-                        CDetectorSpecification &spec) const;
+    //! Compute the penalty for a by, over and optionally a partition field.
+    void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;

-        //! The common penalty calculation.
-        void penaltyFor(const TUInt64Vec &bucketCounts,
-                        const TBucketCountStatisticsVec &bucketDistinctTupleCounts,
-                        CDetectorSpecification &spec) const;
+    //! The common penalty calculation.
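Both CNotEnoughDataPenalty and CTooMuchDataPenalty key off the same quantity: the proportion of populated buckets versus total buckets per distinct (by, partition) value. Purely as an illustration of that quantity (the real penaltyFor() implementations are bucket-length specific and are not shown in this patch):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Fraction of buckets containing at least one record; e.g. counts of
    // {3, 0, 5, 0} give 0.5.
    double populatedProportion(const std::vector<uint64_t>& bucketCounts) {
        if (bucketCounts.empty()) {
            return 0.0;
        }
        std::size_t populated =
            std::count_if(bucketCounts.begin(), bucketCounts.end(),
                          [](uint64_t count) { return count > 0; });
        return static_cast<double>(populated) / static_cast<double>(bucketCounts.size());
    }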
+ void penaltyFor(const TUInt64Vec& bucketCounts, + const TBucketCountStatisticsVec& bucketDistinctTupleCounts, + CDetectorSpecification& spec) const; }; - } } diff --git a/include/config/CTools.h b/include/config/CTools.h index 63255a35ae..a9e769423e 100644 --- a/include/config/CTools.h +++ b/include/config/CTools.h @@ -11,46 +11,42 @@ #include -#include #include +#include -namespace ml -{ -namespace config -{ +namespace ml { +namespace config { //! \brief Utility functionality for auto-configuration. -class CONFIG_EXPORT CTools -{ - public: - //! Get a 32 bit integer category corresponding to \p value. - static uint32_t category32(const std::string &value); +class CONFIG_EXPORT CTools { +public: + //! Get a 32 bit integer category corresponding to \p value. + static uint32_t category32(const std::string& value); - //! Get a 32 bit integer category corresponding to \p category64. - static uint32_t category32(std::size_t category64); + //! Get a 32 bit integer category corresponding to \p category64. + static uint32_t category32(std::size_t category64); - //! Get a 64 bit integer identifier corresponding to \p value. - static std::size_t category64(const std::string &value); + //! Get a 64 bit integer identifier corresponding to \p value. + static std::size_t category64(const std::string& value); - //! Linearly interpolate the penalties (a, p(a)) and (b, p(b)) - //! at x. - static double interpolate(double a, double b, double pa, double pb, double x); + //! Linearly interpolate the penalties (a, p(a)) and (b, p(b)) + //! at x. + static double interpolate(double a, double b, double pa, double pb, double x); - //! Interpolate using the p'th power between (a, p(a)) and - //! (b, p(b)) at x. - static double powInterpolate(double p, double a, double b, double pa, double pb, double x); + //! Interpolate using the p'th power between (a, p(a)) and + //! (b, p(b)) at x. + static double powInterpolate(double p, double a, double b, double pa, double pb, double x); - //! Logarithmically interpolate the penalties (a, p(a)) and - //! (b, p(b)) at x. - static double logInterpolate(double a, double b, double pa, double pb, double x); + //! Logarithmically interpolate the penalties (a, p(a)) and + //! (b, p(b)) at x. + static double logInterpolate(double a, double b, double pa, double pb, double x); - //! Print a double in a human friendly format. - static std::string prettyPrint(double d); + //! Print a double in a human friendly format. + static std::string prettyPrint(double d); - //! Print a time in seconds in a human friendly format. - static std::string prettyPrint(core_t::TTime time); + //! Print a time in seconds in a human friendly format. + static std::string prettyPrint(core_t::TTime time); }; - } } diff --git a/include/config/ConfigTypes.h b/include/config/ConfigTypes.h index 176f705ed2..924aa3e8fa 100644 --- a/include/config/ConfigTypes.h +++ b/include/config/ConfigTypes.h @@ -12,37 +12,22 @@ #include #include -namespace ml -{ -namespace config_t -{ +namespace ml { +namespace config_t { //! Enumeration of the user specified data types. -enum EUserDataType -{ - E_UserCategorical, - E_UserNumeric -}; +enum EUserDataType { E_UserCategorical, E_UserNumeric }; //! Get a string for the data type. CONFIG_EXPORT -const std::string &print(EUserDataType type); +const std::string& print(EUserDataType type); //! Write the data type to a stream. CONFIG_EXPORT -std::ostream &operator<<(std::ostream &o, EUserDataType type); +std::ostream& operator<<(std::ostream& o, EUserDataType type); //! 
Enumeration of the data types we understand.
-enum EDataType
-{
-    E_UndeterminedType,
-    E_Binary,
-    E_Categorical,
-    E_PositiveInteger,
-    E_Integer,
-    E_PositiveReal,
-    E_Real
-};
+enum EDataType { E_UndeterminedType, E_Binary, E_Categorical, E_PositiveInteger, E_Integer, E_PositiveReal, E_Real };

 //! Check if the type is categorical.
 CONFIG_EXPORT
@@ -58,26 +43,14 @@ bool isInteger(EDataType type);

 //! Get a string for the data type.
 CONFIG_EXPORT
-const std::string &print(EDataType type);
+const std::string& print(EDataType type);

 //! Write the data type to a stream.
 CONFIG_EXPORT
-std::ostream &operator<<(std::ostream &o, EDataType type);
+std::ostream& operator<<(std::ostream& o, EDataType type);

 //! Enumeration of the top-level functions we'll consider configuring.
-enum EFunctionCategory
-{
-    E_Count,
-    E_Rare,
-    E_DistinctCount,
-    E_InfoContent,
-    E_Mean,
-    E_Min,
-    E_Max,
-    E_Sum,
-    E_Varp,
-    E_Median
-};
+enum EFunctionCategory { E_Count, E_Rare, E_DistinctCount, E_InfoContent, E_Mean, E_Min, E_Max, E_Sum, E_Varp, E_Median };

 //! Check if the function takes an argument.
 CONFIG_EXPORT
@@ -111,36 +84,27 @@ bool hasDoAndDontIgnoreEmptyVersions(EFunctionCategory function);

 //! Get the prefix of the function corresponding to \p ignoreEmpty
 //! and \p isPopulation.
 CONFIG_EXPORT
-const std::string &ignoreEmptyVersionName(EFunctionCategory function,
-                                          bool ignoreEmpty,
-                                          bool isPopulation);
+const std::string& ignoreEmptyVersionName(EFunctionCategory function, bool ignoreEmpty, bool isPopulation);

 //! Get a string for the function.
 CONFIG_EXPORT
-const std::string &print(EFunctionCategory function);
+const std::string& print(EFunctionCategory function);

 //! Write the function to a stream.
 CONFIG_EXPORT
-std::ostream &operator<<(std::ostream &o, EFunctionCategory function);
+std::ostream& operator<<(std::ostream& o, EFunctionCategory function);

 //! Enumeration of the sensitivity of the anomaly detection to high,
 //! low, or both tails.
-enum ESide
-{
-    E_HighSide,
-    E_LowSide,
-    E_TwoSide,
-    E_UndeterminedSide
-};
+enum ESide { E_HighSide, E_LowSide, E_TwoSide, E_UndeterminedSide };

 //! Get a string for the side.
 CONFIG_EXPORT
-const std::string &print(ESide side);
+const std::string& print(ESide side);

 //! Write the side to a stream.
 CONFIG_EXPORT
-std::ostream &operator<<(std::ostream &o, ESide side);
-
+std::ostream& operator<<(std::ostream& o, ESide side);
 }
 }
diff --git a/include/config/Constants.h b/include/config/Constants.h
index a33f6bce4d..62df27d0a0 100644
--- a/include/config/Constants.h
+++ b/include/config/Constants.h
@@ -14,12 +14,9 @@
 #include
 #include

-namespace ml
-{
-namespace config
-{
-namespace constants
-{
+namespace ml {
+namespace config {
+namespace constants {

 //! The initial score to apply to a detector. This is reduced to a minimum
 //! of zero, at which point the detector is discarded, based on a set of
@@ -52,19 +49,17 @@ const std::size_t PARTITION_INDEX = 3u;
 const std::size_t NUMBER_FIELD_INDICES = 4u;

 //! \brief Useful collections of field indices.
-class CONFIG_EXPORT CFieldIndices
-{
-    public:
-        //! The detector partitioning fields, i.e. by, over and partition.
-        static const std::size_t PARTITIONING[3];
-
-        //! All detector fields.
-        static const std::size_t ALL[4];
+class CONFIG_EXPORT CFieldIndices {
+public:
+    //! The detector partitioning fields, i.e. by, over and partition.
+    static const std::size_t PARTITIONING[3];
+
+    //! All detector fields.
+    static const std::size_t ALL[4];
 };

 //! The field name for \p index.
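Returning to the CTools interpolation helpers a little further up: the linear form presumably maps x in [a, b] onto the penalty line through (a, pa) and (b, pb). A sketch of that reading (an assumption about the semantics, not this patch's implementation):

    double interpolate(double a, double b, double pa, double pb, double x) {
        return pa + (pb - pa) * (x - a) / (b - a);
    }
    // e.g. interpolate(0.0, 10.0, 1.0, 0.0, 2.5) == 0.75: a quarter of the
    // way from "no penalty" (1.0) towards "score forced to zero" (0.0).
    // powInterpolate() and logInterpolate() presumably apply the same idea
    // on a power-law and logarithmic scale respectively.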
-CONFIG_EXPORT const std::string &name(std::size_t index); - +CONFIG_EXPORT const std::string& name(std::size_t index); } } } diff --git a/include/config/ImportExport.h b/include/config/ImportExport.h index d803030eef..324209b94f 100644 --- a/include/config/ImportExport.h +++ b/include/config/ImportExport.h @@ -37,4 +37,3 @@ #endif #endif // INCLUDED_ml_config_ImportExport_h - diff --git a/include/core/BoostMultiIndex.h b/include/core/BoostMultiIndex.h index 0ab14d3cce..3224bf70ac 100644 --- a/include/core/BoostMultiIndex.h +++ b/include/core/BoostMultiIndex.h @@ -9,19 +9,18 @@ //! \brief //! Boost headers + extra MACRO for boost multi-index objects -#include #include #include #include -#include #include +#include #include #include #include +#include //! Define a member function that returns a const reference -#define BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(Class,Type,MemberFunName) \ -::boost::multi_index::const_mem_fun< Class, const Type &, &Class::MemberFunName > +#define BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(Class, Type, MemberFunName) \ + ::boost::multi_index::const_mem_fun #endif // INCLUDED_ml_core_BoostMultiIndex_h - diff --git a/include/core/CAllocationStrategy.h b/include/core/CAllocationStrategy.h index cba51b0923..61367fb01d 100644 --- a/include/core/CAllocationStrategy.h +++ b/include/core/CAllocationStrategy.h @@ -11,10 +11,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! Container allocation strategy @@ -31,50 +29,42 @@ namespace core //! IMPLEMENTATION DECISIONS:\n //! Template class to work with different containers //! -class CORE_EXPORT CAllocationStrategy -{ - public: - //! Reserve a container working around implementation-specific silliness - template - static void reserve(T &t, std::size_t n) - { - t.reserve(n); - } +class CORE_EXPORT CAllocationStrategy { +public: + //! Reserve a container working around implementation-specific silliness + template + static void reserve(T& t, std::size_t n) { + t.reserve(n); + } - //! Resize a container using a 10% capacity increase - template - static void resize(T &t, std::size_t n) - { - if (n > t.capacity()) - { - CAllocationStrategy::reserve(t, n * 11 / 10); - } - t.resize(n); + //! Resize a container using a 10% capacity increase + template + static void resize(T& t, std::size_t n) { + if (n > t.capacity()) { + CAllocationStrategy::reserve(t, n * 11 / 10); } + t.resize(n); + } - //! Resize a container using a 10% capacity increase, with default value type - template - static void resize(T &t, std::size_t n, const typename T::value_type &v) - { - if (n > t.capacity()) - { - CAllocationStrategy::reserve(t, n * 11 / 10); - } - t.resize(n, v); + //! Resize a container using a 10% capacity increase, with default value type + template + static void resize(T& t, std::size_t n, const typename T::value_type& v) { + if (n > t.capacity()) { + CAllocationStrategy::reserve(t, n * 11 / 10); } + t.resize(n, v); + } - //! push_back an item to a container using a 10% capacity - //! increase - template - static void push_back(std::vector &v, const T &t) - { - std::size_t capacity = v.capacity(); - if (v.size() == capacity) - { - CAllocationStrategy::reserve(v, (capacity * 11 / 10) + 1); - } - v.push_back(t); + //! push_back an item to a container using a 10% capacity + //! 
increase + template + static void push_back(std::vector& v, const T& t) { + std::size_t capacity = v.capacity(); + if (v.size() == capacity) { + CAllocationStrategy::reserve(v, (capacity * 11 / 10) + 1); } + v.push_back(t); + } }; } // core diff --git a/include/core/CBase64Filter.h b/include/core/CBase64Filter.h index 7e4d991c73..6ff173258a 100644 --- a/include/core/CBase64Filter.h +++ b/include/core/CBase64Filter.h @@ -23,12 +23,9 @@ #include +namespace ml { -namespace ml -{ - -namespace core -{ +namespace core { //! \brief //! Convert a stream of bytes into Base64. //! @@ -74,106 +71,92 @@ namespace core //! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER //! DEALINGS IN THE SOFTWARE. //! -class CORE_EXPORT CBase64Encoder -{ - public: - using TUInt8Buf = boost::circular_buffer; - using TUInt8BufItr = TUInt8Buf::iterator; - using TUInt8BufCItr = TUInt8Buf::const_iterator; - - using char_type = char; - - //! Tell boost::iostreams what this filter is capable of - struct category : - public boost::iostreams::output, - public boost::iostreams::filter_tag, - public boost::iostreams::multichar_tag, - public boost::iostreams::closable_tag - {}; - - public: - //! Constructor - CBase64Encoder(); - - //! Destructor - virtual ~CBase64Encoder(); - - //! Interface method for handling stream data: n bytes are available from s, - //! and output is written to snk. - //! Note that up to n bytes should be read if possible, but we don't report - //! here how many bytes were actually written to the stream, only how many - //! we actually consumed from s. - template - std::streamsize write(SINK &snk, const char_type *s, std::streamsize n) - { - // copy into the buffer while there is data to read and space in the buffer - std::streamsize done = 0; - while (done < n) - { - std::streamsize toCopy = std::min(std::streamsize(n - done), - std::streamsize(m_Buffer.capacity() - m_Buffer.size())); - m_Buffer.insert(m_Buffer.end(), s + done, s + done + toCopy); - done += toCopy; - this->Encode(snk, false); - } - LOG_TRACE("Base64 write " << n); - return n; - } - - //! Interface method for terminating this filter class - flush - //! any remaining bytes and pad the output if necessary. - template - void close(SINK &snk) - { - this->Encode(snk, true); +class CORE_EXPORT CBase64Encoder { +public: + using TUInt8Buf = boost::circular_buffer; + using TUInt8BufItr = TUInt8Buf::iterator; + using TUInt8BufCItr = TUInt8Buf::const_iterator; + + using char_type = char; + + //! Tell boost::iostreams what this filter is capable of + struct category : public boost::iostreams::output, + public boost::iostreams::filter_tag, + public boost::iostreams::multichar_tag, + public boost::iostreams::closable_tag {}; + +public: + //! Constructor + CBase64Encoder(); + + //! Destructor + virtual ~CBase64Encoder(); + + //! Interface method for handling stream data: n bytes are available from s, + //! and output is written to snk. + //! Note that up to n bytes should be read if possible, but we don't report + //! here how many bytes were actually written to the stream, only how many + //! we actually consumed from s. 
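A quick usage note on the CAllocationStrategy helpers above: both resize() and push_back() over-reserve by 10% so that steady growth does not reallocate on every call. For example:

    std::vector<int> values;
    core::CAllocationStrategy::resize(values, 100); // reserves 110, resizes to 100
    for (int i = 0; i < 10; ++i) {
        // fits within the reserved capacity, so no reallocation here
        core::CAllocationStrategy::push_back(values, i);
    }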
+ template + std::streamsize write(SINK& snk, const char_type* s, std::streamsize n) { + // copy into the buffer while there is data to read and space in the buffer + std::streamsize done = 0; + while (done < n) { + std::streamsize toCopy = std::min(std::streamsize(n - done), std::streamsize(m_Buffer.capacity() - m_Buffer.size())); + m_Buffer.insert(m_Buffer.end(), s + done, s + done + toCopy); + done += toCopy; + this->Encode(snk, false); } - - private: - //! Do the actual work of encoding the data - take a chunck of buffered data and write - //! the converted output into the stream snk - template - void Encode(SINK &snk, bool isFinal) - { - using TUInt8BufCItrTransformItr = boost::archive::iterators::transform_width; - using TBase64Text = boost::archive::iterators::base64_from_binary; - - TUInt8BufItr endItr = m_Buffer.end(); - // Base64 turns 3 bytes into 4 characters - unless this is the final part - // of the string, we don't encode non-multiples of 3 - if (isFinal == false) - { - for (std::size_t i = (m_Buffer.size() % 3); i != 0; i--) - { - --endItr; - } + LOG_TRACE("Base64 write " << n); + return n; + } + + //! Interface method for terminating this filter class - flush + //! any remaining bytes and pad the output if necessary. + template + void close(SINK& snk) { + this->Encode(snk, true); + } + +private: + //! Do the actual work of encoding the data - take a chunck of buffered data and write + //! the converted output into the stream snk + template + void Encode(SINK& snk, bool isFinal) { + using TUInt8BufCItrTransformItr = boost::archive::iterators::transform_width; + using TBase64Text = boost::archive::iterators::base64_from_binary; + + TUInt8BufItr endItr = m_Buffer.end(); + // Base64 turns 3 bytes into 4 characters - unless this is the final part + // of the string, we don't encode non-multiples of 3 + if (isFinal == false) { + for (std::size_t i = (m_Buffer.size() % 3); i != 0; i--) { + --endItr; } + } - // Do the conversion - std::string e(TBase64Text(m_Buffer.begin()), TBase64Text(endItr)); + // Do the conversion + std::string e(TBase64Text(m_Buffer.begin()), TBase64Text(endItr)); - // Remove the encoded bytes from the buffer - m_Buffer.erase(m_Buffer.begin(), endItr); + // Remove the encoded bytes from the buffer + m_Buffer.erase(m_Buffer.begin(), endItr); - // Pad the final string if necessary - if (isFinal && !e.empty()) - { - std::size_t paddingCount = 4 - e.length() % 4; - for (std::size_t i = 0; i < paddingCount; i++) - { - e += '='; - } + // Pad the final string if necessary + if (isFinal && !e.empty()) { + std::size_t paddingCount = 4 - e.length() % 4; + for (std::size_t i = 0; i < paddingCount; i++) { + e += '='; } - LOG_TRACE("Encoded: " << e); - boost::iostreams::write(snk, e.c_str(), e.length()); } + LOG_TRACE("Encoded: " << e); + boost::iostreams::write(snk, e.c_str(), e.length()); + } - private: - //! Buffer the incoming stream data so that we can handle non-multiples of 3 - TUInt8Buf m_Buffer; - +private: + //! Buffer the incoming stream data so that we can handle non-multiples of 3 + TUInt8Buf m_Buffer; }; - //! \brief //! Convert a stream of Base64 characters to bytes //! @@ -219,189 +202,162 @@ class CORE_EXPORT CBase64Encoder //! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER //! DEALINGS IN THE SOFTWARE. //! 
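Before the decoder, a worked example of the 3-bytes-to-4-characters grouping both filters rely on:

    // "Man" = 0x4D 0x61 0x6E
    //       = 01001101 01100001 01101110    (24 bits)
    //      -> 010011 010110 000101 101110   (four 6-bit groups)
    //      -> 19, 22, 5, 46                 (Base64 alphabet indices)
    //      -> "TWFu"
    // Mid-stream, "Ma" alone is held back (size % 3 == 2) until a third byte
    // arrives; at close() the remainder is encoded and padded with '='.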
-class CORE_EXPORT CBase64Decoder -{ - public: - using TUInt8Buf = boost::circular_buffer; - using TUInt8BufItr = TUInt8Buf::iterator; - using TUInt8BufCItr = TUInt8Buf::const_iterator; - using TUInt8BufCRItr = TUInt8Buf::const_reverse_iterator; - using char_type = char; - - //! Tell boost::iostreams what this filter is capable of - struct category : - public boost::iostreams::input, - public boost::iostreams::filter_tag, - public boost::iostreams::multichar_tag, - public boost::iostreams::closable_tag - {}; - - public: - //! Constructor - CBase64Decoder(); - - //! Destructor - virtual ~CBase64Decoder(); - - //! Interface method: read as many bytes as we need from src, and - //! put up to n output bytes into s - //! The input bytes are buffered, decoded, and the decoded bytes - //! written to s. Note that we return the number of bytes written to - //! s, not the number of input bytes copied from src - template - std::streamsize read(SOURCE &src, char_type *s, std::streamsize n) - { - // copy into the buffer while there is data to read and space in the buffer - std::streamsize done = 0; - char buf[4096]; - while (done < n) - { - std::streamsize toCopy = std::min(std::streamsize(m_BufferOut.size()), std::streamsize(n - done)); - LOG_TRACE("Trying to copy " << toCopy << " bytes into stream, max " << n << ", available " << m_BufferOut.size()); - for (std::streamsize i = 0; i < toCopy; i++) - { - s[done++] = m_BufferOut.front(); - m_BufferOut.pop_front(); - } - LOG_TRACE("Eos: " << m_Eos << ", In: " << m_BufferIn.empty() << ", Out: " << m_BufferOut.empty()); - if (done == n) - { - break; - } - if ((done > 0) && m_BufferIn.empty() && m_BufferOut.empty() && m_Eos) - { - LOG_TRACE("Base64 READ " << done << ", from n " << n << ", left " << m_BufferOut.size() ); - return done; - } +class CORE_EXPORT CBase64Decoder { +public: + using TUInt8Buf = boost::circular_buffer; + using TUInt8BufItr = TUInt8Buf::iterator; + using TUInt8BufCItr = TUInt8Buf::const_iterator; + using TUInt8BufCRItr = TUInt8Buf::const_reverse_iterator; + using char_type = char; + + //! Tell boost::iostreams what this filter is capable of + struct category : public boost::iostreams::input, + public boost::iostreams::filter_tag, + public boost::iostreams::multichar_tag, + public boost::iostreams::closable_tag {}; + +public: + //! Constructor + CBase64Decoder(); + + //! Destructor + virtual ~CBase64Decoder(); + + //! Interface method: read as many bytes as we need from src, and + //! put up to n output bytes into s + //! The input bytes are buffered, decoded, and the decoded bytes + //! written to s. Note that we return the number of bytes written to + //! 
s, not the number of input bytes copied from src + template + std::streamsize read(SOURCE& src, char_type* s, std::streamsize n) { + // copy into the buffer while there is data to read and space in the buffer + std::streamsize done = 0; + char buf[4096]; + while (done < n) { + std::streamsize toCopy = std::min(std::streamsize(m_BufferOut.size()), std::streamsize(n - done)); + LOG_TRACE("Trying to copy " << toCopy << " bytes into stream, max " << n << ", available " << m_BufferOut.size()); + for (std::streamsize i = 0; i < toCopy; i++) { + s[done++] = m_BufferOut.front(); + m_BufferOut.pop_front(); + } + LOG_TRACE("Eos: " << m_Eos << ", In: " << m_BufferIn.empty() << ", Out: " << m_BufferOut.empty()); + if (done == n) { + break; + } + if ((done > 0) && m_BufferIn.empty() && m_BufferOut.empty() && m_Eos) { + LOG_TRACE("Base64 READ " << done << ", from n " << n << ", left " << m_BufferOut.size()); + return done; + } - // grab some data if we need it - if ((m_BufferIn.size() < 4) && (m_Eos == false)) - { - std::streamsize readBytes = boost::iostreams::read(src, buf, 4096); - LOG_TRACE("Read " << readBytes << " from input stream"); - if (readBytes == -1) - { - LOG_TRACE("Got EOS from underlying store"); - m_Eos = true; - } - else - { - for (std::streamsize i = 0; i < readBytes; i++) - { - // Only copy Base64 characters - JSON punctuation is ignored - // The dechunker parses JSON and should give us only base64 strings, - // but we don't want to try and decode anything which might cause - // the decoder to choke - switch(buf[i]) - { - case ']': - case '[': - case ',': - case '"': - case '{': - case '}': - case '\\': - case ' ': - case ':': - break; - - default: - m_BufferIn.push_back(static_cast(buf[i])); - break; - } + // grab some data if we need it + if ((m_BufferIn.size() < 4) && (m_Eos == false)) { + std::streamsize readBytes = boost::iostreams::read(src, buf, 4096); + LOG_TRACE("Read " << readBytes << " from input stream"); + if (readBytes == -1) { + LOG_TRACE("Got EOS from underlying store"); + m_Eos = true; + } else { + for (std::streamsize i = 0; i < readBytes; i++) { + // Only copy Base64 characters - JSON punctuation is ignored + // The dechunker parses JSON and should give us only base64 strings, + // but we don't want to try and decode anything which might cause + // the decoder to choke + switch (buf[i]) { + case ']': + case '[': + case ',': + case '"': + case '{': + case '}': + case '\\': + case ' ': + case ':': + break; + + default: + m_BufferIn.push_back(static_cast(buf[i])); + break; } } } - this->Decode(m_Eos); - if (m_Eos && m_BufferOut.empty() && m_BufferIn.empty() && (done == 0)) - { - LOG_TRACE("Returning -1 from read"); - return -1; - } } - LOG_TRACE("Base64 READ " << done << ", from n " << n << ", left " << m_BufferOut.size() ); - return done; + this->Decode(m_Eos); + if (m_Eos && m_BufferOut.empty() && m_BufferIn.empty() && (done == 0)) { + LOG_TRACE("Returning -1 from read"); + return -1; + } } - - //! Interface method - unused - template - void close(SOURCE &/*src*/) - { + LOG_TRACE("Base64 READ " << done << ", from n " << n << ", left " << m_BufferOut.size()); + return done; + } + + //! Interface method - unused + template + void close(SOURCE& /*src*/) {} + +private: + //! 
Perform the conversion from Base64 to raw bytes + void Decode(bool isFinal) { + // Base64 turns 4 characters into 3 bytes + using TUInt8BufCItrBinaryBase64Itr = boost::archive::iterators::binary_from_base64; + using TBase64Binary = boost::archive::iterators::transform_width; + + std::size_t inBytes = m_BufferIn.size(); + if (inBytes == 0) { + return; } - private: - - //! Perform the conversion from Base64 to raw bytes - void Decode(bool isFinal) - { - // Base64 turns 4 characters into 3 bytes - using TUInt8BufCItrBinaryBase64Itr = boost::archive::iterators::binary_from_base64; - using TBase64Binary = boost::archive::iterators::transform_width; - - std::size_t inBytes = m_BufferIn.size(); - if (inBytes == 0) - { + TUInt8BufItr endItr = m_BufferIn.end(); + std::size_t paddingBytes = 0; + // Only try and decode multiples of 4 characters, unless this is the last + // data in the stream + if (isFinal == false) { + if (inBytes < 4) { return; } - TUInt8BufItr endItr = m_BufferIn.end(); - std::size_t paddingBytes = 0; - // Only try and decode multiples of 4 characters, unless this is the last - // data in the stream - if (isFinal == false) - { - if (inBytes < 4) - { - return; - } - - for (std::size_t i = 0; i < inBytes % 4; i++) - { - LOG_TRACE("Ignoring end bytes of " << inBytes); - --endItr; - } + for (std::size_t i = 0; i < inBytes % 4; i++) { + LOG_TRACE("Ignoring end bytes of " << inBytes); + --endItr; } - else - { - // We can only work with 4 or more bytes, so with fewer there is something - // wrong, and there can't be a sensible outcome - if (inBytes < 4) - { - LOG_ERROR("Invalid size of stream for decoding: " << inBytes); - m_BufferIn.clear(); - return; - } + } else { + // We can only work with 4 or more bytes, so with fewer there is something + // wrong, and there can't be a sensible outcome + if (inBytes < 4) { + LOG_ERROR("Invalid size of stream for decoding: " << inBytes); + m_BufferIn.clear(); + return; } + } - // Check for padding characters - { - TUInt8BufCRItr i = m_BufferIn.rbegin(); - while ((i != m_BufferIn.rend()) && (*i == '=')) - { - ++i; - paddingBytes++; - } + // Check for padding characters + { + TUInt8BufCRItr i = m_BufferIn.rbegin(); + while ((i != m_BufferIn.rend()) && (*i == '=')) { + ++i; + paddingBytes++; } - LOG_TRACE("About to decode: " << std::string(m_BufferIn.begin(), endItr)); + } + LOG_TRACE("About to decode: " << std::string(m_BufferIn.begin(), endItr)); - m_BufferOut.insert(m_BufferOut.end(), TBase64Binary(m_BufferIn.begin()), TBase64Binary(endItr)); + m_BufferOut.insert(m_BufferOut.end(), TBase64Binary(m_BufferIn.begin()), TBase64Binary(endItr)); - // Remove padding bytes off the back of the stream - m_BufferOut.erase_end(paddingBytes); + // Remove padding bytes off the back of the stream + m_BufferOut.erase_end(paddingBytes); - // Remove the encoded bytes from the buffer - m_BufferIn.erase(m_BufferIn.begin(), endItr); - } + // Remove the encoded bytes from the buffer + m_BufferIn.erase(m_BufferIn.begin(), endItr); + } - private: - //! The input buffer - TUInt8Buf m_BufferIn; +private: + //! The input buffer + TUInt8Buf m_BufferIn; - //! The output buffer - TUInt8Buf m_BufferOut; + //! The output buffer + TUInt8Buf m_BufferOut; - //! Have we read all the available data from the downstream stream - bool m_Eos; + //! 
Have we read all the available data from the downstream stream + bool m_Eos; }; } // core diff --git a/include/core/CBlockingMessageQueue.h b/include/core/CBlockingMessageQueue.h index 7a2522dcf9..188d824cdd 100644 --- a/include/core/CBlockingMessageQueue.h +++ b/include/core/CBlockingMessageQueue.h @@ -17,11 +17,8 @@ #include -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! A thread safe fixed size message queue. @@ -52,248 +49,208 @@ namespace core //! The maximum queue size is specified as a template //! parameter, and hence cannot be changed at runtime. //! -template -class CBlockingMessageQueue -{ - public: - //! Prototype for function to be called on queue shutdown - using TShutdownFunc = std::function; - - public: - CBlockingMessageQueue(RECEIVER &receiver, - const TShutdownFunc &shutdownFunc = &CBlockingMessageQueue::defaultShutdownFunc) - : m_Thread(*this), - m_ProducerCondition(m_Mutex), - m_ConsumerCondition(m_Mutex), - m_Receiver(receiver), - m_Queue(QUEUE_CAPACITY), - m_ShutdownFunc(shutdownFunc) - { +template +class CBlockingMessageQueue { +public: + //! Prototype for function to be called on queue shutdown + using TShutdownFunc = std::function; + +public: + CBlockingMessageQueue(RECEIVER& receiver, const TShutdownFunc& shutdownFunc = &CBlockingMessageQueue::defaultShutdownFunc) + : m_Thread(*this), + m_ProducerCondition(m_Mutex), + m_ConsumerCondition(m_Mutex), + m_Receiver(receiver), + m_Queue(QUEUE_CAPACITY), + m_ShutdownFunc(shutdownFunc) {} + + virtual ~CBlockingMessageQueue() {} + + //! Initialise - create the receiving thread + bool start() { + CScopedLock lock(m_Mutex); + + if (m_Thread.start() == false) { + LOG_ERROR("Unable to initialise thread"); + return false; } - virtual ~CBlockingMessageQueue() - { - } + m_ProducerCondition.wait(); - //! Initialise - create the receiving thread - bool start() - { - CScopedLock lock(m_Mutex); + return true; + } - if (m_Thread.start() == false) - { - LOG_ERROR("Unable to initialise thread"); - return false; - } + //! Shutdown - kill thread + bool stop() { + m_Thread.stop(); - m_ProducerCondition.wait(); + return true; + } - return true; - } + //! Send a message to the message queue thread (from any thread) + void dispatchMsg(const MESSAGE& msg) { + size_t dummy(0); + this->dispatchMsg(msg, dummy); + } - //! Shutdown - kill thread - bool stop() - { - m_Thread.stop(); + //! Send a message to the message queue thread (from any thread), + //! and get the pending count at the same time + void dispatchMsg(const MESSAGE& msg, size_t& pending) { + CScopedLock lock(m_Mutex); - return true; - } + if (!m_Thread.isRunning()) { + pending = 0; - //! Send a message to the message queue thread (from any thread) - void dispatchMsg(const MESSAGE &msg) - { - size_t dummy(0); - this->dispatchMsg(msg, dummy); + // Should be fatal error + LOG_FATAL("Cannot dispatch to message queue. Queue not initialised"); + return; } - //! Send a message to the message queue thread (from any thread), - //! and get the pending count at the same time - void dispatchMsg(const MESSAGE &msg, size_t &pending) - { - CScopedLock lock(m_Mutex); + for (;;) { + // The pending count includes the item to be added + pending = 1 + m_Queue.size(); - if (!m_Thread.isRunning()) - { - pending = 0; - - // Should be fatal error - LOG_FATAL("Cannot dispatch to message queue. 
Queue not initialised"); - return; + if (pending <= QUEUE_CAPACITY) { + break; } - for (;;) - { - // The pending count includes the item to be added - pending = 1 + m_Queue.size(); - - if (pending <= QUEUE_CAPACITY) - { - break; - } - - m_ProducerCondition.wait(); - } - - m_Queue.push_back(msg); - - // If there was already work queued up, we can save the cost of - // signalling (which is expensive as it involves kernel interaction) - if (pending <= 1) - { - m_ConsumerCondition.signal(); - } + m_ProducerCondition.wait(); } - //! Get the number of pending messages in the queue. Note that it's - //! much more efficient to get this when dispatching a message, as - //! everything can then be done under a single mutex lock. This method - //! must be used sparingly to avoid excessive lock contention. - size_t pending() const - { - CScopedLock lock(m_Mutex); + m_Queue.push_back(msg); - return m_Queue.size(); + // If there was already work queued up, we can save the cost of + // signalling (which is expensive as it involves kernel interaction) + if (pending <= 1) { + m_ConsumerCondition.signal(); } + } - private: - //! No-op shutdown function if no other is provided - static void defaultShutdownFunc() - { - } + //! Get the number of pending messages in the queue. Note that it's + //! much more efficient to get this when dispatching a message, as + //! everything can then be done under a single mutex lock. This method + //! must be used sparingly to avoid excessive lock contention. + size_t pending() const { + CScopedLock lock(m_Mutex); - private: - class CMessageQueueThread : public CThread - { - public: - CMessageQueueThread(CBlockingMessageQueue &messageQueue) - : m_MessageQueue(messageQueue), - m_ShuttingDown(false), - m_IsRunning(false) - { - } + return m_Queue.size(); + } - //! The queue must have the mutex for this to be called - bool isRunning() const - { - // Assumes lock - return m_IsRunning; - } +private: + //! No-op shutdown function if no other is provided + static void defaultShutdownFunc() {} - protected: - void run() - { - m_MessageQueue.m_Mutex.lock(); - m_MessageQueue.m_ProducerCondition.signal(); +private: + class CMessageQueueThread : public CThread { + public: + CMessageQueueThread(CBlockingMessageQueue& messageQueue) + : m_MessageQueue(messageQueue), m_ShuttingDown(false), m_IsRunning(false) {} + + //! 
The queue must have the mutex for this to be called + bool isRunning() const { + // Assumes lock + return m_IsRunning; + } - m_IsRunning = true; + protected: + void run() { + m_MessageQueue.m_Mutex.lock(); + m_MessageQueue.m_ProducerCondition.signal(); - while (!m_ShuttingDown) - { - m_MessageQueue.m_ConsumerCondition.wait(); + m_IsRunning = true; - while (!m_MessageQueue.m_Queue.empty()) - { - MESSAGE &msg = m_MessageQueue.m_Queue.front(); + while (!m_ShuttingDown) { + m_MessageQueue.m_ConsumerCondition.wait(); - // Don't include the current work item in the backlog - size_t backlog(m_MessageQueue.m_Queue.size() - 1); + while (!m_MessageQueue.m_Queue.empty()) { + MESSAGE& msg = m_MessageQueue.m_Queue.front(); - m_MessageQueue.m_Mutex.unlock(); + // Don't include the current work item in the backlog + size_t backlog(m_MessageQueue.m_Queue.size() - 1); - m_MessageQueue.m_Receiver.processMsg(msg, backlog); + m_MessageQueue.m_Mutex.unlock(); - // Try to do as much deletion as possible outside - // the lock, so the pop_front() below is cheap - this->destroyMsgDataUnlocked(msg); + m_MessageQueue.m_Receiver.processMsg(msg, backlog); - m_MessageQueue.m_Mutex.lock(); + // Try to do as much deletion as possible outside + // the lock, so the pop_front() below is cheap + this->destroyMsgDataUnlocked(msg); - // If the queue was full, signal a thread waiting to - // add data - if (m_MessageQueue.m_Queue.size() == QUEUE_CAPACITY) - { - // Only do this if the queue is full, as it - // involves kernel interaction, so is expensive - m_MessageQueue.m_ProducerCondition.signal(); - } + m_MessageQueue.m_Mutex.lock(); - m_MessageQueue.m_Queue.pop_front(); - } + // If the queue was full, signal a thread waiting to + // add data + if (m_MessageQueue.m_Queue.size() == QUEUE_CAPACITY) { + // Only do this if the queue is full, as it + // involves kernel interaction, so is expensive + m_MessageQueue.m_ProducerCondition.signal(); } - m_IsRunning = false; + m_MessageQueue.m_Queue.pop_front(); + } + } - m_MessageQueue.m_ShutdownFunc(); + m_IsRunning = false; - m_MessageQueue.m_Mutex.unlock(); - } + m_MessageQueue.m_ShutdownFunc(); - void shutdown() - { - CScopedLock lock(m_MessageQueue.m_Mutex); + m_MessageQueue.m_Mutex.unlock(); + } - m_ShuttingDown = true; - m_MessageQueue.m_ConsumerCondition.signal(); - m_MessageQueue.m_ProducerCondition.broadcast(); - } + void shutdown() { + CScopedLock lock(m_MessageQueue.m_Mutex); - private: - //! It's best to minimise work done while the mutex is locked, - //! so ideally we'll clean up the MESSAGE object as much as - //! possible outside the lock. This is the most generic case, - //! where we can't do anything. - template - void destroyMsgDataUnlocked(ANYTHING &) - { - // For an arbitrary type we have no idea how to destroy some - // of its data without calling its destructor - } + m_ShuttingDown = true; + m_MessageQueue.m_ConsumerCondition.signal(); + m_MessageQueue.m_ProducerCondition.broadcast(); + } - //! Specialisation of the above that might delete the referenced - //! data if the MESSAGE type is a shared pointer (if no other - //! shared pointer points to it). - template - void destroyMsgDataUnlocked(boost::shared_ptr &ptr) - { - ptr.reset(); - } + private: + //! It's best to minimise work done while the mutex is locked, + //! so ideally we'll clean up the MESSAGE object as much as + //! possible outside the lock. This is the most generic case, + //! where we can't do anything. 
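A minimal usage sketch of the queue contract, assuming the template parameter list (stripped in the text above) is <MESSAGE, RECEIVER, QUEUE_CAPACITY> and a receiver exposing the processMsg(message, backlog) callback invoked from run():

    struct SLogReceiver {
        void processMsg(const std::string& msg, size_t backlog) {
            LOG_INFO("processing '" << msg << "', backlog " << backlog);
        }
    };

    SLogReceiver receiver;
    core::CBlockingMessageQueue<std::string, SLogReceiver, 100> queue(receiver);
    queue.start();              // spawns the consumer thread
    queue.dispatchMsg("hello"); // blocks only while 100 messages are pending
    queue.stop();               // signals shutdown and joins the thread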
+ template + void destroyMsgDataUnlocked(ANYTHING&) { + // For an arbitrary type we have no idea how to destroy some + // of its data without calling its destructor + } - // Other specialisations could potentially be added here + //! Specialisation of the above that might delete the referenced + //! data if the MESSAGE type is a shared pointer (if no other + //! shared pointer points to it). + template + void destroyMsgDataUnlocked(boost::shared_ptr& ptr) { + ptr.reset(); + } - private: - CBlockingMessageQueue &m_MessageQueue; - bool m_ShuttingDown; - bool m_IsRunning; - }; + // Other specialisations could potentially be added here - CMessageQueueThread m_Thread; - mutable CMutex m_Mutex; - CCondition m_ProducerCondition; - CCondition m_ConsumerCondition; - RECEIVER &m_Receiver; + private: + CBlockingMessageQueue& m_MessageQueue; + bool m_ShuttingDown; + bool m_IsRunning; + }; + + CMessageQueueThread m_Thread; + mutable CMutex m_Mutex; + CCondition m_ProducerCondition; + CCondition m_ConsumerCondition; + RECEIVER& m_Receiver; - //! Using a circular buffer for the queue means that it will not do any - //! memory allocations after construction (providing the message type - //! does not allocate any heap memory in its constructor). - using TMessageCircBuf = boost::circular_buffer; + //! Using a circular buffer for the queue means that it will not do any + //! memory allocations after construction (providing the message type + //! does not allocate any heap memory in its constructor). + using TMessageCircBuf = boost::circular_buffer; - TMessageCircBuf m_Queue; + TMessageCircBuf m_Queue; - //! Function to be called on queue shutdown - TShutdownFunc m_ShutdownFunc; + //! Function to be called on queue shutdown + TShutdownFunc m_ShutdownFunc; friend class CMessageQueueThread; }; - - } } #endif // INCLUDED_ml_core_CBlockingMessageQueue_h - diff --git a/include/core/CBufferFlushTimer.h b/include/core/CBufferFlushTimer.h index fdd109c1e3..7697564f8a 100644 --- a/include/core/CBufferFlushTimer.h +++ b/include/core/CBufferFlushTimer.h @@ -9,11 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! Class to hide the complexity of establishing when @@ -34,24 +31,20 @@ namespace core //! To avoid the method will return maxtime if //! maxTime hasn't changed for bufferDelay clock seconds. //! -class CORE_EXPORT CBufferFlushTimer -{ - public: - CBufferFlushTimer(); +class CORE_EXPORT CBufferFlushTimer { +public: + CBufferFlushTimer(); - core_t::TTime flushTime(core_t::TTime bufferDelay, - core_t::TTime bufferMaxTime); + core_t::TTime flushTime(core_t::TTime bufferDelay, core_t::TTime bufferMaxTime); - private: - //! The last reported 'max time' - core_t::TTime m_LastMaxTime; +private: + //! The last reported 'max time' + core_t::TTime m_LastMaxTime; - //! The last actual clock time of the flush - core_t::TTime m_LastFlushTime; + //! The last actual clock time of the flush + core_t::TTime m_LastFlushTime; }; - } } #endif // INCLUDED_ml_core_CBufferFlushTimer_h - diff --git a/include/core/CByteSwapper.h b/include/core/CByteSwapper.h index ba1ece6025..459f60a17f 100644 --- a/include/core/CByteSwapper.h +++ b/include/core/CByteSwapper.h @@ -13,12 +13,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Swap the order of bytes in a variable. @@ -37,25 +33,20 @@ namespace core //! to the caller to ensure it's only called for types where //! it's sensible, e.g. builtin numeric types. //! 
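For reference, a hypothetical caller of the class below, assuming the core headers are on the include path; swapBytes() reverses the bytes of a value, so applying it twice returns the original:

    #include <core/CByteSwapper.h>

    #include <cstdint>
    #include <iostream>

    int main() {
        std::uint32_t host = 0x12345678;
        // Reverses byte order: 0x12345678 becomes 0x78563412.
        std::uint32_t swapped = ml::core::CByteSwapper::swapBytes(host);
        std::cout << std::hex << swapped << std::endl;
        // Swapping again restores the original value.
        std::cout << std::hex << ml::core::CByteSwapper::swapBytes(swapped) << std::endl;
        return 0;
    }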
-class CORE_EXPORT CByteSwapper : private CNonInstantiatable -{ - public: - template - static TYPE swapBytes(TYPE var) - { - void *varAddress(&var); - uint8_t *begin(static_cast(varAddress)); - uint8_t *end(begin + sizeof(var)); - - std::reverse(begin, end); - - return var; - } +class CORE_EXPORT CByteSwapper : private CNonInstantiatable { +public: + template + static TYPE swapBytes(TYPE var) { + void* varAddress(&var); + uint8_t* begin(static_cast(varAddress)); + uint8_t* end(begin + sizeof(var)); + + std::reverse(begin, end); + + return var; + } }; - - } } #endif // INCLUDED_ml_core_CByteSwapper_h - diff --git a/include/core/CCTimeR.h b/include/core/CCTimeR.h index e29cd8eaaa..37329c1ffd 100644 --- a/include/core/CCTimeR.h +++ b/include/core/CCTimeR.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper for the ctime_r() function. @@ -29,15 +25,11 @@ namespace core //! ctime_s() function with slightly different semantics to Unix's //! ctime_r(). //! -class CORE_EXPORT CCTimeR : private CNonInstantiatable -{ - public: - static char *cTimeR(const time_t *clock, char *result); +class CORE_EXPORT CCTimeR : private CNonInstantiatable { +public: + static char* cTimeR(const time_t* clock, char* result); }; - - } } #endif // INCLUDED_ml_core_CCTimeR_h - diff --git a/include/core/CCompressOStream.h b/include/core/CCompressOStream.h index 679b40c087..ef57806d69 100644 --- a/include/core/CCompressOStream.h +++ b/include/core/CCompressOStream.h @@ -11,10 +11,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! An output stream that writes to a boost filtering_stream endpoint. @@ -30,53 +28,49 @@ namespace core //! manages the buffering of data between the client thread and //! the upload thread. //! -class CORE_EXPORT CCompressOStream : public std::ostream -{ - public: - //! Constructor - CCompressOStream(CStateCompressor::CChunkFilter &filter); +class CORE_EXPORT CCompressOStream : public std::ostream { +public: + //! Constructor + CCompressOStream(CStateCompressor::CChunkFilter& filter); - //! Destructor will close the stream - virtual ~CCompressOStream(); + //! Destructor will close the stream + virtual ~CCompressOStream(); - //! Close the stream - void close(); + //! Close the stream + void close(); - private: - class CCompressThread : public CThread - { - public: - CCompressThread(CCompressOStream &stream, - CDualThreadStreamBuf &streamBuf, - CStateCompressor::CChunkFilter &filter); +private: + class CCompressThread : public CThread { + public: + CCompressThread(CCompressOStream& stream, CDualThreadStreamBuf& streamBuf, CStateCompressor::CChunkFilter& filter); - protected: - //! Implementation of inherited interface - virtual void run(); - virtual void shutdown(); + protected: + //! Implementation of inherited interface + virtual void run(); + virtual void shutdown(); - public: - //! Reference to the owning stream - CCompressOStream &m_Stream; + public: + //! Reference to the owning stream + CCompressOStream& m_Stream; - //! Reference to the owning stream's buffer - CDualThreadStreamBuf &m_StreamBuf; + //! Reference to the owning stream's buffer + CDualThreadStreamBuf& m_StreamBuf; - private: - //! Reference to the output sink - this handles - //! downstream writing to datastore - CStateCompressor::CChunkFilter &m_FilterSink; + private: + //! Reference to the output sink - this handles + //! 
downstream writing to datastore + CStateCompressor::CChunkFilter& m_FilterSink; - //! The gzip filter to live within the new thread - CStateCompressor::TFilteredOutput m_OutFilter; - }; + //! The gzip filter to live within the new thread + CStateCompressor::TFilteredOutput m_OutFilter; + }; - private: - //! The stream buffer - CDualThreadStreamBuf m_StreamBuf; +private: + //! The stream buffer + CDualThreadStreamBuf m_StreamBuf; - //! Thread used for the upload - CCompressThread m_UploadThread; + //! Thread used for the upload + CCompressThread m_UploadThread; }; } // core diff --git a/include/core/CCompressUtils.h b/include/core/CCompressUtils.h index 20279df60b..e64a9355d1 100644 --- a/include/core/CCompressUtils.h +++ b/include/core/CCompressUtils.h @@ -14,11 +14,8 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! Shrink wrap zlib calls. @@ -38,67 +35,58 @@ namespace core //! a multi-threaded application it would be best to create //! one object for each thread. //! -class CORE_EXPORT CCompressUtils : private CNonCopyable -{ - public: - //! The output type - using TByteVec = std::vector; - - public: - explicit CCompressUtils(bool lengthOnly, - int level = Z_DEFAULT_COMPRESSION); - ~CCompressUtils(); - - //! Add a string. Multiple calls to this function without finishing the - //! compression are equivalent to compressing the concatenation of the - //! strings passed in the order they are passed. - bool addString(const std::string &input); - - //! Get compressed representation. This will fail if the lengthOnly - //! constructor argument was set to true. - //! - //! \note The compressed representation is a byte array NOT a string, - //! and hence not printable. - //! - //! If finish==false then retrieve partial compressed state. - bool compressedData(bool finish, TByteVec &result); - - //! Get compressed string length. - //! - //! If finish==false then retrieve partial compressed length. - bool compressedLength(bool finish, size_t &length); - - //! Reset the compressor. This will happen automatically when adding a - //! new string after having finished the previous compression, but - //! sometimes, for example when recovering from an error, it may be - //! desirable to explicitly reset the compressor state. - void reset(); - - private: - bool doCompress(bool finish, const std::string &input); - - private: - enum EState - { - E_Unused, - E_Compressing, - E_Finished - }; - - EState m_State; - - //! Is this object only fit for getting compressed lengths? - bool m_LengthOnly; - - //! The output buffer when the compressed result is being stored - TByteVec m_FullResult; - - //! The zlib data structure. - z_stream m_ZlibStrm; +class CORE_EXPORT CCompressUtils : private CNonCopyable { +public: + //! The output type + using TByteVec = std::vector; + +public: + explicit CCompressUtils(bool lengthOnly, int level = Z_DEFAULT_COMPRESSION); + ~CCompressUtils(); + + //! Add a string. Multiple calls to this function without finishing the + //! compression are equivalent to compressing the concatenation of the + //! strings passed in the order they are passed. + bool addString(const std::string& input); + + //! Get compressed representation. This will fail if the lengthOnly + //! constructor argument was set to true. + //! + //! \note The compressed representation is a byte array NOT a string, + //! and hence not printable. + //! + //! If finish==false then retrieve partial compressed state. 
+ bool compressedData(bool finish, TByteVec& result); + + //! Get compressed string length. + //! + //! If finish==false then retrieve partial compressed length. + bool compressedLength(bool finish, size_t& length); + + //! Reset the compressor. This will happen automatically when adding a + //! new string after having finished the previous compression, but + //! sometimes, for example when recovering from an error, it may be + //! desirable to explicitly reset the compressor state. + void reset(); + +private: + bool doCompress(bool finish, const std::string& input); + +private: + enum EState { E_Unused, E_Compressing, E_Finished }; + + EState m_State; + + //! Is this object only fit for getting compressed lengths? + bool m_LengthOnly; + + //! The output buffer when the compressed result is being stored + TByteVec m_FullResult; + + //! The zlib data structure. + z_stream m_ZlibStrm; }; - } } #endif // INCLUDED_ml_core_CCompressUtils_h - diff --git a/include/core/CCompressedDictionary.h b/include/core/CCompressedDictionary.h index 2fe1cfa1b6..76307ffbcd 100644 --- a/include/core/CCompressedDictionary.h +++ b/include/core/CCompressedDictionary.h @@ -22,11 +22,8 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief A space efficient representation of a dictionary. //! @@ -43,229 +40,176 @@ namespace core //! This uses murmur hash 2 behind the scenes because its distributions //! are good enough and it is very fast to compute. template -class CCompressedDictionary -{ +class CCompressedDictionary { +public: + using TUInt64Array = boost::array; + using TStrCPtr = const std::string*; + + //! \brief A hash representation of a string in the dictionary + //! with low probability of collision even for relatively large + //! dictionaries. + //! + //! DESCRIPTION:\n + //! We use N near independent 64 bit hashes of the strings. + //! Assuming the distributions are uniform (which is a reasonable + //! assumption, see below), the probability of a collision in + //! any of the hashes is: + //!
+    //!   \f$\displaystyle P = 1 - \prod_{i=0}^{n}{\left(1 - \frac{i}{2^{64}}\right)}\f$
+    //! </pre>
+    //! (See http://en.wikipedia.org/wiki/Birthday_problem.)
+    //!
+    //! It is relatively straightforward to show that for large
+    //! \f$n\f$ this is approximately:
+    //! <pre class="fragment">
+    //!   \f$\displaystyle P = 1 - \exp(\frac{-n^2}{2^{64}})\f$
+    //! </pre>
+    //!
+    //! So, for example, if \f$n = 2^{28}\f$, which would be a
+    //! dictionary containing 268 million words, the probability
+    //! of a single hash colliding is around 0.004. So for N
+    //! independent hashes we get a probability of all hashes
+    //! colliding of \f$0.004^N\f$. If N is 2 the probability of
+    //! a collision is \f$1.5 \times 10^{-5}\f$.
+    class CWord : private boost::equality_comparable1<CWord> {
+    public:
-        //!   \f$\displaystyle P = 1 - \prod_{i=0}^{n}{\left(1 - \frac{i}{2^{64}}\right)}\f$
-        //! </pre>
-        //! (See http://en.wikipedia.org/wiki/Birthday_problem.)
-        //!
-        //! It is relatively straightforward to show that for large
-        //! \f$n\f$ this is approximately:
-        //! <pre class="fragment">
-        //!   \f$\displaystyle P = 1 - \exp(\frac{-n^2}{2^{64}})\f$
-        //! </pre>
- //! - //! So, for example, if \f$n = 2^{28}\f$, which would be a - //! dictionary containing 268 million words the probability - //! of a single hash colliding is around 0.004. So for N - //! independent hashes we get a probability of all hashes - //! colliding of \f$0.004^N\f$. If N is 2 the probability of - //! a collision is \f$1.5 \times 10^{-5}\f$. - class CWord : private boost::equality_comparable1 > - { - public: - //! See CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - //! Used for converting to and from a delimited string. - static const char DELIMITER; - - public: - CWord() - { - std::fill(m_Hash.begin(), m_Hash.end(), 0); - } - CWord(const TUInt64Array &hash) : m_Hash(hash) {} - - bool operator==(const CWord &other) const - { - return m_Hash == other.m_Hash; - } - - bool operator<(const CWord &rhs) const - { - return m_Hash < rhs.m_Hash; - } - - bool fromDelimited(const std::string &str) - { - // expect N strings, separated by commas - std::size_t n = 0; - std::size_t pos = 0; - std::size_t comma = 0; - while ((comma = str.find(DELIMITER, pos)) != std::string::npos && n < N) - { - CStringUtils::stringToType(str.substr(pos, comma - pos), m_Hash[n++]); - pos = comma + 1; - } - if (n < N) - { - CStringUtils::stringToType(str.substr(pos, comma - pos), m_Hash[n++]); - } - return n == N; - } - - std::string toDelimited() const - { - std::string result = CStringUtils::typeToString(m_Hash[0]); - for (std::size_t i = 1; i < N; ++i) - { - result += DELIMITER; - result += CStringUtils::typeToString(m_Hash[i]); - } - return result; - } - - std::size_t hash() const - { - return static_cast(m_Hash[0]); - } - - uint64_t hash64() const - { - return m_Hash[0]; - } - - std::string print() const - { - return CContainerPrinter::print(m_Hash); - } - - private: - TUInt64Array m_Hash; - }; - - //! \brief A fast hash of a dictionary word. - class CHash : public std::unary_function - { - public: - inline std::size_t operator()(const CWord &word) const - { - return word.hash(); - } - }; - - //! The type of an ordered set of words. - using TWordSet = std::set; - - //! The type of an unordered set of words. - using TWordUSet = boost::unordered_set; - - //! A "template typedef" of an ordered map from words to - //! objects of type T. - template - class CWordMap - { - public: - using Type = std::map; - }; - - //! A "template typedef" of an unordered map from words to - //! objects of type T. - template - class CWordUMap - { - public: - using Type = boost::unordered_map; - }; + //! See CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + //! Used for converting to and from a delimited string. + static const char DELIMITER; public: - CCompressedDictionary() - { - // 472882027 and 982451653 are prime numbers, so repeatedly - // multiplying them will give many distinct seeds modulo 2 ^ 64. - // This method of seed generation was chosen in preference - // to using uniform random numbers as the Boost random number - // generators can generate different numbers on different - // platforms, even with the same random number generator seed. 
- m_Seeds[0] = 472882027; - for (std::size_t i = 1u; i < N; ++i) - { - m_Seeds[i] = m_Seeds[i - 1] * 982451653ull; + CWord() { std::fill(m_Hash.begin(), m_Hash.end(), 0); } + CWord(const TUInt64Array& hash) : m_Hash(hash) {} + + bool operator==(const CWord& other) const { return m_Hash == other.m_Hash; } + + bool operator<(const CWord& rhs) const { return m_Hash < rhs.m_Hash; } + + bool fromDelimited(const std::string& str) { + // expect N strings, separated by commas + std::size_t n = 0; + std::size_t pos = 0; + std::size_t comma = 0; + while ((comma = str.find(DELIMITER, pos)) != std::string::npos && n < N) { + CStringUtils::stringToType(str.substr(pos, comma - pos), m_Hash[n++]); + pos = comma + 1; + } + if (n < N) { + CStringUtils::stringToType(str.substr(pos, comma - pos), m_Hash[n++]); } + return n == N; } - //! Extract the dictionary word corresponding to \p word. - CWord word(const std::string &word) const - { - TUInt64Array hash; - for (std::size_t i = 0u; i < N; ++i) - { - hash[i] = CHashing::safeMurmurHash64(word.c_str(), - static_cast(word.size()), - m_Seeds[i]); + std::string toDelimited() const { + std::string result = CStringUtils::typeToString(m_Hash[0]); + for (std::size_t i = 1; i < N; ++i) { + result += DELIMITER; + result += CStringUtils::typeToString(m_Hash[i]); } - return CWord(hash); + return result; } - //! Extract the dictionary word corresponding to (\p word1, \p word2). - CWord word(const std::string &word1, - const std::string &word2) const - { - TStrCPtr words[] = {&word1, &word2}; - return this->word(words); - } + std::size_t hash() const { return static_cast(m_Hash[0]); } - //! Extract the dictionary word corresponding to (\p word1, \p word2, \p word3). - CWord word(const std::string &word1, - const std::string &word2, - const std::string &word3) const - { - TStrCPtr words[] = {&word1, &word2, &word3}; - return this->word(words); - } + uint64_t hash64() const { return m_Hash[0]; } - //! Extract the dictionary word corresponding to (\p word1, \p word2, \p word3, \p word4). - CWord word(const std::string &word1, - const std::string &word2, - const std::string &word3, - const std::string &word4) const - { - TStrCPtr words[] = {&word1, &word2, &word3, &word4}; - return this->word(words); - } + std::string print() const { return CContainerPrinter::print(m_Hash); } private: - template - CWord word(const TStrCPtr (&words)[NUMBER_OF_WORDS]) const - { - TUInt64Array hashes; - for (std::size_t i = 0u; i < N; ++i) - { - uint64_t &hash = hashes[i]; - for (std::size_t wordIndex = 0; wordIndex < NUMBER_OF_WORDS; ++wordIndex) - { - const std::string &word = *words[wordIndex]; - hash = CHashing::safeMurmurHash64(word.c_str(), - static_cast(word.size()), - (wordIndex) == 0 ? m_Seeds[i] : hash); - } + TUInt64Array m_Hash; + }; + + //! \brief A fast hash of a dictionary word. + class CHash : public std::unary_function { + public: + inline std::size_t operator()(const CWord& word) const { return word.hash(); } + }; + + //! The type of an ordered set of words. + using TWordSet = std::set; + + //! The type of an unordered set of words. + using TWordUSet = boost::unordered_set; + + //! A "template typedef" of an ordered map from words to + //! objects of type T. + template + class CWordMap { + public: + using Type = std::map; + }; + + //! A "template typedef" of an unordered map from words to + //! objects of type T. 
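Together with the CWordUMap typedef declared below, the dictionary lets callers key maps on compact fixed-size hashes instead of the strings themselves. A hypothetical usage sketch, assuming two 64 bit hashes (N = 2) as discussed above and the core headers on the include path:

    #include <core/CCompressedDictionary.h>

    #include <iostream>
    #include <string>

    int main() {
        using TDictionary = ml::core::CCompressedDictionary<2>;
        using TWordIntUMap = TDictionary::CWordUMap<int>::Type;

        TDictionary dictionary;
        TWordIntUMap counts;

        // Each distinct (person, attribute) pair maps to a compact
        // 128 bit key; lookups never compare the original strings.
        ++counts[dictionary.word("bob", "status_code")];
        ++counts[dictionary.word("alice", "status_code")];
        ++counts[dictionary.word("bob", "status_code")];

        std::cout << counts.size() << " distinct keys" << std::endl; // 2
        return 0;
    }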
+ template + class CWordUMap { + public: + using Type = boost::unordered_map; + }; + +public: + CCompressedDictionary() { + // 472882027 and 982451653 are prime numbers, so repeatedly + // multiplying them will give many distinct seeds modulo 2 ^ 64. + // This method of seed generation was chosen in preference + // to using uniform random numbers as the Boost random number + // generators can generate different numbers on different + // platforms, even with the same random number generator seed. + m_Seeds[0] = 472882027; + for (std::size_t i = 1u; i < N; ++i) { + m_Seeds[i] = m_Seeds[i - 1] * 982451653ull; + } + } + + //! Extract the dictionary word corresponding to \p word. + CWord word(const std::string& word) const { + TUInt64Array hash; + for (std::size_t i = 0u; i < N; ++i) { + hash[i] = CHashing::safeMurmurHash64(word.c_str(), static_cast(word.size()), m_Seeds[i]); + } + return CWord(hash); + } + + //! Extract the dictionary word corresponding to (\p word1, \p word2). + CWord word(const std::string& word1, const std::string& word2) const { + TStrCPtr words[] = {&word1, &word2}; + return this->word(words); + } + + //! Extract the dictionary word corresponding to (\p word1, \p word2, \p word3). + CWord word(const std::string& word1, const std::string& word2, const std::string& word3) const { + TStrCPtr words[] = {&word1, &word2, &word3}; + return this->word(words); + } + + //! Extract the dictionary word corresponding to (\p word1, \p word2, \p word3, \p word4). + CWord word(const std::string& word1, const std::string& word2, const std::string& word3, const std::string& word4) const { + TStrCPtr words[] = {&word1, &word2, &word3, &word4}; + return this->word(words); + } + +private: + template + CWord word(const TStrCPtr (&words)[NUMBER_OF_WORDS]) const { + TUInt64Array hashes; + for (std::size_t i = 0u; i < N; ++i) { + uint64_t& hash = hashes[i]; + for (std::size_t wordIndex = 0; wordIndex < NUMBER_OF_WORDS; ++wordIndex) { + const std::string& word = *words[wordIndex]; + hash = CHashing::safeMurmurHash64(word.c_str(), static_cast(word.size()), (wordIndex) == 0 ? m_Seeds[i] : hash); } - return CWord(hashes); } + return CWord(hashes); + } - private: - TUInt64Array m_Seeds; +private: + TUInt64Array m_Seeds; }; template const char CCompressedDictionary::CWord::DELIMITER(','); - } } diff --git a/include/core/CConcurrentQueue.h b/include/core/CConcurrentQueue.h index 4ee1a3f615..c1eef24fde 100644 --- a/include/core/CConcurrentQueue.h +++ b/include/core/CConcurrentQueue.h @@ -6,18 +6,16 @@ #ifndef INCLUDED_ml_core_CConcurrentQueue_h #define INCLUDED_ml_core_CConcurrentQueue_h -#include #include +#include #include #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! A thread safe concurrent queue. @@ -34,114 +32,94 @@ namespace core //! @tparam QUEUE_CAPACITY fixed queue capacity //! 
@tparam NOTIFY_CAPACITY special parameter, for signaling the producer in blocking case template -class CConcurrentQueue final: private CNonCopyable -{ - public: - CConcurrentQueue() - : m_Queue(QUEUE_CAPACITY) - { - static_assert(NOTIFY_CAPACITY > 0, "NOTIFY_CAPACITY must be positive"); - static_assert(QUEUE_CAPACITY >= NOTIFY_CAPACITY, "QUEUE_CAPACITY cannot be less than NOTIFY_CAPACITY"); +class CConcurrentQueue final : private CNonCopyable { +public: + CConcurrentQueue() : m_Queue(QUEUE_CAPACITY) { + static_assert(NOTIFY_CAPACITY > 0, "NOTIFY_CAPACITY must be positive"); + static_assert(QUEUE_CAPACITY >= NOTIFY_CAPACITY, "QUEUE_CAPACITY cannot be less than NOTIFY_CAPACITY"); + } + + //! Pop an item out of the queue, this blocks until an item is available + T pop() { + std::unique_lock lock(m_Mutex); + while (m_Queue.empty()) { + m_ConsumerCondition.wait(lock); } + size_t oldSize = m_Queue.size(); + auto val = m_Queue.front(); + m_Queue.pop_front(); - //! Pop an item out of the queue, this blocks until an item is available - T pop() - { - std::unique_lock lock(m_Mutex); - while (m_Queue.empty()) - { - m_ConsumerCondition.wait(lock); - } - size_t oldSize = m_Queue.size(); - auto val = m_Queue.front(); - m_Queue.pop_front(); - - // notification in case buffer was full - if (oldSize >= NOTIFY_CAPACITY) - { - lock.unlock(); - m_ProducerCondition.notify_all(); - } - return val; + // notification in case buffer was full + if (oldSize >= NOTIFY_CAPACITY) { + lock.unlock(); + m_ProducerCondition.notify_all(); } - - //! Pop an item out of the queue, this blocks until an item is available - void pop(T &item) - { - std::unique_lock lock(m_Mutex); - while (m_Queue.empty()) - { - m_ConsumerCondition.wait(lock); - } - - size_t oldSize = m_Queue.size(); - item = m_Queue.front(); - m_Queue.pop_front(); - - // notification in case buffer was full - if (oldSize >= NOTIFY_CAPACITY) - { - lock.unlock(); - m_ProducerCondition.notify_all(); - } + return val; + } + + //! Pop an item out of the queue, this blocks until an item is available + void pop(T& item) { + std::unique_lock lock(m_Mutex); + while (m_Queue.empty()) { + m_ConsumerCondition.wait(lock); } - //! Pop an item out of the queue, this blocks if the queue is full - //! which means it can deadlock if no one consumes items (implementor's responsibility) - void push(const T &item) - { - std::unique_lock lock(m_Mutex); - size_t pending = m_Queue.size(); - // block if buffer is full, this can deadlock if no one consumes items, - // implementor has to take care - while (pending >= QUEUE_CAPACITY) - { - m_ProducerCondition.wait(lock); - pending = m_Queue.size(); - } - - m_Queue.push_back(item); + size_t oldSize = m_Queue.size(); + item = m_Queue.front(); + m_Queue.pop_front(); + // notification in case buffer was full + if (oldSize >= NOTIFY_CAPACITY) { lock.unlock(); - if (pending == 0) - { - m_ConsumerCondition.notify_all(); - } + m_ProducerCondition.notify_all(); } - - //! Debug the memory used by this component. - void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CConcurrentQueue"); - CMemoryDebug::dynamicSize("m_Queue", m_Queue, mem); + } + + //! Pop an item out of the queue, this blocks if the queue is full + //! 
which means it can deadlock if no one consumes items (implementor's responsibility) + void push(const T& item) { + std::unique_lock lock(m_Mutex); + size_t pending = m_Queue.size(); + // block if buffer is full, this can deadlock if no one consumes items, + // implementor has to take care + while (pending >= QUEUE_CAPACITY) { + m_ProducerCondition.wait(lock); + pending = m_Queue.size(); } - //! Get the memory used by this component. - std::size_t memoryUsage() const - { - return CMemory::dynamicSize(m_Queue); - } + m_Queue.push_back(item); - // ! Return the number of items currently in the queue - size_t size() const - { - return m_Queue.size(); + lock.unlock(); + if (pending == 0) { + m_ConsumerCondition.notify_all(); } + } - private: - //! The internal queue - boost::circular_buffer m_Queue; + //! Debug the memory used by this component. + void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CConcurrentQueue"); + CMemoryDebug::dynamicSize("m_Queue", m_Queue, mem); + } - //! Mutex - std::mutex m_Mutex; + //! Get the memory used by this component. + std::size_t memoryUsage() const { return CMemory::dynamicSize(m_Queue); } - //! Condition variable for consumer - std::condition_variable m_ConsumerCondition; + // ! Return the number of items currently in the queue + size_t size() const { return m_Queue.size(); } - //! Condition variable for producers - std::condition_variable m_ProducerCondition; -}; +private: + //! The internal queue + boost::circular_buffer m_Queue; + + //! Mutex + std::mutex m_Mutex; + //! Condition variable for consumer + std::condition_variable m_ConsumerCondition; + + //! Condition variable for producers + std::condition_variable m_ProducerCondition; +}; } } diff --git a/include/core/CConcurrentWrapper.h b/include/core/CConcurrentWrapper.h index 455319e404..c5ee2138af 100644 --- a/include/core/CConcurrentWrapper.h +++ b/include/core/CConcurrentWrapper.h @@ -8,16 +8,14 @@ #define INCLUDED_ml_core_CConcurrentWrapper_h #include -#include #include +#include #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! A thread safe concurrent wrapper. @@ -31,76 +29,56 @@ namespace core //! @tparam QUEUE_CAPACITY internal queue capacity //! @tparam NOTIFY_CAPACITY special parameter, for signaling the producer in blocking case template -class CConcurrentWrapper final: private CNonCopyable -{ - public: - //! Wrap and return the wrapped object - //! - //! The object has to wrapped once and only once, pass the reference around in your code. - //! This starts a background thread. - CConcurrentWrapper(T &resource) : - m_Resource(resource), - m_Done(false) - { - m_Worker = std::thread([this] - { - while (!m_Done) - { - m_Queue.pop()(); - } - }); - } - - ~CConcurrentWrapper() - { - m_Queue.push([this] - { - m_Done = true; - }); - - m_Worker.join(); - } - - //! Push something into the queue of the wrapped object - //! The code inside of this lambda is guaranteed to be executed in an atomic fashion. - template - void operator()(F f) const - { - m_Queue.push([=] - { - f(m_Resource); - }); - } - - //! Debug the memory used by this component. - void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CConcurrentWrapper"); - m_Queue.debugMemoryUsage(mem->addChild()); - } - - //! Get the memory used by this component. - std::size_t memoryUsage() const - { - return m_Queue.memoryUsage(); - } - - private: - //! 
Queue for the tasks - mutable CConcurrentQueue, - QUEUE_CAPACITY, NOTIFY_CAPACITY> m_Queue; - - //! The wrapped resource - T &m_Resource; - - //! thread for the worker - std::thread m_Worker; - - //! boolean for stopping the worker - //! never touched outside of the main thread - bool m_Done; +class CConcurrentWrapper final : private CNonCopyable { +public: + //! Wrap and return the wrapped object + //! + //! The object has to wrapped once and only once, pass the reference around in your code. + //! This starts a background thread. + CConcurrentWrapper(T& resource) : m_Resource(resource), m_Done(false) { + m_Worker = std::thread([this] { + while (!m_Done) { + m_Queue.pop()(); + } + }); + } + + ~CConcurrentWrapper() { + m_Queue.push([this] { m_Done = true; }); + + m_Worker.join(); + } + + //! Push something into the queue of the wrapped object + //! The code inside of this lambda is guaranteed to be executed in an atomic fashion. + template + void operator()(F f) const { + m_Queue.push([=] { f(m_Resource); }); + } + + //! Debug the memory used by this component. + void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CConcurrentWrapper"); + m_Queue.debugMemoryUsage(mem->addChild()); + } + + //! Get the memory used by this component. + std::size_t memoryUsage() const { return m_Queue.memoryUsage(); } + +private: + //! Queue for the tasks + mutable CConcurrentQueue, QUEUE_CAPACITY, NOTIFY_CAPACITY> m_Queue; + + //! The wrapped resource + T& m_Resource; + + //! thread for the worker + std::thread m_Worker; + + //! boolean for stopping the worker + //! never touched outside of the main thread + bool m_Done; }; - } } diff --git a/include/core/CCondition.h b/include/core/CCondition.h index 17ccb7ec35..6d61a19aff 100644 --- a/include/core/CCondition.h +++ b/include/core/CCondition.h @@ -15,14 +15,10 @@ #endif #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CMutex; - //! \brief //! Wrapper around pthread_cond_wait. //! @@ -34,51 +30,47 @@ class CMutex; //! they do not consume system handles, and match the semantics //! of pthread condition variables. //! -class CORE_EXPORT CCondition : private CNonCopyable -{ - public: - CCondition(CMutex &); - ~CCondition(); - - //! Wait in current thread for signal - blocks. The wait may be - //! spuriously interrupted by a signal, so the caller must check a - //! condition that will detect spurious wakeups, and wait again if - //! necessary. - bool wait(); - - //! Timed wait in current thread for millisecs - blocks. The wait may - //! be spuriously interrupted by a signal, so the caller must check a - //! condition that will detect spurious wakeups, and wait again if - //! necessary. - bool wait(uint32_t t); - - //! Wake up a single thread that is blocked in wait - void signal(); - - //! Wake up all threads that are blocked in wait - void broadcast(); - - private: +class CORE_EXPORT CCondition : private CNonCopyable { +public: + CCondition(CMutex&); + ~CCondition(); + + //! Wait in current thread for signal - blocks. The wait may be + //! spuriously interrupted by a signal, so the caller must check a + //! condition that will detect spurious wakeups, and wait again if + //! necessary. + bool wait(); + + //! Timed wait in current thread for millisecs - blocks. The wait may + //! be spuriously interrupted by a signal, so the caller must check a + //! condition that will detect spurious wakeups, and wait again if + //! necessary. + bool wait(uint32_t t); + + //! 
Wake up a single thread that is blocked in wait + void signal(); + + //! Wake up all threads that are blocked in wait + void broadcast(); + +private: #ifndef Windows - //! Convert milliseconds to timespec - static bool convert(uint32_t, timespec &); + //! Convert milliseconds to timespec + static bool convert(uint32_t, timespec&); #endif - private: - //! Reference to associated mutex - CMutex &m_Mutex; +private: + //! Reference to associated mutex + CMutex& m_Mutex; - //! The condition variable +//! The condition variable #ifdef Windows - CONDITION_VARIABLE m_Condition; + CONDITION_VARIABLE m_Condition; #else - pthread_cond_t m_Condition; + pthread_cond_t m_Condition; #endif }; - - } } #endif // INCLUDED_ml_core_CCondition_h - diff --git a/include/core/CContainerPrinter.h b/include/core/CContainerPrinter.h index b21323f969..00508b7985 100644 --- a/include/core/CContainerPrinter.h +++ b/include/core/CContainerPrinter.h @@ -22,13 +22,10 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -namespace printer_detail -{ +namespace printer_detail { using true_ = boost::true_type; using false_ = boost::false_type; @@ -36,16 +33,14 @@ using false_ = boost::false_type; //! Auxiliary type used by has_const_iterator to test for a nested //! typedef. template -struct enable_if_has -{ +struct enable_if_has { using type = R; }; //! Auxiliary type used by has_print_function to test for a nested //! member function. template -struct enable_if_is -{ +struct enable_if_is { using type = R; }; @@ -63,14 +58,12 @@ struct enable_if_is //! 14.5.4/9 of the standard). //@{ template -struct has_const_iterator -{ +struct has_const_iterator { using value = false_; }; template -struct has_const_iterator::type> -{ +struct has_const_iterator::type> { using value = true_; }; //@} @@ -90,14 +83,12 @@ struct has_const_iterator: //! 14.5.4/9 of the standard). //@{ template -struct has_print_function -{ +struct has_print_function { using value = false_; }; template -struct has_print_function::type> -{ +struct has_print_function::type> { using value = true_; }; //@} @@ -111,65 +102,52 @@ struct has_print_function class CLeafPrinter {}; +template +class CLeafPrinter {}; template<> -class CLeafPrinter -{ - public: - static std::string print(const std::string &value) - { - return value; - } - - template - static std::string print(const T &value) - { - // Use CStringUtils if possible: it's much faster but - // only supports fundamental types. - return print_(value, typename boost::is_arithmetic::type()); - } - - private: - //! Fast CStringUtil implementation with bounds checking. - template - inline static std::string print_(T value, true_/*is arithmetic*/) - { - // For signed types only. - if (value != T(0) && value == boost::numeric::bounds::lowest()) - { - return "\"min\""; - } - if (value == boost::numeric::bounds::highest()) - { - return "\"max\""; - } - return CStringUtils::typeToStringPretty(value); - } - - //! Fast CStringUtil implementation. - inline static std::string print_(bool value, true_/*is arithmetic*/) - { - return CStringUtils::typeToStringPretty(value); +class CLeafPrinter { +public: + static std::string print(const std::string& value) { return value; } + + template + static std::string print(const T& value) { + // Use CStringUtils if possible: it's much faster but + // only supports fundamental types. + return print_(value, typename boost::is_arithmetic::type()); + } + +private: + //! Fast CStringUtil implementation with bounds checking. 
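The arithmetic branch below clamps the extremes of a type's range to the markers "min" and "max" before formatting. The same check in isolation, as a sketch; printArithmetic is an illustrative name, not part of the library:

    #include <boost/numeric/conversion/bounds.hpp>

    #include <iostream>
    #include <string>

    // Map the lowest/highest representable values to readable markers,
    // mirroring the bounds checks in CLeafPrinter::print_.
    template <typename T>
    std::string printArithmetic(T value) {
        // The != 0 guard matters for unsigned types, whose lowest() is 0.
        if (value != T(0) && value == boost::numeric::bounds<T>::lowest()) {
            return "\"min\"";
        }
        if (value == boost::numeric::bounds<T>::highest()) {
            return "\"max\"";
        }
        return std::to_string(value);
    }

    int main() {
        std::cout << printArithmetic(42) << std::endl;                                    // 42
        std::cout << printArithmetic(boost::numeric::bounds<int>::lowest()) << std::endl; // "min"
        return 0;
    }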
+ template + inline static std::string print_(T value, true_ /*is arithmetic*/) { + // For signed types only. + if (value != T(0) && value == boost::numeric::bounds::lowest()) { + return "\"min\""; } - - //! Slow std::ostringstream stream implementation. - template - inline static std::string print_(const T &value, false_/*is arithmetic*/) - { - std::ostringstream result; - result << value; - return result.str(); + if (value == boost::numeric::bounds::highest()) { + return "\"max\""; } + return CStringUtils::typeToStringPretty(value); + } + + //! Fast CStringUtil implementation. + inline static std::string print_(bool value, true_ /*is arithmetic*/) { return CStringUtils::typeToStringPretty(value); } + + //! Slow std::ostringstream stream implementation. + template + inline static std::string print_(const T& value, false_ /*is arithmetic*/) { + std::ostringstream result; + result << value; + return result.str(); + } }; template<> -class CLeafPrinter -{ - public: - template - static std::string print(const T &value) - { - return value.print(); - } +class CLeafPrinter { +public: + template + static std::string print(const T& value) { + return value.print(); + } }; //@} @@ -181,29 +159,25 @@ class CLeafPrinter //! //! \note Partial specializations can't be nested classes. //@{ -template class CNodePrinter {}; +template +class CNodePrinter {}; template -class CNodePrinter -{ - public: - template - static std::string print(const T &value) - { - return CLeafPrinter::value>::print(value); - } +class CNodePrinter { +public: + template + static std::string print(const T& value) { + return CLeafPrinter::value>::print(value); + } }; template -class CNodePrinter -{ - public: - template - static std::string print(const T &value) - { - return PRINTER::print(value); - } +class CNodePrinter { +public: + template + static std::string print(const T& value) { + return PRINTER::print(value); + } }; //@} - } //! \brief Prints STL compliant container objects and iterator ranges. @@ -247,183 +221,151 @@ class CNodePrinter //! use of std::ostringstream and isn't too careful about copying //! strings to be really high performance and so this functionality //! is primarily intended for testing and debugging. -class CORE_EXPORT CContainerPrinter : private CNonInstantiatable -{ - private: - //! Print a non associative container element for debug. - template - static std::string printElement(const T &value) - { - using namespace printer_detail; - using U = typename boost::unwrap_reference::type; - using Printer = CNodePrinter::value, - CContainerPrinter>; - return Printer::print(boost::unwrap_ref(value)); +class CORE_EXPORT CContainerPrinter : private CNonInstantiatable { +private: + //! Print a non associative container element for debug. + template + static std::string printElement(const T& value) { + using namespace printer_detail; + using U = typename boost::unwrap_reference::type; + using Printer = CNodePrinter::value, CContainerPrinter>; + return Printer::print(boost::unwrap_ref(value)); + } + + //! Print a non associative element pointer to const for debug. + template + static std::string printElement(const T* value) { + if (value == 0) { + return "\"null\""; } - - //! Print a non associative element pointer to const for debug. 
- template - static std::string printElement(const T *value) - { - if (value == 0) - { - return "\"null\""; - } - std::ostringstream result; - result << printElement(boost::unwrap_ref(*value)); - return result.str(); + std::ostringstream result; + result << printElement(boost::unwrap_ref(*value)); + return result.str(); + } + + //! Print a non associative element pointer for debug. + template + static std::string printElement(T* value) { + if (value == 0) { + return "\"null\""; } - - //! Print a non associative element pointer for debug. - template - static std::string printElement(T *value) - { - if (value == 0) - { - return "\"null\""; - } - std::ostringstream result; - result << printElement(boost::unwrap_ref(*value)); - return result.str(); + std::ostringstream result; + result << printElement(boost::unwrap_ref(*value)); + return result.str(); + } + + //! Print a std::auto_ptr. + template + static std::string printElement(const std::auto_ptr& value) { + if (value.get() == 0) { + return "\"null\""; } - - //! Print a std::auto_ptr. - template - static std::string printElement(const std::auto_ptr &value) - { - if (value.get() == 0) - { - return "\"null\""; - } - std::ostringstream result; - result << printElement(*value); - return result.str(); + std::ostringstream result; + result << printElement(*value); + return result.str(); + } + + //! Print a CStoredStringPtr + static std::string printElement(const CStoredStringPtr& value) { + if (value == nullptr) { + return "\"null\""; } - - //! Print a CStoredStringPtr - static std::string printElement(const CStoredStringPtr &value) - { - if (value == nullptr) - { - return "\"null\""; - } - return *value; + return *value; + } + + //! Print a boost::shared_pointer. + template + static std::string printElement(const boost::shared_ptr& value) { + if (value == boost::shared_ptr()) { + return "\"null\""; } - - //! Print a boost::shared_pointer. - template - static std::string printElement(const boost::shared_ptr &value) - { - if (value == boost::shared_ptr()) - { - return "\"null\""; - } - std::ostringstream result; - result << printElement(*value); - return result.str(); + std::ostringstream result; + result << printElement(*value); + return result.str(); + } + + // If you find yourself using some different smart pointer and + // it isn't printing please feel free to add an overload here. + + //! Print a non associative (boost) optional element for debug. + template + static std::string printElement(const boost::optional& value) { + if (!value) { + return "\"null\""; } - - // If you find yourself using some different smart pointer and - // it isn't printing please feel free to add an overload here. - - //! Print a non associative (boost) optional element for debug. + std::ostringstream result; + result << printElement(boost::unwrap_ref(*value)); + return result.str(); + } + + //! Print an associative container element for debug. + template + static std::string printElement(const std::pair& value) { + std::ostringstream result; + result << "(" << printElement(boost::unwrap_ref(value.first)) << ", " << printElement(boost::unwrap_ref(value.second)) << ")"; + return result.str(); + } + + //! Print a string for debug (otherwise we split them into their + //! component characters since they have iterators). + static std::string printElement(const std::string& value) { return value; } + +public: + //! Function object wrapper around printElement for use with STL. 
+ class CElementPrinter { + public: template - static std::string printElement(const boost::optional &value) - { - if (!value) - { - return "\"null\""; - } - std::ostringstream result; - result << printElement(boost::unwrap_ref(*value)); - return result.str(); - } - - //! Print an associative container element for debug. - template - static std::string printElement(const std::pair &value) - { - std::ostringstream result; - result << "(" << printElement(boost::unwrap_ref(value.first)) - << ", " << printElement(boost::unwrap_ref(value.second)) << ")"; - return result.str(); - } - - //! Print a string for debug (otherwise we split them into their - //! component characters since they have iterators). - static std::string printElement(const std::string &value) - { - return value; + std::string operator()(const T& value) { + return printElement(value); } - - public: - //! Function object wrapper around printElement for use with STL. - class CElementPrinter - { - public: - template - std::string operator()(const T &value) - { - return printElement(value); - } - }; - - //! Print a range of values as defined by a start and end iterator - //! for debug. This assumes that ITR is a forward iterator, i.e. - //! it must implement prefix ++ and * operators. - template - static std::string print(ITR begin, ITR end) - { - std::ostringstream result; - - result << "["; - if (begin != end) - { - for (;;) - { - result << printElement(*begin); - if (++begin == end) - { - break; - } - result << ", "; + }; + + //! Print a range of values as defined by a start and end iterator + //! for debug. This assumes that ITR is a forward iterator, i.e. + //! it must implement prefix ++ and * operators. + template + static std::string print(ITR begin, ITR end) { + std::ostringstream result; + + result << "["; + if (begin != end) { + for (;;) { + result << printElement(*begin); + if (++begin == end) { + break; } + result << ", "; } - result << "]"; - - return result.str(); - } - - //! Print a STL compliant container for debug. - template - static std::string print(const CONTAINER &container) - { - return print(boost::unwrap_ref(container).begin(), - boost::unwrap_ref(container).end()); - } - - //! Specialization for arrays. - template - static std::string print(const T (&array)[SIZE]) - { - return print(array, array + SIZE); - } - - //! Print a pair for debug. - template - static std::string print(const std::pair &value) - { - return printElement(value); - } - - //! Print an optional value for debug. - template - static std::string print(const boost::optional &value) - { - return printElement(value); } + result << "]"; + + return result.str(); + } + + //! Print a STL compliant container for debug. + template + static std::string print(const CONTAINER& container) { + return print(boost::unwrap_ref(container).begin(), boost::unwrap_ref(container).end()); + } + + //! Specialization for arrays. + template + static std::string print(const T (&array)[SIZE]) { + return print(array, array + SIZE); + } + + //! Print a pair for debug. + template + static std::string print(const std::pair& value) { + return printElement(value); + } + + //! Print an optional value for debug. 
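A hypothetical caller pulling the pieces above together, assuming the core headers are on the include path; containers print as bracketed lists, pairs as parenthesised tuples, and empty optionals as a quoted "null":

    #include <core/CContainerPrinter.h>

    #include <boost/optional.hpp>

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
        std::vector<std::pair<std::string, int>> counts{{"a", 1}, {"b", 2}};
        // Prints "[(a, 1), (b, 2)]".
        std::cout << ml::core::CContainerPrinter::print(counts) << std::endl;

        boost::optional<double> missing;
        // Prints "null" (quoted) for an empty optional.
        std::cout << ml::core::CContainerPrinter::print(missing) << std::endl;
        return 0;
    }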
+ template + static std::string print(const boost::optional& value) { + return printElement(value); + } }; - } } #endif // INCLUDED_ml_core_CContainerPrinter_h diff --git a/include/core/CCrashHandler.h b/include/core/CCrashHandler.h index d324bd5b91..7779edbd14 100644 --- a/include/core/CCrashHandler.h +++ b/include/core/CCrashHandler.h @@ -9,10 +9,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! Helper class to register a crashhandler and getting better traces. @@ -34,12 +32,10 @@ namespace core //! //! addr2line -e library normalized_address //! (library can be the symbol file) -class CORE_EXPORT CCrashHandler : private CNonInstantiatable -{ - public: - static void installCrashHandler(); +class CORE_EXPORT CCrashHandler : private CNonInstantiatable { +public: + static void installCrashHandler(); }; - } } diff --git a/include/core/CDataAdder.h b/include/core/CDataAdder.h index ef0fc94d76..c5d276caae 100644 --- a/include/core/CDataAdder.h +++ b/include/core/CDataAdder.h @@ -17,11 +17,8 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! Abstract interface for data adder. @@ -37,52 +34,45 @@ namespace core //! data store that can retrieve based on 2 values: index and ID. //! Elasticsearch supports this. //! -class CORE_EXPORT CDataAdder : private CNonCopyable -{ - public: - using TOStreamP = boost::shared_ptr; - using TDataAdderP = boost::shared_ptr; - - using TPersistFunc = std::function; - - public: - virtual ~CDataAdder(); - - //! Add streamed data - return of NULL stream indicates failure. - //! Since the data to be written isn't known at the time this function - //! returns it is not possible to detect all error conditions - //! immediately. If the stream goes bad whilst being written to then - //! this also indicates failure. - virtual TOStreamP addStreamed(const std::string &index, - const std::string &id) = 0; - - //! Clients that get a stream using addStreamed() must call this - //! method one they've finished sending data to the stream. - //! They should set force to true when the very last stream is - //! complete, in case the persister needs to close off some - //! sort of cached data structure. - virtual bool streamComplete(TOStreamP &strm, - bool force) = 0; - - //! The max number of documents that can go in a single - //! batch save - virtual std::size_t maxDocumentsPerBatchSave() const; - - //! The max size of a document - to be determined by the - //! underlying storage medium - virtual std::size_t maxDocumentSize() const; - - //! Get the current document ID given a base ID and current document - //! document number. The ID is of the form baseId#currentDocNum if - //! baseId is not empty, and simply currentDocNum converted to a string - //! if baseId is empty. - static std::string makeCurrentDocId(const std::string &baseId, - size_t currentDocNum); +class CORE_EXPORT CDataAdder : private CNonCopyable { +public: + using TOStreamP = boost::shared_ptr; + using TDataAdderP = boost::shared_ptr; + + using TPersistFunc = std::function; + +public: + virtual ~CDataAdder(); + + //! Add streamed data - return of NULL stream indicates failure. + //! Since the data to be written isn't known at the time this function + //! returns it is not possible to detect all error conditions + //! immediately. If the stream goes bad whilst being written to then + //! this also indicates failure. + virtual TOStreamP addStreamed(const std::string& index, const std::string& id) = 0; + + //! 
Clients that get a stream using addStreamed() must call this + //! method one they've finished sending data to the stream. + //! They should set force to true when the very last stream is + //! complete, in case the persister needs to close off some + //! sort of cached data structure. + virtual bool streamComplete(TOStreamP& strm, bool force) = 0; + + //! The max number of documents that can go in a single + //! batch save + virtual std::size_t maxDocumentsPerBatchSave() const; + + //! The max size of a document - to be determined by the + //! underlying storage medium + virtual std::size_t maxDocumentSize() const; + + //! Get the current document ID given a base ID and current document + //! document number. The ID is of the form baseId#currentDocNum if + //! baseId is not empty, and simply currentDocNum converted to a string + //! if baseId is empty. + static std::string makeCurrentDocId(const std::string& baseId, size_t currentDocNum); }; - - } } #endif // INCLUDED_ml_core_CDataAdder_h - diff --git a/include/core/CDataSearcher.h b/include/core/CDataSearcher.h index ed36b31297..bb02d4a226 100644 --- a/include/core/CDataSearcher.h +++ b/include/core/CDataSearcher.h @@ -16,10 +16,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! Interface class for retrieving data by running a search. @@ -34,41 +32,37 @@ namespace core //! //! The result of a successful search is a std::istream. //! -class CORE_EXPORT CDataSearcher : private CNonCopyable -{ - public: - using TStrVec = std::vector; - using TStrVecCItr = TStrVec::const_iterator; +class CORE_EXPORT CDataSearcher : private CNonCopyable { +public: + using TStrVec = std::vector; + using TStrVecCItr = TStrVec::const_iterator; - using TIStreamP = boost::shared_ptr; + using TIStreamP = boost::shared_ptr; - public: - //! Empty string - static const std::string EMPTY_STRING; +public: + //! Empty string + static const std::string EMPTY_STRING; - public: - CDataSearcher(); - virtual ~CDataSearcher(); +public: + CDataSearcher(); + virtual ~CDataSearcher(); - //! Do a search that results in an input stream. - //! A return value of NULL indicates a technical problem with the - //! creation of the stream. Other errors may be indicated by the - //! returned stream going into the "bad" state. - virtual TIStreamP search(size_t currentDocNum, size_t limit) = 0; + //! Do a search that results in an input stream. + //! A return value of NULL indicates a technical problem with the + //! creation of the stream. Other errors may be indicated by the + //! returned stream going into the "bad" state. + virtual TIStreamP search(size_t currentDocNum, size_t limit) = 0; - //! Set the search for all documents in the index - virtual void setStateRestoreSearch(const std::string &index); + //! Set the search for all documents in the index + virtual void setStateRestoreSearch(const std::string& index); - //! Set the search for all documents in the index with the ID - virtual void setStateRestoreSearch(const std::string &index, - const std::string &id); + //! 
Set the search for all documents in the index with the ID + virtual void setStateRestoreSearch(const std::string& index, const std::string& id); - protected: - TStrVec m_SearchTerms; +protected: + TStrVec m_SearchTerms; }; - } } #endif // INCLUDED_ml_core_CDataSearcher_h - diff --git a/include/core/CDelimiter.h b/include/core/CDelimiter.h index 34bc59c29a..a5624cffd2 100644 --- a/include/core/CDelimiter.h +++ b/include/core/CDelimiter.h @@ -6,18 +6,15 @@ #ifndef INCLUDED_ml_core_CDelimiter_h #define INCLUDED_ml_core_CDelimiter_h -#include #include #include +#include #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! Encapsulates a delimiter @@ -33,105 +30,87 @@ namespace core //! 2) A delimiter can match any number of spaces in a space delimited //! text file. //! -class CORE_EXPORT CDelimiter -{ - public: - //! Delimiter used by default constructor - static const std::string DEFAULT_DELIMITER; - - public: - //! Constructors. - CDelimiter(); - CDelimiter(const std::string &delimiter); - CDelimiter(const std::string &delimiter, - const std::string &followingRegex, - bool orTime = true); - - //! Operators - bool operator==(const CDelimiter &rhs) const; - bool operator!=(const CDelimiter &rhs) const; - - //! Check whether the text that followed the primary delimiter was - //! acceptable - bool isFollowingTextAcceptable(size_t searchPos, - const std::string &str, - bool timePassed) const; - - //! Is the delimiter valid? - bool valid() const; - - //! Accessor for primary delimiter - std::string delimiter() const; - - //! Tokenise a string - void tokenise(const std::string &str, - CStringUtils::TStrVec &tokens, - std::string &remainder) const; - - //! Tokenise a string, stating whether time has passed since the last - //! attempt - void tokenise(const std::string &str, - bool timePassed, - CStringUtils::TStrVec &tokens, - std::string &remainder) const; - - //! Tokenise a string, also retrieving an example of the literal - //! delimiter that was found - void tokenise(const std::string &str, - CStringUtils::TStrVec &tokens, - std::string &exampleDelimiter, - std::string &remainder) const; - - //! Tokenise a string, also retrieving an example of the literal - //! delimiter that was found, stating whether time has passed since the - //! last attempt - void tokenise(const std::string &str, - bool timePassed, - CStringUtils::TStrVec &tokens, - std::string &exampleDelimiter, - std::string &remainder) const; - - //! Set the quote character - void quote(char quote, char escape = '\\'); - - //! Get the quote character - char quote() const; - - private: - //! Get the position of the next unescaped quote within a string - size_t getNextQuote(const std::string &str, size_t startPos) const; - - private: - //! The primary delimiter - CRegex m_Delimiter; - bool m_Valid; - - //! Only treat the primary delimiter as a delimiter if it's followed by - //! this regular expression. - CRegex m_FollowingRegex; - bool m_HaveFollowingRegex; - - //! After some time has passed, should we waive the following regex? - bool m_WaiveFollowingRegexAfterTime; - - //! The quote character (or '\0' if there isn't one). - //! The main delimiter will be ignored if it's inside quotes. - char m_Quote; - - //! The character used to escape the quote character ('\0' if none). - char m_Escape; - - friend CORE_EXPORT std::ostream &operator<<(std::ostream &strm, - const CDelimiter &delimiter); +class CORE_EXPORT CDelimiter { +public: + //! 
Delimiter used by default constructor + static const std::string DEFAULT_DELIMITER; + +public: + //! Constructors. + CDelimiter(); + CDelimiter(const std::string& delimiter); + CDelimiter(const std::string& delimiter, const std::string& followingRegex, bool orTime = true); + + //! Operators + bool operator==(const CDelimiter& rhs) const; + bool operator!=(const CDelimiter& rhs) const; + + //! Check whether the text that followed the primary delimiter was + //! acceptable + bool isFollowingTextAcceptable(size_t searchPos, const std::string& str, bool timePassed) const; + + //! Is the delimiter valid? + bool valid() const; + + //! Accessor for primary delimiter + std::string delimiter() const; + + //! Tokenise a string + void tokenise(const std::string& str, CStringUtils::TStrVec& tokens, std::string& remainder) const; + + //! Tokenise a string, stating whether time has passed since the last + //! attempt + void tokenise(const std::string& str, bool timePassed, CStringUtils::TStrVec& tokens, std::string& remainder) const; + + //! Tokenise a string, also retrieving an example of the literal + //! delimiter that was found + void tokenise(const std::string& str, CStringUtils::TStrVec& tokens, std::string& exampleDelimiter, std::string& remainder) const; + + //! Tokenise a string, also retrieving an example of the literal + //! delimiter that was found, stating whether time has passed since the + //! last attempt + void tokenise(const std::string& str, + bool timePassed, + CStringUtils::TStrVec& tokens, + std::string& exampleDelimiter, + std::string& remainder) const; + + //! Set the quote character + void quote(char quote, char escape = '\\'); + + //! Get the quote character + char quote() const; + +private: + //! Get the position of the next unescaped quote within a string + size_t getNextQuote(const std::string& str, size_t startPos) const; + +private: + //! The primary delimiter + CRegex m_Delimiter; + bool m_Valid; + + //! Only treat the primary delimiter as a delimiter if it's followed by + //! this regular expression. + CRegex m_FollowingRegex; + bool m_HaveFollowingRegex; + + //! After some time has passed, should we waive the following regex? + bool m_WaiveFollowingRegexAfterTime; + + //! The quote character (or '\0' if there isn't one). + //! The main delimiter will be ignored if it's inside quotes. + char m_Quote; + + //! The character used to escape the quote character ('\0' if none). + char m_Escape; + + friend CORE_EXPORT std::ostream& operator<<(std::ostream& strm, const CDelimiter& delimiter); }; //! Useful for debugging and CPPUNIT_ASSERT_EQUALS -CORE_EXPORT std::ostream &operator<<(std::ostream &strm, - const CDelimiter &delimiter); - - +CORE_EXPORT std::ostream& operator<<(std::ostream& strm, const CDelimiter& delimiter); } } #endif // INCLUDED_ml_core_CDelimiter_h - diff --git a/include/core/CDetachedProcessSpawner.h b/include/core/CDetachedProcessSpawner.h index 15fb68ef84..50e53d353f 100644 --- a/include/core/CDetachedProcessSpawner.h +++ b/include/core/CDetachedProcessSpawner.h @@ -14,17 +14,12 @@ #include #include - -namespace ml -{ -namespace core -{ -namespace detail -{ +namespace ml { +namespace core { +namespace detail { class CTrackerThread; } - //! \brief //! Spawn detached processes. //! @@ -66,57 +61,51 @@ class CTrackerThread; //! entires in the lookup, and this could represent a security risk //! given how operating systems recycle process IDs.) //! 
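Illustration (not part of the patch): a minimal sketch of driving this interface. The "./my_tool" path and arguments are hypothetical, and including core/CProcess.h for CProcess::TPid is an assumption.

    #include <core/CDetachedProcessSpawner.h>
    #include <core/CProcess.h>

    int main() {
        // Only processes on the permitted list may be spawned, and the
        // path passed to spawn() must match the permitted entry exactly.
        ml::core::CDetachedProcessSpawner::TStrVec permitted{"./my_tool"};
        ml::core::CDetachedProcessSpawner spawner(permitted);

        ml::core::CDetachedProcessSpawner::TStrVec args{"--help"};
        ml::core::CProcess::TPid childPid = 0;
        if (spawner.spawn("./my_tool", args, childPid) && spawner.hasChild(childPid)) {
            // Only children spawned by this object can be terminated by it.
            spawner.terminateChild(childPid);
        }
        return 0;
    }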
-class CORE_EXPORT CDetachedProcessSpawner -{ - public: - using TStrVec = std::vector; - - using TTrackerThreadP = boost::shared_ptr; - - public: - //! Permitted paths may be relative or absolute, but each process must - //! be invoked using the exact path supplied. For example, if - //! /usr/bin/grep is permitted then you cannot invoke it as ./grep - //! while the current working directory is /usr/bin. On Windows, - //! the supplied names should NOT have the .exe extension. - CDetachedProcessSpawner(const TStrVec &permittedProcessPaths); - - ~CDetachedProcessSpawner(); - - //! Spawn a process. Returns true on success or false on error, - //! however, it is important to realise that if the spawned process - //! itself crashes this will not be detected as a failure by this - //! method. On Windows, the supplied process path should NOT have the - //! .exe extension. - bool spawn(const std::string &processPath, const TStrVec &args); - - //! As above, but, on success, returns the PID of the process that was - //! started. - bool spawn(const std::string &processPath, - const TStrVec &args, - CProcess::TPid &childPid); - - //! Kill the child process with the specified PID. If there is a - //! process running with the specified PID that was not spawned by this - //! object then it will NOT be killed. - bool terminateChild(CProcess::TPid pid); - - //! Returns true if this object spawned a process with the given PID - //! that is still running. - bool hasChild(CProcess::TPid pid) const; - - private: - //! Paths to processes that may be spawned. - TStrVec m_PermittedProcessPaths; - - //! Thread to track which processes that have been created are still - //! alive. - TTrackerThreadP m_TrackerThread; +class CORE_EXPORT CDetachedProcessSpawner { +public: + using TStrVec = std::vector; + + using TTrackerThreadP = boost::shared_ptr; + +public: + //! Permitted paths may be relative or absolute, but each process must + //! be invoked using the exact path supplied. For example, if + //! /usr/bin/grep is permitted then you cannot invoke it as ./grep + //! while the current working directory is /usr/bin. On Windows, + //! the supplied names should NOT have the .exe extension. + CDetachedProcessSpawner(const TStrVec& permittedProcessPaths); + + ~CDetachedProcessSpawner(); + + //! Spawn a process. Returns true on success or false on error, + //! however, it is important to realise that if the spawned process + //! itself crashes this will not be detected as a failure by this + //! method. On Windows, the supplied process path should NOT have the + //! .exe extension. + bool spawn(const std::string& processPath, const TStrVec& args); + + //! As above, but, on success, returns the PID of the process that was + //! started. + bool spawn(const std::string& processPath, const TStrVec& args, CProcess::TPid& childPid); + + //! Kill the child process with the specified PID. If there is a + //! process running with the specified PID that was not spawned by this + //! object then it will NOT be killed. + bool terminateChild(CProcess::TPid pid); + + //! Returns true if this object spawned a process with the given PID + //! that is still running. + bool hasChild(CProcess::TPid pid) const; + +private: + //! Paths to processes that may be spawned. + TStrVec m_PermittedProcessPaths; + + //! Thread to track which processes that have been created are still + //! alive. 
+ TTrackerThreadP m_TrackerThread; }; - - } } #endif // INCLUDED_ml_core_CDetachedProcessSpawner_h - diff --git a/include/core/CDualThreadStreamBuf.h b/include/core/CDualThreadStreamBuf.h index de7e5156c8..6aa8a8b6f0 100644 --- a/include/core/CDualThreadStreamBuf.h +++ b/include/core/CDualThreadStreamBuf.h @@ -14,11 +14,8 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! A stream buffer where reads and writes are processed in different threads. @@ -59,135 +56,130 @@ namespace core //! std::streambuf: //! http://www.cplusplus.com/reference/streambuf/streambuf/ //! -class CORE_EXPORT CDualThreadStreamBuf : public std::streambuf -{ - public: - //! By default, the three buffers will initially have this size. They - //! may potentially grow if characters are put back into them. - static const size_t DEFAULT_BUFFER_CAPACITY; - - public: - //! Constructor initialises buffers - CDualThreadStreamBuf(size_t bufferCapacity = DEFAULT_BUFFER_CAPACITY); - - //! Set the end-of-file flag - void signalEndOfFile(); - - //! Get the end-of-file flag - bool endOfFile() const; - - //! Set the fatal error flag - void signalFatalError(); - - //! Get the fatal error flag - bool hasFatalError() const; - - protected: - //! Get an estimate of the number of characters still to read after an - //! underflow. In the case of this class we return the amount of data - //! in the intermediate buffer. - virtual std::streamsize showmanyc(); - - //! Switch the buffers immediately. Effectively this flushes data - //! through with lower latency but also less efficiently. - virtual int sync(); - - //! Get up to n characters from the read buffer and store them in the - //! array pointed to by s. - virtual std::streamsize xsgetn(char *s, std::streamsize n); - - //! Try to obtain more data for the write buffer. This is done by - //! swapping it with the intermediate buffer. This may block if no data - //! is available to read in the intermediate buffer. - virtual int underflow(); - - //! Put character back in the case of backup underflow. - virtual int pbackfail(int c = traits_type::eof()); - - //! Write up to n characters from the array pointed to by s into the - //! write buffer. - virtual std::streamsize xsputn(const char *s, std::streamsize n); - - //! Try to obtain more space in the write buffer. This is done by - //! swapping it with the intermediate buffer. This may block if no data - //! is available to read in the intermediate buffer. - virtual int overflow(int c = traits_type::eof()); - - //! In a random access stream this would seek to the specified position. - //! This class does not support such seeking, but implements this method - //! allowing a zero byte seek in order to allow tellg() and tellp() to - //! work on the connected stream. - virtual std::streampos seekoff(std::streamoff off, - std::ios_base::seekdir way, - std::ios_base::openmode which = std::ios_base::in | std::ios_base::out); - - private: - //! Swap the intermediate buffer with the write buffer. Will block if - //! the intermediate buffer is not empty. NB: m_IntermediateBufferMutex - //! MUST be locked when this method is called. - bool swapWriteBuffer(); - - //! Swap the intermediate buffer with the read buffer. Will block if - //! the intermediate buffer is empty. NB: m_IntermediateBufferMutex - //! MUST be locked when this method is called. - bool swapReadBuffer(); - - private: - //! Used to manage the two buffers. - using TScopedCharArray = boost::scoped_array; - - //! 
Buffer that put functions will write to. - TScopedCharArray m_WriteBuffer; - - //! Capacity of the write buffer. - size_t m_WriteBufferCapacity; - - //! Buffer that get functions will read from. - TScopedCharArray m_ReadBuffer; - - //! Capacity of the read buffer. - size_t m_ReadBufferCapacity; - - //! Buffer that get functions will read from. - TScopedCharArray m_IntermediateBuffer; - - //! Capacity of the read buffer. - size_t m_IntermediateBufferCapacity; - - //! End of data held in the intermediate buffer. If this points at the - //! beginning of the intermediate buffer, the implication is that the - //! buffer is empty. - char *m_IntermediateBufferEnd; - - //! Number of bytes that have been swapped from the read buffer to the - //! intermediate buffer over the lifetime of this object. Enables - //! tellg() to work on an associated istream. - size_t m_ReadBytesSwapped; - - //! Number of bytes that have been swapped from the write buffer to the - //! intermediate buffer over the lifetime of this object. Enables - //! tellp() to work on an associated ostream. - size_t m_WriteBytesSwapped; - - //! A lock to protect swapping of the buffers and manage blocking when - CMutex m_IntermediateBufferMutex; - - //! A condition to wait on ing of the buffers and manage blocking when - CCondition m_IntermediateBufferCondition; - - //! Flag to indicate end-of-file. When this is set, the reader will - //! receive end-of-file notification once all the buffers are empty. - //! The writer will not be allowed to add any more data. - volatile bool m_Eof; - - //! A call to signalFatalError() chucks away all currently buffered data - //! and prevents future data being added. - volatile bool m_FatalError; -}; +class CORE_EXPORT CDualThreadStreamBuf : public std::streambuf { +public: + //! By default, the three buffers will initially have this size. They + //! may potentially grow if characters are put back into them. + static const size_t DEFAULT_BUFFER_CAPACITY; + +public: + //! Constructor initialises buffers + CDualThreadStreamBuf(size_t bufferCapacity = DEFAULT_BUFFER_CAPACITY); + + //! Set the end-of-file flag + void signalEndOfFile(); + + //! Get the end-of-file flag + bool endOfFile() const; + + //! Set the fatal error flag + void signalFatalError(); + //! Get the fatal error flag + bool hasFatalError() const; + +protected: + //! Get an estimate of the number of characters still to read after an + //! underflow. In the case of this class we return the amount of data + //! in the intermediate buffer. + virtual std::streamsize showmanyc(); + + //! Switch the buffers immediately. Effectively this flushes data + //! through with lower latency but also less efficiently. + virtual int sync(); + + //! Get up to n characters from the read buffer and store them in the + //! array pointed to by s. + virtual std::streamsize xsgetn(char* s, std::streamsize n); + + //! Try to obtain more data for the write buffer. This is done by + //! swapping it with the intermediate buffer. This may block if no data + //! is available to read in the intermediate buffer. + virtual int underflow(); + + //! Put character back in the case of backup underflow. + virtual int pbackfail(int c = traits_type::eof()); + + //! Write up to n characters from the array pointed to by s into the + //! write buffer. + virtual std::streamsize xsputn(const char* s, std::streamsize n); + + //! Try to obtain more space in the write buffer. This is done by + //! swapping it with the intermediate buffer. This may block if no data + //! 
is available to read in the intermediate buffer.
+    virtual int overflow(int c = traits_type::eof());
+
+    //! In a random access stream this would seek to the specified position.
+    //! This class does not support such seeking, but implements this method
+    //! allowing a zero byte seek in order to allow tellg() and tellp() to
+    //! work on the connected stream.
+    virtual std::streampos
+    seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out);
+
+private:
+    //! Swap the intermediate buffer with the write buffer. Will block if
+    //! the intermediate buffer is not empty. NB: m_IntermediateBufferMutex
+    //! MUST be locked when this method is called.
+    bool swapWriteBuffer();
+
+    //! Swap the intermediate buffer with the read buffer. Will block if
+    //! the intermediate buffer is empty. NB: m_IntermediateBufferMutex
+    //! MUST be locked when this method is called.
+    bool swapReadBuffer();
+
+private:
+    //! Used to manage the two buffers.
+    using TScopedCharArray = boost::scoped_array<char>;
+
+    //! Buffer that put functions will write to.
+    TScopedCharArray m_WriteBuffer;
+
+    //! Capacity of the write buffer.
+    size_t m_WriteBufferCapacity;
+
+    //! Buffer that get functions will read from.
+    TScopedCharArray m_ReadBuffer;
+
+    //! Capacity of the read buffer.
+    size_t m_ReadBufferCapacity;
+
+    //! Intermediate buffer via which data passes from the write buffer
+    //! to the read buffer.
+    TScopedCharArray m_IntermediateBuffer;
+
+    //! Capacity of the intermediate buffer.
+    size_t m_IntermediateBufferCapacity;
+
+    //! End of data held in the intermediate buffer. If this points at the
+    //! beginning of the intermediate buffer, the implication is that the
+    //! buffer is empty.
+    char* m_IntermediateBufferEnd;
+
+    //! Number of bytes that have been swapped from the read buffer to the
+    //! intermediate buffer over the lifetime of this object. Enables
+    //! tellg() to work on an associated istream.
+    size_t m_ReadBytesSwapped;
+
+    //! Number of bytes that have been swapped from the write buffer to the
+    //! intermediate buffer over the lifetime of this object. Enables
+    //! tellp() to work on an associated ostream.
+    size_t m_WriteBytesSwapped;
+
+    //! A lock to protect swapping of the buffers and to manage blocking
+    //! when a swap cannot proceed immediately.
+    CMutex m_IntermediateBufferMutex;
+
+    //! A condition to wait on while swapping of the buffers is blocked.
+    CCondition m_IntermediateBufferCondition;
+
+    //! Flag to indicate end-of-file. When this is set, the reader will
+    //! receive end-of-file notification once all the buffers are empty.
+    //! The writer will not be allowed to add any more data.
+    volatile bool m_Eof;
+
+    //! A call to signalFatalError() chucks away all currently buffered data
+    //! and prevents future data being added.
+    volatile bool m_FatalError;
+};
 }
 }
 
 #endif // INCLUDED_ml_core_CDualThreadStreamBuf_h
-
diff --git a/include/core/CFastMutex.h b/include/core/CFastMutex.h
index cc240fb782..524db29835 100644
--- a/include/core/CFastMutex.h
+++ b/include/core/CFastMutex.h
@@ -18,12 +18,8 @@
 #endif
 #endif
 
-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {
 
 //! \brief
 //! Wrapper class around a fast simple mutex.
 //!
 //! write lock). These are faster than critical sections, presumably
 //! because critical sections are recursive.
 //!
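Illustration (not part of the patch): a minimal RAII guard over the lock()/unlock() interface declared below. CScopedFastLock is a hypothetical helper, not part of the library.

    #include <core/CFastMutex.h>

    // Locks on construction, unlocks on destruction, so early returns
    // and exceptions cannot leave the mutex held.
    class CScopedFastLock {
    public:
        explicit CScopedFastLock(ml::core::CFastMutex& mutex) : m_Mutex(mutex) {
            m_Mutex.lock();
        }
        ~CScopedFastLock() { m_Mutex.unlock(); }

        CScopedFastLock(const CScopedFastLock&) = delete;
        CScopedFastLock& operator=(const CScopedFastLock&) = delete;

    private:
        ml::core::CFastMutex& m_Mutex;
    };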
-class CORE_EXPORT CFastMutex : private CNonCopyable -{ - public: - CFastMutex(); - ~CFastMutex(); +class CORE_EXPORT CFastMutex : private CNonCopyable { +public: + CFastMutex(); + ~CFastMutex(); - void lock(); - void unlock(); + void lock(); + void unlock(); - private: +private: #ifdef Windows - SRWLOCK m_Mutex; + SRWLOCK m_Mutex; #elif defined(MacOSX) - OSSpinLock m_Mutex; + OSSpinLock m_Mutex; #else - pthread_mutex_t m_Mutex; + pthread_mutex_t m_Mutex; #endif }; - - } } #endif // INCLUDED_ml_core_CFastMutex_h - diff --git a/include/core/CFileDeleter.h b/include/core/CFileDeleter.h index e299bd0956..b29cfc4e6c 100644 --- a/include/core/CFileDeleter.h +++ b/include/core/CFileDeleter.h @@ -11,11 +11,8 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! RAII file deleter @@ -35,22 +32,18 @@ namespace core //! to take a file descriptor/FILE pointer/stream reference //! and close this before attempting to delete the file. //! -class CORE_EXPORT CFileDeleter : private CNonCopyable -{ - public: - //! Record the name of the file to delete - CFileDeleter(const std::string &fileName); +class CORE_EXPORT CFileDeleter : private CNonCopyable { +public: + //! Record the name of the file to delete + CFileDeleter(const std::string& fileName); - //! Attempt to remove the specified file - ~CFileDeleter(); + //! Attempt to remove the specified file + ~CFileDeleter(); - private: - std::string m_FileName; +private: + std::string m_FileName; }; - - } } #endif // INCLUDED_ml_core_CFileDeleter_h - diff --git a/include/core/CFlatPrefixTree.h b/include/core/CFlatPrefixTree.h index e258b5c727..52173d5cf1 100644 --- a/include/core/CFlatPrefixTree.h +++ b/include/core/CFlatPrefixTree.h @@ -14,10 +14,8 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief A flat prefix tree that allows efficient string lookups //! @@ -47,103 +45,96 @@ namespace core //! binary search on the first character, moving on to the node indicated by //! the characters next index, applying binary search on the second character, //! and so on. -class CORE_EXPORT CFlatPrefixTree -{ - public: - using TStrVec = std::vector; - using TStrVecCItr = TStrVec::const_iterator; - using TStrCItr = std::string::const_iterator; - using TStrCRItr = std::string::const_reverse_iterator; - - private: - struct SNode - { - //! See CMemory. - static bool dynamicSizeAlwaysZero() - { - return true; - } - - SNode(char c, char type, uint32_t next); - - bool operator<(char rhs) const; - char s_Char; - char s_Type; - uint32_t s_Next; - }; - - struct SDistinctChar - { - SDistinctChar(char c, char type, std::size_t start, std::size_t end); - - char s_Char; - char s_Type; - std::size_t s_Start; - std::size_t s_End; - }; - - private: - using TNodeVec = std::vector; - using TNodeVecCItr = TNodeVec::const_iterator; - using TDistinctCharVec = std::vector; - - public: - //! Default constructor. - CFlatPrefixTree(); - - //! Builds the tree from a list of \p prefixes. The \p prefixes - //! vector is required to be lexicographically sorted. - //! Returns true if the tree was build successfully. - bool build(const TStrVec &prefixes); - - //! Returns true if the \p key starts with a prefix present in the tree. - bool matchesStart(const std::string &key) const; - - //! Returns true if the \p key fully matches a prefix present in the tree. - bool matchesFully(const std::string &key) const; - - //! Returns true if the string described by \p start, \p end - //! starts with a prefix present in the tree. 
-        bool matchesStart(TStrCItr start, TStrCItr end) const;
-
-        //! Returns true if the string described by \p start, \p end
-        //! fully matches a prefix present in the tree.
-        bool matchesFully(TStrCItr start, TStrCItr end) const;
-
-        //! Returns true if the string described by \p start, \p end
-        //! starts with a prefix present in the tree.
-        bool matchesStart(TStrCRItr start, TStrCRItr end) const;
-
-        //! Returns true if the string described by \p start, \p end
-        //! fully matches a prefix present in the tree.
-        bool matchesFully(TStrCRItr start, TStrCRItr end) const;
-
-        //! Clears the tree.
-        void clear();
-
-        //! Pretty-prints the tree.
-        std::string print() const;
-    private:
-        //! The recursive building helper.
-        void buildRecursively(const TStrVec &prefixes,
-                              std::size_t prefixesStart,
-                              std::size_t prefixesEnd,
-                              std::size_t charPos);
-
-        //! Extracts the distinct characters and stores it in \p distinctChars
-        //! along with the start and end index in the \p prefixes vector.
-        void extractDistinctCharacters(const TStrVec &prefixes,
-                                       std::size_t prefixesStart,
-                                       std::size_t prefixesEnd,
-                                       std::size_t charPos,
-                                       TDistinctCharVec &distinctChars);
-
-        //! Implementation of the search algorithm.
-        template<typename ITR>
-        bool matches(ITR start, ITR end, bool requireFullMatch) const;
-    private:
-        //! The vector representing the trie tree.
-        TNodeVec m_FlatTree;
+class CORE_EXPORT CFlatPrefixTree {
+public:
+    using TStrVec = std::vector<std::string>;
+    using TStrVecCItr = TStrVec::const_iterator;
+    using TStrCItr = std::string::const_iterator;
+    using TStrCRItr = std::string::const_reverse_iterator;
+
+private:
+    struct SNode {
+        //! See CMemory.
+        static bool dynamicSizeAlwaysZero() { return true; }
+
+        SNode(char c, char type, uint32_t next);
+
+        bool operator<(char rhs) const;
+        char s_Char;
+        char s_Type;
+        uint32_t s_Next;
+    };
+
+    struct SDistinctChar {
+        SDistinctChar(char c, char type, std::size_t start, std::size_t end);
+
+        char s_Char;
+        char s_Type;
+        std::size_t s_Start;
+        std::size_t s_End;
+    };
+
+private:
+    using TNodeVec = std::vector<SNode>;
+    using TNodeVecCItr = TNodeVec::const_iterator;
+    using TDistinctCharVec = std::vector<SDistinctChar>;
+
+public:
+    //! Default constructor.
+    CFlatPrefixTree();
+
+    //! Builds the tree from a list of \p prefixes. The \p prefixes
+    //! vector is required to be lexicographically sorted.
+    //! Returns true if the tree was built successfully.
+    bool build(const TStrVec& prefixes);
+
+    //! Returns true if the \p key starts with a prefix present in the tree.
+    bool matchesStart(const std::string& key) const;
+
+    //! Returns true if the \p key fully matches a prefix present in the tree.
+    bool matchesFully(const std::string& key) const;
+
+    //! Returns true if the string described by \p start, \p end
+    //! starts with a prefix present in the tree.
+    bool matchesStart(TStrCItr start, TStrCItr end) const;
+
+    //! Returns true if the string described by \p start, \p end
+    //! fully matches a prefix present in the tree.
+    bool matchesFully(TStrCItr start, TStrCItr end) const;
+
+    //! Returns true if the string described by \p start, \p end
+    //! starts with a prefix present in the tree.
+    bool matchesStart(TStrCRItr start, TStrCRItr end) const;
+
+    //! Returns true if the string described by \p start, \p end
+    //! fully matches a prefix present in the tree.
+    bool matchesFully(TStrCRItr start, TStrCRItr end) const;
+
+    //! Clears the tree.
+    void clear();
+
+    //! Pretty-prints the tree.
+    std::string print() const;
+
+private:
+    //! The recursive building helper.
+    void buildRecursively(const TStrVec& prefixes, std::size_t prefixesStart, std::size_t prefixesEnd, std::size_t charPos);
+
+    //! Extracts the distinct characters and stores them in \p distinctChars
+    //! along with the start and end index in the \p prefixes vector.
+    void extractDistinctCharacters(const TStrVec& prefixes,
+                                   std::size_t prefixesStart,
+                                   std::size_t prefixesEnd,
+                                   std::size_t charPos,
+                                   TDistinctCharVec& distinctChars);
+
+    //! Implementation of the search algorithm.
+    template<typename ITR>
+    bool matches(ITR start, ITR end, bool requireFullMatch) const;
+
+private:
+    //! The vector representing the trie tree.
+    TNodeVec m_FlatTree;
 };
 }
 }
diff --git a/include/core/CFloatStorage.h b/include/core/CFloatStorage.h
index 11ce8f96f9..e7b7d70062 100644
--- a/include/core/CFloatStorage.h
+++ b/include/core/CFloatStorage.h
@@ -15,19 +15,11 @@
 #include
 #include
 
-namespace ml
-{
-namespace core
-{
-
-namespace
-{
-const int MAX_PRECISE_INTEGER_FLOAT(
-    static_cast<int>(
-        std::pow(10.0,
-                 static_cast<double>(std::numeric_limits<float>::digits10))
-    ) - 1
-);
+namespace ml {
+namespace core {
+
+namespace {
+const int MAX_PRECISE_INTEGER_FLOAT(static_cast<int>(std::pow(10.0, static_cast<double>(std::numeric_limits<float>::digits10))) - 1);
 }
 
 //! \brief This class should be used in place of float whenever
 //!
 //! Will use exactly one conversion from double to float to assign
 //! the value of a * b + 2.0 * c * c to d.
-class CORE_EXPORT CFloatStorage
-{
-    public:
-        //! See core::CMemory.
-        static bool dynamicSizeAlwaysZero() { return true; }
-
-    public:
-        //! Default construction of the floating point value.
-        CFloatStorage() : m_Value() {}
-
-        //! Integer promotion. So one can write things like CFloatStorage(1).
-        CFloatStorage(int value) : m_Value(float(value))
-        {
-#ifdef CFLOATSTORAGE_BOUNDS_CHECK
-            if (   value > MAX_PRECISE_INTEGER_FLOAT
-                || -value < MAX_PRECISE_INTEGER_FLOAT)
-            {
-                LOG_WARN("Loss of precision assigning int " << value << " to float");
-            }
-#endif // CFLOATSTORAGE_BOUNDS_CHECK
-        }
+class CORE_EXPORT CFloatStorage {
+public:
+    //! See core::CMemory.
+    static bool dynamicSizeAlwaysZero() { return true; }
 
-        //! Implicit construction from a float.
-        CFloatStorage(float value) : m_Value(value) {}
+public:
+    //! Default construction of the floating point value.
+    CFloatStorage() : m_Value() {}
 
-        //! Implicit construction from a double.
-        CFloatStorage(double value) : m_Value()
-        {
-            this->set(value);
+    //! Integer promotion. So one can write things like CFloatStorage(1).
+    CFloatStorage(int value) : m_Value(float(value)) {
+#ifdef CFLOATSTORAGE_BOUNDS_CHECK
+        if (value > MAX_PRECISE_INTEGER_FLOAT || value < -MAX_PRECISE_INTEGER_FLOAT) {
+            LOG_WARN("Loss of precision assigning int " << value << " to float");
         }
+#endif // CFLOATSTORAGE_BOUNDS_CHECK
+    }
 
-        //! Set from a string.
-        bool fromString(const std::string &string)
-        {
-            double value;
-            if (CStringUtils::stringToType(string, value))
-            {
-                this->set(value);
-                return true;
-            }
-            return false;
-        }
+    //! Implicit construction from a float.
+    CFloatStorage(float value) : m_Value(value) {}
 
-        //! Convert to a string.
-        std::string toString() const
-        {
-            return CStringUtils::typeToStringPrecise(static_cast<double>(m_Value),
-                                                     CIEEE754::E_SinglePrecision);
-        }
+    //! Implicit construction from a double.
+    CFloatStorage(double value) : m_Value() { this->set(value); }
 
-        //! \name Double Assignment
-        //@{
-        //! Assign from a double.
-        CFloatStorage &operator=(double value)
-        {
+    //! Set from a string.
+ bool fromString(const std::string& string) { + double value; + if (CStringUtils::stringToType(string, value)) { this->set(value); - return *this; - } - //! Plus assign from double. - CFloatStorage &operator+=(double value) - { - this->set(static_cast(m_Value) + value); - return *this; + return true; } - //! Minus assign from double. - CFloatStorage &operator-=(double value) - { - this->set(static_cast(m_Value) - value); - return *this; - } - //! Multiply assign from double. - CFloatStorage &operator*=(double value) - { - this->set(static_cast(m_Value) * value); - return *this; + return false; + } + + //! Convert to a string. + std::string toString() const { return CStringUtils::typeToStringPrecise(static_cast(m_Value), CIEEE754::E_SinglePrecision); } + + //! \name Double Assignment + //@{ + //! Assign from a double. + CFloatStorage& operator=(double value) { + this->set(value); + return *this; + } + //! Plus assign from double. + CFloatStorage& operator+=(double value) { + this->set(static_cast(m_Value) + value); + return *this; + } + //! Minus assign from double. + CFloatStorage& operator-=(double value) { + this->set(static_cast(m_Value) - value); + return *this; + } + //! Multiply assign from double. + CFloatStorage& operator*=(double value) { + this->set(static_cast(m_Value) * value); + return *this; + } + //! Divide assign from double. + CFloatStorage& operator/=(double value) { + this->set(static_cast(m_Value) / value); + return *this; + } + //@} + + //! Implicit conversion to a double. + operator double() const { return static_cast(m_Value); } + +private: + //! Utility to actually set the floating point value. + void set(double value) { +#ifdef CFLOATSTORAGE_BOUNDS_CHECK + if (value > std::numeric_limits::max() || -value > std::numeric_limits::max()) { + LOG_WARN("Value overflows float " << value); } - //! Divide assign from double. - CFloatStorage &operator/=(double value) - { - this->set(static_cast(m_Value) / value); - return *this; + if (value < std::numeric_limits::min() && -value < std::numeric_limits::min()) { + LOG_WARN("Value underflows float " << value); + } else if (value < 100 * std::numeric_limits::min() && -value < 100 * std::numeric_limits::min()) { + LOG_WARN("Less than 3 s.f. precision retained for " << value); } - //@} - - //! Implicit conversion to a double. - operator double () const - { - return static_cast(m_Value); - } - - private: - //! Utility to actually set the floating point value. - void set(double value) - { -#ifdef CFLOATSTORAGE_BOUNDS_CHECK - if ( value > std::numeric_limits::max() - || -value > std::numeric_limits::max()) - { - LOG_WARN("Value overflows float " << value); - } - if ( value < std::numeric_limits::min() - && -value < std::numeric_limits::min()) - { - LOG_WARN("Value underflows float " << value); - } - else if ( value < 100 * std::numeric_limits::min() - && -value < 100 * std::numeric_limits::min()) - { - LOG_WARN("Less than 3 s.f. precision retained for " << value); - } #endif // CFLOATSTORAGE_BOUNDS_CHECK - m_Value = static_cast(value); - } - private: - float m_Value; -}; + m_Value = static_cast(value); + } +private: + float m_Value; +}; } } diff --git a/include/core/CFunctional.h b/include/core/CFunctional.h index 962ce0c748..8362b31909 100644 --- a/include/core/CFunctional.h +++ b/include/core/CFunctional.h @@ -13,68 +13,57 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief Useful extensions to the std:: functional collection of types. 
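Illustration (not part of the patch): a sketch of using the SIsNull helper declared below to count empty optionals in a container.

    #include <core/CFunctional.h>

    #include <boost/optional.hpp>

    #include <algorithm>
    #include <vector>

    int main() {
        std::vector<boost::optional<int>> values{3, boost::none, 7};
        // SIsNull treats an empty optional (or a null pointer) as null.
        auto nulls = std::count_if(values.begin(), values.end(),
                                   ml::core::CFunctional::SIsNull());
        return nulls == 1 ? 0 : 1;
    }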
-class CORE_EXPORT CFunctional : CNonInstantiatable
-{
-    public:
-        //! \brief Checks is a nullable type is null.
-        struct CORE_EXPORT SIsNull
-        {
-            template<typename T>
-            bool operator()(const T *ptr) const
-            {
-                return ptr == 0;
-            }
-
-            template<typename T>
-            bool operator()(const boost::optional<T> &optional) const
-            {
-                return !optional;
-            }
-
-            template<typename T>
-            bool operator()(boost::shared_ptr<T> &ptr) const
-            {
-                return ptr == 0;
-            }
-        };
-
-        //! \brief Dereferences objects which support a unary operator *
-        //! and calls the predicate \p PRED on them.
-        template<typename PRED>
-        struct SDereference
-        {
-            SDereference(const PRED &pred = PRED()) : s_Pred(pred) {}
-
-            //! Version for unary predicates.
-            //!
-            //! \note SFINAE means this won't be a problem even if PRED
-            //! is a unary predicate.
-            template<typename T>
-            inline bool operator()(const T &ptr) const
-            {
-                return s_Pred(*ptr);
-            }
-
-            //! Version for binary predicates.
-            //!
-            //! \note SFINAE means this won't be a problem even if PRED
-            //! is a unary predicate.
-            template<typename U, typename V>
-            inline bool operator()(const U &lhs, const V &rhs) const
-            {
-                return s_Pred(*lhs, *rhs);
-            }
-
-            PRED s_Pred;
-        };
+class CORE_EXPORT CFunctional : CNonInstantiatable {
+public:
+    //! \brief Checks if a nullable type is null.
+    struct CORE_EXPORT SIsNull {
+        template<typename T>
+        bool operator()(const T* ptr) const {
+            return ptr == 0;
+        }
+
+        template<typename T>
+        bool operator()(const boost::optional<T>& optional) const {
+            return !optional;
+        }
+
+        template<typename T>
+        bool operator()(boost::shared_ptr<T>& ptr) const {
+            return ptr == 0;
+        }
+    };
+
+    //! \brief Dereferences objects which support a unary operator *
+    //! and calls the predicate \p PRED on them.
+    template<typename PRED>
+    struct SDereference {
+        SDereference(const PRED& pred = PRED()) : s_Pred(pred) {}
+
+        //! Version for unary predicates.
+        //!
+        //! \note SFINAE means this won't be a problem even if PRED
+        //! is a unary predicate.
+        template<typename T>
+        inline bool operator()(const T& ptr) const {
+            return s_Pred(*ptr);
+        }
+
+        //! Version for binary predicates.
+        //!
+        //! \note SFINAE means this won't be a problem even if PRED
+        //! is a unary predicate.
+        template<typename U, typename V>
+        inline bool operator()(const U& lhs, const V& rhs) const {
+            return s_Pred(*lhs, *rhs);
+        }
+
+        PRED s_Pred;
+    };
 };
-
 }
 }
diff --git a/include/core/CGmTimeR.h b/include/core/CGmTimeR.h
index 23bcff2f20..bf9391572d 100644
--- a/include/core/CGmTimeR.h
+++ b/include/core/CGmTimeR.h
@@ -11,12 +11,8 @@
 
 #include
 
-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {
 
 //! \brief
 //! Portable wrapper for the gmtime_r() function.
 //!
 //! gmtime_s() function with slightly different semantics to Unix's
 //! gmtime_r().
 //!
-class CORE_EXPORT CGmTimeR : private CNonInstantiatable
-{
-    public:
-        static struct tm *gmTimeR(const time_t *clock,
-                                  struct tm *result);
+class CORE_EXPORT CGmTimeR : private CNonInstantiatable {
+public:
+    static struct tm* gmTimeR(const time_t* clock, struct tm* result);
 };
-
-
 }
 }
 
 #endif // INCLUDED_ml_core_CGmTimeR_h
-
diff --git a/include/core/CHashing.h b/include/core/CHashing.h
index 9aef0d6770..7607385d8e 100644
--- a/include/core/CHashing.h
+++ b/include/core/CHashing.h
@@ -21,11 +21,8 @@
 
 #include
 
-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 
 //! \brief Hashing functionality.
 //!
 //! This is a place holder for useful hashing functionality. In particular,
 //! it implements various universal hashing schemes, some high quality hash
 //! functions (near cryptographic, but much faster).
-class CORE_EXPORT CHashing : private CNonInstantiatable -{ +class CORE_EXPORT CHashing : private CNonInstantiatable { +public: + //! Implements universal hashing on integers. + //! + //! See http://en.wikipedia.org/wiki/Universal_hashing for discussion. + //! + //! \warning The hashes use the prime 4294967291 so rely on the fact + //! that integers being hashed are smaller than this. This should be + //! sufficient for our use cases! + class CORE_EXPORT CUniversalHash { public: - //! Implements universal hashing on integers. - //! - //! See http://en.wikipedia.org/wiki/Universal_hashing for discussion. + using TUInt32Vec = std::vector; + + public: + //! A member of the universal (2-independent) hash family on + //! integers (Carter and Wegman): + //!
+        //!   \f$\displaystyle f_{a,b}(x) = ((ax + b)\ \textup{mod}\ p)\ \textup{mod}\ m\f$
+        //! 
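+        //!
+        //! For example (illustrative values only; p is the class's
+        //! BIG_PRIME, 4294967291):
+        //! \code
+        //! CUInt32Hash h(/*m =*/ 1000, /*a =*/ 3, /*b =*/ 7);
+        //! uint32_t bucket = h(42); // ((3 * 42 + 7) mod p) mod 1000 = 133
+        //! \endcode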
//! - //! \warning The hashes use the prime 4294967291 so rely on the fact - //! that integers being hashed are smaller than this. This should be - //! sufficient for our use cases! - class CORE_EXPORT CUniversalHash - { - public: - using TUInt32Vec = std::vector; - - public: - //! A member of the universal (2-independent) hash family on - //! integers (Carter and Wegman): - //!
-                //!   \f$\displaystyle f_{a,b}(x) = ((ax + b)\ \textup{mod}\ p)\ \textup{mod}\ m\f$
-                //! 
- //! - //! \note This is not the fastest implementation of universal - //! hashing which is the multiply-shift scheme. We can revisit - //! the implementation if this is a bottleneck in practice. - class CORE_EXPORT CUInt32Hash - { - public: - //! See CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - public: - CUInt32Hash(); - CUInt32Hash(uint32_t m, uint32_t a, uint32_t b); - - //! Get the range. - uint32_t m() const; - - //! Get the multiplier. - uint32_t a() const; - - //! Get the offset. - uint32_t b() const; - - //! \note This is implemented inline in contravention to - //! the coding standards because we definitely don't want - //! the cost of a function call here. - uint32_t operator()(uint32_t x) const - { - // Note by construction: - // a * x + b < p^2 + p < 2^64 - return static_cast((( static_cast(m_A) * x - + static_cast(m_B)) % BIG_PRIME) - % static_cast(m_M)); - } - - //! Print the hash function for debug. - std::string print() const; - - private: - uint32_t m_M, m_A, m_B; - }; - - using TUInt32HashVec = std::vector; - - //! A lightweight implementation universal (2-independent) on - //! 32-bit integers. This doesn't further restrict the range - //! of the hash so avoids storing an extra integer and doing - //! modulo the range. - class CORE_EXPORT CUInt32UnrestrictedHash - { - public: - //! See CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - public: - CUInt32UnrestrictedHash(); - CUInt32UnrestrictedHash(uint32_t a, uint32_t b); - - //! Get the multiplier. - uint32_t a() const; - - //! Get the offset. - uint32_t b() const; - - //! \note This is implemented inline in contravention to - //! the coding standards because we definitely don't want - //! the cost of a function call here. - uint32_t operator()(uint32_t x) const - { - // Note by construction: - // a * x + b < p^2 + p < 2^64 - return static_cast(( static_cast(m_A) * x - + static_cast(m_B)) % BIG_PRIME); - } - - //! Print the hash function for debug. - std::string print() const; - - private: - uint32_t m_A, m_B; - }; - - using TUInt32UnrestrictedHashVec = std::vector; - - //! A member of the universal (2-independent) hash family on - //! vectors of integers (Carter and Wegman): - //!
-                //!   \f$\displaystyle f(\{x_i\}) = \sum_i{h_i(x_i)}\ \textup{mod}\ m\f$
-                //! 
- //! - //! Here, \f$h_i(.)\f$ are independent samples from a universal - //! family of hash functions. - //! - //! \note This is not the fastest implementation of universal - //! hashing which is the multiply-shift scheme. We can revisit - //! the implementation if this is a bottleneck in practice. - class CORE_EXPORT CUInt32VecHash - { - public: - CUInt32VecHash(uint32_t m, const TUInt32Vec &a, uint32_t b); - - //! Get the range. - uint32_t m() const; - - //! Get the multipliers. - const TUInt32Vec &a() const; - - //! Get the offset. - uint32_t b() const; - - //! Overload for case our vector has two elements to - //! avoid overhead of creating a vector to hash. - //! - //! \note This is implemented inline in contravention to - //! the coding standards because we definitely don't want - //! the cost of a function call here. - uint32_t operator()(uint32_t x1, uint32_t x2) const - { - // Note by construction: - // (a(1) * x(1)) mod p + a(2) * x(2) + b - // < p^2 + 2*p - // < 2^64 - uint64_t h = (static_cast(m_A[0]) * x1) % BIG_PRIME - + static_cast(m_A[1]) * x2; - return static_cast(((h + static_cast(m_B)) % BIG_PRIME) - % static_cast(m_M)); - } - - //! \note This is implemented inline in contravention to - //! the coding standards because we definitely don't want - //! the cost of a function call here. - uint32_t operator()(const TUInt32Vec &x) const - { - // Note we variously use that: - // a(1) * x(1) - // < h mod p + a(i) * x(i) - // < h mod p + a(n) * x(n) + b - // < p^2 + 2*p - // < 2^64 - uint64_t h = static_cast(m_A[0]) * x[0]; - for (std::size_t i = 1u; i < x.size(); ++i) - { - h = ( h % BIG_PRIME - + static_cast(m_A[i]) * x[i]); - } - return static_cast(((h + static_cast(m_B)) % BIG_PRIME) - % static_cast(m_M)); - } - - //! Print the hash function for debug. - std::string print() const; - - private: - uint32_t m_M; - TUInt32Vec m_A; - uint32_t m_B; - }; - - using TUInt32VecHashVec = std::vector; - - //! Converts hash function objects to a string. - class CORE_EXPORT CToString - { - public: - CToString(const char delimiter); - - std::string operator()(const CUInt32UnrestrictedHash &hash) const; - std::string operator()(const CUInt32Hash &hash) const; - - private: - char m_Delimiter; - }; - - //! Initializes hash function objects from a string. - class CORE_EXPORT CFromString - { - public: - CFromString(const char delimiter); - - bool operator()(const std::string &token, - CUInt32UnrestrictedHash &hash) const; - bool operator()(const std::string &token, CUInt32Hash &hash) const; - - private: - char m_Delimiter; - mutable std::string m_Token; - }; - - public: - //! We choose a prime just a smaller than \f$2^{32}\f$. Note - //! that if unsigned integer multiplication overflows the result - //! is returned modulo \f$2^{64}\f$ for 64 bit integers however: - //!
-                //!   \f$(ax + b)\textup{ mod }p \neq ((ax + b)\textup{ mod }2^{64})\textup{ mod }p\f$
-                //! 
- //! - //! So in order to guaranty that the hash functions belong - //! to a universal family for all possible universes we need - //! to avoid overflow (or explicitly handle it). We choose - //! to almost always guaranty it by choosing:\n - //!
-                //!   \f$p = 4294967291\f$
-                //! 
- //! - //! which is the largest prime less than \f$2^{32}\f$. See: - //! http://www.prime-numbers.org/prime-number-4294965000-4294970000.htm - //! and by make use of the fact that we can take the mod at any - //! time because if two integers are equal modulo \f$p\f$ they - //! are identical (in \f$Z/p\f$) so can be interchanged in any - //! statement which is true for one or the other. For most - //! applications we'll be mapping unique values (client ids, etc) - //! to the first n integers so 4294967291 will be plenty big enough - //! for our universes. - static const uint64_t BIG_PRIME; - - //! Generate k independent samples of the 32 bit integer universal - //! hash functions: - //!
-                //!   \f$\displaystyle h_{a,b}\ :\ U \rightarrow [m-1]\f$
-                //! 
- //! - //! \param k The number of hash functions. - //! \param m The range of the hash functions. - //! \param result Filled in with the sampled hash functions. - static void generateHashes(std::size_t k, - uint32_t m, - TUInt32HashVec &result); - - //! Generate k independent samples of the 32 bit integer universal - //! hash functions: - //!
-                //!   \f$\displaystyle h_{a,b}\ :\ U \rightarrow [2^{32}]\f$
-                //! 
- //! - //! \param k The number of hash functions. - //! \param result Filled in with the sampled hash functions. - static void generateHashes(std::size_t k, - TUInt32UnrestrictedHashVec &result); - - //! Generate k independent samples of the 32 bit integer vector - //! universal hash functions: - //!
-                //!   \f$\displaystyle h\ :\ U \rightarrow [m-1]\f$
-                //! 
- //! - //! \param k The number of hash functions. - //! \param n The size of vectors to hash. - //! \param m The range of the hash functions. - //! \param result Filled in with the sampled hash functions. - static void generateHashes(std::size_t k, - std::size_t n, - uint32_t m, - TUInt32VecHashVec &result); - - private: - //! Our random number generator for sampling hash function. - static boost::random::mt11213b ms_Generator; - - //! Used by generateHashes to protect non thread safe calls - //! to the random number generator. - static CFastMutex ms_Mutex; + //! \note This is not the fastest implementation of universal + //! hashing which is the multiply-shift scheme. We can revisit + //! the implementation if this is a bottleneck in practice. + class CORE_EXPORT CUInt32Hash { + public: + //! See CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + public: + CUInt32Hash(); + CUInt32Hash(uint32_t m, uint32_t a, uint32_t b); + + //! Get the range. + uint32_t m() const; + + //! Get the multiplier. + uint32_t a() const; + + //! Get the offset. + uint32_t b() const; + + //! \note This is implemented inline in contravention to + //! the coding standards because we definitely don't want + //! the cost of a function call here. + uint32_t operator()(uint32_t x) const { + // Note by construction: + // a * x + b < p^2 + p < 2^64 + return static_cast(((static_cast(m_A) * x + static_cast(m_B)) % BIG_PRIME) % + static_cast(m_M)); + } + + //! Print the hash function for debug. + std::string print() const; + + private: + uint32_t m_M, m_A, m_B; }; - //! MurmurHash2: fast 32-bit hash. - //! - //! This is very close to Austin Appleby's optimized implementation - //! of the MurmurHash2 function (which is now in the public domain). - //! This is neither endian neutral nor alignment safe. If you are - //! going to use this version you must either be confident or check - //! that the address of \p key can be safely read. For addressable - //! values this will be the case, but might for example fail if you - //! pass in a pointer to the middle of a string. Note that hashing - //! whole strings will be fine. If you need to check alignment, on - //! a 32-bit platform it amounts to checking that: - //! \code - //! reinterpret_cast(key) & 0x3 == 0 - //! \endcode - //! - //! Furthermore, you should not serialize the hashed values because - //! they will be different on machines with different endian - //! conventions. If you aren't sure that you can safely use this - //! version then use safeMurmurHash32. - static uint32_t murmurHash32(const void *key, - int length, - uint32_t seed); - - //! MurmurHash2: safe 32-bit hash. - //! - //! This is very close to Austin Appleby's neutral implementation - //! of MurmurHash2 (which is now in the public domain). This is - //! both alignment safe and endian neutral. I have factored this - //! out from our fastest MurmurHash2 implementation because I - //! don't want the result of hashing to depend on the address of - //! the object which it would if we tried to mix the two approaches - //! and check alignment. - static uint32_t safeMurmurHash32(const void *key, - int length, - uint32_t seed); - - //! MurmurHash2: fast 64-bit hash. - //! - //! This is adapted from Austin Appleby's optimized implementation - //! of the 32 bit MurmurHash2 function (which is now in the public - //! domain). This is neither endian neutral nor alignment safe. If - //! you are going to use this version you must either be confident - //! 
or check that the address of \p key can be safely read. For
-        //! addressable values this will be the case, but might for example
-        //! fail if you pass in a pointer to the middle of a string. Note
-        //! that hashing whole strings will be fine. If you need to check
-        //! alignment, on a 64-bit platform it amounts to checking that:
-        //! \code
-        //! reinterpret_cast<size_t>(key) & 0x7 == 0
-        //! \endcode
-        //!
-        //! Furthermore, you should not serialize the hashed values because
-        //! they will be different on machines with different endian
-        //! conventions. If you aren't sure that you can safely use this
-        //! version then use safeMurmurHash64.
-        static uint64_t murmurHash64(const void *key,
-                                     int length,
-                                     uint64_t seed);
-
-        //! MurmurHash2: safe 64-bit hash.
+        using TUInt32HashVec = std::vector<CUInt32Hash>;
+
+        //! A lightweight implementation of universal (2-independent)
+        //! hashing on 32-bit integers. This doesn't further restrict
+        //! the range of the hash so avoids storing an extra integer
+        //! and doing modulo the range.
+        class CORE_EXPORT CUInt32UnrestrictedHash {
+        public:
+            //! See CMemory.
+            static bool dynamicSizeAlwaysZero() { return true; }
+
+        public:
+            CUInt32UnrestrictedHash();
+            CUInt32UnrestrictedHash(uint32_t a, uint32_t b);
+
+            //! Get the multiplier.
+            uint32_t a() const;
+
+            //! Get the offset.
+            uint32_t b() const;
+
+            //! \note This is implemented inline in contravention to
+            //! the coding standards because we definitely don't want
+            //! the cost of a function call here.
+            uint32_t operator()(uint32_t x) const {
+                // Note by construction:
+                // a * x + b < p^2 + p < 2^64
+                return static_cast<uint32_t>((static_cast<uint64_t>(m_A) * x + static_cast<uint64_t>(m_B)) % BIG_PRIME);
+            }
+
+            //! Print the hash function for debug.
+            std::string print() const;
+
+        private:
+            uint32_t m_A, m_B;
+        };
+
+        using TUInt32UnrestrictedHashVec = std::vector<CUInt32UnrestrictedHash>;
+
+        //! A member of the universal (2-independent) hash family on
+        //! vectors of integers (Carter and Wegman):
+        //!
+        //!   \f$\displaystyle f(\{x_i\}) = \sum_i{h_i(x_i)}\ \textup{mod}\ m\f$
+        //! 
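+        //!
+        //! For example, using the two element overload (illustrative
+        //! values only; p is BIG_PRIME):
+        //! \code
+        //! TUInt32Vec a{3, 5};
+        //! CUInt32VecHash h(/*m =*/ 1000, a, /*b =*/ 7);
+        //! uint32_t bucket = h(1, 2); // ((3*1 + 5*2 + 7) mod p) mod 1000 = 20
+        //! \endcode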
//! - //! This is adapted from Austin Appleby's neutral implementation - //! of the 32-bit MurmurHash2 (which is now in the public domain). - //! This is both alignment safe and endian neutral. I have factored - //! this out from our fastest MurmurHash2 implementation because I - //! don't want the result of hashing to depend on the address of - //! the object, which it would if we tried to mix the two approaches - //! and check alignment. - static uint64_t safeMurmurHash64(const void *key, - int length, - uint64_t seed); - - //! Wrapper for murmur hash to use with basic types. + //! Here, \f$h_i(.)\f$ are independent samples from a universal + //! family of hash functions. //! - //! \warning This is slower than boost::hash for the types I tested - //! std::size_t, int, uint64_t, but does have better distributions. - template - class CMurmurHash2BT : public std::unary_function - { - public: - //! See CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - public: - CMurmurHash2BT(std::size_t seed = 0x5bd1e995) : - m_Seed(seed) - { + //! \note This is not the fastest implementation of universal + //! hashing which is the multiply-shift scheme. We can revisit + //! the implementation if this is a bottleneck in practice. + class CORE_EXPORT CUInt32VecHash { + public: + CUInt32VecHash(uint32_t m, const TUInt32Vec& a, uint32_t b); + + //! Get the range. + uint32_t m() const; + + //! Get the multipliers. + const TUInt32Vec& a() const; + + //! Get the offset. + uint32_t b() const; + + //! Overload for case our vector has two elements to + //! avoid overhead of creating a vector to hash. + //! + //! \note This is implemented inline in contravention to + //! the coding standards because we definitely don't want + //! the cost of a function call here. + uint32_t operator()(uint32_t x1, uint32_t x2) const { + // Note by construction: + // (a(1) * x(1)) mod p + a(2) * x(2) + b + // < p^2 + 2*p + // < 2^64 + uint64_t h = (static_cast(m_A[0]) * x1) % BIG_PRIME + static_cast(m_A[1]) * x2; + return static_cast(((h + static_cast(m_B)) % BIG_PRIME) % static_cast(m_M)); + } + + //! \note This is implemented inline in contravention to + //! the coding standards because we definitely don't want + //! the cost of a function call here. + uint32_t operator()(const TUInt32Vec& x) const { + // Note we variously use that: + // a(1) * x(1) + // < h mod p + a(i) * x(i) + // < h mod p + a(n) * x(n) + b + // < p^2 + 2*p + // < 2^64 + uint64_t h = static_cast(m_A[0]) * x[0]; + for (std::size_t i = 1u; i < x.size(); ++i) { + h = (h % BIG_PRIME + static_cast(m_A[i]) * x[i]); } + return static_cast(((h + static_cast(m_B)) % BIG_PRIME) % static_cast(m_M)); + } - std::size_t operator()(const T &key) const; + //! Print the hash function for debug. + std::string print() const; - private: - std::size_t m_Seed; + private: + uint32_t m_M; + TUInt32Vec m_A; + uint32_t m_B; }; - //! Wrapper for murmur hash to use with std::string. - //! - //! \note This is significantly faster than boost::hash - //! and has better distributions. - class CORE_EXPORT CMurmurHash2String : public std::unary_function - { - public: - //! See CMemory. 
- static bool dynamicSizeAlwaysZero() { return true; } - using TStrCRef = boost::reference_wrapper; - - public: - CMurmurHash2String(std::size_t seed = 0x5bd1e995) : m_Seed(seed) {} - - std::size_t operator()(const std::string &key) const; - std::size_t operator()(TStrCRef key) const - { - return this->operator()(key.get()); - } - std::size_t operator()(const CStoredStringPtr &key) const - { - if (key) - { - return this->operator()(*key); - } - return m_Seed; - } + using TUInt32VecHashVec = std::vector; - private: - std::size_t m_Seed; + //! Converts hash function objects to a string. + class CORE_EXPORT CToString { + public: + CToString(const char delimiter); + + std::string operator()(const CUInt32UnrestrictedHash& hash) const; + std::string operator()(const CUInt32Hash& hash) const; + + private: + char m_Delimiter; }; - //! Wrapper for murmur hash to use with std::string in cases where a 64 - //! bit hash is required rather than a machine word size hash. An - //! example would be where the hash value somehow affects data that is - //! visible outside the program, such as state persisted to a data - //! store. This is also immune to endianness issues. - class CORE_EXPORT CSafeMurmurHash2String64 : public std::unary_function - { - public: - //! See CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - using TStrCRef = boost::reference_wrapper; - - public: - CSafeMurmurHash2String64(uint64_t seed = 0x5bd1e995) : m_Seed(seed) {} - - uint64_t operator()(const std::string &key) const; - std::size_t operator()(TStrCRef key) const - { - return this->operator()(key.get()); - } - std::size_t operator()(const CStoredStringPtr &key) const - { - if (key) - { - return this->operator()(*key); - } - return m_Seed; - } + //! Initializes hash function objects from a string. + class CORE_EXPORT CFromString { + public: + CFromString(const char delimiter); + + bool operator()(const std::string& token, CUInt32UnrestrictedHash& hash) const; + bool operator()(const std::string& token, CUInt32Hash& hash) const; - private: - uint64_t m_Seed; + private: + char m_Delimiter; + mutable std::string m_Token; }; - //! 32 bit hash combine modeled on boost::hash_combine. - static uint32_t hashCombine(uint32_t seed, uint32_t h); + public: + //! We choose a prime just a smaller than \f$2^{32}\f$. Note + //! that if unsigned integer multiplication overflows the result + //! is returned modulo \f$2^{64}\f$ for 64 bit integers however: + //!
+        //!   \f$(ax + b)\textup{ mod }p \neq ((ax + b)\textup{ mod }2^{64})\textup{ mod }p\f$
+        //! 
+        //! So in order to guarantee that the hash functions belong
+        //! to a universal family for all possible universes we need
+        //! to avoid overflow (or explicitly handle it). We choose
+        //! to almost always guarantee it by choosing:\n
+        //!
+        //!   \f$p = 4294967291\f$
+        //! 
+        //! which is the largest prime less than \f$2^{32}\f$. See:
+        //! http://www.prime-numbers.org/prime-number-4294965000-4294970000.htm
+        //! and by making use of the fact that we can take the mod at any
+        //! time because if two integers are equal modulo \f$p\f$ they
+        //! are identical (in \f$Z/p\f$) so can be interchanged in any
+        //! statement which is true for one or the other. For most
+        //! applications we'll be mapping unique values (client ids, etc)
+        //! to the first n integers so 4294967291 will be plenty big enough
+        //! for our universes.
+        static const uint64_t BIG_PRIME;
+
+        //! Generate k independent samples of the 32 bit integer universal
+        //! hash functions:
+        //!
+        //!   \f$\displaystyle h_{a,b}\ :\ U \rightarrow [m-1]\f$
+        //! 
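+        //!
+        //! For example (sketch):
+        //! \code
+        //! CUniversalHash::TUInt32HashVec hashes;
+        //! CUniversalHash::generateHashes(3, 1000, hashes);
+        //! uint32_t bucket = hashes[0](42); // in [0, 1000)
+        //! \endcode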
+ //! + //! \param k The number of hash functions. + //! \param m The range of the hash functions. + //! \param result Filled in with the sampled hash functions. + static void generateHashes(std::size_t k, uint32_t m, TUInt32HashVec& result); + + //! Generate k independent samples of the 32 bit integer universal + //! hash functions: + //!
+        //!   \f$\displaystyle h_{a,b}\ :\ U \rightarrow [2^{32}]\f$
+        //! 
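+        //!
+        //! For example (sketch):
+        //! \code
+        //! CUniversalHash::TUInt32UnrestrictedHashVec hashes;
+        //! CUniversalHash::generateHashes(2, hashes);
+        //! uint32_t h = hashes[0](42); // anywhere in the full 32 bit range
+        //! \endcode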
+ //! + //! \param k The number of hash functions. + //! \param result Filled in with the sampled hash functions. + static void generateHashes(std::size_t k, TUInt32UnrestrictedHashVec& result); + + //! Generate k independent samples of the 32 bit integer vector + //! universal hash functions: + //!
+        //!   \f$\displaystyle h\ :\ U \rightarrow [m-1]\f$
+        //! 
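+        //!
+        //! For example, for hashing vectors of 4 integers (sketch):
+        //! \code
+        //! CUniversalHash::TUInt32VecHashVec hashes;
+        //! CUniversalHash::generateHashes(2, 4, 1000, hashes);
+        //! \endcode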
+ //! + //! \param k The number of hash functions. + //! \param n The size of vectors to hash. + //! \param m The range of the hash functions. + //! \param result Filled in with the sampled hash functions. + static void generateHashes(std::size_t k, std::size_t n, uint32_t m, TUInt32VecHashVec& result); + + private: + //! Our random number generator for sampling hash function. + static boost::random::mt11213b ms_Generator; + + //! Used by generateHashes to protect non thread safe calls + //! to the random number generator. + static CFastMutex ms_Mutex; + }; + + //! MurmurHash2: fast 32-bit hash. + //! + //! This is very close to Austin Appleby's optimized implementation + //! of the MurmurHash2 function (which is now in the public domain). + //! This is neither endian neutral nor alignment safe. If you are + //! going to use this version you must either be confident or check + //! that the address of \p key can be safely read. For addressable + //! values this will be the case, but might for example fail if you + //! pass in a pointer to the middle of a string. Note that hashing + //! whole strings will be fine. If you need to check alignment, on + //! a 32-bit platform it amounts to checking that: + //! \code + //! reinterpret_cast(key) & 0x3 == 0 + //! \endcode + //! + //! Furthermore, you should not serialize the hashed values because + //! they will be different on machines with different endian + //! conventions. If you aren't sure that you can safely use this + //! version then use safeMurmurHash32. + static uint32_t murmurHash32(const void* key, int length, uint32_t seed); + + //! MurmurHash2: safe 32-bit hash. + //! + //! This is very close to Austin Appleby's neutral implementation + //! of MurmurHash2 (which is now in the public domain). This is + //! both alignment safe and endian neutral. I have factored this + //! out from our fastest MurmurHash2 implementation because I + //! don't want the result of hashing to depend on the address of + //! the object which it would if we tried to mix the two approaches + //! and check alignment. + static uint32_t safeMurmurHash32(const void* key, int length, uint32_t seed); + + //! MurmurHash2: fast 64-bit hash. + //! + //! This is adapted from Austin Appleby's optimized implementation + //! of the 32 bit MurmurHash2 function (which is now in the public + //! domain). This is neither endian neutral nor alignment safe. If + //! you are going to use this version you must either be confident + //! or check that the address of \p key can be safely read. For + //! addressable values this will be the case, but might for example + //! fail if you pass in a pointer to the middle of a string. Note + //! that hashing whole strings will be fine. If you need to check + //! alignment, on a 64-bit platform it amounts to checking that: + //! \code + //! reinterpret_cast(key) & 0x7 == 0 + //! \endcode + //! + //! Furthermore, you should not serialize the hashed values because + //! they will be different on machines with different endian + //! conventions. If you aren't sure that you can safely use this + //! version then use safeMurmurHash64. + static uint64_t murmurHash64(const void* key, int length, uint64_t seed); + + //! MurmurHash2: safe 64-bit hash. + //! + //! This is adapted from Austin Appleby's neutral implementation + //! of the 32-bit MurmurHash2 (which is now in the public domain). + //! This is both alignment safe and endian neutral. I have factored + //! this out from our fastest MurmurHash2 implementation because I + //! 
don't want the result of hashing to depend on the address of + //! the object, which it would if we tried to mix the two approaches + //! and check alignment. + static uint64_t safeMurmurHash64(const void* key, int length, uint64_t seed); + + //! Wrapper for murmur hash to use with basic types. + //! + //! \warning This is slower than boost::hash for the types I tested + //! std::size_t, int, uint64_t, but does have better distributions. + template + class CMurmurHash2BT : public std::unary_function { + public: + //! See CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + public: + CMurmurHash2BT(std::size_t seed = 0x5bd1e995) : m_Seed(seed) {} + + std::size_t operator()(const T& key) const; - //! 64 bit hash combine modeled on boost::hash_combine. - static uint64_t hashCombine(uint64_t seed, uint64_t h); + private: + std::size_t m_Seed; + }; + + //! Wrapper for murmur hash to use with std::string. + //! + //! \note This is significantly faster than boost::hash + //! and has better distributions. + class CORE_EXPORT CMurmurHash2String : public std::unary_function { + public: + //! See CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + using TStrCRef = boost::reference_wrapper; + + public: + CMurmurHash2String(std::size_t seed = 0x5bd1e995) : m_Seed(seed) {} + + std::size_t operator()(const std::string& key) const; + std::size_t operator()(TStrCRef key) const { return this->operator()(key.get()); } + std::size_t operator()(const CStoredStringPtr& key) const { + if (key) { + return this->operator()(*key); + } + return m_Seed; + } + + private: + std::size_t m_Seed; + }; + + //! Wrapper for murmur hash to use with std::string in cases where a 64 + //! bit hash is required rather than a machine word size hash. An + //! example would be where the hash value somehow affects data that is + //! visible outside the program, such as state persisted to a data + //! store. This is also immune to endianness issues. + class CORE_EXPORT CSafeMurmurHash2String64 : public std::unary_function { + public: + //! See CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + using TStrCRef = boost::reference_wrapper; + + public: + CSafeMurmurHash2String64(uint64_t seed = 0x5bd1e995) : m_Seed(seed) {} + + uint64_t operator()(const std::string& key) const; + std::size_t operator()(TStrCRef key) const { return this->operator()(key.get()); } + std::size_t operator()(const CStoredStringPtr& key) const { + if (key) { + return this->operator()(*key); + } + return m_Seed; + } + + private: + uint64_t m_Seed; + }; + + //! 32 bit hash combine modeled on boost::hash_combine. + static uint32_t hashCombine(uint32_t seed, uint32_t h); + + //! 64 bit hash combine modeled on boost::hash_combine. + static uint64_t hashCombine(uint64_t seed, uint64_t h); }; -namespace hash_detail -{ +namespace hash_detail { //! Selects MurmurHash2 32-bit implementation by default. template -struct SMurmurHashForArchitecture -{ - static std::size_t hash(const void *key, int length, std::size_t seed) - { +struct SMurmurHashForArchitecture { + static std::size_t hash(const void* key, int length, std::size_t seed) { return static_cast(CHashing::murmurHash32(key, length, static_cast(seed))); } }; @@ -501,46 +446,25 @@ struct SMurmurHashForArchitecture //! //! If we are on 64-bit platforms the 64-bit implementation is faster. 
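A brief usage sketch of the hashing utilities above (the 64-bit specialisation continues below). The include path, the wrapper function and the literal inputs are illustrative assumptions, not part of this patch:

    #include <core/CHashing.h>   // assumed include path
    #include <boost/unordered_map.hpp>
    #include <cstdint>
    #include <string>

    using ml::core::CHashing;

    void hashingSketch() {
        // std::string keys hashed with MurmurHash2 rather than boost::hash,
        // via the CMurmurHash2String wrapper above.
        boost::unordered_map<std::string, int, CHashing::CMurmurHash2String> counts;
        counts["anomaly"] += 1;

        // Combine two hashes, modeled on boost::hash_combine.  The safe
        // variants are used here because the fast ones are endian dependent
        // and so unsuitable for anything persisted or sent between machines.
        uint64_t h1 = CHashing::safeMurmurHash64("foo", 3, 0x5bd1e995);
        uint64_t h2 = CHashing::safeMurmurHash64("bar", 3, 0x5bd1e995);
        uint64_t combined = CHashing::hashCombine(h1, h2);
        (void)combined;
    }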
template<> -struct SMurmurHashForArchitecture<8> -{ - static std::size_t hash(const void *key, - int length, - std::size_t seed) - { +struct SMurmurHashForArchitecture<8> { + static std::size_t hash(const void* key, int length, std::size_t seed) { return static_cast(CHashing::murmurHash64(key, length, seed)); } }; - } template -inline -std::size_t CHashing::CMurmurHash2BT::operator()(const T &key) const -{ - return hash_detail::SMurmurHashForArchitecture::hash( - &key, - static_cast(sizeof(key)), - m_Seed); +inline std::size_t CHashing::CMurmurHash2BT::operator()(const T& key) const { + return hash_detail::SMurmurHashForArchitecture::hash(&key, static_cast(sizeof(key)), m_Seed); } -inline -std::size_t CHashing::CMurmurHash2String::operator()(const std::string &key) const -{ - return hash_detail::SMurmurHashForArchitecture::hash( - key.data(), - static_cast(key.size()), - m_Seed); +inline std::size_t CHashing::CMurmurHash2String::operator()(const std::string& key) const { + return hash_detail::SMurmurHashForArchitecture::hash(key.data(), static_cast(key.size()), m_Seed); } -inline -uint64_t CHashing::CSafeMurmurHash2String64::operator()(const std::string &key) const -{ - return CHashing::safeMurmurHash64( - key.data(), - static_cast(key.size()), - m_Seed); +inline uint64_t CHashing::CSafeMurmurHash2String64::operator()(const std::string& key) const { + return CHashing::safeMurmurHash64(key.data(), static_cast(key.size()), m_Seed); } - } } diff --git a/include/core/CHexUtils.h b/include/core/CHexUtils.h index 604e0f8fd7..f1b476f931 100644 --- a/include/core/CHexUtils.h +++ b/include/core/CHexUtils.h @@ -13,12 +13,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Print out binary data in hex format. @@ -32,49 +28,38 @@ namespace core //! Can be used with STL streams, or with a simple static dump() //! function. //! -class CORE_EXPORT CHexUtils -{ - public: - using TDataVec = std::vector; +class CORE_EXPORT CHexUtils { +public: + using TDataVec = std::vector; - public: - //! Construct an object of this class, which can then be output to a - //! stream - only a shallow copy is done, so the data must exist for - //! the lifetime of the object - CHexUtils(const uint8_t *pkt, - size_t pktLen, - bool printHeader = true, - bool printAscii = true); - CHexUtils(const TDataVec &data, - bool printHeader = true, - bool printAscii = true); +public: + //! Construct an object of this class, which can then be output to a + //! stream - only a shallow copy is done, so the data must exist for + //! the lifetime of the object + CHexUtils(const uint8_t* pkt, size_t pktLen, bool printHeader = true, bool printAscii = true); + CHexUtils(const TDataVec& data, bool printHeader = true, bool printAscii = true); - //! Dump a packet of given length to stdout - static void dump(const uint8_t *pkt, size_t pktLen); + //! Dump a packet of given length to stdout + static void dump(const uint8_t* pkt, size_t pktLen); - private: - //! Pointer to raw data - we don't own this - const uint8_t *m_Pkt; +private: + //! Pointer to raw data - we don't own this + const uint8_t* m_Pkt; - //! Packet length - size_t m_PktLen; + //! Packet length + size_t m_PktLen; - //! Should we print a header? - bool m_PrintHeader; + //! Should we print a header? + bool m_PrintHeader; - //! Should we the raw ASCII (where possible) next to the hex? - bool m_PrintAscii; + //! Should we the raw ASCII (where possible) next to the hex? 
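As an aside before the remaining members below, a sketch of the two ways to use CHexUtils; the include path, wrapper function and byte values are assumptions for illustration:

    #include <core/CHexUtils.h>  // assumed include path
    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    void hexDumpSketch() {
        const uint8_t pkt[] = {0xDE, 0xAD, 0xBE, 0xEF};

        // One-shot dump to stdout.
        ml::core::CHexUtils::dump(pkt, sizeof(pkt));

        // Stream form: only a shallow copy is taken, so pkt must outlive
        // the CHexUtils object, as the constructor comment warns.
        std::cout << ml::core::CHexUtils(pkt, sizeof(pkt)) << '\n';
    }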
+ bool m_PrintAscii; - friend CORE_EXPORT std::ostream &operator<<(std::ostream &, const CHexUtils &); + friend CORE_EXPORT std::ostream& operator<<(std::ostream&, const CHexUtils&); }; - -CORE_EXPORT std::ostream &operator<<(std::ostream &strm, const CHexUtils &hex); - - +CORE_EXPORT std::ostream& operator<<(std::ostream& strm, const CHexUtils& hex); } } - #endif // INCLUDED_ml_core_CHexUtils_h - diff --git a/include/core/CIEEE754.h b/include/core/CIEEE754.h index 5ff45ed6e9..75a102b23b 100644 --- a/include/core/CIEEE754.h +++ b/include/core/CIEEE754.h @@ -12,72 +12,60 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief A collection of utility functionality that understands //! IEEE 754 floating point representation. -class CORE_EXPORT CIEEE754 -{ - public: - //! Enumeration of possible precision types: - //! -# Half precision: 10 bit mantissa, 5 bit exponent, 1 sign bit. - //! -# Single precision: 23 bit mantissa, 8 bit exponent, 1 sign bit. - //! -# Double precision: 52 bit mantissa, 11 bit exponent, 1 sign bit. - enum EPrecision - { - E_HalfPrecision, - E_SinglePrecision, - E_DoublePrecision - }; +class CORE_EXPORT CIEEE754 { +public: + //! Enumeration of possible precision types: + //! -# Half precision: 10 bit mantissa, 5 bit exponent, 1 sign bit. + //! -# Single precision: 23 bit mantissa, 8 bit exponent, 1 sign bit. + //! -# Double precision: 52 bit mantissa, 11 bit exponent, 1 sign bit. + enum EPrecision { E_HalfPrecision, E_SinglePrecision, E_DoublePrecision }; - //! This emulates rounding to a specified precision (based on the number - //! of bits in the mantissa corresponding to \p precision). It uses round - //! to nearest ties away from zero. It never converts the values to less - //! than double precision so leaves the exponent unmodified and handles - //! the case that the exponent would overflow. - static double round(double value, EPrecision precision); + //! This emulates rounding to a specified precision (based on the number + //! of bits in the mantissa corresponding to \p precision). It uses round + //! to nearest ties away from zero. It never converts the values to less + //! than double precision so leaves the exponent unmodified and handles + //! the case that the exponent would overflow. + static double round(double value, EPrecision precision); - //! Used to extract the bits corresponding to the mantissa, exponent - //! and sign of an IEEE754 double. - //! - //! \warning You need to be careful using these bits since the mantissa - //! corresponding to a given double is not endian neutral when interpreted - //! as an integer. - //! \note The actual "exponent" is "exponent - 1022" in two's complement. - struct SDoubleRep - { -#ifdef __sparc // Add any other big endian architectures - uint64_t s_Sign: 1; // sign bit - uint64_t s_Exponent: 11; // exponent - uint64_t s_Mantissa: 52; // mantissa + //! Used to extract the bits corresponding to the mantissa, exponent + //! and sign of an IEEE754 double. + //! + //! \warning You need to be careful using these bits since the mantissa + //! corresponding to a given double is not endian neutral when interpreted + //! as an integer. + //! \note The actual "exponent" is "exponent - 1022" in two's complement. 
+ struct SDoubleRep { +#ifdef __sparc // Add any other big endian architectures + uint64_t s_Sign : 1; // sign bit + uint64_t s_Exponent : 11; // exponent + uint64_t s_Mantissa : 52; // mantissa #else - uint64_t s_Mantissa: 52; // mantissa - uint64_t s_Exponent: 11; // exponent - uint64_t s_Sign: 1; // sign bit + uint64_t s_Mantissa : 52; // mantissa + uint64_t s_Exponent : 11; // exponent + uint64_t s_Sign : 1; // sign bit #endif - }; + }; - static const uint64_t IEEE754_MANTISSA_MASK = 0xFFFFFFFFFFFFF; + static const uint64_t IEEE754_MANTISSA_MASK = 0xFFFFFFFFFFFFF; - //! Decompose a double in to its mantissa and exponent. - //! - //! \note This is closely related to std::frexp for double but returns - //! the mantissa interpreted as an integer. - static void decompose(double value, uint64_t &mantissa, int &exponent) - { - SDoubleRep parsed; - static_assert(sizeof(double) == sizeof(SDoubleRep), - "SDoubleRep definition unsuitable for memcpy to double"); - // Use memcpy() rather than union to adhere to strict aliasing rules - ::memcpy(&parsed, &value, sizeof(double)); - exponent = static_cast(parsed.s_Exponent) - 1022; - mantissa = parsed.s_Mantissa; - } + //! Decompose a double in to its mantissa and exponent. + //! + //! \note This is closely related to std::frexp for double but returns + //! the mantissa interpreted as an integer. + static void decompose(double value, uint64_t& mantissa, int& exponent) { + SDoubleRep parsed; + static_assert(sizeof(double) == sizeof(SDoubleRep), "SDoubleRep definition unsuitable for memcpy to double"); + // Use memcpy() rather than union to adhere to strict aliasing rules + ::memcpy(&parsed, &value, sizeof(double)); + exponent = static_cast(parsed.s_Exponent) - 1022; + mantissa = parsed.s_Mantissa; + } }; - } } diff --git a/include/core/CJsonLogLayout.h b/include/core/CJsonLogLayout.h index 1e16c75db2..7181962599 100644 --- a/include/core/CJsonLogLayout.h +++ b/include/core/CJsonLogLayout.h @@ -15,10 +15,8 @@ class CJsonLogLayoutTest; // NB: log4cxx extensions have to go in the log4cxx namespace, hence cannot // stick to the convention of our code being in the ml namespace. This // is due to use of (log4cxx mandated) macros in the implementation. -namespace log4cxx -{ -namespace helpers -{ +namespace log4cxx { +namespace helpers { //! \brief //! Output log messages as lineified JSON. @@ -31,63 +29,56 @@ namespace helpers //! Violates several aspects of the Ml coding standards in order //! to work with log4cxx macros and other conventions. //! -class CORE_EXPORT CJsonLogLayout : public Layout -{ - public: - DECLARE_LOG4CXX_OBJECT(CJsonLogLayout) - BEGIN_LOG4CXX_CAST_MAP() - LOG4CXX_CAST_ENTRY(CJsonLogLayout) - LOG4CXX_CAST_ENTRY_CHAIN(Layout) - END_LOG4CXX_CAST_MAP() - - CJsonLogLayout(); - - //! Accessors for location info (i.e. should file/line be included in - //! log output? - void locationInfo(bool locationInfo); - bool locationInfo() const; - - //! Accessors for whether MDC key-value pairs should be output. - void properties(bool properties); - bool properties() const; - - //! No options to activate. - void activateOptions(Pool &p); - - //! Set options. - virtual void setOption(const LogString &option, - const LogString &value); - - //! Formats a LoggingEvent as JSON. - virtual void format(LogString &output, - const spi::LoggingEventPtr &event, - Pool &p) const; - - //! The CJsonLogLayout prints and does not ignore exceptions. - virtual bool ignoresThrowable() const; - - private: - //! 
Include location info by default - bool m_LocationInfo; - bool m_Properties; - - static std::string cropPath(const std::string &filename); - - // For unit testing - friend class ::CJsonLogLayoutTest; +class CORE_EXPORT CJsonLogLayout : public Layout { +public: + DECLARE_LOG4CXX_OBJECT(CJsonLogLayout) + BEGIN_LOG4CXX_CAST_MAP() + LOG4CXX_CAST_ENTRY(CJsonLogLayout) + LOG4CXX_CAST_ENTRY_CHAIN(Layout) + END_LOG4CXX_CAST_MAP() + + CJsonLogLayout(); + + //! Accessors for location info (i.e. should file/line be included in + //! log output? + void locationInfo(bool locationInfo); + bool locationInfo() const; + + //! Accessors for whether MDC key-value pairs should be output. + void properties(bool properties); + bool properties() const; + + //! No options to activate. + void activateOptions(Pool& p); + + //! Set options. + virtual void setOption(const LogString& option, const LogString& value); + + //! Formats a LoggingEvent as JSON. + virtual void format(LogString& output, const spi::LoggingEventPtr& event, Pool& p) const; + + //! The CJsonLogLayout prints and does not ignore exceptions. + virtual bool ignoresThrowable() const; + +private: + //! Include location info by default + bool m_LocationInfo; + bool m_Properties; + + static std::string cropPath(const std::string& filename); + + // For unit testing + friend class ::CJsonLogLayoutTest; }; LOG4CXX_PTR_DEF(CJsonLogLayout); } // end helpers -namespace classes -{ -extern const helpers::ClassRegistration &CJsonLogLayoutRegistration; +namespace classes { +extern const helpers::ClassRegistration& CJsonLogLayoutRegistration; } } // end log4cxx - #endif // INCLUDED_ml_core_CJsonLogLayout_h - diff --git a/include/core/CJsonOutputStreamWrapper.h b/include/core/CJsonOutputStreamWrapper.h index e37f7bd144..2bf8e67d7f 100644 --- a/include/core/CJsonOutputStreamWrapper.h +++ b/include/core/CJsonOutputStreamWrapper.h @@ -31,77 +31,73 @@ namespace core { //! //! IMPLEMENTATION DECISIONS:\n //! Pool and buffer sizes are hardcoded. -class CORE_EXPORT CJsonOutputStreamWrapper final: CNonCopyable -{ - private: - //! number of buffers in the pool - static const size_t BUFFER_POOL_SIZE = 16; - //! size of 1 buffer in the pool - //! Note: this size is not fixed but might get enlarged at runtime - static const size_t BUFFER_START_SIZE = 1024; - //! Upper boundary for buffer size, if above buffer gets automatically shrunk - //! back to BUFFER_START_SIZE after last usage - static const size_t BUFFER_REALLOC_TRIGGER_SIZE = 4096; +class CORE_EXPORT CJsonOutputStreamWrapper final : CNonCopyable { +private: + //! number of buffers in the pool + static const size_t BUFFER_POOL_SIZE = 16; + //! size of 1 buffer in the pool + //! Note: this size is not fixed but might get enlarged at runtime + static const size_t BUFFER_START_SIZE = 1024; + //! Upper boundary for buffer size, if above buffer gets automatically shrunk + //! back to BUFFER_START_SIZE after last usage + static const size_t BUFFER_REALLOC_TRIGGER_SIZE = 4096; - static const char JSON_ARRAY_START; - static const char JSON_ARRAY_END; - static const char JSON_ARRAY_DELIMITER; + static const char JSON_ARRAY_START; + static const char JSON_ARRAY_END; + static const char JSON_ARRAY_DELIMITER; - public: - using TOStreamConcurrentWrapper = core::CConcurrentWrapper; - using TGenericLineWriter = core::CRapidJsonLineWriter; +public: + using TOStreamConcurrentWrapper = core::CConcurrentWrapper; + using TGenericLineWriter = core::CRapidJsonLineWriter; - public: +public: + //! 
wrap a given ostream for concurrent access + //! \param[in] outStream The stream to write to + explicit CJsonOutputStreamWrapper(std::ostream& outStream); - //! wrap a given ostream for concurrent access - //! \param[in] outStream The stream to write to - explicit CJsonOutputStreamWrapper(std::ostream &outStream); + ~CJsonOutputStreamWrapper(); - ~CJsonOutputStreamWrapper(); + //! acquires a buffer from the pool and attaches it to the given writer object + void acquireBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer*& buffer); - //! acquires a buffer from the pool and attaches it to the given writer object - void acquireBuffer(TGenericLineWriter &writer, rapidjson::StringBuffer *&buffer); + //! releases a buffer from the pool, remaining data will be written before returning it + void releaseBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer* buffer); - //! releases a buffer from the pool, remaining data will be written before returning it - void releaseBuffer(TGenericLineWriter &writer, rapidjson::StringBuffer *buffer); + //! flush the buffer/writer if necessary, keeps the logic when to flush in here + //! \param writer A rapidjson writer object + //! \param buffer The buffer for writing + //! side-effect: the writer as well as the buffer are altered + void flushBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer*& buffer); - //! flush the buffer/writer if necessary, keeps the logic when to flush in here - //! \param writer A rapidjson writer object - //! \param buffer The buffer for writing - //! side-effect: the writer as well as the buffer are altered - void flushBuffer(TGenericLineWriter &writer, - rapidjson::StringBuffer *&buffer); + //! flush the wrapped outputstream + //! note: this is still async + void flush(); - //! flush the wrapped outputstream - //! note: this is still async - void flush(); + //! a sync flush, that blocks until flush has actually happened + void syncFlush(); - //! a sync flush, that blocks until flush has actually happened - void syncFlush(); + //! Debug the memory used by this component. + void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const; - //! Debug the memory used by this component. - void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const; + //! Get the memory used by this component. + std::size_t memoryUsage() const; - //! Get the memory used by this component. - std::size_t memoryUsage() const; +private: + void returnAndCheckBuffer(rapidjson::StringBuffer* buffer); - private: - void returnAndCheckBuffer(rapidjson::StringBuffer *buffer); +private: + //! the pool of buffers + rapidjson::StringBuffer m_StringBuffers[BUFFER_POOL_SIZE]; - private: - //! the pool of buffers - rapidjson::StringBuffer m_StringBuffers[BUFFER_POOL_SIZE]; + //! the pool of available buffers + CConcurrentQueue m_StringBufferQueue; - //! the pool of available buffers - CConcurrentQueue m_StringBufferQueue; + //! the stream object wrapped by CConcurrentWrapper + TOStreamConcurrentWrapper m_ConcurrentOutputStream; - //! the stream object wrapped by CConcurrentWrapper - TOStreamConcurrentWrapper m_ConcurrentOutputStream; - - //! whether we wrote the first element - bool m_FirstObject; + //! 
whether we wrote the first element + bool m_FirstObject; }; - } } diff --git a/include/core/CJsonStatePersistInserter.h b/include/core/CJsonStatePersistInserter.h index ec321e7cc1..f5fd45976c 100644 --- a/include/core/CJsonStatePersistInserter.h +++ b/include/core/CJsonStatePersistInserter.h @@ -15,11 +15,8 @@ #include #include -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! For persisting state in JSON format. @@ -37,49 +34,44 @@ namespace core //! have attributes). This may complicate code that needs to be 100% //! JSON/XML agnostic. //! -class CORE_EXPORT CJsonStatePersistInserter : public CStatePersistInserter -{ - public: - //! Root node has no attributes - CJsonStatePersistInserter(std::ostream &outputStream); +class CORE_EXPORT CJsonStatePersistInserter : public CStatePersistInserter { +public: + //! Root node has no attributes + CJsonStatePersistInserter(std::ostream& outputStream); - //! Destructor flushes - virtual ~CJsonStatePersistInserter(); + //! Destructor flushes + virtual ~CJsonStatePersistInserter(); - //! Store a name/value - virtual void insertValue(const std::string &name, - const std::string &value); + //! Store a name/value + virtual void insertValue(const std::string& name, const std::string& value); - //! Write as an integer avoiding the string conversion - //! overloads - void insertInteger(const std::string &name, size_t value); + //! Write as an integer avoiding the string conversion + //! overloads + void insertInteger(const std::string& name, size_t value); - // Bring extra base class overloads into scope - using CStatePersistInserter::insertValue; + // Bring extra base class overloads into scope + using CStatePersistInserter::insertValue; - //! Flush the underlying output stream - void flush(); + //! Flush the underlying output stream + void flush(); - protected: - //! Start a new level with the given name - virtual void newLevel(const std::string &name); +protected: + //! Start a new level with the given name + virtual void newLevel(const std::string& name); - //! End the current level - virtual void endLevel(); + //! End the current level + virtual void endLevel(); - private: - //! JSON writer ostream wrapper - rapidjson::OStreamWrapper m_WriteStream; +private: + //! JSON writer ostream wrapper + rapidjson::OStreamWrapper m_WriteStream; - using TGenericLineWriter = core::CRapidJsonLineWriter; + using TGenericLineWriter = core::CRapidJsonLineWriter; - //! JSON writer - TGenericLineWriter m_Writer; + //! JSON writer + TGenericLineWriter m_Writer; }; - - } } #endif // INCLUDED_ml_core_CJsonStatePersistInserter_h - diff --git a/include/core/CJsonStateRestoreTraverser.h b/include/core/CJsonStateRestoreTraverser.h index 0a60e57261..13b850f061 100644 --- a/include/core/CJsonStateRestoreTraverser.h +++ b/include/core/CJsonStateRestoreTraverser.h @@ -12,16 +12,13 @@ #include #include -#include #include +#include #include -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! For restoring state in JSON format. @@ -39,149 +36,142 @@ namespace core //! have attributes). This may complicate code that needs to be 100% //! JSON/XML agnostic. //! -class CORE_EXPORT CJsonStateRestoreTraverser : public CStateRestoreTraverser -{ - public: - CJsonStateRestoreTraverser(std::istream &inputStream); - - //! Navigate to the next element at the current level, or return false - //! if there isn't one - virtual bool next(); - - //! Go to the start of the next object - //! 
Stops at the first '}' character so this will not - //! work with nested objects - bool nextObject(); - - //! Does the current element have a sub-level? - virtual bool hasSubLevel() const; - - //! Get the name of the current element - the returned reference is only - //! valid for as long as the traverser is pointing at the same element - virtual const std::string &name() const; - - //! Get the value of the current element - the returned reference is - //! only valid for as long as the traverser is pointing at the same - //! element - virtual const std::string &value() const; - - //! Is the traverser at the end of the inputstream? - virtual bool isEof() const; - - protected: - //! Navigate to the start of the sub-level of the current element, or - //! return false if there isn't one - virtual bool descend(); - - //! Navigate to the element of the level above from which descend() was - //! called, or return false if there isn't a level above - virtual bool ascend(); - - //! Print debug - void debug() const; - - private: - //! Accessors for alternating state variables - size_t currentLevel() const; - bool currentIsEndOfLevel() const; - const std::string ¤tName() const; - const std::string ¤tValue() const; - size_t nextLevel() const; - bool nextIsEndOfLevel() const; - const std::string &nextName() const; - const std::string &nextValue() const; - - //! Start off the parsing process - bool start(); - - //! Get the next token - bool advance(); - - //! Log an error that the JSON parser has detected - void logError(); - - //! Continue parsing the JSON structure - bool parseNext(bool remember); - - //! Skip the (JSON) array until it ends - bool skipArray(); - - private: - - //! Handler - //! for events fired by rapidjson during parsing. - struct SRapidJsonHandler final - { - SRapidJsonHandler(); - - bool Null(); - bool Bool(bool b); - bool Int(int i); - bool Uint(unsigned u); - bool Int64(int64_t i); - bool Uint64(uint64_t u); - bool Double(double d); - bool RawNumber(const char*, rapidjson::SizeType, bool); - bool String(const char *str, rapidjson::SizeType length, bool); - bool StartObject(); - bool Key(const char *str, rapidjson::SizeType length, bool); - bool EndObject(rapidjson::SizeType); - bool StartArray(); - bool EndArray(rapidjson::SizeType); - - enum ETokenType - { - E_TokenNull = 0, - E_TokenKey = 1, - E_TokenBool = 2, - E_TokenInt = 3, - E_TokenUInt = 4, - E_TokenInt64 = 5, - E_TokenUInt64 = 6, - E_TokenDouble = 7, - E_TokenString = 8, - E_TokenObjectStart = 9, - E_TokenObjectEnd = 10, - E_TokenArrayStart = 11, - E_TokenArrayEnd = 12 - }; - - ETokenType s_Type; - - size_t s_Level[2]; - bool s_IsEndOfLevel[2]; - std::string s_Name[2]; - std::string s_Value[2]; - - //! Setting m_NextIndex = (1 - m_NextIndex) advances the - //! stored details. - size_t s_NextIndex; - - bool s_RememberValue; +class CORE_EXPORT CJsonStateRestoreTraverser : public CStateRestoreTraverser { +public: + CJsonStateRestoreTraverser(std::istream& inputStream); + + //! Navigate to the next element at the current level, or return false + //! if there isn't one + virtual bool next(); + + //! Go to the start of the next object + //! Stops at the first '}' character so this will not + //! work with nested objects + bool nextObject(); + + //! Does the current element have a sub-level? + virtual bool hasSubLevel() const; + + //! Get the name of the current element - the returned reference is only + //! valid for as long as the traverser is pointing at the same element + virtual const std::string& name() const; + + //! 
Get the value of the current element - the returned reference is + //! only valid for as long as the traverser is pointing at the same + //! element + virtual const std::string& value() const; + + //! Is the traverser at the end of the inputstream? + virtual bool isEof() const; + +protected: + //! Navigate to the start of the sub-level of the current element, or + //! return false if there isn't one + virtual bool descend(); + + //! Navigate to the element of the level above from which descend() was + //! called, or return false if there isn't a level above + virtual bool ascend(); + + //! Print debug + void debug() const; + +private: + //! Accessors for alternating state variables + size_t currentLevel() const; + bool currentIsEndOfLevel() const; + const std::string& currentName() const; + const std::string& currentValue() const; + size_t nextLevel() const; + bool nextIsEndOfLevel() const; + const std::string& nextName() const; + const std::string& nextValue() const; + + //! Start off the parsing process + bool start(); + + //! Get the next token + bool advance(); + + //! Log an error that the JSON parser has detected + void logError(); + + //! Continue parsing the JSON structure + bool parseNext(bool remember); + + //! Skip the (JSON) array until it ends + bool skipArray(); + +private: + //! Handler + //! for events fired by rapidjson during parsing. + struct SRapidJsonHandler final { + SRapidJsonHandler(); + + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned u); + bool Int64(int64_t i); + bool Uint64(uint64_t u); + bool Double(double d); + bool RawNumber(const char*, rapidjson::SizeType, bool); + bool String(const char* str, rapidjson::SizeType length, bool); + bool StartObject(); + bool Key(const char* str, rapidjson::SizeType length, bool); + bool EndObject(rapidjson::SizeType); + bool StartArray(); + bool EndArray(rapidjson::SizeType); + + enum ETokenType { + E_TokenNull = 0, + E_TokenKey = 1, + E_TokenBool = 2, + E_TokenInt = 3, + E_TokenUInt = 4, + E_TokenInt64 = 5, + E_TokenUInt64 = 6, + E_TokenDouble = 7, + E_TokenString = 8, + E_TokenObjectStart = 9, + E_TokenObjectEnd = 10, + E_TokenArrayStart = 11, + E_TokenArrayEnd = 12 }; - //! JSON reader istream wrapper - rapidjson::IStreamWrapper m_ReadStream; + ETokenType s_Type; - //! JSON reader - rapidjson::Reader m_Reader; + size_t s_Level[2]; + bool s_IsEndOfLevel[2]; + std::string s_Name[2]; + std::string s_Value[2]; - SRapidJsonHandler m_Handler; + //! Setting m_NextIndex = (1 - m_NextIndex) advances the + //! stored details. + size_t s_NextIndex; - //! Flag to indicate whether we've started parsing - bool m_Started; + bool s_RememberValue; + }; - //! Which level within the JSON structure do we want to be getting - //! values from? - size_t m_DesiredLevel; + //! JSON reader istream wrapper + rapidjson::IStreamWrapper m_ReadStream; - //! If the first token is an '[' then we are parsing an array of objects - bool m_IsArrayOfObjects; -}; + //! JSON reader + rapidjson::Reader m_Reader; + SRapidJsonHandler m_Handler; + //! Flag to indicate whether we've started parsing + bool m_Started; + + //! Which level within the JSON structure do we want to be getting + //! values from? + size_t m_DesiredLevel; + + //! 
If the first token is an '[' then we are parsing an array of objects + bool m_IsArrayOfObjects; +}; } } #endif // INCLUDED_ml_core_CJsonStateRestoreTraverser_h - diff --git a/include/core/CLocalTimeR.h b/include/core/CLocalTimeR.h index 85d2fe019f..8d7fad9d4b 100644 --- a/include/core/CLocalTimeR.h +++ b/include/core/CLocalTimeR.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper for the localtime_r() function. @@ -29,16 +25,11 @@ namespace core //! localtime_s() function with slightly different semantics to Unix's //! localtime_r(). //! -class CORE_EXPORT CLocalTimeR : private CNonInstantiatable -{ - public: - static struct tm *localTimeR(const time_t *clock, - struct tm *result); +class CORE_EXPORT CLocalTimeR : private CNonInstantiatable { +public: + static struct tm* localTimeR(const time_t* clock, struct tm* result); }; - - } } #endif // INCLUDED_ml_core_CLocalTimeR_h - diff --git a/include/core/CLogger.h b/include/core/CLogger.h index d537cf75eb..87ce98db75 100644 --- a/include/core/CLogger.h +++ b/include/core/CLogger.h @@ -19,11 +19,8 @@ class CLoggerTest; -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Core logging class in Ml @@ -65,119 +62,105 @@ namespace core //! product, but can be useful when a unit test needs to log more //! detailed information. //! -class CORE_EXPORT CLogger : private CNonCopyable -{ - public: - //! Used to set the level we should log at - enum ELevel - { - E_Fatal, - E_Error, - E_Warn, - E_Info, - E_Debug, - E_Trace - }; - - public: - //! Access to singleton - use MACROS to get to this when logging - //! messages - static CLogger &instance(); - - //! Reconfigure to either a named pipe or a properties file. - //! If both are supplied the named pipe takes precedence. - bool reconfigure(const std::string &pipeName, - const std::string &propertiesFile); - - //! Tell the logger to log to a named pipe rather than a file. - bool reconfigureLogToNamedPipe(const std::string &pipeName); - - //! Tell the logger to reconfigure itself by reading a specified - //! properties file, if the file exists. - bool reconfigureFromFile(const std::string &propertiesFile); - - //! Tell the logger to reconfigure itself to log JSON. - bool reconfigureLogJson(); - - //! Set the logging level on the fly - useful when unit tests need to - //! log at a lower level than the shipped programs - bool setLoggingLevel(ELevel level); - - //! Has the logger been reconfigured? Callers should note that there - //! is nothing to stop the logger being reconfigured between a call to - //! this method and them using the result. - bool hasBeenReconfigured() const; - - //! Log all environment variables. Callers are responsible for ensuring - //! that this method is not called at the same time as a putenv() or - //! setenv() call in another thread. - void logEnvironment() const; - - //! Access to underlying logger (must only be called from macros) - log4cxx::LoggerPtr logger(); +class CORE_EXPORT CLogger : private CNonCopyable { +public: + //! Used to set the level we should log at + enum ELevel { E_Fatal, E_Error, E_Warn, E_Info, E_Debug, E_Trace }; + +public: + //! Access to singleton - use MACROS to get to this when logging + //! messages + static CLogger& instance(); + + //! Reconfigure to either a named pipe or a properties file. + //! If both are supplied the named pipe takes precedence. 
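The declarations follow just below; a sketch of the precedence rule stated above, with a hypothetical pipe name, properties path and fallback:

    #include <core/CLogger.h>    // assumed include path

    void loggerSketch() {
        ml::core::CLogger& logger = ml::core::CLogger::instance();

        // When both arguments are supplied the named pipe takes precedence.
        if (logger.reconfigure("/tmp/ml_log_pipe", "log4cxx.properties") == false) {
            // Hypothetical fallback: raise verbosity on the default config.
            logger.setLoggingLevel(ml::core::CLogger::E_Debug);
        }
    }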
+ bool reconfigure(const std::string& pipeName, const std::string& propertiesFile); + + //! Tell the logger to log to a named pipe rather than a file. + bool reconfigureLogToNamedPipe(const std::string& pipeName); + + //! Tell the logger to reconfigure itself by reading a specified + //! properties file, if the file exists. + bool reconfigureFromFile(const std::string& propertiesFile); + + //! Tell the logger to reconfigure itself to log JSON. + bool reconfigureLogJson(); + + //! Set the logging level on the fly - useful when unit tests need to + //! log at a lower level than the shipped programs + bool setLoggingLevel(ELevel level); + + //! Has the logger been reconfigured? Callers should note that there + //! is nothing to stop the logger being reconfigured between a call to + //! this method and them using the result. + bool hasBeenReconfigured() const; + + //! Log all environment variables. Callers are responsible for ensuring + //! that this method is not called at the same time as a putenv() or + //! setenv() call in another thread. + void logEnvironment() const; + + //! Access to underlying logger (must only be called from macros) + log4cxx::LoggerPtr logger(); #ifdef Windows - //! Throw a fatal exception - __declspec(noreturn) static void fatal(); + //! Throw a fatal exception + __declspec(noreturn) static void fatal(); #else - //! Throw a fatal exception - __attribute__ ((noreturn)) static void fatal(); + //! Throw a fatal exception + __attribute__((noreturn)) static void fatal(); #endif - private: - //! Constructor for a singleton is private. - CLogger(); - ~CLogger(); - - //! Replace Ml specific patterns in log4cxx properties. In - //! addition to the patterns usually supported by log4cxx, Ml will - //! substitute: - //! 1) %D with the path to the Ml base log directory - //! 2) %N with the program's name - //! 3) %P with the program's process ID - void massageProperties(log4cxx::helpers::Properties &props) const; - - using TLogCharLogStrMap = std::map; - using TLogCharLogStrMapCItr = TLogCharLogStrMap::const_iterator; - - //! Replace Ml specific mappings in a single string - void massageString(const TLogCharLogStrMap &mappings, - const log4cxx::LogString &oldStr, - log4cxx::LogString &newStr) const; - - //! Helper for other reconfiguration methods - bool reconfigureFromProps(log4cxx::helpers::Properties &props); - - //! Reset the logger, this is a helper for unit testing as - //! CLogger is a singleton, so we can not just create new instances - void reset(); - private: - log4cxx::LoggerPtr m_Logger; - - //! Has the logger ever been reconfigured? This is not protected by a - //! lock despite the fact that it may be accessed from different - //! threads. It is declared volatile to prevent the compiler optimising - //! away reads of it. - volatile bool m_Reconfigured; - - //! Cache the program name - std::string m_ProgramName; - - //! When logging to a named pipe this stores the C FILE pointer to - //! access the pipe. Should be NULL otherwise. - CNamedPipeFactory::TFileP m_PipeFile; - - //! When logging to a pipe, the file descriptor that stderr was - //! originally associated with. - int m_OrigStderrFd; - - //! friend class for testing - friend class ::CLoggerTest; -}; +private: + //! Constructor for a singleton is private. + CLogger(); + ~CLogger(); + //! Replace Ml specific patterns in log4cxx properties. In + //! addition to the patterns usually supported by log4cxx, Ml will + //! substitute: + //! 1) %D with the path to the Ml base log directory + //! 
2) %N with the program's name + //! 3) %P with the program's process ID + void massageProperties(log4cxx::helpers::Properties& props) const; + using TLogCharLogStrMap = std::map; + using TLogCharLogStrMapCItr = TLogCharLogStrMap::const_iterator; + + //! Replace Ml specific mappings in a single string + void massageString(const TLogCharLogStrMap& mappings, const log4cxx::LogString& oldStr, log4cxx::LogString& newStr) const; + + //! Helper for other reconfiguration methods + bool reconfigureFromProps(log4cxx::helpers::Properties& props); + + //! Reset the logger, this is a helper for unit testing as + //! CLogger is a singleton, so we can not just create new instances + void reset(); + +private: + log4cxx::LoggerPtr m_Logger; + + //! Has the logger ever been reconfigured? This is not protected by a + //! lock despite the fact that it may be accessed from different + //! threads. It is declared volatile to prevent the compiler optimising + //! away reads of it. + volatile bool m_Reconfigured; + + //! Cache the program name + std::string m_ProgramName; + + //! When logging to a named pipe this stores the C FILE pointer to + //! access the pipe. Should be NULL otherwise. + CNamedPipeFactory::TFileP m_PipeFile; + + //! When logging to a pipe, the file descriptor that stderr was + //! originally associated with. + int m_OrigStderrFd; + + //! friend class for testing + friend class ::CLoggerTest; +}; } } #endif // INCLUDED_ml_core_CLogger_h - diff --git a/include/core/CMaskIterator.h b/include/core/CMaskIterator.h index bb5180b824..fb92006d17 100644 --- a/include/core/CMaskIterator.h +++ b/include/core/CMaskIterator.h @@ -12,10 +12,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief A random access iterator over a subset of the elements of //! a random access container. @@ -33,146 +31,122 @@ namespace core //! are the same, although the relevant comparison operators work for //! both const and non-const versions of the underlying iterator. 
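The class template follows below; a sketch of the intended use via the begin_masked/end_masked helpers declared later in this header (the include path and wrapper function are assumptions):

    #include <core/CMaskIterator.h>  // assumed include path
    #include <cstddef>
    #include <numeric>
    #include <vector>

    void maskSketch() {
        std::vector<double> values{1.0, 2.0, 3.0, 4.0};
        std::vector<std::ptrdiff_t> mask{0, 2};  // indices into values

        // Iterates values[0] and values[2] only, so sum == 4.0.
        double sum = std::accumulate(ml::core::begin_masked(values, mask),
                                     ml::core::end_masked(values, mask), 0.0);
        (void)sum;
    }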
template -class CMaskIterator : private boost::incrementable< CMaskIterator, - boost::decrementable< CMaskIterator, - boost::addable2< CMaskIterator, typename std::iterator_traits::difference_type, - boost::subtractable2< CMaskIterator, typename std::iterator_traits::difference_type > > > > -{ - public: - using difference_type = typename std::iterator_traits::difference_type; - using value_type = typename std::iterator_traits::value_type; - using pointer = typename std::iterator_traits::pointer; - using reference = typename std::iterator_traits::reference; - using iterator_category = typename std::iterator_traits::iterator_category; - using TDifferenceVec = std::vector; - - public: - CMaskIterator(ITR begin, const TDifferenceVec &mask, difference_type index) : - m_Begin(begin), m_Mask(&mask), m_Index(index) - {} - - template - bool operator==(const CMaskIterator &rhs) const - { - return this->baseEqual(rhs) && m_Index == rhs.m_Index; - } - template - bool operator!=(const CMaskIterator &rhs) const - { - return !(*this == rhs); - } - template - bool operator<(const CMaskIterator &rhs) const - { - return this->baseEqual(rhs) && m_Index < rhs.m_Index; - } - template - bool operator<=(const CMaskIterator &rhs) const - { - return this->baseEqual(rhs) && m_Index <= rhs.m_Index; - } - template - bool operator>(const CMaskIterator &rhs) const - { - return this->baseEqual(rhs) && m_Index > rhs.m_Index; - } - template - bool operator>=(const CMaskIterator &rhs) const - { - return this->baseEqual(rhs) && m_Index <= rhs.m_Index; - } - - reference operator*() const - { - return *(m_Begin + (*m_Mask)[m_Index]); - } - pointer operator->() const - { - return &(*(m_Begin + (*m_Mask)[m_Index])); - } - reference operator[](difference_type n) const - { - return *(m_Begin + (*m_Mask)[m_Index + n]); - } - - const CMaskIterator &operator++() { ++m_Index; return *this; } - const CMaskIterator &operator--() { --m_Index; return *this; } - template - difference_type operator-(const CMaskIterator &rhs) const - { - return static_cast(m_Index) - - static_cast(rhs.m_Index); - } - const CMaskIterator &operator+=(difference_type n) - { - m_Index += n; - return *this; - } - const CMaskIterator &operator-=(difference_type n) - { - m_Index -= n; - return *this; - } - - private: - template - bool baseEqual(const CMaskIterator &rhs) const - { - return m_Begin == rhs.m_Begin && m_Mask == rhs.m_Mask; - } - - private: - //! The start of the container. - ITR m_Begin; - //! The mask. - const TDifferenceVec *m_Mask; - //! The current element (in the mask). 
- difference_type m_Index; +class CMaskIterator + : private boost::incrementable< + CMaskIterator, + boost::decrementable< + CMaskIterator, + boost::addable2, + typename std::iterator_traits::difference_type, + boost::subtractable2, typename std::iterator_traits::difference_type>>>> { +public: + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = typename std::iterator_traits::pointer; + using reference = typename std::iterator_traits::reference; + using iterator_category = typename std::iterator_traits::iterator_category; + using TDifferenceVec = std::vector; + +public: + CMaskIterator(ITR begin, const TDifferenceVec& mask, difference_type index) : m_Begin(begin), m_Mask(&mask), m_Index(index) {} + + template + bool operator==(const CMaskIterator& rhs) const { + return this->baseEqual(rhs) && m_Index == rhs.m_Index; + } + template + bool operator!=(const CMaskIterator& rhs) const { + return !(*this == rhs); + } + template + bool operator<(const CMaskIterator& rhs) const { + return this->baseEqual(rhs) && m_Index < rhs.m_Index; + } + template + bool operator<=(const CMaskIterator& rhs) const { + return this->baseEqual(rhs) && m_Index <= rhs.m_Index; + } + template + bool operator>(const CMaskIterator& rhs) const { + return this->baseEqual(rhs) && m_Index > rhs.m_Index; + } + template + bool operator>=(const CMaskIterator& rhs) const { + return this->baseEqual(rhs) && m_Index <= rhs.m_Index; + } + + reference operator*() const { return *(m_Begin + (*m_Mask)[m_Index]); } + pointer operator->() const { return &(*(m_Begin + (*m_Mask)[m_Index])); } + reference operator[](difference_type n) const { return *(m_Begin + (*m_Mask)[m_Index + n]); } + + const CMaskIterator& operator++() { + ++m_Index; + return *this; + } + const CMaskIterator& operator--() { + --m_Index; + return *this; + } + template + difference_type operator-(const CMaskIterator& rhs) const { + return static_cast(m_Index) - static_cast(rhs.m_Index); + } + const CMaskIterator& operator+=(difference_type n) { + m_Index += n; + return *this; + } + const CMaskIterator& operator-=(difference_type n) { + m_Index -= n; + return *this; + } + +private: + template + bool baseEqual(const CMaskIterator& rhs) const { + return m_Begin == rhs.m_Begin && m_Mask == rhs.m_Mask; + } + +private: + //! The start of the container. + ITR m_Begin; + //! The mask. + const TDifferenceVec* m_Mask; + //! The current element (in the mask). + difference_type m_Index; }; //! Get a non-constant mask iterator over a subset of the elements of a vector. template -CMaskIterator::iterator> begin_masked(std::vector &v, - const std::vector &mask) -{ +CMaskIterator::iterator> begin_masked(std::vector& v, const std::vector& mask) { return CMaskIterator::iterator>(v.begin(), mask, 0); } //! Get a non-constant mask iterator at the end of a subset of the elements of a vector. template -CMaskIterator::iterator> end_masked(std::vector &v, - const std::vector &mask) -{ +CMaskIterator::iterator> end_masked(std::vector& v, const std::vector& mask) { return CMaskIterator::iterator>(v.begin(), mask, mask.size()); } //! Get a constant mask iterator over a subset of the elements of a vector. template -CMaskIterator::const_iterator> begin_masked(const std::vector &v, - const std::vector &mask) -{ +CMaskIterator::const_iterator> begin_masked(const std::vector& v, const std::vector& mask) { return CMaskIterator::const_iterator>(v.begin(), mask, 0); } //! 
Get a constant mask iterator at the end of a subset of the elements of a vector. template -CMaskIterator::const_iterator> end_masked(const std::vector &v, - const std::vector &mask) -{ +CMaskIterator::const_iterator> end_masked(const std::vector& v, const std::vector& mask) { return CMaskIterator::const_iterator>(v.begin(), mask, mask.size()); } //! A mask iterator over a subset of an iterated sequence. template -CMaskIterator begin_masked(ITR i, const std::vector &mask) -{ +CMaskIterator begin_masked(ITR i, const std::vector& mask) { return CMaskIterator(i, mask, 0); } //! Get a mask iterator at the end of a subset of the elements of an iterated sequence. template -CMaskIterator end_masked(ITR i, const std::vector &mask) -{ +CMaskIterator end_masked(ITR i, const std::vector& mask) { return CMaskIterator(i, mask, mask.size()); } - } } diff --git a/include/core/CMemory.h b/include/core/CMemory.h index 817ae44b1e..fd9e69f59a 100644 --- a/include/core/CMemory.h +++ b/include/core/CMemory.h @@ -12,12 +12,12 @@ #include #include -#include #include +#include #include #include -#include #include +#include #include #include #include @@ -33,16 +33,14 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -template class CSmallVector; +template +class CSmallVector; using TTypeInfoCRef = boost::reference_wrapper; -namespace memory_detail -{ +namespace memory_detail { // Windows creates an extra map/list node per map/list #ifdef Windows @@ -64,119 +62,88 @@ const std::size_t MIN_DEQUE_PAGE_VEC_ENTRIES = 8; #endif template -struct enable_if_member_function -{ +struct enable_if_member_function { using type = R; }; template -struct enable_if_function -{ +struct enable_if_function { using type = R; }; //! \brief Default template declaration for CMemoryDynamicSize::dispatch. template -struct SMemoryDynamicSize -{ - static std::size_t dispatch(const T &) { return 0; } +struct SMemoryDynamicSize { + static std::size_t dispatch(const T&) { return 0; } }; //! \brief Template specialisation where T has member function "memoryUsage()" template -struct SMemoryDynamicSize::type> -{ - static std::size_t dispatch(const T &t) { return t.memoryUsage(); } +struct SMemoryDynamicSize::type> { + static std::size_t dispatch(const T& t) { return t.memoryUsage(); } }; //! \brief Default template for classes that don't sport a staticSize member. template -struct SMemoryStaticSize -{ - static std::size_t dispatch(const T & /*t*/) - { - return sizeof(T); - } +struct SMemoryStaticSize { + static std::size_t dispatch(const T& /*t*/) { return sizeof(T); } }; //! \brief Template specialisation for classes having a staticSize member: //! used when base class pointers are passed to dynamicSize(). template -struct SMemoryStaticSize::type> -{ - static std::size_t dispatch(const T &t) - { - return t.staticSize(); - } +struct SMemoryStaticSize::type> { + static std::size_t dispatch(const T& t) { return t.staticSize(); } }; //! \brief Base implementation checks for POD. template -struct SDynamicSizeAlwaysZero -{ - static inline bool value() - { - return boost::is_pod::value; - } +struct SDynamicSizeAlwaysZero { + static inline bool value() { return boost::is_pod::value; } }; //! \brief Checks types in pair. 
template -struct SDynamicSizeAlwaysZero> -{ - static inline bool value() - { - return SDynamicSizeAlwaysZero::value() && SDynamicSizeAlwaysZero::value(); - } +struct SDynamicSizeAlwaysZero> { + static inline bool value() { return SDynamicSizeAlwaysZero::value() && SDynamicSizeAlwaysZero::value(); } }; //! \brief Specialisation for std::less always true. template -struct SDynamicSizeAlwaysZero> -{ +struct SDynamicSizeAlwaysZero> { static inline bool value() { return true; } }; //! \brief Specialisation for std::greater always true. template -struct SDynamicSizeAlwaysZero> -{ +struct SDynamicSizeAlwaysZero> { static inline bool value() { return true; } }; //! \brief Checks type in optional. template -struct SDynamicSizeAlwaysZero> -{ +struct SDynamicSizeAlwaysZero> { static inline bool value() { return SDynamicSizeAlwaysZero::value(); } }; //! \brief Check for member dynamicSizeAlwaysZero function. template -struct SDynamicSizeAlwaysZero::type> -{ +struct SDynamicSizeAlwaysZero::type> { static inline bool value() { return T::dynamicSizeAlwaysZero(); } }; //! \brief Total ordering of type_info objects. -struct STypeInfoLess -{ +struct STypeInfoLess { template - bool operator()(const std::pair &lhs, - const std::pair &rhs) const - { + bool operator()(const std::pair& lhs, const std::pair& rhs) const { return boost::unwrap_ref(lhs.first).before(boost::unwrap_ref(rhs.first)); } template - bool operator()(const std::pair &lhs, - TTypeInfoCRef rhs) const - { + bool operator()(const std::pair& lhs, TTypeInfoCRef rhs) const { return boost::unwrap_ref(lhs.first).before(boost::unwrap_ref(rhs)); } template - bool operator()(TTypeInfoCRef lhs, - const std::pair &rhs) const - { + bool operator()(TTypeInfoCRef lhs, const std::pair& rhs) const { return boost::unwrap_ref(lhs).before(boost::unwrap_ref(rhs.first)); } }; @@ -188,16 +155,14 @@ struct STypeInfoLess //! that we can't simply check if capacity > N because N is treated //! as a guideline. template -static bool inplace(const CSmallVector &t) -{ - const char *address = reinterpret_cast(&t); - const char *storage = reinterpret_cast(t.data()); +static bool inplace(const CSmallVector& t) { + const char* address = reinterpret_cast(&t); + const char* storage = reinterpret_cast(t.data()); return storage >= address && storage < address + sizeof t; } } // memory_detail:: - //! \brief Core memory usage template class. //! //! DESCRIPTION:\n @@ -223,444 +188,350 @@ static bool inplace(const CSmallVector &t) //! //! Only contains static members, this should not be instantiated. //! -class CORE_EXPORT CMemory : private CNonInstantiatable -{ - private: - static const std::string EMPTY_STRING; - +class CORE_EXPORT CMemory : private CNonInstantiatable { +private: + static const std::string EMPTY_STRING; + +public: + //! Implements a visitor pattern for computing the size of types + //! stored in boost::any. + //! + //! DESCRIPTION:\n + //! The idea of this class is that the user of dynamicSize should + //! register call backs to compute the size of objects which are + //! stored in boost::any. Provided all registered types which will + //! be visited have been registered then this should correctly + //! compute the dynamic size used by objects stored in boost::any. + //! It will warn if a type is visited which is not registered. + //! There is a singleton visitor available from CMemory. Example + //! usage is as follows: + //! \code{cpp} + //! CMemory::anyVisitor().insertCallback>(); + //! std::vector variables; + //! variables.push_back(TDoubleVec(10)); + //! 
std::size_t size = CMemory::dynamicSize(variables, visitor); + //! \endcode + class CORE_EXPORT CAnyVisitor { public: - //! Implements a visitor pattern for computing the size of types - //! stored in boost::any. - //! - //! DESCRIPTION:\n - //! The idea of this class is that the user of dynamicSize should - //! register call backs to compute the size of objects which are - //! stored in boost::any. Provided all registered types which will - //! be visited have been registered then this should correctly - //! compute the dynamic size used by objects stored in boost::any. - //! It will warn if a type is visited which is not registered. - //! There is a singleton visitor available from CMemory. Example - //! usage is as follows: - //! \code{cpp} - //! CMemory::anyVisitor().insertCallback>(); - //! std::vector variables; - //! variables.push_back(TDoubleVec(10)); - //! std::size_t size = CMemory::dynamicSize(variables, visitor); - //! \endcode - class CORE_EXPORT CAnyVisitor - { - public: - using TDynamicSizeFunc = std::size_t (*)(const boost::any &any); - using TTypeInfoDynamicSizeFuncPr = std::pair; - using TTypeInfoDynamicSizeFuncPrVec = std::vector; - - //! Insert a callback to compute the size of the type T - //! if it is stored in boost::any. - template - bool registerCallback() - { - auto i = std::lower_bound(m_Callbacks.begin(), - m_Callbacks.end(), - boost::cref(typeid(T)), - memory_detail::STypeInfoLess()); - if (i == m_Callbacks.end()) - { - m_Callbacks.emplace_back(boost::cref(typeid(T)), - &CAnyVisitor::dynamicSizeCallback); - return true; - } - else if (i->first.get() != typeid(T)) - { - m_Callbacks.insert(i, {boost::cref(typeid(T)), - &CAnyVisitor::dynamicSizeCallback}); - return true; - } - return false; - } + using TDynamicSizeFunc = std::size_t (*)(const boost::any& any); + using TTypeInfoDynamicSizeFuncPr = std::pair; + using TTypeInfoDynamicSizeFuncPrVec = std::vector; - //! Calculate the dynamic size of x if a callback has been - //! registered for its type. - std::size_t dynamicSize(const boost::any &x) const - { - if (!x.empty()) - { - auto i = std::lower_bound(m_Callbacks.begin(), - m_Callbacks.end(), - boost::cref(x.type()), - memory_detail::STypeInfoLess()); - if (i != m_Callbacks.end() && i->first.get() == x.type()) - { - return (*i->second)(x); - } - LOG_ERROR("No callback registered for " << x.type().name()); - } - return 0; + //! Insert a callback to compute the size of the type T + //! if it is stored in boost::any. + template + bool registerCallback() { + auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), boost::cref(typeid(T)), memory_detail::STypeInfoLess()); + if (i == m_Callbacks.end()) { + m_Callbacks.emplace_back(boost::cref(typeid(T)), &CAnyVisitor::dynamicSizeCallback); + return true; + } else if (i->first.get() != typeid(T)) { + m_Callbacks.insert(i, {boost::cref(typeid(T)), &CAnyVisitor::dynamicSizeCallback}); + return true; + } + return false; + } + + //! Calculate the dynamic size of x if a callback has been + //! registered for its type. + std::size_t dynamicSize(const boost::any& x) const { + if (!x.empty()) { + auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), boost::cref(x.type()), memory_detail::STypeInfoLess()); + if (i != m_Callbacks.end() && i->first.get() == x.type()) { + return (*i->second)(x); } + LOG_ERROR("No callback registered for " << x.type().name()); + } + return 0; + } - private: - //! Wraps up call to any_cast and dynamicSize. 
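The callback plumbing continues below. An expanded form of the \code example above; the element type std::vector<double> and the anyVisitor() accessor spelling are assumptions, since the template arguments are garbled in this rendering:

    #include <core/CMemory.h>    // assumed include path
    #include <boost/any.hpp>
    #include <vector>

    void anyVisitorSketch() {
        using TDoubleVec = std::vector<double>;

        // Register once per concrete type that may be stored in boost::any;
        // unregistered types are reported via LOG_ERROR and counted as zero.
        ml::core::CMemory::anyVisitor().registerCallback<TDoubleVec>();

        boost::any holder = TDoubleVec(10);
        std::size_t bytes = ml::core::CMemory::anyVisitor().dynamicSize(holder);
        (void)bytes;
    }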
- template - static std::size_t dynamicSizeCallback(const boost::any &any) - { - try - { - return sizeof(T) + CMemory::dynamicSize(boost::any_cast(any)); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate size " << e.what()); - } - return 0; - } + private: + //! Wraps up call to any_cast and dynamicSize. + template + static std::size_t dynamicSizeCallback(const boost::any& any) { + try { + return sizeof(T) + CMemory::dynamicSize(boost::any_cast(any)); + } catch (const std::exception& e) { LOG_ERROR("Failed to calculate size " << e.what()); } + return 0; + } - TTypeInfoDynamicSizeFuncPrVec m_Callbacks; - }; + TTypeInfoDynamicSizeFuncPrVec m_Callbacks; + }; - public: - //! Default template. - template - static std::size_t dynamicSize(const T &t, typename boost::disable_if>::type * = 0) - { - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - mem += memory_detail::SMemoryDynamicSize::dispatch(t); - } - return mem; +public: + //! Default template. + template + static std::size_t dynamicSize(const T& t, typename boost::disable_if>::type* = 0) { + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + mem += memory_detail::SMemoryDynamicSize::dispatch(t); } + return mem; + } - //! Overload for pointer. - template - static std::size_t dynamicSize(const T &t, typename boost::enable_if>::type * = 0) - { - if (t == 0) - { - return 0; - } - return staticSize(*t) + dynamicSize(*t); + //! Overload for pointer. + template + static std::size_t dynamicSize(const T& t, typename boost::enable_if>::type* = 0) { + if (t == 0) { + return 0; } + return staticSize(*t) + dynamicSize(*t); + } - //! Overload for boost::shared_ptr. - template - static std::size_t dynamicSize(const boost::shared_ptr &t) - { - if (!t) - { - return 0; - } - long uc = t.use_count(); - // Round up - return (staticSize(*t) + dynamicSize(*t) + std::size_t(uc - 1)) / uc; + //! Overload for boost::shared_ptr. + template + static std::size_t dynamicSize(const boost::shared_ptr& t) { + if (!t) { + return 0; } + long uc = t.use_count(); + // Round up + return (staticSize(*t) + dynamicSize(*t) + std::size_t(uc - 1)) / uc; + } - //! Overload for boost::array. - template - static std::size_t dynamicSize(const boost::array &t) - { - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - for (auto i = t.begin(); i != t.end(); ++i) - { - mem += dynamicSize(*i); - } + //! Overload for boost::array. + template + static std::size_t dynamicSize(const boost::array& t) { + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem; } + return mem; + } - //! Overload for std::vector. - template - static std::size_t dynamicSize(const std::vector &t) - { - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - for (auto i = t.begin(); i != t.end(); ++i) - { - mem += dynamicSize(*i); - } + //! Overload for std::vector. + template + static std::size_t dynamicSize(const std::vector& t) { + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem + sizeof(T) * t.capacity(); } + return mem + sizeof(T) * t.capacity(); + } - //! Overload for small vector. 
-        template<typename T, std::size_t N>
-        static std::size_t dynamicSize(const CSmallVector<T, N> &t)
-        {
-            std::size_t mem = 0;
-            if (!memory_detail::SDynamicSizeAlwaysZero<T>::value())
-            {
-                for (auto i = t.begin(); i != t.end(); ++i)
-                {
-                    mem += dynamicSize(*i);
-                }
+    //! Overload for small vector.
+    template<typename T, std::size_t N>
+    static std::size_t dynamicSize(const CSmallVector<T, N>& t) {
+        std::size_t mem = 0;
+        if (!memory_detail::SDynamicSizeAlwaysZero<T>::value()) {
+            for (auto i = t.begin(); i != t.end(); ++i) {
+                mem += dynamicSize(*i);
             }
-            return mem + (memory_detail::inplace(t) ? 0 : t.capacity()) * sizeof(T);
         }
+        return mem + (memory_detail::inplace(t) ? 0 : t.capacity()) * sizeof(T);
+    }

-        //! Overload for std::string.
-        static std::size_t dynamicSize(const std::string &t)
-        {
-            std::size_t capacity = t.capacity();
-            // The different STLs we use on various platforms all have different
-            // allocation strategies for strings.
-            // These are hard-coded here, on the assumption that they will not
-            // change frequently - but checked by unit tests that do runtime
-            // verification.
-            // See http://linux/wiki/index.php/Technical_design_issues#std::string
+    //! Overload for std::string.
+    static std::size_t dynamicSize(const std::string& t) {
+        std::size_t capacity = t.capacity();
+        // The different STLs we use on various platforms all have different
+        // allocation strategies for strings.
+        // These are hard-coded here, on the assumption that they will not
+        // change frequently - but checked by unit tests that do runtime
+        // verification.
+        // See http://linux/wiki/index.php/Technical_design_issues#std::string
 #ifdef MacOSX
-            // For lengths up to 22 bytes there is no allocation
-            if (capacity <= 22)
-            {
-                return 0;
-            }
-            return capacity + 1;
-
-#else // Linux with C++11 ABI and Windows
-            // For lengths up to 15 bytes there is no allocation
-            if (capacity <= 15)
-            {
-                return 0;
-            }
-            return capacity + 1;
-#endif
+        // For lengths up to 22 bytes there is no allocation
+        if (capacity <= 22) {
+            return 0;
         }
+        return capacity + 1;

-        //! Overload for boost::unordered_map.
-        template<typename K, typename V>
-        static std::size_t dynamicSize(const boost::unordered_map<K, V> &t)
-        {
-            std::size_t mem = 0;
-            if (!(memory_detail::SDynamicSizeAlwaysZero<K>::value() &&
-                  memory_detail::SDynamicSizeAlwaysZero<V>::value()))
-            {
-                for (auto i = t.begin(); i != t.end(); ++i)
-                {
-                    mem += dynamicSize(*i);
-                }
-            }
-            return mem + (t.bucket_count() * sizeof(std::size_t) * 2)
-                       + (t.size() * (sizeof(K) + sizeof(V) + 2 * sizeof(std::size_t)));
+#else // Linux with C++11 ABI and Windows
+        // For lengths up to 15 bytes there is no allocation
+        if (capacity <= 15) {
+            return 0;
         }
+        return capacity + 1;
+#endif
+    }

-        //! Overload for std::map.
-        template<typename K, typename V>
-        static std::size_t dynamicSize(const std::map<K, V> &t)
-        {
-            // std::map appears to use 4 pointers/size_ts per tree node
-            // (colour, parent, left and right child pointers).
-            std::size_t mem = 0;
-            if (!(memory_detail::SDynamicSizeAlwaysZero<K>::value() &&
-                  memory_detail::SDynamicSizeAlwaysZero<V>::value()))
-            {
-                for (auto i = t.begin(); i != t.end(); ++i)
-                {
-                    mem += dynamicSize(*i);
-                }
+    //! Overload for boost::unordered_map.
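// For illustration: a consequence of the small-string-optimisation handling
// above. The thresholds are the hard-coded assumptions from the code (15 on
// Linux with the C++11 ABI and on Windows, 22 on macOS), verified by unit
// tests; a sketch for a 64-bit Linux/Windows build:
// \code{cpp}
// #include <core/CMemory.h>
// #include <string>
//
// void stringSizes() {
//     std::string shortStr("tiny");  // capacity() <= 15: stored in-object
//     std::string longStr(100, 'x'); // heap buffer of capacity() + 1 bytes
//     std::size_t a = ml::core::CMemory::dynamicSize(shortStr); // 0
//     std::size_t b = ml::core::CMemory::dynamicSize(longStr);  // longStr.capacity() + 1
// }
// \endcode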
+ template + static std::size_t dynamicSize(const boost::unordered_map& t) { + std::size_t mem = 0; + if (!(memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value())) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem + (memory_detail::EXTRA_NODES + t.size()) - * (sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t)); } + return mem + (t.bucket_count() * sizeof(std::size_t) * 2) + (t.size() * (sizeof(K) + sizeof(V) + 2 * sizeof(std::size_t))); + } - //! Overload for boost::container::flat_map. - template - static std::size_t dynamicSize(const boost::container::flat_map &t) - { - std::size_t mem = 0; - if (!(memory_detail::SDynamicSizeAlwaysZero::value() && - memory_detail::SDynamicSizeAlwaysZero::value())) - { - for (auto i = t.begin(); i != t.end(); ++i) - { - mem += dynamicSize(*i); - } + //! Overload for std::map. + template + static std::size_t dynamicSize(const std::map& t) { + // std::map appears to use 4 pointers/size_ts per tree node + // (colour, parent, left and right child pointers). + std::size_t mem = 0; + if (!(memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value())) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem + t.capacity() * sizeof(std::pair); } + return mem + (memory_detail::EXTRA_NODES + t.size()) * (sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t)); + } - //! Overload for boost::unordered_set. - template - static std::size_t dynamicSize(const boost::unordered_set &t) - { - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - for (auto i = t.begin(); i != t.end(); ++i) - { - mem += dynamicSize(*i); - } + //! Overload for boost::container::flat_map. + template + static std::size_t dynamicSize(const boost::container::flat_map& t) { + std::size_t mem = 0; + if (!(memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value())) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem + (t.bucket_count() * sizeof(std::size_t) * 2) - + (t.size() * (sizeof(T) + 2 * sizeof(std::size_t))); } + return mem + t.capacity() * sizeof(std::pair); + } - //! Overload for std::set. - template - static std::size_t dynamicSize(const std::set &t) - { - // std::set appears to use 4 pointers/size_ts per tree node - // (colour, parent, left and right child pointers). - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - for (auto i = t.begin(); i != t.end(); ++i) - { - mem += dynamicSize(*i); - } + //! Overload for boost::unordered_set. + template + static std::size_t dynamicSize(const boost::unordered_set& t) { + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem + (memory_detail::EXTRA_NODES + t.size()) - * (sizeof(T) + 4 * sizeof(std::size_t)); } + return mem + (t.bucket_count() * sizeof(std::size_t) * 2) + (t.size() * (sizeof(T) + 2 * sizeof(std::size_t))); + } - //! Overload for boost::container::flat_set. - template - static std::size_t dynamicSize(const boost::container::flat_set &t) - { - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - for (auto i = t.begin(); i != t.end(); ++i) - { - mem += dynamicSize(*i); - } + //! Overload for std::set. 
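// For illustration: the node-based estimates above reduce to simple per-entry
// arithmetic. A sketch with int keys and double values standing in for K and
// V, and extraNodes standing in for the internal memory_detail::EXTRA_NODES
// constant (illustrative, 64-bit platform assumed):
// \code{cpp}
// #include <cstddef>
//
// // boost::unordered_map<int, double>: bucket array plus one node per entry.
// std::size_t umapEstimate(std::size_t entries, std::size_t buckets) {
//     return buckets * sizeof(std::size_t) * 2 +
//            entries * (sizeof(int) + sizeof(double) + 2 * sizeof(std::size_t));
// }
//
// // std::map<int, double>: red-black tree nodes with four pointer-sized fields.
// std::size_t mapEstimate(std::size_t entries, std::size_t extraNodes) {
//     return (extraNodes + entries) * (sizeof(int) + sizeof(double) + 4 * sizeof(std::size_t));
// }
// \endcode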
+ template + static std::size_t dynamicSize(const std::set& t) { + // std::set appears to use 4 pointers/size_ts per tree node + // (colour, parent, left and right child pointers). + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem + t.capacity() * sizeof(T); } + return mem + (memory_detail::EXTRA_NODES + t.size()) * (sizeof(T) + 4 * sizeof(std::size_t)); + } - //! Overload for std::list. - template - static std::size_t dynamicSize(const std::list &t) - { - // std::list appears to use 2 pointers per list node - // (prev and next pointers). - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - for (auto i = t.begin(); i != t.end(); ++i) - { - mem += dynamicSize(*i); - } + //! Overload for boost::container::flat_set. + template + static std::size_t dynamicSize(const boost::container::flat_set& t) { + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem + (memory_detail::EXTRA_NODES + t.size()) - * (sizeof(T) + 2 * sizeof(std::size_t)); } + return mem + t.capacity() * sizeof(T); + } - //! Overload for std::deque. - template - static std::size_t dynamicSize(const std::deque &t) - { - // std::deque is a pointer to an array of pointers to pages - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - for (auto i = t.begin(); i != t.end(); ++i) - { - mem += dynamicSize(*i); - } + //! Overload for std::list. + template + static std::size_t dynamicSize(const std::list& t) { + // std::list appears to use 2 pointers per list node + // (prev and next pointers). + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - std::size_t pageSize = std::max(sizeof(T), memory_detail::MIN_DEQUE_PAGE_SIZE); - std::size_t itemsPerPage = pageSize / sizeof(T); - // This could be an underestimate if items have been removed - std::size_t numPages = (t.size() + itemsPerPage - 1) / itemsPerPage; - // This could also be an underestimate if items have been removed - std::size_t pageVecEntries = std::max(numPages, memory_detail::MIN_DEQUE_PAGE_VEC_ENTRIES); - - return mem + pageVecEntries * sizeof(std::size_t) - + numPages * pageSize; } + return mem + (memory_detail::EXTRA_NODES + t.size()) * (sizeof(T) + 2 * sizeof(std::size_t)); + } - //! Overload for boost::circular_buffer. - template - static std::size_t dynamicSize(const boost::circular_buffer &t) - { - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - for (std::size_t i = 0; i < t.size(); ++i) - { - mem += dynamicSize(t[i]); - } + //! Overload for std::deque. 
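// For illustration: the std::deque accounting here models the container as a
// vector of fixed-size pages. A sketch of the same arithmetic, assuming the
// internal constants MIN_DEQUE_PAGE_SIZE and MIN_DEQUE_PAGE_VEC_ENTRIES take
// illustrative values of 512 and 8 (their real values are not shown in this
// hunk):
// \code{cpp}
// #include <algorithm>
// #include <cstddef>
//
// std::size_t dequeEstimate(std::size_t items, std::size_t itemSize) {
//     std::size_t pageSize = std::max(itemSize, std::size_t(512));
//     std::size_t itemsPerPage = pageSize / itemSize;
//     std::size_t numPages = (items + itemsPerPage - 1) / itemsPerPage; // round up
//     std::size_t pageVecEntries = std::max(numPages, std::size_t(8));
//     return pageVecEntries * sizeof(std::size_t) + numPages * pageSize;
// }
// \endcode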
+ template + static std::size_t dynamicSize(const std::deque& t) { + // std::deque is a pointer to an array of pointers to pages + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + for (auto i = t.begin(); i != t.end(); ++i) { + mem += dynamicSize(*i); } - return mem + t.capacity() * sizeof(T); } + std::size_t pageSize = std::max(sizeof(T), memory_detail::MIN_DEQUE_PAGE_SIZE); + std::size_t itemsPerPage = pageSize / sizeof(T); + // This could be an underestimate if items have been removed + std::size_t numPages = (t.size() + itemsPerPage - 1) / itemsPerPage; + // This could also be an underestimate if items have been removed + std::size_t pageVecEntries = std::max(numPages, memory_detail::MIN_DEQUE_PAGE_VEC_ENTRIES); - //! Overload for boost::optional. - template - static std::size_t dynamicSize(const boost::optional &t) - { - if (!t) - { - return 0; + return mem + pageVecEntries * sizeof(std::size_t) + numPages * pageSize; + } + + //! Overload for boost::circular_buffer. + template + static std::size_t dynamicSize(const boost::circular_buffer& t) { + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + for (std::size_t i = 0; i < t.size(); ++i) { + mem += dynamicSize(t[i]); } - return dynamicSize(*t); } + return mem + t.capacity() * sizeof(T); + } - //! Overload for boost::reference_wrapper. - template - static std::size_t dynamicSize(const boost::reference_wrapper &/*t*/) - { + //! Overload for boost::optional. + template + static std::size_t dynamicSize(const boost::optional& t) { + if (!t) { return 0; } + return dynamicSize(*t); + } - //! Overload for std::pair. - template - static std::size_t dynamicSize(const std::pair &t) - { - std::size_t mem = 0; - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - mem += dynamicSize(t.first); - } - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - mem += dynamicSize(t.second); - } - return mem; - } + //! Overload for boost::reference_wrapper. + template + static std::size_t dynamicSize(const boost::reference_wrapper& /*t*/) { + return 0; + } - //! Overload for boost::any. - static std::size_t dynamicSize(const boost::any &t) - { - // boost::any holds a pointer to a new'd item. - return ms_AnyVisitor.dynamicSize(t); + //! Overload for std::pair. + template + static std::size_t dynamicSize(const std::pair& t) { + std::size_t mem = 0; + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + mem += dynamicSize(t.first); } - - //! Default template. - template - static std::size_t staticSize(const T &t) - { - return memory_detail::SMemoryStaticSize::dispatch(t); + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { + mem += dynamicSize(t.second); } + return mem; + } - //! Get the any visitor singleton. - static CAnyVisitor &anyVisitor() - { - return ms_AnyVisitor; - } + //! Overload for boost::any. + static std::size_t dynamicSize(const boost::any& t) { + // boost::any holds a pointer to a new'd item. + return ms_AnyVisitor.dynamicSize(t); + } - private: - static CAnyVisitor ms_AnyVisitor; -}; + //! Default template. + template + static std::size_t staticSize(const T& t) { + return memory_detail::SMemoryStaticSize::dispatch(t); + } + + //! Get the any visitor singleton. + static CAnyVisitor& anyVisitor() { return ms_AnyVisitor; } +private: + static CAnyVisitor ms_AnyVisitor; +}; -namespace memory_detail -{ +namespace memory_detail { template -struct enable_if_member_debug_function -{ +struct enable_if_member_debug_function { using type = R; }; //! 
Default template declaration for SDebugMemoryDynamicSize::dispatch. template -struct SDebugMemoryDynamicSize -{ - static void dispatch(const char *name, const T &t, CMemoryUsage::TMemoryUsagePtr mem) - { +struct SDebugMemoryDynamicSize { + static void dispatch(const char* name, const T& t, CMemoryUsage::TMemoryUsagePtr mem) { std::size_t used = CMemory::dynamicSize(t); - if (used > 0) - { + if (used > 0) { std::string description(name); description += "::"; description += typeid(T).name(); @@ -671,17 +542,12 @@ struct SDebugMemoryDynamicSize //! Template specialisation for when T has a debugMemoryUsage member function. template -struct SDebugMemoryDynamicSize::type> -{ - static void dispatch(const char *, const T &t, CMemoryUsage::TMemoryUsagePtr mem) - { - t.debugMemoryUsage(mem->addChild()); - } +struct SDebugMemoryDynamicSize::type> { + static void dispatch(const char*, const T& t, CMemoryUsage::TMemoryUsagePtr mem) { t.debugMemoryUsage(mem->addChild()); } }; } // memory_detail - //! \brief Core memory debug usage template class. //! //! DESCRIPTION:\n @@ -701,524 +567,394 @@ struct SDebugMemoryDynamicSize; - using TTypeInfoDynamicSizeFuncPrVec = std::vector; - - //! Insert a callback to compute the size of the type T - //! if it is stored in boost::any. - template - bool registerCallback() - { - auto i = std::lower_bound(m_Callbacks.begin(), - m_Callbacks.end(), - boost::cref(typeid(T)), - memory_detail::STypeInfoLess()); - if (i == m_Callbacks.end()) - { - m_Callbacks.emplace_back(boost::cref(typeid(T)), - &CAnyVisitor::dynamicSizeCallback); - return true; - } - else if (i->first.get() != typeid(T)) - { - m_Callbacks.insert(i, {boost::cref(typeid(T)), - &CAnyVisitor::dynamicSizeCallback}); - return true; - } - return false; - } - - //! Calculate the dynamic size of x if a callback has been - //! registered for its type. - void dynamicSize(const char *name, - const boost::any &x, - CMemoryUsage::TMemoryUsagePtr mem) const - { - if (!x.empty()) - { - auto i = std::lower_bound(m_Callbacks.begin(), - m_Callbacks.end(), - boost::cref(x.type()), - memory_detail::STypeInfoLess()); - if (i != m_Callbacks.end() && i->first.get() == x.type()) - { - (*i->second)(name, x, mem); - return; - } - LOG_ERROR("No callback registered for " << x.type().name()); - } - } - - private: - //! Wraps up call to any_cast and dynamicSize. - template - static void dynamicSizeCallback(const char *name, - const boost::any &any, - CMemoryUsage::TMemoryUsagePtr mem) - { - try - { - mem->addItem(name, sizeof(T)); - CMemoryDebug::dynamicSize(name, boost::any_cast(any), mem); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate size " << e.what()); - } - } + using TDynamicSizeFunc = void (*)(const char*, const boost::any& any, CMemoryUsage::TMemoryUsagePtr mem); + using TTypeInfoDynamicSizeFuncPr = std::pair; + using TTypeInfoDynamicSizeFuncPrVec = std::vector; - TTypeInfoDynamicSizeFuncPrVec m_Callbacks; - }; - - public: - //! Default template. + //! Insert a callback to compute the size of the type T + //! if it is stored in boost::any. 
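// For illustration: the specialisation above means any class exposing a
// member debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr) is handed its own
// subtree rather than being sized generically. A hypothetical class showing
// the hook (not from this patch):
// \code{cpp}
// #include <core/CMemory.h>
// #include <core/CMemoryUsage.h>
// #include <vector>
//
// class CModelState {
// public:
//     void debugMemoryUsage(ml::core::CMemoryUsage::TMemoryUsagePtr mem) const {
//         mem->setName("CModelState");
//         ml::core::CMemoryDebug::dynamicSize("buffer", m_Buffer, mem);
//     }
//
// private:
//     std::vector<double> m_Buffer;
// };
// \endcode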
template - static void dynamicSize(const char *name, - const T &t, - CMemoryUsage::TMemoryUsagePtr mem, - typename boost::disable_if>::type * = 0) - { - memory_detail::SDebugMemoryDynamicSize::dispatch(name, t, mem); + bool registerCallback() { + auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), boost::cref(typeid(T)), memory_detail::STypeInfoLess()); + if (i == m_Callbacks.end()) { + m_Callbacks.emplace_back(boost::cref(typeid(T)), &CAnyVisitor::dynamicSizeCallback); + return true; + } else if (i->first.get() != typeid(T)) { + m_Callbacks.insert(i, {boost::cref(typeid(T)), &CAnyVisitor::dynamicSizeCallback}); + return true; + } + return false; + } + + //! Calculate the dynamic size of x if a callback has been + //! registered for its type. + void dynamicSize(const char* name, const boost::any& x, CMemoryUsage::TMemoryUsagePtr mem) const { + if (!x.empty()) { + auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), boost::cref(x.type()), memory_detail::STypeInfoLess()); + if (i != m_Callbacks.end() && i->first.get() == x.type()) { + (*i->second)(name, x, mem); + return; + } + LOG_ERROR("No callback registered for " << x.type().name()); + } } - //! Overload for pointer. + private: + //! Wraps up call to any_cast and dynamicSize. template - static void dynamicSize(const char *name, - const T &t, - CMemoryUsage::TMemoryUsagePtr mem, - typename boost::enable_if>::type * = 0) - { - if (t != 0) - { - mem->addItem("ptr", CMemory::staticSize(*t)); - memory_detail::SDebugMemoryDynamicSize::dispatch(name, *t, mem); - } + static void dynamicSizeCallback(const char* name, const boost::any& any, CMemoryUsage::TMemoryUsagePtr mem) { + try { + mem->addItem(name, sizeof(T)); + CMemoryDebug::dynamicSize(name, boost::any_cast(any), mem); + } catch (const std::exception& e) { LOG_ERROR("Failed to calculate size " << e.what()); } } - //! Overload for boost::shared_ptr. - template - static void dynamicSize(const char *name, - const boost::shared_ptr &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - if (t) - { - long uc = t.use_count(); - // If the pointer is shared by multiple users, each one - // might count it, so divide by the number of users. - // However, if only 1 user has it, do a full debug. - if (uc == 1) - { - mem->addItem("shared_ptr", CMemory::staticSize(*t)); - dynamicSize(name, *t, mem); - } - else - { - std::ostringstream ss; - ss << "shared_ptr (x" << uc << ')'; - // Round up - mem->addItem(ss.str(), (CMemory::staticSize(*t) + CMemory::dynamicSize(*t) + std::size_t(uc - 1)) / uc); - } - } + TTypeInfoDynamicSizeFuncPrVec m_Callbacks; + }; + +public: + //! Default template. + template + static void dynamicSize(const char* name, + const T& t, + CMemoryUsage::TMemoryUsagePtr mem, + typename boost::disable_if>::type* = 0) { + memory_detail::SDebugMemoryDynamicSize::dispatch(name, t, mem); + } + + //! Overload for pointer. + template + static void dynamicSize(const char* name, + const T& t, + CMemoryUsage::TMemoryUsagePtr mem, + typename boost::enable_if>::type* = 0) { + if (t != 0) { + mem->addItem("ptr", CMemory::staticSize(*t)); + memory_detail::SDebugMemoryDynamicSize::dispatch(name, *t, mem); } + } - //! Overload for boost::array. 
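// Putting the pieces together, CMemoryDebug builds a CMemoryUsage tree that
// can be serialised as JSON; a minimal usage sketch (assumed, not part of
// this patch):
// \code{cpp}
// #include <core/CMemory.h>
// #include <core/CMemoryUsage.h>
// #include <iostream>
// #include <string>
// #include <vector>
//
// void reportUsage(const std::vector<std::string>& strings) {
//     ml::core::CMemoryUsage usage;
//     usage.setName("example");
//     ml::core::CMemoryDebug::dynamicSize("strings", strings, usage.addChild());
//     usage.compress();       // fold large collections of similar children
//     usage.print(std::cout); // JSON via CMemoryUsageJsonWriter
// }
// \endcode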
- template - static void dynamicSize(const char *name, - const boost::array &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - if (!memory_detail::SDynamicSizeAlwaysZero::value()) - { - std::string componentName(name); - componentName += "_item"; - - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - for (auto i = t.begin(); i != t.end(); ++i) - { - dynamicSize(componentName.c_str(), *i, ptr); - } + //! Overload for boost::shared_ptr. + template + static void dynamicSize(const char* name, const boost::shared_ptr& t, CMemoryUsage::TMemoryUsagePtr mem) { + if (t) { + long uc = t.use_count(); + // If the pointer is shared by multiple users, each one + // might count it, so divide by the number of users. + // However, if only 1 user has it, do a full debug. + if (uc == 1) { + mem->addItem("shared_ptr", CMemory::staticSize(*t)); + dynamicSize(name, *t, mem); + } else { + std::ostringstream ss; + ss << "shared_ptr (x" << uc << ')'; + // Round up + mem->addItem(ss.str(), (CMemory::staticSize(*t) + CMemory::dynamicSize(*t) + std::size_t(uc - 1)) / uc); } } + } - //! Overload for std::vector. - template - static void dynamicSize(const char *name, - const std::vector &t, - CMemoryUsage::TMemoryUsagePtr mem) - { + //! Overload for boost::array. + template + static void dynamicSize(const char* name, const boost::array& t, CMemoryUsage::TMemoryUsagePtr mem) { + if (!memory_detail::SDynamicSizeAlwaysZero::value()) { std::string componentName(name); + componentName += "_item"; - std::size_t items = t.size(); - std::size_t capacity = t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), - capacity * sizeof(T), - (capacity - items) * sizeof(T)); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); - - componentName += "_item"; - for (auto i = t.begin(); i != t.end(); ++i) - { + for (auto i = t.begin(); i != t.end(); ++i) { dynamicSize(componentName.c_str(), *i, ptr); } } + } + + //! Overload for std::vector. + template + static void dynamicSize(const char* name, const std::vector& t, CMemoryUsage::TMemoryUsagePtr mem) { + std::string componentName(name); + + std::size_t items = t.size(); + std::size_t capacity = t.capacity(); + CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), capacity * sizeof(T), (capacity - items) * sizeof(T)); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); + + componentName += "_item"; + for (auto i = t.begin(); i != t.end(); ++i) { + dynamicSize(componentName.c_str(), *i, ptr); + } + } - //! Overload for small vector. - template - static void dynamicSize(const char *name, - const CSmallVector &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - std::string componentName(name); + //! Overload for small vector. + template + static void dynamicSize(const char* name, const CSmallVector& t, CMemoryUsage::TMemoryUsagePtr mem) { + std::string componentName(name); - std::size_t items = memory_detail::inplace(t) ? 0 : t.size(); - std::size_t capacity = memory_detail::inplace(t) ? 0 : t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), - capacity * sizeof(T), - (capacity - items) * sizeof(T)); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + std::size_t items = memory_detail::inplace(t) ? 0 : t.size(); + std::size_t capacity = memory_detail::inplace(t) ? 
0 : t.capacity();
+        CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), capacity * sizeof(T), (capacity - items) * sizeof(T));
+        CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild();
+        ptr->setName(usage);

-            componentName += "_item";
-            for (auto i = t.begin(); i != t.end(); ++i)
-            {
-                dynamicSize(componentName.c_str(), *i, ptr);
-            }
+        componentName += "_item";
+        for (auto i = t.begin(); i != t.end(); ++i) {
+            dynamicSize(componentName.c_str(), *i, ptr);
         }
+    }

-        //! Overload for std::string.
-        static void dynamicSize(const char *name,
-                                const std::string &t,
-                                CMemoryUsage::TMemoryUsagePtr mem)
-        {
-            std::string componentName(name);
-            componentName += "_string";
-            std::size_t length = t.size();
-            std::size_t capacity = t.capacity();
-            std::size_t unused = 0;
+    //! Overload for std::string.
+    static void dynamicSize(const char* name, const std::string& t, CMemoryUsage::TMemoryUsagePtr mem) {
+        std::string componentName(name);
+        componentName += "_string";
+        std::size_t length = t.size();
+        std::size_t capacity = t.capacity();
+        std::size_t unused = 0;
 #ifdef MacOSX
-            // For lengths up to 22 bytes there is no allocation
-            if (capacity > 22)
-            {
-                unused = capacity - length;
-                ++capacity;
-            }
-            else
-            {
-                capacity = 0;
-            }
-
-#else // Linux with C++11 ABI and Windows
-            // For lengths up to 15 bytes there is no allocation
-            if (capacity > 15)
-            {
-                unused = capacity - length;
-                ++capacity;
-            }
-            else
-            {
-                capacity = 0;
-            }
-#endif
-            CMemoryUsage::SMemoryUsage usage(componentName, capacity, unused);
-            CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild();
-            ptr->setName(usage);
+        // For lengths up to 22 bytes there is no allocation
+        if (capacity > 22) {
+            unused = capacity - length;
+            ++capacity;
+        } else {
+            capacity = 0;
+        }
+
+#else // Linux with C++11 ABI and Windows
+        // For lengths up to 15 bytes there is no allocation
+        if (capacity > 15) {
+            unused = capacity - length;
+            ++capacity;
+        } else {
+            capacity = 0;
         }
+#endif
+        CMemoryUsage::SMemoryUsage usage(componentName, capacity, unused);
+        CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild();
+        ptr->setName(usage);
+    }

-        //! Overload for boost::unordered_map.
-        template<typename K, typename V>
-        static void dynamicSize(const char *name,
-                                const boost::unordered_map<K, V> &t,
-                                CMemoryUsage::TMemoryUsagePtr mem)
-        {
-            std::string componentName(name);
-            componentName += "_umap";
+    //! Overload for boost::unordered_map.
+    template<typename K, typename V>
+    static void dynamicSize(const char* name, const boost::unordered_map<K, V>& t, CMemoryUsage::TMemoryUsagePtr mem) {
+        std::string componentName(name);
+        componentName += "_umap";

-            std::size_t mapSize = (t.bucket_count() * sizeof(std::size_t) * 2)
-                                + (t.size() * (sizeof(K) + sizeof(V) + 2 * sizeof(std::size_t)));
+        std::size_t mapSize = (t.bucket_count() * sizeof(std::size_t) * 2) + (t.size() * (sizeof(K) + sizeof(V) + 2 * sizeof(std::size_t)));

-            CMemoryUsage::SMemoryUsage usage(componentName, mapSize);
-            CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild();
-            ptr->setName(usage);
+        CMemoryUsage::SMemoryUsage usage(componentName, mapSize);
+        CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild();
+        ptr->setName(usage);

-            for (auto i = t.begin(); i != t.end(); ++i)
-            {
-                dynamicSize("key", i->first, ptr);
-                dynamicSize("value", i->second, ptr);
-            }
+        for (auto i = t.begin(); i != t.end(); ++i) {
+            dynamicSize("key", i->first, ptr);
+            dynamicSize("value", i->second, ptr);
         }
+    }

-        //! Overload for std::map.
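// For illustration: the contiguous-container debug overloads above report
// both the reserved bytes and the slack, i.e. SMemoryUsage{name,
// capacity * sizeof(T), (capacity - size) * sizeof(T)}. Concrete numbers,
// assuming capacity() is exactly what was reserved (typical on 64-bit builds):
// \code{cpp}
// #include <cstddef>
// #include <vector>
//
// void slackExample() {
//     std::vector<double> v;
//     v.reserve(8); // capacity() is typically exactly 8 after this
//     v.resize(3);
//     std::size_t memory = v.capacity() * sizeof(double);              // 64 bytes reserved
//     std::size_t unused = (v.capacity() - v.size()) * sizeof(double); // 40 bytes of slack
// }
// \endcode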
- template - static void dynamicSize(const char *name, - const std::map &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - // std::map appears to use 4 pointers/size_ts per tree node - // (colour, parent, left and right child pointers) - std::string componentName(name); - componentName += "_map"; + //! Overload for std::map. + template + static void dynamicSize(const char* name, const std::map& t, CMemoryUsage::TMemoryUsagePtr mem) { + // std::map appears to use 4 pointers/size_ts per tree node + // (colour, parent, left and right child pointers) + std::string componentName(name); + componentName += "_map"; - std::size_t mapSize = (memory_detail::EXTRA_NODES + t.size()) - * (sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t)); + std::size_t mapSize = (memory_detail::EXTRA_NODES + t.size()) * (sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t)); - CMemoryUsage::SMemoryUsage usage(componentName, mapSize); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + CMemoryUsage::SMemoryUsage usage(componentName, mapSize); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); - for (auto i = t.begin(); i != t.end(); ++i) - { - dynamicSize("key", i->first, ptr); - dynamicSize("value", i->second, ptr); - } + for (auto i = t.begin(); i != t.end(); ++i) { + dynamicSize("key", i->first, ptr); + dynamicSize("value", i->second, ptr); } + } - //! Overload for boost::container::flat_map. - template - static void dynamicSize(const char *name, - const boost::container::flat_map &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - std::string componentName(name); - componentName += "_fmap"; + //! Overload for boost::container::flat_map. + template + static void dynamicSize(const char* name, const boost::container::flat_map& t, CMemoryUsage::TMemoryUsagePtr mem) { + std::string componentName(name); + componentName += "_fmap"; - std::size_t items = t.size(); - std::size_t capacity = t.capacity(); + std::size_t items = t.size(); + std::size_t capacity = t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(std::pair).name(), - capacity * sizeof(std::pair), - (capacity - items) * sizeof(std::pair)); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(std::pair).name(), + capacity * sizeof(std::pair), + (capacity - items) * sizeof(std::pair)); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); - for (auto i = t.begin(); i != t.end(); ++i) - { - dynamicSize("key", i->first, ptr); - dynamicSize("value", i->second, ptr); - } + for (auto i = t.begin(); i != t.end(); ++i) { + dynamicSize("key", i->first, ptr); + dynamicSize("value", i->second, ptr); } + } - //! Overload for boost::unordered_set. - template - static void dynamicSize(const char *name, - const boost::unordered_set &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - std::string componentName(name); - componentName += "_uset"; + //! Overload for boost::unordered_set. 
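// For illustration: flat containers store their elements contiguously, so the
// overloads above account them like vectors of pairs rather than tree nodes;
// e.g. a boost::container::flat_map<int, int> with 10 entries and capacity 16
// reserves 16 * sizeof(std::pair<int, int>) bytes, of which 6 pairs are
// reported as unused slack. A sketch of the slack computation:
// \code{cpp}
// #include <boost/container/flat_map.hpp>
// #include <cstddef>
// #include <utility>
//
// std::size_t flatSlack(const boost::container::flat_map<int, int>& m) {
//     return (m.capacity() - m.size()) * sizeof(std::pair<int, int>);
// }
// \endcode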
+ template + static void dynamicSize(const char* name, const boost::unordered_set& t, CMemoryUsage::TMemoryUsagePtr mem) { + std::string componentName(name); + componentName += "_uset"; - std::size_t setSize = (t.bucket_count() * sizeof(std::size_t) * 2) - + (t.size() * (sizeof(T) + 2 * sizeof(std::size_t))); + std::size_t setSize = (t.bucket_count() * sizeof(std::size_t) * 2) + (t.size() * (sizeof(T) + 2 * sizeof(std::size_t))); - CMemoryUsage::SMemoryUsage usage(componentName, setSize); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + CMemoryUsage::SMemoryUsage usage(componentName, setSize); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); - for (auto i = t.begin(); i != t.end(); ++i) - { - dynamicSize("value", *i, ptr); - } + for (auto i = t.begin(); i != t.end(); ++i) { + dynamicSize("value", *i, ptr); } + } - //! Overload for std::set. - template - static void dynamicSize(const char *name, - const std::set &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - // std::set appears to use 4 pointers/size_ts per tree node - // (colour, parent, left and right child pointers) - std::string componentName(name); - componentName += "_set"; + //! Overload for std::set. + template + static void dynamicSize(const char* name, const std::set& t, CMemoryUsage::TMemoryUsagePtr mem) { + // std::set appears to use 4 pointers/size_ts per tree node + // (colour, parent, left and right child pointers) + std::string componentName(name); + componentName += "_set"; - std::size_t setSize = (memory_detail::EXTRA_NODES + t.size()) - * (sizeof(T) + 4 * sizeof(std::size_t)); + std::size_t setSize = (memory_detail::EXTRA_NODES + t.size()) * (sizeof(T) + 4 * sizeof(std::size_t)); - CMemoryUsage::SMemoryUsage usage(componentName, setSize); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + CMemoryUsage::SMemoryUsage usage(componentName, setSize); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); - for (auto i = t.begin(); i != t.end(); ++i) - { - dynamicSize("value", *i, ptr); - } + for (auto i = t.begin(); i != t.end(); ++i) { + dynamicSize("value", *i, ptr); } + } - //! Overload for boost::container::flat_set. - template - static void dynamicSize(const char *name, - const boost::container::flat_set &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - std::string componentName(name); - componentName += "_fset"; + //! Overload for boost::container::flat_set. + template + static void dynamicSize(const char* name, const boost::container::flat_set& t, CMemoryUsage::TMemoryUsagePtr mem) { + std::string componentName(name); + componentName += "_fset"; - std::size_t items = t.size(); - std::size_t capacity = t.capacity(); + std::size_t items = t.size(); + std::size_t capacity = t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), - capacity * sizeof(T), - (capacity - items) * sizeof(T)); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), capacity * sizeof(T), (capacity - items) * sizeof(T)); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); - for (auto i = t.begin(); i != t.end(); ++i) - { - dynamicSize("value", *i, ptr); - } + for (auto i = t.begin(); i != t.end(); ++i) { + dynamicSize("value", *i, ptr); } + } - //! Overload for std::list. 
- template - static void dynamicSize(const char *name, - const std::list &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - // std::list appears to use 2 pointers per list node - // (prev and next pointers). - std::string componentName(name); - componentName += "_list"; + //! Overload for std::list. + template + static void dynamicSize(const char* name, const std::list& t, CMemoryUsage::TMemoryUsagePtr mem) { + // std::list appears to use 2 pointers per list node + // (prev and next pointers). + std::string componentName(name); + componentName += "_list"; - std::size_t listSize = (memory_detail::EXTRA_NODES + t.size()) - * (sizeof(T) + 4 * sizeof(std::size_t)); + std::size_t listSize = (memory_detail::EXTRA_NODES + t.size()) * (sizeof(T) + 4 * sizeof(std::size_t)); - CMemoryUsage::SMemoryUsage usage(componentName, listSize); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + CMemoryUsage::SMemoryUsage usage(componentName, listSize); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); - for (auto i = t.begin(); i != t.end(); ++i) - { - dynamicSize("value", *i, ptr); - } + for (auto i = t.begin(); i != t.end(); ++i) { + dynamicSize("value", *i, ptr); } + } - //! Overload for std::deque. - template - static void dynamicSize(const char *name, - const std::deque &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - // std::deque is a pointer to an array of pointers to pages - std::string componentName(name); - componentName += "_deque"; - - std::size_t pageSize = std::max(sizeof(T), memory_detail::MIN_DEQUE_PAGE_SIZE); - std::size_t itemsPerPage = pageSize / sizeof(T); - // This could be an underestimate if items have been removed - std::size_t numPages = (t.size() + itemsPerPage - 1) / itemsPerPage; - // This could also be an underestimate if items have been removed - std::size_t pageVecEntries = std::max(numPages, memory_detail::MIN_DEQUE_PAGE_VEC_ENTRIES); - - std::size_t dequeTotal = pageVecEntries * sizeof(std::size_t) - + numPages * pageSize; - std::size_t dequeUsed = numPages * sizeof(std::size_t) - + t.size() * sizeof(T); - - CMemoryUsage::SMemoryUsage usage(componentName, - dequeTotal, - dequeTotal - dequeUsed); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + //! Overload for std::deque. + template + static void dynamicSize(const char* name, const std::deque& t, CMemoryUsage::TMemoryUsagePtr mem) { + // std::deque is a pointer to an array of pointers to pages + std::string componentName(name); + componentName += "_deque"; - for (auto i = t.begin(); i != t.end(); ++i) - { - dynamicSize("value", *i, ptr); - } - } + std::size_t pageSize = std::max(sizeof(T), memory_detail::MIN_DEQUE_PAGE_SIZE); + std::size_t itemsPerPage = pageSize / sizeof(T); + // This could be an underestimate if items have been removed + std::size_t numPages = (t.size() + itemsPerPage - 1) / itemsPerPage; + // This could also be an underestimate if items have been removed + std::size_t pageVecEntries = std::max(numPages, memory_detail::MIN_DEQUE_PAGE_VEC_ENTRIES); - //! Overload for boost::circular_buffer. 
- template - static void dynamicSize(const char *name, - const boost::circular_buffer &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - std::string componentName(name); + std::size_t dequeTotal = pageVecEntries * sizeof(std::size_t) + numPages * pageSize; + std::size_t dequeUsed = numPages * sizeof(std::size_t) + t.size() * sizeof(T); - std::size_t items = t.size(); - std::size_t capacity = t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), - capacity * sizeof(T), - (capacity - items) * sizeof(T)); - CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); - ptr->setName(usage); + CMemoryUsage::SMemoryUsage usage(componentName, dequeTotal, dequeTotal - dequeUsed); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); - componentName += "_item"; - for (std::size_t i = 0; i < items; ++i) - { - dynamicSize(componentName.c_str(), t[i], ptr); - } + for (auto i = t.begin(); i != t.end(); ++i) { + dynamicSize("value", *i, ptr); } + } - //! Overload for boost::optional. - template - static void dynamicSize(const char *name, - const boost::optional &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - if (t) - { - dynamicSize(name, *t, mem); - } - } + //! Overload for boost::circular_buffer. + template + static void dynamicSize(const char* name, const boost::circular_buffer& t, CMemoryUsage::TMemoryUsagePtr mem) { + std::string componentName(name); - //! Overload for boost::reference_wrapper. - template - static void dynamicSize(const char * /*name*/, - const boost::reference_wrapper &/*t*/, - CMemoryUsage::TMemoryUsagePtr /*mem*/) - { - return; - } + std::size_t items = t.size(); + std::size_t capacity = t.capacity(); + CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), capacity * sizeof(T), (capacity - items) * sizeof(T)); + CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); + ptr->setName(usage); - //! Overload for std::pair. - template - static void dynamicSize(const char *name, - const std::pair &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - std::string keyName(name); - keyName += "_key"; - std::string valueName(name); - valueName += "_value"; - dynamicSize(keyName.c_str(), t.first, mem); - dynamicSize(valueName.c_str(), t.second, mem); + componentName += "_item"; + for (std::size_t i = 0; i < items; ++i) { + dynamicSize(componentName.c_str(), t[i], ptr); } + } - //! Overload for boost::any. - static void dynamicSize(const char *name, - const boost::any &t, - CMemoryUsage::TMemoryUsagePtr mem) - { - // boost::any holds a pointer to a new'd item. - ms_AnyVisitor.dynamicSize(name, t, mem); + //! Overload for boost::optional. + template + static void dynamicSize(const char* name, const boost::optional& t, CMemoryUsage::TMemoryUsagePtr mem) { + if (t) { + dynamicSize(name, *t, mem); } + } - //! Get the any visitor singleton. - static CAnyVisitor &anyVisitor() - { - return ms_AnyVisitor; - } + //! Overload for boost::reference_wrapper. + template + static void dynamicSize(const char* /*name*/, const boost::reference_wrapper& /*t*/, CMemoryUsage::TMemoryUsagePtr /*mem*/) { + return; + } - private: - static CAnyVisitor ms_AnyVisitor; + //! Overload for std::pair. + template + static void dynamicSize(const char* name, const std::pair& t, CMemoryUsage::TMemoryUsagePtr mem) { + std::string keyName(name); + keyName += "_key"; + std::string valueName(name); + valueName += "_value"; + dynamicSize(keyName.c_str(), t.first, mem); + dynamicSize(valueName.c_str(), t.second, mem); + } + + //! Overload for boost::any. 
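// For illustration: as with CMemory, boost::any payloads are only sized if a
// callback was registered, this time on CMemoryDebug's own visitor, which then
// dispatches to the boost::any overload below. A sketch (assumed usage, not
// part of this patch):
// \code{cpp}
// #include <core/CMemory.h>
// #include <core/CMemoryUsage.h>
// #include <boost/any.hpp>
// #include <vector>
//
// void debugAny(ml::core::CMemoryUsage::TMemoryUsagePtr mem) {
//     using TDoubleVec = std::vector<double>;
//     ml::core::CMemoryDebug::anyVisitor().registerCallback<TDoubleVec>();
//     boost::any holder{TDoubleVec(32)};
//     ml::core::CMemoryDebug::dynamicSize("holder", holder, mem);
// }
// \endcode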
+ static void dynamicSize(const char* name, const boost::any& t, CMemoryUsage::TMemoryUsagePtr mem) { + // boost::any holds a pointer to a new'd item. + ms_AnyVisitor.dynamicSize(name, t, mem); + } + + //! Get the any visitor singleton. + static CAnyVisitor& anyVisitor() { return ms_AnyVisitor; } + +private: + static CAnyVisitor ms_AnyVisitor; }; } // core diff --git a/include/core/CMemoryUsage.h b/include/core/CMemoryUsage.h index 370003c341..b62a85f9ba 100644 --- a/include/core/CMemoryUsage.h +++ b/include/core/CMemoryUsage.h @@ -10,17 +10,14 @@ #include +#include #include #include -#include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -namespace memory_detail -{ +namespace memory_detail { class CMemoryUsageComparison; class CMemoryUsageComparisonTwo; } @@ -33,106 +30,94 @@ class CMemoryUsageJsonWriter; //! This is a tree structure designed to be passed to a component //! containing subcomponents, so that each component can fill in //! its memory usage -class CORE_EXPORT CMemoryUsage -{ - public: - //! A collection of data to record memory usage information for - //! arbitrary components - struct CORE_EXPORT SMemoryUsage - { - SMemoryUsage(const std::string &name, std::size_t memory) : - s_Name(name), s_Memory(memory), s_Unused(0) - { - } - - SMemoryUsage(const std::string &name, std::size_t memory, - std::size_t unused) : s_Name(name), s_Memory(memory), s_Unused(unused) - { - } +class CORE_EXPORT CMemoryUsage { +public: + //! A collection of data to record memory usage information for + //! arbitrary components + struct CORE_EXPORT SMemoryUsage { + SMemoryUsage(const std::string& name, std::size_t memory) : s_Name(name), s_Memory(memory), s_Unused(0) {} - //! Name of the component - std::string s_Name; + SMemoryUsage(const std::string& name, std::size_t memory, std::size_t unused) : s_Name(name), s_Memory(memory), s_Unused(unused) {} - //! Bytes used by the component - std::size_t s_Memory; + //! Name of the component + std::string s_Name; - //! For dynamic pre-allocation containers, the capacity - actual used bytes - //! which equates to pre-allocated space, unused - std::size_t s_Unused; - }; + //! Bytes used by the component + std::size_t s_Memory; - using TMemoryUsagePtr = CMemoryUsage*; - using TMemoryUsagePtrList = std::list; - using TMemoryUsagePtrListCItr = TMemoryUsagePtrList::const_iterator; - using TMemoryUsagePtrListItr = TMemoryUsagePtrList::iterator; - using TMemoryUsageVec = std::vector; - using TMemoryUsageVecCitr = TMemoryUsageVec::const_iterator; + //! For dynamic pre-allocation containers, the capacity - actual used bytes + //! which equates to pre-allocated space, unused + std::size_t s_Unused; + }; - public: - //! Constructor - CMemoryUsage(); + using TMemoryUsagePtr = CMemoryUsage*; + using TMemoryUsagePtrList = std::list; + using TMemoryUsagePtrListCItr = TMemoryUsagePtrList::const_iterator; + using TMemoryUsagePtrListItr = TMemoryUsagePtrList::iterator; + using TMemoryUsageVec = std::vector; + using TMemoryUsageVecCitr = TMemoryUsageVec::const_iterator; - //! Destructor - ~CMemoryUsage(); +public: + //! Constructor + CMemoryUsage(); - //! Create a child node - TMemoryUsagePtr addChild(); + //! Destructor + ~CMemoryUsage(); - //! Create a child node with a pre-computed size offset - this is to - //! allow sizeof(T) to be removed from items within containers - TMemoryUsagePtr addChild(std::size_t initialAmount); + //! Create a child node + TMemoryUsagePtr addChild(); - //! 
Create a memory usage member item - void addItem(const SMemoryUsage &item); + //! Create a child node with a pre-computed size offset - this is to + //! allow sizeof(T) to be removed from items within containers + TMemoryUsagePtr addChild(std::size_t initialAmount); - // Create a memory usage member item - void addItem(const std::string &name, std::size_t memory); + //! Create a memory usage member item + void addItem(const SMemoryUsage& item); - //! Set the name and size of this node - void setName(const SMemoryUsage& item); + // Create a memory usage member item + void addItem(const std::string& name, std::size_t memory); - // Set the name and size of this node - void setName(const std::string &name, std::size_t memory); + //! Set the name and size of this node + void setName(const SMemoryUsage& item); - // Set the name and size of this node - void setName(const std::string &name); + // Set the name and size of this node + void setName(const std::string& name, std::size_t memory); - //! Get the memory used by this node and all child nodes - std::size_t usage() const; + // Set the name and size of this node + void setName(const std::string& name); - //! Get the unused memory wasted by this node and all child nodes - std::size_t unusage() const; + //! Get the memory used by this node and all child nodes + std::size_t usage() const; - //! Format the memory used by this node and all child nodes - //! into a JSON stream - void print(std::ostream &outStream) const; + //! Get the unused memory wasted by this node and all child nodes + std::size_t unusage() const; - //! Aggregate big collections of child items together - void compress(); + //! Format the memory used by this node and all child nodes + //! into a JSON stream + void print(std::ostream& outStream) const; - private: - //! Give out data to the JSON writer to format, recursively - void summary(CMemoryUsageJsonWriter &writer) const; + //! Aggregate big collections of child items together + void compress(); - //! Collection of child items - TMemoryUsagePtrList m_Children; +private: + //! Give out data to the JSON writer to format, recursively + void summary(CMemoryUsageJsonWriter& writer) const; - //! Collection of component items within this node - TMemoryUsageVec m_Items; + //! Collection of child items + TMemoryUsagePtrList m_Children; - //! Description of this item - SMemoryUsage m_Description; + //! Collection of component items within this node + TMemoryUsageVec m_Items; - friend class memory_detail::CMemoryUsageComparison; - friend class memory_detail::CMemoryUsageComparisonTwo; + //! Description of this item + SMemoryUsage m_Description; + friend class memory_detail::CMemoryUsageComparison; + friend class memory_detail::CMemoryUsageComparisonTwo; }; - } // core } // ml - - #endif // INCLUDED_ml_core_CMemoryUsage_h diff --git a/include/core/CMemoryUsageJsonWriter.h b/include/core/CMemoryUsageJsonWriter.h index 431cccb414..12a88df4bd 100644 --- a/include/core/CMemoryUsageJsonWriter.h +++ b/include/core/CMemoryUsageJsonWriter.h @@ -18,10 +18,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief A lightweight wrapper over rapidjson::LineWriter //! to be used by CMemoryUsage to format DebugMemoryUsage info @@ -33,48 +31,45 @@ namespace core //! and this is then filled with JSON objects and arrays. //! Finalise should be called to flush the stream before //! downstream use. -class CORE_EXPORT CMemoryUsageJsonWriter -{ - public: - //! 
Constructor - CMemoryUsageJsonWriter(std::ostream &outStream); +class CORE_EXPORT CMemoryUsageJsonWriter { +public: + //! Constructor + CMemoryUsageJsonWriter(std::ostream& outStream); - //! Destructor - ~CMemoryUsageJsonWriter(); + //! Destructor + ~CMemoryUsageJsonWriter(); - //! Flush the underlying stream, which we only hold by reference - void finalise(); + //! Flush the underlying stream, which we only hold by reference + void finalise(); - //! Calls underlying JSON writer startObject - void startObject(); + //! Calls underlying JSON writer startObject + void startObject(); - //! Calls underlying JSON writer endObject() - void endObject(); + //! Calls underlying JSON writer endObject() + void endObject(); - //! Calls underlying JSON writer startArray, with a string name - void startArray(const std::string &description); + //! Calls underlying JSON writer startArray, with a string name + void startArray(const std::string& description); - //! Calls underlying JSON writer endArray - void endArray(); + //! Calls underlying JSON writer endArray + void endArray(); - //! Add a memory description item to the writer - void addItem(const CMemoryUsage::SMemoryUsage &item); + //! Add a memory description item to the writer + void addItem(const CMemoryUsage::SMemoryUsage& item); - private: - //! JSON writer ostream wrapper - rapidjson::OStreamWrapper m_WriteStream; +private: + //! JSON writer ostream wrapper + rapidjson::OStreamWrapper m_WriteStream; - using TGenericLineWriter = CRapidJsonLineWriter; + using TGenericLineWriter = CRapidJsonLineWriter; - //! JSON writer - TGenericLineWriter m_Writer; - - //! Have we finalised the stream? - bool m_Finalised; + //! JSON writer + TGenericLineWriter m_Writer; + //! Have we finalised the stream? + bool m_Finalised; }; - } // core } // ml diff --git a/include/core/CMessageBuffer.h b/include/core/CMessageBuffer.h index d47ad069af..61c196b4b5 100644 --- a/include/core/CMessageBuffer.h +++ b/include/core/CMessageBuffer.h @@ -9,18 +9,14 @@ #include #include #include -#include #include #include +#include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! A thread safe message buffer. @@ -34,129 +30,104 @@ namespace core //! flushed and processed in another (flush) and (process) //! template -class CMessageBuffer -{ - public: - CMessageBuffer(BUFFER &buffer) - : m_Thread(*this), - m_Condition(m_Mutex), - m_Buffer(buffer) - { - } +class CMessageBuffer { +public: + CMessageBuffer(BUFFER& buffer) : m_Thread(*this), m_Condition(m_Mutex), m_Buffer(buffer) {} - virtual ~CMessageBuffer() - { - } + virtual ~CMessageBuffer() {} - //! Initialise - create the receiving thread - bool start() - { - CScopedLock lock(m_Mutex); + //! Initialise - create the receiving thread + bool start() { + CScopedLock lock(m_Mutex); - if (m_Thread.start() == false) - { - LOG_ERROR("Unable to initialise thread"); - return false; - } + if (m_Thread.start() == false) { + LOG_ERROR("Unable to initialise thread"); + return false; + } - m_Condition.wait(); + m_Condition.wait(); - return true; - } + return true; + } - //! Shutdown - kill thread - bool stop() - { - m_Thread.stop(); + //! 
Shutdown - kill thread + bool stop() { + m_Thread.stop(); - return true; - } + return true; + } - void addMessage(const MESSAGE &msg) - { - CScopedLock lock(m_Mutex); + void addMessage(const MESSAGE& msg) { + CScopedLock lock(m_Mutex); - m_Buffer.addMessage(msg); - } + m_Buffer.addMessage(msg); + } - private: - class CMessageBufferThread : public CThread - { - public: - CMessageBufferThread(CMessageBuffer &messageBuffer) - : m_MessageBuffer(messageBuffer), - m_Shutdown(false), - m_IsRunning(false) - { - } +private: + class CMessageBufferThread : public CThread { + public: + CMessageBufferThread(CMessageBuffer& messageBuffer) + : m_MessageBuffer(messageBuffer), m_Shutdown(false), m_IsRunning(false) {} - //! The queue must have the mutex for this to be called - bool isRunning() const - { - // Assumes lock - return m_IsRunning; - } + //! The queue must have the mutex for this to be called + bool isRunning() const { + // Assumes lock + return m_IsRunning; + } - protected: - void run() - { - using TMessageVec = std::vector; + protected: + void run() { + using TMessageVec = std::vector; - m_MessageBuffer.m_Mutex.lock(); + m_MessageBuffer.m_Mutex.lock(); - m_IsRunning = true; + m_IsRunning = true; - m_MessageBuffer.m_Condition.signal(); + m_MessageBuffer.m_Condition.signal(); - while (!m_Shutdown) - { - m_MessageBuffer.m_Condition.wait(m_MessageBuffer.m_Buffer.flushInterval()); + while (!m_Shutdown) { + m_MessageBuffer.m_Condition.wait(m_MessageBuffer.m_Buffer.flushInterval()); - TMessageVec data; + TMessageVec data; - core_t::TTime flushedTime(m_MessageBuffer.m_Buffer.flushMessages(data)); - m_MessageBuffer.m_Mutex.unlock(); - m_MessageBuffer.m_Buffer.processMessages(data, flushedTime); - m_MessageBuffer.m_Mutex.lock(); - } + core_t::TTime flushedTime(m_MessageBuffer.m_Buffer.flushMessages(data)); + m_MessageBuffer.m_Mutex.unlock(); + m_MessageBuffer.m_Buffer.processMessages(data, flushedTime); + m_MessageBuffer.m_Mutex.lock(); + } - // Flush outstanding messages (maintain lock) - TMessageVec data; + // Flush outstanding messages (maintain lock) + TMessageVec data; - m_MessageBuffer.m_Buffer.flushAllMessages(data); - m_MessageBuffer.m_Buffer.processMessages(data, - std::numeric_limits::max()); + m_MessageBuffer.m_Buffer.flushAllMessages(data); + m_MessageBuffer.m_Buffer.processMessages(data, std::numeric_limits::max()); - m_IsRunning = false; + m_IsRunning = false; - m_MessageBuffer.m_Mutex.unlock(); - } + m_MessageBuffer.m_Mutex.unlock(); + } - void shutdown() - { - CScopedLock lock(m_MessageBuffer.m_Mutex); + void shutdown() { + CScopedLock lock(m_MessageBuffer.m_Mutex); - m_Shutdown = true; - m_MessageBuffer.m_Condition.signal(); - } + m_Shutdown = true; + m_MessageBuffer.m_Condition.signal(); + } - private: - CMessageBuffer &m_MessageBuffer; - bool m_Shutdown; - bool m_IsRunning; - }; + private: + CMessageBuffer& m_MessageBuffer; + bool m_Shutdown; + bool m_IsRunning; + }; - CMessageBufferThread m_Thread; - CMutex m_Mutex; - CCondition m_Condition; - BUFFER &m_Buffer; + CMessageBufferThread m_Thread; + CMutex m_Mutex; + CCondition m_Condition; + BUFFER& m_Buffer; friend class CMessageBufferThread; }; - - } } #endif // INCLUDED_ml_learn_CMessageBuffer_h - diff --git a/include/core/CMessageQueue.h b/include/core/CMessageQueue.h index f14d551e0c..7002916a6a 100644 --- a/include/core/CMessageQueue.h +++ b/include/core/CMessageQueue.h @@ -19,12 +19,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! A thread safe message queue. 
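// For illustration: a sketch of wiring a receiver to CMessageQueue. The
// template header of this class was garbled in this hunk, so the parameter
// order <MESSAGE, RECEIVER[, NUM_TO_TIME]> is an assumption; the receiver
// contract (processMsg(msg, backlog)) is inferred from the worker loop below:
// \code{cpp}
// #include <core/CMessageQueue.h>
// #include <iostream>
// #include <string>
//
// struct SPrintReceiver {
//     void processMsg(const std::string& msg, std::size_t backlog) {
//         std::cout << "processing " << msg << " (" << backlog << " pending)\n";
//     }
// };
//
// void runQueue() {
//     SPrintReceiver receiver;
//     ml::core::CMessageQueue<std::string, SPrintReceiver> queue(receiver);
//     queue.start();              // blocks until the worker thread is up
//     queue.dispatchMsg("hello"); // processed on the queue's thread
//     queue.stop();               // signals shutdown and joins the thread
// }
// \endcode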
@@ -59,286 +55,239 @@ namespace core //! that timing code can be completely compiled out when //! not required. //! -template -class CMessageQueue -{ - public: - //! Prototype for function to be called on queue shutdown - using TShutdownFunc = std::function; - - public: - CMessageQueue(RECEIVER &receiver, - const TShutdownFunc &shutdownFunc = &CMessageQueue::defaultShutdownFunc) - : m_Thread(*this), - m_Condition(m_Mutex), - m_Receiver(receiver), - m_ShutdownFunc(shutdownFunc), - // If timing is enabled, we need a buffer one bigger than the - // number of times to average over. If timing is disabled, the - // buffer can have capacity zero. - m_Readings((NUM_TO_TIME > 0) ? (NUM_TO_TIME + 1) : 0) - { +template +class CMessageQueue { +public: + //! Prototype for function to be called on queue shutdown + using TShutdownFunc = std::function; + +public: + CMessageQueue(RECEIVER& receiver, const TShutdownFunc& shutdownFunc = &CMessageQueue::defaultShutdownFunc) + : m_Thread(*this), + m_Condition(m_Mutex), + m_Receiver(receiver), + m_ShutdownFunc(shutdownFunc), + // If timing is enabled, we need a buffer one bigger than the + // number of times to average over. If timing is disabled, the + // buffer can have capacity zero. + m_Readings((NUM_TO_TIME > 0) ? (NUM_TO_TIME + 1) : 0) {} + + virtual ~CMessageQueue() {} + + //! Initialise - create the receiving thread + bool start() { + CScopedLock lock(m_Mutex); + + if (m_Thread.start() == false) { + LOG_ERROR("Unable to initialise thread"); + return false; } - virtual ~CMessageQueue() - { - } + m_Condition.wait(); - //! Initialise - create the receiving thread - bool start() - { - CScopedLock lock(m_Mutex); + return true; + } - if (m_Thread.start() == false) - { - LOG_ERROR("Unable to initialise thread"); - return false; - } + //! Shutdown - kill thread + bool stop() { + m_Thread.stop(); - m_Condition.wait(); + return true; + } - return true; - } + //! Send a message to the message queue thread (from any thread) + void dispatchMsg(const MESSAGE& msg) { + size_t dummy(0); + this->dispatchMsg(msg, dummy); + } - //! Shutdown - kill thread - bool stop() - { - m_Thread.stop(); + //! Send a message to the message queue thread (from any thread), + //! and get the pending count at the same time + void dispatchMsg(const MESSAGE& msg, size_t& pending) { + CScopedLock lock(m_Mutex); - return true; - } + if (!m_Thread.isRunning()) { + pending = 0; - //! Send a message to the message queue thread (from any thread) - void dispatchMsg(const MESSAGE &msg) - { - size_t dummy(0); - this->dispatchMsg(msg, dummy); + // Should be fatal error + LOG_FATAL("Cannot dispatch to message queue. Queue not initialised"); + return; } - //! Send a message to the message queue thread (from any thread), - //! and get the pending count at the same time - void dispatchMsg(const MESSAGE &msg, size_t &pending) - { - CScopedLock lock(m_Mutex); - - if (!m_Thread.isRunning()) - { - pending = 0; - - // Should be fatal error - LOG_FATAL("Cannot dispatch to message queue. Queue not initialised"); - return; - } - - m_Queue.push(msg); - pending = m_Queue.size(); + m_Queue.push(msg); + pending = m_Queue.size(); - // If there was already work queued up, we can save the cost of - // signalling (which is expensive as it involves kernel interaction) - if (pending <= 1) - { - m_Condition.signal(); - } + // If there was already work queued up, we can save the cost of + // signalling (which is expensive as it involves kernel interaction) + if (pending <= 1) { + m_Condition.signal(); } - - //! 
Get the number of pending messages in the queue. Note that it's - //! much more efficient to get this when dispatching a message, as - //! everything can then be done under a single mutex lock. This method - //! must be used sparingly to avoid excessive lock contention. - size_t pending() const - { - CScopedLock lock(m_Mutex); - - return m_Queue.size(); + } + + //! Get the number of pending messages in the queue. Note that it's + //! much more efficient to get this when dispatching a message, as + //! everything can then be done under a single mutex lock. This method + //! must be used sparingly to avoid excessive lock contention. + size_t pending() const { + CScopedLock lock(m_Mutex); + + return m_Queue.size(); + } + + //! Get the average time taken to process the last N items (in + //! seconds), where N was specified when timing was enabled. A + //! negative return value indicates an error. + double rollingAverageProcessingTime() const { + if (NUM_TO_TIME == 0) { + LOG_ERROR("Message queue timing is not switched on"); + + return -1.0; } - //! Get the average time taken to process the last N items (in - //! seconds), where N was specified when timing was enabled. A - //! negative return value indicates an error. - double rollingAverageProcessingTime() const - { - if (NUM_TO_TIME == 0) - { - LOG_ERROR("Message queue timing is not switched on"); + CScopedLock lock(m_Mutex); - return -1.0; - } - - CScopedLock lock(m_Mutex); - - if (m_Readings.size() < 2) - { - return -1.0; - } - - if (m_Readings.front() > m_Readings.back()) - { - LOG_ERROR("Time to process last " << NUM_TO_TIME << - " messages is negative (-" << - (m_Readings.front() - m_Readings.back()) << "ms). " - "Maybe the system clock has been put back?"); - return -1.0; - } - - return double(m_Readings.back() - m_Readings.front()) * 0.001 - / double(NUM_TO_TIME); + if (m_Readings.size() < 2) { + return -1.0; } - private: - //! No-op shutdown function if no other is provided - static void defaultShutdownFunc() - { + if (m_Readings.front() > m_Readings.back()) { + LOG_ERROR("Time to process last " << NUM_TO_TIME << " messages is negative (-" << (m_Readings.front() - m_Readings.back()) + << "ms). " + "Maybe the system clock has been put back?"); + return -1.0; } - private: - class CMessageQueueThread : public CThread - { - public: - CMessageQueueThread(CMessageQueue &messageQueue) - : m_MessageQueue(messageQueue), - m_ShuttingDown(false), - m_IsRunning(false) - { - } + return double(m_Readings.back() - m_Readings.front()) * 0.001 / double(NUM_TO_TIME); + } - //! The queue must have the mutex for this to be called - bool isRunning() const - { - // Assumes lock - return m_IsRunning; - } +private: + //! No-op shutdown function if no other is provided + static void defaultShutdownFunc() {} - protected: - void run() - { - m_MessageQueue.m_Mutex.lock(); - m_MessageQueue.m_Condition.signal(); +private: + class CMessageQueueThread : public CThread { + public: + CMessageQueueThread(CMessageQueue& messageQueue) + : m_MessageQueue(messageQueue), m_ShuttingDown(false), m_IsRunning(false) {} - m_IsRunning = true; + //! 
The queue must have the mutex for this to be called
-            bool isRunning() const
-            {
-                // Assumes lock
-                return m_IsRunning;
-            }
+        bool isRunning() const {
+            // Assumes lock
+            return m_IsRunning;
+        }
 
-        protected:
-            void run()
-            {
-                m_MessageQueue.m_Mutex.lock();
-                m_MessageQueue.m_Condition.signal();
+    protected:
+        void run() {
+            m_MessageQueue.m_Mutex.lock();
+            m_MessageQueue.m_Condition.signal();
 
-                m_IsRunning = true;
+            m_IsRunning = true;
 
-                while (!m_ShuttingDown)
-                {
-                    m_MessageQueue.m_Condition.wait();
+            while (!m_ShuttingDown) {
+                m_MessageQueue.m_Condition.wait();
 
-                    while (!m_MessageQueue.m_Queue.empty())
-                    {
-                        // Start the stop watch if it's not running and it
-                        // should be
-                        if (NUM_TO_TIME > 0 &&
-                            !m_MessageQueue.m_StopWatch.isRunning())
-                        {
-                            m_MessageQueue.m_StopWatch.start();
-                        }
+                while (!m_MessageQueue.m_Queue.empty()) {
+                    // Start the stop watch if it's not running and it
+                    // should be
+                    if (NUM_TO_TIME > 0 && !m_MessageQueue.m_StopWatch.isRunning()) {
+                        m_MessageQueue.m_StopWatch.start();
+                    }
 
-                        MESSAGE &msg = m_MessageQueue.m_Queue.front();
+                    MESSAGE& msg = m_MessageQueue.m_Queue.front();
 
-                        // Don't include the current work item in the backlog
-                        size_t backlog(m_MessageQueue.m_Queue.size() - 1);
+                    // Don't include the current work item in the backlog
+                    size_t backlog(m_MessageQueue.m_Queue.size() - 1);
 
-                        m_MessageQueue.m_Mutex.unlock();
+                    m_MessageQueue.m_Mutex.unlock();
 
-                        m_MessageQueue.m_Receiver.processMsg(msg, backlog);
+                    m_MessageQueue.m_Receiver.processMsg(msg, backlog);
 
-                        // Try to do as much deletion as possible outside
-                        // the lock, so the pop() below is cheap
-                        this->destroyMsgDataUnlocked(msg);
+                    // Try to do as much deletion as possible outside
+                    // the lock, so the pop() below is cheap
+                    this->destroyMsgDataUnlocked(msg);
 
-                        m_MessageQueue.m_Mutex.lock();
+                    m_MessageQueue.m_Mutex.lock();
 
-                        m_MessageQueue.m_Queue.pop();
+                    m_MessageQueue.m_Queue.pop();
 
-                        // If the stop watch is running, update the history
-                        // of readings
-                        if (NUM_TO_TIME > 0 &&
-                            m_MessageQueue.m_StopWatch.isRunning())
-                        {
-                            m_MessageQueue.m_Readings.push_back(m_MessageQueue.m_StopWatch.lap());
-                        }
-                    }
+                    // If the stop watch is running, update the history
+                    // of readings
+                    if (NUM_TO_TIME > 0 && m_MessageQueue.m_StopWatch.isRunning()) {
+                        m_MessageQueue.m_Readings.push_back(m_MessageQueue.m_StopWatch.lap());
+                    }
+                }
 
-                    // Stop the stop watch if it's running, as we're
-                    // probably about to go to sleep
-                    if (NUM_TO_TIME > 0 &&
-                        m_MessageQueue.m_StopWatch.isRunning())
-                    {
-                        m_MessageQueue.m_StopWatch.stop();
-                    }
+                // Stop the stop watch if it's running, as we're
+                // probably about to go to sleep
+                if (NUM_TO_TIME > 0 && m_MessageQueue.m_StopWatch.isRunning()) {
+                    m_MessageQueue.m_StopWatch.stop();
+                }
+            }
 
-                m_IsRunning = false;
+            m_IsRunning = false;
 
-                m_MessageQueue.m_ShutdownFunc();
+            m_MessageQueue.m_ShutdownFunc();
 
-                m_MessageQueue.m_Mutex.unlock();
-            }
+            m_MessageQueue.m_Mutex.unlock();
+        }
 
-            void shutdown()
-            {
-                CScopedLock lock(m_MessageQueue.m_Mutex);
+        void shutdown() {
+            CScopedLock lock(m_MessageQueue.m_Mutex);
 
-                m_ShuttingDown = true;
-                m_MessageQueue.m_Condition.signal();
-            }
+            m_ShuttingDown = true;
+            m_MessageQueue.m_Condition.signal();
+        }
 
-        private:
-            //! It's best to minimise work done while the mutex is locked,
-            //! so ideally we'll clean up the MESSAGE object as much as
-            //! possible outside the lock. This is the most generic case,
-            //! where we can't do anything.
-            template<typename ANYTHING>
-            void destroyMsgDataUnlocked(ANYTHING &)
-            {
-                // For an arbitrary type we have no idea how to destroy some
-                // of its data without calling its destructor
-            }
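That generic no-op is the customisation point this design leans on: deletion is deliberately pulled outside the mutex. A further overload in the same spirit (hypothetical, not part of this patch) could do the same for a vector-valued MESSAGE by swapping its storage into a local that is destroyed outside the lock:

    #include <vector>

    // Hypothetical extra overload: the vector's heap buffer is released
    // here, outside the queue mutex, leaving msg empty but valid.
    template<typename T>
    void destroyMsgDataUnlocked(std::vector<T>& msg) {
        std::vector<T> empty;
        empty.swap(msg);
    }

-            //! Specialisation of the above that might delete the referenced
-            //! 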
data if the MESSAGE type is a shared pointer (if no other
-            //! shared pointer points to it).
-            template<typename POINTEE>
-            void destroyMsgDataUnlocked(boost::shared_ptr<POINTEE> &ptr)
-            {
-                ptr.reset();
-            }
+    private:
+        //! It's best to minimise work done while the mutex is locked,
+        //! so ideally we'll clean up the MESSAGE object as much as
+        //! possible outside the lock. This is the most generic case,
+        //! where we can't do anything.
+        template<typename ANYTHING>
+        void destroyMsgDataUnlocked(ANYTHING&) {
+            // For an arbitrary type we have no idea how to destroy some
+            // of its data without calling its destructor
+        }
 
-            // Other specialisations could potentially be added here
+        //! Specialisation of the above that might delete the referenced
+        //! data if the MESSAGE type is a shared pointer (if no other
+        //! shared pointer points to it).
+        template<typename POINTEE>
+        void destroyMsgDataUnlocked(boost::shared_ptr<POINTEE>& ptr) {
+            ptr.reset();
+        }
+
+        // Other specialisations could potentially be added here
 
-        private:
-            CMessageQueue &m_MessageQueue;
-            bool m_ShuttingDown;
-            bool m_IsRunning;
-        };
+    private:
+        CMessageQueue& m_MessageQueue;
+        bool m_ShuttingDown;
+        bool m_IsRunning;
+    };
 
-        CMessageQueueThread m_Thread;
-        mutable CMutex m_Mutex;
-        CCondition m_Condition;
-        RECEIVER &m_Receiver;
+    CMessageQueueThread m_Thread;
+    mutable CMutex m_Mutex;
+    CCondition m_Condition;
+    RECEIVER& m_Receiver;
 
-        using TMessageQueue = std::queue<MESSAGE>;
+    using TMessageQueue = std::queue<MESSAGE>;
 
-        TMessageQueue m_Queue;
+    TMessageQueue m_Queue;
 
-        //! Function to be called on queue shutdown
-        TShutdownFunc m_ShutdownFunc;
+    //! Function to be called on queue shutdown
+    TShutdownFunc m_ShutdownFunc;
 
-        //! A stop watch for timing how long it takes to process messages
-        CStopWatch m_StopWatch;
+    //! A stop watch for timing how long it takes to process messages
+    CStopWatch m_StopWatch;
 
-        using TUIntCircBuf = boost::circular_buffer<uint64_t>;
+    using TUIntCircBuf = boost::circular_buffer<uint64_t>;
 
-        //! Stop watch readings
-        TUIntCircBuf m_Readings;
+    //! Stop watch readings
+    TUIntCircBuf m_Readings;
 
     friend class CMessageQueueThread;
 };
-
-
 }
 }
 
 #endif // INCLUDED_ml_core_CMessageQueue_h
-
diff --git a/include/core/CMonotonicTime.h b/include/core/CMonotonicTime.h
index 7142afd75a..14dc96bbf1 100644
--- a/include/core/CMonotonicTime.h
+++ b/include/core/CMonotonicTime.h
@@ -10,11 +10,8 @@
 
 #include 
 
-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 
 //! \brief
 //! Get a time that should never decrease
@@ -53,28 +50,24 @@ namespace core
 //! For platforms using clock_gettime(), there is a further fallback to
 //! CLOCK_REALTIME in the event of CLOCK_MONOTONIC not being available.
 //!
-class CORE_EXPORT CMonotonicTime
-{
-    public:
-        //! Initialise any required scaling factors
-        CMonotonicTime();
+class CORE_EXPORT CMonotonicTime {
+public:
+    //! Initialise any required scaling factors
+    CMonotonicTime();
 
-        //! Get the number of milliseconds since some fixed point in the past
-        uint64_t milliseconds() const;
+    //! Get the number of milliseconds since some fixed point in the past
+    uint64_t milliseconds() const;
 
-        //! Get the number of nanoseconds since some fixed point in the past
-        uint64_t nanoseconds() const;
+    //! Get the number of nanoseconds since some fixed point in the past
+    uint64_t nanoseconds() const;
 
-    private:
-        //! Operating system specific scaling factors
-        uint64_t m_ScalingFactor1;
-        uint64_t m_ScalingFactor2;
-        uint64_t m_ScalingFactor3;
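Typical use of this class is differencing two readings of the same clock; a short sketch (the work being timed is a placeholder):

    #include <core/CMonotonicTime.h>

    #include <stdint.h>

    uint64_t timeSomethingMs() {
        ml::core::CMonotonicTime clock;
        uint64_t start(clock.milliseconds());
        // ... do the work to be timed ...
        // Unlike wall-clock differencing, this cannot go negative if the
        // system clock is adjusted.
        return clock.milliseconds() - start;
    }

+private:
+    //! 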
Operating system specific scaling factors
-        uint64_t m_ScalingFactor1;
-        uint64_t m_ScalingFactor2;
-        uint64_t m_ScalingFactor3;
+    uint64_t m_ScalingFactor1;
+    uint64_t m_ScalingFactor2;
+    uint64_t m_ScalingFactor3;
 };
-
-
 }
 }
 
 #endif // INCLUDED_ml_core_CMonotonicTime_h
-
diff --git a/include/core/CMutex.h b/include/core/CMutex.h
index 8ea71cd0f6..87134ddd89 100644
--- a/include/core/CMutex.h
+++ b/include/core/CMutex.h
@@ -14,14 +14,10 @@
 #include 
 #endif
 
-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 
 class CCondition;
 
-
 //! \brief
 //! Wrapper class around pthread mutex.
 //!
@@ -38,29 +34,25 @@ class CCondition;
 //! On Windows, critical sections are preferred to mutexes, as
 //! they do not consume system handles.
 //!
-class CORE_EXPORT CMutex : private CNonCopyable
-{
-    public:
-        CMutex();
-        ~CMutex();
+class CORE_EXPORT CMutex : private CNonCopyable {
+public:
+    CMutex();
+    ~CMutex();
 
-        void lock();
-        void unlock();
+    void lock();
+    void unlock();
 
-    private:
+private:
 #ifdef Windows
-        CRITICAL_SECTION m_Mutex;
+    CRITICAL_SECTION m_Mutex;
 #else
-        pthread_mutex_t m_Mutex;
+    pthread_mutex_t m_Mutex;
 #endif
 
     // Allow CCondition access to internals
     friend class CCondition;
 };
-
-
 }
 }
 
 #endif // INCLUDED_ml_core_CMutex_h
-
diff --git a/include/core/CNamedPipeFactory.h b/include/core/CNamedPipeFactory.h
index 4ef69479d4..d0fcc2dcf7 100644
--- a/include/core/CNamedPipeFactory.h
+++ b/include/core/CNamedPipeFactory.h
@@ -17,11 +17,8 @@
 
 #include 
 
-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 
 //! \brief
 //! Class to create named pipes.
@@ -57,62 +54,58 @@ namespace core
 //! at least one side of the connection, to avoid a deadlock
 //! due to buffers filling up.
 //!
-class CORE_EXPORT CNamedPipeFactory : private CNonInstantiatable
-{
-    public:
-        using TIStreamP = boost::shared_ptr<std::istream>;
-        using TOStreamP = boost::shared_ptr<std::ostream>;
-        using TFileP = boost::shared_ptr<FILE>;
-
-    public:
-        //! Character that can safely be used to test whether named pipes are
-        //! connected. The Java side of the pipe will silently ignore it.
-        //! (Obviously this is specific to Elastic.)
-        static const char TEST_CHAR;
-
-    public:
-        //! Initialise and open a named pipe for reading, returning a C++ stream
-        //! that can be used to read from it. Returns a NULL pointer on
-        //! failure.
-        static TIStreamP openPipeStreamRead(const std::string &fileName);
-
-        //! Initialise and open a named pipe for writing, returning a C++ stream
-        //! that can be used to write to it. Returns a NULL pointer on failure.
-        static TOStreamP openPipeStreamWrite(const std::string &fileName);
-
-        //! Initialise and open a named pipe for writing, returning a C FILE
-        //! that can be used to read from it. Returns a NULL pointer on
-        //! failure.
-        static TFileP openPipeFileRead(const std::string &fileName);
-
-        //! Initialise and open a named pipe for writing, returning a C FILE
-        //! that can be used to write to it. Returns a NULL pointer on failure.
-        static TFileP openPipeFileWrite(const std::string &fileName);
-
-        //! Does the supplied file name refer to a named pipe?
-        static bool isNamedPipe(const std::string &fileName);
-
-        //! Default path for named pipes.
-        static std::string defaultPath();
-
-    private:
+class CORE_EXPORT CNamedPipeFactory : private CNonInstantiatable {
+public:
+    using TIStreamP = boost::shared_ptr<std::istream>;
+    using TOStreamP = boost::shared_ptr<std::ostream>;
+    using TFileP = boost::shared_ptr<FILE>;
+
+public:
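A sketch of how the stream flavour is typically consumed (pipe name invented; error handling kept minimal):

    #include <core/CNamedPipeFactory.h>

    #include <string>

    void drainPipe() {
        ml::core::CNamedPipeFactory::TIStreamP strm =
            ml::core::CNamedPipeFactory::openPipeStreamRead("/tmp/example_pipe");
        if (!strm) {
            return; // open failed
        }
        std::string line;
        while (std::getline(*strm, line)) {
            // ... process one line ...
        }
    }

+    //! Character that can safely be used to test whether named pipes are
+    //! connected. The Java side of the pipe will silently ignore it.
+    //! (Obviously this is specific to Elastic.) 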
+ static const char TEST_CHAR; + +public: + //! Initialise and open a named pipe for reading, returning a C++ stream + //! that can be used to read from it. Returns a NULL pointer on + //! failure. + static TIStreamP openPipeStreamRead(const std::string& fileName); + + //! Initialise and open a named pipe for writing, returning a C++ stream + //! that can be used to write to it. Returns a NULL pointer on failure. + static TOStreamP openPipeStreamWrite(const std::string& fileName); + + //! Initialise and open a named pipe for writing, returning a C FILE + //! that can be used to read from it. Returns a NULL pointer on + //! failure. + static TFileP openPipeFileRead(const std::string& fileName); + + //! Initialise and open a named pipe for writing, returning a C FILE + //! that can be used to write to it. Returns a NULL pointer on failure. + static TFileP openPipeFileWrite(const std::string& fileName); + + //! Does the supplied file name refer to a named pipe? + static bool isNamedPipe(const std::string& fileName); + + //! Default path for named pipes. + static std::string defaultPath(); + +private: #ifdef Windows - using TPipeHandle = HANDLE; + using TPipeHandle = HANDLE; #else - using TPipeHandle = int; + using TPipeHandle = int; #endif - private: - //! Initialise and open a named pipe for writing, returning a handle - //! file descriptor that can be used to access it. This is the core - //! implementation of the higher level encapsulations that the public - //! interface provides. - static TPipeHandle initPipeHandle(const std::string &fileName, bool forWrite); +private: + //! Initialise and open a named pipe for writing, returning a handle + //! file descriptor that can be used to access it. This is the core + //! implementation of the higher level encapsulations that the public + //! interface provides. + static TPipeHandle initPipeHandle(const std::string& fileName, bool forWrite); }; - - } } #endif // INCLUDED_ml_core_CNamedPipeFactory_h - diff --git a/include/core/CNonCopyable.h b/include/core/CNonCopyable.h index b4a4090481..e71a2e5d4c 100644 --- a/include/core/CNonCopyable.h +++ b/include/core/CNonCopyable.h @@ -8,12 +8,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Equivalent to boost::noncopyable. @@ -32,28 +28,20 @@ namespace core //! inlined. This is the difference compared to boost::noncopyable, //! and what prevents Visual C++ warning C4275. //! -class CORE_EXPORT CNonCopyable -{ - protected: - //! Inlined in the hope that the compiler will optimise it away - CNonCopyable() - { - } - - //! Inlined in the hope that the compiler will optimise it away - ~CNonCopyable() - { - } - - private: - //! Prevent copying - CNonCopyable(const CNonCopyable &); - CNonCopyable &operator=(const CNonCopyable &); +class CORE_EXPORT CNonCopyable { +protected: + //! Inlined in the hope that the compiler will optimise it away + CNonCopyable() {} + + //! Inlined in the hope that the compiler will optimise it away + ~CNonCopyable() {} + +private: + //! Prevent copying + CNonCopyable(const CNonCopyable&); + CNonCopyable& operator=(const CNonCopyable&); }; - - } } #endif // INCLUDED_ml_core_CNonCopyable_h - diff --git a/include/core/CNonInstantiatable.h b/include/core/CNonInstantiatable.h index 410927dd69..d05c611870 100644 --- a/include/core/CNonInstantiatable.h +++ b/include/core/CNonInstantiatable.h @@ -8,12 +8,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! 
Similar idea to boost::noncopyable, but for instantiation. @@ -27,17 +23,13 @@ namespace core //! The class is (seemingly pointlessly) exported from the DLL to //! prevent Visual C++ warning C4275. //! -class CORE_EXPORT CNonInstantiatable -{ - private: - //! Prevent instantiation - CNonInstantiatable(); - CNonInstantiatable(const CNonInstantiatable &); +class CORE_EXPORT CNonInstantiatable { +private: + //! Prevent instantiation + CNonInstantiatable(); + CNonInstantiatable(const CNonInstantiatable&); }; - - } } #endif // INCLUDED_ml_core_CNonInstantiatable_h - diff --git a/include/core/COsFileFuncs.h b/include/core/COsFileFuncs.h index dccbcc6d1f..013d53f446 100644 --- a/include/core/COsFileFuncs.h +++ b/include/core/COsFileFuncs.h @@ -13,18 +13,14 @@ #include #include #endif -#include #include +#include #ifndef Windows #include #endif - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper around OS level file functions. @@ -50,110 +46,105 @@ namespace core //! On Windows, we have to explicitly request 64 bit types in the //! typedefs and functions in this class. //! -class CORE_EXPORT COsFileFuncs : private CNonInstantiatable -{ - public: - //! Use in place of OS level file flags - will be defined as zero on - //! platforms that don't support them - static const int APPEND; - static const int BINARY; - static const int CREAT; - static const int EXCL; - static const int NOFOLLOW; - static const int RDONLY; - static const int RDWR; - static const int TEXT; - static const int TRUNC; - static const int WRONLY; - static const int RENAMABLE; - - //! Use in place of OS level access flags - will be defined as zero on - //! platforms that don't support them - static const int EXISTS; - static const int READABLE; - static const int WRITABLE; - static const int EXECUTABLE; - - //! The name of the magic file that discards everything written to it - static const char *NULL_FILENAME; - - public: - //! Signed size type (to be used instead of ssize_t) +class CORE_EXPORT COsFileFuncs : private CNonInstantiatable { +public: + //! Use in place of OS level file flags - will be defined as zero on + //! platforms that don't support them + static const int APPEND; + static const int BINARY; + static const int CREAT; + static const int EXCL; + static const int NOFOLLOW; + static const int RDONLY; + static const int RDWR; + static const int TEXT; + static const int TRUNC; + static const int WRONLY; + static const int RENAMABLE; + + //! Use in place of OS level access flags - will be defined as zero on + //! platforms that don't support them + static const int EXISTS; + static const int READABLE; + static const int WRITABLE; + static const int EXECUTABLE; + + //! The name of the magic file that discards everything written to it + static const char* NULL_FILENAME; + +public: +//! Signed size type (to be used instead of ssize_t) #ifdef Windows - using TSignedSize = int; + using TSignedSize = int; #else - using TSignedSize = ssize_t; + using TSignedSize = ssize_t; #endif - //! Offset type (to be used instead of off_t) +//! Offset type (to be used instead of off_t) #ifdef Windows - using TOffset = __int64; + using TOffset = __int64; #else - using TOffset = off_t; + using TOffset = off_t; #endif - //! Mode type (to be used instead of mode_t) +//! Mode type (to be used instead of mode_t) #ifdef Windows - using TMode = int; + using TMode = int; #else - using TMode = mode_t; + using TMode = mode_t; #endif - //! Inode type (to be used instead of ino_t) +//! 
Inode type (to be used instead of ino_t) #ifdef Windows - using TIno = uint64_t; + using TIno = uint64_t; #else - using TIno = ino_t; + using TIno = ino_t; #endif - //! Stat buffer struct (to be used instead of struct stat) +//! Stat buffer struct (to be used instead of struct stat) #ifdef Windows - struct SStat - { - // Member names don't conform to the coding standards because they - // need to match those of struct stat - _dev_t st_dev; - //! Replaces the _ino_t member of _stati64 - TIno st_ino; - unsigned short st_mode; - short st_nlink; - short st_uid; - short st_gid; - _dev_t st_rdev; - __int64 st_size; - __time64_t st_atime; - __time64_t st_mtime; - __time64_t st_ctime; - }; - - using TStat = SStat; + struct SStat { + // Member names don't conform to the coding standards because they + // need to match those of struct stat + _dev_t st_dev; + //! Replaces the _ino_t member of _stati64 + TIno st_ino; + unsigned short st_mode; + short st_nlink; + short st_uid; + short st_gid; + _dev_t st_rdev; + __int64 st_size; + __time64_t st_atime; + __time64_t st_mtime; + __time64_t st_ctime; + }; + + using TStat = SStat; #else - using TStat = struct stat; + using TStat = struct stat; #endif - public: - static int open(const char *path, int oflag); - static int open(const char *path, int oflag, TMode pmode); - static int dup(int fildes); - static int dup2(int fildes, int fildes2); - static TOffset lseek(int fildes, TOffset offset, int whence); - static TSignedSize read(int fildes, void *buf, size_t nbyte); - static TSignedSize write(int fildes, const void *buf, size_t nbyte); - static int close(int fildes); - - static int fstat(int fildes, TStat *buf); - static int stat(const char *path, TStat *buf); - static int lstat(const char *path, TStat *buf); - static int access(const char *path, int amode); - - static char *getcwd(char *buf, size_t size); - static int chdir(const char *path); - static int mkdir(const char *path); +public: + static int open(const char* path, int oflag); + static int open(const char* path, int oflag, TMode pmode); + static int dup(int fildes); + static int dup2(int fildes, int fildes2); + static TOffset lseek(int fildes, TOffset offset, int whence); + static TSignedSize read(int fildes, void* buf, size_t nbyte); + static TSignedSize write(int fildes, const void* buf, size_t nbyte); + static int close(int fildes); + + static int fstat(int fildes, TStat* buf); + static int stat(const char* path, TStat* buf); + static int lstat(const char* path, TStat* buf); + static int access(const char* path, int amode); + + static char* getcwd(char* buf, size_t size); + static int chdir(const char* path); + static int mkdir(const char* path); }; - - } } #endif // INCLUDED_ml_core_COsFileFuncs_h - diff --git a/include/core/CPOpen.h b/include/core/CPOpen.h index 657da9c9d5..bb4a56b929 100644 --- a/include/core/CPOpen.h +++ b/include/core/CPOpen.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper for the popen()/pclose() functions. @@ -28,18 +24,13 @@ namespace core //! This has been broken into a class of its own because Windows has a //! _popen() and _pclose() functions rather than Unix's popen() and pclose(). //! 
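To make the wrapper's role concrete, a usage sketch (command invented; not part of the header):

    #include <core/CPOpen.h>

    #include <stdio.h>

    void listDirectory() {
        FILE* stream(ml::core::CPOpen::pOpen("ls", "r"));
        if (stream == 0) {
            return; // command could not be started
        }
        char line[256];
        while (fgets(line, sizeof(line), stream) != 0) {
            // ... consume one line of the command's output ...
        }
        ml::core::CPOpen::pClose(stream);
    }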
-class CORE_EXPORT CPOpen : private CNonInstantiatable
-{
-    public:
-        static FILE *pOpen(const char *command,
-                           const char *mode);
+class CORE_EXPORT CPOpen : private CNonInstantiatable {
+public:
+    static FILE* pOpen(const char* command, const char* mode);
 
-        static int pClose(FILE *stream);
+    static int pClose(FILE* stream);
 };
-
-
 }
 }
 
 #endif // INCLUDED_ml_core_CPOpen_h
-
diff --git a/include/core/CPatternSet.h b/include/core/CPatternSet.h
index 6390bae96e..87a0ab80ab 100644
--- a/include/core/CPatternSet.h
+++ b/include/core/CPatternSet.h
@@ -13,10 +13,8 @@
 #include 
 #include 
 
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 
 //! \brief A set that allows efficient lookups of strings.
 //!
@@ -37,42 +35,41 @@ namespace core
 //! - its end matched a suffix pattern
 //! - it matches fully against a full pattern
 //! - the start of any of its substrings ending at its end matches a contains pattern
-class CORE_EXPORT CPatternSet
-{
-    public:
-        using TStrVec = std::vector<std::string>;
-        using TStrVecCItr = TStrVec::const_iterator;
-        using TStrCItr = std::string::const_iterator;
+class CORE_EXPORT CPatternSet {
+public:
+    using TStrVec = std::vector<std::string>;
+    using TStrVecCItr = TStrVec::const_iterator;
+    using TStrCItr = std::string::const_iterator;
 
-    public:
-        //! Default constructor.
-        CPatternSet();
+public:
+    //! Default constructor.
+    CPatternSet();
 
-        //! Initialise the set from JSON that is an array of strings.
-        bool initFromJson(const std::string &json);
+    //! Initialise the set from JSON that is an array of strings.
+    bool initFromJson(const std::string& json);
 
-        //! Check if the set contains the given key.
-        bool contains(const std::string &key) const;
+    //! Check if the set contains the given key.
+    bool contains(const std::string& key) const;
 
-        //! Clears the set.
-        void clear();
+    //! Clears the set.
+    void clear();
 
-    private:
-        void sortAndPruneDuplicates(TStrVec &keys);
+private:
+    void sortAndPruneDuplicates(TStrVec& keys);
 
-    private:
-        //! The prefix tree containing full patterns (no wildcard).
-        CFlatPrefixTree m_FullMatchPatterns;
+private:
+    //! The prefix tree containing full patterns (no wildcard).
+    CFlatPrefixTree m_FullMatchPatterns;
 
-        //! The prefix tree containing prefix patterns.
-        CFlatPrefixTree m_PrefixPatterns;
+    //! The prefix tree containing prefix patterns.
+    CFlatPrefixTree m_PrefixPatterns;
 
-        //! The prefix tree containing suffix patterns
-        //! (note that the suffixes are stored reverted).
-        CFlatPrefixTree m_SuffixPatterns;
+    //! The prefix tree containing suffix patterns
+    //! (note that the suffixes are stored reverted).
+    CFlatPrefixTree m_SuffixPatterns;
 
-        //! The prefix tree containing the contains patterns.
-        CFlatPrefixTree m_ContainsPatterns;
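To connect the four pattern classes to the JSON input format, a short sketch (patterns invented; this assumes the usual convention that a leading and/or trailing '*' marks suffix, prefix and contains patterns respectively):

    #include <core/CPatternSet.h>

    void filterExample() {
        ml::core::CPatternSet set;
        // A full, a prefix, a suffix and a contains pattern.
        if (set.initFromJson("[\"error\",\"warn*\",\"*failed\",\"*retry*\"]") == false) {
            return; // not a valid JSON array of strings
        }
        bool hit(set.contains("warning")); // true: starts with "warn"
        (void)hit;
    }

+    //! The prefix tree containing the contains patterns. 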
+ CFlatPrefixTree m_ContainsPatterns; }; } } diff --git a/include/core/CPersistUtils.h b/include/core/CPersistUtils.h index 3baf2ec82a..4ac40d2550 100644 --- a/include/core/CPersistUtils.h +++ b/include/core/CPersistUtils.h @@ -15,25 +15,22 @@ #include #include +#include #include -#include -#include #include +#include +#include #include #include -#include #include #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -namespace persist_utils_detail -{ +namespace persist_utils_detail { const std::string FIRST_TAG("a"); const std::string SECOND_TAG("b"); @@ -41,16 +38,13 @@ const std::string MAP_TAG("c"); const std::string SIZE_TAG("d"); template -struct remove_const -{ +struct remove_const { using type = typename boost::remove_const::type; }; template -struct remove_const > -{ - using type = std::pair::type, - typename remove_const::type>; +struct remove_const> { + using type = std::pair::type, typename remove_const::type>; }; //! Template specialisation utility classes for selecting various @@ -67,31 +61,26 @@ class MemberFromDelimited {}; //! Auxiliary type used by has_const_iterator to test for a nested //! typedef. template -struct enable_if -{ +struct enable_if { using type = R; }; //! Auxiliary type used by has_persist_function to test for a nested //! member function. template -struct enable_if_is -{ +struct enable_if_is { using type = R; }; - //! \name Class used to select appropriate persist implementation //! for containers. //@{ template -struct persist_container_selector -{ +struct persist_container_selector { using value = BasicPersist; }; template -struct persist_container_selector::type> -{ +struct persist_container_selector::type> { using value = ContainerPersist; }; //@} @@ -99,45 +88,39 @@ struct persist_container_selector -struct persist_selector -{ +struct persist_selector { using value = typename persist_container_selector::value; }; template -struct persist_selector::type> -{ +struct persist_selector::type> { using value = MemberPersist; }; template -struct persist_selector::type> -{ +struct persist_selector::type> { using value = MemberToDelimited; }; //@} //! Detail of the persist class selected by the object features -template class CPersisterImpl {}; +template +class CPersisterImpl {}; //! Convenience function to select implementation. template -bool persist(const std::string &tag, const T &target, CStatePersistInserter &inserter) -{ +bool persist(const std::string& tag, const T& target, CStatePersistInserter& inserter) { CPersisterImpl::value>::dispatch(tag, target, inserter); return true; } - //! \name Class used to select appropriate restore implementation //! for containers. //@{ template -struct restore_container_selector -{ +struct restore_container_selector { using value = BasicRestore; }; template -struct restore_container_selector::type> -{ +struct restore_container_selector::type> { using value = ContainerRestore; }; //@} @@ -145,29 +128,26 @@ struct restore_container_selector -struct restore_selector -{ +struct restore_selector { using value = typename restore_container_selector::value; }; template -struct restore_selector::type> -{ +struct restore_selector::type> { using value = MemberRestore; }; template -struct restore_selector::type> -{ +struct restore_selector::type> { using value = MemberFromDelimited; }; //@} //! Detail of the restorer implementation based on object features -template class CRestorerImpl {}; +template +class CRestorerImpl {}; //! Convenience function to select implementation. 
template -bool restore(const std::string &tag, T &target, CStateRestoreTraverser &traverser) -{ +bool restore(const std::string& tag, T& target, CStateRestoreTraverser& traverser) { return CRestorerImpl::value>::dispatch(tag, target, traverser); } @@ -183,52 +163,44 @@ class CanReserve {}; //! various reserve implementations. //@{ template -struct reserve_selector -{ +struct reserve_selector { using value = ENABLE; }; template -struct reserve_selector::type> -{ +struct reserve_selector::type> { using value = CanReserve; }; //@} //! \brief Implementation of the pre-allocation class for objects //! which don't support pre-allocation - i.e. do nothing. -template class CReserveImpl -{ - public: - template - static void dispatch(const T &, std::size_t) - { - } +template +class CReserveImpl { +public: + template + static void dispatch(const T&, std::size_t) {} }; //! \brief Implementation of the pre-allocation class for objects //! which have a void reserve(size_t) method. template<> -class CReserveImpl -{ - public: - template - static void dispatch(T &t, std::size_t amount) - { - t.reserve(amount); - } +class CReserveImpl { +public: + template + static void dispatch(T& t, std::size_t amount) { + t.reserve(amount); + } }; //! Helper function to select the correct reserver class based //! on template specialisation selection template -void reserve(T &t, std::size_t amount) -{ +void reserve(T& t, std::size_t amount) { CReserveImpl::value>::dispatch(t, amount); } } // persist_utils_detail:: - //! \brief A class of persistence patterns. //! //! DESCRIPTION:\n @@ -238,1002 +210,761 @@ void reserve(T &t, std::size_t amount) //! document. This also means that we can reserve vectors on restore. //! //! Any new patterns should be added here. -class CORE_EXPORT CPersistUtils -{ - public: - static const char DELIMITER; - static const char PAIR_DELIMITER; - +class CORE_EXPORT CPersistUtils { +public: + static const char DELIMITER; + static const char PAIR_DELIMITER; + +public: + //! \brief Utility to convert a built in type to a string + //! using CStringUtils functions. + class CORE_EXPORT CBuiltinToString { public: - //! \brief Utility to convert a built in type to a string - //! using CStringUtils functions. - class CORE_EXPORT CBuiltinToString - { - public: - CBuiltinToString(const char pairDelimiter) : - m_PairDelimiter(pairDelimiter) - {} - - std::string operator()(double value) const - { - return CStringUtils::typeToStringPrecise(value, CIEEE754::E_SinglePrecision); - } - - template - std::string operator()(T value) const - { - return CStringUtils::typeToString(value); - } - - std::string operator()(int8_t value) const - { - return CStringUtils::typeToString(static_cast(value)); - } - - std::string operator()(uint8_t value) const - { - return CStringUtils::typeToString(static_cast(value)); - } - - std::string operator()(int16_t value) const - { - return CStringUtils::typeToString(static_cast(value)); - } - - std::string operator()(uint16_t value) const - { - return CStringUtils::typeToString(static_cast(value)); - } - - std::string operator()(CFloatStorage value) const - { - return value.toString(); - } - - template - std::string operator()(const std::pair &value) const - { - return this->operator()(value.first) - + m_PairDelimiter - + this->operator()(value.second); - } - - private: - char m_PairDelimiter; - }; + CBuiltinToString(const char pairDelimiter) : m_PairDelimiter(pairDelimiter) {} - //! \brief Utility to convert a string to a built in type - //! using CStringUtils functions. 
- class CORE_EXPORT CBuiltinFromString - { - public: - CBuiltinFromString(const char pairDelimiter) : - m_PairDelimiter(pairDelimiter) - { - m_Token.reserve(15); - } + std::string operator()(double value) const { return CStringUtils::typeToStringPrecise(value, CIEEE754::E_SinglePrecision); } - template - bool operator()(const std::string &token, T &value) const - { - return CStringUtils::stringToType(token, value); - } + template + std::string operator()(T value) const { + return CStringUtils::typeToString(value); + } - bool operator()(const std::string &token, int8_t &value) const - { - int value_; - if (CStringUtils::stringToType(token, value_)) - { - value = static_cast(value_); - return true; - } - return false; - } + std::string operator()(int8_t value) const { return CStringUtils::typeToString(static_cast(value)); } - bool operator()(const std::string &token, uint8_t &value) const - { - unsigned int value_; - if (CStringUtils::stringToType(token, value_)) - { - value = static_cast(value_); - return true; - } - return false; - } + std::string operator()(uint8_t value) const { return CStringUtils::typeToString(static_cast(value)); } - bool operator()(const std::string &token, int16_t &value) const - { - int value_; - if (CStringUtils::stringToType(token, value_)) - { - value = static_cast(value_); - return true; - } - return false; - } + std::string operator()(int16_t value) const { return CStringUtils::typeToString(static_cast(value)); } - bool operator()(const std::string &token, uint16_t &value) const - { - unsigned int value_; - if (CStringUtils::stringToType(token, value_)) - { - value = static_cast(value_); - return true; - } - return false; - } + std::string operator()(uint16_t value) const { return CStringUtils::typeToString(static_cast(value)); } - bool operator()(const std::string &token, CFloatStorage &value) const - { - return value.fromString(token); - } + std::string operator()(CFloatStorage value) const { return value.toString(); } - template - bool operator()(const std::string &token, std::pair &value) const - { - std::size_t delimPos(token.find(m_PairDelimiter)); - if (delimPos == std::string::npos) - { - return false; - } - m_Token.assign(token, 0, delimPos); - if (!this->operator()(m_Token, value.first)) - { - return false; - } - m_Token.assign(token, delimPos + 1, token.length() - delimPos); - return this->operator()(m_Token, value.second); - } + template + std::string operator()(const std::pair& value) const { + return this->operator()(value.first) + m_PairDelimiter + this->operator()(value.second); + } - private: - char m_PairDelimiter; - mutable std::string m_Token; - }; + private: + char m_PairDelimiter; + }; - //! Entry method for objects being restored - template - static bool restore(const std::string &tag, - T &collection, - CStateRestoreTraverser &traverser) - { - return persist_utils_detail::restore(tag, collection, traverser); - } + //! \brief Utility to convert a string to a built in type + //! using CStringUtils functions. + class CORE_EXPORT CBuiltinFromString { + public: + CBuiltinFromString(const char pairDelimiter) : m_PairDelimiter(pairDelimiter) { m_Token.reserve(15); } - //! Entry method for objects being persisted template - static bool persist(const std::string &tag, - const T &collection, - CStatePersistInserter &inserter) - { - return persist_utils_detail::persist(tag, collection, inserter); - } - - //! Wrapper for containers of built in types. 
- template - static std::string toString(const CONTAINER &collection, - const char delimiter = DELIMITER, - const char pairDelimiter = PAIR_DELIMITER) - { - CBuiltinToString f(pairDelimiter); - return toString(collection, f, delimiter); + bool operator()(const std::string& token, T& value) const { + return CStringUtils::stringToType(token, value); } - //! Convert a collection to a string. - //! - //! \param[in] collection The collection to persist. - //! \param[in] stringFunc The function used to persist - //! elements of the collection. - //! \param[in] delimiter The delimiter used to separate - //! elements. - //! \note This should use RVO so just return the string. - template - static std::string toString(const CONTAINER &collection, - const F &stringFunc, - const char delimiter = DELIMITER) - { - if (collection.empty()) - { - return std::string(); + bool operator()(const std::string& token, int8_t& value) const { + int value_; + if (CStringUtils::stringToType(token, value_)) { + value = static_cast(value_); + return true; } - auto begin = collection.begin(); - auto end = collection.end(); - return toString(begin, end, stringFunc, delimiter); - } - - //! Wrapper for containers of built in types. - template - static std::string toString(ITR &begin, ITR &end, - const char delimiter = DELIMITER, - const char pairDelimiter = PAIR_DELIMITER) - { - CBuiltinToString f(pairDelimiter); - return toString(begin, end, f, delimiter); + return false; } - //! Convert the range between 2 iterators to a string. - //! - //! \param[in,out] begin The iterator at the start of the range. - //! This will be equal to end when the function returns - //! \param[in] end The iterator at the end of the range - //! \param[in] stringFunc The function used to persist - //! elements of the collection. - //! \param[in] delimiter The delimiter used to separate - //! elements. - //! \note This should use RVO so just return the string. - template - static std::string toString(ITR &begin, ITR &end, - const F &stringFunc, - const char delimiter = DELIMITER) - { - std::string result = stringFunc(*begin++); - for (/**/; begin != end; ++begin) - { - result += delimiter; - result += stringFunc(*begin); + bool operator()(const std::string& token, uint8_t& value) const { + unsigned int value_; + if (CStringUtils::stringToType(token, value_)) { + value = static_cast(value_); + return true; } - return result; - } - - //! Wrapper for arrays of built in types. - template - static bool fromString(const std::string &state, - boost::array &collection, - const char delimiter = DELIMITER, - const char pairDelimiter = PAIR_DELIMITER) - { - CBuiltinFromString f(pairDelimiter); - return fromString(state, f, collection, delimiter); - } - - //! Wrapper for containers of built in types. - template - static bool fromString(const std::string &state, - CONTAINER &collection, - const char delimiter = DELIMITER, - const char pairDelimiter = PAIR_DELIMITER, - bool append = false) - { - CBuiltinFromString f(pairDelimiter); - return fromString(state, f, collection, delimiter, append); - } - - //! Wrapper for ranges of built in types. - template - static bool fromString(const std::string &state, - ITR begin, - ITR end, - const char delimiter = DELIMITER, - const char pairDelimiter = PAIR_DELIMITER) - { - CBuiltinFromString f(pairDelimiter); - return fromString(state, f, begin, end, delimiter); + return false; } - //! Restore a vector from a string created by toString. - //! - //! \param[in] state The string description of the - //! collection. - //! 
\param[in] stringFunc The function used to restore - //! elements of the collection. - //! \param[out] collection Filled in with the elements - //! extracted from \p state. - //! \param[in] delimiter The delimiter used to separate - //! elements. - //! \param[in] append If true append the results to the - //! collection otherwise it is cleared first - //! \return True if there was no error parsing \p state - //! and false otherwise. If the state cannot be parsed - //! then an empty collection is returned. - //! \note T must have a default constructor. - //! \note The delimiter must match the delimiter used - //! for persistence. - //! \tparam F Expected to have the signature: - //! \code - //! bool (const std::string &, T &) - //! \endcode - template - static bool fromString(const std::string &state, - const F &stringFunc, - std::vector &collection, - const char delimiter = DELIMITER, - const bool append = false) - { - if (!append) - { - collection.clear(); + bool operator()(const std::string& token, int16_t& value) const { + int value_; + if (CStringUtils::stringToType(token, value_)) { + value = static_cast(value_); + return true; } + return false; + } - if (state.empty()) - { + bool operator()(const std::string& token, uint16_t& value) const { + unsigned int value_; + if (CStringUtils::stringToType(token, value_)) { + value = static_cast(value_); return true; } + return false; + } - collection.reserve(std::count(state.begin(), state.end(), delimiter) + 1); + bool operator()(const std::string& token, CFloatStorage& value) const { return value.fromString(token); } - if (fromString(state, delimiter, stringFunc, - std::back_inserter(collection)) == false) - { - collection.clear(); + template + bool operator()(const std::string& token, std::pair& value) const { + std::size_t delimPos(token.find(m_PairDelimiter)); + if (delimPos == std::string::npos) { return false; } - return true; - } - - //! Restore a boost::array from a string created by toString. - //! - //! \param[in] state The string description of the - //! collection. - //! \param[in] stringFunc The function used to restore - //! elements of the collection. - //! \param[out] collection Filled in with the elements - //! extracted from \p state. - //! \param[in] delimiter The delimiter used to separate - //! elements. - //! \return True if there was no error parsing \p state - //! and it contained exactly N elements and false otherwise. - //! \note The delimiter must match the delimiter used - //! for persistence. - //! \tparam F Expected to have the signature: - //! \code - //! bool (const std::string &, T &) - //! \endcode - template - static bool fromString(const std::string &state, - const F &stringFunc, - boost::array &collection, - const char delimiter = DELIMITER) - { - if (state.empty()) - { - LOG_ERROR("Unexpected number of elements 0" - << ", expected " << N); + m_Token.assign(token, 0, delimPos); + if (!this->operator()(m_Token, value.first)) { return false; } + m_Token.assign(token, delimPos + 1, token.length() - delimPos); + return this->operator()(m_Token, value.second); + } - std::size_t n = std::count(state.begin(), state.end(), delimiter) + 1; - if (n != N) - { - LOG_ERROR("Unexpected number of elements " << n - << ", expected " << N); - return false; - } + private: + char m_PairDelimiter; + mutable std::string m_Token; + }; + + //! 
Entry method for objects being restored + template + static bool restore(const std::string& tag, T& collection, CStateRestoreTraverser& traverser) { + return persist_utils_detail::restore(tag, collection, traverser); + } + + //! Entry method for objects being persisted + template + static bool persist(const std::string& tag, const T& collection, CStatePersistInserter& inserter) { + return persist_utils_detail::persist(tag, collection, inserter); + } + + //! Wrapper for containers of built in types. + template + static std::string toString(const CONTAINER& collection, const char delimiter = DELIMITER, const char pairDelimiter = PAIR_DELIMITER) { + CBuiltinToString f(pairDelimiter); + return toString(collection, f, delimiter); + } + + //! Convert a collection to a string. + //! + //! \param[in] collection The collection to persist. + //! \param[in] stringFunc The function used to persist + //! elements of the collection. + //! \param[in] delimiter The delimiter used to separate + //! elements. + //! \note This should use RVO so just return the string. + template + static std::string toString(const CONTAINER& collection, const F& stringFunc, const char delimiter = DELIMITER) { + if (collection.empty()) { + return std::string(); + } + auto begin = collection.begin(); + auto end = collection.end(); + return toString(begin, end, stringFunc, delimiter); + } + + //! Wrapper for containers of built in types. + template + static std::string toString(ITR& begin, ITR& end, const char delimiter = DELIMITER, const char pairDelimiter = PAIR_DELIMITER) { + CBuiltinToString f(pairDelimiter); + return toString(begin, end, f, delimiter); + } + + //! Convert the range between 2 iterators to a string. + //! + //! \param[in,out] begin The iterator at the start of the range. + //! This will be equal to end when the function returns + //! \param[in] end The iterator at the end of the range + //! \param[in] stringFunc The function used to persist + //! elements of the collection. + //! \param[in] delimiter The delimiter used to separate + //! elements. + //! \note This should use RVO so just return the string. + template + static std::string toString(ITR& begin, ITR& end, const F& stringFunc, const char delimiter = DELIMITER) { + std::string result = stringFunc(*begin++); + for (/**/; begin != end; ++begin) { + result += delimiter; + result += stringFunc(*begin); + } + return result; + } + + //! Wrapper for arrays of built in types. + template + static bool fromString(const std::string& state, + boost::array& collection, + const char delimiter = DELIMITER, + const char pairDelimiter = PAIR_DELIMITER) { + CBuiltinFromString f(pairDelimiter); + return fromString(state, f, collection, delimiter); + } + + //! Wrapper for containers of built in types. + template + static bool fromString(const std::string& state, + CONTAINER& collection, + const char delimiter = DELIMITER, + const char pairDelimiter = PAIR_DELIMITER, + bool append = false) { + CBuiltinFromString f(pairDelimiter); + return fromString(state, f, collection, delimiter, append); + } + + //! Wrapper for ranges of built in types. + template + static bool + fromString(const std::string& state, ITR begin, ITR end, const char delimiter = DELIMITER, const char pairDelimiter = PAIR_DELIMITER) { + CBuiltinFromString f(pairDelimiter); + return fromString(state, f, begin, end, delimiter); + } + + //! Restore a vector from a string created by toString. + //! + //! \param[in] state The string description of the + //! collection. + //! 
\param[in] stringFunc The function used to restore + //! elements of the collection. + //! \param[out] collection Filled in with the elements + //! extracted from \p state. + //! \param[in] delimiter The delimiter used to separate + //! elements. + //! \param[in] append If true append the results to the + //! collection otherwise it is cleared first + //! \return True if there was no error parsing \p state + //! and false otherwise. If the state cannot be parsed + //! then an empty collection is returned. + //! \note T must have a default constructor. + //! \note The delimiter must match the delimiter used + //! for persistence. + //! \tparam F Expected to have the signature: + //! \code + //! bool (const std::string &, T &) + //! \endcode + template + static bool fromString(const std::string& state, + const F& stringFunc, + std::vector& collection, + const char delimiter = DELIMITER, + const bool append = false) { + if (!append) { + collection.clear(); + } + + if (state.empty()) { + return true; + } - return fromString(state, delimiter, stringFunc, collection.begin()); + collection.reserve(std::count(state.begin(), state.end(), delimiter) + 1); + + if (fromString(state, delimiter, stringFunc, std::back_inserter(collection)) == false) { + collection.clear(); + return false; + } + return true; + } + + //! Restore a boost::array from a string created by toString. + //! + //! \param[in] state The string description of the + //! collection. + //! \param[in] stringFunc The function used to restore + //! elements of the collection. + //! \param[out] collection Filled in with the elements + //! extracted from \p state. + //! \param[in] delimiter The delimiter used to separate + //! elements. + //! \return True if there was no error parsing \p state + //! and it contained exactly N elements and false otherwise. + //! \note The delimiter must match the delimiter used + //! for persistence. + //! \tparam F Expected to have the signature: + //! \code + //! bool (const std::string &, T &) + //! \endcode + template + static bool + fromString(const std::string& state, const F& stringFunc, boost::array& collection, const char delimiter = DELIMITER) { + if (state.empty()) { + LOG_ERROR("Unexpected number of elements 0" + << ", expected " << N); + return false; + } + + std::size_t n = std::count(state.begin(), state.end(), delimiter) + 1; + if (n != N) { + LOG_ERROR("Unexpected number of elements " << n << ", expected " << N); + return false; + } + + return fromString(state, delimiter, stringFunc, collection.begin()); + } + + //! Restore a container from a string created by toString. + //! + //! \param[in] state The string description of the + //! collection. + //! \param[in] stringFunc The function used to restore + //! elements of the collection. + //! \param[out] collection Filled in with the elements + //! extracted from \p state. + //! \param[in] delimiter The delimiter used to separate + //! elements. + //! \param[in] append If true append the results to the + //! collection otherwise it is cleared first + //! \return True if there was no error parsing \p state + //! and false otherwise. If the state cannot be parsed + //! then an empty collection is returned. + //! \note The container value type must have a default + //! constructor. + //! \note The delimiter must match the delimiter used + //! for persistence. + //! \tparam F Expected to have the signature: + //! \code{.cpp} + //! bool (const std::string &, CONTAINER::value_type &) + //! 
\endcode + template + static bool fromString(const std::string& state, + const F& stringFunc, + CONTAINER& collection, + const char delimiter = DELIMITER, + bool append = false) { + using T = typename persist_utils_detail::remove_const::type; + + if (!append) { + collection.clear(); + } + + if (state.empty()) { + return true; } - //! Restore a container from a string created by toString. - //! - //! \param[in] state The string description of the - //! collection. - //! \param[in] stringFunc The function used to restore - //! elements of the collection. - //! \param[out] collection Filled in with the elements - //! extracted from \p state. - //! \param[in] delimiter The delimiter used to separate - //! elements. - //! \param[in] append If true append the results to the - //! collection otherwise it is cleared first - //! \return True if there was no error parsing \p state - //! and false otherwise. If the state cannot be parsed - //! then an empty collection is returned. - //! \note The container value type must have a default - //! constructor. - //! \note The delimiter must match the delimiter used - //! for persistence. - //! \tparam F Expected to have the signature: - //! \code{.cpp} - //! bool (const std::string &, CONTAINER::value_type &) - //! \endcode - template - static bool fromString(const std::string &state, - const F &stringFunc, - CONTAINER &collection, - const char delimiter = DELIMITER, - bool append = false) - { - using T = typename persist_utils_detail::remove_const::type; + if (fromString(state, delimiter, stringFunc, std::inserter(collection, collection.end())) == false) { + collection.clear(); + return false; + } + return true; + } + + //! Restore a range from a string created by toString. + //! + //! \param[in] state The string description of the range. + //! \param[in] stringFunc The function used to restore + //! elements of the range. + //! \param[out] begin Filled in with the elements + //! extracted from \p state. + //! \param[in] end The end of the range into which to + //! restore the elements. + //! \param[in] delimiter The delimiter used to separate + //! elements. + //! \return True if there was no error parsing \p state + //! and false otherwise. + //! \note The container value type must have a default + //! constructor. + //! \note The delimiter must match the delimiter used + //! for persistence. + //! \tparam F Expected to have the signature: + //! \code{.cpp} + //! bool (const std::string &, CONTAINER::value_type &) + //! \endcode + template + static bool fromString(const std::string& state, const F& stringFunc, ITR begin, ITR end, const char delimiter = DELIMITER) { + + if (state.empty()) { + return true; + } - if (!append) - { - collection.clear(); - } + std::size_t n = std::count(state.begin(), state.end(), delimiter) + 1; + std::size_t N = std::distance(begin, end); + if (n != N) { + LOG_ERROR("Unexpected number of elements " << n << ", expected " << N); + return false; + } - if (state.empty()) - { - return true; - } + return fromString::value_type>(state, delimiter, stringFunc, begin); + } - if (fromString(state, delimiter, stringFunc, - std::inserter(collection, collection.end())) == false) - { - collection.clear(); +private: + //! Restores to an insertion iterator. 
+ template + static bool fromString(const std::string& state, const char delimiter, const F& stringFunc, ITR inserter) { + std::size_t delimPos = state.find(delimiter); + if (delimPos == std::string::npos) { + T element; + if (stringFunc(state, element) == false) { + LOG_ERROR("Invalid state " << state); return false; } + *inserter = element; + ++inserter; return true; } - //! Restore a range from a string created by toString. - //! - //! \param[in] state The string description of the range. - //! \param[in] stringFunc The function used to restore - //! elements of the range. - //! \param[out] begin Filled in with the elements - //! extracted from \p state. - //! \param[in] end The end of the range into which to - //! restore the elements. - //! \param[in] delimiter The delimiter used to separate - //! elements. - //! \return True if there was no error parsing \p state - //! and false otherwise. - //! \note The container value type must have a default - //! constructor. - //! \note The delimiter must match the delimiter used - //! for persistence. - //! \tparam F Expected to have the signature: - //! \code{.cpp} - //! bool (const std::string &, CONTAINER::value_type &) - //! \endcode - template - static bool fromString(const std::string &state, - const F &stringFunc, - ITR begin, ITR end, - const char delimiter = DELIMITER) + // Reuse this same string to avoid as many allocations + // as possible. + // + // The reservation is 15 because for string implementations + // using the short string optimisation we don't want to + // cause an unnecessary allocation. + std::string token; + token.reserve(15); + token.assign(state, 0, delimPos); { - - if (state.empty()) - { - return true; - } - - std::size_t n = std::count(state.begin(), state.end(), delimiter) + 1; - std::size_t N = std::distance(begin, end); - if (n != N) - { - LOG_ERROR("Unexpected number of elements " << n - << ", expected " << N); + T element; + if (stringFunc(token, element) == false) { + LOG_ERROR("Invalid element 0 : element " << token << " in " << state); return false; } - - return fromString::value_type>( - state, delimiter, stringFunc, begin); + *inserter = element; + ++inserter; } - private: - //! Restores to an insertion iterator. - template - static bool fromString(const std::string &state, - const char delimiter, - const F &stringFunc, - ITR inserter) - { - std::size_t delimPos = state.find(delimiter); - if (delimPos == std::string::npos) - { - T element; - if (stringFunc(state, element) == false) - { - LOG_ERROR("Invalid state " << state); - return false; - } - *inserter = element; - ++inserter; - return true; - } - - // Reuse this same string to avoid as many allocations - // as possible. - // - // The reservation is 15 because for string implementations - // using the short string optimisation we don't want to - // cause an unnecessary allocation. 
- std::string token; - token.reserve(15); - token.assign(state, 0, delimPos); - { - T element; - if (stringFunc(token, element) == false) - { - LOG_ERROR("Invalid element 0 : element " << token - << " in " << state); - return false; - } - *inserter = element; - ++inserter; + std::size_t i = 1u; + std::size_t lastDelimPos(delimPos); + while (lastDelimPos != std::string::npos) { + delimPos = state.find(delimiter, lastDelimPos + 1); + if (delimPos == std::string::npos) { + token.assign(state, lastDelimPos + 1, state.length() - lastDelimPos); + } else { + token.assign(state, lastDelimPos + 1, delimPos - lastDelimPos - 1); } - std::size_t i = 1u; - std::size_t lastDelimPos(delimPos); - while (lastDelimPos != std::string::npos) - { - delimPos = state.find(delimiter, lastDelimPos + 1); - if (delimPos == std::string::npos) - { - token.assign(state, lastDelimPos + 1, state.length() - lastDelimPos); - } - else - { - token.assign(state, lastDelimPos + 1, delimPos - lastDelimPos - 1); - } - - T element; - if (stringFunc(token, element) == false) - { - LOG_ERROR("Invalid element " << i - << " : element " << token - << " in " << state); - return false; - } - *inserter = element; - - ++i; - lastDelimPos = delimPos; - ++inserter; + T element; + if (stringFunc(token, element) == false) { + LOG_ERROR("Invalid element " << i << " : element " << token << " in " << state); + return false; } + *inserter = element; - return true; + ++i; + lastDelimPos = delimPos; + ++inserter; } -}; + return true; + } +}; -namespace persist_utils_detail -{ +namespace persist_utils_detail { //! Basic persist functionality implementation, for PODs or pairs template<> -class CPersisterImpl -{ - public: - template - static void dispatch(const std::string &tag, - const T &t, - CStatePersistInserter &inserter) - { - CPersistUtils::CBuiltinToString toString(CPersistUtils::PAIR_DELIMITER); - inserter.insertValue(tag, toString(t)); - } - - template - static void dispatch(const std::string &tag, - const std::pair &t, - CStatePersistInserter &inserter) - { - inserter.insertLevel(tag, boost::bind(&newLevel, boost::cref(t), _1)); - } - - private: - template - static void newLevel(const std::pair &t, - CStatePersistInserter &inserter) - { - persist(FIRST_TAG, t.first, inserter); - persist(SECOND_TAG, t.second, inserter); - } +class CPersisterImpl { +public: + template + static void dispatch(const std::string& tag, const T& t, CStatePersistInserter& inserter) { + CPersistUtils::CBuiltinToString toString(CPersistUtils::PAIR_DELIMITER); + inserter.insertValue(tag, toString(t)); + } + + template + static void dispatch(const std::string& tag, const std::pair& t, CStatePersistInserter& inserter) { + inserter.insertLevel(tag, boost::bind(&newLevel, boost::cref(t), _1)); + } + +private: + template + static void newLevel(const std::pair& t, CStatePersistInserter& inserter) { + persist(FIRST_TAG, t.first, inserter); + persist(SECOND_TAG, t.second, inserter); + } }; //! Persister class for containers. If contained types are PODs //! they are written as a delimited string, or strings written //! as straight strings, else added as a new level and re-dispatched template<> -class CPersisterImpl -{ - public: - template - static void dispatch(const std::string &tag, - const T &container, - CStatePersistInserter &inserter) - { - doInsert(tag, - container, - inserter, - boost::integral_constant::value>(), - boost::false_type()); - } - - //! Specialisation for boost::unordered_set which orders values. 
- template - static void dispatch(const std::string &tag, - const boost::unordered_set &container, - CStatePersistInserter &inserter) - { - using TVec = typename std::vector; - using TCItr = typename boost::unordered_set::const_iterator; - using TCItrVec = typename std::vector; - - if (boost::is_arithmetic::value) - { - TVec values(container.begin(), container.end()); - std::sort(values.begin(), values.end()); - doInsert(tag, values, inserter, boost::true_type(), boost::false_type()); - } - else - { - TCItrVec iterators; - iterators.reserve(container.size()); - for (TCItr i = container.begin(); i != container.end(); ++i) - { - iterators.push_back(i); - } - - // Sort the values to ensure consistent persist state. - std::sort(iterators.begin(), iterators.end(), - [](TCItr lhs, TCItr rhs){ return *lhs < *rhs; }); - doInsert(tag, iterators, inserter, boost::false_type(), boost::true_type()); - } - } - - //! Specialisation for boost::unordered_map which orders values. - template - static void dispatch(const std::string &tag, - const boost::unordered_map &container, - CStatePersistInserter &inserter) - { - using TCItr = typename boost::unordered_map::const_iterator; - using TCItrVec = typename std::vector; - +class CPersisterImpl { +public: + template + static void dispatch(const std::string& tag, const T& container, CStatePersistInserter& inserter) { + doInsert(tag, + container, + inserter, + boost::integral_constant::value>(), + boost::false_type()); + } + + //! Specialisation for boost::unordered_set which orders values. + template + static void dispatch(const std::string& tag, const boost::unordered_set& container, CStatePersistInserter& inserter) { + using TVec = typename std::vector; + using TCItr = typename boost::unordered_set::const_iterator; + using TCItrVec = typename std::vector; + + if (boost::is_arithmetic::value) { + TVec values(container.begin(), container.end()); + std::sort(values.begin(), values.end()); + doInsert(tag, values, inserter, boost::true_type(), boost::false_type()); + } else { TCItrVec iterators; iterators.reserve(container.size()); - for (TCItr i = container.begin(); i != container.end(); ++i) - { + for (TCItr i = container.begin(); i != container.end(); ++i) { iterators.push_back(i); } - // Sort the keys to ensure consistent persist state. - std::sort(iterators.begin(), iterators.end(), - [](TCItr lhs, TCItr rhs) { return lhs->first < rhs->first; }); + // Sort the values to ensure consistent persist state. + std::sort(iterators.begin(), iterators.end(), [](TCItr lhs, TCItr rhs) { return *lhs < *rhs; }); doInsert(tag, iterators, inserter, boost::false_type(), boost::true_type()); } - - //! Specialisation for std::string, which has iterators but doesn't need - //! to be split up into individual characters - static void dispatch(const std::string &tag, - const std::string &str, - CStatePersistInserter &inserter) - { - inserter.insertValue(tag, str); - } - - private: - //! Handle the case of a built-in type. - //! - //! \note Type T is not an iterator - template - static void doInsert(const std::string &tag, - const T &container, - CStatePersistInserter &inserter, - boost::true_type, - boost::false_type) - { - inserter.insertValue(tag, CPersistUtils::toString(container)); - } - - //! Handle the case for a non-built-in type, which will be added - //! as a new level. - //! - //! 
\note Type T is not an iterator - template - static void doInsert(const std::string &tag, - const T &container, - CStatePersistInserter &inserter, - boost::false_type, - boost::false_type) - { - using TCItr = typename T::const_iterator; - inserter.insertLevel(tag, boost::bind(&newLevel, - container.begin(), container.end(), container.size(), _1)); - } - - //! Handle the case for a non-built-in type, which will be added - //! as a new level. - //! - //! \note Type T is an iterator - template - static void doInsert(const std::string &tag, - const T &t, - CStatePersistInserter &inserter, - boost::false_type, - boost::true_type) - { - using TCItr = boost::indirect_iterator; - inserter.insertLevel(tag, boost::bind(&newLevel, - TCItr(t.begin()), TCItr(t.end()), t.size(), _1)); - } - - //! Dispatch a collection of items - //! - //! \note The container size is added to allow the restorer to - //! pre-size the new container if appropriate - template - static void newLevel(ITR begin, - ITR end, - std::size_t size, - CStatePersistInserter &inserter) - { - inserter.insertValue(SIZE_TAG, size); - for (; begin != end; ++begin) - { - persist(FIRST_TAG, *begin, inserter); - } - } + } + + //! Specialisation for boost::unordered_map which orders values. + template + static void dispatch(const std::string& tag, const boost::unordered_map& container, CStatePersistInserter& inserter) { + using TCItr = typename boost::unordered_map::const_iterator; + using TCItrVec = typename std::vector; + + TCItrVec iterators; + iterators.reserve(container.size()); + for (TCItr i = container.begin(); i != container.end(); ++i) { + iterators.push_back(i); + } + + // Sort the keys to ensure consistent persist state. + std::sort(iterators.begin(), iterators.end(), [](TCItr lhs, TCItr rhs) { return lhs->first < rhs->first; }); + doInsert(tag, iterators, inserter, boost::false_type(), boost::true_type()); + } + + //! Specialisation for std::string, which has iterators but doesn't need + //! to be split up into individual characters + static void dispatch(const std::string& tag, const std::string& str, CStatePersistInserter& inserter) { + inserter.insertValue(tag, str); + } + +private: + //! Handle the case of a built-in type. + //! + //! \note Type T is not an iterator + template + static void doInsert(const std::string& tag, const T& container, CStatePersistInserter& inserter, boost::true_type, boost::false_type) { + inserter.insertValue(tag, CPersistUtils::toString(container)); + } + + //! Handle the case for a non-built-in type, which will be added + //! as a new level. + //! + //! \note Type T is not an iterator + template + static void + doInsert(const std::string& tag, const T& container, CStatePersistInserter& inserter, boost::false_type, boost::false_type) { + using TCItr = typename T::const_iterator; + inserter.insertLevel(tag, boost::bind(&newLevel, container.begin(), container.end(), container.size(), _1)); + } + + //! Handle the case for a non-built-in type, which will be added + //! as a new level. + //! + //! \note Type T is an iterator + template + static void doInsert(const std::string& tag, const T& t, CStatePersistInserter& inserter, boost::false_type, boost::true_type) { + using TCItr = boost::indirect_iterator; + inserter.insertLevel(tag, boost::bind(&newLevel, TCItr(t.begin()), TCItr(t.end()), t.size(), _1)); + } + + //! Dispatch a collection of items + //! + //! \note The container size is added to allow the restorer to + //! 
pre-size the new container if appropriate + template + static void newLevel(ITR begin, ITR end, std::size_t size, CStatePersistInserter& inserter) { + inserter.insertValue(SIZE_TAG, size); + for (; begin != end; ++begin) { + persist(FIRST_TAG, *begin, inserter); + } + } }; //! \brief Persister for objects which have an acceptPersistInserter method. template<> -class CPersisterImpl -{ - public: - template - static void dispatch(const std::string &tag, - const T &t, - CStatePersistInserter &inserter) - { - inserter.insertLevel(tag, boost::bind(&newLevel, boost::cref(t), _1)); - } - - private: - template - static void newLevel(const T &t, CStatePersistInserter &inserter) - { - t.acceptPersistInserter(inserter); - } +class CPersisterImpl { +public: + template + static void dispatch(const std::string& tag, const T& t, CStatePersistInserter& inserter) { + inserter.insertLevel(tag, boost::bind(&newLevel, boost::cref(t), _1)); + } + +private: + template + static void newLevel(const T& t, CStatePersistInserter& inserter) { + t.acceptPersistInserter(inserter); + } }; //! \brief Persister for objects which have a toDelimited method. template<> -class CPersisterImpl -{ - public: - template - static void dispatch(const std::string &tag, - const T &t, - CStatePersistInserter &inserter) - { - inserter.insertValue(tag, t.toDelimited()); - } +class CPersisterImpl { +public: + template + static void dispatch(const std::string& tag, const T& t, CStatePersistInserter& inserter) { + inserter.insertValue(tag, t.toDelimited()); + } }; - //! \brief Restorer class for PODs and pairs. template<> -class CRestorerImpl -{ - public: - template - static bool dispatch(const std::string &tag, - T &t, - CStateRestoreTraverser &traverser) - { - bool ret = true; - if (traverser.name() == tag) - { - CPersistUtils::CBuiltinFromString stringFunc(CPersistUtils::PAIR_DELIMITER); - ret = stringFunc(traverser.value(), t); +class CRestorerImpl { +public: + template + static bool dispatch(const std::string& tag, T& t, CStateRestoreTraverser& traverser) { + bool ret = true; + if (traverser.name() == tag) { + CPersistUtils::CBuiltinFromString stringFunc(CPersistUtils::PAIR_DELIMITER); + ret = stringFunc(traverser.value(), t); + } + return ret; + } + + template + static bool dispatch(const std::string& tag, std::pair& t, CStateRestoreTraverser& traverser) { + bool ret = true; + if (traverser.name() == tag) { + if (!traverser.hasSubLevel()) { + LOG_ERROR("SubLevel mismatch in restore, at " << traverser.name()); + return false; } - return ret; + ret = traverser.traverseSubLevel(boost::bind(&newLevel, boost::ref(t), _1)); } + return ret; + } - template - static bool dispatch(const std::string &tag, - std::pair &t, - CStateRestoreTraverser &traverser) - { - bool ret = true; - if (traverser.name() == tag) - { - if (!traverser.hasSubLevel()) - { - LOG_ERROR("SubLevel mismatch in restore, at " << traverser.name()); - return false; - } - ret = traverser.traverseSubLevel(boost::bind(&newLevel, boost::ref(t), _1)); - } - return ret; +private: + template + static bool newLevel(std::pair& t, CStateRestoreTraverser& traverser) { + if (traverser.name() != FIRST_TAG) { + LOG_ERROR("Tag mismatch at " << traverser.name() << ", expected " << FIRST_TAG); + return false; } - - private: - template - static bool newLevel(std::pair &t, - CStateRestoreTraverser &traverser) - { - if (traverser.name() != FIRST_TAG) - { - LOG_ERROR("Tag mismatch at " << traverser.name() << ", expected " << FIRST_TAG); - return false; - } - if (!restore(FIRST_TAG, t.first, 
traverser)) - { - LOG_ERROR("Restore error at " << traverser.name() << ": " << traverser.value()); - return false; - } - if (!traverser.next()) - { - LOG_ERROR("Restore error at " << traverser.name() << ": " << traverser.value()); - return false; - } - if (traverser.name() != SECOND_TAG) - { - LOG_ERROR("Tag mismatch at " << traverser.name() << ", expected " << SECOND_TAG); - return false; - } - if (!restore(SECOND_TAG, t.second, traverser)) - { - LOG_ERROR("Restore error at " << traverser.name() << ": " << traverser.value()); - return false; - } - return true; + if (!restore(FIRST_TAG, t.first, traverser)) { + LOG_ERROR("Restore error at " << traverser.name() << ": " << traverser.value()); + return false; + } + if (!traverser.next()) { + LOG_ERROR("Restore error at " << traverser.name() << ": " << traverser.value()); + return false; + } + if (traverser.name() != SECOND_TAG) { + LOG_ERROR("Tag mismatch at " << traverser.name() << ", expected " << SECOND_TAG); + return false; + } + if (!restore(SECOND_TAG, t.second, traverser)) { + LOG_ERROR("Restore error at " << traverser.name() << ": " << traverser.value()); + return false; } + return true; + } }; //! \brief Restorer class for collections of items. template<> -class CRestorerImpl -{ - public: +class CRestorerImpl { +public: + template + static bool dispatch(const std::string& tag, T& container, CStateRestoreTraverser& traverser) { + return doTraverse(tag, container, traverser, boost::integral_constant::value>()); + } + + //! Specialisation for std::string, which has iterators but doesn't + //! need to be split up into individual characters + static bool dispatch(const std::string& tag, std::string& str, CStateRestoreTraverser& traverser) { + if (traverser.name() == tag) { + str = traverser.value(); + } + return true; + } + +private: + struct SSubLevel { template - static bool dispatch(const std::string &tag, - T &container, - CStateRestoreTraverser &traverser) - { - return doTraverse(tag, - container, - traverser, - boost::integral_constant::value>()); - } - - //! Specialisation for std::string, which has iterators but doesn't - //! 
need to be split up into individual characters - static bool dispatch(const std::string &tag, - std::string &str, - CStateRestoreTraverser &traverser) - { - if (traverser.name() == tag) - { - str = traverser.value(); - } - return true; - } - - private: - struct SSubLevel - { - template - bool operator()(T &container, CStateRestoreTraverser &traverser) - { - using TValueType = typename remove_const::type; - do - { - if (traverser.name() == SIZE_TAG) - { - std::size_t size = 0; - if (!core::CStringUtils::stringToType(traverser.value(), size)) - { - LOG_WARN("Failed to determine size: " << traverser.value()); - } - else - { - reserve(container, size); - } + bool operator()(T& container, CStateRestoreTraverser& traverser) { + using TValueType = typename remove_const::type; + do { + if (traverser.name() == SIZE_TAG) { + std::size_t size = 0; + if (!core::CStringUtils::stringToType(traverser.value(), size)) { + LOG_WARN("Failed to determine size: " << traverser.value()); + } else { + reserve(container, size); } - else - { - TValueType value; - if (!restore(FIRST_TAG, value, traverser)) - { - LOG_ERROR("Restoration error at " << traverser.name()); - return false; - } - container.insert(container.end(), value); + } else { + TValueType value; + if (!restore(FIRST_TAG, value, traverser)) { + LOG_ERROR("Restoration error at " << traverser.name()); + return false; } + container.insert(container.end(), value); } - while (traverser.next()); - return true; - } + } while (traverser.next()); + return true; + } - template - bool operator()(boost::array &container, CStateRestoreTraverser &traverser) - { - using TValueType = typename remove_const::type; - typename boost::array::iterator i = container.begin(); - do - { - TValueType value; - if (traverser.name() == FIRST_TAG) - { - if (!restore(FIRST_TAG, value, traverser)) - { - LOG_ERROR("Restoration error at " << traverser.name()); - return false; - } - *(i++) = value; + template + bool operator()(boost::array& container, CStateRestoreTraverser& traverser) { + using TValueType = typename remove_const::type; + typename boost::array::iterator i = container.begin(); + do { + TValueType value; + if (traverser.name() == FIRST_TAG) { + if (!restore(FIRST_TAG, value, traverser)) { + LOG_ERROR("Restoration error at " << traverser.name()); + return false; } + *(i++) = value; } - while (traverser.next()); - return true; - } - }; + } while (traverser.next()); + return true; + } + }; - private: - template - static bool doTraverse(const std::string &tag, - T &container, - CStateRestoreTraverser &traverser, - boost::true_type) - { - bool ret = true; - if (traverser.name() == tag) - { - ret = CPersistUtils::fromString(traverser.value(), container); - } - return ret; +private: + template + static bool doTraverse(const std::string& tag, T& container, CStateRestoreTraverser& traverser, boost::true_type) { + bool ret = true; + if (traverser.name() == tag) { + ret = CPersistUtils::fromString(traverser.value(), container); } + return ret; + } - template - static bool doTraverse(const std::string &tag, - T &container, - CStateRestoreTraverser &traverser, - boost::false_type) - { - bool ret = true; - if (traverser.name() == tag) - { - if (!traverser.hasSubLevel()) - { - LOG_ERROR("SubLevel mismatch in restore, at " << traverser.name()); - return false; - } - ret = traverser.traverseSubLevel(boost::bind(SSubLevel(), boost::ref(container), _1)); + template + static bool doTraverse(const std::string& tag, T& container, CStateRestoreTraverser& traverser, boost::false_type) { + bool 
ret = true;
+        if (traverser.name() == tag) {
+            if (!traverser.hasSubLevel()) {
+                LOG_ERROR("SubLevel mismatch in restore, at " << traverser.name());
+                return false;
+            }
+            ret = traverser.traverseSubLevel(boost::bind(SSubLevel(), boost::ref(container), _1));
+        }
+        return ret;
+    }
 };

 //! \brief Restorer for objects which have an acceptRestoreTraverser method.
 template<>
-class CRestorerImpl
-{
-    public:
-        template<typename T>
-        static bool dispatch(const std::string &tag,
-                             T &t,
-                             CStateRestoreTraverser &traverser)
-        {
-            bool ret = true;
-            if (traverser.name() == tag)
-            {
-                if (!traverser.hasSubLevel())
-                {
-                    LOG_ERROR("SubLevel mismatch in restore, at " << traverser.name());
-                    return false;
-                }
-                ret = traverser.traverseSubLevel(boost::bind(&subLevel, boost::ref(t), _1));
+class CRestorerImpl {
+public:
+    template<typename T>
+    static bool dispatch(const std::string& tag, T& t, CStateRestoreTraverser& traverser) {
+        bool ret = true;
+        if (traverser.name() == tag) {
+            if (!traverser.hasSubLevel()) {
+                LOG_ERROR("SubLevel mismatch in restore, at " << traverser.name());
+                return false;
             }
-            return ret;
+            ret = traverser.traverseSubLevel(boost::bind(&subLevel, boost::ref(t), _1));
         }
+        return ret;
+    }

-    private:
-        template<typename T>
-        static bool subLevel(T &t, CStateRestoreTraverser &traverser)
-        {
-            return t.acceptRestoreTraverser(traverser);
-        }
+private:
+    template<typename T>
+    static bool subLevel(T& t, CStateRestoreTraverser& traverser) {
+        return t.acceptRestoreTraverser(traverser);
+    }
 };

-
 //! \brief Restorer for objects which have a fromDelimited method.
 template<>
-class CRestorerImpl
-{
-    public:
-        template<typename T>
-        static bool dispatch(const std::string &/*tag*/,
-                             T &t,
-                             CStateRestoreTraverser &traverser)
-        {
-            return t.fromDelimited(traverser.value());
-        }
+class CRestorerImpl {
+public:
+    template<typename T>
+    static bool dispatch(const std::string& /*tag*/, T& t, CStateRestoreTraverser& traverser) {
+        return t.fromDelimited(traverser.value());
+    }
 };

 }  // persist_utils_detail::
-
 }
 }
diff --git a/include/core/CPolymorphicStackObjectCPtr.h b/include/core/CPolymorphicStackObjectCPtr.h
index 70c6daf3c1..8cd46e4a77 100644
--- a/include/core/CPolymorphicStackObjectCPtr.h
+++ b/include/core/CPolymorphicStackObjectCPtr.h
@@ -12,10 +12,8 @@
 #include
 #include

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {

 class CORE_EXPORT CNullPolymorphicStackObjectCPtr {};

 //! \brief A stack-based pointer to polymorphic object emulator.
@@ -28,94 +26,80 @@ class CORE_EXPORT CNullPolymorphicStackObjectCPtr {};
 //! it behaves exactly like a pointer to the base class in terms of usage.
 //! This is to support runtime polymorphism without using the heap.
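//!
//! A usage sketch with hypothetical types, assuming the trailing derived
//! class template parameters are defaulted so only the types in use need
//! be listed:
//! \code{.cpp}
//! struct SBase { virtual ~SBase() = default; virtual int value() const = 0; };
//! struct SOne : SBase { virtual int value() const { return 1; } };
//! struct STwo : SBase { virtual int value() const { return 2; } };
//!
//! using TBaseCPtr = ml::core::CPolymorphicStackObjectCPtr<SBase, SOne, STwo>;
//! TBaseCPtr ptr{SOne{}};
//! int v = ptr->value(); // dispatches to SOne::value, no heap allocation
//! \endcode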
template -class CPolymorphicStackObjectCPtr -{ - private: - using TConstBase = const typename boost::remove_const::type; - using TConstD1 = const typename boost::remove_const::type; - using TConstD2 = const typename boost::remove_const::type; - using TConstD3 = const typename boost::remove_const::type; - using TConstD4 = const typename boost::remove_const::type; - - public: - CPolymorphicStackObjectCPtr() : m_Storage(CNullPolymorphicStackObjectCPtr()) {} - - template - explicit CPolymorphicStackObjectCPtr(const T &d) : m_Storage(d) {} - - template - CPolymorphicStackObjectCPtr(const CPolymorphicStackObjectCPtr &other) - { -#define MAYBE_SET(TYPE) { \ - TYPE *d = other.template get(); \ - if (d) \ - { \ - m_Storage = *d; \ - return; \ - } \ - } - MAYBE_SET(TConstD1); - MAYBE_SET(TConstD2); - MAYBE_SET(TConstD3); - MAYBE_SET(TConstD4); +class CPolymorphicStackObjectCPtr { +private: + using TConstBase = const typename boost::remove_const::type; + using TConstD1 = const typename boost::remove_const::type; + using TConstD2 = const typename boost::remove_const::type; + using TConstD3 = const typename boost::remove_const::type; + using TConstD4 = const typename boost::remove_const::type; + +public: + CPolymorphicStackObjectCPtr() : m_Storage(CNullPolymorphicStackObjectCPtr()) {} + + template + explicit CPolymorphicStackObjectCPtr(const T& d) : m_Storage(d) {} + + template + CPolymorphicStackObjectCPtr(const CPolymorphicStackObjectCPtr& other) { +#define MAYBE_SET(TYPE) \ + { \ + TYPE* d = other.template get(); \ + if (d) { \ + m_Storage = *d; \ + return; \ + } \ + } + MAYBE_SET(TConstD1); + MAYBE_SET(TConstD2); + MAYBE_SET(TConstD3); + MAYBE_SET(TConstD4); #undef MAYBE_SET - m_Storage = CNullPolymorphicStackObjectCPtr(); - } - - template - const CPolymorphicStackObjectCPtr &operator=(const CPolymorphicStackObjectCPtr &other) - { - CPolymorphicStackObjectCPtr tmp(other); - this->swap(tmp); - return *this; - } - - operator bool () const - { - return boost::relaxed_get(&m_Storage) == 0; - } - - TConstBase *operator->() const - { -#define MAYBE_RETURN(TYPE) { \ - TYPE *result = boost::relaxed_get(&m_Storage); \ - if (result) \ - { \ - return static_cast(result); \ - } \ - } - MAYBE_RETURN(TConstD1); - MAYBE_RETURN(TConstD2); - MAYBE_RETURN(TConstD3); - MAYBE_RETURN(TConstD4); + m_Storage = CNullPolymorphicStackObjectCPtr(); + } + + template + const CPolymorphicStackObjectCPtr& operator=(const CPolymorphicStackObjectCPtr& other) { + CPolymorphicStackObjectCPtr tmp(other); + this->swap(tmp); + return *this; + } + + operator bool() const { return boost::relaxed_get(&m_Storage) == 0; } + + TConstBase* operator->() const { +#define MAYBE_RETURN(TYPE) \ + { \ + TYPE* result = boost::relaxed_get(&m_Storage); \ + if (result) { \ + return static_cast(result); \ + } \ + } + MAYBE_RETURN(TConstD1); + MAYBE_RETURN(TConstD2); + MAYBE_RETURN(TConstD3); + MAYBE_RETURN(TConstD4); #undef MAYBE_RETURN - return 0; - } - - TConstBase &operator*() const - { - return *(this->operator->()); - } - - template const T *get() const - { - return boost::relaxed_get(&m_Storage); - } - - private: - void swap(CPolymorphicStackObjectCPtr &other) - { - m_Storage.swap(other.m_Storage); - } - - private: - using TStorage = boost::variant; - - private: - //! The static storage of the actual type. 
- TStorage m_Storage; -}; + return 0; + } + + TConstBase& operator*() const { return *(this->operator->()); } + + template + const T* get() const { + return boost::relaxed_get(&m_Storage); + } +private: + void swap(CPolymorphicStackObjectCPtr& other) { m_Storage.swap(other.m_Storage); } + +private: + using TStorage = boost::variant; + +private: + //! The static storage of the actual type. + TStorage m_Storage; +}; } } diff --git a/include/core/CProcess.h b/include/core/CProcess.h index 78946b755b..9d7a2ea3cb 100644 --- a/include/core/CProcess.h +++ b/include/core/CProcess.h @@ -19,12 +19,8 @@ #include #endif - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Encapsulates actions related to process lifecycle @@ -41,120 +37,114 @@ namespace core //! implementation implements the calls that are required by //! Windows for applications that want to run as Windows services. //! -class CORE_EXPORT CProcess : private CNonCopyable -{ - public: - //! These messages need to be 100% standard across all services - static const char *STARTING_MSG; - static const char *STARTED_MSG; - static const char *STOPPING_MSG; - static const char *STOPPED_MSG; - - public: - //! Prototype of the mlMain() function - using TMlMainFunc = int (*)(int, char *[]); - - //! Vector of process arguments - using TStrVec = std::vector; - using TStrVecCItr = TStrVec::const_iterator; - - //! The shutdown function - using TShutdownFunc = std::function; - - //! Process ID type +class CORE_EXPORT CProcess : private CNonCopyable { +public: + //! These messages need to be 100% standard across all services + static const char* STARTING_MSG; + static const char* STARTED_MSG; + static const char* STOPPING_MSG; + static const char* STOPPED_MSG; + +public: + //! Prototype of the mlMain() function + using TMlMainFunc = int (*)(int, char* []); + + //! Vector of process arguments + using TStrVec = std::vector; + using TStrVecCItr = TStrVec::const_iterator; + + //! The shutdown function + using TShutdownFunc = std::function; + +//! Process ID type #ifdef Windows - using TPid = DWORD; + using TPid = DWORD; #else - using TPid = pid_t; + using TPid = pid_t; #endif - public: - //! Access to singleton - static CProcess &instance(); +public: + //! Access to singleton + static CProcess& instance(); - //! Is this process running as a Windows service? - bool isService() const; + //! Is this process running as a Windows service? + bool isService() const; - //! Get the process ID - TPid id() const; + //! Get the process ID + TPid id() const; - //! Get the parent process ID - TPid parentId() const; + //! Get the parent process ID + TPid parentId() const; - //! If this process is not running as a Windows service, this call will - //! immediately pass control to the mlMain() function. If this - //! process is running as a Windows service, the thread that calls this - //! method will become the service dispatcher thread. - bool startDispatcher(TMlMainFunc mlMain, - int argc, - char *argv[]); + //! If this process is not running as a Windows service, this call will + //! immediately pass control to the mlMain() function. If this + //! process is running as a Windows service, the thread that calls this + //! method will become the service dispatcher thread. + bool startDispatcher(TMlMainFunc mlMain, int argc, char* argv[]); - //! Check if the application is initialised - bool isInitialised() const; + //! Check if the application is initialised + bool isInitialised() const; - //! 
Record successful completion of the application's initialisation - //! phase. This must be passed a shutdown function that can be used - //! to stop the application gracefully if requested. - void initialisationComplete(const TShutdownFunc &shutdownFunc); + //! Record successful completion of the application's initialisation + //! phase. This must be passed a shutdown function that can be used + //! to stop the application gracefully if requested. + void initialisationComplete(const TShutdownFunc& shutdownFunc); - //! Record successful completion of the application's initialisation - //! phase. No shutdown function is passed, so the application will - //! not be able to stop gracefully. - void initialisationComplete(); + //! Record successful completion of the application's initialisation + //! phase. No shutdown function is passed, so the application will + //! not be able to stop gracefully. + void initialisationComplete(); - //! Check if the application is running - bool isRunning() const; + //! Check if the application is running + bool isRunning() const; - //! Instruct the application to shutdown gracefully. This will only - //! succeed if initialisation has been reported to be complete. (Even - //! if this method returns success, the application will only shut - //! down as gracefully if the shutdown function works as it should.) - bool shutdown(); + //! Instruct the application to shutdown gracefully. This will only + //! succeed if initialisation has been reported to be complete. (Even + //! if this method returns success, the application will only shut + //! down as gracefully if the shutdown function works as it should.) + bool shutdown(); #ifdef Windows - //! Windows service main function - static void WINAPI serviceMain(DWORD argc, char *argv[]); + //! Windows service main function + static void WINAPI serviceMain(DWORD argc, char* argv[]); - //! Windows service control function - static void WINAPI serviceCtrlHandler(DWORD ctrlType); + //! Windows service control function + static void WINAPI serviceCtrlHandler(DWORD ctrlType); #endif - private: - // Constructor for a singleton is private. - CProcess(); +private: + // Constructor for a singleton is private. + CProcess(); - private: - //! Is this process running as a Windows service? - bool m_IsService; +private: + //! Is this process running as a Windows service? + bool m_IsService; - //! Is this process initialised? - bool m_Initialised; + //! Is this process initialised? + bool m_Initialised; - //! Is this process running? - bool m_Running; + //! Is this process running? + bool m_Running; - //! Address of the mlMain() function to call - TMlMainFunc m_MlMainFunc; + //! Address of the mlMain() function to call + TMlMainFunc m_MlMainFunc; - //! Original arguments passed to the program's main() function - TStrVec m_Args; + //! Original arguments passed to the program's main() function + TStrVec m_Args; #ifdef Windows - //! Service handle (will be 0 if we're not running as a service) - SERVICE_STATUS_HANDLE m_ServiceHandle; + //! Service handle (will be 0 if we're not running as a service) + SERVICE_STATUS_HANDLE m_ServiceHandle; #endif - //! Lock to protect the shutdown function object - CFastMutex m_ShutdownFuncMutex; + //! Lock to protect the shutdown function object + CFastMutex m_ShutdownFuncMutex; - //! Function to call if the process is instructed to shut down. - //! Will be empty until initialisation is complete. - TShutdownFunc m_ShutdownFunc; + //! Function to call if the process is instructed to shut down. + //! 
Will be empty until initialisation is complete. + TShutdownFunc m_ShutdownFunc; }; - - } } #endif // INCLUDED_ml_core_CProcess_h - diff --git a/include/core/CProcessPriority.h b/include/core/CProcessPriority.h index b69ea8e01c..9a1a9f29d0 100644 --- a/include/core/CProcessPriority.h +++ b/include/core/CProcessPriority.h @@ -9,12 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Functions related to adjusting process priority @@ -38,17 +34,13 @@ namespace core //! it is more likely to be killed than other processes when the Linux kernel //! decides that there isn't enough free memory. //! -class CORE_EXPORT CProcessPriority : private CNonInstantiatable -{ - public: - //! Reduce whatever priority measures are deemed appropriate for the - //! current OS. - static void reducePriority(); +class CORE_EXPORT CProcessPriority : private CNonInstantiatable { +public: + //! Reduce whatever priority measures are deemed appropriate for the + //! current OS. + static void reducePriority(); }; - - } } #endif // INCLUDED_ml_core_CProcessPriority_h - diff --git a/include/core/CProgName.h b/include/core/CProgName.h index a8c938fa5a..c499a07be2 100644 --- a/include/core/CProgName.h +++ b/include/core/CProgName.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Get the name/location of the current program. @@ -32,21 +28,17 @@ namespace core //! For the name, just the program name is returned, with no path //! or extension. //! -class CORE_EXPORT CProgName : private CNonInstantiatable -{ - public: - //! Get the name of the current program. On error, an empty string is - //! returned. - static std::string progName(); - - //! Get the directory where the current program's executable image is - //! located. On error, an empty string is returned. - static std::string progDir(); +class CORE_EXPORT CProgName : private CNonInstantiatable { +public: + //! Get the name of the current program. On error, an empty string is + //! returned. + static std::string progName(); + + //! Get the directory where the current program's executable image is + //! located. On error, an empty string is returned. + static std::string progDir(); }; - - } } #endif // INCLUDED_ml_core_CProgName_h - diff --git a/include/core/CRapidJsonConcurrentLineWriter.h b/include/core/CRapidJsonConcurrentLineWriter.h index d5afd7db6e..356f714127 100644 --- a/include/core/CRapidJsonConcurrentLineWriter.h +++ b/include/core/CRapidJsonConcurrentLineWriter.h @@ -10,10 +10,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! A Json line writer for concurrently writing to a shared output stream @@ -24,49 +22,44 @@ namespace core //! IMPLEMENTATION DECISIONS:\n //! hard code encoding and stream type //! -class CORE_EXPORT CRapidJsonConcurrentLineWriter : public CRapidJsonLineWriter -{ - public: - using TRapidJsonLineWriterBase = CRapidJsonLineWriter; +class CORE_EXPORT CRapidJsonConcurrentLineWriter : public CRapidJsonLineWriter { +public: + using TRapidJsonLineWriterBase = CRapidJsonLineWriter; - public: - //! Take a wrapped stream and provide a json writer object - //! \p outStream reference to an wrapped output stream - CRapidJsonConcurrentLineWriter(CJsonOutputStreamWrapper &outStream); +public: + //! Take a wrapped stream and provide a json writer object + //! 
\p outStream reference to a wrapped output stream
+    CRapidJsonConcurrentLineWriter(CJsonOutputStreamWrapper& outStream);

-        ~CRapidJsonConcurrentLineWriter();
+    ~CRapidJsonConcurrentLineWriter();

-        //! Flush buffers, including the output stream.
-        //! Note: flush still happens asynchronous
-        void flush();
+    //! Flush buffers, including the output stream.
+    //! Note: the flush still happens asynchronously
+    void flush();

-        //! Hooks into end object to automatically flush if json object is complete
-        //! Note: This is a non-virtual overwrite
-        bool EndObject(rapidjson::SizeType memberCount = 0);
+    //! Hooks into end object to automatically flush if the json object is complete
+    //! Note: This is a non-virtual overwrite
+    bool EndObject(rapidjson::SizeType memberCount = 0);

-        //! Debug the memory used by this component.
-        void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const;
+    //! Debug the memory used by this component.
+    void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const;

-        //! Get the memory used by this component.
-        std::size_t memoryUsage() const;
+    //! Get the memory used by this component.
+    std::size_t memoryUsage() const;

-        //! Write JSON document to outputstream
-        //! Note this non-virtual overwrite is needed to avoid slicing of the writer
-        //! and hence ensure the correct EndObject is called
-        //! \p doc reference to rapidjson document value
-        void write(rapidjson::Value &doc)
-        {
-            doc.Accept(*this);
-        }
+    //! Write JSON document to the output stream
+    //! Note this non-virtual overwrite is needed to avoid slicing of the writer
+    //! and hence ensure the correct EndObject is called
+    //! \p doc reference to rapidjson document value
+    void write(rapidjson::Value& doc) { doc.Accept(*this); }

-    private:
-        //! The stream object
-        CJsonOutputStreamWrapper &m_OutputStreamWrapper;
+private:
+    //! The stream object
+    CJsonOutputStreamWrapper& m_OutputStreamWrapper;

-        //! internal buffer, managed by the stream wrapper
-        rapidjson::StringBuffer *m_StringBuffer;
+    //! internal buffer, managed by the stream wrapper
+    rapidjson::StringBuffer* m_StringBuffer;
 };
-
 }
 }
diff --git a/include/core/CRapidJsonLineWriter.h b/include/core/CRapidJsonLineWriter.h
index d79be523dc..b4b73553ff 100644
--- a/include/core/CRapidJsonLineWriter.h
+++ b/include/core/CRapidJsonLineWriter.h
@@ -9,10 +9,8 @@

 #include

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {

 //! Writes each Json object to a single line.
 //! Not as verbose as rapidjson::prettywriter but it is still possible to
@@ -29,50 +27,42 @@ template<typename OUTPUT_STREAM,
          typename SOURCE_ENCODING = rapidjson::UTF8<>,
          typename TARGET_ENCODING = rapidjson::UTF8<>,
          typename STACK_ALLOCATOR = rapidjson::CrtAllocator,
          unsigned WRITE_FLAGS = rapidjson::kWriteDefaultFlags>
-class CRapidJsonLineWriter : public CRapidJsonWriterBase< OUTPUT_STREAM, SOURCE_ENCODING, TARGET_ENCODING, STACK_ALLOCATOR, WRITE_FLAGS, rapidjson::Writer >
-{
-    public:
+class CRapidJsonLineWriter
+    : public CRapidJsonWriterBase<OUTPUT_STREAM, SOURCE_ENCODING, TARGET_ENCODING, STACK_ALLOCATOR, WRITE_FLAGS, rapidjson::Writer> {
+public:
+    using TRapidJsonWriterBase =
+        CRapidJsonWriterBase<OUTPUT_STREAM, SOURCE_ENCODING, TARGET_ENCODING, STACK_ALLOCATOR, WRITE_FLAGS, rapidjson::Writer>;
+    using TRapidJsonWriterBase::TRapidJsonWriterBase;

-        using TRapidJsonWriterBase = CRapidJsonWriterBase< OUTPUT_STREAM, SOURCE_ENCODING, TARGET_ENCODING, STACK_ALLOCATOR, WRITE_FLAGS, rapidjson::Writer >;
-        using TRapidJsonWriterBase::TRapidJsonWriterBase;
+    //! Overwrites the Writer::StartObject in order to count nested objects
+    bool StartObject() {
+        ++m_ObjectCount;
+        return TRapidJsonWriterBase::StartObject();
+    }

-        //! Overwrites the Writer::StartObject in order to count nested objects
-        bool StartObject()
-        {
-            ++m_ObjectCount;
-            return TRapidJsonWriterBase::StartObject();
-        }
-
-        //! Overwrites Writer::EndObject in order to inject new lines if:
-        //!
- it's the end of the json object or array - //! - it's the end of a json object as part of an array - bool EndObject(rapidjson::SizeType memberCount = 0) - { - bool baseReturnCode = TRapidJsonWriterBase::EndObject(memberCount); - --m_ObjectCount; + //! Overwrites Writer::EndObject in order to inject new lines if: + //! - it's the end of the json object or array + //! - it's the end of a json object as part of an array + bool EndObject(rapidjson::SizeType memberCount = 0) { + bool baseReturnCode = TRapidJsonWriterBase::EndObject(memberCount); + --m_ObjectCount; - // put a new line if at top level or if inside an array - if (TRapidJsonWriterBase::level_stack_.Empty() || m_ObjectCount == 0) - { - TRapidJsonWriterBase::os_->Put('\n'); - } - return baseReturnCode; + // put a new line if at top level or if inside an array + if (TRapidJsonWriterBase::level_stack_.Empty() || m_ObjectCount == 0) { + TRapidJsonWriterBase::os_->Put('\n'); } + return baseReturnCode; + } - //! Write JSON document to outputstream - //! Note this non-virtual overwrite is needed to avoid slicing of the writer - //! and hence ensure the correct StartObject/EndObject functions are called - //! \p doc reference to rapidjson document value - void write(rapidjson::Value &doc) - { - doc.Accept(*this); - } + //! Write JSON document to outputstream + //! Note this non-virtual overwrite is needed to avoid slicing of the writer + //! and hence ensure the correct StartObject/EndObject functions are called + //! \p doc reference to rapidjson document value + void write(rapidjson::Value& doc) { doc.Accept(*this); } - - private: - size_t m_ObjectCount = 0; +private: + size_t m_ObjectCount = 0; }; - } } diff --git a/include/core/CRapidJsonPoolAllocator.h b/include/core/CRapidJsonPoolAllocator.h index b09d7f79fd..a5157c13ec 100644 --- a/include/core/CRapidJsonPoolAllocator.h +++ b/include/core/CRapidJsonPoolAllocator.h @@ -6,19 +6,16 @@ #ifndef INCLUDED_ml_core_CRapidJsonPoolAllocator_h #define INCLUDED_ml_core_CRapidJsonPoolAllocator_h +#include #include #include -#include - #include #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! A rapidjson memory allocator using a fixed size buffer //! @@ -33,66 +30,49 @@ namespace core //! //! Clear the allocator on destruction //! -class CRapidJsonPoolAllocator -{ - public: - using TDocumentWeakPtr = boost::weak_ptr; - using TDocumentPtr = boost::shared_ptr; - using TDocumentPtrVec = std::vector; - public: - CRapidJsonPoolAllocator() - : m_JsonPoolAllocator(m_FixedBuffer, FIXED_BUFFER_SIZE) - { - } +class CRapidJsonPoolAllocator { +public: + using TDocumentWeakPtr = boost::weak_ptr; + using TDocumentPtr = boost::shared_ptr; + using TDocumentPtrVec = std::vector; - ~CRapidJsonPoolAllocator() - { - this->clear(); - } +public: + CRapidJsonPoolAllocator() : m_JsonPoolAllocator(m_FixedBuffer, FIXED_BUFFER_SIZE) {} - void clear() - { - m_JsonPoolAllocator.Clear(); - } + ~CRapidJsonPoolAllocator() { this->clear(); } - //! \return document pointer suitable for storing in a container - //! Note: The API is designed to emphasise that the client does not own the document memory - //! i.e. The document will be invalidated on destruction of this allocator - TDocumentWeakPtr makeStorableDoc() - { - TDocumentPtr newDoc = boost::make_shared(&m_JsonPoolAllocator); - newDoc->SetObject(); - m_JsonDocumentStore.push_back(newDoc); - return TDocumentWeakPtr(newDoc); - } + void clear() { m_JsonPoolAllocator.Clear(); } - //! 
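//! A sketch of the intended ownership pattern for makeStorableDoc, with a
//! hypothetical field name; the weak pointer must be locked before use and
//! the document dies with the allocator:
//! \code{.cpp}
//! ml::core::CRapidJsonPoolAllocator allocator;
//! ml::core::CRapidJsonPoolAllocator::TDocumentWeakPtr weakDoc = allocator.makeStorableDoc();
//! if (ml::core::CRapidJsonPoolAllocator::TDocumentPtr doc = weakDoc.lock()) {
//!     // safe: the document is still owned by the allocator
//!     doc->AddMember("field", 1, allocator.get());
//! }
//! \endcode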
\return const reference to the underlying memory pool allocator - const rapidjson::MemoryPoolAllocator<> &get() const - { - return m_JsonPoolAllocator; - } + //! \return document pointer suitable for storing in a container + //! Note: The API is designed to emphasise that the client does not own the document memory + //! i.e. The document will be invalidated on destruction of this allocator + TDocumentWeakPtr makeStorableDoc() { + TDocumentPtr newDoc = boost::make_shared(&m_JsonPoolAllocator); + newDoc->SetObject(); + m_JsonDocumentStore.push_back(newDoc); + return TDocumentWeakPtr(newDoc); + } - //! \return reference to the underlying memory pool allocator - rapidjson::MemoryPoolAllocator<> &get() - { - return m_JsonPoolAllocator; - } + //! \return const reference to the underlying memory pool allocator + const rapidjson::MemoryPoolAllocator<>& get() const { return m_JsonPoolAllocator; } - private: - //! Size of the fixed buffer to allocate - static const size_t FIXED_BUFFER_SIZE = 4096; + //! \return reference to the underlying memory pool allocator + rapidjson::MemoryPoolAllocator<>& get() { return m_JsonPoolAllocator; } - private: - //! fixed size memory buffer used to optimize allocator performance - char m_FixedBuffer[FIXED_BUFFER_SIZE]; +private: + //! Size of the fixed buffer to allocate + static const size_t FIXED_BUFFER_SIZE = 4096; - //! memory pool to use for allocating rapidjson objects - rapidjson::MemoryPoolAllocator<> m_JsonPoolAllocator; +private: + //! fixed size memory buffer used to optimize allocator performance + char m_FixedBuffer[FIXED_BUFFER_SIZE]; - //! Container used to persist rapidjson documents - TDocumentPtrVec m_JsonDocumentStore; -}; + //! memory pool to use for allocating rapidjson objects + rapidjson::MemoryPoolAllocator<> m_JsonPoolAllocator; + //! Container used to persist rapidjson documents + TDocumentPtrVec m_JsonDocumentStore; +}; } } #endif // INCLUDED_ml_core_CRapidJsonPoolAllocator_h diff --git a/include/core/CRapidJsonPrettyWriter.h b/include/core/CRapidJsonPrettyWriter.h index 0fa4770e84..68d9a54daf 100644 --- a/include/core/CRapidJsonPrettyWriter.h +++ b/include/core/CRapidJsonPrettyWriter.h @@ -11,10 +11,8 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! Writes each Json object with indentation and spacing /*! @@ -29,15 +27,13 @@ template, typename STACK_ALLOCATOR = rapidjson::CrtAllocator, unsigned WRITE_FLAGS = rapidjson::kWriteDefaultFlags> -class CRapidJsonPrettyWriter : public CRapidJsonWriterBase< OUTPUT_STREAM, SOURCE_ENCODING, TARGET_ENCODING, - STACK_ALLOCATOR, WRITE_FLAGS, rapidjson::PrettyWriter > -{ - public: - using TRapidJsonPrettyWriterBase = CRapidJsonWriterBase; - using TRapidJsonPrettyWriterBase::TRapidJsonPrettyWriterBase; +class CRapidJsonPrettyWriter + : public CRapidJsonWriterBase { +public: + using TRapidJsonPrettyWriterBase = + CRapidJsonWriterBase; + using TRapidJsonPrettyWriterBase::TRapidJsonPrettyWriterBase; }; - } } diff --git a/include/core/CRapidJsonWriterBase.h b/include/core/CRapidJsonWriterBase.h index c6bdd68541..276eeace37 100644 --- a/include/core/CRapidJsonWriterBase.h +++ b/include/core/CRapidJsonWriterBase.h @@ -7,13 +7,13 @@ #ifndef INCLUDED_ml_core_CRapidJsonWriterBase_h #define INCLUDED_ml_core_CRapidJsonWriterBase_h -#include +#include #include #include -#include -#include #include #include +#include +#include #include #include @@ -26,14 +26,10 @@ #include #include - #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! 
A Json writer with fixed length allocator pool //! With utility functions for adding fields to JSON objects. @@ -63,549 +59,423 @@ template, typename STACK_ALLOCATOR = rapidjson::CrtAllocator, unsigned WRITE_FLAGS = rapidjson::kWriteDefaultFlags, - template< typename, typename, typename, typename, unsigned > class JSON_WRITER = rapidjson::Writer - > -class CRapidJsonWriterBase : public JSON_WRITER -{ - public: - using TTimeVec = std::vector; - using TStrVec = std::vector; - using TDoubleVec = std::vector; - using TDoubleDoublePr = std::pair; - using TDoubleDoublePrVec = std::vector; - using TDoubleDoubleDoublePrPr = std::pair; - using TDoubleDoubleDoublePrPrVec = std::vector; - using TStrUSet = boost::unordered_set; - using TDocument = rapidjson::Document; - using TValue = rapidjson::Value; - using TDocumentWeakPtr = boost::weak_ptr; - using TValuePtr = boost::shared_ptr; - - using TPoolAllocatorPtr = boost::shared_ptr; - using TPoolAllocatorPtrStack = std::stack< TPoolAllocatorPtr >; - using TStrPoolAllocatorPtrMap = boost::unordered_map< std::string, TPoolAllocatorPtr>; - using TStrPoolAllocatorPtrMapItr = TStrPoolAllocatorPtrMap::iterator; - using TStrPoolAllocatorPtrMapItrBoolPr = std::pair; - - - public: - using TRapidJsonWriterBase = JSON_WRITER; - - CRapidJsonWriterBase(OUTPUT_STREAM &os) : TRapidJsonWriterBase(os) - { - // push a default rapidjson allocator onto our stack - m_JsonPoolAllocators.push(boost::make_shared()); - } - - CRapidJsonWriterBase() : TRapidJsonWriterBase() - { - // push a default rapidjson allocator onto our stack - m_JsonPoolAllocators.push(boost::make_shared()); - } - - virtual ~CRapidJsonWriterBase() - { - // clean up resources + template class JSON_WRITER = rapidjson::Writer> +class CRapidJsonWriterBase : public JSON_WRITER { +public: + using TTimeVec = std::vector; + using TStrVec = std::vector; + using TDoubleVec = std::vector; + using TDoubleDoublePr = std::pair; + using TDoubleDoublePrVec = std::vector; + using TDoubleDoubleDoublePrPr = std::pair; + using TDoubleDoubleDoublePrPrVec = std::vector; + using TStrUSet = boost::unordered_set; + using TDocument = rapidjson::Document; + using TValue = rapidjson::Value; + using TDocumentWeakPtr = boost::weak_ptr; + using TValuePtr = boost::shared_ptr; + + using TPoolAllocatorPtr = boost::shared_ptr; + using TPoolAllocatorPtrStack = std::stack; + using TStrPoolAllocatorPtrMap = boost::unordered_map; + using TStrPoolAllocatorPtrMapItr = TStrPoolAllocatorPtrMap::iterator; + using TStrPoolAllocatorPtrMapItrBoolPr = std::pair; + +public: + using TRapidJsonWriterBase = JSON_WRITER; + + CRapidJsonWriterBase(OUTPUT_STREAM& os) : TRapidJsonWriterBase(os) { + // push a default rapidjson allocator onto our stack + m_JsonPoolAllocators.push(boost::make_shared()); + } + + CRapidJsonWriterBase() : TRapidJsonWriterBase() { + // push a default rapidjson allocator onto our stack + m_JsonPoolAllocators.push(boost::make_shared()); + } + + virtual ~CRapidJsonWriterBase() { + // clean up resources + m_JsonPoolAllocators.pop(); + } + + //! Push a named allocator on to the stack + //! Look in the cache for the allocator - creating it if not present + void pushAllocator(const std::string& allocatorName) { + TPoolAllocatorPtr& ptr = m_AllocatorCache[allocatorName]; + if (ptr == nullptr) { + ptr = boost::make_shared(); + } + m_JsonPoolAllocators.push(ptr); + } + + //! 
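//! A sketch of scoped allocator use, with a hypothetical allocator name;
//! pushAllocator/popAllocator bracket a batch of documents so the pooled
//! memory can be cleared in one go:
//! \code{.cpp}
//! rapidjson::StringBuffer buffer;
//! ml::core::CRapidJsonLineWriter<rapidjson::StringBuffer> writer(buffer);
//!
//! writer.pushAllocator("my_batch");
//! // ... build and write documents backed by the pooled allocator ...
//! writer.popAllocator(); // clears the pool and returns it to the cache
//! \endcode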
Clear and remove the last pushed allocator from the stack + void popAllocator() { + if (!m_JsonPoolAllocators.empty()) { + TPoolAllocatorPtr allocator = m_JsonPoolAllocators.top(); + if (allocator) { + allocator->clear(); + } m_JsonPoolAllocators.pop(); } + } - //! Push a named allocator on to the stack - //! Look in the cache for the allocator - creating it if not present - void pushAllocator(const std::string &allocatorName) - { - TPoolAllocatorPtr &ptr = m_AllocatorCache[allocatorName]; - if (ptr == nullptr) - { - ptr = boost::make_shared(); - } - m_JsonPoolAllocators.push(ptr); - } + //! Get a valid allocator from the stack + //! If no valid allocator can be found then store and return a freshly minted one + boost::shared_ptr getAllocator() const { + TPoolAllocatorPtr allocator; + CRapidJsonPoolAllocator* rawAllocator = nullptr; + while (!m_JsonPoolAllocators.empty()) { + allocator = m_JsonPoolAllocators.top(); - //! Clear and remove the last pushed allocator from the stack - void popAllocator() - { - if (!m_JsonPoolAllocators.empty()) - { - TPoolAllocatorPtr allocator = m_JsonPoolAllocators.top(); - if (allocator) - { - allocator->clear(); - } + if (allocator && (rawAllocator = allocator.get())) { + break; + } else { + LOG_ERROR("Invalid JSON memory allocator encountered. Removing."); m_JsonPoolAllocators.pop(); } } - //! Get a valid allocator from the stack - //! If no valid allocator can be found then store and return a freshly minted one - boost::shared_ptr getAllocator() const - { - TPoolAllocatorPtr allocator; - CRapidJsonPoolAllocator *rawAllocator = nullptr; - while (!m_JsonPoolAllocators.empty()) - { - allocator = m_JsonPoolAllocators.top(); - - if (allocator && (rawAllocator = allocator.get())) - { - break; - } - else - { - LOG_ERROR("Invalid JSON memory allocator encountered. Removing."); - m_JsonPoolAllocators.pop(); - } - } - - // shouldn't ever happen as it indicates that the default allocator is invalid - if (!rawAllocator) - { - LOG_ERROR("No viable JSON memory allocator encountered. Recreating."); - allocator = boost::make_shared(); - m_JsonPoolAllocators.push(allocator); - } - - return allocator; - } - - rapidjson::MemoryPoolAllocator<> &getRawAllocator() const - { - return this->getAllocator()->get(); - } - - - bool Double(double d) - { - // rewrite NaN and Infinity to 0 - if (!(boost::math::isfinite)(d)) - { - return TRapidJsonWriterBase::Int(0); - } - - return TRapidJsonWriterBase::Double(d); - } - - //! Writes an epoch second timestamp as an epoch millis timestamp - bool Time(core_t::TTime t) - { - return this->Int64(CTimeUtils::toEpochMs(t)); - } - - //! Push a constant string into a supplied rapidjson object value - //! \p[in] value constant string - //! \p[out] obj rapidjson value to contain the \p value - //! \p name must outlive \p obj or memory corruption will occur. - void pushBack(const char * value, TValue &obj) const - { - obj.PushBack(rapidjson::StringRef(value), this->getRawAllocator()); - } - - //! Push a generic rapidjson value object into a supplied rapidjson object value - //! \p[in] value generic rapidjson value object - //! \p[out] obj rapidjson value to contain the \p value - //! \p name must outlive \p obj or memory corruption will occur. - template - void pushBack(T &&value, TValue &obj) const - { - obj.PushBack(value, this->getRawAllocator()); - } - - //! Push a generic rapidjson value object into a supplied rapidjson object value - //! \p[in] value generic rapidjson value object - //! 
\p[out] obj shared pointer to a rapidjson value to contain the \p value - //! \p name must outlive \p obj or memory corruption will occur. - template - void pushBack(T &&value, const TValuePtr &obj) const - { - obj->PushBack(value, this->getRawAllocator()); - } - - - //! Add an array of doubles to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - template - void addDoubleArrayFieldToObj(const std::string &fieldName, - const CONTAINER &values, - TValue &obj) const - { - TValue array = this->makeArray(values.size()); - - bool considerLogging(true); - for (const auto &value : values) - { - this->checkArrayNumberFinite(value, fieldName, considerLogging); - this->pushBack(value, array); - } - - this->addMember(fieldName, array, obj); - } - - - //! write the rapidjson value document to the output stream - //! \p[in] doc rapidjson document value to write out - virtual void write(TValue &doc) - { - doc.Accept(*this); - } - - //! Return a new rapidjson document - TDocument makeDoc() const - { - TDocument newDoc(&this->getRawAllocator()); - newDoc.SetObject(); - return newDoc; - } - - //! Return a weak pointer to a new rapidjson document - //! This is a convenience function to simplify the (temporary) - //! storage of newly created documents in containers. - //! Note: Be aware that the lifetime of the document - //! should not exceed that of the writer lest the document - //! be invalidated. - TDocumentWeakPtr makeStorableDoc() const - { - return this->getAllocator()->makeStorableDoc(); - } - - //! Return a new rapidjson array - TValue makeArray(size_t length = 0) const - { - TValue array(rapidjson::kArrayType); - if (length > 0) - { - array.Reserve(static_cast(length), this->getRawAllocator()); - } - return array; - } - - //! Return a new rapidjson object - TValue makeObject() const - { - TValue obj(rapidjson::kObjectType); - return obj; - } - - //! Adds a generic rapidjson value field to an object. - //! \p[in] name field name - //! \p[in] value generic rapidjson value - //! \p[out] obj shared pointer to rapidjson object to contain the \p name \p value pair - TValuePtr addMember(const std::string &name, - TValue &value, - const TValuePtr &obj) const - { - obj->AddMember(rapidjson::StringRef(name), value, this->getRawAllocator()); - return obj; - } - - //! Adds a copy of a string field to an object. - //! \p[in] name field name - //! \p[in] value string field to be copied - //! \p[out] obj shared pointer to rapidjson object to contain the \p name \p value pair - TValuePtr addMember(const std::string &name, - const std::string &value, - const TValuePtr &obj) const - { - TValue v(value, this->getRawAllocator()); - obj->AddMember(rapidjson::StringRef(name), v, this->getRawAllocator()); - return obj; - } - - //! Adds a string field as a reference to an object (use for adding constant strings). - //! \p[in] name field name - //! \p[in] value string field - //! \p[out] obj shared pointer to rapidjson object to contain the \p name \p value pair - TValuePtr addMemberRef(const std::string &name, - const std::string &value, - const TValuePtr &obj) const - { - obj->AddMember(rapidjson::StringRef(name), rapidjson::StringRef(value), this->getRawAllocator()); - return obj; - } - - //! Adds a generic rapidjson value field to an object. - //! \p[in] name field name - //! \p[in] value generic rapidjson value - //! 
\p[out] obj rapidjson object to contain the \p name \p value pair - void addMember(const std::string &name, - TValue &value, - TValue &obj) const - { - obj.AddMember(rapidjson::StringRef(name), value, this->getRawAllocator()); - } - - //! Adds a copy of a string field to an object. - //! \p[in] name field name - //! \p[in] value string field to be copied - //! \p[out] obj rapidjson object to contain the \p name \p value pair - void addMember(const std::string &name, - const std::string &value, - TValue &obj) const - { - TValue v(value, this->getRawAllocator()); - obj.AddMember(rapidjson::StringRef(name), v, this->getRawAllocator()); - } - - //! Adds a string field as a reference to an object (use for adding constant strings). - //! \p[in] name field name - //! \p[in] value string field - //! \p[out] obj rapidjson object to contain the \p name \p value pair - void addMemberRef(const std::string &name, - const std::string &value, - TValue &obj) const - { - obj.AddMember(rapidjson::StringRef(name), rapidjson::StringRef(value), this->getRawAllocator()); - } - - - //! Adds a copy of a string field with the name fieldname to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addStringFieldCopyToObj(const std::string &fieldName, - const std::string &value, - TValue &obj, - bool allowEmptyString = false) const - { - // Don't add empty strings unless explicitly told to - if (!allowEmptyString && value.empty()) - { - return; - } - - this->addMember(fieldName, value, obj); - } - - //! Adds a reference to a string field with the name fieldname to an object. - //! \p fieldName AND \p value must outlive \p obj or memory corruption will occur. - //! This is an optimized version of addStringFieldToObj() avoiding - //! the string copy for the value. Use with care. - void addStringFieldReferenceToObj(const std::string &fieldName, - const std::string &value, - TValue &obj, - bool allowEmptyString = false) const - { - // Don't add empty strings unless explicitly told to - if (!allowEmptyString && value.empty()) - { - return; - } - - this->addMemberRef(fieldName, value, obj); - } - - //! Adds a time field with the name fieldname to an object. - //! Automatically turns time from 'seconds_since_epoch' into 'milliseconds_since_epoch' - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addTimeFieldToObj(const std::string &fieldName, - core_t::TTime value, - TValue &obj) const - { - TValue v(CTimeUtils::toEpochMs(value)); - this->addMember(fieldName, v, obj); - } - - //! Adds a double field with the name fieldname to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addDoubleFieldToObj(const std::string &fieldName, - double value, - TValue &obj) const - { - if (!(boost::math::isfinite)(value)) - { - LOG_ERROR("Adding " << value << " to the \"" << - fieldName << "\" field of a JSON document"); - // Don't return - make a best effort to add the value - // Some writers derived from this class may defend themselves by converting to 0 - } - TValue v(value); - this->addMember(fieldName, v, obj); - } - - //! Adds a bool field with the name fieldname to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addBoolFieldToObj(const std::string &fieldName, - bool value, - TValue &obj) const - { - TValue v(value); - this->addMember(fieldName, v, obj); - } - - //! Adds a signed integer field with the name fieldname to an object. - //! 
\p fieldName must outlive \p obj or memory corruption will occur. - void addIntFieldToObj(const std::string &fieldName, - int64_t value, - TValue &obj) const - { - TValue v(value); - this->addMember(fieldName, v, obj); - } - - //! Adds an unsigned integer field with the name fieldname to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addUIntFieldToObj(const std::string &fieldName, - uint64_t value, - TValue &obj) const - { - TValue v(value); - this->addMember(fieldName, v, obj); - } - - //! Add an array of strings to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addStringArrayFieldToObj(const std::string &fieldName, - const TStrVec &values, - TValue &obj) const - { - this->addArrayToObj(fieldName, values.begin(), values.end(), obj); - } - - //! Add an array of strings to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addStringArrayFieldToObj(const std::string &fieldName, - const TStrUSet &values, - TValue &obj) const - { - using TStrCPtrVec = std::vector; - - TStrCPtrVec ordered; - ordered.reserve(values.size()); - for (const auto &value: values) - { - ordered.push_back(&value); - } - std::sort(ordered.begin(), ordered.end(), - CFunctional::SDereference>()); - - addArrayToObj(fieldName, - boost::iterators::make_indirect_iterator(ordered.begin()), - boost::iterators::make_indirect_iterator(ordered.end()), - obj); - } - - //! Add an array of pair double, pair double double to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addDoubleDoubleDoublePrPrArrayFieldToObj(const std::string &fieldName, - const TDoubleDoubleDoublePrPrVec &values, - TValue &obj) const - { - TValue array = this->makeArray(values.size()); - - bool considerLogging(true); - for (const auto &value: values) - { - double firstVal = value.first; - this->checkArrayNumberFinite(firstVal, fieldName, considerLogging); - this->pushBack(firstVal, array); - double secondFirstVal = value.second.first; - this->checkArrayNumberFinite(secondFirstVal, fieldName, considerLogging); - this->pushBack(secondFirstVal, array); - double secondSecondVal = value.second.second; - this->checkArrayNumberFinite(secondSecondVal, fieldName, considerLogging); - this->pushBack(secondSecondVal, array); - } - - this->addMember(fieldName, array, obj); - } - - //! Add an array of pair double double to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - void addDoubleDoublePrArrayFieldToObj(const std::string &firstFieldName, - const std::string &secondFieldName, - const TDoubleDoublePrVec &values, - TValue &obj) const - { - TValue firstArray = this->makeArray(values.size()); - TValue secondArray = this->makeArray(values.size()); - - bool considerLoggingFirst(true); - bool considerLoggingSecond(true); - for (const auto &value: values) - { - double firstVal = value.first; - this->checkArrayNumberFinite(firstVal, firstFieldName, considerLoggingFirst); - this->pushBack(firstVal, firstArray); - double secondVal = value.second; - this->checkArrayNumberFinite(secondVal, secondFieldName, considerLoggingSecond); - this->pushBack(secondVal, secondArray); - } - - this->addMember(firstFieldName, firstArray, obj); - this->addMember(secondFieldName, secondArray, obj); - } - - //! Add an array of TTimes to an object. - //! \p fieldName must outlive \p obj or memory corruption will occur. - //! Note: The time values are adjusted to be in standard Java format - //!i.e. 
milliseconds since epoch
-    void addTimeArrayFieldToObj(const std::string &fieldName,
-                                const TTimeVec &values,
-                                TValue &obj) const
-    {
-        TValue array = this->makeArray(values.size());
-
-        for (const auto &value: values)
-        {
-            this->pushBack(CTimeUtils::toEpochMs(value), array);
-        }
-
-        this->addMember(fieldName, array, obj);
-    }
-
-    //! Checks if the \p obj has a member named \p fieldName and
-    //! removes it if it does.
-    void removeMemberIfPresent(const std::string &fieldName, TValue &obj) const
-    {
-        if (obj.HasMember(fieldName))
-        {
-            obj.RemoveMember(fieldName);
-        }
-    }
-
-    private:
-        //! Log a message if we're trying to add nan/infinity to a JSON array
-        template
-        void checkArrayNumberFinite(NUMBER val,
-                                    const std::string &fieldName,
-                                    bool &considerLogging) const
-        {
-            if (considerLogging && !(boost::math::isfinite)(val))
-            {
-                LOG_ERROR("Adding " << val << " to the \"" <<
-                          fieldName << "\" array in a JSON document");
-                // Don't return - make a best effort to add the value
-                // Some writers derived from this class may defend themselves by converting to 0
-                considerLogging = false;
-            }
-        }
-
-        //! Convert \p value to a RapidJSON value.
-        TValue asRapidJsonValue(const std::string &value) const
-        {
-            return {value, this->getRawAllocator()};
-        }
-
-
-        //! Convert the range [\p begin, \p end) to a RapidJSON array and add to \p obj.
-        template
-        void addArrayToObj(const std::string &fieldName,
-                           ITR begin, ITR end,
-                           TValue &obj) const
-        {
-            TValue array = this->makeArray(std::distance(begin, end));
-            for (/**/; begin != end; ++begin)
-            {
-                this->pushBack(asRapidJsonValue(*begin), array);
-            }
-            this->addMember(fieldName, array, obj);
-        }
-
-
-    private:
-        //! cache allocators for potential reuse
-        TStrPoolAllocatorPtrMap m_AllocatorCache;
-
-        //! Allow for different batches of documents to use independent allocators
-        mutable TPoolAllocatorPtrStack m_JsonPoolAllocators;
+        // shouldn't ever happen as it indicates that the default allocator is invalid
+        if (!rawAllocator) {
+            LOG_ERROR("No viable JSON memory allocator encountered. Recreating.");
+            allocator = boost::make_shared();
+            m_JsonPoolAllocators.push(allocator);
+        }
+
+        return allocator;
+    }
+
+    rapidjson::MemoryPoolAllocator<>& getRawAllocator() const { return this->getAllocator()->get(); }
+
+    bool Double(double d) {
+        // rewrite NaN and Infinity to 0
+        if (!(boost::math::isfinite)(d)) {
+            return TRapidJsonWriterBase::Int(0);
+        }
+
+        return TRapidJsonWriterBase::Double(d);
+    }
+
+    //! Writes an epoch second timestamp as an epoch millis timestamp
+    bool Time(core_t::TTime t) { return this->Int64(CTimeUtils::toEpochMs(t)); }
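The Double and Time overrides above give every derived writer a single choke point for sanitising its output. A minimal sketch of the behaviour, assuming a writer instance derived from this class (the variable writer and the field names are hypothetical):

    // Non-finite doubles are rewritten to 0 rather than producing invalid JSON.
    writer.Key("anomaly_score");
    writer.Double(std::numeric_limits<double>::quiet_NaN()); // emits 0 (needs <limits>)

    // Epoch-second timestamps are widened to epoch milliseconds.
    writer.Key("timestamp");
    writer.Time(1514764800); // emits 1514764800000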
+    //! Push a constant string into a supplied rapidjson object value
+    //! \p[in] value constant string
+    //! \p[out] obj rapidjson value to contain the \p value
+    //! \p value must outlive \p obj or memory corruption will occur.
+    void pushBack(const char* value, TValue& obj) const { obj.PushBack(rapidjson::StringRef(value), this->getRawAllocator()); }
+
+    //! Push a generic rapidjson value object into a supplied rapidjson object value
+    //! \p[in] value generic rapidjson value object
+    //! \p[out] obj rapidjson value to contain the \p value
+    //! \p value must outlive \p obj or memory corruption will occur.
+    template<typename T>
+    void pushBack(T&& value, TValue& obj) const {
+        obj.PushBack(value, this->getRawAllocator());
+    }
+
+    //! Push a generic rapidjson value object into a supplied rapidjson object value
+    //! \p[in] value generic rapidjson value object
+    //! \p[out] obj shared pointer to a rapidjson value to contain the \p value
+    //! \p value must outlive \p obj or memory corruption will occur.
+    template<typename T>
+    void pushBack(T&& value, const TValuePtr& obj) const {
+        obj->PushBack(value, this->getRawAllocator());
+    }
+
+    //! Add an array of doubles to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    template<typename CONTAINER>
+    void addDoubleArrayFieldToObj(const std::string& fieldName, const CONTAINER& values, TValue& obj) const {
+        TValue array = this->makeArray(values.size());
+
+        bool considerLogging(true);
+        for (const auto& value : values) {
+            this->checkArrayNumberFinite(value, fieldName, considerLogging);
+            this->pushBack(value, array);
+        }
+
+        this->addMember(fieldName, array, obj);
+    }
+
+    //! write the rapidjson value document to the output stream
+    //! \p[in] doc rapidjson document value to write out
+    virtual void write(TValue& doc) { doc.Accept(*this); }
+
+    //! Return a new rapidjson document
+    TDocument makeDoc() const {
+        TDocument newDoc(&this->getRawAllocator());
+        newDoc.SetObject();
+        return newDoc;
+    }
+
+    //! Return a weak pointer to a new rapidjson document
+    //! This is a convenience function to simplify the (temporary)
+    //! storage of newly created documents in containers.
+    //! Note: Be aware that the lifetime of the document
+    //! should not exceed that of the writer lest the document
+    //! be invalidated.
+    TDocumentWeakPtr makeStorableDoc() const { return this->getAllocator()->makeStorableDoc(); }
+
+    //! Return a new rapidjson array
+    TValue makeArray(size_t length = 0) const {
+        TValue array(rapidjson::kArrayType);
+        if (length > 0) {
+            array.Reserve(static_cast<rapidjson::SizeType>(length), this->getRawAllocator());
+        }
+        return array;
+    }
+
+    //! Return a new rapidjson object
+    TValue makeObject() const {
+        TValue obj(rapidjson::kObjectType);
+        return obj;
+    }
+
+    //! Adds a generic rapidjson value field to an object.
+    //! \p[in] name field name
+    //! \p[in] value generic rapidjson value
+    //! \p[out] obj shared pointer to rapidjson object to contain the \p name \p value pair
+    TValuePtr addMember(const std::string& name, TValue& value, const TValuePtr& obj) const {
+        obj->AddMember(rapidjson::StringRef(name), value, this->getRawAllocator());
+        return obj;
+    }
+
+    //! Adds a copy of a string field to an object.
+    //! \p[in] name field name
+    //! \p[in] value string field to be copied
+    //! \p[out] obj shared pointer to rapidjson object to contain the \p name \p value pair
+    TValuePtr addMember(const std::string& name, const std::string& value, const TValuePtr& obj) const {
+        TValue v(value, this->getRawAllocator());
+        obj->AddMember(rapidjson::StringRef(name), v, this->getRawAllocator());
+        return obj;
+    }
+
+    //! Adds a string field as a reference to an object (use for adding constant strings).
+    //! \p[in] name field name
+    //! \p[in] value string field
+    //! \p[out] obj shared pointer to rapidjson object to contain the \p name \p value pair
+    TValuePtr addMemberRef(const std::string& name, const std::string& value, const TValuePtr& obj) const {
+        obj->AddMember(rapidjson::StringRef(name), rapidjson::StringRef(value), this->getRawAllocator());
+        return obj;
+    }
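The factory helpers above combine with the addMember overloads that follow to build nested documents. A minimal sketch (hypothetical writer instance and field names; the static strings satisfy the documented lifetime requirements):

    static const std::string RESULT("result");
    static const std::string VALUE("value");

    TDocument doc = writer.makeDoc();
    TValue child = writer.makeObject();
    TValue v(42.0);
    writer.addMember(VALUE, v, child);    // child becomes {"value":42.0}
    writer.addMember(RESULT, child, doc); // doc becomes {"result":{"value":42.0}}
    writer.write(doc);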
+    //! Adds a generic rapidjson value field to an object.
+    //! \p[in] name field name
+    //! \p[in] value generic rapidjson value
+    //! \p[out] obj rapidjson object to contain the \p name \p value pair
+    void addMember(const std::string& name, TValue& value, TValue& obj) const {
+        obj.AddMember(rapidjson::StringRef(name), value, this->getRawAllocator());
+    }
+
+    //! Adds a copy of a string field to an object.
+    //! \p[in] name field name
+    //! \p[in] value string field to be copied
+    //! \p[out] obj rapidjson object to contain the \p name \p value pair
+    void addMember(const std::string& name, const std::string& value, TValue& obj) const {
+        TValue v(value, this->getRawAllocator());
+        obj.AddMember(rapidjson::StringRef(name), v, this->getRawAllocator());
+    }
+
+    //! Adds a string field as a reference to an object (use for adding constant strings).
+    //! \p[in] name field name
+    //! \p[in] value string field
+    //! \p[out] obj rapidjson object to contain the \p name \p value pair
+    void addMemberRef(const std::string& name, const std::string& value, TValue& obj) const {
+        obj.AddMember(rapidjson::StringRef(name), rapidjson::StringRef(value), this->getRawAllocator());
+    }
+
+    //! Adds a copy of a string field with the name fieldname to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addStringFieldCopyToObj(const std::string& fieldName, const std::string& value, TValue& obj, bool allowEmptyString = false) const {
+        // Don't add empty strings unless explicitly told to
+        if (!allowEmptyString && value.empty()) {
+            return;
+        }
+
+        this->addMember(fieldName, value, obj);
+    }
+
+    //! Adds a reference to a string field with the name fieldname to an object.
+    //! \p fieldName AND \p value must outlive \p obj or memory corruption will occur.
+    //! This is an optimized version of addStringFieldCopyToObj() avoiding
+    //! the string copy for the value. Use with care.
+    void
+    addStringFieldReferenceToObj(const std::string& fieldName, const std::string& value, TValue& obj, bool allowEmptyString = false) const {
+        // Don't add empty strings unless explicitly told to
+        if (!allowEmptyString && value.empty()) {
+            return;
+        }
+
+        this->addMemberRef(fieldName, value, obj);
+    }
+
+    //! Adds a time field with the name fieldname to an object.
+    //! Automatically turns time from 'seconds_since_epoch' into 'milliseconds_since_epoch'
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addTimeFieldToObj(const std::string& fieldName, core_t::TTime value, TValue& obj) const {
+        TValue v(CTimeUtils::toEpochMs(value));
+        this->addMember(fieldName, v, obj);
+    }
+
+    //! Adds a double field with the name fieldname to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addDoubleFieldToObj(const std::string& fieldName, double value, TValue& obj) const {
+        if (!(boost::math::isfinite)(value)) {
+            LOG_ERROR("Adding " << value << " to the \"" << fieldName << "\" field of a JSON document");
+            // Don't return - make a best effort to add the value
+            // Some writers derived from this class may defend themselves by converting to 0
+        }
+        TValue v(value);
+        this->addMember(fieldName, v, obj);
+    }
+
+    //! Adds a bool field with the name fieldname to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addBoolFieldToObj(const std::string& fieldName, bool value, TValue& obj) const {
+        TValue v(value);
+        this->addMember(fieldName, v, obj);
+    }
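Taken together, the scalar helpers above let a derived writer assemble a result object in a handful of lines. A minimal sketch (hypothetical writer instance, field names and values; again the static strings respect the lifetime notes):

    static const std::string TIMESTAMP("timestamp");
    static const std::string SCORE("score");
    static const std::string IS_INTERIM("is_interim");

    TDocument doc = writer.makeDoc();
    writer.addTimeFieldToObj(TIMESTAMP, bucketStart, doc); // seconds -> epoch ms
    writer.addDoubleFieldToObj(SCORE, anomalyScore, doc);  // logged if non-finite
    writer.addBoolFieldToObj(IS_INTERIM, true, doc);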
+    //! Adds a signed integer field with the name fieldname to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addIntFieldToObj(const std::string& fieldName, int64_t value, TValue& obj) const {
+        TValue v(value);
+        this->addMember(fieldName, v, obj);
+    }
+
+    //! Adds an unsigned integer field with the name fieldname to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addUIntFieldToObj(const std::string& fieldName, uint64_t value, TValue& obj) const {
+        TValue v(value);
+        this->addMember(fieldName, v, obj);
+    }
+
+    //! Add an array of strings to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addStringArrayFieldToObj(const std::string& fieldName, const TStrVec& values, TValue& obj) const {
+        this->addArrayToObj(fieldName, values.begin(), values.end(), obj);
+    }
+
+    //! Add an array of strings to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addStringArrayFieldToObj(const std::string& fieldName, const TStrUSet& values, TValue& obj) const {
+        using TStrCPtrVec = std::vector<const std::string*>;
+
+        TStrCPtrVec ordered;
+        ordered.reserve(values.size());
+        for (const auto& value : values) {
+            ordered.push_back(&value);
+        }
+        std::sort(ordered.begin(), ordered.end(), CFunctional::SDereference<std::less<std::string>>());
+
+        addArrayToObj(fieldName,
+                      boost::iterators::make_indirect_iterator(ordered.begin()),
+                      boost::iterators::make_indirect_iterator(ordered.end()),
+                      obj);
+    }
+
+    //! Add an array of pair double, pair double double to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void
+    addDoubleDoubleDoublePrPrArrayFieldToObj(const std::string& fieldName, const TDoubleDoubleDoublePrPrVec& values, TValue& obj) const {
+        TValue array = this->makeArray(values.size());
+
+        bool considerLogging(true);
+        for (const auto& value : values) {
+            double firstVal = value.first;
+            this->checkArrayNumberFinite(firstVal, fieldName, considerLogging);
+            this->pushBack(firstVal, array);
+            double secondFirstVal = value.second.first;
+            this->checkArrayNumberFinite(secondFirstVal, fieldName, considerLogging);
+            this->pushBack(secondFirstVal, array);
+            double secondSecondVal = value.second.second;
+            this->checkArrayNumberFinite(secondSecondVal, fieldName, considerLogging);
+            this->pushBack(secondSecondVal, array);
+        }
+
+        this->addMember(fieldName, array, obj);
+    }
+
+    //! Add an array of pair double double to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    void addDoubleDoublePrArrayFieldToObj(const std::string& firstFieldName,
+                                          const std::string& secondFieldName,
+                                          const TDoubleDoublePrVec& values,
+                                          TValue& obj) const {
+        TValue firstArray = this->makeArray(values.size());
+        TValue secondArray = this->makeArray(values.size());
+
+        bool considerLoggingFirst(true);
+        bool considerLoggingSecond(true);
+        for (const auto& value : values) {
+            double firstVal = value.first;
+            this->checkArrayNumberFinite(firstVal, firstFieldName, considerLoggingFirst);
+            this->pushBack(firstVal, firstArray);
+            double secondVal = value.second;
+            this->checkArrayNumberFinite(secondVal, secondFieldName, considerLoggingSecond);
+            this->pushBack(secondVal, secondArray);
+        }
+
+        this->addMember(firstFieldName, firstArray, obj);
+        this->addMember(secondFieldName, secondArray, obj);
+    }
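Note that the pair helper above emits two parallel arrays rather than a single array of pairs. A minimal sketch (hypothetical field names and data):

    static const std::string LOWER("lower");
    static const std::string UPPER("upper");

    TDoubleDoublePrVec bounds{{1.0, 1.5}, {2.0, 2.5}};
    writer.addDoubleDoublePrArrayFieldToObj(LOWER, UPPER, bounds, obj);
    // obj now contains "lower":[1.0,2.0] and "upper":[1.5,2.5]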
+    //! Add an array of TTimes to an object.
+    //! \p fieldName must outlive \p obj or memory corruption will occur.
+    //! Note: The time values are adjusted to be in standard Java format
+    //! i.e. milliseconds since epoch
+    void addTimeArrayFieldToObj(const std::string& fieldName, const TTimeVec& values, TValue& obj) const {
+        TValue array = this->makeArray(values.size());
+
+        for (const auto& value : values) {
+            this->pushBack(CTimeUtils::toEpochMs(value), array);
+        }
+
+        this->addMember(fieldName, array, obj);
+    }
+
+    //! Checks if the \p obj has a member named \p fieldName and
+    //! removes it if it does.
+    void removeMemberIfPresent(const std::string& fieldName, TValue& obj) const {
+        if (obj.HasMember(fieldName)) {
+            obj.RemoveMember(fieldName);
+        }
+    }
+
+private:
+    //! Log a message if we're trying to add nan/infinity to a JSON array
+    template<typename NUMBER>
+    void checkArrayNumberFinite(NUMBER val, const std::string& fieldName, bool& considerLogging) const {
+        if (considerLogging && !(boost::math::isfinite)(val)) {
+            LOG_ERROR("Adding " << val << " to the \"" << fieldName << "\" array in a JSON document");
+            // Don't return - make a best effort to add the value
+            // Some writers derived from this class may defend themselves by converting to 0
+            considerLogging = false;
+        }
+    }
+
+    //! Convert \p value to a RapidJSON value.
+    TValue asRapidJsonValue(const std::string& value) const { return {value, this->getRawAllocator()}; }
+
+    //! Convert the range [\p begin, \p end) to a RapidJSON array and add to \p obj.
+    template<typename ITR>
+    void addArrayToObj(const std::string& fieldName, ITR begin, ITR end, TValue& obj) const {
+        TValue array = this->makeArray(std::distance(begin, end));
+        for (/**/; begin != end; ++begin) {
+            this->pushBack(asRapidJsonValue(*begin), array);
+        }
+        this->addMember(fieldName, array, obj);
+    }
+
+private:
+    //! cache allocators for potential reuse
+    TStrPoolAllocatorPtrMap m_AllocatorCache;
+
+    //! Allow for different batches of documents to use independent allocators
+    mutable TPoolAllocatorPtrStack m_JsonPoolAllocators;
 };
-
}
}
diff --git a/include/core/CRapidXmlParser.h b/include/core/CRapidXmlParser.h
index 09b11295be..c67ee26bf8 100644
--- a/include/core/CRapidXmlParser.h
+++ b/include/core/CRapidXmlParser.h
@@ -16,14 +16,10 @@
 #include
 #include
-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CRapidXmlStateRestoreTraverser;
-
 //! \brief
 //! Simple C++ wrapper around the RapidXml library.
 //!
@@ -62,130 +58,118 @@ class CRapidXmlStateRestoreTraverser;
 //! at least, we'll try to get acceptable performance from
 //! XML using RapidXml.
 //!
-class CORE_EXPORT CRapidXmlParser : public CXmlParserIntf
-{
-    public:
-        using TStrStrMap = std::map;
-        using TStrStrMapCItr = TStrStrMap::const_iterator;
-
-    public:
-        CRapidXmlParser();
-        virtual ~CRapidXmlParser();
-
-        //! Parse XML stored in a string
-        virtual bool parseString(const std::string &xml);
-
-        //! Parse XML stored in a char buffer
-        virtual bool parseBuffer(const char *begin, size_t length);
-
-        //! Parse XML stored in a char buffer that may be modified by the
-        //! parsing and will outlive this object
-        virtual bool parseBufferInSitu(char *begin, size_t length);
-
-        //! Parse a string ignoring CDATA elements
-        bool parseStringIgnoreCdata(const std::string &xml);
-
-        //! Return the root element name (empty string if not parsed yet)
-        virtual std::string rootElementName() const;
-
-        //! Get the root element attributes (returns false if not parsed yet)
-        bool rootElementAttributes(TStrStrMap &rootAttributes) const;
-
-        //! Dump the document to string
-        virtual std::string dumpToString() const;
-
-        //! Convert the entire XML document into a hierarchy of node objects.
-        //!
This is much more efficient than making repeated calls to - //! evalXPathExpression() to retrieve the entire contents of a parsed - //! document. - virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const; - - //! As above, but use a pool to avoid XML node memory allocations where possible - virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool &pool, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const; - - //! As above, but use a string cache to avoid string representation memory - //! allocations where possible - virtual bool toNodeHierarchy(CStringCache &cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const; - - //! As above, but use both a node pool and a string cache - virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool &pool, - CStringCache &cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const; - - //! Functions for navigating an XML document without converting it to a - //! node hierarchy - virtual bool navigateRoot(); - virtual bool navigateFirstChild(); - virtual bool navigateNext(); - virtual bool navigateParent(); - virtual bool currentNodeName(std::string &name); - virtual bool currentNodeValue(std::string &value); - - //! Convert a node hierarchy to XML. - //! (This will escape the text correctly.) - static void convert(const CXmlNodeWithChildren &root, - std::string &result); - - //! Convert a node hierarchy to XML, optionally without indenting. - //! (This will escape the text correctly.) - static void convert(bool indent, - const CXmlNodeWithChildren &root, - std::string &result); - - private: - using TCharRapidXmlDocument = rapidxml::xml_document; - using TCharRapidXmlNode = rapidxml::xml_node; - using TCharRapidXmlAttribute = rapidxml::xml_attribute; - - //! Called recursively by the public toNodeHierarchy() method - bool toNodeHierarchy(const TCharRapidXmlNode &parentNode, - CXmlNodeWithChildrenPool &pool, - CStringCache *cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &nodePtr) const; - - //! Called recursively by the convert() method - static void convertChildren(const CXmlNodeWithChildren ¤t, - TCharRapidXmlDocument &doc, - TCharRapidXmlNode &xmlNode, - size_t &approxLen); - - //! Parse a buffer with some specified RapidXml flags set - //! without modifying the contents of the buffer - template - bool parseBufferNonDestructive(const char *begin, size_t length); - - //! Parse a string with some specified RapidXml flags set - //! and modifying the contents of the buffer - template - bool parseBufferDestructive(char *begin, size_t length); - - private: - //! RapidXml modifies the input data, so store it in an array rather - //! than in a string to avoid any problems with reference counting in - //! STL strings. (Obviously the template parameter here needs to match - //! the rapidxml typedef template arguments in the typedefs above.) - using TScopedCharArray = boost::scoped_array; - - //! RapidXml parses the XML in-situ, so keep a copy of the input - TScopedCharArray m_XmlBuf; - - //! Size of array allocated - size_t m_XmlBufSize; - - //! The RapidXml data structure - TCharRapidXmlDocument m_Doc; - - //! Pointer to the current node accessed via the navigation API - TCharRapidXmlNode *m_NavigatedNode; +class CORE_EXPORT CRapidXmlParser : public CXmlParserIntf { +public: + using TStrStrMap = std::map; + using TStrStrMapCItr = TStrStrMap::const_iterator; + +public: + CRapidXmlParser(); + virtual ~CRapidXmlParser(); + + //! 
Parse XML stored in a string + virtual bool parseString(const std::string& xml); + + //! Parse XML stored in a char buffer + virtual bool parseBuffer(const char* begin, size_t length); + + //! Parse XML stored in a char buffer that may be modified by the + //! parsing and will outlive this object + virtual bool parseBufferInSitu(char* begin, size_t length); + + //! Parse a string ignoring CDATA elements + bool parseStringIgnoreCdata(const std::string& xml); + + //! Return the root element name (empty string if not parsed yet) + virtual std::string rootElementName() const; + + //! Get the root element attributes (returns false if not parsed yet) + bool rootElementAttributes(TStrStrMap& rootAttributes) const; + + //! Dump the document to string + virtual std::string dumpToString() const; + + //! Convert the entire XML document into a hierarchy of node objects. + //! This is much more efficient than making repeated calls to + //! evalXPathExpression() to retrieve the entire contents of a parsed + //! document. + virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + + //! As above, but use a pool to avoid XML node memory allocations where possible + virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + + //! As above, but use a string cache to avoid string representation memory + //! allocations where possible + virtual bool toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + + //! As above, but use both a node pool and a string cache + virtual bool + toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + + //! Functions for navigating an XML document without converting it to a + //! node hierarchy + virtual bool navigateRoot(); + virtual bool navigateFirstChild(); + virtual bool navigateNext(); + virtual bool navigateParent(); + virtual bool currentNodeName(std::string& name); + virtual bool currentNodeValue(std::string& value); + + //! Convert a node hierarchy to XML. + //! (This will escape the text correctly.) + static void convert(const CXmlNodeWithChildren& root, std::string& result); + + //! Convert a node hierarchy to XML, optionally without indenting. + //! (This will escape the text correctly.) + static void convert(bool indent, const CXmlNodeWithChildren& root, std::string& result); + +private: + using TCharRapidXmlDocument = rapidxml::xml_document; + using TCharRapidXmlNode = rapidxml::xml_node; + using TCharRapidXmlAttribute = rapidxml::xml_attribute; + + //! Called recursively by the public toNodeHierarchy() method + bool toNodeHierarchy(const TCharRapidXmlNode& parentNode, + CXmlNodeWithChildrenPool& pool, + CStringCache* cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& nodePtr) const; + + //! Called recursively by the convert() method + static void + convertChildren(const CXmlNodeWithChildren& current, TCharRapidXmlDocument& doc, TCharRapidXmlNode& xmlNode, size_t& approxLen); + + //! Parse a buffer with some specified RapidXml flags set + //! without modifying the contents of the buffer + template + bool parseBufferNonDestructive(const char* begin, size_t length); + + //! Parse a string with some specified RapidXml flags set + //! and modifying the contents of the buffer + template + bool parseBufferDestructive(char* begin, size_t length); + +private: + //! RapidXml modifies the input data, so store it in an array rather + //! 
than in a string to avoid any problems with reference counting in + //! STL strings. (Obviously the template parameter here needs to match + //! the rapidxml typedef template arguments in the typedefs above.) + using TScopedCharArray = boost::scoped_array; + + //! RapidXml parses the XML in-situ, so keep a copy of the input + TScopedCharArray m_XmlBuf; + + //! Size of array allocated + size_t m_XmlBufSize; + + //! The RapidXml data structure + TCharRapidXmlDocument m_Doc; + + //! Pointer to the current node accessed via the navigation API + TCharRapidXmlNode* m_NavigatedNode; friend class CRapidXmlStateRestoreTraverser; }; - - } } #endif // INCLUDED_ml_core_CRapidXmlParser_h - diff --git a/include/core/CRapidXmlStatePersistInserter.h b/include/core/CRapidXmlStatePersistInserter.h index b842a7999a..5903d255b9 100644 --- a/include/core/CRapidXmlStatePersistInserter.h +++ b/include/core/CRapidXmlStatePersistInserter.h @@ -14,12 +14,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! For persisting state in XML format. @@ -32,67 +28,61 @@ namespace core //! Directly uses RapidXml to avoid the inefficiency of an //! intermediate node hierarchy. //! -class CORE_EXPORT CRapidXmlStatePersistInserter : public CStatePersistInserter -{ - public: - using TStrStrMap = std::map; - using TStrStrMapCItr = TStrStrMap::const_iterator; +class CORE_EXPORT CRapidXmlStatePersistInserter : public CStatePersistInserter { +public: + using TStrStrMap = std::map; + using TStrStrMapCItr = TStrStrMap::const_iterator; - public: - //! Root node has no attributes - CRapidXmlStatePersistInserter(const std::string &rootName); +public: + //! Root node has no attributes + CRapidXmlStatePersistInserter(const std::string& rootName); - //! Root node has attributes - CRapidXmlStatePersistInserter(const std::string &rootName, - const TStrStrMap &rootAttributes); + //! Root node has attributes + CRapidXmlStatePersistInserter(const std::string& rootName, const TStrStrMap& rootAttributes); - //! Store a name/value - virtual void insertValue(const std::string &name, - const std::string &value); + //! Store a name/value + virtual void insertValue(const std::string& name, const std::string& value); - // Bring extra base class overloads into scope - using CStatePersistInserter::insertValue; + // Bring extra base class overloads into scope + using CStatePersistInserter::insertValue; - //! Convert to UTF-8 XML representation - void toXml(std::string &xml) const; + //! Convert to UTF-8 XML representation + void toXml(std::string& xml) const; - //! Convert to UTF-8 XML representation, optionally without indentation - void toXml(bool indent, std::string &xml) const; + //! Convert to UTF-8 XML representation, optionally without indentation + void toXml(bool indent, std::string& xml) const; - protected: - //! Start a new level with the given name - virtual void newLevel(const std::string &name); +protected: + //! Start a new level with the given name + virtual void newLevel(const std::string& name); - //! End the current level - virtual void endLevel(); + //! End the current level + virtual void endLevel(); - private: - //! Get a const char * version of a string that will last at least as - //! long as the RapidXml document - const char *nameFromCache(const std::string &name); +private: + //! Get a const char * version of a string that will last at least as + //! long as the RapidXml document + const char* nameFromCache(const std::string& name); - private: - //! 
XML documents are likely to contain the same node names many times, - //! so just store each unique name once for efficiency - CStringCache m_NameCache; +private: + //! XML documents are likely to contain the same node names many times, + //! so just store each unique name once for efficiency + CStringCache m_NameCache; - using TCharRapidXmlDocument = rapidxml::xml_document; - using TCharRapidXmlNode = rapidxml::xml_node; + using TCharRapidXmlDocument = rapidxml::xml_document; + using TCharRapidXmlNode = rapidxml::xml_node; - //! The RapidXml data structure - TCharRapidXmlDocument m_Doc; + //! The RapidXml data structure + TCharRapidXmlDocument m_Doc; - //! Parent of the level we're currently inserting to - TCharRapidXmlNode *m_LevelParent; + //! Parent of the level we're currently inserting to + TCharRapidXmlNode* m_LevelParent; - //! Approximate size of final string - used to reserve memory to - //! minimise reallocations during conversion to string representation - size_t m_ApproxLen; + //! Approximate size of final string - used to reserve memory to + //! minimise reallocations during conversion to string representation + size_t m_ApproxLen; }; - - } } #endif // INCLUDED_ml_core_CRapidXmlStatePersistInserter_h - diff --git a/include/core/CRapidXmlStateRestoreTraverser.h b/include/core/CRapidXmlStateRestoreTraverser.h index 303679f4a3..5d9b4961da 100644 --- a/include/core/CRapidXmlStateRestoreTraverser.h +++ b/include/core/CRapidXmlStateRestoreTraverser.h @@ -10,12 +10,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! For restoring state in XML format. @@ -35,70 +31,66 @@ namespace core //! Does NOT support CDATA in state XML - any CDATA content is //! currently ignored. //! -class CORE_EXPORT CRapidXmlStateRestoreTraverser : public CStateRestoreTraverser -{ - public: - CRapidXmlStateRestoreTraverser(const CRapidXmlParser &parser); - - //! Navigate to the next element at the current level, or return false - //! if there isn't one - virtual bool next(); - - //! Does the current element have a sub-level? - virtual bool hasSubLevel() const; - - //! Get the name of the current element - the returned reference is only - //! valid for as long as the traverser is pointing at the same element - virtual const std::string &name() const; - - //! Get the value of the current element - the returned reference is - //! only valid for as long as the traverser is pointing at the same - //! element - virtual const std::string &value() const; - - //! Has the end of the underlying document been reached? - virtual bool isEof() const; - - protected: - //! Navigate to the start of the sub-level of the current element, or - //! return false if there isn't one - virtual bool descend(); - - //! Navigate to the element of the level above from which descend() was - //! called, or return false if there isn't a level above - virtual bool ascend(); - - private: - //! Get a pointer to the next node element sibling of the current node, - //! or return NULL if there isn't one - CRapidXmlParser::TCharRapidXmlNode *nextNodeElement() const; - - //! Get a pointer to the first child node element of the current node, - //! or return NULL if there isn't one - CRapidXmlParser::TCharRapidXmlNode *firstChildNodeElement() const; - - private: - //! The parser that has been used to parse the document to be traversed - const CRapidXmlParser &m_Parser; - - //! Pointer to current node within the document - CRapidXmlParser::TCharRapidXmlNode *m_CurrentNode; - - //! 
RapidXml stores strings as const char *s, which we don't want to - //! use widely throughout our codebase. These strings store copies of - //! the name and value of the current node so that the name() and - //! value() methods can return them quickly. - mutable std::string m_CachedName; - mutable std::string m_CachedValue; - - //! Are m_CachedName and m_CachedValue valid? - mutable bool m_IsNameCacheValid; - mutable bool m_IsValueCacheValid; +class CORE_EXPORT CRapidXmlStateRestoreTraverser : public CStateRestoreTraverser { +public: + CRapidXmlStateRestoreTraverser(const CRapidXmlParser& parser); + + //! Navigate to the next element at the current level, or return false + //! if there isn't one + virtual bool next(); + + //! Does the current element have a sub-level? + virtual bool hasSubLevel() const; + + //! Get the name of the current element - the returned reference is only + //! valid for as long as the traverser is pointing at the same element + virtual const std::string& name() const; + + //! Get the value of the current element - the returned reference is + //! only valid for as long as the traverser is pointing at the same + //! element + virtual const std::string& value() const; + + //! Has the end of the underlying document been reached? + virtual bool isEof() const; + +protected: + //! Navigate to the start of the sub-level of the current element, or + //! return false if there isn't one + virtual bool descend(); + + //! Navigate to the element of the level above from which descend() was + //! called, or return false if there isn't a level above + virtual bool ascend(); + +private: + //! Get a pointer to the next node element sibling of the current node, + //! or return NULL if there isn't one + CRapidXmlParser::TCharRapidXmlNode* nextNodeElement() const; + + //! Get a pointer to the first child node element of the current node, + //! or return NULL if there isn't one + CRapidXmlParser::TCharRapidXmlNode* firstChildNodeElement() const; + +private: + //! The parser that has been used to parse the document to be traversed + const CRapidXmlParser& m_Parser; + + //! Pointer to current node within the document + CRapidXmlParser::TCharRapidXmlNode* m_CurrentNode; + + //! RapidXml stores strings as const char *s, which we don't want to + //! use widely throughout our codebase. These strings store copies of + //! the name and value of the current node so that the name() and + //! value() methods can return them quickly. + mutable std::string m_CachedName; + mutable std::string m_CachedValue; + + //! Are m_CachedName and m_CachedValue valid? + mutable bool m_IsNameCacheValid; + mutable bool m_IsValueCacheValid; }; - - } } #endif // INCLUDED_ml_core_CRapidXmlStateRestoreTraverser_h - diff --git a/include/core/CReadWriteLock.h b/include/core/CReadWriteLock.h index c059d1b612..687e8c95c5 100644 --- a/include/core/CReadWriteLock.h +++ b/include/core/CReadWriteLock.h @@ -14,12 +14,8 @@ #include #endif - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Wrapper class around pthread rw lock. @@ -41,29 +37,25 @@ namespace core //! //! All errors are just warnings - no action taken. //! 
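A usage sketch for the lock wrapper documented above (the shared structure is hypothetical; the scoped RAII wrappers that appear later in this patch are normally preferable to calling these functions directly):

    ml::core::CReadWriteLock cacheLock;

    // Many readers may hold the lock concurrently...
    cacheLock.readLock();
    // ... read the shared structure ...
    cacheLock.readUnlock();

    // ...but a writer requires exclusive access.
    cacheLock.writeLock();
    // ... update the shared structure ...
    cacheLock.writeUnlock();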
-class CORE_EXPORT CReadWriteLock : private CNonCopyable -{ - public: - CReadWriteLock(); - ~CReadWriteLock(); +class CORE_EXPORT CReadWriteLock : private CNonCopyable { +public: + CReadWriteLock(); + ~CReadWriteLock(); - void readLock(); - void readUnlock(); + void readLock(); + void readUnlock(); - void writeLock(); - void writeUnlock(); + void writeLock(); + void writeUnlock(); - private: +private: #ifdef Windows - SRWLOCK m_ReadWriteLock; + SRWLOCK m_ReadWriteLock; #else - pthread_rwlock_t m_ReadWriteLock; + pthread_rwlock_t m_ReadWriteLock; #endif }; - - } } #endif // INCLUDED_ml_core_CReadWriteLock_h - diff --git a/include/core/CRegex.h b/include/core/CRegex.h index af6a43af25..383c339833 100644 --- a/include/core/CRegex.h +++ b/include/core/CRegex.h @@ -12,12 +12,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Wrapper around boost::regex @@ -28,62 +24,49 @@ namespace core //! IMPLEMENTATION DECISIONS:\n //! Uses init method to initialise so exceptions can be caught. //! -class CORE_EXPORT CRegex -{ - public: - using TStrVec = std::vector; - using TStrVecItr = TStrVec::iterator; - using TStrVecCItr = TStrVec::const_iterator; - - public: - CRegex(); - - bool init(const std::string &); - - //! Simple match test for a string - bool matches(const std::string &) const; - - //! Find the position within a string at which this regex first matches - bool search(size_t startPos, - const std::string &str, - size_t &position, - size_t &length) const; - bool search(size_t startPos, - const std::string &str, - size_t &position) const; - bool search(const std::string &str, - size_t &position, - size_t &length) const; - bool search(const std::string &str, - size_t &position) const; - - //! Match a string with the regex AND - //! tokenise a string by sub-expressions (...) - //! This is based on the 'grouping' syntax in perl regex - bool tokenise(const std::string &, - TStrVec &) const; - - //! Split a string based on a regex - bool split(const std::string &, - TStrVec &) const; - - //! Get the pattern string (not a reference due to boost API) - std::string str() const; - - //! How much of the regex is literal characters rather than character - //! classes? - size_t literalCount() const; - - //! Useful for converting a string literal into a regex that will match - //! it - static std::string escapeRegexSpecial(const std::string &literal); - - private: - bool m_Initialised; - boost::regex m_Regex; -}; +class CORE_EXPORT CRegex { +public: + using TStrVec = std::vector; + using TStrVecItr = TStrVec::iterator; + using TStrVecCItr = TStrVec::const_iterator; + +public: + CRegex(); + + bool init(const std::string&); + + //! Simple match test for a string + bool matches(const std::string&) const; + //! Find the position within a string at which this regex first matches + bool search(size_t startPos, const std::string& str, size_t& position, size_t& length) const; + bool search(size_t startPos, const std::string& str, size_t& position) const; + bool search(const std::string& str, size_t& position, size_t& length) const; + bool search(const std::string& str, size_t& position) const; + //! Match a string with the regex AND + //! tokenise a string by sub-expressions (...) + //! This is based on the 'grouping' syntax in perl regex + bool tokenise(const std::string&, TStrVec&) const; + + //! Split a string based on a regex + bool split(const std::string&, TStrVec&) const; + + //! 
Get the pattern string (not a reference due to boost API) + std::string str() const; + + //! How much of the regex is literal characters rather than character + //! classes? + size_t literalCount() const; + + //! Useful for converting a string literal into a regex that will match + //! it + static std::string escapeRegexSpecial(const std::string& literal); + +private: + bool m_Initialised; + boost::regex m_Regex; +}; } } diff --git a/include/core/CRegexFilter.h b/include/core/CRegexFilter.h index cefd5a7320..b924c29f19 100644 --- a/include/core/CRegexFilter.h +++ b/include/core/CRegexFilter.h @@ -13,12 +13,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Filters strings based on a list of regular expressions @@ -29,29 +25,27 @@ namespace core //! will iteratively apply each regex to the string until no //! match can be found and it will remove all matched substrings. //! -class CORE_EXPORT CRegexFilter -{ - public: - using TRegexVec = std::vector; - using TStrVec = std::vector; - - public: - CRegexFilter(); - - //! Configures the filter for the given \p regularExpressions. - bool configure(const TStrVec ®ularExpressions); - - //! Applies the filter to \p target. - std::string apply(const std::string &target) const; - - //! Returns true if the filter is empty. - bool empty() const; - private: - //! The regular expressions comprising the filter. - TRegexVec m_Regex; -}; +class CORE_EXPORT CRegexFilter { +public: + using TRegexVec = std::vector; + using TStrVec = std::vector; + +public: + CRegexFilter(); + //! Configures the filter for the given \p regularExpressions. + bool configure(const TStrVec& regularExpressions); + //! Applies the filter to \p target. + std::string apply(const std::string& target) const; + + //! Returns true if the filter is empty. + bool empty() const; + +private: + //! The regular expressions comprising the filter. + TRegexVec m_Regex; +}; } } diff --git a/include/core/CResourceLocator.h b/include/core/CResourceLocator.h index ccc4075360..0e00f95042 100644 --- a/include/core/CResourceLocator.h +++ b/include/core/CResourceLocator.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Class to find the appropriate directories for various things. @@ -34,24 +30,20 @@ namespace core //! so the methods should not be called repeatedly. It is assumed that //! the classes using this class will do their own caching if required. //! -class CORE_EXPORT CResourceLocator : private CNonInstantiatable -{ - public: - //! Get the directory that stores resource files, e.g. timezone - //! details and dictionary words. - static std::string resourceDir(); - - //! Get the directory that stores log files. - static std::string logDir(); - - //! Get the root directory for the C++ section of the source tree. - //! (Obviously this should only be used in test code!) - static std::string cppRootDir(); +class CORE_EXPORT CResourceLocator : private CNonInstantiatable { +public: + //! Get the directory that stores resource files, e.g. timezone + //! details and dictionary words. + static std::string resourceDir(); + + //! Get the directory that stores log files. + static std::string logDir(); + + //! Get the root directory for the C++ section of the source tree. + //! (Obviously this should only be used in test code!) 
+ static std::string cppRootDir(); }; - - } } #endif // INCLUDED_ml_core_CResourceLocator_h - diff --git a/include/core/CScopedFastLock.h b/include/core/CScopedFastLock.h index 0c66059de2..f53a9520a1 100644 --- a/include/core/CScopedFastLock.h +++ b/include/core/CScopedFastLock.h @@ -9,11 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CFastMutex; //! \brief @@ -25,22 +22,18 @@ class CFastMutex; //! IMPLEMENTATION DECISIONS:\n //! See Schmidt etc. for details. //! -class CORE_EXPORT CScopedFastLock : private CNonCopyable -{ - public: - //! Lock specified mutex - CScopedFastLock(CFastMutex &mutex); +class CORE_EXPORT CScopedFastLock : private CNonCopyable { +public: + //! Lock specified mutex + CScopedFastLock(CFastMutex& mutex); - //! Unlock specified mutex - ~CScopedFastLock(); + //! Unlock specified mutex + ~CScopedFastLock(); - private: - CFastMutex &m_Mutex; +private: + CFastMutex& m_Mutex; }; - - } } #endif // INCLUDED_ml_core_CScopedFastLock_h - diff --git a/include/core/CScopedLock.h b/include/core/CScopedLock.h index e7294b50bc..7ff91c0842 100644 --- a/include/core/CScopedLock.h +++ b/include/core/CScopedLock.h @@ -9,11 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CMutex; //! \brief @@ -25,22 +22,18 @@ class CMutex; //! IMPLEMENTATION DECISIONS:\n //! See Schmidt etc. for details. //! -class CORE_EXPORT CScopedLock : private CNonCopyable -{ - public: - //! Lock specified mutex - CScopedLock(CMutex &mutex); +class CORE_EXPORT CScopedLock : private CNonCopyable { +public: + //! Lock specified mutex + CScopedLock(CMutex& mutex); - //! Unlock specified mutex - ~CScopedLock(); + //! Unlock specified mutex + ~CScopedLock(); - private: - CMutex &m_Mutex; +private: + CMutex& m_Mutex; }; - - } } #endif // INCLUDED_ml_core_CScopedLock_h - diff --git a/include/core/CScopedRapidJsonPoolAllocator.h b/include/core/CScopedRapidJsonPoolAllocator.h index 54f55328f2..8fee1903e7 100644 --- a/include/core/CScopedRapidJsonPoolAllocator.h +++ b/include/core/CScopedRapidJsonPoolAllocator.h @@ -9,10 +9,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! A scoped rapidjson memory allocator //! @@ -24,26 +22,17 @@ namespace core //! Remove the allocator from the writer and release resources on destruction. //! template -class CScopedRapidJsonPoolAllocator -{ - public: - //! \p allocatorName Unique identifier for the allocator - //! \p jsonOutputWriter JSON output writer that will make use of the allocator - CScopedRapidJsonPoolAllocator(const std::string &allocatorName, T &writer) - : m_Writer(writer) - { - m_Writer.pushAllocator(allocatorName); - } +class CScopedRapidJsonPoolAllocator { +public: + //! \p allocatorName Unique identifier for the allocator + //! 
\p jsonOutputWriter JSON output writer that will make use of the allocator + CScopedRapidJsonPoolAllocator(const std::string& allocatorName, T& writer) : m_Writer(writer) { m_Writer.pushAllocator(allocatorName); } - ~CScopedRapidJsonPoolAllocator() - { - m_Writer.popAllocator(); - } + ~CScopedRapidJsonPoolAllocator() { m_Writer.popAllocator(); } - private: - T &m_Writer; +private: + T& m_Writer; }; - } } #endif // INCLUDED_ml_core_CScopedRapidJsonPoolAllocator_h diff --git a/include/core/CScopedReadLock.h b/include/core/CScopedReadLock.h index 9300f255e2..cbd5ec51f9 100644 --- a/include/core/CScopedReadLock.h +++ b/include/core/CScopedReadLock.h @@ -9,11 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CReadWriteLock; //! \brief @@ -25,22 +22,18 @@ class CReadWriteLock; //! IMPLEMENTATION DECISIONS:\n //! See Schmidt etc. for details. //! -class CORE_EXPORT CScopedReadLock : private CNonCopyable -{ - public: - //! Read lock specified read/write lock - CScopedReadLock(CReadWriteLock &readWriteLock); +class CORE_EXPORT CScopedReadLock : private CNonCopyable { +public: + //! Read lock specified read/write lock + CScopedReadLock(CReadWriteLock& readWriteLock); - //! Unlock specified read/write lock - ~CScopedReadLock(); + //! Unlock specified read/write lock + ~CScopedReadLock(); - private: - CReadWriteLock &m_ReadWriteLock; +private: + CReadWriteLock& m_ReadWriteLock; }; - - } } #endif // INCLUDED_ml_core_CScopedReadLock_h - diff --git a/include/core/CScopedWriteLock.h b/include/core/CScopedWriteLock.h index 7258a3ca08..36dd63a823 100644 --- a/include/core/CScopedWriteLock.h +++ b/include/core/CScopedWriteLock.h @@ -9,11 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CReadWriteLock; //! \brief @@ -25,22 +22,18 @@ class CReadWriteLock; //! IMPLEMENTATION DECISIONS:\n //! See Schmidt etc. for details. //! -class CORE_EXPORT CScopedWriteLock : private CNonCopyable -{ - public: - //! Write lock specified read/write lock - CScopedWriteLock(CReadWriteLock &readWriteLock); +class CORE_EXPORT CScopedWriteLock : private CNonCopyable { +public: + //! Write lock specified read/write lock + CScopedWriteLock(CReadWriteLock& readWriteLock); - //! Unlock specified read/write lock - ~CScopedWriteLock(); + //! Unlock specified read/write lock + ~CScopedWriteLock(); - private: - CReadWriteLock &m_ReadWriteLock; +private: + CReadWriteLock& m_ReadWriteLock; }; - - } } #endif // INCLUDED_ml_core_CScopedWriteLock_h - diff --git a/include/core/CSetEnv.h b/include/core/CSetEnv.h index 180f835fa7..60a5fe4f4d 100644 --- a/include/core/CSetEnv.h +++ b/include/core/CSetEnv.h @@ -9,12 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper for the setenv() function. @@ -27,17 +23,11 @@ namespace core //! _putenv_s() function with slightly different semantics to Unix's //! setenv(). //! 
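The wrapper follows the Unix setenv() semantics described above; a minimal usage sketch (the variable name and value are hypothetical, and the return convention is assumed to match POSIX setenv(), i.e. 0 on success):

    // Set ML_LOG_DIR only if it is not already present (overwrite == 0).
    if (ml::core::CSetEnv::setEnv("ML_LOG_DIR", "/var/log/ml", 0) != 0) {
        // best effort only - log and carry on
    }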
-class CORE_EXPORT CSetEnv : private CNonInstantiatable
-{
-    public:
-        static int setEnv(const char *name,
-                          const char *value,
-                          int overwrite);
+class CORE_EXPORT CSetEnv : private CNonInstantiatable {
+public:
+    static int setEnv(const char* name, const char* value, int overwrite);
 };
-
-
}
}
#endif // INCLUDED_ml_core_CSetEnv_h
-
diff --git a/include/core/CSetMode.h b/include/core/CSetMode.h
index 23d9d03f60..66647b73e0 100644
--- a/include/core/CSetMode.h
+++ b/include/core/CSetMode.h
@@ -9,11 +9,8 @@
 #include
 #include
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {
 //! \brief
 //! Portable wrapper for the Windows _setmode(fd, mode) function.
 //!
 //! DESCRIPTION:\n
 //! Set the input streams translation mode.
 //! Input streams can be in various text modes or binary.
-//! In binary mode end-of-line translation does not take place
+//! In binary mode end-of-line translation does not take place
 //! and ascii character 26 (end-of-transmission) is ignored.
 //!
 //! IMPLEMENTATION DECISIONS:\n
-//! This has been broken into a class of its own because it is
-//! a Windows specific function. There is no need for an equivalent
-//! call on *nix as *nix input streams don't interperet ascii
-//! character code 26 as end of transmission (the character send by
-//! pressing Ctrl D on Windows).
+//! This has been broken into a class of its own because it is
+//! a Windows specific function. There is no need for an equivalent
+//! call on *nix as *nix input streams don't interpret ascii
+//! character code 26 as end of transmission (the character sent by
+//! pressing Ctrl Z on Windows).
 //!
-class CORE_EXPORT CSetMode : private CNonInstantiatable
-{
-    public:
-        static int setMode(int fd, int mode);
-        static int setBinaryMode(int fd);
+class CORE_EXPORT CSetMode : private CNonInstantiatable {
+public:
+    static int setMode(int fd, int mode);
+    static int setBinaryMode(int fd);
 };
-
-
}
}
#endif // INCLUDED_ml_core_CSetMode_h
-
diff --git a/include/core/CShellArgQuoter.h b/include/core/CShellArgQuoter.h
index dfdc4e7b2b..31e7895ebb 100644
--- a/include/core/CShellArgQuoter.h
+++ b/include/core/CShellArgQuoter.h
@@ -11,12 +11,8 @@
 #include
-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {
 //! \brief
 //! Quote a shell argument.
 //!
 //! DESCRIPTION:\n
 //! Quote a shell argument such that it is treated as a single
 //! argument and no expansion is applied to its contents.
 //!
 //! IMPLEMENTATION DECISIONS:\n
 //! For example, on Windows, if the argument to be quoted
 //! contains %ANYTHING% then we need to ensure that the command
 //! processor receives the literal %ANYTHING%
 //! and not the directory path that the environment variable
 //! expands to.
 //!
-class CORE_EXPORT CShellArgQuoter : private CNonInstantiatable
-{
-    public:
-        //! Returns /tmp on Unix or an expansion of %TEMP% on Windows
-        static std::string quote(const std::string &arg);
+class CORE_EXPORT CShellArgQuoter : private CNonInstantiatable {
+public:
+    //! Quote \p arg so that it is passed through as a single literal argument
+    static std::string quote(const std::string& arg);
 };
-
-
}
}
#endif // INCLUDED_ml_core_CShellArgQuoter_h
-
diff --git a/include/core/CSleep.h b/include/core/CSleep.h
index 3d54430546..975dd64752 100644
--- a/include/core/CSleep.h
+++ b/include/core/CSleep.h
@@ -11,12 +11,8 @@
 #include
-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {
 //! \brief
 //! Functions related to sleeping
 //!
 //! DESCRIPTION:\n
 //! This class standardises sleeps specified in
 //! milliseconds. The reason is that Windows can only sleep for a multiple of
 //! milliseconds, and we can't use functionality that's only available on Unix.
 //!
-class CORE_EXPORT CSleep : private CNonInstantiatable
-{
-    public:
-        //! A processing delay that has been found (by trial and error) to slow
-        //! down a thread when required, but without causing unwanted MySQL
-        //! disconnections.
- static const uint32_t DEFAULT_PROCESSING_DELAY; - - public: - //! Sleep for the given period of time. Be aware that the operating - //! system may round this up. Windows sleeps are multiples of 1/64 seconds, - //! i.e. multiples of 15.625 milliseconds. Basically, don't expect this to - //! be ultra-accurate. - static void sleep(uint32_t milliseconds); - - //! Delay processing for a period of time that has been observed to not - //! cause problems like database disconnections, socket overflows, etc. - static void delayProcessing(); +class CORE_EXPORT CSleep : private CNonInstantiatable { +public: + //! A processing delay that has been found (by trial and error) to slow + //! down a thread when required, but without causing unwanted MySQL + //! disconnections. + static const uint32_t DEFAULT_PROCESSING_DELAY; + +public: + //! Sleep for the given period of time. Be aware that the operating + //! system may round this up. Windows sleeps are multiples of 1/64 seconds, + //! i.e. multiples of 15.625 milliseconds. Basically, don't expect this to + //! be ultra-accurate. + static void sleep(uint32_t milliseconds); + + //! Delay processing for a period of time that has been observed to not + //! cause problems like database disconnections, socket overflows, etc. + static void delayProcessing(); }; - - } } #endif // INCLUDED_ml_core_CSleep_h - diff --git a/include/core/CSmallVector.h b/include/core/CSmallVector.h index 6c5116f2b5..42bf68a4db 100644 --- a/include/core/CSmallVector.h +++ b/include/core/CSmallVector.h @@ -17,54 +17,42 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! Map boost::container::small_vector_base for consistent naming. template using CSmallVectorBase = boost::container::small_vector_base; -namespace small_vector_detail -{ +namespace small_vector_detail { template -struct SPlusAssign -{ +struct SPlusAssign { static_assert(sizeof(T) < 0, "The contained type has no defined += operator"); }; template -struct SPlusAssign -{ - static void compute(CSmallVectorBase &lhs, const CSmallVectorBase &rhs) - { - for (std::size_t i = 0u; i < std::min(lhs.size(), rhs.size()); ++i) - { +struct SPlusAssign { + static void compute(CSmallVectorBase& lhs, const CSmallVectorBase& rhs) { + for (std::size_t i = 0u; i < std::min(lhs.size(), rhs.size()); ++i) { lhs[i] += rhs[i]; } } }; template -struct SMinusAssign -{ +struct SMinusAssign { static_assert(sizeof(T) < 0, "The contained type has no defined -= operator"); }; template -struct SMinusAssign -{ - static void compute(CSmallVectorBase &lhs, const CSmallVectorBase &rhs) - { - for (std::size_t i = 0u; i < std::min(lhs.size(), rhs.size()); ++i) - { +struct SMinusAssign { + static void compute(CSmallVectorBase& lhs, const CSmallVectorBase& rhs) { + for (std::size_t i = 0u; i < std::min(lhs.size(), rhs.size()); ++i) { lhs[i] -= rhs[i]; } } }; - } //! \brief This inherits from boost::container::small_vector. @@ -83,88 +71,78 @@ struct SMinusAssign //! \tparam N The maximum number of elements which are stored on //! the stack. 
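CSmallVector trades a slightly larger object for avoiding heap allocation in the common case: up to N elements are stored inline, and only growing beyond N spills to the heap. A minimal sketch (the typedef name is illustrative; the element-wise += shown is the non-standard extension defined below):

    using TDouble2Vec = ml::core::CSmallVector<double, 2>;

    TDouble2Vec values{1.0, 2.0}; // stored inline, no allocation
    values.push_back(3.0);        // exceeds N == 2, falls back to the heap

    TDouble2Vec rhs{0.5, 0.5, 0.5};
    values += rhs;                // element-wise over the common length
    // values == {1.5, 2.5, 3.5}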
template -class CSmallVector : public boost::container::small_vector -{ - private: - using TBase = boost::container::small_vector; - - public: - // Forward typedefs - using value_type = typename TBase::value_type; - using allocator_type = typename TBase::allocator_type; - using reference = typename TBase::reference; - using const_reference = typename TBase::const_reference; - using pointer = typename TBase::pointer; - using const_pointer = typename TBase::const_pointer; - using difference_type = typename TBase::difference_type; - using size_type = typename TBase::size_type; - using iterator = typename TBase::iterator; - using const_iterator = typename TBase::const_iterator; - using reverse_iterator = typename TBase::reverse_iterator; - using const_reverse_iterator = typename TBase::const_reverse_iterator; - - public: - //! \name Constructors - //@{ - CSmallVector() {} - CSmallVector(const CSmallVector &other) : TBase(other) {} - CSmallVector(CSmallVector &&other) : TBase(std::move(other.baseRef())) {} - explicit CSmallVector(size_type n, const value_type &val = value_type()) : TBase(n, val) {} - CSmallVector(std::initializer_list list) : TBase(list.begin(), list.end()) {} - template - CSmallVector(ITR first, ITR last) : TBase(first, last) {} - template - CSmallVector(const CSmallVector &other) : TBase(other.begin(), other.end()) {} - template - CSmallVector(std::initializer_list list) : TBase(list.begin(), list.end()) {} - // Extend to construct implicitly from a vector. - template - CSmallVector(const std::vector &other) : TBase(other.begin(), other.end()) {} - - CSmallVector &operator=(CSmallVector &&rhs) - { - this->baseRef() = std::move(rhs.baseRef()); - return *this; - } - CSmallVector &operator=(const CSmallVector &rhs) - { - this->baseRef() = rhs.baseRef(); - return *this; - } +class CSmallVector : public boost::container::small_vector { +private: + using TBase = boost::container::small_vector; + +public: + // Forward typedefs + using value_type = typename TBase::value_type; + using allocator_type = typename TBase::allocator_type; + using reference = typename TBase::reference; + using const_reference = typename TBase::const_reference; + using pointer = typename TBase::pointer; + using const_pointer = typename TBase::const_pointer; + using difference_type = typename TBase::difference_type; + using size_type = typename TBase::size_type; + using iterator = typename TBase::iterator; + using const_iterator = typename TBase::const_iterator; + using reverse_iterator = typename TBase::reverse_iterator; + using const_reverse_iterator = typename TBase::const_reverse_iterator; + +public: + //! \name Constructors + //@{ + CSmallVector() {} + CSmallVector(const CSmallVector& other) : TBase(other) {} + CSmallVector(CSmallVector&& other) : TBase(std::move(other.baseRef())) {} + explicit CSmallVector(size_type n, const value_type& val = value_type()) : TBase(n, val) {} + CSmallVector(std::initializer_list list) : TBase(list.begin(), list.end()) {} + template + CSmallVector(ITR first, ITR last) : TBase(first, last) {} + template + CSmallVector(const CSmallVector& other) : TBase(other.begin(), other.end()) {} + template + CSmallVector(std::initializer_list list) : TBase(list.begin(), list.end()) {} + // Extend to construct implicitly from a vector. 
+ template + CSmallVector(const std::vector& other) : TBase(other.begin(), other.end()) {} + + CSmallVector& operator=(CSmallVector&& rhs) { + this->baseRef() = std::move(rhs.baseRef()); + return *this; + } + CSmallVector& operator=(const CSmallVector& rhs) { + this->baseRef() = rhs.baseRef(); + return *this; + } - // Extend to convert implicitly to a vector. - inline operator std::vector () const - { - return std::vector(this->begin(), this->end()); - } + // Extend to convert implicitly to a vector. + inline operator std::vector() const { return std::vector(this->begin(), this->end()); } - // Non-standard plus assign for the case that T has operator+=. - const CSmallVector &operator+=(const CSmallVectorBase &rhs) - { - using MaybeTrue = typename boost::has_plus_assign::type; - small_vector_detail::SPlusAssign::compute(*this, rhs); - return *this; - } + // Non-standard plus assign for the case that T has operator+=. + const CSmallVector& operator+=(const CSmallVectorBase& rhs) { + using MaybeTrue = typename boost::has_plus_assign::type; + small_vector_detail::SPlusAssign::compute(*this, rhs); + return *this; + } - // Non-standard minus assign for the case that T has operator-=. - const CSmallVector &operator-=(const CSmallVectorBase &rhs) - { - using MaybeTrue = typename boost::has_minus_assign::type; - small_vector_detail::SMinusAssign::compute(*this, rhs); - return *this; - } + // Non-standard minus assign for the case that T has operator-=. + const CSmallVector& operator-=(const CSmallVectorBase& rhs) { + using MaybeTrue = typename boost::has_minus_assign::type; + small_vector_detail::SMinusAssign::compute(*this, rhs); + return *this; + } - private: - TBase &baseRef() { return *this; } - const TBase &baseRef() const { return *this; } +private: + TBase& baseRef() { return *this; } + const TBase& baseRef() const { return *this; } }; template -std::ostream &operator<<(std::ostream &o, const CSmallVector &v) -{ +std::ostream& operator<<(std::ostream& o, const CSmallVector& v) { return o << core::CContainerPrinter::print(v.begin(), v.end()); } - } } diff --git a/include/core/CStat.h b/include/core/CStat.h index 21b339fb09..36b70a7cf7 100644 --- a/include/core/CStat.h +++ b/include/core/CStat.h @@ -10,11 +10,8 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! An atomic statistic object @@ -27,33 +24,31 @@ namespace core //! IMPLEMENTATION DECISIONS:\n //! Not copyable - it only makes sense to have one instance of this //! -class CORE_EXPORT CStat : private CNonCopyable -{ - public: - //! Default constructor - CStat(); +class CORE_EXPORT CStat : private CNonCopyable { +public: + //! Default constructor + CStat(); - //! Add the value 1 to this stat - void increment(); + //! Add the value 1 to this stat + void increment(); - //! Add some value to this stat - void increment(uint64_t value); + //! Add some value to this stat + void increment(uint64_t value); - //! Remove the value 1 from this stat - void decrement(); + //! Remove the value 1 from this stat + void decrement(); - //! Set the stat to this new value - void set(uint64_t value); + //! Set the stat to this new value + void set(uint64_t value); - //! Get the value of this stat - uint64_t value() const; + //! Get the value of this stat + uint64_t value() const; - private: - //! The counter value of this stat - std::atomic_uint_fast64_t m_Value; +private: + //! 
The counter value of this stat + std::atomic_uint_fast64_t m_Value; }; - } // core } // ml diff --git a/include/core/CStateCompressor.h b/include/core/CStateCompressor.h index 59633ad719..d83158bb67 100644 --- a/include/core/CStateCompressor.h +++ b/include/core/CStateCompressor.h @@ -14,10 +14,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CCompressOStream; @@ -41,124 +39,112 @@ class CCompressOStream; //! that downstream CDataAdder/CDataSearcher store will //! support strings of Base64 encoded data //! -class CORE_EXPORT CStateCompressor : public CDataAdder -{ +class CORE_EXPORT CStateCompressor : public CDataAdder { +public: + static const std::string COMPRESSED_ATTRIBUTE; + static const std::string END_OF_STREAM_ATTRIBUTE; + +public: + using TFilteredOutput = boost::iostreams::filtering_stream; + using TFilteredOutputP = boost::shared_ptr; + using TCompressOStreamP = boost::shared_ptr; + + // Implements the boost::iostreams Sink template interface + class CChunkFilter { public: - static const std::string COMPRESSED_ATTRIBUTE; - static const std::string END_OF_STREAM_ATTRIBUTE; + using char_type = char; - public: - using TFilteredOutput = boost::iostreams::filtering_stream; - using TFilteredOutputP = boost::shared_ptr; - using TCompressOStreamP = boost::shared_ptr; - - // Implements the boost::iostreams Sink template interface - class CChunkFilter - { - public: - using char_type = char; + //! Inform the filtering_stream owning object what this is capable of + struct category : public boost::iostreams::sink_tag, public boost::iostreams::closable_tag {}; - //! Inform the filtering_stream owning object what this is capable of - struct category : - public boost::iostreams::sink_tag, - public boost::iostreams::closable_tag - {}; + public: + //! Constructor + CChunkFilter(CDataAdder& adder); - public: - //! Constructor - CChunkFilter(CDataAdder &adder); + //! Interface method: accept n bytes from s + std::streamsize write(const char* s, std::streamsize n); - //! Interface method: accept n bytes from s - std::streamsize write(const char *s, std::streamsize n); + //! Interface method: flush the output and close the stream + void close(); - //! Interface method: flush the output and close the stream - void close(); + //! Set the search ID to use + void index(const std::string& index, const std::string& id); - //! Set the search ID to use - void index(const std::string &index, - const std::string &id); + //! True if all of the chunked writes were successful. + //! If one or any of the writes failed the result is false + bool allWritesSuccessful(); - //! True if all of the chunked writes were successful. - //! If one or any of the writes failed the result is false - bool allWritesSuccessful(); + //! How many compressed documents have been generated? + size_t numCompressedDocs() const; - //! How many compressed documents have been generated? - size_t numCompressedDocs() const; + private: + //! Handle the details of writing a stream of bytes to the internal + //! CDataAdder object + void writeInternal(const char* s, std::streamsize& written, std::streamsize& n); - private: - //! Handle the details of writing a stream of bytes to the internal - //! CDataAdder object - void writeInternal(const char *s, std::streamsize &written, std::streamsize &n); + //! Close stream - end the JSON output + void closeStream(bool isFinal); - //! Close stream - end the JSON output - void closeStream(bool isFinal); + private: + //! 
The underlying datastore + CDataAdder& m_Adder; - private: - //! The underlying datastore - CDataAdder &m_Adder; + //! The filtering_stream compressor given to external clients + CDataAdder::TOStreamP m_OStream; - //! The filtering_stream compressor given to external clients - CDataAdder::TOStreamP m_OStream; + //! The sequential document number currently being written to + std::size_t m_CurrentDocNum; - //! The sequential document number currently being written to - std::size_t m_CurrentDocNum; + //! The number of bytes written to the current CDataAdder stream + std::size_t m_BytesDone; - //! The number of bytes written to the current CDataAdder stream - std::size_t m_BytesDone; + //! The largest document size permitted by the downstream CDataAdder + std::size_t m_MaxDocSize; - //! The largest document size permitted by the downstream CDataAdder - std::size_t m_MaxDocSize; + //! The search index to use - set by the upstream CDataAdder + std::string m_Index; - //! The search index to use - set by the upstream CDataAdder - std::string m_Index; + //! The base ID + std::string m_BaseId; - //! The base ID - std::string m_BaseId; + //! true if all the writes were successfull + bool m_WritesSuccessful; + }; - //! true if all the writes were successfull - bool m_WritesSuccessful; - }; +public: + //! Constructor: take a reference to the underlying downstream datastore + CStateCompressor(CDataAdder& compressedAdder); - public: - //! Constructor: take a reference to the underlying downstream datastore - CStateCompressor(CDataAdder &compressedAdder); - - //! Add streamed data - return of NULL stream indicates failure. - //! Since the data to be written isn't known at the time this function - //! returns it is not possible to detect all error conditions - //! immediately. If the stream goes bad whilst being written to then - //! this also indicates failure. - //! As this class compresses incoming stream data, it is responsible for - //! dealing with the underlying storage layer, so only 1 stream will ever - //! be given out to clients. - virtual TOStreamP addStreamed(const std::string &index, - const std::string &id); - - //! Clients that get a stream using addStreamed() must call this - //! method one they've finished sending data to the stream. - //! They should set force to true. - //! Returns true if all of the chunked uploads were - //! successful - virtual bool streamComplete(TOStreamP &strm, - bool force); + //! Add streamed data - return of NULL stream indicates failure. + //! Since the data to be written isn't known at the time this function + //! returns it is not possible to detect all error conditions + //! immediately. If the stream goes bad whilst being written to then + //! this also indicates failure. + //! As this class compresses incoming stream data, it is responsible for + //! dealing with the underlying storage layer, so only 1 stream will ever + //! be given out to clients. + virtual TOStreamP addStreamed(const std::string& index, const std::string& id); - //! How many compressed documents have been generated? - size_t numCompressedDocs() const; + //! Clients that get a stream using addStreamed() must call this + //! method one they've finished sending data to the stream. + //! They should set force to true. + //! Returns true if all of the chunked uploads were + //! successful + virtual bool streamComplete(TOStreamP& strm, bool force); - private: + //! How many compressed documents have been generated? + size_t numCompressedDocs() const; - //! 
The chunking part of the iostreams filter chain - CChunkFilter m_FilterSink; +private: + //! The chunking part of the iostreams filter chain + CChunkFilter m_FilterSink; - //! The iostreams filter chain that handles compression/chunking - TFilteredOutputP m_OutFilter; + //! The iostreams filter chain that handles compression/chunking + TFilteredOutputP m_OutFilter; - TCompressOStreamP m_OutStream; + TCompressOStreamP m_OutStream; }; - - } } #endif // INCLUDED_ml_core_CStateCompressor_h - diff --git a/include/core/CStateDecompressor.h b/include/core/CStateDecompressor.h index 91e8dd04f0..d79971e266 100644 --- a/include/core/CStateDecompressor.h +++ b/include/core/CStateDecompressor.h @@ -11,13 +11,11 @@ #include -#include #include +#include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! A CDataSearcher-derived class that decompresses chunked and compressed data @@ -41,149 +39,139 @@ namespace core //! Parses JSON from the downstream store, using a stream //! interface. //! -class CORE_EXPORT CStateDecompressor : public CDataSearcher -{ - public: - using TFilteredInput = boost::iostreams::filtering_stream; - using TFilteredInputP = boost::shared_ptr; - - static const std::string EMPTY_DATA; - - // Implements the boost::iostreams Source template interface - class CDechunkFilter - { - public: - using char_type = char; +class CORE_EXPORT CStateDecompressor : public CDataSearcher { +public: + using TFilteredInput = boost::iostreams::filtering_stream; + using TFilteredInputP = boost::shared_ptr; - //! Inform the filtering_stream owning object what this is capable of - struct category : - public boost::iostreams::source_tag - {}; + static const std::string EMPTY_DATA; - public: - //! Constructor - CDechunkFilter(CDataSearcher &searcher); - - //! Interface method: read up to n bytes from the downstream - //! datastore, decompress them and put them into s - std::streamsize read(char *s, std::streamsize n); - - //! Interface method: close the downstream stream - void close(); - - private: - //! Find the JSON header - //! Read until the array field CStateCompressor::COMPRESSED is found - bool readHeader(); + // Implements the boost::iostreams Source template interface + class CDechunkFilter { + public: + using char_type = char; - //! Manage the reading of bytes from the stream - void handleRead(char *s, std::streamsize n, std::streamsize &bytesDone); + //! Inform the filtering_stream owning object what this is capable of + struct category : public boost::iostreams::source_tag {}; - //! Write a footer at the end of the document or stream - std::streamsize endOfStream(char *s, std::streamsize n, std::streamsize bytesDone); + public: + //! Constructor + CDechunkFilter(CDataSearcher& searcher); - //! Parse the next json object - bool parseNext(); + //! Interface method: read up to n bytes from the downstream + //! datastore, decompress them and put them into s + std::streamsize read(char* s, std::streamsize n); - private: - //! Handler - //! for events fired by rapidjson during parsing. - //! Note: using the base handler, so we only need to implement what is needed - struct SRapidJsonHandler final : public rapidjson::BaseReaderHandler<> - { - bool Bool(bool b); - bool String(const char *str, rapidjson::SizeType length, bool); - bool StartObject(); - bool Key(const char *str, rapidjson::SizeType length, bool); - bool EndObject(rapidjson::SizeType); - bool StartArray(); - bool EndArray(rapidjson::SizeType); + //! 
Interface method: close the downstream stream + void close(); - enum ETokenType - { - E_TokenKey = 1, - E_TokenBool = 2, - E_TokenString = 3, - E_TokenObjectStart = 4, - E_TokenObjectEnd = 5, - E_TokenArrayStart = 6, - E_TokenArrayEnd = 7 - }; + private: + //! Find the JSON header + //! Read until the array field CStateCompressor::COMPRESSED is found + bool readHeader(); - //! the last token type extracted - ETokenType s_Type; + //! Manage the reading of bytes from the stream + void handleRead(char* s, std::streamsize n, std::streamsize& bytesDone); - //! the last string (c string) as pointer (only valid till next call) - const char *s_CompressedChunk; + //! Write a footer at the end of the document or stream + std::streamsize endOfStream(char* s, std::streamsize n, std::streamsize bytesDone); - //! the last string length (only valid till next call) - rapidjson::SizeType s_CompressedChunkLength; - }; + //! Parse the next json object + bool parseNext(); + private: + //! Handler + //! for events fired by rapidjson during parsing. + //! Note: using the base handler, so we only need to implement what is needed + struct SRapidJsonHandler final : public rapidjson::BaseReaderHandler<> { + bool Bool(bool b); + bool String(const char* str, rapidjson::SizeType length, bool); + bool StartObject(); + bool Key(const char* str, rapidjson::SizeType length, bool); + bool EndObject(rapidjson::SizeType); + bool StartArray(); + bool EndArray(rapidjson::SizeType); + + enum ETokenType { + E_TokenKey = 1, + E_TokenBool = 2, + E_TokenString = 3, + E_TokenObjectStart = 4, + E_TokenObjectEnd = 5, + E_TokenArrayStart = 6, + E_TokenArrayEnd = 7 + }; + + //! the last token type extracted + ETokenType s_Type; + + //! the last string (c string) as pointer (only valid till next call) + const char* s_CompressedChunk; + + //! the last string length (only valid till next call) + rapidjson::SizeType s_CompressedChunkLength; + }; - //! Has a valid document been seen? - bool m_Initialised; + //! Has a valid document been seen? + bool m_Initialised; - //! Has any data been written downstream? - bool m_SentData; + //! Has any data been written downstream? + bool m_SentData; - //! The downstream data store to read from - CDataSearcher &m_Searcher; + //! The downstream data store to read from + CDataSearcher& m_Searcher; - //! The stream given to clients to read from - CDataSearcher::TIStreamP m_IStream; + //! The stream given to clients to read from + CDataSearcher::TIStreamP m_IStream; - //! The sequential document number currently being written to - std::size_t m_CurrentDocNum; + //! The sequential document number currently being written to + std::size_t m_CurrentDocNum; - //! Have we read all the data possible from downstream? - bool m_EndOfStream; + //! Have we read all the data possible from downstream? + bool m_EndOfStream; - //! The search configuration parameter set by the upstream caller - std::string m_SearchString; + //! The search configuration parameter set by the upstream caller + std::string m_SearchString; - //! Wrapper around the downstream reader - boost::shared_ptr m_InputStreamWrapper; + //! Wrapper around the downstream reader + boost::shared_ptr m_InputStreamWrapper; - //! JSON reader for the downstream stream - boost::shared_ptr m_Reader; + //! JSON reader for the downstream stream + boost::shared_ptr m_Reader; - SRapidJsonHandler m_Handler; + SRapidJsonHandler m_Handler; - //! The offset into the current token that has been read - std::streamsize m_BufferOffset; + //! 
The offset into the current token that has been read + std::streamsize m_BufferOffset; - //! Level of nested objects, used to unwind later on. - size_t m_NestedLevel; - }; + //! Level of nested objects, used to unwind later on. + size_t m_NestedLevel; + }; - public: - //! Constructor - take a CDataSearcher for the downstream data store - CStateDecompressor(CDataSearcher &compressedSearcher); +public: + //! Constructor - take a CDataSearcher for the downstream data store + CStateDecompressor(CDataSearcher& compressedSearcher); - //! CDataSearcher interface method - transparently read compressed - //! data and return it in an uncompressed stream - virtual TIStreamP search(size_t currentDocNum, size_t limit); + //! CDataSearcher interface method - transparently read compressed + //! data and return it in an uncompressed stream + virtual TIStreamP search(size_t currentDocNum, size_t limit); - virtual void setStateRestoreSearch(const std::string &index); + virtual void setStateRestoreSearch(const std::string& index); - //! CDataSearcher interface method - specify the search strings to use - virtual void setStateRestoreSearch(const std::string &index, - const std::string &id); + //! CDataSearcher interface method - specify the search strings to use + virtual void setStateRestoreSearch(const std::string& index, const std::string& id); - private: - //! Reference to the downstream data store - CDataSearcher &m_Searcher; +private: + //! Reference to the downstream data store + CDataSearcher& m_Searcher; - //! The dechunker object - CDechunkFilter m_FilterSource; + //! The dechunker object + CDechunkFilter m_FilterSource; - //! The boost filtering_stream object that handles decompression - TFilteredInputP m_InFilter; + //! The boost filtering_stream object that handles decompression + TFilteredInputP m_InFilter; }; - } } #endif // INCLUDED_ml_core_CStateDecompressor_h - diff --git a/include/core/CStateMachine.h b/include/core/CStateMachine.h index 0738d2150c..2a8a484ea9 100644 --- a/include/core/CStateMachine.h +++ b/include/core/CStateMachine.h @@ -17,10 +17,8 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; @@ -64,175 +62,161 @@ class CStateRestoreTraverser; //! structure while it's being changed. Do not use this class in multithreaded //! code until this is fixed. The bug reference is: //! https://github.com/elastic/machine-learning-cpp/issues/10 -class CORE_EXPORT CStateMachine -{ - public: - using TSizeVec = std::vector; - using TSizeVecVec = std::vector; - using TStrVec = std::vector; +class CORE_EXPORT CStateMachine { +public: + using TSizeVec = std::vector; + using TSizeVecVec = std::vector; + using TStrVec = std::vector; + +public: + //! Set the number of machines we expect the program to use. + static void expectedNumberMachines(std::size_t number); + + //! Create a machine with a specified alphabet, set of states and + //! transition function and initialize its state to \p state. + //! + //! \note This can fail if the supplied data are inconsistent in + //! which case the state is set to bad. + static CStateMachine create(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction, std::size_t state); + + //! \name Persistence + //@{ + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(CStateRestoreTraverser& traverser); + + //! Persist state by passing information to the supplied inserter. 
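As a usage sketch of the create/apply interface above: the alphabet, states and transition table are invented for illustration, and the table is assumed to be indexed by symbol and then by current state, matching the documented transition function \f$\delta : \Sigma \times S \rightarrow S\f$.

```cpp
#include <core/CStateMachine.h>

#include <cstddef>
#include <string>
#include <vector>

void stateMachineSketch() {
    using TStrVec = std::vector<std::string>;
    using TSizeVecVec = std::vector<std::vector<std::size_t>>;

    // A two-state toggle with a single action symbol. The table row for
    // "toggle" maps state 0 ("off") -> 1 and state 1 ("on") -> 0.
    TStrVec alphabet{"toggle"};
    TStrVec states{"off", "on"};
    TSizeVecVec transitionFunction{{1, 0}};

    ml::core::CStateMachine machine = ml::core::CStateMachine::create(
        alphabet, states, transitionFunction, 0); // start in "off"
    if (!machine.bad()) {
        machine.apply(0);                      // toggle: "off" -> "on"
        std::size_t current = machine.state(); // == 1, i.e. "on"
        machine.printState(current);           // "on"
    }
}
```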
+ void acceptPersistInserter(CStatePersistInserter& inserter) const; + //@} + + //! Check if the machine is bad, i.e. not a valid state machine. + bool bad() const; + + //! Apply \p symbol to the machine. + bool apply(std::size_t symbol); + + //! Get the current state of the machine. + std::size_t state() const; + + //! Print \p state. + std::string printState(std::size_t state) const; + + //! Print \p symbol. + std::string printSymbol(std::size_t symbol) const; + + //! Get a checksum of this object. + uint64_t checksum() const; + + //! Print all the state machines. + static std::size_t numberMachines(); + +protected: + //! Clear all machines (for test only). + static void clear(); + +private: + //! \brief The state of a single machine. + struct CORE_EXPORT SMachine { + SMachine(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction); + SMachine(const SMachine& other); + + //! The alphabet of action symbols \f$\Sigma\f$. + TStrVec s_Alphabet; + //! The possible states \f$S\f$. + TStrVec s_States; + //! The transition table \f$\delta : \Sigma \times S \rightarrow S\f$. + TSizeVecVec s_TransitionFunction; + }; + + //! \brief A lightweight object to lookup a single machine. + struct CORE_EXPORT SLookupMachine : boost::equality_comparable2 { + SLookupMachine(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction); + + //! Test if two machines are equal. + bool operator==(const SMachine& rhs) const; + + //! The alphabet of action symbols \f$\Sigma\f$. + const TStrVec& s_Alphabet; + //! The possible states \f$S\f$. + const TStrVec& s_States; + //! The transition table \f$\delta : \Sigma \times S \rightarrow S\f$. + const TSizeVecVec& s_TransitionFunction; + }; + + //! \brief A custom paired down std::deque like container. + //! + //! DESCRIPTION:\n + //! We have rather specific requirements for this container: + //! -# It must be (as) random access (as possible), + //! -# It must be possible for push_back to occur concurrently with + //! lookup of an existing item in the container. + //! + //! IMPLEMENTATION:\n + //! With std::deque implementations any invocation of operator[] can + //! fail if there is a concurrent push_back. The code using this class + //! ensures that it only ever asks for an element which already exists + //! in the container at the start of push_back. This is possible safely + //! with a std::list. However, since this also needs to be random access, + //! using a vanilla std::list, which has \f$O(N)\f$ complexity for + //! accessing the \f$N^{th}\f$ item and poor locality of reference, + //! is not suitable. Instead we prefer to use a list of preallocated + //! std::vectors, on which it is also safe to call push_back, for our + //! use case, provided doing so doesn't cause them to reallocate. + class CORE_EXPORT CMachineDeque { + private: + //! The default vector capacity. + static const std::size_t DEFAULT_CAPACITY = 20; public: - //! Set the number of machines we expect the program to use. - static void expectedNumberMachines(std::size_t number); - - //! Create a machine with a specified alphabet, set of states and - //! transition function and initialize its state to \p state. - //! - //! \note This can fail if the supplied data are inconsistent in - //! which case the state is set to bad. - static CStateMachine create(const TStrVec &alphabet, - const TStrVec &states, - const TSizeVecVec &transitionFunction, - std::size_t state); - - //! \name Persistence - //@{ - //! 
Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(CStateRestoreTraverser &traverser); - - //! Persist state by passing information to the supplied inserter. - void acceptPersistInserter(CStatePersistInserter &inserter) const; - //@} - - //! Check if the machine is bad, i.e. not a valid state machine. - bool bad() const; - - //! Apply \p symbol to the machine. - bool apply(std::size_t symbol); + CMachineDeque(); - //! Get the current state of the machine. - std::size_t state() const; + //! Set the vector capacity to \p capacity. + void capacity(std::size_t capacity); - //! Print \p state. - std::string printState(std::size_t state) const; + //! Access the element at \p pos. + const SMachine& operator[](std::size_t pos) const; - //! Print \p symbol. - std::string printSymbol(std::size_t symbol) const; + //! Get the number of elements in this container. + std::size_t size() const; - //! Get a checksum of this object. - uint64_t checksum() const; + //! Add a new element to the back of the collection. + void push_back(const SMachine& machine); - //! Print all the state machines. - static std::size_t numberMachines(); - - protected: - //! Clear all machines (for test only). - static void clear(); - - private: - //! \brief The state of a single machine. - struct CORE_EXPORT SMachine - { - SMachine(const TStrVec &alphabet, - const TStrVec &states, - const TSizeVecVec &transitionFunction); - SMachine(const SMachine &other); - - //! The alphabet of action symbols \f$\Sigma\f$. - TStrVec s_Alphabet; - //! The possible states \f$S\f$. - TStrVec s_States; - //! The transition table \f$\delta : \Sigma \times S \rightarrow S\f$. - TSizeVecVec s_TransitionFunction; - }; - - //! \brief A lightweight object to lookup a single machine. - struct CORE_EXPORT SLookupMachine : boost::equality_comparable2 - { - SLookupMachine(const TStrVec &alphabet, - const TStrVec &states, - const TSizeVecVec &transitionFunction); - - //! Test if two machines are equal. - bool operator==(const SMachine &rhs) const; - - //! The alphabet of action symbols \f$\Sigma\f$. - const TStrVec &s_Alphabet; - //! The possible states \f$S\f$. - const TStrVec &s_States; - //! The transition table \f$\delta : \Sigma \times S \rightarrow S\f$. - const TSizeVecVec &s_TransitionFunction; - }; - - //! \brief A custom paired down std::deque like container. - //! - //! DESCRIPTION:\n - //! We have rather specific requirements for this container: - //! -# It must be (as) random access (as possible), - //! -# It must be possible for push_back to occur concurrently with - //! lookup of an existing item in the container. - //! - //! IMPLEMENTATION:\n - //! With std::deque implementations any invocation of operator[] can - //! fail if there is a concurrent push_back. The code using this class - //! ensures that it only ever asks for an element which already exists - //! in the container at the start of push_back. This is possible safely - //! with a std::list. However, since this also needs to be random access, - //! using a vanilla std::list, which has \f$O(N)\f$ complexity for - //! accessing the \f$N^{th}\f$ item and poor locality of reference, - //! is not suitable. Instead we prefer to use a list of preallocated - //! std::vectors, on which it is also safe to call push_back, for our - //! use case, provided doing so doesn't cause them to reallocate. - class CORE_EXPORT CMachineDeque - { - private: - //! The default vector capacity. - static const std::size_t DEFAULT_CAPACITY = 20; - - public: - CMachineDeque(); - - //! 
Set the vector capacity to \p capacity. - void capacity(std::size_t capacity); - - //! Access the element at \p pos. - const SMachine &operator[](std::size_t pos) const; - - //! Get the number of elements in this container. - std::size_t size() const; - - //! Add a new element to the back of the collection. - void push_back(const SMachine &machine); - - //! Remove all elements. - void clear(); - - private: - using TMachineVec = std::vector; - using TMachineVecList = std::list; - - private: - //! The vector capacity. - //! - //! \note This should be set to slightly more than the number - //! of distinct machines which are created by the program - //! which uses this class. - std::size_t m_Capacity; - - //! Get the number of available machines. - std::atomic m_NumberMachines; - - //! The actual machines. - TMachineVecList m_Machines; - }; + //! Remove all elements. + void clear(); private: - CStateMachine(); - - //! Try to find \p machine in the range [\p begin, \p end). - static std::size_t find(std::size_t begin, - std::size_t end, - const SLookupMachine &machine); + using TMachineVec = std::vector; + using TMachineVecList = std::list; private: - //! The machine identifier. - std::size_t m_Machine; - //! The current state of the machine. - std::size_t m_State; - //! A complete list of available machines. - static CMachineDeque ms_Machines; + //! The vector capacity. + //! + //! \note This should be set to slightly more than the number + //! of distinct machines which are created by the program + //! which uses this class. + std::size_t m_Capacity; + + //! Get the number of available machines. + std::atomic m_NumberMachines; + + //! The actual machines. + TMachineVecList m_Machines; + }; + +private: + CStateMachine(); + + //! Try to find \p machine in the range [\p begin, \p end). + static std::size_t find(std::size_t begin, std::size_t end, const SLookupMachine& machine); + +private: + //! The machine identifier. + std::size_t m_Machine; + //! The current state of the machine. + std::size_t m_State; + //! A complete list of available machines. + static CMachineDeque ms_Machines; }; - } } diff --git a/include/core/CStatePersistInserter.h b/include/core/CStatePersistInserter.h index 5efb1ce940..5bc61afe8c 100644 --- a/include/core/CStatePersistInserter.h +++ b/include/core/CStatePersistInserter.h @@ -13,12 +13,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Abstract interface for persisting state. @@ -33,64 +29,50 @@ namespace core //! //! All values are stored as strings. //! -class CORE_EXPORT CStatePersistInserter : private CNonCopyable -{ +class CORE_EXPORT CStatePersistInserter : private CNonCopyable { +public: + //! Virtual destructor for abstract class + virtual ~CStatePersistInserter(); + + //! Store a name/value + virtual void insertValue(const std::string& name, const std::string& value) = 0; + + //! Store an arbitrary type that can be converted to a string + template + void insertValue(const std::string& name, const TYPE& value) { + this->insertValue(name, CStringUtils::typeToString(value)); + } + + //! Store a floating point number with a given level of precision + void insertValue(const std::string& name, double value, CIEEE754::EPrecision precision); + + //! Store a nested level of state, to be populated by the supplied + //! function or function object + template + void insertLevel(const std::string& name, FUNC f) { + CAutoLevel level(name, *this); + f(*this); + } + +protected: + //! 
Start a new level with the given name + virtual void newLevel(const std::string& name) = 0; + + //! End the current level + virtual void endLevel() = 0; + +private: + //! Class to implement RAII for moving to the next level + class CORE_EXPORT CAutoLevel : private CNonCopyable { public: - //! Virtual destructor for abstract class - virtual ~CStatePersistInserter(); - - //! Store a name/value - virtual void insertValue(const std::string &name, - const std::string &value) = 0; - - //! Store an arbitrary type that can be converted to a string - template - void insertValue(const std::string &name, - const TYPE &value) - { - this->insertValue(name, CStringUtils::typeToString(value)); - } - - //! Store a floating point number with a given level of precision - void insertValue(const std::string &name, - double value, - CIEEE754::EPrecision precision); - - //! Store a nested level of state, to be populated by the supplied - //! function or function object - template - void insertLevel(const std::string &name, - FUNC f) - { - CAutoLevel level(name, *this); - f(*this); - } - - protected: - //! Start a new level with the given name - virtual void newLevel(const std::string &name) = 0; - - //! End the current level - virtual void endLevel() = 0; + CAutoLevel(const std::string& name, CStatePersistInserter& inserter); + ~CAutoLevel(); private: - //! Class to implement RAII for moving to the next level - class CORE_EXPORT CAutoLevel : private CNonCopyable - { - public: - CAutoLevel(const std::string &name, - CStatePersistInserter &inserter); - ~CAutoLevel(); - - private: - CStatePersistInserter &m_Inserter; - }; - + CStatePersistInserter& m_Inserter; + }; }; - - } } #endif // INCLUDED_ml_core_CStatePersistInserter_h - diff --git a/include/core/CStateRestoreTraverser.h b/include/core/CStateRestoreTraverser.h index 3ab4cc6ef8..445e2acf56 100644 --- a/include/core/CStateRestoreTraverser.h +++ b/include/core/CStateRestoreTraverser.h @@ -13,12 +13,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Abstract interface for restoring state. @@ -41,108 +37,98 @@ namespace core //! //! All values are returned as strings. //! -class CORE_EXPORT CStateRestoreTraverser : private CNonCopyable -{ - public: - CStateRestoreTraverser(); - - //! Virtual destructor for abstract class - virtual ~CStateRestoreTraverser(); - - //! Navigate to the next element at the current level, or return false - //! if there isn't one - virtual bool next() = 0; - - //! Does the current element have a sub-level? - virtual bool hasSubLevel() const = 0; - - //! Traverse the sub-level of the current element. The supplied - //! function will be called with the traverser pointing at the first - //! element of the sub-level. When the function returns, the traverser - //! will ascend back to the element at the higher level. The supplied - //! should return a bool and this will be passed on as the return value - //! of this method. - template - bool traverseSubLevel(FUNC f) - { - if (!this->hasSubLevel()) - { - return false; - } - - CAutoLevel level(*this); - try - { - return f(*this); - } - catch (const std::exception &e) - { - LOG_ERROR("Restoration failed: " << e.what()); - level.setBadState(); - return false; - } +class CORE_EXPORT CStateRestoreTraverser : private CNonCopyable { +public: + CStateRestoreTraverser(); + + //! Virtual destructor for abstract class + virtual ~CStateRestoreTraverser(); + + //! Navigate to the next element at the current level, or return false + //! 
if there isn't one
+    virtual bool next() = 0;
+
+    //! Does the current element have a sub-level?
+    virtual bool hasSubLevel() const = 0;
+
+    //! Traverse the sub-level of the current element. The supplied
+    //! function will be called with the traverser pointing at the first
+    //! element of the sub-level. When the function returns, the traverser
+    //! will ascend back to the element at the higher level. The supplied function
+    //! should return a bool and this will be passed on as the return value
+    //! of this method.
+    template
+    bool traverseSubLevel(FUNC f) {
+        if (!this->hasSubLevel()) {
+            return false;
+        }
+
+        CAutoLevel level(*this);
+        try {
+            return f(*this);
+        } catch (const std::exception& e) {
+            LOG_ERROR("Restoration failed: " << e.what());
+            level.setBadState();
+            return false;
+        }
+    }
+
+    //! Get the name of the current element - the returned reference is only
+    //! valid for as long as the traverser is pointing at the same element
+    virtual const std::string& name() const = 0;
+
+    //! Get the value of the current element - the returned reference is
+    //! only valid for as long as the traverser is pointing at the same
+    //! element
+    virtual const std::string& value() const = 0;
+
+    //! Has the end of the input stream been reached?
+    virtual bool isEof() const = 0;
+
+    //! Is the state document unintelligible?
+    bool haveBadState() const;
+
+protected:
+    //! Set the bad state flag, which indicates that the state document was
+    //! unintelligible.
+    void setBadState();
+
+    //! Navigate to the start of the sub-level of the current element, or
+    //! return false if there isn't one
+    virtual bool descend() = 0;
+
+    //! Navigate to the element of the level above from which descend() was
+    //! called, or return false if there isn't a level above
+    virtual bool ascend() = 0;
+
+private:
+    //! Class to implement RAII for traversing the next level down
+    class CORE_EXPORT CAutoLevel : private CNonCopyable {
+    public:
+        CAutoLevel(CStateRestoreTraverser& traverser);
+        ~CAutoLevel();
+
+        //! Set the bad state flag, called from an exception handler
+        //! further up, so that we don't try and read from the stream
+        //!
in the destructor + void setBadState(); - //! Remember whether descent on construction succeeded - bool m_Descended; + private: + CStateRestoreTraverser& m_Traverser; - //! If a stream parsing error occurs, don't try and descend - //! in the destructor - bool m_BadState; - }; + //! Remember whether descent on construction succeeded + bool m_Descended; - private: - //! Flag that should be set when the state document is unintelligible. + //! If a stream parsing error occurs, don't try and descend + //! in the destructor bool m_BadState; -}; - + }; +private: + //! Flag that should be set when the state document is unintelligible. + bool m_BadState; +}; } } #endif // INCLUDED_ml_core_CStateRestoreTraverser_h - diff --git a/include/core/CStatistics.h b/include/core/CStatistics.h index 5b4e28f820..6cf21a38e6 100644 --- a/include/core/CStatistics.h +++ b/include/core/CStatistics.h @@ -6,27 +6,24 @@ #ifndef INCLUDED_ml_core_CStatistics_h #define INCLUDED_ml_core_CStatistics_h -#include #include #include +#include #include -#include #include +#include #include -namespace ml -{ -namespace stat_t -{ +namespace ml { +namespace stat_t { //! Changing the order of these enumeration values will corrupt persisted model //! state, so don't. Any new statistics should be added in the penultimate //! position in the enum, immediately before E_LastEnumStat. -enum EStatTypes -{ +enum EStatTypes { //! The number of new people not created in the data gatherer //! because there wasn't enough free resource E_NumberNewPeopleNotAllowed, @@ -91,11 +88,9 @@ enum EStatTypes //! This MUST be last E_LastEnumStat }; - } -namespace core -{ +namespace core { class CStatisticsServer; class CStateRestoreTraverser; @@ -113,39 +108,38 @@ class CStatePersistInserter; //! IMPLEMENTATION DECISIONS:\n //! A singleton class: there should only be one collection of global stats //! -class CORE_EXPORT CStatistics : private CNonCopyable -{ - public: - //! Singleton pattern - static CStatistics &instance(); +class CORE_EXPORT CStatistics : private CNonCopyable { +public: + //! Singleton pattern + static CStatistics& instance(); - //! Provide access to the relevant stat from the collection - static CStat &stat(int index); + //! Provide access to the relevant stat from the collection + static CStat& stat(int index); - //! \name Persistence - //@{ - //! Restore the static members of this class from persisted state - static bool staticsAcceptRestoreTraverser(CStateRestoreTraverser &traverser); + //! \name Persistence + //@{ + //! Restore the static members of this class from persisted state + static bool staticsAcceptRestoreTraverser(CStateRestoreTraverser& traverser); - //! Persist the static members of this class - static void staticsAcceptPersistInserter(CStatePersistInserter &inserter); - //@} + //! Persist the static members of this class + static void staticsAcceptPersistInserter(CStatePersistInserter& inserter); + //@} - private: - using TStatArray = boost::array; +private: + using TStatArray = boost::array; - private: - //! Constructor of a Singleton is private - CStatistics(); +private: + //! Constructor of a Singleton is private + CStatistics(); - //! The unique instance. - static CStatistics ms_Instance; + //! The unique instance. + static CStatistics ms_Instance; - //! Collection of statistics - TStatArray m_Stats; + //! Collection of statistics + TStatArray m_Stats; - //! Enabling printing out the current statistics. - friend CORE_EXPORT std::ostream &operator<<(std::ostream &o, const CStatistics &stats); + //! 
Enabling printing out the current statistics. + friend CORE_EXPORT std::ostream& operator<<(std::ostream& o, const CStatistics& stats); }; } // core diff --git a/include/core/CStopWatch.h b/include/core/CStopWatch.h index 06a4ef7654..891cf0264e 100644 --- a/include/core/CStopWatch.h +++ b/include/core/CStopWatch.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Can be used for timing within a program @@ -33,54 +29,50 @@ namespace core //! 2^32 milliseconds is less than 50 days, and, in production, our //! processes should be able to run for longer than this. //! -class CORE_EXPORT CStopWatch -{ - public: - //! Construct a stop watch, optionally starting it immediately - CStopWatch(bool startRunning = false); +class CORE_EXPORT CStopWatch { +public: + //! Construct a stop watch, optionally starting it immediately + CStopWatch(bool startRunning = false); - //! Start the stop watch - void start(); + //! Start the stop watch + void start(); - //! Stop the stop watch and retrieve the accumulated reading - uint64_t stop(); + //! Stop the stop watch and retrieve the accumulated reading + uint64_t stop(); - //! Retrieve the accumulated reading from the stop watch without - //! stopping it. (Not const because it may trigger a reset if the - //! system clock has been adjusted.) - uint64_t lap(); + //! Retrieve the accumulated reading from the stop watch without + //! stopping it. (Not const because it may trigger a reset if the + //! system clock has been adjusted.) + uint64_t lap(); - //! Is the stop watch running? - bool isRunning() const; + //! Is the stop watch running? + bool isRunning() const; - //! Reset the stop watch, optionally starting it immediately - void reset(bool startRunning = false); + //! Reset the stop watch, optionally starting it immediately + void reset(bool startRunning = false); - private: - //! Calculate the difference between two monotonic times, with sanity - //! checking just in case the timer does go backwards somehow, and - //! return the answer in milliseconds - uint64_t calcDuration(); +private: + //! Calculate the difference between two monotonic times, with sanity + //! checking just in case the timer does go backwards somehow, and + //! return the answer in milliseconds + uint64_t calcDuration(); - private: - //! Is the stop watch currently running? - bool m_IsRunning; +private: + //! Is the stop watch currently running? + bool m_IsRunning; - //! Monotonic timer - should not go backwards if the user sets the clock - CMonotonicTime m_MonotonicTime; + //! Monotonic timer - should not go backwards if the user sets the clock + CMonotonicTime m_MonotonicTime; - //! Monotonic time (in milliseconds since some arbitrary time in the - //! past) when the stop watch was last started - uint64_t m_Start; + //! Monotonic time (in milliseconds since some arbitrary time in the + //! past) when the stop watch was last started + uint64_t m_Start; - //! Time (in milliseconds) accumulated over previous runs of the stop - //! watch since the last reset - uint64_t m_AccumulatedTime; + //! Time (in milliseconds) accumulated over previous runs of the stop + //! 
watch since the last reset + uint64_t m_AccumulatedTime; }; - - } } #endif // INCLUDED_ml_core_CStopWatch_h - diff --git a/include/core/CStoredStringPtr.h b/include/core/CStoredStringPtr.h index 00b978766d..ec109b24e2 100644 --- a/include/core/CStoredStringPtr.h +++ b/include/core/CStoredStringPtr.h @@ -14,11 +14,8 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! A pointer-like proxy for strings owned by a string store. @@ -35,80 +32,79 @@ namespace core //! The private constructors make it hard to accidentally construct //! stored string pointers that are not managed by a string store. //! -class CORE_EXPORT CStoredStringPtr -{ - public: - //! NULL constructor. - CStoredStringPtr() noexcept; +class CORE_EXPORT CStoredStringPtr { +public: + //! NULL constructor. + CStoredStringPtr() noexcept; - void swap(CStoredStringPtr &other) noexcept; + void swap(CStoredStringPtr& other) noexcept; - //! Get a reference to the string. - const std::string &operator*() const noexcept; + //! Get a reference to the string. + const std::string& operator*() const noexcept; - //! Get a pointer to the string. - const std::string *operator->() const noexcept; + //! Get a pointer to the string. + const std::string* operator->() const noexcept; - //! Get a pointer to the string. - const std::string *get() const noexcept; + //! Get a pointer to the string. + const std::string* get() const noexcept; - //! Is the pointer non-NULL? - explicit operator bool() const noexcept; + //! Is the pointer non-NULL? + explicit operator bool() const noexcept; - //! Is there only one pointer for this stored string? - bool isUnique() const noexcept; + //! Is there only one pointer for this stored string? + bool isUnique() const noexcept; - //! Equality operator for NULL. - bool operator==(std::nullptr_t rhs) const noexcept; - bool operator!=(std::nullptr_t rhs) const noexcept; + //! Equality operator for NULL. + bool operator==(std::nullptr_t rhs) const noexcept; + bool operator!=(std::nullptr_t rhs) const noexcept; - //! Equality operator. - bool operator==(const CStoredStringPtr &rhs) const noexcept; - bool operator!=(const CStoredStringPtr &rhs) const noexcept; + //! Equality operator. + bool operator==(const CStoredStringPtr& rhs) const noexcept; + bool operator!=(const CStoredStringPtr& rhs) const noexcept; - //! Less than operator. - bool operator<(const CStoredStringPtr &rhs) const noexcept; + //! Less than operator. + bool operator<(const CStoredStringPtr& rhs) const noexcept; - //! Claim memory usage is 0 in the main memory usage calculation, on the - //! assumption that the actual memory usage will be accounted for in a - //! string store. - static bool dynamicSizeAlwaysZero() { return true; } + //! Claim memory usage is 0 in the main memory usage calculation, on the + //! assumption that the actual memory usage will be accounted for in a + //! string store. + static bool dynamicSizeAlwaysZero() { return true; } - //! Get the actual memory usage of the string. For use by the string - //! store. - std::size_t actualMemoryUsage() const; - void debugActualMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const; + //! Get the actual memory usage of the string. For use by the string + //! store. + std::size_t actualMemoryUsage() const; + void debugActualMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const; - //! These factory methods return a stored string pointer given a string. - //! They must only be used within string store classes that contain code - //! 
to account for memory usage outside of the main memory usage - //! calculation. - static CStoredStringPtr makeStoredString(const std::string &str); - static CStoredStringPtr makeStoredString(std::string &&str); + //! These factory methods return a stored string pointer given a string. + //! They must only be used within string store classes that contain code + //! to account for memory usage outside of the main memory usage + //! calculation. + static CStoredStringPtr makeStoredString(const std::string& str); + static CStoredStringPtr makeStoredString(std::string&& str); - private: - //! Non-NULL constructors are private to prevent accidental construction - //! outside of a string store. - explicit CStoredStringPtr(const std::string &str); - explicit CStoredStringPtr(std::string &&str); +private: + //! Non-NULL constructors are private to prevent accidental construction + //! outside of a string store. + explicit CStoredStringPtr(const std::string& str); + explicit CStoredStringPtr(std::string&& str); - private: - using TStrCPtr = boost::shared_ptr; +private: + using TStrCPtr = boost::shared_ptr; - //! The wrapped shared_ptr. - TStrCPtr m_String; + //! The wrapped shared_ptr. + TStrCPtr m_String; - friend CORE_EXPORT std::size_t hash_value(const CStoredStringPtr &); + friend CORE_EXPORT std::size_t hash_value(const CStoredStringPtr&); }; //! Hash function named such that it will work automatically with Boost //! unordered containers. CORE_EXPORT -std::size_t hash_value(const CStoredStringPtr &ptr); +std::size_t hash_value(const CStoredStringPtr& ptr); //! Swap for use by generic code. CORE_EXPORT -void swap(CStoredStringPtr &lhs, CStoredStringPtr &rhs); +void swap(CStoredStringPtr& lhs, CStoredStringPtr& rhs); } // core } // ml diff --git a/include/core/CStrCaseCmp.h b/include/core/CStrCaseCmp.h index c0a50a8c38..8b9e9af2b2 100644 --- a/include/core/CStrCaseCmp.h +++ b/include/core/CStrCaseCmp.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper for the strcasecmp() function. @@ -29,16 +25,12 @@ namespace core //! This has been broken into a class of its own because Windows has a //! _stricmp() function whilst Unix has strcasecmp(). //! -class CORE_EXPORT CStrCaseCmp : private CNonInstantiatable -{ - public: - static int strCaseCmp(const char *s1, const char *s2); - static int strNCaseCmp(const char *s1, const char *s2, size_t n); +class CORE_EXPORT CStrCaseCmp : private CNonInstantiatable { +public: + static int strCaseCmp(const char* s1, const char* s2); + static int strNCaseCmp(const char* s1, const char* s2, size_t n); }; - - } } #endif // INCLUDED_ml_core_CStrCaseCmp_h - diff --git a/include/core/CStrFTime.h b/include/core/CStrFTime.h index 54ca59a4e3..728e1a7f8a 100644 --- a/include/core/CStrFTime.h +++ b/include/core/CStrFTime.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Format the date/time struct into a string in the buffer buf, according to @@ -35,18 +31,11 @@ namespace core //! whereas Unix formats it as a numeric offset. We want the numeric offset on //! all platforms. //! 
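To make the portability point concrete, a short usage sketch: the format string is illustrative, and std::localtime is used here only to populate the broken-down time.

```cpp
#include <core/CStrFTime.h>

#include <ctime>

void formatWithNumericOffset(time_t when) {
    // Populate the broken-down local time (not thread-safe, but brief).
    struct tm parts = *std::localtime(&when);

    char buf[64];
    // With this wrapper %z yields a numeric UTC offset (e.g. +0100) on every
    // platform, rather than a zone name on Windows.
    ml::core::CStrFTime::strFTime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S %z", &parts);
}
```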
-class CORE_EXPORT CStrFTime : private CNonInstantiatable -{ - public: - static size_t strFTime(char *buf, - size_t maxSize, - const char *format, - struct tm *tm); +class CORE_EXPORT CStrFTime : private CNonInstantiatable { +public: + static size_t strFTime(char* buf, size_t maxSize, const char* format, struct tm* tm); }; - - } } #endif // INCLUDED_ml_core_CStrFTime_h - diff --git a/include/core/CStrPTime.h b/include/core/CStrPTime.h index dbf9842f27..8aa53f7f3c 100644 --- a/include/core/CStrPTime.h +++ b/include/core/CStrPTime.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Parse the date/time string in the buffer buf, according to the @@ -39,17 +35,11 @@ namespace core //! indicated by the %Z format, but (at least on Fedora 9) it doesn't. //! So Linux requires special handling for %Z too. //! -class CORE_EXPORT CStrPTime : private CNonInstantiatable -{ - public: - static char *strPTime(const char *buf, - const char *format, - struct tm *tm); +class CORE_EXPORT CStrPTime : private CNonInstantiatable { +public: + static char* strPTime(const char* buf, const char* format, struct tm* tm); }; - - } } #endif // INCLUDED_ml_core_CStrPTime_h - diff --git a/include/core/CStrPairFirstElementEqual.h b/include/core/CStrPairFirstElementEqual.h index f87bcd8ff9..f378c70bb8 100644 --- a/include/core/CStrPairFirstElementEqual.h +++ b/include/core/CStrPairFirstElementEqual.h @@ -11,12 +11,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Specialised comparator for string pairs @@ -31,22 +27,17 @@ namespace core //! element of both pairs must be either a std::string, or convertible to a //! const std::string & (e.g. a boost::reference_wrapper). //! -class CORE_EXPORT CStrPairFirstElementEqual -{ - public: - template - bool operator()(const PAIR1 &pr1, const PAIR2 &pr2) - { - const std::string &pr1first = pr1.first; - const std::string &pr2first = pr2.first; - - return pr1first == pr2first; - } +class CORE_EXPORT CStrPairFirstElementEqual { +public: + template + bool operator()(const PAIR1& pr1, const PAIR2& pr2) { + const std::string& pr1first = pr1.first; + const std::string& pr2first = pr2.first; + + return pr1first == pr2first; + } }; - - } } #endif // INCLUDED_ml_core_CStrPairFirstElementEqual_h - diff --git a/include/core/CStrPairFirstElementLess.h b/include/core/CStrPairFirstElementLess.h index bec95fb480..07980966f8 100644 --- a/include/core/CStrPairFirstElementLess.h +++ b/include/core/CStrPairFirstElementLess.h @@ -11,12 +11,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Specialised comparator for string pairs @@ -31,22 +27,17 @@ namespace core //! element of both pairs must be either a std::string, or convertible to a //! const std::string & (e.g. a boost::reference_wrapper). //! 
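A usage sketch for the comparator documented above, keeping a vector of pairs ordered by its string key; the container contents are illustrative.

```cpp
#include <core/CStrPairFirstElementLess.h>

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

void firstElementLessSketch() {
    using TStrIntPr = std::pair<std::string, int>;
    std::vector<TStrIntPr> index{{"b", 2}, {"a", 1}, {"c", 3}};

    // Order purely by the string key; the second element is ignored.
    std::sort(index.begin(), index.end(), ml::core::CStrPairFirstElementLess());

    // The templated operator() also allows probing with a different pair
    // type, provided its first element converts to const std::string&.
    std::pair<std::string, long> probe{"b", 0L};
    std::lower_bound(index.begin(), index.end(), probe,
                     ml::core::CStrPairFirstElementLess());
}
```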
-class CORE_EXPORT CStrPairFirstElementLess -{ - public: - template - bool operator()(const PAIR1 &pr1, const PAIR2 &pr2) - { - const std::string &pr1first = pr1.first; - const std::string &pr2first = pr2.first; - - return pr1first < pr2first; - } +class CORE_EXPORT CStrPairFirstElementLess { +public: + template + bool operator()(const PAIR1& pr1, const PAIR2& pr2) { + const std::string& pr1first = pr1.first; + const std::string& pr2first = pr2.first; + + return pr1first < pr2first; + } }; - - } } #endif // INCLUDED_ml_core_CStrPairFirstElementLess_h - diff --git a/include/core/CStrTokR.h b/include/core/CStrTokR.h index 1db6b38ef5..a0cd547adb 100644 --- a/include/core/CStrTokR.h +++ b/include/core/CStrTokR.h @@ -9,12 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper for the strtok_r() function. @@ -26,15 +22,11 @@ namespace core //! This has been broken into a class of its own because Windows has a //! strtok_s() function rather than Unix's strtok_r(). //! -class CORE_EXPORT CStrTokR : private CNonInstantiatable -{ - public: - static char *strTokR(char *str, const char *sep, char **lasts); +class CORE_EXPORT CStrTokR : private CNonInstantiatable { +public: + static char* strTokR(char* str, const char* sep, char** lasts); }; - - } } #endif // INCLUDED_ml_core_CStrTokR_h - diff --git a/include/core/CStringCache.h b/include/core/CStringCache.h index 753d9f6648..3444f1b478 100644 --- a/include/core/CStringCache.h +++ b/include/core/CStringCache.h @@ -12,12 +12,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! A cache of strings that can be reused @@ -59,84 +55,77 @@ namespace core //! a non-zero hash, which would be hard to reproduce for a //! range of characters, hence we use our own hash algorithm. //! -class CORE_EXPORT CStringCache -{ +class CORE_EXPORT CStringCache { +public: + //! Constructor detects whether copy-on-write strings are in use + CStringCache(); + + //! Does the current platform use copy-on-write strings? If it + //! doesn't, it's probably best not to use any further functionality + //! of this class. + bool haveCopyOnWriteStrings() const; + + //! Look up a char pointer when the length is not known + const std::string& stringFor(const char* str); + + //! If the length is already known the hash calculation can be more + //! efficient + const std::string& stringFor(const char* str, size_t length); + +private: + //! String hash that uses the same formula as CCharPHash below. + //! Boost's hash function applied to an empty string returns a non-zero + //! hash, which would be hard to reproduce for a range of characters, + //! hence using a hand coded hash functor. + class CStrHash : public std::unary_function { public: - //! Constructor detects whether copy-on-write strings are in use - CStringCache(); - - //! Does the current platform use copy-on-write strings? If it - //! doesn't, it's probably best not to use any further functionality - //! of this class. - bool haveCopyOnWriteStrings() const; + size_t operator()(const std::string& str) const; + }; - //! Look up a char pointer when the length is not known - const std::string &stringFor(const char *str); + //! Class to hash a range of characters on construction to save + //! calculating the length in operator(). Does NOT construct a + //! temporary string object to create the hash. + class CCharPHash : public std::unary_function { + public: + //! 
Store the given hash + CCharPHash(const char* str, const char* end); - //! If the length is already known the hash calculation can be more - //! efficient - const std::string &stringFor(const char *str, size_t length); + //! Return the hash computed in the constructor regardless of + //! what argument is passed. + size_t operator()(const char*) const; private: - //! String hash that uses the same formula as CCharPHash below. - //! Boost's hash function applied to an empty string returns a non-zero - //! hash, which would be hard to reproduce for a range of characters, - //! hence using a hand coded hash functor. - class CStrHash : public std::unary_function - { - public: - size_t operator()(const std::string &str) const; - }; - - //! Class to hash a range of characters on construction to save - //! calculating the length in operator(). Does NOT construct a - //! temporary string object to create the hash. - class CCharPHash : public std::unary_function - { - public: - //! Store the given hash - CCharPHash(const char *str, const char *end); - - //! Return the hash computed in the constructor regardless of - //! what argument is passed. - size_t operator()(const char *) const; - - private: - size_t m_Hash; - }; - - //! Check for equality between a char pointer and a string without - //! constructing a temporary string - class CCharPStrEqual : public std::binary_function - { - public: - //! Cache the char pointer length to speed comparisons - CCharPStrEqual(size_t length); - - bool operator()(const char *lhs, const std::string &rhs) const; - - private: - size_t m_Length; - }; + size_t m_Hash; + }; - private: - //! Flag to record whether the current platform has copy-on-write - //! strings - bool m_HaveCopyOnWriteStrings; + //! Check for equality between a char pointer and a string without + //! constructing a temporary string + class CCharPStrEqual : public std::binary_function { + public: + //! Cache the char pointer length to speed comparisons + CCharPStrEqual(size_t length); - using TStrUSet = boost::unordered_set; - using TStrUSetCItr = TStrUSet::const_iterator; + bool operator()(const char* lhs, const std::string& rhs) const; - //! The cache of strings - TStrUSet m_Cache; + private: + size_t m_Length; + }; - //! String to return when passed a NULL pointer - static const std::string EMPTY_STRING; -}; +private: + //! Flag to record whether the current platform has copy-on-write + //! strings + bool m_HaveCopyOnWriteStrings; + + using TStrUSet = boost::unordered_set; + using TStrUSetCItr = TStrUSet::const_iterator; + //! The cache of strings + TStrUSet m_Cache; + //! String to return when passed a NULL pointer + static const std::string EMPTY_STRING; +}; } } #endif // INCLUDED_ml_core_CStringCache_h - diff --git a/include/core/CStringSimilarityTester.h b/include/core/CStringSimilarityTester.h index b683b41bec..7ecf18b008 100644 --- a/include/core/CStringSimilarityTester.h +++ b/include/core/CStringSimilarityTester.h @@ -22,12 +22,8 @@ class CStringSimilarityTesterTest; - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Class to measure how similar strings are. @@ -67,456 +63,373 @@ namespace core //! The Levenshtein distance method CAN be used from multiple //! threads. //! -class CORE_EXPORT CStringSimilarityTester : private CNonCopyable -{ - public: - //! Used by the simple Levenshtein distance algorithm - using TScopedSizeArray = boost::scoped_array; +class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { +public: + //! 
Used by the simple Levenshtein distance algorithm
+    using TScopedSizeArray = boost::scoped_array<size_t>;
+
+    //! Used by the more advanced Berghel-Roach algorithm
+    using TScopedIntArray = boost::scoped_array<int>;
+    using TScopedIntPArray = boost::scoped_array<int*>;
+
+public:
+    CStringSimilarityTester();
+
+    //! Calculate how similar two strings are
+    bool similarity(const std::string& first, const std::string& second, double& result) const;
+
+    //! Calculate how similar two strings are in the case where
+    //! we already know their individual compressed lengths
+    bool
+    similarity(const std::string& first, size_t firstCompLength, const std::string& second, size_t secondCompLength, double& result) const;
+
+    //! Remove those characters from a string that cause a provided
+    //! predicate to return true (can be used with ctype.h functions
+    //! like isalpha() etc, or with a functor)
+    template<typename PREDICATE>
+    std::string strippedString(const std::string& original, PREDICATE excludePred) const {
+        std::string stripped;
+        stripped.reserve(original.size());
+
+        std::remove_copy_if(original.begin(), original.end(), std::back_inserter(stripped), excludePred);
+
+        return stripped;
+    }
+
+    //! Calculate how similar two strings are, excluding
+    //! certain characters
+    template<typename PREDICATE>
+    bool similarityEx(const std::string& first, const std::string& second, PREDICATE excludePred, double& result) const {
+        return this->similarity(this->strippedString(first, excludePred), this->strippedString(second, excludePred), result);
+    }
+
+    //! Find the length of the compressed version of a string - note
+    //! that the actual compressed version is discarded
+    bool compressedLengthOf(const std::string& str, size_t& length) const;
+
+    //! Calculate the Levenshtein distance between two strings,
+    //! excluding certain characters
+    template<typename STRINGLIKE, typename PREDICATE>
+    size_t levenshteinDistanceEx(const STRINGLIKE& first, const STRINGLIKE& second, PREDICATE excludePred) const {
+        return this->levenshteinDistance(this->strippedString(first, excludePred), this->strippedString(second, excludePred));
+    }
+
+    //! Calculate the Levenshtein distance between two strings or
+    //! string-like containers (including vector and deque).
+    //! Can be applied to any container that implements size() and
+    //! operator[]() where the elements held in the container implement
+    //! operator==().
+    template<typename STRINGLIKE>
+    size_t levenshteinDistance(const STRINGLIKE& first, const STRINGLIKE& second) const {
+        // Levenshtein distance is the number of operations required to
+        // convert one string into another, where an operation means
+        // inserting 1 character, deleting 1 character or changing 1
+        // character.
+        //
+        // There are some examples with pretty pictures of the matrix on
+        // Wikipedia.
+        //
+        // This has been generalised to the case of vectors, where this
+        // method calculates the number of operations to convert one vector
+        // to another.
+
+        size_t firstLen(first.size());
+        size_t secondLen(second.size());
+
+        // The Berghel-Roach algorithm works in time O(n + d ^ 2), and the
+        // simple algorithm works in time O(m * n), where the shorter
+        // sequence length is m, the longer sequence length is n and the
+        // edit distance is d. Therefore the Berghel-Roach algorithm is
+        // much faster when the two sequences are similar, but in the case
+        // of them being very different, it's slower. Additionally, each
+        // operation it performs is much slower than the simple algorithm,
+        // because it has to do more complex calculations to get the matrix
+        // cell values. 
Therefore, we need a heuristic of when to use the + // Berghel-Roach algorithm and when to use the simple algorithm. + // The chosen heuristic is that if the longer sequence is double the + // length of the shorter sequence, we'll use the simple algorithm. + + if (firstLen >= secondLen) { + // Rule out boundary case + if (secondLen == 0) { + return firstLen; + } + + if (firstLen >= secondLen * 2) { + return this->levenshteinDistanceSimple(second, first); + } - //! Used by the more advanced Berghel-Roach algorithm - using TScopedIntArray = boost::scoped_array; - using TScopedIntPArray = boost::scoped_array; - - public: - CStringSimilarityTester(); - - //! Calculate how similar two strings are - bool similarity(const std::string &first, - const std::string &second, - double &result) const; - - //! Calculate how similar two strings are in the case where - //! we already know their individual compressed lengths - bool similarity(const std::string &first, - size_t firstCompLength, - const std::string &second, - size_t secondCompLength, - double &result) const; - - //! Remove those characters from a string that cause a provided - //! predicate to return true (can be used with ctype.h functions - //! like isalpha() etc, or with a functor) - template - std::string strippedString(const std::string &original, - PREDICATE excludePred) const - { - std::string stripped; - stripped.reserve(original.size()); - - std::remove_copy_if(original.begin(), - original.end(), - std::back_inserter(stripped), - excludePred); - - return stripped; + return this->berghelRoachEditDistance(second, first); } - //! Calculate how similar two strings are, excluding - //! certain characters - template - bool similarityEx(const std::string &first, - const std::string &second, - PREDICATE excludePred, - double &result) const - { - return this->similarity(this->strippedString(first, excludePred), - this->strippedString(second, excludePred), - result); + if (secondLen >= firstLen * 2) { + // Rule out boundary case + if (firstLen == 0) { + return secondLen; + } + + return this->levenshteinDistanceSimple(first, second); } - //! Find the length of the compressed version of a string - note - //! that the actual compressed version is discarded - bool compressedLengthOf(const std::string &str, - size_t &length) const; - - //! Calculate the Levenshtein distance between two strings, - //! excluding certain characters - template - size_t levenshteinDistanceEx(const STRINGLIKE &first, - const STRINGLIKE &second, - PREDICATE excludePred) const - { - return this->levenshteinDistance(this->strippedString(first, excludePred), - this->strippedString(second, excludePred)); + return this->berghelRoachEditDistance(first, second); + } + + //! Calculate the weighted edit distance between two sequences. Each + //! element of each sequence has an associated weight, such that some + //! elements can be considered more expensive to add/remove/replace than + //! others. Can be applied to any container that implements size() and + //! operator[]() where the elements are std::pairs. The + //! first element of each pair must implement operator==(). + //! + //! Unfortunately, in the case of arbitrary weightings, the + //! Berghel-Roach algorithm cannot be applied. Ukkonen gives a + //! counter-example on page 114 of Information and Control, Vol 64, + //! Nos. 1-3, January/February/March 1985. The problem is that the + //! matrix diagonals are not necessarily monotonically increasing. + //! See http://www.cs.helsinki.fi/u/ukkonen/InfCont85.PDF + //! + //! 
TODO - It may be possible to apply some of the lesser optimisations
+    //! from section 2 of Ukkonen's paper to this algorithm.
+    template<typename PAIRCONTAINER>
+    size_t weightedEditDistance(const PAIRCONTAINER& first, const PAIRCONTAINER& second) const {
+        // This is similar to the levenshteinDistanceSimple() method below,
+        // but adding the concept of different costs for each element. If
+        // you are trying to understand this method, you should first make
+        // sure you fully understand the levenshteinDistance() method above
+        // (and the Wikipedia article referenced in it will help with that).
+
+        size_t firstLen(first.size());
+        size_t secondLen(second.size());
+
+        // Rule out boundary cases
+        if (firstLen == 0) {
+            size_t cost(0);
+            for (size_t index = 0; index < secondLen; ++index) {
+                cost += second[index].second;
+            }
+            return cost;
+        }
+
+        if (secondLen == 0) {
+            size_t cost(0);
+            for (size_t index = 0; index < firstLen; ++index) {
+                cost += first[index].second;
+            }
+            return cost;
+        }
+
+        // We need to store two columns of the matrix, but allocate both in
+        // one go for efficiency. Then the current and previous column
+        // pointers alternate between pointing at the first and second half
+        // of the memory block. 
+        using TScopedSizeArray = boost::scoped_array<size_t>;
+        TScopedSizeArray data(new size_t[(secondLen + 1) * 2]);
+        size_t* currentCol(data.get());
+        size_t* prevCol(currentCol + (secondLen + 1));
+
+        // Populate the left column
+        currentCol[0] = 0;
+        for (size_t downMinusOne = 0; downMinusOne < secondLen; ++downMinusOne) {
+            currentCol[downMinusOne + 1] = currentCol[downMinusOne] + second[downMinusOne].second;
+        }
+
+        // Calculate the other entries in the matrix
+        for (size_t acrossMinusOne = 0; acrossMinusOne < firstLen; ++acrossMinusOne) {
+            std::swap(currentCol, prevCol);
+            size_t firstCost(first[acrossMinusOne].second);
+            currentCol[0] = prevCol[0] + firstCost;
+
+            for (size_t downMinusOne = 0; downMinusOne < secondLen; ++downMinusOne) {
+                size_t secondCost(second[downMinusOne].second);
+
+                // There are 3 options, and due to the possible differences
+                // in the weightings, we must always evaluate all 3:
+
+                // 1) Deletion => cell to the left's value plus cost of
+                //    deleting the element from the first sequence
+                size_t option1(prevCol[downMinusOne + 1] + firstCost);
+
+                // 2) Insertion => cell above's value plus cost of
+                //    inserting the element from the second sequence
+                size_t option2(currentCol[downMinusOne] + secondCost);
+
+                // 3) Substitution => cell above left's value plus the
+                //    higher of the two element weights
+                //    OR
+                //    No extra cost in the case where the corresponding
+                //    elements are equal
+                size_t option3(prevCol[downMinusOne] +
+                               ((first[acrossMinusOne].first == second[downMinusOne].first) ? 0 : std::max(firstCost, secondCost)));
+
+                // Take the cheapest option of the 3
+                currentCol[downMinusOne + 1] = std::min(std::min(option1, option2), option3);
+            }
+        }
+
+        // Result is the value in the bottom right hand corner of the matrix
+        return currentCol[secondLen];
+    }
+
+private:
+    //! Calculate the Levenshtein distance using the naive method of
+    //! calculating the entire distance matrix. This private method
+    //! assumes that first.size() > 0 and second.size() > 0. However,
+    //! it's best if second.size() >= first.size() in addition.
+    template<typename STRINGLIKE>
+    size_t levenshteinDistanceSimple(const STRINGLIKE& first, const STRINGLIKE& second) const {
+        // This method implements the simple algorithm for calculating
+        // Levenshtein distance.
+        //
+        // There are some examples with pretty pictures of the matrix on
+        // Wikipedia here http://en.wikipedia.org/wiki/Levenshtein_distance
+
+        // It's best if secondLen >= firstLen. Although this uses more
+        // space for the array below, the total number of calculations will
+        // be the same, but the bigger array will make compiler
+        // optimisations such as loop unrolling and vectorisation more
+        // beneficial. Most internet pages will recommend the opposite,
+        // i.e. allocate the two arrays based on the size of the smaller
+        // sequence, but we're more interested in speed than space.
+        size_t firstLen(first.size());
+        size_t secondLen(second.size());
+
+        // We need to store two columns of the matrix, but allocate both in
+        // one go for efficiency. Then the current and previous column
+        // pointers alternate between pointing at the first and second half
+        // of the memory block. 
+        TScopedSizeArray data(new size_t[(secondLen + 1) * 2]);
+        size_t* currentCol(data.get());
+        size_t* prevCol(currentCol + (secondLen + 1));
+
+        // Populate the left column
+        for (size_t down = 0; down <= secondLen; ++down) {
+            currentCol[down] = down;
+        }

-        //! Calculate the weighted edit distance between two sequences. Each
-        //! element of each sequence has an associated weight, such that some
-        //! elements can be considered more expensive to add/remove/replace than
-        //! others. Can be applied to any container that implements size() and
-        //! operator[]() where the elements are std::pairs. The
-        //! first element of each pair must implement operator==().
-        //!
-        //! Unfortunately, in the case of arbitrary weightings, the
-        //! Berghel-Roach algorithm cannot be applied. Ukkonen gives a
-        //! counter-example on page 114 of Information and Control, Vol 64,
-        //! Nos. 1-3, January/February/March 1985. The problem is that the
-        //! matrix diagonals are not necessarily monotonically increasing.
-        //! See http://www.cs.helsinki.fi/u/ukkonen/InfCont85.PDF
-        //!
-        //! TODO - It may be possible to apply some of the lesser optimisations
-        //! from section 2 of Ukkonen's paper to this algorithm.
-        template <typename PAIRCONTAINER>
-        size_t weightedEditDistance(const PAIRCONTAINER &first,
-                                    const PAIRCONTAINER &second) const
-        {
-            // This is similar to the levenshteinDistanceSimple() method below,
-            // but adding the concept of different costs for each element. If
-            // you are trying to understand this method, you should first make
-            // sure you fully understand the levenshteinDistance() method above
-            // (and the Wikipedia article referenced in it will help with that).
-
-            size_t firstLen(first.size());
-            size_t secondLen(second.size());
-
-            // Rule out boundary cases
-            if (firstLen == 0)
-            {
-                size_t cost(0);
-                for (size_t index = 0; index < secondLen; ++index)
-                {
-                    cost += second[index].second;
-                }
-                return cost;
-            }

-            if (secondLen == 0)
-            {
-                size_t cost(0);
-                for (size_t index = 0; index < firstLen; ++index)
-                {
-                    cost += first[index].second;
-                }
-                return cost;
-            }

-            // We need to store two columns of the matrix, but allocate both in
-            // one go for efficiency. Then the current and previous column
-            // pointers alternate between pointing and the first and second half
-            // of the memory block. 
- using TScopedSizeArray = boost::scoped_array; - TScopedSizeArray data(new size_t[(secondLen + 1) * 2]); - size_t *currentCol(data.get()); - size_t *prevCol(currentCol + (secondLen + 1)); - - // Populate the left column - currentCol[0] = 0; - for (size_t downMinusOne = 0; downMinusOne < secondLen; ++downMinusOne) - { - currentCol[downMinusOne + 1] = currentCol[downMinusOne] + second[downMinusOne].second; - } + currentCol[downMinusOne + 1] = prevCol[downMinusOne]; + } else { + // Yes, they differ, so there are 3 options: + + // 1) Deletion => cell to the left's value plus 1 + size_t option1(prevCol[downMinusOne + 1]); - // Calculate the other entries in the matrix - for (size_t acrossMinusOne = 0; acrossMinusOne < firstLen; ++acrossMinusOne) - { - std::swap(currentCol, prevCol); - size_t firstCost(first[acrossMinusOne].second); - currentCol[0] = prevCol[0] + firstCost; - - for (size_t downMinusOne = 0; downMinusOne < secondLen; ++downMinusOne) - { - size_t secondCost(second[downMinusOne].second); - - // There are 3 options, and due to the possible differences - // in the weightings, we must always evaluate all 3: - - // 1) Deletion => cell to the left's value plus cost of - // deleting the element from the first sequence - size_t option1(prevCol[downMinusOne + 1] + firstCost); - - // 2) Insertion => cell above's value plus cost of - // inserting the element from the second sequence - size_t option2(currentCol[downMinusOne] + secondCost); - - // 3) Substitution => cell above left's value plus the - // higher of the two element weights - // OR - // No extra cost in the case where the corresponding - // elements are equal - size_t option3(prevCol[downMinusOne] + - ((first[acrossMinusOne].first == second[downMinusOne].first) ? - 0 : - std::max(firstCost, secondCost))); + // 2) Insertion => cell above's value plus 1 + size_t option2(currentCol[downMinusOne]); + + // 3) Substitution => cell above left's value plus 1 + size_t option3(prevCol[downMinusOne]); // Take the cheapest option of the 3 - currentCol[downMinusOne + 1] = std::min(std::min(option1, - option2), - option3); + currentCol[downMinusOne + 1] = std::min(std::min(option1, option2), option3) + 1; } } + } - // Result is the value in the bottom right hand corner of the matrix - return currentCol[secondLen]; + // Result is the value in the bottom right hand corner of the matrix + return currentCol[secondLen]; + } + + //! Calculate the Levenshtein distance using the Berghel-Roach + //! algorithm, described at http://berghel.net/publications/asm/asm.pdf + //! This private method assumes that first.size() > 0 and + //! second.size() >= first.size(). + template + size_t berghelRoachEditDistance(const STRINGLIKE& first, const STRINGLIKE& second) const { + // We need to do the calculation using signed variables + int shortLen(static_cast(first.size())); + int maxDist(static_cast(second.size())); + + // Allocate the matrix memory, and setup pointers so that we can + // access it using negative arguments. This enables the + // implementation in this method to vaguely resemble the original + // paper. + TScopedIntArray dataArray; + TScopedIntPArray matrixArary; + int** matrix; + matrix = this->setupBerghelRoachMatrix(maxDist, dataArray, matrixArary); + if (matrix == 0) { + return 0; } - private: - //! Calculate the Levenshtein distance using the naive method of - //! calculating the entire distance matrix. This private method - //! assumes that first.size() > 0 and second.size() > 0. However, - //! 
it's best if second.size() >= first.size() in addition. - template - size_t levenshteinDistanceSimple(const STRINGLIKE &first, - const STRINGLIKE &second) const - { - // This method implements the simple algorithm for calculating - // Levenshtein distance. - // - // There are some examples with pretty pictures of the matrix on - // Wikipedia here http://en.wikipedia.org/wiki/Levenshtein_distance - - // It's best if secondLen >= firstLen. Although this uses more - // space for the array below, the total number of calculations will - // be the same, but the bigger array will make compiler - // optimisations such as loop unrolling and vectorisation more - // beneficial. Most internet pages will recommend the opposite, - // i.e. allocate the two arrays based on the size of the smaller - // sequence, but we're more interested in speed than space. - size_t firstLen(first.size()); - size_t secondLen(second.size()); - - // We need to store two columns of the matrix, but allocate both in - // one go for efficiency. Then the current and previous column - // pointers alternate between pointing and the first and second half - // of the memory block. - TScopedSizeArray data(new size_t[(secondLen + 1) * 2]); - size_t *currentCol(data.get()); - size_t *prevCol(currentCol + (secondLen + 1)); - - // Populate the left column - for (size_t down = 0; down <= secondLen; ++down) - { - currentCol[down] = down; + // The remaining code corresponds to the pseudo-code in the + // sub-section titled "The Driver Algorithm" on + // http://berghel.net/publications/asm/asm.pdf + + // k is the difference in lengths between the two sequences, i.e. + // the minimum distance + int k(maxDist - shortLen); + // p will end up storing the result + int p(k); + do { + int inc(p); + for (int tempP = 0; tempP < p; ++tempP, --inc) { + if (::abs(k - inc) <= tempP) { + this->calcDist(first, second, k - inc, tempP, matrix); + } + if (::abs(k + inc) <= tempP) { + this->calcDist(first, second, k + inc, tempP, matrix); + } } + this->calcDist(first, second, k, p, matrix); - // Calculate the other entries in the matrix - for (size_t acrossMinusOne = 0; acrossMinusOne < firstLen; ++acrossMinusOne) - { - std::swap(currentCol, prevCol); - currentCol[0] = acrossMinusOne + 1; - - for (size_t downMinusOne = 0; downMinusOne < secondLen; ++downMinusOne) - { - // Do the strings differ at the point we've reached? - if (first[acrossMinusOne] == second[downMinusOne]) - { - // No, they're the same => no extra cost - - currentCol[downMinusOne + 1] = prevCol[downMinusOne]; - } - else - { - // Yes, they differ, so there are 3 options: - - // 1) Deletion => cell to the left's value plus 1 - size_t option1(prevCol[downMinusOne + 1]); - - // 2) Insertion => cell above's value plus 1 - size_t option2(currentCol[downMinusOne]); - - // 3) Substitution => cell above left's value plus 1 - size_t option3(prevCol[downMinusOne]); - - // Take the cheapest option of the 3 - currentCol[downMinusOne + 1] = std::min(std::min(option1, - option2), - option3) + 1; - } - } + if (matrix[k][p] == shortLen) { + break; } + } while (++p < maxDist); - // Result is the value in the bottom right hand corner of the matrix - return currentCol[secondLen]; - } + return static_cast(p); + } - //! Calculate the Levenshtein distance using the Berghel-Roach - //! algorithm, described at http://berghel.net/publications/asm/asm.pdf - //! This private method assumes that first.size() > 0 and - //! second.size() >= first.size(). 
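To make the routing described above concrete, here is a short usage sketch of the public entry point (illustrative only):

    #include <core/CStringSimilarityTester.h>

    #include <string>

    void example() {
        ml::core::CStringSimilarityTester tester;

        // Classic example: kitten -> sitting requires 3 edits. The two
        // lengths are within a factor of two of one another, so this call
        // takes the Berghel-Roach path; had one input been more than twice
        // as long as the other, the simple algorithm would have been used.
        size_t dist(tester.levenshteinDistance(std::string("kitten"), std::string("sitting")));
        // dist == 3
    }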
- template - size_t berghelRoachEditDistance(const STRINGLIKE &first, - const STRINGLIKE &second) const - { - // We need to do the calculation using signed variables - int shortLen(static_cast(first.size())); - int maxDist(static_cast(second.size())); - - // Allocate the matrix memory, and setup pointers so that we can - // access it using negative arguments. This enables the - // implementation in this method to vaguely resemble the original - // paper. - TScopedIntArray dataArray; - TScopedIntPArray matrixArary; - int **matrix; - matrix = this->setupBerghelRoachMatrix(maxDist, - dataArray, - matrixArary); - if (matrix == 0) - { - return 0; - } + //! Helper function for the Berghel-Roach edit distance algorithm. This + //! is called f(k, p) in http://berghel.net/publications/asm/asm.pdf + template + void calcDist(const STRINGLIKE& first, const STRINGLIKE& second, int row, int column, int** matrix) const { + // 1) Substitution + int option1(matrix[row][column - 1] + 1); - // The remaining code corresponds to the pseudo-code in the - // sub-section titled "The Driver Algorithm" on - // http://berghel.net/publications/asm/asm.pdf - - // k is the difference in lengths between the two sequences, i.e. - // the minimum distance - int k(maxDist - shortLen); - // p will end up storing the result - int p(k); - do - { - int inc(p); - for (int tempP = 0; tempP < p; ++tempP, --inc) - { - if (::abs(k - inc) <= tempP) - { - this->calcDist(first, - second, - k - inc, - tempP, - matrix); - } - if (::abs(k + inc) <= tempP) - { - this->calcDist(first, - second, - k + inc, - tempP, - matrix); - } - } - this->calcDist(first, second, k, p, matrix); + // NB: Unlike the Berghel-Roach paper, we DO NOT consider + // transposition at this point - if (matrix[k][p] == shortLen) - { - break; - } - } - while (++p < maxDist); + // 2) Insertion + int option2(matrix[row - 1][column - 1]); - return static_cast(p); - } + // 3) Deletion + int option3(matrix[row + 1][column - 1] + 1); - //! Helper function for the Berghel-Roach edit distance algorithm. This - //! is called f(k, p) in http://berghel.net/publications/asm/asm.pdf - template - void calcDist(const STRINGLIKE &first, - const STRINGLIKE &second, - int row, - int column, - int **matrix) const - { - // 1) Substitution - int option1(matrix[row][column - 1] + 1); - - // NB: Unlike the Berghel-Roach paper, we DO NOT consider - // transposition at this point - - // 2) Insertion - int option2(matrix[row - 1][column - 1]); - - // 3) Deletion - int option3(matrix[row + 1][column - 1] + 1); - - int t(std::max(std::max(option1, option2), option3)); - int limit(std::min(static_cast(first.size()), - static_cast(second.size()) - row)); - while (t < limit && first[t] == second[t + row]) - { - ++t; - } - matrix[row][column] = t; + int t(std::max(std::max(option1, option2), option3)); + int limit(std::min(static_cast(first.size()), static_cast(second.size()) - row)); + while (t < limit && first[t] == second[t + row]) { + ++t; } + matrix[row][column] = t; + } - //! Setup the matrices needed for the Berghel-Roach method of - //! calculating edit distance - static int **setupBerghelRoachMatrix(int longLen, - TScopedIntArray &data, - TScopedIntPArray &matrix); + //! Setup the matrices needed for the Berghel-Roach method of + //! calculating edit distance + static int** setupBerghelRoachMatrix(int longLen, TScopedIntArray& data, TScopedIntPArray& matrix); - private: - //! Required for initialisation of the Berghel-Roach matrix (don't call - //! 
this MINUS_INFINITY because that can clash with 3rd party macros) - static const int MINUS_INFINITE_INT; +private: + //! Required for initialisation of the Berghel-Roach matrix (don't call + //! this MINUS_INFINITY because that can clash with 3rd party macros) + static const int MINUS_INFINITE_INT; - //! Used by the compression-based similarity measures - mutable CCompressUtils m_Compressor; + //! Used by the compression-based similarity measures + mutable CCompressUtils m_Compressor; // For unit testing friend class ::CStringSimilarityTesterTest; }; - - } } #endif // INCLUDED_ml_core_CStringSimilarityTester_h - diff --git a/include/core/CStringUtils.h b/include/core/CStringUtils.h index 277d9a80af..8ff005ee3a 100644 --- a/include/core/CStringUtils.h +++ b/include/core/CStringUtils.h @@ -14,12 +14,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! A holder of string utility methods. @@ -29,265 +25,229 @@ namespace core //! //! IMPLEMENTATION DECISIONS:\n //! -class CORE_EXPORT CStringUtils : private CNonInstantiatable -{ - public: - //! We should only have one definition of whitespace across the whole - //! product - this definition matches what ::isspace() considers as - //! whitespace in the "C" locale - static const std::string WHITESPACE_CHARS; - - public: - using TStrVec = std::vector; - using TStrVecItr = TStrVec::iterator; - using TStrVecCItr = TStrVec::const_iterator; - - public: - //! If \p c is the start of a UTF-8 character, return the number of - //! bytes in the whole character. Otherwise (i.e. it's a continuation - //! character) return -1. - static int utf8ByteType(char c); - - //! Convert a type to a string - template - static std::string typeToString(const T &type) - { - return CStringUtils::_typeToString(type); +class CORE_EXPORT CStringUtils : private CNonInstantiatable { +public: + //! We should only have one definition of whitespace across the whole + //! product - this definition matches what ::isspace() considers as + //! whitespace in the "C" locale + static const std::string WHITESPACE_CHARS; + +public: + using TStrVec = std::vector; + using TStrVecItr = TStrVec::iterator; + using TStrVecCItr = TStrVec::const_iterator; + +public: + //! If \p c is the start of a UTF-8 character, return the number of + //! bytes in the whole character. Otherwise (i.e. it's a continuation + //! character) return -1. + static int utf8ByteType(char c); + + //! Convert a type to a string + template + static std::string typeToString(const T& type) { + return CStringUtils::_typeToString(type); + } + + //! Convert a double to a pretty string (single precision using %g formatting). + static std::string typeToStringPretty(double d); + + //! For types other than double, use the default conversions + template + static std::string typeToStringPretty(const T& type) { + return CStringUtils::_typeToString(type); + } + + //! Convert a double to a string with the specified precision + static std::string typeToStringPrecise(double d, CIEEE754::EPrecision precision); + + //! For types other than double, default conversions are precise + template + static std::string typeToStringPrecise(const T& type, CIEEE754::EPrecision /*precision*/) { + return CStringUtils::_typeToString(type); + } + + //! Convert a string to a type + template + static bool stringToType(const std::string& str, T& ret) { + return CStringUtils::_stringToType(false, str, ret); + } + + //! Convert a string to a type, and don't print an + //! 
error message if the conversion fails + template + static bool stringToTypeSilent(const std::string& str, T& ret) { + return CStringUtils::_stringToType(true, str, ret); + } + + //! Joins the strings in the container with the \p delimiter. + //! CONTAINER must be a container of std::string. + template + static std::string join(const CONTAINER& strings, const std::string& delimiter) { + if (strings.empty()) { + return std::string(); } - - //! Convert a double to a pretty string (single precision using %g formatting). - static std::string typeToStringPretty(double d); - - //! For types other than double, use the default conversions - template - static std::string typeToStringPretty(const T &type) - { - return CStringUtils::_typeToString(type); + std::size_t requiredSpace = computeStringLength(strings.begin(), strings.end()); + requiredSpace += (strings.size() - 1) * delimiter.length(); + if (requiredSpace == 0) { + return std::string(); } - - //! Convert a double to a string with the specified precision - static std::string typeToStringPrecise(double d, - CIEEE754::EPrecision precision); - - //! For types other than double, default conversions are precise - template - static std::string typeToStringPrecise(const T &type, - CIEEE754::EPrecision /*precision*/) - { - return CStringUtils::_typeToString(type); + std::string output; + output.reserve(requiredSpace); + CStringUtils::join(strings.begin(), strings.end(), delimiter, output); + return output; + } + + //! Joins the strings in the range with the \p delimiter. + //! ITR must be a forward iterator that dereferences to std::string. + template + static void join(ITR begin, ITR end, const std::string& delimiter, std::string& output) { + if (begin == end) { + return; } - - //! Convert a string to a type - template - static bool stringToType(const std::string &str, T &ret) - { - return CStringUtils::_stringToType(false, str, ret); - } - - //! Convert a string to a type, and don't print an - //! error message if the conversion fails - template - static bool stringToTypeSilent(const std::string &str, T &ret) - { - return CStringUtils::_stringToType(true, str, ret); - } - - //! Joins the strings in the container with the \p delimiter. - //! CONTAINER must be a container of std::string. - template - static std::string join(const CONTAINER &strings, const std::string &delimiter) - { - if (strings.empty()) - { - return std::string(); - } - std::size_t requiredSpace = computeStringLength(strings.begin(), strings.end()); - requiredSpace += (strings.size() - 1) * delimiter.length(); - if (requiredSpace == 0) - { - return std::string(); - } - std::string output; - output.reserve(requiredSpace); - CStringUtils::join(strings.begin(), strings.end(), delimiter, output); - return output; - } - - //! Joins the strings in the range with the \p delimiter. - //! ITR must be a forward iterator that dereferences to std::string. - template - static void join(ITR begin, - ITR end, - const std::string &delimiter, - std::string &output) - { - if (begin == end) - { - return; - } - for (;;) - { - output += *begin; - if (++begin == end) - { - break; - } - output += delimiter; + for (;;) { + output += *begin; + if (++begin == end) { + break; } + output += delimiter; } - - //! Convert a string to lower case - static std::string toLower(std::string str); - - //! Convert a string to upper case - static std::string toUpper(std::string str); - - //! How many times does word occur in str? - static size_t numMatches(const std::string &str, - const std::string &word); - - //! 
Trim whitespace characters from the beginning and end of a string - static void trimWhitespace(std::string &str); - - //! Trim certain characters from the beginning and end of a string - static void trim(const std::string &toTrim, - std::string &str); - - //! Replace adjacent whitespace characters with single spaces - static std::string normaliseWhitespace(const std::string &str); - - //! Find and replace a string within another string - static size_t replace(const std::string &from, - const std::string &to, - std::string &str); - - //! Find and replace the first occurrence (only) of a string within - //! another string - static size_t replaceFirst(const std::string &from, - const std::string &to, - std::string &str); - - //! Escape a specified set of characters in a string - static void escape(char escape, - const std::string &toEscape, - std::string &str); - - //! Remove a given escape character from a string - static void unEscape(char escape, std::string &str); - - //! Tokenise a std::string based on a delimiter. - //! This does NOT behave like strtok - it matches - //! the entire delimiter not just characters in it - static void tokenise(const std::string &delim, - const std::string &str, - TStrVec &tokens, - std::string &remainder); - - //! Find the longest common substring of two strings - static std::string longestCommonSubstr(const std::string &str1, - const std::string &str2); - - //! Find the longest common subsequence of two strings - static std::string longestCommonSubsequence(const std::string &str1, - const std::string &str2); - - //! Convert between wide and narrow strings. - //! There's currently no clever processing here for character set - //! conversion, so for non-ASCII characters the results won't be great. - //! TODO - Use a string library (e.g. ICU) to add support for sensible - //! conversion between different character sets. - static std::string wideToNarrow(const std::wstring &wideStr); - static std::wstring narrowToWide(const std::string &narrowStr); - - //! Get a locale object for character transformations - //! TODO - remove when we switch to a character conversion library - //! (e.g. ICU) - static const std::locale &locale(); - - private: - //! Internal calls for public templated methods - //! Important: These are implemented in terms of the built-in - //! types. The public templated methods will call the correct - //! one based on the actual underlying type for a given typedef. - //! For example, suppose time_t is a long on a particular - //! platform. The user calls typeToString passing a time_t - //! without caring what the underlying type is. Then the - //! compiler calls _typeToString(long) having translated the - //! typedef to its actual underlying type. But at no point - //! did the user have to know the underlying type. - //! In almost every other part of the code base, the built-in - //! types should not be used, as they restrict the ease with - //! which we could switch between 32 bit and 64 bit compilation. - //! Instead typedefs like uint##_t, int##_t, size_t, etc. should - //! be used. But because these methods are called from a - //! templated wrapper and selected by the compiler they're a - //! special case. 
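A short sketch of this dispatch from the caller's side (the epoch value is arbitrary):

    #include <core/CStringUtils.h>

    #include <ctime>
    #include <string>

    void example() {
        // The caller works in terms of time_t; the compiler selects the
        // private overload for whichever built-in type time_t maps to
        time_t when(1520596800);
        std::string text(ml::core::CStringUtils::typeToString(when));

        time_t parsed(0);
        bool ok(ml::core::CStringUtils::stringToType(text, parsed));
        // ok == true and parsed == when
    }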
- - static std::string _typeToString(const unsigned long long &); - static std::string _typeToString(const unsigned long &); - static std::string _typeToString(const unsigned int &); - static std::string _typeToString(const unsigned short &); - static std::string _typeToString(const long long &); - static std::string _typeToString(const long &); - static std::string _typeToString(const int &); - static std::string _typeToString(const short &); - static std::string _typeToString(const bool &); - - //! There's a function for double, but not float as we want to - //! discourage the use of float. - static std::string _typeToString(const double &); - - static std::string _typeToString(const char *); - static std::string _typeToString(const char &); - - //! This one seems silly, but it allows generic methods to be written - //! more easily - static const std::string &_typeToString(const std::string &str); - - static bool _stringToType(bool silent, const std::string &, unsigned long long &); - static bool _stringToType(bool silent, const std::string &, unsigned long &); - static bool _stringToType(bool silent, const std::string &, unsigned int &); - static bool _stringToType(bool silent, const std::string &, unsigned short &); - static bool _stringToType(bool silent, const std::string &, long long &); - static bool _stringToType(bool silent, const std::string &, long &); - static bool _stringToType(bool silent, const std::string &, int &); - static bool _stringToType(bool silent, const std::string &, short &); - - //! This bool converter accepts true/false and yes/no as well as - //! numeric values - static bool _stringToType(bool silent, const std::string &, bool &); - - //! There's a function for double, but not float as we want to - //! discourage the use of float. - static bool _stringToType(bool silent, const std::string &, double &); - - static bool _stringToType(bool silent, const std::string &, char &); - - //! This one seems silly, but it allows generic methods to be written - //! more easily - static bool _stringToType(bool, const std::string &, std::string &); - - template - static std::size_t computeStringLength(ITR begin, ITR end) - { - std::size_t length(0); - while (begin != end) - { - length += begin->length(); - ++begin; - } - return length; + } + + //! Convert a string to lower case + static std::string toLower(std::string str); + + //! Convert a string to upper case + static std::string toUpper(std::string str); + + //! How many times does word occur in str? + static size_t numMatches(const std::string& str, const std::string& word); + + //! Trim whitespace characters from the beginning and end of a string + static void trimWhitespace(std::string& str); + + //! Trim certain characters from the beginning and end of a string + static void trim(const std::string& toTrim, std::string& str); + + //! Replace adjacent whitespace characters with single spaces + static std::string normaliseWhitespace(const std::string& str); + + //! Find and replace a string within another string + static size_t replace(const std::string& from, const std::string& to, std::string& str); + + //! Find and replace the first occurrence (only) of a string within + //! another string + static size_t replaceFirst(const std::string& from, const std::string& to, std::string& str); + + //! Escape a specified set of characters in a string + static void escape(char escape, const std::string& toEscape, std::string& str); + + //! 
Remove a given escape character from a string + static void unEscape(char escape, std::string& str); + + //! Tokenise a std::string based on a delimiter. + //! This does NOT behave like strtok - it matches + //! the entire delimiter not just characters in it + static void tokenise(const std::string& delim, const std::string& str, TStrVec& tokens, std::string& remainder); + + //! Find the longest common substring of two strings + static std::string longestCommonSubstr(const std::string& str1, const std::string& str2); + + //! Find the longest common subsequence of two strings + static std::string longestCommonSubsequence(const std::string& str1, const std::string& str2); + + //! Convert between wide and narrow strings. + //! There's currently no clever processing here for character set + //! conversion, so for non-ASCII characters the results won't be great. + //! TODO - Use a string library (e.g. ICU) to add support for sensible + //! conversion between different character sets. + static std::string wideToNarrow(const std::wstring& wideStr); + static std::wstring narrowToWide(const std::string& narrowStr); + + //! Get a locale object for character transformations + //! TODO - remove when we switch to a character conversion library + //! (e.g. ICU) + static const std::locale& locale(); + +private: + //! Internal calls for public templated methods + //! Important: These are implemented in terms of the built-in + //! types. The public templated methods will call the correct + //! one based on the actual underlying type for a given typedef. + //! For example, suppose time_t is a long on a particular + //! platform. The user calls typeToString passing a time_t + //! without caring what the underlying type is. Then the + //! compiler calls _typeToString(long) having translated the + //! typedef to its actual underlying type. But at no point + //! did the user have to know the underlying type. + //! In almost every other part of the code base, the built-in + //! types should not be used, as they restrict the ease with + //! which we could switch between 32 bit and 64 bit compilation. + //! Instead typedefs like uint##_t, int##_t, size_t, etc. should + //! be used. But because these methods are called from a + //! templated wrapper and selected by the compiler they're a + //! special case. + + static std::string _typeToString(const unsigned long long&); + static std::string _typeToString(const unsigned long&); + static std::string _typeToString(const unsigned int&); + static std::string _typeToString(const unsigned short&); + static std::string _typeToString(const long long&); + static std::string _typeToString(const long&); + static std::string _typeToString(const int&); + static std::string _typeToString(const short&); + static std::string _typeToString(const bool&); + + //! There's a function for double, but not float as we want to + //! discourage the use of float. + static std::string _typeToString(const double&); + + static std::string _typeToString(const char*); + static std::string _typeToString(const char&); + + //! This one seems silly, but it allows generic methods to be written + //! 
more easily + static const std::string& _typeToString(const std::string& str); + + static bool _stringToType(bool silent, const std::string&, unsigned long long&); + static bool _stringToType(bool silent, const std::string&, unsigned long&); + static bool _stringToType(bool silent, const std::string&, unsigned int&); + static bool _stringToType(bool silent, const std::string&, unsigned short&); + static bool _stringToType(bool silent, const std::string&, long long&); + static bool _stringToType(bool silent, const std::string&, long&); + static bool _stringToType(bool silent, const std::string&, int&); + static bool _stringToType(bool silent, const std::string&, short&); + + //! This bool converter accepts true/false and yes/no as well as + //! numeric values + static bool _stringToType(bool silent, const std::string&, bool&); + + //! There's a function for double, but not float as we want to + //! discourage the use of float. + static bool _stringToType(bool silent, const std::string&, double&); + + static bool _stringToType(bool silent, const std::string&, char&); + + //! This one seems silly, but it allows generic methods to be written + //! more easily + static bool _stringToType(bool, const std::string&, std::string&); + + template + static std::size_t computeStringLength(ITR begin, ITR end) { + std::size_t length(0); + while (begin != end) { + length += begin->length(); + ++begin; } + return length; + } }; //! Macro to convert a pre-processor symbol to a string constant - has to be //! done in a macro unfortunately as the # operator is only recognised by the //! pre-processor. #define STRINGIFY_MACRO(str) (#str) - - } } #endif // INCLUDED_ml_core_CStringUtils_h - diff --git a/include/core/CThread.h b/include/core/CThread.h index 957359831e..054a9dc690 100644 --- a/include/core/CThread.h +++ b/include/core/CThread.h @@ -15,11 +15,8 @@ #include #endif - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief //! Basic wrapper class around pthread_create etc. @@ -32,82 +29,78 @@ namespace core //! conditions when a thread is started and stopped in quick //! succession. //! -class CORE_EXPORT CThread : private CNonCopyable -{ - public: - //! Thread ID type +class CORE_EXPORT CThread : private CNonCopyable { +public: +//! Thread ID type #ifdef Windows - using TThreadId = DWORD; - using TThreadRet = unsigned int; + using TThreadId = DWORD; + using TThreadRet = unsigned int; #else - using TThreadId = pthread_t; - using TThreadRet = void*; + using TThreadId = pthread_t; + using TThreadRet = void*; #endif - public: - CThread(); - virtual ~CThread(); +public: + CThread(); + virtual ~CThread(); - //! Start the thread. It's an error to call this if the thread is - //! already running. - bool start(); + //! Start the thread. It's an error to call this if the thread is + //! already running. + bool start(); - //! Start the thread, retrieving the thread ID. It's an error to call - //! this if the thread is already running. - bool start(TThreadId &threadId); + //! Start the thread, retrieving the thread ID. It's an error to call + //! this if the thread is already running. + bool start(TThreadId& threadId); - //! Stop the thread. It's an error to call this if the thread is - //! already stopped. Only call one of stop() and waitForFinish(); do - //! NOT call both. - bool stop(); + //! Stop the thread. It's an error to call this if the thread is + //! already stopped. Only call one of stop() and waitForFinish(); do + //! NOT call both. + bool stop(); - //! 
This method blocks and waits for the thread to finish. - //! It differs from 'stop' as it doesn't call shutdown. - //! BE AWARE THIS MAY BLOCK INDEFINITELY. Only call one of stop() - //! and waitForFinish(); do NOT call both. - bool waitForFinish(); + //! This method blocks and waits for the thread to finish. + //! It differs from 'stop' as it doesn't call shutdown. + //! BE AWARE THIS MAY BLOCK INDEFINITELY. Only call one of stop() + //! and waitForFinish(); do NOT call both. + bool waitForFinish(); - //! Has the thread been started? - bool isStarted() const; + //! Has the thread been started? + bool isStarted() const; - //! Wake up any blocking IO calls in this thread, such as reads to named - //! pipes where nothing has connected to the other end of the pipe. - bool cancelBlockedIo(); + //! Wake up any blocking IO calls in this thread, such as reads to named + //! pipes where nothing has connected to the other end of the pipe. + bool cancelBlockedIo(); - //! Wake up any blocking IO calls in the specified thread, such as reads - //! to named pipes where nothing has connected to the other end of the - //! pipe. - static bool cancelBlockedIo(TThreadId threadId); + //! Wake up any blocking IO calls in the specified thread, such as reads + //! to named pipes where nothing has connected to the other end of the + //! pipe. + static bool cancelBlockedIo(TThreadId threadId); - //! Static method to get the ID of the currently running thread - static TThreadId currentThreadId(); + //! Static method to get the ID of the currently running thread + static TThreadId currentThreadId(); - protected: - //! The run() method should only be called from threadFunc() - virtual void run() = 0; - virtual void shutdown() = 0; +protected: + //! The run() method should only be called from threadFunc() + virtual void run() = 0; + virtual void shutdown() = 0; - private: - //! This method is used as a thread start function, hence it must be - //! static so that we can take its address like a free function - static TThreadRet STDCALL threadFunc(void *obj); +private: + //! This method is used as a thread start function, hence it must be + //! static so that we can take its address like a free function + static TThreadRet STDCALL threadFunc(void* obj); - private: - //! ID of the most recently started thread - TThreadId m_ThreadId; +private: + //! ID of the most recently started thread + TThreadId m_ThreadId; #ifdef Windows - //! Windows needs a thread handle as well as a thread ID - HANDLE m_ThreadHandle; + //! Windows needs a thread handle as well as a thread ID + HANDLE m_ThreadHandle; #endif - //! Mutex to protect access to m_ThreadId - mutable CMutex m_IdMutex; + //! Mutex to protect access to m_ThreadId + mutable CMutex m_IdMutex; }; - - } } #endif // INCLUDED_ml_core_CThread_h - diff --git a/include/core/CThreadFarm.h b/include/core/CThreadFarm.h index 25ca8c9d04..0feecfb9f2 100644 --- a/include/core/CThreadFarm.h +++ b/include/core/CThreadFarm.h @@ -7,8 +7,8 @@ #define INCLUDED_ml_core_CThreadFarm_h #include -#include #include +#include #include #include #include @@ -16,17 +16,13 @@ #include -#include #include +#include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! A means to have multiple threads work on some input @@ -48,236 +44,184 @@ namespace core //! The result type must have both a default constructor and a copy constructor. //! 
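Two of the template-parameter contracts are visible in the code that follows: RESULT must be default- and copy-constructible, and HANDLER must provide processResult(const RESULT&), which the farm invokes under its mutex. A sketch with hypothetical names (the PROCESSOR interface is defined by CThreadFarmReceiver and is not shown in this hunk):

    #include <cstddef>

    struct SLengthResult { // a viable RESULT: default and copy constructible
        size_t s_Length = 0;
    };

    class CLengthTotaller { // a viable HANDLER
    public:
        void processResult(const SLengthResult& result) { m_Total += result.s_Length; }

    private:
        size_t m_Total = 0;
    };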
template -class CThreadFarm : private CNonCopyable -{ - public: - CThreadFarm(HANDLER &handler, const std::string &name) - : m_Handler(handler), - m_Pending(0), - m_LastPrint(0), - m_MessagesAdded(0), - m_Started(false), - m_Name(name) - { +class CThreadFarm : private CNonCopyable { +public: + CThreadFarm(HANDLER& handler, const std::string& name) + : m_Handler(handler), m_Pending(0), m_LastPrint(0), m_MessagesAdded(0), m_Started(false), m_Name(name) {} + + virtual ~CThreadFarm() { + // Shared_ptr cleans up + } + + //! Add a processor + bool addProcessor(PROCESSOR& processor) { + if (m_Started == true) { + LOG_ERROR("Can't add receiver to running " << m_Name << " thread farm"); + return false; } - virtual ~CThreadFarm() - { - // Shared_ptr cleans up - } + TReceiverP receiver(new TReceiver(processor, *this)); - //! Add a processor - bool addProcessor(PROCESSOR &processor) - { - if (m_Started == true) - { - LOG_ERROR("Can't add receiver to running " << m_Name << - " thread farm"); - return false; - } + TMessageQueueP mq(new CMessageQueue(*receiver)); - TReceiverP receiver(new TReceiver(processor, *this)); + m_MessageQueues.push_back(mq); + m_Receivers.push_back(receiver); - TMessageQueueP mq(new CMessageQueue(*receiver)); + return true; + } - m_MessageQueues.push_back(mq); - m_Receivers.push_back(receiver); + //! Add some work, and find out how many results are pending + //! following the addition + bool addMessage(const MESSAGE& msg, size_t& pending) { + CScopedLock lock(m_Mutex); - return true; + if (m_Started == false) { + LOG_ERROR("Can't add message to the " << m_Name << " thread farm because it's not running. Call 'start'"); + return false; } - //! Add some work, and find out how many results are pending - //! following the addition - bool addMessage(const MESSAGE &msg, size_t &pending) - { - CScopedLock lock(m_Mutex); - - if (m_Started == false) - { - LOG_ERROR("Can't add message to the " << m_Name << - " thread farm because it's not running. Call 'start'"); - return false; - } + for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); itr != m_MessageQueues.end(); ++itr) { + (*itr)->dispatchMsg(msg); + ++m_Pending; + } - for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); - itr != m_MessageQueues.end(); - ++itr) - { - (*itr)->dispatchMsg(msg); - ++m_Pending; - } + ++m_MessagesAdded; + if (m_MessagesAdded % 1000 == 0) { + LOG_INFO("Added message " << m_MessagesAdded << " to the " << m_Name << " thread farm; pending count now " << m_Pending); + } - ++m_MessagesAdded; - if (m_MessagesAdded % 1000 == 0) - { - LOG_INFO("Added message " << m_MessagesAdded << " to the " << - m_Name << " thread farm; pending count now " << - m_Pending); - } + pending = m_Pending; - pending = m_Pending; + return true; + } - return true; - } + //! Add some work + bool addMessage(const MESSAGE& msg) { + size_t dummy = 0; + return this->addMessage(msg, dummy); + } - //! Add some work - bool addMessage(const MESSAGE &msg) - { - size_t dummy = 0; - return this->addMessage(msg, dummy); + //! Initialise - create the receiving threads + bool start() { + if (m_Started == true) { + LOG_ERROR("Can't start the " << m_Name << " thread farm because it's already running."); + return false; } - //! 
Initialise - create the receiving threads - bool start() - { - if (m_Started == true) - { - LOG_ERROR("Can't start the " << m_Name << - " thread farm because it's already running."); + size_t count(1); + for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); itr != m_MessageQueues.end(); ++itr) { + if ((*itr)->start() == false) { + LOG_ERROR("Unable to start message queue " << count << " for the " << m_Name << " thread farm"); return false; } - size_t count(1); - for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); - itr != m_MessageQueues.end(); - ++itr) - { - if ((*itr)->start() == false) - { - LOG_ERROR("Unable to start message queue " << count << - " for the " << m_Name << " thread farm"); - return false; - } - - ++count; - } + ++count; + } + + m_Started = true; - m_Started = true; + return true; + } - return true; + //! Shutdown - kill threads + bool stop() { + if (m_Started == false) { + LOG_ERROR("Can't stop the " << m_Name << " thread farm because it's not running."); + return false; } - //! Shutdown - kill threads - bool stop() - { - if (m_Started == false) - { - LOG_ERROR("Can't stop the " << m_Name << - " thread farm because it's not running."); + size_t count(1); + for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); itr != m_MessageQueues.end(); ++itr) { + if ((*itr)->stop() == false) { + LOG_ERROR("Unable to stop message queue " << count << " for the " << m_Name << " thread farm"); return false; } - size_t count(1); - for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); - itr != m_MessageQueues.end(); - ++itr) - { - if ((*itr)->stop() == false) - { - LOG_ERROR("Unable to stop message queue " << count << - " for the " << m_Name << " thread farm"); - return false; - } - - LOG_DEBUG("Stopped message queue " << count << - " for the " << m_Name << " thread farm"); - ++count; - } - - m_Started = false; + LOG_DEBUG("Stopped message queue " << count << " for the " << m_Name << " thread farm"); + ++count; + } - // Reset counters in case of restart - m_MessagesAdded = 0; - m_LastPrint = 0; + m_Started = false; - if (m_Pending != 0) - { - LOG_ERROR("Inconsistency - " << m_Pending << - " pending messages after stopping the " << m_Name << - " thread farm"); - m_Pending = 0; - } + // Reset counters in case of restart + m_MessagesAdded = 0; + m_LastPrint = 0; - return true; + if (m_Pending != 0) { + LOG_ERROR("Inconsistency - " << m_Pending << " pending messages after stopping the " << m_Name << " thread farm"); + m_Pending = 0; } - private: - //! This should only be called by our friend the CThreadFarmReceiver - //! otherwise the pending count will get messed up - void addResult(const RESULT &result) - { - CScopedLock lock(m_Mutex); - - if (m_Pending <= 0) - { - LOG_ERROR("Inconsistency - result added with " << m_Pending << - " pending messages in the " << m_Name << - " thread farm"); - return; - } + return true; + } - m_Handler.processResult(result); +private: + //! This should only be called by our friend the CThreadFarmReceiver + //! 
otherwise the pending count will get messed up + void addResult(const RESULT& result) { + CScopedLock lock(m_Mutex); - --m_Pending; + if (m_Pending <= 0) { + LOG_ERROR("Inconsistency - result added with " << m_Pending << " pending messages in the " << m_Name << " thread farm"); + return; + } - // Log how much work is outstanding every so often - if ((m_Pending % 10000) == 0 && m_Pending != m_LastPrint) - { - LOG_INFO("Pending count now " << m_Pending << " for the " << - m_Name << " thread farm"); - m_LastPrint = m_Pending; - } + m_Handler.processResult(result); - if (m_Pending == 0) - { - //m_Handler.allComplete(); - } + --m_Pending; + + // Log how much work is outstanding every so often + if ((m_Pending % 10000) == 0 && m_Pending != m_LastPrint) { + LOG_INFO("Pending count now " << m_Pending << " for the " << m_Name << " thread farm"); + m_LastPrint = m_Pending; + } + + if (m_Pending == 0) { + //m_Handler.allComplete(); } + } - private: - //! Reference to the object that will handle the results - HANDLER &m_Handler; +private: + //! Reference to the object that will handle the results + HANDLER& m_Handler; - using TThreadFarm = CThreadFarm; + using TThreadFarm = CThreadFarm; - using TReceiver = CThreadFarmReceiver; - using TReceiverP = boost::shared_ptr; - using TReceiverPVec = std::vector; - using TReceiverPVecItr = typename TReceiverPVec::iterator; + using TReceiver = CThreadFarmReceiver; + using TReceiverP = boost::shared_ptr; + using TReceiverPVec = std::vector; + using TReceiverPVecItr = typename TReceiverPVec::iterator; - using TMessageQueueP = boost::shared_ptr< CMessageQueue >; - using TMessageQueuePVec = std::vector; - using TMessageQueuePVecItr = typename TMessageQueuePVec::iterator; + using TMessageQueueP = boost::shared_ptr>; + using TMessageQueuePVec = std::vector; + using TMessageQueuePVecItr = typename TMessageQueuePVec::iterator; - TReceiverPVec m_Receivers; + TReceiverPVec m_Receivers; - //! We want the message queues destroyed before the receivers - TMessageQueuePVec m_MessageQueues; + //! We want the message queues destroyed before the receivers + TMessageQueuePVec m_MessageQueues; - //! How many results are pending? - size_t m_Pending; + //! How many results are pending? + size_t m_Pending; - //! What was the pending value when we last printed it? - uint64_t m_LastPrint; + //! What was the pending value when we last printed it? + uint64_t m_LastPrint; - //! How many messages have been added to the farm? - uint64_t m_MessagesAdded; + //! How many messages have been added to the farm? + uint64_t m_MessagesAdded; - //! Is the farm started? - bool m_Started; + //! Is the farm started? + bool m_Started; - //! Protect members from multi-threaded access - CMutex m_Mutex; + //! Protect members from multi-threaded access + CMutex m_Mutex; - //! Purely for better logging messages - std::string m_Name; + //! Purely for better logging messages + std::string m_Name; friend class CThreadFarmReceiver; }; - - } } #endif // INCLUDED_ml_core_CThreadFarm_h - diff --git a/include/core/CThreadFarmReceiver.h b/include/core/CThreadFarmReceiver.h index 16e0b8ed59..8c202c0fd5 100644 --- a/include/core/CThreadFarmReceiver.h +++ b/include/core/CThreadFarmReceiver.h @@ -8,12 +8,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! A receiver used by the CThreadFarm class @@ -26,37 +22,25 @@ namespace core //! Only stores a reference to the processor in case it's expensive to copy //! 
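 //! For reference, a hypothetical PROCESSOR satisfying the msgToResult()
 //! contract that processMsg() below relies on:
 //! \code
 //! class CUpperCaser {
 //! public:
 //!     void msgToResult(const std::string& msg, std::string& result) {
 //!         result.clear();
 //!         for (char c : msg) {
 //!             result += static_cast<char>(::toupper(static_cast<unsigned char>(c)));
 //!         }
 //!     }
 //! };
 //! \endcode
 //!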
template -class CThreadFarmReceiver -{ - public: - CThreadFarmReceiver(PROCESSOR &processor, - THREADFARM &threadFarm) - : m_Processor(processor), - m_ThreadFarm(threadFarm) - { - } - - virtual ~CThreadFarmReceiver() - { - } - - void processMsg(const MESSAGE &msg, size_t /* backlog */) - { - RESULT result; - - m_Processor.msgToResult(msg, result); - - m_ThreadFarm.addResult(result); - } - - private: - PROCESSOR &m_Processor; - THREADFARM &m_ThreadFarm; -}; +class CThreadFarmReceiver { +public: + CThreadFarmReceiver(PROCESSOR& processor, THREADFARM& threadFarm) : m_Processor(processor), m_ThreadFarm(threadFarm) {} + + virtual ~CThreadFarmReceiver() {} + void processMsg(const MESSAGE& msg, size_t /* backlog */) { + RESULT result; + m_Processor.msgToResult(msg, result); + + m_ThreadFarm.addResult(result); + } + +private: + PROCESSOR& m_Processor; + THREADFARM& m_ThreadFarm; +}; } } #endif // INCLUDED_ml_core_CThreadFarmReceiver_h - diff --git a/include/core/CTicker.h b/include/core/CTicker.h index 172e4256c7..567fa4c7cc 100644 --- a/include/core/CTicker.h +++ b/include/core/CTicker.h @@ -12,12 +12,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! A generic class that calls a method 'tick' on the @@ -32,69 +28,54 @@ namespace core //! objects for this but there is complexity with scope. //! template -class CTicker : public CThread -{ - public: - //! Timeout is in milliseconds - CTicker(uint32_t timeOut, RECEIVER &receiver) - : m_Condition(m_Mutex), - m_Quit(false), - m_TimeOut(timeOut), - m_Receiver(receiver) - { +class CTicker : public CThread { +public: + //! Timeout is in milliseconds + CTicker(uint32_t timeOut, RECEIVER& receiver) : m_Condition(m_Mutex), m_Quit(false), m_TimeOut(timeOut), m_Receiver(receiver) {} + + //! Destructor will stop the ticker thread if it's already running + ~CTicker() { + if (this->isStarted()) { + this->stop(); } + } - //! Destructor will stop the ticker thread if it's already running - ~CTicker() - { - if (this->isStarted()) - { - this->stop(); - } - } - - protected: - void run() - { - CScopedLock lock(m_Mutex); - - while (!m_Quit) - { - m_Condition.wait(m_TimeOut); +protected: + void run() { + CScopedLock lock(m_Mutex); - // Call receiver - m_Receiver.tick(); - } + while (!m_Quit) { + m_Condition.wait(m_TimeOut); - // Reset quit flag to false in case we're restarted - m_Quit = false; + // Call receiver + m_Receiver.tick(); } - void shutdown() - { - CScopedLock lock(m_Mutex); + // Reset quit flag to false in case we're restarted + m_Quit = false; + } - m_Quit = true; - m_Condition.signal(); - } - - private: - CMutex m_Mutex; - CCondition m_Condition; + void shutdown() { + CScopedLock lock(m_Mutex); - //! Should the ticker quit? - bool m_Quit; + m_Quit = true; + m_Condition.signal(); + } - //! How often (in milliseconds) should the ticker tick? - uint32_t m_TimeOut; +private: + CMutex m_Mutex; + CCondition m_Condition; - //! Reference to the object whose tick() method will be called - RECEIVER &m_Receiver; -}; + //! Should the ticker quit? + bool m_Quit; + //! How often (in milliseconds) should the ticker tick? + uint32_t m_TimeOut; + //! 
Reference to the object whose tick() method will be called
+    RECEIVER& m_Receiver;
+};
 }
 }
 #endif // INCLUDED_ml_core_CTicker_h
-
diff --git a/include/core/CTimeGm.h b/include/core/CTimeGm.h
index 22b7023cb4..247c5e85f9 100644
--- a/include/core/CTimeGm.h
+++ b/include/core/CTimeGm.h
@@ -11,12 +11,8 @@
 #include
-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {
 //! \brief
 //! Convert tm into a time_t without making a timezone adjustment.
@@ -28,15 +24,11 @@ namespace core
 //!
 //! IMPLEMENTATION DECISIONS:\n
 //!
-class CORE_EXPORT CTimeGm : private CNonInstantiatable
-{
-    public:
-        static time_t timeGm(struct tm *ts);
+class CORE_EXPORT CTimeGm : private CNonInstantiatable {
+public:
+    static time_t timeGm(struct tm* ts);
 };
-
-
 }
 }
 #endif // INCLUDED_ml_core_CTimeGm_h
-
diff --git a/include/core/CTimeUtils.h b/include/core/CTimeUtils.h
index bb9c31c31a..0992d48da8 100644
--- a/include/core/CTimeUtils.h
+++ b/include/core/CTimeUtils.h
@@ -15,12 +15,8 @@
 #include
-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {
 //! \brief
 //! A holder of time utility methods.
@@ -33,90 +29,79 @@ namespace core
 //! Class consolidates time methods used throughout the Ml
 //! codebase.
 //!
-class CORE_EXPORT CTimeUtils : private CNonInstantiatable
-{
+class CORE_EXPORT CTimeUtils : private CNonInstantiatable {
+public:
+    //! Maximum tolerable clock discrepancy between machines at the same
+    //! customer site
+    static const core_t::TTime MAX_CLOCK_DISCREPANCY;
+
+public:
+    //! Current time
+    static core_t::TTime now();
+
+    //! Date and time to string according to http://www.w3.org/TR/NOTE-datetime
+    //! E.g. 1997-07-16T19:20:30+01:00
+    static std::string toIso8601(core_t::TTime t);
+
+    //! Date and time to string according to local convention
+    static std::string toLocalString(core_t::TTime t);
+
+    //! Time only to string
+    //! E.g. 19:20:30
+    static std::string toTimeString(core_t::TTime t);
+
+    //! Converts an epoch seconds timestamp to epoch millis
+    static int64_t toEpochMs(core_t::TTime t);
+
+    //! strptime interface
+    //! NOTE: the time returned here is a UTC value
+    static bool strptime(const std::string& format, const std::string& dateTime, core_t::TTime& preTime);
+
+    //! Same strptime interface as above, but doesn't print any error messages
+    static bool strptimeSilent(const std::string& format, const std::string& dateTime, core_t::TTime& preTime);
+
+    //! Is a given word a day of the week name, month name, or timezone
+    //! abbreviation in the current locale? Input should be trimmed of
+    //! whitespace before calling this function.
+    static bool isDateWord(const std::string& word);
+
+private:
+    //! Factor out common code from the three string conversion methods
+    static void toStringCommon(core_t::TTime t, const std::string& format, std::string& result);
+
+private:
+    //! Class to cache date words so that we don't have to repeatedly use
+    //! strptime() to check for them
+    class CDateWordCache {
    public:
-        //! Maximum tolerable clock discrepancy between machines at the same
-        //! customer site
-        static const core_t::TTime MAX_CLOCK_DISCREPANCY;
+        //! Get the singleton instance
+        static const CDateWordCache& instance();
 
-    public:
-        //! Current time
-        static core_t::TTime now();
-
-        //! Date and time to string according to http://www.w3.org/TR/NOTE-datetime
-        //! E.g. 1997-07-16T19:20:30+01:00
-        static std::string toIso8601(core_t::TTime t);
-
-        //!
Date and time to string according to local convention - static std::string toLocalString(core_t::TTime t); - - //! Time only to string - //! E.g. 19:20:30 - static std::string toTimeString(core_t::TTime t); - - //! Converts an epoch seconds timestamp to epoch millis - static int64_t toEpochMs(core_t::TTime t); - //! strptime interface - //! NOTE: the time returned here is a UTC value - static bool strptime(const std::string &format, - const std::string &dateTime, - core_t::TTime &preTime); - - //! Same strptime interface as above, but doesn't print any error messages - static bool strptimeSilent(const std::string &format, - const std::string &dateTime, - core_t::TTime &preTime); - - //! Is a given word a day of the week name, month name, or timezone - //! abbreviation in the current locale? Input should be trimmed of - //! whitespace before calling this function. - static bool isDateWord(const std::string &word); + //! Check if a word is a date word + bool isDateWord(const std::string& word) const; private: - //! Factor out common code from the three string conversion methods - static void toStringCommon(core_t::TTime t, - const std::string &format, - std::string &result); + //! Constructor for a singleton is private + CDateWordCache(); + ~CDateWordCache(); private: - //! Class to cache date words so that we don't have to repeatedly use - //! strptime() to check for them - class CDateWordCache - { - public: - //! Get the singleton instance - static const CDateWordCache &instance(); - - //! Check if a word is a date word - bool isDateWord(const std::string &word) const; - - private: - //! Constructor for a singleton is private - CDateWordCache(); - ~CDateWordCache(); - - private: - //! Protect the singleton's initialisation, preventing it from - //! being constructed simultaneously in different threads. - static CFastMutex ms_InitMutex; - - //! This pointer is set after the singleton object has been - //! constructed, and avoids the need to lock the mutex on - //! subsequent calls of the instance() method (once the updated - //! value of this variable has made its way into every thread). - static volatile CDateWordCache *ms_Instance; - - using TStrUSet = boost::unordered_set; - - //! Our cache of date words - TStrUSet m_DateWords; - }; -}; + //! Protect the singleton's initialisation, preventing it from + //! being constructed simultaneously in different threads. + static CFastMutex ms_InitMutex; + //! This pointer is set after the singleton object has been + //! constructed, and avoids the need to lock the mutex on + //! subsequent calls of the instance() method (once the updated + //! value of this variable has made its way into every thread). + static volatile CDateWordCache* ms_Instance; + using TStrUSet = boost::unordered_set; + + //! Our cache of date words + TStrUSet m_DateWords; + }; +}; } } #endif // INCLUDED_ml_core_CTimeUtils_h - diff --git a/include/core/CTimezone.h b/include/core/CTimezone.h index 063376c38a..aa84cd1b2a 100644 --- a/include/core/CTimezone.h +++ b/include/core/CTimezone.h @@ -17,12 +17,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portability wrapper to set the current time zone @@ -58,80 +54,76 @@ namespace core //! As a result, our implementation is imperfect for parsing historical data //! on Windows. Problems resulting from this are likely to be rare though. //! -class CORE_EXPORT CTimezone : private CNonCopyable -{ - public: - //! Get the singleton instance - static CTimezone &instance(); - - //! 
Get the name of the current timezone. This will be a POSIX name, - //! e.g. Europe/London or America/New_York, or, if the timezone has not - //! been changed from the system default, an empty string. - const std::string &timezoneName() const; - - //! Set the name of the timezone used by the C library functions (or our - //! replacements for them) - //! Example input: America/New_York - //! Europe/London - //! A blank string will cause the timezone of the machine - //! we're running on to be used (which the C library will determine - //! in an OS dependent manner). - bool timezoneName(const std::string &name); - - //! Convenience wrapper around the setter for timezone name - static bool setTimezone(const std::string &timezone); - - //! Abbreviation for standard time in the current timezone - std::string stdAbbrev() const; - - //! Abbreviation for daylight saving time in the current timezone - std::string dstAbbrev() const; - - //! Normalise a local time structure and also return the corresponding - //! epoch time (i.e. seconds since midnight on 1/1/1970 UTC). This - //! is a replacement for mktime() that switches to using Boost on - //! Windows when the program's timezone is different to the operating - //! system's timezone. - core_t::TTime localToUtc(struct tm &localTime) const; - - //! Convert a UTC time to local time in the current timezone. - bool utcToLocal(core_t::TTime utcTime, struct tm &localTime) const; - - //! Get the date fields. - bool dateFields(core_t::TTime utcTime, - int &daysSinceSunday, - int &dayOfMonth, - int &daysSinceJanuary1st, - int &monthsSinceJanuary, - int &yearsSince1900, - int &secondsSinceMidnight) const; - - private: - //! Constructor for a singleton is private. - CTimezone(); - ~CTimezone(); - - private: - //! Since there is one timezone for the whole program, access to it is - //! protected by this mutex - mutable CFastMutex m_Mutex; - - //! Name of the current timezone in use within this program, or blank to - //! use the current operating system settings - std::string m_Name; +class CORE_EXPORT CTimezone : private CNonCopyable { +public: + //! Get the singleton instance + static CTimezone& instance(); + + //! Get the name of the current timezone. This will be a POSIX name, + //! e.g. Europe/London or America/New_York, or, if the timezone has not + //! been changed from the system default, an empty string. + const std::string& timezoneName() const; + + //! Set the name of the timezone used by the C library functions (or our + //! replacements for them) + //! Example input: America/New_York + //! Europe/London + //! A blank string will cause the timezone of the machine + //! we're running on to be used (which the C library will determine + //! in an OS dependent manner). + bool timezoneName(const std::string& name); + + //! Convenience wrapper around the setter for timezone name + static bool setTimezone(const std::string& timezone); + + //! Abbreviation for standard time in the current timezone + std::string stdAbbrev() const; + + //! Abbreviation for daylight saving time in the current timezone + std::string dstAbbrev() const; + + //! Normalise a local time structure and also return the corresponding + //! epoch time (i.e. seconds since midnight on 1/1/1970 UTC). This + //! is a replacement for mktime() that switches to using Boost on + //! Windows when the program's timezone is different to the operating + //! system's timezone. + core_t::TTime localToUtc(struct tm& localTime) const; + + //! Convert a UTC time to local time in the current timezone. 
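+    //! A hypothetical round-trip sketch (the literal timestamp is
+    //! illustrative only):
+    //! \code
+    //! struct tm local;
+    //! if (CTimezone::instance().utcToLocal(1520600000, local)) {
+    //!     core_t::TTime utc = CTimezone::instance().localToUtc(local);
+    //! }
+    //! \endcode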
+ bool utcToLocal(core_t::TTime utcTime, struct tm& localTime) const; + + //! Get the date fields. + bool dateFields(core_t::TTime utcTime, + int& daysSinceSunday, + int& dayOfMonth, + int& daysSinceJanuary1st, + int& monthsSinceJanuary, + int& yearsSince1900, + int& secondsSinceMidnight) const; + +private: + //! Constructor for a singleton is private. + CTimezone(); + ~CTimezone(); + +private: + //! Since there is one timezone for the whole program, access to it is + //! protected by this mutex + mutable CFastMutex m_Mutex; + + //! Name of the current timezone in use within this program, or blank to + //! use the current operating system settings + std::string m_Name; #ifdef Windows - //! Boost timezone database - boost::local_time::tz_database m_TimezoneDb; + //! Boost timezone database + boost::local_time::tz_database m_TimezoneDb; - //! Boost timezone database - boost::local_time::time_zone_ptr m_Timezone; + //! Boost timezone database + boost::local_time::time_zone_ptr m_Timezone; #endif }; - - } } #endif // INCLUDED_ml_core_CTimezone_h - diff --git a/include/core/CTriple.h b/include/core/CTriple.h index 9f4996f20f..d11110c03e 100644 --- a/include/core/CTriple.h +++ b/include/core/CTriple.h @@ -18,11 +18,8 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! \brief A tuple with three elements. //! @@ -34,95 +31,74 @@ namespace core //! meaning it can be used as a boost::unordered_map key provided the //! underlying types can be hashed using a boost::hasher. template -class CTriple : private boost::equality_comparable< CTriple, - boost::partially_ordered< CTriple > > -{ - public: - //! See CMemory. - static bool dynamicSizeAlwaysZero() - { - return memory_detail::SDynamicSizeAlwaysZero::value() - && memory_detail::SDynamicSizeAlwaysZero::value() - && memory_detail::SDynamicSizeAlwaysZero::value(); - } - - public: - CTriple() : first(), second(), third() {} - CTriple(const T1 &first_, const T2 &second_, const T3 &third_) : - first(first_), second(second_), third(third_) - { - } - - bool operator==(const CTriple &other) const - { - return first == other.first && second == other.second && third == other.third; - } - - bool operator<(const CTriple &other) const - { - if (first == other.first) - { - if (second == other.second) - { - return third < other.third; - } - return second < other.second; +class CTriple : private boost::equality_comparable, boost::partially_ordered>> { +public: + //! See CMemory. 
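+    //! (True only when none of T1, T2 and T3 can own dynamically
+    //! allocated memory, in which case memory accounting can treat
+    //! every instance as having zero dynamic size.)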
+ static bool dynamicSizeAlwaysZero() { + return memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value() && + memory_detail::SDynamicSizeAlwaysZero::value(); + } + +public: + CTriple() : first(), second(), third() {} + CTriple(const T1& first_, const T2& second_, const T3& third_) : first(first_), second(second_), third(third_) {} + + bool operator==(const CTriple& other) const { return first == other.first && second == other.second && third == other.third; } + + bool operator<(const CTriple& other) const { + if (first == other.first) { + if (second == other.second) { + return third < other.third; } - return first < other.first; - } - - std::size_t hash() const - { - std::size_t seed = 0; - boost::hash_combine(seed, first); - boost::hash_combine(seed, second); - boost::hash_combine(seed, third); - return seed; - } - - void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CTriple"); - CMemoryDebug::dynamicSize("first", first, mem); - CMemoryDebug::dynamicSize("second", second, mem); - CMemoryDebug::dynamicSize("third", third, mem); - } - - std::size_t memoryUsage() const - { - std::size_t mem = 0; - mem += CMemory::dynamicSize(first); - mem += CMemory::dynamicSize(second); - mem += CMemory::dynamicSize(third); - return mem; + return second < other.second; } - - public: - T1 first; - T2 second; - T3 third; + return first < other.first; + } + + std::size_t hash() const { + std::size_t seed = 0; + boost::hash_combine(seed, first); + boost::hash_combine(seed, second); + boost::hash_combine(seed, third); + return seed; + } + + void debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CTriple"); + CMemoryDebug::dynamicSize("first", first, mem); + CMemoryDebug::dynamicSize("second", second, mem); + CMemoryDebug::dynamicSize("third", third, mem); + } + + std::size_t memoryUsage() const { + std::size_t mem = 0; + mem += CMemory::dynamicSize(first); + mem += CMemory::dynamicSize(second); + mem += CMemory::dynamicSize(third); + return mem; + } + +public: + T1 first; + T2 second; + T3 third; }; template -CTriple make_triple(const T1 &first, const T2 &second, const T3 &third) -{ +CTriple make_triple(const T1& first, const T2& second, const T3& third) { return CTriple(first, second, third); } template -std::size_t hash_value(const CTriple &triple) -{ +std::size_t hash_value(const CTriple& triple) { return triple.hash(); } template -std::ostream &operator<<(std::ostream &o, const CTriple &triple) -{ +std::ostream& operator<<(std::ostream& o, const CTriple& triple) { return o << '(' << triple.first << ',' << triple.second << ',' << triple.third << ')'; } - } } #endif // INCLUDED_ml_core_CTriple_h - diff --git a/include/core/CUnSetEnv.h b/include/core/CUnSetEnv.h index a299b1d486..c073977fa1 100644 --- a/include/core/CUnSetEnv.h +++ b/include/core/CUnSetEnv.h @@ -9,12 +9,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper for the unsetenv() function. @@ -28,15 +24,11 @@ namespace core //! library treats a request to set a variable to be empty as a //! request to delete it from the environment. //! 
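 //! A hypothetical one-line usage, mirroring POSIX unsetenv():
 //! \code
 //! ml::core::CUnSetEnv::unSetEnv("TZ");
 //! \endcode
 //!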
-class CORE_EXPORT CUnSetEnv : private CNonInstantiatable -{ - public: - static int unSetEnv(const char *name); +class CORE_EXPORT CUnSetEnv : private CNonInstantiatable { +public: + static int unSetEnv(const char* name); }; - - } } #endif // INCLUDED_ml_core_CUnSetEnv_h - diff --git a/include/core/CUname.h b/include/core/CUname.h index 2e03a37775..c84887174b 100644 --- a/include/core/CUname.h +++ b/include/core/CUname.h @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Portable wrapper for the uname() function. @@ -31,34 +27,30 @@ namespace core //! Each member of the utsname struct should be implemented as a //! separate method, as usually only one is required. //! -class CORE_EXPORT CUname : private CNonInstantiatable -{ - public: - //! uname -s - static std::string sysName(); - //! uname -n - static std::string nodeName(); - //! uname -r - static std::string release(); - //! uname -v - static std::string version(); - //! uname -m - static std::string machine(); - //! uname -a (or possibly a cut down version on some platforms) - static std::string all(); - - //! Return the platform name in the format - - //! e.g. linux-x86_64 - static std::string mlPlatform(); - - //! On Unix this is equivalent to uname -r; on Windows it's the - //! underlying Windows NT version. - static std::string mlOsVer(); +class CORE_EXPORT CUname : private CNonInstantiatable { +public: + //! uname -s + static std::string sysName(); + //! uname -n + static std::string nodeName(); + //! uname -r + static std::string release(); + //! uname -v + static std::string version(); + //! uname -m + static std::string machine(); + //! uname -a (or possibly a cut down version on some platforms) + static std::string all(); + + //! Return the platform name in the format - + //! e.g. linux-x86_64 + static std::string mlPlatform(); + + //! On Unix this is equivalent to uname -r; on Windows it's the + //! underlying Windows NT version. + static std::string mlOsVer(); }; - - } } #endif // INCLUDED_ml_core_CUname_h - diff --git a/include/core/CVectorRange.h b/include/core/CVectorRange.h index f8cc312ebc..d54838970c 100644 --- a/include/core/CVectorRange.h +++ b/include/core/CVectorRange.h @@ -11,51 +11,39 @@ #include #include -namespace ml -{ -namespace core -{ -template class CVectorRange; +namespace ml { +namespace core { +template +class CVectorRange; -namespace vector_range_detail -{ +namespace vector_range_detail { //! \brief Gets the reference type. template -struct SReferenceType -{ +struct SReferenceType { using type = typename VECTOR::reference; }; template -struct SReferenceType -{ +struct SReferenceType { using type = typename VECTOR::const_reference; }; //! \brief Gets the iterator type. template -struct SIteratorType -{ +struct SIteratorType { using type = typename VECTOR::iterator; }; template -struct SIteratorType -{ +struct SIteratorType { using type = typename VECTOR::const_iterator; }; //! \brief Implements assignment. 
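 //! (Where the source and destination share the same base vector,
 //! dispatch() below copies through a temporary so that the assignment
 //! cannot read elements it has already overwritten.)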
template -struct SDoAssign -{ - static const CVectorRange &dispatch(CVectorRange &lhs, - const CVectorRange &rhs) - { - if (rhs.base() != lhs.base()) - { +struct SDoAssign { + static const CVectorRange& dispatch(CVectorRange& lhs, const CVectorRange& rhs) { + if (rhs.base() != lhs.base()) { lhs.assign(rhs.begin(), rhs.end()); - } - else - { + } else { VECTOR tmp{rhs.begin(), rhs.end()}; lhs.assign(tmp.begin(), tmp.end()); } @@ -63,11 +51,8 @@ struct SDoAssign } }; template -struct SDoAssign -{ - static const CVectorRange &dispatch(CVectorRange &lhs, - const CVectorRange &rhs) - { +struct SDoAssign { + static const CVectorRange& dispatch(CVectorRange& lhs, const CVectorRange& rhs) { CVectorRange tmp(*rhs.base(), rhs.a(), rhs.b()); lhs.swap(tmp); return lhs; @@ -81,292 +66,232 @@ struct SDoAssign //! A lightweight mostly c++11 compliant vector interface to a contiguous //! sub-range of a specified vector type. template -class CVectorRange -{ - public: - using allocator_type = typename VECTOR::allocator_type; - using size_type = typename VECTOR::size_type; - using reference = typename vector_range_detail::SReferenceType::type; - using const_reference = typename VECTOR::const_reference; - using iterator = typename vector_range_detail::SIteratorType::type; - using const_iterator = typename VECTOR::const_iterator; +class CVectorRange { +public: + using allocator_type = typename VECTOR::allocator_type; + using size_type = typename VECTOR::size_type; + using reference = typename vector_range_detail::SReferenceType::type; + using const_reference = typename VECTOR::const_reference; + using iterator = typename vector_range_detail::SIteratorType::type; + using const_iterator = typename VECTOR::const_iterator; - public: - CVectorRange(VECTOR &vector, size_type a, size_type b) : - m_Vector(&vector), m_A(a), m_B(b) - {} +public: + CVectorRange(VECTOR& vector, size_type a, size_type b) : m_Vector(&vector), m_A(a), m_B(b) {} - //! Copy assignment. - const CVectorRange &operator=(const CVectorRange &other) - { - return vector_range_detail::SDoAssign::dispatch(*this, other); - } + //! Copy assignment. + const CVectorRange& operator=(const CVectorRange& other) { return vector_range_detail::SDoAssign::dispatch(*this, other); } - //! Assign from value. - template - void assign(size_type n, const T &value) - { - std::fill_n(this->begin(), std::min(this->size(), n), value); - if (n > this->size()) - { - m_Vector->insert(this->end(), n - this->size(), value); - } - else if (n < this->size()) - { - m_Vector->erase(this->begin() + n, this->end()); - } - m_B = m_A + n; + //! Assign from value. + template + void assign(size_type n, const T& value) { + std::fill_n(this->begin(), std::min(this->size(), n), value); + if (n > this->size()) { + m_Vector->insert(this->end(), n - this->size(), value); + } else if (n < this->size()) { + m_Vector->erase(this->begin() + n, this->end()); } - //! Assign from range. - template - void assign(ITR begin, ITR end) - { - size_type size = std::distance(begin, end); - std::copy(begin, begin + std::min(this->size(), size), this->begin()); - if (size > this->size()) - { - m_Vector->insert(this->end(), begin + this->size(), end); - } - else if (size < this->size()) - { - m_Vector->erase(this->begin() + size, this->end()); - } - m_B = m_A + size; + m_B = m_A + n; + } + //! Assign from range. 
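+    //! A hypothetical sketch, writing through a subrange built with
+    //! make_range() (defined at the end of this file):
+    //! \code
+    //! std::vector<int> v{0, 1, 2, 3, 4};
+    //! auto middle = make_range(v, 1, 4);     // views elements 1..3
+    //! std::vector<int> src{7, 8, 9};
+    //! middle.assign(src.begin(), src.end()); // v is now {0, 7, 8, 9, 4}
+    //! \endcode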
+    template<typename ITR>
+    void assign(ITR begin, ITR end) {
+        size_type size = std::distance(begin, end);
+        std::copy(begin, begin + std::min(this->size(), size), this->begin());
+        if (size > this->size()) {
+            m_Vector->insert(this->end(), begin + this->size(), end);
+        } else if (size < this->size()) {
+            m_Vector->erase(this->begin() + size, this->end());
+        }
+        m_B = m_A + size;
+    }
 
-        //! Get the underlying vector allocator.
-        allocator_type get_allocator() const
-        {
-            return m_Vector->get_allocator;
-        }
+    //! Get the underlying vector allocator.
+    allocator_type get_allocator() const { return m_Vector->get_allocator(); }
 
-        //! Get writable element at \p pos.
-        reference at(size_type pos)
-        {
-            this->range_check(pos);
-            return (*m_Vector)[m_A + pos];
-        }
-        //! Get read-only element at \p pos.
-        const_reference at(size_type pos) const
-        {
-            this->range_check(pos);
-            return (*m_Vector)[m_A + pos];
-        }
+    //! Get writable element at \p pos.
+    reference at(size_type pos) {
+        this->range_check(pos);
+        return (*m_Vector)[m_A + pos];
+    }
+    //! Get read-only element at \p pos.
+    const_reference at(size_type pos) const {
+        this->range_check(pos);
+        return (*m_Vector)[m_A + pos];
+    }
 
-        //! Get writable element at \p pos.
-        reference operator[](size_type pos)
-        {
-            return (*m_Vector)[m_A + pos];
-        }
-        //! Get read-only element at \p pos.
-        const_reference operator[](size_type pos) const
-        {
-            return (*m_Vector)[m_A + pos];
-        }
+    //! Get writable element at \p pos.
+    reference operator[](size_type pos) { return (*m_Vector)[m_A + pos]; }
+    //! Get read-only element at \p pos.
+    const_reference operator[](size_type pos) const { return (*m_Vector)[m_A + pos]; }
 
-        //! Get writable first element.
-        reference front() { return this->operator[](0); }
-        //! Get read-only first element.
-        const_reference front() const { return this->operator[](0); }
+    //! Get writable first element.
+    reference front() { return this->operator[](0); }
+    //! Get read-only first element.
+    const_reference front() const { return this->operator[](0); }
 
-        //! Get writable last element.
-        reference back() { return this->operator[](m_B - m_A - 1); }
-        //! Get read-only last element.
-        const_reference back() const { return this->operator[](m_B - m_A - 1); }
+    //! Get writable last element.
+    reference back() { return this->operator[](m_B - m_A - 1); }
+    //! Get read-only last element.
+    const_reference back() const { return this->operator[](m_B - m_A - 1); }
 
-        //! Input iterator to start of range.
-        iterator begin() { return m_Vector->begin() + m_A; }
-        //! Output iterator to start of range.
-        const_iterator begin() const { return m_Vector->begin() + m_A; }
-        //! Output iterator to start of range.
-        const_iterator cbegin() const { return m_Vector->begin() + m_A; }
+    //! Input iterator to start of range.
+    iterator begin() { return m_Vector->begin() + m_A; }
+    //! Output iterator to start of range.
+    const_iterator begin() const { return m_Vector->begin() + m_A; }
+    //! Output iterator to start of range.
+    const_iterator cbegin() const { return m_Vector->begin() + m_A; }
 
-        //! Input iterator to end of range.
-        iterator end() { return m_Vector->begin() + m_B; }
-        //! Output iterator to end of range.
-        const_iterator end() const { return m_Vector->begin() + m_B; }
-        //! Output iterator to end of range.
-        const_iterator cend() const { return m_Vector->begin() + m_B; }
+    //! Input iterator to end of range.
+    iterator end() { return m_Vector->begin() + m_B; }
+    //! Output iterator to end of range.
+ const_iterator end() const { return m_Vector->begin() + m_B; } + //! Output iterator to end of range. + const_iterator cend() const { return m_Vector->begin() + m_B; } - //! Check if the range is empty. - bool empty() const { return m_B == m_A; } - //! Size of range. - size_type size() const { return m_B - m_A; } - //! Get the maximum permitted size. - size_type max_size() const { return m_Vector->max_size(); } - //! Reserve space for \p size elements. - void reserve(size_type size) - { - m_Vector->reserve((size + m_Vector->size()) - this->size()); - } - //! Get the number of elements which can be held in the currently - //! allocated storage. - size_type capacity() const - { - return (m_Vector->capacity() - m_Vector->size()) + this->size(); - } + //! Check if the range is empty. + bool empty() const { return m_B == m_A; } + //! Size of range. + size_type size() const { return m_B - m_A; } + //! Get the maximum permitted size. + size_type max_size() const { return m_Vector->max_size(); } + //! Reserve space for \p size elements. + void reserve(size_type size) { m_Vector->reserve((size + m_Vector->size()) - this->size()); } + //! Get the number of elements which can be held in the currently + //! allocated storage. + size_type capacity() const { return (m_Vector->capacity() - m_Vector->size()) + this->size(); } - //! Clear the contents. - void clear() - { - this->erase(this->begin(), this->end()); - m_B = m_A; - } - //! Remove the element at \p pos. - iterator erase(const_iterator pos) - { - --m_B; - return m_Vector->erase(pos); - } - //! Remove elements in the range [begin, end). - iterator erase(const_iterator begin, const_iterator end) - { - m_B -= std::distance(begin, end); - return m_Vector->erase(begin, end); - } - //! Insert a value at \p pos. - template - iterator insert(const_iterator pos, const T &value) - { - ++m_B; - return m_Vector->insert(pos, value); - } - //! Insert \p n copies of \p value at \p pos. - template - iterator insert(const_iterator pos, size_type n, const T &value) - { - m_B += n; - return m_Vector->insert(pos, n, value); - } - //! Insert the value [\p begin, \p end) at \p pos. - template - iterator insert(const_iterator pos, ITR begin, ITR end) - { - m_B += std::distance(begin, end); - return m_Vector->insert(pos, begin, end); - } - //! Add an element at the end of the range. - //! - //! \warning This is not O(1). - template - void push_back(const T &value) - { - this->insert(this->end(), value); - } - //! Remove an element from the end of the range. - //! - //! \warning This is not O(1). - void pop_back() - { - this->erase(this->end() - 1); - } - //! Resize adding default constructed values if \p n is greater - //! than the current size. - void resize(size_type n) - { - this->resize(n, typename VECTOR::value_type()); - } - //! Resize adding default constructed values if \p n is greater - //! than the current size. - template - void resize(size_type n, const T &value) - { - if (n > this->size()) - { - this->insert(this->end(), n - this->size(), value); - } - else if (n < this->size()) - { - this->erase(this->begin() + n, this->end()); - } - } - //! Swap two ranges. - void swap(CVectorRange &other) - { - std::swap(m_Vector, other.m_Vector); - std::swap(m_A, other.m_A); - std::swap(m_B, other.m_B); + //! Clear the contents. + void clear() { + this->erase(this->begin(), this->end()); + m_B = m_A; + } + //! Remove the element at \p pos. + iterator erase(const_iterator pos) { + --m_B; + return m_Vector->erase(pos); + } + //! 
Remove elements in the range [begin, end). + iterator erase(const_iterator begin, const_iterator end) { + m_B -= std::distance(begin, end); + return m_Vector->erase(begin, end); + } + //! Insert a value at \p pos. + template + iterator insert(const_iterator pos, const T& value) { + ++m_B; + return m_Vector->insert(pos, value); + } + //! Insert \p n copies of \p value at \p pos. + template + iterator insert(const_iterator pos, size_type n, const T& value) { + m_B += n; + return m_Vector->insert(pos, n, value); + } + //! Insert the value [\p begin, \p end) at \p pos. + template + iterator insert(const_iterator pos, ITR begin, ITR end) { + m_B += std::distance(begin, end); + return m_Vector->insert(pos, begin, end); + } + //! Add an element at the end of the range. + //! + //! \warning This is not O(1). + template + void push_back(const T& value) { + this->insert(this->end(), value); + } + //! Remove an element from the end of the range. + //! + //! \warning This is not O(1). + void pop_back() { this->erase(this->end() - 1); } + //! Resize adding default constructed values if \p n is greater + //! than the current size. + void resize(size_type n) { this->resize(n, typename VECTOR::value_type()); } + //! Resize adding default constructed values if \p n is greater + //! than the current size. + template + void resize(size_type n, const T& value) { + if (n > this->size()) { + this->insert(this->end(), n - this->size(), value); + } else if (n < this->size()) { + this->erase(this->begin() + n, this->end()); } + } + //! Swap two ranges. + void swap(CVectorRange& other) { + std::swap(m_Vector, other.m_Vector); + std::swap(m_A, other.m_A); + std::swap(m_B, other.m_B); + } - //! Get the base vector. - VECTOR *base() const { return m_Vector; } + //! Get the base vector. + VECTOR* base() const { return m_Vector; } - //! Get the start of the range. - size_type a() const { return m_A; } + //! Get the start of the range. + size_type a() const { return m_A; } - //! Get the end of the range. - size_type b() const { return m_B; } + //! Get the end of the range. + size_type b() const { return m_B; } - private: - //! Check if \p pos is in range. - void range_check(size_type pos) const - { - if (m_A + pos >= m_B) - { - std::ostringstream message; - message << "out of range: " << pos << " >= " << m_B - m_A; - throw std::out_of_range(message.str()); - } +private: + //! Check if \p pos is in range. + void range_check(size_type pos) const { + if (m_A + pos >= m_B) { + std::ostringstream message; + message << "out of range: " << pos << " >= " << m_B - m_A; + throw std::out_of_range(message.str()); } + } - private: - //! The underlying vector. - VECTOR *m_Vector; - //! The range [m_A, m_B). - size_type m_A, m_B; +private: + //! The underlying vector. + VECTOR* m_Vector; + //! The range [m_A, m_B). + size_type m_A, m_B; }; //! Check if \p lhs and \p rhs are equal. template -bool operator==(const CVectorRange &lhs, const CVectorRange &rhs) -{ +bool operator==(const CVectorRange& lhs, const CVectorRange& rhs) { return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); } //! Check if \p lhs and \p rhs are not equal. template -bool operator!=(const CVectorRange &lhs, const CVectorRange &rhs) -{ +bool operator!=(const CVectorRange& lhs, const CVectorRange& rhs) { return !(lhs == rhs); } //! Check if \p lhs is lexicographically less than \p rhs. 
template<typename VECTOR>
-bool operator<(const CVectorRange<VECTOR> &lhs, const CVectorRange<VECTOR> &rhs)
-{
+bool operator<(const CVectorRange<VECTOR>& lhs, const CVectorRange<VECTOR>& rhs) {
     return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
 }
 
 //! Check if \p lhs is lexicographically less or equal to \p rhs.
 template<typename VECTOR>
-bool operator<=(const CVectorRange<VECTOR> &lhs, const CVectorRange<VECTOR> &rhs)
-{
+bool operator<=(const CVectorRange<VECTOR>& lhs, const CVectorRange<VECTOR>& rhs) {
     return lhs < rhs || lhs == rhs;
 }
 
 //! Check if \p lhs is lexicographically greater than \p rhs.
 template<typename VECTOR>
-bool operator>(const CVectorRange<VECTOR> &lhs, const CVectorRange<VECTOR> &rhs)
-{
+bool operator>(const CVectorRange<VECTOR>& lhs, const CVectorRange<VECTOR>& rhs) {
     return rhs < lhs;
 }
 
 //! Check if \p lhs is lexicographically greater or equal to \p rhs.
 template<typename VECTOR>
-bool operator>=(const CVectorRange<VECTOR> &lhs, const CVectorRange<VECTOR> &rhs)
-{
+bool operator>=(const CVectorRange<VECTOR>& lhs, const CVectorRange<VECTOR>& rhs) {
     return rhs <= lhs;
 }
 
 //! Free swap function to participate in Koenig lookup.
 template<typename VECTOR>
-void swap(CVectorRange<VECTOR> &lhs, CVectorRange<VECTOR> &rhs)
-{
+void swap(CVectorRange<VECTOR>& lhs, CVectorRange<VECTOR>& rhs) {
     lhs.swap(rhs);
 }
 
 //! Make a vector subrange.
 template<typename VECTOR>
-CVectorRange<VECTOR> make_range(VECTOR &vector, std::size_t a, std::size_t b)
-{
+CVectorRange<VECTOR> make_range(VECTOR& vector, std::size_t a, std::size_t b) {
     return CVectorRange<VECTOR>(vector, a, b);
 }
-
 }
 }
diff --git a/include/core/CWindowsError.h b/include/core/CWindowsError.h
index 70ac340926..6b70b8b34b 100644
--- a/include/core/CWindowsError.h
+++ b/include/core/CWindowsError.h
@@ -13,11 +13,8 @@
 #include
-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 //! \brief
 //! Encapsulate interpretation of Windows function errors.
@@ -35,37 +32,31 @@ namespace core
 //! to explicitly initialise an object with a specific error
 //! code.
 //!
-class CORE_EXPORT CWindowsError
-{
-    public:
-        //! Initialise using the last error to occur. This
-        //! is obtained GetLastError() on Windows.
-        CWindowsError();
+class CORE_EXPORT CWindowsError {
+public:
+    //! Initialise using the last error to occur. This
+    //! is obtained from GetLastError() on Windows.
+    CWindowsError();
 
-        //! Initialise using a specific error number
-        CWindowsError(uint32_t errorCode);
+    //! Initialise using a specific error number
+    CWindowsError(uint32_t errorCode);
 
-        //! Access the raw error code number
-        uint32_t errorCode() const;
+    //! Access the raw error code number
+    uint32_t errorCode() const;
 
-        //! Textual representation of the error
-        std::string errorString() const;
+    //! Textual representation of the error
+    std::string errorString() const;
 
-    private:
-        //! The error code
-        uint32_t m_ErrorCode;
+private:
+    //! The error code
+    uint32_t m_ErrorCode;
 
-        friend CORE_EXPORT std::ostream &operator<<(std::ostream &,
-                                                    const CWindowsError &);
+    friend CORE_EXPORT std::ostream& operator<<(std::ostream&, const CWindowsError&);
 };
 
 //! Stream output prints textual representation of the error
-CORE_EXPORT std::ostream &operator<<(std::ostream &os,
-                                     const CWindowsError &windowsError);
-
-
+CORE_EXPORT std::ostream& operator<<(std::ostream& os, const CWindowsError& windowsError);
 }
 }
 #endif // INCLUDED_ml_core_CWindowsError_h
-
diff --git a/include/core/CWordDictionary.h b/include/core/CWordDictionary.h
index 9285be1de2..6e8e6bacc7 100644
--- a/include/core/CWordDictionary.h
+++ b/include/core/CWordDictionary.h
@@ -15,12 +15,8 @@
 #include
 #include
-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {
 //! \brief
 //!
Class to check whether words are present in a dictionary. @@ -51,157 +47,133 @@ namespace core //! too to avoid repeated locking in the instance() method (see //! Modern C++ Design by Andrei Alexandrescu for details). //! -class CORE_EXPORT CWordDictionary : private CNonCopyable -{ +class CORE_EXPORT CWordDictionary : private CNonCopyable { +public: + //! Types of words. + //! The values used are deliberately powers of two so that in the + //! future we could potentially bitwise-or them together for words + //! with multiple uses. + enum EPartOfSpeech { + E_NotInDictionary = 0, + E_UnknownPart = 1, + E_Noun = 2, + E_Plural = 4, + E_Verb = 8, + E_Adjective = 16, + E_Adverb = 32, + E_Conjunction = 64, + E_Preposition = 128, + E_Interjection = 256, + E_Pronoun = 512, + E_DefiniteArticle = 1024, + E_IndefiniteArticle = 2048 + }; + +public: + //! Functor for weighting all dictionary words by a certain amount + template + class CWeightAll { public: - //! Types of words. - //! The values used are deliberately powers of two so that in the - //! future we could potentially bitwise-or them together for words - //! with multiple uses. - enum EPartOfSpeech - { - E_NotInDictionary = 0, - E_UnknownPart = 1, - E_Noun = 2, - E_Plural = 4, - E_Verb = 8, - E_Adjective = 16, - E_Adverb = 32, - E_Conjunction = 64, - E_Preposition = 128, - E_Interjection = 256, - E_Pronoun = 512, - E_DefiniteArticle = 1024, - E_IndefiniteArticle = 2048 - }; + size_t operator()(EPartOfSpeech partOfSpeech) { return (partOfSpeech == E_NotInDictionary) ? 0 : DEFAULT_EXTRA_WEIGHT; } + }; + using TWeightAll2 = CWeightAll<2>; + + //! Functor for weighting one type of dictionary word by a certain + //! amount and all dictionary words by a different amount + template + class CWeightOnePart { + public: + size_t operator()(EPartOfSpeech partOfSpeech) { + if (partOfSpeech == E_NotInDictionary) { + return 0; + } + return (partOfSpeech == SPECIAL_PART1) ? EXTRA_WEIGHT1 : DEFAULT_EXTRA_WEIGHT; + } + }; + + using TWeightVerbs5Other2 = CWeightOnePart; + + //! Functor for weighting two types of dictionary word by certain + //! amounts and all dictionary words by a different amount + template + class CWeightTwoParts { public: - //! Functor for weighting all dictionary words by a certain amount - template - class CWeightAll - { - public: - size_t operator()(EPartOfSpeech partOfSpeech) - { - return (partOfSpeech == E_NotInDictionary) ? 0 : DEFAULT_EXTRA_WEIGHT; - } - }; - - using TWeightAll2 = CWeightAll<2>; - - //! Functor for weighting one type of dictionary word by a certain - //! amount and all dictionary words by a different amount - template - class CWeightOnePart - { - public: - size_t operator()(EPartOfSpeech partOfSpeech) - { - if (partOfSpeech == E_NotInDictionary) - { - return 0; - } - return (partOfSpeech == SPECIAL_PART1) ? EXTRA_WEIGHT1 : DEFAULT_EXTRA_WEIGHT; - } - }; - - using TWeightVerbs5Other2 = CWeightOnePart; - - //! Functor for weighting two types of dictionary word by certain - //! amounts and all dictionary words by a different amount - template - class CWeightTwoParts - { - public: - size_t operator()(EPartOfSpeech partOfSpeech) - { - if (partOfSpeech == E_NotInDictionary) - { - return 0; - } - if (partOfSpeech == SPECIAL_PART1) - { - return EXTRA_WEIGHT1; - } - return (partOfSpeech == SPECIAL_PART2) ? EXTRA_WEIGHT2 : DEFAULT_EXTRA_WEIGHT; - } - }; - - // Similar templates with more arguments can be added as required... 
+ size_t operator()(EPartOfSpeech partOfSpeech) { + if (partOfSpeech == E_NotInDictionary) { + return 0; + } + if (partOfSpeech == SPECIAL_PART1) { + return EXTRA_WEIGHT1; + } + return (partOfSpeech == SPECIAL_PART2) ? EXTRA_WEIGHT2 : DEFAULT_EXTRA_WEIGHT; + } + }; + + // Similar templates with more arguments can be added as required... + +public: + //! Get the singleton instance + static const CWordDictionary& instance(); + + //! Check if a word is in the dictionary. Don't call this as well as + //! partOfSpeech(). Instead simply call partOfSpeech(), noting that + //! it will return E_NotInDictionary in cases where this method will + //! return false. + bool isInDictionary(const std::string& str) const; + + //! Check what part of speech a word is primarily used for. Note that + //! many words can be used in different parts of speech and this method + //! only returns what Grady Ward thought was the primary use when he + //! created Moby. This method returns E_NotInDictionary for words that + //! aren't in the dictionary. + EPartOfSpeech partOfSpeech(const std::string& str) const; + +private: + //! Constructor for a singleton is private + CWordDictionary(); + ~CWordDictionary(); + +private: + class CStrHashIgnoreCase : std::unary_function { + public: + size_t operator()(const std::string& str) const; + }; + class CStrEqualIgnoreCase : std::binary_function { public: - //! Get the singleton instance - static const CWordDictionary &instance(); - - //! Check if a word is in the dictionary. Don't call this as well as - //! partOfSpeech(). Instead simply call partOfSpeech(), noting that - //! it will return E_NotInDictionary in cases where this method will - //! return false. - bool isInDictionary(const std::string &str) const; - - //! Check what part of speech a word is primarily used for. Note that - //! many words can be used in different parts of speech and this method - //! only returns what Grady Ward thought was the primary use when he - //! created Moby. This method returns E_NotInDictionary for words that - //! aren't in the dictionary. - EPartOfSpeech partOfSpeech(const std::string &str) const; - - private: - //! Constructor for a singleton is private - CWordDictionary(); - ~CWordDictionary(); - - private: - class CStrHashIgnoreCase : std::unary_function - { - public: - size_t operator()(const std::string &str) const; - }; - - class CStrEqualIgnoreCase : std::binary_function - { - public: - bool operator()(const std::string &lhs, - const std::string &rhs) const; - }; - - private: - //! Name of the file to load that contains the dictionary words. - static const char *DICTIONARY_FILE; - - //! The constructor loads a file, and hence may take a while. This - //! mutex prevents the singleton object being constructed simultaneously - //! in different threads. - static CFastMutex ms_LoadMutex; - - //! This pointer is set after the singleton object has been constructed, - //! and avoids the need to lock the mutex on subsequent calls of the - //! instance() method (once the updated value of this variable has made - //! its way into every thread). - static volatile CWordDictionary *ms_Instance; - - //! Stores the dictionary words - using a multi-index even though - //! there's only one index, because of its flexible key extractors. - //! The key is the string, but hashed and compared ignoring case. - using TStrUMap = boost::unordered_map; - using TStrUMapCItr = TStrUMap::const_iterator; - - //! 
Our dictionary of words - TStrUMap m_DictionaryWords; + bool operator()(const std::string& lhs, const std::string& rhs) const; + }; + +private: + //! Name of the file to load that contains the dictionary words. + static const char* DICTIONARY_FILE; + + //! The constructor loads a file, and hence may take a while. This + //! mutex prevents the singleton object being constructed simultaneously + //! in different threads. + static CFastMutex ms_LoadMutex; + + //! This pointer is set after the singleton object has been constructed, + //! and avoids the need to lock the mutex on subsequent calls of the + //! instance() method (once the updated value of this variable has made + //! its way into every thread). + static volatile CWordDictionary* ms_Instance; + + //! Stores the dictionary words - using a multi-index even though + //! there's only one index, because of its flexible key extractors. + //! The key is the string, but hashed and compared ignoring case. + using TStrUMap = boost::unordered_map; + using TStrUMapCItr = TStrUMap::const_iterator; + + //! Our dictionary of words + TStrUMap m_DictionaryWords; }; - - } } #endif // INCLUDED_ml_core_CWordDictionary_h - diff --git a/include/core/CWordExtractor.h b/include/core/CWordExtractor.h index f6535db8e0..f7648e7d97 100644 --- a/include/core/CWordExtractor.h +++ b/include/core/CWordExtractor.h @@ -10,12 +10,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { //! \brief //! Class to extract "words" from a string. @@ -37,36 +33,29 @@ namespace core //! Words are returned in a single string. In future it might be worth //! adding the option to return them in a vector. //! -class CORE_EXPORT CWordExtractor -{ - public: - //! Extract words from a message, and return them in a space separated - //! string - static void extractWordsFromMessage(const std::string &message, - std::string &messageWords); - - //! Extract words from a message, and return them in a space separated - //! string BUT only include words that occur in groups of a specified - //! size - static void extractWordsFromMessage(size_t minConsecutive, - const std::string &message, - std::string &messageWords); - - private: - //! Don't allow objects to be instantiated - CWordExtractor(); - CWordExtractor(const CWordExtractor &); - - private: - //! The ::ispunct() function's definition of punctuation is too - //! permissive (basically anything that's not a letter, number or - //! space), so we have our own definition of punctuation - static const std::string PUNCT_CHARS; +class CORE_EXPORT CWordExtractor { +public: + //! Extract words from a message, and return them in a space separated + //! string + static void extractWordsFromMessage(const std::string& message, std::string& messageWords); + + //! Extract words from a message, and return them in a space separated + //! string BUT only include words that occur in groups of a specified + //! size + static void extractWordsFromMessage(size_t minConsecutive, const std::string& message, std::string& messageWords); + +private: + //! Don't allow objects to be instantiated + CWordExtractor(); + CWordExtractor(const CWordExtractor&); + +private: + //! The ::ispunct() function's definition of punctuation is too + //! permissive (basically anything that's not a letter, number or + //! 
diff --git a/include/core/CXmlNode.h b/include/core/CXmlNode.h
index 94c957003b..d5301b7547 100644
--- a/include/core/CXmlNode.h
+++ b/include/core/CXmlNode.h
@@ -16,15 +16,11 @@
 #include
 #include

-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {

 class CXmlNodeWithChildrenPool;
 class CXmlParser;
-
 //! \brief
 //! Simple representation of a XML node.
@@ -38,127 +34,101 @@ class CXmlParser;
 //! The XML parser is a friend so that it can efficiently
 //! populate attributes with minimal copying of data
 //!
-class CORE_EXPORT CXmlNode
-{
+class CORE_EXPORT CXmlNode {
+public:
+    using TStrStrMap = std::map<std::string, std::string>;
+    using TStrStrPr = std::pair<std::string, std::string>;
+    using TStrStrPrVec = std::vector<TStrStrPr>;
+    using TStrStrPrVecItr = TStrStrPrVec::iterator;
+    using TStrStrPrVecCItr = TStrStrPrVec::const_iterator;
+
+private:
+    class CFirstElementEquals {
     public:
-        using TStrStrMap = std::map<std::string, std::string>;
-        using TStrStrPr = std::pair<std::string, std::string>;
-        using TStrStrPrVec = std::vector<TStrStrPr>;
-        using TStrStrPrVecItr = TStrStrPrVec::iterator;
-        using TStrStrPrVecCItr = TStrStrPrVec::const_iterator;
+        CFirstElementEquals(const std::string& str) : m_Str(str) {}
+
+        template<typename PAIR>
+        bool operator()(const PAIR& pr) {
+            const std::string& prFirst = pr.first;
+            return prFirst == m_Str;
+        }

     private:
-        class CFirstElementEquals
-        {
-            public:
-                CFirstElementEquals(const std::string &str)
-                    : m_Str(str)
-                {
-                }
-
-                template<typename PAIR>
-                bool operator()(const PAIR &pr)
-                {
-                    const std::string &prFirst = pr.first;
-                    return prFirst == m_Str;
-                }
-
-            private:
-                const std::string &m_Str;
-        };
+        const std::string& m_Str;
+    };

-    public:
-        CXmlNode();
+public:
+    CXmlNode();

-        CXmlNode(const std::string &name);
+    CXmlNode(const std::string& name);

-        CXmlNode(const std::string &name,
-                 const std::string &value);
+    CXmlNode(const std::string& name, const std::string& value);

-        CXmlNode(const std::string &name,
-                 const std::string &value,
-                 const TStrStrMap &attributes);
+    CXmlNode(const std::string& name, const std::string& value, const TStrStrMap& attributes);

-        virtual ~CXmlNode();
+    virtual ~CXmlNode();

-        //! Accessors
-        const std::string &name() const;
-        const std::string &value() const;
-        const TStrStrPrVec &attributes() const;
+    //! Accessors
+    const std::string& name() const;
+    const std::string& value() const;
+    const TStrStrPrVec& attributes() const;

-        //! Set name
-        void name(const std::string &name);
+    //! Set name
+    void name(const std::string& name);

-        //! Set value
-        void value(const std::string &value);
+    //! Set value
+    void value(const std::string& value);

-        //! Debug dump of all
-        virtual std::string dump() const;
+    //! Debug dump of all
+    virtual std::string dump() const;

-        //! Retrieve an attribute (if exists), and convert it to the supplied
-        //! type
+    //! Retrieve an attribute (if exists), and convert it to the supplied
+    //! type
+    template<typename TYPE>
+    bool attribute(const std::string& name, TYPE& value) const {
+        TStrStrPrVecCItr iter = std::find_if(m_Attributes.begin(), m_Attributes.end(), CFirstElementEquals(name));
+        if (iter == m_Attributes.end()) {
+            return false;
+        }

-        template<typename TYPE>
-        bool attribute(const std::string &name,
-                       TYPE &value) const
-        {
-            TStrStrPrVecCItr iter = std::find_if(m_Attributes.begin(),
-                                                 m_Attributes.end(),
-                                                 CFirstElementEquals(name));
-            if (iter == m_Attributes.end())
-            {
-                return false;
-            }
-
-            if (CStringUtils::stringToType(iter->second, value) == false)
-            {
-                LOG_ERROR("Unable to convert " << iter->second);
-                return false;
-            }
+        if (CStringUtils::stringToType(iter->second, value) == false) {
+            LOG_ERROR("Unable to convert " << iter->second);
+            return false;
+        }
+        return true;
+    }
+
+    //! Set an attribute. The caller specifies whether to overwrite an
+    //! existing attribute of the same name or not. The value must be
+    //! convertible to a string using CStringUtils.
+    template<typename TYPE>
+    bool attribute(const std::string& name, const TYPE& value, bool overwrite) {
+        TStrStrPrVecItr iter = std::find_if(m_Attributes.begin(), m_Attributes.end(), CFirstElementEquals(name));
+        if (iter == m_Attributes.end()) {
+            m_Attributes.push_back(TStrStrPr(name, CStringUtils::typeToString(value)));
             return true;
         }

-        //! Set an attribute. The caller specifies whether to overwrite an
-        //! existing attribute of the same name or not. The value must be
-        //! convertible to a string using CStringUtils.
-        template<typename TYPE>
-        bool attribute(const std::string &name,
-                       const TYPE &value,
-                       bool overwrite)
-        {
-            TStrStrPrVecItr iter = std::find_if(m_Attributes.begin(),
-                                                m_Attributes.end(),
-                                                CFirstElementEquals(name));
-            if (iter == m_Attributes.end())
-            {
-                m_Attributes.push_back(TStrStrPr(name,
-                                                 CStringUtils::typeToString(value)));
-                return true;
-            }
-
-            if (!overwrite)
-            {
-                return false;
-            }
-
-            CStringUtils::typeToString(value).swap(iter->second);
-
-            return true;
+        if (!overwrite) {
+            return false;
         }

-    private:
-        std::string m_Name;
-        std::string m_Value;
-        TStrStrPrVec m_Attributes;
+        CStringUtils::typeToString(value).swap(iter->second);
+
+        return true;
+    }
+
+private:
+    std::string m_Name;
+    std::string m_Value;
+    TStrStrPrVec m_Attributes;

     friend class CRapidXmlParser;
     friend class CXmlNodeWithChildrenPool;
     friend class CXmlParser;
 };
-
-
 }
 }

 #endif // INCLUDED_ml_core_CXmlNode_h
-
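The two attribute() templates above are easiest to see side by side. A minimal sketch, assuming CStringUtils provides int conversions (the methods above rely on exactly that); the node name, value and attribute are invented.

#include <core/CXmlNode.h>

void attributeExample() {
    ml::core::CXmlNode node("server", "web-01");

    // Setter: the third argument controls whether an existing attribute
    // of the same name may be overwritten.
    node.attribute("port", 8080, true);

    int port(0);
    // Getter: returns false if the attribute is missing or does not
    // convert to the requested type.
    if (node.attribute("port", port)) {
        // port == 8080
    }
}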
diff --git a/include/core/CXmlNodeWithChildren.h b/include/core/CXmlNodeWithChildren.h
index 43ad5978a8..1b76aaea79 100644
--- a/include/core/CXmlNodeWithChildren.h
+++ b/include/core/CXmlNodeWithChildren.h
@@ -13,14 +13,10 @@
 #include

-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {

 class CXmlNodeWithChildrenPool;
-
 //! \brief
 //! Representation of a XML node that can model a hierarchy.
@@ -30,61 +26,54 @@ class CXmlNodeWithChildrenPool;
 //! IMPLEMENTATION DECISIONS:\n
 //! Currently no support for sorting children into any order.
 //!
-class CORE_EXPORT CXmlNodeWithChildren : public CXmlNode
-{
-    public:
-        using TXmlNodeWithChildrenP = boost::shared_ptr<CXmlNodeWithChildren>;
+class CORE_EXPORT CXmlNodeWithChildren : public CXmlNode {
+public:
+    using TXmlNodeWithChildrenP = boost::shared_ptr<CXmlNodeWithChildren>;

-        using TChildNodePVec = std::vector<TXmlNodeWithChildrenP>;
-        using TChildNodePVecItr = TChildNodePVec::iterator;
-        using TChildNodePVecCItr = TChildNodePVec::const_iterator;
+    using TChildNodePVec = std::vector<TXmlNodeWithChildrenP>;
+    using TChildNodePVecItr = TChildNodePVec::iterator;
+    using TChildNodePVecCItr = TChildNodePVec::const_iterator;

-    public:
-        CXmlNodeWithChildren();
+public:
+    CXmlNodeWithChildren();

-        CXmlNodeWithChildren(const std::string &name);
+    CXmlNodeWithChildren(const std::string& name);

-        CXmlNodeWithChildren(const std::string &name,
-                             const std::string &value);
+    CXmlNodeWithChildren(const std::string& name, const std::string& value);

-        CXmlNodeWithChildren(const std::string &name,
-                             const std::string &value,
-                             const CXmlNode::TStrStrMap &attributes);
+    CXmlNodeWithChildren(const std::string& name, const std::string& value, const CXmlNode::TStrStrMap& attributes);

-        CXmlNodeWithChildren(const CXmlNodeWithChildren &arg);
+    CXmlNodeWithChildren(const CXmlNodeWithChildren& arg);

-        virtual ~CXmlNodeWithChildren();
+    virtual ~CXmlNodeWithChildren();

-        CXmlNodeWithChildren &operator=(const CXmlNodeWithChildren &rhs);
+    CXmlNodeWithChildren& operator=(const CXmlNodeWithChildren& rhs);

-        //! Add a child with no children of its own
-        void addChild(const CXmlNode &child);
+    //! Add a child with no children of its own
+    void addChild(const CXmlNode& child);

-        //! Add a child
-        void addChild(const CXmlNodeWithChildren &child);
+    //! Add a child
+    void addChild(const CXmlNodeWithChildren& child);

-        //! Add a child wrapped in a shared pointer
-        void addChildP(const TXmlNodeWithChildrenP &childP);
+    //! Add a child wrapped in a shared pointer
+    void addChildP(const TXmlNodeWithChildrenP& childP);

-        //! Get children
-        const TChildNodePVec &children() const;
+    //! Get children
+    const TChildNodePVec& children() const;

-        //! Debug dump of hierarchy
-        virtual std::string dump() const;
-        virtual std::string dump(size_t indent) const;
+    //! Debug dump of hierarchy
+    virtual std::string dump() const;
+    virtual std::string dump(size_t indent) const;

-    private:
-        //! Vector of children of this node - stored by pointer
-        //! rather than by value to avoid slicing if derived classes
-        //! are ever added
-        TChildNodePVec m_Children;
+private:
+    //! Vector of children of this node - stored by pointer
+    //! rather than by value to avoid slicing if derived classes
+    //! are ever added
+    TChildNodePVec m_Children;

     friend class CXmlNodeWithChildrenPool;
 };
-
-
 }
 }

 #endif // INCLUDED_ml_core_CXmlNodeWithChildren_h
-
diff --git a/include/core/CXmlNodeWithChildrenPool.h b/include/core/CXmlNodeWithChildrenPool.h
index e298351ab3..afd3d7fb78 100644
--- a/include/core/CXmlNodeWithChildrenPool.h
+++ b/include/core/CXmlNodeWithChildrenPool.h
@@ -12,12 +12,8 @@
 #include

-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {

 //! \brief
 //! Pool to provide XML nodes efficiently.
@@ -41,56 +37,47 @@ namespace core
 //! allocation is in locking. Where multiple threads are dealing
 //! with XML, it is better to have one node pool per thread.
 //!
-class CORE_EXPORT CXmlNodeWithChildrenPool
-{
-    public:
-        //! Construct a pool that will accept as many nodes as a vector will hold
-        CXmlNodeWithChildrenPool();
-
-        //! Construct a pool that will never contain more than the specified
-        //! number of recycled nodes - any nodes that are recycled once the
-        //! limit is reached will be deleted rather than cached
-        CXmlNodeWithChildrenPool(size_t maxRecycled);
-
-        //! Allocate a new XML node - callers MUST set the name and value of the
-        //! returned node, as recycled nodes will still have their old name and
-        //! value
-        CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode();
-
-        //! Allocate a new XML node with the provided name and value
-        CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode(std::string name,
-                                                            std::string value);
-
-        //! Allocate a new XML node with the provided name and value
-        template<typename TYPE>
-        CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode(std::string name,
-                                                            const TYPE &value)
-        {
-            return this->newNode(name, CStringUtils::typeToString(value));
-        }
-
-        //! Allocate a new XML node with the provided name and value, specifying
-        //! whether the double should be output with full precision (e.g. for
-        //! persistence) or not (e.g. for human readability)
-        CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode(const std::string &name,
-                                                            double value,
-                                                            CIEEE754::EPrecision precision);
-
-        //! Recycle an XML node, plus any children it may have
-        void recycle(CXmlNodeWithChildren::TXmlNodeWithChildrenP &nodePtr);
-
-    private:
-        //! Vector of recycled nodes that can be quickly provided
-        //! without performing any memory allocations.
-        CXmlNodeWithChildren::TChildNodePVec m_Recycled;
-
-        //! The maximum number of nodes that will ever be cached by this pool
-        size_t m_MaxRecycled;
+class CORE_EXPORT CXmlNodeWithChildrenPool {
+public:
+    //! Construct a pool that will accept as many nodes as a vector will hold
+    CXmlNodeWithChildrenPool();
+
+    //! Construct a pool that will never contain more than the specified
+    //! number of recycled nodes - any nodes that are recycled once the
+    //! limit is reached will be deleted rather than cached
+    CXmlNodeWithChildrenPool(size_t maxRecycled);
+
+    //! Allocate a new XML node - callers MUST set the name and value of the
+    //! returned node, as recycled nodes will still have their old name and
+    //! value
+    CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode();
+
+    //! Allocate a new XML node with the provided name and value
+    CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode(std::string name, std::string value);
+
+    //! Allocate a new XML node with the provided name and value
+    template<typename TYPE>
+    CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode(std::string name, const TYPE& value) {
+        return this->newNode(name, CStringUtils::typeToString(value));
+    }
+
+    //! Allocate a new XML node with the provided name and value, specifying
+    //! whether the double should be output with full precision (e.g. for
+    //! persistence) or not (e.g. for human readability)
+    CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode(const std::string& name, double value, CIEEE754::EPrecision precision);
+
+    //! Recycle an XML node, plus any children it may have
+    void recycle(CXmlNodeWithChildren::TXmlNodeWithChildrenP& nodePtr);
+
+private:
+    //! Vector of recycled nodes that can be quickly provided
+    //! without performing any memory allocations.
+    CXmlNodeWithChildren::TChildNodePVec m_Recycled;
+
+    //! The maximum number of nodes that will ever be cached by this pool
+    size_t m_MaxRecycled;
 };
-
-
 }
 }

 #endif // INCLUDED_ml_core_CXmlNodeWithChildrenPool_h
-
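A sketch of the pool contract described above: nodes come out of newNode(), hierarchies are built with addChildP(), and recycle() returns a whole subtree to the pool. The node names and values here are arbitrary.

#include <core/CXmlNodeWithChildren.h>
#include <core/CXmlNodeWithChildrenPool.h>

#include <string>

void poolExample() {
    ml::core::CXmlNodeWithChildrenPool pool;

    ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root =
        pool.newNode(std::string("root"), std::string());
    root->addChildP(pool.newNode("count", 42));   // template overload via CStringUtils

    // Returns root AND its children to the pool; recycled nodes keep their
    // old name/value, which is why newNode() callers must always set both.
    pool.recycle(root);
}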
diff --git a/include/core/CXmlParser.h b/include/core/CXmlParser.h
index 29fbdf6f70..821b7c5cd1 100644
--- a/include/core/CXmlParser.h
+++ b/include/core/CXmlParser.h
@@ -20,11 +20,8 @@
 #include
 #include

-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {

 //! \brief
 //! Simple C++ wrapper around the libxml2 library.
@@ -56,228 +53,197 @@ namespace core
 //!
 //! Does not call xmlCleanupParser on application exit.
 //!
-class CORE_EXPORT CXmlParser : public CXmlParserIntf
-{
-    public:
-        static const std::string ATTRIBUTE_SEPARATOR;
-        static const std::string ATTRIBUTE_EQUALS;
-        static const size_t DEFAULT_INDENT_SPACES;
-        static const size_t MAX_INDENT_SPACES;
-        static const char *INDENT_SPACE_STR;
-
-    public:
-        using TStrVec = std::vector<std::string>;
-        using TStrVecItr = TStrVec::iterator;
-        using TStrVecCItr = TStrVec::const_iterator;
-
-        using TStrSet = std::set<std::string>;
-        using TStrSetItr = TStrSet::iterator;
-        using TStrSetCItr = TStrSet::const_iterator;
-
-        using TXmlNodeVec = std::vector<CXmlNode>;
-        using TXmlNodeVecItr = TXmlNodeVec::iterator;
-        using TXmlNodeVecCItr = TXmlNodeVec::const_iterator;
-
-        using TStrStrMap = std::map<std::string, std::string>;
-        using TStrStrMapCItr = TStrStrMap::const_iterator;
-
-    public:
-        CXmlParser();
-        virtual ~CXmlParser();
-
-        bool parseFile(const std::string &fileName);
-
-        //! Parse XML stored in a string
-        virtual bool parseString(const std::string &xml);
-
-        //! Parse XML stored in a char buffer
-        virtual bool parseBuffer(const char *begin, size_t length);
-
-        //! Parse XML stored in a char buffer that may be modified by the
-        //! parsing and will outlive this object
-        virtual bool parseBufferInSitu(char *begin, size_t length);
-
-        //! Return the root element name (empty string if not parsed yet)
-        virtual std::string rootElementName() const;
-
-        //! Return result from an XPath expression, if the number of matches != 1
-        //! return false.
-        bool evalXPathExpression(const std::string &xpath,
-                                 CXmlNode &value) const;
-
-        //! Return value result from an XPath expression, if the number of matches != 1
-        //! return false.
-        bool evalXPathExpression(const std::string &xpath,
-                                 std::string &value) const;
-
-        //! Return value result from an XPath expression
-        bool evalXPathExpression(const std::string &xpath,
-                                 TStrVec &value) const;
-
-        //! Return value result from an XPath expression, if there are
-        //! duplicates return false.
-        bool evalXPathExpression(const std::string &xpath,
-                                 TStrSet &value) const;
-
-        //! Return a value result from an XPath expression,
-        //! if the number of matches != 1 or value is not of type
-        //! return false.
-        template<typename TYPE>
-        bool evalXPathExpression(const std::string &xpath,
-                                 TYPE &ret) const
-        {
-            CXmlNode value;
-            if (this->evalXPathExpression(xpath, value) == false)
-            {
-                return false;
-            }
-
-            if (CStringUtils::stringToType(value.value(), ret) == false)
-            {
-                LOG_ERROR("Conversion error for " << xpath);
-                return false;
-            }
-
-            return true;
-        }
-
-        //! Return result from an XPath expression
-        bool evalXPathExpression(const std::string &,
-                                 TXmlNodeVec &values) const;
-
-        //! Return result from an XPath expression
-        bool evalXPathExpression(const std::string &,
-                                 TStrStrMap &values) const;
-
-        //! Dump the document to stdout
-        void dumpToStdout() const;
-
-        //! Dump the document to string
-        virtual std::string dumpToString() const;
-
-        //! Convert a node hierarchy to XML.
-        //! (This will escape the text correctly.)
-        static void convert(const CXmlNodeWithChildren &root,
-                            std::string &result);
-
-        //! Convert a node hierarchy to XML.
-        //! (This will escape the text correctly.)
-        //! The maximum number of spaces per indent is 10.
-        static void convert(size_t indentSpaces,
-                            const CXmlNodeWithChildren &root,
-                            std::string &result);
-
-        //! Convert a map of name/value pairs to XML.
-        //! (This will escape the text correctly.)
-        //! Note root is the name of the enclosing value.
-        //! Where a token name contains an @ symbol, the text prior to the @ is
-        //! taken as the tag name, and the text following the @ is treated
-        //! as an attribute of the tag. For example, if the map key is
-        //! field@name=idle cpu % and the map value is 99 then this will
-        //! be converted to <field name="idle cpu %">99</field>
-        static void convert(const std::string &root,
-                            const TStrStrMap &values,
-                            std::string &result);
-
-        //! As above, but with the ability to customise the number of spaces
-        //! per indent (up to a maximum of 10).
-        static void convert(size_t indentSpaces,
-                            const std::string &root,
-                            const TStrStrMap &values,
-                            std::string &result);
-
-        //! Convert a map of name/value pairs to an XML
-        //! parser.
-        //! Note root is the name of the enclosing value
-        bool convert(const std::string &root,
-                     const TStrStrMap &values);
-
-        //! Convert the entire XML document into a hierarchy of node objects.
-        //! This is much more efficient than making repeated calls to
-        //! evalXPathExpression() to retrieve the entire contents of a parsed
-        //! document.
-        virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const;
-
-        //! As above, but use a pool to avoid XML node memory allocations where possible
-        virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool &pool,
-                                     CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const;
-
-        //! As above, but use a string cache to avoid string representation memory
-        //! allocations where possible
-        virtual bool toNodeHierarchy(CStringCache &cache,
-                                     CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const;
-
-        //! As above, but use both a node pool and a string cache
-        virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool &pool,
-                                     CStringCache &cache,
-                                     CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const;
-
-        //! Functions for navigating an XML document without converting it to a
-        //! node hierarchy
-        virtual bool navigateRoot();
-        virtual bool navigateFirstChild();
-        virtual bool navigateNext();
-        virtual bool navigateParent();
-        virtual bool currentNodeName(std::string &name);
-        virtual bool currentNodeValue(std::string &value);
-
-        //! Set root name
-        bool setRootNode(const std::string &);
-
-        //! Add new child element (to root).
-        //! Restrict to simple value.
-        bool addNewChildNode(const std::string &name,
-                             const std::string &value);
-
-        //! Add new child element (to root).
-        //! Allows attributes.
-        bool addNewChildNode(const std::string &name,
-                             const std::string &value,
-                             const TStrStrMap &attrs);
-
-        //! Change the content of a child element (to root)
-        //! Restrict to simple value
-        bool changeChildNodeValue(const std::string &name,
-                                  const std::string &newValue);
-
-        //! Make sure a string is in the UTF-8 character set.
-        //! The XML parser is implemented internally using UTF-8, and will
-        //! throw a fatal error if faced with a byte sequence that's not
-        //! a valid UTF-8 character. Hence the need to do _something_,
-        //! even if it's not brilliant.
-        //! TODO - in the long term the whole application needs to have
-        //! proper support for different character sets. Once that work
-        //! is done, this function can be replaced with whatever we use
-        //! for the overall solution.
-        static bool stringLatin1ToUtf8(std::string &str);
-
-    private:
-        void destroy();
-
-        //! Called recursively by the convert() method
-        static void convertChildren(const CXmlNodeWithChildren &current,
-                                    xmlNode &xmlRep);
-
-        //! Called recursively by the public toNodeHierarchy() method
-        bool toNodeHierarchy(const xmlNode &parentNode,
-                             CXmlNodeWithChildrenPool &pool,
-                             CStringCache *cache,
-                             CXmlNodeWithChildren::TXmlNodeWithChildrenP &nodePtr) const;
-
-        //! Called on every error
-        static void errorHandler(void *ctxt, const char *msg, ...);
-
-    private:
-        xmlDocPtr m_Doc;
-        xmlXPathContextPtr m_XPathContext;
-
-        //! Pointer to the current node accessed via the navigation API
-        xmlNode *m_NavigatedNode;
-};
-
+class CORE_EXPORT CXmlParser : public CXmlParserIntf {
+public:
+    static const std::string ATTRIBUTE_SEPARATOR;
+    static const std::string ATTRIBUTE_EQUALS;
+    static const size_t DEFAULT_INDENT_SPACES;
+    static const size_t MAX_INDENT_SPACES;
+    static const char* INDENT_SPACE_STR;
+
+public:
+    using TStrVec = std::vector<std::string>;
+    using TStrVecItr = TStrVec::iterator;
+    using TStrVecCItr = TStrVec::const_iterator;
+
+    using TStrSet = std::set<std::string>;
+    using TStrSetItr = TStrSet::iterator;
+    using TStrSetCItr = TStrSet::const_iterator;
+
+    using TXmlNodeVec = std::vector<CXmlNode>;
+    using TXmlNodeVecItr = TXmlNodeVec::iterator;
+    using TXmlNodeVecCItr = TXmlNodeVec::const_iterator;
+
+    using TStrStrMap = std::map<std::string, std::string>;
+    using TStrStrMapCItr = TStrStrMap::const_iterator;
+
+public:
+    CXmlParser();
+    virtual ~CXmlParser();
+
+    bool parseFile(const std::string& fileName);
+
+    //! Parse XML stored in a string
+    virtual bool parseString(const std::string& xml);
+
+    //! Parse XML stored in a char buffer
+    virtual bool parseBuffer(const char* begin, size_t length);
+
+    //! Parse XML stored in a char buffer that may be modified by the
+    //! parsing and will outlive this object
+    virtual bool parseBufferInSitu(char* begin, size_t length);
+
+    //! Return the root element name (empty string if not parsed yet)
+    virtual std::string rootElementName() const;
+
+    //! Return result from an XPath expression, if the number of matches != 1
+    //! return false.
+    bool evalXPathExpression(const std::string& xpath, CXmlNode& value) const;
+
+    //! Return value result from an XPath expression, if the number of matches != 1
+    //! return false.
+    bool evalXPathExpression(const std::string& xpath, std::string& value) const;
+
+    //! Return value result from an XPath expression
+    bool evalXPathExpression(const std::string& xpath, TStrVec& value) const;
+
+    //! Return value result from an XPath expression, if there are
+    //! duplicates return false.
+    bool evalXPathExpression(const std::string& xpath, TStrSet& value) const;
+
+    //! Return a value result from an XPath expression,
+    //! if the number of matches != 1 or value is not of type
+    //! return false.
+    template<typename TYPE>
+    bool evalXPathExpression(const std::string& xpath, TYPE& ret) const {
+        CXmlNode value;
+        if (this->evalXPathExpression(xpath, value) == false) {
+            return false;
+        }

+        if (CStringUtils::stringToType(value.value(), ret) == false) {
+            LOG_ERROR("Conversion error for " << xpath);
+            return false;
+        }
+        return true;
+    }
+
+    //! Return result from an XPath expression
+    bool evalXPathExpression(const std::string&, TXmlNodeVec& values) const;
+
+    //! Return result from an XPath expression
+    bool evalXPathExpression(const std::string&, TStrStrMap& values) const;
+
+    //! Dump the document to stdout
+    void dumpToStdout() const;
+
+    //! Dump the document to string
+    virtual std::string dumpToString() const;
+
+    //! Convert a node hierarchy to XML.
+    //! (This will escape the text correctly.)
+    static void convert(const CXmlNodeWithChildren& root, std::string& result);
+
+    //! Convert a node hierarchy to XML.
+    //! (This will escape the text correctly.)
+    //! The maximum number of spaces per indent is 10.
+    static void convert(size_t indentSpaces, const CXmlNodeWithChildren& root, std::string& result);
+
+    //! Convert a map of name/value pairs to XML.
+    //! (This will escape the text correctly.)
+    //! Note root is the name of the enclosing value.
+    //! Where a token name contains an @ symbol, the text prior to the @ is
+    //! taken as the tag name, and the text following the @ is treated
+    //! as an attribute of the tag. For example, if the map key is
+    //! field@name=idle cpu % and the map value is 99 then this will
+    //! be converted to <field name="idle cpu %">99</field>
+    static void convert(const std::string& root, const TStrStrMap& values, std::string& result);
+
+    //! As above, but with the ability to customise the number of spaces
+    //! per indent (up to a maximum of 10).
+    static void convert(size_t indentSpaces, const std::string& root, const TStrStrMap& values, std::string& result);
+
+    //! Convert a map of name/value pairs to an XML
+    //! parser.
+    //! Note root is the name of the enclosing value
+    bool convert(const std::string& root, const TStrStrMap& values);
+
+    //! Convert the entire XML document into a hierarchy of node objects.
+    //! This is much more efficient than making repeated calls to
+    //! evalXPathExpression() to retrieve the entire contents of a parsed
+    //! document.
+    virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const;
+
+    //! As above, but use a pool to avoid XML node memory allocations where possible
+    virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const;
+
+    //! As above, but use a string cache to avoid string representation memory
+    //! allocations where possible
+    virtual bool toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const;
+
+    //! As above, but use both a node pool and a string cache
+    virtual bool
+    toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const;
+
+    //! Functions for navigating an XML document without converting it to a
+    //! node hierarchy
+    virtual bool navigateRoot();
+    virtual bool navigateFirstChild();
+    virtual bool navigateNext();
+    virtual bool navigateParent();
+    virtual bool currentNodeName(std::string& name);
+    virtual bool currentNodeValue(std::string& value);
+
+    //! Set root name
+    bool setRootNode(const std::string&);
+
+    //! Add new child element (to root).
+    //! Restrict to simple value.
+    bool addNewChildNode(const std::string& name, const std::string& value);
+
+    //! Add new child element (to root).
+    //! Allows attributes.
+    bool addNewChildNode(const std::string& name, const std::string& value, const TStrStrMap& attrs);
+
+    //! Change the content of a child element (to root)
+    //! Restrict to simple value
+    bool changeChildNodeValue(const std::string& name, const std::string& newValue);
+
+    //! Make sure a string is in the UTF-8 character set.
+    //! The XML parser is implemented internally using UTF-8, and will
+    //! throw a fatal error if faced with a byte sequence that's not
+    //! a valid UTF-8 character. Hence the need to do _something_,
+    //! even if it's not brilliant.
+    //! TODO - in the long term the whole application needs to have
+    //! proper support for different character sets. Once that work
+    //! is done, this function can be replaced with whatever we use
+    //! for the overall solution.
+    static bool stringLatin1ToUtf8(std::string& str);
+
+private:
+    void destroy();
+
+    //! Called recursively by the convert() method
+    static void convertChildren(const CXmlNodeWithChildren& current, xmlNode& xmlRep);
+
+    //! Called recursively by the public toNodeHierarchy() method
+    bool toNodeHierarchy(const xmlNode& parentNode,
+                         CXmlNodeWithChildrenPool& pool,
+                         CStringCache* cache,
+                         CXmlNodeWithChildren::TXmlNodeWithChildrenP& nodePtr) const;
+
+    //! Called on every error
+    static void errorHandler(void* ctxt, const char* msg, ...);
+
+private:
+    xmlDocPtr m_Doc;
+    xmlXPathContextPtr m_XPathContext;
+
+    //! Pointer to the current node accessed via the navigation API
+    xmlNode* m_NavigatedNode;
+};
 }
 }

 #endif // INCLUDED_ml_core_CXmlParser_h
-
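To make the XPath template above concrete, a minimal parse-and-extract sketch; the XML snippet and expression are invented for illustration.

#include <core/CXmlParser.h>

void parserExample() {
    ml::core::CXmlParser parser;
    if (parser.parseString("<config><timeout>30</timeout></config>")) {
        int timeout(0);
        // Template overload: exactly one match required, and the node's
        // value must convert to int via CStringUtils::stringToType().
        if (parser.evalXPathExpression("/config/timeout", timeout)) {
            // timeout == 30
        }
    }
}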
diff --git a/include/core/CXmlParserIntf.h b/include/core/CXmlParserIntf.h
index 61ae761027..8e4684cbe8 100644
--- a/include/core/CXmlParserIntf.h
+++ b/include/core/CXmlParserIntf.h
@@ -12,15 +12,11 @@
 #include

-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {

 class CStringCache;
 class CXmlNodeWithChildrenPool;
-
 //! \brief
 //! Polymorphic interface to XML parser classes.
@@ -34,73 +30,67 @@ class CXmlNodeWithChildrenPool;
 //! more functionality is added to the RapidXml parser
 //! encapsulation.
 //!
-class CORE_EXPORT CXmlParserIntf : private CNonCopyable
-{
-    public:
-        //! The <?xml?> that goes at the top of XML files
-        static const std::string XML_HEADER;
-
-    public:
-        CXmlParserIntf();
-        virtual ~CXmlParserIntf();
-
-        //! Parse XML stored in a string
-        virtual bool parseString(const std::string &xml) = 0;
-
-        //! Parse XML stored in a char buffer
-        virtual bool parseBuffer(const char *begin, size_t length) = 0;
-
-        //! Parse XML stored in a char buffer that may be modified by the
-        //! parsing and will outlive this object
-        virtual bool parseBufferInSitu(char *begin, size_t length) = 0;
-
-        //! Return the root element name (empty string if not parsed yet)
-        virtual std::string rootElementName() const = 0;
-
-        //! Dump the document to string
-        virtual std::string dumpToString() const = 0;
-
-        //! Convert the entire XML document into a hierarchy of node objects.
-        //! This is much more efficient than making repeated calls to
-        //! evalXPathExpression() to retrieve the entire contents of a parsed
-        //! document.
-        virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const = 0;
-
-        //! As above, but use a pool to avoid XML node memory allocations where possible
-        virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool &pool,
-                                     CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const = 0;
-
-        //! As above, but use a string cache to avoid string representation memory
-        //! allocations where possible
-        virtual bool toNodeHierarchy(CStringCache &cache,
-                                     CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const = 0;
-
-        //! As above, but use both a node pool and a string cache
-        virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool &pool,
-                                     CStringCache &cache,
-                                     CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const = 0;
-
-        //! Functions for navigating an XML document without converting it to a
-        //! node hierarchy
-        virtual bool navigateRoot() = 0;
-        virtual bool navigateFirstChild() = 0;
-        virtual bool navigateNext() = 0;
-        virtual bool navigateParent() = 0;
-        virtual bool currentNodeName(std::string &name) = 0;
-        virtual bool currentNodeValue(std::string &value) = 0;
-
-        //! Replace characters that are not valid in an XML element name
-        //! with underscores
-        static std::string makeValidName(const std::string &str);
-
-        //! Reformat a piece of XML to a single line. Useful for writing files
-        //! where each line contains a complete XML document.
-        static std::string toOneLine(const std::string &xml);
+class CORE_EXPORT CXmlParserIntf : private CNonCopyable {
+public:
+    //! The <?xml?> that goes at the top of XML files
+    static const std::string XML_HEADER;
+
+public:
+    CXmlParserIntf();
+    virtual ~CXmlParserIntf();
+
+    //! Parse XML stored in a string
+    virtual bool parseString(const std::string& xml) = 0;
+
+    //! Parse XML stored in a char buffer
+    virtual bool parseBuffer(const char* begin, size_t length) = 0;
+
+    //! Parse XML stored in a char buffer that may be modified by the
+    //! parsing and will outlive this object
+    virtual bool parseBufferInSitu(char* begin, size_t length) = 0;
+
+    //! Return the root element name (empty string if not parsed yet)
+    virtual std::string rootElementName() const = 0;
+
+    //! Dump the document to string
+    virtual std::string dumpToString() const = 0;
+
+    //! Convert the entire XML document into a hierarchy of node objects.
+    //! This is much more efficient than making repeated calls to
+    //! evalXPathExpression() to retrieve the entire contents of a parsed
+    //! document.
+    virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0;
+
+    //! As above, but use a pool to avoid XML node memory allocations where possible
+    virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0;
+
+    //! As above, but use a string cache to avoid string representation memory
+    //! allocations where possible
+    virtual bool toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0;
+
+    //! As above, but use both a node pool and a string cache
+    virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool,
+                                 CStringCache& cache,
+                                 CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0;
+
+    //! Functions for navigating an XML document without converting it to a
+    //! 
node hierarchy + virtual bool navigateRoot() = 0; + virtual bool navigateFirstChild() = 0; + virtual bool navigateNext() = 0; + virtual bool navigateParent() = 0; + virtual bool currentNodeName(std::string& name) = 0; + virtual bool currentNodeValue(std::string& value) = 0; + + //! Replace characters that are not valid in an XML element name + //! with underscores + static std::string makeValidName(const std::string& str); + + //! Reformat a piece of XML to a single line. Useful for writing files + //! where each line contains a complete XML document. + static std::string toOneLine(const std::string& xml); }; - - } } #endif // INCLUDED_ml_core_CXmlParserIntf_h - diff --git a/include/core/Constants.h b/include/core/Constants.h index 24a44bd2d3..208939f920 100644 --- a/include/core/Constants.h +++ b/include/core/Constants.h @@ -12,12 +12,9 @@ #include #include -namespace ml -{ -namespace core -{ -namespace constants -{ +namespace ml { +namespace core { +namespace constants { //! A minute in seconds. const core_t::TTime MINUTE{60}; @@ -60,7 +57,6 @@ const char PATH_SEPARATOR = '\\'; #else const char PATH_SEPARATOR = '/'; #endif - } } } diff --git a/include/core/CoreTypes.h b/include/core/CoreTypes.h index d95a55bc17..7dd22c8d9b 100644 --- a/include/core/CoreTypes.h +++ b/include/core/CoreTypes.h @@ -8,36 +8,28 @@ #include - -namespace ml -{ -namespace core_t -{ - +namespace ml { +namespace core_t { //! For now just use seconds as the ml time granularity //! This is a UTC value using TTime = time_t; - //! The standard line ending for the platform - DON'T make this std::string as //! that would cause many strings to be constructed (since the variable is //! static const at the namespace level, so is internal to each file this //! header is included in) #ifdef Windows -static const char *LINE_ENDING = "\r\n"; +static const char* LINE_ENDING = "\r\n"; #else #ifdef __GNUC__ // Tell g++ that it's reasonable that this variable isn't used -__attribute__ ((unused)) static const char *LINE_ENDING = "\n"; +__attribute__((unused)) static const char* LINE_ENDING = "\n"; #else -static const char *LINE_ENDING = "\n"; +static const char* LINE_ENDING = "\n"; #endif #endif - - } } #endif // INCLUDED_ml_core_t_CoreTypes_h - diff --git a/include/core/ImportExport.h b/include/core/ImportExport.h index c2cea7df5e..69c35c04e1 100644 --- a/include/core/ImportExport.h +++ b/include/core/ImportExport.h @@ -36,4 +36,3 @@ #endif #endif // INCLUDED_ml_core_ImportExport_h - diff --git a/include/core/LogMacros.h b/include/core/LogMacros.h index 53b3876c8d..4f04d5718a 100644 --- a/include/core/LogMacros.h +++ b/include/core/LogMacros.h @@ -36,11 +36,11 @@ #ifdef LOG_INFO #undef LOG_INFO #endif -#define LOG_INFO(message) LOG4CXX_INFO(ml::core::CLogger::instance().logger(), message) +#define LOG_INFO(message) LOG4CXX_INFO(ml::core::CLogger::instance().logger(), message) #ifdef LOG_WARN #undef LOG_WARN #endif -#define LOG_WARN(message) LOG4CXX_WARN(ml::core::CLogger::instance().logger(), message) +#define LOG_WARN(message) LOG4CXX_WARN(ml::core::CLogger::instance().logger(), message) #ifdef LOG_ERROR #undef LOG_ERROR #endif @@ -52,8 +52,9 @@ #ifdef LOG_ABORT #undef LOG_ABORT #endif -#define LOG_ABORT(message) LOG4CXX_FATAL(ml::core::CLogger::instance().logger(), message); \ - ml::core::CLogger::fatal() +#define LOG_ABORT(message) \ + LOG4CXX_FATAL(ml::core::CLogger::instance().logger(), message); \ + ml::core::CLogger::fatal() // Log at a level specified at runtime as a string, for example // LOG_AT_LEVEL("WARN", "Stay away 
from here " << username)
@@ -62,4 +63,3 @@
 #undef LOG_AT_LEVEL
 #endif
 #define LOG_AT_LEVEL(level, message) LOG4CXX_LOGLS(ml::core::CLogger::instance().logger(), log4cxx::Level::toLevel(level), message)
-
diff --git a/include/core/MainForServices.h b/include/core/MainForServices.h
index cbf86c7784..984edc6b42 100644
--- a/include/core/MainForServices.h
+++ b/include/core/MainForServices.h
@@ -12,8 +12,7 @@

 // Programs that can run as Windows services must have a function called
 // mlMain() - this is the forward declaration of it.
-extern int mlMain(int argc, char *argv[]);
-
+extern int mlMain(int argc, char* argv[]);

 //! \brief
 //! Boilerplate implementation of the main() function for applications
@@ -36,16 +35,14 @@ extern int mlMain(int argc, char *argv[]);
 //! Despite being in the core library include directory, this file
 //! should never be included in a library.
 //!
-int main(int argc, char *argv[])
-{
-    ml::core::CProcess &process = ml::core::CProcess::instance();
+int main(int argc, char* argv[]) {
+    ml::core::CProcess& process = ml::core::CProcess::instance();

     // If this process is not running as a Windows service, this call will
     // immediately pass control to the application's own main() replacement.
     // If this process is running as a Windows service, the main thread will
     // become the service dispatcher thread.
-    if (process.startDispatcher(&mlMain, argc, argv) == false)
-    {
+    if (process.startDispatcher(&mlMain, argc, argv) == false) {
         return EXIT_FAILURE;
     }
@@ -59,4 +56,3 @@ need to be run as a Windows service, in the Main.cc file. It appears that \
 this rule has not been followed.

 #endif // INCLUDED_ml_core_MainForServices_h
-
diff --git a/include/core/RestoreMacros.h b/include/core/RestoreMacros.h
index e1586e52cd..377233c368 100644
--- a/include/core/RestoreMacros.h
+++ b/include/core/RestoreMacros.h
@@ -7,66 +7,54 @@
 #ifndef INCLUDED_ml_core_RestoreMacros_h
 #define INCLUDED_ml_core_RestoreMacros_h

-namespace ml
-{
-namespace core
-{
-
-#define RESTORE(tag, restore) \
-    if (name == tag) \
-    { \
-        if ((restore) == false) \
-        { \
-            LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \
-            return false; \
-        } \
-        continue; \
-    }
-
-#define RESTORE_BUILT_IN(tag, target) \
-    if (name == tag) \
-    { \
-        if (core::CStringUtils::stringToType(traverser.value(), target) == false) \
-        { \
-            LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \
-            return false; \
-        } \
-        continue; \
-    }
-
-#define RESTORE_BOOL(tag, target) \
-    if (name == tag) \
-    { \
-        int value; \
-        if (core::CStringUtils::stringToType(traverser.value(), value) == false) \
-        { \
-            LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \
-            return false; \
-        } \
-        target = (value != 0); \
-        continue; \
-    }
-
-#define RESTORE_SETUP_TEARDOWN(tag, setup, restore, teardown) \
-    if (name == tag) \
-    { \
-        setup; \
-        if ((restore) == false) \
-        { \
-            LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \
-            return false; \
-        } \
-        teardown; \
-        continue; \
-    }
-
-#define RESTORE_NO_ERROR(tag, restore) \
-    if (name == tag) \
-    { \
-        restore; \
-        continue; \
-    }
-
+namespace ml {
+namespace core {
+
+#define RESTORE(tag, restore) \
+    if (name == tag) { \
+        if ((restore) == false) { \
+            LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \
+            return false; \
+        } \
+        continue; \
+    }
+
+#define RESTORE_BUILT_IN(tag, target) \
+    if (name == tag) { \
+        if (core::CStringUtils::stringToType(traverser.value(), target) == false) { \
+            LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \
+            return false; \
+        } \
+        continue; \
+    }
+
+#define RESTORE_BOOL(tag, target) \
+    if (name == tag) { \
+        int value; \
+        if (core::CStringUtils::stringToType(traverser.value(), value) == false) { \
+            LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \
+            return false; \
+        } \
+        target = (value != 0); \
+        continue; \
+    }
+
+#define RESTORE_SETUP_TEARDOWN(tag, setup, restore, teardown) \
+    if (name == tag) { \
+        setup; \
+        if ((restore) == false) { \
+            LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \
+            return false; \
+        } \
+        teardown; \
+        continue; \
+    }
+
+#define RESTORE_NO_ERROR(tag, restore) \
+    if (name == tag) { \
+        restore; \
+        continue; \
+    }
 }
 }
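The macros above expand to if/continue blocks that refer to locals called 'name' and 'traverser', so they are only usable inside a restore loop of roughly the following shape. The class, tags and members here are hypothetical; the traverser calls are the ones the macro bodies themselves make plus the loop advance.

bool CMyState::acceptRestoreTraverser(ml::core::CStateRestoreTraverser& traverser) {
    do {
        const std::string& name = traverser.name();
        RESTORE_BUILT_IN(COUNT_TAG, m_Count)   // hypothetical tag/member
        RESTORE_BOOL(ENABLED_TAG, m_Enabled)   // hypothetical tag/member
    } while (traverser.next());
    return true;
}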
", got " << traverser.value()); \ + return false; \ + } \ + continue; \ + } + +#define RESTORE_BOOL(tag, target) \ + if (name == tag) { \ + int value; \ + if (core::CStringUtils::stringToType(traverser.value(), value) == false) { \ + LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \ + return false; \ + } \ + target = (value != 0); \ + continue; \ + } + +#define RESTORE_SETUP_TEARDOWN(tag, setup, restore, teardown) \ + if (name == tag) { \ + setup; \ + if ((restore) == false) { \ + LOG_ERROR("Failed to restore " #tag ", got " << traverser.value()); \ + return false; \ + } \ + teardown; \ + continue; \ + } + +#define RESTORE_NO_ERROR(tag, restore) \ + if (name == tag) { \ + restore; \ + continue; \ + } } } diff --git a/include/core/WindowsSafe.h b/include/core/WindowsSafe.h index 96c6fa8c3d..c5692dd66d 100644 --- a/include/core/WindowsSafe.h +++ b/include/core/WindowsSafe.h @@ -38,4 +38,3 @@ #endif #endif // INCLUDED_ml_core_WindowsSafe_h - diff --git a/include/maths/CAdaptiveBucketing.h b/include/maths/CAdaptiveBucketing.h index c4d5f3d135..764bc646fa 100644 --- a/include/maths/CAdaptiveBucketing.h +++ b/include/maths/CAdaptiveBucketing.h @@ -19,15 +19,12 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStateRestoreTraverser; class CStatePersistInserter; } -namespace maths -{ +namespace maths { //! \brief Common functionality used by our adaptive bucketing classes. //! @@ -68,181 +65,175 @@ namespace maths //! //! The bucketing is aged by relaxing it back towards uniform and //! aging the counts of the mean value for each bucket as usual. -class MATHS_EXPORT CAdaptiveBucketing -{ - public: - using TDoubleVec = std::vector; - using TFloatVec = std::vector; - using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TFloatMeanAccumulatorVec = std::vector; - - public: - //! Restore by traversing a state document - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Persist by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - protected: - CAdaptiveBucketing(double decayRate, double minimumBucketLength); - //! Construct by traversing a state document. - CAdaptiveBucketing(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser); - virtual ~CAdaptiveBucketing() = default; - - //! Efficiently swap the contents of two bucketing objects. - void swap(CAdaptiveBucketing &other); - - //! Check if the bucketing has been initialized. - bool initialized() const; - - //! Create a new uniform bucketing with \p n buckets on the - //! interval [\p a, \p b]. - //! - //! \param[in] a The start of the interval to bucket. - //! \param[in] b The end of the interval to bucket. - //! \param[in] n The number of buckets. - bool initialize(double a, double b, std::size_t n); - - //! Add the function mean values \f$([a_i,b_i], m_i)\f$ where - //! \f$m_i\f$ are the means of the function in the time intervals - //! \f$([a+(i-1)l,b+il])\f$, \f$i\in[n]\f$ and \f$l=(b-a)/n\f$. - //! - //! \param[in] startTime The start of the period. - //! \param[in] endTime The start of the period. - //! \param[in] values The mean values in a regular subdivision - //! of [\p start,\p end]. - void initialValues(core_t::TTime startTime, - core_t::TTime endTime, - const TFloatMeanAccumulatorVec &values); - - //! Get the number of buckets. - std::size_t size() const; - - //! 
-        //! Clear the contents of this bucketing and recover any
-        //! allocated memory.
-        void clear();
-
-        //! Add the function value at \p time.
-        //!
-        //! \param[in] bucket The index of the bucket of \p time.
-        //! \param[in] time The time of \p value.
-        //! \param[in] weight The weight of function point. The smaller
-        //! this is the less influence it has on the bucket.
-        void add(std::size_t bucket, core_t::TTime time, double weight);
-
-        //! Set the rate at which the bucketing loses information.
-        void decayRate(double value);
-
-        //! Get the rate at which the bucketing loses information.
-        double decayRate() const;
-
-        //! Age the force moments.
-        void age(double factor);
-
-        //! Get the minimum permitted bucket length.
-        double minimumBucketLength() const;
-
-        //! Refine the bucket end points to minimize the maximum averaging
-        //! error in any bucket.
-        //!
-        //! \param[in] time The time at which to refine.
-        void refine(core_t::TTime time);
-
-        //! Get a set of knot points and knot point values to use for
-        //! interpolating the bucket values.
-        //!
-        //! \param[in] time The time at which to get the knot points.
-        //! \param[in] boundary Controls the style of start and end knots.
-        //! \param[out] knots Filled in with the knot points to interpolate.
-        //! \param[out] values Filled in with the values at \p knots.
-        //! \param[out] variances Filled in with the variances at \p knots.
-        //! \return True if there are sufficient knot points to interpolate
-        //! and false otherwise.
-        bool knots(core_t::TTime time,
-                   CSplineTypes::EBoundaryCondition boundary,
-                   TDoubleVec &knots,
-                   TDoubleVec &values,
-                   TDoubleVec &variances) const;
-
-        //! Get the bucket end points.
-        const TFloatVec &endpoints() const;
-
-        //! Get the bucket end points.
-        TFloatVec &endpoints();
-
-        //! Get the bucket value centres.
-        const TFloatVec &centres() const;
-
-        //! Get the bucket value centres.
-        TFloatVec &centres();
-
-        //! Get the total count in the bucketing.
-        double count() const;
-
-        //! Get the bucket regressions.
-        TDoubleVec values(core_t::TTime time) const;
-
-        //! Get the bucket variances.
-        TDoubleVec variances() const;
-
-        //! Compute the index of the bucket to which \p time belongs
-        bool bucket(core_t::TTime time, std::size_t &result) const;
-
-        //! Get a checksum for this object.
-        uint64_t checksum(uint64_t seed = 0) const;
-
-        //! Get the memory used by this component
-        std::size_t memoryUsage() const;
-
-    private:
-        //! Compute the values corresponding to the change in end
-        //! points from \p endpoints. The values are assigned based
-        //! on their intersection with each bucket in the previous
-        //! bucket configuration.
-        virtual void refresh(const TFloatVec &endpoints) = 0;
-
-        //! Check if \p time is in this component's window.
-        virtual bool inWindow(core_t::TTime time) const = 0;
-
-        //! Add the function value at \p time.
-        virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight) = 0;
-
-        //! Get the offset w.r.t. the start of the bucketing of \p time.
-        virtual double offset(core_t::TTime time) const = 0;
+class MATHS_EXPORT CAdaptiveBucketing {
+public:
+    using TDoubleVec = std::vector<double>;
+    using TFloatVec = std::vector<CFloatStorage>;
+    using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;
+    using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
+
+public:
+    //! Restore by traversing a state document
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Persist by passing information to the supplied inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+protected:
+    CAdaptiveBucketing(double decayRate, double minimumBucketLength);
+    //! Construct by traversing a state document.
+    CAdaptiveBucketing(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser);
+    virtual ~CAdaptiveBucketing() = default;
+
+    //! Efficiently swap the contents of two bucketing objects.
+    void swap(CAdaptiveBucketing& other);
+
+    //! Check if the bucketing has been initialized.
+    bool initialized() const;
+
+    //! Create a new uniform bucketing with \p n buckets on the
+    //! interval [\p a, \p b].
+    //!
+    //! \param[in] a The start of the interval to bucket.
+    //! \param[in] b The end of the interval to bucket.
+    //! \param[in] n The number of buckets.
+    bool initialize(double a, double b, std::size_t n);
+
+    //! Add the function mean values \f$([a_i,b_i], m_i)\f$ where
+    //! \f$m_i\f$ are the means of the function in the time intervals
+    //! \f$([a+(i-1)l,a+il])\f$, \f$i\in[n]\f$ and \f$l=(b-a)/n\f$.
+    //!
+    //! \param[in] startTime The start of the period.
+    //! \param[in] endTime The end of the period.
+    //! \param[in] values The mean values in a regular subdivision
+    //! of [\p start,\p end].
+    void initialValues(core_t::TTime startTime, core_t::TTime endTime, const TFloatMeanAccumulatorVec& values);
+
+    //! Get the number of buckets.
+    std::size_t size() const;
+
+    //! Clear the contents of this bucketing and recover any
+    //! allocated memory.
+    void clear();
+
+    //! Add the function value at \p time.
+    //!
+    //! \param[in] bucket The index of the bucket of \p time.
+    //! \param[in] time The time of \p value.
+    //! \param[in] weight The weight of function point. The smaller
+    //! this is the less influence it has on the bucket.
+    void add(std::size_t bucket, core_t::TTime time, double weight);
+
+    //! Set the rate at which the bucketing loses information.
+    void decayRate(double value);
+
+    //! Get the rate at which the bucketing loses information.
+    double decayRate() const;
+
+    //! Age the force moments.
+    void age(double factor);
+
+    //! Get the minimum permitted bucket length.
+    double minimumBucketLength() const;
+
+    //! Refine the bucket end points to minimize the maximum averaging
+    //! error in any bucket.
+    //!
+    //! \param[in] time The time at which to refine.
+    void refine(core_t::TTime time);
+
+    //! Get a set of knot points and knot point values to use for
+    //! interpolating the bucket values.
+    //!
+    //! \param[in] time The time at which to get the knot points.
+    //! \param[in] boundary Controls the style of start and end knots.
+    //! \param[out] knots Filled in with the knot points to interpolate.
+    //! \param[out] values Filled in with the values at \p knots.
+    //! \param[out] variances Filled in with the variances at \p knots.
+    //! \return True if there are sufficient knot points to interpolate
+    //! and false otherwise.
+    bool knots(core_t::TTime time,
+               CSplineTypes::EBoundaryCondition boundary,
+               TDoubleVec& knots,
+               TDoubleVec& values,
+               TDoubleVec& variances) const;
+
+    //! Get the bucket end points.
+    const TFloatVec& endpoints() const;
+
+    //! Get the bucket end points.
+    TFloatVec& endpoints();
+
+    //! Get the bucket value centres.
+    const TFloatVec& centres() const;
+
+    //! Get the bucket value centres.
+    TFloatVec& centres();
+
+    //! Get the total count in the bucketing.
+    double count() const;
+
+    //! Get the bucket regressions.
+    TDoubleVec values(core_t::TTime time) const;
+
+    //! Get the bucket variances.
+    TDoubleVec variances() const;
+
+    //! Compute the index of the bucket to which \p time belongs
+    bool bucket(core_t::TTime time, std::size_t& result) const;
+
+    //! Get a checksum for this object.
+    uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Get the memory used by this component
+    std::size_t memoryUsage() const;
+
+private:
+    //! Compute the values corresponding to the change in end
+    //! points from \p endpoints. The values are assigned based
+    //! on their intersection with each bucket in the previous
+    //! bucket configuration.
+    virtual void refresh(const TFloatVec& endpoints) = 0;
+
+    //! Check if \p time is in this component's window.
+    virtual bool inWindow(core_t::TTime time) const = 0;
+
+    //! Add the function value at \p time.
+    virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight) = 0;
+
+    //! Get the offset w.r.t. the start of the bucketing of \p time.
+    virtual double offset(core_t::TTime time) const = 0;

-        //! The count in \p bucket.
-        virtual double count(std::size_t bucket) const = 0;
+    //! The count in \p bucket.
+    virtual double count(std::size_t bucket) const = 0;

-        //! Get the predicted value for the \p bucket at \p time.
-        virtual double predict(std::size_t bucket, core_t::TTime time, double offset) const = 0;
+    //! Get the predicted value for the \p bucket at \p time.
+    virtual double predict(std::size_t bucket, core_t::TTime time, double offset) const = 0;

-        //! Get the variance of \p bucket.
-        virtual double variance(std::size_t bucket) const = 0;
+    //! Get the variance of \p bucket.
+    virtual double variance(std::size_t bucket) const = 0;

-    private:
-        //! The rate at which information is aged out of the bucket values.
-        double m_DecayRate;
+private:
+    //! The rate at which information is aged out of the bucket values.
+    double m_DecayRate;

-        //! The minimum permitted bucket length if non-zero otherwise this
-        //! is ignored.
-        double m_MinimumBucketLength;
+    //! The minimum permitted bucket length if non-zero otherwise this
+    //! is ignored.
+    double m_MinimumBucketLength;

-        //! The bucket end points.
-        TFloatVec m_Endpoints;
+    //! The bucket end points.
+    TFloatVec m_Endpoints;

-        //! The mean periodic time of each regression.
-        TFloatVec m_Centres;
+    //! The mean periodic time of each regression.
+    TFloatVec m_Centres;

-        //! An IIR low pass filter for the total desired end point displacement
-        //! in refine.
-        TFloatMeanAccumulator m_LpForce;
+    //! An IIR low pass filter for the total desired end point displacement
+    //! in refine.
+    TFloatMeanAccumulator m_LpForce;

-        //! The total desired end point displacement in refine.
-        TFloatMeanAccumulator m_Force;
+    //! The total desired end point displacement in refine.
+    TFloatMeanAccumulator m_Force;
 };
-
 }
 }
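Everything interesting in this class is protected and per-bucket statistics are delegated to the pure virtual hooks above, so the minimal way to see the contract is a skeletal subclass. This is purely illustrative - the real subclasses live elsewhere in lib/maths - and the decay rate, window and day length used are arbitrary.

#include <maths/CAdaptiveBucketing.h>

class CMyBucketing : public ml::maths::CAdaptiveBucketing {
public:
    CMyBucketing() : ml::maths::CAdaptiveBucketing(0.01 /*decayRate*/, 0.0 /*minimumBucketLength*/) {}

private:
    // Reassign per-bucket statistics after refine() has moved the endpoints.
    virtual void refresh(const TFloatVec& /*endpoints*/) {}
    // Accept all times, i.e. an unwindowed bucketing.
    virtual bool inWindow(core_t::TTime /*time*/) const { return true; }
    // Update the owning bucket's statistics with (value, weight).
    virtual void add(std::size_t /*bucket*/, core_t::TTime /*time*/, double /*value*/, double /*weight*/) {}
    // Offset of a time into a daily period, say.
    virtual double offset(core_t::TTime time) const { return static_cast<double>(time % 86400); }
    virtual double count(std::size_t /*bucket*/) const { return 0.0; }
    virtual double predict(std::size_t /*bucket*/, core_t::TTime /*time*/, double /*offset*/) const { return 0.0; }
    virtual double variance(std::size_t /*bucket*/) const { return 0.0; }
};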
diff --git a/include/maths/CAgglomerativeClusterer.h b/include/maths/CAgglomerativeClusterer.h
index a339d44939..668127837e 100644
--- a/include/maths/CAgglomerativeClusterer.h
+++ b/include/maths/CAgglomerativeClusterer.h
@@ -12,10 +12,8 @@
 #include
 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

 //! \brief Implements optimum runtime agglomerative clustering for
 //! arbitrary distance matrices.
@@ -38,101 +36,91 @@ namespace maths
 //!
 //!
 //! For other styles see https://en.wikipedia.org/wiki/Hierarchical_clustering#Agglomerative_clustering_example.
-class MATHS_EXPORT CAgglomerativeClusterer
-{
-    public:
-        using TDoubleVec = std::vector<double>;
-        using TDoubleVecVec = std::vector<TDoubleVec>;
-        using TSizeVec = std::vector<std::size_t>;
-        using TSizeVecVec = std::vector<TSizeVec>;
-        using TDoubleSizeVecPr = std::pair<double, TSizeVec>;
-        using TDoubleSizeVecPrVec = std::vector<TDoubleSizeVecPr>;
-
-        //! \brief A representation of a node in the tree of clusters.
-        class MATHS_EXPORT CNode
-        {
-            public:
-                //! Set the rightmost point below this node.
-                CNode(std::size_t index, double height);
-
-                //! Add a child node and update connectivity.
-                bool addChild(CNode &child);
-
-                //! Get the unique index of this node.
-                std::size_t index() const;
-
-                //! Get the height of this node.
-                double height() const;
-
-                //! Get the root of the branch containing this node.
-                //!
-                //! \note This is the root of the tree unless it is
-                //! under construction.
-                CNode &root();
-
-                //! Get the points in this node's cluster.
-                void points(TSizeVec &result) const;
-
-                //! Get the joins and their heights.
-                void clusters(TDoubleSizeVecPrVec &result) const;
-
-                //! Get the clustering at the specified height.
-                void clusteringAt(double height, TSizeVecVec &result) const;
-
-                //! Get a debug representation of the branch rooted at
-                //! this node.
-                std::string print(const std::string &indent = std::string("  ")) const;
-
-            private:
-                //! The parent cluster.
-                CNode *m_Parent;
-                //! The left child cluster.
-                CNode *m_LeftChild;
-                //! The right child cluster.
-                CNode *m_RightChild;
-                //! The unique index of this cluster.
-                std::size_t m_Index;
-                //! The height of this cluster, i.e. the value of the
-                //! objective function at which the cluster forms.
-                double m_Height;
-        };
-
-        using TNodeVec = std::vector<CNode>;
-
-    public:
-        //! Possible clustering objective functions supported.
-        enum EObjective
-        {
-            E_Single,
-            E_Complete,
-            E_Average,
-            E_Weighted,
-            E_Ward
-        };
-
-    public:
-        //! Setup the distance matrix from which to compute the
-        //! agglomerative clustering.
-        bool initialize(TDoubleVecVec &distanceMatrix);
-
-        //! Run agglomerative clustering targeting \p objective
-        //! and build the cluster tree.
-        void run(EObjective objective, TNodeVec &tree);
-
-    private:
-        //! The distance matrix on the points to cluster.
-        TDoubleVecVec m_DistanceMatrix;
Filled in with the last object in each cluster to which - //! i'th point connects. - TSizeVec m_Pi; - //! Filled in with the lowest level at which the i'th point - //! is no longer the last object in its cluster. - TDoubleVec m_Lambda; - //! Holds a copy of a column of the distance matrix during - //! update point representation. - TDoubleVec m_M; + //! The parent cluster. + CNode* m_Parent; + //! The left child cluster. + CNode* m_LeftChild; + //! The right child cluster. + CNode* m_RightChild; + //! The unique index of this cluster. + std::size_t m_Index; + //! The height of this cluster, i.e. the value of the + //! objective function at which the cluster forms. + double m_Height; + }; + + using TNodeVec = std::vector; + +public: + //! Possible clustering objective functions supported. + enum EObjective { E_Single, E_Complete, E_Average, E_Weighted, E_Ward }; + +public: + //! Setup the distance matrix from which to compute the + //! agglomerative clustering. + bool initialize(TDoubleVecVec& distanceMatrix); + + //! Run agglomerative clustering targeting \p objective + //! and build the cluster tree. + void run(EObjective objective, TNodeVec& tree); + +private: + //! The distance matrix on the points to cluster. + TDoubleVecVec m_DistanceMatrix; + //! Filled in with the last object in each cluster to which + //! i'th point connects. + TSizeVec m_Pi; + //! Filled in with the lowest level at which the i'th point + //! is no longer the last object in its cluster. + TDoubleVec m_Lambda; + //! Holds a copy of a column of the distance matrix during + //! update point representation. + TDoubleVec m_M; }; - } } diff --git a/include/maths/CAnnotatedVector.h b/include/maths/CAnnotatedVector.h index 2fd6974ed5..6f853dab5b 100644 --- a/include/maths/CAnnotatedVector.h +++ b/include/maths/CAnnotatedVector.h @@ -13,61 +13,44 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A vector on to which data have been annotated. //! //! \tparam VECTOR The vector type. //! \tparam ANNOTATION The annotated data type. template -class CAnnotatedVector : public VECTOR -{ - public: - using TAnnotation = ANNOTATION; - using TCoordinate = typename SCoordinate::Type; - - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() - { - return core::memory_detail::SDynamicSizeAlwaysZero::value() - && core::memory_detail::SDynamicSizeAlwaysZero::value(); - } - - public: - //! Construct with a vector and annotation data. - CAnnotatedVector(const VECTOR &vector = VECTOR(), - const ANNOTATION &annotation = ANNOTATION()) : - VECTOR(vector), - m_Annotation(annotation) - {} - - //! Construct with a vector initialized with \p coordinate - //! and some default constructed annotation data. - explicit CAnnotatedVector(TCoordinate coordinate) : - VECTOR(coordinate), - m_Annotation() - {} - - //! Get the annotation data by constant reference. - const ANNOTATION &annotation() const - { - return m_Annotation; - } - - //! Get the annotation data by reference. - ANNOTATION &annotation() - { - return m_Annotation; - } - - private: - //! The data which has been annotated onto the vector. - ANNOTATION m_Annotation; +class CAnnotatedVector : public VECTOR { +public: + using TAnnotation = ANNOTATION; + using TCoordinate = typename SCoordinate::Type; + + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { + return core::memory_detail::SDynamicSizeAlwaysZero::value() && + core::memory_detail::SDynamicSizeAlwaysZero::value(); + } + +public: + //! 
Construct with a vector and annotation data. + CAnnotatedVector(const VECTOR& vector = VECTOR(), const ANNOTATION& annotation = ANNOTATION()) + : VECTOR(vector), m_Annotation(annotation) {} + + //! Construct with a vector initialized with \p coordinate + //! and some default constructed annotation data. + explicit CAnnotatedVector(TCoordinate coordinate) : VECTOR(coordinate), m_Annotation() {} + + //! Get the annotation data by constant reference. + const ANNOTATION& annotation() const { return m_Annotation; } + + //! Get the annotation data by reference. + ANNOTATION& annotation() { return m_Annotation; } + +private: + //! The data which has been annotated onto the vector. + ANNOTATION m_Annotation; }; - } } diff --git a/include/maths/CAssignment.h b/include/maths/CAssignment.h index 99fa0baefd..9543a04f74 100644 --- a/include/maths/CAssignment.h +++ b/include/maths/CAssignment.h @@ -13,45 +13,40 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Implementation of algorithms for assignment problems. //! //! DESCRIPTION:\n //! Implements the Kuhn-Munkres algorithm to find the minimum //! cost perfect matching given an n x m cost matrix. -class MATHS_EXPORT CAssignment -{ - public: - using TDoubleVec = std::vector; - using TDoubleVecVec = std::vector; - using TSizeSizePr = std::pair; - using TSizeSizePrVec = std::vector; +class MATHS_EXPORT CAssignment { +public: + using TDoubleVec = std::vector; + using TDoubleVecVec = std::vector; + using TSizeSizePr = std::pair; + using TSizeSizePrVec = std::vector; - public: - //! \brief The Kuhn-Munkres algorithm for solving the - //! assignment problem. - //! - //! The assignment problem consists of a number of rows - //! along with a number of columns and a cost matrix which - //! gives the cost of assigning the i'th row to the j'th - //! column at position (i, j). The goal is to find a one-to-one - //! assignment of rows to columns whilst minimizing the total - //! cost of the assignment. - //! - //! \param[in] costs The cost matrix. - //! \param[out] matching Filled in with the optimal matching. - //! \warning The numbers of columns should be the same for - //! every row. - //! \note This implementation is O(m * n^2) where m is the - //! minimum of the # rows and columns and n is the maximum. - static bool kuhnMunkres(const TDoubleVecVec &costs, - TSizeSizePrVec &matching); +public: + //! \brief The Kuhn-Munkres algorithm for solving the + //! assignment problem. + //! + //! The assignment problem consists of a number of rows + //! along with a number of columns and a cost matrix which + //! gives the cost of assigning the i'th row to the j'th + //! column at position (i, j). The goal is to find a one-to-one + //! assignment of rows to columns whilst minimizing the total + //! cost of the assignment. + //! + //! \param[in] costs The cost matrix. + //! \param[out] matching Filled in with the optimal matching. + //! \warning The numbers of columns should be the same for + //! every row. + //! \note This implementation is O(m * n^2) where m is the + //! minimum of the # rows and columns and n is the maximum. 
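[Editorial aside, not part of the patch: a usage sketch of the solver declared immediately below. The cost values are illustrative, and the pairs quoted in the comment were computed by hand for this matrix.]

    #include <maths/CAssignment.h>

    void assignmentSketch() {
        using ml::maths::CAssignment;

        // costs[i][j] is the cost of assigning row i to column j.
        CAssignment::TDoubleVecVec costs{{4.0, 2.0, 8.0},
                                         {2.0, 3.0, 7.0},
                                         {3.0, 1.0, 6.0}};

        CAssignment::TSizeSizePrVec matching;
        if (CAssignment::kuhnMunkres(costs, matching)) {
            // The minimum cost matching for these costs pairs (0,1), (1,0)
            // and (2,2) with total cost 10; the order of pairs is unspecified.
        }
    }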
+ static bool kuhnMunkres(const TDoubleVecVec& costs, TSizeSizePrVec& matching); }; - } } #endif // INCLUDED_ml_maths_CAssignment_h diff --git a/include/maths/CBasicStatistics.h b/include/maths/CBasicStatistics.h index 5e9a9cb0fd..c3af5a5549 100644 --- a/include/maths/CBasicStatistics.h +++ b/include/maths/CBasicStatistics.h @@ -27,18 +27,17 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace basic_statistics_detail -{ +namespace ml { +namespace maths { +namespace basic_statistics_detail { //! Default undefined custom add function for points to the covariance //! estimator. -template struct SCovariancesCustomAdd {}; +template +struct SCovariancesCustomAdd {}; //! Default undefined covariance matrix shrinkage estimator. -template struct SCovariancesLedoitWolf {}; +template +struct SCovariancesLedoitWolf {}; } //! \brief Some basic stats utilities. @@ -46,1690 +45,1392 @@ template struct SCovariancesLedoitWolf {}; //! DESCRIPTION:\n //! Some utilities for computing basic sample statistics such //! as central moments, covariance matrices and so on. -class MATHS_EXPORT CBasicStatistics -{ - public: - using TDoubleDoublePr = std::pair; - using TDoubleVec = std::vector; - - public: - //! Compute the mean of a pair. - static double mean(const TDoubleDoublePr &samples); - - //! Compute the vector mean of a pair. - template - static VECTOR mean(const std::pair &samples) - { - std::size_t n = std::min(samples.first.size(), samples.second.size()); - VECTOR result; - result.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { - result.push_back(0.5 * (samples.first[i] + samples.second[i])); - } - return result; +class MATHS_EXPORT CBasicStatistics { +public: + using TDoubleDoublePr = std::pair; + using TDoubleVec = std::vector; + +public: + //! Compute the mean of a pair. + static double mean(const TDoubleDoublePr& samples); + + //! Compute the vector mean of a pair. + template + static VECTOR mean(const std::pair& samples) { + std::size_t n = std::min(samples.first.size(), samples.second.size()); + VECTOR result; + result.reserve(n); + for (std::size_t i = 0u; i < n; ++i) { + result.push_back(0.5 * (samples.first[i] + samples.second[i])); } + return result; + } + + //! Compute the sample mean. + static double mean(const TDoubleVec& sample); + + //! Compute the sample median. + static double median(const TDoubleVec& dataIn); + + //! Compute the maximum of \p first, \p second and \p third. + template + static T max(T first, T second, T third) { + return first >= second ? (third >= first ? third : first) : (third >= second ? third : second); + } - //! Compute the sample mean. - static double mean(const TDoubleVec &sample); + //! Compute the minimum of \p first, \p second and \p third. + template + static T min(T first, T second, T third) { + return first <= second ? (third <= first ? third : first) : (third <= second ? third : second); + } + + /////////////////////////// ACCUMULATORS /////////////////////////// + + //! Delimiter used to persist basic types to a string. + static const char INTERNAL_DELIMITER; + + //! Delimiter used to persist central moments to a string buffer. + static const char EXTERNAL_DELIMITER; + + //! \brief An accumulator class for sample central moments. + //! + //! DESCRIPTION:\n + //! This function object accumulates sample central moments for a set + //! of samples passed to its function operator. + //! + //! It is capable of calculating the mean and the 2nd and 3rd central + //! moments. These can be used to calculate the sample mean, variance + //! 
and skewness. Free functions are defined to compute these.
+    //!
+    //! IMPLEMENTATION DECISIONS:\n
+    //! This is templatized to support, for example, float when space is at
+    //! a premium or something with higher precision than double when accuracy
+    //! is at a premium. The type T must technically be an infinite field
+    //! (in the mathematical sense). In particular, any floating point would
+    //! do, also custom rational or complex types would work provided they
+    //! overload "*" and "/" and define the necessary constructors; however,
+    //! integral types won't work.
+    //!
+    //! The number of moments computed is supplied as a template parameter
+    //! so exactly enough memory is allocated to store the results of the
+    //! calculations. It is up to the user to not pass bad values for this
+    //! parameter. Typedefs are provided to get hold of appropriate objects
+    //! for computing the mean, mean and variance and mean, variance and
+    //! skewness. Free functions are used to get hold of these quantities.
+    //! They are overloaded for explicit values of the number of moments to
+    //! provide compile time checking that the moment is available.
+    //!
+    //! We use recurrence relations for the higher order moments which
+    //! minimize the cancellation errors. These can be derived by considering,\n
+    //! <pre>
+    //!   \f$M(n, N) = \sum_{i=1}^{N}{ (x_i - M(1, N))^n } = \sum_{i=1}^{N}{ (x_i - M(1, N-1) + (M(1, N-1) - M(1, N)))^n }\f$
+    //! </pre>
+ //! + //! where,\n + //! \f$M(1, N)\f$ is defined to be the sample mean of \f$N\f$ samples,\n + //! \f$n > 1\f$ for the higher order central moments, and\n + //! + //! Using these relations means that the lower order moments are used to + //! calculate the higher order moments, so we only allow the user to select + //! the highest order moment to compute. + //! + //! Note, that this is loosely modeled on the boost accumulator statistics + //! library. This makes use of boost::mpl which doesn't compile on all our + //! supported platforms. Also, it isn't very good at managing cancellation + //! errors. + //! + //! \tparam T The "floating point" type. + //! \tparam ORDER The highest order moment to gather. + template + struct SSampleCentralMoments : public std::unary_function { + using TCoordinate = typename SCoordinate::Type; + + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } - //! Compute the sample median. - static double median(const TDoubleVec &dataIn); + explicit SSampleCentralMoments(const T& initial = T(0)) : s_Count(0) { std::fill_n(s_Moments, ORDER, initial); } - //! Compute the maximum of \p first, \p second and \p third. - template - static T max(T first, T second, T third) - { - return first >= second ? - (third >= first ? third : first) : - (third >= second ? third : second); + //! Copy construction from implicitly convertible type. + template + SSampleCentralMoments(const SSampleCentralMoments& other) : s_Count{other.s_Count} { + std::copy(other.s_Moments, other.s_Moments + ORDER, s_Moments); } - //! Compute the minimum of \p first, \p second and \p third. - template - static T min(T first, T second, T third) - { - return first <= second ? - (third <= first ? third : first) : - (third <= second ? third : second); + //! Assignment from implicitly convertible type. + template + const SSampleCentralMoments& operator=(const SSampleCentralMoments& other) { + s_Count = other.s_Count; + std::copy(other.s_Moments, other.s_Moments + ORDER, s_Moments); + return *this; } - /////////////////////////// ACCUMULATORS /////////////////////////// + //! \name Persistence + //@{ + //! Initialize from a delimited string. + bool fromDelimited(const std::string& str); - //! Delimiter used to persist basic types to a string. - static const char INTERNAL_DELIMITER; + //! Convert to a delimited string. + std::string toDelimited() const; + //@} - //! Delimiter used to persist central moments to a string buffer. - static const char EXTERNAL_DELIMITER; + //! Total order based on count then lexicographical less of moments. + bool operator<(const SSampleCentralMoments& rhs) const { + return s_Count < rhs.s_Count || + (s_Count == rhs.s_Count && + std::lexicographical_compare(s_Moments, s_Moments + ORDER, rhs.s_Moments, rhs.s_Moments + ORDER)); + } - //! \brief An accumulator class for sample central moments. - //! - //! DESCRIPTION:\n - //! This function object accumulates sample central moments for a set - //! of samples passed to its function operator. - //! - //! It is capable of calculating the mean and the 2nd and 3rd central - //! moments. These can be used to calculate the sample mean, variance - //! and skewness. Free functions are defined to compute these. - //! - //! IMPLEMENTATION DECISIONS:\n - //! This is templatized to support, for example, float when space is at - //! a premium or something with higher precision than double when accuracy - //! is at a premium. 
The type T must technically be an infinite field
-        //! (in the mathematical sense). In particular, any floating point would
-        //! do, also custom rational or complex types would work provided they
-        //! overload "*" and "/" and define the necessary constructors; however,
-        //! integral types won't work.
-        //!
-        //! The number of moments computed is supplied as a template parameter
-        //! so exactly enough memory is allocated to store the results of the
-        //! calculations. It is up to the user to not pass bad values for this
-        //! parameter. Typedefs are provided to get hold of appropriate objects
-        //! for computing the mean, mean and variance and mean, variance and
-        //! skewness. Free functions are used to get hold of these quantities.
-        //! They are overloaded for explicit values of the number of moments to
-        //! provide compile time checking that the moment is available.
-        //!
-        //! We use recurrence relations for the higher order moments which
-        //! minimize the cancellation errors. These can be derived by considering,\n
-        //! <pre>
-        //!   \f$M(n, N) = \sum_{i=1}^{N}{ (x_i - M(1, N))^n } = \sum_{i=1}^{N}{ (x_i - M(1, N-1) + (M(1, N-1) - M(1, N)))^n }\f$
-        //! </pre>
- //! - //! where,\n - //! \f$M(1, N)\f$ is defined to be the sample mean of \f$N\f$ samples,\n - //! \f$n > 1\f$ for the higher order central moments, and\n - //! - //! Using these relations means that the lower order moments are used to - //! calculate the higher order moments, so we only allow the user to select - //! the highest order moment to compute. - //! - //! Note, that this is loosely modeled on the boost accumulator statistics - //! library. This makes use of boost::mpl which doesn't compile on all our - //! supported platforms. Also, it isn't very good at managing cancellation - //! errors. - //! - //! \tparam T The "floating point" type. - //! \tparam ORDER The highest order moment to gather. - template - struct SSampleCentralMoments : public std::unary_function - { - using TCoordinate = typename SCoordinate::Type; - - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() - { - return core::memory_detail::SDynamicSizeAlwaysZero::value(); + //! \name Update + //@{ + //! Define a function operator for use with std:: algorithms. + inline void operator()(const T& x) { this->add(x); } + + //! Update the moments with the collection \p x. + template + void add(const std::vector& x) { + for (const auto& xi : x) { + this->add(xi); } + } - explicit SSampleCentralMoments(const T &initial = T(0)) : s_Count(0) - { - std::fill_n(s_Moments, ORDER, initial); + //! Update the moments with the collection \p x. + template + void add(const core::CSmallVector& x) { + for (const auto& xi : x) { + this->add(xi); } + } - //! Copy construction from implicitly convertible type. - template - SSampleCentralMoments(const SSampleCentralMoments &other) : - s_Count{other.s_Count} - { - std::copy(other.s_Moments, other.s_Moments + ORDER, s_Moments); + //! Update the moments with the collection \p x. + template + void add(const std::vector>& x) { + for (const auto& xi : x) { + this->operator+=(xi); } + } - //! Assignment from implicitly convertible type. - template - const SSampleCentralMoments & - operator=(const SSampleCentralMoments &other) - { - s_Count = other.s_Count; - std::copy(other.s_Moments, other.s_Moments + ORDER, s_Moments); - return *this; - } + //! Update with a generic value \p x. + template + void add(const U& x, const TCoordinate& n = TCoordinate{1}); - //! \name Persistence - //@{ - //! Initialize from a delimited string. - bool fromDelimited(const std::string &str); - - //! Convert to a delimited string. - std::string toDelimited() const; - //@} - - //! Total order based on count then lexicographical less of moments. - bool operator<(const SSampleCentralMoments &rhs) const - { - return s_Count < rhs.s_Count - || ( s_Count == rhs.s_Count - && std::lexicographical_compare(s_Moments, s_Moments + ORDER, - rhs.s_Moments, rhs.s_Moments + ORDER)); + //! Update the moments with \p x. \p n is the optional number + //! of times to add \p x. + void add(const T& x, const TCoordinate& n, int) { + if (n == TCoordinate{0}) { + return; } - //! \name Update - //@{ - //! Define a function operator for use with std:: algorithms. - inline void operator()(const T &x) - { - this->add(x); - } + s_Count += n; - //! Update the moments with the collection \p x. - template - void add(const std::vector &x) - { - for (const auto &xi : x) - { - this->add(xi); - } - } + // Note we don't trap the case alpha is less than epsilon, + // because then we'd have to compute epsilon and it is very + // unlikely the count will get big enough. 
+ TCoordinate alpha{n / s_Count}; + TCoordinate beta{TCoordinate{1} - alpha}; + + T mean{s_Moments[0]}; + s_Moments[0] = beta * mean + alpha * x; + + if (ORDER > 1) { + T r{x - s_Moments[0]}; + T r2{r * r}; + T dMean{mean - s_Moments[0]}; + T dMean2{dMean * dMean}; + T variance{s_Moments[1]}; - //! Update the moments with the collection \p x. - template - void add(const core::CSmallVector &x) - { - for (const auto &xi : x) - { - this->add(xi); + s_Moments[1] = beta * (variance + dMean2) + alpha * r2; + + if (ORDER > 2) { + T skew{s_Moments[2]}; + T dSkew{(TCoordinate(3) * variance + dMean2) * dMean}; + + s_Moments[2] = beta * (skew + dSkew) + alpha * r2 * r; } } + } - //! Update the moments with the collection \p x. - template - void add(const std::vector> &x) - { - for (const auto &xi : x) - { - this->operator+=(xi); - } + //! Combine two moments. This is equivalent to running + //! a single accumulator on the entire collection. + template + const SSampleCentralMoments& operator+=(const SSampleCentralMoments& rhs) { + if (rhs.s_Count == TCoordinate{0}) { + return *this; } - //! Update with a generic value \p x. - template - void add(const U &x, const TCoordinate &n = TCoordinate{1}); - - //! Update the moments with \p x. \p n is the optional number - //! of times to add \p x. - void add(const T &x, const TCoordinate &n, int) - { - if (n == TCoordinate{0}) - { - return; - } + s_Count = s_Count + rhs.s_Count; - s_Count += n; + // Note we don't trap the case alpha is less than epsilon, + // because then we'd have to compute epsilon and it is very + // unlikely the count will get big enough. + TCoordinate alpha{rhs.s_Count / s_Count}; + TCoordinate beta{TCoordinate{1} - alpha}; - // Note we don't trap the case alpha is less than epsilon, - // because then we'd have to compute epsilon and it is very - // unlikely the count will get big enough. - TCoordinate alpha{n / s_Count}; - TCoordinate beta{TCoordinate{1} - alpha}; + T meanLhs{s_Moments[0]}; + T meanRhs{rhs.s_Moments[0]}; - T mean{s_Moments[0]}; - s_Moments[0] = beta * mean + alpha * x; + s_Moments[0] = beta * meanLhs + alpha * meanRhs; - if (ORDER > 1) - { - T r{x - s_Moments[0]}; - T r2{r * r}; - T dMean{mean - s_Moments[0]}; - T dMean2{dMean * dMean}; - T variance{s_Moments[1]}; + if (ORDER > 1) { + T dMeanLhs{meanLhs - s_Moments[0]}; + T dMean2Lhs{dMeanLhs * dMeanLhs}; + T varianceLhs{s_Moments[1]}; + T dMeanRhs{meanRhs - s_Moments[0]}; + T dMean2Rhs{dMeanRhs * dMeanRhs}; + T varianceRhs{rhs.s_Moments[1]}; - s_Moments[1] = beta * (variance + dMean2) + alpha * r2; + s_Moments[1] = beta * (varianceLhs + dMean2Lhs) + alpha * (varianceRhs + dMean2Rhs); - if (ORDER > 2) - { - T skew{s_Moments[2]}; - T dSkew{(TCoordinate(3) * variance + dMean2) * dMean}; + if (ORDER > 2) { + T skewLhs{s_Moments[2]}; + T dSkewLhs{(TCoordinate{3} * varianceLhs + dMean2Lhs) * dMeanLhs}; - s_Moments[2] = beta * (skew + dSkew) + alpha * r2 * r; - } + T skewRhs{rhs.s_Moments[2]}; + T dSkewRhs{(TCoordinate{3} * varianceRhs + dMean2Rhs) * dMeanRhs}; + + s_Moments[2] = beta * (skewLhs + dSkewLhs) + alpha * (skewRhs + dSkewRhs); } } - //! Combine two moments. This is equivalent to running - //! a single accumulator on the entire collection. - template - const SSampleCentralMoments &operator+=(const SSampleCentralMoments &rhs) - { - if (rhs.s_Count == TCoordinate{0}) - { - return *this; - } + return *this; + } - s_Count = s_Count + rhs.s_Count; + //! Combine two moments. This is equivalent to running + //! a single accumulator on the entire collection. 
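[Editorial aside, not part of the patch: the update above is a Welford-style recurrence. For the mean it reduces to M(1, N) = M(1, N-1) + (x_N - M(1, N-1))/N, and the same alpha/beta blending keeps the running second central moment exact. A self-contained check of the ORDER == 2 case against the batch formulas, in plain C++ independent of this class:]

    #include <cassert>
    #include <cmath>
    #include <vector>

    // Online mean/variance update mirroring the ORDER == 2 branch above,
    // written out standalone with unit weights.
    int main() {
        std::vector<double> x{1.0, 4.0, 2.0, 8.0};

        double count = 0.0, mean = 0.0, m2 = 0.0; // m2 is the biased variance.
        for (double xi : x) {
            count += 1.0;
            double alpha = 1.0 / count;
            double beta = 1.0 - alpha;
            double oldMean = mean;
            mean = beta * oldMean + alpha * xi;
            double r = xi - mean;          // residual w.r.t. the new mean
            double dMean = oldMean - mean; // shift of the mean
            m2 = beta * (m2 + dMean * dMean) + alpha * r * r;
        }

        // Batch values: mean = 15/4, biased variance = E[x^2] - mean^2.
        double batchMean = (1.0 + 4.0 + 2.0 + 8.0) / 4.0;
        double batchM2 = (1.0 + 16.0 + 4.0 + 64.0) / 4.0 - batchMean * batchMean;
        assert(std::fabs(mean - batchMean) < 1e-12);
        assert(std::fabs(m2 - batchM2) < 1e-12);
        return 0;
    }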
+ template + SSampleCentralMoments operator+(const SSampleCentralMoments& rhs) const { + SSampleCentralMoments result{*this}; + return result += rhs; + } - // Note we don't trap the case alpha is less than epsilon, - // because then we'd have to compute epsilon and it is very - // unlikely the count will get big enough. - TCoordinate alpha{rhs.s_Count / s_Count}; - TCoordinate beta{TCoordinate{1} - alpha}; + //! Subtract \p rhs from these. + //! + //! \note That this isn't always well defined. For example, + //! the count and variance of these moments must be larger + //! than \p rhs. The caller must ensure that these conditions + //! are satisfied. + template + const SSampleCentralMoments& operator-=(const SSampleCentralMoments& rhs) { + if (rhs.s_Count == TCoordinate{0}) { + return *this; + } - T meanLhs{s_Moments[0]}; - T meanRhs{rhs.s_Moments[0]}; + using std::max; - s_Moments[0] = beta * meanLhs + alpha * meanRhs; + s_Count = max(s_Count - rhs.s_Count, TCoordinate{0}); - if (ORDER > 1) - { - T dMeanLhs{meanLhs - s_Moments[0]}; - T dMean2Lhs{dMeanLhs * dMeanLhs}; - T varianceLhs{s_Moments[1]}; - T dMeanRhs{meanRhs - s_Moments[0]}; - T dMean2Rhs{dMeanRhs * dMeanRhs}; - T varianceRhs{rhs.s_Moments[1]}; + if (s_Count == TCoordinate{0}) { + std::fill_n(s_Moments, ORDER, T{0}); + return *this; + } - s_Moments[1] = beta * (varianceLhs + dMean2Lhs) - + alpha * (varianceRhs + dMean2Rhs); + // Note we don't trap the case alpha is less than epsilon, + // because then we'd have to compute epsilon and it is very + // unlikely the count will get big enough. + TCoordinate alpha{rhs.s_Count / s_Count}; + TCoordinate beta{TCoordinate{1} + alpha}; - if (ORDER > 2) - { - T skewLhs{s_Moments[2]}; - T dSkewLhs{(TCoordinate{3} * varianceLhs + dMean2Lhs) * dMeanLhs}; + T meanLhs{s_Moments[0]}; + T meanRhs{rhs.s_Moments[0]}; - T skewRhs{rhs.s_Moments[2]}; - T dSkewRhs{(TCoordinate{3} * varianceRhs + dMean2Rhs) * dMeanRhs}; + s_Moments[0] = beta * meanLhs - alpha * meanRhs; - s_Moments[2] = beta * (skewLhs + dSkewLhs) - + alpha * (skewRhs + dSkewRhs); - } - } + if (ORDER > 1) { + T dMeanLhs{s_Moments[0] - meanLhs}; + T dMean2Lhs{dMeanLhs * dMeanLhs}; + T dMeanRhs{meanRhs - meanLhs}; + T dMean2Rhs{dMeanRhs * dMeanRhs}; + T varianceRhs{rhs.s_Moments[1]}; - return *this; - } + s_Moments[1] = max(beta * (s_Moments[1] - dMean2Lhs) - alpha * (varianceRhs + dMean2Rhs - dMean2Lhs), T{0}); - //! Combine two moments. This is equivalent to running - //! a single accumulator on the entire collection. - template - SSampleCentralMoments operator+(const SSampleCentralMoments &rhs) const - { - SSampleCentralMoments result{*this}; - return result += rhs; - } + if (ORDER > 2) { + T skewLhs{s_Moments[2]}; + T dSkewLhs{(TCoordinate{3} * s_Moments[1] + dMean2Lhs) * dMeanLhs}; + T skewRhs{rhs.s_Moments[2]}; + T dSkewRhs{(TCoordinate{3} * varianceRhs + dMean2Rhs) * dMeanRhs}; - //! Subtract \p rhs from these. - //! - //! \note That this isn't always well defined. For example, - //! the count and variance of these moments must be larger - //! than \p rhs. The caller must ensure that these conditions - //! are satisfied. - template - const SSampleCentralMoments &operator-=(const SSampleCentralMoments &rhs) - { - if (rhs.s_Count == TCoordinate{0}) - { - return *this; + s_Moments[2] = beta * (skewLhs - dSkewLhs) - alpha * (skewRhs + dSkewRhs - dSkewLhs); } + } - using std::max; + return *this; + } - s_Count = max(s_Count - rhs.s_Count, TCoordinate{0}); + //! Subtract \p rhs from these. + //! + //! 
\note That this isn't always well defined. For example, + //! the count and variance of these moments must be larger + //! than \p rhs. The caller must ensure that these conditions + //! are satisfied. + template + SSampleCentralMoments operator-(const SSampleCentralMoments& rhs) const { + SSampleCentralMoments result{*this}; + return result -= rhs; + } - if (s_Count == TCoordinate{0}) - { - std::fill_n(s_Moments, ORDER, T{0}); - return *this; - } + //! Age the moments by reducing the count. + //! \note \p factor should be in the range [0,1]. + //! \note It must be possible to multiply T by double to use + //! this method. + void age(double factor) { s_Count = s_Count * TCoordinate{factor}; } + //@} - // Note we don't trap the case alpha is less than epsilon, - // because then we'd have to compute epsilon and it is very - // unlikely the count will get big enough. - TCoordinate alpha{rhs.s_Count / s_Count}; - TCoordinate beta{TCoordinate{1} + alpha}; - - T meanLhs{s_Moments[0]}; - T meanRhs{rhs.s_Moments[0]}; - - s_Moments[0] = beta * meanLhs - alpha * meanRhs; - - if (ORDER > 1) - { - T dMeanLhs{s_Moments[0] - meanLhs}; - T dMean2Lhs{dMeanLhs * dMeanLhs}; - T dMeanRhs{meanRhs - meanLhs}; - T dMean2Rhs{dMeanRhs * dMeanRhs}; - T varianceRhs{rhs.s_Moments[1]}; - - s_Moments[1] = max( beta * (s_Moments[1] - dMean2Lhs) - - alpha * (varianceRhs + dMean2Rhs - dMean2Lhs), T{0}); - - if (ORDER > 2) - { - T skewLhs{s_Moments[2]}; - T dSkewLhs{(TCoordinate{3} * s_Moments[1] + dMean2Lhs) * dMeanLhs}; - T skewRhs{rhs.s_Moments[2]}; - T dSkewRhs{(TCoordinate{3} * varianceRhs + dMean2Rhs) * dMeanRhs}; - - s_Moments[2] = beta * (skewLhs - dSkewLhs) - - alpha * (skewRhs + dSkewRhs - dSkewLhs); - } - } + //! Get a checksum for this object. + uint64_t checksum() const; - return *this; - } + TCoordinate s_Count; + T s_Moments[ORDER]; + }; - //! Subtract \p rhs from these. - //! - //! \note That this isn't always well defined. For example, - //! the count and variance of these moments must be larger - //! than \p rhs. The caller must ensure that these conditions - //! are satisfied. - template - SSampleCentralMoments operator-(const SSampleCentralMoments &rhs) const - { - SSampleCentralMoments result{*this}; - return result -= rhs; - } + //! \name Accumulator Typedefs + //@{ + //! Accumulator object to compute the sample mean. + template + struct SSampleMean { + using TAccumulator = SSampleCentralMoments; + }; - //! Age the moments by reducing the count. - //! \note \p factor should be in the range [0,1]. - //! \note It must be possible to multiply T by double to use - //! this method. - void age(double factor) - { - s_Count = s_Count * TCoordinate{factor}; - } - //@} + //! Accumulator object to compute the sample mean and variance. + template + struct SSampleMeanVar { + using TAccumulator = SSampleCentralMoments; + }; - //! Get a checksum for this object. - uint64_t checksum() const; + //! Accumulator object to compute the sample mean, variance and skewness. + template + struct SSampleMeanVarSkew { + using TAccumulator = SSampleCentralMoments; + }; + //@} + + //! \name Factory Functions + //@{ + //! Make a mean accumulator. + template + static SSampleCentralMoments accumulator(const U& count, const T& m1) { + SSampleCentralMoments result; + result.s_Count = count; + result.s_Moments[0] = m1; + return result; + } - TCoordinate s_Count; - T s_Moments[ORDER]; - }; + //! Make a mean and variance accumulator. 
+ template + static SSampleCentralMoments accumulator(const U& count, const T& m1, const T& m2) { + SSampleCentralMoments result; + result.s_Count = count; + result.s_Moments[0] = m1; + result.s_Moments[1] = m2; + return result; + } - //! \name Accumulator Typedefs - //@{ - //! Accumulator object to compute the sample mean. - template - struct SSampleMean - { - using TAccumulator = SSampleCentralMoments; - }; - - //! Accumulator object to compute the sample mean and variance. - template - struct SSampleMeanVar - { - using TAccumulator = SSampleCentralMoments; - }; - - //! Accumulator object to compute the sample mean, variance and skewness. - template - struct SSampleMeanVarSkew - { - using TAccumulator = SSampleCentralMoments; - }; - //@} + //! Make a mean, variance and skew accumulator. + template + static SSampleCentralMoments accumulator(const U& count, const T& m1, const T& m2, const T& m3) { + SSampleCentralMoments result; + result.s_Count = count; + result.s_Moments[0] = m1; + result.s_Moments[1] = m2; + result.s_Moments[2] = m3; + return result; + } + //@} - //! \name Factory Functions - //@{ - //! Make a mean accumulator. - template - static SSampleCentralMoments - accumulator(const U &count, const T &m1) - { - SSampleCentralMoments result; - result.s_Count = count; - result.s_Moments[0] = m1; - return result; - } + //! Get the specified moment provided it exists + template + static const T& moment(const SSampleCentralMoments& accumulator) { + static_assert(M <= N, "M cannot be greater than N"); + return accumulator.s_Moments[M]; + } - //! Make a mean and variance accumulator. - template - static SSampleCentralMoments - accumulator(const U &count, const T &m1, const T &m2) - { - SSampleCentralMoments result; - result.s_Count = count; - result.s_Moments[0] = m1; - result.s_Moments[1] = m2; - return result; - } + //! Get the specified moment provided it exists + template + static T& moment(SSampleCentralMoments& accumulator) { + static_assert(M <= N, "M cannot be greater than N"); + return accumulator.s_Moments[M]; + } - //! Make a mean, variance and skew accumulator. - template - static SSampleCentralMoments - accumulator(const U &count, const T &m1, const T &m2, const T &m3) - { - SSampleCentralMoments result; - result.s_Count = count; - result.s_Moments[0] = m1; - result.s_Moments[1] = m2; - result.s_Moments[2] = m3; - return result; - } - //@} + //! Extract the count from an accumulator object. + template + static inline const typename SSampleCentralMoments::TCoordinate& count(const SSampleCentralMoments& accumulator) { + return accumulator.s_Count; + } - //! Get the specified moment provided it exists - template - static const T &moment(const SSampleCentralMoments &accumulator) - { - static_assert(M <= N, "M cannot be greater than N"); - return accumulator.s_Moments[M]; - } + //! Extract the count from an accumulator object. + template + static inline typename SSampleCentralMoments::TCoordinate& count(SSampleCentralMoments& accumulator) { + return accumulator.s_Count; + } - //! Get the specified moment provided it exists - template - static T &moment(SSampleCentralMoments &accumulator) - { - static_assert(M <= N, "M cannot be greater than N"); - return accumulator.s_Moments[M]; + //! Extract the counts from a vector of accumulators. 
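[Editorial aside, not part of the patch: taken together, the typedefs, factory functions and free accessors support usage along these lines. A sketch assuming double-valued samples; the angle-bracketed template arguments, e.g. SSampleMeanVar<double>, follow the typedefs declared above.]

    #include <maths/CBasicStatistics.h>

    void momentsSketch() {
        using TMeanVarAccumulator =
            ml::maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;

        TMeanVarAccumulator moments;
        moments.add(1.0);
        moments.add(4.0);
        moments.add(2.0);

        double n = ml::maths::CBasicStatistics::count(moments);    // 3
        double m = ml::maths::CBasicStatistics::mean(moments);     // 7/3
        double v = ml::maths::CBasicStatistics::variance(moments); // unbiased, here 7/3

        // Accumulators combine as if a single accumulator had been
        // run over the concatenated samples.
        TMeanVarAccumulator other;
        other.add(8.0);
        moments += other;
    }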
+ template + static core::CSmallVector::TCoordinate, N> + count(const core::CSmallVector, N>& accumulators) { + core::CSmallVector::TCoordinate, N> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(count(accumulator)); } + return result; + } - //! Extract the count from an accumulator object. - template - static inline const typename SSampleCentralMoments::TCoordinate & - count(const SSampleCentralMoments &accumulator) - { - return accumulator.s_Count; + //! Extract the counts from a vector of accumulators. + template + static std::vector::TCoordinate> + count(const std::vector>& accumulators) { + std::vector::TCoordinate> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(count(accumulator)); } + return result; + } - //! Extract the count from an accumulator object. - template - static inline typename SSampleCentralMoments::TCoordinate & - count(SSampleCentralMoments &accumulator) - { - return accumulator.s_Count; - } + //! Extract the mean from an accumulator object. + template + static inline const T& mean(const SSampleCentralMoments& accumulator) { + return accumulator.s_Moments[0]; + } - //! Extract the counts from a vector of accumulators. - template - static core::CSmallVector::TCoordinate, N> - count(const core::CSmallVector, N> &accumulators) - { - core::CSmallVector::TCoordinate, N> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(count(accumulator)); - } - return result; + //! Extract the means from a vector of accumulators. + template + static core::CSmallVector::TCoordinate, N> + mean(const core::CSmallVector, N>& accumulators) { + core::CSmallVector::TCoordinate, N> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(mean(accumulator)); } + return result; + } - //! Extract the counts from a vector of accumulators. - template - static std::vector::TCoordinate> - count(const std::vector> &accumulators) - { - std::vector::TCoordinate> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(count(accumulator)); - } - return result; + //! Extract the means from a vector of accumulators. + template + static std::vector::TCoordinate> + mean(const std::vector>& accumulators) { + std::vector::TCoordinate> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(mean(accumulator)); } + return result; + } - //! Extract the mean from an accumulator object. - template - static inline const T &mean(const SSampleCentralMoments &accumulator) - { - return accumulator.s_Moments[0]; - } + //! Extract the variance from an accumulator object. + //! + //! \note This is the unbiased form. + template + static inline T variance(const SSampleCentralMoments& accumulator) { + using TCoordinate = typename SSampleCentralMoments::TCoordinate; - //! Extract the means from a vector of accumulators. - template - static core::CSmallVector::TCoordinate, N> - mean(const core::CSmallVector, N> &accumulators) - { - core::CSmallVector::TCoordinate, N> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(mean(accumulator)); - } - return result; - } + static_assert(N >= 2, "N must be at least 2"); - //! Extract the means from a vector of accumulators. 
- template - static std::vector::TCoordinate> - mean(const std::vector> &accumulators) - { - std::vector::TCoordinate> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(mean(accumulator)); - } - return result; + if (accumulator.s_Count <= TCoordinate{1}) { + return T{0}; } - //! Extract the variance from an accumulator object. - //! - //! \note This is the unbiased form. - template - static inline T variance(const SSampleCentralMoments &accumulator) - { - using TCoordinate = typename SSampleCentralMoments::TCoordinate; - - static_assert(N >= 2, "N must be at least 2"); - - if (accumulator.s_Count <= TCoordinate{1}) - { - return T{0}; - } + TCoordinate bias{accumulator.s_Count / (accumulator.s_Count - TCoordinate{1})}; - TCoordinate bias{accumulator.s_Count / (accumulator.s_Count - TCoordinate{1})}; + return bias * accumulator.s_Moments[1]; + } - return bias * accumulator.s_Moments[1]; + //! Extract the variances from a vector of accumulators. + template + static core::CSmallVector::TCoordinate, N> + variance(const core::CSmallVector, N>& accumulators) { + core::CSmallVector::TCoordinate, N> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(variance(accumulator)); } + return result; + } - //! Extract the variances from a vector of accumulators. - template - static core::CSmallVector::TCoordinate, N> - variance(const core::CSmallVector, N> &accumulators) - { - core::CSmallVector::TCoordinate, N> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(variance(accumulator)); - } - return result; + //! Extract the variances from a vector of accumulators. + template + static std::vector::TCoordinate> + variance(const std::vector>& accumulators) { + std::vector::TCoordinate> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(variance(accumulator)); } + return result; + } - //! Extract the variances from a vector of accumulators. - template - static std::vector::TCoordinate> - variance(const std::vector> &accumulators) - { - std::vector::TCoordinate> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(variance(accumulator)); - } - return result; - } + //! Extract the maximum likelihood variance from an accumulator object. + //! + //! \note This is the biased form. + template + static inline const T& maximumLikelihoodVariance(const SSampleCentralMoments& accumulator) { + static_assert(N >= 2, "N must be at least 2"); + return accumulator.s_Moments[1]; + } - //! Extract the maximum likelihood variance from an accumulator object. - //! - //! \note This is the biased form. - template - static inline const T &maximumLikelihoodVariance(const SSampleCentralMoments &accumulator) - { - static_assert(N >= 2, "N must be at least 2"); - return accumulator.s_Moments[1]; + //! Extract the maximum likelihood variances from a vector of accumulators. + template + static core::CSmallVector::TCoordinate, N> + maximumLikelihoodVariance(const core::CSmallVector, N>& accumulators) { + core::CSmallVector::TCoordinate, N> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(maximumLikelihoodVariance(accumulator)); } + return result; + } - //! Extract the maximum likelihood variances from a vector of accumulators. 
- template - static core::CSmallVector::TCoordinate, N> - maximumLikelihoodVariance(const core::CSmallVector, N> &accumulators) - { - core::CSmallVector::TCoordinate, N> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(maximumLikelihoodVariance(accumulator)); - } - return result; + //! Extract the maximum likelihood variances from a vector of accumulators. + template + static std::vector::TCoordinate> + maximumLikelihoodVariance(const std::vector>& accumulators) { + std::vector::TCoordinate> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(maximumLikelihoodVariance(accumulator)); } + return result; + } - //! Extract the maximum likelihood variances from a vector of accumulators. - template - static std::vector::TCoordinate> - maximumLikelihoodVariance(const std::vector> &accumulators) - { - std::vector::TCoordinate> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(maximumLikelihoodVariance(accumulator)); - } - return result; - } + //! Extract the skewness from an accumulator object. + template + static inline T skewness(const SSampleCentralMoments& accumulator) { + using TCoordinate = typename SSampleCentralMoments::TCoordinate; - //! Extract the skewness from an accumulator object. - template - static inline T skewness(const SSampleCentralMoments &accumulator) - { - using TCoordinate = typename SSampleCentralMoments::TCoordinate; + static_assert(N >= 3, "N must be at least 3"); - static_assert(N >= 3, "N must be at least 3"); + if (accumulator.s_Count <= TCoordinate{2}) { + return T{0}; + } - if (accumulator.s_Count <= TCoordinate{2}) - { - return T{0}; - } + T normalization{variance(accumulator)}; + using std::sqrt; + normalization = normalization * sqrt(normalization); - T normalization{variance(accumulator)}; - using std::sqrt; - normalization = normalization * sqrt(normalization); + return accumulator.s_Moments[2] == T{0} ? T{0} : accumulator.s_Moments[2] / normalization; + } - return accumulator.s_Moments[2] == T{0} ? T{0} : accumulator.s_Moments[2] / normalization; + //! Extract the skewnesses from a vector of accumulators. + template + static core::CSmallVector::TCoordinate, N> + skewness(const core::CSmallVector, N>& accumulators) { + core::CSmallVector::TCoordinate, N> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(skewness(accumulator)); } + return result; + } - //! Extract the skewnesses from a vector of accumulators. - template - static core::CSmallVector::TCoordinate, N> - skewness(const core::CSmallVector, N> &accumulators) - { - core::CSmallVector::TCoordinate, N> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(skewness(accumulator)); - } - return result; + //! Extract the skewnesses from a vector of accumulators. + template + static std::vector::TCoordinate> + skewness(const std::vector>& accumulators) { + std::vector::TCoordinate> result; + result.reserve(accumulators.size()); + for (const auto& accumulator : accumulators) { + result.push_back(skewness(accumulator)); } + return result; + } - //! Extract the skewnesses from a vector of accumulators. 
- template - static std::vector::TCoordinate> - skewness(const std::vector> &accumulators) - { - std::vector::TCoordinate> result; - result.reserve(accumulators.size()); - for (const auto &accumulator : accumulators) - { - result.push_back(skewness(accumulator)); - } - return result; - } + //! \name Print Functions + //@{ + //! Print a mean accumulator. + template + static inline std::string print(const SSampleCentralMoments& accumulator) { + std::ostringstream result; + result << '(' << count(accumulator) << ", " << mean(accumulator) << ')'; + return result.str(); + } + //! Print a mean and variance accumulator. + template + static inline std::string print(const SSampleCentralMoments& accumulator) { + std::ostringstream result; + result << '(' << count(accumulator) << ", " << mean(accumulator) << ", " << variance(accumulator) << ')'; + return result.str(); + } + //! Print a mean, variance and skew accumulator. + template + static inline std::string print(const SSampleCentralMoments& accumulator) { + std::ostringstream result; + result << '(' << count(accumulator) << ", " << mean(accumulator) << ", " << variance(accumulator) << ", " << skewness(accumulator) + << ')'; + return result.str(); + } + //@} - //! \name Print Functions - //@{ - //! Print a mean accumulator. - template - static inline std::string print(const SSampleCentralMoments &accumulator) - { - std::ostringstream result; - result << '(' << count(accumulator) - << ", " << mean(accumulator) << ')'; - return result.str(); - } - //! Print a mean and variance accumulator. - template - static inline std::string print(const SSampleCentralMoments &accumulator) - { - std::ostringstream result; - result << '(' << count(accumulator) - << ", " << mean(accumulator) - << ", " << variance(accumulator) << ')'; - return result.str(); - } - //! Print a mean, variance and skew accumulator. - template - static inline std::string print(const SSampleCentralMoments &accumulator) - { - std::ostringstream result; - result << '(' << count(accumulator) - << ", " << mean(accumulator) - << ", " << variance(accumulator) - << ", " << skewness(accumulator) << ')'; - return result.str(); - } - //@} + //! Get a copy of \p moments with count scaled by \p scale. + template + static SSampleCentralMoments scaled(SSampleCentralMoments accumulator, const U& scale) { + accumulator.s_Count *= typename SSampleCentralMoments::TCoordinate{scale}; + return accumulator; + } - //! Get a copy of \p moments with count scaled by \p scale. - template - static SSampleCentralMoments scaled(SSampleCentralMoments accumulator, - const U &scale) - { - accumulator.s_Count *= typename SSampleCentralMoments::TCoordinate{scale}; - return accumulator; - } + //! Get a copy of \p moments with count scaled by \p scale. + template + static void scale(const U& scale, SSampleCentralMoments& accumulator) { + accumulator.s_Count *= typename SSampleCentralMoments::TCoordinate{scale}; + } - //! Get a copy of \p moments with count scaled by \p scale. - template - static void scale(const U &scale, SSampleCentralMoments &accumulator) - { - accumulator.s_Count *= typename SSampleCentralMoments::TCoordinate{scale}; + //! \brief An accumulator class for vector sample mean and covariances. + //! + //! DESCRIPTION:\n + //! This function object accumulates sample mean and covariances for a + //! set of vector samples passed to its function operator. + //! + //! Free functions are defined to retrieve the mean vector and covariances + //! to match the behavior of SSampleCentralMoments. + //! + //! 
IMPLEMENTATION DECISIONS:\n + //! This is templatized to support, for example, float when space is at + //! a premium or long double when accuracy is at a premium. The type T + //! must technically be an infinite field (in the mathematical sense). + //! In particular, any floating point would do, also custom rational or + //! complex types would work provided they overload "*" and "/" and define + //! the necessary constructors; however, integral types won't work. + //! + //! This uses the same recurrence relations as SSampleCentralMoments so + //! see that class for more information on these. + //! + //! \tparam T The "floating point" type. + //! \tparam N The vector dimension. + template + struct SSampleCovariances : public std::unary_function, void> { + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + + using TVector = CVectorNx1; + using TMatrix = CSymmetricMatrixNxN; + + SSampleCovariances() : s_Count{0}, s_Mean{0}, s_Covariances{0} {} + + SSampleCovariances(T count, const TVector& mean, const TMatrix& covariances) + : s_Count{count}, s_Mean{mean}, s_Covariances{covariances} {} + + SSampleCovariances(const TVector& count, const TVector& mean, const TMatrix& covariances) + : s_Count{count}, s_Mean{mean}, s_Covariances{covariances} {} + + //! Copy construction from implicitly convertible type. + template + SSampleCovariances(const SSampleCovariances& other) + : s_Count{other.s_Count}, s_Mean{other.s_Mean}, s_Covariances{other.s_Covariances} {} + + //! Assignment from implicitly convertible type. + template + const SSampleCovariances& operator=(const SSampleCovariances& other) { + s_Count = other.s_Count; + s_Mean = other.s_Mean; + s_Covariances = other.s_Covariances; + return *this; } - //! \brief An accumulator class for vector sample mean and covariances. - //! - //! DESCRIPTION:\n - //! This function object accumulates sample mean and covariances for a - //! set of vector samples passed to its function operator. - //! - //! Free functions are defined to retrieve the mean vector and covariances - //! to match the behavior of SSampleCentralMoments. - //! - //! IMPLEMENTATION DECISIONS:\n - //! This is templatized to support, for example, float when space is at - //! a premium or long double when accuracy is at a premium. The type T - //! must technically be an infinite field (in the mathematical sense). - //! In particular, any floating point would do, also custom rational or - //! complex types would work provided they overload "*" and "/" and define - //! the necessary constructors; however, integral types won't work. - //! - //! This uses the same recurrence relations as SSampleCentralMoments so - //! see that class for more information on these. - //! - //! \tparam T The "floating point" type. - //! \tparam N The vector dimension. - template - struct SSampleCovariances : public std::unary_function, void> - { - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() - { - return core::memory_detail::SDynamicSizeAlwaysZero::value(); - } + //! \name Persistence + //@{ + //! Initialize from a delimited string. 
+ bool fromDelimited(std::string str); - using TVector = CVectorNx1; - using TMatrix = CSymmetricMatrixNxN; - - SSampleCovariances() : s_Count{0}, s_Mean{0}, s_Covariances{0} - {} - - SSampleCovariances(T count, - const TVector &mean, - const TMatrix &covariances) : - s_Count{count}, s_Mean{mean}, s_Covariances{covariances} - {} - - SSampleCovariances(const TVector &count, - const TVector &mean, - const TMatrix &covariances) : - s_Count{count}, s_Mean{mean}, s_Covariances{covariances} - {} - - //! Copy construction from implicitly convertible type. - template - SSampleCovariances(const SSampleCovariances &other) : - s_Count{other.s_Count}, - s_Mean{other.s_Mean}, - s_Covariances{other.s_Covariances} - {} - - //! Assignment from implicitly convertible type. - template - const SSampleCovariances &operator=(const SSampleCovariances &other) - { - s_Count = other.s_Count; - s_Mean = other.s_Mean; - s_Covariances = other.s_Covariances; - return *this; - } + //! Convert to a delimited string. + std::string toDelimited() const; + //@} - //! \name Persistence - //@{ - //! Initialize from a delimited string. - bool fromDelimited(std::string str); - - //! Convert to a delimited string. - std::string toDelimited() const; - //@} - - //! \name Update - //@{ - //! Define a function operator for use with std:: algorithms. - inline void operator()(const TVector &x) - { - this->add(x); + //! \name Update + //@{ + //! Define a function operator for use with std:: algorithms. + inline void operator()(const TVector& x) { this->add(x); } + + //! Update the moments with the collection \p x. + template + void add(const std::vector& x) { + for (const auto& xi : x) { + this->add(xi); } + } - //! Update the moments with the collection \p x. - template - void add(const std::vector &x) - { - for (const auto &xi : x) - { - this->add(xi); - } - } + //! Update with a generic point \p x. + template + void add(const POINT& x, const POINT& n = POINT(1)) { + basic_statistics_detail::SCovariancesCustomAdd::add(x, n, *this); + } - //! Update with a generic point \p x. - template - void add(const POINT &x, const POINT &n = POINT(1)) - { - basic_statistics_detail::SCovariancesCustomAdd::add(x, n, *this); + //! Update the mean and covariances with \p x. + void add(const TVector& x, const TVector& n, int) { + if (n == TVector{0}) { + return; } - //! Update the mean and covariances with \p x. - void add(const TVector &x, const TVector &n, int) - { - if (n == TVector{0}) - { - return; - } + s_Count += n; - s_Count += n; + // Note we don't trap the case alpha is less than epsilon, + // because then we'd have to compute epsilon and it is very + // unlikely the count will get big enough. + TVector alpha{n / s_Count}; + TVector beta{TVector{1} - alpha}; - // Note we don't trap the case alpha is less than epsilon, - // because then we'd have to compute epsilon and it is very - // unlikely the count will get big enough. 
- TVector alpha{n / s_Count}; - TVector beta{TVector{1} - alpha}; + TVector mean{s_Mean}; + s_Mean = beta * mean + alpha * x; - TVector mean{s_Mean}; - s_Mean = beta * mean + alpha * x; + TVector r{x - s_Mean}; + TMatrix r2{E_OuterProduct, r}; + TVector dMean{mean - s_Mean}; + TMatrix dMean2{E_OuterProduct, dMean}; - TVector r{x - s_Mean}; - TMatrix r2{E_OuterProduct, r}; - TVector dMean{mean - s_Mean}; - TMatrix dMean2{E_OuterProduct, dMean}; + s_Covariances += dMean2; + scaleCovariances(beta, s_Covariances); + scaleCovariances(alpha, r2); + s_Covariances += r2; + } - s_Covariances += dMean2; - scaleCovariances(beta, s_Covariances); - scaleCovariances(alpha, r2); - s_Covariances += r2; + //! Combine two moments. This is equivalent to running + //! a single accumulator on the entire collection. + template + const SSampleCovariances& operator+=(const SSampleCovariances& rhs) { + s_Count = s_Count + rhs.s_Count; + if (s_Count == TVector{0}) { + return *this; } - //! Combine two moments. This is equivalent to running - //! a single accumulator on the entire collection. - template - const SSampleCovariances &operator+=(const SSampleCovariances &rhs) - { - s_Count = s_Count + rhs.s_Count; - if (s_Count == TVector{0}) - { - return *this; - } + // Note we don't trap the case alpha is less than epsilon, + // because then we'd have to compute epsilon and it is very + // unlikely the count will get big enough. + TVector alpha{rhs.s_Count / s_Count}; + TVector beta{TVector(1) - alpha}; + + TVector meanLhs{s_Mean}; - // Note we don't trap the case alpha is less than epsilon, - // because then we'd have to compute epsilon and it is very - // unlikely the count will get big enough. - TVector alpha{rhs.s_Count / s_Count}; - TVector beta{TVector(1) - alpha}; + s_Mean = beta * meanLhs + alpha * rhs.s_Mean; - TVector meanLhs{s_Mean}; + TVector dMeanLhs{meanLhs - s_Mean}; + TMatrix dMean2Lhs{E_OuterProduct, dMeanLhs}; + TVector dMeanRhs{rhs.s_Mean - s_Mean}; + TMatrix dMean2Rhs{E_OuterProduct, dMeanRhs}; - s_Mean = beta * meanLhs + alpha * rhs.s_Mean; + s_Covariances += dMean2Lhs; + scaleCovariances(beta, s_Covariances); + dMean2Rhs += rhs.s_Covariances; + scaleCovariances(alpha, dMean2Rhs); + s_Covariances += dMean2Rhs; - TVector dMeanLhs{meanLhs - s_Mean}; - TMatrix dMean2Lhs{E_OuterProduct, dMeanLhs}; - TVector dMeanRhs{rhs.s_Mean - s_Mean}; - TMatrix dMean2Rhs{E_OuterProduct, dMeanRhs}; + return *this; + } - s_Covariances += dMean2Lhs; - scaleCovariances(beta, s_Covariances); - dMean2Rhs += rhs.s_Covariances; - scaleCovariances(alpha, dMean2Rhs); - s_Covariances += dMean2Rhs; + //! Combine two moments. This is equivalent to running + //! a single accumulator on the entire collection. + template + SSampleCovariances operator+(const SSampleCovariances& rhs) const { + SSampleCovariances result{*this}; + return result += rhs; + } + //! Subtract \p rhs from these. + //! + //! \note That this isn't always well defined. For example, + //! the count and variance of these covariances must be + //! larger than \p rhs. The caller must ensure that these + //! conditions are satisfied. + template + const SSampleCovariances& operator-=(const SSampleCovariances& rhs) { + using std::max; + + s_Count = max(s_Count - rhs.s_Count, TVector(0)); + if (s_Count == TVector{0}) { + s_Mean = TVector{0}; + s_Covariances = TMatrix{0}; return *this; } - //! Combine two moments. This is equivalent to running - //! a single accumulator on the entire collection. 
- template - SSampleCovariances operator+(const SSampleCovariances &rhs) const - { - SSampleCovariances result{*this}; - return result += rhs; - } - - //! Subtract \p rhs from these. - //! - //! \note That this isn't always well defined. For example, - //! the count and variance of these covariances must be - //! larger than \p rhs. The caller must ensure that these - //! conditions are satisfied. - template - const SSampleCovariances &operator-=(const SSampleCovariances &rhs) - { - using std::max; - - s_Count = max(s_Count - rhs.s_Count, TVector(0)); - if (s_Count == TVector{0}) - { - s_Mean = TVector{0}; - s_Covariances = TMatrix{0}; - return *this; - } - - // Note we don't trap the case alpha is less than epsilon, - // because then we'd have to compute epsilon and it is very - // unlikely the count will get big enough. - TVector alpha{rhs.s_Count / s_Count}; - TVector beta{TVector{1} + alpha}; - - TVector meanLhs(s_Mean); - - s_Mean = beta * meanLhs - alpha * rhs.s_Mean; - - TVector dMeanLhs{s_Mean - meanLhs}; - TMatrix dMean2Lhs{E_OuterProduct, dMeanLhs}; - TVector dMeanRhs{rhs.s_Mean - meanLhs}; - TMatrix dMean2Rhs{E_OuterProduct, dMeanRhs}; - - s_Covariances = s_Covariances - dMean2Lhs; - scaleCovariances(beta, s_Covariances); - dMean2Rhs += rhs.s_Covariances - dMean2Lhs; - scaleCovariances(alpha, dMean2Rhs); - s_Covariances -= dMean2Rhs; - - // If any of the diagonal elements are negative round them - // up to zero and zero the corresponding row and column. - for (std::size_t i = 0u; i < N; ++i) - { - if (s_Covariances(i, i) < T{0}) - { - for (std::size_t j = 0u; j < N; ++j) - { - s_Covariances(i, j) = T{0}; - } + // Note we don't trap the case alpha is less than epsilon, + // because then we'd have to compute epsilon and it is very + // unlikely the count will get big enough. + TVector alpha{rhs.s_Count / s_Count}; + TVector beta{TVector{1} + alpha}; + + TVector meanLhs(s_Mean); + + s_Mean = beta * meanLhs - alpha * rhs.s_Mean; + + TVector dMeanLhs{s_Mean - meanLhs}; + TMatrix dMean2Lhs{E_OuterProduct, dMeanLhs}; + TVector dMeanRhs{rhs.s_Mean - meanLhs}; + TMatrix dMean2Rhs{E_OuterProduct, dMeanRhs}; + + s_Covariances = s_Covariances - dMean2Lhs; + scaleCovariances(beta, s_Covariances); + dMean2Rhs += rhs.s_Covariances - dMean2Lhs; + scaleCovariances(alpha, dMean2Rhs); + s_Covariances -= dMean2Rhs; + + // If any of the diagonal elements are negative round them + // up to zero and zero the corresponding row and column. + for (std::size_t i = 0u; i < N; ++i) { + if (s_Covariances(i, i) < T{0}) { + for (std::size_t j = 0u; j < N; ++j) { + s_Covariances(i, j) = T{0}; } } - - return *this; - } - - //! Subtract \p rhs from these. - //! - //! \note That this isn't always well defined. For example, - //! the count and variance of these covariances must be - //! larger than \p rhs. The caller must ensure that these - //! conditions are satisfied. - template - SSampleCovariances operator-(const SSampleCovariances &rhs) - { - SSampleCovariances result{*this}; - return result -= rhs; } - //! Age the mean and covariances by reducing the count. - //! - //! \note \p factor should be in the range [0,1]. - //! \note It must be possible to cast double to T to use - //! this method. - void age(double factor) - { - s_Count = s_Count * T{factor}; - } - //@} - - //! Get a checksum for this object. - uint64_t checksum() const; - - TVector s_Count; - TVector s_Mean; - TMatrix s_Covariances; - }; - - //! Make a covariances accumulator. 
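Note the asymmetry with the merge: subtraction is only meaningful when \p rhs was genuinely accumulated into these moments, and even then floating point cancellation can push a diagonal element slightly negative, which is what the row and column zeroing above guards against. A usage sketch, assuming only the types declared in this header:

\code{cpp}
#include <maths/CBasicStatistics.h>

using namespace ml;

// Maintain moments over a sliding window by subtracting a batch that
// was previously added. This is only valid because 'expired' is a
// subset of what went into 'window'; any rounding on the diagonal is
// clamped to zero as above.
maths::CBasicStatistics::SSampleCovariances<double, 3> window;
maths::CBasicStatistics::SSampleCovariances<double, 3> expired;
// ... window.add(...) for every point, expired.add(...) for old points ...
window -= expired;
\endcode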
- template - static inline SSampleCovariances accumulator(T count, - const CVectorNx1 &mean, - const CSymmetricMatrixNxN &covariances) - { - return SSampleCovariances(count, mean, covariances); + return *this; } - //! Make a covariances accumulator. - template - static inline SSampleCovariances accumulator(const CVectorNx1 &count, - const CVectorNx1 &mean, - const CSymmetricMatrixNxN &covariances) - { - return SSampleCovariances(count, mean, covariances); + //! Subtract \p rhs from these. + //! + //! \note That this isn't always well defined. For example, + //! the count and variance of these covariances must be + //! larger than \p rhs. The caller must ensure that these + //! conditions are satisfied. + template + SSampleCovariances operator-(const SSampleCovariances& rhs) { + SSampleCovariances result{*this}; + return result -= rhs; } - //! Extract the count from an accumulator object. - template - static inline T count(const SSampleCovariances &accumulator) - { - return accumulator.s_Count.L1() / static_cast(N); - } + //! Age the mean and covariances by reducing the count. + //! + //! \note \p factor should be in the range [0,1]. + //! \note It must be possible to cast double to T to use + //! this method. + void age(double factor) { s_Count = s_Count * T{factor}; } + //@} - //! Extract the mean vector from an accumulator object. - template - static inline const CVectorNx1 &mean(const SSampleCovariances &accumulator) - { - return accumulator.s_Mean; - } + //! Get a checksum for this object. + uint64_t checksum() const; - //! Extract the covariance matrix from an accumulator object. - //! - //! \note This is the unbiased form. - template - static inline CSymmetricMatrixNxN covariances(const SSampleCovariances &accumulator) - { - CVectorNx1 bias(accumulator.s_Count); - for (std::size_t i = 0u; i < N; ++i) - { - if (bias(i) <= T{1}) - { - bias(i) = T{0}; - } + TVector s_Count; + TVector s_Mean; + TMatrix s_Covariances; + }; + + //! Make a covariances accumulator. + template + static inline SSampleCovariances + accumulator(T count, const CVectorNx1& mean, const CSymmetricMatrixNxN& covariances) { + return SSampleCovariances(count, mean, covariances); + } + + //! Make a covariances accumulator. + template + static inline SSampleCovariances + accumulator(const CVectorNx1& count, const CVectorNx1& mean, const CSymmetricMatrixNxN& covariances) { + return SSampleCovariances(count, mean, covariances); + } + + //! Extract the count from an accumulator object. + template + static inline T count(const SSampleCovariances& accumulator) { + return accumulator.s_Count.L1() / static_cast(N); + } + + //! Extract the mean vector from an accumulator object. + template + static inline const CVectorNx1& mean(const SSampleCovariances& accumulator) { + return accumulator.s_Mean; + } + + //! Extract the covariance matrix from an accumulator object. + //! + //! \note This is the unbiased form. + template + static inline CSymmetricMatrixNxN covariances(const SSampleCovariances& accumulator) { + CVectorNx1 bias(accumulator.s_Count); + for (std::size_t i = 0u; i < N; ++i) { + if (bias(i) <= T{1}) { + bias(i) = T{0}; } - bias /= (bias - CVectorNx1{1}); - CSymmetricMatrixNxN result{accumulator.s_Covariances}; - scaleCovariances(bias, result); - return result; } + bias /= (bias - CVectorNx1{1}); + CSymmetricMatrixNxN result{accumulator.s_Covariances}; + scaleCovariances(bias, result); + return result; + } - //! Extract the covariance matrix from an accumulator object. - //! - //! \note This is the unbiased form. 
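Only covariances() applies the bias correction: each dimension's maximum likelihood estimate is scaled by count / (count - 1), and dimensions with a count of one or less are zeroed because no unbiased estimate exists for them. By contrast, maximumLikelihoodCovariances(), despite its similar comment, returns s_Covariances uncorrected. The scalar analogue of the correction, for reference:

\code{cpp}
// Scalar version of the per-dimension correction in covariances():
// e.g. for two samples the maximum likelihood variance is half the
// squared difference and the 2 / 1 factor restores the unbiased value.
double unbiasedVariance(double count, double mlVariance) {
    if (count <= 1.0) {
        return 0.0; // no unbiased estimate from a single sample
    }
    return mlVariance * count / (count - 1.0);
}
\endcode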
- template - static inline const CSymmetricMatrixNxN &maximumLikelihoodCovariances(const SSampleCovariances &accumulator) - { - return accumulator.s_Covariances; - } + //! Extract the covariance matrix from an accumulator object. + //! + //! \note This is the unbiased form. + template + static inline const CSymmetricMatrixNxN& maximumLikelihoodCovariances(const SSampleCovariances& accumulator) { + return accumulator.s_Covariances; + } - //! Print a covariances accumulator. - template - static inline std::string print(const SSampleCovariances &accumulator) - { - std::ostringstream result; - result << "\n{\n" - << count(accumulator) << ",\n" - << mean(accumulator) << ",\n" - << covariances(accumulator) << "\n" - << "}"; - return result.str(); - } + //! Print a covariances accumulator. + template + static inline std::string print(const SSampleCovariances& accumulator) { + std::ostringstream result; + result << "\n{\n" + << count(accumulator) << ",\n" + << mean(accumulator) << ",\n" + << covariances(accumulator) << "\n" + << "}"; + return result.str(); + } - //! Interface for Ledoit Wolf shrinkage estimator of the sample - //! covariance matrix. - //! - //! See http://perso.ens-lyon.fr/patrick.flandrin/LedoitWolf_JMA2004.pdf - //! for the details. - //! - //! \param[in] points The points for which to estimate the covariance - //! matrix. - //! \param[out] result Filled in with the count, mean and "shrunk" - //! covariance matrix estimate. - template - static void covariancesLedoitWolf(const std::vector &points, - SSampleCovariances &result) - { - result.add(points); - basic_statistics_detail::SCovariancesLedoitWolf::estimate(points, result); - } + //! Interface for Ledoit Wolf shrinkage estimator of the sample + //! covariance matrix. + //! + //! See http://perso.ens-lyon.fr/patrick.flandrin/LedoitWolf_JMA2004.pdf + //! for the details. + //! + //! \param[in] points The points for which to estimate the covariance + //! matrix. + //! \param[out] result Filled in with the count, mean and "shrunk" + //! covariance matrix estimate. + template + static void covariancesLedoitWolf(const std::vector& points, SSampleCovariances& result) { + result.add(points); + basic_statistics_detail::SCovariancesLedoitWolf::estimate(points, result); + } - private: - //! \brief Implementation of an accumulator class for order statistics. - //! - //! DESCRIPTION:\n - //! This implements the underlying algorithm for determining the first - //! n order statistics online. - //! - //! IMPLEMENTATION:\n - //! This maintains the statistics in a heap for worst case complexity - //! \f$O(N log(n))\f$ and typical complexity \f$O(N)\f$ (by checking - //! against the maximum value) for \f$n << N\f$. - //! - //! The container is supplied as a template argument so that a fixed - //! size array can be used for the case n is small. Similarly the less - //! function is supplied so that T can be any type which supports a - //! partial ordering. (T must also have a default constructor.) - template - class COrderStatisticsImpl : public std::unary_function - { - public: - using iterator = typename CONTAINER::iterator; - using const_iterator = typename CONTAINER::const_iterator; - using reverse_iterator = typename CONTAINER::reverse_iterator; - using const_reverse_iterator = typename CONTAINER::const_reverse_iterator; - - public: - COrderStatisticsImpl(const CONTAINER &statistics, const LESS &less) : - m_Less(less), - m_Statistics(statistics), - m_UnusedCount(statistics.size()) - {} - - //! \name Persistence - //@{ - //! 
Initialize from a delimited string. - bool fromDelimited(const std::string &value); - - //! Convert to a delimited string. - std::string toDelimited() const; - //@} - - //! \name Update - //@{ - //! Define a function operator for use with std:: algorithms. - inline bool operator()(const T &x) - { - return this->add(x); - } +private: + //! \brief Implementation of an accumulator class for order statistics. + //! + //! DESCRIPTION:\n + //! This implements the underlying algorithm for determining the first + //! n order statistics online. + //! + //! IMPLEMENTATION:\n + //! This maintains the statistics in a heap for worst case complexity + //! \f$O(N log(n))\f$ and typical complexity \f$O(N)\f$ (by checking + //! against the maximum value) for \f$n << N\f$. + //! + //! The container is supplied as a template argument so that a fixed + //! size array can be used for the case n is small. Similarly the less + //! function is supplied so that T can be any type which supports a + //! partial ordering. (T must also have a default constructor.) + template + class COrderStatisticsImpl : public std::unary_function { + public: + using iterator = typename CONTAINER::iterator; + using const_iterator = typename CONTAINER::const_iterator; + using reverse_iterator = typename CONTAINER::reverse_iterator; + using const_reverse_iterator = typename CONTAINER::const_reverse_iterator; - //! Check if we would add \p x. - bool wouldAdd(const T &x) const - { - return m_UnusedCount > 0 || m_Less(x, *this->begin()); - } + public: + COrderStatisticsImpl(const CONTAINER& statistics, const LESS& less) + : m_Less(less), m_Statistics(statistics), m_UnusedCount(statistics.size()) {} - //! Update the statistics with the collection \p x. - bool add(const std::vector &x) - { - bool result = false; - for (const auto &xi : x) - { - result |= this->add(xi); - } - return result; - } + //! \name Persistence + //@{ + //! Initialize from a delimited string. + bool fromDelimited(const std::string& value); - //! Update the statistic with \p n copies of \p x. - bool add(const T &x, std::size_t n) - { - bool result = false; - for (std::size_t i = 0u; i < std::min(n, m_Statistics.size()); ++i) - { - result |= this->add(x); - } - return result; - } + //! Convert to a delimited string. + std::string toDelimited() const; + //@} - //! Update the statistics with \p x. -#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) - __attribute__ ((__noinline__)) -#endif // defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) - bool add(const T &x) - { - if (m_UnusedCount > 0) - { - m_Statistics[--m_UnusedCount] = x; - - if (m_UnusedCount == 0) - { - // We need a heap for subsequent insertion. - std::make_heap(this->begin(), this->end(), m_Less); - } - return true; - } - else if (m_Less(x, *this->begin())) - { - // We need to drop the largest value and update the heap. - std::pop_heap(this->begin(), this->end(), m_Less); - m_Statistics.back() = x; - std::push_heap(this->begin(), this->end(), m_Less); - return true; - } - return false; - } + //! \name Update + //@{ + //! Define a function operator for use with std:: algorithms. + inline bool operator()(const T& x) { return this->add(x); } - //! An efficient sort of the statistics (which are not stored - //! in sorted order during accumulation for efficiency). - void sort() - { - if (m_UnusedCount > 0) - { - std::sort(this->begin(), this->end(), m_Less); - } - else - { - std::sort_heap(this->begin(), this->end(), m_Less); - } - } + //! Check if we would add \p x. 
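The displacement logic this class implements can be reproduced with the standard heap algorithms alone, which may help when reading add() below: fill the buffer back to front, heapify once it is full, then pop/push only when a candidate beats the current worst. A minimal sketch keeping the n smallest doubles (a hypothetical class with n > 0 assumed; the real implementation adds an arbitrary ordering, a pluggable container, persistence and aging):

\code{cpp}
#include <algorithm>
#include <cstddef>
#include <vector>

class SmallestN {
public:
    explicit SmallestN(std::size_t n) : m_N(n) {}

    bool add(double x) {
        if (m_Values.size() < m_N) {
            m_Values.push_back(x);
            if (m_Values.size() == m_N) {
                // A max-heap so the worst kept value is at the front.
                std::make_heap(m_Values.begin(), m_Values.end());
            }
            return true;
        }
        if (x < m_Values.front()) {
            // Drop the largest kept value and re-establish the heap.
            std::pop_heap(m_Values.begin(), m_Values.end());
            m_Values.back() = x;
            std::push_heap(m_Values.begin(), m_Values.end());
            return true;
        }
        return false;
    }

    const std::vector<double>& values() const { return m_Values; }

private:
    std::size_t m_N;
    std::vector<double> m_Values;
};
\endcode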
+ bool wouldAdd(const T& x) const { return m_UnusedCount > 0 || m_Less(x, *this->begin()); } - //! Age the values by scaling them. - //! \note \p factor should be in the range (0,1]. - //! \note It must be possible to multiply T by double to use - //! this method. - void age(double factor) - { - if (this->count() == 0) - { - return; - } + //! Update the statistics with the collection \p x. + bool add(const std::vector& x) { + bool result = false; + for (const auto& xi : x) { + result |= this->add(xi); + } + return result; + } - // Check if we need to multiply or divide by the factor. - T tmp{(*this)[0]}; - if (m_Less(static_cast(tmp * factor), tmp)) - { - factor = 1.0 / factor; - } + //! Update the statistic with \p n copies of \p x. + bool add(const T& x, std::size_t n) { + bool result = false; + for (std::size_t i = 0u; i < std::min(n, m_Statistics.size()); ++i) { + result |= this->add(x); + } + return result; + } - for (iterator i = this->begin(); i != this->end(); ++i) - { - *i = static_cast((*i) * factor); - } - } - //@} - - //! \name Access - //@{ - //! Get the "biggest" in the collection. This depends on the - //! order predicate and is effectively the first value which - //! will be removed if a new value displaces it. - inline const T &biggest() const - { - return m_UnusedCount > 0 ? - *std::max_element(this->begin(), - this->end(), m_Less) : *this->begin(); +//! Update the statistics with \p x. +#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) + __attribute__((__noinline__)) +#endif // defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) + bool + add(const T& x) { + if (m_UnusedCount > 0) { + m_Statistics[--m_UnusedCount] = x; + + if (m_UnusedCount == 0) { + // We need a heap for subsequent insertion. + std::make_heap(this->begin(), this->end(), m_Less); } + return true; + } else if (m_Less(x, *this->begin())) { + // We need to drop the largest value and update the heap. + std::pop_heap(this->begin(), this->end(), m_Less); + m_Statistics.back() = x; + std::push_heap(this->begin(), this->end(), m_Less); + return true; + } + return false; + } - //! Get the number of statistics. - inline std::size_t count() const - { - return m_Statistics.size() - m_UnusedCount; - } + //! An efficient sort of the statistics (which are not stored + //! in sorted order during accumulation for efficiency). + void sort() { + if (m_UnusedCount > 0) { + std::sort(this->begin(), this->end(), m_Less); + } else { + std::sort_heap(this->begin(), this->end(), m_Less); + } + } - //! Get the i'th statistic. - inline T &operator[](std::size_t i) - { - return m_Statistics[m_UnusedCount + i]; - } - //! Get the i'th statistic. - inline const T &operator[](std::size_t i) const - { - return m_Statistics[m_UnusedCount + i]; - } + //! Age the values by scaling them. + //! \note \p factor should be in the range (0,1]. + //! \note It must be possible to multiply T by double to use + //! this method. + void age(double factor) { + if (this->count() == 0) { + return; + } - //! Get an iterator over the statistics. - inline iterator begin() - { - return m_Statistics.begin() + m_UnusedCount; - } - //! Get an iterator over the statistics. - inline const_iterator begin() const - { - return m_Statistics.begin() + m_UnusedCount; - } + // Check if we need to multiply or divide by the factor. + T tmp{(*this)[0]}; + if (m_Less(static_cast(tmp * factor), tmp)) { + factor = 1.0 / factor; + } - //! Get a reverse iterator over the order statistics. 
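The factor inversion just above is what keeps ageing meaningful for either ordering: scaling must always make the stored statistics easier to displace. For example (a sketch using the stack accumulator defined later in this header):

\code{cpp}
// Three smallest positive values: multiplying by 0.9 would make every
// stored statistic look better than fresh data, so age() divides by
// the factor instead and the kept minima decay upwards.
ml::maths::CBasicStatistics::COrderStatisticsStack<double, 3> smallest;
smallest.add(std::vector<double>{2.0, 4.0, 8.0});
smallest.age(0.9); // statistics become {2.0 / 0.9, 4.0 / 0.9, 8.0 / 0.9}
\endcode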
- inline reverse_iterator rbegin() - { - return m_Statistics.rbegin(); - } - //! Get a reverse iterator over the order statistics. - inline const_reverse_iterator rbegin() const - { - return m_Statistics.rbegin(); - } + for (iterator i = this->begin(); i != this->end(); ++i) { + *i = static_cast((*i) * factor); + } + } + //@} - //! Get an iterator representing the end of the statistics. - inline iterator end() - { - return m_Statistics.end(); - } - //! Get an iterator representing the end of the statistics. - inline const_iterator end() const - { - return m_Statistics.end(); - } + //! \name Access + //@{ + //! Get the "biggest" in the collection. This depends on the + //! order predicate and is effectively the first value which + //! will be removed if a new value displaces it. + inline const T& biggest() const { + return m_UnusedCount > 0 ? *std::max_element(this->begin(), this->end(), m_Less) : *this->begin(); + } - //! Get an iterator representing the end of the statistics. - inline reverse_iterator rend() - { - return m_Statistics.rbegin() + m_UnusedCount; - } - //! Get an iterator representing the end of the statistics. - inline const_reverse_iterator rend() const - { - return m_Statistics.rbegin() + m_UnusedCount; - } - //@} + //! Get the number of statistics. + inline std::size_t count() const { return m_Statistics.size() - m_UnusedCount; } + + //! Get the i'th statistic. + inline T& operator[](std::size_t i) { return m_Statistics[m_UnusedCount + i]; } + //! Get the i'th statistic. + inline const T& operator[](std::size_t i) const { return m_Statistics[m_UnusedCount + i]; } + + //! Get an iterator over the statistics. + inline iterator begin() { return m_Statistics.begin() + m_UnusedCount; } + //! Get an iterator over the statistics. + inline const_iterator begin() const { return m_Statistics.begin() + m_UnusedCount; } + + //! Get a reverse iterator over the order statistics. + inline reverse_iterator rbegin() { return m_Statistics.rbegin(); } + //! Get a reverse iterator over the order statistics. + inline const_reverse_iterator rbegin() const { return m_Statistics.rbegin(); } + + //! Get an iterator representing the end of the statistics. + inline iterator end() { return m_Statistics.end(); } + //! Get an iterator representing the end of the statistics. + inline const_iterator end() const { return m_Statistics.end(); } + + //! Get an iterator representing the end of the statistics. + inline reverse_iterator rend() { return m_Statistics.rbegin() + m_UnusedCount; } + //! Get an iterator representing the end of the statistics. + inline const_reverse_iterator rend() const { return m_Statistics.rbegin() + m_UnusedCount; } + //@} - //! Remove all statistics. - void clear() - { - std::fill(m_Statistics.begin() + m_UnusedCount, m_Statistics.end(), T{}); - m_UnusedCount = m_Statistics.size(); - } + //! Remove all statistics. + void clear() { + std::fill(m_Statistics.begin() + m_UnusedCount, m_Statistics.end(), T{}); + m_UnusedCount = m_Statistics.size(); + } - //! Get a checksum of this object. - uint64_t checksum(uint64_t seed) const; + //! Get a checksum of this object. + uint64_t checksum(uint64_t seed) const; - //! Print for debug. - std::string print() const - { - return core::CContainerPrinter::print(this->begin(), this->end()); - } + //! Print for debug. + std::string print() const { return core::CContainerPrinter::print(this->begin(), this->end()); } - protected: - //! Get the statistics. - CONTAINER &statistics() { return m_Statistics; } + protected: + //! Get the statistics. 
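Persistence for the order statistics accumulators is the toDelimited() / fromDelimited() pair declared earlier; a round trip is simply (a sketch, using the heap variant defined below):

\code{cpp}
ml::maths::CBasicStatistics::COrderStatisticsHeap<double> statistics(3);
statistics.add(std::vector<double>{5.0, 1.0, 3.0, 4.0});

std::string state{statistics.toDelimited()};

ml::maths::CBasicStatistics::COrderStatisticsHeap<double> restored(3);
if (restored.fromDelimited(state) == false) {
    // handle the corrupt state
}
\endcode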
+ CONTAINER& statistics() { return m_Statistics; } - private: - LESS m_Less; - CONTAINER m_Statistics; - //! How many elements of the container are unused? - std::size_t m_UnusedCount; - }; + private: + LESS m_Less; + CONTAINER m_Statistics; + //! How many elements of the container are unused? + std::size_t m_UnusedCount; + }; + +public: + //! \brief A stack based accumulator class for order statistics. + //! + //! DESCRIPTION:\n + //! This function object accumulates the first n order statistics + //! for a partially ordered collection when n is relatively small + //! and known at compile time. + //! + //! The ordering function is supplied by the user and so this can + //! also be used to calculate the maximum statistics of a collection. + //! For example:\n + //! \code{cpp} + //! COrderStatistics> + //! \endcode + //! + //! would find the largest two values of a collection. + //! + //! IMPLEMENTATION DECISIONS:\n + //! This is templatized to support any type for which a partial ordering + //! can be defined. To this end the ordering is a template parameter + //! which is also supplied to the constructor in the case it doesn't + //! have default constructor. + //! + //! This object has been implemented to give near optimal performance + //! both in terms of space and complexity when the number of order + //! statistics being computed is small. As such, the number of statistics + //! to compute is supplied as a template parameter so that exactly enough + //! memory is allocated to store the results and so they are stored on + //! the stack by default. + //! + //! \tparam T The numeric type for which the order statistic is being + //! computed. + //! \tparam N The number of order statistics being computed. + //! \tparam LESS The comparison function object type used to test + //! if one object of type T is less than another. + template> + class COrderStatisticsStack : public COrderStatisticsImpl, LESS>, + private boost::addable> { + private: + using TArray = boost::array; + using TImpl = COrderStatisticsImpl; public: - //! \brief A stack based accumulator class for order statistics. - //! - //! DESCRIPTION:\n - //! This function object accumulates the first n order statistics - //! for a partially ordered collection when n is relatively small - //! and known at compile time. - //! - //! The ordering function is supplied by the user and so this can - //! also be used to calculate the maximum statistics of a collection. - //! For example:\n - //! \code{cpp} - //! COrderStatistics> - //! \endcode - //! - //! would find the largest two values of a collection. - //! - //! IMPLEMENTATION DECISIONS:\n - //! This is templatized to support any type for which a partial ordering - //! can be defined. To this end the ordering is a template parameter - //! which is also supplied to the constructor in the case it doesn't - //! have default constructor. - //! - //! This object has been implemented to give near optimal performance - //! both in terms of space and complexity when the number of order - //! statistics being computed is small. As such, the number of statistics - //! to compute is supplied as a template parameter so that exactly enough - //! memory is allocated to store the results and so they are stored on - //! the stack by default. - //! - //! \tparam T The numeric type for which the order statistic is being - //! computed. - //! \tparam N The number of order statistics being computed. - //! \tparam LESS The comparison function object type used to test - //! 
if one object of type T is less than another. - template> - class COrderStatisticsStack : public COrderStatisticsImpl, LESS>, - private boost::addable> - { - private: - using TArray = boost::array; - using TImpl = COrderStatisticsImpl; - - public: - // Forward typedefs - using iterator = typename TImpl::iterator; - using const_iterator = typename TImpl::const_iterator; - - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() - { - return core::memory_detail::SDynamicSizeAlwaysZero::value(); - } + // Forward typedefs + using iterator = typename TImpl::iterator; + using const_iterator = typename TImpl::const_iterator; - public: - explicit COrderStatisticsStack(const LESS &less = LESS{}) : - TImpl{TArray(), less} - { - this->statistics().assign(T{}); - } + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } - explicit COrderStatisticsStack(std::size_t /*n*/, const LESS &less = LESS{}) : - TImpl{TArray(), less} - { - this->statistics().assign(T{}); - } + public: + explicit COrderStatisticsStack(const LESS& less = LESS{}) : TImpl{TArray(), less} { this->statistics().assign(T{}); } - //! Combine two statistics. This is equivalent to running a - //! single accumulator on the entire collection. - const COrderStatisticsStack &operator+=(const COrderStatisticsStack &rhs) - { - for (const auto &xi : rhs) - { - this->add(xi); - } - return *this; - } + explicit COrderStatisticsStack(std::size_t /*n*/, const LESS& less = LESS{}) : TImpl{TArray(), less} { + this->statistics().assign(T{}); + } - //! Create a member function so this class works with CChecksum. - uint64_t checksum(uint64_t seed = 0) const - { - return this->TImpl::checksum(seed); - } - }; + //! Combine two statistics. This is equivalent to running a + //! single accumulator on the entire collection. + const COrderStatisticsStack& operator+=(const COrderStatisticsStack& rhs) { + for (const auto& xi : rhs) { + this->add(xi); + } + return *this; + } - //! \brief A heap based accumulator class for order statistics. - //! - //! DESCRIPTION:\n - //! This function object accumulates the first n order statistics - //! for a partially ordered collection when n is relatively large - //! or not known at compile time. - //! - //! The ordering function is supplied by the user and so this can be - //! also used to calculate the maximum statistics of a collection. - //! For example:\n - //! \code{cpp} - //! COrderStatistics> - //! \endcode - //! - //! would find the largest values of a collection. - //! - //! IMPLEMENTATION DECISIONS:\n - //! This is templatized to support any type for which a partial ordering - //! can be defined. To this end the ordering is a template parameter - //! which is also supplied to the constructor in the case it doesn't - //! have default constructor. - //! - //! The statistics are stored on the heap in an up front allocated vector. - //! Since the number of statistics must be fixed for the life time of - //! accumulation it makes sense to allocate the memory once up front. - //! This also initializes the memory since it shares its implementation - //! with the array based accumulator. - //! - //! \tparam T The numeric type for which the order statistic is being - //! computed. - //! \tparam LESS The comparison function object type used to test - //! if one object of type T is less than another. 
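Typical use of the two variants, as a sketch: the stack form fixes the number of statistics at compile time, the heap form (documented next) at run time.

\code{cpp}
#include <functional>
#include <vector>

using ml::maths::CBasicStatistics;

// The two largest values, via an inverted ordering.
CBasicStatistics::COrderStatisticsStack<double, 2, std::greater<double>> largest;

// The three smallest values, size chosen at run time.
CBasicStatistics::COrderStatisticsHeap<double> smallest(3);

for (double x : {3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0}) {
    largest.add(x);
    smallest.add(x);
}

largest.sort();  // {9.0, 6.0}
smallest.sort(); // {1.0, 1.0, 2.0}
\endcode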
- template> - class COrderStatisticsHeap : public COrderStatisticsImpl, LESS>, - private boost::addable> - { - private: - using TImpl = COrderStatisticsImpl, LESS>; - - public: - // Forward typedefs - using iterator = typename TImpl::iterator; - using const_iterator = typename TImpl::const_iterator; - - public: - explicit COrderStatisticsHeap(std::size_t n, const LESS &less = LESS{}) : - TImpl{std::vector(n, T{}), less} - {} - - //! Reset the number of statistics to gather to \p n. - void resize(std::size_t n) - { - this->clear(); - this->statistics().resize(n); - } + //! Create a member function so this class works with CChecksum. + uint64_t checksum(uint64_t seed = 0) const { return this->TImpl::checksum(seed); } + }; + + //! \brief A heap based accumulator class for order statistics. + //! + //! DESCRIPTION:\n + //! This function object accumulates the first n order statistics + //! for a partially ordered collection when n is relatively large + //! or not known at compile time. + //! + //! The ordering function is supplied by the user and so this can be + //! also used to calculate the maximum statistics of a collection. + //! For example:\n + //! \code{cpp} + //! COrderStatistics> + //! \endcode + //! + //! would find the largest values of a collection. + //! + //! IMPLEMENTATION DECISIONS:\n + //! This is templatized to support any type for which a partial ordering + //! can be defined. To this end the ordering is a template parameter + //! which is also supplied to the constructor in the case it doesn't + //! have default constructor. + //! + //! The statistics are stored on the heap in an up front allocated vector. + //! Since the number of statistics must be fixed for the life time of + //! accumulation it makes sense to allocate the memory once up front. + //! This also initializes the memory since it shares its implementation + //! with the array based accumulator. + //! + //! \tparam T The numeric type for which the order statistic is being + //! computed. + //! \tparam LESS The comparison function object type used to test + //! if one object of type T is less than another. + template> + class COrderStatisticsHeap : public COrderStatisticsImpl, LESS>, + private boost::addable> { + private: + using TImpl = COrderStatisticsImpl, LESS>; - //! Combine two statistics. This is equivalent to running a - //! single accumulator on the entire collection. - const COrderStatisticsHeap &operator+=(const COrderStatisticsHeap &rhs) - { - for (const auto &xi : rhs) - { - this->add(xi); - } - return *this; - } + public: + // Forward typedefs + using iterator = typename TImpl::iterator; + using const_iterator = typename TImpl::const_iterator; - //! Create a member function so this class works with CChecksum. - uint64_t checksum(uint64_t seed = 0) const - { - return this->TImpl::checksum(seed); - } - }; + public: + explicit COrderStatisticsHeap(std::size_t n, const LESS& less = LESS{}) : TImpl{std::vector(n, T{}), less} {} - //! \name Accumulator Typedefs - //@{ - //! Accumulator object to compute the sample maximum. - template - struct SMax - { - using TAccumulator = COrderStatisticsStack>; - }; - - //! Accumulator object to compute the sample minimum. - template - struct SMin - { - using TAccumulator = COrderStatisticsStack; - }; - //@} + //! Reset the number of statistics to gather to \p n. + void resize(std::size_t n) { + this->clear(); + this->statistics().resize(n); + } - //! \brief An accumulator of the minimum and maximum value in a collection. - //! - //! 
IMPLEMENTATION DECISIONS:\n - //! This is templatized to support any type for which a partial ordering - //! can be defined. To this end the orderings are template parameters - //! which are also supplied to the constructor in the case they don't - //! have default constructors. - template, typename GREATER = std::greater> - class CMinMax : boost::addable> - { - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() - { - return core::memory_detail::SDynamicSizeAlwaysZero::value(); - } + //! Combine two statistics. This is equivalent to running a + //! single accumulator on the entire collection. + const COrderStatisticsHeap& operator+=(const COrderStatisticsHeap& rhs) { + for (const auto& xi : rhs) { + this->add(xi); + } + return *this; + } - public: - explicit CMinMax(const LESS &less = LESS{}, const GREATER &greater = GREATER{}) : - m_Min{less}, m_Max{greater} - {} + //! Create a member function so this class works with CChecksum. + uint64_t checksum(uint64_t seed = 0) const { return this->TImpl::checksum(seed); } + }; - //! Define a function operator for use with std:: algorithms. - inline bool operator()(const T &x) - { - return this->add(x); - } + //! \name Accumulator Typedefs + //@{ + //! Accumulator object to compute the sample maximum. + template + struct SMax { + using TAccumulator = COrderStatisticsStack>; + }; - //! Check if we would add \p x. - bool wouldAdd(const T &x) const - { - return m_Min.wouldAdd(x) || m_Max.wouldAdd(x); - } + //! Accumulator object to compute the sample minimum. + template + struct SMin { + using TAccumulator = COrderStatisticsStack; + }; + //@} + + //! \brief An accumulator of the minimum and maximum value in a collection. + //! + //! IMPLEMENTATION DECISIONS:\n + //! This is templatized to support any type for which a partial ordering + //! can be defined. To this end the orderings are template parameters + //! which are also supplied to the constructor in the case they don't + //! have default constructors. + template, typename GREATER = std::greater> + class CMinMax : boost::addable> { + public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } - //! Update the statistic with the collection \p x. - bool add(const std::vector &x) - { - bool result{false}; - for (const auto &xi : x) - { - result |= this->add(xi); - } - return result; - } + public: + explicit CMinMax(const LESS& less = LESS{}, const GREATER& greater = GREATER{}) : m_Min{less}, m_Max{greater} {} - //! Update the statistic with \p n copies of \p x. - bool add(const T &x, std::size_t n = 1) - { - bool result{false}; - if (n > 0) - { - result |= m_Min.add(x); - result |= m_Max.add(x); - } - return result; - } + //! Define a function operator for use with std:: algorithms. + inline bool operator()(const T& x) { return this->add(x); } - //! Combine two statistics. This is equivalent to running a - //! single accumulator on the entire collection. - const CMinMax &operator+=(const CMinMax &rhs) - { - m_Min += rhs.m_Min; - m_Max += rhs.m_Max; - return *this; - } + //! Check if we would add \p x. + bool wouldAdd(const T& x) const { return m_Min.wouldAdd(x) || m_Max.wouldAdd(x); } - //! Get the count of statistics. - bool initialized() const - { - return m_Min.count() > 0; - } + //! Update the statistic with the collection \p x. + bool add(const std::vector& x) { + bool result{false}; + for (const auto& xi : x) { + result |= this->add(xi); + } + return result; + } - //! Get the minimum value. 
-            T min() const
-            {
-                return m_Min[0];
-            }
+        //! Update the statistic with \p n copies of \p x.
+        bool add(const T& x, std::size_t n = 1) {
+            bool result{false};
+            if (n > 0) {
+                result |= m_Min.add(x);
+                result |= m_Max.add(x);
+            }
+            return result;
+        }
-            //! Get the maximum value.
-            T max() const
-            {
-                return m_Max[0];
-            }
+        //! Combine two statistics. This is equivalent to running a
+        //! single accumulator on the entire collection.
+        const CMinMax& operator+=(const CMinMax& rhs) {
+            m_Min += rhs.m_Min;
+            m_Max += rhs.m_Max;
+            return *this;
+        }
-            //! Get the range.
-            T range() const
-            {
-                return m_Max[0] - m_Min[0];
-            }
+        //! Get the count of statistics.
+        bool initialized() const { return m_Min.count() > 0; }
-            //! Get the margin by which all the values have the same sign.
-            T signMargin() const
-            {
-                if (this->initialized())
-                {
-                    if (m_Min[0] * m_Max[0] > T{0})
-                    {
-                        return m_Min[0] > T{0} ? m_Min[0] : m_Max[0];
-                    }
-                }
-                return T{0};
-            }
+        //! Get the minimum value.
+        T min() const { return m_Min[0]; }
+
+        //! Get the maximum value.
+        T max() const { return m_Max[0]; }
+
+        //! Get the range.
+        T range() const { return m_Max[0] - m_Min[0]; }
-            //! Get a checksum for this object.
-            uint64_t checksum() const
-            {
-                return core::CHashing::hashCombine(m_Min.checksum(), m_Max.checksum());
+        //! Get the margin by which all the values have the same sign.
+        T signMargin() const {
+            if (this->initialized()) {
+                if (m_Min[0] * m_Max[0] > T{0}) {
+                    return m_Min[0] > T{0} ? m_Min[0] : m_Max[0];
                }
+            }
+            return T{0};
+        }
-        private:
-            //! The set minimum.
-            COrderStatisticsStack<T, 1, LESS> m_Min;
-            //! The set maximum.
-            COrderStatisticsStack<T, 1, GREATER> m_Max;
-    };
+        //! Get a checksum for this object.
+        uint64_t checksum() const { return core::CHashing::hashCombine(m_Min.checksum(), m_Max.checksum()); }
+
+    private:
+        //! The set minimum.
+        COrderStatisticsStack<T, 1, LESS> m_Min;
+        //! The set maximum.
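For reference, a usage sketch of the accessors above; in particular signMargin() reports how far the whole sample is from straddling zero:

\code{cpp}
ml::maths::CBasicStatistics::CMinMax<double> minmax;
minmax.add(std::vector<double>{3.0, 7.0, 5.0});
// minmax.min() == 3.0, minmax.max() == 7.0, minmax.range() == 4.0.
// Every value is positive, so signMargin() == 3.0; after adding a
// negative value the signs differ and signMargin() returns 0.0.
minmax.add(-1.0);
\endcode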
+ COrderStatisticsStack m_Max; + }; // Friends template - friend std::ostream &operator<<(std::ostream &o, - const CBasicStatistics::SSampleCentralMoments &); + friend std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); template - friend std::ostream &operator<<(std::ostream &o, - const CBasicStatistics::SSampleCentralMoments &); + friend std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); template - friend std::ostream &operator<<(std::ostream &o, - const CBasicStatistics::SSampleCentralMoments &); + friend std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); }; template -std::ostream &operator<<(std::ostream &o, - const CBasicStatistics::SSampleCentralMoments &accumulator) -{ +std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments& accumulator) { return o << CBasicStatistics::print(accumulator); } template -std::ostream &operator<<(std::ostream &o, - const CBasicStatistics::SSampleCentralMoments &accumulator) -{ +std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments& accumulator) { return o << CBasicStatistics::print(accumulator); } template -std::ostream &operator<<(std::ostream &o, - const CBasicStatistics::SSampleCentralMoments &accumulator) -{ +std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments& accumulator) { return o << CBasicStatistics::print(accumulator); } template -std::ostream &operator<<(std::ostream &o, - const CBasicStatistics::COrderStatisticsStack &accumulator) -{ +std::ostream& operator<<(std::ostream& o, const CBasicStatistics::COrderStatisticsStack& accumulator) { return o << accumulator.print(); } template -std::ostream &operator<<(std::ostream &o, - const CBasicStatistics::COrderStatisticsHeap &accumulator) -{ +std::ostream& operator<<(std::ostream& o, const CBasicStatistics::COrderStatisticsHeap& accumulator) { return o << accumulator.print(); } -namespace basic_statistics_detail -{ +namespace basic_statistics_detail { //! \brief Default custom add function for values to the central //! moments estimator. template -struct SCentralMomentsCustomAdd -{ +struct SCentralMomentsCustomAdd { template - static inline void add(const U &x, - typename SCoordinate::Type n, - CBasicStatistics::SSampleCentralMoments &moments) - { + static inline void add(const U& x, typename SCoordinate::Type n, CBasicStatistics::SSampleCentralMoments& moments) { moments.add(static_cast(x), n, 0); } }; @@ -1737,12 +1438,8 @@ struct SCentralMomentsCustomAdd //! \brief Implementation of add stack vector to the covariances //! estimator. template -struct SCovariancesCustomAdd> -{ - static inline void add(const CVectorNx1 &x, - const CVectorNx1 &n, - CBasicStatistics::SSampleCovariances &covariances) - { +struct SCovariancesCustomAdd> { + static inline void add(const CVectorNx1& x, const CVectorNx1& n, CBasicStatistics::SSampleCovariances& covariances) { covariances.add(x, n, 0); } }; @@ -1757,49 +1454,42 @@ struct SCovariancesCustomAdd> //! See http://perso.ens-lyon.fr/patrick.flandrin/LedoitWolf_JMA2004.pdf //! for the details. 
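In symbols, the estimator implemented below computes a shrinkage intensity w = b / d, where m is the mean of the diagonal of the sample covariance matrix S, d measures how far S is from m * I, and b estimates the sampling noise (b is capped at d, so w <= 1); it then returns w * m * I + (1 - w) * S. The final combination step alone, as a sketch (N = 2, hypothetical names):

\code{cpp}
// Shrink the sample covariance matrix S towards mu * I with
// intensity w = b / d, as the estimate() below does on its last line.
void shrink(double (&S)[2][2], double mu, double b, double d) {
    double w = b / d; // in [0, 1] because b is clamped to d
    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j) {
            S[i][j] *= (1.0 - w);
        }
        S[i][i] += w * mu;
    }
}
\endcode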
template -struct SCovariancesLedoitWolf> -{ +struct SCovariancesLedoitWolf> { template - static void estimate(const std::vector> &points, - CBasicStatistics::SSampleCovariances &covariances) - { + static void estimate(const std::vector>& points, CBasicStatistics::SSampleCovariances& covariances) { U d{static_cast(N)}; U n{CBasicStatistics::count(covariances)}; - const CVectorNx1 &m{CBasicStatistics::mean(covariances)}; - const CSymmetricMatrixNxN &s{CBasicStatistics::maximumLikelihoodCovariances(covariances)}; + const CVectorNx1& m{CBasicStatistics::mean(covariances)}; + const CSymmetricMatrixNxN& s{CBasicStatistics::maximumLikelihoodCovariances(covariances)}; U mn{s.trace() / d}; U dn{pow2((s - CVectorNx1{mn}.diagonal()).frobenius()) / d}; U bn{0}; U z{n * n}; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { CVectorNx1 ci{points[i]}; bn += pow2(((ci - m).outer() - s).frobenius()) / d / z; } bn = std::min(bn, dn); LOG_TRACE("m = " << mn << ", d = " << dn << ", b = " << bn); - covariances.s_Covariances = CVectorNx1{bn / dn * mn}.diagonal() - + (U{1} - bn / dn) * covariances.s_Covariances; + covariances.s_Covariances = CVectorNx1{bn / dn * mn}.diagonal() + (U{1} - bn / dn) * covariances.s_Covariances; } - template static U pow2(U x) { return x * x; } + template + static U pow2(U x) { + return x * x; + } }; - } template template -void CBasicStatistics::SSampleCentralMoments::add(const U &x, const TCoordinate &n) -{ +void CBasicStatistics::SSampleCentralMoments::add(const U& x, const TCoordinate& n) { basic_statistics_detail::SCentralMomentsCustomAdd::add(x, n, *this); } - - } } #endif // INCLUDED_ml_maths_CBasicStatistics_h - diff --git a/include/maths/CBasicStatisticsPersist.h b/include/maths/CBasicStatisticsPersist.h index 12a82b6466..db4740e72a 100644 --- a/include/maths/CBasicStatisticsPersist.h +++ b/include/maths/CBasicStatisticsPersist.h @@ -17,29 +17,22 @@ #include -namespace ml -{ -namespace maths -{ -namespace basic_statistics_detail -{ +namespace ml { +namespace maths { +namespace basic_statistics_detail { //! Function to do conversion from string. template -bool stringToType(const std::string &str, T &value) -{ +bool stringToType(const std::string& str, T& value) { return core::CStringUtils::stringToType(str, value); } //! Function to do conversion from string to float storage. -inline bool stringToType(const std::string &str, CFloatStorage &value) -{ +inline bool stringToType(const std::string& str, CFloatStorage& value) { return value.fromString(str); } //! Function to do conversion from string to double storage. -inline bool stringToType(const std::string &str, CDoublePrecisionStorage &value) -{ +inline bool stringToType(const std::string& str, CDoublePrecisionStorage& value) { double d; - if (core::CStringUtils::stringToType(str, d) == false) - { + if (core::CStringUtils::stringToType(str, d) == false) { return false; } value = d; @@ -47,52 +40,43 @@ inline bool stringToType(const std::string &str, CDoublePrecisionStorage &value) } //! Function to do conversion from string to a vector. template -bool stringToType(const std::string &str, CVectorNx1 &value) -{ +bool stringToType(const std::string& str, CVectorNx1& value) { return value.fromDelimited(str); } //! Function to do conversion from string to a symmetric matrix. template -bool stringToType(const std::string &str, CSymmetricMatrixNxN &value) -{ +bool stringToType(const std::string& str, CSymmetricMatrixNxN& value) { return value.fromDelimited(str); } //! 
Function to do conversion to a string. template -inline std::string typeToString(const T &value) -{ +inline std::string typeToString(const T& value) { return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_SinglePrecision); } //! Function to do conversion to a string from float storage. -inline std::string typeToString(const CFloatStorage &value) -{ +inline std::string typeToString(const CFloatStorage& value) { return value.toString(); } //! Function to do conversion to a string from double storage. -inline std::string typeToString(const CDoublePrecisionStorage &value) -{ +inline std::string typeToString(const CDoublePrecisionStorage& value) { return core::CStringUtils::typeToStringPrecise(double(value), core::CIEEE754::E_DoublePrecision); } //! Function to do conversion to a string from a vector. template -inline std::string typeToString(const CVectorNx1 &value) -{ +inline std::string typeToString(const CVectorNx1& value) { return value.toDelimited(); } //! Function to do conversion to a string from a symmetric matrix. template -inline std::string typeToString(const CSymmetricMatrixNxN &value) -{ +inline std::string typeToString(const CSymmetricMatrixNxN& value) { return value.toDelimited(); } } template -bool CBasicStatistics::SSampleCentralMoments::fromDelimited(const std::string &str) -{ - if (str.empty()) - { +bool CBasicStatistics::SSampleCentralMoments::fromDelimited(const std::string& str) { + if (str.empty()) { LOG_ERROR("Empty accumulator representation"); return false; } @@ -107,39 +91,29 @@ bool CBasicStatistics::SSampleCentralMoments::fromDelimited(const std: std::string token; token.reserve(15); std::size_t delimPos{str.find(INTERNAL_DELIMITER, 0)}; - if (delimPos == std::string::npos) - { + if (delimPos == std::string::npos) { token.assign(str, 0, str.length()); - } - else - { + } else { token.assign(str, 0, delimPos); } - if (!basic_statistics_detail::stringToType(token, s_Count)) - { + if (!basic_statistics_detail::stringToType(token, s_Count)) { LOG_ERROR("Invalid count : element " << token << " in " << str); return false; } std::size_t lastDelimPos{delimPos}; std::size_t index{0}; - while (lastDelimPos != std::string::npos) - { + while (lastDelimPos != std::string::npos) { delimPos = str.find(INTERNAL_DELIMITER, lastDelimPos + 1); - if (delimPos == std::string::npos) - { + if (delimPos == std::string::npos) { token.assign(str, lastDelimPos + 1, str.length() - lastDelimPos); - } - else - { + } else { token.assign(str, lastDelimPos + 1, delimPos - lastDelimPos - 1); } - if (!basic_statistics_detail::stringToType(token, s_Moments[index++])) - { - LOG_ERROR("Invalid moment " << index - << " : element " << token << " in " << str); + if (!basic_statistics_detail::stringToType(token, s_Moments[index++])) { + LOG_ERROR("Invalid moment " << index << " : element " << token << " in " << str); return false; } @@ -150,11 +124,9 @@ bool CBasicStatistics::SSampleCentralMoments::fromDelimited(const std: } template -std::string CBasicStatistics::SSampleCentralMoments::toDelimited() const -{ +std::string CBasicStatistics::SSampleCentralMoments::toDelimited() const { std::string result(basic_statistics_detail::typeToString(s_Count)); - for (std::size_t index = 0; index < ORDER; ++index) - { + for (std::size_t index = 0; index < ORDER; ++index) { result += INTERNAL_DELIMITER; result += basic_statistics_detail::typeToString(s_Moments[index]); } @@ -163,12 +135,10 @@ std::string CBasicStatistics::SSampleCentralMoments::toDelimited() con } template -uint64_t 
CBasicStatistics::SSampleCentralMoments::checksum() const -{ +uint64_t CBasicStatistics::SSampleCentralMoments::checksum() const { std::ostringstream raw; raw << basic_statistics_detail::typeToString(s_Count); - for (std::size_t i = 0u; i < ORDER; ++i) - { + for (std::size_t i = 0u; i < ORDER; ++i) { raw << ' '; raw << basic_statistics_detail::typeToString(s_Moments[i]); } @@ -176,36 +146,29 @@ uint64_t CBasicStatistics::SSampleCentralMoments::checksum() const return hasher(raw.str()); } - template -bool CBasicStatistics::SSampleCovariances::fromDelimited(std::string str) -{ +bool CBasicStatistics::SSampleCovariances::fromDelimited(std::string str) { std::size_t count{0u}; - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { count = str.find_first_of(CLinearAlgebra::DELIMITER, count + 1); } - if (!s_Count.fromDelimited(str.substr(0, count))) - { + if (!s_Count.fromDelimited(str.substr(0, count))) { LOG_ERROR("Failed to extract counts from " << str.substr(0, count)); return false; } str = str.substr(count + 1); std::size_t means{0u}; - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { means = str.find_first_of(CLinearAlgebra::DELIMITER, means + 1); } - if (!s_Mean.fromDelimited(str.substr(0, means))) - { + if (!s_Mean.fromDelimited(str.substr(0, means))) { LOG_ERROR("Failed to extract means from " << str.substr(0, means)); return false; } str = str.substr(means + 1); - if (!s_Covariances.fromDelimited(str)) - { + if (!s_Covariances.fromDelimited(str)) { LOG_ERROR("Failed to extract covariances from " << str); return false; } @@ -214,18 +177,13 @@ bool CBasicStatistics::SSampleCovariances::fromDelimited(std::string str) } template -std::string CBasicStatistics::SSampleCovariances::toDelimited() const -{ - return s_Count.toDelimited() - + CLinearAlgebra::DELIMITER - + s_Mean.toDelimited() - + CLinearAlgebra::DELIMITER - + s_Covariances.toDelimited(); +std::string CBasicStatistics::SSampleCovariances::toDelimited() const { + return s_Count.toDelimited() + CLinearAlgebra::DELIMITER + s_Mean.toDelimited() + CLinearAlgebra::DELIMITER + + s_Covariances.toDelimited(); } template -uint64_t CBasicStatistics::SSampleCovariances::checksum() const -{ +uint64_t CBasicStatistics::SSampleCovariances::checksum() const { std::ostringstream raw; raw << basic_statistics_detail::typeToString(s_Count); raw << ' '; @@ -236,24 +194,19 @@ uint64_t CBasicStatistics::SSampleCovariances::checksum() const return hasher(raw.str()); } - template -bool CBasicStatistics::COrderStatisticsImpl::fromDelimited(const std::string &value) -{ +bool CBasicStatistics::COrderStatisticsImpl::fromDelimited(const std::string& value) { this->clear(); - if (value.empty()) - { + if (value.empty()) { return true; } T statistic; std::size_t delimPos{value.find(INTERNAL_DELIMITER)}; - if (delimPos == std::string::npos) - { - if (basic_statistics_detail::stringToType(value, statistic) == false) - { + if (delimPos == std::string::npos) { + if (basic_statistics_detail::stringToType(value, statistic) == false) { LOG_ERROR("Invalid statistic in '" << value << "'"); return false; } @@ -266,22 +219,16 @@ bool CBasicStatistics::COrderStatisticsImpl::fromDelimited(c std::string statistic_; statistic_.reserve(15); statistic_.assign(value, 0, delimPos); - if (basic_statistics_detail::stringToType(statistic_, statistic) == false) - { + if (basic_statistics_detail::stringToType(statistic_, statistic) == false) { LOG_ERROR("Invalid statistic '" << statistic_ << "' in '" << value << "'"); return 
false; } m_Statistics[--m_UnusedCount] = statistic; - while (delimPos != value.size()) - { - std::size_t nextDelimPos{std::min(value.find(INTERNAL_DELIMITER, delimPos + 1), - value.size())}; - statistic_.assign(value, - delimPos + 1, - nextDelimPos - delimPos - 1); - if (basic_statistics_detail::stringToType(statistic_, statistic) == false) - { + while (delimPos != value.size()) { + std::size_t nextDelimPos{std::min(value.find(INTERNAL_DELIMITER, delimPos + 1), value.size())}; + statistic_.assign(value, delimPos + 1, nextDelimPos - delimPos - 1); + if (basic_statistics_detail::stringToType(statistic_, statistic) == false) { LOG_ERROR("Invalid statistic '" << statistic_ << "' in '" << value << "'"); return false; } @@ -293,16 +240,13 @@ bool CBasicStatistics::COrderStatisticsImpl::fromDelimited(c } template -std::string CBasicStatistics::COrderStatisticsImpl::toDelimited() const -{ - if (this->count() == 0) - { +std::string CBasicStatistics::COrderStatisticsImpl::toDelimited() const { + if (this->count() == 0) { return std::string{}; } std::size_t i{m_Statistics.size()}; std::string result{basic_statistics_detail::typeToString(m_Statistics[i - 1])}; - for (--i; i > m_UnusedCount; --i) - { + for (--i; i > m_UnusedCount; --i) { result += INTERNAL_DELIMITER; result += basic_statistics_detail::typeToString(m_Statistics[i - 1]); } @@ -310,25 +254,21 @@ std::string CBasicStatistics::COrderStatisticsImpl::toDelimi } template -uint64_t CBasicStatistics::COrderStatisticsImpl::checksum(uint64_t seed) const -{ - if (this->count() == 0) - { +uint64_t CBasicStatistics::COrderStatisticsImpl::checksum(uint64_t seed) const { + if (this->count() == 0) { return seed; } std::vector sorted(this->begin(), this->end()); std::sort(sorted.begin(), sorted.end(), m_Less); std::ostringstream raw; raw << basic_statistics_detail::typeToString(sorted[0]); - for (std::size_t i = 1u; i < sorted.size(); ++i) - { + for (std::size_t i = 1u; i < sorted.size(); ++i) { raw << ' '; raw << basic_statistics_detail::typeToString(sorted[i]); } core::CHashing::CSafeMurmurHash2String64 hasher(seed); return hasher(raw.str()); } - } } diff --git a/include/maths/CBjkstUniqueValues.h b/include/maths/CBjkstUniqueValues.h index c5c4b176eb..d2afc54c48 100644 --- a/include/maths/CBjkstUniqueValues.h +++ b/include/maths/CBjkstUniqueValues.h @@ -22,11 +22,8 @@ #include - -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief The BJSKT algorithm for estimating the number of unique values //! in a collection. @@ -71,36 +68,74 @@ namespace maths //! Note that the hash map lookup constants are good but the complexity //! is bad \f$O(m)\f$ so the \p maxSize parameter supplied to the //! constructor should be less than a few hundred. -class MATHS_EXPORT CBjkstUniqueValues -{ - public: - using TUInt32HashVec = core::CHashing::CUniversalHash::TUInt32UnrestrictedHashVec; +class MATHS_EXPORT CBjkstUniqueValues { +public: + using TUInt32HashVec = core::CHashing::CUniversalHash::TUInt32UnrestrictedHashVec; - public: - //! Get the count of trailing zeros in value. - static uint8_t trailingZeros(uint32_t value); +public: + //! Get the count of trailing zeros in value. + static uint8_t trailingZeros(uint32_t value); - public: - //! \param numberHashes The number of independent hashes. - //! \param maxSize The maximum size of the hash sets. - CBjkstUniqueValues(std::size_t numberHashes, std::size_t maxSize); +public: + //! \param numberHashes The number of independent hashes. + //! \param maxSize The maximum size of the hash sets. 
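The essence of the algorithm with a single hash function, as a simplified sketch (the class above additionally keeps several independent sketches and medians their estimates, stores values compactly and supports removal):

\code{cpp}
#include <cstddef>
#include <cstdint>
#include <unordered_set>

class DistinctCounter {
public:
    explicit DistinctCounter(std::size_t maxSize) : m_MaxSize(maxSize) {}

    void add(std::uint32_t hashedValue) {
        if (trailingZeros(hashedValue) >= m_Z) {
            m_B.insert(hashedValue);
            while (m_B.size() > m_MaxSize) {
                // Overflow: keep an exponentially sparser subsample.
                ++m_Z;
                for (auto i = m_B.begin(); i != m_B.end();) {
                    i = trailingZeros(*i) < m_Z ? m_B.erase(i) : ++i;
                }
            }
        }
    }

    // A value survives with probability 2^-z, so scale back up.
    std::uint64_t number() const {
        return static_cast<std::uint64_t>(m_B.size()) << m_Z;
    }

private:
    static unsigned trailingZeros(std::uint32_t value) {
        if (value == 0) {
            return 32;
        }
        unsigned result{0};
        for (; (value & 0x1) == 0; value >>= 1) {
            ++result;
        }
        return result;
    }

private:
    std::size_t m_MaxSize;
    unsigned m_Z{0};
    std::unordered_set<std::uint32_t> m_B;
};
\endcode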
+ CBjkstUniqueValues(std::size_t numberHashes, std::size_t maxSize); - //! Create by traversing a state document. - CBjkstUniqueValues(core::CStateRestoreTraverser &traverser); + //! Create by traversing a state document. + CBjkstUniqueValues(core::CStateRestoreTraverser& traverser); + + //! Efficiently swap the contents of two sketches. + void swap(CBjkstUniqueValues& other); + +private: + //! Create by traversing a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + +public: + //! Convert to a node tree. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Add a new value. + void add(uint32_t value); + + //! Remove a value. + void remove(uint32_t value); + + //! Get an estimate of the number of unique values added. + uint32_t number() const; + + //! Get a checksum for the sketch. + uint64_t checksum(uint64_t seed = 0) const; + + //! Get the memory used by this sketch. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this sketch. + std::size_t memoryUsage() const; + +private: + using TUInt8Vec = std::vector; + using TUInt8VecVec = std::vector; + using TUInt32Vec = std::vector; + using TUInt32VecItr = TUInt32Vec::iterator; + using TUInt32VecCItr = TUInt32Vec::const_iterator; + + //! Wraps up the sketch data. + struct MATHS_EXPORT SSketch { + SSketch(); + SSketch(std::size_t numberHashes); //! Efficiently swap the contents of two sketches. - void swap(CBjkstUniqueValues &other); + void swap(SSketch& other); - private: //! Create by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::size_t numberHashes); - public: //! Convert to a node tree. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Add a new value. - void add(uint32_t value); + void add(std::size_t maxSize, uint32_t value); //! Remove a value. void remove(uint32_t value); @@ -108,72 +143,30 @@ class MATHS_EXPORT CBjkstUniqueValues //! Get an estimate of the number of unique values added. uint32_t number() const; - //! Get a checksum for the sketch. - uint64_t checksum(uint64_t seed = 0) const; - - //! Get the memory used by this sketch. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this sketch. - std::size_t memoryUsage() const; - - private: - using TUInt8Vec = std::vector; - using TUInt8VecVec = std::vector; - using TUInt32Vec = std::vector; - using TUInt32VecItr = TUInt32Vec::iterator; - using TUInt32VecCItr = TUInt32Vec::const_iterator; - - //! Wraps up the sketch data. - struct MATHS_EXPORT SSketch - { - SSketch(); - SSketch(std::size_t numberHashes); - - //! Efficiently swap the contents of two sketches. - void swap(SSketch &other); - - //! Create by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser, - std::size_t numberHashes); - - //! Convert to a node tree. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Add a new value. - void add(std::size_t maxSize, uint32_t value); - - //! Remove a value. - void remove(uint32_t value); - - //! Get an estimate of the number of unique values added. - uint32_t number() const; - - //! The secondary hash function. - TUInt32HashVec s_G; - //! The main hash functions. - TUInt32HashVec s_H; - //! 
The trailing zero counts. - TUInt8Vec s_Z; - //! The unique hashed values. - TUInt8VecVec s_B; - }; - - using TUInt32VecOrSketch = boost::variant; - - private: - //! Maybe switch to sketching the distinct value set. - void sketch(); - - private: - //! The maximum size of the sketch set before compression. - std::size_t m_MaxSize; - //! The number of distinct hashes to use in the sketch. - std::size_t m_NumberHashes; - //! The distinct count sketch. - TUInt32VecOrSketch m_Sketch; + //! The secondary hash function. + TUInt32HashVec s_G; + //! The main hash functions. + TUInt32HashVec s_H; + //! The trailing zero counts. + TUInt8Vec s_Z; + //! The unique hashed values. + TUInt8VecVec s_B; + }; + + using TUInt32VecOrSketch = boost::variant; + +private: + //! Maybe switch to sketching the distinct value set. + void sketch(); + +private: + //! The maximum size of the sketch set before compression. + std::size_t m_MaxSize; + //! The number of distinct hashes to use in the sketch. + std::size_t m_NumberHashes; + //! The distinct count sketch. + TUInt32VecOrSketch m_Sketch; }; - } } diff --git a/include/maths/CBootstrapClusterer.h b/include/maths/CBootstrapClusterer.h index 0862d2e3fa..0ccce9a12b 100644 --- a/include/maths/CBootstrapClusterer.h +++ b/include/maths/CBootstrapClusterer.h @@ -12,8 +12,8 @@ #include #include -#include #include +#include #include #include @@ -28,10 +28,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Bootstraps clustering to improve stability. //! @@ -66,1040 +64,838 @@ namespace maths //! Once we have associated clusters we assign points based //! on their majority vote (as with standard bagged clustering). template -class CBootstrapClusterer -{ - public: - using TSizeSizePr = std::pair; - using TSizeVec = std::vector; - using TSizeVecItr = TSizeVec::iterator; - using TSizeVecVec = std::vector; - using TSizeVecVecVec = std::vector; - using TPointVec = std::vector; - using TPointVecVec = std::vector; - using TGraph = boost::adjacency_list >; - using TVertex = typename boost::graph_traits::vertex_descriptor; - using TEdge = typename boost::graph_traits::edge_descriptor; - using TVertexItr = typename boost::graph_traits::vertex_iterator; - using TEdgeItr = typename boost::graph_traits::edge_iterator; - using TOutEdgeItr = typename boost::graph_traits::out_edge_iterator; - using TAdjacencyItr = typename boost::graph_traits::adjacency_iterator; - - public: - CBootstrapClusterer(double overlapThreshold, double chainingFactor) : - m_OverlapThreshold(overlapThreshold), - m_ChainingFactor(std::max(chainingFactor, 1.0)) - {} - - //! Run clustering on \p b bootstrap samples of \p points - //! and find persistent clusters of the data. - //! - //! \param[in] b The number of bootstrap clusterings. - //! \param[in] clusterer The clustering algorithm to use. - //! \param[in] points The points to cluster. - //! \param[out] result Filled in with the clustering. - //! - //! \tparam CLUSTERER Must provide a member function with - //! signature cluster(TPointVec &, TSizeVecVec) which performs - //! clustering. If necessary wrap up an existing clusterer - //! with CBootstrapClustererFacade. 
- template - void run(std::size_t b, - CLUSTERER clusterer, - TPointVec &points, - TPointVecVec &result) - { - std::sort(points.begin(), points.end()); - TSizeVecVecVec bootstrapClusters; - std::size_t n = this->bootstrapClusters(b, clusterer, points, bootstrapClusters); - TGraph graph(n); - this->buildClusterGraph(points, bootstrapClusters, graph); - this->buildClusters(points, bootstrapClusters, graph, result); +class CBootstrapClusterer { +public: + using TSizeSizePr = std::pair; + using TSizeVec = std::vector; + using TSizeVecItr = TSizeVec::iterator; + using TSizeVecVec = std::vector; + using TSizeVecVecVec = std::vector; + using TPointVec = std::vector; + using TPointVecVec = std::vector; + using TGraph = boost:: + adjacency_list>; + using TVertex = typename boost::graph_traits::vertex_descriptor; + using TEdge = typename boost::graph_traits::edge_descriptor; + using TVertexItr = typename boost::graph_traits::vertex_iterator; + using TEdgeItr = typename boost::graph_traits::edge_iterator; + using TOutEdgeItr = typename boost::graph_traits::out_edge_iterator; + using TAdjacencyItr = typename boost::graph_traits::adjacency_iterator; + +public: + CBootstrapClusterer(double overlapThreshold, double chainingFactor) + : m_OverlapThreshold(overlapThreshold), m_ChainingFactor(std::max(chainingFactor, 1.0)) {} + + //! Run clustering on \p b bootstrap samples of \p points + //! and find persistent clusters of the data. + //! + //! \param[in] b The number of bootstrap clusterings. + //! \param[in] clusterer The clustering algorithm to use. + //! \param[in] points The points to cluster. + //! \param[out] result Filled in with the clustering. + //! + //! \tparam CLUSTERER Must provide a member function with + //! signature cluster(TPointVec &, TSizeVecVec) which performs + //! clustering. If necessary wrap up an existing clusterer + //! with CBootstrapClustererFacade. + template + void run(std::size_t b, CLUSTERER clusterer, TPointVec& points, TPointVecVec& result) { + std::sort(points.begin(), points.end()); + TSizeVecVecVec bootstrapClusters; + std::size_t n = this->bootstrapClusters(b, clusterer, points, bootstrapClusters); + TGraph graph(n); + this->buildClusterGraph(points, bootstrapClusters, graph); + this->buildClusters(points, bootstrapClusters, graph, result); + } + +protected: + using TDoubleVec = std::vector; + using TBoolVec = std::vector; + using TSizeSizePrVec = std::vector; + using TDoubleSizePr = std::pair; + using TDoubleSizePrVec = std::vector; + + //! \brief Checks if a cluster is empty. + struct SIsEmpty { + bool operator()(const TPointVec& cluster) const { return cluster.empty(); } + }; + + //! Check if the second elements are equal. + struct SSecondEqual { + bool operator()(const TDoubleSizePr& lhs, const TDoubleSizePr& rhs) const { return lhs.second == rhs.second; } + }; + + //! \brief State used for the maximum adjacency minimum cost + //! cut search. + struct SCutState { + SCutState(std::size_t seed, const TGraph& graph) + : s_V(boost::num_vertices(graph)), s_ToVisit(1, seed), s_Adjacency(s_V, 0), s_Cut(0.0), s_A(0) { + this->initializeQueue(); } - protected: - using TDoubleVec = std::vector; - using TBoolVec = std::vector; - using TSizeSizePrVec = std::vector; - using TDoubleSizePr = std::pair; - using TDoubleSizePrVec = std::vector; + //! Get the cost of the current cut. + double cost() const { return s_Cut / static_cast(s_A * (s_V - s_A)); } - //! \brief Checks if a cluster is empty. 
- struct SIsEmpty - { - bool operator()(const TPointVec &cluster) const - { - return cluster.empty(); - } - }; + //! Check if the vertex is to visit. + bool toVisit(std::size_t i) const { return this->toVisit(s_ToVisit.size(), i); } - //! Check if the second elements are equal. - struct SSecondEqual - { - bool operator()(const TDoubleSizePr &lhs, - const TDoubleSizePr &rhs) const - { - return lhs.second == rhs.second; - } - }; + //! Check if the vertex is to visit. + bool toVisit(std::size_t n, std::size_t i) const { return std::binary_search(s_ToVisit.begin(), s_ToVisit.begin() + n, i); } - //! \brief State used for the maximum adjacency minimum cost - //! cut search. - struct SCutState - { - SCutState(std::size_t seed, const TGraph &graph) : - s_V(boost::num_vertices(graph)), - s_ToVisit(1, seed), - s_Adjacency(s_V, 0), - s_Cut(0.0), - s_A(0) - { - this->initializeQueue(); - } - - //! Get the cost of the current cut. - double cost() const - { - return s_Cut / static_cast(s_A * (s_V - s_A)); - } - - //! Check if the vertex is to visit. - bool toVisit(std::size_t i) const - { - return this->toVisit(s_ToVisit.size(), i); - } - - //! Check if the vertex is to visit. - bool toVisit(std::size_t n, std::size_t i) const - { - return std::binary_search(s_ToVisit.begin(), s_ToVisit.begin() + n, i); - } - - //! Get the next vertex to visit. - std::size_t next() const - { - return s_Queue.front().second; - } + //! Get the next vertex to visit. + std::size_t next() const { return s_Queue.front().second; } - //! Get the first right or equal vertex. - std::size_t nextToVisit(std::size_t i) const - { - return static_cast( - std::lower_bound(s_ToVisit.begin(), - s_ToVisit.end(), i) - s_ToVisit.begin()); - } + //! Get the first right or equal vertex. + std::size_t nextToVisit(std::size_t i) const { + return static_cast(std::lower_bound(s_ToVisit.begin(), s_ToVisit.end(), i) - s_ToVisit.begin()); + } - //! Merge any vertices to visit after \p n. - void mergeAfter(std::size_t n) - { - if (s_ToVisit.size() > n) - { - std::sort(s_ToVisit.begin() + n, s_ToVisit.end()); - std::inplace_merge(s_ToVisit.begin(), s_ToVisit.begin() + n, s_ToVisit.end()); - } + //! Merge any vertices to visit after \p n. + void mergeAfter(std::size_t n) { + if (s_ToVisit.size() > n) { + std::sort(s_ToVisit.begin() + n, s_ToVisit.end()); + std::inplace_merge(s_ToVisit.begin(), s_ToVisit.begin() + n, s_ToVisit.end()); } + } - //! Initialize the priority queue of vertices to visit. +//! Initialize the priority queue of vertices to visit. #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) - __attribute__ ((__noinline__)) + __attribute__((__noinline__)) #endif // defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) - void initializeQueue() - { - s_Queue.clear(); - s_Queue.reserve(s_ToVisit.size()); - for (std::size_t i = 0u; i < s_ToVisit.size(); ++i) - { - s_Queue.push_back(std::make_pair(s_Adjacency[s_ToVisit[i]], i)); - } - std::make_heap(s_Queue.begin(), s_Queue.end(), std::less()); + void + initializeQueue() { + s_Queue.clear(); + s_Queue.reserve(s_ToVisit.size()); + for (std::size_t i = 0u; i < s_ToVisit.size(); ++i) { + s_Queue.push_back(std::make_pair(s_Adjacency[s_ToVisit[i]], i)); } + std::make_heap(s_Queue.begin(), s_Queue.end(), std::less()); + } - //! Pop the priority queue of vertices to visit. - void popQueue() - { - std::pop_heap(s_Queue.begin(), s_Queue.end(), std::less()); - s_Queue.pop_back(); - } + //! Pop the priority queue of vertices to visit. 
+ void popQueue() { + std::pop_heap(s_Queue.begin(), s_Queue.end(), std::less()); + s_Queue.pop_back(); + } - //! The number of vertices. - std::size_t s_V; - //! The vertices adjacent to and outside the cut. - TSizeVec s_ToVisit; - //! The adjacency counts of the vertices. - TDoubleVec s_Adjacency; - //! Used for maintaining a priority queue of vertices to visit. - TDoubleSizePrVec s_Queue; - //! The current cut weight. - double s_Cut; - //! The current cut partitions the graph into A and V - A vertices. - std::size_t s_A; - }; - - protected: - //! The fraction of typical cut edges to actual cut edges - //! needed to separate a thicket. - static const double SEPARATION_THRESHOLD; - - protected: - //! Create a collection of clusterings of \p b bootstrap - //! samples of \p points. - //! - //! \param[in] b The number of bootstrap clusterings. - //! \param[in] clusterer The clustering algorithm to use. - //! \param[in] points The points to cluster. - //! \param[out] result Filled in with the \p b bootstrap - //! clusterings. - template - std::size_t bootstrapClusters(std::size_t b, - CLUSTERER &clusterer, - TPointVec &points, - TSizeVecVecVec &result) - { - std::size_t n = points.size(); - LOG_TRACE("# points = " << n); + //! The number of vertices. + std::size_t s_V; + //! The vertices adjacent to and outside the cut. + TSizeVec s_ToVisit; + //! The adjacency counts of the vertices. + TDoubleVec s_Adjacency; + //! Used for maintaining a priority queue of vertices to visit. + TDoubleSizePrVec s_Queue; + //! The current cut weight. + double s_Cut; + //! The current cut partitions the graph into A and V - A vertices. + std::size_t s_A; + }; + +protected: + //! The fraction of typical cut edges to actual cut edges + //! needed to separate a thicket. + static const double SEPARATION_THRESHOLD; + +protected: + //! Create a collection of clusterings of \p b bootstrap + //! samples of \p points. + //! + //! \param[in] b The number of bootstrap clusterings. + //! \param[in] clusterer The clustering algorithm to use. + //! \param[in] points The points to cluster. + //! \param[out] result Filled in with the \p b bootstrap + //! clusterings. 
+    template
+    std::size_t bootstrapClusters(std::size_t b, CLUSTERER& clusterer, TPointVec& points, TSizeVecVecVec& result) {
+        std::size_t n = points.size();
+        LOG_TRACE("# points = " << n);
+
+        result.clear();
+        result.reserve(b);
+        result.push_back(TSizeVecVec());
+        clusterer.cluster(points, result.back());
+        LOG_TRACE("Run 1: # clusters = " << result.back().size());
+
+        TSizeVec sampling;
+        TPointVec bootstrapPoints;
+        sampling.reserve(n);
+        bootstrapPoints.reserve(n);
+
+        for (std::size_t i = 1u; i < b; ++i) {
+            sampling.clear();
+            CSampling::uniformSample(m_Rng, 0, n, n, sampling);
+            std::sort(sampling.begin(), sampling.end());
+            LOG_TRACE("# samples = " << sampling.size());
+
+            bootstrapPoints.clear();
+            for (std::size_t j = 0u; j < n; ++j) {
+                bootstrapPoints.push_back(points[sampling[j]]);
+            }

-            result.clear();
-            result.reserve(b);
-            result.push_back(TSizeVecVec());
-            clusterer.cluster(points, result.back());
-            LOG_TRACE("Run 1: # clusters = " << result.back().size());
-
-            TSizeVec sampling;
-            TPointVec bootstrapPoints;
-            sampling.reserve(n);
-            bootstrapPoints.reserve(n);
-
-            for (std::size_t i = 1u; i < b; ++i)
-            {
-                sampling.clear();
-                CSampling::uniformSample(m_Rng, 0, n, n, sampling);
-                std::sort(sampling.begin(), sampling.end());
-                LOG_TRACE("# samples = " << sampling.size());
-
-                bootstrapPoints.clear();
-                for (std::size_t j = 0u; j < n; ++j)
-                {
-                    bootstrapPoints.push_back(points[sampling[j]]);
+            result.push_back(TSizeVecVec());
+            clusterer.cluster(bootstrapPoints, result.back());
+            for (std::size_t j = 0u; j < result.back().size(); ++j) {
+                for (std::size_t k = 0u; k < (result.back())[j].size(); ++k) {
+                    (result.back())[j][k] = sampling[(result.back())[j][k]];
                 }
-
-                result.push_back(TSizeVecVec());
-                clusterer.cluster(bootstrapPoints, result.back());
-                for (std::size_t j = 0u; j < result.back().size(); ++j)
-                {
-                    for (std::size_t k = 0u; k < (result.back())[j].size(); ++k)
-                    {
-                        (result.back())[j][k] = sampling[(result.back())[j][k]];
-                    }
-                }
-                LOG_TRACE("Run " << i+1 << ": # clusters = " << result.back().size());
             }
+            LOG_TRACE("Run " << i + 1 << ": # clusters = " << result.back().size());
+        }

-            m_Offsets.clear();
-            m_Offsets.resize(result.size());
-            std::size_t k = 0u;
-            for (std::size_t i = 0u; i < result.size(); ++i)
-            {
-                m_Offsets[i] = k;
-                k += result[i].size();
+        m_Offsets.clear();
+        m_Offsets.resize(result.size());
+        std::size_t k = 0u;
+        for (std::size_t i = 0u; i < result.size(); ++i) {
+            m_Offsets[i] = k;
+            k += result[i].size();
+        }
+        return k;
+    }
+
+    //! Build a graph by connecting strongly similar clusters.
+    //!
+    //! \param[in] points The points to cluster.
+    //! \param[in] bootstrapClusters The clusters of the bootstrap
+    //! sampled data.
+    //! \param[out] graph A graph whose vertices are the clusters
+    //! in each bootstrap clustering and whose edges connect clusters
+    //! which overlap significantly.
+    void buildClusterGraph(const TPointVec& points, TSizeVecVecVec& bootstrapClusters, TGraph& graph) const {
+        using TSizeSizePrUSet = boost::unordered_set;
+        using TSizeSizePrUSetCItr = TSizeSizePrUSet::const_iterator;
+
+        TSizeSizePrUSet edges;
+
+        // If there are no common points between a cluster and
+        // another bootstrap sampling we remember it and assign
+        // edges which are consistent with its overlap with other
+        // clusters.
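+        // Editor's note (illustrative, not in the original change): the
+        // overlap computed below is the fraction of a cluster's remaining
+        // distinct sampled points which also occur in a cluster of another
+        // clustering. For example, if cluster (i,k) has 10 distinct sampled
+        // points of which 7 occur in cluster (j,l), then o = 0.7; with
+        // sum = 1.0, m_OverlapThreshold = 0.5 and m_ChainingFactor = 2.0 the
+        // edge weight is min(2.0 * (0.7 - 0.5 * 1.0), 1.0) = 0.4.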
+ TSizeSizePrUSet ambiguous; + + for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) { + for (std::size_t j = 0u; j < bootstrapClusters[i].size(); ++j) { + std::sort(bootstrapClusters[i][j].begin(), bootstrapClusters[i][j].end()); } - return k; } - - //! Build a graph of by connecting strongly similar clusters. - //! - //! \param[in] points The points to cluster. - //! \param[in] bootstrapClusters The clusters of the bootstrap - //! sampled data. - //! \param[out] graph A graph whose vertices are the clusters - //! in each bootstrap clustering and whose edges connect clusters - //! which overlap significantly. - void buildClusterGraph(const TPointVec &points, - TSizeVecVecVec &bootstrapClusters, - TGraph &graph) const - { - using TSizeSizePrUSet = boost::unordered_set; - using TSizeSizePrUSetCItr = TSizeSizePrUSet::const_iterator; - - TSizeSizePrUSet edges; - - // If there are no common points between a cluster and - // another bootstrap sampling we remember it and assign - // edges which are consistent with its overlap with other - // clusters. - TSizeSizePrUSet ambiguous; - - for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) - { - for (std::size_t j = 0u; j < bootstrapClusters[i].size(); ++j) - { - std::sort(bootstrapClusters[i][j].begin(), bootstrapClusters[i][j].end()); + TSizeVec cik; + cik.reserve(points.size()); + TDoubleVec overlaps; + for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) { + for (std::size_t j = 0u; j < bootstrapClusters.size(); ++j) { + if (i == j) { + continue; } - } - TSizeVec cik; - cik.reserve(points.size()); - TDoubleVec overlaps; - for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) - { - for (std::size_t j = 0u; j < bootstrapClusters.size(); ++j) - { - if (i == j) - { - continue; - } - for (std::size_t k = 0u; k < bootstrapClusters[i].size(); ++k) - { - cik = bootstrapClusters[i][k]; - cik.erase(std::unique(cik.begin(), cik.end()), cik.end()); - double nik = static_cast(cik.size()); - - overlaps.clear(); - double sum = 0.0; - - for (std::size_t l = 0u; !cik.empty() && l < bootstrapClusters[j].size(); ++l) - { - const TSizeVec &cjl = bootstrapClusters[j][l]; - double o = static_cast(cik.size()); - CSetTools::inplace_set_difference(cik, cjl.begin(), cjl.end()); - o -= static_cast(cik.size()); - o /= nik; - overlaps.push_back(o); - sum += o; - } + for (std::size_t k = 0u; k < bootstrapClusters[i].size(); ++k) { + cik = bootstrapClusters[i][k]; + cik.erase(std::unique(cik.begin(), cik.end()), cik.end()); + double nik = static_cast(cik.size()); + + overlaps.clear(); + double sum = 0.0; + + for (std::size_t l = 0u; !cik.empty() && l < bootstrapClusters[j].size(); ++l) { + const TSizeVec& cjl = bootstrapClusters[j][l]; + double o = static_cast(cik.size()); + CSetTools::inplace_set_difference(cik, cjl.begin(), cjl.end()); + o -= static_cast(cik.size()); + o /= nik; + overlaps.push_back(o); + sum += o; + } - if (sum == 0.0) - { - ambiguous.insert(std::make_pair(this->toVertex(i, k), j)); - } - else - { - for (std::size_t l = 0u; l < overlaps.size(); ++l) - { - if (overlaps[l] > m_OverlapThreshold * sum) - { - std::size_t u = this->toVertex(i, k); - std::size_t v = this->toVertex(j, l); - if (u > v) - { - std::swap(u, v); - } - if (edges.insert(std::make_pair(u, v)).second) - { - boost::put(boost::edge_weight, - graph, - boost::add_edge(u, v, graph).first, - std::min( m_ChainingFactor - * (overlaps[l] - m_OverlapThreshold * sum), 1.0)); - } + if (sum == 0.0) { + ambiguous.insert(std::make_pair(this->toVertex(i, k), j)); + } else { + 
for (std::size_t l = 0u; l < overlaps.size(); ++l) { + if (overlaps[l] > m_OverlapThreshold * sum) { + std::size_t u = this->toVertex(i, k); + std::size_t v = this->toVertex(j, l); + if (u > v) { + std::swap(u, v); + } + if (edges.insert(std::make_pair(u, v)).second) { + boost::put(boost::edge_weight, + graph, + boost::add_edge(u, v, graph).first, + std::min(m_ChainingFactor * (overlaps[l] - m_OverlapThreshold * sum), 1.0)); } } } } } } - LOG_TRACE("ambiguous = " << core::CContainerPrinter::print(ambiguous)); - - TDoubleSizePrVec consistent; - for (TSizeSizePrUSetCItr i = ambiguous.begin(); i != ambiguous.end(); ++i) - { - std::size_t u = i->first; - - consistent.clear(); - TOutEdgeItr j, endj; - for (boost::tie(j, endj) = boost::out_edges(u, graph); j != endj; ++j) - { - std::size_t v = boost::target(*j, graph); - double weight = boost::get(boost::edge_weight, graph, *j); - - TOutEdgeItr k, endk; - for (boost::tie(k, endk) = boost::out_edges(v, graph); k != endk; ++k) - { - std::size_t w = boost::target(*k, graph); - if (this->fromVertex(w).first == i->second) - { - consistent.push_back(std::make_pair( - weight * boost::get(boost::edge_weight, graph, *k), w)); - } + } + LOG_TRACE("ambiguous = " << core::CContainerPrinter::print(ambiguous)); + + TDoubleSizePrVec consistent; + for (TSizeSizePrUSetCItr i = ambiguous.begin(); i != ambiguous.end(); ++i) { + std::size_t u = i->first; + + consistent.clear(); + TOutEdgeItr j, endj; + for (boost::tie(j, endj) = boost::out_edges(u, graph); j != endj; ++j) { + std::size_t v = boost::target(*j, graph); + double weight = boost::get(boost::edge_weight, graph, *j); + + TOutEdgeItr k, endk; + for (boost::tie(k, endk) = boost::out_edges(v, graph); k != endk; ++k) { + std::size_t w = boost::target(*k, graph); + if (this->fromVertex(w).first == i->second) { + consistent.push_back(std::make_pair(weight * boost::get(boost::edge_weight, graph, *k), w)); } } - std::sort(consistent.begin(), consistent.end(), COrderings::SSecondLess()); - consistent.erase(std::unique(consistent.begin(), - consistent.end(), - SSecondEqual()), consistent.end()); - LOG_TRACE("consistent = " << core::CContainerPrinter::print(consistent)); - - for (std::size_t k = 0u; k < consistent.size(); ++k) - { - boost::put(boost::edge_weight, - graph, - boost::add_edge(u, consistent[k].second, graph).first, - consistent[k].first); - } } - } + std::sort(consistent.begin(), consistent.end(), COrderings::SSecondLess()); + consistent.erase(std::unique(consistent.begin(), consistent.end(), SSecondEqual()), consistent.end()); + LOG_TRACE("consistent = " << core::CContainerPrinter::print(consistent)); - //! Build the clusters from the maximum connected components - //! of \p graph. - //! - //! \param[in] points The points to cluster. - //! \param[in] bootstrapClusters The bootstrap clusters of - //! \p points. - //! \param[in] graph The graph of overlapping clusters in - //! \p bootstrapClusters. - //! \param[out] result Filled in with the majority vote clusters - //! of \p bootstrapClusters. - void buildClusters(const TPointVec &points, - const TSizeVecVecVec &bootstrapClusters, - const TGraph &graph, - TPointVecVec &result) const - { - using TSizeSizeUMap = boost::unordered_map; - using TSizeSizeUMapCItr = TSizeSizeUMap::const_iterator; - using TSizeSizeUMapVec = std::vector; - - // Find the maximum connected components. 
- TSizeVec components(boost::num_vertices(graph)); - std::size_t n = boost::connected_components(graph, &components[0]); - LOG_TRACE("# vertices = " << components.size()); - LOG_TRACE("Connected components = " << n); - - // Find components which aren't easily separable. These will - // be the voting population. - n = this->thickets(n, graph, components); - LOG_TRACE("thickets = " << n); - - // Build a map from voters to point indices. - TSizeSizeUMapVec voters(n); - for (std::size_t i = 0u; i < components.size(); ++i) - { - TSizeSizeUMap &cluster = voters[components[i]]; - const TSizeVec &vertex = this->fromVertex(bootstrapClusters, i); - for (std::size_t j = 0u; j < vertex.size(); ++j) - { - ++cluster[vertex[j]]; - } + for (std::size_t k = 0u; k < consistent.size(); ++k) { + boost::put(boost::edge_weight, graph, boost::add_edge(u, consistent[k].second, graph).first, consistent[k].first); } + } + } + + //! Build the clusters from the maximum connected components + //! of \p graph. + //! + //! \param[in] points The points to cluster. + //! \param[in] bootstrapClusters The bootstrap clusters of + //! \p points. + //! \param[in] graph The graph of overlapping clusters in + //! \p bootstrapClusters. + //! \param[out] result Filled in with the majority vote clusters + //! of \p bootstrapClusters. + void buildClusters(const TPointVec& points, const TSizeVecVecVec& bootstrapClusters, const TGraph& graph, TPointVecVec& result) const { + using TSizeSizeUMap = boost::unordered_map; + using TSizeSizeUMapCItr = TSizeSizeUMap::const_iterator; + using TSizeSizeUMapVec = std::vector; + + // Find the maximum connected components. + TSizeVec components(boost::num_vertices(graph)); + std::size_t n = boost::connected_components(graph, &components[0]); + LOG_TRACE("# vertices = " << components.size()); + LOG_TRACE("Connected components = " << n); + + // Find components which aren't easily separable. These will + // be the voting population. + n = this->thickets(n, graph, components); + LOG_TRACE("thickets = " << n); + + // Build a map from voters to point indices. + TSizeSizeUMapVec voters(n); + for (std::size_t i = 0u; i < components.size(); ++i) { + TSizeSizeUMap& cluster = voters[components[i]]; + const TSizeVec& vertex = this->fromVertex(bootstrapClusters, i); + for (std::size_t j = 0u; j < vertex.size(); ++j) { + ++cluster[vertex[j]]; + } + } - // Extract clusters via majority vote. - result.clear(); - result.resize(voters.size()); - for (std::size_t i = 0u; i < points.size(); ++i) - { - std::size_t jmax = 0u; - std::size_t cmax = 0u; - std::size_t nmax = 0u; - for (std::size_t j = 0u; j < n; ++j) - { - TSizeSizeUMapCItr k = voters[j].find(i); - if (k == voters[j].end()) - { - continue; - } - std::size_t c = k->second; - std::size_t n_ = voters[j].size(); - if (COrderings::lexicographical_compare(c, n_, cmax, nmax, - std::greater())) - { - jmax = j; - cmax = c; - nmax = n_; - } - } - if (cmax == 0) - { - LOG_ERROR("Failed to find cluster for " << points[i]); + // Extract clusters via majority vote. 
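+        // Editor's note (illustrative, not in the original change): each
+        // point is assigned to the voter, i.e. the thicket of bootstrap
+        // clusters, in which it occurs most often, ties being broken in
+        // favour of the larger voter. For example, a point occurring 7 times
+        // in voter A covering 100 points and 7 times in voter B covering
+        // 120 points is assigned to B.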
+ result.clear(); + result.resize(voters.size()); + for (std::size_t i = 0u; i < points.size(); ++i) { + std::size_t jmax = 0u; + std::size_t cmax = 0u; + std::size_t nmax = 0u; + for (std::size_t j = 0u; j < n; ++j) { + TSizeSizeUMapCItr k = voters[j].find(i); + if (k == voters[j].end()) { continue; } - - result[jmax].push_back(points[i]); + std::size_t c = k->second; + std::size_t n_ = voters[j].size(); + if (COrderings::lexicographical_compare(c, n_, cmax, nmax, std::greater())) { + jmax = j; + cmax = c; + nmax = n_; + } + } + if (cmax == 0) { + LOG_ERROR("Failed to find cluster for " << points[i]); + continue; } - // It is possible that after voting clusters contain - // no points. Remove these. - result.erase(std::remove_if(result.begin(), result.end(), SIsEmpty()), result.end()); + result[jmax].push_back(points[i]); } - //! Identify subsets of the component of \p graph which - //! are difficult to separate by removing edges. - //! - //! \param[in] n The number of components. - //! \param[in] graph The graph for which to identify thickets. - //! \param[in,out] components The component labels of the - //! vertices of \p graph. Filled in with the thicket labels - //! of \p graph. - //! \return The number of thickets in \p graph. - std::size_t thickets(std::size_t n, const TGraph &graph, TSizeVec &components) const - { - std::size_t V = boost::num_vertices(graph); - - TSizeVec mapping(V); - TSizeVec inverse; - TBoolVec parities(V); - TGraph component(1); - - for (std::size_t i = 0u; i < n; ++i) - { - LOG_TRACE("component = " << i); - - // Extract the component vertices. - inverse.clear(); - for (std::size_t j = 0u; j < V; ++j) - { - if (components[j] == i) - { - inverse.push_back(j); - mapping[j] = inverse.size() - 1; - } + // It is possible that after voting clusters contain + // no points. Remove these. + result.erase(std::remove_if(result.begin(), result.end(), SIsEmpty()), result.end()); + } + + //! Identify subsets of the component of \p graph which + //! are difficult to separate by removing edges. + //! + //! \param[in] n The number of components. + //! \param[in] graph The graph for which to identify thickets. + //! \param[in,out] components The component labels of the + //! vertices of \p graph. Filled in with the thicket labels + //! of \p graph. + //! \return The number of thickets in \p graph. + std::size_t thickets(std::size_t n, const TGraph& graph, TSizeVec& components) const { + std::size_t V = boost::num_vertices(graph); + + TSizeVec mapping(V); + TSizeVec inverse; + TBoolVec parities(V); + TGraph component(1); + + for (std::size_t i = 0u; i < n; ++i) { + LOG_TRACE("component = " << i); + + // Extract the component vertices. + inverse.clear(); + for (std::size_t j = 0u; j < V; ++j) { + if (components[j] == i) { + inverse.push_back(j); + mapping[j] = inverse.size() - 1; } + } - std::size_t Vi = inverse.size(); + std::size_t Vi = inverse.size(); - if (Vi < 3) - { - continue; - } + if (Vi < 3) { + continue; + } - // Build the component graph. - this->copy(graph, mapping, inverse, component); - - // Find the partitions of the component which are difficult - // to separate (by removing edges). - if (this->separate(component, parities)) - { - LOG_TRACE("Separated component"); - LOG_TRACE("parities = " << core::CContainerPrinter::print(parities.begin(), - parities.begin() + Vi)); - for (std::size_t j = 0u; j < Vi; ++j) - { - if (parities[j]) - { - components[inverse[j]] = n; - } + // Build the component graph. 
+            this->copy(graph, mapping, inverse, component);
+
+            // Find the partitions of the component which are difficult
+            // to separate (by removing edges).
+            if (this->separate(component, parities)) {
+                LOG_TRACE("Separated component");
+                LOG_TRACE("parities = " << core::CContainerPrinter::print(parities.begin(), parities.begin() + Vi));
+                for (std::size_t j = 0u; j < Vi; ++j) {
+                    if (parities[j]) {
+                        components[inverse[j]] = n;
+                    }
+                }
+                LOG_TRACE("components = " << core::CContainerPrinter::print(components));
+                ++n;
+            }
+        }

        return n;
    }

    //! Test to see if we should separate \p graph by a minimum cut.
    //!
    //! The idea of this test is that if there exists a cut in the
    //! graph which contains many fewer edges than we'd expect given
    //! the number of edges in the graph then we should separate
    //! the graph along this cut. This is to avoid the problem that
    //! a small number of different clusterings of the data can
    //! cause us to chain together otherwise consistently disjoint
    //! clusters.
    //!
    //! \param[in] graph The graph to separate.
    //! \param[out] result Filled in with the parity of each vertex
    //! in a cut of \p graph which minimizes the split criterion.
    //! \return True if we should split \p graph and false otherwise.
    bool separate(const TGraph& graph, TBoolVec& result) const {
        std::size_t V = boost::num_vertices(graph);
        std::size_t E = boost::num_edges(graph);

        result.assign(V, true);

        std::size_t D = V;
        for (std::size_t i = 0u; i < V; ++i) {
            D = std::min(D, boost::out_degree(i, graph));
        }
        TDoubleVec weights;
        weights.reserve(E);
        double totalWeight = 0.0;
        {
            TEdgeItr i, end;
            for (boost::tie(i, end) = boost::edges(graph); i != end; ++i) {
                double weight = boost::get(boost::edge_weight, graph, *i);
                weights.push_back(weight);
                totalWeight += weight;
            }
        }

        double p = totalWeight / static_cast(V * (V - 1) / 2);
        double threshold = SEPARATION_THRESHOLD * p;

        // We can bound the ratio of the cut size to the typical size
        // by noting that in the most separable configuration we'd
        // remove all edges we're short of a complete graph from
        // the cut. This is a poor bound when the split is uneven,
        // i.e. unless the graph is close to complete
        // V (V - 1) / 2 - E > V - 1 so the graph could be disconnected.
        // In this case, we can bound the ratio based on the minimum
        // vertex degree D for cuts in which one component contains
        // fewer than D + 1 vertices.
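+        // Editor's note (illustrative, not in the original change): for a
+        // complete graph with unit weights, V = 6, E = 15 and D = 5, we get
+        // p = 1.0 and threshold = 0.1; for i = 1 the bound candidate is
+        // weights[5] / 5 = 6 / 5 = 1.2, and larger i give similar values,
+        // so the bound exceeds the threshold and a complete graph is never
+        // separated here.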
+ double bound = std::numeric_limits::max(); + std::sort(weights.begin(), weights.end()); + for (std::size_t i = 1u; i < weights.size(); ++i) { + weights[i] += weights[i - 1]; + } + for (std::size_t i = 1u; i <= V / 2 + 1; ++i) { + std::size_t C = std::max(i * (D - std::min(D, i - 1)), (i * (V - i)) - std::min(i * (V - i), (V * (V - 1)) / 2 - E)); + bound = std::min(bound, weights[C] / static_cast(i * (V - i))); + } + LOG_TRACE("bound = " << bound << " threshold = " << threshold); - std::size_t D = V; - for (std::size_t i = 0u; i < V; ++i) - { - D = std::min(D, boost::out_degree(i, graph)); - } - TDoubleVec weights; - weights.reserve(E); - double totalWeight = 0.0; - { - TEdgeItr i, end; - for (boost::tie(i, end) = boost::edges(graph); i != end; ++i) - { - double weight = boost::get(boost::edge_weight, graph, *i); - weights.push_back(weight); - totalWeight += weight; + if (bound >= threshold) { + LOG_TRACE("Short circuit: D = " << D << ", V = " << V << ", bound = " << bound << ", threshold = " << SEPARATION_THRESHOLD * p); + return false; + } + + TDoubleVec seeds; + CSampling::uniformSample(m_Rng, 0.0, 1.0, 6, seeds); + + TSizeSizePrVec cut; + TSizeSizePrVec newCut; + for (std::size_t i = 0u; i < seeds.size(); ++i) { + if (cut.empty()) { + TEdgeItr seed = boost::edges(graph).first; + for (std::size_t j = 0u; j < static_cast(seeds[i] * static_cast(E)); ++j, ++seed) { } + cut.push_back(std::make_pair(boost::source(*seed, graph), boost::target(*seed, graph))); } - double p = totalWeight / static_cast(V * (V - 1) / 2); - double threshold = SEPARATION_THRESHOLD * p; - - // We can bound the ratio of the cut size to the typical - // by noting that in most separable configuration we'd - // remove all edges we're short of a complete graph from - // the cut. This is a poor bound when the split is uneven, - // i.e. unless the graph is close to complete - // V (V - 1) / 2 - E > V - 1 so the graph could disconnected. - // In this case, we can bound the ratio based on the minimum - // vertex degree D for cuts in which one component contains - // fewer than D + 1 vertices. 
- double bound = std::numeric_limits::max(); - std::sort(weights.begin(), weights.end()); - for (std::size_t i = 1u; i < weights.size(); ++i) - { - weights[i] += weights[i - 1]; - } - for (std::size_t i = 1u; i <= V / 2 + 1; ++i) - { - std::size_t C = std::max( i * (D - std::min(D, i - 1)), - (i * (V - i)) - std::min( i * (V - i), - (V * (V - 1)) / 2 - E)); - bound = std::min(bound, weights[C] / static_cast(i * (V - i))); - } - LOG_TRACE("bound = " << bound << " threshold = " << threshold); - - if (bound >= threshold) - { - LOG_TRACE("Short circuit: D = " << D - << ", V = " << V - << ", bound = " << bound - << ", threshold = " << SEPARATION_THRESHOLD * p); - return false; + double cost; + if (this->cutSearch(cut.back().first, cut.back().second, graph, threshold, cost, result)) { + return true; } - TDoubleVec seeds; - CSampling::uniformSample(m_Rng, 0.0, 1.0, 6, seeds); - - TSizeSizePrVec cut; - TSizeSizePrVec newCut; - for (std::size_t i = 0u; i < seeds.size(); ++i) - { - if (cut.empty()) - { - TEdgeItr seed = boost::edges(graph).first; - for (std::size_t j = 0u; - j < static_cast(seeds[i] * static_cast(E)); - ++j, ++seed) - { - } - cut.push_back(std::make_pair(boost::source(*seed, graph), boost::target(*seed, graph))); - } - - double cost; - if (this->cutSearch(cut.back().first, cut.back().second, graph, threshold, cost, result)) - { - return true; - } + cut.pop_back(); + std::size_t n = cut.size(); - cut.pop_back(); - std::size_t n = cut.size(); - - TEdgeItr j, end; - for (boost::tie(j, end) = boost::edges(graph); j != end; ++j) - { - std::size_t u = boost::source(*j, graph); - std::size_t v = boost::target(*j, graph); - if (result[u] != result[v]) - { - cut.push_back(std::make_pair(u, v)); - } - } - if (n > 0) - { - std::sort(cut.begin() + n, cut.end()); - newCut.clear(); - std::set_intersection(cut.begin(), cut.begin() + n, - cut.begin() + n, cut.end(), - std::back_inserter(newCut)); - cut.swap(newCut); + TEdgeItr j, end; + for (boost::tie(j, end) = boost::edges(graph); j != end; ++j) { + std::size_t u = boost::source(*j, graph); + std::size_t v = boost::target(*j, graph); + if (result[u] != result[v]) { + cut.push_back(std::make_pair(u, v)); } } - - return false; + if (n > 0) { + std::sort(cut.begin() + n, cut.end()); + newCut.clear(); + std::set_intersection(cut.begin(), cut.begin() + n, cut.begin() + n, cut.end(), std::back_inserter(newCut)); + cut.swap(newCut); + } } - //! Look for the sparsest cut of the graph including (\p u, \p v). - //! - //! Finding the sparsest cut is NP-hard; however, there exist - //! effective approximate solutions. See, for example, - //! http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.187.3506&rep=rep1&type=pdf - //! and references therein. Here, we take a simpler to implement - //! approach and starting with the edge (\p u, \p v), create a - //! sequence of cuts which divide the graph into exactly two - //! pieces using a maximum adjacency search. - //! - //! This is exactly how the Stoer Wagner minimum cut phase - //! search works except for the additional constraint on - //! connectivity (see http://e-maxx.ru/bookz/files/stoer_wagner_mincut.pdf). - //! Note that their inductive argument that this will find the - //! minimum cut through the last edge visited no longer works - //! because the sparsest cut cost function is not countably - //! additive in the edge weights in the cut. Instead, we remember - //! the sparsest cut we find along the way. - //! - //! \param[in] u The start vertex of the seed edge. - //! 
\param[in] v The end vertex of the seed edge.
    //! \param[in] graph The graph to separate.
    //! \param[in] threshold The cost below which a cut is considered
    //! sparse enough to split \p graph.
    //! \param[out] cost Filled in with the lowest cost of the cut.
    //! \param[out] parities Filled in with the vertex parities in
    //! the lowest cost cut.
    //! \return True if the cut should split \p graph and false
    //! otherwise.
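    //!
    //! Editor's note: the cost being minimized is the sparsest cut
    //! objective from SCutState::cost(). An illustrative sketch, not part
    //! of the original change:
    //! \code
    //! // cost = cut weight / (|A| * (V - |A|))
    //! double sparsestCutCost(double cut, std::size_t a, std::size_t v) {
    //!     return cut / static_cast<double>(a * (v - a));
    //! }
    //! // e.g. a cut of weight 1.5 splitting a 6 vertex graph 2/4 has
    //! // cost sparsestCutCost(1.5, 2, 6) = 1.5 / 8 = 0.1875
    //! \endcode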
+ bool cutSearch(std::size_t u, std::size_t v, const TGraph& graph, double threshold, double& cost, TBoolVec& parities) const { + LOG_TRACE("Seed edge = (" << u << "," << v << ")"); + + std::size_t V = boost::num_vertices(graph); + + parities.assign(V, true); + + SCutState state(u, graph); + std::size_t next = state.next(); + this->visit(next, graph, parities, state); + next = state.nextToVisit(v); + this->visit(next, graph, parities, state); + + double lowestCost = state.cost(); + double bestCut = state.s_Cut; + std::size_t bestA = state.s_A; + TBoolVec best = parities; + + while (state.s_A + 1 < V) { + if (!this->findNext(parities, graph, state)) { + LOG_TRACE("The positive subgraph is already disconnected"); + + TSizeVec components; + std::size_t c = this->positiveSubgraphConnected(graph, parities, components); + LOG_TRACE("components = " << core::CContainerPrinter::print(components)); + + // Find the smallest component. + TSizeVec sizes(c, 0); + for (std::size_t i = 0u; i < components.size(); ++i) { + if (parities[i]) { + ++sizes[components[i]]; } - std::size_t smallest = static_cast( - std::min_element(sizes.begin(), sizes.end()) - - sizes.begin()); - LOG_TRACE("sizes = " << core::CContainerPrinter::print(sizes)); - LOG_TRACE("smallest = " << smallest); - - // Add all its vertices to the "to visit" set. - std::size_t n = state.s_ToVisit.size(); - for (std::size_t i = 0u; i < components.size(); ++i) - { - if (parities[i] && components[i] == smallest && !state.toVisit(i)) - { - state.s_ToVisit.push_back(i); - } + } + std::size_t smallest = static_cast(std::min_element(sizes.begin(), sizes.end()) - sizes.begin()); + LOG_TRACE("sizes = " << core::CContainerPrinter::print(sizes)); + LOG_TRACE("smallest = " << smallest); + + // Add all its vertices to the "to visit" set. + std::size_t n = state.s_ToVisit.size(); + for (std::size_t i = 0u; i < components.size(); ++i) { + if (parities[i] && components[i] == smallest && !state.toVisit(i)) { + state.s_ToVisit.push_back(i); } + } - state.mergeAfter(n); + state.mergeAfter(n); - for (std::size_t i = 0u; i < components.size(); ++i) - { - if (parities[i] && components[i] == smallest) - { - next = state.nextToVisit(i); - this->visit(next, graph, parities, state); - } + for (std::size_t i = 0u; i < components.size(); ++i) { + if (parities[i] && components[i] == smallest) { + next = state.nextToVisit(i); + this->visit(next, graph, parities, state); } } - else - { - next = state.next(); - this->visit(next, graph, parities, state); - } + } else { + next = state.next(); + this->visit(next, graph, parities, state); + } - double cutCost = state.cost(); - if (cutCost < lowestCost) - { - lowestCost = cutCost; - bestCut = state.s_Cut; - bestA = state.s_A; - best = parities; - } + double cutCost = state.cost(); + if (cutCost < lowestCost) { + lowestCost = cutCost; + bestCut = state.s_Cut; + bestA = state.s_A; + best = parities; } + } - cost = lowestCost; - parities.swap(best); + cost = lowestCost; + parities.swap(best); - LOG_TRACE("Best cut = " << bestCut - << ", |A| = " << bestA - << ", |B| = " << V - bestA - << ", cost = " << cost - << ", threshold = " << threshold); + LOG_TRACE("Best cut = " << bestCut << ", |A| = " << bestA << ", |B| = " << V - bestA << ", cost = " << cost + << ", threshold = " << threshold); - return cost < threshold; - } + return cost < threshold; + } - //! Get the offsets for the clusterings. - TSizeVec &offsets() { return m_Offsets; } + //! Get the offsets for the clusterings. 
+ TSizeVec& offsets() { return m_Offsets; } - private: - //! \brief A parity filter predicate which tests whether - //! vertices and edges belong to a specified parity subgraph. - //! - //! This is intended for use with boost::filtered_graph. - class CParityFilter - { - public: - CParityFilter() : m_Graph(0), m_Parities(0), m_Parity(false) {} - CParityFilter(const TGraph &graph, const TBoolVec &parities, bool parity) : - m_Graph(&graph), - m_Parities(&parities), - m_Parity(parity) - {} - - //! Check the vertex parity. - bool operator()(const TVertex &v) const - { - return (*m_Parities)[v] == m_Parity; - } +private: + //! \brief A parity filter predicate which tests whether + //! vertices and edges belong to a specified parity subgraph. + //! + //! This is intended for use with boost::filtered_graph. + class CParityFilter { + public: + CParityFilter() : m_Graph(0), m_Parities(0), m_Parity(false) {} + CParityFilter(const TGraph& graph, const TBoolVec& parities, bool parity) + : m_Graph(&graph), m_Parities(&parities), m_Parity(parity) {} - //! Check the end vertices' parity. - bool operator()(const TEdge &e) const - { - return (*m_Parities)[boost::source(e, *m_Graph)] == m_Parity - && (*m_Parities)[boost::target(e, *m_Graph)] == m_Parity; - } + //! Check the vertex parity. + bool operator()(const TVertex& v) const { return (*m_Parities)[v] == m_Parity; } - private: - //! The graph to filter. - const TGraph *m_Graph; - //! The parities of the vertices of \p graph. - const TBoolVec *m_Parities; - //! The parity of the filtered graph. - bool m_Parity; - }; + //! Check the end vertices' parity. + bool operator()(const TEdge& e) const { + return (*m_Parities)[boost::source(e, *m_Graph)] == m_Parity && (*m_Parities)[boost::target(e, *m_Graph)] == m_Parity; + } private: - //! Copy the vertices in \p inverse and edges between them - //! from \p graph into a new graph structure in \p result. - void copy(const TGraph &graph, - const TSizeVec &mapping, - const TSizeVec &inverse, - TGraph &result) const - { - result = TGraph(inverse.size()); - for (std::size_t i = 0u; i < inverse.size(); ++i) - { - TOutEdgeItr j, end; - for (boost::tie(j, end) = boost::out_edges(inverse[i], graph); j != end; ++j) - { - std::size_t u = boost::source(*j, graph); - std::size_t v = boost::target(*j, graph); - if (u < v && std::binary_search(inverse.begin(), inverse.end(), v)) - { - boost::put(boost::edge_weight, - result, - boost::add_edge(mapping[u], mapping[v], result).first, - boost::get(boost::edge_weight, graph, *j)); - } + //! The graph to filter. + const TGraph* m_Graph; + //! The parities of the vertices of \p graph. + const TBoolVec* m_Parities; + //! The parity of the filtered graph. + bool m_Parity; + }; + +private: + //! Copy the vertices in \p inverse and edges between them + //! from \p graph into a new graph structure in \p result. + void copy(const TGraph& graph, const TSizeVec& mapping, const TSizeVec& inverse, TGraph& result) const { + result = TGraph(inverse.size()); + for (std::size_t i = 0u; i < inverse.size(); ++i) { + TOutEdgeItr j, end; + for (boost::tie(j, end) = boost::out_edges(inverse[i], graph); j != end; ++j) { + std::size_t u = boost::source(*j, graph); + std::size_t v = boost::target(*j, graph); + if (u < v && std::binary_search(inverse.begin(), inverse.end(), v)) { + boost::put(boost::edge_weight, + result, + boost::add_edge(mapping[u], mapping[v], result).first, + boost::get(boost::edge_weight, graph, *j)); } } } - - //! Find the next vertex to visit. - //! - //! 
This is the most adjacent vertex which doesn't disconnect - //! the positive parity subgraph. - bool findNext(const TBoolVec &parities, const TGraph &graph, SCutState &state) const - { - state.initializeQueue(); - TSizeVec components; - for (std::size_t i = 0u; i < state.s_ToVisit.size(); ++i) - { - std::size_t candidate = state.next(); - std::size_t v = state.s_ToVisit[candidate]; - const_cast(parities)[v] = false; - bool connected = (this->positiveSubgraphConnected(graph, parities, components) == 1); - const_cast(parities)[v] = true; - if (connected) - { - return true; - } - state.popQueue(); + } + + //! Find the next vertex to visit. + //! + //! This is the most adjacent vertex which doesn't disconnect + //! the positive parity subgraph. + bool findNext(const TBoolVec& parities, const TGraph& graph, SCutState& state) const { + state.initializeQueue(); + TSizeVec components; + for (std::size_t i = 0u; i < state.s_ToVisit.size(); ++i) { + std::size_t candidate = state.next(); + std::size_t v = state.s_ToVisit[candidate]; + const_cast(parities)[v] = false; + bool connected = (this->positiveSubgraphConnected(graph, parities, components) == 1); + const_cast(parities)[v] = true; + if (connected) { + return true; } - return false; + state.popQueue(); } - - //! Visit the most adjacent vertex in the "to visit" set. - //! - //! This updates the "to visit" set to include newly adjacent - //! vertices to A, the vertex adjacencies, the cut weight, and - //! set sizes \f$|A|\f$ and \f$V - |A|\f$. - void visit(std::size_t next, - const TGraph &graph, - TBoolVec &parities, - SCutState &state) const - { - std::size_t u = state.s_ToVisit[next]; - LOG_TRACE("Visiting " << u); - - parities[u] = false; - state.s_ToVisit.erase(state.s_ToVisit.begin() + next); - - std::size_t n = state.s_ToVisit.size(); - TOutEdgeItr i, end; - for (boost::tie(i, end) = boost::out_edges(u, graph); i != end; ++i) - { - double weight = boost::get(boost::edge_weight, graph, *i); - std::size_t v = boost::target(*i, graph); - if (parities[v]) - { - state.s_Adjacency[v] += weight; - state.s_Cut += weight; - if (!state.toVisit(n, v)) - { - state.s_ToVisit.push_back(v); - } - } - else - { - state.s_Cut -= weight; + return false; + } + + //! Visit the most adjacent vertex in the "to visit" set. + //! + //! This updates the "to visit" set to include newly adjacent + //! vertices to A, the vertex adjacencies, the cut weight, and + //! set sizes \f$|A|\f$ and \f$V - |A|\f$. + void visit(std::size_t next, const TGraph& graph, TBoolVec& parities, SCutState& state) const { + std::size_t u = state.s_ToVisit[next]; + LOG_TRACE("Visiting " << u); + + parities[u] = false; + state.s_ToVisit.erase(state.s_ToVisit.begin() + next); + + std::size_t n = state.s_ToVisit.size(); + TOutEdgeItr i, end; + for (boost::tie(i, end) = boost::out_edges(u, graph); i != end; ++i) { + double weight = boost::get(boost::edge_weight, graph, *i); + std::size_t v = boost::target(*i, graph); + if (parities[v]) { + state.s_Adjacency[v] += weight; + state.s_Cut += weight; + if (!state.toVisit(n, v)) { + state.s_ToVisit.push_back(v); } + } else { + state.s_Cut -= weight; } - - state.mergeAfter(n); - state.s_Adjacency[u] = 0; - ++state.s_A; - } - - //! Check that the subgraph with true parity is connected. 
-        std::size_t positiveSubgraphConnected(const TGraph &graph,
-                                              const TBoolVec &parities,
-                                              TSizeVec &components) const
-        {
-            using TParityGraph = boost::filtered_graph;
-            CParityFilter parityFilter(graph, parities, true);
-            TParityGraph parityGraph(graph, parityFilter, parityFilter);
-            components.resize(boost::num_vertices(graph));
-            return boost::connected_components(parityGraph, &components[0]);
-        }
-
-        //! Extract the vertex for the \p j'th cluster of the
-        //! \p i'th bootstrap clustering.
-        std::size_t toVertex(std::size_t i, std::size_t j) const
-        {
-            return m_Offsets[i] + j;
-        }
-
-        //! Extract the clustering and cluster from the vertex
-        //! representation \p v.
-        TSizeSizePr fromVertex(std::size_t v) const
-        {
-            std::size_t i = static_cast(
-                                std::upper_bound(m_Offsets.begin(), m_Offsets.end(), v)
-                              - m_Offsets.begin()) - 1;
-            return std::make_pair(i, v - m_Offsets[i]);
-        }
-
-        //! Extract the cluster corresponding to the \p v'th vertex
-        //! of the cluster graph.
-        const TSizeVec &fromVertex(const TSizeVecVecVec &clusters, std::size_t v) const
-        {
-            TSizeSizePr ij = fromVertex(v);
-            return clusters[ij.first][ij.second];
-        }
-
-    private:
-        //! The random number generator.
-        mutable CPRNG::CXorShift1024Mult m_Rng;
-
-        //! The threshold in the similarity measure for which we will
-        //! consider joining clusters.
-        double m_OverlapThreshold;
-
-        //! The amount overlap between clusters causes them to chain
-        //! together.
-        double m_ChainingFactor;
-
-        //! A flat encoding of the vertices in each clustering.
-        //!
-        //! In particular, the start of the i'th clustering clusters
-        //! is encoded by the i'th element.
-        TSizeVec m_Offsets;
+    std::size_t positiveSubgraphConnected(const TGraph& graph, const TBoolVec& parities, TSizeVec& components) const {
+        using TParityGraph = boost::filtered_graph;
+        CParityFilter parityFilter(graph, parities, true);
+        TParityGraph parityGraph(graph, parityFilter, parityFilter);
+        components.resize(boost::num_vertices(graph));
+        return boost::connected_components(parityGraph, &components[0]);
+    }
+
+    //! Extract the vertex for the \p j'th cluster of the
+    //! \p i'th bootstrap clustering.
+    std::size_t toVertex(std::size_t i, std::size_t j) const { return m_Offsets[i] + j; }
+
+    //! Extract the clustering and cluster from the vertex
+    //! representation \p v.
+    TSizeSizePr fromVertex(std::size_t v) const {
+        std::size_t i = static_cast(std::upper_bound(m_Offsets.begin(), m_Offsets.end(), v) - m_Offsets.begin()) - 1;
+        return std::make_pair(i, v - m_Offsets[i]);
+    }
+
+    //! Extract the cluster corresponding to the \p v'th vertex
+    //! of the cluster graph.
+    const TSizeVec& fromVertex(const TSizeVecVecVec& clusters, std::size_t v) const {
+        TSizeSizePr ij = fromVertex(v);
+        return clusters[ij.first][ij.second];
+    }
+
+private:
+    //! The random number generator.
+    mutable CPRNG::CXorShift1024Mult m_Rng;
+
+    //! The threshold in the similarity measure for which we will
+    //! consider joining clusters.
+    double m_OverlapThreshold;
+
+    //! The amount by which overlap between clusters causes them to
+    //! chain together.
+    double m_ChainingFactor;
+
+    //! A flat encoding of the vertices in each clustering.
+    //!
+    //! In particular, the i'th element encodes the offset of the
+    //! first cluster of the i'th clustering.
+    TSizeVec m_Offsets;
 };

 template
 const double CBootstrapClusterer::SEPARATION_THRESHOLD(0.1);

-//! 
\brief Extracts the clusters in canonical form (by index into //! the point vector) for the facade implementations. template -class CBootstrapClustererFacadeExtractClusters -{ - public: - using TSizeVec = std::vector; - using TSizeVecVec = std::vector; - using TPointVec = std::vector; - using TPointVecCItr = typename TPointVec::const_iterator; - - public: - //! Compute the cluster of each point in \p points. - //! - //! \param[in] points The ordered points to cluster. - //! \param[in] clusters The clustering of \p points. - //! \param[out] result Filled in with the clustering of the - //! indexes of \p points. - template - void extract(const TPointVec &points, const CLUSTERS &clusters, TSizeVecVec &result) - { - - result.resize(clusters.size()); - - for (std::size_t i = 0u; i < clusters.size(); ++i) - { - const TPointVec &clusterPoints = clusters[i]; - - result[i].clear(); - result[i].reserve(clusterPoints.size()); - - for (std::size_t j = 0u; j < clusterPoints.size(); ++j) - { - std::size_t k = points.size(); - for (TPointVecCItr l = this->begin(points, clusterPoints[j]), - end = this->end(points, clusterPoints[j]); - l != end; - ++l) - { - if (*l == clusterPoints[j]) - { - k = static_cast(l - points.begin()); - break; - } - } - - if (k == points.size()) - { - LOG_ERROR("Didn't find point " << clusterPoints[j]); - continue; +class CBootstrapClustererFacadeExtractClusters { +public: + using TSizeVec = std::vector; + using TSizeVecVec = std::vector; + using TPointVec = std::vector; + using TPointVecCItr = typename TPointVec::const_iterator; + +public: + //! Compute the cluster of each point in \p points. + //! + //! \param[in] points The ordered points to cluster. + //! \param[in] clusters The clustering of \p points. + //! \param[out] result Filled in with the clustering of the + //! indexes of \p points. + template + void extract(const TPointVec& points, const CLUSTERS& clusters, TSizeVecVec& result) { + + result.resize(clusters.size()); + + for (std::size_t i = 0u; i < clusters.size(); ++i) { + const TPointVec& clusterPoints = clusters[i]; + + result[i].clear(); + result[i].reserve(clusterPoints.size()); + + for (std::size_t j = 0u; j < clusterPoints.size(); ++j) { + std::size_t k = points.size(); + for (TPointVecCItr l = this->begin(points, clusterPoints[j]), end = this->end(points, clusterPoints[j]); l != end; ++l) { + if (*l == clusterPoints[j]) { + k = static_cast(l - points.begin()); + break; } + } - result[i].push_back(k); + if (k == points.size()) { + LOG_ERROR("Didn't find point " << clusterPoints[j]); + continue; } + + result[i].push_back(k); } } + } - private: - //! Get the first point equal or right of \p x. - TPointVecCItr begin(const TPointVec &points, const POINT &x) const - { - return std::lower_bound(points.begin(), points.end(), x); - } +private: + //! Get the first point equal or right of \p x. + TPointVecCItr begin(const TPointVec& points, const POINT& x) const { return std::lower_bound(points.begin(), points.end(), x); } - //! Get the first point right of \p x. - TPointVecCItr end(const TPointVec &points, const POINT &x) const - { - return std::upper_bound(points.begin(), points.end(), x); - } + //! Get the first point right of \p x. + TPointVecCItr end(const TPointVec& points, const POINT& x) const { return std::upper_bound(points.begin(), points.end(), x); } }; //! \brief Adapts clustering implementations for use by the bootstrap @@ -1110,112 +906,99 @@ class CBootstrapClustererFacade {}; //! \brief Adapts the x-means implementation for use by the bootstrap //! 
 //! clusterer.
 template<typename POINT, typename COST>
-class CBootstrapClustererFacade<CXMeans<POINT, COST> > : private CBootstrapClustererFacadeExtractClusters<POINT>
-{
-    public:
-        using TSizeVec = std::vector<std::size_t>;
-        using TSizeVecVec = std::vector<TSizeVec>;
-        using TPointVec = std::vector<POINT>;
-
-    public:
-        CBootstrapClustererFacade(const CXMeans<POINT, COST> &xmeans,
-                                  std::size_t improveParamsKmeansIterations,
-                                  std::size_t improveStructureClusterSeeds,
-                                  std::size_t improveStructureKmeansIterations) :
-                m_Xmeans(xmeans),
-                m_ImproveParamsKmeansIterations(improveParamsKmeansIterations),
-                m_ImproveStructureClusterSeeds(improveStructureClusterSeeds),
-                m_ImproveStructureKmeansIterations(improveStructureKmeansIterations)
-        {}
-
-        //! \note Assumes \p points are sorted.
-        void cluster(const TPointVec &points, TSizeVecVec &result)
-        {
-            using TPointVecCRef = boost::reference_wrapper<const TPointVec>;
-            using TPointVecCRefVec = std::vector<TPointVecCRef>;
-
-            // Initialize
-            TPointVec tmp(points);
-            m_Xmeans.setPoints(tmp);
-
-            // Run
-            m_Xmeans.run(m_ImproveParamsKmeansIterations,
-                         m_ImproveStructureClusterSeeds,
-                         m_ImproveStructureKmeansIterations);
-
-            // Extract
-            TPointVecCRefVec clusterPoints;
-            for (std::size_t i = 0u; i < m_Xmeans.clusters().size(); ++i)
-            {
-                clusterPoints.push_back(boost::cref(m_Xmeans.clusters()[i].points()));
-            }
-            this->extract(points, clusterPoints, result);
-        }
-
-    private:
-        //! The x-means implementation.
-        CXMeans<POINT, COST> m_Xmeans;
-        //! The number of iterations to use in k-means for a single round
-        //! of improve parameters.
-        std::size_t m_ImproveParamsKmeansIterations;
-        //! The number of random seeds to try when initializing k-means
-        //! for a single round of improve structure.
-        std::size_t m_ImproveStructureClusterSeeds;
-        //! The number of iterations to use in k-means for a single round
-        //! of improve structure.
-        std::size_t m_ImproveStructureKmeansIterations;
+class CBootstrapClustererFacade<CXMeans<POINT, COST>> : private CBootstrapClustererFacadeExtractClusters<POINT> {
+public:
+    using TSizeVec = std::vector<std::size_t>;
+    using TSizeVecVec = std::vector<TSizeVec>;
+    using TPointVec = std::vector<POINT>;
+
+public:
+    CBootstrapClustererFacade(const CXMeans<POINT, COST>& xmeans,
+                              std::size_t improveParamsKmeansIterations,
+                              std::size_t improveStructureClusterSeeds,
+                              std::size_t improveStructureKmeansIterations)
+        : m_Xmeans(xmeans),
+          m_ImproveParamsKmeansIterations(improveParamsKmeansIterations),
+          m_ImproveStructureClusterSeeds(improveStructureClusterSeeds),
+          m_ImproveStructureKmeansIterations(improveStructureKmeansIterations) {}
+
+    //! \note Assumes \p points are sorted.
+    void cluster(const TPointVec& points, TSizeVecVec& result) {
+        using TPointVecCRef = boost::reference_wrapper<const TPointVec>;
+        using TPointVecCRefVec = std::vector<TPointVecCRef>;
+
+        // Initialize
+        TPointVec tmp(points);
+        m_Xmeans.setPoints(tmp);
+
+        // Run
+        m_Xmeans.run(m_ImproveParamsKmeansIterations, m_ImproveStructureClusterSeeds, m_ImproveStructureKmeansIterations);
+
+        // Extract
+        TPointVecCRefVec clusterPoints;
+        for (std::size_t i = 0u; i < m_Xmeans.clusters().size(); ++i) {
+            clusterPoints.push_back(boost::cref(m_Xmeans.clusters()[i].points()));
+        }
+        this->extract(points, clusterPoints, result);
+    }
+
+private:
+    //! The x-means implementation.
+    CXMeans<POINT, COST> m_Xmeans;
+    //! The number of iterations to use in k-means for a single round
+    //! of improve parameters.
+    std::size_t m_ImproveParamsKmeansIterations;
+    //! The number of random seeds to try when initializing k-means
+    //! for a single round of improve structure.
+    std::size_t m_ImproveStructureClusterSeeds;
+    //! The number of iterations to use in k-means for a single round
+    //! of improve structure.
+    std::size_t m_ImproveStructureKmeansIterations;
 };

 //! \brief Adapts the k-means implementation for use by the bootstrap
 //! clusterer.
 template<typename POINT>
-class CBootstrapClustererFacade<CKMeansFast<POINT> > : private CBootstrapClustererFacadeExtractClusters<POINT>
-{
-    public:
-        using TSizeVec = std::vector<std::size_t>;
-        using TSizeVecVec = std::vector<TSizeVec>;
-        using TPointVec = std::vector<POINT>;
-
-    public:
-        CBootstrapClustererFacade(const CKMeansFast<POINT> &kmeans,
-                                  std::size_t k,
-                                  std::size_t maxIterations) :
-                m_Kmeans(kmeans),
-                m_K(k),
-                m_MaxIterations(maxIterations)
-        {}
-
-        //! \note Assumes \p points are sorted.
-        void cluster(const TPointVec &points, TSizeVecVec &result)
-        {
-            using TPointVecVec = std::vector<TPointVec>;
-
-            // Initialize
-            TPointVec tmp(points);
-            m_Kmeans.setPoints(tmp);
-            TPointVec centres;
-            CKMeansPlusPlusInitialization<POINT, CPRNG::CXorShift1024Mult> seedCentres(m_Rng);
-            seedCentres.run(points, m_K, centres);
-            m_Kmeans.setCentres(centres);
-
-            // Run
-            m_Kmeans.run(m_MaxIterations);
-
-            // Extract
-            TPointVecVec clusterPoints;
-            m_Kmeans.closestPoints(clusterPoints);
-            this->extract(points, clusterPoints, result);
-        }
-
-    private:
-        //! The random number generator.
-        CPRNG::CXorShift1024Mult m_Rng;
-        //! The k-means implementation.
-        CKMeansFast<POINT> m_Kmeans;
-        //! The number of clusters to use.
-        std::size_t m_K;
-        //! The number of iterations to use in k-means.
-        std::size_t m_MaxIterations;
+class CBootstrapClustererFacade<CKMeansFast<POINT>> : private CBootstrapClustererFacadeExtractClusters<POINT> {
+public:
+    using TSizeVec = std::vector<std::size_t>;
+    using TSizeVecVec = std::vector<TSizeVec>;
+    using TPointVec = std::vector<POINT>;
+
+public:
+    CBootstrapClustererFacade(const CKMeansFast<POINT>& kmeans, std::size_t k, std::size_t maxIterations)
+        : m_Kmeans(kmeans), m_K(k), m_MaxIterations(maxIterations) {}
+
+    //! \note Assumes \p points are sorted.
+    void cluster(const TPointVec& points, TSizeVecVec& result) {
+        using TPointVecVec = std::vector<TPointVec>;
+
+        // Initialize
+        TPointVec tmp(points);
+        m_Kmeans.setPoints(tmp);
+        TPointVec centres;
+        CKMeansPlusPlusInitialization<POINT, CPRNG::CXorShift1024Mult> seedCentres(m_Rng);
+        seedCentres.run(points, m_K, centres);
+        m_Kmeans.setCentres(centres);
+
+        // Run
+        m_Kmeans.run(m_MaxIterations);
+
+        // Extract
+        TPointVecVec clusterPoints;
+        m_Kmeans.closestPoints(clusterPoints);
+        this->extract(points, clusterPoints, result);
+    }
+
+private:
+    //! The random number generator.
+    CPRNG::CXorShift1024Mult m_Rng;
+    //! The k-means implementation.
+    CKMeansFast<POINT> m_Kmeans;
+    //! The number of clusters to use.
+    std::size_t m_K;
+    //! The number of iterations to use in k-means.
+    std::size_t m_MaxIterations;
 };
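For orientation, this is how the two bootstrap clustering entry points declared below are intended to be called. A hypothetical call site: the point type, the header names and every numeric parameter are illustrative assumptions, not part of this patch.

#include <maths/CBootstrapClusterer.h>
#include <maths/CKMeansFast.h>
#include <maths/CLinearAlgebra.h>

#include <cstddef>
#include <vector>

using namespace ml;

// Cluster 2-d points with bootstrapped k-means; all parameters below are
// example values only.
void clusterWithBootstrap(std::vector<maths::CVectorNx1<double, 2>>& points,
                          std::vector<std::vector<std::size_t>>& clustering) {
    maths::CKMeansFast<maths::CVectorNx1<double, 2>> kmeans;
    maths::bootstrapCluster(points,
                            20,   // B: number of bootstrap samples
                            kmeans,
                            3,    // k: clusters per k-means run
                            15,   // maximum k-means iterations per run
                            0.3,  // overlap threshold
                            3.0,  // chaining factor
                            clustering);
}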
 //! Cluster \p points using \p B bootstrap samples using x-means.
@@ -1239,41 +1022,35 @@ class CBootstrapClustererFacade<CKMeansFast<POINT> > : private CBootstrapCluster
 //! clusters.
 //! \param[out] result Filled in with the clustering of \p points.
 template<typename POINT, typename COST>
-void bootstrapCluster(std::vector<POINT> &points,
+void bootstrapCluster(std::vector<POINT>& points,
                       std::size_t B,
-                      const CXMeans<POINT, COST> &xmeans,
+                      const CXMeans<POINT, COST>& xmeans,
                       std::size_t improveParamsKmeansIterations,
                       std::size_t improveStructureClusterSeeds,
                       std::size_t improveStructureKmeansIterations,
                       double overlapThreshold,
                       double chainingFactor,
-                      std::vector<std::vector<std::size_t> > &result)
-{
-    CBootstrapClustererFacade<CXMeans<POINT, COST> > clusterer(
-            xmeans,
-            improveParamsKmeansIterations,
-            improveStructureClusterSeeds,
-            improveStructureKmeansIterations);
+                      std::vector<std::vector<std::size_t>>& result) {
+    CBootstrapClustererFacade<CXMeans<POINT, COST>> clusterer(
+        xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations);
     CBootstrapClusterer<POINT> bootstrapClusterer(overlapThreshold, chainingFactor);
     bootstrapClusterer.run(B, clusterer, points, result);
 }

 //! Cluster \p points using \p B bootstrap samples using k-means.
 template<typename POINT>
-void bootstrapCluster(std::vector<POINT> &points,
+void bootstrapCluster(std::vector<POINT>& points,
                       std::size_t B,
-                      const CKMeansFast<POINT> &kmeans,
+                      const CKMeansFast<POINT>& kmeans,
                       std::size_t k,
                       std::size_t maxIterations,
                       double overlapThreshold,
                       double chainingFactor,
-                      std::vector<std::vector<std::size_t> > &result)
-{
-    CBootstrapClustererFacade<CKMeansFast<POINT> > clusterer(kmeans, k, maxIterations);
+                      std::vector<std::vector<std::size_t>>& result) {
+    CBootstrapClustererFacade<CKMeansFast<POINT>> clusterer(kmeans, k, maxIterations);
     CBootstrapClusterer<POINT> bootstrapClusterer(overlapThreshold, chainingFactor);
     bootstrapClusterer.run(B, clusterer, points, result);
 }
-
 }
 }

diff --git a/include/maths/CBoundingBox.h b/include/maths/CBoundingBox.h
index cfedf9593a..f25076bf79 100644
--- a/include/maths/CBoundingBox.h
+++ b/include/maths/CBoundingBox.h
@@ -15,10 +15,8 @@
 #include
 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

 //! \brief An axis aligned bounding box.
 //!
@@ -26,110 +24,89 @@ namespace maths
 //! Holds the bottom left and top right corners and provides
 //! various utility functions need by the x-means algorithm.
 template<typename POINT>
-class CBoundingBox
-{
-    public:
-        //! See core::CMemory.
-        static bool dynamicSizeAlwaysZero()
-        {
-            return core::memory_detail::SDynamicSizeAlwaysZero<POINT>::value();
-        }
-        using TPointPrecise = typename SFloatingPoint<POINT, double>::Type;
-
-    public:
-        CBoundingBox() : m_Empty(true), m_A(), m_B() {}
-
-        CBoundingBox(const POINT &x) : m_Empty(false), m_A(x), m_B(x) {}
-
-        //! Clear the bounding box.
-        void clear()
-        {
-            m_Empty = true;
-            m_A = m_B = POINT();
-        }
-
-        //! Add \p p point, i.e. find the bounding box of the point
-        //! and this bounding box.
-        void add(const POINT &p)
-        {
-            if (m_Empty)
-            {
-                m_A = m_B = p;
-            }
-            else
-            {
-                m_A = min(m_A, p);
-                m_B = max(m_B, p);
-            }
-            m_Empty = false;
-        }
+class CBoundingBox {
+public:
+    //! See core::CMemory.
+    static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero<POINT>::value(); }
+    using TPointPrecise = typename SFloatingPoint<POINT, double>::Type;
+
+public:
+    CBoundingBox() : m_Empty(true), m_A(), m_B() {}
+
+    CBoundingBox(const POINT& x) : m_Empty(false), m_A(x), m_B(x) {}
+
+    //! Clear the bounding box.
+    void clear() {
+        m_Empty = true;
+        m_A = m_B = POINT();
+    }
+
+    //! Add the point \p p, i.e. find the bounding box of the point
+    //! and this bounding box.
+    void add(const POINT& p) {
+        if (m_Empty) {
+            m_A = m_B = p;
+        } else {
+            m_A = min(m_A, p);
+            m_B = max(m_B, p);
+        }
+        m_Empty = false;
+    }
+
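The corner update in add() above is the whole algorithm: the box corners are just running coordinate-wise minima and maxima. A minimal sketch with a plain 2-d struct standing in for the library's POINT types (values are hypothetical):

#include <algorithm>
#include <iostream>

struct Point2 {
    double x, y;
};

Point2 min(const Point2& a, const Point2& b) { return {std::min(a.x, b.x), std::min(a.y, b.y)}; }
Point2 max(const Point2& a, const Point2& b) { return {std::max(a.x, b.x), std::max(a.y, b.y)}; }

int main() {
    // Grow a box one point at a time, seeded with the first point.
    Point2 a{2.0, 3.0}, b{a};
    for (Point2 p : {Point2{-1.0, 4.0}, Point2{3.0, 0.5}}) {
        a = min(a, p);
        b = max(b, p);
    }
    std::cout << "blc = (" << a.x << ", " << a.y << ")\n"   // (-1, 0.5)
              << "trc = (" << b.x << ", " << b.y << ")\n";  // (3, 4)
    return 0;
}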
+    //! Add \p other bounding box, i.e. find the bounding box of
+    //! the two bounding boxes.
+    void add(const CBoundingBox& other) {
+        if (m_Empty) {
+            *this = other;
+        } else if (!other.m_Empty) {
+            m_A = min(m_A, other.m_A);
+            m_B = max(m_B, other.m_B);
+            m_Empty = false;
+        }
+    }

-        //! Get the bottom left corner.
-        const POINT &blc() const { return m_A; }
-
-        //! Get the top right corner.
-        const POINT &trc() const { return m_B; }
-
-        //! Get the centre of the bounding box.
-        POINT centre() const
-        {
-            return POINT(TPointPrecise(m_A + m_B) / 2.0);
-        }
-
-        //! Check if \p x is everywhere closer to the bounding box
-        //! than \p y.
-        //!
-        //! The idea is fairly simple: find the corner which is closest
-        //! to the hyperplane bisecting the vector from \p x to \p y.
-        //! Note that this plane is boundary of the region where every
-        //! point is closer to \p x or \p y. If the corner is in the
-        //! region closer to \p x then the bounding box must necessarily
-        //! be in this region too and no point can therefore be closer
-        //! to \p y than \p x.
-        bool closerToX(const POINT &x, const POINT &y) const
-        {
-            POINT xy = y - x;
-            POINT f(0);
-            for (std::size_t i = 0u; i < x.dimension(); ++i)
-            {
-                f(i) = xy(i) < 0 ? m_A(i) : m_B(i);
-            }
-            return (f - x).euclidean() <= (f - y).euclidean();
-        }
-
-        //! Print this bounding box.
-        std::string print() const
-        {
-            std::ostringstream result;
-            result << "{" << m_A << ", " << m_B << "}";
-            return result.str();
-        }
+    //! Get the bottom left corner.
+    const POINT& blc() const { return m_A; }
+
+    //! Get the top right corner.
+    const POINT& trc() const { return m_B; }
+
+    //! Get the centre of the bounding box.
+    POINT centre() const { return POINT(TPointPrecise(m_A + m_B) / 2.0); }
+
+    //! Check if \p x is everywhere closer to the bounding box
+    //! than \p y.
+    //!
+    //! The idea is fairly simple: find the corner which is closest
+    //! to the hyperplane bisecting the vector from \p x to \p y.
+    //! Note that this plane is the boundary of the region where every
+    //! point is closer to \p x or \p y. If the corner is in the
+    //! region closer to \p x then the bounding box must necessarily
+    //! be in this region too and no point can therefore be closer
+    //! to \p y than \p x.
+    bool closerToX(const POINT& x, const POINT& y) const {
+        POINT xy = y - x;
+        POINT f(0);
+        for (std::size_t i = 0u; i < x.dimension(); ++i) {
+            f(i) = xy(i) < 0 ? m_A(i) : m_B(i);
+        }
+        return (f - x).euclidean() <= (f - y).euclidean();
+    }
+
+    //! Print this bounding box.
+    std::string print() const {
+        std::ostringstream result;
+        result << "{" << m_A << ", " << m_B << "}";
+        return result.str();
+    }
+
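The closerToX() test above needs to examine only one corner: per coordinate it picks the corner furthest along the direction from \p x towards \p y, and if even that worst-case corner is nearer \p x then every box point is. A minimal 2-d sketch of that corner selection (plain arrays, hypothetical values):

#include <cmath>
#include <cstddef>
#include <iostream>

int main() {
    double a[2] = {0.0, 0.0}; // bottom left corner
    double b[2] = {1.0, 1.0}; // top right corner
    double x[2] = {0.5, 0.5}; // candidate centre x
    double y[2] = {3.0, 0.5}; // candidate centre y

    // The critical corner: maximal along y - x in every coordinate.
    double f[2];
    for (std::size_t i = 0; i < 2; ++i) {
        f[i] = (y[i] - x[i]) < 0.0 ? a[i] : b[i];
    }

    auto dist2 = [](const double* u, const double* v) {
        return std::pow(u[0] - v[0], 2.0) + std::pow(u[1] - v[1], 2.0);
    };
    bool closerToX = dist2(f, x) <= dist2(f, y);
    std::cout << (closerToX ? "every box point is closer to x\n"
                            : "some box point may be closer to y\n");
    return 0;
}

+private:
+    //! True if this is empty and false otherwise.
+    bool m_Empty;
+
+    //! The bottom left and top right corner of the bounding box.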
+ POINT m_A, m_B; }; - - } } diff --git a/include/maths/CCalendarComponent.h b/include/maths/CCalendarComponent.h index 55b4b65f9c..9e92a008cf 100644 --- a/include/maths/CCalendarComponent.h +++ b/include/maths/CCalendarComponent.h @@ -17,15 +17,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Estimates a calendar component of a time series. //! @@ -39,132 +36,127 @@ namespace maths //! //! The bucketing is aged by relaxing it back towards uniform and aging the counts of the //! mean value for each bucket as usual. -class MATHS_EXPORT CCalendarComponent : private CDecompositionComponent -{ - public: - //! \param[in] feature The calendar feature. - //! \param[in] maxSize The maximum number of component buckets. - //! \param[in] decayRate Controls the rate at which information is lost from - //! its adaptive bucketing. - //! \param[in] minimumBucketLength The minimum bucket length permitted in the - //! adaptive bucketing. - //! \param[in] boundaryCondition The boundary condition to use for the splines. - //! \param[in] valueInterpolationType The style of interpolation to use for - //! computing values. - //! \param[in] varianceInterpolationType The style of interpolation to use for - //! computing variances. - CCalendarComponent(const CCalendarFeature &feature, - std::size_t maxSize, - double decayRate = 0.0, - double minimumBucketLength = 0.0, - CSplineTypes::EBoundaryCondition boundaryCondition = CSplineTypes::E_Periodic, - CSplineTypes::EType valueInterpolationType = CSplineTypes::E_Cubic, - CSplineTypes::EType varianceInterpolationType = CSplineTypes::E_Linear); - - //! Construct by traversing part of an state document. - CCalendarComponent(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser, - CSplineTypes::EType valueInterpolationType = CSplineTypes::E_Cubic, - CSplineTypes::EType varianceInterpolationType = CSplineTypes::E_Linear); - - //! An efficient swap of the contents of two components. - void swap(CCalendarComponent &other); - - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Check if the component has been estimated. - bool initialized() const; - - //! Initialize the adaptive bucketing. - void initialize(); - - //! Get the size of this component. - std::size_t size() const; - - //! Clear all data. - void clear(); - - //! Linearly scale the component's by \p scale. - void linearScale(core_t::TTime time, double scale); - - //! Adds a value \f$(t, f(t))\f$ to this component. - //! - //! \param[in] time The time of the point. - //! \param[in] value The value at \p time. - //! \param[in] weight The weight of \p value. The smaller this is the - //! less influence it has on the component. - void add(core_t::TTime time, double value, double weight = 1.0); - - //! Update the interpolation of the bucket values. - //! - //! \param[in] time The time at which to interpolate. - //! \param[in] refine If false disable refining the bucketing. - void interpolate(core_t::TTime time, bool refine = true); - - //! Get the rate at which the seasonal component loses information. - double decayRate() const; - - //! Set the rate at which the seasonal component loses information. - void decayRate(double decayRate); - - //! Age out old data to account for elapsed \p time. - void propagateForwardsByTime(double time); - - //! 
Get the calendar feature. - CCalendarFeature feature() const; - - //! Interpolate the component at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] confidence The symmetric confidence interval for the variance - //! as a percentage. - TDoubleDoublePr value(core_t::TTime time, double confidence) const; - - //! Get the mean value of the component. - double meanValue() const; - - //! Get the variance of the residual about the prediction at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] confidence The symmetric confidence interval for the - //! variance as a percentage. - TDoubleDoublePr variance(core_t::TTime time, double confidence) const; - - //! Get the mean variance of the component residuals. - double meanVariance() const; - - //! Get the maximum ratio between a residual variance and the mean - //! residual variance. - double heteroscedasticity() const; - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; - - //! Debug the memory used by this component. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component. - std::size_t memoryUsage() const; - - private: - //! Create by traversing a state document. - bool acceptRestoreTraverser(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser); - - private: - //! The mean and variance in collection of buckets covering the period. - CCalendarComponentAdaptiveBucketing m_Bucketing; +class MATHS_EXPORT CCalendarComponent : private CDecompositionComponent { +public: + //! \param[in] feature The calendar feature. + //! \param[in] maxSize The maximum number of component buckets. + //! \param[in] decayRate Controls the rate at which information is lost from + //! its adaptive bucketing. + //! \param[in] minimumBucketLength The minimum bucket length permitted in the + //! adaptive bucketing. + //! \param[in] boundaryCondition The boundary condition to use for the splines. + //! \param[in] valueInterpolationType The style of interpolation to use for + //! computing values. + //! \param[in] varianceInterpolationType The style of interpolation to use for + //! computing variances. + CCalendarComponent(const CCalendarFeature& feature, + std::size_t maxSize, + double decayRate = 0.0, + double minimumBucketLength = 0.0, + CSplineTypes::EBoundaryCondition boundaryCondition = CSplineTypes::E_Periodic, + CSplineTypes::EType valueInterpolationType = CSplineTypes::E_Cubic, + CSplineTypes::EType varianceInterpolationType = CSplineTypes::E_Linear); + + //! Construct by traversing part of an state document. + CCalendarComponent(double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser, + CSplineTypes::EType valueInterpolationType = CSplineTypes::E_Cubic, + CSplineTypes::EType varianceInterpolationType = CSplineTypes::E_Linear); + + //! An efficient swap of the contents of two components. + void swap(CCalendarComponent& other); + + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Check if the component has been estimated. + bool initialized() const; + + //! Initialize the adaptive bucketing. + void initialize(); + + //! Get the size of this component. + std::size_t size() const; + + //! Clear all data. + void clear(); + + //! Linearly scale the component's by \p scale. + void linearScale(core_t::TTime time, double scale); + + //! 
Adds a value \f$(t, f(t))\f$ to this component. + //! + //! \param[in] time The time of the point. + //! \param[in] value The value at \p time. + //! \param[in] weight The weight of \p value. The smaller this is the + //! less influence it has on the component. + void add(core_t::TTime time, double value, double weight = 1.0); + + //! Update the interpolation of the bucket values. + //! + //! \param[in] time The time at which to interpolate. + //! \param[in] refine If false disable refining the bucketing. + void interpolate(core_t::TTime time, bool refine = true); + + //! Get the rate at which the seasonal component loses information. + double decayRate() const; + + //! Set the rate at which the seasonal component loses information. + void decayRate(double decayRate); + + //! Age out old data to account for elapsed \p time. + void propagateForwardsByTime(double time); + + //! Get the calendar feature. + CCalendarFeature feature() const; + + //! Interpolate the component at \p time. + //! + //! \param[in] time The time of interest. + //! \param[in] confidence The symmetric confidence interval for the variance + //! as a percentage. + TDoubleDoublePr value(core_t::TTime time, double confidence) const; + + //! Get the mean value of the component. + double meanValue() const; + + //! Get the variance of the residual about the prediction at \p time. + //! + //! \param[in] time The time of interest. + //! \param[in] confidence The symmetric confidence interval for the + //! variance as a percentage. + TDoubleDoublePr variance(core_t::TTime time, double confidence) const; + + //! Get the mean variance of the component residuals. + double meanVariance() const; + + //! Get the maximum ratio between a residual variance and the mean + //! residual variance. + double heteroscedasticity() const; + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; + + //! Debug the memory used by this component. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component. + std::size_t memoryUsage() const; + +private: + //! Create by traversing a state document. + bool acceptRestoreTraverser(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser); + +private: + //! The mean and variance in collection of buckets covering the period. + CCalendarComponentAdaptiveBucketing m_Bucketing; }; //! Create a free function which will be found by Koenig lookup. -inline void swap(CCalendarComponent &lhs, CCalendarComponent &rhs) -{ +inline void swap(CCalendarComponent& lhs, CCalendarComponent& rhs) { lhs.swap(rhs); } - } } diff --git a/include/maths/CCalendarComponentAdaptiveBucketing.h b/include/maths/CCalendarComponentAdaptiveBucketing.h index 04d4ba641a..0152d44f4c 100644 --- a/include/maths/CCalendarComponentAdaptiveBucketing.h +++ b/include/maths/CCalendarComponentAdaptiveBucketing.h @@ -20,186 +20,175 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CSeasonalTime; //! \brief An adaptive bucketing of the value of a calendar component. //! //! DESCRIPTION:\n //! See CAdaptiveBucketing for details. 
-class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucketing -{ - public: - using TFloatMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; - - public: - CCalendarComponentAdaptiveBucketing(); - explicit CCalendarComponentAdaptiveBucketing(CCalendarFeature feature, - double decayRate = 0.0, - double minimumBucketLength = 0.0); - //! Construct by traversing a state document. - CCalendarComponentAdaptiveBucketing(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser); - - //! Persist by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Efficiently swap the contents of two bucketing objects. - void swap(CCalendarComponentAdaptiveBucketing &other); - - //! Check if the bucketing has been initialized. - bool initialized() const; - - //! Create a new uniform bucketing with \p n buckets. - //! - //! \param[in] n The number of buckets. - bool initialize(std::size_t n); - - //! Get the number of buckets. - std::size_t size() const; - - //! Clear the contents of this bucketing and recover any - //! allocated memory. - void clear(); - - //! Linearly scale the bucket values by \p scale. - void linearScale(double scale); - - //! Add the function value at \p time. - //! - //! \param[in] time The time of \p value. - //! \param[in] value The value of the function at \p time. - //! \param[in] weight The weight of function point. The smaller - //! this is the less influence it has on the bucket. - void add(core_t::TTime time, double value, double weight = 1.0); - - //! Get the calendar feature. - CCalendarFeature feature() const; - - //! Set the rate at which the bucketing loses information. - void decayRate(double value); - - //! Get the rate at which the bucketing loses information. - double decayRate() const; - - //! Age the bucket values to account for \p time elapsed time. - void propagateForwardsByTime(double time); - - //! Get the minimum permitted bucket length. - double minimumBucketLength() const; - - //! Refine the bucket end points to minimize the maximum averaging - //! error in any bucket. - //! - //! \param[in] time The time at which to refine. - void refine(core_t::TTime time); - - //! The count in the bucket containing \p time. - double count(core_t::TTime time) const; - - //! Get the count of buckets with no values. - std::size_t emptyBucketCount() const; - - //! Get the value at \p time. - const TFloatMeanVarAccumulator *value(core_t::TTime time) const; - - //! Get a set of knot points and knot point values to use for - //! interpolating the bucket values. - //! - //! \param[in] time The time at which to get the knot points. - //! \param[in] boundary Controls the style of start and end knots. - //! \param[out] knots Filled in with the knot points to interpolate. - //! \param[out] values Filled in with the values at \p knots. - //! \param[out] variances Filled in with the variances at \p knots. - //! \return True if there are sufficient knot points to interpolate - //! and false otherwise. - bool knots(core_t::TTime time, - CSplineTypes::EBoundaryCondition boundary, - TDoubleVec &knots, - TDoubleVec &values, - TDoubleVec &variances) const; - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; - - //! Get the memory used by this component - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component - std::size_t memoryUsage() const; - - //! 
\name Test Functions - //@{ - //! Get the bucket end points. - const TFloatVec &endpoints() const; - - //! Get the total count of in the bucketing. - double count() const; +class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucketing { +public: + using TFloatMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; + +public: + CCalendarComponentAdaptiveBucketing(); + explicit CCalendarComponentAdaptiveBucketing(CCalendarFeature feature, double decayRate = 0.0, double minimumBucketLength = 0.0); + //! Construct by traversing a state document. + CCalendarComponentAdaptiveBucketing(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser); + + //! Persist by passing information to the supplied inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Efficiently swap the contents of two bucketing objects. + void swap(CCalendarComponentAdaptiveBucketing& other); + + //! Check if the bucketing has been initialized. + bool initialized() const; - //! Get the bucket regressions. - TDoubleVec values(core_t::TTime time) const; + //! Create a new uniform bucketing with \p n buckets. + //! + //! \param[in] n The number of buckets. + bool initialize(std::size_t n); + + //! Get the number of buckets. + std::size_t size() const; + + //! Clear the contents of this bucketing and recover any + //! allocated memory. + void clear(); + + //! Linearly scale the bucket values by \p scale. + void linearScale(double scale); + + //! Add the function value at \p time. + //! + //! \param[in] time The time of \p value. + //! \param[in] value The value of the function at \p time. + //! \param[in] weight The weight of function point. The smaller + //! this is the less influence it has on the bucket. + void add(core_t::TTime time, double value, double weight = 1.0); - //! Get the bucket variances. - TDoubleVec variances() const; - //@} + //! Get the calendar feature. + CCalendarFeature feature() const; - private: - using TFloatMeanVarVec = std::vector; + //! Set the rate at which the bucketing loses information. + void decayRate(double value); + + //! Get the rate at which the bucketing loses information. + double decayRate() const; - private: - //! Restore by traversing a state document - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Age the bucket values to account for \p time elapsed time. + void propagateForwardsByTime(double time); - //! Compute the values corresponding to the change in end - //! points from \p endpoints. The values are assigned based - //! on their intersection with each bucket in the previous - //! bucket configuration. - //! - //! \param[in] endpoints The old end points. - void refresh(const TFloatVec &endpoints); + //! Get the minimum permitted bucket length. + double minimumBucketLength() const; - //! Check if \p time is in the this component's window. - virtual bool inWindow(core_t::TTime time) const; + //! Refine the bucket end points to minimize the maximum averaging + //! error in any bucket. + //! + //! \param[in] time The time at which to refine. + void refine(core_t::TTime time); - //! Add the function value to \p bucket. - virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight); + //! The count in the bucket containing \p time. + double count(core_t::TTime time) const; - //! Get the offset w.r.t. the start of the bucketing of \p time. - virtual double offset(core_t::TTime time) const; - - //! The count in \p bucket. 
-        virtual double count(std::size_t bucket) const;
-
-        //! Get the predicted value for the \p bucket at \p time.
-        virtual double predict(std::size_t bucket, core_t::TTime time, double offset) const;
-
-        //! Get the variance of \p bucket.
-        virtual double variance(std::size_t bucket) const;
-
-    private:
-        //! The time provider.
-        CCalendarFeature m_Feature;
-
-        //! The bucket values.
-        TFloatMeanVarVec m_Values;
+    //! Get the count of buckets with no values.
+    std::size_t emptyBucketCount() const;
+
+    //! Get the value at \p time.
+    const TFloatMeanVarAccumulator* value(core_t::TTime time) const;
+
+    //! Get a set of knot points and knot point values to use for
+    //! interpolating the bucket values.
+    //!
+    //! \param[in] time The time at which to get the knot points.
+    //! \param[in] boundary Controls the style of start and end knots.
+    //! \param[out] knots Filled in with the knot points to interpolate.
+    //! \param[out] values Filled in with the values at \p knots.
+    //! \param[out] variances Filled in with the variances at \p knots.
+    //! \return True if there are sufficient knot points to interpolate
+    //! and false otherwise.
+    bool knots(core_t::TTime time,
+               CSplineTypes::EBoundaryCondition boundary,
+               TDoubleVec& knots,
+               TDoubleVec& values,
+               TDoubleVec& variances) const;
+
+    //! Get a checksum for this object.
+    uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Debug the memory used by this component.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this component.
+    std::size_t memoryUsage() const;
+
+    //! \name Test Functions
+    //@{
+    //! Get the bucket end points.
+    const TFloatVec& endpoints() const;
+
+    //! Get the total count of values in the bucketing.
+    double count() const;
+
+    //! Get the bucket regressions.
+    TDoubleVec values(core_t::TTime time) const;
+
+    //! Get the bucket variances.
+    TDoubleVec variances() const;
+    //@}
+
+private:
+    using TFloatMeanVarVec = std::vector<TFloatMeanVarAccumulator>;
+
+private:
+    //! Restore by traversing a state document
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Compute the values corresponding to the change in end
+    //! points from \p endpoints. The values are assigned based
+    //! on their intersection with each bucket in the previous
+    //! bucket configuration.
+    //!
+    //! \param[in] endpoints The old end points.
+    void refresh(const TFloatVec& endpoints);
+
+    //! Check if \p time is in this component's window.
+    virtual bool inWindow(core_t::TTime time) const;
+
+    //! Add the function value to \p bucket.
+    virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight);
+
+    //! Get the offset w.r.t. the start of the bucketing of \p time.
+    virtual double offset(core_t::TTime time) const;
+
+    //! The count in \p bucket.
+    virtual double count(std::size_t bucket) const;
+
+    //! Get the predicted value for the \p bucket at \p time.
+    virtual double predict(std::size_t bucket, core_t::TTime time, double offset) const;
+
+    //! Get the variance of \p bucket.
+    virtual double variance(std::size_t bucket) const;
+
+private:
+    //! The time provider.
+    CCalendarFeature m_Feature;
+
+    //! The bucket values.
+    TFloatMeanVarVec m_Values;
 };

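The free swap declared just below exists so that generic code finds this type's cheap swap through argument-dependent (Koenig) lookup instead of falling back to copy-based std::swap. A minimal self-contained sketch of that idiom (hypothetical type names):

#include <iostream>

namespace lib {
class CThing {
public:
    void swap(CThing& /*other*/) { std::cout << "member swap\n"; }
};

// Free function in the same namespace as CThing: found by ADL.
void swap(CThing& lhs, CThing& rhs) {
    lhs.swap(rhs);
}
}

template<typename T>
void genericReorder(T& a, T& b) {
    using std::swap; // fallback when no better overload exists
    swap(a, b);      // unqualified call: ADL selects lib::swap here
}

int main() {
    lib::CThing a, b;
    genericReorder(a, b); // prints "member swap"
    return 0;
}

 //! Create a free function which will be found by Koenig lookup.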
-inline void swap(CCalendarComponentAdaptiveBucketing &lhs, - CCalendarComponentAdaptiveBucketing &rhs) -{ +inline void swap(CCalendarComponentAdaptiveBucketing& lhs, CCalendarComponentAdaptiveBucketing& rhs) { lhs.swap(rhs); } - } } diff --git a/include/maths/CCalendarFeature.h b/include/maths/CCalendarFeature.h index e163f16d23..50271499af 100644 --- a/include/maths/CCalendarFeature.h +++ b/include/maths/CCalendarFeature.h @@ -16,88 +16,79 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A light weight encoding of a calendar feature. //! //! IMPLEMENTATION:\n //! Note that this purposely doesn't use an enum for encoding the feature //! so that the member size is only 16 bits rather than sizeof(int). -class MATHS_EXPORT CCalendarFeature : boost::less_than_comparable< CCalendarFeature, - boost::equality_comparable< CCalendarFeature > > -{ - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - static const uint16_t DAYS_SINCE_START_OF_MONTH = 1; - static const uint16_t DAYS_BEFORE_END_OF_MONTH = 2; - static const uint16_t DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH = 3; - static const uint16_t DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH = 4; - static const uint16_t BEGIN_FEATURES = 1; - static const uint16_t END_FEATURES = 5; - - using TCalendarFeature4Ary = boost::array; - - public: - CCalendarFeature(); - CCalendarFeature(uint16_t feature, core_t::TTime time); - - //! Get all the features for \p time. - static TCalendarFeature4Ary features(core_t::TTime time); - - //! Initialize with day of week, month and the month and year. - void initialize(uint16_t feature, - int dayOfWeek, - int dayOfMonth, - int month, - int year); - - //! Initialize from \p value. - bool fromDelimited(const std::string &value); - - //! Convert to a delimited string. - std::string toDelimited() const; - - //! Check this and \p other for equality. - bool operator==(CCalendarFeature rhs) const; - - //! Total ordering of two calendar features. - bool operator<(CCalendarFeature rhs) const; - - //! \name Time Transforms - //@{ - //! The offset of \p time w.r.t. the start of the current month's - //! feature window. - core_t::TTime offset(core_t::TTime time) const; - - //! Check if \p time is in this feature's window. - bool inWindow(core_t::TTime time) const; - //@} - - //! Get this feature's window. - core_t::TTime window() const; - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; - - //! Get a debug description of the feature. - std::string print() const; - - private: - //! An invalid feature value. - static const uint16_t INVALID; - - private: - //! The feature. - uint16_t m_Feature; - //! The feature value. - uint16_t m_Value; -}; +class MATHS_EXPORT CCalendarFeature : boost::less_than_comparable> { +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + static const uint16_t DAYS_SINCE_START_OF_MONTH = 1; + static const uint16_t DAYS_BEFORE_END_OF_MONTH = 2; + static const uint16_t DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH = 3; + static const uint16_t DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH = 4; + static const uint16_t BEGIN_FEATURES = 1; + static const uint16_t END_FEATURES = 5; + + using TCalendarFeature4Ary = boost::array; + +public: + CCalendarFeature(); + CCalendarFeature(uint16_t feature, core_t::TTime time); + + //! Get all the features for \p time. + static TCalendarFeature4Ary features(core_t::TTime time); + + //! 
Initialize with day of week, month and the month and year. + void initialize(uint16_t feature, int dayOfWeek, int dayOfMonth, int month, int year); + + //! Initialize from \p value. + bool fromDelimited(const std::string& value); + + //! Convert to a delimited string. + std::string toDelimited() const; + + //! Check this and \p other for equality. + bool operator==(CCalendarFeature rhs) const; + //! Total ordering of two calendar features. + bool operator<(CCalendarFeature rhs) const; + + //! \name Time Transforms + //@{ + //! The offset of \p time w.r.t. the start of the current month's + //! feature window. + core_t::TTime offset(core_t::TTime time) const; + + //! Check if \p time is in this feature's window. + bool inWindow(core_t::TTime time) const; + //@} + + //! Get this feature's window. + core_t::TTime window() const; + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; + + //! Get a debug description of the feature. + std::string print() const; + +private: + //! An invalid feature value. + static const uint16_t INVALID; + +private: + //! The feature. + uint16_t m_Feature; + //! The feature value. + uint16_t m_Value; +}; } } diff --git a/include/maths/CCategoricalTools.h b/include/maths/CCategoricalTools.h index 4460864e95..deda6f9434 100644 --- a/include/maths/CCategoricalTools.h +++ b/include/maths/CCategoricalTools.h @@ -15,145 +15,132 @@ #include -namespace ml -{ -namespace maths -{ - -class MATHS_EXPORT CCategoricalTools : core::CNonInstantiatable, core::CNonCopyable -{ - public: - using TDoubleVec = std::vector; - using TSizeVec = std::vector; - - public: - //! Compute the probability of seeing a less likely sample from - //! the multinomial distribution with category counts \p ni and - //! category probabilities \p probabilities. - //! - //! This calculates the probability of seeing a less likely sample - //! than \f$\(\{n_i\}\)\f$ from the multinomial distribution - //!
-        //!   \f$\(\displaystyle f(\{n_j\}) = \frac{n!}{\prod_i{n_j}}\prod_i{p_j^{n_j}}\)\f$
-        //! 
- //! - //! where \f$\(n = \sum_i{n_i}\)\f$, i.e. the sum - //!
-        //!   \f$\(\displaystyle \sum_{\{n_j : f(\{n_j\}) \leq f(\{n_i\})\} }{ f(\{n_j\}) }\)\f$
-        //! 
- //! - //! This summation is estimated using the Gaussian approximation - //! to the multinomial distribution. - //! - //! \param[in] probabilities The category probabilities, which - //! should be normalized. - //! \param[in] i The categories. - //! \param[in] ni The category counts. - //! \param[out] result Filled in with an estimate of the probability - //! of seeing a less likely sample than category counts \p ni. - static bool probabilityOfLessLikelyMultinomialSample(const TDoubleVec &probabilities, - const TSizeVec &i, - const TSizeVec &ni, - double &result); - - //! Compute the probability of seeing less likely counts than \p ni - //! independently for each category in \p i whose probabilities are - //! \p probabilities. - //! - //! \param[in] probabilities The category probabilities, which - //! should be normalized. - //! \param[in] i The categories. - //! \param[in] ni The category counts. - //! \param[out] result Filled in with an estimate of the probability - //! of seeing a less likely count than nj in \p ni for each category - //! j in \p i. - static bool probabilityOfLessLikelyCategoryCount(TDoubleVec &probabilities, - const TSizeVec &i, - const TSizeVec &ni, - TDoubleVec &result, - std::size_t trials = 100); - - //! Compute the probability that a category will occur in \p n - //! samples from a multinomial with category probability \p probability. - //! - //! For a category \f$k\f$, this is: - //!
-        //!   \f$\displaystyle \sum_{\{\{n_j\}:\sum{n_j}=n\}} H(n_k) n! \prod_{j=1}^m\frac{p_j^{n_j}}{n_j!}\f$
-        //! 
- //! where \f$H(.)\f$ denotes the Heavyside function.\n\n - //! Summing over all other categories it is clear that this is just - //! the expectation of \f$H(.)\f$ w.r.t. the marginal of the category - //! \f$k\f$ which is binomial. - static double probabilityOfCategory(std::size_t n, - double probability); - - //! \brief Computes the expected number of distinct categories - //! in \p n samples from a multinomial random variable with m - //! categories. - //! - //! This computes the expectation of: - //!
-        //!   \f$\displaystyle E[unique(Y)]=\sum_{\{\{n_j\}:n_j>0,\sum{n_j}=n\}}kn!\prod_{j=1}^k{\frac{p_j^{n_j}}{n_j!}} \f$
-        //! 
- //! Here, \f$Y\f$ denotes the set of \p n random samples from a - //! multinomial with m categories. - //! - //! We calculate this summation by noting that we can write \f$k\f$ - //! as a sum of Heavyside functions and reorder the summations so we - //! end up computing a sum of the expectation of these functions w.r.t. - //! the marginal distributions of the multinomial. - //! - //! \warning It is the callers responsibility to ensure that the - //! probabilities are normalized. - static bool expectedDistinctCategories(const TDoubleVec &probabilities, - double n, - double &result); - - //! Get the log of the binomial coefficient \f$\binom{n}{m}\f$. - static double logBinomialCoefficient(std::size_t n, std::size_t m); - - //! Get the binomial coefficient \f$\binom{n}{m}\f$. - static double binomialCoefficient(std::size_t n, std::size_t m); - - //! Compute the log of the probability of a count of \p m from - //! a binomial with \p n trials and \p probability of success - //! \p p. - //! - //! This is - //!
-        //!   \f$\(\displaystyle \log\left(\frac{n!}{m!(n-m)!}p^m(1-p)^{n-m}\right)\)\f$
-        //! 
- //! - //! for \f$\(m \leq n\)\f$ and minus maximum double otherwise. - //! - //! \param[in] n The number of trials. - //! \param[in] p The probability of success. - //! \param[in] m The number of successes. - //! \param[out] result Filled in with the log probability. - static maths_t::EFloatingPointErrorStatus logBinomialProbability(std::size_t n, - double p, - std::size_t m, - double &result); - - //! Compute the log of the probability of a sample of \p ni counts - //! of categories from the multinomial with number of trials equal - //! to the the sum of \p ni and category probabilities \p probabilities. - //! - //! This is - //!
-        //!   \f$\(\displaystyle \log\left(\frac{n!}{ \prod_i{n_i!} }p^n_i\right)\)\f$
-        //! 
- //! - //! for \f$\(n = sum_i{n_i}\)\f$. - //! - //! \param[in] probabilities The category probabilities. - //! \param[in] ni The category counts. - //! \param[out] result Filled in with the log probability. - static maths_t::EFloatingPointErrorStatus logMultinomialProbability(const TDoubleVec &probabilities, - const TSizeVec &ni, - double &result); +namespace ml { +namespace maths { + +class MATHS_EXPORT CCategoricalTools : core::CNonInstantiatable, core::CNonCopyable { +public: + using TDoubleVec = std::vector; + using TSizeVec = std::vector; + +public: + //! Compute the probability of seeing a less likely sample from + //! the multinomial distribution with category counts \p ni and + //! category probabilities \p probabilities. + //! + //! This calculates the probability of seeing a less likely sample + //! than \f$\(\{n_i\}\)\f$ from the multinomial distribution + //!
+    //!   \f$\(\displaystyle f(\{n_j\}) = \frac{n!}{\prod_j{n_j!}}\prod_j{p_j^{n_j}}\)\f$
+    //! 
+ //! + //! where \f$\(n = \sum_i{n_i}\)\f$, i.e. the sum + //!
+    //!   \f$\(\displaystyle \sum_{\{n_j : f(\{n_j\}) \leq f(\{n_i\})\} }{ f(\{n_j\}) }\)\f$
+    //! 
+ //! + //! This summation is estimated using the Gaussian approximation + //! to the multinomial distribution. + //! + //! \param[in] probabilities The category probabilities, which + //! should be normalized. + //! \param[in] i The categories. + //! \param[in] ni The category counts. + //! \param[out] result Filled in with an estimate of the probability + //! of seeing a less likely sample than category counts \p ni. + static bool + probabilityOfLessLikelyMultinomialSample(const TDoubleVec& probabilities, const TSizeVec& i, const TSizeVec& ni, double& result); + + //! Compute the probability of seeing less likely counts than \p ni + //! independently for each category in \p i whose probabilities are + //! \p probabilities. + //! + //! \param[in] probabilities The category probabilities, which + //! should be normalized. + //! \param[in] i The categories. + //! \param[in] ni The category counts. + //! \param[out] result Filled in with an estimate of the probability + //! of seeing a less likely count than nj in \p ni for each category + //! j in \p i. + static bool probabilityOfLessLikelyCategoryCount(TDoubleVec& probabilities, + const TSizeVec& i, + const TSizeVec& ni, + TDoubleVec& result, + std::size_t trials = 100); + + //! Compute the probability that a category will occur in \p n + //! samples from a multinomial with category probability \p probability. + //! + //! For a category \f$k\f$, this is: + //!
+    //!   \f$\displaystyle \sum_{\{\{n_j\}:\sum{n_j}=n\}} H(n_k) n! \prod_{j=1}^m\frac{p_j^{n_j}}{n_j!}\f$
+    //! 
+    //! where \f$H(.)\f$ denotes the Heaviside function.\n\n
+    //! Summing over all other categories it is clear that this is just
+    //! the expectation of \f$H(.)\f$ w.r.t. the marginal of the category
+    //! \f$k\f$ which is binomial.
+    static double probabilityOfCategory(std::size_t n, double probability);
+
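Because the marginal of a single category count over n multinomial trials is Binomial(n, p), the expectation of the Heaviside indicator collapses to the closed form 1 - (1 - p)^n. A minimal sketch of that identity; the Monte Carlo check is illustrative only, not the library's implementation:

#include <cmath>
#include <cstddef>
#include <iostream>
#include <random>

int main() {
    std::size_t n = 20;
    double p = 0.1;

    // Closed form: probability the category occurs at least once.
    double closedForm = 1.0 - std::pow(1.0 - p, static_cast<double>(n));

    // Simulation against the binomial marginal for comparison.
    std::mt19937 rng(42);
    std::binomial_distribution<int> marginal(static_cast<int>(n), p);
    std::size_t hits = 0, trials = 100000;
    for (std::size_t i = 0; i < trials; ++i) {
        hits += marginal(rng) > 0 ? 1 : 0;
    }

    std::cout << "closed form = " << closedForm
              << ", simulated = " << static_cast<double>(hits) / trials << '\n';
    return 0;
}

+    //! \brief Computes the expected number of distinct categories
+    //! in \p n samples from a multinomial random variable with m
+    //! categories.
+    //!
+    //! This computes the expectation of:
+    //!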
+    //!   \f$\displaystyle E[unique(Y)]=\sum_{\{\{n_j\}:n_j>0,\sum{n_j}=n\}}kn!\prod_{j=1}^k{\frac{p_j^{n_j}}{n_j!}} \f$
+    //! 
+    //! Here, \f$Y\f$ denotes the set of \p n random samples from a
+    //! multinomial with m categories.
+    //!
+    //! We calculate this summation by noting that we can write \f$k\f$
+    //! as a sum of Heaviside functions and reorder the summations so we
+    //! end up computing a sum of the expectation of these functions w.r.t.
+    //! the marginal distributions of the multinomial.
+    //!
+    //! \warning It is the caller's responsibility to ensure that the
+    //! probabilities are normalized.
+    static bool expectedDistinctCategories(const TDoubleVec& probabilities, double n, double& result);
+
+    //! Get the log of the binomial coefficient \f$\binom{n}{m}\f$.
+    static double logBinomialCoefficient(std::size_t n, std::size_t m);
+
+    //! Get the binomial coefficient \f$\binom{n}{m}\f$.
+    static double binomialCoefficient(std::size_t n, std::size_t m);
+
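One standard way to evaluate the log binomial coefficient without overflowing, via std::lgamma; this is a sketch, not necessarily the library's implementation:

#include <cmath>
#include <cstddef>
#include <iostream>

// log(n choose m) = lgamma(n+1) - lgamma(m+1) - lgamma(n-m+1); this stays
// finite far beyond the range where n! overflows a double.
double logBinomialCoefficient(std::size_t n, std::size_t m) {
    return std::lgamma(static_cast<double>(n + 1)) -
           std::lgamma(static_cast<double>(m + 1)) -
           std::lgamma(static_cast<double>(n - m + 1));
}

int main() {
    // C(5, 2) = 10, so expect log(10) ~= 2.302585.
    std::cout << logBinomialCoefficient(5, 2) << '\n';
    // C(1000, 500) overflows a double but its log is representable.
    std::cout << logBinomialCoefficient(1000, 500) << '\n';
    return 0;
}

+    //! Compute the log of the probability of a count of \p m from
+    //! a binomial with \p n trials and \p probability of success
+    //! \p p.
+    //!
+    //! This is
+    //!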
+    //!   \f$\(\displaystyle \log\left(\frac{n!}{m!(n-m)!}p^m(1-p)^{n-m}\right)\)\f$
+    //! 
+    //!
+    //! for \f$\(m \leq n\)\f$ and minus maximum double otherwise.
+    //!
+    //! \param[in] n The number of trials.
+    //! \param[in] p The probability of success.
+    //! \param[in] m The number of successes.
+    //! \param[out] result Filled in with the log probability.
+    static maths_t::EFloatingPointErrorStatus logBinomialProbability(std::size_t n, double p, std::size_t m, double& result);
+
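The quantity declared here decomposes as log C(n, m) + m log(p) + (n - m) log(1 - p). A minimal sketch, reusing the lgamma approach from the previous example; edge handling is simplified relative to the library, which also reports a floating point error status:

#include <cmath>
#include <cstddef>
#include <iostream>

double logBinomialProbability(std::size_t n, double p, std::size_t m) {
    // log of the binomial coefficient via lgamma.
    double logCnm = std::lgamma(static_cast<double>(n + 1)) -
                    std::lgamma(static_cast<double>(m + 1)) -
                    std::lgamma(static_cast<double>(n - m + 1));
    // Add the log likelihood of m successes and n - m failures.
    return logCnm + static_cast<double>(m) * std::log(p) +
           static_cast<double>(n - m) * std::log(1.0 - p);
}

int main() {
    // P(m = 2 | n = 5, p = 0.5) = 10 / 32, so expect log(0.3125) ~= -1.1632.
    std::cout << logBinomialProbability(5, 0.5, 2) << '\n';
    return 0;
}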
+    //! Compute the log of the probability of a sample of \p ni counts
+    //! of categories from the multinomial with number of trials equal
+    //! to the sum of \p ni and category probabilities \p probabilities.
+    //!
+    //! This is
+    //!
+    //!   \f$\(\displaystyle \log\left(\frac{n!}{ \prod_i{n_i!} }\prod_i{p_i^{n_i}}\right)\)\f$
+    //! 
+ //! + //! for \f$\(n = sum_i{n_i}\)\f$. + //! + //! \param[in] probabilities The category probabilities. + //! \param[in] ni The category counts. + //! \param[out] result Filled in with the log probability. + static maths_t::EFloatingPointErrorStatus + logMultinomialProbability(const TDoubleVec& probabilities, const TSizeVec& ni, double& result); }; - } } diff --git a/include/maths/CChecksum.h b/include/maths/CChecksum.h index 9a90b09ad6..bef5f92e1d 100644 --- a/include/maths/CChecksum.h +++ b/include/maths/CChecksum.h @@ -25,13 +25,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace checksum_detail -{ +namespace checksum_detail { class BasicChecksum {}; class ContainerChecksum {}; @@ -42,16 +39,14 @@ class MemberHash {}; //! Auxiliary type used by has_const_iterator to test for a nested //! typedef. template -struct enable_if_type -{ +struct enable_if_type { using type = R; }; //! Auxiliary type used by has_checksum_function to test for a nested //! member function. template -struct enable_if_is_type -{ +struct enable_if_is_type { using type = R; }; @@ -67,13 +62,11 @@ struct enable_if_is_type //! 14.5.4/9 of the standard). //@{ template -struct container_selector -{ +struct container_selector { using value = BasicChecksum; }; template -struct container_selector::type> -{ +struct container_selector::type> { using value = ContainerChecksum; }; //@} @@ -92,258 +85,209 @@ struct container_selector //! 14.5.4/9 of the standard). //@{ template -struct selector -{ +struct selector { using value = typename container_selector::value; }; template -struct selector::type> -{ +struct selector::type> { using value = MemberChecksumWithSeed; }; template -struct selector::type> -{ +struct selector::type> { using value = MemberChecksumWithoutSeed; }; template -struct selector::type> -{ +struct selector::type> { using value = MemberHash; }; //@} -template class CChecksumImpl {}; +template +class CChecksumImpl {}; //! Basic checksum functionality implementation. template<> -class CChecksumImpl -{ - public: - //! Checksum integral type. - template - static uint64_t dispatch(uint64_t seed, INTEGRAL target) - { - return core::CHashing::hashCombine(seed, static_cast(target)); - } - - //! Checksum of double. - static uint64_t dispatch(uint64_t seed, double target) - { - return dispatch(seed, core::CStringUtils::typeToStringPrecise( - target, - core::CIEEE754::E_SinglePrecision)); - } - - //! Checksum of a universal hash function. - static uint64_t dispatch(uint64_t seed, const core::CHashing::CUniversalHash::CUInt32UnrestrictedHash &target) - { - seed = core::CHashing::hashCombine(seed, static_cast(target.a())); - return core::CHashing::hashCombine(seed, static_cast(target.b())); - } - - //! Checksum of float storage. - static uint64_t dispatch(uint64_t seed, CFloatStorage target) - { - return dispatch(seed, target.toString()); - } - - //! Checksum of string. - static uint64_t dispatch(uint64_t seed, const std::string &target) - { - return core::CHashing::safeMurmurHash64(target.data(), - static_cast(target.size()), - seed); - } - - //! Checksum a stored string pointer. - static uint64_t dispatch(uint64_t seed, const core::CStoredStringPtr &target) - { - return !target ? seed : CChecksumImpl::dispatch(seed, *target); - } - - //! Checksum of a reference_wrapper. - template - static uint64_t dispatch(uint64_t seed, const boost::reference_wrapper &target) - { - return CChecksumImpl::value>::dispatch(seed, target.get()); - } - - //! Checksum of a optional. 
- template - static uint64_t dispatch(uint64_t seed, const boost::optional &target) - { - return !target ? seed : CChecksumImpl::value>::dispatch(seed, *target); - } - - //! Checksum a pointer. - template - static uint64_t dispatch(uint64_t seed, const boost::shared_ptr &target) - { - return !target ? seed : CChecksumImpl::value>::dispatch(seed, *target); - } - - //! Checksum a pair. - template - static uint64_t dispatch(uint64_t seed, const std::pair &target) - { - seed = CChecksumImpl::value>::dispatch(seed, target.first); - return CChecksumImpl::value>::dispatch(seed, target.second); - } - - //! Checksum an Eigen dense vector. - template - static uint64_t dispatch(uint64_t seed, - const Eigen::Matrix &target) - { - std::ptrdiff_t dimension = target.size(); - if (dimension > 0) - { - for (std::ptrdiff_t i = 0; i + 1 < dimension; ++i) - { - seed = dispatch(seed, target(i)); - } - return dispatch(seed, target(dimension - 1)); +class CChecksumImpl { +public: + //! Checksum integral type. + template + static uint64_t dispatch(uint64_t seed, INTEGRAL target) { + return core::CHashing::hashCombine(seed, static_cast(target)); + } + + //! Checksum of double. + static uint64_t dispatch(uint64_t seed, double target) { + return dispatch(seed, core::CStringUtils::typeToStringPrecise(target, core::CIEEE754::E_SinglePrecision)); + } + + //! Checksum of a universal hash function. + static uint64_t dispatch(uint64_t seed, const core::CHashing::CUniversalHash::CUInt32UnrestrictedHash& target) { + seed = core::CHashing::hashCombine(seed, static_cast(target.a())); + return core::CHashing::hashCombine(seed, static_cast(target.b())); + } + + //! Checksum of float storage. + static uint64_t dispatch(uint64_t seed, CFloatStorage target) { return dispatch(seed, target.toString()); } + + //! Checksum of string. + static uint64_t dispatch(uint64_t seed, const std::string& target) { + return core::CHashing::safeMurmurHash64(target.data(), static_cast(target.size()), seed); + } + + //! Checksum a stored string pointer. + static uint64_t dispatch(uint64_t seed, const core::CStoredStringPtr& target) { + return !target ? seed : CChecksumImpl::dispatch(seed, *target); + } + + //! Checksum of a reference_wrapper. + template + static uint64_t dispatch(uint64_t seed, const boost::reference_wrapper& target) { + return CChecksumImpl::value>::dispatch(seed, target.get()); + } + + //! Checksum of a optional. + template + static uint64_t dispatch(uint64_t seed, const boost::optional& target) { + return !target ? seed : CChecksumImpl::value>::dispatch(seed, *target); + } + + //! Checksum a pointer. + template + static uint64_t dispatch(uint64_t seed, const boost::shared_ptr& target) { + return !target ? seed : CChecksumImpl::value>::dispatch(seed, *target); + } + + //! Checksum a pair. + template + static uint64_t dispatch(uint64_t seed, const std::pair& target) { + seed = CChecksumImpl::value>::dispatch(seed, target.first); + return CChecksumImpl::value>::dispatch(seed, target.second); + } + + //! Checksum an Eigen dense vector. + template + static uint64_t dispatch(uint64_t seed, const Eigen::Matrix& target) { + std::ptrdiff_t dimension = target.size(); + if (dimension > 0) { + for (std::ptrdiff_t i = 0; i + 1 < dimension; ++i) { + seed = dispatch(seed, target(i)); } - return seed; + return dispatch(seed, target(dimension - 1)); } - - //! Checksum an Eigen sparse vector. 
- template - static uint64_t dispatch(uint64_t seed, - const Eigen::SparseVector &target) - { - using TIterator = typename Eigen::SparseVector::InnerIterator; - uint64_t result = seed; - for (TIterator i(target, 0); i; ++i) - { - result = dispatch(seed, i.index()); - result = dispatch(result, i.value()); - } - return result; - } - - //! Checksum of an annotated vector. - template - static uint64_t dispatch(uint64_t seed, const CAnnotatedVector &target) - { - seed = CChecksumImpl::value>::dispatch(seed, static_cast(target)); - return CChecksumImpl::value>::dispatch(seed, target.annotation()); + return seed; + } + + //! Checksum an Eigen sparse vector. + template + static uint64_t dispatch(uint64_t seed, const Eigen::SparseVector& target) { + using TIterator = typename Eigen::SparseVector::InnerIterator; + uint64_t result = seed; + for (TIterator i(target, 0); i; ++i) { + result = dispatch(seed, i.index()); + result = dispatch(result, i.value()); } + return result; + } + + //! Checksum of an annotated vector. + template + static uint64_t dispatch(uint64_t seed, const CAnnotatedVector& target) { + seed = CChecksumImpl::value>::dispatch(seed, static_cast(target)); + return CChecksumImpl::value>::dispatch(seed, target.annotation()); + } }; //! Type with checksum member function implementation. template<> -class CChecksumImpl -{ - public: - //! Call member checksum. - template - static uint64_t dispatch(uint64_t seed, const T &target) - { - return target.checksum(seed); - } +class CChecksumImpl { +public: + //! Call member checksum. + template + static uint64_t dispatch(uint64_t seed, const T& target) { + return target.checksum(seed); + } }; //! Type with checksum member function implementation. template<> -class CChecksumImpl -{ - public: - //! Call member checksum. - template - static uint64_t dispatch(uint64_t seed, const T &target) - { - return core::CHashing::hashCombine(seed, target.checksum()); - } +class CChecksumImpl { +public: + //! Call member checksum. + template + static uint64_t dispatch(uint64_t seed, const T& target) { + return core::CHashing::hashCombine(seed, target.checksum()); + } }; //! Type with hash member function implementation. template<> -class CChecksumImpl -{ - public: - //! Call member checksum. - template - static uint64_t dispatch(uint64_t seed, const T &target) - { - return core::CHashing::hashCombine(seed, static_cast(target.hash())); - } +class CChecksumImpl { +public: + //! Call member checksum. + template + static uint64_t dispatch(uint64_t seed, const T& target) { + return core::CHashing::hashCombine(seed, static_cast(target.hash())); + } }; //! Container checksum implementation. template<> -class CChecksumImpl -{ - public: - //! Call on elements. - template - static uint64_t dispatch(uint64_t seed, const T &target) - { - using CItr = typename T::const_iterator; - uint64_t result = seed; - for (CItr itr = target.begin(); itr != target.end(); ++itr) - { - result = CChecksumImpl::value>::dispatch(result, *itr); - } - return result; +class CChecksumImpl { +public: + //! Call on elements. + template + static uint64_t dispatch(uint64_t seed, const T& target) { + using CItr = typename T::const_iterator; + uint64_t result = seed; + for (CItr itr = target.begin(); itr != target.end(); ++itr) { + result = CChecksumImpl::value>::dispatch(result, *itr); + } + return result; + } + + //! Stable hash of unordered set. 
+    template<typename T, typename H, typename P, typename A>
+    static uint64_t dispatch(uint64_t seed, const boost::unordered_set<T, H, P, A>& target) {
+        using TCRef = boost::reference_wrapper<const T>;
+        using TCRefVec = std::vector<TCRef>;
+
+        TCRefVec ordered;
+        ordered.reserve(target.size());
+        for (typename boost::unordered_set<T, H, P, A>::const_iterator itr = target.begin(); itr != target.end(); ++itr) {
+            ordered.push_back(TCRef(*itr));
+        }

-        //! Stable hash of unordered set.
-        template<typename T, typename H, typename P, typename A>
-        static uint64_t dispatch(uint64_t seed, const boost::unordered_set<T, H, P, A> &target)
-        {
-            using TCRef = boost::reference_wrapper<const T>;
-            using TCRefVec = std::vector<TCRef>;
-
-            TCRefVec ordered;
-            ordered.reserve(target.size());
-            for (typename boost::unordered_set<T, H, P, A>::const_iterator itr = target.begin();
-                 itr != target.end();
-                 ++itr)
-            {
-                ordered.push_back(TCRef(*itr));
-            }
+        std::sort(ordered.begin(), ordered.end(), maths::COrderings::SReferenceLess());

-            std::sort(ordered.begin(), ordered.end(), maths::COrderings::SReferenceLess());
+        return dispatch(seed, ordered);
+    }

-            return dispatch(seed, ordered);
-        }
+    //! Stable hash of unordered map.
+    template<typename U, typename V, typename H, typename P, typename A>
+    static uint64_t dispatch(uint64_t seed, const boost::unordered_map<U, V, H, P, A>& target) {
+        using TUCRef = boost::reference_wrapper<const U>;
+        using TVCRef = boost::reference_wrapper<const V>;
+        using TUCRefVCRefPr = std::pair<TUCRef, TVCRef>;
+        using TUCRefVCRefPrVec = std::vector<TUCRefVCRefPr>;

-        //! Stable hash of unordered map.
-        template<typename U, typename V, typename H, typename P, typename A>
-        static uint64_t dispatch(uint64_t seed, const boost::unordered_map<U, V, H, P, A> &target)
-        {
-            using TUCRef = boost::reference_wrapper<const U>;
-            using TVCRef = boost::reference_wrapper<const V>;
-            using TUCRefVCRefPr = std::pair<TUCRef, TVCRef>;
-            using TUCRefVCRefPrVec = std::vector<TUCRefVCRefPr>;
-
-            TUCRefVCRefPrVec ordered;
-            ordered.reserve(target.size());
-            for (typename boost::unordered_map<U, V, H, P, A>::const_iterator itr = target.begin();
-                 itr != target.end();
-                 ++itr)
-            {
-                ordered.push_back(TUCRefVCRefPr(TUCRef(itr->first), TVCRef(itr->second)));
-            }
+        TUCRefVCRefPrVec ordered;
+        ordered.reserve(target.size());
+        for (typename boost::unordered_map<U, V, H, P, A>::const_iterator itr = target.begin(); itr != target.end(); ++itr) {
+            ordered.push_back(TUCRefVCRefPr(TUCRef(itr->first), TVCRef(itr->second)));
+        }

-            std::sort(ordered.begin(), ordered.end(), maths::COrderings::SFirstLess());
+        std::sort(ordered.begin(), ordered.end(), maths::COrderings::SFirstLess());

-            return dispatch(seed, ordered);
-        }
+        return dispatch(seed, ordered);
+    }

-        //! Handle std::string which has a const_iterator.
-        static uint64_t dispatch(uint64_t seed, const std::string &target)
-        {
-            return CChecksumImpl<BasicChecksum>::dispatch(seed, target);
-        }
+    //! Handle std::string which has a const_iterator.
+    static uint64_t dispatch(uint64_t seed, const std::string& target) { return CChecksumImpl<BasicChecksum>::dispatch(seed, target); }
};

//! Convenience function to select implementation.
template<typename T>
-uint64_t checksum(uint64_t seed, const T &target)
-{
+uint64_t checksum(uint64_t seed, const T& target) {
    return CChecksumImpl<typename selector<T>::value>::dispatch(seed, target);
}

@@ -352,39 +296,32 @@ uint64_t checksum(uint64_t seed, const T& target)
//! \brief Implementation of utility functionality for creating
//! object checksums which are stable over model state persistence
//! and restoration.
-class MATHS_EXPORT CChecksum
-{
-    public:
-        //! The basic checksum implementation.
-        template<typename T>
-        static uint64_t calculate(uint64_t seed, const T &target)
-        {
-            return checksum_detail::checksum(seed, target);
-        }
-
-        //! Overload for arrays which chains checksums.
-        template<typename T, std::size_t SIZE>
-        static uint64_t calculate(uint64_t seed, const T (&target)[SIZE])
-        {
-            for (std::size_t i = 0u; i+1 < SIZE; ++i)
-            {
-                seed = checksum_detail::checksum(seed, target[i]);
-            }
-            return checksum_detail::checksum(seed, target[SIZE - 1]);
+class MATHS_EXPORT CChecksum {
+public:
+    //! The basic checksum implementation.
+    template<typename T>
+    static uint64_t calculate(uint64_t seed, const T& target) {
+        return checksum_detail::checksum(seed, target);
+    }
+
+    //! Overload for arrays which chains checksums.
+    template<typename T, std::size_t SIZE>
+    static uint64_t calculate(uint64_t seed, const T (&target)[SIZE]) {
+        for (std::size_t i = 0u; i + 1 < SIZE; ++i) {
+            seed = checksum_detail::checksum(seed, target[i]);
        }
-
-        //! Overload for 2d arrays which chains checksums.
-        template<typename T, std::size_t SIZE1, std::size_t SIZE2>
-        static uint64_t calculate(uint64_t seed, const T (&target)[SIZE1][SIZE2])
-        {
-            for (std::size_t i = 0u; i+1 < SIZE1; ++i)
-            {
-                seed = calculate(seed, target[i]);
-            }
-            return calculate(seed, target[SIZE1 - 1]);
+        return checksum_detail::checksum(seed, target[SIZE - 1]);
+    }
+
+    //! Overload for 2d arrays which chains checksums.
+    template<typename T, std::size_t SIZE1, std::size_t SIZE2>
+    static uint64_t calculate(uint64_t seed, const T (&target)[SIZE1][SIZE2]) {
+        for (std::size_t i = 0u; i + 1 < SIZE1; ++i) {
+            seed = calculate(seed, target[i]);
        }
+        return calculate(seed, target[SIZE1 - 1]);
+    }
};
-
}
}
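Reviewer note: the dispatch machinery above is easiest to follow from the public entry point. Below is a minimal usage sketch, not part of the patch; only CChecksum::calculate and the selector-driven dispatch shown in this file are assumed, while the main() scaffold and the sample data are invented for illustration.

// checksum_example.cc (illustrative only)
#include <maths/CChecksum.h>

#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
    using ml::maths::CChecksum;

    uint64_t seed = 0;

    // A container of pairs: the container overload iterates the elements
    // and each pair dispatches its members through the selector again.
    std::vector<std::pair<std::string, double>> counts{{"a", 1.0}, {"b", 2.0}};
    seed = CChecksum::calculate(seed, counts);

    // The array overload chains the element checksums.
    double values[3] = {1.0, 2.0, 3.0};
    seed = CChecksum::calculate(seed, values);

    std::cout << "checksum = " << seed << '\n';
    return 0;
}

The point of routing everything through one calculate() entry point is that composite objects get stable checksums for free: each member is hashed by whichever specialisation the selector picks, so persistence round trips can be verified with a single call.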
diff --git a/include/maths/CClusterer.h b/include/maths/CClusterer.h
index bde98594da..f3464a09e1 100644
--- a/include/maths/CClusterer.h
+++ b/include/maths/CClusterer.h
@@ -23,88 +23,78 @@
 #include 
 #include 
-#include 
 #include 
+#include 
 #include 
-
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

//! \brief Factors out the non-template part of CClusterer for improved
//! compile times.
-class MATHS_EXPORT CClustererTypes
-{
-    public:
-        //! A no-op to provide a default for the split and merge callbacks.
-        class CDoNothing
-        {
-            public:
-                void operator()(std::size_t,
-                                std::size_t,
-                                std::size_t) const
-                {
-                }
-        };
-
-        // Callback function signature for when clusters are split.
-        using TSplitFunc = std::function<void(std::size_t, std::size_t, std::size_t)>;
-
-        // Callback function signature for when clusters are merged.
-        using TMergeFunc = std::function<void(std::size_t, std::size_t, std::size_t)>;
-
-        //! Generates unique cluster indices.
-        class MATHS_EXPORT CIndexGenerator
-        {
-            public:
-                //! Create a new generator.
-                CIndexGenerator();
-
-                //! Restore by traversing a state document
-                bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-                //! Persist state by passing information to the supplied inserter
-                void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-                //! Deep copy this index generator.
-                CIndexGenerator deepCopy() const;
-
-                //! Get the next available unique cluster index.
-                std::size_t next() const;
-
-                //! Recycle the specified cluster index.
-                void recycle(std::size_t index);
-
-                //! Get the memory used by this component
-                void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-                //! Get the memory used by this component
-                std::size_t memoryUsage() const;
-
-                //! Print the state of the heap for debug.
-                std::string print() const;
-
-            private:
-                using TSizeVec = std::vector<std::size_t>;
-                using TSizeVecPtr = boost::shared_ptr<TSizeVec>;
-
-            private:
-                //! A heap of the next available unique indices.
-                TSizeVecPtr m_IndexHeap;
-        };
-
-    public:
-        //! \name XML Tag Names
-        //!
-        //! These tag the type of clusterer for polymorphic model persistence.
-        //@{
-        static const std::string X_MEANS_ONLINE_1D_TAG;
-        static const std::string K_MEANS_ONLINE_1D_TAG;
-        static const std::string X_MEANS_ONLINE_TAG;
-        //@}
-};
+class MATHS_EXPORT CClustererTypes {
+public:
+    //! A no-op to provide a default for the split and merge callbacks.
+    class CDoNothing {
+    public:
+        void operator()(std::size_t, std::size_t, std::size_t) const {}
+    };
+
+    // Callback function signature for when clusters are split.
+    using TSplitFunc = std::function<void(std::size_t, std::size_t, std::size_t)>;
+
+    // Callback function signature for when clusters are merged.
+    using TMergeFunc = std::function<void(std::size_t, std::size_t, std::size_t)>;
+
+    //! Generates unique cluster indices.
+    class MATHS_EXPORT CIndexGenerator {
+    public:
+        //! Create a new generator.
+        CIndexGenerator();
+
+        //! Restore by traversing a state document
+        bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+        //! Persist state by passing information to the supplied inserter
+        void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+        //! Deep copy this index generator.
+        CIndexGenerator deepCopy() const;
+
+        //! Get the next available unique cluster index.
+        std::size_t next() const;
+
+        //! Recycle the specified cluster index.
+        void recycle(std::size_t index);
+
+        //! Get the memory used by this component
+        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+        //! Get the memory used by this component
+        std::size_t memoryUsage() const;
+
+        //! Print the state of the heap for debug.
+        std::string print() const;
+
+    private:
+        using TSizeVec = std::vector<std::size_t>;
+        using TSizeVecPtr = boost::shared_ptr<TSizeVec>;
+
+    private:
+        //! A heap of the next available unique indices.
+        TSizeVecPtr m_IndexHeap;
+    };
+
+public:
+    //! \name XML Tag Names
+    //!
+    //! These tag the type of clusterer for polymorphic model persistence.
+    //@{
+    static const std::string X_MEANS_ONLINE_1D_TAG;
+    static const std::string K_MEANS_ONLINE_1D_TAG;
+    static const std::string X_MEANS_ONLINE_TAG;
+    //@}
};

//! \brief Interface for clustering functionality.
@@ -134,182 +124,152 @@ class MATHS_EXPORT CClustererTypes
//! supports recycling indices to avoid overflowing std::size_t, since
//! this is a requirement for all implementations.
template<typename POINT>
-class CClusterer : public CClustererTypes
-{
-    public:
-        using TClustererPtr = boost::shared_ptr<CClusterer>;
-        using TPointVec = std::vector<POINT>;
-        using TPointPrecise = typename SPromoted<POINT>::Type;
-        using TPointPreciseVec = std::vector<TPointPrecise>;
-        using TPointPreciseDoublePr = std::pair<TPointPrecise, double>;
-        using TPointPreciseDoublePrVec = std::vector<TPointPreciseDoublePr>;
-        using TSizeDoublePr = std::pair<std::size_t, double>;
-        using TSizeDoublePr2Vec = core::CSmallVector<TSizeDoublePr, 2>;
-
-    public:
-        //! Create a new clusterer.
-        //!
-        //! \param splitFunc Optional callback for when a cluster is split.
-        //! \param mergeFunc Optional callback for when two clusters are merged.
-        explicit CClusterer(const TSplitFunc &splitFunc = CDoNothing(),
                            const TMergeFunc &mergeFunc = CDoNothing()) :
-                m_SplitFunc(splitFunc),
-                m_MergeFunc(mergeFunc)
-        {
-        }
-
-        virtual ~CClusterer() {}
-        //! \name Clusterer Contract
-        //@{
-        //! Get the tag name for this clusterer.
-        virtual std::string persistenceTag() const = 0;
-
-        //! Persist state by passing information to the supplied inserter
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0;
-
-        //! Creates a copy of the clusterer.
-        //!
-        //! \warning Caller owns returned object.
-        virtual CClusterer *clone() const = 0;
-
-        //! Clear the current clusterer state.
-        virtual void clear() = 0;
-
-        //! Get the number of clusters.
-        virtual std::size_t numberClusters() const = 0;
-
-        //! Set the type of data being clustered.
-        virtual void dataType(maths_t::EDataType dataType) = 0;
-
-        //! Set the rate at which information is aged out.
-        virtual void decayRate(double decayRate) = 0;
-
-        //! Check if the cluster identified by \p index exists.
-        virtual bool hasCluster(std::size_t index) const = 0;
-
-        //! Get the centre of the cluster identified by \p index.
-        virtual bool clusterCentre(std::size_t index,
-                                   TPointPrecise &result) const = 0;
-
-        //! Get the spread of the cluster identified by \p index.
-        virtual bool clusterSpread(std::size_t index,
-                                   double &result) const = 0;
-
-        //! Gets the index of the cluster(s) to which \p point belongs
-        //! together with their weighting factors.
-        virtual void cluster(const TPointPrecise &point,
-                             TSizeDoublePr2Vec &result,
-                             double count = 1.0) const = 0;
-
-        //! Add a point without caring about its cluster.
-        void add(const TPointPrecise &point, double count = 1.0)
-        {
-            TSizeDoublePr2Vec clusters;
-            this->add(point, clusters, count);
-        }
-
-        //! Update the clustering with \p point and return its cluster(s)
-        //! together with their weighting factors.
-        virtual void add(const TPointPrecise &point,
-                         TSizeDoublePr2Vec &clusters,
-                         double count = 1.0) = 0;
-
-        //! Update the clustering with \p points.
-        void add(const TPointPreciseVec &points)
-        {
-            TPointPreciseDoublePrVec weightedPoints;
-            weightedPoints.reserve(points.size());
-            for (std::size_t i = 0u; i < points.size(); ++i)
-            {
-                weightedPoints.push_back(TPointPreciseDoublePr(points[i], 1.0));
-            }
-            this->add(weightedPoints);
-        }
-
-        //! Update the clustering with \p points.
-        virtual void add(const TPointPreciseDoublePrVec &points) = 0;
-
-        //! Propagate the clustering forwards by \p time.
-        //!
-        //! The cluster priors relax back to non-informative and the
-        //! cluster probabilities become less at a rate controlled by
-        //! the decay rate parameter (optionally supplied to the constructor).
-        //!
-        //! \param time The time increment to apply.
-        virtual void propagateForwardsByTime(double time) = 0;
-
-        //! Sample the cluster with index \p index.
-        //!
-        //! \param index The index of the cluster to sample.
-        //! \param numberSamples The desired number of samples.
-        //! \param samples Filled in with the samples.
-        //! \return True if the cluster could be sampled and false otherwise.
-        virtual bool sample(std::size_t index,
-                            std::size_t numberSamples,
-                            TPointPreciseVec &samples) const = 0;
-
-        //! Get the probability of the cluster with the index \p index.
-        //!
-        //! \param index The index of the cluster of interest.
-        //! \return The probability of the cluster identified by \p index.
-        virtual double probability(std::size_t index) const = 0;
-
-        //! Get a checksum for this object.
-        virtual uint64_t checksum(uint64_t seed = 0) const = 0;
-
-        //! Get the memory used by this component
-        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
-
-        //! Get the memory used by this component
-        virtual std::size_t memoryUsage() const = 0;
-
-        //! Get the static size of this object - used for virtual hierarchies
-        virtual std::size_t staticSize() const = 0;
-        //@}
-
-        //! Get the callback function to invoke when a cluster is split.
-        const TSplitFunc &splitFunc() const
-        {
-            return m_SplitFunc;
-        }
-
-        //! Set the callback function to invoke when a cluster is split.
-        void splitFunc(const TSplitFunc &value)
-        {
-            m_SplitFunc = value;
-        }
-
-        //! Get the callback function to invoke when two clusters are merged.
-        const TMergeFunc &mergeFunc() const
-        {
-            return m_MergeFunc;
-        }
-
-        //! Set the callback function to invoke when two clusters are merged.
-        void mergeFunc(const TSplitFunc &value)
-        {
-            m_MergeFunc = value;
-        }
-
-    protected:
-        //! Swap the CClusterer state of two derived objects.
-        void swap(CClusterer &other)
-        {
-            boost::swap(m_SplitFunc, other.m_SplitFunc);
-            boost::swap(m_MergeFunc, other.m_MergeFunc);
-        }
-
-    private:
-        //! An optional callback function to invoke when a cluster is split.
-        TSplitFunc m_SplitFunc;
-
-        //! An optional callback function to invoke when two clusters are merged.
-        TMergeFunc m_MergeFunc;
-};
+class CClusterer : public CClustererTypes {
+public:
+    using TClustererPtr = boost::shared_ptr<CClusterer>;
+    using TPointVec = std::vector<POINT>;
+    using TPointPrecise = typename SPromoted<POINT>::Type;
+    using TPointPreciseVec = std::vector<TPointPrecise>;
+    using TPointPreciseDoublePr = std::pair<TPointPrecise, double>;
+    using TPointPreciseDoublePrVec = std::vector<TPointPreciseDoublePr>;
+    using TSizeDoublePr = std::pair<std::size_t, double>;
+    using TSizeDoublePr2Vec = core::CSmallVector<TSizeDoublePr, 2>;
+
+public:
+    //! Create a new clusterer.
+    //!
+    //! \param splitFunc Optional callback for when a cluster is split.
+    //! \param mergeFunc Optional callback for when two clusters are merged.
+    explicit CClusterer(const TSplitFunc& splitFunc = CDoNothing(), const TMergeFunc& mergeFunc = CDoNothing())
+        : m_SplitFunc(splitFunc), m_MergeFunc(mergeFunc) {}
+
+    virtual ~CClusterer() {}
+
+    //! \name Clusterer Contract
+    //@{
+    //! Get the tag name for this clusterer.
+    virtual std::string persistenceTag() const = 0;
+
+    //! Persist state by passing information to the supplied inserter
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0;
+
+    //! Creates a copy of the clusterer.
+    //!
+    //! \warning Caller owns returned object.
+    virtual CClusterer* clone() const = 0;
+
+    //! Clear the current clusterer state.
+    virtual void clear() = 0;
+
+    //! Get the number of clusters.
+    virtual std::size_t numberClusters() const = 0;
+
+    //! Set the type of data being clustered.
+    virtual void dataType(maths_t::EDataType dataType) = 0;
+
+    //! Set the rate at which information is aged out.
+    virtual void decayRate(double decayRate) = 0;
+
+    //! Check if the cluster identified by \p index exists.
+    virtual bool hasCluster(std::size_t index) const = 0;
+
+    //! Get the centre of the cluster identified by \p index.
+    virtual bool clusterCentre(std::size_t index, TPointPrecise& result) const = 0;
+
+    //! Get the spread of the cluster identified by \p index.
+    virtual bool clusterSpread(std::size_t index, double& result) const = 0;
+
+    //! Gets the index of the cluster(s) to which \p point belongs
+    //! together with their weighting factors.
+    virtual void cluster(const TPointPrecise& point, TSizeDoublePr2Vec& result, double count = 1.0) const = 0;
+
+    //! Add a point without caring about its cluster.
+    void add(const TPointPrecise& point, double count = 1.0) {
+        TSizeDoublePr2Vec clusters;
+        this->add(point, clusters, count);
+    }
+
+    //! Update the clustering with \p point and return its cluster(s)
+    //! together with their weighting factors.
+    virtual void add(const TPointPrecise& point, TSizeDoublePr2Vec& clusters, double count = 1.0) = 0;
+
+    //! Update the clustering with \p points.
+    void add(const TPointPreciseVec& points) {
+        TPointPreciseDoublePrVec weightedPoints;
+        weightedPoints.reserve(points.size());
+        for (std::size_t i = 0u; i < points.size(); ++i) {
+            weightedPoints.push_back(TPointPreciseDoublePr(points[i], 1.0));
        }
+        this->add(weightedPoints);
+    }
+
+    //! Update the clustering with \p points.
+    virtual void add(const TPointPreciseDoublePrVec& points) = 0;
+    //! Propagate the clustering forwards by \p time.
+    //!
+    //! The cluster priors relax back to non-informative and the
+    //! cluster probabilities become less at a rate controlled by
+    //! the decay rate parameter (optionally supplied to the constructor).
+    //!
+    //! \param time The time increment to apply.
+    virtual void propagateForwardsByTime(double time) = 0;
+
+    //! Sample the cluster with index \p index.
+    //!
+    //! \param index The index of the cluster to sample.
+    //! \param numberSamples The desired number of samples.
+    //! \param samples Filled in with the samples.
+    //! \return True if the cluster could be sampled and false otherwise.
+    virtual bool sample(std::size_t index, std::size_t numberSamples, TPointPreciseVec& samples) const = 0;
+
+    //! Get the probability of the cluster with the index \p index.
+    //!
+    //! \param index The index of the cluster of interest.
+    //! \return The probability of the cluster identified by \p index.
+    virtual double probability(std::size_t index) const = 0;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const = 0;
+
+    //! Get the memory used by this component
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
+
+    //! Get the memory used by this component
+    virtual std::size_t memoryUsage() const = 0;
+
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const = 0;
+    //@}
+
+    //! Get the callback function to invoke when a cluster is split.
+    const TSplitFunc& splitFunc() const { return m_SplitFunc; }
+
+    //! Set the callback function to invoke when a cluster is split.
+    void splitFunc(const TSplitFunc& value) { m_SplitFunc = value; }
+
+    //! Get the callback function to invoke when two clusters are merged.
+    const TMergeFunc& mergeFunc() const { return m_MergeFunc; }
+
+    //! Set the callback function to invoke when two clusters are merged.
+    void mergeFunc(const TSplitFunc& value) { m_MergeFunc = value; }
+
+protected:
+    //! Swap the CClusterer state of two derived objects.
+    void swap(CClusterer& other) {
+        boost::swap(m_SplitFunc, other.m_SplitFunc);
+        boost::swap(m_MergeFunc, other.m_MergeFunc);
+    }
+
+private:
+    //! An optional callback function to invoke when a cluster is split.
+    TSplitFunc m_SplitFunc;
+
+    //! An optional callback function to invoke when two clusters are merged.
+    TMergeFunc m_MergeFunc;
};

 using CClusterer1d = CClusterer<double>;
-
}
}
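Reviewer note: a minimal sketch of how this interface is typically driven, using only the members declared above. The helper function, its name, and the per-point ageing policy are invented for illustration; a concrete implementation (for example the x-means clusterer) would be constructed elsewhere and passed in by reference.

// clusterer_example.cc (illustrative only)
#include <maths/CClusterer.h>

#include <iostream>
#include <vector>

// Feed a batch of values to any 1-d clusterer and report the result.
void summarize(ml::maths::CClusterer1d& clusterer, const std::vector<double>& points) {
    for (double point : points) {
        ml::maths::CClusterer1d::TSizeDoublePr2Vec clusters;
        // Soft assignment: clusters is filled with (index, weight) pairs.
        clusterer.add(point, clusters);
        // Age the clustering as if one unit of time passed per point.
        clusterer.propagateForwardsByTime(1.0);
    }
    std::cout << clusterer.numberClusters() << " cluster(s)\n";
}

The split and merge callbacks exist so owners of per-cluster state (for example per-cluster priors) can keep their bookkeeping consistent when the clusterer changes its own topology.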
diff --git a/include/maths/CClustererStateSerialiser.h b/include/maths/CClustererStateSerialiser.h
index 02486bee8c..66602c420d 100644
--- a/include/maths/CClustererStateSerialiser.h
+++ b/include/maths/CClustererStateSerialiser.h
@@ -17,11 +17,10 @@

 #include 

-namespace ml
-{
-namespace maths
-{
-template<typename T, std::size_t N> class CXMeansOnline;
+namespace ml {
+namespace maths {
+template<typename T, std::size_t N>
+class CXMeansOnline;
 struct SDistributionRestoreParams;

 //! \brief Convert CClusterer sub-classes to/from text representations.
@@ -42,100 +41,78 @@ struct SDistributionRestoreParams;
 //! The field names given to each prior distribution class are deliberately
 //! terse and uninformative to avoid giving away details of our analytics
 //! to potential competitors.
-class MATHS_EXPORT CClustererStateSerialiser
-{
-    public:
-        using TClusterer1dPtr = boost::shared_ptr<CClusterer1d>;
-
-    public:
-        //! Construct the appropriate CClusterer sub-class from its state
-        //! document representation.
-        //!
-        //! \note Sets \p ptr to NULL on failure.
-        bool operator()(const SDistributionRestoreParams &params,
-                        TClusterer1dPtr &ptr,
-                        core::CStateRestoreTraverser &traverser);
-
-        //! Construct the appropriate CClusterer sub-class from its state
-        //! document representation.
-        //!
-        //! \note Sets \p ptr to NULL on failure.
-        bool operator()(const SDistributionRestoreParams &params,
-                        const CClusterer1d::TSplitFunc &splitFunc,
-                        const CClusterer1d::TMergeFunc &mergeFunc,
-                        TClusterer1dPtr &ptr,
-                        core::CStateRestoreTraverser &traverser);
-
-        //! Persist state by passing information to the supplied inserter.
-        void operator()(const CClusterer1d &clusterer,
-                        core::CStatePersistInserter &inserter);
-
-        //! Construct the appropriate CClusterer sub-class from its state
-        //! document representation.
-        //!
-        //! \note Sets \p ptr to NULL on failure.
-        template<typename T, std::size_t N>
-        bool operator()(const SDistributionRestoreParams &params,
-                        boost::shared_ptr<CClusterer<CVectorNx1<T, N> > > &ptr,
-                        core::CStateRestoreTraverser &traverser)
-        {
-            return this->operator()(params,
-                                    CClustererTypes::CDoNothing(),
-                                    CClustererTypes::CDoNothing(),
-                                    ptr,
-                                    traverser);
-        }
-
-        //! Construct the appropriate CClusterer sub-class from its state
-        //! document representation.
-        //!
-        //! \note Sets \p ptr to NULL on failure.
-        template<typename T, std::size_t N>
-        bool operator()(const SDistributionRestoreParams &params,
-                        const CClustererTypes::TSplitFunc &splitFunc,
-                        const CClustererTypes::TMergeFunc &mergeFunc,
-                        boost::shared_ptr<CClusterer<CVectorNx1<T, N> > > &ptr,
-                        core::CStateRestoreTraverser &traverser)
-        {
-            std::size_t numResults(0);
-
-            do
-            {
-                const std::string &name = traverser.name();
-                if (name == CClustererTypes::X_MEANS_ONLINE_TAG)
-                {
-                    ptr.reset(CXMeansOnlineFactory<T, N>::restore(
-                                  params, splitFunc, mergeFunc, traverser));
-                    ++numResults;
-                }
-                else
-                {
-                    LOG_ERROR("No clusterer corresponds to node name " << traverser.name());
-                }
-            }
-            while (traverser.next());
-
-            if (numResults != 1)
-            {
-                LOG_ERROR("Expected 1 (got " << numResults << ") clusterer tags");
-                ptr.reset();
-                return false;
-            }
-
-            return true;
-        }
-
-        //! Persist state by passing information to the supplied inserter.
-        template<typename T, std::size_t N>
-        void operator()(const CClusterer<CVectorNx1<T, N> > &clusterer,
-                        core::CStatePersistInserter &inserter)
-        {
-            inserter.insertLevel(clusterer.persistenceTag(),
-                                 boost::bind(&CClusterer<CVectorNx1<T, N> >::acceptPersistInserter,
-                                             &clusterer, _1));
-        }
-};
+class MATHS_EXPORT CClustererStateSerialiser {
+public:
+    using TClusterer1dPtr = boost::shared_ptr<CClusterer1d>;
+
+public:
+    //! Construct the appropriate CClusterer sub-class from its state
+    //! document representation.
+    //!
+    //! \note Sets \p ptr to NULL on failure.
+    bool operator()(const SDistributionRestoreParams& params, TClusterer1dPtr& ptr, core::CStateRestoreTraverser& traverser);
+
+    //! Construct the appropriate CClusterer sub-class from its state
+    //! document representation.
+    //!
+    //! \note Sets \p ptr to NULL on failure.
+    bool operator()(const SDistributionRestoreParams& params,
+                    const CClusterer1d::TSplitFunc& splitFunc,
+                    const CClusterer1d::TMergeFunc& mergeFunc,
+                    TClusterer1dPtr& ptr,
+                    core::CStateRestoreTraverser& traverser);
+
+    //! Persist state by passing information to the supplied inserter.
+    void operator()(const CClusterer1d& clusterer, core::CStatePersistInserter& inserter);
+
+    //! Construct the appropriate CClusterer sub-class from its state
+    //! document representation.
+    //!
+    //! \note Sets \p ptr to NULL on failure.
+    template<typename T, std::size_t N>
+    bool operator()(const SDistributionRestoreParams& params,
+                    boost::shared_ptr<CClusterer<CVectorNx1<T, N>>>& ptr,
+                    core::CStateRestoreTraverser& traverser) {
+        return this->operator()(params, CClustererTypes::CDoNothing(), CClustererTypes::CDoNothing(), ptr, traverser);
+    }
+
+    //! Construct the appropriate CClusterer sub-class from its state
+    //! document representation.
+    //!
+    //! \note Sets \p ptr to NULL on failure.
+    template<typename T, std::size_t N>
+    bool operator()(const SDistributionRestoreParams& params,
+                    const CClustererTypes::TSplitFunc& splitFunc,
+                    const CClustererTypes::TMergeFunc& mergeFunc,
+                    boost::shared_ptr<CClusterer<CVectorNx1<T, N>>>& ptr,
+                    core::CStateRestoreTraverser& traverser) {
+        std::size_t numResults(0);
+
+        do {
+            const std::string& name = traverser.name();
+            if (name == CClustererTypes::X_MEANS_ONLINE_TAG) {
+                ptr.reset(CXMeansOnlineFactory<T, N>::restore(params, splitFunc, mergeFunc, traverser));
+                ++numResults;
+            } else {
+                LOG_ERROR("No clusterer corresponds to node name " << traverser.name());
+            }
+        } while (traverser.next());

+        if (numResults != 1) {
+            LOG_ERROR("Expected 1 (got " << numResults << ") clusterer tags");
+            ptr.reset();
+            return false;
        }

+        return true;
+    }

+    //! Persist state by passing information to the supplied inserter.
+    template<typename T, std::size_t N>
+    void operator()(const CClusterer<CVectorNx1<T, N>>& clusterer, core::CStatePersistInserter& inserter) {
+        inserter.insertLevel(clusterer.persistenceTag(),
+                             boost::bind(&CClusterer<CVectorNx1<T, N>>::acceptPersistInserter, &clusterer, _1));
+    }
+};
}
}
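Reviewer note: the serialiser is a function object used in both directions. Below is a persist-side sketch; it assumes core::CJsonStatePersistInserter (part of the core library and untouched by this patch) as the concrete inserter, and the persist() helper is invented for illustration. The restore direction additionally needs an SDistributionRestoreParams, so it is omitted here.

// serialiser_example.cc (illustrative only)
#include <core/CJsonStatePersistInserter.h>
#include <maths/CClusterer.h>
#include <maths/CClustererStateSerialiser.h>

#include <sstream>
#include <string>

// Persist a 1-d clusterer polymorphically: the serialiser writes the
// implementation's persistence tag and then delegates to the clusterer's
// own acceptPersistInserter.
std::string persist(const ml::maths::CClusterer1d& clusterer) {
    std::ostringstream state;
    {
        ml::core::CJsonStatePersistInserter inserter(state);
        ml::maths::CClustererStateSerialiser()(clusterer, inserter);
    }
    return state.str();
}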
diff --git a/include/maths/CCompositeFunctions.h b/include/maths/CCompositeFunctions.h
index b5ff4df633..ab4b877e0a 100644
--- a/include/maths/CCompositeFunctions.h
+++ b/include/maths/CCompositeFunctions.h
@@ -15,31 +15,24 @@
 #include 
 #include 

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace composition_detail
-{
+namespace composition_detail {

 //! Type used to deduce the result type for a function.
 template<typename F>
-struct function_result_type
-{
-};
+struct function_result_type {};

 //! Vanilla function type 1: "result type" is the return type.
 template<typename R>
-struct function_result_type<R (*)(double)>
-{
+struct function_result_type<R (*)(double)> {
     using type = typename boost::remove_reference<R>::type;
 };

 //! Vanilla function type 2: "result type" is the second argument type.
 template<typename R>
-struct function_result_type<bool (*)(double, R &)>
-{
+struct function_result_type<bool (*)(double, R&)> {
     using type = typename boost::remove_reference<R>::type;
 };

 using true_ = boost::true_type;
 using false_ = boost::false_type;

 //! \brief Auxiliary type used by has_result_type to test for
 //! a nested typedef.
 template<typename T, typename R = void>
-struct enable_if_type
-{
+struct enable_if_type {
     using type = R;
 };

 //! Checks for a nested typedef called result_type.
 template<typename F, typename ENABLE = void>
-struct has_result_type
-{
+struct has_result_type {
     using value = false_;
 };

 //! Has a nested typedef called result_type.
 template<typename F>
-struct has_result_type<F, typename enable_if_type<typename F::result_type>::type>
-{
+struct has_result_type<F, typename enable_if_type<typename F::result_type>::type> {
     using value = true_;
 };

 //! Extracts the result type of a function (object) for composition.
 template<typename F, typename SELECTOR>
-struct result_type_impl
-{
-};
+struct result_type_impl {};

 //! \brief Read the typedef from the function.
 //!
 //! This is needed to get result type for function objects: they must
 //! define a nested typedef called result_type as per our compositions.
 template<typename F>
-struct result_type_impl<F, true_>
-{
+struct result_type_impl<F, true_> {
     using type = typename F::result_type;
 };

 //! Deduce result type from function (object).
 template<typename F>
-struct result_type_impl<F, false_>
-{
+struct result_type_impl<F, false_> {
     using type = typename function_result_type<F>::type;
 };

@@ -95,13 +81,10 @@ struct result_type_impl
 //! in various ways.
 template<typename F>
 struct result_type : public result_type_impl<typename boost::remove_reference<F>::type,
-                                             typename has_result_type<typename boost::remove_reference<F>::type>::value>
-{
-};
+                                             typename has_result_type<typename boost::remove_reference<F>::type>::value> {};

 } // composition_detail::

-
 //! \brief A collection of useful compositions of functions for the solver
 //! and numerical integration functions.
 //!
@@ -125,167 +108,131 @@
 //! specifically solving and integration. These overload operator() to deal
 //! with this. Since a member function of a template is only instantiated
 //! when needed, the functions supplied don't need to support both.
-class MATHS_EXPORT CCompositeFunctions
-{
-    public:
-        //! Function composition with minus a constant.
-        template<typename F_, typename T = typename composition_detail::result_type<F_>::type>
-        class CMinusConstant
-        {
-            public:
-                using F = typename boost::remove_reference<F_>::type;
-                using result_type = T;
-
-            public:
-                CMinusConstant(const F &f, double offset) :
-                    m_F(f),
-                    m_Offset(offset)
-                {
-                }
-
-                //! For function returning value.
-                inline T operator()(double x) const
-                {
-                    return m_F(x) - m_Offset;
-                }
-
-                //! For function return success/fail and taking result as argument.
-                inline bool operator()(double x, T &result) const
-                {
-                    if (m_F(x, result))
-                    {
-                        result -= m_Offset;
-                        return true;
-                    }
-                    return false;
-                }
-
-            private:
-                F_ m_F;
-                double m_Offset;
-        };
+class MATHS_EXPORT CCompositeFunctions {
+public:
+    //! Function composition with minus a constant.
+    template<typename F_, typename T = typename composition_detail::result_type<F_>::type>
+    class CMinusConstant {
+    public:
+        using F = typename boost::remove_reference<F_>::type;
+        using result_type = T;
+
+    public:
+        CMinusConstant(const F& f, double offset) : m_F(f), m_Offset(offset) {}
+
+        //! For function returning value.
+        inline T operator()(double x) const { return m_F(x) - m_Offset; }
+
+        //! For function return success/fail and taking result as argument.
+        inline bool operator()(double x, T& result) const {
+            if (m_F(x, result)) {
+                result -= m_Offset;
+                return true;
+            }
+            return false;
+        }
+
+    private:
+        F_ m_F;
+        double m_Offset;
+    };
-        //! Function composition with negation.
-        template<typename F_, typename T = typename composition_detail::result_type<F_>::type>
-        class CMinus
-        {
-            public:
-                using F = typename boost::remove_reference<F_>::type;
-                using result_type = T;
-
-            public:
-                explicit CMinus(const F &f = F()) : m_F(f) {}
-
-                //! For function returning value.
-                inline T operator()(double x) const
-                {
-                    return -m_F(x);
-                }
-
-                //! For function return success/fail and taking result as argument.
-                inline bool operator()(double x, T &result) const
-                {
-                    if (m_F(x, result))
-                    {
-                        result = -result;
-                        return true;
-                    }
-                    return false;
-                }
-
-            private:
-                F_ m_F;
-        };
+    //! Function composition with negation.
+    template<typename F_, typename T = typename composition_detail::result_type<F_>::type>
+    class CMinus {
+    public:
+        using F = typename boost::remove_reference<F_>::type;
+        using result_type = T;
+
+    public:
+        explicit CMinus(const F& f = F()) : m_F(f) {}
+
+        //! For function returning value.
+        inline T operator()(double x) const { return -m_F(x); }
+
+        //! For function return success/fail and taking result as argument.
+        inline bool operator()(double x, T& result) const {
+            if (m_F(x, result)) {
+                result = -result;
+                return true;
+            }
+            return false;
+        }
+
+    private:
+        F_ m_F;
+    };

-        //! Composition with exponentiation.
-        template<typename F_, typename T = typename composition_detail::result_type<F_>::type>
-        class CExp
-        {
-            public:
-                using F = typename boost::remove_reference<F_>::type;
-                using result_type = T;
-
-            public:
-                explicit CExp(const F &f = F()) : m_F(f) {}
-
-                //! For function returning value.
-                inline T operator()(double x) const
-                {
-                    static const double LOG_MIN_DOUBLE =
-                            std::log(std::numeric_limits<double>::min());
-                    double fx = m_F(x);
-                    return fx < LOG_MIN_DOUBLE ? 0.0 : std::exp(fx);
-                }
-
-                //! For function return success/fail and taking result as argument.
-                inline bool operator()(double x, T &result) const
-                {
-                    static const double LOG_MIN_DOUBLE =
-                            std::log(std::numeric_limits<double>::min());
-                    if (m_F(x, result))
-                    {
-                        result = result < LOG_MIN_DOUBLE ? 0.0 : std::exp(result);
-                        return true;
-                    }
-                    return false;
-                }
-
-            private:
-                F_ m_F;
-        };
+    //! Composition with exponentiation.
+    template<typename F_, typename T = typename composition_detail::result_type<F_>::type>
+    class CExp {
+    public:
+        using F = typename boost::remove_reference<F_>::type;
+        using result_type = T;
+
+    public:
+        explicit CExp(const F& f = F()) : m_F(f) {}
+
+        //! For function returning value.
+        inline T operator()(double x) const {
+            static const double LOG_MIN_DOUBLE = std::log(std::numeric_limits<double>::min());
+            double fx = m_F(x);
+            return fx < LOG_MIN_DOUBLE ? 0.0 : std::exp(fx);
+        }
+
+        //! For function return success/fail and taking result as argument.
+        inline bool operator()(double x, T& result) const {
+            static const double LOG_MIN_DOUBLE = std::log(std::numeric_limits<double>::min());
+            if (m_F(x, result)) {
+                result = result < LOG_MIN_DOUBLE ? 0.0 : std::exp(result);
+                return true;
+            }
+            return false;
+        }
+
+    private:
+        F_ m_F;
+    };
-        //! Composition of two functions by multiplication.
-        template<typename F_, typename G_,
-                 typename U = typename composition_detail::result_type<F_>::type,
-                 typename V = typename composition_detail::result_type<G_>::type>
-        class CProduct
-        {
-            public:
-                using F = typename boost::remove_reference<F_>::type;
-                using G = typename boost::remove_reference<G_>::type;
-                using result_type = U;
-
-            public:
-                explicit CProduct(const F &f = F(),
                                  const G &g = G()) :
-                    m_F(f), m_G(g)
-                {}
-
-                //! For function returning value.
-                inline U operator()(double x) const
-                {
-                    return m_F(x) * m_G(x);
-                }
-
-                //! For function return success/fail and taking result as argument.
-                inline bool operator()(double x, U &result) const
-                {
-                    U fx;
-                    V gx;
-                    if (m_F(x, fx) && m_G(x, gx))
-                    {
-                        result = fx * gx;
-                        return true;
-                    }
-                    return false;
-                }
-
-                //! Retrieve the component function f.
-                const F &f() const { return m_F; }
-
-                //! Retrieve the component function g.
-                const G &g() const { return m_G; }
-
-            private:
-                F_ m_F;
-                G_ m_G;
-        };
-};
+    //! Composition of two functions by multiplication.
+    template<typename F_,
+             typename G_,
+             typename U = typename composition_detail::result_type<F_>::type,
+             typename V = typename composition_detail::result_type<G_>::type>
+    class CProduct {
+    public:
+        using F = typename boost::remove_reference<F_>::type;
+        using G = typename boost::remove_reference<G_>::type;
+        using result_type = U;
+
+    public:
+        explicit CProduct(const F& f = F(), const G& g = G()) : m_F(f), m_G(g) {}
+
+        //! For function returning value.
+        inline U operator()(double x) const { return m_F(x) * m_G(x); }
+
+        //! For function return success/fail and taking result as argument.
+        inline bool operator()(double x, U& result) const {
+            U fx;
+            V gx;
+            if (m_F(x, fx) && m_G(x, gx)) {
+                result = fx * gx;
+                return true;
+            }
+            return false;
+        }
+
+        //! Retrieve the component function f.
+        const F& f() const { return m_F; }
+
+        //! Retrieve the component function g.
+        const G& g() const { return m_G; }
+
+    private:
+        F_ m_F;
+        G_ m_G;
+    };
+};
}
}
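Reviewer note: a small usage sketch for the compositions above. It assumes the function_result_type specializations cover plain function pointers of the two supported signatures, as reconstructed in this header; square() and the composed object names are invented for illustration.

// composite_example.cc (illustrative only)
#include <maths/CCompositeFunctions.h>

#include <cmath>
#include <iostream>

double square(double x) {
    return x * x;
}

int main() {
    using namespace ml::maths;

    using TSquareMinusOne = CCompositeFunctions::CMinusConstant<double (*)(double)>;

    // g(x) = x^2 - 1, then f(x) = exp(g(x)); CExp guards against underflow
    // by returning zero when g(x) is below log(min double).
    TSquareMinusOne g(&square, 1.0);
    CCompositeFunctions::CExp<TSquareMinusOne> f(g);

    std::cout << f(2.0) << " == " << std::exp(3.0) << '\n';
    return 0;
}

The nested result_type typedefs are what let compositions nest: CExp deduces its value type from CMinusConstant's result_type rather than from the wrapped function pointer.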
diff --git a/include/maths/CConstantPrior.h b/include/maths/CConstantPrior.h
index afa48c8777..714edd2740 100644
--- a/include/maths/CConstantPrior.h
+++ b/include/maths/CConstantPrior.h
@@ -15,15 +15,12 @@

 #include 

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace maths
-{
+namespace maths {

 //! \brief A very lightweight prior for representing data for which
 //! we expect a single value.
 //!
 //! DESCRIPTION:\n
 //! This implements the CPrior interface for a "random" process which
 //! only ever takes a single value. This is useful for modeling data
 //! features such as the value of an indicator function in a consistent
 //! manner to all other types of data.
-class MATHS_EXPORT CConstantPrior : public CPrior
-{
-    public:
-        using TOptionalDouble = boost::optional<double>;
-
-        //! Lift the overloads of addSamples into scope.
-        using CPrior::addSamples;
-        //! Lift the overloads of print into scope.
-        using CPrior::print;
-
-    public:
-        //! \name Life-Cycle
-        //@{
-        explicit CConstantPrior(const TOptionalDouble &constant = TOptionalDouble());
-
-        //! Construct by traversing a state document.
-        CConstantPrior(core::CStateRestoreTraverser &traverser);
-        //@}
-
-        //! \name Prior Contract
-        //@{
-        //! Get the type of this prior.
-        virtual EPrior type() const;
-
-        //! Create a copy of the prior.
-        //!
-        //! \warning Caller owns returned object.
-        virtual CConstantPrior *clone() const;
-
-        //! Reset the prior to non-informative.
-        virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0);
-
-        //! Returns false.
-        virtual bool needsOffset() const;
-
-        //! No-op.
-        virtual double adjustOffset(const TWeightStyleVec &weightStyle,
-                                    const TDouble1Vec &samples,
-                                    const TDouble4Vec1Vec &weights);
-
-        //! Returns zero.
-        virtual double offset() const;
-
-        //! Set the constant if it hasn't been set.
-        virtual void addSamples(const TWeightStyleVec &weightStyle,
-                                const TDouble1Vec &samples,
-                                const TDouble4Vec1Vec &weights);
-
-        //! No-op.
-        virtual void propagateForwardsByTime(double time);
-
-        //! Get the support for the marginal likelihood function.
-        virtual TDoubleDoublePr marginalLikelihoodSupport() const;
-
-        //! Returns constant or zero if unset (by equidistribution).
-        virtual double marginalLikelihoodMean() const;
-
-        //! Returns constant or zero if unset (by equidistribution).
-        virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE,
-                                              const TDouble4Vec &weights = TWeights::UNIT) const;
-
-        //! All confidence intervals are the point [constant, constant].
-        virtual TDoubleDoublePr
-            marginalLikelihoodConfidenceInterval(double percentage,
-                                                 const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE,
-                                                 const TDouble4Vec &weights = TWeights::UNIT) const;
-
-        //! Get the variance of the marginal likelihood.
-        virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE,
-                                                  const TDouble4Vec &weights = TWeights::UNIT) const;
-
-        //! Returns a large value if all samples are equal to the constant
-        //! and zero otherwise.
-        virtual maths_t::EFloatingPointErrorStatus
-            jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                       const TDouble1Vec &samples,
-                                       const TDouble4Vec1Vec &weights,
-                                       double &result) const;
-
-        //! Get \p numberSamples times the constant.
-        virtual void sampleMarginalLikelihood(std::size_t numberSamples,
-                                              TDouble1Vec &samples) const;
-
-        //! A large number if any sample is less than the constant and
-        //! zero otherwise.
-        virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles,
-                                      const TDouble1Vec &samples,
-                                      const TDouble4Vec1Vec &weights,
-                                      double &lowerBound,
-                                      double &upperBound) const;
-
-        //! A large number if any sample is larger than the constant and
-        //! zero otherwise.
-        virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles,
-                                                const TDouble1Vec &samples,
-                                                const TDouble4Vec1Vec &weights,
-                                                double &lowerBound,
-                                                double &upperBound) const;
-
-        //! Returns one if all samples equal the constant and zero otherwise.
-        virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                                    const TWeightStyleVec &weightStyles,
-                                                    const TDouble1Vec &samples,
-                                                    const TDouble4Vec1Vec &weights,
-                                                    double &lowerBound,
-                                                    double &upperBound,
-                                                    maths_t::ETail &tail) const;
-
-        //! Check if this is a non-informative prior.
-        bool isNonInformative() const;
-
-        //! Get a human readable description of the prior.
-        //!
-        //! \param[in] indent The indent to use at the start of new lines.
-        //! \param[in,out] result Filled in with the description.
-        virtual void print(const std::string &indent, std::string &result) const;
-
-        //! Print the marginal likelihood function.
-        virtual std::string printMarginalLikelihoodFunction(double weight = 1.0) const;
-
-        //! Print the prior density function of the parameters.
-        virtual std::string printJointDensityFunction() const;
-
-        //! Get a checksum for this object.
-        virtual uint64_t checksum(uint64_t seed = 0) const;
-
-        //! Get the memory used by this component
-        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this component
-        virtual std::size_t memoryUsage() const;
-
-        //! Get the static size of this object - used for virtual hierarchies
-        virtual std::size_t staticSize() const;
-
-        //! Persist state by passing information to the supplied inserter
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-        //@}
-
-        //! Get the constant value.
-        TOptionalDouble constant() const;
-
-    private:
-        //! Create by traversing a state document.
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-    private:
-        TOptionalDouble m_Constant;
-};
+class MATHS_EXPORT CConstantPrior : public CPrior {
+public:
+    using TOptionalDouble = boost::optional<double>;
+
+    //! Lift the overloads of addSamples into scope.
+    using CPrior::addSamples;
+    //! Lift the overloads of print into scope.
+    using CPrior::print;
+
+public:
+    //! \name Life-Cycle
+    //@{
+    explicit CConstantPrior(const TOptionalDouble& constant = TOptionalDouble());
+
+    //! Construct by traversing a state document.
+    CConstantPrior(core::CStateRestoreTraverser& traverser);
+    //@}
+
+    //! \name Prior Contract
+    //@{
+    //! Get the type of this prior.
+    virtual EPrior type() const;
+
+    //! Create a copy of the prior.
+    //!
+    //! \warning Caller owns returned object.
+    virtual CConstantPrior* clone() const;
+
+    //! Reset the prior to non-informative.
+    virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0);
+
+    //! Returns false.
+    virtual bool needsOffset() const;
+
+    //! No-op.
+    virtual double adjustOffset(const TWeightStyleVec& weightStyle, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights);
+
+    //! Returns zero.
+    virtual double offset() const;
+
+    //! Set the constant if it hasn't been set.
+    virtual void addSamples(const TWeightStyleVec& weightStyle, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights);
+
+    //! No-op.
+    virtual void propagateForwardsByTime(double time);
+
+    //! Get the support for the marginal likelihood function.
+    virtual TDoubleDoublePr marginalLikelihoodSupport() const;
+
+    //! Returns constant or zero if unset (by equidistribution).
+    virtual double marginalLikelihoodMean() const;
+
+    //! Returns constant or zero if unset (by equidistribution).
+    virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE,
+                                          const TDouble4Vec& weights = TWeights::UNIT) const;
+
+    //! All confidence intervals are the point [constant, constant].
+    virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage,
+                                                                 const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE,
+                                                                 const TDouble4Vec& weights = TWeights::UNIT) const;
+
+    //! Get the variance of the marginal likelihood.
+    virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE,
+                                              const TDouble4Vec& weights = TWeights::UNIT) const;
+
+    //! Returns a large value if all samples are equal to the constant
+    //! and zero otherwise.
+    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                          const TDouble1Vec& samples,
+                                                                          const TDouble4Vec1Vec& weights,
+                                                                          double& result) const;
+
+    //! Get \p numberSamples times the constant.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const;
+
+    //! A large number if any sample is less than the constant and
+    //! zero otherwise.
+    virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                  const TDouble1Vec& samples,
+                                  const TDouble4Vec1Vec& weights,
+                                  double& lowerBound,
+                                  double& upperBound) const;
+
+    //! A large number if any sample is larger than the constant and
+    //! zero otherwise.
+    virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                            const TDouble1Vec& samples,
+                                            const TDouble4Vec1Vec& weights,
+                                            double& lowerBound,
+                                            double& upperBound) const;
+
+    //! Returns one if all samples equal the constant and zero otherwise.
+    virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
+                                                const TWeightStyleVec& weightStyles,
+                                                const TDouble1Vec& samples,
+                                                const TDouble4Vec1Vec& weights,
+                                                double& lowerBound,
+                                                double& upperBound,
+                                                maths_t::ETail& tail) const;
+
+    //! Check if this is a non-informative prior.
+    bool isNonInformative() const;
+
+    //! Get a human readable description of the prior.
+    //!
+    //! \param[in] indent The indent to use at the start of new lines.
+    //! \param[in,out] result Filled in with the description.
+    virtual void print(const std::string& indent, std::string& result) const;
+
+    //! Print the marginal likelihood function.
+    virtual std::string printMarginalLikelihoodFunction(double weight = 1.0) const;
+
+    //! Print the prior density function of the parameters.
+    virtual std::string printJointDensityFunction() const;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Get the memory used by this component
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this component
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const;
+    //! Persist state by passing information to the supplied inserter
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+    //@}
+
+    //! Get the constant value.
+    TOptionalDouble constant() const;
+
+private:
+    //! Create by traversing a state document.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+private:
+    TOptionalDouble m_Constant;
+};
}
}
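Reviewer note: a sketch of the intended behaviour of this prior, using only the interface above. The weight style, the sample value and the container construction are illustrative assumptions (maths_t::E_SampleCountWeight and the TDouble*Vec typedefs come from CPrior/MathsTypes, which this patch does not change).

// constant_prior_example.cc (illustrative only)
#include <maths/CConstantPrior.h>

#include <iostream>

int main() {
    using namespace ml;

    // Models a feature which should only ever take one value.
    maths::CConstantPrior prior;

    // The first sample fixes the constant; later samples equal to it are
    // consistent, any other value makes the likelihood effectively zero.
    maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountWeight);
    maths::CConstantPrior::TDouble1Vec samples(1, 42.0);
    maths::CConstantPrior::TDouble4Vec1Vec weights(1, maths::CConstantPrior::TDouble4Vec(1, 1.0));
    prior.addSamples(weightStyle, samples, weights);

    if (prior.constant()) {
        std::cout << "constant = " << *prior.constant() << '\n';
    }
    return 0;
}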
diff --git a/include/maths/CCooccurrences.h b/include/maths/CCooccurrences.h
index 87555bc9d6..45fc5cc9ab 100644
--- a/include/maths/CCooccurrences.h
+++ b/include/maths/CCooccurrences.h
@@ -17,98 +17,88 @@
 #include 
 #include 

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CMemory;
 }
-namespace maths
-{
+namespace maths {

 //! \brief Computes various measures for the co-occurrence of events,
 //! as encoded by an indicator variable, and finds events with significant
 //! evidence that they are co-occurring.
 //!
 //! DESCRIPTION:\n
-class MATHS_EXPORT CCooccurrences
-{
-    public:
-        using TDoubleVec = std::vector<double>;
-        using TSizeVec = std::vector<std::size_t>;
-        using TSizeSizePr = std::pair<std::size_t, std::size_t>;
-        using TSizeSizePrVec = std::vector<TSizeSizePr>;
-
-    public:
-        CCooccurrences(std::size_t maximumLength, std::size_t indicatorWidth);
-
-        //! Create from part of a state document.
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-        //! Persist state by passing to the supplied inserter.
-        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-        //! Get the top \p n most co-occurring events by significance
-        //! with events \p X.
-        void topNBySignificance(std::size_t X,
-                                std::size_t n,
-                                TSizeSizePrVec &top,
-                                TDoubleVec &significances) const;
-
-        //! Get the top \p n most co-occurring events by significance.
-        void topNBySignificance(std::size_t n,
-                                TSizeSizePrVec &top,
-                                TDoubleVec &significances) const;
-
-        //! Resize the relevant statistics to accommodate up to \p n event
-        //! streams.
-        void addEventStreams(std::size_t n);
-
-        //! Remove the event streams \p remove.
-        void removeEventStreams(const TSizeVec &remove);
-
-        //! Recycle the event streams \p recycle.
-        void recycleEventStreams(const TSizeVec &recycle);
-
-        //! Add the value \p x for the variable \p X.
-        void add(std::size_t X);
-
-        //! Capture the indicator values of missing events.
-        void capture();
-
-        //! Get the checksum of this object.
-        uint64_t checksum(uint64_t seed = 0) const;
-
-        //! Debug the memory used by this object.
-        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this object.
-        std::size_t memoryUsage() const;
-
-    private:
-        using TSizeUSet = boost::unordered_set<std::size_t>;
-        using TPackedBitVectorVec = std::vector<core::CPackedBitVector>;
-
-    private:
-        //! The maximum permitted event sequence length.
-        std::size_t m_MaximumLength;
-
-        //! The current length of the event sequences.
-        std::size_t m_Length;
-
-        //! The width of the indicator function in "captures".
-        std::size_t m_IndicatorWidth;
-
-        //! The current offset from the start of the current indicator.
-        std::size_t m_Offset;
-
-        //! The event indicators to add in the next capture.
-        TSizeUSet m_CurrentIndicators;
-
-        //! The indicator variables for event streams.
-        TPackedBitVectorVec m_Indicators;
-};
+class MATHS_EXPORT CCooccurrences {
+public:
+    using TDoubleVec = std::vector<double>;
+    using TSizeVec = std::vector<std::size_t>;
+    using TSizeSizePr = std::pair<std::size_t, std::size_t>;
+    using TSizeSizePrVec = std::vector<TSizeSizePr>;
+
+public:
+    CCooccurrences(std::size_t maximumLength, std::size_t indicatorWidth);
+
+    //! Create from part of a state document.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Persist state by passing to the supplied inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Get the top \p n most co-occurring events by significance
+    //! with events \p X.
+    void topNBySignificance(std::size_t X, std::size_t n, TSizeSizePrVec& top, TDoubleVec& significances) const;
+
+    //! Get the top \p n most co-occurring events by significance.
+    void topNBySignificance(std::size_t n, TSizeSizePrVec& top, TDoubleVec& significances) const;
+
+    //! Resize the relevant statistics to accommodate up to \p n event
+    //! streams.
+    void addEventStreams(std::size_t n);
+
+    //! Remove the event streams \p remove.
+    void removeEventStreams(const TSizeVec& remove);
+
+    //! Recycle the event streams \p recycle.
+    void recycleEventStreams(const TSizeVec& recycle);
+
+    //! Add the value \p x for the variable \p X.
+    void add(std::size_t X);
+
+    //! Capture the indicator values of missing events.
+    void capture();
+
+    //! Get the checksum of this object.
+    uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Debug the memory used by this object.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this object.
+    std::size_t memoryUsage() const;
+
+private:
+    using TSizeUSet = boost::unordered_set<std::size_t>;
+    using TPackedBitVectorVec = std::vector<core::CPackedBitVector>;
+
+private:
+    //! The maximum permitted event sequence length.
+    std::size_t m_MaximumLength;
+
+    //! The current length of the event sequences.
+    std::size_t m_Length;
+
+    //! The width of the indicator function in "captures".
+    std::size_t m_IndicatorWidth;
+
+    //! The current offset from the start of the current indicator.
+    std::size_t m_Offset;
+
+    //! The event indicators to add in the next capture.
+    TSizeUSet m_CurrentIndicators;
+
+    //! The indicator variables for event streams.
+    TPackedBitVectorVec m_Indicators;
+};
}
}
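Reviewer note: a sketch of the add/capture cycle for this class. The constructor argument meanings are read off the member comments above (maximum indicator sequence length and indicator width in captures), and the concrete values and stream layout are invented for illustration.

// cooccurrences_example.cc (illustrative only)
#include <maths/CCooccurrences.h>

#include <cstddef>
#include <iostream>

int main() {
    // Remember up to 500 captures, with one capture per indicator.
    ml::maths::CCooccurrences cooccurrences(500, 1);
    cooccurrences.addEventStreams(2);

    // Streams 0 and 1 always fire in the same capture interval.
    for (std::size_t i = 0; i < 100; ++i) {
        cooccurrences.add(0);
        cooccurrences.add(1);
        cooccurrences.capture();
    }

    ml::maths::CCooccurrences::TSizeSizePrVec top;
    ml::maths::CCooccurrences::TDoubleVec significances;
    cooccurrences.topNBySignificance(1, top, significances);
    if (!top.empty()) {
        std::cout << "(" << top[0].first << ", " << top[0].second
                  << ") significance " << significances[0] << '\n';
    }
    return 0;
}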
diff --git a/include/maths/CCountMinSketch.h b/include/maths/CCountMinSketch.h
index aff04bb4ee..717eb16128 100644
--- a/include/maths/CCountMinSketch.h
+++ b/include/maths/CCountMinSketch.h
@@ -17,10 +17,8 @@

 #include 

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

 //! \brief Implements Count-Min Sketch approximate counting of
 //! categories.
@@ -48,121 +46,116 @@
 //! interface to this class is double precision. If floats are used
 //! they should be used for storage only and transparent to the rest
 //! of the code base.
-class MATHS_EXPORT CCountMinSketch
-{
-    public:
-        CCountMinSketch(std::size_t rows, std::size_t columns);
-
-        //! Create by traversing a state document.
-        CCountMinSketch(core::CStateRestoreTraverser &traverser);
-
-        //! Efficient swap the contents of two sketches.
-        void swap(CCountMinSketch &sketch);
-
-    private:
-        //! Create by traversing a state document.
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-    public:
-        //! Convert to a node tree.
-        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-        //! Get the number of rows.
-        std::size_t rows() const;
-
-        //! Get the number of columns.
-        std::size_t columns() const;
-
-        //! Get the \f$\delta\f$ probability.
-        double delta() const;
-
-        //! Get the \f$P(1-\delta)\f$ error.
-        double oneMinusDeltaError() const;
-
-        //! Add a count of \p count for category \p category.
-        //!
-        //! \note \p count can be negative in which case the count is
-        //! removed from the sketch.
-        void add(uint32_t category, double count);
-
-        //! Remove \p category from the sketch altogether.
-        //!
-        //! \note That one can decrement the counts by calling add with
-        //! a negative count. However, if we have not sketched the counts
-        //! this removes the map entry for \p category.
-        void removeFromMap(uint32_t category);
-
-        //! Age the counts forwards \p time.
-        void age(double alpha);
-
-        //! Get the total count of all categories.
-        double totalCount() const;
-
-        //! Get the count of category \p category.
-        double count(uint32_t category) const;
-
-        //! Get the fraction of category \p category.
-        double fraction(uint32_t category) const;
-
-        //! Check if the counts are sketched.
-        bool sketched() const;
-
-        //! Get a checksum for the sketch.
-        uint64_t checksum(uint64_t seed = 0) const;
-
-        //! Get the memory used by this sketch.
-        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this sketch.
-        std::size_t memoryUsage() const;
-
-    private:
-        using TUInt32HashVec = core::CHashing::CUniversalHash::TUInt32UnrestrictedHashVec;
-        using TFloatVec = std::vector<CFloatStorage>;
-        using TFloatVecVec = std::vector<TFloatVec>;
-
-        //! Wraps up the sketch data.
-        struct MATHS_EXPORT SSketch
-        {
-            SSketch() = default;
-            SSketch(std::size_t rows, std::size_t columns);
-
-            //! Create by traversing a state document.
-            bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser,
-                                        std::size_t rows,
-                                        std::size_t columns);
-
-            //! Convert to a node tree.
-            void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-            //! The hash functions.
-            TUInt32HashVec s_Hashes;
-
-            //! The counts.
-            TFloatVecVec s_Counts;
-        };
-
-        using TUInt32FloatPr = std::pair<uint32_t, CFloatStorage>;
-        using TUInt32FloatPrVec = std::vector<TUInt32FloatPr>;
-        using TUInt32FloatPrVecOrSketch = boost::variant<TUInt32FloatPrVec, SSketch>;
-
-        //! Maybe switch to sketching the counts.
-        void sketch();
-
-    private:
-        //! The number of rows.
-        std::size_t m_Rows;
-
-        //! The number of columns.
-        std::size_t m_Columns;
-
-        //! The total count.
-        CFloatStorage m_TotalCount;
-
-        //! The sketch.
-        TUInt32FloatPrVecOrSketch m_Sketch;
-};
+class MATHS_EXPORT CCountMinSketch {
+public:
+    CCountMinSketch(std::size_t rows, std::size_t columns);
+
+    //! Create by traversing a state document.
+    CCountMinSketch(core::CStateRestoreTraverser& traverser);
+
+    //! Efficient swap the contents of two sketches.
+    void swap(CCountMinSketch& sketch);
+
+private:
+    //! Create by traversing a state document.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+public:
+    //! Convert to a node tree.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Get the number of rows.
+    std::size_t rows() const;
+
+    //! Get the number of columns.
+    std::size_t columns() const;
+
+    //! Get the \f$\delta\f$ probability.
+    double delta() const;
+
+    //! Get the \f$P(1-\delta)\f$ error.
+    double oneMinusDeltaError() const;
+
+    //! Add a count of \p count for category \p category.
+    //!
+    //! \note \p count can be negative in which case the count is
+    //! removed from the sketch.
+    void add(uint32_t category, double count);
+
+    //! Remove \p category from the sketch altogether.
+    //!
+    //! \note That one can decrement the counts by calling add with
+    //! a negative count. However, if we have not sketched the counts
+    //! this removes the map entry for \p category.
+    void removeFromMap(uint32_t category);
+
+    //! Age the counts forwards \p time.
+    void age(double alpha);
+
+    //! Get the total count of all categories.
+    double totalCount() const;
+
+    //! Get the count of category \p category.
+    double count(uint32_t category) const;
+
+    //! Get the fraction of category \p category.
+    double fraction(uint32_t category) const;
+
+    //! Check if the counts are sketched.
+    bool sketched() const;
+
+    //! Get a checksum for the sketch.
+    uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Get the memory used by this sketch.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this sketch.
+    std::size_t memoryUsage() const;
+
+private:
+    using TUInt32HashVec = core::CHashing::CUniversalHash::TUInt32UnrestrictedHashVec;
+    using TFloatVec = std::vector<CFloatStorage>;
+    using TFloatVecVec = std::vector<TFloatVec>;
+
+    //! Wraps up the sketch data.
+    struct MATHS_EXPORT SSketch {
+        SSketch() = default;
+        SSketch(std::size_t rows, std::size_t columns);
+
+        //! Create by traversing a state document.
+        bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::size_t rows, std::size_t columns);
+
+        //! Convert to a node tree.
+        void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+        //! The hash functions.
+        TUInt32HashVec s_Hashes;
+
+        //! The counts.
+        TFloatVecVec s_Counts;
+    };
+
+    using TUInt32FloatPr = std::pair<uint32_t, CFloatStorage>;
+    using TUInt32FloatPrVec = std::vector<TUInt32FloatPr>;
+    using TUInt32FloatPrVecOrSketch = boost::variant<TUInt32FloatPrVec, SSketch>;
+
+    //! Maybe switch to sketching the counts.
+    void sketch();
+
+private:
+    //! The number of rows.
+    std::size_t m_Rows;
+
+    //! The number of columns.
+    std::size_t m_Columns;
+
+    //! The total count.
+    CFloatStorage m_TotalCount;
+
+    //! The sketch.
+    TUInt32FloatPrVecOrSketch m_Sketch;
};
}
}
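Reviewer note: a short usage sketch for the sketch class above; the dimensions and counts are invented for illustration, and only the public interface declared in this header is used. Note the class stores exact per-category counts in a map until it decides to switch to the approximate sketch representation.

// count_min_example.cc (illustrative only)
#include <maths/CCountMinSketch.h>

#include <cstdint>
#include <iostream>

int main() {
    // Two hash rows of 600 counters each.
    ml::maths::CCountMinSketch sketch(2, 600);

    // Far more categories than counters forces the approximate regime.
    for (uint32_t category = 0; category < 10000; ++category) {
        sketch.add(category, 1.0);
    }
    sketch.add(42, 99.0);

    // Estimates never undercount; the overcount is bounded with
    // probability controlled by delta().
    std::cout << "count(42) ~ " << sketch.count(42)
              << " of " << sketch.totalCount()
              << " (sketched: " << sketch.sketched() << ")\n";
    return 0;
}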
in the prediction errors. + }; - public: - CDecayRateController(); - CDecayRateController(int checks, std::size_t dimension); +public: + CDecayRateController(); + CDecayRateController(int checks, std::size_t dimension); - //! Reset the errors. - void reset(); + //! Reset the errors. + void reset(); - //! Restore by reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Restore by reading state from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Persist by passing state to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist by passing state to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Get the decay rate multiplier to apply and update the relevant - //! prediction errors. - double multiplier(const TDouble1Vec &prediction, - const TDouble1VecVec &predictionErrors, - core_t::TTime bucketLength, - double learnRate, - double decayRate); + //! Get the decay rate multiplier to apply and update the relevant + //! prediction errors. + double multiplier(const TDouble1Vec& prediction, + const TDouble1VecVec& predictionErrors, + core_t::TTime bucketLength, + double learnRate, + double decayRate); - //! Get the current multiplier. - double multiplier() const; + //! Get the current multiplier. + double multiplier() const; - //! Get the dimension of the time series model this controls. - std::size_t dimension() const; + //! Get the dimension of the time series model this controls. + std::size_t dimension() const; - //! Debug the memory used by this controller. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this controller. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this controller. - std::size_t memoryUsage() const; + //! Get the memory used by this controller. + std::size_t memoryUsage() const; - //! Get a checksum of this object. - uint64_t checksum(uint64_t seed = 0) const; + //! Get a checksum of this object. + uint64_t checksum(uint64_t seed = 0) const; - private: - //! Get the count of residuals added so far. - double count() const; +private: + //! Get the count of residuals added so far. + double count() const; - //! Get the change to apply to the decay rate multiplier. - double change(const double (&stats)[3], core_t::TTime bucketLength) const; + //! Get the change to apply to the decay rate multiplier. + double change(const double (&stats)[3], core_t::TTime bucketLength) const; - private: - //! The checks we perform to detect error conditions. - int m_Checks; +private: + //! The checks we perform to detect error conditions. + int m_Checks; - //! The current target multiplier. - double m_Target; + //! The current target multiplier. + double m_Target; - //! The cumulative multiplier applied to the decay rate. - TMeanAccumulator m_Multiplier; + //! The cumulative multiplier applied to the decay rate. + TMeanAccumulator m_Multiplier; - //! A random number generator. - CPRNG::CXorOShiro128Plus m_Rng; + //! A random number generator. + CPRNG::CXorOShiro128Plus m_Rng; - //! The mean predicted value. - TMeanAccumulator1Vec m_PredictionMean; + //! The mean predicted value. + TMeanAccumulator1Vec m_PredictionMean; - //! The mean bias in the model predictions. - TMeanAccumulator1Vec m_Bias; + //! The mean bias in the model predictions. + TMeanAccumulator1Vec m_Bias; - //! 
The short term absolute errors in the model predictions. - TMeanAccumulator1Vec m_RecentAbsError; + //! The short term absolute errors in the model predictions. + TMeanAccumulator1Vec m_RecentAbsError; - //! The long term absolute errors in the model predictions. - TMeanAccumulator1Vec m_HistoricalAbsError; + //! The long term absolute errors in the model predictions. + TMeanAccumulator1Vec m_HistoricalAbsError; }; - } } diff --git a/include/maths/CDecompositionComponent.h b/include/maths/CDecompositionComponent.h index c8c8de49a0..fee9136832 100644 --- a/include/maths/CDecompositionComponent.h +++ b/include/maths/CDecompositionComponent.h @@ -20,210 +20,195 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Common functionality used by our decomposition component classes. -class MATHS_EXPORT CDecompositionComponent -{ +class MATHS_EXPORT CDecompositionComponent { +public: + using TDoubleDoublePr = maths_t::TDoubleDoublePr; + using TDoubleVec = std::vector; + using TFloatVec = std::vector; + using TSplineCRef = CSpline, + boost::reference_wrapper, + boost::reference_wrapper>; + using TSplineRef = + CSpline, boost::reference_wrapper, boost::reference_wrapper>; + +public: + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Create by traversing a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + +protected: + //! \brief A low memory representation of the value and variance splines. + class MATHS_EXPORT CPackedSplines { public: - using TDoubleDoublePr = maths_t::TDoubleDoublePr; - using TDoubleVec = std::vector; - using TFloatVec = std::vector; - using TSplineCRef = CSpline, - boost::reference_wrapper, - boost::reference_wrapper>; - using TSplineRef = CSpline, - boost::reference_wrapper, - boost::reference_wrapper>; + enum ESpline { E_Value = 0, E_Variance = 1 }; public: - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Create by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - protected: - //! \brief A low memory representation of the value and variance splines. - class MATHS_EXPORT CPackedSplines - { - public: - enum ESpline - { - E_Value = 0, - E_Variance = 1 - }; - - public: - using TTypeArray = boost::array; - using TFloatVecArray = boost::array; - using TDoubleVecArray = boost::array; - - public: - CPackedSplines(CSplineTypes::EType valueInterpolationType, - CSplineTypes::EType varianceInterpolationType); - - //! Create by traversing a state document. - bool acceptRestoreTraverser(CSplineTypes::EBoundaryCondition boundary, - core::CStateRestoreTraverser &traverser); - - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! An efficient swap of the contents of two packed splines. - void swap(CPackedSplines &other); - - //! Check if the splines have been initialized. - bool initialized() const; - - //! Clear the splines. - void clear(); - - //! Shift the spline values by \p shift. - void shift(ESpline spline, double shift); - - //! Get a constant spline reference. - TSplineCRef spline(ESpline spline) const; - - //! Get a writable spline reference. 
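
Looking back at CDecayRateController: its class comment boils down to a three-way rule on the error statistics it tracks. A minimal sketch of that rule, with invented thresholds and step sizes; the real controller smooths these statistics over time, checks significance and damps the multiplier:

#include <cmath>

// Illustrative decision rule only: bias or a recent jump in error relative
// to the long run error means the model is stale, so forget faster; doing
// better than the long run average means we can afford a longer memory.
double decayRateMultiplier(double meanBias,           // signed mean prediction error
                           double recentAbsError,     // short term mean absolute error
                           double historicalAbsError) // long term mean absolute error
{
    if (historicalAbsError == 0.0) {
        return 1.0;
    }
    if (std::fabs(meanBias) > 1.5 * historicalAbsError ||
        recentAbsError > 2.0 * historicalAbsError) {
        return 1.05; // speed up forgetting
    }
    if (recentAbsError < 0.5 * historicalAbsError) {
        return 0.95; // slow down forgetting
    }
    return 1.0;
}
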
- TSplineRef spline(ESpline spline); - - //! Get the splines' knot points. - const TFloatVec &knots() const; - - //! Interpolate the value and variance functions on \p knots. - void interpolate(const TDoubleVec &knots, - const TDoubleVec &values, - const TDoubleVec &variances, - CSplineTypes::EBoundaryCondition boundary); - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed) const; - - //! Debug the memory used by the splines. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by these splines. - std::size_t memoryUsage() const; - - private: - //! The splines' types. - TTypeArray m_Types; - //! The splines' knots. - TFloatVec m_Knots; - //! The splines' values. - TFloatVecArray m_Values; - //! The splines' curvatures. - TDoubleVecArray m_Curvatures; - }; - - protected: - //! \param[in] maxSize The maximum number of component buckets. - //! \param[in] boundaryCondition The boundary condition to use for the splines. - //! \param[in] valueInterpolationType The style of interpolation to use for - //! computing values. - //! \param[in] varianceInterpolationType The style of interpolation to use for - //! computing variances. - CDecompositionComponent(std::size_t maxSize, - CSplineTypes::EBoundaryCondition boundaryCondition, - CSplineTypes::EType valueInterpolationType, - CSplineTypes::EType varianceInterpolationType); - - //! An efficient swap of the contents of two components. - void swap(CDecompositionComponent &other); - - //! Check if the seasonal component has been estimated. - bool initialized() const; - - //! Clear all data. - void clear(); - - //! Update the interpolation of the bucket values. - //! - //! \param[in] knots The spline knot points. - //! \param[in] values The values at the spline knot points. - //! \param[in] variances The variances at the spline knot points. - void interpolate(const TDoubleVec &knots, - const TDoubleVec &values, - const TDoubleVec &variances); + using TTypeArray = boost::array; + using TFloatVecArray = boost::array; + using TDoubleVecArray = boost::array; - //! Shift the component's values by \p shift. - void shiftLevel(double shift); + public: + CPackedSplines(CSplineTypes::EType valueInterpolationType, CSplineTypes::EType varianceInterpolationType); - //! Interpolate the function at \p time. - //! - //! \param[in] offset The offset for which to get the value. - //! \param[in] n The bucket count containing \p offset. - //! \param[in] confidence The symmetric confidence interval for the variance - //! as a percentage. - TDoubleDoublePr value(double offset, double n, double confidence) const; + //! Create by traversing a state document. + bool acceptRestoreTraverser(CSplineTypes::EBoundaryCondition boundary, core::CStateRestoreTraverser& traverser); - //! Get the mean value of the function. - double meanValue() const; + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Get the variance of the residual about the function at \p time. - //! - //! \param[in] offset The offset for which to get the variance. - //! \param[in] n The bucket count containing \p offset. - //! \param[in] confidence The symmetric confidence interval for the - //! variance as a percentage. - TDoubleDoublePr variance(double offset, double n, double confidence) const; + //! An efficient swap of the contents of two packed splines. + void swap(CPackedSplines& other); - //! Get the mean variance of the function residuals. 
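
The confidence parameter threaded through value() and variance() above is a percentage. One plausible way to turn a spline value, a residual variance and a bucket count n into the returned pair, assuming Gaussian residuals; this is a sketch, not necessarily the library's exact calculation:

#include <boost/math/distributions/normal.hpp>
#include <cmath>
#include <utility>

// Sketch: symmetric Gaussian interval around a component value, assuming
// the residual variance scales inversely with the bucket count n.
std::pair<double, double>
symmetricInterval(double value, double variance, double n, double confidence) {
    if (confidence <= 0.0 || variance <= 0.0 || n <= 0.0) {
        return {value, value};
    }
    boost::math::normal_distribution<> residual{0.0, std::sqrt(variance / n)};
    double q = boost::math::quantile(residual, (1.0 + confidence / 100.0) / 2.0);
    return {value - q, value + q};
}
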
- double meanVariance() const; + //! Check if the splines have been initialized. + bool initialized() const; - //! Get the maximum ratio between a residual variance and the mean - //! residual variance. - double heteroscedasticity() const; + //! Clear the splines. + void clear(); - //! Get the maximum size to use for the bucketing. - std::size_t maxSize() const; + //! Shift the spline values by \p shift. + void shift(ESpline spline, double shift); - //! Get the boundary condition to use when interpolating. - CSplineTypes::EBoundaryCondition boundaryCondition() const; + //! Get a constant spline reference. + TSplineCRef spline(ESpline spline) const; - //! Get the value spline. - TSplineCRef valueSpline() const; + //! Get a writable spline reference. + TSplineRef spline(ESpline spline); - //! Get the variance spline. - TSplineCRef varianceSpline() const; + //! Get the splines' knot points. + const TFloatVec& knots() const; - //! Get the underlying splines representation. - const CPackedSplines &splines() const; + //! Interpolate the value and variance functions on \p knots. + void interpolate(const TDoubleVec& knots, + const TDoubleVec& values, + const TDoubleVec& variances, + CSplineTypes::EBoundaryCondition boundary); //! Get a checksum for this object. uint64_t checksum(uint64_t seed) const; - private: - //! The minimum permitted size for the points sketch. - static const std::size_t MIN_MAX_SIZE; - - private: - //! The maximum number of buckets to use to cover the period. - std::size_t m_MaxSize; + //! Debug the memory used by the splines. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! The boundary condition to use for the splines. - CSplineTypes::EBoundaryCondition m_BoundaryCondition; + //! Get the memory used by these splines. + std::size_t memoryUsage() const; - //! The spline we fit through the function points and the function point - //! residual variances. - CPackedSplines m_Splines; - - //! The mean value in the period. - double m_MeanValue; - - //! The mean residual variance in the period. - double m_MeanVariance; + private: + //! The splines' types. + TTypeArray m_Types; + //! The splines' knots. + TFloatVec m_Knots; + //! The splines' values. + TFloatVecArray m_Values; + //! The splines' curvatures. + TDoubleVecArray m_Curvatures; + }; + +protected: + //! \param[in] maxSize The maximum number of component buckets. + //! \param[in] boundaryCondition The boundary condition to use for the splines. + //! \param[in] valueInterpolationType The style of interpolation to use for + //! computing values. + //! \param[in] varianceInterpolationType The style of interpolation to use for + //! computing variances. + CDecompositionComponent(std::size_t maxSize, + CSplineTypes::EBoundaryCondition boundaryCondition, + CSplineTypes::EType valueInterpolationType, + CSplineTypes::EType varianceInterpolationType); + + //! An efficient swap of the contents of two components. + void swap(CDecompositionComponent& other); + + //! Check if the seasonal component has been estimated. + bool initialized() const; + + //! Clear all data. + void clear(); + + //! Update the interpolation of the bucket values. + //! + //! \param[in] knots The spline knot points. + //! \param[in] values The values at the spline knot points. + //! \param[in] variances The variances at the spline knot points. + void interpolate(const TDoubleVec& knots, const TDoubleVec& values, const TDoubleVec& variances); + + //! Shift the component's values by \p shift. + void shiftLevel(double shift); + + //! 
Interpolate the function at \p time. + //! + //! \param[in] offset The offset for which to get the value. + //! \param[in] n The bucket count containing \p offset. + //! \param[in] confidence The symmetric confidence interval for the variance + //! as a percentage. + TDoubleDoublePr value(double offset, double n, double confidence) const; + + //! Get the mean value of the function. + double meanValue() const; + + //! Get the variance of the residual about the function at \p time. + //! + //! \param[in] offset The offset for which to get the variance. + //! \param[in] n The bucket count containing \p offset. + //! \param[in] confidence The symmetric confidence interval for the + //! variance as a percentage. + TDoubleDoublePr variance(double offset, double n, double confidence) const; + + //! Get the mean variance of the function residuals. + double meanVariance() const; + + //! Get the maximum ratio between a residual variance and the mean + //! residual variance. + double heteroscedasticity() const; + + //! Get the maximum size to use for the bucketing. + std::size_t maxSize() const; + + //! Get the boundary condition to use when interpolating. + CSplineTypes::EBoundaryCondition boundaryCondition() const; + + //! Get the value spline. + TSplineCRef valueSpline() const; + + //! Get the variance spline. + TSplineCRef varianceSpline() const; + + //! Get the underlying splines representation. + const CPackedSplines& splines() const; + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed) const; + +private: + //! The minimum permitted size for the points sketch. + static const std::size_t MIN_MAX_SIZE; + +private: + //! The maximum number of buckets to use to cover the period. + std::size_t m_MaxSize; + + //! The boundary condition to use for the splines. + CSplineTypes::EBoundaryCondition m_BoundaryCondition; + + //! The spline we fit through the function points and the function point + //! residual variances. + CPackedSplines m_Splines; + + //! The mean value in the period. + double m_MeanValue; + + //! The mean residual variance in the period. + double m_MeanVariance; }; - } } diff --git a/include/maths/CDoublePrecisionStorage.h b/include/maths/CDoublePrecisionStorage.h index c2db1301b5..380b783474 100644 --- a/include/maths/CDoublePrecisionStorage.h +++ b/include/maths/CDoublePrecisionStorage.h @@ -8,11 +8,8 @@ #include -namespace ml -{ -namespace maths -{ - +namespace ml { +namespace maths { //! \brief A wrapper around double to enable double-precision persisting //! @@ -22,38 +19,30 @@ namespace maths //! Doubles are usually persisted with single precision, but in certain //! cases this leads to an unacceptable loss of precision, for example //! when a bucket time value is stored in a double (~1e9) -class CDoublePrecisionStorage -{ - public: - CDoublePrecisionStorage() : m_Value(0) - {} - - CDoublePrecisionStorage(double v) : m_Value(v) - {} - - //! Implicit conversion to a double. - operator double () const - { - return m_Value; - } - - //! Assign from a double. - CDoublePrecisionStorage &operator=(double value) - { - m_Value = value; - return *this; - } - - //! Plus assign from double. - CDoublePrecisionStorage &operator+=(double value) - { - m_Value += value; - return *this; - } - - private: - //! The underlying value - double m_Value; +class CDoublePrecisionStorage { +public: + CDoublePrecisionStorage() : m_Value(0) {} + + CDoublePrecisionStorage(double v) : m_Value(v) {} + + //! Implicit conversion to a double. + operator double() const { return m_Value; } + + //! 
Assign from a double. + CDoublePrecisionStorage& operator=(double value) { + m_Value = value; + return *this; + } + + //! Plus assign from double. + CDoublePrecisionStorage& operator+=(double value) { + m_Value += value; + return *this; + } + +private: + //! The underlying value + double m_Value; }; } // maths diff --git a/include/maths/CEntropySketch.h b/include/maths/CEntropySketch.h index c26771525f..bdfd6d93cc 100644 --- a/include/maths/CEntropySketch.h +++ b/include/maths/CEntropySketch.h @@ -10,13 +10,11 @@ #include #include -#include #include +#include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A sketch data structure for computing the Shannon entropy of a data //! stream under the turnstile model. @@ -29,33 +27,31 @@ namespace maths //! is \f$\frac{1}{\epsilon^2} \log(T) \log(\frac{T}{\epsilon}\f$. //! //! See http://www.jmlr.org/proceedings/papers/v31/clifford13a.pdf for details. -class MATHS_EXPORT CEntropySketch -{ - public: - CEntropySketch(std::size_t k); +class MATHS_EXPORT CEntropySketch { +public: + CEntropySketch(std::size_t k); - //! Add \p category with count of \p count. - void add(std::size_t category, uint64_t count = 1); + //! Add \p category with count of \p count. + void add(std::size_t category, uint64_t count = 1); - //! Compute the entropy based on the values added so far. - double calculate() const; + //! Compute the entropy based on the values added so far. + double calculate() const; - private: - using TDoubleVec = std::vector; - using TUInt64Vec = std::vector; +private: + using TDoubleVec = std::vector; + using TUInt64Vec = std::vector; - private: - //! Generate the projection of the category counts. - void generateProjection(std::size_t category, TDoubleVec &projection); +private: + //! Generate the projection of the category counts. + void generateProjection(std::size_t category, TDoubleVec& projection); - private: - //! The overall count. - uint64_t m_Y; +private: + //! The overall count. + uint64_t m_Y; - //! The sketch count. - TDoubleVec m_Yi; + //! The sketch count. + TDoubleVec m_Yi; }; - } } diff --git a/include/maths/CEqualWithTolerance.h b/include/maths/CEqualWithTolerance.h index 787f162890..d3e8b460f2 100644 --- a/include/maths/CEqualWithTolerance.h +++ b/include/maths/CEqualWithTolerance.h @@ -13,75 +13,46 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace equal_with_tolerance_detail -{ +namespace equal_with_tolerance_detail { template -struct SNorm -{ +struct SNorm { using result_type = T; - static T dispatch(const T &t) - { - return t; - } + static T dispatch(const T& t) { return t; } }; template -struct SNorm> -{ +struct SNorm> { using result_type = T; - static T dispatch(const CVectorNx1 &t) - { - return t.euclidean(); - } + static T dispatch(const CVectorNx1& t) { return t.euclidean(); } }; template -struct SNorm> -{ +struct SNorm> { using result_type = T; - static T dispatch(const CVector &t) - { - return t.euclidean(); - } + static T dispatch(const CVector& t) { return t.euclidean(); } }; template -struct SNorm> -{ +struct SNorm> { using result_type = T; - static T dispatch(const CSymmetricMatrixNxN &t) - { - return t.frobenius(); - } + static T dispatch(const CSymmetricMatrixNxN& t) { return t.frobenius(); } }; template -struct SNorm> -{ +struct SNorm> { using result_type = T; - static T dispatch(const CSymmetricMatrix &t) - { - return t.frobenius(); - } + static T dispatch(const CSymmetricMatrix& t) { return t.frobenius(); } }; - } //! 
\brief The tolerance types for equal with tolerance. -class CToleranceTypes -{ - public: - enum EToleranceType - { - E_AbsoluteTolerance = 03, - E_RelativeTolerance = 06 - }; +class CToleranceTypes { +public: + enum EToleranceType { E_AbsoluteTolerance = 03, E_RelativeTolerance = 06 }; }; //! \brief Comparator that can be used for determining equality to @@ -105,75 +76,54 @@ class CToleranceTypes //! have has_multiplies and so, short of writing this functionality //! ourselves, we can't implement this. template -class CEqualWithTolerance : public std::binary_function, - public CToleranceTypes -{ - public: - CEqualWithTolerance(unsigned int toleranceType, - const T &eps) : - m_ToleranceType(toleranceType), - m_AbsoluteEps(abs(norm(eps))), - m_RelativeEps(abs(norm(eps))) - {} - - CEqualWithTolerance(unsigned int toleranceType, - const T &absoluteEps, - const T &relativeEps) : - m_ToleranceType(toleranceType), - m_AbsoluteEps(abs(norm(absoluteEps))), - m_RelativeEps(abs(norm(relativeEps))) - {} - - bool operator()(const T &lhs, const T &rhs) const - { - const T &max = norm(rhs) > norm(lhs) ? rhs : lhs; - const T &min = norm(rhs) > norm(lhs) ? lhs : rhs; - const T &maxAbs = abs(norm(rhs)) > abs(norm(lhs)) ? rhs : lhs; - - T difference = max - min; - - switch (m_ToleranceType) - { - case 2: // absolute & relative - return (norm(difference) <= m_AbsoluteEps) - && (norm(difference) <= m_RelativeEps * abs(norm(maxAbs))); - case 3: // absolute - return norm(difference) <= m_AbsoluteEps; - case 6: // relative - return norm(difference) <= m_RelativeEps * abs(norm(maxAbs)); - case 7: // absolute | relative - return (norm(difference) <= m_AbsoluteEps) - || (norm(difference) <= m_RelativeEps * abs(norm(maxAbs))); - } - LOG_ERROR("Unexpected tolerance type " << m_ToleranceType); - return false; +class CEqualWithTolerance : public std::binary_function, public CToleranceTypes { +public: + CEqualWithTolerance(unsigned int toleranceType, const T& eps) + : m_ToleranceType(toleranceType), m_AbsoluteEps(abs(norm(eps))), m_RelativeEps(abs(norm(eps))) {} + + CEqualWithTolerance(unsigned int toleranceType, const T& absoluteEps, const T& relativeEps) + : m_ToleranceType(toleranceType), m_AbsoluteEps(abs(norm(absoluteEps))), m_RelativeEps(abs(norm(relativeEps))) {} + + bool operator()(const T& lhs, const T& rhs) const { + const T& max = norm(rhs) > norm(lhs) ? rhs : lhs; + const T& min = norm(rhs) > norm(lhs) ? lhs : rhs; + const T& maxAbs = abs(norm(rhs)) > abs(norm(lhs)) ? rhs : lhs; + + T difference = max - min; + + switch (m_ToleranceType) { + case 2: // absolute & relative + return (norm(difference) <= m_AbsoluteEps) && (norm(difference) <= m_RelativeEps * abs(norm(maxAbs))); + case 3: // absolute + return norm(difference) <= m_AbsoluteEps; + case 6: // relative + return norm(difference) <= m_RelativeEps * abs(norm(maxAbs)); + case 7: // absolute | relative + return (norm(difference) <= m_AbsoluteEps) || (norm(difference) <= m_RelativeEps * abs(norm(maxAbs))); } + LOG_ERROR("Unexpected tolerance type " << m_ToleranceType); + return false; + } - private: - using TNorm = typename equal_with_tolerance_detail::SNorm::result_type; +private: + using TNorm = typename equal_with_tolerance_detail::SNorm::result_type; - private: - //! A type agnostic implementation of fabs. - template - static inline U abs(const U &x) - { - return x < U(0) ? -x : x; - } +private: + //! A type agnostic implementation of fabs. + template + static inline U abs(const U& x) { + return x < U(0) ? -x : x; + } - //! 
Get the norm of the specified type. - static TNorm norm(const T &t) - { - return equal_with_tolerance_detail::SNorm::dispatch(t); - } + //! Get the norm of the specified type. + static TNorm norm(const T& t) { return equal_with_tolerance_detail::SNorm::dispatch(t); } - private: - unsigned int m_ToleranceType; - TNorm m_AbsoluteEps; - TNorm m_RelativeEps; +private: + unsigned int m_ToleranceType; + TNorm m_AbsoluteEps; + TNorm m_RelativeEps; }; - } } #endif // INCLUDED_ml_maths_CEqualWithTolerance_h - diff --git a/include/maths/CExpandingWindow.h b/include/maths/CExpandingWindow.h index 31cc291f43..cd29253375 100644 --- a/include/maths/CExpandingWindow.h +++ b/include/maths/CExpandingWindow.h @@ -8,8 +8,8 @@ #define INCLUDED_ml_maths_CExpandingWindow_h #include -#include #include +#include #include #include @@ -18,16 +18,13 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Implements a fixed memory expanding time window. //! @@ -39,87 +36,82 @@ namespace maths //! constructor. At the point it overflows, i.e. time since the //! beginning of the window exceeds "size" x "maximum bucket length", //! it will re-initialize the bucketing and update the start time. -class MATHS_EXPORT CExpandingWindow -{ - public: - using TDoubleVec = std::vector; - using TTimeVec = std::vector; - using TTimeCRng = core::CVectorRange; - using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TFloatMeanAccumulatorVec = std::vector; - using TPredictor = std::function; +class MATHS_EXPORT CExpandingWindow { +public: + using TDoubleVec = std::vector; + using TTimeVec = std::vector; + using TTimeCRng = core::CVectorRange; + using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TFloatMeanAccumulatorVec = std::vector; + using TPredictor = std::function; - public: - CExpandingWindow(core_t::TTime bucketLength, - TTimeCRng bucketLengths, - std::size_t size, - double decayRate = 0.0); +public: + CExpandingWindow(core_t::TTime bucketLength, TTimeCRng bucketLengths, std::size_t size, double decayRate = 0.0); - //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Get the start time of the sketch. - core_t::TTime startTime() const; + //! Get the start time of the sketch. + core_t::TTime startTime() const; - //! Get the end time of the sketch. - core_t::TTime endTime() const; + //! Get the end time of the sketch. + core_t::TTime endTime() const; - //! Get the current bucket length. - core_t::TTime bucketLength() const; + //! Get the current bucket length. + core_t::TTime bucketLength() const; - //! Get the bucket values. - const TFloatMeanAccumulatorVec &values() const; + //! Get the bucket values. + const TFloatMeanAccumulatorVec& values() const; - //! Get the bucket values minus the values from \p trend. - TFloatMeanAccumulatorVec valuesMinusPrediction(const TPredictor &predictor) const; + //! Get the bucket values minus the values from \p trend. 
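
Back in CEqualWithTolerance, the octal enum values are what make the tolerance types compose as bit masks: E_AbsoluteTolerance & E_RelativeTolerance is 2 (both conditions must hold), the individual flags are 3 and 6, and E_AbsoluteTolerance | E_RelativeTolerance is 7 (either condition suffices), which is precisely the four cases in the comparator's switch. A toy check:

#include <iostream>

enum EToleranceType { E_AbsoluteTolerance = 03, E_RelativeTolerance = 06 };

int main() {
    // The four values the comparator's switch dispatches on:
    std::cout << (E_AbsoluteTolerance & E_RelativeTolerance) << '\n'; // 2: absolute AND relative
    std::cout << E_AbsoluteTolerance << '\n';                         // 3: absolute only
    std::cout << E_RelativeTolerance << '\n';                         // 6: relative only
    std::cout << (E_AbsoluteTolerance | E_RelativeTolerance) << '\n'; // 7: absolute OR relative
}
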
+ TFloatMeanAccumulatorVec valuesMinusPrediction(const TPredictor& predictor) const; - //! Set the start time to \p time. - void initialize(core_t::TTime time); + //! Set the start time to \p time. + void initialize(core_t::TTime time); - //! Age the bucket values to account for \p time elapsed time. - void propagateForwardsByTime(double time); + //! Age the bucket values to account for \p time elapsed time. + void propagateForwardsByTime(double time); - //! Add \p value at \p time. - void add(core_t::TTime time, double value, double weight = 1.0); + //! Add \p value at \p time. + void add(core_t::TTime time, double value, double weight = 1.0); - //! Check if we need to compress by increasing the bucket span. - bool needToCompress(core_t::TTime time) const; + //! Check if we need to compress by increasing the bucket span. + bool needToCompress(core_t::TTime time) const; - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! Get the memory used by this object. + std::size_t memoryUsage() const; - private: - //! The rate at which the bucket values are aged. - double m_DecayRate; +private: + //! The rate at which the bucket values are aged. + double m_DecayRate; - //! The data bucketing length. - core_t::TTime m_BucketLength; + //! The data bucketing length. + core_t::TTime m_BucketLength; - //! The bucket lengths to test. - TTimeCRng m_BucketLengths; + //! The bucket lengths to test. + TTimeCRng m_BucketLengths; - //! The index in m_BucketLengths of the current bucketing interval. - std::size_t m_BucketLengthIndex; + //! The index in m_BucketLengths of the current bucketing interval. + std::size_t m_BucketLengthIndex; - //! The time of the first data point. - core_t::TTime m_StartTime; + //! The time of the first data point. + core_t::TTime m_StartTime; - //! The bucket values. - TFloatMeanAccumulatorVec m_BucketValues; + //! The bucket values. + TFloatMeanAccumulatorVec m_BucketValues; - //! The mean value time modulo the data bucketing length. - TFloatMeanAccumulator m_MeanOffset; + //! The mean value time modulo the data bucketing length. + TFloatMeanAccumulator m_MeanOffset; }; - } } diff --git a/include/maths/CGammaRateConjugate.h b/include/maths/CGammaRateConjugate.h index ef3d1356d5..86252303f6 100644 --- a/include/maths/CGammaRateConjugate.h +++ b/include/maths/CGammaRateConjugate.h @@ -15,15 +15,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { struct SDistributionRestoreParams; //! \brief A conjugate prior distribution for a stationary gamma variable. @@ -54,373 +51,363 @@ struct SDistributionRestoreParams; //! the data when using one-of-n composition (see COneOfNPrior) or model data with //! multiple modes when using multi-modal composition (see CMultimodalPrior). //! From a design point of view this is the composite pattern. -class MATHS_EXPORT CGammaRateConjugate : public CPrior -{ - public: - //! See core::CMemory. 
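
CExpandingWindow's overflow behaviour, described in its class comment above, reduces to a single predicate on elapsed time. A sketch with invented member names and bucket lengths:

#include <cstddef>
#include <cstdint>
#include <vector>

using TTime = std::int64_t;

// Sketch of the overflow rule: once "size" buckets at the current bucket
// length are full, move to the next, coarser bucket length and
// re-accumulate the stored means at that length.
struct ExpandingWindowSketch {
    TTime startTime = 0;
    std::size_t size = 336;
    std::vector<TTime> bucketLengths{1800, 3600, 7200, 14400};
    std::size_t index = 0;

    bool needToCompress(TTime time) const {
        return time >= startTime + static_cast<TTime>(size) * bucketLengths[index];
    }

    void compressIfNeeded(TTime time) {
        while (this->needToCompress(time) && index + 1 < bucketLengths.size()) {
            ++index; // coarser buckets: the same "size" buckets now span longer
            // ... re-bucket the stored means at the new bucket length ...
        }
    }
};
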
- static bool dynamicSizeAlwaysZero() { return true; } - - using TEqualWithTolerance = CEqualWithTolerance; - - //! Lift the overloads of addSamples into scope. - using CPrior::addSamples; - //! Lift the overloads of print into scope. - using CPrior::print; - - public: - //! \name Life-Cycle - //@{ - //! \param[in] dataType The type of data being modeled (see maths_t::EDataType - //! for details). - //! \param[in] offset The offset to apply to the data. - //! \param[in] priorShape The shape parameter of the gamma prior. - //! \param[in] priorRate The rate parameter of the gamma prior. - //! \param[in] decayRate The rate at which to revert to non-informative. - //! \param[in] offsetMargin The margin between the smallest value and the support - //! left end. - CGammaRateConjugate(maths_t::EDataType dataType, - double offset, - double priorShape, - double priorRate, - double decayRate = 0.0, - double offsetMargin = GAMMA_OFFSET_MARGIN); - - //! Construct by traversing a state document. - CGammaRateConjugate(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser, - double offsetMargin = GAMMA_OFFSET_MARGIN); - - // Default copy constructor and assignment operator work. - - //! Create an instance of a non-informative prior. - //! - //! \param[in] dataType The type of data being modeled (see maths_t::EDataType - //! for details). - //! \param[in] offset The offset to apply to the data. - //! \param[in] decayRate The rate at which to revert to the non-informative prior. - //! \param[in] offsetMargin The margin between the smallest value and the support - //! left end. - //! \return A non-informative prior. - static CGammaRateConjugate nonInformativePrior(maths_t::EDataType dataType, - double offset = 0.0, - double decayRate = 0.0, - double offsetMargin = GAMMA_OFFSET_MARGIN); - //@} - - //! \name Prior Contract - //@{ - //! Get the type of this prior. - virtual EPrior type() const; - - //! Create a copy of the prior. - //! - //! \return A pointer to a newly allocated clone of this prior. - //! \warning The caller owns the object returned. - virtual CGammaRateConjugate *clone() const; - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); - - //! Get the margin between the smallest value and the support left - //! end. Priors with non-negative support, automatically adjust the - //! offset if a value is seen which is smaller than offset + margin. - virtual double offsetMargin() const; - - //! Returns true. - virtual bool needsOffset() const; - - //! Reset m_Offset so the smallest sample is not within some minimum - //! offset of the support left end. Note that translating the mean of - //! a gamma affects its shape, so there is no easy adjustment of the - //! prior parameters which preserves the distribution after translation. - //! - //! This samples the current marginal likelihood and uses these samples - //! to reconstruct the prior with adjusted offset. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples from which to determine the offset. - //! \param[in] weights The weights of each sample in \p samples. - //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Get the current offset. 
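
The conjugacy this prior exploits is the textbook gamma-gamma pair: for samples x_1, ..., x_n from a gamma distribution with known shape a and unknown rate b, a Gamma(alpha0, beta0) prior on b yields the posterior Gamma(alpha0 + n * a, beta0 + sum_i x_i). A sketch of just that update, assuming a known shape; the class itself additionally estimates the shape by maximum likelihood and ages its statistics:

#include <numeric>
#include <vector>

// Standard conjugate update for the rate of a gamma likelihood with known
// shape. Names are illustrative, not the class's members.
struct GammaRatePosterior {
    double likelihoodShape; // a, assumed known here
    double priorShape;      // alpha0, becomes the posterior shape
    double priorRate;       // beta0, becomes the posterior rate

    void addSamples(const std::vector<double>& samples) {
        priorShape += likelihoodShape * static_cast<double>(samples.size());
        priorRate += std::accumulate(samples.begin(), samples.end(), 0.0);
    }

    // Posterior mean of the rate b is alpha / beta.
    double rateMean() const { return priorShape / priorRate; }
};
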
- virtual double offset() const; - - //! Update the prior with a collection of independent samples from the - //! gamma variable. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Propagate the prior density function forwards by \p time. - //! - //! The prior distribution relaxes back to non-informative at a rate - //! controlled by the decay rate parameter (optionally supplied to the - //! constructor). - //! - //! \param[in] time The time increment to apply. - //! \note \p time must be non negative. - virtual void propagateForwardsByTime(double time); - - //! Get the support for the marginal likelihood function. - virtual TDoubleDoublePr marginalLikelihoodSupport() const; - - //! Get the mean of the marginal likelihood function. - virtual double marginalLikelihoodMean() const; - - //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
-        //! 
- //! - //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the - //! the percentage of interest \p percentage. - //! - //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. - //! \param[in] weights Optional variance scale weights. - //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr - marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Compute the log marginal likelihood function at \p samples integrating - //! over the prior density function for the gamma rate. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - //! \note The samples are assumed to be independent and identically - //! distributed. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const; - - //! Sample the marginal likelihood function. - //! - //! \see CPrior::sampleMarginalLikelihood() for a detailed description. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const; - - //! Compute minus the log of the joint c.d.f. of the marginal likelihood - //! at \p samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights of each sample in \p samples. For the - //! count variance scale weight style the weight is interpreted as a scale - //! of the likelihood variance. The mean and variance of a gamma are:\n - //!
-        //!   \f$\displaystyle mean = \frac{a}{b}\f$
-        //!   \f$\displaystyle variance = \frac{a}{b^2}\f$
-        //! 
- //! Here, \f$a\f$ is the shape of the likelihood function and \f$b\f$ - //! is the rate for which this is the prior. Our assumption implies:\n - //!
-        //!   \f$\displaystyle a_i' = \frac{a}{\gamma_i}\f$
-        //!   \f$\displaystyle b_i' = \frac{b}{\gamma_i}\f$
-        //! 
- //! We thus interpret the likelihood function as:\n - //!
-        //!   \f$\displaystyle f(x_i) = \frac{(b_i')^{a_i'}}{\Gamma(a_i')}x_i^{a_i'-1}e^{-b_i'x_i}\f$
-        //! 
- //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ - //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. - //! \param[out] upperBound Equal to \p lowerBound. - //! \note The samples are assumed to be independent. - //! \warning The variance scales \f$\gamma_i\f$ must be in the range - //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and - //! a value of infinity is not well handled. (Very large values are - //! handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute minus the log of the one minus the joint c.d.f. of the - //! marginal likelihood at \p samples without losing precision due to - //! cancellation errors at one, i.e. the smallest non-zero value this - //! can return is the minimum double rather than epsilon. - //! - //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute the probability of a less likely, i.e. lower likelihood, - //! collection of independent samples from the variable. - //! - //! \param[in] calculation The style of the probability calculation - //! (see CTools::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights. See minusLogJointCdf for discussion. - //! \param[out] lowerBound Filled in with the probability of the set - //! for which the joint marginal likelihood is less than that of - //! \p samples (subject to the measure \p calculation). - //! \param[out] upperBound Equal to \p lowerBound. - //! \param[out] tail The tail that (left or right) that all the - //! samples are in or neither. - //! \note The samples are assumed to be independent. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, - //! i.e. a value of zero is not well defined and a value of infinity - //! is not well handled. (Very large values are handled though.) - virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &indent, std::string &result) const; - - //! Print the prior density function in a specified format. - //! - //! \see CPrior::printJointDensityFunction for details. - virtual std::string printJointDensityFunction() const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const; - - //! 
Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - //@} - - //! Get the current estimate of the likelihood shape. - double likelihoodShape() const; - - //! The current expected rate for the variable. - double likelihoodRate() const; - - //! \name Test Functions - //@{ - //! Compute the specified percentage confidence interval for the - //! variable rate. - TDoubleDoublePr confidenceIntervalRate(double percentage) const; - - //! Check if two priors are equal to the specified tolerance. - bool equalTolerance(const CGammaRateConjugate &rhs, - const TEqualWithTolerance &equal) const; - //@} - - private: - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; - - private: - //! Read parameters from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Get the of the marginal likelihood. - double mean() const; - - //! Get the current posterior value of the shape parameter of the - //! prior gamma distribution. - double priorShape() const; - - //! Get the current posterior value of the rate parameter of the - //! prior gamma distribution. - double priorRate() const; - - //! Check that the state is valid. - bool isBad() const; - - //! Full debug dump of the state of this prior. - virtual std::string debug() const; - - private: - //! The shape parameter of a non-informative prior. - static const double NON_INFORMATIVE_SHAPE; - - //! The rate parameter of a non-informative prior. - static const double NON_INFORMATIVE_RATE; - - //! Since we estimate the shape by maximum likelihood we incorporate - //! error in the shape estimate as an increased variance on the rate - //! relative to the value predicted by conventional Bayesian analysis. - //! The value of this parameter is 0.23 and has been determined - //! empirically to best approximate the percentiles for the rate - //! estimate in the limit of a large number of updates. - static const double RATE_VARIANCE_SCALE; - - private: - //! We assume that the data are described by \f$X = Y - u\f$, where - //! \f$u\f$ is a constant and \f$Y\f$ is gamma distributed. This allows - //! us to model data with negative values greater than \f$-u\f$. - double m_Offset; +class MATHS_EXPORT CGammaRateConjugate : public CPrior { +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + using TEqualWithTolerance = CEqualWithTolerance; + + //! Lift the overloads of addSamples into scope. + using CPrior::addSamples; + //! Lift the overloads of print into scope. + using CPrior::print; + +public: + //! \name Life-Cycle + //@{ + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] offset The offset to apply to the data. + //! \param[in] priorShape The shape parameter of the gamma prior. + //! \param[in] priorRate The rate parameter of the gamma prior. + //! \param[in] decayRate The rate at which to revert to non-informative. + //! \param[in] offsetMargin The margin between the smallest value and the support + //! left end. + CGammaRateConjugate(maths_t::EDataType dataType, + double offset, + double priorShape, + double priorRate, + double decayRate = 0.0, + double offsetMargin = GAMMA_OFFSET_MARGIN); + + //! 
Construct by traversing a state document. + CGammaRateConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser, + double offsetMargin = GAMMA_OFFSET_MARGIN); + + // Default copy constructor and assignment operator work. + + //! Create an instance of a non-informative prior. + //! + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] offset The offset to apply to the data. + //! \param[in] decayRate The rate at which to revert to the non-informative prior. + //! \param[in] offsetMargin The margin between the smallest value and the support + //! left end. + //! \return A non-informative prior. + static CGammaRateConjugate nonInformativePrior(maths_t::EDataType dataType, + double offset = 0.0, + double decayRate = 0.0, + double offsetMargin = GAMMA_OFFSET_MARGIN); + //@} + + //! \name Prior Contract + //@{ + //! Get the type of this prior. + virtual EPrior type() const; + + //! Create a copy of the prior. + //! + //! \return A pointer to a newly allocated clone of this prior. + //! \warning The caller owns the object returned. + virtual CGammaRateConjugate* clone() const; + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); + + //! Get the margin between the smallest value and the support left + //! end. Priors with non-negative support, automatically adjust the + //! offset if a value is seen which is smaller than offset + margin. + virtual double offsetMargin() const; + + //! Returns true. + virtual bool needsOffset() const; + + //! Reset m_Offset so the smallest sample is not within some minimum + //! offset of the support left end. Note that translating the mean of + //! a gamma affects its shape, so there is no easy adjustment of the + //! prior parameters which preserves the distribution after translation. + //! + //! This samples the current marginal likelihood and uses these samples + //! to reconstruct the prior with adjusted offset. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples The samples from which to determine the offset. + //! \param[in] weights The weights of each sample in \p samples. + //! \return The penalty to apply in model selection. + virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Get the current offset. + virtual double offset() const; + + //! Update the prior with a collection of independent samples from the + //! gamma variable. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Propagate the prior density function forwards by \p time. + //! + //! The prior distribution relaxes back to non-informative at a rate + //! controlled by the decay rate parameter (optionally supplied to the + //! constructor). + //! + //! \param[in] time The time increment to apply. + //! \note \p time must be non negative. + virtual void propagateForwardsByTime(double time); + + //! 
Get the support for the marginal likelihood function. + virtual TDoubleDoublePr marginalLikelihoodSupport() const; + + //! Get the mean of the marginal likelihood function. + virtual double marginalLikelihoodMean() const; + + //! Get the mode of the marginal likelihood function. + virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the variance of the marginal likelihood. + virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: + //!
+    //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
+    //! 
+    //!
+    //! where \f$m\f$ is the median of the distribution and \f$p\f$ is
+    //! the percentage of interest \p percentage.
+    //!
+    //! \param[in] percentage The percentage of interest.
+    //! \param[in] weightStyles Optional variance scale weight styles.
+    //! \param[in] weights Optional variance scale weights.
+    //! \note \p percentage should be in the range [0.0, 100.0).
+    virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage,
+                                                                 const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE,
+                                                                 const TDouble4Vec& weights = TWeights::UNIT) const;
+
+    //! Compute the log marginal likelihood function at \p samples integrating
+    //! over the prior density function for the gamma rate.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the variable.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] result Filled in with the joint likelihood of \p samples.
+    //! \note The samples are assumed to be independent and identically
+    //! distributed.
+    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                          const TDouble1Vec& samples,
+                                                                          const TDouble4Vec1Vec& weights,
+                                                                          double& result) const;
+
+    //! Sample the marginal likelihood function.
+    //!
+    //! \see CPrior::sampleMarginalLikelihood() for a detailed description.
+    //!
+    //! \param[in] numberSamples The number of samples required.
+    //! \param[out] samples Filled in with samples from the prior.
+    //! \note \p numberSamples is truncated to the number of samples received.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const;
+
+    //! Compute minus the log of the joint c.d.f. of the marginal likelihood
+    //! at \p samples.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples The samples of interest.
+    //! \param[in] weights The weights of each sample in \p samples. For the
+    //! count variance scale weight style the weight is interpreted as a scale
+    //! of the likelihood variance. The mean and variance of a gamma are:\n
+    //!
+    //!   \f$\displaystyle mean = \frac{a}{b}\f$
+    //!   \f$\displaystyle variance = \frac{a}{b^2}\f$
+    //! 
+ //! Here, \f$a\f$ is the shape of the likelihood function and \f$b\f$ + //! is the rate for which this is the prior. Our assumption implies:\n + //!
+    //!   \f$\displaystyle a_i' = \frac{a}{\gamma_i}\f$
+    //!   \f$\displaystyle b_i' = \frac{b}{\gamma_i}\f$
+    //! 
+ //! We thus interpret the likelihood function as:\n + //!
+    //!   \f$\displaystyle f(x_i) = \frac{(b_i')^{a_i'}}{\Gamma(a_i')}x_i^{a_i'-1}e^{-b_i'x_i}\f$
+    //! 
+    //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$
+    //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples.
+    //! \param[out] upperBound Equal to \p lowerBound.
+    //! \note The samples are assumed to be independent.
+    //! \warning The variance scales \f$\gamma_i\f$ must be in the range
+    //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and
+    //! a value of infinity is not well handled. (Very large values are
+    //! handled though.)
+    virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                  const TDouble1Vec& samples,
+                                  const TDouble4Vec1Vec& weights,
+                                  double& lowerBound,
+                                  double& upperBound) const;
+
+    //! Compute minus the log of one minus the joint c.d.f. of the
+    //! marginal likelihood at \p samples without losing precision due to
+    //! cancellation errors at one, i.e. the smallest non-zero value this
+    //! can return is the minimum double rather than epsilon.
+    //!
+    //! \see minusLogJointCdf for more details.
+    virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                            const TDouble1Vec& samples,
+                                            const TDouble4Vec1Vec& weights,
+                                            double& lowerBound,
+                                            double& upperBound) const;
+
+    //! Compute the probability of a less likely, i.e. lower likelihood,
+    //! collection of independent samples from the variable.
+    //!
+    //! \param[in] calculation The style of the probability calculation
+    //! (see CTools::EProbabilityCalculation for details).
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples The samples of interest.
+    //! \param[in] weights The weights. See minusLogJointCdf for discussion.
+    //! \param[out] lowerBound Filled in with the probability of the set
+    //! for which the joint marginal likelihood is less than that of
+    //! \p samples (subject to the measure \p calculation).
+    //! \param[out] upperBound Equal to \p lowerBound.
+    //! \param[out] tail The tail (left or right) that all the
+    //! samples are in, or neither.
+    //! \note The samples are assumed to be independent.
+    //! \warning The variance scales must be in the range \f$(0,\infty)\f$,
+    //! i.e. a value of zero is not well defined and a value of infinity
+    //! is not well handled. (Very large values are handled though.)
+    virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
+                                                const TWeightStyleVec& weightStyles,
+                                                const TDouble1Vec& samples,
+                                                const TDouble4Vec1Vec& weights,
+                                                double& lowerBound,
+                                                double& upperBound,
+                                                maths_t::ETail& tail) const;
+
+    //! Check if this is a non-informative prior.
+    virtual bool isNonInformative() const;
+
+    //! Get a human readable description of the prior.
+    //!
+    //! \param[in] indent The indent to use at the start of new lines.
+    //! \param[in,out] result Filled in with the description.
+    virtual void print(const std::string& indent, std::string& result) const;
+
+    //! Print the prior density function in a specified format.
+    //!
+    //! \see CPrior::printJointDensityFunction for details.
+    virtual std::string printJointDensityFunction() const;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Get the memory used by this component.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this component.
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object - used for virtual hierarchies.
+    virtual std::size_t staticSize() const;
+
+    //! Persist state by passing information to the supplied inserter.
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+    //@}
+
+    //! Get the current estimate of the likelihood shape.
+    double likelihoodShape() const;
+
+    //! The current expected rate for the variable.
+    double likelihoodRate() const;
+
+    //! \name Test Functions
+    //@{
+    //! Compute the specified percentage confidence interval for the
+    //! variable rate.
+    TDoubleDoublePr confidenceIntervalRate(double percentage) const;
+
+    //! Check if two priors are equal to the specified tolerance.
+    bool equalTolerance(const CGammaRateConjugate& rhs, const TEqualWithTolerance& equal) const;
+    //@}
+
+private:
+    using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
+    using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
+
+private:
+    //! Read parameters from \p traverser.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Get the mean of the marginal likelihood.
+    double mean() const;
+
+    //! Get the current posterior value of the shape parameter of the
+    //! prior gamma distribution.
+    double priorShape() const;
+
+    //! Get the current posterior value of the rate parameter of the
+    //! prior gamma distribution.
+    double priorRate() const;
+
+    //! Check that the state is valid.
+    bool isBad() const;
+
+    //! Full debug dump of the state of this prior.
+    virtual std::string debug() const;
+
+private:
+    //! The shape parameter of a non-informative prior.
+    static const double NON_INFORMATIVE_SHAPE;
+
+    //! The rate parameter of a non-informative prior.
+    static const double NON_INFORMATIVE_RATE;
+
+    //! Since we estimate the shape by maximum likelihood we incorporate
+    //! error in the shape estimate as an increased variance on the rate
+    //! relative to the value predicted by conventional Bayesian analysis.
+    //! The value of this parameter is 0.23 and has been determined
+    //! empirically to best approximate the percentiles for the rate
+    //! estimate in the limit of a large number of updates.
+    static const double RATE_VARIANCE_SCALE;
+
+private:
+    //! We assume that the data are described by \f$X = Y - u\f$, where
+    //! \f$u\f$ is a constant and \f$Y\f$ is gamma distributed. This allows
+    //! us to model data with negative values greater than \f$-u\f$.
+    double m_Offset;

-        //! The margin between the smallest value and the support left end.
-        double m_OffsetMargin;
+    //! The margin between the smallest value and the support left end.
+    double m_OffsetMargin;

-        //! The maximum likelihood estimate of the shape parameter.
-        double m_LikelihoodShape;
+    //! The maximum likelihood estimate of the shape parameter.
+    double m_LikelihoodShape;

-        //! The sum of the logs of the samples.
-        TMeanAccumulator m_LogSamplesMean;
+    //! The sum of the logs of the samples.
+    TMeanAccumulator m_LogSamplesMean;

-        //! The count, mean and variance of the samples.
-        TMeanVarAccumulator m_SampleMoments;
+    //! The count, mean and variance of the samples.
+    TMeanVarAccumulator m_SampleMoments;

-        //! The initial shape parameter of the prior gamma distribution.
-        double m_PriorShape;
+    //! The initial shape parameter of the prior gamma distribution.
+    double m_PriorShape;

-        //! The initial rate parameter of the prior gamma distribution.
+    //! The initial rate parameter of the prior gamma distribution.
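// The conjugate analysis above reduces to a closed-form update of the prior
// shape and rate. A minimal sketch, with the likelihood shape k treated as
// fixed (the class estimates it by maximum likelihood) and hypothetical prior
// values; this is not the class's actual implementation:
#include <numeric>
#include <vector>

int main() {
    double k = 2.0;                               // assumed likelihood shape
    double priorShape = 1.0, priorRate = 1.0;     // hypothetical prior parameters
    std::vector<double> samples{1.8, 2.4, 2.1};
    // Posterior parameters: a' = a + n * k, b' = b + sum of the samples.
    double posteriorShape = priorShape + k * static_cast<double>(samples.size());
    double posteriorRate = priorRate + std::accumulate(samples.begin(), samples.end(), 0.0);
    double posteriorRateMean = posteriorShape / posteriorRate; // E[rate | data]
    return posteriorRateMean > 0.0 ? 0 : 1;
}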
+ double m_PriorRate; }; - } } diff --git a/include/maths/CGradientDescent.h b/include/maths/CGradientDescent.h index ae91281657..b29c82a67b 100644 --- a/include/maths/CGradientDescent.h +++ b/include/maths/CGradientDescent.h @@ -17,96 +17,83 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Implements gradient descent with momentum. //! //! DESCRIPTION\n //! \see https://en.wikipedia.org/wiki/Gradient_descent. -class MATHS_EXPORT CGradientDescent -{ - public: - using TDoubleVec = std::vector; - using TVector = CVector; - - //! \brief The interface for the function calculation. - class MATHS_EXPORT CFunction - { - public: - virtual ~CFunction(); - virtual bool operator()(const TVector &x, double &result) const = 0; - }; - - //! \brief The interface for the gradient calculation. - class MATHS_EXPORT CGradient - { - public: - virtual ~CGradient(); - virtual bool operator()(const TVector &x, TVector &result) const = 0; - }; - - //! \brief Computes the gradient using the central difference - //! method. - //! - //! DESCRIPTION:\n - //! \see https://en.wikipedia.org/wiki/Finite_difference. - class MATHS_EXPORT CEmpiricalCentralGradient : public CGradient, - private core::CNonCopyable - { - public: - CEmpiricalCentralGradient(const CFunction &f, double eps); - - virtual bool operator()(const TVector &x, TVector &result) const; - - private: - //! The shift used to get the offset points. - double m_Eps; - //! The function for which to compute the gradient. - const CFunction &m_F; - //! A placeholder for the shifted points. - mutable TVector xShiftEps; - }; +class MATHS_EXPORT CGradientDescent { +public: + using TDoubleVec = std::vector; + using TVector = CVector; + //! \brief The interface for the function calculation. + class MATHS_EXPORT CFunction { public: - CGradientDescent(double learnRate, double momentum); - - //! Set the learn rate. - void learnRate(double learnRate); - - //! Set the momentum. - void momentum(double momentum); - - //! Run gradient descent for \p n steps. - //! - //! \param[in] n The number of steps to use. - //! \param[in] x0 The starting point for the argument of the function - //! to minimize. - //! \param[in] f The function to minimize. - //! \param[in] gf The gradient oracle of the function to minimize. - //! \param[out] xBest Filled in with the minimum function value argument - //! visited. - //! \param[out] fi Filled in with the sequence of function values. - bool run(std::size_t n, - const TVector &x0, - const CFunction &f, - const CGradient &gf, - TVector &xBest, - TDoubleVec &fi); + virtual ~CFunction(); + virtual bool operator()(const TVector& x, double& result) const = 0; + }; - private: - //! The multiplier of the unit vector along the gradient. - double m_LearnRate; + //! \brief The interface for the gradient calculation. + class MATHS_EXPORT CGradient { + public: + virtual ~CGradient(); + virtual bool operator()(const TVector& x, TVector& result) const = 0; + }; + + //! \brief Computes the gradient using the central difference + //! method. + //! + //! DESCRIPTION:\n + //! \see https://en.wikipedia.org/wiki/Finite_difference. + class MATHS_EXPORT CEmpiricalCentralGradient : public CGradient, private core::CNonCopyable { + public: + CEmpiricalCentralGradient(const CFunction& f, double eps); - //! The proportion of the previous step to add. - double m_Momentum; + virtual bool operator()(const TVector& x, TVector& result) const; - //! The last step. - TVector m_PreviousStep; + private: + //! 
The shift used to get the offset points. + double m_Eps; + //! The function for which to compute the gradient. + const CFunction& m_F; + //! A placeholder for the shifted points. + mutable TVector xShiftEps; + }; + +public: + CGradientDescent(double learnRate, double momentum); + + //! Set the learn rate. + void learnRate(double learnRate); + + //! Set the momentum. + void momentum(double momentum); + + //! Run gradient descent for \p n steps. + //! + //! \param[in] n The number of steps to use. + //! \param[in] x0 The starting point for the argument of the function + //! to minimize. + //! \param[in] f The function to minimize. + //! \param[in] gf The gradient oracle of the function to minimize. + //! \param[out] xBest Filled in with the minimum function value argument + //! visited. + //! \param[out] fi Filled in with the sequence of function values. + bool run(std::size_t n, const TVector& x0, const CFunction& f, const CGradient& gf, TVector& xBest, TDoubleVec& fi); + +private: + //! The multiplier of the unit vector along the gradient. + double m_LearnRate; + + //! The proportion of the previous step to add. + double m_Momentum; + + //! The last step. + TVector m_PreviousStep; }; - } } diff --git a/include/maths/CGramSchmidt.h b/include/maths/CGramSchmidt.h index f3dd2f19fc..3c3068a776 100644 --- a/include/maths/CGramSchmidt.h +++ b/include/maths/CGramSchmidt.h @@ -20,283 +20,236 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Computes an orthonormal basis for a collection //! of vectors using the Gram-Schmidt process. //! //! DESCRIPTION:\n //! See https://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process -class MATHS_EXPORT CGramSchmidt : private core::CNonInstantiatable -{ - public: - using TDoubleVec = std::vector; - using TDoubleVecVec = std::vector; - using TVector = CVector; - using TVectorVec = std::vector; - - public: - //! Compute an orthonormal basis for the vectors in \p x. - //! - //! \param[in,out] x The vectors from which to compute the - //! orthonormal basis. Overwritten with the orthonormal - //! basis. - static bool basis(TDoubleVecVec &x); - - //! Compute an orthonormal basis for the vectors in \p x. - //! - //! \param[in,out] x The vectors from which to compute the - //! orthonormal basis. Overwritten with the orthonormal - //! basis. - static bool basis(TVectorVec &x); - - //! Compute an orthonormal basis for the vectors in \p x. - //! - //! \param[in,out] x The vectors from which to compute the - //! orthonormal basis. Overwritten with the orthonormal - //! basis. - template - static bool basis(boost::array &x) - { - return basisImpl(x); - } +class MATHS_EXPORT CGramSchmidt : private core::CNonInstantiatable { +public: + using TDoubleVec = std::vector; + using TDoubleVecVec = std::vector; + using TVector = CVector; + using TVectorVec = std::vector; + +public: + //! Compute an orthonormal basis for the vectors in \p x. + //! + //! \param[in,out] x The vectors from which to compute the + //! orthonormal basis. Overwritten with the orthonormal + //! basis. + static bool basis(TDoubleVecVec& x); + + //! Compute an orthonormal basis for the vectors in \p x. + //! + //! \param[in,out] x The vectors from which to compute the + //! orthonormal basis. Overwritten with the orthonormal + //! basis. + static bool basis(TVectorVec& x); + + //! Compute an orthonormal basis for the vectors in \p x. + //! + //! \param[in,out] x The vectors from which to compute the + //! orthonormal basis. 
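// One common formulation of the momentum update parameterized by the learn
// rate and momentum members of CGradientDescent above (the class's exact
// update lives in the source file, so this standalone sketch is only
// illustrative). Minimising f(x) = x^2, whose gradient is 2x:
#include <cstdio>

int main() {
    double learnRate = 0.1, momentum = 0.9;
    double x = 5.0, step = 0.0;
    for (int i = 0; i < 200; ++i) {
        step = momentum * step - learnRate * (2.0 * x); // step <- mu * step - eta * grad
        x += step;
    }
    std::printf("x = %g\n", x); // converges towards the minimum at 0
    return 0;
}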
Overwritten with the orthonormal + //! basis. + template + static bool basis(boost::array& x) { + return basisImpl(x); + } + + //! Compute an orthonormal basis for the vectors in \p x. + //! + //! \param[in,out] x The vectors from which to compute the + //! orthonormal basis. Overwritten with the orthonormal + //! basis. + template + static bool basis(boost::array& x) { + return basisImpl(x); + } + + //! Compute an orthonormal basis for the vectors in \p x. + //! + //! \param[in,out] x The vectors from which to compute the + //! orthonormal basis. Overwritten with the orthonormal + //! basis. + template + static bool basis(std::vector>& x) { + return basisImpl(x); + } + +private: + //! The Gram-Schmidt process. + //! + //! \param[in,out] x The vectors from which to compute the + //! orthonormal basis. Overwritten with the orthonormal + //! basis. + template + static bool basisImpl(VECTORS& x) { + std::size_t i = 0u; + std::size_t current = 0u; + + for (/**/; i < x.size(); ++i) { + if (i != current) { + swap(x[current], x[i]); + } - //! Compute an orthonormal basis for the vectors in \p x. - //! - //! \param[in,out] x The vectors from which to compute the - //! orthonormal basis. Overwritten with the orthonormal - //! basis. - template - static bool basis(boost::array &x) - { - return basisImpl(x); - } + double n = norm(x[current]); + LOG_TRACE("i = " << i << ", current = " << current << ", x = " << print(x[current]) << ", norm = " << n); - //! Compute an orthonormal basis for the vectors in \p x. - //! - //! \param[in,out] x The vectors from which to compute the - //! orthonormal basis. Overwritten with the orthonormal - //! basis. - template - static bool basis(std::vector > &x) - { - return basisImpl(x); + if (n != 0.0) { + divide(x[current], n); + ++current; + ++i; + break; + } } - private: - //! The Gram-Schmidt process. - //! - //! \param[in,out] x The vectors from which to compute the - //! orthonormal basis. Overwritten with the orthonormal - //! basis. 
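// A minimal numeric sketch of the steps basisImpl performs (normalize the
// first vector, subtract projections, renormalize), for two vectors in R^2
// using plain doubles rather than the ml vector types:
#include <cmath>
#include <cstdio>

int main() {
    double a[2] = {3.0, 4.0}, b[2] = {1.0, 0.0};
    double na = std::sqrt(a[0] * a[0] + a[1] * a[1]);
    a[0] /= na; a[1] /= na;                        // e1 = a / ||a||
    double p = b[0] * a[0] + b[1] * a[1];          // inner(b, e1)
    b[0] -= p * a[0]; b[1] -= p * a[1];            // minusProjection(b, e1)
    double nb = std::sqrt(b[0] * b[0] + b[1] * b[1]);
    b[0] /= nb; b[1] /= nb;                        // e2 = b / ||b||
    std::printf("inner(e1, e2) = %g\n", a[0] * b[0] + a[1] * b[1]); // ~0
    return 0;
}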
- template - static bool basisImpl(VECTORS &x) - { - std::size_t i = 0u; - std::size_t current = 0u; - - for (/**/; i < x.size(); ++i) - { - if (i != current) - { + try { + for (/**/; i < x.size(); ++i) { + if (i != current) { swap(x[current], x[i]); } + double eps = 5.0 * norm(x[current]) * std::numeric_limits::epsilon(); + + for (std::size_t j = 0u; j < i; ++j) { + minusProjection(x[current], x[j]); + } + double n = norm(x[current]); - LOG_TRACE("i = " << i - << ", current = " << current - << ", x = " << print(x[current]) - << ", norm = " << n); + LOG_TRACE("i = " << i << ", current = " << current << ", x = " << print(x[current]) << ", norm = " << n + << ", eps = " << eps); - if (n != 0.0) - { + if (std::fabs(n) > eps) { divide(x[current], n); ++current; - ++i; - break; } } - try - { - for (/**/; i < x.size(); ++i) - { - if (i != current) - { - swap(x[current], x[i]); - } - - double eps = 5.0 - * norm(x[current]) - * std::numeric_limits::epsilon(); - - for (std::size_t j = 0u; j < i; ++j) - { - minusProjection(x[current], x[j]); - } - - double n = norm(x[current]); - LOG_TRACE("i = " << i - << ", current = " << current - << ", x = " << print(x[current]) - << ", norm = " << n - << ", eps = " << eps); - - if (std::fabs(n) > eps) - { - divide(x[current], n); - ++current; - } - } - - if (current != x.size()) - { - erase(x, x.begin() + current, x.end()); - } - } - catch (const std::runtime_error &e) - { - LOG_ERROR("Failed to construct basis: " << e.what()); - return false; + if (current != x.size()) { + erase(x, x.begin() + current, x.end()); } - return true; - } - - //! Efficiently swap \p x and \p y. - static void swap(TDoubleVec &x, TDoubleVec &y); - - //! Efficiently swap \p x and \p y. - static void swap(TVector &x, TVector &y); - - //! Efficiently swap \p x and \p y. - template - static void swap(CVectorNx1 &x, CVectorNx1 &y) - { - std::swap(x, y); - } - - //! Subtract the projection of \p x onto \p e from \p x. - static const TDoubleVec &minusProjection(TDoubleVec &x, - const TDoubleVec &e); - - //! Subtract the projection of \p x onto \p e from \p x. - static const TVector &minusProjection(TVector &x, - const TVector &e); - - //! Subtract the projection of \p x onto \p e from \p x. - template - static const CVectorNx1 &minusProjection(CVectorNx1 &x, - const CVectorNx1 &e) - { - double n = e.inner(x); - return x -= n * e; - } - - //! Divide the vector \p x by the scalar \p s. - static const TDoubleVec ÷(TDoubleVec &x, double s); - - //! Divide the vector \p x by the scalar \p s. - static const TVector ÷(TVector &x, double s); - - //! Divide the vector \p x by the scalar \p s. - template - static const CVectorNx1 ÷(CVectorNx1 &x, - double s) - { - return x /= s; - } - - //! Compute the norm of the vector \p x. - static double norm(const TDoubleVec &x); - - //! Compute the norm of the vector \p x. - static double norm(const TVector &x); - - //! Compute the norm of the vector \p x. - template - static double norm(const CVectorNx1 &x) - { - return x.euclidean(); - } - - //! Compute the inner product of \p x and \p y. - static double inner(const TDoubleVec &x, const TDoubleVec &y); - - //! Compute the inner product of \p x and \p y. - static double inner(const TVector &x, const TVector &y); - - //! Compute the inner product of \p x and \p y. - template - static double inner(const CVectorNx1 &x, - const CVectorNx1 &y) - { - return x.inner(y); + } catch (const std::runtime_error& e) { + LOG_ERROR("Failed to construct basis: " << e.what()); + return false; } - - //! 
Check if \p x and \p y have the same dimension. - static void sameDimension(const TDoubleVec &x, - const TDoubleVec &y); - - //! Check if \p x and \p y have the same dimension. - static void sameDimension(const TVector &x, - const TVector &y); - - //! Remove [\p begin, \p end) from \p x. - template - static void erase(std::vector &x, - typename std::vector::iterator begin, - typename std::vector::iterator end) - { - x.erase(begin, end); + return true; + } + + //! Efficiently swap \p x and \p y. + static void swap(TDoubleVec& x, TDoubleVec& y); + + //! Efficiently swap \p x and \p y. + static void swap(TVector& x, TVector& y); + + //! Efficiently swap \p x and \p y. + template + static void swap(CVectorNx1& x, CVectorNx1& y) { + std::swap(x, y); + } + + //! Subtract the projection of \p x onto \p e from \p x. + static const TDoubleVec& minusProjection(TDoubleVec& x, const TDoubleVec& e); + + //! Subtract the projection of \p x onto \p e from \p x. + static const TVector& minusProjection(TVector& x, const TVector& e); + + //! Subtract the projection of \p x onto \p e from \p x. + template + static const CVectorNx1& minusProjection(CVectorNx1& x, const CVectorNx1& e) { + double n = e.inner(x); + return x -= n * e; + } + + //! Divide the vector \p x by the scalar \p s. + static const TDoubleVec& divide(TDoubleVec& x, double s); + + //! Divide the vector \p x by the scalar \p s. + static const TVector& divide(TVector& x, double s); + + //! Divide the vector \p x by the scalar \p s. + template + static const CVectorNx1& divide(CVectorNx1& x, double s) { + return x /= s; + } + + //! Compute the norm of the vector \p x. + static double norm(const TDoubleVec& x); + + //! Compute the norm of the vector \p x. + static double norm(const TVector& x); + + //! Compute the norm of the vector \p x. + template + static double norm(const CVectorNx1& x) { + return x.euclidean(); + } + + //! Compute the inner product of \p x and \p y. + static double inner(const TDoubleVec& x, const TDoubleVec& y); + + //! Compute the inner product of \p x and \p y. + static double inner(const TVector& x, const TVector& y); + + //! Compute the inner product of \p x and \p y. + template + static double inner(const CVectorNx1& x, const CVectorNx1& y) { + return x.inner(y); + } + + //! Check if \p x and \p y have the same dimension. + static void sameDimension(const TDoubleVec& x, const TDoubleVec& y); + + //! Check if \p x and \p y have the same dimension. + static void sameDimension(const TVector& x, const TVector& y); + + //! Remove [\p begin, \p end) from \p x. + template + static void erase(std::vector& x, typename std::vector::iterator begin, typename std::vector::iterator end) { + x.erase(begin, end); + } + + //! Remove [\p begin, \p end) from \p x. + template + static void erase(boost::array& /*x*/, + typename boost::array::iterator begin, + typename boost::array::iterator end) { + for (/**/; begin != end; ++begin) { + zero(*begin); } + } - //! Remove [\p begin, \p end) from \p x. - template - static void erase(boost::array &/*x*/, - typename boost::array::iterator begin, - typename boost::array::iterator end) - { - for (/**/; begin != end; ++begin) - { - zero(*begin); - } - } + //! Zero the components of \p x. + static void zero(TDoubleVec& x); - //! Zero the components of \p x. - static void zero(TDoubleVec &x); + //! Zero the components of \p x. + static void zero(TVector& x); - //! Zero the components of \p x. - static void zero(TVector &x); - - //! Zero the components of \p x. 
- template - static void zero(CVectorNx1 &x) - { - for (std::size_t i = 0u; i < x.size(); ++i) - { - x(i) = 0.0; - } + //! Zero the components of \p x. + template + static void zero(CVectorNx1& x) { + for (std::size_t i = 0u; i < x.size(); ++i) { + x(i) = 0.0; } + } - //! Print \p x for debug. - static std::string print(const TDoubleVec &x); + //! Print \p x for debug. + static std::string print(const TDoubleVec& x); - //! Print \p x for debug. - static std::string print(const TVector &x); + //! Print \p x for debug. + static std::string print(const TVector& x); - //! Print \p x for debug. - template - static std::string print(const CVectorNx1 &x) - { - std::ostringstream result; - result << x; - return result.str(); - } + //! Print \p x for debug. + template + static std::string print(const CVectorNx1& x) { + std::ostringstream result; + result << x; + return result.str(); + } }; - } } diff --git a/include/maths/CInformationCriteria.h b/include/maths/CInformationCriteria.h index 0538cf1eca..29897d9acd 100644 --- a/include/maths/CInformationCriteria.h +++ b/include/maths/CInformationCriteria.h @@ -19,24 +19,18 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace information_criteria_detail -{ +namespace information_criteria_detail { //! \brief Defines the sample covariance accumulator. template -struct SSampleCovariances -{ -}; +struct SSampleCovariances {}; //! \brief Defines the sample covariance accumulator for a CVectorNx1. template -struct SSampleCovariances> -{ +struct SSampleCovariances> { using Type = CBasicStatistics::SSampleCovariances; }; @@ -47,9 +41,9 @@ struct SSampleCovariances> MATHS_EXPORT double confidence(double df); -#define LOG_DETERMINANT(N) \ -MATHS_EXPORT \ -double logDeterminant(const CSymmetricMatrixNxN &c, double upper) +#define LOG_DETERMINANT(N) \ + MATHS_EXPORT \ + double logDeterminant(const CSymmetricMatrixNxN& c, double upper) LOG_DETERMINANT(2); LOG_DETERMINANT(3); LOG_DETERMINANT(4); @@ -57,19 +51,15 @@ LOG_DETERMINANT(5); #undef LOG_DETERMINANT //! The log determinant of our internal heap symmetric matrix. -double logDeterminant(const CSymmetricMatrix &c, double upper); +double logDeterminant(const CSymmetricMatrix& c, double upper); //! The log determinant of an Eigen matrix. -double logDeterminant(const CDenseMatrix &c, double upper); +double logDeterminant(const CDenseMatrix& c, double upper); } // information_criteria_detail:: //! Enumeration of different types of information criterion supported. -enum EInfoCriterionType -{ - E_AICc, - E_BIC -}; +enum EInfoCriterionType { E_AICc, E_BIC }; //! \brief Computes the information content of a collection of point //! clouds under the assumption that they are distributed as a weighted @@ -108,127 +98,87 @@ enum EInfoCriterionType //! See also http://en.wikipedia.org/wiki/Bayesian_information_criterion //! and http://en.wikipedia.org/wiki/Akaike_information_criterion. 
template -class CSphericalGaussianInfoCriterion -{ - public: - using TPointVec = std::vector; - using TPointVecVec = std::vector; - using TBarePoint = typename SStripped::Type; - using TBarePointPrecise = typename SFloatingPoint::Type; - using TCoordinate = typename SCoordinate::Type; - using TMeanVarAccumulator = typename CBasicStatistics::SSampleMeanVar::TAccumulator; - - public: - CSphericalGaussianInfoCriterion() : - m_D(0.0), - m_K(0.0), - m_N(0.0), - m_Likelihood(0.0) - {} - explicit CSphericalGaussianInfoCriterion(const TPointVecVec &x) : - m_D(0.0), - m_K(0.0), - m_N(0.0), - m_Likelihood(0.0) - { - this->add(x); - } - explicit CSphericalGaussianInfoCriterion(const TPointVec &x) : - m_D(0.0), - m_K(0.0), - m_N(0.0), - m_Likelihood(0.0) - { - this->add(x); +class CSphericalGaussianInfoCriterion { +public: + using TPointVec = std::vector; + using TPointVecVec = std::vector; + using TBarePoint = typename SStripped::Type; + using TBarePointPrecise = typename SFloatingPoint::Type; + using TCoordinate = typename SCoordinate::Type; + using TMeanVarAccumulator = typename CBasicStatistics::SSampleMeanVar::TAccumulator; + +public: + CSphericalGaussianInfoCriterion() : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) {} + explicit CSphericalGaussianInfoCriterion(const TPointVecVec& x) : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { this->add(x); } + explicit CSphericalGaussianInfoCriterion(const TPointVec& x) : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { this->add(x); } + + //! Update the sufficient statistics for computing info content. + void add(const TPointVecVec& x) { + for (std::size_t i = 0u; i < x.size(); ++i) { + this->add(x[i]); } + } - //! Update the sufficient statistics for computing info content. - void add(const TPointVecVec &x) - { - for (std::size_t i = 0u; i < x.size(); ++i) - { - this->add(x[i]); - } + //! Update the sufficient statistics for computing info content. + void add(const TPointVec& x) { + if (x.empty()) { + return; } - //! Update the sufficient statistics for computing info content. - void add(const TPointVec &x) - { - if (x.empty()) - { - return; - } - - TMeanVarAccumulator moments; - moments.add(x); - this->add(moments); + TMeanVarAccumulator moments; + moments.add(x); + this->add(moments); + } + + //! Update the sufficient statistics for computing info content. + void add(const TMeanVarAccumulator& moments) { + double ni = CBasicStatistics::count(moments); + const TBarePointPrecise& m = CBasicStatistics::mean(moments); + const TBarePointPrecise& c = CBasicStatistics::maximumLikelihoodVariance(moments); + std::size_t d = c.dimension(); + double vi = 0.0; + for (std::size_t i = 0u; i < d; ++i) { + vi += c(i); } - - //! Update the sufficient statistics for computing info content. 
- void add(const TMeanVarAccumulator &moments) - { - double ni = CBasicStatistics::count(moments); - const TBarePointPrecise &m = CBasicStatistics::mean(moments); - const TBarePointPrecise &c = CBasicStatistics::maximumLikelihoodVariance(moments); - std::size_t d = c.dimension(); - double vi = 0.0; - for (std::size_t i = 0u; i < d; ++i) - { - vi += c(i); - } - vi = std::max(vi, 10.0 * std::numeric_limits::epsilon() - * m.euclidean()); - - m_D = static_cast(c.dimension()); - m_K += 1.0; - m_N += ni; - if (ni > 1.0) - { - double upper = information_criteria_detail::confidence(ni - 1.0); - m_Likelihood += ni * log(ni) - - 0.5 * m_D * ni * ( 1.0 - + core::constants::LOG_TWO_PI - + std::log(upper * vi / m_D)); - } - else - { - m_Likelihood += ni * log(ni) - - 0.5 * m_D * ni * ( 1.0 - + core::constants::LOG_TWO_PI - + core::constants::LOG_MAX_DOUBLE); - } + vi = std::max(vi, 10.0 * std::numeric_limits::epsilon() * m.euclidean()); + + m_D = static_cast(c.dimension()); + m_K += 1.0; + m_N += ni; + if (ni > 1.0) { + double upper = information_criteria_detail::confidence(ni - 1.0); + m_Likelihood += ni * log(ni) - 0.5 * m_D * ni * (1.0 + core::constants::LOG_TWO_PI + std::log(upper * vi / m_D)); + } else { + m_Likelihood += ni * log(ni) - 0.5 * m_D * ni * (1.0 + core::constants::LOG_TWO_PI + core::constants::LOG_MAX_DOUBLE); } + } - //! Calculate the information content of the clusters added so far. - double calculate() const - { - if (m_N == 0.0) - { - return 0.0; - } - - double logN = std::log(m_N); - double p = (m_D * m_K + 2.0 * m_K - 1.0); - switch (TYPE) - { - case E_BIC: - return -2.0 * (m_Likelihood - m_N * logN) + p * logN; - case E_AICc: - return -2.0 * (m_Likelihood - m_N * logN) - + 2.0 * p + p * (p + 1.0) / (m_N - p - 1.0); - } + //! Calculate the information content of the clusters added so far. + double calculate() const { + if (m_N == 0.0) { return 0.0; } - private: - //! The point dimension. - double m_D; - //! The number of clusters. - double m_K; - //! The number of points. - double m_N; - //! The data likelihood for the k spherically symmetric Gaussians. - double m_Likelihood; + double logN = std::log(m_N); + double p = (m_D * m_K + 2.0 * m_K - 1.0); + switch (TYPE) { + case E_BIC: + return -2.0 * (m_Likelihood - m_N * logN) + p * logN; + case E_AICc: + return -2.0 * (m_Likelihood - m_N * logN) + 2.0 * p + p * (p + 1.0) / (m_N - p - 1.0); + } + return 0.0; + } + +private: + //! The point dimension. + double m_D; + //! The number of clusters. + double m_K; + //! The number of points. + double m_N; + //! The data likelihood for the k spherically symmetric Gaussians. + double m_Likelihood; }; //! \brief Computes the information content of a collection of point @@ -240,119 +190,86 @@ class CSphericalGaussianInfoCriterion //! it is assumed to have \f$frac{D(D+1)}{2}\f$ parameters. For more //! details on the information criteria see CSphericalGaussianInfoCriterion. 
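// A small worked example of the two criteria computed by calculate() in the
// classes here, for a hypothetical fit (L = maximised log-likelihood, N =
// point count, p = free parameter count, which depends on whether the
// covariances are spherical or full). Note the classes also renormalize the
// likelihood by N log(N); this sketch shows just the standard definitions:
#include <cmath>
#include <cstdio>

int main() {
    double L = -1200.0; // hypothetical log-likelihood
    double N = 500.0;   // hypothetical number of points
    double p = 11.0;    // hypothetical parameter count
    double bic = -2.0 * L + p * std::log(N);
    double aicc = -2.0 * L + 2.0 * p + p * (p + 1.0) / (N - p - 1.0);
    std::printf("BIC = %g, AICc = %g\n", bic, aicc);
    return 0;
}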
template -class CGaussianInfoCriterion -{ - public: - using TPointVec = std::vector; - using TPointVecVec = std::vector; - using TBarePoint = typename SStripped::Type; - using TBarePointPrecise = typename SFloatingPoint::Type; - using TCoordinate = typename SCoordinate::Type; - using TCovariances = typename information_criteria_detail::SSampleCovariances::Type; - using TMatrix = typename SConformableMatrix::Type; - - public: - CGaussianInfoCriterion() : - m_D(0.0), - m_K(0.0), - m_N(0.0), - m_Likelihood(0.0) - {} - explicit CGaussianInfoCriterion(const TPointVecVec &x) : - m_D(0.0), - m_K(0.0), - m_N(0.0), - m_Likelihood(0.0) - { - this->add(x); - } - explicit CGaussianInfoCriterion(const TPointVec &x) : - m_D(0.0), - m_K(0.0), - m_N(0.0), - m_Likelihood(0.0) - { - this->add(x); +class CGaussianInfoCriterion { +public: + using TPointVec = std::vector; + using TPointVecVec = std::vector; + using TBarePoint = typename SStripped::Type; + using TBarePointPrecise = typename SFloatingPoint::Type; + using TCoordinate = typename SCoordinate::Type; + using TCovariances = typename information_criteria_detail::SSampleCovariances::Type; + using TMatrix = typename SConformableMatrix::Type; + +public: + CGaussianInfoCriterion() : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) {} + explicit CGaussianInfoCriterion(const TPointVecVec& x) : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { this->add(x); } + explicit CGaussianInfoCriterion(const TPointVec& x) : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { this->add(x); } + + //! Update the sufficient statistics for computing info content. + void add(const TPointVecVec& x) { + for (std::size_t i = 0u; i < x.size(); ++i) { + this->add(x[i]); } + } - //! Update the sufficient statistics for computing info content. - void add(const TPointVecVec &x) - { - for (std::size_t i = 0u; i < x.size(); ++i) - { - this->add(x[i]); - } + //! Update the sufficient statistics for computing info content. + void add(const TPointVec& x) { + if (x.empty()) { + return; } - //! Update the sufficient statistics for computing info content. - void add(const TPointVec &x) - { - if (x.empty()) - { - return; - } - - TCovariances covariances; - covariances.add(x); - this->add(covariances); - } - - //! Update the sufficient statistics for computing info content. - void add(const TCovariances &covariance) - { - double ni = CBasicStatistics::count(covariance); - m_D = static_cast(CBasicStatistics::mean(covariance).dimension()); - m_K += 1.0; - m_N += ni; - m_Likelihood += ni * log(ni) - - 0.5 * ni * ( m_D - + m_D * core::constants::LOG_TWO_PI - + (ni <= m_D + 1.0 ? core::constants::LOG_MAX_DOUBLE : - this->logDeterminant(covariance))); - } - - //! Calculate the information content of the clusters added so far. - double calculate() const - { - if (m_N == 0.0) - { - return 0.0; - } - - double logN = std::log(m_N); - double p = (m_D * (1.0 + 0.5 * (m_D + 1.0)) * m_K + m_K - 1.0); - switch (TYPE) - { - case E_BIC: - return -2.0 * (m_Likelihood - m_N * logN) + p * logN; - case E_AICc: - return -2.0 * (m_Likelihood - m_N * logN) - + 2.0 * p + p * (p + 1.0) / (m_N - p - 1.0); - } + TCovariances covariances; + covariances.add(x); + this->add(covariances); + } + + //! Update the sufficient statistics for computing info content. 
+ void add(const TCovariances& covariance) { + double ni = CBasicStatistics::count(covariance); + m_D = static_cast(CBasicStatistics::mean(covariance).dimension()); + m_K += 1.0; + m_N += ni; + m_Likelihood += ni * log(ni) - 0.5 * ni * + (m_D + m_D * core::constants::LOG_TWO_PI + + (ni <= m_D + 1.0 ? core::constants::LOG_MAX_DOUBLE : this->logDeterminant(covariance))); + } + + //! Calculate the information content of the clusters added so far. + double calculate() const { + if (m_N == 0.0) { return 0.0; } - private: - //! Compute the log of the determinant of \p covariance. - double logDeterminant(const TCovariances &covariance) const - { - double n = CBasicStatistics::count(covariance); - const TMatrix &c = CBasicStatistics::maximumLikelihoodCovariances(covariance); - double upper = information_criteria_detail::confidence(n - m_D - 1.0); - return information_criteria_detail::logDeterminant(c, upper); + double logN = std::log(m_N); + double p = (m_D * (1.0 + 0.5 * (m_D + 1.0)) * m_K + m_K - 1.0); + switch (TYPE) { + case E_BIC: + return -2.0 * (m_Likelihood - m_N * logN) + p * logN; + case E_AICc: + return -2.0 * (m_Likelihood - m_N * logN) + 2.0 * p + p * (p + 1.0) / (m_N - p - 1.0); } - - private: - //! The point dimension. - double m_D; - //! The number of clusters. - double m_K; - //! The number of points. - double m_N; - //! The data likelihood for the k Gaussians. - double m_Likelihood; + return 0.0; + } + +private: + //! Compute the log of the determinant of \p covariance. + double logDeterminant(const TCovariances& covariance) const { + double n = CBasicStatistics::count(covariance); + const TMatrix& c = CBasicStatistics::maximumLikelihoodCovariances(covariance); + double upper = information_criteria_detail::confidence(n - m_D - 1.0); + return information_criteria_detail::logDeterminant(c, upper); + } + +private: + //! The point dimension. + double m_D; + //! The number of clusters. + double m_K; + //! The number of points. + double m_N; + //! The data likelihood for the k Gaussians. + double m_Likelihood; }; - } } diff --git a/include/maths/CIntegerTools.h b/include/maths/CIntegerTools.h index e85434ea37..7eab04f979 100644 --- a/include/maths/CIntegerTools.h +++ b/include/maths/CIntegerTools.h @@ -14,11 +14,8 @@ #include - -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A collection of utility functions for operations we do //! integers. @@ -27,129 +24,113 @@ namespace maths //! This implements common integer operations: checking alignment, //! rounding and so on. Also any integer operations we sometimes //! need that can be done cheaply some "bit twiddling hack". -class MATHS_EXPORT CIntegerTools -{ - public: - //! Checks whether a double holds an an integer. - static bool isInteger(double value, double tolerance = 0.0); - - //! Get the next larger power of 2, i.e. - //!
-        //!   \f$p = \left \lceil \log_2(x) \right \rceil\f$
-        //! 
-        static std::size_t nextPow2(uint64_t x);
-
-        //! Computes the integer with the reverse of the bits of the binary
-        //! representation of \p x.
-        static uint64_t reverseBits(uint64_t x);
-
-        //! Check if \p value is \p alignment aligned.
-        template
-        static inline bool aligned(INT_TYPE value, INT_TYPE alignment)
-        {
-            return (value % alignment) == static_cast(0);
+class MATHS_EXPORT CIntegerTools {
+public:
+    //! Checks whether a double holds an integer.
+    static bool isInteger(double value, double tolerance = 0.0);
+
+    //! Get the next larger power of 2, i.e.
+    //!
+    //!   \f$p = \left \lceil \log_2(x) \right \rceil\f$
+    //! 
+ static std::size_t nextPow2(uint64_t x); + + //! Computes the integer with the reverse of the bits of the binary + //! representation of \p x. + static uint64_t reverseBits(uint64_t x); + + //! Check if \p value is \p alignment aligned. + template + static inline bool aligned(INT_TYPE value, INT_TYPE alignment) { + return (value % alignment) == static_cast(0); + } + + //! Align \p value to \p alignment rounding up. + //! + //! \param[in] value The value to align to a multiple of \p alignment. + //! \param[in] alignment The alignment. + //! \note It is assumed that \p value and \p alignment are integral types. + template + static inline INT_TYPE ceil(INT_TYPE value, INT_TYPE alignment) { + INT_TYPE result = CIntegerTools::floor(value, alignment); + if (result != value) { + result += alignment; } - - //! Align \p value to \p alignment rounding up. - //! - //! \param[in] value The value to align to a multiple of \p alignment. - //! \param[in] alignment The alignment. - //! \note It is assumed that \p value and \p alignment are integral types. - template - static inline INT_TYPE ceil(INT_TYPE value, INT_TYPE alignment) - { - INT_TYPE result = CIntegerTools::floor(value, alignment); - if (result != value) - { - result += alignment; - } - return result; + return result; + } + + //! Align \p value to \p alignment rounding down. + //! + //! \param[in] value The value to align to a multiple of \p alignment. + //! \param[in] alignment The alignment. + //! \note It is assumed that \p value and \p alignment are integral types. + template + static inline INT_TYPE floor(INT_TYPE value, INT_TYPE alignment) { + INT_TYPE result = (value / alignment) * alignment; + return result == value ? result : (value < 0 ? result - alignment : result); + } + + //! Get the largest value smaller than \p value which is an integer + //! multiple of \p alignment. + //! + //! \param[in] value The value for which to compute the infimum. + //! \param[in] alignment The alignment. + //! \note It is assumed that \p value and \p alignment are integral types. + template + static inline INT_TYPE strictInfimum(INT_TYPE value, INT_TYPE alignment) { + INT_TYPE result = floor(value, alignment); + + // Since this is a strict lower bound we need to trap the case the + // value is an exact multiple of the alignment. + if (result == value) { + result -= alignment; } - //! Align \p value to \p alignment rounding down. - //! - //! \param[in] value The value to align to a multiple of \p alignment. - //! \param[in] alignment The alignment. - //! \note It is assumed that \p value and \p alignment are integral types. - template - static inline INT_TYPE floor(INT_TYPE value, INT_TYPE alignment) - { - INT_TYPE result = (value / alignment) * alignment; - return result == value ? - result : - (value < 0 ? result - alignment : result); + return result; + } + + //! Compute the greatest common divisor of \p a and \p b. + //! + //! Implements Euclid's algorithm for finding the greatest common divisor + //! of two integers. + //! + //! \note The tail recursion will be optimized away. + template + static INT_TYPE gcd(INT_TYPE a, INT_TYPE b) { + if (a < b) { + std::swap(a, b); } - - //! Get the largest value smaller than \p value which is an integer - //! multiple of \p alignment. - //! - //! \param[in] value The value for which to compute the infimum. - //! \param[in] alignment The alignment. - //! \note It is assumed that \p value and \p alignment are integral types. 
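// A worked example of the alignment and divisor helpers declared above
// (values are illustrative; the behaviour follows from the implementations
// shown in this header):
#include <maths/CIntegerTools.h>
#include <cassert>

int main() {
    assert(ml::maths::CIntegerTools::floor(7, 4) == 4);         // round down to a multiple
    assert(ml::maths::CIntegerTools::ceil(7, 4) == 8);          // round up to a multiple
    assert(ml::maths::CIntegerTools::floor(-3, 4) == -4);       // rounds towards -infinity
    assert(ml::maths::CIntegerTools::strictInfimum(8, 4) == 4); // strictly smaller multiple
    assert(ml::maths::CIntegerTools::gcd(12, 18) == 6);         // Euclid's algorithm
    return 0;
}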
- template - static inline INT_TYPE strictInfimum(INT_TYPE value, INT_TYPE alignment) - { - INT_TYPE result = floor(value, alignment); - - // Since this is a strict lower bound we need to trap the case the - // value is an exact multiple of the alignment. - if (result == value) - { - result -= alignment; - } - - return result; + return b == 0 ? a : gcd(b, a % b); + } + + //! Compute the greatest common divisor of all integers in \p c. + //! + //! This uses the property: + //!
+    //!   \f$gcd(a, b, c) = gcd(gcd(a, b), c)\f$
+    //! 
+ //! to extend Euclid's algorithm to a collection of integers. + template + static INT_TYPE gcd(std::vector c) { + if (c.empty()) { + return INT_TYPE(1); } - - //! Compute the greatest common divisor of \p a and \p b. - //! - //! Implements Euclid's algorithm for finding the greatest common divisor - //! of two integers. - //! - //! \note The tail recursion will be optimized away. - template - static INT_TYPE gcd(INT_TYPE a, INT_TYPE b) - { - if (a < b) - { - std::swap(a, b); - } - return b == 0 ? a : gcd(b, a % b); + if (c.size() == 1) { + return c[0]; } - //! Compute the greatest common divisor of all integers in \p c. - //! - //! This uses the property: - //!
-        //!   \f$gcd(a, b, c) = gcd(gcd(a, b), c)\f$
-        //! 
- //! to extend Euclid's algorithm to a collection of integers. - template - static INT_TYPE gcd(std::vector c) - { - if (c.empty()) - { - return INT_TYPE(1); - } - if (c.size() == 1) - { - return c[0]; - } - - // Repeatedly apply Euclid's algorithm and use the fact that - // gcd(a, b, c) = gcd(gcd(a, b), c). - INT_TYPE result = gcd(c[0], c[1]); - for (std::size_t i = 2; i < c.size(); ++i) - { - result = gcd(result, c[i]); - } - return result; + // Repeatedly apply Euclid's algorithm and use the fact that + // gcd(a, b, c) = gcd(gcd(a, b), c). + INT_TYPE result = gcd(c[0], c[1]); + for (std::size_t i = 2; i < c.size(); ++i) { + result = gcd(result, c[i]); } + return result; + } - //! Compute the binomial coefficient \f$\frac{n!}{k!(n-k)!}\f$. - static double binomial(unsigned int n, unsigned int k); + //! Compute the binomial coefficient \f$\frac{n!}{k!(n-k)!}\f$. + static double binomial(unsigned int n, unsigned int k); }; - } } #endif // INCLUDED_ml_maths_CIntegerTools_h diff --git a/include/maths/CIntegration.h b/include/maths/CIntegration.h index 3faa253f44..870718e954 100644 --- a/include/maths/CIntegration.h +++ b/include/maths/CIntegration.h @@ -10,24 +10,22 @@ #include #include #include -#include #include #include +#include #include #include #include -#include #include +#include #include #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Place holder for numerical integration schemes. //! @@ -47,691 +45,599 @@ namespace maths //! where the second argument is filled in with the value of the function at the //! first argument and a return value of false means that the function could not //! be evaluated. -class MATHS_EXPORT CIntegration -{ - public: - using TDoubleVec = std::vector; - using TDoubleDoublePr = std::pair; - using TDoubleDoublePrVec = std::vector; - using TLogFunc = std::function; +class MATHS_EXPORT CIntegration { +public: + using TDoubleVec = std::vector; + using TDoubleDoublePr = std::pair; + using TDoubleDoublePrVec = std::vector; + using TLogFunc = std::function; + +public: + //! Enumeration of order of quadrature. + enum EOrder { + OrderOne = 1, + OrderTwo = 2, + OrderThree = 3, + OrderFour = 4, + OrderFive = 5, + OrderSix = 6, + OrderSeven = 7, + OrderEight = 8, + OrderNine = 9, + OrderTen = 10 + }; + + //! Enumerations of the dimension of multivariate integrals we support + //! using sparse quadrature. + //! + //! \note The limit is imposed by curse of dimensionality. If we need + //! more dimensions we'll have to implement a quasi Monte-Carlo method. + enum EDimension { + OneDimension = 1, + TwoDimensions = 2, + ThreeDimensions = 3, + FourDimensions = 4, + FiveDimensions = 5, + SixDimensions = 6, + SevenDimensions = 7, + EightDimensions = 8, + NineDimensions = 9, + TenDimensions = 10 + }; + +public: + //! Gauss-Legendre quadrature. + //! + //! This implements Gauss-Legendre numerical integration of the function + //! f(x) on the interval [a, b]. + //! + //! \param[in] function The function to integrate. + //! \param[in] a The start of the integration interval. + //! \param[in] b The end of the integration interval. + //! \param[out] result Filled with the integral of \p function over [\p a, \p b]. + //! + //! \tparam ORDER The order of quadrature to use. + //! \tparam F It is assumed that this has the signature: + //! bool function(double x, T &f) + //! where f is filled in with the value of the function at x and returning + //! false means that the function could not be evaluated at x. + //! 
\tparam T The type of range of \p f. This must have a meaningful default + //! constructor, support multiplication by a double and addition. + template + static bool gaussLegendre(const F& function, double a, double b, T& result) { + result = T(); + + const double* weights = CGaussLegendreQuadrature::weights(ORDER); + const double* abscissas = CGaussLegendreQuadrature::abscissas(ORDER); + + // Evaluate f(x) at the abscissas and compute the weighted sum + // of the quadrature. + double centre = (a + b) / 2.0; + double range = (b - a) / 2.0; + for (unsigned int i = 0; i < ORDER; ++i) { + T fx; + if (!function(centre + range * abscissas[i], fx)) { + return false; + } + fx *= weights[i]; + result += fx; + } + result *= range; + + return true; + } + + //! Gauss-Legendre quadrature of the product of two functions. + //! + //! This implements Gauss-Legendre numerical integration of the function + //! f(x), g(x) and f(x) * g(x) on the interval [a, b] which can be computed + //! simultaneously with only a total of ORDER function evaluations of both. + //! + //! \param[in] f A function to integrate. + //! \param[in] g Another function to integrate. + //! \param[in] a The start of the integration interval. + //! \param[in] b The end of the integration interval. + //! \param[out] productIntegral Filled with the integral of \p f * \p g + //! over [\p a, \p b]. + //! \param[out] fIntegral Filled with the integral of \p f over [\p a, \p b]. + //! \param[out] gIntegral Filled with the integral of \p g over [\p a, \p b]. + //! + //! \tparam ORDER The order of quadrature to use. + //! \tparam F It is assumed that this has the signature: + //! bool function(double x, T &f) + //! where f is filled in with the value of the function at x and returning + //! false means that the function could not be evaluated at x. + //! \tparam G It is assumed that this has the signature: + //! bool function(double x, T &f) + //! where f is filled in with the value of the function at x and returning + //! false means that the function could not be evaluated at x. + //! \tparam U The type of range of \p f. This must have a meaningful default + //! constructor, support multiplication by a double and addition. + //! \tparam V The type of range of \p g. This must have a meaningful default + //! constructor, support multiplication by a double and addition. + template + static bool productGaussLegendre(const F& f, const G& g, double a, double b, U& productIntegral, U& fIntegral, V& gIntegral) { + productIntegral = U(); + fIntegral = U(); + gIntegral = V(); + + const double* weights = CGaussLegendreQuadrature::weights(ORDER); + const double* abscissas = CGaussLegendreQuadrature::abscissas(ORDER); + + // Evaluate f(x) at the abscissas and compute the weighted sum + // of the quadrature. + double centre = (a + b) / 2.0; + double range = (b - a) / 2.0; + for (unsigned int i = 0; i < ORDER; ++i) { + U fx; + V gx; + if (!f(centre + range * abscissas[i], fx) || !g(centre + range * abscissas[i], gx)) { + return false; + } + double weight = weights[i]; + productIntegral += fx * gx * weight; + fIntegral += fx * weight; + gIntegral += gx * weight; + } - public: - //! Enumeration of order of quadrature. - enum EOrder - { - OrderOne = 1, - OrderTwo = 2, - OrderThree = 3, - OrderFour = 4, - OrderFive = 5, - OrderSix = 6, - OrderSeven = 7, - OrderEight = 8, - OrderNine = 9, - OrderTen = 10 - }; - - //! Enumerations of the dimension of multivariate integrals we support - //! using sparse quadrature. - //! - //! 
\note The limit is imposed by curse of dimensionality. If we need - //! more dimensions we'll have to implement a quasi Monte-Carlo method. - enum EDimension - { - OneDimension = 1, - TwoDimensions = 2, - ThreeDimensions = 3, - FourDimensions = 4, - FiveDimensions = 5, - SixDimensions = 6, - SevenDimensions = 7, - EightDimensions = 8, - NineDimensions = 9, - TenDimensions = 10 - }; + productIntegral *= range; + fIntegral *= range; + gIntegral *= range; + + return true; + } + + //! Gauss-Legendre quadrature using logarithms. + //! + //! This implements Gauss-Legendre numerical integration of the function + //! f(x) on the interval [a, b] where log(f(x)) is computed and the log + //! of the integral is returned. This is intended to support integration + //! where the integrand is very small and may underflow double. This is + //! handled by renormalizing the integral so only underflows if values + //! are less than std::exp(-std::numeric_limits::max()), so really + //! very small! + //! + //! \param[in] function The log of the function to integrate. + //! \param[in] a The start of the integration interval. + //! \param[in] b The end of the integration interval. + //! \param[out] result Filled with the log of the integral of \p function + //! over [\p a, \p b]. + //! + //! \tparam ORDER The order of quadrature to use. + //! \tparam F It is assumed that this has the signature: + //! bool function(double x, double &f) + //! where f is filled in with the value of the function at x and returning + //! false means that the function could not be evaluated at x. + template + static bool logGaussLegendre(const F& function, double a, double b, double& result) { + result = 0.0; + + if (b <= a) { + std::swap(a, b); + } - public: - //! Gauss-Legendre quadrature. - //! - //! This implements Gauss-Legendre numerical integration of the function - //! f(x) on the interval [a, b]. - //! - //! \param[in] function The function to integrate. - //! \param[in] a The start of the integration interval. - //! \param[in] b The end of the integration interval. - //! \param[out] result Filled with the integral of \p function over [\p a, \p b]. - //! - //! \tparam ORDER The order of quadrature to use. - //! \tparam F It is assumed that this has the signature: - //! bool function(double x, T &f) - //! where f is filled in with the value of the function at x and returning - //! false means that the function could not be evaluated at x. - //! \tparam T The type of range of \p f. This must have a meaningful default - //! constructor, support multiplication by a double and addition. - template - static bool gaussLegendre(const F &function, - double a, - double b, - T &result) - { - result = T(); - - const double *weights = CGaussLegendreQuadrature::weights(ORDER); - const double *abscissas = CGaussLegendreQuadrature::abscissas(ORDER); - - // Evaluate f(x) at the abscissas and compute the weighted sum - // of the quadrature. - double centre = (a + b) / 2.0; - double range = (b - a) / 2.0; - for (unsigned int i = 0; i < ORDER; ++i) - { - T fx; - if (!function(centre + range * abscissas[i], fx)) - { - return false; - } - fx *= weights[i]; - result += fx; + const double* weights = CGaussLegendreQuadrature::weights(ORDER); + const double* abscissas = CGaussLegendreQuadrature::abscissas(ORDER); + + double fx[ORDER] = {0.0}; + + // Evaluate f(x) at the abscissas. 
+ double centre = (a + b) / 2.0; + double range = (b - a) / 2.0; + for (unsigned int i = 0; i < ORDER; ++i) { + if (!function(centre + range * abscissas[i], fx[i])) { + return false; } - result *= range; + } - return true; + // Re-normalize and then take exponentials to avoid underflow. + double fmax = *std::max_element(fx, fx + ORDER); + for (unsigned int i = 0; i < ORDER; ++i) { + fx[i] = std::exp(fx[i] - fmax); } - //! Gauss-Legendre quadrature of the product of two functions. - //! - //! This implements Gauss-Legendre numerical integration of the function - //! f(x), g(x) and f(x) * g(x) on the interval [a, b] which can be computed - //! simultaneously with only a total of ORDER function evaluations of both. - //! - //! \param[in] f A function to integrate. - //! \param[in] g Another function to integrate. - //! \param[in] a The start of the integration interval. - //! \param[in] b The end of the integration interval. - //! \param[out] productIntegral Filled with the integral of \p f * \p g - //! over [\p a, \p b]. - //! \param[out] fIntegral Filled with the integral of \p f over [\p a, \p b]. - //! \param[out] gIntegral Filled with the integral of \p g over [\p a, \p b]. - //! - //! \tparam ORDER The order of quadrature to use. - //! \tparam F It is assumed that this has the signature: - //! bool function(double x, T &f) - //! where f is filled in with the value of the function at x and returning - //! false means that the function could not be evaluated at x. - //! \tparam G It is assumed that this has the signature: - //! bool function(double x, T &f) - //! where f is filled in with the value of the function at x and returning - //! false means that the function could not be evaluated at x. - //! \tparam U The type of range of \p f. This must have a meaningful default - //! constructor, support multiplication by a double and addition. - //! \tparam V The type of range of \p g. This must have a meaningful default - //! constructor, support multiplication by a double and addition. - template - static bool productGaussLegendre(const F &f, const G &g, - double a, double b, - U &productIntegral, - U &fIntegral, - V &gIntegral) - { - productIntegral = U(); - fIntegral = U(); - gIntegral = V(); - - const double *weights = CGaussLegendreQuadrature::weights(ORDER); - const double *abscissas = CGaussLegendreQuadrature::abscissas(ORDER); - - // Evaluate f(x) at the abscissas and compute the weighted sum - // of the quadrature. - double centre = (a + b) / 2.0; - double range = (b - a) / 2.0; - for (unsigned int i = 0; i < ORDER; ++i) - { - U fx; - V gx; - if ( !f(centre + range * abscissas[i], fx) - || !g(centre + range * abscissas[i], gx)) - { - return false; - } - double weight = weights[i]; - productIntegral += fx * gx * weight; - fIntegral += fx * weight; - gIntegral += gx * weight; - } + // Quadrature. + for (unsigned int i = 0; i < ORDER; ++i) { + result += weights[i] * fx[i]; + } + result *= range; + result = result <= 0.0 ? core::constants::LOG_MIN_DOUBLE : fmax + std::log(result); + + return true; + } + + //! An adaptive Gauss-Legendre scheme for univariate integration. + //! + //! This evaluates the integral of \p f over successive refinements of + //! the initial intervals \p intervals. An interval is only refined if + //! it is determined that doing so will affect the result by the relative + //! \p tolerance. Note that the worst case complexity is + //! O(\p splitsPerRefinement ^ refinements) although typical complexity + //! 
are much smaller, because many intervals are quickly pruned. + //! + //! \param[in] f The function to integrate. + //! \param[in,out] intervals The seed intervals over which to evaluate + //! the function integral. + //! \param[in,out] fIntervals The integral of \p f over each of the seed + //! intervals \p intervals. + //! \param[in] refinements The maximum number of times this will split + //! the seed intervals. + //! \param[in] splitsPerRefinement The number of times an interval is + //! is split when it is refined. + //! \param[in] tolerance The relative (to \p result) tolerance in the + //! error in \p result. + //! \param[in,out] result The integral of \p f over \p intervals is added + //! to this value. + //! \note \p intervals and \p fIntervals are modified in order to avoid + //! the copy if it isn't needed. If it is make copies yourself and pass + //! these in. + template + static bool adaptiveGaussLegendre(const F& f, + TDoubleDoublePrVec& intervals, + TDoubleVec& fIntervals, + std::size_t refinements, + std::size_t splitsPerRefinement, + double tolerance, + double& result) { + if (intervals.size() != fIntervals.size()) { + LOG_ERROR("Inconsistent intervals and function integrals: " << core::CContainerPrinter::print(intervals) << " " + << core::CContainerPrinter::print(fIntervals)); + return false; + } - productIntegral *= range; - fIntegral *= range; - gIntegral *= range; + result += std::accumulate(fIntervals.begin(), fIntervals.end(), 0.0); + LOG_TRACE("initial = " << result); - return true; + TDoubleVec corrections; + corrections.reserve(fIntervals.size()); + for (std::size_t i = 0u; i < fIntervals.size(); ++i) { + corrections.push_back(std::fabs(fIntervals[i])); } - //! Gauss-Legendre quadrature using logarithms. - //! - //! This implements Gauss-Legendre numerical integration of the function - //! f(x) on the interval [a, b] where log(f(x)) is computed and the log - //! of the integral is returned. This is intended to support integration - //! where the integrand is very small and may underflow double. This is - //! handled by renormalizing the integral so only underflows if values - //! are less than std::exp(-std::numeric_limits::max()), so really - //! very small! - //! - //! \param[in] function The log of the function to integrate. - //! \param[in] a The start of the integration interval. - //! \param[in] b The end of the integration interval. - //! \param[out] result Filled with the log of the integral of \p function - //! over [\p a, \p b]. - //! - //! \tparam ORDER The order of quadrature to use. - //! \tparam F It is assumed that this has the signature: - //! bool function(double x, double &f) - //! where f is filled in with the value of the function at x and returning - //! false means that the function could not be evaluated at x. 
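// A minimal usage sketch for the quadrature above: integrate x^2 over [0, 1]
// (exact value 1/3) with a functor matching the documented signature. The
// choice of order, and the assumption that the order is the leading template
// argument elided from this header, are illustrative:
#include <maths/CIntegration.h>
#include <cstdio>

struct SSquare {
    bool operator()(double x, double& f) const {
        f = x * x;
        return true;
    }
};

int main() {
    double result = 0.0;
    if (ml::maths::CIntegration::gaussLegendre<ml::maths::CIntegration::OrderThree>(
            SSquare(), 0.0, 1.0, result)) {
        std::printf("integral = %g\n", result); // ~0.333333
    }
    return 0;
}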
- template - static bool logGaussLegendre(const F &function, - double a, - double b, - double &result) - { - result = 0.0; - - if (b <= a) - { - std::swap(a, b); + for (std::size_t i = 0u; !intervals.empty() && i < refinements; ++i) { + std::size_t n = intervals.size(); + double cutoff = tolerance * std::fabs(result) / static_cast(n); + + std::size_t end = 0u; + for (std::size_t j = 0u; j < corrections.size(); ++j) { + if (corrections[j] > cutoff) { + std::swap(intervals[end], intervals[j]); + std::swap(fIntervals[end], fIntervals[j]); + std::swap(corrections[end], corrections[j]); + ++end; + } + } + if (end != corrections.size()) { + intervals.erase(intervals.begin() + end, intervals.end()); + fIntervals.erase(fIntervals.begin() + end, fIntervals.end()); + corrections.erase(corrections.begin() + end, corrections.end()); } + n = intervals.size(); - const double *weights = CGaussLegendreQuadrature::weights(ORDER); - const double *abscissas = CGaussLegendreQuadrature::abscissas(ORDER); + if (i + 1 < refinements) { + intervals.reserve(splitsPerRefinement * n); + fIntervals.reserve(splitsPerRefinement * n); + corrections.reserve(splitsPerRefinement * n); + } - double fx[ORDER] = { 0.0 }; + for (std::size_t j = 0u; j < n; ++j) { + if (corrections[j] <= cutoff) { + corrections[j] = 0.0; + continue; + } - // Evaluate f(x) at the abscissas. - double centre = (a + b) / 2.0; - double range = (b - a) / 2.0; - for (unsigned int i = 0; i < ORDER; ++i) - { - if (!function(centre + range * abscissas[i], fx[i])) - { - return false; + double fjOld = fIntervals[j]; + double fjNew = 0.0; + + double aj = intervals[j].first; + double dj = (intervals[j].second - intervals[j].first) / static_cast(splitsPerRefinement); + for (std::size_t k = 0u; k < splitsPerRefinement; ++k, aj += dj) { + double df; + if (CIntegration::gaussLegendre(f, aj, aj + dj, df)) { + fjNew += df; + if (i + 1 < refinements) { + if (k == 0) { + intervals[j] = TDoubleDoublePr(aj, aj + dj); + fIntervals[j] = df; + } else { + intervals.push_back(TDoubleDoublePr(aj, aj + dj)); + fIntervals.push_back(df); + } + } + } else { + LOG_ERROR("Couldn't integrate f over [" << aj << "," << aj + dj << "]"); + return false; + } } - } - // Re-normalize and then take exponentials to avoid underflow. - double fmax = *std::max_element(fx, fx + ORDER); - for (unsigned int i = 0; i < ORDER; ++i) - { - fx[i] = std::exp(fx[i] - fmax); - } + LOG_TRACE("fjNew = " << fjNew << ", fjOld = " << fjOld); + double correction = fjNew - fjOld; + if (i + 1 < refinements) { + corrections[j] = std::fabs(correction); + corrections.resize(corrections.size() + splitsPerRefinement - 1, std::fabs(correction)); + } - // Quadrature. - for (unsigned int i = 0; i < ORDER; ++i) - { - result += weights[i] * fx[i]; + result += correction; + cutoff = tolerance * std::fabs(result) / static_cast(n); } - result *= range; - result = result <= 0.0 ? core::constants::LOG_MIN_DOUBLE : fmax + std::log(result); - - return true; } - //! An adaptive Gauss-Legendre scheme for univariate integration. - //! - //! This evaluates the integral of \p f over successive refinements of - //! the initial intervals \p intervals. An interval is only refined if - //! it is determined that doing so will affect the result by the relative - //! \p tolerance. Note that the worst case complexity is - //! O(\p splitsPerRefinement ^ refinements) although typical complexity - //! are much smaller, because many intervals are quickly pruned. - //! - //! \param[in] f The function to integrate. - //! 
\param[in,out] intervals The seed intervals over which to evaluate
-    //! the function integral.
-    //! \param[in,out] fIntervals The integral of \p f over each of the seed
-    //! intervals \p intervals.
-    //! \param[in] refinements The maximum number of times this will split
-    //! the seed intervals.
-    //! \param[in] splitsPerRefinement The number of times an interval is
-    //! is split when it is refined.
-    //! \param[in] tolerance The relative (to \p result) tolerance in the
-    //! error in \p result.
-    //! \param[in,out] result The integral of \p f over \p intervals is added
-    //! to this value.
-    //! \note \p intervals and \p fIntervals are modified in order to avoid
-    //! the copy if it isn't needed. If it is make copies yourself and pass
-    //! these in.
-    template<typename F>
-    static bool adaptiveGaussLegendre(const F &f,
-                                      TDoubleDoublePrVec &intervals,
-                                      TDoubleVec &fIntervals,
-                                      std::size_t refinements,
-                                      std::size_t splitsPerRefinement,
-                                      double tolerance,
-                                      double &result)
-    {
-        if (intervals.size() != fIntervals.size())
-        {
-            LOG_ERROR("Inconsistent intervals and function integrals: "
-                      << core::CContainerPrinter::print(intervals)
-                      << " " << core::CContainerPrinter::print(fIntervals));
-            return false;
-        }
+        return true;
+    }
+
+    //! \brief Implements Smolyak's construction to create the sparse
+    //! grid points and weights from Gauss-Legendre quadrature.
+    //!
+    //! DESCRIPTION:\n
+    //! For more details on this process see:
+    //! http://www.cims.nyu.edu/~kellen/research/sparsegrids/%20Numerical%20integration%20using%20sparse%20grids.pdf
+    //!
+    //! \note This approach still suffers the curse of dimensionality,
+    //! albeit with significantly reduced growth rate \f$O(2^l l^d)\f$ for
+    //! order \f$2^l\f$ and dimension \f$d\f$ rather than \f$O(2^{ld})\f$
+    //! for the standard tensor product approach. So, this only supports
+    //! up to ten dimensions.
+    //!
+    //! IMPLEMENTATION DECISIONS:\n
+    //! In general, we won't need every combination of order and dimension.
+    //! Therefore, although the points and weights can be precomputed and
+    //! could in principle be loaded at start time, doing this preemptively
+    //! would bloat the process footprint unnecessarily. This class therefore
+    //! implements the singleton pattern, one for each template parameter
+    //! combination, and computes the points and weights in the constructor.
+    //! This means any calling code only ever pays the runtime cost of
+    //! computing them once, so the amortized cost of integration is as
+    //! low as possible.
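The IMPLEMENTATION DECISIONS note is realised in the instance() accessor below with acquire/release double-checked locking. The same idiom in isolation, using illustrative names and std::mutex in place of the library's fast mutex:

    #include <atomic>
    #include <iostream>
    #include <mutex>

    // Lazily constructed singleton: the fast path is one acquire load;
    // the mutex is only taken while the first caller constructs the
    // instance. The instance is deliberately never deleted, so it stays
    // valid for the lifetime of the process.
    class CWeightsTable {
    public:
        static const CWeightsTable& instance() {
            const CWeightsTable* tmp = ms_Instance.load(std::memory_order_acquire);
            if (!tmp) {
                std::lock_guard<std::mutex> lock(ms_Mutex);
                tmp = ms_Instance.load(std::memory_order_relaxed);
                if (!tmp) {
                    tmp = new CWeightsTable; // compute expensive tables here
                    ms_Instance.store(tmp, std::memory_order_release);
                }
            }
            return *tmp;
        }
        double value() const { return m_Value; }

    private:
        CWeightsTable() : m_Value(42.0) {}

    private:
        double m_Value;
        static std::atomic<const CWeightsTable*> ms_Instance;
        static std::mutex ms_Mutex;
    };

    std::atomic<const CWeightsTable*> CWeightsTable::ms_Instance{nullptr};
    std::mutex CWeightsTable::ms_Mutex;

    int main() {
        std::cout << CWeightsTable::instance().value() << '\n';
    }

The relaxed reload under the lock is safe because the lock itself orders the two loads; the release store pairs with the acquire load on the fast path, so a caller that sees the pointer also sees the fully constructed tables.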
+    template<EOrder O, EDimension D>
+    class CSparseGaussLegendreQuadrature : private core::CNonCopyable {
+    private:
+        static const unsigned int ORDER = static_cast<unsigned int>(O);
+        static const unsigned int DIMENSION = static_cast<unsigned int>(D);
-        result += std::accumulate(fIntervals.begin(), fIntervals.end(), 0.0);
-        LOG_TRACE("initial = " << result);
+    public:
+        using TVector = CVectorNx1<double, DIMENSION>;
+        using TVectorVec = std::vector<TVector>;
-        TDoubleVec corrections;
-        corrections.reserve(fIntervals.size());
-        for (std::size_t i = 0u; i < fIntervals.size(); ++i)
-        {
-            corrections.push_back(std::fabs(fIntervals[i]));
+    public:
+        static const CSparseGaussLegendreQuadrature& instance() {
+            const CSparseGaussLegendreQuadrature* tmp = ms_Instance.load(std::memory_order_acquire);
+            if (!tmp) {
+                core::CScopedFastLock scopedLock(CIntegration::ms_Mutex);
+                tmp = ms_Instance.load(std::memory_order_relaxed);
+                if (!tmp) {
+                    tmp = new CSparseGaussLegendreQuadrature();
+                    ms_Instance.store(tmp, std::memory_order_release);
+                }
            }
+            return *tmp;
+        }
-        for (std::size_t i = 0u;
-             !intervals.empty() && i < refinements;
-             ++i)
-        {
-            std::size_t n = intervals.size();
-            double cutoff = tolerance * std::fabs(result) / static_cast<double>(n);
-
-            std::size_t end = 0u;
-            for (std::size_t j = 0u; j < corrections.size(); ++j)
-            {
-                if (corrections[j] > cutoff)
-                {
-                    std::swap(intervals[end], intervals[j]);
-                    std::swap(fIntervals[end], fIntervals[j]);
-                    std::swap(corrections[end], corrections[j]);
-                    ++end;
-                }
-            }
-            if (end != corrections.size())
-            {
-                intervals.erase(intervals.begin() + end, intervals.end());
-                fIntervals.erase(fIntervals.begin() + end, fIntervals.end());
-                corrections.erase(corrections.begin() + end, corrections.end());
-            }
-            n = intervals.size();
+        //! The sparse grid point weights.
+        const TDoubleVec& weights() const { return m_Weights; }
-            if (i + 1 < refinements)
-            {
-                intervals.reserve(splitsPerRefinement * n);
-                fIntervals.reserve(splitsPerRefinement * n);
-                corrections.reserve(splitsPerRefinement * n);
-            }
+        //! The sparse grid points.
+        const TVectorVec& points() const { return m_Points; }
-            for (std::size_t j = 0u; j < n; ++j)
-            {
-                if (corrections[j] <= cutoff)
-                {
-                    corrections[j] = 0.0;
-                    continue;
-                }
+    private:
+        using TUIntVec = std::vector<unsigned int>;
-                double fjOld = fIntervals[j];
-                double fjNew = 0.0;
-
-                double aj = intervals[j].first;
-                double dj = (intervals[j].second - intervals[j].first)
-                            / static_cast<double>(splitsPerRefinement);
-                for (std::size_t k = 0u; k < splitsPerRefinement; ++k, aj += dj)
-                {
-                    double df;
-                    if (CIntegration::gaussLegendre(f, aj, aj + dj, df))
-                    {
-                        fjNew += df;
-                        if (i + 1 < refinements)
-                        {
-                            if (k == 0)
-                            {
-                                intervals[j] = TDoubleDoublePr(aj, aj + dj);
-                                fIntervals[j] = df;
-                            }
-                            else
-                            {
-                                intervals.push_back(TDoubleDoublePr(aj, aj + dj));
-                                fIntervals.push_back(df);
-                            }
-                        }
-                    }
-                    else
-                    {
-                        LOG_ERROR("Couldn't integrate f over ["
-                                  << aj << "," << aj + dj << "]");
-                        return false;
-                    }
+    private:
+        //! Iterates through the combinations such that \f$\|I\|_1 = l\f$
+        //! for the indices \f$I\f$ and some fixed monomial order \f$l\f$.
+ static bool next(std::size_t d, TUIntVec& indices, TUIntVec& stop) { + for (;;) { + ++indices[d]; + if (indices[d] > stop[d]) { + if (d == DIMENSION - 1) { + break; } - - LOG_TRACE("fjNew = " << fjNew << ", fjOld = " << fjOld); - double correction = fjNew - fjOld; - if (i + 1 < refinements) - { - corrections[j] = std::fabs(correction); - corrections.resize(corrections.size() + splitsPerRefinement - 1, - std::fabs(correction)); + indices[d] = 1; + ++d; + } else { + for (std::size_t j = 0; j < d; ++j) { + stop[j] = stop[d] - indices[d] + 1; } - - result += correction; - cutoff = tolerance * std::fabs(result) / static_cast(n); + indices[0] = stop[0]; + d = 0u; + return true; } } - - return true; + return false; } - //! \brief Implements Smolyak's construction to create the sparse - //! grid points and weights from Gauss-Legendre quadrature. - //! - //! DESCRIPTION:\n - //! For more details on this process see: - //! http://www.cims.nyu.edu/~kellen/research/sparsegrids/%20Numerical%20integration%20using%20sparse%20grids.pdf - //! - //! \note This approach still suffers the curse of dimensionality, - //! albeit with significantly reduced growth rate \f$O(2^l l^d)\f$ for - //! order \f$2^l\f$ and dimension \f$d\f$ rather than \f$O(2^{ld}\f$ - //! for the standard tensor product approach. So, this only supports - //! up to ten dimensions. - //! - //! IMPLEMENTATION DECISIONS:\n - //! In general, we won't need every combination of order and dimension. - //! Therefore, although the points and weights can be precomputed and - //! could in principle be loaded at start time, doing this preemptively - //! would bloat the process footprint unnecessarily. This class therefore - //! implements the singleton pattern, one for each template parameter - //! combination, and computes the points and weights in the constructor. - //! This means any calling code only ever pays the runtime cost for - //! computing them once and means that the amortized cost of integration - //! is as low as possible. - template - class CSparseGaussLegendreQuadrature : private core::CNonCopyable - { - private: - static const unsigned int ORDER = static_cast(O); - static const unsigned int DIMENSION = static_cast(D); - - public: - using TVector = CVectorNx1; - using TVectorVec = std::vector; - - public: - static const CSparseGaussLegendreQuadrature &instance() - { - const CSparseGaussLegendreQuadrature *tmp = ms_Instance.load(std::memory_order_acquire); - if (!tmp) - { - core::CScopedFastLock scopedLock(CIntegration::ms_Mutex); - tmp = ms_Instance.load(std::memory_order_relaxed); - if (!tmp) - { - tmp = new CSparseGaussLegendreQuadrature(); - ms_Instance.store(tmp, std::memory_order_release); - } - } - return *tmp; - } + CSparseGaussLegendreQuadrature() { + // Generate the weights. We don't exploit the weight and + // abscissa symmetries to reduce the static storage since + // this reduces the speed of integration and since we limit + // the dimension the maximum number of points will be 8761. + // + // Note this uses the construction: + // Q^d_l = \sum{l <= ||k||_1 <= l+d-1}{ (-1)^(l+d-||k||_1-1) (d-1 ||k||_1-l) (Q^1_k_1 x ... x Q^1_k_d) } - //! The sparse grid point weights. - const TDoubleVec &weights() const - { - return m_Weights; - } + using TVectorDoubleMap = std::map; - //! The sparse grid point points. - const TVectorVec &points() const - { - return m_Points; - } + TVectorDoubleMap ordered; - private: - using TUIntVec = std::vector; - - private: - //! Iterates through the combinations such that \f$\|I\|_1 = l\f$ - //! 
for the indices \f$I\f$ and some fixed monomial order \f$l\f$. - static bool next(std::size_t d, TUIntVec &indices, TUIntVec &stop) - { - for (;;) - { - ++indices[d]; - if (indices[d] > stop[d]) - { - if (d == DIMENSION-1) - { - break; - } - indices[d] = 1; - ++d; - } - else - { - for (std::size_t j = 0; j < d; ++j) - { - stop[j] = stop[d] - indices[d] + 1; - } - indices[0] = stop[0]; - d = 0u; - return true; - } - } - return false; - } + for (unsigned int l = ORDER > DIMENSION ? ORDER - DIMENSION : 0; l < ORDER; ++l) { + LOG_TRACE("order = " << l); + std::size_t d = 0u; + TUIntVec indices(DIMENSION, 1); + indices[0] = l + 1; + TUIntVec stop(DIMENSION, l + 1); - CSparseGaussLegendreQuadrature() - { - // Generate the weights. We don't exploit the weight and - // abscissa symmetries to reduce the static storage since - // this reduces the speed of integration and since we limit - // the dimension the maximum number of points will be 8761. - // - // Note this uses the construction: - // Q^d_l = \sum{l <= ||k||_1 <= l+d-1}{ (-1)^(l+d-||k||_1-1) (d-1 ||k||_1-l) (Q^1_k_1 x ... x Q^1_k_d) } - - using TVectorDoubleMap = std::map; - - TVectorDoubleMap ordered; - - for (unsigned int l = ORDER > DIMENSION ? ORDER - DIMENSION : 0; l < ORDER; ++l) - { - LOG_TRACE("order = " << l); - std::size_t d = 0u; - TUIntVec indices(DIMENSION, 1); - indices[0] = l + 1; - TUIntVec stop(DIMENSION, l + 1); - - double sign = (ORDER - l - 1) % 2 == 1 ? -1.0 : 1.0; - double scale = sign * CIntegerTools::binomial(DIMENSION - 1, - DIMENSION + l - ORDER); - LOG_TRACE("scale = " << scale); - - do - { - LOG_TRACE("indices = " << core::CContainerPrinter::print(indices)); - - unsigned int n = 1u; - for (std::size_t i = 0u; i < indices.size(); ++i) - { - n *= indices[i]; - } - LOG_TRACE("Number of points = " << n); - - TDoubleVec weights(n, 1.0); - TVectorVec points(n, TVector(0.0)); - for (unsigned int i = 0u; i < n; ++i) - { - for (unsigned int i_ = i, j = 0u; - j < indices.size(); - i_ /= indices[j], ++j) - { - EOrder order = static_cast(indices[j]); - const double *w = CGaussLegendreQuadrature::weights(order); - const double *a = CGaussLegendreQuadrature::abscissas(order); - std::size_t k = i_ % indices[j]; - weights[i] *= w[k]; - points[i](j) = a[k]; - } - } - LOG_TRACE("weights = " << core::CContainerPrinter::print(weights)); - LOG_TRACE("points = " << core::CContainerPrinter::print(points)); - for (std::size_t i = 0u; i < n; ++i) - { - ordered[points[i]] += scale * weights[i]; - } + double sign = (ORDER - l - 1) % 2 == 1 ? 
-1.0 : 1.0; + double scale = sign * CIntegerTools::binomial(DIMENSION - 1, DIMENSION + l - ORDER); + LOG_TRACE("scale = " << scale); + + do { + LOG_TRACE("indices = " << core::CContainerPrinter::print(indices)); + + unsigned int n = 1u; + for (std::size_t i = 0u; i < indices.size(); ++i) { + n *= indices[i]; + } + LOG_TRACE("Number of points = " << n); + + TDoubleVec weights(n, 1.0); + TVectorVec points(n, TVector(0.0)); + for (unsigned int i = 0u; i < n; ++i) { + for (unsigned int i_ = i, j = 0u; j < indices.size(); i_ /= indices[j], ++j) { + EOrder order = static_cast(indices[j]); + const double* w = CGaussLegendreQuadrature::weights(order); + const double* a = CGaussLegendreQuadrature::abscissas(order); + std::size_t k = i_ % indices[j]; + weights[i] *= w[k]; + points[i](j) = a[k]; } - while (next(d, indices, stop)); } - - m_Weights.reserve(ordered.size()); - m_Points.reserve(ordered.size()); - for (const auto &i : ordered) - { - m_Weights.push_back(i.second); - m_Points.push_back(i.first); + LOG_TRACE("weights = " << core::CContainerPrinter::print(weights)); + LOG_TRACE("points = " << core::CContainerPrinter::print(points)); + for (std::size_t i = 0u; i < n; ++i) { + ordered[points[i]] += scale * weights[i]; } - } - - static std::atomic ms_Instance; - - TDoubleVec m_Weights; - TVectorVec m_Points; - }; - - //! Sparse grid Gauss-Legendre quadrature. - //! - //! This implements Smolyak's sparse grid construction using Gauss- - //! Legendre quadrature to numerically integrate a function f(x) on - //! the box [a_1, b_1] x ... x [a_n, b_n]. - //! - //! \param[in] function The function to integrate. - //! \param[in] a The lower integration limits. - //! \param[in] b The upper integration limits. - //! \param[out] result Filled with the integral of \p function over [\p a, \p b]. - //! \note There must be exactly one upper and lower integration limit for - //! each dimension. - //! \note This only supports up to ten dimensions. - //! - //! \tparam ORDER The order of quadrature to use. - //! \tparam DIMENSION The number of dimensions. - //! \tparam F It is assumed that this has the signature: - //! bool function(const CVectorNx1 &x, T &f) - //! where f is filled in with the value of the function at x and returning - //! false means that the function could not be evaluated at x. - //! \tparam T The type of range of \p f. This must have a meaningful - //! default constructor, support multiplication by a double and addition. - template - static bool sparseGaussLegendre(const F &function, - const TDoubleVec &a, - const TDoubleVec &b, - T &result) - { - using TSparseQuadrature = CSparseGaussLegendreQuadrature; - using TVector = typename TSparseQuadrature::TVector; - using TVectorVec = typename TSparseQuadrature::TVectorVec; - - result = T(); - - if (a.size() != static_cast(DIMENSION)) - { - LOG_ERROR("Bad lower limits: " << core::CContainerPrinter::print(a)); - return false; + } while (next(d, indices, stop)); } - if (b.size() != static_cast(DIMENSION)) - { - LOG_ERROR("Bad upper limits: " << core::CContainerPrinter::print(b)); - return false; + + m_Weights.reserve(ordered.size()); + m_Points.reserve(ordered.size()); + for (const auto& i : ordered) { + m_Weights.push_back(i.second); + m_Points.push_back(i.first); } + } - const TDoubleVec &weights = TSparseQuadrature::instance().weights(); - const TVectorVec &points = TSparseQuadrature::instance().points(); + static std::atomic ms_Instance; + + TDoubleVec m_Weights; + TVectorVec m_Points; + }; + + //! Sparse grid Gauss-Legendre quadrature. + //! 
+ //! This implements Smolyak's sparse grid construction using Gauss- + //! Legendre quadrature to numerically integrate a function f(x) on + //! the box [a_1, b_1] x ... x [a_n, b_n]. + //! + //! \param[in] function The function to integrate. + //! \param[in] a The lower integration limits. + //! \param[in] b The upper integration limits. + //! \param[out] result Filled with the integral of \p function over [\p a, \p b]. + //! \note There must be exactly one upper and lower integration limit for + //! each dimension. + //! \note This only supports up to ten dimensions. + //! + //! \tparam ORDER The order of quadrature to use. + //! \tparam DIMENSION The number of dimensions. + //! \tparam F It is assumed that this has the signature: + //! bool function(const CVectorNx1 &x, T &f) + //! where f is filled in with the value of the function at x and returning + //! false means that the function could not be evaluated at x. + //! \tparam T The type of range of \p f. This must have a meaningful + //! default constructor, support multiplication by a double and addition. + template + static bool sparseGaussLegendre(const F& function, const TDoubleVec& a, const TDoubleVec& b, T& result) { + using TSparseQuadrature = CSparseGaussLegendreQuadrature; + using TVector = typename TSparseQuadrature::TVector; + using TVectorVec = typename TSparseQuadrature::TVectorVec; + + result = T(); + + if (a.size() != static_cast(DIMENSION)) { + LOG_ERROR("Bad lower limits: " << core::CContainerPrinter::print(a)); + return false; + } + if (b.size() != static_cast(DIMENSION)) { + LOG_ERROR("Bad upper limits: " << core::CContainerPrinter::print(b)); + return false; + } - // Evaluate f(x) at the abscissas and compute the weighted sum - // of the quadrature. + const TDoubleVec& weights = TSparseQuadrature::instance().weights(); + const TVectorVec& points = TSparseQuadrature::instance().points(); - TVector a_(a.begin(), a.end()); - TVector b_(b.begin(), b.end()); - TVector centre = (a_ + b_) / 2.0; - TVector range = (b_ - a_) / 2.0; + // Evaluate f(x) at the abscissas and compute the weighted sum + // of the quadrature. - for (std::size_t i = 0; i < weights.size(); ++i) - { - T fx; - if (!function(centre + range * points[i], fx)) - { - return false; - } - fx *= weights[i]; - result += fx; - } + TVector a_(a.begin(), a.end()); + TVector b_(b.begin(), b.end()); + TVector centre = (a_ + b_) / 2.0; + TVector range = (b_ - a_) / 2.0; - for (std::size_t i = 0u; i < DIMENSION; ++i) - { - result *= range(i); + for (std::size_t i = 0; i < weights.size(); ++i) { + T fx; + if (!function(centre + range * points[i], fx)) { + return false; } + fx *= weights[i]; + result += fx; + } - return true; + for (std::size_t i = 0u; i < DIMENSION; ++i) { + result *= range(i); } - private: - //! \brief Definitions of the weights and abscissas for different orders - //! of Gauss-Legendre quadrature. - class MATHS_EXPORT CGaussLegendreQuadrature - { - public: - static const double *weights(EOrder order); - static const double *abscissas(EOrder order); - - private: - //! Order one. - static const double WEIGHTS1[1]; - static const double ABSCISSAS1[1]; - - //! Order two. - static const double WEIGHTS2[2]; - static const double ABSCISSAS2[2]; - - //! Order three. - static const double WEIGHTS3[3]; - static const double ABSCISSAS3[3]; - - //! Order four. - static const double WEIGHTS4[4]; - static const double ABSCISSAS4[4]; - - //! Order five. - static const double WEIGHTS5[5]; - static const double ABSCISSAS5[5]; - - //! Order six. 
- static const double WEIGHTS6[6]; - static const double ABSCISSAS6[6]; - - //! Order seven. - static const double WEIGHTS7[7]; - static const double ABSCISSAS7[7]; - - //! Order eight. - static const double WEIGHTS8[8]; - static const double ABSCISSAS8[8]; - - //! Order nine. - static const double WEIGHTS9[9]; - static const double ABSCISSAS9[9]; - - //! Order ten. - static const double WEIGHTS10[10]; - static const double ABSCISSAS10[10]; - }; + return true; + } + +private: + //! \brief Definitions of the weights and abscissas for different orders + //! of Gauss-Legendre quadrature. + class MATHS_EXPORT CGaussLegendreQuadrature { + public: + static const double* weights(EOrder order); + static const double* abscissas(EOrder order); private: - //! This is used to protect initialisation of the - //! CSparseGaussLegendreQuadrature singleton. There's a chicken-and-egg - //! situation here with static initialisation, which means that the - //! CSparseGaussLegendreQuadrature cannot be used before main() has run. - static core::CFastMutex ms_Mutex; + //! Order one. + static const double WEIGHTS1[1]; + static const double ABSCISSAS1[1]; + + //! Order two. + static const double WEIGHTS2[2]; + static const double ABSCISSAS2[2]; + + //! Order three. + static const double WEIGHTS3[3]; + static const double ABSCISSAS3[3]; + + //! Order four. + static const double WEIGHTS4[4]; + static const double ABSCISSAS4[4]; + + //! Order five. + static const double WEIGHTS5[5]; + static const double ABSCISSAS5[5]; + + //! Order six. + static const double WEIGHTS6[6]; + static const double ABSCISSAS6[6]; + + //! Order seven. + static const double WEIGHTS7[7]; + static const double ABSCISSAS7[7]; + + //! Order eight. + static const double WEIGHTS8[8]; + static const double ABSCISSAS8[8]; + + //! Order nine. + static const double WEIGHTS9[9]; + static const double ABSCISSAS9[9]; + + //! Order ten. + static const double WEIGHTS10[10]; + static const double ABSCISSAS10[10]; + }; + +private: + //! This is used to protect initialisation of the + //! CSparseGaussLegendreQuadrature singleton. There's a chicken-and-egg + //! situation here with static initialisation, which means that the + //! CSparseGaussLegendreQuadrature cannot be used before main() has run. + static core::CFastMutex ms_Mutex; }; template -std::atomic *> - CIntegration::CSparseGaussLegendreQuadrature::ms_Instance; - +std::atomic*> CIntegration::CSparseGaussLegendreQuadrature::ms_Instance; } } diff --git a/include/maths/CKMeansFast.h b/include/maths/CKMeansFast.h index 674625b46e..f0a398c90e 100644 --- a/include/maths/CKMeansFast.h +++ b/include/maths/CKMeansFast.h @@ -23,28 +23,19 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace kmeans_fast_detail -{ +namespace ml { +namespace maths { +namespace kmeans_fast_detail { using TSizeVec = std::vector; //! Get the closest filtered centre to \p point. template -std::size_t closest(const std::vector ¢res, - ITR filter, - ITR end, - const POINT &point) -{ +std::size_t closest(const std::vector& centres, ITR filter, ITR end, const POINT& point) { std::size_t result = *filter; double d = (point - centres[result]).euclidean(); - for (++filter; filter != end; ++filter) - { + for (++filter; filter != end; ++filter) { double di = (point - centres[*filter]).euclidean(); - if (di < d) - { + if (di < d) { result = *filter; d = di; } @@ -54,13 +45,9 @@ std::size_t closest(const std::vector ¢res, //! Get the closest filtered centre to \p point. 
template -std::size_t closest(const std::vector ¢res, - const TSizeVec &filter, - const POINT &point) -{ +std::size_t closest(const std::vector& centres, const TSizeVec& filter, const POINT& point) { return closest(centres, filter.begin(), filter.end(), point); } - } //! \brief Implementation of efficient k-means algorithm. @@ -84,497 +71,387 @@ std::size_t closest(const std::vector ¢res, //! by the brackets operator and have member functions called dimension //! and euclidean - which gives the Euclidean norm of the vector. template -class CKMeansFast -{ +class CKMeansFast { +public: + using TSizeVec = std::vector; + using TPointPointPr = std::pair; + using TPointVec = std::vector; + using TPointVecVec = std::vector; + + //! A cluster. + //! + //! DESCRIPTION:\n + //! This associates cluster centre and points. It + //! also provides fast comparison by a checksum and sorts the + //! points for stable comparison. + class CCluster { public: - using TSizeVec = std::vector; - using TPointPointPr = std::pair; - using TPointVec = std::vector; - using TPointVecVec = std::vector; + CCluster() : m_Checksum(0) {} - //! A cluster. - //! - //! DESCRIPTION:\n - //! This associates cluster centre and points. It - //! also provides fast comparison by a checksum and sorts the - //! points for stable comparison. - class CCluster - { - public: - CCluster() : m_Checksum(0) {} - - //! Check for equality using checksum and then points if the - //! checksum is ambiguous. - bool operator==(const CCluster &other) const - { - return m_Checksum == other.m_Checksum - && m_Points == other.m_Points; - } - - //! Total ordering by checksum breaking ties using expensive - //! comparison on all points. - bool operator<(const CCluster &rhs) const - { - return m_Checksum < rhs.m_Checksum - || (m_Checksum == rhs.m_Checksum && m_Points < rhs.m_Points); - } - - //! Get the number of points in the cluster. - std::size_t size() const - { - return m_Points.size(); - } - - //! Set the cluster centre. - void centre(const POINT ¢re) - { - m_Centre = centre; - } - //! Get the cluster centre. - const POINT ¢re() const - { - return m_Centre; - } - - //! Swap the points into place and recalculate the checksum. - void points(TPointVec &points) - { - m_Points.swap(points); - std::sort(m_Points.begin(), m_Points.end()); - m_Checksum = CChecksum::calculate(0, m_Points); - } - //! Get the cluster points. - const TPointVec &points() const - { - return m_Points; - } - - //! Get the cluster checksum. - uint64_t checksum() const - { - return m_Checksum; - } - - private: - //! The centroid of the points in this cluster. - POINT m_Centre; - //! The points in the cluster. - TPointVec m_Points; - //! A checksum for the points in the cluster. - uint64_t m_Checksum; - }; + //! Check for equality using checksum and then points if the + //! checksum is ambiguous. + bool operator==(const CCluster& other) const { return m_Checksum == other.m_Checksum && m_Points == other.m_Points; } - using TClusterVec = std::vector; + //! Total ordering by checksum breaking ties using expensive + //! comparison on all points. 
+ bool operator<(const CCluster& rhs) const { + return m_Checksum < rhs.m_Checksum || (m_Checksum == rhs.m_Checksum && m_Points < rhs.m_Points); + } - protected: - using TBarePoint = typename SStripped::Type; - using TBarePointPrecise = typename SFloatingPoint::Type; - using TMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; - using TMeanAccumulatorVec = std::vector; - using TBoundingBox = CBoundingBox; - class CKdTreeNodeData; - using TNode = typename CKdTree::SNode; + //! Get the number of points in the cluster. + std::size_t size() const { return m_Points.size(); } - //! \brief The data the x-means algorithm needs at each k-d - //! tree node. - //! - //! DESCRIPTION:\n - //! At every node the algorithm needs the axis aligned - //! bounding box of the points in the branch rooted at that - //! node together with their centroid. - class CKdTreeNodeData - { - public: - CKdTreeNodeData() {} - explicit CKdTreeNodeData(const POINT &x) : - m_BoundingBox(x), - m_Centroid() - { - m_Centroid.add(x); - } - - //! Get the bounding box. - const TBoundingBox &boundingBox() const - { - return m_BoundingBox; - } - - //! Get the centroid. - const TMeanAccumulator ¢roid() const - { - return m_Centroid; - } - - //! Combine the bounding boxes and centroids. - void add(const CKdTreeNodeData &other) const - { - m_BoundingBox.add(other.m_BoundingBox); - m_Centroid += other.m_Centroid; - } - - //! Add \p x to the bounding box and centroid. - void add(const POINT &x) const - { - m_BoundingBox.add(x); - m_Centroid.add(x); - } - - //! Clear the bounding box and centroid. - void clear() const - { - m_BoundingBox.clear(); - m_Centroid = TMeanAccumulator(); - } - - private: - //! The points' bounding box. - mutable TBoundingBox m_BoundingBox; - //! The centroid of the points. - mutable TMeanAccumulator m_Centroid; - }; + //! Set the cluster centre. + void centre(const POINT& centre) { m_Centre = centre; } + //! Get the cluster centre. + const POINT& centre() const { return m_Centre; } - //! \brief Propagates data needed by the x-means algorithm - //! up the k-d tree. - //! - //! DESCRIPTION:\n - //! At every node the algorithm needs the axis aligned - //! bounding box of the points in the branch together with - //! their centroid. This can be computed in a single post- - //! order depth first traversal of the k-d tree. This annotates - //! the data onto the k-d tree nodes. - struct SDataPropagator - { - //! Propagate the data to \p node. - bool operator()(const TNode &node) const - { - node.clear(); - node.add(node.s_Point); - this->propagate(node.s_LeftChild, node); - this->propagate(node.s_RightChild, node); - return true; - } + //! Swap the points into place and recalculate the checksum. + void points(TPointVec& points) { + m_Points.swap(points); + std::sort(m_Points.begin(), m_Points.end()); + m_Checksum = CChecksum::calculate(0, m_Points); + } + //! Get the cluster points. + const TPointVec& points() const { return m_Points; } + + //! Get the cluster checksum. + uint64_t checksum() const { return m_Checksum; } + + private: + //! The centroid of the points in this cluster. + POINT m_Centre; + //! The points in the cluster. + TPointVec m_Points; + //! A checksum for the points in the cluster. 
+        uint64_t m_Checksum;
+    };
+
+    using TClusterVec = std::vector<CCluster>;
+
+protected:
+    using TBarePoint = typename SStripped<POINT>::Type;
+    using TBarePointPrecise = typename SFloatingPoint<TBarePoint, double>::Type;
+    using TMeanAccumulator = typename CBasicStatistics::SSampleMean<TBarePointPrecise>::TAccumulator;
+    using TMeanAccumulatorVec = std::vector<TMeanAccumulator>;
+    using TBoundingBox = CBoundingBox<TBarePointPrecise>;
+    class CKdTreeNodeData;
+    using TNode = typename CKdTree<POINT, CKdTreeNodeData>::SNode;
+
+    //! \brief The data the x-means algorithm needs at each k-d
+    //! tree node.
+    //!
+    //! DESCRIPTION:\n
+    //! At every node the algorithm needs the axis aligned
+    //! bounding box of the points in the branch rooted at that
+    //! node together with their centroid.
+    class CKdTreeNodeData {
+    public:
+        CKdTreeNodeData() {}
+        explicit CKdTreeNodeData(const POINT& x) : m_BoundingBox(x), m_Centroid() { m_Centroid.add(x); }
+
+        //! Get the bounding box.
+        const TBoundingBox& boundingBox() const { return m_BoundingBox; }
+
+        //! Get the centroid.
+        const TMeanAccumulator& centroid() const { return m_Centroid; }
+
+        //! Combine the bounding boxes and centroids.
+        void add(const CKdTreeNodeData& other) const {
+            m_BoundingBox.add(other.m_BoundingBox);
+            m_Centroid += other.m_Centroid;
+        }
+
+        //! Add \p x to the bounding box and centroid.
+        void add(const POINT& x) const {
+            m_BoundingBox.add(x);
+            m_Centroid.add(x);
+        }
+
+        //! Clear the bounding box and centroid.
+        void clear() const {
+            m_BoundingBox.clear();
+            m_Centroid = TMeanAccumulator();
+        }
+
+    private:
+        //! The points' bounding box.
+        mutable TBoundingBox m_BoundingBox;
+        //! The centroid of the points.
+        mutable TMeanAccumulator m_Centroid;
+    };
+
+    //! \brief Propagates data needed by the x-means algorithm
+    //! up the k-d tree.
+    //!
+    //! DESCRIPTION:\n
+    //! At every node the algorithm needs the axis aligned
+    //! bounding box of the points in the branch together with
+    //! their centroid. This can be computed in a single post-
+    //! order depth first traversal of the k-d tree. This annotates
+    //! the data onto the k-d tree nodes.
+    struct SDataPropagator {
+        //! Propagate the data to \p node.
+        bool operator()(const TNode& node) const {
+            node.clear();
+            node.add(node.s_Point);
+            this->propagate(node.s_LeftChild, node);
+            this->propagate(node.s_RightChild, node);
+            return true;
+        }
-        //! Update \p data with the data from \p child.
-        void propagate(const TNode *child,
-                       const CKdTreeNodeData &data) const
-        {
-            if (child)
-            {
-                data.add(*child);
-            }
+        //! Update \p data with the data from \p child.
+        void propagate(const TNode* child, const CKdTreeNodeData& data) const {
+            if (child) {
+                data.add(*child);
            }
+        }
+    };
+
+    //! \brief Maintains a set of candidate centres which could
+    //! be the closest centre to a point in a k-d tree branch.
+    //!
+    //! DESCRIPTION:\n
+    //! This is responsible for propagating the cluster centres
+    //! down the k-d tree. The idea is that cluster centres are
+    //! removed when it is determined that they are further from
+    //! all points in the branch than some other cluster centre.
+    //! See http://www.cs.umd.edu/~mount/Projects/KMeans/pami02.pdf
+    //! for more details.
+    class CCentreFilter {
+    public:
+        //! \brief Predicate used to compute whether a centre
+        //! is further from the bounding box of a collection
+        //! of points than a specified point.
+        class CFurtherFrom {
+        public:
+            CFurtherFrom(const TBoundingBox& bb_, std::size_t x_, const TPointVec& centres_) : bb(&bb_), x(x_), centres(&centres_) {}
+
+            bool operator()(std::size_t y) const { return y == x ?
false : bb->closerToX((*centres)[x], (*centres)[y]); } + + private: + const TBoundingBox* bb; + std::size_t x; + const TPointVec* centres; }; - //! \brief Maintains a set of candidate centres which could - //! be the closest centre to a point in k-d tree branch. - //! - //! DESCRIPTION\n - //! This is responsible for propagating the cluster centres - //! down the k-d tree. The idea is that cluster centres are - //! removed when it is determined that they are further from - //! all points in the branch than some other cluster centre. - //! See http://www.cs.umd.edu/~mount/Projects/KMeans/pami02.pdf - //! for more details. - class CCentreFilter - { - public: - //! \brief Predicate used to compute whether a centre - //! is further from the bounding box of a collection - //! of points than a specified point. - class CFurtherFrom - { - public: - CFurtherFrom(const TBoundingBox &bb_, - std::size_t x_, - const TPointVec ¢res_) : - bb(&bb_), x(x_), centres(¢res_) - {} - - bool operator()(std::size_t y) const - { - return y == x ? false : bb->closerToX((*centres)[x], - (*centres)[y]); - } - - private: - const TBoundingBox *bb; - std::size_t x; - const TPointVec *centres; - }; - - public: - explicit CCentreFilter(const TPointVec ¢res) : - m_Centres(¢res), - m_Filter(boost::counting_iterator(0), - boost::counting_iterator(centres.size())) - {} - - //! Get the centres. - const TPointVec ¢res() const - { - return *m_Centres; - } - - //! Get the filter. - const TSizeVec &filter() const - { - return m_Filter; - } - - //! Update the filter with to remove all centres which - //! are further from \p bb than one of the current centres - //! in the filter. - //! - //! This is the *key* step in the acceleration of k-means. - //! The idea is to first find the point closest to the - //! centre of \p bb and then remove all centres which - //! are further than this from every point in the bounding - //! box. The farthest point in a bounding box must be one - //! of the 2^d corners of the bounding box. However, this - //! can be found in O(d). (See CBoundingBox::closerToX.) - //! - //! The centres are propagated down the k-d tree in a pre- - //! order depth first traversal. As soon as one centre is - //! closer to the bounding box of the points in a branch - //! the traversal can terminate and update the centre with - //! their centroid. - void prune(const TBoundingBox &bb) - { - namespace detail = kmeans_fast_detail; - - if (m_Filter.size() > 1) - { - std::size_t closest = detail::closest(*m_Centres, m_Filter, POINT(bb.centre())); - m_Filter.erase(std::remove_if(m_Filter.begin(), - m_Filter.end(), - CFurtherFrom(bb, closest, *m_Centres)), - m_Filter.end()); - } - } - - private: - //! The current centres. - const TPointVec *m_Centres; - - //! The centres which could be closer to one of the points - //! in the current branch of the k-d tree. - TSizeVec m_Filter; - }; + public: + explicit CCentreFilter(const TPointVec& centres) + : m_Centres(¢res), + m_Filter(boost::counting_iterator(0), boost::counting_iterator(centres.size())) {} - //! \brief Updates the cluster centres in an iteration of Lloyd's - //! algorithm. - //! - //! DESCRIPTION:\n - //! This is used in a pre-order depth first traversal of the - //! k-d tree of points to efficiently update the cluster centres - //! in one iteration of Lloyd's algorithm. Each point is assigned - //! to its closest centre and the centre placed at the centroid - //! of its assigned points. 
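The O(d) corner test that the prune documentation above leans on is worth making concrete. For centres x and y and an axis aligned box, every point of the box is closer to x than to y exactly when the box corner most favourable to y is. A sketch of that predicate; this is an illustrative reimplementation of the idea behind CBoundingBox::closerToX, not the library code:

    #include <array>
    #include <cstddef>
    #include <iostream>

    // Is every point of the axis-aligned box [a, b] closer to x than to
    // y? It suffices to test the single corner maximising the projection
    // onto (y - x), found coordinate-wise in O(d), rather than checking
    // all 2^d corners.
    template<std::size_t N>
    bool closerToX(const std::array<double, N>& a,
                   const std::array<double, N>& b,
                   const std::array<double, N>& x,
                   const std::array<double, N>& y) {
        double dxx = 0.0; // squared distance from the worst corner to x
        double dyy = 0.0; // squared distance from the worst corner to y
        for (std::size_t i = 0; i < N; ++i) {
            double corner = y[i] > x[i] ? b[i] : a[i];
            dxx += (corner - x[i]) * (corner - x[i]);
            dyy += (corner - y[i]) * (corner - y[i]);
        }
        return dxx < dyy;
    }

    int main() {
        // Box [0,1]^2 with x at its centre: y is farther from every
        // point of the box, so y could be pruned from the filter.
        std::array<double, 2> a{{0.0, 0.0}}, b{{1.0, 1.0}};
        std::array<double, 2> x{{0.5, 0.5}}, y{{3.0, 3.0}};
        std::cout << std::boolalpha << closerToX(a, b, x, y) << '\n'; // true
    }

The correctness argument is the halfspace one: the set of points closer to y than x is a halfspace, and the maximum of the linear functional defining it over an axis aligned box is attained at the corner chosen coordinate-wise above.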
- class CCentroidComputer - { - public: - CCentroidComputer(const TPointVec ¢res, - TMeanAccumulatorVec ¢roids) : - m_Centres(centres), - m_Centroids(¢roids) - {} - - //! Update the centres with \p node. - //! - //! \return True if we need to recurse and false otherwise. - bool operator()(const TNode &node) - { - namespace detail = kmeans_fast_detail; - - m_Centres.prune(node.boundingBox()); - const TSizeVec &filter = m_Centres.filter(); - if (filter.size() == 1) - { - (*m_Centroids)[filter[0]] += node.centroid(); - return false; - } - else - { - const TPointVec ¢res = m_Centres.centres(); - const POINT &point = node.s_Point; - (*m_Centroids)[detail::closest(centres, filter, point)].add(point); - } - return true; - } - - private: - //! The current centres. - CCentreFilter m_Centres; - - //! Compute the new cluster centres. - TMeanAccumulatorVec *m_Centroids; - }; + //! Get the centres. + const TPointVec& centres() const { return *m_Centres; } + + //! Get the filter. + const TSizeVec& filter() const { return m_Filter; } - //! \brief Extracts the closest points to each centre from a - //! k-d tree in a single traversal. + //! Update the filter with to remove all centres which + //! are further from \p bb than one of the current centres + //! in the filter. //! - //! DESCRIPTION:\n - //! This is used in a post-order depth first traversal of the - //! k-d tree of points to extract the closest points to each - //! centre supplied to the constructor. - class CClosestPointsCollector - { - public: - CClosestPointsCollector(std::size_t numberPoints, - const TPointVec ¢res, - TPointVecVec &closestPoints) : - m_Centres(¢res), - m_ClosestPoints(&closestPoints) - { - m_ClosestPoints->resize(centres.size()); - for (std::size_t i = 0u; i < m_ClosestPoints->size(); ++i) - { - (*m_ClosestPoints)[i].clear(); - (*m_ClosestPoints)[i].reserve(numberPoints / m_ClosestPoints->size() + 1); - } - } - - //! Add \p node's point to the closest centre's nearest - //! point collection. - void operator()(const TNode &node) - { - namespace detail = kmeans_fast_detail; - std::size_t n = m_Centres->size(); - const POINT &point = node.s_Point; - (*m_ClosestPoints)[detail::closest(*m_Centres, - boost::counting_iterator(0), - boost::counting_iterator(n), - point)].push_back(point); - } - - private: - const TPointVec *m_Centres; - TPointVecVec *m_ClosestPoints; - }; + //! This is the *key* step in the acceleration of k-means. + //! The idea is to first find the point closest to the + //! centre of \p bb and then remove all centres which + //! are further than this from every point in the bounding + //! box. The farthest point in a bounding box must be one + //! of the 2^d corners of the bounding box. However, this + //! can be found in O(d). (See CBoundingBox::closerToX.) + //! + //! The centres are propagated down the k-d tree in a pre- + //! order depth first traversal. As soon as one centre is + //! closer to the bounding box of the points in a branch + //! the traversal can terminate and update the centre with + //! their centroid. + void prune(const TBoundingBox& bb) { + namespace detail = kmeans_fast_detail; + + if (m_Filter.size() > 1) { + std::size_t closest = detail::closest(*m_Centres, m_Filter, POINT(bb.centre())); + m_Filter.erase(std::remove_if(m_Filter.begin(), m_Filter.end(), CFurtherFrom(bb, closest, *m_Centres)), m_Filter.end()); + } + } + private: + //! The current centres. + const TPointVec* m_Centres; + + //! The centres which could be closer to one of the points + //! in the current branch of the k-d tree. 
+ TSizeVec m_Filter; + }; + + //! \brief Updates the cluster centres in an iteration of Lloyd's + //! algorithm. + //! + //! DESCRIPTION:\n + //! This is used in a pre-order depth first traversal of the + //! k-d tree of points to efficiently update the cluster centres + //! in one iteration of Lloyd's algorithm. Each point is assigned + //! to its closest centre and the centre placed at the centroid + //! of its assigned points. + class CCentroidComputer { public: - //! Reserve space for \p n points. - void reserve(std::size_t n) - { - m_Points.reserve(n); - } + CCentroidComputer(const TPointVec& centres, TMeanAccumulatorVec& centroids) : m_Centres(centres), m_Centroids(¢roids) {} - //! Set the points to cluster. + //! Update the centres with \p node. //! - //! \note \p points are reordered by this operation. - bool setPoints(TPointVec &points) - { - m_Points.build(points); - try - { - m_Points.postorderDepthFirst(SDataPropagator()); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to set up k-d tree state: " << e.what()); + //! \return True if we need to recurse and false otherwise. + bool operator()(const TNode& node) { + namespace detail = kmeans_fast_detail; + + m_Centres.prune(node.boundingBox()); + const TSizeVec& filter = m_Centres.filter(); + if (filter.size() == 1) { + (*m_Centroids)[filter[0]] += node.centroid(); return false; + } else { + const TPointVec& centres = m_Centres.centres(); + const POINT& point = node.s_Point; + (*m_Centroids)[detail::closest(centres, filter, point)].add(point); } return true; } - //! Set the initial centres to use. - //! - //! \note These are swapped in to place. - void setCentres(TPointVec ¢res) - { - m_Centres.swap(centres); + private: + //! The current centres. + CCentreFilter m_Centres; + + //! Compute the new cluster centres. + TMeanAccumulatorVec* m_Centroids; + }; + + //! \brief Extracts the closest points to each centre from a + //! k-d tree in a single traversal. + //! + //! DESCRIPTION:\n + //! This is used in a post-order depth first traversal of the + //! k-d tree of points to extract the closest points to each + //! centre supplied to the constructor. + class CClosestPointsCollector { + public: + CClosestPointsCollector(std::size_t numberPoints, const TPointVec& centres, TPointVecVec& closestPoints) + : m_Centres(¢res), m_ClosestPoints(&closestPoints) { + m_ClosestPoints->resize(centres.size()); + for (std::size_t i = 0u; i < m_ClosestPoints->size(); ++i) { + (*m_ClosestPoints)[i].clear(); + (*m_ClosestPoints)[i].reserve(numberPoints / m_ClosestPoints->size() + 1); + } } - //! A run of the k-means algorithm using at most \p maxIterations - //! of Lloyd's algorithm. - //! - //! \return True if it converged and false otherwise. - bool run(std::size_t maxIterations) - { - if (m_Centres.empty()) - { - return true; - } - for (std::size_t i = 0u; i < maxIterations; ++i) - { - if (!this->updateCentres()) - { - return true; - } - } + //! Add \p node's point to the closest centre's nearest + //! point collection. + void operator()(const TNode& node) { + namespace detail = kmeans_fast_detail; + std::size_t n = m_Centres->size(); + const POINT& point = node.s_Point; + (*m_ClosestPoints)[detail::closest( + *m_Centres, boost::counting_iterator(0), boost::counting_iterator(n), point)] + .push_back(point); + } + + private: + const TPointVec* m_Centres; + TPointVecVec* m_ClosestPoints; + }; + +public: + //! Reserve space for \p n points. + void reserve(std::size_t n) { m_Points.reserve(n); } + + //! Set the points to cluster. + //! + //! 
\note \p points are reordered by this operation. + bool setPoints(TPointVec& points) { + m_Points.build(points); + try { + m_Points.postorderDepthFirst(SDataPropagator()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to set up k-d tree state: " << e.what()); return false; } + return true; + } - //! Get the clusters. - //! - //! \param[out] result Filled in with the k clusters. - void clusters(TClusterVec &result) const - { - result.clear(); - if (m_Centres.empty()) - { - return; - } - result.resize(m_Centres.size()); - TPointVecVec clusters; - this->clusters(clusters); - for (std::size_t i = 0u; i < m_Centres.size(); ++i) - { - result[i].centre(m_Centres[i]); - result[i].points(clusters[i]); + //! Set the initial centres to use. + //! + //! \note These are swapped in to place. + void setCentres(TPointVec& centres) { m_Centres.swap(centres); } + + //! A run of the k-means algorithm using at most \p maxIterations + //! of Lloyd's algorithm. + //! + //! \return True if it converged and false otherwise. + bool run(std::size_t maxIterations) { + if (m_Centres.empty()) { + return true; + } + for (std::size_t i = 0u; i < maxIterations; ++i) { + if (!this->updateCentres()) { + return true; } } + return false; + } - //! Get the points in each cluster. - //! - //! \param[out] result Filled in with the closest point to each - //! of the k centres. - void clusters(TPointVecVec &result) const - { - result.clear(); - if (m_Centres.empty()) - { - return; - } - CClosestPointsCollector collector(m_Points.size(), m_Centres, result); - m_Points.postorderDepthFirst(collector); + //! Get the clusters. + //! + //! \param[out] result Filled in with the k clusters. + void clusters(TClusterVec& result) const { + result.clear(); + if (m_Centres.empty()) { + return; } + result.resize(m_Centres.size()); + TPointVecVec clusters; + this->clusters(clusters); + for (std::size_t i = 0u; i < m_Centres.size(); ++i) { + result[i].centre(m_Centres[i]); + result[i].points(clusters[i]); + } + } - //! Get the cluster centres. - const TPointVec ¢res() const - { - return m_Centres; + //! Get the points in each cluster. + //! + //! \param[out] result Filled in with the closest point to each + //! of the k centres. + void clusters(TPointVecVec& result) const { + result.clear(); + if (m_Centres.empty()) { + return; } + CClosestPointsCollector collector(m_Points.size(), m_Centres, result); + m_Points.postorderDepthFirst(collector); + } - private: - //! Single iteration of Lloyd's algorithm to update \p centres. - bool updateCentres() - { - using TCoordinate = typename SCoordinate::Type; - static const TCoordinate PRECISION = TCoordinate(5) - * std::numeric_limits::epsilon(); - TMeanAccumulatorVec newCentres(m_Centres.size()); - CCentroidComputer computer(m_Centres, newCentres); - m_Points.preorderDepthFirst(computer); - bool changed = false; - for (std::size_t i = 0u; i < newCentres.size(); ++i) - { - POINT newCentre(CBasicStatistics::mean(newCentres[i])); - if ((m_Centres[i] - newCentre).euclidean() > PRECISION * m_Centres[i].euclidean()) - { - m_Centres[i] = newCentre; - changed = true; - } + //! Get the cluster centres. + const TPointVec& centres() const { return m_Centres; } + +private: + //! Single iteration of Lloyd's algorithm to update \p centres. 
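Reading past the reformatting, the public interface above is used as: setPoints to build the k-d tree, setCentres to seed, run to iterate Lloyd's algorithm, then clusters to read the result. A hedged usage sketch; TVector2 is an assumption, standing for any type satisfying the POINT concept documented at the top of the class (CVectorNx1<double, 2> is one such candidate):

    #include <maths/CKMeansFast.h>

    #include <vector>

    using TVector2 = ml::maths::CVectorNx1<double, 2>; // assumed POINT type
    using TKMeans = ml::maths::CKMeansFast<TVector2>;

    bool clusterOnce(std::vector<TVector2>& points, std::vector<TVector2>& seedCentres) {
        TKMeans kmeans;
        kmeans.reserve(points.size());
        if (!kmeans.setPoints(points)) { // note: reorders the points
            return false;
        }
        kmeans.setCentres(seedCentres); // note: swapped into place
        bool converged = kmeans.run(20); // at most 20 Lloyd iterations
        TKMeans::TClusterVec clusters;
        kmeans.clusters(clusters); // centre plus sorted points per cluster
        return converged;
    }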
+ bool updateCentres() { + using TCoordinate = typename SCoordinate::Type; + static const TCoordinate PRECISION = TCoordinate(5) * std::numeric_limits::epsilon(); + TMeanAccumulatorVec newCentres(m_Centres.size()); + CCentroidComputer computer(m_Centres, newCentres); + m_Points.preorderDepthFirst(computer); + bool changed = false; + for (std::size_t i = 0u; i < newCentres.size(); ++i) { + POINT newCentre(CBasicStatistics::mean(newCentres[i])); + if ((m_Centres[i] - newCentre).euclidean() > PRECISION * m_Centres[i].euclidean()) { + m_Centres[i] = newCentre; + changed = true; } - return changed; } + return changed; + } - private: - //! The current cluster centroids. - TPointVec m_Centres; +private: + //! The current cluster centroids. + TPointVec m_Centres; - //! The points. - CKdTree m_Points; + //! The points. + CKdTree m_Points; }; //! \brief Implements "Arthur and Vassilvitskii"'s seed scheme for @@ -583,79 +460,70 @@ class CKMeansFast //! DESCRIPTION:\n //! See https://en.wikipedia.org/wiki/K-means%2B%2B for details. template -class CKMeansPlusPlusInitialization : private core::CNonCopyable -{ - public: - using TDoubleVec = std::vector; - using TSizeVec = std::vector; - using TPointVec = std::vector; +class CKMeansPlusPlusInitialization : private core::CNonCopyable { +public: + using TDoubleVec = std::vector; + using TSizeVec = std::vector; + using TPointVec = std::vector; + +public: + CKMeansPlusPlusInitialization(RNG& rng) : m_Rng(rng) {} + + //! Run the k-means++ centre selection algorithm on \p points. + //! + //! \param[in] points The points to cluster. + //! \param[in] k The number of seed centres to generate. + //! \param[out] result Filled in with the seed centres. + void run(const TPointVec& points, std::size_t k, TPointVec& result) const { + result.clear(); + if (points.empty() || k == 0) { + return; + } - public: - CKMeansPlusPlusInitialization(RNG &rng) : m_Rng(rng) {} + result.reserve(k); - //! Run the k-means++ centre selection algorithm on \p points. - //! - //! \param[in] points The points to cluster. - //! \param[in] k The number of seed centres to generate. - //! \param[out] result Filled in with the seed centres. - void run(const TPointVec &points, std::size_t k, TPointVec &result) const - { - result.clear(); - if (points.empty() || k == 0) - { - return; - } + std::size_t n = points.size(); + LOG_TRACE("# points = " << n); + + TSizeVec centre; + CSampling::uniformSample(m_Rng, 0, n, 1, centre); + LOG_TRACE("centre = " << centre[0]); - result.reserve(k); + result.push_back(points[centre[0]]); + LOG_TRACE("centres to date = " << core::CContainerPrinter::print(result)); - std::size_t n = points.size(); - LOG_TRACE("# points = " << n); + TDoubleVec distances; + TPointVec centres_; + CKdTree centres; + distances.resize(n); + centres_.reserve(k); + centres.reserve(k); - TSizeVec centre; - CSampling::uniformSample(m_Rng, 0, n, 1, centre); + for (std::size_t i = 1u; i < k; ++i) { + centres_.assign(result.begin(), result.end()); + centres.build(centres_); + + for (std::size_t j = 0u; j < n; ++j) { + const POINT* nn = centres.nearestNeighbour(points[j]); + distances[j] = nn ? 
square((points[j] - *nn).euclidean()) : 0.0; + } + + centre[0] = CSampling::categoricalSample(m_Rng, distances); LOG_TRACE("centre = " << centre[0]); result.push_back(points[centre[0]]); LOG_TRACE("centres to date = " << core::CContainerPrinter::print(result)); - - TDoubleVec distances; - TPointVec centres_; - CKdTree centres; - distances.resize(n); - centres_.reserve(k); - centres.reserve(k); - - for (std::size_t i = 1u; i < k; ++i) - { - centres_.assign(result.begin(), result.end()); - centres.build(centres_); - - for (std::size_t j = 0u; j < n; ++j) - { - const POINT *nn = centres.nearestNeighbour(points[j]); - distances[j] = nn ? square((points[j] - *nn).euclidean()) : 0.0; - } - - centre[0] = CSampling::categoricalSample(m_Rng, distances); - LOG_TRACE("centre = " << centre[0]); - - result.push_back(points[centre[0]]); - LOG_TRACE("centres to date = " << core::CContainerPrinter::print(result)); - } } + } - private: - //! Compute \p x square. - double square(double x) const - { - return x * x; - } +private: + //! Compute \p x square. + double square(double x) const { return x * x; } - private: - //! The random number generator. - RNG &m_Rng; +private: + //! The random number generator. + RNG& m_Rng; }; - } } diff --git a/include/maths/CKMeansOnline.h b/include/maths/CKMeansOnline.h index e18a7f1d5d..6a0933f2c7 100644 --- a/include/maths/CKMeansOnline.h +++ b/include/maths/CKMeansOnline.h @@ -17,13 +17,13 @@ #include #include -#include #include #include #include -#include #include +#include #include +#include #include #include @@ -31,10 +31,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Computes k-means of a set of points online using \f$O(k)\f$ //! memory. @@ -53,633 +51,527 @@ namespace maths //! by the brackets operator and have member functions called dimension //! and euclidean - which gives the Euclidean norm of the vector. template -class CKMeansOnline -{ +class CKMeansOnline { +public: + using TSizeVec = std::vector; + using TSizeVecVec = std::vector; + using TDoublePoint = typename SFloatingPoint::Type; + using TDoublePointVec = std::vector; + using TSphericalCluster = typename CSphericalCluster::Type; + using TSphericalClusterVec = std::vector; + using TSphericalClusterVecVec = std::vector; + using TKMeansOnlineVec = std::vector; + +protected: + //! \brief Checks if a cluster should be deleted based on its count. + class CShouldDelete { public: - using TSizeVec = std::vector; - using TSizeVecVec = std::vector; - using TDoublePoint = typename SFloatingPoint::Type; - using TDoublePointVec = std::vector; - using TSphericalCluster = typename CSphericalCluster::Type; - using TSphericalClusterVec = std::vector; - using TSphericalClusterVecVec = std::vector; - using TKMeansOnlineVec = std::vector; - - protected: - //! \brief Checks if a cluster should be deleted based on its count. 
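For reference, the D² sampling in CKMeansPlusPlusInitialization::run above generalises beyond the library types. The same selection rule in a freestanding form, using only standard library primitives (illustrative; maths::CSampling::categoricalSample plays the role std::discrete_distribution plays here):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <limits>
    #include <random>
    #include <vector>

    // k-means++ seeding for points on the line: the first centre is
    // uniform; each subsequent centre is drawn with probability
    // proportional to the squared distance to the nearest centre chosen
    // so far. Assumes k is no larger than the number of distinct points.
    std::vector<double> seedCentres(const std::vector<double>& points, std::size_t k, std::mt19937& rng) {
        std::vector<double> centres;
        if (points.empty() || k == 0) {
            return centres;
        }
        std::uniform_int_distribution<std::size_t> uniform(0, points.size() - 1);
        centres.push_back(points[uniform(rng)]);
        std::vector<double> weights(points.size());
        while (centres.size() < k) {
            for (std::size_t i = 0; i < points.size(); ++i) {
                double d = std::numeric_limits<double>::max();
                for (double centre : centres) {
                    d = std::min(d, std::fabs(points[i] - centre));
                }
                weights[i] = d * d; // zero for points already chosen
            }
            std::discrete_distribution<std::size_t> d2(weights.begin(), weights.end());
            centres.push_back(points[d2(rng)]);
        }
        return centres;
    }

Weighting by the squared distance is what gives k-means++ its O(log k) expected approximation guarantee relative to the optimal clustering.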
- class CShouldDelete - { - public: - CShouldDelete(double minimumCategoryCount) : - m_MinimumCategoryCount(minimumCategoryCount) - {} - - template - bool operator()(const CLUSTER &cluster) const - { - return CBasicStatistics::count(cluster.first) < m_MinimumCategoryCount; - } - - private: - double m_MinimumCategoryCount; - }; - - using TFloatPoint = typename SFloatingPoint::Type; - using TFloatCoordinate = typename SCoordinate::Type; - using TFloatPointDoublePr = std::pair; - using TFloatPointDoublePrVec = std::vector; - using TFloatMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; - using TFloatMeanAccumulatorDoublePr = std::pair; - using TFloatMeanAccumulatorDoublePrVec = std::vector; - using TDoubleMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; - using TDoubleMeanVarAccumulator = typename CBasicStatistics::SSampleMeanVar::TAccumulator; - - protected: - //! The minimum permitted size for the clusterer. - static const std::size_t MINIMUM_SPACE; + CShouldDelete(double minimumCategoryCount) : m_MinimumCategoryCount(minimumCategoryCount) {} - //! The maximum allowed size of the points buffer. - static const std::size_t MAXIMUM_BUFFER_SIZE; - - //! The number of times to seed the clustering in reduce. - static const std::size_t NUMBER_SEEDS; - - //! The maximum number of iterations to use for k-means in reduce. - static const std::size_t MAX_ITERATIONS; - - static const std::string K_TAG; - static const std::string CLUSTERS_TAG; - static const std::string POINTS_TAG; - static const std::string RNG_TAG; - - public: - //! \param[in] k The maximum space in numbers of clusters. - //! A cluster comprises one float point vector, one count and - //! a double holding the spherical variance. - //! \param[in] decayRate The rate at which we data ages out - //! of the clusterer. - //! \param[in] minimumCategoryCount The minimum permitted count - //! for a cluster. - //! \note This will store as much information about the points - //! subject to this constraint so will generally hold \p k - //! clusters. - CKMeansOnline(std::size_t k, - double decayRate = 0.0, - double minimumCategoryCount = MINIMUM_CATEGORY_COUNT) : - m_K(std::max(k, MINIMUM_SPACE)), - m_DecayRate(decayRate), - m_MinimumCategoryCount(minimumCategoryCount) - { - m_Clusters.reserve(m_K + MAXIMUM_BUFFER_SIZE + 1u); - m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); + template + bool operator()(const CLUSTER& cluster) const { + return CBasicStatistics::count(cluster.first) < m_MinimumCategoryCount; } - //! Create from part of a state document. 
- bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) - { - m_DecayRate = params.s_DecayRate; - m_MinimumCategoryCount = params.s_MinimumCategoryCount; - - do - { - const std::string name = traverser.name(); - RESTORE(RNG_TAG, m_Rng.fromString(traverser.value())); - RESTORE(K_TAG, core::CPersistUtils::restore(K_TAG, m_K, traverser)) - RESTORE(CLUSTERS_TAG, core::CPersistUtils::restore(CLUSTERS_TAG, m_Clusters, traverser)) - RESTORE(POINTS_TAG, core::CPersistUtils::restore(POINTS_TAG, m_PointsBuffer, traverser)) - } - while (traverser.next()); - return true; + private: + double m_MinimumCategoryCount; + }; + + using TFloatPoint = typename SFloatingPoint::Type; + using TFloatCoordinate = typename SCoordinate::Type; + using TFloatPointDoublePr = std::pair; + using TFloatPointDoublePrVec = std::vector; + using TFloatMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + using TFloatMeanAccumulatorDoublePr = std::pair; + using TFloatMeanAccumulatorDoublePrVec = std::vector; + using TDoubleMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + using TDoubleMeanVarAccumulator = typename CBasicStatistics::SSampleMeanVar::TAccumulator; + +protected: + //! The minimum permitted size for the clusterer. + static const std::size_t MINIMUM_SPACE; + + //! The maximum allowed size of the points buffer. + static const std::size_t MAXIMUM_BUFFER_SIZE; + + //! The number of times to seed the clustering in reduce. + static const std::size_t NUMBER_SEEDS; + + //! The maximum number of iterations to use for k-means in reduce. + static const std::size_t MAX_ITERATIONS; + + static const std::string K_TAG; + static const std::string CLUSTERS_TAG; + static const std::string POINTS_TAG; + static const std::string RNG_TAG; + +public: + //! \param[in] k The maximum space in numbers of clusters. + //! A cluster comprises one float point vector, one count and + //! a double holding the spherical variance. + //! \param[in] decayRate The rate at which we data ages out + //! of the clusterer. + //! \param[in] minimumCategoryCount The minimum permitted count + //! for a cluster. + //! \note This will store as much information about the points + //! subject to this constraint so will generally hold \p k + //! clusters. + CKMeansOnline(std::size_t k, double decayRate = 0.0, double minimumCategoryCount = MINIMUM_CATEGORY_COUNT) + : m_K(std::max(k, MINIMUM_SPACE)), m_DecayRate(decayRate), m_MinimumCategoryCount(minimumCategoryCount) { + m_Clusters.reserve(m_K + MAXIMUM_BUFFER_SIZE + 1u); + m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); + } + + //! Create from part of a state document. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + m_DecayRate = params.s_DecayRate; + m_MinimumCategoryCount = params.s_MinimumCategoryCount; + + do { + const std::string name = traverser.name(); + RESTORE(RNG_TAG, m_Rng.fromString(traverser.value())); + RESTORE(K_TAG, core::CPersistUtils::restore(K_TAG, m_K, traverser)) + RESTORE(CLUSTERS_TAG, core::CPersistUtils::restore(CLUSTERS_TAG, m_Clusters, traverser)) + RESTORE(POINTS_TAG, core::CPersistUtils::restore(POINTS_TAG, m_PointsBuffer, traverser)) + } while (traverser.next()); + return true; + } + + //! Persist state by passing to the supplied inserter. 
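acceptRestoreTraverser above and acceptPersistInserter below are the two halves of a tag-based round trip: persistence writes named values and restoration dispatches on each tag, skipping tags it does not recognise so older state remains readable. A toy sketch of that shape; the names and the line format here are invented, not the core API:

    #include <cstddef>
    #include <iostream>
    #include <sstream>
    #include <string>

    // Minimal tagged persist/restore round trip. Unknown tags are
    // ignored on restore, which is what keeps the format compatible as
    // fields are added over time.
    struct SToyState {
        std::size_t s_K = 0;
        std::string s_Rng;

        void persist(std::ostream& out) const {
            out << "k=" << s_K << '\n' << "rng=" << s_Rng << '\n';
        }
        bool restore(std::istream& in) {
            std::string line;
            while (std::getline(in, line)) {
                std::size_t split = line.find('=');
                if (split == std::string::npos) {
                    return false;
                }
                std::string tag = line.substr(0, split);
                std::string value = line.substr(split + 1);
                if (tag == "k") {
                    s_K = std::stoul(value);
                } else if (tag == "rng") {
                    s_Rng = value;
                } // else: skip unrecognised tags
            }
            return true;
        }
    };

    int main() {
        SToyState original;
        original.s_K = 24;
        original.s_Rng = "seed:42";
        std::stringstream state;
        original.persist(state);
        SToyState restored;
        restored.restore(state);
        std::cout << restored.s_K << ' ' << restored.s_Rng << '\n'; // 24 seed:42
    }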
+ void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertValue(RNG_TAG, m_Rng.toString()); + core::CPersistUtils::persist(K_TAG, m_K, inserter); + core::CPersistUtils::persist(CLUSTERS_TAG, m_Clusters, inserter); + core::CPersistUtils::persist(POINTS_TAG, m_PointsBuffer, inserter); + } + + //! Efficiently swap the contents of this and \p other. + void swap(CKMeansOnline& other) { + std::swap(m_Rng, other.m_Rng); + std::swap(m_K, other.m_K); + std::swap(m_DecayRate, other.m_DecayRate); + std::swap(m_MinimumCategoryCount, other.m_MinimumCategoryCount); + m_Clusters.swap(other.m_Clusters); + m_PointsBuffer.swap(other.m_PointsBuffer); + } + + //! Get the total number of clusters. + std::size_t size() const { return std::min(m_Clusters.size() + m_PointsBuffer.size(), m_K); } + + //! Get the clusters being maintained. + void clusters(TSphericalClusterVec& result) const { + result.clear(); + result.reserve(m_Clusters.size()); + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + const TFloatPoint& m = CBasicStatistics::mean(m_Clusters[i].first); + double n = CBasicStatistics::count(m_Clusters[i].first); + double v = m_Clusters[i].second; + result.push_back(TSphericalCluster(m, SCountAndVariance(n, v))); } + } - //! Persist state by passing to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - inserter.insertValue(RNG_TAG, m_Rng.toString()); - core::CPersistUtils::persist(K_TAG, m_K, inserter); - core::CPersistUtils::persist(CLUSTERS_TAG, m_Clusters, inserter); - core::CPersistUtils::persist(POINTS_TAG, m_PointsBuffer, inserter); - } + //! Get our best estimate of the \p k means clustering of the + //! k-means maintained by this object. + //! + //! \param[in] k The desired size for the clustering. + //! \param[out] result Filled in with the \p k means clustering. + bool kmeans(std::size_t k, TSphericalClusterVecVec& result) { + LOG_TRACE("split"); - //! Efficiently swap the contents of this and \p other. - void swap(CKMeansOnline &other) - { - std::swap(m_Rng, other.m_Rng); - std::swap(m_K, other.m_K); - std::swap(m_DecayRate, other.m_DecayRate); - std::swap(m_MinimumCategoryCount, other.m_MinimumCategoryCount); - m_Clusters.swap(other.m_Clusters); - m_PointsBuffer.swap(other.m_PointsBuffer); - } + result.clear(); - //! Get the total number of clusters. - std::size_t size() const - { - return std::min(m_Clusters.size() + m_PointsBuffer.size(), m_K); + if (k == 0) { + LOG_ERROR("Bad request for zero categories"); + return false; } - //! Get the clusters being maintained. - void clusters(TSphericalClusterVec &result) const - { - result.clear(); - result.reserve(m_Clusters.size()); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - const TFloatPoint &m = CBasicStatistics::mean(m_Clusters[i].first); - double n = CBasicStatistics::count(m_Clusters[i].first); - double v = m_Clusters[i].second; - result.push_back(TSphericalCluster(m, SCountAndVariance(n, v))); - } + this->reduce(); + LOG_TRACE("raw clusters = " << this->print()); + + TSphericalClusterVec clusters; + this->clusters(clusters); + + return kmeans(m_Rng, clusters, k, result); + } + + //! Get our best estimate of the \p k means clustering of + //! \p clusters. + //! + //! \param[in] rng The random number generator. + //! \param[in] clusters The spherical clusters to cluster. + //! \param[in] k The desired size for the clustering. + //! \param[out] result Filled in with the \p k means clustering + //! of \p clusters. 
+ template + static bool kmeans(RNG& rng, TSphericalClusterVec& clusters, std::size_t k, TSphericalClusterVecVec& result) { + result.clear(); + + if (k == 0) { + LOG_ERROR("Bad request for zero categories"); + return false; } - - //! Get our best estimate of the \p k means clustering of the - //! k-means maintained by this object. - //! - //! \param[in] k The desired size for the clustering. - //! \param[out] result Filled in with the \p k means clustering. - bool kmeans(std::size_t k, TSphericalClusterVecVec &result) - { - LOG_TRACE("split"); - - result.clear(); - - if (k == 0) - { - LOG_ERROR("Bad request for zero categories"); - return false; - } - - this->reduce(); - LOG_TRACE("raw clusters = " << this->print()); - - TSphericalClusterVec clusters; - this->clusters(clusters); - - return kmeans(m_Rng, clusters, k, result); + if (clusters.empty()) { + return true; } - //! Get our best estimate of the \p k means clustering of - //! \p clusters. - //! - //! \param[in] rng The random number generator. - //! \param[in] clusters The spherical clusters to cluster. - //! \param[in] k The desired size for the clustering. - //! \param[out] result Filled in with the \p k means clustering - //! of \p clusters. - template - static bool kmeans(RNG &rng, - TSphericalClusterVec &clusters, - std::size_t k, - TSphericalClusterVecVec &result) - { - result.clear(); + result.reserve(std::min(k, clusters.size())); - if (k == 0) - { - LOG_ERROR("Bad request for zero categories"); - return false; - } - if (clusters.empty()) - { - return true; - } - - result.reserve(std::min(k, clusters.size())); - - if (k >= clusters.size()) - { - TSphericalClusterVec cluster(1); - for (std::size_t i = 0u; i < clusters.size(); ++i) - { - cluster[0] = clusters[i]; - result.push_back(cluster); - } - return true; - } - else if (k == 1) - { - result.push_back(clusters); - return true; - } - - CKMeansFast kmeans; - kmeans.setPoints(clusters); - CBasicStatistics::COrderStatisticsStack minCost; - TSphericalClusterVec centres; - TSphericalClusterVecVec candidates; - for (std::size_t i = 0u; i < NUMBER_SEEDS; ++i) - { - CKMeansPlusPlusInitialization seedCentres(rng); - seedCentres.run(clusters, k, centres); - kmeans.setCentres(centres); - kmeans.run(MAX_ITERATIONS); - kmeans.clusters(candidates); - CSphericalGaussianInfoCriterion criterion; - criterion.add(candidates); - double cost = criterion.calculate(); - if (minCost.add(cost)) - { - result.swap(candidates); - } + if (k >= clusters.size()) { + TSphericalClusterVec cluster(1); + for (std::size_t i = 0u; i < clusters.size(); ++i) { + cluster[0] = clusters[i]; + result.push_back(cluster); } - - LOG_TRACE("result = " << core::CContainerPrinter::print(result)); - + return true; + } else if (k == 1) { + result.push_back(clusters); return true; } - //! Split this into n online k-means clusterers corresponding to - //! \p split. - //! - //! \param[in] split The desired partition of the k clusters. - //! \param[out] result Filled in with the clusterers representing - //! \p split if it is a valid partition and cleared otherwise. 
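// The static kmeans above reruns the clustering from NUMBER_SEEDS k-means++
// seedings and keeps the candidate with the smallest spherical Gaussian
// information criterion. A compact one-dimensional analogue of that control
// flow: uniform random seeding and within-cluster squared error stand in for
// k-means++ and the information criterion, so this is a sketch, not the
// library's algorithm.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <random>
#include <vector>

// One run of Lloyd's algorithm; returns the within-cluster squared error.
inline double lloyd(const std::vector<double>& points, std::vector<double>& centres, std::size_t maxIterations) {
    for (std::size_t iteration = 0; iteration < maxIterations; ++iteration) {
        std::vector<double> sums(centres.size(), 0.0);
        std::vector<double> counts(centres.size(), 0.0);
        for (double x : points) {
            std::size_t best = 0;
            for (std::size_t i = 1; i < centres.size(); ++i) {
                if (std::fabs(x - centres[i]) < std::fabs(x - centres[best])) {
                    best = i;
                }
            }
            sums[best] += x;
            counts[best] += 1.0;
        }
        for (std::size_t i = 0; i < centres.size(); ++i) {
            if (counts[i] > 0.0) {
                centres[i] = sums[i] / counts[i];
            }
        }
    }
    double cost = 0.0;
    for (double x : points) {
        double nearest = std::numeric_limits<double>::max();
        for (double centre : centres) {
            nearest = std::min(nearest, (x - centre) * (x - centre));
        }
        cost += nearest;
    }
    return cost;
}

// Rerun from several seeds and keep the lowest cost clustering.
inline std::vector<double> bestOfSeeds(const std::vector<double>& points, std::size_t k, std::size_t numberSeeds) {
    std::mt19937 rng(42);
    std::uniform_int_distribution<std::size_t> pick(0, points.size() - 1);
    std::vector<double> best;
    double bestCost = std::numeric_limits<double>::max();
    for (std::size_t seed = 0; seed < numberSeeds; ++seed) {
        std::vector<double> centres;
        for (std::size_t i = 0; i < k; ++i) {
            centres.push_back(points[pick(rng)]);
        }
        double cost = lloyd(points, centres, 10);
        if (cost < bestCost) {
            bestCost = cost;
            best = centres;
        }
    }
    return best;
}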
- bool split(const TSizeVecVec &split, TKMeansOnlineVec &result) - { - result.clear(); - this->reduce(); - if (!this->checkSplit(split)) - { - return false; - } - - result.reserve(split.size()); - TFloatMeanAccumulatorDoublePrVec clusters; - for (std::size_t i = 0u; i < split.size(); ++i) - { - clusters.clear(); - clusters.reserve(split[i].size()); - for (std::size_t j = 0u; j < split[i].size(); ++j) - { - clusters.push_back(m_Clusters[split[i][j]]); - } - result.push_back(CKMeansOnline(m_K, m_DecayRate, m_MinimumCategoryCount, clusters)); + CKMeansFast kmeans; + kmeans.setPoints(clusters); + CBasicStatistics::COrderStatisticsStack minCost; + TSphericalClusterVec centres; + TSphericalClusterVecVec candidates; + for (std::size_t i = 0u; i < NUMBER_SEEDS; ++i) { + CKMeansPlusPlusInitialization seedCentres(rng); + seedCentres.run(clusters, k, centres); + kmeans.setCentres(centres); + kmeans.run(MAX_ITERATIONS); + kmeans.clusters(candidates); + CSphericalGaussianInfoCriterion criterion; + criterion.add(candidates); + double cost = criterion.calculate(); + if (minCost.add(cost)) { + result.swap(candidates); } - - return true; } - //! Add \p x to the clusterer. - //! - //! \param[in] x A point to add to the clusterer. - //! \param[in] count The count weight of this point. - void add(const TDoublePoint &x, double count = 1.0) - { - if (m_PointsBuffer.size() < MAXIMUM_BUFFER_SIZE) - { - m_PointsBuffer.push_back(std::make_pair(x, count)); - } - else - { - m_Clusters.push_back(TFloatMeanAccumulatorDoublePr()); - CKMeansOnline::add(x, count, m_Clusters.back()); - this->reduce(); - } + LOG_TRACE("result = " << core::CContainerPrinter::print(result)); + + return true; + } + + //! Split this into n online k-means clusterers corresponding to + //! \p split. + //! + //! \param[in] split The desired partition of the k clusters. + //! \param[out] result Filled in with the clusterers representing + //! \p split if it is a valid partition and cleared otherwise. + bool split(const TSizeVecVec& split, TKMeansOnlineVec& result) { + result.clear(); + this->reduce(); + if (!this->checkSplit(split)) { + return false; } - //! Merge \p other with this clusterer. - //! - //! \param[in] other Another clusterer to merge with this one. - void merge(const CKMeansOnline &other) - { - LOG_TRACE("Merge"); - - for (std::size_t i = 0u; i < other.m_PointsBuffer.size(); ++i) - { - m_Clusters.push_back(TFloatMeanAccumulatorDoublePr()); - CKMeansOnline::add(other.m_PointsBuffer[i].first, - other.m_PointsBuffer[i].second, - m_Clusters.back()); + result.reserve(split.size()); + TFloatMeanAccumulatorDoublePrVec clusters; + for (std::size_t i = 0u; i < split.size(); ++i) { + clusters.clear(); + clusters.reserve(split[i].size()); + for (std::size_t j = 0u; j < split[i].size(); ++j) { + clusters.push_back(m_Clusters[split[i][j]]); } - m_Clusters.insert(m_Clusters.end(), - other.m_Clusters.begin(), other.m_Clusters.end()); - - this->reduce(); - - // Reclaim memory from the vector buffer. - TFloatMeanAccumulatorDoublePrVec categories(m_Clusters); - m_Clusters.swap(categories); + result.push_back(CKMeansOnline(m_K, m_DecayRate, m_MinimumCategoryCount, clusters)); } - //! Set the rate at which information is aged out. - void decayRate(double decayRate) - { - m_DecayRate = decayRate; + return true; + } + + //! Add \p x to the clusterer. + //! + //! \param[in] x A point to add to the clusterer. + //! \param[in] count The count weight of this point. 
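// split() above carves the maintained clusters into disjoint groups given a
// partition of their indices, handing each group to a fresh clusterer. The
// index bookkeeping in isolation looks like the sketch below; the validity
// check is simplified (checkSplit rejects bad partitions up front).
#include <cstddef>
#include <utility>
#include <vector>

template<typename T>
std::vector<std::vector<T>> partitionByIndex(const std::vector<T>& items,
                                             const std::vector<std::vector<std::size_t>>& split) {
    std::vector<std::vector<T>> result;
    result.reserve(split.size());
    for (const auto& group : split) {
        std::vector<T> subset;
        subset.reserve(group.size());
        for (std::size_t index : group) {
            if (index < items.size()) { // A real implementation fails the whole split instead.
                subset.push_back(items[index]);
            }
        }
        result.push_back(std::move(subset));
    }
    return result;
}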
+ void add(const TDoublePoint& x, double count = 1.0) { + if (m_PointsBuffer.size() < MAXIMUM_BUFFER_SIZE) { + m_PointsBuffer.push_back(std::make_pair(x, count)); + } else { + m_Clusters.push_back(TFloatMeanAccumulatorDoublePr()); + CKMeansOnline::add(x, count, m_Clusters.back()); + this->reduce(); } + } - //! Propagate the clusters forwards by \p time. - void propagateForwardsByTime(double time) - { - if (time < 0.0) - { - LOG_ERROR("Can't propagate backwards in time"); - return; - } - - double alpha = std::exp(-m_DecayRate * time); - LOG_TRACE("alpha = " << alpha); + //! Merge \p other with this clusterer. + //! + //! \param[in] other Another clusterer to merge with this one. + void merge(const CKMeansOnline& other) { + LOG_TRACE("Merge"); - this->age(alpha); + for (std::size_t i = 0u; i < other.m_PointsBuffer.size(); ++i) { + m_Clusters.push_back(TFloatMeanAccumulatorDoublePr()); + CKMeansOnline::add(other.m_PointsBuffer[i].first, other.m_PointsBuffer[i].second, m_Clusters.back()); } + m_Clusters.insert(m_Clusters.end(), other.m_Clusters.begin(), other.m_Clusters.end()); - //! Age by a factor \p alpha, which should be in the range (0, 1). - void age(double alpha) - { - LOG_TRACE("clusters = " << core::CContainerPrinter::print(m_Clusters)); + this->reduce(); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - m_Clusters[i].first.age(alpha); - } + // Reclaim memory from the vector buffer. + TFloatMeanAccumulatorDoublePrVec categories(m_Clusters); + m_Clusters.swap(categories); + } - // Prune any dead categories: we're not interested in - // maintaining categories with low counts. - m_Clusters.erase(std::remove_if(m_Clusters.begin(), m_Clusters.end(), - CShouldDelete(m_MinimumCategoryCount)), m_Clusters.end()); + //! Set the rate at which information is aged out. + void decayRate(double decayRate) { m_DecayRate = decayRate; } - LOG_TRACE("clusters = " << core::CContainerPrinter::print(m_Clusters)); + //! Propagate the clusters forwards by \p time. + void propagateForwardsByTime(double time) { + if (time < 0.0) { + LOG_ERROR("Can't propagate backwards in time"); + return; } - //! Get the current points buffer. - bool buffering() const - { - return m_PointsBuffer.size() > 0; - } - - //! Get \p n samples of the distribution corresponding to the - //! categories we are maintaining. - //! - //! \param[in] numberSamples The desired number of samples. - //! \param[out] result Filled in with the samples of the distribution. - void sample(std::size_t numberSamples, TDoublePointVec &result) const - { - result.clear(); - if (numberSamples == 0) - { - return; - } + double alpha = std::exp(-m_DecayRate * time); + LOG_TRACE("alpha = " << alpha); - using TDoubleVec = std::vector; - using TDoubleSizePr = std::pair; + this->age(alpha); + } - static const double ALMOST_ONE = 0.99999; + //! Age by a factor \p alpha, which should be in the range (0, 1). + void age(double alpha) { + LOG_TRACE("clusters = " << core::CContainerPrinter::print(m_Clusters)); - // See, for example, Effective C++ item 3. 
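// propagateForwardsByTime() above turns elapsed time into a multiplicative
// factor alpha = exp(-decayRate * time) applied to every cluster's count, so
// aging over consecutive intervals composes exactly:
// exp(-r * t1) * exp(-r * t2) == exp(-r * (t1 + t2)). A minimal standalone
// sketch with an illustrative Cluster type:
#include <cmath>
#include <vector>

struct Cluster {
    double mean = 0.0;
    double count = 0.0;
};

inline void propagateForwardsByTime(std::vector<Cluster>& clusters, double decayRate, double time) {
    if (time < 0.0) {
        return; // Can't propagate backwards in time.
    }
    double alpha = std::exp(-decayRate * time);
    for (auto& cluster : clusters) {
        cluster.count *= alpha; // Means are untouched; only the evidence decays.
    }
    // Pruning of clusters whose counts have decayed below the minimum
    // permitted count would follow here, as in age() above.
}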
- const_cast(this)->reduce(); - LOG_TRACE("categories = " << core::CContainerPrinter::print(m_Clusters)); + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + m_Clusters[i].first.age(alpha); + } - TDoubleVec counts; - counts.reserve(m_Clusters.size()); - double Z = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - double ni = CBasicStatistics::count(m_Clusters[i].first); - counts.push_back(ni); - Z += ni; - } - Z /= static_cast(numberSamples); - for (std::size_t i = 0u; i < counts.size(); ++i) - { - counts[i] /= Z; - } - LOG_TRACE("weights = " << core::CContainerPrinter::print(counts) - << ", Z = " << Z - << ", n = " << numberSamples); - - result.reserve(2 * numberSamples); - - TDoubleVec weights; - TDoublePointVec categorySamples; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - double ni = counts[i]; - - categorySamples.clear(); - TDoublePoint m = CBasicStatistics::mean(m_Clusters[i].first); - if (m_Clusters[i].second == 0.0) - { - categorySamples.push_back(m); - } - else - { - std::size_t ni_ = static_cast(std::ceil(ni)); - TDoublePoint v(m_Clusters[i].second); - sampleGaussian(ni_, m, v.diagonal(), categorySamples); - } + // Prune any dead categories: we're not interested in + // maintaining categories with low counts. + m_Clusters.erase(std::remove_if(m_Clusters.begin(), m_Clusters.end(), CShouldDelete(m_MinimumCategoryCount)), m_Clusters.end()); + + LOG_TRACE("clusters = " << core::CContainerPrinter::print(m_Clusters)); + } + + //! Get the current points buffer. + bool buffering() const { return m_PointsBuffer.size() > 0; } + + //! Get \p n samples of the distribution corresponding to the + //! categories we are maintaining. + //! + //! \param[in] numberSamples The desired number of samples. + //! \param[out] result Filled in with the samples of the distribution. + void sample(std::size_t numberSamples, TDoublePointVec& result) const { + result.clear(); + if (numberSamples == 0) { + return; + } - ni /= static_cast(categorySamples.size()); + using TDoubleVec = std::vector; + using TDoubleSizePr = std::pair; - result.insert(result.end(), categorySamples.begin(), categorySamples.end()); - weights.insert(weights.end(), categorySamples.size(), ni); - } - LOG_TRACE("samples = " << core::CContainerPrinter::print(result)); - LOG_TRACE("weights = " << core::CContainerPrinter::print(weights)); - - TDoublePointVec final; - final.reserve(static_cast(std::ceil(std::accumulate(weights.begin(), weights.end(), 0.0)))); - TDoubleMeanAccumulator sample; - for (;;) - { - CBasicStatistics::COrderStatisticsStack nearest; - const TDoublePoint &sample_ = CBasicStatistics::mean(sample); - for (std::size_t j = 0u; j < result.size(); ++j) - { - if (weights[j] > 0.0) - { - nearest.add(std::make_pair((result[j] - sample_).euclidean(), j)); - } - } - if (nearest.count() == 0) - { - break; - } + static const double ALMOST_ONE = 0.99999; - std::size_t j = nearest[0].second; - const TDoublePoint &xj = result[j]; - do - { - double nj = std::min(1.0 - CBasicStatistics::count(sample), weights[j]); - sample.add(xj, nj); - weights[j] -= nj; - if (CBasicStatistics::count(sample) > ALMOST_ONE) - { - final.push_back(CBasicStatistics::mean(sample)); - sample = TDoubleMeanAccumulator(); - } - } - while (weights[j] > 0.0); - } + // See, for example, Effective C++ item 3. 
+ const_cast(this)->reduce(); + LOG_TRACE("categories = " << core::CContainerPrinter::print(m_Clusters)); - result.swap(final); - LOG_TRACE("# samples = " << result.size()); - LOG_TRACE("samples = " << core::CContainerPrinter::print(result)); + TDoubleVec counts; + counts.reserve(m_Clusters.size()); + double Z = 0.0; + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + double ni = CBasicStatistics::count(m_Clusters[i].first); + counts.push_back(ni); + Z += ni; } - - //! Print this classifier for debug. - std::string print() const - { - return core::CContainerPrinter::print(m_Clusters); + Z /= static_cast(numberSamples); + for (std::size_t i = 0u; i < counts.size(); ++i) { + counts[i] /= Z; } + LOG_TRACE("weights = " << core::CContainerPrinter::print(counts) << ", Z = " << Z << ", n = " << numberSamples); + + result.reserve(2 * numberSamples); + + TDoubleVec weights; + TDoublePointVec categorySamples; + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + double ni = counts[i]; + + categorySamples.clear(); + TDoublePoint m = CBasicStatistics::mean(m_Clusters[i].first); + if (m_Clusters[i].second == 0.0) { + categorySamples.push_back(m); + } else { + std::size_t ni_ = static_cast(std::ceil(ni)); + TDoublePoint v(m_Clusters[i].second); + sampleGaussian(ni_, m, v.diagonal(), categorySamples); + } - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const - { - seed = CChecksum::calculate(seed, m_K); - seed = CChecksum::calculate(seed, m_DecayRate); - seed = CChecksum::calculate(seed, m_Clusters); - return CChecksum::calculate(seed, m_PointsBuffer); - } + ni /= static_cast(categorySamples.size()); - //! Get the memory used by this component - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CKMeansOnline"); - core::CMemoryDebug::dynamicSize("m_Clusters", m_Clusters, mem); - core::CMemoryDebug::dynamicSize("m_PointsBuffer", m_PointsBuffer, mem); + result.insert(result.end(), categorySamples.begin(), categorySamples.end()); + weights.insert(weights.end(), categorySamples.size(), ni); } + LOG_TRACE("samples = " << core::CContainerPrinter::print(result)); + LOG_TRACE("weights = " << core::CContainerPrinter::print(weights)); + + TDoublePointVec final; + final.reserve(static_cast(std::ceil(std::accumulate(weights.begin(), weights.end(), 0.0)))); + TDoubleMeanAccumulator sample; + for (;;) { + CBasicStatistics::COrderStatisticsStack nearest; + const TDoublePoint& sample_ = CBasicStatistics::mean(sample); + for (std::size_t j = 0u; j < result.size(); ++j) { + if (weights[j] > 0.0) { + nearest.add(std::make_pair((result[j] - sample_).euclidean(), j)); + } + } + if (nearest.count() == 0) { + break; + } - //! Get the memory used by this component - std::size_t memoryUsage() const - { - std::size_t mem = core::CMemory::dynamicSize(m_Clusters); - mem += core::CMemory::dynamicSize(m_PointsBuffer); - return mem; + std::size_t j = nearest[0].second; + const TDoublePoint& xj = result[j]; + do { + double nj = std::min(1.0 - CBasicStatistics::count(sample), weights[j]); + sample.add(xj, nj); + weights[j] -= nj; + if (CBasicStatistics::count(sample) > ALMOST_ONE) { + final.push_back(CBasicStatistics::mean(sample)); + sample = TDoubleMeanAccumulator(); + } + } while (weights[j] > 0.0); } - protected: - //! Construct a new classifier with the specified space limit - //! \p space and categories \p categories. 
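// sample() above draws a fixed budget of samples, allocating each cluster a
// share proportional to its count and sampling that cluster's spherical
// Gaussian. A simplified standalone sketch: the per-cluster rounding here is
// naive, whereas the real routine also recombines fractional weights into
// unit-weight samples.
#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

struct Cluster {
    double mean = 0.0;
    double variance = 0.0;
    double count = 0.0;
};

inline std::vector<double> sampleClusters(const std::vector<Cluster>& clusters,
                                          std::size_t numberSamples,
                                          std::mt19937& rng) {
    std::vector<double> result;
    double Z = 0.0;
    for (const Cluster& cluster : clusters) {
        Z += cluster.count;
    }
    if (Z == 0.0 || numberSamples == 0) {
        return result;
    }
    for (const Cluster& cluster : clusters) {
        // This cluster's share of the sample budget, by count.
        auto n = static_cast<std::size_t>(std::ceil(static_cast<double>(numberSamples) * cluster.count / Z));
        if (cluster.variance == 0.0) {
            result.insert(result.end(), n, cluster.mean);
        } else {
            std::normal_distribution<double> gaussian(cluster.mean, std::sqrt(cluster.variance));
            for (std::size_t i = 0; i < n; ++i) {
                result.push_back(gaussian(rng));
            }
        }
    }
    return result;
}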
- CKMeansOnline(std::size_t k, - double decayRate, - double minimumCategoryCount, - TFloatMeanAccumulatorDoublePrVec &clusters) : - m_K(std::max(k, MINIMUM_SPACE)), - m_DecayRate(decayRate), - m_MinimumCategoryCount(minimumCategoryCount) - { - m_Clusters.swap(clusters); - m_Clusters.reserve(m_K + MAXIMUM_BUFFER_SIZE + 1u); - m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); + result.swap(final); + LOG_TRACE("# samples = " << result.size()); + LOG_TRACE("samples = " << core::CContainerPrinter::print(result)); + } + + //! Print this classifier for debug. + std::string print() const { return core::CContainerPrinter::print(m_Clusters); } + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const { + seed = CChecksum::calculate(seed, m_K); + seed = CChecksum::calculate(seed, m_DecayRate); + seed = CChecksum::calculate(seed, m_Clusters); + return CChecksum::calculate(seed, m_PointsBuffer); + } + + //! Get the memory used by this component + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CKMeansOnline"); + core::CMemoryDebug::dynamicSize("m_Clusters", m_Clusters, mem); + core::CMemoryDebug::dynamicSize("m_PointsBuffer", m_PointsBuffer, mem); + } + + //! Get the memory used by this component + std::size_t memoryUsage() const { + std::size_t mem = core::CMemory::dynamicSize(m_Clusters); + mem += core::CMemory::dynamicSize(m_PointsBuffer); + return mem; + } + +protected: + //! Construct a new classifier with the specified space limit + //! \p space and categories \p categories. + CKMeansOnline(std::size_t k, double decayRate, double minimumCategoryCount, TFloatMeanAccumulatorDoublePrVec& clusters) + : m_K(std::max(k, MINIMUM_SPACE)), m_DecayRate(decayRate), m_MinimumCategoryCount(minimumCategoryCount) { + m_Clusters.swap(clusters); + m_Clusters.reserve(m_K + MAXIMUM_BUFFER_SIZE + 1u); + m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); + } + + //! Sanity check \p split. + bool checkSplit(const TSizeVecVec& split) const { + if (split.empty()) { + LOG_ERROR("Bad split = " << core::CContainerPrinter::print(split)); + return false; } - - //! Sanity check \p split. - bool checkSplit(const TSizeVecVec &split) const - { - if (split.empty()) - { + for (std::size_t i = 0u; i < split.size(); ++i) { + if (split[i].empty()) { LOG_ERROR("Bad split = " << core::CContainerPrinter::print(split)); return false; } - for (std::size_t i = 0u; i < split.size(); ++i) - { - if (split[i].empty()) - { + for (std::size_t j = 0u; j < split[i].size(); ++j) { + if (split[i][j] >= m_Clusters.size()) { LOG_ERROR("Bad split = " << core::CContainerPrinter::print(split)); return false; } - for (std::size_t j = 0u; j < split[i].size(); ++j) - { - if (split[i][j] >= m_Clusters.size()) - { - LOG_ERROR("Bad split = " << core::CContainerPrinter::print(split)); - return false; - } - } } - return true; } - - //! Reduce the number of clusters to m_K by k-means clustering. - void reduce() - { - // Add all the points as new spherical clusters and reduce. 
- for (std::size_t i = 0u; i < m_PointsBuffer.size(); ++i) - { - m_Clusters.push_back(TFloatMeanAccumulatorDoublePr()); - CKMeansOnline::add(m_PointsBuffer[i].first, - m_PointsBuffer[i].second, - m_Clusters.back()); - } - m_PointsBuffer.clear(); - - if (m_Clusters.size() < m_K) - { - return; - } - - LOG_TRACE("clusters = " << core::CContainerPrinter::print(m_Clusters)); - LOG_TRACE("# clusters = " << m_Clusters.size()); - - TSphericalClusterVecVec kclusters; - { - TSphericalClusterVec clusters; - this->clusters(clusters); - kmeans(m_Rng, clusters, m_K, kclusters); - } - - m_Clusters.resize(kclusters.size()); - for (std::size_t i = 0u; i < kclusters.size(); ++i) - { - TDoubleMeanVarAccumulator cluster; - for (std::size_t j = 0u; j < kclusters[i].size(); ++j) - { - cluster.add(kclusters[i][j]); - } - double n = CBasicStatistics::count(cluster); - const TDoublePoint &m = CBasicStatistics::mean(cluster); - m_Clusters[i].first = CBasicStatistics::accumulator(TFloatCoordinate(n), TFloatPoint(m)); - m_Clusters[i].second = variance(cluster); - } - - LOG_TRACE("reduced clusters = " << core::CContainerPrinter::print(m_Clusters)); - LOG_TRACE("# reduced clusters = " << m_Clusters.size()); + return true; + } + + //! Reduce the number of clusters to m_K by k-means clustering. + void reduce() { + // Add all the points as new spherical clusters and reduce. + for (std::size_t i = 0u; i < m_PointsBuffer.size(); ++i) { + m_Clusters.push_back(TFloatMeanAccumulatorDoublePr()); + CKMeansOnline::add(m_PointsBuffer[i].first, m_PointsBuffer[i].second, m_Clusters.back()); } + m_PointsBuffer.clear(); - //! Add \p count copies of \p mx to the cluster \p cluster. - static void add(const TDoublePoint &mx, - double count, - TFloatMeanAccumulatorDoublePr &cluster) - { - double nx = count; - TDoublePoint vx(0.0); - double nc = CBasicStatistics::count(cluster.first); - TDoublePoint mc = CBasicStatistics::mean(cluster.first); - TDoublePoint vc(cluster.second); - TDoubleMeanVarAccumulator moments = CBasicStatistics::accumulator(nc, mc, vc) - + CBasicStatistics::accumulator(nx, mx, vx); - TFloatCoordinate ncx = CBasicStatistics::count(moments); - TFloatPoint mcx = CBasicStatistics::mean(moments); - cluster.first = CBasicStatistics::accumulator(ncx, mcx); - cluster.second = variance(moments); + if (m_Clusters.size() < m_K) { + return; } - //! Get the spherically symmetric variance from \p moments. - static double variance(const TDoubleMeanVarAccumulator &moments) + LOG_TRACE("clusters = " << core::CContainerPrinter::print(m_Clusters)); + LOG_TRACE("# clusters = " << m_Clusters.size()); + + TSphericalClusterVecVec kclusters; { - const TDoublePoint &v = CBasicStatistics::maximumLikelihoodVariance(moments); - return v.L1() / static_cast(v.dimension()); + TSphericalClusterVec clusters; + this->clusters(clusters); + kmeans(m_Rng, clusters, m_K, kclusters); } - private: - //! The random number generator. - CPRNG::CXorOShiro128Plus m_Rng; - - //! The number of clusters to maintain. - std::size_t m_K; - - //! The rate at which the categories lose information. - double m_DecayRate; - - //! The minimum permitted count for a cluster. - double m_MinimumCategoryCount; - - //! The clusters we are maintaining. 
- TFloatMeanAccumulatorDoublePrVec m_Clusters; + m_Clusters.resize(kclusters.size()); + for (std::size_t i = 0u; i < kclusters.size(); ++i) { + TDoubleMeanVarAccumulator cluster; + for (std::size_t j = 0u; j < kclusters[i].size(); ++j) { + cluster.add(kclusters[i][j]); + } + double n = CBasicStatistics::count(cluster); + const TDoublePoint& m = CBasicStatistics::mean(cluster); + m_Clusters[i].first = CBasicStatistics::accumulator(TFloatCoordinate(n), TFloatPoint(m)); + m_Clusters[i].second = variance(cluster); + } - //! A buffer of the points added while the space constraint - //! is satisfied. - TFloatPointDoublePrVec m_PointsBuffer; + LOG_TRACE("reduced clusters = " << core::CContainerPrinter::print(m_Clusters)); + LOG_TRACE("# reduced clusters = " << m_Clusters.size()); + } + + //! Add \p count copies of \p mx to the cluster \p cluster. + static void add(const TDoublePoint& mx, double count, TFloatMeanAccumulatorDoublePr& cluster) { + double nx = count; + TDoublePoint vx(0.0); + double nc = CBasicStatistics::count(cluster.first); + TDoublePoint mc = CBasicStatistics::mean(cluster.first); + TDoublePoint vc(cluster.second); + TDoubleMeanVarAccumulator moments = CBasicStatistics::accumulator(nc, mc, vc) + CBasicStatistics::accumulator(nx, mx, vx); + TFloatCoordinate ncx = CBasicStatistics::count(moments); + TFloatPoint mcx = CBasicStatistics::mean(moments); + cluster.first = CBasicStatistics::accumulator(ncx, mcx); + cluster.second = variance(moments); + } + + //! Get the spherically symmetric variance from \p moments. + static double variance(const TDoubleMeanVarAccumulator& moments) { + const TDoublePoint& v = CBasicStatistics::maximumLikelihoodVariance(moments); + return v.L1() / static_cast(v.dimension()); + } + +private: + //! The random number generator. + CPRNG::CXorOShiro128Plus m_Rng; + + //! The number of clusters to maintain. + std::size_t m_K; + + //! The rate at which the categories lose information. + double m_DecayRate; + + //! The minimum permitted count for a cluster. + double m_MinimumCategoryCount; + + //! The clusters we are maintaining. + TFloatMeanAccumulatorDoublePrVec m_Clusters; + + //! A buffer of the points added while the space constraint + //! is satisfied. + TFloatPointDoublePrVec m_PointsBuffer; }; template @@ -698,7 +590,6 @@ template const std::string CKMeansOnline::POINTS_TAG("c"); template const std::string CKMeansOnline::RNG_TAG("d"); - } } diff --git a/include/maths/CKMeansOnline1d.h b/include/maths/CKMeansOnline1d.h index b6781119db..17306ab5e1 100644 --- a/include/maths/CKMeansOnline1d.h +++ b/include/maths/CKMeansOnline1d.h @@ -13,10 +13,8 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A single pass online clusterer which assigns points //! to a fixed clustering of the data. @@ -29,124 +27,112 @@ namespace maths //! Note that this is a soft clustering so that we assign the soft //! membership of a point to a cluster based on the probability that //! it is generated by the corresponding normal. -class MATHS_EXPORT CKMeansOnline1d : public CClusterer1d -{ - public: - using TDoubleVec = TPointPreciseVec; - using TDoubleDoublePrVec = TPointPreciseDoublePrVec; - using TNormalVec = std::vector; - using TNormalVecItr = TNormalVec::iterator; - using TNormalVecCItr = TNormalVec::const_iterator; - - public: - //! Construct a new clusterer. - //! - //! \param[in] clusters The seed clusters. - CKMeansOnline1d(TNormalVec &clusters); - - //! Construct by traversing a state document. 
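// The static add() above folds a weighted point into a cluster by merging
// moment sets: pooled count, pooled mean, and pooled maximum likelihood
// variance including the between-means term. The same algebra in a
// standalone one-dimensional sketch; in higher dimensions the library then
// averages the per-coordinate variances (v.L1() / dimension) to keep the
// cluster spherical. The Moments type is illustrative.
struct Moments {
    double count = 0.0;
    double mean = 0.0;
    double variance = 0.0; // Maximum likelihood, i.e. normalised by count.
};

inline Moments merge(const Moments& a, const Moments& b) {
    Moments result;
    result.count = a.count + b.count;
    if (result.count == 0.0) {
        return result;
    }
    result.mean = (a.count * a.mean + b.count * b.mean) / result.count;
    double da = a.mean - result.mean;
    double db = b.mean - result.mean;
    result.variance = (a.count * (a.variance + da * da) +
                       b.count * (b.variance + db * db)) / result.count;
    return result;
}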
- CKMeansOnline1d(const SDistributionRestoreParams &params,
- core::CStateRestoreTraverser &traverser);
-
- //! \name Clusterer Contract
- //@{
- //! Get the tag name for this clusterer.
- virtual std::string persistenceTag() const;
-
- //! Persist state by passing information to the supplied inserter.
- virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
- //! Creates a copy of the clusterer.
- //!
- //! \warning Caller owns returned object.
- virtual CKMeansOnline1d *clone() const;
-
- //! Clear the current clusterer state.
- virtual void clear();
-
- //! Get the number of clusters.
- virtual std::size_t numberClusters() const;
-
- //! Set the type of data being clustered.
- virtual void dataType(maths_t::EDataType dataType);
-
- //! Set the rate at which information is aged out.
- virtual void decayRate(double decayRate);
-
- //! Check if the cluster identified by \p index exists.
- virtual bool hasCluster(std::size_t index) const;
-
- //! Get the centre of the cluster identified by \p index.
- virtual bool clusterCentre(std::size_t index,
- double &result) const;
-
- //! Get the spread of the cluster identified by \p index.
- virtual bool clusterSpread(std::size_t index,
- double &result) const;
-
- //! Gets the index of the cluster(s) to which \p point belongs
- //! together with their weighting factors.
- virtual void cluster(const double &point,
- TSizeDoublePr2Vec &result,
- double count = 1.0) const;
-
- //! Update the clustering with \p point and return its cluster(s)
- //! together with their weighting factors.
- virtual void add(const double &point,
- TSizeDoublePr2Vec &clusters,
- double count = 1.0);
-
- //! Update the clustering with \p points.
- virtual void add(const TDoubleDoublePrVec &points);
-
- //! Propagate the clustering forwards by \p time.
- //!
- //! The cluster priors relax back to non-informative and the
- //! cluster probabilities become less at a rate controlled by
- //! the decay rate parameter (optionally supplied to the constructor).
- //!
- //! \param[in] time The time increment to apply.
- virtual void propagateForwardsByTime(double time);
-
- //! Sample the cluster with index \p index.
- //!
- //! \param[in] index The index of the cluster to sample.
- //! \param[in] numberSamples The desired number of samples.
- //! \param[out] samples Filled in with the samples.
- //! \return True if the cluster could be sampled and false otherwise.
- virtual bool sample(std::size_t index,
- std::size_t numberSamples,
- TDoubleVec &samples) const;
-
- //! Get the probability of the cluster with the index \p index.
- //!
- //! \param[in] index The index of the cluster of interest.
- //! \return The probability of the cluster identified by \p index.
- virtual double probability(std::size_t index) const;
-
- //! Get a checksum for this object.
- virtual uint64_t checksum(uint64_t seed = 0) const;
-
- //! Get the memory used by this component
- virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
- //! Get the memory used by this component
- virtual std::size_t memoryUsage() const;
-
- //! Get the static size of this object - used for virtual hierarchies
- virtual std::size_t staticSize() const;
- //@}
-
- private:
- //! Restore by traversing a state document.
- bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
- core::CStateRestoreTraverser &traverser);
-
- private:
- //! The (fixed) clusters.
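// Per the class description, CKMeansOnline1d assigns a point soft membership
// of each cluster in proportion to the likelihood that the corresponding
// normal generated it. The normalisation step in isolation, with a plain
// parameter struct standing in for the conjugate priors the class really
// holds:
#include <cmath>
#include <cstddef>
#include <vector>

struct Normal {
    double mean = 0.0;
    double variance = 1.0;
};

inline std::vector<double> softAssign(double x, const std::vector<Normal>& normals) {
    const double pi = 3.141592653589793;
    std::vector<double> weights;
    weights.reserve(normals.size());
    double Z = 0.0;
    for (const Normal& normal : normals) {
        double residual = x - normal.mean;
        double likelihood = std::exp(-0.5 * residual * residual / normal.variance) /
                            std::sqrt(2.0 * pi * normal.variance);
        weights.push_back(likelihood);
        Z += likelihood;
    }
    for (double& weight : weights) {
        // Normalise to probabilities; fall back to uniform if all likelihoods vanish.
        weight = Z > 0.0 ? weight / Z : 1.0 / static_cast<double>(weights.size());
    }
    return weights;
}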
- TNormalVec m_Clusters; +class MATHS_EXPORT CKMeansOnline1d : public CClusterer1d { +public: + using TDoubleVec = TPointPreciseVec; + using TDoubleDoublePrVec = TPointPreciseDoublePrVec; + using TNormalVec = std::vector; + using TNormalVecItr = TNormalVec::iterator; + using TNormalVecCItr = TNormalVec::const_iterator; + +public: + //! Construct a new clusterer. + //! + //! \param[in] clusters The seed clusters. + CKMeansOnline1d(TNormalVec& clusters); + + //! Construct by traversing a state document. + CKMeansOnline1d(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! \name Clusterer Contract + //@{ + //! Get the tag name for this clusterer. + virtual std::string persistenceTag() const; + + //! Persist state by passing information to the supplied inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Creates a copy of the clusterer. + //! + //! \warning Caller owns returned object. + virtual CKMeansOnline1d* clone() const; + + //! Clear the current clusterer state. + virtual void clear(); + + //! Get the number of clusters. + virtual std::size_t numberClusters() const; + + //! Set the type of data being clustered. + virtual void dataType(maths_t::EDataType dataType); + + //! Set the rate at which information is aged out. + virtual void decayRate(double decayRate); + + //! Check if the cluster identified by \p index exists. + virtual bool hasCluster(std::size_t index) const; + + //! Get the centre of the cluster identified by \p index. + virtual bool clusterCentre(std::size_t index, double& result) const; + + //! Get the spread of the cluster identified by \p index. + virtual bool clusterSpread(std::size_t index, double& result) const; + + //! Gets the index of the cluster(s) to which \p point belongs + //! together with their weighting factors. + virtual void cluster(const double& point, TSizeDoublePr2Vec& result, double count = 1.0) const; + + //! Update the clustering with \p point and return its cluster(s) + //! together with their weighting factors. + virtual void add(const double& point, TSizeDoublePr2Vec& clusters, double count = 1.0); + + //! Update the clustering with \p points. + virtual void add(const TDoubleDoublePrVec& points); + + //! Propagate the clustering forwards by \p time. + //! + //! The cluster priors relax back to non-informative and the + //! cluster probabilities become less at a rate controlled by + //! the decay rate parameter (optionally supplied to the constructor). + //! + //! \param[in] time The time increment to apply. + virtual void propagateForwardsByTime(double time); + + //! Sample the cluster with index \p index. + //! + //! \param[in] index The index of the cluster to sample. + //! \param[in] numberSamples The desired number of samples. + //! \param[out] samples Filled in with the samples. + //! \return True if the cluster could be sampled and false otherwise. + virtual bool sample(std::size_t index, std::size_t numberSamples, TDoubleVec& samples) const; + + //! Get the probability of the cluster with the index \p index. + //! + //! \param[in] index The index of the cluster of interest. + //! \return The probability of the cluster identified by \p index. + virtual double probability(std::size_t index) const; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; + + //! Get the memory used by this component + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! 
Get the memory used by this component + virtual std::size_t memoryUsage() const; + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const; + //@} + +private: + //! Restore by traversing a state document. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + +private: + //! The (fixed) clusters. + TNormalVec m_Clusters; }; - } } diff --git a/include/maths/CKMostCorrelated.h b/include/maths/CKMostCorrelated.h index 159bcde3f8..1bef2195d3 100644 --- a/include/maths/CKMostCorrelated.h +++ b/include/maths/CKMostCorrelated.h @@ -9,8 +9,8 @@ #include #include -#include #include +#include #include #include @@ -20,11 +20,8 @@ #include - -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Randomized linear complexity search for the most correlated //! pairs of variables. @@ -52,210 +49,201 @@ namespace maths //! components are the projected normalised residuals, finding the //! most correlated variables amounts to a collection neighbourhood //! searches around each point. -class MATHS_EXPORT CKMostCorrelated -{ - public: - //! The number of projections of the data to maintain - //! simultaneously. - static const std::size_t NUMBER_PROJECTIONS = 10u; - - public: - using TDoubleVec = std::vector; - using TSizeVec = std::vector; - using TSizeSizePr = std::pair; - using TSizeSizePrVec = std::vector; - using TVector = CVectorNx1; - using TVectorVec = std::vector; - using TSizeVectorUMap = boost::unordered_map; - using TVectorPackedBitVectorPr = std::pair; - using TSizeVectorPackedBitVectorPrUMap = boost::unordered_map; - - public: - CKMostCorrelated(std::size_t k, double decayRate, bool initialize = true); +class MATHS_EXPORT CKMostCorrelated { +public: + //! The number of projections of the data to maintain + //! simultaneously. + static const std::size_t NUMBER_PROJECTIONS = 10u; + +public: + using TDoubleVec = std::vector; + using TSizeVec = std::vector; + using TSizeSizePr = std::pair; + using TSizeSizePrVec = std::vector; + using TVector = CVectorNx1; + using TVectorVec = std::vector; + using TSizeVectorUMap = boost::unordered_map; + using TVectorPackedBitVectorPr = std::pair; + using TSizeVectorPackedBitVectorPrUMap = boost::unordered_map; + +public: + CKMostCorrelated(std::size_t k, double decayRate, bool initialize = true); + + //! Create from part of a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Persist state by passing to the supplied inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Get the most correlated variables. + void mostCorrelated(TSizeSizePrVec& result) const; + + //! Get the \p n most correlated variables. + void mostCorrelated(std::size_t n, TSizeSizePrVec& correlates, TDoubleVec* pearson = 0) const; + + //! Get the most correlated variables correlations. + void correlations(TDoubleVec& result) const; + + //! Get the \p n most correlated variables correlations. + void correlations(std::size_t n, TDoubleVec& result) const; + + //! Resize the relevant statistics to accommodate up to \p n variables. + void addVariables(std::size_t n); + + //! Remove the variables \p remove. + void removeVariables(const TSizeVec& remove); + + //! Check if the correlations may have just changed. + bool changed() const; + + //! Add the value \p x for the variable \p X. + void add(std::size_t X, double x); + + //! Capture the projections of all variables added. 
+ void capture(); + + //! Get the checksum of this object. + uint64_t checksum(uint64_t seed = 0) const; + + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + std::size_t memoryUsage() const; + +protected: + //! The length of the projected sequence to capture. + static const std::size_t PROJECTION_DIMENSION; + //! The minimum sparseness, in terms of proportion of missing values, + //! for a variable we'll consider trying to correlate. + static const double MINIMUM_SPARSENESS; + //! The proportion of values to replace for each projection. + static const double REPLACE_FRACTION; + +protected: + using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; + using TMeanVarAccumulatorVec = std::vector; + using TSizeVectorUMapCItr = TSizeVectorUMap::const_iterator; + using TSizeVectorPackedBitVectorPrUMapItr = TSizeVectorPackedBitVectorPrUMap::iterator; + using TSizeVectorPackedBitVectorPrUMapCItr = TSizeVectorPackedBitVectorPrUMap::const_iterator; + + //! \brief A pair of variables and their correlation. + //! + //! DESCRIPTION:\n + //! This manages the estimation of the sample correlation, + //! i.e. \f$\frac{1}{n}\sum_{i=1}{n}{\frac{(x-m_x)(y-m_y)}{\sigma_x\sigma_y}}\f$, + //! from the projected data. + struct MATHS_EXPORT SCorrelation { + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + SCorrelation(); + SCorrelation(std::size_t X, + const TVector& px, + const CPackedBitVector& ix, + std::size_t Y, + const TVector& py, + const CPackedBitVector& iy); //! Create from part of a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); //! Persist state by passing to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Get the most correlated variables. - void mostCorrelated(TSizeSizePrVec &result) const; + //! Complete ordering of correlations by _increasing_ + //! absolute correlation. + bool operator<(const SCorrelation& rhs) const; - //! Get the \p n most correlated variables. - void mostCorrelated(std::size_t n, - TSizeSizePrVec &correlates, - TDoubleVec *pearson = 0) const; + //! Update the correlation with a new projection. + void update(const TSizeVectorPackedBitVectorPrUMap& projected); - //! Get the most correlated variables correlations. - void correlations(TDoubleVec &result) const; + //! Get the Euclidean distance between points corresponding + //! to this correlation. + double distance(double amax) const; - //! Get the \p n most correlated variables correlations. - void correlations(std::size_t n, TDoubleVec &result) const; + //! Get (a lower bound) on the estimated absolute correlation. + double absCorrelation() const; - //! Resize the relevant statistics to accommodate up to \p n variables. - void addVariables(std::size_t n); + //! Estimate the correlation based on the projections + //! \p px and \p py. + static double correlation(const TVector& px, const CPackedBitVector& ix, const TVector& py, const CPackedBitVector& iy); - //! Remove the variables \p remove. - void removeVariables(const TSizeVec &remove); + //! Get the checksum of this object. + uint64_t checksum(uint64_t seed) const; - //! Check if the correlations may have just changed. - bool changed() const; + //! Print for debug. 
+ std::string print() const; - //! Add the value \p x for the variable \p X. - void add(std::size_t X, double x); + //! The correlation. + TMeanVarAccumulator s_Correlation; + //! The first variable. + std::size_t s_X; + //! The second variable. + std::size_t s_Y; + }; - //! Capture the projections of all variables added. - void capture(); + //! \brief Checks if a correlation includes a specified variable. + class MATHS_EXPORT CMatches { + public: + CMatches(std::size_t x); - //! Get the checksum of this object. - uint64_t checksum(uint64_t seed = 0) const; - - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this object. - std::size_t memoryUsage() const; - - protected: - //! The length of the projected sequence to capture. - static const std::size_t PROJECTION_DIMENSION; - //! The minimum sparseness, in terms of proportion of missing values, - //! for a variable we'll consider trying to correlate. - static const double MINIMUM_SPARSENESS; - //! The proportion of values to replace for each projection. - static const double REPLACE_FRACTION; - - protected: - using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMeanVarAccumulatorVec = std::vector; - using TSizeVectorUMapCItr = TSizeVectorUMap::const_iterator; - using TSizeVectorPackedBitVectorPrUMapItr = TSizeVectorPackedBitVectorPrUMap::iterator; - using TSizeVectorPackedBitVectorPrUMapCItr = TSizeVectorPackedBitVectorPrUMap::const_iterator; - - //! \brief A pair of variables and their correlation. - //! - //! DESCRIPTION:\n - //! This manages the estimation of the sample correlation, - //! i.e. \f$\frac{1}{n}\sum_{i=1}{n}{\frac{(x-m_x)(y-m_y)}{\sigma_x\sigma_y}}\f$, - //! from the projected data. - struct MATHS_EXPORT SCorrelation - { - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - SCorrelation(); - SCorrelation(std::size_t X, - const TVector &px, - const CPackedBitVector &ix, - std::size_t Y, - const TVector &py, - const CPackedBitVector &iy); - - //! Create from part of a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Persist state by passing to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Complete ordering of correlations by _increasing_ - //! absolute correlation. - bool operator<(const SCorrelation &rhs) const; - - //! Update the correlation with a new projection. - void update(const TSizeVectorPackedBitVectorPrUMap &projected); - - //! Get the Euclidean distance between points corresponding - //! to this correlation. - double distance(double amax) const; - - //! Get (a lower bound) on the estimated absolute correlation. - double absCorrelation() const; - - //! Estimate the correlation based on the projections - //! \p px and \p py. - static double correlation(const TVector &px, - const CPackedBitVector &ix, - const TVector &py, - const CPackedBitVector &iy); - - //! Get the checksum of this object. - uint64_t checksum(uint64_t seed) const; - - //! Print for debug. - std::string print() const; - - //! The correlation. - TMeanVarAccumulator s_Correlation; - //! The first variable. - std::size_t s_X; - //! The second variable. - std::size_t s_Y; - }; - - //! \brief Checks if a correlation includes a specified variable. 
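// The projection scheme described in the class documentation rests on one
// fact: inner products of low-dimensional random projections approximate
// inner products of the full normalised series, and the inner product of two
// zero-mean, unit-norm series is their Pearson correlation. A standalone
// sketch using plain Gaussian projections (the class uses a related scheme
// over sparse, aged residuals, so treat this as the idea only):
#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

using TVec = std::vector<double>;
using TMatrix = std::vector<TVec>;

// Scale a series to zero mean and unit Euclidean norm, so inner products of
// normalised series are correlations.
inline TVec normalise(TVec x) {
    double mean = 0.0;
    for (double xi : x) { mean += xi; }
    mean /= static_cast<double>(x.size());
    double norm = 0.0;
    for (double& xi : x) {
        xi -= mean;
        norm += xi * xi;
    }
    norm = std::sqrt(norm);
    if (norm > 0.0) {
        for (double& xi : x) { xi /= norm; }
    }
    return x;
}

// One shared k x n Gaussian projection matrix for all variables.
inline TMatrix randomProjections(std::size_t k, std::size_t n, std::mt19937& rng) {
    std::normal_distribution<double> gaussian(0.0, 1.0);
    TMatrix projections(k, TVec(n));
    for (TVec& row : projections) {
        for (double& p : row) { p = gaussian(rng); }
    }
    return projections;
}

// dot(signature(x), signature(y)) estimates dot(normalise(x), normalise(y)),
// i.e. the correlation, so near neighbours among signatures are the highly
// correlated pairs.
inline TVec signature(const TVec& normalised, const TMatrix& projections) {
    TVec result(projections.size(), 0.0);
    for (std::size_t i = 0; i < projections.size(); ++i) {
        for (std::size_t j = 0; j < normalised.size(); ++j) {
            result[i] += projections[i][j] * normalised[j];
        }
        result[i] /= std::sqrt(static_cast<double>(projections.size()));
    }
    return result;
}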
- class MATHS_EXPORT CMatches - { - public: - CMatches(std::size_t x); - - bool operator()(const SCorrelation &correlation) const; - - private: - std::size_t m_X; - }; - - using TCorrelationVec = std::vector; - - protected: - //! Get the most correlated variables based on the current - //! projections. - void mostCorrelated(TCorrelationVec &result) const; - - //! Generate the next projection and reinitialize related state. - void nextProjection(); - - //! Get the projections. - const TVectorVec &projections() const; - - //! Get the projected residuals. - const TSizeVectorPackedBitVectorPrUMap &projected() const; - - //! Get the current correlation collection. - const TCorrelationVec &correlations() const; - - //! Get the variable moments. - const TMeanVarAccumulatorVec &moments() const; + bool operator()(const SCorrelation& correlation) const; private: - //! The number of correlations to find. - std::size_t m_K; + std::size_t m_X; + }; - //! The rate at which to forget about historical correlations. - double m_DecayRate; + using TCorrelationVec = std::vector; - //! The random number generator. - mutable CPRNG::CXorShift1024Mult m_Rng; +protected: + //! Get the most correlated variables based on the current + //! projections. + void mostCorrelated(TCorrelationVec& result) const; - //! The random projections. - TVectorVec m_Projections; + //! Generate the next projection and reinitialize related state. + void nextProjection(); - //! The values to add in the next capture. - TSizeVectorUMap m_CurrentProjected; + //! Get the projections. + const TVectorVec& projections() const; - //! The projected variables' "normalised" residuals. - TSizeVectorPackedBitVectorPrUMap m_Projected; + //! Get the projected residuals. + const TSizeVectorPackedBitVectorPrUMap& projected() const; - //! The maximum possible metric measurement count. - double m_MaximumCount; + //! Get the current correlation collection. + const TCorrelationVec& correlations() const; - //! The variables' means and variances. - TMeanVarAccumulatorVec m_Moments; + //! Get the variable moments. + const TMeanVarAccumulatorVec& moments() const; - //! The 2 * m_Size most correlated variables. - TCorrelationVec m_MostCorrelated; -}; +private: + //! The number of correlations to find. + std::size_t m_K; + + //! The rate at which to forget about historical correlations. + double m_DecayRate; + + //! The random number generator. + mutable CPRNG::CXorShift1024Mult m_Rng; + + //! The random projections. + TVectorVec m_Projections; + //! The values to add in the next capture. + TSizeVectorUMap m_CurrentProjected; + + //! The projected variables' "normalised" residuals. + TSizeVectorPackedBitVectorPrUMap m_Projected; + + //! The maximum possible metric measurement count. + double m_MaximumCount; + + //! The variables' means and variances. + TMeanVarAccumulatorVec m_Moments; + + //! The 2 * m_Size most correlated variables. + TCorrelationVec m_MostCorrelated; +}; } } diff --git a/include/maths/CKdTree.h b/include/maths/CKdTree.h index afd7d1d682..dda924e3e6 100644 --- a/include/maths/CKdTree.h +++ b/include/maths/CKdTree.h @@ -19,26 +19,20 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace kdtree_detail -{ +namespace kdtree_detail { //! \brief Stubs out the node data parameter for k-d tree. -struct SEmptyNodeData -{ -}; +struct SEmptyNodeData {}; //! Euclidean norm implementation for our internal vector classes. //! //! Overload to adapt the Euclidean norm calculation for different //! 
point implementations. template -typename SPromoted::Type>::Type euclidean(const POINT &point) -{ +typename SPromoted::Type>::Type euclidean(const POINT& point) { return point.euclidean(); } @@ -65,365 +59,274 @@ typename SPromoted::Type>::Type euclidean(const POIN //! when it is not needed). This should be default constructible and //! have value semantics. This can be useful for implementing certain //! algorithms efficiently. -template -class CKdTree -{ +template +class CKdTree { +public: + using TPointVec = std::vector; + using TPointVecItr = typename TPointVec::iterator; + using TCoordinate = typename SCoordinate::Type; + using TCoordinatePrecise = typename SPromoted::Type; + using TCoordinatePrecisePointPr = std::pair; + using TNearestAccumulator = CBasicStatistics::COrderStatisticsHeap; + + //! Less on a specific coordinate of point position vector. + class CCoordinateLess { public: - using TPointVec = std::vector; - using TPointVecItr = typename TPointVec::iterator; - using TCoordinate = typename SCoordinate::Type; - using TCoordinatePrecise = typename SPromoted::Type; - using TCoordinatePrecisePointPr = std::pair; - using TNearestAccumulator = CBasicStatistics::COrderStatisticsHeap; - - //! Less on a specific coordinate of point position vector. - class CCoordinateLess - { - public: - CCoordinateLess(std::size_t i) : m_I(i) {} - bool operator()(const POINT &lhs, const POINT &rhs) const - { - return lhs(m_I) < rhs(m_I); - } - private: - std::size_t m_I; - }; - - //! A node of the k-d tree. - struct SNode : public NODE_DATA - { - SNode(SNode *parent, const POINT &point) : - NODE_DATA(), - s_Parent(parent), - s_LeftChild(0), - s_RightChild(0), - s_Point(point) - {} - - //! Check node invariants. - bool checkInvariants(std::size_t dimension) const - { - if (s_Parent) - { - if ( s_Parent->s_LeftChild != this - && s_Parent->s_RightChild != this) - { - LOG_ERROR("Not parent's child"); - return false; - } - } + CCoordinateLess(std::size_t i) : m_I(i) {} + bool operator()(const POINT& lhs, const POINT& rhs) const { return lhs(m_I) < rhs(m_I); } - std::size_t coordinate = this->depth() % dimension; - CCoordinateLess less(coordinate); - if (s_LeftChild && less(s_Point, s_LeftChild->s_Point)) - { - LOG_ERROR("parent = " << s_Point - << ", left child = " << s_LeftChild->s_Point - << ", coordinate = " << coordinate); - return false; - } - if (s_RightChild && less(s_RightChild->s_Point, s_Point)) - { - LOG_ERROR("parent = " << s_Point - << ", right child = " << s_RightChild->s_Point - << ", coordinate = " << coordinate); + private: + std::size_t m_I; + }; + + //! A node of the k-d tree. + struct SNode : public NODE_DATA { + SNode(SNode* parent, const POINT& point) : NODE_DATA(), s_Parent(parent), s_LeftChild(0), s_RightChild(0), s_Point(point) {} + + //! Check node invariants. + bool checkInvariants(std::size_t dimension) const { + if (s_Parent) { + if (s_Parent->s_LeftChild != this && s_Parent->s_RightChild != this) { + LOG_ERROR("Not parent's child"); return false; } - return true; } - //! Get the coordinate the points are split on. 
- std::size_t depth() const - { - std::size_t depth = 0u; - for (const SNode *ancestor = s_Parent; - ancestor; - ancestor = ancestor->s_Parent) - { - ++depth; - } - return depth; + std::size_t coordinate = this->depth() % dimension; + CCoordinateLess less(coordinate); + if (s_LeftChild && less(s_Point, s_LeftChild->s_Point)) { + LOG_ERROR("parent = " << s_Point << ", left child = " << s_LeftChild->s_Point << ", coordinate = " << coordinate); + return false; } - - //! The parent. - SNode *s_Parent; - //! The left child if one exists. - SNode *s_LeftChild; - //! The right child if one exists. - SNode *s_RightChild; - //! The point at this node. - POINT s_Point; - }; - - public: - //! Reserve space for \p n points. - void reserve(std::size_t n) - { - m_Nodes.reserve(n); + if (s_RightChild && less(s_RightChild->s_Point, s_Point)) { + LOG_ERROR("parent = " << s_Point << ", right child = " << s_RightChild->s_Point << ", coordinate = " << coordinate); + return false; + } + return true; } - //! Build a k-d tree on the collection of points \p points. - //! - //! \note \p points are reordered by this operation. - void build(TPointVec &points) - { - if (points.empty()) - { - return; + //! Get the coordinate the points are split on. + std::size_t depth() const { + std::size_t depth = 0u; + for (const SNode* ancestor = s_Parent; ancestor; ancestor = ancestor->s_Parent) { + ++depth; } - m_Dimension = points[0].dimension(); - m_Nodes.clear(); - m_Nodes.reserve(points.size()); - this->buildRecursively(0, // Parent pointer - 0, // Split coordinate - points.begin(), - points.end()); + return depth; } - //! Get the number of points in the tree. - std::size_t size() const - { - return m_Nodes.size(); + //! The parent. + SNode* s_Parent; + //! The left child if one exists. + SNode* s_LeftChild; + //! The right child if one exists. + SNode* s_RightChild; + //! The point at this node. + POINT s_Point; + }; + +public: + //! Reserve space for \p n points. + void reserve(std::size_t n) { m_Nodes.reserve(n); } + + //! Build a k-d tree on the collection of points \p points. + //! + //! \note \p points are reordered by this operation. + void build(TPointVec& points) { + if (points.empty()) { + return; + } + m_Dimension = points[0].dimension(); + m_Nodes.clear(); + m_Nodes.reserve(points.size()); + this->buildRecursively(0, // Parent pointer + 0, // Split coordinate + points.begin(), + points.end()); + } + + //! Get the number of points in the tree. + std::size_t size() const { return m_Nodes.size(); } + + //! Branch and bound search for nearest neighbour of \p point. + const POINT* nearestNeighbour(const POINT& point) const { + const POINT* nearest = 0; + + if (m_Nodes.empty()) { + return nearest; } - //! Branch and bound search for nearest neighbour of \p point. - const POINT *nearestNeighbour(const POINT &point) const - { - const POINT *nearest = 0; + TCoordinatePrecise distanceToNearest = std::numeric_limits::max(); + return this->nearestNeighbour(point, + m_Nodes[0], + 0, // Split coordinate, + nearest, + distanceToNearest); + } - if (m_Nodes.empty()) - { - return nearest; - } + //! Branch and bound search for nearest \p n neighbours of \p point. + void nearestNeighbours(std::size_t n, const POINT& point, TPointVec& result) const { + result.clear(); - TCoordinatePrecise distanceToNearest = - std::numeric_limits::max(); - return this->nearestNeighbour(point, - m_Nodes[0], - 0, // Split coordinate, - nearest, - distanceToNearest); + if (n == 0 || m_Nodes.empty()) { + return; } - //! 
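// build() above places the median point, by the current split coordinate, at
// each node and recurses on the two halves, cycling the coordinate with
// depth. The partitioning step in isolation for plain 2-D points;
// std::nth_element rearranges the range so the median is in place in linear
// expected time. Names here are illustrative.
#include <algorithm>
#include <array>
#include <cstddef>
#include <vector>

using Point = std::array<double, 2>;

inline void buildRecursively(std::vector<Point>& points,
                             std::size_t begin, std::size_t end, std::size_t coordinate) {
    if (end - begin < 2) {
        return;
    }
    std::size_t median = begin + (end - begin) / 2;
    std::nth_element(points.begin() + begin, points.begin() + median, points.begin() + end,
                     [coordinate](const Point& lhs, const Point& rhs) {
                         return lhs[coordinate] < rhs[coordinate];
                     });
    std::size_t next = (coordinate + 1) % 2; // Cycle the split coordinate with depth.
    buildRecursively(points, begin, median, next);
    buildRecursively(points, median + 1, end, next);
}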
Branch and bound search for nearest \p n neighbours of \p point. - void nearestNeighbours(std::size_t n, - const POINT &point, - TPointVec &result) const - { - result.clear(); - - if (n == 0 || m_Nodes.empty()) - { - return; - } + TNearestAccumulator nearest(n); + this->nearestNeighbours(point, + m_Nodes[0], + 0, // Split coordinate, + nearest); - TNearestAccumulator nearest(n); - this->nearestNeighbours(point, - m_Nodes[0], - 0, // Split coordinate, - nearest); - - result.reserve(nearest.count()); - nearest.sort(); - for (std::size_t i = 0u; i < nearest.count(); ++i) - { - result.push_back(nearest[i].second); - } + result.reserve(nearest.count()); + nearest.sort(); + for (std::size_t i = 0u; i < nearest.count(); ++i) { + result.push_back(nearest[i].second); } - - //! A pre-order depth first traversal of the k-d tree nodes. - //! - //! \param[in] f The function to apply to the nodes. - //! \tparam F should have the signature bool (const SNode &). - //! Traversal stops below point that \p f returns false. - template - void preorderDepthFirst(F f) const - { - if (m_Nodes.empty()) - { - return; - } - this->preorderDepthFirst(m_Nodes[0], f); + } + + //! A pre-order depth first traversal of the k-d tree nodes. + //! + //! \param[in] f The function to apply to the nodes. + //! \tparam F should have the signature bool (const SNode &). + //! Traversal stops below point that \p f returns false. + template + void preorderDepthFirst(F f) const { + if (m_Nodes.empty()) { + return; } - - //! A post-order depth first traversal of the k-d tree nodes. - //! - //! \param[in] f The function to apply to the nodes. - //! \tparam F should have the signature void (const SNode &). - template - void postorderDepthFirst(F f) const - { - if (m_Nodes.empty()) - { - return; - } - this->postorderDepthFirst(m_Nodes[0], f); + this->preorderDepthFirst(m_Nodes[0], f); + } + + //! A post-order depth first traversal of the k-d tree nodes. + //! + //! \param[in] f The function to apply to the nodes. + //! \tparam F should have the signature void (const SNode &). + template + void postorderDepthFirst(F f) const { + if (m_Nodes.empty()) { + return; } - - //! Check the tree invariants. - bool checkInvariants() const - { - for (std::size_t i = 0u; i < m_Nodes.size(); ++i) - { - if (!m_Nodes[i].checkInvariants(m_Dimension)) - { - return false; - } + this->postorderDepthFirst(m_Nodes[0], f); + } + + //! Check the tree invariants. + bool checkInvariants() const { + for (std::size_t i = 0u; i < m_Nodes.size(); ++i) { + if (!m_Nodes[i].checkInvariants(m_Dimension)) { + return false; } - return true; + } + return true; + } + +private: + using TNodeVec = std::vector; + +private: + //! Recursively build the k-d tree. + SNode* buildRecursively(SNode* parent, std::size_t coordinate, TPointVecItr begin, TPointVecItr end) { + std::size_t n = static_cast(end - begin) / 2; + TPointVecItr median = begin + n; + std::nth_element(begin, median, end, CCoordinateLess(coordinate)); + m_Nodes.push_back(SNode(parent, *median)); + SNode* node = &m_Nodes.back(); + if (median - begin > 0) { + SNode* leftChild = this->buildRecursively(node, (coordinate + 1) % m_Dimension, begin, median); + node->s_LeftChild = leftChild; + } + if (end - median > 1) { + SNode* rightChild = this->buildRecursively(node, (coordinate + 1) % m_Dimension, median + 1, end); + node->s_RightChild = rightChild; + } + return node; + } + + //! Recursively find the nearest point to \p point. 
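// A usage sketch of the public interface above, assuming the hypothetical
// SPoint2 from earlier, that the point type is CKdTree's first template
// parameter and that the node data parameter is defaulted (the template
// parameter lists are elided in this hunk). Note that build() reorders its
// input vector.
#include <cstddef>
#include <vector>

void kdTreeUsageSketch() {
    ml::maths::CKdTree<SPoint2> tree;
    std::vector<SPoint2> points{{{0.0, 0.0}}, {{1.0, 2.0}}, {{3.0, 1.0}}};
    tree.build(points);

    // The two nearest neighbours of (1, 1), closest first after sort().
    std::vector<SPoint2> neighbours;
    tree.nearestNeighbours(2, SPoint2{{1.0, 1.0}}, neighbours);

    // Pre-order traversal: returning true recurses into the children.
    std::size_t nodes = 0;
    tree.preorderDepthFirst([&nodes](const ml::maths::CKdTree<SPoint2>::SNode&) {
        ++nodes;
        return true;
    });
}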
+ const POINT* nearestNeighbour(const POINT& point, + const SNode& node, + std::size_t coordinate, + const POINT* nearest, + TCoordinatePrecise& distanceToNearest) const { + TCoordinatePrecise distance = kdtree_detail::euclidean(point - node.s_Point); + + if (distance < distanceToNearest) { + nearest = &node.s_Point; + distanceToNearest = distance; } - private: - using TNodeVec = std::vector; + if (node.s_LeftChild || node.s_RightChild) { + TCoordinatePrecise distanceToHyperplane = point(coordinate) - node.s_Point(coordinate); - private: - //! Recursively build the k-d tree. - SNode *buildRecursively(SNode *parent, - std::size_t coordinate, - TPointVecItr begin, - TPointVecItr end) - { - std::size_t n = static_cast(end - begin) / 2; - TPointVecItr median = begin + n; - std::nth_element(begin, median, end, CCoordinateLess(coordinate)); - m_Nodes.push_back(SNode(parent, *median)); - SNode *node = &m_Nodes.back(); - if (median - begin > 0) - { - SNode *leftChild = this->buildRecursively(node, - (coordinate + 1) % m_Dimension, - begin, - median); - node->s_LeftChild = leftChild; + SNode* primary = node.s_LeftChild; + SNode* secondary = node.s_RightChild; + if (!primary || (secondary && distanceToHyperplane > 0)) { + std::swap(primary, secondary); } - if (end - median > 1) - { - SNode *rightChild = this->buildRecursively(node, - (coordinate + 1) % m_Dimension, - median + 1, - end); - node->s_RightChild = rightChild; + + std::size_t nextCoordinate = (coordinate + 1) % m_Dimension; + nearest = this->nearestNeighbour(point, *primary, nextCoordinate, nearest, distanceToNearest); + if (secondary && std::fabs(distanceToHyperplane) < distanceToNearest) { + nearest = this->nearestNeighbour(point, *secondary, nextCoordinate, nearest, distanceToNearest); } - return node; } - //! Recursively find the nearest point to \p point. - const POINT *nearestNeighbour(const POINT &point, - const SNode &node, - std::size_t coordinate, - const POINT *nearest, - TCoordinatePrecise &distanceToNearest) const - { - TCoordinatePrecise distance = kdtree_detail::euclidean(point - node.s_Point); - - if (distance < distanceToNearest) - { - nearest = &node.s_Point; - distanceToNearest = distance; - } + return nearest; + } - if (node.s_LeftChild || node.s_RightChild) - { - TCoordinatePrecise distanceToHyperplane = point(coordinate) - - node.s_Point(coordinate); + //! Recursively find the nearest point to \p point. + void nearestNeighbours(const POINT& point, const SNode& node, std::size_t coordinate, TNearestAccumulator& nearest) const { + TCoordinatePrecise distance = kdtree_detail::euclidean(point - node.s_Point); - SNode *primary = node.s_LeftChild; - SNode *secondary = node.s_RightChild; - if (!primary || (secondary && distanceToHyperplane > 0)) - { - std::swap(primary, secondary); - } + nearest.add(TCoordinatePrecisePointPr(distance, node.s_Point)); - std::size_t nextCoordinate = (coordinate + 1) % m_Dimension; - nearest = this->nearestNeighbour(point, - *primary, - nextCoordinate, - nearest, - distanceToNearest); - if (secondary && std::fabs(distanceToHyperplane) < distanceToNearest) - { - nearest = this->nearestNeighbour(point, - *secondary, - nextCoordinate, - nearest, - distanceToNearest); - } - } + if (node.s_LeftChild || node.s_RightChild) { + TCoordinatePrecise distanceToHyperplane = point(coordinate) - node.s_Point(coordinate); - return nearest; - } - - //! Recursively find the nearest point to \p point. 
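// The pruning rule used by the recursion above, restated in isolation: after
// searching the child on the query's side of the split, the far child needs
// visiting only if the ball of radius distanceToNearest around the query
// crosses the splitting hyperplane. A sketch:
#include <cmath>

bool mustSearchFarChild(double pointSplitCoordinate,
                        double nodeSplitCoordinate,
                        double distanceToNearest) {
    double distanceToHyperplane = pointSplitCoordinate - nodeSplitCoordinate;
    return std::fabs(distanceToHyperplane) < distanceToNearest;
}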
- void nearestNeighbours(const POINT &point, - const SNode &node, - std::size_t coordinate, - TNearestAccumulator &nearest) const - { - TCoordinatePrecise distance = kdtree_detail::euclidean(point - node.s_Point); - - nearest.add(TCoordinatePrecisePointPr(distance, node.s_Point)); - - if (node.s_LeftChild || node.s_RightChild) - { - TCoordinatePrecise distanceToHyperplane = point(coordinate) - - node.s_Point(coordinate); - - SNode *primary = node.s_LeftChild; - SNode *secondary = node.s_RightChild; - if (!primary || (secondary && distanceToHyperplane > 0)) - { - std::swap(primary, secondary); - } - - std::size_t nextCoordinate = (coordinate + 1) % m_Dimension; - this->nearestNeighbours(point, *primary, nextCoordinate, nearest); - if (secondary && std::fabs(distanceToHyperplane) < nearest.biggest().first) - { - this->nearestNeighbours(point, *secondary, nextCoordinate, nearest); - } + SNode* primary = node.s_LeftChild; + SNode* secondary = node.s_RightChild; + if (!primary || (secondary && distanceToHyperplane > 0)) { + std::swap(primary, secondary); } - } - //! Visit the branch rooted at \p node with \p f in pre-order. - template - static void preorderDepthFirst(const SNode &node, F f) - { - if (f(node)) - { - if (node.s_LeftChild) - { - preorderDepthFirst(*node.s_LeftChild, f); - } - if (node.s_RightChild) - { - preorderDepthFirst(*node.s_RightChild, f); - } + std::size_t nextCoordinate = (coordinate + 1) % m_Dimension; + this->nearestNeighbours(point, *primary, nextCoordinate, nearest); + if (secondary && std::fabs(distanceToHyperplane) < nearest.biggest().first) { + this->nearestNeighbours(point, *secondary, nextCoordinate, nearest); } } - - //! Visit the branch rooted at \p node with \p f in post-order. - template - static void postorderDepthFirst(const SNode &node, F f) - { - if (node.s_LeftChild) - { - postorderDepthFirst(*node.s_LeftChild, f); + } + + //! Visit the branch rooted at \p node with \p f in pre-order. + template + static void preorderDepthFirst(const SNode& node, F f) { + if (f(node)) { + if (node.s_LeftChild) { + preorderDepthFirst(*node.s_LeftChild, f); } - if (node.s_RightChild) - { - postorderDepthFirst(*node.s_RightChild, f); + if (node.s_RightChild) { + preorderDepthFirst(*node.s_RightChild, f); } - f(node); } + } - private: - //! The point dimension. - std::size_t m_Dimension; - //! The representation of the points. - TNodeVec m_Nodes; + //! Visit the branch rooted at \p node with \p f in post-order. + template + static void postorderDepthFirst(const SNode& node, F f) { + if (node.s_LeftChild) { + postorderDepthFirst(*node.s_LeftChild, f); + } + if (node.s_RightChild) { + postorderDepthFirst(*node.s_RightChild, f); + } + f(node); + } + +private: + //! The point dimension. + std::size_t m_Dimension; + //! The representation of the points. + TNodeVec m_Nodes; }; - } } diff --git a/include/maths/CLassoLogisticRegression.h b/include/maths/CLassoLogisticRegression.h index 50a3e329ea..58076e66df 100644 --- a/include/maths/CLassoLogisticRegression.h +++ b/include/maths/CLassoLogisticRegression.h @@ -15,13 +15,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace lasso_logistic_regression_detail -{ +namespace lasso_logistic_regression_detail { using TDoubleVec = std::vector; using TSizeSizePr = std::pair; @@ -33,117 +30,75 @@ using TSizeSizePrDoublePrVec = std::vector; //! DESCRIPTION:\n //! Used to represent dense feature vectors. This only implements //! the interface needed by the CLG algorithm. 
-class MATHS_EXPORT CDenseMatrix -{ - public: - using iterator = TDoubleVec::const_iterator; - using TDoubleVecVec = std::vector; - - public: - CDenseMatrix(); - CDenseMatrix(TDoubleVecVec &elements); - - //! Efficiently swap the contents of two matrices. - void swap(CDenseMatrix &other); - - //! Get the number of rows. - std::size_t rows() const - { - return m_Elements.empty() ? 0 : m_Elements[0].size(); - } - //! Get the number of columns. - std::size_t columns() const - { - return m_Elements.size(); - } - //! Get the beginning of the rows present for the j'th column. - iterator beginRows(std::size_t j) const - { - return m_Elements[j].begin(); - } - //! Get the end of the rows present for the j'th column. - iterator endRows(std::size_t j) const - { - return m_Elements[j].end(); - } - //! Get the row represented by the j'th column row iterator. - std::size_t row(iterator itr, std::size_t j) const - { - return itr - m_Elements[j].begin(); - } - //! Get the element represented by the iterator. - double element(iterator itr) const - { - return *itr; - } - - private: - //! The actual matrix. - TDoubleVecVec m_Elements; +class MATHS_EXPORT CDenseMatrix { +public: + using iterator = TDoubleVec::const_iterator; + using TDoubleVecVec = std::vector; + +public: + CDenseMatrix(); + CDenseMatrix(TDoubleVecVec& elements); + + //! Efficiently swap the contents of two matrices. + void swap(CDenseMatrix& other); + + //! Get the number of rows. + std::size_t rows() const { return m_Elements.empty() ? 0 : m_Elements[0].size(); } + //! Get the number of columns. + std::size_t columns() const { return m_Elements.size(); } + //! Get the beginning of the rows present for the j'th column. + iterator beginRows(std::size_t j) const { return m_Elements[j].begin(); } + //! Get the end of the rows present for the j'th column. + iterator endRows(std::size_t j) const { return m_Elements[j].end(); } + //! Get the row represented by the j'th column row iterator. + std::size_t row(iterator itr, std::size_t j) const { return itr - m_Elements[j].begin(); } + //! Get the element represented by the iterator. + double element(iterator itr) const { return *itr; } + +private: + //! The actual matrix. + TDoubleVecVec m_Elements; }; //! Very simple dynamically sized sparse matrix. //! //! DESCRIPTION:\n //! This only implements the interface needed by the CLG algorithm. -class MATHS_EXPORT CSparseMatrix -{ - public: - using iterator = TSizeSizePrDoublePrVec::const_iterator; - - public: - CSparseMatrix(); - CSparseMatrix(std::size_t rows, - std::size_t columns, - TSizeSizePrDoublePrVec &elements); - - //! Efficiently swap the contents of two matrices. - void swap(CSparseMatrix &other); - - //! Get the number of rows. - std::size_t rows() const - { - return m_Rows; - } - //! Get the number of columns. - std::size_t columns() const - { - return m_Columns; - } - //! Get the beginning of the rows present for the j'th column. - iterator beginRows(std::size_t j) const - { - return std::lower_bound(m_Elements.begin(), - m_Elements.end(), - TSizeSizePr(j, size_t(0)), - COrderings::SFirstLess()); - } - //! Get the end of the rows present for the j'th column. - iterator endRows(std::size_t j) const - { - return std::upper_bound(m_Elements.begin(), - m_Elements.end(), - TSizeSizePr(j, m_Rows), - COrderings::SFirstLess()); - } - //! Get the row represented by the j'th column row iterator. - std::size_t row(iterator itr, std::size_t /*j*/) const - { - return itr->first.second; - } - //! Get the element represented by the iterator. 
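// A sketch of walking one column of CDenseMatrix through the iterator
// protocol declared above (beginRows/endRows/row/element). The storage is
// per column, so this sums contiguous values. CSparseMatrix below exposes
// the same interface, so the loop could equally be a template over MATRIX,
// which is how CCyclicCoordinateDescent::checkInputs is declared.
#include <cstddef>

double columnSum(const ml::maths::lasso_logistic_regression_detail::CDenseMatrix& x,
                 std::size_t j) {
    double sum = 0.0;
    for (auto itr = x.beginRows(j); itr != x.endRows(j); ++itr) {
        sum += x.element(itr);   // Value at row x.row(itr, j) of column j.
    }
    return sum;
}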
- double element(iterator itr) const - { - return itr->second; - } - - private: - //! The number of rows. - std::size_t m_Rows; - //! The number of columns. - std::size_t m_Columns; - //! Representation of the non-zero elements. - TSizeSizePrDoublePrVec m_Elements; +class MATHS_EXPORT CSparseMatrix { +public: + using iterator = TSizeSizePrDoublePrVec::const_iterator; + +public: + CSparseMatrix(); + CSparseMatrix(std::size_t rows, std::size_t columns, TSizeSizePrDoublePrVec& elements); + + //! Efficiently swap the contents of two matrices. + void swap(CSparseMatrix& other); + + //! Get the number of rows. + std::size_t rows() const { return m_Rows; } + //! Get the number of columns. + std::size_t columns() const { return m_Columns; } + //! Get the beginning of the rows present for the j'th column. + iterator beginRows(std::size_t j) const { + return std::lower_bound(m_Elements.begin(), m_Elements.end(), TSizeSizePr(j, size_t(0)), COrderings::SFirstLess()); + } + //! Get the end of the rows present for the j'th column. + iterator endRows(std::size_t j) const { + return std::upper_bound(m_Elements.begin(), m_Elements.end(), TSizeSizePr(j, m_Rows), COrderings::SFirstLess()); + } + //! Get the row represented by the j'th column row iterator. + std::size_t row(iterator itr, std::size_t /*j*/) const { return itr->first.second; } + //! Get the element represented by the iterator. + double element(iterator itr) const { return itr->second; } + +private: + //! The number of rows. + std::size_t m_Rows; + //! The number of columns. + std::size_t m_Columns; + //! Representation of the non-zero elements. + TSizeSizePrDoublePrVec m_Elements; }; //! \brief Implements Zhang and Oles cyclic coordinate descent scheme, @@ -174,88 +129,70 @@ class MATHS_EXPORT CSparseMatrix //! //! \see http://www.stat.columbia.edu/~madigan/PAPERS/techno.pdf for //! more details. -class MATHS_EXPORT CCyclicCoordinateDescent -{ - public: - CCyclicCoordinateDescent(std::size_t maxIterations, - double eps); - - //! Compute the regression parameters for dense feature vectors. - //! - //! \param[in] x The feature vectors in the training data. - //! \param[in] y The class labels of the feature vectors. - //! \param[in] lambda The precision of the Laplace prior. - //! \param[out] beta The MAP parameters of the LASSO logistic - //! regression. - //! \param[out] numberIterations The number of iterations of - //! the main optimization loop used. - bool run(const CDenseMatrix &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta, - std::size_t &numberIterations); - - //! Compute the regression parameters for sparse feature vectors. - //! - //! \param[in] x The feature vectors in the training data. - //! \param[in] y The class labels of the feature vectors. - //! \param[in] lambda The precision of the Laplace prior. - //! \param[out] beta The MAP parameters of the LASSO logistic - //! regression. - //! \param[out] numberIterations The number of iterations of - //! the main optimization loop used. - bool run(const CSparseMatrix &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta, - std::size_t &numberIterations); - - //! Compute the regression parameters for dense feature vectors - //! using the input value of beta to initialize the optimization - //! loop. - //! - //! \param[in] x The feature vectors in the training data. - //! \param[in] y The class labels of the feature vectors. - //! \param[in] lambda The precision of the Laplace prior. - //! 
\param[in,out] beta The MAP parameters of the LASSO logistic - //! regression. - //! \param[out] numberIterations The number of iterations of - //! the main optimization loop used. - bool runIncremental(const CDenseMatrix &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta, - std::size_t &numberIterations); - - //! Compute the regression parameters for sparse feature vectors - //! using the input value of beta to initialize the optimization - //! loop. - //! - //! \param[in] x The feature vectors in the training data. - //! \param[in] y The class labels of the feature vectors. - //! \param[in] lambda The precision of the Laplace prior. - //! \param[in,out] beta The MAP parameters of the LASSO logistic - //! regression. - //! \param[out] numberIterations The number of iterations of - //! the main optimization loop used. - bool runIncremental(const CSparseMatrix &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta, - std::size_t &numberIterations); - - private: - //! Check the validity of the training data and the prior parameters. - template - static bool checkInputs(const MATRIX &x, - const TDoubleVec &y, - const TDoubleVec &lambda); - - private: - //! The maximum number of iterations of the main loop. - std::size_t m_MaxIterations; - //! The relative convergence threshold. - double m_Eps; +class MATHS_EXPORT CCyclicCoordinateDescent { +public: + CCyclicCoordinateDescent(std::size_t maxIterations, double eps); + + //! Compute the regression parameters for dense feature vectors. + //! + //! \param[in] x The feature vectors in the training data. + //! \param[in] y The class labels of the feature vectors. + //! \param[in] lambda The precision of the Laplace prior. + //! \param[out] beta The MAP parameters of the LASSO logistic + //! regression. + //! \param[out] numberIterations The number of iterations of + //! the main optimization loop used. + bool run(const CDenseMatrix& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta, std::size_t& numberIterations); + + //! Compute the regression parameters for sparse feature vectors. + //! + //! \param[in] x The feature vectors in the training data. + //! \param[in] y The class labels of the feature vectors. + //! \param[in] lambda The precision of the Laplace prior. + //! \param[out] beta The MAP parameters of the LASSO logistic + //! regression. + //! \param[out] numberIterations The number of iterations of + //! the main optimization loop used. + bool run(const CSparseMatrix& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta, std::size_t& numberIterations); + + //! Compute the regression parameters for dense feature vectors + //! using the input value of beta to initialize the optimization + //! loop. + //! + //! \param[in] x The feature vectors in the training data. + //! \param[in] y The class labels of the feature vectors. + //! \param[in] lambda The precision of the Laplace prior. + //! \param[in,out] beta The MAP parameters of the LASSO logistic + //! regression. + //! \param[out] numberIterations The number of iterations of + //! the main optimization loop used. + bool + runIncremental(const CDenseMatrix& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta, std::size_t& numberIterations); + + //! Compute the regression parameters for sparse feature vectors + //! using the input value of beta to initialize the optimization + //! loop. + //! + //! \param[in] x The feature vectors in the training data. + //! 
\param[in] y The class labels of the feature vectors. + //! \param[in] lambda The precision of the Laplace prior. + //! \param[in,out] beta The MAP parameters of the LASSO logistic + //! regression. + //! \param[out] numberIterations The number of iterations of + //! the main optimization loop used. + bool + runIncremental(const CSparseMatrix& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta, std::size_t& numberIterations); + +private: + //! Check the validity of the training data and the prior parameters. + template + static bool checkInputs(const MATRIX& x, const TDoubleVec& y, const TDoubleVec& lambda); + +private: + //! The maximum number of iterations of the main loop. + std::size_t m_MaxIterations; + //! The relative convergence threshold. + double m_Eps; }; //! The possible styles for learning hyperparameter \f$\lambda\f$. @@ -265,15 +202,10 @@ class MATHS_EXPORT CCyclicCoordinateDescent //! cross validation and searches the interval //! \f$[\frac{\lambda_n}{10}, 10 \lambda_n]\f$ for the value //! maximizing the test data likelihood. -enum EHyperparametersStyle -{ - E_LambdaNormBased, - E_LambdaCrossValidated -}; +enum EHyperparametersStyle { E_LambdaNormBased, E_LambdaCrossValidated }; } // lasso_logistic_regression_detail:: - //! \brief A logistic regression model. //! //! DESCRIPTION:\n @@ -285,33 +217,31 @@ enum EHyperparametersStyle //! The parameters \f$(\beta, \beta_0)\f$ are chosen to minimize //! the log likelihood of a collection training data. For more //! information on fitting this see CLassoLogisticRegression. -class MATHS_EXPORT CLogisticRegressionModel -{ - public: - using TDoubleVec = std::vector; - using TSizeDoublePr = std::pair; - using TSizeDoublePrVec = std::vector; +class MATHS_EXPORT CLogisticRegressionModel { +public: + using TDoubleVec = std::vector; + using TSizeDoublePr = std::pair; + using TSizeDoublePrVec = std::vector; - public: - CLogisticRegressionModel(); - CLogisticRegressionModel(double beta0, - TSizeDoublePrVec &beta); +public: + CLogisticRegressionModel(); + CLogisticRegressionModel(double beta0, TSizeDoublePrVec& beta); - //! Efficiently swap the contents of two models. - void swap(CLogisticRegressionModel &other); + //! Efficiently swap the contents of two models. + void swap(CLogisticRegressionModel& other); - //! Get the probability of the dense feature vector \p x. - bool operator()(const TDoubleVec &x, double &probability) const; + //! Get the probability of the dense feature vector \p x. + bool operator()(const TDoubleVec& x, double& probability) const; - //! Get the probability of the sparse feature vector \p x. - double operator()(const TSizeDoublePrVec &x) const; + //! Get the probability of the sparse feature vector \p x. + double operator()(const TSizeDoublePrVec& x) const; - private: - //! The intercept. - double m_Beta0; +private: + //! The intercept. + double m_Beta0; - //! The non-zero beta parameters. - TSizeDoublePrVec m_Beta; + //! The non-zero beta parameters. + TSizeDoublePrVec m_Beta; }; //! \brief Implements shared functionality for the different Lasso @@ -343,61 +273,60 @@ class MATHS_EXPORT CLogisticRegressionModel //! the functionality to train the hyperparameters, which can be //! shared between the two implementations. template -class MATHS_EXPORT CLassoLogisticRegression -{ - public: - using TDoubleVec = std::vector; - using EHyperparametersStyle = lasso_logistic_regression_detail::EHyperparametersStyle; - - protected: - CLassoLogisticRegression(); - - //! 
Learn the value of precision of the Laplace prior.
-        template
-        void doLearnHyperparameter(EHyperparametersStyle style);
-
-        //! Learn the parameters of the logistic model based on the
-        //! training data added so far.
-        template
-        bool doLearn(CLogisticRegressionModel &result);
-
-        //! Check whether it is possible to learn a model.
-        //!
-        //! It is only possible to learn a model if the training
-        //! data contains a mixture of both positive and negative
-        //! examples.
-        bool sanityChecks() const;
-
-        //! Get the training feature vectors.
-        inline const STORAGE &x() const { return m_X; }
-        //! Get the training feature vectors.
-        inline STORAGE &x() { return m_X; }
-
-        //! Get the feature vector dimension.
-        inline std::size_t d() const { return m_D; }
-        //! Get the feature vector dimension.
-        inline std::size_t &d() { return m_D; }
-
-        //! Get the training feature vectors.
-        inline const TDoubleVec &y() const { return m_Y; }
-        //! Get the training feature vectors.
-        inline TDoubleVec &y() { return m_Y; }
-
-    private:
-        //! The feature vectors.
-        STORAGE m_X;
-        //! The dimension of the feature vectors.
-        std::size_t m_D;
-        //! The feature vector labels.
-        TDoubleVec m_Y;
-        //! The precision of the Laplace prior.
-        double m_Lambda;
-        //! The (last) learned regression parameters.
-        TDoubleVec m_Beta;
+class MATHS_EXPORT CLassoLogisticRegression {
+public:
+    using TDoubleVec = std::vector;
+    using EHyperparametersStyle = lasso_logistic_regression_detail::EHyperparametersStyle;
+
+protected:
+    CLassoLogisticRegression();
+
+    //! Learn the value of the precision of the Laplace prior.
+    template
+    void doLearnHyperparameter(EHyperparametersStyle style);
+
+    //! Learn the parameters of the logistic model based on the
+    //! training data added so far.
+    template
+    bool doLearn(CLogisticRegressionModel& result);
+
+    //! Check whether it is possible to learn a model.
+    //!
+    //! It is only possible to learn a model if the training
+    //! data contains a mixture of both positive and negative
+    //! examples.
+    bool sanityChecks() const;
+
+    //! Get the training feature vectors.
+    inline const STORAGE& x() const { return m_X; }
+    //! Get the training feature vectors.
+    inline STORAGE& x() { return m_X; }
+
+    //! Get the feature vector dimension.
+    inline std::size_t d() const { return m_D; }
+    //! Get the feature vector dimension.
+    inline std::size_t& d() { return m_D; }
+
+    //! Get the training labels.
+    inline const TDoubleVec& y() const { return m_Y; }
+    //! Get the training labels.
+    inline TDoubleVec& y() { return m_Y; }
+
+private:
+    //! The feature vectors.
+    STORAGE m_X;
+    //! The dimension of the feature vectors.
+    std::size_t m_D;
+    //! The feature vector labels.
+    TDoubleVec m_Y;
+    //! The precision of the Laplace prior.
+    double m_Lambda;
+    //! The (last) learned regression parameters.
+    TDoubleVec m_Beta;
 };

-using TDenseStorage = std::vector >;
-using TSparseStorage = std::vector > >;
+using TDenseStorage = std::vector>;
+using TSparseStorage = std::vector>>;

 //! \brief Lasso logistic regression using dense encoding of the
 //! feature vectors.
@@ -408,31 +337,30 @@ using TSparseStorage = std::vector >
 //! IMPLEMENTATION DECISIONS:\n
 //! This uses a dense encoding of the feature vector for the case that
 //! they are small and mostly non-zero.
-class MATHS_EXPORT CLassoLogisticRegressionDense : public CLassoLogisticRegression
-{
-    public:
-        using TSizeDoublePr = std::pair;
-        using TSizeDoublePrVec = std::vector;
-
-    public:
-        //! Add a labeled feature vector \p x.
The label is either - //! interesting or boring. - //! - //! \param[in] x The feature vector. - //! \param[in] interesting The label of \p x. - void addTrainingData(const TDoubleVec &x, bool interesting); - - //! Learn the value of precision of the Laplace prior. - //! - //! \param[in] style The style of training to use. - //! \see EHyperparametersStyle for more details on the options. - void learnHyperparameter(EHyperparametersStyle style); - - //! Learn the parameters of the logistic model based on the - //! training data added so far. - //! - //! \param[out] result The trained logistic model. - bool learn(CLogisticRegressionModel &result); +class MATHS_EXPORT CLassoLogisticRegressionDense : public CLassoLogisticRegression { +public: + using TSizeDoublePr = std::pair; + using TSizeDoublePrVec = std::vector; + +public: + //! Add a labeled feature vector \p x. The label is either + //! interesting or boring. + //! + //! \param[in] x The feature vector. + //! \param[in] interesting The label of \p x. + void addTrainingData(const TDoubleVec& x, bool interesting); + + //! Learn the value of precision of the Laplace prior. + //! + //! \param[in] style The style of training to use. + //! \see EHyperparametersStyle for more details on the options. + void learnHyperparameter(EHyperparametersStyle style); + + //! Learn the parameters of the logistic model based on the + //! training data added so far. + //! + //! \param[out] result The trained logistic model. + bool learn(CLogisticRegressionModel& result); }; //! \brief Lasso logistic regression using sparse encoding of the @@ -444,34 +372,32 @@ class MATHS_EXPORT CLassoLogisticRegressionDense : public CLassoLogisticRegressi //! IMPLEMENTATION DECISIONS:\n //! This uses a sparse encoding of the feature vector for the case //! that they are high dimensional, but most components are zero. -class MATHS_EXPORT CLassoLogisticRegressionSparse : CLassoLogisticRegression -{ - public: - using TSizeDoublePr = std::pair; - using TSizeDoublePrVec = std::vector; - using EHyperparametersStyle = lasso_logistic_regression_detail::EHyperparametersStyle; - - public: - //! Add a labeled feature vector \p x. The label is either - //! interesting or boring. - //! - //! \param[in] x The feature vector. - //! \param[in] interesting The label of \p x. - void addTrainingData(const TSizeDoublePrVec &x, bool interesting); - - //! Learn the value of precision of the Laplace prior. - //! - //! \param[in] style The style of training to use. - //! \see EHyperparametersStyle for more details on the options. - void learnHyperparameter(EHyperparametersStyle style); - - //! Learn the parameters of the logistic model based on the - //! training data added so far. - //! - //! \param[out] result The trained logistic model. - bool learn(CLogisticRegressionModel &result); +class MATHS_EXPORT CLassoLogisticRegressionSparse : CLassoLogisticRegression { +public: + using TSizeDoublePr = std::pair; + using TSizeDoublePrVec = std::vector; + using EHyperparametersStyle = lasso_logistic_regression_detail::EHyperparametersStyle; + +public: + //! Add a labeled feature vector \p x. The label is either + //! interesting or boring. + //! + //! \param[in] x The feature vector. + //! \param[in] interesting The label of \p x. + void addTrainingData(const TSizeDoublePrVec& x, bool interesting); + + //! Learn the value of precision of the Laplace prior. + //! + //! \param[in] style The style of training to use. + //! \see EHyperparametersStyle for more details on the options. 
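// A usage sketch for the dense variant, assuming the train/apply sequence
// implied by the interface above: addTrainingData, then learnHyperparameter,
// then learn. The function exampleTrainDenseLasso and its arguments are
// hypothetical.
#include <cstddef>
#include <vector>

void exampleTrainDenseLasso(const std::vector<std::vector<double>>& features,
                            const std::vector<bool>& interesting) {
    using namespace ml::maths;
    CLassoLogisticRegressionDense clf;
    for (std::size_t i = 0; i < features.size(); ++i) {
        clf.addTrainingData(features[i], interesting[i]);
    }
    clf.learnHyperparameter(lasso_logistic_regression_detail::E_LambdaNormBased);

    CLogisticRegressionModel model;
    if (clf.learn(model)) {
        double probability;
        if (model(features[0], probability)) {
            // probability estimates P(interesting | features[0]).
        }
    }
}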
+ void learnHyperparameter(EHyperparametersStyle style); + + //! Learn the parameters of the logistic model based on the + //! training data added so far. + //! + //! \param[out] result The trained logistic model. + bool learn(CLogisticRegressionModel& result); }; - } } diff --git a/include/maths/CLinearAlgebra.h b/include/maths/CLinearAlgebra.h index 1b92278a74..4f4a4232e9 100644 --- a/include/maths/CLinearAlgebra.h +++ b/include/maths/CLinearAlgebra.h @@ -19,178 +19,144 @@ #include #include #include -#include #include +#include #include #include BOOST_GEOMETRY_REGISTER_BOOST_ARRAY_CS(cs::cartesian) -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace linear_algebra_detail -{ +namespace linear_algebra_detail { //! SFINAE check that \p N is at least 1. struct CEmpty {}; -template struct CBoundsCheck { using InRange = CEmpty; }; -template<> struct CBoundsCheck<0> {}; +template +struct CBoundsCheck { + using InRange = CEmpty; +}; +template<> +struct CBoundsCheck<0> {}; //! \brief Common vector functionality for variable storage type. template -struct SSymmetricMatrix -{ +struct SSymmetricMatrix { using Type = typename STORAGE::value_type; //! Get read only reference. - inline const SSymmetricMatrix &base() const { return *this; } + inline const SSymmetricMatrix& base() const { return *this; } //! Get writable reference. - inline SSymmetricMatrix &base() { return *this; } + inline SSymmetricMatrix& base() { return *this; } //! Set this vector equal to \p other. template - void assign(const SSymmetricMatrix &other) - { - std::copy(other.m_LowerTriangle.begin(), - other.m_LowerTriangle.end(), - m_LowerTriangle.begin()); + void assign(const SSymmetricMatrix& other) { + std::copy(other.m_LowerTriangle.begin(), other.m_LowerTriangle.end(), m_LowerTriangle.begin()); } //! Create from a delimited string. - bool fromDelimited(const std::string &str); + bool fromDelimited(const std::string& str); //! Convert to a delimited string. std::string toDelimited() const; //! Get the i,j 'th component (no bounds checking). - inline Type element(std::size_t i, std::size_t j) const - { - if (i < j) - { + inline Type element(std::size_t i, std::size_t j) const { + if (i < j) { std::swap(i, j); } return m_LowerTriangle[i * (i + 1) / 2 + j]; } //! Get the i,j 'th component (no bounds checking). - inline Type &element(std::size_t i, std::size_t j) - { - if (i < j) - { + inline Type& element(std::size_t i, std::size_t j) { + if (i < j) { std::swap(i, j); } return m_LowerTriangle[i * (i + 1) / 2 + j]; } //! Component-wise negative. - void negative() - { - for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) - { + void negative() { + for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) { m_LowerTriangle[i] = -m_LowerTriangle[i]; } } //! Matrix subtraction. - void minusEquals(const SSymmetricMatrix &rhs) - { - for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) - { + void minusEquals(const SSymmetricMatrix& rhs) { + for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) { m_LowerTriangle[i] -= rhs.m_LowerTriangle[i]; } } //! Matrix addition. - void plusEquals(const SSymmetricMatrix &rhs) - { - for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) - { + void plusEquals(const SSymmetricMatrix& rhs) { + for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) { m_LowerTriangle[i] += rhs.m_LowerTriangle[i]; } } //! Component-wise multiplication. 
- void multiplyEquals(const SSymmetricMatrix &rhs) - { - for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) - { + void multiplyEquals(const SSymmetricMatrix& rhs) { + for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) { m_LowerTriangle[i] *= rhs.m_LowerTriangle[i]; } } //! Scalar multiplication. - void multiplyEquals(Type scale) - { - for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) - { + void multiplyEquals(Type scale) { + for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) { m_LowerTriangle[i] *= scale; } } //! Scalar division. - void divideEquals(Type scale) - { - for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) - { + void divideEquals(Type scale) { + for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) { m_LowerTriangle[i] /= scale; } } //! Check if two matrices are identically equal. - bool equal(const SSymmetricMatrix &other) const - { - return m_LowerTriangle == other.m_LowerTriangle; - } + bool equal(const SSymmetricMatrix& other) const { return m_LowerTriangle == other.m_LowerTriangle; } //! Lexicographical total ordering. - bool less(const SSymmetricMatrix &rhs) const - { - return m_LowerTriangle < rhs.m_LowerTriangle; - } + bool less(const SSymmetricMatrix& rhs) const { return m_LowerTriangle < rhs.m_LowerTriangle; } //! Check if this is zero. - bool isZero() const - { - return std::find_if(m_LowerTriangle.begin(), m_LowerTriangle.end(), - [](double ei) { return ei != 0.0; }) == m_LowerTriangle.end(); + bool isZero() const { + return std::find_if(m_LowerTriangle.begin(), m_LowerTriangle.end(), [](double ei) { return ei != 0.0; }) == m_LowerTriangle.end(); } //! Get the matrix diagonal. template - VECTOR diagonal(std::size_t d) const - { + VECTOR diagonal(std::size_t d) const { VECTOR result(d); - for (std::size_t i = 0u; i < d; ++i) - { + for (std::size_t i = 0u; i < d; ++i) { result[i] = this->element(i, i); } return result; } //! Get the trace. - Type trace(std::size_t d) const - { + Type trace(std::size_t d) const { Type result(0); - for (std::size_t i = 0u; i < d; ++i) - { + for (std::size_t i = 0u; i < d; ++i) { result += this->element(i, i); } return result; } //! The Frobenius norm. - double frobenius(std::size_t d) const - { + double frobenius(std::size_t d) const { double result = 0.0; - for (std::size_t i = 0u, i_ = 0u; i < d; ++i, ++i_) - { - for (std::size_t j = 0u; j < i; ++j, ++i_) - { + for (std::size_t i = 0u, i_ = 0u; i < d; ++i, ++i_) { + for (std::size_t j = 0u; j < i; ++j, ++i_) { result += 2.0 * m_LowerTriangle[i_] * m_LowerTriangle[i_]; } result += m_LowerTriangle[i_] * m_LowerTriangle[i_]; @@ -200,26 +166,20 @@ struct SSymmetricMatrix //! Convert to the MATRIX representation. template - inline MATRIX &toType(std::size_t d, MATRIX &result) const - { - for (std::size_t i = 0u, i_ = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { - result(i,j) = result(j,i) = m_LowerTriangle[i_]; + inline MATRIX& toType(std::size_t d, MATRIX& result) const { + for (std::size_t i = 0u, i_ = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { + result(i, j) = result(j, i) = m_LowerTriangle[i_]; } } return result; } //! Get a checksum of the elements of this matrix. 
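// The packed layout element() uses above, restated as a standalone constexpr
// function for illustration: the lower triangle is stored row by row, so
// (i, j) with i >= j lives at index i * (i + 1) / 2 + j, and (i, j), (j, i)
// share a slot.
#include <cstddef>

constexpr std::size_t packedIndex(std::size_t i, std::size_t j) {
    return i < j ? packedIndex(j, i) : i * (i + 1) / 2 + j;
}
// For a 3 x 3 matrix the six slots are, in order, (0,0), (1,0), (1,1),
// (2,0), (2,1), (2,2); this is also why frobenius() above doubles the
// off-diagonal terms.
static_assert(packedIndex(2, 1) == 4, "row-major packed lower triangle");
static_assert(packedIndex(1, 2) == 4, "symmetric pairs share storage");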
- uint64_t checksum() const - { + uint64_t checksum() const { uint64_t result = 0u; - for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) - { - result = core::CHashing::hashCombine( - result, static_cast(m_LowerTriangle[i])); + for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) { + result = core::CHashing::hashCombine(result, static_cast(m_LowerTriangle[i])); } return result; } @@ -229,7 +189,6 @@ struct SSymmetricMatrix } // linear_algebra_detail:: - // ************************ STACK SYMMETRIC MATRIX ************************ //! \brief A stack based lightweight dense symmetric matrix class. @@ -263,290 +222,227 @@ struct SSymmetricMatrix //! \tparam T The floating point type. //! \tparam N The matrix dimension. template -class CSymmetricMatrixNxN : private boost::equality_comparable< CSymmetricMatrixNxN, - boost::partially_ordered< CSymmetricMatrixNxN, - boost::addable< CSymmetricMatrixNxN, - boost::subtractable< CSymmetricMatrixNxN, - boost::multipliable< CSymmetricMatrixNxN, - boost::multipliable2< CSymmetricMatrixNxN, T, - boost::dividable2< CSymmetricMatrixNxN, T > > > > > > >, - private linear_algebra_detail::SSymmetricMatrix >, - private linear_algebra_detail::CBoundsCheck::InRange -{ - private: - using TBase = linear_algebra_detail::SSymmetricMatrix >; - template friend class CSymmetricMatrixNxN; - - public: - using TArray = T[N][N]; - using TVec = std::vector; - using TVecVec = std::vector; - using TConstIterator = typename boost::array::const_iterator; - - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() - { - return core::memory_detail::SDynamicSizeAlwaysZero::value(); - } - - public: - //! Set to multiple of ones matrix. - explicit CSymmetricMatrixNxN(T v = T(0)) - { - std::fill_n(&TBase::m_LowerTriangle[0], N * (N + 1) / 2, v); - } - - //! Construct from C-style array of arrays. - explicit CSymmetricMatrixNxN(const TArray &m) - { - for (std::size_t i = 0u, i_ = 0u; i < N; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { - TBase::m_LowerTriangle[i_] = m[i][j]; - } +class CSymmetricMatrixNxN + : private boost::equality_comparable< + CSymmetricMatrixNxN, + boost::partially_ordered< + CSymmetricMatrixNxN, + boost::addable< + CSymmetricMatrixNxN, + boost::subtractable< + CSymmetricMatrixNxN, + boost::multipliable< + CSymmetricMatrixNxN, + boost::multipliable2, T, boost::dividable2, T>>>>>>>, + private linear_algebra_detail::SSymmetricMatrix>, + private linear_algebra_detail::CBoundsCheck::InRange { +private: + using TBase = linear_algebra_detail::SSymmetricMatrix>; + template + friend class CSymmetricMatrixNxN; + +public: + using TArray = T[N][N]; + using TVec = std::vector; + using TVecVec = std::vector; + using TConstIterator = typename boost::array::const_iterator; + +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + +public: + //! Set to multiple of ones matrix. + explicit CSymmetricMatrixNxN(T v = T(0)) { std::fill_n(&TBase::m_LowerTriangle[0], N * (N + 1) / 2, v); } + + //! Construct from C-style array of arrays. + explicit CSymmetricMatrixNxN(const TArray& m) { + for (std::size_t i = 0u, i_ = 0u; i < N; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { + TBase::m_LowerTriangle[i_] = m[i][j]; } } + } - //! Construct from a vector of vectors. 
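// A sketch of the C-style array constructor above, assuming the template
// parameters are the value type and dimension per the \tparam documentation:
// only the lower triangle of the input is read, so the upper triangle may be
// left unset.
void stackMatrixSketch() {
    double m[3][3] = {{1.0, 0.0, 0.0},
                      {2.0, 3.0, 0.0},
                      {4.0, 5.0, 6.0}};
    ml::maths::CSymmetricMatrixNxN<double, 3> s(m);
    // By symmetry s(0, 2) == s(2, 0) == 4.0, and s.trace() == 10.0.
}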
- explicit CSymmetricMatrixNxN(const TVecVec &m) - { - for (std::size_t i = 0u, i_ = 0u; i < N; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { - TBase::m_LowerTriangle[i_] = m[i][j]; - } + //! Construct from a vector of vectors. + explicit CSymmetricMatrixNxN(const TVecVec& m) { + for (std::size_t i = 0u, i_ = 0u; i < N; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { + TBase::m_LowerTriangle[i_] = m[i][j]; } } + } - //! Construct from a small vector of small vectors. - template - explicit CSymmetricMatrixNxN(const core::CSmallVectorBase> &m) - { - for (std::size_t i = 0u, i_ = 0u; i < N; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { - TBase::m_LowerTriangle[i_] = m[i][j]; - } + //! Construct from a small vector of small vectors. + template + explicit CSymmetricMatrixNxN(const core::CSmallVectorBase>& m) { + for (std::size_t i = 0u, i_ = 0u; i < N; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { + TBase::m_LowerTriangle[i_] = m[i][j]; } } + } - //! Construct from a forward iterator. - //! - //! \warning The user must ensure that the range iterated has - //! at least N (N+1) / 2 items. - template - CSymmetricMatrixNxN(ITR begin, ITR end) - { - for (std::size_t i = 0u; - i < N * (N + 1) / 2 && begin != end; - ++i, ++begin) - { - TBase::m_LowerTriangle[i] = static_cast(*begin); - } + //! Construct from a forward iterator. + //! + //! \warning The user must ensure that the range iterated has + //! at least N (N+1) / 2 items. + template + CSymmetricMatrixNxN(ITR begin, ITR end) { + for (std::size_t i = 0u; i < N * (N + 1) / 2 && begin != end; ++i, ++begin) { + TBase::m_LowerTriangle[i] = static_cast(*begin); } + } - explicit CSymmetricMatrixNxN(ESymmetricMatrixType type, const CVectorNx1 &x); - - //! Construct from a dense matrix. - template - CSymmetricMatrixNxN(const CDenseMatrixInitializer &m); - - //! Copy construction if the underlying type is implicitly - //! convertible. - template - CSymmetricMatrixNxN(const CSymmetricMatrixNxN &other) - { - this->operator=(other); - } + explicit CSymmetricMatrixNxN(ESymmetricMatrixType type, const CVectorNx1& x); - //! Assignment if the underlying type is implicitly convertible. - template - const CSymmetricMatrixNxN &operator=(const CSymmetricMatrixNxN &other) - { - this->assign(other.base()); - return *this; - } + //! Construct from a dense matrix. + template + CSymmetricMatrixNxN(const CDenseMatrixInitializer& m); - //! \name Persistence - //@{ - //! Create from a delimited string. - bool fromDelimited(const std::string &str) - { - return this->TBase::fromDelimited(str); - } + //! Copy construction if the underlying type is implicitly + //! convertible. + template + CSymmetricMatrixNxN(const CSymmetricMatrixNxN& other) { + this->operator=(other); + } - //! Convert to a delimited string. - std::string toDelimited() const - { - return this->TBase::toDelimited(); - } - //@} + //! Assignment if the underlying type is implicitly convertible. + template + const CSymmetricMatrixNxN& operator=(const CSymmetricMatrixNxN& other) { + this->assign(other.base()); + return *this; + } - //! Get the number of rows. - std::size_t rows() const { return N; } + //! \name Persistence + //@{ + //! Create from a delimited string. + bool fromDelimited(const std::string& str) { return this->TBase::fromDelimited(str); } - //! Get the number of columns. - std::size_t columns() const { return N; } + //! Convert to a delimited string. + std::string toDelimited() const { return this->TBase::toDelimited(); } + //@} - //! 
Get the i,j 'th component (no bounds checking). - inline T operator()(std::size_t i, std::size_t j) const - { - return this->element(i, j); - } + //! Get the number of rows. + std::size_t rows() const { return N; } - //! Get the i,j 'th component (no bounds checking). - inline T &operator()(std::size_t i, std::size_t j) - { - return this->element(i, j); - } + //! Get the number of columns. + std::size_t columns() const { return N; } - //! Get an iterator over the elements. - TConstIterator begin() const { return TBase::m_LowerTriangle.begin(); } + //! Get the i,j 'th component (no bounds checking). + inline T operator()(std::size_t i, std::size_t j) const { return this->element(i, j); } - //! Get an iterator to the end of the elements. - TConstIterator end() const { return TBase::m_LowerTriangle.end(); } + //! Get the i,j 'th component (no bounds checking). + inline T& operator()(std::size_t i, std::size_t j) { return this->element(i, j); } - //! Component-wise negation. - CSymmetricMatrixNxN operator-() const - { - CSymmetricMatrixNxN result(*this); - result.negative(); - return result; - } + //! Get an iterator over the elements. + TConstIterator begin() const { return TBase::m_LowerTriangle.begin(); } - //! Matrix subtraction. - const CSymmetricMatrixNxN &operator-=(const CSymmetricMatrixNxN &rhs) - { - this->minusEquals(rhs.base()); - return *this; - } + //! Get an iterator to the end of the elements. + TConstIterator end() const { return TBase::m_LowerTriangle.end(); } - //! Matrix addition. - const CSymmetricMatrixNxN &operator+=(const CSymmetricMatrixNxN &rhs) - { - this->plusEquals(rhs.base()); - return *this; - } + //! Component-wise negation. + CSymmetricMatrixNxN operator-() const { + CSymmetricMatrixNxN result(*this); + result.negative(); + return result; + } - //! Component-wise multiplication. - //! - //! \note This is handy in some cases and since symmetric matrices - //! are not closed under regular matrix multiplication we use - //! multiplication operator for implementing the Hadamard product. - const CSymmetricMatrixNxN &operator*=(const CSymmetricMatrixNxN &rhs) - { - this->multiplyEquals(rhs); - return *this; - } + //! Matrix subtraction. + const CSymmetricMatrixNxN& operator-=(const CSymmetricMatrixNxN& rhs) { + this->minusEquals(rhs.base()); + return *this; + } - //! Scalar multiplication. - const CSymmetricMatrixNxN &operator*=(T scale) - { - this->multiplyEquals(scale); - return *this; - } + //! Matrix addition. + const CSymmetricMatrixNxN& operator+=(const CSymmetricMatrixNxN& rhs) { + this->plusEquals(rhs.base()); + return *this; + } - //! Scalar division. - const CSymmetricMatrixNxN &operator/=(T scale) - { - this->divideEquals(scale); - return *this; - } + //! Component-wise multiplication. + //! + //! \note This is handy in some cases and since symmetric matrices + //! are not closed under regular matrix multiplication we use + //! multiplication operator for implementing the Hadamard product. + const CSymmetricMatrixNxN& operator*=(const CSymmetricMatrixNxN& rhs) { + this->multiplyEquals(rhs); + return *this; + } - // Matrix multiplication doesn't necessarily produce a symmetric - // matrix because matrix multiplication is non-commutative. - // Matrix division requires computing the inverse and is not - // supported. + //! Scalar multiplication. + const CSymmetricMatrixNxN& operator*=(T scale) { + this->multiplyEquals(scale); + return *this; + } - //! Check if two matrices are identically equal. 
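// Because symmetric matrices are not closed under true matrix
// multiplication, operator* above is the element-wise (Hadamard) product; a
// sketch of what that means in practice:
void hadamardSketch() {
    double a[2][2] = {{1.0, 2.0}, {2.0, 3.0}};
    double b[2][2] = {{5.0, 6.0}, {6.0, 7.0}};
    ml::maths::CSymmetricMatrixNxN<double, 2> ma(a);
    ml::maths::CSymmetricMatrixNxN<double, 2> mb(b);
    ma *= mb;   // ma(i, j) == a[i][j] * b[i][j] element-wise.
    // ma(0, 1) == 12.0 and ma(1, 1) == 21.0; the result stays symmetric.
}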
- bool operator==(const CSymmetricMatrixNxN &other) const - { - return this->equal(other.base()); - } + //! Scalar division. + const CSymmetricMatrixNxN& operator/=(T scale) { + this->divideEquals(scale); + return *this; + } - //! Lexicographical total ordering. - bool operator<(const CSymmetricMatrixNxN &rhs) const - { - return this->less(rhs.base()); - } + // Matrix multiplication doesn't necessarily produce a symmetric + // matrix because matrix multiplication is non-commutative. + // Matrix division requires computing the inverse and is not + // supported. - //! Check if this is zero. - bool isZero() const - { - return this->TBase::isZero(); - } + //! Check if two matrices are identically equal. + bool operator==(const CSymmetricMatrixNxN& other) const { return this->equal(other.base()); } - //! Get the matrix diagonal. - template - VECTOR diagonal() const - { - return this->TBase::template diagonal(N); - } + //! Lexicographical total ordering. + bool operator<(const CSymmetricMatrixNxN& rhs) const { return this->less(rhs.base()); } - //! Get the trace. - T trace() const - { - return this->TBase::trace(N); - } + //! Check if this is zero. + bool isZero() const { return this->TBase::isZero(); } - //! Get the Frobenius norm. - double frobenius() const - { - return this->TBase::frobenius(N); - } + //! Get the matrix diagonal. + template + VECTOR diagonal() const { + return this->TBase::template diagonal(N); + } - //! Convert to a vector of vectors. - template - inline VECTOR_OF_VECTORS toVectors() const - { - VECTOR_OF_VECTORS result(N); - for (std::size_t i = 0u; i < N; ++i) - { - result[i].resize(N); - } - for (std::size_t i = 0u; i < N; ++i) - { - result[i][i] = this->operator()(i, i); - for (std::size_t j = 0u; j < i; ++j) - { - result[i][j] = result[j][i] = this->operator()(i, j); - } + //! Get the trace. + T trace() const { return this->TBase::trace(N); } + + //! Get the Frobenius norm. + double frobenius() const { return this->TBase::frobenius(N); } + + //! Convert to a vector of vectors. + template + inline VECTOR_OF_VECTORS toVectors() const { + VECTOR_OF_VECTORS result(N); + for (std::size_t i = 0u; i < N; ++i) { + result[i].resize(N); + } + for (std::size_t i = 0u; i < N; ++i) { + result[i][i] = this->operator()(i, i); + for (std::size_t j = 0u; j < i; ++j) { + result[i][j] = result[j][i] = this->operator()(i, j); } - return result; } + return result; + } - //! Convert to the specified matrix representation. - //! - //! \note The copy should be avoided by RVO. - template - inline MATRIX toType() const - { - MATRIX result(N, N); - return this->TBase::toType(N, result); - } + //! Convert to the specified matrix representation. + //! + //! \note The copy should be avoided by RVO. + template + inline MATRIX toType() const { + MATRIX result(N, N); + return this->TBase::toType(N, result); + } - //! Get a checksum for the matrix. - uint64_t checksum() const - { - return this->TBase::checksum(); - } + //! Get a checksum for the matrix. + uint64_t checksum() const { return this->TBase::checksum(); } }; //! \brief Gets a zero symmetric matrix with specified dimension. template -struct SZero> -{ - static CSymmetricMatrixNxN get(std::size_t /*dimension*/) - { - return CSymmetricMatrixNxN(T(0)); - } +struct SZero> { + static CSymmetricMatrixNxN get(std::size_t /*dimension*/) { return CSymmetricMatrixNxN(T(0)); } }; - // ************************ HEAP SYMMETRIC MATRIX ************************ //! \brief A heap based lightweight dense symmetric matrix class. 
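// A sketch of why the SZero trait exists: generic code can obtain a zero of
// either symmetric matrix type from a runtime dimension, even though the
// fixed-size variant ignores the argument. zeroLike is a hypothetical helper.
#include <cstddef>

template<typename MATRIX>
MATRIX zeroLike(std::size_t dimension) {
    return ml::maths::SZero<MATRIX>::get(dimension);
}
// zeroLike<ml::maths::CSymmetricMatrixNxN<double, 3>>(3) and, for the heap
// class defined next, zeroLike<ml::maths::CSymmetricMatrix<double>>(3) both
// produce 3 x 3 zero matrices.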
@@ -579,412 +475,331 @@ struct SZero> //! //! \tparam T The floating point type. template -class CSymmetricMatrix : private boost::equality_comparable< CSymmetricMatrix, - boost::partially_ordered< CSymmetricMatrix, - boost::addable< CSymmetricMatrix, - boost::subtractable< CSymmetricMatrix, - boost::multipliable< CSymmetricMatrix, - boost::multipliable2< CSymmetricMatrix, T, - boost::dividable2< CSymmetricMatrix, T > > > > > > >, - private linear_algebra_detail::SSymmetricMatrix > -{ - private: - using TBase = linear_algebra_detail::SSymmetricMatrix >; - template friend class CSymmetricMatrix; - - public: - using TArray = std::vector >; - using TConstIterator = typename std::vector::const_iterator; - - public: - //! Set to multiple of ones matrix. - explicit CSymmetricMatrix(std::size_t d = 0u, T v = T(0)) : m_D(d) - { - if (d > 0) - { - TBase::m_LowerTriangle.resize(d * (d + 1) / 2, v); - } +class CSymmetricMatrix + : private boost::equality_comparable< + CSymmetricMatrix, + boost::partially_ordered< + CSymmetricMatrix, + boost::addable< + CSymmetricMatrix, + boost::subtractable< + CSymmetricMatrix, + boost::multipliable, + boost::multipliable2, T, boost::dividable2, T>>>>>>>, + private linear_algebra_detail::SSymmetricMatrix> { +private: + using TBase = linear_algebra_detail::SSymmetricMatrix>; + template + friend class CSymmetricMatrix; + +public: + using TArray = std::vector>; + using TConstIterator = typename std::vector::const_iterator; + +public: + //! Set to multiple of ones matrix. + explicit CSymmetricMatrix(std::size_t d = 0u, T v = T(0)) : m_D(d) { + if (d > 0) { + TBase::m_LowerTriangle.resize(d * (d + 1) / 2, v); } + } - //! Construct from C-style array of arrays. - explicit CSymmetricMatrix(const TArray &m) : m_D(m.size()) - { - TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); - for (std::size_t i = 0u, i_ = 0u; i < m_D; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { - TBase::m_LowerTriangle[i_] = m[i][j]; - } + //! Construct from C-style array of arrays. + explicit CSymmetricMatrix(const TArray& m) : m_D(m.size()) { + TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); + for (std::size_t i = 0u, i_ = 0u; i < m_D; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { + TBase::m_LowerTriangle[i_] = m[i][j]; } } + } - //! Construct from a small vector of small vectors. - template - explicit CSymmetricMatrix(const core::CSmallVectorBase> &m) : m_D(m.size()) - { - TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); - for (std::size_t i = 0u, i_ = 0u; i < m_D; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { - TBase::m_LowerTriangle[i_] = m[i][j]; - } + //! Construct from a small vector of small vectors. + template + explicit CSymmetricMatrix(const core::CSmallVectorBase>& m) : m_D(m.size()) { + TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); + for (std::size_t i = 0u, i_ = 0u; i < m_D; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { + TBase::m_LowerTriangle[i_] = m[i][j]; } } + } - //! Construct from a forward iterator. - //! - //! \warning The user must ensure that the range iterated has - //! at least N (N+1) / 2 items. - template - CSymmetricMatrix(ITR begin, ITR end) - { - m_D = this->dimension(std::distance(begin, end)); - TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); - for (std::size_t i = 0u; - i < m_D * (m_D + 1) / 2 && begin != end; - ++i, ++begin) - { - TBase::m_LowerTriangle[i] = static_cast(*begin); - } + //! Construct from a forward iterator. + //! + //! \warning The user must ensure that the range iterated has + //! 
at least N (N+1) / 2 items. + template + CSymmetricMatrix(ITR begin, ITR end) { + m_D = this->dimension(std::distance(begin, end)); + TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); + for (std::size_t i = 0u; i < m_D * (m_D + 1) / 2 && begin != end; ++i, ++begin) { + TBase::m_LowerTriangle[i] = static_cast(*begin); } + } - explicit CSymmetricMatrix(ESymmetricMatrixType type, const CVector &x); + explicit CSymmetricMatrix(ESymmetricMatrixType type, const CVector& x); - //! Construct from a dense matrix. - template - CSymmetricMatrix(const CDenseMatrixInitializer &m); + //! Construct from a dense matrix. + template + CSymmetricMatrix(const CDenseMatrixInitializer& m); - //! Copy construction if the underlying type is implicitly - //! convertible. - template - CSymmetricMatrix(const CSymmetricMatrix &other) : m_D(other.m_D) - { - this->operator=(other); - } + //! Copy construction if the underlying type is implicitly + //! convertible. + template + CSymmetricMatrix(const CSymmetricMatrix& other) : m_D(other.m_D) { + this->operator=(other); + } - //! Assignment if the underlying type is implicitly convertible. - template - const CSymmetricMatrix &operator=(const CSymmetricMatrix &other) - { - m_D = other.m_D; - TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); - this->assign(other.base()); - return *this; - } + //! Assignment if the underlying type is implicitly convertible. + template + const CSymmetricMatrix& operator=(const CSymmetricMatrix& other) { + m_D = other.m_D; + TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); + this->assign(other.base()); + return *this; + } - //! Efficiently swap the contents of two matrices. - void swap(CSymmetricMatrix &other) - { - std::swap(m_D, other.m_D); - TBase::m_LowerTriangle.swap(other.TBase::m_LowerTriangle); - } + //! Efficiently swap the contents of two matrices. + void swap(CSymmetricMatrix& other) { + std::swap(m_D, other.m_D); + TBase::m_LowerTriangle.swap(other.TBase::m_LowerTriangle); + } - //! \name Persistence - //@{ - //! Create from a delimited string. - bool fromDelimited(const std::string &str) - { - if (this->TBase::fromDelimited(str)) - { - m_D = this->dimension(TBase::m_X.size()); - return true; - } - return false; + //! \name Persistence + //@{ + //! Create from a delimited string. + bool fromDelimited(const std::string& str) { + if (this->TBase::fromDelimited(str)) { + m_D = this->dimension(TBase::m_X.size()); + return true; } + return false; + } - //! Convert to a delimited string. - std::string toDelimited() const - { - return this->TBase::toDelimited(); - } - //@} + //! Convert to a delimited string. + std::string toDelimited() const { return this->TBase::toDelimited(); } + //@} - //! Get the number of rows. - std::size_t rows() const { return m_D; } + //! Get the number of rows. + std::size_t rows() const { return m_D; } - //! Get the number of columns. - std::size_t columns() const { return m_D; } + //! Get the number of columns. + std::size_t columns() const { return m_D; } - //! Get the i,j 'th component (no bounds checking). - inline T operator()(std::size_t i, std::size_t j) const - { - return this->element(i, j); - } + //! Get the i,j 'th component (no bounds checking). + inline T operator()(std::size_t i, std::size_t j) const { return this->element(i, j); } - //! Get the i,j 'th component (no bounds checking). - inline T &operator()(std::size_t i, std::size_t j) - { - return this->element(i, j); - } + //! Get the i,j 'th component (no bounds checking). 
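// A sketch of the iterator-range constructor above: the input is read as the
// packed lower triangle, and dimension() (defined further down) inverts
// n = d (d + 1) / 2 via the positive root d = (sqrt(8 n + 1) - 1) / 2, with
// the + 0.5 merely rounding to the nearest integer, so six values yield a
// 3 x 3 matrix.
#include <iterator>

void packedConstructionSketch() {
    double packed[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
    ml::maths::CSymmetricMatrix<double> m(std::begin(packed), std::end(packed));
    // m.rows() == 3 and m(2, 1) == m(1, 2) == 5.0.
}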
+ inline T& operator()(std::size_t i, std::size_t j) { return this->element(i, j); } - //! Get an iterator over the elements. - TConstIterator begin() const { return TBase::m_X.begin(); } + //! Get an iterator over the elements. + TConstIterator begin() const { return TBase::m_X.begin(); } - //! Get an iterator to the end of the elements. - TConstIterator end() const { return TBase::m_X.end(); } + //! Get an iterator to the end of the elements. + TConstIterator end() const { return TBase::m_X.end(); } - //! Component-wise negation. - CSymmetricMatrix operator-() const - { - CSymmetricMatrix result(*this); - result.negative(); - return result; - } - - //! Matrix subtraction. - const CSymmetricMatrix &operator-=(const CSymmetricMatrix &rhs) - { - this->minusEquals(rhs.base()); - return *this; - } + //! Component-wise negation. + CSymmetricMatrix operator-() const { + CSymmetricMatrix result(*this); + result.negative(); + return result; + } - //! Matrix addition. - const CSymmetricMatrix &operator+=(const CSymmetricMatrix &rhs) - { - this->plusEquals(rhs.base()); - return *this; - } + //! Matrix subtraction. + const CSymmetricMatrix& operator-=(const CSymmetricMatrix& rhs) { + this->minusEquals(rhs.base()); + return *this; + } - //! Component-wise multiplication. - //! - //! \note This is handy in some cases and since symmetric matrices - //! are not closed under regular matrix multiplication we use - //! multiplication operator for implementing the Hadamard product. - const CSymmetricMatrix &operator*=(const CSymmetricMatrix &rhs) - { - this->multiplyEquals(rhs); - return *this; - } + //! Matrix addition. + const CSymmetricMatrix& operator+=(const CSymmetricMatrix& rhs) { + this->plusEquals(rhs.base()); + return *this; + } - //! Scalar multiplication. - const CSymmetricMatrix &operator*=(T scale) - { - this->multiplyEquals(scale); - return *this; - } + //! Component-wise multiplication. + //! + //! \note This is handy in some cases and since symmetric matrices + //! are not closed under regular matrix multiplication we use + //! multiplication operator for implementing the Hadamard product. + const CSymmetricMatrix& operator*=(const CSymmetricMatrix& rhs) { + this->multiplyEquals(rhs); + return *this; + } - //! Scalar division. - const CSymmetricMatrix &operator/=(T scale) - { - this->divideEquals(scale); - return *this; - } + //! Scalar multiplication. + const CSymmetricMatrix& operator*=(T scale) { + this->multiplyEquals(scale); + return *this; + } - // Matrix multiplication doesn't necessarily produce a symmetric - // matrix because matrix multiplication is non-commutative. - // Matrix division requires computing the inverse and is not - // supported. + //! Scalar division. + const CSymmetricMatrix& operator/=(T scale) { + this->divideEquals(scale); + return *this; + } - //! Check if two matrices are identically equal. - bool operator==(const CSymmetricMatrix &other) const - { - return this->equal(other.base()); - } + // Matrix multiplication doesn't necessarily produce a symmetric + // matrix because matrix multiplication is non-commutative. + // Matrix division requires computing the inverse and is not + // supported. - //! Lexicographical total ordering. - bool operator<(const CSymmetricMatrix &rhs) const - { - return this->less(rhs.base()); - } + //! Check if two matrices are identically equal. + bool operator==(const CSymmetricMatrix& other) const { return this->equal(other.base()); } - //! Check if this is zero. - bool isZero() const - { - return this->TBase::isZero(); - } + //! 
Lexicographical total ordering. + bool operator<(const CSymmetricMatrix& rhs) const { return this->less(rhs.base()); } - //! Get the matrix diagonal. - template - VECTOR diagonal() const - { - return this->TBase::template diagonal(m_D); - } + //! Check if this is zero. + bool isZero() const { return this->TBase::isZero(); } - //! Get the trace. - T trace() const - { - return this->TBase::trace(m_D); - } + //! Get the matrix diagonal. + template + VECTOR diagonal() const { + return this->TBase::template diagonal(m_D); + } - //! The Frobenius norm. - double frobenius() const - { - return this->TBase::frobenius(m_D); - } + //! Get the trace. + T trace() const { return this->TBase::trace(m_D); } - //! Convert to a vector of vectors. - template - inline VECTOR_OF_VECTORS toVectors() const - { - VECTOR_OF_VECTORS result(m_D); - for (std::size_t i = 0u; i < m_D; ++i) - { - result[i].resize(m_D); - } - for (std::size_t i = 0u; i < m_D; ++i) - { - result[i][i] = this->operator()(i, i); - for (std::size_t j = 0u; j < i; ++j) - { - result[i][j] = result[j][i] = this->operator()(i, j); - } + //! The Frobenius norm. + double frobenius() const { return this->TBase::frobenius(m_D); } + + //! Convert to a vector of vectors. + template + inline VECTOR_OF_VECTORS toVectors() const { + VECTOR_OF_VECTORS result(m_D); + for (std::size_t i = 0u; i < m_D; ++i) { + result[i].resize(m_D); + } + for (std::size_t i = 0u; i < m_D; ++i) { + result[i][i] = this->operator()(i, i); + for (std::size_t j = 0u; j < i; ++j) { + result[i][j] = result[j][i] = this->operator()(i, j); } - return result; } + return result; + } - //! Convert to the specified matrix representation. - //! - //! \note The copy should be avoided by RVO. - template - inline MATRIX toType() const - { - MATRIX result(m_D, m_D); - return this->TBase::toType(m_D, result); - } + //! Convert to the specified matrix representation. + //! + //! \note The copy should be avoided by RVO. + template + inline MATRIX toType() const { + MATRIX result(m_D, m_D); + return this->TBase::toType(m_D, result); + } - //! Get a checksum for the matrix. - uint64_t checksum() const - { - return core::CHashing::hashCombine(this->TBase::checksum(), - static_cast(m_D)); - } + //! Get a checksum for the matrix. + uint64_t checksum() const { return core::CHashing::hashCombine(this->TBase::checksum(), static_cast(m_D)); } - private: - //! Compute the dimension from the number of elements. - std::size_t dimension(std::size_t n) const - { - return static_cast( - (std::sqrt(8.0 * static_cast(n) + 1.0) - 1.0) / 2.0 + 0.5); - } +private: + //! Compute the dimension from the number of elements. + std::size_t dimension(std::size_t n) const { + return static_cast((std::sqrt(8.0 * static_cast(n) + 1.0) - 1.0) / 2.0 + 0.5); + } - private: - //! The rows (and columns) of this matrix. - std::size_t m_D; +private: + //! The rows (and columns) of this matrix. + std::size_t m_D; }; //! \brief Gets a zero symmetric matrix with specified dimension. template -struct SZero> -{ - static CSymmetricMatrix get(std::size_t dimension) - { - return CSymmetricMatrix(dimension, T(0)); - } +struct SZero> { + static CSymmetricMatrix get(std::size_t dimension) { return CSymmetricMatrix(dimension, T(0)); } }; - -namespace linear_algebra_detail -{ +namespace linear_algebra_detail { //! \brief Common vector functionality for variable storage type. template -struct SVector -{ +struct SVector { using Type = typename STORAGE::value_type; //! Get read only reference. 
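
// A worked inversion of the element count, matching dimension() above: it
// solves n = d * (d + 1) / 2 for d. The positive root of d^2 + d - 2n = 0 is
//     d = (sqrt(8n + 1) - 1) / 2,
// and the extra 0.5 before truncation rounds to the nearest integer to guard
// against floating point error. For example, n = 10 packed elements gives
// d = (sqrt(81) - 1) / 2 = 4, i.e. a 4 x 4 symmetric matrix.
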
- inline const SVector &base() const { return *this; } + inline const SVector& base() const { return *this; } //! Get writable reference. - inline SVector &base() { return *this; } + inline SVector& base() { return *this; } //! Set this vector equal to \p other. template - void assign(const SVector &other) - { + void assign(const SVector& other) { std::copy(other.m_X.begin(), other.m_X.end(), m_X.begin()); } //! Create from delimited values. - bool fromDelimited(const std::string &str); + bool fromDelimited(const std::string& str); //! Convert to a delimited string. std::string toDelimited() const; //! Component-wise negative. - void negative() - { - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + void negative() { + for (std::size_t i = 0u; i < m_X.size(); ++i) { m_X[i] = -m_X[i]; } } //! Vector subtraction. - void minusEquals(const SVector &rhs) - { - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + void minusEquals(const SVector& rhs) { + for (std::size_t i = 0u; i < m_X.size(); ++i) { m_X[i] -= rhs.m_X[i]; } } //! Vector addition. - void plusEquals(const SVector &rhs) - { - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + void plusEquals(const SVector& rhs) { + for (std::size_t i = 0u; i < m_X.size(); ++i) { m_X[i] += rhs.m_X[i]; } } //! Component-wise multiplication. - void multiplyEquals(const SVector &scale) - { - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + void multiplyEquals(const SVector& scale) { + for (std::size_t i = 0u; i < m_X.size(); ++i) { m_X[i] *= scale.m_X[i]; } } //! Scalar multiplication. - void multiplyEquals(Type scale) - { - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + void multiplyEquals(Type scale) { + for (std::size_t i = 0u; i < m_X.size(); ++i) { m_X[i] *= scale; } } //! Component-wise division. - void divideEquals(const SVector &scale) - { - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + void divideEquals(const SVector& scale) { + for (std::size_t i = 0u; i < m_X.size(); ++i) { m_X[i] /= scale.m_X[i]; } } //! Scalar division. - void divideEquals(Type scale) - { - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + void divideEquals(Type scale) { + for (std::size_t i = 0u; i < m_X.size(); ++i) { m_X[i] /= scale; } } //! Compare this and \p other for equality. - bool equal(const SVector &other) const { return m_X == other.m_X; } + bool equal(const SVector& other) const { return m_X == other.m_X; } //! Lexicographical total ordering. - bool less(const SVector &rhs) const { return m_X < rhs.m_X; } + bool less(const SVector& rhs) const { return m_X < rhs.m_X; } //! Check if this is zero. - bool isZero() const - { - return std::find_if(m_X.begin(), m_X.end(), - [](double xi) { return xi != 0.0; }) == m_X.end(); + bool isZero() const { + return std::find_if(m_X.begin(), m_X.end(), [](double xi) { return xi != 0.0; }) == m_X.end(); } //! Inner product. - double inner(const SVector &covector) const - { + double inner(const SVector& covector) const { double result = 0.0; - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + for (std::size_t i = 0u; i < m_X.size(); ++i) { result += m_X[i] * covector.m_X[i]; } return result; @@ -992,22 +807,18 @@ struct SVector //! Inner product. template - double inner(const VECTOR &covector) const - { + double inner(const VECTOR& covector) const { double result = 0.0; - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + for (std::size_t i = 0u; i < m_X.size(); ++i) { result += m_X[i] * covector(i); } return result; } //! The L1 norm of the vector. 
- double L1() const - { + double L1() const { double result = 0.0; - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + for (std::size_t i = 0u; i < m_X.size(); ++i) { result += std::fabs(static_cast(m_X[i])); } return result; @@ -1015,23 +826,18 @@ struct SVector //! Convert to the VECTOR representation. template - inline VECTOR &toType(VECTOR &result) const - { - for (std::size_t i = 0u; i < m_X.size(); ++i) - { + inline VECTOR& toType(VECTOR& result) const { + for (std::size_t i = 0u; i < m_X.size(); ++i) { result(i) = m_X[i]; } return result; } //! Get a checksum of the components of this vector. - uint64_t checksum() const - { + uint64_t checksum() const { uint64_t result = static_cast(m_X[0]); - for (std::size_t i = 1u; i < m_X.size(); ++i) - { - result = core::CHashing::hashCombine( - result, static_cast(m_X[i])); + for (std::size_t i = 1u; i < m_X.size(); ++i) { + result = core::CHashing::hashCombine(result, static_cast(m_X[i])); } return result; } @@ -1042,7 +848,6 @@ struct SVector } // linear_algebra_detail:: - // ************************ STACK VECTOR ************************ //! \brief A stack based lightweight dense vector class. @@ -1069,329 +874,254 @@ struct SVector //! \tparam T The floating point type. //! \tparam N The vector dimension. template -class CVectorNx1 : private boost::equality_comparable< CVectorNx1, - boost::partially_ordered< CVectorNx1, - boost::addable< CVectorNx1, - boost::subtractable< CVectorNx1, - boost::multipliable< CVectorNx1, - boost::multipliable2< CVectorNx1, T, - boost::dividable< CVectorNx1, - boost::dividable2< CVectorNx1, T > > > > > > > >, - private linear_algebra_detail::SVector >, - private linear_algebra_detail::CBoundsCheck::InRange -{ - private: - using TBase = linear_algebra_detail::SVector >; - template friend class CVectorNx1; - - public: - using TArray = T[N]; - using TVec = std::vector; - using TBoostArray = boost::array; - using TConstIterator = typename TBoostArray::const_iterator; - - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() - { - return core::memory_detail::SDynamicSizeAlwaysZero::value(); - } - - public: - //! Set to multiple of ones vector. - explicit CVectorNx1(T v = T(0)) - { - std::fill_n(&TBase::m_X[0], N, v); - } - - //! Construct from a C-style array. - explicit CVectorNx1(const TArray &v) - { - for (std::size_t i = 0u; i < N; ++i) - { - TBase::m_X[i] = v[i]; - } +class CVectorNx1 + : private boost::equality_comparable< + CVectorNx1, + boost::partially_ordered< + CVectorNx1, + boost::addable, + boost::subtractable< + CVectorNx1, + boost::multipliable< + CVectorNx1, + boost::multipliable2, + T, + boost::dividable, boost::dividable2, T>>>>>>>>, + private linear_algebra_detail::SVector>, + private linear_algebra_detail::CBoundsCheck::InRange { +private: + using TBase = linear_algebra_detail::SVector>; + template + friend class CVectorNx1; + +public: + using TArray = T[N]; + using TVec = std::vector; + using TBoostArray = boost::array; + using TConstIterator = typename TBoostArray::const_iterator; + +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + +public: + //! Set to multiple of ones vector. + explicit CVectorNx1(T v = T(0)) { std::fill_n(&TBase::m_X[0], N, v); } + + //! Construct from a C-style array. + explicit CVectorNx1(const TArray& v) { + for (std::size_t i = 0u; i < N; ++i) { + TBase::m_X[i] = v[i]; } + } - //! Construct from a boost array. 
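
// A sketch of typical use of the fixed-size vector, assuming the constructors
// shown above are visible. Because the components live in the object itself,
// dynamicSizeAlwaysZero() can report that no heap memory is used.
void exampleStackVector() { // illustrative only
    double raw[3] = {1.0, 2.0, 3.0};
    ml::maths::CVectorNx1<double, 3> x(raw);    // from a C-style array
    ml::maths::CVectorNx1<double, 3> ones(1.0); // multiple of the ones vector
    x += ones;                                  // component-wise addition
}
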
- explicit CVectorNx1(const boost::array &a) - { - for (std::size_t i = 0u; i < N; ++i) - { - TBase::m_X[i] = a[i]; - } + //! Construct from a boost array. + explicit CVectorNx1(const boost::array& a) { + for (std::size_t i = 0u; i < N; ++i) { + TBase::m_X[i] = a[i]; } + } - //! Construct from a vector. - explicit CVectorNx1(const TVec &v) - { - for (std::size_t i = 0u; i < N; ++i) - { - TBase::m_X[i] = v[i]; - } + //! Construct from a vector. + explicit CVectorNx1(const TVec& v) { + for (std::size_t i = 0u; i < N; ++i) { + TBase::m_X[i] = v[i]; } + } - //! Construct from a vector. - explicit CVectorNx1(const core::CSmallVectorBase &v) - { - for (std::size_t i = 0u; i < N; ++i) - { - TBase::m_X[i] = v[i]; - } + //! Construct from a vector. + explicit CVectorNx1(const core::CSmallVectorBase& v) { + for (std::size_t i = 0u; i < N; ++i) { + TBase::m_X[i] = v[i]; } + } - //! Construct from a forward iterator. - //! - //! \warning The user must ensure that the range iterated has - //! at least N items. - template - CVectorNx1(ITR begin, ITR end) - { - if (std::distance(begin, end) != N) - { - LOG_ERROR("Bad range"); - return; - } - std::copy(begin, end, &TBase::m_X[0]); - } + //! Construct from a forward iterator. + //! + //! \warning The user must ensure that the range iterated has + //! at least N items. + template + CVectorNx1(ITR begin, ITR end) { + if (std::distance(begin, end) != N) { + LOG_ERROR("Bad range"); + return; + } + std::copy(begin, end, &TBase::m_X[0]); + } - //! Construct from a dense vector. - template - CVectorNx1(const CDenseVectorInitializer &v); + //! Construct from a dense vector. + template + CVectorNx1(const CDenseVectorInitializer& v); - //! Copy construction if the underlying type is implicitly - //! convertible. - template - CVectorNx1(const CVectorNx1 &other) - { - this->operator=(other); - } + //! Copy construction if the underlying type is implicitly + //! convertible. + template + CVectorNx1(const CVectorNx1& other) { + this->operator=(other); + } - //! Assignment if the underlying type is implicitly convertible. - template - const CVectorNx1 &operator=(const CVectorNx1 &other) - { - this->assign(other.base()); - return *this; - } + //! Assignment if the underlying type is implicitly convertible. + template + const CVectorNx1& operator=(const CVectorNx1& other) { + this->assign(other.base()); + return *this; + } - //! \name Persistence - //@{ - //! Create from a delimited string. - bool fromDelimited(const std::string &str) - { - return this->TBase::fromDelimited(str); - } + //! \name Persistence + //@{ + //! Create from a delimited string. + bool fromDelimited(const std::string& str) { return this->TBase::fromDelimited(str); } - //! Convert to a delimited string. - std::string toDelimited() const - { - return this->TBase::toDelimited(); - } - //@} + //! Convert to a delimited string. + std::string toDelimited() const { return this->TBase::toDelimited(); } + //@} - //! Get the dimension. - std::size_t dimension() const { return N; } + //! Get the dimension. + std::size_t dimension() const { return N; } - //! Get the i'th component (no bounds checking). - inline T operator()(std::size_t i) const - { - return TBase::m_X[i]; - } + //! Get the i'th component (no bounds checking). + inline T operator()(std::size_t i) const { return TBase::m_X[i]; } - //! Get the i'th component (no bounds checking). - inline T &operator()(std::size_t i) - { - return TBase::m_X[i]; - } + //! Get the i'th component (no bounds checking). 
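
// The templated copy constructor and assignment above exist so vectors whose
// component types are implicitly convertible interoperate, e.g. promoting
// reduced-precision storage to double for calculation. A hypothetical sketch:
void examplePromote() { // illustrative only
    ml::maths::CVectorNx1<float, 2> stored(1.5f);
    ml::maths::CVectorNx1<double, 2> working(stored); // component-wise promotion
}
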
+ inline T& operator()(std::size_t i) { return TBase::m_X[i]; } - //! Get an iterator over the elements. - TConstIterator begin() const { return TBase::m_X.begin(); } + //! Get an iterator over the elements. + TConstIterator begin() const { return TBase::m_X.begin(); } - //! Get an iterator to the end of the elements. - TConstIterator end() const { return TBase::m_X.end(); } + //! Get an iterator to the end of the elements. + TConstIterator end() const { return TBase::m_X.end(); } - //! Component-wise negation. - CVectorNx1 operator-() const - { - CVectorNx1 result(*this); - result.negative(); - return result; - } + //! Component-wise negation. + CVectorNx1 operator-() const { + CVectorNx1 result(*this); + result.negative(); + return result; + } - //! Vector subtraction. - const CVectorNx1 &operator-=(const CVectorNx1 &lhs) - { - this->minusEquals(lhs.base()); - return *this; - } + //! Vector subtraction. + const CVectorNx1& operator-=(const CVectorNx1& lhs) { + this->minusEquals(lhs.base()); + return *this; + } - //! Vector addition. - const CVectorNx1 &operator+=(const CVectorNx1 &lhs) - { - this->plusEquals(lhs.base()); - return *this; - } + //! Vector addition. + const CVectorNx1& operator+=(const CVectorNx1& lhs) { + this->plusEquals(lhs.base()); + return *this; + } - //! Component-wise multiplication. - const CVectorNx1 &operator*=(const CVectorNx1 &scale) - { - this->multiplyEquals(scale.base()); - return *this; - } + //! Component-wise multiplication. + const CVectorNx1& operator*=(const CVectorNx1& scale) { + this->multiplyEquals(scale.base()); + return *this; + } - //! Scalar multiplication. - const CVectorNx1 &operator*=(T scale) - { - this->multiplyEquals(scale); - return *this; - } + //! Scalar multiplication. + const CVectorNx1& operator*=(T scale) { + this->multiplyEquals(scale); + return *this; + } - //! Component-wise division. - const CVectorNx1 &operator/=(const CVectorNx1 &scale) - { - this->divideEquals(scale.base()); - return *this; - } + //! Component-wise division. + const CVectorNx1& operator/=(const CVectorNx1& scale) { + this->divideEquals(scale.base()); + return *this; + } - //! Scalar division. - const CVectorNx1 &operator/=(T scale) - { - this->divideEquals(scale); - return *this; - } + //! Scalar division. + const CVectorNx1& operator/=(T scale) { + this->divideEquals(scale); + return *this; + } - //! Check if two vectors are identically equal. - bool operator==(const CVectorNx1 &other) const - { - return this->equal(other.base()); - } + //! Check if two vectors are identically equal. + bool operator==(const CVectorNx1& other) const { return this->equal(other.base()); } - //! Lexicographical total ordering. - bool operator<(const CVectorNx1 &rhs) const - { - return this->less(rhs.base()); - } + //! Lexicographical total ordering. + bool operator<(const CVectorNx1& rhs) const { return this->less(rhs.base()); } - //! Check if this is zero. - bool isZero() const - { - return this->TBase::isZero(); - } + //! Check if this is zero. + bool isZero() const { return this->TBase::isZero(); } - //! Inner product. - double inner(const CVectorNx1 &covector) const - { - return this->TBase::inner(covector.base()); - } + //! Inner product. + double inner(const CVectorNx1& covector) const { return this->TBase::inner(covector.base()); } - //! Inner product. - template - double inner(const VECTOR &covector) const - { - return this->TBase::template inner(covector); - } + //! Inner product. 
+ template + double inner(const VECTOR& covector) const { + return this->TBase::template inner(covector); + } - //! Outer product. - //! - //! \note The copy should be avoided by RVO. - CSymmetricMatrixNxN outer() const - { - return CSymmetricMatrixNxN(E_OuterProduct, *this); - } + //! Outer product. + //! + //! \note The copy should be avoided by RVO. + CSymmetricMatrixNxN outer() const { return CSymmetricMatrixNxN(E_OuterProduct, *this); } - //! A diagonal matrix. - //! - //! \note The copy should be avoided by RVO. - CSymmetricMatrixNxN diagonal() const - { - return CSymmetricMatrixNxN(E_Diagonal, *this); - } + //! A diagonal matrix. + //! + //! \note The copy should be avoided by RVO. + CSymmetricMatrixNxN diagonal() const { return CSymmetricMatrixNxN(E_Diagonal, *this); } - //! L1 norm. - double L1() const - { - return this->TBase::L1(); - } + //! L1 norm. + double L1() const { return this->TBase::L1(); } - //! Euclidean norm. - double euclidean() const - { - return std::sqrt(this->inner(*this)); - } + //! Euclidean norm. + double euclidean() const { return std::sqrt(this->inner(*this)); } - //! Convert to a vector on a different underlying type. - template - inline CVectorNx1 to() const - { - return CVectorNx1(*this); - } + //! Convert to a vector on a different underlying type. + template + inline CVectorNx1 to() const { + return CVectorNx1(*this); + } - //! Convert to a vector. - template - inline VECTOR toVector() const - { - return VECTOR(this->begin(), this->end()); - } + //! Convert to a vector. + template + inline VECTOR toVector() const { + return VECTOR(this->begin(), this->end()); + } - //! Convert to a boost array. - inline TBoostArray toBoostArray() const - { - return TBase::m_X; - } + //! Convert to a boost array. + inline TBoostArray toBoostArray() const { return TBase::m_X; } - //! Convert to the specified vector representation. - //! - //! \note The copy should be avoided by RVO. - template - inline VECTOR toType() const - { - VECTOR result(N); - return this->TBase::toType(result); - } + //! Convert to the specified vector representation. + //! + //! \note The copy should be avoided by RVO. + template + inline VECTOR toType() const { + VECTOR result(N); + return this->TBase::toType(result); + } - //! Get a checksum of this vector's components. - uint64_t checksum() const - { - return this->TBase::checksum(); - } + //! Get a checksum of this vector's components. + uint64_t checksum() const { return this->TBase::checksum(); } - //! Get the smallest possible vector. - static const CVectorNx1 &smallest() - { - static const CVectorNx1 result(boost::numeric::bounds::lowest()); - return result; - } + //! Get the smallest possible vector. + static const CVectorNx1& smallest() { + static const CVectorNx1 result(boost::numeric::bounds::lowest()); + return result; + } - //! Get the largest possible vector. - static const CVectorNx1 &largest() - { - static const CVectorNx1 result(boost::numeric::bounds::highest()); - return result; - } + //! Get the largest possible vector. + static const CVectorNx1& largest() { + static const CVectorNx1 result(boost::numeric::bounds::highest()); + return result; + } }; //! Construct from the outer product of a vector with itself. 
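
// For x = (x_1, ..., x_n), E_OuterProduct fills the lower triangle with
// m(i, j) = x(i) * x(j); for example x = (1, 2) yields [[1, 2], [2, 4]].
// A sketch, assuming the interface above:
void exampleOuter() { // illustrative only
    ml::maths::CVectorNx1<double, 2> x;
    x(0) = 1.0;
    x(1) = 2.0;
    ml::maths::CSymmetricMatrixNxN<double, 2> m = x.outer();
    // m(0, 0) == 1, m(1, 0) == m(0, 1) == 2, m(1, 1) == 4
}
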
template -CSymmetricMatrixNxN::CSymmetricMatrixNxN(ESymmetricMatrixType type, - const CVectorNx1 &x) -{ - switch (type) - { +CSymmetricMatrixNxN::CSymmetricMatrixNxN(ESymmetricMatrixType type, const CVectorNx1& x) { + switch (type) { case E_OuterProduct: - for (std::size_t i = 0u, i_ = 0u; i < N; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { + for (std::size_t i = 0u, i_ = 0u; i < N; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = x(i) * x(j); } } break; case E_Diagonal: - for (std::size_t i = 0u, i_ = 0u; i < N; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { + for (std::size_t i = 0u, i_ = 0u; i < N; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = i == j ? x(i) : T(0); } } @@ -1401,15 +1131,10 @@ CSymmetricMatrixNxN::CSymmetricMatrixNxN(ESymmetricMatrixType type, //! \brief Gets a zero vector with specified dimension. template -struct SZero> -{ - static CVectorNx1 get(std::size_t /*dimension*/) - { - return CVectorNx1(T(0)); - } +struct SZero> { + static CVectorNx1 get(std::size_t /*dimension*/) { return CVectorNx1(T(0)); } }; - // ************************ HEAP VECTOR ************************ //! \brief A heap based lightweight dense vector class. @@ -1433,330 +1158,251 @@ struct SZero> //! //! \tparam T The floating point type. template -class CVector : private boost::equality_comparable< CVector, - boost::partially_ordered< CVector, - boost::addable< CVector, - boost::subtractable< CVector, - boost::multipliable< CVector, - boost::multipliable2< CVector, T, - boost::dividable< CVector, - boost::dividable2< CVector, T > > > > > > > >, - private linear_algebra_detail::SVector > -{ - private: - using TBase = linear_algebra_detail::SVector >; - template friend class CVector; - - public: - using TArray = std::vector; - using TConstIterator = typename TArray::const_iterator; - - public: - //! Set to multiple of ones vector. - explicit CVector(std::size_t d = 0u, T v = T(0)) - { - if (d > 0) - { - TBase::m_X.resize(d, v); - } +class CVector + : private boost::equality_comparable< + CVector, + boost::partially_ordered< + CVector, + boost::addable< + CVector, + boost::subtractable< + CVector, + boost::multipliable< + CVector, + boost::multipliable2, T, boost::dividable, boost::dividable2, T>>>>>>>>, + private linear_algebra_detail::SVector> { +private: + using TBase = linear_algebra_detail::SVector>; + template + friend class CVector; + +public: + using TArray = std::vector; + using TConstIterator = typename TArray::const_iterator; + +public: + //! Set to multiple of ones vector. + explicit CVector(std::size_t d = 0u, T v = T(0)) { + if (d > 0) { + TBase::m_X.resize(d, v); } + } - //! Construct from a boost array. - template - explicit CVector(const boost::array &a) - { - for (std::size_t i = 0u; i < N; ++i) - { - TBase::m_X[i] = a[i]; - } + //! Construct from a boost array. + template + explicit CVector(const boost::array& a) { + for (std::size_t i = 0u; i < N; ++i) { + TBase::m_X[i] = a[i]; } + } - //! Construct from a vector. - explicit CVector(const TArray &v) - { - TBase::m_X = v; - } + //! Construct from a vector. + explicit CVector(const TArray& v) { TBase::m_X = v; } - //! Construct from a vector. - explicit CVector(const core::CSmallVectorBase &v) - { - TBase::m_X.assign(v.begin(), v.end()); - } + //! Construct from a vector. + explicit CVector(const core::CSmallVectorBase& v) { TBase::m_X.assign(v.begin(), v.end()); } - //! Construct from the range [\p begin, \p end). 
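
// CVector is the heap-based counterpart of CVectorNx1, for dimensions only
// known at runtime; the price is an allocation per object. A sketch, assuming
// the constructors above:
#include <vector>

void exampleHeapVector() { // illustrative only
    std::vector<double> data{1.0, 2.0, 3.0};
    ml::maths::CVector<double> v(data.begin(), data.end());
    v.extend(2); // now 5-dimensional, padded with T(0)
}
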
- template - CVector(ITR begin, ITR end) - { - TBase::m_X.assign(begin, end); - } + //! Construct from the range [\p begin, \p end). + template + CVector(ITR begin, ITR end) { + TBase::m_X.assign(begin, end); + } - //! Construct from a dense vector. - template - CVector(const CDenseVectorInitializer &v); + //! Construct from a dense vector. + template + CVector(const CDenseVectorInitializer& v); - //! Copy construction if the underlying type is implicitly - //! convertible. - template - CVector(const CVector &other) - { - this->operator=(other); - } + //! Copy construction if the underlying type is implicitly + //! convertible. + template + CVector(const CVector& other) { + this->operator=(other); + } - //! Assignment if the underlying type is implicitly convertible. - template - const CVector &operator=(const CVector &other) - { - TBase::m_X.resize(other.dimension()); - this->TBase::assign(other.base()); - return *this; - } + //! Assignment if the underlying type is implicitly convertible. + template + const CVector& operator=(const CVector& other) { + TBase::m_X.resize(other.dimension()); + this->TBase::assign(other.base()); + return *this; + } - //! Efficiently swap the contents of two vectors. - void swap(CVector &other) - { - TBase::m_X.swap(other.TBase::m_X); - } + //! Efficiently swap the contents of two vectors. + void swap(CVector& other) { TBase::m_X.swap(other.TBase::m_X); } - //! Reserve enough memory to hold \p d components. - void reserve(std::size_t d) - { - TBase::m_X.reserve(d); - } + //! Reserve enough memory to hold \p d components. + void reserve(std::size_t d) { TBase::m_X.reserve(d); } - //! Assign the components from the range [\p begin, \p end). - template - void assign(ITR begin, ITR end) - { - TBase::m_X.assign(begin, end); - } + //! Assign the components from the range [\p begin, \p end). + template + void assign(ITR begin, ITR end) { + TBase::m_X.assign(begin, end); + } - //! Extend the vector to dimension \p d adding components - //! initialized to \p v. - void extend(std::size_t d, T v = T(0)) - { - TBase::m_X.resize(this->dimension() + d, v); - } + //! Extend the vector to dimension \p d adding components + //! initialized to \p v. + void extend(std::size_t d, T v = T(0)) { TBase::m_X.resize(this->dimension() + d, v); } - //! Extend the vector adding components initialized to \p v. - template - void extend(ITR begin, ITR end) - { - TBase::m_X.insert(TBase::m_X.end(), begin, end); - } + //! Extend the vector adding components initialized to \p v. + template + void extend(ITR begin, ITR end) { + TBase::m_X.insert(TBase::m_X.end(), begin, end); + } - //! \name Persistence - //@{ - //! Create from a delimited string. - bool fromDelimited(const std::string &str) - { - return this->TBase::fromDelimited(str); - } + //! \name Persistence + //@{ + //! Create from a delimited string. + bool fromDelimited(const std::string& str) { return this->TBase::fromDelimited(str); } - //! Persist state to delimited values. - std::string toDelimited() const - { - return this->TBase::toDelimited(); - } - //@} + //! Persist state to delimited values. + std::string toDelimited() const { return this->TBase::toDelimited(); } + //@} - //! Get the dimension. - std::size_t dimension() const { return TBase::m_X.size(); } + //! Get the dimension. + std::size_t dimension() const { return TBase::m_X.size(); } - //! Get the i'th component (no bounds checking). - inline T operator()(std::size_t i) const - { - return TBase::m_X[i]; - } + //! Get the i'th component (no bounds checking). 
+ inline T operator()(std::size_t i) const { return TBase::m_X[i]; } - //! Get the i'th component (no bounds checking). - inline T &operator()(std::size_t i) - { - return TBase::m_X[i]; - } + //! Get the i'th component (no bounds checking). + inline T& operator()(std::size_t i) { return TBase::m_X[i]; } - //! Get an iterator over the elements. - TConstIterator begin() const { return TBase::m_X.begin(); } + //! Get an iterator over the elements. + TConstIterator begin() const { return TBase::m_X.begin(); } - //! Get an iterator to the end of the elements. - TConstIterator end() const { return TBase::m_X.end(); } + //! Get an iterator to the end of the elements. + TConstIterator end() const { return TBase::m_X.end(); } - //! Component-wise negation. - CVector operator-() const - { - CVector result(*this); - result.negative(); - return result; - } + //! Component-wise negation. + CVector operator-() const { + CVector result(*this); + result.negative(); + return result; + } - //! Vector subtraction. - const CVector &operator-=(const CVector &lhs) - { - this->minusEquals(lhs.base()); - return *this; - } + //! Vector subtraction. + const CVector& operator-=(const CVector& lhs) { + this->minusEquals(lhs.base()); + return *this; + } - //! Vector addition. - const CVector &operator+=(const CVector &lhs) - { - this->plusEquals(lhs.base()); - return *this; - } + //! Vector addition. + const CVector& operator+=(const CVector& lhs) { + this->plusEquals(lhs.base()); + return *this; + } - //! Component-wise multiplication. - const CVector &operator*=(const CVector &scale) - { - this->multiplyEquals(scale.base()); - return *this; - } + //! Component-wise multiplication. + const CVector& operator*=(const CVector& scale) { + this->multiplyEquals(scale.base()); + return *this; + } - //! Scalar multiplication. - const CVector &operator*=(T scale) - { - this->multiplyEquals(scale); - return *this; - } + //! Scalar multiplication. + const CVector& operator*=(T scale) { + this->multiplyEquals(scale); + return *this; + } - //! Component-wise division. - const CVector &operator/=(const CVector &scale) - { - this->divideEquals(scale.base()); - return *this; - } + //! Component-wise division. + const CVector& operator/=(const CVector& scale) { + this->divideEquals(scale.base()); + return *this; + } - //! Scalar division. - const CVector &operator/=(T scale) - { - this->divideEquals(scale); - return *this; - } + //! Scalar division. + const CVector& operator/=(T scale) { + this->divideEquals(scale); + return *this; + } - //! Check if two vectors are identically equal. - bool operator==(const CVector &other) const - { - return this->equal(other.base()); - } + //! Check if two vectors are identically equal. + bool operator==(const CVector& other) const { return this->equal(other.base()); } - //! Lexicographical total ordering. - bool operator<(const CVector &rhs) const - { - return this->less(rhs.base()); - } + //! Lexicographical total ordering. + bool operator<(const CVector& rhs) const { return this->less(rhs.base()); } - //! Check if this is zero. - bool isZero() const - { - return this->TBase::isZero(); - } + //! Check if this is zero. + bool isZero() const { return this->TBase::isZero(); } - //! Inner product. - double inner(const CVector &covector) const - { - return this->TBase::inner(covector.base()); - } + //! Inner product. + double inner(const CVector& covector) const { return this->TBase::inner(covector.base()); } - //! Inner product. 
- template - double inner(const VECTOR &covector) const - { - return this->TBase::template inner(covector); - } + //! Inner product. + template + double inner(const VECTOR& covector) const { + return this->TBase::template inner(covector); + } - //! Outer product. - //! - //! \note The copy should be avoided by RVO. - CSymmetricMatrix outer() const - { - return CSymmetricMatrix(E_OuterProduct, *this); - } + //! Outer product. + //! + //! \note The copy should be avoided by RVO. + CSymmetricMatrix outer() const { return CSymmetricMatrix(E_OuterProduct, *this); } - //! A diagonal matrix. - //! - //! \note The copy should be avoided by RVO. - CSymmetricMatrix diagonal() const - { - return CSymmetricMatrix(E_Diagonal, *this); - } + //! A diagonal matrix. + //! + //! \note The copy should be avoided by RVO. + CSymmetricMatrix diagonal() const { return CSymmetricMatrix(E_Diagonal, *this); } - //! L1 norm. - double L1() const - { - return this->TBase::L1(); - } + //! L1 norm. + double L1() const { return this->TBase::L1(); } - //! Euclidean norm. - double euclidean() const - { - return std::sqrt(this->inner(*this)); - } + //! Euclidean norm. + double euclidean() const { return std::sqrt(this->inner(*this)); } - //! Convert to a vector on a different underlying type. - template - inline CVector to() const - { - return CVector(*this); - } + //! Convert to a vector on a different underlying type. + template + inline CVector to() const { + return CVector(*this); + } - //! Convert to a vector. - template - inline VECTOR toVector() const - { - return VECTOR(this->begin(), this->end()); - } + //! Convert to a vector. + template + inline VECTOR toVector() const { + return VECTOR(this->begin(), this->end()); + } - //! Convert to the specified vector representation. - //! - //! \note The copy should be avoided by RVO. - template - inline VECTOR toType() const - { - VECTOR result(this->dimension()); - return this->TBase::toType(result); - } + //! Convert to the specified vector representation. + //! + //! \note The copy should be avoided by RVO. + template + inline VECTOR toType() const { + VECTOR result(this->dimension()); + return this->TBase::toType(result); + } - //! Get a checksum of this vector's components. - uint64_t checksum() const - { - return this->TBase::checksum(); - } + //! Get a checksum of this vector's components. + uint64_t checksum() const { return this->TBase::checksum(); } - //! Get the smallest possible vector. - static const CVector &smallest(std::size_t d) - { - static const CVector result(d, boost::numeric::bounds::lowest()); - return result; - } + //! Get the smallest possible vector. + static const CVector& smallest(std::size_t d) { + static const CVector result(d, boost::numeric::bounds::lowest()); + return result; + } - //! Get the largest possible vector. - static const CVector &largest(std::size_t d) - { - static const CVector result(d, boost::numeric::bounds::highest()); - return result; - } + //! Get the largest possible vector. + static const CVector& largest(std::size_t d) { + static const CVector result(d, boost::numeric::bounds::highest()); + return result; + } }; //! Construct from the outer product of a vector with itself. 
 template<typename T>
-CSymmetricMatrix<T>::CSymmetricMatrix(ESymmetricMatrixType type,
-                                      const CVector<T> &x)
-{
+CSymmetricMatrix<T>::CSymmetricMatrix(ESymmetricMatrixType type, const CVector<T>& x) {
     m_D = x.dimension();
     TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2);
-    switch (type)
-    {
+    switch (type) {
     case E_OuterProduct:
-        for (std::size_t i = 0u, i_ = 0u; i < x.dimension(); ++i)
-        {
-            for (std::size_t j = 0u; j <= i; ++j, ++i_)
-            {
+        for (std::size_t i = 0u, i_ = 0u; i < x.dimension(); ++i) {
+            for (std::size_t j = 0u; j <= i; ++j, ++i_) {
                 TBase::m_LowerTriangle[i_] = x(i) * x(j);
             }
         }
         break;
     case E_Diagonal:
-        for (std::size_t i = 0u, i_ = 0u; i < x.dimension(); ++i)
-        {
-            for (std::size_t j = 0u; j <= i; ++j, ++i_)
-            {
+        for (std::size_t i = 0u, i_ = 0u; i < x.dimension(); ++i) {
+            for (std::size_t j = 0u; j <= i; ++j, ++i_) {
                 TBase::m_LowerTriangle[i_] = i == j ? x(i) : T(0);
             }
         }
@@ -1766,28 +1412,21 @@ CSymmetricMatrix<T>::CSymmetricMatrix(ESymmetricMatrixType type,
 //! \brief Gets a zero vector with specified dimension.
 template<typename T>
-struct SZero<CVector<T>>
-{
-    static CVector<T> get(std::size_t dimension)
-    {
-        return CVector<T>(dimension, T(0));
-    }
+struct SZero<CVector<T>> {
+    static CVector<T> get(std::size_t dimension) { return CVector<T>(dimension, T(0)); }
 };
-
 // ************************ FREE FUNCTIONS ************************

 //! Free swap picked up by std:: algorithms etc.
 template<typename T>
-void swap(CSymmetricMatrix<T> &lhs, CSymmetricMatrix<T> &rhs)
-{
+void swap(CSymmetricMatrix<T>& lhs, CSymmetricMatrix<T>& rhs) {
     lhs.swap(rhs);
 }

 //! Free swap picked up by std:: algorithms etc.
 template<typename T>
-void swap(CVector<T> &lhs, CVector<T> &rhs)
-{
+void swap(CVector<T>& lhs, CVector<T>& rhs) {
     lhs.swap(rhs);
 }

@@ -1799,15 +1438,11 @@ void swap(CVector<T> &lhs, CVector<T> &rhs)
 //! \param[in] m The matrix.
 //! \param[in] x The vector.
 template<typename T, std::size_t N>
-CVectorNx1<T, N> operator*(const CSymmetricMatrixNxN<T, N> &m,
-                           const CVectorNx1<T, N> &x)
-{
+CVectorNx1<T, N> operator*(const CSymmetricMatrixNxN<T, N>& m, const CVectorNx1<T, N>& x) {
     CVectorNx1<T, N> result;
-    for (std::size_t i = 0u; i < N; ++i)
-    {
+    for (std::size_t i = 0u; i < N; ++i) {
         double component = 0.0;
-        for (std::size_t j = 0u; j < N; ++j)
-        {
+        for (std::size_t j = 0u; j < N; ++j) {
             component += m(i, j) * x(j);
         }
         result(i) = component;
@@ -1823,21 +1458,17 @@ CVectorNx1<T, N> operator*(const CSymmetricMatrixNxN<T, N> &m,
 //! \param[in] m The matrix.
 //! \param[in] x The vector.
 template<typename T>
-CVector<T> operator*(const CSymmetricMatrix<T> &m, const CVector<T> &x)
-{
+CVector<T> operator*(const CSymmetricMatrix<T>& m, const CVector<T>& x) {
     CVector<T> result(x.dimension());
-    for (std::size_t i = 0u; i < m.rows(); ++i)
-    {
+    for (std::size_t i = 0u; i < m.rows(); ++i) {
         double component = 0.0;
-        for (std::size_t j = 0u; j < m.columns(); ++j)
-        {
+        for (std::size_t j = 0u; j < m.columns(); ++j) {
             component += m(i, j) * x(j);
         }
         result(i) = component;
     }
     return result;
 }
-
 }
 }
diff --git a/include/maths/CLinearAlgebraEigen.h b/include/maths/CLinearAlgebraEigen.h
index d3f857f8fc..483b407c7c 100644
--- a/include/maths/CLinearAlgebraEigen.h
+++ b/include/maths/CLinearAlgebraEigen.h
@@ -10,8 +10,8 @@
 #include 
 #include 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -19,21 +19,21 @@
 #include 

-namespace Eigen
-{
-#define LESS_OR_GREATER(l, r) if (l < r) { return true; } else if (l > r) { return false; }
+namespace Eigen {
+#define LESS_OR_GREATER(l, r) \
+    if (l < r) {              \
+        return true;          \
+    } else if (l > r) {       \
+        return false;         \
+    }

 //! Less than on Eigen sparse matrix.
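
// LESS_OR_GREATER returns early on the first strict inequality in either
// direction, so successive invocations compare fields lexicographically and
// equal fields fall through to the next test. A sketch of the pattern with a
// hypothetical two-field key:
// bool operator<(const Key& lhs, const Key& rhs) {
//     LESS_OR_GREATER(lhs.first, rhs.first)
//     LESS_OR_GREATER(lhs.second, rhs.second)
//     return false; // all fields equal
// }
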
template -bool operator<(const SparseMatrix &lhs, - const SparseMatrix &rhs) -{ +bool operator<(const SparseMatrix& lhs, const SparseMatrix& rhs) { LESS_OR_GREATER(lhs.rows(), rhs.rows()) LESS_OR_GREATER(lhs.cols(), rhs.cols()) - for (STORAGE_INDEX i = 0; i < lhs.rows(); ++i) - { - for (STORAGE_INDEX j = 0; j < lhs.cols(); ++j) - { + for (STORAGE_INDEX i = 0; i < lhs.rows(); ++i) { + for (STORAGE_INDEX j = 0; j < lhs.cols(); ++j) { LESS_OR_GREATER(lhs.coeff(i, j), rhs.coeff(i, j)) } } @@ -42,12 +42,9 @@ bool operator<(const SparseMatrix &lhs, //! Less than on Eigen sparse vector. template -bool operator<(const SparseVector &lhs, - const SparseVector &rhs) -{ +bool operator<(const SparseVector& lhs, const SparseVector& rhs) { LESS_OR_GREATER(lhs.size(), rhs.size()) - for (STORAGE_INDEX i = 0; i < lhs.size(); ++i) - { + for (STORAGE_INDEX i = 0; i < lhs.size(); ++i) { LESS_OR_GREATER(lhs.coeff(i), rhs(i)) } return false; @@ -55,16 +52,13 @@ bool operator<(const SparseVector &lhs, //! Less than on Eigen dense matrix. template -bool operator<(const Matrix &lhs, - const Matrix &rhs) -{ +bool operator<(const Matrix& lhs, + const Matrix& rhs) { using TIndex = typename Matrix::Index; LESS_OR_GREATER(lhs.rows(), rhs.rows()) LESS_OR_GREATER(lhs.cols(), rhs.cols()) - for (TIndex i = 0; i < lhs.rows(); ++i) - { - for (TIndex j = 0; j < lhs.cols(); ++j) - { + for (TIndex i = 0; i < lhs.rows(); ++i) { + for (TIndex j = 0; j < lhs.cols(); ++j) { LESS_OR_GREATER(lhs.coeff(i, j), rhs.coeff(i, j)) } } @@ -73,27 +67,21 @@ bool operator<(const Matrix &lh //! Free swap picked up by std:: algorithms etc. template -void swap(SparseVector &lhs, - SparseVector &rhs) -{ +void swap(SparseVector& lhs, SparseVector& rhs) { lhs.swap(rhs); } //! Free swap picked up by std:: algorithms etc. template -void swap(Matrix &lhs, - Matrix &rhs) -{ +void swap(Matrix& lhs, Matrix& rhs) { lhs.swap(rhs); } #undef LESS_OR_GREATER } -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! Rename to follow our conventions and add to ml::maths. template @@ -101,12 +89,8 @@ using CSparseMatrix = Eigen::SparseMatrix; //! \brief Gets a zero sparse matrix with specified dimensions. template -struct SZero> -{ - static CSparseMatrix get(std::ptrdiff_t rows, std::ptrdiff_t cols) - { - return CSparseMatrix(rows, cols); - } +struct SZero> { + static CSparseMatrix get(std::ptrdiff_t rows, std::ptrdiff_t cols) { return CSparseMatrix(rows, cols); } }; //! The type of an element of a sparse matrix in coordinate form. @@ -119,12 +103,8 @@ using CSparseVector = Eigen::SparseVector; //! \brief Gets a zero sparse vector with specified dimension. template -struct SZero> -{ - static CSparseVector get(std::ptrdiff_t dimension) - { - return CSparseVector(dimension); - } +struct SZero> { + static CSparseVector get(std::ptrdiff_t dimension) { return CSparseVector(dimension); } }; //! The type of an element of a sparse vector in coordinate form. @@ -133,76 +113,55 @@ using CSparseVectorCoordinate = Eigen::Triplet; //! Create a tuple with which to initialize a sparse matrix. template -inline CSparseMatrixElement matrixElement(std::ptrdiff_t row, std::ptrdiff_t column, SCALAR value) -{ +inline CSparseMatrixElement matrixElement(std::ptrdiff_t row, std::ptrdiff_t column, SCALAR value) { return CSparseMatrixElement(row, column, value); } //! Create a tuple with which to initialize a sparse column vector. 
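
// The coordinate-form helpers feed Eigen's triplet-based initialisation.
// A sketch, assuming CSparseMatrix retains Eigen's setFromTriplets interface:
#include <vector>

void exampleSparse() { // illustrative only
    std::vector<ml::maths::CSparseMatrixElement<double>> elements;
    elements.push_back(ml::maths::matrixElement(0, 1, 3.5));
    elements.push_back(ml::maths::matrixElement(2, 2, 1.0));
    ml::maths::CSparseMatrix<double> m(3, 3);
    m.setFromTriplets(elements.begin(), elements.end());
}
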
template -inline CSparseVectorCoordinate vectorCoordinate(std::ptrdiff_t row, SCALAR value) -{ +inline CSparseVectorCoordinate vectorCoordinate(std::ptrdiff_t row, SCALAR value) { return CSparseVectorCoordinate(row, 0, value); } //! \brief Adapts Eigen::SparseVector::InnerIterator for use with STL. template -class CSparseVectorIndexIterator : public std::iterator -{ - CSparseVectorIndexIterator(const CSparseVector &vector, - std::size_t index) : - m_Vector(&vector), m_Base(vector, index) - {} - - bool operator==(const CSparseVectorIndexIterator &rhs) const - { - return m_Vector == rhs.m_Vector - && m_Base.row() == rhs.m_Base.row() - && m_Base.col() == rhs.m_Base.col(); - } - bool operator!=(const CSparseVectorIndexIterator &rhs) const - { - return !(*this == rhs); - } +class CSparseVectorIndexIterator : public std::iterator { + CSparseVectorIndexIterator(const CSparseVector& vector, std::size_t index) : m_Vector(&vector), m_Base(vector, index) {} - std::ptrdiff_t operator*() const - { - return std::max(m_Base.row(), m_Base.col()); - } + bool operator==(const CSparseVectorIndexIterator& rhs) const { + return m_Vector == rhs.m_Vector && m_Base.row() == rhs.m_Base.row() && m_Base.col() == rhs.m_Base.col(); + } + bool operator!=(const CSparseVectorIndexIterator& rhs) const { return !(*this == rhs); } - CSparseVectorIndexIterator &operator++() - { - ++m_Base; - return *this; - } - CSparseVectorIndexIterator operator++(int) - { - CSparseVectorIndexIterator result(*this); - ++m_Base; - return result; - } + std::ptrdiff_t operator*() const { return std::max(m_Base.row(), m_Base.col()); } - private: - using TIterator = typename CSparseVector::InnerIterator; + CSparseVectorIndexIterator& operator++() { + ++m_Base; + return *this; + } + CSparseVectorIndexIterator operator++(int) { + CSparseVectorIndexIterator result(*this); + ++m_Base; + return result; + } + +private: + using TIterator = typename CSparseVector::InnerIterator; - private: - CSparseVector *m_Vector; - TIterator m_Base; +private: + CSparseVector* m_Vector; + TIterator m_Base; }; //! Get an iterator over the indices of \p vector. template -CSparseVectorIndexIterator -beginIndices(const CSparseVector &vector) -{ +CSparseVectorIndexIterator beginIndices(const CSparseVector& vector) { return CSparseVectorIndexIterator(vector, 0); } //! Get the end iterator of the indices of \p vector. template -CSparseVectorIndexIterator -endIndices(const CSparseVector &vector) -{ +CSparseVectorIndexIterator endIndices(const CSparseVector& vector) { return CSparseVectorIndexIterator(vector, vector.data().size()); } @@ -212,12 +171,8 @@ using CDenseMatrix = Eigen::Matrix; //! \brief Gets a zero dense vector with specified dimension. template -struct SZero> -{ - static CDenseMatrix get(std::ptrdiff_t rows, std::ptrdiff_t cols) - { - return CDenseMatrix::Zero(rows, cols); - } +struct SZero> { + static CDenseMatrix get(std::ptrdiff_t rows, std::ptrdiff_t cols) { return CDenseMatrix::Zero(rows, cols); } }; //! Rename to follow our conventions and add to ml::maths. @@ -226,12 +181,8 @@ using CDenseVector = Eigen::Matrix; //! \brief Gets a zero dense vector with specified dimension. template -struct SZero> -{ - static CDenseVector get(std::ptrdiff_t dimension) - { - return CDenseVector::Zero(dimension); - } +struct SZero> { + static CDenseVector get(std::ptrdiff_t dimension) { return CDenseVector::Zero(dimension); } }; //! \brief Eigen matrix typedef. @@ -243,40 +194,34 @@ struct SZero> //! we instantiate different versions. Also, Eigen matrices are always //! 
used for calculation for which we want to use double precision. template -struct SDenseMatrix -{ +struct SDenseMatrix { using Type = CDenseMatrix; }; //! \brief Use stack matrix for size 2. template -struct SDenseMatrix> -{ +struct SDenseMatrix> { using Type = Eigen::Matrix; }; //! \brief Use stack matrix for size 3. template -struct SDenseMatrix> -{ +struct SDenseMatrix> { using Type = Eigen::Matrix; }; //! \brief Use stack matrix for size 4. template -struct SDenseMatrix> -{ +struct SDenseMatrix> { using Type = Eigen::Matrix; }; //! Get the Eigen matrix for \p matrix. template -typename SDenseMatrix::Type toDenseMatrix(const MATRIX &matrix) -{ +typename SDenseMatrix::Type toDenseMatrix(const MATRIX& matrix) { return matrix.template toType::Type>(); } //! Get the dynamic Eigen matrix for \p matrix. template -CDenseMatrix toDynamicDenseMatrix(const MATRIX &matrix) -{ +CDenseMatrix toDynamicDenseMatrix(const MATRIX& matrix) { return matrix.template toType>(); } @@ -285,40 +230,34 @@ CDenseMatrix toDynamicDenseMatrix(const MATRIX &matrix) //! DESCRIPTION:\n //! See SDenseMatrix. template -struct SDenseVector -{ +struct SDenseVector { using Type = CDenseVector; }; //! \brief Use stack vector for size 2. template -struct SDenseVector> -{ +struct SDenseVector> { using Type = Eigen::Matrix; }; //! \brief Use stack vector for size 3. template -struct SDenseVector> -{ +struct SDenseVector> { using Type = Eigen::Matrix; }; //! \brief Use stack vector for size 4. template -struct SDenseVector> -{ +struct SDenseVector> { using Type = Eigen::Matrix; }; //! Get the Eigen vector for \p vector. template -typename SDenseMatrix::Type toDenseVector(const VECTOR &vector) -{ +typename SDenseMatrix::Type toDenseVector(const VECTOR& vector) { return vector.template toType::Type>(); } //! Get the dynamic Eigen vector for \p vector. template -CDenseVector toDynamicDenseVector(const VECTOR &vector) -{ +CDenseVector toDynamicDenseVector(const VECTOR& vector) { return vector.template toType>(); } @@ -332,30 +271,22 @@ CDenseVector toDynamicDenseVector(const VECTOR &vector) //! fromDenseMatrix function plays this role in code where we want a //! conversion. template -class CDenseMatrixInitializer -{ - public: - explicit CDenseMatrixInitializer(const MATRIX &type) : m_Type(&type) {} - - std::size_t rows() const - { - return m_Type->rows(); - } +class CDenseMatrixInitializer { +public: + explicit CDenseMatrixInitializer(const MATRIX& type) : m_Type(&type) {} - double get(std::size_t i, std::size_t j) const - { - return (m_Type->template selfadjointView())(i, j); - } + std::size_t rows() const { return m_Type->rows(); } - private: - const MATRIX *m_Type; + double get(std::size_t i, std::size_t j) const { return (m_Type->template selfadjointView())(i, j); } + +private: + const MATRIX* m_Type; }; //! Convert an Eigen matrix to a form which can initialize one of our //! symmetric matrix objects. template -CDenseMatrixInitializer fromDenseMatrix(const MATRIX &type) -{ +CDenseMatrixInitializer fromDenseMatrix(const MATRIX& type) { return CDenseMatrixInitializer(type); } @@ -368,41 +299,30 @@ CDenseMatrixInitializer fromDenseMatrix(const MATRIX &type) //! flag explicitly when a conversion is taking place, the fromDenseVector //! function plays this role in code where we want a conversion. 
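
// The initializer wrapper makes conversion from Eigen types an explicit,
// named step. A plausible round trip, assuming the fixed-size matrix offers
// the analogous value constructor shown earlier for the other types:
void exampleRoundTrip() { // illustrative only
    ml::maths::CSymmetricMatrixNxN<double, 3> m(1.0);
    auto dense = ml::maths::toDenseMatrix(m); // Eigen 3 x 3 matrix
    ml::maths::CSymmetricMatrixNxN<double, 3> copy(
        ml::maths::fromDenseMatrix(dense));   // back via the initializer
}
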
template -class CDenseVectorInitializer -{ - public: - explicit CDenseVectorInitializer(const VECTOR &type) : m_Type(&type) {} - - std::size_t dimension() const - { - return m_Type->size(); - } +class CDenseVectorInitializer { +public: + explicit CDenseVectorInitializer(const VECTOR& type) : m_Type(&type) {} - double get(std::size_t i) const - { - return (*m_Type)(i); - } + std::size_t dimension() const { return m_Type->size(); } - private: - const VECTOR *m_Type; + double get(std::size_t i) const { return (*m_Type)(i); } + +private: + const VECTOR* m_Type; }; //! Convert an Eigen vector to a form which can initialize one of our //! vector objects. template -CDenseVectorInitializer fromDenseVector(const VECTOR &type) -{ +CDenseVectorInitializer fromDenseVector(const VECTOR& type) { return CDenseVectorInitializer(type); } template template -CSymmetricMatrixNxN::CSymmetricMatrixNxN(const CDenseMatrixInitializer &m) -{ - for (std::size_t i = 0u, i_ = 0u; i < N; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { +CSymmetricMatrixNxN::CSymmetricMatrixNxN(const CDenseMatrixInitializer& m) { + for (std::size_t i = 0u, i_ = 0u; i < N; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = m.get(i, j); } } @@ -410,14 +330,11 @@ CSymmetricMatrixNxN::CSymmetricMatrixNxN(const CDenseMatrixInitializer template -CSymmetricMatrix::CSymmetricMatrix(const CDenseMatrixInitializer &m) -{ +CSymmetricMatrix::CSymmetricMatrix(const CDenseMatrixInitializer& m) { m_D = m.rows(); TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); - for (std::size_t i = 0u, i_ = 0u; i < m_D; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++i_) - { + for (std::size_t i = 0u, i_ = 0u; i < m_D; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = m.get(i, j); } } @@ -425,25 +342,20 @@ CSymmetricMatrix::CSymmetricMatrix(const CDenseMatrixInitializer &m) template template -CVectorNx1::CVectorNx1(const CDenseVectorInitializer &v) -{ - for (std::size_t i = 0u; i < N; ++i) - { +CVectorNx1::CVectorNx1(const CDenseVectorInitializer& v) { + for (std::size_t i = 0u; i < N; ++i) { TBase::m_X[i] = v.get(i); } } template template -CVector::CVector(const CDenseVectorInitializer &v) -{ +CVector::CVector(const CDenseVectorInitializer& v) { TBase::m_X.resize(v.dimension()); - for (std::size_t i = 0u; i < TBase::m_X.size(); ++i) - { + for (std::size_t i = 0u; i < TBase::m_X.size(); ++i) { TBase::m_X[i] = v.get(i); } } - } } diff --git a/include/maths/CLinearAlgebraFwd.h b/include/maths/CLinearAlgebraFwd.h index 2a73eea918..b7636ba727 100644 --- a/include/maths/CLinearAlgebraFwd.h +++ b/include/maths/CLinearAlgebraFwd.h @@ -15,30 +15,25 @@ // Unfortunately, Eigen headers seem to be super fragile to // include directly so we just forward declare here ourselves. -namespace Eigen -{ -template class SparseMatrix; -template class SparseVector; -template class Matrix; +namespace Eigen { +template +class SparseMatrix; +template +class SparseVector; +template +class Matrix; } -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! Types of symmetric matrices constructed with a vector. -enum ESymmetricMatrixType -{ - E_OuterProduct, - E_Diagonal -}; +enum ESymmetricMatrixType { E_OuterProduct, E_Diagonal }; //! \brief Common types used by the vector and matrix classes. -class MATHS_EXPORT CLinearAlgebra -{ - public: - static const char DELIMITER = ','; +class MATHS_EXPORT CLinearAlgebra { +public: + static const char DELIMITER = ','; }; //! 
\brief Base class to get a zero of type \p TYPE. @@ -46,16 +41,23 @@ class MATHS_EXPORT CLinearAlgebra //! Each of our vector and matrix types provides a specialization //! of this class and define a static get method which takes the //! dimension(s). -template struct SZero {}; - -template class CVectorNx1; -template class CSymmetricMatrixNxN; -template class CVector; -template class CSymmetricMatrix; -template class CAnnotatedVector; -template class CDenseMatrixInitializer; -template class CDenseVectorInitializer; +template +struct SZero {}; +template +class CVectorNx1; +template +class CSymmetricMatrixNxN; +template +class CVector; +template +class CSymmetricMatrix; +template +class CAnnotatedVector; +template +class CDenseMatrixInitializer; +template +class CDenseVectorInitializer; } } diff --git a/include/maths/CLinearAlgebraPersist.h b/include/maths/CLinearAlgebraPersist.h index 2c3c0974cd..b9e59da1d5 100644 --- a/include/maths/CLinearAlgebraPersist.h +++ b/include/maths/CLinearAlgebraPersist.h @@ -12,73 +12,48 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace linear_algebra_detail -{ +namespace ml { +namespace maths { +namespace linear_algebra_detail { //! \brief Extracts a vector component / matrix element from a string. -struct SFromString -{ +struct SFromString { template - bool operator()(const std::string &token, T &value) const - { + bool operator()(const std::string& token, T& value) const { return core::CStringUtils::stringToType(token, value); } - bool operator()(const std::string &token, CFloatStorage &value) const - { - return value.fromString(token); - } + bool operator()(const std::string& token, CFloatStorage& value) const { return value.fromString(token); } }; //! \brief Converts a vector component / matrix element to a string. 
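
// Persistence writes the components as a delimiter-separated string; double
// goes through typeToStringPrecise, so the round trip is intended to be
// lossless. A sketch:
#include <string>

void examplePersist() { // illustrative only
    ml::maths::CVector<double> original(3, 1.5);
    std::string state = original.toDelimited(); // e.g. "1.5,1.5,1.5"
    ml::maths::CVector<double> restored;
    bool ok = restored.fromDelimited(state);    // expect ok && restored == original
}
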
-struct SToString -{ +struct SToString { template - std::string operator()(const T &value) const - { + std::string operator()(const T& value) const { return core::CStringUtils::typeToString(value); } - std::string operator()(double value) const - { - return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision); - } - std::string operator()(CFloatStorage value) const - { - return value.toString(); - } + std::string operator()(double value) const { return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision); } + std::string operator()(CFloatStorage value) const { return value.toString(); } }; template -bool SSymmetricMatrix::fromDelimited(const std::string &str) -{ - return core::CPersistUtils::fromString(str, SFromString(), - m_LowerTriangle, - CLinearAlgebra::DELIMITER); +bool SSymmetricMatrix::fromDelimited(const std::string& str) { + return core::CPersistUtils::fromString(str, SFromString(), m_LowerTriangle, CLinearAlgebra::DELIMITER); } template -std::string SSymmetricMatrix::toDelimited() const -{ - return core::CPersistUtils::toString(m_LowerTriangle, - SToString(), - CLinearAlgebra::DELIMITER); +std::string SSymmetricMatrix::toDelimited() const { + return core::CPersistUtils::toString(m_LowerTriangle, SToString(), CLinearAlgebra::DELIMITER); } template -bool SVector::fromDelimited(const std::string &str) -{ +bool SVector::fromDelimited(const std::string& str) { return core::CPersistUtils::fromString(str, SFromString(), m_X, CLinearAlgebra::DELIMITER); } template -std::string SVector::toDelimited() const -{ +std::string SVector::toDelimited() const { return core::CPersistUtils::toString(m_X, SToString(), CLinearAlgebra::DELIMITER); } - } } } diff --git a/include/maths/CLinearAlgebraTools.h b/include/maths/CLinearAlgebraTools.h index b101561d51..1b23ed1939 100644 --- a/include/maths/CLinearAlgebraTools.h +++ b/include/maths/CLinearAlgebraTools.h @@ -20,12 +20,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace linear_algebra_tools_detail -{ +namespace ml { +namespace maths { +namespace linear_algebra_tools_detail { struct VectorTag; struct MatrixTag; @@ -36,88 +33,70 @@ struct MatrixScalarTag; struct ScalarVectorTag; struct ScalarMatrixTag; -template struct SSqrt {}; +template +struct SSqrt {}; //! Component-wise sqrt for a vector. template<> -struct SSqrt -{ +struct SSqrt { template - static void calculate(std::size_t d, VECTOR &result) - { - for (std::size_t i = 0u; i < d; ++i) - { + static void calculate(std::size_t d, VECTOR& result) { + for (std::size_t i = 0u; i < d; ++i) { result(i) = std::sqrt(result(i)); } } }; //! Element-wise sqrt for a symmetric matrix. template<> -struct SSqrt -{ +struct SSqrt { template - static void calculate(std::size_t d, MATRIX &result) - { - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + static void calculate(std::size_t d, MATRIX& result) { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { result(i, j) = std::sqrt(result(i, j)); } } } }; -template struct SMin {}; +template +struct SMin {}; //! Component-wise minimum for a vector. template<> -struct SMin -{ +struct SMin { template - static void calculate(std::size_t d, const VECTOR &lhs, VECTOR &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { + static void calculate(std::size_t d, const VECTOR& lhs, VECTOR& rhs) { + for (std::size_t i = 0u; i < d; ++i) { rhs(i) = std::min(lhs(i), rhs(i)); } } }; //! Component-wise minimum for a vector. 
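 //!
 //! All of these specializations share one convention: the mutable operand
 //! receives the result in place, the other operand is read-only. A minimal
 //! sketch for the vector-scalar form declared next (illustrative only; the
 //! tag name is an assumption since the tag parameters are elided in this
 //! excerpt):
 //! \code
 //! CVectorNx1<double, 3> v(...); // hypothetical values (3, 1, 4)
 //! linear_algebra_tools_detail::SMin<VectorScalarTag>::calculate(3, v, 2.0);
 //! // v is now (2, 1, 2)
 //! \endcode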
template<> -struct SMin -{ +struct SMin { template - static void calculate(std::size_t d, VECTOR &lhs, const T &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { + static void calculate(std::size_t d, VECTOR& lhs, const T& rhs) { + for (std::size_t i = 0u; i < d; ++i) { lhs(i) = std::min(lhs(i), rhs); } } }; //! Component-wise minimum for a vector. template<> -struct SMin -{ +struct SMin { template - static void calculate(std::size_t d, const T &lhs, VECTOR &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { + static void calculate(std::size_t d, const T& lhs, VECTOR& rhs) { + for (std::size_t i = 0u; i < d; ++i) { rhs(i) = std::min(rhs(i), lhs); } } }; //! Element-wise minimum for a symmetric matrix. template<> -struct SMin -{ +struct SMin { template - static void calculate(std::size_t d, const MATRIX &lhs, MATRIX &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + static void calculate(std::size_t d, const MATRIX& lhs, MATRIX& rhs) { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { rhs(i, j) = std::min(lhs(i, j), rhs(i, j)); } } @@ -125,15 +104,11 @@ struct SMin }; //! Element-wise minimum for a symmetric matrix. template<> -struct SMin -{ +struct SMin { template - static void calculate(std::size_t d, MATRIX &lhs, const T &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + static void calculate(std::size_t d, MATRIX& lhs, const T& rhs) { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { lhs(i, j) = std::min(lhs(i, j), rhs); } } @@ -141,72 +116,56 @@ struct SMin }; //! Element-wise minimum for a symmetric matrix. template<> -struct SMin -{ +struct SMin { template - static void calculate(std::size_t d, const T &lhs, MATRIX &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + static void calculate(std::size_t d, const T& lhs, MATRIX& rhs) { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { rhs(i, j) = std::min(lhs, rhs(i, j)); } } } }; -template struct SMax {}; +template +struct SMax {}; //! Component-wise maximum for a vector. template<> -struct SMax -{ +struct SMax { template - static void calculate(std::size_t d, const VECTOR &lhs, VECTOR &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { + static void calculate(std::size_t d, const VECTOR& lhs, VECTOR& rhs) { + for (std::size_t i = 0u; i < d; ++i) { rhs(i) = std::max(lhs(i), rhs(i)); } } }; //! Component-wise maximum for a vector. template<> -struct SMax -{ +struct SMax { template - static void calculate(std::size_t d, VECTOR &lhs, const T &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { + static void calculate(std::size_t d, VECTOR& lhs, const T& rhs) { + for (std::size_t i = 0u; i < d; ++i) { lhs(i) = std::max(lhs(i), rhs); } } }; //! Component-wise maximum for a vector. template<> -struct SMax -{ +struct SMax { template - static void calculate(std::size_t d, const T &lhs, VECTOR &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { + static void calculate(std::size_t d, const T& lhs, VECTOR& rhs) { + for (std::size_t i = 0u; i < d; ++i) { rhs(i) = std::max(rhs(i), lhs); } } }; //! Element-wise maximum for a symmetric matrix. 
template<> -struct SMax -{ +struct SMax { template - static void calculate(std::size_t d, const MATRIX &lhs, MATRIX &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + static void calculate(std::size_t d, const MATRIX& lhs, MATRIX& rhs) { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { rhs(i, j) = std::max(lhs(i, j), rhs(i, j)); } } @@ -214,15 +173,11 @@ struct SMax }; //! Element-wise maximum for a symmetric matrix. template<> -struct SMax -{ +struct SMax { template - static void calculate(std::size_t d, MATRIX &lhs, const T &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + static void calculate(std::size_t d, MATRIX& lhs, const T& rhs) { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { lhs(i, j) = std::max(lhs(i, j), rhs); } } @@ -230,59 +185,49 @@ struct SMax }; //! Element-wise maximum for a symmetric matrix. template<> -struct SMax -{ +struct SMax { template - static void calculate(std::size_t d, const T &lhs, MATRIX &rhs) - { - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + static void calculate(std::size_t d, const T& lhs, MATRIX& rhs) { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { rhs(i, j) = std::max(lhs, rhs(i, j)); } } } }; -template struct SFabs {}; +template +struct SFabs {}; //! Component-wise fabs for a vector. template<> -struct SFabs -{ +struct SFabs { template - static void calculate(std::size_t d, VECTOR &result) - { - for (std::size_t i = 0u; i < d; ++i) - { + static void calculate(std::size_t d, VECTOR& result) { + for (std::size_t i = 0u; i < d; ++i) { result(i) = std::fabs(result(i)); } } }; //! Element-wise fabs for a symmetric matrix. 
template<> -struct SFabs -{ +struct SFabs { template - static void calculate(std::size_t d, MATRIX &result) - { - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + static void calculate(std::size_t d, MATRIX& result) { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { result(i, j) = std::fabs(result(i, j)); } } } }; -#define INVERSE_QUADRATIC_PRODUCT(T, N) \ -MATHS_EXPORT \ -maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, \ - const CSymmetricMatrixNxN &covariance, \ - const CVectorNx1 &residual, \ - double &result, \ - bool ignoreSingularSubspace) +#define INVERSE_QUADRATIC_PRODUCT(T, N) \ + MATHS_EXPORT \ + maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, \ + const CSymmetricMatrixNxN& covariance, \ + const CVectorNx1& residual, \ + double& result, \ + bool ignoreSingularSubspace) INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 2); INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 3); INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 4); @@ -294,25 +239,24 @@ INVERSE_QUADRATIC_PRODUCT(double, 5); #undef INVERSE_QUADRATIC_PRODUCT MATHS_EXPORT maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, - const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, bool ignoreSingularSubspace); MATHS_EXPORT maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, - const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, bool ignoreSingularSubspace); - -#define GAUSSIAN_LOG_LIKELIHOOD(T, N) \ -MATHS_EXPORT \ -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, \ - const CSymmetricMatrixNxN &covariance, \ - const CVectorNx1 &residual, \ - double &result, \ - bool ignoreSingularSubspace) +#define GAUSSIAN_LOG_LIKELIHOOD(T, N) \ + MATHS_EXPORT \ + maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, \ + const CSymmetricMatrixNxN& covariance, \ + const CVectorNx1& residual, \ + double& result, \ + bool ignoreSingularSubspace) GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 2); GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 3); GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 4); @@ -324,24 +268,24 @@ GAUSSIAN_LOG_LIKELIHOOD(double, 5); #undef GAUSSIAN_LOG_LIKELIHOOD MATHS_EXPORT maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, - const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, bool ignoreSingularSubspace); MATHS_EXPORT maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, - const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, bool ignoreSingularSubspace); //! Shared implementation of Gaussian sampling. 
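 //
 // These declaration macros (INVERSE_QUADRATIC_PRODUCT and
 // GAUSSIAN_LOG_LIKELIHOOD above, SAMPLE_GAUSSIAN and LOG_DETERMINANT below)
 // only declare the overloads for N = 2..5; the definitions live in the
 // implementation file. As an illustration, and not part of this change,
 // SAMPLE_GAUSSIAN(double, 2) expands to the declaration:
 //
 //     MATHS_EXPORT
 //     void sampleGaussian(std::size_t n,
 //                         const CVectorNx1<double, 2>& mean,
 //                         const CSymmetricMatrixNxN<double, 2>& covariance,
 //                         std::vector<CVectorNx1<double, 2>>& result);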
-#define SAMPLE_GAUSSIAN(T, N) \ -MATHS_EXPORT \ -void sampleGaussian(std::size_t n, \ - const CVectorNx1 &mean, \ - const CSymmetricMatrixNxN &covariance, \ - std::vector > &result) +#define SAMPLE_GAUSSIAN(T, N) \ + MATHS_EXPORT \ + void sampleGaussian(std::size_t n, \ + const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, \ + std::vector>& result) SAMPLE_GAUSSIAN(CFloatStorage, 2); SAMPLE_GAUSSIAN(CFloatStorage, 3); SAMPLE_GAUSSIAN(CFloatStorage, 4); @@ -353,22 +297,20 @@ SAMPLE_GAUSSIAN(double, 5); #undef SAMPLE_GAUSSIAN MATHS_EXPORT void sampleGaussian(std::size_t n, - const CVector &mean, - const CSymmetricMatrix &covariance, - std::vector > &result); + const CVector& mean, + const CSymmetricMatrix& covariance, + std::vector>& result); MATHS_EXPORT void sampleGaussian(std::size_t n, - const CVector &mean, - const CSymmetricMatrix &covariance, - std::vector > &result); + const CVector& mean, + const CSymmetricMatrix& covariance, + std::vector>& result); //! Shared implementation of the log-determinant function. -#define LOG_DETERMINANT(T, N) \ -MATHS_EXPORT \ -maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, \ - const CSymmetricMatrixNxN &matrix, \ - double &result, \ - bool ignoreSingularSubspace) +#define LOG_DETERMINANT(T, N) \ + MATHS_EXPORT \ + maths_t::EFloatingPointErrorStatus logDeterminant( \ + std::size_t d, const CSymmetricMatrixNxN& matrix, double& result, bool ignoreSingularSubspace) LOG_DETERMINANT(CFloatStorage, 2); LOG_DETERMINANT(CFloatStorage, 3); LOG_DETERMINANT(CFloatStorage, 4); @@ -379,27 +321,19 @@ LOG_DETERMINANT(double, 4); LOG_DETERMINANT(double, 5); #undef LOG_DETERMINANT MATHS_EXPORT -maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, - const CSymmetricMatrix &matrix, - double &result, - bool ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +logDeterminant(std::size_t d, const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace); MATHS_EXPORT -maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, - const CSymmetricMatrix &matrix, - double &result, - bool ignoreSingularSubspace); - +maths_t::EFloatingPointErrorStatus +logDeterminant(std::size_t d, const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace); } //! Output for debug. template -std::ostream &operator<<(std::ostream &o, const CSymmetricMatrix &m) -{ - for (std::size_t i = 0u; i < m.rows(); ++i) - { +std::ostream& operator<<(std::ostream& o, const CSymmetricMatrix& m) { + for (std::size_t i = 0u; i < m.rows(); ++i) { o << "\n "; - for (std::size_t j = 0u; j < m.columns(); ++j) - { + for (std::size_t j = 0u; j < m.columns(); ++j) { std::string element = core::CStringUtils::typeToStringPretty(m(i, j)); o << element << std::string(15 - element.size(), ' '); } @@ -409,13 +343,10 @@ std::ostream &operator<<(std::ostream &o, const CSymmetricMatrix &m) //! Output for debug. template -std::ostream &operator<<(std::ostream &o, const CSymmetricMatrixNxN &m) -{ - for (std::size_t i = 0u; i < N; ++i) - { +std::ostream& operator<<(std::ostream& o, const CSymmetricMatrixNxN& m) { + for (std::size_t i = 0u; i < N; ++i) { o << "\n "; - for (std::size_t j = 0u; j < N; ++j) - { + for (std::size_t j = 0u; j < N; ++j) { std::string element = core::CStringUtils::typeToStringPretty(m(i, j)); o << element << std::string(15 - element.size(), ' '); } @@ -425,46 +356,39 @@ std::ostream &operator<<(std::ostream &o, const CSymmetricMatrixNxN &m) //! Output for debug. 
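 //
 // Illustrative output (not part of this change): for the 2x2 identity the
 // matrix printer above emits each row on its own line with every element
 // padded to a 15-character column,
 //
 //     1              0
 //     0              1
 //
 // Note that std::string(15 - element.size(), ' ') assumes the pretty-printed
 // element is shorter than 15 characters.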
template -std::ostream &operator<<(std::ostream &o, const CVectorNx1 &v) -{ +std::ostream& operator<<(std::ostream& o, const CVectorNx1& v) { o << "["; - for (std::size_t i = 0u; i+1 < N; ++i) - { + for (std::size_t i = 0u; i + 1 < N; ++i) { o << core::CStringUtils::typeToStringPretty(v(i)) << ' '; } - o << core::CStringUtils::typeToStringPretty(v(N-1)) << ']'; + o << core::CStringUtils::typeToStringPretty(v(N - 1)) << ']'; return o; } //! Output for debug. template -std::ostream &operator<<(std::ostream &o, const CVector &v) -{ - if (v.dimension() == 0) - { +std::ostream& operator<<(std::ostream& o, const CVector& v) { + if (v.dimension() == 0) { return o << "[]"; } o << "["; - for (std::size_t i = 0u; i+1 < v.dimension(); ++i) - { + for (std::size_t i = 0u; i + 1 < v.dimension(); ++i) { o << core::CStringUtils::typeToStringPretty(v(i)) << ' '; } - o << core::CStringUtils::typeToStringPretty(v(v.dimension()-1)) << ']'; + o << core::CStringUtils::typeToStringPretty(v(v.dimension() - 1)) << ']'; return o; } //! Overload sqrt for CVectorNx1. template -CVectorNx1 sqrt(const CVectorNx1 &v) -{ +CVectorNx1 sqrt(const CVectorNx1& v) { CVectorNx1 result(v); linear_algebra_tools_detail::SSqrt::calculate(N, result); return result; } //! Overload sqrt for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN sqrt(const CSymmetricMatrixNxN &m) -{ +CSymmetricMatrixNxN sqrt(const CSymmetricMatrixNxN& m) { CSymmetricMatrixNxN result(m); linear_algebra_tools_detail::SSqrt::calculate(N, result); return result; @@ -472,52 +396,42 @@ CSymmetricMatrixNxN sqrt(const CSymmetricMatrixNxN &m) //! Overload minimum for CVectorNx1. template -CVectorNx1 min(const CVectorNx1 &lhs, - const CVectorNx1 &rhs) -{ +CVectorNx1 min(const CVectorNx1& lhs, const CVectorNx1& rhs) { CVectorNx1 result(rhs); linear_algebra_tools_detail::SMin::calculate(N, lhs, result); return result; } //! Overload minimum for CVectorNx1. template -CVectorNx1 min(const CVectorNx1 &lhs, const T &rhs) -{ +CVectorNx1 min(const CVectorNx1& lhs, const T& rhs) { CVectorNx1 result(lhs); linear_algebra_tools_detail::SMin::calculate(N, result, rhs); return result; } //! Overload minimum for CVectorNx1. template -CVectorNx1 min(const T &lhs, const CVectorNx1 &rhs) -{ +CVectorNx1 min(const T& lhs, const CVectorNx1& rhs) { CVectorNx1 result(rhs); linear_algebra_tools_detail::SMin::calculate(N, lhs, result); return result; } //! Overload minimum for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN min(const CSymmetricMatrixNxN &lhs, - const CSymmetricMatrixNxN &rhs) -{ +CSymmetricMatrixNxN min(const CSymmetricMatrixNxN& lhs, const CSymmetricMatrixNxN& rhs) { CSymmetricMatrixNxN result(rhs); linear_algebra_tools_detail::SMin::calculate(N, lhs, result); return result; } //! Overload minimum for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN min(const CSymmetricMatrixNxN &lhs, - const T &rhs) -{ +CSymmetricMatrixNxN min(const CSymmetricMatrixNxN& lhs, const T& rhs) { CSymmetricMatrixNxN result(lhs); linear_algebra_tools_detail::SMin::calculate(N, result, rhs); return result; } //! Overload minimum for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN min(const T &lhs, - const CSymmetricMatrixNxN &rhs) -{ +CSymmetricMatrixNxN min(const T& lhs, const CSymmetricMatrixNxN& rhs) { CSymmetricMatrixNxN result(rhs); linear_algebra_tools_detail::SMin::calculate(N, lhs, result); return result; @@ -525,52 +439,42 @@ CSymmetricMatrixNxN min(const T &lhs, //! Overload maximum for CVectorNx1. 
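 //!
 //! Combined with the min overloads above, the max overloads declared next
 //! enable component-wise clamping; a minimal sketch (illustrative, not part
 //! of this change):
 //! \code
 //! CVectorNx1<double, 3> x(...); // hypothetical values
 //! CVectorNx1<double, 3> clamped = min(max(x, 0.0), 1.0); // clamp to [0, 1]
 //! \endcode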
template -CVectorNx1 max(const CVectorNx1 &lhs, - const CVectorNx1 &rhs) -{ +CVectorNx1 max(const CVectorNx1& lhs, const CVectorNx1& rhs) { CVectorNx1 result(rhs); linear_algebra_tools_detail::SMax::calculate(N, lhs, result); return result; } //! Overload maximum for CVectorNx1. template -CVectorNx1 max(const CVectorNx1 &lhs, const T &rhs) -{ +CVectorNx1 max(const CVectorNx1& lhs, const T& rhs) { CVectorNx1 result(lhs); linear_algebra_tools_detail::SMax::calculate(N, result, rhs); return result; } //! Overload maximum for CVectorNx1. template -CVectorNx1 max(const T &lhs, const CVectorNx1 &rhs) -{ +CVectorNx1 max(const T& lhs, const CVectorNx1& rhs) { CVectorNx1 result(rhs); linear_algebra_tools_detail::SMax::calculate(N, lhs, result); return result; } //! Overload maximum for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN max(const CSymmetricMatrixNxN &lhs, - const CSymmetricMatrixNxN &rhs) -{ +CSymmetricMatrixNxN max(const CSymmetricMatrixNxN& lhs, const CSymmetricMatrixNxN& rhs) { CSymmetricMatrixNxN result(rhs); linear_algebra_tools_detail::SMax::calculate(N, lhs, result); return result; } //! Overload maximum for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN max(const CSymmetricMatrixNxN &lhs, - const T &rhs) -{ +CSymmetricMatrixNxN max(const CSymmetricMatrixNxN& lhs, const T& rhs) { CSymmetricMatrixNxN result(lhs); linear_algebra_tools_detail::SMax::calculate(N, result, rhs); return result; } //! Overload maximum for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN max(const T &lhs, - const CSymmetricMatrixNxN &rhs) -{ +CSymmetricMatrixNxN max(const T& lhs, const CSymmetricMatrixNxN& rhs) { CSymmetricMatrixNxN result(rhs); linear_algebra_tools_detail::SMax::calculate(N, lhs, result); return result; @@ -578,16 +482,14 @@ CSymmetricMatrixNxN max(const T &lhs, //! Overload ::fabs for CVectorNx1. template -CVectorNx1 fabs(const CVectorNx1 &v) -{ +CVectorNx1 fabs(const CVectorNx1& v) { CVectorNx1 result(v); linear_algebra_tools_detail::SFabs::calculate(N, result); return result; } //! Overload ::fabs for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN fabs(const CSymmetricMatrixNxN &m) -{ +CSymmetricMatrixNxN fabs(const CSymmetricMatrixNxN& m) { CSymmetricMatrixNxN result(m); linear_algebra_tools_detail::SFabs::calculate(N, result); return result; @@ -595,16 +497,14 @@ CSymmetricMatrixNxN fabs(const CSymmetricMatrixNxN &m) //! Overload sqrt for CVector. template -CVector sqrt(const CVector &v) -{ +CVector sqrt(const CVector& v) { CVector result(v); linear_algebra_tools_detail::SSqrt::calculate(result.dimension(), result); return result; } //! Overload sqrt for CSymmetricMatrix. template -CSymmetricMatrix sqrt(const CSymmetricMatrix &m) -{ +CSymmetricMatrix sqrt(const CSymmetricMatrix& m) { CSymmetricMatrix result(m); linear_algebra_tools_detail::SSqrt::calculate(result.rows(), result); return result; @@ -612,48 +512,42 @@ CSymmetricMatrix sqrt(const CSymmetricMatrix &m) //! Overload minimum for CVector. template -CVector min(const CVector &lhs, const CVector &rhs) -{ +CVector min(const CVector& lhs, const CVector& rhs) { CVector result(rhs); linear_algebra_tools_detail::SMin::calculate(result.dimension(), lhs, result); return result; } //! Overload minimum for CVector. template -CVector min(const CVector &lhs, const T &rhs) -{ +CVector min(const CVector& lhs, const T& rhs) { CVector result(lhs); linear_algebra_tools_detail::SMin::calculate(result.dimension(), result, rhs); return result; } //! Overload minimum for CVector. 
template -CVector min(const T &lhs, const CVector &rhs) -{ +CVector min(const T& lhs, const CVector& rhs) { CVector result(rhs); linear_algebra_tools_detail::SMin::calculate(result.dimension(), lhs, result); return result; } //! Overload minimum for CSymmetricMatrix. template -CSymmetricMatrix min(const CSymmetricMatrix &lhs, const CSymmetricMatrix &rhs) -{ +CSymmetricMatrix min(const CSymmetricMatrix& lhs, const CSymmetricMatrix& rhs) { CSymmetricMatrix result(rhs); linear_algebra_tools_detail::SMin::calculate(result.rows(), lhs, result); return result; } //! Overload minimum for CSymmetricMatrix. template -CSymmetricMatrix min(const CSymmetricMatrix &lhs, const T &rhs) -{ +CSymmetricMatrix min(const CSymmetricMatrix& lhs, const T& rhs) { CSymmetricMatrix result(lhs); linear_algebra_tools_detail::SMin::calculate(result.rows(), result, rhs); return result; } //! Overload minimum for CSymmetricMatrix. template -CSymmetricMatrix min(const T &lhs, const CSymmetricMatrix &rhs) -{ +CSymmetricMatrix min(const T& lhs, const CSymmetricMatrix& rhs) { CSymmetricMatrix result(rhs); linear_algebra_tools_detail::SMin::calculate(result.rows(), lhs, result); return result; @@ -661,48 +555,42 @@ CSymmetricMatrix min(const T &lhs, const CSymmetricMatrix &rhs) //! Overload maximum for CVector. template -CVector max(const CVector &lhs, const CVector &rhs) -{ +CVector max(const CVector& lhs, const CVector& rhs) { CVector result(rhs); linear_algebra_tools_detail::SMax::calculate(result.dimension(), lhs, result); return result; } //! Overload maximum for CVector. template -CVector max(const CVector &lhs, const T &rhs) -{ +CVector max(const CVector& lhs, const T& rhs) { CVector result(lhs); linear_algebra_tools_detail::SMax::calculate(result.dimension(), result, rhs); return result; } //! Overload maximum for CVector. template -CVector max(const T &lhs, const CVector &rhs) -{ +CVector max(const T& lhs, const CVector& rhs) { CVector result(rhs); linear_algebra_tools_detail::SMax::calculate(result.dimension(), lhs, result); return result; } //! Overload maximum for CSymmetricMatrix. template -CSymmetricMatrix max(const CSymmetricMatrix &lhs, const CSymmetricMatrix &rhs) -{ +CSymmetricMatrix max(const CSymmetricMatrix& lhs, const CSymmetricMatrix& rhs) { CSymmetricMatrix result(rhs); linear_algebra_tools_detail::SMax::calculate(result.rows(), lhs, result); return result; } //! Overload maximum for CSymmetricMatrix. template -CSymmetricMatrix max(const CSymmetricMatrix &lhs, const T &rhs) -{ +CSymmetricMatrix max(const CSymmetricMatrix& lhs, const T& rhs) { CSymmetricMatrix result(lhs); linear_algebra_tools_detail::SMax::calculate(result.rows(), result, rhs); return result; } //! Overload maximum for CSymmetricMatrix. template -CSymmetricMatrix max(const T &lhs, const CSymmetricMatrix &rhs) -{ +CSymmetricMatrix max(const T& lhs, const CSymmetricMatrix& rhs) { CSymmetricMatrix result(rhs); linear_algebra_tools_detail::SMax::calculate(result.rows(), lhs, result); return result; @@ -710,16 +598,14 @@ CSymmetricMatrix max(const T &lhs, const CSymmetricMatrix &rhs) //! Overload ::fabs for CVector. template -CVector fabs(const CVector &v) -{ +CVector fabs(const CVector& v) { CVector result(v); linear_algebra_tools_detail::SFabs::calculate(result.dimension(), result); return result; } //! Overload ::fabs for CSymmetricMatrix. 
template<typename T>
-CSymmetricMatrix<T> fabs(const CSymmetricMatrix<T> &m)
-{
+CSymmetricMatrix<T> fabs(const CSymmetricMatrix<T>& m) {
     CSymmetricMatrix<T> result(m);
     linear_algebra_tools_detail::SFabs<MatrixTag>::calculate(result.rows(), result);
     return result;
 }
@@ -727,15 +613,10 @@ CSymmetricMatrix<T> fabs(const CSymmetricMatrix<T> &m)

 //! Efficiently scale the \p i'th row and column by \p scale.
 template<typename T, std::size_t N>
-void scaleCovariances(std::size_t i,
-                      T scale,
-                      CSymmetricMatrixNxN<T, N> &m)
-{
+void scaleCovariances(std::size_t i, T scale, CSymmetricMatrixNxN<T, N>& m) {
     scale = std::sqrt(scale);
-    for (std::size_t j = 0u; j < m.columns(); ++j)
-    {
-        if (i == j)
-        {
+    for (std::size_t j = 0u; j < m.columns(); ++j) {
+        if (i == j) {
             m(i, j) *= scale;
         }
         m(i, j) *= scale;
     }
 }
@@ -744,26 +625,18 @@ void scaleCovariances(std::size_t i,

 //! Efficiently scale the rows and columns by \p scale.
 template<typename T, std::size_t N>
-void scaleCovariances(const CVectorNx1<T, N> &scale,
-                      CSymmetricMatrixNxN<T, N> &m)
-{
-    for (std::size_t i = 0u; i < scale.dimension(); ++i)
-    {
+void scaleCovariances(const CVectorNx1<T, N>& scale, CSymmetricMatrixNxN<T, N>& m) {
+    for (std::size_t i = 0u; i < scale.dimension(); ++i) {
         scaleCovariances(i, scale(i), m);
     }
 }

 //! Efficiently scale the \p i'th row and column by \p scale.
 template<typename T>
-void scaleCovariances(std::size_t i,
-                      T scale,
-                      CSymmetricMatrix<T> &m)
-{
+void scaleCovariances(std::size_t i, T scale, CSymmetricMatrix<T>& m) {
     scale = std::sqrt(scale);
-    for (std::size_t j = 0u; j < m.columns(); ++j)
-    {
-        if (i == j)
-        {
+    for (std::size_t j = 0u; j < m.columns(); ++j) {
+        if (i == j) {
             m(i, j) *= scale;
         }
         m(i, j) *= scale;
     }
 }
@@ -772,11 +645,8 @@ void scaleCovariances(std::size_t i,

 //! Efficiently scale the rows and columns by \p scale.
 template<typename T>
-void scaleCovariances(const CVector<T> &scale,
-                      CSymmetricMatrix<T> &m)
-{
-    for (std::size_t i = 0u; i < scale.dimension(); ++i)
-    {
+void scaleCovariances(const CVector<T>& scale, CSymmetricMatrix<T>& m) {
+    for (std::size_t i = 0u; i < scale.dimension(); ++i) {
         scaleCovariances(i, scale(i), m);
     }
 }
@@ -790,13 +660,11 @@ void scaleCovariances(const CVector<T> &scale,

 //! Compute the inverse quadratic form \f$x^tC^{-1}x\f$.
 //!
 //! \param[in] covariance The matrix.
 //! \param[in] residual The vector.
 //! \param[out] result Filled in with the inverse quadratic form.
 //! \param[in] ignoreSingularSubspace If true this ignores the
 //! residual on a singular subspace of m. Otherwise the result is
 //! minus infinity in this case.
 template<typename T, std::size_t N>
-maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrixNxN<T, N> &covariance,
-                                                        const CVectorNx1<T, N> &residual,
-                                                        double &result,
-                                                        bool ignoreSingularSubspace = true)
-{
-    return linear_algebra_tools_detail::inverseQuadraticProduct(N, covariance, residual,
-                                                                result, ignoreSingularSubspace);
+maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrixNxN<T, N>& covariance,
+                                                        const CVectorNx1<T, N>& residual,
+                                                        double& result,
+                                                        bool ignoreSingularSubspace = true) {
+    return linear_algebra_tools_detail::inverseQuadraticProduct(N, covariance, residual, result, ignoreSingularSubspace);
 }

 //! Compute the log-likelihood for the residual \p x and covariance
 //! matrix \p m.
 //!
 //! \param[in] covariance The matrix.
 //! \param[in] residual The vector.
 //! \param[out] result Filled in with the log-likelihood.
 //! \param[in] ignoreSingularSubspace If true this ignores the
 //! residual on a singular subspace of m. Otherwise the result is
 //! minus infinity in this case.
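 //!
 //! A usage sketch (illustrative only; the status constant is assumed from
 //! maths_t and is not introduced by this change):
 //! \code
 //! CSymmetricMatrixNxN<double, 2> covariance(...); // e.g. the 2x2 identity
 //! CVectorNx1<double, 2> residual(...);            // x - mean, e.g. (0, 0)
 //! double logLikelihood;
 //! if (gaussianLogLikelihood(covariance, residual, logLikelihood) ==
 //!     maths_t::E_FpNoErrors) {
 //!     // For the identity covariance and zero residual this is
 //!     // -log(2 * pi), approximately -1.8379, the log of the peak density.
 //! }
 //! \endcode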
template -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrixNxN &covariance, - const CVectorNx1 &residual, - double &result, - bool ignoreSingularSubspace = true) -{ - return linear_algebra_tools_detail::gaussianLogLikelihood(N, covariance, residual, - result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrixNxN& covariance, + const CVectorNx1& residual, + double& result, + bool ignoreSingularSubspace = true) { + return linear_algebra_tools_detail::gaussianLogLikelihood(N, covariance, residual, result, ignoreSingularSubspace); } //! Sample from a Gaussian with \p mean and \p covariance in such @@ -828,10 +694,9 @@ maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrixN //! \param[out] result Filled in with the samples. template void sampleGaussian(std::size_t n, - const CVectorNx1 &mean, - const CSymmetricMatrixNxN &covariance, - std::vector > &result) -{ + const CVectorNx1& mean, + const CSymmetricMatrixNxN& covariance, + std::vector>& result) { return linear_algebra_tools_detail::sampleGaussian(n, mean, covariance, result); } @@ -841,14 +706,11 @@ void sampleGaussian(std::size_t n, //! \param[in] ignoreSingularSubspace If true then we ignore any //! singular subspace of m. Otherwise, the result is minus infinity. template -maths_t::EFloatingPointErrorStatus logDeterminant(const CSymmetricMatrixNxN &matrix, - double &result, - bool ignoreSingularSubspace = true) -{ +maths_t::EFloatingPointErrorStatus +logDeterminant(const CSymmetricMatrixNxN& matrix, double& result, bool ignoreSingularSubspace = true) { return linear_algebra_tools_detail::logDeterminant(N, matrix, result, ignoreSingularSubspace); } - //! Compute the inverse quadratic form \f$x^tC^{-1}x\f$. //! //! \param[in] covariance The matrix. @@ -858,14 +720,11 @@ maths_t::EFloatingPointErrorStatus logDeterminant(const CSymmetricMatrixNxN -maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, - bool ignoreSingularSubspace = true) -{ - return linear_algebra_tools_detail::inverseQuadraticProduct(covariance.rows(), - covariance, residual, - result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace = true) { + return linear_algebra_tools_detail::inverseQuadraticProduct(covariance.rows(), covariance, residual, result, ignoreSingularSubspace); } //! Compute the log-likelihood for the residual \p x and covariance @@ -878,14 +737,11 @@ maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrix -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, - bool ignoreSingularSubspace = true) -{ - return linear_algebra_tools_detail::gaussianLogLikelihood(covariance.rows(), - covariance, residual, - result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace = true) { + return linear_algebra_tools_detail::gaussianLogLikelihood(covariance.rows(), covariance, residual, result, ignoreSingularSubspace); } //! Sample from a Gaussian with \p mean and \p covariance in such @@ -897,11 +753,7 @@ maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrix< //! 
\param[in] covariance The covariance matrix of the Gaussian. //! \param[out] result Filled in with the samples. template -void sampleGaussian(std::size_t n, - const CVector &mean, - const CSymmetricMatrix &covariance, - std::vector > &result) -{ +void sampleGaussian(std::size_t n, const CVector& mean, const CSymmetricMatrix& covariance, std::vector>& result) { return linear_algebra_tools_detail::sampleGaussian(n, mean, covariance, result); } @@ -911,25 +763,19 @@ void sampleGaussian(std::size_t n, //! \param[in] ignoreSingularSubspace If true then we ignore any //! singular subspace of m. Otherwise, the result is minus infinity. template -maths_t::EFloatingPointErrorStatus logDeterminant(const CSymmetricMatrix &matrix, - double &result, - bool ignoreSingularSubspace = true) -{ +maths_t::EFloatingPointErrorStatus logDeterminant(const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace = true) { return linear_algebra_tools_detail::logDeterminant(matrix.rows(), matrix, result, ignoreSingularSubspace); } //! Project the matrix on to \p subspace. template -inline Eigen::Matrix - projectedMatrix(const std::vector &subspace, const MATRIX &matrix) -{ +inline Eigen::Matrix projectedMatrix(const std::vector& subspace, + const MATRIX& matrix) { std::size_t d = subspace.size(); Eigen::Matrix result(d, d); - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j < d; ++j) - { - result(i,j) = matrix(subspace[i], subspace[j]); + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j < d; ++j) { + result(i, j) = matrix(subspace[i], subspace[j]); } } return result; @@ -937,18 +783,14 @@ inline Eigen::Matrix //! Project the vector on to \p subspace. template -inline Eigen::Matrix - projectedVector(const std::vector &subspace, const VECTOR &vector) -{ +inline Eigen::Matrix projectedVector(const std::vector& subspace, const VECTOR& vector) { std::size_t d = subspace.size(); Eigen::Matrix result(d); - for (std::size_t i = 0u; i < d; ++i) - { + for (std::size_t i = 0u; i < d; ++i) { result(i) = vector(subspace[i]); } return result; } - } } diff --git a/include/maths/CLogNormalMeanPrecConjugate.h b/include/maths/CLogNormalMeanPrecConjugate.h index 67a7e5251c..89dda4ecf2 100644 --- a/include/maths/CLogNormalMeanPrecConjugate.h +++ b/include/maths/CLogNormalMeanPrecConjugate.h @@ -16,15 +16,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { struct SDistributionRestoreParams; //! \brief A conjugate prior distribution for a log-normal variable. @@ -48,400 +45,388 @@ struct SDistributionRestoreParams; //! the data when using one-of-n composition (see COneOfNPrior) or model data with //! multiple modes when using multi-modal composition (see CMultimodalPrior). //! From a design point of view this is the composite pattern. -class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior -{ - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - using TEqualWithTolerance = CEqualWithTolerance; - - //! Lift the overloads of addSamples into scope. - using CPrior::addSamples; - //! Lift the overloads of print into scope. - using CPrior::print; - - public: - //! \name Life-Cycle - //@{ - //! \param[in] dataType The type of data being modeled (see maths_t::EDataType - //! for details). - //! \param[in] offset The offset to apply to the data. - //! 
\param[in] gaussianMean The mean of the normal component of the prior. - //! \param[in] gaussianPrecision The precision of the normal component of - //! the prior. - //! \param[in] gammaShape The shape parameter of the gamma component of the - //! prior. - //! \param[in] gammaRate The rate parameter of the gamma component of the - //! prior. - //! \param[in] decayRate The rate at which to revert to non-informative. - //! \param[in] offsetMargin The margin between the smallest value and the support - //! left end. - CLogNormalMeanPrecConjugate(maths_t::EDataType dataType, - double offset, - double gaussianMean, - double gaussianPrecision, - double gammaShape, - double gammaRate, - double decayRate = 0.0, - double offsetMargin = LOG_NORMAL_OFFSET_MARGIN); - - //! Construct from part of a state document. - CLogNormalMeanPrecConjugate(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser, - double offsetMargin = LOG_NORMAL_OFFSET_MARGIN); - - // Default copy constructor and assignment operator work. - - //! Create an instance of a non-informative prior. - //! - //! \param[in] dataType The type of data being modeled (see maths_t::EDataType - //! for details). - //! \param[in] offset The offset to apply to the data. - //! \param[in] decayRate The rate at which to revert to the non-informative prior. - //! \param[in] offsetMargin The margin between the smallest value and the support - //! left end. - //! \return A non-informative prior. - static CLogNormalMeanPrecConjugate nonInformativePrior(maths_t::EDataType dataType, - double offset = 0.0, - double decayRate = 0.0, - double offsetMargin = LOG_NORMAL_OFFSET_MARGIN); - //@} - - //! \name Prior Contract - //@{ - //! Get the type of this prior. - virtual EPrior type() const; - - //! Create a copy of the prior. - //! - //! \return A pointer to a newly allocated clone of this prior. - //! \warning The caller owns the object returned. - virtual CLogNormalMeanPrecConjugate *clone() const; - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); - - //! Get the margin between the smallest value and the support left - //! end. Priors with non-negative support, automatically adjust the - //! offset if a value is seen which is smaller than offset + margin. - virtual double offsetMargin() const; - - //! Returns true. - virtual bool needsOffset() const; - - //! Reset m_Offset so the smallest sample is not within some minimum - //! offset of the support left end. Note that translating the mean of - //! a log-normal affects its shape, so there is no easy adjustment of - //! the prior parameters which preserves the distribution after - //! translation. - //! - //! This samples the current marginal likelihood and uses these samples - //! to reconstruct the prior with adjusted offset. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples from which to determine the offset. - //! \param[in] weights The weights of each sample in \p samples. - //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Get the current offset. - virtual double offset() const; - - //! Update the prior with a collection of independent samples from - //! the log-normal variable. - //! - //! 
\param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Propagate the prior density function forwards by \p time. - //! - //! The prior distribution relaxes back to non-informative at a rate - //! controlled by the decay rate parameter (optionally supplied to the - //! constructor). - //! - //! \param[in] time The time increment to apply. - //! \note \p time must be non negative. - virtual void propagateForwardsByTime(double time); - - //! Get the support for the marginal likelihood function. - virtual TDoubleDoublePr marginalLikelihoodSupport() const; - - //! Get the mean of the marginal likelihood function. - virtual double marginalLikelihoodMean() const; - - //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
-        //! 
-        //!
-        //! where \f$m\f$ is the median of the distribution and \f$p\f$ is
-        //! the percentage of interest \p percentage.
-        //!
-        //! \param[in] percentage The percentage of interest.
-        //! \param[in] weightStyles Optional variance scale weight styles.
-        //! \param[in] weights Optional variance scale weights.
-        //! \note \p percentage should be in the range [0.0, 100.0).
-        virtual TDoubleDoublePr
-            marginalLikelihoodConfidenceInterval(double percentage,
-                                                 const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE,
-                                                 const TDouble4Vec &weights = TWeights::UNIT) const;
-
-        //! Compute the log marginal likelihood function at \p samples integrating
-        //! over the prior density function for the exponentiated normal mean
-        //! and precision.
-        //!
-        //! \param[in] weightStyles Controls the interpretation of the weight(s)
-        //! that are associated with each sample. See maths_t::ESampleWeightStyle
-        //! for more details.
-        //! \param[in] samples A collection of samples of the variable.
-        //! \param[in] weights The weights of each sample in \p samples.
-        //! \param[out] result Filled in with the joint likelihood of \p samples.
-        //! \note The samples are assumed to be independent and identically
-        //! distributed.
-        virtual maths_t::EFloatingPointErrorStatus
-            jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                       const TDouble1Vec &samples,
-                                       const TDouble4Vec1Vec &weights,
-                                       double &result) const;
-
-        //! Sample the marginal likelihood function.
-        //!
-        //! \see CPrior::sampleMarginalLikelihood() for a detailed description.
-        //!
-        //! \param[in] numberSamples The number of samples required.
-        //! \param[out] samples Filled in with samples from the prior.
-        //! \note \p numberSamples is truncated to the number of samples received.
-        virtual void sampleMarginalLikelihood(std::size_t numberSamples,
-                                              TDouble1Vec &samples) const;
-
-        //! Compute minus the log of the joint c.d.f. of the marginal likelihood
-        //! at \p samples.
-        //!
-        //! \param[in] weightStyles Controls the interpretation of the weight(s)
-        //! that are associated with each sample. See maths_t::ESampleWeightStyle
-        //! for more details.
-        //! \param[in] samples The samples of interest.
-        //! \param[in] weights The weights of each sample in \p samples. For the
-        //! count variance scale weight style the weight is interpreted as a scale
-        //! of the likelihood variance. The mean and variance of a log-normal are:
-        //!
-        //!   \f$\displaystyle e^{m + 1/2p}\f$
-        //!   \f$\displaystyle (e^{1/p} - 1)e^{2m + 1/p}\f$
-        //! 
- //! Here, \f$m\f$ is the mean and \f$p\f$ is the precision for which - //! this is the prior. Our assumption implies: - //!
-        //!   \f$\displaystyle e^{m_i' + 1/2p_i'} = e^{m + 1/2p}\f$
-        //!   \f$\displaystyle (e^{1/p_i'} - 1)e^{2m_i' + 1/p_i'} = \gamma_i(e^{1/p} - 1)e^{2m + 1/p}\f$
-        //! 
- //! where, \f$m_i'\f$ is the mean and \f$p_i'\f$ are the scaled parameters - //! of the exponentiated normal. We can solve for \f$m_i'\f$ and \f$p_i'\f$ - //! to give: - //!
-        //!   \f$\displaystyle m_i' = m + \frac{1/p - \log(1 + \gamma_i(e^{1/p} - 1))}{2}\f$
-        //!   \f$\displaystyle p_i' = \frac{1}{\log(1 + \gamma_i(e^{1/p} - 1))}\f$
-        //! 
- //! We then interpret the likelihood function as:\n - //!
-        //!   \f$\displaystyle f(x_i) = \sqrt{\frac{p_i'}{2\pi}}e^{-p_i'/2(\log(x_i) - m_i')^2}\f$
-        //! 
- //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ - //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. - //! \param[out] upperBound Equal to \p lowerBound. - //! \note The samples are assumed to be independent. - //! \warning The variance scales \f$\gamma_i\f$ must be in the range - //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and - //! a value of infinity is not well handled. The approximations we - //! make are less good for \f$\gamma_i\f$ a long way from one. - virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute minus the log of the one minus the joint c.d.f. of the - //! marginal likelihood at \p samples without losing precision due to - //! cancellation errors at one, i.e. the smallest non-zero value this - //! can return is the minimum double rather than epsilon. - //! - //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute the probability of a less likely, i.e. lower likelihood, - //! collection of independent samples from the variable. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights. See minusLogJointCdf for discussion. - //! \param[out] lowerBound Filled in with the probability of the set - //! for which the joint marginal likelihood is less than that of - //! \p samples (subject to the measure \p calculation). - //! \param[out] upperBound Equal to \p lowerBound. - //! \param[out] tail The tail that (left or right) that all the - //! samples are in or neither. - //! \note The samples are assumed to be independent. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, - //! i.e. a value of zero is not well defined and a value of infinity - //! is not well handled. (Very large values are handled though.) - virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &indent, std::string &result) const; - - //! Print the prior density function in a specified format. - //! - //! \see CPrior::printJointDensityFunction for details. - virtual std::string printJointDensityFunction() const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const; - - //! 
Get the static size of this object - used for virtual hierarchies
-        virtual std::size_t staticSize() const;
-
-        //! Persist state by passing information to the supplied inserter
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-        //@}
-
-
-        //! Get the current expected mean for the exponentiated normal.
-        //!
-        //! \note This is not to be confused with the mean of the variable itself
-        //! which is \f$e ^{m + 1 / 2 p}\f$, where \f$m\f$ and \f$p\f$ are the
-        //! mean and precision of the exponentiated Gaussian, respectively.
-        double normalMean() const;
-
-        //! Compute the current expected precision for the exponentiated normal.
-        //!
-        //! \note This is not to be confused with the precision of the variable
-        //! itself which is \f$\displaystyle \frac{e ^{-2m - 1 / p}}{e ^{1 / p} - 1}\f$,
-        //! where \f$m\f$ and \f$p\f$ are the mean and precision of the exponentiated
-        //! Gaussian, respectively.
-        double normalPrecision() const;
-
-
-        //! \name Test Functions
-        //@{
-        //! Compute the specified percentage confidence interval for the
-        //! exponentiated normal mean.
-        //!
-        //! \note This is not to be confused with the mean of the variable
-        //! itself which is \f$e^{m + 1 / 2 p}\f$, where \f$m\f$ and \f$p\f$
-        //! are the mean and precision of the exponentiated Gaussian, respectively.
-        TDoubleDoublePr confidenceIntervalNormalMean(double percentage) const;
-
-        //! Compute the specified percentage confidence interval for the
-        //! exponentiated normal precision.
-        //!
-        //! \note This is not to be confused with the precision of the variable
-        //! itself which is \f$\displaystyle \frac{e ^{-2m - 1 / p}}{e ^{1 / p} - 1}\f$,
-        //! where \f$m\f$ and \f$p\f$ are the mean and precision of the exponentiated
-        //! Gaussian, respectively.
-        TDoubleDoublePr confidenceIntervalNormalPrecision(double percentage) const;
-
-        //! Check if two priors are equal to the specified tolerance.
-        bool equalTolerance(const CLogNormalMeanPrecConjugate &rhs,
-                            const TEqualWithTolerance &equal) const;
-        //@}
-
-    private:
-        //! Read parameters from \p traverser.
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-        //! Get the mean of the marginal likelihood.
-        double mean() const;
-
-        //! Check that the state is valid.
-        bool isBad() const;
-
-        //! Full debug dump of the state of this prior.
-        virtual std::string debug() const;
-
-    private:
-        //! The mean parameter of a non-informative prior.
-        static const double NON_INFORMATIVE_MEAN;
-
-        //! The precision parameter of a non-informative prior.
-        static const double NON_INFORMATIVE_PRECISION;
-
-        //! The shape parameter of a non-informative prior.
-        static const double NON_INFORMATIVE_SHAPE;
-
-        //! The rate parameter of a non-informative prior.
-        static const double NON_INFORMATIVE_RATE;
-
-    private:
-        //! We assume that the data are described by \f$X = e^Y - u\f$, where
-        //! \f$u\f$ is a constant and \f$Y\f$ is normally distributed. This
-        //! allows us to model data with negative values greater than \f$-u\f$.
-        double m_Offset;
-
-        //! The margin between the smallest value and the support left end.
-        double m_OffsetMargin;
-
-        //! The mean of the prior conditional distribution for the mean of the
-        //! exponentiated normal (conditioned on its precision).
-        double m_GaussianMean;
-
-        //! The precision of the prior conditional distribution for the mean
-        //! of the exponentiated normal (conditioned on its precision).
-        double m_GaussianPrecision;
-
-        //! 
The shape of the marginal gamma distribution for the precision of the - //! exponentiated normal. - double m_GammaShape; - - //! The rate of the marginal gamma distribution for the precision of the - //! exponentiated normal. - double m_GammaRate; +class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + using TEqualWithTolerance = CEqualWithTolerance; + + //! Lift the overloads of addSamples into scope. + using CPrior::addSamples; + //! Lift the overloads of print into scope. + using CPrior::print; + +public: + //! \name Life-Cycle + //@{ + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] offset The offset to apply to the data. + //! \param[in] gaussianMean The mean of the normal component of the prior. + //! \param[in] gaussianPrecision The precision of the normal component of + //! the prior. + //! \param[in] gammaShape The shape parameter of the gamma component of the + //! prior. + //! \param[in] gammaRate The rate parameter of the gamma component of the + //! prior. + //! \param[in] decayRate The rate at which to revert to non-informative. + //! \param[in] offsetMargin The margin between the smallest value and the support + //! left end. + CLogNormalMeanPrecConjugate(maths_t::EDataType dataType, + double offset, + double gaussianMean, + double gaussianPrecision, + double gammaShape, + double gammaRate, + double decayRate = 0.0, + double offsetMargin = LOG_NORMAL_OFFSET_MARGIN); + + //! Construct from part of a state document. + CLogNormalMeanPrecConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser, + double offsetMargin = LOG_NORMAL_OFFSET_MARGIN); + + // Default copy constructor and assignment operator work. + + //! Create an instance of a non-informative prior. + //! + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] offset The offset to apply to the data. + //! \param[in] decayRate The rate at which to revert to the non-informative prior. + //! \param[in] offsetMargin The margin between the smallest value and the support + //! left end. + //! \return A non-informative prior. + static CLogNormalMeanPrecConjugate nonInformativePrior(maths_t::EDataType dataType, + double offset = 0.0, + double decayRate = 0.0, + double offsetMargin = LOG_NORMAL_OFFSET_MARGIN); + //@} + + //! \name Prior Contract + //@{ + //! Get the type of this prior. + virtual EPrior type() const; + + //! Create a copy of the prior. + //! + //! \return A pointer to a newly allocated clone of this prior. + //! \warning The caller owns the object returned. + virtual CLogNormalMeanPrecConjugate* clone() const; + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); + + //! Get the margin between the smallest value and the support left + //! end. Priors with non-negative support, automatically adjust the + //! offset if a value is seen which is smaller than offset + margin. + virtual double offsetMargin() const; + + //! Returns true. + virtual bool needsOffset() const; + + //! Reset m_Offset so the smallest sample is not within some minimum + //! offset of the support left end. Note that translating the mean of + //! a log-normal affects its shape, so there is no easy adjustment of + //! the prior parameters which preserves the distribution after + //! translation. + //! + //! 
This samples the current marginal likelihood and uses these samples + //! to reconstruct the prior with adjusted offset. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples The samples from which to determine the offset. + //! \param[in] weights The weights of each sample in \p samples. + //! \return The penalty to apply in model selection. + virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Get the current offset. + virtual double offset() const; + + //! Update the prior with a collection of independent samples from + //! the log-normal variable. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Propagate the prior density function forwards by \p time. + //! + //! The prior distribution relaxes back to non-informative at a rate + //! controlled by the decay rate parameter (optionally supplied to the + //! constructor). + //! + //! \param[in] time The time increment to apply. + //! \note \p time must be non negative. + virtual void propagateForwardsByTime(double time); + + //! Get the support for the marginal likelihood function. + virtual TDoubleDoublePr marginalLikelihoodSupport() const; + + //! Get the mean of the marginal likelihood function. + virtual double marginalLikelihoodMean() const; + + //! Get the mode of the marginal likelihood function. + virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the variance of the marginal likelihood. + virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: + //!
+    //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
+    //! </pre>
+    //!
+    //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the
+    //! percentage of interest \p percentage.
+    //!
+    //! \param[in] percentage The percentage of interest.
+    //! \param[in] weightStyles Optional variance scale weight styles.
+    //! \param[in] weights Optional variance scale weights.
+    //! \note \p percentage should be in the range [0.0, 100.0).
+    virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage,
+                                                                 const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE,
+                                                                 const TDouble4Vec& weights = TWeights::UNIT) const;
+
+    //! Compute the log marginal likelihood function at \p samples integrating
+    //! over the prior density function for the exponentiated normal mean
+    //! and precision.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the variable.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] result Filled in with the joint likelihood of \p samples.
+    //! \note The samples are assumed to be independent and identically
+    //! distributed.
+    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                          const TDouble1Vec& samples,
+                                                                          const TDouble4Vec1Vec& weights,
+                                                                          double& result) const;
+
+    //! Sample the marginal likelihood function.
+    //!
+    //! \see CPrior::sampleMarginalLikelihood() for a detailed description.
+    //!
+    //! \param[in] numberSamples The number of samples required.
+    //! \param[out] samples Filled in with samples from the prior.
+    //! \note \p numberSamples is truncated to the number of samples received.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const;
+
+    //! Compute minus the log of the joint c.d.f. of the marginal likelihood
+    //! at \p samples.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples The samples of interest.
+    //! \param[in] weights The weights of each sample in \p samples. For the
+    //! count variance scale weight style the weight is interpreted as a scale
+    //! of the likelihood variance. The mean and variance of a log-normal are:
+    //! <pre>
+    //!   \f$\displaystyle e^{m + 1/2p}\f$
+    //!   \f$\displaystyle (e^{1/p} - 1)e^{2m + 1/p}\f$
+    //! </pre>
+    //! Here, \f$m\f$ is the mean and \f$p\f$ is the precision for which
+    //! this is the prior. Our assumption implies:
+    //! <pre>
+    //!   \f$\displaystyle e^{m_i' + 1/2p_i'} = e^{m + 1/2p}\f$
+    //!   \f$\displaystyle (e^{1/p_i'} - 1)e^{2m_i' + 1/p_i'} = \gamma_i(e^{1/p} - 1)e^{2m + 1/p}\f$
+    //! </pre>
+    //! where \f$m_i'\f$ is the mean and \f$p_i'\f$ is the precision of the
+    //! scaled exponentiated normal. We can solve for \f$m_i'\f$ and \f$p_i'\f$
+    //! to give:
+    //! <pre>
+    //!   \f$\displaystyle m_i' = m + \frac{1/p - \log(1 + \gamma_i(e^{1/p} - 1))}{2}\f$
+    //!   \f$\displaystyle p_i' = \frac{1}{\log(1 + \gamma_i(e^{1/p} - 1))}\f$
+    //! </pre>
+    //! We then interpret the likelihood function as:\n
+    //! <pre>
+    //!   \f$\displaystyle f(x_i) = \sqrt{\frac{p_i'}{2\pi}}e^{-p_i'/2(\log(x_i) - m_i')^2}\f$
+    //! </pre>
+    //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$
+    //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples.
+    //! \param[out] upperBound Equal to \p lowerBound.
+    //! \note The samples are assumed to be independent.
+    //! \warning The variance scales \f$\gamma_i\f$ must be in the range
+    //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and
+    //! a value of infinity is not well handled. The approximations we
+    //! make are less good for \f$\gamma_i\f$ a long way from one.
+    virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                  const TDouble1Vec& samples,
+                                  const TDouble4Vec1Vec& weights,
+                                  double& lowerBound,
+                                  double& upperBound) const;
+
+    //! Compute minus the log of one minus the joint c.d.f. of the
+    //! marginal likelihood at \p samples without losing precision due to
+    //! cancellation errors at one, i.e. the smallest non-zero value this
+    //! can return is the minimum double rather than epsilon.
+    //!
+    //! \see minusLogJointCdf for more details.
+    virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                            const TDouble1Vec& samples,
+                                            const TDouble4Vec1Vec& weights,
+                                            double& lowerBound,
+                                            double& upperBound) const;
+
+    //! Compute the probability of a less likely, i.e. lower likelihood,
+    //! collection of independent samples from the variable.
+    //!
+    //! \param[in] calculation The style of the probability calculation
+    //! (see model_t::EProbabilityCalculation for details).
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples The samples of interest.
+    //! \param[in] weights The weights. See minusLogJointCdf for discussion.
+    //! \param[out] lowerBound Filled in with the probability of the set
+    //! for which the joint marginal likelihood is less than that of
+    //! \p samples (subject to the measure \p calculation).
+    //! \param[out] upperBound Equal to \p lowerBound.
+    //! \param[out] tail The tail (left or right) that all the
+    //! samples are in, or neither.
+    //! \note The samples are assumed to be independent.
+    //! \warning The variance scales must be in the range \f$(0,\infty)\f$,
+    //! i.e. a value of zero is not well defined and a value of infinity
+    //! is not well handled. (Very large values are handled though.)
+    virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
+                                                const TWeightStyleVec& weightStyles,
+                                                const TDouble1Vec& samples,
+                                                const TDouble4Vec1Vec& weights,
+                                                double& lowerBound,
+                                                double& upperBound,
+                                                maths_t::ETail& tail) const;
+
+    //! Check if this is a non-informative prior.
+    virtual bool isNonInformative() const;
+
+    //! Get a human-readable description of the prior.
+    //!
+    //! \param[in] indent The indent to use at the start of new lines.
+    //! \param[in,out] result Filled in with the description.
+    virtual void print(const std::string& indent, std::string& result) const;
+
+    //! Print the prior density function in a specified format.
+    //!
+    //! \see CPrior::printJointDensityFunction for details.
+    virtual std::string printJointDensityFunction() const;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Get the memory used by this component.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this component.
+    virtual std::size_t memoryUsage() const;
+
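The count variance scale mapping documented for minusLogJointCdf above is easy to sanity check numerically. A minimal standalone sketch, not library code (all names here are hypothetical), verifying that the scaled parameters preserve the mean and multiply the variance by \f$\gamma_i\f$:

    #include <cassert>
    #include <cmath>

    int main() {
        double m = 0.5, p = 4.0, gamma = 2.0;
        // r = log(1 + gamma * (e^(1/p) - 1)) appears in both scaled parameters.
        double r = std::log(1.0 + gamma * (std::exp(1.0 / p) - 1.0));
        double mScaled = m + (1.0 / p - r) / 2.0; // m_i'
        double pScaled = 1.0 / r;                 // p_i'
        // The mean e^(m + 1/(2p)) is unchanged...
        assert(std::fabs(std::exp(mScaled + 0.5 / pScaled) - std::exp(m + 0.5 / p)) < 1e-12);
        // ...and the variance (e^(1/p) - 1) e^(2m + 1/p) is multiplied by gamma.
        double v = (std::exp(1.0 / p) - 1.0) * std::exp(2.0 * m + 1.0 / p);
        double vScaled = (std::exp(1.0 / pScaled) - 1.0) * std::exp(2.0 * mScaled + 1.0 / pScaled);
        assert(std::fabs(vScaled - gamma * v) < 1e-12 * gamma * v);
        return 0;
    }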
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const;
+
+    //! Persist state by passing information to the supplied inserter.
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+    //@}
+
+    //! Get the current expected mean for the exponentiated normal.
+    //!
+    //! \note This is not to be confused with the mean of the variable itself
+    //! which is \f$e ^{m + 1 / 2 p}\f$, where \f$m\f$ and \f$p\f$ are the
+    //! mean and precision of the exponentiated Gaussian, respectively.
+    double normalMean() const;
+
+    //! Compute the current expected precision for the exponentiated normal.
+    //!
+    //! \note This is not to be confused with the precision of the variable
+    //! itself which is \f$\displaystyle \frac{e ^{-2m - 1 / p}}{e ^{1 / p} - 1}\f$,
+    //! where \f$m\f$ and \f$p\f$ are the mean and precision of the exponentiated
+    //! Gaussian, respectively.
+    double normalPrecision() const;
+
+    //! \name Test Functions
+    //@{
+    //! Compute the specified percentage confidence interval for the
+    //! exponentiated normal mean.
+    //!
+    //! \note This is not to be confused with the mean of the variable
+    //! itself which is \f$e^{m + 1 / 2 p}\f$, where \f$m\f$ and \f$p\f$
+    //! are the mean and precision of the exponentiated Gaussian, respectively.
+    TDoubleDoublePr confidenceIntervalNormalMean(double percentage) const;
+
+    //! Compute the specified percentage confidence interval for the
+    //! exponentiated normal precision.
+    //!
+    //! \note This is not to be confused with the precision of the variable
+    //! itself which is \f$\displaystyle \frac{e ^{-2m - 1 / p}}{e ^{1 / p} - 1}\f$,
+    //! where \f$m\f$ and \f$p\f$ are the mean and precision of the exponentiated
+    //! Gaussian, respectively.
+    TDoubleDoublePr confidenceIntervalNormalPrecision(double percentage) const;
+
+    //! Check if two priors are equal to the specified tolerance.
+    bool equalTolerance(const CLogNormalMeanPrecConjugate& rhs, const TEqualWithTolerance& equal) const;
+    //@}
+
+private:
+    //! Read parameters from \p traverser.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Get the mean of the marginal likelihood.
+    double mean() const;
+
+    //! Check that the state is valid.
+    bool isBad() const;
+
+    //! Full debug dump of the state of this prior.
+    virtual std::string debug() const;
+
+private:
+    //! The mean parameter of a non-informative prior.
+    static const double NON_INFORMATIVE_MEAN;
+
+    //! The precision parameter of a non-informative prior.
+    static const double NON_INFORMATIVE_PRECISION;
+
+    //! The shape parameter of a non-informative prior.
+    static const double NON_INFORMATIVE_SHAPE;
+
+    //! The rate parameter of a non-informative prior.
+    static const double NON_INFORMATIVE_RATE;
+
+private:
+    //! We assume that the data are described by \f$X = e^Y - u\f$, where
+    //! \f$u\f$ is a constant and \f$Y\f$ is normally distributed. This
+    //! allows us to model data with negative values greater than \f$-u\f$.
+    double m_Offset;
+
+    //! The margin between the smallest value and the support left end.
+    double m_OffsetMargin;
+
+    //! The mean of the prior conditional distribution for the mean of the
+    //! exponentiated normal (conditioned on its precision).
+    double m_GaussianMean;
+
+    //! The precision of the prior conditional distribution for the mean
+    //! of the exponentiated normal (conditioned on its precision).
+    double m_GaussianPrecision;
+
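To make the normalMean() and normalPrecision() notes above concrete, a standalone sketch (not library code) computing the log-normal variable's own mean and precision from the normal parameters \f$(m, p)\f$:

    #include <cmath>
    #include <iostream>

    int main() {
        double m = 1.0, p = 2.0; // mean and precision of the exponentiated Gaussian
        double mean = std::exp(m + 1.0 / (2.0 * p));
        double variance = (std::exp(1.0 / p) - 1.0) * std::exp(2.0 * m + 1.0 / p);
        double precision = 1.0 / variance; // = e^(-2m - 1/p) / (e^(1/p) - 1)
        std::cout << "mean = " << mean << ", precision = " << precision << std::endl;
        return 0;
    }

+    //!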
The shape of the marginal gamma distribution for the precision of the + //! exponentiated normal. + double m_GammaShape; + + //! The rate of the marginal gamma distribution for the precision of the + //! exponentiated normal. + double m_GammaRate; }; - } } diff --git a/include/maths/CLogTDistribution.h b/include/maths/CLogTDistribution.h index eb7eca0c49..61c8015793 100644 --- a/include/maths/CLogTDistribution.h +++ b/include/maths/CLogTDistribution.h @@ -13,10 +13,8 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Representation of a log t distribution. //! @@ -32,57 +30,51 @@ namespace maths //! which defines lightweight objects to represent distributions //! and free functions for computing various properties of the //! distribution. -class MATHS_EXPORT CLogTDistribution -{ - public: - using TDoubleDoublePr = std::pair; - using TOptionalDouble = boost::optional; - - public: - CLogTDistribution(double degreesFreedom, - double location, - double scale); - - double degreesFreedom() const; - double location() const; - double scale() const; - - private: - double m_DegreesFreedom; - double m_Location; - double m_Scale; +class MATHS_EXPORT CLogTDistribution { +public: + using TDoubleDoublePr = std::pair; + using TOptionalDouble = boost::optional; + +public: + CLogTDistribution(double degreesFreedom, double location, double scale); + + double degreesFreedom() const; + double location() const; + double scale() const; + +private: + double m_DegreesFreedom; + double m_Location; + double m_Scale; }; - //! Get the support for a log-t distribution. MATHS_EXPORT -CLogTDistribution::TDoubleDoublePr support(const CLogTDistribution &distribution); +CLogTDistribution::TDoubleDoublePr support(const CLogTDistribution& distribution); //! Compute the mode for \p distribution. MATHS_EXPORT -double mode(const CLogTDistribution &distribution); +double mode(const CLogTDistribution& distribution); //! Get the finite local minimum if the distribution has one. MATHS_EXPORT -CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistribution &distribution); +CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistribution& distribution); //! Compute the p.d.f. at \p x for \p distribution. MATHS_EXPORT -double pdf(const CLogTDistribution &distribution, double x); +double pdf(const CLogTDistribution& distribution, double x); //! Compute the c.d.f. at \p x for \p distribution. MATHS_EXPORT -double cdf(const CLogTDistribution &distribution, double x); +double cdf(const CLogTDistribution& distribution, double x); //! Compute one minus the c.d.f. at \p x for \p distribution. MATHS_EXPORT -double cdfComplement(const CLogTDistribution &distribution, - double x); +double cdfComplement(const CLogTDistribution& distribution, double x); //! Compute the \p q'th quantile for \p distribution. MATHS_EXPORT -double quantile(const CLogTDistribution &distribution, double q); - +double quantile(const CLogTDistribution& distribution, double q); } } diff --git a/include/maths/CMathsFuncs.h b/include/maths/CMathsFuncs.h index 1ce149ae6a..87e113a0a5 100644 --- a/include/maths/CMathsFuncs.h +++ b/include/maths/CMathsFuncs.h @@ -16,10 +16,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief //! Portable maths functions @@ -38,181 +36,162 @@ namespace maths //! Where maths functions have different names on different platforms, //! they should be added to this file. //! 
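For orientation, a minimal sketch of how the wrappers declared below are typically used (hypothetical usage, assuming only this header and the standard library):

    #include <maths/CMathsFuncs.h>

    #include <algorithm>
    #include <cassert>
    #include <cmath>
    #include <limits>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<double> values{1.0, std::nan(""), 2.0,
                                   std::numeric_limits<double>::infinity(), 3.0};
        // SIsFinite avoids the macro pitfalls of the platform's isnan/isinf.
        assert(!std::all_of(values.begin(), values.end(), ml::maths::CMathsFuncs::SIsFinite()));
        // Sum only the finite values (1 + 2 + 3), skipping the NaN and infinity.
        double sum = std::accumulate(ml::maths::CMathsFuncs::beginFinite(values),
                                     ml::maths::CMathsFuncs::endFinite(values), 0.0);
        assert(sum == 6.0);
        return 0;
    }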
-class MATHS_EXPORT CMathsFuncs : private core::CNonInstantiatable
-{
+class MATHS_EXPORT CMathsFuncs : private core::CNonInstantiatable {
+public:
+    //! Wrapper around boost::math::isnan() which avoids the need to add
+    //! cryptic brackets everywhere to deal with macros.
+    static bool isNan(double val);
+    //! Check if any of the components are NaN.
+    template<typename T, std::size_t N>
+    static bool isNan(const CVectorNx1<T, N>& val);
+    //! Check if any of the components are NaN.
+    static bool isNan(const CVector<double>& val);
+    //! Check if any of the elements are NaN.
+    template<typename T, std::size_t N>
+    static bool isNan(const CSymmetricMatrixNxN<T, N>& val);
+    //! Check if any of the elements are NaN.
+    static bool isNan(const CSymmetricMatrix<double>& val);
+    //! Check if an element is NaN.
+    static bool isNan(const core::CSmallVectorBase<double>& val);
+
+    //! Wrapper around boost::math::isinf() which avoids the need to add
+    //! cryptic brackets everywhere to deal with macros.
+    static bool isInf(double val);
+    //! Check if any of the components are infinite.
+    template<typename T, std::size_t N>
+    static bool isInf(const CVectorNx1<T, N>& val);
+    //! Check if any of the components are infinite.
+    static bool isInf(const CVector<double>& val);
+    //! Check if any of the elements are infinite.
+    template<typename T, std::size_t N>
+    static bool isInf(const CSymmetricMatrixNxN<T, N>& val);
+    //! Check if any of the elements are infinite.
+    static bool isInf(const CSymmetricMatrix<double>& val);
+    //! Check if an element is infinite.
+    static bool isInf(const core::CSmallVectorBase<double>& val);
+
+    //! Neither infinite nor NaN.
+    static bool isFinite(double val);
+    //! Check if all of the components are finite.
+    template<typename T, std::size_t N>
+    static bool isFinite(const CVectorNx1<T, N>& val);
+    //! Check if all of the components are finite.
+    static bool isFinite(const CVector<double>& val);
+    //! Check if all of the elements are finite.
+    template<typename T, std::size_t N>
+    static bool isFinite(const CSymmetricMatrixNxN<T, N>& val);
+    //! Check if all of the elements are finite.
+    static bool isFinite(const CSymmetricMatrix<double>& val);
+    //! Check if an element is finite.
+    static bool isFinite(const core::CSmallVectorBase<double>& val);
+
+    //! Check the floating point status of \p value.
+    static maths_t::EFloatingPointErrorStatus fpStatus(double val);
+
+    //! Unary function object to check if a value is finite.
+    struct SIsFinite : std::unary_function<double, bool> {
+        bool operator()(double val) const { return isFinite(val); }
+    };
+
+    //! \brief Wrapper around an iterator over a collection of doubles,
+    //! which must implement the forward iterator concepts, that skips
+    //! non-finite values.
+    template<typename ITR>
+    class CFiniteIterator {
    public:
-        //! Wrapper around boost::math::isnan() which avoids the need to add
-        //! cryptic brackets everywhere to deal with macros.
-        static bool isNan(double val);
-        //! Check if any of the components are NaN.
-        template<typename T, std::size_t N>
-        static bool isNan(const CVectorNx1<T, N> &val);
-        //! Check if any of the components are NaN.
-        static bool isNan(const CVector<double> &val);
-        //! Check if any of the elements are NaN.
-        template<typename T, std::size_t N>
-        static bool isNan(const CSymmetricMatrixNxN<T, N> &val);
-        //! Check if any of the elements are NaN.
-        static bool isNan(const CSymmetricMatrix<double> &val);
-        //! Check if an element is NaN.
-        static bool isNan(const core::CSmallVectorBase<double> &val);
-
-        //! Wrapper around boost::math::isinf() which avoids the need to add
-        //! cryptic brackets everywhere to deal with macros.
-        static bool isInf(double val);
-        //! Check if any of the components are infinite.
-        template<typename T, std::size_t N>
-        static bool isInf(const CVectorNx1<T, N> &val);
-        //! Check if any of the components are infinite.
-        static bool isInf(const CVector<double> &val);
-        //!
Check if any of the elements are infinite. - template - static bool isInf(const CSymmetricMatrixNxN &val); - //! Check if any of the elements are infinite. - static bool isInf(const CSymmetricMatrix &val); - //! Check if an element is NaN. - static bool isInf(const core::CSmallVectorBase &val); - - //! Neither infinite nor NaN. - static bool isFinite(double val); - //! Check if all of the components are finite. - template - static bool isFinite(const CVectorNx1 &val); - //! Check if all of the components are finite. - static bool isFinite(const CVector &val); - //! Check if all of the components are NaN. - template - static bool isFinite(const CSymmetricMatrixNxN &val); - //! Check if all of the components are NaN. - static bool isFinite(const CSymmetricMatrix &val); - //! Check if an element is NaN. - static bool isFinite(const core::CSmallVectorBase &val); - - //! Check the floating point status of \p value. - static maths_t::EFloatingPointErrorStatus fpStatus(double val); - - //! Unary function object to check if a value is finite. - struct SIsFinite : std::unary_function - { - bool operator()(double val) const { return isFinite(val); } - }; - - //! \brief Wrapper around an iterator over a collection of doubles, - //! which must implement the forward iterator concepts, that skips - //! non-finite values. - template - class CFiniteIterator - { - public: - using iterator_category = std::forward_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = typename std::iterator_traits::difference_type; - using pointer = typename std::iterator_traits::pointer; - using reference = typename std::iterator_traits::reference; - - public: - CFiniteIterator() : m_Base(), m_End() {} - CFiniteIterator(const ITR &base, const ITR &end) : - m_Base(base), - m_End(end) - { - if (m_Base != m_End && !isFinite(*m_Base)) - { - this->increment(); - } - } + using iterator_category = std::forward_iterator_tag; + using value_type = typename std::iterator_traits::value_type; + using difference_type = typename std::iterator_traits::difference_type; + using pointer = typename std::iterator_traits::pointer; + using reference = typename std::iterator_traits::reference; - //! Equal. - bool operator==(const CFiniteIterator &rhs) const { return m_Base == rhs.m_Base; } - //! Different. - bool operator!=(const CFiniteIterator &rhs) const { return m_Base != rhs.m_Base; } - - //! Dereference. - reference operator*() const { return *m_Base; } - //! Pointer. - pointer operator->() const { return m_Base.operator->(); } - - //! Prefix increment. - const CFiniteIterator &operator++() - { - this->increment(); - return *this; - } - //! Post-fix increment. - CFiniteIterator operator++(int) - { - CFiniteIterator result(*this); - this->increment(); - return result; - } - - private: - //! Implements increment. - void increment() - { - while (++m_Base != m_End) - { - if (isFinite(*m_Base)) - { - break; - } - } - } + public: + CFiniteIterator() : m_Base(), m_End() {} + CFiniteIterator(const ITR& base, const ITR& end) : m_Base(base), m_End(end) { + if (m_Base != m_End && !isFinite(*m_Base)) { + this->increment(); + } + } - private: - ITR m_Base; - ITR m_End; - }; + //! Equal. + bool operator==(const CFiniteIterator& rhs) const { return m_Base == rhs.m_Base; } + //! Different. + bool operator!=(const CFiniteIterator& rhs) const { return m_Base != rhs.m_Base; } - //! Get an iterator over the finite values of a double container. 
- template - static CFiniteIterator beginFinite(T &container) - { - return CFiniteIterator(container.begin(), container.end()); - } + //! Dereference. + reference operator*() const { return *m_Base; } + //! Pointer. + pointer operator->() const { return m_Base.operator->(); } - //! Get a const_iterator over the finite values of a double container. - template - static CFiniteIterator beginFinite(const T &container) - { - return CFiniteIterator(container.begin(), container.end()); + //! Prefix increment. + const CFiniteIterator& operator++() { + this->increment(); + return *this; } - - //! Get a finite values iterator at the end of a double container. - template - static CFiniteIterator endFinite(T &container) - { - return CFiniteIterator(container.end(), container.end()); + //! Post-fix increment. + CFiniteIterator operator++(int) { + CFiniteIterator result(*this); + this->increment(); + return result; } - //! Get a finite values const_iterator at the end of a double container. - template - static CFiniteIterator endFinite(const T &container) - { - return CFiniteIterator(container.end(), container.end()); + private: + //! Implements increment. + void increment() { + while (++m_Base != m_End) { + if (isFinite(*m_Base)) { + break; + } + } } private: - //! Check if any of the components return true for \p f. - template - static bool aComponent(const F &f, const VECTOR &val); - - //! Check if all the components return true for \p f. - template - static bool everyComponent(const F &f, const VECTOR &val); - - //! Check if any of the elements return true for \p f. - template - static bool anElement(const F &f, const SYMMETRIC_MATRIX &val); - - //! Check if all the elements return true for \p f. - template - static bool everyElement(const F &f, const SYMMETRIC_MATRIX &val); + ITR m_Base; + ITR m_End; + }; + + //! Get an iterator over the finite values of a double container. + template + static CFiniteIterator beginFinite(T& container) { + return CFiniteIterator(container.begin(), container.end()); + } + + //! Get a const_iterator over the finite values of a double container. + template + static CFiniteIterator beginFinite(const T& container) { + return CFiniteIterator(container.begin(), container.end()); + } + + //! Get a finite values iterator at the end of a double container. + template + static CFiniteIterator endFinite(T& container) { + return CFiniteIterator(container.end(), container.end()); + } + + //! Get a finite values const_iterator at the end of a double container. + template + static CFiniteIterator endFinite(const T& container) { + return CFiniteIterator(container.end(), container.end()); + } + +private: + //! Check if any of the components return true for \p f. + template + static bool aComponent(const F& f, const VECTOR& val); + + //! Check if all the components return true for \p f. + template + static bool everyComponent(const F& f, const VECTOR& val); + + //! Check if any of the elements return true for \p f. + template + static bool anElement(const F& f, const SYMMETRIC_MATRIX& val); + + //! Check if all the elements return true for \p f. 
+ template + static bool everyElement(const F& f, const SYMMETRIC_MATRIX& val); }; - - } } #endif // INCLUDED_ml_maths_CMathsFuncs_h - diff --git a/include/maths/CMathsFuncsForMatrixAndVectorTypes.h b/include/maths/CMathsFuncsForMatrixAndVectorTypes.h index d4de8760b1..b0ab8e3a6b 100644 --- a/include/maths/CMathsFuncsForMatrixAndVectorTypes.h +++ b/include/maths/CMathsFuncsForMatrixAndVectorTypes.h @@ -10,18 +10,13 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { template -bool CMathsFuncs::aComponent(const F &f, const VECTOR &val) -{ - for (std::size_t i = 0u; i < val.dimension(); ++i) - { - if (f(val(i))) - { +bool CMathsFuncs::aComponent(const F& f, const VECTOR& val) { + for (std::size_t i = 0u; i < val.dimension(); ++i) { + if (f(val(i))) { return true; } } @@ -29,12 +24,9 @@ bool CMathsFuncs::aComponent(const F &f, const VECTOR &val) } template -bool CMathsFuncs::everyComponent(const F &f, const VECTOR &val) -{ - for (std::size_t i = 0u; i < val.dimension(); ++i) - { - if (!f(val(i))) - { +bool CMathsFuncs::everyComponent(const F& f, const VECTOR& val) { + for (std::size_t i = 0u; i < val.dimension(); ++i) { + if (!f(val(i))) { return false; } } @@ -42,14 +34,10 @@ bool CMathsFuncs::everyComponent(const F &f, const VECTOR &val) } template -bool CMathsFuncs::anElement(const F &f, const SYMMETRIC_MATRIX &val) -{ - for (std::size_t i = 0u; i < val.rows(); ++i) - { - for (std::size_t j = i; j < val.columns(); ++j) - { - if (f(val(i, j))) - { +bool CMathsFuncs::anElement(const F& f, const SYMMETRIC_MATRIX& val) { + for (std::size_t i = 0u; i < val.rows(); ++i) { + for (std::size_t j = i; j < val.columns(); ++j) { + if (f(val(i, j))) { return true; } } @@ -58,14 +46,10 @@ bool CMathsFuncs::anElement(const F &f, const SYMMETRIC_MATRIX &val) } template -bool CMathsFuncs::everyElement(const F &f, const SYMMETRIC_MATRIX &val) -{ - for (std::size_t i = 0u; i < val.rows(); ++i) - { - for (std::size_t j = i; j < val.columns(); ++j) - { - if (!f(val(i, j))) - { +bool CMathsFuncs::everyElement(const F& f, const SYMMETRIC_MATRIX& val) { + for (std::size_t i = 0u; i < val.rows(); ++i) { + for (std::size_t j = i; j < val.columns(); ++j) { + if (!f(val(i, j))) { return false; } } @@ -74,41 +58,34 @@ bool CMathsFuncs::everyElement(const F &f, const SYMMETRIC_MATRIX &val) } template -bool CMathsFuncs::isNan(const CVectorNx1 &val) -{ +bool CMathsFuncs::isNan(const CVectorNx1& val) { return aComponent(static_cast(&isNan), val); } template -bool CMathsFuncs::isNan(const CSymmetricMatrixNxN &val) -{ +bool CMathsFuncs::isNan(const CSymmetricMatrixNxN& val) { return anElement(static_cast(&isNan), val); } template -bool CMathsFuncs::isInf(const CVectorNx1 &val) -{ +bool CMathsFuncs::isInf(const CVectorNx1& val) { return aComponent(static_cast(&isInf), val); } template -bool CMathsFuncs::isInf(const CSymmetricMatrixNxN &val) -{ +bool CMathsFuncs::isInf(const CSymmetricMatrixNxN& val) { return anElement(static_cast(&isInf), val); } template -bool CMathsFuncs::isFinite(const CVectorNx1 &val) -{ +bool CMathsFuncs::isFinite(const CVectorNx1& val) { return everyComponent(static_cast(&isFinite), val); } template -bool CMathsFuncs::isFinite(const CSymmetricMatrixNxN &val) -{ +bool CMathsFuncs::isFinite(const CSymmetricMatrixNxN& val) { return everyElement(static_cast(&isFinite), val); } - } } diff --git a/include/maths/CMixtureDistribution.h b/include/maths/CMixtureDistribution.h index f64b4785c9..1abe21c45a 100644 --- a/include/maths/CMixtureDistribution.h +++ 
b/include/maths/CMixtureDistribution.h @@ -26,46 +26,38 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace mixture_detail -{ +namespace mixture_detail { using TDoubleDoublePr = std::pair; //! \brief Implements the "polymorphic" mixture mode. -class MATHS_EXPORT CMixtureModeImpl -{ - public: - CMixtureModeImpl(const boost::math::normal_distribution<> &normal); - CMixtureModeImpl(const boost::math::gamma_distribution<> &gamma); - CMixtureModeImpl(const boost::math::lognormal_distribution<> &lognormal); - - template - typename F::result_type visit(const F &f, double x) const - { - return boost::apply_visitor(boost::bind(f, _1, x), m_Distribution); - } +class MATHS_EXPORT CMixtureModeImpl { +public: + CMixtureModeImpl(const boost::math::normal_distribution<>& normal); + CMixtureModeImpl(const boost::math::gamma_distribution<>& gamma); + CMixtureModeImpl(const boost::math::lognormal_distribution<>& lognormal); + + template + typename F::result_type visit(const F& f, double x) const { + return boost::apply_visitor(boost::bind(f, _1, x), m_Distribution); + } - template - typename F::result_type visit(const F &f) const - { - return boost::apply_visitor(f, m_Distribution); - } + template + typename F::result_type visit(const F& f) const { + return boost::apply_visitor(f, m_Distribution); + } - private: - using TDistribution = boost::variant, - boost::math::gamma_distribution<>, - boost::math::lognormal_distribution<> >; +private: + using TDistribution = + boost::variant, boost::math::gamma_distribution<>, boost::math::lognormal_distribution<>>; - private: - //! The actual distribution. - TDistribution m_Distribution; +private: + //! The actual distribution. + TDistribution m_Distribution; }; - } template @@ -85,56 +77,54 @@ class CMixtureMode; //! a mode up front and it avoids heap allocation. The complement concept is //! encoded in a type parameter to avoid condition checking. template<> -class MATHS_EXPORT CMixtureMode : public mixture_detail::CMixtureModeImpl -{ - public: - CMixtureMode(const boost::math::normal_distribution<> &normal); - CMixtureMode(const boost::math::gamma_distribution<> &gamma); - CMixtureMode(const boost::math::lognormal_distribution<> &lognormal); +class MATHS_EXPORT CMixtureMode : public mixture_detail::CMixtureModeImpl { +public: + CMixtureMode(const boost::math::normal_distribution<>& normal); + CMixtureMode(const boost::math::gamma_distribution<>& gamma); + CMixtureMode(const boost::math::lognormal_distribution<>& lognormal); }; //! \brief A wrapper around the complement of one of the standard mode //! distributions. template<> -class MATHS_EXPORT CMixtureMode : public mixture_detail::CMixtureModeImpl -{ - public: - CMixtureMode(const CMixtureMode &other); +class MATHS_EXPORT CMixtureMode : public mixture_detail::CMixtureModeImpl { +public: + CMixtureMode(const CMixtureMode& other); }; //! Compute the distribution support. MATHS_EXPORT -mixture_detail::TDoubleDoublePr support(const CMixtureMode &mode); +mixture_detail::TDoubleDoublePr support(const CMixtureMode& mode); //! Compute the distribution mode. MATHS_EXPORT -double mode(const CMixtureMode &mode); +double mode(const CMixtureMode& mode); //! Compute the distribution mean. MATHS_EXPORT -double mean(const CMixtureMode &mode); +double mean(const CMixtureMode& mode); //! Compute the distribution probability density at \p x. MATHS_EXPORT -double pdf(const CMixtureMode &mode, double x); +double pdf(const CMixtureMode& mode, double x); //! 
Compute the distribution cumulative density at \p x.
MATHS_EXPORT
-double cdf(const CMixtureMode<false> &mode, double x);
+double cdf(const CMixtureMode<false>& mode, double x);

//! Compute one minus the distribution cumulative density at \p x.
MATHS_EXPORT
-double cdf(const CMixtureMode<true> &mode, double x);
+double cdf(const CMixtureMode<true>& mode, double x);

//! Compute the distribution quantile at \p x.
//!
//! \note x must be in the range (0, 1).
MATHS_EXPORT
-double quantile(const CMixtureMode<false> &mode, double x);
+double quantile(const CMixtureMode<false>& mode, double x);

//! Get the complement distribution of \p mode.
MATHS_EXPORT
-CMixtureMode<true> complement(const CMixtureMode<false> &mode);
+CMixtureMode<true> complement(const CMixtureMode<false>& mode);

//! \brief A mixture distribution.
//!
@@ -150,157 +140,109 @@ CMixtureMode<true> complement(const CMixtureMode<false> &mode);
//! distribution. In order to get this to support mixtures of
//! different distributions use the CMixtureMode object.
template<typename T>
-class CMixtureDistribution
-{
-    public:
-        using TDoubleVec = std::vector<double>;
-        using TModeVec = std::vector<T>;
-
-    public:
-        CMixtureDistribution() {}
-
-        //! \note The length of \p weights should match \p modes.
-        CMixtureDistribution(const TDoubleVec &weights, const TModeVec &modes) :
-            m_Weights(weights),
-            m_Modes(modes)
-        {
-            std::size_t w = m_Weights.size();
-            if (w != m_Modes.size())
-            {
-                LOG_ERROR("# weights = " << w << ", # modes = " << m_Modes.size());
-                m_Weights.resize(m_Modes.size(), 0.0);
-            }
-
-            // Normalize the weights.
-            double weightSum = 0.0;
-            for (std::size_t i = 0u; i < w; ++i)
-            {
-                weightSum += m_Weights[i];
-            }
-            if (weightSum == 0.0)
-            {
-                LOG_ERROR("Expected non-zero weight sum");
-            }
-            for (std::size_t i = 0u; i < w; ++i)
-            {
-                m_Weights[i] = weightSum == 0.0 ?
-                               1.0 / static_cast<double>(w) : m_Weights[i] / weightSum;
-            }
+class CMixtureDistribution {
+public:
+    using TDoubleVec = std::vector<double>;
+    using TModeVec = std::vector<T>;
+
+public:
+    CMixtureDistribution() {}
+
+    //! \note The length of \p weights should match \p modes.
+    CMixtureDistribution(const TDoubleVec& weights, const TModeVec& modes) : m_Weights(weights), m_Modes(modes) {
+        std::size_t w = m_Weights.size();
+        if (w != m_Modes.size()) {
+            LOG_ERROR("# weights = " << w << ", # modes = " << m_Modes.size());
+            m_Weights.resize(m_Modes.size(), 0.0);
+        }

+        // Normalize the weights.
+        double weightSum = 0.0;
+        for (std::size_t i = 0u; i < w; ++i) {
+            weightSum += m_Weights[i];
+        }
+        if (weightSum == 0.0) {
+            LOG_ERROR("Expected non-zero weight sum");
+        }
+        for (std::size_t i = 0u; i < w; ++i) {
+            m_Weights[i] = weightSum == 0.0 ?
1.0 / static_cast(w) : m_Weights[i] / weightSum; } + } - inline const TModeVec &modes() const - { - return m_Modes; - } - inline TModeVec &modes() - { - return m_Modes; - } + void swap(CMixtureDistribution& other) { + m_Weights.swap(other.m_Weights); + m_Modes.swap(other.m_Modes); + } - std::string print() const - { - std::string result; - for (std::size_t i = 0u; i < m_Weights.size(); ++i) - { - result += ' ' + core::CStringUtils::typeToStringPretty(m_Weights[i]) - + '/' + core::CStringUtils::typeToStringPretty(mean(m_Modes[i])) - + '/' + core::CStringUtils::typeToStringPretty(standard_deviation(m_Modes[i])); + inline const TDoubleVec& weights() const { return m_Weights; } + inline TDoubleVec& weights() { return m_Weights; } - } - result += (m_Weights.empty() ? "" : " "); - return result; - } + inline const TModeVec& modes() const { return m_Modes; } + inline TModeVec& modes() { return m_Modes; } + std::string print() const { + std::string result; + for (std::size_t i = 0u; i < m_Weights.size(); ++i) { + result += ' ' + core::CStringUtils::typeToStringPretty(m_Weights[i]) + '/' + + core::CStringUtils::typeToStringPretty(mean(m_Modes[i])) + '/' + + core::CStringUtils::typeToStringPretty(standard_deviation(m_Modes[i])); + } + result += (m_Weights.empty() ? "" : " "); + return result; + } - private: - TDoubleVec m_Weights; - TModeVec m_Modes; +private: + TDoubleVec m_Weights; + TModeVec m_Modes; }; - -namespace mixture_detail -{ +namespace mixture_detail { //! Adapts the free p.d.f. function for use with the solver. template -class CPdfAdpater -{ - public: - using result_type = double; - - public: - CPdfAdpater(const CMixtureDistribution &distribution) : - m_Distribution(&distribution) - { - } +class CPdfAdpater { +public: + using result_type = double; - double operator()(double x) const - { - return pdf(*m_Distribution, x); - } +public: + CPdfAdpater(const CMixtureDistribution& distribution) : m_Distribution(&distribution) {} - private: - const CMixtureDistribution *m_Distribution; -}; + double operator()(double x) const { return pdf(*m_Distribution, x); } +private: + const CMixtureDistribution* m_Distribution; +}; } //! Get the support for \p distribution. 
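A usage sketch for the mixture (hypothetical, not part of the header; it assumes Boost.Math distributions satisfy the mode concept, since their free pdf/cdf/quantile/support overloads are found by argument-dependent lookup):

    #include <maths/CMixtureDistribution.h>

    #include <boost/math/distributions/normal.hpp>

    #include <iostream>

    int main() {
        using TNormal = boost::math::normal_distribution<>;
        using TMixture = ml::maths::CMixtureDistribution<TNormal>;
        // A 30/70 mixture of N(0,1) and N(5,2); the constructor normalizes
        // the weights, so {3.0, 7.0} would behave identically.
        TMixture mixture{{0.3, 0.7}, {TNormal{0.0, 1.0}, TNormal{5.0, 2.0}}};
        std::cout << pdf(mixture, 1.0) << '\n'
                  << cdf(mixture, 5.0) << '\n'
                  << quantile(mixture, 0.5) << std::endl;
        return 0;
    }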
template -mixture_detail::TDoubleDoublePr support(const CMixtureDistribution &distribution) -{ +mixture_detail::TDoubleDoublePr support(const CMixtureDistribution& distribution) { using TModeVec = typename CMixtureDistribution::TModeVec; - const TModeVec &modes = distribution.modes(); + const TModeVec& modes = distribution.modes(); - if (modes.empty()) - { - return mixture_detail::TDoubleDoublePr(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + if (modes.empty()) { + return mixture_detail::TDoubleDoublePr(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); } - mixture_detail::TDoubleDoublePr result(boost::numeric::bounds::highest(), - boost::numeric::bounds::lowest()); + mixture_detail::TDoubleDoublePr result(boost::numeric::bounds::highest(), boost::numeric::bounds::lowest()); - for (std::size_t i = 0u; i < modes.size(); ++i) - { - try - { + for (std::size_t i = 0u; i < modes.size(); ++i) { + try { mixture_detail::TDoubleDoublePr modeSupport = support(modes[i]); result.first = std::min(result.first, modeSupport.first); result.second = std::max(result.second, modeSupport.second); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute support for mode: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to compute support for mode: " << e.what()); } } return result; } - //! Compute the mode for \p distribution. //! //! \warning This propagates boost exceptions. template -double mode(const CMixtureDistribution &distribution) -{ +double mode(const CMixtureDistribution& distribution) { using TDoubleVec = typename CMixtureDistribution::TDoubleVec; using TModeVec = typename CMixtureDistribution::TModeVec; @@ -308,44 +250,36 @@ double mode(const CMixtureDistribution &distribution) double result = 0.0; - const TDoubleVec &weights = distribution.weights(); - const TModeVec &modes = distribution.modes(); + const TDoubleVec& weights = distribution.weights(); + const TModeVec& modes = distribution.modes(); - if (weights.empty()) - { + if (weights.empty()) { return result; } - if (weights.size() == 1) - { + if (weights.size() == 1) { return mode(modes[0]); } mixture_detail::CPdfAdpater f(distribution); double fMax = 0.0; - for (std::size_t i = 0u; i < weights.size(); ++i) - { - try - { + for (std::size_t i = 0u; i < weights.size(); ++i) { + try { double x25 = quantile(modes[i], 0.25); double x75 = quantile(modes[i], 0.75); std::size_t maxIterations = MAX_ITERATIONS; double x; double fx; CSolvers::maximize(x25, x75, f(x25), f(x75), f, 0.0, maxIterations, x, fx); - if (fx > fMax) - { + if (fx > fMax) { result = x; fMax = fx; } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to compute f(x) at mode: " << e.what()); throw e; } } - return result; } @@ -353,48 +287,36 @@ double mode(const CMixtureDistribution &distribution) //! //! \warning This propagates boost exceptions. 
template -double pdf(const CMixtureDistribution &distribution, double x) -{ +double pdf(const CMixtureDistribution& distribution, double x) { using TDoubleVec = typename CMixtureDistribution::TDoubleVec; using TModeVec = typename CMixtureDistribution::TModeVec; - if (CMathsFuncs::isNan(x)) - { + if (CMathsFuncs::isNan(x)) { LOG_ERROR("Bad value x = " << x); return 0.0; } double result = 0.0; - const TDoubleVec &weights = distribution.weights(); - const TModeVec &modes = distribution.modes(); + const TDoubleVec& weights = distribution.weights(); + const TModeVec& modes = distribution.modes(); - if (weights.empty()) - { + if (weights.empty()) { return result; } - for (std::size_t i = 0u; i < weights.size(); ++i) - { + for (std::size_t i = 0u; i < weights.size(); ++i) { mixture_detail::TDoubleDoublePr ms = support(modes[i]); - if (x >= ms.first && x <= ms.second) - { - try - { + if (x >= ms.first && x <= ms.second) { + try { double fx = pdf(modes[i], x); - LOG_TRACE("x = " << x - << ", w(" << i << ") = " << weights[i] - << ", f(x, " << i << ") " << fx); + LOG_TRACE("x = " << x << ", w(" << i << ") = " << weights[i] << ", f(x, " << i << ") " << fx); result += weights[i] * fx; - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to compute f(x) for mode at " << x << ": " << e.what()); throw e; } - } - else - { + } else { LOG_TRACE("x = " << x << ", support = (" << ms.first << "," << ms.second << ")"); } } @@ -406,54 +328,38 @@ double pdf(const CMixtureDistribution &distribution, double x) //! //! \warning This propagates boost exceptions. template -double cdf(const CMixtureDistribution &distribution, double x) -{ +double cdf(const CMixtureDistribution& distribution, double x) { using TDoubleVec = typename CMixtureDistribution::TDoubleVec; using TModeVec = typename CMixtureDistribution::TModeVec; - if (CMathsFuncs::isNan(x)) - { + if (CMathsFuncs::isNan(x)) { LOG_ERROR("Bad value x = " << x); return 1.0; } - const TDoubleVec &weights = distribution.weights(); - const TModeVec &modes = distribution.modes(); + const TDoubleVec& weights = distribution.weights(); + const TModeVec& modes = distribution.modes(); - if (weights.empty()) - { + if (weights.empty()) { return 0.0; } double result = 0.0; - for (std::size_t i = 0u; i < modes.size(); ++i) - { + for (std::size_t i = 0u; i < modes.size(); ++i) { mixture_detail::TDoubleDoublePr ms = support(modes[i]); - if (x >= ms.second) - { + if (x >= ms.second) { result += weights[i]; - } - else if (x >= ms.first) - { - try - { + } else if (x >= ms.first) { + try { double fx = cdf(modes[i], x); - LOG_TRACE("x = " << x - << ", w(" << i << ") = " << weights[i] - << ", f(x, " << i << ") " << fx); + LOG_TRACE("x = " << x << ", w(" << i << ") = " << weights[i] << ", f(x, " << i << ") " << fx); result += weights[i] * fx; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute f(x) for mode at " - << x << ": " << e.what()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute f(x) for mode at " << x << ": " << e.what()); throw e; } - } - else - { - LOG_TRACE("x = " << x - << ", support = (" << ms.first << "," << ms.second << ")"); + } else { + LOG_TRACE("x = " << x << ", support = (" << ms.first << "," << ms.second << ")"); } } @@ -464,127 +370,92 @@ double cdf(const CMixtureDistribution &distribution, double x) //! //! \warning This propagates boost exceptions. 
template -double cdfComplement(const CMixtureDistribution &distribution, double x) -{ +double cdfComplement(const CMixtureDistribution& distribution, double x) { using TDoubleVec = typename CMixtureDistribution::TDoubleVec; using TModeVec = typename CMixtureDistribution::TModeVec; - if (CMathsFuncs::isNan(x)) - { + if (CMathsFuncs::isNan(x)) { LOG_ERROR("Bad value x = " << x); return 1.0; } - const TDoubleVec &weights = distribution.weights(); - const TModeVec &modes = distribution.modes(); + const TDoubleVec& weights = distribution.weights(); + const TModeVec& modes = distribution.modes(); - if (weights.empty()) - { + if (weights.empty()) { return 1.0; } double result = 0.0; - for (std::size_t i = 0u; i < modes.size(); ++i) - { + for (std::size_t i = 0u; i < modes.size(); ++i) { mixture_detail::TDoubleDoublePr ms = support(modes[i]); - if (x < ms.first) - { + if (x < ms.first) { result += weights[i]; - } - else if (x < ms.second) - { - try - { + } else if (x < ms.second) { + try { double fx = cdf(complement(modes[i], x)); - LOG_TRACE("x = " << x - << ", w(" << i << ") = " << weights[i] - << ", f(x, " << i << ") " << fx); + LOG_TRACE("x = " << x << ", w(" << i << ") = " << weights[i] << ", f(x, " << i << ") " << fx); result += weights[i] * fx; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute f(x) for mode at " - << x << ": " << e.what()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute f(x) for mode at " << x << ": " << e.what()); throw e; } - } - else - { - LOG_TRACE("x = " << x - << ", support = (" << ms.first << "," << ms.second << ")"); + } else { + LOG_TRACE("x = " << x << ", support = (" << ms.first << "," << ms.second << ")"); } } return result; - } -namespace mixture_detail -{ +namespace mixture_detail { //! Adapts the free c.d.f. function for use with the solver. template -class CCdfAdapter -{ - public: - using result_type = double; - - public: - CCdfAdapter(const CMixtureDistribution &distribution) : - m_Distribution(&distribution) - { - } +class CCdfAdapter { +public: + using result_type = double; - double operator()(const double x) const - { - return cdf(*m_Distribution, x); - } +public: + CCdfAdapter(const CMixtureDistribution& distribution) : m_Distribution(&distribution) {} - private: - const CMixtureDistribution *m_Distribution; -}; + double operator()(const double x) const { return cdf(*m_Distribution, x); } +private: + const CMixtureDistribution* m_Distribution; +}; } //! Compute the \p q'th quantile for \p distribution. //! //! \warning This propagates boost exceptions. template -double quantile(const CMixtureDistribution &distribution, const double q) -{ +double quantile(const CMixtureDistribution& distribution, const double q) { using TModeVec = typename CMixtureDistribution::TModeVec; mixture_detail::TDoubleDoublePr s = support(distribution); - if (q <= 0.0) - { - if (q < 0.0) - { + if (q <= 0.0) { + if (q < 0.0) { LOG_ERROR("Bad quantile " << q); } return s.first; - } - else if (q >= 1.0) - { - if (q > 1.0) - { + } else if (q >= 1.0) { + if (q > 1.0) { LOG_ERROR("Bad quantile " << q); } return s.second; } - const TModeVec &modes = distribution.modes(); - if (modes.empty()) - { + const TModeVec& modes = distribution.modes(); + if (modes.empty()) { return q < 0.5 ? s.first : (q > 0.5 ? 
s.second : 0.0); - } - else if (modes.size() == 1) - { + } else if (modes.size() == 1) { return quantile(modes[0], q); } mixture_detail::CCdfAdapter f(distribution); - CCompositeFunctions::CMinusConstant > fq(f, q); + CCompositeFunctions::CMinusConstant> fq(f, q); static const std::size_t MAX_ITERATIONS = 100u; static const double EPS = 1e-3; @@ -592,50 +463,34 @@ double quantile(const CMixtureDistribution &distribution, const double q) double x0 = mode(distribution); double result = x0; - try - { + try { double f0 = fq(x0); double a = x0, b = x0, fa = f0, fb = f0; LOG_TRACE("(a,b) = [" << a << "," << b << "], " - << ", (f(a),f(b)) = [" << fa << "," << fb << "]"); + << ", (f(a),f(b)) = [" << fa << "," << fb << "]"); std::size_t maxIterations = MAX_ITERATIONS; - if ( (f0 < 0 && !CSolvers::rightBracket(a, b, fa, fb, fq, - maxIterations, - s.first, s.second)) - || (f0 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, fq, - maxIterations, - s.first, s.second))) - { - LOG_ERROR("Unable to bracket quantile = " << q - << ", (a,b) = (" << a << "," << b << ")" - << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); + if ((f0 < 0 && !CSolvers::rightBracket(a, b, fa, fb, fq, maxIterations, s.first, s.second)) || + (f0 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, fq, maxIterations, s.first, s.second))) { + LOG_ERROR("Unable to bracket quantile = " << q << ", (a,b) = (" << a << "," << b << ")" + << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); result = std::fabs(fa) < std::fabs(fb) ? a : b; - } - else - { + } else { LOG_TRACE("(a,b) = (" << a << "," << b << ")" - << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); + << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); maxIterations = MAX_ITERATIONS - maxIterations; CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, - std::min(std::numeric_limits::epsilon() * b, - EPS * q / std::max(fa, fb))); + std::min(std::numeric_limits::epsilon() * b, EPS * q / std::max(fa, fb))); CSolvers::solve(a, b, fa, fb, fq, maxIterations, equal, result); - LOG_TRACE("q = " << q - << ", x = " << result - << ", f(x) = " << fq(result) - << ", iterations = " << maxIterations); + LOG_TRACE("q = " << q << ", x = " << result << ", f(x) = " << fq(result) << ", iterations = " << maxIterations); } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to compute quantile " << q); throw e; } return result; } - } } diff --git a/include/maths/CModel.h b/include/maths/CModel.h index 9f2a09cd00..2dd07216e4 100644 --- a/include/maths/CModel.h +++ b/include/maths/CModel.h @@ -7,9 +7,9 @@ #ifndef INCLUDED_ml_maths_CModel_h #define INCLUDED_ml_maths_CModel_h -#include #include #include +#include #include #include @@ -19,22 +19,18 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CMultivariatePrior; class CPrior; class CTimeSeriesCorrelations; //! \brief Data describing a prediction error bar. -struct MATHS_EXPORT SErrorBar -{ +struct MATHS_EXPORT SErrorBar { core_t::TTime s_Time; core_t::TTime s_BucketLength; double s_LowerBound; @@ -42,208 +38,205 @@ struct MATHS_EXPORT SErrorBar double s_UpperBound; }; -using TForecastPushDatapointFunc = std::function; +using TForecastPushDatapointFunc = std::function; //! \brief Model parameters. 
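For orientation, a sketch of wiring a forecast sink (hypothetical usage; it assumes the elided template argument of TForecastPushDatapointFunc is void(SErrorBar), i.e. the forecaster pushes one prediction error bar per step):

    #include <maths/CModel.h>

    #include <vector>

    int main() {
        std::vector<ml::maths::SErrorBar> forecast;
        // Collect each predicted error bar as the forecaster produces it.
        ml::maths::TForecastPushDatapointFunc push =
            [&forecast](ml::maths::SErrorBar bar) { forecast.push_back(bar); };
        ml::maths::SErrorBar bar{};
        bar.s_Time = 0;
        bar.s_BucketLength = 300;
        bar.s_LowerBound = 0.5;
        bar.s_UpperBound = 1.5;
        push(bar);
        return 0;
    }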
-class MATHS_EXPORT CModelParams -{ - public: - CModelParams(core_t::TTime bucketLength, - double learnRate, - double decayRate, - double minimumSeasonalVarianceScale, - core_t::TTime minimumTimeToDetectChange, - core_t::TTime maximumTimeToTestForChange); - - //! Get the bucket length. - core_t::TTime bucketLength() const; - - //! Get the model learn rate. - double learnRate() const; - - //! Get the model decay rate. - double decayRate() const; - - //! Get the decay rate to use for time averaging the model decay. - double averagingDecayRate() const; - - //! Get the minimum seasonal variance scale. - double minimumSeasonalVarianceScale() const; - - //! Check if we should start testing for a change point in the model. - bool testForChange(core_t::TTime changeInterval) const; - - //! Get the minimum time to detect a change point in the model. - core_t::TTime minimumTimeToDetectChange(void) const; - - //! Get the maximum time to test for a change point in the model. - core_t::TTime maximumTimeToTestForChange(void) const; - - //! Set the probability that the bucket will be empty for the model. - void probabilityBucketEmpty(double probability); - - //! Get the probability that the bucket will be empty for the model. - double probabilityBucketEmpty() const; - - private: - //! The data bucketing length. - core_t::TTime m_BucketLength; - //! The model learn rate. - double m_LearnRate; - //! The model decay rate. - double m_DecayRate; - //! The minimum seasonal variance scale. - double m_MinimumSeasonalVarianceScale; - //! The minimum time permitted to detect a change in the model. - core_t::TTime m_MinimumTimeToDetectChange; - //! The maximum time permitted to test for a change in the model. - core_t::TTime m_MaximumTimeToTestForChange; - //! The probability that a bucket will be empty for the model. - double m_ProbabilityBucketEmpty; +class MATHS_EXPORT CModelParams { +public: + CModelParams(core_t::TTime bucketLength, + double learnRate, + double decayRate, + double minimumSeasonalVarianceScale, + core_t::TTime minimumTimeToDetectChange, + core_t::TTime maximumTimeToTestForChange); + + //! Get the bucket length. + core_t::TTime bucketLength() const; + + //! Get the model learn rate. + double learnRate() const; + + //! Get the model decay rate. + double decayRate() const; + + //! Get the decay rate to use for time averaging the model decay. + double averagingDecayRate() const; + + //! Get the minimum seasonal variance scale. + double minimumSeasonalVarianceScale() const; + + //! Check if we should start testing for a change point in the model. + bool testForChange(core_t::TTime changeInterval) const; + + //! Get the minimum time to detect a change point in the model. + core_t::TTime minimumTimeToDetectChange(void) const; + + //! Get the maximum time to test for a change point in the model. + core_t::TTime maximumTimeToTestForChange(void) const; + + //! Set the probability that the bucket will be empty for the model. + void probabilityBucketEmpty(double probability); + + //! Get the probability that the bucket will be empty for the model. + double probabilityBucketEmpty() const; + +private: + //! The data bucketing length. + core_t::TTime m_BucketLength; + //! The model learn rate. + double m_LearnRate; + //! The model decay rate. + double m_DecayRate; + //! The minimum seasonal variance scale. + double m_MinimumSeasonalVarianceScale; + //! The minimum time permitted to detect a change in the model. + core_t::TTime m_MinimumTimeToDetectChange; + //! 
The maximum time permitted to test for a change in the model.
+    core_t::TTime m_MaximumTimeToTestForChange;
+    //! The probability that a bucket will be empty for the model.
+    double m_ProbabilityBucketEmpty;
+};

//! \brief The extra parameters needed by CModel::addSamples.
-class MATHS_EXPORT CModelAddSamplesParams
-{
-    public:
-        using TDouble2Vec = core::CSmallVector<double, 2>;
-        using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
-        using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>;
-
-    public:
-        CModelAddSamplesParams();
-
-        //! Set whether or not the data are integer valued.
-        CModelAddSamplesParams &integer(bool integer);
-        //! Get the data type.
-        maths_t::EDataType type() const;
-
-        //! Set whether or not the data are non-negative.
-        CModelAddSamplesParams &nonNegative(bool nonNegative);
-        //! Get the whether the data are non-negative.
-        bool isNonNegative() const;
-
-        //! Set the model propagation interval.
-        CModelAddSamplesParams &propagationInterval(double interval);
-        //! Get the model propagation interval.
-        double propagationInterval() const;
-
-        //! Set the weight styles.
-        CModelAddSamplesParams &weightStyles(const maths_t::TWeightStyleVec &styles);
-        //! Get the weight styles.
-        const maths_t::TWeightStyleVec &weightStyles() const;
-
-        //! Set the trend samples weights.
-        CModelAddSamplesParams &trendWeights(const TDouble2Vec4VecVec &weights);
-        //! Get the trend sample weights.
-        const TDouble2Vec4VecVec &trendWeights() const;
-
-        //! Set the prior samples weights.
-        CModelAddSamplesParams &priorWeights(const TDouble2Vec4VecVec &weights);
-        //! Get the prior sample weights.
-        const TDouble2Vec4VecVec &priorWeights() const;
-
-    private:
-        //! The data type.
-        maths_t::EDataType m_Type;
-        //! True if the data are non-negative false otherwise.
-        bool m_IsNonNegative;
-        //! The propagation interval.
-        double m_PropagationInterval;
-        //! Controls the interpretation of the weights.
-        const maths_t::TWeightStyleVec *m_WeightStyles;
-        //! The trend sample weights.
-        const TDouble2Vec4VecVec *m_TrendWeights;
-        //! The prior sample weights.
-        const TDouble2Vec4VecVec *m_PriorWeights;
+class MATHS_EXPORT CModelAddSamplesParams {
+public:
+    using TDouble2Vec = core::CSmallVector<double, 2>;
+    using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
+    using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>;
+
+public:
+    CModelAddSamplesParams();
+
+    //! Set whether or not the data are integer valued.
+    CModelAddSamplesParams& integer(bool integer);
+    //! Get the data type.
+    maths_t::EDataType type() const;
+
+    //! Set whether or not the data are non-negative.
+    CModelAddSamplesParams& nonNegative(bool nonNegative);
+    //! Get whether the data are non-negative.
+    bool isNonNegative() const;
+
+    //! Set the model propagation interval.
+    CModelAddSamplesParams& propagationInterval(double interval);
+    //! Get the model propagation interval.
+    double propagationInterval() const;
+
+    //! Set the weight styles.
+    CModelAddSamplesParams& weightStyles(const maths_t::TWeightStyleVec& styles);
+    //! Get the weight styles.
+    const maths_t::TWeightStyleVec& weightStyles() const;
+
+    //! Set the trend sample weights.
+    CModelAddSamplesParams& trendWeights(const TDouble2Vec4VecVec& weights);
+    //! Get the trend sample weights.
+    const TDouble2Vec4VecVec& trendWeights() const;
+
+    //! Set the prior sample weights.
+    CModelAddSamplesParams& priorWeights(const TDouble2Vec4VecVec& weights);
+    //! Get the prior sample weights.
+    const TDouble2Vec4VecVec& priorWeights() const;
+
+private:
+    //! The data type.
+    maths_t::EDataType m_Type;
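Since each setter above returns a reference to the object, callers typically chain them. A minimal sketch (hypothetical usage; it assumes maths_t::E_SampleCountWeight as the weight style, and note the object stores pointers, so the styles and weights must outlive it):

    #include <maths/CModel.h>

    int main() {
        ml::maths_t::TWeightStyleVec weightStyles{ml::maths_t::E_SampleCountWeight};
        ml::maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{{{1.0}}};

        ml::maths::CModelAddSamplesParams params;
        params.integer(false)
            .nonNegative(true)
            .propagationInterval(1.0)
            .weightStyles(weightStyles)
            .trendWeights(weights)
            .priorWeights(weights);
        return 0;
    }

+    //!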
True if the data are non-negative false otherwise. + bool m_IsNonNegative; + //! The propagation interval. + double m_PropagationInterval; + //! Controls the interpretation of the weights. + const maths_t::TWeightStyleVec* m_WeightStyles; + //! The trend sample weights. + const TDouble2Vec4VecVec* m_TrendWeights; + //! The prior sample weights. + const TDouble2Vec4VecVec* m_PriorWeights; }; //! \brief The extra parameters needed by CModel::probability. -class MATHS_EXPORT CModelProbabilityParams -{ - public: - using TOptionalSize = boost::optional; - using TBool2Vec = core::CSmallVector; - using TBool2Vec1Vec = core::CSmallVector; - using TDouble2Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4Vec1Vec = core::CSmallVector; - using TSize2Vec = core::CSmallVector; - using TProbabilityCalculation2Vec = core::CSmallVector; - - public: - CModelProbabilityParams(); - - //! Set the tag for the entity for which to compute the probability. - CModelProbabilityParams &tag(std::size_t tag); - //! Get the tag for the entity for which to compute the probability. - std::size_t tag() const; - - //! Add a coordinate's calculation style. - CModelProbabilityParams &addCalculation(maths_t::EProbabilityCalculation calculation); - //! Get the number of calculations. - std::size_t calculations() const; - //! Get the \p i'th coordinate's calculation style. - maths_t::EProbabilityCalculation calculation(std::size_t i) const; - - //! Set the confidence interval to use when detrending. - CModelProbabilityParams &seasonalConfidenceInterval(double confidence); - //! Get the confidence interval to use when detrending. - double seasonalConfidenceInterval() const; - - //! Add whether a value's bucket is empty. - CModelProbabilityParams &addBucketEmpty(const TBool2Vec &empty); - //! Get whether the values' bucket is empty. - const TBool2Vec1Vec &bucketEmpty() const; - - //! Set the weight styles. - CModelProbabilityParams &weightStyles(const maths_t::TWeightStyleVec &styles); - //! Get the weight styles. - const maths_t::TWeightStyleVec &weightStyles() const; - - //! Add a value's weights. - CModelProbabilityParams &addWeights(const TDouble2Vec4Vec &weights); - //! Set the values' weights. - CModelProbabilityParams &weights(const TDouble2Vec4Vec1Vec &weights); - //! Get the values' weights. - const TDouble2Vec4Vec1Vec &weights() const; - //! Get writable values' weights. - TDouble2Vec4Vec1Vec &weights(); - - //! Add a coordinate for which to compute probability. - CModelProbabilityParams &addCoordinate(std::size_t coordinate); - //! Get the coordinates for which to compute probability. - const TSize2Vec &coordinates() const; - - //! Set the most anomalous correlate. - CModelProbabilityParams &mostAnomalousCorrelate(std::size_t correlate); - //! Get the most anomalous correlate if there is one. - TOptionalSize mostAnomalousCorrelate() const; - - //! Set whether or not to update the anomaly model. - CModelProbabilityParams &updateAnomalyModel(bool update); - //! Get whether or not to update the anomaly model. - bool updateAnomalyModel() const; - - private: - //! The entity tag (if relevant otherwise 0). - std::size_t m_Tag; - //! The coordinates' probability calculations. - TProbabilityCalculation2Vec m_Calculations; - //! The confidence interval to use when detrending. - double m_SeasonalConfidenceInterval; - //! True if the bucket is empty and false otherwise. - TBool2Vec1Vec m_BucketEmpty; - //! Controls the interpretation of the weights. 
- const maths_t::TWeightStyleVec *m_WeightStyles; - //! The sample weights. - TDouble2Vec4Vec1Vec m_Weights; - //! The coordinates for which to compute the probability. - TSize2Vec m_Coordinates; - //! The most anomalous coordinate (if there is one). - TOptionalSize m_MostAnomalousCorrelate; - //! Whether or not to update the anomaly model. - bool m_UpdateAnomalyModel; +class MATHS_EXPORT CModelProbabilityParams { +public: + using TOptionalSize = boost::optional; + using TBool2Vec = core::CSmallVector; + using TBool2Vec1Vec = core::CSmallVector; + using TDouble2Vec = core::CSmallVector; + using TDouble2Vec4Vec = core::CSmallVector; + using TDouble2Vec4Vec1Vec = core::CSmallVector; + using TSize2Vec = core::CSmallVector; + using TProbabilityCalculation2Vec = core::CSmallVector; + +public: + CModelProbabilityParams(); + + //! Set the tag for the entity for which to compute the probability. + CModelProbabilityParams& tag(std::size_t tag); + //! Get the tag for the entity for which to compute the probability. + std::size_t tag() const; + + //! Add a coordinate's calculation style. + CModelProbabilityParams& addCalculation(maths_t::EProbabilityCalculation calculation); + //! Get the number of calculations. + std::size_t calculations() const; + //! Get the \p i'th coordinate's calculation style. + maths_t::EProbabilityCalculation calculation(std::size_t i) const; + + //! Set the confidence interval to use when detrending. + CModelProbabilityParams& seasonalConfidenceInterval(double confidence); + //! Get the confidence interval to use when detrending. + double seasonalConfidenceInterval() const; + + //! Add whether a value's bucket is empty. + CModelProbabilityParams& addBucketEmpty(const TBool2Vec& empty); + //! Get whether the values' bucket is empty. + const TBool2Vec1Vec& bucketEmpty() const; + + //! Set the weight styles. + CModelProbabilityParams& weightStyles(const maths_t::TWeightStyleVec& styles); + //! Get the weight styles. + const maths_t::TWeightStyleVec& weightStyles() const; + + //! Add a value's weights. + CModelProbabilityParams& addWeights(const TDouble2Vec4Vec& weights); + //! Set the values' weights. + CModelProbabilityParams& weights(const TDouble2Vec4Vec1Vec& weights); + //! Get the values' weights. + const TDouble2Vec4Vec1Vec& weights() const; + //! Get writable values' weights. + TDouble2Vec4Vec1Vec& weights(); + + //! Add a coordinate for which to compute probability. + CModelProbabilityParams& addCoordinate(std::size_t coordinate); + //! Get the coordinates for which to compute probability. + const TSize2Vec& coordinates() const; + + //! Set the most anomalous correlate. + CModelProbabilityParams& mostAnomalousCorrelate(std::size_t correlate); + //! Get the most anomalous correlate if there is one. + TOptionalSize mostAnomalousCorrelate() const; + + //! Set whether or not to update the anomaly model. + CModelProbabilityParams& updateAnomalyModel(bool update); + //! Get whether or not to update the anomaly model. + bool updateAnomalyModel() const; + +private: + //! The entity tag (if relevant otherwise 0). + std::size_t m_Tag; + //! The coordinates' probability calculations. + TProbabilityCalculation2Vec m_Calculations; + //! The confidence interval to use when detrending. + double m_SeasonalConfidenceInterval; + //! True if the bucket is empty and false otherwise. + TBool2Vec1Vec m_BucketEmpty; + //! Controls the interpretation of the weights. + const maths_t::TWeightStyleVec* m_WeightStyles; + //! The sample weights. + TDouble2Vec4Vec1Vec m_Weights; + //! 
The coordinates for which to compute the probability. + TSize2Vec m_Coordinates; + //! The most anomalous coordinate (if there is one). + TOptionalSize m_MostAnomalousCorrelate; + //! Whether or not to update the anomaly model. + bool m_UpdateAnomalyModel; }; //! \brief The model interface. @@ -260,312 +253,285 @@ class MATHS_EXPORT CModelProbabilityParams //! //! Specific implementations exist for different types of object. For example, //! for univariate and multivariate time series. -class MATHS_EXPORT CModel -{ - public: - using TBool2Vec = core::CSmallVector; - using TDouble2Vec = core::CSmallVector; - using TDouble10Vec = core::CSmallVector; - using TDouble2Vec1Vec = core::CSmallVector; - using TDouble2Vec3Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4Vec1Vec = core::CSmallVector; - using TSize1Vec = core::CSmallVector; - using TSize2Vec = core::CSmallVector; - using TSize2Vec1Vec = core::CSmallVector; - using TTime2Vec = core::CSmallVector; - using TTime2Vec1Vec = core::CSmallVector; - using TSizeDoublePr = std::pair; - using TSizeDoublePr1Vec = core::CSmallVector; - using TTimeDouble2VecSizeTr = core::CTriple; - using TTimeDouble2VecSizeTrVec = std::vector; - using TTail2Vec = core::CSmallVector; - - //! Possible statuses for updating a model. - enum EUpdateResult - { - E_Failure, //!< Update failed. - E_Success, //!< Update succeeded. - E_Reset //!< Model reset. - }; - - //! Combine the results \p lhs and \p rhs. - static EUpdateResult combine(EUpdateResult lhs, EUpdateResult rhs); - - public: - CModel(const CModelParams ¶ms); - virtual ~CModel() = default; - - //! These don't need to be and shouldn't be copied. - const CModel &operator=(const CModel &) = delete; - - //! Get the effective count per correlate model for calibrating aggregation. - static double effectiveCount(std::size_t n); - - //! Get the model identifier. - virtual std::size_t identifier() const = 0; - - //! Create a copy of this model passing ownership to the caller. - virtual CModel *clone(std::size_t id) const = 0; - - //! Create a copy of the state we need to persist passing ownership to the caller. - virtual CModel *cloneForPersistence() const = 0; - - //! Create a copy of the state we need to run forecasting. - virtual CModel *cloneForForecast() const = 0; - - //! Return true if forecast is currently possible for this model. - virtual bool isForecastPossible() const = 0; - - //! Tell this to model correlations. - virtual void modelCorrelations(CTimeSeriesCorrelations &model) = 0; - - //! Get the correlated time series identifier pairs if any. - virtual TSize2Vec1Vec correlates() const = 0; - - //! Update the model with the bucket \p value. - virtual void addBucketValue(const TTimeDouble2VecSizeTrVec &value) = 0; - - //! Update the model with new samples. - virtual EUpdateResult addSamples(const CModelAddSamplesParams ¶ms, - TTimeDouble2VecSizeTrVec samples) = 0; - - //! Advance time by \p gap. - virtual void skipTime(core_t::TTime gap) = 0; - - //! Get the most likely value for the time series at \p time. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const = 0; - - //! Get the most likely value for each correlate time series at - //! \p time, if there are any. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec1Vec &weights) const = 0; - - //! Get the local maxima of the residual distribution. 
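
A matching sketch for CModelProbabilityParams, whose definition closes above. The accessor names follow the declarations; the enum value and all numbers are invented for illustration:

    maths::CModelProbabilityParams params;
    maths::CModelProbabilityParams::TBool2Vec bucketEmpty(1, false);
    maths::CModelProbabilityParams::TDouble2Vec4Vec weights(
        1, maths::CModelProbabilityParams::TDouble2Vec(1, 1.0));
    params.addCalculation(maths_t::E_TwoSided)
        .seasonalConfidenceInterval(50.0)
        .addBucketEmpty(bucketEmpty)
        .addWeights(weights)
        .addCoordinate(0)
        .updateAnomalyModel(true);
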
- virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const = 0; - - //! Remove any trend components from \p value. - virtual void detrend(const TTime2Vec1Vec &time, - double confidenceInterval, - TDouble2Vec1Vec &value) const = 0; - - //! Get the best (least MSE) predicted value at \p time. - virtual TDouble2Vec predict(core_t::TTime time, - const TSizeDoublePr1Vec &correlated = TSizeDoublePr1Vec(), - TDouble2Vec hint = TDouble2Vec()) const = 0; - - //! Get the prediction and \p confidenceInterval percentage - //! confidence interval for the time series at \p time. - virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, - double confidenceInterval, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const = 0; - - //! Forecast the time series and get its \p confidenceInterval - //! percentage confidence interval between \p startTime and - //! \p endTime. - //! Data is pushed to the given \p forecastPushDataPointFunc - //! \return true if forecast completed, false otherwise, in - //! which case \p[out] messageOut is set. - virtual bool forecast(core_t::TTime startTime, - core_t::TTime endTime, - double confidenceInterval, - const TDouble2Vec &minimum, - const TDouble2Vec &maximum, - const TForecastPushDatapointFunc &forecastPushDataPointFunc, - std::string &messageOut) = 0; - - //! Compute the probability of drawing \p value at \p time. - virtual bool probability(const CModelProbabilityParams ¶ms, - const TTime2Vec1Vec &time, - const TDouble2Vec1Vec &value, - double &probability, - TTail2Vec &tail, - bool &conditional, - TSize1Vec &mostAnomalousCorrelate) const = 0; - - //! Get the Winsorisation weight to apply to \p value, - //! if appropriate. - virtual TDouble2Vec winsorisationWeight(double derate, - core_t::TTime time, - const TDouble2Vec &value) const = 0; - - //! Get the seasonal variance scale at \p time. - virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const = 0; - - //! Compute a checksum for this object. - virtual std::uint64_t checksum(std::uint64_t seed = 0) const = 0; - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; - - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const = 0; - - //! Persist by passing information to \p inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0; - - //! Get the type of data being modeled. - virtual maths_t::EDataType dataType() const = 0; - - //! Get read only model parameters. - const CModelParams ¶ms() const; - - //! Get writable model parameters. - CModelParams ¶ms(); - - protected: - CModel(const CModel &) = default; - - //! Get the nearest mean of \p prior to \p detrended. - template - static VECTOR marginalLikelihoodMean(const CPrior &prior); - - //! Get the nearest mean of \p prior to \p detrended. - template - static VECTOR marginalLikelihoodMean(const CMultivariatePrior &prior); - - //! Get the error in the trend prediction for \p sample. - template - static boost::optional predictionError(const TREND &trend, - const VECTOR &sample); - - //! Get the error in the prior prediction for \p sample. - template - static boost::optional predictionError(double propagationInterval, - const PRIOR &prior, - const VECTOR &sample); - - //! Correct \p probability with \p probabilityEmptyBucket. 
- static double correctForEmptyBucket(maths_t::EProbabilityCalculation calculation, - const TDouble2Vec &value, - bool bucketEmpty, - double probabilityBucketEmpty, - double probability); - - //! Correct \p probability with \p probabilityEmptyBucket. - static double correctForEmptyBucket(maths_t::EProbabilityCalculation calculation, - double value, - const TBool2Vec &bucketEmpty, - const TDouble2Vec &probabilityEmptyBucket, - double probability); - private: - //! The model parameters. - CModelParams m_Params; +class MATHS_EXPORT CModel { +public: + using TBool2Vec = core::CSmallVector; + using TDouble2Vec = core::CSmallVector; + using TDouble10Vec = core::CSmallVector; + using TDouble2Vec1Vec = core::CSmallVector; + using TDouble2Vec3Vec = core::CSmallVector; + using TDouble2Vec4Vec = core::CSmallVector; + using TDouble2Vec4Vec1Vec = core::CSmallVector; + using TSize1Vec = core::CSmallVector; + using TSize2Vec = core::CSmallVector; + using TSize2Vec1Vec = core::CSmallVector; + using TTime2Vec = core::CSmallVector; + using TTime2Vec1Vec = core::CSmallVector; + using TSizeDoublePr = std::pair; + using TSizeDoublePr1Vec = core::CSmallVector; + using TTimeDouble2VecSizeTr = core::CTriple; + using TTimeDouble2VecSizeTrVec = std::vector; + using TTail2Vec = core::CSmallVector; + + //! Possible statuses for updating a model. + enum EUpdateResult { + E_Failure, //!< Update failed. + E_Success, //!< Update succeeded. + E_Reset //!< Model reset. + }; + + //! Combine the results \p lhs and \p rhs. + static EUpdateResult combine(EUpdateResult lhs, EUpdateResult rhs); + +public: + CModel(const CModelParams& params); + virtual ~CModel() = default; + + //! These don't need to be and shouldn't be copied. + const CModel& operator=(const CModel&) = delete; + + //! Get the effective count per correlate model for calibrating aggregation. + static double effectiveCount(std::size_t n); + + //! Get the model identifier. + virtual std::size_t identifier() const = 0; + + //! Create a copy of this model passing ownership to the caller. + virtual CModel* clone(std::size_t id) const = 0; + + //! Create a copy of the state we need to persist passing ownership to the caller. + virtual CModel* cloneForPersistence() const = 0; + + //! Create a copy of the state we need to run forecasting. + virtual CModel* cloneForForecast() const = 0; + + //! Return true if forecast is currently possible for this model. + virtual bool isForecastPossible() const = 0; + + //! Tell this to model correlations. + virtual void modelCorrelations(CTimeSeriesCorrelations& model) = 0; + + //! Get the correlated time series identifier pairs if any. + virtual TSize2Vec1Vec correlates() const = 0; + + //! Update the model with the bucket \p value. + virtual void addBucketValue(const TTimeDouble2VecSizeTrVec& value) = 0; + + //! Update the model with new samples. + virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, TTimeDouble2VecSizeTrVec samples) = 0; + + //! Advance time by \p gap. + virtual void skipTime(core_t::TTime gap) = 0; + + //! Get the most likely value for the time series at \p time. + virtual TDouble2Vec mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const = 0; + + //! Get the most likely value for each correlate time series at + //! \p time, if there are any. + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec1Vec& weights) const = 0; + + //! 
Get the local maxima of the residual distribution. + virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const = 0; + + //! Remove any trend components from \p value. + virtual void detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const = 0; + + //! Get the best (least MSE) predicted value at \p time. + virtual TDouble2Vec + predict(core_t::TTime time, const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(), TDouble2Vec hint = TDouble2Vec()) const = 0; + + //! Get the prediction and \p confidenceInterval percentage + //! confidence interval for the time series at \p time. + virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, + double confidenceInterval, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const = 0; + + //! Forecast the time series and get its \p confidenceInterval + //! percentage confidence interval between \p startTime and + //! \p endTime. + //! Data is pushed to the given \p forecastPushDataPointFunc + //! \return true if forecast completed, false otherwise, in + //! which case \p[out] messageOut is set. + virtual bool forecast(core_t::TTime startTime, + core_t::TTime endTime, + double confidenceInterval, + const TDouble2Vec& minimum, + const TDouble2Vec& maximum, + const TForecastPushDatapointFunc& forecastPushDataPointFunc, + std::string& messageOut) = 0; + + //! Compute the probability of drawing \p value at \p time. + virtual bool probability(const CModelProbabilityParams& params, + const TTime2Vec1Vec& time, + const TDouble2Vec1Vec& value, + double& probability, + TTail2Vec& tail, + bool& conditional, + TSize1Vec& mostAnomalousCorrelate) const = 0; + + //! Get the Winsorisation weight to apply to \p value, + //! if appropriate. + virtual TDouble2Vec winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const = 0; + + //! Get the seasonal variance scale at \p time. + virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const = 0; + + //! Compute a checksum for this object. + virtual std::uint64_t checksum(std::uint64_t seed = 0) const = 0; + + //! Debug the memory used by this object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; + + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const = 0; + + //! Persist by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0; + + //! Get the type of data being modeled. + virtual maths_t::EDataType dataType() const = 0; + + //! Get read only model parameters. + const CModelParams& params() const; + + //! Get writable model parameters. + CModelParams& params(); + +protected: + CModel(const CModel&) = default; + + //! Get the nearest mean of \p prior to \p detrended. + template + static VECTOR marginalLikelihoodMean(const CPrior& prior); + + //! Get the nearest mean of \p prior to \p detrended. + template + static VECTOR marginalLikelihoodMean(const CMultivariatePrior& prior); + + //! Get the error in the trend prediction for \p sample. + template + static boost::optional predictionError(const TREND& trend, const VECTOR& sample); + + //! Get the error in the prior prediction for \p sample. + template + static boost::optional predictionError(double propagationInterval, const PRIOR& prior, const VECTOR& sample); + + //! Correct \p probability with \p probabilityEmptyBucket. 
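
The empty-bucket correction declared next can be read as blending the probability computed for the observed value with the chance that the bucket is empty at all. A standalone sketch of one plausible blend; this conveys the intent, not the library's exact formula:

    #include <algorithm>

    // If the bucket is empty with probability q, an anomaly probability p
    // computed for the observed value can be floored at q by mixing the two
    // cases: an empty bucket is never surprising.
    double correctForEmptyBucketSketch(double p, double q) {
        return std::min(q + (1.0 - q) * p, 1.0);
    }
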
+ static double correctForEmptyBucket(maths_t::EProbabilityCalculation calculation, + const TDouble2Vec& value, + bool bucketEmpty, + double probabilityBucketEmpty, + double probability); + + //! Correct \p probability with \p probabilityEmptyBucket. + static double correctForEmptyBucket(maths_t::EProbabilityCalculation calculation, + double value, + const TBool2Vec& bucketEmpty, + const TDouble2Vec& probabilityEmptyBucket, + double probability); + +private: + //! The model parameters. + CModelParams m_Params; }; //! A stateless lightweight model which stubs the interface. -class MATHS_EXPORT CModelStub : public CModel -{ - public: - CModelStub(); - - //! Returns 0. - virtual std::size_t identifier() const; - - //! Create a copy of this model passing ownership to the caller. - virtual CModelStub *clone(std::size_t id) const; - - //! Create a copy of the state we need to persist passing ownership to the caller. - virtual CModelStub *cloneForPersistence() const; - - //! Create a copy of the state we need to run forecasting. - virtual CModelStub *cloneForForecast() const; - - //! Return false; - virtual bool isForecastPossible() const; - - //! No-op. - virtual void modelCorrelations(CTimeSeriesCorrelations &model); - - //! Returns empty. - virtual TSize2Vec1Vec correlates() const; - - //! No-op. - virtual void addBucketValue(const TTimeDouble2VecSizeTrVec &value); - - //! No-op. - virtual EUpdateResult addSamples(const CModelAddSamplesParams ¶ms, - TTimeDouble2VecSizeTrVec samples); - - //! No-op. - virtual void skipTime(core_t::TTime gap); - - //! Returns empty. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - - //! Returns empty. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec1Vec &weights) const; - - //! Returns empty. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - - //! No-op. - virtual void detrend(const TTime2Vec1Vec &time, - double confidenceInterval, - TDouble2Vec1Vec &value) const; - - //! Returns empty. - virtual TDouble2Vec predict(core_t::TTime time, - const TSizeDoublePr1Vec &correlated, - TDouble2Vec hint = TDouble2Vec()) const; - - //! Returns empty. - virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, - double confidenceInterval, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - //! Returns empty. - virtual bool forecast(core_t::TTime startTime, - core_t::TTime endTime, - double confidenceInterval, - const TDouble2Vec &minimum, - const TDouble2Vec &maximum, - const TForecastPushDatapointFunc &forecastPushDataPointFunc, - std::string &messageOut); - - //! Returns 1.0. - virtual bool probability(const CModelProbabilityParams ¶ms, - const TTime2Vec1Vec &time, - const TDouble2Vec1Vec &value, - double &probability, - TTail2Vec &tail, - bool &conditional, - TSize1Vec &mostAnomalousCorrelate) const; - - //! Returns empty. - virtual TDouble2Vec winsorisationWeight(double derate, - core_t::TTime time, - const TDouble2Vec &value) const; - - //! Returns empty. - virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const; - - //! Returns the seed. - virtual std::uint64_t checksum(std::uint64_t seed = 0) const; - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! 
Get the memory used by this object. - virtual std::size_t memoryUsage() const; - - //! No-op. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Returns mixed data type since we don't know. - virtual maths_t::EDataType dataType() const; -}; +class MATHS_EXPORT CModelStub : public CModel { +public: + CModelStub(); + + //! Returns 0. + virtual std::size_t identifier() const; + + //! Create a copy of this model passing ownership to the caller. + virtual CModelStub* clone(std::size_t id) const; + + //! Create a copy of the state we need to persist passing ownership to the caller. + virtual CModelStub* cloneForPersistence() const; + + //! Create a copy of the state we need to run forecasting. + virtual CModelStub* cloneForForecast() const; + + //! Return false; + virtual bool isForecastPossible() const; + + //! No-op. + virtual void modelCorrelations(CTimeSeriesCorrelations& model); + + //! Returns empty. + virtual TSize2Vec1Vec correlates() const; + + //! No-op. + virtual void addBucketValue(const TTimeDouble2VecSizeTrVec& value); + //! No-op. + virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, TTimeDouble2VecSizeTrVec samples); + + //! No-op. + virtual void skipTime(core_t::TTime gap); + + //! Returns empty. + virtual TDouble2Vec mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const; + + //! Returns empty. + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec1Vec& weights) const; + + //! Returns empty. + virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const; + + //! No-op. + virtual void detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const; + + //! Returns empty. + virtual TDouble2Vec predict(core_t::TTime time, const TSizeDoublePr1Vec& correlated, TDouble2Vec hint = TDouble2Vec()) const; + + //! Returns empty. + virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, + double confidenceInterval, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const; + //! Returns empty. + virtual bool forecast(core_t::TTime startTime, + core_t::TTime endTime, + double confidenceInterval, + const TDouble2Vec& minimum, + const TDouble2Vec& maximum, + const TForecastPushDatapointFunc& forecastPushDataPointFunc, + std::string& messageOut); + + //! Returns 1.0. + virtual bool probability(const CModelProbabilityParams& params, + const TTime2Vec1Vec& time, + const TDouble2Vec1Vec& value, + double& probability, + TTail2Vec& tail, + bool& conditional, + TSize1Vec& mostAnomalousCorrelate) const; + + //! Returns empty. + virtual TDouble2Vec winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const; + + //! Returns empty. + virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const; + + //! Returns the seed. + virtual std::uint64_t checksum(std::uint64_t seed = 0) const; + + //! Debug the memory used by this object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const; + + //! No-op. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Returns mixed data type since we don't know. 
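
CModelStub realizes the null-object pattern: callers hold a CModel and never branch on whether modelling is enabled. A hypothetical call site (the factory parameter is invented, not part of the library):

    #include <functional>
    #include <memory>

    std::unique_ptr<maths::CModel>
    chooseModel(bool modellingEnabled,
                const std::function<std::unique_ptr<maths::CModel>()>& makeRealModel) {
        if (modellingEnabled) {
            return makeRealModel(); // e.g. a univariate time series model
        }
        // Every operation on the stub is a cheap no-op with a benign result.
        return std::make_unique<maths::CModelStub>();
    }
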
+ virtual maths_t::EDataType dataType() const; +}; } } diff --git a/include/maths/CModelDetail.h b/include/maths/CModelDetail.h index ea0f228093..63c4fd295a 100644 --- a/include/maths/CModelDetail.h +++ b/include/maths/CModelDetail.h @@ -13,38 +13,28 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { template -VECTOR CModel::marginalLikelihoodMean(const maths::CPrior &prior) -{ +VECTOR CModel::marginalLikelihoodMean(const maths::CPrior& prior) { return VECTOR{prior.marginalLikelihoodMean()}; } template -VECTOR CModel::marginalLikelihoodMean(const maths::CMultivariatePrior &prior) -{ +VECTOR CModel::marginalLikelihoodMean(const maths::CMultivariatePrior& prior) { return prior.marginalLikelihoodMean(); } template -boost::optional CModel::predictionError(const TREND &trend, - const VECTOR &sample) -{ +boost::optional CModel::predictionError(const TREND& trend, const VECTOR& sample) { boost::optional result; std::size_t dimension = sample.size(); - for (std::size_t i = 0u; i < dimension; ++i) - { - if (trend[i]->initialized()) - { + for (std::size_t i = 0u; i < dimension; ++i) { + if (trend[i]->initialized()) { result.reset(VECTOR(dimension, 0.0)); - for (/**/; i < dimension; ++i) - { - if (trend[i]->initialized()) - { + for (/**/; i < dimension; ++i) { + if (trend[i]->initialized()) { (*result)[i] = sample[i]; } } @@ -54,24 +44,18 @@ boost::optional CModel::predictionError(const TREND &trend, } template -boost::optional CModel::predictionError(double propagationInterval, - const PRIOR &prior, - const VECTOR &sample) -{ +boost::optional CModel::predictionError(double propagationInterval, const PRIOR& prior, const VECTOR& sample) { boost::optional result; - if (prior->numberSamples() > 20.0 / propagationInterval) - { + if (prior->numberSamples() > 20.0 / propagationInterval) { std::size_t dimension{sample.size()}; result.reset(sample); VECTOR mean(marginalLikelihoodMean(*prior)); - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { (*result)[d] -= mean[d]; } } return result; } - } } diff --git a/include/maths/CModelStateSerialiser.h b/include/maths/CModelStateSerialiser.h index db56a670e6..8b48aafff0 100644 --- a/include/maths/CModelStateSerialiser.h +++ b/include/maths/CModelStateSerialiser.h @@ -11,15 +11,12 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CModel; struct SModelRestoreParams; @@ -35,22 +32,18 @@ struct SModelRestoreParams; //! name/value pairs where the value may be a nested set of name/value //! pairs. Text format is used to make it easier to provide backwards //! compatibility in the future as the classes evolve. -class MATHS_EXPORT CModelStateSerialiser -{ - public: - using TModelPtr = boost::shared_ptr; - - public: - //! Construct the appropriate CPrior sub-class from its state - //! document representation. Sets \p result to NULL on failure. - bool operator()(const SModelRestoreParams ¶ms, - TModelPtr &result, - core::CStateRestoreTraverser &traverser) const; - - //! Persist state by passing information to the supplied inserter - void operator()(const CModel &model, core::CStatePersistInserter &inserter) const; -}; +class MATHS_EXPORT CModelStateSerialiser { +public: + using TModelPtr = boost::shared_ptr; + +public: + //! Construct the appropriate CPrior sub-class from its state + //! document representation. Sets \p result to NULL on failure. 
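
The template parameter lists in the CModelDetail.h hunk above were lost in extraction (predictionError is parameterized on the prior and vector types). A standalone sketch of the prior-side logic with invented stand-in types:

    #include <cstddef>
    #include <optional>
    #include <vector>

    struct FakePrior {
        double numberSamples = 0.0;
        std::vector<double> mean; // assumed to match the sample's dimension
    };

    // Only report an error once the prior has seen enough data relative to the
    // propagation interval, mirroring the 20.0 / propagationInterval threshold
    // in the snippet; the error is the sample minus the predicted mean.
    std::optional<std::vector<double>>
    predictionErrorSketch(double propagationInterval, const FakePrior& prior,
                          const std::vector<double>& sample) {
        std::optional<std::vector<double>> result;
        if (prior.numberSamples > 20.0 / propagationInterval) {
            result = sample;
            for (std::size_t d = 0; d < sample.size(); ++d) {
                (*result)[d] -= prior.mean[d];
            }
        }
        return result;
    }
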
+ bool operator()(const SModelRestoreParams& params, TModelPtr& result, core::CStateRestoreTraverser& traverser) const; + //! Persist state by passing information to the supplied inserter + void operator()(const CModel& model, core::CStatePersistInserter& inserter) const; +}; } } diff --git a/include/maths/CModelWeight.h b/include/maths/CModelWeight.h index ee8802aa71..84df1d1c5b 100644 --- a/include/maths/CModelWeight.h +++ b/include/maths/CModelWeight.h @@ -16,15 +16,12 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Represents the probability assigned to a model in the mixture. //! @@ -33,76 +30,70 @@ namespace maths //! form any weight which is small w.r.t. the largest weight (i.e. less //! than double_eps * largest weight) is effectively zero and the corresponding //! model is (temporarily) removed from the collection. -class MATHS_EXPORT CModelWeight -{ - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return true; } +class MATHS_EXPORT CModelWeight { +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } - public: - explicit CModelWeight(double weight); +public: + explicit CModelWeight(double weight); - //! Implicit conversion to read only double weight (m_Weight). - operator double() const; + //! Implicit conversion to read only double weight (m_Weight). + operator double() const; - //! Get the log of the current weight. - double logWeight() const; + //! Get the log of the current weight. + double logWeight() const; - //! Reset the log weight. - void logWeight(double logWeight); + //! Reset the log weight. + void logWeight(double logWeight); - //! Add the log of a factor to the weight. - void addLogFactor(double logFactor); + //! Add the log of a factor to the weight. + void addLogFactor(double logFactor); - //! Age the weight by the factor \p alpha. - void age(double alpha); + //! Age the weight by the factor \p alpha. + void age(double alpha); - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed) const; + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed) const; - //! Restore state from part of a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Restore state from part of a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Persist state by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to the supplied inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - private: - //! The current weight and must be in the interval [0,1]. - double m_LogWeight; +private: + //! The current weight and must be in the interval [0,1]. + double m_LogWeight; - //! The value to which the weight will revert long term and must be in - //! the interval [0,1]. - double m_LongTermLogWeight; + //! The value to which the weight will revert long term and must be in + //! the interval [0,1]. + double m_LongTermLogWeight; }; //! \brief Re-normalizes weights (so that the sum to one) on destruction. 
template -class CScopeCanonicalizeWeights : private core::CNonCopyable -{ - public: - using TWeightPriorPr = std::pair; - using TWeightPriorPrVec = std::vector; - - public: - CScopeCanonicalizeWeights(TWeightPriorPrVec &models) : m_Models(models) {} - - ~CScopeCanonicalizeWeights() - { - CBasicStatistics::SMax::TAccumulator logMaxWeight; - for (const auto &model : m_Models) - { - logMaxWeight.add(model.first.logWeight()); - } - for (auto &model : m_Models) - { - model.first.logWeight(model.first.logWeight() - logMaxWeight[0]); - } +class CScopeCanonicalizeWeights : private core::CNonCopyable { +public: + using TWeightPriorPr = std::pair; + using TWeightPriorPrVec = std::vector; + +public: + CScopeCanonicalizeWeights(TWeightPriorPrVec& models) : m_Models(models) {} + + ~CScopeCanonicalizeWeights() { + CBasicStatistics::SMax::TAccumulator logMaxWeight; + for (const auto& model : m_Models) { + logMaxWeight.add(model.first.logWeight()); } + for (auto& model : m_Models) { + model.first.logWeight(model.first.logWeight() - logMaxWeight[0]); + } + } - private: - TWeightPriorPrVec &m_Models; +private: + TWeightPriorPrVec& m_Models; }; - } } diff --git a/include/maths/CMultimodalPrior.h b/include/maths/CMultimodalPrior.h index a8f942fd1d..05d1c0d997 100644 --- a/include/maths/CMultimodalPrior.h +++ b/include/maths/CMultimodalPrior.h @@ -20,15 +20,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Implementation for a multimodal prior distribution. //! @@ -51,340 +48,316 @@ namespace maths //! by composite priors. This allows us to select the most appropriate model for //! the data when using one-of-n composition (see COneOfNPrior). From a design //! point of view this is the composite pattern. -class MATHS_EXPORT CMultimodalPrior : public CPrior -{ +class MATHS_EXPORT CMultimodalPrior : public CPrior { +public: + using TClustererPtr = boost::shared_ptr; + using TPriorPtr = boost::shared_ptr; + using TPriorPtrVec = std::vector; + using TPriorPtrVecItr = TPriorPtrVec::iterator; + using TPriorPtrVecCItr = TPriorPtrVec::const_iterator; + using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; + using TMeanVarAccumulatorVec = std::vector; + + // Lift all overloads into scope. + //{ + using CPrior::addSamples; + using CPrior::dataType; + using CPrior::decayRate; + using CPrior::print; + //} + +public: + //! \name Life-Cycle + //@{ + //! Create a new (empty) multimodal prior. + CMultimodalPrior(maths_t::EDataType dataType, const CClusterer1d& clusterer, const CPrior& seedPrior, double decayRate = 0.0); + + //! Create a mixture of normals. + CMultimodalPrior(maths_t::EDataType dataType, const TMeanVarAccumulatorVec& moments, double decayRate = 0.0); + + //! Create from a collection of weights and priors. + CMultimodalPrior(maths_t::EDataType dataType, double decayRate, TPriorPtrVec& priors); + + //! Construct from part of a state document. + CMultimodalPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Implements value semantics for copy construction. + CMultimodalPrior(const CMultimodalPrior& other); + + //! Implements value semantics for assignment. + //! + //! \param[in] rhs The mixed model to copy. + //! \return The newly copied model. + CMultimodalPrior& operator=(const CMultimodalPrior& rhs); + + //! An efficient swap of the contents of this and \p other. 
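
The destructor above re-anchors all weights so the largest becomes exp(0) = 1; working in log space this way avoids underflow after many multiplicative updates. The same idea in standalone form:

    #include <algorithm>
    #include <vector>

    void canonicalizeLogWeights(std::vector<double>& logWeights) {
        if (logWeights.empty()) {
            return;
        }
        double logMax = *std::max_element(logWeights.begin(), logWeights.end());
        for (double& w : logWeights) {
            w -= logMax; // the largest weight maps to log weight 0
        }
    }
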
+ void swap(CMultimodalPrior& other); + //@} + + //! \name Prior Contract. + //@{ + //! Get the type of this prior. + virtual EPrior type() const; + + //! Create a copy of the prior. + //! + //! \return A pointer to a newly allocated clone of this prior. + //! \warning The caller owns the object returned. + virtual CMultimodalPrior* clone() const; + + //! Set the data type. + virtual void dataType(maths_t::EDataType value); + + //! Set the rate at which the prior returns to non-informative. + virtual void decayRate(double value); + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); + + //! Check if any of the modes needs an offset to be applied. + virtual bool needsOffset() const; + + //! Forward the offset to the mode priors. + //! + //! \return The penalty to apply in model selection. + virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Get the current offset. + virtual double offset() const; + + //! Update the prior with a collection of independent samples from + //! the variable. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Propagate the prior density function forwards by \p time. + //! + //! The prior distribution relaxes back to non-informative at a rate + //! controlled by the decay rate parameter (optionally supplied to the + //! constructor). + //! + //! \param[in] time The time increment to apply. + virtual void propagateForwardsByTime(double time); + + //! Get the support for the marginal likelihood function. + virtual TDoubleDoublePr marginalLikelihoodSupport() const; + + //! Get the mean of the marginal likelihood function. + virtual double marginalLikelihoodMean() const; + + //! Get the nearest mean of the multimodal prior marginal likelihood. + virtual double nearestMarginalLikelihoodMean(double value) const; + + //! Get the mode of the marginal likelihood function. + virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the local maxima of the marginal likelihood function. + virtual TDouble1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the variance of the marginal likelihood. + virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: + //!
+    //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
+    //! 
+ //! + //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the + //! the percentage of interest \p percentage. + //! + //! \param[in] percentage The percentage of interest. + //! \param[in] weightStyles Optional variance scale weight styles. + //! \param[in] weights Optional variance scale weights. + //! \note \p percentage should be in the range [0.0, 100.0). + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Compute the log marginal likelihood function at \p samples integrating + //! over the prior density function for the mode parameters and summing + //! over modes. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] result Filled in with the joint likelihood of \p samples. + //! \note The samples are assumed to be independent and identically + //! distributed. + virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; + + //! Sample the marginal likelihood function. + //! + //! \see CPrior::sampleMarginalLikelihood() for a detailed description. + //! + //! \param[in] numberSamples The number of samples required. + //! \param[out] samples Filled in with samples from the prior. + //! \note \p numberSamples is truncated to the number of samples received. + virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const; + + //! Compute minus the log of the joint c.d.f. of the marginal likelihood + //! at \p samples. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ + //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. + //! \param[out] upperBound Equal to \p lowerBound. + //! \note The samples are assumed to be independent. + //! \warning The variance scales \f$\gamma_i\f$ must be in the range + //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and + //! a value of infinity is not well handled. (Very large values are + //! handled though.) + virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const; + + //! Compute minus the log of the one minus the joint c.d.f. of the + //! marginal likelihood at \p samples without losing precision due to + //! cancellation errors at one, i.e. the smallest non-zero value this + //! can return is the minimum double rather than epsilon. + //! + //! \see minusLogJointCdf for more details. + virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const; + + //! Compute the probability of a less likely, i.e. lower likelihood, + //! 
collection of independent samples from the variable. + //! + //! \param[in] calculation The style of the probability calculation + //! (see model_t::EProbabilityCalculation for details). + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] lowerBound Filled in with the probability of the set + //! for which the joint marginal likelihood is less than that of + //! \p samples (subject to the measure \p calculation). + //! \param[out] upperBound Equal to \p lowerBound. + //! \param[out] tail The tail that (left or right) that all the + //! samples are in or neither. + //! \note The samples are assumed to be independent. + //! \warning The variance scales must be in the range \f$(0,\infty)\f$, + //! i.e. a value of zero is not well defined and a value of infinity is + //! not well handled. (Very large values are handled though.) + virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const; + + //! Check if this is a non-informative prior. + virtual bool isNonInformative() const; + + //! Get a human readable description of the prior. + //! + //! \param[in] indent The indent to use at the start of new lines. + //! \param[in,out] result Filled in with the description. + virtual void print(const std::string& indent, std::string& result) const; + + //! Print the prior density function in a specified format. + //! + //! \see CPrior::printJointDensityFunction for details. + virtual std::string printJointDensityFunction() const; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; + + //! Debug the memory used by this component. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component. + virtual std::size_t memoryUsage() const; + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const; + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + //@} + + //! Get the current number of modes. + std::size_t numberModes() const; + + //! Check if the class invariants hold. + bool checkInvariants(const std::string& tag = std::string()) const; + +private: + using TDouble1VecVec = std::vector; + using TDouble4Vec1VecVec = std::vector; + + //! The callback invoked when a mode is split. + class MATHS_EXPORT CModeSplitCallback { public: - using TClustererPtr = boost::shared_ptr; - using TPriorPtr = boost::shared_ptr; - using TPriorPtrVec = std::vector; - using TPriorPtrVecItr = TPriorPtrVec::iterator; - using TPriorPtrVecCItr = TPriorPtrVec::const_iterator; - using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMeanVarAccumulatorVec = std::vector; - - // Lift all overloads into scope. 
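
Why the minusLogJointCdf interface above works with -log(F) rather than F: the joint c.d.f. of independent samples is a product of per-sample terms and underflows double precision quickly, whereas the negated logs simply add. A standalone sketch, with the per-sample c.d.f. supplied by the caller:

    #include <cmath>
    #include <vector>

    double minusLogJointCdfSketch(const std::vector<double>& samples,
                                  double (*cdf)(double)) {
        double result = 0.0;
        for (double x : samples) {
            result -= std::log(cdf(x)); // -log(prod_i F(x_i)) = sum_i -log(F(x_i))
        }
        return result;
    }
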
- //{ - using CPrior::dataType; - using CPrior::decayRate; - using CPrior::addSamples; - using CPrior::print; - //} + CModeSplitCallback(CMultimodalPrior& prior); + void operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const; + private: + CMultimodalPrior* m_Prior; + }; + + //! The callback invoked when two modes are merged. + class MATHS_EXPORT CModeMergeCallback { public: - //! \name Life-Cycle - //@{ - //! Create a new (empty) multimodal prior. - CMultimodalPrior(maths_t::EDataType dataType, - const CClusterer1d &clusterer, - const CPrior &seedPrior, - double decayRate = 0.0); - - //! Create a mixture of normals. - CMultimodalPrior(maths_t::EDataType dataType, - const TMeanVarAccumulatorVec &moments, - double decayRate = 0.0); - - //! Create from a collection of weights and priors. - CMultimodalPrior(maths_t::EDataType dataType, - double decayRate, - TPriorPtrVec &priors); - - //! Construct from part of a state document. - CMultimodalPrior(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Implements value semantics for copy construction. - CMultimodalPrior(const CMultimodalPrior &other); - - //! Implements value semantics for assignment. - //! - //! \param[in] rhs The mixed model to copy. - //! \return The newly copied model. - CMultimodalPrior &operator=(const CMultimodalPrior &rhs); - - //! An efficient swap of the contents of this and \p other. - void swap(CMultimodalPrior &other); - //@} - - //! \name Prior Contract. - //@{ - //! Get the type of this prior. - virtual EPrior type() const; - - //! Create a copy of the prior. - //! - //! \return A pointer to a newly allocated clone of this prior. - //! \warning The caller owns the object returned. - virtual CMultimodalPrior *clone() const; - - //! Set the data type. - virtual void dataType(maths_t::EDataType value); - - //! Set the rate at which the prior returns to non-informative. - virtual void decayRate(double value); - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); - - //! Check if any of the modes needs an offset to be applied. - virtual bool needsOffset() const; - - //! Forward the offset to the mode priors. - //! - //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Get the current offset. - virtual double offset() const; - - //! Update the prior with a collection of independent samples from - //! the variable. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Propagate the prior density function forwards by \p time. - //! - //! The prior distribution relaxes back to non-informative at a rate - //! controlled by the decay rate parameter (optionally supplied to the - //! constructor). - //! - //! \param[in] time The time increment to apply. - virtual void propagateForwardsByTime(double time); - - //! Get the support for the marginal likelihood function. - virtual TDoubleDoublePr marginalLikelihoodSupport() const; - - //! 
Get the mean of the marginal likelihood function. - virtual double marginalLikelihoodMean() const; - - //! Get the nearest mean of the multimodal prior marginal likelihood. - virtual double nearestMarginalLikelihoodMean(double value) const; - - //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the local maxima of the marginal likelihood function. - virtual TDouble1Vec marginalLikelihoodModes(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
-        //! 
- //! - //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the - //! the percentage of interest \p percentage. - //! - //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. - //! \param[in] weights Optional variance scale weights. - //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr - marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Compute the log marginal likelihood function at \p samples integrating - //! over the prior density function for the mode parameters and summing - //! over modes. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - //! \note The samples are assumed to be independent and identically - //! distributed. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const; - - //! Sample the marginal likelihood function. - //! - //! \see CPrior::sampleMarginalLikelihood() for a detailed description. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const; - - //! Compute minus the log of the joint c.d.f. of the marginal likelihood - //! at \p samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ - //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. - //! \param[out] upperBound Equal to \p lowerBound. - //! \note The samples are assumed to be independent. - //! \warning The variance scales \f$\gamma_i\f$ must be in the range - //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and - //! a value of infinity is not well handled. (Very large values are - //! handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute minus the log of the one minus the joint c.d.f. of the - //! marginal likelihood at \p samples without losing precision due to - //! cancellation errors at one, i.e. the smallest non-zero value this - //! can return is the minimum double rather than epsilon. - //! - //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute the probability of a less likely, i.e. lower likelihood, - //! 
collection of independent samples from the variable. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with the probability of the set - //! for which the joint marginal likelihood is less than that of - //! \p samples (subject to the measure \p calculation). - //! \param[out] upperBound Equal to \p lowerBound. - //! \param[out] tail The tail that (left or right) that all the - //! samples are in or neither. - //! \note The samples are assumed to be independent. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, - //! i.e. a value of zero is not well defined and a value of infinity is - //! not well handled. (Very large values are handled though.) - virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &indent, std::string &result) const; - - //! Print the prior density function in a specified format. - //! - //! \see CPrior::printJointDensityFunction for details. - virtual std::string printJointDensityFunction() const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Debug the memory used by this component. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component. - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - //@} - - //! Get the current number of modes. - std::size_t numberModes() const; - - //! Check if the class invariants hold. - bool checkInvariants(const std::string &tag = std::string()) const; + CModeMergeCallback(CMultimodalPrior& prior); + void operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t targetIndex) const; private: - using TDouble1VecVec = std::vector; - using TDouble4Vec1VecVec = std::vector; - - //! The callback invoked when a mode is split. - class MATHS_EXPORT CModeSplitCallback - { - public: - CModeSplitCallback(CMultimodalPrior &prior); - void operator()(std::size_t sourceIndex, - std::size_t leftSplitIndex, - std::size_t rightSplitIndex) const; - - private: - CMultimodalPrior *m_Prior; - }; - - //! The callback invoked when two modes are merged. 
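
The split and merge callbacks are how the 1-d clusterer drives mode churn in the prior. A minimal stand-in showing the shape of that contract; the indices are cluster identifiers, and the wiring to a real clusterer is omitted:

    #include <cstddef>
    #include <functional>

    using SplitCallback =
        std::function<void(std::size_t source, std::size_t left, std::size_t right)>;

    void splitExample() {
        SplitCallback onSplit = [](std::size_t source, std::size_t left, std::size_t right) {
            // A real handler would retire mode `source` and seed two new
            // modes keyed by `left` and `right` from the seed prior.
        };
        onSplit(0, 1, 2); // cluster 0 split into clusters 1 and 2
    }
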
- class MATHS_EXPORT CModeMergeCallback - { - public: - CModeMergeCallback(CMultimodalPrior &prior); - void operator()(std::size_t leftMergeIndex, - std::size_t rightMergeIndex, - std::size_t targetIndex) const; - - private: - CMultimodalPrior *m_Prior; - }; - - using TMode = SMultimodalPriorMode >; - using TModeVec = std::vector; + CMultimodalPrior* m_Prior; + }; - private: - //! Read parameters from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); + using TMode = SMultimodalPriorMode>; + using TModeVec = std::vector; - //! We should only use this prior when it has multiple modes. - virtual bool participatesInModelSelection() const; +private: + //! Read parameters from \p traverser. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); - //! Get the number of nuisance parameters in the marginal likelihood. - //! - //! This is just number modes - 1 due to the normalization constraint. - virtual double unmarginalizedParameters() const; + //! We should only use this prior when it has multiple modes. + virtual bool participatesInModelSelection() const; - //! Full debug dump of the mode weights. - std::string debugWeights() const; + //! Get the number of nuisance parameters in the marginal likelihood. + //! + //! This is just number modes - 1 due to the normalization constraint. + virtual double unmarginalizedParameters() const; - private: - //! The object which partitions the data into clusters. - TClustererPtr m_Clusterer; + //! Full debug dump of the mode weights. + std::string debugWeights() const; - //! The object used to initialize new cluster priors. - TPriorPtr m_SeedPrior; +private: + //! The object which partitions the data into clusters. + TClustererPtr m_Clusterer; - //! The modes of the distribution. - TModeVec m_Modes; -}; + //! The object used to initialize new cluster priors. + TPriorPtr m_SeedPrior; + //! The modes of the distribution. + TModeVec m_Modes; +}; } } diff --git a/include/maths/CMultimodalPriorMode.h b/include/maths/CMultimodalPriorMode.h index c3c365759c..c893749661 100644 --- a/include/maths/CMultimodalPriorMode.h +++ b/include/maths/CMultimodalPriorMode.h @@ -20,10 +20,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief The prior of a mode of the likelihood function and //! a unique identifier for the clusterer. @@ -31,81 +29,59 @@ namespace maths //! DESCRIPTION:\n //! See, for example, CMultimodalPrior for usage. template -struct SMultimodalPriorMode -{ +struct SMultimodalPriorMode { static const std::string INDEX_TAG; static const std::string PRIOR_TAG; SMultimodalPriorMode() : s_Index(0), s_Prior() {} - SMultimodalPriorMode(std::size_t index, const PRIOR_PTR &prior) : - s_Index(index), - s_Prior(prior->clone()) - {} + SMultimodalPriorMode(std::size_t index, const PRIOR_PTR& prior) : s_Index(index), s_Prior(prior->clone()) {} //! Get the weight of this sample. - double weight() const - { - return s_Prior->numberSamples(); - } + double weight() const { return s_Prior->numberSamples(); } //! Get a checksum for this object. - uint64_t checksum(uint64_t seed) const - { + uint64_t checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, s_Index); return CChecksum::calculate(seed, s_Prior); } //! 
Get the memory used by this component - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMultimodalPrior::SMode"); core::CMemoryDebug::dynamicSize("s_Prior", s_Prior, mem); } //! Get the memory used by this component - std::size_t memoryUsage() const - { - return core::CMemory::dynamicSize(s_Prior); - } + std::size_t memoryUsage() const { return core::CMemory::dynamicSize(s_Prior); } //! Create from part of a state document. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) - { - do - { - const std::string &name = traverser.name(); + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(INDEX_TAG, s_Index) - RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), - boost::cref(params), - boost::ref(s_Prior), _1))) - } - while (traverser.next()); + RESTORE(PRIOR_TAG, + traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(s_Prior), _1))) + } while (traverser.next()); return true; } //! Persist state by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { + void acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(INDEX_TAG, s_Index); - inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(*s_Prior), _1)); + inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*s_Prior), _1)); } //! Full debug dump of the mode weights. template - static std::string debugWeights(const std::vector > &modes) - { - if (modes.empty()) - { + static std::string debugWeights(const std::vector>& modes) { + if (modes.empty()) { return std::string(); } std::ostringstream result; result << std::scientific << std::setprecision(15) << modes[0].weight(); - for (std::size_t i = 1u; i < modes.size(); ++i) - { - result << " " << modes[i].weight(); + for (std::size_t i = 1u; i < modes.size(); ++i) { + result << " " << modes[i].weight(); } return result.str(); } @@ -118,10 +94,7 @@ template const std::string SMultimodalPriorMode::INDEX_TAG("a"); template const std::string SMultimodalPriorMode::PRIOR_TAG("b"); - } } - - #endif // INCLUDED_ml_maths_SMultimodalPriorMode_h diff --git a/include/maths/CMultimodalPriorUtils.h b/include/maths/CMultimodalPriorUtils.h index 04c82154f5..2d2a6d4d24 100644 --- a/include/maths/CMultimodalPriorUtils.h +++ b/include/maths/CMultimodalPriorUtils.h @@ -12,11 +12,11 @@ #include #include -#include #include #include #include #include +#include #include #include @@ -32,1033 +32,865 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Assorted utility functions and objects used by our multimodal //! and mixture priors. -class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable -{ - public: - using TDoubleDoublePr = std::pair; - using TDoubleVec = std::vector; - using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TWeights = CConstantWeights; - - //! Get the mode of the marginal likelihood function. 
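A standalone sketch of the seed-chaining convention used by checksum() in the hunk above: each field is folded into a running seed, so the result depends on field order. The mixing step below is boost-style hash combining, shown purely for illustration; CChecksum's actual mixing function is not shown in this patch and may differ.

    #include <cstdint>
    #include <iostream>

    // Fold one value into the running seed (boost::hash_combine's 64-bit step;
    // 0x9e3779b97f4a7c15 is the 64-bit golden-ratio constant).
    std::uint64_t combine(std::uint64_t seed, std::uint64_t value) {
        return seed ^ (value + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2));
    }

    // Illustrative stand-in for SMultimodalPriorMode's checksummed fields.
    struct Mode { std::uint64_t index; std::uint64_t priorChecksum; };

    std::uint64_t checksum(const Mode& mode, std::uint64_t seed) {
        seed = combine(seed, mode.index);       // seed = checksum(seed, s_Index)
        return combine(seed, mode.priorChecksum); // then fold in the prior
    }

    int main() {
        Mode m{3, 0xdeadbeef};
        std::cout << std::hex << checksum(m, 0) << '\n';
    }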
- template - static TDoubleDoublePr marginalLikelihoodSupport(const std::vector > &modes) - { - if (modes.size() == 0) - { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); - } - if (modes.size() == 1) - { - return modes[0].s_Prior->marginalLikelihoodSupport(); - } - - TDoubleDoublePr result(boost::numeric::bounds::highest(), - boost::numeric::bounds::lowest()); - - // We define this is as the union of the mode supports. - for (std::size_t i = 0u; i < modes.size(); ++i) - { - TDoubleDoublePr s = modes[i].s_Prior->marginalLikelihoodSupport(); - result.first = std::min(result.first, s.first); - result.second = std::max(result.second, s.second); - } - - return result; +class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { +public: + using TDoubleDoublePr = std::pair; + using TDoubleVec = std::vector; + using TDouble1Vec = core::CSmallVector; + using TDouble4Vec = core::CSmallVector; + using TDouble4Vec1Vec = core::CSmallVector; + using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TWeights = CConstantWeights; + + //! Get the mode of the marginal likelihood function. + template + static TDoubleDoublePr marginalLikelihoodSupport(const std::vector>& modes) { + if (modes.size() == 0) { + return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); + } + if (modes.size() == 1) { + return modes[0].s_Prior->marginalLikelihoodSupport(); } - //! Get the mean of the marginal likelihood function. - template - static double marginalLikelihoodMean(const std::vector > &modes) - { - if (modes.size() == 0) - { - return 0.0; - } - if (modes.size() == 1) - { - return modes[0].s_Prior->marginalLikelihoodMean(); - } + TDoubleDoublePr result(boost::numeric::bounds::highest(), boost::numeric::bounds::lowest()); - // By linearity we have that: - // Integral{ x * Sum_i{ w(i) * f(x | i) } } - // = Sum_i{ w(i) * Integral{ x * f(x | i) } } - // = Sum_i{ w(i) * mean(i) } - - TMeanAccumulator result; - for (std::size_t i = 0u; i < modes.size(); ++i) - { - const SMultimodalPriorMode &mode = modes[i]; - double w = mode.weight(); - result.add(mode.s_Prior->marginalLikelihoodMean(), w); - } - return CBasicStatistics::mean(result); + // We define this is as the union of the mode supports. + for (std::size_t i = 0u; i < modes.size(); ++i) { + TDoubleDoublePr s = modes[i].s_Prior->marginalLikelihoodSupport(); + result.first = std::min(result.first, s.first); + result.second = std::max(result.second, s.second); } - //! Get the mode of the marginal likelihood function. - template - static double marginalLikelihoodMode(const std::vector > &modes, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) - { - if (modes.size() == 0) - { - return 0.0; - } - if (modes.size() == 1) - { - return modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weights); - } + return result; + } - using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack >; - - // We'll approximate this as the maximum likelihood mode (mode). - double result = 0.0; + //! Get the mean of the marginal likelihood function. 
+ template + static double marginalLikelihoodMean(const std::vector>& modes) { + if (modes.size() == 0) { + return 0.0; + } + if (modes.size() == 1) { + return modes[0].s_Prior->marginalLikelihoodMean(); + } - double seasonalScale = 1.0; - double countVarianceScale = 1.0; - try - { - seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); - countVarianceScale = maths_t::countVarianceScale(weightStyles, weights); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get variance scale " << e.what()); - } + // By linearity we have that: + // Integral{ x * Sum_i{ w(i) * f(x | i) } } + // = Sum_i{ w(i) * Integral{ x * f(x | i) } } + // = Sum_i{ w(i) * mean(i) } - // Declared outside the loop to minimize number of times they - // are created. - TDouble1Vec mode(1); - TDouble4Vec1Vec weight(1, TDouble4Vec(1, countVarianceScale)); - - TMaxAccumulator maxLikelihood; - for (std::size_t i = 0u; i < modes.size(); ++i) - { - double w = modes[i].weight(); - const T &prior = modes[i].s_Prior; - mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight[0]); - double likelihood; - if ( prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, mode, weight, likelihood) - & (maths_t::E_FpFailed | maths_t::E_FpOverflowed)) - { - continue; - } - if (maxLikelihood.add(std::log(w) + likelihood)) - { - result = mode[0]; - } - } + TMeanAccumulator result; + for (std::size_t i = 0u; i < modes.size(); ++i) { + const SMultimodalPriorMode& mode = modes[i]; + double w = mode.weight(); + result.add(mode.s_Prior->marginalLikelihoodMean(), w); + } + return CBasicStatistics::mean(result); + } + + //! Get the mode of the marginal likelihood function. + template + static double marginalLikelihoodMode(const std::vector>& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) { + if (modes.size() == 0) { + return 0.0; + } + if (modes.size() == 1) { + return modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weights); + } - if (maths_t::hasSeasonalVarianceScale(weightStyles, weights)) - { - double mean = marginalLikelihoodMean(modes); - result = mean + seasonalScale * (result - mean); - } + using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack>; - return result; - } + // We'll approximate this as the maximum likelihood mode (mode). + double result = 0.0; - //! Get the variance of the marginal likelihood. - template - static double marginalLikelihoodVariance(const std::vector > &modes, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) - { - if (modes.size() == 0) - { - return boost::numeric::bounds::highest(); - } - if (modes.size() == 1) - { - return modes[0].s_Prior->marginalLikelihoodVariance(weightStyles, weights); - } + double seasonalScale = 1.0; + double countVarianceScale = 1.0; + try { + seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); + countVarianceScale = maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale " << e.what()); } - // By linearity we have that: - // Integral{ (x - m)^2 * Sum_i{ w(i) * f(x | i) } } - // = Sum_i{ w(i) * (Integral{ x^2 * f(x | i) } - m^2) } - // = Sum_i{ w(i) * ((mi^2 + vi) - m^2) } + // Declared outside the loop to minimize number of times they + // are created. 
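The linearity identities quoted in the comments here (the variance identity appears in the hunk below) reduce the mixture mean and variance to weighted sums over the modes. A minimal standalone check of both identities, with an assumed two-mode example; the struct and values are illustrative, not the library's types:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    struct Mode { double weight; double mean; double variance; };

    // mean = Sum_i{ w(i) * mean(i) } / Z
    double mixtureMean(const std::vector<Mode>& modes) {
        double Z = 0.0, mean = 0.0;
        for (const auto& m : modes) { Z += m.weight; mean += m.weight * m.mean; }
        return mean / Z;
    }

    // var = Sum_i{ w(i) * ((m(i) - m) * (m(i) + m) + v(i)) } / Z,
    // i.e. Sum_i{ w(i) * (m(i)^2 + v(i)) } - m^2, in the factored form
    // the patch uses.
    double mixtureVariance(const std::vector<Mode>& modes) {
        double mean = mixtureMean(modes);
        double Z = 0.0, var = 0.0;
        for (const auto& m : modes) {
            Z += m.weight;
            var += m.weight * ((m.mean - mean) * (m.mean + mean) + m.variance);
        }
        return std::max(var / Z, 0.0);
    }

    int main() {
        std::vector<Mode> modes{{0.7, -1.0, 0.5}, {0.3, 4.0, 2.0}};
        // mean = 0.5, variance = 6.2 for this example.
        std::cout << mixtureMean(modes) << ' ' << mixtureVariance(modes) << '\n';
    }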
+ TDouble1Vec mode(1); + TDouble4Vec1Vec weight(1, TDouble4Vec(1, countVarianceScale)); - double varianceScale = 1.0; - try - { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) - * maths_t::countVarianceScale(weightStyles, weights); + TMaxAccumulator maxLikelihood; + for (std::size_t i = 0u; i < modes.size(); ++i) { + double w = modes[i].weight(); + const T& prior = modes[i].s_Prior; + mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight[0]); + double likelihood; + if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, mode, weight, likelihood) & + (maths_t::E_FpFailed | maths_t::E_FpOverflowed)) { + continue; } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get variance scale " << e.what()); + if (maxLikelihood.add(std::log(w) + likelihood)) { + result = mode[0]; } + } + if (maths_t::hasSeasonalVarianceScale(weightStyles, weights)) { double mean = marginalLikelihoodMean(modes); + result = mean + seasonalScale * (result - mean); + } - TMeanAccumulator result; - for (std::size_t i = 0u; i < modes.size(); ++i) - { - const SMultimodalPriorMode &mode = modes[i]; - double w = mode.weight(); - double mm = mode.s_Prior->marginalLikelihoodMean(); - double mv = mode.s_Prior->marginalLikelihoodVariance(); - result.add((mm - mean) * (mm + mean) + mv, w); - } + return result; + } - return std::max(varianceScale * CBasicStatistics::mean(result), 0.0); + //! Get the variance of the marginal likelihood. + template + static double marginalLikelihoodVariance(const std::vector>& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) { + if (modes.size() == 0) { + return boost::numeric::bounds::highest(); + } + if (modes.size() == 1) { + return modes[0].s_Prior->marginalLikelihoodVariance(weightStyles, weights); } - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
-        //! </pre>
- //! - //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the - //! the percentage of interest \p percentage. - template - static TDoubleDoublePr marginalLikelihoodConfidenceInterval(const PRIOR &prior, - const std::vector &modes, - double percentage, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) - { - TDoubleDoublePr support = marginalLikelihoodSupport(modes); - - if (isNonInformative(modes)) - { - return support; - } + // By linearity we have that: + // Integral{ (x - m)^2 * Sum_i{ w(i) * f(x | i) } } + // = Sum_i{ w(i) * (Integral{ x^2 * f(x | i) } - m^2) } + // = Sum_i{ w(i) * ((mi^2 + vi) - m^2) } + + double varianceScale = 1.0; + try { + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale " << e.what()); } + + double mean = marginalLikelihoodMean(modes); + + TMeanAccumulator result; + for (std::size_t i = 0u; i < modes.size(); ++i) { + const SMultimodalPriorMode& mode = modes[i]; + double w = mode.weight(); + double mm = mode.s_Prior->marginalLikelihoodMean(); + double mv = mode.s_Prior->marginalLikelihoodVariance(); + result.add((mm - mean) * (mm + mean) + mv, w); + } - if (modes.size() == 1) - { - return modes[0].s_Prior->marginalLikelihoodConfidenceInterval(percentage, weightStyles, weights); - } + return std::max(varianceScale * CBasicStatistics::mean(result), 0.0); + } + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: + //!
+    //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
+    //! </pre>
+ //! + //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the + //! the percentage of interest \p percentage. + template + static TDoubleDoublePr marginalLikelihoodConfidenceInterval(const PRIOR& prior, + const std::vector& modes, + double percentage, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) { + TDoubleDoublePr support = marginalLikelihoodSupport(modes); + + if (isNonInformative(modes)) { + return support; + } - percentage /= 100.0; - percentage = CTools::truncate(percentage, 0.0, 1.0); - if (percentage == 1.0) - { - return support; - } + if (modes.size() == 1) { + return modes[0].s_Prior->marginalLikelihoodConfidenceInterval(percentage, weightStyles, weights); + } - double p1 = std::log((1.0 - percentage) / 2.0); - double p2 = std::log((1.0 + percentage) / 2.0); + percentage /= 100.0; + percentage = CTools::truncate(percentage, 0.0, 1.0); + if (percentage == 1.0) { + return support; + } - CLogCdf fl(CLogCdf::E_Lower, prior, weightStyles, weights); - CLogCdf fu(CLogCdf::E_Upper, prior, weightStyles, weights); + double p1 = std::log((1.0 - percentage) / 2.0); + double p2 = std::log((1.0 + percentage) / 2.0); - CCompositeFunctions::CMinusConstant&> f1(fl, p1); - CCompositeFunctions::CMinusConstant&> f2(fu, p2); + CLogCdf fl(CLogCdf::E_Lower, prior, weightStyles, weights); + CLogCdf fu(CLogCdf::E_Upper, prior, weightStyles, weights); - static const std::size_t MAX_ITERATIONS = 30u; - static const double EPS = 1e-3; + CCompositeFunctions::CMinusConstant&> f1(fl, p1); + CCompositeFunctions::CMinusConstant&> f2(fu, p2); - TDoubleDoublePr result; + static const std::size_t MAX_ITERATIONS = 30u; + static const double EPS = 1e-3; - double x0 = marginalLikelihoodMode(modes, weightStyles, weights); + TDoubleDoublePr result; - try - { - double f10 = f1(x0); - double a = x0, b = x0, fa = f10, fb = f10; - LOG_TRACE("(a,b) = (" << a << "," << b << ")" - << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); - - std::size_t maxIterations = MAX_ITERATIONS; - if ( (f10 < 0 && !CSolvers::rightBracket(a, b, fa, fb, f1, maxIterations)) - || (f10 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, f1, maxIterations))) - { - LOG_ERROR("Unable to bracket left percentile = " << p1 - << ", (a,b) = (" << a << "," << b << ")" - << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); - result.first = support.first; - } - else - { - LOG_TRACE("(a,b) = (" << a << "," << b << ")" - << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); - maxIterations = MAX_ITERATIONS - maxIterations; - CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, - std::min(std::numeric_limits::epsilon() * b, - EPS * p1 / std::max(fa, fb))); - CSolvers::solve(a, b, fa, fb, f1, maxIterations, equal, result.first); - LOG_TRACE("p1 = " << p1 - << ", x = " << result.first - << ", f(x) = " << fl(result.first)); - } + double x0 = marginalLikelihoodMode(modes, weightStyles, weights); - result.second = result.first; - double f20 = f2(x0); - a = x0; b = x0; fa = f20; fb = f20; - maxIterations = MAX_ITERATIONS; - if (percentage == 0.0) - { - // Fall: nothing to do. 
- } - else if ( (f20 < 0 && !CSolvers::rightBracket(a, b, fa, fb, f2, maxIterations)) - || (f20 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, f2, maxIterations))) - { - LOG_ERROR("Unable to bracket right percentile = " << p2 - << ", (a,b) = (" << a << "," << b << ")" - << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); - result.second = support.second; - } - else - { - LOG_TRACE("(a,b) = [" << a << "," << b << "], " - << ", (f(a),f(b)) = [" << fa << "," << fb << "]"); - - maxIterations = MAX_ITERATIONS - maxIterations; - CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, - std::min(std::numeric_limits::epsilon() * b, - EPS * p2 / std::max(fa, fb))); - CSolvers::solve(a, b, fa, fb, f2, maxIterations, equal, result.second); - LOG_TRACE("p2 = " << p2 - << ", x = " << result.second - << ", f(x) = " << fu(result.second)); - } - } - catch (const std::exception &e) - { - LOG_ERROR("Unable to find left percentile: " << e.what() - << ", percentiles = [" << p1 << "," << p2 << "]" - << ", x0 = " << x0); - return support; - } + try { + double f10 = f1(x0); + double a = x0, b = x0, fa = f10, fb = f10; + LOG_TRACE("(a,b) = (" << a << "," << b << ")" + << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); - return result; + std::size_t maxIterations = MAX_ITERATIONS; + if ((f10 < 0 && !CSolvers::rightBracket(a, b, fa, fb, f1, maxIterations)) || + (f10 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, f1, maxIterations))) { + LOG_ERROR("Unable to bracket left percentile = " << p1 << ", (a,b) = (" << a << "," << b << ")" + << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); + result.first = support.first; + } else { + LOG_TRACE("(a,b) = (" << a << "," << b << ")" + << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); + maxIterations = MAX_ITERATIONS - maxIterations; + CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, + std::min(std::numeric_limits::epsilon() * b, EPS * p1 / std::max(fa, fb))); + CSolvers::solve(a, b, fa, fb, f1, maxIterations, equal, result.first); + LOG_TRACE("p1 = " << p1 << ", x = " << result.first << ", f(x) = " << fl(result.first)); + } + + result.second = result.first; + double f20 = f2(x0); + a = x0; + b = x0; + fa = f20; + fb = f20; + maxIterations = MAX_ITERATIONS; + if (percentage == 0.0) { + // Fall: nothing to do. + } else if ((f20 < 0 && !CSolvers::rightBracket(a, b, fa, fb, f2, maxIterations)) || + (f20 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, f2, maxIterations))) { + LOG_ERROR("Unable to bracket right percentile = " << p2 << ", (a,b) = (" << a << "," << b << ")" + << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); + result.second = support.second; + } else { + LOG_TRACE("(a,b) = [" << a << "," << b << "], " + << ", (f(a),f(b)) = [" << fa << "," << fb << "]"); + + maxIterations = MAX_ITERATIONS - maxIterations; + CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, + std::min(std::numeric_limits::epsilon() * b, EPS * p2 / std::max(fa, fb))); + CSolvers::solve(a, b, fa, fb, f2, maxIterations, equal, result.second); + LOG_TRACE("p2 = " << p2 << ", x = " << result.second << ", f(x) = " << fu(result.second)); + } + } catch (const std::exception& e) { + LOG_ERROR("Unable to find left percentile: " << e.what() << ", percentiles = [" << p1 << "," << p2 << "]" + << ", x0 = " << x0); + return support; } - //! Calculate the log marginal likelihood function integrating over - //! the prior density function. 
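Both the deleted and the reformatted versions of marginalLikelihoodConfidenceInterval locate each percentile by bracketing a root of log(c.d.f.) - log(p) outward from the likelihood mode and then solving inside the bracket. A self-contained sketch of that strategy, with plain bisection standing in for CSolvers::solve and an assumed two-mode Gaussian mixture; all names here are illustrative:

    #include <cmath>
    #include <iostream>

    // Standard normal c.d.f. via the complementary error function.
    double normCdf(double x) { return 0.5 * std::erfc(-x / std::sqrt(2.0)); }

    // C.d.f. of 0.6 * N(0,1) + 0.4 * N(5,1), an assumed example mixture.
    double mixtureCdf(double x) { return 0.6 * normCdf(x) + 0.4 * normCdf(x - 5.0); }

    double quantile(double p, double x0) {
        auto f = [p](double x) { return std::log(mixtureCdf(x)) - std::log(p); };
        // Grow the bracket geometrically in the direction of the sign change,
        // mirroring CSolvers::leftBracket / rightBracket.
        double a = x0, b = x0, step = 1.0;
        while (f(a) > 0.0) { a -= step; step *= 2.0; }
        step = 1.0;
        while (f(b) < 0.0) { b += step; step *= 2.0; }
        // Bisect to a fixed absolute tolerance.
        for (int i = 0; i < 60 && b - a > 1e-9; ++i) {
            double m = 0.5 * (a + b);
            (f(m) < 0.0 ? a : b) = m;
        }
        return 0.5 * (a + b);
    }

    int main() {
        // 2.5% and 97.5% quantiles, i.e. a 95% confidence interval;
        // roughly [-1.73, 6.53] for this mixture.
        std::cout << '[' << quantile(0.025, 0.0) << ','
                  << quantile(0.975, 0.0) << "]\n";
    }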
- template - static maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const std::vector > &modes, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) - { - // The likelihood can be computed from the conditional likelihood - // that a sample is from each mode. In particular, the likelihood - // of a sample x is: - // L(x) = Sum_m{ L(x | m) * p(m) } - // - // where, - // L(x | m) is the likelihood the sample is from the m'th mode, - // p(m) is the probability a sample is from the m'th mode. - // - // We compute the combined likelihood by taking the product of the - // individual likelihoods. Note, this brushes over the fact that the - // joint marginal likelihood that a collection of samples is from - // the i'th mode is not just the product of the likelihoods that the - // individual samples are from the i'th mode since we're integrating - // over a prior. Really, we should compute likelihoods over all - // possible assignments of the samples to the modes and use the fact - // that: - // P(a) = Product_i{ Sum_m{ p(m) * I{a(i) = m} } } - // - // where, - // P(a) is the probability of a given assignment, - // p(m) is the probability a sample is from the m'th mode, - // I{.} is the indicator function. - // - // The approximation is increasingly accurate as the prior distribution - // on each mode narrows. - - using TSizeDoublePr = std::pair; - using TSizeDoublePr5Vec = core::CSmallVector; - - result = 0.0; - - // Declared outside the loop to minimize number of times it is created. - TDouble1Vec sample(1); - TSizeDoublePr5Vec modeLogLikelihoods; - modeLogLikelihoods.reserve(modes.size()); - - double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) ? - marginalLikelihoodMean(modes) : 0.0; - TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); - try - { - for (std::size_t i = 0u; i < samples.size(); ++i) - { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double logSeasonalScale = seasonalScale != 1.0 ? std::log(seasonalScale) : 0.0; - - sample[0] = mean + (samples[i] - mean) / seasonalScale; - weight[0][0] = maths_t::countVarianceScale(weightStyles, weights[i]); - - // We re-normalize so that the maximum log likelihood is one - // to avoid underflow. - modeLogLikelihoods.clear(); - double maxLogLikelihood = boost::numeric::bounds::lowest(); - - for (std::size_t j = 0u; j < modes.size(); ++j) - { - double modeLogLikelihood; - maths_t::EFloatingPointErrorStatus status = - modes[j].s_Prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, - sample, - weight, - modeLogLikelihood); - if (status & maths_t::E_FpFailed) - { - // Logging handled at a lower level. - return status; - } - if (!(status & maths_t::E_FpOverflowed)) - { - modeLogLikelihoods.push_back(std::make_pair(j, modeLogLikelihood)); - maxLogLikelihood = std::max(maxLogLikelihood, modeLogLikelihood); - } + return result; + } + + //! Calculate the log marginal likelihood function integrating over + //! the prior density function. + template + static maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const std::vector>& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) { + // The likelihood can be computed from the conditional likelihood + // that a sample is from each mode. 
In particular, the likelihood + // of a sample x is: + // L(x) = Sum_m{ L(x | m) * p(m) } + // + // where, + // L(x | m) is the likelihood the sample is from the m'th mode, + // p(m) is the probability a sample is from the m'th mode. + // + // We compute the combined likelihood by taking the product of the + // individual likelihoods. Note, this brushes over the fact that the + // joint marginal likelihood that a collection of samples is from + // the i'th mode is not just the product of the likelihoods that the + // individual samples are from the i'th mode since we're integrating + // over a prior. Really, we should compute likelihoods over all + // possible assignments of the samples to the modes and use the fact + // that: + // P(a) = Product_i{ Sum_m{ p(m) * I{a(i) = m} } } + // + // where, + // P(a) is the probability of a given assignment, + // p(m) is the probability a sample is from the m'th mode, + // I{.} is the indicator function. + // + // The approximation is increasingly accurate as the prior distribution + // on each mode narrows. + + using TSizeDoublePr = std::pair; + using TSizeDoublePr5Vec = core::CSmallVector; + + result = 0.0; + + // Declared outside the loop to minimize number of times it is created. + TDouble1Vec sample(1); + TSizeDoublePr5Vec modeLogLikelihoods; + modeLogLikelihoods.reserve(modes.size()); + + double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) ? marginalLikelihoodMean(modes) : 0.0; + TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); + try { + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weightStyles, weights[i]); + double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double logSeasonalScale = seasonalScale != 1.0 ? std::log(seasonalScale) : 0.0; + + sample[0] = mean + (samples[i] - mean) / seasonalScale; + weight[0][0] = maths_t::countVarianceScale(weightStyles, weights[i]); + + // We re-normalize so that the maximum log likelihood is one + // to avoid underflow. + modeLogLikelihoods.clear(); + double maxLogLikelihood = boost::numeric::bounds::lowest(); + + for (std::size_t j = 0u; j < modes.size(); ++j) { + double modeLogLikelihood; + maths_t::EFloatingPointErrorStatus status = + modes[j].s_Prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, sample, weight, modeLogLikelihood); + if (status & maths_t::E_FpFailed) { + // Logging handled at a lower level. + return status; } - - if (modeLogLikelihoods.empty()) - { - // Technically, the marginal likelihood is zero here - // so the log would be infinite. We use minus max - // double because log(0) = HUGE_VALUE, which causes - // problems for Windows. Calling code is notified - // when the calculation overflows and should avoid - // taking the exponential since this will underflow - // and pollute the floating point environment. This - // may cause issues for some library function - // implementations (see fe*exceptflag for more details). - result = boost::numeric::bounds::lowest(); - return maths_t::E_FpOverflowed; + if (!(status & maths_t::E_FpOverflowed)) { + modeLogLikelihoods.push_back(std::make_pair(j, modeLogLikelihood)); + maxLogLikelihood = std::max(maxLogLikelihood, modeLogLikelihood); } + } - LOG_TRACE("modeLogLikelihoods = " - << core::CContainerPrinter::print(modeLogLikelihoods)); + if (modeLogLikelihoods.empty()) { + // Technically, the marginal likelihood is zero here + // so the log would be infinite. 
We use minus max + // double because log(0) = HUGE_VALUE, which causes + // problems for Windows. Calling code is notified + // when the calculation overflows and should avoid + // taking the exponential since this will underflow + // and pollute the floating point environment. This + // may cause issues for some library function + // implementations (see fe*exceptflag for more details). + result = boost::numeric::bounds::lowest(); + return maths_t::E_FpOverflowed; + } - double sampleLikelihood = 0.0; - double Z = 0.0; + LOG_TRACE("modeLogLikelihoods = " << core::CContainerPrinter::print(modeLogLikelihoods)); - for (std::size_t j = 0u; j < modeLogLikelihoods.size(); ++j) - { - double w = modes[modeLogLikelihoods[j].first].weight(); - // Divide through by the largest value to avoid underflow. - sampleLikelihood += w * std::exp(modeLogLikelihoods[j].second - maxLogLikelihood); - Z += w; - } + double sampleLikelihood = 0.0; + double Z = 0.0; - sampleLikelihood /= Z; - double sampleLogLikelihood = n * (std::log(sampleLikelihood) + maxLogLikelihood); + for (std::size_t j = 0u; j < modeLogLikelihoods.size(); ++j) { + double w = modes[modeLogLikelihoods[j].first].weight(); + // Divide through by the largest value to avoid underflow. + sampleLikelihood += w * std::exp(modeLogLikelihoods[j].second - maxLogLikelihood); + Z += w; + } - LOG_TRACE("sample = " << core::CContainerPrinter::print(sample) - << ", maxLogLikelihood = " << maxLogLikelihood - << ", sampleLogLikelihood = " << sampleLogLikelihood); + sampleLikelihood /= Z; + double sampleLogLikelihood = n * (std::log(sampleLikelihood) + maxLogLikelihood); - result += sampleLogLikelihood - n * logSeasonalScale; - } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute likelihood: " << e.what()); - return maths_t::E_FpFailed; - } + LOG_TRACE("sample = " << core::CContainerPrinter::print(sample) << ", maxLogLikelihood = " << maxLogLikelihood + << ", sampleLogLikelihood = " << sampleLogLikelihood); - maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result); - if (status & maths_t::E_FpFailed) - { - LOG_ERROR("Failed to compute likelihood (" << SMultimodalPriorMode::debugWeights(modes) << ")"); - LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); - LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); + result += sampleLogLikelihood - n * logSeasonalScale; } - LOG_TRACE("Joint log likelihood = " << result); - return status; + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute likelihood: " << e.what()); + return maths_t::E_FpFailed; } - //! Sample the marginal likelihood function. - template - static void sampleMarginalLikelihood(const std::vector > &modes, - std::size_t numberSamples, - TDouble1Vec &samples) - { - samples.clear(); - - if (modes.size() == 1) - { - modes[0].s_Prior->sampleMarginalLikelihood(numberSamples, samples); - return; - } + maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result); + if (status & maths_t::E_FpFailed) { + LOG_ERROR("Failed to compute likelihood (" << SMultimodalPriorMode::debugWeights(modes) << ")"); + LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); + LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); + } + LOG_TRACE("Joint log likelihood = " << result); + return status; + } + + //! Sample the marginal likelihood function. 
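The renormalization in jointLogMarginalLikelihood is the standard log-sum-exp trick: subtract the largest component log-likelihood before exponentiating, so the weighted sum cannot underflow, then add it back after taking the log. A standalone sketch with an assumed two-mode Gaussian mixture (the sampling function follows in the next hunk):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <vector>

    const double LOG_2PI = std::log(2.0 * 3.14159265358979323846);

    struct Mode { double weight; double mean; double sd; };

    double logNormal(double x, double mean, double sd) {
        double z = (x - mean) / sd;
        return -0.5 * z * z - std::log(sd) - 0.5 * LOG_2PI;
    }

    double mixtureLogLikelihood(const std::vector<Mode>& modes, double x) {
        std::vector<double> logL;
        logL.reserve(modes.size());
        double Z = 0.0;
        double maxLogL = -std::numeric_limits<double>::max();
        for (const auto& m : modes) {
            Z += m.weight;
            logL.push_back(logNormal(x, m.mean, m.sd));
            maxLogL = std::max(maxLogL, logL.back());
        }
        // Divide through by the largest component likelihood so at least one
        // exponential is exactly 1 and none of them underflow to zero.
        double sum = 0.0;
        for (std::size_t i = 0; i < modes.size(); ++i) {
            sum += modes[i].weight * std::exp(logL[i] - maxLogL);
        }
        return std::log(sum / Z) + maxLogL;
    }

    int main() {
        std::vector<Mode> modes{{0.6, 0.0, 1.0}, {0.4, 5.0, 1.0}};
        // Far in the tail both component densities underflow in linear space,
        // but the renormalized computation still returns a finite value (~ -801).
        std::cout << mixtureLogLikelihood(modes, -40.0) << '\n';
    }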
+ template + static void + sampleMarginalLikelihood(const std::vector>& modes, std::size_t numberSamples, TDouble1Vec& samples) { + samples.clear(); + + if (modes.size() == 1) { + modes[0].s_Prior->sampleMarginalLikelihood(numberSamples, samples); + return; + } - // We sample each mode according to its weight. + // We sample each mode according to its weight. - TDoubleVec normalizedWeights; - normalizedWeights.reserve(modes.size()); - double Z = 0.0; + TDoubleVec normalizedWeights; + normalizedWeights.reserve(modes.size()); + double Z = 0.0; - for (std::size_t i = 0u; i < modes.size(); ++i) - { - double weight = modes[i].weight(); - normalizedWeights.push_back(weight); - Z += weight; - } - for (std::size_t i = 0u; i < normalizedWeights.size(); ++i) - { - normalizedWeights[i] /= Z; - } + for (std::size_t i = 0u; i < modes.size(); ++i) { + double weight = modes[i].weight(); + normalizedWeights.push_back(weight); + Z += weight; + } + for (std::size_t i = 0u; i < normalizedWeights.size(); ++i) { + normalizedWeights[i] /= Z; + } - CSampling::TSizeVec sampling; - CSampling::weightedSample(numberSamples, normalizedWeights, sampling); - LOG_TRACE("normalizedWeights = " << core::CContainerPrinter::print(normalizedWeights) - << ", sampling = " << core::CContainerPrinter::print(sampling)); + CSampling::TSizeVec sampling; + CSampling::weightedSample(numberSamples, normalizedWeights, sampling); + LOG_TRACE("normalizedWeights = " << core::CContainerPrinter::print(normalizedWeights) + << ", sampling = " << core::CContainerPrinter::print(sampling)); - if (sampling.size() != modes.size()) - { - LOG_ERROR("Failed to sample marginal likelihood"); - return; - } + if (sampling.size() != modes.size()) { + LOG_ERROR("Failed to sample marginal likelihood"); + return; + } - samples.reserve(numberSamples); - TDouble1Vec modeSamples; - for (std::size_t i = 0u; i < modes.size(); ++i) - { - modes[i].s_Prior->sampleMarginalLikelihood(sampling[i], modeSamples); - LOG_TRACE("modeSamples = " << core::CContainerPrinter::print(modeSamples)); - std::copy(modeSamples.begin(), modeSamples.end(), std::back_inserter(samples)); - } - LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); + samples.reserve(numberSamples); + TDouble1Vec modeSamples; + for (std::size_t i = 0u; i < modes.size(); ++i) { + modes[i].s_Prior->sampleMarginalLikelihood(sampling[i], modeSamples); + LOG_TRACE("modeSamples = " << core::CContainerPrinter::print(modeSamples)); + std::copy(modeSamples.begin(), modeSamples.end(), std::back_inserter(samples)); + } + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); + } + + //! Calculate minus the log of the joint c.d.f. of the marginal + //! likelihood for a collection of independent samples from the + //! variable. + template + static bool minusLogJointCdf(const std::vector>& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) { + return minusLogJointCdf(modes, CMinusLogJointCdf(), weightStyles, samples, weights, lowerBound, upperBound); + } + + //! Compute minus the log of the one minus the joint c.d.f. of the + //! marginal likelihood at \p samples without losing precision due + //! to cancellation errors at one, i.e. the smallest non-zero value + //! this can return is the minimum double rather than epsilon. 
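sampleMarginalLikelihood above asks CSampling::weightedSample for one integer count per mode, proportional to the normalized weights and summing to the budget. One plausible deterministic way to do that is largest-remainder rounding, sketched below purely for illustration; the library's actual implementation is not shown in this patch and may differ:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <iostream>
    #include <numeric>
    #include <utility>
    #include <vector>

    std::vector<std::size_t> apportion(const std::vector<double>& weights, std::size_t n) {
        double Z = std::accumulate(weights.begin(), weights.end(), 0.0);
        std::vector<std::size_t> counts(weights.size(), 0);
        std::vector<std::pair<double, std::size_t>> remainders;
        std::size_t assigned = 0;
        for (std::size_t i = 0; i < weights.size(); ++i) {
            double target = static_cast<double>(n) * weights[i] / Z;
            counts[i] = static_cast<std::size_t>(std::floor(target));
            assigned += counts[i];
            remainders.emplace_back(target - std::floor(target), i);
        }
        // Hand the leftover samples to the largest fractional parts so the
        // counts sum exactly to n.
        std::sort(remainders.begin(), remainders.end(),
                  [](const auto& l, const auto& r) { return l.first > r.first; });
        for (std::size_t i = 0; assigned < n; ++i, ++assigned) {
            ++counts[remainders[i].second];
        }
        return counts;
    }

    int main() {
        for (auto c : apportion({5.0, 3.0, 2.0}, 7)) { std::cout << c << ' '; }
        std::cout << '\n'; // prints "4 2 1"
    }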
+ template + static bool minusLogJointCdfComplement(const std::vector>& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) { + return minusLogJointCdf(modes, CMinusLogJointCdfComplement(), weightStyles, samples, weights, lowerBound, upperBound); + } + + //! Calculate the joint probability of seeing a lower likelihood + //! collection of independent samples from the variable integrating + //! over the prior density function. + template + static bool probabilityOfLessLikelySamples(const PRIOR& prior, + const std::vector& modes, + maths_t::EProbabilityCalculation calculation, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) { + lowerBound = upperBound = 1.0; + tail = maths_t::E_UndeterminedTail; + + if (samples.empty()) { + LOG_ERROR("Can't compute distribution for empty sample set"); + return false; } - //! Calculate minus the log of the joint c.d.f. of the marginal - //! likelihood for a collection of independent samples from the - //! variable. - template - static bool minusLogJointCdf(const std::vector > &modes, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) - { - return minusLogJointCdf(modes, CMinusLogJointCdf(), - weightStyles, samples, weights, lowerBound, upperBound); + if (isNonInformative(modes)) { + return true; } - //! Compute minus the log of the one minus the joint c.d.f. of the - //! marginal likelihood at \p samples without losing precision due - //! to cancellation errors at one, i.e. the smallest non-zero value - //! this can return is the minimum double rather than epsilon. - template - static bool minusLogJointCdfComplement(const std::vector > &modes, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) - { - return minusLogJointCdf(modes, CMinusLogJointCdfComplement(), - weightStyles, samples, weights, lowerBound, upperBound); + if (modes.size() == 1) { + return modes[0].s_Prior->probabilityOfLessLikelySamples( + calculation, weightStyles, samples, weights, lowerBound, upperBound, tail); } - //! Calculate the joint probability of seeing a lower likelihood - //! collection of independent samples from the variable integrating - //! over the prior density function. - template - static bool probabilityOfLessLikelySamples(const PRIOR &prior, - const std::vector &modes, - maths_t::EProbabilityCalculation calculation, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) - { - lowerBound = upperBound = 1.0; - tail = maths_t::E_UndeterminedTail; - - if (samples.empty()) - { - LOG_ERROR("Can't compute distribution for empty sample set"); + // Ideally we'd find the probability of the set of samples whose + // total likelihood is less than or equal to that of the specified + // samples, i.e. the probability of the set + // R = { y | L(y) < L(x) } + // + // where, + // x = {x(1), x(2), ..., x(n)} is the sample vector. + // y is understood to be a vector quantity. 
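The point the minusLogJointCdfComplement doc comment makes above, that computing 1 - c.d.f. directly loses all precision once the c.d.f. rounds to one, is easy to demonstrate; evaluating the complement directly keeps precision down to the smallest representable double. A minimal standalone check using the standard normal:

    #include <cmath>
    #include <iostream>

    double normCdf(double x) { return 0.5 * std::erfc(-x / std::sqrt(2.0)); }
    // The complement evaluated directly, with no cancellation at 1.
    double normCdfComplement(double x) { return 0.5 * std::erfc(x / std::sqrt(2.0)); }

    int main() {
        double x = 10.0;
        std::cout << "naive 1 - cdf      = " << 1.0 - normCdf(x) << '\n';        // 0: cancels
        std::cout << "direct complement  = " << normCdfComplement(x) << '\n';    // ~7.6e-24
        std::cout << "-log(complement)   = " << -std::log(normCdfComplement(x)) << '\n';
    }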
+ // + // This is not *trivially* related to the probability that the + // probabilities of the sets + // R(i) = { y | L(y) < L(x(i)) } + // + // since the joint conditional likelihood must be integrated over + // priors for the parameters. However, we'll approximate this as + // the joint probability (of a collection of standard normal R.Vs.) + // having probabilities {P(R(i))}. This becomes increasingly accurate + // as the prior distribution narrows. + // + // For the two sided calculation, we use the fact that the likelihood + // function decreases monotonically away from the interval [a, b] + // whose end points are the leftmost and rightmost modes' modes + // since all component likelihoods decrease away from this interval. + // + // To evaluate the probability in the interval [a, b] we relax + // the hard constraint that regions where f > f(x) contribute + // zero probability. In particular, we note that we can write + // the probability as: + // P = Integral{ I(f(s) < f(x)) * f(s) }ds + // + // and that: + // I(f(s) < f(x)) = lim_{k->inf}{ exp(-k * (f(s)/f(x) - 1)) + // / (1 + exp(-k * (f(s)/f(x) - 1))) } + // + // We evaluate a smoother integral, i.e. smaller p, initially + // to find out which regions contribute the most to P and then + // re-evaluate those regions we need with higher resolution + // using the fact that the maximum error in the approximation + // of I(f(s) < f(x)) is 0.5. + + switch (calculation) { + case maths_t::E_OneSidedBelow: + if (!minusLogJointCdf(modes, weightStyles, samples, weights, upperBound, lowerBound)) { + LOG_ERROR("Failed computing probability of less likely samples: " << core::CContainerPrinter::print(samples)); return false; } + lowerBound = std::exp(-lowerBound); + upperBound = std::exp(-upperBound); + tail = maths_t::E_LeftTail; + break; - if (isNonInformative(modes)) - { - return true; - } + case maths_t::E_TwoSided: { + static const double EPS = 1000.0 * std::numeric_limits::epsilon(); + static const std::size_t MAX_ITERATIONS = 20u; + + CJointProbabilityOfLessLikelySamples lowerBoundCalculator; + CJointProbabilityOfLessLikelySamples upperBoundCalculator; - if (modes.size() == 1) - { - return modes[0].s_Prior->probabilityOfLessLikelySamples(calculation, - weightStyles, - samples, - weights, - lowerBound, upperBound, tail); + TDoubleDoublePr support = marginalLikelihoodSupport(modes); + support.first = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.first; + support.second = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.second; + double mean = marginalLikelihoodMean(modes); + + double a = boost::numeric::bounds::highest(); + double b = boost::numeric::bounds::lowest(); + double Z = 0.0; + for (const auto& mode : modes) { + double mode_ = mode.s_Prior->marginalLikelihoodMode(); + a = std::min(a, mode_); + b = std::max(b, mode_); + Z += mode.weight(); } + a = CTools::truncate(a, support.first, support.second); + b = CTools::truncate(b, support.first, support.second); + LOG_TRACE("a = " << a << ", b = " << b << ", Z = " << Z); + + std::size_t svi = static_cast( + std::find(weightStyles.begin(), weightStyles.end(), maths_t::E_SampleSeasonalVarianceScaleWeight) - weightStyles.begin()); - // Ideally we'd find the probability of the set of samples whose - // total likelihood is less than or equal to that of the specified - // samples, i.e. the probability of the set - // R = { y | L(y) < L(x) } - // - // where, - // x = {x(1), x(2), ..., x(n)} is the sample vector. - // y is understood to be a vector quantity. 
- // - // This is not *trivially* related to the probability that the - // probabilities of the sets - // R(i) = { y | L(y) < L(x(i)) } - // - // since the joint conditional likelihood must be integrated over - // priors for the parameters. However, we'll approximate this as - // the joint probability (of a collection of standard normal R.Vs.) - // having probabilities {P(R(i))}. This becomes increasingly accurate - // as the prior distribution narrows. - // - // For the two sided calculation, we use the fact that the likelihood - // function decreases monotonically away from the interval [a, b] - // whose end points are the leftmost and rightmost modes' modes - // since all component likelihoods decrease away from this interval. - // - // To evaluate the probability in the interval [a, b] we relax - // the hard constraint that regions where f > f(x) contribute - // zero probability. In particular, we note that we can write - // the probability as: - // P = Integral{ I(f(s) < f(x)) * f(s) }ds - // - // and that: - // I(f(s) < f(x)) = lim_{k->inf}{ exp(-k * (f(s)/f(x) - 1)) - // / (1 + exp(-k * (f(s)/f(x) - 1))) } - // - // We evaluate a smoother integral, i.e. smaller p, initially - // to find out which regions contribute the most to P and then - // re-evaluate those regions we need with higher resolution - // using the fact that the maximum error in the approximation - // of I(f(s) < f(x)) is 0.5. - - switch (calculation) - { - case maths_t::E_OneSidedBelow: - if (!minusLogJointCdf(modes, weightStyles, samples, weights, upperBound, lowerBound)) - { - LOG_ERROR("Failed computing probability of less likely samples: " - << core::CContainerPrinter::print(samples)); + // Declared outside the loop to minimize the number of times + // they are created. + TDouble4Vec1Vec weight(1); + TDouble1Vec wt(1); + + int tail_ = 0; + for (std::size_t i = 0u; i < samples.size(); ++i) { + double x = samples[i]; + weight[0] = weights[i]; + + if (svi < weight.size()) { + x = mean + (x - mean) / std::sqrt(weights[i][svi]); + weight[0][svi] = 1.0; + } + + double fx; + maths_t::EFloatingPointErrorStatus status = jointLogMarginalLikelihood(modes, weightStyles, {x}, weight, fx); + if (status & maths_t::E_FpFailed) { + LOG_ERROR("Unable to compute likelihood for " << x); return false; } - lowerBound = std::exp(-lowerBound); - upperBound = std::exp(-upperBound); - tail = maths_t::E_LeftTail; - break; - - case maths_t::E_TwoSided: - { - static const double EPS = 1000.0 * std::numeric_limits::epsilon(); - static const std::size_t MAX_ITERATIONS = 20u; - - CJointProbabilityOfLessLikelySamples lowerBoundCalculator; - CJointProbabilityOfLessLikelySamples upperBoundCalculator; - - TDoubleDoublePr support = marginalLikelihoodSupport(modes); - support.first = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.first; - support.second = (1.0 + (support.first > 0.0 ? 
EPS : -EPS)) * support.second; - double mean = marginalLikelihoodMean(modes); - - double a = boost::numeric::bounds::highest(); - double b = boost::numeric::bounds::lowest(); - double Z = 0.0; - for (const auto &mode : modes) - { - double mode_ = mode.s_Prior->marginalLikelihoodMode(); - a = std::min(a, mode_); - b = std::max(b, mode_); - Z += mode.weight(); - } - a = CTools::truncate(a, support.first, support.second); - b = CTools::truncate(b, support.first, support.second); - LOG_TRACE("a = " << a << ", b = " << b << ", Z = " << Z); - - std::size_t svi = static_cast( - std::find(weightStyles.begin(), - weightStyles.end(), - maths_t::E_SampleSeasonalVarianceScaleWeight) - - weightStyles.begin()); - - // Declared outside the loop to minimize the number of times - // they are created. - TDouble4Vec1Vec weight(1); - TDouble1Vec wt(1); - - int tail_ = 0; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - double x = samples[i]; - weight[0] = weights[i]; - - if (svi < weight.size()) - { - x = mean + (x - mean) / std::sqrt(weights[i][svi]); - weight[0][svi] = 1.0; - } - - double fx; - maths_t::EFloatingPointErrorStatus status = - jointLogMarginalLikelihood(modes, weightStyles, {x}, weight, fx); - if (status & maths_t::E_FpFailed) - { - LOG_ERROR("Unable to compute likelihood for " << x); - return false; - } - if (status & maths_t::E_FpOverflowed) - { - lowerBound = upperBound = 0.0; - return true; - } - LOG_TRACE("x = " << x << ", f(x) = " << fx); - - CPrior::CLogMarginalLikelihood logLikelihood(prior, weightStyles, weight); - - CTools::CMixtureProbabilityOfLessLikelySample calculator(modes.size(), x, fx, a, b); - for (const auto &mode : modes) - { - double w = mode.weight() / Z; - double centre = mode.s_Prior->marginalLikelihoodMode(weightStyles, weight[0]); - double spread = std::sqrt(mode.s_Prior->marginalLikelihoodVariance(weightStyles, weight[0])); - calculator.addMode(w, centre, spread); - tail_ = tail_ | (x < centre ? 
maths_t::E_LeftTail : maths_t::E_RightTail); - } - - double sampleLowerBound = 0.0; - double sampleUpperBound = 0.0; - - double lb, ub; - - double l; - CEqualWithTolerance lequal(CToleranceTypes::E_AbsoluteTolerance, EPS * a); - if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, l)) - { - wt[0] = l; - minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); - sampleLowerBound += std::exp(std::min(-lb, -ub)); - sampleUpperBound += std::exp(std::max(-lb, -ub)); - } - else - { - wt[0] = l; - minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); - sampleUpperBound += std::exp(std::max(-lb, -ub)); - } - - double r; - CEqualWithTolerance requal(CToleranceTypes::E_AbsoluteTolerance, EPS * b); - if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, r)) - { - wt[0] = r; - minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); - sampleLowerBound += std::exp(std::min(-lb, -ub)); - sampleUpperBound += std::exp(std::max(-lb, -ub)); - } - else - { - wt[0] = r; - minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); - sampleUpperBound += std::exp(std::max(-lb, -ub)); - } - - double p = 0.0; - if (a < b) - { - p = calculator.calculate(logLikelihood, sampleLowerBound); - } - - LOG_TRACE("sampleLowerBound = " << sampleLowerBound - << ", sampleUpperBound = " << sampleUpperBound - << " p = " << p); - - lowerBoundCalculator.add(CTools::truncate(sampleLowerBound + p, 0.0, 1.0)); - upperBoundCalculator.add(CTools::truncate(sampleUpperBound + p, 0.0, 1.0)); - } + if (status & maths_t::E_FpOverflowed) { + lowerBound = upperBound = 0.0; + return true; + } + LOG_TRACE("x = " << x << ", f(x) = " << fx); - if ( !lowerBoundCalculator.calculate(lowerBound) - || !upperBoundCalculator.calculate(upperBound)) - { - LOG_ERROR("Couldn't compute probability of less likely samples:" - << " " << lowerBoundCalculator - << " " << upperBoundCalculator); - return false; - } - tail = static_cast(tail_); + CPrior::CLogMarginalLikelihood logLikelihood(prior, weightStyles, weight); + + CTools::CMixtureProbabilityOfLessLikelySample calculator(modes.size(), x, fx, a, b); + for (const auto& mode : modes) { + double w = mode.weight() / Z; + double centre = mode.s_Prior->marginalLikelihoodMode(weightStyles, weight[0]); + double spread = std::sqrt(mode.s_Prior->marginalLikelihoodVariance(weightStyles, weight[0])); + calculator.addMode(w, centre, spread); + tail_ = tail_ | (x < centre ? 
maths_t::E_LeftTail : maths_t::E_RightTail); } - break; - case maths_t::E_OneSidedAbove: - if (!minusLogJointCdfComplement(modes, weightStyles, samples, weights, upperBound, lowerBound)) - { - LOG_ERROR("Failed computing probability of less likely samples: " - << core::CContainerPrinter::print(samples)); - return false; + double sampleLowerBound = 0.0; + double sampleUpperBound = 0.0; + + double lb, ub; + + double l; + CEqualWithTolerance lequal(CToleranceTypes::E_AbsoluteTolerance, EPS * a); + if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, l)) { + wt[0] = l; + minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); + sampleLowerBound += std::exp(std::min(-lb, -ub)); + sampleUpperBound += std::exp(std::max(-lb, -ub)); + } else { + wt[0] = l; + minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); + sampleUpperBound += std::exp(std::max(-lb, -ub)); } - lowerBound = std::exp(-lowerBound); - upperBound = std::exp(-upperBound); - tail = maths_t::E_RightTail; - break; - } - return true; - } + double r; + CEqualWithTolerance requal(CToleranceTypes::E_AbsoluteTolerance, EPS * b); + if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, r)) { + wt[0] = r; + minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); + sampleLowerBound += std::exp(std::min(-lb, -ub)); + sampleUpperBound += std::exp(std::max(-lb, -ub)); + } else { + wt[0] = r; + minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); + sampleUpperBound += std::exp(std::max(-lb, -ub)); + } - //! Check if this is a non-informative prior. - template - static bool isNonInformative(const std::vector > &modes) - { - return modes.empty() || (modes.size() == 1 && modes[0].s_Prior->isNonInformative()); - } + double p = 0.0; + if (a < b) { + p = calculator.calculate(logLikelihood, sampleLowerBound); + } - //! Get a human readable description of the prior. - template - static void print(const std::vector > &modes, - const std::string &indent, - std::string &result) - { - result += "\n" + indent + "multimodal"; - if (isNonInformative(modes)) - { - result += " non-informative"; - return; + LOG_TRACE("sampleLowerBound = " << sampleLowerBound << ", sampleUpperBound = " << sampleUpperBound << " p = " << p); + + lowerBoundCalculator.add(CTools::truncate(sampleLowerBound + p, 0.0, 1.0)); + upperBoundCalculator.add(CTools::truncate(sampleUpperBound + p, 0.0, 1.0)); } - double Z = 0.0; - for (std::size_t i = 0u; i < modes.size(); ++i) - { - Z += modes[i].weight(); + if (!lowerBoundCalculator.calculate(lowerBound) || !upperBoundCalculator.calculate(upperBound)) { + LOG_ERROR("Couldn't compute probability of less likely samples:" + << " " << lowerBoundCalculator << " " << upperBoundCalculator); + return false; } - result += ":"; - for (std::size_t i = 0u; i < modes.size(); ++i) - { - double weight = modes[i].weight() / Z; - std::string indent_ = indent + " weight " - + core::CStringUtils::typeToStringPretty(weight) + " "; - modes[i].s_Prior->print(indent_, result); + tail = static_cast(tail_); + } break; + + case maths_t::E_OneSidedAbove: + if (!minusLogJointCdfComplement(modes, weightStyles, samples, weights, upperBound, lowerBound)) { + LOG_ERROR("Failed computing probability of less likely samples: " << core::CContainerPrinter::print(samples)); + return false; } + lowerBound = std::exp(-lowerBound); + upperBound = std::exp(-upperBound); + tail = maths_t::E_RightTail; + break; } - private: - //! \brief Wrapper to call the -log(c.d.f) of a prior object. 
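The two-sided calculation completed above defines the probability of a sample x as the mass of the set { y : f(y) <= f(x) } under the mixture density f. A brute-force numeric version of that definition, for an assumed two-mode Gaussian mixture, gives useful intuition; the patched code instead relaxes the hard indicator to a sigmoid and expands the tails analytically, precisely to avoid this kind of grid integration:

    #include <cmath>
    #include <iostream>

    const double PI = 3.14159265358979323846;

    double pdf(double x, double mean, double sd) {
        double z = (x - mean) / sd;
        return std::exp(-0.5 * z * z) / (sd * std::sqrt(2.0 * PI));
    }

    // f = 0.6 * N(0,1) + 0.4 * N(5,1), an assumed example mixture.
    double f(double x) { return 0.6 * pdf(x, 0.0, 1.0) + 0.4 * pdf(x, 5.0, 1.0); }

    // P({ y : f(y) <= f(x) }) by direct grid integration.
    double probabilityOfLessLikely(double x) {
        double fx = f(x), p = 0.0, ds = 1e-3;
        for (double s = -10.0; s < 15.0; s += ds) {
            if (f(s) <= fx) { p += f(s) * ds; }
        }
        return p;
    }

    int main() {
        // A point in the trough between the modes is unlikely (~0.02),
        // a point at the higher mode is not (~1).
        std::cout << "P(2.5) = " << probabilityOfLessLikely(2.5) << '\n';
        std::cout << "P(0.0) = " << probabilityOfLessLikely(0.0) << '\n';
    }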
- class CMinusLogJointCdf - { - public: - template - bool operator()(const T &prior, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const - { - return prior->minusLogJointCdf(weightStyles, samples, weights, lowerBound, upperBound); - } - }; - - //! \brief Wrapper to call the log(1 - c.d.f) of a prior object. - class CMinusLogJointCdfComplement - { - public: - template - bool operator()(const T &prior, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const - { - return prior->minusLogJointCdfComplement(weightStyles, samples, weights, lowerBound, upperBound); - } - }; - - //! \brief Wrapper of CMultimodalPrior::minusLogJointCdf function - //! for use with our solver. - template - class CLogCdf - { - public: - using result_type = double; - - enum EStyle - { - E_Lower, - E_Upper, - E_Mean - }; - - public: - CLogCdf(EStyle style, - const PRIOR &prior, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) : - m_Style(style), - m_Prior(&prior), - m_WeightStyles(&weightStyles), - m_Weights(1, weights), - m_X(1u, 0.0) - {} - - double operator()(double x) const - { - m_X[0] = x; - double lowerBound, upperBound; - if (!m_Prior->minusLogJointCdf(*m_WeightStyles, m_X, m_Weights, lowerBound, upperBound)) - { - throw std::runtime_error("Unable to compute c.d.f. at " - + core::CStringUtils::typeToString(x)); - } - switch (m_Style) - { - case E_Lower: return -lowerBound; - case E_Upper: return -upperBound; - case E_Mean: return -(lowerBound + upperBound) / 2.0; - } - return -(lowerBound + upperBound) / 2.0; - } + return true; + } + + //! Check if this is a non-informative prior. + template + static bool isNonInformative(const std::vector>& modes) { + return modes.empty() || (modes.size() == 1 && modes[0].s_Prior->isNonInformative()); + } + + //! Get a human readable description of the prior. + template + static void print(const std::vector>& modes, const std::string& indent, std::string& result) { + result += "\n" + indent + "multimodal"; + if (isNonInformative(modes)) { + result += " non-informative"; + return; + } - private: - EStyle m_Style; - const PRIOR *m_Prior; - const maths_t::TWeightStyleVec *m_WeightStyles; - TDouble4Vec1Vec m_Weights; - //! Avoids creating the vector argument to minusLogJointCdf - //! more than once. - mutable TDouble1Vec m_X; - }; + double Z = 0.0; + for (std::size_t i = 0u; i < modes.size(); ++i) { + Z += modes[i].weight(); + } + result += ":"; + for (std::size_t i = 0u; i < modes.size(); ++i) { + double weight = modes[i].weight() / Z; + std::string indent_ = indent + " weight " + core::CStringUtils::typeToStringPretty(weight) + " "; + modes[i].s_Prior->print(indent_, result); + } + } - private: - //! Implementation of log of the joint c.d.f. of the marginal - //! likelihood. - template - static bool minusLogJointCdf(const std::vector > &modes, - CDF minusLogCdf, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) - { - lowerBound = upperBound = 0.0; - - if (samples.empty()) - { - LOG_ERROR("Can't compute c.d.f. for empty sample set"); - return false; - } +private: + //! \brief Wrapper to call the -log(c.d.f) of a prior object. 
+ class CMinusLogJointCdf { + public: + template + bool operator()(const T& prior, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { + return prior->minusLogJointCdf(weightStyles, samples, weights, lowerBound, upperBound); + } + }; - if (modes.size() == 1) - { - return minusLogCdf(modes[0].s_Prior, weightStyles, samples, weights, lowerBound, upperBound); - } + //! \brief Wrapper to call the log(1 - c.d.f) of a prior object. + class CMinusLogJointCdfComplement { + public: + template + bool operator()(const T& prior, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { + return prior->minusLogJointCdfComplement(weightStyles, samples, weights, lowerBound, upperBound); + } + }; - using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; + //! \brief Wrapper of CMultimodalPrior::minusLogJointCdf function + //! for use with our solver. + template + class CLogCdf { + public: + using result_type = double; - // The c.d.f. of the marginal likelihood is the weighted sum - // of the c.d.fs of each mode since: - // cdf(x) = Integral{ L(u) }du - // = Integral{ Sum_m{ L(u | m) p(m) } }du - // = Sum_m{ Integral{ L(u | m) ) p(m) }du } + enum EStyle { E_Lower, E_Upper, E_Mean }; - // Declared outside the loop to minimize the number of times - // they are created. - TDouble1Vec sample(1); - TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); - TDouble4Vec modeLowerBounds; - TDouble4Vec modeUpperBounds; - modeLowerBounds.reserve(modes.size()); - modeUpperBounds.reserve(modes.size()); - - try - { - double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) ? - marginalLikelihoodMean(modes) : 0.0; - - for (std::size_t i = 0; i < samples.size(); ++i) - { - double n = maths_t::count(weightStyles, weights[i]); - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); - - if (isNonInformative(modes)) - { - lowerBound -= n * std::log(CTools::IMPROPER_CDF); - upperBound -= n * std::log(CTools::IMPROPER_CDF); - continue; - } + public: + CLogCdf(EStyle style, const PRIOR& prior, const maths_t::TWeightStyleVec& weightStyles, const TDouble4Vec& weights) + : m_Style(style), m_Prior(&prior), m_WeightStyles(&weightStyles), m_Weights(1, weights), m_X(1u, 0.0) {} + + double operator()(double x) const { + m_X[0] = x; + double lowerBound, upperBound; + if (!m_Prior->minusLogJointCdf(*m_WeightStyles, m_X, m_Weights, lowerBound, upperBound)) { + throw std::runtime_error("Unable to compute c.d.f. at " + core::CStringUtils::typeToString(x)); + } + switch (m_Style) { + case E_Lower: + return -lowerBound; + case E_Upper: + return -upperBound; + case E_Mean: + return -(lowerBound + upperBound) / 2.0; + } + return -(lowerBound + upperBound) / 2.0; + } - sample[0] = seasonalScale != 1.0 ? mean + (samples[i] - mean) / seasonalScale : samples[i]; - weight[0][0] = countVarianceScale; - - // We re-normalize so that the maximum log c.d.f. is one - // to avoid underflow. 
- TMinAccumulator minLowerBound; - TMinAccumulator minUpperBound; - modeLowerBounds.clear(); - modeUpperBounds.clear(); - - for (std::size_t j = 0u; j < modes.size(); ++j) - { - double modeLowerBound; - double modeUpperBound; - if (!minusLogCdf(modes[j].s_Prior, - TWeights::COUNT_VARIANCE, - sample, weight, - modeLowerBound, modeUpperBound)) - { - LOG_ERROR("Unable to compute c.d.f. for " - << core::CContainerPrinter::print(samples)); - return false; - } - minLowerBound.add(modeLowerBound); - minUpperBound.add(modeUpperBound); - modeLowerBounds.push_back(modeLowerBound); - modeUpperBounds.push_back(modeUpperBound); - } + private: + EStyle m_Style; + const PRIOR* m_Prior; + const maths_t::TWeightStyleVec* m_WeightStyles; + TDouble4Vec1Vec m_Weights; + //! Avoids creating the vector argument to minusLogJointCdf + //! more than once. + mutable TDouble1Vec m_X; + }; + +private: + //! Implementation of log of the joint c.d.f. of the marginal + //! likelihood. + template + static bool minusLogJointCdf(const std::vector>& modes, + CDF minusLogCdf, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) { + lowerBound = upperBound = 0.0; + + if (samples.empty()) { + LOG_ERROR("Can't compute c.d.f. for empty sample set"); + return false; + } - TMeanAccumulator sampleLowerBound; - TMeanAccumulator sampleUpperBound; - - for (std::size_t j = 0u; j < modes.size(); ++j) - { - LOG_TRACE("Mode -log(c.d.f.) = [" << modeLowerBounds[j] - << "," << modeUpperBounds[j] << "]"); - double w = modes[j].weight(); - // Divide through by the largest value to avoid underflow. - // Remember we are working with minus logs so the largest - // value corresponds to the smallest log. - sampleLowerBound.add(std::exp(-(modeLowerBounds[j] - minLowerBound[0])), w); - sampleUpperBound.add(std::exp(-(modeUpperBounds[j] - minUpperBound[0])), w); - } + if (modes.size() == 1) { + return minusLogCdf(modes[0].s_Prior, weightStyles, samples, weights, lowerBound, upperBound); + } - lowerBound += n * std::max(minLowerBound[0] - std::log(CBasicStatistics::mean(sampleLowerBound)), 0.0); - upperBound += n * std::max(minUpperBound[0] - std::log(CBasicStatistics::mean(sampleUpperBound)), 0.0); + using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; + + // The c.d.f. of the marginal likelihood is the weighted sum + // of the c.d.fs of each mode since: + // cdf(x) = Integral{ L(u) }du + // = Integral{ Sum_m{ L(u | m) p(m) } }du + // = Sum_m{ Integral{ L(u | m) ) p(m) }du } + + // Declared outside the loop to minimize the number of times + // they are created. + TDouble1Vec sample(1); + TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); + TDouble4Vec modeLowerBounds; + TDouble4Vec modeUpperBounds; + modeLowerBounds.reserve(modes.size()); + modeUpperBounds.reserve(modes.size()); + + try { + double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) ? marginalLikelihoodMean(modes) : 0.0; + + for (std::size_t i = 0; i < samples.size(); ++i) { + double n = maths_t::count(weightStyles, weights[i]); + double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); + + if (isNonInformative(modes)) { + lowerBound -= n * std::log(CTools::IMPROPER_CDF); + upperBound -= n * std::log(CTools::IMPROPER_CDF); + continue; + } - LOG_TRACE("sample = " << core::CContainerPrinter::print(sample) - << ", sample -log(c.d.f.) 
= [" - << sampleLowerBound << "," << sampleUpperBound << "]"); + sample[0] = seasonalScale != 1.0 ? mean + (samples[i] - mean) / seasonalScale : samples[i]; + weight[0][0] = countVarianceScale; + + // We re-normalize so that the maximum log c.d.f. is one + // to avoid underflow. + TMinAccumulator minLowerBound; + TMinAccumulator minUpperBound; + modeLowerBounds.clear(); + modeUpperBounds.clear(); + + for (std::size_t j = 0u; j < modes.size(); ++j) { + double modeLowerBound; + double modeUpperBound; + if (!minusLogCdf(modes[j].s_Prior, TWeights::COUNT_VARIANCE, sample, weight, modeLowerBound, modeUpperBound)) { + LOG_ERROR("Unable to compute c.d.f. for " << core::CContainerPrinter::print(samples)); + return false; + } + minLowerBound.add(modeLowerBound); + minUpperBound.add(modeUpperBound); + modeLowerBounds.push_back(modeLowerBound); + modeUpperBounds.push_back(modeUpperBound); } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate c.d.f.: " << e.what()); - return false; - } - LOG_TRACE("Joint -log(c.d.f.) = [" << lowerBound << "," << upperBound << "]"); + TMeanAccumulator sampleLowerBound; + TMeanAccumulator sampleUpperBound; + + for (std::size_t j = 0u; j < modes.size(); ++j) { + LOG_TRACE("Mode -log(c.d.f.) = [" << modeLowerBounds[j] << "," << modeUpperBounds[j] << "]"); + double w = modes[j].weight(); + // Divide through by the largest value to avoid underflow. + // Remember we are working with minus logs so the largest + // value corresponds to the smallest log. + sampleLowerBound.add(std::exp(-(modeLowerBounds[j] - minLowerBound[0])), w); + sampleUpperBound.add(std::exp(-(modeUpperBounds[j] - minUpperBound[0])), w); + } - return true; + lowerBound += n * std::max(minLowerBound[0] - std::log(CBasicStatistics::mean(sampleLowerBound)), 0.0); + upperBound += n * std::max(minUpperBound[0] - std::log(CBasicStatistics::mean(sampleUpperBound)), 0.0); + + LOG_TRACE("sample = " << core::CContainerPrinter::print(sample) << ", sample -log(c.d.f.) = [" << sampleLowerBound << "," + << sampleUpperBound << "]"); + } + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate c.d.f.: " << e.what()); + return false; } -}; + LOG_TRACE("Joint -log(c.d.f.) = [" << lowerBound << "," << upperBound << "]"); + return true; + } +}; } } diff --git a/include/maths/CMultinomialConjugate.h b/include/maths/CMultinomialConjugate.h index 1033b864a0..f6ef1e228e 100644 --- a/include/maths/CMultinomialConjugate.h +++ b/include/maths/CMultinomialConjugate.h @@ -13,15 +13,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { struct SDistributionRestoreParams; //! \brief A conjugate prior distribution for a multinomial variable. @@ -44,364 +41,352 @@ struct SDistributionRestoreParams; //! the data when using one-of-n composition (see COneOfNPrior) or model data with //! multiple modes when using multi-modal composition (see CMultimodalPrior). //! From a design point of view this is the composite pattern. -class MATHS_EXPORT CMultinomialConjugate : public CPrior -{ - public: - using TEqualWithTolerance = CEqualWithTolerance; - - //! Lift the overloads of addSamples into scope. - using CPrior::addSamples; - //! Lift the overloads of print into scope. - using CPrior::print; - - public: - //! \name Life-Cycle - //@{ - //! Construct an arbitrarily initialised object, suitable only for - //! assigning to or swapping with a valid one. 
- CMultinomialConjugate(); - - CMultinomialConjugate(std::size_t maximumNumberOfCategories, - const TDoubleVec &categories, - const TDoubleVec &concentrationParameters, - double decayRate = 0.0); - - //! Construct from part of an state document. - CMultinomialConjugate(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - // Default copy constructor and assignment operator work. - - //! Efficient swap of the contents of this prior and \p other. - void swap(CMultinomialConjugate &other); - - //! Create an instance of a non-informative prior. - //! - //! \param[in] maximumNumberOfCategories The number of categories in the likelihood function. - //! \param[in] decayRate The rate at which to revert to the non-informative prior. - //! \return A non-informative prior. - static CMultinomialConjugate nonInformativePrior(std::size_t maximumNumberOfCategories, - double decayRate = 0.0); - //@} - - //! \name Prior Contract - //@{ - //! Get the type of this prior. - virtual EPrior type() const; - - //! Create a copy of the prior. - //! - //! \return A pointer to a newly allocated clone of this prior. - //! \warning The caller owns the object returned. - virtual CMultinomialConjugate *clone() const; - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); - - //! Returns false. - virtual bool needsOffset() const; - - //! No-op. - virtual double adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Returns zero. - virtual double offset() const; - - //! Update the prior with a collection of independent samples from the - //! multinomial variable. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Propagate the prior density function forwards by \p time. - //! - //! The prior distribution relaxes back to non-informative at a rate - //! controlled by the decay rate parameter (optionally supplied to the - //! constructor). - //! - //! \param[in] time The time increment to apply. - //! \note \p time must be non negative. - virtual void propagateForwardsByTime(double time); - - //! Get the support for the marginal likelihood function. - virtual TDoubleDoublePr marginalLikelihoodSupport() const; - - //! Get the mean of the marginal likelihood function. - virtual double marginalLikelihoodMean() const; - - //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$min_{a,b}\{P([a,m]) + P([m,b])\} = p / 100\f$
-        //! 
- //! - //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the - //! the percentage of interest \p percentage. Note that since the - //! distribution is discrete we can only approximate the probability. - //! - //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Ignored. - //! \param[in] weights Ignored. - //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr - marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Compute the log marginal likelihood function at \p samples integrating - //! over the prior density function for the category probability parameters. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the log likelihood of \p samples. - //! Note that if the model has overflowed then this is really a lower - //! bound, but in this case we want the model to die off gracefully from - //! the model collection, so this is appropriate. - //! \note The samples are assumed to be independent and identically - //! distributed. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const; - - //! Sample the marginal likelihood function. - //! - //! This samples each category in proportion to its probability. Since - //! each category can only be sampled an integer number of times we - //! find the sampling which minimizes the error from the ideal sampling. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const; - - //! Compute minus the log of the joint cumulative density function - //! of the marginal likelihood at \p samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. Note variance scales are ignored. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound If the model has not overflowed this is - //! filled in with \f$-\log(\prod_i{F(x_i)})\f$ where \f$F(.)\f$ is - //! the c.d.f. and \f$\{x_i\}\f$ are the samples. Otherwise, it is - //! filled in with a sharp lower bound. - //! \param[out] upperBound If the model has not overflowed this is - //! filled in with \f$-\log(\prod_i{F(x_i)})\f$ where \f$F(.)\f$ is - //! the c.d.f. and \f$\{x_i\}\f$ are the samples. Otherwise, it is - //! filled in with a sharp upper bound. - //! \note The samples are assumed to be independent. - virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute minus the log of the one minus the joint cumulative density - //! 
function of the marginal likelihood at \p samples without losing - //! precision due to cancellation errors at one, i.e. the smallest - //! non-zero value this can return is the minimum double rather than - //! epsilon. - //! - //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute the probability of a less likely, i.e. lower likelihood, - //! collection of independent samples from the variable. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. Note variance scales are ignored. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights. See minusLogJointCdf for discussion. - //! \param[out] lowerBound If the model has not overflowed this is filled - //! in with the probability of the set for which the joint marginal - //! likelihood is less than that of \p samples (subject to the measure - //! \p calculation). Otherwise, it is filled in with a sharp lower bound. - //! \param[out] upperBound If the model has not overflowed this is filled - //! in with the probability of the set for which the joint marginal - //! likelihood is less than that of \p samples (subject to the measure - //! \p calculation). Otherwise, it is filled in with an upper bound. - //! \param[out] tail The tail that (left or right) that all the samples - //! are in or neither. - //! \note The samples are assumed to be independent. - virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &indent, std::string &result) const; - - //! Print the marginal likelihood function in a specified format. - //! - //! \see CPrior::printMarginalLikelihoodFunction for details. - virtual std::string printMarginalLikelihoodFunction(double weight = 1.0) const; - - //! Print the prior density function in a specified format. - //! - //! \see CPrior::printJointDensityFunction for details. - virtual std::string printJointDensityFunction() const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - //@} - - //! Remove the categories in \p categoriesToRemove. - void removeCategories(TDoubleVec categoriesToRemove); - - //! 
Get the index of \p category in the categories vector if it is a - //! valid category for this prior. - //! - //! \param[in] category The category label. - //! \param[out] result Set to the index of \p category in categories - //! if they exist and maximum size_t otherwise. - bool index(double category, std::size_t &result) const; - - //! Get the categories. - const TDoubleVec &categories() const; - - //! Get the concentrations. - const TDoubleVec &concentrations() const; - - //! Get the concentration for a specified category - bool concentration(double category, double &result) const; - - //! Get the total concetration for a specified category - double totalConcentration() const; - - //! Get the expected probability of \p category if it exists. - //! - //! \note The marginal likelihood function of a single sample is - //! multinomial with probabilities equal to the expected values of - //! each probability parameter in the Dirichlet prior. - bool probability(double category, double &result) const; - - //! Get the expected probabilities for each category. - //! - //! \note The marginal likelihood function of a single sample is - //! multinomial with probabilities equal to the expected values of - //! each probability parameter in the Dirichlet prior. - TDoubleVec probabilities() const; - - //! Compute upper and lower bounds for the collection of probabilities: - //!
-        //!   \f$P_i = P(\{c : L(c) <= L(c_i)\})\f$
-        //! 
- //! - //! for all categories \f$c_i\f$. - //! \param[in] calculation The style of the probability calculation (see - //! CTools::EProbabilityCalculation for details). - //! \param[out] lowerBounds If the model has not overflowed this is filled - //! in with the probabilities (subject to the measure \p calculation). - //! Otherwise, it is filled in with a sharp lower bound. - //! \param[out] upperBounds If the model has not overflowed this is filled - //! in with the probability of the set (subject to the measure \p calculation). - //! Otherwise, it is filled in an upper bound. - void probabilitiesOfLessLikelyCategories(maths_t::EProbabilityCalculation calculation, - TDoubleVec &lowerBounds, - TDoubleVec &upperBounds) const; - - //! \name Test Functions - //@{ - //! Compute the specified percentage confidence intervals for the - //! category probabilities. - //! - //! The marginal distribution of the i'th probability is beta distributed. - //! In particular, the i'th probability marginal density function is:\n - //!
-        //!   \f$\displaystyle f(p_i) = \frac{\Gamma(a_0)}{\Gamma(a_0 - a_i)\Gamma(a_i)}(1 - p_i)^{a_0-a_i-1}p_i^{a_i-1}\f$
-        //! 
-        //!
-        //! where,\n
-        //! \f$\displaystyle a_0 = \sum_i{a_i}\f$,\n
-        //! \f$\{a_i\}\f$ are the Dirichlet prior concentrations.
-        TDoubleDoublePrVec confidenceIntervalProbabilities(double percentage) const;
-
-        //! Check if two priors are equal to the specified tolerance.
-        bool equalTolerance(const CMultinomialConjugate &rhs,
-                            const TEqualWithTolerance &equal) const;
-        //@}
-
-    private:
-        //! Read parameters from \p traverser.
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-        //! Shrinks vectors so that we don't use more memory than we need.
-        //! Typically vector implements a doubling policy when growing the
-        //! buffer, which means that the buffers can end up twice as large
-        //! as we need. This shrinks the capacity based on the number of
-        //! available categories remaining.
-        void shrink();
-
-    private:
-        //! The sum of the concentration parameters of a non-informative prior.
-        static const double NON_INFORMATIVE_CONCENTRATION;
-
-        //! Set to true if we overflow the permitted number of categories.
-        int m_NumberAvailableCategories;
-
-        //! The category values.
-        TDoubleVec m_Categories;
-
-        //! The concentration parameters of the Dirichlet prior.
-        TDoubleVec m_Concentrations;
-
-        //! The total concentration. Note that if we have observed more
-        //! categories than we were permitted this is not equal to the
-        //! sum of the concentration parameters.
-        double m_TotalConcentration;
+class MATHS_EXPORT CMultinomialConjugate : public CPrior {
+public:
+    using TEqualWithTolerance = CEqualWithTolerance;
+
+    //! Lift the overloads of addSamples into scope.
+    using CPrior::addSamples;
+    //! Lift the overloads of print into scope.
+    using CPrior::print;
+
+public:
+    //! \name Life-Cycle
+    //@{
+    //! Construct an arbitrarily initialised object, suitable only for
+    //! assigning to or swapping with a valid one.
+    CMultinomialConjugate();
+
+    CMultinomialConjugate(std::size_t maximumNumberOfCategories,
+                          const TDoubleVec& categories,
+                          const TDoubleVec& concentrationParameters,
+                          double decayRate = 0.0);
+
+    //! Construct from part of a state document.
+    CMultinomialConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+
+    // Default copy constructor and assignment operator work.
+
+    //! Efficient swap of the contents of this prior and \p other.
+    void swap(CMultinomialConjugate& other);
+
+    //! Create an instance of a non-informative prior.
+    //!
+    //! \param[in] maximumNumberOfCategories The number of categories in the likelihood function.
+    //! \param[in] decayRate The rate at which to revert to the non-informative prior.
+    //! \return A non-informative prior.
+    static CMultinomialConjugate nonInformativePrior(std::size_t maximumNumberOfCategories, double decayRate = 0.0);
+    //@}
+
+    //! \name Prior Contract
+    //@{
+    //! Get the type of this prior.
+    virtual EPrior type() const;
+
+    //! Create a copy of the prior.
+    //!
+    //! \return A pointer to a newly allocated clone of this prior.
+    //! \warning The caller owns the object returned.
+    virtual CMultinomialConjugate* clone() const;
+
+    //! Reset the prior to non-informative.
+    virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0);
+
+    //! Returns false.
+    virtual bool needsOffset() const;
+
+    //! No-op.
+    virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights);
+
+    //! Returns zero.
+    virtual double offset() const;
+
+    //!
Update the prior with a collection of independent samples from the + //! multinomial variable. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Propagate the prior density function forwards by \p time. + //! + //! The prior distribution relaxes back to non-informative at a rate + //! controlled by the decay rate parameter (optionally supplied to the + //! constructor). + //! + //! \param[in] time The time increment to apply. + //! \note \p time must be non negative. + virtual void propagateForwardsByTime(double time); + + //! Get the support for the marginal likelihood function. + virtual TDoubleDoublePr marginalLikelihoodSupport() const; + + //! Get the mean of the marginal likelihood function. + virtual double marginalLikelihoodMean() const; + + //! Get the mode of the marginal likelihood function. + virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the variance of the marginal likelihood. + virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: + //!
+    //!   \f$min_{a,b}\{P([a,m]) + P([m,b])\} = p / 100\f$
+    //! 
+    //!
+    //! where \f$m\f$ is the median of the distribution and \f$p\f$ is
+    //! the percentage of interest \p percentage. Note that since the
+    //! distribution is discrete we can only approximate the probability.
+    //!
+    //! \param[in] percentage The percentage of interest.
+    //! \param[in] weightStyles Ignored.
+    //! \param[in] weights Ignored.
+    //! \note \p percentage should be in the range [0.0, 100.0).
+    virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage,
+                                                                 const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE,
+                                                                 const TDouble4Vec& weights = TWeights::UNIT) const;
+
+    //! Compute the log marginal likelihood function at \p samples integrating
+    //! over the prior density function for the category probability parameters.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the variable.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] result Filled in with the log likelihood of \p samples.
+    //! Note that if the model has overflowed then this is really a lower
+    //! bound, but in this case we want the model to die off gracefully from
+    //! the model collection, so this is appropriate.
+    //! \note The samples are assumed to be independent and identically
+    //! distributed.
+    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                          const TDouble1Vec& samples,
+                                                                          const TDouble4Vec1Vec& weights,
+                                                                          double& result) const;
+
+    //! Sample the marginal likelihood function.
+    //!
+    //! This samples each category in proportion to its probability. Since
+    //! each category can only be sampled an integer number of times we
+    //! find the sampling which minimizes the error from the ideal sampling.
+    //!
+    //! \param[in] numberSamples The number of samples required.
+    //! \param[out] samples Filled in with samples from the prior.
+    //! \note \p numberSamples is truncated to the number of samples received.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const;
+
+    //! Compute minus the log of the joint cumulative density function
+    //! of the marginal likelihood at \p samples.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weights
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details. Note variance scales are ignored.
+    //! \param[in] samples The samples of interest.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] lowerBound If the model has not overflowed this is
+    //! filled in with \f$-\log(\prod_i{F(x_i)})\f$ where \f$F(.)\f$ is
+    //! the c.d.f. and \f$\{x_i\}\f$ are the samples. Otherwise, it is
+    //! filled in with a sharp lower bound.
+    //! \param[out] upperBound If the model has not overflowed this is
+    //! filled in with \f$-\log(\prod_i{F(x_i)})\f$ where \f$F(.)\f$ is
+    //! the c.d.f. and \f$\{x_i\}\f$ are the samples. Otherwise, it is
+    //! filled in with a sharp upper bound.
+    //! \note The samples are assumed to be independent.
+    virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                  const TDouble1Vec& samples,
+                                  const TDouble4Vec1Vec& weights,
+                                  double& lowerBound,
+                                  double& upperBound) const;
+
+    //! Compute minus the log of one minus the joint cumulative density
+    //!
 function of the marginal likelihood at \p samples without losing
+    //! precision due to cancellation errors at one, i.e. the smallest
+    //! non-zero value this can return is the minimum double rather than
+    //! epsilon.
+    //!
+    //! \see minusLogJointCdf for more details.
+    virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                            const TDouble1Vec& samples,
+                                            const TDouble4Vec1Vec& weights,
+                                            double& lowerBound,
+                                            double& upperBound) const;
+
+    //! Compute the probability of a less likely, i.e. lower likelihood,
+    //! collection of independent samples from the variable.
+    //!
+    //! \param[in] calculation The style of the probability calculation
+    //! (see model_t::EProbabilityCalculation for details).
+    //! \param[in] weightStyles Controls the interpretation of the weights
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details. Note variance scales are ignored.
+    //! \param[in] samples The samples of interest.
+    //! \param[in] weights The weights. See minusLogJointCdf for discussion.
+    //! \param[out] lowerBound If the model has not overflowed this is filled
+    //! in with the probability of the set for which the joint marginal
+    //! likelihood is less than that of \p samples (subject to the measure
+    //! \p calculation). Otherwise, it is filled in with a sharp lower bound.
+    //! \param[out] upperBound If the model has not overflowed this is filled
+    //! in with the probability of the set for which the joint marginal
+    //! likelihood is less than that of \p samples (subject to the measure
+    //! \p calculation). Otherwise, it is filled in with an upper bound.
+    //! \param[out] tail The tail (left or right) that all the samples
+    //! are in, or neither.
+    //! \note The samples are assumed to be independent.
+    virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
+                                                const TWeightStyleVec& weightStyles,
+                                                const TDouble1Vec& samples,
+                                                const TDouble4Vec1Vec& weights,
+                                                double& lowerBound,
+                                                double& upperBound,
+                                                maths_t::ETail& tail) const;
+
+    //! Check if this is a non-informative prior.
+    virtual bool isNonInformative() const;
+
+    //! Get a human readable description of the prior.
+    //!
+    //! \param[in] indent The indent to use at the start of new lines.
+    //! \param[in,out] result Filled in with the description.
+    virtual void print(const std::string& indent, std::string& result) const;
+
+    //! Print the marginal likelihood function in a specified format.
+    //!
+    //! \see CPrior::printMarginalLikelihoodFunction for details.
+    virtual std::string printMarginalLikelihoodFunction(double weight = 1.0) const;
+
+    //! Print the prior density function in a specified format.
+    //!
+    //! \see CPrior::printJointDensityFunction for details.
+    virtual std::string printJointDensityFunction() const;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Get the memory used by this component
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this component
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const;
+
+    //! Persist state by passing information to the supplied inserter
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+    //@}
+
+    //! Remove the categories in \p categoriesToRemove.
+    void removeCategories(TDoubleVec categoriesToRemove);
+
+    //!
 Get the index of \p category in the categories vector if it is a
+    //! valid category for this prior.
+    //!
+    //! \param[in] category The category label.
+    //! \param[out] result Set to the index of \p category in categories
+    //! if it exists and to the maximum size_t otherwise.
+    bool index(double category, std::size_t& result) const;
+
+    //! Get the categories.
+    const TDoubleVec& categories() const;
+
+    //! Get the concentrations.
+    const TDoubleVec& concentrations() const;
+
+    //! Get the concentration for a specified category
+    bool concentration(double category, double& result) const;
+
+    //! Get the total concentration of all the categories
+    double totalConcentration() const;
+
+    //! Get the expected probability of \p category if it exists.
+    //!
+    //! \note The marginal likelihood function of a single sample is
+    //! multinomial with probabilities equal to the expected values of
+    //! each probability parameter in the Dirichlet prior.
+    bool probability(double category, double& result) const;
+
+    //! Get the expected probabilities for each category.
+    //!
+    //! \note The marginal likelihood function of a single sample is
+    //! multinomial with probabilities equal to the expected values of
+    //! each probability parameter in the Dirichlet prior.
+    TDoubleVec probabilities() const;
+
+    //! Compute upper and lower bounds for the collection of probabilities:
+    //!
+    //!   \f$P_i = P(\{c : L(c) <= L(c_i)\})\f$
+    //! 
+    //!
+    //! for all categories \f$c_i\f$.
+    //! \param[in] calculation The style of the probability calculation (see
+    //! CTools::EProbabilityCalculation for details).
+    //! \param[out] lowerBounds If the model has not overflowed this is filled
+    //! in with the probabilities (subject to the measure \p calculation).
+    //! Otherwise, it is filled in with a sharp lower bound.
+    //! \param[out] upperBounds If the model has not overflowed this is filled
+    //! in with the probability of the set (subject to the measure \p calculation).
+    //! Otherwise, it is filled in with an upper bound.
+    void probabilitiesOfLessLikelyCategories(maths_t::EProbabilityCalculation calculation,
+                                             TDoubleVec& lowerBounds,
+                                             TDoubleVec& upperBounds) const;
+
+    //! \name Test Functions
+    //@{
+    //! Compute the specified percentage confidence intervals for the
+    //! category probabilities.
+    //!
+    //! The marginal distribution of the i'th probability is beta distributed.
+    //! In particular, the i'th probability marginal density function is:\n
+    //!
+    //!   \f$\displaystyle f(p_i) = \frac{\Gamma(a_0)}{\Gamma(a_0 - a_i)\Gamma(a_i)}(1 - p_i)^{a_0-a_i-1}p_i^{a_i-1}\f$
+    //! 
+ //! + //! where,\n + //! \f$\displaystyle a_0 = \sum_i{a_i}\f$,\n + //! \f$\{a_i\}\f$ are the Dirichlet prior concentrations. + TDoubleDoublePrVec confidenceIntervalProbabilities(double percentage) const; + + //! Check if two priors are equal to the specified tolerance. + bool equalTolerance(const CMultinomialConjugate& rhs, const TEqualWithTolerance& equal) const; + //@} + +private: + //! Read parameters from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Shrinks vectors so that we don't use more memory than we need. + //! Typically vector implements a doubling policy when growing the + //! buffer, which means that the buffers can end up twice as large + //! as we need. This shrinks the capacity based on the number of + //! available categories remaining. + void shrink(); + +private: + //! The sum of the concentration parameters of a non-informative prior. + static const double NON_INFORMATIVE_CONCENTRATION; + + //! Set to true if we overflow the permitted number of categories. + int m_NumberAvailableCategories; + + //! The category values. + TDoubleVec m_Categories; + + //! The concentration parameters of the Dirichlet prior. + TDoubleVec m_Concentrations; + + //! The total concentration. Note that if we have observed more + //! categories than we were permitted this is not equal to the + //! sum of the concentration parameters. + double m_TotalConcentration; }; - } } diff --git a/include/maths/CMultivariateConstantPrior.h b/include/maths/CMultivariateConstantPrior.h index d3df63d990..2558731c5a 100644 --- a/include/maths/CMultivariateConstantPrior.h +++ b/include/maths/CMultivariateConstantPrior.h @@ -15,15 +15,12 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief A very lightweight prior for representing data for which //! expect a single value. @@ -33,133 +30,120 @@ namespace maths //! only ever takes a single value. This is useful for modeling data //! features such as the value of an indicator function in a consistent //! manner to all other types of data. -class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior -{ - public: - using TOptionalDouble10Vec = boost::optional; - - // Lift all overloads of into scope. - //{ - using CMultivariatePrior::addSamples; - using CMultivariatePrior::print; - //} - - public: - //! \name Life-Cycle - //@{ - CMultivariateConstantPrior(std::size_t dimension, - const TOptionalDouble10Vec &constant = TOptionalDouble10Vec()); - - //! Construct by traversing a state document. - CMultivariateConstantPrior(std::size_t dimension, - core::CStateRestoreTraverser &traverser); - //@} - - //! \name Prior Contract - //@{ - //! Create a copy of the prior. - //! - //! \warning Caller owns returned object. - virtual CMultivariateConstantPrior *clone() const; - - //! Get the dimension of the prior. - virtual std::size_t dimension() const; - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); - - //! No-op. - virtual void adjustOffset(const TWeightStyleVec &weightStyle, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights); - - //! Set the constant if it hasn't been set. - virtual void addSamples(const TWeightStyleVec &weightStyle, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights); - - //! No-op. 
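The behaviour documented below for the constant prior's jointLogMarginalLikelihood (a large value when every sample equals the constant, zero likelihood otherwise) can be summarized in a few lines. A toy sketch, where the magnitude of the "large value" is an arbitrary stand-in:

#include <limits>
#include <vector>

// Sketch: a constant prior puts all probability mass on one point, so the
// joint log-likelihood is large and finite only when every sample equals
// the constant; any other sample drives it to -infinity.
inline double constantJointLogLikelihood(double constant, const std::vector<double>& samples) {
    const double LARGE_LOG_LIKELIHOOD = 50.0; // arbitrary stand-in value
    for (double x : samples) {
        if (x != constant) { // exact comparison is intended for a point mass
            return -std::numeric_limits<double>::infinity();
        }
    }
    return LARGE_LOG_LIKELIHOOD * static_cast<double>(samples.size());
}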
- virtual void propagateForwardsByTime(double time); - - //! Get the corresponding constant univariate prior. - virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const; - - //! Compute the bivariate const bivariate prior. - virtual TPriorPtrDoublePr bivariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const; - - //! Get the support for the marginal likelihood function. - virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const; - - //! Returns constant or zero if unset (by equidistribution). - virtual TDouble10Vec marginalLikelihoodMean() const; - - //! Returns constant or zero if unset (by equidistribution). - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) const; - - //! Get the covariance matrix of the marginal likelihood. - virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const; - - //! Get the diagonal of the covariance matrix of the marginal likelihood. - virtual TDouble10Vec marginalLikelihoodVariances() const; - - //! Returns a large value if all samples are equal to the constant - //! and zero otherwise. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - double &result) const; - - //! Get \p numberSamples times the constant. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble10Vec1Vec &samples) const; - - //! Check if this is a non-informative prior. - bool isNonInformative() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] separator String used to separate priors. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &separator, std::string &result) const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; +class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { +public: + using TOptionalDouble10Vec = boost::optional; - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + // Lift all overloads of into scope. + //{ + using CMultivariatePrior::addSamples; + using CMultivariatePrior::print; + //} - //! Get the tag name for this prior. - virtual std::string persistenceTag() const; - //@} +public: + //! \name Life-Cycle + //@{ + CMultivariateConstantPrior(std::size_t dimension, const TOptionalDouble10Vec& constant = TOptionalDouble10Vec()); - //! Get the constant value. - const TOptionalDouble10Vec &constant() const; + //! Construct by traversing a state document. + CMultivariateConstantPrior(std::size_t dimension, core::CStateRestoreTraverser& traverser); + //@} - private: - //! Create by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! \name Prior Contract + //@{ + //! Create a copy of the prior. + //! + //! \warning Caller owns returned object. + virtual CMultivariateConstantPrior* clone() const; - private: - //! 
 The data dimension.
-        std::size_t m_Dimension;
+    //! Get the dimension of the prior.
+    virtual std::size_t dimension() const;
 
-        //! The constant value.
-        TOptionalDouble10Vec m_Constant;
-};
+    //! Reset the prior to non-informative.
+    virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0);
+
+    //! No-op.
+    virtual void adjustOffset(const TWeightStyleVec& weightStyle, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights);
+
+    //! Set the constant if it hasn't been set.
+    virtual void addSamples(const TWeightStyleVec& weightStyle, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights);
+
+    //! No-op.
+    virtual void propagateForwardsByTime(double time);
+
+    //! Get the corresponding constant univariate prior.
+    virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const;
+
+    //! Get the corresponding constant bivariate prior.
+    virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const;
+
+    //! Get the support for the marginal likelihood function.
+    virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const;
+
+    //! Returns constant or zero if unset (by equidistribution).
+    virtual TDouble10Vec marginalLikelihoodMean() const;
+
+    //! Returns constant or zero if unset (by equidistribution).
+    virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const;
+
+    //! Get the covariance matrix of the marginal likelihood.
+    virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const;
+
+    //! Get the diagonal of the covariance matrix of the marginal likelihood.
+    virtual TDouble10Vec marginalLikelihoodVariances() const;
+
+    //! Returns a large value if all samples are equal to the constant
+    //! and zero otherwise.
+    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                          const TDouble10Vec1Vec& samples,
+                                                                          const TDouble10Vec4Vec1Vec& weights,
+                                                                          double& result) const;
+
+    //! Get \p numberSamples times the constant.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const;
+
+    //! Check if this is a non-informative prior.
+    bool isNonInformative() const;
+
+    //! Get a human readable description of the prior.
+    //!
+    //! \param[in] separator String used to separate priors.
+    //! \param[in,out] result Filled in with the description.
+    virtual void print(const std::string& separator, std::string& result) const;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Get the memory used by this component
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this component
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const;
+
+    //! Persist state by passing information to the supplied inserter
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Get the tag name for this prior.
+    virtual std::string persistenceTag() const;
+    //@}
+
+    //! Get the constant value.
+    const TOptionalDouble10Vec& constant() const;
+
+private:
+    //! Create by traversing a state document.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+private:
+    //! The data dimension.
+    std::size_t m_Dimension;
+
+    //! The constant value.
+ TOptionalDouble10Vec m_Constant; +}; } } diff --git a/include/maths/CMultivariateMultimodalPrior.h b/include/maths/CMultivariateMultimodalPrior.h index 5b757c1256..c0270330a5 100644 --- a/include/maths/CMultivariateMultimodalPrior.h +++ b/include/maths/CMultivariateMultimodalPrior.h @@ -9,16 +9,15 @@ #include #include -#include #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -27,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -44,48 +44,40 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace multivariate_multimodal_prior_detail -{ +namespace ml { +namespace maths { +namespace multivariate_multimodal_prior_detail { using TSizeDoublePr = std::pair; using TSizeDoublePr3Vec = core::CSmallVector; using TPriorPtr = boost::shared_ptr; using TDouble10Vec1Vec = CMultivariatePrior::TDouble10Vec1Vec; using TDouble10Vec4Vec1Vec = CMultivariatePrior::TDouble10Vec4Vec1Vec; -using TMode = SMultimodalPriorMode >; +using TMode = SMultimodalPriorMode>; using TModeVec = std::vector; //! Implementation of a sample joint log marginal likelihood calculation. MATHS_EXPORT -maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TModeVec &modes, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &sample, - const TDouble10Vec4Vec1Vec &weights, - TSizeDoublePr3Vec &modeLogLikelihoods, - double &result); +maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& sample, + const TDouble10Vec4Vec1Vec& weights, + TSizeDoublePr3Vec& modeLogLikelihoods, + double& result); //! Implementation of marginal likelihood sample. MATHS_EXPORT -void sampleMarginalLikelihood(const TModeVec &modes, - std::size_t numberSamples, - TDouble10Vec1Vec &samples); +void sampleMarginalLikelihood(const TModeVec& modes, std::size_t numberSamples, TDouble10Vec1Vec& samples); //! Implementation of mode printing. MATHS_EXPORT -void print(const TModeVec &modes, - const std::string &separator, - std::string &result); +void print(const TModeVec& modes, const std::string& separator, std::string& result); //! Implementation of mode merge callback. MATHS_EXPORT void modeMergeCallback(std::size_t dimension, - TModeVec &modes, - const TPriorPtr &seedPrior, + TModeVec& modes, + const TPriorPtr& seedPrior, std::size_t numberSamples, std::size_t leftMergeIndex, std::size_t rightMergeIndex, @@ -93,7 +85,7 @@ void modeMergeCallback(std::size_t dimension, //! Implementation of a full debug dump of the mode weights. MATHS_EXPORT -std::string debugWeights(const TModeVec &modes); +std::string debugWeights(const TModeVec& modes); } // multivariate_multimodal_prior_detail:: @@ -129,1150 +121,946 @@ std::string debugWeights(const TModeVec &modes); //! for the data when using one-of-n composition (see CMultivariateOneOfNPrior). //! From a design point of view this is the composite pattern. 
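Throughout the multimodal code a mode's weight is its prior's sample count, and reporting helpers such as print and debugWeights normalize by the total weight Z, as in the print implementation earlier in this patch. A reduced sketch with simplified, illustrative types:

#include <vector>

// Sketch: a mode carries an unnormalized weight; reporting divides each
// weight by the sum Z so the reported weights form a distribution.
struct SModeSketch {
    double s_Weight;
};

inline std::vector<double> normalizedWeights(const std::vector<SModeSketch>& modes) {
    double Z = 0.0;
    for (const auto& mode : modes) {
        Z += mode.s_Weight;
    }
    std::vector<double> result;
    result.reserve(modes.size());
    for (const auto& mode : modes) {
        result.push_back(mode.s_Weight / Z);
    }
    return result;
}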
template -class CMultivariateMultimodalPrior : public CMultivariatePrior -{ - public: - using TDouble5Vec = core::CSmallVector; - using TPoint = CVectorNx1; - using TFloatPoint = CVectorNx1; - using TPointVec = std::vector; - using TPoint4Vec = core::CSmallVector; - using TMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; - using TMatrix = CSymmetricMatrixNxN; - using TMatrixVec = std::vector; - using TClusterer = CClusterer; - using TClustererPtr = boost::shared_ptr; - using TPriorPtrVec = std::vector; - using TWeights = CConstantWeights; - - // Lift all overloads of into scope. - //{ - using CMultivariatePrior::dataType; - using CMultivariatePrior::decayRate; - using CMultivariatePrior::addSamples; - using CMultivariatePrior::print; - //} - - public: - //! \name Life-Cycle - //@{ - //! Create a new (empty) multimodal prior. - CMultivariateMultimodalPrior(maths_t::EDataType dataType, - const TClusterer &clusterer, - const CMultivariatePrior &seedPrior, - double decayRate = 0.0) : - CMultivariatePrior(dataType, decayRate), - m_Clusterer(clusterer.clone()), - m_SeedPrior(seedPrior.clone()) - { - // Register the split and merge callbacks. - m_Clusterer->splitFunc(CModeSplitCallback(*this)); - m_Clusterer->mergeFunc(CModeMergeCallback(*this)); +class CMultivariateMultimodalPrior : public CMultivariatePrior { +public: + using TDouble5Vec = core::CSmallVector; + using TPoint = CVectorNx1; + using TFloatPoint = CVectorNx1; + using TPointVec = std::vector; + using TPoint4Vec = core::CSmallVector; + using TMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + using TMatrix = CSymmetricMatrixNxN; + using TMatrixVec = std::vector; + using TClusterer = CClusterer; + using TClustererPtr = boost::shared_ptr; + using TPriorPtrVec = std::vector; + using TWeights = CConstantWeights; + + // Lift all overloads of into scope. + //{ + using CMultivariatePrior::addSamples; + using CMultivariatePrior::dataType; + using CMultivariatePrior::decayRate; + using CMultivariatePrior::print; + //} + +public: + //! \name Life-Cycle + //@{ + //! Create a new (empty) multimodal prior. + CMultivariateMultimodalPrior(maths_t::EDataType dataType, + const TClusterer& clusterer, + const CMultivariatePrior& seedPrior, + double decayRate = 0.0) + : CMultivariatePrior(dataType, decayRate), m_Clusterer(clusterer.clone()), m_SeedPrior(seedPrior.clone()) { + // Register the split and merge callbacks. + m_Clusterer->splitFunc(CModeSplitCallback(*this)); + m_Clusterer->mergeFunc(CModeMergeCallback(*this)); + } + + //! Create from a collection of priors. + //! + //! \note The priors are shallow copied. + //! \note This constructor doesn't support subsequent update of the prior. + CMultivariateMultimodalPrior(maths_t::EDataType dataType, TPriorPtrVec& priors) : CMultivariatePrior(dataType, 0.0) { + m_Modes.reserve(priors.size()); + for (std::size_t i = 0u; i < priors.size(); ++i) { + m_Modes.emplace_back(i, priors[i]); } - - //! Create from a collection of priors. - //! - //! \note The priors are shallow copied. - //! \note This constructor doesn't support subsequent update of the prior. - CMultivariateMultimodalPrior(maths_t::EDataType dataType, TPriorPtrVec &priors) : - CMultivariatePrior(dataType, 0.0) - { - m_Modes.reserve(priors.size()); - for (std::size_t i = 0u; i < priors.size(); ++i) - { - m_Modes.emplace_back(i, priors[i]); - } + } + + //! Construct from part of a state document. 
+ CMultivariateMultimodalPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) + : CMultivariatePrior(params.s_DataType, params.s_DecayRate) { + traverser.traverseSubLevel(boost::bind(&CMultivariateMultimodalPrior::acceptRestoreTraverser, this, boost::cref(params), _1)); + } + + //! Implements value semantics for copy construction. + CMultivariateMultimodalPrior(const CMultivariateMultimodalPrior& other) + : CMultivariatePrior(other.dataType(), other.decayRate()), + m_Clusterer(other.m_Clusterer->clone()), + m_SeedPrior(other.m_SeedPrior->clone()) { + // Register the split and merge callbacks. + m_Clusterer->splitFunc(CModeSplitCallback(*this)); + m_Clusterer->mergeFunc(CModeMergeCallback(*this)); + + // Clone all the modes up front so we can implement strong exception safety. + TModeVec modes; + modes.reserve(other.m_Modes.size()); + for (const auto& mode : other.m_Modes) { + modes.emplace_back(mode.s_Index, TPriorPtr(mode.s_Prior->clone())); } - - //! Construct from part of a state document. - CMultivariateMultimodalPrior(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) : - CMultivariatePrior(params.s_DataType, params.s_DecayRate) - { - traverser.traverseSubLevel(boost::bind(&CMultivariateMultimodalPrior::acceptRestoreTraverser, - this, boost::cref(params), _1)); + m_Modes.swap(modes); + + this->addSamples(other.numberSamples()); + } + + //! Implements value semantics for assignment. + //! + //! \param[in] rhs The multimodal model to copy. + //! \return The newly copied model. + CMultivariateMultimodalPrior& operator=(const CMultivariateMultimodalPrior& rhs) { + if (this != &rhs) { + CMultivariateMultimodalPrior copy(rhs); + this->swap(copy); } - - //! Implements value semantics for copy construction. - CMultivariateMultimodalPrior(const CMultivariateMultimodalPrior &other) : - CMultivariatePrior(other.dataType(), other.decayRate()), - m_Clusterer(other.m_Clusterer->clone()), - m_SeedPrior(other.m_SeedPrior->clone()) - { - // Register the split and merge callbacks. - m_Clusterer->splitFunc(CModeSplitCallback(*this)); - m_Clusterer->mergeFunc(CModeMergeCallback(*this)); - - // Clone all the modes up front so we can implement strong exception safety. - TModeVec modes; - modes.reserve(other.m_Modes.size()); - for (const auto &mode : other.m_Modes) - { - modes.emplace_back(mode.s_Index, TPriorPtr(mode.s_Prior->clone())); - } - m_Modes.swap(modes); - - this->addSamples(other.numberSamples()); + return *this; + } + + //! An efficient swap of the contents of this and \p other. + void swap(CMultivariateMultimodalPrior& other) { + this->CMultivariatePrior::swap(other); + + std::swap(m_Clusterer, other.m_Clusterer); + // The call backs for split and merge should point to the + // appropriate priors (we don't swap the "this" pointers + // after all). So we need to refresh them after swapping. + m_Clusterer->splitFunc(CModeSplitCallback(*this)); + m_Clusterer->mergeFunc(CModeMergeCallback(*this)); + other.m_Clusterer->splitFunc(CModeSplitCallback(other)); + other.m_Clusterer->mergeFunc(CModeMergeCallback(other)); + + std::swap(m_SeedPrior, other.m_SeedPrior); + m_Modes.swap(other.m_Modes); + } + //@} + + //! \name Prior Contract + //@{ + //! Create a copy of the prior. + //! + //! \warning Caller owns returned object. + virtual CMultivariatePrior* clone() const { return new CMultivariateMultimodalPrior(*this); } + + //! Get the dimension of the prior. + virtual std::size_t dimension() const { return N; } + + //! Set the data type. 
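The swap above has to re-register the split and merge callbacks because they capture the owning prior, so after swapping members each callback would otherwise still point at the other object. A stripped-down sketch of the same pattern (hypothetical class, std::function standing in for the clusterer callbacks):

#include <functional>
#include <utility>

// Sketch: a component holding a callback that captures "this". After
// swapping members the callbacks must be refreshed so each one sees its
// new owner's state, mirroring CMultivariateMultimodalPrior::swap.
class COwnerSketch {
public:
    explicit COwnerSketch(int state) : m_State(state) {
        this->registerCallback();
    }

    void swap(COwnerSketch& other) {
        std::swap(m_State, other.m_State);
        std::swap(m_OnSplit, other.m_OnSplit);
        // Refresh: without this, each functor still captures the other
        // object's "this" pointer.
        this->registerCallback();
        other.registerCallback();
    }

    int onSplit() const { return m_OnSplit(); }

private:
    void registerCallback() {
        m_OnSplit = [this] { return m_State; };
    }

private:
    int m_State;
    std::function<int()> m_OnSplit;
};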
+ virtual void dataType(maths_t::EDataType value) { + this->CMultivariatePrior::dataType(value); + m_Clusterer->dataType(value); + for (const auto& mode : m_Modes) { + mode.s_Prior->dataType(value); } - - //! Implements value semantics for assignment. - //! - //! \param[in] rhs The multimodal model to copy. - //! \return The newly copied model. - CMultivariateMultimodalPrior &operator=(const CMultivariateMultimodalPrior &rhs) - { - if (this != &rhs) - { - CMultivariateMultimodalPrior copy(rhs); - this->swap(copy); - } - return *this; + } + + //! Set the rate at which the prior returns to non-informative. + virtual void decayRate(double value) { + this->CMultivariatePrior::decayRate(value); + m_Clusterer->decayRate(this->decayRate()); + for (const auto& mode : m_Modes) { + mode.s_Prior->decayRate(this->decayRate()); } - - //! An efficient swap of the contents of this and \p other. - void swap(CMultivariateMultimodalPrior &other) - { - this->CMultivariatePrior::swap(other); - - std::swap(m_Clusterer, other.m_Clusterer); - // The call backs for split and merge should point to the - // appropriate priors (we don't swap the "this" pointers - // after all). So we need to refresh them after swapping. - m_Clusterer->splitFunc(CModeSplitCallback(*this)); - m_Clusterer->mergeFunc(CModeMergeCallback(*this)); - other.m_Clusterer->splitFunc(CModeSplitCallback(other)); - other.m_Clusterer->mergeFunc(CModeMergeCallback(other)); - - std::swap(m_SeedPrior, other.m_SeedPrior); - m_Modes.swap(other.m_Modes); + m_SeedPrior->decayRate(this->decayRate()); + } + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double /*offset*/, double decayRate) { + m_Clusterer->clear(); + m_Modes.clear(); + this->decayRate(decayRate); + this->numberSamples(0.0); + } + + //! For priors with non-negative support this adjusts the offset used + //! to extend the support to handle negative samples. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples The samples from which to determine the offset. + //! \param[in] weights The weights of each sample in \p samples. + virtual void adjustOffset(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) { + // This has to adjust offsets for its modes because it must be + // possible to call jointLogMarginalLikelihood before the samples + // have been added to the prior in order for model selection to + // work. + for (const auto& mode : m_Modes) { + mode.s_Prior->adjustOffset(weightStyles, samples, weights); + } + } + + //! Update the prior with a collection of independent samples from the + //! process. + //! + //! \param[in] weightStyles_ Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the process. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles_, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) { + if (samples.empty()) { + return; } - //@} - - //! \name Prior Contract - //@{ - //! Create a copy of the prior. - //! - //! \warning Caller owns returned object. - virtual CMultivariatePrior *clone() const - { - return new CMultivariateMultimodalPrior(*this); + if (!this->check(samples, weights)) { + return; } - //! 
Get the dimension of the prior. - virtual std::size_t dimension() const { return N; } + // See CMultimodalPrior::addSamples for discussion. - //! Set the data type. - virtual void dataType(maths_t::EDataType value) - { - this->CMultivariatePrior::dataType(value); - m_Clusterer->dataType(value); - for (const auto &mode : m_Modes) - { - mode.s_Prior->dataType(value); - } - } + using TSizeDoublePr2Vec = core::CSmallVector; - //! Set the rate at which the prior returns to non-informative. - virtual void decayRate(double value) - { - this->CMultivariatePrior::decayRate(value); - m_Clusterer->decayRate(this->decayRate()); - for (const auto &mode : m_Modes) - { - mode.s_Prior->decayRate(this->decayRate()); - } - m_SeedPrior->decayRate(this->decayRate()); - } + // Declared outside the loop to minimize the number of times it + // is initialized. + TWeightStyleVec weightStyles(weightStyles_); + TDouble10Vec1Vec sample(1); + TDouble10Vec4Vec1Vec weight(1); + TSizeDoublePr2Vec clusters; - //! Reset the prior to non-informative. - virtual void setToNonInformative(double /*offset*/, double decayRate) - { - m_Clusterer->clear(); - m_Modes.clear(); - this->decayRate(decayRate); - this->numberSamples(0.0); + std::size_t indices[maths_t::NUMBER_WEIGHT_STYLES]; + std::size_t missing = weightStyles.size() + 1; + std::fill_n(indices, maths_t::NUMBER_WEIGHT_STYLES, missing); + for (std::size_t i = 0u; i < weightStyles.size(); ++i) { + indices[weightStyles[i]] = i; } - - //! For priors with non-negative support this adjusts the offset used - //! to extend the support to handle negative samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples from which to determine the offset. - //! \param[in] weights The weights of each sample in \p samples. - virtual void adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) - { - // This has to adjust offsets for its modes because it must be - // possible to call jointLogMarginalLikelihood before the samples - // have been added to the prior in order for model selection to - // work. - for (const auto &mode : m_Modes) - { - mode.s_Prior->adjustOffset(weightStyles, samples, weights); - } + std::size_t seasonal = indices[maths_t::E_SampleSeasonalVarianceScaleWeight]; + std::size_t count = indices[maths_t::E_SampleCountWeight]; + std::size_t winsorisation = indices[maths_t::E_SampleWinsorisationWeight]; + if (count == missing) { + count = weightStyles.size(); + weightStyles.push_back(maths_t::E_SampleCountWeight); } - //! Update the prior with a collection of independent samples from the - //! process. - //! - //! \param[in] weightStyles_ Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles_, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) - { - if (samples.empty()) - { - return; - } - if (!this->check(samples, weights)) - { - return; - } - - // See CMultimodalPrior::addSamples for discussion. 
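The new addSamples builds its weight lookup once per call: each recognized style maps to its position in the caller's weightStyles vector, an out-of-range sentinel marks absent styles, and a count style is appended when the caller supplied none. A minimal self-contained sketch of that pattern (the enum and its values are simplified stand-ins, not the real maths_t definitions):

    // Sketch only: simplified stand-ins for maths_t::ESampleWeightStyle
    // and TWeightStyleVec, illustrating the sentinel-based lookup.
    #include <algorithm>
    #include <cstddef>
    #include <vector>

    enum EWeightStyle { E_SampleCountWeight = 0, E_SampleSeasonalVarianceScaleWeight = 1, NUMBER_WEIGHT_STYLES = 2 };

    int main() {
        std::vector<EWeightStyle> weightStyles{E_SampleSeasonalVarianceScaleWeight};

        // Any valid position is < weightStyles.size(), so this sentinel can't collide.
        std::size_t indices[NUMBER_WEIGHT_STYLES];
        std::size_t missing = weightStyles.size() + 1;
        std::fill_n(indices, NUMBER_WEIGHT_STYLES, missing);
        for (std::size_t i = 0; i < weightStyles.size(); ++i) {
            indices[weightStyles[i]] = i;
        }

        // Append a count style if the caller didn't supply one, so the
        // update loop can always write per-cluster counts at a known position.
        std::size_t count = indices[E_SampleCountWeight];
        if (count == missing) {
            count = weightStyles.size();
            weightStyles.push_back(E_SampleCountWeight);
        }
        return 0; // here count == 1 and weightStyles holds both styles
    }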
- - using TSizeDoublePr2Vec = core::CSmallVector; + try { + bool hasSeasonalScale = !this->isNonInformative() && seasonal != missing; - // Declared outside the loop to minimize the number of times it - // is initialized. - TWeightStyleVec weightStyles(weightStyles_); - TDouble10Vec1Vec sample(1); - TDouble10Vec4Vec1Vec weight(1); - TSizeDoublePr2Vec clusters; + TPoint mean = hasSeasonalScale ? this->mean() : TPoint(0.0); - std::size_t indices[maths_t::NUMBER_WEIGHT_STYLES]; - std::size_t missing = weightStyles.size() + 1; - std::fill_n(indices, maths_t::NUMBER_WEIGHT_STYLES, missing); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) - { - indices[weightStyles[i]] = i; - } - std::size_t seasonal = indices[maths_t::E_SampleSeasonalVarianceScaleWeight]; - std::size_t count = indices[maths_t::E_SampleCountWeight]; - std::size_t winsorisation = indices[maths_t::E_SampleWinsorisationWeight]; - if (count == missing) - { - count = weightStyles.size(); - weightStyles.push_back(maths_t::E_SampleCountWeight); - } + for (std::size_t i = 0u; i < samples.size(); ++i) { + TPoint x(samples[i]); + if (!CMathsFuncs::isFinite(x)) { + LOG_ERROR("Discarding " << x); + continue; + } - try - { - bool hasSeasonalScale = !this->isNonInformative() && seasonal != missing; + if (hasSeasonalScale) { + TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles_, weights[i]))); + x = mean + (x - mean) / seasonalScale; + } - TPoint mean = hasSeasonalScale ? this->mean() : TPoint(0.0); + sample[0] = x.template toVector(); + weight[0] = weights[i]; + weight[0].resize(weightStyles.size(), TDouble10Vec(N, 1.0)); + if (seasonal != missing) { + weight[0][seasonal].assign(N, 1.0); + } - for (std::size_t i = 0u; i < samples.size(); ++i) - { - TPoint x(samples[i]); - if (!CMathsFuncs::isFinite(x)) - { - LOG_ERROR("Discarding " << x); - continue; - } + double smallestCountWeight = this->smallest(weight[0][count]); + clusters.clear(); + m_Clusterer->add(x, clusters, smallestCountWeight); - if (hasSeasonalScale) - { - TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles_, weights[i]))); - x = mean + (x - mean) / seasonalScale; - } + double Z = std::accumulate( + m_Modes.begin(), m_Modes.end(), smallestCountWeight, [](double sum, const TMode& mode) { return sum + mode.weight(); }); - sample[0] = x.template toVector(); - weight[0] = weights[i]; - weight[0].resize(weightStyles.size(), TDouble10Vec(N, 1.0)); - if (seasonal != missing) - { - weight[0][seasonal].assign(N, 1.0); + double n = 0.0; + for (const auto& cluster : clusters) { + auto k = std::find_if(m_Modes.begin(), m_Modes.end(), CSetTools::CIndexInSet(cluster.first)); + if (k == m_Modes.end()) { + LOG_TRACE("Creating mode with index " << cluster.first); + m_Modes.emplace_back(cluster.first, m_SeedPrior); + k = m_Modes.end() - 1; } - - double smallestCountWeight = this->smallest(weight[0][count]); - clusters.clear(); - m_Clusterer->add(x, clusters, smallestCountWeight); - - double Z = std::accumulate(m_Modes.begin(), m_Modes.end(), - smallestCountWeight, - [](double sum, const TMode &mode) { return sum + mode.weight(); }); - - double n = 0.0; - for (const auto &cluster : clusters) - { - auto k = std::find_if(m_Modes.begin(), m_Modes.end(), - CSetTools::CIndexInSet(cluster.first)); - if (k == m_Modes.end()) - { - LOG_TRACE("Creating mode with index " << cluster.first); - m_Modes.emplace_back(cluster.first, m_SeedPrior); - k = m_Modes.end() - 1; - } - weight[0][count].assign(N, cluster.second); - if (winsorisation != 
missing) - { - TDouble10Vec &ww = weight[0][winsorisation]; - double f = (k->weight() + cluster.second) / Z; - for (auto &w : ww) - { - w = std::max(1.0 - (1.0 - w) / f, w * f); - } + weight[0][count].assign(N, cluster.second); + if (winsorisation != missing) { + TDouble10Vec& ww = weight[0][winsorisation]; + double f = (k->weight() + cluster.second) / Z; + for (auto& w : ww) { + w = std::max(1.0 - (1.0 - w) / f, w * f); } - k->s_Prior->addSamples(weightStyles, sample, weight); - n += this->smallest(maths_t::countForUpdate(N, weightStyles, weight[0])); } - this->addSamples(n); + k->s_Prior->addSamples(weightStyles, sample, weight); + n += this->smallest(maths_t::countForUpdate(N, weightStyles, weight[0])); } + this->addSamples(n); } - catch (const std::exception &e) - { - LOG_ERROR("Failed to update likelihood: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to update likelihood: " << e.what()); } + } + + //! Update the prior for the specified elapsed time. + virtual void propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { + LOG_ERROR("Bad propagation time " << time); + return; } - //! Update the prior for the specified elapsed time. - virtual void propagateForwardsByTime(double time) - { - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { - LOG_ERROR("Bad propagation time " << time); - return; - } - - if (this->isNonInformative()) - { - // Nothing to be done. - return; - } - - // We want to hold the probabilities constant. Since the i'th - // probability: - // p(i) = w(i) / Sum_j{ w(j) } - // - // where w(i) is its weight we can achieve this by multiplying - // all weights by some factor f in the range [0, 1]. - - if (!this->isForForecasting()) - { - m_Clusterer->propagateForwardsByTime(time); - } + if (this->isNonInformative()) { + // Nothing to be done. + return; + } - for (const auto &mode : m_Modes) - { - mode.s_Prior->propagateForwardsByTime(time); - } + // We want to hold the probabilities constant. Since the i'th + // probability: + // p(i) = w(i) / Sum_j{ w(j) } + // + // where w(i) is its weight we can achieve this by multiplying + // all weights by some factor f in the range [0, 1]. - this->numberSamples(this->numberSamples() * std::exp(-this->scaledDecayRate() * time)); - LOG_TRACE("numberSamples = " << this->numberSamples()); + if (!this->isForForecasting()) { + m_Clusterer->propagateForwardsByTime(time); } - //! Compute the univariate prior marginalizing over the variables - //! \p marginalize and conditioning on the variables \p condition. - //! - //! \param[in] marginalize The variables to marginalize out. - //! \param[in] condition The variables to condition on. - //! \warning The caller owns the result. - //! \note The variables are passed by the index of their dimension - //! which must therefore be in range. - //! \note The caller must specify dimension - 1 variables between - //! \p marginalize and \p condition so the resulting distribution - //! is univariate. 
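The winsorisation update above, w = max(1 - (1 - w) / f, w * f), relaxes the supplied weight according to the fraction f of the total count the mode explains. A small numeric illustration with made-up values:

    // Illustrative values only: w is an incoming winsorisation weight and f
    // the fraction of the total count attributed to the mode being updated.
    #include <algorithm>
    #include <initializer_list>
    #include <iostream>

    int main() {
        double w = 0.5;
        for (double f : {1.0, 0.5, 0.1}) {
            double relaxed = std::max(1.0 - (1.0 - w) / f, w * f);
            std::cout << "f = " << f << " -> " << relaxed << '\n';
        }
        // Prints 0.5, 0.25 and 0.05: a mode holding all the data leaves the
        // weight unchanged, while modes explaining little of the data have
        // their update damped progressively harder.
        return 0;
    }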
- virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const - { - std::size_t n = m_Modes.size(); - - CMultimodalPrior::TPriorPtrVec modes; - TDouble5Vec weights; - CBasicStatistics::COrderStatisticsStack > maxWeight; - modes.reserve(n); - weights.reserve(n); - - for (const auto &mode : m_Modes) - { - TUnivariatePriorPtrDoublePr prior(mode.s_Prior->univariate(marginalize, condition)); - if (prior.first == 0) - { - return TUnivariatePriorPtrDoublePr(); - } - if (prior.first->isNonInformative()) - { - continue; - } - modes.push_back(prior.first); - weights.push_back(prior.second); - maxWeight.add(weights.back()); - } + for (const auto& mode : m_Modes) { + mode.s_Prior->propagateForwardsByTime(time); + } - double Z = 0.0; - for (auto &weight : weights) - { - weight = std::exp(weight - maxWeight[0]); - Z += weight; + this->numberSamples(this->numberSamples() * std::exp(-this->scaledDecayRate() * time)); + LOG_TRACE("numberSamples = " << this->numberSamples()); + } + + //! Compute the univariate prior marginalizing over the variables + //! \p marginalize and conditioning on the variables \p condition. + //! + //! \param[in] marginalize The variables to marginalize out. + //! \param[in] condition The variables to condition on. + //! \warning The caller owns the result. + //! \note The variables are passed by the index of their dimension + //! which must therefore be in range. + //! \note The caller must specify dimension - 1 variables between + //! \p marginalize and \p condition so the resulting distribution + //! is univariate. + virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + std::size_t n = m_Modes.size(); + + CMultimodalPrior::TPriorPtrVec modes; + TDouble5Vec weights; + CBasicStatistics::COrderStatisticsStack> maxWeight; + modes.reserve(n); + weights.reserve(n); + + for (const auto& mode : m_Modes) { + TUnivariatePriorPtrDoublePr prior(mode.s_Prior->univariate(marginalize, condition)); + if (prior.first == 0) { + return TUnivariatePriorPtrDoublePr(); } - for (std::size_t i = 0u; i < weights.size(); ++i) - { - modes[i]->numberSamples(weights[i] / Z * modes[i]->numberSamples()); + if (prior.first->isNonInformative()) { + continue; } + modes.push_back(prior.first); + weights.push_back(prior.second); + maxWeight.add(weights.back()); + } - return {TUnivariatePriorPtr(new CMultimodalPrior(this->dataType(), - this->decayRate(), modes)), - Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0}; + double Z = 0.0; + for (auto& weight : weights) { + weight = std::exp(weight - maxWeight[0]); + Z += weight; + } + for (std::size_t i = 0u; i < weights.size(); ++i) { + modes[i]->numberSamples(weights[i] / Z * modes[i]->numberSamples()); } - //! Compute the bivariate prior marginalizing over the variables - //! \p marginalize and conditioning on the variables \p condition. - //! - //! \param[in] marginalize The variables to marginalize out. - //! \param[in] condition The variables to condition on. - //! \warning The caller owns the result. - //! \note The variables are passed by the index of their dimension - //! which must therefore be in range. - //! \note It is assumed that the variables are in sorted order. - //! \note The caller must specify dimension - 2 variables between - //! \p marginalize and \p condition so the resulting distribution - //! is univariate. 
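univariate() combines the per-mode weights with the usual log-sum-exp shift: the weights arrive as log-likelihoods, so subtracting the maximum before exponentiating avoids underflow, and the total log mass is recovered as maxWeight[0] + log(Z). A self-contained sketch with arbitrary values:

    // Sketch of the log-sum-exp normalization pattern used in univariate().
    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<double> logWeights{-1001.0, -1000.0, -1003.0};
        double maxLogWeight = *std::max_element(logWeights.begin(), logWeights.end());

        double Z = 0.0;
        for (auto& weight : logWeights) {
            weight = std::exp(weight - maxLogWeight); // safe relative weight
            Z += weight;
        }
        for (auto& weight : logWeights) {
            weight /= Z; // normalized to sum to one
        }

        // The total log mass is maxLogWeight + log(Z); exponentiating
        // -1000.0 directly would have underflowed to zero.
        std::cout << "log Z_total = " << (maxLogWeight + std::log(Z)) << '\n';
        return 0;
    }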
-        virtual TPriorPtrDoublePr bivariate(const TSize10Vec &marginalize,
-                                            const TSizeDoublePr10Vec &condition) const
-        {
-            if (N == 2)
-            {
-                return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0);
-            }
+        return {TUnivariatePriorPtr(new CMultimodalPrior(this->dataType(), this->decayRate(), modes)),
+                Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0};
+    }
+
+    //! Compute the bivariate prior marginalizing over the variables
+    //! \p marginalize and conditioning on the variables \p condition.
+    //!
+    //! \param[in] marginalize The variables to marginalize out.
+    //! \param[in] condition The variables to condition on.
+    //! \warning The caller owns the result.
+    //! \note The variables are passed by the index of their dimension
+    //! which must therefore be in range.
+    //! \note It is assumed that the variables are in sorted order.
+    //! \note The caller must specify dimension - 2 variables between
+    //! \p marginalize and \p condition so the resulting distribution
+    //! is bivariate.
+    virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const {
+        if (N == 2) {
+            return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0);
+        }

-            std::size_t n = m_Modes.size();
+        std::size_t n = m_Modes.size();

-            TPriorPtrVec modes;
-            TDouble5Vec weights;
-            modes.reserve(n);
-            weights.reserve(n);
-            CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double> > maxWeight;
+        TPriorPtrVec modes;
+        TDouble5Vec weights;
+        modes.reserve(n);
+        weights.reserve(n);
+        CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double>> maxWeight;

-            for (const auto &mode : m_Modes)
-            {
-                TPriorPtrDoublePr prior(mode.s_Prior->bivariate(marginalize, condition));
-                if (prior.first == 0)
-                {
-                    return TPriorPtrDoublePr();
-                }
-                if (prior.first->isNonInformative())
-                {
-                    continue;
-                }
-                modes.push_back(prior.first);
-                weights.push_back(prior.second);
-                maxWeight.add(weights.back());
+        for (const auto& mode : m_Modes) {
+            TPriorPtrDoublePr prior(mode.s_Prior->bivariate(marginalize, condition));
+            if (prior.first == 0) {
+                return TPriorPtrDoublePr();
            }
-
-            double Z = 0.0;
-            for (auto &weight : weights)
-            {
-                weight = std::exp(weight - maxWeight[0]);
-                Z += weight;
-            }
-            for (std::size_t i = 0u; i < weights.size(); ++i)
-            {
-                modes[i]->numberSamples(weights[i] / Z * modes[i]->numberSamples());
+            if (prior.first->isNonInformative()) {
+                continue;
            }
-
-            return {TPriorPtr(new CMultivariateMultimodalPrior<2>(this->dataType(), modes)),
-                    Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0};
+            modes.push_back(prior.first);
+            weights.push_back(prior.second);
+            maxWeight.add(weights.back());
        }

-        //! Get the support for the marginal likelihood function.
-        virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const
-        {
-            if (m_Modes.size() == 0)
-            {
-                return {TPoint::smallest().template toVector<TDouble10Vec>(),
-                        TPoint::largest().template toVector<TDouble10Vec>()};
-            }
-            if (m_Modes.size() == 1)
-            {
-                return m_Modes[0].s_Prior->marginalLikelihoodSupport();
-            }
-
-            TPoint lower = TPoint::largest();
-            TPoint upper = TPoint::smallest();
-
-            // We define this is as the union of the mode supports. 
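The support computation referenced above takes the componentwise minimum and maximum over the per-mode supports, i.e. the smallest axis-aligned box containing every mode. A sketch with plain arrays standing in for TPoint:

    // Sketch of the "union of the mode supports" rule: componentwise
    // min/max over the modes gives a bounding box containing all of them.
    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <vector>

    using TPoint2 = std::array<double, 2>;
    struct SSupport {
        TPoint2 s_Lower;
        TPoint2 s_Upper;
    };

    int main() {
        std::vector<SSupport> modeSupports{{TPoint2{-3.0, 0.0}, TPoint2{1.0, 5.0}},
                                           {TPoint2{-1.0, -2.0}, TPoint2{4.0, 3.0}}};

        TPoint2 lower;
        lower.fill(std::numeric_limits<double>::max());
        TPoint2 upper;
        upper.fill(std::numeric_limits<double>::lowest());

        for (const auto& support : modeSupports) {
            for (std::size_t i = 0; i < 2; ++i) {
                lower[i] = std::min(lower[i], support.s_Lower[i]);
                upper[i] = std::max(upper[i], support.s_Upper[i]);
            }
        }

        // Prints -3 -2 4 5: the smallest box containing both mode supports.
        std::cout << lower[0] << ' ' << lower[1] << ' ' << upper[0] << ' ' << upper[1] << '\n';
        return 0;
    }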
-            for (const auto &mode : m_Modes)
-            {
-                TDouble10VecDouble10VecPr s = mode.s_Prior->marginalLikelihoodSupport();
-                lower = min(lower, TPoint(s.first));
-                upper = max(upper, TPoint(s.second));
-            }
-
-            return {lower.template toVector<TDouble10Vec>(),
-                    upper.template toVector<TDouble10Vec>()};
+        double Z = 0.0;
+        for (auto& weight : weights) {
+            weight = std::exp(weight - maxWeight[0]);
+            Z += weight;
+        }
+        for (std::size_t i = 0u; i < weights.size(); ++i) {
+            modes[i]->numberSamples(weights[i] / Z * modes[i]->numberSamples());
        }

-        //! Get the mean of the marginal likelihood function.
-        virtual TDouble10Vec marginalLikelihoodMean() const
-        {
-            if (m_Modes.size() == 0)
-            {
-                return TDouble10Vec(N, 0.0);
-            }
-            if (m_Modes.size() == 1)
-            {
-                return m_Modes[0].s_Prior->marginalLikelihoodMean();
-            }
+        return {TPriorPtr(new CMultivariateMultimodalPrior<2>(this->dataType(), modes)), Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0};
+    }

-            return this->mean().template toVector<TDouble10Vec>();
+    //! Get the support for the marginal likelihood function.
+    virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const {
+        if (m_Modes.size() == 0) {
+            return {TPoint::smallest().template toVector<TDouble10Vec>(), TPoint::largest().template toVector<TDouble10Vec>()};
+        }
+        if (m_Modes.size() == 1) {
+            return m_Modes[0].s_Prior->marginalLikelihoodSupport();
        }

-        //! Get the nearest mean of the multimodal prior marginal likelihood,
-        //! otherwise the marginal likelihood mean.
-        virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec &value_) const
-        {
-            if (m_Modes.empty())
-            {
-                return TDouble10Vec(N, 0.0);
-            }
-            if (m_Modes.size() == 1)
-            {
-                return m_Modes[0].s_Prior->marginalLikelihoodMean();
-            }
+        TPoint lower = TPoint::largest();
+        TPoint upper = TPoint::smallest();

-            TPoint value(value_);
+        // We define this as the union of the mode supports.
+        for (const auto& mode : m_Modes) {
+            TDouble10VecDouble10VecPr s = mode.s_Prior->marginalLikelihoodSupport();
+            lower = min(lower, TPoint(s.first));
+            upper = max(upper, TPoint(s.second));
+        }

-            TPoint result(m_Modes[0].s_Prior->marginalLikelihoodMean());
-            double distance = (value - result).euclidean();
-            for (std::size_t i = 1u; i < m_Modes.size(); ++i)
-            {
-                TPoint mean(m_Modes[i].s_Prior->marginalLikelihoodMean());
-                double di = (value - mean).euclidean();
-                if (di < distance)
-                {
-                    distance = di;
-                    result = mean;
-                }
-            }
+        return {lower.template toVector<TDouble10Vec>(), upper.template toVector<TDouble10Vec>()};
+    }

-            return result.template toVector<TDouble10Vec>();
+    //! Get the mean of the marginal likelihood function.
+    virtual TDouble10Vec marginalLikelihoodMean() const {
+        if (m_Modes.size() == 0) {
+            return TDouble10Vec(N, 0.0);
+        }
+        if (m_Modes.size() == 1) {
+            return m_Modes[0].s_Prior->marginalLikelihoodMean();
        }

-        //! Get the mode of the marginal likelihood function.
-        virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec &weightStyles,
-                                                    const TDouble10Vec4Vec &weight) const
-        {
-            if (m_Modes.size() == 0)
-            {
-                return TDouble10Vec(N, 0.0);
-            }
-            if (m_Modes.size() == 1)
-            {
-                return m_Modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weight);
-            }
+        return this->mean().template toVector<TDouble10Vec>();
+    }

-            using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double> >;
+    //! Get the nearest mean of the multimodal prior marginal likelihood,
+    //! otherwise the marginal likelihood mean. 
+ virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec& value_) const { + if (m_Modes.empty()) { + return TDouble10Vec(N, 0.0); + } + if (m_Modes.size() == 1) { + return m_Modes[0].s_Prior->marginalLikelihoodMean(); + } - // We'll approximate this as the mode with the maximum likelihood. - TPoint result(0.0); + TPoint value(value_); - TPoint seasonalScale(1.0); - TDouble10Vec4Vec1Vec weight_(1, TDouble10Vec4Vec(1)); - try - { - seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weight))); - weight_[0][0] = maths_t::countVarianceScale(N, weightStyles, weight); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get variance scale " << e.what()); + TPoint result(m_Modes[0].s_Prior->marginalLikelihoodMean()); + double distance = (value - result).euclidean(); + for (std::size_t i = 1u; i < m_Modes.size(); ++i) { + TPoint mean(m_Modes[i].s_Prior->marginalLikelihoodMean()); + double di = (value - mean).euclidean(); + if (di < distance) { + distance = di; + result = mean; } + } - // Declared outside the loop to minimize number of times it is created. - TDouble10Vec1Vec mode(1); - - TMaxAccumulator modeLikelihood; - for (const auto &mode_ : m_Modes) - { - double w = mode_.weight(); - const TPriorPtr &prior = mode_.s_Prior; - mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight_[0]); - double likelihood; - if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, - mode, - weight_, - likelihood) & maths_t::E_FpAllErrors) - { - continue; - } - if (modeLikelihood.add(std::log(w) + likelihood)) - { - result = TPoint(mode[0]); - } - } + return result.template toVector(); + } - TPoint mean = this->mean(); - result = mean + seasonalScale * (result - mean); - return result.template toVector(); + //! Get the mode of the marginal likelihood function. + virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weight) const { + if (m_Modes.size() == 0) { + return TDouble10Vec(N, 0.0); } - - //! Get the local maxima of the marginal likelihood functions. - TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) const - { - TDouble10Vec1Vec result; - result.reserve(m_Modes.size()); - for (const auto &mode : m_Modes) - { - result.push_back(mode.s_Prior->marginalLikelihoodMode(weightStyles, weights)); - } - return result; + if (m_Modes.size() == 1) { + return m_Modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weight); } - //! Get the covariance matrix for the marginal likelihood. - virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const - { - if (m_Modes.size() == 0) - { - return TPoint::largest().diagonal().template toVectors(); + using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack>; + + // We'll approximate this as the mode with the maximum likelihood. + TPoint result(0.0); + + TPoint seasonalScale(1.0); + TDouble10Vec4Vec1Vec weight_(1, TDouble10Vec4Vec(1)); + try { + seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weight))); + weight_[0][0] = maths_t::countVarianceScale(N, weightStyles, weight); + } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale " << e.what()); } + + // Declared outside the loop to minimize number of times it is created. 
+ TDouble10Vec1Vec mode(1); + + TMaxAccumulator modeLikelihood; + for (const auto& mode_ : m_Modes) { + double w = mode_.weight(); + const TPriorPtr& prior = mode_.s_Prior; + mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight_[0]); + double likelihood; + if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, mode, weight_, likelihood) & maths_t::E_FpAllErrors) { + continue; } - if (m_Modes.size() == 1) - { - return m_Modes[0].s_Prior->marginalLikelihoodCovariance(); + if (modeLikelihood.add(std::log(w) + likelihood)) { + result = TPoint(mode[0]); } - return this->covarianceMatrix().template toVectors(); } - //! Get the diagonal of the covariance matrix for the marginal likelihood. - virtual TDouble10Vec marginalLikelihoodVariances() const - { - if (m_Modes.size() == 0) - { - return TPoint::largest().template toVector(); - } - if (m_Modes.size() == 1) - { - return m_Modes[0].s_Prior->marginalLikelihoodVariances(); - } - return this->covarianceMatrix().template diagonal(); + TPoint mean = this->mean(); + result = mean + seasonalScale * (result - mean); + return result.template toVector(); + } + + //! Get the local maxima of the marginal likelihood functions. + TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const { + TDouble10Vec1Vec result; + result.reserve(m_Modes.size()); + for (const auto& mode : m_Modes) { + result.push_back(mode.s_Prior->marginalLikelihoodMode(weightStyles, weights)); } + return result; + } - //! Calculate the log marginal likelihood function, integrating over the - //! prior density function. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - double &result) const - { - result = 0.0; - - if (samples.empty()) - { - LOG_ERROR("Can't compute likelihood for empty sample set"); - return maths_t::E_FpFailed; - } - if (!this->check(samples, weights)) - { - return maths_t::E_FpFailed; - } + //! Get the covariance matrix for the marginal likelihood. + virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const { + if (m_Modes.size() == 0) { + return TPoint::largest().diagonal().template toVectors(); + } + if (m_Modes.size() == 1) { + return m_Modes[0].s_Prior->marginalLikelihoodCovariance(); + } + return this->covarianceMatrix().template toVectors(); + } - if (this->isNonInformative()) - { - // The non-informative likelihood is improper and effectively - // zero everywhere. We use minus max double because - // log(0) = HUGE_VALUE, which causes problems for Windows. - // Calling code is notified when the calculation overflows - // and should avoid taking the exponential since this will - // underflow and pollute the floating point environment. This - // may cause issues for some library function implementations - // (see fe*exceptflag for more details). - result = boost::numeric::bounds::lowest(); - return maths_t::E_FpOverflowed; - } + //! Get the diagonal of the covariance matrix for the marginal likelihood. 
+ virtual TDouble10Vec marginalLikelihoodVariances() const { + if (m_Modes.size() == 0) { + return TPoint::largest().template toVector(); + } + if (m_Modes.size() == 1) { + return m_Modes[0].s_Prior->marginalLikelihoodVariances(); + } + return this->covarianceMatrix().template diagonal(); + } + + //! Calculate the log marginal likelihood function, integrating over the + //! prior density function. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the process. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] result Filled in with the joint likelihood of \p samples. + virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& result) const { + result = 0.0; + + if (samples.empty()) { + LOG_ERROR("Can't compute likelihood for empty sample set"); + return maths_t::E_FpFailed; + } + if (!this->check(samples, weights)) { + return maths_t::E_FpFailed; + } - if (m_Modes.size() == 1) - { - // Apply a small penalty to kill off this model if the data are - // single mode. - maths_t::EFloatingPointErrorStatus status = - m_Modes[0].s_Prior->jointLogMarginalLikelihood(weightStyles, samples, weights, result); - result -= 10.0 * this->decayRate(); - return status; - } + if (this->isNonInformative()) { + // The non-informative likelihood is improper and effectively + // zero everywhere. We use minus max double because + // log(0) = HUGE_VALUE, which causes problems for Windows. + // Calling code is notified when the calculation overflows + // and should avoid taking the exponential since this will + // underflow and pollute the floating point environment. This + // may cause issues for some library function implementations + // (see fe*exceptflag for more details). + result = boost::numeric::bounds::lowest(); + return maths_t::E_FpOverflowed; + } - // See CMultimodalPrior::jointLogMarginalLikelihood for discussion. + if (m_Modes.size() == 1) { + // Apply a small penalty to kill off this model if the data are + // single mode. + maths_t::EFloatingPointErrorStatus status = + m_Modes[0].s_Prior->jointLogMarginalLikelihood(weightStyles, samples, weights, result); + result -= 10.0 * this->decayRate(); + return status; + } - namespace detail = multivariate_multimodal_prior_detail; + // See CMultimodalPrior::jointLogMarginalLikelihood for discussion. - // Declared outside the loop to minimize number of times it is created. - TDouble10Vec1Vec sample(1); - detail::TSizeDoublePr3Vec modeLogLikelihoods; - modeLogLikelihoods.reserve(m_Modes.size()); + namespace detail = multivariate_multimodal_prior_detail; - bool hasSeasonalScale = maths_t::hasSeasonalVarianceScale(weightStyles, weights); + // Declared outside the loop to minimize number of times it is created. + TDouble10Vec1Vec sample(1); + detail::TSizeDoublePr3Vec modeLogLikelihoods; + modeLogLikelihoods.reserve(m_Modes.size()); - TPoint mean = hasSeasonalScale ? 
this->mean() : TPoint(0.0); - TDouble10Vec4Vec1Vec weights_(1, TDouble10Vec4Vec(1, TDouble10Vec(N, 1.0))); - try - { - for (std::size_t i = 0u; i < samples.size(); ++i) - { - double n = this->smallest(maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i]))); - double logSeasonalScale = 0.0; - for (std::size_t j = 0u; j < seasonalScale.dimension(); ++j) - { - logSeasonalScale += std::log(seasonalScale(j)); - } + bool hasSeasonalScale = maths_t::hasSeasonalVarianceScale(weightStyles, weights); - TPoint x(samples[i]); - if (hasSeasonalScale) - { - x = mean + (x - mean) / seasonalScale; - } - sample[0] = x.template toVector(); - weights_[0][0] = maths_t::countVarianceScale(N, weightStyles, weights[i]); - - double sampleLogLikelihood; - maths_t::EFloatingPointErrorStatus status = - detail::jointLogMarginalLikelihood(m_Modes, - TWeights::COUNT_VARIANCE, sample, weights_, - modeLogLikelihoods, - sampleLogLikelihood); - if (status & maths_t::E_FpOverflowed) - { - result = boost::numeric::bounds::lowest(); - return status; - } - if (status & maths_t::E_FpFailed) - { - return status; - } - result += n * (sampleLogLikelihood - logSeasonalScale); + TPoint mean = hasSeasonalScale ? this->mean() : TPoint(0.0); + TDouble10Vec4Vec1Vec weights_(1, TDouble10Vec4Vec(1, TDouble10Vec(N, 1.0))); + try { + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = this->smallest(maths_t::countForUpdate(N, weightStyles, weights[i])); + TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i]))); + double logSeasonalScale = 0.0; + for (std::size_t j = 0u; j < seasonalScale.dimension(); ++j) { + logSeasonalScale += std::log(seasonalScale(j)); } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute likelihood: " << e.what()); - return maths_t::E_FpFailed; - } - LOG_TRACE("Joint log likelihood = " << result); - - maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result); - if (status & maths_t::E_FpFailed) - { - LOG_ERROR("Failed to compute likelihood (" << this->debugWeights() << ")"); - LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); - LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); + TPoint x(samples[i]); + if (hasSeasonalScale) { + x = mean + (x - mean) / seasonalScale; + } + sample[0] = x.template toVector(); + weights_[0][0] = maths_t::countVarianceScale(N, weightStyles, weights[i]); + + double sampleLogLikelihood; + maths_t::EFloatingPointErrorStatus status = detail::jointLogMarginalLikelihood( + m_Modes, TWeights::COUNT_VARIANCE, sample, weights_, modeLogLikelihoods, sampleLogLikelihood); + if (status & maths_t::E_FpOverflowed) { + result = boost::numeric::bounds::lowest(); + return status; + } + if (status & maths_t::E_FpFailed) { + return status; + } + result += n * (sampleLogLikelihood - logSeasonalScale); } - return status; + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute likelihood: " << e.what()); + return maths_t::E_FpFailed; } - //! Sample the marginal likelihood function. - //! - //! The marginal likelihood functions are sampled in quantile intervals - //! of the generalized cumulative density function, specifically intervals - //! between contours of constant probability density. - //! - //! The idea is to capture a set of samples that accurately and efficiently - //! represent the information in the prior. Random sampling (although it - //! 
has nice asymptotic properties) doesn't fulfill the second requirement: - //! typically requiring many more samples than sampling in quantile intervals - //! to capture the same amount of information. - //! - //! This is to allow us to transform one prior distribution into another - //! completely generically and relatively efficiently, by updating the target - //! prior with these samples. As such the prior needs to maintain a count of - //! the number of samples to date so that it isn't over sampled. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble10Vec1Vec &samples) const - { - namespace detail = multivariate_multimodal_prior_detail; - - samples.clear(); - - if (numberSamples == 0 || this->numberSamples() == 0.0) - { - return; - } + LOG_TRACE("Joint log likelihood = " << result); - detail::sampleMarginalLikelihood(m_Modes, numberSamples, samples); + maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result); + if (status & maths_t::E_FpFailed) { + LOG_ERROR("Failed to compute likelihood (" << this->debugWeights() << ")"); + LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); + LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); } - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const - { - return m_Modes.empty() || ( m_Modes.size() == 1 - && m_Modes[0].s_Prior->isNonInformative()); + return status; + } + + //! Sample the marginal likelihood function. + //! + //! The marginal likelihood functions are sampled in quantile intervals + //! of the generalized cumulative density function, specifically intervals + //! between contours of constant probability density. + //! + //! The idea is to capture a set of samples that accurately and efficiently + //! represent the information in the prior. Random sampling (although it + //! has nice asymptotic properties) doesn't fulfill the second requirement: + //! typically requiring many more samples than sampling in quantile intervals + //! to capture the same amount of information. + //! + //! This is to allow us to transform one prior distribution into another + //! completely generically and relatively efficiently, by updating the target + //! prior with these samples. As such the prior needs to maintain a count of + //! the number of samples to date so that it isn't over sampled. + //! + //! \param[in] numberSamples The number of samples required. + //! \param[out] samples Filled in with samples from the prior. + //! \note \p numberSamples is truncated to the number of samples received. + virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const { + namespace detail = multivariate_multimodal_prior_detail; + + samples.clear(); + + if (numberSamples == 0 || this->numberSamples() == 0.0) { + return; } - //! Get a human readable description of the prior. - //! - //! \param[in] separator String used to separate priors. - //! \param[in,out] result Filled in with the description. 
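Sampling in quantile intervals, as described above, takes one representative point per equal-probability slice of the distribution rather than drawing at random. A sketch for a standard normal, using one point at each interval's probability midpoint (the bisection inverse CDF here is an illustrative shortcut, not the library's implementation):

    // Sketch of quantile-interval sampling for a standard normal.
    #include <cmath>
    #include <iostream>

    // Standard normal CDF.
    double cdf(double x) { return 0.5 * std::erfc(-x / std::sqrt(2.0)); }

    // Invert the CDF by bisection on a bracket wide enough for our quantiles.
    double quantile(double p) {
        double lo = -10.0, hi = 10.0;
        for (int i = 0; i < 80; ++i) {
            double mid = 0.5 * (lo + hi);
            (cdf(mid) < p ? lo : hi) = mid;
        }
        return 0.5 * (lo + hi);
    }

    int main() {
        const int n = 5;
        for (int i = 0; i < n; ++i) {
            // Midpoint of the i'th interval [i/n, (i+1)/n] of probability mass.
            double p = (i + 0.5) / n;
            std::cout << "sample " << i << " = " << quantile(p) << '\n';
        }
        return 0;
    }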
- virtual void print(const std::string &separator, - std::string &result) const - { - namespace detail = multivariate_multimodal_prior_detail; - result += "\n" + separator + " multivariate multimodal"; - if (this->isNonInformative()) - { - result += " non-informative"; - return; - } - detail::print(m_Modes, separator, result); - result += "\n" + separator; + detail::sampleMarginalLikelihood(m_Modes, numberSamples, samples); + } + + //! Check if this is a non-informative prior. + virtual bool isNonInformative() const { return m_Modes.empty() || (m_Modes.size() == 1 && m_Modes[0].s_Prior->isNonInformative()); } + + //! Get a human readable description of the prior. + //! + //! \param[in] separator String used to separate priors. + //! \param[in,out] result Filled in with the description. + virtual void print(const std::string& separator, std::string& result) const { + namespace detail = multivariate_multimodal_prior_detail; + result += "\n" + separator + " multivariate multimodal"; + if (this->isNonInformative()) { + result += " non-informative"; + return; } - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const - { - seed = this->CMultivariatePrior::checksum(seed); - seed = CChecksum::calculate(seed, m_Clusterer); - seed = CChecksum::calculate(seed, m_SeedPrior); - return CChecksum::calculate(seed, m_Modes); + detail::print(m_Modes, separator, result); + result += "\n" + separator; + } + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const { + seed = this->CMultivariatePrior::checksum(seed); + seed = CChecksum::calculate(seed, m_Clusterer); + seed = CChecksum::calculate(seed, m_SeedPrior); + return CChecksum::calculate(seed, m_Modes); + } + + //! Get the memory used by this component + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CMultivariateMultimodalPrior"); + core::CMemoryDebug::dynamicSize("m_Clusterer", m_Clusterer, mem); + core::CMemoryDebug::dynamicSize("m_SeedPrior", m_SeedPrior, mem); + core::CMemoryDebug::dynamicSize("m_Modes", m_Modes, mem); + } + + //! Get the memory used by this component + virtual std::size_t memoryUsage() const { + std::size_t mem = core::CMemory::dynamicSize(m_Clusterer); + mem += core::CMemory::dynamicSize(m_SeedPrior); + mem += core::CMemory::dynamicSize(m_Modes); + return mem; + } + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const { return sizeof(*this); } + + //! Get the tag name for this prior. + virtual std::string persistenceTag() const { return MULTIMODAL_TAG + core::CStringUtils::typeToString(N); } + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(CLUSTERER_TAG, boost::bind(CClustererStateSerialiser(), boost::cref(*m_Clusterer), _1)); + inserter.insertLevel(SEED_PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_SeedPrior), _1)); + for (std::size_t i = 0u; i < m_Modes.size(); ++i) { + inserter.insertLevel(MODE_TAG, boost::bind(&TMode::acceptPersistInserter, &m_Modes[i], _1)); } - - //! 
Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CMultivariateMultimodalPrior"); - core::CMemoryDebug::dynamicSize("m_Clusterer", m_Clusterer, mem); - core::CMemoryDebug::dynamicSize("m_SeedPrior", m_SeedPrior, mem); - core::CMemoryDebug::dynamicSize("m_Modes", m_Modes, mem); + inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); + } + //@} + + //! Get the current number of modes. + std::size_t numberModes() const { return m_Modes.size(); } + + //! Get the expected mean of the marginal likelihood. + TPoint mean() const { + // By linearity we have that: + // Integral{ x * Sum_i{ w(i) * f(x | i) } } + // = Sum_i{ w(i) * Integral{ x * f(x | i) } } + // = Sum_i{ w(i) * mean(i) } + + TMeanAccumulator result; + for (const auto& mode : m_Modes) { + double weight = mode.weight(); + result.add(TPoint(mode.s_Prior->marginalLikelihoodMean()), weight); } + return CBasicStatistics::mean(result); + } - //! Get the memory used by this component - virtual std::size_t memoryUsage() const - { - std::size_t mem = core::CMemory::dynamicSize(m_Clusterer); - mem += core::CMemory::dynamicSize(m_SeedPrior); - mem += core::CMemory::dynamicSize(m_Modes); - return mem; - } +protected: + using TMode = multivariate_multimodal_prior_detail::TMode; + using TModeVec = multivariate_multimodal_prior_detail::TModeVec; - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const - { - return sizeof(*this); - } +protected: + //! Get the modes. + const TModeVec& modes() const { return m_Modes; } - //! Get the tag name for this prior. - virtual std::string persistenceTag() const - { - return MULTIMODAL_TAG + core::CStringUtils::typeToString(N); - } +private: + //! The callback invoked when a mode is split. + class CModeSplitCallback { + public: + static const std::size_t MODE_SPLIT_NUMBER_SAMPLES; - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - inserter.insertLevel(CLUSTERER_TAG, - boost::bind(CClustererStateSerialiser(), - boost::cref(*m_Clusterer), - _1)); - inserter.insertLevel(SEED_PRIOR_TAG, - boost::bind(CPriorStateSerialiser(), boost::cref(*m_SeedPrior), _1)); - for (std::size_t i = 0u; i < m_Modes.size(); ++i) - { - inserter.insertLevel(MODE_TAG, boost::bind(&TMode::acceptPersistInserter, &m_Modes[i], _1)); - } - inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, - this->numberSamples(), - core::CIEEE754::E_SinglePrecision); - } - //@} + public: + CModeSplitCallback(CMultivariateMultimodalPrior& prior) : m_Prior(&prior) {} - //! Get the current number of modes. - std::size_t numberModes() const - { - return m_Modes.size(); - } + void operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const { + LOG_TRACE("Splitting mode with index " << sourceIndex); - //! Get the expected mean of the marginal likelihood. 
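The linearity argument in mean() is easy to check numerically; a worked one-dimensional example with two modes and made-up weights:

    // Worked check of the mixture-mean identity: the mixture mean is the
    // weight-averaged mode mean, exactly as the integral derivation states.
    #include <iostream>
    #include <utility>
    #include <vector>

    int main() {
        std::vector<std::pair<double, double>> modes{{10.0, -1.0},  // (weight, mean)
                                                     {30.0, 2.0}};
        double weightSum = 0.0, weightedMean = 0.0;
        for (const auto& mode : modes) {
            weightSum += mode.first;
            weightedMean += mode.first * mode.second;
        }
        // (10 * -1 + 30 * 2) / 40 = 1.25.
        std::cout << weightedMean / weightSum << '\n';
        return 0;
    }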
- TPoint mean() const - { - // By linearity we have that: - // Integral{ x * Sum_i{ w(i) * f(x | i) } } - // = Sum_i{ w(i) * Integral{ x * f(x | i) } } - // = Sum_i{ w(i) * mean(i) } + TModeVec& modes = m_Prior->m_Modes; - TMeanAccumulator result; - for (const auto &mode : m_Modes) - { - double weight = mode.weight(); - result.add(TPoint(mode.s_Prior->marginalLikelihoodMean()), weight); + // Remove the split mode. + auto mode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(sourceIndex)); + double numberSamples = mode != modes.end() ? mode->weight() : 0.0; + modes.erase(mode); + + double pLeft = m_Prior->m_Clusterer->probability(leftSplitIndex); + double pRight = m_Prior->m_Clusterer->probability(rightSplitIndex); + double Z = pLeft + pRight; + if (Z > 0.0) { + pLeft /= Z; + pRight /= Z; } - return CBasicStatistics::mean(result); - } + LOG_TRACE("# samples = " << numberSamples << ", pLeft = " << pLeft << ", pRight = " << pRight); - protected: - using TMode = multivariate_multimodal_prior_detail::TMode; - using TModeVec = multivariate_multimodal_prior_detail::TModeVec; + // Create the child modes. + LOG_TRACE("Creating mode with index " << leftSplitIndex); + modes.emplace_back(leftSplitIndex, m_Prior->m_SeedPrior); + { + TPointVec samples; + if (!m_Prior->m_Clusterer->sample(leftSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) { + LOG_ERROR("Couldn't find cluster for " << leftSplitIndex); + } + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); - protected: - //! Get the modes. - const TModeVec &modes() const - { - return m_Modes; - } + double nl = pLeft * numberSamples; + double ns = std::min(nl, static_cast(N + 2)); + double s = static_cast(samples.size()); + LOG_TRACE("# left = " << nl); - private: - //! The callback invoked when a mode is split. - class CModeSplitCallback - { - public: - static const std::size_t MODE_SPLIT_NUMBER_SAMPLES; - - public: - CModeSplitCallback(CMultivariateMultimodalPrior &prior) : - m_Prior(&prior) - {} - - void operator()(std::size_t sourceIndex, - std::size_t leftSplitIndex, - std::size_t rightSplitIndex) const - { - LOG_TRACE("Splitting mode with index " << sourceIndex); - - TModeVec &modes = m_Prior->m_Modes; - - // Remove the split mode. - auto mode = std::find_if(modes.begin(), modes.end(), - CSetTools::CIndexInSet(sourceIndex)); - double numberSamples = mode != modes.end() ? mode->weight() : 0.0; - modes.erase(mode); - - double pLeft = m_Prior->m_Clusterer->probability(leftSplitIndex); - double pRight = m_Prior->m_Clusterer->probability(rightSplitIndex); - double Z = pLeft + pRight; - if (Z > 0.0) - { - pLeft /= Z; - pRight /= Z; - } - LOG_TRACE("# samples = " << numberSamples - << ", pLeft = " << pLeft - << ", pRight = " << pRight); - - // Create the child modes. 
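The child-mode budget just computed, nl = pLeft * numberSamples with a seed portion capped at N + 2, is spread evenly over the cluster samples in two addSamples passes. A numeric sketch with made-up values (reading the small first pass as making the new prior proper before the bulk of the mass is added is an interpretation, not a statement from the source):

    // Numeric sketch of how a child mode's sample budget is spread over
    // the cluster samples drawn for it.
    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    int main() {
        const std::size_t N = 2;       // dimension, so the seed cap is N + 2
        double numberSamples = 100.0;  // weight of the mode being split
        double pLeft = 0.3;            // left child's probability share
        double s = 50.0;               // samples drawn from the left cluster

        double nl = pLeft * numberSamples;                     // 30 samples' worth
        double ns = std::min(nl, static_cast<double>(N + 2));  // 4: seed portion
        double seedWeight = ns / s;                            // 0.08 per sample
        double remainder = (nl - ns) / s;                      // 0.52 per sample
        std::cout << seedWeight << ' ' << remainder << '\n';
        return 0;
    }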
- LOG_TRACE("Creating mode with index " << leftSplitIndex); - modes.emplace_back(leftSplitIndex, m_Prior->m_SeedPrior); - { - TPointVec samples; - if (!m_Prior->m_Clusterer->sample(leftSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) - { - LOG_ERROR("Couldn't find cluster for " << leftSplitIndex); - } - LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); - - double nl = pLeft * numberSamples; - double ns = std::min(nl, static_cast(N + 2)); - double s = static_cast(samples.size()); - LOG_TRACE("# left = " << nl); - - TDouble10Vec1Vec samples_; - samples_.reserve(samples.size()); - for (const auto &sample : samples) - { - samples_.push_back(sample.template toVector()); - } - TDouble10Vec seedWeight(N, ns / s); - TDouble10Vec4Vec1Vec weights(samples_.size(), TDouble10Vec4Vec(1, seedWeight)); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); - double weight = (nl - ns) / s; - if (weight > 0.0) - { - weights.assign(weights.size(), TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); - LOG_TRACE(modes.back().s_Prior->print()); - } - } - - LOG_TRACE("Creating mode with index " << rightSplitIndex); - modes.emplace_back(rightSplitIndex, m_Prior->m_SeedPrior); - { - TPointVec samples; - if (!m_Prior->m_Clusterer->sample(rightSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) - { - LOG_ERROR("Couldn't find cluster for " << rightSplitIndex) - } - LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); - - double nr = pRight * numberSamples; - double ns = std::min(nr, static_cast(N + 2)); - double s = static_cast(samples.size()); - LOG_TRACE("# right = " << nr); - - TDouble10Vec1Vec samples_; - samples_.reserve(samples.size()); - for (const auto &sample : samples) - { - samples_.push_back(sample.template toVector()); - } - TDouble10Vec seedWeight(N, ns / s); - TDouble10Vec4Vec1Vec weights(samples_.size(), TDouble10Vec4Vec(1, seedWeight)); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); - double weight = (nr - ns) / s; - if (weight > 0.0) - { - weights.assign(weights.size(), TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); - LOG_TRACE(modes.back().s_Prior->print()); - } - } + TDouble10Vec1Vec samples_; + samples_.reserve(samples.size()); + for (const auto& sample : samples) { + samples_.push_back(sample.template toVector()); + } + TDouble10Vec seedWeight(N, ns / s); + TDouble10Vec4Vec1Vec weights(samples_.size(), TDouble10Vec4Vec(1, seedWeight)); + modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); + double weight = (nl - ns) / s; + if (weight > 0.0) { + weights.assign(weights.size(), TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); + modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); + LOG_TRACE(modes.back().s_Prior->print()); + } + } - LOG_TRACE(m_Prior->print()); - LOG_TRACE("Split mode"); + LOG_TRACE("Creating mode with index " << rightSplitIndex); + modes.emplace_back(rightSplitIndex, m_Prior->m_SeedPrior); + { + TPointVec samples; + if (!m_Prior->m_Clusterer->sample(rightSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) { + LOG_ERROR("Couldn't find cluster for " << rightSplitIndex) } + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); - private: - CMultivariateMultimodalPrior *m_Prior; - }; - - //! The callback invoked when two modes are merged. 
- class CModeMergeCallback - { - public: - static const std::size_t MODE_MERGE_NUMBER_SAMPLES; - - public: - CModeMergeCallback(CMultivariateMultimodalPrior &prior) : - m_Prior(&prior) - {} - - void operator()(std::size_t leftMergeIndex, - std::size_t rightMergeIndex, - std::size_t targetIndex) const - { - namespace detail = multivariate_multimodal_prior_detail; - detail::modeMergeCallback(N, m_Prior->m_Modes, m_Prior->m_SeedPrior, - MODE_MERGE_NUMBER_SAMPLES, - leftMergeIndex, rightMergeIndex, targetIndex); + double nr = pRight * numberSamples; + double ns = std::min(nr, static_cast(N + 2)); + double s = static_cast(samples.size()); + LOG_TRACE("# right = " << nr); + + TDouble10Vec1Vec samples_; + samples_.reserve(samples.size()); + for (const auto& sample : samples) { + samples_.push_back(sample.template toVector()); + } + TDouble10Vec seedWeight(N, ns / s); + TDouble10Vec4Vec1Vec weights(samples_.size(), TDouble10Vec4Vec(1, seedWeight)); + modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); + double weight = (nr - ns) / s; + if (weight > 0.0) { + weights.assign(weights.size(), TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); + modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); + LOG_TRACE(modes.back().s_Prior->print()); } + } - private: - CMultivariateMultimodalPrior *m_Prior; - }; + LOG_TRACE(m_Prior->print()); + LOG_TRACE("Split mode"); + } private: - //! \name State tags for model persistence. - //@{ - static const std::string CLUSTERER_TAG; - static const std::string SEED_PRIOR_TAG; - static const std::string MODE_TAG; - static const std::string NUMBER_SAMPLES_TAG; - static const std::string MINIMUM_TAG; - static const std::string MAXIMUM_TAG; - static const std::string DECAY_RATE_TAG; - //@} + CMultivariateMultimodalPrior* m_Prior; + }; - private: - //! Read parameters from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) - { - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, - double decayRate, - core::CStringUtils::stringToType(traverser.value(), decayRate), - this->decayRate(decayRate)) - RESTORE(CLUSTERER_TAG, - traverser.traverseSubLevel(boost::bind(CClustererStateSerialiser(), - boost::cref(params), - boost::ref(m_Clusterer), _1))) - RESTORE(SEED_PRIOR_TAG, - traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), - boost::cref(params), - boost::ref(m_SeedPrior), _1))) - RESTORE_SETUP_TEARDOWN(MODE_TAG, - TMode mode, - traverser.traverseSubLevel(boost::bind(&TMode::acceptRestoreTraverser, - &mode, boost::cref(params), _1)), - m_Modes.push_back(mode)) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, - core::CStringUtils::stringToType(traverser.value(), numberSamples), - this->numberSamples(numberSamples)) - } - while (traverser.next()); + //! The callback invoked when two modes are merged. + class CModeMergeCallback { + public: + static const std::size_t MODE_MERGE_NUMBER_SAMPLES; - if (m_Clusterer) - { - // Register the split and merge callbacks. 
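Re-registering the callbacks after swap and restore matters because each callback stores a pointer back to its owning prior; after two owners exchange clusterers, the stored pointers would otherwise notify the wrong object. A minimal stand-alone illustration (the types are simplified stand-ins, not the real clusterer interface):

    // Sketch of why split/merge callbacks must be refreshed after a swap.
    #include <functional>
    #include <iostream>
    #include <utility>

    struct SClusterer {
        std::function<void()> splitFunc;
    };

    struct SPrior {
        explicit SPrior(const char* name) : s_Name(name) { this->registerCallbacks(); }
        void swap(SPrior& other) {
            std::swap(s_Clusterer, other.s_Clusterer);
            // Without these two calls each clusterer would keep notifying
            // the prior it belonged to before the swap.
            this->registerCallbacks();
            other.registerCallbacks();
        }
        void registerCallbacks() {
            s_Clusterer.splitFunc = [this] { std::cout << s_Name << " notified\n"; };
        }
        const char* s_Name;
        SClusterer s_Clusterer;
    };

    int main() {
        SPrior a{"a"}, b{"b"};
        a.swap(b);
        a.s_Clusterer.splitFunc(); // prints "a notified" only because we refreshed
        return 0;
    }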
- m_Clusterer->splitFunc(CModeSplitCallback(*this)); - m_Clusterer->mergeFunc(CModeMergeCallback(*this)); - } + public: + CModeMergeCallback(CMultivariateMultimodalPrior& prior) : m_Prior(&prior) {} - return true; + void operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t targetIndex) const { + namespace detail = multivariate_multimodal_prior_detail; + detail::modeMergeCallback( + N, m_Prior->m_Modes, m_Prior->m_SeedPrior, MODE_MERGE_NUMBER_SAMPLES, leftMergeIndex, rightMergeIndex, targetIndex); } - //! We should only use this prior when it has multiple modes. - virtual bool participatesInModelSelection() const - { - return m_Modes.size() > 1; + private: + CMultivariateMultimodalPrior* m_Prior; + }; + +private: + //! \name State tags for model persistence. + //@{ + static const std::string CLUSTERER_TAG; + static const std::string SEED_PRIOR_TAG; + static const std::string MODE_TAG; + static const std::string NUMBER_SAMPLES_TAG; + static const std::string MINIMUM_TAG; + static const std::string MAXIMUM_TAG; + static const std::string DECAY_RATE_TAG; + //@} + +private: + //! Read parameters from \p traverser. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, + double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) + RESTORE(CLUSTERER_TAG, + traverser.traverseSubLevel( + boost::bind(CClustererStateSerialiser(), boost::cref(params), boost::ref(m_Clusterer), _1))) + RESTORE( + SEED_PRIOR_TAG, + traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(m_SeedPrior), _1))) + RESTORE_SETUP_TEARDOWN(MODE_TAG, + TMode mode, + traverser.traverseSubLevel(boost::bind(&TMode::acceptRestoreTraverser, &mode, boost::cref(params), _1)), + m_Modes.push_back(mode)) + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, + double numberSamples, + core::CStringUtils::stringToType(traverser.value(), numberSamples), + this->numberSamples(numberSamples)) + } while (traverser.next()); + + if (m_Clusterer) { + // Register the split and merge callbacks. + m_Clusterer->splitFunc(CModeSplitCallback(*this)); + m_Clusterer->mergeFunc(CModeMergeCallback(*this)); } - //! Get the number of nuisance parameters in the marginal likelihood. - //! - //! This is just number modes - 1 due to the normalization constraint. - virtual double unmarginalizedParameters() const - { - return std::max(static_cast(m_Modes.size()), 1.0) - 1.0; - } + return true; + } - //! Get the convariance matrix for the marginal likelihood. - TMatrix covarianceMatrix() const - { - // By linearity we have that: - // Integral{ (x - m)' * (x - m) * Sum_i{ w(i) * f(x | i) } } - // = Sum_i{ w(i) * (Integral{ x' * x * f(x | i) } - m' * m) } - // = Sum_i{ w(i) * ((mi' * mi + Ci) - m' * m) } + //! We should only use this prior when it has multiple modes. + virtual bool participatesInModelSelection() const { return m_Modes.size() > 1; } - using TMatrixMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + //! Get the number of nuisance parameters in the marginal likelihood. + //! + //! This is just number modes - 1 due to the normalization constraint. + virtual double unmarginalizedParameters() const { return std::max(static_cast(m_Modes.size()), 1.0) - 1.0; } - TMatrix mean2 = TPoint(this->marginalLikelihoodMean()).outer(); + //! 
Get the covariance matrix for the marginal likelihood.
+    TMatrix covarianceMatrix() const {
+        // By linearity we have that:
+        //   Integral{ (x - m)' * (x - m) * Sum_i{ w(i) * f(x | i) } }
+        // = Sum_i{ w(i) * (Integral{ x' * x * f(x | i) } - m' * m) }
+        // = Sum_i{ w(i) * ((mi' * mi + Ci) - m' * m) }

-            TMatrix mean2 = TPoint(this->marginalLikelihoodMean()).outer();
+        using TMatrixMeanAccumulator = typename CBasicStatistics::SSampleMean<TMatrix>::TAccumulator;

-            TMatrixMeanAccumulator result;
-            for (const auto &mode : m_Modes)
-            {
-                double weight = mode.weight();
-                TPoint modeMean(mode.s_Prior->marginalLikelihoodMean());
-                TMatrix modeVariance(mode.s_Prior->marginalLikelihoodCovariance());
-                result.add(modeMean.outer() - mean2 + modeVariance, weight);
-            }
+        TMatrix mean2 = TPoint(this->marginalLikelihoodMean()).outer();

-            return CBasicStatistics::mean(result);
-        }
+        TMatrixMeanAccumulator result;
+        for (const auto& mode : m_Modes) {
+            double weight = mode.weight();
+            TPoint modeMean(mode.s_Prior->marginalLikelihoodMean());
+            TMatrix modeVariance(mode.s_Prior->marginalLikelihoodCovariance());
+            result.add(modeMean.outer() - mean2 + modeVariance, weight);
+        }

-        //! Full debug dump of the mode weights.
-        std::string debugWeights() const
-        {
-            return multivariate_multimodal_prior_detail::debugWeights(m_Modes);
+        return CBasicStatistics::mean(result);
+    }

-    private:
-        //! The object which partitions the data into clusters.
-        TClustererPtr m_Clusterer;
+    //! Full debug dump of the mode weights.
+    std::string debugWeights() const { return multivariate_multimodal_prior_detail::debugWeights(m_Modes); }

-        //! The object used to initialize new cluster priors.
-        TPriorPtr m_SeedPrior;
+private:
+    //! The object which partitions the data into clusters.
+    TClustererPtr m_Clusterer;

-        //! The modes of the distribution.
-        TModeVec m_Modes;
+    //! The object used to initialize new cluster priors.
+    TPriorPtr m_SeedPrior;
+
+    //! The modes of the distribution.
+    TModeVec m_Modes;
 };

 template<std::size_t N>
@@ -1293,7 +1081,6 @@ template<std::size_t N>
 const std::size_t CMultivariateMultimodalPrior<N>::CModeSplitCallback::MODE_SPLIT_NUMBER_SAMPLES(50 * N);
 template<std::size_t N>
 const std::size_t CMultivariateMultimodalPrior<N>::CModeMergeCallback::MODE_MERGE_NUMBER_SAMPLES(25 * N);
-
 }
 }
diff --git a/include/maths/CMultivariateMultimodalPriorFactory.h b/include/maths/CMultivariateMultimodalPriorFactory.h
index 8a878c2b0d..0629b7ff9b 100644
--- a/include/maths/CMultivariateMultimodalPriorFactory.h
+++ b/include/maths/CMultivariateMultimodalPriorFactory.h
@@ -14,42 +14,35 @@
 #include 

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStateRestoreTraverser;
 }

-namespace maths
-{
+namespace maths {
 class CMultivariatePrior;
 struct SDistributionRestoreParams;

 //! \brief Factory for multivariate multimodal priors.
-class MATHS_EXPORT CMultivariateMultimodalPriorFactory
-{
-    public:
-        using TPriorPtr = boost::shared_ptr<CMultivariatePrior>;
-
-    public:
-        //! Create a new non-informative multivariate normal prior.
-        static TPriorPtr nonInformative(std::size_t dimension,
-                                        maths_t::EDataType dataType,
-                                        double decayRate,
-                                        maths_t::EClusterWeightCalc weightCalc,
-                                        double minimumClusterFraction,
-                                        double minimumClusterCount,
-                                        double minimumCategoryCount,
-                                        const CMultivariatePrior &seedPrior);
-
-        //! Create reading state from its state document representation.
-        static bool restore(std::size_t dimension,
-                            const SDistributionRestoreParams &params,
-                            TPriorPtr &ptr,
-                            core::CStateRestoreTraverser &traverser);
+class MATHS_EXPORT CMultivariateMultimodalPriorFactory {
+public:
+    using TPriorPtr = boost::shared_ptr<CMultivariatePrior>;
+
+public:
+    //! 
Create a new non-informative multivariate normal prior. + static TPriorPtr nonInformative(std::size_t dimension, + maths_t::EDataType dataType, + double decayRate, + maths_t::EClusterWeightCalc weightCalc, + double minimumClusterFraction, + double minimumClusterCount, + double minimumCategoryCount, + const CMultivariatePrior& seedPrior); + + //! Create reading state from its state document representation. + static bool + restore(std::size_t dimension, const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser); }; - } } diff --git a/include/maths/CMultivariateNormalConjugate.h b/include/maths/CMultivariateNormalConjugate.h index 857b692b3a..bde22a4c24 100644 --- a/include/maths/CMultivariateNormalConjugate.h +++ b/include/maths/CMultivariateNormalConjugate.h @@ -9,15 +9,14 @@ #include #include -#include #include #include #include +#include #include #include #include -#include #include #include #include @@ -27,6 +26,7 @@ #include #include #include +#include #include #include @@ -40,10 +40,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A conjugate prior distribution for a stationary multivariate Normal //! process. @@ -76,1218 +74,1004 @@ namespace maths //! for the data when using one-of-n composition (see CMultivariateOneOfNPrior). //! From a design point of view this is the composite pattern. template -class CMultivariateNormalConjugate : public CMultivariatePrior -{ - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - using TDoubleVec = std::vector; - using TPoint = CVectorNx1; - using TPointVec = std::vector; - using TPoint4Vec = core::CSmallVector; - using TMatrix = CSymmetricMatrixNxN; - using TMatrixVec = std::vector; - using TCovariance = typename CBasicStatistics::SSampleCovariances; - - // Lift all overloads of into scope. - //{ - using CMultivariatePrior::addSamples; - using CMultivariatePrior::print; - //} - - private: - using TDenseVector = typename SDenseVector::Type; - using TDenseMatrix = typename SDenseMatrix::Type; - - public: - //! \name Life-cycle - //@{ - //! \param[in] dataType The type of data being modeled (see maths_t::EDataType - //! for details). - //! \param[in] gaussianMean The mean of the normal component of the prior. - //! \param[in] gaussianPrecision The precision of the normal component of - //! the prior. - //! \param[in] wishartDegreesFreedom The degrees freedom of Wishart component - //! of the prior. - //! \param[in] wishartScaleMatrix The scale matrix of Wishart component - //! of the prior. - //! \param[in] decayRate The rate at which to revert to non-informative. - CMultivariateNormalConjugate(maths_t::EDataType dataType, - const TPoint &gaussianMean, - const TPoint &gaussianPrecision, - double wishartDegreesFreedom, - const TMatrix &wishartScaleMatrix, - double decayRate = 0.0) : - CMultivariatePrior(dataType, decayRate), - m_GaussianMean(gaussianMean), - m_GaussianPrecision(gaussianPrecision), - m_WishartDegreesFreedom(wishartDegreesFreedom), - m_WishartScaleMatrix(wishartScaleMatrix) - {} - - //! Construct from sample central moments. 
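The covarianceMatrix() hunk above for the multimodal prior rests on a linearity identity: the covariance of a mixture is the weighted sum of each mode's second moment, minus the outer product of the overall mean. A minimal standalone sketch of that identity, using plain arrays and made-up two-mode values rather than the library's TPoint/TMatrix types; it is an illustration, not part of the patch:

    #include <cstdio>

    int main() {
        // Two 2-D modes: weights w, means m_i, and (diagonal) covariances C_i.
        const double w[2] = {0.3, 0.7};
        const double m[2][2] = {{0.0, 0.0}, {4.0, 2.0}};
        const double C[2][2] = {{1.0, 0.5}, {2.0, 1.0}}; // diagonal entries of each C_i

        // Overall mean: m = Sum_i{ w(i) * m_i }.
        double mean[2] = {0.0, 0.0};
        for (int i = 0; i < 2; ++i) {
            for (int d = 0; d < 2; ++d) {
                mean[d] += w[i] * m[i][d];
            }
        }

        // Covariance by linearity: Sum_i{ w(i) * ((m_i' * m_i + C_i) - m' * m) }.
        double cov[2][2] = {{0.0, 0.0}, {0.0, 0.0}};
        for (int i = 0; i < 2; ++i) {
            for (int r = 0; r < 2; ++r) {
                for (int c = 0; c < 2; ++c) {
                    double second = m[i][r] * m[i][c] + (r == c ? C[i][r] : 0.0);
                    cov[r][c] += w[i] * (second - mean[r] * mean[c]);
                }
            }
        }

        std::printf("cov = [[%.3f, %.3f], [%.3f, %.3f]]\n", cov[0][0], cov[0][1], cov[1][0], cov[1][1]);
        return 0;
    }

Since the mode weights sum to one, the between-mode spread (the m_i outer products) and the within-mode covariances combine exactly as the comment in the hunk states.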
- CMultivariateNormalConjugate(maths_t::EDataType dataType, - const TCovariance &covariance, - double decayRate = 0.0) : - CMultivariatePrior(dataType, decayRate), - m_GaussianMean(CBasicStatistics::mean(covariance)), - m_GaussianPrecision(covariance.s_Count), - m_WishartDegreesFreedom(this->smallest(covariance.s_Count.template toVector())), - m_WishartScaleMatrix(covariance.s_Count * covariance.s_Covariances) - { - this->numberSamples(CBasicStatistics::count(covariance)); - } - - //! Construct from part of a state document. - CMultivariateNormalConjugate(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) : - CMultivariatePrior(params.s_DataType, params.s_DecayRate) - { - traverser.traverseSubLevel(boost::bind(&CMultivariateNormalConjugate::acceptRestoreTraverser, this, _1)); - } - - virtual ~CMultivariateNormalConjugate() {} - - // Default copy constructor and assignment operator work. - - //! Create an instance of a non-informative prior. - //! - //! \param[in] dataType The type of data being modeled (see maths_t::EDataType - //! for details). - //! \param[in] decayRate The rate at which to revert to the non-informative prior. - //! \return A non-informative prior. - static CMultivariateNormalConjugate nonInformativePrior(maths_t::EDataType dataType, - double decayRate = 0.0) - { - return CMultivariateNormalConjugate(dataType, - NON_INFORMATIVE_MEAN, - TPoint(NON_INFORMATIVE_PRECISION), - NON_INFORMATIVE_DEGREES_FREEDOM, - NON_INFORMATIVE_SCALE, - decayRate); +class CMultivariateNormalConjugate : public CMultivariatePrior { +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + using TDoubleVec = std::vector; + using TPoint = CVectorNx1; + using TPointVec = std::vector; + using TPoint4Vec = core::CSmallVector; + using TMatrix = CSymmetricMatrixNxN; + using TMatrixVec = std::vector; + using TCovariance = typename CBasicStatistics::SSampleCovariances; + + // Lift all overloads of into scope. + //{ + using CMultivariatePrior::addSamples; + using CMultivariatePrior::print; + //} + +private: + using TDenseVector = typename SDenseVector::Type; + using TDenseMatrix = typename SDenseMatrix::Type; + +public: + //! \name Life-cycle + //@{ + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] gaussianMean The mean of the normal component of the prior. + //! \param[in] gaussianPrecision The precision of the normal component of + //! the prior. + //! \param[in] wishartDegreesFreedom The degrees freedom of Wishart component + //! of the prior. + //! \param[in] wishartScaleMatrix The scale matrix of Wishart component + //! of the prior. + //! \param[in] decayRate The rate at which to revert to non-informative. + CMultivariateNormalConjugate(maths_t::EDataType dataType, + const TPoint& gaussianMean, + const TPoint& gaussianPrecision, + double wishartDegreesFreedom, + const TMatrix& wishartScaleMatrix, + double decayRate = 0.0) + : CMultivariatePrior(dataType, decayRate), + m_GaussianMean(gaussianMean), + m_GaussianPrecision(gaussianPrecision), + m_WishartDegreesFreedom(wishartDegreesFreedom), + m_WishartScaleMatrix(wishartScaleMatrix) {} + + //! Construct from sample central moments. 
+ CMultivariateNormalConjugate(maths_t::EDataType dataType, const TCovariance& covariance, double decayRate = 0.0) + : CMultivariatePrior(dataType, decayRate), + m_GaussianMean(CBasicStatistics::mean(covariance)), + m_GaussianPrecision(covariance.s_Count), + m_WishartDegreesFreedom(this->smallest(covariance.s_Count.template toVector())), + m_WishartScaleMatrix(covariance.s_Count * covariance.s_Covariances) { + this->numberSamples(CBasicStatistics::count(covariance)); + } + + //! Construct from part of a state document. + CMultivariateNormalConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) + : CMultivariatePrior(params.s_DataType, params.s_DecayRate) { + traverser.traverseSubLevel(boost::bind(&CMultivariateNormalConjugate::acceptRestoreTraverser, this, _1)); + } + + virtual ~CMultivariateNormalConjugate() {} + + // Default copy constructor and assignment operator work. + + //! Create an instance of a non-informative prior. + //! + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] decayRate The rate at which to revert to the non-informative prior. + //! \return A non-informative prior. + static CMultivariateNormalConjugate nonInformativePrior(maths_t::EDataType dataType, double decayRate = 0.0) { + return CMultivariateNormalConjugate(dataType, + NON_INFORMATIVE_MEAN, + TPoint(NON_INFORMATIVE_PRECISION), + NON_INFORMATIVE_DEGREES_FREEDOM, + NON_INFORMATIVE_SCALE, + decayRate); + } + //@} + + //! \name Prior Contract + //@{ + //! Create a copy of the prior. + //! + //! \warning Caller owns returned object. + virtual CMultivariateNormalConjugate* clone() const { return new CMultivariateNormalConjugate(*this); } + + //! Get the dimension of the prior. + std::size_t dimension() const { return N; } + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double /*offset = 0.0*/, double decayRate = 0.0) { + *this = nonInformativePrior(this->dataType(), decayRate); + } + + //! No-op. + virtual void + adjustOffset(const TWeightStyleVec& /*weightStyles*/, const TDouble10Vec1Vec& /*samples*/, const TDouble10Vec4Vec1Vec& /*weights*/) {} + + //! Update the prior with a collection of independent samples from the + //! process. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the process. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) { + if (samples.empty()) { + return; } - //@} - - //! \name Prior Contract - //@{ - //! Create a copy of the prior. - //! - //! \warning Caller owns returned object. - virtual CMultivariateNormalConjugate *clone() const - { - return new CMultivariateNormalConjugate(*this); + if (!this->check(samples, weights)) { + return; } - //! Get the dimension of the prior. - std::size_t dimension() const - { - return N; + this->CMultivariatePrior::addSamples(weightStyles, samples, weights); + + // Note that if either count weight or Winsorisation weights are supplied + // the weight of the sample x(i) is interpreted as its count, so for example + // updating with {(x, 2)} is equivalent to updating with {x, x}. 
+ // + // If the data are discrete then we approximate the discrete distribution + // by saying it is uniform on the intervals [n,n+1] for each integral n. + // This is like saying that the data are samples from: + // X' = X + Z + // + // where, + // {[Z]_i} are IID uniform in the interval [0,1]. + // + // We care about the limiting behaviour of the filter, i.e. as the number + // of samples n -> inf. In this case, the law of large numbers give that: + // mean(x(i) + z(i)) + // -> 1/n * Sum_i( x(i) ) + E[Z] + // + // and + // cov(x(i) + z(i)) + // = Sum_i( (x(i) + z(i) - 1/n * Sum_i( x(i) + z(i) ))' + // (x(i) + z(i) - 1/n * Sum_i( x(i) + z(i) )) ) + // -> Sum_i( (x(i) - 1/n * Sum_j( x(j) ) + z(i) - E[Z])' + // (x(i) - 1/n * Sum_j( x(j) ) + z(i) - E[Z]) ) + // -> cov(x(i)) + n * E[(Z - E[Z])'(Z - E[Z])] + // + // Since Z is uniform on the interval [0,1] + // E[Z] = 1/2 1 + // E[(Z - E[Z])^2] = 1/12 I + + TPoint numberSamples(0.0); + TCovariance covariancePost; + try { + for (std::size_t i = 0u; i < samples.size(); ++i) { + TPoint x(samples[i]); + TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i])); + TPoint varianceScale = TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])) * + TPoint(maths_t::countVarianceScale(N, weightStyles, weights[i])); + numberSamples += n; + covariancePost.add(x, n / varianceScale); + } + } catch (const std::exception& e) { + LOG_ERROR("Failed to update likelihood: " << e.what()); + return; } + TPoint scaledNumberSamples = covariancePost.s_Count; - //! Reset the prior to non-informative. - virtual void setToNonInformative(double /*offset = 0.0*/, double decayRate = 0.0) - { - *this = nonInformativePrior(this->dataType(), decayRate); + if (this->isInteger()) { + covariancePost.s_Mean += TPoint(0.5); + covariancePost.s_Covariances += TPoint(1.0 / 12.0).diagonal(); } - //! No-op. - virtual void adjustOffset(const TWeightStyleVec &/*weightStyles*/, - const TDouble10Vec1Vec &/*samples*/, - const TDouble10Vec4Vec1Vec &/*weights*/) - { + if (m_WishartDegreesFreedom > 0.0) { + TPoint scale = TPoint(1.0) / m_GaussianPrecision; + TMatrix covariances = m_WishartScaleMatrix; + scaleCovariances(scale, covariances); + TCovariance covariancePrior = CBasicStatistics::accumulator(m_GaussianPrecision, m_GaussianMean, covariances); + covariancePost += covariancePrior; } - - //! Update the prior with a collection of independent samples from the - //! process. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) - { - if (samples.empty()) - { - return; - } - if (!this->check(samples, weights)) - { - return; - } - - this->CMultivariatePrior::addSamples(weightStyles, samples, weights); - - // Note that if either count weight or Winsorisation weights are supplied - // the weight of the sample x(i) is interpreted as its count, so for example - // updating with {(x, 2)} is equivalent to updating with {x, x}. - // - // If the data are discrete then we approximate the discrete distribution - // by saying it is uniform on the intervals [n,n+1] for each integral n. 
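The E[Z] = 1/2 and E[(Z - E[Z])^2] = 1/12 constants in the comment above are just the mean and variance of a uniform [0,1] jitter applied to each integer-valued component. A self-contained one-dimensional sketch of the same moment correction, using plain doubles rather than the covariance accumulator in the hunk:

    #include <cstdio>
    #include <vector>

    int main() {
        // Integer-valued observations of a 1-D process.
        std::vector<double> x{3.0, 5.0, 4.0, 4.0, 6.0};

        double mean = 0.0;
        for (double xi : x) { mean += xi; }
        mean /= x.size();

        double var = 0.0;
        for (double xi : x) { var += (xi - mean) * (xi - mean); }
        var /= x.size();

        // Model X' = X + Z with Z ~ uniform[0,1]: shift the mean by E[Z] = 1/2
        // and widen the variance by var[Z] = 1/12, exactly as the isInteger()
        // branch adjusts s_Mean and s_Covariances.
        double meanPrime = mean + 0.5;
        double varPrime = var + 1.0 / 12.0;

        std::printf("mean' = %.4f, var' = %.4f\n", meanPrime, varPrime);
        return 0;
    }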
- // This is like saying that the data are samples from: - // X' = X + Z - // - // where, - // {[Z]_i} are IID uniform in the interval [0,1]. - // - // We care about the limiting behaviour of the filter, i.e. as the number - // of samples n -> inf. In this case, the law of large numbers give that: - // mean(x(i) + z(i)) - // -> 1/n * Sum_i( x(i) ) + E[Z] - // - // and - // cov(x(i) + z(i)) - // = Sum_i( (x(i) + z(i) - 1/n * Sum_i( x(i) + z(i) ))' - // (x(i) + z(i) - 1/n * Sum_i( x(i) + z(i) )) ) - // -> Sum_i( (x(i) - 1/n * Sum_j( x(j) ) + z(i) - E[Z])' - // (x(i) - 1/n * Sum_j( x(j) ) + z(i) - E[Z]) ) - // -> cov(x(i)) + n * E[(Z - E[Z])'(Z - E[Z])] - // - // Since Z is uniform on the interval [0,1] - // E[Z] = 1/2 1 - // E[(Z - E[Z])^2] = 1/12 I - - TPoint numberSamples(0.0); - TCovariance covariancePost; - try - { - for (std::size_t i = 0u; i < samples.size(); ++i) - { - TPoint x(samples[i]); - TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint varianceScale = TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])) - * TPoint(maths_t::countVarianceScale(N, weightStyles, weights[i])); - numberSamples += n; - covariancePost.add(x, n / varianceScale); - } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to update likelihood: " << e.what()); - return; - } - TPoint scaledNumberSamples = covariancePost.s_Count; - - if (this->isInteger()) - { - covariancePost.s_Mean += TPoint(0.5); - covariancePost.s_Covariances += TPoint(1.0 / 12.0).diagonal(); - } - - if (m_WishartDegreesFreedom > 0.0) - { - TPoint scale = TPoint(1.0) / m_GaussianPrecision; - TMatrix covariances = m_WishartScaleMatrix; - scaleCovariances(scale, covariances); - TCovariance covariancePrior = CBasicStatistics::accumulator(m_GaussianPrecision, - m_GaussianMean, - covariances); - covariancePost += covariancePrior; - } - m_GaussianMean = CBasicStatistics::mean(covariancePost); - m_GaussianPrecision += scaledNumberSamples; - m_WishartDegreesFreedom += this->smallest(numberSamples.template toVector()); - m_WishartScaleMatrix = covariancePost.s_Covariances; - scaleCovariances(covariancePost.s_Count, m_WishartScaleMatrix); - - // If the coefficient of variation of the data is too small we run - // in to numerical problems. We truncate the variation by modeling - // the impact of an actual variation (standard deviation divided by - // mean) in the data of size MINIMUM_COEFFICIENT_OF_VARATION on the - // prior parameters. 
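The update that follows folds the prior's pseudo-observations and the new samples into one covariance accumulator and then reads the posterior parameters back out. A one-dimensional sketch of that conjugate pooling, under the simplifying assumptions of unit variance scales and made-up numbers (the normal-inverse-gamma analogue of the N-dimensional normal-Wishart update, not the library's code):

    #include <cstdio>

    int main() {
        // Prior pseudo-data: precision (pseudo-count) kappa0, mean mu0, and a
        // Wishart part with nu0 degrees of freedom and (1-D) scale s0.
        double kappa0 = 2.0, mu0 = 0.0, nu0 = 2.0, s0 = 4.0;

        // Data summary: n observations with mean xbar and sum of squared deviations ss.
        double n = 10.0, xbar = 1.5, ss = 12.0;

        // Pool prior and data as two weighted "samples" of a covariance
        // accumulator: counts add, means combine by weight, and the scatter
        // picks up a between-means term.
        double kappaN = kappa0 + n;
        double muN = (kappa0 * mu0 + n * xbar) / kappaN;
        double nuN = nu0 + n;
        double sN = s0 + ss + (kappa0 * n / kappaN) * (xbar - mu0) * (xbar - mu0);

        std::printf("posterior: kappa = %.1f, mu = %.3f, nu = %.1f, scale = %.3f\n",
                    kappaN, muN, nuN, sN);
        return 0;
    }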
-
-        if (!this->isNonInformative())
-        {
-            double truncatedMean = max(m_GaussianMean, TPoint(1e-8)).euclidean();
-            double minimumDeviation = truncatedMean
-                                      * MINIMUM_COEFFICIENT_OF_VARIATION;
-            double minimumDiagonal = m_WishartDegreesFreedom
-                                     * minimumDeviation * minimumDeviation;
-            for (std::size_t i = 0u; i < N; ++i)
-            {
-                m_WishartScaleMatrix(i, i) = std::max(m_WishartScaleMatrix(i, i), minimumDiagonal);
-            }
-        }
-
-        LOG_TRACE("numberSamples = " << numberSamples
-                  << ", scaledNumberSamples = " << scaledNumberSamples
-                  << ", m_WishartDegreesFreedom = " << m_WishartDegreesFreedom
-                  << ", m_WishartScaleMatrix = " << m_WishartScaleMatrix
-                  << ", m_GaussianMean = " << m_GaussianMean
-                  << ", m_GaussianPrecision = " << m_GaussianPrecision);
-
-        if (this->isBad())
-        {
-            LOG_ERROR("Update failed (" << this->debug() << ")"
-                      << ", samples = " << core::CContainerPrinter::print(samples)
-                      << ", weights = " << core::CContainerPrinter::print(weights));
-            this->setToNonInformative(this->offsetMargin(), this->decayRate());
+        m_GaussianMean = CBasicStatistics::mean(covariancePost);
+        m_GaussianPrecision += scaledNumberSamples;
+        m_WishartDegreesFreedom += this->smallest(numberSamples.template toVector());
+        m_WishartScaleMatrix = covariancePost.s_Covariances;
+        scaleCovariances(covariancePost.s_Count, m_WishartScaleMatrix);
+
+        // If the coefficient of variation of the data is too small we run
+        // into numerical problems. We truncate the variation by modeling
+        // the impact of an actual variation (standard deviation divided by
+        // mean) in the data of size MINIMUM_COEFFICIENT_OF_VARIATION on the
+        // prior parameters.
+
+        if (!this->isNonInformative()) {
+            double truncatedMean = max(m_GaussianMean, TPoint(1e-8)).euclidean();
+            double minimumDeviation = truncatedMean * MINIMUM_COEFFICIENT_OF_VARIATION;
+            double minimumDiagonal = m_WishartDegreesFreedom * minimumDeviation * minimumDeviation;
+            for (std::size_t i = 0u; i < N; ++i) {
+                m_WishartScaleMatrix(i, i) = std::max(m_WishartScaleMatrix(i, i), minimumDiagonal);
+            }
+        }
+
+        LOG_TRACE("numberSamples = " << numberSamples << ", scaledNumberSamples = " << scaledNumberSamples << ", m_WishartDegreesFreedom = "
+                                     << m_WishartDegreesFreedom << ", m_WishartScaleMatrix = " << m_WishartScaleMatrix
+                                     << ", m_GaussianMean = " << m_GaussianMean << ", m_GaussianPrecision = " << m_GaussianPrecision);
+
+        if (this->isBad()) {
+            LOG_ERROR("Update failed (" << this->debug() << ")"
+                      << ", samples = " << core::CContainerPrinter::print(samples)
+                      << ", weights = " << core::CContainerPrinter::print(weights));
+            this->setToNonInformative(this->offsetMargin(), this->decayRate());
+        }
+    }

-    //! Update the prior for the specified elapsed time.
-    virtual void propagateForwardsByTime(double time)
-    {
-        if (!CMathsFuncs::isFinite(time) || time < 0.0)
-        {
-            LOG_ERROR("Bad propagation time " << time);
-            return;
-        }
-
-        if (this->isNonInformative())
-        {
-            // Nothing to be done.
-            return;
-        }
-
-        double alpha = std::exp(-this->scaledDecayRate() * time);
-
-        m_GaussianPrecision = alpha * m_GaussianPrecision
-                              + (1.0 - alpha) * TPoint(NON_INFORMATIVE_PRECISION);
-
-        // The mean of the Wishart distribution is n V and the variance
-        // is [V]_ij = n ( V_ij^2 + V_ii * V_jj), note V is the inverse
-        // of teh scale matrix. We want to increase the variance while
-        // holding its mean constant s.t. in the limit t -> inf var -> inf.
-        // Choosing a factor f in the range [0, 1] and the update is as
-        // follows:
-        //   n -> f * n
-        //   V -> f * V
-        //
-        // Thus the mean is unchanged and variance is increased by 1 / f.
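The comment block just above closes with the key invariant: scaling both the degrees of freedom and the scale matrix by the same factor f leaves the implied mean precision unchanged (with V the inverse of the scale matrix, (f * n) * (f * S)^(-1) = n * S^(-1)) while widening its spread. A one-dimensional sketch of that bookkeeping, with a made-up decay rate and elapsed time; the hunk's code continues below:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
        double degreesFreedom = 100.0; // n
        double scale = 50.0;           // 1-D "scale matrix" S
        const double nonInformativeDegreesFreedom = 0.0;

        double decayRate = 0.05, time = 3.0;
        double alpha = std::exp(-decayRate * time);

        // f in [0, 1]: interpolate the degrees of freedom towards non-informative.
        double factor = std::min(
            (alpha * degreesFreedom + (1.0 - alpha) * nonInformativeDegreesFreedom) / degreesFreedom, 1.0);

        // Mean of the implied precision is n / S: scaling n and S together by f
        // leaves it fixed.
        double meanBefore = degreesFreedom / scale;
        degreesFreedom *= factor;
        scale *= factor;
        double meanAfter = degreesFreedom / scale;

        std::printf("factor = %.4f, mean precision before = %.4f, after = %.4f\n",
                    factor, meanBefore, meanAfter);
        return 0;
    }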
- - double factor = std::min( (alpha * m_WishartDegreesFreedom - + (1.0 - alpha) * NON_INFORMATIVE_DEGREES_FREEDOM) - / m_WishartDegreesFreedom, 1.0); - - m_WishartDegreesFreedom *= factor; - m_WishartScaleMatrix *= factor; - - this->numberSamples(this->numberSamples() * alpha); - - LOG_TRACE("time = " << time - << ", alpha = " << alpha - << ", m_WishartDegreesFreedom = " << m_WishartDegreesFreedom - << ", m_WishartScaleMatrix = " << m_WishartScaleMatrix - << ", m_GaussianMean = " << m_GaussianMean - << ", m_GaussianPrecision = " << m_GaussianPrecision - << ", numberSamples = " << this->numberSamples()); + if (this->isBad()) { + LOG_ERROR("Update failed (" << this->debug() << ")" + << ", samples = " << core::CContainerPrinter::print(samples) + << ", weights = " << core::CContainerPrinter::print(weights)); + this->setToNonInformative(this->offsetMargin(), this->decayRate()); } + } - //! Compute the univariate prior marginalizing over the variables - //! \p marginalize and conditioning on the variables \p condition. - //! - //! \param[in] marginalize The variables to marginalize out. - //! \param[in] condition The variables to condition on. - //! \warning The caller owns the result. - //! \note The variables are passed by the index of their dimension - //! which must therefore be in range. - //! \note The caller must specify dimension - 1 variables between - //! \p marginalize and \p condition so the resulting distribution - //! is univariate. - virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const - { - if (!this->check(marginalize, condition)) - { - return TUnivariatePriorPtrDoublePr(); - } - - TSize10Vec i1; - this->remainingVariables(marginalize, condition, i1); - if (i1.size() != 1) - { - LOG_ERROR("Invalid variables for computing univariate distribution: " - << "marginalize '" << core::CContainerPrinter::print(marginalize) << "'" - << ", condition '" << core::CContainerPrinter::print(condition) << "'"); - return TUnivariatePriorPtrDoublePr(); - } - - maths_t::EDataType dataType = this->dataType(); - double decayRate = this->decayRate(); - if (this->isNonInformative()) - { - return {TUnivariatePriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate).clone()), 0.0}; - } - - double p = m_GaussianPrecision(i1[0]); - double s = m_WishartDegreesFreedom / 2.0; - double v = m_WishartDegreesFreedom - static_cast(N) - 1.0; - const TPoint &m = this->mean(); - TMatrix c = m_WishartScaleMatrix / v; - - double m1 = m(i1[0]); - double c11 = c(i1[0], i1[0]); - if (condition.empty()) - { - return {TUnivariatePriorPtr(new CNormalMeanPrecConjugate(dataType, m1, p, s, c11 * v / 2.0, decayRate)), 0.0}; - } + //! Update the prior for the specified elapsed time. 
+    virtual void propagateForwardsByTime(double time) {
+        if (!CMathsFuncs::isFinite(time) || time < 0.0) {
+            LOG_ERROR("Bad propagation time " << time);
+            return;
+        }

-        TSize10Vec condition_;
-        condition_.reserve(condition.size() + 1);
-        CDenseVector xc(condition.size());
-        this->unpack(condition, condition_, xc);
-
-        try
-        {
-            std::size_t n = condition_.size();
-            CDenseVector m2 = projectedVector(condition_, m);
-            condition_.push_back(i1[0]);
-            CDenseMatrix cp = projectedMatrix(condition_, c);
-            CDenseVector c12 = cp.topRightCorner(n, 1);
-            Eigen::JacobiSVD> c22(cp.topLeftCorner(n, n),
-                                  Eigen::ComputeThinU | Eigen::ComputeThinV);
-            LOG_TRACE("c22 = " << cp.topLeftCorner(n, n)
-                      << ", c12 = " << c12
-                      << ", a = " << xc
-                      << ", m2 = " << m2);
-
-            CDenseVector c22SolvexcMinusm2 = c22.solve(xc - m2);
-
-            double mean = m1 + c12.transpose() * c22SolvexcMinusm2;
-            double variance = std::max(c11 - c12.transpose() * c22.solve(c12),
-                                       MINIMUM_COEFFICIENT_OF_VARIATION * std::fabs(mean));
-            double weight = 0.5 * (std::log(variance) - (xc - m2).transpose() * c22SolvexcMinusm2);
-            LOG_TRACE("mean = " << mean
-                      << ", variance = " << variance
-                      << ", weight = " << weight);
-
-            return {TUnivariatePriorPtr(
-                        new CNormalMeanPrecConjugate(dataType, mean, p, s, variance * v / 2.0, decayRate)), weight};
-        }
-        catch (const std::exception &e)
-        {
-            LOG_ERROR("Failed to get univariate prior: " << e.what());
-        }

+        if (this->isNonInformative()) {
+            // Nothing to be done.
+            return;
+        }
+
+        double alpha = std::exp(-this->scaledDecayRate() * time);
+
+        m_GaussianPrecision = alpha * m_GaussianPrecision + (1.0 - alpha) * TPoint(NON_INFORMATIVE_PRECISION);
+
+        // The mean of the Wishart distribution is n V and the variance
+        // is [V]_ij = n ( V_ij^2 + V_ii * V_jj), note V is the inverse
+        // of the scale matrix. We want to increase the variance while
+        // holding its mean constant s.t. in the limit t -> inf var -> inf.
+        // Choosing a factor f in the range [0, 1] and the update is as
+        // follows:
+        //   n -> f * n
+        //   V -> f * V
+        //
+        // Thus the mean is unchanged and variance is increased by 1 / f.
+
+        double factor =
+            std::min((alpha * m_WishartDegreesFreedom + (1.0 - alpha) * NON_INFORMATIVE_DEGREES_FREEDOM) / m_WishartDegreesFreedom, 1.0);
+
+        m_WishartDegreesFreedom *= factor;
+        m_WishartScaleMatrix *= factor;
+
+        this->numberSamples(this->numberSamples() * alpha);
+
+        LOG_TRACE("time = " << time << ", alpha = " << alpha << ", m_WishartDegreesFreedom = " << m_WishartDegreesFreedom
+                            << ", m_WishartScaleMatrix = " << m_WishartScaleMatrix << ", m_GaussianMean = " << m_GaussianMean
+                            << ", m_GaussianPrecision = " << m_GaussianPrecision << ", numberSamples = " << this->numberSamples());
+    }
+
+    //! Compute the univariate prior marginalizing over the variables
+    //! \p marginalize and conditioning on the variables \p condition.
+    //!
+    //! \param[in] marginalize The variables to marginalize out.
+    //! \param[in] condition The variables to condition on.
+    //! \warning The caller owns the result.
+    //! \note The variables are passed by the index of their dimension
+    //! which must therefore be in range.
+    //! \note The caller must specify dimension - 1 variables between
+    //! \p marginalize and \p condition so the resulting distribution
+    //! is univariate.
+    virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const {
+        if (!this->check(marginalize, condition)) {
+            return TUnivariatePriorPtrDoublePr();
+        }

-    //!
Compute the bivariate prior marginalizing over the variables - //! \p marginalize and conditioning on the variables \p condition. - //! - //! \param[in] marginalize The variables to marginalize out. - //! \param[in] condition The variables to condition on. - //! \warning The caller owns the result. - //! \note The variables are passed by the index of their dimension - //! which must therefore be in range. - //! \note It is assumed that the variables are in sorted order. - //! \note The caller must specify dimension - 2 variables between - //! \p marginalize and \p condition so the resulting distribution - //! is univariate. - virtual TPriorPtrDoublePr bivariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const - { - if (N == 2) - { - return TPriorPtrDoublePr(boost::shared_ptr(this->clone()), 0.0); - } - - if (!this->check(marginalize, condition)) - { - return TPriorPtrDoublePr(); - } + TSize10Vec i1; + this->remainingVariables(marginalize, condition, i1); + if (i1.size() != 1) { + LOG_ERROR("Invalid variables for computing univariate distribution: " + << "marginalize '" << core::CContainerPrinter::print(marginalize) << "'" + << ", condition '" << core::CContainerPrinter::print(condition) << "'"); + return TUnivariatePriorPtrDoublePr(); + } - TSize10Vec i1; - this->remainingVariables(marginalize, condition, i1); - if (i1.size() != 2) - { - return TPriorPtrDoublePr(); - } + maths_t::EDataType dataType = this->dataType(); + double decayRate = this->decayRate(); + if (this->isNonInformative()) { + return {TUnivariatePriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate).clone()), 0.0}; + } - maths_t::EDataType dataType = this->dataType(); - double decayRate = this->decayRate(); - if (this->isNonInformative()) - { - return {TPriorPtr(CMultivariateNormalConjugate<2>::nonInformativePrior(dataType, decayRate).clone()), 0.0}; - } + double p = m_GaussianPrecision(i1[0]); + double s = m_WishartDegreesFreedom / 2.0; + double v = m_WishartDegreesFreedom - static_cast(N) - 1.0; + const TPoint& m = this->mean(); + TMatrix c = m_WishartScaleMatrix / v; - using TPoint2 = CVectorNx1; - using TMatrix2 = CSymmetricMatrixNxN; - - TPoint2 p; - p(0) = m_GaussianPrecision(i1[0]); - p(1) = m_GaussianPrecision(i1[1]); - double f = m_WishartDegreesFreedom; - const TPoint &m = this->mean(); - const TMatrix &c = m_WishartScaleMatrix; - - TPoint2 m1; - TMatrix2 c11; - for (std::size_t i = 0u; i < 2; ++i) - { - m1(i) = m(i1[i]); - for (std::size_t j = 0u; j < 2; ++j) - { - c11(i,j) = c(i1[i],i1[j]); - } - } - if (condition.empty()) - { - return {TPriorPtr(new CMultivariateNormalConjugate<2>(dataType, m1, p, f, c11, decayRate)), 0.0}; - } + double m1 = m(i1[0]); + double c11 = c(i1[0], i1[0]); + if (condition.empty()) { + return {TUnivariatePriorPtr(new CNormalMeanPrecConjugate(dataType, m1, p, s, c11 * v / 2.0, decayRate)), 0.0}; + } - TSize10Vec condition_; - condition_.reserve(condition.size() + 1); - CDenseVector xc(condition.size()); - this->unpack(condition, condition_, xc); - - try - { - std::size_t n = condition_.size(); - CDenseVector m2 = projectedVector(condition_, m); - condition_.push_back(i1[0]); - condition_.push_back(i1[1]); - CDenseMatrix cp = projectedMatrix(condition_, c); - CDenseVector c12 = cp.topRightCorner(n, 1); - Eigen::JacobiSVD> c22(cp.topLeftCorner(n, n), - Eigen::ComputeThinU | Eigen::ComputeThinV); - LOG_TRACE("c22 = " << cp.topLeftCorner(n, n) - << ", c12 = " << c12 - << ", a = " << xc - << ", m2 = " << m2); - - CDenseVector c22SolvexcMinusm2 
= c22.solve(xc - m2); - - TPoint2 mean(fromDenseVector(toDynamicDenseVector(m1) + c12.transpose() * c22SolvexcMinusm2)); - TMatrix2 covariance(fromDenseMatrix(toDynamicDenseMatrix(c11) - c12.transpose() * c22.solve(c12))); - double weight; - logDeterminant(covariance, weight, false); - weight -= 0.5 * (xc - m2).transpose() * c22SolvexcMinusm2; - LOG_TRACE("mean = " << mean << ", covariance = " << covariance); - - return {TPriorPtr(new CMultivariateNormalConjugate<2>(dataType, mean, p, f, covariance, decayRate)), weight}; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get univariate prior: " << e.what()); - } + TSize10Vec condition_; + condition_.reserve(condition.size() + 1); + CDenseVector xc(condition.size()); + this->unpack(condition, condition_, xc); + + try { + std::size_t n = condition_.size(); + CDenseVector m2 = projectedVector(condition_, m); + condition_.push_back(i1[0]); + CDenseMatrix cp = projectedMatrix(condition_, c); + CDenseVector c12 = cp.topRightCorner(n, 1); + Eigen::JacobiSVD> c22(cp.topLeftCorner(n, n), Eigen::ComputeThinU | Eigen::ComputeThinV); + LOG_TRACE("c22 = " << cp.topLeftCorner(n, n) << ", c12 = " << c12 << ", a = " << xc << ", m2 = " << m2); + + CDenseVector c22SolvexcMinusm2 = c22.solve(xc - m2); + + double mean = m1 + c12.transpose() * c22SolvexcMinusm2; + double variance = std::max(c11 - c12.transpose() * c22.solve(c12), MINIMUM_COEFFICIENT_OF_VARIATION * std::fabs(mean)); + double weight = 0.5 * (std::log(variance) - (xc - m2).transpose() * c22SolvexcMinusm2); + LOG_TRACE("mean = " << mean << ", variance = " << variance << ", weight = " << weight); + + return {TUnivariatePriorPtr(new CNormalMeanPrecConjugate(dataType, mean, p, s, variance * v / 2.0, decayRate)), weight}; + } catch (const std::exception& e) { LOG_ERROR("Failed to get univariate prior: " << e.what()); } + + return TUnivariatePriorPtrDoublePr(); + } + + //! Compute the bivariate prior marginalizing over the variables + //! \p marginalize and conditioning on the variables \p condition. + //! + //! \param[in] marginalize The variables to marginalize out. + //! \param[in] condition The variables to condition on. + //! \warning The caller owns the result. + //! \note The variables are passed by the index of their dimension + //! which must therefore be in range. + //! \note It is assumed that the variables are in sorted order. + //! \note The caller must specify dimension - 2 variables between + //! \p marginalize and \p condition so the resulting distribution + //! is univariate. + virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + if (N == 2) { + return TPriorPtrDoublePr(boost::shared_ptr(this->clone()), 0.0); + } + if (!this->check(marginalize, condition)) { return TPriorPtrDoublePr(); } - //! Get the support for the marginal likelihood function. - virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const - { - return {TPoint::smallest().template toVector(), - TPoint::largest().template toVector()}; + TSize10Vec i1; + this->remainingVariables(marginalize, condition, i1); + if (i1.size() != 2) { + return TPriorPtrDoublePr(); } - //! Get the mean of the marginal likelihood function. 
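Both univariate() and bivariate() reduce to the standard Gaussian conditioning identities visible in the hunks above: mean = m1 + C12 * C22^(-1) * (xc - m2) and variance = C11 - C12 * C22^(-1) * C21 (a Schur complement). A scalar sketch with one variable of interest and one conditioned variable, using made-up block values rather than the Eigen solves in the patch:

    #include <cstdio>

    int main() {
        // Joint 2-D Gaussian over (x1, x2): means and covariance blocks.
        double m1 = 1.0, m2 = -0.5;
        double c11 = 2.0, c12 = 0.8, c22 = 1.5;

        double xc = 0.7; // observed value of x2

        // Condition x1 on x2 = xc (scalar Schur complement).
        double mean = m1 + c12 * (xc - m2) / c22;
        double variance = c11 - c12 * c12 / c22;

        std::printf("x1 | x2 = %.2f  ~  N(%.4f, %.4f)\n", xc, mean, variance);
        return 0;
    }

In the patch the same algebra runs through an SVD-based solve so that rank-deficient conditioning blocks degrade gracefully instead of failing outright.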
- virtual TDouble10Vec marginalLikelihoodMean() const - { - return this->mean().template toVector(); + maths_t::EDataType dataType = this->dataType(); + double decayRate = this->decayRate(); + if (this->isNonInformative()) { + return {TPriorPtr(CMultivariateNormalConjugate<2>::nonInformativePrior(dataType, decayRate).clone()), 0.0}; } - //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec &/*weightStyles*/, - const TDouble10Vec4Vec &/*weights*/) const - { - return this->marginalLikelihoodMean(); + using TPoint2 = CVectorNx1; + using TMatrix2 = CSymmetricMatrixNxN; + + TPoint2 p; + p(0) = m_GaussianPrecision(i1[0]); + p(1) = m_GaussianPrecision(i1[1]); + double f = m_WishartDegreesFreedom; + const TPoint& m = this->mean(); + const TMatrix& c = m_WishartScaleMatrix; + + TPoint2 m1; + TMatrix2 c11; + for (std::size_t i = 0u; i < 2; ++i) { + m1(i) = m(i1[i]); + for (std::size_t j = 0u; j < 2; ++j) { + c11(i, j) = c(i1[i], i1[j]); + } + } + if (condition.empty()) { + return {TPriorPtr(new CMultivariateNormalConjugate<2>(dataType, m1, p, f, c11, decayRate)), 0.0}; } - //! Get the covariance matrix for the marginal likelihood. - virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const - { - return this->covarianceMatrix().template toVectors(); + TSize10Vec condition_; + condition_.reserve(condition.size() + 1); + CDenseVector xc(condition.size()); + this->unpack(condition, condition_, xc); + + try { + std::size_t n = condition_.size(); + CDenseVector m2 = projectedVector(condition_, m); + condition_.push_back(i1[0]); + condition_.push_back(i1[1]); + CDenseMatrix cp = projectedMatrix(condition_, c); + CDenseVector c12 = cp.topRightCorner(n, 1); + Eigen::JacobiSVD> c22(cp.topLeftCorner(n, n), Eigen::ComputeThinU | Eigen::ComputeThinV); + LOG_TRACE("c22 = " << cp.topLeftCorner(n, n) << ", c12 = " << c12 << ", a = " << xc << ", m2 = " << m2); + + CDenseVector c22SolvexcMinusm2 = c22.solve(xc - m2); + + TPoint2 mean(fromDenseVector(toDynamicDenseVector(m1) + c12.transpose() * c22SolvexcMinusm2)); + TMatrix2 covariance(fromDenseMatrix(toDynamicDenseMatrix(c11) - c12.transpose() * c22.solve(c12))); + double weight; + logDeterminant(covariance, weight, false); + weight -= 0.5 * (xc - m2).transpose() * c22SolvexcMinusm2; + LOG_TRACE("mean = " << mean << ", covariance = " << covariance); + + return {TPriorPtr(new CMultivariateNormalConjugate<2>(dataType, mean, p, f, covariance, decayRate)), weight}; + } catch (const std::exception& e) { LOG_ERROR("Failed to get univariate prior: " << e.what()); } + + return TPriorPtrDoublePr(); + } + + //! Get the support for the marginal likelihood function. + virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const { + return {TPoint::smallest().template toVector(), TPoint::largest().template toVector()}; + } + + //! Get the mean of the marginal likelihood function. + virtual TDouble10Vec marginalLikelihoodMean() const { return this->mean().template toVector(); } + + //! Get the mode of the marginal likelihood function. + virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble10Vec4Vec& /*weights*/) const { + return this->marginalLikelihoodMean(); + } + + //! Get the covariance matrix for the marginal likelihood. + virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const { + return this->covarianceMatrix().template toVectors(); + } + + //! Get the diagonal of the covariance matrix for the marginal likelihood. 
+ virtual TDouble10Vec marginalLikelihoodVariances() const { return this->covarianceMatrix().template diagonal(); } + + //! Calculate the log marginal likelihood function, integrating over the + //! prior density function. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the process. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] result Filled in with the joint likelihood of \p samples. + virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& result) const { + result = 0.0; + + if (samples.empty()) { + LOG_ERROR("Can't compute likelihood for empty sample set"); + return maths_t::E_FpFailed; + } + if (!this->check(samples, weights)) { + return maths_t::E_FpFailed; } - //! Get the diagonal of the covariance matrix for the marginal likelihood. - virtual TDouble10Vec marginalLikelihoodVariances() const - { - return this->covarianceMatrix().template diagonal(); + result = boost::numeric::bounds::lowest(); + + if (this->isNonInformative()) { + // The non-informative likelihood is improper and effectively + // zero everywhere. We use minus max double because + // log(0) = HUGE_VALUE, which causes problems for Windows. + // Calling code is notified when the calculation overflows + // and should avoid taking the exponential since this will + // underflow and pollute the floating point environment. This + // may cause issues for some library function implementations + // (see fe*exceptflag for more details). + return maths_t::E_FpOverflowed; } - //! Calculate the log marginal likelihood function, integrating over the - //! prior density function. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - double &result) const - { - result = 0.0; - - if (samples.empty()) - { - LOG_ERROR("Can't compute likelihood for empty sample set"); - return maths_t::E_FpFailed; - } - if (!this->check(samples, weights)) - { - return maths_t::E_FpFailed; - } + // We evaluate the integral over the latent variable by Monte-Carlo + // approximation. - result = boost::numeric::bounds::lowest(); - - if (this->isNonInformative()) - { - // The non-informative likelihood is improper and effectively - // zero everywhere. We use minus max double because - // log(0) = HUGE_VALUE, which causes problems for Windows. - // Calling code is notified when the calculation overflows - // and should avoid taking the exponential since this will - // underflow and pollute the floating point environment. This - // may cause issues for some library function implementations - // (see fe*exceptflag for more details). 
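The integer branch that follows averages likelihoods over sampled jitter offsets with a running-maximum shift, so that the exponentials never overflow. The same accumulation in isolation, as a small free function over a vector of log-likelihoods (a sketch, not the library's API):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // log(mean(exp(logL))) computed with a running maximum, mirroring the
    // Monte-Carlo average over the latent offset in the hunk below.
    double logMeanExp(const std::vector<double>& logL) {
        double sum = 0.0, n = 0.0, maxLogL = logL.front();
        for (double l : logL) {
            if (l > maxLogL) {
                sum *= std::exp(maxLogL - l); // rescale what has been accumulated
                maxLogL = l;
            }
            sum += std::exp(l - maxLogL);
            n += 1.0;
        }
        return maxLogL + std::log(sum / n);
    }

    int main() {
        std::vector<double> logL{-1000.0, -1001.0, -999.5};
        std::printf("log mean likelihood = %.4f\n", logMeanExp(logL));
        return 0;
    }

Taking std::exp of values near -1000 directly would underflow to zero; shifting by the maximum keeps every term in a representable range.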
- return maths_t::E_FpOverflowed; - } + maths_t::EFloatingPointErrorStatus status; - // We evaluate the integral over the latent variable by Monte-Carlo - // approximation. + if (this->isInteger()) { + double logLikelihood; + status = this->jointLogMarginalLikelihood(weightStyles, samples, TPoint(0.5), weights, logLikelihood); + if (status != maths_t::E_FpNoErrors) { + return status; + } - maths_t::EFloatingPointErrorStatus status; + double sum = 0.0; + double n = 0.0; + double maxLogLikelihood = logLikelihood; - if (this->isInteger()) - { - double logLikelihood; - status = this->jointLogMarginalLikelihood(weightStyles, samples, TPoint(0.5), weights, logLikelihood); - if (status != maths_t::E_FpNoErrors) - { - return status; + TDoubleVec z; + CSampling::uniformSample(0.0, 1.0, 3 * N, z); + for (std::size_t i = 0u; i < z.size(); i += N) { + status = this->jointLogMarginalLikelihood(weightStyles, samples, TPoint(&z[i], &z[i + N]), weights, logLikelihood); + if (status & maths_t::E_FpFailed) { + return maths_t::E_FpFailed; } - - double sum = 0.0; - double n = 0.0; - double maxLogLikelihood = logLikelihood; - - TDoubleVec z; - CSampling::uniformSample(0.0, 1.0, 3 * N, z); - for (std::size_t i = 0u; i < z.size(); i += N) - { - status = this->jointLogMarginalLikelihood(weightStyles, - samples, - TPoint(&z[i], &z[i + N]), - weights, - logLikelihood); - if (status & maths_t::E_FpFailed) - { - return maths_t::E_FpFailed; - } - if (status & maths_t::E_FpOverflowed) - { - continue; - } - - if (logLikelihood > maxLogLikelihood) - { - sum *= std::exp(maxLogLikelihood - logLikelihood); - maxLogLikelihood = logLikelihood; - } - sum += std::exp(logLikelihood - maxLogLikelihood); - n += 1.0; + if (status & maths_t::E_FpOverflowed) { + continue; } - result = maxLogLikelihood + std::log(sum / n); - } - else - { - status = this->jointLogMarginalLikelihood(weightStyles, samples, TPoint(0.0), weights, result); + if (logLikelihood > maxLogLikelihood) { + sum *= std::exp(maxLogLikelihood - logLikelihood); + maxLogLikelihood = logLikelihood; + } + sum += std::exp(logLikelihood - maxLogLikelihood); + n += 1.0; } - if (status & maths_t::E_FpFailed) - { - LOG_ERROR("Failed to compute log likelihood (" << this->debug() << ")"); - LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); - LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); - } - else if (status & maths_t::E_FpOverflowed) - { - LOG_TRACE("Log likelihood overflowed for (" << this->debug() << ")"); - LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); - LOG_TRACE("weights = " << core::CContainerPrinter::print(weights)); - } - return status; + result = maxLogLikelihood + std::log(sum / n); + } else { + status = this->jointLogMarginalLikelihood(weightStyles, samples, TPoint(0.0), weights, result); } - //! Sample the marginal likelihood function. - //! - //! The marginal likelihood functions are sampled in quantile intervals - //! of the generalized cumulative density function, specifically intervals - //! between contours of constant probability density. - //! - //! The idea is to capture a set of samples that accurately and efficiently - //! represent the information in the prior. Random sampling (although it - //! has nice asymptotic properties) doesn't fulfill the second requirement: - //! typically requiring many more samples than sampling in quantile intervals - //! to capture the same amount of information. - //! - //! This is to allow us to transform one prior distribution into another - //! 
completely generically and relatively efficiently, by updating the target - //! prior with these samples. As such the prior needs to maintain a count of - //! the number of samples to date so that it isn't over sampled. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble10Vec1Vec &samples) const - { - samples.clear(); - - if (numberSamples == 0 || this->numberSamples() == 0.0) - { - return; - } - - if (this->isNonInformative()) - { - // We can't sample the marginal likelihood directly. This should - // only happen if we've had one sample so just return that sample. - samples.push_back(m_GaussianMean.template toVector()); - return; - } - - // We sample the moment matched Gaussian (to the marginal likelihood). - // Clearly, E_{m*,p*}[ X ] = m. To calculate the covariance matrix we - // note that: - // E_{m*,p*}[ (X - m)'(X - m) ] - // = E_{m*,p*}[ (X - m*)'(X - m*) - (m* - m)'(m* - m) ] - // = E_{p*}[ (1 + 1/t) (p* ^ -1) ] - // = (1 + 1/t) V ^ (-1) / (v - d - 1) - // - // In the last line we have used the fact that if X ~ W_d(V, n) and - // Y = X^(-1), then Y ~ W_d^(-1)(V^(-1), n), i.e. the inverse Wishart - // with the same degrees of freedom, but inverse scale matrix. - // - // See sampleGaussian for details on the sampling strategy. - - double d = static_cast(N); - double v = m_WishartDegreesFreedom - d - 1.0; - TPoint mean(m_GaussianMean); - TMatrix covariance(m_WishartScaleMatrix); - for (std::size_t i = 0u; i < N; ++i) - { - if (m_GaussianPrecision(i) > 0.0 && v > 0.0) - { - scaleCovariances(i, (1.0 - 1.0 / m_GaussianPrecision(i)) / v, covariance); - } - } - TPointVec samples_; - sampleGaussian(numberSamples, mean, covariance, samples_); - samples.reserve(samples_.size()); - for (const auto &sample : samples_) - { - samples.push_back(sample.template toVector()); - } + if (status & maths_t::E_FpFailed) { + LOG_ERROR("Failed to compute log likelihood (" << this->debug() << ")"); + LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); + LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); + } else if (status & maths_t::E_FpOverflowed) { + LOG_TRACE("Log likelihood overflowed for (" << this->debug() << ")"); + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); + LOG_TRACE("weights = " << core::CContainerPrinter::print(weights)); + } + return status; + } + + //! Sample the marginal likelihood function. + //! + //! The marginal likelihood functions are sampled in quantile intervals + //! of the generalized cumulative density function, specifically intervals + //! between contours of constant probability density. + //! + //! The idea is to capture a set of samples that accurately and efficiently + //! represent the information in the prior. Random sampling (although it + //! has nice asymptotic properties) doesn't fulfill the second requirement: + //! typically requiring many more samples than sampling in quantile intervals + //! to capture the same amount of information. + //! + //! This is to allow us to transform one prior distribution into another + //! completely generically and relatively efficiently, by updating the target + //! prior with these samples. As such the prior needs to maintain a count of + //! the number of samples to date so that it isn't over sampled. + //! + //! 
\param[in] numberSamples The number of samples required. + //! \param[out] samples Filled in with samples from the prior. + //! \note \p numberSamples is truncated to the number of samples received. + virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const { + samples.clear(); + + if (numberSamples == 0 || this->numberSamples() == 0.0) { + return; } - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const - { - return m_WishartDegreesFreedom <= static_cast(N + 1); + if (this->isNonInformative()) { + // We can't sample the marginal likelihood directly. This should + // only happen if we've had one sample so just return that sample. + samples.push_back(m_GaussianMean.template toVector()); + return; } - //! Get a human readable description of the prior. - //! - // \param[in] separator String used to separate priors. - // \param[in,out] result Filled in with the description. - virtual void print(const std::string &separator, std::string &result) const - { - result += "\n" + separator + " multivariate normal"; - if (this->isNonInformative()) - { - result += " non-informative"; - } - else - { - std::ostringstream mean; - mean << this->mean(); - std::ostringstream covariance; - covariance << this->covarianceMatrix(); - result += ":\n" + mean.str() + covariance.str(); - result += "\n" + separator; + // We sample the moment matched Gaussian (to the marginal likelihood). + // Clearly, E_{m*,p*}[ X ] = m. To calculate the covariance matrix we + // note that: + // E_{m*,p*}[ (X - m)'(X - m) ] + // = E_{m*,p*}[ (X - m*)'(X - m*) - (m* - m)'(m* - m) ] + // = E_{p*}[ (1 + 1/t) (p* ^ -1) ] + // = (1 + 1/t) V ^ (-1) / (v - d - 1) + // + // In the last line we have used the fact that if X ~ W_d(V, n) and + // Y = X^(-1), then Y ~ W_d^(-1)(V^(-1), n), i.e. the inverse Wishart + // with the same degrees of freedom, but inverse scale matrix. + // + // See sampleGaussian for details on the sampling strategy. + + double d = static_cast(N); + double v = m_WishartDegreesFreedom - d - 1.0; + TPoint mean(m_GaussianMean); + TMatrix covariance(m_WishartScaleMatrix); + for (std::size_t i = 0u; i < N; ++i) { + if (m_GaussianPrecision(i) > 0.0 && v > 0.0) { + scaleCovariances(i, (1.0 - 1.0 / m_GaussianPrecision(i)) / v, covariance); } } - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const - { - seed = this->CMultivariatePrior::checksum(seed); - seed = CChecksum::calculate(seed, m_GaussianMean); - seed = CChecksum::calculate(seed, m_GaussianPrecision); - seed = CChecksum::calculate(seed, m_WishartDegreesFreedom); - return CChecksum::calculate(seed, m_WishartScaleMatrix); + TPointVec samples_; + sampleGaussian(numberSamples, mean, covariance, samples_); + samples.reserve(samples_.size()); + for (const auto& sample : samples_) { + samples.push_back(sample.template toVector()); } - - //! Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CMultivariateNormalConjugate"); + } + + //! Check if this is a non-informative prior. + virtual bool isNonInformative() const { return m_WishartDegreesFreedom <= static_cast(N + 1); } + + //! Get a human readable description of the prior. + //! + // \param[in] separator String used to separate priors. + // \param[in,out] result Filled in with the description. 
+ virtual void print(const std::string& separator, std::string& result) const { + result += "\n" + separator + " multivariate normal"; + if (this->isNonInformative()) { + result += " non-informative"; + } else { + std::ostringstream mean; + mean << this->mean(); + std::ostringstream covariance; + covariance << this->covarianceMatrix(); + result += ":\n" + mean.str() + covariance.str(); + result += "\n" + separator; } - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const - { - return 0; + } + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const { + seed = this->CMultivariatePrior::checksum(seed); + seed = CChecksum::calculate(seed, m_GaussianMean); + seed = CChecksum::calculate(seed, m_GaussianPrecision); + seed = CChecksum::calculate(seed, m_WishartDegreesFreedom); + return CChecksum::calculate(seed, m_WishartScaleMatrix); + } + + //! Get the memory used by this component + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMultivariateNormalConjugate"); } + + //! Get the memory used by this component + virtual std::size_t memoryUsage() const { return 0; } + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const { return sizeof(*this); } + + //! Get the tag name for this prior. + virtual std::string persistenceTag() const { return NORMAL_TAG + core::CStringUtils::typeToString(N); } + + //! Read parameters from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, + double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, + double numberSamples, + core::CStringUtils::stringToType(traverser.value(), numberSamples), + this->numberSamples(numberSamples)) + RESTORE(GAUSSIAN_MEAN_TAG, m_GaussianMean.fromDelimited(traverser.value())) + RESTORE(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision.fromDelimited(traverser.value())) + RESTORE(WISHART_DEGREES_FREEDOM_TAG, core::CStringUtils::stringToType(traverser.value(), m_WishartDegreesFreedom)) + RESTORE(WISHART_SCALE_MATRIX_TAG, m_WishartScaleMatrix.fromDelimited(traverser.value())) + } while (traverser.next()); + + return true; + } + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(GAUSSIAN_MEAN_TAG, m_GaussianMean.toDelimited()); + inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision.toDelimited()); + inserter.insertValue(WISHART_DEGREES_FREEDOM_TAG, m_WishartDegreesFreedom, core::CIEEE754::E_DoublePrecision); + inserter.insertValue(WISHART_SCALE_MATRIX_TAG, m_WishartScaleMatrix.toDelimited()); + } + //@} + + //! \name Sampling + //@{ + //! Randomly sample the covariance matrix prior. + void randomSamplePrecisionMatrixPrior(std::size_t n, TMatrixVec& result) { + // The prior on the precision matrix is Wishart with matrix V equal + // to the inverse of the scale matrix and degrees freedom equal to + // degrees freedom minus the data dimension. To sample from the Wishart + // we use the Bartlett transformation. 
In particular, if X ~ W(V, n), + // V = LL^t (i.e. the Cholesky factorization), then X = L A A^t L^t + // with + // | c1 0 . . 0 | + // | n21 c2 . | + // A = | . . . | + // | . . 0 | + // | nd1 . . . cd | + // + // Here, ci^2 ~ chi2(n - i + 1) and nij ~ N(0, 1). + + result.clear(); + + if (this->isNonInformative()) { + return; } - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const - { - return sizeof(*this); - } + double d = static_cast(N); + double f = m_WishartDegreesFreedom - d - 1.0; + LOG_TRACE("f = " << f); - //! Get the tag name for this prior. - virtual std::string persistenceTag() const - { - return NORMAL_TAG + core::CStringUtils::typeToString(N); - } + Eigen::JacobiSVD precision(toDenseMatrix(m_WishartScaleMatrix), Eigen::ComputeFullU | Eigen::ComputeFullV); - //! Read parameters from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) - { - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, - double decayRate, - core::CStringUtils::stringToType(traverser.value(), decayRate), - this->decayRate(decayRate)) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, - core::CStringUtils::stringToType(traverser.value(), numberSamples), - this->numberSamples(numberSamples)) - RESTORE(GAUSSIAN_MEAN_TAG, m_GaussianMean.fromDelimited(traverser.value())) - RESTORE(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision.fromDelimited(traverser.value())) - RESTORE(WISHART_DEGREES_FREEDOM_TAG, - core::CStringUtils::stringToType(traverser.value(), m_WishartDegreesFreedom)) - RESTORE(WISHART_SCALE_MATRIX_TAG, m_WishartScaleMatrix.fromDelimited(traverser.value())) - } - while (traverser.next()); + // Note we can extract the (non-zero vectors of the Cholesky + // factorization by noting that U = V^t and multiplying each + // column of U by the square root of the corresponding singular + // value). - return true; - } + std::size_t rank = static_cast(precision.rank()); - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, - this->numberSamples(), - core::CIEEE754::E_SinglePrecision); - inserter.insertValue(GAUSSIAN_MEAN_TAG, m_GaussianMean.toDelimited()); - inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision.toDelimited()); - inserter.insertValue(WISHART_DEGREES_FREEDOM_TAG, - m_WishartDegreesFreedom, - core::CIEEE754::E_DoublePrecision); - inserter.insertValue(WISHART_SCALE_MATRIX_TAG, m_WishartScaleMatrix.toDelimited()); + TDenseVector diag = precision.singularValues(); + for (std::size_t i = 0u; i < rank; ++i) { + diag(i) = 1.0 / std::sqrt(diag(i)); + } + for (std::size_t i = rank; i < N; ++i) { + diag(i) = 0.0; + } + TDenseMatrix L = TDenseMatrix::Zero(N, N); + L.leftCols(rank) = precision.matrixU().leftCols(rank); + L = L * diag.asDiagonal(); + LOG_TRACE("L = " << L); + + TDoubleVec chi; + TDoubleVec chii; + for (std::size_t i = 0u; i < rank; ++i) { + chii.clear(); + CSampling::chiSquaredSample(f - static_cast(i), n, chii); + chi.insert(chi.end(), chii.begin(), chii.end()); } - //@} - - //! \name Sampling - //@{ - //! Randomly sample the covariance matrix prior. 
- void randomSamplePrecisionMatrixPrior(std::size_t n, TMatrixVec &result) - { - // The prior on the precision matrix is Wishart with matrix V equal - // to the inverse of the scale matrix and degrees freedom equal to - // degrees freedom minus the data dimension. To sample from the Wishart - // we use the Bartlett transformation. In particular, if X ~ W(V, n), - // V = LL^t (i.e. the Cholesky factorization), then X = L A A^t L^t - // with - // | c1 0 . . 0 | - // | n21 c2 . | - // A = | . . . | - // | . . 0 | - // | nd1 . . . cd | - // - // Here, ci^2 ~ chi2(n - i + 1) and nij ~ N(0, 1). - - result.clear(); - - if (this->isNonInformative()) - { - return; + TDoubleVec normal; + CSampling::normalSample(0.0, 1.0, n * rank * (rank - 1) / 2, normal); + + TDenseMatrix A(N, N); + for (std::size_t s = 0u; s < n; ++s) { + A.setZero(); + for (std::size_t i = 0, k = 0u; i < rank; ++i) { + A(i, i) = std::sqrt(chi[i * n + s]); + for (std::size_t j = 0u; j < i; ++j, ++k) { + A(i, j) = normal[(s * rank * (rank - 1)) / 2 + k]; + } } + result.emplace_back(fromDenseMatrix(L * A * A.transpose() * L.transpose())); + } + } - double d = static_cast(N); - double f = m_WishartDegreesFreedom - d - 1.0; - LOG_TRACE("f = " << f); - - Eigen::JacobiSVD precision(toDenseMatrix(m_WishartScaleMatrix), - Eigen::ComputeFullU | Eigen::ComputeFullV); - - // Note we can extract the (non-zero vectors of the Cholesky - // factorization by noting that U = V^t and multiplying each - // column of U by the square root of the corresponding singular - // value). + //! Randomly sample from the marginal over the mean. + void randomSampleMeanPrior(std::size_t n, TPointVec& result) { + result.clear(); - std::size_t rank = static_cast(precision.rank()); + if (this->isNonInformative()) { + return; + } - TDenseVector diag = precision.singularValues(); - for (std::size_t i = 0u; i < rank; ++i) - { - diag(i) = 1.0 / std::sqrt(diag(i)); - } - for (std::size_t i = rank; i < N; ++i) - { - diag(i) = 0.0; - } - TDenseMatrix L = TDenseMatrix::Zero(N, N); - L.leftCols(rank) = precision.matrixU().leftCols(rank); - L = L * diag.asDiagonal(); - LOG_TRACE("L = " << L); - - TDoubleVec chi; - TDoubleVec chii; - for (std::size_t i = 0u; i < rank; ++i) - { - chii.clear(); - CSampling::chiSquaredSample(f - static_cast(i), n, chii); - chi.insert(chi.end(), chii.begin(), chii.end()); - } - TDoubleVec normal; - CSampling::normalSample(0.0, 1.0, n * rank * (rank - 1) / 2, normal); - - TDenseMatrix A(N,N); - for (std::size_t s = 0u; s < n; ++s) - { - A.setZero(); - for (std::size_t i = 0, k = 0u; i < rank; ++i) - { - A(i,i) = std::sqrt(chi[i * n + s]); - for (std::size_t j = 0u; j < i; ++j, ++k) - { - A(i,j) = normal[(s * rank * (rank - 1)) / 2 + k]; - } - } - result.emplace_back(fromDenseMatrix(L * A * A.transpose() * L.transpose())); + // The marginal distribution of the prior is multivariate t with + // mean equal to the Gaussian prior mean and covariance matrix + // equal to 1 / (f - d + 1) * S where S and f are the scale matrix + // and degrees freedom of the Wishart prior and d is the data + // dimension. We use the fact that a multivariate t variable + // T(m, V, v) = m + (W)^(1/2) N(0, V), where m is its mean and + // W ~ v / chi^2(v). 
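The Bartlett construction in randomSamplePrecisionMatrixPrior above (X = L A A^t L^t, with chi-squared entries of decreasing degrees of freedom on the diagonal of A and standard normals below it) can be sanity-checked in two dimensions. A sketch using <random> rather than CSampling, with an identity scale matrix so the Cholesky factor L drops out; the numbers are made up for illustration:

    #include <cmath>
    #include <cstdio>
    #include <random>

    int main() {
        // Sample X ~ W(I, nu) in 2-D via the Bartlett decomposition: X = A * A^t.
        std::mt19937 rng(42);
        double nu = 10.0;

        std::chi_squared_distribution<double> chi1(nu);       // c1^2 ~ chi2(nu)
        std::chi_squared_distribution<double> chi2(nu - 1.0); // c2^2 ~ chi2(nu - 1)
        std::normal_distribution<double> normal(0.0, 1.0);

        double a11 = std::sqrt(chi1(rng));
        double a21 = normal(rng);
        double a22 = std::sqrt(chi2(rng));

        // X = A A^t for lower-triangular A = [[a11, 0], [a21, a22]].
        double x11 = a11 * a11;
        double x21 = a21 * a11;
        double x22 = a21 * a21 + a22 * a22;

        std::printf("X = [[%.3f, %.3f], [%.3f, %.3f]]  (E[X] = nu * I)\n", x11, x21, x21, x22);
        return 0;
    }

Averaging many such draws should recover nu * I, which is the Wishart mean the surrounding comments rely on.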
+
+        double d = static_cast<double>(N);
+        double f = m_WishartDegreesFreedom - d + 1.0;
+        TPoint mean(m_GaussianMean);
+        TMatrix covariance = m_WishartScaleMatrix;
+        for (std::size_t i = 0u; i < N; ++i) {
+            if (m_GaussianPrecision(i) > 0.0 && f > 0.0) {
+                scaleCovariances(i, 1.0 / (m_GaussianPrecision(i) * f), covariance);
            }
        }
+        LOG_TRACE("mean = " << mean);
+        LOG_TRACE("covariance = " << covariance);
+        LOG_TRACE("fmin = " << f);

-        //! Randomly sample from the marginal over the mean.
-        void randomSampleMeanPrior(std::size_t n, TPointVec &result)
-        {
-            result.clear();
-
-            if (this->isNonInformative())
-            {
-                return;
-            }
+        TDoubleVec chi;
+        CSampling::chiSquaredSample(f, n, chi);
+        TPoint zero(0.0);
+        CSampling::multivariateNormalSample(zero, covariance, n, result);

-            // The marginal distribution of the prior is multivariate t with
-            // mean equal to the Gaussian prior mean and covariance matrix
-            // equal to 1 / (f - d + 1) * S where S and f are the scale matrix
-            // and degrees freedom of the Wishart prior and d is the data
-            // dimension. We use the fact that a multivariate t variable
-            // T(m, V, v) = m + (W)^(1/2) N(0, V), where m is its mean and
-            // W ~ v / chi^2(v).
-
-            double d = static_cast<double>(N);
-            double f = m_WishartDegreesFreedom - d + 1.0;
-            TPoint mean(m_GaussianMean);
-            TMatrix covariance = m_WishartScaleMatrix;
-            for (std::size_t i = 0u; i < N; ++i)
-            {
-                if (m_GaussianPrecision(i) > 0.0 && f > 0.0)
-                {
-                    scaleCovariances(i, 1.0 / (m_GaussianPrecision(i) * f), covariance);
-                }
-            }
-            LOG_TRACE("mean = " << mean);
-            LOG_TRACE("covariance = " << covariance);
-            LOG_TRACE("fmin = " << f);
-
-            TDoubleVec chi;
-            CSampling::chiSquaredSample(f, n, chi);
-            TPoint zero(0.0);
-            CSampling::multivariateNormalSample(zero, covariance, n, result);
-
-            for (std::size_t i = 0u; i < n; ++i)
-            {
-                result[i] = mean + (f / chi[i]) * result[i];
-            }
+        for (std::size_t i = 0u; i < n; ++i) {
+            result[i] = mean + (f / chi[i]) * result[i];
         }
+    }

-        //! Randomly sample from the predictive distribution.
-        void randomSamplePredictive(std::size_t n, TPointVec &result)
-        {
-            result.clear();
+    //! Randomly sample from the predictive distribution.
+    void randomSamplePredictive(std::size_t n, TPointVec& result) {
+        result.clear();

-            if (this->isNonInformative())
-            {
-                return;
-            }
+        if (this->isNonInformative()) {
+            return;
+        }

-            // The predictive distribution is multivariate t with mean, m,
-            // equal to the Gaussian prior mean, scale matrix, V, equal to
-            // the Wishart prior scale matrix, and degrees freedom, f, equal
-            // to its degrees of freedom. See randomSampleMeanPrior for how
-            // to sample from multivariate t.
+        // The predictive distribution is multivariate t with mean, m,
+        // equal to the Gaussian prior mean, scale matrix, V, equal to
+        // the Wishart prior scale matrix, and degrees freedom, f, equal
+        // to its degrees of freedom. See randomSampleMeanPrior for how
+        // to sample from multivariate t.
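// A quick Monte Carlo sanity check for samplers of this form, again an aside
// rather than part of the patch: for v > 2 a multivariate t with scale V has
// covariance v / (v - 2) * V, so the sample covariance of many draws from the
// hypothetical sampleMultivariateT sketch above should approach that matrix.

#include <cstddef>
#include <iostream>

void multivariateTCovarianceCheck() {
    std::mt19937_64 rng(42);

    Eigen::VectorXd m(2);
    m << 1.0, -1.0;
    Eigen::MatrixXd V(2, 2);
    V << 2.0, 0.5,
         0.5, 1.0;
    double v = 10.0;

    std::size_t n = 100000;
    Eigen::VectorXd mean = Eigen::VectorXd::Zero(2);
    Eigen::MatrixXd moment = Eigen::MatrixXd::Zero(2, 2);
    for (std::size_t i = 0; i < n; ++i) {
        Eigen::VectorXd x = sampleMultivariateT(m, V, v, rng);
        mean += x;
        moment += x * x.transpose();
    }
    mean /= static_cast<double>(n);
    Eigen::MatrixXd covariance = moment / static_cast<double>(n) - mean * mean.transpose();

    // Expect approximately v / (v - 2) * V = 1.25 * V here.
    std::cout << "sample covariance =\n" << covariance << '\n'
              << "v / (v - 2) * V =\n" << (v / (v - 2.0)) * V << std::endl;
}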
-            double d = static_cast<double>(N);
-            double f = m_WishartDegreesFreedom - d;
-            TPoint mean(m_GaussianMean);
+        double d = static_cast<double>(N);
+        double f = m_WishartDegreesFreedom - d;
+        TPoint mean(m_GaussianMean);

-            TDoubleVec chi;
-            CSampling::chiSquaredSample(f, n, chi);
-            TPoint zero(0.0);
-            CSampling::multivariateNormalSample(zero, m_WishartScaleMatrix, n, result);
+        TDoubleVec chi;
+        CSampling::chiSquaredSample(f, n, chi);
+        TPoint zero(0.0);
+        CSampling::multivariateNormalSample(zero, m_WishartScaleMatrix, n, result);

-            for (std::size_t i = 0u; i < n; ++i)
-            {
-                result[i] = mean + (f / chi[i]) * result[i];
+        for (std::size_t i = 0u; i < n; ++i) {
+            result[i] = mean + (f / chi[i]) * result[i];
+        }
+    }
+    //@}
+
+    //! Get the expected mean of the marginal likelihood.
+    TPoint mean() const { return this->isInteger() ? m_GaussianMean - TPoint(0.5) : m_GaussianMean; }
+
+    //! Get the covariance matrix for the marginal likelihood.
+    TMatrix covarianceMatrix() const {
+        // This can be found by change of variables from the prior on the
+        // precision matrix. In particular, if X ~ W_d(V, n) and Y = X^(-1),
+        // then Y ~ W_d^(-1)(V^(-1), n), i.e. the inverse Wishart with the
+        // same degrees of freedom, but inverse scale matrix.
+
+        double d = static_cast<double>(N);
+        double v = m_WishartDegreesFreedom - d - 1.0;
+        TMatrix covariance(m_WishartScaleMatrix);
+        for (std::size_t i = 0u; i < N; ++i) {
+            if (m_GaussianPrecision(i) > 0.0 && v > 0.0) {
+                scaleCovariances(i, (1.0 - 1.0 / m_GaussianPrecision(i)) / v, covariance);
            }
        }
-        //@}
-
-        //! Get the expected mean of the marginal likelihood.
-        TPoint mean() const
-        {
-            return this->isInteger() ? m_GaussianMean - TPoint(0.5) : m_GaussianMean;
+        return covariance;
+    }
+
+    //! \name Test Functions
+    //@{
+    //! Get the expected precision matrix of the marginal likelihood.
+    TMatrix precision() const {
+        if (this->isNonInformative()) {
+            return TMatrix(0.0);
        }

-        //! Get the covariance matrix for the marginal likelihood.
-        TMatrix covarianceMatrix() const
-        {
-            // This can be found by change of variables from the prior on the
-            // precision matrix. In particular, if X ~ W_d(V, n) and Y = X^(-1),
-            // then Y ~ W_d^(-1)(V^(-1), n), i.e. the inverse Wishart with the
-            // same degrees of freedom, but inverse scale matrix.
-
-            double d = static_cast<double>(N);
-            double v = m_WishartDegreesFreedom - d - 1.0;
-            TMatrix covariance(m_WishartScaleMatrix);
-            for (std::size_t i = 0u; i < N; ++i)
-            {
-                if (m_GaussianPrecision(i) > 0.0 && v > 0.0)
-                {
-                    scaleCovariances(i, (1.0 - 1.0 / m_GaussianPrecision(i)) / v, covariance);
+        TMatrix result(m_WishartScaleMatrix / m_WishartDegreesFreedom);
+        return TMatrix(fromDenseMatrix(toDenseMatrix(result).inverse()));
+    }
+
+    //! Check if two priors are equal to the specified tolerance.
+    bool equalTolerance(const CMultivariateNormalConjugate& rhs, unsigned int toleranceType, double epsilon) const {
+        LOG_DEBUG(m_GaussianMean << " " << rhs.m_GaussianMean);
+        LOG_DEBUG(m_GaussianPrecision << " " << rhs.m_GaussianPrecision);
+        LOG_DEBUG(m_WishartDegreesFreedom << " " << rhs.m_WishartDegreesFreedom);
+        LOG_DEBUG(m_WishartScaleMatrix << " " << rhs.m_WishartScaleMatrix);
+
+        CEqualWithTolerance<double> equalScalar(toleranceType, epsilon);
+        CEqualWithTolerance<TPoint> equalVector(toleranceType, TPoint(epsilon));
+        CEqualWithTolerance<TMatrix> equalMatrix(toleranceType, TMatrix(epsilon));
+
+        return equalVector(m_GaussianMean, rhs.m_GaussianMean) && equalVector(m_GaussianPrecision, rhs.m_GaussianPrecision) &&
+               equalScalar(m_WishartDegreesFreedom, rhs.m_WishartDegreesFreedom) &&
+               equalMatrix(m_WishartScaleMatrix, rhs.m_WishartScaleMatrix);
+    }
+    //@}
+
+private:
+    //! The mean parameter of a non-informative prior.
+    static const TPoint NON_INFORMATIVE_MEAN;
+
+    //! The precision parameter of a non-informative prior.
+    static const double NON_INFORMATIVE_PRECISION;
+
+    //! The degrees freedom of a non-informative prior.
+    static const double NON_INFORMATIVE_DEGREES_FREEDOM;
+
+    //! The scale matrix of a non-informative prior.
+    static const TMatrix NON_INFORMATIVE_SCALE;
+
+    //! The minimum degrees freedom for the Wishart distribution for
+    //! which we'll treat the predictive distribution as Gaussian.
+    static const double MINIMUM_GAUSSIAN_DEGREES_FREEDOM;
+
+    //! \name State tags for model persistence.
+    //@{
+    static const std::string NUMBER_SAMPLES_TAG;
+    static const std::string GAUSSIAN_MEAN_TAG;
+    static const std::string GAUSSIAN_PRECISION_TAG;
+    static const std::string WISHART_DEGREES_FREEDOM_TAG;
+    static const std::string WISHART_SCALE_MATRIX_TAG;
+    static const std::string DECAY_RATE_TAG;
+    //@}
+
+private:
+    //! Unpack the variable values on which to condition.
+    void unpack(const TSizeDoublePr10Vec& condition, TSize10Vec& condition_, CDenseVector<double>& x) const {
+        condition_.reserve(condition.size());
+        for (std::size_t i = 0u; i < condition.size(); ++i) {
+            condition_.push_back(condition[i].first);
+            x(i) = condition[i].second;
+        }
+    }
+
+    //! Compute the marginal likelihood for \p samples at the offset
+    //! \p offset.
+    maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                  const TDouble10Vec1Vec& samples,
+                                                                  const TPoint& offset,
+                                                                  const TDouble10Vec4Vec1Vec& weights,
+                                                                  double& result) const {
+        // As usual, one can find the marginal likelihood by noting that
+        // it is proportional to the ratio of the normalization factors
+        // of the conjugate distribution before and after update with the
+        // samples.
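        // For reference, this is one way to read the expression assembled at
        // the end of this function: in log form it is the Gaussian-Wishart
        // evidence. With prior and posterior degrees freedom \f$\nu_0, \nu_n\f$,
        // scale matrices \f$V_0, V_n\f$, per-coordinate Gaussian precisions
        // \f$\kappa_0, \kappa_n\f$, data dimension \f$d\f$ and effective sample
        // count \f$n\f$ it reads, up to the count variance scale correction,
        // \f[
        //   \log p(X) = \frac{1}{2}\Big(\nu_0 \log|V_0| - \nu_n \log|V_n|
        //       - d \sum_i (\log\kappa_{n,i} - \log\kappa_{0,i})
        //       + (\nu_n - \nu_0)\, d \log 2
        //       + 2 \log\frac{\Gamma_d(\nu_n / 2)}{\Gamma_d(\nu_0 / 2)}
        //       - n d \log 2\pi\Big)
        // \f]
        // where \f$\Gamma_d\f$ is the multivariate gamma function, whose
        // ratio is what the sum of univariate \f$\log\Gamma\f$ terms below
        // expands (the \f$\pi\f$ factors cancel in the ratio).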
+
+        double d = static_cast<double>(N);
+
+        double numberSamples = 0.0;
+        TCovariance covariancePost;
+        double logCountVarianceScales = 0.0;
+        try {
+            TPoint m(this->marginalLikelihoodMean());
+            for (std::size_t i = 0u; i < samples.size(); ++i) {
+                TPoint x(samples[i]);
+                TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i]));
+                TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])));
+                TPoint countVarianceScale(maths_t::countVarianceScale(N, weightStyles, weights[i]));
+                x = m + (x + offset - m) / seasonalScale;
+                numberSamples += this->smallest(n.template toVector<TDouble10Vec>());
+                covariancePost.add(x, n / countVarianceScale);
+                for (std::size_t j = 0u; j < N; ++j) {
+                    logCountVarianceScales -= 0.5 * std::log(countVarianceScale(j));
                }
            }
-            return covariance;
+        } catch (const std::exception& e) {
+            LOG_ERROR("Failed to update likelihood: " << e.what());
+            return maths_t::E_FpFailed;
        }

-        //! \name Test Functions
-        //@{
-        //! Get the expected precision matrix of the marginal likelihood.
-        TMatrix precision() const
-        {
-            if (this->isNonInformative())
-            {
-                return TMatrix(0.0);
-            }
-
-            TMatrix result(m_WishartScaleMatrix / m_WishartDegreesFreedom);
-            return TMatrix(fromDenseMatrix(toDenseMatrix(result).inverse()));
+        TPoint scaledNumberSamples = covariancePost.s_Count;
+        TCovariance covariancePrior =
+            CBasicStatistics::accumulator(m_WishartDegreesFreedom, m_GaussianMean, m_WishartScaleMatrix / m_WishartDegreesFreedom);
+        covariancePost += covariancePrior;
+
+        double logGaussianPrecisionPrior = 0.0;
+        double logGaussianPrecisionPost = 0.0;
+        for (std::size_t i = 0u; i < N; ++i) {
+            logGaussianPrecisionPrior += std::log(m_GaussianPrecision(i));
+            logGaussianPrecisionPost += std::log(m_GaussianPrecision(i) + scaledNumberSamples(i));
        }

-        //! Check if two priors are equal to the specified tolerance.
-        bool equalTolerance(const CMultivariateNormalConjugate &rhs,
-                            unsigned int toleranceType,
-                            double epsilon) const
-        {
-            LOG_DEBUG(m_GaussianMean << " " << rhs.m_GaussianMean);
-            LOG_DEBUG(m_GaussianPrecision << " " << rhs.m_GaussianPrecision);
-            LOG_DEBUG(m_WishartDegreesFreedom << " " << rhs.m_WishartDegreesFreedom);
-            LOG_DEBUG(m_WishartScaleMatrix << " " << rhs.m_WishartScaleMatrix);
-
-            CEqualWithTolerance<double> equalScalar(toleranceType, epsilon);
-            CEqualWithTolerance<TPoint> equalVector(toleranceType, TPoint(epsilon));
-            CEqualWithTolerance<TMatrix> equalMatrix(toleranceType, TMatrix(epsilon));
-
-            return equalVector(m_GaussianMean, rhs.m_GaussianMean)
-                && equalVector(m_GaussianPrecision, rhs.m_GaussianPrecision)
-                && equalScalar(m_WishartDegreesFreedom, rhs.m_WishartDegreesFreedom)
-                && equalMatrix(m_WishartScaleMatrix, rhs.m_WishartScaleMatrix);
+        double wishartDegreesFreedomPrior = m_WishartDegreesFreedom;
+        double wishartDegreesFreedomPost = m_WishartDegreesFreedom + numberSamples;
+        TMatrix wishartScaleMatrixPost = covariancePost.s_Covariances;
+        scaleCovariances(covariancePost.s_Count, wishartScaleMatrixPost);
+        double logDeterminantPrior;
+        if (logDeterminant(m_WishartScaleMatrix, logDeterminantPrior, false) & maths_t::E_FpFailed) {
+            LOG_ERROR("Failed to calculate log det " << m_WishartScaleMatrix);
+            return maths_t::E_FpFailed;
        }
-        //@}
-
-    private:
-        //! The mean parameter of a non-informative prior.
-        static const TPoint NON_INFORMATIVE_MEAN;
-
-        //! The precision parameter of a non-informative prior.
-        static const double NON_INFORMATIVE_PRECISION;
-
-        //! The degrees freedom of a non-informative prior.
-        static const double NON_INFORMATIVE_DEGREES_FREEDOM;
-
-        //! The scale matrix of a non-informative prior.
-        static const TMatrix NON_INFORMATIVE_SCALE;
-
-        //! The minimum degrees freedom for the Wishart distribution for
-        //! which we'll treat the predictive distribution as Gaussian.
-        static const double MINIMUM_GAUSSIAN_DEGREES_FREEDOM;
-
-        //! \name State tags for model persistence.
-        //@{
-        static const std::string NUMBER_SAMPLES_TAG;
-        static const std::string GAUSSIAN_MEAN_TAG;
-        static const std::string GAUSSIAN_PRECISION_TAG;
-        static const std::string WISHART_DEGREES_FREEDOM_TAG;
-        static const std::string WISHART_SCALE_MATRIX_TAG;
-        static const std::string DECAY_RATE_TAG;
-        //@}
-
-    private:
-        //! Unpack the variable values on which to condition.
-        void unpack(const TSizeDoublePr10Vec &condition,
-                    TSize10Vec &condition_,
-                    CDenseVector<double> &x) const
-        {
-            condition_.reserve(condition.size());
-            for (std::size_t i = 0u; i < condition.size(); ++i)
-            {
-                condition_.push_back(condition[i].first);
-                x(i) = condition[i].second;
-            }
+        double logDeterminantPost;
+        if (logDeterminant(wishartScaleMatrixPost, logDeterminantPost) & maths_t::E_FpFailed) {
+            LOG_ERROR("Failed to calculate log det " << wishartScaleMatrixPost);
+            return maths_t::E_FpFailed;
        }

-        //! Compute the marginal likelihood for \p samples at the offset
-        //! \p offset.
-        maths_t::EFloatingPointErrorStatus
-        jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                   const TDouble10Vec1Vec &samples,
-                                   const TPoint &offset,
-                                   const TDouble10Vec4Vec1Vec &weights,
-                                   double &result) const
-        {
-            // As usual, one can find the marginal likelihood by noting that
-            // it is proportional to the ratio of the normalization factors
-            // of the conjugate distribution before and after update with the
-            // samples.
-
-            double d = static_cast<double>(N);
-
-            double numberSamples = 0.0;
-            TCovariance covariancePost;
-            double logCountVarianceScales = 0.0;
-            try
-            {
-                TPoint m(this->marginalLikelihoodMean());
-                for (std::size_t i = 0u; i < samples.size(); ++i)
-                {
-                    TPoint x(samples[i]);
-                    TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i]));
-                    TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])));
-                    TPoint countVarianceScale(maths_t::countVarianceScale(N, weightStyles, weights[i]));
-                    x = m + (x + offset - m) / seasonalScale;
-                    numberSamples += this->smallest(n.template toVector<TDouble10Vec>());
-                    covariancePost.add(x, n / countVarianceScale);
-                    for (std::size_t j = 0u; j < N; ++j)
-                    {
-                        logCountVarianceScales -= 0.5 * std::log(countVarianceScale(j));
-                    }
-                }
-            }
-            catch (const std::exception &e)
-            {
-                LOG_ERROR("Failed to update likelihood: " << e.what());
-                return maths_t::E_FpFailed;
-            }
-            TPoint scaledNumberSamples = covariancePost.s_Count;
-            TCovariance covariancePrior =
-                    CBasicStatistics::accumulator(m_WishartDegreesFreedom,
-                                                  m_GaussianMean,
-                                                  m_WishartScaleMatrix / m_WishartDegreesFreedom);
-            covariancePost += covariancePrior;
-
-            double logGaussianPrecisionPrior = 0.0;
-            double logGaussianPrecisionPost = 0.0;
-            for (std::size_t i = 0u; i < N; ++i)
-            {
-                logGaussianPrecisionPrior += std::log(m_GaussianPrecision(i));
-                logGaussianPrecisionPost += std::log(m_GaussianPrecision(i) + scaledNumberSamples(i));
-            }
-            double wishartDegreesFreedomPrior = m_WishartDegreesFreedom;
-            double wishartDegreesFreedomPost = m_WishartDegreesFreedom + numberSamples;
-            TMatrix wishartScaleMatrixPost = covariancePost.s_Covariances;
-            scaleCovariances(covariancePost.s_Count, wishartScaleMatrixPost);
-            double logDeterminantPrior;
-            if (logDeterminant(m_WishartScaleMatrix, logDeterminantPrior, false) & maths_t::E_FpFailed)
-            {
-                LOG_ERROR("Failed to calculate log det " << m_WishartScaleMatrix);
-                return maths_t::E_FpFailed;
-            }
-            double logDeterminantPost;
-            if (logDeterminant(wishartScaleMatrixPost, logDeterminantPost) & maths_t::E_FpFailed)
-            {
-                LOG_ERROR("Failed to calculate log det " << wishartScaleMatrixPost);
-                return maths_t::E_FpFailed;
-            }
-
-            try
-            {
-                double logGammaPostMinusPrior = 0.0;
-                for (std::size_t i = 0u; i < N; ++i)
-                {
-                    logGammaPostMinusPrior +=
-                            boost::math::lgamma(0.5 * (wishartDegreesFreedomPost - static_cast<double>(i)))
-                          - boost::math::lgamma(0.5 * (wishartDegreesFreedomPrior - static_cast<double>(i)));
-                }
-                LOG_TRACE("numberSamples = " << numberSamples);
-                LOG_TRACE("logGaussianPrecisionPrior = " << logGaussianPrecisionPrior
-                          << ", logGaussianPrecisionPost = " << logGaussianPrecisionPost);
-                LOG_TRACE("wishartDegreesFreedomPrior = " << wishartDegreesFreedomPrior
-                          << ", wishartDegreesFreedomPost = " << wishartDegreesFreedomPost);
-                LOG_TRACE("wishartScaleMatrixPrior = " << m_WishartScaleMatrix);
-                LOG_TRACE("wishartScaleMatrixPost = " << wishartScaleMatrixPost);
-                LOG_TRACE("logDeterminantPrior = " << logDeterminantPrior
-                          << ", logDeterminantPost = " << logDeterminantPost);
-                LOG_TRACE("logGammaPostMinusPrior = " << logGammaPostMinusPrior);
-                LOG_TRACE("logCountVarianceScales = " << logCountVarianceScales);
-
-                result = 0.5 * (  wishartDegreesFreedomPrior * logDeterminantPrior
-                                - wishartDegreesFreedomPost * logDeterminantPost
-                                - d * (logGaussianPrecisionPost - logGaussianPrecisionPrior)
-                                + (wishartDegreesFreedomPost - wishartDegreesFreedomPrior) * d * core::constants::LOG_TWO
-                                + 2.0 * logGammaPostMinusPrior
-                                - numberSamples * d * core::constants::LOG_TWO_PI
-                                - logCountVarianceScales);
+        try {
+            double logGammaPostMinusPrior = 0.0;
+            for (std::size_t i = 0u; i < N; ++i) {
+                logGammaPostMinusPrior += boost::math::lgamma(0.5 * (wishartDegreesFreedomPost - static_cast<double>(i))) -
+                                          boost::math::lgamma(0.5 * (wishartDegreesFreedomPrior - static_cast<double>(i)));
            }
-            catch (const std::exception &e)
-            {
-                LOG_ERROR("Failed to calculate marginal likelihood: " << e.what());
-                return maths_t::E_FpFailed;
-            }
-            return static_cast<maths_t::EFloatingPointErrorStatus>(CMathsFuncs::fpStatus(result));
-        }
-
-        //! Check that the state is valid.
-        bool isBad() const
-        {
-            return !CMathsFuncs::isFinite(m_GaussianMean)
-                || !CMathsFuncs::isFinite(m_GaussianPrecision)
-                || !CMathsFuncs::isFinite(m_WishartDegreesFreedom)
-                || !CMathsFuncs::isFinite(m_WishartScaleMatrix);
-        }
-
-        //! Full debug dump of the state of this prior.
-        std::string debug() const
-        {
-            std::ostringstream result;
-            result << std::scientific << std::setprecision(15)
-                   << m_GaussianMean << " "
-                   << m_GaussianPrecision << " "
-                   << m_WishartDegreesFreedom << " "
-                   << m_WishartScaleMatrix;
-            return result.str();
+            LOG_TRACE("numberSamples = " << numberSamples);
+            LOG_TRACE("logGaussianPrecisionPrior = " << logGaussianPrecisionPrior
+                                                     << ", logGaussianPrecisionPost = " << logGaussianPrecisionPost);
+            LOG_TRACE("wishartDegreesFreedomPrior = " << wishartDegreesFreedomPrior
+                                                      << ", wishartDegreesFreedomPost = " << wishartDegreesFreedomPost);
+            LOG_TRACE("wishartScaleMatrixPrior = " << m_WishartScaleMatrix);
+            LOG_TRACE("wishartScaleMatrixPost = " << wishartScaleMatrixPost);
+            LOG_TRACE("logDeterminantPrior = " << logDeterminantPrior << ", logDeterminantPost = " << logDeterminantPost);
+            LOG_TRACE("logGammaPostMinusPrior = " << logGammaPostMinusPrior);
+            LOG_TRACE("logCountVarianceScales = " << logCountVarianceScales);
+
+            result = 0.5 * (wishartDegreesFreedomPrior * logDeterminantPrior - wishartDegreesFreedomPost * logDeterminantPost -
+                            d * (logGaussianPrecisionPost - logGaussianPrecisionPrior) +
+                            (wishartDegreesFreedomPost - wishartDegreesFreedomPrior) * d * core::constants::LOG_TWO +
+                            2.0 * logGammaPostMinusPrior - numberSamples * d * core::constants::LOG_TWO_PI - logCountVarianceScales);
+        } catch (const std::exception& e) {
+            LOG_ERROR("Failed to calculate marginal likelihood: " << e.what());
+            return maths_t::E_FpFailed;
        }
-
-    private:
-        //! The mean of the multivariate Gaussian prior.
-        TPoint m_GaussianMean;
-
-        //! The precision scale of the multivariate Gaussian prior.
-        TPoint m_GaussianPrecision;
-
-        //! The degrees freedom of the Wishart prior.
-        double m_WishartDegreesFreedom;
-
-        //! The scale matrix of the Wishart prior.
-        TMatrix m_WishartScaleMatrix;
+        return static_cast<maths_t::EFloatingPointErrorStatus>(CMathsFuncs::fpStatus(result));
+    }
+
+    //! Check that the state is valid.
+    bool isBad() const {
+        return !CMathsFuncs::isFinite(m_GaussianMean) || !CMathsFuncs::isFinite(m_GaussianPrecision) ||
+               !CMathsFuncs::isFinite(m_WishartDegreesFreedom) || !CMathsFuncs::isFinite(m_WishartScaleMatrix);
+    }
+
+    //! Full debug dump of the state of this prior.
+    std::string debug() const {
+        std::ostringstream result;
+        result << std::scientific << std::setprecision(15) << m_GaussianMean << " " << m_GaussianPrecision << " " << m_WishartDegreesFreedom
+               << " " << m_WishartScaleMatrix;
+        return result.str();
+    }
+
+private:
+    //! The mean of the multivariate Gaussian prior.
+    TPoint m_GaussianMean;
+
+    //! The precision scale of the multivariate Gaussian prior.
+    TPoint m_GaussianPrecision;
+
+    //! The degrees freedom of the Wishart prior.
+    double m_WishartDegreesFreedom;
+
+    //! The scale matrix of the Wishart prior.
+    TMatrix m_WishartScaleMatrix;
 };

 template<std::size_t N>
@@ -1312,7 +1096,6 @@ template<std::size_t N>
 const typename CMultivariateNormalConjugate<N>::TMatrix CMultivariateNormalConjugate<N>::NON_INFORMATIVE_SCALE = TMatrix(0);
 template<std::size_t N>
 const double CMultivariateNormalConjugate<N>::MINIMUM_GAUSSIAN_DEGREES_FREEDOM(100.0);
-
 }
 }

diff --git a/include/maths/CMultivariateNormalConjugateFactory.h b/include/maths/CMultivariateNormalConjugateFactory.h
index 72c152b1b1..6dc00b605e 100644
--- a/include/maths/CMultivariateNormalConjugateFactory.h
+++ b/include/maths/CMultivariateNormalConjugateFactory.h
@@ -14,37 +14,28 @@

 #include

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStateRestoreTraverser;
 }

-namespace maths
-{
+namespace maths {
 class CMultivariatePrior;
 struct SDistributionRestoreParams;

 //! \brief Factory for multivariate normal conjugate priors.
-class MATHS_EXPORT CMultivariateNormalConjugateFactory
-{
-    public:
-        using TPriorPtr = boost::shared_ptr<CMultivariatePrior>;
-
-    public:
-        //! Create a new non-informative multivariate normal prior.
-        static TPriorPtr nonInformative(std::size_t dimension,
-                                        maths_t::EDataType dataType,
-                                        double decayRate);
-
-        //! Create reading state from its state document representation.
-        static bool restore(std::size_t dimension,
-                            const SDistributionRestoreParams &params,
-                            TPriorPtr &ptr,
-                            core::CStateRestoreTraverser &traverser);
-};
+class MATHS_EXPORT CMultivariateNormalConjugateFactory {
+public:
+    using TPriorPtr = boost::shared_ptr<CMultivariatePrior>;
+
+public:
+    //! Create a new non-informative multivariate normal prior.
+    static TPriorPtr nonInformative(std::size_t dimension, maths_t::EDataType dataType, double decayRate);

+    //! Create reading state from its state document representation.
+    static bool
+    restore(std::size_t dimension, const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser);
+};
 }
 }

diff --git a/include/maths/CMultivariateOneOfNPrior.h b/include/maths/CMultivariateOneOfNPrior.h
index ae0e918a6c..1acef8b147 100644
--- a/include/maths/CMultivariateOneOfNPrior.h
+++ b/include/maths/CMultivariateOneOfNPrior.h
@@ -19,16 +19,13 @@

 #include
 #include

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }

-namespace maths
-{
+namespace maths {
 struct SDistributionRestoreParams;

 //! \brief Interface for a multivariate prior distribution which assumes data
@@ -55,267 +52,251 @@ struct SDistributionRestoreParams;
 //! hierarchy can be mixed in. All component models are owned by the object
 //! (it wouldn't make sense to share them) so this also defines the necessary
 //! functions to support value semantics and manage the heap.
-class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior
-{
-    public:
-        using TDouble3Vec = core::CSmallVector<double, 3>;
-        using TPriorPtrVec = std::vector<TPriorPtr>;
-        using TDoublePriorPtrPr = std::pair<double, TPriorPtr>;
-        using TDoublePriorPtrPrVec = std::vector<TDoublePriorPtrPr>;
-        using TWeightPriorPtrPr = std::pair<CModelWeight, TPriorPtr>;
-        using TWeightPriorPtrPrVec = std::vector<TWeightPriorPtrPr>;
-        using TPriorCPtr3Vec = core::CSmallVector<const CMultivariatePrior*, 3>;
-        using TMinAccumulator = CBasicStatistics::SMin<double>::TAccumulator;
-        using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
-
-        // Lift all overloads of these functions into scope.
-        //{
-        using CMultivariatePrior::dataType;
-        using CMultivariatePrior::decayRate;
-        using CMultivariatePrior::addSamples;
-        using CMultivariatePrior::probabilityOfLessLikelySamples;
-        using CMultivariatePrior::print;
-        //}
-
-    private:
-        //! The maximum relative error we'll tolerate in c.d.f. and probability calculations.
-        static const double MAXIMUM_RELATIVE_ERROR;
-        //! The log of maximum relative error we'll tolerate in c.d.f. and probability
-        //! calculations.
-        static const double LOG_MAXIMUM_RELATIVE_ERROR;
-
-    public:
-        //! \name Life-Cycle
-        //@{
-        //! Create with a collection of models.
-        //!
-        //! \param[in] dimension The model dimension.
-        //! \param[in] models The simple models which comprise the mixed model.
-        //! \param[in] dataType The type of data being modeled (see maths_t::EDataType
-        //! for details).
-        //! \param[in] decayRate The rate at which to revert to the non-informative prior.
-        //! \warning This class takes ownership of \p models.
-        CMultivariateOneOfNPrior(std::size_t dimension,
-                                 const TPriorPtrVec &models,
-                                 maths_t::EDataType dataType,
-                                 double decayRate = 0.0);
-
-        //! Create with a weighted collection of models.
-        //!
-        //! \param[in] dimension The model dimension.
-        //! \param[in] models The simple models and their weights which comprise
-        //! the mixed model.
-        //! \param[in] dataType The type of data being modeled (see maths_t::EDataType
-        //! for details).
-        //! \param[in] decayRate The rate at which we revert to the non-informative prior.
-        //! \warning This class takes ownership of \p models.
-        CMultivariateOneOfNPrior(std::size_t dimension,
-                                 const TDoublePriorPtrPrVec &models,
-                                 maths_t::EDataType dataType,
-                                 double decayRate = 0.0);
-
-        //! Construct from part of a state document.
-        CMultivariateOneOfNPrior(std::size_t dimension,
-                                 const SDistributionRestoreParams &params,
-                                 core::CStateRestoreTraverser &traverser);
-
-        //! Implements value semantics for copy construction.
-        CMultivariateOneOfNPrior(const CMultivariateOneOfNPrior &other);
-
-        //! Implements value semantics for assignment.
-        //!
-        //! \param[in] rhs The mixed model to copy.
-        //! \return The newly updated model.
-        //! \note That this class has value semantics: this overwrites the current
-        //! collection of models.
-        CMultivariateOneOfNPrior &operator=(const CMultivariateOneOfNPrior &rhs);
-
-        //! Efficient swap of the contents of this prior and \p other.
-        void swap(CMultivariateOneOfNPrior &other);
-        //@}
-
-        //! \name Prior Contract
-        //@{
-        //! Create a copy of the prior.
-        //!
-        //! \return A pointer to a newly allocated clone of this model.
-        //! \warning The caller owns the object returned.
-        virtual CMultivariateOneOfNPrior *clone() const;
-
-        //! Get the dimension of the prior.
-        std::size_t dimension() const;
-
-        //! Set the data type.
-        virtual void dataType(maths_t::EDataType value);
-
-        //! Set the rate at which the prior returns to non-informative.
-        virtual void decayRate(double value);
-
-        //! Reset the prior to non-informative.
-        virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0);
-
-        //! Forward the offset to the model priors.
-        virtual void adjustOffset(const TWeightStyleVec &weightStyles,
-                                  const TDouble10Vec1Vec &samples,
-                                  const TDouble10Vec4Vec1Vec &weights);
-
-        //! Update the model weights using the marginal likelihoods for
-        //! the data. The component prior parameters are then updated.
-        //!
-        //! \param[in] weightStyles Controls the interpretation of the weight(s)
-        //! that are associated with each sample. See maths_t::ESampleWeightStyle
-        //! for more details.
-        //! \param[in] samples A collection of samples of the process.
-        //! \param[in] weights The weights of each sample in \p samples.
- virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights); - - //! Propagate the prior density function forwards by \p time. - //! - //! The prior distribution relaxes back to non-informative at a rate - //! controlled by the decay rate parameter (optionally supplied to the - //! constructor). - //! - //! \param[in] time The time increment to apply. - //! \note \p time must be non negative. - virtual void propagateForwardsByTime(double time); - - //! Compute the univariate prior marginalizing over the variables - //! \p marginalize and conditioning on the variables \p condition. - //! - //! \param[in] marginalize The variables to marginalize out. - //! \param[in] condition The variables to condition on. - //! \return The corresponding univariate prior or null if one couldn't - //! be computed. - //! \warning The caller owns the result. - //! \note The variables are passed by the index of their dimension - //! which must therefore be in range. - //! \note The caller must specify dimension - 1 variables between - //! \p marginalize and \p condition so the resulting distribution - //! is univariate. - virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const; - - //! Compute the bivariate prior marginalizing over the variables - //! \p marginalize and conditioning on the variables \p condition. - //! - //! \param[in] marginalize The variables to marginalize out. - //! \param[in] condition The variables to condition on. - //! \warning The caller owns the result. - //! \note The variables are passed by the index of their dimension - //! which must therefore be in range. - //! \note It is assumed that the variables are in sorted order. - //! \note The caller must specify dimension - 2 variables between - //! \p marginalize and \p condition so the resulting distribution - //! is univariate. - virtual TPriorPtrDoublePr bivariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const; - - //! Get the support for the marginal likelihood function. - virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const; - - //! Get the mean of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMean() const; - - //! Get the weighted mean of the model nearest marginal likelihood means. - virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec &value) const; - - //! Get the covariance matrix for the marginal likelihood. - virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const; - - //! Get the diagonal of the covariance matrix for the marginal likelihood. - virtual TDouble10Vec marginalLikelihoodVariances() const; - - //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) const; - - //! Compute the log marginal likelihood function at \p samples integrating - //! over the prior density function for the distribution parameters. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - //! \note The samples are assumed to be independent and identically - //! 
distributed.
-        virtual maths_t::EFloatingPointErrorStatus
-        jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                   const TDouble10Vec1Vec &samples,
-                                   const TDouble10Vec4Vec1Vec &weights,
-                                   double &result) const;
-
-        //! Sample the marginal likelihood function.
-        //!
-        //! This samples each model in proportion to the probability the data
-        //! come from that model. Since each model can only be sampled an integer
-        //! number of times we find the sampling which minimizes the error from
-        //! the ideal sampling.
-        //!
-        //! \param[in] numberSamples The number of samples required.
-        //! \param[out] samples Filled in with samples from the prior.
-        //! \note \p numberSamples is truncated to the number of samples received.
-        virtual void sampleMarginalLikelihood(std::size_t numberSamples,
-                                              TDouble10Vec1Vec &samples) const;
-
-        //! Check if this is a non-informative prior.
-        virtual bool isNonInformative() const;
-
-        //! Get a human readable description of the prior.
-        //!
-        //! \param[in] separator String used to separate priors.
-        //! \param[in,out] result Filled in with the description.
-        virtual void print(const std::string &separator, std::string &result) const;
-
-        //! Get a checksum for this object.
-        virtual uint64_t checksum(uint64_t seed = 0) const;
-
-        //! Debug the memory used by this component.
-        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this component.
-        virtual std::size_t memoryUsage() const;
-
-        //! Get the static size of this object - used for virtual hierarchies.
-        virtual std::size_t staticSize() const;
-
-        //! Get the tag name for this prior.
-        virtual std::string persistenceTag() const;
-
-        //! Persist state by passing information to the supplied inserter
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-        //@}
-
-        //! \name Test Functions
-        //@{
-        //! Get the current values for the model weights.
-        TDouble3Vec weights() const;
-
-        //! Get the current values for the log model weights.
-        TDouble3Vec logWeights() const;
-
-        //! Get the current constituent models.
-        TPriorCPtr3Vec models() const;
-        //@}
-
-    private:
-        //! Check that the model weights are valid.
-        bool badWeights() const;
-
-        //! Full debug dump of the model weights.
-        std::string debugWeights() const;
-
-    private:
-        //! The model dimension.
-        std::size_t m_Dimension;
-
-        //! A collection of component models and their probabilities.
-        TWeightPriorPtrPrVec m_Models;
+class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior {
+public:
+    using TDouble3Vec = core::CSmallVector<double, 3>;
+    using TPriorPtrVec = std::vector<TPriorPtr>;
+    using TDoublePriorPtrPr = std::pair<double, TPriorPtr>;
+    using TDoublePriorPtrPrVec = std::vector<TDoublePriorPtrPr>;
+    using TWeightPriorPtrPr = std::pair<CModelWeight, TPriorPtr>;
+    using TWeightPriorPtrPrVec = std::vector<TWeightPriorPtrPr>;
+    using TPriorCPtr3Vec = core::CSmallVector<const CMultivariatePrior*, 3>;
+    using TMinAccumulator = CBasicStatistics::SMin<double>::TAccumulator;
+    using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
+
+    // Lift all overloads of these functions into scope.
+    //{
+    using CMultivariatePrior::addSamples;
+    using CMultivariatePrior::dataType;
+    using CMultivariatePrior::decayRate;
+    using CMultivariatePrior::print;
+    using CMultivariatePrior::probabilityOfLessLikelySamples;
+    //}
+
+private:
+    //! The maximum relative error we'll tolerate in c.d.f. and probability calculations.
+    static const double MAXIMUM_RELATIVE_ERROR;
+    //! The log of maximum relative error we'll tolerate in c.d.f. and probability
+    //! calculations.
+ static const double LOG_MAXIMUM_RELATIVE_ERROR; + +public: + //! \name Life-Cycle + //@{ + //! Create with a collection of models. + //! + //! \param[in] dimension The model dimension. + //! \param[in] models The simple models which comprise the mixed model. + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] decayRate The rate at which to revert to the non-informative prior. + //! \warning This class takes ownership of \p models. + CMultivariateOneOfNPrior(std::size_t dimension, const TPriorPtrVec& models, maths_t::EDataType dataType, double decayRate = 0.0); + + //! Create with a weighted collection of models. + //! + //! \param[in] dimension The model dimension. + //! \param[in] models The simple models and their weights which comprise + //! the mixed model. + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] decayRate The rate at which we revert to the non-informative prior. + //! \warning This class takes ownership of \p models. + CMultivariateOneOfNPrior(std::size_t dimension, + const TDoublePriorPtrPrVec& models, + maths_t::EDataType dataType, + double decayRate = 0.0); + + //! Construct from part of a state document. + CMultivariateOneOfNPrior(std::size_t dimension, const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Implements value semantics for copy construction. + CMultivariateOneOfNPrior(const CMultivariateOneOfNPrior& other); + + //! Implements value semantics for assignment. + //! + //! \param[in] rhs The mixed model to copy. + //! \return The newly updated model. + //! \note That this class has value semantics: this overwrites the current + //! collection of models. + CMultivariateOneOfNPrior& operator=(const CMultivariateOneOfNPrior& rhs); + + //! Efficient swap of the contents of this prior and \p other. + void swap(CMultivariateOneOfNPrior& other); + //@} + + //! \name Prior Contract + //@{ + //! Create a copy of the prior. + //! + //! \return A pointer to a newly allocated clone of this model. + //! \warning The caller owns the object returned. + virtual CMultivariateOneOfNPrior* clone() const; + + //! Get the dimension of the prior. + std::size_t dimension() const; + + //! Set the data type. + virtual void dataType(maths_t::EDataType value); + + //! Set the rate at which the prior returns to non-informative. + virtual void decayRate(double value); + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); + + //! Forward the offset to the model priors. + virtual void adjustOffset(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights); + + //! Update the model weights using the marginal likelihoods for + //! the data. The component prior parameters are then updated. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the process. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights); + + //! Propagate the prior density function forwards by \p time. + //! + //! The prior distribution relaxes back to non-informative at a rate + //! 
controlled by the decay rate parameter (optionally supplied to the + //! constructor). + //! + //! \param[in] time The time increment to apply. + //! \note \p time must be non negative. + virtual void propagateForwardsByTime(double time); + + //! Compute the univariate prior marginalizing over the variables + //! \p marginalize and conditioning on the variables \p condition. + //! + //! \param[in] marginalize The variables to marginalize out. + //! \param[in] condition The variables to condition on. + //! \return The corresponding univariate prior or null if one couldn't + //! be computed. + //! \warning The caller owns the result. + //! \note The variables are passed by the index of their dimension + //! which must therefore be in range. + //! \note The caller must specify dimension - 1 variables between + //! \p marginalize and \p condition so the resulting distribution + //! is univariate. + virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; + + //! Compute the bivariate prior marginalizing over the variables + //! \p marginalize and conditioning on the variables \p condition. + //! + //! \param[in] marginalize The variables to marginalize out. + //! \param[in] condition The variables to condition on. + //! \warning The caller owns the result. + //! \note The variables are passed by the index of their dimension + //! which must therefore be in range. + //! \note It is assumed that the variables are in sorted order. + //! \note The caller must specify dimension - 2 variables between + //! \p marginalize and \p condition so the resulting distribution + //! is univariate. + virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; + + //! Get the support for the marginal likelihood function. + virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const; + + //! Get the mean of the marginal likelihood function. + virtual TDouble10Vec marginalLikelihoodMean() const; + + //! Get the weighted mean of the model nearest marginal likelihood means. + virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec& value) const; + + //! Get the covariance matrix for the marginal likelihood. + virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const; + + //! Get the diagonal of the covariance matrix for the marginal likelihood. + virtual TDouble10Vec marginalLikelihoodVariances() const; + + //! Get the mode of the marginal likelihood function. + virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const; + + //! Compute the log marginal likelihood function at \p samples integrating + //! over the prior density function for the distribution parameters. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the process. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] result Filled in with the joint likelihood of \p samples. + //! \note The samples are assumed to be independent and identically + //! distributed. + virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& result) const; + + //! Sample the marginal likelihood function. + //! + //! 
This samples each model in proportion to the probability the data
+    //! come from that model. Since each model can only be sampled an integer
+    //! number of times we find the sampling which minimizes the error from
+    //! the ideal sampling.
+    //!
+    //! \param[in] numberSamples The number of samples required.
+    //! \param[out] samples Filled in with samples from the prior.
+    //! \note \p numberSamples is truncated to the number of samples received.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const;
+
+    //! Check if this is a non-informative prior.
+    virtual bool isNonInformative() const;
+
+    //! Get a human readable description of the prior.
+    //!
+    //! \param[in] separator String used to separate priors.
+    //! \param[in,out] result Filled in with the description.
+    virtual void print(const std::string& separator, std::string& result) const;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Debug the memory used by this component.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this component.
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object - used for virtual hierarchies.
+    virtual std::size_t staticSize() const;
+
+    //! Get the tag name for this prior.
+    virtual std::string persistenceTag() const;
+
+    //! Persist state by passing information to the supplied inserter
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+    //@}
+
+    //! \name Test Functions
+    //@{
+    //! Get the current values for the model weights.
+    TDouble3Vec weights() const;
+
+    //! Get the current values for the log model weights.
+    TDouble3Vec logWeights() const;
+
+    //! Get the current constituent models.
+    TPriorCPtr3Vec models() const;
+    //@}
+
+private:
+    //! Check that the model weights are valid.
+    bool badWeights() const;
+
+    //! Full debug dump of the model weights.
+    std::string debugWeights() const;
+
+private:
+    //! The model dimension.
+    std::size_t m_Dimension;
+
+    //! A collection of component models and their probabilities.
+    TWeightPriorPtrPrVec m_Models;
 };
-
 }
 }

diff --git a/include/maths/CMultivariateOneOfNPriorFactory.h b/include/maths/CMultivariateOneOfNPriorFactory.h
index e0e1f0b2f8..ef0115aca7 100644
--- a/include/maths/CMultivariateOneOfNPriorFactory.h
+++ b/include/maths/CMultivariateOneOfNPriorFactory.h
@@ -14,39 +14,29 @@

 #include

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStateRestoreTraverser;
 }

-namespace maths
-{
+namespace maths {
 class CMultivariatePrior;
 struct SDistributionRestoreParams;

 //! \brief Factory for multivariate 1-of-n priors.
-class MATHS_EXPORT CMultivariateOneOfNPriorFactory
-{
-    public:
-        using TPriorPtr = boost::shared_ptr<CMultivariatePrior>;
-        using TPriorPtrVec = std::vector<TPriorPtr>;
-
-    public:
-        //! Create a new non-informative multivariate normal prior.
-        static TPriorPtr nonInformative(std::size_t dimension,
-                                        maths_t::EDataType dataType,
-                                        double decayRate,
-                                        const TPriorPtrVec &models);
-
-        //! Create reading state from its state document representation.
-        static bool restore(std::size_t dimension,
-                            const SDistributionRestoreParams &params,
-                            TPriorPtr &ptr,
-                            core::CStateRestoreTraverser &traverser);
+class MATHS_EXPORT CMultivariateOneOfNPriorFactory {
+public:
+    using TPriorPtr = boost::shared_ptr<CMultivariatePrior>;
+    using TPriorPtrVec = std::vector<TPriorPtr>;
+
+public:
+    //! Create a new non-informative multivariate normal prior.
+    static TPriorPtr nonInformative(std::size_t dimension, maths_t::EDataType dataType, double decayRate, const TPriorPtrVec& models);
+
+    //! Create reading state from its state document representation.
+    static bool
+    restore(std::size_t dimension, const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser);
+};
 }
 }

diff --git a/include/maths/CMultivariatePrior.h b/include/maths/CMultivariatePrior.h
index 8d914e25b7..2137260869 100644
--- a/include/maths/CMultivariatePrior.h
+++ b/include/maths/CMultivariatePrior.h
@@ -18,14 +18,11 @@

 #include

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 }
-namespace maths
-{
+namespace maths {
 class CPrior;

 //! \brief Interface for a multivariate prior distribution function.
@@ -37,399 +34,384 @@ class CPrior;
 //! This exists to support a one-of-n prior distribution which comprises
 //! a weighted selection of basic likelihood functions and is implemented
 //! using the composite pattern.
-class MATHS_EXPORT CMultivariatePrior
-{
-    public:
-        using TDouble10Vec = core::CSmallVector<double, 10>;
-        using TDouble10Vec1Vec = core::CSmallVector<TDouble10Vec, 1>;
-        using TDouble10Vec2Vec = core::CSmallVector<TDouble10Vec, 2>;
-        using TDouble10Vec4Vec = core::CSmallVector<TDouble10Vec, 4>;
-        using TDouble10Vec10Vec = core::CSmallVector<TDouble10Vec, 10>;
-        using TDouble10Vec4Vec1Vec = core::CSmallVector<TDouble10Vec4Vec, 1>;
-        using TDouble10VecDouble10VecPr = std::pair<TDouble10Vec, TDouble10Vec>;
-        using TSize10Vec = core::CSmallVector<std::size_t, 10>;
-        using TSizeDoublePr = std::pair<std::size_t, double>;
-        using TSizeDoublePr10Vec = core::CSmallVector<TSizeDoublePr, 10>;
-        using TWeightStyleVec = maths_t::TWeightStyleVec;
-        using TTail10Vec = core::CSmallVector<maths_t::ETail, 10>;
-        using TUnivariatePriorPtr = boost::shared_ptr<CPrior>;
-        using TUnivariatePriorPtrDoublePr = std::pair<TUnivariatePriorPtr, double>;
-        using TPriorPtr = boost::shared_ptr<CMultivariatePrior>;
-        using TPriorPtrDoublePr = std::pair<TPriorPtr, double>;
-
-    public:
-        //! The value of the decay rate to fall back to using if the input
-        //! value is inappropriate.
-        static const double FALLBACK_DECAY_RATE;
-
-        //! \name Persistence Tags
-        //!
-        //! Tags for the persisting objects in this hierarchy.
-        //@{
-        static const std::string MULTIMODAL_TAG;
-        static const std::string NORMAL_TAG;
-        static const std::string ONE_OF_N_TAG;
-        static const std::string CONSTANT_TAG;
-        //@}
-
-    public:
-        //! \name Life-cycle
-        //@{
-        //! Construct an arbitrarily initialised object, suitable only for
-        //! assigning to or swapping with a valid one.
-        CMultivariatePrior();
-
-        //! \param[in] dataType The type of data being modeled.
-        //! \param[in] decayRate The rate at which the prior returns to non-
-        //! informative.
-        CMultivariatePrior(maths_t::EDataType dataType, double decayRate);
-
-        virtual ~CMultivariatePrior() = default;
-
-        //! Swap the contents of this prior and \p other.
-        void swap(CMultivariatePrior &other);
-        //@}
-
-        //! Mark the prior as being used for forecasting.
-        void forForecasting();
-
-        //! Check if this prior is being used for forecasting.
-        //!
-        //! \warning This is an irreversible action so if the prior
-        //! is still needed it should be copied first.
-        bool isForForecasting() const;
-
-        //! Check if the prior is being used to model discrete data.
-        bool isDiscrete() const;
-
-        //! Check if the prior is being used to model integer data.
-        bool isInteger() const;
-
-        //! Get the data type.
-        maths_t::EDataType dataType() const;
-
-        //! Get the rate at which the prior returns to non-informative.
-        double decayRate() const;
-
-        //! \name Prior Contract
-        //@{
-        //! Create a copy of the prior.
-        //!
-        //! \warning Caller owns returned object.
- virtual CMultivariatePrior *clone() const = 0; - - //! Get the dimension of the prior. - virtual std::size_t dimension() const = 0; - - //! Set the data type. - virtual void dataType(maths_t::EDataType value); - - //! Set the rate at which the prior returns to non-informative. - virtual void decayRate(double value); - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0) = 0; - - //! For priors with non-negative support this adjusts the offset used - //! to extend the support to handle negative samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples from which to determine the offset. - //! \param[in] weights The weights of each sample in \p samples. - virtual void adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) = 0; - - //! Update the prior with a collection of independent samples from the - //! process. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) = 0; - - //! Update the prior for the specified elapsed time. - virtual void propagateForwardsByTime(double time) = 0; - - //! Compute the univariate prior marginalizing over the variables - //! \p marginalize and conditioning on the variables \p condition. - //! - //! \param[in] marginalize The variables to marginalize out. - //! \param[in] condition The variables to condition on. - //! \warning The caller owns the result. - //! \note The variables are passed by the index of their dimension - //! which must therefore be in range. - //! \note It is assumed that the variables are in sorted order. - //! \note The caller must specify dimension - 1 variables between - //! \p marginalize and \p condition so the resulting distribution - //! is univariate. - virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const = 0; - - //! Compute the bivariate prior marginalizing over the variables - //! \p marginalize and conditioning on the variables \p condition. - //! - //! \param[in] marginalize The variables to marginalize out. - //! \param[in] condition The variables to condition on. - //! \warning The caller owns the result. - //! \note The variables are passed by the index of their dimension - //! which must therefore be in range. - //! \note It is assumed that the variables are in sorted order. - //! \note The caller must specify dimension - 2 variables between - //! \p marginalize and \p condition so the resulting distribution - //! is univariate. - virtual TPriorPtrDoublePr bivariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const = 0; - - //! Get the support for the marginal likelihood function. - virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const = 0; - - //! Get the mean of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMean() const = 0; - - //! 
Get the nearest mean of the multimodal prior marginal likelihood, - //! otherwise the marginal likelihood mean. - virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec &value) const; - - //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) const = 0; - - //! Get the local maxima of the marginal likelihood function. - virtual TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) const; - - //! Get the covariance matrix for the marginal likelihood. - virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const = 0; - - //! Get the diagonal of the covariance matrix for the marginal likelihood. - virtual TDouble10Vec marginalLikelihoodVariances() const = 0; - - //! Calculate the log marginal likelihood function, integrating over the - //! prior density function. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - double &result) const = 0; - - //! Sample the marginal likelihood function. - //! - //! The marginal likelihood functions are sampled in quantile intervals - //! of the generalized cumulative density function, specifically intervals - //! between contours of constant probability density. - //! - //! The idea is to capture a set of samples that accurately and efficiently - //! represent the information in the prior. Random sampling (although it - //! has nice asymptotic properties) doesn't fulfill the second requirement: - //! typically requiring many more samples than sampling in quantile intervals - //! to capture the same amount of information. - //! - //! This is to allow us to transform one prior distribution into another - //! completely generically and relatively efficiently, by updating the target - //! prior with these samples. As such the prior needs to maintain a count of - //! the number of samples to date so that it isn't over sampled. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble10Vec1Vec &samples) const = 0; - - //! Calculate the joint probability of seeing a lower marginal likelihood - //! collection of independent samples for each coordinate. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[in] coordinates The coordinates for which to compute probabilities. - //! 
\param[out] lowerBounds Filled in with lower bounds for the probability - //! of each coordinate. - //! \param[out] upperBounds Filled in with upper bounds for the probability - //! of each coordinate. - //! \param[out] tail The tail (left or right), of each coordinate, that all - //! the samples are in or neither. - //! \note The samples are assumed to be independent. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, i.e. - //! a value of zero is not well defined and a value of infinity is not well - //! handled. (Very large values are handled though.) - bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - const TSize10Vec &coordinates, - TDouble10Vec2Vec &lowerBounds, - TDouble10Vec2Vec &upperBounds, - TTail10Vec &tail) const; - - //! Calculate the joint probability of seeing a lower likelihood collection - //! of independent samples from the distribution integrating over the prior - //! density function. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the process. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with a lower bound for the probability - //! of the set for which the joint marginal likelihood is less than - //! that of \p samples (subject to the measure \p calculation). - //! \param[out] upperBound Filled in with an upper bound for the - //! probability of the set for which the joint marginal likelihood is - //! less than that of \p samples (subject to the measure \p calculation). - //! \param[out] tail The tail (left or right), of each coordinate, that all - //! the samples are in or neither. - //! \note The samples are assumed to be independent. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, i.e. - //! a value of zero is not well defined and a value of infinity is not well - //! handled. (Very large values are handled though.) - bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - TTail10Vec &tail) const; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const = 0; - - //! Get a human readable description of the prior. - std::string print() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] separator String used to separate priors. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &separator, std::string &result) const = 0; - - //! Print a projection of the marginal likelihood function onto the specified - //! coordinates. - //! - //! The format is as follows:\n - //! \code{cpp} - //! x = [x1 x2 .... xn ]; - //! y = [y1 y2 .... yn ]; - //! likelihood = [L(x1, y1) L(x1, y2) ... L(x1, yn) - //! L(x2, y1) L(x2, y2) ... L(x2, yn) - //! . . . - //! . . . - //! . . . - //! L(xn, y1) L(xn, y2) ... L(xn, yn) ]; - //! \endcode - //! - //! i.e. domain values are space separated on the first and subsequent line(s) - //! 
as appropriate and the density function evaluated at those values are space - //! separated on the next line and subsequent lines as appropriate. - std::string printMarginalLikelihoodFunction(std::size_t x, std::size_t y) const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const = 0; - - //! Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const = 0; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const = 0; - - //! Get the tag name for this prior. - virtual std::string persistenceTag() const = 0; - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0; - //@} - - //! Get the margin between the smallest value and the support left - //! end. Priors with non-negative support, automatically adjust the - //! offset if a value is seen which is smaller than offset + margin. - //! This is to avoid the numerical instability caused by adding - //! values close to zero. - //! - //! \note This is overridden by CPriorTestInterface so don't replace - //! it by a static constant in the calling functions. - virtual double offsetMargin() const; - - //! Get the number of samples received. - double numberSamples() const; - - //! Set the number of samples received to \p numberSamples. - void numberSamples(double numberSamples); - - //! Check if we should use this prior at present. - virtual bool participatesInModelSelection() const; - - //! Get the number of unmarginalized parameters in the marginal likelihood. - //! - //! \note That any parameters over which we explicitly integrate to - //! compute a marginal likelihood don't need to be counted since we - //! are interested in the estimating the usual BIC approximation for - //! \f$int_{\theta}f(x|\theta, M)f(\theta|M)\d\theta\f$ - virtual double unmarginalizedParameters() const; - - protected: - //! Get the scaled decay rate for use by propagateForwardsByTime. - double scaledDecayRate() const; - - //! Update the number of samples received to date by adding \p n. - void addSamples(double n); - - //! Check that the samples and weights are consistent. - bool check(const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) const; - - //! Check that the variables to marginalize out and condition on - //! are consistent. - bool check(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const; - - //! Get the remaining variables. - void remainingVariables(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition, - TSize10Vec &results) const; - - //! Get the smallest component of \p x. - double smallest(const TDouble10Vec &x) const; - - private: - //! Set to true if this model is being used for forecasting. Note - //! we don't have any need to persist forecast models so this is - //! is not persisted. - bool m_Forecasting; - - //! If this is true then the prior is being used to model discrete - //! data. Note that this is not persisted and deduced from context. - maths_t::EDataType m_DataType; - - //! The rate at which the prior returns to non-informative. Note that - //! this is not persisted. - double m_DecayRate; - - //! The number of samples with which the prior has been updated. 
-        double m_NumberSamples;
+class MATHS_EXPORT CMultivariatePrior {
+public:
+    using TDouble10Vec = core::CSmallVector<double, 10>;
+    using TDouble10Vec1Vec = core::CSmallVector<TDouble10Vec, 1>;
+    using TDouble10Vec2Vec = core::CSmallVector<TDouble10Vec, 2>;
+    using TDouble10Vec4Vec = core::CSmallVector<TDouble10Vec, 4>;
+    using TDouble10Vec10Vec = core::CSmallVector<TDouble10Vec, 10>;
+    using TDouble10Vec4Vec1Vec = core::CSmallVector<TDouble10Vec4Vec, 1>;
+    using TDouble10VecDouble10VecPr = std::pair<TDouble10Vec, TDouble10Vec>;
+    using TSize10Vec = core::CSmallVector<std::size_t, 10>;
+    using TSizeDoublePr = std::pair<std::size_t, double>;
+    using TSizeDoublePr10Vec = core::CSmallVector<TSizeDoublePr, 10>;
+    using TWeightStyleVec = maths_t::TWeightStyleVec;
+    using TTail10Vec = core::CSmallVector<maths_t::ETail, 10>;
+    using TUnivariatePriorPtr = boost::shared_ptr<CPrior>;
+    using TUnivariatePriorPtrDoublePr = std::pair<TUnivariatePriorPtr, double>;
+    using TPriorPtr = boost::shared_ptr<CMultivariatePrior>;
+    using TPriorPtrDoublePr = std::pair<TPriorPtr, double>;
+
+public:
+    //! The value of the decay rate to fall back to using if the input
+    //! value is inappropriate.
+    static const double FALLBACK_DECAY_RATE;
+
+    //! \name Persistence Tags
+    //!
+    //! Tags for the persisting objects in this hierarchy.
+    //@{
+    static const std::string MULTIMODAL_TAG;
+    static const std::string NORMAL_TAG;
+    static const std::string ONE_OF_N_TAG;
+    static const std::string CONSTANT_TAG;
+    //@}
+
+public:
+    //! \name Life-cycle
+    //@{
+    //! Construct an arbitrarily initialised object, suitable only for
+    //! assigning to or swapping with a valid one.
+    CMultivariatePrior();
+
+    //! \param[in] dataType The type of data being modeled.
+    //! \param[in] decayRate The rate at which the prior returns to non-
+    //! informative.
+    CMultivariatePrior(maths_t::EDataType dataType, double decayRate);
+
+    virtual ~CMultivariatePrior() = default;
+
+    //! Swap the contents of this prior and \p other.
+    void swap(CMultivariatePrior& other);
+    //@}
+
+    //! Mark the prior as being used for forecasting.
+    void forForecasting();
+
+    //! Check if this prior is being used for forecasting.
+    //!
+    //! \warning This is an irreversible action so if the prior is
+    //! still needed it should be copied first.
+    bool isForForecasting() const;
+
+    //! Check if the prior is being used to model discrete data.
+    bool isDiscrete() const;
+
+    //! Check if the prior is being used to model integer data.
+    bool isInteger() const;
+
+    //! Get the data type.
+    maths_t::EDataType dataType() const;
+
+    //! Get the rate at which the prior returns to non-informative.
+    double decayRate() const;
+
+    //! \name Prior Contract
+    //@{
+    //! Create a copy of the prior.
+    //!
+    //! \warning Caller owns returned object.
+    virtual CMultivariatePrior* clone() const = 0;
+
+    //! Get the dimension of the prior.
+    virtual std::size_t dimension() const = 0;
+
+    //! Set the data type.
+    virtual void dataType(maths_t::EDataType value);
+
+    //! Set the rate at which the prior returns to non-informative.
+    virtual void decayRate(double value);
+
+    //! Reset the prior to non-informative.
+    virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0) = 0;
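+
+    //! For orientation, a typical update cycle interleaves adding samples
+    //! with aging. This is an illustrative sketch only: the weight style
+    //! and the values shown are assumptions, not part of this contract.
+    //! \code{cpp}
+    //! maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleCountWeight};
+    //! TDouble10Vec1Vec samples{TDouble10Vec{1.0, 2.0}};
+    //! TDouble10Vec4Vec1Vec weights{TDouble10Vec4Vec{TDouble10Vec{1.0, 1.0}}};
+    //! prior.addSamples(weightStyles, samples, weights);
+    //! prior.propagateForwardsByTime(1.0);
+    //! \endcode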
+
+    //! For priors with non-negative support this adjusts the offset used
+    //! to extend the support to handle negative samples.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples The samples from which to determine the offset.
+    //! \param[in] weights The weights of each sample in \p samples.
+    virtual void
+    adjustOffset(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) = 0;
+
+    //! Update the prior with a collection of independent samples from the
+    //! process.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the process.
+    //! \param[in] weights The weights of each sample in \p samples.
+    virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) = 0;
+
+    //! Update the prior for the specified elapsed time.
+    virtual void propagateForwardsByTime(double time) = 0;
+
+    //! Compute the univariate prior marginalizing over the variables
+    //! \p marginalize and conditioning on the variables \p condition.
+    //!
+    //! \param[in] marginalize The variables to marginalize out.
+    //! \param[in] condition The variables to condition on.
+    //! \warning The caller owns the result.
+    //! \note The variables are passed by the index of their dimension
+    //! which must therefore be in range.
+    //! \note It is assumed that the variables are in sorted order.
+    //! \note The caller must specify dimension - 1 variables between
+    //! \p marginalize and \p condition so the resulting distribution
+    //! is univariate.
+    virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const = 0;
+
+    //! Compute the bivariate prior marginalizing over the variables
+    //! \p marginalize and conditioning on the variables \p condition.
+    //!
+    //! \param[in] marginalize The variables to marginalize out.
+    //! \param[in] condition The variables to condition on.
+    //! \warning The caller owns the result.
+    //! \note The variables are passed by the index of their dimension
+    //! which must therefore be in range.
+    //! \note It is assumed that the variables are in sorted order.
+    //! \note The caller must specify dimension - 2 variables between
+    //! \p marginalize and \p condition so the resulting distribution
+    //! is bivariate.
+    virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const = 0;
+
+    //! Get the support for the marginal likelihood function.
+    virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const = 0;
+
+    //! Get the mean of the marginal likelihood function.
+    virtual TDouble10Vec marginalLikelihoodMean() const = 0;
+
+    //! Get the nearest mean of the multimodal prior marginal likelihood,
+    //! otherwise the marginal likelihood mean.
+    virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec& value) const;
+
+    //! Get the mode of the marginal likelihood function.
+    virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const = 0;
+
+    //! Get the local maxima of the marginal likelihood function.
+    virtual TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const;
+
+    //! Get the covariance matrix for the marginal likelihood.
+    virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const = 0;
+
+    //! Get the diagonal of the covariance matrix for the marginal likelihood.
+    virtual TDouble10Vec marginalLikelihoodVariances() const = 0;
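+
+    //! As an example of the marginalization interface above, a trivariate
+    //! prior can be collapsed onto its first coordinate, conditioning on
+    //! the third, as follows (a sketch; the indices and the value shown
+    //! are purely illustrative):
+    //! \code{cpp}
+    //! TSize10Vec marginalize{1};               // integrate out coordinate 1
+    //! TSizeDoublePr10Vec condition{{2, 5.0}};  // condition on coordinate 2 = 5
+    //! TUnivariatePriorPtrDoublePr conditional{prior.univariate(marginalize, condition)};
+    //! \endcode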
+
+    //! Calculate the log marginal likelihood function, integrating over the
+    //! prior density function.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the process.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] result Filled in with the joint likelihood of \p samples.
+    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                          const TDouble10Vec1Vec& samples,
+                                                                          const TDouble10Vec4Vec1Vec& weights,
+                                                                          double& result) const = 0;
+
+    //! Sample the marginal likelihood function.
+    //!
+    //! The marginal likelihood functions are sampled in quantile intervals
+    //! of the generalized cumulative density function, specifically intervals
+    //! between contours of constant probability density.
+    //!
+    //! The idea is to capture a set of samples that accurately and efficiently
+    //! represent the information in the prior. Random sampling (although it
+    //! has nice asymptotic properties) doesn't fulfill the second requirement:
+    //! typically requiring many more samples than sampling in quantile intervals
+    //! to capture the same amount of information.
+    //!
+    //! This is to allow us to transform one prior distribution into another
+    //! completely generically and relatively efficiently, by updating the target
+    //! prior with these samples. As such the prior needs to maintain a count of
+    //! the number of samples to date so that it isn't oversampled.
+    //!
+    //! \param[in] numberSamples The number of samples required.
+    //! \param[out] samples Filled in with samples from the prior.
+    //! \note \p numberSamples is truncated to the number of samples received.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const = 0;
+
+    //! Calculate the joint probability of seeing a lower marginal likelihood
+    //! collection of independent samples for each coordinate.
+    //!
+    //! \param[in] calculation The style of the probability calculation
+    //! (see model_t::EProbabilityCalculation for details).
+    //! \param[in] weightStyles Controls the interpretation of the weights
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the process.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[in] coordinates The coordinates for which to compute probabilities.
+    //! \param[out] lowerBounds Filled in with lower bounds for the probability
+    //! of each coordinate.
+    //! \param[out] upperBounds Filled in with upper bounds for the probability
+    //! of each coordinate.
+    //! \param[out] tail The tail (left or right), of each coordinate, that all
+    //! the samples are in or neither.
+    //! \note The samples are assumed to be independent.
+    //! \warning The variance scales must be in the range \f$(0,\infty)\f$, i.e.
+    //! a value of zero is not well defined and a value of infinity is not well
+    //! handled. (Very large values are handled though.)
+    bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
+                                        const TWeightStyleVec& weightStyles,
+                                        const TDouble10Vec1Vec& samples,
+                                        const TDouble10Vec4Vec1Vec& weights,
+                                        const TSize10Vec& coordinates,
+                                        TDouble10Vec2Vec& lowerBounds,
+                                        TDouble10Vec2Vec& upperBounds,
+                                        TTail10Vec& tail) const;
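+
+    //! The sampling scheme described above is what makes generic prior-to-prior
+    //! transformation possible. A hedged sketch (both priors are arbitrary
+    //! implementations of this interface and the unit weights are assumptions):
+    //! \code{cpp}
+    //! TDouble10Vec1Vec samples;
+    //! source.sampleMarginalLikelihood(20, samples);
+    //! TDouble10Vec4Vec1Vec weights(
+    //!         samples.size(), TDouble10Vec4Vec{TDouble10Vec(source.dimension(), 1.0)});
+    //! target.addSamples(maths_t::TWeightStyleVec{maths_t::E_SampleCountWeight}, samples, weights);
+    //! \endcode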
+
+    //! Calculate the joint probability of seeing a lower likelihood collection
+    //! of independent samples from the distribution integrating over the prior
+    //! density function.
+    //!
+    //! \param[in] calculation The style of the probability calculation
+    //! (see model_t::EProbabilityCalculation for details).
+    //! \param[in] weightStyles Controls the interpretation of the weights
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the process.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] lowerBound Filled in with a lower bound for the probability
+    //! of the set for which the joint marginal likelihood is less than
+    //! that of \p samples (subject to the measure \p calculation).
+    //! \param[out] upperBound Filled in with an upper bound for the
+    //! probability of the set for which the joint marginal likelihood is
+    //! less than that of \p samples (subject to the measure \p calculation).
+    //! \param[out] tail The tail (left or right), of each coordinate, that all
+    //! the samples are in or neither.
+    //! \note The samples are assumed to be independent.
+    //! \warning The variance scales must be in the range \f$(0,\infty)\f$, i.e.
+    //! a value of zero is not well defined and a value of infinity is not well
+    //! handled. (Very large values are handled though.)
+    bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
+                                        const TWeightStyleVec& weightStyles,
+                                        const TDouble10Vec1Vec& samples,
+                                        const TDouble10Vec4Vec1Vec& weights,
+                                        double& lowerBound,
+                                        double& upperBound,
+                                        TTail10Vec& tail) const;
+
+    //! Check if this is a non-informative prior.
+    virtual bool isNonInformative() const = 0;
+
+    //! Get a human readable description of the prior.
+    std::string print() const;
+
+    //! Get a human readable description of the prior.
+    //!
+    //! \param[in] separator String used to separate priors.
+    //! \param[in,out] result Filled in with the description.
+    virtual void print(const std::string& separator, std::string& result) const = 0;
+
+    //! Print a projection of the marginal likelihood function onto the specified
+    //! coordinates.
+    //!
+    //! The format is as follows:\n
+    //! \code{cpp}
+    //!   x = [x1 x2 .... xn ];
+    //!   y = [y1 y2 .... yn ];
+    //!   likelihood = [L(x1, y1) L(x1, y2) ... L(x1, yn)
+    //!                 L(x2, y1) L(x2, y2) ... L(x2, yn)
+    //!                 .         .             .
+    //!                 .         .             .
+    //!                 .         .             .
+    //!                 L(xn, y1) L(xn, y2) ... L(xn, yn) ];
+    //! \endcode
+    //!
+    //! i.e. domain values are space separated on the first and subsequent line(s)
+    //! as appropriate and the density function evaluated at those values are space
+    //! separated on the next line and subsequent lines as appropriate.
+    std::string printMarginalLikelihoodFunction(std::size_t x, std::size_t y) const;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const = 0;
+
+    //! Get the memory used by this component
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
+
+    //! Get the memory used by this component
+    virtual std::size_t memoryUsage() const = 0;
+
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const = 0;
+
+    //! Get the tag name for this prior.
+    virtual std::string persistenceTag() const = 0;
+
+    //! Persist state by passing information to the supplied inserter
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0;
+    //@}
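+
+    //! For instance, the aggregate form above underpins anomaly scoring: the
+    //! smaller the returned probability the more unusual the sample. A rough
+    //! sketch, reusing the sample and weight variables from the earlier
+    //! examples (the calculation style and the averaging of the bounds are
+    //! assumptions):
+    //! \code{cpp}
+    //! double lowerBound{0.0}, upperBound{1.0};
+    //! TTail10Vec tail;
+    //! if (prior.probabilityOfLessLikelySamples(maths_t::E_TwoSided, weightStyles,
+    //!                                          samples, weights,
+    //!                                          lowerBound, upperBound, tail)) {
+    //!     double probability{(lowerBound + upperBound) / 2.0};
+    //! }
+    //! \endcode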
+
+    //! Get the margin between the smallest value and the support left
+    //! end. Priors with non-negative support automatically adjust the
+    //! offset if a value is seen which is smaller than offset + margin.
+    //! This is to avoid the numerical instability caused by adding
+    //! values close to zero.
+    //!
+    //! \note This is overridden by CPriorTestInterface so don't replace
+    //! it by a static constant in the calling functions.
+    virtual double offsetMargin() const;
+
+    //! Get the number of samples received.
+    double numberSamples() const;
+
+    //! Set the number of samples received to \p numberSamples.
+    void numberSamples(double numberSamples);
+
+    //! Check if we should use this prior at present.
+    virtual bool participatesInModelSelection() const;
+
+    //! Get the number of unmarginalized parameters in the marginal likelihood.
+    //!
+    //! \note Any parameters over which we explicitly integrate to
+    //! compute a marginal likelihood don't need to be counted since we
+    //! are interested in estimating the usual BIC approximation for
+    //! \f$\int_{\theta}f(x|\theta, M)f(\theta|M)\,d\theta\f$
+    virtual double unmarginalizedParameters() const;
+
+protected:
+    //! Get the scaled decay rate for use by propagateForwardsByTime.
+    double scaledDecayRate() const;
+
+    //! Update the number of samples received to date by adding \p n.
+    void addSamples(double n);
+
+    //! Check that the samples and weights are consistent.
+    bool check(const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) const;
+
+    //! Check that the variables to marginalize out and condition on
+    //! are consistent.
+    bool check(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const;
+
+    //! Get the remaining variables.
+    void remainingVariables(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition, TSize10Vec& results) const;
+
+    //! Get the smallest component of \p x.
+    double smallest(const TDouble10Vec& x) const;
+
+private:
+    //! Set to true if this model is being used for forecasting. Note
+    //! we don't have any need to persist forecast models so this
+    //! is not persisted.
+    bool m_Forecasting;
+
+    //! The type of data, discrete or continuous, which the prior is
+    //! modeling. Note that this is not persisted and is deduced from
+    //! context.
+    maths_t::EDataType m_DataType;
+
+    //! The rate at which the prior returns to non-informative. Note that
+    //! this is not persisted.
+    double m_DecayRate;
+
+    //! The number of samples with which the prior has been updated.
+    double m_NumberSamples;
 };
-
 }
 }
diff --git a/include/maths/CNaiveBayes.h b/include/maths/CNaiveBayes.h
index 7c20eb6194..b508f854c4 100644
--- a/include/maths/CNaiveBayes.h
+++ b/include/maths/CNaiveBayes.h
@@ -18,268 +18,254 @@
 #include
 #include
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace maths
-{
+namespace maths {
 struct SDistributionRestoreParams;

 //! \brief The interface expected by CNaiveBayes for implementations
 //! of the class conditional density functions.
-class MATHS_EXPORT CNaiveBayesFeatureDensity
-{
-    public:
-        using TDouble1Vec = core::CSmallVector<double, 1>;
+class MATHS_EXPORT CNaiveBayesFeatureDensity {
+public:
+    using TDouble1Vec = core::CSmallVector<double, 1>;

-    public:
-        virtual ~CNaiveBayesFeatureDensity() = default;
+public:
+    virtual ~CNaiveBayesFeatureDensity() = default;

-        //! Create and return a clone.
-        //!
-        //! \note The caller owns this.
-        virtual CNaiveBayesFeatureDensity *clone() const = 0;
+    //! Create and return a clone.
+    //!
+    //! \note The caller owns this.
+    virtual CNaiveBayesFeatureDensity* clone() const = 0;

-        //! Initialize by reading state from \p traverser.
-        virtual bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
-                                            core::CStateRestoreTraverser &traverser) = 0;
+    //! Initialize by reading state from \p traverser.
+    virtual bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) = 0;

-        //! Persist state by passing information to \p inserter.
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0;
+    //! Persist state by passing information to \p inserter.
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0;

-        //! Set the data type.
-        virtual void dataType(maths_t::EDataType dataType) = 0;
+    //! Set the data type.
+    virtual void dataType(maths_t::EDataType dataType) = 0;

-        //! Add the value \p x.
-        virtual void add(const TDouble1Vec &x) = 0;
+    //! Add the value \p x.
+    virtual void add(const TDouble1Vec& x) = 0;

-        //! Compute the log value of the density function at \p x.
-        virtual double logValue(const TDouble1Vec &x) const = 0;
+    //! Compute the log value of the density function at \p x.
+    virtual double logValue(const TDouble1Vec& x) const = 0;

-        //! Compute the density at the mode.
-        virtual double logMaximumValue() const = 0;
+    //! Compute the log of the density at the mode.
+    virtual double logMaximumValue() const = 0;

-        //! Age out old values density to account for \p time passing.
-        virtual void propagateForwardsByTime(double time) = 0;
+    //! Age out old values of the density to account for \p time passing.
+    virtual void propagateForwardsByTime(double time) = 0;

-        //! Debug the memory used by this object.
-        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
+    //! Debug the memory used by this object.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;

-        //! Get the static size of this object.
-        virtual std::size_t staticSize() const = 0;
+    //! Get the static size of this object.
+    virtual std::size_t staticSize() const = 0;

-        //! Get the memory used by this object.
-        virtual std::size_t memoryUsage() const = 0;
+    //! Get the memory used by this object.
+    virtual std::size_t memoryUsage() const = 0;

-        //! Get a checksum for this object.
-        virtual uint64_t checksum(uint64_t seed) const = 0;
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed) const = 0;

-        //! Get a human readable description of the class density function.
-        virtual std::string print() const = 0;
+    //! Get a human readable description of the class density function.
+    virtual std::string print() const = 0;
 };

 //! \brief An implementation of the class conditional density function
 //! based on the CPrior hierarchy.
-class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayesFeatureDensity
-{
-    public:
-        CNaiveBayesFeatureDensityFromPrior() = default;
-        CNaiveBayesFeatureDensityFromPrior(const CPrior &prior);
+class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayesFeatureDensity {
+public:
+    CNaiveBayesFeatureDensityFromPrior() = default;
+    CNaiveBayesFeatureDensityFromPrior(const CPrior& prior);

-        //! Create and return a clone.
-        //!
-        //! \note The caller owns this.
-        virtual CNaiveBayesFeatureDensityFromPrior *clone() const;
+    //! Create and return a clone.
+    //!
+    //! \note The caller owns this.
+    virtual CNaiveBayesFeatureDensityFromPrior* clone() const;
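+
+    //! For illustration, the adapter can be used stand-alone as follows
+    //! (a sketch only; the normal prior is just one possible choice of
+    //! underlying model):
+    //! \code{cpp}
+    //! CNaiveBayesFeatureDensityFromPrior density{
+    //!         CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)};
+    //! density.add(TDouble1Vec{1.5});
+    //! double logDensity{density.logValue(TDouble1Vec{1.0})};
+    //! \endcode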

-        //! Initialize by reading state from \p traverser.
-        virtual bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
-                                            core::CStateRestoreTraverser &traverser);
+    //! Initialize by reading state from \p traverser.
+    virtual bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);

-        //! Persist state by passing information to \p inserter.
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+    //! Persist state by passing information to \p inserter.
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;

-        //! Add the value \p x.
-        virtual void add(const TDouble1Vec &x);
+    //! Add the value \p x.
+    virtual void add(const TDouble1Vec& x);

-        //! Compute the log value of the density function at \p x.
-        virtual double logValue(const TDouble1Vec &x) const;
+    //! Compute the log value of the density function at \p x.
+    virtual double logValue(const TDouble1Vec& x) const;

-        //! Compute the density at the mode.
-        virtual double logMaximumValue() const;
+    //! Compute the log of the density at the mode.
+    virtual double logMaximumValue() const;

-        //! Set the data type.
-        virtual void dataType(maths_t::EDataType dataType);
+    //! Set the data type.
+    virtual void dataType(maths_t::EDataType dataType);

-        //! Age out old values density to account for \p time passing.
-        virtual void propagateForwardsByTime(double time);
+    //! Age out old values of the density to account for \p time passing.
+    virtual void propagateForwardsByTime(double time);

-        //! Debug the memory used by this object.
-        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+    //! Debug the memory used by this object.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;

-        //! Get the static size of this object.
-        virtual std::size_t staticSize() const;
+    //! Get the static size of this object.
+    virtual std::size_t staticSize() const;

-        //! Get the memory used by this object.
-        virtual std::size_t memoryUsage() const;
+    //! Get the memory used by this object.
+    virtual std::size_t memoryUsage() const;

-        //! Get a checksum for this object.
-        virtual uint64_t checksum(uint64_t seed) const;
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed) const;

-        //! Get a human readable description of the class density function.
-        virtual std::string print() const;
+    //! Get a human readable description of the class density function.
+    virtual std::string print() const;

-    private:
-        using TPriorPtr = boost::shared_ptr<CPrior>;
+private:
+    using TPriorPtr = boost::shared_ptr<CPrior>;

-    private:
-        //! The density model.
-        TPriorPtr m_Prior;
+private:
+    //! The density model.
+    TPriorPtr m_Prior;
 };

 //! \brief Implements a Naive Bayes classifier.
-class MATHS_EXPORT CNaiveBayes
-{
-    public:
-        using TDoubleSizePr = std::pair<double, std::size_t>;
-        using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
-        using TDouble1Vec = core::CSmallVector<double, 1>;
-        using TDouble1VecVec = std::vector<TDouble1Vec>;
-        using TOptionalDouble = boost::optional<double>;
-
-    public:
-        explicit CNaiveBayes(const CNaiveBayesFeatureDensity &exemplar,
-                             double decayRate = 0.0,
-                             TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
-        CNaiveBayes(const SDistributionRestoreParams &params,
-                    core::CStateRestoreTraverser &traverser);
-
+class MATHS_EXPORT CNaiveBayes {
+public:
+    using TDoubleSizePr = std::pair<double, std::size_t>;
+    using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
+    using TDouble1Vec = core::CSmallVector<double, 1>;
+    using TDouble1VecVec = std::vector<TDouble1Vec>;
+    using TOptionalDouble = boost::optional<double>;
+
+public:
+    explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
+                         double decayRate = 0.0,
+                         TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
+    CNaiveBayes(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+
+    //! Persist state by passing information to \p inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Efficiently swap the contents of this and \p other.
+    void swap(CNaiveBayes& other);
+
+    //! Check if any training data has been added, i.e. if the
+    //! classifier is initialized.
+    bool initialized() const;
+
+    //! This can be used to optionally seed the class counts
+    //! with \p counts. These are added on to data class counts
+    //! to compute the class posterior probabilities.
+    void initialClassCounts(const TDoubleSizePrVec& counts);
+
+    //! Add a training data point comprising the pair \f$(x,l)\f$
+    //! for feature vector \f$x\f$ and class label \f$l\f$.
+    //!
+    //! \param[in] label The class label for \p x.
+    //! \param[in] x The feature values.
+    //! \note \p x size should be equal to the number of features.
+    //! A missing feature is indicated by passing an empty vector
+    //! for that feature.
+    void addTrainingDataPoint(std::size_t label, const TDouble1VecVec& x);
+
+    //! Set the data type.
+    void dataType(maths_t::EDataType dataType);
+
+    //! Age out old values from the class conditional densities
+    //! to account for \p time passing.
+    void propagateForwardsByTime(double time);
+
+    //! Get the top \p n class probabilities for \p x.
+    //!
+    //! \param[in] n The number of class probabilities to estimate.
+    //! \param[in] x The feature values.
+    //! \note \p x size should be equal to the number of features.
+    //! A missing feature is indicated by passing an empty vector
+    //! for that feature.
+    TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const;
+
+    //! Get the probability of the class labeled \p label for \p x.
+    //!
+    //! \param[in] label The label of the class of interest.
+    //! \param[in] x The feature values.
+    //! \note \p x size should be equal to the number of features.
+    //! A missing feature is indicated by passing an empty vector
+    //! for that feature.
+    double classProbability(std::size_t label, const TDouble1VecVec& x) const;
+
+    //! Get the probabilities of all the classes for \p x.
+    //!
+    //! \param[in] x The feature values.
+    //! \note \p x size should be equal to the number of features.
+    //! A missing feature is indicated by passing an empty vector
+    //! for that feature.
+    TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const;
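+
+    //! Putting the pieces together, a minimal training and query cycle
+    //! might look as follows (a sketch; the feature values, labels and
+    //! choice of prior are illustrative assumptions):
+    //! \code{cpp}
+    //! CNaiveBayes classifier{CNaiveBayesFeatureDensityFromPrior{
+    //!         CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}};
+    //! classifier.addTrainingDataPoint(0, {{1.2}, {0.3}}); // two features, class 0
+    //! classifier.addTrainingDataPoint(1, {{4.5}, {}});    // second feature missing
+    //! TDoubleSizePrVec top{classifier.highestClassProbabilities(1, {{1.0}, {0.5}})};
+    //! \endcode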
+
+    //! Debug the memory used by this object.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this object.
+    std::size_t memoryUsage() const;
+
+    //! Get a checksum for this object.
+    uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Get a human readable description of the classifier.
+    std::string print() const;
+
+private:
+    using TFeatureDensityPtr = boost::shared_ptr<CNaiveBayesFeatureDensity>;
+    using TFeatureDensityPtrVec = std::vector<TFeatureDensityPtr>;
+
+    //! \brief The data associated with a class.
+    struct SClass {
+        //! Initialize by reading state from \p traverser.
+        bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
         //! Persist state by passing information to \p inserter.
-        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-        //! Efficiently swap the contents of this and \p other.
-        void swap(CNaiveBayes &other);
-
-        //! Check if any training data has been added initialized.
-        bool initialized() const;
-
-        //! This can be used to optionally seed the class counts
-        //! with \p counts. These are added on to data class counts
-        //! to compute the class posterior probabilities.
-        void initialClassCounts(const TDoubleSizePrVec &counts);
-
-        //! Add a training data point comprising the pair \f$(x,l)\f$
-        //! for feature vector \f$x\f$ and class label \f$l\f$.
-        //!
-        //! \param[in] label The class label for \p x.
-        //! \param[in] x The feature values.
-        //! \note \p x size should be equal to the number of features.
-        //! A feature is missing is indicated by passing an empty vector
-        //! for that feature.
-        void addTrainingDataPoint(std::size_t label, const TDouble1VecVec &x);
-
-        //! Set the data type.
-        void dataType(maths_t::EDataType dataType);
-
-        //! Age out old values from the class conditional densities
-        //! to account for \p time passing.
-        void propagateForwardsByTime(double time);
-
-        //! Get the top \p n class probabilities for \p x.
-        //!
-        //! \param[in] n The number of class probabilities to estimate.
-        //! \param[in] x The feature values.
-        //! \note \p x size should be equal to the number of features.
-        //! A feature is missing is indicated by passing an empty vector
-        //! for that feature.
-        TDoubleSizePrVec highestClassProbabilities(std::size_t n,
-                                                   const TDouble1VecVec &x) const;
-
-        //! Get the probability of the class labeled \p label for \p x.
-        //!
-        //! \param[in] label The label of the class of interest.
-        //! \param[in] x The feature values.
-        //! \note \p x size should be equal to the number of features.
-        //! A feature is missing is indicated by passing an empty vector
-        //! for that feature.
-        double classProbability(std::size_t label, const TDouble1VecVec &x) const;
-
-        //! Get the probabilities of all the classes for \p x.
-        //!
-        //! \param[in] x The feature values.
-        //! \note \p x size should be equal to the number of features.
-        //! A feature is missing is indicated by passing an empty vector
-        //! for that feature.
-        TDoubleSizePrVec classProbabilities(const TDouble1VecVec &x) const;
-
+        void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
         //! Debug the memory used by this object.
         void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
         //! Get the memory used by this object.
         std::size_t memoryUsage() const;
-
         //! Get a checksum for this object.
         uint64_t checksum(uint64_t seed = 0) const;
-
-        //! Get a human readable description of the classifier.
-        std::string print() const;
-
-    private:
-        using TFeatureDensityPtr = boost::shared_ptr<CNaiveBayesFeatureDensity>;
-        using TFeatureDensityPtrVec = std::vector<TFeatureDensityPtr>;
-
-        //! \brief The data associated with a class.
-        struct SClass
-        {
-            //! Initialize by reading state from \p traverser.
-            bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
-                                        core::CStateRestoreTraverser &traverser);
-            //! Persist state by passing information to \p inserter.
-            void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-            //! Debug the memory used by this object.
-            void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-            //! Get the memory used by this object.
-            std::size_t memoryUsage() const;
-            //! Get a checksum for this object.
-            uint64_t checksum(uint64_t seed = 0) const;
-
-            //! The number of examples in this class.
-            double s_Count = 0.0;
-            //! The feature conditional densities for this class.
-            TFeatureDensityPtrVec s_ConditionalDensities;
-        };
-
-        using TSizeClassUMap = boost::unordered_map<std::size_t, SClass>;
-
-    private:
-        //! Initialize by reading state from \p traverser.
-        bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
-                                    core::CStateRestoreTraverser &traverser);
+        //! The number of examples in this class.
+        double s_Count = 0.0;
+        //! The feature conditional densities for this class.
+        TFeatureDensityPtrVec s_ConditionalDensities;
+    };

-        //! Validate \p x.
-        bool validate(const TDouble1VecVec &x) const;
+    using TSizeClassUMap = boost::unordered_map<std::size_t, SClass>;

-    private:
-        //! It is not always appropriate to use features with very low
-        //! probability in all classes to discriminate: the class choice
-        //! will be very sensitive to the underlying conditional density
-        //! model. This is a cutoff (for the minimum maximum class log
-        //! likelihood) in order to use a feature.
-        TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;
+private:
+    //! Initialize by reading state from \p traverser.
+    bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);

-        //! Controls the rate at which data are aged out.
-        double m_DecayRate;
+    //! Validate \p x.
+    bool validate(const TDouble1VecVec& x) const;

-        //! An exemplar for creating conditional densities.
-        TFeatureDensityPtr m_Exemplar;
+private:
+    //! It is not always appropriate to use features with very low
+    //! probability in all classes to discriminate: the class choice
+    //! will be very sensitive to the underlying conditional density
+    //! model. This is a cutoff (for the minimum maximum class log
+    //! likelihood) in order to use a feature.
+    TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;

-        //! The class conditional density estimates and weights.
-        TSizeClassUMap m_ClassConditionalDensities;
-};
+    //! Controls the rate at which data are aged out.
+    double m_DecayRate;
+
+    //! An exemplar for creating conditional densities.
+    TFeatureDensityPtr m_Exemplar;
+
+    //! The class conditional density estimates and weights.
+    TSizeClassUMap m_ClassConditionalDensities;
+};
 }
 }
diff --git a/include/maths/CNaturalBreaksClassifier.h b/include/maths/CNaturalBreaksClassifier.h
index 8327852b32..4296afe3c3 100644
--- a/include/maths/CNaturalBreaksClassifier.h
+++ b/include/maths/CNaturalBreaksClassifier.h
@@ -18,15 +18,12 @@
 #include
 #include
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace maths
-{
+namespace maths {
 struct SDistributionRestoreParams;

 //! \brief This does online segmentation with fixed space by approximate
@@ -97,258 +94,228 @@ struct SDistributionRestoreParams;
 //! interface to this class is double precision. If floats are used
 //! they should be used for storage only and transparent to the rest
 //! of the code base.
-class MATHS_EXPORT CNaturalBreaksClassifier
-{
-    public:
-        using TSizeVec = std::vector<std::size_t>;
-        using TDoubleVec = std::vector<double>;
-        using TDoubleDoublePr = std::pair<double, double>;
-        using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
-        using TDoubleTuple = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
-        using TDoubleTupleVec = std::vector<TDoubleTuple>;
-        using TTuple = CBasicStatistics::SSampleMeanVar<CFloatStorage>::TAccumulator;
-        using TTupleVec = std::vector<TTuple>;
-        using TClassifierVec = std::vector<CNaturalBreaksClassifier>;
-
-    public:
-        //! The type of optimization object which it is possible
-        //! to target. In particular,
-        //!   -# Deviation is the square root of the total sample
-        //!      variation.
-        //!   -# Variation is the total sample variation, i.e. the
-        //!      sum of the square differences from the sample mean.
-        enum EObjective
-        {
-            E_TargetDeviation,
-            E_TargetVariation
-        };
-
-    public:
-        //! Create a new classifier with the specified space limit.
-        //!
-        //! \param[in] space The maximum space in numbers of tuples.
-        //! A tuple comprises three floats.
-        //! \param[in] decayRate The rate at which we data ages out
-        //! of the classifier.
-        //! \param[in] minimumCategoryCount The minimum permitted count
-        //! for a category.
-        //! \note This will store as much information about the points
-        //! subject to this constraint so will generally hold approximately
-        //! \p space tuples.
-        CNaturalBreaksClassifier(std::size_t space,
-                                 double decayRate = 0.0,
-                                 double minimumCategoryCount = MINIMUM_CATEGORY_COUNT);
-
-        //! Create from part of a state document.
-        bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
-                                    core::CStateRestoreTraverser &traverser);
-
-        //! Persist state by passing information to the supplied inserter.
-        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-        //! Get the count \p p percentile position.
-        double percentile(double p) const;
-
-        //! Get the total number of categories in the classifier.
-        std::size_t size() const;
-
-        //! Split this classifier into the n-categories identified by
-        //! the categories function.
-        //!
-        //! \param[in] n The desired size of the split.
-        //! \param[in] p The minimum category size.
-        //! \param[out] result Filled in with the classifiers representing
-        //! the split.
-        //! \sa categories for details on the split.
-        bool split(std::size_t n, std::size_t p, TClassifierVec &result);
-
-        //! Split this classifier into the n-categories corresponding to
-        //! \p split.
-        //!
-        //! \param[in] split The desired partition.
-        //! \param[out] result Filled in with the classifiers representing
-        //! \p split if it is a valid partition and cleared otherwise.
-        //! \note \p split should be ordered and the maximum value should
-        //! be equal to the number of points in the classifier.
-        bool split(const TSizeVec &split, TClassifierVec &result);
-
-        //! Get the minimum within class total deviation partition
-        //! of size at most \p n.
-        //!
-        //! \param[in] n The number of partitions.
-        //! \param[in] p The minimum category size.
-        //! \param[out] result Filled in with the indices at which to break.
-        bool naturalBreaks(std::size_t n, std::size_t p, TSizeVec &result);
-
-        //! Get as many tuples as possible, but not more than \p n,
-        //! describing our best estimate of the categories in the data.
-        //!
-        //! \param[in] n The desired size for the partition.
-        //! \param[in] p The minimum category size.
-        //! \param[out] result Filled in with the minimum within class
-        //! total deviation partition of size at most n.
-        //! \param[in] append If true the categories are appended to
-        //! \p result.
-        //! \note This finds the optimum partition using a dynamic
-        //! programming approach in complexity \f$O(N^2n)\f$ where
-        //! \f$N\f$ the number of tuples and \f$n\f$ is the desired
-        //! size for the partition.
-        bool categories(std::size_t n,
-                        std::size_t p,
-                        TTupleVec &result,
-                        bool append = false);
-
-        //! Get the categories corresponding to \p split.
-        //!
-        //! \param[in] split The desired partition.
-        //! \param[out] result Filled in with the categories corresponding
-        //! to \p split if it is a valid partition and cleared otherwise.
-        //! \note \p split should be ordered and the maximum value should
-        //! be equal to the number of points in the classifier.
-        bool categories(const TSizeVec &split, TTupleVec &result);
-
-        //! Add \p x to the classifier.
-        //!
-        //! \param[in] x A point to add to the classifier.
-        //! \param[in] count The count weight of this point.
-        void add(double x, double count = 1.0);
-
-        //! Merge \p other with this classifier.
-        //!
-        //! \param[in] other Another classifier to merge with this one.
-        void merge(const CNaturalBreaksClassifier &other);
-
-        //! Set the rate at which information is aged out.
-        void decayRate(double decayRate);
-
-        //! Propagate the clusters forwards by \p time.
-        void propagateForwardsByTime(double time);
-
-        //! Check if we are currently buffering points.
-        bool buffering() const;
-
-        //! Get \p n samples of the distribution corresponding to the
-        //! categories we are maintaining.
-        //!
-        //! \param[in] numberSamples The desired number of samples.
-        //! \param[in] smallest The smallest permitted sample.
-        //! \param[in] largest The largest permitted sample.
-        //! \param[out] result Filled in with the samples of the distribution.
-        void sample(std::size_t numberSamples,
-                    double smallest,
-                    double largest,
-                    TDoubleVec &result) const;
-
-        //! Print this classifier for debug.
-        std::string print() const;
-
-        //! Get a checksum for this object.
-        uint64_t checksum(uint64_t seed = 0) const;
-
-        //! Get the memory used by this component
-        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this component
-        std::size_t memoryUsage() const;
-
-        //! Get the minimum within class total deviation partition
-        //! of the categories \p categories with size at most \p n
-        //! subject to the constraint that no category contains fewer
-        //! than \p p values.
-        //!
-        //! \param[in] categories The categories to partition.
-        //! \param[in] n The number of partitions.
-        //! \param[in] p The minimum category size.
-        //! \param[in] target The optimization objective to target.
-        //! \param[out] result Filled in with the indices at which to
-        //! break.
-        //! \note This finds the optimum partition using a dynamic
-        //! programming approach in complexity \f$O(N^2n)\f$ where
-        //! \f$N\f$ the number of tuples and \f$n\f$ is the desired
-        //! size for the partition.
-        static bool naturalBreaks(const TTupleVec &categories,
-                                  std::size_t n,
-                                  std::size_t p,
-                                  EObjective target,
-                                  TSizeVec &result);
-
-        //! Double tuple version.
-        //!
-        //! \see naturalBreaks for more details.
-        static bool naturalBreaks(const TDoubleTupleVec &categories,
-                                  std::size_t n,
-                                  std::size_t p,
-                                  EObjective target,
-                                  TSizeVec &result);
-
-    private:
-        using TSizeSizePr = std::pair<std::size_t, std::size_t>;
-
-    private:
-        //! Implementation called by naturalBreaks with explicit
-        //! tuple types.
-        template<typename TUPLE>
-        static bool naturalBreaksImpl(const std::vector<TUPLE> &categories,
-                                      std::size_t n,
-                                      std::size_t p,
-                                      EObjective target,
-                                      TSizeVec &result);
-
-    private:
-        //! The minimum permitted size for the classifier.
-        static const std::size_t MINIMUM_SPACE;
-
-        //! The maximum allowed size of the points buffer.
-        static const std::size_t MAXIMUM_BUFFER_SIZE;
-
-    private:
-        //! Construct a new classifier with the specified space limit
-        //! \p space and categories \p categories.
-        CNaturalBreaksClassifier(std::size_t space,
-                                 double decayRate,
-                                 double minimumCategoryCount,
-                                 TTupleVec &categories);
-
-        //! Reduce the number of tuples until we satisfy the space constraint.
-        void reduce();
-
-        //! Get the indices of the closest categories.
-        TSizeSizePr closestPair() const;
-
-        //! Get the total deviation of the specified class.
-        static double deviation(const TTuple &category);
-
-        //! Get the total variation of the specified class.
-        static double variation(const TTuple &category);
-
-        //! Wrapper to evaluate the specified object function.
-        static inline double objective(EObjective objective, const TTuple &category)
-        {
-            switch (objective)
-            {
-            case E_TargetDeviation: return deviation(category);
-            case E_TargetVariation: return variation(category);
-            }
-        }
+class MATHS_EXPORT CNaturalBreaksClassifier {
+public:
+    using TSizeVec = std::vector<std::size_t>;
+    using TDoubleVec = std::vector<double>;
+    using TDoubleDoublePr = std::pair<double, double>;
+    using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
+    using TDoubleTuple = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
+    using TDoubleTupleVec = std::vector<TDoubleTuple>;
+    using TTuple = CBasicStatistics::SSampleMeanVar<CFloatStorage>::TAccumulator;
+    using TTupleVec = std::vector<TTuple>;
+    using TClassifierVec = std::vector<CNaturalBreaksClassifier>;
+
+public:
+    //! The type of optimization objective which it is possible
+    //! to target. In particular,
+    //!   -# Deviation is the square root of the total sample
+    //!      variation.
+    //!   -# Variation is the total sample variation, i.e. the
+    //!      sum of the square differences from the sample mean.
+    enum EObjective { E_TargetDeviation, E_TargetVariation };
+
+public:
+    //! Create a new classifier with the specified space limit.
+    //!
+    //! \param[in] space The maximum space in numbers of tuples.
+    //! A tuple comprises three floats.
+    //! \param[in] decayRate The rate at which data ages out
+    //! of the classifier.
+    //! \param[in] minimumCategoryCount The minimum permitted count
+    //! for a category.
+    //! \note This will store as much information about the points
+    //! as possible subject to this constraint, so will generally hold
+    //! approximately \p space tuples.
+    CNaturalBreaksClassifier(std::size_t space, double decayRate = 0.0, double minimumCategoryCount = MINIMUM_CATEGORY_COUNT);
+
+    //! Create from part of a state document.
+    bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+
+    //! Persist state by passing information to the supplied inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Get the count \p p percentile position.
+    double percentile(double p) const;
+
+    //! Get the total number of categories in the classifier.
+    std::size_t size() const;
+
+    //! Split this classifier into the n-categories identified by
+    //! the categories function.
+    //!
+    //! \param[in] n The desired size of the split.
+    //! \param[in] p The minimum category size.
+    //! \param[out] result Filled in with the classifiers representing
+    //! the split.
+    //! \sa categories for details on the split.
+ bool split(std::size_t n, std::size_t p, TClassifierVec& result); + + //! Split this classifier into the n-categories corresponding to + //! \p split. + //! + //! \param[in] split The desired partition. + //! \param[out] result Filled in with the classifiers representing + //! \p split if it is a valid partition and cleared otherwise. + //! \note \p split should be ordered and the maximum value should + //! be equal to the number of points in the classifier. + bool split(const TSizeVec& split, TClassifierVec& result); + + //! Get the minimum within class total deviation partition + //! of size at most \p n. + //! + //! \param[in] n The number of partitions. + //! \param[in] p The minimum category size. + //! \param[out] result Filled in with the indices at which to break. + bool naturalBreaks(std::size_t n, std::size_t p, TSizeVec& result); + + //! Get as many tuples as possible, but not more than \p n, + //! describing our best estimate of the categories in the data. + //! + //! \param[in] n The desired size for the partition. + //! \param[in] p The minimum category size. + //! \param[out] result Filled in with the minimum within class + //! total deviation partition of size at most n. + //! \param[in] append If true the categories are appended to + //! \p result. + //! \note This finds the optimum partition using a dynamic + //! programming approach in complexity \f$O(N^2n)\f$ where + //! \f$N\f$ the number of tuples and \f$n\f$ is the desired + //! size for the partition. + bool categories(std::size_t n, std::size_t p, TTupleVec& result, bool append = false); + + //! Get the categories corresponding to \p split. + //! + //! \param[in] split The desired partition. + //! \param[out] result Filled in with the categories corresponding + //! to \p split if it is a valid partition and cleared otherwise. + //! \note \p split should be ordered and the maximum value should + //! be equal to the number of points in the classifier. + bool categories(const TSizeVec& split, TTupleVec& result); + + //! Add \p x to the classifier. + //! + //! \param[in] x A point to add to the classifier. + //! \param[in] count The count weight of this point. + void add(double x, double count = 1.0); + + //! Merge \p other with this classifier. + //! + //! \param[in] other Another classifier to merge with this one. + void merge(const CNaturalBreaksClassifier& other); + + //! Set the rate at which information is aged out. + void decayRate(double decayRate); + + //! Propagate the clusters forwards by \p time. + void propagateForwardsByTime(double time); + + //! Check if we are currently buffering points. + bool buffering() const; + + //! Get \p n samples of the distribution corresponding to the + //! categories we are maintaining. + //! + //! \param[in] numberSamples The desired number of samples. + //! \param[in] smallest The smallest permitted sample. + //! \param[in] largest The largest permitted sample. + //! \param[out] result Filled in with the samples of the distribution. + void sample(std::size_t numberSamples, double smallest, double largest, TDoubleVec& result) const; + + //! Print this classifier for debug. + std::string print() const; + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; + + //! Get the memory used by this component + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component + std::size_t memoryUsage() const; + + //! Get the minimum within class total deviation partition + //! 
of the categories \p categories with size at most \p n
+    //! subject to the constraint that no category contains fewer
+    //! than \p p values.
+    //!
+    //! \param[in] categories The categories to partition.
+    //! \param[in] n The number of partitions.
+    //! \param[in] p The minimum category size.
+    //! \param[in] target The optimization objective to target.
+    //! \param[out] result Filled in with the indices at which to
+    //! break.
+    //! \note This finds the optimum partition using a dynamic
+    //! programming approach in complexity \f$O(N^2n)\f$ where
+    //! \f$N\f$ the number of tuples and \f$n\f$ is the desired
+    //! size for the partition.
+    static bool naturalBreaks(const TTupleVec& categories, std::size_t n, std::size_t p, EObjective target, TSizeVec& result);
+
+    //! Double tuple version.
+    //!
+    //! \see naturalBreaks for more details.
+    static bool naturalBreaks(const TDoubleTupleVec& categories, std::size_t n, std::size_t p, EObjective target, TSizeVec& result);
+
+private:
+    using TSizeSizePr = std::pair<std::size_t, std::size_t>;
+
+private:
+    //! Implementation called by naturalBreaks with explicit
+    //! tuple types.
+    template<typename TUPLE>
+    static bool naturalBreaksImpl(const std::vector<TUPLE>& categories, std::size_t n, std::size_t p, EObjective target, TSizeVec& result);
+
+private:
+    //! The minimum permitted size for the classifier.
+    static const std::size_t MINIMUM_SPACE;
+
+    //! The maximum allowed size of the points buffer.
+    static const std::size_t MAXIMUM_BUFFER_SIZE;
+
+private:
+    //! Construct a new classifier with the specified space limit
+    //! \p space and categories \p categories.
+    CNaturalBreaksClassifier(std::size_t space, double decayRate, double minimumCategoryCount, TTupleVec& categories);
+
+    //! Reduce the number of tuples until we satisfy the space constraint.
+    void reduce();
+
+    //! Get the indices of the closest categories.
+    TSizeSizePr closestPair() const;
+
+    //! Get the total deviation of the specified class.
+    static double deviation(const TTuple& category);
+
+    //! Get the total variation of the specified class.
+    static double variation(const TTuple& category);
+
+    //! Wrapper to evaluate the specified objective function.
+    static inline double objective(EObjective objective, const TTuple& category) {
+        switch (objective) {
+        case E_TargetDeviation:
+            return deviation(category);
+        case E_TargetVariation:
+            return variation(category);
+        }
+        return deviation(category);
+    }

-    private:
-        //! The maximum space in doubles.
-        std::size_t m_Space;
+private:
+    //! The maximum space in doubles.
+    std::size_t m_Space;

-        //! The rate at which the categories lose information.
-        double m_DecayRate;
+    //! The rate at which the categories lose information.
+    double m_DecayRate;

-        //! The minimum permitted count for a category.
-        double m_MinimumCategoryCount;
+    //! The minimum permitted count for a category.
+    double m_MinimumCategoryCount;

-        //! The categories we are maintaining.
-        TTupleVec m_Categories;
+    //! The categories we are maintaining.
+    TTupleVec m_Categories;

-        //! A buffer of the points added while the space constraint is satisfied.
-        TDoubleDoublePrVec m_PointsBuffer;
+    //! A buffer of the points added while the space constraint is satisfied.
+    TDoubleDoublePrVec m_PointsBuffer;
 };
-
 }
 }
diff --git a/include/maths/CNormalMeanPrecConjugate.h b/include/maths/CNormalMeanPrecConjugate.h
index 29a7854908..4091e83f27 100644
--- a/include/maths/CNormalMeanPrecConjugate.h
+++ b/include/maths/CNormalMeanPrecConjugate.h
@@ -17,15 +17,12 @@
 #include
 #include
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace maths
-{
+namespace maths {
 struct SDistributionRestoreParams;

 //! \brief A conjugate prior distribution for a normal variable.
@@ -47,337 +44,320 @@ struct SDistributionRestoreParams;
 //! the data when using one-of-n composition (see COneOfNPrior) or model data with
 //! multiple modes when using multi-modal composition (see CMultimodalPrior).
 //! From a design point of view this is the composite pattern.
-class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior
-{
-    public:
-        //! See core::CMemory.
-        static bool dynamicSizeAlwaysZero() { return true; }
-
-        using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
-        using TEqualWithTolerance = CEqualWithTolerance<double>;
-
-        //! Lift the overloads of addSamples into scope.
-        using CPrior::addSamples;
-        //! Lift the overloads of print into scope.
-        using CPrior::print;
-
-    public:
-        //! \name Life-Cycle
-        //@{
-        //! \param[in] dataType The type of data being modeled (see maths_t::EDataType
-        //! for details).
-        //! \param[in] gaussianMean The mean of the normal component of the prior.
-        //! \param[in] gaussianPrecision The precision of the normal component of
-        //! the prior.
-        //! \param[in] gammaShape The shape parameter of the gamma component of the
-        //! prior.
-        //! \param[in] gammaRate The rate parameter of the gamma component of the
-        //! prior.
-        //! \param[in] decayRate The rate at which to revert to non-informative.
-        CNormalMeanPrecConjugate(maths_t::EDataType dataType,
-                                 double gaussianMean,
-                                 double gaussianPrecision,
-                                 double gammaShape,
-                                 double gammaRate,
-                                 double decayRate = 0.0);
-
-        //! Construct from sample central moments.
-        CNormalMeanPrecConjugate(maths_t::EDataType dataType,
-                                 const TMeanVarAccumulator &moments,
-                                 double decayRate = 0.0);
-
-        //! Construct from part of a state document.
-        CNormalMeanPrecConjugate(const SDistributionRestoreParams &params,
-                                 core::CStateRestoreTraverser &traverser);
-
-        // Default copy constructor and assignment operator work.
-
-        //! Create an instance of a non-informative prior.
-        //!
-        //! \param[in] dataType The type of data being modeled (see maths_t::EDataType
-        //! for details).
-        //! \param[in] decayRate The rate at which to revert to the non-informative prior.
-        //! \return A non-informative prior.
-        static CNormalMeanPrecConjugate nonInformativePrior(maths_t::EDataType dataType,
-                                                            double decayRate = 0.0);
-        //@}
-
-        //! Reset the prior based on the sample central moments.
-        void reset(maths_t::EDataType dataType,
-                   const TMeanVarAccumulator &moments,
-                   double decayRate = 0.0);
-
-        //! \name Prior Contract
-        //@{
-        //! Get the type of this prior.
-        virtual EPrior type() const;
-
-        //! Create a copy of the prior.
-        //!
-        //! \return A pointer to a newly allocated clone of this prior.
-        //! \warning The caller owns the object returned.
-        virtual CNormalMeanPrecConjugate *clone() const;
-
-        //! Reset the prior to non-informative.
-        virtual void setToNonInformative(double offset = 0.0,
-                                         double decayRate = 0.0);
-
-        //! Returns false.
-        virtual bool needsOffset() const;
-
-        //! No-op.
- virtual double adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Returns zero. - virtual double offset() const; - - //! Update the prior with a collection of independent samples from - //! the normal variable. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Propagate the prior density function forwards by \p time. - //! - //! The prior distribution relaxes back to non-informative at a rate - //! controlled by the decay rate parameter (optionally supplied to the - //! constructor). - //! - //! \param[in] time The time increment to apply. - //! \note \p time must be non negative. - virtual void propagateForwardsByTime(double time); - - //! Get the support for the marginal likelihood function. - virtual TDoubleDoublePr marginalLikelihoodSupport() const; - - //! Get the mean of the marginal likelihood function. - virtual double marginalLikelihoodMean() const; - - //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
-        //! 
- //! - //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the - //! the percentage of interest \p percentage. - //! - //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. - //! \param[in] weights Optional variance scale weights. - //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr - marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Compute the log marginal likelihood function at \p samples integrating - //! over the prior density function for the normal mean and precision. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - //! \note The samples are assumed to be independent and identically - //! distributed. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const; - - //! Sample the marginal likelihood function. - //! - //! \see CPrior::sampleMarginalLikelihood() for a detailed description. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const; - - //! Compute minus the log of the joint c.d.f. of the marginal likelihood - //! at \p samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights of each sample in \p samples. For - //! the count variance scale weight style the weight is interpreted as - //! a scale of the likelihood variance. So we interpret the likelihood - //! function as:\n - //!
-        //!   \f$\displaystyle f(x_i) = \sqrt{\frac{p}{2\pi\gamma_i}} e^{-\frac{p}{2\gamma_i}(x_i - m)^2}\f$
-        //! 
- //! Here, \f$m\f$ is the mean and \f$p\f$ are the mean and precision for - //! which this is the prior. - //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ - //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. - //! \param[out] upperBound Equal to \p lowerBound. - //! \note The samples are assumed to be independent and identically - //! distributed. - //! \note The samples are assumed to be independent. - //! \warning The variance scales \f$\gamma_i\f$ must be in the range - //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and - //! a value of infinity is not well handled. (Very large values are - //! handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute minus the log of the one minus the joint c.d.f. of the - //! marginal likelihood at \p samples without losing precision due to - //! cancellation errors at one, i.e. the smallest non-zero value this - //! can return is the minimum double rather than epsilon. - //! - //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute the probability of a less likely, i.e. lower likelihood, - //! collection of independent samples from the variable. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights. See minusLogJointCdf for discussion. - //! \param[out] lowerBound Filled in with the probability of the set - //! for which the joint marginal likelihood is less than that of - //! \p samples (subject to the measure \p calculation). - //! \param[out] upperBound Equal to \p lowerBound. - //! \param[out] tail The tail that (left or right) that all the - //! samples are in or neither. - //! \note The samples are assumed to be independent. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, - //! i.e. a value of zero is not well defined and a value of infinity - //! is not well handled. (Very large values are handled though.) - virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &indent, std::string &result) const; - - //! Print the prior density function in a specified format. - //! - //! \see CPrior::printJointDensityFunction for details. - virtual std::string printJointDensityFunction() const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! 
Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - //@} - - //! The current expected mean for the variable. - double mean() const; - - //! The current expected precision for the variable. - double precision() const; - - //! \name Test Functions - //@{ - //! Compute the specified percentage confidence interval for the variable - //! mean. - TDoubleDoublePr confidenceIntervalMean(double percentage) const; - - //! Compute the specified percentage confidence interval for the variable - //! precision. - TDoubleDoublePr confidenceIntervalPrecision(double percentage) const; - - //! Check if two priors are equal to the specified tolerance. - bool equalTolerance(const CNormalMeanPrecConjugate &rhs, - const TEqualWithTolerance &equal) const; - //@} - - private: - //! Read parameters from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Check that the state is valid. - bool isBad() const; - - //! Full debug dump of the state of this prior. - std::string debug() const; - - private: - //! The mean parameter of a non-informative prior. - static const double NON_INFORMATIVE_MEAN; - - //! The precision parameter of a non-informative prior. - static const double NON_INFORMATIVE_PRECISION; - - //! The shape parameter of a non-informative prior. - static const double NON_INFORMATIVE_SHAPE; - - //! The rate parameter of a non-informative prior. - static const double NON_INFORMATIVE_RATE; - - private: - //! The mean of the prior conditional distribution for the mean of the - //! normal variable (conditioned on its precision). - double m_GaussianMean; - - //! The precision of the prior conditional distribution for the mean - //! of the normal variable (conditioned on its precision). - double m_GaussianPrecision; - - //! The shape of the marginal gamma distribution for the precision of the - //! normal variable. - double m_GammaShape; - - //! The rate of the marginal gamma distribution for the precision of the - //! normal variable. - double m_GammaRate; +class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + + using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; + using TEqualWithTolerance = CEqualWithTolerance; + + //! Lift the overloads of addSamples into scope. + using CPrior::addSamples; + //! Lift the overloads of print into scope. + using CPrior::print; + +public: + //! \name Life-Cycle + //@{ + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] gaussianMean The mean of the normal component of the prior. + //! \param[in] gaussianPrecision The precision of the normal component of + //! the prior. + //! \param[in] gammaShape The shape parameter of the gamma component of the + //! prior. + //! \param[in] gammaRate The rate parameter of the gamma component of the + //! prior. + //! \param[in] decayRate The rate at which to revert to non-informative. 
+ CNormalMeanPrecConjugate(maths_t::EDataType dataType, + double gaussianMean, + double gaussianPrecision, + double gammaShape, + double gammaRate, + double decayRate = 0.0); + + //! Construct from sample central moments. + CNormalMeanPrecConjugate(maths_t::EDataType dataType, const TMeanVarAccumulator& moments, double decayRate = 0.0); + + //! Construct from part of a state document. + CNormalMeanPrecConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + // Default copy constructor and assignment operator work. + + //! Create an instance of a non-informative prior. + //! + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] decayRate The rate at which to revert to the non-informative prior. + //! \return A non-informative prior. + static CNormalMeanPrecConjugate nonInformativePrior(maths_t::EDataType dataType, double decayRate = 0.0); + //@} + + //! Reset the prior based on the sample central moments. + void reset(maths_t::EDataType dataType, const TMeanVarAccumulator& moments, double decayRate = 0.0); + + //! \name Prior Contract + //@{ + //! Get the type of this prior. + virtual EPrior type() const; + + //! Create a copy of the prior. + //! + //! \return A pointer to a newly allocated clone of this prior. + //! \warning The caller owns the object returned. + virtual CNormalMeanPrecConjugate* clone() const; + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); + + //! Returns false. + virtual bool needsOffset() const; + + //! No-op. + virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Returns zero. + virtual double offset() const; + + //! Update the prior with a collection of independent samples from + //! the normal variable. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Propagate the prior density function forwards by \p time. + //! + //! The prior distribution relaxes back to non-informative at a rate + //! controlled by the decay rate parameter (optionally supplied to the + //! constructor). + //! + //! \param[in] time The time increment to apply. + //! \note \p time must be non negative. + virtual void propagateForwardsByTime(double time); + + //! Get the support for the marginal likelihood function. + virtual TDoubleDoublePr marginalLikelihoodSupport() const; + + //! Get the mean of the marginal likelihood function. + virtual double marginalLikelihoodMean() const; + + //! Get the mode of the marginal likelihood function. + virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the variance of the marginal likelihood. + virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. 
the values \f$a\f$ and \f$b\f$ such that:
+    //! <pre class="fragment">
+    //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
+    //! </pre>
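[Editor's aside, not part of the patch: to make the interval definition above concrete, here is a minimal C++ sketch. A plain normal distribution stands in for the posterior predictive this class actually integrates over, and the function name is made up for the illustration.]

    #include <boost/math/distributions/normal.hpp>

    #include <utility>

    // Sketch: for a symmetric unimodal distribution the median m sits midway,
    // so a and b are the (0.5 - half) and (0.5 + half) quantiles, which gives
    // P([a,m]) = P([m,b]) = percentage / 100 / 2 as in the comment above.
    std::pair<double, double> symmetricConfidenceInterval(double median, double sd, double percentage) {
        boost::math::normal_distribution<> predictive(median, sd);
        double half = percentage / 100.0 / 2.0;
        double a = boost::math::quantile(predictive, 0.5 - half);
        double b = boost::math::quantile(predictive, 0.5 + half);
        return {a, b};
    }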
+    //!
+    //! where \f$m\f$ is the median of the distribution and \f$p\f$ is
+    //! the percentage of interest \p percentage.
+    //!
+    //! \param[in] percentage The percentage of interest.
+    //! \param[in] weightStyles Optional variance scale weight styles.
+    //! \param[in] weights Optional variance scale weights.
+    //! \note \p percentage should be in the range [0.0, 100.0).
+    virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage,
+                                                                 const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE,
+                                                                 const TDouble4Vec& weights = TWeights::UNIT) const;
+
+    //! Compute the log marginal likelihood function at \p samples integrating
+    //! over the prior density function for the normal mean and precision.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the variable.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] result Filled in with the joint likelihood of \p samples.
+    //! \note The samples are assumed to be independent and identically
+    //! distributed.
+    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                          const TDouble1Vec& samples,
+                                                                          const TDouble4Vec1Vec& weights,
+                                                                          double& result) const;
+
+    //! Sample the marginal likelihood function.
+    //!
+    //! \see CPrior::sampleMarginalLikelihood() for a detailed description.
+    //!
+    //! \param[in] numberSamples The number of samples required.
+    //! \param[out] samples Filled in with samples from the prior.
+    //! \note \p numberSamples is truncated to the number of samples received.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const;
+
+    //! Compute minus the log of the joint c.d.f. of the marginal likelihood
+    //! at \p samples.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples The samples of interest.
+    //! \param[in] weights The weights of each sample in \p samples. For
+    //! the count variance scale weight style the weight is interpreted as
+    //! a scale of the likelihood variance. So we interpret the likelihood
+    //! function as:\n
+    //! <pre class="fragment">
+    //!   \f$\displaystyle f(x_i) = \sqrt{\frac{p}{2\pi\gamma_i}} e^{-\frac{p}{2\gamma_i}(x_i - m)^2}\f$
+    //! </pre>
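[Editor's aside, not part of the patch: the density above is a normal with mean \f$m\f$ and variance \f$\gamma_i / p\f$, i.e. the count variance scale simply inflates the likelihood variance. A minimal sketch of its log, with an illustrative name, follows.]

    #include <cmath>

    // Sketch: log of f(x) = sqrt(p / (2 pi gamma)) * exp(-p / (2 gamma) * (x - m)^2),
    // written in terms of the scaled variance gamma / p.
    double scaledLogLikelihood(double x, double m, double p, double gamma) {
        double variance = gamma / p;
        double pi = 3.141592653589793;
        return -0.5 * std::log(2.0 * pi * variance) - (x - m) * (x - m) / (2.0 * variance);
    }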
+    //! Here, \f$m\f$ and \f$p\f$ are the mean and precision for
+    //! which this is the prior.
+    //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$
+    //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples.
+    //! \param[out] upperBound Equal to \p lowerBound.
+    //! \note The samples are assumed to be independent and identically
+    //! distributed.
+    //! \warning The variance scales \f$\gamma_i\f$ must be in the range
+    //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and
+    //! a value of infinity is not well handled. (Very large values are
+    //! handled though.)
+    virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                  const TDouble1Vec& samples,
+                                  const TDouble4Vec1Vec& weights,
+                                  double& lowerBound,
+                                  double& upperBound) const;
+
+    //! Compute minus the log of one minus the joint c.d.f. of the
+    //! marginal likelihood at \p samples without losing precision due to
+    //! cancellation errors at one, i.e. the smallest non-zero value this
+    //! can return is the minimum double rather than epsilon.
+    //!
+    //! \see minusLogJointCdf for more details.
+    virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                            const TDouble1Vec& samples,
+                                            const TDouble4Vec1Vec& weights,
+                                            double& lowerBound,
+                                            double& upperBound) const;
+
+    //! Compute the probability of a less likely, i.e. lower likelihood,
+    //! collection of independent samples from the variable.
+    //!
+    //! \param[in] calculation The style of the probability calculation
+    //! (see model_t::EProbabilityCalculation for details).
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples The samples of interest.
+    //! \param[in] weights The weights. See minusLogJointCdf for discussion.
+    //! \param[out] lowerBound Filled in with the probability of the set
+    //! for which the joint marginal likelihood is less than that of
+    //! \p samples (subject to the measure \p calculation).
+    //! \param[out] upperBound Equal to \p lowerBound.
+    //! \param[out] tail The tail (left or right) that all the
+    //! samples are in or neither.
+    //! \note The samples are assumed to be independent.
+    //! \warning The variance scales must be in the range \f$(0,\infty)\f$,
+    //! i.e. a value of zero is not well defined and a value of infinity
+    //! is not well handled. (Very large values are handled though.)
+    virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
+                                                const TWeightStyleVec& weightStyles,
+                                                const TDouble1Vec& samples,
+                                                const TDouble4Vec1Vec& weights,
+                                                double& lowerBound,
+                                                double& upperBound,
+                                                maths_t::ETail& tail) const;
+
+    //! Check if this is a non-informative prior.
+    virtual bool isNonInformative() const;
+
+    //! Get a human readable description of the prior.
+    //!
+    //! \param[in] indent The indent to use at the start of new lines.
+    //! \param[in,out] result Filled in with the description.
+    virtual void print(const std::string& indent, std::string& result) const;
+
+    //! Print the prior density function in a specified format.
+    //!
+    //! \see CPrior::printJointDensityFunction for details.
+    virtual std::string printJointDensityFunction() const;
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const;
+
+    //!
Get the memory used by this component + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component + virtual std::size_t memoryUsage() const; + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const; + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + //@} + + //! The current expected mean for the variable. + double mean() const; + + //! The current expected precision for the variable. + double precision() const; + + //! \name Test Functions + //@{ + //! Compute the specified percentage confidence interval for the variable + //! mean. + TDoubleDoublePr confidenceIntervalMean(double percentage) const; + + //! Compute the specified percentage confidence interval for the variable + //! precision. + TDoubleDoublePr confidenceIntervalPrecision(double percentage) const; + + //! Check if two priors are equal to the specified tolerance. + bool equalTolerance(const CNormalMeanPrecConjugate& rhs, const TEqualWithTolerance& equal) const; + //@} + +private: + //! Read parameters from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Check that the state is valid. + bool isBad() const; + + //! Full debug dump of the state of this prior. + std::string debug() const; + +private: + //! The mean parameter of a non-informative prior. + static const double NON_INFORMATIVE_MEAN; + + //! The precision parameter of a non-informative prior. + static const double NON_INFORMATIVE_PRECISION; + + //! The shape parameter of a non-informative prior. + static const double NON_INFORMATIVE_SHAPE; + + //! The rate parameter of a non-informative prior. + static const double NON_INFORMATIVE_RATE; + +private: + //! The mean of the prior conditional distribution for the mean of the + //! normal variable (conditioned on its precision). + double m_GaussianMean; + + //! The precision of the prior conditional distribution for the mean + //! of the normal variable (conditioned on its precision). + double m_GaussianPrecision; + + //! The shape of the marginal gamma distribution for the precision of the + //! normal variable. + double m_GammaShape; + + //! The rate of the marginal gamma distribution for the precision of the + //! normal variable. + double m_GammaRate; }; - } } diff --git a/include/maths/COneOfNPrior.h b/include/maths/COneOfNPrior.h index 04daecc7f9..f07773ea6c 100644 --- a/include/maths/COneOfNPrior.h +++ b/include/maths/COneOfNPrior.h @@ -21,10 +21,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { struct SDistributionRestoreParams; //! \brief Interface for a prior distribution which assumes data are from one @@ -47,341 +45,325 @@ struct SDistributionRestoreParams; //! in. All component models are owned by the object (it wouldn't make sense //! to share them) so this also defines the necessary functions to support //! value semantics and manage the heap. -class MATHS_EXPORT COneOfNPrior : public CPrior -{ - public: - using TPriorPtr = boost::shared_ptr; - using TPriorPtrVec = std::vector; - using TPriorCPtrVec = std::vector; - using TDoublePriorPtrPr = std::pair; - using TDoublePriorPtrPrVec = std::vector; - - //! Lift all overloads of the dataType into scope. - using CPrior::dataType; - //! Lift all overloads of the decayRate into scope. - using CPrior::decayRate; - //! 
Lift the overloads of addSamples into scope. - using CPrior::addSamples; - //! Lift the overloads of print into scope. - using CPrior::print; - - public: - //! \name Life-Cycle - //@{ - //! Create with a collection of models. - //! - //! \param[in] models The simple models which comprise the mixed model. - //! \param[in] dataType The type of data being modeled (see maths_t::EDataType - //! for details). - //! \param[in] decayRate The rate at which to revert to the non-informative prior. - //! \warning This class takes ownership of \p models. - COneOfNPrior(const TPriorPtrVec &models, - maths_t::EDataType dataType, - double decayRate = 0.0); - - //! Create with a weighted collection of models. - //! - //! \param[in] models The simple models and their weights which comprise - //! the mixed model. - //! \param[in] dataType The type of data being modeled (see maths_t::EDataType - //! for details). - //! \param[in] decayRate The rate at which we revert to the non-informative prior. - //! \warning This class takes ownership of \p models. - COneOfNPrior(const TDoublePriorPtrPrVec &models, - maths_t::EDataType dataType, - double decayRate = 0.0); - - //! Construct from part of a state document. - COneOfNPrior(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Implements value semantics for copy construction. - COneOfNPrior(const COneOfNPrior &other); - - //! Implements value semantics for assignment. - //! - //! \param[in] rhs The mixed model to copy. - //! \return The newly updated model. - //! \note That this class has value semantics: this overwrites the current - //! collection of models. - COneOfNPrior &operator=(const COneOfNPrior &rhs); - - //! Efficient swap of the contents of this prior and \p other. - void swap(COneOfNPrior &other); - //@} - - //! \name Prior Contract - //@{ - //! Get the type of this prior. - virtual EPrior type() const; - - //! Create a copy of the prior. - //! - //! \return A pointer to a newly allocated clone of this model. - //! \warning The caller owns the object returned. - virtual COneOfNPrior *clone() const; - - //! Set the data type. - virtual void dataType(maths_t::EDataType value); - - //! Set the rate at which the prior returns to non-informative. - virtual void decayRate(double value); - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); - - //! Remove models marked by \p filter. - virtual void removeModels(CModelFilter &filter); - - //! Check if any of the models needs an offset to be applied. - virtual bool needsOffset() const; - - //! Forward the offset to the model priors. - //! - //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Get the maximum model offset. - virtual double offset() const; - - //! Update the model weights using the marginal likelihoods for - //! the data. The component prior parameters are then updated. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! 
Propagate the prior density function forwards by \p time. - //! - //! The prior distribution relaxes back to non-informative at a rate - //! controlled by the decay rate parameter (optionally supplied to the - //! constructor). - //! - //! \param[in] time The time increment to apply. - //! \note \p time must be non negative. - virtual void propagateForwardsByTime(double time); - - //! Get the support for the marginal likelihood function. - virtual TDoubleDoublePr marginalLikelihoodSupport() const; - - //! Get the mean of the marginal likelihood function. - virtual double marginalLikelihoodMean() const; - - //! Get the weighted mean of the model nearest means. - virtual double nearestMarginalLikelihoodMean(double value) const; - - //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
-        //! 
- //! - //! where \f$m\f$ is the mode of the distribution and \f$p\f$ is the - //! the percentage of interest \p percentage. - //! - //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. - //! \param[in] weights Optional variance scale weights. - //! \note \p percentage should be in the range (0.0, 100.0]. - virtual TDoubleDoublePr - marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Compute the log marginal likelihood function at \p samples integrating - //! over the prior density function for the distribution parameters. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - //! \note The samples are assumed to be independent and identically - //! distributed. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const; - - //! Sample the marginal likelihood function. - //! - //! This samples each model in proportion to the probability the data - //! come from that model. Since each model can only be sampled an integer - //! number of times we find the sampling which minimizes the error from - //! the ideal sampling. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const; - - private: - //! The common c.d.f. implementation. - bool minusLogJointCdfImpl(bool complement, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - public: - //! Compute minus the log of the joint c.d.f. of the marginal likelihood - //! at \p samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with a lower bound to acceptable - //! accuracy of \f$-\log(\prod_i{F(x_i)})\f$, where \f$F(.)\f$ is the - //! c.d.f. and \f$\{x_i\}\f$ are the samples. - //! \param[out] upperBound Filled in with an upper bound to acceptable - //! accuracy of \f$-\log(\prod_i{F(x_i)})\f$, where \f$F(.)\f$ is the - //! c.d.f. and \f$\{x_i\}\f$ are the samples. - //! \note The samples are assumed to be independent. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, - //! i.e. a value of zero is not well defined and a value of infinity is - //! not well handled. (Very large values are handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! 
Compute minus the log of the one minus the joint c.d.f. of the - //! marginal likelihood at \p samples without losing precision due to - //! cancellation errors at one, i.e. the smallest non-zero value this - //! can return is the minimum double rather than epsilon. - //! - //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute the probability of a less likely, i.e. lower likelihood, - //! collection of independent samples from the variable. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with the probability of the set - //! for which the joint marginal likelihood is less than that of - //! \p samples (subject to the measure \p calculation). - //! \param[out] upperBound Equal to \p lowerBound. - //! \param[out] tail The tail that (left or right) that all the samples - //! are in or neither. - //! \note The samples are assumed to be independent. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, - //! i.e. a value of zero is not well defined and a value of infinity is - //! not well handled. (Very large values are handled though.) - virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &indent, std::string &result) const; - - //! Print the prior density function in a specified format. - //! - //! \see CPrior::printJointDensityFunction for details. - virtual std::string printJointDensityFunction() const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Debug the memory used by this component. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component. - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - //@} - - //! \name Test Functions - //@{ - //! Get the current values for the model weights. - TDoubleVec weights() const; - - //! Get the current values for the log model weights. - TDoubleVec logWeights() const; - - //! Get the current constituent models. 
- TPriorCPtrVec models() const; - //@} - - private: - using TDoubleSizePr = std::pair; - using TDoubleSizePr5Vec = core::CSmallVector; - using TWeightPriorPtrPr = std::pair; - using TWeightPriorPtrPrVec = std::vector; - using TMaxAccumulator = CBasicStatistics::SMax::TAccumulator; - - private: - //! Read parameters from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Add a model vector entry reading parameters from \p traverser. - bool modelAcceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Get the normalized model weights. - TDoubleSizePr5Vec normalizedLogWeights() const; - - //! Get the median of the model means. - double medianModelMean() const; - - //! Check that the model weights are valid. - bool badWeights() const; - - //! Full debug dump of the model weights. - std::string debugWeights() const; - - private: - //! A collection of component models and their probabilities. - TWeightPriorPtrPrVec m_Models; +class MATHS_EXPORT COneOfNPrior : public CPrior { +public: + using TPriorPtr = boost::shared_ptr; + using TPriorPtrVec = std::vector; + using TPriorCPtrVec = std::vector; + using TDoublePriorPtrPr = std::pair; + using TDoublePriorPtrPrVec = std::vector; + + //! Lift all overloads of the dataType into scope. + using CPrior::dataType; + //! Lift all overloads of the decayRate into scope. + using CPrior::decayRate; + //! Lift the overloads of addSamples into scope. + using CPrior::addSamples; + //! Lift the overloads of print into scope. + using CPrior::print; + +public: + //! \name Life-Cycle + //@{ + //! Create with a collection of models. + //! + //! \param[in] models The simple models which comprise the mixed model. + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] decayRate The rate at which to revert to the non-informative prior. + //! \warning This class takes ownership of \p models. + COneOfNPrior(const TPriorPtrVec& models, maths_t::EDataType dataType, double decayRate = 0.0); + + //! Create with a weighted collection of models. + //! + //! \param[in] models The simple models and their weights which comprise + //! the mixed model. + //! \param[in] dataType The type of data being modeled (see maths_t::EDataType + //! for details). + //! \param[in] decayRate The rate at which we revert to the non-informative prior. + //! \warning This class takes ownership of \p models. + COneOfNPrior(const TDoublePriorPtrPrVec& models, maths_t::EDataType dataType, double decayRate = 0.0); + + //! Construct from part of a state document. + COneOfNPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Implements value semantics for copy construction. + COneOfNPrior(const COneOfNPrior& other); + + //! Implements value semantics for assignment. + //! + //! \param[in] rhs The mixed model to copy. + //! \return The newly updated model. + //! \note That this class has value semantics: this overwrites the current + //! collection of models. + COneOfNPrior& operator=(const COneOfNPrior& rhs); + + //! Efficient swap of the contents of this prior and \p other. + void swap(COneOfNPrior& other); + //@} + + //! \name Prior Contract + //@{ + //! Get the type of this prior. + virtual EPrior type() const; + + //! Create a copy of the prior. + //! + //! \return A pointer to a newly allocated clone of this model. + //! 
\warning The caller owns the object returned. + virtual COneOfNPrior* clone() const; + + //! Set the data type. + virtual void dataType(maths_t::EDataType value); + + //! Set the rate at which the prior returns to non-informative. + virtual void decayRate(double value); + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); + + //! Remove models marked by \p filter. + virtual void removeModels(CModelFilter& filter); + + //! Check if any of the models needs an offset to be applied. + virtual bool needsOffset() const; + + //! Forward the offset to the model priors. + //! + //! \return The penalty to apply in model selection. + virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Get the maximum model offset. + virtual double offset() const; + + //! Update the model weights using the marginal likelihoods for + //! the data. The component prior parameters are then updated. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Propagate the prior density function forwards by \p time. + //! + //! The prior distribution relaxes back to non-informative at a rate + //! controlled by the decay rate parameter (optionally supplied to the + //! constructor). + //! + //! \param[in] time The time increment to apply. + //! \note \p time must be non negative. + virtual void propagateForwardsByTime(double time); + + //! Get the support for the marginal likelihood function. + virtual TDoubleDoublePr marginalLikelihoodSupport() const; + + //! Get the mean of the marginal likelihood function. + virtual double marginalLikelihoodMean() const; + + //! Get the weighted mean of the model nearest means. + virtual double nearestMarginalLikelihoodMean(double value) const; + + //! Get the mode of the marginal likelihood function. + virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the variance of the marginal likelihood. + virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: + //!
+    //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
+    //! 
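[Editor's aside, not part of the patch: for the weighted mixture this class maintains, the endpoints are \f$a = Q(F(m) - p/200)\f$ and \f$b = Q(F(m) + p/200)\f$, where \f$F\f$ is the mixture c.d.f., \f$Q\f$ its quantile function and \f$m\f$ the mode. A minimal sketch finding \f$Q\f$ by bisection follows; the components, weights and function name are made up for the illustration.]

    #include <boost/math/distributions/normal.hpp>

    #include <cstddef>
    #include <vector>

    // Sketch: the quantile of a weighted normal mixture, located by bisecting
    // on the weighted c.d.f. over a bracketing interval [lo, hi].
    double mixtureQuantile(const std::vector<double>& weights,
                           const std::vector<boost::math::normal_distribution<>>& models,
                           double probability,
                           double lo,
                           double hi) {
        for (int i = 0; i < 60; ++i) {
            double mid = (lo + hi) / 2.0;
            double cdf = 0.0;
            for (std::size_t j = 0; j < models.size(); ++j) {
                cdf += weights[j] * boost::math::cdf(models[j], mid);
            }
            if (cdf < probability) {
                lo = mid;
            } else {
                hi = mid;
            }
        }
        return (lo + hi) / 2.0;
    }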
+    //!
+    //! where \f$m\f$ is the mode of the distribution and \f$p\f$ is
+    //! the percentage of interest \p percentage.
+    //!
+    //! \param[in] percentage The percentage of interest.
+    //! \param[in] weightStyles Optional variance scale weight styles.
+    //! \param[in] weights Optional variance scale weights.
+    //! \note \p percentage should be in the range (0.0, 100.0].
+    virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage,
+                                                                 const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE,
+                                                                 const TDouble4Vec& weights = TWeights::UNIT) const;
+
+    //! Compute the log marginal likelihood function at \p samples integrating
+    //! over the prior density function for the distribution parameters.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the variable.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] result Filled in with the joint likelihood of \p samples.
+    //! \note The samples are assumed to be independent and identically
+    //! distributed.
+    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                          const TDouble1Vec& samples,
+                                                                          const TDouble4Vec1Vec& weights,
+                                                                          double& result) const;
+
+    //! Sample the marginal likelihood function.
+    //!
+    //! This samples each model in proportion to the probability the data
+    //! come from that model. Since each model can only be sampled an integer
+    //! number of times we find the sampling which minimizes the error from
+    //! the ideal sampling.
+    //!
+    //! \param[in] numberSamples The number of samples required.
+    //! \param[out] samples Filled in with samples from the prior.
+    //! \note \p numberSamples is truncated to the number of samples received.
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const;
+
+private:
+    //! The common c.d.f. implementation.
+    bool minusLogJointCdfImpl(bool complement,
+                              const TWeightStyleVec& weightStyles,
+                              const TDouble1Vec& samples,
+                              const TDouble4Vec1Vec& weights,
+                              double& lowerBound,
+                              double& upperBound) const;
+
+public:
+    //! Compute minus the log of the joint c.d.f. of the marginal likelihood
+    //! at \p samples.
+    //!
+    //! \param[in] weightStyles Controls the interpretation of the weight(s)
+    //! that are associated with each sample. See maths_t::ESampleWeightStyle
+    //! for more details.
+    //! \param[in] samples A collection of samples of the variable.
+    //! \param[in] weights The weights of each sample in \p samples.
+    //! \param[out] lowerBound Filled in with a lower bound to acceptable
+    //! accuracy of \f$-\log(\prod_i{F(x_i)})\f$, where \f$F(.)\f$ is the
+    //! c.d.f. and \f$\{x_i\}\f$ are the samples.
+    //! \param[out] upperBound Filled in with an upper bound to acceptable
+    //! accuracy of \f$-\log(\prod_i{F(x_i)})\f$, where \f$F(.)\f$ is the
+    //! c.d.f. and \f$\{x_i\}\f$ are the samples.
+    //! \note The samples are assumed to be independent.
+    //! \warning The variance scales must be in the range \f$(0,\infty)\f$,
+    //! i.e. a value of zero is not well defined and a value of infinity is
+    //! not well handled. (Very large values are handled though.)
+    virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                  const TDouble1Vec& samples,
+                                  const TDouble4Vec1Vec& weights,
+                                  double& lowerBound,
+                                  double& upperBound) const;
+
+    //!
Compute minus the log of the one minus the joint c.d.f. of the + //! marginal likelihood at \p samples without losing precision due to + //! cancellation errors at one, i.e. the smallest non-zero value this + //! can return is the minimum double rather than epsilon. + //! + //! \see minusLogJointCdf for more details. + virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const; + + //! Compute the probability of a less likely, i.e. lower likelihood, + //! collection of independent samples from the variable. + //! + //! \param[in] calculation The style of the probability calculation + //! (see model_t::EProbabilityCalculation for details). + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] lowerBound Filled in with the probability of the set + //! for which the joint marginal likelihood is less than that of + //! \p samples (subject to the measure \p calculation). + //! \param[out] upperBound Equal to \p lowerBound. + //! \param[out] tail The tail that (left or right) that all the samples + //! are in or neither. + //! \note The samples are assumed to be independent. + //! \warning The variance scales must be in the range \f$(0,\infty)\f$, + //! i.e. a value of zero is not well defined and a value of infinity is + //! not well handled. (Very large values are handled though.) + virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const; + + //! Check if this is a non-informative prior. + virtual bool isNonInformative() const; + + //! Get a human readable description of the prior. + //! + //! \param[in] indent The indent to use at the start of new lines. + //! \param[in,out] result Filled in with the description. + virtual void print(const std::string& indent, std::string& result) const; + + //! Print the prior density function in a specified format. + //! + //! \see CPrior::printJointDensityFunction for details. + virtual std::string printJointDensityFunction() const; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; + + //! Debug the memory used by this component. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component. + virtual std::size_t memoryUsage() const; + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const; + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + //@} + + //! \name Test Functions + //@{ + //! Get the current values for the model weights. + TDoubleVec weights() const; + + //! Get the current values for the log model weights. + TDoubleVec logWeights() const; + + //! Get the current constituent models. 
+ TPriorCPtrVec models() const; + //@} + +private: + using TDoubleSizePr = std::pair; + using TDoubleSizePr5Vec = core::CSmallVector; + using TWeightPriorPtrPr = std::pair; + using TWeightPriorPtrPrVec = std::vector; + using TMaxAccumulator = CBasicStatistics::SMax::TAccumulator; + +private: + //! Read parameters from \p traverser. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Add a model vector entry reading parameters from \p traverser. + bool modelAcceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Get the normalized model weights. + TDoubleSizePr5Vec normalizedLogWeights() const; + + //! Get the median of the model means. + double medianModelMean() const; + + //! Check that the model weights are valid. + bool badWeights() const; + + //! Full debug dump of the model weights. + std::string debugWeights() const; + +private: + //! A collection of component models and their probabilities. + TWeightPriorPtrPrVec m_Models; }; - } } diff --git a/include/maths/COrderings.h b/include/maths/COrderings.h index 5884573f26..0b7e14eee9 100644 --- a/include/maths/COrderings.h +++ b/include/maths/COrderings.h @@ -21,10 +21,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A collection of useful functionality to order collections //! of objects. @@ -39,896 +37,725 @@ namespace maths //! or second element of the pair. //! -# Efficiently, O(N log(N)), simultaneously sorting multiple vectors //! using one of the vectors to provide the ordering. -class COrderings : private core::CNonInstantiatable -{ - public: - //! \brief Orders two optional values such that non-null are - //! less than null values. - //! less than null values and otherwise compares using the type - //! operator <. - struct SOptionalLess - { - using result_type = bool; - - //! \note U and V must be convertible to T or optional - //! for some type T and T must support operator <. - template - inline bool operator()(const U &lhs, const V &rhs) const - { - return less(lhs, rhs); - } - - template - static inline bool less(const boost::optional &lhs, - const boost::optional &rhs) - { - bool lInitialized(lhs); - bool rInitialized(rhs); - return lInitialized && rInitialized ? - boost::unwrap_ref(*lhs) < boost::unwrap_ref(*rhs) : - rInitialized < lInitialized; - } - template - static inline bool less(const T &lhs, - const boost::optional &rhs) - { - return !rhs ? true : boost::unwrap_ref(lhs) < boost::unwrap_ref(*rhs); - } - template - static inline bool less(const boost::optional &lhs, - const T &rhs) - { - return !lhs ? false : boost::unwrap_ref(*lhs) < boost::unwrap_ref(rhs); - } - }; - - //! \brief Orders two optional values such that null are greater - //! than non-null values and otherwise compares using the type - //! operator >. - struct SOptionalGreater - { - using result_type = bool; - - //! \note U and V must be convertible to T or optional - //! for some type T and T must support operator >. - template - inline bool operator()(const U &lhs, const V &rhs) const - { - return greater(lhs, rhs); - } - - template - static inline bool greater(const boost::optional &lhs, - const boost::optional &rhs) - { - bool lInitialized(lhs); - bool rInitialized(rhs); - return lInitialized && rInitialized ? 
- boost::unwrap_ref(*lhs) > boost::unwrap_ref(*rhs) : - rInitialized > lInitialized; - } - template - static inline bool greater(const T &lhs, - const boost::optional &rhs) - { - return !rhs ? false : boost::unwrap_ref(lhs) > boost::unwrap_ref(*rhs); - } - template - static inline bool greater(const boost::optional &lhs, - const T &rhs) - { - return !lhs ? true : boost::unwrap_ref(*lhs) > boost::unwrap_ref(rhs); - } - }; - - //! \brief Orders two pointers such that non-null are less - //! than null values and otherwise compares using the type - //! operator <. - struct SPtrLess - { - using result_type = bool; - - template - inline bool operator()(const T *lhs, const T *rhs) const - { - return less(lhs, rhs); - } - - template - static inline bool less(const T *lhs, const T *rhs) - { - bool lInitialized(lhs != 0); - bool rInitialized(rhs != 0); - return lInitialized && rInitialized ? - boost::unwrap_ref(*lhs) < boost::unwrap_ref(*rhs) : - rInitialized < lInitialized; - } - }; - - //! \brief Orders two pointers such that null are greater - //! than non-null values and otherwise compares using - //! the type operator >. - struct SPtrGreater - { - using result_type = bool; - - template - inline bool operator()(const T *lhs, const T *rhs) const - { - return greater(lhs, rhs); - } - - template - static inline bool greater(const T *lhs, const T *rhs) - { - bool lInitialized(lhs != 0); - bool rInitialized(rhs != 0); - return lInitialized && rInitialized ? - boost::unwrap_ref(*lhs) > boost::unwrap_ref(*rhs) : - rInitialized > lInitialized; - } - }; - - //! \brief Orders two reference wrapped objects which are - //! comparable with operator <. - struct SReferenceLess - { - using result_type = bool; - - template - inline bool operator()(const U &lhs, const V &rhs) const - { - return less(lhs, rhs); - } - - template - static inline bool less(const U &lhs, const V &rhs) - { - return boost::unwrap_ref(lhs) < boost::unwrap_ref(rhs); - } - }; - - //! \brief Orders two reference wrapped objects which are - //! comparable with operator >. - struct SReferenceGreater - { - using result_type = bool; - - template - inline bool operator()(const U &lhs, const V &rhs) const - { - return greater(lhs, rhs); - } - - template - static inline bool greater(const U &lhs, const V &rhs) - { - return boost::unwrap_ref(lhs) > boost::unwrap_ref(rhs); - } - }; - - //! \name Mixed Type Lexicographical Comparison - //! - //! This is equivalent to std::lexicographical_compare but allows - //! for the type of each value in the collection to be different. - //! Each type must define operator<. - //@{ - //! Lexicographical comparison of \p l1 and \p r1. - template - static bool lexicographical_compare(const T1 &l1, - const T1 &r1, - COMP comp) - { - return comp(l1, r1); - } - template - static bool lexicographical_compare(const T1 &l1, - const T1 &r1) - { - return lexicographical_compare(l1, r1, SReferenceLess()); - } -#define COMPARE(l, r) if (comp(l, r)) { return true; } else if (comp(r, l)) { return false; } - //! Lexicographical comparison of (\p l1, \p l2) and (\p r1, \p r2). - template - static bool lexicographical_compare(const T1 &l1, const T2 &l2, - const T1 &r1, const T2 &r2, - COMP comp) - { - COMPARE(l1, r1); - return comp(l2, r2); +class COrderings : private core::CNonInstantiatable { +public: + //! \brief Orders two optional values such that non-null are + //! less than null values. + //! less than null values and otherwise compares using the type + //! operator <. 
+ struct SOptionalLess { + using result_type = bool; + + //! \note U and V must be convertible to T or optional + //! for some type T and T must support operator <. + template + inline bool operator()(const U& lhs, const V& rhs) const { + return less(lhs, rhs); + } + + template + static inline bool less(const boost::optional& lhs, const boost::optional& rhs) { + bool lInitialized(lhs); + bool rInitialized(rhs); + return lInitialized && rInitialized ? boost::unwrap_ref(*lhs) < boost::unwrap_ref(*rhs) : rInitialized < lInitialized; + } + template + static inline bool less(const T& lhs, const boost::optional& rhs) { + return !rhs ? true : boost::unwrap_ref(lhs) < boost::unwrap_ref(*rhs); + } + template + static inline bool less(const boost::optional& lhs, const T& rhs) { + return !lhs ? false : boost::unwrap_ref(*lhs) < boost::unwrap_ref(rhs); + } + }; + + //! \brief Orders two optional values such that null are greater + //! than non-null values and otherwise compares using the type + //! operator >. + struct SOptionalGreater { + using result_type = bool; + + //! \note U and V must be convertible to T or optional + //! for some type T and T must support operator >. + template + inline bool operator()(const U& lhs, const V& rhs) const { + return greater(lhs, rhs); + } + + template + static inline bool greater(const boost::optional& lhs, const boost::optional& rhs) { + bool lInitialized(lhs); + bool rInitialized(rhs); + return lInitialized && rInitialized ? boost::unwrap_ref(*lhs) > boost::unwrap_ref(*rhs) : rInitialized > lInitialized; + } + template + static inline bool greater(const T& lhs, const boost::optional& rhs) { + return !rhs ? false : boost::unwrap_ref(lhs) > boost::unwrap_ref(*rhs); + } + template + static inline bool greater(const boost::optional& lhs, const T& rhs) { + return !lhs ? true : boost::unwrap_ref(*lhs) > boost::unwrap_ref(rhs); + } + }; + + //! \brief Orders two pointers such that non-null are less + //! than null values and otherwise compares using the type + //! operator <. + struct SPtrLess { + using result_type = bool; + + template + inline bool operator()(const T* lhs, const T* rhs) const { + return less(lhs, rhs); + } + + template + static inline bool less(const T* lhs, const T* rhs) { + bool lInitialized(lhs != 0); + bool rInitialized(rhs != 0); + return lInitialized && rInitialized ? boost::unwrap_ref(*lhs) < boost::unwrap_ref(*rhs) : rInitialized < lInitialized; + } + }; + + //! \brief Orders two pointers such that null are greater + //! than non-null values and otherwise compares using + //! the type operator >. + struct SPtrGreater { + using result_type = bool; + + template + inline bool operator()(const T* lhs, const T* rhs) const { + return greater(lhs, rhs); + } + + template + static inline bool greater(const T* lhs, const T* rhs) { + bool lInitialized(lhs != 0); + bool rInitialized(rhs != 0); + return lInitialized && rInitialized ? boost::unwrap_ref(*lhs) > boost::unwrap_ref(*rhs) : rInitialized > lInitialized; + } + }; + + //! \brief Orders two reference wrapped objects which are + //! comparable with operator <. + struct SReferenceLess { + using result_type = bool; + + template + inline bool operator()(const U& lhs, const V& rhs) const { + return less(lhs, rhs); + } + + template + static inline bool less(const U& lhs, const V& rhs) { + return boost::unwrap_ref(lhs) < boost::unwrap_ref(rhs); + } + }; + + //! \brief Orders two reference wrapped objects which are + //! comparable with operator >. 
+    //! \brief Orders two reference wrapped objects which are
+    //! comparable with operator >.
+    struct SReferenceGreater {
+        using result_type = bool;
+
+        template<typename U, typename V>
+        inline bool operator()(const U& lhs, const V& rhs) const {
+            return greater(lhs, rhs);
+        }
+
+        template<typename U, typename V>
+        static inline bool greater(const U& lhs, const V& rhs) {
+            return boost::unwrap_ref(lhs) > boost::unwrap_ref(rhs);
+        }
+    };
+
+    //! \name Mixed Type Lexicographical Comparison
+    //!
+    //! This is equivalent to std::lexicographical_compare but allows
+    //! for the type of each value in the collection to be different.
+    //! Each type must define operator<.
+    //@{
+    //! Lexicographical comparison of \p l1 and \p r1.
+    template<typename T1, typename COMP>
+    static bool lexicographical_compare(const T1& l1, const T1& r1, COMP comp) {
+        return comp(l1, r1);
+    }
+    template<typename T1>
+    static bool lexicographical_compare(const T1& l1, const T1& r1) {
+        return lexicographical_compare(l1, r1, SReferenceLess());
+    }
+#define COMPARE(l, r)                                                                               \
+    if (comp(l, r)) {                                                                               \
+        return true;                                                                                \
+    } else if (comp(r, l)) {                                                                        \
+        return false;                                                                               \
+    }
+    //! Lexicographical comparison of (\p l1, \p l2) and (\p r1, \p r2).
+    template<typename T1, typename T2, typename COMP>
+    static bool lexicographical_compare(const T1& l1, const T2& l2, const T1& r1, const T2& r2, COMP comp) {
+        COMPARE(l1, r1);
+        return comp(l2, r2);
+    }
+    template<typename T1, typename T2>
+    static bool lexicographical_compare(const T1& l1, const T2& l2, const T1& r1, const T2& r2) {
+        return lexicographical_compare(l1, l2, r1, r2, SReferenceLess());
+    }
+    //! Lexicographical comparison of (\p l1, \p l2, \p l3) and (\p r1, \p r2, \p r3).
+    template<typename T1, typename T2, typename T3, typename COMP>
+    static bool lexicographical_compare(const T1& l1, const T2& l2, const T3& l3, const T1& r1, const T2& r2, const T3& r3, COMP comp) {
+        COMPARE(l1, r1);
+        COMPARE(l2, r2);
+        return comp(l3, r3);
+    }
+    template<typename T1, typename T2, typename T3>
+    static bool lexicographical_compare(const T1& l1, const T2& l2, const T3& l3, const T1& r1, const T2& r2, const T3& r3) {
+        return lexicographical_compare(l1, l2, l3, r1, r2, r3, SReferenceLess());
+    }
+    //! Lexicographical comparison of (\p l1, \p l2, \p l3, \p l4) and
+    //! (\p r1, \p r2, \p r3, \p r4).
+    template<typename T1, typename T2, typename T3, typename T4, typename COMP>
+    static bool lexicographical_compare(const T1& l1,
+                                        const T2& l2,
+                                        const T3& l3,
+                                        const T4& l4,
+                                        const T1& r1,
+                                        const T2& r2,
+                                        const T3& r3,
+                                        const T4& r4,
+                                        COMP comp) {
+        COMPARE(l1, r1);
+        COMPARE(l2, r2);
+        COMPARE(l3, r3);
+        return comp(l4, r4);
+    }
+    template<typename T1, typename T2, typename T3, typename T4>
+    static bool lexicographical_compare(const T1& l1,
+                                        const T2& l2,
+                                        const T3& l3,
+                                        const T4& l4,
+                                        const T1& r1,
+                                        const T2& r2,
+                                        const T3& r3,
+                                        const T4& r4) {
+        return lexicographical_compare(l1, l2, l3, l4, r1, r2, r3, r4, SReferenceLess());
+    }
+    //! Lexicographical comparison of (\p l1, \p l2, \p l3, \p l4, \p l5) and
+    //! (\p r1, \p r2, \p r3, \p r4, \p r5).
+    template<typename T1, typename T2, typename T3, typename T4, typename T5, typename COMP>
+    static bool lexicographical_compare(const T1& l1,
+                                        const T2& l2,
+                                        const T3& l3,
+                                        const T4& l4,
+                                        const T5& l5,
+                                        const T1& r1,
+                                        const T2& r2,
+                                        const T3& r3,
+                                        const T4& r4,
+                                        const T5& r5,
+                                        COMP comp) {
+        COMPARE(l1, r1);
+        COMPARE(l2, r2);
+        COMPARE(l3, r3);
+        COMPARE(l4, r4);
+        return comp(l5, r5);
+    }
+    template<typename T1, typename T2, typename T3, typename T4, typename T5>
+    static bool lexicographical_compare(const T1& l1,
+                                        const T2& l2,
+                                        const T3& l3,
+                                        const T4& l4,
+                                        const T5& l5,
+                                        const T1& r1,
+                                        const T2& r2,
+                                        const T3& r3,
+                                        const T4& r4,
+                                        const T5& r5) {
+        return lexicographical_compare(l1, l2, l3, l4, l5, r1, r2, r3, r4, r5, SReferenceLess());
+    }
+#undef COMPARE
+    //@}
+
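Again purely illustrative and not from this patch: the mixed-type lexicographical comparison above reduces to the following standalone pattern, shown here for two fields of different types; lexLess is a hypothetical name.

    #include <iostream>
    #include <string>

    // Sketch of the mixed-type lexicographical comparison: order by the
    // first fields; only if neither orders before the other, fall back
    // to the second fields.
    template<typename T1, typename T2>
    bool lexLess(const T1& l1, const T2& l2, const T1& r1, const T2& r2) {
        if (l1 < r1) {
            return true;
        }
        if (r1 < l1) {
            return false;
        }
        return l2 < r2;
    }

    int main() {
        // Mixed field types: a std::string key and an int priority.
        std::cout << lexLess(std::string("apple"), 2, std::string("apple"), 7) << '\n'; // prints 1
        std::cout << lexLess(std::string("pear"), 2, std::string("apple"), 7) << '\n'; // prints 0
        return 0;
    }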
+    //! \brief Wrapper around various less than comparisons.
+    struct SLess {
+        using result_type = bool;
+
+        template<typename T>
+        bool operator()(const boost::optional<T>& lhs, const boost::optional<T>& rhs) const {
+            return SOptionalLess::less(lhs, rhs);
+        }
+
+        template<typename T>
+        bool operator()(const T* lhs, const T* rhs) const {
+            return SPtrLess::less(lhs, rhs);
+        }
+
+        template<typename T>
+        bool operator()(T* lhs, T* rhs) const {
+            return SPtrLess::less(lhs, rhs);
+        }
+
+        template<typename U, typename V>
+        bool operator()(const U& lhs, const V& rhs) const {
+            return SReferenceLess::less(lhs, rhs);
+        }
+
+        bool operator()(const core::CStoredStringPtr& lhs, const core::CStoredStringPtr& rhs) {
+            return SPtrLess::less(lhs.get(), rhs.get());
+        }
+
+        template<typename T>
+        bool operator()(const boost::shared_ptr<T>& lhs, const boost::shared_ptr<T>& rhs) {
+            return SPtrLess::less(lhs.get(), rhs.get());
+        }
+
+        template<typename U, typename V>
+        bool operator()(const std::pair<U, V>& lhs, const std::pair<U, V>& rhs) const {
+            return lexicographical_compare(lhs.first, lhs.second, rhs.first, rhs.second, *this);
+        }
+        SReferenceLess s_Less;
+    };
+
+    //! \brief Wrapper around various greater than comparisons.
+    struct SGreater {
+        using result_type = bool;
+
+        template<typename T>
+        bool operator()(const boost::optional<T>& lhs, const boost::optional<T>& rhs) const {
+            return SOptionalGreater::greater(lhs, rhs);
        }
+
+        template<typename T>
+        bool operator()(const T* lhs, const T* rhs) const {
+            return SPtrGreater::greater(lhs, rhs);
+        }
+
+        template<typename T>
+        bool operator()(T* lhs, T* rhs) const {
+            return SPtrGreater::greater(lhs, rhs);
+        }
+
+        template<typename U, typename V>
+        bool operator()(const U& lhs, const V& rhs) const {
+            return SReferenceGreater::greater(lhs, rhs);
+        }
+
+        bool operator()(const core::CStoredStringPtr& lhs, const core::CStoredStringPtr& rhs) {
+            return SPtrGreater::greater(lhs.get(), rhs.get());
+        }
+
+        template<typename T>
+        bool operator()(const boost::shared_ptr<T>& lhs, const boost::shared_ptr<T>& rhs) {
+            return SPtrGreater::greater(lhs.get(), rhs.get());
+        }
+
+        template<typename U, typename V>
+        bool operator()(const std::pair<U, V>& lhs, const std::pair<U, V>& rhs) const {
+            return lexicographical_compare(lhs.first, lhs.second, rhs.first, rhs.second, *this);
+        }
+
+        SReferenceGreater s_Greater;
+    };
+
+    //! \brief Lexicographical comparison of various common types.
+    //!
+    //! IMPLEMENTATION DECISIONS:\n
+    //! Although these objects provide their own comparison operators,
+    //! this also handles tuples of reference wrapped types.
+    struct SLexicographicalCompare {
        template<typename T1, typename T2>
-        static bool lexicographical_compare(const T1 &l1, const T2 &l2,
-                                            const T1 &r1, const T2 &r2)
-        {
-            return lexicographical_compare(l1, l2, r1, r2, SReferenceLess());
-        }
-        //! Lexicographical comparison of (\p l1, \p l2, \p l3) and (\p r1, \p r2, \p r3).
-        template<typename T1, typename T2, typename T3, typename COMP>
-        static bool lexicographical_compare(const T1 &l1, const T2 &l2, const T3 &l3,
-                                            const T1 &r1, const T2 &r2, const T3 &r3,
-                                            COMP comp)
-        {
-            COMPARE(l1, r1);
-            COMPARE(l2, r2);
-            return comp(l3, r3);
+        inline bool operator()(const std::pair<T1, T2>& lhs, const std::pair<T1, T2>& rhs) const {
+            return lexicographical_compare(lhs.first, lhs.second, rhs.first, rhs.second, s_Less);
        }
+
        template<typename T1, typename T2, typename T3>
-        static bool lexicographical_compare(const T1 &l1, const T2 &l2, const T3 &l3,
-                                            const T1 &r1, const T2 &r2, const T3 &r3)
-        {
-            return lexicographical_compare(l1, l2, l3, r1, r2, r3, SReferenceLess());
-        }
-        //! Lexicographical comparison of (\p l1, \p l2, \p l3, \p l4) and
-        //! (\p r1, \p r2, \p r3, \p r4).
- template - static bool lexicographical_compare(const T1 &l1, const T2 &l2, const T3 &l3, const T4 &l4, - const T1 &r1, const T2 &r2, const T3 &r3, const T4 &r4, - COMP comp) - { - COMPARE(l1, r1); - COMPARE(l2, r2); - COMPARE(l3, r3); - return comp(l4, r4); + inline bool operator()(const boost::tuple& lhs, const boost::tuple& rhs) const { + return lexicographical_compare(lhs.template get<0>(), + lhs.template get<1>(), + lhs.template get<2>(), + rhs.template get<0>(), + rhs.template get<1>(), + rhs.template get<2>(), + s_Less); } + template - static bool lexicographical_compare(const T1 &l1, const T2 &l2, const T3 &l3, const T4 &l4, - const T1 &r1, const T2 &r2, const T3 &r3, const T4 &r4) - { - return lexicographical_compare(l1, l2, l3, l4, r1, r2, r3, r4, SReferenceLess()); - } - //! Lexicographical comparison of (\p l1, \p l2, \p l3, \p l4, \p l5) and - //! (\p r1, \p r2, \p r3, \p r4, \p r5). - template - static bool lexicographical_compare(const T1 &l1, const T2 &l2, const T3 &l3, const T4 &l4, const T5 &l5, - const T1 &r1, const T2 &r2, const T3 &r3, const T4 &r4, const T5 &r5, - COMP comp) - { - COMPARE(l1, r1); - COMPARE(l2, r2); - COMPARE(l3, r3); - COMPARE(l4, r4); - return comp(l5, r5); + inline bool operator()(const boost::tuple& lhs, const boost::tuple& rhs) const { + return lexicographical_compare(lhs.template get<0>(), + lhs.template get<1>(), + lhs.template get<2>(), + lhs.template get<3>(), + rhs.template get<0>(), + rhs.template get<1>(), + rhs.template get<2>(), + rhs.template get<3>(), + s_Less); } + template - static bool lexicographical_compare(const T1 &l1, const T2 &l2, const T3 &l3, const T4 &l4, const T5 &l5, - const T1 &r1, const T2 &r2, const T3 &r3, const T4 &r4, const T5 &r5) - { - return lexicographical_compare(l1, l2, l3, l4, l5, r1, r2, r3, r4, r5, SReferenceLess()); + inline bool operator()(const boost::tuple& lhs, const boost::tuple& rhs) const { + return lexicographical_compare(lhs.template get<0>(), + lhs.template get<1>(), + lhs.template get<2>(), + lhs.template get<3>(), + lhs.template get<4>(), + rhs.template get<0>(), + rhs.template get<1>(), + rhs.template get<2>(), + rhs.template get<3>(), + rhs.template get<4>(), + s_Less); + } + + SLess s_Less; + }; + + //! \brief Partial ordering of std::pairs and some boost::tuples based + //! on smaller first element. + //! + //! \note That while this functionality can be implemented by boost + //! bind, since it overloads the comparison operators, the resulting + //! code is more than an order of magnitude slower than this version. + struct SFirstLess { + template + inline bool operator()(const std::pair& lhs, const std::pair& rhs) const { + return s_Less(lhs.first, rhs.first); + } + + template + inline bool operator()(const U& lhs, const std::pair& rhs) const { + return s_Less(lhs, rhs.first); + } + + template + inline bool operator()(const std::pair& lhs, const U& rhs) const { + return s_Less(lhs.first, rhs); } -#undef COMPARE - //@} - - //! \brief Wrapper around various less than comparisons. 
- struct SLess - { - using result_type = bool; - - template - bool operator()(const boost::optional &lhs, - const boost::optional &rhs) const - { - return SOptionalLess::less(lhs, rhs); - } - - template - bool operator()(const T *lhs, const T *rhs) const - { - return SPtrLess::less(lhs, rhs); - } - - template - bool operator()(T *lhs, T *rhs) const - { - return SPtrLess::less(lhs, rhs); - } - - template - bool operator()(const U &lhs, const V &rhs) const - { - return SReferenceLess::less(lhs, rhs); - } - - bool operator()(const core::CStoredStringPtr &lhs, const core::CStoredStringPtr &rhs) - { - return SPtrLess::less(lhs.get(), rhs.get()); - } - - template - bool operator()(const boost::shared_ptr &lhs, const boost::shared_ptr &rhs) - { - return SPtrLess::less(lhs.get(), rhs.get()); - } - - template - bool operator()(const std::pair &lhs, - const std::pair &rhs) const - { - return lexicographical_compare(lhs.first, lhs.second, - rhs.first, rhs.second, - *this); - } - SReferenceLess s_Less; - }; - - //! \brief Wrapper around various less than comparisons. - struct SGreater - { - using result_type = bool; - - template - bool operator()(const boost::optional &lhs, - const boost::optional &rhs) const - { - return SOptionalGreater::greater(lhs, rhs); - } - - template - bool operator()(const T *lhs, const T *rhs) const - { - return SPtrGreater::greater(lhs, rhs); - } - - template - bool operator()(T *lhs, T *rhs) const - { - return SPtrGreater::greater(lhs, rhs); - } - - template - bool operator()(const U &lhs, const V &rhs) const - { - return SReferenceGreater::greater(lhs, rhs); - } - - bool operator()(const core::CStoredStringPtr &lhs, const core::CStoredStringPtr &rhs) - { - return SPtrGreater::greater(lhs.get(), rhs.get()); - } - - template - bool operator()(const boost::shared_ptr &lhs, const boost::shared_ptr &rhs) - { - return SPtrGreater::greater(lhs.get(), rhs.get()); - } - - template - bool operator()(const std::pair &lhs, - const std::pair &rhs) const - { - return lexicographical_compare(lhs.first, lhs.second, - rhs.first, rhs.second, - *this); - } - - SReferenceGreater s_Greater; - }; - - //! Lexicographical comparison of various common types. - //! - //! IMPLEMENTATION DECISIONS:\n - //! Although these objects provide their own comparison operators - //! This also tuples of handles reference wrapped types. 
- struct SLexicographicalCompare - { - template - inline bool operator()(const std::pair &lhs, - const std::pair &rhs) const - { - return lexicographical_compare(lhs.first, lhs.second, - rhs.first, rhs.second, - s_Less); - } - - template - inline bool operator()(const boost::tuple &lhs, - const boost::tuple &rhs) const - { - return lexicographical_compare(lhs.template get<0>(), - lhs.template get<1>(), - lhs.template get<2>(), - rhs.template get<0>(), - rhs.template get<1>(), - rhs.template get<2>(), - s_Less); - } - - template - inline bool operator()(const boost::tuple &lhs, - const boost::tuple &rhs) const - { - return lexicographical_compare(lhs.template get<0>(), - lhs.template get<1>(), - lhs.template get<2>(), - lhs.template get<3>(), - rhs.template get<0>(), - rhs.template get<1>(), - rhs.template get<2>(), - rhs.template get<3>(), - s_Less); - } - - template - inline bool operator()(const boost::tuple &lhs, - const boost::tuple &rhs) const - { - return lexicographical_compare(lhs.template get<0>(), - lhs.template get<1>(), - lhs.template get<2>(), - lhs.template get<3>(), - lhs.template get<4>(), - rhs.template get<0>(), - rhs.template get<1>(), - rhs.template get<2>(), - rhs.template get<3>(), - rhs.template get<4>(), - s_Less); - } - - SLess s_Less; - }; - - //! \brief Partial ordering of std::pairs and some boost::tuples based - //! on smaller first element. - //! - //! \note That while this functionality can be implemented by boost - //! bind, since it overloads the comparison operators, the resulting - //! code is more than an order of magnitude slower than this version. - struct SFirstLess - { - template - inline bool operator()(const std::pair &lhs, - const std::pair &rhs) const - { - return s_Less(lhs.first, rhs.first); - } - - template - inline bool operator()(const U &lhs, - const std::pair &rhs) const - { - return s_Less(lhs, rhs.first); - } - - template - inline bool operator()(const std::pair &lhs, - const U &rhs) const - { - return s_Less(lhs.first, rhs); - } - -#define TUPLE_FIRST_LESS template \ - inline bool operator()(const boost::tuple &lhs, \ - const boost::tuple &rhs) const \ - { \ - return s_Less(lhs.template get<0>(), rhs.template get<0>()); \ - } \ - template \ - inline bool operator()(const T1 &lhs, \ - const boost::tuple &rhs) const \ - { \ - return s_Less(lhs, rhs.template get<0>()); \ - } \ - template \ - inline bool operator()(const boost::tuple &lhs, \ - const T1 &rhs) const \ - { \ - return s_Less(lhs.template get<0>(), rhs); \ - } + +#define TUPLE_FIRST_LESS \ + template \ + inline bool operator()(const boost::tuple& lhs, const boost::tuple& rhs) const { \ + return s_Less(lhs.template get<0>(), rhs.template get<0>()); \ + } \ + template \ + inline bool operator()(const T1& lhs, const boost::tuple& rhs) const { \ + return s_Less(lhs, rhs.template get<0>()); \ + } \ + template \ + inline bool operator()(const boost::tuple& lhs, const T1& rhs) const { \ + return s_Less(lhs.template get<0>(), rhs); \ + } #define TEMPLATE_ARGS_DECL typename T1, typename T2, typename T3 #define TEMPLATE_ARGS T1, T2, T3 - TUPLE_FIRST_LESS + TUPLE_FIRST_LESS #undef TEMPLATE_ARGS #undef TEMPLATE_ARGS_DECL #define TEMPLATE_ARGS_DECL typename T1, typename T2, typename T3, typename T4 #define TEMPLATE_ARGS T1, T2, T3, T4 - TUPLE_FIRST_LESS + TUPLE_FIRST_LESS #undef TEMPLATE_ARGS #undef TEMPLATE_ARGS_DECL #define TEMPLATE_ARGS_DECL typename T1, typename T2, typename T3, typename T4, typename T5 #define TEMPLATE_ARGS T1, T2, T3, T4, T5 - TUPLE_FIRST_LESS + TUPLE_FIRST_LESS #undef 
TEMPLATE_ARGS #undef TEMPLATE_ARGS_DECL #undef TUPLE_FIRST_LESS - SLess s_Less; - }; - - //! \brief Partial ordering of std::pairs and some boost::tuples based - //! on larger first element. - //! - //! \note That while this functionality can be implemented by boost - //! bind, since it overloads the comparison operators, the resulting - //! code is more than an order of magnitude slower than this version. - struct SFirstGreater - { - template - inline bool operator()(const std::pair &lhs, - const std::pair &rhs) const - { - return s_Greater(lhs.first, rhs.first); - } - - template - inline bool operator()(const U &lhs, - const std::pair &rhs) const - { - return s_Greater(lhs, rhs.first); - } - - template - inline bool operator()(const std::pair &lhs, - const U &rhs) const - { - return s_Greater(lhs.first, rhs); - } - - SGreater s_Greater; - }; - - //! \brief Partial ordering of pairs based on smaller second element. - //! - //! \note That while this functionality can be implemented by boost - //! bind, since it overloads the comparison operators, the resulting - //! code is more than an order of magnitude slower than this version. - struct SSecondLess - { - template - inline bool operator()(const std::pair &lhs, - const std::pair &rhs) const - { - return s_Less(lhs.second, rhs.second); - } - - template - inline bool operator()(const V &lhs, - const std::pair &rhs) const - { - return s_Less(lhs, rhs.second); - } - - template - inline bool operator()(const std::pair &lhs, - const V &rhs) const - { - return s_Less(lhs.second, rhs); - } - - SLess s_Less; - }; - - //! \brief Partial ordering of pairs based on larger second element. - //! - //! \note That while this functionality can be implemented by boost - //! bind, since it overloads the comparison operators, the resulting - //! code is more than an order of magnitude slower than this version. - struct SSecondGreater - { - template - inline bool operator()(const std::pair &lhs, - const std::pair &rhs) const - { - return s_Greater(lhs.second, rhs.second); - } - - template - inline bool operator()(const V &lhs, - const std::pair &rhs) const - { - return s_Greater(lhs, rhs.second); - } - - template - inline bool operator()(const std::pair &lhs, - const V &rhs) const - { - return s_Greater(lhs.second, rhs); - } - - SGreater s_Greater; - }; - - //! \name Simultaneously Sort Multiple Vectors - //! - //! This simultaneously sorts a number of vectors based on ordering - //! a collection of keys. For examples, the following code - //! \code{cpp} - //! double someids[] = { 3.1, 2.2, 0.5, 1.5 }; - //! std::string somenames[] = - //! { - //! std::string('a'), - //! std::string('b'), - //! std::string('c'), - //! std::string('d') - //! }; - //! std::vector ids(someids, someids + 4); - //! std::vector names(somenames, somenames + 4); - //! - //! maths::COrderings::simultaneousSort(ids, names); - //! - //! for (std::size_t i = 0u; i < 4; ++i) - //! { - //! std::cout << ids[i] << ' ' << names[i] << std::endl; - //! } - //! \endcode - //! - //! Will produce the following output: - //!
-        //! 0.5 c
-        //! 1.5 d
-        //! 2.2 b
-        //! 3.1 a
-        //! 
-    //!
-    //! These support simultaneously sorting up to 4 additional containers
-    //! to the keys.
-    //!
-    //! \note The complexity is O(N log(N)) where N is the length of the
-    //! containers.
-    //! \warning All containers must have the same length.
-    //@{
-    private:
-        //! Orders a set of indices into an array based using the default
-        //! comparison operator of the corresponding key type.
-        template<typename KEY_VECTOR, typename COMP = std::less<typename KEY_VECTOR::value_type> >
-        class CIndexLess
-        {
-            public:
-                CIndexLess(const KEY_VECTOR &keys, const COMP &comp = COMP()) :
-                        m_Keys(&keys),
-                        m_Comp(comp)
-                {}
-
-                bool operator()(std::size_t lhs, std::size_t rhs)
-                {
-                    return m_Comp((*m_Keys)[lhs], (*m_Keys)[rhs]);
-                }
-
-            private:
-                const KEY_VECTOR *m_Keys;
-                COMP m_Comp;
-        };
+        SLess s_Less;
+    };
+
+    //! \brief Partial ordering of std::pairs and some boost::tuples based
+    //! on larger first element.
+    //!
+    //! \note That while this functionality can be implemented by boost
+    //! bind, since it overloads the comparison operators, the resulting
+    //! code is more than an order of magnitude slower than this version.
+    struct SFirstGreater {
+        template<typename U, typename V>
+        inline bool operator()(const std::pair<U, V>& lhs, const std::pair<U, V>& rhs) const {
+            return s_Greater(lhs.first, rhs.first);
+        }
+
+        template<typename U, typename V>
+        inline bool operator()(const U& lhs, const std::pair<U, V>& rhs) const {
+            return s_Greater(lhs, rhs.first);
+        }
+
+        template<typename U, typename V>
+        inline bool operator()(const std::pair<U, V>& lhs, const U& rhs) const {
+            return s_Greater(lhs.first, rhs);
+        }
+
+        SGreater s_Greater;
+    };
+
+    //! \brief Partial ordering of pairs based on smaller second element.
+    //!
+    //! \note That while this functionality can be implemented by boost
+    //! bind, since it overloads the comparison operators, the resulting
+    //! code is more than an order of magnitude slower than this version.
+    struct SSecondLess {
+        template<typename U, typename V>
+        inline bool operator()(const std::pair<U, V>& lhs, const std::pair<U, V>& rhs) const {
+            return s_Less(lhs.second, rhs.second);
+        }
+
+        template<typename U, typename V>
+        inline bool operator()(const V& lhs, const std::pair<U, V>& rhs) const {
+            return s_Less(lhs, rhs.second);
+        }
+
+        template<typename U, typename V>
+        inline bool operator()(const std::pair<U, V>& lhs, const V& rhs) const {
+            return s_Less(lhs.second, rhs);
+        }
+
+        SLess s_Less;
+    };
+
+    //! \brief Partial ordering of pairs based on larger second element.
+    //!
+    //! \note That while this functionality can be implemented by boost
+    //! bind, since it overloads the comparison operators, the resulting
+    //! code is more than an order of magnitude slower than this version.
+    struct SSecondGreater {
+        template<typename U, typename V>
+        inline bool operator()(const std::pair<U, V>& lhs, const std::pair<U, V>& rhs) const {
+            return s_Greater(lhs.second, rhs.second);
+        }
+
+        template<typename U, typename V>
+        inline bool operator()(const V& lhs, const std::pair<U, V>& rhs) const {
+            return s_Greater(lhs, rhs.second);
+        }
+
+        template<typename U, typename V>
+        inline bool operator()(const std::pair<U, V>& lhs, const V& rhs) const {
+            return s_Greater(lhs.second, rhs);
+        }
+        SGreater s_Greater;
+    };
+
+    //! \name Simultaneously Sort Multiple Vectors
+    //!
+    //! This simultaneously sorts a number of vectors based on ordering
+    //! a collection of keys. For example, the following code
+    //! \code{cpp}
+    //! double someids[] = { 3.1, 2.2, 0.5, 1.5 };
+    //! std::string somenames[] =
+    //!     {
+    //!         std::string("a"),
+    //!         std::string("b"),
+    //!         std::string("c"),
+    //!         std::string("d")
+    //!     };
+    //! std::vector<double> ids(someids, someids + 4);
+    //! std::vector<std::string> names(somenames, somenames + 4);
+    //!
+    //! maths::COrderings::simultaneousSort(ids, names);
+    //!
+    //! for (std::size_t i = 0u; i < 4; ++i)
+    //! {
+    //!     std::cout << ids[i] << ' ' << names[i] << std::endl;
+    //! }
+    //! \endcode
+    //!
+    //! Will produce the following output:
+    //! <pre>
+    //! 0.5 c
+    //! 1.5 d
+    //! 2.2 b
+    //! 3.1 a
+    //! </pre>
+    //!
+    //! These support simultaneously sorting up to 4 additional containers
+    //! to the keys.
+    //!
+    //! \note The complexity is O(N log(N)) where N is the length of the
+    //! containers.
+    //! \warning All containers must have the same length.
+    //@{
+private:
+    //! Orders a set of indices into an array using the default
+    //! comparison operator of the corresponding key type.
+    template<typename KEY_VECTOR, typename COMP = std::less<typename KEY_VECTOR::value_type>>
+    class CIndexLess {
+    public:
+        CIndexLess(const KEY_VECTOR& keys, const COMP& comp = COMP()) : m_Keys(&keys), m_Comp(comp) {}
+
+        bool operator()(std::size_t lhs, std::size_t rhs) { return m_Comp((*m_Keys)[lhs], (*m_Keys)[rhs]); }
+
+    private:
+        const KEY_VECTOR* m_Keys;
+        COMP m_Comp;
+    };
+
+public:
+// The logic in this function is rather subtle because we want to
+// sort the collections in place. In particular, we create a sorted
+// collection of indices where each index tells us where to get the
+// element from at that location and we want to re-order all the
+// collections by that ordering in place. If an index matches its
+// position then we can move to the next position. Otherwise, we
+// need to swap the items at the index in to its position. To work
+// in place we need to do something with the items which are displaced.
+// If these are the items required at the swapped in position then
+// we are done. Otherwise, we just repeat until we find this position.
+// It is easy to verify that this process finds a closed cycle with
+// at most N steps. Each time a swap is made at least one more item
+// is in its correct place, and we update the ordering accordingly.
+// So the containers are sorted in at most O(N) additional steps to
+// the N * log(N) taken to sort the indices.
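To make the cycle-chasing argument above concrete, here is a hypothetical standalone sketch of the same in-place reorder, simplified to one key vector and one value vector (names are invented; the macro that follows is the patch's actual implementation).

    #include <algorithm>
    #include <iostream>
    #include <numeric>
    #include <string>
    #include <vector>

    // Sketch of the in-place reorder described above: sort an index
    // permutation by key, then chase each cycle, swapping keys and
    // values together until every index is in its home position.
    void simultaneousSortSketch(std::vector<double>& keys, std::vector<std::string>& values) {
        std::vector<std::size_t> ordering(keys.size());
        std::iota(ordering.begin(), ordering.end(), 0);
        std::stable_sort(ordering.begin(), ordering.end(),
                         [&keys](std::size_t lhs, std::size_t rhs) { return keys[lhs] < keys[rhs]; });
        for (std::size_t i = 0; i < ordering.size(); ++i) {
            std::size_t j_ = i;
            std::size_t j = ordering[j_];
            while (i != j) {
                // Swap the wanted item into position j_; the displaced
                // item continues around the cycle.
                std::swap(keys[j_], keys[j]);
                std::swap(values[j_], values[j]);
                ordering[j_] = j_;
                j_ = j;
                j = ordering[j_];
            }
            ordering[j_] = j_;
        }
    }

    int main() {
        std::vector<double> ids{3.1, 2.2, 0.5, 1.5};
        std::vector<std::string> names{"a", "b", "c", "d"};
        simultaneousSortSketch(ids, names);
        for (std::size_t i = 0; i < ids.size(); ++i) {
            std::cout << ids[i] << ' ' << names[i] << '\n'; // 0.5 c, 1.5 d, 2.2 b, 3.1 a
        }
        return 0;
    }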
+#define SIMULTANEOUS_SORT_IMPL \ + if (boost::algorithm::is_sorted(keys.begin(), keys.end(), comp)) { \ + return true; \ + } \ + using TSizeVec = std::vector; \ + TSizeVec ordering; \ + ordering.reserve(keys.size()); \ + for (std::size_t i = 0u; i < keys.size(); ++i) { \ + ordering.push_back(i); \ + } \ + std::stable_sort(ordering.begin(), ordering.end(), CIndexLess(keys, comp)); \ + for (std::size_t i = 0u; i < ordering.size(); ++i) { \ + std::size_t j_ = i; \ + std::size_t j = ordering[j_]; \ + while (i != j) { \ + using std::swap; \ + swap(keys[j_], keys[j]); \ + CUSTOM_SWAP_VALUES \ + ordering[j_] = j_; \ + j_ = j; \ + j = ordering[j_]; \ + } \ + ordering[j_] = j_; \ + } \ + return true; #define CUSTOM_SWAP_VALUES swap(values[j_], values[j]); - //! Simultaneously sort \p keys and \p values using the \p comp - //! order of \p keys. - template - static bool simultaneousSort(KEY_VECTOR &keys, - VALUE_VECTOR &values, - const COMP &comp) - { - if (keys.size() != values.size()) - { - return false; - } - SIMULTANEOUS_SORT_IMPL + //! Simultaneously sort \p keys and \p values using the \p comp + //! order of \p keys. + template + static bool simultaneousSort(KEY_VECTOR& keys, VALUE_VECTOR& values, const COMP& comp) { + if (keys.size() != values.size()) { + return false; } + SIMULTANEOUS_SORT_IMPL + } #undef CUSTOM_SWAP_VALUES - //! Overload for default operator< comparison. - template - static bool simultaneousSort(KEY_VECTOR &keys, - VALUE_VECTOR &values) - { - return simultaneousSort(keys, values, - std::less()); - } - //! Overload for default operator< comparison. - template - static bool simultaneousSort(core::CVectorRange &keys, - core::CVectorRange &values) - { - return simultaneousSort(keys, values, - std::less()); - } - -#define CUSTOM_SWAP_VALUES swap(values1[j_], values1[j]); \ - swap(values2[j_], values2[j]); - //! Simultaneously sort \p keys, \p values1 and \p values2 - //! using the \p comp order of \p keys. - template - static bool simultaneousSort(KEY_VECTOR &keys, - VALUE1_VECTOR &values1, - VALUE2_VECTOR &values2, - const COMP &comp) - { - if ( keys.size() != values1.size() - || values1.size() != values2.size()) - { - return false; - } - SIMULTANEOUS_SORT_IMPL + //! Overload for default operator< comparison. + template + static bool simultaneousSort(KEY_VECTOR& keys, VALUE_VECTOR& values) { + return simultaneousSort(keys, values, std::less()); + } + //! Overload for default operator< comparison. + template + static bool simultaneousSort(core::CVectorRange& keys, core::CVectorRange& values) { + return simultaneousSort(keys, values, std::less()); + } + +#define CUSTOM_SWAP_VALUES \ + swap(values1[j_], values1[j]); \ + swap(values2[j_], values2[j]); + //! Simultaneously sort \p keys, \p values1 and \p values2 + //! using the \p comp order of \p keys. + template + static bool simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, const COMP& comp) { + if (keys.size() != values1.size() || values1.size() != values2.size()) { + return false; } + SIMULTANEOUS_SORT_IMPL + } #undef CUSTOM_SWAP_VALUES - //! Overload for default operator< comparison. - template - static bool simultaneousSort(KEY_VECTOR &keys, - VALUE1_VECTOR &values1, - VALUE2_VECTOR &values2) - { - return simultaneousSort(keys, values1, values2, - std::less()); - } - //! Overload for default operator< comparison. 
- template - static bool simultaneousSort(core::CVectorRange keys, - core::CVectorRange values1, - core::CVectorRange values2) - { - return simultaneousSort(keys, values1, values2, - std::less()); - } - -#define CUSTOM_SWAP_VALUES swap(values1[j_], values1[j]); \ - swap(values2[j_], values2[j]); \ - swap(values3[j_], values3[j]); - //! Simultaneously sort \p keys, \p values1, \p values2 - //! and \p values3 using the \p comp order of \p keys. - template - static bool simultaneousSort(KEY_VECTOR &keys, - VALUE1_VECTOR &values1, - VALUE2_VECTOR &values2, - VALUE3_VECTOR &values3, - const COMP &comp) - { - if ( keys.size() != values1.size() - || values1.size() != values2.size() - || values2.size() != values3.size()) - { - return false; - } - SIMULTANEOUS_SORT_IMPL + //! Overload for default operator< comparison. + template + static bool simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2) { + return simultaneousSort(keys, values1, values2, std::less()); + } + //! Overload for default operator< comparison. + template + static bool simultaneousSort(core::CVectorRange keys, + core::CVectorRange values1, + core::CVectorRange values2) { + return simultaneousSort(keys, values1, values2, std::less()); + } + +#define CUSTOM_SWAP_VALUES \ + swap(values1[j_], values1[j]); \ + swap(values2[j_], values2[j]); \ + swap(values3[j_], values3[j]); + //! Simultaneously sort \p keys, \p values1, \p values2 + //! and \p values3 using the \p comp order of \p keys. + template + static bool + simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, VALUE3_VECTOR& values3, const COMP& comp) { + if (keys.size() != values1.size() || values1.size() != values2.size() || values2.size() != values3.size()) { + return false; } + SIMULTANEOUS_SORT_IMPL + } #undef CUSTOM_SWAP_VALUES - //! Overload for default operator< comparison. - template - static bool simultaneousSort(KEY_VECTOR &keys, - VALUE1_VECTOR &values1, - VALUE2_VECTOR &values2, - VALUE3_VECTOR &values3) - { - return simultaneousSort(keys, values1, values2, values3, - std::less()); - } - //! Overload for default operator< comparison. - template - static bool simultaneousSort(core::CVectorRange keys, - core::CVectorRange values1, - core::CVectorRange values2, - core::CVectorRange values3) - { - return simultaneousSort(keys, values1, values2, values3, - std::less()); - } - -#define CUSTOM_SWAP_VALUES swap(values1[j_], values1[j]); \ - swap(values2[j_], values2[j]); \ - swap(values3[j_], values3[j]); \ - swap(values4[j_], values4[j]); - //! Simultaneously sort \p keys, \p values1, \p values2, - //! \p values3 and \p values4 using the \p comp order of - //! \p keys. - template - static bool simultaneousSort(KEY_VECTOR &keys, - VALUE1_VECTOR &values1, - VALUE2_VECTOR &values2, - VALUE3_VECTOR &values3, - VALUE4_VECTOR &values4, - const COMP &comp) - { - if ( keys.size() != values1.size() - || values1.size() != values2.size() - || values2.size() != values3.size() - || values3.size() != values4.size()) - { - return false; - } - SIMULTANEOUS_SORT_IMPL + //! Overload for default operator< comparison. + template + static bool simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, VALUE3_VECTOR& values3) { + return simultaneousSort(keys, values1, values2, values3, std::less()); + } + //! Overload for default operator< comparison. 
+ template + static bool simultaneousSort(core::CVectorRange keys, + core::CVectorRange values1, + core::CVectorRange values2, + core::CVectorRange values3) { + return simultaneousSort(keys, values1, values2, values3, std::less()); + } + +#define CUSTOM_SWAP_VALUES \ + swap(values1[j_], values1[j]); \ + swap(values2[j_], values2[j]); \ + swap(values3[j_], values3[j]); \ + swap(values4[j_], values4[j]); + //! Simultaneously sort \p keys, \p values1, \p values2, + //! \p values3 and \p values4 using the \p comp order of + //! \p keys. + template + static bool simultaneousSort(KEY_VECTOR& keys, + VALUE1_VECTOR& values1, + VALUE2_VECTOR& values2, + VALUE3_VECTOR& values3, + VALUE4_VECTOR& values4, + const COMP& comp) { + if (keys.size() != values1.size() || values1.size() != values2.size() || values2.size() != values3.size() || + values3.size() != values4.size()) { + return false; } + SIMULTANEOUS_SORT_IMPL + } #undef CUSTOM_SWAP_VALUES - //! Overload for default operator< comparison. - template - static bool simultaneousSort(KEY_VECTOR &keys, - VALUE1_VECTOR &values1, - VALUE2_VECTOR &values2, - VALUE3_VECTOR &values3, - VALUE4_VECTOR &values4) - { - return simultaneousSort(keys, values1, values2, values3, values4, - std::less()); - } - //! Overload for default operator< comparison. - template - static bool simultaneousSort(core::CVectorRange keys, - core::CVectorRange values1, - core::CVectorRange values2, - core::CVectorRange values3, - core::CVectorRange values4) - { - return simultaneousSort(keys, values1, values2, values3, values4, - std::less()); - } + //! Overload for default operator< comparison. + template + static bool + simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, VALUE3_VECTOR& values3, VALUE4_VECTOR& values4) { + return simultaneousSort(keys, values1, values2, values3, values4, std::less()); + } + //! Overload for default operator< comparison. + template + static bool simultaneousSort(core::CVectorRange keys, + core::CVectorRange values1, + core::CVectorRange values2, + core::CVectorRange values3, + core::CVectorRange values4) { + return simultaneousSort(keys, values1, values2, values3, values4, std::less()); + } #undef SIMULTANEOUS_SORT_IMPL - //@} + //@} }; - } } diff --git a/include/maths/COrdinal.h b/include/maths/COrdinal.h index a0560ad910..9fd9bfc770 100644 --- a/include/maths/COrdinal.h +++ b/include/maths/COrdinal.h @@ -14,10 +14,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A representation of an ordinal type. //! @@ -25,62 +23,61 @@ namespace maths //! This deals with floating point and integer values and works //! around the loss of precision converting 64 bit integers to //! doubles. -class MATHS_EXPORT COrdinal : private boost::equality_comparable< COrdinal, - boost::partially_ordered< COrdinal > > -{ - public: - //! Create an unset value. - COrdinal(); - COrdinal(int64_t value); - COrdinal(uint64_t value); - COrdinal(double value); - - //! Check if two ordinals are equal. - bool operator==(COrdinal rhs) const; - - //! Check if one ordinal is less than another. - bool operator<(COrdinal rhs) const; - - //! Check if the value has been set. - bool isNan() const; - - //! Convert to a double (accepting possible loss in precision). - double asDouble() const; - - //! Get a hash of the value. - uint64_t hash(); - - private: - //! Enumeration of the types which can be stored. - enum EType - { - E_Integer, - E_PositiveInteger, - E_Real, - E_Nan // Semantics are same as Nan. 
- }; - - union Value { int64_t integer; uint64_t positiveInteger; double real; }; - - private: - bool equal(int64_t lhs, uint64_t rhs) const; - bool equal(int64_t lhs, double rhs) const; - bool equal(uint64_t lhs, double rhs) const; - bool less(int64_t lhs, uint64_t rhs) const; - bool less(int64_t lhs, double rhs) const; - bool less(uint64_t lhs, double rhs) const; - - private: - //! The type of value stored. - EType m_Type; - //! The value. - Value m_Value; - -MATHS_EXPORT -friend std::ostream &operator<<(std::ostream &o, COrdinal ord); +class MATHS_EXPORT COrdinal : private boost::equality_comparable> { +public: + //! Create an unset value. + COrdinal(); + COrdinal(int64_t value); + COrdinal(uint64_t value); + COrdinal(double value); + + //! Check if two ordinals are equal. + bool operator==(COrdinal rhs) const; + + //! Check if one ordinal is less than another. + bool operator<(COrdinal rhs) const; + + //! Check if the value has been set. + bool isNan() const; + + //! Convert to a double (accepting possible loss in precision). + double asDouble() const; + + //! Get a hash of the value. + uint64_t hash(); + +private: + //! Enumeration of the types which can be stored. + enum EType { + E_Integer, + E_PositiveInteger, + E_Real, + E_Nan // Semantics are same as Nan. + }; + + union Value { + int64_t integer; + uint64_t positiveInteger; + double real; + }; + +private: + bool equal(int64_t lhs, uint64_t rhs) const; + bool equal(int64_t lhs, double rhs) const; + bool equal(uint64_t lhs, double rhs) const; + bool less(int64_t lhs, uint64_t rhs) const; + bool less(int64_t lhs, double rhs) const; + bool less(uint64_t lhs, double rhs) const; + +private: + //! The type of value stored. + EType m_Type; + //! The value. + Value m_Value; + + MATHS_EXPORT + friend std::ostream& operator<<(std::ostream& o, COrdinal ord); }; - - } } diff --git a/include/maths/CPRNG.h b/include/maths/CPRNG.h index 551395ab30..9e69e97e39 100644 --- a/include/maths/CPRNG.h +++ b/include/maths/CPRNG.h @@ -16,11 +16,8 @@ #include - -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A collection of pseudo random number generators. //! @@ -35,270 +32,246 @@ namespace maths //! The generators all implement the contract for a boost pseudo- //! random number generator, so they can be used freely with the //! boost::random library. -class MATHS_EXPORT CPRNG : private core::CNonInstantiatable -{ - private: - //! Fills [\p begin, \p end) with consecutive random numbers - //! generated by \p rng. - template - static void generate(PRNG &rng, ITR begin, ITR end) - { - for (/**/; begin != end; ++begin) - { - *begin = rng(); - } +class MATHS_EXPORT CPRNG : private core::CNonInstantiatable { +private: + //! Fills [\p begin, \p end) with consecutive random numbers + //! generated by \p rng. + template + static void generate(PRNG& rng, ITR begin, ITR end) { + for (/**/; begin != end; ++begin) { + *begin = rng(); } + } + +public: + //! \brief The split mix pseudo-random number generator. + //! + //! DESCRIPTION:\n + //! A \f$2^{64}\f$ period pseudo-random number generator based + //! on Java's splittable random number generator. + class MATHS_EXPORT CSplitMix64 { + public: + using result_type = uint64_t; public: - //! \brief The split mix pseudo-random number generator. - //! - //! DESCRIPTION:\n - //! A \f$2^{64}\f$ period pseudo-random number generator based - //! on Java's splittable random number generator. 
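For background only: the split mix step this class wraps can be written in a few lines. The constants in this sketch come from the published public-domain splitmix64 reference implementation, not from this patch; the class keeps its own private constants A, B and C.

    #include <cstdint>
    #include <iostream>

    // Sketch of a splitmix64 step: a Weyl sequence increment followed
    // by two xor-shift-multiply mixing rounds (constants from the
    // public reference implementation).
    std::uint64_t splitMix64Next(std::uint64_t& state) {
        state += 0x9E3779B97F4A7C15ULL; // golden-ratio increment
        std::uint64_t z = state;
        z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL;
        z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL;
        return z ^ (z >> 31);
    }

    int main() {
        std::uint64_t state = 42;
        for (int i = 0; i < 3; ++i) {
            std::cout << splitMix64Next(state) << '\n';
        }
        return 0;
    }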
- class MATHS_EXPORT CSplitMix64 - { - public: - using result_type = uint64_t; - - public: - CSplitMix64(); - CSplitMix64(uint64_t seed); - - //! Compare for equality. - bool operator==(CSplitMix64 other) const; - //! Not equal. - bool operator!=(CSplitMix64 other) const { return !this->operator==(other); } - - void seed(); - void seed(uint64_t seed); - - //! The minimum value returnable by operator(). - static uint64_t min(); - //! The maximum value returnable by operator(). - static uint64_t max(); - - //! Generate the next random number. - uint64_t operator()(); - - //! Fill the sequence [\p begin, \p end) with the next - //! \p end - \p begin random numbers. - template - void generate(ITR begin, ITR end) - { - CPRNG::generate(*this, begin, end); - } - - //! Discard the next \p n random numbers. - void discard(uint64_t n); - - //! Persist to a string. - std::string toString() const; - //! Restore from a string. - bool fromString(const std::string &state); - - private: - static const uint64_t A; - static const uint64_t B; - static const uint64_t C; - - private: - //! The state. - uint64_t m_X; - }; - - //! \brief The xoroshiro128+ pseudo-random number generator. - //! - //! DESCRIPTION:\n - //! A v.fast \f$2^{128}-1\f$ period pseudo-random number - //! generator with v.good empirical statistical properties. + CSplitMix64(); + CSplitMix64(uint64_t seed); + + //! Compare for equality. + bool operator==(CSplitMix64 other) const; + //! Not equal. + bool operator!=(CSplitMix64 other) const { return !this->operator==(other); } + + void seed(); + void seed(uint64_t seed); + + //! The minimum value returnable by operator(). + static uint64_t min(); + //! The maximum value returnable by operator(). + static uint64_t max(); + + //! Generate the next random number. + uint64_t operator()(); + + //! Fill the sequence [\p begin, \p end) with the next + //! \p end - \p begin random numbers. + template + void generate(ITR begin, ITR end) { + CPRNG::generate(*this, begin, end); + } + + //! Discard the next \p n random numbers. + void discard(uint64_t n); + + //! Persist to a string. + std::string toString() const; + //! Restore from a string. + bool fromString(const std::string& state); + + private: + static const uint64_t A; + static const uint64_t B; + static const uint64_t C; + + private: + //! The state. + uint64_t m_X; + }; + + //! \brief The xoroshiro128+ pseudo-random number generator. + //! + //! DESCRIPTION:\n + //! A v.fast \f$2^{128}-1\f$ period pseudo-random number + //! generator with v.good empirical statistical properties. + //! + //! The lowest bit is an LFSR so use a sign test to extract + //! a random Boolean value. + class MATHS_EXPORT CXorOShiro128Plus { + public: + using result_type = uint64_t; + + public: + CXorOShiro128Plus(); + CXorOShiro128Plus(uint64_t seed); + template + CXorOShiro128Plus(ITR begin, ITR end) { + this->seed(begin, end); + } + + //! Compare for equality. + bool operator==(const CXorOShiro128Plus& other) const; + //! Not equal. + bool operator!=(const CXorOShiro128Plus& other) const { return !this->operator==(other); } + + //! Set to the default seeded generator. //! - //! The lowest bit is an LFSR so use a sign test to extract - //! a random Boolean value. - class MATHS_EXPORT CXorOShiro128Plus - { - public: - using result_type = uint64_t; - - public: - CXorOShiro128Plus(); - CXorOShiro128Plus(uint64_t seed); - template - CXorOShiro128Plus(ITR begin, ITR end) - { - this->seed(begin, end); - } - - //! Compare for equality. 
- bool operator==(const CXorOShiro128Plus &other) const; - //! Not equal. - bool operator!=(const CXorOShiro128Plus &other) const - { - return !this->operator==(other); - } - - //! Set to the default seeded generator. - //! - //! As per recommendations we use CSplitMix64 for seeding. - void seed(); - //! Set to a seeded generator. - //! - //! As per recommendations we use CSplitMix64 for seeding. - void seed(uint64_t seed); - //! Seed from [\p begin, \p end) which should have two 64 bit - //! seeds. - template - void seed(ITR begin, ITR end) - { - std::size_t i = 0u; - for (/**/; i < 2 && begin != end; ++i, ++begin) - { - m_X[i] = *begin; - } - if (i < 2) - { - CSplitMix64 seeds; - seeds.generate(&m_X[i], &m_X[2]); - } - } - - //! The minimum value returnable by operator(). - static uint64_t min(); - //! The maximum value returnable by operator(). - static uint64_t max(); - - //! Generate the next random number. - uint64_t operator()(); - - //! Fill the sequence [\p begin, \p end) with the next - //! \p end - \p begin random numbers. - template - void generate(ITR begin, ITR end) - { - CPRNG::generate(*this, begin, end); - } - - //! Discard the next \p n random numbers. - void discard(uint64_t n); - - //! This is equivalent to \f$2^{64}\f$ calls to next(); - //! it can be used to generate \f$2^{64}\f$ non-overlapping - //! subsequences of length \f$2^{64}\f$ for parallel - //! computations. - void jump(); - - //! Persist to a string. - std::string toString() const; - //! Restore from a string. - bool fromString(const std::string &state); - - private: - static const uint64_t JUMP[2]; - - private: - //! The state. - uint64_t m_X[2]; - }; - - //! \brief The xorshift1024* pseudo-random number generator. + //! As per recommendations we use CSplitMix64 for seeding. + void seed(); + //! Set to a seeded generator. //! - //! DESCRIPTION:\n - //! A \f$2^{1024}-1\f$ period pseudo-random number generator - //! with v.good empirical statistical properties. + //! As per recommendations we use CSplitMix64 for seeding. + void seed(uint64_t seed); + //! Seed from [\p begin, \p end) which should have two 64 bit + //! seeds. + template + void seed(ITR begin, ITR end) { + std::size_t i = 0u; + for (/**/; i < 2 && begin != end; ++i, ++begin) { + m_X[i] = *begin; + } + if (i < 2) { + CSplitMix64 seeds; + seeds.generate(&m_X[i], &m_X[2]); + } + } + + //! The minimum value returnable by operator(). + static uint64_t min(); + //! The maximum value returnable by operator(). + static uint64_t max(); + + //! Generate the next random number. + uint64_t operator()(); + + //! Fill the sequence [\p begin, \p end) with the next + //! \p end - \p begin random numbers. + template + void generate(ITR begin, ITR end) { + CPRNG::generate(*this, begin, end); + } + + //! Discard the next \p n random numbers. + void discard(uint64_t n); + + //! This is equivalent to \f$2^{64}\f$ calls to next(); + //! it can be used to generate \f$2^{64}\f$ non-overlapping + //! subsequences of length \f$2^{64}\f$ for parallel + //! computations. + void jump(); + + //! Persist to a string. + std::string toString() const; + //! Restore from a string. + bool fromString(const std::string& state); + + private: + static const uint64_t JUMP[2]; + + private: + //! The state. + uint64_t m_X[2]; + }; + + //! \brief The xorshift1024* pseudo-random number generator. + //! + //! DESCRIPTION:\n + //! A \f$2^{1024}-1\f$ period pseudo-random number generator + //! with v.good empirical statistical properties. + //! + //! 
Note that the three lowest bits of this generator are LFSRs,
+    //! and thus they are slightly less random than the other bits.
+    //! Use a sign test to extract a random Boolean value.
+    //!
+    //! \sa https://en.wikipedia.org/wiki/Xorshift#cite_note-vigna2-9.
+    class MATHS_EXPORT CXorShift1024Mult {
+    public:
+        using result_type = uint64_t;
+
+    public:
+        CXorShift1024Mult();
+        CXorShift1024Mult(uint64_t seed);
+        template<typename ITR>
+        CXorShift1024Mult(ITR begin, ITR end) : m_P(0) {
+            this->seed(begin, end);
+        }
+
+        //! Compare for equality.
+        bool operator==(const CXorShift1024Mult& other) const;
+        //! Not equal.
+        bool operator!=(const CXorShift1024Mult& other) const { return !this->operator==(other); }
+
+        //! Set to the default seeded generator.
+        //!
+        //! As per recommendations we use CSplitMix64 for seeding.
+        void seed();
+        //! Set to a seeded generator.
+        //!
+        //! As per recommendations we use CSplitMix64 for seeding.
+        void seed(uint64_t seed);
+        //! Seed from [\p begin, \p end) which should have sixteen
+        //! 64 bit seeds.
+        template<typename ITR>
+        void seed(ITR begin, ITR end) {
+            std::size_t i = 0u;
+            for (/**/; i < 16 && begin != end; ++i, ++begin) {
+                m_X[i] = *begin;
+            }
+            if (i < 16) {
+                CSplitMix64 seeds;
+                seeds.generate(&m_X[i], &m_X[16]);
+            }
+        }
+
+        //!
The minimum value returnable by operator(). + static uint64_t min(); + //! The maximum value returnable by operator(). + static uint64_t max(); + + //! Generate the next random number. + uint64_t operator()(); + + //! Fill the sequence [\p begin, \p end) with the next + //! \p end - \p begin random numbers. + template + void generate(ITR begin, ITR end) { + CPRNG::generate(*this, begin, end); + } + + //! Discard the next \p n random numbers. + void discard(uint64_t n); + //! This is equivalent to \f$2^{512}\f$ calls to next(); + //! it can be used to generate \f$2^{512}\f$ non-overlapping + //! subsequences of length \f$2^{512}\f$ for parallel + //! computations. + void jump(); + + //! Persist to a string. + std::string toString() const; + //! Restore from a string. + bool fromString(std::string state); + + private: + static const uint64_t A; + static const uint64_t JUMP[16]; + + private: + //! The state. + uint64_t m_X[16]; + //! The current pair. + int m_P; + }; +}; } } diff --git a/include/maths/CPackedBitVector.h b/include/maths/CPackedBitVector.h index 07c356ace6..17b9765169 100644 --- a/include/maths/CPackedBitVector.h +++ b/include/maths/CPackedBitVector.h @@ -22,11 +22,8 @@ #include - -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A compact representation of binary vector. //! @@ -46,116 +43,101 @@ namespace maths //! the first bit in the vector and can deduce all other values by the //! number of runs in between. In practice we store one extra bit, the //! vector parity to allow us to extend the vector efficiently. -class MATHS_EXPORT CPackedBitVector : private boost::equality_comparable< CPackedBitVector, - boost::partially_ordered< CPackedBitVector > > -{ - public: - using TBoolVec = std::vector; - - //! Operations which can be performed in the inner product. - enum EOperation - { - E_AND, - E_OR, - E_XOR - }; - - public: - CPackedBitVector(); - explicit CPackedBitVector(bool bit); - CPackedBitVector(std::size_t dimension, bool bit); - CPackedBitVector(const TBoolVec &bits); - - //! Contract the vector by popping a component from the start. - void contract(); - - //! Extend the vector to dimension adding the component \p bit. - void extend(bool bit); - - //! \name Persistence - //@{ - //! Create from delimited values. - bool fromDelimited(const std::string &str); - - //! Persist state to delimited values. - std::string toDelimited() const; - //@} - - //! Get the dimension. - std::size_t dimension() const; - - //! Get the i'th component (no bounds checking). - bool operator()(std::size_t i) const; - - //! Check if two vectors are identically equal. - bool operator==(const CPackedBitVector &other) const; - - //! Lexicographical total ordering. - bool operator<(const CPackedBitVector &rhs) const; - - //! Get the complement vector, i.e. the vector whose bits are negated. - CPackedBitVector complement() const; - - //! Inner product. - double inner(const CPackedBitVector &covector, - EOperation op = E_AND) const; - - //! Euclidean norm. - double euclidean() const - { - return std::sqrt(this->inner(*this)); - } - - //! Manhattan norm. - double manhattan() const - { - return this->inner(*this); - } - - //! Convert to a bit vector. - TBoolVec toBitVector() const; - - //! Get a checksum of this vector's components. - uint64_t checksum() const; - - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this object. 
- std::size_t memoryUsage() const; - - private: - using TUInt8Vec = std::vector; - - private: - //! The maximum permitted run length. Longer runs are encoded - //! by stringing together a number of maximum length runs. - static const uint8_t MAX_RUN_LENGTH; - - private: - // Note that the bools are 1 byte aligned so the following - // three variables will be packed into the 64 bits. - - //! The dimension of the vector. - uint32_t m_Dimension; - - //! The value of the first component in the vector. - bool m_First; - - //! The parity of the vector: true indicates that there are an - //! even number runs and false that there are an odd. Together - //! with m_First this determines the value of the last component. - bool m_Parity; - - //! The length of each run. Note that if the length of a run - //! exceeds 255 then this is encoded in multiple run lengths. - TUInt8Vec m_RunLengths; +class MATHS_EXPORT CPackedBitVector : private boost::equality_comparable> { +public: + using TBoolVec = std::vector; + + //! Operations which can be performed in the inner product. + enum EOperation { E_AND, E_OR, E_XOR }; + +public: + CPackedBitVector(); + explicit CPackedBitVector(bool bit); + CPackedBitVector(std::size_t dimension, bool bit); + CPackedBitVector(const TBoolVec& bits); + + //! Contract the vector by popping a component from the start. + void contract(); + + //! Extend the vector to dimension adding the component \p bit. + void extend(bool bit); + + //! \name Persistence + //@{ + //! Create from delimited values. + bool fromDelimited(const std::string& str); + + //! Persist state to delimited values. + std::string toDelimited() const; + //@} + + //! Get the dimension. + std::size_t dimension() const; + + //! Get the i'th component (no bounds checking). + bool operator()(std::size_t i) const; + + //! Check if two vectors are identically equal. + bool operator==(const CPackedBitVector& other) const; + + //! Lexicographical total ordering. + bool operator<(const CPackedBitVector& rhs) const; + + //! Get the complement vector, i.e. the vector whose bits are negated. + CPackedBitVector complement() const; + + //! Inner product. + double inner(const CPackedBitVector& covector, EOperation op = E_AND) const; + + //! Euclidean norm. + double euclidean() const { return std::sqrt(this->inner(*this)); } + + //! Manhattan norm. + double manhattan() const { return this->inner(*this); } + + //! Convert to a bit vector. + TBoolVec toBitVector() const; + + //! Get a checksum of this vector's components. + uint64_t checksum() const; + + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + std::size_t memoryUsage() const; + +private: + using TUInt8Vec = std::vector; + +private: + //! The maximum permitted run length. Longer runs are encoded + //! by stringing together a number of maximum length runs. + static const uint8_t MAX_RUN_LENGTH; + +private: + // Note that the bools are 1 byte aligned so the following + // three variables will be packed into the 64 bits. + + //! The dimension of the vector. + uint32_t m_Dimension; + + //! The value of the first component in the vector. + bool m_First; + + //! The parity of the vector: true indicates that there are an + //! even number runs and false that there are an odd. Together + //! with m_First this determines the value of the last component. + bool m_Parity; + + //! The length of each run. Note that if the length of a run + //! 
exceeds 255 then this is encoded in multiple run lengths. + TUInt8Vec m_RunLengths; }; //! Output for debug. MATHS_EXPORT -std::ostream &operator<<(std::ostream &o, const CPackedBitVector &v); - +std::ostream& operator<<(std::ostream& o, const CPackedBitVector& v); } } diff --git a/include/maths/CPeriodicityHypothesisTests.h b/include/maths/CPeriodicityHypothesisTests.h index 720eede515..f03f165b65 100644 --- a/include/maths/CPeriodicityHypothesisTests.h +++ b/include/maths/CPeriodicityHypothesisTests.h @@ -7,9 +7,9 @@ #ifndef INCLUDED_ml_maths_CPeriodicityHypothesisTests_h #define INCLUDED_ml_maths_CPeriodicityHypothesisTests_h -#include #include #include +#include #include #include @@ -21,131 +21,126 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CSeasonalTime; //! \brief Represents the result of running the periodicity //! hypothesis tests. -class MATHS_EXPORT CPeriodicityHypothesisTestsResult : boost::equality_comparable > -{ - public: - using TTimeTimePr = std::pair; - - public: - //! \brief Component data. - struct MATHS_EXPORT SComponent - { - SComponent(); - SComponent(const std::string &description, - bool diurnal, - core_t::TTime startOfPartition, - core_t::TTime period, - const TTimeTimePr &window, - double precedence = 1.0); - - //! Check if this is equal to \p other. - bool operator==(const SComponent &other) const; - - //! Get a seasonal time for the specified results. - //! - //! \warning The caller owns the returned object. - CSeasonalTime *seasonalTime() const; - - //! An identifier for the component used by the test. - std::string s_Description; - //! True if this is a diurnal component false otherwise. - bool s_Diurnal; - //! The start of the partition. - core_t::TTime s_StartOfPartition; - //! The period of the component. - core_t::TTime s_Period; - //! The component window. - TTimeTimePr s_Window; - //! The precedence to apply to this component when - //! deciding which to keep. - double s_Precedence; - }; - - using TComponent5Vec = core::CSmallVector; +class MATHS_EXPORT CPeriodicityHypothesisTestsResult + : boost::equality_comparable> { +public: + using TTimeTimePr = std::pair; + +public: + //! \brief Component data. + struct MATHS_EXPORT SComponent { + SComponent(); + SComponent(const std::string& description, + bool diurnal, + core_t::TTime startOfPartition, + core_t::TTime period, + const TTimeTimePr& window, + double precedence = 1.0); - public: //! Check if this is equal to \p other. - bool operator==(const CPeriodicityHypothesisTestsResult &other) const; + bool operator==(const SComponent& other) const; - //! Sets to the union of the periodic components present. + //! Get a seasonal time for the specified results. //! - //! \warning This only makes sense if the this and the - //! other result share the start of the partition time. - const CPeriodicityHypothesisTestsResult &operator+=(const CPeriodicityHypothesisTestsResult &other); - - //! Add a component. - void add(const std::string &description, - bool diurnal, - core_t::TTime startOfWeek, - core_t::TTime period, - const TTimeTimePr &window, - double precedence = 1.0); - - //! Remove the component with \p description. - void remove(const std::string &description); - - //! Check if there are any periodic components. - bool periodic() const; - - //! Get the binary representation of the periodic components. - const TComponent5Vec &components() const; - - //! Get a human readable description of the result. - std::string print() const; - - private: - //! 
The periodic components. - TComponent5Vec m_Components; + //! \warning The caller owns the returned object. + CSeasonalTime* seasonalTime() const; + + //! An identifier for the component used by the test. + std::string s_Description; + //! True if this is a diurnal component false otherwise. + bool s_Diurnal; + //! The start of the partition. + core_t::TTime s_StartOfPartition; + //! The period of the component. + core_t::TTime s_Period; + //! The component window. + TTimeTimePr s_Window; + //! The precedence to apply to this component when + //! deciding which to keep. + double s_Precedence; + }; + + using TComponent5Vec = core::CSmallVector; + +public: + //! Check if this is equal to \p other. + bool operator==(const CPeriodicityHypothesisTestsResult& other) const; + + //! Sets to the union of the periodic components present. + //! + //! \warning This only makes sense if the this and the + //! other result share the start of the partition time. + const CPeriodicityHypothesisTestsResult& operator+=(const CPeriodicityHypothesisTestsResult& other); + + //! Add a component. + void add(const std::string& description, + bool diurnal, + core_t::TTime startOfWeek, + core_t::TTime period, + const TTimeTimePr& window, + double precedence = 1.0); + + //! Remove the component with \p description. + void remove(const std::string& description); + + //! Check if there are any periodic components. + bool periodic() const; + + //! Get the binary representation of the periodic components. + const TComponent5Vec& components() const; + + //! Get a human readable description of the result. + std::string print() const; + +private: + //! The periodic components. + TComponent5Vec m_Components; }; //! \brief Configures the periodicity testing. -class MATHS_EXPORT CPeriodicityHypothesisTestsConfig -{ - public: - CPeriodicityHypothesisTestsConfig(); - - //! Disable diurnal periodicity tests. - void disableDiurnal(); - //! Test given we know there is daily periodic component. - void hasDaily(bool value); - //! Test given we know there is a weekend. - void hasWeekend(bool value); - //! Test given we know there is a weekly periodic component. - void hasWeekly(bool value); - //! Set the start of the week. - void startOfWeek(core_t::TTime value); - - //! Check if we should test for diurnal periodic components. - bool testForDiurnal() const; - //! Check if we know there is a daily component. - bool hasDaily() const; - //! Check if we know there is a weekend. - bool hasWeekend() const; - //! Check if we know there is a weekly component. - bool hasWeekly() const; - //! Get the start of the week. - core_t::TTime startOfWeek() const; - - private: - //! True if we should test for diurnal periodicity. - bool m_TestForDiurnal; - //! True if we know there is a daily component. - bool m_HasDaily; - //! True if we know there is a weekend. - bool m_HasWeekend; - //! True if we know there is a weekly component. - bool m_HasWeekly; - //! The start of the week. - core_t::TTime m_StartOfWeek; +class MATHS_EXPORT CPeriodicityHypothesisTestsConfig { +public: + CPeriodicityHypothesisTestsConfig(); + + //! Disable diurnal periodicity tests. + void disableDiurnal(); + //! Test given we know there is daily periodic component. + void hasDaily(bool value); + //! Test given we know there is a weekend. + void hasWeekend(bool value); + //! Test given we know there is a weekly periodic component. + void hasWeekly(bool value); + //! Set the start of the week. + void startOfWeek(core_t::TTime value); + + //! 
Check if we should test for diurnal periodic components. + bool testForDiurnal() const; + //! Check if we know there is a daily component. + bool hasDaily() const; + //! Check if we know there is a weekend. + bool hasWeekend() const; + //! Check if we know there is a weekly component. + bool hasWeekly() const; + //! Get the start of the week. + core_t::TTime startOfWeek() const; + +private: + //! True if we should test for diurnal periodicity. + bool m_TestForDiurnal; + //! True if we know there is a daily component. + bool m_HasDaily; + //! True if we know there is a weekend. + bool m_HasWeekend; + //! True if we know there is a weekly component. + bool m_HasWeekly; + //! The start of the week. + core_t::TTime m_StartOfWeek; }; //! \brief Implements a set of hypothesis tests to discover the @@ -160,264 +155,235 @@ class MATHS_EXPORT CPeriodicityHypothesisTestsConfig //! of the amplitude. It also compares these possibilities with a //! specified period (typically found by examining the cyclic //! autocorrelation). -class MATHS_EXPORT CPeriodicityHypothesisTests -{ +class MATHS_EXPORT CPeriodicityHypothesisTests { +public: + using TDouble2Vec = core::CSmallVector; + using TTimeTimePr = std::pair; + using TTimeTimePr2Vec = core::CSmallVector; + using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; + using TMeanVarAccumulatorVec = std::vector; + using TTimeTimePrMeanVarAccumulatorPr = std::pair; + using TTimeTimePrMeanVarAccumulatorPrVec = std::vector; + using TTimeTimePrMeanVarAccumulatorPrVecVec = std::vector; + using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TFloatMeanAccumulatorVec = std::vector; + using TTime2Vec = core::CSmallVector; + using TComponent = CPeriodicityHypothesisTestsResult::SComponent; + +public: + CPeriodicityHypothesisTests(); + explicit CPeriodicityHypothesisTests(const CPeriodicityHypothesisTestsConfig& config); + + //! Check if the test is initialized. + bool initialized() const; + + //! Initialize the bucket values. + void initialize(core_t::TTime bucketLength, core_t::TTime window, core_t::TTime period); + + //! Add \p value at \p time. + void add(core_t::TTime time, double value, double weight = 1.0); + + //! Check if there periodic components and, if there are, + //! which best describe the periodic patterns in the data. + CPeriodicityHypothesisTestsResult test() const; + +private: + using TDoubleVec = std::vector; + using TDoubleVec2Vec = core::CSmallVector; + using TFloatMeanAccumulatorCRng = core::CVectorRange; + using TMinMaxAccumulator = maths::CBasicStatistics::CMinMax; + + //! \brief A collection of statistics used during testing. + struct STestStats { + STestStats(); + //! Set the various test thresholds. + void setThresholds(double vt, double at, double Rt); + //! Check if the null hypothesis is good enough to not need an + //! alternative. + bool nullHypothesisGoodEnough() const; + //! True if a known periodic component is tested. + bool s_HasPeriod; + //! True if a known repeating partition is tested. + bool s_HasPartition; + //! The maximum variance to accept the alternative hypothesis. + double s_Vt; + //! The minimum amplitude to accept the alternative hypothesis. + double s_At; + //! The minimum autocorrelation to accept the alternative + //! hypothesis. + double s_Rt; + //! The data range. + double s_Range; + //! The number of buckets with at least one measurement. + double s_B; + //! The average number of measurements per bucket value. + double s_M; + //! 
The null hypothesis periodic components. + CPeriodicityHypothesisTestsResult s_H0; + //! The variance estimate of H0. + double s_V0; + //! The degrees of freedom in the variance estimate of H0. + double s_DF0; + //! The trend for the null hypothesis. + TDoubleVec2Vec s_T0; + //! The partition for the null hypothesis. + TTimeTimePr2Vec s_Partition; + //! The start of the repeating partition. + core_t::TTime s_StartOfPartition; + }; + + //! \brief Manages the testing of a set of nested hypotheses. + class CNestedHypotheses { public: - using TDouble2Vec = core::CSmallVector; - using TTimeTimePr = std::pair; - using TTimeTimePr2Vec = core::CSmallVector; - using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMeanVarAccumulatorVec = std::vector; - using TTimeTimePrMeanVarAccumulatorPr = std::pair; - using TTimeTimePrMeanVarAccumulatorPrVec = std::vector; - using TTimeTimePrMeanVarAccumulatorPrVecVec = std::vector; - using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TFloatMeanAccumulatorVec = std::vector; - using TTime2Vec = core::CSmallVector; - using TComponent = CPeriodicityHypothesisTestsResult::SComponent; + using TTestFunc = std::function; + + //! \brief Manages the building of a collection of nested + //! hypotheses. + class CBuilder { + public: + explicit CBuilder(CNestedHypotheses& hypothesis); + CBuilder& addNested(TTestFunc test); + CBuilder& addAlternative(TTestFunc test); + CBuilder& finishedNested(); + + private: + using TNestedHypothesesPtrVec = std::vector; + + private: + TNestedHypothesesPtrVec m_Levels; + }; public: - CPeriodicityHypothesisTests(); - explicit CPeriodicityHypothesisTests(const CPeriodicityHypothesisTestsConfig &config); + explicit CNestedHypotheses(TTestFunc test = 0); - //! Check if the test is initialized. - bool initialized() const; - - //! Initialize the bucket values. - void initialize(core_t::TTime bucketLength, - core_t::TTime window, - core_t::TTime period); - - //! Add \p value at \p time. - void add(core_t::TTime time, double value, double weight = 1.0); - - //! Check if there periodic components and, if there are, - //! which best describe the periodic patterns in the data. - CPeriodicityHypothesisTestsResult test() const; + //! Set the null hypothesis. + CBuilder null(TTestFunc test); + //! Add a nested hypothesis for \p test. + CNestedHypotheses& addNested(TTestFunc test); + //! Test the hypotheses. + CPeriodicityHypothesisTestsResult test(STestStats& stats) const; private: - using TDoubleVec = std::vector; - using TDoubleVec2Vec = core::CSmallVector; - using TFloatMeanAccumulatorCRng = core::CVectorRange; - using TMinMaxAccumulator = maths::CBasicStatistics::CMinMax; - - //! \brief A collection of statistics used during testing. - struct STestStats - { - STestStats(); - //! Set the various test thresholds. - void setThresholds(double vt, double at, double Rt); - //! Check if the null hypothesis is good enough to not need an - //! alternative. - bool nullHypothesisGoodEnough() const; - //! True if a known periodic component is tested. - bool s_HasPeriod; - //! True if a known repeating partition is tested. - bool s_HasPartition; - //! The maximum variance to accept the alternative hypothesis. - double s_Vt; - //! The minimum amplitude to accept the alternative hypothesis. - double s_At; - //! The minimum autocorrelation to accept the alternative - //! hypothesis. - double s_Rt; - //! The data range. - double s_Range; - //! The number of buckets with at least one measurement. 
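The thresholds carried in STestStats (s_Vt, s_At, s_Rt) drive accept/reject decisions of roughly the following shape: a periodic alternative is accepted only if it explains enough of the variance, or its amplitude and cyclic autocorrelation are individually convincing. The self-contained sketch below is purely illustrative; the names and the exact combination rule are assumptions for exposition, not the library's logic.

    #include <cmath>

    // Illustrative stand-ins for the thresholds held in STestStats.
    struct STestThresholds {
        double s_Vt; // largest fraction of the null variance the alternative may leave
        double s_At; // smallest amplitude, in standard deviations, worth keeping
        double s_Rt; // smallest cyclic autocorrelation worth keeping
    };

    // Accept the periodic hypothesis if it removes enough variance, or if
    // the component is both large and strongly autocorrelated.
    bool acceptPeriodic(double nullVariance, double hypothesisVariance,
                        double amplitude, double autocorrelation,
                        const STestThresholds& t) {
        return hypothesisVariance <= t.s_Vt * nullVariance ||
               (amplitude >= t.s_At * std::sqrt(nullVariance) &&
                autocorrelation >= t.s_Rt);
    }
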
- double s_B; - //! The average number of measurements per bucket value. - double s_M; - //! The null hypothesis periodic components. - CPeriodicityHypothesisTestsResult s_H0; - //! The variance estimate of H0. - double s_V0; - //! The degrees of freedom in the variance estimate of H0. - double s_DF0; - //! The trend for the null hypothesis. - TDoubleVec2Vec s_T0; - //! The partition for the null hypothesis. - TTimeTimePr2Vec s_Partition; - //! The start of the repeating partition. - core_t::TTime s_StartOfPartition; - }; - - //! \brief Manages the testing of a set of nested hypotheses. - class CNestedHypotheses - { - public: - using TTestFunc = std::function; - - //! \brief Manages the building of a collection of nested - //! hypotheses. - class CBuilder - { - public: - explicit CBuilder(CNestedHypotheses &hypothesis); - CBuilder &addNested(TTestFunc test); - CBuilder &addAlternative(TTestFunc test); - CBuilder &finishedNested(); - - private: - using TNestedHypothesesPtrVec = std::vector; - - private: - TNestedHypothesesPtrVec m_Levels; - }; - - public: - explicit CNestedHypotheses(TTestFunc test = 0); - - //! Set the null hypothesis. - CBuilder null(TTestFunc test); - //! Add a nested hypothesis for \p test. - CNestedHypotheses &addNested(TTestFunc test); - //! Test the hypotheses. - CPeriodicityHypothesisTestsResult test(STestStats &stats) const; - - private: - using THypothesisVec = std::vector; - - private: - //! The test. - TTestFunc m_Test; - //! If true always test the nested hypotheses. - bool m_AlwaysTestNested; - //! The nested hypotheses to test. - THypothesisVec m_Nested; - }; - - using TNestedHypothesesVec = std::vector; + using THypothesisVec = std::vector; private: - //! Get the hypotheses to test for period/daily/weekly components. - void hypothesesForWeekly(const TTimeTimePr2Vec &windowForTestingWeekly, - const TFloatMeanAccumulatorCRng &bucketsForTestingWeekly, - const TTimeTimePr2Vec &windowForTestingPeriod, - const TFloatMeanAccumulatorCRng &bucketsForTestingPeriod, - TNestedHypothesesVec &hypotheses) const; - - //! Get the hypotheses to test for period/daily components. - void hypothesesForDaily(const TTimeTimePr2Vec &windowForTestingDaily, - const TFloatMeanAccumulatorCRng &bucketsForTestingDaily, - const TTimeTimePr2Vec &windowForTestingPeriod, - const TFloatMeanAccumulatorCRng &bucketsForTestingPeriod, - TNestedHypothesesVec &hypotheses) const; - - //! Get the hypotheses to test for period components. - void hypothesesForPeriod(const TTimeTimePr2Vec &windows, - const TFloatMeanAccumulatorCRng &buckets, - TNestedHypothesesVec &hypotheses) const; - - //! Extract the best hypothesis. - CPeriodicityHypothesisTestsResult best(const TNestedHypothesesVec &hypotheses) const; - - //! The null hypothesis of the various tests. - CPeriodicityHypothesisTestsResult - testForNull(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! Test for a daily periodic component. - CPeriodicityHypothesisTestsResult - testForDaily(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! Test for a weekly periodic component. - CPeriodicityHypothesisTestsResult - testForWeekly(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! Test for a weekday/end partition. - CPeriodicityHypothesisTestsResult - testForDailyWithWeekend(const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! 
Test for a weekly period given we think there is a - //! weekday/end partition. - CPeriodicityHypothesisTestsResult - testForWeeklyGivenDailyWithWeekend(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! Test for the specified period given we think there is diurnal - //! periodicity. - CPeriodicityHypothesisTestsResult - testForPeriod(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! Check we've seen sufficient data to test accurately. - bool seenSufficientDataToTest(core_t::TTime period, - const TFloatMeanAccumulatorCRng &buckets) const; - - //! Check if there are enough non-empty buckets which are repeated - //! at at least one \p period in \p buckets. - bool seenSufficientPeriodicallyPopulatedBucketsToTest(const TFloatMeanAccumulatorCRng &buckets, - std::size_t period) const; - - //! Compute various ancillary statistics for testing. - bool testStatisticsFor(const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! Get the variance and degrees freedom for the null hypothesis - //! that there is no trend or repeating partition of any kind. - void nullHypothesis(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! Compute the variance and degrees freedom for the hypothesis. - void hypothesis(const TTime2Vec &periods, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const; - - //! Condition \p buckets assuming the null hypothesis is true. - //! - //! This removes any trend associated with the null hypothesis. - void conditionOnHypothesis(const TTimeTimePr2Vec &windows, - const STestStats &stats, - TFloatMeanAccumulatorVec &buckets) const; - - //! Test to see if there is significant evidence for a component - //! with period \p period. - bool testPeriod(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - core_t::TTime period, STestStats &stats) const; - - //! Test to see if there is significant evidence for a repeating - //! partition of the data into windows defined by \p partition. - bool testPartition(const TTimeTimePr2Vec &partition, - const TFloatMeanAccumulatorCRng &buckets, - core_t::TTime period, - double correction, STestStats &stats) const; + //! The test. + TTestFunc m_Test; + //! If true always test the nested hypotheses. + bool m_AlwaysTestNested; + //! The nested hypotheses to test. + THypothesisVec m_Nested; + }; + + using TNestedHypothesesVec = std::vector; + +private: + //! Get the hypotheses to test for period/daily/weekly components. + void hypothesesForWeekly(const TTimeTimePr2Vec& windowForTestingWeekly, + const TFloatMeanAccumulatorCRng& bucketsForTestingWeekly, + const TTimeTimePr2Vec& windowForTestingPeriod, + const TFloatMeanAccumulatorCRng& bucketsForTestingPeriod, + TNestedHypothesesVec& hypotheses) const; + + //! Get the hypotheses to test for period/daily components. + void hypothesesForDaily(const TTimeTimePr2Vec& windowForTestingDaily, + const TFloatMeanAccumulatorCRng& bucketsForTestingDaily, + const TTimeTimePr2Vec& windowForTestingPeriod, + const TFloatMeanAccumulatorCRng& bucketsForTestingPeriod, + TNestedHypothesesVec& hypotheses) const; + + //! Get the hypotheses to test for period components. + void + hypothesesForPeriod(const TTimeTimePr2Vec& windows, const TFloatMeanAccumulatorCRng& buckets, TNestedHypothesesVec& hypotheses) const; + + //! Extract the best hypothesis. 
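The shape these declarations suggest is that each hypothesis is only tested once its parent has been accepted, and the deepest accepted level wins. A toy, self-contained analogue of that structure (all names here are invented for illustration, not the class's implementation):

    #include <functional>
    #include <string>
    #include <vector>

    // Each level carries a test and the refinements to try if it passes.
    struct Hypothesis {
        std::string name;
        std::function<bool()> accept;   // does this level beat its parent?
        std::vector<Hypothesis> nested; // more specific hypotheses to try next
    };

    // Return the most specific accepted hypothesis, falling back to the
    // parent's result when a level is rejected.
    std::string testNested(const Hypothesis& h, const std::string& fallback) {
        if (!h.accept()) {
            return fallback;
        }
        std::string best{h.name};
        for (const auto& child : h.nested) {
            best = testNested(child, best);
        }
        return best;
    }
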
+ CPeriodicityHypothesisTestsResult best(const TNestedHypothesesVec& hypotheses) const; + + //! The null hypothesis of the various tests. + CPeriodicityHypothesisTestsResult + testForNull(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Test for a daily periodic component. + CPeriodicityHypothesisTestsResult + testForDaily(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Test for a weekly periodic component. + CPeriodicityHypothesisTestsResult + testForWeekly(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Test for a weekday/end partition. + CPeriodicityHypothesisTestsResult testForDailyWithWeekend(const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Test for a weekly period given we think there is a + //! weekday/end partition. + CPeriodicityHypothesisTestsResult + testForWeeklyGivenDailyWithWeekend(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Test for the specified period given we think there is diurnal + //! periodicity. + CPeriodicityHypothesisTestsResult + testForPeriod(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Check we've seen sufficient data to test accurately. + bool seenSufficientDataToTest(core_t::TTime period, const TFloatMeanAccumulatorCRng& buckets) const; + + //! Check if there are enough non-empty buckets which are repeated + //! at at least one \p period in \p buckets. + bool seenSufficientPeriodicallyPopulatedBucketsToTest(const TFloatMeanAccumulatorCRng& buckets, std::size_t period) const; + + //! Compute various ancillary statistics for testing. + bool testStatisticsFor(const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Get the variance and degrees freedom for the null hypothesis + //! that there is no trend or repeating partition of any kind. + void nullHypothesis(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Compute the variance and degrees freedom for the hypothesis. + void hypothesis(const TTime2Vec& periods, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + + //! Condition \p buckets assuming the null hypothesis is true. + //! + //! This removes any trend associated with the null hypothesis. + void conditionOnHypothesis(const TTimeTimePr2Vec& windows, const STestStats& stats, TFloatMeanAccumulatorVec& buckets) const; + + //! Test to see if there is significant evidence for a component + //! with period \p period. + bool testPeriod(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, core_t::TTime period, STestStats& stats) const; + + //! Test to see if there is significant evidence for a repeating + //! partition of the data into windows defined by \p partition. + bool testPartition(const TTimeTimePr2Vec& partition, + const TFloatMeanAccumulatorCRng& buckets, + core_t::TTime period, + double correction, + STestStats& stats) const; - private: - //! The minimum proportion of populated buckets for which - //! the test is accurate. - static const double ACCURATE_TEST_POPULATED_FRACTION; +private: + //! The minimum proportion of populated buckets for which + //! the test is accurate. + static const double ACCURATE_TEST_POPULATED_FRACTION; - //! The minimum coefficient of variation to bother to test. 
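The usual entry point to all of this is the testForPeriods free function declared at the bottom of this header. As a rough sketch of its use (include paths and the exact accumulator spellings are assumed from the surrounding headers), one fills a vector of per-bucket mean accumulators and asks which periodic components are supported:

    #include <maths/CBasicStatistics.h>
    #include <maths/CPeriodicityHypothesisTests.h>

    #include <cmath>
    #include <iostream>

    using namespace ml;

    int main() {
        // One week of ten-minute buckets carrying a daily sine wave.
        core_t::TTime bucketLength{600};
        maths::TFloatMeanAccumulatorVec values(604800 / bucketLength);
        for (std::size_t i = 0; i < values.size(); ++i) {
            double t{600.0 * static_cast<double>(i)};
            values[i].add(10.0 + 5.0 * std::sin(6.283185307 * t / 86400.0));
        }

        maths::CPeriodicityHypothesisTestsConfig config;
        maths::CPeriodicityHypothesisTestsResult result{
            maths::testForPeriods(config, 0 /*startTime*/, bucketLength, values)};
        std::cout << result.print() << '\n'; // expect a daily component
        return 0;
    }
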
- static const double MINIMUM_COEFFICIENT_OF_VARIATION; + //! The minimum coefficient of variation to bother to test. + static const double MINIMUM_COEFFICIENT_OF_VARIATION; - private: - //! Configures the tests to run. - CPeriodicityHypothesisTestsConfig m_Config; +private: + //! Configures the tests to run. + CPeriodicityHypothesisTestsConfig m_Config; - //! The bucketing interval. - core_t::TTime m_BucketLength; + //! The bucketing interval. + core_t::TTime m_BucketLength; - //! The window length for which to maintain bucket values. - core_t::TTime m_WindowLength; + //! The window length for which to maintain bucket values. + core_t::TTime m_WindowLength; - //! The specified period to test. - core_t::TTime m_Period; + //! The specified period to test. + core_t::TTime m_Period; - //! The time range of values added to the test. - TMinMaxAccumulator m_TimeRange; + //! The time range of values added to the test. + TMinMaxAccumulator m_TimeRange; - //! The mean bucket values. - TFloatMeanAccumulatorVec m_BucketValues; + //! The mean bucket values. + TFloatMeanAccumulatorVec m_BucketValues; }; using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; @@ -425,11 +391,10 @@ using TFloatMeanAccumulatorVec = std::vector; //! Test for periodic components in \p values. MATHS_EXPORT -CPeriodicityHypothesisTestsResult testForPeriods(const CPeriodicityHypothesisTestsConfig &config, +CPeriodicityHypothesisTestsResult testForPeriods(const CPeriodicityHypothesisTestsConfig& config, core_t::TTime startTime, core_t::TTime bucketLength, - const TFloatMeanAccumulatorVec &values); - + const TFloatMeanAccumulatorVec& values); } } diff --git a/include/maths/CPoissonMeanConjugate.h b/include/maths/CPoissonMeanConjugate.h index 46e740dbe6..cb772fd1dd 100644 --- a/include/maths/CPoissonMeanConjugate.h +++ b/include/maths/CPoissonMeanConjugate.h @@ -16,15 +16,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { struct SDistributionRestoreParams; //! \brief Interface for a conjugate prior distribution for a Poisson variable. @@ -48,292 +45,273 @@ struct SDistributionRestoreParams; //! the data when using one-of-n composition (see COneOfNPrior) or model data with //! multiple modes when using multi-modal composition (see CMultimodalPrior). //! From a design point of view this is the composite pattern. -class MATHS_EXPORT CPoissonMeanConjugate : public CPrior -{ - public: - using TEqualWithTolerance = CEqualWithTolerance; - - //! Lift the overloads of addSamples into scope. - using CPrior::addSamples; - //! Lift the overloads of print into scope. - using CPrior::print; - - public: - //! \name Life-Cycle - //@{ - //! \param[in] offset The offset to apply to the data. - //! \param[in] shape The shape parameter of the prior gamma distribution. - //! \param[in] rate The rate parameter of the prior gamma distribution. - //! \param[in] decayRate The rate at which to revert to non-informative. - CPoissonMeanConjugate(double offset, - double shape, - double rate, - double decayRate = 0.0); - - //! Construct from part of a state document. - CPoissonMeanConjugate(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - // Default copy constructor and assignment operator work. - //@} - - - //! Create a new instance of a non-informative prior. - //! - //! \param[in] offset The offset to apply to the data. - //! 
\param[in] decayRate The rate at which to revert to the non-informative prior. - //! \return A non-informative prior. - //! \warning The caller owns the object returned. - static CPoissonMeanConjugate nonInformativePrior(double offset = 0.0, - double decayRate = 0.0); - - - //! \name Prior Contract - //@{ - //! Get the type of this prior. - virtual EPrior type() const; - - //! Create a copy of the prior. - //! - //! \return A pointer to a newly allocated clone of this prior. - //! \warning The caller owns the object returned. - virtual CPoissonMeanConjugate *clone() const; - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); - - //! Returns true. - virtual bool needsOffset() const; - - //! Reset m_Offset so the smallest sample is not less that the support - //! left end. Note that translating the mean of a Poisson R.V. affects - //! its variance, so there is no easy adjustment of the prior parameters - //! which preserves the distribution after translation. - //! - //! This samples the current marginal likelihood and uses these samples - //! to reconstruct the prior with adjusted offset. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Get the current offset. - virtual double offset() const; - - //! Update the prior with a collection of independent samples from the - //! Poisson variable. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Propagate the prior density function forwards by \p time. - //! - //! The prior distribution relaxes back to non-informative at a rate - //! controlled by the decay rate parameter (optionally supplied to the - //! constructor). - //! - //! \param[in] time The time increment to apply. - virtual void propagateForwardsByTime(double time); - - //! Get the support for the marginal likelihood function. - virtual TDoubleDoublePr marginalLikelihoodSupport() const; - - //! Get the mean of the marginal likelihood function. - virtual double marginalLikelihoodMean() const; - - //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
-        //! 
- //! - //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the - //! the percentage of interest \p percentage. - //! - //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. - //! \param[in] weights Optional variance scale weights. - //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr - marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Compute the log marginal likelihood function at \p samples integrating - //! over the prior density function for the Poisson mean. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - //! \note The samples are assumed to be independent and identically - //! distributed. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const; - - //! Sample the marginal likelihood function. - //! - //! See CPrior::sampleMarginalLikelihood for a detailed description. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const; - - //! Compute minus the log of the joint c.d.f. of the marginal likelihood - //! at \p samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ - //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. - //! \param[out] upperBound Equal to \p lowerBound. - //! \note The samples are assumed to be independent. - virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute minus the log of the one minus the joint c.d.f. of the - //! marginal likelihood at \p samples without losing precision due to - //! cancellation errors at one, i.e. the smallest non-zero value this - //! can return is the minimum double rather than epsilon. - //! - //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const; - - //! Compute the probability of a less likely collection of independent - //! samples from the variable. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. 
See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples of interest. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with the probability of the set - //! for which the joint marginal likelihood is less than that of - //! \p samples (subject to the measure \p calculation). - //! \param[out] upperBound Equal to \p lowerBound. - //! \param[out] tail The tail that (left or right) that all the samples - //! are in or neither. - //! \note The samples are assumed to be independent. - virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &indent, std::string &result) const; - - //! Print the prior density function in a specified format. - //! - //! \see CPrior::printJointDensityFunction for details. - virtual std::string printJointDensityFunction() const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - //@} - - - //! Compute the mean of the prior distribution. - double priorMean() const; - - //! Compute the variance of the prior distribution. - double priorVariance() const; - - - //! \name Test Functions - //@{ - //! Compute the specified percentage confidence interval for the variable mean. - TDoubleDoublePr meanConfidenceInterval(double percentage) const; - - //! Check if two priors are equal to the specified tolerance. - bool equalTolerance(const CPoissonMeanConjugate &rhs, - const TEqualWithTolerance &equal) const; - //@} - - private: - //! Read parameters from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - private: - //! The shape parameter of a non-informative prior. - static const double NON_INFORMATIVE_SHAPE; - - //! The rate parameter of a non-informative prior. - static const double NON_INFORMATIVE_RATE; - - private: - //! We assume that the data are described by \f$X = Y - u\f$, where - //! \f$u\f$ is a constant and \f$Y\f$ is Poisson distributed. This - //! allows us to model data with negative values greater than \f$-u\f$. - double m_Offset; - - //! The shape parameter for the gamma distribution. - double m_Shape; - - //! The rate parameter for the gamma distribution. We work with the inverse - //! scale parameter because it makes defining the non-informative prior easy. - double m_Rate; +class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { +public: + using TEqualWithTolerance = CEqualWithTolerance; + + //! Lift the overloads of addSamples into scope. + using CPrior::addSamples; + //! 
Lift the overloads of print into scope. + using CPrior::print; + +public: + //! \name Life-Cycle + //@{ + //! \param[in] offset The offset to apply to the data. + //! \param[in] shape The shape parameter of the prior gamma distribution. + //! \param[in] rate The rate parameter of the prior gamma distribution. + //! \param[in] decayRate The rate at which to revert to non-informative. + CPoissonMeanConjugate(double offset, double shape, double rate, double decayRate = 0.0); + + //! Construct from part of a state document. + CPoissonMeanConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + // Default copy constructor and assignment operator work. + //@} + + //! Create a new instance of a non-informative prior. + //! + //! \param[in] offset The offset to apply to the data. + //! \param[in] decayRate The rate at which to revert to the non-informative prior. + //! \return A non-informative prior. + //! \warning The caller owns the object returned. + static CPoissonMeanConjugate nonInformativePrior(double offset = 0.0, double decayRate = 0.0); + + //! \name Prior Contract + //@{ + //! Get the type of this prior. + virtual EPrior type() const; + + //! Create a copy of the prior. + //! + //! \return A pointer to a newly allocated clone of this prior. + //! \warning The caller owns the object returned. + virtual CPoissonMeanConjugate* clone() const; + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); + + //! Returns true. + virtual bool needsOffset() const; + + //! Reset m_Offset so the smallest sample is not less that the support + //! left end. Note that translating the mean of a Poisson R.V. affects + //! its variance, so there is no easy adjustment of the prior parameters + //! which preserves the distribution after translation. + //! + //! This samples the current marginal likelihood and uses these samples + //! to reconstruct the prior with adjusted offset. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \return The penalty to apply in model selection. + virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Get the current offset. + virtual double offset() const; + + //! Update the prior with a collection of independent samples from the + //! Poisson variable. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Propagate the prior density function forwards by \p time. + //! + //! The prior distribution relaxes back to non-informative at a rate + //! controlled by the decay rate parameter (optionally supplied to the + //! constructor). + //! + //! \param[in] time The time increment to apply. + virtual void propagateForwardsByTime(double time); + + //! Get the support for the marginal likelihood function. 
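For orientation, the update this class performs is the textbook gamma-Poisson conjugate pair; stated here as the standard result rather than read out of the implementation: if \f$\lambda \sim \mbox{Gamma}(a, b)\f$, parameterized by rate \f$b\f$, and \f$x_1, \ldots, x_n\f$ are i.i.d. \f$\mbox{Poisson}(\lambda)\f$, then

    \f$\lambda \;|\; x_{1:n} \sim \mbox{Gamma}\left(a + \sum_{i=1}^n x_i,\; b + n\right)\f$

so the posterior mean of the rate is \f$(a + \sum_i x_i) / (b + n)\f$ and the predictive distribution of a new count is negative binomial. Working with the rate rather than the scale makes this update, and the non-informative limit of small \f$a\f$ and \f$b\f$, particularly simple, which matches the comment on m_Rate at the bottom of this class.
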
+ virtual TDoubleDoublePr marginalLikelihoodSupport() const; + + //! Get the mean of the marginal likelihood function. + virtual double marginalLikelihoodMean() const; + + //! Get the mode of the marginal likelihood function. + virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the variance of the marginal likelihood. + virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: + //!
+    //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
+    //! 
+ //! + //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the + //! the percentage of interest \p percentage. + //! + //! \param[in] percentage The percentage of interest. + //! \param[in] weightStyles Optional variance scale weight styles. + //! \param[in] weights Optional variance scale weights. + //! \note \p percentage should be in the range [0.0, 100.0). + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Compute the log marginal likelihood function at \p samples integrating + //! over the prior density function for the Poisson mean. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] result Filled in with the joint likelihood of \p samples. + //! \note The samples are assumed to be independent and identically + //! distributed. + virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; + + //! Sample the marginal likelihood function. + //! + //! See CPrior::sampleMarginalLikelihood for a detailed description. + //! + //! \param[in] numberSamples The number of samples required. + //! \param[out] samples Filled in with samples from the prior. + //! \note \p numberSamples is truncated to the number of samples received. + virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const; + + //! Compute minus the log of the joint c.d.f. of the marginal likelihood + //! at \p samples. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples The samples of interest. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ + //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. + //! \param[out] upperBound Equal to \p lowerBound. + //! \note The samples are assumed to be independent. + virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const; + + //! Compute minus the log of the one minus the joint c.d.f. of the + //! marginal likelihood at \p samples without losing precision due to + //! cancellation errors at one, i.e. the smallest non-zero value this + //! can return is the minimum double rather than epsilon. + //! + //! \see minusLogJointCdf for more details. + virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const; + + //! Compute the probability of a less likely collection of independent + //! samples from the variable. + //! + //! \param[in] calculation The style of the probability calculation + //! (see model_t::EProbabilityCalculation for details). + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. 
See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples The samples of interest. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] lowerBound Filled in with the probability of the set + //! for which the joint marginal likelihood is less than that of + //! \p samples (subject to the measure \p calculation). + //! \param[out] upperBound Equal to \p lowerBound. + //! \param[out] tail The tail that (left or right) that all the samples + //! are in or neither. + //! \note The samples are assumed to be independent. + virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const; + + //! Check if this is a non-informative prior. + virtual bool isNonInformative() const; + + //! Get a human readable description of the prior. + //! + //! \param[in] indent The indent to use at the start of new lines. + //! \param[in,out] result Filled in with the description. + virtual void print(const std::string& indent, std::string& result) const; + + //! Print the prior density function in a specified format. + //! + //! \see CPrior::printJointDensityFunction for details. + virtual std::string printJointDensityFunction() const; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; + + //! Get the memory used by this component + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component + virtual std::size_t memoryUsage() const; + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const; + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + //@} + + //! Compute the mean of the prior distribution. + double priorMean() const; + + //! Compute the variance of the prior distribution. + double priorVariance() const; + + //! \name Test Functions + //@{ + //! Compute the specified percentage confidence interval for the variable mean. + TDoubleDoublePr meanConfidenceInterval(double percentage) const; + + //! Check if two priors are equal to the specified tolerance. + bool equalTolerance(const CPoissonMeanConjugate& rhs, const TEqualWithTolerance& equal) const; + //@} + +private: + //! Read parameters from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + +private: + //! The shape parameter of a non-informative prior. + static const double NON_INFORMATIVE_SHAPE; + + //! The rate parameter of a non-informative prior. + static const double NON_INFORMATIVE_RATE; + +private: + //! We assume that the data are described by \f$X = Y - u\f$, where + //! \f$u\f$ is a constant and \f$Y\f$ is Poisson distributed. This + //! allows us to model data with negative values greater than \f$-u\f$. + double m_Offset; + + //! The shape parameter for the gamma distribution. + double m_Shape; + + //! The rate parameter for the gamma distribution. We work with the inverse + //! scale parameter because it makes defining the non-informative prior easy. 
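Putting the interface above together, a minimal usage sketch (the header path and the CConstantWeights spelling are assumed from this header's defaults) builds a non-informative prior, updates it with counts, and reads off the predictive mean and a confidence interval:

    #include <maths/CPoissonMeanConjugate.h>

    #include <iostream>

    using namespace ml;

    int main() {
        maths::CPoissonMeanConjugate prior{maths::CPoissonMeanConjugate::nonInformativePrior()};

        // Five observed counts, each with unit count weight.
        maths::CPrior::TDouble1Vec samples{3.0, 5.0, 4.0, 6.0, 2.0};
        maths::CPrior::TDouble4Vec1Vec weights(samples.size(), maths::CPrior::TDouble4Vec(1, 1.0));
        prior.addSamples(maths::CConstantWeights::COUNT, samples, weights);

        maths::CPrior::TDoubleDoublePr interval{prior.meanConfidenceInterval(95.0)};
        std::cout << "predictive mean = " << prior.marginalLikelihoodMean() << '\n';
        std::cout << "95% CI for the rate = [" << interval.first << ", "
                  << interval.second << "]\n";
        return 0;
    }
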
+ double m_Rate; }; - } } diff --git a/include/maths/CPrior.h b/include/maths/CPrior.h index 1ca792df75..25b507ffbd 100644 --- a/include/maths/CPrior.h +++ b/include/maths/CPrior.h @@ -20,15 +20,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Interface for a prior distribution function. //! @@ -39,540 +36,519 @@ namespace maths //! This exists to support a one-of-n prior distribution which comprises //! a weighted selection of basic likelihood functions and is implemented //! using the composite pattern. -class MATHS_EXPORT CPrior -{ +class MATHS_EXPORT CPrior { +public: + using TDoubleVec = std::vector; + using TDoubleVecVec = std::vector; + using TDoubleDoublePr = std::pair; + using TDoubleDoublePrVec = std::vector; + using TWeightStyleVec = maths_t::TWeightStyleVec; + using TDouble1Vec = core::CSmallVector; + using TDouble4Vec = core::CSmallVector; + using TDouble4Vec1Vec = core::CSmallVector; + using TWeights = CConstantWeights; + + //! \brief Data for plotting a series + struct MATHS_EXPORT SPlot { + TDouble1Vec s_Abscissa; + TDouble1Vec s_Ordinates; + }; + + //! Enumeration of prior types. + enum EPrior { + E_Constant = 0x1, + E_Gamma = 0x2, + E_LogNormal = 0x4, + E_Multimodal = 0x8, + E_Multinomial = 0x10, + E_Normal = 0x20, + E_OneOfN = 0x40, + E_Poisson = 0x80 + }; + + //! \brief Defines a filter for removing models from selection. + class MATHS_EXPORT CModelFilter { public: - using TDoubleVec = std::vector; - using TDoubleVecVec = std::vector; - using TDoubleDoublePr = std::pair; - using TDoubleDoublePrVec = std::vector; - using TWeightStyleVec = maths_t::TWeightStyleVec; - using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; - using TWeights = CConstantWeights; - - //! \brief Data for plotting a series - struct MATHS_EXPORT SPlot - { - TDouble1Vec s_Abscissa; - TDouble1Vec s_Ordinates; - }; - - //! Enumeration of prior types. - enum EPrior - { - E_Constant = 0x1, - E_Gamma = 0x2, - E_LogNormal = 0x4, - E_Multimodal = 0x8, - E_Multinomial = 0x10, - E_Normal = 0x20, - E_OneOfN = 0x40, - E_Poisson = 0x80 - }; - - //! \brief Defines a filter for removing models from selection. - class MATHS_EXPORT CModelFilter - { - public: - CModelFilter(); - - //! Mark a model to be removed. - CModelFilter &remove(EPrior model); - - //! Check of \p model should be removed. - bool operator()(EPrior model) const; - - private: - //! A binary representation of the filter. - int m_Filter; - }; - - //! \brief Wrapper around the jointLogMarginalLikelihood function. - //! - //! DESCRIPTION:\n - //! This adapts the jointLogMarginalLikelihood function for use with - //! CIntegration. - class MATHS_EXPORT CLogMarginalLikelihood - { - public: - using result_type = double; - - public: - CLogMarginalLikelihood(const CPrior &prior, - const TWeightStyleVec &weightStyles = CConstantWeights::COUNT, - const TDouble4Vec1Vec &weights = CConstantWeights::SINGLE_UNIT); - - double operator()(double x) const; - bool operator()(double x, double &result) const; - - private: - const CPrior *m_Prior; - const TWeightStyleVec *m_WeightStyles; - const TDouble4Vec1Vec *m_Weights; - //! Avoids creating the vector argument to jointLogMarginalLikelihood - //! more than once. - mutable TDouble1Vec m_X; - }; + CModelFilter(); + //! Mark a model to be removed. + CModelFilter& remove(EPrior model); + + //! 
Check of \p model should be removed. + bool operator()(EPrior model) const; + + private: + //! A binary representation of the filter. + int m_Filter; + }; + + //! \brief Wrapper around the jointLogMarginalLikelihood function. + //! + //! DESCRIPTION:\n + //! This adapts the jointLogMarginalLikelihood function for use with + //! CIntegration. + class MATHS_EXPORT CLogMarginalLikelihood { public: - //! The value of the decay rate to fall back to using if the input - //! value is inappropriate. - static const double FALLBACK_DECAY_RATE; + using result_type = double; public: - //! \name Life-Cycle - //@{ - //! Construct an arbitrarily initialised object, suitable only for - //! assigning to or swapping with a valid one. - CPrior(); - - //! \param[in] dataType The type of data being modeled. - //! \param[in] decayRate The rate at which the prior returns to non-informative. - CPrior(maths_t::EDataType dataType, double decayRate); - - // Default copy constructor and assignment operator work. - - //! Virtual destructor for deletion by base pointer. - virtual ~CPrior() = default; - - //! Swap the contents of this prior and \p other. - void swap(CPrior &other); - //@} - - //! Check if the prior is being used to model discrete data. - bool isDiscrete() const; - - //! Check if the prior is being used to model integer data. - bool isInteger() const; - - //! Get the data type. - maths_t::EDataType dataType() const; - - //! Get the rate at which the prior returns to non-informative. - double decayRate() const; - - //! \name Prior Contract - //@{ - //! Get the type of this prior. - virtual EPrior type() const = 0; - - //! Create a copy of the prior. - //! - //! \warning Caller owns returned object. - virtual CPrior *clone() const = 0; - - //! Set the data type. - virtual void dataType(maths_t::EDataType value); - - //! Set the rate at which the prior returns to non-informative. - virtual void decayRate(double value); - - //! Reset the prior to non-informative. - virtual void setToNonInformative(double offset = 0.0, - double decayRate = 0.0) = 0; - - //! Remove models marked by \p filter. - virtual void removeModels(CModelFilter &filter); - - //! Get the margin between the smallest value and the support left - //! end. Priors with non-negative support, automatically adjust the - //! offset if a value is seen which is smaller than offset + margin. - virtual double offsetMargin() const; - - //! Check if the prior needs an offset to be applied. - virtual bool needsOffset() const = 0; - - //! For priors with non-negative support this adjusts the offset used - //! to extend the support to handle negative samples. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples The samples from which to determine the offset. - //! \param[in] weights The weights of each sample in \p samples. - //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) = 0; - - //! Get the current sample offset. - virtual double offset() const = 0; - - //! Update the prior with a collection of independent samples from the - //! variable. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! 
\param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) = 0; - - //! Update the prior for the specified elapsed time. - virtual void propagateForwardsByTime(double time) = 0; - - //! Get the support for the marginal likelihood function. - virtual TDoubleDoublePr marginalLikelihoodSupport() const = 0; - - //! Get the mean of the marginal likelihood function. - virtual double marginalLikelihoodMean() const = 0; - - //! Get the nearest mean of the multimodal prior marginal likelihood, - //! otherwise the marginal likelihood mean. - virtual double nearestMarginalLikelihoodMean(double value) const; - - //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const = 0; - - //! Get the local maxima of the marginal likelihood function. - virtual TDouble1Vec marginalLikelihoodModes(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the \p percentage symmetric confidence interval for the marginal - //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: - //!
-        //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
-        //! 
- //! - //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the - //! the percentage of interest \p percentage. - //! - //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. - //! \param[in] weights Optional variance scale weights. - //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr - marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const = 0; - - //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const = 0; - - //! Calculate the log marginal likelihood function integrating over the - //! prior density function. - //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] result Filled in with the joint likelihood of \p samples. - virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const = 0; - - //! Sample the marginal likelihood function. - //! - //! The marginal likelihood functions are sampled in quantile intervals. - //! The idea is to capture a set of samples that accurately and efficiently - //! represent the information in the prior. Random sampling (although it has - //! nice asymptotic properties) doesn't fulfill the second requirement: - //! typically requiring many more samples than sampling in quantile intervals - //! to capture the same amount of information. - //! - //! This is to allow us to transform one prior distribution into another - //! completely generically and relatively efficiently, by updating the target - //! prior with these samples. As such the prior needs to maintain a count of - //! the number of samples to date so that it isn't over sampled. - //! - //! \param[in] numberSamples The number of samples required. - //! \param[out] samples Filled in with samples from the prior. - //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const = 0; - - //! Calculate minus the log of the joint c.d.f. of the marginal likelihood - //! for a collection of independent samples from the variable. - //! - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with a lower bound for - //! \f$-\log(\prod_i{F(x_i)})\f$ where \f$F(.)\f$ is the c.d.f. and - //! \f$\{x_i\}\f$ are the samples. - //! \param[out] upperBound Filled in with a upper bound for - //! \f$-\log(\prod_i{F(x_i)})\f$ where \f$F(.)\f$ is the c.d.f. and - //! \f$\{x_i\}\f$ are the samples. - //! \note The samples are assumed to be independent. - //! \note In general, \p lowerBound equals \p upperBound. However, - //! 
in some cases insufficient data is held to exactly compute the - //! c.d.f. in which case the we use sharp upper and lower bounds. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, - //! i.e. a value of zero is not well defined and a value of infinity - //! is not well handled. (Very large values are handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const = 0; - - //! Compute minus the log of the one minus the joint c.d.f. of the - //! marginal likelihood at \p samples without losing precision due to - //! cancellation errors at one, i.e. the smallest non-zero value this - //! can return is the minimum double rather than epsilon. - //! - //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const = 0; - - //! Calculate the joint probability of seeing a lower likelihood collection - //! of independent samples from the variable integrating over the prior - //! density function. - //! - //! \param[in] calculation The style of the probability calculation - //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. - //! \param[in] samples A collection of samples of the variable. - //! \param[in] weights The weights of each sample in \p samples. - //! \param[out] lowerBound Filled in with a lower bound for the probability - //! of the set for which the joint marginal likelihood is less than - //! that of \p samples (subject to the measure \p calculation). - //! \param[out] upperBound Filled in with an upper bound for the - //! probability of the set for which the joint marginal likelihood is - //! less than that of \p samples (subject to the measure \p calculation). - //! \param[out] tail The tail that (left or right) that all the - //! samples are in or neither. - //! \note The samples are assumed to be independent. - //! \note In general, \p lowerBound equals \p upperBound. However, - //! in some cases insufficient data is held to exactly compute the - //! c.d.f. in which case the we use sharp upper and lower bounds. - //! \warning The variance scales must be in the range \f$(0,\infty)\f$, - //! i.e. a value of zero is not well defined and a value of infinity - //! is not well handled. (Very large values are handled though.) - virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const = 0; - - //! Check if this is a non-informative prior. - virtual bool isNonInformative() const = 0; - - //! Get a human readable description of the prior. - std::string print() const; - - //! Get a human readable description of the prior. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. - virtual void print(const std::string &indent, std::string &result) const = 0; - - //! Print the marginal likelihood function. - //! - //! The format is as follows:\n - //! \code{cpp} - //! x = [x1 x2 .... xn ]; - //! 
likelihood = [L(x1) L(x2) ... L(xn) ]; - //! \endcode - //! - //! i.e. domain values are space separated on the first line and the likelihood - //! evaluated at those values are space separated on the next line. - virtual std::string printMarginalLikelihoodFunction(double weight = 1.0) const; - - //! Return the plot data for the marginal likelihood function. - //! - //! \param[in] numberPoints Number of points to use in the returned plot. - //! \param[in] weight A scale which is applied to all likelihoods. - SPlot marginalLikelihoodPlot(unsigned int numberPoints, double weight = 1.0) const; - - //! Print the prior density function of the parameters. - //! - //! The format is as follows:\n - //! \code{cpp} - //! x = [x1 x2 ... xn ]; - //! y = [y1 y2 ... yn ]; - //! pdf = [f(x1, y1) f(x1, y2) ... f(x1, yn) - //! f(x2, y1) f(x2, y2) ... f(x2, yn) - //! ... - //! f(xn, y1) f(xn, y2) ... f(xn, yn) - //! ]; - //! \endcode - //! - //! i.e. domain values are space separated on the first and subsequent line(s) - //! as appropriate and the density function evaluated at those values are space - //! separated on the next line and subsequent lines as appropriate. - virtual std::string printJointDensityFunction() const = 0; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const = 0; - - //! Get the memory used by this component - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; - - //! Get the memory used by this component - virtual std::size_t memoryUsage() const = 0; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const = 0; - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0; - //@} - - //! Compute the expectation of the specified function w.r.t. to the marginal - //! likelihood. - //! - //! This computes the expectation using order three Gauss-Legendre quadrature - //! in \p numberIntervals subdivisions of a high confidence interval for the - //! marginal likelihood. - //! - //! \param f The function to integrate. - //! \param numberIntervals The number intervals to use for integration. - //! \param result Filled in with the result if the expectation could be calculated. - //! - //! \tparam F This must conform to the function type expected by - //! CIntegration::gaussLegendre. - //! \tparam T The return type of the function F which must conform to the type - //! expected by CIntegration::gaussLegendre. - template - bool expectation(const F &f, - const std::size_t numberIntervals, - T &result, - const TWeightStyleVec &weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec &weights = TWeights::UNIT) const; - - //! Get the number of samples received to date. - double numberSamples() const; - - //! Set the number of samples received to \p numberSamples. - //! - //! This is managed internally and generally should not be called by users. - void numberSamples(double numberSamples); - - //! Check if we should use this prior at present. - virtual bool participatesInModelSelection() const; - - //! Get the number of unmarginalized parameters in the marginal likelihood. - //! - //! \note That any parameters over which we explicitly integrate to - //! compute a marginal likelihood don't need to be counted since we - //! are interested in the estimating the usual BIC approximation for - //! 
\f$int_{\theta}f(x|\theta, M)f(\theta|M)\d\theta\f$ - virtual double unmarginalizedParameters() const; - - //! Get a set of sample for the prior to use in adjust offset. - void adjustOffsetResamples(double minimumSample, - TDouble1Vec &resamples, - TDouble4Vec1Vec &resamplesWeights) const; + CLogMarginalLikelihood(const CPrior& prior, + const TWeightStyleVec& weightStyles = CConstantWeights::COUNT, + const TDouble4Vec1Vec& weights = CConstantWeights::SINGLE_UNIT); - protected: - //! \brief Defines a set of operations to adjust the offset parameter - //! of those priors with non-negative support. - class MATHS_EXPORT COffsetParameters - { - public: - COffsetParameters(CPrior &prior); - virtual ~COffsetParameters() = default; - - //! Add a collection of samples. - void samples(const maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights); - - //! Capture a collection of re-samples from the prior. - virtual void resample(double minimumSample); - - protected: - CPrior &prior() const; - const maths_t::TWeightStyleVec &weightStyles() const; - const TDouble1Vec &samples() const; - const TDouble4Vec1Vec &weights() const; - const TDouble1Vec &resamples() const; - const TDouble4Vec1Vec &resamplesWeights() const; - - private: - CPrior *m_Prior; - const maths_t::TWeightStyleVec *m_WeightStyles; - const TDouble1Vec *m_Samples; - const TDouble4Vec1Vec *m_Weights; - TDouble1Vec m_Resamples; - TDouble4Vec1Vec m_ResamplesWeights; - }; - - //! \brief Computes the likelihood of a collection of samples and - //! resamples at different offsets. - //! - //! This is used to maximize the data likelihood w.r.t. the choice - //! of offset. - class MATHS_EXPORT COffsetCost : public COffsetParameters - { - public: - using result_type = double; - - public: - COffsetCost(CPrior &prior); - - //! Compute the cost. - double operator()(double offset) const; - - protected: - virtual void resetPriors(double offset) const; - virtual double computeCost(double offset) const; - }; - - //! \brief Apply a specified offset to a prior. - class MATHS_EXPORT CApplyOffset : public COffsetParameters - { - public: - CApplyOffset(CPrior &prior); - - //! Apply the offset. - virtual void operator()(double offset) const; - }; + double operator()(double x) const; + bool operator()(double x, double& result) const; - protected: - //! The number of times we sample the prior when adjusting the offset. - static const std::size_t ADJUST_OFFSET_SAMPLE_SIZE; + private: + const CPrior* m_Prior; + const TWeightStyleVec* m_WeightStyles; + const TDouble4Vec1Vec* m_Weights; + //! Avoids creating the vector argument to jointLogMarginalLikelihood + //! more than once. + mutable TDouble1Vec m_X; + }; + +public: + //! The value of the decay rate to fall back to using if the input + //! value is inappropriate. + static const double FALLBACK_DECAY_RATE; + +public: + //! \name Life-Cycle + //@{ + //! Construct an arbitrarily initialised object, suitable only for + //! assigning to or swapping with a valid one. + CPrior(); + + //! \param[in] dataType The type of data being modeled. + //! \param[in] decayRate The rate at which the prior returns to non-informative. + CPrior(maths_t::EDataType dataType, double decayRate); + + // Default copy constructor and assignment operator work. + + //! Virtual destructor for deletion by base pointer. + virtual ~CPrior() = default; + + //! Swap the contents of this prior and \p other. + void swap(CPrior& other); + //@} + + //! 
Check if the prior is being used to model discrete data. + bool isDiscrete() const; + + //! Check if the prior is being used to model integer data. + bool isInteger() const; + + //! Get the data type. + maths_t::EDataType dataType() const; + + //! Get the rate at which the prior returns to non-informative. + double decayRate() const; + + //! \name Prior Contract + //@{ + //! Get the type of this prior. + virtual EPrior type() const = 0; + + //! Create a copy of the prior. + //! + //! \warning Caller owns returned object. + virtual CPrior* clone() const = 0; + + //! Set the data type. + virtual void dataType(maths_t::EDataType value); + + //! Set the rate at which the prior returns to non-informative. + virtual void decayRate(double value); + + //! Reset the prior to non-informative. + virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0) = 0; + + //! Remove models marked by \p filter. + virtual void removeModels(CModelFilter& filter); + + //! Get the margin between the smallest value and the support left + //! end. Priors with non-negative support, automatically adjust the + //! offset if a value is seen which is smaller than offset + margin. + virtual double offsetMargin() const; + + //! Check if the prior needs an offset to be applied. + virtual bool needsOffset() const = 0; + + //! For priors with non-negative support this adjusts the offset used + //! to extend the support to handle negative samples. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples The samples from which to determine the offset. + //! \param[in] weights The weights of each sample in \p samples. + //! \return The penalty to apply in model selection. + virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) = 0; + + //! Get the current sample offset. + virtual double offset() const = 0; + + //! Update the prior with a collection of independent samples from the + //! variable. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) = 0; + + //! Update the prior for the specified elapsed time. + virtual void propagateForwardsByTime(double time) = 0; + + //! Get the support for the marginal likelihood function. + virtual TDoubleDoublePr marginalLikelihoodSupport() const = 0; + + //! Get the mean of the marginal likelihood function. + virtual double marginalLikelihoodMean() const = 0; + + //! Get the nearest mean of the multimodal prior marginal likelihood, + //! otherwise the marginal likelihood mean. + virtual double nearestMarginalLikelihoodMean(double value) const; + + //! Get the mode of the marginal likelihood function. + virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const = 0; + + //! Get the local maxima of the marginal likelihood function. 
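The update contract above is typically exercised as a pair: addSamples followed by propagateForwardsByTime. A minimal sketch, assuming a concrete prior and the unit count weights declared in this header; the helper name is ours, not part of this patch:

\code{cpp}
#include <maths/CPrior.h>

// Sketch only: feed one batch of samples to any concrete prior, then
// age it by one unit of elapsed time so it drifts back towards
// non-informative.
void updatePrior(ml::maths::CPrior& prior, const ml::maths::CPrior::TDouble1Vec& samples) {
    using TWeights = ml::maths::CPrior::TWeights;
    // One unit count weight per sample.
    ml::maths::CPrior::TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT);
    prior.addSamples(TWeights::COUNT, samples, weights);
    prior.propagateForwardsByTime(1.0);
}
\endcode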
+ virtual TDouble1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the \p percentage symmetric confidence interval for the marginal + //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: + //!
+    //!   \f$P([a,m]) = P([m,b]) = p / 100 / 2\f$
+    //! 
+ //! + //! where \f$m\f$ is the median of the distribution and \f$p\f$ is the + //! the percentage of interest \p percentage. + //! + //! \param[in] percentage The percentage of interest. + //! \param[in] weightStyles Optional variance scale weight styles. + //! \param[in] weights Optional variance scale weights. + //! \note \p percentage should be in the range [0.0, 100.0). + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const = 0; + + //! Get the variance of the marginal likelihood. + virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const = 0; + + //! Calculate the log marginal likelihood function integrating over the + //! prior density function. + //! + //! \param[in] weightStyles Controls the interpretation of the weight(s) + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] result Filled in with the joint likelihood of \p samples. + virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const = 0; + + //! Sample the marginal likelihood function. + //! + //! The marginal likelihood functions are sampled in quantile intervals. + //! The idea is to capture a set of samples that accurately and efficiently + //! represent the information in the prior. Random sampling (although it has + //! nice asymptotic properties) doesn't fulfill the second requirement: + //! typically requiring many more samples than sampling in quantile intervals + //! to capture the same amount of information. + //! + //! This is to allow us to transform one prior distribution into another + //! completely generically and relatively efficiently, by updating the target + //! prior with these samples. As such the prior needs to maintain a count of + //! the number of samples to date so that it isn't over sampled. + //! + //! \param[in] numberSamples The number of samples required. + //! \param[out] samples Filled in with samples from the prior. + //! \note \p numberSamples is truncated to the number of samples received. + virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const = 0; + + //! Calculate minus the log of the joint c.d.f. of the marginal likelihood + //! for a collection of independent samples from the variable. + //! + //! \param[in] weightStyles Controls the interpretation of the weights + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] lowerBound Filled in with a lower bound for + //! \f$-\log(\prod_i{F(x_i)})\f$ where \f$F(.)\f$ is the c.d.f. and + //! \f$\{x_i\}\f$ are the samples. + //! \param[out] upperBound Filled in with a upper bound for + //! \f$-\log(\prod_i{F(x_i)})\f$ where \f$F(.)\f$ is the c.d.f. and + //! \f$\{x_i\}\f$ are the samples. + //! \note The samples are assumed to be independent. + //! \note In general, \p lowerBound equals \p upperBound. However, + //! 
in some cases insufficient data is held to exactly compute the + //! c.d.f. in which case the we use sharp upper and lower bounds. + //! \warning The variance scales must be in the range \f$(0,\infty)\f$, + //! i.e. a value of zero is not well defined and a value of infinity + //! is not well handled. (Very large values are handled though.) + virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const = 0; + + //! Compute minus the log of the one minus the joint c.d.f. of the + //! marginal likelihood at \p samples without losing precision due to + //! cancellation errors at one, i.e. the smallest non-zero value this + //! can return is the minimum double rather than epsilon. + //! + //! \see minusLogJointCdf for more details. + virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const = 0; + + //! Calculate the joint probability of seeing a lower likelihood collection + //! of independent samples from the variable integrating over the prior + //! density function. + //! + //! \param[in] calculation The style of the probability calculation + //! (see model_t::EProbabilityCalculation for details). + //! \param[in] weightStyles Controls the interpretation of the weights + //! that are associated with each sample. See maths_t::ESampleWeightStyle + //! for more details. + //! \param[in] samples A collection of samples of the variable. + //! \param[in] weights The weights of each sample in \p samples. + //! \param[out] lowerBound Filled in with a lower bound for the probability + //! of the set for which the joint marginal likelihood is less than + //! that of \p samples (subject to the measure \p calculation). + //! \param[out] upperBound Filled in with an upper bound for the + //! probability of the set for which the joint marginal likelihood is + //! less than that of \p samples (subject to the measure \p calculation). + //! \param[out] tail The tail that (left or right) that all the + //! samples are in or neither. + //! \note The samples are assumed to be independent. + //! \note In general, \p lowerBound equals \p upperBound. However, + //! in some cases insufficient data is held to exactly compute the + //! c.d.f. in which case the we use sharp upper and lower bounds. + //! \warning The variance scales must be in the range \f$(0,\infty)\f$, + //! i.e. a value of zero is not well defined and a value of infinity + //! is not well handled. (Very large values are handled though.) + virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const = 0; + + //! Check if this is a non-informative prior. + virtual bool isNonInformative() const = 0; + + //! Get a human readable description of the prior. + std::string print() const; + + //! Get a human readable description of the prior. + //! + //! \param[in] indent The indent to use at the start of new lines. + //! \param[in,out] result Filled in with the description. + virtual void print(const std::string& indent, std::string& result) const = 0; + + //! Print the marginal likelihood function. + //! + //! The format is as follows:\n + //! \code{cpp} + //! x = [x1 x2 .... xn ]; + //! 
likelihood = [L(x1) L(x2) ... L(xn) ]; + //! \endcode + //! + //! i.e. domain values are space separated on the first line and the likelihood + //! evaluated at those values are space separated on the next line. + virtual std::string printMarginalLikelihoodFunction(double weight = 1.0) const; + + //! Return the plot data for the marginal likelihood function. + //! + //! \param[in] numberPoints Number of points to use in the returned plot. + //! \param[in] weight A scale which is applied to all likelihoods. + SPlot marginalLikelihoodPlot(unsigned int numberPoints, double weight = 1.0) const; + + //! Print the prior density function of the parameters. + //! + //! The format is as follows:\n + //! \code{cpp} + //! x = [x1 x2 ... xn ]; + //! y = [y1 y2 ... yn ]; + //! pdf = [f(x1, y1) f(x1, y2) ... f(x1, yn) + //! f(x2, y1) f(x2, y2) ... f(x2, yn) + //! ... + //! f(xn, y1) f(xn, y2) ... f(xn, yn) + //! ]; + //! \endcode + //! + //! i.e. domain values are space separated on the first and subsequent line(s) + //! as appropriate and the density function evaluated at those values are space + //! separated on the next line and subsequent lines as appropriate. + virtual std::string printJointDensityFunction() const = 0; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const = 0; + + //! Get the memory used by this component + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; + + //! Get the memory used by this component + virtual std::size_t memoryUsage() const = 0; + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const = 0; + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0; + //@} + + //! Compute the expectation of the specified function w.r.t. to the marginal + //! likelihood. + //! + //! This computes the expectation using order three Gauss-Legendre quadrature + //! in \p numberIntervals subdivisions of a high confidence interval for the + //! marginal likelihood. + //! + //! \param f The function to integrate. + //! \param numberIntervals The number intervals to use for integration. + //! \param result Filled in with the result if the expectation could be calculated. + //! + //! \tparam F This must conform to the function type expected by + //! CIntegration::gaussLegendre. + //! \tparam T The return type of the function F which must conform to the type + //! expected by CIntegration::gaussLegendre. + template + bool expectation(const F& f, + const std::size_t numberIntervals, + T& result, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; + + //! Get the number of samples received to date. + double numberSamples() const; + + //! Set the number of samples received to \p numberSamples. + //! + //! This is managed internally and generally should not be called by users. + void numberSamples(double numberSamples); + + //! Check if we should use this prior at present. + virtual bool participatesInModelSelection() const; + + //! Get the number of unmarginalized parameters in the marginal likelihood. + //! + //! \note That any parameters over which we explicitly integrate to + //! compute a marginal likelihood don't need to be counted since we + //! are interested in the estimating the usual BIC approximation for + //! 
\f$int_{\theta}f(x|\theta, M)f(\theta|M)\d\theta\f$ + virtual double unmarginalizedParameters() const; + + //! Get a set of sample for the prior to use in adjust offset. + void adjustOffsetResamples(double minimumSample, TDouble1Vec& resamples, TDouble4Vec1Vec& resamplesWeights) const; + +protected: + //! \brief Defines a set of operations to adjust the offset parameter + //! of those priors with non-negative support. + class MATHS_EXPORT COffsetParameters { + public: + COffsetParameters(CPrior& prior); + virtual ~COffsetParameters() = default; + + //! Add a collection of samples. + void samples(const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Capture a collection of re-samples from the prior. + virtual void resample(double minimumSample); protected: - //! For priors with non-negative support this adjusts the offset used - //! to extend the support to handle negative samples by maximizing a - //! specified reward. - //! - //! \return The penalty to apply to the model in selection. - double adjustOffsetWithCost(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - COffsetCost &cost, - CApplyOffset &apply); - - //! Update the number of samples received to date by adding \p n. - void addSamples(double n); - - //! Get a debug description of the prior parameters. - virtual std::string debug() const; + CPrior& prior() const; + const maths_t::TWeightStyleVec& weightStyles() const; + const TDouble1Vec& samples() const; + const TDouble4Vec1Vec& weights() const; + const TDouble1Vec& resamples() const; + const TDouble4Vec1Vec& resamplesWeights() const; private: - //! If this is true then the prior is being used to model discrete - //! data. Note that this is not persisted and deduced from context. - maths_t::EDataType m_DataType; + CPrior* m_Prior; + const maths_t::TWeightStyleVec* m_WeightStyles; + const TDouble1Vec* m_Samples; + const TDouble4Vec1Vec* m_Weights; + TDouble1Vec m_Resamples; + TDouble4Vec1Vec m_ResamplesWeights; + }; + + //! \brief Computes the likelihood of a collection of samples and + //! resamples at different offsets. + //! + //! This is used to maximize the data likelihood w.r.t. the choice + //! of offset. + class MATHS_EXPORT COffsetCost : public COffsetParameters { + public: + using result_type = double; - //! The rate at which the prior returns to non-informative. Note that - //! this is not persisted. - CFloatStorage m_DecayRate; + public: + COffsetCost(CPrior& prior); - //! The number of samples with which the prior has been updated. - CFloatStorage m_NumberSamples; -}; + //! Compute the cost. + double operator()(double offset) const; + protected: + virtual void resetPriors(double offset) const; + virtual double computeCost(double offset) const; + }; + + //! \brief Apply a specified offset to a prior. + class MATHS_EXPORT CApplyOffset : public COffsetParameters { + public: + CApplyOffset(CPrior& prior); + + //! Apply the offset. + virtual void operator()(double offset) const; + }; + +protected: + //! The number of times we sample the prior when adjusting the offset. + static const std::size_t ADJUST_OFFSET_SAMPLE_SIZE; + +protected: + //! For priors with non-negative support this adjusts the offset used + //! to extend the support to handle negative samples by maximizing a + //! specified reward. + //! + //! \return The penalty to apply to the model in selection. 
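Schematically, the offset machinery above searches candidate offsets for the one with least cost, where COffsetCost derives the cost from the data likelihood at each candidate. A plain grid search conveys the idea; it is an illustration only, not the solver this code actually uses:

\code{cpp}
#include <functional>

// Illustration only: choose the offset in [min, max] minimising a cost
// functor, e.g. one derived from the data likelihood at that offset.
double bestOffset(const std::function<double(double)>& cost,
                  double min, double max, int steps = 20) {
    double best{min};
    double bestCost{cost(min)};
    for (int i = 1; i <= steps; ++i) {
        double candidate{min + (max - min) * static_cast<double>(i) / static_cast<double>(steps)};
        double candidateCost{cost(candidate)};
        if (candidateCost < bestCost) {
            bestCost = candidateCost;
            best = candidate;
        }
    }
    return best;
}
\endcode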
+ double adjustOffsetWithCost(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + COffsetCost& cost, + CApplyOffset& apply); + + //! Update the number of samples received to date by adding \p n. + void addSamples(double n); + + //! Get a debug description of the prior parameters. + virtual std::string debug() const; + +private: + //! If this is true then the prior is being used to model discrete + //! data. Note that this is not persisted and deduced from context. + maths_t::EDataType m_DataType; + + //! The rate at which the prior returns to non-informative. Note that + //! this is not persisted. + CFloatStorage m_DecayRate; + + //! The number of samples with which the prior has been updated. + CFloatStorage m_NumberSamples; +}; } } diff --git a/include/maths/CPriorDetail.h b/include/maths/CPriorDetail.h index d8cd144ced..212327ad1f 100644 --- a/include/maths/CPriorDetail.h +++ b/include/maths/CPriorDetail.h @@ -8,20 +8,13 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { template -bool CPrior::expectation(const F &f, - std::size_t numberIntervals, - T &result, - const TWeightStyleVec &weightStyles, - const TDouble4Vec &weight) const -{ - if (numberIntervals == 0) - { +bool CPrior::expectation(const F& f, std::size_t numberIntervals, T& result, const TWeightStyleVec& weightStyles, const TDouble4Vec& weight) + const { + if (numberIntervals == 0) { LOG_ERROR("Must specify non-zero number of intervals"); return false; } @@ -29,10 +22,7 @@ bool CPrior::expectation(const F &f, result = T(); double n{static_cast(numberIntervals)}; - TDoubleDoublePr interval{ - this->marginalLikelihoodConfidenceInterval(100.0 - 1.0 / (100.0 * n), - weightStyles, - weight)}; + TDoubleDoublePr interval{this->marginalLikelihoodConfidenceInterval(100.0 - 1.0 / (100.0 * n), weightStyles, weight)}; double x{interval.first}; double dx{(interval.second - interval.first) / n}; @@ -40,17 +30,12 @@ bool CPrior::expectation(const F &f, TDouble4Vec1Vec weights{weight}; CPrior::CLogMarginalLikelihood logLikelihood(*this, weightStyles, weights); CCompositeFunctions::CExp likelihood(logLikelihood); - for (std::size_t i = 0u; i < numberIntervals; ++i, x += dx) - { + for (std::size_t i = 0u; i < numberIntervals; ++i, x += dx) { T productIntegral; T fIntegral; double likelihoodIntegral; - if (!CIntegration::productGaussLegendre(f, likelihood, - x, x + dx, - productIntegral, - fIntegral, - likelihoodIntegral)) - { + if (!CIntegration::productGaussLegendre( + f, likelihood, x, x + dx, productIntegral, fIntegral, likelihoodIntegral)) { result = T(); return false; } diff --git a/include/maths/CPriorStateSerialiser.h b/include/maths/CPriorStateSerialiser.h index ff45d1834e..961946141a 100644 --- a/include/maths/CPriorStateSerialiser.h +++ b/include/maths/CPriorStateSerialiser.h @@ -14,15 +14,12 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CMultivariatePrior; class CPrior; struct SDistributionRestoreParams; @@ -39,37 +36,28 @@ struct SDistributionRestoreParams; //! name/value pairs where the value may be a nested set of name/value //! pairs. Text format makes it easier to provide backwards/forwards //! compatibility in the future as the classes evolve. 
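Both directions of the round trip are single calls on the function object declared below; constructing the concrete inserter and traverser is outside this header, so only the calls are sketched:

\code{cpp}
// Sketch of a persist/restore round trip with CPriorStateSerialiser;
// inserter and traverser construction is elided.
void persist(const ml::maths::CPrior& prior, ml::core::CStatePersistInserter& inserter) {
    ml::maths::CPriorStateSerialiser{}(prior, inserter);
}

bool restore(const ml::maths::SDistributionRestoreParams& params,
             ml::maths::CPriorStateSerialiser::TPriorPtr& ptr,
             ml::core::CStateRestoreTraverser& traverser) {
    // On failure ptr is set to null.
    return ml::maths::CPriorStateSerialiser{}(params, ptr, traverser);
}
\endcode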
-class MATHS_EXPORT CPriorStateSerialiser -{ - public: - using TPriorPtr = boost::shared_ptr; - using TMultivariatePriorPtr = boost::shared_ptr; - - public: - //! Construct the appropriate CPrior sub-class from its state - //! document representation. Sets \p ptr to NULL on failure. - bool operator()(const SDistributionRestoreParams ¶ms, - TPriorPtr &ptr, - core::CStateRestoreTraverser &traverser) const; - - //! Persist state by passing information to the supplied inserter - void operator()(const CPrior &prior, - core::CStatePersistInserter &inserter) const; - - //! Construct the appropriate CMultivariatePrior sub-class from - //! its state document representation. Sets \p ptr to NULL on - //! failure. - bool operator()(const SDistributionRestoreParams ¶ms, - TMultivariatePriorPtr &ptr, - core::CStateRestoreTraverser &traverser) const; - - //! Persist state by passing information to the supplied inserter - void operator()(const CMultivariatePrior &prior, - core::CStatePersistInserter &inserter) const; +class MATHS_EXPORT CPriorStateSerialiser { +public: + using TPriorPtr = boost::shared_ptr; + using TMultivariatePriorPtr = boost::shared_ptr; + +public: + //! Construct the appropriate CPrior sub-class from its state + //! document representation. Sets \p ptr to NULL on failure. + bool operator()(const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser) const; + + //! Persist state by passing information to the supplied inserter + void operator()(const CPrior& prior, core::CStatePersistInserter& inserter) const; + + //! Construct the appropriate CMultivariatePrior sub-class from + //! its state document representation. Sets \p ptr to NULL on + //! failure. + bool operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, core::CStateRestoreTraverser& traverser) const; + + //! Persist state by passing information to the supplied inserter + void operator()(const CMultivariatePrior& prior, core::CStatePersistInserter& inserter) const; }; - } } #endif // INCLUDED_ml_maths_CPriorStateSerialiser_h - diff --git a/include/maths/CProbabilityCalibrator.h b/include/maths/CProbabilityCalibrator.h index 214f9a99aa..6b6122ba49 100644 --- a/include/maths/CProbabilityCalibrator.h +++ b/include/maths/CProbabilityCalibrator.h @@ -11,16 +11,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CQDigest; //! \brief Calibrates a collection of probabilities. @@ -30,57 +26,50 @@ class CQDigest; //! to the historical empirical distribution of probabilities, // i.e. we expect to see a probability of \f$p <= f\f$ approximately //! \f$f * n\f$ given \f$n\f$ historical probabilities. -class MATHS_EXPORT CProbabilityCalibrator -{ - public: - //! The type of calibration to perform: - //! -# Partial - only increase probabilities using the - //! historical fractions less the cutoff. Don't use - //! the fractions smaller than the cutoff instead - //! scale probabilities so the transform is continuous. - //! -# Full - perform a full calibration to historical - //! fractions. - enum EStyle - { - E_PartialCalibration = 0, - E_FullCalibration = 1 - }; - - public: - CProbabilityCalibrator(EStyle style, double cutoffProbability); - - //! \name Serialization - //@{ - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! 
Create from an XML node tree. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - //@} - - //! Add \p probability. - void add(double probability); - - //! Calibrate \p probability to the historic empirical - //! distribution of probabilities. - double calibrate(double probability) const; - - private: - using TQDigestPtr = boost::shared_ptr; - - private: - //! The type of calibration to perform. - EStyle m_Style; - - //! The smallest probability where we enforce a match - //! with the historical fraction. - double m_CutoffProbability; - - //! A summary of the historical probability quantiles. - TQDigestPtr m_DiscreteProbabilityQuantiles; +class MATHS_EXPORT CProbabilityCalibrator { +public: + //! The type of calibration to perform: + //! -# Partial - only increase probabilities using the + //! historical fractions less the cutoff. Don't use + //! the fractions smaller than the cutoff instead + //! scale probabilities so the transform is continuous. + //! -# Full - perform a full calibration to historical + //! fractions. + enum EStyle { E_PartialCalibration = 0, E_FullCalibration = 1 }; + +public: + CProbabilityCalibrator(EStyle style, double cutoffProbability); + + //! \name Serialization + //@{ + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Create from an XML node tree. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + //@} + + //! Add \p probability. + void add(double probability); + + //! Calibrate \p probability to the historic empirical + //! distribution of probabilities. + double calibrate(double probability) const; + +private: + using TQDigestPtr = boost::shared_ptr; + +private: + //! The type of calibration to perform. + EStyle m_Style; + + //! The smallest probability where we enforce a match + //! with the historical fraction. + double m_CutoffProbability; + + //! A summary of the historical probability quantiles. + TQDigestPtr m_DiscreteProbabilityQuantiles; }; - - } } diff --git a/include/maths/CQDigest.h b/include/maths/CQDigest.h index 751709a9dc..bbebb6a019 100644 --- a/include/maths/CQDigest.h +++ b/include/maths/CQDigest.h @@ -18,16 +18,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief This class implements the q-digest quantile approximation. //! @@ -79,362 +75,346 @@ namespace maths //! This uses the fact the maximum length of the q-digest is \f$3k\f$ //! to ensure constant complexity of all operations at various points //! and to reserve sufficient memory up front for our node allocator. -class MATHS_EXPORT CQDigest : private core::CNonCopyable -{ - public: - using TUInt32UInt64Pr = std::pair; - using TUInt32UInt64PrVec = std::vector; - +class MATHS_EXPORT CQDigest : private core::CNonCopyable { +public: + using TUInt32UInt64Pr = std::pair; + using TUInt32UInt64PrVec = std::vector; + +public: + //! \name XML Tag Names + //! + //! These tag the member variables for persistence. + //@{ + static const std::string K_TAG; + static const std::string N_TAG; + static const std::string NODE_TAG; + //@} + +public: + CQDigest(uint64_t k, double decayRate = 0.0); + + //! \name Serialization + //@{ + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Create from an XML node tree. 
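For orientation, end-to-end use of the digest needs only the constructor, add and quantile; the data here is assumed:

\code{cpp}
#include <maths/CQDigest.h>

#include <cstdint>
#include <vector>

// Sketch: digest a stream of non-negative integer values with k = 100
// and read off the approximate median.
uint32_t approximateMedian(const std::vector<uint32_t>& values) {
    ml::maths::CQDigest digest{100};
    for (auto value : values) {
        digest.add(value);
    }
    uint32_t median{0};
    // Returns false if the quantile could not be computed, e.g. for an
    // empty digest.
    digest.quantile(0.5, median);
    return median;
}
\endcode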
+ bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + //@} + + //! Add \p n values \p value to the q-digest. + void add(uint32_t value, uint64_t n = 1ull); + + //! Merge this and \p digest. + void merge(const CQDigest& digest); + + //! Lose information from the digest. This amounts to aging + //! the counts held by each node and reducing the total count + //! in the digest. + void propagateForwardsByTime(double time); + + //! Scale the quantiles by the specified factor. To be used + //! after upgrades if different versions of the product produce + //! different raw anomaly scores. + bool scale(double factor); + + //! Reset the quantiles to the state before any values were added. + void clear(); + + //! Compute the quantile \p q. + //! + //! \param[in] q The quantile should be in the range [0, 1] + //! and represents the fraction of values less than the + //! quantile value required. + //! \param[out] result Filled in with the quantile if the + //! digest isn't empty. + //! \return True if the quantile could be computed and + //! false otherwise. + bool quantile(double q, uint32_t& result) const; + + //! Find the largest value x such that upper bound of the + //! c.d.f. is less than \p f, i.e. \f$\sup_y{\{y:F(y); + using TSizeVecCItr = TSizeVec::const_iterator; + using TSizeGreater = std::greater; + + class CNode; + class CNodeAllocator; + + using TNodePtrVec = std::vector; + using TNodePtrVecItr = TNodePtrVec::iterator; + using TNodePtrVecCItr = TNodePtrVec::const_iterator; + using TNodePtrVecCRItr = TNodePtrVec::const_reverse_iterator; + + //! Orders node pointers by level order. + struct MATHS_EXPORT SLevelLess { + bool operator()(const CNode* lhs, const CNode* rhs) const; + }; + + //! Order node pointers by post order in completed tree. + struct MATHS_EXPORT SPostLess { + bool operator()(const CNode* lhs, const CNode* rhs) const; + }; + + //! Represents a node of the q-digest with convenience + //! operations for compression. + class MATHS_EXPORT CNode { public: //! \name XML Tag Names //! //! These tag the member variables for persistence. //@{ - static const std::string K_TAG; - static const std::string N_TAG; - static const std::string NODE_TAG; + static const std::string MIN_TAG; + static const std::string MAX_TAG; + static const std::string COUNT_TAG; //@} public: - CQDigest(uint64_t k, double decayRate = 0.0); + CNode(); + CNode(uint32_t min, uint32_t max, uint64_t count, uint64_t subtreeCount); - //! \name Serialization - //@{ - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Get the size of the q-digest rooted at this node. + std::size_t size() const; - //! Create from an XML node tree. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - //@} - - //! Add \p n values \p value to the q-digest. - void add(uint32_t value, uint64_t n = 1ull); + //! Get the approximate quantile \p n. + uint32_t quantile(uint64_t leftCount, uint64_t n) const; - //! Merge this and \p digest. - void merge(const CQDigest &digest); + //! Get the largest value of x for which the upper count + //! i.e. count of values definitely to the right of x, is + //! less than \p n. + bool quantileSublevelSetSupremum(uint64_t n, uint64_t leftCount, uint32_t& result) const; - //! Lose information from the digest. This amounts to aging - //! the counts held by each node and reducing the total count - //! in the digest. - void propagateForwardsByTime(double time); + //! 
Get the lower bound for the c.d.f. at \p x. + void cdfLowerBound(uint32_t x, uint64_t& result) const; - //! Scale the quantiles by the specified factor. To be used - //! after upgrades if different versions of the product produce - //! different raw anomaly scores. - bool scale(double factor); + //! Get the upper bound for the c.d.f. at \p x. + void cdfUpperBound(uint32_t x, uint64_t& result) const; - //! Reset the quantiles to the state before any values were added. - void clear(); + //! Get the maximum knot point less than \p x. + void sublevelSetSupremum(const int64_t x, uint32_t& result) const; - //! Compute the quantile \p q. - //! - //! \param[in] q The quantile should be in the range [0, 1] - //! and represents the fraction of values less than the - //! quantile value required. - //! \param[out] result Filled in with the quantile if the - //! digest isn't empty. - //! \return True if the quantile could be computed and - //! false otherwise. - bool quantile(double q, uint32_t &result) const; - - //! Find the largest value x such that upper bound of the - //! c.d.f. is less than \p f, i.e. \f$\sup_y{\{y:F(y); - using TSizeVecCItr = TSizeVec::const_iterator; - using TSizeGreater = std::greater; - - class CNode; - class CNodeAllocator; - - using TNodePtrVec = std::vector; - using TNodePtrVecItr = TNodePtrVec::iterator; - using TNodePtrVecCItr = TNodePtrVec::const_iterator; - using TNodePtrVecCRItr = TNodePtrVec::const_reverse_iterator; - - //! Orders node pointers by level order. - struct MATHS_EXPORT SLevelLess - { - bool operator()(const CNode *lhs, const CNode *rhs) const; - }; - - //! Order node pointers by post order in completed tree. - struct MATHS_EXPORT SPostLess - { - bool operator()(const CNode *lhs, const CNode *rhs) const; - }; - - //! Represents a node of the q-digest with convenience - //! operations for compression. - class MATHS_EXPORT CNode - { - public: - //! \name XML Tag Names - //! - //! These tag the member variables for persistence. - //@{ - static const std::string MIN_TAG; - static const std::string MAX_TAG; - static const std::string COUNT_TAG; - //@} - - public: - CNode(); - CNode(uint32_t min, - uint32_t max, - uint64_t count, - uint64_t subtreeCount); - - //! Get the size of the q-digest rooted at this node. - std::size_t size() const; - - //! Get the approximate quantile \p n. - uint32_t quantile(uint64_t leftCount, - uint64_t n) const; - - //! Get the largest value of x for which the upper count - //! i.e. count of values definitely to the right of x, is - //! less than \p n. - bool quantileSublevelSetSupremum(uint64_t n, - uint64_t leftCount, - uint32_t &result) const; - - //! Get the lower bound for the c.d.f. at \p x. - void cdfLowerBound(uint32_t x, uint64_t &result) const; - - //! Get the upper bound for the c.d.f. at \p x. - void cdfUpperBound(uint32_t x, uint64_t &result) const; - - //! Get the maximum knot point less than \p x. - void sublevelSetSupremum(const int64_t x, uint32_t &result) const; - - //! Get the minimum knot point greater than \p x. - void superlevelSetInfimum(uint32_t x, uint32_t &result) const; - - //! Fill in \p nodes with q-digest nodes in post-order. - void postOrder(TNodePtrVec &nodes) const; - - //! Expand the node to fit \p value. - CNode *expand(CNodeAllocator &allocator, const uint32_t &value); - - //! Insert the specified node at its lowest ancestor - //! in the q-digest. - CNode &insert(CNodeAllocator &allocator, const CNode &node); - - //! Compress the digest at the triple comprising this node, - //! 
its sibling and parent in the complete tree if they are - //! in the q-digest. - CNode *compress(CNodeAllocator &allocator, - uint64_t compressionFactor); - - //! Age the counts by the specified factor. - uint64_t age(double factor); - - //! Get the span of universe values covered by the node. - uint32_t span() const; - //! Get the minimum value covered by the node. - uint32_t min() const; - //! Get the maximum value covered by the node. - uint32_t max() const; - //! Get the count of entries in the node range. - const uint64_t &count() const; - //! Get the count in the subtree rooted at this node. - const uint64_t &subtreeCount() const; - - //! Persist this node and descendents - void persistRecursive(const std::string &nodeTag, - core::CStatePersistInserter &inserter) const; - - //! Create from an XML node tree. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Check the node invariants in the q-digest rooted at this node. - bool checkInvariants(uint64_t compressionFactor) const; - - //! Print for debug. - std::string print() const; - - private: - //! Persist state by passing information to the supplied - //! inserter - this should only be called by persistRecursive() - //! to ensure the whole tree gets persisted - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Test for equality. - bool operator==(const CNode &node) const; - - //! Get the index of the immediate ancestor in the q-digest. - CNode *ancestor() const; - //! Get the number of descendants. - std::size_t numberDescendants() const; - //! Get an iterator over the descendants. - TNodePtrVecCItr beginDescendants() const; - //! Get the end of the descendants. - TNodePtrVecCItr endDescendants() const; - //! Get the sibling of \p node if it exists in the q-digest. - CNode *sibling(const CNode &node) const; - - //! Is this a sibling of \p node? - bool isSibling(const CNode &node) const; - //! Is this a parent of \p node? - bool isParent(const CNode &node) const; - //! Is this an ancestor of \p node in the complete tree. - bool isAncestor(const CNode &node) const; - //! Is this node the root. - bool isRoot() const; - //! Is this node a leaf. - bool isLeaf() const; - //! Is this the left child of a node in the complete tree. - bool isLeftChild() const; - - //! Detach this node from the q-digest. - void detach(CNodeAllocator &allocator); - //! Remove \p node from the descendants. - void removeDescendant(CNode &node); - //! Take the descendants of \p node. - bool takeDescendants(CNode &node); - - private: - //! The immediate ancestor of this node in the q-digest. - CNode *m_Ancestor; - - //! The immediate descendants of this node in the q-digest. - TNodePtrVec m_Descendants; - - //! The minimum value covered by the node. - uint32_t m_Min; - - //! The maximum value covered by the node. - uint32_t m_Max; - - //! The count of the node. - uint64_t m_Count; - - //! The count in the subtree root at this node. - uint64_t m_SubtreeCount; - }; - - //! Manages the creation and recycling of nodes. - class MATHS_EXPORT CNodeAllocator - { - public: - CNodeAllocator(std::size_t size); - - //! Create a new node. - CNode &create(const CNode &node); - - //! Recycle \p node. - void release(CNode &node); - - private: - using TNodePtrVecVec = std::vector; - using TNodeVec = std::vector; - using TNodeVecCItr = std::vector::const_iterator; - using TNodeVecList = std::list; - using TNodeVecListItr = TNodeVecList::iterator; - using TNodeVecListCItr = TNodeVecList::const_iterator; - - private: - //! 
Find the block to which \p node belongs. - std::size_t findBlock(const CNode &node) const; - - private: - TNodeVecList m_Nodes; - TNodePtrVecVec m_FreeNodes; - }; + //! Persist state by passing information to the supplied + //! inserter - this should only be called by persistRecursive() + //! to ensure the whole tree gets persisted + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Test for equality. + bool operator==(const CNode& node) const; + + //! Get the index of the immediate ancestor in the q-digest. + CNode* ancestor() const; + //! Get the number of descendants. + std::size_t numberDescendants() const; + //! Get an iterator over the descendants. + TNodePtrVecCItr beginDescendants() const; + //! Get the end of the descendants. + TNodePtrVecCItr endDescendants() const; + //! Get the sibling of \p node if it exists in the q-digest. + CNode* sibling(const CNode& node) const; + + //! Is this a sibling of \p node? + bool isSibling(const CNode& node) const; + //! Is this a parent of \p node? + bool isParent(const CNode& node) const; + //! Is this an ancestor of \p node in the complete tree. + bool isAncestor(const CNode& node) const; + //! Is this node the root. + bool isRoot() const; + //! Is this node a leaf. + bool isLeaf() const; + //! Is this the left child of a node in the complete tree. + bool isLeftChild() const; + + //! Detach this node from the q-digest. + void detach(CNodeAllocator& allocator); + //! Remove \p node from the descendants. + void removeDescendant(CNode& node); + //! Take the descendants of \p node. + bool takeDescendants(CNode& node); + + private: + //! The immediate ancestor of this node in the q-digest. + CNode* m_Ancestor; + + //! The immediate descendants of this node in the q-digest. + TNodePtrVec m_Descendants; + + //! The minimum value covered by the node. + uint32_t m_Min; + + //! The maximum value covered by the node. + uint32_t m_Max; + + //! The count of the node. + uint64_t m_Count; + + //! The count in the subtree root at this node. + uint64_t m_SubtreeCount; + }; + + //! Manages the creation and recycling of nodes. + class MATHS_EXPORT CNodeAllocator { + public: + CNodeAllocator(std::size_t size); + + //! Create a new node. + CNode& create(const CNode& node); + + //! Recycle \p node. + void release(CNode& node); private: - //! Compress the q-digest bottom up in level order. - void compress(); + using TNodePtrVecVec = std::vector; + using TNodeVec = std::vector; + using TNodeVecCItr = std::vector::const_iterator; + using TNodeVecList = std::list; + using TNodeVecListItr = TNodeVecList::iterator; + using TNodeVecListCItr = TNodeVecList::const_iterator; - //! Starting at the lowest nodes in \p compress in level order - //! compress all q-digest paths bottom up in level order to the - //! root. - bool compress(TNodePtrVec &compress); + private: + //! Find the block to which \p node belongs. + std::size_t findBlock(const CNode& node) const; private: - //! Controls the maximum number of values stored. In particular, - //! the number of nodes is less than \f$3k\f$. - uint64_t m_K; - //! The number of values added to the q-digest. - uint64_t m_N; - //! The root node. - CNode *m_Root; - //! The node allocator. - CNodeAllocator m_NodeAllocator; - //! The rate at which information is lost by the digest. - double m_DecayRate; + TNodeVecList m_Nodes; + TNodePtrVecVec m_FreeNodes; + }; + +private: + //! Compress the q-digest bottom up in level order. + void compress(); + + //! 
Starting at the lowest nodes in \p compress in level order + //! compress all q-digest paths bottom up in level order to the + //! root. + bool compress(TNodePtrVec& compress); + +private: + //! Controls the maximum number of values stored. In particular, + //! the number of nodes is less than \f$3k\f$. + uint64_t m_K; + //! The number of values added to the q-digest. + uint64_t m_N; + //! The root node. + CNode* m_Root; + //! The node allocator. + CNodeAllocator m_NodeAllocator; + //! The rate at which information is lost by the digest. + double m_DecayRate; }; - } } diff --git a/include/maths/CQuantileSketch.h b/include/maths/CQuantileSketch.h index 2b31b029f7..aa7f0ca6ac 100644 --- a/include/maths/CQuantileSketch.h +++ b/include/maths/CQuantileSketch.h @@ -17,15 +17,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief A sketch suitable for c.d.f. queries on a 1d double valued //! random variable. @@ -46,144 +43,124 @@ namespace maths //! which are available for the q-digest, so if you know the range of the //! variable up front, that is a safer choice for approximate quantile //! estimation. -class MATHS_EXPORT CQuantileSketch : private boost::addable< CQuantileSketch > -{ - public: - using TFloatFloatPr = std::pair; - using TFloatFloatPrVec = std::vector; +class MATHS_EXPORT CQuantileSketch : private boost::addable { +public: + using TFloatFloatPr = std::pair; + using TFloatFloatPrVec = std::vector; - //! The types of interpolation used for computing the quantile. - enum EInterpolation - { - E_Linear, - E_PiecewiseConstant - }; + //! The types of interpolation used for computing the quantile. + enum EInterpolation { E_Linear, E_PiecewiseConstant }; - public: - CQuantileSketch(EInterpolation interpolation, std::size_t size); +public: + CQuantileSketch(EInterpolation interpolation, std::size_t size); - //! Create reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Create reading state from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Convert to a node tree. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Convert to a node tree. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Combine two sketches. - const CQuantileSketch &operator+=(const CQuantileSketch &rhs); + //! Combine two sketches. + const CQuantileSketch& operator+=(const CQuantileSketch& rhs); - //! Define a function operator for use with std:: algorithms. - inline void operator()(double x) - { - this->add(x); - } + //! Define a function operator for use with std:: algorithms. + inline void operator()(double x) { this->add(x); } - //! Add \p x to the sketch. - void add(double x, double n = 1.0); + //! Add \p x to the sketch. + void add(double x, double n = 1.0); - //! Age by scaling the counts. - void age(double factor); + //! Age by scaling the counts. + void age(double factor); - //! Get the c.d.f at \p x. - bool cdf(double x, double &result) const; + //! Get the c.d.f at \p x. + bool cdf(double x, double& result) const; - //! Get the minimum value added. - bool minimum(double &result) const; + //! Get the minimum value added. + bool minimum(double& result) const; - //! Get the maximum value added. - bool maximum(double &result) const; + //! Get the maximum value added. 
+ bool maximum(double& result) const; - //! Get the quantile corresponding to \p percentage. - bool quantile(double percentage, double &result) const; + //! Get the quantile corresponding to \p percentage. + bool quantile(double percentage, double& result) const; - //! Get the knot values. - const TFloatFloatPrVec &knots() const; + //! Get the knot values. + const TFloatFloatPrVec& knots() const; - //! Get the total count of points added. - double count() const; + //! Get the total count of points added. + double count() const; - //! Get a checksum of this object. - uint64_t checksum(uint64_t seed = 0) const; + //! Get a checksum of this object. + uint64_t checksum(uint64_t seed = 0) const; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! Get the memory used by this object. + std::size_t memoryUsage() const; - //! Check invariants. - bool checkInvariants() const; + //! Check invariants. + bool checkInvariants() const; - //! Print the sketch for debug. - std::string print() const; + //! Print the sketch for debug. + std::string print() const; - private: - //! Reduce to the maximum permitted size. - void reduce(); +private: + //! Reduce to the maximum permitted size. + void reduce(); - //! Sort and combine any co-located values. - void orderAndDeduplicate(); + //! Sort and combine any co-located values. + void orderAndDeduplicate(); - //! Get the target size for sketch post reduce. - std::size_t target() const; + //! Get the target size for sketch post reduce. + std::size_t target() const; - //! Compute the cost of combining \p vl and \p vr. - double cost(const TFloatFloatPr &vl, const TFloatFloatPr &vr) const; + //! Compute the cost of combining \p vl and \p vr. + double cost(const TFloatFloatPr& vl, const TFloatFloatPr& vr) const; - private: - //! The style of interpolation to use. - EInterpolation m_Interpolation; - //! The maximum permitted size for the sketch. - std::size_t m_MaxSize; - //! The number of unsorted values. - std::size_t m_Unsorted; - //! The values and counts used as knot points in a linear - //! interpolation of the c.d.f. - TFloatFloatPrVec m_Knots; - //! The total count of points in the sketch. - double m_Count; +private: + //! The style of interpolation to use. + EInterpolation m_Interpolation; + //! The maximum permitted size for the sketch. + std::size_t m_MaxSize; + //! The number of unsorted values. + std::size_t m_Unsorted; + //! The values and counts used as knot points in a linear + //! interpolation of the c.d.f. + TFloatFloatPrVec m_Knots; + //! The total count of points in the sketch. + double m_Count; }; //! \brief Template wrapper for fixed size sketches which can be //! default constructed. template -class CFixedQuantileSketch : public CQuantileSketch -{ - public: - CFixedQuantileSketch() : CQuantileSketch(INTERPOLATION, N) {} - - //! NB1: Needs to be redeclared to work with CChecksum. - //! NB2: This method is not currently virtual - needs changing if any of the - //! methods of this class ever do anything other than forward to the base class - uint64_t checksum(uint64_t seed = 0) const - { - return this->CQuantileSketch::checksum(seed); - } - - //! Debug the memory used by this object. - //! NB1: Needs to be redeclared to work with CMemoryDebug. - //! 
NB2: This method is not currently virtual - needs changing if any of the - //! methods of this class ever do anything other than forward to the base class - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - this->CQuantileSketch::debugMemoryUsage(mem); - } - - //! Get the memory used by this object. - //! NB1: Needs to be redeclared to work with CMemory. - //! NB2: This method is not currently virtual - needs changing if any of the - //! methods of this class ever do anything other than forward to the base class - std::size_t memoryUsage() const - { - return this->CQuantileSketch::memoryUsage(); - } +class CFixedQuantileSketch : public CQuantileSketch { +public: + CFixedQuantileSketch() : CQuantileSketch(INTERPOLATION, N) {} + + //! NB1: Needs to be redeclared to work with CChecksum. + //! NB2: This method is not currently virtual - needs changing if any of the + //! methods of this class ever do anything other than forward to the base class + uint64_t checksum(uint64_t seed = 0) const { return this->CQuantileSketch::checksum(seed); } + + //! Debug the memory used by this object. + //! NB1: Needs to be redeclared to work with CMemoryDebug. + //! NB2: This method is not currently virtual - needs changing if any of the + //! methods of this class ever do anything other than forward to the base class + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { this->CQuantileSketch::debugMemoryUsage(mem); } + + //! Get the memory used by this object. + //! NB1: Needs to be redeclared to work with CMemory. + //! NB2: This method is not currently virtual - needs changing if any of the + //! methods of this class ever do anything other than forward to the base class + std::size_t memoryUsage() const { return this->CQuantileSketch::memoryUsage(); } }; //! Write to stream using print member. -inline std::ostream &operator<<(std::ostream &o, const CQuantileSketch &qs) -{ +inline std::ostream& operator<<(std::ostream& o, const CQuantileSketch& qs) { return o << qs.print(); } - } } diff --git a/include/maths/CRadialBasisFunction.h b/include/maths/CRadialBasisFunction.h index 040771f83c..27a796c324 100644 --- a/include/maths/CRadialBasisFunction.h +++ b/include/maths/CRadialBasisFunction.h @@ -9,10 +9,8 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Common interface implemented by all our radial basis //! functions. @@ -26,76 +24,58 @@ namespace maths //! The radial basis function hierarchy is stateless to keep the //! representation as compact as possible. (All functions are //! supplied the centre.) -class MATHS_EXPORT CRadialBasisFunction -{ - public: - virtual ~CRadialBasisFunction(); - - //! Create a copy of this object. - //! - //! \warning The caller owns this copy. - virtual CRadialBasisFunction *clone() const = 0; - - //! \brief Evaluate the basis function with centre \p centre - //! at the point \p x. - virtual double value(double x, - double centre, - double scale = 1.0) const = 0; - - //! \brief Evaluate the derivative of the basis function w.r.t. - //! its argument x, with centre \p centre at the point - //! \p x. - virtual double derivative(double x, - double centre, - double scale = 1.0) const = 0; - - //! \brief Solves for the scale that gives the \p value at a - //! distance \p distance from the centre of the radial basis - //! function, i.e. the value \f$\epsilon^*\f$ s.t. - //!
-        //!   \f$\displaystyle \phi_{\epsilon^*}(\left \|d - c \right \|) = v\f$
-        //! </pre>
-        //!
-        //! \note That \p value must be in the range (0, 1).
-        virtual bool scale(double distance,
-                           double value,
-                           double &result) const = 0;
-
-        //! \brief Get the mean value of this function on the interval
-        //! [\p a, \p b], i.e. the result of:
-        //! <pre class="fragment">
-        //!   \f$\displaystyle \frac{1}{b - a}\int_{[a,b]}{\phi_{\epsilon}(\left \|u - c \right \|)}du\f$
-        //! </pre>
-        //!
-        //! \note \p b should be greater than or equal to \p a.
-        virtual double mean(double a,
-                            double b,
-                            double centre,
-                            double scale = 1.0) const = 0;
-
-        //! \brief Get the mean square derivative of the basis function
-        //! on the interval [\p a, \p b], i.e. the result of:
-        //! <pre class="fragment">
-        //!  \f$\displaystyle \frac{1}{b - a}\int_{[a,b]}{\phi_{\epsilon}'(\left \|u - c \right \|)^2}du\f$
-        //! </pre>
-        //!
-        //! \note \p b should be greater than or equal to \p a.
-        virtual double meanSquareDerivative(double a,
-                                            double b,
-                                            double centre,
-                                            double scale = 1.0) const = 0;
-
-        //! \brief Get the integral of the product of two basis functions
-        //! on the interval \f$[a,b]\f$, i.e.
-        //! <pre class="fragment">
-        //!   \f$\displaystyle \frac{1}{b - a} \int_a^b{\phi_{\epsilon}(\left \|u - c_1 \right \|)\phi_{\epsilon}(\left \|u - c_2 \right \|)}du\f$
-        //! </pre>
-        virtual double product(double a,
-                               double b,
-                               double centre1,
-                               double centre2,
-                               double scale1 = 1.0,
-                               double scale2 = 1.0) const = 0;
+class MATHS_EXPORT CRadialBasisFunction {
+public:
+    virtual ~CRadialBasisFunction();
+
+    //! Create a copy of this object.
+    //!
+    //! \warning The caller owns this copy.
+    virtual CRadialBasisFunction* clone() const = 0;
+
+    //! \brief Evaluate the basis function with centre \p centre
+    //! at the point \p x.
+    virtual double value(double x, double centre, double scale = 1.0) const = 0;
+
+    //! \brief Evaluate the derivative of the basis function w.r.t.
+    //! its argument x, with centre \p centre at the point
+    //! \p x.
+    virtual double derivative(double x, double centre, double scale = 1.0) const = 0;
+
+    //! \brief Solves for the scale that gives the \p value at a
+    //! distance \p distance from the centre of the radial basis
+    //! function, i.e. the value \f$\epsilon^*\f$ s.t.
+    //! <pre class="fragment">
+    //!   \f$\displaystyle \phi_{\epsilon^*}(\left \|d - c \right \|) = v\f$
+    //! </pre>
+    //!
+    //! \note That \p value must be in the range (0, 1).
+    virtual bool scale(double distance, double value, double& result) const = 0;
+
+    //! \brief Get the mean value of this function on the interval
+    //! [\p a, \p b], i.e. the result of:
+    //! <pre class="fragment">
+    //!   \f$\displaystyle \frac{1}{b - a}\int_{[a,b]}{\phi_{\epsilon}(\left \|u - c \right \|)}du\f$
+    //! </pre>
+    //!
+    //! \note \p b should be greater than or equal to \p a.
+    virtual double mean(double a, double b, double centre, double scale = 1.0) const = 0;
+
+    //! \brief Get the mean square derivative of the basis function
+    //! on the interval [\p a, \p b], i.e. the result of:
+    //! <pre class="fragment">
+    //!  \f$\displaystyle \frac{1}{b - a}\int_{[a,b]}{\phi_{\epsilon}'(\left \|u - c \right \|)^2}du\f$
+    //! </pre>
+    //!
+    //! \note \p b should be greater than or equal to \p a.
+    virtual double meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const = 0;
+
+    //! \brief Get the integral of the product of two basis functions
+    //! on the interval \f$[a,b]\f$, i.e.
+    //! <pre class="fragment">
+    //!   \f$\displaystyle \frac{1}{b - a} \int_a^b{\phi_{\epsilon}(\left \|u - c_1 \right \|)\phi_{\epsilon}(\left \|u - c_2 \right \|)}du\f$
+    //! </pre>
+ virtual double product(double a, double b, double centre1, double centre2, double scale1 = 1.0, double scale2 = 1.0) const = 0; }; //! \brief The Gaussian radial basis function. @@ -108,56 +88,38 @@ class MATHS_EXPORT CRadialBasisFunction //! //! Here, \f$\epsilon\f$ denotes the scale and \f$c\f$ the centre //! of the basis function. -class MATHS_EXPORT CGaussianBasisFunction : public CRadialBasisFunction -{ - public: - //! Create a copy of this object. - //! - //! \warning The caller owns this copy. - virtual CGaussianBasisFunction *clone() const; - - //! \brief Evaluate the basis function with centre \p centre - //! at the point \p x. - virtual double value(double x, - double centre, - double scale = 1.0) const; - - //! \brief Evaluate the derivative of the basis function w.r.t. - //! its argument x, with centre \p centre at the point - //! \p x. - virtual double derivative(double x, - double centre, - double scale = 1.0) const; - - //! \brief Solves for the scale that gives the \p value at a - //! distance \p distance from the centre of the radial basis - //! function. - virtual bool scale(double distance, - double value, - double &result) const; - - //! \brief Get the mean value of this function on the specified - //! interval [\p a, \p b]. - virtual double mean(double a, - double b, - double centre, - double scale = 1.0) const; - - //! \brief Get the mean square derivative of the basis function - //! on the interval [\p a, \p b], i.e. the result of: - virtual double meanSquareDerivative(double a, - double b, - double centre, - double scale = 1.0) const; - - //! \brief Get the integral of the product of two basis functions - //! on the interval [\p a, \p b]. - virtual double product(double a, - double b, - double centre1, - double centre2, - double scale1 = 1.0, - double scale2 = 1.0) const; +class MATHS_EXPORT CGaussianBasisFunction : public CRadialBasisFunction { +public: + //! Create a copy of this object. + //! + //! \warning The caller owns this copy. + virtual CGaussianBasisFunction* clone() const; + + //! \brief Evaluate the basis function with centre \p centre + //! at the point \p x. + virtual double value(double x, double centre, double scale = 1.0) const; + + //! \brief Evaluate the derivative of the basis function w.r.t. + //! its argument x, with centre \p centre at the point + //! \p x. + virtual double derivative(double x, double centre, double scale = 1.0) const; + + //! \brief Solves for the scale that gives the \p value at a + //! distance \p distance from the centre of the radial basis + //! function. + virtual bool scale(double distance, double value, double& result) const; + + //! \brief Get the mean value of this function on the specified + //! interval [\p a, \p b]. + virtual double mean(double a, double b, double centre, double scale = 1.0) const; + + //! \brief Get the mean square derivative of the basis function + //! on the interval [\p a, \p b], i.e. the result of: + virtual double meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const; + + //! \brief Get the integral of the product of two basis functions + //! on the interval [\p a, \p b]. + virtual double product(double a, double b, double centre1, double centre2, double scale1 = 1.0, double scale2 = 1.0) const; }; //! \brief The inverse quadratic radial basis function. @@ -170,58 +132,39 @@ class MATHS_EXPORT CGaussianBasisFunction : public CRadialBasisFunction //! //! Here, \f$\epsilon\f$ denotes the scale and \f$c\f$ the centre //! of the basis function. 
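The hunk context above elides the actual formulas for the two concrete basis functions in this file, so for reference here is a minimal standalone sketch assuming the standard definitions suggested by the surrounding comments' use of \f$\epsilon\f$ (scale) and \f$c\f$ (centre); the helper names are hypothetical and not part of the library.

    #include <cmath>
    #include <iostream>

    // Assumed Gaussian form: phi(x) = exp(-(scale * |x - centre|)^2).
    double gaussian(double x, double centre, double scale = 1.0) {
        double r = scale * (x - centre);
        return std::exp(-r * r);
    }

    // Assumed inverse quadratic form: phi(x) = 1 / (1 + (scale * |x - centre|)^2).
    double inverseQuadratic(double x, double centre, double scale = 1.0) {
        double r = scale * (x - centre);
        return 1.0 / (1.0 + r * r);
    }

    // scale() has a closed form in the Gaussian case: solving
    // exp(-(eps * d)^2) = v gives eps = sqrt(-ln(v)) / d, which is
    // why the interface requires v in (0, 1).
    bool gaussianScale(double distance, double value, double& result) {
        if (value <= 0.0 || value >= 1.0 || distance == 0.0) {
            return false;
        }
        result = std::sqrt(-std::log(value)) / distance;
        return true;
    }

    int main() {
        double eps = 0.0;
        if (gaussianScale(2.0, 0.5, eps)) {
            // Round trip: gaussian(2.0, 0.0, eps) recovers 0.5.
            std::cout << eps << ' ' << gaussian(2.0, 0.0, eps) << '\n';
        }
        std::cout << inverseQuadratic(1.0, 0.0) << '\n';
        return 0;
    }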
-class MATHS_EXPORT CInverseQuadraticBasisFunction : public CRadialBasisFunction -{ - public: - //! Create a copy of this object. - //! - //! \warning The caller owns this copy. - CInverseQuadraticBasisFunction *clone() const; - - //! \brief Evaluate the basis function with centre \p centre - //! at the point \p x. - virtual double value(double x, - double centre, - double scale = 1.0) const; - - //! \brief Evaluate the derivative of the basis function w.r.t. - //! its argument x, with centre \p centre at the point - //! \p x. - virtual double derivative(double x, - double centre, - double scale = 1.0) const; - - //! \brief Solves for the scale that gives the \p value at a - //! distance \p distance from the centre of the radial basis - //! function. - virtual bool scale(double distance, - double value, - double &result) const; - - //! \brief Get the mean value of this function on the specified - //! interval [\p a, \p b]. - virtual double mean(double a, - double b, - double centre, - double scale = 1.0) const; - - //! \brief Get the mean square derivative of the basis function - //! on the interval [\p a, \p b], i.e. the result of: - virtual double meanSquareDerivative(double a, - double b, - double centre, - double scale = 1.0) const; - - //! \brief Get the integral of the product of two basis functions - //! on the interval [\p a, \p b]. - virtual double product(double a, - double b, - double centre1, - double centre2, - double scale1 = 1.0, - double scale2 = 1.0) const; +class MATHS_EXPORT CInverseQuadraticBasisFunction : public CRadialBasisFunction { +public: + //! Create a copy of this object. + //! + //! \warning The caller owns this copy. + CInverseQuadraticBasisFunction* clone() const; + + //! \brief Evaluate the basis function with centre \p centre + //! at the point \p x. + virtual double value(double x, double centre, double scale = 1.0) const; + + //! \brief Evaluate the derivative of the basis function w.r.t. + //! its argument x, with centre \p centre at the point + //! \p x. + virtual double derivative(double x, double centre, double scale = 1.0) const; + + //! \brief Solves for the scale that gives the \p value at a + //! distance \p distance from the centre of the radial basis + //! function. + virtual bool scale(double distance, double value, double& result) const; + + //! \brief Get the mean value of this function on the specified + //! interval [\p a, \p b]. + virtual double mean(double a, double b, double centre, double scale = 1.0) const; + + //! \brief Get the mean square derivative of the basis function + //! on the interval [\p a, \p b], i.e. the result of: + virtual double meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const; + + //! \brief Get the integral of the product of two basis functions + //! on the interval [\p a, \p b]. + virtual double product(double a, double b, double centre1, double centre2, double scale1 = 1.0, double scale2 = 1.0) const; }; - } } diff --git a/include/maths/CRandomProjectionClusterer.h b/include/maths/CRandomProjectionClusterer.h index 1a4177480e..501f40eea1 100644 --- a/include/maths/CRandomProjectionClusterer.h +++ b/include/maths/CRandomProjectionClusterer.h @@ -28,10 +28,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Common functionality for random projection clustering. //! @@ -58,661 +56,558 @@ namespace maths //! //! 
For more details see http://people.ee.duke.edu/~lcarin/random-projection-for-high.pdf template -class CRandomProjectionClusterer -{ - public: - using TDoubleVec = std::vector; - using TSizeVec = std::vector; - - public: - virtual ~CRandomProjectionClusterer() = default; - - //! Set up the projections. - virtual bool initialise(std::size_t numberProjections, - std::size_t dimension) - { - m_Dimension = dimension; - if (!this->generateProjections(numberProjections)) - { - LOG_ERROR("Failed to generate projections"); - return false; - } - return true; +class CRandomProjectionClusterer { +public: + using TDoubleVec = std::vector; + using TSizeVec = std::vector; + +public: + virtual ~CRandomProjectionClusterer() = default; + + //! Set up the projections. + virtual bool initialise(std::size_t numberProjections, std::size_t dimension) { + m_Dimension = dimension; + if (!this->generateProjections(numberProjections)) { + LOG_ERROR("Failed to generate projections"); + return false; } + return true; + } - protected: - using TVector = CVector; - using TVectorVec = std::vector; - using TVectorArray = boost::array; - using TVectorArrayVec = std::vector; - - protected: - //! Get the random number generator. - CPRNG::CXorShift1024Mult &rng() const - { - return m_Rng; - } +protected: + using TVector = CVector; + using TVectorVec = std::vector; + using TVectorArray = boost::array; + using TVectorArrayVec = std::vector; - //! Get the projections. - const TVectorArrayVec &projections() const - { - return m_Projections; - } +protected: + //! Get the random number generator. + CPRNG::CXorShift1024Mult& rng() const { return m_Rng; } - //! Generate \p b random projections. - bool generateProjections(std::size_t b) - { - m_Projections.clear(); + //! Get the projections. + const TVectorArrayVec& projections() const { return m_Projections; } - if (b == 0) - { - return true; - } + //! Generate \p b random projections. + bool generateProjections(std::size_t b) { + m_Projections.clear(); - if (m_Dimension <= N) - { - m_Projections.resize(1); - TVectorArray &projection = m_Projections[0]; - for (std::size_t i = 0u; i < N; ++i) - { - projection[i].extend(m_Dimension, 0.0); - if (i < m_Dimension) - { - projection[i](i) = 1.0; - } - } - return true; - } - - m_Projections.resize(b); - - TDoubleVec components; - CSampling::normalSample(m_Rng, 0.0, 1.0, b * N * m_Dimension, components); - for (std::size_t i = 0u; i < b; ++i) - { - TVectorArray &projection = m_Projections[i]; - for (std::size_t j = 0u; j < N; ++j) - { - projection[j].assign(&components[(i * N + j ) * m_Dimension], - &components[(i * N + j + 1) * m_Dimension]); - } + if (b == 0) { + return true; + } - if (!CGramSchmidt::basis(projection)) - { - LOG_ERROR("Failed to construct basis"); - return false; + if (m_Dimension <= N) { + m_Projections.resize(1); + TVectorArray& projection = m_Projections[0]; + for (std::size_t i = 0u; i < N; ++i) { + projection[i].extend(m_Dimension, 0.0); + if (i < m_Dimension) { + projection[i](i) = 1.0; } } - return true; } - //! Extend the projections for an increase in data - //! dimension to \p dimension. 
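For a concrete picture of what generateProjections() above does, the following sketch draws N Gaussian directions and orthonormalises them. It assumes CGramSchmidt::basis performs classical Gram-Schmidt (a reading of the call sites, not confirmed here), substitutes std::mt19937_64 and std::normal_distribution for CPRNG::CXorShift1024Mult and CSampling::normalSample, and the function name is hypothetical.

    #include <cmath>
    #include <cstddef>
    #include <random>
    #include <vector>

    using TDoubleVec = std::vector<double>;
    using TDoubleVecVec = std::vector<TDoubleVec>;

    // Draw n random directions in R^dimension and orthonormalise them.
    // Requires n <= dimension for the rows to span an n-dimensional subspace.
    bool orthonormalBasis(std::size_t n, std::size_t dimension,
                          std::mt19937_64& rng, TDoubleVecVec& basis) {
        std::normal_distribution<double> normal(0.0, 1.0);
        basis.assign(n, TDoubleVec(dimension));
        for (auto& row : basis) {
            for (auto& x : row) {
                x = normal(rng);
            }
        }
        for (std::size_t i = 0; i < n; ++i) {
            for (std::size_t j = 0; j < i; ++j) {
                double inner = 0.0;
                for (std::size_t k = 0; k < dimension; ++k) {
                    inner += basis[i][k] * basis[j][k];
                }
                // Remove the component along the j'th, already unit length, row.
                for (std::size_t k = 0; k < dimension; ++k) {
                    basis[i][k] -= inner * basis[j][k];
                }
            }
            double norm = 0.0;
            for (double x : basis[i]) {
                norm += x * x;
            }
            if (norm == 0.0) {
                return false; // Degenerate draw: mirrors the failure path above.
            }
            norm = std::sqrt(norm);
            for (auto& x : basis[i]) {
                x /= norm;
            }
        }
        return true;
    }

Each of the b projections in the code above is one such N-row basis, which is why the components are drawn in a single block of b * N * dimension normal samples.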
- bool extendProjections(std::size_t dimension) - { - using TDoubleVecArray = boost::array; + m_Projections.resize(b); - if (dimension <= m_Dimension) - { - return true; + TDoubleVec components; + CSampling::normalSample(m_Rng, 0.0, 1.0, b * N * m_Dimension, components); + for (std::size_t i = 0u; i < b; ++i) { + TVectorArray& projection = m_Projections[i]; + for (std::size_t j = 0u; j < N; ++j) { + projection[j].assign(&components[(i * N + j) * m_Dimension], &components[(i * N + j + 1) * m_Dimension]); } - else if (dimension <= N) - { - TVectorArray &projection = m_Projections[0]; - for (std::size_t i = m_Dimension; i < dimension; ++i) - { - projection[i](i) = 1.0; - } - return true; + + if (!CGramSchmidt::basis(projection)) { + LOG_ERROR("Failed to construct basis"); + return false; } + } - std::size_t b = m_Projections.size(); - std::size_t d = dimension - m_Dimension; - double alpha = static_cast(m_Dimension) - / static_cast(dimension); - double beta = 1.0 - alpha; - - TDoubleVecArray extension; - TDoubleVec components; - CSampling::normalSample(m_Rng, 0.0, 1.0, b * N * d, components); - for (std::size_t i = 0u; i < b; ++i) - { - for (std::size_t j = 0u; j < N; ++j) - { - extension[j].assign(&components[(i * N + j ) * d], - &components[(i * N + j + 1) * d]); - } + return true; + } - if (!CGramSchmidt::basis(extension)) - { - LOG_ERROR("Failed to construct basis"); - return false; - } + //! Extend the projections for an increase in data + //! dimension to \p dimension. + bool extendProjections(std::size_t dimension) { + using TDoubleVecArray = boost::array; - for (std::size_t j = 0u; j < N; ++j) - { - scale(extension[j], beta); - TVector &projection = m_Projections[i][j]; - projection *= alpha; - projection.reserve(dimension); - projection.extend(extension[j].begin(), extension[j].end()); - } + if (dimension <= m_Dimension) { + return true; + } else if (dimension <= N) { + TVectorArray& projection = m_Projections[0]; + for (std::size_t i = m_Dimension; i < dimension; ++i) { + projection[i](i) = 1.0; } - return true; } - private: - //! Scale the values in the vector \p x by \p scale. - void scale(TDoubleVec &x, double scale) - { - for (std::size_t i = 0u; i < x.size(); ++i) - { - x[i] *= scale; + std::size_t b = m_Projections.size(); + std::size_t d = dimension - m_Dimension; + double alpha = static_cast(m_Dimension) / static_cast(dimension); + double beta = 1.0 - alpha; + + TDoubleVecArray extension; + TDoubleVec components; + CSampling::normalSample(m_Rng, 0.0, 1.0, b * N * d, components); + for (std::size_t i = 0u; i < b; ++i) { + for (std::size_t j = 0u; j < N; ++j) { + extension[j].assign(&components[(i * N + j) * d], &components[(i * N + j + 1) * d]); + } + + if (!CGramSchmidt::basis(extension)) { + LOG_ERROR("Failed to construct basis"); + return false; } + + for (std::size_t j = 0u; j < N; ++j) { + scale(extension[j], beta); + TVector& projection = m_Projections[i][j]; + projection *= alpha; + projection.reserve(dimension); + projection.extend(extension[j].begin(), extension[j].end()); + } + } + + return true; + } + +private: + //! Scale the values in the vector \p x by \p scale. + void scale(TDoubleVec& x, double scale) { + for (std::size_t i = 0u; i < x.size(); ++i) { + x[i] *= scale; } + } - private: - //! The random number generator. - mutable CPRNG::CXorShift1024Mult m_Rng; +private: + //! The random number generator. + mutable CPRNG::CXorShift1024Mult m_Rng; - //! The dimension of the data to project. - std::size_t m_Dimension; + //! The dimension of the data to project. 
+ std::size_t m_Dimension; - //! The projections. - TVectorArrayVec m_Projections; + //! The projections. + TVectorArrayVec m_Projections; }; //! \brief Implements random projection clustering for batches //! of data points. template -class CRandomProjectionClustererBatch : public CRandomProjectionClusterer -{ - public: - using TDoubleVec = typename CRandomProjectionClusterer::TDoubleVec; - using TSizeVec = typename CRandomProjectionClusterer::TSizeVec; - using TVector = typename CRandomProjectionClusterer::TVector; - using TVectorVec = typename CRandomProjectionClusterer::TVectorVec; - using TDoubleVecVec = std::vector; - using TSizeVecVec = std::vector; - using TSizeUSet = boost::unordered_set; - using TVectorNx1 = CVectorNx1; - using TEigenVectorNx1 = typename SDenseVector::Type; - using TVectorNx1Vec = std::vector; - using TVectorNx1VecVec = std::vector; - using TSymmetricMatrixNxN = CSymmetricMatrixNxN; - using TSvdNxN = Eigen::JacobiSVD::Type>; - using TSvdNxNVec = std::vector; - using TSvdNxNVecVec = std::vector; - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TMeanAccumulatorVec = std::vector; - using TMeanAccumulatorVecVec = std::vector; - - public: - CRandomProjectionClustererBatch(double compression) : - m_Compression(compression) - {} - - virtual ~CRandomProjectionClustererBatch() = default; - - //! Create the \p numberProjections random projections. - //! - //! \param[in] numberProjections The number of projections - //! to create. - //! \param[in] dimension The dimension of the space to project. - virtual bool initialise(std::size_t numberProjections, std::size_t dimension) - { - m_ProjectedData.resize(numberProjections); - return this->CRandomProjectionClusterer::initialise(numberProjections, dimension); +class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { +public: + using TDoubleVec = typename CRandomProjectionClusterer::TDoubleVec; + using TSizeVec = typename CRandomProjectionClusterer::TSizeVec; + using TVector = typename CRandomProjectionClusterer::TVector; + using TVectorVec = typename CRandomProjectionClusterer::TVectorVec; + using TDoubleVecVec = std::vector; + using TSizeVecVec = std::vector; + using TSizeUSet = boost::unordered_set; + using TVectorNx1 = CVectorNx1; + using TEigenVectorNx1 = typename SDenseVector::Type; + using TVectorNx1Vec = std::vector; + using TVectorNx1VecVec = std::vector; + using TSymmetricMatrixNxN = CSymmetricMatrixNxN; + using TSvdNxN = Eigen::JacobiSVD::Type>; + using TSvdNxNVec = std::vector; + using TSvdNxNVecVec = std::vector; + using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TMeanAccumulatorVec = std::vector; + using TMeanAccumulatorVecVec = std::vector; + +public: + CRandomProjectionClustererBatch(double compression) : m_Compression(compression) {} + + virtual ~CRandomProjectionClustererBatch() = default; + + //! Create the \p numberProjections random projections. + //! + //! \param[in] numberProjections The number of projections + //! to create. + //! \param[in] dimension The dimension of the space to project. + virtual bool initialise(std::size_t numberProjections, std::size_t dimension) { + m_ProjectedData.resize(numberProjections); + return this->CRandomProjectionClusterer::initialise(numberProjections, dimension); + } + + //! Reserve space for \p n data points. + void reserve(std::size_t n) { + for (std::size_t i = 0u; i < m_ProjectedData.size(); ++i) { + m_ProjectedData[i].reserve(n); } - - //! Reserve space for \p n data points. 
- void reserve(std::size_t n) - { - for (std::size_t i = 0u; i < m_ProjectedData.size(); ++i) - { - m_ProjectedData[i].reserve(n); + } + + //! Add projected data for \p x. + void add(const TVector& x) { + for (std::size_t i = 0u; i < this->projections().size(); ++i) { + TVectorNx1 px; + for (std::size_t j = 0u; j < N; ++j) { + px(j) = this->projections()[i][j].inner(x); } + m_ProjectedData[i].push_back(px); } - - //! Add projected data for \p x. - void add(const TVector &x) - { - for (std::size_t i = 0u; i < this->projections().size(); ++i) - { - TVectorNx1 px; - for (std::size_t j = 0u; j < N; ++j) - { - px(j) = this->projections()[i][j].inner(x); - } - m_ProjectedData[i].push_back(px); - } + } + + //! Compute the clusters. + //! + //! \param[in] clusterer The object responsible for clustering + //! the projected data points. + //! \param[in] result Filled in with the final agglomerative + //! clustering of the different projections. + template + void run(CLUSTERER clusterer, TSizeVecVec& result) const { + if (m_ProjectedData.empty()) { + return; } - //! Compute the clusters. - //! - //! \param[in] clusterer The object responsible for clustering - //! the projected data points. - //! \param[in] result Filled in with the final agglomerative - //! clustering of the different projections. - template - void run(CLUSTERER clusterer, TSizeVecVec &result) const - { - if (m_ProjectedData.empty()) - { - return; - } - - std::size_t b = m_ProjectedData.size(); - - // Filled in with the weights of the clusterings. - TDoubleVecVec W(b); - // Filled in with the sample means of the clusterings. - TVectorNx1VecVec M(b); - // Filled in with the SVDs of the sample covariances - // of the clusterings. - TSvdNxNVecVec C(b); - // Filled in with the sample points indices. - TSizeUSet I; - - // Compute the projected clusterings and sampling. - this->clusterProjections(clusterer, W, M, C, I); - - // Compute the sample neighbourhoods. - std::size_t h = I.size(); - TSizeVecVec H(h); - this->neighbourhoods(I, H); - - // Compute the cluster similarities. - TDoubleVecVec S(h); - this->similarities(W, M, C, H, S); - - // Run agglomerative clustering and choose number of clusters. - this->clusterNeighbourhoods(S, H, result); + std::size_t b = m_ProjectedData.size(); + + // Filled in with the weights of the clusterings. + TDoubleVecVec W(b); + // Filled in with the sample means of the clusterings. + TVectorNx1VecVec M(b); + // Filled in with the SVDs of the sample covariances + // of the clusterings. + TSvdNxNVecVec C(b); + // Filled in with the sample points indices. + TSizeUSet I; + + // Compute the projected clusterings and sampling. + this->clusterProjections(clusterer, W, M, C, I); + + // Compute the sample neighbourhoods. + std::size_t h = I.size(); + TSizeVecVec H(h); + this->neighbourhoods(I, H); + + // Compute the cluster similarities. + TDoubleVecVec S(h); + this->similarities(W, M, C, H, S); + + // Run agglomerative clustering and choose number of clusters. + this->clusterNeighbourhoods(S, H, result); + } + +protected: + //! \brief Hashes a vector. + struct SHashVector { + template + std::size_t operator()(const VECTOR& lhs) const { + return static_cast(boost::unwrap_ref(lhs).checksum()); } - - protected: - //! \brief Hashes a vector. - struct SHashVector - { - template - std::size_t operator()(const VECTOR &lhs) const - { - return static_cast(boost::unwrap_ref(lhs).checksum()); - } - }; - //! \brief Checks two vectors for equality. 
- struct SVectorsEqual - { - template - bool operator()(const VECTOR &lhs, const VECTOR &rhs) const - { - return boost::unwrap_ref(lhs) == boost::unwrap_ref(rhs); + }; + //! \brief Checks two vectors for equality. + struct SVectorsEqual { + template + bool operator()(const VECTOR& lhs, const VECTOR& rhs) const { + return boost::unwrap_ref(lhs) == boost::unwrap_ref(rhs); + } + }; + +protected: + //! Compute the projected clusterings and find a good sampling + //! of the points on which to perform agglomerative clustering. + //! + //! \param[in] clusterer The object responsible for clustering + //! the projected data points. + //! \param[out] W Filled in with the cluster weights. + //! \param[out] M Filled in with the cluster sample means. + //! \param[out] C Filled in with the SVD of cluster sample + //! covariance matrices. + //! \param[out] I Filled in with the indices of distinct sampled + //! points. + template + void clusterProjections(CLUSTERER clusterer, TDoubleVecVec& W, TVectorNx1VecVec& M, TSvdNxNVecVec& C, TSizeUSet& I) const { + using TVectorNx1CRef = boost::reference_wrapper; + using TVectorNx1CRefSizeUMap = boost::unordered_map; + using TClusterVec = typename CLUSTERER::TClusterVec; + using TSampleCovariancesNxN = CBasicStatistics::SSampleCovariances; + + std::size_t b = m_ProjectedData.size(); + std::size_t n = m_ProjectedData[0].size(); + + // An index lookup for some projected points. + TVectorNx1CRefSizeUMap lookup(n); + // A placeholder for copy of i'th projected data. + TVectorNx1Vec P; + // Filled in with the probabilities of sampling the points (i,j)'th + // cluster. + TDoubleVec pij; + // Filled in with a mapping from the candidates for sampling to the + // actual points in the (i,j)'th cluster. + TSizeVec fij; + // Filled in with the samples of the (i,j)'th cluster. + TSizeVec sij; + + for (std::size_t i = 0u; i < b; ++i) { + LOG_TRACE("projection " << i); + P = m_ProjectedData[i]; + + // Create a lookup of points to their indices. + lookup.clear(); + lookup.rehash(P.size()); + for (std::size_t j = 0u; j < m_ProjectedData[i].size(); ++j) { + lookup[boost::cref(m_ProjectedData[i][j])] = j; } - }; - - protected: - //! Compute the projected clusterings and find a good sampling - //! of the points on which to perform agglomerative clustering. - //! - //! \param[in] clusterer The object responsible for clustering - //! the projected data points. - //! \param[out] W Filled in with the cluster weights. - //! \param[out] M Filled in with the cluster sample means. - //! \param[out] C Filled in with the SVD of cluster sample - //! covariance matrices. - //! \param[out] I Filled in with the indices of distinct sampled - //! points. - template - void clusterProjections(CLUSTERER clusterer, - TDoubleVecVec &W, - TVectorNx1VecVec &M, - TSvdNxNVecVec &C, - TSizeUSet &I) const - { - using TVectorNx1CRef = boost::reference_wrapper; - using TVectorNx1CRefSizeUMap = - boost::unordered_map; - using TClusterVec = typename CLUSTERER::TClusterVec; - using TSampleCovariancesNxN = CBasicStatistics::SSampleCovariances; - - std::size_t b = m_ProjectedData.size(); - std::size_t n = m_ProjectedData[0].size(); - - // An index lookup for some projected points. - TVectorNx1CRefSizeUMap lookup(n); - // A placeholder for copy of i'th projected data. - TVectorNx1Vec P; - // Filled in with the probabilities of sampling the points (i,j)'th - // cluster. - TDoubleVec pij; - // Filled in with a mapping from the candidates for sampling to the - // actual points in the (i,j)'th cluster. 
- TSizeVec fij; - // Filled in with the samples of the (i,j)'th cluster. - TSizeVec sij; - - for (std::size_t i = 0u; i < b; ++i) - { - LOG_TRACE("projection " << i); - P = m_ProjectedData[i]; - - // Create a lookup of points to their indices. - lookup.clear(); - lookup.rehash(P.size()); - for (std::size_t j = 0u; j < m_ProjectedData[i].size(); ++j) - { - lookup[boost::cref(m_ProjectedData[i][j])] = j; + + // Cluster the i'th projection. + clusterer.setPoints(P); + clusterer.run(); + const TClusterVec& clusters = clusterer.clusters(); + double ni = static_cast(clusters.size()); + LOG_TRACE("# clusters = " << ni); + + for (std::size_t j = 0u; j < clusters.size(); ++j) { + const TVectorNx1Vec& points = clusters[j].points(); + LOG_TRACE("# points = " << points.size()); + + // Compute the number of points to sample from this cluster. + std::size_t nij = points.size(); + double wij = static_cast(nij) / static_cast(n); + std::size_t nsij = static_cast(std::max(m_Compression * wij * ni, 1.0)); + LOG_TRACE("wij = " << wij << ", nsij = " << nsij); + + // Compute the cluster sample mean and covariance matrix. + TSampleCovariancesNxN covariances; + covariances.add(points); + TVectorNx1 mij = CBasicStatistics::mean(covariances); + TSvdNxN Cij(toDenseMatrix(CBasicStatistics::covariances(covariances)), Eigen::ComputeFullU | Eigen::ComputeFullV); + + // Compute the probability that a sample from the cluster + // is a given point in the cluster. + pij.clear(); + fij.clear(); + pij.reserve(nij); + fij.reserve(nij); + double pmax = boost::numeric::bounds::lowest(); + for (std::size_t k = 0u; k < nij; ++k) { + std::size_t index = lookup[boost::cref(points[k])]; + if (I.count(index) == 0) { + TEigenVectorNx1 x = toDenseVector(points[k] - mij); + pij.push_back(-0.5 * x.transpose() * Cij.solve(x)); + fij.push_back(index); + pmax = std::max(pmax, pij.back()); + } } - // Cluster the i'th projection. - clusterer.setPoints(P); - clusterer.run(); - const TClusterVec &clusters = clusterer.clusters(); - double ni = static_cast(clusters.size()); - LOG_TRACE("# clusters = " << ni); - - for (std::size_t j = 0u; j < clusters.size(); ++j) - { - const TVectorNx1Vec &points = clusters[j].points(); - LOG_TRACE("# points = " << points.size()); - - // Compute the number of points to sample from this cluster. - std::size_t nij = points.size(); - double wij = static_cast(nij) / static_cast(n); - std::size_t nsij = static_cast(std::max(m_Compression * wij * ni, 1.0)); - LOG_TRACE("wij = " << wij << ", nsij = " << nsij); - - // Compute the cluster sample mean and covariance matrix. - TSampleCovariancesNxN covariances; - covariances.add(points); - TVectorNx1 mij = CBasicStatistics::mean(covariances); - TSvdNxN Cij(toDenseMatrix(CBasicStatistics::covariances(covariances)), - Eigen::ComputeFullU | Eigen::ComputeFullV); - - // Compute the probability that a sample from the cluster - // is a given point in the cluster. 
- pij.clear(); - fij.clear(); - pij.reserve(nij); - fij.reserve(nij); - double pmax = boost::numeric::bounds::lowest(); - for (std::size_t k = 0u; k < nij; ++k) - { - std::size_t index = lookup[boost::cref(points[k])]; - if (I.count(index) == 0) - { - TEigenVectorNx1 x = toDenseVector(points[k] - mij); - pij.push_back(-0.5 * x.transpose() * Cij.solve(x)); - fij.push_back(index); - pmax = std::max(pmax, pij.back()); - } + if (pij.size() > 0) { + double Zij = 0.0; + for (std::size_t k = 0u; k < pij.size(); ++k) { + pij[k] = std::exp(pij[k] - pmax); + Zij += pij[k]; } + for (std::size_t k = 0u; k < pij.size(); ++k) { + pij[k] /= Zij; + } + LOG_TRACE("pij = " << core::CContainerPrinter::print(pij)); + + // Sample the cluster. + CSampling::categoricalSampleWithoutReplacement(this->rng(), pij, nsij, sij); + LOG_TRACE("sij = " << core::CContainerPrinter::print(sij)); - if (pij.size() > 0) - { - double Zij = 0.0; - for (std::size_t k = 0u; k < pij.size(); ++k) - { - pij[k] = std::exp(pij[k] - pmax); - Zij += pij[k]; - } - for (std::size_t k = 0u; k < pij.size(); ++k) - { - pij[k] /= Zij; - } - LOG_TRACE("pij = " << core::CContainerPrinter::print(pij)); - - // Sample the cluster. - CSampling::categoricalSampleWithoutReplacement(this->rng(), pij, nsij, sij); - LOG_TRACE("sij = " << core::CContainerPrinter::print(sij)); - - // Save the relevant data for the i'th clustering. - for (std::size_t k = 0u; k < nsij; ++k) - { - I.insert(fij[sij[k]]); - } + // Save the relevant data for the i'th clustering. + for (std::size_t k = 0u; k < nsij; ++k) { + I.insert(fij[sij[k]]); } - W[i].push_back(wij); - M[i].push_back(mij); - C[i].push_back(Cij); } + W[i].push_back(wij); + M[i].push_back(mij); + C[i].push_back(Cij); } } - - //! Construct the neighbourhoods of each of the sampled points. - //! - //! \param[in] I The indices of distinct sampled points. - //! \param[out] H Filled in with the neighbourhoods of each - //! point in \p I, i.e. the indices of the closest points. - void neighbourhoods(const TSizeUSet &I, TSizeVecVec &H) const - { - using TVectorSizeUMap = boost::unordered_map; - - LOG_TRACE("I = " << core::CContainerPrinter::print(I)); - std::size_t b = m_ProjectedData.size(); - std::size_t n = m_ProjectedData[0].size(); - - // Create a k-d tree of the sampled data points. - TVectorVec S; - S.reserve(I.size()); - TVector concat(b * N); - for (auto i : I) - { - for (std::size_t j = 0u; j < b; ++j) - { - for (std::size_t k = 0u; k < N; ++k) - { - concat(N * j + k) = m_ProjectedData[j][i](k); - } + } + + //! Construct the neighbourhoods of each of the sampled points. + //! + //! \param[in] I The indices of distinct sampled points. + //! \param[out] H Filled in with the neighbourhoods of each + //! point in \p I, i.e. the indices of the closest points. + void neighbourhoods(const TSizeUSet& I, TSizeVecVec& H) const { + using TVectorSizeUMap = boost::unordered_map; + + LOG_TRACE("I = " << core::CContainerPrinter::print(I)); + std::size_t b = m_ProjectedData.size(); + std::size_t n = m_ProjectedData[0].size(); + + // Create a k-d tree of the sampled data points. 
+ TVectorVec S; + S.reserve(I.size()); + TVector concat(b * N); + for (auto i : I) { + for (std::size_t j = 0u; j < b; ++j) { + for (std::size_t k = 0u; k < N; ++k) { + concat(N * j + k) = m_ProjectedData[j][i](k); } - LOG_TRACE("concat = " << concat); - S.push_back(concat); - } - TVectorSizeUMap lookup(S.size()); - for (std::size_t i = 0u; i < S.size(); ++i) - { - lookup[S[i]] = i; } - CKdTree samples; - samples.build(S); - - // Compute the neighbourhoods. - for (std::size_t i = 0u; i < n; ++i) - { - for (std::size_t j = 0u; j < b; ++j) - { - for (std::size_t k = 0u; k < N; ++k) - { - concat(N * j + k) = m_ProjectedData[j][i](k); - } - } - const TVector *nn = samples.nearestNeighbour(concat); - if (!nn) - { - LOG_ERROR("No nearest neighbour of " << concat); - continue; + LOG_TRACE("concat = " << concat); + S.push_back(concat); + } + TVectorSizeUMap lookup(S.size()); + for (std::size_t i = 0u; i < S.size(); ++i) { + lookup[S[i]] = i; + } + CKdTree samples; + samples.build(S); + + // Compute the neighbourhoods. + for (std::size_t i = 0u; i < n; ++i) { + for (std::size_t j = 0u; j < b; ++j) { + for (std::size_t k = 0u; k < N; ++k) { + concat(N * j + k) = m_ProjectedData[j][i](k); } - LOG_TRACE("nn = " << *nn); - H[lookup[*nn]].push_back(i); } - LOG_TRACE("H = " << core::CContainerPrinter::print(H)); + const TVector* nn = samples.nearestNeighbour(concat); + if (!nn) { + LOG_ERROR("No nearest neighbour of " << concat); + continue; + } + LOG_TRACE("nn = " << *nn); + H[lookup[*nn]].push_back(i); } - - //! Compute the similarities between neighbourhoods. - //! - //! \param[in] W The cluster weights. - //! \param[in] M The cluster sample means. - //! \param[in] C The SVD of cluster sample covariance matrices. - //! \param[in] H The neighbourhoods of each point in \p I, - //! i.e. the indices of the closest points. - //! \param[out] S Filled in with the mean similarities between - //! neighbourhoods over the different clusterings. - void similarities(const TDoubleVecVec &W, - const TVectorNx1VecVec &M, - const TSvdNxNVecVec &C, - const TSizeVecVec &H, - TDoubleVecVec &S) const - { - std::size_t b = m_ProjectedData.size(); - std::size_t h = H.size(); - - TMeanAccumulatorVecVec S_(h); - - TVectorVec Pi(h); - for (std::size_t i = 0u; i < b; ++i) - { - const TVectorNx1Vec &X = m_ProjectedData[i]; - const TDoubleVec &Wi = W[i]; - const TVectorNx1Vec &Mi = M[i]; - const TSvdNxNVec &Ci = C[i]; - LOG_TRACE("W(i) = " << core::CContainerPrinter::print(Wi)); - LOG_TRACE("M(i) = " << core::CContainerPrinter::print(Mi)); - - std::size_t nci = Mi.size(); - std::fill_n(Pi.begin(), h, TVector(nci)); - - // Compute the probability each neighbourhood is from - // a given cluster. - for (std::size_t c = 0u; c < nci; ++c) - { - double wic = std::log(Wi[c]) - 0.5 * this->logDeterminant(Ci[c]); - LOG_TRACE(" w(" << i << "," << c << ") = " << wic); - for (std::size_t j = 0u; j < h; ++j) - { - std::size_t hj = H[j].size(); - Pi[j](c) = static_cast(hj) * wic; - for (std::size_t k = 0u; k < hj; ++k) - { - TEigenVectorNx1 x = toDenseVector(X[H[j][k]] - Mi[c]); - Pi[j](c) -= 0.5 * x.transpose() * Ci[c].solve(x); - } - LOG_TRACE(" P(" << j << "," << c << ") = " << Pi[j](c)); + LOG_TRACE("H = " << core::CContainerPrinter::print(H)); + } + + //! Compute the similarities between neighbourhoods. + //! + //! \param[in] W The cluster weights. + //! \param[in] M The cluster sample means. + //! \param[in] C The SVD of cluster sample covariance matrices. + //! \param[in] H The neighbourhoods of each point in \p I, + //! i.e. 
the indices of the closest points. + //! \param[out] S Filled in with the mean similarities between + //! neighbourhoods over the different clusterings. + void + similarities(const TDoubleVecVec& W, const TVectorNx1VecVec& M, const TSvdNxNVecVec& C, const TSizeVecVec& H, TDoubleVecVec& S) const { + std::size_t b = m_ProjectedData.size(); + std::size_t h = H.size(); + + TMeanAccumulatorVecVec S_(h); + + TVectorVec Pi(h); + for (std::size_t i = 0u; i < b; ++i) { + const TVectorNx1Vec& X = m_ProjectedData[i]; + const TDoubleVec& Wi = W[i]; + const TVectorNx1Vec& Mi = M[i]; + const TSvdNxNVec& Ci = C[i]; + LOG_TRACE("W(i) = " << core::CContainerPrinter::print(Wi)); + LOG_TRACE("M(i) = " << core::CContainerPrinter::print(Mi)); + + std::size_t nci = Mi.size(); + std::fill_n(Pi.begin(), h, TVector(nci)); + + // Compute the probability each neighbourhood is from + // a given cluster. + for (std::size_t c = 0u; c < nci; ++c) { + double wic = std::log(Wi[c]) - 0.5 * this->logDeterminant(Ci[c]); + LOG_TRACE(" w(" << i << "," << c << ") = " << wic); + for (std::size_t j = 0u; j < h; ++j) { + std::size_t hj = H[j].size(); + Pi[j](c) = static_cast(hj) * wic; + for (std::size_t k = 0u; k < hj; ++k) { + TEigenVectorNx1 x = toDenseVector(X[H[j][k]] - Mi[c]); + Pi[j](c) -= 0.5 * x.transpose() * Ci[c].solve(x); } + LOG_TRACE(" P(" << j << "," << c << ") = " << Pi[j](c)); } - for (std::size_t j = 0u; j < h; ++j) - { - double Pmax = *std::max_element(Pi[j].begin(), Pi[j].end()); - double Z = 0.0; - for (std::size_t c = 0u; c < nci; ++c) - { - Pi[j](c) = std::exp(Pi[j](c) - Pmax); - Z += Pi[j](c); - } - for (std::size_t c = 0u; c < nci; ++c) - { - Pi[j](c) /= Z; - } - LOG_TRACE(" P(" << j << ") = " << Pi[j]); + } + for (std::size_t j = 0u; j < h; ++j) { + double Pmax = *std::max_element(Pi[j].begin(), Pi[j].end()); + double Z = 0.0; + for (std::size_t c = 0u; c < nci; ++c) { + Pi[j](c) = std::exp(Pi[j](c) - Pmax); + Z += Pi[j](c); } - - // Compute the similarities. - for (std::size_t j = 0u; j < h; ++j) - { - S_[j].resize(j + 1); - for (std::size_t k = 0u; k <= j; ++k) - { - S_[j][k].add(-std::log(std::max(Pi[j].inner(Pi[k]), - boost::numeric::bounds::smallest()))); - } + for (std::size_t c = 0u; c < nci; ++c) { + Pi[j](c) /= Z; } + LOG_TRACE(" P(" << j << ") = " << Pi[j]); } - for (std::size_t i = 0u; i < S_.size(); ++i) - { - S[i].reserve(S_[i].size()); - for (std::size_t j = 0u; j < S_[i].size(); ++j) - { - S[i].push_back(CBasicStatistics::mean(S_[i][j])); + + // Compute the similarities. + for (std::size_t j = 0u; j < h; ++j) { + S_[j].resize(j + 1); + for (std::size_t k = 0u; k <= j; ++k) { + S_[j][k].add(-std::log(std::max(Pi[j].inner(Pi[k]), boost::numeric::bounds::smallest()))); } } } - - //! Extract the clustering of the neighbourhoods based on - //! their similarities. - //! - //! \param[in] S The similarities between neighbourhoods. - //! \param[in] H The neighbourhoods. - //! \param[out] result Filled in with the clustering of the - //! underlying points. 
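Before the clustering step below, it helps to restate what similarities() just computed, read directly off the code above using its own variable names: for clustering \f$i\f$ and neighbourhood \f$H_j\f$ the soft cluster memberships are

    \f$\displaystyle P_i(j,c) \propto \exp\left(\sum_{x \in H_j}{\left(\log W_i(c) - \frac{1}{2}\log\left|C_i(c)\right| - \frac{1}{2}(x - M_i(c))^t C_i(c)^{-1} (x - M_i(c))\right)}\right)\f$

normalised so that \f$\sum_c{P_i(j,c)} = 1\f$, and the dissimilarity handed to agglomerative clustering is the average over projections

    \f$\displaystyle S(j,k) = \frac{1}{b}\sum_{i=1}^{b}{-\log\left(\max\left(\sum_c{P_i(j,c)P_i(k,c)},\,\epsilon\right)\right)}\f$

where \f$\epsilon\f$ is the smallest positive double, i.e. the guard in the code.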
- void clusterNeighbourhoods(TDoubleVecVec &S, - const TSizeVecVec &H, - TSizeVecVec &result) const - { - using TNode = CAgglomerativeClusterer::CNode; - using TDoubleTuple = CNaturalBreaksClassifier::TDoubleTuple; - using TDoubleTupleVec = CNaturalBreaksClassifier::TDoubleTupleVec; - - result.clear(); - - CAgglomerativeClusterer agglomerative; - agglomerative.initialize(S); - CAgglomerativeClusterer::TNodeVec tree; - agglomerative.run(CAgglomerativeClusterer::E_Average, tree); - - TDoubleTupleVec heights; - heights.reserve(tree.size()); - for (std::size_t i = 0u; i < tree.size(); ++i) - { - heights.push_back(TDoubleTuple()); - heights.back().add(tree[i].height()); + for (std::size_t i = 0u; i < S_.size(); ++i) { + S[i].reserve(S_[i].size()); + for (std::size_t j = 0u; j < S_[i].size(); ++j) { + S[i].push_back(CBasicStatistics::mean(S_[i][j])); } - LOG_TRACE("heights = " << core::CContainerPrinter::print(heights)); - - TSizeVec splits; - if (CNaturalBreaksClassifier::naturalBreaks(heights, - 2, // Number splits - 0, // Minimum cluster size - CNaturalBreaksClassifier::E_TargetDeviation, - splits)) - { - double height = CBasicStatistics::mean(heights[splits[0] - 1]); - LOG_TRACE("split = " << core::CContainerPrinter::print(splits) - << ", height = " << height); - const TNode &root = tree.back(); - root.clusteringAt(height, result); - for (std::size_t i = 0u; i < result.size(); ++i) - { - TSizeVec &ri = result[i]; - std::size_t n = ri.size(); - for (std::size_t j = 0u; j < n; ++j) - { - ri.insert(ri.end(), H[ri[j]].begin(), H[ri[j]].end()); - } - ri.erase(ri.begin(), ri.begin() + n); + } + } + + //! Extract the clustering of the neighbourhoods based on + //! their similarities. + //! + //! \param[in] S The similarities between neighbourhoods. + //! \param[in] H The neighbourhoods. + //! \param[out] result Filled in with the clustering of the + //! underlying points. + void clusterNeighbourhoods(TDoubleVecVec& S, const TSizeVecVec& H, TSizeVecVec& result) const { + using TNode = CAgglomerativeClusterer::CNode; + using TDoubleTuple = CNaturalBreaksClassifier::TDoubleTuple; + using TDoubleTupleVec = CNaturalBreaksClassifier::TDoubleTupleVec; + + result.clear(); + + CAgglomerativeClusterer agglomerative; + agglomerative.initialize(S); + CAgglomerativeClusterer::TNodeVec tree; + agglomerative.run(CAgglomerativeClusterer::E_Average, tree); + + TDoubleTupleVec heights; + heights.reserve(tree.size()); + for (std::size_t i = 0u; i < tree.size(); ++i) { + heights.push_back(TDoubleTuple()); + heights.back().add(tree[i].height()); + } + LOG_TRACE("heights = " << core::CContainerPrinter::print(heights)); + + TSizeVec splits; + if (CNaturalBreaksClassifier::naturalBreaks(heights, + 2, // Number splits + 0, // Minimum cluster size + CNaturalBreaksClassifier::E_TargetDeviation, + splits)) { + double height = CBasicStatistics::mean(heights[splits[0] - 1]); + LOG_TRACE("split = " << core::CContainerPrinter::print(splits) << ", height = " << height); + const TNode& root = tree.back(); + root.clusteringAt(height, result); + for (std::size_t i = 0u; i < result.size(); ++i) { + TSizeVec& ri = result[i]; + std::size_t n = ri.size(); + for (std::size_t j = 0u; j < n; ++j) { + ri.insert(ri.end(), H[ri[j]].begin(), H[ri[j]].end()); } + ri.erase(ri.begin(), ri.begin() + n); } - else - { - LOG_ERROR("Failed to cluster " << core::CContainerPrinter::print(heights)); - } + } else { + LOG_ERROR("Failed to cluster " << core::CContainerPrinter::print(heights)); } + } - //! Get the projected data points. 
- const TVectorNx1VecVec &projectedData() const - { - return m_ProjectedData; - } + //! Get the projected data points. + const TVectorNx1VecVec& projectedData() const { return m_ProjectedData; } - //! Get the log determinant of the rank full portion of \p m. - double logDeterminant(const TSvdNxN &svd) const - { - double result = 0.0; - for (std::size_t i = 0u, rank = static_cast(svd.rank()); i < rank; ++i) - { - result += std::log(svd.singularValues()[i]); - } - return result; + //! Get the log determinant of the rank full portion of \p m. + double logDeterminant(const TSvdNxN& svd) const { + double result = 0.0; + for (std::size_t i = 0u, rank = static_cast(svd.rank()); i < rank; ++i) { + result += std::log(svd.singularValues()[i]); } + return result; + } - private: - //! Controls the amount of compression in sampling points - //! for computing the hierarchical clustering. Larger numbers - //! equate to more sampled points so less compression. - double m_Compression; +private: + //! Controls the amount of compression in sampling points + //! for computing the hierarchical clustering. Larger numbers + //! equate to more sampled points so less compression. + double m_Compression; - //! The projected data points. - TVectorNx1VecVec m_ProjectedData; + //! The projected data points. + TVectorNx1VecVec m_ProjectedData; }; //! \brief Adapts clustering implementations for use by the random @@ -722,139 +617,107 @@ class CRandomProjectionClustererFacade {}; //! \brief Adapts x-means for use by the random projection clusterer. template -class CRandomProjectionClustererFacade, COST>> -{ - public: - using TClusterer = CXMeans, COST>; - using TClusterVec = typename TClusterer::TClusterVec; - using TVectorNx1 = CVectorNx1; - using TVectorNx1Vec = std::vector; - - public: - CRandomProjectionClustererFacade(const TClusterer &xmeans, - std::size_t improveParamsKmeansIterations, - std::size_t improveStructureClusterSeeds, - std::size_t improveStructureKmeansIterations) : - m_Xmeans(xmeans), - m_ImproveParamsKmeansIterations(improveParamsKmeansIterations), - m_ImproveStructureClusterSeeds(improveStructureClusterSeeds), - m_ImproveStructureKmeansIterations(improveStructureKmeansIterations) - {} - - //! Set the points to cluster. - void setPoints(TVectorNx1Vec &points) - { - m_Xmeans.setPoints(points); - } - - //! Cluster the points. - void run() - { - m_Xmeans.run(m_ImproveParamsKmeansIterations, - m_ImproveStructureClusterSeeds, - m_ImproveStructureKmeansIterations); - } - - //! Get the clusters (should only be called after run). - const TClusterVec &clusters() const - { - return m_Xmeans.clusters(); - } - - private: - //! The x-means implementation. - TClusterer m_Xmeans; - //! The number of iterations to use in k-means for a single - //! round of improve parameters. - std::size_t m_ImproveParamsKmeansIterations; - //! The number of random seeds to try when initializing k-means - //! for a single round of improve structure. - std::size_t m_ImproveStructureClusterSeeds; - //! The number of iterations to use in k-means for a single - //! round of improve structure. 
- std::size_t m_ImproveStructureKmeansIterations; +class CRandomProjectionClustererFacade, COST>> { +public: + using TClusterer = CXMeans, COST>; + using TClusterVec = typename TClusterer::TClusterVec; + using TVectorNx1 = CVectorNx1; + using TVectorNx1Vec = std::vector; + +public: + CRandomProjectionClustererFacade(const TClusterer& xmeans, + std::size_t improveParamsKmeansIterations, + std::size_t improveStructureClusterSeeds, + std::size_t improveStructureKmeansIterations) + : m_Xmeans(xmeans), + m_ImproveParamsKmeansIterations(improveParamsKmeansIterations), + m_ImproveStructureClusterSeeds(improveStructureClusterSeeds), + m_ImproveStructureKmeansIterations(improveStructureKmeansIterations) {} + + //! Set the points to cluster. + void setPoints(TVectorNx1Vec& points) { m_Xmeans.setPoints(points); } + + //! Cluster the points. + void run() { m_Xmeans.run(m_ImproveParamsKmeansIterations, m_ImproveStructureClusterSeeds, m_ImproveStructureKmeansIterations); } + + //! Get the clusters (should only be called after run). + const TClusterVec& clusters() const { return m_Xmeans.clusters(); } + +private: + //! The x-means implementation. + TClusterer m_Xmeans; + //! The number of iterations to use in k-means for a single + //! round of improve parameters. + std::size_t m_ImproveParamsKmeansIterations; + //! The number of random seeds to try when initializing k-means + //! for a single round of improve structure. + std::size_t m_ImproveStructureClusterSeeds; + //! The number of iterations to use in k-means for a single + //! round of improve structure. + std::size_t m_ImproveStructureKmeansIterations; }; //! Makes an x-means adapter for random projection clustering. template CRandomProjectionClustererFacade, COST>> - forRandomProjectionClusterer(const CXMeans, COST> &xmeans, - std::size_t improveParamsKmeansIterations, - std::size_t improveStructureClusterSeeds, - std::size_t improveStructureKmeansIterations) -{ +forRandomProjectionClusterer(const CXMeans, COST>& xmeans, + std::size_t improveParamsKmeansIterations, + std::size_t improveStructureClusterSeeds, + std::size_t improveStructureKmeansIterations) { return CRandomProjectionClustererFacade, COST>>( - xmeans, - improveParamsKmeansIterations, - improveStructureClusterSeeds, - improveStructureKmeansIterations); + xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations); } //! \brief Adapts k-means for use by the random projection clusterer. template -class CRandomProjectionClustererFacade>> -{ - public: - using TClusterer = CKMeansFast>; - using TClusterVec = typename TClusterer::TClusterVec; - using TVectorNx1 = CVectorNx1; - using TVectorNx1Vec = std::vector; - - public: - CRandomProjectionClustererFacade(const TClusterer &kmeans, - std::size_t k, - std::size_t maxIterations) : - m_Kmeans(kmeans), - m_K(k), - m_MaxIterations(maxIterations) - {} - - //! Set the points to cluster. - void setPoints(TVectorNx1Vec &points) - { - m_Kmeans.setPoints(points); - TVectorNx1Vec centres; - CKMeansPlusPlusInitialization seedCentres(m_Rng); - seedCentres.run(points, m_K, centres); - m_Kmeans.setCentres(centres); - } - - //! Cluster the points. - void run() - { - m_Kmeans.run(m_MaxIterations); - } - - //! Get the clusters (should only be called after run). - const TClusterVec &clusters() const - { - m_Kmeans.clusters(m_Clusters); - return m_Clusters; - } - - private: - //! The random number generator. - CPRNG::CXorShift1024Mult m_Rng; - //! The k-means implementation. - TClusterer m_Kmeans; - //! 
The number of clusters to use. - std::size_t m_K; - //! The number of iterations to use in k-means. - std::size_t m_MaxIterations; - //! The clusters. - mutable TClusterVec m_Clusters; +class CRandomProjectionClustererFacade>> { +public: + using TClusterer = CKMeansFast>; + using TClusterVec = typename TClusterer::TClusterVec; + using TVectorNx1 = CVectorNx1; + using TVectorNx1Vec = std::vector; + +public: + CRandomProjectionClustererFacade(const TClusterer& kmeans, std::size_t k, std::size_t maxIterations) + : m_Kmeans(kmeans), m_K(k), m_MaxIterations(maxIterations) {} + + //! Set the points to cluster. + void setPoints(TVectorNx1Vec& points) { + m_Kmeans.setPoints(points); + TVectorNx1Vec centres; + CKMeansPlusPlusInitialization seedCentres(m_Rng); + seedCentres.run(points, m_K, centres); + m_Kmeans.setCentres(centres); + } + + //! Cluster the points. + void run() { m_Kmeans.run(m_MaxIterations); } + + //! Get the clusters (should only be called after run). + const TClusterVec& clusters() const { + m_Kmeans.clusters(m_Clusters); + return m_Clusters; + } + +private: + //! The random number generator. + CPRNG::CXorShift1024Mult m_Rng; + //! The k-means implementation. + TClusterer m_Kmeans; + //! The number of clusters to use. + std::size_t m_K; + //! The number of iterations to use in k-means. + std::size_t m_MaxIterations; + //! The clusters. + mutable TClusterVec m_Clusters; }; //! Makes a k-means adapter for random projection clustering. template CRandomProjectionClustererFacade>> - forRandomProjectionClusterer(const CKMeansFast> &kmeans, - std::size_t k, - std::size_t maxIterations) -{ +forRandomProjectionClusterer(const CKMeansFast>& kmeans, std::size_t k, std::size_t maxIterations) { return CRandomProjectionClustererFacade>>(kmeans, k, maxIterations); } - } } diff --git a/include/maths/CRegression.h b/include/maths/CRegression.h index be5ae9bc11..cda79f8ff2 100644 --- a/include/maths/CRegression.h +++ b/include/maths/CRegression.h @@ -18,39 +18,33 @@ #include #include -#include #include +#include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { -namespace regression_detail -{ +namespace regression_detail { //! Used for getting the default maximum condition number to use //! when computing parameters. template -struct CMaxCondition -{ +struct CMaxCondition { static const double VALUE; }; -template const double CMaxCondition::VALUE = 1e15; +template +const double CMaxCondition::VALUE = 1e15; //! Used for getting the default maximum condition number to use //! when computing parameters. template<> -struct MATHS_EXPORT CMaxCondition -{ +struct MATHS_EXPORT CMaxCondition { static const double VALUE; }; - } //! \brief A collection of various types of regression. @@ -61,509 +55,433 @@ struct MATHS_EXPORT CMaxCondition //! and definition. As such all member functions should be static and it //! should be state-less. If your functionality doesn't fit this pattern //! just make it a nested class. -class MATHS_EXPORT CRegression -{ +class MATHS_EXPORT CRegression { +public: + //! DESCRIPTION:\n + //! A very lightweight online weighted least squares regression to + //! fit degree N polynomials to a collection of points \f$\{(x_i, y_i)\}\f$, + //! i.e. to find the \f$y = c_0 + c_1 x + ... + c_N x^N\f$ s.t. the + //! weighted sum of the square residuals is minimized. Formally, we + //! are looking for \f$\theta^*\f$ defined as + //!
+    //!   \f$\theta^* = \arg\min_{\theta}{(y - X\theta)^tDiag(w)(y - X\theta)}\f$
+    //! 
+ //! Here, \f$X\f$ denotes the design matrix and for a polynomial + //! takes the form \f$[X]_{ij} = x_i^{j-1}\f$. This is solved using + //! the Moore-Penrose pseudo-inverse. + //! + //! We are able to maintain \f$2N-1\f$ sufficient statistics to + //! construct \f$X^tDiag(w)X\f$ and also the \f$N\f$ components of + //! the vector \f$X^tDiag(w)y\f$ online. + //! + //! IMPLEMENTATION DECISIONS:\n + //! This uses float storage and requires \f$3(N+1)\f$ floats where + //! \f$N\f$ is the polynomial order. In total this therefore uses + //! \f$12(N+1)\f$ bytes. + //! + //! Note that this constructs the Gramian \f$X^tDiag(w)X\f$ of the + //! design matrix when computing the least squares solution. This + //! is because holding sufficient statistics for constructing this + //! matrix is the most space efficient representation to compute + //! online. However, the condition of this matrix is the square of + //! the condition of the design matrix and so this approach doesn't + //! have good numerics. + //! + //! A much more robust scheme is to use incremental QR factorization + //! and for large problems that approach should be used in preference. + //! However, much can be done by using an affine transformation of + //! \f$x_i\f$ to improve the numerics of this approach and the intention + //! is that it is used for the case where \f$N\f$ is small and space + //! is at a premium. + //! + //! \tparam N_ The degree of the polynomial. + template + class CLeastSquaresOnline : boost::addable> { public: - //! DESCRIPTION:\n - //! A very lightweight online weighted least squares regression to - //! fit degree N polynomials to a collection of points \f$\{(x_i, y_i)\}\f$, - //! i.e. to find the \f$y = c_0 + c_1 x + ... + c_N x^N\f$ s.t. the - //! weighted sum of the square residuals is minimized. Formally, we - //! are looking for \f$\theta^*\f$ defined as - //!
-        //!   \f$\theta^* = arg\min_{\theta}{(y - X\theta)^tDiag(w)(y - X\theta)}\f$
-        //! 
- //! Here, \f$X\f$ denotes the design matrix and for a polynomial - //! takes the form \f$[X]_{ij} = x_i^{j-1}\f$. This is solved using - //! the Moore-Penrose pseudo-inverse. + static const std::size_t N = N_ + 1; + using TArray = boost::array; + using TVector = CVectorNx1; + using TMatrix = CSymmetricMatrixNxN; + using TVectorMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + + public: + static const std::string STATISTIC_TAG; + + public: + CLeastSquaresOnline() : m_S() {} + template + CLeastSquaresOnline(const CLeastSquaresOnline& other) : m_S(other.statistic()) {} + + //! Restore by traversing a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Persist by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Add in the point \f$(x, y(x))\f$ with weight \p weight. //! - //! We are able to maintain \f$2N-1\f$ sufficient statistics to - //! construct \f$X^tDiag(w)X\f$ and also the \f$N\f$ components of - //! the vector \f$X^tDiag(w)y\f$ online. + //! \param[in] x The abscissa of the point. + //! \param[in] y The ordinate of the point. + //! \param[in] weight The residual weight at the point. + void add(double x, double y, double weight = 1.0) { + TVector d; + double xi = 1.0; + for (std::size_t i = 0u; i < N; ++i, xi *= x) { + d(i) = xi; + d(i + 2 * N - 1) = xi * y; + } + for (std::size_t i = N; i < 2 * N - 1; ++i, xi *= x) { + d(i) = xi; + } + m_S.add(d, weight); + } + + //! Set the statistics from \p rhs. + template + const CLeastSquaresOnline operator=(const CLeastSquaresOnline& rhs) { + m_S = rhs.statistic(); + return *this; + } + + //! Differences two regressions. //! - //! IMPLEMENTATION DECISIONS:\n - //! This uses float storage and requires \f$3(N+1)\f$ floats where - //! \f$N\f$ is the polynomial order. In total this therefore uses - //! \f$12(N+1)\f$ bytes. + //! This creates a regression which is fit on just the points + //! add to this and not \p rhs. //! - //! Note that this constructs the Gramian \f$X^tDiag(w)X\f$ of the - //! design matrix when computing the least squares solution. This - //! is because holding sufficient statistics for constructing this - //! matrix is the most space efficient representation to compute - //! online. However, the condition of this matrix is the square of - //! the condition of the design matrix and so this approach doesn't - //! have good numerics. + //! \param[in] rhs The regression fit to combine. + //! \note This is only meaningful if they have the same time + //! origin and the values added to \p rhs are a subset of the + //! values add to this. + template + const CLeastSquaresOnline& operator-=(const CLeastSquaresOnline& rhs) { + m_S -= rhs.statistic(); + return *this; + } + + //! Combines two regressions. //! - //! A much more robust scheme is to use incremental QR factorization - //! and for large problems that approach should be used in preference. - //! However, much can be done by using an affine transformation of - //! \f$x_i\f$ to improve the numerics of this approach and the intention - //! is that it is used for the case where \f$N\f$ is small and space - //! is at a premium. + //! This creates the regression fit on the points fit with + //! \p rhs and the points fit with this regression. //! - //! \tparam N_ The degree of the polynomial. 
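To make the sufficient statistics concrete, consider the straight line case \f$N_ = 1\f$ (so \f$N = 2\f$): the maintained statistics are the weighted means of \f$\{1, x, x^2\}\f$ and \f$\{y, xy\}\f$, and the regression parameters follow from the 2x2 normal equations. A self-contained numeric sketch, illustrative only and not library code:

    #include <iostream>

    int main() {
        // Means accumulated online for the points (0,1), (1,3), (2,5).
        double mx = 1.0, mxx = 5.0 / 3.0;  // E[x], E[x^2]
        double my = 3.0, mxy = 13.0 / 3.0; // E[y], E[x y]

        // Normal equations: [1 mx; mx mxx] [c0; c1] = [my; mxy].
        double det = mxx - mx * mx;
        double c1 = (mxy - mx * my) / det; // slope
        double c0 = my - c1 * mx;          // intercept

        std::cout << "y = " << c0 << " + " << c1 << " x\n"; // y = 1 + 2 x
        return 0;
    }

The same pattern scales to higher degrees: the Gramian entry \f$(i, j)\f$ is the mean of \f$x^{i+j}\f$ and the right-hand side entry \f$i\f$ is the mean of \f$x^i y\f$, which is why \f$2N - 1\f$ abscissa moments plus \f$N\f$ cross moments suffice.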
- template - class CLeastSquaresOnline : boost::addable< CLeastSquaresOnline > - { - public: - static const std::size_t N = N_+1; - using TArray = boost::array; - using TVector = CVectorNx1; - using TMatrix = CSymmetricMatrixNxN; - using TVectorMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; - - public: - static const std::string STATISTIC_TAG; - - public: - CLeastSquaresOnline() : m_S() {} - template - CLeastSquaresOnline(const CLeastSquaresOnline &other) : - m_S(other.statistic()) - {} - - //! Restore by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Persist by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Add in the point \f$(x, y(x))\f$ with weight \p weight. - //! - //! \param[in] x The abscissa of the point. - //! \param[in] y The ordinate of the point. - //! \param[in] weight The residual weight at the point. - void add(double x, double y, double weight = 1.0) - { - TVector d; - double xi = 1.0; - for (std::size_t i = 0u; i < N; ++i, xi *= x) - { - d(i) = xi; - d(i+2*N-1) = xi * y; - } - for (std::size_t i = N; i < 2*N-1; ++i, xi *= x) - { - d(i) = xi; - } - m_S.add(d, weight); - } + //! \param[in] rhs The regression fit to combine. + //! \note This is only meaningful if they have the same time + //! origin. + template + const CLeastSquaresOnline& operator+=(const CLeastSquaresOnline& rhs) { + m_S += rhs.statistic(); + return *this; + } - //! Set the statistics from \p rhs. - template - const CLeastSquaresOnline operator=(const CLeastSquaresOnline &rhs) - { - m_S = rhs.statistic(); - return *this; - } + //! In order to get reasonable accuracy, one typically needs to + //! use an affine transform of the abscissa. + //! + //! In particular, one will typically use \f$x \mapsto x - b\f$ + //! rather than \f$x\f$ directly, since \f$b\f$ can be adjusted + //! to improve the condition of the Gramian. + //! + //! If this is running online, then as x increases one wants to + //! allow the shift \f$b\f$ to increase. This function computes + //! the impact of a change in \f$b\f$ on the stored statistics. + //! + //! \param[in] dx The shift that will subsequently be applied to + //! the abscissa. + void shiftAbscissa(double dx); - //! Differences two regressions. - //! - //! This creates a regression which is fit on just the points - //! add to this and not \p rhs. - //! - //! \param[in] rhs The regression fit to combine. - //! \note This is only meaningful if they have the same time - //! origin and the values added to \p rhs are a subset of the - //! values add to this. - template - const CLeastSquaresOnline &operator-=(const CLeastSquaresOnline &rhs) - { - m_S -= rhs.statistic(); - return *this; + //! Translate the ordinates by \p dy. + //! + //! \param[in] dy The shift that will subsequently be applied to + //! the ordinates. + void shiftOrdinate(double dy) { + if (CBasicStatistics::count(m_S) > 0.0) { + const TVector& s = CBasicStatistics::mean(m_S); + for (std::size_t i = 0u; i < N; ++i) { + CBasicStatistics::moment<0>(m_S)(i + 2 * N - 1) += s(i) * dy; } + } + } - //! Combines two regressions. - //! - //! This creates the regression fit on the points fit with - //! \p rhs and the points fit with this regression. - //! - //! \param[in] rhs The regression fit to combine. - //! \note This is only meaningful if they have the same time - //! origin. 
- template - const CLeastSquaresOnline &operator+=(const CLeastSquaresOnline &rhs) - { - m_S += rhs.statistic(); - return *this; + //! Shift the gradient by \p dydx. + //! + //! \param[in] dydx The shift that will subsequently be applied to + //! the derivative of the regression w.r.t. the abscissa. + void shiftGradient(double dydx) { + if (CBasicStatistics::count(m_S) > 0.0) { + const TVector& s = CBasicStatistics::mean(m_S); + for (std::size_t i = 0u; i < N; ++i) { + CBasicStatistics::moment<0>(m_S)(i + 2 * N - 1) += s(i + 1) * dydx; } + } + } - //! In order to get reasonable accuracy, one typically needs to - //! use an affine transform of the abscissa. - //! - //! In particular, one will typically use \f$x \mapsto x - b\f$ - //! rather than \f$x\f$ directly, since \f$b\f$ can be adjusted - //! to improve the condition of the Gramian. - //! - //! If this is running online, then as x increases one wants to - //! allow the shift \f$b\f$ to increase. This function computes - //! the impact of a change in \f$b\f$ on the stored statistics. - //! - //! \param[in] dx The shift that will subsequently be applied to - //! the abscissa. - void shiftAbscissa(double dx); - - //! Translate the ordinates by \p dy. - //! - //! \param[in] dy The shift that will subsequently be applied to - //! the ordinates. - void shiftOrdinate(double dy) - { - if (CBasicStatistics::count(m_S) > 0.0) - { - const TVector &s = CBasicStatistics::mean(m_S); - for (std::size_t i = 0u; i < N; ++i) - { - CBasicStatistics::moment<0>(m_S)(i+2*N-1) += s(i) * dy; - } - } + //! Linearly scale the regression model. + //! + //! i.e. apply a transform such that each regression parameter maps + //! to \p scale times its current value. + //! + //! \param[in] scale The scale to apply to the regression parameters. + void linearScale(double scale) { + if (CBasicStatistics::count(m_S) > 0.0) { + for (std::size_t i = 0u; i < N; ++i) { + CBasicStatistics::moment<0>(m_S)(i + 2 * N - 1) *= scale; } + } + } - //! Shift the gradient by \p dydx. - //! - //! \param[in] dydx The shift that will subsequently be applied to - //! the derivative of the regression w.r.t. the abscissa. - void shiftGradient(double dydx) - { - if (CBasicStatistics::count(m_S) > 0.0) - { - const TVector &s = CBasicStatistics::mean(m_S); - for (std::size_t i = 0u; i < N; ++i) - { - CBasicStatistics::moment<0>(m_S)(i+2*N-1) += s(i+1) * dydx; - } - } - } + //! Multiply the statistics' count by \p scale. + CLeastSquaresOnline scaled(double scale) const { + CLeastSquaresOnline result(*this); + return result.scale(scale); + } - //! Linearly scale the regression model. - //! - //! i.e. apply a transform such that each regression parameter maps - //! to \p scale times its current value. - //! - //! \param[in] scale The scale to apply to the regression parameters. - void linearScale(double scale) - { - if (CBasicStatistics::count(m_S) > 0.0) - { - for (std::size_t i = 0u; i < N; ++i) - { - CBasicStatistics::moment<0>(m_S)(i+2*N-1) *= scale; - } - } - } + //! Scale the statistics' count by \p scale. + const CLeastSquaresOnline& scale(double scale) { + CBasicStatistics::count(m_S) *= scale; + return *this; + } - //! Multiply the statistics' count by \p scale. - CLeastSquaresOnline scaled(double scale) const - { - CLeastSquaresOnline result(*this); - return result.scale(scale); - } + //! Get the predicted value at \p x. + double predict(double x, double maxCondition = regression_detail::CMaxCondition::VALUE) const; - //! Scale the statistics' count by \p scale. 
- const CLeastSquaresOnline &scale(double scale) - { - CBasicStatistics::count(m_S) *= scale; - return *this; - } + //! Get the regression parameters. + //! + //! i.e. The intercept, slope, curvature, etc. + //! + //! \param[in] maxCondition The maximum condition number for + //! the Gramian this will consider solving. If the condition + //! is worse than this it'll fit a lower order polynomial. + //! \param[out] result Filled in with the regression parameters. + bool parameters(TArray& result, double maxCondition = regression_detail::CMaxCondition::VALUE) const; - //! Get the predicted value at \p x. - double predict(double x, double maxCondition = regression_detail::CMaxCondition::VALUE) const; - - //! Get the regression parameters. - //! - //! i.e. The intercept, slope, curvature, etc. - //! - //! \param[in] maxCondition The maximum condition number for - //! the Gramian this will consider solving. If the condition - //! is worse than this it'll fit a lower order polynomial. - //! \param[out] result Filled in with the regression parameters. - bool parameters(TArray &result, double maxCondition = regression_detail::CMaxCondition::VALUE) const; - - //! Get the predicted value of the regression parameters at \p x. - //! - //! \note Returns array of zeros if getting the parameters fails. - TArray parameters(double x, double maxCondition = regression_detail::CMaxCondition::VALUE) const - { - TArray result; - TArray params; - if (this->parameters(params, maxCondition)) - { - std::ptrdiff_t n = static_cast(params.size()); - for (std::ptrdiff_t i = n - 1; i >= 0; --i) - { - result[i] = params[i]; - for (std::ptrdiff_t j = i + 1; j < n; ++j) - { - params[j] *= static_cast(i + 1) - / static_cast(j - i) * x; - result[i] += params[j]; - } - } + //! Get the predicted value of the regression parameters at \p x. + //! + //! \note Returns array of zeros if getting the parameters fails. + TArray parameters(double x, double maxCondition = regression_detail::CMaxCondition::VALUE) const { + TArray result; + TArray params; + if (this->parameters(params, maxCondition)) { + std::ptrdiff_t n = static_cast(params.size()); + for (std::ptrdiff_t i = n - 1; i >= 0; --i) { + result[i] = params[i]; + for (std::ptrdiff_t j = i + 1; j < n; ++j) { + params[j] *= static_cast(i + 1) / static_cast(j - i) * x; + result[i] += params[j]; } - return result; } + } + return result; + } - //! Get the covariance matrix of the regression parameters. - //! - //! To compute this assume the data to fit are described by - //! \f$y_i = \sum_{j=0}{N} c_j x_i^j + Y_i\f$ where \f$Y_i\f$ - //! are IID and \f$N(0, \sigma)\f$ whence - //!
-                //!   \f$C = (X^t X)^{-1}X^t E[YY^t] X (X^t X)^{-1}\f$
-                //! 
- //! - //! Since \f$E[YY^t] = \sigma^2 I\f$ it follows that - //!
-                //!   \f$C = \sigma^2 (X^t X)^{-1}\f$
-                //! 
- //! - //! \param[in] variance The variance of the data residuals. - //! \param[in] maxCondition The maximum condition number for - //! the Gramian this will consider solving. If the condition - //! is worse than this it'll fit a lower order polynomial. - //! \param[out] result Filled in with the covariance matrix. - bool covariances(double variance, - TMatrix &result, - double maxCondition = regression_detail::CMaxCondition::VALUE) const; - - //! Get the safe prediction horizon based on the spread - //! of the abscissa added to the model so far. - double range() const - { - // The magic 12 comes from assuming the independent - // variable X is uniform over the range (for our uses - // it typically is). We maintain mean X^2 and X. For - // a uniform variable on a range [a, b] we have that - // E[(X - E(X))^2] = E[X^2] - E[X]^2 = (b - a)^2 / 12. - - double x1 = CBasicStatistics::mean(m_S)(1); - double x2 = CBasicStatistics::mean(m_S)(2); - return std::sqrt(12.0 * std::max(x2 - x1 * x1, 0.0)); - } + //! Get the covariance matrix of the regression parameters. + //! + //! To compute this assume the data to fit are described by + //! \f$y_i = \sum_{j=0}{N} c_j x_i^j + Y_i\f$ where \f$Y_i\f$ + //! are IID and \f$N(0, \sigma)\f$ whence + //!
+        //!   \f$C = (X^t X)^{-1}X^t E[YY^t] X (X^t X)^{-1}\f$
+        //! 
+ //! + //! Since \f$E[YY^t] = \sigma^2 I\f$ it follows that + //!
+        //!   \f$C = \sigma^2 (X^t X)^{-1}\f$
+        //! 
+ //! + //! \param[in] variance The variance of the data residuals. + //! \param[in] maxCondition The maximum condition number for + //! the Gramian this will consider solving. If the condition + //! is worse than this it'll fit a lower order polynomial. + //! \param[out] result Filled in with the covariance matrix. + bool covariances(double variance, TMatrix& result, double maxCondition = regression_detail::CMaxCondition::VALUE) const; + + //! Get the safe prediction horizon based on the spread + //! of the abscissa added to the model so far. + double range() const { + // The magic 12 comes from assuming the independent + // variable X is uniform over the range (for our uses + // it typically is). We maintain mean X^2 and X. For + // a uniform variable on a range [a, b] we have that + // E[(X - E(X))^2] = E[X^2] - E[X]^2 = (b - a)^2 / 12. + + double x1 = CBasicStatistics::mean(m_S)(1); + double x2 = CBasicStatistics::mean(m_S)(2); + return std::sqrt(12.0 * std::max(x2 - x1 * x1, 0.0)); + } - //! Age out the old points. - void age(double factor, bool meanRevert = false) - { - if (meanRevert) - { - TVector &s = CBasicStatistics::moment<0>(m_S); - for (std::size_t i = 1u; i < N; ++i) - { - s(i+2*N-1) = factor * s(i+2*N-1) - + (1.0 - factor) * s(i) * s(2*N-1); - } - } - m_S.age(factor); + //! Age out the old points. + void age(double factor, bool meanRevert = false) { + if (meanRevert) { + TVector& s = CBasicStatistics::moment<0>(m_S); + for (std::size_t i = 1u; i < N; ++i) { + s(i + 2 * N - 1) = factor * s(i + 2 * N - 1) + (1.0 - factor) * s(i) * s(2 * N - 1); } + } + m_S.age(factor); + } - //! Get the effective number of points being fitted. - double count() const - { - return CBasicStatistics::count(m_S); - } + //! Get the effective number of points being fitted. + double count() const { return CBasicStatistics::count(m_S); } - //! Get the mean value of the ordinates. - double mean() const - { - return CBasicStatistics::mean(m_S)(2*N-1); - } + //! Get the mean value of the ordinates. + double mean() const { return CBasicStatistics::mean(m_S)(2 * N - 1); } - //! Get the mean in the interval [\p a, \p b]. - double mean(double a, double b) const - { - double result = 0.0; - - double interval = b - a; - - TArray params; - this->parameters(params); - - if (interval == 0.0) - { - result = params[0]; - double xi = a; - for (std::size_t i = 1u; i < params.size(); ++i, xi *= a) - { - result += params[i] * xi; - } - return result; - } + //! Get the mean in the interval [\p a, \p b]. + double mean(double a, double b) const { + double result = 0.0; - for (std::size_t i = 0u; i < N; ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { - result += CCategoricalTools::binomialCoefficient(i+1, j+1) - * params[i] / static_cast(i+1) - * std::pow(a, static_cast(i-j)) - * std::pow(interval, static_cast(j+1)); - } - } + double interval = b - a; - return result / interval; - } + TArray params; + this->parameters(params); - //! Get the vector statistic. - const TVectorMeanAccumulator &statistic() const - { - return m_S; + if (interval == 0.0) { + result = params[0]; + double xi = a; + for (std::size_t i = 1u; i < params.size(); ++i, xi *= a) { + result += params[i] * xi; } + return result; + } - //! Get a checksum for this object. 
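For reference, the "magic 12" in range() is just the variance of a uniform random variable: if \f$X \sim U(a, b)\f$ then

    \f$E[X] = \frac{a + b}{2}, \quad E[X^2] = \frac{a^2 + ab + b^2}{3}, \quad E[X^2] - E[X]^2 = \frac{(b - a)^2}{12}\f$

so the spread of the abscissa is recovered as \f$b - a = \sqrt{12 \, (E[X^2] - E[X]^2)}\f$, which is what the function returns (clamped at zero to guard against rounding).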
- std::uint64_t checksum() const - { - return m_S.checksum(); + for (std::size_t i = 0u; i < N; ++i) { + for (std::size_t j = 0u; j <= i; ++j) { + result += CCategoricalTools::binomialCoefficient(i + 1, j + 1) * params[i] / static_cast(i + 1) * + std::pow(a, static_cast(i - j)) * std::pow(interval, static_cast(j + 1)); } + } - //! Print this regression out for debug. - std::string print() const; - - private: - //! Get the first \p n regression parameters. - template - bool parameters(std::size_t n, - MATRIX &x, - VECTOR &y, - double maxCondition, - TArray &result) const; - - //! Compute the covariance matrix of the regression parameters. - template - bool covariances(std::size_t n, - MATRIX &x, - double variance, - double maxCondition, - TMatrix &result) const; - - //! Get the gramian of the design matrix. - template - void gramian(std::size_t n, MATRIX &x) const - { - for (std::size_t i = 0u; i < n; ++i) - { - x(i,i) = CBasicStatistics::mean(m_S)(i+i); - for (std::size_t j = i+1; j < n; ++j) - { - x(i,j) = CBasicStatistics::mean(m_S)(i+j); - } - } - } + return result / interval; + } + + //! Get the vector statistic. + const TVectorMeanAccumulator& statistic() const { return m_S; } - private: - //! Sufficient statistics for computing the least squares - //! regression. There are 3N - 1 in total, for the distinct - //! values in the design matrix and vector. - TVectorMeanAccumulator m_S; - }; - - //! Get the predicted value of \p r at \p x. - template - static double predict(const boost::array ¶ms, double x) - { - double result = params[0]; - double xi = x; - for (std::size_t i = 1u; i < params.size(); ++i, xi *= x) - { - result += params[i] * xi; + //! Get a checksum for this object. + std::uint64_t checksum() const { return m_S.checksum(); } + + //! Print this regression out for debug. + std::string print() const; + + private: + //! Get the first \p n regression parameters. + template + bool parameters(std::size_t n, MATRIX& x, VECTOR& y, double maxCondition, TArray& result) const; + + //! Compute the covariance matrix of the regression parameters. + template + bool covariances(std::size_t n, MATRIX& x, double variance, double maxCondition, TMatrix& result) const; + + //! Get the gramian of the design matrix. + template + void gramian(std::size_t n, MATRIX& x) const { + for (std::size_t i = 0u; i < n; ++i) { + x(i, i) = CBasicStatistics::mean(m_S)(i + i); + for (std::size_t j = i + 1; j < n; ++j) { + x(i, j) = CBasicStatistics::mean(m_S)(i + j); + } } - return result; } - //! \brief A Wiener process model of the evolution of the parameters - //! of our online least squares regression model. - template - class CLeastSquaresOnlineParameterProcess - { - public: - using TVector = CVectorNx1; - using TMatrix = CSymmetricMatrixNxN; - - public: - static const std::string UNIT_TIME_COVARIANCES_TAG; - - public: - //! Restore by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Persist by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Add a new sample of the regression parameters drift over - //! \p time. - void add(double time, const TVector &sample, const TVector &weight = TVector(1)) - { - // For the Wiener process: - // - // P(t(i+1)) - P(t(i)) ~ N(0, (t(i+1) - t(i)) * C) - // - // Defining dt(i) = t(i+1) - t(i) and assuming t(i) are - // monotonic increasing it follows that - // - // {D(t(i+1)) = (1 / dt(i))^(1/2) * P(t(i+1)) - P(t(i))} - // - // are N(0, C) IID. 
Therefore, the ML estimate of the - // distribution of the parameters at time T after the last - // measurement is N(0, T * C) where C is the empirical - // covariance matrix of the samples D(t(0)),..., D(t(n)). - - if (time > 0.0) - { - TVector sample_ = static_cast(std::sqrt(1.0 / time)) * sample; - m_UnitTimeCovariances.add(sample_, weight); - } - } + private: + //! Sufficient statistics for computing the least squares + //! regression. There are 3N - 1 in total, for the distinct + //! values in the design matrix and vector. + TVectorMeanAccumulator m_S; + }; + + //! Get the predicted value of \p r at \p x. + template + static double predict(const boost::array& params, double x) { + double result = params[0]; + double xi = x; + for (std::size_t i = 1u; i < params.size(); ++i, xi *= x) { + result += params[i] * xi; + } + return result; + } - //! Age the covariances. - void age(T factor) - { - m_UnitTimeCovariances.age(factor); - } + //! \brief A Wiener process model of the evolution of the parameters + //! of our online least squares regression model. + template + class CLeastSquaresOnlineParameterProcess { + public: + using TVector = CVectorNx1; + using TMatrix = CSymmetricMatrixNxN; - //! Get the process covariance matrix. - TMatrix covariance() const; + public: + static const std::string UNIT_TIME_COVARIANCES_TAG; - //! Compute the variance of the mean zero normal distribution - //! due to the drift in the regression parameters over \p time. - double predictionVariance(double time) const - { - if (time <= 0.0) - { - return 0.0; - } + public: + //! Restore by traversing a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Persist by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Add a new sample of the regression parameters drift over + //! \p time. + void add(double time, const TVector& sample, const TVector& weight = TVector(1)) { + // For the Wiener process: + // + // P(t(i+1)) - P(t(i)) ~ N(0, (t(i+1) - t(i)) * C) + // + // Defining dt(i) = t(i+1) - t(i) and assuming t(i) are + // monotonic increasing it follows that + // + // {D(t(i+1)) = (1 / dt(i))^(1/2) * P(t(i+1)) - P(t(i))} + // + // are N(0, C) IID. Therefore, the ML estimate of the + // distribution of the parameters at time T after the last + // measurement is N(0, T * C) where C is the empirical + // covariance matrix of the samples D(t(0)),..., D(t(n)). + + if (time > 0.0) { + TVector sample_ = static_cast(std::sqrt(1.0 / time)) * sample; + m_UnitTimeCovariances.add(sample_, weight); + } + } - TVector dT; - T dt = static_cast(std::sqrt(time)); - T dTi = dt; - for (std::size_t i = 0u; i < N; ++i, dTi *= dt) - { - dT(i) = dTi; - } + //! Age the covariances. + void age(T factor) { m_UnitTimeCovariances.age(factor); } - CSymmetricMatrixNxN covariance = - CBasicStatistics::covariances(m_UnitTimeCovariances); + //! Get the process covariance matrix. + TMatrix covariance() const; - return dT.inner(covariance * dT); - } + //! Compute the variance of the mean zero normal distribution + //! due to the drift in the regression parameters over \p time. + double predictionVariance(double time) const { + if (time <= 0.0) { + return 0.0; + } - //! Get a checksum for this object. 
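A note on predictionVariance above: it evaluates the quadratic form

    \f$d^t C d, \quad d_i = T^{(i+1)/2}\f$

where \f$C\f$ is the estimated unit time covariance matrix and \f$T\f$ the prediction horizon. For a single parameter this reduces to \f$C_{00} T\f$, the familiar linear-in-time variance of a Wiener process.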
- uint64_t checksum() const; + TVector dT; + T dt = static_cast(std::sqrt(time)); + T dTi = dt; + for (std::size_t i = 0u; i < N; ++i, dTi *= dt) { + dT(i) = dTi; + } + + CSymmetricMatrixNxN covariance = CBasicStatistics::covariances(m_UnitTimeCovariances); + + return dT.inner(covariance * dT); + } + + //! Get a checksum for this object. + uint64_t checksum() const; - //! Print this process out for debug. - std::string print() const; + //! Print this process out for debug. + std::string print() const; - private: - using TCovarianceAccumulator = CBasicStatistics::SSampleCovariances; + private: + using TCovarianceAccumulator = CBasicStatistics::SSampleCovariances; - private: - //! The estimator of the Wiener process's unit time - //! covariance matrix. - TCovarianceAccumulator m_UnitTimeCovariances; - }; + private: + //! The estimator of the Wiener process's unit time + //! covariance matrix. + TCovarianceAccumulator m_UnitTimeCovariances; + }; }; template -double CRegression::CLeastSquaresOnline::predict(double x, double maxCondition) const -{ +double CRegression::CLeastSquaresOnline::predict(double x, double maxCondition) const { TArray params; this->parameters(params, maxCondition); return CRegression::predict(params, x); @@ -573,7 +491,6 @@ template const std::string CRegression::CLeastSquaresOnline::STATISTIC_TAG("a"); template const std::string CRegression::CLeastSquaresOnlineParameterProcess::UNIT_TIME_COVARIANCES_TAG("a"); - } } diff --git a/include/maths/CRegressionDetail.h b/include/maths/CRegressionDetail.h index e2fed53f44..2446853436 100644 --- a/include/maths/CRegressionDetail.h +++ b/include/maths/CRegressionDetail.h @@ -19,35 +19,27 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { template -bool CRegression::CLeastSquaresOnline::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CRegression::CLeastSquaresOnline::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE(STATISTIC_TAG, m_S.fromDelimited(traverser.value())) - } - while (traverser.next()); + } while (traverser.next()); return true; } template -void CRegression::CLeastSquaresOnline::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CRegression::CLeastSquaresOnline::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(STATISTIC_TAG, m_S.toDelimited()); } template -void CRegression::CLeastSquaresOnline::shiftAbscissa(double dx) -{ - if (CBasicStatistics::count(m_S) == 0.0) - { +void CRegression::CLeastSquaresOnline::shiftAbscissa(double dx) { + if (CBasicStatistics::count(m_S) == 0.0) { return; } @@ -61,28 +53,24 @@ void CRegression::CLeastSquaresOnline::shiftAbscissa(double dx) // -> 1/n * ( sum_i{ t(i)^i * y(i) } // + sum_j{ (i j) * dx^(i - j) * sum_i{ t(i)^j y(i) } } ) - double d[2*N-2] = { dx }; - for (std::size_t i = 1u; i < 2*N-2; ++i) - { - d[i] = d[i-1] * dx; + double d[2 * N - 2] = {dx}; + for (std::size_t i = 1u; i < 2 * N - 2; ++i) { + d[i] = d[i - 1] * dx; } LOG_TRACE("d = " << core::CContainerPrinter::print(d)); LOG_TRACE("S(before) " << CBasicStatistics::mean(m_S)); - for (std::size_t i = 2*N-2; i > 0; --i) - { + for (std::size_t i = 2 * N - 2; i > 0; --i) { LOG_TRACE("i = " << i); - for (std::size_t j = 0u; j < i; ++j) - { - double bij = CCategoricalTools::binomialCoefficient(i, j) * d[i-j-1]; + for (std::size_t j = 0u; j < i; ++j) { + double bij = 
CCategoricalTools::binomialCoefficient(i, j) * d[i - j - 1]; LOG_TRACE("bij = " << bij); CBasicStatistics::moment<0>(m_S)(i) += bij * CBasicStatistics::mean(m_S)(j); - if (i >= N) - { + if (i >= N) { continue; } - std::size_t yi = i + 2*N-1; - std::size_t yj = j + 2*N-1; + std::size_t yi = i + 2 * N - 1; + std::size_t yj = j + 2 * N - 1; LOG_TRACE("yi = " << yi << ", yj = " << yj); CBasicStatistics::moment<0>(m_S)(yi) += bij * CBasicStatistics::mean(m_S)(yj); } @@ -91,37 +79,29 @@ void CRegression::CLeastSquaresOnline::shiftAbscissa(double dx) } template -bool CRegression::CLeastSquaresOnline::parameters(TArray &result, double maxCondition) const -{ +bool CRegression::CLeastSquaresOnline::parameters(TArray& result, double maxCondition) const { result.fill(0.0); // Search for non-singular solution. - std::size_t n = N+1; - while (--n > 0) - { - switch (n) - { - case 1: - { - result[0] = CBasicStatistics::mean(m_S)(2*N-1); + std::size_t n = N + 1; + while (--n > 0) { + switch (n) { + case 1: { + result[0] = CBasicStatistics::mean(m_S)(2 * N - 1); return true; } - case N: - { + case N: { Eigen::Matrix x; Eigen::Matrix y; - if (this->parameters(N, x, y, maxCondition, result)) - { + if (this->parameters(N, x, y, maxCondition, result)) { return true; } break; } - default: - { + default: { CDenseMatrix x(n, n); CDenseVector y(n); - if (this->parameters(n, x, y, maxCondition, result)) - { + if (this->parameters(n, x, y, maxCondition, result)) { return true; } break; @@ -132,37 +112,27 @@ bool CRegression::CLeastSquaresOnline::parameters(TArray &result, double m } template -bool CRegression::CLeastSquaresOnline::covariances(double variance, - TMatrix &result, - double maxCondition) const -{ +bool CRegression::CLeastSquaresOnline::covariances(double variance, TMatrix& result, double maxCondition) const { result = TMatrix(0.0); // Search for the covariance matrix of a non-singular subproblem. 
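The shiftAbscissa update above is the binomial expansion of the shifted moments: for each order \f$i\f$,

    \f$\frac{1}{n} \sum_k (x_k + dx)^i = \sum_{j=0}^{i} \binom{i}{j} \, dx^{\,i-j} \, \frac{1}{n} \sum_k x_k^j\f$

with the \f$j = i\f$ term being the moment itself. This is why the loop runs from the highest order downwards: each lower order moment must still be unshifted when it is consumed by the update of a higher order one.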
- std::size_t n = N+1; - while (--n > 0) - { - switch (n) - { - case 1: - { - result(0,0) = variance / CBasicStatistics::count(m_S); + std::size_t n = N + 1; + while (--n > 0) { + switch (n) { + case 1: { + result(0, 0) = variance / CBasicStatistics::count(m_S); return true; } - case N: - { + case N: { Eigen::Matrix x; - if (!this->covariances(N, x, variance, maxCondition, result)) - { + if (!this->covariances(N, x, variance, maxCondition, result)) { continue; } break; } - default: - { + default: { CDenseMatrix x(n, n); - if (!this->covariances(n, x, variance, maxCondition, result)) - { + if (!this->covariances(n, x, variance, maxCondition, result)) { continue; } break; @@ -174,18 +144,12 @@ bool CRegression::CLeastSquaresOnline::covariances(double variance, } template -std::string CRegression::CLeastSquaresOnline::print() const -{ +std::string CRegression::CLeastSquaresOnline::print() const { TArray params; - if (this->parameters(params)) - { + if (this->parameters(params)) { std::string result; - for (std::size_t i = params.size()-1; i > 0; --i) - { - result += core::CStringUtils::typeToStringPretty(params[i]) - + " x^" - + core::CStringUtils::typeToStringPretty(i) - + " + "; + for (std::size_t i = params.size() - 1; i > 0; --i) { + result += core::CStringUtils::typeToStringPretty(params[i]) + " x^" + core::CStringUtils::typeToStringPretty(i) + " + "; } result += core::CStringUtils::typeToStringPretty(params[0]); return result; @@ -195,31 +159,22 @@ std::string CRegression::CLeastSquaresOnline::print() const template template -bool CRegression::CLeastSquaresOnline::parameters(std::size_t n, - MATRIX &x, - VECTOR &y, - double maxCondition, - TArray &result) const -{ - if (n == 1) - { - result[0] = CBasicStatistics::mean(m_S)(2*N-1); +bool CRegression::CLeastSquaresOnline::parameters(std::size_t n, MATRIX& x, VECTOR& y, double maxCondition, TArray& result) const { + if (n == 1) { + result[0] = CBasicStatistics::mean(m_S)(2 * N - 1); return true; } this->gramian(n, x); - for (std::size_t i = 0u; i < n; ++i) - { - y(i) = CBasicStatistics::mean(m_S)(i+2*N-1); + for (std::size_t i = 0u; i < n; ++i) { + y(i) = CBasicStatistics::mean(m_S)(i + 2 * N - 1); } LOG_TRACE("S = " << CBasicStatistics::mean(m_S)); LOG_TRACE("x =\n" << x); LOG_TRACE("y =\n" << y); - Eigen::JacobiSVD x_(x.template selfadjointView(), - Eigen::ComputeFullU | Eigen::ComputeFullV); - if (x_.singularValues()(0) > maxCondition * x_.singularValues()(n-1)) - { + Eigen::JacobiSVD x_(x.template selfadjointView(), Eigen::ComputeFullU | Eigen::ComputeFullV); + if (x_.singularValues()(0) > maxCondition * x_.singularValues()(n - 1)) { LOG_TRACE("singular values = " << x_.singularValues()); return false; } @@ -227,8 +182,7 @@ bool CRegression::CLeastSquaresOnline::parameters(std::size_t n, // Don't bother checking the solution since we check // the matrix condition above. 
VECTOR r = x_.solve(y); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { result[i] = r(i); } @@ -237,23 +191,16 @@ bool CRegression::CLeastSquaresOnline::parameters(std::size_t n, template template -bool CRegression::CLeastSquaresOnline::covariances(std::size_t n, - MATRIX &x, - double variance, - double maxCondition, - TMatrix &result) const -{ - if (n == 1) - { +bool CRegression::CLeastSquaresOnline::covariances(std::size_t n, MATRIX& x, double variance, double maxCondition, TMatrix& result) + const { + if (n == 1) { x(0) = variance / CBasicStatistics::count(m_S); return true; } this->gramian(n, x); - Eigen::JacobiSVD x_(x.template selfadjointView(), - Eigen::ComputeFullU | Eigen::ComputeFullV); - if (x_.singularValues()(0) > maxCondition * x_.singularValues()(n-1)) - { + Eigen::JacobiSVD x_(x.template selfadjointView(), Eigen::ComputeFullU | Eigen::ComputeFullV); + if (x_.singularValues()(0) > maxCondition * x_.singularValues()(n - 1)) { LOG_TRACE("singular values = " << x_.singularValues()); return false; } @@ -262,16 +209,12 @@ bool CRegression::CLeastSquaresOnline::covariances(std::size_t n, // the matrix condition above. Also, we zero initialize result // in the calling code so any values we don't fill in the // following loop are zero (as required). - x = ( x_.matrixV() - * x_.singularValues().cwiseInverse().asDiagonal() - * x_.matrixU().transpose()) - * variance / CBasicStatistics::count(m_S); - for (std::size_t i = 0u; i < n; ++i) - { - result(i,i) = x(i,i); - for (std::size_t j = 0u; j < i; ++j) - { - result(i,j) = x(i,j); + x = (x_.matrixV() * x_.singularValues().cwiseInverse().asDiagonal() * x_.matrixU().transpose()) * variance / + CBasicStatistics::count(m_S); + for (std::size_t i = 0u; i < n; ++i) { + result(i, i) = x(i, i); + for (std::size_t j = 0u; j < i; ++j) { + result(i, j) = x(i, j); } } @@ -279,44 +222,36 @@ bool CRegression::CLeastSquaresOnline::covariances(std::size_t n, } template -bool CRegression::CLeastSquaresOnlineParameterProcess::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CRegression::CLeastSquaresOnlineParameterProcess::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE(UNIT_TIME_COVARIANCES_TAG, m_UnitTimeCovariances.fromDelimited(traverser.value())) - } - while (traverser.next()); + } while (traverser.next()); return true; } template -void CRegression::CLeastSquaresOnlineParameterProcess::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CRegression::CLeastSquaresOnlineParameterProcess::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(UNIT_TIME_COVARIANCES_TAG, m_UnitTimeCovariances.toDelimited()); } template typename CRegression::CLeastSquaresOnlineParameterProcess::TMatrix -CRegression::CLeastSquaresOnlineParameterProcess::covariance() const -{ +CRegression::CLeastSquaresOnlineParameterProcess::covariance() const { return CBasicStatistics::covariances(m_UnitTimeCovariances); } template -uint64_t CRegression::CLeastSquaresOnlineParameterProcess::checksum() const -{ +uint64_t CRegression::CLeastSquaresOnlineParameterProcess::checksum() const { return m_UnitTimeCovariances.checksum(); } template -std::string CRegression::CLeastSquaresOnlineParameterProcess::print() const -{ +std::string CRegression::CLeastSquaresOnlineParameterProcess::print() const { std::ostringstream result; result << 
CBasicStatistics::covariances(m_UnitTimeCovariances); return result.str(); } - } } diff --git a/include/maths/CRestoreParams.h b/include/maths/CRestoreParams.h index 8f8da3c697..dda3bdcf0a 100644 --- a/include/maths/CRestoreParams.h +++ b/include/maths/CRestoreParams.h @@ -15,16 +15,13 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CModelParams; //! \brief Gatherers up extra parameters supplied when restoring //! distribution models. -struct MATHS_EXPORT SDistributionRestoreParams -{ +struct MATHS_EXPORT SDistributionRestoreParams { SDistributionRestoreParams(maths_t::EDataType dataType, double decayRate, double minimumClusterFraction = MINIMUM_CLUSTER_SPLIT_FRACTION, @@ -49,15 +46,14 @@ struct MATHS_EXPORT SDistributionRestoreParams //! \brief Gatherers up extra parameters supplied when restoring //! time series decompositions. -struct MATHS_EXPORT STimeSeriesDecompositionRestoreParams -{ +struct MATHS_EXPORT STimeSeriesDecompositionRestoreParams { STimeSeriesDecompositionRestoreParams(double decayRate, core_t::TTime minimumBucketLength, std::size_t componentSize, - const SDistributionRestoreParams &changeModelParams); + const SDistributionRestoreParams& changeModelParams); STimeSeriesDecompositionRestoreParams(double decayRate, core_t::TTime minimumBucketLength, - const SDistributionRestoreParams &changeModelParams); + const SDistributionRestoreParams& changeModelParams); //! The rate at which decomposition loses information. double s_DecayRate; @@ -74,13 +70,12 @@ struct MATHS_EXPORT STimeSeriesDecompositionRestoreParams //! \brief Gatherers up extra parameters supplied when restoring //! time series models. -struct MATHS_EXPORT SModelRestoreParams -{ +struct MATHS_EXPORT SModelRestoreParams { using TModelParamsCRef = boost::reference_wrapper; - SModelRestoreParams(const CModelParams ¶ms, - const STimeSeriesDecompositionRestoreParams &decompositionParams, - const SDistributionRestoreParams &distributionParams); + SModelRestoreParams(const CModelParams& params, + const STimeSeriesDecompositionRestoreParams& decompositionParams, + const SDistributionRestoreParams& distributionParams); //! The model parameters. TModelParamsCRef s_Params; @@ -91,7 +86,6 @@ struct MATHS_EXPORT SModelRestoreParams //! The time series residual distribution restore parameters. SDistributionRestoreParams s_DistributionParams; }; - } } diff --git a/include/maths/CSampling.h b/include/maths/CSampling.h index c77d7d1d0f..51dc05bf47 100644 --- a/include/maths/CSampling.h +++ b/include/maths/CSampling.h @@ -22,465 +22,379 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Sampling functionality. //! //! DEFINITION:\n //! This is a place holder for random sampling utilities and algorithms. -class MATHS_EXPORT CSampling : private core::CNonInstantiatable -{ +class MATHS_EXPORT CSampling : private core::CNonInstantiatable { +public: + using TDoubleVec = std::vector; + using TDoubleVecVec = std::vector; + using TSizeVec = std::vector; + using TPtrdiffVec = std::vector; + + //! \brief A mockable random number generator which uses boost::random::mt11213b. + class MATHS_EXPORT CRandomNumberGenerator { public: - using TDoubleVec = std::vector; - using TDoubleVecVec = std::vector; - using TSizeVec = std::vector; - using TPtrdiffVec = std::vector; - - //! \brief A mockable random number generator which uses boost::random::mt11213b. 
- class MATHS_EXPORT CRandomNumberGenerator - { - public: - using result_type = boost::random::mt11213b::result_type; - - public: - //! Mock the random number generator to produce a constant. - void mock(); - - //! Unmock the random number generator. - void unmock(); - - //! Seed the random number generator. - void seed(); - - //! Returns the smallest value that the generator can produce. - static result_type min() - { - return boost::random::mt11213b::min(); - } + using result_type = boost::random::mt11213b::result_type; - //! Returns the largest value that the generator can produce. - static result_type max() - { - return boost::random::mt11213b::max(); - } + public: + //! Mock the random number generator to produce a constant. + void mock(); - //! Produces the next value of the generator. - result_type operator()() - { - if (m_Mock) - { - return *m_Mock; - } - return m_Rng.operator()(); - } + //! Unmock the random number generator. + void unmock(); - //! Fills a range with random values. - template - void generate(ITR first, ITR last) - { - if (m_Mock) - { - for (/**/; first != last; ++first) - { - *first = *m_Mock; - } - } - m_Rng.generate(first, last); - } + //! Seed the random number generator. + void seed(); - //! Writes the mersenne_twister_engine to a std::ostream. - template - friend std::basic_ostream & - operator<<(std::basic_ostream &o, - const CRandomNumberGenerator &g) - { - return o << g.m_Rng; - } + //! Returns the smallest value that the generator can produce. + static result_type min() { return boost::random::mt11213b::min(); } + + //! Returns the largest value that the generator can produce. + static result_type max() { return boost::random::mt11213b::max(); } + + //! Produces the next value of the generator. + result_type operator()() { + if (m_Mock) { + return *m_Mock; + } + return m_Rng.operator()(); + } - //! Reads a mersenne_twister_engine from a std::istream. - template - friend std::basic_istream & - operator>>(std::basic_istream &i, - CRandomNumberGenerator &g) - { - return i >> g.m_Rng; + //! Fills a range with random values. + template + void generate(ITR first, ITR last) { + if (m_Mock) { + for (/**/; first != last; ++first) { + *first = *m_Mock; } + } + m_Rng.generate(first, last); + } - private: - using TOptionalResultType = boost::optional; + //! Writes the mersenne_twister_engine to a std::ostream. + template + friend std::basic_ostream& operator<<(std::basic_ostream& o, const CRandomNumberGenerator& g) { + return o << g.m_Rng; + } - private: - TOptionalResultType m_Mock; - boost::random::mt11213b m_Rng; - }; + //! Reads a mersenne_twister_engine from a std::istream. + template + friend std::basic_istream& operator>>(std::basic_istream& i, CRandomNumberGenerator& g) { + return i >> g.m_Rng; + } + + private: + using TOptionalResultType = boost::optional; - //! \brief Setup and tears down mock random numbers in the scope in which - //! it is constructed. - class MATHS_EXPORT CScopeMockRandomNumberGenerator - { - public: - CScopeMockRandomNumberGenerator(); - ~CScopeMockRandomNumberGenerator(); - }; + private: + TOptionalResultType m_Mock; + boost::random::mt11213b m_Rng; + }; + //! \brief Setup and tears down mock random numbers in the scope in which + //! it is constructed. + class MATHS_EXPORT CScopeMockRandomNumberGenerator { public: - //! \name Persistence - //@{ - //! Restore the static members of this class from persisted state - static bool staticsAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! 
Persist the static members of this class - static void staticsAcceptPersistInserter(core::CStatePersistInserter &inserter); - //@} - - //! Reinitialize the random number generator. - static void seed(); - - //! \name Uniform Sampling - //! - //! Sample uniformly from a specified range - //@{ -#define UNIFORM_SAMPLE(TYPE) \ -static TYPE uniformSample(TYPE a, TYPE b); \ -static TYPE uniformSample(CPRNG::CXorOShiro128Plus &rng, TYPE a, TYPE b); \ -static TYPE uniformSample(CPRNG::CXorShift1024Mult &rng, TYPE a, TYPE b); \ -static void uniformSample(TYPE a, TYPE b, std::size_t n, std::vector &result); \ -static void uniformSample(CPRNG::CXorOShiro128Plus &rng, \ - TYPE a, TYPE b, std::size_t n, std::vector &result); \ -static void uniformSample(CPRNG::CXorShift1024Mult &rng, \ - TYPE a, TYPE b, std::size_t n, std::vector &result); - UNIFORM_SAMPLE(std::size_t) - UNIFORM_SAMPLE(std::ptrdiff_t) - UNIFORM_SAMPLE(double) + CScopeMockRandomNumberGenerator(); + ~CScopeMockRandomNumberGenerator(); + }; + +public: + //! \name Persistence + //@{ + //! Restore the static members of this class from persisted state + static bool staticsAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Persist the static members of this class + static void staticsAcceptPersistInserter(core::CStatePersistInserter& inserter); + //@} + + //! Reinitialize the random number generator. + static void seed(); + +//! \name Uniform Sampling +//! +//! Sample uniformly from a specified range +//@{ +#define UNIFORM_SAMPLE(TYPE) \ + static TYPE uniformSample(TYPE a, TYPE b); \ + static TYPE uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b); \ + static TYPE uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b); \ + static void uniformSample(TYPE a, TYPE b, std::size_t n, std::vector& result); \ + static void uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b, std::size_t n, std::vector& result); \ + static void uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b, std::size_t n, std::vector& result); + UNIFORM_SAMPLE(std::size_t) + UNIFORM_SAMPLE(std::ptrdiff_t) + UNIFORM_SAMPLE(double) #undef UNIFORM_SAMPLE - //@} - - //! Get a normal sample with mean and variance \p mean and - //! \p variance, respectively. - static double normalSample(double mean, double variance); - - //! Get a normal sample with mean and variance \p mean and - //! \p variance, respectively. - static double normalSample(CPRNG::CXorOShiro128Plus &rng, - double mean, - double variance); - - //! Get a normal sample with mean and variance \p mean and - //! \p variance, respectively. - static double normalSample(CPRNG::CXorShift1024Mult &rng, - double mean, - double variance); - - //! Get \p n normal samples with mean and variance \p mean and - //! \p variance, respectively. - static void normalSample(double mean, - double variance, - std::size_t n, - TDoubleVec &result); - - //! Get \p n normal samples with mean and variance \p mean and - //! \p variance, respectively, using \p rng. - static void normalSample(CPRNG::CXorOShiro128Plus &rng, - double mean, - double variance, - std::size_t n, - TDoubleVec &result); - - //! Get \p n normal samples with mean and variance \p mean and - //! \p variance, respectively, using \p rng. - static void normalSample(CPRNG::CXorShift1024Mult &rng, - double mean, - double variance, - std::size_t n, - TDoubleVec &result); - - //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f - //! degrees of freedom. 
- static void chiSquaredSample(double f, std::size_t n, TDoubleVec &result); - - //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f - //! degrees of freedom using \p rng. - static void chiSquaredSample(CPRNG::CXorOShiro128Plus &rng, - double f, - std::size_t n, - TDoubleVec &result); - - //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f - //! degrees of freedom using \p rng. - static void chiSquaredSample(CPRNG::CXorShift1024Mult &rng, - double f, - std::size_t n, - TDoubleVec &result); - - //! \name Multivariate Normal Sampling - //@{ - //! Sample from the normal distribution with mean \p mean and - //! covariance matrix \p covariance. - //! - //! \param[in] mean The mean vector. - //! \param[in] covariance The covariance matrix. - //! \param[in] n The number of samples to generate. - //! \param[out] samples Filled in with IID samples of the - //! multivariate normal. - static bool multivariateNormalSample(const TDoubleVec &mean, - const TDoubleVecVec &covariance, - std::size_t n, - TDoubleVecVec &samples); - - //! Overload of multivariate normal sample using \p rng - static bool multivariateNormalSample(CPRNG::CXorOShiro128Plus &rng, - const TDoubleVec &mean, - const TDoubleVecVec &covariance, - std::size_t n, - TDoubleVecVec &samples); - - //! Overload of multivariate normal sample using \p rng - static bool multivariateNormalSample(CPRNG::CXorShift1024Mult &rng, - const TDoubleVec &mean, - const TDoubleVecVec &covariance, - std::size_t n, - TDoubleVecVec &samples); - -#define MULTIVARIATE_NORMAL_SAMPLE(N) \ -static void multivariateNormalSample(const CVectorNx1 &mean, \ - const CSymmetricMatrixNxN &covariance, \ - std::size_t n, \ - std::vector > &samples); \ -static void multivariateNormalSample(CPRNG::CXorOShiro128Plus &rng, \ - const CVectorNx1 &mean, \ - const CSymmetricMatrixNxN &covariance, \ - std::size_t n, \ - std::vector > &samples); \ -static void multivariateNormalSample(CPRNG::CXorShift1024Mult &rng, \ - const CVectorNx1 &mean, \ - const CSymmetricMatrixNxN &covariance, \ - std::size_t n, \ - std::vector > &samples) - MULTIVARIATE_NORMAL_SAMPLE(2); - MULTIVARIATE_NORMAL_SAMPLE(3); - MULTIVARIATE_NORMAL_SAMPLE(4); - MULTIVARIATE_NORMAL_SAMPLE(5); - MULTIVARIATE_NORMAL_SAMPLE(6); - MULTIVARIATE_NORMAL_SAMPLE(7); - MULTIVARIATE_NORMAL_SAMPLE(8); - MULTIVARIATE_NORMAL_SAMPLE(9); + //@} + + //! Get a normal sample with mean and variance \p mean and + //! \p variance, respectively. + static double normalSample(double mean, double variance); + + //! Get a normal sample with mean and variance \p mean and + //! \p variance, respectively. + static double normalSample(CPRNG::CXorOShiro128Plus& rng, double mean, double variance); + + //! Get a normal sample with mean and variance \p mean and + //! \p variance, respectively. + static double normalSample(CPRNG::CXorShift1024Mult& rng, double mean, double variance); + + //! Get \p n normal samples with mean and variance \p mean and + //! \p variance, respectively. + static void normalSample(double mean, double variance, std::size_t n, TDoubleVec& result); + + //! Get \p n normal samples with mean and variance \p mean and + //! \p variance, respectively, using \p rng. + static void normalSample(CPRNG::CXorOShiro128Plus& rng, double mean, double variance, std::size_t n, TDoubleVec& result); + + //! Get \p n normal samples with mean and variance \p mean and + //! \p variance, respectively, using \p rng. 
+ static void normalSample(CPRNG::CXorShift1024Mult& rng, double mean, double variance, std::size_t n, TDoubleVec& result); + + //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f + //! degrees of freedom. + static void chiSquaredSample(double f, std::size_t n, TDoubleVec& result); + + //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f + //! degrees of freedom using \p rng. + static void chiSquaredSample(CPRNG::CXorOShiro128Plus& rng, double f, std::size_t n, TDoubleVec& result); + + //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f + //! degrees of freedom using \p rng. + static void chiSquaredSample(CPRNG::CXorShift1024Mult& rng, double f, std::size_t n, TDoubleVec& result); + + //! \name Multivariate Normal Sampling + //@{ + //! Sample from the normal distribution with mean \p mean and + //! covariance matrix \p covariance. + //! + //! \param[in] mean The mean vector. + //! \param[in] covariance The covariance matrix. + //! \param[in] n The number of samples to generate. + //! \param[out] samples Filled in with IID samples of the + //! multivariate normal. + static bool multivariateNormalSample(const TDoubleVec& mean, const TDoubleVecVec& covariance, std::size_t n, TDoubleVecVec& samples); + + //! Overload of multivariate normal sample using \p rng + static bool multivariateNormalSample(CPRNG::CXorOShiro128Plus& rng, + const TDoubleVec& mean, + const TDoubleVecVec& covariance, + std::size_t n, + TDoubleVecVec& samples); + + //! Overload of multivariate normal sample using \p rng + static bool multivariateNormalSample(CPRNG::CXorShift1024Mult& rng, + const TDoubleVec& mean, + const TDoubleVecVec& covariance, + std::size_t n, + TDoubleVecVec& samples); + +#define MULTIVARIATE_NORMAL_SAMPLE(N) \ + static void multivariateNormalSample(const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, \ + std::size_t n, \ + std::vector>& samples); \ + static void multivariateNormalSample(CPRNG::CXorOShiro128Plus& rng, \ + const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, \ + std::size_t n, \ + std::vector>& samples); \ + static void multivariateNormalSample(CPRNG::CXorShift1024Mult& rng, \ + const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, \ + std::size_t n, \ + std::vector>& samples) + MULTIVARIATE_NORMAL_SAMPLE(2); + MULTIVARIATE_NORMAL_SAMPLE(3); + MULTIVARIATE_NORMAL_SAMPLE(4); + MULTIVARIATE_NORMAL_SAMPLE(5); + MULTIVARIATE_NORMAL_SAMPLE(6); + MULTIVARIATE_NORMAL_SAMPLE(7); + MULTIVARIATE_NORMAL_SAMPLE(8); + MULTIVARIATE_NORMAL_SAMPLE(9); #undef MULTIVARIATE_NORMAL_SAMPLE - //@} - - //! \name Categorical Sampling - //@{ - //! Generate a sample from a categorical distribution with - //! category probabilities \p probabilities. - static std::size_t categoricalSample(TDoubleVec &probabilities); - - //! Generate a sample from a categorical distribution with - //! category probabilities \p probabilities using \p rng. - static std::size_t categoricalSample(CPRNG::CXorOShiro128Plus &rng, - TDoubleVec &probabilities); - - //! Generate a sample from a categorical distribution with - //! category probabilities \p probabilities using \p rng. - static std::size_t categoricalSample(CPRNG::CXorShift1024Mult &rng, - TDoubleVec &probabilities); - - //! Generate \p n samples from a categorical distribution - //! with category probabilities \p probabilities assuming - //! the values are replaced between draws. - static void categoricalSampleWithReplacement(TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result); - - //! 
Generate \p n samples from a categorical distribution - //! with category probabilities \p probabilities using \p rng - //! assuming the values are replaced between draws. - static void categoricalSampleWithReplacement(CPRNG::CXorOShiro128Plus &rng, - TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result); - - //! Generate \p n samples from a categorical distribution - //! with category probabilities \p probabilities using \p rng - //! assuming the values are replaced between draws. - static void categoricalSampleWithReplacement(CPRNG::CXorShift1024Mult &rng, - TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result); - - //! Generate \p n samples from a categorical distribution - //! with category probabilities \p probabilities assuming - //! the values are *not* replaced between draws. - static void categoricalSampleWithoutReplacement(TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result); - - //! Generate \p n samples from a categorical distribution - //! with category probabilities \p probabilities using \p rng - //! assuming the values are *not* replaced between draws. - static void categoricalSampleWithoutReplacement(CPRNG::CXorOShiro128Plus &rng, - TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result); - - //! Generate \p n samples from a categorical distribution - //! with category probabilities \p probabilities using \p rng - //! assuming the values are *not* replaced between draws. - static void categoricalSampleWithoutReplacement(CPRNG::CXorShift1024Mult &rng, - TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result); - - //! Generate samples from a multinomial distribution with number - //! of trials \p n and category probabilities \p probabilities. - //! - //! This generates a sample from the distribution function - //!
-        //!   \f$\displaystyle f(\{n_i\}) = \frac{n!}{\prod_i{n_i!}}\prod_i{p_i^{n_i}}\f$
-        //!
- //! - //! The probabilities are sorted in decreasing order and - //! \p sample is filled with the counts of each probability - //! in this sorted order. This is the most efficient strategy - //! since often \p sample will then be much smaller than - //! \p probabilities and the sampling loop can end early. - //! - //! \param[in,out] probabilities The category probabilities, - //! which should be normalized. - //! \param[in] n The number of trials. - //! \param[out] sample Filled in with the counts of the each - //! category in the sorted probabilities order. Missing counts - //! are zero. - //! \param[in] sorted Set to true if the probabilities are - //! already sorted in descending order. - static void multinomialSampleFast(TDoubleVec &probabilities, - std::size_t n, - TSizeVec &sample, - bool sorted = false); - - //! Generate samples according to the multinomial distribution - //! with number of trials \p n and category probabilities - //! \p probabilities. - //! - //! This generates a sample from the distribution function - //!
-        //!   \f$\displaystyle f(\{n_i\}) = \frac{n!}{\prod_i{n_i!}}\prod_i{p_i^{n_i}}\f$
-        //!
- //! - //! \param[in] probabilities The category probabilities, which - //! should be normalized. - //! \param[in] n The number of trials. - //! \param[out] sample Filled in with the counts of each category - //! in \p probabilities. This sample includes zeros explicitly, - //! contrast with multinomialSampleFast. - static void multinomialSampleStable(TDoubleVec probabilities, - std::size_t n, - TSizeVec &sample); - //@} - - //! Sample a random permutation of the value [\p first, \p last). - //! - //! Reorders the elements in the range [\p first, \p last) using the - //! supplied random number generator. - //! - //! \note We provide our own implementation of std::random_shuffle - //! based on the libc++ implementation because this is different from - //! the libstdc++ implementation which can cause platform specific - //! differences. - template - static void random_shuffle(RNG &rng, ITR first, ITR last) - { - auto d = last - first; - if (d > 1) - { - CUniform0nGenerator rand(rng); - for (--last; first < last; ++first, --d) - { - auto i = rand(d); - if (i > 0) - { - std::iter_swap(first, first + i); - } + //@} + + //! \name Categorical Sampling + //@{ + //! Generate a sample from a categorical distribution with + //! category probabilities \p probabilities. + static std::size_t categoricalSample(TDoubleVec& probabilities); + + //! Generate a sample from a categorical distribution with + //! category probabilities \p probabilities using \p rng. + static std::size_t categoricalSample(CPRNG::CXorOShiro128Plus& rng, TDoubleVec& probabilities); + + //! Generate a sample from a categorical distribution with + //! category probabilities \p probabilities using \p rng. + static std::size_t categoricalSample(CPRNG::CXorShift1024Mult& rng, TDoubleVec& probabilities); + + //! Generate \p n samples from a categorical distribution + //! with category probabilities \p probabilities assuming + //! the values are replaced between draws. + static void categoricalSampleWithReplacement(TDoubleVec& probabilities, std::size_t n, TSizeVec& result); + + //! Generate \p n samples from a categorical distribution + //! with category probabilities \p probabilities using \p rng + //! assuming the values are replaced between draws. + static void categoricalSampleWithReplacement(CPRNG::CXorOShiro128Plus& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result); + + //! Generate \p n samples from a categorical distribution + //! with category probabilities \p probabilities using \p rng + //! assuming the values are replaced between draws. + static void categoricalSampleWithReplacement(CPRNG::CXorShift1024Mult& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result); + + //! Generate \p n samples from a categorical distribution + //! with category probabilities \p probabilities assuming + //! the values are *not* replaced between draws. + static void categoricalSampleWithoutReplacement(TDoubleVec& probabilities, std::size_t n, TSizeVec& result); + + //! Generate \p n samples from a categorical distribution + //! with category probabilities \p probabilities using \p rng + //! assuming the values are *not* replaced between draws. + static void + categoricalSampleWithoutReplacement(CPRNG::CXorOShiro128Plus& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result); + + //! Generate \p n samples from a categorical distribution + //! with category probabilities \p probabilities using \p rng + //! assuming the values are *not* replaced between draws. 
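// ---[ Editor's aside: not part of the patch ]---------------------------------
// Sketch of the categorical samplers declared in this section. The
// probabilities are taken by non-const reference, which suggests the
// implementation may work on them in place, so they are rebuilt between calls
// here. Values are illustrative and assumed normalized.
#include <maths/CSampling.h>
#include <cstddef>
#include <vector>

void sketchCategorical() {
    using namespace ml::maths;
    std::vector<double> probabilities{0.5, 0.3, 0.2};
    std::size_t one = CSampling::categoricalSample(probabilities);
    probabilities = {0.5, 0.3, 0.2}; // rebuilt in case the sampler mutated them
    std::vector<std::size_t> draws;  // n sampled category indices, not counts
    CSampling::categoricalSampleWithReplacement(probabilities, /*n*/ 1000, draws);
    static_cast<void>(one);
}
// ------------------------------------------------------------------------------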
+ static void + categoricalSampleWithoutReplacement(CPRNG::CXorShift1024Mult& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result); + + //! Generate samples from a multinomial distribution with number + //! of trials \p n and category probabilities \p probabilities. + //! + //! This generates a sample from the distribution function + //!
+    //!   \f$\displaystyle f(\{n_i\}) = \frac{n!}{\prod_i{n_i!}}\prod_i{p_i^{n_i}}\f$
+    //!
+ //! + //! The probabilities are sorted in decreasing order and + //! \p sample is filled with the counts of each probability + //! in this sorted order. This is the most efficient strategy + //! since often \p sample will then be much smaller than + //! \p probabilities and the sampling loop can end early. + //! + //! \param[in,out] probabilities The category probabilities, + //! which should be normalized. + //! \param[in] n The number of trials. + //! \param[out] sample Filled in with the counts of the each + //! category in the sorted probabilities order. Missing counts + //! are zero. + //! \param[in] sorted Set to true if the probabilities are + //! already sorted in descending order. + static void multinomialSampleFast(TDoubleVec& probabilities, std::size_t n, TSizeVec& sample, bool sorted = false); + + //! Generate samples according to the multinomial distribution + //! with number of trials \p n and category probabilities + //! \p probabilities. + //! + //! This generates a sample from the distribution function + //!
+    //!   \f$\displaystyle f(\{n_i\}) = \frac{n!}{\prod_i{n_i!}}\prod_i{p_i^{n_i}}\f$
+    //!
+ //! + //! \param[in] probabilities The category probabilities, which + //! should be normalized. + //! \param[in] n The number of trials. + //! \param[out] sample Filled in with the counts of each category + //! in \p probabilities. This sample includes zeros explicitly, + //! contrast with multinomialSampleFast. + static void multinomialSampleStable(TDoubleVec probabilities, std::size_t n, TSizeVec& sample); + //@} + + //! Sample a random permutation of the value [\p first, \p last). + //! + //! Reorders the elements in the range [\p first, \p last) using the + //! supplied random number generator. + //! + //! \note We provide our own implementation of std::random_shuffle + //! based on the libc++ implementation because this is different from + //! the libstdc++ implementation which can cause platform specific + //! differences. + template + static void random_shuffle(RNG& rng, ITR first, ITR last) { + auto d = last - first; + if (d > 1) { + CUniform0nGenerator rand(rng); + for (--last; first < last; ++first, --d) { + auto i = rand(d); + if (i > 0) { + std::iter_swap(first, first + i); } } } - - //! Sample a random permutation of the value [\p first, \p last). - //! - //! Reorders the elements in the range [\p first, \p last) using the - //! internal random number generator to provide a random distribution. - template - static void random_shuffle(ITR first, ITR last) - { - core::CScopedFastLock scopedLock(ms_Lock); - random_shuffle(ms_Rng, first, last); + } + + //! Sample a random permutation of the value [\p first, \p last). + //! + //! Reorders the elements in the range [\p first, \p last) using the + //! internal random number generator to provide a random distribution. + template + static void random_shuffle(ITR first, ITR last) { + core::CScopedFastLock scopedLock(ms_Lock); + random_shuffle(ms_Rng, first, last); + } + + //! Optimal (in a sense to be defined below) weighted sampling + //! algorithm. + //! + //! Compute a set \p numberSamples (\f$\{n(i)\}\f$) such that + //! the following constraints are satisfied. + //!
+    //!   \f$\displaystyle n_i \in \{0,1,...,n\}\ \ \forall i\f$
+    //!   \f$\displaystyle \sum_i{n_i} = [n \sum_i{w(i)}]\f$
+    //!   \f$\displaystyle \sum_i{|n_i - n w(i)|}\f$ is minimized
+    //! 
+ //! + //! Typically, the weights will sum to one. + //! + //! \param n The total number of samples required. + //! \param weights The weights with which to sample. + //! \param sampling Filled in with the weighted sampling. + static void weightedSample(std::size_t n, const TDoubleVec& weights, TSizeVec& sampling); + + //! Sample the expectation of the normal distribution with \p mean + //! and \p variance on the \p n quantile intervals. + static void normalSampleQuantiles(double mean, double variance, std::size_t n, TDoubleVec& result); + + //! Sample the expectation of the gamma distribution with \p shape + //! and \p rate on the \p n quantile intervals. + static void gammaSampleQuantiles(double shape, double rate, std::size_t n, TDoubleVec& result); + +private: + //! \brief A uniform generator on the interval [0, n). + template + class CUniform0nGenerator { + public: + CUniform0nGenerator(RNG& generator) : m_Generator(&generator) {} + std::size_t operator()(std::size_t n) const { + boost::random::uniform_int_distribution uniform(0, n - 1); + return uniform(*m_Generator); } - //! Optimal (in a sense to be defined below) weighted sampling - //! algorithm. - //! - //! Compute a set \p numberSamples (\f$\{n(i)\}\f$) such that - //! the following constraints are satisfied. - //!
-        //!   \f$\displaystyle n_i \in \{0,1,...,n\}\ \ \forall i\f$
-        //!   \f$\displaystyle \sum_i{n_i} = [n \sum_i{w(i)}]\f$
-        //!   \f$\displaystyle \sum_i{|n_i - n w(i)|}\f$ is minimized
-        //! 
- //! - //! Typically, the weights will sum to one. - //! - //! \param n The total number of samples required. - //! \param weights The weights with which to sample. - //! \param sampling Filled in with the weighted sampling. - static void weightedSample(std::size_t n, - const TDoubleVec &weights, - TSizeVec &sampling); - - //! Sample the expectation of the normal distribution with \p mean - //! and \p variance on the \p n quantile intervals. - static void normalSampleQuantiles(double mean, - double variance, - std::size_t n, - TDoubleVec &result); - - //! Sample the expectation of the gamma distribution with \p shape - //! and \p rate on the \p n quantile intervals. - static void gammaSampleQuantiles(double shape, - double rate, - std::size_t n, - TDoubleVec &result); - private: - //! \brief A uniform generator on the interval [0, n). - template - class CUniform0nGenerator - { - public: - CUniform0nGenerator(RNG &generator) : m_Generator(&generator) {} - std::size_t operator()(std::size_t n) const - { - boost::random::uniform_int_distribution uniform(0, n - 1); - return uniform(*m_Generator); - } - - private: - RNG *m_Generator; - }; + RNG* m_Generator; + }; - private: - //! The mutex for protecting access to the random number generator. - static core::CFastMutex ms_Lock; +private: + //! The mutex for protecting access to the random number generator. + static core::CFastMutex ms_Lock; - //! The uniform random number generator. - static CRandomNumberGenerator ms_Rng; + //! The uniform random number generator. + static CRandomNumberGenerator ms_Rng; }; - } } diff --git a/include/maths/CSeasonalComponent.h b/include/maths/CSeasonalComponent.h index 5e3c62d68f..64932dcad4 100644 --- a/include/maths/CSeasonalComponent.h +++ b/include/maths/CSeasonalComponent.h @@ -10,24 +10,21 @@ #include #include +#include #include #include -#include #include #include #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Estimates a seasonal component of a time series. //! @@ -41,184 +38,176 @@ namespace maths //! //! The bucketing is aged by relaxing it back towards uniform and aging the counts of the //! mean value for each bucket as usual. -class MATHS_EXPORT CSeasonalComponent : private CDecompositionComponent -{ - public: - using TMatrix = CSymmetricMatrixNxN; - using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TFloatMeanAccumulatorVec = std::vector; - - public: - //! \param[in] time The time provider. - //! \param[in] maxSize The maximum number of component buckets. - //! \param[in] decayRate Controls the rate at which information is lost from - //! its adaptive bucketing. - //! \param[in] minimumBucketLength The minimum bucket length permitted in the - //! adaptive bucketing. - //! \param[in] boundaryCondition The boundary condition to use for the splines. - //! \param[in] valueInterpolationType The style of interpolation to use for - //! computing values. - //! \param[in] varianceInterpolationType The style of interpolation to use for - //! computing variances. - CSeasonalComponent(const CSeasonalTime &time, - std::size_t maxSize, - double decayRate = 0.0, - double minimumBucketLength = 0.0, - CSplineTypes::EBoundaryCondition boundaryCondition = CSplineTypes::E_Periodic, - CSplineTypes::EType valueInterpolationType = CSplineTypes::E_Cubic, - CSplineTypes::EType varianceInterpolationType = CSplineTypes::E_Linear); - - //! 
Construct by traversing part of an state document. - CSeasonalComponent(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser, - CSplineTypes::EType valueInterpolationType = CSplineTypes::E_Cubic, - CSplineTypes::EType varianceInterpolationType = CSplineTypes::E_Linear); - - //! An efficient swap of the contents of two components. - void swap(CSeasonalComponent &other); - - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Check if the seasonal component has been estimated. - bool initialized() const; - - //! Initialize the adaptive bucketing. - bool initialize(core_t::TTime startTime = 0, - core_t::TTime endTime = 0, - const TFloatMeanAccumulatorVec &values = TFloatMeanAccumulatorVec()); - - //! Get the size of this component. - std::size_t size() const; - - //! Clear all data. - void clear(); - - //! Shift the component's time origin to \p time. - void shiftOrigin(core_t::TTime time); - - //! Shift the component's values by \p shift. - void shiftLevel(double shift); - - //! Shift the component's slope by \p shift. - void shiftSlope(double shift); - - //! Linearly scale the component's by \p scale. - void linearScale(core_t::TTime time, double scale); - - //! Adds a value \f$(t, f(t))\f$ to this component. - //! - //! \param[in] time The time of the point. - //! \param[in] value The value at \p time. - //! \param[in] weight The weight of \p value. The smaller this is the - //! less influence it has on the component. - void add(core_t::TTime time, double value, double weight = 1.0); - - //! Update the interpolation of the bucket values. - //! - //! \param[in] time The time at which to interpolate. - //! \param[in] refine If false disable refining the bucketing. - void interpolate(core_t::TTime time, bool refine = true); - - //! Get the rate at which the seasonal component loses information. - double decayRate() const; - - //! Set the rate at which the seasonal component loses information. - void decayRate(double decayRate); - - //! Age out old data to account for elapsed \p time. - void propagateForwardsByTime(double time, bool meanRevert = false); - - //! Get the time provider. - const CSeasonalTime &time() const; - - //! Interpolate the component at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] confidence The symmetric confidence interval for the variance - //! as a percentage. - TDoubleDoublePr value(core_t::TTime time, double confidence) const; - - //! Get the mean value of the component. - double meanValue() const; - - //! This computes the delta to apply to the component with \p period. - //! - //! This is used to adjust the decomposition when it contains components - //! whose periods are divisors of one another to get the most efficient - //! representation. - //! - //! \param[in] time The time at which to compute the delta. - //! \param[in] shortPeriod The period of the short component. - //! \param[in] shortPeriodValue The short component value at \p time. - double delta(core_t::TTime time, - core_t::TTime shortPeriod, - double shortPeriodValue) const; - - //! Get the variance of the residual about the prediction at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] confidence The symmetric confidence interval for the - //! variance as a percentage. - TDoubleDoublePr variance(core_t::TTime time, double confidence) const; - - //! Get the mean variance of the component residuals. 
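// ---[ Editor's aside: not part of the patch ]---------------------------------
// The interface above implies a simple lifecycle: construct with a time
// provider, initialize, feed values, interpolate, then query. A sketch with
// illustrative constants and a made-up daily signal:
#include <maths/CSeasonalComponent.h>
#include <maths/CSeasonalTime.h>
#include <cmath>

void sketchSeasonalComponent() {
    using namespace ml;
    maths::CGeneralPeriodTime daily(/*period*/ 86400);
    maths::CSeasonalComponent component(daily, /*maxSize*/ 24, /*decayRate*/ 0.01);
    component.initialize();
    for (core_t::TTime t = 0; t < 7 * 86400; t += 300) {
        component.add(t, 10.0 * std::sin(2.0 * 3.14159265 * double(t) / 86400.0));
    }
    component.interpolate(7 * 86400);
    auto prediction = component.value(7 * 86400 + 3600, /*confidence*/ 95.0);
    static_cast<void>(prediction);
}
// ------------------------------------------------------------------------------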
- double meanVariance() const; - - //! Get the maximum ratio between a residual variance and the mean - //! residual variance. - double heteroscedasticity() const; - - //! Get the covariance matrix of the regression parameters' at \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the regression parameters' - //! covariance matrix. - bool covariances(core_t::TTime time, TMatrix &result) const; - - //! Get the value spline. - TSplineCRef valueSpline() const; - - //! Get the common slope of the bucket regression models. - double slope() const; - - //! Check if the bucket regression models have enough history to predict. - bool slopeAccurate(core_t::TTime time) const; - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; - - //! Debug the memory used by this component. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component. - std::size_t memoryUsage() const; - - private: - //! Create by traversing a state document. - bool acceptRestoreTraverser(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser); - - //! Get a jitter to apply to the prediction time. - core_t::TTime jitter(core_t::TTime time); - - private: - //! Used to apply jitter to added value times so that we can accommodate - //! small time translations of the trend. - CPRNG::CXorOShiro128Plus m_Rng; - - //! Regression models for a collection of buckets covering the period. - CSeasonalComponentAdaptiveBucketing m_Bucketing; +class MATHS_EXPORT CSeasonalComponent : private CDecompositionComponent { +public: + using TMatrix = CSymmetricMatrixNxN; + using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TFloatMeanAccumulatorVec = std::vector; + +public: + //! \param[in] time The time provider. + //! \param[in] maxSize The maximum number of component buckets. + //! \param[in] decayRate Controls the rate at which information is lost from + //! its adaptive bucketing. + //! \param[in] minimumBucketLength The minimum bucket length permitted in the + //! adaptive bucketing. + //! \param[in] boundaryCondition The boundary condition to use for the splines. + //! \param[in] valueInterpolationType The style of interpolation to use for + //! computing values. + //! \param[in] varianceInterpolationType The style of interpolation to use for + //! computing variances. + CSeasonalComponent(const CSeasonalTime& time, + std::size_t maxSize, + double decayRate = 0.0, + double minimumBucketLength = 0.0, + CSplineTypes::EBoundaryCondition boundaryCondition = CSplineTypes::E_Periodic, + CSplineTypes::EType valueInterpolationType = CSplineTypes::E_Cubic, + CSplineTypes::EType varianceInterpolationType = CSplineTypes::E_Linear); + + //! Construct by traversing part of an state document. + CSeasonalComponent(double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser, + CSplineTypes::EType valueInterpolationType = CSplineTypes::E_Cubic, + CSplineTypes::EType varianceInterpolationType = CSplineTypes::E_Linear); + + //! An efficient swap of the contents of two components. + void swap(CSeasonalComponent& other); + + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Check if the seasonal component has been estimated. + bool initialized() const; + + //! Initialize the adaptive bucketing. 
+ bool + initialize(core_t::TTime startTime = 0, core_t::TTime endTime = 0, const TFloatMeanAccumulatorVec& values = TFloatMeanAccumulatorVec()); + + //! Get the size of this component. + std::size_t size() const; + + //! Clear all data. + void clear(); + + //! Shift the component's time origin to \p time. + void shiftOrigin(core_t::TTime time); + + //! Shift the component's values by \p shift. + void shiftLevel(double shift); + + //! Shift the component's slope by \p shift. + void shiftSlope(double shift); + + //! Linearly scale the component's by \p scale. + void linearScale(core_t::TTime time, double scale); + + //! Adds a value \f$(t, f(t))\f$ to this component. + //! + //! \param[in] time The time of the point. + //! \param[in] value The value at \p time. + //! \param[in] weight The weight of \p value. The smaller this is the + //! less influence it has on the component. + void add(core_t::TTime time, double value, double weight = 1.0); + + //! Update the interpolation of the bucket values. + //! + //! \param[in] time The time at which to interpolate. + //! \param[in] refine If false disable refining the bucketing. + void interpolate(core_t::TTime time, bool refine = true); + + //! Get the rate at which the seasonal component loses information. + double decayRate() const; + + //! Set the rate at which the seasonal component loses information. + void decayRate(double decayRate); + + //! Age out old data to account for elapsed \p time. + void propagateForwardsByTime(double time, bool meanRevert = false); + + //! Get the time provider. + const CSeasonalTime& time() const; + + //! Interpolate the component at \p time. + //! + //! \param[in] time The time of interest. + //! \param[in] confidence The symmetric confidence interval for the variance + //! as a percentage. + TDoubleDoublePr value(core_t::TTime time, double confidence) const; + + //! Get the mean value of the component. + double meanValue() const; + + //! This computes the delta to apply to the component with \p period. + //! + //! This is used to adjust the decomposition when it contains components + //! whose periods are divisors of one another to get the most efficient + //! representation. + //! + //! \param[in] time The time at which to compute the delta. + //! \param[in] shortPeriod The period of the short component. + //! \param[in] shortPeriodValue The short component value at \p time. + double delta(core_t::TTime time, core_t::TTime shortPeriod, double shortPeriodValue) const; + + //! Get the variance of the residual about the prediction at \p time. + //! + //! \param[in] time The time of interest. + //! \param[in] confidence The symmetric confidence interval for the + //! variance as a percentage. + TDoubleDoublePr variance(core_t::TTime time, double confidence) const; + + //! Get the mean variance of the component residuals. + double meanVariance() const; + + //! Get the maximum ratio between a residual variance and the mean + //! residual variance. + double heteroscedasticity() const; + + //! Get the covariance matrix of the regression parameters' at \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the regression parameters' + //! covariance matrix. + bool covariances(core_t::TTime time, TMatrix& result) const; + + //! Get the value spline. + TSplineCRef valueSpline() const; + + //! Get the common slope of the bucket regression models. + double slope() const; + + //! Check if the bucket regression models have enough history to predict. 
+ bool slopeAccurate(core_t::TTime time) const; + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; + + //! Debug the memory used by this component. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component. + std::size_t memoryUsage() const; + +private: + //! Create by traversing a state document. + bool acceptRestoreTraverser(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser); + + //! Get a jitter to apply to the prediction time. + core_t::TTime jitter(core_t::TTime time); + +private: + //! Used to apply jitter to added value times so that we can accommodate + //! small time translations of the trend. + CPRNG::CXorOShiro128Plus m_Rng; + + //! Regression models for a collection of buckets covering the period. + CSeasonalComponentAdaptiveBucketing m_Bucketing; }; //! Create a free function which will be picked up in Koenig lookup. -inline void swap(CSeasonalComponent &lhs, CSeasonalComponent &rhs) -{ +inline void swap(CSeasonalComponent& lhs, CSeasonalComponent& rhs) { lhs.swap(rhs); } - } } diff --git a/include/maths/CSeasonalComponentAdaptiveBucketing.h b/include/maths/CSeasonalComponentAdaptiveBucketing.h index 53a92c8ae0..d02d4bf7f2 100644 --- a/include/maths/CSeasonalComponentAdaptiveBucketing.h +++ b/include/maths/CSeasonalComponentAdaptiveBucketing.h @@ -19,241 +19,224 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CSeasonalTime; //! \brief An adaptive bucketing of the value of a periodic function. //! //! DESCRIPTION:\n //! See CAdaptiveBucketing for details. -class MATHS_EXPORT CSeasonalComponentAdaptiveBucketing : private CAdaptiveBucketing -{ - public: - using CAdaptiveBucketing::TFloatMeanAccumulatorVec; - using TDoubleRegression = CRegression::CLeastSquaresOnline<1, double>; - using TRegression = CRegression::CLeastSquaresOnline<1, CFloatStorage>; - - public: - CSeasonalComponentAdaptiveBucketing(); - explicit CSeasonalComponentAdaptiveBucketing(const CSeasonalTime &time, - double decayRate = 0.0, - double minimumBucketLength = 0.0); - CSeasonalComponentAdaptiveBucketing(const CSeasonalComponentAdaptiveBucketing &other); - //! Construct by traversing a state document. - CSeasonalComponentAdaptiveBucketing(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser); - - //! Copy from \p rhs. - const CSeasonalComponentAdaptiveBucketing &operator=(const CSeasonalComponentAdaptiveBucketing &rhs); - - //! Persist by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Efficiently swap the contents of two bucketing objects. - void swap(CSeasonalComponentAdaptiveBucketing &other); - - //! Check if the bucketing has been initialized. - bool initialized() const; - - //! Create a new uniform bucketing with \p n buckets. - //! - //! \param[in] n The number of buckets. - bool initialize(std::size_t n); - - //! Add the function moments \f$([a_i,b_i], S_i)\f$ where - //! \f$S_i\f$ are the means and variances of the function - //! in the time intervals \f$([a_i,b_i])\f$. - //! - //! \param[in] startTime The start of the period including \p values. - //! \param[in] endTime The end of the period including \p values. - //! \param[in] values Time ranges and the corresponding function - //! value moments. 
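// ---[ Editor's aside: not part of the patch ]---------------------------------
// initialValues() seeds the bucketing from one period of pre-aggregated
// moments. A sketch of the calling convention, assuming each accumulator
// holds the mean of the observations falling in its slice of the period (how
// the accumulators get filled is elided):
#include <maths/CSeasonalComponentAdaptiveBucketing.h>
#include <maths/CSeasonalTime.h>

void sketchInitialValues() {
    using namespace ml;
    maths::CGeneralPeriodTime daily(86400);
    maths::CSeasonalComponentAdaptiveBucketing bucketing(daily, /*decayRate*/ 0.01);
    bucketing.initialize(/*buckets*/ 24);
    maths::CSeasonalComponentAdaptiveBucketing::TFloatMeanAccumulatorVec values(144);
    // ... add each observation from the first day to the accumulator for its
    //     10-minute slice, e.g. values[(t % 86400) / 600].add(x) ...
    bucketing.initialValues(/*startTime*/ 0, /*endTime*/ 86400, values);
}
// ------------------------------------------------------------------------------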
- void initialValues(core_t::TTime startTime, - core_t::TTime endTime, - const TFloatMeanAccumulatorVec &values); - - //! Get the number of buckets. - std::size_t size() const; - - //! Clear the contents of this bucketing and recover any - //! allocated memory. - void clear(); - - //! Shift the regressions' time origin to \p time. - void shiftOrigin(core_t::TTime time); - - //! Shift the regressions' ordinates by \p shift. - void shiftLevel(double shift); - - //! Shift the regressions' gradients by \p shift. - void shiftSlope(double shift); - - //! Linearly scale the regressions by \p scale. - void linearScale(double scale); - - //! Add the function value at \p time. - //! - //! \param[in] time The time of \p value. - //! \param[in] value The value of the function at \p time. - //! \param[in] prediction The prediction for \p value. - //! \param[in] weight The weight of function point. The smaller - //! this is the less influence it has on the bucket. - void add(core_t::TTime time, double value, double prediction, double weight = 1.0); - - //! Get the time provider. - const CSeasonalTime &time() const; - - //! Set the rate at which the bucketing loses information. - void decayRate(double value); - - //! Get the rate at which the bucketing loses information. - double decayRate() const; - - //! Age the bucket values to account for \p time elapsed time. - void propagateForwardsByTime(double time, bool meanRevert = false); - - //! Get the minimum permitted bucket length. - double minimumBucketLength() const; - - //! Refine the bucket end points to minimize the maximum averaging - //! error in any bucket. - //! - //! \param[in] time The time at which to refine. - void refine(core_t::TTime time); - - //! The count in the bucket containing \p time. - double count(core_t::TTime time) const; - - //! Get the regression to use at \p time. - const TRegression *regression(core_t::TTime time) const; - - //! Get a set of knot points and knot point values to use for - //! interpolating the bucket values. - //! - //! \param[in] time The time at which to get the knot points. - //! \param[in] boundary Controls the style of start and end knots. - //! \param[out] knots Filled in with the knot points to interpolate. - //! \param[out] values Filled in with the values at \p knots. - //! \param[out] variances Filled in with the variances at \p knots. - //! \return True if there are sufficient knot points to interpolate - //! and false otherwise. - bool knots(core_t::TTime time, - CSplineTypes::EBoundaryCondition boundary, - TDoubleVec &knots, - TDoubleVec &values, - TDoubleVec &variances) const; - - //! Get the common slope of the bucket regression models. - double slope() const; - - //! Check if this regression models have enough history to predict. - bool slopeAccurate(core_t::TTime time) const; - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; - - //! Get the memory used by this component - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component - std::size_t memoryUsage() const; - - //! \name Test Functions - //@{ - //! Get the bucket end points. - const TFloatVec &endpoints() const; - - //! Get the total count of in the bucketing. - double count() const; - - //! Get the bucket regression predictions at \p time. - TDoubleVec values(core_t::TTime time) const; - - //! Get the bucket variances. - TDoubleVec variances() const; - //@} - - private: - using TSeasonalTimePtr = boost::shared_ptr; - - //! 
\brief The state maintained for each bucket. - struct SBucket - { - SBucket(); - SBucket(const TRegression ®ression, - double variance, - core_t::TTime firstUpdate, - core_t::TTime lastUpdate); - - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - uint64_t checksum(uint64_t seed) const; - - TRegression s_Regression; - CFloatStorage s_Variance; - core_t::TTime s_FirstUpdate; - core_t::TTime s_LastUpdate; - }; - using TBucketVec = std::vector; - - private: - //! Restore by traversing a state document - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Compute the values corresponding to the change in end - //! points from \p endpoints. The values are assigned based - //! on their intersection with each bucket in the previous - //! bucket configuration. - //! - //! \param[in] endpoints The old end points. - void refresh(const TFloatVec &endpoints); - - //! Check if \p time is in the this component's window. - virtual bool inWindow(core_t::TTime time) const; - - //! Add the function value at \p time. - virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight); - - //! Get the offset w.r.t. the start of the bucketing of \p time. - virtual double offset(core_t::TTime time) const; - - //! The count in \p bucket. - virtual double count(std::size_t bucket) const; - - //! Get the predicted value for the \p bucket at \p time. - virtual double predict(std::size_t bucket, core_t::TTime time, double offset) const; - - //! Get the variance of \p bucket. - virtual double variance(std::size_t bucket) const; - - //! Get the interval which has been observed at \p time. - double observedInterval(core_t::TTime time) const; - - private: - //! The time provider. - TSeasonalTimePtr m_Time; +class MATHS_EXPORT CSeasonalComponentAdaptiveBucketing : private CAdaptiveBucketing { +public: + using CAdaptiveBucketing::TFloatMeanAccumulatorVec; + using TDoubleRegression = CRegression::CLeastSquaresOnline<1, double>; + using TRegression = CRegression::CLeastSquaresOnline<1, CFloatStorage>; + +public: + CSeasonalComponentAdaptiveBucketing(); + explicit CSeasonalComponentAdaptiveBucketing(const CSeasonalTime& time, double decayRate = 0.0, double minimumBucketLength = 0.0); + CSeasonalComponentAdaptiveBucketing(const CSeasonalComponentAdaptiveBucketing& other); + //! Construct by traversing a state document. + CSeasonalComponentAdaptiveBucketing(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser); + + //! Copy from \p rhs. + const CSeasonalComponentAdaptiveBucketing& operator=(const CSeasonalComponentAdaptiveBucketing& rhs); + + //! Persist by passing information to the supplied inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Efficiently swap the contents of two bucketing objects. + void swap(CSeasonalComponentAdaptiveBucketing& other); + + //! Check if the bucketing has been initialized. + bool initialized() const; + + //! Create a new uniform bucketing with \p n buckets. + //! + //! \param[in] n The number of buckets. + bool initialize(std::size_t n); + + //! Add the function moments \f$([a_i,b_i], S_i)\f$ where + //! \f$S_i\f$ are the means and variances of the function + //! in the time intervals \f$([a_i,b_i])\f$. + //! + //! \param[in] startTime The start of the period including \p values. + //! \param[in] endTime The end of the period including \p values. + //! 
\param[in] values Time ranges and the corresponding function + //! value moments. + void initialValues(core_t::TTime startTime, core_t::TTime endTime, const TFloatMeanAccumulatorVec& values); + + //! Get the number of buckets. + std::size_t size() const; + + //! Clear the contents of this bucketing and recover any + //! allocated memory. + void clear(); + + //! Shift the regressions' time origin to \p time. + void shiftOrigin(core_t::TTime time); + + //! Shift the regressions' ordinates by \p shift. + void shiftLevel(double shift); + + //! Shift the regressions' gradients by \p shift. + void shiftSlope(double shift); + + //! Linearly scale the regressions by \p scale. + void linearScale(double scale); + + //! Add the function value at \p time. + //! + //! \param[in] time The time of \p value. + //! \param[in] value The value of the function at \p time. + //! \param[in] prediction The prediction for \p value. + //! \param[in] weight The weight of function point. The smaller + //! this is the less influence it has on the bucket. + void add(core_t::TTime time, double value, double prediction, double weight = 1.0); + + //! Get the time provider. + const CSeasonalTime& time() const; + + //! Set the rate at which the bucketing loses information. + void decayRate(double value); + + //! Get the rate at which the bucketing loses information. + double decayRate() const; + + //! Age the bucket values to account for \p time elapsed time. + void propagateForwardsByTime(double time, bool meanRevert = false); + + //! Get the minimum permitted bucket length. + double minimumBucketLength() const; + + //! Refine the bucket end points to minimize the maximum averaging + //! error in any bucket. + //! + //! \param[in] time The time at which to refine. + void refine(core_t::TTime time); + + //! The count in the bucket containing \p time. + double count(core_t::TTime time) const; + + //! Get the regression to use at \p time. + const TRegression* regression(core_t::TTime time) const; + + //! Get a set of knot points and knot point values to use for + //! interpolating the bucket values. + //! + //! \param[in] time The time at which to get the knot points. + //! \param[in] boundary Controls the style of start and end knots. + //! \param[out] knots Filled in with the knot points to interpolate. + //! \param[out] values Filled in with the values at \p knots. + //! \param[out] variances Filled in with the variances at \p knots. + //! \return True if there are sufficient knot points to interpolate + //! and false otherwise. + bool knots(core_t::TTime time, + CSplineTypes::EBoundaryCondition boundary, + TDoubleVec& knots, + TDoubleVec& values, + TDoubleVec& variances) const; - //! The buckets. - TBucketVec m_Buckets; + //! Get the common slope of the bucket regression models. + double slope() const; + + //! Check if this regression models have enough history to predict. + bool slopeAccurate(core_t::TTime time) const; + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; + + //! Get the memory used by this component + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component + std::size_t memoryUsage() const; + + //! \name Test Functions + //@{ + //! Get the bucket end points. + const TFloatVec& endpoints() const; + + //! Get the total count of in the bucketing. + double count() const; + + //! Get the bucket regression predictions at \p time. + TDoubleVec values(core_t::TTime time) const; + + //! Get the bucket variances. 
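// ---[ Editor's aside: not part of the patch ]---------------------------------
// The update cycle implied by this interface: add values alongside the owning
// component's predictions, periodically refine the bucket end points, and age
// the statistics. Cadence and constants here are illustrative only.
#include <maths/CSeasonalComponentAdaptiveBucketing.h>

void sketchUpdate(ml::maths::CSeasonalComponentAdaptiveBucketing& bucketing,
                  ml::core_t::TTime now,
                  double value,
                  double prediction) {
    bucketing.add(now, value, prediction);
    if (now % 86400 == 0) {
        bucketing.refine(now);                  // re-balance bucket end points
        bucketing.propagateForwardsByTime(1.0); // age out old information
    }
}
// ------------------------------------------------------------------------------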
+ TDoubleVec variances() const; + //@} + +private: + using TSeasonalTimePtr = boost::shared_ptr; + + //! \brief The state maintained for each bucket. + struct SBucket { + SBucket(); + SBucket(const TRegression& regression, double variance, core_t::TTime firstUpdate, core_t::TTime lastUpdate); + + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + uint64_t checksum(uint64_t seed) const; + + TRegression s_Regression; + CFloatStorage s_Variance; + core_t::TTime s_FirstUpdate; + core_t::TTime s_LastUpdate; + }; + using TBucketVec = std::vector; + +private: + //! Restore by traversing a state document + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Compute the values corresponding to the change in end + //! points from \p endpoints. The values are assigned based + //! on their intersection with each bucket in the previous + //! bucket configuration. + //! + //! \param[in] endpoints The old end points. + void refresh(const TFloatVec& endpoints); + + //! Check if \p time is in the this component's window. + virtual bool inWindow(core_t::TTime time) const; + + //! Add the function value at \p time. + virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight); + + //! Get the offset w.r.t. the start of the bucketing of \p time. + virtual double offset(core_t::TTime time) const; + + //! The count in \p bucket. + virtual double count(std::size_t bucket) const; + + //! Get the predicted value for the \p bucket at \p time. + virtual double predict(std::size_t bucket, core_t::TTime time, double offset) const; + + //! Get the variance of \p bucket. + virtual double variance(std::size_t bucket) const; + + //! Get the interval which has been observed at \p time. + double observedInterval(core_t::TTime time) const; + +private: + //! The time provider. + TSeasonalTimePtr m_Time; + + //! The buckets. + TBucketVec m_Buckets; }; //! Create a free function which will be found by Koenig lookup. -inline void swap(CSeasonalComponentAdaptiveBucketing &lhs, - CSeasonalComponentAdaptiveBucketing &rhs) -{ +inline void swap(CSeasonalComponentAdaptiveBucketing& lhs, CSeasonalComponentAdaptiveBucketing& rhs) { lhs.swap(rhs); } - } } diff --git a/include/maths/CSeasonalTime.h b/include/maths/CSeasonalTime.h index 455da03c9f..e0b5647d91 100644 --- a/include/maths/CSeasonalTime.h +++ b/include/maths/CSeasonalTime.h @@ -15,232 +15,226 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Provides times for seasonal components of a time series //! decomposition. -class MATHS_EXPORT CSeasonalTime -{ - public: - using TTimeTimePr = std::pair; +class MATHS_EXPORT CSeasonalTime { +public: + using TTimeTimePr = std::pair; - public: - CSeasonalTime(); - CSeasonalTime(core_t::TTime period, double precedence); - virtual ~CSeasonalTime() = default; +public: + CSeasonalTime(); + CSeasonalTime(core_t::TTime period, double precedence); + virtual ~CSeasonalTime() = default; - //! A total order on seasonal times. - bool operator<(const CSeasonalTime &rhs) const; + //! A total order on seasonal times. + bool operator<(const CSeasonalTime& rhs) const; - //! Get a copy of this time. - //! - //! \warning The caller owns the result. - virtual CSeasonalTime *clone() const = 0; + //! Get a copy of this time. + //! + //! \warning The caller owns the result. 
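// ---[ Editor's aside: not part of the patch ]---------------------------------
// Per the \warning above, clone() (declared just below) hands ownership of a
// raw pointer to the caller. Wrapping the result immediately avoids leaks;
// boost::shared_ptr matches the TSeasonalTimePtr alias used elsewhere in
// these headers.
#include <boost/shared_ptr.hpp>

boost::shared_ptr<ml::maths::CSeasonalTime> safeClone(const ml::maths::CSeasonalTime& time) {
    return boost::shared_ptr<ml::maths::CSeasonalTime>(time.clone());
}
// ------------------------------------------------------------------------------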
+ virtual CSeasonalTime* clone() const = 0; - //! Initialize from a string created by persist. - virtual bool fromString(const std::string &value) = 0; - - //! Convert to a string. - virtual std::string toString() const = 0; - - //! \name Time Transforms - //@{ - //! Extract the time of \p time in the current period. - double periodic(core_t::TTime time) const; - - //! Extract the time of \p time in the current regression. - double regression(core_t::TTime time) const; - - //! Get the interval between in regression time units - //! between \p start and \p end. - double regressionInterval(core_t::TTime start, core_t::TTime end) const; - - //! Get the start of the repeat containing \p time. - core_t::TTime startOfWindowRepeat(core_t::TTime time) const; - - //! Get the start of the window containing \p time. - core_t::TTime startOfWindow(core_t::TTime time) const; - - //! Check if \p time is in the window. - bool inWindow(core_t::TTime time) const; - //@} - - //! \name Period - //@{ - //! Get the period. - core_t::TTime period() const; - - //! Set the period. - void period(core_t::TTime period); - //@} - - //! \name Regression - //@{ - //! Get the origin of the time coordinates. - core_t::TTime regressionOrigin() const; - - //! Set the origin of the time coordinates. - void regressionOrigin(core_t::TTime origin); - //@} - - //! \name Window - //@{ - //! Get the repeat interval for the window pattern. - virtual core_t::TTime windowRepeat() const = 0; - - //! Get the start of the window pattern. - virtual core_t::TTime windowRepeatStart() const = 0; - - //! Get the start of the window. - virtual core_t::TTime windowStart() const = 0; - - //! Get the end of the window. - virtual core_t::TTime windowEnd() const = 0; - - //! Get the window. - TTimeTimePr window() const; - - //! Get the window length. - core_t::TTime windowLength() const; - - //! Check if this applies to a time window. - bool windowed() const; - - //! Get the fraction of time which overlaps the window. - double fractionInWindow() const; - //@} - - //! Check whether this time's seasonal component time excludes - //! modeling \p other's. - bool excludes(const CSeasonalTime &other) const; - - //! True if this has a weekend and false otherwise. - virtual bool hasWeekend() const = 0; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const = 0; - - private: - //! Get the start of the repeat interval beginning at - //! \p offset including \p time. - //! - //! For diurnal time this is the start of the week containing - //! \p time for other times it is the start of the period - //! containing \p time. - core_t::TTime startOfWindowRepeat(core_t::TTime offset, core_t::TTime time) const; + //! Initialize from a string created by persist. + virtual bool fromString(const std::string& value) = 0; + + //! Convert to a string. + virtual std::string toString() const = 0; + + //! \name Time Transforms + //@{ + //! Extract the time of \p time in the current period. + double periodic(core_t::TTime time) const; + + //! Extract the time of \p time in the current regression. + double regression(core_t::TTime time) const; + + //! Get the interval between in regression time units + //! between \p start and \p end. + double regressionInterval(core_t::TTime start, core_t::TTime end) const; + + //! Get the start of the repeat containing \p time. + core_t::TTime startOfWindowRepeat(core_t::TTime time) const; + + //! Get the start of the window containing \p time. 
+ core_t::TTime startOfWindow(core_t::TTime time) const; + + //! Check if \p time is in the window. + bool inWindow(core_t::TTime time) const; + //@} + + //! \name Period + //@{ + //! Get the period. + core_t::TTime period() const; + + //! Set the period. + void period(core_t::TTime period); + //@} + + //! \name Regression + //@{ + //! Get the origin of the time coordinates. + core_t::TTime regressionOrigin() const; + + //! Set the origin of the time coordinates. + void regressionOrigin(core_t::TTime origin); + //@} + + //! \name Window + //@{ + //! Get the repeat interval for the window pattern. + virtual core_t::TTime windowRepeat() const = 0; + + //! Get the start of the window pattern. + virtual core_t::TTime windowRepeatStart() const = 0; + + //! Get the start of the window. + virtual core_t::TTime windowStart() const = 0; + + //! Get the end of the window. + virtual core_t::TTime windowEnd() const = 0; + + //! Get the window. + TTimeTimePr window() const; + + //! Get the window length. + core_t::TTime windowLength() const; + + //! Check if this applies to a time window. + bool windowed() const; + + //! Get the fraction of time which overlaps the window. + double fractionInWindow() const; + //@} + + //! Check whether this time's seasonal component time excludes + //! modeling \p other's. + bool excludes(const CSeasonalTime& other) const; + + //! True if this has a weekend and false otherwise. + virtual bool hasWeekend() const = 0; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const = 0; + +private: + //! Get the start of the repeat interval beginning at + //! \p offset including \p time. + //! + //! For diurnal time this is the start of the week containing + //! \p time for other times it is the start of the period + //! containing \p time. + core_t::TTime startOfWindowRepeat(core_t::TTime offset, core_t::TTime time) const; - //! Get the scale to apply when computing the regression time. - virtual core_t::TTime regressionTimeScale() const = 0; + //! Get the scale to apply when computing the regression time. + virtual core_t::TTime regressionTimeScale() const = 0; - private: - //! The periodic repeat. - core_t::TTime m_Period; - //! The origin of the time coordinates used to maintain - //! a reasonably conditioned Gramian of the design matrix. - core_t::TTime m_RegressionOrigin; - //! The precedence of the corresponding component when - //! deciding which to keep amongst alternatives. - double m_Precedence; +private: + //! The periodic repeat. + core_t::TTime m_Period; + //! The origin of the time coordinates used to maintain + //! a reasonably conditioned Gramian of the design matrix. + core_t::TTime m_RegressionOrigin; + //! The precedence of the corresponding component when + //! deciding which to keep amongst alternatives. + double m_Precedence; }; //! \brief Provides times for daily and weekly period seasonal //! components of a time series decomposition. -class MATHS_EXPORT CDiurnalTime : public CSeasonalTime -{ - public: - CDiurnalTime(); - CDiurnalTime(core_t::TTime startOfWeek, - core_t::TTime windowStart, - core_t::TTime windowEnd, - core_t::TTime period, - double precedence = 1.0); - - //! Get a copy of this time. - CDiurnalTime *clone() const; - - //! Initialize from a string created by persist. - virtual bool fromString(const std::string &value); - - //! Convert to a string. - virtual std::string toString() const; - - //! Get the length of a week. - virtual core_t::TTime windowRepeat() const; - - //! Get the start of the week. 
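// ---[ Editor's aside: not part of the patch ]---------------------------------
// Sketch of a windowed diurnal time: a daily pattern that only applies on
// weekdays, expressed as a [windowStart, windowEnd) range inside the weekly
// repeat. How the week is anchored (and hence which days the window covers)
// is an assumption here.
#include <maths/CSeasonalTime.h>

ml::maths::CDiurnalTime weekdayDailyTime() {
    const ml::core_t::TTime day = 86400;
    return ml::maths::CDiurnalTime(/*startOfWeek*/ 0,
                                   /*windowStart*/ 1 * day,
                                   /*windowEnd*/ 6 * day,
                                   /*period*/ day);
}
// ------------------------------------------------------------------------------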
- virtual core_t::TTime windowRepeatStart() const; - - //! Get the start of the window. - virtual core_t::TTime windowStart() const; - - //! Get the end of the window. - virtual core_t::TTime windowEnd() const; - - //! True if this has a weekend and false otherwise. - virtual bool hasWeekend() const; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - private: - //! Get the scale to apply when computing the regression time. - virtual core_t::TTime regressionTimeScale() const; - - private: - //! The start of the week. - core_t::TTime m_StartOfWeek; - //! The start of the window. - core_t::TTime m_WindowStart; - //! The end of the window. - core_t::TTime m_WindowEnd; +class MATHS_EXPORT CDiurnalTime : public CSeasonalTime { +public: + CDiurnalTime(); + CDiurnalTime(core_t::TTime startOfWeek, + core_t::TTime windowStart, + core_t::TTime windowEnd, + core_t::TTime period, + double precedence = 1.0); + + //! Get a copy of this time. + CDiurnalTime* clone() const; + + //! Initialize from a string created by persist. + virtual bool fromString(const std::string& value); + + //! Convert to a string. + virtual std::string toString() const; + + //! Get the length of a week. + virtual core_t::TTime windowRepeat() const; + + //! Get the start of the week. + virtual core_t::TTime windowRepeatStart() const; + + //! Get the start of the window. + virtual core_t::TTime windowStart() const; + + //! Get the end of the window. + virtual core_t::TTime windowEnd() const; + + //! True if this has a weekend and false otherwise. + virtual bool hasWeekend() const; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; + +private: + //! Get the scale to apply when computing the regression time. + virtual core_t::TTime regressionTimeScale() const; + +private: + //! The start of the week. + core_t::TTime m_StartOfWeek; + //! The start of the window. + core_t::TTime m_WindowStart; + //! The end of the window. + core_t::TTime m_WindowEnd; }; //! \brief Provides times for arbitrary period seasonal components //! of a time series decomposition. -class MATHS_EXPORT CGeneralPeriodTime : public CSeasonalTime -{ - public: - CGeneralPeriodTime() = default; - CGeneralPeriodTime(core_t::TTime period, double precedence = 1.0); +class MATHS_EXPORT CGeneralPeriodTime : public CSeasonalTime { +public: + CGeneralPeriodTime() = default; + CGeneralPeriodTime(core_t::TTime period, double precedence = 1.0); - //! Get a copy of this time. - CGeneralPeriodTime *clone() const; + //! Get a copy of this time. + CGeneralPeriodTime* clone() const; - //! Initialize from a string created by persist. - virtual bool fromString(const std::string &value); + //! Initialize from a string created by persist. + virtual bool fromString(const std::string& value); - //! Convert to a string. - virtual std::string toString() const; + //! Convert to a string. + virtual std::string toString() const; - //! Return the period. - virtual core_t::TTime windowRepeat() const; + //! Return the period. + virtual core_t::TTime windowRepeat() const; - //! Returns zero. - virtual core_t::TTime windowRepeatStart() const; + //! Returns zero. + virtual core_t::TTime windowRepeatStart() const; - //! Returns zero. - virtual core_t::TTime windowStart() const; + //! Returns zero. + virtual core_t::TTime windowStart() const; - //! Returns the period. - virtual core_t::TTime windowEnd() const; + //! Returns the period. + virtual core_t::TTime windowEnd() const; - //! Returns false. 
- virtual bool hasWeekend() const; + //! Returns false. + virtual bool hasWeekend() const; - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; - private: - //! Get the scale to apply when computing the regression time. - virtual core_t::TTime regressionTimeScale() const; +private: + //! Get the scale to apply when computing the regression time. + virtual core_t::TTime regressionTimeScale() const; }; //! \brief Convert CSeasonalTime sub-classes to/from text representations. @@ -249,25 +243,20 @@ class MATHS_EXPORT CGeneralPeriodTime : public CSeasonalTime //! Encapsulate the conversion of arbitrary CSeasonalTime sub-classes to/from //! textual state. In particular, the field name associated with each type of //! CSeasonalTime is then in one file. -class MATHS_EXPORT CSeasonalTimeStateSerializer -{ - public: - //! Shared pointer to the CTimeSeriesDecompositionInterface abstract - //! base class. - using TSeasonalTimePtr = boost::shared_ptr; - - public: - //! Construct the appropriate CSeasonalTime sub-class from its state - //! document representation. Sets \p result to NULL on failure. - static bool acceptRestoreTraverser(TSeasonalTimePtr &result, - core::CStateRestoreTraverser &traverser); - - //! Persist state by passing information to \p inserter. - static void acceptPersistInserter(const CSeasonalTime &time, - core::CStatePersistInserter &inserter); - +class MATHS_EXPORT CSeasonalTimeStateSerializer { +public: + //! Shared pointer to the CTimeSeriesDecompositionInterface abstract + //! base class. + using TSeasonalTimePtr = boost::shared_ptr; + +public: + //! Construct the appropriate CSeasonalTime sub-class from its state + //! document representation. Sets \p result to NULL on failure. + static bool acceptRestoreTraverser(TSeasonalTimePtr& result, core::CStateRestoreTraverser& traverser); + + //! Persist state by passing information to \p inserter. + static void acceptPersistInserter(const CSeasonalTime& time, core::CStatePersistInserter& inserter); }; - } } diff --git a/include/maths/CSetTools.h b/include/maths/CSetTools.h index 70c451b52b..25d6511d6b 100644 --- a/include/maths/CSetTools.h +++ b/include/maths/CSetTools.h @@ -15,10 +15,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Collection of set utility functions not provided by the STL. //! @@ -28,252 +26,214 @@ namespace maths //! computing set difference in-place, and functions for counting //! elements in set differences and unions. Common measures of set //! similarity such as the Jaccard index are also implemented. -class MATHS_EXPORT CSetTools -{ +class MATHS_EXPORT CSetTools { +public: + //! \brief Checks if an indexed object is in a specified collection + //! of indices. + class CIndexInSet { public: - //! \brief Checks if an indexed object is in a specified collection - //! of indices. 
- class CIndexInSet - { - public: - using TSizeSet = std::set; + using TSizeSet = std::set; - public: - CIndexInSet(std::size_t index) : m_IndexSet(index) {} - CIndexInSet(const TSizeSet &indexSet) : m_IndexSet(indexSet) {} - - template - bool operator()(const T &indexedObject) const - { - const std::size_t *index = boost::get(&m_IndexSet); - if (index) - { - return indexedObject.s_Index == *index; - } - const TSizeSet &indexSet = boost::get(m_IndexSet); - return indexSet.count(indexedObject.s_Index) > 0; - } - - private: - using TSizeOrSizeSet = boost::variant; - - private: - TSizeOrSizeSet m_IndexSet; - }; - - //! Compute the difference between \p S and [\p begin, \p end). - template - static void inplace_set_difference(std::vector &S, ITR begin, ITR end) - { - typename std::vector::iterator i = S.begin(), last = i; - for (ITR j = begin; i != S.end() && j != end; /**/) - { - if (*i < *j) - { - if (last != i) - { - std::iter_swap(last, i); - } - ++i; ++last; - } - else if (*j < *i) - { - ++j; - } - else - { - ++i; ++j; - } - } - if (last != i) - { - S.erase(std::swap_ranges(i, S.end(), last), S.end()); + public: + CIndexInSet(std::size_t index) : m_IndexSet(index) {} + CIndexInSet(const TSizeSet& indexSet) : m_IndexSet(indexSet) {} + + template + bool operator()(const T& indexedObject) const { + const std::size_t* index = boost::get(&m_IndexSet); + if (index) { + return indexedObject.s_Index == *index; } + const TSizeSet& indexSet = boost::get(m_IndexSet); + return indexSet.count(indexedObject.s_Index) > 0; } -#define SIMULTANEOUS_REMOVE_IF_IMPL using std::swap; \ - std::size_t last{0u}; \ - std::size_t n{values1.size()}; \ - for (std::size_t i = 0u; i < n; ++i) \ - { \ - if (last != i) \ - { \ - CUSTOM_SWAP_VALUES \ - } \ - if (!pred(values1[last])) \ - { \ - ++last; \ - } \ - } \ - if (last < n) \ - { \ - CUSTOM_ERASE_VALUES \ - return true; \ - } \ - return false; - -#define CUSTOM_SWAP_VALUES swap(values1[i], values1[last]); \ - swap(values2[i], values2[last]); -#define CUSTOM_ERASE_VALUES values1.erase(values1.begin() + last, values1.end()); \ - values2.erase(values2.begin() + last, values2.end()); - - //! Remove all instances of \p values1 for which \p pred is true - //! and corresponding values of \p values2. - template - static bool simultaneousRemoveIf(std::vector &values1, - std::vector &values2, - const F &pred) - { - if (values1.size() != values2.size()) - { - return false; + private: + using TSizeOrSizeSet = boost::variant; + + private: + TSizeOrSizeSet m_IndexSet; + }; + + //! Compute the difference between \p S and [\p begin, \p end). 
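A minimal usage sketch for the in-place set difference declared just below (the header path and namespace are taken from this patch; the values are hypothetical, and both ranges must be sorted since the implementation walks them in order):

    #include <maths/CSetTools.h>

    #include <vector>

    void example() {
        std::vector<int> S{1, 2, 3, 4, 5};
        std::vector<int> D{2, 4};
        // Removes the sorted range D from the sorted vector S in place.
        ml::maths::CSetTools::inplace_set_difference(S, D.begin(), D.end());
        // S is now {1, 3, 5}.
    }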
+ template + static void inplace_set_difference(std::vector& S, ITR begin, ITR end) { + typename std::vector::iterator i = S.begin(), last = i; + for (ITR j = begin; i != S.end() && j != end; /**/) { + if (*i < *j) { + if (last != i) { + std::iter_swap(last, i); + } + ++i; + ++last; + } else if (*j < *i) { + ++j; + } else { + ++i; + ++j; } - - SIMULTANEOUS_REMOVE_IF_IMPL + } + if (last != i) { + S.erase(std::swap_ranges(i, S.end(), last), S.end()); + } + } + +#define SIMULTANEOUS_REMOVE_IF_IMPL \ + using std::swap; \ + std::size_t last{0u}; \ + std::size_t n{values1.size()}; \ + for (std::size_t i = 0u; i < n; ++i) { \ + if (last != i) { \ + CUSTOM_SWAP_VALUES \ + } \ + if (!pred(values1[last])) { \ + ++last; \ + } \ + } \ + if (last < n) { \ + CUSTOM_ERASE_VALUES \ + return true; \ + } \ + return false; + +#define CUSTOM_SWAP_VALUES \ + swap(values1[i], values1[last]); \ + swap(values2[i], values2[last]); +#define CUSTOM_ERASE_VALUES \ + values1.erase(values1.begin() + last, values1.end()); \ + values2.erase(values2.begin() + last, values2.end()); + + //! Remove all instances of \p values1 for which \p pred is true + //! and corresponding values of \p values2. + template + static bool simultaneousRemoveIf(std::vector& values1, std::vector& values2, const F& pred) { + if (values1.size() != values2.size()) { + return false; } + SIMULTANEOUS_REMOVE_IF_IMPL + } + #undef CUSTOM_SWAP_VALUES #undef CUSTOM_ERASE_VALUES -#define CUSTOM_SWAP_VALUES swap(values1[i], values1[last]); \ - swap(values2[i], values2[last]); \ - swap(values3[i], values3[last]); -#define CUSTOM_ERASE_VALUES values1.erase(values1.begin() + last, values1.end()); \ - values2.erase(values2.begin() + last, values2.end()); \ - values3.erase(values3.begin() + last, values3.end()); - - //! Remove all instances of \p values1 for which \p pred is true - //! and corresponding values of \p values2 and \p values3. - template - static bool simultaneousRemoveIf(std::vector &values1, - std::vector &values2, - std::vector &values3, - const F &pred) - { - if ( values1.size() != values2.size() - || values2.size() != values3.size()) - { - return false; - } - - SIMULTANEOUS_REMOVE_IF_IMPL +#define CUSTOM_SWAP_VALUES \ + swap(values1[i], values1[last]); \ + swap(values2[i], values2[last]); \ + swap(values3[i], values3[last]); +#define CUSTOM_ERASE_VALUES \ + values1.erase(values1.begin() + last, values1.end()); \ + values2.erase(values2.begin() + last, values2.end()); \ + values3.erase(values3.begin() + last, values3.end()); + + //! Remove all instances of \p values1 for which \p pred is true + //! and corresponding values of \p values2 and \p values3. + template + static bool simultaneousRemoveIf(std::vector& values1, std::vector& values2, std::vector& values3, const F& pred) { + if (values1.size() != values2.size() || values2.size() != values3.size()) { + return false; } + SIMULTANEOUS_REMOVE_IF_IMPL + } + #undef CUSTOM_SWAP_VALUES #undef CUSTOM_ERASE_VALUES #undef SIMULTANEOUS_REMOVE_IF_IMPL - //! Compute the number of elements in the intersection of the - //! ranges [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs). - template - static std::size_t setIntersectSize(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) - { - std::size_t result = 0u; - while (beginLhs != endLhs && beginRhs != endRhs) - { - if (*beginLhs < *beginRhs) - { - ++beginLhs; - } - else if (*beginRhs < *beginLhs) - { - ++beginRhs; - } - else - { - ++beginLhs; ++beginRhs; ++result; - } + //! Compute the number of elements in the intersection of the + //! 
ranges [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs). + template + static std::size_t setIntersectSize(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) { + std::size_t result = 0u; + while (beginLhs != endLhs && beginRhs != endRhs) { + if (*beginLhs < *beginRhs) { + ++beginLhs; + } else if (*beginRhs < *beginLhs) { + ++beginRhs; + } else { + ++beginLhs; + ++beginRhs; + ++result; } - return result; } - - //! Compute the number of elements in the union of the ranges - //! [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs). - template - static std::size_t setUnionSize(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) - { - std::size_t result = 0u; - while (beginLhs != endLhs && beginRhs != endRhs) - { - if (*beginLhs < *beginRhs) - { - ++beginLhs; - } - else if (*beginRhs < *beginLhs) - { - ++beginRhs; - } - else - { - ++beginLhs; ++beginRhs; - } - ++result; + return result; + } + + //! Compute the number of elements in the union of the ranges + //! [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs). + template + static std::size_t setUnionSize(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) { + std::size_t result = 0u; + while (beginLhs != endLhs && beginRhs != endRhs) { + if (*beginLhs < *beginRhs) { + ++beginLhs; + } else if (*beginRhs < *beginLhs) { + ++beginRhs; + } else { + ++beginLhs; + ++beginRhs; } - return result + std::distance(beginLhs, endLhs) + std::distance(beginRhs, endRhs); + ++result; } - - //! Compute the Jaccard index of the elements of the ranges - //! [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs). - //! - //! This is defined as \f$\frac{|A\cap B|}{|A\cup B|}\f$. - template - static double jaccard(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) - { - std::size_t numer = 0u; - std::size_t denom = 0u; - while (beginLhs != endLhs && beginRhs != endRhs) - { - if (*beginLhs < *beginRhs) - { - ++beginLhs; - } - else if (*beginRhs < *beginLhs) - { - ++beginRhs; - } - else - { - ++beginLhs; ++beginRhs; ++numer; - } - ++denom; + return result + std::distance(beginLhs, endLhs) + std::distance(beginRhs, endRhs); + } + + //! Compute the Jaccard index of the elements of the ranges + //! [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs). + //! + //! This is defined as \f$\frac{|A\cap B|}{|A\cup B|}\f$. + template + static double jaccard(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) { + std::size_t numer = 0u; + std::size_t denom = 0u; + while (beginLhs != endLhs && beginRhs != endRhs) { + if (*beginLhs < *beginRhs) { + ++beginLhs; + } else if (*beginRhs < *beginLhs) { + ++beginRhs; + } else { + ++beginLhs; + ++beginRhs; + ++numer; } - denom += std::distance(beginLhs, endLhs) + std::distance(beginRhs, endRhs); - return denom == 0 ? 0.0 : static_cast(numer) / static_cast(denom); + ++denom; } - - //! Compute the overlap coefficient (or, Szymkiewicz-Simpson - //! coefficient) of the elements of the ranges - //! [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs). - //! - //! This is defined as \f$\frac{|A\cap B|}{\min(|A|,|B|)}\f$. 
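As a concrete check of the two similarity measures documented here (hypothetical values): for A = {1, 2, 3} and B = {2, 3, 4} the intersection has 2 elements and the union 4, so the Jaccard index is 2/4 = 0.5 while the overlap coefficient is 2/min(3, 3) = 2/3. A minimal sketch, assuming the header path and namespace from this patch:

    #include <maths/CSetTools.h>

    #include <vector>

    void example() {
        // Both ranges must be sorted.
        std::vector<int> A{1, 2, 3};
        std::vector<int> B{2, 3, 4};
        double j = ml::maths::CSetTools::jaccard(A.begin(), A.end(), B.begin(), B.end());
        double o = ml::maths::CSetTools::overlap(A.begin(), A.end(), B.begin(), B.end());
        // j is 2/4 = 0.5 and o is 2/3.
    }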
- template - static double overlap(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) - { - std::size_t numer = 0u; - std::size_t nl = 0u; - std::size_t nr = 0u; - while (beginLhs != endLhs && beginRhs != endRhs) - { - if (*beginLhs < *beginRhs) - { - ++beginLhs; ++nl; - } - else if (*beginRhs < *beginLhs) - { - ++beginRhs; ++nr; - } - else - { - ++beginLhs; ++beginRhs; ++numer; ++nl; ++nr; - } + denom += std::distance(beginLhs, endLhs) + std::distance(beginRhs, endRhs); + return denom == 0 ? 0.0 : static_cast(numer) / static_cast(denom); + } + + //! Compute the overlap coefficient (or, Szymkiewicz-Simpson + //! coefficient) of the elements of the ranges + //! [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs). + //! + //! This is defined as \f$\frac{|A\cap B|}{\min(|A|,|B|)}\f$. + template + static double overlap(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) { + std::size_t numer = 0u; + std::size_t nl = 0u; + std::size_t nr = 0u; + while (beginLhs != endLhs && beginRhs != endRhs) { + if (*beginLhs < *beginRhs) { + ++beginLhs; + ++nl; + } else if (*beginRhs < *beginLhs) { + ++beginRhs; + ++nr; + } else { + ++beginLhs; + ++beginRhs; + ++numer; + ++nl; + ++nr; } - nl += std::distance(beginLhs, endLhs); - nr += std::distance(beginRhs, endRhs); - double denom = static_cast(std::min(nl, nr)); - return denom == 0 ? 0.0 : static_cast(numer) / static_cast(denom); } + nl += std::distance(beginLhs, endLhs); + nr += std::distance(beginRhs, endRhs); + double denom = static_cast(std::min(nl, nr)); + return denom == 0 ? 0.0 : static_cast(numer) / static_cast(denom); + } }; - } } diff --git a/include/maths/CSignal.h b/include/maths/CSignal.h index 839208dca3..2fad66ddb8 100644 --- a/include/maths/CSignal.h +++ b/include/maths/CSignal.h @@ -15,67 +15,63 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Useful functions from signal processing. -class MATHS_EXPORT CSignal -{ - public: - using TDoubleVec = std::vector; - using TComplex = std::complex; - using TComplexVec = std::vector; - using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TFloatMeanAccumulatorVec = std::vector; - using TFloatMeanAccumulatorCRng = core::CVectorRange; +class MATHS_EXPORT CSignal { +public: + using TDoubleVec = std::vector; + using TComplex = std::complex; + using TComplexVec = std::vector; + using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TFloatMeanAccumulatorVec = std::vector; + using TFloatMeanAccumulatorCRng = core::CVectorRange; - public: - //! Compute the conjugate of \p f. - static void conj(TComplexVec &f); +public: + //! Compute the conjugate of \p f. + static void conj(TComplexVec& f); - //! Compute the Hadamard product of \p fx and \p fy. - static void hadamard(const TComplexVec &fx, TComplexVec &fy); + //! Compute the Hadamard product of \p fx and \p fy. + static void hadamard(const TComplexVec& fx, TComplexVec& fy); - //! Cooley-Tukey fast DFT transform implementation. - //! - //! \note This is a simple implementation radix 2 DIT which uses the chirp-z - //! idea to handle the case that the length of \p fx is not a power of 2. As - //! such it is definitely not a highly optimized FFT implementation. It should - //! be sufficiently fast for our needs. - static void fft(TComplexVec &f); + //! Cooley-Tukey fast DFT transform implementation. + //! + //! \note This is a simple implementation radix 2 DIT which uses the chirp-z + //! 
idea to handle the case that the length of \p f is not a power of 2. As
+    //! such it is definitely not a highly optimized FFT implementation. It should
+    //! be sufficiently fast for our needs.
+    static void fft(TComplexVec& f);

-        //! This uses conjugate of the conjugate of the series is the inverse DFT trick
-        //! to compute this using fft.
-        static void ifft(TComplexVec &f);
+    //! This uses the fact that the conjugate of the DFT of the conjugate of a
+    //! series is the inverse DFT, so it can be computed with fft.
+    static void ifft(TComplexVec& f);

-        //! Compute the discrete cyclic autocorrelation of \p values for the offset
-        //! \p offset.
-        //!
-        //! This is just
-        //! <pre class="fragment">
-        //!   \f$\frac{1}{(n-k)\sigma^2}\sum_{t=1}^{n-k}{(f(t) - \mu)(f(t+k) - \mu)}\f$
-        //! </pre>
-        //!
-        //! \param[in] offset The offset as a distance in \p values.
-        //! \param[in] values The values for which to compute the autocorrelation.
-        static double autocorrelation(std::size_t offset, const TFloatMeanAccumulatorVec &values);
+    //! Compute the discrete cyclic autocorrelation of \p values for the offset
+    //! \p offset.
+    //!
+    //! This is just
+    //! <pre class="fragment">
+    //!   \f$\frac{1}{(n-k)\sigma^2}\sum_{t=1}^{n-k}{(f(t) - \mu)(f(t+k) - \mu)}\f$
+    //! </pre>
+ //! + //! \param[in] offset The offset as a distance in \p values. + //! \param[in] values The values for which to compute the autocorrelation. + static double autocorrelation(std::size_t offset, const TFloatMeanAccumulatorVec& values); - //! Compute the discrete cyclic autocorrelation of \p values for the offset - //! \p offset. - //! - //! \note Implementation for vector ranges. - static double autocorrelation(std::size_t offset, TFloatMeanAccumulatorCRng values); + //! Compute the discrete cyclic autocorrelation of \p values for the offset + //! \p offset. + //! + //! \note Implementation for vector ranges. + static double autocorrelation(std::size_t offset, TFloatMeanAccumulatorCRng values); - //! Get linear autocorrelations for all offsets up to the length of \p values. - //! - //! \param[in] values The values for which to compute autocorrelation. - //! \param[in] result Filled in with the autocorrelations of \p values for - //! offsets 1, 2, ..., length \p values - 1. - static void autocorrelations(const TFloatMeanAccumulatorVec &values, TDoubleVec &result); + //! Get linear autocorrelations for all offsets up to the length of \p values. + //! + //! \param[in] values The values for which to compute autocorrelation. + //! \param[in] result Filled in with the autocorrelations of \p values for + //! offsets 1, 2, ..., length \p values - 1. + static void autocorrelations(const TFloatMeanAccumulatorVec& values, TDoubleVec& result); }; - } } diff --git a/include/maths/CSolvers.h b/include/maths/CSolvers.h index 20505ae0b1..c4c08238d8 100644 --- a/include/maths/CSolvers.h +++ b/include/maths/CSolvers.h @@ -11,8 +11,8 @@ #include #include -#include #include +#include #include #include #include @@ -26,10 +26,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Collection of root solving functions. //! @@ -37,1067 +35,858 @@ namespace maths //! This implements a collection of root solving functions as static //! member functions. In particular, it implements Brent's method and //! interval bisection. -class MATHS_EXPORT CSolvers -{ - private: - using TDoubleDoublePr = std::pair; - - //! \name Helpers - //@{ - //! An inverse quadratic interpolation of three distinct - //! function values. WARNING the caller must ensure that - //! the \p fa != \p fb != \p fc. - static inline double inverseQuadraticInterpolate(const double a, - const double b, - const double c, - const double fa, - const double fb, - const double fc) - { - return a * fb * fc / (fa - fb) / (fa - fc) - + b * fa * fc / (fb - fa) / (fb - fc) - + c * fa * fb / (fc - fa) / (fc - fb); - } - - //! A secant interpolation of two distinct function values. - //! WARNING the caller must ensure that \p fa != \p fb. - static inline double secantInterpolate(const double a, - const double b, - const double fa, - const double fb) - { - return b - fb * (b - a) / (fb - fa); - } +class MATHS_EXPORT CSolvers { +private: + using TDoubleDoublePr = std::pair; + + //! \name Helpers + //@{ + //! An inverse quadratic interpolation of three distinct + //! function values. WARNING the caller must ensure that + //! the \p fa != \p fb != \p fc. + static inline double + inverseQuadraticInterpolate(const double a, const double b, const double c, const double fa, const double fb, const double fc) { + return a * fb * fc / (fa - fb) / (fa - fc) + b * fa * fc / (fb - fa) / (fb - fc) + c * fa * fb / (fc - fa) / (fc - fb); + } + + //! A secant interpolation of two distinct function values. + //! 
WARNING the caller must ensure that \p fa != \p fb. + static inline double secantInterpolate(const double a, const double b, const double fa, const double fb) { + return b - fb * (b - a) / (fb - fa); + } + + //! Bisect the interval [\p a, \p b]. + static inline double bisect(const double a, const double b) { return (a + b) / 2.0; } + + //! Shift the values such that a = b and b = c. + static inline void shift(double& a, double& b, const double c) { + a = b; + b = c; + } + + //! Shift the values such that a = b, b = c and c = d. + static inline void shift(double& a, double& b, double& c, const double d) { + a = b; + b = c; + c = d; + } + //@} + + //! Function wrapper which checks for NaN argument. + template + class CTrapNaNArgument { + public: + CTrapNaNArgument(const F& f) : m_F(f) {} - //! Bisect the interval [\p a, \p b]. - static inline double bisect(const double a, - const double b) - { - return (a + b) / 2.0; + inline double operator()(const double x) const { + if (CMathsFuncs::isNan(x)) { + throw std::invalid_argument("x is nan"); + } + return m_F(x); } - //! Shift the values such that a = b and b = c. - static inline void shift(double &a, - double &b, - const double c) - { - a = b; - b = c; + private: + F m_F; + }; + +private: + //! Minimizes \p f in the interval [\p a, \p b] subject + //! to the constraint that \p fx > \p lb. + template + static void minimize(double a, + double b, + double fa, + double fb, + const F& f, + double tolerance, + std::size_t& maxIterations, + double lb, + double& x, + double& fx) { + tolerance = std::max(tolerance, std::sqrt(std::numeric_limits::epsilon())); + const double golden = 0.3819660; + + if (fa < fb) { + x = a; + fx = fa; + } else { + x = b; + fx = fb; } - //! Shift the values such that a = b, b = c and c = d. - static inline void shift(double &a, - double &b, - double &c, - const double d) - { - a = b; - b = c; - c = d; - } - //@} + double w = x, v = x; + double fw = fx, fv = fx; + double s = 0.0, sLast = 0.0; - //! Function wrapper which checks for NaN argument. - template - class CTrapNaNArgument - { - public: - CTrapNaNArgument(const F &f) : m_F(f) {} - - inline double operator()(const double x) const - { - if (CMathsFuncs::isNan(x)) - { - throw std::invalid_argument("x is nan"); - } - return m_F(x); - } + std::size_t n = maxIterations; - private: - F m_F; - }; + do { + double xm = bisect(a, b); - private: - //! Minimizes \p f in the interval [\p a, \p b] subject - //! to the constraint that \p fx > \p lb. - template - static void minimize(double a, - double b, - double fa, - double fb, - const F &f, - double tolerance, - std::size_t &maxIterations, - double lb, - double &x, - double &fx) - { - tolerance = std::max(tolerance, - std::sqrt(std::numeric_limits::epsilon())); - const double golden = 0.3819660; - - if (fa < fb) - { - x = a; - fx = fa; - } - else - { - x = b; - fx = fb; + double t1 = tolerance * (std::fabs(x) + 0.25); + double t2 = 2.0 * t1; + if (fx <= lb || std::fabs(x - xm) <= (t2 - (b - a) / 2.0)) { + break; } - double w = x, v = x; - double fw = fx, fv = fx; - double s = 0.0, sLast = 0.0; + double sign = (x >= xm) ? -1.0 : 1.0; - std::size_t n = maxIterations; - - do - { - double xm = bisect(a, b); - - double t1 = tolerance * (std::fabs(x) + 0.25); - double t2 = 2.0 * t1; - if (fx <= lb || std::fabs(x - xm) <= (t2 - (b - a) / 2.0)) - { - break; + if (sLast > t1) { + // Fit parabola to abscissa. 
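+                // Inverse parabolic interpolation through the three best
+                // points (v, fv), (w, fw) and (x, fx): the trial step is
+                // s = p / q, which is only accepted if the parabola's
+                // minimum lies inside [a, b] and the step is shrinking;
+                // otherwise golden section search is used instead.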
+ double r = (x - w) * (fx - fv); + double q = (x - v) * (fx - fw); + double p = (x - v) * q - (x - w) * r; + q = 2 * (q - r); + if (q > 0) { + p = -p; } + q = std::fabs(q); - double sign = (x >= xm) ? -1.0 : 1.0; - - if (sLast > t1) - { - // Fit parabola to abscissa. - double r = (x - w) * (fx - fv); - double q = (x - v) * (fx - fw); - double p = (x - v) * q - (x - w) * r; - q = 2 * (q - r); - if (q > 0) - { - p = -p; - } - q = std::fabs(q); - - double td = sLast; - sLast = std::fabs(s); - - if (std::fabs(p) >= q * td / 2.0 - || p <= q * (a - x) - || p >= q * (b - x)) - { - // Minimum not in range or converging too slowly. - sLast = (x >= xm) ? x - a : b - x; - s = sign * std::max(golden * sLast, t1); - } - else - { - s = p / q; - double u = x + s; - if ((u - a) < t2 || (b - u) < t2) - { - s = sign * t1; - } - } - } - else - { - // Don't have a suitable abscissa so just use golden - // section in to the larger of [a, x] and [x, b]. + double td = sLast; + sLast = std::fabs(s); + + if (std::fabs(p) >= q * td / 2.0 || p <= q * (a - x) || p >= q * (b - x)) { + // Minimum not in range or converging too slowly. sLast = (x >= xm) ? x - a : b - x; s = sign * std::max(golden * sLast, t1); - } - - double u = x + s; - double fu = f(u); - LOG_TRACE("s = " << s << ", u = " << u) - LOG_TRACE("f(u) = " << fu << ", f(x) = " << fx); - - if (fu <= fx) - { - u >= x ? a = x : b = x; - shift(v, w, x, u); - shift(fv, fw, fx, fu); - } - else - { - u < x ? a = u : b = u; - if (fu <= fw || w == x) - { - shift(v, w, u); - shift(fv, fw, fu); - } - else if (fu <= fv || v == x || v == w) - { - v = u; - fv = fu; + } else { + s = p / q; + double u = x + s; + if ((u - a) < t2 || (b - u) < t2) { + s = sign * t1; } } - LOG_TRACE("a = " << a << ", b = " << b); - LOG_TRACE("x = " << x << ", v = " << v << ", w = " << w); - LOG_TRACE("f(x) = " << fx << ", f(v) = " << fv << ", f(w) = " << fw); + } else { + // Don't have a suitable abscissa so just use golden + // section in to the larger of [a, x] and [x, b]. + sLast = (x >= xm) ? x - a : b - x; + s = sign * std::max(golden * sLast, t1); + } + + double u = x + s; + double fu = f(u); + LOG_TRACE("s = " << s << ", u = " << u) + LOG_TRACE("f(u) = " << fu << ", f(x) = " << fx); + + if (fu <= fx) { + u >= x ? a = x : b = x; + shift(v, w, x, u); + shift(fv, fw, fx, fu); + } else { + u < x ? a = u : b = u; + if (fu <= fw || w == x) { + shift(v, w, u); + shift(fv, fw, fu); + } else if (fu <= fv || v == x || v == w) { + v = u; + fv = fu; + } } - while (--n > 0); - - maxIterations -= n; + LOG_TRACE("a = " << a << ", b = " << b); + LOG_TRACE("x = " << x << ", v = " << v << ", w = " << w); + LOG_TRACE("f(x) = " << fx << ", f(v) = " << fv << ", f(w) = " << fw); + } while (--n > 0); + + maxIterations -= n; + } + + //! Attempt to bracket a root of \p f using [\p a, \p b] + //! as a starting point. + template + static bool bracket(double& a, + double& b, + double& fa, + double& fb, + const F& f, + const double direction, + std::size_t& maxIterations, + const double min, + const double max) { + if (a > b) { + std::swap(a, b); + std::swap(fa, fb); } - //! Attempt to bracket a root of \p f using [\p a, \p b] - //! as a starting point. 
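A sketch of how the public entry points to this search (leftBracket and rightBracket, declared further below) might be used; the function and starting interval here are hypothetical, and header path and namespace are taken from this patch:

    #include <maths/CSolvers.h>

    #include <cmath>
    #include <cstddef>

    void example() {
        // The root of f lies at e - 1, roughly 1.72, to the right of [0, 1].
        auto f = [](double x) { return std::log(x + 1.0) - 1.0; };
        double a = 0.0, b = 1.0;
        double fa = f(a), fb = f(b);
        std::size_t maxIterations = 10;
        if (ml::maths::CSolvers::rightBracket(a, b, fa, fb, f, maxIterations)) {
            // Success: fa * fb <= 0.0, so [a, b] now brackets the root.
        }
    }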
- template - static bool bracket(double &a, - double &b, - double &fa, - double &fb, - const F &f, - const double direction, - std::size_t &maxIterations, - const double min, - const double max) - { - if (a > b) - { - std::swap(a, b); - std::swap(fa, fb); - } + // Simple doubling search which switches to the secant + // method if this strategy fails to produce a bracket + // quickly. - // Simple doubling search which switches to the secant - // method if this strategy fails to produce a bracket - // quickly. + double step = b - a; + if (step == 0.0) { + step = 1.0; + } - double step = b - a; - if (step == 0.0) - { - step = 1.0; + std::size_t n = maxIterations; + for (/**/; n > 0; --n) { + if (fa * fb <= 0.0) { + break; } - std::size_t n = maxIterations; - for (/**/; n > 0; --n) - { - if (fa * fb <= 0.0) - { - break; - } - - step *= 2.0; - if (n < (3 * maxIterations) / 4) - { - double minStep = step; - double maxStep = step * step; - step = fa == fb ? - maxStep : std::min(std::max( std::fabs(b - a) - / std::fabs(fb - fa) - * std::fabs(fb), - minStep), maxStep); - } - a = b; - fa = fb; - b += direction * step; - b = std::max(std::min(b, max), min); - if (a == b) - { - // We've hit our domain constraints. - break; - } - - fb = f(b); + step *= 2.0; + if (n < (3 * maxIterations) / 4) { + double minStep = step; + double maxStep = step * step; + step = fa == fb ? maxStep : std::min(std::max(std::fabs(b - a) / std::fabs(fb - fa) * std::fabs(fb), minStep), maxStep); } - - if (a > b) - { - std::swap(a, b); - std::swap(fa, fb); + a = b; + fa = fb; + b += direction * step; + b = std::max(std::min(b, max), min); + if (a == b) { + // We've hit our domain constraints. + break; } - maxIterations = maxIterations - n; - return fa * fb <= 0.0; + fb = f(b); } - public: - //! Find a bracket for a root of \p f searching left. - //! WARNING to be guaranteed to work the function must - //! only have a single root which is guaranteed if it - //! is monotonic, for example. - //! - //! \param[in,out] a The bracket starting left end point. - //! Set to the bracketing left end point if a root could - //! be bracketed. - //! \param[in,out] b The bracket starting right end point. - //! Set to the bracketing right end point if a root could - //! be bracketed. - //! \param[in,out] fa The value of f(a). Set to the value - //! of f at the output value of a if a root could be - //! bracketed. - //! \param[in,out] fb The value of f(b). Set to the value - //! of f at the output value of b if a root could be - //! bracketed. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[in] min The minimum value in the function domain. - //! \param[in] max The maximum value in the function domain. - template - static inline bool leftBracket(double &a, - double &b, - double &fa, - double &fb, - const F &f, - std::size_t &maxIterations, - double min = -std::numeric_limits::max(), - double max = std::numeric_limits::max()) - { - return bracket(a, b, fa, fb, f, -1.0, maxIterations, min, max); + if (a > b) { + std::swap(a, b); + std::swap(fa, fb); } - //! Find a bracket for a root of \p f searching right. - //! WARNING to be guaranteed to work the function must - //! only have a single root which is guaranteed if it - //! is monotonic, for example. - //! - //! 
\param[in,out] a The bracket starting left end point. - //! Set to the bracketing left end point if a root could - //! be bracketed. - //! \param[in,out] b The bracket starting right end point. - //! Set to the bracketing right end point if a root could - //! be bracketed. - //! \param[in,out] fa The value of f(a). Set to the value - //! of f at the output value of a if a root could be - //! bracketed. - //! \param[in,out] fb The value of f(b). Set to the value - //! of f at the output value of b if a root could be - //! bracketed. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[in] min The minimum value in the function domain. - //! \param[in] max The maximum value in the function domain. - template - static inline bool rightBracket(double &a, - double &b, - double &fa, - double &fb, - const F &f, - std::size_t &maxIterations, - double min = -std::numeric_limits::max(), - double max = std::numeric_limits::max()) - { - return bracket(a, b, fa, fb, f, +1.0, maxIterations, min, max); + maxIterations = maxIterations - n; + return fa * fb <= 0.0; + } + +public: + //! Find a bracket for a root of \p f searching left. + //! WARNING to be guaranteed to work the function must + //! only have a single root which is guaranteed if it + //! is monotonic, for example. + //! + //! \param[in,out] a The bracket starting left end point. + //! Set to the bracketing left end point if a root could + //! be bracketed. + //! \param[in,out] b The bracket starting right end point. + //! Set to the bracketing right end point if a root could + //! be bracketed. + //! \param[in,out] fa The value of f(a). Set to the value + //! of f at the output value of a if a root could be + //! bracketed. + //! \param[in,out] fb The value of f(b). Set to the value + //! of f at the output value of b if a root could be + //! bracketed. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[in] min The minimum value in the function domain. + //! \param[in] max The maximum value in the function domain. + template + static inline bool leftBracket(double& a, + double& b, + double& fa, + double& fb, + const F& f, + std::size_t& maxIterations, + double min = -std::numeric_limits::max(), + double max = std::numeric_limits::max()) { + return bracket(a, b, fa, fb, f, -1.0, maxIterations, min, max); + } + + //! Find a bracket for a root of \p f searching right. + //! WARNING to be guaranteed to work the function must + //! only have a single root which is guaranteed if it + //! is monotonic, for example. + //! + //! \param[in,out] a The bracket starting left end point. + //! Set to the bracketing left end point if a root could + //! be bracketed. + //! \param[in,out] b The bracket starting right end point. + //! Set to the bracketing right end point if a root could + //! be bracketed. + //! \param[in,out] fa The value of f(a). Set to the value + //! of f at the output value of a if a root could be + //! bracketed. + //! \param[in,out] fb The value of f(b). Set to the value + //! of f at the output value of b if a root could be + //! bracketed. 
+ //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[in] min The minimum value in the function domain. + //! \param[in] max The maximum value in the function domain. + template + static inline bool rightBracket(double& a, + double& b, + double& fa, + double& fb, + const F& f, + std::size_t& maxIterations, + double min = -std::numeric_limits::max(), + double max = std::numeric_limits::max()) { + return bracket(a, b, fa, fb, f, +1.0, maxIterations, min, max); + } + + //! \name Bracketed Solvers + //@{ + //! The preferred solver implementation. This uses the + //! TOMS 748 algorithm implemented by boost trapping + //! the case that it produces a bad interpolant, in which + //! case it falls back to Brent's method). WARNING this + //! follows the boost::math::tools::toms748_solve policy + //! and throws if a and b don't bracket the root. + //! + //! \param[in,out] a The left bracket. Set to the solution + //! interval left end point if [a,b] brackets a root. + //! \param[in,out] b The right bracket. Set to the solution + //! interval right end point if [a,b] brackets a root. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[in] equal The predicate to decide when to terminate + //! The test is applied to the interval end points a and b. + //! \param[out] bestGuess Filled in with the best estimate + //! of the root. + template + static inline void solve(double& a, double& b, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) { + if (equal(a, b)) { + bestGuess = bisect(a, b); + maxIterations = 0u; + } else if (maxIterations < 3) { + bestGuess = bisect(a, b); + maxIterations = 0u; + } else { + maxIterations -= 2; + solve(a, b, f(a), f(b), f, maxIterations, equal, bestGuess); + maxIterations += 2; } - - //! \name Bracketed Solvers - //@{ - //! The preferred solver implementation. This uses the - //! TOMS 748 algorithm implemented by boost trapping - //! the case that it produces a bad interpolant, in which - //! case it falls back to Brent's method). WARNING this - //! follows the boost::math::tools::toms748_solve policy - //! and throws if a and b don't bracket the root. - //! - //! \param[in,out] a The left bracket. Set to the solution - //! interval left end point if [a,b] brackets a root. - //! \param[in,out] b The right bracket. Set to the solution - //! interval right end point if [a,b] brackets a root. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[in] equal The predicate to decide when to terminate - //! The test is applied to the interval end points a and b. - //! \param[out] bestGuess Filled in with the best estimate - //! of the root. 
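A minimal sketch of driving this solver with a hypothetical function and tolerance (header path and namespace as in this patch; note the call throws if [a, b] does not bracket a root):

    #include <maths/CSolvers.h>

    #include <cmath>
    #include <cstddef>

    void example() {
        auto f = [](double x) { return x * x - 2.0; };
        auto equal = [](double lhs, double rhs) { return std::fabs(rhs - lhs) < 1e-8; };
        double a = 0.0, b = 2.0, bestGuess = 0.0;
        std::size_t maxIterations = 20;
        // On return [a, b] is the final interval, bestGuess is close to
        // sqrt(2) and maxIterations holds the number of evaluations used.
        ml::maths::CSolvers::solve(a, b, f, maxIterations, equal, bestGuess);
    }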
- template - static inline void solve(double &a, - double &b, - const F &f, - std::size_t &maxIterations, - const EQUAL &equal, - double &bestGuess) - { - if (equal(a, b)) - { - bestGuess = bisect(a, b); - maxIterations = 0u; - } - else if (maxIterations < 3) - { - bestGuess = bisect(a, b); - maxIterations = 0u; - } - else - { - maxIterations -= 2; - solve(a, b, f(a), f(b), f, maxIterations, equal, bestGuess); - maxIterations += 2; - } + } + + //! The preferred solver implementation. This uses the + //! TOMS 748 algorithm implemented by boost trapping + //! the case that it produces a bad interpolant, in which + //! case it falls back to Brent's method). WARNING this + //! follows the boost::math::tools::toms748_solve policy + //! and throws if a and b don't bracket the root. + //! + //! \param[in,out] a The left bracket. Set to the solution + //! interval left end point if [a,b] brackets a root. + //! \param[in,out] b The right bracket. Set to the solution + //! interval right end point if [a,b] brackets a root. + //! \param[in] fa The value of \p f at \p a. + //! \param[in] fb The value of \p f at \p b. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[in] equal The predicate to decide when to terminate + //! The test is applied to the interval end points a and b. + //! \param[out] bestGuess Filled in with the best estimate + //! of the root. + template + static void + solve(double& a, double& b, double fa, double fb, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) { + if (equal(a, b)) { + // There is a bug in boost's solver for the case that + // a == b so trap and return early. + maxIterations = 0; + bestGuess = (a + b) / 2.0; + return; } - - //! The preferred solver implementation. This uses the - //! TOMS 748 algorithm implemented by boost trapping - //! the case that it produces a bad interpolant, in which - //! case it falls back to Brent's method). WARNING this - //! follows the boost::math::tools::toms748_solve policy - //! and throws if a and b don't bracket the root. - //! - //! \param[in,out] a The left bracket. Set to the solution - //! interval left end point if [a,b] brackets a root. - //! \param[in,out] b The right bracket. Set to the solution - //! interval right end point if [a,b] brackets a root. - //! \param[in] fa The value of \p f at \p a. - //! \param[in] fb The value of \p f at \p b. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[in] equal The predicate to decide when to terminate - //! The test is applied to the interval end points a and b. - //! \param[out] bestGuess Filled in with the best estimate - //! of the root. - template - static void solve(double &a, - double &b, - double fa, - double fb, - const F &f, - std::size_t &maxIterations, - const EQUAL &equal, - double &bestGuess) - { - if (equal(a, b)) - { - // There is a bug in boost's solver for the case that - // a == b so trap and return early. 
- maxIterations = 0; - bestGuess = (a + b) / 2.0; - return; - } - try - { - CTrapNaNArgument fSafe(f); - // Need at least one step or the boost solver underflows - // size_t. - boost::uintmax_t n = std::max(maxIterations, std::size_t(1)); - TDoubleDoublePr bracket = - boost::math::tools::toms748_solve&>(fSafe, a, b, fa, fb, equal, n); - a = bracket.first; - b = bracket.second; - bestGuess = bisect(a, b); - maxIterations = static_cast(n); - return; - } - catch (const std::exception &e) - { - LOG_TRACE("Falling back to Brent's solver: " << e.what()); - // Avoid compiler warning in the case of LOG_TRACE being compiled out - static_cast(&e); - } - if (!brent(a, b, fa, fb, f, maxIterations, equal, bestGuess)) - { - throw std::invalid_argument("doesn't bracket root"); - } + try { + CTrapNaNArgument fSafe(f); + // Need at least one step or the boost solver underflows + // size_t. + boost::uintmax_t n = std::max(maxIterations, std::size_t(1)); + TDoubleDoublePr bracket = boost::math::tools::toms748_solve&>(fSafe, a, b, fa, fb, equal, n); + a = bracket.first; + b = bracket.second; + bestGuess = bisect(a, b); + maxIterations = static_cast(n); + return; + } catch (const std::exception& e) { + LOG_TRACE("Falling back to Brent's solver: " << e.what()); + // Avoid compiler warning in the case of LOG_TRACE being compiled out + static_cast(&e); } - - //! Implements Brent's method for root finding. - //! - //! \see here - //! for details. - //! - //! \param[in,out] a The left bracket. Set to the solution - //! interval left end point if [a,b] brackets a root. - //! \param[in,out] b The right bracket. Set to the solution - //! interval right end point if [a,b] brackets a root. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[in] equal The predicate to decide when to terminate - //! The test is applied to the interval end points a and b. - //! \param[out] bestGuess Filled in with the best estimate - //! of the root. - //! \return True if a, b bracket the root. - template - static bool brent(double &a, - double &b, - const F &f, - std::size_t &maxIterations, - const EQUAL &equal, - double &bestGuess) - { - if (equal(a, b)) - { - bestGuess = bisect(a, b); - maxIterations = 0u; - return true; - } - if (maxIterations < 3) - { - bestGuess = bisect(a, b); - maxIterations = 0u; - return true; - } - maxIterations -= 2; - bool result = brent(a, b, f(a), f(b), f, maxIterations, equal, bestGuess); - maxIterations += 2; - return result; + if (!brent(a, b, fa, fb, f, maxIterations, equal, bestGuess)) { + throw std::invalid_argument("doesn't bracket root"); + } + } + + //! Implements Brent's method for root finding. + //! + //! \see here + //! for details. + //! + //! \param[in,out] a The left bracket. Set to the solution + //! interval left end point if [a,b] brackets a root. + //! \param[in,out] b The right bracket. Set to the solution + //! interval right end point if [a,b] brackets a root. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[in] equal The predicate to decide when to terminate + //! 
The test is applied to the interval end points a and b. + //! \param[out] bestGuess Filled in with the best estimate + //! of the root. + //! \return True if a, b bracket the root. + template + static bool brent(double& a, double& b, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) { + if (equal(a, b)) { + bestGuess = bisect(a, b); + maxIterations = 0u; + return true; + } + if (maxIterations < 3) { + bestGuess = bisect(a, b); + maxIterations = 0u; + return true; + } + maxIterations -= 2; + bool result = brent(a, b, f(a), f(b), f, maxIterations, equal, bestGuess); + maxIterations += 2; + return result; + } + + //! Implements Brent's method for root finding. + //! + //! \see http://en.wikipedia.org/wiki/Brent%27s_method for details. + //! + //! \param[in,out] a The left bracket. Set to the solution + //! interval left end point if [a,b] brackets a root. + //! \param[in,out] b The right bracket. Set to the solution + //! interval right end point if [a,b] brackets a root. + //! \param[in] fa The value of \p f at \p a. + //! \param[in] fb The value of \p f at \p b. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[in] equal The predicate to decide when to terminate + //! The test is applied to the interval end points a and b. + //! \param[out] bestGuess Filled in with the best estimate + //! of the root. + //! \return True if a, b bracket the root. + template + static bool + brent(double& a, double& b, double fa, double fb, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) { + std::size_t n = maxIterations; + + if (fa == 0.0) { + // Root at left bracket. + bestGuess = b = a; + maxIterations -= n; + return true; + } + if (fb == 0.0) { + // Root at right bracket. + bestGuess = a = b; + maxIterations -= n; + return true; + } + if (fa * fb > 0.0) { + // Not bracketed. + maxIterations -= n; + return false; } - //! Implements Brent's method for root finding. - //! - //! \see http://en.wikipedia.org/wiki/Brent%27s_method for details. - //! - //! \param[in,out] a The left bracket. Set to the solution - //! interval left end point if [a,b] brackets a root. - //! \param[in,out] b The right bracket. Set to the solution - //! interval right end point if [a,b] brackets a root. - //! \param[in] fa The value of \p f at \p a. - //! \param[in] fb The value of \p f at \p b. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[in] equal The predicate to decide when to terminate - //! The test is applied to the interval end points a and b. - //! \param[out] bestGuess Filled in with the best estimate - //! of the root. - //! \return True if a, b bracket the root. - template - static bool brent(double &a, - double &b, - double fa, - double fb, - const F &f, - std::size_t &maxIterations, - const EQUAL &equal, - double &bestGuess) - { - std::size_t n = maxIterations; + if (std::fabs(fa) < std::fabs(fb)) { + std::swap(a, b); + std::swap(fa, fb); + } - if (fa == 0.0) - { - // Root at left bracket. 
- bestGuess = b = a; - maxIterations -= n; - return true; - } - if (fb == 0.0) - { - // Root at right bracket. - bestGuess = a = b; - maxIterations -= n; - return true; - } - if (fa * fb > 0.0) - { - // Not bracketed. - maxIterations -= n; - return false; - } + bool bisected = true; + double c = a; + double fc = fa; + double d = std::numeric_limits::max(); - if (std::fabs(fa) < std::fabs(fb)) - { - std::swap(a, b); - std::swap(fa, fb); - } + do { + double s = (fa != fc) && (fb != fc) ? inverseQuadraticInterpolate(a, b, c, fa, fb, fc) : secantInterpolate(a, b, fa, fb); - bool bisected = true; - double c = a; - double fc = fa; - double d = std::numeric_limits::max(); - - do - { - double s = (fa != fc) && (fb != fc) ? - inverseQuadraticInterpolate(a, b, c, fa, fb, fc) : - secantInterpolate(a, b, fa, fb); - - double e = (3.0 * a + b) / 4.0; - - if ( (!(((s > e) && (s < b)) || ((s < e) && (s > b)))) - || ( bisected && ((std::fabs(s - b) >= std::fabs(b - c) / 2.0) || equal(b, c))) - || (!bisected && ((std::fabs(s - b) >= std::fabs(c - d) / 2.0) || equal(c, d)))) - { - // Use bisection. - s = bisect(a, b); - bisected = true; - } - else - { - bisected = false; - } + double e = (3.0 * a + b) / 4.0; - double fs = f(s); - shift(d, c, b); - fc = fb; + if ((!(((s > e) && (s < b)) || ((s < e) && (s > b)))) || + (bisected && ((std::fabs(s - b) >= std::fabs(b - c) / 2.0) || equal(b, c))) || + (!bisected && ((std::fabs(s - b) >= std::fabs(c - d) / 2.0) || equal(c, d)))) { + // Use bisection. + s = bisect(a, b); + bisected = true; + } else { + bisected = false; + } - if (fs == 0.0) - { - // Root at s. - bestGuess = a = b = s; - maxIterations -= (n - 1); - return true; - } + double fs = f(s); + shift(d, c, b); + fc = fb; - if (fa * fs > 0.0) - { - a = s; - fa = fs; - } - else - { - b = s; - fb = fs; - } + if (fs == 0.0) { + // Root at s. + bestGuess = a = b = s; + maxIterations -= (n - 1); + return true; + } - if (std::fabs(fa) < std::fabs(fb)) - { - std::swap(a, b); - std::swap(fa, fb); - } + if (fa * fs > 0.0) { + a = s; + fa = fs; + } else { + b = s; + fb = fs; } - while (--n > 0 && !equal(a, b)); - if (b < a) - { + if (std::fabs(fa) < std::fabs(fb)) { std::swap(a, b); std::swap(fa, fb); } - bestGuess = (fa != fc) && (fb != fc) ? - inverseQuadraticInterpolate(a, b, c, fa, fb, fc) : - (fa != fb ? secantInterpolate(a, b, fa, fb) : bisect(a, b)); - bestGuess = std::min(std::max(a, bestGuess), b); - maxIterations -= n; + } while (--n > 0 && !equal(a, b)); + if (b < a) { + std::swap(a, b); + std::swap(fa, fb); + } + bestGuess = (fa != fc) && (fb != fc) ? inverseQuadraticInterpolate(a, b, c, fa, fb, fc) + : (fa != fb ? secantInterpolate(a, b, fa, fb) : bisect(a, b)); + bestGuess = std::min(std::max(a, bestGuess), b); + maxIterations -= n; + + return true; + } + + //! Bisection for which the function has not been evaluated + //! at the interval end points. WARNING this is worse than + //! Brent's method (although extremely numerically robust) + //! and is primarily intended for testing. + //! + //! \param[in,out] a The left bracket. Set to the solution + //! interval left end point if [a,b] brackets a root. + //! \param[in,out] b The right bracket. Set to the solution + //! interval right end point if [a,b] brackets a root. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. 
+ //! \param[in] equal The predicate to decide when to terminate + //! The test is applied to the interval end points a and b. + //! \param[out] bestGuess Filled in with the best estimate + //! of the root. + //! \return True if a, b bracket the root and equal(a, b). + template + static bool bisection(double& a, double& b, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) { + if (equal(a, b)) { + bestGuess = bisect(a, b); + maxIterations = 0u; return true; } - - //! Bisection for which the function has not been evaluated - //! at the interval end points. WARNING this is worse than - //! Brent's method (although extremely numerically robust) - //! and is primarily intended for testing. - //! - //! \param[in,out] a The left bracket. Set to the solution - //! interval left end point if [a,b] brackets a root. - //! \param[in,out] b The right bracket. Set to the solution - //! interval right end point if [a,b] brackets a root. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[in] equal The predicate to decide when to terminate - //! The test is applied to the interval end points a and b. - //! \param[out] bestGuess Filled in with the best estimate - //! of the root. - //! \return True if a, b bracket the root and equal(a, b). - template - static bool bisection(double &a, - double &b, - const F &f, - std::size_t &maxIterations, - const EQUAL &equal, - double &bestGuess) - { - if (equal(a, b)) - { - bestGuess = bisect(a, b); - maxIterations = 0u; - return true; - } - if (maxIterations < 3) - { - bestGuess = bisect(a, b); - maxIterations = 0u; - return true; - } - maxIterations -= 2; - bool result = bisection(a, b, f(a), f(b), f, maxIterations, equal, bestGuess); - maxIterations += 2; - return result; + if (maxIterations < 3) { + bestGuess = bisect(a, b); + maxIterations = 0u; + return true; } - - //! Bisection for which the function *has* been evaluated - //! at the interval end points. This means we can save - //! ourselves two function calls. WARNING this is worse than - //! Brent's method (although extremely numerically robust) - //! and is primarily intended for testing. - //! - //! \param[in,out] a The left bracket. Set to the solution - //! interval left end point if [a,b] brackets a root. - //! \param[in,out] b The right bracket. Set to the solution - //! interval right end point if [a,b] brackets a root. - //! \param[in] fa The value of \p f at \p a. - //! \param[in] fb The value of \p f at \p b. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[in] equal The predicate to decide when to terminate - //! The test is applied to the interval end points a and b. - //! \param[out] bestGuess Filled in with the best estimate - //! of the root. - //! \return True if a, b bracket the root and equal(a, b). - template - static bool bisection(double &a, - double &b, - double fa, - double fb, - const F &f, - std::size_t &maxIterations, - const EQUAL &equal, - double &bestGuess) - { - std::size_t n = maxIterations; - if (fa == 0.0) - { - // Root at left bracket. 
- bestGuess = b = a; - maxIterations -= n; - return true; - } - if (fb == 0.0) - { - // Root at right bracket. - bestGuess = a = b; - maxIterations -= n; - return true; - } - if (fa * fb > 0.0) - { - // Not bracketed. - maxIterations -= n; - return false; - } - - do - { - const double c = bisect(a, b); - const double fc = f(c); - if (fc == 0.0) - { - // Root at s. - bestGuess = a = b = c; - maxIterations -= (n - 1); - return true; - } - - if (fa * fc > 0.0) - { - a = c; - fa = fc; - } - else - { - b = c; - fb = fc; - } - } - while (--n > 0 && !equal(a, b)); - - bestGuess = fa != fb ? - secantInterpolate(a, b, fa, fb) : - bisect(a, b); - bestGuess = std::min(std::max(bestGuess, a), b); + maxIterations -= 2; + bool result = bisection(a, b, f(a), f(b), f, maxIterations, equal, bestGuess); + maxIterations += 2; + return result; + } + + //! Bisection for which the function *has* been evaluated + //! at the interval end points. This means we can save + //! ourselves two function calls. WARNING this is worse than + //! Brent's method (although extremely numerically robust) + //! and is primarily intended for testing. + //! + //! \param[in,out] a The left bracket. Set to the solution + //! interval left end point if [a,b] brackets a root. + //! \param[in,out] b The right bracket. Set to the solution + //! interval right end point if [a,b] brackets a root. + //! \param[in] fa The value of \p f at \p a. + //! \param[in] fb The value of \p f at \p b. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[in] equal The predicate to decide when to terminate + //! The test is applied to the interval end points a and b. + //! \param[out] bestGuess Filled in with the best estimate + //! of the root. + //! \return True if a, b bracket the root and equal(a, b). + template + static bool + bisection(double& a, double& b, double fa, double fb, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) { + std::size_t n = maxIterations; + if (fa == 0.0) { + // Root at left bracket. + bestGuess = b = a; maxIterations -= n; - return true; } - //@} - - //! Minimize the function \p f on the interval [\p a, \p b] - //! This terminates if it has converged on a local minimum - //! or it has run out of iterations. This implements Brent's - //! method which combines golden section search and quadratic - //! minimization (see for example numerical recipes in C). - //! - //! Note that this converges to the unique minimum if there - //! is one. If the function \p f has several minima then you - //! could consider using globalMaximize. - //! - //! \param[in] a The left end of the interval on which to - //! minimize \p f. - //! \param[in] b The right end of the interval on which to - //! minimize \p f. - //! \param[in] fa The value of \p f at \p a. - //! \param[in] fb The value of \p f at \p b. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in] tolerance The convergence threshold ignored - //! if too small so feel free to set to zero. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[out] x Set to argmin of f on [\p a, \p b]. - //! 
\param[out] fx Set to the value of f at \p x. - template - static inline void minimize(double a, - double b, - double fa, - double fb, - const F &f, - double tolerance, - std::size_t &maxIterations, - double &x, - double &fx) - { - minimize(a, b, fa, fb, f, - tolerance, maxIterations, - -std::numeric_limits::max(), - x, fx); + if (fb == 0.0) { + // Root at right bracket. + bestGuess = a = b; + maxIterations -= n; + return true; } - - //! Maximize the function \p f on the interval [\p a, \p b] - //! This terminates if it has converged on a local maximum - //! or it has run out of iterations. This minimizes minus - //! \p f using our standard minimization algorithm. - //! - //! Note that this converges to the unique maximum if there - //! is one. If the function \p f has several maxima then you - //! could consider using globalMaximize. - //! - //! \param[in] a The left end of the interval on which to - //! maximize \p f. - //! \param[in] b The right end of the interval on which to - //! maximize \p f. - //! \param[in] fa The value of \p f at \p a. - //! \param[in] fb The value of \p f at \p b. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in] tolerance The convergence threshold ignored - //! if too small so feel free to set to zero. - //! \param[in,out] maxIterations The maximum number of times - //! \p f is evaluated and set to the number of times it was - //! evaluated. - //! \param[out] x Set to argmax of f on [\p a, \p b]. - //! \param[out] fx Set to the value of f at \p x. - template - static inline void maximize(double a, - double b, - double fa, - double fb, - const F &f, - double tolerance, - std::size_t &maxIterations, - double &x, - double &fx) - { - CCompositeFunctions::CMinus f_(f); - minimize(a, b, -fa, -fb, f_, tolerance, maxIterations, x, fx); - fx = -fx; + if (fa * fb > 0.0) { + // Not bracketed. + maxIterations -= n; + return false; } - //! Try and find a global minimum for the function evaluating - //! it at the points \p p and then searching for a local - //! minimum. - //! - //! \param[in] p The points at which to evaluate f looking - //! for a global minimum. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[out] x Set to argmin of f on [\p a, \p b]. - //! \param[out] fx Set to the value of f at \p x. - template - static bool globalMinimize(const T &p, - const F &f, - double &x, - double &fx) - { - using TDoubleSizePr = std::pair; - using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; - - std::size_t n = p.size(); - - if (n == 0) - { - LOG_ERROR("Must provide points at which to evaluate function"); - return false; + do { + const double c = bisect(a, b); + const double fc = f(c); + if (fc == 0.0) { + // Root at s. 
+ bestGuess = a = b = c; + maxIterations -= (n - 1); + return true; } - TMinAccumulator min; - T fp(p.size()); - for (std::size_t i = 0u; i < p.size(); ++i) - { - double fi = f(p[i]); - fp[i] = fi; - min.add(TDoubleSizePr(fi, i)); - } - LOG_TRACE("p = " << core::CContainerPrinter::print(p)); - LOG_TRACE("f(p) = " << core::CContainerPrinter::print(fp)); - - std::size_t i = min[0].second; - std::size_t maxIterations = 5; - if (i == 0) - { - minimize( p[0], p[1], - fp[0], fp[1], - f, 0.0, maxIterations, - x, fx); - } - else if (i == n - 1) - { - minimize( p[n - 2], p[n - 1], - fp[n - 2], fp[n - 1], - f, 0.0, maxIterations, - x, fx); - } - else - { - std::size_t ai = i - 1; - std::size_t bi = i + 1; - minimize( p[ai], p[bi], - fp[ai], fp[bi], - f, 0.0, maxIterations, - x, fx); - if (fp[i] < fx) - { - x = p[i]; - fx = fp[i]; - } - } - LOG_TRACE("x = " << x << " fx = " << fx); - return true; + if (fa * fc > 0.0) { + a = c; + fa = fc; + } else { + b = c; + fb = fc; + } + } while (--n > 0 && !equal(a, b)); + + bestGuess = fa != fb ? secantInterpolate(a, b, fa, fb) : bisect(a, b); + bestGuess = std::min(std::max(bestGuess, a), b); + maxIterations -= n; + + return true; + } + //@} + + //! Minimize the function \p f on the interval [\p a, \p b] + //! This terminates if it has converged on a local minimum + //! or it has run out of iterations. This implements Brent's + //! method which combines golden section search and quadratic + //! minimization (see for example numerical recipes in C). + //! + //! Note that this converges to the unique minimum if there + //! is one. If the function \p f has several minima then you + //! could consider using globalMaximize. + //! + //! \param[in] a The left end of the interval on which to + //! minimize \p f. + //! \param[in] b The right end of the interval on which to + //! minimize \p f. + //! \param[in] fa The value of \p f at \p a. + //! \param[in] fb The value of \p f at \p b. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in] tolerance The convergence threshold ignored + //! if too small so feel free to set to zero. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[out] x Set to argmin of f on [\p a, \p b]. + //! \param[out] fx Set to the value of f at \p x. + template + static inline void + minimize(double a, double b, double fa, double fb, const F& f, double tolerance, std::size_t& maxIterations, double& x, double& fx) { + minimize(a, b, fa, fb, f, tolerance, maxIterations, -std::numeric_limits::max(), x, fx); + } + + //! Maximize the function \p f on the interval [\p a, \p b] + //! This terminates if it has converged on a local maximum + //! or it has run out of iterations. This minimizes minus + //! \p f using our standard minimization algorithm. + //! + //! Note that this converges to the unique maximum if there + //! is one. If the function \p f has several maxima then you + //! could consider using globalMaximize. + //! + //! \param[in] a The left end of the interval on which to + //! maximize \p f. + //! \param[in] b The right end of the interval on which to + //! maximize \p f. + //! \param[in] fa The value of \p f at \p a. + //! \param[in] fb The value of \p f at \p b. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! 
\param[in] tolerance The convergence threshold ignored + //! if too small so feel free to set to zero. + //! \param[in,out] maxIterations The maximum number of times + //! \p f is evaluated and set to the number of times it was + //! evaluated. + //! \param[out] x Set to argmax of f on [\p a, \p b]. + //! \param[out] fx Set to the value of f at \p x. + template + static inline void + maximize(double a, double b, double fa, double fb, const F& f, double tolerance, std::size_t& maxIterations, double& x, double& fx) { + CCompositeFunctions::CMinus f_(f); + minimize(a, b, -fa, -fb, f_, tolerance, maxIterations, x, fx); + fx = -fx; + } + + //! Try and find a global minimum for the function evaluating + //! it at the points \p p and then searching for a local + //! minimum. + //! + //! \param[in] p The points at which to evaluate f looking + //! for a global minimum. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[out] x Set to argmin of f on [\p a, \p b]. + //! \param[out] fx Set to the value of f at \p x. + template + static bool globalMinimize(const T& p, const F& f, double& x, double& fx) { + using TDoubleSizePr = std::pair; + using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; + + std::size_t n = p.size(); + + if (n == 0) { + LOG_ERROR("Must provide points at which to evaluate function"); + return false; } - //! Try and find a global minimum for the function evaluating - //! it at the points \p p and then searching for a local - //! minimum. - //! - //! \param[in] p The points at which to evaluate f looking - //! for a global minimum. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[out] x Set to argmin of f on [\p a, \p b]. - //! \param[out] fx Set to the value of f at \p x. - template - static bool globalMaximize(const T &p, - const F &f, - double &x, - double &fx) - { - CCompositeFunctions::CMinus f_(f); - bool result = globalMinimize(p, f_, x, fx); - fx = -fx; - return result; + TMinAccumulator min; + T fp(p.size()); + for (std::size_t i = 0u; i < p.size(); ++i) { + double fi = f(p[i]); + fp[i] = fi; + min.add(TDoubleSizePr(fi, i)); } - - //! Find the sublevel set of \p fc the function \p f on the - //! interval [\p a, \p b]. The sublevel set is defined as: - //!
-        //!   \f$L^-_{fc}(f) = {x : f(x) <= fc}\f$
-        //! 
- //! - //! It is assumed that the sublevel set is a closed interval. - //! WARNING this is equivalent to assuming that the function - //! has a unique minimum in the interval [\p a, \p b] and - //! the caller should ensure this condition is satisfied. - //! - //! \param[in] a The left end of the interval on which to - //! compute the sublevel set of \p fmax. - //! \param[in] b The right end of the interval on which to - //! compute the sublevel set of \p fmax. - //! \param[in] fa The value of \p f at \p a. - //! \param[in] fb The value of \p f at \p b. - //! \param[in] f The function to evaluate. This is expected - //! to implement a function signature taking a double and - //! returning a double. - //! \param[in] fc The function value, f(c), for which to - //! compute the sublevel set. - //! \param[in] maxIterations The maximum number of times - //! \p f is evaluated. - //! \param[out] result Filled in with the sublevel set of - //! \p fc if it isn't empty or the point which minimizes - //! \p f otherwise. - //! \return True if the sublevel set could be computed and - //! false otherwise. - //! \note This will evaluate \p f at most 3 * \p maxIterations. - template - static bool sublevelSet(double a, - double b, - double fa, - double fb, - const F &f, - const double fc, - std::size_t maxIterations, - TDoubleDoublePr &result) - { - if (a > b) - { - std::swap(a, b); - std::swap(fa, fb); - } - - double x, fx; - { - std::size_t n = maxIterations; - minimize(a, b, fa, fb, f, 0.0, n, fc, x, fx); + LOG_TRACE("p = " << core::CContainerPrinter::print(p)); + LOG_TRACE("f(p) = " << core::CContainerPrinter::print(fp)); + + std::size_t i = min[0].second; + std::size_t maxIterations = 5; + if (i == 0) { + minimize(p[0], p[1], fp[0], fp[1], f, 0.0, maxIterations, x, fx); + } else if (i == n - 1) { + minimize(p[n - 2], p[n - 1], fp[n - 2], fp[n - 1], f, 0.0, maxIterations, x, fx); + } else { + std::size_t ai = i - 1; + std::size_t bi = i + 1; + minimize(p[ai], p[bi], fp[ai], fp[bi], f, 0.0, maxIterations, x, fx); + if (fp[i] < fx) { + x = p[i]; + fx = fp[i]; } + } + LOG_TRACE("x = " << x << " fx = " << fx); + return true; + } + + //! Try and find a global minimum for the function evaluating + //! it at the points \p p and then searching for a local + //! minimum. + //! + //! \param[in] p The points at which to evaluate f looking + //! for a global minimum. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[out] x Set to argmin of f on [\p a, \p b]. + //! \param[out] fx Set to the value of f at \p x. + template + static bool globalMaximize(const T& p, const F& f, double& x, double& fx) { + CCompositeFunctions::CMinus f_(f); + bool result = globalMinimize(p, f_, x, fx); + fx = -fx; + return result; + } + + //! Find the sublevel set of \p fc the function \p f on the + //! interval [\p a, \p b]. The sublevel set is defined as: + //!
+    //!   \f$L^-_{fc}(f) = {x : f(x) <= fc}\f$
+    //! 
+ //! + //! It is assumed that the sublevel set is a closed interval. + //! WARNING this is equivalent to assuming that the function + //! has a unique minimum in the interval [\p a, \p b] and + //! the caller should ensure this condition is satisfied. + //! + //! \param[in] a The left end of the interval on which to + //! compute the sublevel set of \p fmax. + //! \param[in] b The right end of the interval on which to + //! compute the sublevel set of \p fmax. + //! \param[in] fa The value of \p f at \p a. + //! \param[in] fb The value of \p f at \p b. + //! \param[in] f The function to evaluate. This is expected + //! to implement a function signature taking a double and + //! returning a double. + //! \param[in] fc The function value, f(c), for which to + //! compute the sublevel set. + //! \param[in] maxIterations The maximum number of times + //! \p f is evaluated. + //! \param[out] result Filled in with the sublevel set of + //! \p fc if it isn't empty or the point which minimizes + //! \p f otherwise. + //! \return True if the sublevel set could be computed and + //! false otherwise. + //! \note This will evaluate \p f at most 3 * \p maxIterations. + template + static bool + sublevelSet(double a, double b, double fa, double fb, const F& f, const double fc, std::size_t maxIterations, TDoubleDoublePr& result) { + if (a > b) { + std::swap(a, b); + std::swap(fa, fb); + } - result = TDoubleDoublePr(x, x); - if (fx > fc) - { - return false; - } + double x, fx; + { + std::size_t n = maxIterations; + minimize(a, b, fa, fb, f, 0.0, n, fc, x, fx); + } - // [a, x] and [b, r] bracket the sublevel set end points. + result = TDoubleDoublePr(x, x); + if (fx > fc) { + return false; + } - CCompositeFunctions::CMinusConstant f_(f, fc); + // [a, x] and [b, r] bracket the sublevel set end points. 
- LOG_TRACE("a = " << a << ", x = " << x << ", b = " << b); - LOG_TRACE("f_(a) = " << fa - fc - << ", f_(x) = " << fx - fc - << ", f_(b) = " << fb - fc); + CCompositeFunctions::CMinusConstant f_(f, fc); - const double eps = std::sqrt(std::numeric_limits::epsilon()) * b; - CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); - LOG_TRACE("eps = " << eps); + LOG_TRACE("a = " << a << ", x = " << x << ", b = " << b); + LOG_TRACE("f_(a) = " << fa - fc << ", f_(x) = " << fx - fc << ", f_(b) = " << fb - fc); - try - { - std::size_t n = maxIterations; - solve(a, x, fa - fc, fx - fc, f_, n, equal, result.first); - LOG_TRACE("iterations = " << n); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to find left end point: " << e.what()); - return false; - } + const double eps = std::sqrt(std::numeric_limits::epsilon()) * b; + CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); + LOG_TRACE("eps = " << eps); - try - { - std::size_t n = maxIterations; - solve(x, b, fx - fc, fb - fc, f_, n, equal, result.second); - LOG_TRACE("iterations = " << n); - } - catch (std::exception &e) - { - LOG_ERROR("Failed to find right end point: " << e.what()); - return false; - } + try { + std::size_t n = maxIterations; + solve(a, x, fa - fc, fx - fc, f_, n, equal, result.first); + LOG_TRACE("iterations = " << n); + } catch (const std::exception& e) { + LOG_ERROR("Failed to find left end point: " << e.what()); + return false; + } - return true; + try { + std::size_t n = maxIterations; + solve(x, b, fx - fc, fb - fc, f_, n, equal, result.second); + LOG_TRACE("iterations = " << n); + } catch (std::exception& e) { + LOG_ERROR("Failed to find right end point: " << e.what()); + return false; } -}; + return true; + } +}; } } diff --git a/include/maths/CSphericalCluster.h b/include/maths/CSphericalCluster.h index caff2b8953..af56dcd7c5 100644 --- a/include/maths/CSphericalCluster.h +++ b/include/maths/CSphericalCluster.h @@ -17,19 +17,12 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A cluster's count and variance. -struct MATHS_EXPORT SCountAndVariance -{ - SCountAndVariance(double count = 0.0, - double variance = 0.0) : - s_Count(count), - s_Variance(variance) - {} +struct MATHS_EXPORT SCountAndVariance { + SCountAndVariance(double count = 0.0, double variance = 0.0) : s_Count(count), s_Variance(variance) {} //! The count of point in the cluster. double s_Count; @@ -45,91 +38,74 @@ struct MATHS_EXPORT SCountAndVariance //! cluster with the count of points in the cluster and the spherically //! symmetric variance of those points. 
template -class CSphericalCluster -{ +class CSphericalCluster { +public: + using Type = CAnnotatedVector; + + class CHash { + public: + std::size_t operator()(const Type& o) const { + std::size_t seed = boost::hash_combine(m_PointHash(o), o.annotation().s_Count); + return boost::hash_combine(seed, o.annotation().s_Variance); + } + + private: + typename POINT::CHash m_PointHash; + }; + + class CEqual { public: - using Type = CAnnotatedVector; - - class CHash - { - public: - std::size_t operator()(const Type &o) const - { - std::size_t seed = boost::hash_combine(m_PointHash(o), o.annotation().s_Count); - return boost::hash_combine(seed, o.annotation().s_Variance); - } - - private: - typename POINT::CHash m_PointHash; - }; - - class CEqual - { - public: - std::size_t operator()(const Type &lhs, const Type &rhs) const - { - return static_cast(lhs) == static_cast(rhs) - && lhs.annotation().s_Count == rhs.annotation().s_Count - && lhs.annotation().s_Variance == rhs.annotation().s_Variance; - } - }; - - struct SLess - { - bool operator()(const Type &lhs, const Type &rhs) const - { - return COrderings::lexicographical_compare(static_cast(lhs), - lhs.annotation().s_Count, - lhs.annotation().s_Variance, - static_cast(rhs), - rhs.annotation().s_Count, - rhs.annotation().s_Variance); - } - }; + std::size_t operator()(const Type& lhs, const Type& rhs) const { + return static_cast(lhs) == static_cast(rhs) && + lhs.annotation().s_Count == rhs.annotation().s_Count && lhs.annotation().s_Variance == rhs.annotation().s_Variance; + } + }; + + struct SLess { + bool operator()(const Type& lhs, const Type& rhs) const { + return COrderings::lexicographical_compare(static_cast(lhs), + lhs.annotation().s_Count, + lhs.annotation().s_Variance, + static_cast(rhs), + rhs.annotation().s_Count, + rhs.annotation().s_Variance); + } + }; }; -namespace basic_statistics_detail -{ +namespace basic_statistics_detail { //! \brief Specialization for the implementation of the spherical //! cluster to the sample mean and variance estimator. template -struct SCentralMomentsCustomAdd, SCountAndVariance> > -{ +struct SCentralMomentsCustomAdd, SCountAndVariance>> { template - static inline void add(const CAnnotatedVector, SCountAndVariance> &x, + static inline void add(const CAnnotatedVector, SCountAndVariance>& x, typename SCoordinate::Type n, - CBasicStatistics::SSampleCentralMoments &moments) - { + CBasicStatistics::SSampleCentralMoments& moments) { using TCoordinate = typename SCoordinate::Type; moments.add(x, TCoordinate(x.annotation().s_Count) * n, 0); } template - static inline void add(const CAnnotatedVector, SCountAndVariance> &x, + static inline void add(const CAnnotatedVector, SCountAndVariance>& x, typename SCoordinate::Type n, - CBasicStatistics::SSampleCentralMoments &moments) - { + CBasicStatistics::SSampleCentralMoments& moments) { using TCoordinate = typename SCoordinate::Type; - moments += CBasicStatistics::accumulator(TCoordinate(x.annotation().s_Count) * n, - T(x), - T(x.annotation().s_Variance)); + moments += CBasicStatistics::accumulator(TCoordinate(x.annotation().s_Count) * n, T(x), T(x.annotation().s_Variance)); } }; //! \brief Specialization for the implementation of add spherical //! cluster to the covariances estimator. 
template -struct SCovariancesCustomAdd, SCountAndVariance> > -{ +struct SCovariancesCustomAdd, SCountAndVariance>> { template - static inline void add(const CAnnotatedVector, SCountAndVariance> &x, - const CAnnotatedVector, SCountAndVariance> &n, - CBasicStatistics::SSampleCovariances &covariances) - { + static inline void add(const CAnnotatedVector, SCountAndVariance>& x, + const CAnnotatedVector, SCountAndVariance>& n, + CBasicStatistics::SSampleCovariances& covariances) { CSymmetricMatrixNxN m(0); - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { m(i, i) = x.annotation().s_Variance; } covariances += CBasicStatistics::SSampleCovariances(T(x.annotation().s_Count) * n, x, m); @@ -146,24 +122,21 @@ struct SCovariancesCustomAdd, SCountAndVarianc //! See http://perso.ens-lyon.fr/patrick.flandrin/LedoitWolf_JMA2004.pdf //! for the details. template -struct SCovariancesLedoitWolf, SCountAndVariance> > -{ +struct SCovariancesLedoitWolf, SCountAndVariance>> { template - static void estimate(const std::vector, SCountAndVariance> > &points, - CBasicStatistics::SSampleCovariances &covariances) - { + static void estimate(const std::vector, SCountAndVariance>>& points, + CBasicStatistics::SSampleCovariances& covariances) { U d = static_cast(N); U n = CBasicStatistics::count(covariances); - const CVectorNx1 &m = CBasicStatistics::mean(covariances); - const CSymmetricMatrixNxN &s = CBasicStatistics::maximumLikelihoodCovariances(covariances); + const CVectorNx1& m = CBasicStatistics::mean(covariances); + const CSymmetricMatrixNxN& s = CBasicStatistics::maximumLikelihoodCovariances(covariances); double mn = s.trace() / d; double dn = pow2((s - CVectorNx1(mn).diagonal()).frobenius()) / d; double bn = 0.0; double z = n * n; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { CVectorNx1 ci(points[i]); U ni = static_cast(points[i].annotation().s_Count); U vi = static_cast(points[i].annotation().s_Variance); @@ -172,25 +145,22 @@ struct SCovariancesLedoitWolf, SCountAndVarian bn = std::min(bn, dn); LOG_TRACE("m = " << mn << ", d = " << dn << ", b = " << bn); - covariances.s_Covariances = CVectorNx1(bn / dn * mn).diagonal() - + (U(1) - bn / dn) * covariances.s_Covariances; + covariances.s_Covariances = CVectorNx1(bn / dn * mn).diagonal() + (U(1) - bn / dn) * covariances.s_Covariances; } - template static U pow2(U x) { return x * x; } + template + static U pow2(U x) { + return x * x; + } }; - } //! Write a description of \p cluster for debugging. template -std::ostream &operator<<(std::ostream &o, - const CAnnotatedVector &cluster) -{ - return o << static_cast(cluster) - << " (" << cluster.annotation().s_Count - << "," << std::sqrt(cluster.annotation().s_Variance) << ")"; +std::ostream& operator<<(std::ostream& o, const CAnnotatedVector& cluster) { + return o << static_cast(cluster) << " (" << cluster.annotation().s_Count << "," + << std::sqrt(cluster.annotation().s_Variance) << ")"; } - } } diff --git a/include/maths/CSpline.h b/include/maths/CSpline.h index f175242324..aae869dd86 100644 --- a/include/maths/CSpline.h +++ b/include/maths/CSpline.h @@ -22,13 +22,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace spline_detail -{ +namespace spline_detail { using TDoubleVec = std::vector; using TFloatVec = std::vector; @@ -42,10 +39,7 @@ using TFloatVec = std::vector; //! \param[in,out] x Initially contains the input vector \f$y\f$, //! 
and returns the solution \f$x\f$, indexed from [0, ..., n - 1].
 //! \note The contents of input vector c will be modified.
-bool MATHS_EXPORT solveTridiagonal(const TDoubleVec &a,
-                                   const TDoubleVec &b,
-                                   TDoubleVec &c,
-                                   TDoubleVec &x);
+bool MATHS_EXPORT solveTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDoubleVec& c, TDoubleVec& x);
 
 //! Solves:
 //! <pre>
@@ -61,50 +55,35 @@ bool MATHS_EXPORT solveTridiagonal(const TDoubleVec &a,
 //! \param[in,out] x Initially contains the input vector \f$y\f$,
 //! and returns the solution \f$x\f$, indexed from [0, ..., n - 1].
 //! \note The contents of input vector c will be modified.
-bool MATHS_EXPORT solvePeturbedTridiagonal(const TDoubleVec &a,
-                                           const TDoubleVec &b,
-                                           TDoubleVec &c,
-                                           TDoubleVec &u,
-                                           const TDoubleVec &v,
-                                           TDoubleVec &x);
-
+bool MATHS_EXPORT
+solvePeturbedTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDoubleVec& c, TDoubleVec& u, const TDoubleVec& v, TDoubleVec& x);
 }
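For orientation, the perturbed variant above is the form that arises for periodic boundary conditions; it is presumably reduced to ordinary tridiagonal solves via the Sherman-Morrison identity \f$(T + uv^T)^{-1}y = z - \frac{v^Tz}{1 + v^Tw}w\f$, where \f$Tz = y\f$ and \f$Tw = u\f$. A minimal sketch of driving the plain solver follows; the wrapper function name and the 3x3 system are invented for illustration.

    #include <maths/CSpline.h>

    // Illustrative sketch only: solve T x = y for an invented 3x3
    // tridiagonal T. Following the documented convention, a is the
    // subdiagonal (n - 1 entries), b the main diagonal (n entries),
    // c the superdiagonal (n - 1 entries, modified in place) and x
    // holds y on entry and the solution on exit.
    void tridiagonalExample() {
        using namespace ml::maths::spline_detail;
        TDoubleVec a{1.0, 1.0};
        TDoubleVec b{4.0, 4.0, 4.0};
        TDoubleVec c{1.0, 1.0};
        TDoubleVec x{5.0, 6.0, 5.0};
        if (solveTridiagonal(a, b, c, x)) {
            // x now holds the solution of the tridiagonal system.
        }
    }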
 
 //! \brief Defines types used by the spline implementation.
-class MATHS_EXPORT CSplineTypes
-{
-    public:
-        using TDoubleVec = std::vector<double>;
-        using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
-
-        //! Types of spline interpolation that this will perform.
-        //!
-        //! -# Linear interpolation of the knot points.
-        //! -# Standard cubic spline. The boundary conditions are
-        //!    chosen by a separate enumeration.
-        enum EType
-        {
-            E_Linear,
-            E_Cubic
-        };
-
-        //! Types of boundary condition for the spline.
-        //!
-        //! -# The natural boundary condition sets curvature to zero
-        //!    at the start and end of the interpolated interval.
-        //! -# The parabolic run out sets curvature at the start and
-        //!    end of the interval to the curvature at the second and
-        //!    penultimate knot point, respectively.
-        //! -# For the periodic boundary condition we identify the
-        //!    start and end of the interval and apply the usual
-        //!    spline condition that the first derivative is equal
-        //!    across the knot point.
-        enum EBoundaryCondition
-        {
-            E_Natural,
-            E_ParabolicRunout,
-            E_Periodic
-        };
+class MATHS_EXPORT CSplineTypes {
+public:
+    using TDoubleVec = std::vector<double>;
+    using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
+
+    //! Types of spline interpolation that this will perform.
+    //!
+    //! -# Linear interpolation of the knot points.
+    //! -# Standard cubic spline. The boundary conditions are
+    //!    chosen by a separate enumeration.
+    enum EType { E_Linear, E_Cubic };
+
+    //! Types of boundary condition for the spline.
+    //!
+    //! -# The natural boundary condition sets curvature to zero
+    //!    at the start and end of the interpolated interval.
+    //! -# The parabolic run out sets curvature at the start and
+    //!    end of the interval to the curvature at the second and
+    //!    penultimate knot point, respectively.
+    //! -# For the periodic boundary condition we identify the
+    //!    start and end of the interval and apply the usual
+    //!    spline condition that the first derivative is equal
+    //!    across the knot point.
+    enum EBoundaryCondition { E_Natural, E_ParabolicRunout, E_Periodic };
 };
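To make these enumerations concrete, here is a hedged usage sketch for the CSpline class defined below; the wrapper function name and the sample data are invented for illustration.

    #include <maths/CSpline.h>

    // Illustrative sketch only: fit a natural cubic spline through
    // four invented points and query it inside the interval.
    void splineExample() {
        using namespace ml::maths;
        CSplineTypes::TDoubleVec knots{0.0, 1.0, 2.0, 3.0};
        CSplineTypes::TDoubleVec values{0.0, 1.0, 0.0, 1.0};
        CSpline<> spline(CSplineTypes::E_Cubic);
        if (spline.interpolate(knots, values, CSplineTypes::E_Natural)) {
            double y = spline.value(1.5); // interpolated value at x = 1.5
            double m = spline.slope(1.5); // first derivative at x = 1.5
            // ... use y and m ...
        }
    }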
 
 //! \brief Implements a standard cubic spline.
@@ -136,604 +115,522 @@ class MATHS_EXPORT CSplineTypes
 //! for example knot points can be shared between several splines.
 template<typename KNOTS = std::vector<CFloatStorage>,
          typename VALUES = std::vector<CFloatStorage>,
-         typename CURVATURES = std::vector<double> >
-class CSpline : public CSplineTypes
-{
-    public:
-        using TKnots = typename boost::unwrap_reference<KNOTS>::type;
-        using TValues = typename boost::unwrap_reference<VALUES>::type;
-        using TCurvatures = typename boost::unwrap_reference<CURVATURES>::type;
-        using TNonConstKnots = typename boost::remove_const<TKnots>::type;
-        using TNonConstValues = typename boost::remove_const<TValues>::type;
-        using TNonConstCurvatures = typename boost::remove_const<TCurvatures>::type;
-
-    public:
-        CSpline(EType type) : m_Type(type) {}
-
-        CSpline(EType type,
-                const KNOTS &knots,
-                const VALUES &values,
-                const CURVATURES &curvatures) :
-            m_Type(type),
-            m_Knots(knots),
-            m_Values(values),
-            m_Curvatures(curvatures)
-        {}
-
-        //! Efficiently swap the contents of two spline objects.
-        void swap(CSpline &other)
-        {
-            using std::swap;
-            swap(m_Type, other.m_Type);
-            swap(m_Knots, other.m_Knots);
-            swap(m_Values, other.m_Values);
-            swap(m_Curvatures, other.m_Curvatures);
+         typename CURVATURES = std::vector<double>>
+class CSpline : public CSplineTypes {
+public:
+    using TKnots = typename boost::unwrap_reference<KNOTS>::type;
+    using TValues = typename boost::unwrap_reference<VALUES>::type;
+    using TCurvatures = typename boost::unwrap_reference<CURVATURES>::type;
+    using TNonConstKnots = typename boost::remove_const<TKnots>::type;
+    using TNonConstValues = typename boost::remove_const<TValues>::type;
+    using TNonConstCurvatures = typename boost::remove_const<TCurvatures>::type;
+
+public:
+    CSpline(EType type) : m_Type(type) {}
+
+    CSpline(EType type, const KNOTS& knots, const VALUES& values, const CURVATURES& curvatures)
+        : m_Type(type), m_Knots(knots), m_Values(values), m_Curvatures(curvatures) {}
+
+    //! Efficiently swap the contents of two spline objects.
+    void swap(CSpline& other) {
+        using std::swap;
+        swap(m_Type, other.m_Type);
+        swap(m_Knots, other.m_Knots);
+        swap(m_Values, other.m_Values);
+        swap(m_Curvatures, other.m_Curvatures);
+    }
+
+    //! Check if the spline has been initialized.
+    bool initialized() const { return this->knots().size() > 0; }
+
+    //! Clear the contents of this spline and recover any
+    //! allocated memory.
+    void clear() {
+        TNonConstKnots noKnots;
+        this->knotsRef().swap(noKnots);
+        TNonConstValues noValues;
+        this->valuesRef().swap(noValues);
+        TNonConstCurvatures noCurvatures;
+        this->curvaturesRef().swap(noCurvatures);
+    }
+
+    //! Evaluate the value of the spline at the point \p x.
+    //!
+    //! \warning \p x should be in the interpolation interval as
+    //! defined by the last set of knot points supplied to the
+    //! interpolate function.
+    double value(double x) const {
+        if (this->knots().empty()) {
+            return 0.0;
         }
 
-        //! Check if the spline has been initialized.
-        bool initialized() const
-        {
-            return this->knots().size() > 0;
-        }
+        std::size_t k =
+            CTools::truncate(std::size_t(std::lower_bound(this->knots().begin(), this->knots().end(), x) - this->knots().begin()),
+                             std::size_t(1),
+                             this->knots().size() - 1);
 
-        //! Clear the contents of this spline and recover any
-        //! allocated memory.
-        void clear()
-        {
-            TNonConstKnots noKnots;
-            this->knotsRef().swap(noKnots);
-            TNonConstValues noValues;
-            this->valuesRef().swap(noValues);
-            TNonConstCurvatures noCurvatures;
-            this->curvaturesRef().swap(noCurvatures);
+        if (x == this->knots()[k]) {
+            return this->values()[k];
         }
 
-        //! Evaluate the value of the spline at the point \p x.
-        //!
-        //! \warning \p x should be in the interpolation interval as
-        //! defined by the last set of knot points supplied to the
-        //! interpolate function.
-        double value(double x) const
-        {
-            if (this->knots().empty())
-            {
-                return 0.0;
-            }
-
-            std::size_t k = CTools::truncate(
-                                std::size_t(std::lower_bound(this->knots().begin(),
-                                                             this->knots().end(), x)
-                                            - this->knots().begin()),
-                                std::size_t(1), this->knots().size() - 1);
+        switch (m_Type) {
+        case E_Linear: {
+            double h = this->knots()[k] - this->knots()[k - 1];
+            double c = (this->values()[k] - this->values()[k - 1]) / h;
+            double d = this->values()[k - 1];
+            double r = x - this->knots()[k - 1];
+            return c * r + d;
+        }
+        case E_Cubic: {
+            double h = this->knots()[k] - this->knots()[k - 1];
+            double a = (this->curvatures()[k] - this->curvatures()[k - 1]) / 6.0 / h;
+            double b = this->curvatures()[k - 1] / 2.0;
+            double c =
+                (this->values()[k] - this->values()[k - 1]) / h - (this->curvatures()[k] / 6.0 + this->curvatures()[k - 1] / 3.0) * h;
+            double d = this->values()[k - 1];
+            double r = x - this->knots()[k - 1];
+            return ((a * r + b) * r + c) * r + d;
+        }
+        }
 
-            if (x == this->knots()[k])
-            {
-                return this->values()[k];
-            }
+        LOG_ABORT("Unexpected type " << m_Type);
+    }
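Spelling the E_Cubic branch out: with \f$h = t_k - t_{k-1}\f$, \f$r = x - t_{k-1}\f$, knot values \f$f_i\f$ and stored curvatures \f$m_i = f''(t_i)\f$, it evaluates \f$f(x) = \frac{m_k - m_{k-1}}{6h}r^3 + \frac{m_{k-1}}{2}r^2 + \left(\frac{f_k - f_{k-1}}{h} - \left(\frac{m_k}{6} + \frac{m_{k-1}}{3}\right)h\right)r + f_{k-1}\f$, which reproduces \f$f_{k-1}\f$ and \f$f_k\f$ at the knots and has second derivative \f$m_{k-1}\f$ and \f$m_k\f$ there, so adjacent segments share function values and curvatures at the knots.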
 
-            switch (m_Type)
-            {
-            case E_Linear:
-            {
-                double h = this->knots()[k] - this->knots()[k-1];
-                double c = (this->values()[k] - this->values()[k-1]) / h;
-                double d = this->values()[k-1];
-                double r = x - this->knots()[k-1];
-                return c * r + d;
-            }
-            case E_Cubic:
-            {
-                double h = this->knots()[k] - this->knots()[k-1];
-                double a = (this->curvatures()[k] - this->curvatures()[k-1]) / 6.0 / h;
-                double b = this->curvatures()[k-1] / 2.0;
-                double c = (this->values()[k] - this->values()[k-1]) / h
-                         - (this->curvatures()[k] / 6.0 + this->curvatures()[k-1] / 3.0) * h;
-                double d = this->values()[k-1];
-                double r = x - this->knots()[k-1];
-                return ((a * r + b) * r + c) * r + d;
-            }
-            }
-
-            LOG_ABORT("Unexpected type " << m_Type);
+    //! Get the mean value of the spline.
+    double mean() const {
+        if (this->knots().empty()) {
+            return 0.0;
         }
 
-        //! Get the mean value of the spline.
-        double mean() const
-        {
-            if (this->knots().empty())
-            {
-                return 0.0;
+        std::size_t n = this->knots().size();
+        double interval = (this->knots()[n - 1] - this->knots()[0]);
+
+        TMeanAccumulator result;
+        switch (m_Type) {
+        case E_Linear:
+            for (std::size_t i = 1u; i < this->knots().size(); ++i) {
+                double h = this->knots()[i] - this->knots()[i - 1];
+                double c = (this->values()[i] - this->values()[i - 1]) / h;
+                double d = this->values()[i - 1];
+                result.add(c / 2.0 * h + d, h / interval);
             }
-
-            std::size_t n = this->knots().size();
-            double interval = (this->knots()[n-1] - this->knots()[0]);
-
-            TMeanAccumulator result;
-            switch (m_Type)
-            {
-            case E_Linear:
-                for (std::size_t i = 1u; i < this->knots().size(); ++i)
-                {
-                    double h = this->knots()[i] - this->knots()[i-1];
-                    double c = (this->values()[i] - this->values()[i-1]) / h;
-                    double d = this->values()[i-1];
-                    result.add(c / 2.0 * h + d,  h / interval);
-                }
-                break;
-
-            case E_Cubic:
-                for (std::size_t i = 1u; i < this->knots().size(); ++i)
-                {
-                    double h = this->knots()[i] - this->knots()[i-1];
-                    double a = (this->curvatures()[i] - this->curvatures()[i-1]) / 6.0 / h;
-                    double b = this->curvatures()[i-1] / 2.0;
-                    double c = (this->values()[i] - this->values()[i-1]) / h
-                             - (this->curvatures()[i] / 6.0 + this->curvatures()[i-1] / 3.0) * h;
-                    double d = this->values()[i-1];
-                    result.add(((a * h / 4.0 + b / 3.0) * h + c / 2.0) * h + d, h / interval);
-                }
-                break;
+            break;
+
+        case E_Cubic:
+            for (std::size_t i = 1u; i < this->knots().size(); ++i) {
+                double h = this->knots()[i] - this->knots()[i - 1];
+                double a = (this->curvatures()[i] - this->curvatures()[i - 1]) / 6.0 / h;
+                double b = this->curvatures()[i - 1] / 2.0;
+                double c =
+                    (this->values()[i] - this->values()[i - 1]) / h - (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h;
+                double d = this->values()[i - 1];
+                result.add(((a * h / 4.0 + b / 3.0) * h + c / 2.0) * h + d, h / interval);
             }
-
-            return CBasicStatistics::mean(result);
+            break;
         }
 
-        //! Evaluate the slope of the spline at the point \p x.
-        //!
-        //! \warning \p x should be in the interpolation interval as
-        //! defined by the last set of knot points supplied to the
-        //! interpolate function.
-        double slope(double x) const
-        {
-            if (this->knots().empty())
-            {
-                return 0.0;
-            }
+        return CBasicStatistics::mean(result);
+    }
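Note that the mean is exact rather than sampled: a segment of width \f$h\f$ contributes its average value \f$\frac{1}{h}\int_0^h{(ar^3 + br^2 + cr + d)}\,dr = \frac{ah^3}{4} + \frac{bh^2}{3} + \frac{ch}{2} + d\f$, weighted by the fraction \f$h / (t_{n-1} - t_0)\f$ of the interpolation interval it covers. This is precisely the expression accumulated in the E_Cubic branch above, and the linear case is the same with \f$a = b = 0\f$.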
+
+    //! Evaluate the slope of the spline at the point \p x.
+    //!
+    //! \warning \p x should be in the interpolation interval as
+    //! defined by the last set of knot points supplied to the
+    //! interpolate function.
+    double slope(double x) const {
+        if (this->knots().empty()) {
+            return 0.0;
+        }
 
-            std::size_t k = CTools::truncate(
-                                std::size_t(std::lower_bound(this->knots().begin(),
-                                                             this->knots().end(), x)
-                                            - this->knots().begin()),
-                                std::size_t(1), this->knots().size() - 1);
-
-            switch (m_Type)
-            {
-            case E_Linear:
-            {
-                double h = this->knots()[k] - this->knots()[k-1];
-                return (this->values()[k] - this->values()[k-1]) / h;
-            }
-            case E_Cubic:
-            {
-                double h = this->knots()[k] - this->knots()[k-1];
-                double a = (this->curvatures()[k] - this->curvatures()[k-1]) / 6.0 / h;
-                double b = this->curvatures()[k-1] / 2.0;
-                double c = (this->values()[k] - this->values()[k-1]) / h
-                         - (this->curvatures()[k] / 6.0 + this->curvatures()[k-1] / 3.0) * h;
-                double r = x - this->knots()[k-1];
-                return ((3.0 * a * r + 2.0 * b) * r + c);
-            }
-            }
+        std::size_t k =
+            CTools::truncate(std::size_t(std::lower_bound(this->knots().begin(), this->knots().end(), x) - this->knots().begin()),
+                             std::size_t(1),
+                             this->knots().size() - 1);
 
-            LOG_ABORT("Unexpected type " << m_Type);
+        switch (m_Type) {
+        case E_Linear: {
+            double h = this->knots()[k] - this->knots()[k - 1];
+            return (this->values()[k] - this->values()[k - 1]) / h;
+        }
+        case E_Cubic: {
+            double h = this->knots()[k] - this->knots()[k - 1];
+            double a = (this->curvatures()[k] - this->curvatures()[k - 1]) / 6.0 / h;
+            double b = this->curvatures()[k - 1] / 2.0;
+            double c =
+                (this->values()[k] - this->values()[k - 1]) / h - (this->curvatures()[k] / 6.0 + this->curvatures()[k - 1] / 3.0) * h;
+            double r = x - this->knots()[k - 1];
+            return ((3.0 * a * r + 2.0 * b) * r + c);
+        }
         }
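As a consistency check, the E_Cubic branch here returns the derivative of the value() polynomial with identical coefficients: \f$f'(x) = 3ar^2 + 2br + c\f$ with \f$r = x - t_{k-1}\f$.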
 
-        //! Compute the mean absolute slope of the spline.
-        //!
-        //! This is defined as
-        //! <pre>
-        //!   \f$\frac{1}{|b-a|}\int_{[a,b]}{\left|\frac{df(s)}{ds}\right|}ds\f$
-        //! </pre>
- double absSlope() const - { - double result = 0.0; - - std::size_t n = this->knots().size(); - - switch (m_Type) - { - case E_Linear: - for (std::size_t i = 1u; i < n; ++i) - { - result += std::fabs((this->values()[i] - this->values()[i-1])); - } - break; + LOG_ABORT("Unexpected type " << m_Type); + } - case E_Cubic: - for (std::size_t i = 1u; i < n; ++i) - { - double a = this->knots()[i-1]; - double b = this->knots()[i]; - double h = b - a; - double ai = (this->curvatures()[i] - this->curvatures()[i-1]) / 6.0 / h; - double bi = this->curvatures()[i-1] / 2.0; - double ci = (this->values()[i] - this->values()[i-1]) / h - - (this->curvatures()[i] / 6.0 + this->curvatures()[i-1] / 3.0) * h; - - double descriminant = bi * bi - 3.0 * ai * ci; - if (descriminant < 0.0) - { - result += std::fabs(((ai * h + bi) * h + ci) * h); - continue; - } - double rl = CTools::truncate(a - ( bi + descriminant) / 3.0 / ai, a, b); - double rr = CTools::truncate(a + (-bi + descriminant) / 3.0 / ai, a, b); - if (rl > rr) - { - std::swap(rl, rr); - } - result += std::fabs(((ai * (rl - a) + bi) * (rl - a) + ci) * (rl - a)) - + std::fabs(((ai * (rr - rl) + bi) * (rr - rl) + ci) * (rr - rl)) - + std::fabs(((ai * (b - rr) + bi) * (b - rr) + ci) * (b - rr)); - } - break; - } + //! Compute the mean absolute slope of the spline. + //! + //! This is defined as + //!
+    //!   \f$\frac{1}{|b-a|}\int_{[a,b]}{\left|\frac{df(s)}{ds}\right|}ds\f$
+    //! 
+ double absSlope() const { + double result = 0.0; - return result / (this->knots()[n-1] - this->knots()[0]); - } + std::size_t n = this->knots().size(); - //! Get the specified curvatures. - //! - //! \param[out] a Filled in with the cubic coefficient. - //! \param[out] b Filled in with the quadratic coefficient. - //! \param[out] c Filled in with the linear coefficient. - //! \param[out] d Filled in with the constant. - //! \note Null pointers are ignored. - void coefficients(TDoubleVec *a = 0, - TDoubleVec *b = 0, - TDoubleVec *c = 0, - TDoubleVec *d = 0) const - { - if (a) a->reserve(this->values().size()); - if (b) b->reserve(this->values().size()); - if (c) c->reserve(this->values().size()); - if (d) d->reserve(this->values().size()); - - switch (m_Type) - { - case E_Linear: - for (std::size_t i = 1u; i < this->knots().size(); ++i) - { - double h = this->knots()[i] - this->knots()[i-1]; - if (a) a->push_back(0.0); - if (b) b->push_back(0.0); - if (c) c->push_back((this->values()[i] - this->values()[i-1]) / h); - if (d) d->push_back(this->values()[i-1]); + switch (m_Type) { + case E_Linear: + for (std::size_t i = 1u; i < n; ++i) { + result += std::fabs((this->values()[i] - this->values()[i - 1])); + } + break; + + case E_Cubic: + for (std::size_t i = 1u; i < n; ++i) { + double a = this->knots()[i - 1]; + double b = this->knots()[i]; + double h = b - a; + double ai = (this->curvatures()[i] - this->curvatures()[i - 1]) / 6.0 / h; + double bi = this->curvatures()[i - 1] / 2.0; + double ci = + (this->values()[i] - this->values()[i - 1]) / h - (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h; + + double descriminant = bi * bi - 3.0 * ai * ci; + if (descriminant < 0.0) { + result += std::fabs(((ai * h + bi) * h + ci) * h); + continue; } - break; - - case E_Cubic: - for (std::size_t i = 1u; i < this->knots().size(); ++i) - { - double h = this->knots()[i] - this->knots()[i-1]; - if (a) a->push_back((this->curvatures()[i] - this->curvatures()[i-1]) / 6.0 / h); - if (b) b->push_back(this->curvatures()[i-1] / 2.0); - if (c) c->push_back( (this->values()[i] - this->values()[i-1]) / h - - (this->curvatures()[i] / 6.0 + this->curvatures()[i-1] / 3.0) * h); - if (d) d->push_back(this->values()[i-1]); + double rl = CTools::truncate(a - (bi + descriminant) / 3.0 / ai, a, b); + double rr = CTools::truncate(a + (-bi + descriminant) / 3.0 / ai, a, b); + if (rl > rr) { + std::swap(rl, rr); } - break; + result += std::fabs(((ai * (rl - a) + bi) * (rl - a) + ci) * (rl - a)) + + std::fabs(((ai * (rr - rl) + bi) * (rr - rl) + ci) * (rr - rl)) + + std::fabs(((ai * (b - rr) + bi) * (b - rr) + ci) * (b - rr)); } + break; } - //! Interpolate the function, using the selected spline style, - //! on the knot points \p knots, with the values \p values, - //! and applying the boundary conditions \p boundary. - //! - //! \param[in] knots The knot points for the spline. - //! \param[in] values The values of the function at \p knots. - //! \param[in] boundary Selects the boundary condition to use - //! for the interpolation. See EBoundaryCondition for more - //! details on this argument. - //! \warning \p knots must be ordered increasing. - //! \warning There must be a one-to-one correspondence between - //! \p knots and \p values. - //! \note If \p knots contain duplicates the standard spline - //! is ill posed. This implementation removes duplicates and - //! sets the function value to the mean of function values - //! over the duplicates. 
- bool interpolate(const TDoubleVec &knots, - const TDoubleVec &values, - EBoundaryCondition boundary) - { - if (knots.size() < 2) - { - LOG_ERROR("Insufficient knot points supplied"); - return false; + return result / (this->knots()[n - 1] - this->knots()[0]); + } + + //! Get the specified curvatures. + //! + //! \param[out] a Filled in with the cubic coefficient. + //! \param[out] b Filled in with the quadratic coefficient. + //! \param[out] c Filled in with the linear coefficient. + //! \param[out] d Filled in with the constant. + //! \note Null pointers are ignored. + void coefficients(TDoubleVec* a = 0, TDoubleVec* b = 0, TDoubleVec* c = 0, TDoubleVec* d = 0) const { + if (a) + a->reserve(this->values().size()); + if (b) + b->reserve(this->values().size()); + if (c) + c->reserve(this->values().size()); + if (d) + d->reserve(this->values().size()); + + switch (m_Type) { + case E_Linear: + for (std::size_t i = 1u; i < this->knots().size(); ++i) { + double h = this->knots()[i] - this->knots()[i - 1]; + if (a) + a->push_back(0.0); + if (b) + b->push_back(0.0); + if (c) + c->push_back((this->values()[i] - this->values()[i - 1]) / h); + if (d) + d->push_back(this->values()[i - 1]); } - if (knots.size() != values.size()) - { - LOG_ERROR("Number knots not equal to number of values: " - << " knots = " << core::CContainerPrinter::print(knots) - << " values = " << core::CContainerPrinter::print(values)); - return false; + break; + + case E_Cubic: + for (std::size_t i = 1u; i < this->knots().size(); ++i) { + double h = this->knots()[i] - this->knots()[i - 1]; + if (a) + a->push_back((this->curvatures()[i] - this->curvatures()[i - 1]) / 6.0 / h); + if (b) + b->push_back(this->curvatures()[i - 1] / 2.0); + if (c) + c->push_back((this->values()[i] - this->values()[i - 1]) / h - + (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h); + if (d) + d->push_back(this->values()[i - 1]); } + break; + } + } + + //! Interpolate the function, using the selected spline style, + //! on the knot points \p knots, with the values \p values, + //! and applying the boundary conditions \p boundary. + //! + //! \param[in] knots The knot points for the spline. + //! \param[in] values The values of the function at \p knots. + //! \param[in] boundary Selects the boundary condition to use + //! for the interpolation. See EBoundaryCondition for more + //! details on this argument. + //! \warning \p knots must be ordered increasing. + //! \warning There must be a one-to-one correspondence between + //! \p knots and \p values. + //! \note If \p knots contain duplicates the standard spline + //! is ill posed. This implementation removes duplicates and + //! sets the function value to the mean of function values + //! over the duplicates. 
+ bool interpolate(const TDoubleVec& knots, const TDoubleVec& values, EBoundaryCondition boundary) { + if (knots.size() < 2) { + LOG_ERROR("Insufficient knot points supplied"); + return false; + } + if (knots.size() != values.size()) { + LOG_ERROR("Number knots not equal to number of values: " + << " knots = " << core::CContainerPrinter::print(knots) << " values = " << core::CContainerPrinter::print(values)); + return false; + } - TNonConstKnots oldKnots; - TNonConstValues oldValues; - oldKnots.swap(this->knotsRef()); - oldValues.swap(this->valuesRef()); - this->knotsRef().assign(knots.begin(), knots.end()); - this->valuesRef().assign(values.begin(), values.end()); - - // If two knots are equal to working precision then we - // de-duplicate and use the average of the values at the - // duplicate knots. The exit condition from this loop is - // rather subtle and ensures that "last" always points - // to the last element in the reduced knot set. - std::size_t last = std::numeric_limits::max(); - std::size_t n = this->knots().size(); - for (std::size_t i = 1u; i <= n; ++i) - { - std::size_t i_ = i-1; - double knot = this->knots()[i_]; - for (/**/; i < n && this->knots()[i] == knot; ++i) - { - } - if (i - i_ > 1) - { - TMeanAccumulator value; - for (std::size_t j = i_; j < i; ++j) - { - value.add(this->values()[j]); - } - this->valuesRef()[i_] = CBasicStatistics::mean(value); - } - if (++last != i_) - { - this->knotsRef()[last] = this->knots()[i_]; - this->valuesRef()[last] = this->values()[i_]; + TNonConstKnots oldKnots; + TNonConstValues oldValues; + oldKnots.swap(this->knotsRef()); + oldValues.swap(this->valuesRef()); + this->knotsRef().assign(knots.begin(), knots.end()); + this->valuesRef().assign(values.begin(), values.end()); + + // If two knots are equal to working precision then we + // de-duplicate and use the average of the values at the + // duplicate knots. The exit condition from this loop is + // rather subtle and ensures that "last" always points + // to the last element in the reduced knot set. 
+ std::size_t last = std::numeric_limits::max(); + std::size_t n = this->knots().size(); + for (std::size_t i = 1u; i <= n; ++i) { + std::size_t i_ = i - 1; + double knot = this->knots()[i_]; + for (/**/; i < n && this->knots()[i] == knot; ++i) { + } + if (i - i_ > 1) { + TMeanAccumulator value; + for (std::size_t j = i_; j < i; ++j) { + value.add(this->values()[j]); } + this->valuesRef()[i_] = CBasicStatistics::mean(value); } - this->knotsRef().erase(this->knotsRef().begin() + last + 1, this->knotsRef().end()); - this->valuesRef().erase(this->valuesRef().begin() + last + 1, this->valuesRef().end()); - n = this->knots().size(); - LOG_TRACE("knots = " << core::CContainerPrinter::print(this->knots())); - LOG_TRACE("values = " << core::CContainerPrinter::print(this->values())); - - if (this->knots().size() < 2) - { - LOG_ERROR("Insufficient distinct knot points supplied"); - this->knotsRef().swap(oldKnots); - this->valuesRef().swap(oldValues); - return false; + if (++last != i_) { + this->knotsRef()[last] = this->knots()[i_]; + this->valuesRef()[last] = this->values()[i_]; } + } + this->knotsRef().erase(this->knotsRef().begin() + last + 1, this->knotsRef().end()); + this->valuesRef().erase(this->valuesRef().begin() + last + 1, this->valuesRef().end()); + n = this->knots().size(); + LOG_TRACE("knots = " << core::CContainerPrinter::print(this->knots())); + LOG_TRACE("values = " << core::CContainerPrinter::print(this->values())); + + if (this->knots().size() < 2) { + LOG_ERROR("Insufficient distinct knot points supplied"); + this->knotsRef().swap(oldKnots); + this->valuesRef().swap(oldValues); + return false; + } - switch (m_Type) - { - case E_Linear: - // Curvatures are all zero and we don't bother to store them. + switch (m_Type) { + case E_Linear: + // Curvatures are all zero and we don't bother to store them. + break; + + case E_Cubic: { + this->curvaturesRef().clear(); + this->curvaturesRef().reserve(n); + + // Construct the diagonals: a is the subdiagonal, b is the + // main diagonal and c is the superdiagonal. + + TDoubleVec a; + TDoubleVec b; + TDoubleVec c; + a.reserve(n - 1); + b.reserve(n); + c.reserve(n - 1); + + double h = this->knots()[1] - this->knots()[0]; + double h_ = this->knots()[n - 1] - this->knots()[n - 2]; + + switch (boundary) { + case E_Natural: + b.push_back(1.0); + c.push_back(0.0); + this->curvaturesRef().push_back(0.0); break; - case E_Cubic: - { - this->curvaturesRef().clear(); - this->curvaturesRef().reserve(n); - - // Construct the diagonals: a is the subdiagonal, b is the - // main diagonal and c is the superdiagonal. 
- - TDoubleVec a; - TDoubleVec b; - TDoubleVec c; - a.reserve(n - 1); - b.reserve(n); - c.reserve(n - 1); - - double h = this->knots()[1] - this->knots()[0]; - double h_ = this->knots()[n-1] - this->knots()[n-2]; - - switch (boundary) - { - case E_Natural: - b.push_back(1.0); - c.push_back(0.0); - this->curvaturesRef().push_back(0.0); - break; - - case E_ParabolicRunout: - b.push_back( 1.0); - c.push_back(-1.0); - this->curvaturesRef().push_back(0.0); - break; - - case E_Periodic: - b.push_back(2.0 * (h + h_)); - c.push_back(h - 1.0); - this->curvaturesRef().push_back(6.0 * ( (this->values()[1] - this->values()[0]) / h - - (this->values()[0] - this->values()[n-2]) / h_)); - break; - } - - for (std::size_t i = 1u; i + 1 < n; ++i) - { - h_ = h; - h = this->knots()[i+1] - this->knots()[i]; - a.push_back(h_); - b.push_back(2.0 * (h + h_)); - c.push_back(h); - this->curvaturesRef().push_back(6.0 * ( (this->values()[i+1] - this->values()[i]) / h - - (this->values()[i] - this->values()[i-1]) / h_)); - } + case E_ParabolicRunout: + b.push_back(1.0); + c.push_back(-1.0); + this->curvaturesRef().push_back(0.0); + break; - h_ = h; - h = this->knots()[1] - this->knots()[0]; - - switch (boundary) - { - case E_Natural: - a.push_back(0.0); - b.push_back(1.0); - this->curvaturesRef().push_back(0.0); - if (!spline_detail::solveTridiagonal(a, b, c, this->curvaturesRef())) - { - LOG_ERROR("Failed to calculate curvatures"); - return false; - } - break; - - case E_ParabolicRunout: - a.push_back(-1.0); - b.push_back( 1.0); - this->curvaturesRef().push_back(0.0); - if (!spline_detail::solveTridiagonal(a, b, c, this->curvaturesRef())) - { - LOG_ERROR("Failed to calculate curvatures"); - return false; - } - break; - - case E_Periodic: - { - a.push_back(h_ * (1.0 - h)); - b.push_back(2.0 * (h + h_)); - TDoubleVec u(n, 0.0); - u[0] = 1.0; - u[n-1] = h; - TDoubleVec v(n, 0.0); - v[1] = 1.0; - v[n-2] = h_; - this->curvaturesRef().push_back(6.0 * ( (this->values()[1] - this->values()[n-1]) / h - - (this->values()[n-1] - this->values()[n-2]) / h_)); - if (!spline_detail::solvePeturbedTridiagonal(a, b, c, u, v, this->curvaturesRef())) - { - LOG_ERROR("Failed to calculate curvatures"); - return false; - } - } - break; - } + case E_Periodic: + b.push_back(2.0 * (h + h_)); + c.push_back(h - 1.0); + this->curvaturesRef().push_back( + 6.0 * ((this->values()[1] - this->values()[0]) / h - (this->values()[0] - this->values()[n - 2]) / h_)); break; } - } - return true; - } - - //! Get a human readable description of this spline. - //! - //! \param[in] indent The indent to use at the start of new lines. - //! \param[in,out] result Filled in with the description. 
- void describe(const std::string &indent, std::string &result) const - { - result += "\n" + indent + "cubic spline"; - if (!this->initialized()) - { - result += " zero everywhere"; - return; + for (std::size_t i = 1u; i + 1 < n; ++i) { + h_ = h; + h = this->knots()[i + 1] - this->knots()[i]; + a.push_back(h_); + b.push_back(2.0 * (h + h_)); + c.push_back(h); + this->curvaturesRef().push_back( + 6.0 * ((this->values()[i + 1] - this->values()[i]) / h - (this->values()[i] - this->values()[i - 1]) / h_)); } - result += ":"; - switch (m_Type) - { - case E_Linear: - for (std::size_t i = 1u; i < this->knots().size(); ++i) - { - double h = this->knots()[i] - this->knots()[i-1]; - double c = (this->values()[i] - this->values()[i-1]) / h; - double d = this->values()[i-1]; - std::string kl = core::CStringUtils::typeToStringPretty(this->knots()[i-1]); - result += "\n" + indent + core::CStringUtils::typeToStringPretty(c) + " (x - " + kl + ") + " - + core::CStringUtils::typeToStringPretty(d) - + " x in [" + kl + "," + core::CStringUtils::typeToStringPretty(this->knots()[i]) + ")"; + h_ = h; + h = this->knots()[1] - this->knots()[0]; + + switch (boundary) { + case E_Natural: + a.push_back(0.0); + b.push_back(1.0); + this->curvaturesRef().push_back(0.0); + if (!spline_detail::solveTridiagonal(a, b, c, this->curvaturesRef())) { + LOG_ERROR("Failed to calculate curvatures"); + return false; } break; - case E_Cubic: - for (std::size_t i = 1u; i < this->knots().size(); ++i) - { - double h = this->knots()[i] - this->knots()[i-1]; - double a = (this->curvatures()[i] - this->curvatures()[i-1]) / 6.0 / h; - double b = this->curvatures()[i-1] / 2.0; - double c = (this->values()[i] - this->values()[i-1]) / h - - (this->curvatures()[i] / 6.0 + this->curvatures()[i-1] / 3.0) * h; - double d = this->values()[i-1]; - std::string kl = core::CStringUtils::typeToStringPretty(this->knots()[i-1]); - result += "\n" + indent + core::CStringUtils::typeToStringPretty(a) + " (x - " + kl + ")^3 + " - + core::CStringUtils::typeToStringPretty(b) + " (x - " + kl + ")^2 + " - + core::CStringUtils::typeToStringPretty(c) + " (x - " + kl + ") + " - + core::CStringUtils::typeToStringPretty(d) - + " x in [" + kl + "," + core::CStringUtils::typeToStringPretty(this->knots()[i]) + ")"; + case E_ParabolicRunout: + a.push_back(-1.0); + b.push_back(1.0); + this->curvaturesRef().push_back(0.0); + if (!spline_detail::solveTridiagonal(a, b, c, this->curvaturesRef())) { + LOG_ERROR("Failed to calculate curvatures"); + return false; } break; - } - } - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const - { - seed = CChecksum::calculate(seed, m_Type); - seed = CChecksum::calculate(seed, m_Knots); - seed = CChecksum::calculate(seed, m_Values); - return CChecksum::calculate(seed, m_Curvatures); - } - - //! Get the memory used by this component - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CSpline"); - core::CMemoryDebug::dynamicSize("m_Knots", m_Knots, mem); - core::CMemoryDebug::dynamicSize("m_Values", m_Values, mem); - core::CMemoryDebug::dynamicSize("m_Curvatures", m_Curvatures, mem); - } - - //! Get the memory used by this component - std::size_t memoryUsage() const - { - std::size_t mem = core::CMemory::dynamicSize(m_Knots); - mem += core::CMemory::dynamicSize(m_Values); - mem += core::CMemory::dynamicSize(m_Curvatures); - return mem; - } - - //! Get the knot points of the spline. 
- inline const TNonConstKnots &knots() const - { - return boost::unwrap_ref(m_Knots); - } - //! Get the values at the knot points of the spline. - inline const TNonConstValues &values() const - { - return boost::unwrap_ref(m_Values); - } - - //! Get the curvatures at the knot points of the spline. - inline const TNonConstCurvatures &curvatures() const - { - return boost::unwrap_ref(m_Curvatures); + case E_Periodic: { + a.push_back(h_ * (1.0 - h)); + b.push_back(2.0 * (h + h_)); + TDoubleVec u(n, 0.0); + u[0] = 1.0; + u[n - 1] = h; + TDoubleVec v(n, 0.0); + v[1] = 1.0; + v[n - 2] = h_; + this->curvaturesRef().push_back( + 6.0 * ((this->values()[1] - this->values()[n - 1]) / h - (this->values()[n - 1] - this->values()[n - 2]) / h_)); + if (!spline_detail::solvePeturbedTridiagonal(a, b, c, u, v, this->curvaturesRef())) { + LOG_ERROR("Failed to calculate curvatures"); + return false; + } + } break; + } + break; } - - private: - //! Get the knot points of the spline. - inline TKnots &knotsRef() - { - return boost::unwrap_ref(m_Knots); } - //! Get the values at the knot points of the spline. - inline TNonConstValues &valuesRef() - { - return boost::unwrap_ref(m_Values); + return true; + } + + //! Get a human readable description of this spline. + //! + //! \param[in] indent The indent to use at the start of new lines. + //! \param[in,out] result Filled in with the description. + void describe(const std::string& indent, std::string& result) const { + result += "\n" + indent + "cubic spline"; + if (!this->initialized()) { + result += " zero everywhere"; + return; } - //! Get the curvatures at the knot points of the spline. - inline TCurvatures &curvaturesRef() - { - return boost::unwrap_ref(m_Curvatures); + result += ":"; + switch (m_Type) { + case E_Linear: + for (std::size_t i = 1u; i < this->knots().size(); ++i) { + double h = this->knots()[i] - this->knots()[i - 1]; + double c = (this->values()[i] - this->values()[i - 1]) / h; + double d = this->values()[i - 1]; + std::string kl = core::CStringUtils::typeToStringPretty(this->knots()[i - 1]); + result += "\n" + indent + core::CStringUtils::typeToStringPretty(c) + " (x - " + kl + ") + " + + core::CStringUtils::typeToStringPretty(d) + " x in [" + kl + "," + + core::CStringUtils::typeToStringPretty(this->knots()[i]) + ")"; + } + break; + + case E_Cubic: + for (std::size_t i = 1u; i < this->knots().size(); ++i) { + double h = this->knots()[i] - this->knots()[i - 1]; + double a = (this->curvatures()[i] - this->curvatures()[i - 1]) / 6.0 / h; + double b = this->curvatures()[i - 1] / 2.0; + double c = + (this->values()[i] - this->values()[i - 1]) / h - (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h; + double d = this->values()[i - 1]; + std::string kl = core::CStringUtils::typeToStringPretty(this->knots()[i - 1]); + result += "\n" + indent + core::CStringUtils::typeToStringPretty(a) + " (x - " + kl + ")^3 + " + + core::CStringUtils::typeToStringPretty(b) + " (x - " + kl + ")^2 + " + core::CStringUtils::typeToStringPretty(c) + + " (x - " + kl + ") + " + core::CStringUtils::typeToStringPretty(d) + " x in [" + kl + "," + + core::CStringUtils::typeToStringPretty(this->knots()[i]) + ")"; + } + break; } - - private: - //! The type of spline. - EType m_Type; - //! The spline knot points. - KNOTS m_Knots; - //! The spline values at the knot points. - VALUES m_Values; - //! The spline curvatures at the knot points. - CURVATURES m_Curvatures; + } + + //! Get a checksum for this object. 
+ uint64_t checksum(uint64_t seed = 0) const { + seed = CChecksum::calculate(seed, m_Type); + seed = CChecksum::calculate(seed, m_Knots); + seed = CChecksum::calculate(seed, m_Values); + return CChecksum::calculate(seed, m_Curvatures); + } + + //! Get the memory used by this component + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CSpline"); + core::CMemoryDebug::dynamicSize("m_Knots", m_Knots, mem); + core::CMemoryDebug::dynamicSize("m_Values", m_Values, mem); + core::CMemoryDebug::dynamicSize("m_Curvatures", m_Curvatures, mem); + } + + //! Get the memory used by this component + std::size_t memoryUsage() const { + std::size_t mem = core::CMemory::dynamicSize(m_Knots); + mem += core::CMemory::dynamicSize(m_Values); + mem += core::CMemory::dynamicSize(m_Curvatures); + return mem; + } + + //! Get the knot points of the spline. + inline const TNonConstKnots& knots() const { return boost::unwrap_ref(m_Knots); } + + //! Get the values at the knot points of the spline. + inline const TNonConstValues& values() const { return boost::unwrap_ref(m_Values); } + + //! Get the curvatures at the knot points of the spline. + inline const TNonConstCurvatures& curvatures() const { return boost::unwrap_ref(m_Curvatures); } + +private: + //! Get the knot points of the spline. + inline TKnots& knotsRef() { return boost::unwrap_ref(m_Knots); } + + //! Get the values at the knot points of the spline. + inline TNonConstValues& valuesRef() { return boost::unwrap_ref(m_Values); } + + //! Get the curvatures at the knot points of the spline. + inline TCurvatures& curvaturesRef() { return boost::unwrap_ref(m_Curvatures); } + +private: + //! The type of spline. + EType m_Type; + //! The spline knot points. + KNOTS m_Knots; + //! The spline values at the knot points. + VALUES m_Values; + //! The spline curvatures at the knot points. + CURVATURES m_Curvatures; }; - } } diff --git a/include/maths/CStatisticalTests.h b/include/maths/CStatisticalTests.h index a46989d370..144c6a03d5 100644 --- a/include/maths/CStatisticalTests.h +++ b/include/maths/CStatisticalTests.h @@ -17,128 +17,122 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief A collection of statistical tests and test statistics. -class MATHS_EXPORT CStatisticalTests -{ +class MATHS_EXPORT CStatisticalTests { +public: + using TUInt16Vec = std::vector; + using TDoubleVec = std::vector; + +public: + //! Get the significance of a left tail F-test for \p x when + //! the test statistic has \p d1 and \p d2 degrees of freedom + //! under the null hypothesis. + static double leftTailFTest(double x, double d1, double d2); + + //! Get the significance of a right tail F-test for \p x when + //! the test statistic has \p d1 and \p d2 degrees of freedom + //! under the null hypothesis. + static double rightTailFTest(double x, double d1, double d2); + + //! A two sample Kolmogorov-Smirnov test. + //! + //! This computes the test significance for rejecting the + //! null hypothesis that \p x and \p y are samples from the + //! same distribution. The smaller the significance the + //! more likely that \p x and \p y come from different + //! distributions. + //! + //! \note This is based on the implementation in Numerical + //! Recipes in C. + static double twoSampleKS(TDoubleVec x, TDoubleVec y); + + //! \brief Implements the Cramer-von Mises criterion. + //! + //! DESCRIPTION:\n + //! 
The Cramer-von Mises test is a non-parametric goodness
+    //! of fit test for the values of a random variable compared
+    //! to some estimated probability density function. In
+    //! particular, the statistic is:
+    //! <pre>
+    //!   \f$\displaystyle T = n\omega^2 = \frac{1}{12n} + \sum_{i=1}^n{\left(\frac{2i-1}{2n} - F(x_{(i)})\right)^2}\f$
+    //! </pre>
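+    //!
+    //! For example (illustrative numbers only): two samples with
+    //! \f$F(x_{(1)}) = 0.2\f$ and \f$F(x_{(2)}) = 0.9\f$ give
+    //! \f$T = \frac{1}{24} + (0.25 - 0.2)^2 + (0.75 - 0.9)^2 \approx 0.067\f$.
+    //!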
+    //! Here, \f$x_{(i)}\f$ are the \f$n\f$ order statistics of
+    //! a collection of \f$n\f$ samples of the random variable
+    //! under test and \f$F(.)\f$ is the estimated cumulative
+    //! density function. Under the null hypothesis, that the
+    //! random variable is distributed according to \f$F(.)\f$,
+    //! the distribution of the values of \f$T\f$ is independent
+    //! of the form of the distribution function and can be
+    //! tabulated for different p-values and sample counts. For
+    //! large counts the values are approximately independent of
+    //! count. We tabulate the values and interpolate the table.
+    //!
+    //! \see http://en.wikipedia.org/wiki/Cram%C3%A9r%E2%80%93von_Mises_criterion
+    //! for more information on this test statistic.
+    class MATHS_EXPORT CCramerVonMises {
    public:
+        //! Enumeration of the p values for which the test
+        //! statistic value is tabulated.
+        static const double P_VALUES[16];
+        //! Enumeration of the counts of values used in the
+        //! test statistic for which the value of the test
+        //! statistic is tabulated.
+        static const std::size_t N[13];
+        //! The tabulated values of the test statistic for
+        //! specific p-values and counts.
+        static const double T_VALUES[13][16];

    public:
-        //! Get the significance of a left tail F-test for \p x when
-        //! the test statistic has \p d1 and \p d2 degrees of freedom
-        //! under the null hypothesis.
-        static double leftTailFTest(double x, double d1, double d2);
-
-        //! Get the significance of a right tail F-test for \p x when
-        //! the test statistic has \p d1 and \p d2 degrees of freedom
-        //! under the null hypothesis.
-        static double rightTailFTest(double x, double d1, double d2);
-
-        //! A two sample Kolmogorov-Smirnov test.
-        //!
-        //! This computes the test significance for rejecting the
-        //! null hypothesis that \p x and \p y are samples from the
-        //! same distribution. The smaller the significance the
-        //! more likely that \p x and \p y come from different
-        //! distributions.
-        //!
-        //! \note This is based on the implementation in Numerical
-        //! Recipes in C.
-        static double twoSampleKS(TDoubleVec x, TDoubleVec y);
-
-        //! \brief Implements the Cramer-von Mises criterion.
-        //!
-        //! DESCRIPTION:\n
-        //! The Cramer-von Mises test is a non-parameteric goodness
-        //! of fit test for the values of random variable compared
-        //! to some estimated probability density function. In
-        //! particular, the statistic is:
-        //! <pre>
-        //!   \f$\displaystyle T = n\omega^2 = \frac{1}{12n}+\sum_{i=1}^n{\frac{2i-1}{2n}-F(x_{(i)})}\f$
-        //! </pre>
- //! Here, \f$x_{(i)}\f$ are the \f$n\f$ order statistics of - //! a collection of \f$n\f$ samples of the random variable - //! under test and \f$F(.)\f$ is the estimated cumulative - //! density function. Under the null hypothesis, that the - //! random variable is distributed according to \f$F(.)\f$, - //! the distribution of the values of \f$T\f$ are independent - //! of the form of the distribution function and can be - //! tabulated for different p-values and sample counts. For - //! large count the values are approximately independent of - //! count. We tabulate the values and interpolate the table. - //! - //! \see http://en.wikipedia.org/wiki/Cram%C3%A9r%E2%80%93von_Mises_criterion - //! for more information on this test statistic. - class MATHS_EXPORT CCramerVonMises - { - public: - //! Enumeration of the p values for which the test - //! statistic value is tabulated. - static const double P_VALUES[16]; - //! Enumeration of the count, of the values used in - //! the test statistic, for which the value of the - //! test statistic is tabulated. - static const std::size_t N[13]; - //! The tabulated values of the test statistic for - //! specific p-values and counts. - static const double T_VALUES[13][16]; - - public: - CCramerVonMises(std::size_t size); - - //! Create by traversing a state document. - CCramerVonMises(core::CStateRestoreTraverser &traverser); - - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Add a value of the cumulative density function. - void addF(double f); - - //! Get the mean test p-value for the observations - //! to date. - double pValue() const; - - //! Age out old p-values of the test. - void age(double factor); - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; - - private: - //! Create by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - private: - //! The scale used to convert doubles in the interval - //! [0,1] to 16 bit integers. - static const double SCALE; - - private: - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - - private: - //! The "count - 1" in the test statistic. - std::size_t m_Size; - //! The mean value of the test statistic. - TMeanAccumulator m_T; - //! The current values in the test statistic scaled - //! and converted to 16 bit integers. - TUInt16Vec m_F; - }; -}; + CCramerVonMises(std::size_t size); + + //! Create by traversing a state document. + CCramerVonMises(core::CStateRestoreTraverser& traverser); + + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Add a value of the cumulative density function. + void addF(double f); + + //! Get the mean test p-value for the observations + //! to date. + double pValue() const; + + //! Age out old p-values of the test. + void age(double factor); + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; + + private: + //! Create by traversing a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + private: + //! The scale used to convert doubles in the interval + //! [0,1] to 16 bit integers. + static const double SCALE; + + private: + using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + + private: + //! The "count - 1" in the test statistic. + std::size_t m_Size; + //! 
The mean value of the test statistic.
+        TMeanAccumulator m_T;
+        //! The current values in the test statistic scaled
+        //! and converted to 16 bit integers.
+        TUInt16Vec m_F;
+    };
+};
 }
 }
 
 #endif // INCLUDED_ml_maths_CStatisticalTests_h
diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h
index 873ea1cd72..ab4b5f70a0 100644
--- a/include/maths/CTimeSeriesChangeDetector.h
+++ b/include/maths/CTimeSeriesChangeDetector.h
@@ -8,10 +8,10 @@
 #define INCLUDED_ml_maths_CTimeSeriesChangeDetector_h
 
 #include
-#include
-#include
 #include
 #include
+#include
+#include
 
 #include
 #include
@@ -20,43 +20,31 @@
 #include
 #include
 
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
 
-namespace maths
-{
+namespace maths {
 class CModelAddSamplesParams;
 class CPrior;
 class CTimeSeriesDecompositionInterface;
 struct SDistributionRestoreParams;
 struct SModelRestoreParams;
 
-namespace time_series_change_detector_detail
-{
+namespace time_series_change_detector_detail {
 class CUnivariateChangeModel;
 }
 
 //! \brief A description of a time series change.
-struct MATHS_EXPORT SChangeDescription
-{
+struct MATHS_EXPORT SChangeDescription {
     using TDouble2Vec = core::CSmallVector<double, 2>;
     using TPriorPtr = boost::shared_ptr<CPrior>;
 
     //! The types of change we can detect.
-    enum EDescription
-    {
-        E_LevelShift,
-        E_LinearScale,
-        E_TimeShift
-    };
+    enum EDescription { E_LevelShift, E_LinearScale, E_TimeShift };
 
-    SChangeDescription(EDescription decription,
-                       double value,
-                       const TPriorPtr &residualModel);
+    SChangeDescription(EDescription description, double value, const TPriorPtr& residualModel);
 
     //! Get a description of this change.
     std::string print() const;
@@ -74,371 +62,334 @@ struct MATHS_EXPORT SChangeDescription
 //! \brief Tests a variety of possible changes which might have
 //! occurred in a time series and selects one if it provides a
 //! good explanation of the recent behaviour.
-class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector
-{
-    public:
-        using TDouble4Vec = core::CSmallVector<double, 4>;
-        using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
-        using TTimeDoublePr = std::pair<core_t::TTime, double>;
-        using TTimeDoublePr1Vec = core::CSmallVector<TTimeDoublePr, 1>;
-        using TWeightStyleVec = maths_t::TWeightStyleVec;
-        using TDecompositionPtr = boost::shared_ptr<CTimeSeriesDecompositionInterface>;
-        using TPriorPtr = boost::shared_ptr<CPrior>;
-        using TOptionalChangeDescription = boost::optional<SChangeDescription>;
-
-    public:
-        CUnivariateTimeSeriesChangeDetector(const TDecompositionPtr &trendModel,
-                                            const TPriorPtr &residualModel,
-                                            core_t::TTime minimumTimeToDetect = 12 * core::constants::HOUR,
-                                            core_t::TTime maximumTimeToDetect = core::constants::DAY,
-                                            double minimumDeltaBicToDetect = 14.0);
-
-        //! Initialize by reading state from \p traverser.
-        bool acceptRestoreTraverser(const SModelRestoreParams &params,
-                                    core::CStateRestoreTraverser &traverser);
-
-        //! Persist state by passing information to \p inserter.
-        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-        //! Check if there has been a change and get a description
-        //! if there has been.
-        TOptionalChangeDescription change();
-
-        //! The function used to decide whether to accept a change.
-        //! A change is accepted at a value of 1.0 for this function.
-        //!
-        //! \param[out] change Filled in with the index of the change
-        //! the most likely change.
-        double decisionFunction(std::size_t &change) const;
-
-        //! Add \p samples to the change detector.
-        void addSamples(const TWeightStyleVec &weightStyles,
-                        const TTimeDoublePr1Vec &samples,
-                        const TDouble4Vec1Vec &weights);
-
-        //! Check if we should stop testing.
-        bool stopTesting() const;
-
-        //! Debug the memory used by this object.
-        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this object.
-        std::size_t memoryUsage() const;
-
-        //! Get a checksum for this object.
-        uint64_t checksum(uint64_t seed = 0) const;
-
-    private:
-        using TChangeModel = time_series_change_detector_detail::CUnivariateChangeModel;
-        using TChangeModelPtr = boost::shared_ptr<TChangeModel>;
-        using TChangeModelPtr5Vec = core::CSmallVector<TChangeModelPtr, 5>;
-        using TMinMaxAccumulator = CBasicStatistics::CMinMax<core_t::TTime>;
-
-    private:
-        //! The minimum amount of time we need to observe before
-        //! selecting a change model.
-        core_t::TTime m_MinimumTimeToDetect;
-
-        //! The maximum amount of time to try to detect a change.
-        core_t::TTime m_MaximumTimeToDetect;
-
-        //! The minimum increase in BIC select a change model.
-        double m_MinimumDeltaBicToDetect;
-
-        //! The start and end of the change model.
-        TMinMaxAccumulator m_TimeRange;
-
-        //! The count of samples added to the change models.
-        std::size_t m_SampleCount;
-
-        //! The current evidence of a change.
-        double m_CurrentEvidenceOfChange;
-
-        //! The change models.
-        TChangeModelPtr5Vec m_ChangeModels;
+class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector {
+public:
+    using TDouble4Vec = core::CSmallVector<double, 4>;
+    using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
+    using TTimeDoublePr = std::pair<core_t::TTime, double>;
+    using TTimeDoublePr1Vec = core::CSmallVector<TTimeDoublePr, 1>;
+    using TWeightStyleVec = maths_t::TWeightStyleVec;
+    using TDecompositionPtr = boost::shared_ptr<CTimeSeriesDecompositionInterface>;
+    using TPriorPtr = boost::shared_ptr<CPrior>;
+    using TOptionalChangeDescription = boost::optional<SChangeDescription>;
+
+public:
+    CUnivariateTimeSeriesChangeDetector(const TDecompositionPtr& trendModel,
+                                        const TPriorPtr& residualModel,
+                                        core_t::TTime minimumTimeToDetect = 12 * core::constants::HOUR,
+                                        core_t::TTime maximumTimeToDetect = core::constants::DAY,
+                                        double minimumDeltaBicToDetect = 14.0);
+
+    //! Initialize by reading state from \p traverser.
+    bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+
+    //! Persist state by passing information to \p inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Check if there has been a change and get a description
+    //! if there has been.
+    TOptionalChangeDescription change();
+
+    //! The function used to decide whether to accept a change.
+    //! A change is accepted at a value of 1.0 for this function.
+    //!
+    //! \param[out] change Filled in with the index of the most
+    //! likely change.
+    double decisionFunction(std::size_t& change) const;
+
+    //! Add \p samples to the change detector.
+    void addSamples(const TWeightStyleVec& weightStyles, const TTimeDoublePr1Vec& samples, const TDouble4Vec1Vec& weights);
+
+    //! Check if we should stop testing.
+    bool stopTesting() const;
+
+    //! Debug the memory used by this object.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this object.
+    std::size_t memoryUsage() const;
+
+    //! Get a checksum for this object.
+    uint64_t checksum(uint64_t seed = 0) const;
+
+private:
+    using TChangeModel = time_series_change_detector_detail::CUnivariateChangeModel;
+    using TChangeModelPtr = boost::shared_ptr<TChangeModel>;
+    using TChangeModelPtr5Vec = core::CSmallVector<TChangeModelPtr, 5>;
+    using TMinMaxAccumulator = CBasicStatistics::CMinMax<core_t::TTime>;
+
+private:
+    //! The minimum amount of time we need to observe before
+    //! selecting a change model.
+    core_t::TTime m_MinimumTimeToDetect;
+
+    //! The maximum amount of time to try to detect a change.
+    core_t::TTime m_MaximumTimeToDetect;
+
+    //! The minimum increase in BIC to select a change model.
+    double m_MinimumDeltaBicToDetect;
+
+    //! The start and end of the change model.
+    TMinMaxAccumulator m_TimeRange;
+
+    //! The count of samples added to the change models.
+    std::size_t m_SampleCount;
+
+    //! The current evidence of a change.
+    double m_CurrentEvidenceOfChange;
+
+    //! The change models.
+    TChangeModelPtr5Vec m_ChangeModels;
 };
 
-namespace time_series_change_detector_detail
-{
+namespace time_series_change_detector_detail {
 
 //! \brief Helper interface for change detection. Implementations of
 //! this are used to model specific types of changes which can occur.
-class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
-{
-    public:
-        using TDouble1Vec = core::CSmallVector<double, 1>;
-        using TDouble4Vec = core::CSmallVector<double, 4>;
-        using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
-        using TTimeDoublePr = std::pair<core_t::TTime, double>;
-        using TTimeDoublePr1Vec = core::CSmallVector<TTimeDoublePr, 1>;
-        using TWeightStyleVec = maths_t::TWeightStyleVec;
-        using TDecompositionPtr = boost::shared_ptr<CTimeSeriesDecompositionInterface>;
-        using TPriorPtr = boost::shared_ptr<CPrior>;
-        using TOptionalChangeDescription = boost::optional<SChangeDescription>;
-
-    public:
-        CUnivariateChangeModel(const TDecompositionPtr &trendModel,
-                               const TPriorPtr &residualModel);
-        virtual ~CUnivariateChangeModel() = default;
-
-        //! Initialize by reading state from \p traverser.
-        virtual bool acceptRestoreTraverser(const SModelRestoreParams &params,
-                                            core::CStateRestoreTraverser &traverser) = 0;
-
-        //! Persist state by passing information to \p inserter.
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0;
-
-        //! The BIC of applying the change.
-        virtual double bic() const = 0;
-
-        //! The expected BIC of applying the change.
-        virtual double expectedBic() const = 0;
-
-        //! Get a description of the change.
-        virtual TOptionalChangeDescription change() const = 0;
-
-        //! Update the change model with \p samples.
-        virtual void addSamples(const std::size_t count,
-                                TWeightStyleVec weightStyles,
-                                const TTimeDoublePr1Vec &samples,
-                                TDouble4Vec1Vec weights) = 0;
-
-        //! Debug the memory used by this object.
-        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this object.
-        std::size_t memoryUsage() const;
-
-        //! Get the static size of this object.
-        virtual std::size_t staticSize() const = 0;
-
-        //! Get a checksum for this object.
-        virtual uint64_t checksum(uint64_t seed) const = 0;
-
-    protected:
-        //! Restore the residual model reading state from \p traverser.
-        bool restoreResidualModel(const SDistributionRestoreParams &params,
-                                  core::CStateRestoreTraverser &traverser);
-
-        //! Get the log-likelihood.
-        double logLikelihood() const;
-
-        //! Get the expected log-likelihood.
-        double expectedLogLikelihood() const;
-
-        //! Update the log-likelihood with \p samples.
-        void updateLogLikelihood(const TWeightStyleVec &weightStyles,
-                                 const TDouble1Vec &samples,
-                                 const TDouble4Vec1Vec &weights);
-
-        //! Update the expected log-likelihoods.
- void updateExpectedLogLikelihood(const TWeightStyleVec &weightStyles, - const TDouble4Vec1Vec &weights); - - //! Get the time series trend model. - const CTimeSeriesDecompositionInterface &trendModel() const; - - //! Get the time series residual model. - const CPrior &residualModel() const; - //! Get the time series residual model. - CPrior &residualModel(); - //! Get the time series residual model member variable. - TPriorPtr residualModelPtr() const; - - private: - //! The likelihood of the data under this model. - double m_LogLikelihood; - - //! The expected log-likelihood of the data under this model. - double m_ExpectedLogLikelihood; - - //! A model decomposing the time series trend. - TDecompositionPtr m_TrendModel; - - //! A reference to the underlying prior. - TPriorPtr m_ResidualModel; +class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable { +public: + using TDouble1Vec = core::CSmallVector; + using TDouble4Vec = core::CSmallVector; + using TDouble4Vec1Vec = core::CSmallVector; + using TTimeDoublePr = std::pair; + using TTimeDoublePr1Vec = core::CSmallVector; + using TWeightStyleVec = maths_t::TWeightStyleVec; + using TDecompositionPtr = boost::shared_ptr; + using TPriorPtr = boost::shared_ptr; + using TOptionalChangeDescription = boost::optional; + +public: + CUnivariateChangeModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel); + virtual ~CUnivariateChangeModel() = default; + + //! Initialize by reading state from \p traverser. + virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) = 0; + + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0; + + //! The BIC of applying the change. + virtual double bic() const = 0; + + //! The expected BIC of applying the change. + virtual double expectedBic() const = 0; + + //! Get a description of the change. + virtual TOptionalChangeDescription change() const = 0; + + //! Update the change model with \p samples. + virtual void + addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights) = 0; + + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + std::size_t memoryUsage() const; + + //! Get the static size of this object. + virtual std::size_t staticSize() const = 0; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed) const = 0; + +protected: + //! Restore the residual model reading state from \p traverser. + bool restoreResidualModel(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Get the log-likelihood. + double logLikelihood() const; + + //! Get the expected log-likelihood. + double expectedLogLikelihood() const; + + //! Update the log-likelihood with \p samples. + void updateLogLikelihood(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + + //! Update the expected log-likelihoods. + void updateExpectedLogLikelihood(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights); + + //! Get the time series trend model. + const CTimeSeriesDecompositionInterface& trendModel() const; + + //! Get the time series residual model. + const CPrior& residualModel() const; + //! Get the time series residual model. + CPrior& residualModel(); + //! 
Get the time series residual model member variable. + TPriorPtr residualModelPtr() const; + +private: + //! The likelihood of the data under this model. + double m_LogLikelihood; + + //! The expected log-likelihood of the data under this model. + double m_ExpectedLogLikelihood; + + //! A model decomposing the time series trend. + TDecompositionPtr m_TrendModel; + + //! A reference to the underlying prior. + TPriorPtr m_ResidualModel; }; //! \brief Used to capture the likelihood of the data given no change. -class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeModel -{ - public: - CUnivariateNoChangeModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel); +class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeModel { +public: + CUnivariateNoChangeModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel); - //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); + //! Initialize by reading state from \p traverser. + virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); - //! Persist state by passing information to \p inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Returns the no change BIC. - virtual double bic() const; + //! Returns the no change BIC. + virtual double bic() const; - //! The expected BIC of applying the change. - virtual double expectedBic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; - //! Returns a null object. - virtual TOptionalChangeDescription change() const; + //! Returns a null object. + virtual TOptionalChangeDescription change() const; - //! Get the log likelihood of \p samples. - virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, - const TTimeDoublePr1Vec &samples, - TDouble4Vec1Vec weights); + //! Get the log likelihood of \p samples. + virtual void + addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights); - //! Get the static size of this object. - virtual std::size_t staticSize() const; + //! Get the static size of this object. + virtual std::size_t staticSize() const; - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed) const; + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed) const; }; //! \brief Captures the likelihood of the data given an arbitrary //! level shift. -class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeModel -{ - public: - CUnivariateLevelShiftModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel); +class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeModel { +public: + CUnivariateLevelShiftModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel); - //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); + //! Initialize by reading state from \p traverser. + virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); - //! 
Persist state by passing information to \p inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! The BIC of applying the level shift. - virtual double bic() const; + //! The BIC of applying the level shift. + virtual double bic() const; - //! The expected BIC of applying the change. - virtual double expectedBic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; - //! Get a description of the level shift. - virtual TOptionalChangeDescription change() const; + //! Get a description of the level shift. + virtual TOptionalChangeDescription change() const; - //! Update with \p samples. - virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, - const TTimeDoublePr1Vec &samples, - TDouble4Vec1Vec weights); + //! Update with \p samples. + virtual void + addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights); - //! Get the static size of this object. - virtual std::size_t staticSize() const; + //! Get the static size of this object. + virtual std::size_t staticSize() const; - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed) const; + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed) const; - private: - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; +private: + using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - private: - //! The optimal shift. - TMeanAccumulator m_Shift; +private: + //! The optimal shift. + TMeanAccumulator m_Shift; - //! The mode of the initial residual distribution model. - double m_ResidualModelMode; + //! The mode of the initial residual distribution model. + double m_ResidualModelMode; - //! The number of samples added so far. - double m_SampleCount; + //! The number of samples added so far. + double m_SampleCount; }; //! \brief Captures the likelihood of the data given an arbitrary //! linear scaling. -class MATHS_EXPORT CUnivariateLinearScaleModel final : public CUnivariateChangeModel -{ - public: - CUnivariateLinearScaleModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel); +class MATHS_EXPORT CUnivariateLinearScaleModel final : public CUnivariateChangeModel { +public: + CUnivariateLinearScaleModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel); - //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); + //! Initialize by reading state from \p traverser. + virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); - //! Persist state by passing information to \p inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! The BIC of applying the level shift. - virtual double bic() const; + //! The BIC of applying the level shift. + virtual double bic() const; - //! The expected BIC of applying the change. - virtual double expectedBic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; - //! Get a description of the level shift. 
- virtual TOptionalChangeDescription change() const; + //! Get a description of the level shift. + virtual TOptionalChangeDescription change() const; - //! Update with \p samples. - virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, - const TTimeDoublePr1Vec &samples, - TDouble4Vec1Vec weights); + //! Update with \p samples. + virtual void + addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights); - //! Get the static size of this object. - virtual std::size_t staticSize() const; + //! Get the static size of this object. + virtual std::size_t staticSize() const; - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed) const; + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed) const; - private: - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; +private: + using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - private: - //! The optimal shift. - TMeanAccumulator m_Scale; +private: + //! The optimal shift. + TMeanAccumulator m_Scale; - //! The mode of the initial residual distribution model. - double m_ResidualModelMode; + //! The mode of the initial residual distribution model. + double m_ResidualModelMode; - //! The number of samples added so far. - double m_SampleCount; + //! The number of samples added so far. + double m_SampleCount; }; //! \brief Captures the likelihood of the data given a specified //! time shift. -class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeModel -{ - public: - CUnivariateTimeShiftModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel, - core_t::TTime shift); +class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeModel { +public: + CUnivariateTimeShiftModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel, core_t::TTime shift); - //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); + //! Initialize by reading state from \p traverser. + virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); - //! Persist state by passing information to \p inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! The BIC of applying the time shift. - virtual double bic() const; + //! The BIC of applying the time shift. + virtual double bic() const; - //! The expected BIC of applying the change. - virtual double expectedBic() const; + //! The expected BIC of applying the change. + virtual double expectedBic() const; - //! Get a description of the time shift. - virtual TOptionalChangeDescription change() const; + //! Get a description of the time shift. + virtual TOptionalChangeDescription change() const; - //! Update with \p samples. - virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, - const TTimeDoublePr1Vec &samples, - TDouble4Vec1Vec weights); + //! Update with \p samples. + virtual void + addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights); - //! Get the static size of this object. - virtual std::size_t staticSize() const; + //! 
Get the static size of this object. + virtual std::size_t staticSize() const; - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed) const; + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed) const; - private: - //! The shift in time of the time series trend model. - core_t::TTime m_Shift; +private: + //! The shift in time of the time series trend model. + core_t::TTime m_Shift; }; - } - } } diff --git a/include/maths/CTimeSeriesDecomposition.h b/include/maths/CTimeSeriesDecomposition.h index 9f00cadf6b..502aa280f1 100644 --- a/include/maths/CTimeSeriesDecomposition.h +++ b/include/maths/CTimeSeriesDecomposition.h @@ -7,24 +7,21 @@ #ifndef INCLUDED_ml_maths_CTimeSeriesDecomposition_h #define INCLUDED_ml_maths_CTimeSeriesDecomposition_h -#include #include #include +#include #include #include class CTimeSeriesDecompositionTest; -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CPrior; struct STimeSeriesDecompositionRestoreParams; @@ -51,213 +48,194 @@ struct STimeSeriesDecompositionRestoreParams; //! //! By default this assumes the data has one day and one week //! periodicity, i.e. \f${ T_i } = { 86400, 604800 }\f$. -class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInterface, - private CTimeSeriesDecompositionDetail -{ - public: - using TSizeVec = std::vector; - - public: - //! \param[in] decayRate The rate at which information is lost. - //! \param[in] bucketLength The data bucketing length. - //! \param[in] seasonalComponentSize The number of buckets to - //! use estimate a seasonal component. - explicit CTimeSeriesDecomposition(double decayRate = 0.0, - core_t::TTime bucketLength = 0, - std::size_t seasonalComponentSize = DECOMPOSITION_COMPONENT_SIZE); - - //! Construct from part of a state document. - CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Deep copy. - CTimeSeriesDecomposition(const CTimeSeriesDecomposition &other, bool isForForecast = false); - - //! An efficient swap of the state of this and \p other. - void swap(CTimeSeriesDecomposition &other); - - //! Assign this object (using deep copy). - CTimeSeriesDecomposition &operator=(const CTimeSeriesDecomposition &other); - - //! Persist state by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Clone this decomposition. - virtual CTimeSeriesDecomposition *clone(bool isForForecast = false) const; - - //! Set the data type. - virtual void dataType(maths_t::EDataType dataType); - - //! Set the decay rate. - virtual void decayRate(double decayRate); - - //! Get the decay rate. - virtual double decayRate() const; - - //! Check if the decomposition has any initialized components. - virtual bool initialized() const; - - //! Adds a time series point \f$(t, f(t))\f$. - //! - //! \param[in] time The time of the function point. - //! \param[in] value The function value at \p time. - //! \param[in] weightStyles The styles of \p weights. Both the count - //! and the Winsorisation weight styles have an effect. See also - //! maths_t::ESampleWeightStyle for more details. - //! \param[in] weights The weights of \p value. The smaller - //! the count weight the less influence \p value has on the trend - //! and it's local variance. - //! 
\return True if number of estimated components changed - //! and false otherwise. - virtual bool addPoint(core_t::TTime time, - double value, - const maths_t::TWeightStyleVec &weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec &weights = TWeights::UNIT); - - //! Apply \p change at \p time. - //! - //! \param[in] time The time of the change point. - //! \param[in] value The value immediately before the change - //! point. - //! \param[in] change A description of the change to apply. - //! \return True if a new component was detected. - virtual bool applyChange(core_t::TTime time, double value, - const SChangeDescription &change); - - //! Propagate the decomposition forwards to \p time. - virtual void propagateForwardsTo(core_t::TTime time); - - //! Get the mean value of the time series in the vicinity of \p time. - virtual double meanValue(core_t::TTime time) const; - - //! Get the value of the time series at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] confidence The symmetric confidence interval for the prediction - //! the baseline as a percentage. - //! \param[in] components The components to include in the baseline. - virtual maths_t::TDoubleDoublePr value(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - bool smooth = true) const; - - //! Forecast from \p start to \p end at \p dt intervals. - //! - //! \param[in] startTime The start of the forecast. - //! \param[in] endTime The end of the forecast. - //! \param[in] step The time increment. - //! \param[in] confidence The forecast confidence interval. - //! \param[in] minimumScale The minimum permitted seasonal scale. - //! \param[in] writer Forecast results are passed to this callback. - virtual void forecast(core_t::TTime startTime, - core_t::TTime endTime, - core_t::TTime step, - double confidence, - double minimumScale, - const TWriteForecastResult &writer); - - //! Detrend \p value from the time series being modeled by removing - //! any trend and periodic component at \p time. - virtual double detrend(core_t::TTime time, - double value, - double confidence, - int components = E_All) const; - - //! Get the mean variance of the baseline. - virtual double meanVariance() const; - - //! Compute the variance scale at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] variance The variance of the distribution - //! to scale. - //! \param[in] confidence The symmetric confidence interval - //! for the variance scale as a percentage. - virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, - double variance, - double confidence, - bool smooth = true) const; - - //! Roll time forwards by \p skipInterval. - virtual void skipTime(core_t::TTime skipInterval); - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object. - virtual std::size_t staticSize() const; - - //! Get the time shift which is being applied. - virtual core_t::TTime timeShift(void) const; - - //! Get the seasonal components. - virtual const maths_t::TSeasonalComponentVec &seasonalComponents() const; - - //! This is the latest time of any point added to this object or - //! the time skipped to. 
-        virtual core_t::TTime lastValueTime() const;
-
-    private:
-        using TMediatorPtr = boost::shared_ptr<CMediator>;
-
-    private:
-        //! Set up the communication mediator.
-        void initializeMediator();
-
-        //! Create from part of a state document.
-        bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
-                                    core::CStateRestoreTraverser &traverser);
-
-        //! The correction to produce a smooth join between periodic
-        //! repeats and partitions.
-        template<typename F>
-        maths_t::TDoubleDoublePr smooth(const F &f,
-                                        core_t::TTime time,
-                                        int components) const;
-
-        //! Check if \p component has been selected.
-        bool selected(core_t::TTime time,
-                      int components,
-                      const CSeasonalComponent &component) const;
-
-        //! Check if \p components match \p component.
-        bool matches(int components, const CSeasonalComponent &component) const;
-
-    private:
-        //! The time over which discontinuities between weekdays
-        //! and weekends are smoothed out.
-        static const core_t::TTime SMOOTHING_INTERVAL;
-
-    private:
-        //! Any time shift to supplied times.
-        core_t::TTime m_TimeShift;
-
-        //! The time of the latest value added.
-        core_t::TTime m_LastValueTime;
-
-        //! The time to which the trend has been propagated.
-        core_t::TTime m_LastPropagationTime;
-
-        //! Handles the communication between the various tests and
-        //! components.
-        TMediatorPtr m_Mediator;
-
-        //! The test for seasonal components.
-        CPeriodicityTest m_PeriodicityTest;
-
-        //! The test for calendar cyclic components.
-        CCalendarTest m_CalendarCyclicTest;
+class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInterface, private CTimeSeriesDecompositionDetail {
+public:
+    using TSizeVec = std::vector<std::size_t>;
+
+public:
+    //! \param[in] decayRate The rate at which information is lost.
+    //! \param[in] bucketLength The data bucketing length.
+    //! \param[in] seasonalComponentSize The number of buckets to
+    //! use to estimate a seasonal component.
+    explicit CTimeSeriesDecomposition(double decayRate = 0.0,
+                                      core_t::TTime bucketLength = 0,
+                                      std::size_t seasonalComponentSize = DECOMPOSITION_COMPONENT_SIZE);
+
+    //! Construct from part of a state document.
+    CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+
+    //! Deep copy.
+    CTimeSeriesDecomposition(const CTimeSeriesDecomposition& other, bool isForForecast = false);
+
+    //! An efficient swap of the state of this and \p other.
+    void swap(CTimeSeriesDecomposition& other);
+
+    //! Assign this object (using deep copy).
+    CTimeSeriesDecomposition& operator=(const CTimeSeriesDecomposition& other);
+
+    //! Persist state by passing information to the supplied inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Clone this decomposition.
+    virtual CTimeSeriesDecomposition* clone(bool isForForecast = false) const;
+
+    //! Set the data type.
+    virtual void dataType(maths_t::EDataType dataType);
+
+    //! Set the decay rate.
+    virtual void decayRate(double decayRate);
+
+    //! Get the decay rate.
+    virtual double decayRate() const;
+
+    //! Check if the decomposition has any initialized components.
+    virtual bool initialized() const;
+
+    //! Adds a time series point \f$(t, f(t))\f$.
+    //!
+    //! \param[in] time The time of the function point.
+    //! \param[in] value The function value at \p time.
+    //! \param[in] weightStyles The styles of \p weights. Both the count
+    //! and the Winsorisation weight styles have an effect. See also
+    //! maths_t::ESampleWeightStyle for more details.
+    //! 
\param[in] weights The weights of \p value. The smaller
+    //! the count weight the less influence \p value has on the trend
+    //! and its local variance.
+    //! \return True if the number of estimated components changed
+    //! and false otherwise.
+    virtual bool addPoint(core_t::TTime time,
+                          double value,
+                          const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT,
+                          const maths_t::TDouble4Vec& weights = TWeights::UNIT);
+
+    //! Apply \p change at \p time.
+    //!
+    //! \param[in] time The time of the change point.
+    //! \param[in] value The value immediately before the change
+    //! point.
+    //! \param[in] change A description of the change to apply.
+    //! \return True if a new component was detected.
+    virtual bool applyChange(core_t::TTime time, double value, const SChangeDescription& change);
+
+    //! Propagate the decomposition forwards to \p time.
+    virtual void propagateForwardsTo(core_t::TTime time);
+
+    //! Get the mean value of the time series in the vicinity of \p time.
+    virtual double meanValue(core_t::TTime time) const;
+
+    //! Get the value of the time series at \p time.
+    //!
+    //! \param[in] time The time of interest.
+    //! \param[in] confidence The symmetric confidence interval for the prediction
+    //! of the baseline as a percentage.
+    //! \param[in] components The components to include in the baseline.
+    virtual maths_t::TDoubleDoublePr value(core_t::TTime time, double confidence = 0.0, int components = E_All, bool smooth = true) const;
+
+    //! Forecast from \p start to \p end at \p dt intervals.
+    //!
+    //! \param[in] startTime The start of the forecast.
+    //! \param[in] endTime The end of the forecast.
+    //! \param[in] step The time increment.
+    //! \param[in] confidence The forecast confidence interval.
+    //! \param[in] minimumScale The minimum permitted seasonal scale.
+    //! \param[in] writer Forecast results are passed to this callback.
+    virtual void forecast(core_t::TTime startTime,
+                          core_t::TTime endTime,
+                          core_t::TTime step,
+                          double confidence,
+                          double minimumScale,
+                          const TWriteForecastResult& writer);
+
+    //! Detrend \p value from the time series being modeled by removing
+    //! any trend and periodic component at \p time.
+    virtual double detrend(core_t::TTime time, double value, double confidence, int components = E_All) const;
+
+    //! Get the mean variance of the baseline.
+    virtual double meanVariance() const;
+
+    //! Compute the variance scale at \p time.
+    //!
+    //! \param[in] time The time of interest.
+    //! \param[in] variance The variance of the distribution
+    //! to scale.
+    //! \param[in] confidence The symmetric confidence interval
+    //! for the variance scale as a percentage.
+    virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const;
+
+    //! Roll time forwards by \p skipInterval.
+    virtual void skipTime(core_t::TTime skipInterval);
+
+    //! Get a checksum for this object.
+    virtual uint64_t checksum(uint64_t seed = 0) const;
+
+    //! Debug the memory used by this object.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this object.
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object.
+    virtual std::size_t staticSize() const;
+
+    //! Get the time shift which is being applied.
+    virtual core_t::TTime timeShift(void) const;
+
+    //! Get the seasonal components.
+    virtual const maths_t::TSeasonalComponentVec& seasonalComponents() const;
+
+    //! 
This is the latest time of any point added to this object or + //! the time skipped to. + virtual core_t::TTime lastValueTime() const; + +private: + using TMediatorPtr = boost::shared_ptr; + +private: + //! Set up the communication mediator. + void initializeMediator(); + + //! Create from part of a state document. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! The correction to produce a smooth join between periodic + //! repeats and partitions. + template + maths_t::TDoubleDoublePr smooth(const F& f, core_t::TTime time, int components) const; + + //! Check if \p component has been selected. + bool selected(core_t::TTime time, int components, const CSeasonalComponent& component) const; + + //! Check if \p components match \p component. + bool matches(int components, const CSeasonalComponent& component) const; + +private: + //! The time over which discontinuities between weekdays + //! and weekends are smoothed out. + static const core_t::TTime SMOOTHING_INTERVAL; + +private: + //! Any time shift to supplied times. + core_t::TTime m_TimeShift; - //! The state for modeling the components of the decomposition. - CComponents m_Components; -}; + //! The time of the latest value added. + core_t::TTime m_LastValueTime; + + //! The time to which the trend has been propagated. + core_t::TTime m_LastPropagationTime; + + //! Handles the communication between the various tests and + //! components. + TMediatorPtr m_Mediator; + + //! The test for seasonal components. + CPeriodicityTest m_PeriodicityTest; + //! The test for calendar cyclic components. + CCalendarTest m_CalendarCyclicTest; + + //! The state for modeling the components of the decomposition. + CComponents m_Components; +}; } } diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h index 6c1d84cd8b..cca21467c1 100644 --- a/include/maths/CTimeSeriesDecompositionDetail.h +++ b/include/maths/CTimeSeriesDecompositionDetail.h @@ -7,16 +7,16 @@ #ifndef INCLUDED_ml_maths_CTimeSeriesDecompositionDetail_h #define INCLUDED_ml_maths_CTimeSeriesDecompositionDetail_h -#include #include #include +#include #include +#include #include #include -#include #include -#include +#include #include #include @@ -26,725 +26,682 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CExpandingWindow; class CTimeSeriesDecomposition; //! \brief Utilities for computing the decomposition. -class MATHS_EXPORT CTimeSeriesDecompositionDetail -{ +class MATHS_EXPORT CTimeSeriesDecompositionDetail { +public: + using TPredictor = std::function; + using TDoubleVec = std::vector; + using TTimeVec = std::vector; + class CMediator; + + //! \brief The base message passed. + struct MATHS_EXPORT SMessage { + SMessage(core_t::TTime time, core_t::TTime lastTime); + + //! The message time. + core_t::TTime s_Time; + //! The last update time. + core_t::TTime s_LastTime; + }; + + //! \brief The message passed to add a point. + struct MATHS_EXPORT SAddValue : public SMessage, private core::CNonCopyable { + SAddValue(core_t::TTime time, + core_t::TTime lastTime, + double value, + const maths_t::TWeightStyleVec& weightStyles, + const maths_t::TDouble4Vec& weights, + double trend, + double seasonal, + double calendar, + const TPredictor& predictor, + const CPeriodicityHypothesisTestsConfig& periodicityTestConfig); + + //! The value to add. + double s_Value; + //! The styles of the weights. 
+ const maths_t::TWeightStyleVec& s_WeightStyles; + //! The weights of associated with the value. + const maths_t::TDouble4Vec& s_Weights; + //! The trend component prediction at the value's time. + double s_Trend; + //! The seasonal component prediction at the value's time. + double s_Seasonal; + //! The calendar component prediction at the value's time. + double s_Calendar; + //! The predictor for value. + TPredictor s_Predictor; + //! The periodicity test configuration. + CPeriodicityHypothesisTestsConfig s_PeriodicityTestConfig; + }; + + //! \brief The message passed to indicate periodic components have + //! been detected. + struct MATHS_EXPORT SDetectedSeasonal : public SMessage { + SDetectedSeasonal(core_t::TTime time, + core_t::TTime lastTime, + const CPeriodicityHypothesisTestsResult& result, + const CExpandingWindow& window, + const TPredictor& predictor); + + //! The components found. + CPeriodicityHypothesisTestsResult s_Result; + //! The window tested. + const CExpandingWindow& s_Window; + //! The predictor for window values. + TPredictor s_Predictor; + }; + + //! \brief The message passed to indicate calendar components have + //! been detected. + struct MATHS_EXPORT SDetectedCalendar : public SMessage { + SDetectedCalendar(core_t::TTime time, core_t::TTime lastTime, CCalendarFeature feature); + + //! The calendar feature found. + CCalendarFeature s_Feature; + }; + + //! \brief The message passed to indicate new components are being + //! modeled. + struct MATHS_EXPORT SNewComponents : public SMessage { + enum EComponent { E_DiurnalSeasonal, E_GeneralSeasonal, E_CalendarCyclic }; + + SNewComponents(core_t::TTime time, core_t::TTime lastTime, EComponent component); + + //! The type of component. + EComponent s_Component; + }; + + //! \brief The basic interface for one aspect of the modeling of a time + //! series decomposition. + class MATHS_EXPORT CHandler : core::CNonCopyable { public: - using TPredictor = std::function; - using TDoubleVec = std::vector; - using TTimeVec = std::vector; - class CMediator; - - //! \brief The base message passed. - struct MATHS_EXPORT SMessage - { - SMessage(core_t::TTime time, core_t::TTime lastTime); - - //! The message time. - core_t::TTime s_Time; - //! The last update time. - core_t::TTime s_LastTime; - }; + CHandler(); + virtual ~CHandler(); - //! \brief The message passed to add a point. - struct MATHS_EXPORT SAddValue : public SMessage, - private core::CNonCopyable - { - SAddValue(core_t::TTime time, - core_t::TTime lastTime, - double value, - const maths_t::TWeightStyleVec &weightStyles, - const maths_t::TDouble4Vec &weights, - double trend, - double seasonal, - double calendar, - const TPredictor &predictor, - const CPeriodicityHypothesisTestsConfig &periodicityTestConfig); - - //! The value to add. - double s_Value; - //! The styles of the weights. - const maths_t::TWeightStyleVec &s_WeightStyles; - //! The weights of associated with the value. - const maths_t::TDouble4Vec &s_Weights; - //! The trend component prediction at the value's time. - double s_Trend; - //! The seasonal component prediction at the value's time. - double s_Seasonal; - //! The calendar component prediction at the value's time. - double s_Calendar; - //! The predictor for value. - TPredictor s_Predictor; - //! The periodicity test configuration. - CPeriodicityHypothesisTestsConfig s_PeriodicityTestConfig; - }; + //! Add a value. + virtual void handle(const SAddValue& message); - //! \brief The message passed to indicate periodic components have - //! 
been detected.
- struct MATHS_EXPORT SDetectedSeasonal
- {
- SDetectedSeasonal(core_t::TTime time,
- core_t::TTime lastTime,
- const CPeriodicityHypothesisTestsResult &result,
- const CExpandingWindow &window,
- const TPredictor &predictor);
-
- //! The components found.
- CPeriodicityHypothesisTestsResult s_Result;
- //! The window tested.
- const CExpandingWindow &s_Window;
- //! The predictor for window values.
- TPredictor s_Predictor;
- };
+ //! Handle when a diurnal component is detected.
+ virtual void handle(const SDetectedSeasonal& message);

- //! \brief The message passed to indicate calendar components have
- //! been detected.
- struct MATHS_EXPORT SDetectedCalendar
- {
- SDetectedCalendar(core_t::TTime time,
- core_t::TTime lastTime,
- CCalendarFeature feature);
+ //! Handle when a calendar component is detected.
+ virtual void handle(const SDetectedCalendar& message);

- //! The calendar feature found.
- CCalendarFeature s_Feature;
- };
+ //! Handle when a new component is being modeled.
+ virtual void handle(const SNewComponents& message);

- //! \brief The message passed to indicate new components are being
- //! modeled.
- struct MATHS_EXPORT SNewComponents : public SMessage
- {
- enum EComponent
- {
- E_DiurnalSeasonal,
- E_GeneralSeasonal,
- E_CalendarCyclic
- };
-
- SNewComponents(core_t::TTime time,
- core_t::TTime lastTime,
- EComponent component);
-
- //! The type of component.
- EComponent s_Component;
- };
+ //! Set the mediator.
+ void mediator(CMediator* mediator);

- //! \brief The basic interface for one aspect of the modeling of a time
- //! series decomposition.
- class MATHS_EXPORT CHandler : core::CNonCopyable
- {
- public:
- CHandler();
- virtual ~CHandler();
+ //! Get the mediator.
+ CMediator* mediator() const;

- //! Add a value.
- virtual void handle(const SAddValue &message);
+ private:
+ //! The controller responsible for forwarding messages.
+ CMediator* m_Mediator;
+ };

- //! Handle when a diurnal component is detected.
- virtual void handle(const SDetectedSeasonal &message);
+ //! \brief Manages communication between handlers.
+ class MATHS_EXPORT CMediator : core::CNonCopyable {
+ public:
+ //! Forward \p message to all registered models.
+ template
+ void forward(const M& message) const;

- //! Handle when a calendar component is detected.
- virtual void handle(const SDetectedCalendar &message);
+ //! Register \p handler.
+ void registerHandler(CHandler& handler);

- //! Handle when a new component is being modeled.
- virtual void handle(const SNewComponents &message);
+ //! Debug the memory used by this object.
+ void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;

- //! Set the mediator.
- void mediator(CMediator *mediator);
+ //! Get the memory used by this object.
+ std::size_t memoryUsage() const;

- //! Get the mediator.
- CMediator *mediator() const;
+ private:
+ using THandlerRef = boost::reference_wrapper;
+ using THandlerRefVec = std::vector;

- private:
- //! The controller responsible for forwarding messages.
- CMediator *m_Mediator;
- };
+ private:
+ //! The handlers which have been added by registration.
+ THandlerRefVec m_Handlers;
+ };

- //! \brief Manages communication between handlers.
- class MATHS_EXPORT CMediator : core::CNonCopyable
- {
- public:
- //! Forward \p message to all registered models.
- template
- void forward(const M &message) const;
+ //! \brief Scans through increasingly low frequencies looking for custom
+ //! diurnal and any other large amplitude seasonal components.
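The CHandler/CMediator pair declared above is a textbook mediator: the periodicity test, the calendar test and the component models never call each other directly, they receive typed messages (SAddValue, SDetectedSeasonal, SDetectedCalendar, SNewComponents) that the mediator forwards to every registered handler. The standalone sketch below shows the shape of that collaboration; every name in it is an illustrative stand-in rather than an ml::maths type, and the real CMediator holds boost::reference_wrapper rather than the std equivalent used here.

    // Minimal mediator sketch: one message type, one handler, broadcast forwarding.
    #include <functional>
    #include <iostream>
    #include <vector>

    struct Message { long time = 0; };

    class Handler {
    public:
        virtual ~Handler() = default;
        virtual void handle(const Message& message) = 0;
    };

    class Mediator {
    public:
        // Register a handler; the mediator does not take ownership.
        void registerHandler(Handler& handler) { m_Handlers.push_back(std::ref(handler)); }

        // Forward a message to every registered handler.
        template<typename M>
        void forward(const M& message) const {
            for (const auto& handler : m_Handlers) {
                handler.get().handle(message);
            }
        }

    private:
        std::vector<std::reference_wrapper<Handler>> m_Handlers;
    };

    class PeriodicityTest : public Handler {
    public:
        void handle(const Message& message) override {
            std::cout << "test for seasonality at time " << message.time << '\n';
        }
    };

    int main() {
        Mediator mediator;
        PeriodicityTest test;
        mediator.registerHandler(test);
        mediator.forward(Message{1520596940});
        return 0;
    }

This indirection is what lets a test announce that components were detected without knowing which objects will react by creating components or resetting their own state.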
+ class MATHS_EXPORT CPeriodicityTest : public CHandler { + public: + CPeriodicityTest(double decayRate, core_t::TTime bucketLength); + CPeriodicityTest(const CPeriodicityTest& other, bool isForForecast = false); - //! Register \p handler. - void registerHandler(CHandler &handler); + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! Efficiently swap the state of this and \p other. + void swap(CPeriodicityTest& other); - private: - using THandlerRef = boost::reference_wrapper; - using THandlerRefVec = std::vector; + //! Update the test with a new value. + virtual void handle(const SAddValue& message); - private: - //! The handlers which have added by registration. - THandlerRefVec m_Handlers; - }; + //! Reset the test. + virtual void handle(const SNewComponents& message); - //! \brief Scans through increasingly low frequencies looking for custom - //! diurnal and any other large amplitude seasonal components. - class MATHS_EXPORT CPeriodicityTest : public CHandler - { - public: - CPeriodicityTest(double decayRate, core_t::TTime bucketLength); - CPeriodicityTest(const CPeriodicityTest &other, bool isForForecast = false); + //! Test to see whether any seasonal components are present. + void test(const SAddValue& message); - //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Age the test to account for the interval \p end - \p start + //! elapsed time. + void propagateForwards(core_t::TTime start, core_t::TTime end); - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; - //! Efficiently swap the state of this and \p other. - void swap(CPeriodicityTest &other); + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Update the test with a new value. - virtual void handle(const SAddValue &message); + //! Get the memory used by this object. + std::size_t memoryUsage() const; - //! Reset the test. - virtual void handle(const SNewComponents &message); + private: + using TTimeAry = boost::array; + using TExpandingWindowPtr = boost::shared_ptr; + using TExpandingWindowPtrAry = boost::array; - //! Test to see whether any seasonal components are present. - void test(const SAddValue &message); + //! Test types (categorised as short and long period tests). + enum ETest { E_Short, E_Long }; - //! Age the test to account for the interval \p end - \p start - //! elapsed time. - void propagateForwards(core_t::TTime start, core_t::TTime end); + private: + //! The bucket lengths to use to test for short period components. + static const TTimeVec SHORT_BUCKET_LENGTHS; - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; + //! The bucket lengths to use to test for long period components. + static const TTimeVec LONG_BUCKET_LENGTHS; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + private: + //! Handle \p symbol. 
+ void apply(std::size_t symbol, const SMessage& message); - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! Check if we should run the periodicity test on \p window. + bool shouldTest(const TExpandingWindowPtr& window, core_t::TTime time) const; - private: - using TTimeAry = boost::array; - using TExpandingWindowPtr = boost::shared_ptr; - using TExpandingWindowPtrAry = boost::array; + //! Get a new \p test. (Warning owned by the caller.) + CExpandingWindow* newWindow(ETest test) const; - //! Test types (categorised as short and long period tests). - enum ETest { E_Short, E_Long }; + //! Account for memory that is not yet allocated + //! during the initial state + std::size_t extraMemoryOnInitialization() const; - private: - //! The bucket lengths to use to test for short period components. - static const TTimeVec SHORT_BUCKET_LENGTHS; + private: + //! The state machine. + core::CStateMachine m_Machine; - //! The bucket lengths to use to test for long period components. - static const TTimeVec LONG_BUCKET_LENGTHS; + //! Controls the rate at which information is lost. + double m_DecayRate; - private: - //! Handle \p symbol. - void apply(std::size_t symbol, const SMessage &message); + //! The raw data bucketing interval. + core_t::TTime m_BucketLength; - //! Check if we should run the periodicity test on \p window. - bool shouldTest(const TExpandingWindowPtr &window, core_t::TTime time) const; + //! Expanding windows on the "recent" time series values. + TExpandingWindowPtrAry m_Windows; + }; - //! Get a new \p test. (Warning owned by the caller.) - CExpandingWindow *newWindow(ETest test) const; + //! \brief Tests for cyclic calendar components explaining large prediction + //! errors. + class MATHS_EXPORT CCalendarTest : public CHandler { + public: + CCalendarTest(double decayRate, core_t::TTime bucketLength); + CCalendarTest(const CCalendarTest& other, bool isForForecast = false); - //! Account for memory that is not yet allocated - //! during the initial state - std::size_t extraMemoryOnInitialization() const; + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - private: - //! The state machine. - core::CStateMachine m_Machine; + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Controls the rate at which information is lost. - double m_DecayRate; + //! Efficiently swap the state of this and \p other. + void swap(CCalendarTest& other); - //! The raw data bucketing interval. - core_t::TTime m_BucketLength; + //! Update the test with a new value. + virtual void handle(const SAddValue& message); - //! Expanding windows on the "recent" time series values. - TExpandingWindowPtrAry m_Windows; - }; + //! Reset the test. + virtual void handle(const SNewComponents& message); - //! \brief Tests for cyclic calendar components explaining large prediction - //! errors. - class MATHS_EXPORT CCalendarTest : public CHandler - { - public: - CCalendarTest(double decayRate, core_t::TTime bucketLength); - CCalendarTest(const CCalendarTest &other, bool isForForecast = false); + //! Test to see whether any seasonal components are present. + void test(const SMessage& message); - //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Age the test to account for the interval \p end - \p start + //! elapsed time. 
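propagateForwards(start, end) together with m_DecayRate, which "controls the rate at which information is lost", describes exponential aging of a test's accumulated state. The header does not show the formula, so the sketch below is an assumption about its shape rather than the library's actual rule: statistics are down-weighted by exp(-decayRate * elapsed buckets), which keeps the mean but makes old evidence progressively cheaper to overturn. All names here are illustrative.

    #include <cmath>
    #include <cstdint>

    struct MeanAccumulator {
        double count = 0.0;
        double mean = 0.0;

        void add(double x) {
            count += 1.0;
            mean += (x - mean) / count;
        }

        // Aging scales the effective sample count; the mean itself is unchanged.
        void age(double factor) { count *= factor; }
    };

    // Illustrative propagateForwards: age the statistics for the elapsed interval.
    void propagateForwards(MeanAccumulator& stats,
                           std::int64_t start,
                           std::int64_t end,
                           double decayRate,
                           std::int64_t bucketLength) {
        double elapsedBuckets = double(end - start) / double(bucketLength);
        stats.age(std::exp(-decayRate * elapsedBuckets));
    }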
+ void propagateForwards(core_t::TTime start, core_t::TTime end); - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; - //! Efficiently swap the state of this and \p other. - void swap(CCalendarTest &other); + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Update the test with a new value. - virtual void handle(const SAddValue &message); + //! Get the memory used by this object. + std::size_t memoryUsage() const; - //! Reset the test. - virtual void handle(const SNewComponents &message); + private: + using TCalendarCyclicTestPtr = boost::shared_ptr; - //! Test to see whether any seasonal components are present. - void test(const SMessage &message); + private: + //! Handle \p symbol. + void apply(std::size_t symbol, const SMessage& message); - //! Age the test to account for the interval \p end - \p start - //! elapsed time. - void propagateForwards(core_t::TTime start, core_t::TTime end); + //! Check if we should run a test. + bool shouldTest(core_t::TTime time); - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; + //! Get the month of \p time. + int month(core_t::TTime time) const; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Account for memory that is not yet allocated + //! during the initial state + std::size_t extraMemoryOnInitialization() const; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + private: + //! The state machine. + core::CStateMachine m_Machine; - private: - using TCalendarCyclicTestPtr = boost::shared_ptr; + //! Controls the rate at which information is lost. + double m_DecayRate; - private: - //! Handle \p symbol. - void apply(std::size_t symbol, const SMessage &message); + //! The last month for which the test was run. + int m_LastMonth; - //! Check if we should run a test. - bool shouldTest(core_t::TTime time); + //! The test for arbitrary periodic components. + TCalendarCyclicTestPtr m_Test; + }; - //! Get the month of \p time. - int month(core_t::TTime time) const; + //! \brief Holds and updates the components of the decomposition. + class MATHS_EXPORT CComponents : public CHandler { + public: + CComponents(double decayRate, core_t::TTime bucketLength, std::size_t seasonalComponentSize); + CComponents(const CComponents& other); - //! Account for memory that is not yet allocated - //! during the initial state - std::size_t extraMemoryOnInitialization() const; - private: - //! The state machine. - core::CStateMachine m_Machine; + //! \brief Watches to see if the seasonal components state changes. + class MATHS_EXPORT CScopeNotifyOnStateChange : core::CNonCopyable { + public: + CScopeNotifyOnStateChange(CComponents& components); + ~CScopeNotifyOnStateChange(); - //! Controls the rate at which information is lost. - double m_DecayRate; + //! Check if the seasonal component's state changed. + bool changed() const; - //! The last month for which the test was run. - int m_LastMonth; + private: + //! The seasonal components this is watching. + CComponents& m_Components; - //! The test for arbitrary periodic components. - TCalendarCyclicTestPtr m_Test; + //! The flag used to watch for changes. + bool m_Watcher; }; - //! \brief Holds and updates the components of the decomposition. 
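CScopeNotifyOnStateChange, declared just above inside CComponents, is an RAII watcher: for the guard's lifetime the watched object writes into a flag the guard owns, and the caller queries changed() when the scope ends. A stripped-down sketch follows; Components is a stand-in, not the real CComponents, though the hook mirrors its notifyOnNewComponents(bool*) member.

    class Components {
    public:
        void notifyOnNewComponents(bool* watcher) { m_Watcher = watcher; }

        void addComponent() {
            // ... create and install a new component ...
            if (m_Watcher != nullptr) {
                *m_Watcher = true;
            }
        }

    private:
        bool* m_Watcher = nullptr;
    };

    class ScopeNotifyOnStateChange {
    public:
        explicit ScopeNotifyOnStateChange(Components& components) : m_Components(components) {
            m_Components.notifyOnNewComponents(&m_Changed);
        }
        ~ScopeNotifyOnStateChange() { m_Components.notifyOnNewComponents(nullptr); }

        bool changed() const { return m_Changed; }

    private:
        Components& m_Components;
        bool m_Changed = false;
    };

Presumably this is how addPoint can report whether the set of estimated components changed without every mutation path having to thread a return flag outwards.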
- class MATHS_EXPORT CComponents : public CHandler - { - public: - CComponents(double decayRate, - core_t::TTime bucketLength, - std::size_t seasonalComponentSize); - CComponents(const CComponents &other); - - //! \brief Watches to see if the seasonal components state changes. - class MATHS_EXPORT CScopeNotifyOnStateChange : core::CNonCopyable - { - public: - CScopeNotifyOnStateChange(CComponents &components); - ~CScopeNotifyOnStateChange(); - - //! Check if the seasonal component's state changed. - bool changed() const; - - private: - //! The seasonal components this is watching. - CComponents &m_Components; + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); - //! The flag used to watch for changes. - bool m_Watcher; - }; + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); + //! Efficiently swap the state of this and \p other. + void swap(CComponents& other); - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Update the components with a new value. + virtual void handle(const SAddValue& message); - //! Efficiently swap the state of this and \p other. - void swap(CComponents &other); + //! Create new seasonal components. + virtual void handle(const SDetectedSeasonal& message); - //! Update the components with a new value. - virtual void handle(const SAddValue &message); + //! Create a new calendar component. + virtual void handle(const SDetectedCalendar& message); - //! Create new seasonal components. - virtual void handle(const SDetectedSeasonal &message); + //! Start using the trend for prediction. + void useTrendForPrediction(void); - //! Create a new calendar component. - virtual void handle(const SDetectedCalendar &message); + //! Apply \p shift to the level at \p time and \p value. + void shiftLevel(core_t::TTime time, double value, double shift); - //! Start using the trend for prediction. - void useTrendForPrediction(void); + //! Apply a linear scale of \p scale. + void linearScale(core_t::TTime time, double scale); - //! Apply \p shift to the level at \p time and \p value. - void shiftLevel(core_t::TTime time, double value, double shift); + //! Maybe re-interpolate the components. + void interpolate(const SMessage& message); - //! Apply a linear scale of \p scale. - void linearScale(core_t::TTime time, double scale); + //! Maybe re-interpolate the components. + void interpolateForForecast(core_t::TTime time); - //! Maybe re-interpolate the components. - void interpolate(const SMessage &message); + //! Set the data type. + void dataType(maths_t::EDataType dataType); - //! Maybe re-interpolate the components. - void interpolateForForecast(core_t::TTime time); + //! Set the decay rate. + void decayRate(double decayRate); - //! Set the data type. - void dataType(maths_t::EDataType dataType); + //! Get the decay rate. + double decayRate() const; - //! Set the decay rate. - void decayRate(double decayRate); + //! Age the components to account for the interval \p end - \p start + //! elapsed time. + void propagateForwards(core_t::TTime start, core_t::TTime end); - //! Get the decay rate. - double decayRate() const; + //! 
Check if the decomposition has any initialized components. + bool initialized() const; - //! Age the components to account for the interval \p end - \p start - //! elapsed time. - void propagateForwards(core_t::TTime start, core_t::TTime end); + //! Get the long term trend. + const CTrendComponent& trend() const; - //! Check if the decomposition has any initialized components. - bool initialized() const; + //! Get the seasonal components. + const maths_t::TSeasonalComponentVec& seasonal() const; - //! Get the long term trend. - const CTrendComponent &trend() const; + //! Get the calendar components. + const maths_t::TCalendarComponentVec& calendar() const; - //! Get the seasonal components. - const maths_t::TSeasonalComponentVec &seasonal() const; + //! Return true if we're using the trend for prediction. + bool usingTrendForPrediction() const; - //! Get the calendar components. - const maths_t::TCalendarComponentVec &calendar() const; + //! Get configuration for the periodicity test. + CPeriodicityHypothesisTestsConfig periodicityTestConfig() const; - //! Return true if we're using the trend for prediction. - bool usingTrendForPrediction() const; + //! Get the mean value of the baseline in the vicinity of \p time. + double meanValue(core_t::TTime time) const; - //! Get configuration for the periodicity test. - CPeriodicityHypothesisTestsConfig periodicityTestConfig() const; + //! Get the mean variance of the baseline. + double meanVariance() const; - //! Get the mean value of the baseline in the vicinity of \p time. - double meanValue(core_t::TTime time) const; + //! Get the mean error variance scale for the components. + double meanVarianceScale() const; - //! Get the mean variance of the baseline. - double meanVariance() const; + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; - //! Get the mean error variance scale for the components. - double meanVarianceScale() const; + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; + //! Get the memory used by this object. + std::size_t memoryUsage() const; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + private: + using TOptionalDouble = boost::optional; + using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; + using TSeasonalComponentPtrVec = std::vector; + using TCalendarComponentPtrVec = std::vector; + using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! \brief Tracks prediction errors with and without components. + //! + //! DESCRIPTION:\n + //! This tracks the prediction errors with and without seasonal and + //! calendar periodic components and tests to see if including the + //! component is worthwhile. + class MATHS_EXPORT CComponentErrors { + public: + //! Initialize from a delimited string. + bool fromDelimited(const std::string& str); - private: - using TOptionalDouble = boost::optional; - using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; - using TSeasonalComponentPtrVec = std::vector; - using TCalendarComponentPtrVec = std::vector; - using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + //! Convert to a delimited string. + std::string toDelimited() const; - //! 
\brief Tracks prediction errors with and without components. - //! - //! DESCRIPTION:\n - //! This tracks the prediction errors with and without seasonal and - //! calendar periodic components and tests to see if including the - //! component is worthwhile. - class MATHS_EXPORT CComponentErrors - { - public: - //! Initialize from a delimited string. - bool fromDelimited(const std::string &str); + //! Update the errors. + //! + //! \param[in] error The prediction error. + //! \param[in] prediction The prediction from the component. + //! \param[in] weight The weight of \p error. + void add(double error, double prediction, double weight); - //! Convert to a delimited string. - std::string toDelimited() const; + //! Clear the error statistics. + void clear(); - //! Update the errors. - //! - //! \param[in] error The prediction error. - //! \param[in] prediction The prediction from the component. - //! \param[in] weight The weight of \p error. - void add(double error, double prediction, double weight); + //! Check if we should discard \p seasonal. + bool remove(core_t::TTime bucketLength, CSeasonalComponent& seasonal) const; - //! Clear the error statistics. - void clear(); + //! Check if we should discard \p calendar. + bool remove(core_t::TTime bucketLength, CCalendarComponent& calendar) const; - //! Check if we should discard \p seasonal. - bool remove(core_t::TTime bucketLength, CSeasonalComponent &seasonal) const; + //! Age the errors by \p factor. + void age(double factor); - //! Check if we should discard \p calendar. - bool remove(core_t::TTime bucketLength, CCalendarComponent &calendar) const; + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed) const; - //! Age the errors by \p factor. - void age(double factor); + private: + //! Truncate large, i.e. more than 6 sigma, errors. + static double winsorise(double squareError, const TFloatMeanAccumulator& variance); - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed) const; + private: + //! The mean prediction error in the window. + TFloatMeanAccumulator m_MeanErrorWithComponent; - private: - //! Truncate large, i.e. more than 6 sigma, errors. - static double winsorise(double squareError, - const TFloatMeanAccumulator &variance); - - private: - //! The mean prediction error in the window. - TFloatMeanAccumulator m_MeanErrorWithComponent; - - //! The mean prediction error in the window without the component. - TFloatMeanAccumulator m_MeanErrorWithoutComponent; - }; + //! The mean prediction error in the window without the component. + TFloatMeanAccumulator m_MeanErrorWithoutComponent; + }; - using TComponentErrorsVec = std::vector; - using TComponentErrorsPtrVec = std::vector; + using TComponentErrorsVec = std::vector; + using TComponentErrorsPtrVec = std::vector; - //! \brief The seasonal components of the decomposition. - struct MATHS_EXPORT SSeasonal - { - //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(double decayRate, - core_t::TTime bucketLength, - core::CStateRestoreTraverser &traverser); + //! \brief The seasonal components of the decomposition. + struct MATHS_EXPORT SSeasonal { + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(double decayRate, core_t::TTime bucketLength, core::CStateRestoreTraverser& traverser); - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to \p inserter. 
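CComponentErrors above makes a keep-or-discard decision by tracking the mean prediction error with the component and the error the decomposition would incur without it. Only that with/without comparison is taken from the comments; the squared-error bookkeeping and the 0.9 margin in this sketch are illustrative assumptions, not the library's constants.

    class ComponentErrors {
    public:
        //! \p error is the prediction error with the component subtracted;
        //! adding back \p prediction recovers the error without the component.
        void add(double error, double prediction, double weight) {
            add(m_ErrorWith, weight, error * error);
            double errorWithout = error + prediction;
            add(m_ErrorWithout, weight, errorWithout * errorWithout);
        }

        //! True if the component no longer earns its keep.
        bool shouldRemove() const {
            return m_ErrorWith.count > 0.0 && m_ErrorWith.mean > 0.9 * m_ErrorWithout.mean;
        }

        //! Age both error statistics by \p factor.
        void age(double factor) {
            m_ErrorWith.count *= factor;
            m_ErrorWithout.count *= factor;
        }

    private:
        struct Mean {
            double count = 0.0;
            double mean = 0.0;
        };

        static void add(Mean& m, double weight, double x) {
            m.count += weight;
            m.mean += weight * (x - m.mean) / m.count;
        }

        Mean m_ErrorWith;    // mean squared error including the component
        Mean m_ErrorWithout; // mean squared error if the component were dropped
    };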
+ void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Set the decay rate. - void decayRate(double decayRate); + //! Set the decay rate. + void decayRate(double decayRate); - //! Age the seasonal components to account for the interval \p end - //! - \p start elapsed time. - void propagateForwards(core_t::TTime start, core_t::TTime end); + //! Age the seasonal components to account for the interval \p end + //! - \p start elapsed time. + void propagateForwards(core_t::TTime start, core_t::TTime end); - //! Get the combined size of the seasonal components. - std::size_t size() const; + //! Get the combined size of the seasonal components. + std::size_t size() const; - //! Get the state to update. - void componentsErrorsAndDeltas(core_t::TTime time, - TSeasonalComponentPtrVec &components, - TComponentErrorsPtrVec &errors, - TDoubleVec &deltas); + //! Get the state to update. + void componentsErrorsAndDeltas(core_t::TTime time, + TSeasonalComponentPtrVec& components, + TComponentErrorsPtrVec& errors, + TDoubleVec& deltas); - //! Check if we need to interpolate any of the components. - bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const; + //! Check if we need to interpolate any of the components. + bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const; - //! Interpolate the components at \p time. - void interpolate(core_t::TTime time, core_t::TTime last, bool refine); + //! Interpolate the components at \p time. + void interpolate(core_t::TTime time, core_t::TTime last, bool refine); - //! Check if any of the components has been initialized. - bool initialized() const; + //! Check if any of the components has been initialized. + bool initialized() const; - //! Remove low value components - bool prune(core_t::TTime time, core_t::TTime bucketLength); + //! Remove low value components + bool prune(core_t::TTime time, core_t::TTime bucketLength); - //! Shift the components' time origin to \p time. - void shiftOrigin(core_t::TTime time); + //! Shift the components' time origin to \p time. + void shiftOrigin(core_t::TTime time); - //! Linearly scale the components' by \p scale. - void linearScale(core_t::TTime time, double scale); + //! Linearly scale the components' by \p scale. + void linearScale(core_t::TTime time, double scale); - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! Get the memory used by this object. + std::size_t memoryUsage() const; - //! The seasonal components. - maths_t::TSeasonalComponentVec s_Components; + //! The seasonal components. + maths_t::TSeasonalComponentVec s_Components; - //! The prediction errors relating to the component. - TComponentErrorsVec s_PredictionErrors; - }; + //! The prediction errors relating to the component. + TComponentErrorsVec s_PredictionErrors; + }; - using TSeasonalPtr = boost::shared_ptr; + using TSeasonalPtr = boost::shared_ptr; - //! \brief Calendar periodic components of the decomposition. - struct MATHS_EXPORT SCalendar - { - //! Initialize by reading state from \p traverser. 
- bool acceptRestoreTraverser(double decayRate, - core_t::TTime bucketLength, - core::CStateRestoreTraverser &traverser); + //! \brief Calendar periodic components of the decomposition. + struct MATHS_EXPORT SCalendar { + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(double decayRate, core_t::TTime bucketLength, core::CStateRestoreTraverser& traverser); - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Set the decay rate. - void decayRate(double decayRate); + //! Set the decay rate. + void decayRate(double decayRate); - //! Age the calendar components to account for the interval \p end - //! - \p start elapsed time. - void propagateForwards(core_t::TTime start, core_t::TTime end); + //! Age the calendar components to account for the interval \p end + //! - \p start elapsed time. + void propagateForwards(core_t::TTime start, core_t::TTime end); - //! Get the combined size of the seasonal components. - std::size_t size() const; + //! Get the combined size of the seasonal components. + std::size_t size() const; - //! Check if there is already a component for \p feature. - bool haveComponent(CCalendarFeature feature) const; + //! Check if there is already a component for \p feature. + bool haveComponent(CCalendarFeature feature) const; - //! Get the state to update. - void componentsAndErrors(core_t::TTime time, - TCalendarComponentPtrVec &components, - TComponentErrorsPtrVec &errors); + //! Get the state to update. + void componentsAndErrors(core_t::TTime time, TCalendarComponentPtrVec& components, TComponentErrorsPtrVec& errors); - //! Check if we need to interpolate any of the components. - bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const; + //! Check if we need to interpolate any of the components. + bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const; - //! Interpolate the components at \p time. - void interpolate(core_t::TTime time, core_t::TTime last, bool refine); + //! Interpolate the components at \p time. + void interpolate(core_t::TTime time, core_t::TTime last, bool refine); - //! Check if any of the components has been initialized. - bool initialized() const; + //! Check if any of the components has been initialized. + bool initialized() const; - //! Remove low value components. - bool prune(core_t::TTime time, core_t::TTime bucketLength); + //! Remove low value components. + bool prune(core_t::TTime time, core_t::TTime bucketLength); - //! Linearly scale the components' by \p scale. - void linearScale(core_t::TTime time, double scale); + //! Linearly scale the components' by \p scale. + void linearScale(core_t::TTime time, double scale); - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! Get the memory used by this object. + std::size_t memoryUsage() const; - //! The calendar components. - maths_t::TCalendarComponentVec s_Components; + //! The calendar components. 
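Note that SSeasonal and SCalendar both keep s_Components and s_PredictionErrors as parallel vectors, so prune() must remove the same positions from each to keep them aligned. A sketch of that contract with stand-in types and an invented keep criterion:

    #include <cstddef>
    #include <vector>

    struct Component { /* seasonal or calendar state */ };
    struct Errors { double meanError = 0.0; };

    // Invented criterion standing in for the real "low value" test.
    bool worthKeeping(const Errors& errors) { return errors.meanError < 1.0; }

    // Remove low value components; returns true if anything was pruned.
    bool prune(std::vector<Component>& components, std::vector<Errors>& errors) {
        bool pruned = false;
        // Iterate backwards so erasing does not shift indices still to visit.
        for (std::size_t i = components.size(); i-- > 0;) {
            if (!worthKeeping(errors[i])) {
                components.erase(components.begin() + static_cast<std::ptrdiff_t>(i));
                errors.erase(errors.begin() + static_cast<std::ptrdiff_t>(i));
                pruned = true;
            }
        }
        return pruned;
    }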
+ maths_t::TCalendarComponentVec s_Components; - //! The prediction errors after removing the component. - TComponentErrorsVec s_PredictionErrors; - }; + //! The prediction errors after removing the component. + TComponentErrorsVec s_PredictionErrors; + }; - using TCalendarPtr = boost::shared_ptr; + using TCalendarPtr = boost::shared_ptr; - private: - //! Get the total size of the components. - std::size_t size() const; + private: + //! Get the total size of the components. + std::size_t size() const; - //! Get the maximum permitted size of the components. - std::size_t maxSize() const; + //! Get the maximum permitted size of the components. + std::size_t maxSize() const; - //! Add new seasonal components to \p components. - bool addSeasonalComponents(const CPeriodicityHypothesisTestsResult &result, - const CExpandingWindow &window, - const TPredictor &predictor, - CTrendComponent &trend, - maths_t::TSeasonalComponentVec &components, - TComponentErrorsVec &errors) const; + //! Add new seasonal components to \p components. + bool addSeasonalComponents(const CPeriodicityHypothesisTestsResult& result, + const CExpandingWindow& window, + const TPredictor& predictor, + CTrendComponent& trend, + maths_t::TSeasonalComponentVec& components, + TComponentErrorsVec& errors) const; - //! Add a new calendar component to \p components. - bool addCalendarComponent(const CCalendarFeature &feature, - core_t::TTime time, - maths_t::TCalendarComponentVec &components, - TComponentErrorsVec &errors) const; + //! Add a new calendar component to \p components. + bool addCalendarComponent(const CCalendarFeature& feature, + core_t::TTime time, + maths_t::TCalendarComponentVec& components, + TComponentErrorsVec& errors) const; - //! Clear all component error statistics. - void clearComponentErrors(); + //! Clear all component error statistics. + void clearComponentErrors(); - //! Handle \p symbol. - void apply(std::size_t symbol, const SMessage &message); + //! Handle \p symbol. + void apply(std::size_t symbol, const SMessage& message); - //! Check if we should interpolate. - bool shouldInterpolate(core_t::TTime time, core_t::TTime last); + //! Check if we should interpolate. + bool shouldInterpolate(core_t::TTime time, core_t::TTime last); - //! Shift the various regression model time origins to \p time. - void shiftOrigin(core_t::TTime time); + //! Shift the various regression model time origins to \p time. + void shiftOrigin(core_t::TTime time); - //! Get the components in canonical form. - //! - //! This standardizes the level and gradient across the various - //! components. In particular, common offsets and gradients are - //! shifted into the long term trend or in the absence of that - //! the shortest component. - void canonicalize(core_t::TTime time); + //! Get the components in canonical form. + //! + //! This standardizes the level and gradient across the various + //! components. In particular, common offsets and gradients are + //! shifted into the long term trend or in the absence of that + //! the shortest component. + void canonicalize(core_t::TTime time); - //! Set a watcher for state changes. - void notifyOnNewComponents(bool *watcher); + //! Set a watcher for state changes. + void notifyOnNewComponents(bool* watcher); - private: - //! The state machine. - core::CStateMachine m_Machine; + private: + //! The state machine. + core::CStateMachine m_Machine; - //! Controls the rate at which information is lost. - double m_DecayRate; + //! Controls the rate at which information is lost. 
+ double m_DecayRate; - //! The raw data bucketing interval. - core_t::TTime m_BucketLength; + //! The raw data bucketing interval. + core_t::TTime m_BucketLength; - //! The number of buckets to use to estimate a periodic component. - std::size_t m_SeasonalComponentSize; + //! The number of buckets to use to estimate a periodic component. + std::size_t m_SeasonalComponentSize; - //! The number of buckets to use to estimate a periodic component. - std::size_t m_CalendarComponentSize; + //! The number of buckets to use to estimate a periodic component. + std::size_t m_CalendarComponentSize; - //! The long term trend. - CTrendComponent m_Trend; + //! The long term trend. + CTrendComponent m_Trend; - //! The seasonal components. - TSeasonalPtr m_Seasonal; + //! The seasonal components. + TSeasonalPtr m_Seasonal; - //! The calendar components. - TCalendarPtr m_Calendar; + //! The calendar components. + TCalendarPtr m_Calendar; - //! The mean error variance scale for the components. - TFloatMeanAccumulator m_MeanVarianceScale; + //! The mean error variance scale for the components. + TFloatMeanAccumulator m_MeanVarianceScale; - //! The moments of the values added. - TMeanVarAccumulator m_Moments; + //! The moments of the values added. + TMeanVarAccumulator m_Moments; - //! The moments of the values added after subtracting a trend. - TMeanVarAccumulator m_MomentsMinusTrend; + //! The moments of the values added after subtracting a trend. + TMeanVarAccumulator m_MomentsMinusTrend; - //! Set to true if the trend model should be used for prediction. - bool m_UsingTrendForPrediction; + //! Set to true if the trend model should be used for prediction. + bool m_UsingTrendForPrediction; - //! Set to true if non-null when the seasonal components change. - bool *m_Watcher; - }; + //! Set to true if non-null when the seasonal components change. + bool* m_Watcher; + }; }; //! Create a free function which will be found by Koenig lookup. -inline void swap(CTimeSeriesDecompositionDetail::CPeriodicityTest &lhs, - CTimeSeriesDecompositionDetail::CPeriodicityTest &rhs) -{ +inline void swap(CTimeSeriesDecompositionDetail::CPeriodicityTest& lhs, CTimeSeriesDecompositionDetail::CPeriodicityTest& rhs) { lhs.swap(rhs); } //! Create a free function which will be found by Koenig lookup. -inline void swap(CTimeSeriesDecompositionDetail::CCalendarTest &lhs, - CTimeSeriesDecompositionDetail::CCalendarTest &rhs) -{ +inline void swap(CTimeSeriesDecompositionDetail::CCalendarTest& lhs, CTimeSeriesDecompositionDetail::CCalendarTest& rhs) { lhs.swap(rhs); } //! Create a free function which will be found by Koenig lookup. 
-inline void swap(CTimeSeriesDecompositionDetail::CComponents &lhs, - CTimeSeriesDecompositionDetail::CComponents &rhs) -{ +inline void swap(CTimeSeriesDecompositionDetail::CComponents& lhs, CTimeSeriesDecompositionDetail::CComponents& rhs) { lhs.swap(rhs); } - } } diff --git a/include/maths/CTimeSeriesDecompositionInterface.h b/include/maths/CTimeSeriesDecompositionInterface.h index 8e05ad9209..314f6916b5 100644 --- a/include/maths/CTimeSeriesDecompositionInterface.h +++ b/include/maths/CTimeSeriesDecompositionInterface.h @@ -8,8 +8,8 @@ #define INCLUDED_ml_maths_CTimeSeriesDecompositionInterface_h #include -#include #include +#include #include #include @@ -18,15 +18,13 @@ #include #include +#include #include #include #include -#include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CMultivariatePrior; class CPrior; class CSeasonalComponent; @@ -34,155 +32,143 @@ struct SChangeDescription; //! \brief The interface for decomposing times series into periodic, //! calendar periodic and trend components. -class MATHS_EXPORT CTimeSeriesDecompositionInterface -{ - public: - using TDouble3Vec = core::CSmallVector; - using TDouble3VecVec = std::vector; - using TDoubleAry = boost::array; - using TWeights = CConstantWeights; - using TWriteForecastResult = std::function; - - //! The components of the decomposition. - enum EComponents - { - E_Diurnal = 0x1, - E_NonDiurnal = 0x2, - E_Seasonal = 0x3, - E_Trend = 0x4, - E_Calendar = 0x8, - E_All = 0xf, - E_TrendForced = 0x10 //!< Force get the trend component (if - //!< it's not being used for prediction). - //!< This needs to be bigger than E_All. - }; - - public: - virtual ~CTimeSeriesDecompositionInterface() = default; - - //! Clone this decomposition. - virtual CTimeSeriesDecompositionInterface *clone(bool isForForecast = false) const = 0; - - //! Set the data type. - virtual void dataType(maths_t::EDataType dataType) = 0; - - //! Set the decay rate. - virtual void decayRate(double decayRate) = 0; - - //! Get the decay rate. - virtual double decayRate() const = 0; - - //! Check if this is initialized. - virtual bool initialized() const = 0; - - //! Adds a time series point \f$(t, f(t))\f$. - //! - //! \param[in] time The time of the function point. - //! \param[in] value The function value at \p time. - //! \param[in] weightStyles The styles of \p weights. Both the - //! count and the Winsorisation weight styles have an effect. - //! See maths_t::ESampleWeightStyle for more details. - //! \param[in] weights The weights of \p value. The smaller - //! the product count weight the less influence \p value has - //! on the trend and it's local variance. - //! \return True if number of estimated components changed - //! and false otherwise. - virtual bool addPoint(core_t::TTime time, - double value, - const maths_t::TWeightStyleVec &weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec &weights = TWeights::UNIT) = 0; - - //! Apply \p change at \p time. - //! - //! \param[in] time The time of the change point. - //! \param[in] value The value immediately before the change - //! point. - //! \param[in] change A description of the change to apply. - //! \return True if a new component was detected. - virtual bool applyChange(core_t::TTime time, double value, - const SChangeDescription &change) = 0; - - //! Propagate the decomposition forwards to \p time. - virtual void propagateForwardsTo(core_t::TTime time) = 0; - - //! Get the mean value of the time series in the vicinity of \p time. 
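The inline swap overloads reformatted a few lines back exist for argument-dependent (Koenig) lookup: generic code written with the standard "using std::swap;" two-step finds the namespace-local overload and so gets the cheap member swap instead of std::swap's move-construct-and-assign fallback. A minimal sketch of the idiom with a stand-in type and namespace:

    #include <utility>
    #include <vector>

    namespace example {

    class Components {
    public:
        // O(1): just exchanges the vectors' internal buffers.
        void swap(Components& other) { m_State.swap(other.m_State); }

    private:
        std::vector<double> m_State;
    };

    // Found by ADL because it lives in the same namespace as Components.
    inline void swap(Components& lhs, Components& rhs) {
        lhs.swap(rhs);
    }

    } // namespace example

    template<typename T>
    void reorder(T& a, T& b) {
        using std::swap; // fallback for types with no overload of their own
        swap(a, b);      // ADL picks example::swap for example::Components
    }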
- virtual double meanValue(core_t::TTime time) const = 0; - - //! Get the value of the time series at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] confidence The symmetric confidence interval for the prediction - //! the baseline as a percentage. - //! \param[in] components The components to include in the baseline. - virtual maths_t::TDoubleDoublePr value(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - bool smooth = true) const = 0; - - //! Forecast from \p start to \p end at \p dt intervals. - //! - //! \param[in] startTime The start of the forecast. - //! \param[in] endTime The end of the forecast. - //! \param[in] step The time increment. - //! \param[in] confidence The forecast confidence interval. - //! \param[in] minimumScale The minimum permitted seasonal scale. - //! \param[in] writer Forecast results are passed to this callback. - virtual void forecast(core_t::TTime startTime, - core_t::TTime endTime, - core_t::TTime step, - double confidence, - double minimumScale, - const TWriteForecastResult &writer) = 0; - - //! Detrend \p value from the time series being modeled by removing - //! any periodic component at \p time. - //! - //! \note That detrending preserves the time series mean. - virtual double detrend(core_t::TTime time, - double value, - double confidence, - int components = E_All) const = 0; - - //! Get the mean variance of the baseline. - virtual double meanVariance() const = 0; - - //! Compute the variance scale at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] variance The variance of the distribution to scale. - //! \param[in] confidence The symmetric confidence interval for the - //! variance scale as a percentage. - virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, - double variance, - double confidence, - bool smooth = true) const = 0; - - //! Roll time forwards by \p skipInterval. - virtual void skipTime(core_t::TTime skipInterval) = 0; - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const = 0; - - //! Get the memory used by this instance - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; - - //! Get the memory used by this instance - virtual std::size_t memoryUsage() const = 0; - - //! Get the static size of this object. - virtual std::size_t staticSize() const = 0; - - //! Get the time shift which is being applied. - virtual core_t::TTime timeShift(void) const = 0; - - //! Get the seasonal components. - virtual const maths_t::TSeasonalComponentVec &seasonalComponents() const = 0; - - //! This is the latest time of any point added to this object or - //! the time skipped to. - virtual core_t::TTime lastValueTime() const = 0; +class MATHS_EXPORT CTimeSeriesDecompositionInterface { +public: + using TDouble3Vec = core::CSmallVector; + using TDouble3VecVec = std::vector; + using TDoubleAry = boost::array; + using TWeights = CConstantWeights; + using TWriteForecastResult = std::function; + + //! The components of the decomposition. + enum EComponents { + E_Diurnal = 0x1, + E_NonDiurnal = 0x2, + E_Seasonal = 0x3, + E_Trend = 0x4, + E_Calendar = 0x8, + E_All = 0xf, + E_TrendForced = 0x10 //!< Force get the trend component (if + //!< it's not being used for prediction). + //!< This needs to be bigger than E_All. + }; + +public: + virtual ~CTimeSeriesDecompositionInterface() = default; + + //! Clone this decomposition. 
+ virtual CTimeSeriesDecompositionInterface* clone(bool isForForecast = false) const = 0;
+
+ //! Set the data type.
+ virtual void dataType(maths_t::EDataType dataType) = 0;
+
+ //! Set the decay rate.
+ virtual void decayRate(double decayRate) = 0;
+
+ //! Get the decay rate.
+ virtual double decayRate() const = 0;
+
+ //! Check if this is initialized.
+ virtual bool initialized() const = 0;
+
+ //! Adds a time series point \f$(t, f(t))\f$.
+ //!
+ //! \param[in] time The time of the function point.
+ //! \param[in] value The function value at \p time.
+ //! \param[in] weightStyles The styles of \p weights. Both the
+ //! count and the Winsorisation weight styles have an effect.
+ //! See maths_t::ESampleWeightStyle for more details.
+ //! \param[in] weights The weights of \p value. The smaller
+ //! the product count weight the less influence \p value has
+ //! on the trend and its local variance.
+ //! \return True if the number of estimated components changed
+ //! and false otherwise.
+ virtual bool addPoint(core_t::TTime time,
+ double value,
+ const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT,
+ const maths_t::TDouble4Vec& weights = TWeights::UNIT) = 0;
+
+ //! Apply \p change at \p time.
+ //!
+ //! \param[in] time The time of the change point.
+ //! \param[in] value The value immediately before the change
+ //! point.
+ //! \param[in] change A description of the change to apply.
+ //! \return True if a new component was detected.
+ virtual bool applyChange(core_t::TTime time, double value, const SChangeDescription& change) = 0;
+
+ //! Propagate the decomposition forwards to \p time.
+ virtual void propagateForwardsTo(core_t::TTime time) = 0;
+
+ //! Get the mean value of the time series in the vicinity of \p time.
+ virtual double meanValue(core_t::TTime time) const = 0;
+
+ //! Get the value of the time series at \p time.
+ //!
+ //! \param[in] time The time of interest.
+ //! \param[in] confidence The symmetric confidence interval for the prediction
+ //! of the baseline as a percentage.
+ //! \param[in] components The components to include in the baseline.
+ virtual maths_t::TDoubleDoublePr
+ value(core_t::TTime time, double confidence = 0.0, int components = E_All, bool smooth = true) const = 0;
+
+ //! Forecast from \p start to \p end at \p dt intervals.
+ //!
+ //! \param[in] startTime The start of the forecast.
+ //! \param[in] endTime The end of the forecast.
+ //! \param[in] step The time increment.
+ //! \param[in] confidence The forecast confidence interval.
+ //! \param[in] minimumScale The minimum permitted seasonal scale.
+ //! \param[in] writer Forecast results are passed to this callback.
+ virtual void forecast(core_t::TTime startTime,
+ core_t::TTime endTime,
+ core_t::TTime step,
+ double confidence,
+ double minimumScale,
+ const TWriteForecastResult& writer) = 0;
+
+ //! Detrend \p value from the time series being modeled by removing
+ //! any periodic component at \p time.
+ //!
+ //! \note Detrending preserves the time series mean.
+ virtual double detrend(core_t::TTime time, double value, double confidence, int components = E_All) const = 0;
+
+ //! Get the mean variance of the baseline.
+ virtual double meanVariance() const = 0;
+
+ //! Compute the variance scale at \p time.
+ //!
+ //! \param[in] time The time of interest.
+ //! \param[in] variance The variance of the distribution to scale.
+ //! \param[in] confidence The symmetric confidence interval for the
+ //! variance scale as a percentage.
+ virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const = 0; + + //! Roll time forwards by \p skipInterval. + virtual void skipTime(core_t::TTime skipInterval) = 0; + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const = 0; + + //! Get the memory used by this instance + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; + + //! Get the memory used by this instance + virtual std::size_t memoryUsage() const = 0; + + //! Get the static size of this object. + virtual std::size_t staticSize() const = 0; + + //! Get the time shift which is being applied. + virtual core_t::TTime timeShift(void) const = 0; + + //! Get the seasonal components. + virtual const maths_t::TSeasonalComponentVec& seasonalComponents() const = 0; + + //! This is the latest time of any point added to this object or + //! the time skipped to. + virtual core_t::TTime lastValueTime() const = 0; }; - } } diff --git a/include/maths/CTimeSeriesDecompositionStateSerialiser.h b/include/maths/CTimeSeriesDecompositionStateSerialiser.h index 3cc1a6fc12..f4eb2fe978 100644 --- a/include/maths/CTimeSeriesDecompositionStateSerialiser.h +++ b/include/maths/CTimeSeriesDecompositionStateSerialiser.h @@ -15,15 +15,12 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CTimeSeriesDecompositionInterface; struct STimeSeriesDecompositionRestoreParams; @@ -40,28 +37,24 @@ struct STimeSeriesDecompositionRestoreParams; //! name/value pairs where the value may be a nested set of name/value //! pairs. Text format makes it easier to provide backwards/forwards //! compatibility in the future as the classes evolve. -class MATHS_EXPORT CTimeSeriesDecompositionStateSerialiser -{ - public: - //! Shared pointer to the CTimeSeriesDecompositionInterface abstract - //! base class. - using TDecompositionPtr = boost::shared_ptr; - - public: - //! Construct the appropriate CTimeSeriesDecompositionInterface - //! sub-class from its state document representation. Sets \p result - //! to NULL on failure. - bool operator()(const STimeSeriesDecompositionRestoreParams ¶ms, - TDecompositionPtr &result, - core::CStateRestoreTraverser &traverser) const; - - //! Persist state by passing information to the supplied inserter. - void operator()(const CTimeSeriesDecompositionInterface &decomposition, - core::CStatePersistInserter &inserter) const; +class MATHS_EXPORT CTimeSeriesDecompositionStateSerialiser { +public: + //! Shared pointer to the CTimeSeriesDecompositionInterface abstract + //! base class. + using TDecompositionPtr = boost::shared_ptr; + +public: + //! Construct the appropriate CTimeSeriesDecompositionInterface + //! sub-class from its state document representation. Sets \p result + //! to NULL on failure. + bool operator()(const STimeSeriesDecompositionRestoreParams& params, + TDecompositionPtr& result, + core::CStateRestoreTraverser& traverser) const; + + //! Persist state by passing information to the supplied inserter. 
+ void operator()(const CTimeSeriesDecompositionInterface& decomposition, core::CStatePersistInserter& inserter) const; }; - } } #endif // INCLUDED_ml_maths_CTimeSeriesDecompositionStateSerialiser_h - diff --git a/include/maths/CTimeSeriesDecompositionStub.h b/include/maths/CTimeSeriesDecompositionStub.h index d4f62cf7a1..4d7351cd56 100644 --- a/include/maths/CTimeSeriesDecompositionStub.h +++ b/include/maths/CTimeSeriesDecompositionStub.h @@ -10,10 +10,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Stub out the interface if it is known that the time series //! being modeled can't have seasonality. @@ -22,94 +20,82 @@ namespace maths //! This is a lightweight (empty) class which implements the interface //! for the case that the time series being modeled is known a-priori //! not to have seasonality. -class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositionInterface -{ - public: - //! Clone this decomposition. - virtual CTimeSeriesDecompositionStub *clone(bool isForForecast = false) const; +class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositionInterface { +public: + //! Clone this decomposition. + virtual CTimeSeriesDecompositionStub* clone(bool isForForecast = false) const; - //! No-op. - virtual void dataType(maths_t::EDataType dataType); + //! No-op. + virtual void dataType(maths_t::EDataType dataType); - //! No-op. - virtual void decayRate(double decayRate); + //! No-op. + virtual void decayRate(double decayRate); - //! Get the decay rate. - virtual double decayRate() const; + //! Get the decay rate. + virtual double decayRate() const; - //! Returns false. - virtual bool initialized() const; + //! Returns false. + virtual bool initialized() const; - //! No-op returning false. - virtual bool addPoint(core_t::TTime time, - double value, - const maths_t::TWeightStyleVec &weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec &weights = TWeights::UNIT); + //! No-op returning false. + virtual bool addPoint(core_t::TTime time, + double value, + const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT, + const maths_t::TDouble4Vec& weights = TWeights::UNIT); - //! No-op returning false. - virtual bool applyChange(core_t::TTime time, double value, - const SChangeDescription &change); + //! No-op returning false. + virtual bool applyChange(core_t::TTime time, double value, const SChangeDescription& change); - //! No-op. - virtual void propagateForwardsTo(core_t::TTime time); + //! No-op. + virtual void propagateForwardsTo(core_t::TTime time); - //! Returns 0. - virtual double meanValue(core_t::TTime time) const; + //! Returns 0. + virtual double meanValue(core_t::TTime time) const; - //! Returns (0.0, 0.0). - virtual maths_t::TDoubleDoublePr value(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - bool smooth = true) const; + //! Returns (0.0, 0.0). + virtual maths_t::TDoubleDoublePr value(core_t::TTime time, double confidence = 0.0, int components = E_All, bool smooth = true) const; - //! No-op. - virtual void forecast(core_t::TTime startTime, - core_t::TTime endTime, - core_t::TTime step, - double confidence, - double minimumScale, - const TWriteForecastResult &writer); + //! No-op. + virtual void forecast(core_t::TTime startTime, + core_t::TTime endTime, + core_t::TTime step, + double confidence, + double minimumScale, + const TWriteForecastResult& writer); - //! Returns \p value. 
- virtual double detrend(core_t::TTime time, - double value, - double confidence, - int components = E_All) const; + //! Returns \p value. + virtual double detrend(core_t::TTime time, double value, double confidence, int components = E_All) const; - //! Returns 0.0. - virtual double meanVariance() const; + //! Returns 0.0. + virtual double meanVariance() const; - //! Returns (1.0, 1.0). - virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, - double variance, - double confidence, - bool smooth = true) const; + //! Returns (1.0, 1.0). + virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const; - //! No-op. - virtual void skipTime(core_t::TTime skipInterval); + //! No-op. + virtual void skipTime(core_t::TTime skipInterval); - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const; - //! Get the static size of this object. - virtual std::size_t staticSize() const; + //! Get the static size of this object. + virtual std::size_t staticSize() const; - //! Returns zero. - virtual core_t::TTime timeShift() const; + //! Returns zero. + virtual core_t::TTime timeShift() const; - //! Returns an empty vector. - virtual const maths_t::TSeasonalComponentVec &seasonalComponents() const; + //! Returns an empty vector. + virtual const maths_t::TSeasonalComponentVec& seasonalComponents() const; - //! Returns 0. - virtual core_t::TTime lastValueTime() const; + //! Returns 0. + virtual core_t::TTime lastValueTime() const; }; - } } diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h index af115fa050..7545cc8813 100644 --- a/include/maths/CTimeSeriesModel.h +++ b/include/maths/CTimeSeriesModel.h @@ -18,10 +18,8 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CDecayRateController; class CMultivariatePrior; class CPrior; @@ -35,301 +33,279 @@ struct SModelRestoreParams; //! Computes a Winsorisation weight for \p value based on its //! one tail p-value. MATHS_EXPORT -double tailWinsorisationWeight(const CPrior &prior, - double derate, - double scale, - double value); +double tailWinsorisationWeight(const CPrior& prior, double derate, double scale, double value); //! Computes a Winsorisation weight for \p value based on its //! marginal for \p dimension one tail p-value. MATHS_EXPORT -double tailWinsorisationWeight(const CMultivariatePrior &prior, +double tailWinsorisationWeight(const CMultivariatePrior& prior, std::size_t dimension, double derate, double scale, - const core::CSmallVector &value); + const core::CSmallVector& value); //! \brief A CModel implementation for modeling a univariate time series. -class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel -{ - public: - using TDouble4Vec = core::CSmallVector; - using TTimeDoublePr = std::pair; - using TTimeDoublePrCBuf = boost::circular_buffer; - using TDecompositionPtr = boost::shared_ptr; - using TDecayRateController2Ary = boost::array; - - public: - //! 
\param[in] params The model parameters. - //! \param[in] id The *unique* identifier for this time series. - //! \param[in] trendModel The time series trend decomposition. - //! \param[in] residualModel The prior for the time series residual model. - //! \param[in] controllers Optional decay rate controllers for the trend - //! and residual model. - //! \param[in] modelAnomalies If true we use a separate model to capture - //! the characteristics of anomalous time periods. - CUnivariateTimeSeriesModel(const CModelParams ¶ms, - std::size_t id, - const CTimeSeriesDecompositionInterface &trendModel, - const CPrior &residualModel, - const TDecayRateController2Ary *controllers = 0, - bool modelAnomalies = true); - CUnivariateTimeSeriesModel(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - ~CUnivariateTimeSeriesModel(); - - //! Get the model identifier. - virtual std::size_t identifier() const; - - //! Create a copy of this model passing ownership to the caller. - virtual CUnivariateTimeSeriesModel *clone(std::size_t id) const; - - //! Create a copy of the state we need to persist passing ownership - //! to the caller. - virtual CUnivariateTimeSeriesModel *cloneForPersistence() const; - - //! Create a copy of the state we need to run forecasting. - virtual CUnivariateTimeSeriesModel *cloneForForecast() const; - - //! Return true if forecast is currently possible for this model. - virtual bool isForecastPossible() const; - - //! Tell this to model correlations. - virtual void modelCorrelations(CTimeSeriesCorrelations &model); - - //! Get the correlated time series identifier pairs if any. - virtual TSize2Vec1Vec correlates() const; - - //! Update the model with the bucket \p value. - virtual void addBucketValue(const TTimeDouble2VecSizeTrVec &value); - - //! Update the model with new samples. - virtual EUpdateResult addSamples(const CModelAddSamplesParams ¶ms, - TTimeDouble2VecSizeTrVec samples); - - //! Advance time by \p gap. - virtual void skipTime(core_t::TTime gap); - - //! Get the most likely value for the time series at \p time. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - - //! Get the most likely value for each correlate time series - //! at \p time, if there are any. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec1Vec &weights) const; - - //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - - //! Remove any trend components from \p value. - virtual void detrend(const TTime2Vec1Vec &time, - double confidenceInterval, - TDouble2Vec1Vec &value) const; - - //! Get the best (least MSE) predicted value at \p time. - virtual TDouble2Vec predict(core_t::TTime time, - const TSizeDoublePr1Vec &correlated = TSizeDoublePr1Vec(), - TDouble2Vec hint = TDouble2Vec()) const; - - //! Get the prediction and \p confidenceInterval percentage - //! confidence interval for the time series at \p time. - virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, - double confidenceInterval, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - - //! Forecast the time series and get its \p confidenceInterval - //! percentage confidence interval between \p startTime and - //! \p endTime. 
- virtual bool forecast(core_t::TTime startTime, - core_t::TTime endTime, - double confidenceInterval, - const TDouble2Vec &minimum, - const TDouble2Vec &maximum, - const TForecastPushDatapointFunc &forecastPushDataPointFunc, - std::string &messageOut); - - //! Compute the probability of drawing \p value at \p time. - virtual bool probability(const CModelProbabilityParams ¶ms, - const TTime2Vec1Vec &time, - const TDouble2Vec1Vec &value, - double &probability, - TTail2Vec &tail, - bool &conditional, - TSize1Vec &mostAnomalousCorrelate) const; - - //! Get the Winsorisation weight to apply to \p value. - virtual TDouble2Vec winsorisationWeight(double derate, - core_t::TTime time, - const TDouble2Vec &value) const; - - //! Get the seasonal variance scale at \p time. - virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const; - - //! Compute a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; - - //! Initialize reading state from \p traverser. - bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Persist by passing information to \p inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Get the type of data being modeled. - virtual maths_t::EDataType dataType() const; - - //! \name Helpers - //@{ - //! Unpack the weights in \p weights. - static TDouble4Vec unpack(const TDouble2Vec4Vec &weights); - - //! Reinitialize \p residualModel using the detrended values - //! from \p slidingWindow. - static void reinitializeResidualModel(double learnRate, - const TDecompositionPtr &trend, - const TTimeDoublePrCBuf &slidingWindow, - CPrior &residualModel); - //@} - - //! \name Test Functions - //@{ - //! Get the sliding window of recent values. - const TTimeDoublePrCBuf &slidingWindow() const; - - //! Get the trend. - const CTimeSeriesDecompositionInterface &trendModel() const; - - //! Get the residual model. - const CPrior &residualModel(void) const; - //@} - - private: - using TSizeVec = std::vector; - using TDouble1Vec = core::CSmallVector; - using TDouble1VecVec = std::vector; - using TDouble2Vec4VecVec = std::vector; - using TVector = CVectorNx1; - using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TDecayRateController2AryPtr = boost::shared_ptr; - using TPriorPtr = boost::shared_ptr; - using TAnomalyModelPtr = boost::shared_ptr; - using TMultivariatePriorCPtrSizePr = std::pair; - using TMultivariatePriorCPtrSizePr1Vec = core::CSmallVector; - using TModelCPtr1Vec = core::CSmallVector; - using TChangeDetectorPtr = boost::shared_ptr; - - private: - CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel &other, - std::size_t id, - bool isForForecast = false); - - //! Test for and apply any change we find. - EUpdateResult testAndApplyChange(const CModelAddSamplesParams ¶ms, - const TSizeVec &order, - const TTimeDouble2VecSizeTrVec &samples); - - //! Apply \p change to this model. - EUpdateResult applyChange(const SChangeDescription &change); - - //! Update the trend with \p samples. - EUpdateResult updateTrend(const maths_t::TWeightStyleVec &trendStyles, - const TTimeDouble2VecSizeTrVec &samples, - const TDouble2Vec4VecVec &trendWeights); - - - //! Compute the prediction errors for \p sample. 
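The Winsorisation weight above exists to down-weight samples whose one tail p-value marks them as extreme, so outliers perturb the residual model less. A toy sketch of that idea for a Gaussian residual model; the cutoff constants here are invented, not the ones the library derives from the prior and the derate argument:

// Toy sketch: map a one tail p-value to a weight in [0, 1] so extreme
// samples update the residual model less. Cutoffs are invented.
#include <algorithm>
#include <cmath>
#include <iostream>

double oneTailPValue(double mean, double sd, double x) {
    // P(X >= |x|) for a normal, i.e. the upper tail probability.
    return 0.5 * std::erfc(std::fabs(x - mean) / (sd * std::sqrt(2.0)));
}

double winsorisationWeight(double p) {
    const double fullWeightP = 1e-3;   // assumed: above this, full weight
    const double zeroWeightP = 1e-7;   // assumed: below this, weight zero
    if (p >= fullWeightP) { return 1.0; }
    double logRatio = std::log(p / zeroWeightP) / std::log(fullWeightP / zeroWeightP);
    return std::max(logRatio, 0.0);
}

int main() {
    std::cout << winsorisationWeight(oneTailPValue(0.0, 1.0, 1.0)) << '\n';  // 1
    std::cout << winsorisationWeight(oneTailPValue(0.0, 1.0, 6.0)) << '\n';  // 0
}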
- void appendPredictionErrors(double interval, double sample, TDouble1VecVec (&result)[2]); - - //! Reinitialize state after detecting a new component of the trend - //! decomposition. - void reinitializeStateGivenNewComponent(void); - - //! Get the models for the correlations and the models of the correlated - //! time series. - bool correlationModels(TSize1Vec &correlated, - TSize2Vec1Vec &variables, - TMultivariatePriorCPtrSizePr1Vec &correlationDistributionModels, - TModelCPtr1Vec &correlatedTimeSeriesModels) const; - - private: - //! A unique identifier for this model. - std::size_t m_Id; - - //! True if the data are non-negative. - bool m_IsNonNegative; - - //! True if the model can be forecast. - bool m_IsForecastable; - - //! A random number generator for sampling the sliding window. - CPRNG::CXorOShiro128Plus m_Rng; - - //! These control the trend and residual model decay rates (see - //! CDecayRateController for more details). - TDecayRateController2AryPtr m_Controllers; - - //! The time series trend decomposition. - TDecompositionPtr m_TrendModel; - - //! The time series' residual model. - TPriorPtr m_ResidualModel; - - //! A model for time periods when the basic model can't predict the - //! value of the time series. - TAnomalyModelPtr m_AnomalyModel; - - //! The last "normal" time and median value. - TTimeDoublePr m_CandidateChangePoint; - - //! If the time series appears to be undergoing change, the contiguous - //! interval of unpredictable values. - core_t::TTime m_CurrentChangeInterval; - - //! Used to test for changes in the time series. - TChangeDetectorPtr m_ChangeDetector; - - //! A sliding window of the recent samples (used to reinitialize the - //! residual model when a new trend component is detected). - TTimeDoublePrCBuf m_SlidingWindow; - - //! Models the correlations between time series. - CTimeSeriesCorrelations *m_Correlations; +class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { +public: + using TDouble4Vec = core::CSmallVector; + using TTimeDoublePr = std::pair; + using TTimeDoublePrCBuf = boost::circular_buffer; + using TDecompositionPtr = boost::shared_ptr; + using TDecayRateController2Ary = boost::array; + +public: + //! \param[in] params The model parameters. + //! \param[in] id The *unique* identifier for this time series. + //! \param[in] trendModel The time series trend decomposition. + //! \param[in] residualModel The prior for the time series residual model. + //! \param[in] controllers Optional decay rate controllers for the trend + //! and residual model. + //! \param[in] modelAnomalies If true we use a separate model to capture + //! the characteristics of anomalous time periods. + CUnivariateTimeSeriesModel(const CModelParams& params, + std::size_t id, + const CTimeSeriesDecompositionInterface& trendModel, + const CPrior& residualModel, + const TDecayRateController2Ary* controllers = 0, + bool modelAnomalies = true); + CUnivariateTimeSeriesModel(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); + ~CUnivariateTimeSeriesModel(); + + //! Get the model identifier. + virtual std::size_t identifier() const; + + //! Create a copy of this model passing ownership to the caller. + virtual CUnivariateTimeSeriesModel* clone(std::size_t id) const; + + //! Create a copy of the state we need to persist passing ownership + //! to the caller. + virtual CUnivariateTimeSeriesModel* cloneForPersistence() const; + + //! Create a copy of the state we need to run forecasting. 
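Taken together, the members of this class coordinate a simple loop on each update: detrend the sample against the decomposition, feed the residual to the residual model, and predict as trend plus expected residual. A stand-in sketch of that flow (the real class adds weights, decay rate controllers, change and anomaly handling):

// Sketch of the update flow the model coordinates. Stand-in maths only:
// an EWMA stands in for the decomposition and a running mean for the prior.
#include <initializer_list>
#include <iostream>

struct Trend {
    double level = 0.0;
    void add(double x) { level += 0.1 * (x - level); }   // slow EWMA
    double predict() const { return level; }
};

struct Residuals {
    double mean = 0.0, weight = 0.0;
    void add(double r) { weight += 1.0; mean += (r - mean) / weight; }
};

int main() {
    Trend trend;
    Residuals residuals;
    for (double x : {10.0, 10.5, 11.0, 10.8}) {
        trend.add(x);
        residuals.add(x - trend.predict());   // detrended sample
    }
    std::cout << trend.predict() + residuals.mean << '\n';   // prediction
}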
+ virtual CUnivariateTimeSeriesModel* cloneForForecast() const; + + //! Return true if forecast is currently possible for this model. + virtual bool isForecastPossible() const; + + //! Tell this to model correlations. + virtual void modelCorrelations(CTimeSeriesCorrelations& model); + + //! Get the correlated time series identifier pairs if any. + virtual TSize2Vec1Vec correlates() const; + + //! Update the model with the bucket \p value. + virtual void addBucketValue(const TTimeDouble2VecSizeTrVec& value); + + //! Update the model with new samples. + virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, TTimeDouble2VecSizeTrVec samples); + + //! Advance time by \p gap. + virtual void skipTime(core_t::TTime gap); + + //! Get the most likely value for the time series at \p time. + virtual TDouble2Vec mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const; + + //! Get the most likely value for each correlate time series + //! at \p time, if there are any. + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec1Vec& weights) const; + + //! Get the local maxima of the residual distribution. + virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const; + + //! Remove any trend components from \p value. + virtual void detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const; + + //! Get the best (least MSE) predicted value at \p time. + virtual TDouble2Vec + predict(core_t::TTime time, const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(), TDouble2Vec hint = TDouble2Vec()) const; + + //! Get the prediction and \p confidenceInterval percentage + //! confidence interval for the time series at \p time. + virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, + double confidenceInterval, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const; + + //! Forecast the time series and get its \p confidenceInterval + //! percentage confidence interval between \p startTime and + //! \p endTime. + virtual bool forecast(core_t::TTime startTime, + core_t::TTime endTime, + double confidenceInterval, + const TDouble2Vec& minimum, + const TDouble2Vec& maximum, + const TForecastPushDatapointFunc& forecastPushDataPointFunc, + std::string& messageOut); + + //! Compute the probability of drawing \p value at \p time. + virtual bool probability(const CModelProbabilityParams& params, + const TTime2Vec1Vec& time, + const TDouble2Vec1Vec& value, + double& probability, + TTail2Vec& tail, + bool& conditional, + TSize1Vec& mostAnomalousCorrelate) const; + + //! Get the Winsorisation weight to apply to \p value. + virtual TDouble2Vec winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const; + + //! Get the seasonal variance scale at \p time. + virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const; + + //! Compute a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; + + //! Debug the memory used by this object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const; + + //! Initialize reading state from \p traverser. + bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! 
Persist by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Get the type of data being modeled. + virtual maths_t::EDataType dataType() const; + + //! \name Helpers + //@{ + //! Unpack the weights in \p weights. + static TDouble4Vec unpack(const TDouble2Vec4Vec& weights); + + //! Reinitialize \p residualModel using the detrended values + //! from \p slidingWindow. + static void reinitializeResidualModel(double learnRate, + const TDecompositionPtr& trend, + const TTimeDoublePrCBuf& slidingWindow, + CPrior& residualModel); + //@} + + //! \name Test Functions + //@{ + //! Get the sliding window of recent values. + const TTimeDoublePrCBuf& slidingWindow() const; + + //! Get the trend. + const CTimeSeriesDecompositionInterface& trendModel() const; + + //! Get the residual model. + const CPrior& residualModel(void) const; + //@} + +private: + using TSizeVec = std::vector; + using TDouble1Vec = core::CSmallVector; + using TDouble1VecVec = std::vector; + using TDouble2Vec4VecVec = std::vector; + using TVector = CVectorNx1; + using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TDecayRateController2AryPtr = boost::shared_ptr; + using TPriorPtr = boost::shared_ptr; + using TAnomalyModelPtr = boost::shared_ptr; + using TMultivariatePriorCPtrSizePr = std::pair; + using TMultivariatePriorCPtrSizePr1Vec = core::CSmallVector; + using TModelCPtr1Vec = core::CSmallVector; + using TChangeDetectorPtr = boost::shared_ptr; + +private: + CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel& other, std::size_t id, bool isForForecast = false); + + //! Test for and apply any change we find. + EUpdateResult testAndApplyChange(const CModelAddSamplesParams& params, const TSizeVec& order, const TTimeDouble2VecSizeTrVec& samples); + + //! Apply \p change to this model. + EUpdateResult applyChange(const SChangeDescription& change); + + //! Update the trend with \p samples. + EUpdateResult updateTrend(const maths_t::TWeightStyleVec& trendStyles, + const TTimeDouble2VecSizeTrVec& samples, + const TDouble2Vec4VecVec& trendWeights); + + //! Compute the prediction errors for \p sample. + void appendPredictionErrors(double interval, double sample, TDouble1VecVec (&result)[2]); + + //! Reinitialize state after detecting a new component of the trend + //! decomposition. + void reinitializeStateGivenNewComponent(void); + + //! Get the models for the correlations and the models of the correlated + //! time series. + bool correlationModels(TSize1Vec& correlated, + TSize2Vec1Vec& variables, + TMultivariatePriorCPtrSizePr1Vec& correlationDistributionModels, + TModelCPtr1Vec& correlatedTimeSeriesModels) const; + +private: + //! A unique identifier for this model. + std::size_t m_Id; + + //! True if the data are non-negative. + bool m_IsNonNegative; + + //! True if the model can be forecast. + bool m_IsForecastable; + + //! A random number generator for sampling the sliding window. + CPRNG::CXorOShiro128Plus m_Rng; + + //! These control the trend and residual model decay rates (see + //! CDecayRateController for more details). + TDecayRateController2AryPtr m_Controllers; + + //! The time series trend decomposition. + TDecompositionPtr m_TrendModel; + + //! The time series' residual model. + TPriorPtr m_ResidualModel; + + //! A model for time periods when the basic model can't predict the + //! value of the time series. + TAnomalyModelPtr m_AnomalyModel; + + //! The last "normal" time and median value. 
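reinitializeResidualModel and the sliding window member work together: when a new trend component is detected, the residual model is refitted from the buffered recent values, detrended by the new decomposition. A minimal sketch under that reading, with stand-in types:

// Sketch: refit a residual model from a window of (time, value) pairs
// after the trend changes. A linear lambda stands in for the new trend.
#include <iostream>
#include <utility>
#include <vector>

int main() {
    std::vector<std::pair<long, double> > window{{0, 1.0}, {1, 2.1}, {2, 2.9}};
    auto newTrendPrediction = [](long t) { return 1.0 * t + 1.0; };  // assumed new trend

    double mean = 0.0, count = 0.0;
    for (const auto& tv : window) {
        double residual = tv.second - newTrendPrediction(tv.first);
        count += 1.0;
        mean += (residual - mean) / count;   // refit from scratch
    }
    std::cout << "reinitialized residual mean = " << mean << '\n';
}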
+ TTimeDoublePr m_CandidateChangePoint; + + //! If the time series appears to be undergoing change, the contiguous + //! interval of unpredictable values. + core_t::TTime m_CurrentChangeInterval; + + //! Used to test for changes in the time series. + TChangeDetectorPtr m_ChangeDetector; + + //! A sliding window of the recent samples (used to reinitialize the + //! residual model when a new trend component is detected). + TTimeDoublePrCBuf m_SlidingWindow; + + //! Models the correlations between time series. + CTimeSeriesCorrelations* m_Correlations; }; //! \brief Manages the creation correlate models. -class MATHS_EXPORT CTimeSeriesCorrelateModelAllocator -{ - public: - using TMultivariatePriorPtr = boost::shared_ptr; +class MATHS_EXPORT CTimeSeriesCorrelateModelAllocator { +public: + using TMultivariatePriorPtr = boost::shared_ptr; - public: - virtual ~CTimeSeriesCorrelateModelAllocator() = default; +public: + virtual ~CTimeSeriesCorrelateModelAllocator() = default; - //! Check if we can still allocate any correlations. - virtual bool areAllocationsAllowed() const = 0; + //! Check if we can still allocate any correlations. + virtual bool areAllocationsAllowed() const = 0; - //! Check if \p correlations exceeds the memory limit. - virtual bool exceedsLimit(std::size_t correlations) const = 0; + //! Check if \p correlations exceeds the memory limit. + virtual bool exceedsLimit(std::size_t correlations) const = 0; - //! Get the maximum number of correlations we should model. - virtual std::size_t maxNumberCorrelations() const = 0; + //! Get the maximum number of correlations we should model. + virtual std::size_t maxNumberCorrelations() const = 0; - //! Get the chunk size in which to allocate correlations. - virtual std::size_t chunkSize() const = 0; + //! Get the chunk size in which to allocate correlations. + virtual std::size_t chunkSize() const = 0; - //! Create a new prior for a correlation model. - virtual TMultivariatePriorPtr newPrior() const = 0; + //! Create a new prior for a correlation model. + virtual TMultivariatePriorPtr newPrior() const = 0; }; //! \brief A model of the top k correlates. @@ -347,384 +323,358 @@ class MATHS_EXPORT CTimeSeriesCorrelateModelAllocator //! The user of this class simply needs to pass it to CUnivariateTimeSeriesModel on //! construction and manage the calls to update it after a batch of samples has been //! added and to refresh it before a batch of samples is added to the individual models. -class MATHS_EXPORT CTimeSeriesCorrelations -{ - public: - using TTime1Vec = core::CSmallVector; - using TDouble1Vec = core::CSmallVector; - using TDouble2Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; - using TSize1Vec = core::CSmallVector; - using TSizeSize1VecUMap = boost::unordered_map; - using TSize2Vec = core::CSmallVector; - using TSize2Vec1Vec = core::CSmallVector; - using TSizeSizePr = std::pair; - using TMultivariatePriorPtr = boost::shared_ptr; - using TMultivariatePriorPtrDoublePr = std::pair; - using TSizeSizePrMultivariatePriorPtrDoublePrUMap = boost::unordered_map; - using TMultivariatePriorCPtrSizePr = std::pair; - using TMultivariatePriorCPtrSizePr1Vec = core::CSmallVector; - - //! \brief Wraps up the sampled data for a feature. - struct MATHS_EXPORT SSampleData - { - //! The data type. - maths_t::EDataType s_Type; - //! The times of the samples. - TTime1Vec s_Times; - //! The detrended samples. - TDouble1Vec s_Samples; - //! The tags for each sample. - TSize1Vec s_Tags; - //! 
The sample weights. - TDouble4Vec1Vec s_Weights; - //! The interval by which to age the correlation model. - double s_Interval; - //! The decay rate multiplier. - double s_Multiplier; - }; - - using TSizeSampleDataUMap = boost::unordered_map; - - public: - CTimeSeriesCorrelations(double minimumSignificantCorrelation, double decayRate); - const CTimeSeriesCorrelations &operator=(const CTimeSeriesCorrelations&) = delete; - - //! Create a copy of this model passing ownership to the caller. - CTimeSeriesCorrelations *clone() const; - - //! Create a copy of the state we need to persist passing ownership - //! to the caller. - CTimeSeriesCorrelations *cloneForPersistence() const; - - //! Process all samples added from individual time series models. - //! - //! \note This should be called exactly once after every univariate - //! time series model has added its samples. - void processSamples(const maths_t::TWeightStyleVec &weightStyles); - - //! Refresh the models to account for any changes to the correlation - //! estimates. - //! - //! \note This should be called exactly once before every univariate - //! time series model adds its samples. - void refresh(const CTimeSeriesCorrelateModelAllocator &allocator); - - //! Get the correlation joint distribution models. - const TSizeSizePrMultivariatePriorPtrDoublePrUMap &correlationModels() const; - - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this object. - std::size_t memoryUsage() const; - - //! Initialize reading state from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Persist by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - private: - using TTimeDouble2VecSizeTr = core::CTriple; - using TTimeDouble2VecSizeTrVec = std::vector; - using TModelCPtrVec = std::vector; - using TModelCPtr1Vec = core::CSmallVector; - using TSizeSizePrMultivariatePriorPtrDoublePrPr = - std::pair; - - private: - CTimeSeriesCorrelations(const CTimeSeriesCorrelations &other, - bool isForPersistence = false); - - //! Restore the correlation distribution models reading state from - //! \p traverser. - bool restoreCorrelationModels(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Persist the correlation distribution models passing information - //! to \p inserter. - void persistCorrelationModels(core::CStatePersistInserter &inserter) const; - - //! Restore the \p model reading state from \p traverser. - static bool restore(const SDistributionRestoreParams ¶ms, - TSizeSizePrMultivariatePriorPtrDoublePrPr &model, - core::CStateRestoreTraverser &traverser); - - //! Persist the \p model passing information to \p inserter. - static void persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr &model, - core::CStatePersistInserter &inserter); - - //! Add the time series identified by \p id. - void addTimeSeries(std::size_t id, const CUnivariateTimeSeriesModel &model); - - //! Remove the correlates of \p id. - void removeTimeSeries(std::size_t id); - - //! Add a sample for the time series identified by \p id. - void addSamples(std::size_t id, - const CModelAddSamplesParams ¶ms, - const TTimeDouble2VecSizeTrVec &samples, - double multiplier); - - //! Get the ids of the time series correlated with \p id. - TSize1Vec correlated(std::size_t id) const; - - //! 
Get the correlation models and the correlated time series models - //! for for \p id. - bool correlationModels(std::size_t id, - TSize1Vec &correlated, - TSize2Vec1Vec &variables, - TMultivariatePriorCPtrSizePr1Vec &correlationDistributionModels, - TModelCPtr1Vec &correlatedTimeSeriesModels) const; - - //! Refresh the mapping from time series identifier to correlate - //! identifiers. - void refreshLookup(); - - private: - //! The minimum significant Pearson correlation. - double m_MinimumSignificantCorrelation; - - //! Filled in with the sample data if we are modeling correlates. - TSizeSampleDataUMap m_SampleData; - - //! Estimates the Pearson correlations of the k-most correlated - //! time series. - CKMostCorrelated m_Correlations; - - //! A lookup by time series identifier for correlated time series. - TSizeSize1VecUMap m_CorrelatedLookup; - - //! Models of the joint distribution (of the residuals) of the pairs - //! of time series which have significant correlation. - TSizeSizePrMultivariatePriorPtrDoublePrUMap m_CorrelationDistributionModels; - - //! A collection of univariate time series models for which this is - //! modeling correlations (indexed by their identifier). - TModelCPtrVec m_TimeSeriesModels; - - friend class CUnivariateTimeSeriesModel; +class MATHS_EXPORT CTimeSeriesCorrelations { +public: + using TTime1Vec = core::CSmallVector; + using TDouble1Vec = core::CSmallVector; + using TDouble2Vec = core::CSmallVector; + using TDouble4Vec = core::CSmallVector; + using TDouble4Vec1Vec = core::CSmallVector; + using TSize1Vec = core::CSmallVector; + using TSizeSize1VecUMap = boost::unordered_map; + using TSize2Vec = core::CSmallVector; + using TSize2Vec1Vec = core::CSmallVector; + using TSizeSizePr = std::pair; + using TMultivariatePriorPtr = boost::shared_ptr; + using TMultivariatePriorPtrDoublePr = std::pair; + using TSizeSizePrMultivariatePriorPtrDoublePrUMap = boost::unordered_map; + using TMultivariatePriorCPtrSizePr = std::pair; + using TMultivariatePriorCPtrSizePr1Vec = core::CSmallVector; + + //! \brief Wraps up the sampled data for a feature. + struct MATHS_EXPORT SSampleData { + //! The data type. + maths_t::EDataType s_Type; + //! The times of the samples. + TTime1Vec s_Times; + //! The detrended samples. + TDouble1Vec s_Samples; + //! The tags for each sample. + TSize1Vec s_Tags; + //! The sample weights. + TDouble4Vec1Vec s_Weights; + //! The interval by which to age the correlation model. + double s_Interval; + //! The decay rate multiplier. + double s_Multiplier; + }; + + using TSizeSampleDataUMap = boost::unordered_map; + +public: + CTimeSeriesCorrelations(double minimumSignificantCorrelation, double decayRate); + const CTimeSeriesCorrelations& operator=(const CTimeSeriesCorrelations&) = delete; + + //! Create a copy of this model passing ownership to the caller. + CTimeSeriesCorrelations* clone() const; + + //! Create a copy of the state we need to persist passing ownership + //! to the caller. + CTimeSeriesCorrelations* cloneForPersistence() const; + + //! Process all samples added from individual time series models. + //! + //! \note This should be called exactly once after every univariate + //! time series model has added its samples. + void processSamples(const maths_t::TWeightStyleVec& weightStyles); + + //! Refresh the models to account for any changes to the correlation + //! estimates. + //! + //! \note This should be called exactly once before every univariate + //! time series model adds its samples. 
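The refresh/processSamples contract documented here is easy to get wrong, so the intended call ordering is worth spelling out: refresh exactly once before the batch, every univariate model contributes its samples, processSamples exactly once after. A sketch with stand-in types, not the ml classes:

// Sketch of the documented call ordering only.
#include <iostream>
#include <vector>

struct Correlations {
    void refresh() { std::cout << "refresh correlate models\n"; }
    void add(int id, double x) { std::cout << "buffer sample " << x << " for " << id << '\n'; }
    void processSamples() { std::cout << "update joint models from buffered samples\n"; }
};

int main() {
    Correlations correlations;
    std::vector<double> batch{1.0, 2.0};
    correlations.refresh();                 // once, before the batch
    for (int id = 0; id < 2; ++id) {
        correlations.add(id, batch[id]);    // each model contributes
    }
    correlations.processSamples();          // once, after the batch
}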
+ void refresh(const CTimeSeriesCorrelateModelAllocator& allocator); + + //! Get the correlation joint distribution models. + const TSizeSizePrMultivariatePriorPtrDoublePrUMap& correlationModels() const; + + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + std::size_t memoryUsage() const; + + //! Initialize reading state from \p traverser. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Persist by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + +private: + using TTimeDouble2VecSizeTr = core::CTriple; + using TTimeDouble2VecSizeTrVec = std::vector; + using TModelCPtrVec = std::vector; + using TModelCPtr1Vec = core::CSmallVector; + using TSizeSizePrMultivariatePriorPtrDoublePrPr = std::pair; + +private: + CTimeSeriesCorrelations(const CTimeSeriesCorrelations& other, bool isForPersistence = false); + + //! Restore the correlation distribution models reading state from + //! \p traverser. + bool restoreCorrelationModels(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Persist the correlation distribution models passing information + //! to \p inserter. + void persistCorrelationModels(core::CStatePersistInserter& inserter) const; + + //! Restore the \p model reading state from \p traverser. + static bool restore(const SDistributionRestoreParams& params, + TSizeSizePrMultivariatePriorPtrDoublePrPr& model, + core::CStateRestoreTraverser& traverser); + + //! Persist the \p model passing information to \p inserter. + static void persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr& model, core::CStatePersistInserter& inserter); + + //! Add the time series identified by \p id. + void addTimeSeries(std::size_t id, const CUnivariateTimeSeriesModel& model); + + //! Remove the correlates of \p id. + void removeTimeSeries(std::size_t id); + + //! Add a sample for the time series identified by \p id. + void addSamples(std::size_t id, const CModelAddSamplesParams& params, const TTimeDouble2VecSizeTrVec& samples, double multiplier); + + //! Get the ids of the time series correlated with \p id. + TSize1Vec correlated(std::size_t id) const; + + //! Get the correlation models and the correlated time series models + //! for for \p id. + bool correlationModels(std::size_t id, + TSize1Vec& correlated, + TSize2Vec1Vec& variables, + TMultivariatePriorCPtrSizePr1Vec& correlationDistributionModels, + TModelCPtr1Vec& correlatedTimeSeriesModels) const; + + //! Refresh the mapping from time series identifier to correlate + //! identifiers. + void refreshLookup(); + +private: + //! The minimum significant Pearson correlation. + double m_MinimumSignificantCorrelation; + + //! Filled in with the sample data if we are modeling correlates. + TSizeSampleDataUMap m_SampleData; + + //! Estimates the Pearson correlations of the k-most correlated + //! time series. + CKMostCorrelated m_Correlations; + + //! A lookup by time series identifier for correlated time series. + TSizeSize1VecUMap m_CorrelatedLookup; + + //! Models of the joint distribution (of the residuals) of the pairs + //! of time series which have significant correlation. + TSizeSizePrMultivariatePriorPtrDoublePrUMap m_CorrelationDistributionModels; + + //! A collection of univariate time series models for which this is + //! 
modeling correlations (indexed by their identifier). + TModelCPtrVec m_TimeSeriesModels; + + friend class CUnivariateTimeSeriesModel; }; //! \brief A CModel implementation for modeling a multivariate time series. -class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel -{ - public: - using TDouble10Vec = core::CSmallVector; - using TDouble10Vec4Vec = core::CSmallVector; - using TTimeDouble2VecPr = std::pair; - using TTimeDouble2VecPrCBuf = boost::circular_buffer; - using TDecompositionPtr = boost::shared_ptr; - using TDecompositionPtr10Vec = core::CSmallVector; - using TDecayRateController2Ary = boost::array; - - public: - //! \param[in] params The model parameters. - //! \param[in] trendModel The time series trend decomposition. - //! \param[in] residualModel The prior for the time series residual model. - //! \param[in] controllers Optional decay rate controllers for the trend - //! and residual model. - //! \param[in] modelAnomalies If true we use a separate model to capture - //! the characteristics of anomalous time periods. - CMultivariateTimeSeriesModel(const CModelParams ¶ms, - const CTimeSeriesDecompositionInterface &trendModel, - const CMultivariatePrior &residualModel, - const TDecayRateController2Ary *controllers = 0, - bool modelAnomalies = true); - CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel &other); - CMultivariateTimeSeriesModel(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Returns 0 since these models don't need a unique identifier. - virtual std::size_t identifier() const; - - //! Create a copy of this model passing ownership to the caller. - virtual CMultivariateTimeSeriesModel *clone(std::size_t id) const; - - //! Create a copy of the state we need to persist passing ownership - //! to the caller. - virtual CMultivariateTimeSeriesModel *cloneForPersistence() const; - - //! Create a copy of the state we need to run forecasting. - virtual CMultivariateTimeSeriesModel *cloneForForecast() const; - - //! Returns false (not currently supported for multivariate features). - virtual bool isForecastPossible() const; - - //! No-op. - virtual void modelCorrelations(CTimeSeriesCorrelations &model); - - //! Returns empty. - virtual TSize2Vec1Vec correlates() const; - - //! Update the model with the bucket \p value. - virtual void addBucketValue(const TTimeDouble2VecSizeTrVec &value); - - //! Update the model with new samples. - virtual EUpdateResult addSamples(const CModelAddSamplesParams ¶ms, - TTimeDouble2VecSizeTrVec samples); - - //! Advance time by \p gap. - virtual void skipTime(core_t::TTime gap); - - //! Get the most likely value for the time series at \p time. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - - //! Returns empty. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec1Vec &weights) const; - - //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - - //! Remove any trend components from \p value. - virtual void detrend(const TTime2Vec1Vec &time, - double confidenceInterval, - TDouble2Vec1Vec &value) const; - - //! Get the best (least MSE) predicted value at \p time. 
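As the typedefs below suggest, the multivariate model pairs one decomposition per coordinate with a single joint residual model. A sketch of coordinate-wise detrending feeding one shared accumulator; a running mean vector stands in for the multivariate prior:

// Sketch: one trend per coordinate, one joint residual model.
#include <array>
#include <cstddef>
#include <iostream>

int main() {
    std::array<double, 2> trendLevel{{10.0, 20.0}};   // per-coordinate trends
    std::array<double, 2> residualMean{{0.0, 0.0}};
    double n = 0.0;

    std::array<double, 2> samples[] = {{{10.5, 19.5}}, {{9.8, 20.4}}};
    for (const auto& x : samples) {
        n += 1.0;
        for (std::size_t d = 0; d < 2; ++d) {
            double r = x[d] - trendLevel[d];          // detrend coordinate d
            residualMean[d] += (r - residualMean[d]) / n;
        }
    }
    std::cout << residualMean[0] << ' ' << residualMean[1] << '\n';
}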
- virtual TDouble2Vec predict(core_t::TTime time, - const TSizeDoublePr1Vec &correlated = TSizeDoublePr1Vec(), - TDouble2Vec hint = TDouble2Vec()) const; - - //! Get the prediction and \p confidenceInterval percentage - //! confidence interval for the time series at \p time. - virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, - double confidenceInterval, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const; - - //! Not currently supported. - virtual bool forecast(core_t::TTime startTime, - core_t::TTime endTime, - double confidenceInterval, - const TDouble2Vec &minimum, - const TDouble2Vec &maximum, - const TForecastPushDatapointFunc &forecastPushDataPointFunc, - std::string &messageOut); - - //! Compute the probability of drawing \p value at \p time. - virtual bool probability(const CModelProbabilityParams ¶ms, - const TTime2Vec1Vec &time, - const TDouble2Vec1Vec &value, - double &probability, - TTail2Vec &tail, - bool &conditional, - TSize1Vec &mostAnomalousCorrelate) const; - - //! Get the Winsorisation weight to apply to \p value. - virtual TDouble2Vec winsorisationWeight(double derate, - core_t::TTime time, - const TDouble2Vec &value) const; - - //! Get the seasonal variance scale at \p time. - virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const; - - //! Compute a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; - - //! Initialize reading state from \p traverser. - bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Persist by passing information to \p inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Get the type of data being modeled. - virtual maths_t::EDataType dataType() const; - - //! \name Helpers - //@{ - //! Unpack the weights in \p weights. - static TDouble10Vec4Vec unpack(const TDouble2Vec4Vec &weights); - - //! Reinitialize \p residualModel using the detrended values - //! from \p slidingWindow. - static void reinitializeResidualModel(double learnRate, - const TDecompositionPtr10Vec &trend, - const TTimeDouble2VecPrCBuf &slidingWindow, - CMultivariatePrior &residualModel); - //@} - - //! \name Test Functions - //@{ - //! Get the sliding window of recent values. - const TTimeDouble2VecPrCBuf &slidingWindow() const; - - //! Get the trend. - const TDecompositionPtr10Vec &trendModel() const; - - //! Get the residual model. - const CMultivariatePrior &residualModel() const; - //@} - - private: - using TDouble1Vec = core::CSmallVector; - using TDouble1VecVec = std::vector; - using TDouble2Vec4VecVec = std::vector; - using TVector = CVectorNx1; - using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TDecayRateController2AryPtr = boost::shared_ptr; - using TMultivariatePriorPtr = boost::shared_ptr; - using TAnomalyModelPtr = boost::shared_ptr; - - private: - //! Update the trend with \p samples. - EUpdateResult updateTrend(const maths_t::TWeightStyleVec &trendStyles, - const TTimeDouble2VecSizeTrVec &samples, - const TDouble2Vec4VecVec &trendWeights); - - //! Compute the prediction errors for \p sample. - void appendPredictionErrors(double interval, - const TDouble2Vec &sample, - TDouble1VecVec (&result)[2]); - - //! 
Reinitialize state after detecting a new component of the trend - //! decomposition. - void reinitializeStateGivenNewComponent(void); - - //! Get the model dimension. - std::size_t dimension() const; - - private: - //! True if the data are non-negative. - bool m_IsNonNegative; - - //! A random number generator for sampling the sliding window. - CPRNG::CXorOShiro128Plus m_Rng; - - //! These control the trend and residual model decay rates (see - //! CDecayRateController for more details). - TDecayRateController2AryPtr m_Controllers; - - //! The time series trend decomposition. - TDecompositionPtr10Vec m_TrendModel; - - //! The time series residual model. - TMultivariatePriorPtr m_ResidualModel; - - //! A model for time periods when the basic model can't predict the - //! value of the time series. - TAnomalyModelPtr m_AnomalyModel; - - //! A sliding window of the recent samples (used to reinitialize the - //! residual model when a new trend component is detected). - TTimeDouble2VecPrCBuf m_SlidingWindow; -}; +class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { +public: + using TDouble10Vec = core::CSmallVector; + using TDouble10Vec4Vec = core::CSmallVector; + using TTimeDouble2VecPr = std::pair; + using TTimeDouble2VecPrCBuf = boost::circular_buffer; + using TDecompositionPtr = boost::shared_ptr; + using TDecompositionPtr10Vec = core::CSmallVector; + using TDecayRateController2Ary = boost::array; + +public: + //! \param[in] params The model parameters. + //! \param[in] trendModel The time series trend decomposition. + //! \param[in] residualModel The prior for the time series residual model. + //! \param[in] controllers Optional decay rate controllers for the trend + //! and residual model. + //! \param[in] modelAnomalies If true we use a separate model to capture + //! the characteristics of anomalous time periods. + CMultivariateTimeSeriesModel(const CModelParams& params, + const CTimeSeriesDecompositionInterface& trendModel, + const CMultivariatePrior& residualModel, + const TDecayRateController2Ary* controllers = 0, + bool modelAnomalies = true); + CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel& other); + CMultivariateTimeSeriesModel(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Returns 0 since these models don't need a unique identifier. + virtual std::size_t identifier() const; + + //! Create a copy of this model passing ownership to the caller. + virtual CMultivariateTimeSeriesModel* clone(std::size_t id) const; + + //! Create a copy of the state we need to persist passing ownership + //! to the caller. + virtual CMultivariateTimeSeriesModel* cloneForPersistence() const; + + //! Create a copy of the state we need to run forecasting. + virtual CMultivariateTimeSeriesModel* cloneForForecast() const; + + //! Returns false (not currently supported for multivariate features). + virtual bool isForecastPossible() const; + + //! No-op. + virtual void modelCorrelations(CTimeSeriesCorrelations& model); + + //! Returns empty. + virtual TSize2Vec1Vec correlates() const; + + //! Update the model with the bucket \p value. + virtual void addBucketValue(const TTimeDouble2VecSizeTrVec& value); + + //! Update the model with new samples. + virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, TTimeDouble2VecSizeTrVec samples); + + //! Advance time by \p gap. + virtual void skipTime(core_t::TTime gap); + + //! Get the most likely value for the time series at \p time. 
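The controller members reference CDecayRateController, which adapts how fast the trend and residual models forget based on prediction errors. A plausible sketch of that feedback loop; the thresholds and multipliers here are invented, not CDecayRateController's actual rule:

// Sketch: forget faster when recent prediction errors are large relative
// to history, slower when the model is tracking well. Constants invented.
#include <algorithm>
#include <initializer_list>
#include <iostream>

int main() {
    double decayRate = 0.001;
    double longRunError = 1.0;
    for (double recentError : {1.0, 3.5, 0.4}) {
        double ratio = recentError / longRunError;
        if (ratio > 2.0)      { decayRate *= 1.5; }   // model looks stale
        else if (ratio < 0.5) { decayRate *= 0.9; }   // model tracks well
        decayRate = std::min(std::max(decayRate, 1e-4), 0.1);
        std::cout << "decay rate -> " << decayRate << '\n';
    }
}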
+ virtual TDouble2Vec mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const; + + //! Returns empty. + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec1Vec& weights) const; + + //! Get the local maxima of the residual distribution. + virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const; + + //! Remove any trend components from \p value. + virtual void detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const; + + //! Get the best (least MSE) predicted value at \p time. + virtual TDouble2Vec + predict(core_t::TTime time, const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(), TDouble2Vec hint = TDouble2Vec()) const; + + //! Get the prediction and \p confidenceInterval percentage + //! confidence interval for the time series at \p time. + virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, + double confidenceInterval, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const; + + //! Not currently supported. + virtual bool forecast(core_t::TTime startTime, + core_t::TTime endTime, + double confidenceInterval, + const TDouble2Vec& minimum, + const TDouble2Vec& maximum, + const TForecastPushDatapointFunc& forecastPushDataPointFunc, + std::string& messageOut); + + //! Compute the probability of drawing \p value at \p time. + virtual bool probability(const CModelProbabilityParams& params, + const TTime2Vec1Vec& time, + const TDouble2Vec1Vec& value, + double& probability, + TTail2Vec& tail, + bool& conditional, + TSize1Vec& mostAnomalousCorrelate) const; + + //! Get the Winsorisation weight to apply to \p value. + virtual TDouble2Vec winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const; + + //! Get the seasonal variance scale at \p time. + virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const; + + //! Compute a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; + + //! Debug the memory used by this object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const; + + //! Initialize reading state from \p traverser. + bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Persist by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Get the type of data being modeled. + virtual maths_t::EDataType dataType() const; + + //! \name Helpers + //@{ + //! Unpack the weights in \p weights. + static TDouble10Vec4Vec unpack(const TDouble2Vec4Vec& weights); + + //! Reinitialize \p residualModel using the detrended values + //! from \p slidingWindow. + static void reinitializeResidualModel(double learnRate, + const TDecompositionPtr10Vec& trend, + const TTimeDouble2VecPrCBuf& slidingWindow, + CMultivariatePrior& residualModel); + //@} + //! \name Test Functions + //@{ + //! Get the sliding window of recent values. + const TTimeDouble2VecPrCBuf& slidingWindow() const; + + //! Get the trend. + const TDecompositionPtr10Vec& trendModel() const; + + //! Get the residual model. 
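unpack above converts weights from the generic two-element small-vector layout to the up-to-ten-dimension layout used by the multivariate prior, which, on this reading, amounts to an element-wise copy per weight style. A sketch with std::vector standing in for core::CSmallVector:

// Sketch: container conversion only; capacities differ in the real types.
#include <iostream>
#include <vector>

using TDouble2Vec = std::vector<double>;    // stands in for CSmallVector<double, 2>
using TDouble10Vec = std::vector<double>;   // stands in for CSmallVector<double, 10>

std::vector<TDouble10Vec> unpack(const std::vector<TDouble2Vec>& weights) {
    std::vector<TDouble10Vec> result;
    result.reserve(weights.size());
    for (const auto& styleWeights : weights) {   // one entry per weight style
        result.push_back(TDouble10Vec(styleWeights.begin(), styleWeights.end()));
    }
    return result;
}

int main() {
    std::vector<TDouble2Vec> weights{{1.0, 1.0}, {0.9, 0.8}};   // 2 styles x 2 dims
    std::cout << unpack(weights)[1][1] << '\n';                 // 0.8
}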
+ const CMultivariatePrior& residualModel() const; + //@} + +private: + using TDouble1Vec = core::CSmallVector; + using TDouble1VecVec = std::vector; + using TDouble2Vec4VecVec = std::vector; + using TVector = CVectorNx1; + using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TDecayRateController2AryPtr = boost::shared_ptr; + using TMultivariatePriorPtr = boost::shared_ptr; + using TAnomalyModelPtr = boost::shared_ptr; + +private: + //! Update the trend with \p samples. + EUpdateResult updateTrend(const maths_t::TWeightStyleVec& trendStyles, + const TTimeDouble2VecSizeTrVec& samples, + const TDouble2Vec4VecVec& trendWeights); + + //! Compute the prediction errors for \p sample. + void appendPredictionErrors(double interval, const TDouble2Vec& sample, TDouble1VecVec (&result)[2]); + + //! Reinitialize state after detecting a new component of the trend + //! decomposition. + void reinitializeStateGivenNewComponent(void); + + //! Get the model dimension. + std::size_t dimension() const; + +private: + //! True if the data are non-negative. + bool m_IsNonNegative; + + //! A random number generator for sampling the sliding window. + CPRNG::CXorOShiro128Plus m_Rng; + + //! These control the trend and residual model decay rates (see + //! CDecayRateController for more details). + TDecayRateController2AryPtr m_Controllers; + + //! The time series trend decomposition. + TDecompositionPtr10Vec m_TrendModel; + + //! The time series residual model. + TMultivariatePriorPtr m_ResidualModel; + + //! A model for time periods when the basic model can't predict the + //! value of the time series. + TAnomalyModelPtr m_AnomalyModel; + + //! A sliding window of the recent samples (used to reinitialize the + //! residual model when a new trend component is detected). + TTimeDouble2VecPrCBuf m_SlidingWindow; +}; } } diff --git a/include/maths/CTools.h b/include/maths/CTools.h index ebd00c4c0a..ce6d6be505 100644 --- a/include/maths/CTools.h +++ b/include/maths/CTools.h @@ -7,9 +7,9 @@ #ifndef INCLUDED_ml_maths_CTools_h #define INCLUDED_ml_maths_CTools_h -#include #include #include +#include #include #include @@ -25,12 +25,11 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CLogTDistribution; -template class CMixtureDistribution; +template +class CMixtureDistribution; //! \brief A collection of utility functionality. //! @@ -45,718 +44,643 @@ template class CMixtureDistribution; //! all member functions should be static and it should be state-less. //! If your functionality doesn't fit this pattern just make it a nested //! class. -class MATHS_EXPORT CTools : private core::CNonInstantiatable -{ +class MATHS_EXPORT CTools : private core::CNonInstantiatable { +public: + BOOST_MATH_DECLARE_DISTRIBUTIONS(double, boost::math::policies::policy<>) + using TDoubleDoublePr = std::pair; + using TDoubleDoublePrVec = std::vector; + using TDoubleVec = std::vector; + + //! The c.d.f. value for all x for an improper distribution. + static const double IMPROPER_CDF; + + //! \brief A tag for an improper distribution, which is 0 everywhere. + struct SImproperDistribution {}; + + //! \brief Computes minus the log of the c.d.f. of a specified sample + //! of an R.V. for various distributions. 
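The distinction between the minus-log-c.d.f. helper declared here and the complement variant that follows is numerical: computing -log(1 - F(x)) by way of 1 - F(x) loses everything once the complement drops below machine epsilon, whereas evaluating the upper tail directly stays accurate. A normal-distribution example of the difference:

// Example: -log(0.5 * erfc(z)) stays finite and accurate deep in the
// tail, while going through 1 - F(x) collapses to -log(0) = inf.
#include <cmath>
#include <iostream>

int main() {
    double x = 9.0;                                 // nine sigmas out
    double z = x / std::sqrt(2.0);
    double direct = -std::log(0.5 * std::erfc(z));  // ~43.6
    double naive = -std::log(1.0 - (1.0 - 0.5 * std::erfc(z)));  // inf
    std::cout << direct << ' ' << naive << '\n';
}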
+ struct MATHS_EXPORT SMinusLogCdf { + double operator()(const SImproperDistribution&, double x) const; + double operator()(const normal& normal_, double x) const; + double operator()(const students_t& students, double x) const; + double operator()(const negative_binomial& negativeBinomial, double x) const; + double operator()(const lognormal& logNormal, double x) const; + double operator()(const CLogTDistribution& logt, double x) const; + double operator()(const gamma& gamma_, double x) const; + double operator()(const beta& beta_, double x) const; + }; + + //! \brief Computes minus the log of the 1 - c.d.f. of a specified + //! sample of an R.V. for various distributions using full double + //! precision, i.e. these do not lose precision when the result is + //! close to 1 and the smallest value is the minimum double rather + //! than epsilon. + struct MATHS_EXPORT SMinusLogCdfComplement { + double operator()(const SImproperDistribution&, double) const; + double operator()(const normal& normal_, double x) const; + double operator()(const students_t& students, double x) const; + double operator()(const negative_binomial& negativeBinomial, double x) const; + double operator()(const lognormal& logNormal, double x) const; + double operator()(const CLogTDistribution& logt, double x) const; + double operator()(const gamma& gamma_, double x) const; + double operator()(const beta& beta_, double x) const; + }; + + //! \brief Computes the probability of seeing a more extreme sample + //! of an R.V. for various distributions. + //! + //! The one sided below calculation computes the probability of the set: + //!
+    //!   \f$\{y\ |\ y \leq x\}\f$
+    //! </pre>
+    //!
+    //! and normalizes the result so that it equals one at the distribution
+    //! median.
+    //!
+    //! The two sided calculation computes the probability of the set:
+    //! <pre class="fragment">
+    //!   \f$\{y\ |\ f(y) \leq f(x)\}\f$
+    //! </pre>
+    //!
+    //! where,\n
+    //! \f$f(.)\f$ is the p.d.f. of the random variable.
+    //!
+    //! The one sided above calculation computes the probability of the set:
+    //! <pre class="fragment">
+    //!   \f$\{y\ |\ y \geq x\}\f$
+    //! </pre>
+ //! + //! and normalizes the result so that it equals one at the distribution + //! median. + class MATHS_EXPORT CProbabilityOfLessLikelySample { public: - BOOST_MATH_DECLARE_DISTRIBUTIONS(double, boost::math::policies::policy<>) - using TDoubleDoublePr = std::pair; - using TDoubleDoublePrVec = std::vector; - using TDoubleVec = std::vector; - - //! The c.d.f. value for all x for an improper distribution. - static const double IMPROPER_CDF; - - //! \brief A tag for an improper distribution, which is 0 everywhere. - struct SImproperDistribution {}; - - //! \brief Computes minus the log of the c.d.f. of a specified sample - //! of an R.V. for various distributions. - struct MATHS_EXPORT SMinusLogCdf - { - double operator()(const SImproperDistribution &, double x) const; - double operator()(const normal &normal_, double x) const; - double operator()(const students_t &students, double x) const; - double operator()(const negative_binomial &negativeBinomial, double x) const; - double operator()(const lognormal &logNormal, double x) const; - double operator()(const CLogTDistribution &logt, double x) const; - double operator()(const gamma &gamma_, double x) const; - double operator()(const beta &beta_, double x) const; - }; + CProbabilityOfLessLikelySample(maths_t::EProbabilityCalculation calculation); - //! \brief Computes minus the log of the 1 - c.d.f. of a specified - //! sample of an R.V. for various distributions using full double - //! precision, i.e. these do not lose precision when the result is - //! close to 1 and the smallest value is the minimum double rather - //! than epsilon. - struct MATHS_EXPORT SMinusLogCdfComplement - { - double operator()(const SImproperDistribution &, double) const; - double operator()(const normal &normal_, double x) const; - double operator()(const students_t &students, double x) const; - double operator()(const negative_binomial &negativeBinomial, double x) const; - double operator()(const lognormal &logNormal, double x) const; - double operator()(const CLogTDistribution &logt, double x) const; - double operator()(const gamma &gamma_, double x) const; - double operator()(const beta &beta_, double x) const; - }; + double operator()(const SImproperDistribution&, double, maths_t::ETail& tail) const; + double operator()(const normal& normal_, double x, maths_t::ETail& tail) const; + double operator()(const students_t& students, double x, maths_t::ETail& tail) const; + double operator()(const negative_binomial& negativeBinomial, double x, maths_t::ETail& tail) const; + double operator()(const lognormal& logNormal, double x, maths_t::ETail& tail) const; + double operator()(const CLogTDistribution& logt, double x, maths_t::ETail& tail) const; + double operator()(const gamma& gamma_, double x, maths_t::ETail& tail) const; + double operator()(const beta& beta_, double x, maths_t::ETail& tail) const; + + private: + //! Check the value is supported. + bool check(const TDoubleDoublePr& support, double x, double& px, maths_t::ETail& tail) const; + + //! Update the tail. + void tail(double x, double mode, maths_t::ETail& tail) const; - //! \brief Computes the probability of seeing a more extreme sample - //! of an R.V. for various distributions. + //! The style of calculation which, i.e. one or two tail. + maths_t::EProbabilityCalculation m_Calculation; + }; + + //! \brief Computes the probability of seeing a more extreme sample + //! from a mixture model. + //! + //! 
\sa CProbabilityOfLessLikelySample + class MATHS_EXPORT CMixtureProbabilityOfLessLikelySample { + public: + //! Computes the value of the smooth kernel of an integral + //! which approximates the probability of less likely samples. //! - //! The one sided below calculation computes the probability of the set: + //! In particular, we write the integral as //!
-        //!   \f$\{y\ |\ y \leq x\}\f$
+        //!   \f$P(\{s : f(s) < f(x)\}) = \int{I(f(s) < f(x)) f(s)}ds\f$
         //! </pre>
         //!
-        //! and normalizes the result so that it equals one at the distribution
-        //! median.
-        //!
-        //! The two sided calculation computes the probability of the set:
+        //! and approximate the indicator function as
         //! <pre class="fragment">
-        //!   \f$\{y\ |\ f(y) \leq f(x)\}\f$
+        //!   \f$\displaystyle I(f(s) < f(x)) \approx (1+e^{-k}) \frac{e^{-k(f(s)/f(x)-1)}}{1+e^{-k(f(s)/f(x)-1)}}\f$
         //! </pre>
         //!
-        //! where,\n
-        //! \f$f(.)\f$ is the p.d.f. of the random variable.
-        //!
-        //! The one sided above calculation computes the probability of the set:
-        //! <pre class="fragment">
-        //!   \f$\{y\ |\ y \geq x\}\f$
-        //! </pre>
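For intuition about the two sided calculation defined in these comments: when the density is symmetric and unimodal, the set {y : f(y) <= f(x)} is exactly {y : |y - mu| >= |x - mu|}, which gives a closed form. This shortcut is normal-only; the classes here handle the general (skewed, mixture) cases:

// For a normal, the probability of a less likely sample reduces to
// erfc(|x - mu| / (sigma * sqrt(2))). Normal-only shortcut.
#include <cmath>
#include <iostream>

double probabilityOfLessLikelySample(double mu, double sigma, double x) {
    return std::erfc(std::fabs(x - mu) / (sigma * std::sqrt(2.0)));
}

int main() {
    std::cout << probabilityOfLessLikelySample(0.0, 1.0, 0.0) << '\n';  // 1 at the mode
    std::cout << probabilityOfLessLikelySample(0.0, 1.0, 3.0) << '\n';  // ~0.0027
}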
- //! - //! and normalizes the result so that it equals one at the distribution - //! median. - class MATHS_EXPORT CProbabilityOfLessLikelySample - { - public: - CProbabilityOfLessLikelySample(maths_t::EProbabilityCalculation calculation); - - double operator()(const SImproperDistribution &, double, maths_t::ETail &tail) const; - double operator()(const normal &normal_, double x, maths_t::ETail &tail) const; - double operator()(const students_t &students, double x, maths_t::ETail &tail) const; - double operator()(const negative_binomial &negativeBinomial, double x, maths_t::ETail &tail) const; - double operator()(const lognormal &logNormal, double x, maths_t::ETail &tail) const; - double operator()(const CLogTDistribution &logt, double x, maths_t::ETail &tail) const; - double operator()(const gamma &gamma_, double x, maths_t::ETail &tail) const; - double operator()(const beta &beta_, double x, maths_t::ETail &tail) const; - - private: - //! Check the value is supported. - bool check(const TDoubleDoublePr &support, - double x, - double &px, - maths_t::ETail &tail) const; - - //! Update the tail. - void tail(double x, - double mode, - maths_t::ETail &tail) const; - - //! The style of calculation which, i.e. one or two tail. - maths_t::EProbabilityCalculation m_Calculation; + //! Note that the larger the value of \f$k\f$ the better the + //! approximation. Note also that this computes the scaled + //! kernel, i.e. \f$k'(s) = k(s)/f(x)\f$ so the output must + //! be scaled by \f$f(x)\f$ to recover the true probability. + template + class CSmoothedKernel : private core::CNonCopyable { + public: + CSmoothedKernel(LOGF logf, double logF0, double k); + + void k(double k); + bool operator()(double x, double& result) const; + + private: + LOGF m_LogF; + double m_LogF0; + double m_K; + double m_Scale; }; - //! \brief Computes the probability of seeing a more extreme sample - //! from a mixture model. + public: + //! \param[in] n The number of modes. + //! \param[in] x The sample. + //! \param[in] logFx The log of the p.d.f. at the sample. + //! \param[in] a The left end of the interval to integrate. + //! \param[in] b The left end of the interval to integrate. + CMixtureProbabilityOfLessLikelySample(std::size_t n, double x, double logFx, double a, double b); + + //! Reinitialize the object for computing the the probability + //! of \f$\{y : f(y) <= f(x)\}\f$. //! - //! \sa CProbabilityOfLessLikelySample - class MATHS_EXPORT CMixtureProbabilityOfLessLikelySample - { - public: - //! Computes the value of the smooth kernel of an integral - //! which approximates the probability of less likely samples. - //! - //! In particular, we write the integral as - //!
-                //!   \f$P(\{s : f(s) < f(x)\}) = \int{I(f(s) < f(x)) f(s)}ds\f$
-                //! 
- //! - //! and approximate the indicator function as - //!
-                //!   \f$\displaystyle I(f(s) < f(x)) \approx (1+e^{-k}) \frac{e^{-k(f(s)/f(x)-1)}}{1+e^{-k(f(s)/f(x)-1)}}\f$
-                //! 
- //! - //! Note that the larger the value of \f$k\f$ the better the - //! approximation. Note also that this computes the scaled - //! kernel, i.e. \f$k'(s) = k(s)/f(x)\f$ so the output must - //! be scaled by \f$f(x)\f$ to recover the true probability. - template - class CSmoothedKernel : private core::CNonCopyable - { - public: - CSmoothedKernel(LOGF logf, double logF0, double k); - - void k(double k); - bool operator()(double x, double &result) const; - - private: - LOGF m_LogF; - double m_LogF0; - double m_K; - double m_Scale; - }; - - public: - //! \param[in] n The number of modes. - //! \param[in] x The sample. - //! \param[in] logFx The log of the p.d.f. at the sample. - //! \param[in] a The left end of the interval to integrate. - //! \param[in] b The left end of the interval to integrate. - CMixtureProbabilityOfLessLikelySample(std::size_t n, - double x, - double logFx, - double a, - double b); - - //! Reinitialize the object for computing the the probability - //! of \f$\{y : f(y) <= f(x)\}\f$. - //! - //! \param[in] x The sample. - //! \param[in] logFx The log of the p.d.f. at the sample. - void reinitialize(double x, double logFx); - - //! Add a mode of the distribution with mean \p mean and - //! standard deviation \p sd with normalized weight \p weight. - //! - //! \param[in] weight The mode weight, i.e. the proportion of - //! samples in the mode. - //! \param[in] modeMean The mode mean. - //! \param[in] modeSd The mode standard deviation. - void addMode(double weight, double modeMean, double modeSd); - - //! Find the left tail argument with the same p.d.f. value as - //! the sample. - //! - //! \param[in] logf The function which computes the log of the - //! mixture p.d.f. - //! \param[in] iterations The number of maximum number of - //! evaluations of the logf function. - //! \param[in] equal The function to test if two argument values - //! are equal. - //! \param[out] result Filled in with the argument with the same - //! p.d.f. value as the sample in the left tail. - //! - //! \tparam LOGF The type of the function (object) which computes - //! the log of the mixture p.d.f. It is expected to have a function - //! like signature double (double). - template - bool leftTail(const LOGF &logf, - std::size_t iterations, - const EQUAL &equal, - double &result) const; - - //! Find the right tail argument with the same p.d.f. value - //! as the sample. - //! - //! \param[in] logf The function which computes the log of the - //! mixture p.d.f. - //! \param[in] iterations The number of maximum number of - //! evaluations of the logf function. - //! \param[in] equal The function to test if two argument values - //! are equal. - //! \param[out] result Filled in with the argument with the same - //! p.d.f. value as the sample in the right tail. - //! - //! \tparam LOGF The type of the function (object) which computes - //! the log of the mixture p.d.f. It is expected to have a function - //! like signature double (double). - template - bool rightTail(const LOGF &logf, - std::size_t iterations, - const EQUAL &equal, - double &result) const; - - //! Compute the probability of a less likely sample. - //! - //! \param[in] logf The function which computes the log of the - //! mixture p.d.f. - //! \param[in] pTails The probability in the distribution tails, - //! which can be found from the c.d.f., and is not account for - //! by the integration. - //! - //! \tparam LOGF The type of the function (object) which computes - //! the log of the mixture p.d.f. 
It is expected to have a function - //! like signature bool (double, double &) where the first argument - //! is the p.d.f. argument and the second argument is filled in - //! with the log p.d.f. at the first argument. - template - double calculate(const LOGF &logf, double pTails); - - private: - using TMaxAccumulator = CBasicStatistics::SMax::TAccumulator; - - private: - static const double LOG_ROOT_TWO_PI; - - private: - //! Compute the seed integration intervals. - void intervals(TDoubleDoublePrVec &intervals); - - private: - //! The sample. - double m_X; - //! The log p.d.f. of the sample for which to compute the - //! probability. - double m_LogFx; - //! The integration interval [a, b]. - double m_A, m_B; - //! Filled in with the end points of the seed intervals for - //! adaptive quadrature. - TDoubleVec m_Endpoints; - //! The maximum deviation of the sample from any mode. - TMaxAccumulator m_MaxDeviation; - }; + //! \param[in] x The sample. + //! \param[in] logFx The log of the p.d.f. at the sample. + void reinitialize(double x, double logFx); - //! \brief Computes the expectation conditioned on a particular interval. + //! Add a mode of the distribution with mean \p mean and + //! standard deviation \p sd with normalized weight \p weight. //! - //! DESCRIPTION:\n - //! Computes the expectation of various R.V.s on the condition that the - //! variable is in a specified interval. In particular, this is the - //! quantity: - //!
-        //!   \f$E[ X 1{[a,b]} ] / E[ 1{a,b]} ]\f$
-        //! 
- struct MATHS_EXPORT SIntervalExpectation - { - double operator()(const normal &normal_, double a, double b) const; - double operator()(const lognormal &logNormal, double a, double b) const; - double operator()(const gamma &gamma_, double a, double b) const; - }; - - //! The smallest value of probability we permit. + //! \param[in] weight The mode weight, i.e. the proportion of + //! samples in the mode. + //! \param[in] modeMean The mode mean. + //! \param[in] modeSd The mode standard deviation. + void addMode(double weight, double modeMean, double modeSd); + + //! Find the left tail argument with the same p.d.f. value as + //! the sample. //! - //! This is used to stop calculations under/overflowing if we - //! allow the probability to be zero (for example). - static double smallestProbability(); - - //! \name Safe Probability Density Function - //! Unfortunately, boost::math::pdf and boost::math::cdf don't - //! handle values outside of the distribution support very well. - //! By default they throw and if you suppress this behaviour - //! they return 0.0 for the cdf! This wraps up the pdf and cdf - //! calls and does the appropriate checking. The functions are - //! extended to the whole real line in the usual way by treating - //! them as continuous. - //@{ - static double safePdf(const normal &normal_, double x); - static double safePdf(const students_t &students, double x); - static double safePdf(const poisson &poisson_, double x); - static double safePdf(const negative_binomial &negativeBinomial, double x); - static double safePdf(const lognormal &logNormal, double x); - static double safePdf(const gamma &gamma_, double x); - static double safePdf(const beta &beta_, double x); - static double safePdf(const binomial &binomial_, double x); - static double safePdf(const chi_squared &chi2, double x); - //@} - - //! \name Safe Cumulative Density Function - //! Wrappers around the boost::math::cdf functions which extend - //! them to the whole real line. - //! \see safePdf for details. - //@{ - static double safeCdf(const normal &normal_, double x); - static double safeCdf(const students_t &students, double x); - static double safeCdf(const poisson &poisson_, double x); - static double safeCdf(const negative_binomial &negativeBinomial, double x); - static double safeCdf(const lognormal &logNormal, double x); - static double safeCdf(const gamma &gamma_, double x); - static double safeCdf(const beta &beta_, double x); - static double safeCdf(const binomial &binomial_, double x); - static double safeCdf(const chi_squared &chi2, double x); - //@} - - //! \name Safe Cumulative Density Function Complement - //! Wrappers around the boost::math::cdf functions for complement - //! distributions which extend them to the whole real line. - //! \see safePdf for details. - //@{ - static double safeCdfComplement(const normal &normal_, double x); - static double safeCdfComplement(const students_t &students, double x); - static double safeCdfComplement(const poisson &poisson_, double x); - static double safeCdfComplement(const negative_binomial &negativeBinomial, double x); - static double safeCdfComplement(const lognormal &logNormal, double x); - static double safeCdfComplement(const gamma &gamma_, double x); - static double safeCdfComplement(const beta &beta_, double x); - static double safeCdfComplement(const binomial &binomial_, double x); - static double safeCdfComplement(const chi_squared &chi2, double x); - //@} - - //! Compute the anomalousness from the probability of seeing a - //! 
more extreme event for a distribution, i.e. for a sample - //! \f$x\f$ from a R.V. the probability \f$P(R)\f$ of the set: - //!
-        //!   \f$ R = \{y\ |\ f(y) \leq f(x)\} \f$
-        //! 
- //! where,\n - //! \f$f(.)\f$ is the p.d.f. of the random variable.\n\n - //! This is a monotonically decreasing function of \f$P(R)\f$ and - //! is chosen so that for \f$P(R)\f$ near one it is zero and as - //! \f$P(R) \rightarrow 0\f$ it saturates at 100. - static double anomalyScore(double p); - - //! The inverse of the anomalyScore function. - static double inverseAnomalyScore(double deviation); - - //! \name Differential Entropy - //! Compute the differential entropy of the specified distribution.\n\n - //! The differential entropy of an R.V. is defined as: - //!
-        //!   \f$ -E[\log(f(x))] \f$
-        //! 
-        //! where,\n
-        //! \f$f(x)\f$ is the probability density function.\n\n
-        //! This computes the differential entropy in units of "nats",
-        //! i.e. the logarithm is the natural logarithm.
-        //@{
-        static double differentialEntropy(const poisson &poisson_);
-        static double differentialEntropy(const normal &normal_);
-        static double differentialEntropy(const lognormal &logNormal);
-        static double differentialEntropy(const gamma &gamma_);
-        template<typename T>
-        class CDifferentialEntropyKernel
-        {
-            public:
-                CDifferentialEntropyKernel(const CMixtureDistribution<T> &mixture) :
-                        m_Mixture(&mixture)
-                {}
-
-                inline bool operator()(double x, double &result) const
-                {
-                    double fx = pdf(*m_Mixture, x);
-                    result = fx == 0.0 ? 0.0 : -fx * std::log(fx);
-                    return true;
-                }
-
-            private:
-                const CMixtureDistribution<T> *m_Mixture;
-        };
-        template<typename T>
-        static double differentialEntropy(const CMixtureDistribution<T> &mixture);
-        //@}
-
-        //! Check if \p log will underflow the smallest positive value of T.
+        //! \param[in] logf The function which computes the log of the
+        //! mixture p.d.f.
+        //! \param[in] iterations The maximum number of
+        //! evaluations of the logf function.
+        //! \param[in] equal The function to test if two argument values
+        //! are equal.
+        //! \param[out] result Filled in with the argument with the same
+        //! p.d.f. value as the sample in the left tail.
    //!
-        //! \tparam T must be a floating point type.
-        template<typename T>
-        static bool logWillUnderflow(T log)
-        {
-            static const T LOG_DENORM_MIN = std::log(std::numeric_limits<T>::min());
-            return log < LOG_DENORM_MIN;
-        }
-
-        //! \name Fast Log
-    private:
-        //! The precision to use for fastLog, which gives good runtime
-        //! accuracy tradeoff.
-        static const int FAST_LOG_PRECISION = 14;
-
-        //! Shift used to index the lookup table in fastLog.
-        static const std::size_t FAST_LOG_SHIFT = 52 - FAST_LOG_PRECISION;
+        //! \tparam LOGF The type of the function (object) which computes
+        //! the log of the mixture p.d.f. It is expected to have a function
+        //! like signature double (double).
+        template<typename LOGF, typename EQUAL>
+        bool leftTail(const LOGF& logf, std::size_t iterations, const EQUAL& equal, double& result) const;
+
+        //! Find the right tail argument with the same p.d.f. value
+        //! as the sample.
+        //!
+        //! \param[in] logf The function which computes the log of the
+        //! mixture p.d.f.
+        //! \param[in] iterations The maximum number of
+        //! evaluations of the logf function.
+        //! \param[in] equal The function to test if two argument values
+        //! are equal.
+        //! \param[out] result Filled in with the argument with the same
+        //! p.d.f. value as the sample in the right tail.
+        //!
+        //! \tparam LOGF The type of the function (object) which computes
+        //! the log of the mixture p.d.f. It is expected to have a function
+        //! like signature double (double).
+        template<typename LOGF, typename EQUAL>
+        bool rightTail(const LOGF& logf, std::size_t iterations, const EQUAL& equal, double& result) const;
-        //! \brief Creates a lookup table for log2(x) with specified
-        //! accuracy.
+        //! Compute the probability of a less likely sample.
    //!
-        //! DESCRIPTION:\n
-        //! This implements a singleton lookup table for all values
-        //! of log base 2 of x for the mantissa of x in the range
-        //! [0, 2^52-1]. The specified accuracy, \p N, determines the
-        //! size of the lookup table, and values are equally spaced,
-        //! i.e. the separation is 2^52 / 2^N. This is used by fastLog
-        //! to read off the log base 2 to the specified precision.
+        //! \param[in] logf The function which computes the log of the
+        //! mixture p.d.f.
+        //! \param[in] pTails The probability in the distribution tails,
+        //! which can be found from the c.d.f., and is not accounted for
+        //! by the integration.
    //!
-        //! This is taken from the approach given in
-        //! http://www.icsi.berkeley.edu/pubs/techreports/TR-07-002.pdf
-        template<int BITS>
-        class CLookupTableForFastLog
-        {
-            public:
-                static const std::size_t BINS = 1 << BITS;
-
-            public:
-                using TArray = boost::array<double, BINS>;
-
-            public:
-                //! Builds the table.
-                CLookupTableForFastLog()
-                {
-                    // Notes:
-                    // 1) The shift is the maximum mantissa / BINS.
-                    // 2) The sign bit is set to 0 which is positive.
-                    // 3) The exponent is set to 1022, which is 0 in two's
-                    //    complement.
-                    // 4) This implementation is endian neutral because it
-                    //    is constructing a look up from the mantissa value
-                    //    (interpreted as an integer) to the corresponding
-                    //    double value and fastLog uses the same approach
-                    //    to extract the mantissa.
-                    uint64_t dx = 0x10000000000000ull / BINS;
-                    core::CIEEE754::SDoubleRep x;
-                    x.s_Sign = 0;
-                    x.s_Mantissa = (dx / 2) & core::CIEEE754::IEEE754_MANTISSA_MASK;
-                    x.s_Exponent = 1022;
-                    for (std::size_t i = 0u; i < BINS; ++i,
-                         x.s_Mantissa = (x.s_Mantissa + dx) & core::CIEEE754::IEEE754_MANTISSA_MASK)
-                    {
-                        double value;
-                        static_assert(sizeof(double) == sizeof(core::CIEEE754::SDoubleRep),
-                                      "SDoubleRep definition unsuitable for memcpy to double");
-                        // Use memcpy() rather than union to adhere to strict
-                        // aliasing rules
-                        std::memcpy(&value, &x, sizeof(double));
-                        m_Table[i] = std::log2(value);
-                    }
-                }
+        //! \tparam LOGF The type of the function (object) which computes
+        //! the log of the mixture p.d.f. It is expected to have a function
+        //! like signature bool (double, double &) where the first argument
+        //! is the p.d.f. argument and the second argument is filled in
+        //! with the log p.d.f. at the first argument.
+        template<typename LOGF>
+        double calculate(const LOGF& logf, double pTails);
-                //! Lookup log2 for a given mantissa.
-                const double &operator[](uint64_t mantissa) const
-                {
-                    return m_Table[mantissa >> FAST_LOG_SHIFT];
-                }
+    private:
+        using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
-            private:
-                //! The quantized log base 2 for the mantissa range.
-                TArray m_Table;
-        };
+    private:
+        static const double LOG_ROOT_TWO_PI;
-        //! The table used for computing fast log.
-        static const CLookupTableForFastLog<FAST_LOG_PRECISION> FAST_LOG_TABLE;
+    private:
+        //! Compute the seed integration intervals.
+        void intervals(TDoubleDoublePrVec& intervals);
+    private:
+        //! The sample.
+        double m_X;
+        //! The log p.d.f. of the sample for which to compute the
+        //! probability.
+        double m_LogFx;
+        //! The integration interval [a, b].
+        double m_A, m_B;
+        //! Filled in with the end points of the seed intervals for
+        //! adaptive quadrature.
+        TDoubleVec m_Endpoints;
+        //! The maximum deviation of the sample from any mode.
+        TMaxAccumulator m_MaxDeviation;
+    };
+
+    //! \brief Computes the expectation conditioned on a particular interval.
+    //!
+    //! DESCRIPTION:\n
+    //! Computes the expectation of various R.V.s on the condition that the
+    //! variable is in a specified interval. In particular, this is the
+    //! quantity:
+    //! <pre class="fragment">
+    //!   \f$E[ X 1{[a,b]} ] / E[ 1{[a,b]} ]\f$
+    //! 
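For the normal overload, this conditional expectation has a well-known closed form (the truncated-normal mean). A minimal self-contained sketch, with illustrative helper names and presumably along the lines of what the normal operator() computes, is:

    #include <cmath>
    #include <iostream>

    // Standard normal p.d.f. and c.d.f.
    double pdf(double z) { return std::exp(-0.5 * z * z) / std::sqrt(2.0 * std::acos(-1.0)); }
    double cdf(double z) { return 0.5 * std::erfc(-z / std::sqrt(2.0)); }

    // E[X 1{[a,b]}] / E[1{[a,b]}] for X ~ N(mean, sd^2), i.e. the mean of X
    // conditioned on a <= X <= b.
    double intervalExpectation(double mean, double sd, double a, double b) {
        double alpha = (a - mean) / sd;
        double beta = (b - mean) / sd;
        return mean + sd * (pdf(alpha) - pdf(beta)) / (cdf(beta) - cdf(alpha));
    }

    int main() {
        // The conditional mean is pulled into the interval: a standard
        // normal conditioned on [1, 2] has mean ~1.38.
        std::cout << intervalExpectation(0.0, 1.0, 1.0, 2.0) << '\n';
        return 0;
    }

The lognormal and gamma overloads admit similar closed forms via their partial expectations (in terms of the lognormal c.d.f. and the incomplete gamma function respectively).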
+ struct MATHS_EXPORT SIntervalExpectation { + double operator()(const normal& normal_, double a, double b) const; + double operator()(const lognormal& logNormal, double a, double b) const; + double operator()(const gamma& gamma_, double a, double b) const; + }; + + //! The smallest value of probability we permit. + //! + //! This is used to stop calculations under/overflowing if we + //! allow the probability to be zero (for example). + static double smallestProbability(); + + //! \name Safe Probability Density Function + //! Unfortunately, boost::math::pdf and boost::math::cdf don't + //! handle values outside of the distribution support very well. + //! By default they throw and if you suppress this behaviour + //! they return 0.0 for the cdf! This wraps up the pdf and cdf + //! calls and does the appropriate checking. The functions are + //! extended to the whole real line in the usual way by treating + //! them as continuous. + //@{ + static double safePdf(const normal& normal_, double x); + static double safePdf(const students_t& students, double x); + static double safePdf(const poisson& poisson_, double x); + static double safePdf(const negative_binomial& negativeBinomial, double x); + static double safePdf(const lognormal& logNormal, double x); + static double safePdf(const gamma& gamma_, double x); + static double safePdf(const beta& beta_, double x); + static double safePdf(const binomial& binomial_, double x); + static double safePdf(const chi_squared& chi2, double x); + //@} + + //! \name Safe Cumulative Density Function + //! Wrappers around the boost::math::cdf functions which extend + //! them to the whole real line. + //! \see safePdf for details. + //@{ + static double safeCdf(const normal& normal_, double x); + static double safeCdf(const students_t& students, double x); + static double safeCdf(const poisson& poisson_, double x); + static double safeCdf(const negative_binomial& negativeBinomial, double x); + static double safeCdf(const lognormal& logNormal, double x); + static double safeCdf(const gamma& gamma_, double x); + static double safeCdf(const beta& beta_, double x); + static double safeCdf(const binomial& binomial_, double x); + static double safeCdf(const chi_squared& chi2, double x); + //@} + + //! \name Safe Cumulative Density Function Complement + //! Wrappers around the boost::math::cdf functions for complement + //! distributions which extend them to the whole real line. + //! \see safePdf for details. + //@{ + static double safeCdfComplement(const normal& normal_, double x); + static double safeCdfComplement(const students_t& students, double x); + static double safeCdfComplement(const poisson& poisson_, double x); + static double safeCdfComplement(const negative_binomial& negativeBinomial, double x); + static double safeCdfComplement(const lognormal& logNormal, double x); + static double safeCdfComplement(const gamma& gamma_, double x); + static double safeCdfComplement(const beta& beta_, double x); + static double safeCdfComplement(const binomial& binomial_, double x); + static double safeCdfComplement(const chi_squared& chi2, double x); + //@} + + //! Compute the anomalousness from the probability of seeing a + //! more extreme event for a distribution, i.e. for a sample + //! \f$x\f$ from a R.V. the probability \f$P(R)\f$ of the set: + //!
+    //!   \f$ R = \{y\ |\ f(y) \leq f(x)\} \f$
+    //! 
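As a concrete instance of this definition (an editor's illustration only; the library implementation supports many distributions and also reports the tail), for a unimodal symmetric density such as the normal the set R reduces to the two-sided tail beyond x:

    #include <cmath>
    #include <iostream>

    // For N(mean, sd^2) the set {y : f(y) <= f(x)} is exactly
    // {y : |y - mean| >= |x - mean|}, so P(R) has a closed form.
    double probabilityOfLessLikelySample(double mean, double sd, double x) {
        double z = std::fabs(x - mean) / sd;
        return std::erfc(z / std::sqrt(2.0));  // == 2 * (1 - Phi(z))
    }

    int main() {
        std::cout << probabilityOfLessLikelySample(0.0, 1.0, 0.0) << '\n';  // 1 at the mode
        std::cout << probabilityOfLessLikelySample(0.0, 1.0, 3.0) << '\n';  // ~0.0027
        return 0;
    }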
+ //! where,\n + //! \f$f(.)\f$ is the p.d.f. of the random variable.\n\n + //! This is a monotonically decreasing function of \f$P(R)\f$ and + //! is chosen so that for \f$P(R)\f$ near one it is zero and as + //! \f$P(R) \rightarrow 0\f$ it saturates at 100. + static double anomalyScore(double p); + + //! The inverse of the anomalyScore function. + static double inverseAnomalyScore(double deviation); + + //! \name Differential Entropy + //! Compute the differential entropy of the specified distribution.\n\n + //! The differential entropy of an R.V. is defined as: + //!
+    //!   \f$ -E[\log(f(x))] \f$
+    //! 
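For example, the differential entropy of N(mean, sd^2) is 0.5 * log(2 * pi * e * sd^2) nats, which the following self-contained sketch (illustrative names, not part of the patch) checks against a Monte Carlo estimate of -E[log f(X)]:

    #include <cmath>
    #include <iostream>
    #include <random>

    int main() {
        double sd = 2.0;
        double pi = std::acos(-1.0);
        // Closed form differential entropy of N(0, sd^2) in nats.
        double closedForm = 0.5 * std::log(2.0 * pi * std::exp(1.0) * sd * sd);

        // Monte Carlo estimate of -E[log f(X)].
        std::mt19937 rng(42);
        std::normal_distribution<double> X(0.0, sd);
        double sum = 0.0;
        const int n = 1000000;
        for (int i = 0; i < n; ++i) {
            double x = X(rng);
            double logf = -0.5 * std::log(2.0 * pi * sd * sd) - 0.5 * x * x / (sd * sd);
            sum -= logf;
        }
        std::cout << closedForm << " vs " << sum / n << '\n';  // both ~2.112
        return 0;
    }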
+ //! where,\n + //! \f$f(x)\f$ is the probability density function.\n\n + //! This computes the differential entropy in units of "nats", + //! i.e. the logarithm is the natural logarithm. + //@{ + static double differentialEntropy(const poisson& poisson_); + static double differentialEntropy(const normal& normal_); + static double differentialEntropy(const lognormal& logNormal); + static double differentialEntropy(const gamma& gamma_); + template + class CDifferentialEntropyKernel { public: - //! Approximate implementation of log(\p x), which is accurate - //! to FAST_LOG_PRECISION bits of precision. - //! - //! \param[in] x The value for which to compute the natural log. - //! \note This is taken from the approach given in - //! http://www.icsi.berkeley.edu/pubs/techreports/TR-07-002.pdf - static double fastLog(double x) - { - uint64_t mantissa; - int log2; - core::CIEEE754::decompose(x, mantissa, log2); - return 0.693147180559945 * (FAST_LOG_TABLE[mantissa] + log2); - } - //@} + CDifferentialEntropyKernel(const CMixtureDistribution& mixture) : m_Mixture(&mixture) {} - private: - //! Get the location of the point \p x. - template - static double location(T x) - { - return x; - } - //! Set \p x to \p y. - template - static void setLocation(T &x, double y) - { - x = static_cast(y); - } - //! Get a writable location of the point \p x. - template - static double location(const typename CBasicStatistics::SSampleMean::TAccumulator &x) - { - return CBasicStatistics::mean(x); - } - //! Set the mean of \p x to \p y. - template - static void setLocation(typename CBasicStatistics::SSampleMean::TAccumulator &x, double y) - { - x.s_Moments[0] = static_cast(y); + inline bool operator()(double x, double& result) const { + double fx = pdf(*m_Mixture, x); + result = fx == 0.0 ? 0.0 : -fx * std::log(fx); + return true; } - //! \brief Utility class to represent points which are adjacent - //! in the spreading algorithm. - class MATHS_EXPORT CGroup - { - public: - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - - public: - //! Create a new points group. - template - CGroup(std::size_t index, const T &points) : - m_A(index), - m_B(index), - m_Centre() - { - m_Centre.add(location(points[index])); - } - - //! Merge this group and \p other group. - void merge(const CGroup &other, - double separation, - double min, - double max); - - //! Check if this group and \p other group overlap. - bool overlap(const CGroup &other, - double separation) const; - - //! Update the locations of the points in this group based - //! on its centre position. - template - bool spread(double separation, T &points) const - { - if (m_A == m_B) - { - return false; - } - bool result = false; - double x = this->leftEndpoint(separation); - for (std::size_t i = m_A; i <= m_B; ++i, x += separation) - { - if (location(points[i]) != x) - { - setLocation(points[i], x); - result = true; - } - } - return result; - } - - private: - //! Get the position of the left end point of this group. - double leftEndpoint(double separation) const; + private: + const CMixtureDistribution* m_Mixture; + }; + template + static double differentialEntropy(const CMixtureDistribution& mixture); + //@} + + //! Check if \p log will underflow the smallest positive value of T. + //! + //! \tparam T must be a floating point type. + template + static bool logWillUnderflow(T log) { + static const T LOG_DENORM_MIN = std::log(std::numeric_limits::min()); + return log < LOG_DENORM_MIN; + } + + //! \name Fast Log +private: + //! 
The precision to use for fastLog, which gives good runtime + //! accuracy tradeoff. + static const int FAST_LOG_PRECISION = 14; + + //! Shift used to index the lookup table in fastLog. + static const std::size_t FAST_LOG_SHIFT = 52 - FAST_LOG_PRECISION; + + //! \brief Creates a lookup table for log2(x) with specified + //! accuracy. + //! + //! DESCRIPTION:\n + //! This implements a singleton lookup table for all values + //! of log base 2 of x for the mantissa of x in the range + //! [0, 2^52-1]. The specified accuracy, \p N, determines the + //! size of the lookup table, and values are equally spaced, + //! i.e. the separation is 2^52 / 2^N. This is used by fastLog + //! to read off the log base 2 to the specified precision. + //! + //! This is taken from the approach given in + //! http://www.icsi.berkeley.edu/pubs/techreports/TR-07-002.pdf + template + class CLookupTableForFastLog { + public: + static const std::size_t BINS = 1 << BITS; - //! Get the position of the right end point of this group. - double rightEndpoint(double separation) const; + public: + using TArray = boost::array; - std::size_t m_A; - std::size_t m_B; - TMeanAccumulator m_Centre; - }; + public: + //! Builds the table. + CLookupTableForFastLog() { + // Notes: + // 1) The shift is the maximum mantissa / BINS. + // 2) The sign bit is set to 0 which is positive. + // 3) The exponent is set to 1022, which is 0 in two's + // complement. + // 4) This implementation is endian neutral because it + // is constructing a look up from the mantissa value + // (interpreted as an integer) to the corresponding + // double value and fastLog uses the same approach + // to extract the mantissa. + uint64_t dx = 0x10000000000000ull / BINS; + core::CIEEE754::SDoubleRep x; + x.s_Sign = 0; + x.s_Mantissa = (dx / 2) & core::CIEEE754::IEEE754_MANTISSA_MASK; + x.s_Exponent = 1022; + for (std::size_t i = 0u; i < BINS; ++i, x.s_Mantissa = (x.s_Mantissa + dx) & core::CIEEE754::IEEE754_MANTISSA_MASK) { + double value; + static_assert(sizeof(double) == sizeof(core::CIEEE754::SDoubleRep), + "SDoubleRep definition unsuitable for memcpy to double"); + // Use memcpy() rather than union to adhere to strict + // aliasing rules + std::memcpy(&value, &x, sizeof(double)); + m_Table[i] = std::log2(value); + } + } - //! \brief Orders two points by their position. - class CPointLess - { - public: - template - bool operator()(const T &lhs, const T &rhs) const - { - return location(lhs) < location(rhs); - } - }; + //! Lookup log2 for a given mantissa. + const double& operator[](uint64_t mantissa) const { return m_Table[mantissa >> FAST_LOG_SHIFT]; } + private: + //! The quantized log base 2 for the mantissa range. + TArray m_Table; + }; + + //! The table used for computing fast log. + static const CLookupTableForFastLog FAST_LOG_TABLE; + +public: + //! Approximate implementation of log(\p x), which is accurate + //! to FAST_LOG_PRECISION bits of precision. + //! + //! \param[in] x The value for which to compute the natural log. + //! \note This is taken from the approach given in + //! http://www.icsi.berkeley.edu/pubs/techreports/TR-07-002.pdf + static double fastLog(double x) { + uint64_t mantissa; + int log2; + core::CIEEE754::decompose(x, mantissa, log2); + return 0.693147180559945 * (FAST_LOG_TABLE[mantissa] + log2); + } + //@} + +private: + //! Get the location of the point \p x. + template + static double location(T x) { + return x; + } + //! Set \p x to \p y. + template + static void setLocation(T& x, double y) { + x = static_cast(y); + } + //! 
Get a writable location of the point \p x. + template + static double location(const typename CBasicStatistics::SSampleMean::TAccumulator& x) { + return CBasicStatistics::mean(x); + } + //! Set the mean of \p x to \p y. + template + static void setLocation(typename CBasicStatistics::SSampleMean::TAccumulator& x, double y) { + x.s_Moments[0] = static_cast(y); + } + + //! \brief Utility class to represent points which are adjacent + //! in the spreading algorithm. + class MATHS_EXPORT CGroup { public: - //! \brief Ensure the points are at least \p separation apart.\n\n - //! This solves the problem of finding the new positions for the - //! points \f$\{x_i\}\f$ such that there is no pair of points for - //! which \f$\left \|x_j - x_i \right \| < s\f$ where \f$s\f$ - //! denotes the minimum separation \p separation and the total - //! square distance the points move, i.e. - //!
-        //!   \f$ \sum_i{(x_i' - x_i)^2} \f$
-        //! 
- //! is minimized. - //! - //! \param[in] a The left end point of the interval containing - //! the shifted points. - //! \param[in] b The right end point of the interval containing - //! the shifted points. - //! \param[in] separation The minimum permitted separation between - //! points. - //! \param[in,out] points The points to spread. - template - static void spread(double a, double b, double separation, T &points); + using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - //! Compute the sign of \p x and return T(-1) if it is negative and T(1) - //! otherwise. - //! - //! \param[in] x The value for which to check the sign. - //! \note Conversion of 0 and -1 to T should be well defined. - //! \note Zero maps to 1. + public: + //! Create a new points group. template - static T sign(const T &x) - { - return x < T(0) ? T(-1) : T(1); + CGroup(std::size_t index, const T& points) : m_A(index), m_B(index), m_Centre() { + m_Centre.add(location(points[index])); } - //! Truncate \p x to the range [\p a, \p b]. - //! - //! \tparam T Must support operator<. - template - static const T &truncate(const T &x, const T &a, const T &b) - { - return x < a ? a : (b < x ? b : x); - } + //! Merge this group and \p other group. + void merge(const CGroup& other, double separation, double min, double max); - //! Component-wise truncation of stack vectors. - template - static CVectorNx1 truncate(const CVectorNx1 &x, - const CVectorNx1 &a, - const CVectorNx1 &b) - { - CVectorNx1 result(x); - for (std::size_t i = 0u; i < N; ++i) - { - result(i) = truncate(result(i), a(i), b(i)); - } - return result; - } + //! Check if this group and \p other group overlap. + bool overlap(const CGroup& other, double separation) const; - //! Component-wise truncation of heap vectors. + //! Update the locations of the points in this group based + //! on its centre position. template - static CVector truncate(const CVector &x, - const CVector &a, - const CVector &b) - { - CVector result(x); - for (std::size_t i = 0u; i < result.dimension(); ++i) - { - result(i) = truncate(result(i), a(i), b(i)); + bool spread(double separation, T& points) const { + if (m_A == m_B) { + return false; } - return result; - } - - //! Component-wise truncation of small vector. - template - static core::CSmallVector truncate(const core::CSmallVector &x, - const core::CSmallVector &a, - const core::CSmallVector &b) - { - core::CSmallVector result(x); - for (std::size_t i = 0u; i < result.size(); ++i) - { - result[i] = truncate(result[i], a[i], b[i]); + bool result = false; + double x = this->leftEndpoint(separation); + for (std::size_t i = m_A; i <= m_B; ++i, x += separation) { + if (location(points[i]) != x) { + setLocation(points[i], x); + result = true; + } } return result; } - //! Shift \p x to the left by \p eps times \p x. - static double shiftLeft(double x, double eps = std::numeric_limits::epsilon()); + private: + //! Get the position of the left end point of this group. + double leftEndpoint(double separation) const; - //! Shift \p x to the right by \p eps times \p x. - static double shiftRight(double x, double eps = std::numeric_limits::epsilon()); + //! Get the position of the right end point of this group. + double rightEndpoint(double separation) const; - //! Compute \f$x^2\f$. - static double pow2(double x) - { - return x * x; - } + std::size_t m_A; + std::size_t m_B; + TMeanAccumulator m_Centre; + }; - //! Sigmoid function of \p p. - static double sigmoid(double p) - { - return 1.0 / (1.0 + 1.0 / p); + //! 
\brief Orders two points by their position. + class CPointLess { + public: + template + bool operator()(const T& lhs, const T& rhs) const { + return location(lhs) < location(rhs); } - - //! The logistic function. - //! - //! i.e. \f$sigmoid\left(\frac{sign (x - x0)}{width}\right)\f$. - //! - //! \param[in] x The argument. - //! \param[in] width The step width. - //! \param[in] x0 The centre of the step. - //! \param[in] sign Determines whether it's a step up or down. - static double logisticFunction(double x, double width, double x0 = 0.0, double sign = 1.0) - { - return sigmoid(std::exp(sign / std::fabs(sign) * (x - x0) / width)); + }; + +public: + //! \brief Ensure the points are at least \p separation apart.\n\n + //! This solves the problem of finding the new positions for the + //! points \f$\{x_i\}\f$ such that there is no pair of points for + //! which \f$\left \|x_j - x_i \right \| < s\f$ where \f$s\f$ + //! denotes the minimum separation \p separation and the total + //! square distance the points move, i.e. + //!
+    //!   \f$ \sum_i{(x_i' - x_i)^2} \f$
+    //! 
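After substituting z_i = x_i - i * s for sorted points, the minimum separation constraint becomes monotonicity of z, so the minimizer is given by isotonic regression. The pool-adjacent-violators sketch below (an editor's illustration which ignores the [a, b] clamping the library also applies) performs essentially the same overlap-merge-and-recentre step as the CGroup machinery above:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Spread sorted points so adjacent ones are >= separation apart while
    // minimizing the total squared movement. With z[i] = x[i] - i * s this
    // is isotonic regression of z: overlapping groups are pooled and each
    // pooled group is re-centred on its mean.
    void spread(double separation, std::vector<double>& x) {
        std::size_t n = x.size();
        std::vector<double> mean;       // pooled group means of z
        std::vector<std::size_t> size;  // pooled group sizes
        for (std::size_t i = 0; i < n; ++i) {
            mean.push_back(x[i] - static_cast<double>(i) * separation);
            size.push_back(1);
            // Merge while the last two groups violate monotonicity.
            while (mean.size() > 1 && mean[mean.size() - 2] > mean.back()) {
                double m = (mean[mean.size() - 2] * size[size.size() - 2] +
                            mean.back() * size.back()) /
                           (size[size.size() - 2] + size.back());
                size[size.size() - 2] += size.back();
                mean[mean.size() - 2] = m;
                mean.pop_back();
                size.pop_back();
            }
        }
        // Read the solution back off the pooled groups.
        for (std::size_t i = 0, j = 0, k = 0; i < n; ++i, ++k) {
            if (k == size[j]) { ++j; k = 0; }
            x[i] = mean[j] + static_cast<double>(i) * separation;
        }
    }

    int main() {
        std::vector<double> x{1.0, 1.1, 1.2, 5.0};
        spread(1.0, x);  // -> 0.1 1.1 2.1 5.0
        for (double xi : x) { std::cout << xi << ' '; }
        std::cout << '\n';
        return 0;
    }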
+ //! is minimized. + //! + //! \param[in] a The left end point of the interval containing + //! the shifted points. + //! \param[in] b The right end point of the interval containing + //! the shifted points. + //! \param[in] separation The minimum permitted separation between + //! points. + //! \param[in,out] points The points to spread. + template + static void spread(double a, double b, double separation, T& points); + + //! Compute the sign of \p x and return T(-1) if it is negative and T(1) + //! otherwise. + //! + //! \param[in] x The value for which to check the sign. + //! \note Conversion of 0 and -1 to T should be well defined. + //! \note Zero maps to 1. + template + static T sign(const T& x) { + return x < T(0) ? T(-1) : T(1); + } + + //! Truncate \p x to the range [\p a, \p b]. + //! + //! \tparam T Must support operator<. + template + static const T& truncate(const T& x, const T& a, const T& b) { + return x < a ? a : (b < x ? b : x); + } + + //! Component-wise truncation of stack vectors. + template + static CVectorNx1 truncate(const CVectorNx1& x, const CVectorNx1& a, const CVectorNx1& b) { + CVectorNx1 result(x); + for (std::size_t i = 0u; i < N; ++i) { + result(i) = truncate(result(i), a(i), b(i)); } - - //! A custom, numerically robust, implementation of \f$(1 - x) ^ p\f$. - //! - //! \note It is assumed that p is integer. - static double powOneMinusX(double x, double p); - - //! A custom, numerically robust, implementation of \f$1 - (1 - x) ^ p\f$. - //! - //! \note It is assumed that p is integer. - static double oneMinusPowOneMinusX(double x, double p); - - //! A custom implementation of \f$\log(1 - x)\f$ which handles the - //! cancellation error for small x. - static double logOneMinusX(double x); + return result; + } + + //! Component-wise truncation of heap vectors. + template + static CVector truncate(const CVector& x, const CVector& a, const CVector& b) { + CVector result(x); + for (std::size_t i = 0u; i < result.dimension(); ++i) { + result(i) = truncate(result(i), a(i), b(i)); + } + return result; + } + + //! Component-wise truncation of small vector. + template + static core::CSmallVector + truncate(const core::CSmallVector& x, const core::CSmallVector& a, const core::CSmallVector& b) { + core::CSmallVector result(x); + for (std::size_t i = 0u; i < result.size(); ++i) { + result[i] = truncate(result[i], a[i], b[i]); + } + return result; + } + + //! Shift \p x to the left by \p eps times \p x. + static double shiftLeft(double x, double eps = std::numeric_limits::epsilon()); + + //! Shift \p x to the right by \p eps times \p x. + static double shiftRight(double x, double eps = std::numeric_limits::epsilon()); + + //! Compute \f$x^2\f$. + static double pow2(double x) { return x * x; } + + //! Sigmoid function of \p p. + static double sigmoid(double p) { return 1.0 / (1.0 + 1.0 / p); } + + //! The logistic function. + //! + //! i.e. \f$sigmoid\left(\frac{sign (x - x0)}{width}\right)\f$. + //! + //! \param[in] x The argument. + //! \param[in] width The step width. + //! \param[in] x0 The centre of the step. + //! \param[in] sign Determines whether it's a step up or down. + static double logisticFunction(double x, double width, double x0 = 0.0, double sign = 1.0) { + return sigmoid(std::exp(sign / std::fabs(sign) * (x - x0) / width)); + } + + //! A custom, numerically robust, implementation of \f$(1 - x) ^ p\f$. + //! + //! \note It is assumed that p is integer. + static double powOneMinusX(double x, double p); + + //! 
A custom, numerically robust, implementation of \f$1 - (1 - x) ^ p\f$. + //! + //! \note It is assumed that p is integer. + static double oneMinusPowOneMinusX(double x, double p); + + //! A custom implementation of \f$\log(1 - x)\f$ which handles the + //! cancellation error for small x. + static double logOneMinusX(double x); }; - } } diff --git a/include/maths/CToolsDetail.h b/include/maths/CToolsDetail.h index d2efaa2dfb..61024ca2e8 100644 --- a/include/maths/CToolsDetail.h +++ b/include/maths/CToolsDetail.h @@ -21,32 +21,23 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { template -CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::CSmoothedKernel(LOGF logf, - double logF0, - double k) : - m_LogF(logf), - m_LogF0(logF0), - m_K(k), - m_Scale(std::exp(m_LogF0) * (1.0 + std::exp(-k))) -{} +CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::CSmoothedKernel(LOGF logf, double logF0, double k) + : m_LogF(logf), m_LogF0(logF0), m_K(k), m_Scale(std::exp(m_LogF0) * (1.0 + std::exp(-k))) { +} template -void CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::k(double k) -{ +void CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::k(double k) { double f0 = m_Scale / (1.0 + std::exp(-m_K)); m_K = k; m_Scale = f0 * (1.0 + std::exp(-k)); } template -bool CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::operator()(double x, double &result) const -{ +bool CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::operator()(double x, double& result) const { // We use the fact that if: // 1 + exp(-k(f(x)/f0 - 1)) < (1 + eps) * exp(-k(f(x)/f0 - 1)) // @@ -61,19 +52,16 @@ bool CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::opera result = 0.0; double logFx; - if (!m_LogF(x, logFx)) - { + if (!m_LogF(x, logFx)) { LOG_ERROR("Failed to calculate likelihood at " << x); return false; } logFx -= m_LogF0; - if (m_K * (logFx - 1.0) >= core::constants::LOG_MAX_DOUBLE) - { + if (m_K * (logFx - 1.0) >= core::constants::LOG_MAX_DOUBLE) { return true; } double fx = std::exp(logFx); - if (fx < 1.0 + core::constants::LOG_DOUBLE_EPSILON / m_K) - { + if (fx < 1.0 + core::constants::LOG_DOUBLE_EPSILON / m_K) { result = m_Scale * fx; return true; } @@ -82,128 +70,99 @@ bool CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::opera } template -bool CTools::CMixtureProbabilityOfLessLikelySample::leftTail(const LOGF &logf, +bool CTools::CMixtureProbabilityOfLessLikelySample::leftTail(const LOGF& logf, std::size_t iterations, - const EQUAL &equal, - double &result) const -{ - if (m_X <= m_A) - { + const EQUAL& equal, + double& result) const { + if (m_X <= m_A) { result = m_X; return true; } CCompositeFunctions::CMinusConstant f(logf, m_LogFx); - try - { + try { double xr = m_A; double fr = f(xr); - if (fr < 0.0) - { + if (fr < 0.0) { result = m_A; return true; } double xl = xr; double fl = fr; - if (m_MaxDeviation.count() > 0) - { + if (m_MaxDeviation.count() > 0) { xl = xr - m_MaxDeviation[0]; fl = f(xl); } iterations = std::max(iterations, std::size_t(4)); std::size_t n = iterations - 2; - if (!CSolvers::leftBracket(xl, xr, fl, fr, f, n)) - { + if (!CSolvers::leftBracket(xl, xr, fl, fr, f, n)) { result = xl; return false; } n = iterations - n; CSolvers::solve(xl, xr, fl, fr, f, n, equal, result); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to find left root: " << e.what() - << ", a = " << m_A - << ", logf(x) = " << m_LogFx - << ", logf(a) = " << logf(m_A) - << ", max 
deviation = " << (m_MaxDeviation.count() > 0 ? m_MaxDeviation[0] : 0.0)); + } catch (const std::exception& e) { + LOG_ERROR("Failed to find left root: " << e.what() << ", a = " << m_A << ", logf(x) = " << m_LogFx << ", logf(a) = " << logf(m_A) + << ", max deviation = " << (m_MaxDeviation.count() > 0 ? m_MaxDeviation[0] : 0.0)); return false; } return true; } template -bool CTools::CMixtureProbabilityOfLessLikelySample::rightTail(const LOGF &logf, +bool CTools::CMixtureProbabilityOfLessLikelySample::rightTail(const LOGF& logf, std::size_t iterations, - const EQUAL &equal, - double &result) const -{ - if (m_X >= m_B) - { + const EQUAL& equal, + double& result) const { + if (m_X >= m_B) { result = m_X; return true; } CCompositeFunctions::CMinusConstant f(logf, m_LogFx); - try - { + try { double xl = m_B; double fl = f(xl); - if (fl < 0.0) - { + if (fl < 0.0) { result = m_B; return true; } double xr = xl; double fr = fl; - if (m_MaxDeviation.count() > 0) - { + if (m_MaxDeviation.count() > 0) { xr = xl + m_MaxDeviation[0]; fr = f(xr); } iterations = std::max(iterations, std::size_t(4)); std::size_t n = iterations - 2; - if (!CSolvers::rightBracket(xl, xr, fl, fr, f, n)) - { + if (!CSolvers::rightBracket(xl, xr, fl, fr, f, n)) { result = xr; return false; } n = iterations - n; CSolvers::solve(xl, xr, fl, fr, f, n, equal, result); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to find right root: " << e.what() - << ",b = " << m_B - << ", logf(x) = " << m_LogFx - << ", logf(b) = " << logf(m_B)); + } catch (const std::exception& e) { + LOG_ERROR("Failed to find right root: " << e.what() << ",b = " << m_B << ", logf(x) = " << m_LogFx << ", logf(b) = " << logf(m_B)); return false; } return true; } template -double CTools::CMixtureProbabilityOfLessLikelySample::calculate(const LOGF &logf, double pTails) -{ +double CTools::CMixtureProbabilityOfLessLikelySample::calculate(const LOGF& logf, double pTails) { TDoubleDoublePrVec intervals; this->intervals(intervals); double p = 0.0; TDoubleVec pIntervals(intervals.size(), 0.0); CSmoothedKernel kernel(logf, m_LogFx, 3.0); - for (std::size_t i = 0u; i < intervals.size(); ++i) - { - if (!CIntegration::gaussLegendre(kernel, - intervals[i].first, - intervals[i].second, - pIntervals[i])) - { - LOG_ERROR("Couldn't integrate kernel over " - << core::CContainerPrinter::print(intervals[i])); + for (std::size_t i = 0u; i < intervals.size(); ++i) { + if (!CIntegration::gaussLegendre(kernel, intervals[i].first, intervals[i].second, pIntervals[i])) { + LOG_ERROR("Couldn't integrate kernel over " << core::CContainerPrinter::print(intervals[i])); } } @@ -220,39 +179,31 @@ double CTools::CMixtureProbabilityOfLessLikelySample::calculate(const LOGF &logf } template -double CTools::differentialEntropy(const CMixtureDistribution &mixture) -{ +double CTools::differentialEntropy(const CMixtureDistribution& mixture) { using TModeVec = typename CMixtureDistribution::TModeVec; static const double EPS = 1e-5; static const std::size_t INTERVALS = 8u; - const TDoubleVec &weights = mixture.weights(); - const TModeVec &modes = mixture.modes(); + const TDoubleVec& weights = mixture.weights(); + const TModeVec& modes = mixture.modes(); - if (weights.empty()) - { + if (weights.empty()) { return 0.0; } TDoubleDoublePrVec range; - for (std::size_t i = 0u; i < modes.size(); ++i) - { - range.push_back(TDoubleDoublePr(quantile(modes[i], EPS), - quantile(modes[i], 1.0 - EPS))); + for (std::size_t i = 0u; i < modes.size(); ++i) { + range.push_back(TDoubleDoublePr(quantile(modes[i], 
EPS), quantile(modes[i], 1.0 - EPS))); } std::sort(range.begin(), range.end(), COrderings::SFirstLess()); LOG_TRACE("range = " << core::CContainerPrinter::print(range)); std::size_t left = 0u; - for (std::size_t i = 1u; i < range.size(); ++i) - { - if (range[left].second < range[i].first) - { + for (std::size_t i = 1u; i < range.size(); ++i) { + if (range[left].second < range[i].first) { ++left; std::swap(range[left], range[i]); - } - else - { + } else { range[left].second = std::max(range[left].second, range[i].second); } } @@ -262,19 +213,13 @@ double CTools::differentialEntropy(const CMixtureDistribution &mixture) double result = 0.0; CDifferentialEntropyKernel kernel(mixture); - for (std::size_t i = 0u; i < range.size(); ++i) - { + for (std::size_t i = 0u; i < range.size(); ++i) { double a = range[i].first; - double d = (range[i].second - range[i].first) - / static_cast(INTERVALS); + double d = (range[i].second - range[i].first) / static_cast(INTERVALS); - for (std::size_t j = 0u; j < INTERVALS; ++j, a += d) - { + for (std::size_t j = 0u; j < INTERVALS; ++j, a += d) { double integral; - if (CIntegration::gaussLegendre(kernel, - a, a+d, - integral)) - { + if (CIntegration::gaussLegendre(kernel, a, a + d, integral)) { result += integral; } } @@ -285,25 +230,19 @@ double CTools::differentialEntropy(const CMixtureDistribution &mixture) } template -void CTools::spread(double a, double b, double separation, T &points) -{ - if (points.empty()) - { +void CTools::spread(double a, double b, double separation, T& points) { + if (points.empty()) { return; } - if (b <= a) - { + if (b <= a) { LOG_ERROR("Bad interval [" << a << "," << b << "]"); return; } std::size_t n = points.size() - 1; - if (b - a <= separation * static_cast(n + 1)) - { - for (std::size_t i = 0u; i <= n; ++i) - { - setLocation(points[i], a + (b - a) * static_cast(i) - / static_cast(n)); + if (b - a <= separation * static_cast(n + 1)) { + for (std::size_t i = 0u; i <= n; ++i) { + setLocation(points[i], a + (b - a) * static_cast(i) / static_cast(n)); } return; } @@ -322,46 +261,36 @@ void CTools::spread(double a, double b, double separation, T &points) // of traversal through the points we avoid the worst case n // traversals of the points. - for (std::size_t i = 0u; a > 0.0 && i <= n; ++i) - { + for (std::size_t i = 0u; a > 0.0 && i <= n; ++i) { points[i] -= a; } std::sort(points.begin(), points.end(), CPointLess()); bool moved = false; std::size_t iteration = 0u; - do - { + do { moved = false; bool forward = (iteration++ % 2 == 0); LOG_TRACE((forward ? "forward" : "backward")); CGroup last(forward ? 0 : n, points); - for (std::size_t i = 1u; i <= n; ++i) - { + for (std::size_t i = 1u; i <= n; ++i) { CGroup test(forward ? 
i : n - i, points); - if (last.overlap(test, separation)) - { - last.merge(test, separation, 0.0, b-a); - } - else - { + if (last.overlap(test, separation)) { + last.merge(test, separation, 0.0, b - a); + } else { moved |= last.spread(separation, points); last = test; } } moved |= last.spread(separation, points); - } - while (moved && iteration <= n); + } while (moved && iteration <= n); - for (std::size_t i = 0u; a > 0.0 && i <= n; ++i) - { + for (std::size_t i = 0u; a > 0.0 && i <= n; ++i) { points[i] += a; } - LOG_TRACE("# iterations = " << iteration - << " # points = " << n + 1); + LOG_TRACE("# iterations = " << iteration << " # points = " << n + 1); } - } } diff --git a/include/maths/CTrendComponent.h b/include/maths/CTrendComponent.h index d1bd7aaa4d..d825220c10 100644 --- a/include/maths/CTrendComponent.h +++ b/include/maths/CTrendComponent.h @@ -20,10 +20,8 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { struct SDistributionRestoreParams; //! \brief Models the trend component of a time series. @@ -43,229 +41,220 @@ struct SDistributionRestoreParams; //! we see w.r.t. the predictions from the next longer time scale component). //! This produces plausible looking and this sort of mean reversion is common //! in many real world time series. -class MATHS_EXPORT CTrendComponent -{ +class MATHS_EXPORT CTrendComponent { +public: + using TDoubleDoublePr = maths_t::TDoubleDoublePr; + using TDoubleDoublePrVec = std::vector; + using TDoubleVec = std::vector; + using TDoubleVecVec = std::vector; + using TDouble3Vec = core::CSmallVector; + using TDouble3VecVec = std::vector; + using TVector = CVectorNx1; + using TVectorVec = std::vector; + using TVectorVecVec = std::vector; + using TMatrix = CSymmetricMatrixNxN; + using TMatrixVec = std::vector; + using TSeasonalForecast = std::function; + using TWriteForecastResult = std::function; + +public: + CTrendComponent(double decayRate); + + //! Efficiently swap the state of this and \p other. + void swap(CTrendComponent& other); + + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + + //! Check if the trend has been estimated. + bool initialized() const; + + //! Clear all data. + void clear(); + + //! Shift the regression models' time origins to \p time. + void shiftOrigin(core_t::TTime time); + + //! Shift the slope of all regression models' whose decay rate is + //! greater than \p decayRate. + void shiftSlope(double decayRate, double shift); + + //! Apply a level shift of \p value at \p time and \p value. + void shiftLevel(core_t::TTime time, double value, double shift); + + //! Apply no level shift at \p time and \p value. + //! + //! This updates the model for the probability of a level shift. + void dontShiftLevel(core_t::TTime time, double value); + + //! Apply a linear scale by \p scale. + void linearScale(double scale); + + //! Adds a value \f$(t, f(t))\f$ to this component. + //! + //! \param[in] time The time of the point. + //! \param[in] value The value at \p time. + //! \param[in] weight The weight of \p value. The smaller this is the + //! less influence it has on the component. + void add(core_t::TTime time, double value, double weight = 1.0); + + //! Set the data type. + void dataType(maths_t::EDataType dataType); + + //! 
Get the base rate at which models lose information. + double defaultDecayRate() const; + + //! Set the rate base rate at which models lose information. + void decayRate(double decayRate); + + //! Age the trend to account for \p interval elapsed time. + void propagateForwardsByTime(core_t::TTime interval); + + //! Get the predicted value at \p time. + //! + //! \param[in] time The time of interest. + //! \param[in] confidence The symmetric confidence interval for the variance + //! as a percentage. + TDoubleDoublePr value(core_t::TTime time, double confidence) const; + + //! Get the variance of the residual about the predicted value at \p time. + //! + //! \param[in] confidence The symmetric confidence interval for the + //! variance as a percentage. + TDoubleDoublePr variance(double confidence) const; + + //! Forecast the trend model from \p startTime to \p endTime. + //! + //! \param[in] startTime The start time of the forecast interval. + //! \param[in] endTime The end time of the forecast interval. + //! \param[in] step The time step. + //! \param[in] confidence The confidence interval to calculate. + //! \param[in] seasonal Forecasts seasonal components. + //! \param[in] writer Writes out forecast results. + void forecast(core_t::TTime startTime, + core_t::TTime endTime, + core_t::TTime step, + double confidence, + const TSeasonalForecast& seasonal, + const TWriteForecastResult& writer) const; + + //! Get the interval which has been observed so far. + core_t::TTime observedInterval() const; + + //! Get the number of parameters used to describe the trend. + double parameters() const; + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed = 0) const; + + //! Get a debug description of this object. + std::string print() const; + +private: + using TRegression = CRegression::CLeastSquaresOnline<2, double>; + using TRegressionArray = TRegression::TArray; + using TRegressionArrayVec = std::vector; + using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + using TMeanAccumulatorVec = std::vector; + using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; + + //! \brief A model of the trend at a specific time scale. + struct SModel { + explicit SModel(double weight); + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + uint64_t checksum(uint64_t seed) const; + TMeanAccumulator s_Weight; + TRegression s_Regression; + TMeanVarAccumulator s_ResidualMoments; + }; + using TModelVec = std::vector; + + //! \brief Forecasts the level model by path roll out. + class CForecastLevel : private core::CNonCopyable { public: - using TDoubleDoublePr = maths_t::TDoubleDoublePr; - using TDoubleDoublePrVec = std::vector; - using TDoubleVec = std::vector; - using TDoubleVecVec = std::vector; - using TDouble3Vec = core::CSmallVector; - using TDouble3VecVec = std::vector; - using TVector = CVectorNx1; - using TVectorVec = std::vector; - using TVectorVecVec = std::vector; - using TMatrix = CSymmetricMatrixNxN; - using TMatrixVec = std::vector; - using TSeasonalForecast = std::function; - using TWriteForecastResult = std::function; + //! The default number of roll out paths to use. + static const std::size_t DEFAULT_NUMBER_PATHS{100u}; public: - CTrendComponent(double decayRate); - - //! Efficiently swap the state of this and \p other. - void swap(CTrendComponent &other); - - //! Persist state by passing information to \p inserter. 
- void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Check if the trend has been estimated. - bool initialized() const; - - //! Clear all data. - void clear(); - - //! Shift the regression models' time origins to \p time. - void shiftOrigin(core_t::TTime time); - - //! Shift the slope of all regression models' whose decay rate is - //! greater than \p decayRate. - void shiftSlope(double decayRate, double shift); - - //! Apply a level shift of \p value at \p time and \p value. - void shiftLevel(core_t::TTime time, double value, double shift); - - //! Apply no level shift at \p time and \p value. - //! - //! This updates the model for the probability of a level shift. - void dontShiftLevel(core_t::TTime time, double value); - - //! Apply a linear scale by \p scale. - void linearScale(double scale); - - //! Adds a value \f$(t, f(t))\f$ to this component. - //! - //! \param[in] time The time of the point. - //! \param[in] value The value at \p time. - //! \param[in] weight The weight of \p value. The smaller this is the - //! less influence it has on the component. - void add(core_t::TTime time, double value, double weight = 1.0); - - //! Set the data type. - void dataType(maths_t::EDataType dataType); - - //! Get the base rate at which models lose information. - double defaultDecayRate() const; - - //! Set the rate base rate at which models lose information. - void decayRate(double decayRate); - - //! Age the trend to account for \p interval elapsed time. - void propagateForwardsByTime(core_t::TTime interval); - - //! Get the predicted value at \p time. - //! - //! \param[in] time The time of interest. - //! \param[in] confidence The symmetric confidence interval for the variance - //! as a percentage. - TDoubleDoublePr value(core_t::TTime time, double confidence) const; - - //! Get the variance of the residual about the predicted value at \p time. - //! - //! \param[in] confidence The symmetric confidence interval for the - //! variance as a percentage. - TDoubleDoublePr variance(double confidence) const; - - //! Forecast the trend model from \p startTime to \p endTime. - //! - //! \param[in] startTime The start time of the forecast interval. - //! \param[in] endTime The end time of the forecast interval. - //! \param[in] step The time step. - //! \param[in] confidence The confidence interval to calculate. - //! \param[in] seasonal Forecasts seasonal components. - //! \param[in] writer Writes out forecast results. - void forecast(core_t::TTime startTime, - core_t::TTime endTime, - core_t::TTime step, - double confidence, - const TSeasonalForecast &seasonal, - const TWriteForecastResult &writer) const; - - //! Get the interval which has been observed so far. - core_t::TTime observedInterval() const; - - //! Get the number of parameters used to describe the trend. - double parameters() const; - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; - - //! Get a debug description of this object. 
- std::string print() const; + CForecastLevel(const CNaiveBayes& probability, + const CNormalMeanPrecConjugate& magnitude, + core_t::TTime timeOfLastChange, + std::size_t numberPaths = DEFAULT_NUMBER_PATHS); - private: - using TRegression = CRegression::CLeastSquaresOnline<2, double>; - using TRegressionArray = TRegression::TArray; - using TRegressionArrayVec = std::vector; - using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TMeanAccumulatorVec = std::vector; - using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; - - //! \brief A model of the trend at a specific time scale. - struct SModel - { - explicit SModel(double weight); - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - uint64_t checksum(uint64_t seed) const; - TMeanAccumulator s_Weight; - TRegression s_Regression; - TMeanVarAccumulator s_ResidualMoments; - }; - using TModelVec = std::vector; - - //! \brief Forecasts the level model by path roll out. - class CForecastLevel : private core::CNonCopyable - { - public: - //! The default number of roll out paths to use. - static const std::size_t DEFAULT_NUMBER_PATHS{100u}; - - public: - CForecastLevel(const CNaiveBayes &probability, - const CNormalMeanPrecConjugate &magnitude, - core_t::TTime timeOfLastChange, - std::size_t numberPaths = DEFAULT_NUMBER_PATHS); - - //! Forecast the time series level at \p time. - TDouble3Vec forecast(core_t::TTime time, - double prediction, - double confidence); - - private: - using TTimeVec = std::vector; - - private: - //! The model of the change probability. - const CNaiveBayes &m_Probability; - //! The model of the change magnitude. - const CNormalMeanPrecConjugate &m_Magnitude; - //! A random number generator for generating roll outs. - CPRNG::CXorOShiro128Plus m_Rng; - //! The current roll outs forecasted levels. - TDoubleVec m_Levels; - //! The current roll outs times of last change. - TTimeVec m_TimesOfLastChange; - //! Maintains the current bucket probability of change. - TDoubleVec m_ProbabilitiesOfChange; - //! Place holder for sampling. - TDoubleVec m_Uniform01; - }; + //! Forecast the time series level at \p time. + TDouble3Vec forecast(core_t::TTime time, double prediction, double confidence); private: - //! Get the factors by which to age the different regression models. - TDoubleVec factors(core_t::TTime interval) const; - - //! Get the initial weights to use for forecast predictions. - TDoubleVec initialForecastModelWeights() const; - - //! Get the initial weights to use for forecast prediction errors. - TDoubleVec initialForecastErrorWeights() const; - - //! Get the mean count of samples in the prediction. - double count() const; - - //! Get the predicted value at \p time. - double value(const TDoubleVec &weights, - const TRegressionArrayVec &models, - double time) const; - - //! Get the weight to assign to the prediction verses the long term mean. - double weightOfPrediction(core_t::TTime time) const; + using TTimeVec = std::vector; private: - //! The default rate at which information is aged out of the trend models. - double m_DefaultDecayRate; - - //! The target rate at which information is aged out of the ensemble. - double m_TargetDecayRate; - - //! The time the model was first updated. - core_t::TTime m_FirstUpdate; - //! The time the model was last updated. - core_t::TTime m_LastUpdate; - - //! The start time of the regression models. - core_t::TTime m_RegressionOrigin; - //! 
The regression models (we have them for multiple time scales). - TModelVec m_TrendModels; - //! The variance of the prediction errors. - double m_PredictionErrorVariance; - //! The mean and variance of the values added to the trend component. - TMeanVarAccumulator m_ValueMoments; - - //! The time of the last level change. - core_t::TTime m_TimeOfLastLevelChange; - //! A model of probability of level changes for the trend. - CNaiveBayes m_ProbabilityOfLevelChangeModel; - //! A model of magnitude of level changes for the trend. - CNormalMeanPrecConjugate m_MagnitudeOfLevelChangeModel; + //! The model of the change probability. + const CNaiveBayes& m_Probability; + //! The model of the change magnitude. + const CNormalMeanPrecConjugate& m_Magnitude; + //! A random number generator for generating roll outs. + CPRNG::CXorOShiro128Plus m_Rng; + //! The current roll outs forecasted levels. + TDoubleVec m_Levels; + //! The current roll outs times of last change. + TTimeVec m_TimesOfLastChange; + //! Maintains the current bucket probability of change. + TDoubleVec m_ProbabilitiesOfChange; + //! Place holder for sampling. + TDoubleVec m_Uniform01; + }; + +private: + //! Get the factors by which to age the different regression models. + TDoubleVec factors(core_t::TTime interval) const; + + //! Get the initial weights to use for forecast predictions. + TDoubleVec initialForecastModelWeights() const; + + //! Get the initial weights to use for forecast prediction errors. + TDoubleVec initialForecastErrorWeights() const; + + //! Get the mean count of samples in the prediction. + double count() const; + + //! Get the predicted value at \p time. + double value(const TDoubleVec& weights, const TRegressionArrayVec& models, double time) const; + + //! Get the weight to assign to the prediction verses the long term mean. + double weightOfPrediction(core_t::TTime time) const; + +private: + //! The default rate at which information is aged out of the trend models. + double m_DefaultDecayRate; + + //! The target rate at which information is aged out of the ensemble. + double m_TargetDecayRate; + + //! The time the model was first updated. + core_t::TTime m_FirstUpdate; + //! The time the model was last updated. + core_t::TTime m_LastUpdate; + + //! The start time of the regression models. + core_t::TTime m_RegressionOrigin; + //! The regression models (we have them for multiple time scales). + TModelVec m_TrendModels; + //! The variance of the prediction errors. + double m_PredictionErrorVariance; + //! The mean and variance of the values added to the trend component. + TMeanVarAccumulator m_ValueMoments; + + //! The time of the last level change. + core_t::TTime m_TimeOfLastLevelChange; + //! A model of probability of level changes for the trend. + CNaiveBayes m_ProbabilityOfLevelChangeModel; + //! A model of magnitude of level changes for the trend. + CNormalMeanPrecConjugate m_MagnitudeOfLevelChangeModel; }; - } } diff --git a/include/maths/CTrendTests.h b/include/maths/CTrendTests.h index 796742a136..93d2d6d325 100644 --- a/include/maths/CTrendTests.h +++ b/include/maths/CTrendTests.h @@ -7,9 +7,9 @@ #ifndef INCLUDED_ml_maths_CTrendTests_h #define INCLUDED_ml_maths_CTrendTests_h -#include #include #include +#include #include #include @@ -33,10 +33,8 @@ class CTrendTestsTest; -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CSeasonalTime; //! \brief A low memory footprint randomized test for probability. @@ -61,105 +59,103 @@ class CSeasonalTime; //! 
of samples grows so the significance for rejecting the //! null hypothesis (that the function is a-periodic) will //! shrink to zero. -class MATHS_EXPORT CRandomizedPeriodicityTest -{ - public: - //! The size of the projection sample coefficients - static const std::size_t N = 5; - - public: - CRandomizedPeriodicityTest(); - - //! \name Persistence - //@{ - //! Restore the static members by reading state from \p traverser. - static bool staticsAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Persist the static members by passing information to \p inserter. - static void staticsAcceptPersistInserter(core::CStatePersistInserter &inserter); - - //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - //@} - - //! Add a new value \p value at \p time. - void add(core_t::TTime time, double value); - - //! Test whether there is a periodic trend. - bool test() const; - - //! Reset the test static random vectors. - //! - //! \note For unit testing only. - static void reset(); - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; - - private: - using TDoubleVec = std::vector; - using TVector2 = CVectorNx1; - using TVector2MeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TVector2N = CVectorNx1; - using TVector2NMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TAtomicTime = std::atomic; - - private: - //! The length over which the periodic random projection decoheres. - static const core_t::TTime SAMPLE_INTERVAL; - //! The time between day resample events. - static const core_t::TTime DAY_RESAMPLE_INTERVAL; - //! The time between week resample events. - static const core_t::TTime WEEK_RESAMPLE_INTERVAL; - //! The random number generator. - static boost::random::mt19937_64 ms_Rng; - //! The permutations daily projections. - static TDoubleVec ms_DayRandomProjections[N]; - //! The daily periodic projections. - static TDoubleVec ms_DayPeriodicProjections[N]; - //! The time at which we re-sampled day projections. - static TAtomicTime ms_DayResampled; - //! The permutations weekly projections. - static TDoubleVec ms_WeekRandomProjections[N]; - //! The weekly periodic projections. - static TDoubleVec ms_WeekPeriodicProjections[N]; - //! The time at which we re-sampled week projections. - static TAtomicTime ms_WeekResampled; - //! The mutex for protecting state update. - static core::CMutex ms_Lock; - - private: - //! Refresh \p projections and update \p statistics. - static void updateStatistics(TVector2NMeanAccumulator &projections, - TVector2MeanAccumulator &statistics); - - //! Re-sample the projections. - static void resample(core_t::TTime time); - - //! Re-sample the specified projections. - static void resample(core_t::TTime period, - core_t::TTime resampleInterval, - TDoubleVec (&periodicProjections)[N], - TDoubleVec (&randomProjections)[N]); - - private: - //! The day projections. - TVector2NMeanAccumulator m_DayProjections; - //! The sample mean of the square day projections. - TVector2MeanAccumulator m_DayStatistics; - //! The last time the day projections were updated. - core_t::TTime m_DayRefreshedProjections; - //! The week projections. - TVector2NMeanAccumulator m_WeekProjections; - //! The sample mean of the square week projections. - TVector2MeanAccumulator m_WeekStatistics; - //! 
The last time the day projections were updated.
-        core_t::TTime m_WeekRefreshedProjections;
-
-        friend class ::CTrendTestsTest;
+class MATHS_EXPORT CRandomizedPeriodicityTest {
+public:
+    //! The size of the projection sample coefficients
+    static const std::size_t N = 5;
+
+public:
+    CRandomizedPeriodicityTest();
+
+    //! \name Persistence
+    //@{
+    //! Restore the static members by reading state from \p traverser.
+    static bool staticsAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Persist the static members by passing information to \p inserter.
+    static void staticsAcceptPersistInserter(core::CStatePersistInserter& inserter);
+
+    //! Initialize by reading state from \p traverser.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Persist state by passing information to \p inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+    //@}
+
+    //! Add a new value \p value at \p time.
+    void add(core_t::TTime time, double value);
+
+    //! Test whether there is a periodic trend.
+    bool test() const;
+
+    //! Reset the test static random vectors.
+    //!
+    //! \note For unit testing only.
+    static void reset();
+
+    //! Get a checksum for this object.
+    uint64_t checksum(uint64_t seed = 0) const;
+
+private:
+    using TDoubleVec = std::vector<double>;
+    using TVector2 = CVectorNx1<CFloatStorage, 2>;
+    using TVector2MeanAccumulator = CBasicStatistics::SSampleMean<TVector2>::TAccumulator;
+    using TVector2N = CVectorNx1<CFloatStorage, 2 * N>;
+    using TVector2NMeanAccumulator = CBasicStatistics::SSampleMean<TVector2N>::TAccumulator;
+    using TAtomicTime = std::atomic<core_t::TTime>;
+
+private:
+    //! The length over which the periodic random projection decoheres.
+    static const core_t::TTime SAMPLE_INTERVAL;
+    //! The time between day resample events.
+    static const core_t::TTime DAY_RESAMPLE_INTERVAL;
+    //! The time between week resample events.
+    static const core_t::TTime WEEK_RESAMPLE_INTERVAL;
+    //! The random number generator.
+    static boost::random::mt19937_64 ms_Rng;
+    //! The permutations of the daily projections.
+    static TDoubleVec ms_DayRandomProjections[N];
+    //! The daily periodic projections.
+    static TDoubleVec ms_DayPeriodicProjections[N];
+    //! The time at which we re-sampled day projections.
+    static TAtomicTime ms_DayResampled;
+    //! The permutations of the weekly projections.
+    static TDoubleVec ms_WeekRandomProjections[N];
+    //! The weekly periodic projections.
+    static TDoubleVec ms_WeekPeriodicProjections[N];
+    //! The time at which we re-sampled week projections.
+    static TAtomicTime ms_WeekResampled;
+    //! The mutex for protecting state update.
+    static core::CMutex ms_Lock;
+
+private:
+    //! Refresh \p projections and update \p statistics.
+    static void updateStatistics(TVector2NMeanAccumulator& projections, TVector2MeanAccumulator& statistics);
+
+    //! Re-sample the projections.
+    static void resample(core_t::TTime time);
+
+    //! Re-sample the specified projections.
+    static void resample(core_t::TTime period,
+                         core_t::TTime resampleInterval,
+                         TDoubleVec (&periodicProjections)[N],
+                         TDoubleVec (&randomProjections)[N]);
+
+private:
+    //! The day projections.
+    TVector2NMeanAccumulator m_DayProjections;
+    //! The sample mean of the square day projections.
+    TVector2MeanAccumulator m_DayStatistics;
+    //! The last time the day projections were updated.
+    core_t::TTime m_DayRefreshedProjections;
+    //! The week projections.
+    TVector2NMeanAccumulator m_WeekProjections;
+    //! The sample mean of the square week projections.
+    TVector2MeanAccumulator m_WeekStatistics;
+    //! The last time the week projections were updated.
+    core_t::TTime m_WeekRefreshedProjections;
+
+    friend class ::CTrendTestsTest;
 };

 //! \brief The basic idea of this test is to see if there is stronger
@@ -175,82 +171,80 @@ class MATHS_EXPORT CRandomizedPeriodicityTest
 //! than one would expect given that this is expected to be binomial.
 //! Amongst features with statistically significant frequencies of large
 //! errors it returns the feature with the highest mean prediction error.
-class MATHS_EXPORT CCalendarCyclicTest
-{
-    public:
-        using TOptionalFeature = boost::optional<CCalendarFeature>;
+class MATHS_EXPORT CCalendarCyclicTest {
+public:
+    using TOptionalFeature = boost::optional<CCalendarFeature>;

-    public:
-        explicit CCalendarCyclicTest(double decayRate = 0.0);
+public:
+    explicit CCalendarCyclicTest(double decayRate = 0.0);

-        //! Initialize by reading state from \p traverser.
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
+    //! Initialize by reading state from \p traverser.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);

-        //! Persist state by passing information to \p inserter.
-        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+    //! Persist state by passing information to \p inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;

-        //! Age the bucket values to account for \p time elapsed time.
-        void propagateForwardsByTime(double time);
+    //! Age the bucket values to account for \p time elapsed time.
+    void propagateForwardsByTime(double time);

-        //! Add \p error at \p time.
-        void add(core_t::TTime time, double error, double weight = 1.0);
+    //! Add \p error at \p time.
+    void add(core_t::TTime time, double error, double weight = 1.0);

-        //! Check if there are calendar components.
-        TOptionalFeature test() const;
+    //! Check if there are calendar components.
+    TOptionalFeature test() const;

-        //! Get a checksum for this object.
-        uint64_t checksum(uint64_t seed = 0) const;
+    //! Get a checksum for this object.
+    uint64_t checksum(uint64_t seed = 0) const;

-        //! Debug the memory used by this object.
-        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+    //! Debug the memory used by this object.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;

-        //! Get the memory used by this object.
-        std::size_t memoryUsage() const;
+    //! Get the memory used by this object.
+    std::size_t memoryUsage() const;

-    private:
-        using TTimeVec = std::vector<core_t::TTime>;
-        using TUInt32CBuf = boost::circular_buffer<uint32_t>;
-        using TTimeFloatPr = std::pair<core_t::TTime, CFloatStorage>;
-        using TTimeFloatFMap = boost::container::flat_map<core_t::TTime, CFloatStorage>;
+private:
+    using TTimeVec = std::vector<core_t::TTime>;
+    using TUInt32CBuf = boost::circular_buffer<uint32_t>;
+    using TTimeFloatPr = std::pair<core_t::TTime, CFloatStorage>;
+    using TTimeFloatFMap = boost::container::flat_map<core_t::TTime, CFloatStorage>;

-    private:
-        //! Winsorise \p error.
-        double winsorise(double error) const;
+private:
+    //! Winsorise \p error.
+    double winsorise(double error) const;

-        //! Get the significance of \p x large errors given \p n samples.
-        double significance(double n, double x) const;
+    //! Get the significance of \p x large errors given \p n samples.
+    double significance(double n, double x) const;

-    private:
-        //! The error bucketing interval.
-        static const core_t::TTime BUCKET;
-        //! The window length in buckets.
-        static const core_t::TTime WINDOW;
-        //! The percentile of a large error.
-        static const double LARGE_ERROR_PERCENTILE;
-        //! The minimum number of repeats for a testable feature.
-        static const unsigned int MINIMUM_REPEATS;
-        //! 
The bits used to count added values. - static const uint32_t COUNT_BITS; - //! The offsets that are used for different timezone offsets. - static const TTimeVec TIMEZONE_OFFSETS; +private: + //! The error bucketing interval. + static const core_t::TTime BUCKET; + //! The window length in buckets. + static const core_t::TTime WINDOW; + //! The percentile of a large error. + static const double LARGE_ERROR_PERCENTILE; + //! The minimum number of repeats for a testable feature. + static const unsigned int MINIMUM_REPEATS; + //! The bits used to count added values. + static const uint32_t COUNT_BITS; + //! The offsets that are used for different timezone offsets. + static const TTimeVec TIMEZONE_OFFSETS; - private: - //! The rate at which the error counts are aged. - double m_DecayRate; +private: + //! The rate at which the error counts are aged. + double m_DecayRate; - //! The time of the last error added. - core_t::TTime m_Bucket; + //! The time of the last error added. + core_t::TTime m_Bucket; - //! Used to estimate large error thresholds. - CQuantileSketch m_ErrorQuantiles; + //! Used to estimate large error thresholds. + CQuantileSketch m_ErrorQuantiles; - //! The counts of errors and large errors in a sliding window. - TUInt32CBuf m_ErrorCounts; + //! The counts of errors and large errors in a sliding window. + TUInt32CBuf m_ErrorCounts; - //! The bucket large error sums. - TTimeFloatFMap m_ErrorSums; + //! The bucket large error sums. + TTimeFloatFMap m_ErrorSums; }; - } } diff --git a/include/maths/CTypeConversions.h b/include/maths/CTypeConversions.h index 137ef49508..fa1887950c 100644 --- a/include/maths/CTypeConversions.h +++ b/include/maths/CTypeConversions.h @@ -12,105 +12,85 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Defines the promoted type. template -struct SPromoted -{ +struct SPromoted { using Type = T; }; //! \brief Defines the promoted type for float. template<> -struct SPromoted -{ +struct SPromoted { using Type = double; }; //! \brief Defines the promoted type for CFloatStorage. template<> -struct SPromoted -{ +struct SPromoted { using Type = double; }; //! \brief Defines the promoted type for a CVectorNx1. template -struct SPromoted> -{ +struct SPromoted> { using Type = CVectorNx1::Type, N>; }; //! \brief Defines the promoted type for a CVector. template -struct SPromoted> -{ +struct SPromoted> { using Type = CVector::Type>; }; //! \brief Defines the promoted type for a CSymmetricMatrixNxN. template -struct SPromoted> -{ +struct SPromoted> { using Type = CSymmetricMatrixNxN::Type, N>; }; //! \brief Defines the promoted type for a CSymmetricMatrix. template -struct SPromoted> -{ +struct SPromoted> { using Type = CSymmetricMatrix::Type>; }; //! \brief Defines the promoted type for an Eigen sparse matrix. template -struct SPromoted> -{ - using Type = Eigen::SparseMatrix::Type, - FLAGS, STORAGE_INDEX>; +struct SPromoted> { + using Type = Eigen::SparseMatrix::Type, FLAGS, STORAGE_INDEX>; }; //! \brief Defines the promoted type for an Eigen sparse vector. template -struct SPromoted> -{ - using Type = Eigen::SparseVector::Type, - FLAGS, STORAGE_INDEX>; +struct SPromoted> { + using Type = Eigen::SparseVector::Type, FLAGS, STORAGE_INDEX>; }; //! \brief Defines the promoted type for an Eigen dense matrix. template -struct SPromoted> -{ - using Type = Eigen::Matrix::Type, - ROWS, COLS, OPTIONS, MAX_ROWS, MAX_COLS>; +struct SPromoted> { + using Type = Eigen::Matrix::Type, ROWS, COLS, OPTIONS, MAX_ROWS, MAX_COLS>; }; //! 
\brief Defines the promoted type for a CAnnotatedVector. template -struct SPromoted> -{ +struct SPromoted> { using Type = CAnnotatedVector::Type, ANNOTATION>; }; - -namespace type_conversion_detail -{ +namespace type_conversion_detail { //! \brief Chooses between T and U based on the checks for //! integral and floating point types. template -struct SSelector -{ +struct SSelector { using Type = U; }; template -struct SSelector -{ +struct SSelector { using Type = T; }; @@ -118,195 +98,161 @@ struct SSelector //! \brief Defines a suitable floating point type. template -struct SFloatingPoint -{ - using Type = typename type_conversion_detail::SSelector< - T, U, boost::is_floating_point::value>::Type; +struct SFloatingPoint { + using Type = typename type_conversion_detail::SSelector::value>::Type; }; //! \brief Defines CVectorNx1 on a suitable floating point type. template -struct SFloatingPoint, U> -{ +struct SFloatingPoint, U> { using Type = CVectorNx1::Type, N>; }; //! \brief Defines CVector on a suitable floating point type. template -struct SFloatingPoint, U> -{ +struct SFloatingPoint, U> { using Type = CVector::Type>; }; //! \brief Defines CSymmetricMatrixNxN on a suitable floating point type. template -struct SFloatingPoint, U> -{ +struct SFloatingPoint, U> { using Type = CSymmetricMatrixNxN::Type, N>; }; //! \brief Defines CSymmetricMatrix on a suitable floating point type. template -struct SFloatingPoint, U> -{ +struct SFloatingPoint, U> { using Type = CSymmetricMatrix::Type>; }; //! \brief Defines an Eigen sparse matrix on a suitable floating point type. template -struct SFloatingPoint, U> -{ - using Type = Eigen::SparseMatrix::Type, - FLAGS, STORAGE_INDEX>; +struct SFloatingPoint, U> { + using Type = Eigen::SparseMatrix::Type, FLAGS, STORAGE_INDEX>; }; //! \brief Defines an Eigen sparse vector on a suitable floating point type. template -struct SFloatingPoint, U> -{ - using Type = Eigen::SparseVector::Type, - FLAGS, STORAGE_INDEX>; +struct SFloatingPoint, U> { + using Type = Eigen::SparseVector::Type, FLAGS, STORAGE_INDEX>; }; //! \brief Defines an Eigen dense matrix on a suitable floating point type. template -struct SFloatingPoint, U> -{ - using Type = Eigen::Matrix::Type, - ROWS, COLS, OPTIONS, MAX_ROWS, MAX_COLS>; +struct SFloatingPoint, U> { + using Type = Eigen::Matrix::Type, ROWS, COLS, OPTIONS, MAX_ROWS, MAX_COLS>; }; //! \brief Defines CAnnotatedVector on a suitable floating point type. template -struct SFloatingPoint, U> -{ +struct SFloatingPoint, U> { using Type = CAnnotatedVector::Type, ANNOTATION>; }; - //! \brief Extracts the coordinate type for a point. template -struct SCoordinate -{ +struct SCoordinate { using Type = T; }; //! \brief Extracts the coordinate type for CVectorNx1. template -struct SCoordinate> -{ +struct SCoordinate> { using Type = T; }; //! \brief Extracts the coordinate type for CVector. template -struct SCoordinate> -{ +struct SCoordinate> { using Type = T; }; //! \brief Extracts the coordinate type for CSymmetricMatrixNxN. template -struct SCoordinate> -{ +struct SCoordinate> { using Type = T; }; //! \brief Extracts the coordinate type for CSymmetricMatrix. template -struct SCoordinate> -{ +struct SCoordinate> { using Type = T; }; //! \brief Extracts the coordinate type for an Eigen sparse matrix. template -struct SCoordinate> -{ +struct SCoordinate> { using Type = SCALAR; }; //! \brief Extracts the coordinate type for an Eigen sparse vector. template -struct SCoordinate> -{ +struct SCoordinate> { using Type = SCALAR; }; //! 
\brief Extracts the coordinate type for an Eigen dense matrix. template -struct SCoordinate> -{ +struct SCoordinate> { using Type = SCALAR; }; //! \brief Extracts the coordinate type for the underlying vector type. template -struct SCoordinate> -{ +struct SCoordinate> { using Type = typename SCoordinate::Type; }; - //! \brief Extracts the conformable matrix type for a point. template -struct SConformableMatrix -{ +struct SConformableMatrix { using Type = POINT; }; //! \brief Extracts the conformable matrix type for a CVectorNx1. template -struct SConformableMatrix> -{ +struct SConformableMatrix> { using Type = CSymmetricMatrixNxN; }; //! \brief Extracts the conformable matrix type for a CVector. template -struct SConformableMatrix> -{ +struct SConformableMatrix> { using Type = CSymmetricMatrix; }; //! \brief Extracts the conformable matrix type for an Eigen sparse vector. template -struct SConformableMatrix> -{ +struct SConformableMatrix> { using Type = Eigen::SparseMatrix; }; //! \brief Extracts the conformable matrix type for an Eigen dense vector. template -struct SConformableMatrix> -{ +struct SConformableMatrix> { using Type = Eigen::Matrix; }; //! \brief Extracts the conformable matrix type for the underlying vector type. template -struct SConformableMatrix> -{ +struct SConformableMatrix> { using Type = typename SConformableMatrix::Type; }; - //! \brief Defines a type which strips off any annotation from a vector. //! This is the raw vector type by default. template -struct SStripped -{ +struct SStripped { using Type = VECTOR; }; //! \brief Specialisation for annotated vectors. This is the underlying //! vector type. template -struct SStripped> -{ +struct SStripped> { using Type = VECTOR; }; - } } diff --git a/include/maths/CXMeans.h b/include/maths/CXMeans.h index aa7755403e..2324ff9dd2 100644 --- a/include/maths/CXMeans.h +++ b/include/maths/CXMeans.h @@ -26,11 +26,8 @@ #include - -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief Implementation of x-means algorithm. //! @@ -47,440 +44,354 @@ namespace maths //! CBasicStatistics::SSampleCentralMoments, support coordinate access //! by the brackets operator and have member functions called dimension //! and euclidean - which gives the Euclidean norm of the vector. -template > -class CXMeans -{ +template> +class CXMeans { +public: + using TDoubleVec = std::vector; + using TPointVec = std::vector; + using TPointVecVec = std::vector; + using TUInt64USet = boost::unordered_set; + using TUInt64USetItr = TUInt64USet::iterator; + using TMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + + //! A cluster. + //! + //! DESCRIPTION:\n + //! This associates the cost, cluster centre and points. It + //! also provides fast comparison by a checksum and sorts the + //! points for stable comparison. + class CCluster { public: - using TDoubleVec = std::vector; - using TPointVec = std::vector; - using TPointVecVec = std::vector; - using TUInt64USet = boost::unordered_set; - using TUInt64USetItr = TUInt64USet::iterator; - using TMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; - - //! A cluster. - //! - //! DESCRIPTION:\n - //! This associates the cost, cluster centre and points. It - //! also provides fast comparison by a checksum and sorts the - //! points for stable comparison. - class CCluster - { - public: - CCluster() : - m_Cost(std::numeric_limits::max()), m_Checksum(0) - {} - - //! Check for equality using checksum and then points if the - //! checksum is ambiguous. 
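
Everything the methods below require of the cost template parameter is a small contract: be constructible from a cluster's points, accumulate further clusters with add(), and report a score from calculate() where smaller is better. The following self-contained functor is a hypothetical example in that shape; CExampleBicCost and its internals are illustrative, not part of the library, and for brevity it omits the construction from a vector of clusters which the methods below also use:

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // A BIC-flavoured cost: within-cluster squared deviation converted to a
    // deviance, plus a complexity penalty per fitted cluster.
    class CExampleBicCost {
    public:
        using TPointVec = std::vector<std::vector<double>>;

        CExampleBicCost() = default;
        explicit CExampleBicCost(const TPointVec& points) { this->add(points); }

        // Accumulate one cluster's contribution.
        void add(const TPointVec& points) {
            if (points.empty()) {
                return;
            }
            std::size_t dimension{points[0].size()};
            std::vector<double> centre(dimension, 0.0);
            for (const auto& point : points) {
                for (std::size_t i = 0; i < dimension; ++i) {
                    centre[i] += point[i];
                }
            }
            for (auto& coordinate : centre) {
                coordinate /= static_cast<double>(points.size());
            }
            for (const auto& point : points) {
                for (std::size_t i = 0; i < dimension; ++i) {
                    double residual{point[i] - centre[i]};
                    m_SquaredDeviation += residual * residual;
                }
            }
            m_Count += static_cast<double>(points.size());
            m_Parameters += static_cast<double>(dimension) + 1.0; // centre + variance
        }

        // Smaller is better: deviance plus a BIC-style penalty.
        double calculate() const {
            double variance{std::max(m_SquaredDeviation / std::max(m_Count, 1.0), 1e-12)};
            return m_Count * std::log(variance) + m_Parameters * std::log(std::max(m_Count, 2.0));
        }

    private:
        double m_SquaredDeviation = 0.0;
        double m_Count = 0.0;
        double m_Parameters = 0.0;
    };
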
- bool operator==(const CCluster &other) const - { - return m_Checksum == other.m_Checksum && m_Points == other.m_Points; - } + CCluster() : m_Cost(std::numeric_limits::max()), m_Checksum(0) {} - //! Total ordering by checksum breaking ties using expensive - //! comparison on all points. - bool operator<(const CCluster &rhs) const - { - return COrderings::lexicographical_compare(m_Checksum, m_Points, - rhs.m_Checksum, rhs.m_Points); - } + //! Check for equality using checksum and then points if the + //! checksum is ambiguous. + bool operator==(const CCluster& other) const { return m_Checksum == other.m_Checksum && m_Points == other.m_Points; } - //! Get the number of points in the cluster. - std::size_t size() const - { - return m_Points.size(); - } + //! Total ordering by checksum breaking ties using expensive + //! comparison on all points. + bool operator<(const CCluster& rhs) const { + return COrderings::lexicographical_compare(m_Checksum, m_Points, rhs.m_Checksum, rhs.m_Points); + } - //! Set the cluster cost. - void cost(double cost) - { - m_Cost = cost; - } - //! Get the cluster cost. - double cost() const - { - return m_Cost; - } + //! Get the number of points in the cluster. + std::size_t size() const { return m_Points.size(); } - //! Set the cluster centre. - void centre(const POINT ¢re) - { - m_Centre = centre; - } - //! Get the cluster centre. - const POINT ¢re() const - { - return m_Centre; - } + //! Set the cluster cost. + void cost(double cost) { m_Cost = cost; } + //! Get the cluster cost. + double cost() const { return m_Cost; } - //! Swap the points into place and recalculate the checksum. - void points(TPointVec &points) - { - m_Points.swap(points); - std::sort(m_Points.begin(), m_Points.end()); - m_Checksum = CChecksum::calculate(0, m_Points); - } - //! Get the cluster points. - const TPointVec &points() const - { - return m_Points; - } + //! Set the cluster centre. + void centre(const POINT& centre) { m_Centre = centre; } + //! Get the cluster centre. + const POINT& centre() const { return m_Centre; } - //! Get the cluster checksum. - uint64_t checksum() const - { - return m_Checksum; - } + //! Swap the points into place and recalculate the checksum. + void points(TPointVec& points) { + m_Points.swap(points); + std::sort(m_Points.begin(), m_Points.end()); + m_Checksum = CChecksum::calculate(0, m_Points); + } + //! Get the cluster points. + const TPointVec& points() const { return m_Points; } - private: - //! The information criterion cost of this cluster. - double m_Cost; - //! The centroid of the points in this cluster. - POINT m_Centre; - //! The points in the cluster. - TPointVec m_Points; - //! A checksum for the points in the cluster. - uint64_t m_Checksum; - }; + //! Get the cluster checksum. + uint64_t checksum() const { return m_Checksum; } - using TClusterVec = std::vector; + private: + //! The information criterion cost of this cluster. + double m_Cost; + //! The centroid of the points in this cluster. + POINT m_Centre; + //! The points in the cluster. + TPointVec m_Points; + //! A checksum for the points in the cluster. + uint64_t m_Checksum; + }; + + using TClusterVec = std::vector; + +public: + CXMeans(std::size_t kmax) : m_Kmax(kmax), m_MinCost(std::numeric_limits::max()) { + m_BestCentres.reserve(m_Kmax); + m_Clusters.reserve(m_Kmax); + } + + //! Set the points to cluster. + //! + //! \note These are swapped in to place. 
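
As the note above says, setPoints swaps its argument into place rather than copying it, so the caller must treat the vector as emptied afterwards. A tiny self-contained illustration of the idiom, with Consumer standing in for the clusterer:

    #include <iostream>
    #include <vector>

    // setPoints takes the buffer by swapping, avoiding a copy, and leaves
    // the caller's vector empty.
    class Consumer {
    public:
        void setPoints(std::vector<double>& points) { m_Points.swap(points); }
        std::size_t size() const { return m_Points.size(); }

    private:
        std::vector<double> m_Points;
    };

    int main() {
        std::vector<double> points{1.0, 2.0, 3.0};
        Consumer consumer;
        consumer.setPoints(points);
        std::cout << "consumer holds " << consumer.size()      // prints 3
                  << " points, caller holds " << points.size() // prints 0
                  << '\n';
        return 0;
    }
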
+ void setPoints(TPointVec& points) { + m_Kmeans.setPoints(points); + m_Clusters.clear(); + m_Clusters.push_back(CCluster()); + double cost = COST(points).calculate(); + m_Clusters[0].cost(cost); + TMeanAccumulator centroid; + centroid.add(points); + m_Clusters[0].centre(CBasicStatistics::mean(centroid)); + m_Clusters[0].points(points); + m_MinCost = cost; + m_BestCentres.push_back(CBasicStatistics::mean(centroid)); + } + + //! Get the best centres found to date. + const TPointVec& centres() const { return m_BestCentres; } + + //! Get the best clusters found to date. + const TClusterVec& clusters() const { return m_Clusters; } + + //! Run the full x-means algorithm. + //! + //! This iterates between improve structure and improve + //! parameters until either kmax clusters have been created + //! or there was an improve structure round with no change. + //! + //! \param[in] improveParamsKmeansIterations The number of + //! iterations of Lloyd's algorithm to use in k-means for a + //! single round of improve parameters. + //! \param[in] improveStructureClusterSeeds The number of + //! random seeds to try when initializing k-means for a + //! single round of improve structure. + //! \param[in] improveStructureKmeansIterations The number + //! of iterations of Lloyd's algorithm to use in k-means for + //! a single round of improve structure. + void + run(std::size_t improveParamsKmeansIterations, std::size_t improveStructureClusterSeeds, std::size_t improveStructureKmeansIterations) { + while (this->improveStructure(improveStructureClusterSeeds, improveStructureKmeansIterations)) { + this->improveParams(improveParamsKmeansIterations); + } + this->polish(10 * improveParamsKmeansIterations); + } + +protected: + //! Single round of k-means on the full point set with the + //! current clusters using at most \p kmeansIterations. + //! + //! \param[in] kmeansIterations The limit on the number of + //! iterations of Lloyd's algorithm to use. + void improveParams(std::size_t kmeansIterations) { + using TClusterCPtr = const CCluster*; + using TClusterCPtrVec = std::vector; + + std::size_t n = m_Clusters.size(); + + // Setup k-means to run on the current centres and create + // sorted lookup of the current clusters. + TPointVec oldCentres; + oldCentres.reserve(n); + TClusterCPtrVec oldClusters; + oldClusters.reserve(n); + for (std::size_t i = 0u; i < n; ++i) { + oldCentres.push_back(m_Clusters[i].centre()); + oldClusters.push_back(&m_Clusters[i]); + } + std::sort(oldClusters.begin(), oldClusters.end(), COrderings::SPtrLess()); + m_Kmeans.setCentres(oldCentres); + + // k-means to improve parameters. + m_Kmeans.run(kmeansIterations); + const TPointVec& newCentres = m_Kmeans.centres(); + TPointVecVec newClusterPoints; + m_Kmeans.clusters(newClusterPoints); + + // Note that oldClusters holds pointers to the current + // clusters so we can't overwrite them until after the + // following loop. 
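
The loop that follows reuses the cached cost of any cluster which k-means left unchanged, locating the old cluster by binary search over the sorted pointers. A stand-alone sketch of that lookup pattern, using simplified stand-in types rather than the library's:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Clusters are identified by checksum; unchanged ones keep their old cost.
    struct SCluster {
        std::uint64_t checksum;
        double cost;
        bool operator<(const SCluster& rhs) const { return checksum < rhs.checksum; }
    };

    int main() {
        std::vector<SCluster> oldClusters{{11, 0.5}, {42, 1.5}, {77, 2.5}};
        std::sort(oldClusters.begin(), oldClusters.end());

        SCluster updated{42, 0.0}; // same contents as before, cost not yet known
        auto match = std::lower_bound(oldClusters.begin(), oldClusters.end(), updated);
        if (match != oldClusters.end() && match->checksum == updated.checksum) {
            updated.cost = match->cost; // reuse rather than recompute
        }
        std::cout << "reused cost = " << updated.cost << '\n'; // prints 1.5
        return 0;
    }
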
+ + TClusterVec newClusters; + newClusters.reserve(newCentres.size()); + TUInt64USet preserved; + COST cost_; + + for (std::size_t i = 0u; i < n; ++i) { + newClusters.push_back(CCluster()); + CCluster& cluster = newClusters.back(); + cluster.centre(newCentres[i]); + cluster.points(newClusterPoints[i]); + typename TClusterCPtrVec::const_iterator j = + std::lower_bound(oldClusters.begin(), oldClusters.end(), &cluster, COrderings::SPtrLess()); + if (j != oldClusters.end() && **j == cluster) { + cluster.cost((*j)->cost()); + preserved.insert(cluster.checksum()); + } else { + cluster.cost(COST(cluster.points()).calculate()); + } + cost_.add(cluster.points()); + } - public: - CXMeans(std::size_t kmax) : - m_Kmax(kmax), - m_MinCost(std::numeric_limits::max()) - { - m_BestCentres.reserve(m_Kmax); - m_Clusters.reserve(m_Kmax); + // Refresh the clusters and inactive list. + m_Clusters.swap(newClusters); + for (TUInt64USetItr i = m_Inactive.begin(); i != m_Inactive.end(); /**/) { + if (preserved.count(*i) > 0) { + ++i; + } else { + i = m_Inactive.erase(i); + } } - //! Set the points to cluster. - //! - //! \note These are swapped in to place. - void setPoints(TPointVec &points) - { - m_Kmeans.setPoints(points); - m_Clusters.clear(); - m_Clusters.push_back(CCluster()); - double cost = COST(points).calculate(); - m_Clusters[0].cost(cost); - TMeanAccumulator centroid; - centroid.add(points); - m_Clusters[0].centre(CBasicStatistics::mean(centroid)); - m_Clusters[0].points(points); + // Refresh the best clustering found so far. + double cost = cost_.calculate(); + if (cost < m_MinCost) { + m_BestCentres.clear(); + for (std::size_t i = 0u; i < n; ++i) { + m_BestCentres.push_back(m_Clusters[i].centre()); + } m_MinCost = cost; - m_BestCentres.push_back(CBasicStatistics::mean(centroid)); } - - //! Get the best centres found to date. - const TPointVec ¢res() const - { - return m_BestCentres; + } + + //! Try splitting each cluster in two and keep only those + //! splits which improve the overall score. + //! + //! \param[in] clusterSeeds The number of different 2-splits + //! to try per cluster. + //! \param[in] kmeansIterations The limit on the number of + //! iterations of Lloyd's algorithm to use for each k-means. + bool improveStructure(std::size_t clusterSeeds, std::size_t kmeansIterations) { + if (m_Clusters.empty()) { + return false; } - //! Get the best clusters found to date. - const TClusterVec &clusters() const - { - return m_Clusters; + // Declared outside the loop to minimize allocations. + CKMeansFast kmeans; + TPointVec points; + TPointVecVec clusterPoints; + TPointVec bestClusterCentres; + TPointVecVec bestClusterPoints; + TPointVec seedClusterCentres; + + std::size_t largest = 0; + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + largest = std::max(largest, m_Clusters[i].size()); } - //! Run the full x-means algorithm. - //! - //! This iterates between improve structure and improve - //! parameters until either kmax clusters have been created - //! or there was an improve structure round with no change. - //! - //! \param[in] improveParamsKmeansIterations The number of - //! iterations of Lloyd's algorithm to use in k-means for a - //! single round of improve parameters. - //! \param[in] improveStructureClusterSeeds The number of - //! random seeds to try when initializing k-means for a - //! single round of improve structure. - //! \param[in] improveStructureKmeansIterations The number - //! of iterations of Lloyd's algorithm to use in k-means for - //! 
a single round of improve structure. - void run(std::size_t improveParamsKmeansIterations, - std::size_t improveStructureClusterSeeds, - std::size_t improveStructureKmeansIterations) - { - while (this->improveStructure(improveStructureClusterSeeds, - improveStructureKmeansIterations)) - { - this->improveParams(improveParamsKmeansIterations); - } - this->polish(10 * improveParamsKmeansIterations); - } + kmeans.reserve(largest); + points.reserve(largest); + clusterPoints.reserve(2); + bestClusterCentres.reserve(2); + bestClusterPoints.reserve(2); + seedClusterCentres.reserve(2); - protected: - //! Single round of k-means on the full point set with the - //! current clusters using at most \p kmeansIterations. - //! - //! \param[in] kmeansIterations The limit on the number of - //! iterations of Lloyd's algorithm to use. - void improveParams(std::size_t kmeansIterations) - { - using TClusterCPtr = const CCluster*; - using TClusterCPtrVec = std::vector; - - std::size_t n = m_Clusters.size(); - - // Setup k-means to run on the current centres and create - // sorted lookup of the current clusters. - TPointVec oldCentres; - oldCentres.reserve(n); - TClusterCPtrVec oldClusters; - oldClusters.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { - oldCentres.push_back(m_Clusters[i].centre()); - oldClusters.push_back(&m_Clusters[i]); - } - std::sort(oldClusters.begin(), - oldClusters.end(), - COrderings::SPtrLess()); - m_Kmeans.setCentres(oldCentres); + bool split = false; - // k-means to improve parameters. - m_Kmeans.run(kmeansIterations); - const TPointVec &newCentres = m_Kmeans.centres(); - TPointVecVec newClusterPoints; - m_Kmeans.clusters(newClusterPoints); - - // Note that oldClusters holds pointers to the current - // clusters so we can't overwrite them until after the - // following loop. - - TClusterVec newClusters; - newClusters.reserve(newCentres.size()); - TUInt64USet preserved; - COST cost_; - - for (std::size_t i = 0u; i < n; ++i) - { - newClusters.push_back(CCluster()); - CCluster &cluster = newClusters.back(); - cluster.centre(newCentres[i]); - cluster.points(newClusterPoints[i]); - typename TClusterCPtrVec::const_iterator j = - std::lower_bound(oldClusters.begin(), oldClusters.end(), - &cluster, COrderings::SPtrLess()); - if (j != oldClusters.end() && **j == cluster) - { - cluster.cost((*j)->cost()); - preserved.insert(cluster.checksum()); - } - else - { - cluster.cost(COST(cluster.points()).calculate()); - } - cost_.add(cluster.points()); + for (std::size_t i = 0u, n = m_Clusters.size(); i < n && m_Clusters.size() < m_Kmax; ++i) { + if (m_Inactive.count(m_Clusters[i].checksum()) > 0) { + continue; } - // Refresh the clusters and inactive list. - m_Clusters.swap(newClusters); - for (TUInt64USetItr i = m_Inactive.begin(); i != m_Inactive.end(); /**/) - { - if (preserved.count(*i) > 0) - { - ++i; - } - else - { - i = m_Inactive.erase(i); - } - } + LOG_TRACE("Working on cluster at " << m_Clusters[i].centre()); + LOG_TRACE("Cluster cost = " << m_Clusters[i].cost()); - // Refresh the best clustering found so far. - double cost = cost_.calculate(); - if (cost < m_MinCost) - { - m_BestCentres.clear(); - for (std::size_t i = 0u; i < n; ++i) - { - m_BestCentres.push_back(m_Clusters[i].centre()); - } - m_MinCost = cost; - } - } + points.reserve(m_Clusters[i].size()); + points.assign(m_Clusters[i].points().begin(), m_Clusters[i].points().end()); + kmeans.setPoints(points); + double minCost = std::numeric_limits::max(); - //! Try splitting each cluster in two and keep only those - //! 
splits which improve the overall score. - //! - //! \param[in] clusterSeeds The number of different 2-splits - //! to try per cluster. - //! \param[in] kmeansIterations The limit on the number of - //! iterations of Lloyd's algorithm to use for each k-means. - bool improveStructure(std::size_t clusterSeeds, - std::size_t kmeansIterations) - { - if (m_Clusters.empty()) - { - return false; - } + for (std::size_t j = 0u; j < clusterSeeds; ++j) { + this->generateSeedCentres(points, 2, seedClusterCentres); + LOG_TRACE("seed centres = " << core::CContainerPrinter::print(seedClusterCentres)); - // Declared outside the loop to minimize allocations. - CKMeansFast kmeans; - TPointVec points; - TPointVecVec clusterPoints; - TPointVec bestClusterCentres; - TPointVecVec bestClusterPoints; - TPointVec seedClusterCentres; - - std::size_t largest = 0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - largest = std::max(largest, m_Clusters[i].size()); - } + kmeans.setCentres(seedClusterCentres); + kmeans.run(kmeansIterations); - kmeans.reserve(largest); - points.reserve(largest); - clusterPoints.reserve(2); - bestClusterCentres.reserve(2); - bestClusterPoints.reserve(2); - seedClusterCentres.reserve(2); - - bool split = false; - - for (std::size_t i = 0u, n = m_Clusters.size(); - i < n && m_Clusters.size() < m_Kmax; - ++i) - { - if (m_Inactive.count(m_Clusters[i].checksum()) > 0) - { - continue; - } + const TPointVec& centres = kmeans.centres(); + LOG_TRACE("centres = " << core::CContainerPrinter::print(centres)); + clusterPoints.clear(); + kmeans.clusters(clusterPoints); - LOG_TRACE("Working on cluster at " << m_Clusters[i].centre()); - LOG_TRACE("Cluster cost = " << m_Clusters[i].cost()); - - points.reserve(m_Clusters[i].size()); - points.assign(m_Clusters[i].points().begin(), - m_Clusters[i].points().end()); - kmeans.setPoints(points); - double minCost = std::numeric_limits::max(); - - for (std::size_t j = 0u; j < clusterSeeds; ++j) - { - this->generateSeedCentres(points, 2, seedClusterCentres); - LOG_TRACE("seed centres = " - << core::CContainerPrinter::print(seedClusterCentres)); - - kmeans.setCentres(seedClusterCentres); - kmeans.run(kmeansIterations); - - const TPointVec ¢res = kmeans.centres(); - LOG_TRACE("centres = " << core::CContainerPrinter::print(centres)); - clusterPoints.clear(); - kmeans.clusters(clusterPoints); - - double cost = COST(clusterPoints).calculate(); - LOG_TRACE("cost = " << cost); - - if (cost < minCost) - { - minCost = cost; - bestClusterCentres.assign(centres.begin(), centres.end()); - bestClusterPoints.swap(clusterPoints); - } - } + double cost = COST(clusterPoints).calculate(); + LOG_TRACE("cost = " << cost); - // Check if we should split. - if (minCost < m_Clusters[i].cost()) - { - m_Inactive.erase(m_Clusters[i].checksum()); - m_Clusters[i].cost(COST(bestClusterPoints[0]).calculate()); - m_Clusters[i].centre(bestClusterCentres[0]); - m_Clusters[i].points(bestClusterPoints[0]); - for (std::size_t j = 1u; j < bestClusterCentres.size(); ++j) - { - m_Clusters.push_back(CCluster()); - m_Clusters.back().cost(COST(bestClusterPoints[j]).calculate()); - m_Clusters.back().centre(bestClusterCentres[j]); - m_Clusters.back().points(bestClusterPoints[j]); - } - split = true; - } - else - { - LOG_TRACE("Setting inactive = " << m_Clusters[i].checksum()); - m_Inactive.insert(m_Clusters[i].checksum()); + if (cost < minCost) { + minCost = cost; + bestClusterCentres.assign(centres.begin(), centres.end()); + bestClusterPoints.swap(clusterPoints); } } - return split; - } - - //! 
Get the checksums of the clusters which are inactive. - const TUInt64USet &inactive() const - { - return m_Inactive; - } - - private: - //! Generate seed points for the cluster centres in k-splits - //! of \p points. - //! - //! These are used to initialize the k-means centres in the - //! step to improve structure. - void generateSeedCentres(const TPointVec &points, - std::size_t k, - TPointVec &result) const - { - CKMeansPlusPlusInitialization kmeansPlusPlus(m_Rng); - kmeansPlusPlus.run(points, k, result); - } - - //! Run k-means to improve the best centres. - //! - //! \param[in] kmeansIterations The limit on the number of - //! iterations of Lloyd's algorithm to use. - void polish(std::size_t kmeansIterations) - { - if (m_BestCentres.size() > 1) - { - m_Kmeans.setCentres(m_BestCentres); - m_Kmeans.run(kmeansIterations); - m_BestCentres = m_Kmeans.centres(); - TPointVecVec polishedClusterPoints; - m_Kmeans.clusters(polishedClusterPoints); - m_Clusters.clear(); - m_Clusters.reserve(m_BestCentres.size()); - for (std::size_t i = 0u; i < m_BestCentres.size(); ++i) - { + // Check if we should split. + if (minCost < m_Clusters[i].cost()) { + m_Inactive.erase(m_Clusters[i].checksum()); + m_Clusters[i].cost(COST(bestClusterPoints[0]).calculate()); + m_Clusters[i].centre(bestClusterCentres[0]); + m_Clusters[i].points(bestClusterPoints[0]); + for (std::size_t j = 1u; j < bestClusterCentres.size(); ++j) { m_Clusters.push_back(CCluster()); - CCluster &cluster = m_Clusters.back(); - cluster.cost(COST(polishedClusterPoints[i]).calculate()); - cluster.centre(m_BestCentres[i]); - cluster.points(polishedClusterPoints[i]); + m_Clusters.back().cost(COST(bestClusterPoints[j]).calculate()); + m_Clusters.back().centre(bestClusterCentres[j]); + m_Clusters.back().points(bestClusterPoints[j]); } + split = true; + } else { + LOG_TRACE("Setting inactive = " << m_Clusters[i].checksum()); + m_Inactive.insert(m_Clusters[i].checksum()); } } - private: - //! The random number generator. - mutable CPRNG::CXorShift1024Mult m_Rng; + return split; + } + + //! Get the checksums of the clusters which are inactive. + const TUInt64USet& inactive() const { return m_Inactive; } + +private: + //! Generate seed points for the cluster centres in k-splits + //! of \p points. + //! + //! These are used to initialize the k-means centres in the + //! step to improve structure. + void generateSeedCentres(const TPointVec& points, std::size_t k, TPointVec& result) const { + CKMeansPlusPlusInitialization kmeansPlusPlus(m_Rng); + kmeansPlusPlus.run(points, k, result); + } + + //! Run k-means to improve the best centres. + //! + //! \param[in] kmeansIterations The limit on the number of + //! iterations of Lloyd's algorithm to use. + void polish(std::size_t kmeansIterations) { + if (m_BestCentres.size() > 1) { + m_Kmeans.setCentres(m_BestCentres); + m_Kmeans.run(kmeansIterations); + m_BestCentres = m_Kmeans.centres(); + TPointVecVec polishedClusterPoints; + m_Kmeans.clusters(polishedClusterPoints); + m_Clusters.clear(); + m_Clusters.reserve(m_BestCentres.size()); + for (std::size_t i = 0u; i < m_BestCentres.size(); ++i) { + m_Clusters.push_back(CCluster()); + CCluster& cluster = m_Clusters.back(); + cluster.cost(COST(polishedClusterPoints[i]).calculate()); + cluster.centre(m_BestCentres[i]); + cluster.points(polishedClusterPoints[i]); + } + } + } - //! The maximum number of clusters. - std::size_t m_Kmax; +private: + //! The random number generator. + mutable CPRNG::CXorShift1024Mult m_Rng; - //! The current clusters. 
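
The structure-improvement step above hinges on the inactive set: a cluster whose best 2-split was rejected is remembered by checksum and skipped in later rounds until its contents, and therefore its checksum, change. A self-contained sketch of that bookkeeping, with illustrative names and data:

    #include <cstdint>
    #include <iostream>
    #include <unordered_set>
    #include <vector>

    struct SCluster {
        std::uint64_t checksum;
        bool splitImprovesCost;
    };

    int main() {
        std::vector<SCluster> clusters{{101, false}, {202, true}, {303, false}};
        std::unordered_set<std::uint64_t> inactive;

        for (int round = 0; round < 2; ++round) {
            for (const auto& cluster : clusters) {
                if (inactive.count(cluster.checksum) > 0) {
                    continue; // unchanged since its last split was rejected
                }
                if (cluster.splitImprovesCost) {
                    std::cout << "round " << round << ": split cluster " << cluster.checksum << '\n';
                } else {
                    inactive.insert(cluster.checksum); // don't retry until it changes
                }
            }
        }
        return 0;
    }
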
- TClusterVec m_Clusters; + //! The maximum number of clusters. + std::size_t m_Kmax; - //! Checksums of clusters which weren't modified in the last - //! iteration. - TUInt64USet m_Inactive; + //! The current clusters. + TClusterVec m_Clusters; - //! The fast k-means state for the full set of points. - CKMeansFast m_Kmeans; + //! Checksums of clusters which weren't modified in the last + //! iteration. + TUInt64USet m_Inactive; - //! The minimum cost clustering found to date. - double m_MinCost; + //! The fast k-means state for the full set of points. + CKMeansFast m_Kmeans; - //! The cluster centres corresponding to the maximum score. - TPointVec m_BestCentres; -}; + //! The minimum cost clustering found to date. + double m_MinCost; + //! The cluster centres corresponding to the maximum score. + TPointVec m_BestCentres; +}; } } diff --git a/include/maths/CXMeansOnline.h b/include/maths/CXMeansOnline.h index 5ddee37d8c..8b8e776cfd 100644 --- a/include/maths/CXMeansOnline.h +++ b/include/maths/CXMeansOnline.h @@ -16,15 +16,15 @@ #include #include #include -#include #include #include #include #include -#include #include +#include #include #include +#include #include #include @@ -33,10 +33,8 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { //! \brief A single pass online clusterer based on the x-means //! algorithm of Pelleg and Moore. @@ -74,1377 +72,1139 @@ namespace maths //! unsupervised clustering of the data which identifies reasonably //! separated clusters. template -class CXMeansOnline : public CClusterer > -{ +class CXMeansOnline : public CClusterer> { +public: + using TPoint = CVectorNx1; + using TPointVec = std::vector; + using TPointPrecise = typename CClusterer::TPointPrecise; + using TPointPreciseVec = typename CClusterer::TPointPreciseVec; + using TPointPreciseDoublePrVec = typename CClusterer::TPointPreciseDoublePrVec; + using TSizeDoublePr = typename CClusterer::TSizeDoublePr; + using TSizeDoublePr2Vec = typename CClusterer::TSizeDoublePr2Vec; + using TDoubleVec = std::vector; + using TDoubleVecVec = std::vector; + using TSizeVec = std::vector; + using TSizeVecVec = std::vector; + using TPrecise = typename SPromoted::Type; + using TMatrixPrecise = CSymmetricMatrixNxN; + using TCovariances = CBasicStatistics::SSampleCovariances; + using TSphericalCluster = typename CSphericalCluster::Type; + using TSphericalClusterVec = std::vector; + using TSphericalClusterVecVec = std::vector; + using TKMeansOnline = CKMeansOnline; + using TKMeansOnlineVec = std::vector; + class CCluster; + using TClusterClusterPr = std::pair; + using TOptionalClusterClusterPr = boost::optional; + + //! \brief Represents a cluster. 
+ class CCluster { public: - using TPoint = CVectorNx1; - using TPointVec = std::vector; - using TPointPrecise = typename CClusterer::TPointPrecise; - using TPointPreciseVec = typename CClusterer::TPointPreciseVec; - using TPointPreciseDoublePrVec = typename CClusterer::TPointPreciseDoublePrVec; - using TSizeDoublePr = typename CClusterer::TSizeDoublePr; - using TSizeDoublePr2Vec = typename CClusterer::TSizeDoublePr2Vec; - using TDoubleVec = std::vector; - using TDoubleVecVec = std::vector; - using TSizeVec = std::vector; - using TSizeVecVec = std::vector; - using TPrecise = typename SPromoted::Type; - using TMatrixPrecise = CSymmetricMatrixNxN; - using TCovariances = CBasicStatistics::SSampleCovariances; - using TSphericalCluster = typename CSphericalCluster::Type; - using TSphericalClusterVec = std::vector; - using TSphericalClusterVecVec = std::vector; - using TKMeansOnline = CKMeansOnline; - using TKMeansOnlineVec = std::vector; - class CCluster; - using TClusterClusterPr = std::pair; - using TOptionalClusterClusterPr = boost::optional; - - //! \brief Represents a cluster. - class CCluster - { - public: - explicit CCluster(const CXMeansOnline &clusterer) : - m_Index(clusterer.m_ClusterIndexGenerator.next()), - m_DataType(clusterer.m_DataType), - m_DecayRate(clusterer.m_DecayRate), - m_Structure(STRUCTURE_SIZE, clusterer.m_DecayRate) - { - } + explicit CCluster(const CXMeansOnline& clusterer) + : m_Index(clusterer.m_ClusterIndexGenerator.next()), + m_DataType(clusterer.m_DataType), + m_DecayRate(clusterer.m_DecayRate), + m_Structure(STRUCTURE_SIZE, clusterer.m_DecayRate) {} + + //! Initialize by traversing a state document. + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_BUILT_IN(INDEX_TAG, m_Index) + RESTORE(COVARIANCES_TAG, m_Covariances.fromDelimited(traverser.value())) + RESTORE( + STRUCTURE_TAG, + traverser.traverseSubLevel(boost::bind(&TKMeansOnline::acceptRestoreTraverser, &m_Structure, boost::cref(params), _1))) + } while (traverser.next()); - //! Initialize by traversing a state document. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) - { - do - { - const std::string &name = traverser.name(); - RESTORE_BUILT_IN(INDEX_TAG, m_Index) - RESTORE(COVARIANCES_TAG, m_Covariances.fromDelimited(traverser.value())) - RESTORE(STRUCTURE_TAG, traverser.traverseSubLevel(boost::bind(&TKMeansOnline::acceptRestoreTraverser, - &m_Structure, boost::cref(params), _1))) - } - while (traverser.next()); + return true; + } - return true; - } + //! Persist state by passing information to the supplied inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertValue(INDEX_TAG, m_Index); + inserter.insertValue(COVARIANCES_TAG, m_Covariances.toDelimited()); + inserter.insertLevel(STRUCTURE_TAG, boost::bind(&TKMeansOnline::acceptPersistInserter, m_Structure, _1)); + } - //! Persist state by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - inserter.insertValue(INDEX_TAG, m_Index); - inserter.insertValue(COVARIANCES_TAG, m_Covariances.toDelimited()); - inserter.insertLevel(STRUCTURE_TAG, boost::bind(&TKMeansOnline::acceptPersistInserter, m_Structure, _1)); - } + //! Efficiently swap the contents of this and \p other. 
+ void swap(CCluster& other) { + std::swap(m_Index, other.m_Index); + std::swap(m_DataType, other.m_DataType); + std::swap(m_DecayRate, other.m_DecayRate); + std::swap(m_Covariances, other.m_Covariances); + m_Structure.swap(other.m_Structure); + } - //! Efficiently swap the contents of this and \p other. - void swap(CCluster &other) - { - std::swap(m_Index, other.m_Index); - std::swap(m_DataType, other.m_DataType); - std::swap(m_DecayRate, other.m_DecayRate); - std::swap(m_Covariances, other.m_Covariances); - m_Structure.swap(other.m_Structure); - } + //! Set the type of data in the cluster. + void dataType(maths_t::EDataType value) { m_DataType = value; } - //! Set the type of data in the cluster. - void dataType(maths_t::EDataType value) - { - m_DataType = value; - } + //! Set the rate at which information is aged out. + void decayRate(double value) { + m_DecayRate = value; + m_Structure.decayRate(value); + } - //! Set the rate at which information is aged out. - void decayRate(double value) - { - m_DecayRate = value; - m_Structure.decayRate(value); - } + //! Add \p x_ to this cluster. + void add(const TPointPrecise& x, double count) { + switch (m_DataType) { + case maths_t::E_IntegerData: { + TSphericalCluster x_(x, SCountAndVariance(count, 1.0 / 12.0)); + m_Covariances.add(x_); + break; + } + case maths_t::E_DiscreteData: + case maths_t::E_ContinuousData: + case maths_t::E_MixedData: + m_Covariances.add(x, TPointPrecise(count)); + break; + } + m_Structure.add(x, count); + } - //! Add \p x_ to this cluster. - void add(const TPointPrecise &x, double count) - { - switch (m_DataType) - { - case maths_t::E_IntegerData: - { - TSphericalCluster x_(x, SCountAndVariance(count, 1.0 / 12.0)); - m_Covariances.add(x_); - break; - } - case maths_t::E_DiscreteData: - case maths_t::E_ContinuousData: - case maths_t::E_MixedData: - m_Covariances.add(x, TPointPrecise(count)); - break; - } - m_Structure.add(x, count); - } + //! Propagate the cluster forwards by \p time. + void propagateForwardsByTime(double time) { + double alpha = std::exp(-this->scaledDecayRate() * time); + m_Covariances.age(alpha); + m_Structure.age(alpha); + } - //! Propagate the cluster forwards by \p time. - void propagateForwardsByTime(double time) - { - double alpha = std::exp(-this->scaledDecayRate() * time); - m_Covariances.age(alpha); - m_Structure.age(alpha); - } + //! Get the unique index of this cluster. + std::size_t index() const { return m_Index; } - //! Get the unique index of this cluster. - std::size_t index() const - { - return m_Index; - } + //! Get the centre of the cluster. + //! + //! This is defined as the sample mean. + const TPointPrecise& centre() const { return CBasicStatistics::mean(m_Covariances); } - //! Get the centre of the cluster. - //! - //! This is defined as the sample mean. - const TPointPrecise ¢re() const - { - return CBasicStatistics::mean(m_Covariances); - } + //! Get the spread of the cluster. + //! + //! This is defined as the trace of the sample covariance matrix. + double spread() const { + return std::sqrt(CBasicStatistics::maximumLikelihoodCovariances(m_Covariances).trace() / static_cast(N)); + } - //! Get the spread of the cluster. - //! - //! This is defined as the trace of the sample covariance matrix. - double spread() const - { - return std::sqrt( CBasicStatistics::maximumLikelihoodCovariances(m_Covariances).trace() - / static_cast(N)); - } + //! Get the sample covariance matrix this cluster. + const TCovariances& covariances() const { return m_Covariances; } - //! 
Get the sample covariance matrix this cluster. - const TCovariances &covariances() const - { - return m_Covariances; - } + //! Get the total count of values added to the cluster. + double count() const { return CBasicStatistics::count(m_Covariances); } - //! Get the total count of values added to the cluster. - double count() const - { - return CBasicStatistics::count(m_Covariances); - } + //! Get the weight of the cluster. + double weight(maths_t::EClusterWeightCalc calc) const { + switch (calc) { + case maths_t::E_ClustersEqualWeight: + return 1.0; + case maths_t::E_ClustersFractionWeight: + return this->count(); + } + LOG_ABORT("Unexpected calculation style " << calc); + return 1.0; + } - //! Get the weight of the cluster. - double weight(maths_t::EClusterWeightCalc calc) const - { - switch (calc) - { - case maths_t::E_ClustersEqualWeight: return 1.0; - case maths_t::E_ClustersFractionWeight: return this->count(); - } - LOG_ABORT("Unexpected calculation style " << calc); - return 1.0; - } + //! Get the likelihood that \p x is from this cluster. + double logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc, const TPointPrecise& x) const { + double likelihood; + const TPointPrecise& mean = CBasicStatistics::mean(m_Covariances); + const TMatrixPrecise& covariances = CBasicStatistics::maximumLikelihoodCovariances(m_Covariances); + maths_t::EFloatingPointErrorStatus status = gaussianLogLikelihood(covariances, x - mean, likelihood, false); + if (status & maths_t::E_FpFailed) { + LOG_ERROR("Unable to compute likelihood for " << x << " and cluster " << m_Index); + return core::constants::LOG_MIN_DOUBLE - 1.0; + } + if (status & maths_t::E_FpOverflowed) { + return likelihood; + } + return likelihood + std::log(this->weight(calc)); + } - //! Get the likelihood that \p x is from this cluster. - double logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc, - const TPointPrecise &x) const - { - double likelihood; - const TPointPrecise &mean = CBasicStatistics::mean(m_Covariances); - const TMatrixPrecise &covariances = - CBasicStatistics::maximumLikelihoodCovariances(m_Covariances); - maths_t::EFloatingPointErrorStatus status = - gaussianLogLikelihood(covariances, x - mean, likelihood, false); - if (status & maths_t::E_FpFailed) - { - LOG_ERROR("Unable to compute likelihood for " << x - << " and cluster " << m_Index); - return core::constants::LOG_MIN_DOUBLE - 1.0; - } - if (status & maths_t::E_FpOverflowed) - { - return likelihood; - } - return likelihood + std::log(this->weight(calc)); - } + //! Get \p numberSamples from this cluster. + void sample(std::size_t numberSamples, TPointPreciseVec& samples) const { m_Structure.sample(numberSamples, samples); } - //! Get \p numberSamples from this cluster. - void sample(std::size_t numberSamples, TPointPreciseVec &samples) const - { - m_Structure.sample(numberSamples, samples); + //! Try and find a split by a full search of the binary tree + //! of possible optimal 2-splits of the data. + //! + //! \param[in] minimumCount The minimum count of a cluster + //! in the split. + //! \param[in] indexGenerator The unique cluster identifier + //! generator. + TOptionalClusterClusterPr + split(CPRNG::CXorOShiro128Plus& rng, double minimumCount, CClustererTypes::CIndexGenerator& indexGenerator) { + // We do our clustering top down to minimize space and avoid + // making splits before we are confident they exist. 
This is
+ // important for anomaly detection because we do *not* want
+ // to fit a cluster to an outlier and judge it to be not
+ // anomalous as a result.
+ //
+ // By analogy to x-means we choose a candidate split of the
+ // data by minimizing the total within class deviation of the
+ // two classes. In order to decide whether or not to split we
+ // 1) impose a minimum count on the smaller cluster and 2) use an
+ // information theoretic criterion. Specifically, we threshold
+ // the BIC gain of using the multi-mode distribution versus
+ // the single mode distribution.
+
+ LOG_TRACE("split");
+
+ if (m_Structure.buffering()) {
+ return TOptionalClusterClusterPr();
+ }
+
+ std::size_t n = m_Structure.size();
+ if (n < 2) {
+ return TOptionalClusterClusterPr();
+ }
+
+ TSizeVecVec split;
+ if (!this->splitSearch(rng, minimumCount, split)) {
+ return TOptionalClusterClusterPr();
+ }
+ LOG_TRACE("split = " << core::CContainerPrinter::print(split));
+
+ TCovariances covariances[2];
+ TSphericalClusterVec clusters;
+ this->sphericalClusters(clusters);
+ for (std::size_t i = 0u; i < 2; ++i) {
+ for (std::size_t j = 0u; j < split[i].size(); ++j) {
+ covariances[i].add(clusters[split[i][j]]);
}
+ }
+ TKMeansOnlineVec structure;
+ m_Structure.split(split, structure);
+ LOG_TRACE("Splitting cluster " << this->index() << " at " << this->centre() << " left = " << structure[0].print()
+ << ", right = " << structure[1].print());
-
- //! Try and find a split by a full search of the binary tree
- //! of possible optimal 2-splits of the data.
- //!
- //! \param[in] minimumCount The minimum count of a cluster
- //! in the split.
- //! \param[in] indexGenerator The unique cluster identifier
- //! generator.
- TOptionalClusterClusterPr split(CPRNG::CXorOShiro128Plus &rng,
- double minimumCount,
- CClustererTypes::CIndexGenerator &indexGenerator)
- {
- // We do our clustering top down to minimize space and avoid
- // making splits before we are confident they exist. This is
- // important for anomaly detection because we do *not* want
- // to fit a cluster to an outlier and judge it to be not
- // anomalous as a result.
- //
- // By analogy to x-means we choose a candidate split of the
- // data by minimizing the total within class deviation of the
- // two classes. In order to decide whether or not to split we
- // 1) impose a minimum count on the smaller cluster and 2) use an
- // information theoretic criterion. Specifically, we threshold
- // the BIC gain of using the multi-mode distribution versus
- // the single mode distribution.
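For intuition, the split criterion described in the comments above compares the Bayes Information Criterion of fitting one Gaussian against fitting two. The following standalone 1-D sketch is illustrative only and is not part of this patch; in the library the multivariate computation is done by the info criterion accumulators inside BICGain:

    #include <cmath>
    #include <vector>

    // BIC of a single Gaussian fitted by maximum likelihood: k*ln(n) - 2*ln(L),
    // with k = 2 free parameters (mean and variance).
    double gaussianBIC(const std::vector<double>& x) {
        const double pi = 3.141592653589793;
        double n = static_cast<double>(x.size());
        double mean = 0.0;
        for (double xi : x) { mean += xi / n; }
        double variance = 0.0;
        for (double xi : x) { variance += (xi - mean) * (xi - mean) / n; }
        // The maximized Gaussian log-likelihood is -n/2 * (ln(2*pi*variance) + 1).
        double logLikelihood = -0.5 * n * (std::log(2.0 * pi * variance) + 1.0);
        return 2.0 * std::log(n) - 2.0 * logLikelihood;
    }

    // BIC(1) - BIC(2): large positive values favour splitting into two modes.
    double bicGain(const std::vector<double>& lhs, const std::vector<double>& rhs) {
        std::vector<double> merged(lhs);
        merged.insert(merged.end(), rhs.begin(), rhs.end());
        return gaussianBIC(merged) - (gaussianBIC(lhs) + gaussianBIC(rhs));
    }

A candidate split is only accepted when this gain exceeds the MINIMUM_SPLIT_DISTANCE threshold declared at the bottom of this class.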
- - LOG_TRACE("split"); - - if (m_Structure.buffering()) - { - return TOptionalClusterClusterPr(); - } + std::size_t index[] = {indexGenerator.next(), indexGenerator.next()}; + indexGenerator.recycle(m_Index); - std::size_t n = m_Structure.size(); - if (n < 2) - { - return TOptionalClusterClusterPr(); - } + return TClusterClusterPr(CCluster(index[0], m_DataType, m_DecayRate, covariances[0], structure[0]), + CCluster(index[1], m_DataType, m_DecayRate, covariances[1], structure[1])); + } - TSizeVecVec split; - if (!this->splitSearch(rng, minimumCount, split)) - { - return TOptionalClusterClusterPr(); - } - LOG_TRACE("split = " << core::CContainerPrinter::print(split)); - - TCovariances covariances[2]; - TSphericalClusterVec clusters; - this->sphericalClusters(clusters); - for (std::size_t i = 0u; i < 2; ++i) - { - for (std::size_t j = 0u; j < split[i].size(); ++j) - { - covariances[i].add(clusters[split[i][j]]); - } - } - TKMeansOnlineVec structure; - m_Structure.split(split, structure); - LOG_TRACE("Splitting cluster " << this->index() - << " at " << this->centre() - << " left = " << structure[0].print() - << ", right = " << structure[1].print()); - - std::size_t index[] = { indexGenerator.next(), indexGenerator.next() }; - indexGenerator.recycle(m_Index); - - return TClusterClusterPr(CCluster(index[0], m_DataType, m_DecayRate, covariances[0], structure[0]), - CCluster(index[1], m_DataType, m_DecayRate, covariances[1], structure[1])); - } + //! Check if this and \p other cluster should merge. + //! + //! \param[in] other The cluster to merge with this one. + bool shouldMerge(CCluster& other) { return BICGain(*this, other) <= MAXIMUM_MERGE_DISTANCE; } + + //! Merge this and \p other cluster. + CCluster merge(CCluster& other, CClustererTypes::CIndexGenerator& indexGenerator) { + CKMeansOnline structure(m_Structure); + structure.merge(other.m_Structure); + CCluster result(indexGenerator.next(), m_DataType, m_DecayRate, m_Covariances + other.m_Covariances, structure); + indexGenerator.recycle(m_Index); + indexGenerator.recycle(other.m_Index); + return result; + } - //! Check if this and \p other cluster should merge. - //! - //! \param[in] other The cluster to merge with this one. - bool shouldMerge(CCluster &other) - { - return BICGain(*this, other) <= MAXIMUM_MERGE_DISTANCE; - } + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed) const { + seed = CChecksum::calculate(seed, m_Index); + seed = CChecksum::calculate(seed, m_DataType); + seed = CChecksum::calculate(seed, m_DecayRate); + seed = CChecksum::calculate(seed, m_Covariances); + return CChecksum::calculate(seed, m_Structure); + } - //! Merge this and \p other cluster. - CCluster merge(CCluster &other, CClustererTypes::CIndexGenerator &indexGenerator) - { - CKMeansOnline structure(m_Structure); - structure.merge(other.m_Structure); - CCluster result(indexGenerator.next(), m_DataType, m_DecayRate, - m_Covariances + other.m_Covariances, structure); - indexGenerator.recycle(m_Index); - indexGenerator.recycle(other.m_Index); - return result; - } + //! Debug the memory used by this component. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CXMeansOnline"); + core::CMemoryDebug::dynamicSize("m_Structure", m_Structure, mem); + } - //! Get a checksum for this object. 
- uint64_t checksum(uint64_t seed) const - { - seed = CChecksum::calculate(seed, m_Index); - seed = CChecksum::calculate(seed, m_DataType); - seed = CChecksum::calculate(seed, m_DecayRate); - seed = CChecksum::calculate(seed, m_Covariances); - return CChecksum::calculate(seed, m_Structure); - } + //! Get the memory used by this component. + std::size_t memoryUsage() const { return core::CMemory::dynamicSize(m_Structure); } - //! Debug the memory used by this component. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CXMeansOnline"); - core::CMemoryDebug::dynamicSize("m_Structure", m_Structure, mem); - } + //! Get Bayes Information Criterion decrease in going from one + //! to two clusters. + //! + //! \note This is not necessarily positive. + static double BICGain(const CCluster& lhs, const CCluster& rhs) { return BICGain(lhs.m_Covariances, rhs.m_Covariances); } - //! Get the memory used by this component. - std::size_t memoryUsage() const - { - return core::CMemory::dynamicSize(m_Structure); - } + protected: + CCluster(std::size_t index, + maths_t::EDataType dataType, + double decayRate, + const TCovariances& covariances, + const CKMeansOnline& structure) + : m_Index(index), m_DataType(dataType), m_DecayRate(decayRate), m_Covariances(covariances), m_Structure(structure) {} + + //! Search for a split of the data that satisfies the constraints + //! on both the BIC divergence and minimum count. + //! + //! In order to handle the constraint on minimum count, we do a + //! breadth first search of the binary tree of optimal 2-splits + //! of subsets of the data looking for splits which satisfy the + //! constraints on *both* BIC divergence and count. The search + //! terminates at any node which can't be split subject to BIC. + //! + //! The intention of this is to find "natural" splits of the data + //! which would be obscured when splitting into the optimal 2-split. + //! This can occur when a small number of points (less than minimum + //! count) are sufficient far from the other that they split off + //! in preference to some other natural split of the data. Although + //! we can impose the count constraint when finding the optimal + //! 2-split this has associated problems. In particular, extreme + //! outliers then tend to rip sufficient points away from their + //! natural cluster in order to generate a new cluster. + bool splitSearch(CPRNG::CXorOShiro128Plus& rng, double minimumCount, TSizeVecVec& result) { + result.clear(); - //! Get Bayes Information Criterion decrease in going from one - //! to two clusters. - //! - //! \note This is not necessarily positive. - static double BICGain(const CCluster &lhs, const CCluster &rhs) - { - return BICGain(lhs.m_Covariances, rhs.m_Covariances); + // The search visits a binary tree of candidate 2-splits of + // the data breadth first. If a suitable split is found on a + // level of the tree then the search terminates returning that + // split. Note that if a subset of the data can be split we + // also check that the corresponding full 2-split can be split + // subject to the same constraints (to avoid merging the two + // clusters straight away). 
+ + TSphericalClusterVec node; + this->sphericalClusters(node); + TSphericalClusterVec remainder; + remainder.reserve(node.size()); + TSphericalClusterVecVec candidate; + + for (;;) { + TKMeansOnline::kmeans(rng, node, 2, candidate); + LOG_TRACE("candidate = " << core::CContainerPrinter::print(candidate)); + + if (candidate.size() != 2) { + LOG_ERROR("Expected 2-split: " << core::CContainerPrinter::print(candidate)); + break; + } + if (candidate[0].empty() || candidate[1].empty()) { + // This can happen if all the points are co-located, + // in which case we can't split this node anyway. + break; } - protected: - CCluster(std::size_t index, - maths_t::EDataType dataType, - double decayRate, - const TCovariances &covariances, - const CKMeansOnline &structure) : - m_Index(index), - m_DataType(dataType), - m_DecayRate(decayRate), - m_Covariances(covariances), - m_Structure(structure) - {} - - //! Search for a split of the data that satisfies the constraints - //! on both the BIC divergence and minimum count. - //! - //! In order to handle the constraint on minimum count, we do a - //! breadth first search of the binary tree of optimal 2-splits - //! of subsets of the data looking for splits which satisfy the - //! constraints on *both* BIC divergence and count. The search - //! terminates at any node which can't be split subject to BIC. - //! - //! The intention of this is to find "natural" splits of the data - //! which would be obscured when splitting into the optimal 2-split. - //! This can occur when a small number of points (less than minimum - //! count) are sufficient far from the other that they split off - //! in preference to some other natural split of the data. Although - //! we can impose the count constraint when finding the optimal - //! 2-split this has associated problems. In particular, extreme - //! outliers then tend to rip sufficient points away from their - //! natural cluster in order to generate a new cluster. - bool splitSearch(CPRNG::CXorOShiro128Plus &rng, - double minimumCount, - TSizeVecVec &result) - { - result.clear(); - - // The search visits a binary tree of candidate 2-splits of - // the data breadth first. If a suitable split is found on a - // level of the tree then the search terminates returning that - // split. Note that if a subset of the data can be split we - // also check that the corresponding full 2-split can be split - // subject to the same constraints (to avoid merging the two - // clusters straight away). - - TSphericalClusterVec node; - this->sphericalClusters(node); - TSphericalClusterVec remainder; - remainder.reserve(node.size()); - TSphericalClusterVecVec candidate; - - for (;;) - { - TKMeansOnline::kmeans(rng, node, 2, candidate); - LOG_TRACE("candidate = " << core::CContainerPrinter::print(candidate)); - - if (candidate.size() != 2) - { - LOG_ERROR("Expected 2-split: " << core::CContainerPrinter::print(candidate)); - break; - } - if (candidate[0].empty() || candidate[1].empty()) - { - // This can happen if all the points are co-located, - // in which case we can't split this node anyway. - break; - } + // We use the Ledoit and Wolf optimal shrinkage estimate + // because the sample sizes here might be quite small in + // which case the variance of the covariance estimates can + // be large. 
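The Ledoit and Wolf estimator the comment above refers to stabilises small-sample covariance estimates by shrinking the sample matrix towards a scaled identity target. A minimal sketch, assuming a fixed shrinkage intensity lambda (the library's covariancesLedoitWolf additionally estimates the optimal intensity from the data, which is the point of the method):

    #include <array>
    #include <cstddef>

    template<std::size_t N>
    using TMatrix = std::array<std::array<double, N>, N>;

    // Shrink 'sample' towards mu * I, where mu = trace(sample) / N preserves
    // the average variance. lambda = 0 keeps the raw sample estimate and
    // lambda = 1 replaces it entirely by the spherical target.
    template<std::size_t N>
    TMatrix<N> shrinkTowardsIdentity(const TMatrix<N>& sample, double lambda) {
        double mu = 0.0;
        for (std::size_t i = 0; i < N; ++i) {
            mu += sample[i][i] / static_cast<double>(N);
        }
        TMatrix<N> result{};
        for (std::size_t i = 0; i < N; ++i) {
            for (std::size_t j = 0; j < N; ++j) {
                result[i][j] = (1.0 - lambda) * sample[i][j];
            }
            result[i][i] += lambda * mu;
        }
        return result;
    }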
+ + TCovariances covariances[2]; + CBasicStatistics::covariancesLedoitWolf(candidate[0], covariances[0]); + CBasicStatistics::covariancesLedoitWolf(candidate[1], covariances[1]); + double n[] = {CBasicStatistics::count(covariances[0]), CBasicStatistics::count(covariances[1])}; + double nmin = std::min(n[0], n[1]); + + // Check the count constraint. + bool satisfiesCount = (nmin >= minimumCount); + LOG_TRACE("count = " << nmin << " (to split " << minimumCount << ")"); + + // Check the distance constraint. + double distance = BICGain(covariances[0], covariances[1]); + bool satisfiesDistance = (distance > MINIMUM_SPLIT_DISTANCE); + LOG_TRACE("BIC(1) - BIC(2) = " << distance << " (to split " << MINIMUM_SPLIT_DISTANCE << ")"); + + if (!satisfiesCount) { + // Recurse to the (one) node with sufficient count. + if (n[0] > minimumCount && candidate[0].size() > 1) { + node.swap(candidate[0]); + remainder.insert(remainder.end(), candidate[1].begin(), candidate[1].end()); + continue; + } + if (n[1] > minimumCount && candidate[1].size() > 1) { + node.swap(candidate[1]); + remainder.insert(remainder.end(), candidate[0].begin(), candidate[0].end()); + continue; + } + } else if (satisfiesDistance) { + LOG_TRACE("Checking full split"); - // We use the Ledoit and Wolf optimal shrinkage estimate - // because the sample sizes here might be quite small in - // which case the variance of the covariance estimates can - // be large. - - TCovariances covariances[2]; - CBasicStatistics::covariancesLedoitWolf(candidate[0], covariances[0]); - CBasicStatistics::covariancesLedoitWolf(candidate[1], covariances[1]); - double n[] = - { - CBasicStatistics::count(covariances[0]), - CBasicStatistics::count(covariances[1]) - }; - double nmin = std::min(n[0], n[1]); - - // Check the count constraint. - bool satisfiesCount = (nmin >= minimumCount); - LOG_TRACE("count = " << nmin << " (to split " << minimumCount << ")"); - - // Check the distance constraint. - double distance = BICGain(covariances[0], covariances[1]); - bool satisfiesDistance = (distance > MINIMUM_SPLIT_DISTANCE); - LOG_TRACE("BIC(1) - BIC(2) = " << distance - << " (to split " << MINIMUM_SPLIT_DISTANCE << ")"); - - if (!satisfiesCount) - { - // Recurse to the (one) node with sufficient count. 
- if (n[0] > minimumCount && candidate[0].size() > 1) - { - node.swap(candidate[0]); - remainder.insert(remainder.end(), candidate[1].begin(), candidate[1].end()); - continue; - } - if (n[1] > minimumCount && candidate[1].size() > 1) - { - node.swap(candidate[1]); - remainder.insert(remainder.end(), candidate[0].begin(), candidate[0].end()); - continue; - } - } - else if (satisfiesDistance) - { - LOG_TRACE("Checking full split"); - - TSizeVec assignment(remainder.size()); - for (std::size_t i = 0u; i < remainder.size(); ++i) - { - assignment[i] = nearest(remainder[i], covariances); - } - for (std::size_t i = 0u; i < assignment.size(); ++i) - { - std::size_t j = assignment[i]; - TCovariances ci; - ci.add(remainder[i]); - candidate[j].push_back(remainder[i]); - covariances[j] += ci; - n[j] += CBasicStatistics::count(ci); - } + TSizeVec assignment(remainder.size()); + for (std::size_t i = 0u; i < remainder.size(); ++i) { + assignment[i] = nearest(remainder[i], covariances); + } + for (std::size_t i = 0u; i < assignment.size(); ++i) { + std::size_t j = assignment[i]; + TCovariances ci; + ci.add(remainder[i]); + candidate[j].push_back(remainder[i]); + covariances[j] += ci; + n[j] += CBasicStatistics::count(ci); + } - distance = BICGain(covariances[0], covariances[1]); - LOG_TRACE("BIC(1) - BIC(2) = " << distance - << " (to split " << MINIMUM_SPLIT_DISTANCE << ")"); - - if (distance > MINIMUM_SPLIT_DISTANCE) - { - LOG_TRACE("splitting"); - - result.resize(candidate.size()); - TSphericalClusterVec clusters; - this->sphericalClusters(clusters); - TSizeVec indexes(boost::counting_iterator(0), - boost::counting_iterator(clusters.size())); - COrderings::simultaneousSort(clusters, - indexes, - typename CSphericalCluster::SLess()); - for (std::size_t i = 0u; i < candidate.size(); ++i) - { - for (std::size_t j = 0u; j < candidate[i].size(); ++j) - { - std::size_t k = std::lower_bound(clusters.begin(), - clusters.end(), - candidate[i][j], - typename CSphericalCluster::SLess()) - - clusters.begin(); - if (k >= clusters.size()) - { - LOG_ERROR("Missing " << candidate[i][j] - << ", clusters = " << core::CContainerPrinter::print(clusters)); - return false; - } - result[i].push_back(indexes[k]); - } + distance = BICGain(covariances[0], covariances[1]); + LOG_TRACE("BIC(1) - BIC(2) = " << distance << " (to split " << MINIMUM_SPLIT_DISTANCE << ")"); + + if (distance > MINIMUM_SPLIT_DISTANCE) { + LOG_TRACE("splitting"); + + result.resize(candidate.size()); + TSphericalClusterVec clusters; + this->sphericalClusters(clusters); + TSizeVec indexes(boost::counting_iterator(0), boost::counting_iterator(clusters.size())); + COrderings::simultaneousSort(clusters, indexes, typename CSphericalCluster::SLess()); + for (std::size_t i = 0u; i < candidate.size(); ++i) { + for (std::size_t j = 0u; j < candidate[i].size(); ++j) { + std::size_t k = + std::lower_bound( + clusters.begin(), clusters.end(), candidate[i][j], typename CSphericalCluster::SLess()) - + clusters.begin(); + if (k >= clusters.size()) { + LOG_ERROR("Missing " << candidate[i][j] << ", clusters = " << core::CContainerPrinter::print(clusters)); + return false; } + result[i].push_back(indexes[k]); } } - break; - } - - return !result.empty(); - } - - //! Get the spherical clusters being maintained in the fine- - //! grained structure model of this cluster. 
- void sphericalClusters(TSphericalClusterVec &result) const - { - m_Structure.clusters(result); - switch (m_DataType) - { - case maths_t::E_IntegerData: - { - for (std::size_t i = 0u; i < result.size(); ++i) - { - result[i].annotation().s_Variance += 1.0 / 12.0; - } - break; - } - case maths_t::E_DiscreteData: - case maths_t::E_ContinuousData: - case maths_t::E_MixedData: - break; } } + break; + } - //! Get the closest (in Mahalanobis distance) cluster to \p x. - static std::size_t nearest(const TSphericalCluster &x, const TCovariances (&c)[2]) - { - TPrecise d[] = { 0, 0 }; - TPointPrecise x_(x); - inverseQuadraticForm(CBasicStatistics::maximumLikelihoodCovariances(c[0]), - x_ - CBasicStatistics::mean(c[0]), - d[0]); - inverseQuadraticForm(CBasicStatistics::maximumLikelihoodCovariances(c[1]), - x_ - CBasicStatistics::mean(c[1]), - d[1]); - return d[0] < d[1] ? 0 : 1; - } - - //! Get the Bayes Information Criterion gain, subject to Gaussian - //! assumptions, in representing \p lhs and \p rhs using one or - //! two modes. - static double BICGain(const TCovariances &lhs, const TCovariances &rhs) - { - CGaussianInfoCriterion BIC1; - BIC1.add(lhs + rhs); - CGaussianInfoCriterion BIC2; - BIC2.add(lhs); - BIC2.add(rhs); - return BIC1.calculate() - BIC2.calculate(); - } + return !result.empty(); + } - private: - //! Get the scaled decay rate for use by propagateForwardsByTime. - double scaledDecayRate() const - { - return std::pow(0.5, static_cast(N)) * m_DecayRate; + //! Get the spherical clusters being maintained in the fine- + //! grained structure model of this cluster. + void sphericalClusters(TSphericalClusterVec& result) const { + m_Structure.clusters(result); + switch (m_DataType) { + case maths_t::E_IntegerData: { + for (std::size_t i = 0u; i < result.size(); ++i) { + result[i].annotation().s_Variance += 1.0 / 12.0; } + break; + } + case maths_t::E_DiscreteData: + case maths_t::E_ContinuousData: + case maths_t::E_MixedData: + break; + } + } - private: - //! A unique identifier for this cluster. - std::size_t m_Index; - - //! The type of data which will be clustered. - maths_t::EDataType m_DataType; + //! Get the closest (in Mahalanobis distance) cluster to \p x. + static std::size_t nearest(const TSphericalCluster& x, const TCovariances (&c)[2]) { + TPrecise d[] = {0, 0}; + TPointPrecise x_(x); + inverseQuadraticForm(CBasicStatistics::maximumLikelihoodCovariances(c[0]), x_ - CBasicStatistics::mean(c[0]), d[0]); + inverseQuadraticForm(CBasicStatistics::maximumLikelihoodCovariances(c[1]), x_ - CBasicStatistics::mean(c[1]), d[1]); + return d[0] < d[1] ? 0 : 1; + } - //! Controls the rate at which information is lost. - double m_DecayRate; + //! Get the Bayes Information Criterion gain, subject to Gaussian + //! assumptions, in representing \p lhs and \p rhs using one or + //! two modes. + static double BICGain(const TCovariances& lhs, const TCovariances& rhs) { + CGaussianInfoCriterion BIC1; + BIC1.add(lhs + rhs); + CGaussianInfoCriterion BIC2; + BIC2.add(lhs); + BIC2.add(rhs); + return BIC1.calculate() - BIC2.calculate(); + } - //! The mean, covariances of the data in this cluster. - TCovariances m_Covariances; + private: + //! Get the scaled decay rate for use by propagateForwardsByTime. + double scaledDecayRate() const { return std::pow(0.5, static_cast(N)) * m_DecayRate; } - //! The data representing the internal structure of this cluster. - TKMeansOnline m_Structure; - }; + private: + //! A unique identifier for this cluster. 
+ std::size_t m_Index;
- using TClusterVec = std::vector<CCluster>;
- using TClusterVecItr = typename TClusterVec::iterator;
- using TClusterVecCItr = typename TClusterVec::const_iterator;
+ //! The type of data which will be clustered.
+ maths_t::EDataType m_DataType;
- public:
- //! \name Life-cycle
- //@{
- //! Construct a new clusterer.
- //!
- //! \param[in] dataType The type of data which will be clustered.
- //! \param[in] weightCalc The style of the cluster weight calculation
- //! (see maths_t::EClusterWeightCalc for details).
- //! \param[in] decayRate Controls the rate at which information is
- //! lost from the clusters.
- //! \param[in] minimumClusterFraction The minimum fractional count
- //! of points in a cluster.
- //! \param[in] minimumClusterCount The minimum count of points in a
- //! cluster.
- //! \param[in] minimumCategoryCount The minimum count for a category
- //! in the sketch to cluster.
- //! \param[in] splitFunc Optional callback for when a cluster is split.
- //! \param[in] mergeFunc Optional callback for when two clusters are merged.
- CXMeansOnline(maths_t::EDataType dataType,
- maths_t::EClusterWeightCalc weightCalc,
- double decayRate = 0.0,
- double minimumClusterFraction = MINIMUM_CLUSTER_SPLIT_FRACTION,
- double minimumClusterCount = MINIMUM_CLUSTER_SPLIT_COUNT,
- double minimumCategoryCount = MINIMUM_CATEGORY_COUNT,
- const CClustererTypes::TSplitFunc &splitFunc = CClustererTypes::CDoNothing(),
- const CClustererTypes::TMergeFunc &mergeFunc = CClustererTypes::CDoNothing()) :
- CClusterer(splitFunc, mergeFunc),
- m_DataType(dataType),
- m_InitialDecayRate(decayRate),
- m_DecayRate(decayRate),
- m_HistoryLength(0.0),
- m_WeightCalc(weightCalc),
- m_MinimumClusterFraction(minimumClusterFraction),
- m_MinimumClusterCount(minimumClusterCount),
- m_MinimumCategoryCount(minimumCategoryCount),
- m_Clusters(1, CCluster(*this))
- {}
-
- //! Construct by traversing a state document.
- CXMeansOnline(const SDistributionRestoreParams &params,
- core::CStateRestoreTraverser &traverser):
- CClusterer(CClustererTypes::CDoNothing(), CClustererTypes::CDoNothing()),
- m_DataType(params.s_DataType),
- m_InitialDecayRate(params.s_DecayRate),
- m_DecayRate(params.s_DecayRate),
- m_HistoryLength(),
- m_WeightCalc(maths_t::E_ClustersEqualWeight),
- m_MinimumClusterFraction(),
- m_MinimumClusterCount(),
- m_MinimumCategoryCount(params.s_MinimumCategoryCount)
- {
- traverser.traverseSubLevel(boost::bind(&CXMeansOnline::acceptRestoreTraverser,
- this, boost::cref(params), _1));
- }
+ //! Controls the rate at which information is lost.
+ double m_DecayRate;
- //! Construct by traversing a state document.
- CXMeansOnline(const SDistributionRestoreParams &params,
- const CClustererTypes::TSplitFunc &splitFunc,
- const CClustererTypes::TMergeFunc &mergeFunc,
- core::CStateRestoreTraverser &traverser) :
- CClusterer(splitFunc, mergeFunc),
- m_DataType(params.s_DataType),
- m_InitialDecayRate(params.s_DecayRate),
- m_DecayRate(params.s_DecayRate),
- m_HistoryLength(),
- m_WeightCalc(maths_t::E_ClustersEqualWeight),
- m_MinimumClusterFraction(),
- m_MinimumClusterCount(),
- m_MinimumCategoryCount(params.s_MinimumCategoryCount)
- {
- traverser.traverseSubLevel(boost::bind(&CXMeansOnline::acceptRestoreTraverser,
- this, boost::cref(params), _1));
+ //! The mean, covariances of the data in this cluster.
+ TCovariances m_Covariances;
+
+ //! The data representing the internal structure of this cluster.
+ TKMeansOnline m_Structure;
+ };
+
+ using TClusterVec = std::vector<CCluster>;
+ using TClusterVecItr = typename TClusterVec::iterator;
+ using TClusterVecCItr = typename TClusterVec::const_iterator;
+
+public:
+ //! \name Life-cycle
+ //@{
+ //! Construct a new clusterer.
+ //!
+ //! \param[in] dataType The type of data which will be clustered.
+ //! \param[in] weightCalc The style of the cluster weight calculation
+ //! (see maths_t::EClusterWeightCalc for details).
+ //! \param[in] decayRate Controls the rate at which information is
+ //! lost from the clusters.
+ //! \param[in] minimumClusterFraction The minimum fractional count
+ //! of points in a cluster.
+ //! \param[in] minimumClusterCount The minimum count of points in a
+ //! cluster.
+ //! \param[in] minimumCategoryCount The minimum count for a category
+ //! in the sketch to cluster.
+ //! \param[in] splitFunc Optional callback for when a cluster is split.
+ //! \param[in] mergeFunc Optional callback for when two clusters are merged.
+ CXMeansOnline(maths_t::EDataType dataType,
+ maths_t::EClusterWeightCalc weightCalc,
+ double decayRate = 0.0,
+ double minimumClusterFraction = MINIMUM_CLUSTER_SPLIT_FRACTION,
+ double minimumClusterCount = MINIMUM_CLUSTER_SPLIT_COUNT,
+ double minimumCategoryCount = MINIMUM_CATEGORY_COUNT,
+ const CClustererTypes::TSplitFunc& splitFunc = CClustererTypes::CDoNothing(),
+ const CClustererTypes::TMergeFunc& mergeFunc = CClustererTypes::CDoNothing())
+ : CClusterer(splitFunc, mergeFunc),
+ m_DataType(dataType),
+ m_InitialDecayRate(decayRate),
+ m_DecayRate(decayRate),
+ m_HistoryLength(0.0),
+ m_WeightCalc(weightCalc),
+ m_MinimumClusterFraction(minimumClusterFraction),
+ m_MinimumClusterCount(minimumClusterCount),
+ m_MinimumCategoryCount(minimumCategoryCount),
+ m_Clusters(1, CCluster(*this)) {}
+
+ //! Construct by traversing a state document.
+ CXMeansOnline(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser)
+ : CClusterer(CClustererTypes::CDoNothing(), CClustererTypes::CDoNothing()),
+ m_DataType(params.s_DataType),
+ m_InitialDecayRate(params.s_DecayRate),
+ m_DecayRate(params.s_DecayRate),
+ m_HistoryLength(),
+ m_WeightCalc(maths_t::E_ClustersEqualWeight),
+ m_MinimumClusterFraction(),
+ m_MinimumClusterCount(),
+ m_MinimumCategoryCount(params.s_MinimumCategoryCount) {
+ traverser.traverseSubLevel(boost::bind(&CXMeansOnline::acceptRestoreTraverser, this, boost::cref(params), _1));
+ }
+
+ //! Construct by traversing a state document.
+ CXMeansOnline(const SDistributionRestoreParams& params,
+ const CClustererTypes::TSplitFunc& splitFunc,
+ const CClustererTypes::TMergeFunc& mergeFunc,
+ core::CStateRestoreTraverser& traverser)
+ : CClusterer(splitFunc, mergeFunc),
+ m_DataType(params.s_DataType),
+ m_InitialDecayRate(params.s_DecayRate),
+ m_DecayRate(params.s_DecayRate),
+ m_HistoryLength(),
+ m_WeightCalc(maths_t::E_ClustersEqualWeight),
+ m_MinimumClusterFraction(),
+ m_MinimumClusterCount(),
+ m_MinimumCategoryCount(params.s_MinimumCategoryCount) {
+ traverser.traverseSubLevel(boost::bind(&CXMeansOnline::acceptRestoreTraverser, this, boost::cref(params), _1));
+ }
+
+ //! The x-means clusterer has value semantics.
+ CXMeansOnline(const CXMeansOnline& other) + : CClusterer(other.splitFunc(), other.mergeFunc()), + m_Rng(other.m_Rng), + m_DataType(other.m_DataType), + m_InitialDecayRate(other.m_InitialDecayRate), + m_DecayRate(other.m_DecayRate), + m_HistoryLength(other.m_HistoryLength), + m_WeightCalc(other.m_WeightCalc), + m_MinimumClusterFraction(other.m_MinimumClusterFraction), + m_MinimumClusterCount(other.m_MinimumClusterCount), + m_MinimumCategoryCount(other.m_MinimumCategoryCount), + m_ClusterIndexGenerator(other.m_ClusterIndexGenerator.deepCopy()), + m_Clusters(other.m_Clusters) {} + + //! The x-means clusterer has value semantics. + CXMeansOnline& operator=(const CXMeansOnline& other) { + if (this != &other) { + CXMeansOnline tmp(other); + this->swap(tmp); } - - //! The x-means clusterer has value semantics. - CXMeansOnline(const CXMeansOnline &other) : - CClusterer(other.splitFunc(), other.mergeFunc()), - m_Rng(other.m_Rng), - m_DataType(other.m_DataType), - m_InitialDecayRate(other.m_InitialDecayRate), - m_DecayRate(other.m_DecayRate), - m_HistoryLength(other.m_HistoryLength), - m_WeightCalc(other.m_WeightCalc), - m_MinimumClusterFraction(other.m_MinimumClusterFraction), - m_MinimumClusterCount(other.m_MinimumClusterCount), - m_MinimumCategoryCount(other.m_MinimumCategoryCount), - m_ClusterIndexGenerator(other.m_ClusterIndexGenerator.deepCopy()), - m_Clusters(other.m_Clusters) - {} - - //! The x-means clusterer has value semantics. - CXMeansOnline &operator=(const CXMeansOnline &other) - { - if (this != &other) - { - CXMeansOnline tmp(other); - this->swap(tmp); - } - return *this; + return *this; + } + //@} + + virtual ~CXMeansOnline() {} + + //! Efficiently swap the contents of two k-means objects. + void swap(CXMeansOnline& other) { + this->CClusterer::swap(other); + std::swap(m_Rng, other.m_Rng); + std::swap(m_DataType, other.m_DataType); + std::swap(m_InitialDecayRate, other.m_InitialDecayRate); + std::swap(m_DecayRate, other.m_DecayRate); + std::swap(m_HistoryLength, other.m_HistoryLength); + std::swap(m_WeightCalc, other.m_WeightCalc); + std::swap(m_MinimumClusterFraction, other.m_MinimumClusterFraction); + std::swap(m_MinimumClusterCount, other.m_MinimumClusterCount); + std::swap(m_MinimumCategoryCount, other.m_MinimumCategoryCount); + std::swap(m_ClusterIndexGenerator, other.m_ClusterIndexGenerator); + m_Clusters.swap(other.m_Clusters); + } + + //! \name Clusterer Contract + //@{ + //! Get the tag name for this clusterer. + virtual std::string persistenceTag() const { return CClustererTypes::X_MEANS_ONLINE_TAG; } + + //! Persist state by passing information to the supplied inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + inserter.insertLevel(CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter, &m_Clusters[i], _1)); } - //@} - - virtual ~CXMeansOnline() {} - - //! Efficiently swap the contents of two k-means objects. 
- void swap(CXMeansOnline &other) - { - this->CClusterer::swap(other); - std::swap(m_Rng, other.m_Rng); - std::swap(m_DataType, other.m_DataType); - std::swap(m_InitialDecayRate, other.m_InitialDecayRate); - std::swap(m_DecayRate, other.m_DecayRate); - std::swap(m_HistoryLength, other.m_HistoryLength); - std::swap(m_WeightCalc, other.m_WeightCalc); - std::swap(m_MinimumClusterFraction, other.m_MinimumClusterFraction); - std::swap(m_MinimumClusterCount, other.m_MinimumClusterCount); - std::swap(m_MinimumCategoryCount, other.m_MinimumCategoryCount); - std::swap(m_ClusterIndexGenerator, other.m_ClusterIndexGenerator); - m_Clusters.swap(other.m_Clusters); + inserter.insertValue(DECAY_RATE_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(HISTORY_LENGTH_TAG, m_HistoryLength, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(RNG_TAG, m_Rng.toString()); + inserter.insertValue(WEIGHT_CALC_TAG, static_cast(m_WeightCalc)); + inserter.insertValue(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction); + inserter.insertValue(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount); + inserter.insertLevel(CLUSTER_INDEX_GENERATOR_TAG, + boost::bind(&CClustererTypes::CIndexGenerator::acceptPersistInserter, &m_ClusterIndexGenerator, _1)); + } + + //! Creates a copy of the clusterer. + //! + //! \warning Caller owns returned object. + virtual CXMeansOnline* clone() const { return new CXMeansOnline(*this); } + + //! Clear the current clusterer state. + virtual void clear() { + *this = CXMeansOnline(m_DataType, + m_WeightCalc, + m_InitialDecayRate, + m_MinimumClusterFraction, + m_MinimumClusterCount, + m_MinimumCategoryCount, + this->splitFunc(), + this->mergeFunc()); + } + + //! Get the number of clusters. + virtual std::size_t numberClusters() const { return m_Clusters.size(); } + + //! Set the type of data being clustered. + virtual void dataType(maths_t::EDataType dataType) { + m_DataType = dataType; + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + m_Clusters[i].dataType(dataType); } + } - //! \name Clusterer Contract - //@{ - //! Get the tag name for this clusterer. - virtual std::string persistenceTag() const - { - return CClustererTypes::X_MEANS_ONLINE_TAG; + //! Set the rate at which information is aged out. + virtual void decayRate(double decayRate) { + m_DecayRate = decayRate; + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + m_Clusters[i].decayRate(decayRate); } + } - //! Persist state by passing information to the supplied inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - inserter.insertLevel(CLUSTER_TAG, - boost::bind(&CCluster::acceptPersistInserter, &m_Clusters[i], _1)); - } - inserter.insertValue(DECAY_RATE_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(HISTORY_LENGTH_TAG, m_HistoryLength, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(RNG_TAG, m_Rng.toString()); - inserter.insertValue(WEIGHT_CALC_TAG, static_cast(m_WeightCalc)); - inserter.insertValue(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction); - inserter.insertValue(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount); - inserter.insertLevel(CLUSTER_INDEX_GENERATOR_TAG, - boost::bind(&CClustererTypes::CIndexGenerator::acceptPersistInserter, - &m_ClusterIndexGenerator, _1)); - } + //! Check if the cluster identified by \p index exists. + virtual bool hasCluster(std::size_t index) const { return this->cluster(index) != 0; } - //! 
Creates a copy of the clusterer. - //! - //! \warning Caller owns returned object. - virtual CXMeansOnline *clone() const - { - return new CXMeansOnline(*this); + //! Get the centre of the cluster identified by \p index. + virtual bool clusterCentre(std::size_t index, TPointPrecise& result) const { + const CCluster* cluster = this->cluster(index); + if (!cluster) { + LOG_ERROR("Cluster " << index << " doesn't exist"); + return false; } - - //! Clear the current clusterer state. - virtual void clear() - { - *this = CXMeansOnline(m_DataType, m_WeightCalc, - m_InitialDecayRate, - m_MinimumClusterFraction, - m_MinimumClusterCount, - m_MinimumCategoryCount, - this->splitFunc(), this->mergeFunc()); + result = cluster->centre(); + return true; + } + + //! Get the spread of the cluster identified by \p index. + virtual bool clusterSpread(std::size_t index, double& result) const { + const CCluster* cluster = this->cluster(index); + if (!cluster) { + LOG_ERROR("Cluster " << index << " doesn't exist"); + return false; } - - //! Get the number of clusters. - virtual std::size_t numberClusters() const - { - return m_Clusters.size(); + result = cluster->spread(); + return true; + } + + //! Gets the index of the cluster(s) to which \p point belongs + //! together with their weighting factor. + virtual void cluster(const TPointPrecise& point, TSizeDoublePr2Vec& result, double count = 1.0) const { + result.clear(); + + if (m_Clusters.empty()) { + LOG_ERROR("No clusters"); + return; } - //! Set the type of data being clustered. - virtual void dataType(maths_t::EDataType dataType) - { - m_DataType = dataType; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - m_Clusters[i].dataType(dataType); - } + // This does a soft assignment. Given we are finding a + // partitioning clustering (as a result of targeting + // the k-means objective) we only consider the case that + // the point comes from either the left or right cluster. + // A-priori the probability a randomly selected point + // comes from a cluster is proportional to its weight: + // P(i) = n(i) / Sum_j{ n(j) } + // + // Bayes theorem then immediately gives that the probability + // that a given point is from the i'th cluster + // P(i | x) = L(x | i) * P(i) / Z + // + // where Z is the normalization constant: + // Z = Sum_i{ P(i | x) } + + result.reserve(m_Clusters.size()); + double renormalizer = boost::numeric::bounds::lowest(); + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + double likelihood = m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, point); + result.push_back(std::make_pair(m_Clusters[i].index(), likelihood)); + renormalizer = std::max(renormalizer, likelihood); } - - //! Set the rate at which information is aged out. - virtual void decayRate(double decayRate) - { - m_DecayRate = decayRate; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - m_Clusters[i].decayRate(decayRate); - } + double normalizer = 0.0; + for (std::size_t i = 0u; i < result.size(); ++i) { + result[i].second = std::exp(result[i].second - renormalizer); + normalizer += result[i].second; } - - //! Check if the cluster identified by \p index exists. - virtual bool hasCluster(std::size_t index) const - { - return this->cluster(index) != 0; + double pmax = 0.0; + for (std::size_t i = 0u; i < result.size(); ++i) { + result[i].second /= normalizer; + pmax = std::max(pmax, result[i].second); } - - //! Get the centre of the cluster identified by \p index. 
- virtual bool clusterCentre(std::size_t index, TPointPrecise &result) const - { - const CCluster *cluster = this->cluster(index); - if (!cluster) - { - LOG_ERROR("Cluster " << index << " doesn't exist"); - return false; - } - result = cluster->centre(); - return true; + result.erase(std::remove_if(result.begin(), result.end(), CProbabilityLessThan(HARD_ASSIGNMENT_THRESHOLD * pmax)), result.end()); + normalizer = 0.0; + for (std::size_t i = 0u; i < result.size(); ++i) { + normalizer += result[i].second; } - - //! Get the spread of the cluster identified by \p index. - virtual bool clusterSpread(std::size_t index, double &result) const - { - const CCluster *cluster = this->cluster(index); - if (!cluster) - { - LOG_ERROR("Cluster " << index << " doesn't exist"); - return false; - } - result = cluster->spread(); - return true; + for (std::size_t i = 0u; i < result.size(); ++i) { + result[i].second *= count / normalizer; } - - //! Gets the index of the cluster(s) to which \p point belongs - //! together with their weighting factor. - virtual void cluster(const TPointPrecise &point, - TSizeDoublePr2Vec &result, - double count = 1.0) const - { - result.clear(); - - if (m_Clusters.empty()) - { - LOG_ERROR("No clusters"); - return; + } + + //! Update the clustering with \p point and return its cluster(s) + //! together with their weighting factor. + virtual void add(const TPointPrecise& x, TSizeDoublePr2Vec& clusters, double count = 1.0) { + m_HistoryLength += 1.0; + + if (m_Clusters.size() == 1) { + LOG_TRACE("Adding " << x << " to " << m_Clusters[0].centre()); + m_Clusters[0].add(x, count); + clusters.push_back(std::make_pair(m_Clusters[0].index(), count)); + if (this->maybeSplit(m_Clusters.begin())) { + this->cluster(x, clusters, count); } + } else { + using TSizeDoublePr = std::pair; + using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack>; - // This does a soft assignment. Given we are finding a - // partitioning clustering (as a result of targeting - // the k-means objective) we only consider the case that - // the point comes from either the left or right cluster. 
- // A-priori the probability a randomly selected point - // comes from a cluster is proportional to its weight: - // P(i) = n(i) / Sum_j{ n(j) } - // - // Bayes theorem then immediately gives that the probability - // that a given point is from the i'th cluster - // P(i | x) = L(x | i) * P(i) / Z - // - // where Z is the normalization constant: - // Z = Sum_i{ P(i | x) } - - result.reserve(m_Clusters.size()); - double renormalizer = boost::numeric::bounds::lowest(); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - double likelihood = m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, point); - result.push_back(std::make_pair(m_Clusters[i].index(), likelihood)); - renormalizer = std::max(renormalizer, likelihood); - } - double normalizer = 0.0; - for (std::size_t i = 0u; i < result.size(); ++i) - { - result[i].second = std::exp(result[i].second - renormalizer); - normalizer += result[i].second; - } - double pmax = 0.0; - for (std::size_t i = 0u; i < result.size(); ++i) - { - result[i].second /= normalizer; - pmax = std::max(pmax, result[i].second); + TMaxAccumulator closest; + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + closest.add(std::make_pair(m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, x), i)); } - result.erase(std::remove_if(result.begin(), result.end(), - CProbabilityLessThan(HARD_ASSIGNMENT_THRESHOLD * pmax)), - result.end()); - normalizer = 0.0; - for (std::size_t i = 0u; i < result.size(); ++i) - { - normalizer += result[i].second; - } - for (std::size_t i = 0u; i < result.size(); ++i) - { - result[i].second *= count / normalizer; - } - } - - //! Update the clustering with \p point and return its cluster(s) - //! together with their weighting factor. - virtual void add(const TPointPrecise &x, TSizeDoublePr2Vec &clusters, double count = 1.0) - { - m_HistoryLength += 1.0; - - if (m_Clusters.size() == 1) - { - LOG_TRACE("Adding " << x << " to " << m_Clusters[0].centre()); - m_Clusters[0].add(x, count); - clusters.push_back(std::make_pair(m_Clusters[0].index(), count)); - if (this->maybeSplit(m_Clusters.begin())) - { + closest.sort(); + LOG_TRACE("closest = " << closest.print()); + + double likelihood0 = closest[0].first; + double likelihood1 = closest[1].first; + + // Normalize the likelihood values. + double p0 = 1.0; + double p1 = std::exp(likelihood1 - likelihood0); + double normalizer = p0 + p1; + p0 /= normalizer; + p1 /= normalizer; + LOG_TRACE("probabilities = [" << p0 << "," << p1 << "]"); + + TClusterVecItr cluster0 = m_Clusters.begin() + closest[0].second; + TClusterVecItr cluster1 = m_Clusters.begin() + closest[1].second; + + if (p1 < HARD_ASSIGNMENT_THRESHOLD * p0) { + LOG_TRACE("Adding " << x << " to " << cluster0->centre()); + cluster0->add(x, count); + clusters.push_back(std::make_pair(cluster0->index(), count)); + if (this->maybeSplit(cluster0) || this->maybeMerge(cluster0)) { this->cluster(x, clusters, count); } - } - else - { - using TSizeDoublePr = std::pair; - using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack >; - - TMaxAccumulator closest; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - closest.add(std::make_pair(m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, x), i)); - } - closest.sort(); - LOG_TRACE("closest = " << closest.print()); - - double likelihood0 = closest[0].first; - double likelihood1 = closest[1].first; - - // Normalize the likelihood values. 
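Both cluster() and the two-cluster branch of add() normalize log-likelihoods by exponentiating relative to the largest value (the renormalizer above, likelihood0 below), which is the standard guard against floating point underflow when every raw likelihood is vanishingly small. The pattern in isolation, as a self-contained sketch:

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // Convert log-likelihoods to probabilities summing to one. Subtracting the
    // maximum first means the largest term exponentiates to exactly 1.0, so
    // nothing underflows however negative the inputs are. Assumes a non-empty
    // input.
    std::vector<double> responsibilities(std::vector<double> logLikelihoods) {
        double renormalizer =
            *std::max_element(logLikelihoods.begin(), logLikelihoods.end());
        double normalizer = 0.0;
        for (double& l : logLikelihoods) {
            l = std::exp(l - renormalizer);
            normalizer += l;
        }
        for (double& l : logLikelihoods) {
            l /= normalizer;
        }
        return logLikelihoods;
    }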
- double p0 = 1.0; - double p1 = std::exp(likelihood1 - likelihood0); - double normalizer = p0 + p1; - p0 /= normalizer; - p1 /= normalizer; - LOG_TRACE("probabilities = [" << p0 << "," << p1 << "]"); - - TClusterVecItr cluster0 = m_Clusters.begin() + closest[0].second; - TClusterVecItr cluster1 = m_Clusters.begin() + closest[1].second; - - if (p1 < HARD_ASSIGNMENT_THRESHOLD * p0) - { - LOG_TRACE("Adding " << x << " to " << cluster0->centre()); - cluster0->add(x, count); - clusters.push_back(std::make_pair(cluster0->index(), count)); - if (this->maybeSplit(cluster0) || this->maybeMerge(cluster0)) - { - this->cluster(x, clusters, count); - } - } - else - { - // Get the weighted counts. - double count0 = count * p0; - double count1 = count * p1; - LOG_TRACE("Soft adding " << x - << " " << count0 << " to " << cluster0->centre() - << " and " << count1 << " to " << cluster1->centre()); - - cluster0->add(x, count0); - cluster1->add(x, count1); - clusters.push_back(std::make_pair(cluster0->index(), count0)); - clusters.push_back(std::make_pair(cluster1->index(), count1)); - if ( this->maybeSplit(cluster0) - || this->maybeSplit(cluster1) - || this->maybeMerge(cluster0) - || this->maybeMerge(cluster1)) - { - this->cluster(x, clusters, count); - } + } else { + // Get the weighted counts. + double count0 = count * p0; + double count1 = count * p1; + LOG_TRACE("Soft adding " << x << " " << count0 << " to " << cluster0->centre() << " and " << count1 << " to " + << cluster1->centre()); + + cluster0->add(x, count0); + cluster1->add(x, count1); + clusters.push_back(std::make_pair(cluster0->index(), count0)); + clusters.push_back(std::make_pair(cluster1->index(), count1)); + if (this->maybeSplit(cluster0) || this->maybeSplit(cluster1) || this->maybeMerge(cluster0) || this->maybeMerge(cluster1)) { + this->cluster(x, clusters, count); } } - - if (this->prune()) - { - this->cluster(x, clusters, count); - } } - //! Update the clustering with \p points. - virtual void add(const TPointPreciseDoublePrVec &x) - { - if (m_Clusters.empty()) - { - m_Clusters.push_back(CCluster(*this)); - } - TSizeDoublePr2Vec dummy; - for (std::size_t i = 0u; i < x.size(); ++i) - { - this->add(x[i].first, dummy, x[i].second); - } + if (this->prune()) { + this->cluster(x, clusters, count); } + } - //! Propagate the clustering forwards by \p time. - //! - //! The cluster priors relax back to non-informative and the - //! cluster probabilities become less at a rate controlled by - //! the decay rate parameter (optionally supplied to the constructor). - //! - //! \param time The time increment to apply. - //! \note \p time must be non negative. - virtual void propagateForwardsByTime(double time) - { - if (time < 0.0) - { - LOG_ERROR("Can't propagate backwards in time"); - return; - } - m_HistoryLength *= std::exp(-m_DecayRate * time); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - m_Clusters[i].propagateForwardsByTime(time); - } + //! Update the clustering with \p points. + virtual void add(const TPointPreciseDoublePrVec& x) { + if (m_Clusters.empty()) { + m_Clusters.push_back(CCluster(*this)); } - - //! Sample the cluster with index \p index. - //! - //! \param[in] index The index of the cluster to sample. - //! \param[in] numberSamples The desired number of samples. - //! \param[out] samples Filled in with the samples. - //! \return True if the cluster could be sampled and false otherwise. 
- virtual bool sample(std::size_t index, - std::size_t numberSamples, - TPointPreciseVec &samples) const - { - const CCluster *cluster = this->cluster(index); - if (!cluster) - { - LOG_ERROR("Cluster " << index << " doesn't exist"); - return false; - } - cluster->sample(numberSamples, samples); - return true; + TSizeDoublePr2Vec dummy; + for (std::size_t i = 0u; i < x.size(); ++i) { + this->add(x[i].first, dummy, x[i].second); } - - //! Get the probability of the cluster with index \p index. - //! - //! \param[in] index The index of the cluster of interest. - //! \return The probability of the cluster identified by \p index. - virtual double probability(std::size_t index) const - { - double weight = 0.0; - double Z = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - const CCluster &cluster = m_Clusters[i]; - if (cluster.index() == index) - { - weight = cluster.weight(maths_t::E_ClustersFractionWeight); - } - Z += cluster.weight(maths_t::E_ClustersFractionWeight); - } - return Z == 0.0 ? 0.0 : weight / Z; + } + + //! Propagate the clustering forwards by \p time. + //! + //! The cluster priors relax back to non-informative and the + //! cluster probabilities become less at a rate controlled by + //! the decay rate parameter (optionally supplied to the constructor). + //! + //! \param time The time increment to apply. + //! \note \p time must be non negative. + virtual void propagateForwardsByTime(double time) { + if (time < 0.0) { + LOG_ERROR("Can't propagate backwards in time"); + return; } - - //! Debug the memory used by the object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CXMeansOnline"); - core::CMemoryDebug::dynamicSize("m_ClusterIndexGenerator", m_ClusterIndexGenerator, mem); - core::CMemoryDebug::dynamicSize("m_Clusters", m_Clusters, mem); - } - - //! Get the memory used by the object. - virtual std::size_t memoryUsage() const - { - std::size_t mem = core::CMemory::dynamicSize(m_ClusterIndexGenerator); - mem += core::CMemory::dynamicSize(m_Clusters); - return mem; + m_HistoryLength *= std::exp(-m_DecayRate * time); + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + m_Clusters[i].propagateForwardsByTime(time); } - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const - { - return sizeof(*this); - } - - //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const - { - seed = CChecksum::calculate(seed, m_DataType); - seed = CChecksum::calculate(seed, m_DecayRate); - seed = CChecksum::calculate(seed, m_HistoryLength); - seed = CChecksum::calculate(seed, m_WeightCalc); - return CChecksum::calculate(seed, m_Clusters); + } + + //! Sample the cluster with index \p index. + //! + //! \param[in] index The index of the cluster to sample. + //! \param[in] numberSamples The desired number of samples. + //! \param[out] samples Filled in with the samples. + //! \return True if the cluster could be sampled and false otherwise. + virtual bool sample(std::size_t index, std::size_t numberSamples, TPointPreciseVec& samples) const { + const CCluster* cluster = this->cluster(index); + if (!cluster) { + LOG_ERROR("Cluster " << index << " doesn't exist"); + return false; } - //@} - - //! The total count of points. - double count() const - { - double result = 0.0; - for (std::size_t i = 0; i < m_Clusters.size(); ++i) - { - result += m_Clusters[i].count(); + cluster->sample(numberSamples, samples); + return true; + } + + //! 
Get the probability of the cluster with index \p index. + //! + //! \param[in] index The index of the cluster of interest. + //! \return The probability of the cluster identified by \p index. + virtual double probability(std::size_t index) const { + double weight = 0.0; + double Z = 0.0; + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + const CCluster& cluster = m_Clusters[i]; + if (cluster.index() == index) { + weight = cluster.weight(maths_t::E_ClustersFractionWeight); } - return result; + Z += cluster.weight(maths_t::E_ClustersFractionWeight); } + return Z == 0.0 ? 0.0 : weight / Z; + } + + //! Debug the memory used by the object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CXMeansOnline"); + core::CMemoryDebug::dynamicSize("m_ClusterIndexGenerator", m_ClusterIndexGenerator, mem); + core::CMemoryDebug::dynamicSize("m_Clusters", m_Clusters, mem); + } + + //! Get the memory used by the object. + virtual std::size_t memoryUsage() const { + std::size_t mem = core::CMemory::dynamicSize(m_ClusterIndexGenerator); + mem += core::CMemory::dynamicSize(m_Clusters); + return mem; + } + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const { return sizeof(*this); } + + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const { + seed = CChecksum::calculate(seed, m_DataType); + seed = CChecksum::calculate(seed, m_DecayRate); + seed = CChecksum::calculate(seed, m_HistoryLength); + seed = CChecksum::calculate(seed, m_WeightCalc); + return CChecksum::calculate(seed, m_Clusters); + } + //@} + + //! The total count of points. + double count() const { + double result = 0.0; + for (std::size_t i = 0; i < m_Clusters.size(); ++i) { + result += m_Clusters[i].count(); + } + return result; + } - //! Print a representation of the clusters that can be plotted in octave. - std::string printClusters() const - { - if (m_Clusters.empty()) - { - return std::string(); - } - if (m_Clusters[0].dimension() > 2) - { - return "Not supported"; - } - - // TODO + //! Print a representation of the clusters that can be plotted in octave. + std::string printClusters() const { + if (m_Clusters.empty()) { return std::string(); } - - //! Get the index generator. - CClustererTypes::CIndexGenerator &indexGenerator() - { - return m_ClusterIndexGenerator; + if (m_Clusters[0].dimension() > 2) { + return "Not supported"; } - protected: - //! 
Restore by traversing a state document
- bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
- core::CStateRestoreTraverser &traverser)
- {
- do
- {
- const std::string &name = traverser.name();
- RESTORE_SETUP_TEARDOWN(CLUSTER_TAG,
- CCluster cluster(*this),
- traverser.traverseSubLevel(boost::bind(&CCluster::acceptRestoreTraverser,
- &cluster, boost::cref(params), _1)),
- m_Clusters.push_back(cluster))
- RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG,
- double decayRate,
- core::CStringUtils::stringToType(traverser.value(), decayRate),
- this->decayRate(decayRate))
- RESTORE_BUILT_IN(HISTORY_LENGTH_TAG, m_HistoryLength)
- RESTORE(RNG_TAG, m_Rng.fromString(traverser.value()));
- RESTORE(CLUSTER_INDEX_GENERATOR_TAG,
- traverser.traverseSubLevel(boost::bind(&CClustererTypes::CIndexGenerator::acceptRestoreTraverser,
- &m_ClusterIndexGenerator, _1)))
- RESTORE_SETUP_TEARDOWN(WEIGHT_CALC_TAG,
- int weightCalc,
- core::CStringUtils::stringToType(traverser.value(), weightCalc),
- m_WeightCalc = static_cast<maths_t::EClusterWeightCalc>(weightCalc))
- RESTORE_BUILT_IN(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction)
- RESTORE_BUILT_IN(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount)
+ // TODO
+ return std::string();
+ }
+
+ //! Get the index generator.
+ CClustererTypes::CIndexGenerator& indexGenerator() { return m_ClusterIndexGenerator; }
+
+protected:
+ //! Restore by traversing a state document
+ bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+ do {
+ const std::string& name = traverser.name();
+ RESTORE_SETUP_TEARDOWN(
+ CLUSTER_TAG,
+ CCluster cluster(*this),
+ traverser.traverseSubLevel(boost::bind(&CCluster::acceptRestoreTraverser, &cluster, boost::cref(params), _1)),
+ m_Clusters.push_back(cluster))
+ RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG,
+ double decayRate,
+ core::CStringUtils::stringToType(traverser.value(), decayRate),
+ this->decayRate(decayRate))
+ RESTORE_BUILT_IN(HISTORY_LENGTH_TAG, m_HistoryLength)
+ RESTORE(RNG_TAG, m_Rng.fromString(traverser.value()));
+ RESTORE(CLUSTER_INDEX_GENERATOR_TAG,
+ traverser.traverseSubLevel(
+ boost::bind(&CClustererTypes::CIndexGenerator::acceptRestoreTraverser, &m_ClusterIndexGenerator, _1)))
+ RESTORE_SETUP_TEARDOWN(WEIGHT_CALC_TAG,
+ int weightCalc,
+ core::CStringUtils::stringToType(traverser.value(), weightCalc),
+ m_WeightCalc = static_cast<maths_t::EClusterWeightCalc>(weightCalc))
+ RESTORE_BUILT_IN(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction)
+ RESTORE_BUILT_IN(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount)
+ } while (traverser.next());
+
+ return true;
+ }
+
+ //! Get the cluster with the index \p index.
+ const CCluster* cluster(std::size_t index) const {
+ for (std::size_t i = 0u; i < m_Clusters.size(); ++i) {
+ if (m_Clusters[i].index() == index) {
+ return &m_Clusters[i];
}
- while (traverser.next());
-
- return true;
- }
-
- //! Get the cluster with the index \p index.
- const CCluster *cluster(std::size_t index) const
- {
- for (std::size_t i = 0u; i < m_Clusters.size(); ++i)
- {
- if (m_Clusters[i].index() == index)
- {
- return &m_Clusters[i];
- }
+ }
+ return 0;
+ }
+
+ //! Compute the minimum split count.
+ double minimumSplitCount() const { + double result = m_MinimumClusterCount; + if (m_MinimumClusterFraction > 0.0) { + double count = 0.0; + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + count += m_Clusters[i].count(); } - return 0; + double scale = std::max(m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0); + count *= m_MinimumClusterFraction / scale; + result = std::max(result, count); } + LOG_TRACE("minimumSplitCount = " << result); + return result; + } - //! Compute the minimum split count. - double minimumSplitCount() const - { - double result = m_MinimumClusterCount; - if (m_MinimumClusterFraction > 0.0) - { - double count = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - count += m_Clusters[i].count(); - } - double scale = std::max(m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0); - count *= m_MinimumClusterFraction / scale; - result = std::max(result, count); - } - LOG_TRACE("minimumSplitCount = " << result); - return result; + //! Split \p cluster if we find a good split. + bool maybeSplit(TClusterVecItr cluster) { + if (cluster == m_Clusters.end()) { + return false; } - //! Split \p cluster if we find a good split. - bool maybeSplit(TClusterVecItr cluster) - { - if (cluster == m_Clusters.end()) - { - return false; - } + if (TOptionalClusterClusterPr split = cluster->split(m_Rng, this->minimumSplitCount(), m_ClusterIndexGenerator)) { + LOG_TRACE("Splitting cluster " << cluster->index() << " at " << cluster->centre()); + std::size_t index = cluster->index(); + *cluster = split->first; + m_Clusters.push_back(split->second); + (this->splitFunc())(index, split->first.index(), split->second.index()); + return true; + } - if (TOptionalClusterClusterPr split = - cluster->split(m_Rng, this->minimumSplitCount(), m_ClusterIndexGenerator)) - { - LOG_TRACE("Splitting cluster " << cluster->index() - << " at " << cluster->centre()); - std::size_t index = cluster->index(); - *cluster = split->first; - m_Clusters.push_back(split->second); - (this->splitFunc())(index, split->first.index(), split->second.index()); - return true; - } + return false; + } + //! Merge \p cluster and \p adjacentCluster if they are close enough. + bool maybeMerge(TClusterVecItr cluster) { + if (cluster == m_Clusters.end()) { return false; } - //! Merge \p cluster and \p adjacentCluster if they are close enough. 
- bool maybeMerge(TClusterVecItr cluster)
- {
- if (cluster == m_Clusters.end())
- {
- return false;
- }
+ CCluster* nearest = this->nearest(*cluster);
+
+ if (nearest && nearest->shouldMerge(*cluster)) {
+ LOG_TRACE("Merging cluster " << nearest->index() << " at " << nearest->centre() << " and cluster " << cluster->index() << " at "
+ << cluster->centre());
+ std::size_t index1 = nearest->index();
+ std::size_t index2 = cluster->index();
+ CCluster merged = nearest->merge(*cluster, m_ClusterIndexGenerator);
+ *nearest = merged;
+ m_Clusters.erase(cluster);
+ (this->mergeFunc())(index1, index2, merged.index());
+ return true;
+ }

- CCluster *nearest = this->nearest(*cluster);
-
- if (nearest && nearest->shouldMerge(*cluster))
- {
- LOG_TRACE("Merging cluster " << nearest->index()
- << " at " << nearest->centre()
- << " and cluster " << cluster->index()
- << " at " << cluster->centre());
- std::size_t index1 = nearest->index();
- std::size_t index2 = cluster->index();
- CCluster merged = nearest->merge(*cluster, m_ClusterIndexGenerator);
- *nearest = merged;
- m_Clusters.erase(cluster);
- (this->mergeFunc())(index1, index2, merged.index());
- return true;
- }
+ return false;
+ }

+ //! Remove any clusters which are effectively dead.
+ bool prune() {
+ if (m_Clusters.size() <= 1) {
 return false;
 }

- //! Remove any clusters which are effectively dead.
- bool prune()
- {
- if (m_Clusters.size() <= 1)
- {
- return false;
- }
+ using TDoubleSizePr = std::pair<double, std::size_t>;
+ using TMinAccumulator = CBasicStatistics::COrderStatisticsStack<TDoubleSizePr, 1>;

- using TDoubleSizePr = std::pair<double, std::size_t>;
- using TMinAccumulator = CBasicStatistics::COrderStatisticsStack<TDoubleSizePr, 1>;
+ bool result = false;

- bool result = false;
+ double minimumCount = this->minimumSplitCount() * CLUSTER_DELETE_FRACTION;

- double minimumCount = this->minimumSplitCount() * CLUSTER_DELETE_FRACTION;
-
- // Get the clusters to prune.
- for (;;)
- {
- TMinAccumulator prune;
- for (std::size_t i = 0u; i < m_Clusters.size(); ++i)
- {
- if (m_Clusters[i].count() < minimumCount)
- {
- prune.add(std::make_pair(m_Clusters[i].count(), i));
- }
+ // Get the clusters to prune.
+ for (;;) {
+ TMinAccumulator prune;
+ for (std::size_t i = 0u; i < m_Clusters.size(); ++i) {
+ if (m_Clusters[i].count() < minimumCount) {
+ prune.add(std::make_pair(m_Clusters[i].count(), i));
 }
+ }
+ if (prune.count() == 0) {
+ break;
+ }
+ LOG_TRACE("prune = " << core::CContainerPrinter::print(prune));
+
+ result = true;
+
+ // Merge the clusters to prune in increasing count order.
+ CCluster& cluster = m_Clusters[prune[0].second];
+ CCluster* nearest = this->nearest(cluster);
+ if (nearest) {
+ LOG_TRACE("Merging cluster " << cluster.index() << " at " << cluster.centre() << " and cluster " << nearest->index()
+ << " at " << nearest->centre());
+ CCluster merge = nearest->merge(cluster, m_ClusterIndexGenerator);
+ (this->mergeFunc())(cluster.index(), nearest->index(), merge.index());
+ nearest->swap(merge);
 }
- return result;
+ // Actually remove the pruned clusters.
+ m_Clusters.erase(m_Clusters.begin() + prune[0].second);
 }
- //! Get the cluster closest to \p cluster.
- CCluster *nearest(const CCluster &cluster)
- {
- if (m_Clusters.size() == 1)
- {
- return &m_Clusters[0];
- }
+ return result;
+ }
- using TMinAccumulator = CBasicStatistics::COrderStatisticsStack<double, 1>;
+ //! Get the cluster closest to \p cluster.
+ CCluster* nearest(const CCluster& cluster) {
+ if (m_Clusters.size() == 1) {
+ return &m_Clusters[0];
+ }
- CCluster *result = 0;
- TMinAccumulator min;
- for (std::size_t i = 0u; i < m_Clusters.size(); ++i)
- {
- if (cluster.index() == m_Clusters[i].index())
- {
- continue;
- }
+ using TMinAccumulator = CBasicStatistics::COrderStatisticsStack<double, 1>;
- if (min.add(CCluster::BICGain(cluster, m_Clusters[i])))
- {
- result = &m_Clusters[i];
- }
+ CCluster* result = 0;
+ TMinAccumulator min;
+ for (std::size_t i = 0u; i < m_Clusters.size(); ++i) {
+ if (cluster.index() == m_Clusters[i].index()) {
+ continue;
 }
- if (!result)
- {
- LOG_ERROR("Couldn't find nearest cluster");
+
+ if (min.add(CCluster::BICGain(cluster, m_Clusters[i]))) {
+ result = &m_Clusters[i];
 }
- return result;
 }
-
- //! Get the clusters.
- const TClusterVec &clusters() const
- {
- return m_Clusters;
+ if (!result) {
+ LOG_ERROR("Couldn't find nearest cluster");
 }
+ return result;
+ }
- private:
- //! \brief Checks if probabilities are less than a specified threshold.
- class CProbabilityLessThan
- {
- public:
- CProbabilityLessThan(double threshold) : m_Threshold(threshold) {}
-
- bool operator()(const TSizeDoublePr &p) const
- {
- return p.second < m_Threshold;
- }
+ //! Get the clusters.
+ const TClusterVec& clusters() const { return m_Clusters; }
- private:
- double m_Threshold;
- };
+private:
+ //! \brief Checks if probabilities are less than a specified threshold.
+ class CProbabilityLessThan {
+ public:
+ CProbabilityLessThan(double threshold) : m_Threshold(threshold) {}
- private:
- //! \name Tags for Persisting CXMeansOnline
- //@{
- static const std::string WEIGHT_CALC_TAG;
- static const std::string MINIMUM_CLUSTER_FRACTION_TAG;
- static const std::string MINIMUM_CLUSTER_COUNT_TAG;
- static const std::string WINSORISATION_CONFIDENCE_INTERVAL_TAG;
- static const std::string CLUSTER_INDEX_GENERATOR_TAG;
- static const std::string CLUSTER_TAG;
- static const std::string RNG_TAG;
- static const std::string DECAY_RATE_TAG;
- static const std::string HISTORY_LENGTH_TAG;
- //@}
-
- //! \name Tags for Persisting CXMeansOnline::CCluster
- //@{
- static const std::string INDEX_TAG;
- static const std::string COVARIANCES_TAG;
- static const std::string STRUCTURE_TAG;
- //@}
-
- //! The minimum Kullback-Leibler divergence at which we'll
- //! split a cluster.
- static const double MINIMUM_SPLIT_DISTANCE;
-
- //! The maximum Kullback-Leibler divergence for which we'll
- //! merge two cluster. This is intended to introduce hysteresis
- //! in the cluster creation and deletion process and so should
- //! be less than the minimum split distance.
- static const double MAXIMUM_MERGE_DISTANCE;
-
- //!
The default fraction of the minimum cluster split count - //! for which we'll delete a cluster. This is intended to - //! introduce hysteresis in the cluster creation and deletion - //! process and so should be in the range (0, 1). - static const double CLUSTER_DELETE_FRACTION; - - //! The size of the data we use to maintain cluster detail. - static const std::size_t STRUCTURE_SIZE; - - //! 1 - "smallest hard assignment weight". - static const double HARD_ASSIGNMENT_THRESHOLD; + bool operator()(const TSizeDoublePr& p) const { return p.second < m_Threshold; } private: - //! The random number generator. - CPRNG::CXorOShiro128Plus m_Rng; - - //! The type of data being clustered. - maths_t::EDataType m_DataType; - - //! The initial rate at which information is lost. - double m_InitialDecayRate; - - //! The rate at which information is lost. - double m_DecayRate; - - //! A measure of the length of history of the data clustered. - double m_HistoryLength; - - //! The style of the cluster weight calculation (see maths_t::EClusterWeightCalc). - maths_t::EClusterWeightCalc m_WeightCalc; - - //! The minimum cluster fractional count. - double m_MinimumClusterFraction; - - //! The minimum cluster count. - double m_MinimumClusterCount; - - //! The minimum count for a category in the sketch to cluster. - double m_MinimumCategoryCount; - - //! A generator of unique cluster indices. - CClustererTypes::CIndexGenerator m_ClusterIndexGenerator; - - //! The clusters. - TClusterVec m_Clusters; + double m_Threshold; + }; + +private: + //! \name Tags for Persisting CXMeansOnline + //@{ + static const std::string WEIGHT_CALC_TAG; + static const std::string MINIMUM_CLUSTER_FRACTION_TAG; + static const std::string MINIMUM_CLUSTER_COUNT_TAG; + static const std::string WINSORISATION_CONFIDENCE_INTERVAL_TAG; + static const std::string CLUSTER_INDEX_GENERATOR_TAG; + static const std::string CLUSTER_TAG; + static const std::string RNG_TAG; + static const std::string DECAY_RATE_TAG; + static const std::string HISTORY_LENGTH_TAG; + //@} + + //! \name Tags for Persisting CXMeansOnline::CCluster + //@{ + static const std::string INDEX_TAG; + static const std::string COVARIANCES_TAG; + static const std::string STRUCTURE_TAG; + //@} + + //! The minimum Kullback-Leibler divergence at which we'll + //! split a cluster. + static const double MINIMUM_SPLIT_DISTANCE; + + //! The maximum Kullback-Leibler divergence for which we'll + //! merge two cluster. This is intended to introduce hysteresis + //! in the cluster creation and deletion process and so should + //! be less than the minimum split distance. + static const double MAXIMUM_MERGE_DISTANCE; + + //! The default fraction of the minimum cluster split count + //! for which we'll delete a cluster. This is intended to + //! introduce hysteresis in the cluster creation and deletion + //! process and so should be in the range (0, 1). + static const double CLUSTER_DELETE_FRACTION; + + //! The size of the data we use to maintain cluster detail. + static const std::size_t STRUCTURE_SIZE; + + //! 1 - "smallest hard assignment weight". + static const double HARD_ASSIGNMENT_THRESHOLD; + +private: + //! The random number generator. + CPRNG::CXorOShiro128Plus m_Rng; + + //! The type of data being clustered. + maths_t::EDataType m_DataType; + + //! The initial rate at which information is lost. + double m_InitialDecayRate; + + //! The rate at which information is lost. + double m_DecayRate; + + //! A measure of the length of history of the data clustered. + double m_HistoryLength; + + //! 
The style of the cluster weight calculation (see maths_t::EClusterWeightCalc).
+ maths_t::EClusterWeightCalc m_WeightCalc;
+
+ //! The minimum cluster fractional count.
+ double m_MinimumClusterFraction;
+
+ //! The minimum cluster count.
+ double m_MinimumClusterCount;
+
+ //! The minimum count for a category in the sketch to cluster.
+ double m_MinimumCategoryCount;
+
+ //! A generator of unique cluster indices.
+ CClustererTypes::CIndexGenerator m_ClusterIndexGenerator;
+
+ //! The clusters.
+ TClusterVec m_Clusters;
};

template<typename T, std::size_t N>
@@ -1479,7 +1239,6 @@ template<typename T, std::size_t N>
 const std::size_t CXMeansOnline<T, N>::STRUCTURE_SIZE(24);
 template<typename T, std::size_t N>
 const double CXMeansOnline<T, N>::HARD_ASSIGNMENT_THRESHOLD(0.01);
-
}
}
diff --git a/include/maths/CXMeansOnline1d.h b/include/maths/CXMeansOnline1d.h
index 9012f1f16c..be8a6e86d1 100644
--- a/include/maths/CXMeansOnline1d.h
+++ b/include/maths/CXMeansOnline1d.h
@@ -10,9 +10,9 @@
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
@@ -24,48 +24,44 @@
 class CXMeansOnline1dTest;
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace maths
-{
+namespace maths {
 //! \brief Encodes the distributions available to model the modes.
-class MATHS_EXPORT CAvailableModeDistributions
-{
- public:
- static const int NORMAL = 1;
- static const int GAMMA = 2;
- static const int LOG_NORMAL = 4;
- static const int ALL = NORMAL + GAMMA + LOG_NORMAL;
-
- CAvailableModeDistributions(int value);
-
- //! Add the available distributions from \p rhs.
- const CAvailableModeDistributions &operator+(const CAvailableModeDistributions &rhs);
-
- //! Get the number of parameters used to model a mode.
- double parameters() const;
-
- //! Check if the normal distribution is available.
- bool haveNormal() const;
- //! Check if the gamma distribution is available.
- bool haveGamma() const;
- //! Check if the log-normal distribution is available.
- bool haveLogNormal() const;
-
- //! Conversion to a string.
- std::string toString() const;
- //! Set from a string.
- bool fromString(const std::string &value);
-
- private:
- //! The encoding.
- int m_Value;
+class MATHS_EXPORT CAvailableModeDistributions {
+public:
+ static const int NORMAL = 1;
+ static const int GAMMA = 2;
+ static const int LOG_NORMAL = 4;
+ static const int ALL = NORMAL + GAMMA + LOG_NORMAL;
+
+ CAvailableModeDistributions(int value);
+
+ //! Add the available distributions from \p rhs.
+ const CAvailableModeDistributions& operator+(const CAvailableModeDistributions& rhs);
+
+ //! Get the number of parameters used to model a mode.
+ double parameters() const;
+
+ //! Check if the normal distribution is available.
+ bool haveNormal() const;
+ //! Check if the gamma distribution is available.
+ bool haveGamma() const;
+ //! Check if the log-normal distribution is available.
+ bool haveLogNormal() const;
+
+ //! Conversion to a string.
+ std::string toString() const;
+ //! Set from a string.
+ bool fromString(const std::string& value);
+
+private:
+ //! The encoding.
+ int m_Value;
};

 //! \brief A single pass online clusterer based on the x-means
@@ -108,381 +104,360 @@ class MATHS_EXPORT CAvailableModeDistributions
 //! is expected to give largely order (of points processed) invariant
 //! unsupervised clustering of the data which identifies reasonably
 //! well separated clusters.
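// A small sketch of the bit-flag encoding CAvailableModeDistributions uses
// above: NORMAL, GAMMA and LOG_NORMAL occupy distinct bits, so ALL is their
// sum and the have*() queries reduce to a bitwise AND. The helper names here
// are illustrative, not the class's API.
namespace mode_flags_sketch {
constexpr int NORMAL = 1;
constexpr int GAMMA = 2;
constexpr int LOG_NORMAL = 4;
constexpr int ALL = NORMAL + GAMMA + LOG_NORMAL;

// True if the gamma bit is set in the encoding.
inline bool haveGamma(int encoding) {
    return (encoding & GAMMA) != 0;
}

// Union of two sets of available distributions, mirroring operator+.
inline int combine(int lhs, int rhs) {
    return lhs | rhs;
}
}
// For example, combine(NORMAL, GAMMA) == 3 and haveGamma(ALL) is true.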
-class MATHS_EXPORT CXMeansOnline1d : public CClusterer1d
-{
+class MATHS_EXPORT CXMeansOnline1d : public CClusterer1d {
+public:
+ class CCluster;
+ using TDoubleVec = CClusterer1d::TPointPreciseVec;
+ using TDoubleDoublePrVec = CClusterer1d::TPointPreciseDoublePrVec;
+ using TClusterClusterPr = std::pair<CCluster, CCluster>;
+ using TOptionalClusterClusterPr = boost::optional<TClusterClusterPr>;
+ using TDoubleDoublePr = std::pair<double, double>;
+ using CClusterer1d::add;
+
+ //! \brief Represents a cluster.
+ class MATHS_EXPORT CCluster {
 public:
- class CCluster;
- using TDoubleVec = CClusterer1d::TPointPreciseVec;
- using TDoubleDoublePrVec = CClusterer1d::TPointPreciseDoublePrVec;
- using TClusterClusterPr = std::pair<CCluster, CCluster>;
- using TOptionalClusterClusterPr = boost::optional<TClusterClusterPr>;
- using TDoubleDoublePr = std::pair<double, double>;
- using CClusterer1d::add;
-
- //! \brief Represents a cluster.
- class MATHS_EXPORT CCluster
- {
- public:
- explicit CCluster(const CXMeansOnline1d &clusterer);
-
- //! Construct by traversing a state document
- bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
- core::CStateRestoreTraverser &traverser);
-
- //! Persist state by passing information to the supplied inserter
- void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
- //! Set the type of data in the cluster.
- void dataType(maths_t::EDataType dataType);
-
- //! Add \p point to this cluster.
- void add(double point, double count);
-
- //! Set the rate at which information is aged out.
- void decayRate(double decayRate);
-
- //! Propagate the cluster forwards by \p time.
- void propagateForwardsByTime(double time);
-
- //! Get the unique index of this cluster.
- std::size_t index() const;
-
- //! Get the "centroid" of the cluster. This is the mean of the prior.
- double centre() const;
-
- //! Get the "spread" of the cluster. This is variance of the prior.
- double spread() const;
-
- //! Get the count \p p percentile position within the cluster.
- double percentile(double p) const;
-
- //! Get the total count of values added to the cluster.
- double count() const;
-
- //! Get the weight of the cluster.
- double weight(maths_t::EClusterWeightCalc calc) const;
-
- //! Get the likelihood that \p point is from this cluster.
- double logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc,
- double point) const;
-
- //! Get \p numberSamples from this cluster.
- void sample(std::size_t numberSamples,
- double smallest,
- double largest,
- TDoubleVec &samples) const;
-
- //! Try and find a split by a full search of the binary tree
- //! of possible optimal 2-splits of the data.
- //!
- //! \param[in] distributions The distributions available to
- //! model the clusters.
- //! \param[in] minimumCount The minimum count of a cluster
- //! in the split.
- //! \param[in] smallest The smallest sample added to date.
- //! \param[in] interval The Winsorisation interval.
- //! \param[in] indexGenerator The unique cluster identifier
- //! generator.
- TOptionalClusterClusterPr split(CAvailableModeDistributions distributions,
- double minimumCount,
- double smallest,
- const TDoubleDoublePr &interval,
- CIndexGenerator &indexGenerator);
-
- //! Check if this and \p other cluster should merge.
- //!
- //! \param[in] other The cluster to merge with this one.
- //! \param[in] distributions The distributions available to
- //! model the clusters.
- //! \param[in] smallest The smallest sample added to date.
- //! \param[in] interval The Winsorisation interval.
- bool shouldMerge(CCluster &other, - CAvailableModeDistributions distributions, - double smallest, - const TDoubleDoublePr &interval); - - //! Merge this and \p other cluster. - CCluster merge(CCluster &other, CIndexGenerator &indexGenerator); - - //! Get the prior describing this object. - const CNormalMeanPrecConjugate &prior() const; - - //! Get a checksum for this object. - uint64_t checksum(uint64_t seed) const; - - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this cluster. - std::size_t memoryUsage() const; - - private: - CCluster(std::size_t index, - const CNormalMeanPrecConjugate &prior, - const CNaturalBreaksClassifier &structure); - - private: - //! A unique identifier for this cluster. - std::size_t m_Index; - - //! The data representing this cluster. - CNormalMeanPrecConjugate m_Prior; - - //! The data representing the internal structure of this cluster. - CNaturalBreaksClassifier m_Structure; - }; - - using TClusterVec = std::vector; - using TClusterVecItr = TClusterVec::iterator; - using TClusterVecCItr = TClusterVec::const_iterator; + explicit CCluster(const CXMeansOnline1d& clusterer); - public: - //! The central confidence interval on which to Winsorise. - static const double WINSORISATION_CONFIDENCE_INTERVAL; - - public: - //! Construct a new clusterer. - //! - //! \param[in] dataType The type of data which will be clustered. - //! \param[in] availableDistributions The distributions available to - //! model the modes. - //! \param[in] weightCalc The style of the cluster weight calculation - //! (see maths_t::EClusterWeightCalc for details). - //! \param[in] decayRate Controls the rate at which information is - //! lost from the clusters. - //! \param[in] minimumClusterFraction The minimum fractional count - //! of points in a cluster. - //! \param[in] minimumClusterCount The minimum count of points in a - //! cluster. - //! \param[in] minimumCategoryCount The minimum count of a category - //! in the sketch to cluster. - //! \param[in] winsorisationConfidenceInterval The central confidence - //! interval on which to Winsorise. - //! \param[in] splitFunc Optional callback for when a cluster is split. - //! \param[in] mergeFunc Optional callback for when two clusters are - CXMeansOnline1d(maths_t::EDataType dataType, - CAvailableModeDistributions availableDistributions, - maths_t::EClusterWeightCalc weightCalc, - double decayRate = 0.0, - double minimumClusterFraction = MINIMUM_CLUSTER_SPLIT_FRACTION, - double minimumClusterCount = MINIMUM_CLUSTER_SPLIT_COUNT, - double minimumCategoryCount = MINIMUM_CATEGORY_COUNT, - double winsorisationConfidenceInterval = WINSORISATION_CONFIDENCE_INTERVAL, - const TSplitFunc &splitFunc = CDoNothing(), - const TMergeFunc &mergeFunc = CDoNothing()); - - //! Construct by traversing a state document. - CXMeansOnline1d(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); - - //! Construct by traversing a state document. - CXMeansOnline1d(const SDistributionRestoreParams ¶ms, - const TSplitFunc &splitFunc, - const TMergeFunc &mergeFunc, - core::CStateRestoreTraverser &traverser); - - //! The x-means clusterer has value semantics. - //@{ - CXMeansOnline1d(const CXMeansOnline1d &other); - CXMeansOnline1d &operator=(const CXMeansOnline1d &other); - //@} - - //! Efficiently swap the contents of two x-means objects. - void swap(CXMeansOnline1d &other); - - //! \name Clusterer Contract - //@{ - //! 
Get the tag name for this clusterer. - virtual std::string persistenceTag() const; - - //! Persist state by passing information to the supplied inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Creates a copy of the clusterer. - //! - //! \warning Caller owns returned object. - virtual CXMeansOnline1d *clone() const; + //! Construct by traversing a state document + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); - //! Clear the current clusterer state. - virtual void clear(); + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Get the number of clusters. - virtual std::size_t numberClusters() const; + //! Set the type of data in the cluster. + void dataType(maths_t::EDataType dataType); - //! Set the type of data being clustered. - virtual void dataType(maths_t::EDataType dataType); + //! Add \p point to this cluster. + void add(double point, double count); //! Set the rate at which information is aged out. - virtual void decayRate(double decayRate); + void decayRate(double decayRate); - //! Check if the cluster identified by \p index exists. - virtual bool hasCluster(std::size_t index) const; + //! Propagate the cluster forwards by \p time. + void propagateForwardsByTime(double time); - //! Get the centre of the cluster identified by \p index. - virtual bool clusterCentre(std::size_t index, double &result) const; + //! Get the unique index of this cluster. + std::size_t index() const; - //! Get the spread of the cluster identified by \p index. - virtual bool clusterSpread(std::size_t index, double &result) const; + //! Get the "centroid" of the cluster. This is the mean of the prior. + double centre() const; - //! Gets the index of the cluster(s) to which \p point belongs - //! together with their weighting factor. - virtual void cluster(const double &point, - TSizeDoublePr2Vec &result, - double count = 1.0) const; + //! Get the "spread" of the cluster. This is variance of the prior. + double spread() const; - //! Update the clustering with \p point and return its cluster(s) - //! together with their weighting factor. - virtual void add(const double &point, - TSizeDoublePr2Vec &clusters, - double count = 1.0); + //! Get the count \p p percentile position within the cluster. + double percentile(double p) const; - //! Update the clustering with \p points. - virtual void add(const TDoubleDoublePrVec &points); + //! Get the total count of values added to the cluster. + double count() const; - //! Propagate the clustering forwards by \p time. - //! - //! The cluster priors relax back to non-informative and the - //! cluster probabilities become less at a rate controlled by - //! the decay rate parameter (optionally supplied to the constructor). - //! - //! \param[in] time The time increment to apply. - //! \note \p time must be non negative. - virtual void propagateForwardsByTime(double time); + //! Get the weight of the cluster. + double weight(maths_t::EClusterWeightCalc calc) const; + + //! Get the likelihood that \p point is from this cluster. + double logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc, double point) const; - //! Sample the cluster with index \p index. + //! Get \p numberSamples from this cluster. + void sample(std::size_t numberSamples, double smallest, double largest, TDoubleVec& samples) const; + + //! 
Try and find a split by a full search of the binary tree + //! of possible optimal 2-splits of the data. //! - //! \param[in] index The index of the cluster to sample. - //! \param[in] numberSamples The desired number of samples. - //! \param[out] samples Filled in with the samples. - //! \return True if the cluster could be sampled and false otherwise. - virtual bool sample(std::size_t index, - std::size_t numberSamples, - TDoubleVec &samples) const; - - //! Get the probability of the cluster with index \p index. + //! \param[in] distributions The distributions available to + //! model the clusters. + //! \param[in] minimumCount The minimum count of a cluster + //! in the split. + //! \param[in] smallest The smallest sample added to date. + //! \param[in] interval The Winsorisation interval. + //! \param[in] indexGenerator The unique cluster identifier + //! generator. + TOptionalClusterClusterPr split(CAvailableModeDistributions distributions, + double minimumCount, + double smallest, + const TDoubleDoublePr& interval, + CIndexGenerator& indexGenerator); + + //! Check if this and \p other cluster should merge. //! - //! \param[in] index The index of the cluster of interest. - //! \return The probability of the cluster identified by \p index. - virtual double probability(std::size_t index) const; - - //! Debug the memory used by the object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! \param[in] other The cluster to merge with this one. + //! \param[in] distributions The distributions available to + //! model the clusters. + //! \param[in] smallest The smallest sample added to date. + //! \param[in] interval The Winsorisation interval. + bool shouldMerge(CCluster& other, CAvailableModeDistributions distributions, double smallest, const TDoubleDoublePr& interval); - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; + //! Merge this and \p other cluster. + CCluster merge(CCluster& other, CIndexGenerator& indexGenerator); - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; + //! Get the prior describing this object. + const CNormalMeanPrecConjugate& prior() const; //! Get a checksum for this object. - virtual uint64_t checksum(uint64_t seed = 0) const; - //@} - - //! The total count of points. - double count() const; - - //! Get the clusters. - const TClusterVec &clusters() const; + uint64_t checksum(uint64_t seed) const; - //! Print a representation of the clusters that can be plotted in octave. - std::string printClusters() const; + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the index generator. - CIndexGenerator &indexGenerator(); + //! Get the memory used by this cluster. + std::size_t memoryUsage() const; private: - using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; - using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack >; + CCluster(std::size_t index, const CNormalMeanPrecConjugate& prior, const CNaturalBreaksClassifier& structure); private: - //! The minimum Kullback-Leibler divergence at which we'll - //! split a cluster. - static const double MINIMUM_SPLIT_DISTANCE; + //! A unique identifier for this cluster. + std::size_t m_Index; + + //! The data representing this cluster. + CNormalMeanPrecConjugate m_Prior; + + //! The data representing the internal structure of this cluster. 
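// The split and merge tests declared above are deliberately asymmetric: the
// divergence needed to split (MINIMUM_SPLIT_DISTANCE) exceeds the divergence
// below which two clusters re-merge (MAXIMUM_MERGE_DISTANCE), which gives
// hysteresis so a borderline mode does not oscillate between one and two
// clusters. A toy illustration with made-up threshold values:
#include <cassert>

struct HysteresisSketch {
    // Illustrative values only; the real thresholds are private class statics.
    static constexpr double minimumSplitDistance = 6.0;
    static constexpr double maximumMergeDistance = 2.0;

    static bool shouldSplit(double divergence) { return divergence > minimumSplitDistance; }
    static bool shouldMerge(double divergence) { return divergence < maximumMergeDistance; }
};

int main() {
    // A divergence of 4.0 falls in the dead band between the two thresholds:
    // neither action fires, so the last decision taken is sticky.
    assert(!HysteresisSketch::shouldSplit(4.0));
    assert(!HysteresisSketch::shouldMerge(4.0));
    return 0;
}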
+ CNaturalBreaksClassifier m_Structure;
+ };
+
+ using TClusterVec = std::vector<CCluster>;
+ using TClusterVecItr = TClusterVec::iterator;
+ using TClusterVecCItr = TClusterVec::const_iterator;
+
+public:
+ //! The central confidence interval on which to Winsorise.
+ static const double WINSORISATION_CONFIDENCE_INTERVAL;
+
+public:
+ //! Construct a new clusterer.
+ //!
+ //! \param[in] dataType The type of data which will be clustered.
+ //! \param[in] availableDistributions The distributions available to
+ //! model the modes.
+ //! \param[in] weightCalc The style of the cluster weight calculation
+ //! (see maths_t::EClusterWeightCalc for details).
+ //! \param[in] decayRate Controls the rate at which information is
+ //! lost from the clusters.
+ //! \param[in] minimumClusterFraction The minimum fractional count
+ //! of points in a cluster.
+ //! \param[in] minimumClusterCount The minimum count of points in a
+ //! cluster.
+ //! \param[in] minimumCategoryCount The minimum count of a category
+ //! in the sketch to cluster.
+ //! \param[in] winsorisationConfidenceInterval The central confidence
+ //! interval on which to Winsorise.
+ //! \param[in] splitFunc Optional callback for when a cluster is split.
+ //! \param[in] mergeFunc Optional callback for when two clusters are merged.
+ CXMeansOnline1d(maths_t::EDataType dataType,
+ CAvailableModeDistributions availableDistributions,
+ maths_t::EClusterWeightCalc weightCalc,
+ double decayRate = 0.0,
+ double minimumClusterFraction = MINIMUM_CLUSTER_SPLIT_FRACTION,
+ double minimumClusterCount = MINIMUM_CLUSTER_SPLIT_COUNT,
+ double minimumCategoryCount = MINIMUM_CATEGORY_COUNT,
+ double winsorisationConfidenceInterval = WINSORISATION_CONFIDENCE_INTERVAL,
+ const TSplitFunc& splitFunc = CDoNothing(),
+ const TMergeFunc& mergeFunc = CDoNothing());
+
+ //! Construct by traversing a state document.
+ CXMeansOnline1d(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+
+ //! Construct by traversing a state document.
+ CXMeansOnline1d(const SDistributionRestoreParams& params,
+ const TSplitFunc& splitFunc,
+ const TMergeFunc& mergeFunc,
+ core::CStateRestoreTraverser& traverser);
+
+ //! The x-means clusterer has value semantics.
+ //@{
+ CXMeansOnline1d(const CXMeansOnline1d& other);
+ CXMeansOnline1d& operator=(const CXMeansOnline1d& other);
+ //@}
+
+ //! Efficiently swap the contents of two x-means objects.
+ void swap(CXMeansOnline1d& other);
+
+ //! \name Clusterer Contract
+ //@{
+ //! Get the tag name for this clusterer.
+ virtual std::string persistenceTag() const;
+
+ //! Persist state by passing information to the supplied inserter.
+ virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+ //! Creates a copy of the clusterer.
+ //!
+ //! \warning Caller owns returned object.
+ virtual CXMeansOnline1d* clone() const;
+
+ //! Clear the current clusterer state.
+ virtual void clear();
+
+ //! Get the number of clusters.
+ virtual std::size_t numberClusters() const;
+
+ //! Set the type of data being clustered.
+ virtual void dataType(maths_t::EDataType dataType);
+
+ //! Set the rate at which information is aged out.
+ virtual void decayRate(double decayRate);
+
+ //! Check if the cluster identified by \p index exists.
+ virtual bool hasCluster(std::size_t index) const;
+
+ //! Get the centre of the cluster identified by \p index.
+ virtual bool clusterCentre(std::size_t index, double& result) const;
+
+ //! Get the spread of the cluster identified by \p index.
+ virtual bool clusterSpread(std::size_t index, double& result) const; + + //! Gets the index of the cluster(s) to which \p point belongs + //! together with their weighting factor. + virtual void cluster(const double& point, TSizeDoublePr2Vec& result, double count = 1.0) const; + + //! Update the clustering with \p point and return its cluster(s) + //! together with their weighting factor. + virtual void add(const double& point, TSizeDoublePr2Vec& clusters, double count = 1.0); + + //! Update the clustering with \p points. + virtual void add(const TDoubleDoublePrVec& points); + + //! Propagate the clustering forwards by \p time. + //! + //! The cluster priors relax back to non-informative and the + //! cluster probabilities become less at a rate controlled by + //! the decay rate parameter (optionally supplied to the constructor). + //! + //! \param[in] time The time increment to apply. + //! \note \p time must be non negative. + virtual void propagateForwardsByTime(double time); + + //! Sample the cluster with index \p index. + //! + //! \param[in] index The index of the cluster to sample. + //! \param[in] numberSamples The desired number of samples. + //! \param[out] samples Filled in with the samples. + //! \return True if the cluster could be sampled and false otherwise. + virtual bool sample(std::size_t index, std::size_t numberSamples, TDoubleVec& samples) const; + + //! Get the probability of the cluster with index \p index. + //! + //! \param[in] index The index of the cluster of interest. + //! \return The probability of the cluster identified by \p index. + virtual double probability(std::size_t index) const; - //! The maximum Kullback-Leibler divergence for which we'll - //! merge two cluster. This is intended to introduce hysteresis - //! in the cluster creation and deletion process and so should - //! be less than the minimum split distance. - static const double MAXIMUM_MERGE_DISTANCE; + //! Debug the memory used by the object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! The default fraction of the minimum cluster split count - //! for which we'll delete a cluster. This is intended to - //! introduce hysteresis in the cluster creation and deletion - //! process and so should be in the range (0, 1). - static const double CLUSTER_DELETE_FRACTION; + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const; - //! The size of the data we use to maintain cluster detail. - static const std::size_t STRUCTURE_SIZE; + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const; - private: - //! Restore by traversing a state document. - bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); + //! Get a checksum for this object. + virtual uint64_t checksum(uint64_t seed = 0) const; + //@} - //! Get the cluster with the index \p index. - const CCluster *cluster(std::size_t index) const; + //! The total count of points. + double count() const; - //! Compute the minimum split count. - double minimumSplitCount() const; + //! Get the clusters. + const TClusterVec& clusters() const; - //! Split \p cluster if we find a good split. - bool maybeSplit(TClusterVecItr cluster); + //! Print a representation of the clusters that can be plotted in octave. + std::string printClusters() const; - //! Merge \p cluster and \p adjacentCluster if they are close enough. 
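// A hedged usage sketch of the clusterer contract declared here: stream
// points in with add(...), then ask for soft assignments with cluster(...).
// The types and enum values are taken from this header and MathsTypes.h;
// TSizeDoublePr2Vec is assumed to come from the CClusterer1d base class, and
// the values fed in are purely illustrative.
void xMeansOnline1dUsageSketch() {
    using namespace ml;
    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
                                     maths::CAvailableModeDistributions::ALL,
                                     maths_t::E_ClustersFractionWeight);
    maths::CXMeansOnline1d::TSizeDoublePr2Vec assignment;
    for (double point : {1.0, 1.2, 0.9, 10.0, 10.3, 9.8}) {
        clusterer.add(point, assignment);
    }
    // Soft assignment of a new point to the cluster(s) it may belong to,
    // returned as (cluster index, weight) pairs.
    clusterer.cluster(1.1, assignment);
}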
- bool maybeMerge(TClusterVecItr cluster, TClusterVecItr adjacentCluster);
+ //! Get the index generator.
+ CIndexGenerator& indexGenerator();
- //! Remove any clusters which are effectively dead.
- bool prune();
+private:
+ using TMinAccumulator = CBasicStatistics::COrderStatisticsStack<double, 1>;
+ using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double>>;
- //! Get the Winsorisation interval.
- TDoubleDoublePr winsorisationInterval() const;
+private:
+ //! The minimum Kullback-Leibler divergence at which we'll
+ //! split a cluster.
+ static const double MINIMUM_SPLIT_DISTANCE;
- private:
- //! The type of data being clustered.
- maths_t::EDataType m_DataType;
+ //! The maximum Kullback-Leibler divergence for which we'll
+ //! merge two clusters. This is intended to introduce hysteresis
+ //! in the cluster creation and deletion process and so should
+ //! be less than the minimum split distance.
+ static const double MAXIMUM_MERGE_DISTANCE;
- //! The distributions available to model the clusters.
- CAvailableModeDistributions m_AvailableDistributions;
+ //! The default fraction of the minimum cluster split count
+ //! for which we'll delete a cluster. This is intended to
+ //! introduce hysteresis in the cluster creation and deletion
+ //! process and so should be in the range (0, 1).
+ static const double CLUSTER_DELETE_FRACTION;
- //! The initial rate at which information is lost.
- double m_InitialDecayRate;
+ //! The size of the data we use to maintain cluster detail.
+ static const std::size_t STRUCTURE_SIZE;
- //! The rate at which information is lost.
- double m_DecayRate;
+private:
+ //! Restore by traversing a state document.
+ bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
- //! A measure of the length of history of the data clustered.
- double m_HistoryLength;
+ //! Get the cluster with the index \p index.
+ const CCluster* cluster(std::size_t index) const;
- //! The style of the cluster weight calculation (see maths_t::EClusterWeightCalc).
- maths_t::EClusterWeightCalc m_WeightCalc;
+ //! Compute the minimum split count.
+ double minimumSplitCount() const;
- //! The minimum cluster fractional count.
- double m_MinimumClusterFraction;
+ //! Split \p cluster if we find a good split.
+ bool maybeSplit(TClusterVecItr cluster);
- //! The minimum cluster count.
- double m_MinimumClusterCount;
+ //! Merge \p cluster and \p adjacentCluster if they are close enough.
+ bool maybeMerge(TClusterVecItr cluster, TClusterVecItr adjacentCluster);
- //! The minimum count for a category in the sketch to cluster.
- double m_MinimumCategoryCount;
+ //! Remove any clusters which are effectively dead.
+ bool prune();
- //! The data central confidence interval on which to Winsorise.
- double m_WinsorisationConfidenceInterval;
+ //! Get the Winsorisation interval.
+ TDoubleDoublePr winsorisationInterval() const;
- //! A generator of unique cluster indices.
- CIndexGenerator m_ClusterIndexGenerator;
+private:
+ //! The type of data being clustered.
+ maths_t::EDataType m_DataType;
- //! The smallest sample added to date.
- TMinAccumulator m_Smallest;
+ //! The distributions available to model the clusters.
+ CAvailableModeDistributions m_AvailableDistributions;
- //! The largest sample added to date.
- TMaxAccumulator m_Largest;
+ //! The initial rate at which information is lost.
+ double m_InitialDecayRate;
- //! The clusters.
- TClusterVec m_Clusters;
+ //! The rate at which information is lost.
+ double m_DecayRate; - friend ::CXMeansOnline1dTest; -}; + //! A measure of the length of history of the data clustered. + double m_HistoryLength; + + //! The style of the cluster weight calculation (see maths_t::EClusterWeightCalc). + maths_t::EClusterWeightCalc m_WeightCalc; + //! The minimum cluster fractional count. + double m_MinimumClusterFraction; + + //! The minimum cluster count. + double m_MinimumClusterCount; + + //! The minimum count for a category in the sketch to cluster. + double m_MinimumCategoryCount; + + //! The data central confidence interval on which to Winsorise. + double m_WinsorisationConfidenceInterval; + + //! A generator of unique cluster indices. + CIndexGenerator m_ClusterIndexGenerator; + + //! The smallest sample added to date. + TMinAccumulator m_Smallest; + + //! The largest sample added to date. + TMaxAccumulator m_Largest; + + //! The clusters. + TClusterVec m_Clusters; + + friend ::CXMeansOnline1dTest; +}; } } diff --git a/include/maths/CXMeansOnlineFactory.h b/include/maths/CXMeansOnlineFactory.h index 3afceaf5a8..e460edd8c6 100644 --- a/include/maths/CXMeansOnlineFactory.h +++ b/include/maths/CXMeansOnlineFactory.h @@ -16,86 +16,75 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStateRestoreTraverser; } -namespace maths -{ -template class CClusterer; +namespace maths { +template +class CClusterer; struct SDistributionRestoreParams; -namespace xmeans_online_factory_detail -{ +namespace xmeans_online_factory_detail { template class CFactory {}; -#define XMEANS_FACTORY(T, N) \ -template<> \ -class MATHS_EXPORT CFactory \ -{ \ - public: \ - static CClusterer > *make(maths_t::EDataType dataType, \ - maths_t::EClusterWeightCalc weightCalc, \ - double decayRate, \ - double minimumClusterFraction, \ - double minimumClusterCount, \ - double minimumCategoryCount); \ - static CClusterer > *restore(const SDistributionRestoreParams ¶ms, \ - const CClustererTypes::TSplitFunc &splitFunc, \ - const CClustererTypes::TMergeFunc &mergeFunc, \ - core::CStateRestoreTraverser &traverser); \ -} +#define XMEANS_FACTORY(T, N) \ + template<> \ + class MATHS_EXPORT CFactory { \ + public: \ + static CClusterer>* make(maths_t::EDataType dataType, \ + maths_t::EClusterWeightCalc weightCalc, \ + double decayRate, \ + double minimumClusterFraction, \ + double minimumClusterCount, \ + double minimumCategoryCount); \ + static CClusterer>* restore(const SDistributionRestoreParams& params, \ + const CClustererTypes::TSplitFunc& splitFunc, \ + const CClustererTypes::TMergeFunc& mergeFunc, \ + core::CStateRestoreTraverser& traverser); \ + } XMEANS_FACTORY(CFloatStorage, 2); XMEANS_FACTORY(CFloatStorage, 3); XMEANS_FACTORY(CFloatStorage, 4); XMEANS_FACTORY(CFloatStorage, 5); #undef XMEANS_FACTORY - } //! \brief Factory for multivariate x-means online clusterers. -class MATHS_EXPORT CXMeansOnlineFactory -{ - public: - //! Create a new x-means clusterer. - //! - //! \param[in] dataType The type of data which will be clustered. - //! \param[in] weightCalc The style of the cluster weight calculation - //! (see maths_t::EClusterWeightCalc for details). - //! \param[in] decayRate Controls the rate at which information is - //! lost from the clusters. - //! \param[in] minimumClusterFraction The minimum fractional count - //! of points in a cluster. - //! \param[in] minimumClusterCount The minimum count of points in a - //! cluster. 
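// A reduced sketch of the factory pattern used by CXMeansOnlineFactory in
// this header: the public template forwards to a detail factory that is
// explicitly specialised per (T, N) by the XMEANS_FACTORY macro, so each
// instantiation is compiled once in a .cc file instead of in every includer.
// Names are illustrative and the payload is simplified to int.
#include <cstddef>

template<typename T, std::size_t N>
struct FactorySketch; // primary template intentionally left undefined

#define SKETCH_FACTORY(T, N)                                                  \
    template<>                                                                \
    struct FactorySketch<T, N> {                                              \
        static int* make(); /* definition lives in a single .cc file */       \
    }

SKETCH_FACTORY(float, 2);
#undef SKETCH_FACTORY

// Callers instantiate through the forwarding template; using an unsupported
// (T, N) pair fails at compile or link time rather than silently.
template<typename T, std::size_t N>
int* makeSketch() {
    return FactorySketch<T, N>::make();
}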
- template<typename T, std::size_t N>
- static inline CClusterer<CVectorNx1<T, N>> *make(maths_t::EDataType dataType,
- maths_t::EClusterWeightCalc weightCalc,
- double decayRate,
- double minimumClusterFraction,
- double minimumClusterCount,
- double minimumCategoryCount)
- {
- return xmeans_online_factory_detail::CFactory<T, N>::make(
- dataType, weightCalc, decayRate,
- minimumClusterFraction, minimumClusterCount, minimumCategoryCount);
- }
+class MATHS_EXPORT CXMeansOnlineFactory {
+public:
+ //! Create a new x-means clusterer.
+ //!
+ //! \param[in] dataType The type of data which will be clustered.
+ //! \param[in] weightCalc The style of the cluster weight calculation
+ //! (see maths_t::EClusterWeightCalc for details).
+ //! \param[in] decayRate Controls the rate at which information is
+ //! lost from the clusters.
+ //! \param[in] minimumClusterFraction The minimum fractional count
+ //! of points in a cluster.
+ //! \param[in] minimumClusterCount The minimum count of points in a
+ //! cluster.
+ template<typename T, std::size_t N>
+ static inline CClusterer<CVectorNx1<T, N>>* make(maths_t::EDataType dataType,
+ maths_t::EClusterWeightCalc weightCalc,
+ double decayRate,
+ double minimumClusterFraction,
+ double minimumClusterCount,
+ double minimumCategoryCount) {
+ return xmeans_online_factory_detail::CFactory<T, N>::make(
+ dataType, weightCalc, decayRate, minimumClusterFraction, minimumClusterCount, minimumCategoryCount);
+ }

- //! Construct by traversing a state document.
- template<typename T, std::size_t N>
- static inline CClusterer<CVectorNx1<T, N>> *restore(const SDistributionRestoreParams &params,
- const CClustererTypes::TSplitFunc &splitFunc,
- const CClustererTypes::TMergeFunc &mergeFunc,
- core::CStateRestoreTraverser &traverser)
- {
- return xmeans_online_factory_detail::CFactory<T, N>::restore(
- params, splitFunc, mergeFunc, traverser);
- }
+ //! Construct by traversing a state document.
+ template<typename T, std::size_t N>
+ static inline CClusterer<CVectorNx1<T, N>>* restore(const SDistributionRestoreParams& params,
+ const CClustererTypes::TSplitFunc& splitFunc,
+ const CClustererTypes::TMergeFunc& mergeFunc,
+ core::CStateRestoreTraverser& traverser) {
+ return xmeans_online_factory_detail::CFactory<T, N>::restore(params, splitFunc, mergeFunc, traverser);
+ }
};
-
}
}
diff --git a/include/maths/Constants.h b/include/maths/Constants.h
index 3615c40831..8e8c22ecf0 100644
--- a/include/maths/Constants.h
+++ b/include/maths/Constants.h
@@ -12,10 +12,8 @@
 #include
 #include
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {
 //! The minimum coefficient of variation supported by the models.
 //! In general, if the coefficient of variation for the data becomes
@@ -101,44 +99,39 @@ const double MINIMUM_CLUSTER_SPLIT_COUNT{24.0};
 const double MINIMUM_CATEGORY_COUNT{0.5};
 //! \brief A collection of weight styles and weights.
-class MATHS_EXPORT CConstantWeights
-{
- public:
- using TDouble2Vec = core::CSmallVector<double, 2>;
- using TDouble4Vec = core::CSmallVector<double, 4>;
- using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
- using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
- using TDouble2Vec4Vec1Vec = core::CSmallVector<TDouble2Vec4Vec, 1>;
-
- public:
- //! A single count weight style.
- static const maths_t::TWeightStyleVec COUNT;
- //! A single count variance weight style.
- static const maths_t::TWeightStyleVec COUNT_VARIANCE;
- //! A single seasonal variance weight style.
- static const maths_t::TWeightStyleVec SEASONAL_VARIANCE;
- //! A unit weight.
- static const TDouble4Vec UNIT;
- //! A single unit weight.
- static const TDouble4Vec1Vec SINGLE_UNIT;
- //! Get a unit weight for data with \p dimension.
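// A minimal sketch of the unit-weight helpers declared above: a "unit weight"
// for dimension d is a single weight vector of d ones, and the "single unit"
// variant wraps it in a one-element outer vector. std::vector stands in for
// core::CSmallVector purely to keep the sketch self-contained.
#include <cstddef>
#include <vector>

using TVec = std::vector<double>;
using TVecVec = std::vector<TVec>;

// Equivalent in spirit to CConstantWeights::unit(dimension).
TVecVec unitWeightSketch(std::size_t dimension) {
    return TVecVec{TVec(dimension, 1.0)};
}

// Equivalent in spirit to CConstantWeights::singleUnit(dimension).
std::vector<TVecVec> singleUnitWeightSketch(std::size_t dimension) {
    return {unitWeightSketch(dimension)};
}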
- template<typename VECTOR>
- static core::CSmallVector<VECTOR, 4> unit(std::size_t dimension)
- {
- return TDouble2Vec4Vec{VECTOR(dimension, 1.0)};
- }
- //! Get a single unit weight for data with \p dimension.
- template<typename VECTOR>
- static core::CSmallVector<core::CSmallVector<VECTOR, 4>, 1> singleUnit(std::size_t dimension)
- {
- return core::CSmallVector<core::CSmallVector<VECTOR, 4>, 1>{
- core::CSmallVector<VECTOR, 4>{VECTOR(dimension, 1.0)}};
- }
+class MATHS_EXPORT CConstantWeights {
+public:
+ using TDouble2Vec = core::CSmallVector<double, 2>;
+ using TDouble4Vec = core::CSmallVector<double, 4>;
+ using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
+ using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
+ using TDouble2Vec4Vec1Vec = core::CSmallVector<TDouble2Vec4Vec, 1>;
+
+public:
+ //! A single count weight style.
+ static const maths_t::TWeightStyleVec COUNT;
+ //! A single count variance weight style.
+ static const maths_t::TWeightStyleVec COUNT_VARIANCE;
+ //! A single seasonal variance weight style.
+ static const maths_t::TWeightStyleVec SEASONAL_VARIANCE;
+ //! A unit weight.
+ static const TDouble4Vec UNIT;
+ //! A single unit weight.
+ static const TDouble4Vec1Vec SINGLE_UNIT;
+ //! Get a unit weight for data with \p dimension.
+ template<typename VECTOR>
+ static core::CSmallVector<VECTOR, 4> unit(std::size_t dimension) {
+ return TDouble2Vec4Vec{VECTOR(dimension, 1.0)};
+ }
+ //! Get a single unit weight for data with \p dimension.
+ template<typename VECTOR>
+ static core::CSmallVector<core::CSmallVector<VECTOR, 4>, 1> singleUnit(std::size_t dimension) {
+ return core::CSmallVector<core::CSmallVector<VECTOR, 4>, 1>{core::CSmallVector<VECTOR, 4>{VECTOR(dimension, 1.0)}};
+ }
};

 //! Get the maximum amount we'll penalize a model in addSamples.
 MATHS_EXPORT
 double maxModelPenalty(double numberSamples);
-
}
}
diff --git a/include/maths/ImportExport.h b/include/maths/ImportExport.h
index 28ae5880e7..2d6079f110 100644
--- a/include/maths/ImportExport.h
+++ b/include/maths/ImportExport.h
@@ -36,4 +36,3 @@
 #endif
 #endif // INCLUDED_ml_maths_ImportExport_h
-
diff --git a/include/maths/MathsTypes.h b/include/maths/MathsTypes.h
index 0fc2865030..ceda1782f7 100644
--- a/include/maths/MathsTypes.h
+++ b/include/maths/MathsTypes.h
@@ -15,16 +15,13 @@
 #include
 #include
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {
 using core::CFloatStorage;
 class CCalendarComponent;
 class CSeasonalComponent;
 }
-namespace maths_t
-{
+namespace maths_t {
 using TDoubleDoublePr = std::pair<double, double>;
 using TDouble4Vec = core::CSmallVector<double, 4>;
@@ -45,13 +42,7 @@ using TCalendarComponentVec = std::vector<CCalendarComponent>;
 //! -# ContinuousData: which indicates the data takes real values.
 //! -# MixedData: which indicates the data can be decomposed into
 //! some combination of the other three data types.
-enum EDataType
-{
- E_DiscreteData,
- E_IntegerData,
- E_ContinuousData,
- E_MixedData
-};
+enum EDataType { E_DiscreteData, E_IntegerData, E_ContinuousData, E_MixedData };
 //! An enumeration of the types of weight which can be applied
 //! when adding samples, calculating marginal likelihood or
@@ -70,8 +61,7 @@ enum EDataType
 //! -# WinsorisationWeight: only affects update where it basically
 //! behaves like CountWeight except for the way it interacts
 //! with clustering.
-enum ESampleWeightStyle
-{
+enum ESampleWeightStyle {
 E_SampleCountWeight,
 E_SampleSeasonalVarianceScaleWeight,
 E_SampleCountVarianceScaleWeight,
@@ -85,114 +75,85 @@ using TWeightStyleVec = core::CSmallVector<ESampleWeightStyle, 4>;
 //! Extract the effective sample count from a collection of weights.
 MATHS_EXPORT
-double count(const TWeightStyleVec &weightStyles,
- const TDouble4Vec &weights);
+double count(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights);
 //! Extract the effective sample count from a collection of weights.
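// A hedged sketch of the convention behind count() and its relatives:
// weights travel as two parallel arrays, one of styles and one of values,
// and the effective count is the product of the weights whose style
// contributes to the count. This illustrates the data layout only; it is
// not the library implementation.
#include <cstddef>
#include <vector>

enum ESketchWeightStyle { SketchCount, SketchSeasonalScale, SketchCountScale, SketchWinsorisation };

double countSketch(const std::vector<ESketchWeightStyle>& styles,
                   const std::vector<double>& weights) {
    double result = 1.0;
    for (std::size_t i = 0; i < styles.size() && i < weights.size(); ++i) {
        if (styles[i] == SketchCount) {
            result *= weights[i]; // variance scales are ignored for the count
        }
    }
    return result;
}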
MATHS_EXPORT -TDouble10Vec count(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights); +TDouble10Vec count(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights); //! Extract the effective sample count with which to update a model //! from a collection of weights. MATHS_EXPORT -double countForUpdate(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights); +double countForUpdate(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); //! Extract the effective sample count with which to update a model //! from a collection of weights. MATHS_EXPORT -TDouble10Vec countForUpdate(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights); +TDouble10Vec countForUpdate(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights); //! Extract the winsorisation weight from a collection of weights. MATHS_EXPORT -double winsorisationWeight(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights); +double winsorisationWeight(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); //! Extract the winsorisation weight from a collection of weights. MATHS_EXPORT -TDouble10Vec winsorisationWeight(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights); +TDouble10Vec winsorisationWeight(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights); //! Extract the variance scale from a collection of weights. MATHS_EXPORT -double seasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights); +double seasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); //! Extract the variance scale from a collection of weights. MATHS_EXPORT -TDouble10Vec seasonalVarianceScale(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights); +TDouble10Vec seasonalVarianceScale(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights); //! Extract the variance scale from a collection of weights. MATHS_EXPORT -double countVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights); +double countVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); //! Extract the variance scale from a collection of weights. MATHS_EXPORT -TDouble10Vec countVarianceScale(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights); +TDouble10Vec countVarianceScale(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights); //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights); +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec1Vec &weights); +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights); //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights); +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights); //! Check if a non-unit seasonal variance scale applies. 
MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec1Vec &weights); +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec1Vec& weights); //! Check if a non-unit count variance scale applies. MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights); +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec1Vec &weights); +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights); //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights); +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights); //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec1Vec &weights); +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec1Vec& weights); //! Set \p style to weight or append if it isn't in \p weightStyles. MATHS_EXPORT -void setWeight(ESampleWeightStyle style, - double weight, - TWeightStyleVec &weightStyles, - TDouble4Vec &weights); +void setWeight(ESampleWeightStyle style, double weight, TWeightStyleVec& weightStyles, TDouble4Vec& weights); //! Set \p style to weight or append if it isn't in \p weightStyles. MATHS_EXPORT -void setWeight(ESampleWeightStyle style, - double weight, - std::size_t dimension, - TWeightStyleVec &weightStyles, - TDouble10Vec4Vec &weights); +void setWeight(ESampleWeightStyle style, double weight, std::size_t dimension, TWeightStyleVec& weightStyles, TDouble10Vec4Vec& weights); //! Enumerates the possible probability of less likely sample calculations. //! @@ -209,35 +170,20 @@ void setWeight(ESampleWeightStyle style, //! for the sample minimum or larger values for the sample maximum. //! Note that we normalize the one sided probabilities so they equal //! 1 at the distribution median. -enum EProbabilityCalculation - { - E_OneSidedBelow, - E_TwoSided, - E_OneSidedAbove - }; +enum EProbabilityCalculation { E_OneSidedBelow, E_TwoSided, E_OneSidedAbove }; //! This controls the calculation of the cluster probabilities. //! There are two styles available: //! -# Equal: all clusters have equal weight. //! -# Fraction: the weight of a cluster is proportional to the //! number of points which have been assigned to the cluster. -enum EClusterWeightCalc - { - E_ClustersEqualWeight, - E_ClustersFractionWeight - }; +enum EClusterWeightCalc { E_ClustersEqualWeight, E_ClustersFractionWeight }; //! A set of statuses which track the result of a floating point //! calculations. These provide finer grained information than //! a pass/fail boolean which can be used to take appropriate //! action in the calling context. -enum EFloatingPointErrorStatus - { - E_FpNoErrors = 0x0, - E_FpOverflowed = 0x1, - E_FpFailed = 0x2, - E_FpAllErrors = 0x3 - }; +enum EFloatingPointErrorStatus { E_FpNoErrors = 0x0, E_FpOverflowed = 0x1, E_FpFailed = 0x2, E_FpAllErrors = 0x3 }; //! Enumerates the cases that a collection of samples is either in //! 
the left tail, right tail or a mixture or neither of the tails @@ -251,16 +197,8 @@ enum EFloatingPointErrorStatus //! -# Mixed or neither is used to denote the case that some are //! to left, some to the right and/or some are between the left //! and rightmost modes. -enum ETail - { - E_UndeterminedTail = 0x0, - E_LeftTail = 0x1, - E_RightTail = 0x2, - E_MixedOrNeitherTail = 0x3 - }; - +enum ETail { E_UndeterminedTail = 0x0, E_LeftTail = 0x1, E_RightTail = 0x2, E_MixedOrNeitherTail = 0x3 }; } } #endif // INCLUDED_ml_maths_t_MathsTypes_h - diff --git a/include/maths/ProbabilityAggregators.h b/include/maths/ProbabilityAggregators.h index e49bc5a0a0..5142c9fb25 100644 --- a/include/maths/ProbabilityAggregators.h +++ b/include/maths/ProbabilityAggregators.h @@ -16,15 +16,12 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { //! \brief Computes the joint probability of seeing a more extreme //! collection of samples. @@ -46,68 +43,62 @@ namespace maths //! probability of a collection of samples which are sampled where //! each sample only appears with some specified frequency. The weights //! must be non-negative. -class MATHS_EXPORT CJointProbabilityOfLessLikelySamples : private boost::addable -{ - public: - using TOptionalDouble = boost::optional; +class MATHS_EXPORT CJointProbabilityOfLessLikelySamples : private boost::addable { +public: + using TOptionalDouble = boost::optional; - //! Functor wrapper of CJointProbabilityOfLessLikelySamples::add. - struct SAddProbability - { - CJointProbabilityOfLessLikelySamples & - operator()(CJointProbabilityOfLessLikelySamples &jointProbability, - double probability, - double weight = 1.0) const; - }; + //! Functor wrapper of CJointProbabilityOfLessLikelySamples::add. + struct SAddProbability { + CJointProbabilityOfLessLikelySamples& + operator()(CJointProbabilityOfLessLikelySamples& jointProbability, double probability, double weight = 1.0) const; + }; - public: - CJointProbabilityOfLessLikelySamples(); +public: + CJointProbabilityOfLessLikelySamples(); - //! Initialize from \p value if possible. - bool fromDelimited(const std::string &value); + //! Initialize from \p value if possible. + bool fromDelimited(const std::string& value); - //! Convert to a delimited string. - std::string toDelimited() const; + //! Convert to a delimited string. + std::string toDelimited() const; - //! Combine two joint probability calculators. - const CJointProbabilityOfLessLikelySamples & - operator+=(const CJointProbabilityOfLessLikelySamples &other); + //! Combine two joint probability calculators. + const CJointProbabilityOfLessLikelySamples& operator+=(const CJointProbabilityOfLessLikelySamples& other); - //! Add \p probability. - void add(double probability, double weight = 1.0); + //! Add \p probability. + void add(double probability, double weight = 1.0); - //! Calculate the joint probability of less likely samples - //! than those added so far. - bool calculate(double &result) const; + //! Calculate the joint probability of less likely samples + //! than those added so far. + bool calculate(double& result) const; - //! Compute the average probability of less likely samples - //! added so far. - bool averageProbability(double &result) const; + //! Compute the average probability of less likely samples + //! added so far. + bool averageProbability(double& result) const; - //! Get the first probability. 
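// The accumulator above reduces a set of per-sample probabilities to one
// joint probability of seeing samples at least as unlikely. A classical
// reference point for that reduction is Fisher's method: for n independent
// p-values, -2 * sum(log p) is chi-squared with 2n degrees of freedom. The
// sketch below computes that textbook statistic; it is a baseline for
// intuition, not necessarily the exact calculation this class performs.
#include <boost/math/distributions/chi_squared.hpp>
#include <cmath>
#include <vector>

double fishersMethodSketch(const std::vector<double>& probabilities) {
    double statistic = 0.0;
    for (double p : probabilities) {
        statistic += -2.0 * std::log(p);
    }
    boost::math::chi_squared chiSquared(2.0 * static_cast<double>(probabilities.size()));
    // Survival function: the chance of a statistic at least this extreme.
    return boost::math::cdf(boost::math::complement(chiSquared, statistic));
}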
- TOptionalDouble onlyProbability() const; + //! Get the first probability. + TOptionalDouble onlyProbability() const; - //! Get the total deviation of all samples added. - double distance() const; + //! Get the total deviation of all samples added. + double distance() const; - //! Get the count of all samples added. - double numberSamples() const; + //! Get the count of all samples added. + double numberSamples() const; - //! Get a checksum for an object of this class. - uint64_t checksum(uint64_t seed) const; + //! Get a checksum for an object of this class. + uint64_t checksum(uint64_t seed) const; - //! Print the joint probability for debugging. - std::ostream &print(std::ostream &o) const; + //! Print the joint probability for debugging. + std::ostream& print(std::ostream& o) const; - private: - TOptionalDouble m_OnlyProbability; - double m_Distance; - double m_NumberSamples; +private: + TOptionalDouble m_OnlyProbability; + double m_Distance; + double m_NumberSamples; }; MATHS_EXPORT -std::ostream &operator<<(std::ostream &o, - const CJointProbabilityOfLessLikelySamples &probability); +std::ostream& operator<<(std::ostream& o, const CJointProbabilityOfLessLikelySamples& probability); //! \brief Computes log of the joint probability of seeing a more //! extreme collection of samples. @@ -136,25 +127,23 @@ std::ostream &operator<<(std::ostream &o, //! For example, two probabilities should be treated as equal if the //! intervals defined by their upper and lower bounds intersect. class MATHS_EXPORT CLogJointProbabilityOfLessLikelySamples : protected CJointProbabilityOfLessLikelySamples, - private boost::addable -{ - public: - CLogJointProbabilityOfLessLikelySamples(); + private boost::addable { +public: + CLogJointProbabilityOfLessLikelySamples(); - //! Combine two log joint probability calculators. - const CLogJointProbabilityOfLessLikelySamples & - operator+=(const CLogJointProbabilityOfLessLikelySamples &other); + //! Combine two log joint probability calculators. + const CLogJointProbabilityOfLessLikelySamples& operator+=(const CLogJointProbabilityOfLessLikelySamples& other); - //! Add \p probability. - void add(double probability, double weight = 1.0); + //! Add \p probability. + void add(double probability, double weight = 1.0); - //! Calculate a lower bound for the log of the joint probability - //! of less likely samples than those added so far. - bool calculateLowerBound(double &result) const; + //! Calculate a lower bound for the log of the joint probability + //! of less likely samples than those added so far. + bool calculateLowerBound(double& result) const; - //! Calculate an upper bound for the log of the joint probability - //! of less likely samples than those added so far. - bool calculateUpperBound(double &result) const; + //! Calculate an upper bound for the log of the joint probability + //! of less likely samples than those added so far. + bool calculateUpperBound(double& result) const; }; //! \brief Computes probability of seeing the most extreme sample @@ -194,46 +183,42 @@ class MATHS_EXPORT CLogJointProbabilityOfLessLikelySamples : protected CJointPro //!
//! //! where we have used the fact that \f$(1 - F(x)) = p / 2\f$. -class MATHS_EXPORT CProbabilityOfExtremeSample : private boost::addable -{ - public: - CProbabilityOfExtremeSample(); +class MATHS_EXPORT CProbabilityOfExtremeSample : private boost::addable { +public: + CProbabilityOfExtremeSample(); - //! Initialize from \p value if possible. - bool fromDelimited(const std::string &value); + //! Initialize from \p value if possible. + bool fromDelimited(const std::string& value); - //! Convert to a delimited string. - std::string toDelimited() const; + //! Convert to a delimited string. + std::string toDelimited() const; - //! Combine two extreme probability calculators. - const CProbabilityOfExtremeSample & - operator+=(const CProbabilityOfExtremeSample &other); + //! Combine two extreme probability calculators. + const CProbabilityOfExtremeSample& operator+=(const CProbabilityOfExtremeSample& other); - //! Add \p probability. - bool add(double probability, double weight = 1.0); + //! Add \p probability. + bool add(double probability, double weight = 1.0); - //! Calculate the probability of seeing the most extreme - //! sample added so far. - bool calculate(double &result) const; + //! Calculate the probability of seeing the most extreme + //! sample added so far. + bool calculate(double& result) const; - //! Get a checksum for an object of this class. - uint64_t checksum(uint64_t seed) const; + //! Get a checksum for an object of this class. + uint64_t checksum(uint64_t seed) const; - //! Print the extreme probability for debugging. - std::ostream &print(std::ostream &o) const; + //! Print the extreme probability for debugging. + std::ostream& print(std::ostream& o) const; - private: - using TMinValueAccumulator = CBasicStatistics::COrderStatisticsStack; +private: + using TMinValueAccumulator = CBasicStatistics::COrderStatisticsStack; - private: - TMinValueAccumulator m_MinValue; - double m_NumberSamples; +private: + TMinValueAccumulator m_MinValue; + double m_NumberSamples; }; MATHS_EXPORT -std::ostream &operator<<(std::ostream &o, - const CProbabilityOfExtremeSample &probability); - +std::ostream& operator<<(std::ostream& o, const CProbabilityOfExtremeSample& probability); //! \brief Computes the probability of seeing the M most extreme //! samples in a collection of N samples. @@ -281,43 +266,40 @@ std::ostream &operator<<(std::ostream &o, //! The integral representing \f$P(R)\f$ can be evaluated in order \f$M^2\f$ //! as a polynomial in the individual probabilities \f$\{p_1, ..., p_M\}\f$ //! with recurrence relations used to compute the coefficients. -class MATHS_EXPORT CLogProbabilityOfMFromNExtremeSamples : private boost::addable -{ - public: - CLogProbabilityOfMFromNExtremeSamples(std::size_t m); +class MATHS_EXPORT CLogProbabilityOfMFromNExtremeSamples : private boost::addable { +public: + CLogProbabilityOfMFromNExtremeSamples(std::size_t m); - //! Initialize from \p value if possible. - bool fromDelimited(const std::string &value); + //! Initialize from \p value if possible. + bool fromDelimited(const std::string& value); - //! Convert to a delimited string. - std::string toDelimited() const; + //! Convert to a delimited string. + std::string toDelimited() const; - //! Combine two extreme probability calculators. - const CLogProbabilityOfMFromNExtremeSamples & - operator+=(const CLogProbabilityOfMFromNExtremeSamples &other); + //! Combine two extreme probability calculators. 
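CProbabilityOfExtremeSample has the same aggregate-then-calculate shape; a small sketch using only the members shown here, with made-up inputs:

ml::maths::CProbabilityOfExtremeSample extremeProbability;
for (auto p : {0.3, 0.001, 0.7}) {
    extremeProbability.add(p);
}
double probability;
if (extremeProbability.calculate(probability)) {
    // 'probability' corrects the smallest of the three sample
    // probabilities for the fact that three samples were seen.
}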
+ const CLogProbabilityOfMFromNExtremeSamples& operator+=(const CLogProbabilityOfMFromNExtremeSamples& other); - //! Add \p probability. - void add(double probability); + //! Add \p probability. + void add(double probability); - //! Calculate the probability of seeing the "M" most extreme - //! samples added so far. - bool calculate(double &result); + //! Calculate the probability of seeing the "M" most extreme + //! samples added so far. + bool calculate(double& result); - //! Calculate the calibrated probability of seeing the "M" most - //! extreme samples added so far. - bool calibrated(double &result); + //! Calculate the calibrated probability of seeing the "M" most + //! extreme samples added so far. + bool calibrated(double& result); - //! Get a checksum for an object of this class. - uint64_t checksum(uint64_t seed) const; + //! Get a checksum for an object of this class. + uint64_t checksum(uint64_t seed) const; - private: - using TMinValueAccumulator = CBasicStatistics::COrderStatisticsHeap; +private: + using TMinValueAccumulator = CBasicStatistics::COrderStatisticsHeap; - private: - TMinValueAccumulator m_MinValues; - std::size_t m_NumberSamples; +private: + TMinValueAccumulator m_MinValues; + std::size_t m_NumberSamples; }; - } } diff --git a/include/model/CAnnotatedProbability.h b/include/model/CAnnotatedProbability.h index 9bf0c3e183..96c19b06af 100644 --- a/include/model/CAnnotatedProbability.h +++ b/include/model/CAnnotatedProbability.h @@ -18,24 +18,19 @@ #include #include -#include #include +#include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { -namespace annotated_probability -{ -enum EDescriptiveData -{ +namespace annotated_probability { +enum EDescriptiveData { E_PERSON_PERIOD = 0, E_PERSON_NEVER_SEEN_BEFORE = 1, E_PERSON_COUNT = 2, @@ -49,8 +44,7 @@ enum EDescriptiveData } //! \brief A collection of data describing an attribute's probability. -struct MODEL_EXPORT SAttributeProbability -{ +struct MODEL_EXPORT SAttributeProbability { using TDouble1Vec = core::CSmallVector; using TSizeDoublePr = std::pair; using TSizeDoublePr1Vec = core::CSmallVector; @@ -60,22 +54,22 @@ struct MODEL_EXPORT SAttributeProbability SAttributeProbability(); SAttributeProbability(std::size_t cid, - const core::CStoredStringPtr &attribute, + const core::CStoredStringPtr& attribute, double probability, model_t::CResultType type, model_t::EFeature feature, - const TStoredStringPtr1Vec &correlatedAttributes, - const TSizeDoublePr1Vec &correlated); + const TStoredStringPtr1Vec& correlatedAttributes, + const TSizeDoublePr1Vec& correlated); //! Total ordering of attribute probabilities by probability //! breaking ties using the attribute and finally the feature. - bool operator<(const SAttributeProbability &other) const; + bool operator<(const SAttributeProbability& other) const; //! Persist the probability passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Restore the probability reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); //! Add the descriptive data \p value for \p key. 
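Because SAttributeProbability defines a total ordering (probability first, breaking ties on the attribute and then the feature), collections of attribute probabilities can be sorted directly. A hypothetical snippet:

#include <algorithm>
#include <vector>

// 'attributeProbabilities' is assumed to have been filled in from a
// model's results.
std::vector<ml::model::SAttributeProbability> attributeProbabilities;
std::sort(attributeProbabilities.begin(), attributeProbabilities.end());
// The front element is now the least likely, i.e. most anomalous,
// attribute.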
void addDescriptiveData(annotated_probability::EDescriptiveData key, double value); @@ -109,8 +103,7 @@ struct MODEL_EXPORT SAttributeProbability //! This includes all associated data such as a set of the smallest //! attribute probabilities, the influences, extra descriptive data //! and so on. -struct MODEL_EXPORT SAnnotatedProbability -{ +struct MODEL_EXPORT SAnnotatedProbability { using TAttributeProbability1Vec = core::CSmallVector; using TStoredStringPtrStoredStringPtrPr = std::pair; using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair; @@ -127,16 +120,16 @@ struct MODEL_EXPORT SAnnotatedProbability void addDescriptiveData(annotated_probability::EDescriptiveData key, double value); //! Efficiently swap the contents of this and \p other. - void swap(SAnnotatedProbability &other); + void swap(SAnnotatedProbability& other); //! Is the result type interim? bool isInterim() const; //! Persist the probability passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Restore the probability reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); //! The probability of seeing the series' sample in a time interval. double s_Probability; @@ -163,7 +156,6 @@ struct MODEL_EXPORT SAnnotatedProbability //! The baseline bucket count for this probability (cached from the model). TOptionalDouble s_BaselineBucketCount; }; - } } diff --git a/include/model/CAnnotatedProbabilityBuilder.h b/include/model/CAnnotatedProbabilityBuilder.h index 57663f4b44..aaf49679b2 100644 --- a/include/model/CAnnotatedProbabilityBuilder.h +++ b/include/model/CAnnotatedProbabilityBuilder.h @@ -21,75 +21,67 @@ #include #include - -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CMultinomialConjugate; } -namespace model -{ +namespace model { class CModel; //! \brief Manages the creation of annotated probabilities using the //! builder pattern. 
-class MODEL_EXPORT CAnnotatedProbabilityBuilder : private core::CNonCopyable -{ - public: - using TSizeDoublePr = std::pair; - using TDouble1Vec = core::CSmallVector; - using TSize1Vec = core::CSmallVector; - using TSizeDoublePr1Vec = core::CSmallVector; - using TStoredStringPtr1Vec = core::CSmallVector; - - public: - CAnnotatedProbabilityBuilder(SAnnotatedProbability &annotatedProbability); - - CAnnotatedProbabilityBuilder(SAnnotatedProbability &annotatedProbability, - std::size_t numberAttributeProbabilities, - function_t::EFunction function, - std::size_t numberOfPeople); - - void attributeProbabilityPrior(const maths::CMultinomialConjugate *prior); - void personAttributeProbabilityPrior(const maths::CMultinomialConjugate *prior); - void personFrequency(double frequency, bool everSeenBefore); - void probability(double p); - void addAttributeProbability(std::size_t cid, - const core::CStoredStringPtr &attribute, - double pAttribute, - double pGivenAttribute, - model_t::CResultType type, - model_t::EFeature feature, - const TStoredStringPtr1Vec &correlatedAttributes, - const TSizeDoublePr1Vec &correlated); - void build(); - - private: - void addAttributeDescriptiveData(std::size_t cid, - double pAttribute, - SAttributeProbability &attributeProbability); - - void addDescriptiveData(); - - private: - using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; - - private: - SAnnotatedProbability &m_Result; - std::size_t m_NumberAttributeProbabilities; - std::size_t m_NumberOfPeople; - const maths::CMultinomialConjugate *m_AttributeProbabilityPrior; - const maths::CMultinomialConjugate *m_PersonAttributeProbabilityPrior; - TMinAccumulator m_MinAttributeProbabilities; - std::size_t m_DistinctTotalAttributes; - std::size_t m_DistinctRareAttributes; - double m_RareAttributes; - bool m_IsPopulation; - bool m_IsRare; - bool m_IsFreqRare; +class MODEL_EXPORT CAnnotatedProbabilityBuilder : private core::CNonCopyable { +public: + using TSizeDoublePr = std::pair; + using TDouble1Vec = core::CSmallVector; + using TSize1Vec = core::CSmallVector; + using TSizeDoublePr1Vec = core::CSmallVector; + using TStoredStringPtr1Vec = core::CSmallVector; + +public: + CAnnotatedProbabilityBuilder(SAnnotatedProbability& annotatedProbability); + + CAnnotatedProbabilityBuilder(SAnnotatedProbability& annotatedProbability, + std::size_t numberAttributeProbabilities, + function_t::EFunction function, + std::size_t numberOfPeople); + + void attributeProbabilityPrior(const maths::CMultinomialConjugate* prior); + void personAttributeProbabilityPrior(const maths::CMultinomialConjugate* prior); + void personFrequency(double frequency, bool everSeenBefore); + void probability(double p); + void addAttributeProbability(std::size_t cid, + const core::CStoredStringPtr& attribute, + double pAttribute, + double pGivenAttribute, + model_t::CResultType type, + model_t::EFeature feature, + const TStoredStringPtr1Vec& correlatedAttributes, + const TSizeDoublePr1Vec& correlated); + void build(); + +private: + void addAttributeDescriptiveData(std::size_t cid, double pAttribute, SAttributeProbability& attributeProbability); + + void addDescriptiveData(); + +private: + using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; + +private: + SAnnotatedProbability& m_Result; + std::size_t m_NumberAttributeProbabilities; + std::size_t m_NumberOfPeople; + const maths::CMultinomialConjugate* m_AttributeProbabilityPrior; + const maths::CMultinomialConjugate* m_PersonAttributeProbabilityPrior; + TMinAccumulator 
m_MinAttributeProbabilities; + std::size_t m_DistinctTotalAttributes; + std::size_t m_DistinctRareAttributes; + double m_RareAttributes; + bool m_IsPopulation; + bool m_IsRare; + bool m_IsFreqRare; }; - } } diff --git a/include/model/CAnomalyDetector.h b/include/model/CAnomalyDetector.h index a2b8657d6c..71d34a616e 100644 --- a/include/model/CAnomalyDetector.h +++ b/include/model/CAnomalyDetector.h @@ -7,8 +7,8 @@ #define INCLUDED_ml_model_CAnomalyDetector_h #include -#include #include +#include #include #include @@ -19,8 +19,8 @@ #include #include #include -#include #include +#include #include @@ -31,16 +31,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CDataGatherer; class CModel; class CSearchKey; @@ -70,318 +66,296 @@ class CSearchKey; //! choose to analyse certain field values either individually or as //! a population. -class MODEL_EXPORT CAnomalyDetector : private core::CNonCopyable -{ - public: - using TStrVec = std::vector; - using TStrCPtrVec = std::vector; - using TModelPlotDataVec = std::vector; - - using TDataGathererPtr = boost::shared_ptr; - using TModelFactoryCPtr = boost::shared_ptr; - using TModelPtr = boost::shared_ptr; - - //! A shared pointer to an instance of this class - using TAnomalyDetectorPtr = boost::shared_ptr; - - using TOutputModelPlotDataFunc = std::function; - using TStrSet = CAnomalyDetectorModelConfig::TStrSet; - - public: - //! State version. This must be incremented every time a change to the - //! state is made that requires existing state to be discarded - static const std::string STATE_VERSION; - - //! Name of the count field - static const std::string COUNT_NAME; - - //! Name of a time field (for the GUI to make a distinction between a counter and a time) - static const std::string TIME_NAME; - - //! Indicator that the GUI should expect a field name but no field value - //! (because for a distinct count we're only interested in the number of - //! different values, not the values themselves) - static const std::string DISTINCT_COUNT_NAME; - - //! Indicator that the GUI should use a description template based on - //! rare events rather than numerous events - static const std::string RARE_NAME; - - //! Indicator that the GUI should use a description template based on - //! information content of events - static const std::string INFO_CONTENT_NAME; - - //! Output function names for metric anomalies - static const std::string MEAN_NAME; - static const std::string MEDIAN_NAME; - static const std::string MIN_NAME; - static const std::string MAX_NAME; - static const std::string VARIANCE_NAME; - static const std::string SUM_NAME; - static const std::string LAT_LONG_NAME; - static const std::string EMPTY_STRING; - - - public: - CAnomalyDetector(int detectorIndex, - CLimits &limits, - const CAnomalyDetectorModelConfig &modelConfig, - const std::string &partitionFieldValue, - core_t::TTime firstTime, - const TModelFactoryCPtr &modelFactory); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken for - //! a general purpose copy constructor. - CAnomalyDetector(bool isForPersistence, - const CAnomalyDetector &other); - - virtual ~CAnomalyDetector(); - - //! 
Get the total number of people which this is modeling. - size_t numberActivePeople() const; - - //! Get the total number of attributes which this is modeling. - size_t numberActiveAttributes() const; - - //! Get the maximum size of all the member containers. - size_t maxDimension() const; - - //! For the operationalised version of the product, we may create models - //! that need to reflect the fact that no data of a particular type was - //! seen for a period before the creation of the models, but WITHOUT - //! reporting any results for the majority of that period. This method - //! provides that facility. - void zeroModelsToTime(core_t::TTime time); - - //! Populate the object from a state document - bool acceptRestoreTraverser(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser); - - //! Restore state for statics - this is only called from the - //! simple count detector to ensure singleton behaviour - bool staticsAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Find the partition field value given part of an state document. - //! - //! \note This is static so it can be called before the state is fully - //! deserialised, because we need this value before to restoring the - //! detector. - static bool partitionFieldAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser, - std::string &partitionFieldValue); - - //! Find the detector keys given part of an state document. - //! - //! \note This is static so it can be called before the state is fully - //! deserialised, because we need these before to restoring the detector. - static bool keyAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser, - CSearchKey &key); - - //! Persist the detector keys separately to the rest of the state. - //! This must be done for a 100% streaming state restoration because - //! the key must be known before a detector object is created into - //! which other state can be restored. - void keyAcceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Persist the partition field separately to the rest of the state. - //! This must be done for a 100% streaming state restoration because - //! the partition field must be known before a detector object is - //! created into which other state can be restored. - void partitionFieldAcceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Persist state for statics - this is only called from the - //! simple count detector to ensure singleton behaviour - void staticsAcceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Persist state by passing information to the supplied inserter - //! - //! \note Some information is duplicated in keyAcceptPersistInserter() - //! and partitionFieldAcceptPersistInserter() due to historical reasons. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Get the cue for this detector. This consists of the search key cue - //! with the partition field value appended. - std::string toCue() const; - - //! Debug representation. Note that operator<<() is more efficient than - //! generating this debug string and immediately outputting it to a - //! stream. - std::string debug() const; - - //! Check if this is a simple count detector. - virtual bool isSimpleCount() const; - - //! Get the fields to extract from a record for processing by this detector. - const TStrVec &fieldsOfInterest() const; - - //! Extract and add the necessary details of an event record. 
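A hedged sketch of feeding one event to a detector through the interface above; 'detector' is assumed to be fully constructed, and 'eventTime' and the field values are invented. Values are passed by pointer, which is why TStrCPtrVec is used.

// Assemble the field values for one record and hand them over.
std::string airline{"AAL"};
std::string responseTime{"132.4"};
ml::model::CAnomalyDetector::TStrCPtrVec fieldValues{&airline, &responseTime};
detector.addRecord(eventTime, fieldValues);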
- void addRecord(core_t::TTime time, - const TStrCPtrVec &fieldValues); - - //! Update the results with this detector model's results. - void buildResults(core_t::TTime bucketStartTime, - core_t::TTime bucketEndTime, - CHierarchicalResults &results); - - //! Update the results with this detector model's results. - void buildInterimResults(core_t::TTime bucketStartTime, - core_t::TTime bucketEndTime, - CHierarchicalResults &results); - - //! Generate the model plot data for the time series identified - //! by \p terms. - void generateModelPlot(core_t::TTime bucketStartTime, - core_t::TTime bucketEndTime, - double boundsPercentile, - const TStrSet &terms, - TModelPlotDataVec &modelPlots) const; - - //! Generate ForecastPrerequistes, e.g. memory requirements - CForecastDataSink::SForecastModelPrerequisites getForecastPrerequisites() const; - - //! Generate maths models for forecasting - CForecastDataSink::SForecastResultSeries getForecastModels() const; - - //! Remove dead models, i.e. those models that have more-or-less - //! reverted back to their non-informative state. BE CAREFUL WHEN - //! CALLING THIS METHOD that you do not hold pointers to any models - //! that may be deleted as a result of this call. - virtual void pruneModels(); - - //! Reset bucket. - void resetBucket(core_t::TTime bucketStart); - - //! Release memory that is no longer needed - void releaseMemory(core_t::TTime samplingCutoffTime); - - //! Print the detector memory usage to the given stream - void showMemoryUsage(std::ostream &stream) const; - - //! Get the memory used by this detector - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Return the total memory usage - std::size_t memoryUsage() const; - - //! Get end of the last complete bucket we've observed. - const core_t::TTime &lastBucketEndTime() const; - - //! Get writable end of the last complete bucket we've observed. - core_t::TTime &lastBucketEndTime(); - - //! Access to the bucket length being used in the current models. This - //! can be used to detect discrepancies between the model config and - //! existing models. - core_t::TTime modelBucketLength() const; - - //! Get a description of this anomaly detector. - std::string description() const; - - //! Roll time forwards to \p time. - void timeNow(core_t::TTime time); - - //! Rolls time to \p endTime while skipping sampling the models for buckets within the gap - //! \param[in] endTime The end of the time interval to skip sampling. - void skipSampling(core_t::TTime endTime); - - const TModelPtr &model() const; - TModelPtr &model(); - - protected: - //! This function is called before adding a record allowing - //! for varied preprocessing. - virtual const TStrCPtrVec &preprocessFieldValues(const TStrCPtrVec &fieldValues); - - //! Initializes simple counting by adding a person called "count". - void initSimpleCounting(); - - private: - // Shared code for building results - template - void buildResultsHelper(core_t::TTime bucketStartTime, - core_t::TTime bucketEndTime, - SAMPLE_FUNC sampleFunc, - LAST_SAMPLED_BUCKET_UPDATE_FUNC lastSampledBucketUpdateFunc, - CHierarchicalResults &results); +class MODEL_EXPORT CAnomalyDetector : private core::CNonCopyable { +public: + using TStrVec = std::vector; + using TStrCPtrVec = std::vector; + using TModelPlotDataVec = std::vector; + + using TDataGathererPtr = boost::shared_ptr; + using TModelFactoryCPtr = boost::shared_ptr; + using TModelPtr = boost::shared_ptr; + + //! 
A shared pointer to an instance of this class + using TAnomalyDetectorPtr = boost::shared_ptr; + + using TOutputModelPlotDataFunc = + std::function; + using TStrSet = CAnomalyDetectorModelConfig::TStrSet; + +public: + //! State version. This must be incremented every time a change to the + //! state is made that requires existing state to be discarded + static const std::string STATE_VERSION; + + //! Name of the count field + static const std::string COUNT_NAME; + + //! Name of a time field (for the GUI to make a distinction between a counter and a time) + static const std::string TIME_NAME; + + //! Indicator that the GUI should expect a field name but no field value + //! (because for a distinct count we're only interested in the number of + //! different values, not the values themselves) + static const std::string DISTINCT_COUNT_NAME; + + //! Indicator that the GUI should use a description template based on + //! rare events rather than numerous events + static const std::string RARE_NAME; + + //! Indicator that the GUI should use a description template based on + //! information content of events + static const std::string INFO_CONTENT_NAME; + + //! Output function names for metric anomalies + static const std::string MEAN_NAME; + static const std::string MEDIAN_NAME; + static const std::string MIN_NAME; + static const std::string MAX_NAME; + static const std::string VARIANCE_NAME; + static const std::string SUM_NAME; + static const std::string LAT_LONG_NAME; + static const std::string EMPTY_STRING; + +public: + CAnomalyDetector(int detectorIndex, + CLimits& limits, + const CAnomalyDetectorModelConfig& modelConfig, + const std::string& partitionFieldValue, + core_t::TTime firstTime, + const TModelFactoryCPtr& modelFactory); + + //! Create a copy that will result in the same persisted state as the + //! original. This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken for + //! a general purpose copy constructor. + CAnomalyDetector(bool isForPersistence, const CAnomalyDetector& other); + + virtual ~CAnomalyDetector(); + + //! Get the total number of people which this is modeling. + size_t numberActivePeople() const; + + //! Get the total number of attributes which this is modeling. + size_t numberActiveAttributes() const; + + //! Get the maximum size of all the member containers. + size_t maxDimension() const; + + //! For the operationalised version of the product, we may create models + //! that need to reflect the fact that no data of a particular type was + //! seen for a period before the creation of the models, but WITHOUT + //! reporting any results for the majority of that period. This method + //! provides that facility. + void zeroModelsToTime(core_t::TTime time); + + //! Populate the object from a state document + bool acceptRestoreTraverser(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser); + + //! Restore state for statics - this is only called from the + //! simple count detector to ensure singleton behaviour + bool staticsAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Find the partition field value given part of an state document. + //! + //! \note This is static so it can be called before the state is fully + //! deserialised, because we need this value before to restoring the + //! detector. 
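The \note comments above describe an ordering constraint on restoration; the sketch below illustrates one way a caller might honour it. The traverser variables and the detector construction are placeholders, not code from this patch.

ml::model::CSearchKey key;
std::string partitionFieldValue;
// The key and the partition field must be recovered first, because a
// detector can only be constructed once they are known.
if (ml::model::CAnomalyDetector::keyAcceptRestoreTraverser(keyTraverser, key) &&
    ml::model::CAnomalyDetector::partitionFieldAcceptRestoreTraverser(fieldTraverser, partitionFieldValue)) {
    // ... construct 'detector' from the key, then restore the rest ...
    detector.acceptRestoreTraverser(partitionFieldValue, stateTraverser);
}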
+    static bool partitionFieldAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::string& partitionFieldValue);
+
+    //! Find the detector keys given part of a state document.
+    //!
+    //! \note This is static so it can be called before the state is fully
+    //! deserialised, because we need these before restoring the detector.
+    static bool keyAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, CSearchKey& key);
+
+    //! Persist the detector keys separately to the rest of the state.
+    //! This must be done for a 100% streaming state restoration because
+    //! the key must be known before a detector object is created into
+    //! which other state can be restored.
+    void keyAcceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Persist the partition field separately to the rest of the state.
+    //! This must be done for a 100% streaming state restoration because
+    //! the partition field must be known before a detector object is
+    //! created into which other state can be restored.
+    void partitionFieldAcceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Persist state for statics - this is only called from the
+    //! simple count detector to ensure singleton behaviour
+    void staticsAcceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Persist state by passing information to the supplied inserter
+    //!
+    //! \note Some information is duplicated in keyAcceptPersistInserter()
+    //! and partitionFieldAcceptPersistInserter() for historical reasons.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Get the cue for this detector. This consists of the search key cue
+    //! with the partition field value appended.
+    std::string toCue() const;
+
+    //! Debug representation. Note that operator<<() is more efficient than
+    //! generating this debug string and immediately outputting it to a
+    //! stream.
+    std::string debug() const;
+
+    //! Check if this is a simple count detector.
+    virtual bool isSimpleCount() const;
+
+    //! Get the fields to extract from a record for processing by this detector.
+    const TStrVec& fieldsOfInterest() const;
+
+    //! Extract and add the necessary details of an event record.
+    void addRecord(core_t::TTime time, const TStrCPtrVec& fieldValues);
+
+    //! Update the results with this detector model's results.
+    void buildResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results);
+
+    //! Update the results with this detector model's interim results.
+    void buildInterimResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results);
+
+    //! Generate the model plot data for the time series identified
+    //! by \p terms.
+    void generateModelPlot(core_t::TTime bucketStartTime,
+                           core_t::TTime bucketEndTime,
+                           double boundsPercentile,
+                           const TStrSet& terms,
+                           TModelPlotDataVec& modelPlots) const;
+
+    //! Generate forecast prerequisites, e.g. memory requirements
+    CForecastDataSink::SForecastModelPrerequisites getForecastPrerequisites() const;
+
+    //! Generate maths models for forecasting
+    CForecastDataSink::SForecastResultSeries getForecastModels() const;
+
+    //! Remove dead models, i.e. those models that have more-or-less
+    //! reverted back to their non-informative state. BE CAREFUL WHEN
+    //! CALLING THIS METHOD that you do not hold pointers to any models
+    //! that may be deleted as a result of this call.
+    virtual void pruneModels();
+
+    //! Reset bucket.
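For the plot API above, a sketch of requesting the bounds for one bucket; the 95th percentile and the variable names are illustrative, and a suitable term set is assumed to be available.

ml::model::CAnomalyDetector::TModelPlotDataVec modelPlots;
detector.generateModelPlot(bucketStart,
                           bucketStart + detector.modelBucketLength(),
                           95.0,  // boundsPercentile
                           terms, // selects which time series to plot
                           modelPlots);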
+ void resetBucket(core_t::TTime bucketStart); + + //! Release memory that is no longer needed + void releaseMemory(core_t::TTime samplingCutoffTime); + + //! Print the detector memory usage to the given stream + void showMemoryUsage(std::ostream& stream) const; + + //! Get the memory used by this detector + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Return the total memory usage + std::size_t memoryUsage() const; + + //! Get end of the last complete bucket we've observed. + const core_t::TTime& lastBucketEndTime() const; + + //! Get writable end of the last complete bucket we've observed. + core_t::TTime& lastBucketEndTime(); + + //! Access to the bucket length being used in the current models. This + //! can be used to detect discrepancies between the model config and + //! existing models. + core_t::TTime modelBucketLength() const; + + //! Get a description of this anomaly detector. + std::string description() const; - //! Updates the last sampled bucket - void updateLastSampledBucket(core_t::TTime bucketEndTime); + //! Roll time forwards to \p time. + void timeNow(core_t::TTime time); - //! Does not update the last sampled bucket. To be used - //! when interim results are calculated. - void noUpdateLastSampledBucket(core_t::TTime bucketEndTime) const; - - //! Sample the model in the interval [\p startTime, \p endTime]. - void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); + //! Rolls time to \p endTime while skipping sampling the models for buckets within the gap + //! \param[in] endTime The end of the time interval to skip sampling. + void skipSampling(core_t::TTime endTime); - //! Sample bucket statistics and any other state needed to compute - //! probabilities in the interval [\p startTime, \p endTime], but - //! does not update the model. - void sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! Restores the state that was formerly part of the model ensemble class. - //! This includes the data gatherer and the model. - bool legacyModelEnsembleAcceptRestoreTraverser(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser); + const TModelPtr& model() const; + TModelPtr& model(); - //! Restores the state that was formerly part of the live models - //! in the model ensemble class. - bool legacyModelsAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser); +protected: + //! This function is called before adding a record allowing + //! for varied preprocessing. + virtual const TStrCPtrVec& preprocessFieldValues(const TStrCPtrVec& fieldValues); - //! Persists the state that was formerly part of the model ensemble class. - //! This includes the data gatherer and the model. - void legacyModelEnsembleAcceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Initializes simple counting by adding a person called "count". + void initSimpleCounting(); - //! Persists the state that was formerly part of the live models - //! in the model ensemble class. - void legacyModelsAcceptPersistInserter(core::CStatePersistInserter &inserter) const; +private: + // Shared code for building results + template + void buildResultsHelper(core_t::TTime bucketStartTime, + core_t::TTime bucketEndTime, + SAMPLE_FUNC sampleFunc, + LAST_SAMPLED_BUCKET_UPDATE_FUNC lastSampledBucketUpdateFunc, + CHierarchicalResults& results); - protected: - //! Configurable limits - CLimits &m_Limits; + //! 
Updates the last sampled bucket + void updateLastSampledBucket(core_t::TTime bucketEndTime); - private: - //! An identifier for the search for which this is detecting anomalies. - int m_DetectorIndex; + //! Does not update the last sampled bucket. To be used + //! when interim results are calculated. + void noUpdateLastSampledBucket(core_t::TTime bucketEndTime) const; - //! Configurable behaviour - const CAnomalyDetectorModelConfig &m_ModelConfig; + //! Sample the model in the interval [\p startTime, \p endTime]. + void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); - //! The end of the last complete bucket we've observed. This is an OPEN - //! endpoint, i.e. this time is the lowest time NOT in the last bucket. - core_t::TTime m_LastBucketEndTime; + //! Sample bucket statistics and any other state needed to compute + //! probabilities in the interval [\p startTime, \p endTime], but + //! does not update the model. + void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); - //! The data gatherers. - TDataGathererPtr m_DataGatherer; + //! Restores the state that was formerly part of the model ensemble class. + //! This includes the data gatherer and the model. + bool legacyModelEnsembleAcceptRestoreTraverser(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser); - //! The factory for new data gatherers and models. - TModelFactoryCPtr m_ModelFactory; + //! Restores the state that was formerly part of the live models + //! in the model ensemble class. + bool legacyModelsAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - // The model of the data in which we are detecting anomalies. - TModelPtr m_Model; + //! Persists the state that was formerly part of the model ensemble class. + //! This includes the data gatherer and the model. + void legacyModelEnsembleAcceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Is this a cloned detector containing the bare minimum information - //! necessary to create a valid persisted state? - bool m_IsForPersistence; + //! Persists the state that was formerly part of the live models + //! in the model ensemble class. + void legacyModelsAcceptPersistInserter(core::CStatePersistInserter& inserter) const; - friend MODEL_EXPORT std::ostream &operator<<(std::ostream &, - const CAnomalyDetector &); +protected: + //! Configurable limits + CLimits& m_Limits; + +private: + //! An identifier for the search for which this is detecting anomalies. + int m_DetectorIndex; + + //! Configurable behaviour + const CAnomalyDetectorModelConfig& m_ModelConfig; + + //! The end of the last complete bucket we've observed. This is an OPEN + //! endpoint, i.e. this time is the lowest time NOT in the last bucket. + core_t::TTime m_LastBucketEndTime; + + //! The data gatherers. + TDataGathererPtr m_DataGatherer; + + //! The factory for new data gatherers and models. + TModelFactoryCPtr m_ModelFactory; + + // The model of the data in which we are detecting anomalies. + TModelPtr m_Model; + + //! Is this a cloned detector containing the bare minimum information + //! necessary to create a valid persisted state? 
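A plausible reading of how the private pieces above fit together (an assumption; the real bindings live in the implementation file, not in this patch): buildResults() and buildInterimResults() both delegate to buildResultsHelper(), differing only in the sampling and bookkeeping functions they pass.

// Final results: update the models and the last sampled bucket.
this->buildResultsHelper(bucketStartTime, bucketEndTime,
                         boost::bind(&CAnomalyDetector::sample, this, _1, _2, boost::ref(resourceMonitor)),
                         boost::bind(&CAnomalyDetector::updateLastSampledBucket, this, _1),
                         results);
// Interim results: gather bucket statistics only and leave the last
// sampled bucket untouched.
this->buildResultsHelper(bucketStartTime, bucketEndTime,
                         boost::bind(&CAnomalyDetector::sampleBucketStatistics, this, _1, _2, boost::ref(resourceMonitor)),
                         boost::bind(&CAnomalyDetector::noUpdateLastSampledBucket, this, _1),
                         results);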
+ bool m_IsForPersistence; + + friend MODEL_EXPORT std::ostream& operator<<(std::ostream&, const CAnomalyDetector&); }; MODEL_EXPORT -std::ostream &operator<<(std::ostream &strm, const CAnomalyDetector &detector); - +std::ostream& operator<<(std::ostream& strm, const CAnomalyDetector& detector); } } diff --git a/include/model/CAnomalyDetectorModel.h b/include/model/CAnomalyDetectorModel.h index 1701766450..15bc4cb7e6 100644 --- a/include/model/CAnomalyDetectorModel.h +++ b/include/model/CAnomalyDetectorModel.h @@ -9,12 +9,12 @@ #include #include -#include #include #include +#include -#include #include +#include #include #include @@ -37,21 +37,17 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CMultivariatePrior; } -namespace model -{ +namespace model { class CAttributeFrequencyGreaterThan; class CInterimBucketCorrector; @@ -126,618 +122,581 @@ struct SAttributeProbability; //! //! The hierarchy is non-copyable because we don't currently need to be //! able to copy models and the "correct" copy semantics are not obvious. -class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable -{ +class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable { friend class CModelDetailsView; +public: + using TSizeVec = std::vector; + using TDoubleVec = std::vector; + using TDouble1Vec = core::CSmallVector; + using TDouble4Vec = core::CSmallVector; + using TDouble10Vec = core::CSmallVector; + using TDouble4Vec1Vec = core::CSmallVector; + using TDouble10Vec1Vec = core::CSmallVector; + using TDouble10Vec4Vec = core::CSmallVector; + using TDouble10Vec4Vec1Vec = core::CSmallVector; + using TDouble1VecDoublePr = std::pair; + using TDouble1VecDouble1VecPr = std::pair; + using TSizeDoublePr = std::pair; + using TSizeDoublePr1Vec = core::CSmallVector; + using TSize1Vec = core::CSmallVector; + using TSize2Vec = core::CSmallVector; + using TSize2Vec1Vec = core::CSmallVector; + using TDoubleDoublePr = std::pair; + using TDoubleDoublePrVec = std::vector; + using TSizeSizePr = std::pair; + using TStr1Vec = core::CSmallVector; + using TOptionalDouble = boost::optional; + using TOptionalDoubleVec = std::vector; + using TOptionalUInt64 = boost::optional; + using TOptionalSize = boost::optional; + using TAttributeProbability1Vec = core::CSmallVector; + using TInfluenceCalculatorCPtr = boost::shared_ptr; + using TFeatureInfluenceCalculatorCPtrPr = std::pair; + using TFeatureInfluenceCalculatorCPtrPrVec = std::vector; + using TFeatureInfluenceCalculatorCPtrPrVecVec = std::vector; + using TMultivariatePriorPtr = boost::shared_ptr; + using TFeatureMultivariatePriorPtrPr = std::pair; + using TFeatureMultivariatePriorPtrPrVec = std::vector; + using TMathsModelPtr = boost::shared_ptr; + using TMathsModelPtrVec = std::vector; + using TDataGathererPtr = boost::shared_ptr; + using TModelPtr = boost::shared_ptr; + using TModelCPtr = boost::shared_ptr; + using TCorrelationsPtr = boost::shared_ptr; + using CModelDetailsViewPtr = std::auto_ptr; + +public: + //! A value used to indicate a time variable is unset + static const core_t::TTime TIME_UNSET; + +public: + //! \name Life-cycle. + //@{ + //! \param[in] params The global configuration parameters. + //! \param[in] dataGatherer The object that gathers time series data. + //! \param[in] influenceCalculators The influence calculators to use + //! for each feature. 
+ CAnomalyDetectorModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators); + + //! Create a copy that will result in the same persisted state as the + //! original. This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken for + //! a general purpose copy constructor. + CAnomalyDetectorModel(bool isForPersistence, const CAnomalyDetectorModel& other); + + virtual ~CAnomalyDetectorModel() = default; + //@} + + //! Get a human understandable description of the model for debugging. + std::string description() const; + + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0; + + //! Restore the model reading state from the supplied traverser. + virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) = 0; + + //! Create a clone of this model that will result in the same persisted + //! state. The clone may be incomplete in ways that do not affect the + //! persisted representation, and must not be used for any other + //! purpose. + //! \warning The caller owns the object returned. + virtual CAnomalyDetectorModel* cloneForPersistence() const = 0; + //@} + + //! Get the model category. + virtual model_t::EModelType category() const = 0; + + //! True if this is a population model. + virtual bool isPopulation() const = 0; + + //! Check if this is an event rate model. + virtual bool isEventRate() const = 0; + + //! Check if this is a metric model. + virtual bool isMetric() const = 0; + + //! \name Bucket Statistics + //!@{ + //! Get the count of the bucketing interval containing \p time + //! for the person identified by \p pid. + //! + //! \param[in] pid The identifier of the person of interest. + //! \param[in] time The time of interest. + //! \return The count in the bucketing interval at \p time for the + //! person identified by \p pid if available and null otherwise. + virtual TOptionalUInt64 currentBucketCount(std::size_t pid, core_t::TTime time) const = 0; + + //! Get the mean count of the person identified by \p pid in the + //! reference data set (for comparison). + //! + //! \param[in] pid The identifier of the person of interest. + virtual TOptionalDouble baselineBucketCount(std::size_t pid) const = 0; + + //! Get the bucket value of \p feature for the person identified + //! by \p pid and the attribute identified by \p cid in the + //! bucketing interval including \p time. + //! + //! \param[in] feature The feature of interest. + //! \param[in] pid The identifier of the person of interest. + //! \param[in] cid The identifier of the attribute of interest. + //! \param[in] time The time of interest. + //! \return The value of \p feature in the bucket containing + //! \p time if available and empty otherwise. + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const = 0; + + //! Get the appropriate baseline bucket value of \p feature for + //! the person identified by \p pid and the attribute identified + //! by \p cid as of the start of the current bucketing interval. + //! This has subtly different meanings dependent on the model. + //! + //! \param[in] feature The feature of interest. + //! \param[in] pid The identifier of the person of interest. 
+ //! \param[in] cid The identifier of the attribute of interest. + //! \param[in] type A description of the type of result for which + //! to get the baseline. See CResultType for more details. + //! \param[in] correlated The correlated series' identifiers and + //! their values if any. + //! \param[in] time The time of interest. + //! \return The baseline mean value of \p feature if available + //! and empty otherwise. + virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + model_t::CResultType type, + const TSizeDoublePr1Vec& correlated, + core_t::TTime time) const = 0; + + //! Check if bucket statistics are available for the specified time. + virtual bool bucketStatsAvailable(core_t::TTime time) const = 0; + //@} + + //! \name Person + //@{ + //! Get the name of the person identified by \p pid. This returns + //! a default fallback string if the person doesn't exist. + const std::string& personName(std::size_t pid) const; + + //! As above but with a specified fallback. + const std::string& personName(std::size_t pid, const std::string& fallback) const; + + //! Print the people identified by \p pids. + //! Optionally, this may be limited to return a string of the form: + //! A B C and n others + std::string printPeople(const TSizeVec& pids, size_t limit = std::numeric_limits::max()) const; + + //! Get the person unique identifiers which have a feature value + //! in the bucketing time interval including \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the person identifiers + //! in the bucketing time interval of interest. + virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const = 0; + + // TODO this needs to be renamed to numberOfActivePeople, and + // the places where it is used carefully checked + // (currently only CModelInspector) + //! Get the total number of people currently being modeled. + std::size_t numberOfPeople() const; + //@} + + //! \name Attribute + //@{ + //! Get the name of the attribute identified by \p cid. This returns + //! a default fallback string if the attribute doesn't exist. + //! + //! \param[in] cid The identifier of the attribute of interest. + const std::string& attributeName(std::size_t cid) const; + + //! As above but with a specified fallback. + const std::string& attributeName(std::size_t cid, const std::string& fallback) const; + + //! Print the attributes identified by \p cids. + //! Optionally, this may be limited to return a string of the form: + //! A B C and n others + std::string printAttributes(const TSizeVec& cids, size_t limit = std::numeric_limits::max()) const; + //@} + + //! \name Update + //@{ + //! This samples the bucket statistics, and any state needed + //! by computeProbablity, in the time interval [\p startTime, + //! \p endTime], but does not update the model. This is needed + //! by the results preview. + //! + //! \param[in] startTime The start of the time interval to sample. + //! \param[in] endTime The end of the time interval to sample. + virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; + + //! Update the model with the samples of the process in the + //! time interval [\p startTime, \p endTime]. + //! + //! \param[in] startTime The start of the time interval to sample. + //! \param[in] endTime The end of the time interval to sample. + //! \param[in] resourceMonitor The resourceMonitor. 
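A hedged sketch of one bucket's processing against this interface; 'model' stands for any concrete CAnomalyDetectorModel, while 'resourceMonitor', 'bucketStart' and the interim flag are assumed to exist in the caller.

ml::core_t::TTime bucketEnd{bucketStart + model.bucketLength()};
if (isInterim) {
    // Preview path: gather the bucket statistics without updating
    // the model.
    model.sampleBucketStatistics(bucketStart, bucketEnd, resourceMonitor);
} else {
    model.sample(bucketStart, bucketEnd, resourceMonitor);
}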
+ virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; + + //! This samples the bucket statistics, and any state needed + //! by computeProbablity, in the time interval [\p startTime, + //! \p endTime], but does not update the model. This is needed + //! by the results preview. + //! + //! \param[in] startTime The start of the time interval to sample. + //! \param[in] endTime The end of the time interval to sample. + virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; + + //! Rolls time to \p endTime while skipping sampling the models for + //! buckets within the gap. + //! + //! \param[in] endTime The end of the time interval to skip sampling. + void skipSampling(core_t::TTime endTime); + + //! Prune any person models which haven't been updated for a + //! specified period. + virtual void prune(std::size_t maximumAge) = 0; + + //! Prune any person models which haven't been updated for a + //! sufficiently long period, based on the prior decay rates. + void prune(); + + //! Calculate the maximum permitted prune window for this model + std::size_t defaultPruneWindow() const; + + //! Calculate the minimum permitted prune window for this model + std::size_t minimumPruneWindow() const; + //@} + + //! \name Probability + //@{ + //! Compute the probability of seeing the samples of the process + //! for the person identified by \p pid in the time interval + //! [\p startTime, \p endTime]. + //! + //! \param[in] pid The unique identifier of the person of interest. + //! \param[in] startTime The start of the time interval of interest. + //! \param[in] endTime The end of the time interval of interest. + //! \param[in] partitioningFields The partitioning field (name, value) + //! pairs for which to compute the the probability. + //! \param[in] numberAttributeProbabilities The maximum number of + //! attribute probabilities to retrieve. + //! \param[out] result A structure containing the probability, + //! the smallest \p numberAttributeProbabilities attribute + //! probabilities, the influences and any extra descriptive data. + virtual bool computeProbability(std::size_t pid, + core_t::TTime startTime, + core_t::TTime endTime, + CPartitioningFields& partitioningFields, + std::size_t numberAttributeProbabilities, + SAnnotatedProbability& result) const = 0; + + //! Update the results with this model's probability. + //! + //! \param[in] detector An identifier of the detector generating this + //! result. + //! \param[in] startTime The start of the time interval of interest. + //! \param[in] endTime The end of the time interval of interest. + //! \param[in] numberAttributeProbabilities The maximum number of + //! attribute probabilities to retrieve. + //! \param[in,out] results The model results are added. + bool addResults(int detector, + core_t::TTime startTime, + core_t::TTime endTime, + std::size_t numberAttributeProbabilities, + CHierarchicalResults& results) const; + + //! Compute the probability of seeing \p person's attribute processes + //! so far given the population distributions. + //! + //! \param[in] person The person of interest. + //! \param[in] numberAttributeProbabilities The maximum number of + //! attribute probabilities to retrieve. + //! \param[out] probability Filled in with the probability of seeing + //! the person's processes given the population processes. + //! \param[out] attributeProbabilities Filled in with the smallest + //! 
\p numberAttributeProbabilities attribute probabilities and + //! associated data describing the calculation. + virtual bool computeTotalProbability(const std::string& person, + std::size_t numberAttributeProbabilities, + TOptionalDouble& probability, + TAttributeProbability1Vec& attributeProbabilities) const = 0; + //@} + + //! Get the checksum of this model. + //! + //! \param[in] includeCurrentBucketStats If true then include + //! the current bucket statistics. (This is designed to handle + //! serialization, for which we don't serialize the current + //! bucket statistics.) + virtual uint64_t checksum(bool includeCurrentBucketStats = true) const = 0; + + //! Get the memory used by this model + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; + + //! Get the memory used by this model + virtual std::size_t memoryUsage() const = 0; + + //! Estimate the memory usage of the model based on number of people, + //! attributes and correlations. Returns empty when the estimator + //! is unable to produce an estimate. + TOptionalSize estimateMemoryUsage(std::size_t numberPeople, std::size_t numberAttributes, std::size_t numberCorrelations) const; + + //! Estimate the memory usage of the model based on number of people, + //! attributes and correlations. When an estimate cannot be produced, + //! the memory usage is computed and the estimator is updated. + std::size_t + estimateMemoryUsageOrComputeAndUpdate(std::size_t numberPeople, std::size_t numberAttributes, std::size_t numberCorrelations); + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const = 0; + + //! Get the time series data gatherer. + const CDataGatherer& dataGatherer() const; + //! Get the time series data gatherer. + CDataGatherer& dataGatherer(); + + //! Get the length of the time interval used to aggregate data. + core_t::TTime bucketLength() const; + + //! Get a view of the internals of the model for visualization. + virtual CModelDetailsViewPtr details() const = 0; + + //! Get the frequency of the person identified by \p pid. + double personFrequency(std::size_t pid) const; + //! Get the frequency of the attribute identified by \p cid. + virtual double attributeFrequency(std::size_t cid) const = 0; + + //! Returns true if the the \p is an unset first bucket time + static bool isTimeUnset(core_t::TTime); + + //! Get the descriptions of any occurring scheduled event descriptions for the bucket time + virtual const TStr1Vec& scheduledEventDescriptions(core_t::TTime time) const; + +protected: + using TStrCRef = boost::reference_wrapper; + using TSizeSize1VecUMap = boost::unordered_map; + using TFeatureSizeSize1VecUMapPr = std::pair; + using TFeatureSizeSize1VecUMapPrVec = std::vector; + + //! \brief The feature models. + struct MODEL_EXPORT SFeatureModels { + SFeatureModels(model_t::EFeature feature, TMathsModelPtr newModel); + + //! Restore the models reading state from \p traverser. + bool acceptRestoreTraverser(const SModelParams& params, core::CStateRestoreTraverser& traverser); + //! Persist the models passing state to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Debug the memory used by this model. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Get the memory used by this model. + std::size_t memoryUsage() const; + + //! The feature. + model_t::EFeature s_Feature; + //! A prototype model. + TMathsModelPtr s_NewModel; + //! The person models. 
+
+    //! Get the static size of this object - used for virtual hierarchies.
+    virtual std::size_t staticSize() const = 0;
+
+    //! Get the time series data gatherer.
+    const CDataGatherer& dataGatherer() const;
+    //! Get the time series data gatherer.
+    CDataGatherer& dataGatherer();
+
+    //! Get the length of the time interval used to aggregate data.
+    core_t::TTime bucketLength() const;
+
+    //! Get a view of the internals of the model for visualization.
+    virtual CModelDetailsViewPtr details() const = 0;
+
+    //! Get the frequency of the person identified by \p pid.
+    double personFrequency(std::size_t pid) const;
+    //! Get the frequency of the attribute identified by \p cid.
+    virtual double attributeFrequency(std::size_t cid) const = 0;
+
+    //! Returns true if the given time is an unset first bucket time.
+    static bool isTimeUnset(core_t::TTime);
+
+    //! Get the descriptions of any scheduled events occurring at the bucket time.
+    virtual const TStr1Vec& scheduledEventDescriptions(core_t::TTime time) const;
+
+protected:
+    using TStrCRef = boost::reference_wrapper<const std::string>;
+    using TSizeSize1VecUMap = boost::unordered_map<std::size_t, TSize1Vec>;
+    using TFeatureSizeSize1VecUMapPr = std::pair<model_t::EFeature, TSizeSize1VecUMap>;
+    using TFeatureSizeSize1VecUMapPrVec = std::vector<TFeatureSizeSize1VecUMapPr>;
+
+    //! \brief The feature models.
+    struct MODEL_EXPORT SFeatureModels {
+        SFeatureModels(model_t::EFeature feature, TMathsModelPtr newModel);
+
+        //! Restore the models reading state from \p traverser.
+        bool acceptRestoreTraverser(const SModelParams& params, core::CStateRestoreTraverser& traverser);
+        //! Persist the models passing state to \p inserter.
+        void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+        //! Debug the memory used by this model.
+        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+        //! Get the memory used by this model.
+        std::size_t memoryUsage() const;
+
+        //! The feature.
+        model_t::EFeature s_Feature;
+        //! A prototype model.
+        TMathsModelPtr s_NewModel;
+        //! The person models.
+        TMathsModelPtrVec s_Models;
+    };
+    using TFeatureModelsVec = std::vector<SFeatureModels>;
+
+    //! \brief The feature correlate models.
+    struct MODEL_EXPORT SFeatureCorrelateModels {
+        SFeatureCorrelateModels(model_t::EFeature feature, TMultivariatePriorPtr modelPrior, TCorrelationsPtr model);
+
+        //! Restore the models reading state from \p traverser.
+        bool acceptRestoreTraverser(const SModelParams& params, core::CStateRestoreTraverser& traverser);
+        //! Persist the models passing state to \p inserter.
+        void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+        //! Debug the memory used by this model.
+        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+        //! Get the memory used by this model.
+        std::size_t memoryUsage() const;
+
+        //! The feature.
+        model_t::EFeature s_Feature;
+        //! The prototype prior for a correlate model.
+        TMultivariatePriorPtr s_ModelPrior;
+        //! The correlate models.
+        TCorrelationsPtr s_Models;
+    };
+    using TFeatureCorrelateModelsVec = std::vector<SFeatureCorrelateModels>;
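[Editor's note: the structs above give each feature a prototype model plus one maths::CModel per person (or per attribute for population models), indexed by identifier. A hypothetical helper, not taken from the codebase, showing how a derived class typically addresses this layout.]

    // Sketch: look up the maths::CModel for (feature, pid), if any.
    const ml::maths::CModel* modelFor(const TFeatureModelsVec& featureModels,
                                      ml::model_t::EFeature feature,
                                      std::size_t pid) {
        for (const auto& models : featureModels) {
            if (models.s_Feature == feature && pid < models.s_Models.size()) {
                return models.s_Models[pid].get();
            }
        }
        return nullptr; // Unknown feature or person not yet observed.
    }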
+
+    //! \brief Implements the allocator for new correlate priors.
+    class CTimeSeriesCorrelateModelAllocator : public maths::CTimeSeriesCorrelateModelAllocator {
+    public:
-        using TSizeVec = std::vector<std::size_t>;
-        using TDoubleVec = std::vector<double>;
-        using TDouble1Vec = core::CSmallVector<double, 1>;
-        using TDouble4Vec = core::CSmallVector<double, 4>;
-        using TDouble10Vec = core::CSmallVector<double, 10>;
-        using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
-        using TDouble10Vec1Vec = core::CSmallVector<TDouble10Vec, 1>;
-        using TDouble10Vec4Vec = core::CSmallVector<TDouble10Vec, 4>;
-        using TDouble10Vec4Vec1Vec = core::CSmallVector<TDouble10Vec4Vec, 1>;
-        using TDouble1VecDoublePr = std::pair<TDouble1Vec, double>;
-        using TDouble1VecDouble1VecPr = std::pair<TDouble1Vec, TDouble1Vec>;
-        using TSizeDoublePr = std::pair<std::size_t, double>;
-        using TSizeDoublePr1Vec = core::CSmallVector<TSizeDoublePr, 1>;
-        using TSize1Vec = core::CSmallVector<std::size_t, 1>;
-        using TSize2Vec = core::CSmallVector<std::size_t, 2>;
-        using TSize2Vec1Vec = core::CSmallVector<TSize2Vec, 1>;
-        using TDoubleDoublePr = std::pair<double, double>;
-        using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
-        using TSizeSizePr = std::pair<std::size_t, std::size_t>;
-        using TStr1Vec = core::CSmallVector<std::string, 1>;
-        using TOptionalDouble = boost::optional<double>;
-        using TOptionalDoubleVec = std::vector<TOptionalDouble>;
-        using TOptionalUInt64 = boost::optional<uint64_t>;
-        using TOptionalSize = boost::optional<std::size_t>;
-        using TAttributeProbability1Vec = core::CSmallVector<SAttributeProbability, 1>;
-        using TInfluenceCalculatorCPtr = boost::shared_ptr<const CInfluenceCalculator>;
-        using TFeatureInfluenceCalculatorCPtrPr = std::pair<model_t::EFeature, TInfluenceCalculatorCPtr>;
-        using TFeatureInfluenceCalculatorCPtrPrVec = std::vector<TFeatureInfluenceCalculatorCPtrPr>;
-        using TFeatureInfluenceCalculatorCPtrPrVecVec = std::vector<TFeatureInfluenceCalculatorCPtrPrVec>;
-        using TMultivariatePriorPtr = boost::shared_ptr<maths::CMultivariatePrior>;
-        using TFeatureMultivariatePriorPtrPr = std::pair<model_t::EFeature, TMultivariatePriorPtr>;
-        using TFeatureMultivariatePriorPtrPrVec = std::vector<TFeatureMultivariatePriorPtrPr>;
-        using TMathsModelPtr = boost::shared_ptr<maths::CModel>;
-        using TMathsModelPtrVec = std::vector<TMathsModelPtr>;
-        using TDataGathererPtr = boost::shared_ptr<CDataGatherer>;
-        using TModelPtr = boost::shared_ptr<CAnomalyDetectorModel>;
-        using TModelCPtr = boost::shared_ptr<const CAnomalyDetectorModel>;
-        using TCorrelationsPtr = boost::shared_ptr<maths::CTimeSeriesCorrelations>;
-        using CModelDetailsViewPtr = std::auto_ptr<CModelDetailsView>;
+        using TMemoryUsage = std::function<std::size_t()>;
+
+    public:
-        //! A value used to indicate a time variable is unset
-        static const core_t::TTime TIME_UNSET;
+        CTimeSeriesCorrelateModelAllocator(CResourceMonitor& resourceMonitor,
+                                           TMemoryUsage memoryUsage,
+                                           std::size_t resourceLimit,
+                                           std::size_t maxNumberCorrelations);
-    public:
-        //! \name Life-cycle.
-        //@{
-        //! \param[in] params The global configuration parameters.
-        //! \param[in] dataGatherer The object that gathers time series data.
-        //! \param[in] influenceCalculators The influence calculators to use
-        //! for each feature.
-        CAnomalyDetectorModel(const SModelParams &params,
-                              const TDataGathererPtr &dataGatherer,
-                              const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators);
-
-        //! Create a copy that will result in the same persisted state as the
-        //! original. This is effectively a copy constructor that creates a
-        //! copy that's only valid for a single purpose. The boolean flag is
-        //! redundant except to create a signature that will not be mistaken for
-        //! a general purpose copy constructor.
-        CAnomalyDetectorModel(bool isForPersistence, const CAnomalyDetectorModel &other);
-
-        virtual ~CAnomalyDetectorModel() = default;
-        //@}
-
-        //! Get a human understandable description of the model for debugging.
-        std::string description() const;
-
-        //! \name Persistence
-        //@{
-        //! Persist state by passing information to the supplied inserter.
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0;
-
-        //! Restore the model reading state from the supplied traverser.
-        virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) = 0;
-
-        //! Create a clone of this model that will result in the same persisted
-        //! state. The clone may be incomplete in ways that do not affect the
-        //! persisted representation, and must not be used for any other
-        //! purpose.
-        //! \warning The caller owns the object returned.
-        virtual CAnomalyDetectorModel *cloneForPersistence() const = 0;
-        //@}
-
-        //! Get the model category.
-        virtual model_t::EModelType category() const = 0;
-
-        //! True if this is a population model.
-        virtual bool isPopulation() const = 0;
-
-        //! Check if this is an event rate model.
-        virtual bool isEventRate() const = 0;
-
-        //! Check if this is a metric model.
-        virtual bool isMetric() const = 0;
-
-        //! \name Bucket Statistics
-        //!@{
-        //! Get the count of the bucketing interval containing \p time
-        //! for the person identified by \p pid.
-        //!
-        //! \param[in] pid The identifier of the person of interest.
-        //! \param[in] time The time of interest.
-        //! \return The count in the bucketing interval at \p time for the
-        //! person identified by \p pid if available and null otherwise.
-        virtual TOptionalUInt64 currentBucketCount(std::size_t pid,
-                                                   core_t::TTime time) const = 0;
-
-        //! Get the mean count of the person identified by \p pid in the
-        //! reference data set (for comparison).
-        //!
-        //! \param[in] pid The identifier of the person of interest.
-        virtual TOptionalDouble baselineBucketCount(std::size_t pid) const = 0;
-
-        //! Get the bucket value of \p feature for the person identified
-        //! by \p pid and the attribute identified by \p cid in the
-        //! bucketing interval including \p time.
-        //!
-        //! \param[in] feature The feature of interest.
-        //! \param[in] pid The identifier of the person of interest.
-        //! \param[in] cid The identifier of the attribute of interest.
-        //! \param[in] time The time of interest.
-        //! \return The value of \p feature in the bucket containing
-        //! \p time if available and empty otherwise.
-        virtual TDouble1Vec currentBucketValue(model_t::EFeature feature,
-                                               std::size_t pid,
-                                               std::size_t cid,
-                                               core_t::TTime time) const = 0;
-
-        //! Get the appropriate baseline bucket value of \p feature for
-        //! the person identified by \p pid and the attribute identified
-        //! by \p cid as of the start of the current bucketing interval.
-        //! This has subtly different meanings dependent on the model.
-        //!
-        //! \param[in] feature The feature of interest.
-        //! \param[in] pid The identifier of the person of interest.
-        //! \param[in] cid The identifier of the attribute of interest.
-        //! \param[in] type A description of the type of result for which
-        //! to get the baseline. See CResultType for more details.
-        //! \param[in] correlated The correlated series' identifiers and
-        //! their values if any.
-        //! \param[in] time The time of interest.
-        //! \return The baseline mean value of \p feature if available
-        //! and empty otherwise.
-        virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature,
-                                               std::size_t pid,
-                                               std::size_t cid,
-                                               model_t::CResultType type,
-                                               const TSizeDoublePr1Vec &correlated,
-                                               core_t::TTime time) const = 0;
-
-        //! Check if bucket statistics are available for the specified time.
-        virtual bool bucketStatsAvailable(core_t::TTime time) const = 0;
-        //@}
-
-        //! \name Person
-        //@{
-        //! Get the name of the person identified by \p pid. This returns
-        //! a default fallback string if the person doesn't exist.
-        const std::string &personName(std::size_t pid) const;
-
-        //! As above but with a specified fallback.
-        const std::string &personName(std::size_t pid, const std::string &fallback) const;
-
-        //! Print the people identified by \p pids.
-        //! Optionally, this may be limited to return a string of the form:
-        //! A B C and n others
-        std::string printPeople(const TSizeVec &pids,
-                                size_t limit = std::numeric_limits<size_t>::max()) const;
-
-        //! Get the person unique identifiers which have a feature value
-        //! in the bucketing time interval including \p time.
-        //!
-        //! \param[in] time The time of interest.
-        //! \param[out] result Filled in with the person identifiers
-        //! in the bucketing time interval of interest.
-        virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec &result) const = 0;
-
-        // TODO this needs to be renamed to numberOfActivePeople, and
-        // the places where it is used carefully checked
-        // (currently only CModelInspector)
-        //! Get the total number of people currently being modeled.
-        std::size_t numberOfPeople() const;
-        //@}
-
-        //! \name Attribute
-        //@{
-        //! Get the name of the attribute identified by \p cid. This returns
-        //! a default fallback string if the attribute doesn't exist.
-        //!
-        //! \param[in] cid The identifier of the attribute of interest.
-        const std::string &attributeName(std::size_t cid) const;
-
-        //! As above but with a specified fallback.
-        const std::string &attributeName(std::size_t cid,
-                                         const std::string &fallback) const;
-
-        //! Print the attributes identified by \p cids.
-        //! Optionally, this may be limited to return a string of the form:
-        //! A B C and n others
-        std::string printAttributes(const TSizeVec &cids,
-                                    size_t limit = std::numeric_limits<size_t>::max()) const;
-        //@}
-
-        //! \name Update
-        //@{
-        //! This samples the bucket statistics, and any state needed
-        //! by computeProbability, in the time interval [\p startTime,
-        //! \p endTime], but does not update the model. This is needed
-        //! by the results preview.
-        //!
-        //! \param[in] startTime The start of the time interval to sample.
-        //! \param[in] endTime The end of the time interval to sample.
-        virtual void sampleBucketStatistics(core_t::TTime startTime,
-                                            core_t::TTime endTime,
-                                            CResourceMonitor &resourceMonitor) = 0;
-
-        //! Update the model with the samples of the process in the
-        //! time interval [\p startTime, \p endTime].
-        //!
-        //! \param[in] startTime The start of the time interval to sample.
-        //! \param[in] endTime The end of the time interval to sample.
-        //! \param[in] resourceMonitor The resource monitor.
-        virtual void sample(core_t::TTime startTime,
-                            core_t::TTime endTime,
-                            CResourceMonitor &resourceMonitor) = 0;
-
-        //! This samples the bucket statistics, and any state needed
-        //! by computeProbability, in the time interval [\p startTime,
-        //! \p endTime], but does not update the model. This is needed
-        //! by the results preview.
-        //!
-        //! \param[in] startTime The start of the time interval to sample.
-        //! \param[in] endTime The end of the time interval to sample.
-        virtual void sampleOutOfPhase(core_t::TTime startTime,
-                                      core_t::TTime endTime,
-                                      CResourceMonitor &resourceMonitor) = 0;
-
-        //! Rolls time to \p endTime while skipping sampling the models for
-        //! buckets within the gap.
-        //!
-        //! \param[in] endTime The end of the time interval to skip sampling.
-        void skipSampling(core_t::TTime endTime);
-
-        //! Prune any person models which haven't been updated for a
-        //! specified period.
-        virtual void prune(std::size_t maximumAge) = 0;
-
-        //! Prune any person models which haven't been updated for a
-        //! sufficiently long period, based on the prior decay rates.
-        void prune();
-
-        //! Calculate the maximum permitted prune window for this model.
-        std::size_t defaultPruneWindow() const;
-
-        //! Calculate the minimum permitted prune window for this model.
-        std::size_t minimumPruneWindow() const;
-        //@}
-
-        //! \name Probability
-        //@{
-        //! Compute the probability of seeing the samples of the process
-        //! for the person identified by \p pid in the time interval
-        //! [\p startTime, \p endTime].
-        //!
-        //! \param[in] pid The unique identifier of the person of interest.
-        //! \param[in] startTime The start of the time interval of interest.
-        //! \param[in] endTime The end of the time interval of interest.
-        //! \param[in] partitioningFields The partitioning field (name, value)
-        //! pairs for which to compute the probability.
-        //! \param[in] numberAttributeProbabilities The maximum number of
-        //! attribute probabilities to retrieve.
-        //! \param[out] result A structure containing the probability,
-        //! the smallest \p numberAttributeProbabilities attribute
-        //! probabilities, the influences and any extra descriptive data.
-        virtual bool computeProbability(std::size_t pid,
-                                        core_t::TTime startTime,
-                                        core_t::TTime endTime,
-                                        CPartitioningFields &partitioningFields,
-                                        std::size_t numberAttributeProbabilities,
-                                        SAnnotatedProbability &result) const = 0;
-
-        //! Update the results with this model's probability.
-        //!
-        //! \param[in] detector An identifier of the detector generating this
-        //! result.
-        //! \param[in] startTime The start of the time interval of interest.
-        //! \param[in] endTime The end of the time interval of interest.
-        //! \param[in] numberAttributeProbabilities The maximum number of
-        //! attribute probabilities to retrieve.
-        //! \param[in,out] results The results to which this model's results are added.
-        bool addResults(int detector,
-                        core_t::TTime startTime,
-                        core_t::TTime endTime,
-                        std::size_t numberAttributeProbabilities,
-                        CHierarchicalResults &results) const;
-
-        //! Compute the probability of seeing \p person's attribute processes
-        //! so far given the population distributions.
-        //!
-        //! \param[in] person The person of interest.
-        //! \param[in] numberAttributeProbabilities The maximum number of
-        //! attribute probabilities to retrieve.
-        //! \param[out] probability Filled in with the probability of seeing
-        //! the person's processes given the population processes.
-        //! \param[out] attributeProbabilities Filled in with the smallest
-        //! \p numberAttributeProbabilities attribute probabilities and
-        //! associated data describing the calculation.
-        virtual bool computeTotalProbability(const std::string &person,
-                                             std::size_t numberAttributeProbabilities,
-                                             TOptionalDouble &probability,
-                                             TAttributeProbability1Vec &attributeProbabilities) const = 0;
-        //@}
-
-        //! Get the checksum of this model.
-        //!
-        //! \param[in] includeCurrentBucketStats If true then include
-        //! the current bucket statistics. (This is designed to handle
-        //! serialization, for which we don't serialize the current
-        //! bucket statistics.)
-        virtual uint64_t checksum(bool includeCurrentBucketStats = true) const = 0;
-
-        //! Debug the memory used by this model.
-        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
-
-        //! Get the memory used by this model.
-        virtual std::size_t memoryUsage() const = 0;
-
-        //! Estimate the memory usage of the model based on the number of people,
-        //! attributes and correlations. Returns empty when the estimator
-        //! is unable to produce an estimate.
-        TOptionalSize estimateMemoryUsage(std::size_t numberPeople,
-                                          std::size_t numberAttributes,
-                                          std::size_t numberCorrelations) const;
-
-        //! Estimate the memory usage of the model based on the number of people,
-        //! attributes and correlations. When an estimate cannot be produced,
-        //! the memory usage is computed and the estimator is updated.
-        std::size_t estimateMemoryUsageOrComputeAndUpdate(std::size_t numberPeople,
-                                                          std::size_t numberAttributes,
-                                                          std::size_t numberCorrelations);
-
-        //! Get the static size of this object - used for virtual hierarchies.
-        virtual std::size_t staticSize() const = 0;
-
-        //! Get the time series data gatherer.
-        const CDataGatherer &dataGatherer() const;
-        //! Get the time series data gatherer.
-        CDataGatherer &dataGatherer();
-
-        //! Get the length of the time interval used to aggregate data.
-        core_t::TTime bucketLength() const;
-
-        //! Get a view of the internals of the model for visualization.
-        virtual CModelDetailsViewPtr details() const = 0;
-
-        //! Get the frequency of the person identified by \p pid.
-        double personFrequency(std::size_t pid) const;
-        //! Get the frequency of the attribute identified by \p cid.
-        virtual double attributeFrequency(std::size_t cid) const = 0;
-
-        //! Returns true if the given time is an unset first bucket time.
-        static bool isTimeUnset(core_t::TTime);
-
-        //! Get the descriptions of any scheduled events occurring at the bucket time.
-        virtual const TStr1Vec &scheduledEventDescriptions(core_t::TTime time) const;
-
-    protected:
-        using TStrCRef = boost::reference_wrapper<const std::string>;
-        using TSizeSize1VecUMap = boost::unordered_map<std::size_t, TSize1Vec>;
-        using TFeatureSizeSize1VecUMapPr = std::pair<model_t::EFeature, TSizeSize1VecUMap>;
-        using TFeatureSizeSize1VecUMapPrVec = std::vector<TFeatureSizeSize1VecUMapPr>;
-
-        //! \brief The feature models.
-        struct MODEL_EXPORT SFeatureModels
-        {
-            SFeatureModels(model_t::EFeature feature, TMathsModelPtr newModel);
-
-            //! Restore the models reading state from \p traverser.
-            bool acceptRestoreTraverser(const SModelParams &params,
-                                        core::CStateRestoreTraverser &traverser);
-            //! Persist the models passing state to \p inserter.
-            void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-            //! Debug the memory used by this model.
-            void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-            //! Get the memory used by this model.
-            std::size_t memoryUsage() const;
-
-            //! The feature.
-            model_t::EFeature s_Feature;
-            //! A prototype model.
-            TMathsModelPtr s_NewModel;
-            //! The person models.
-            TMathsModelPtrVec s_Models;
-        };
-        using TFeatureModelsVec = std::vector<SFeatureModels>;
-
-        //! \brief The feature correlate models.
-        struct MODEL_EXPORT SFeatureCorrelateModels
-        {
-            SFeatureCorrelateModels(model_t::EFeature feature,
-                                    TMultivariatePriorPtr modelPrior,
-                                    TCorrelationsPtr model);
-
-            //! Restore the models reading state from \p traverser.
-            bool acceptRestoreTraverser(const SModelParams &params,
-                                        core::CStateRestoreTraverser &traverser);
-            //! Persist the models passing state to \p inserter.
-            void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-            //! Debug the memory used by this model.
-            void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-            //! Get the memory used by this model.
-            std::size_t memoryUsage() const;
-
-            //! The feature.
-            model_t::EFeature s_Feature;
-            //! The prototype prior for a correlate model.
-            TMultivariatePriorPtr s_ModelPrior;
-            //! The correlate models.
-            TCorrelationsPtr s_Models;
-        };
-        using TFeatureCorrelateModelsVec = std::vector<SFeatureCorrelateModels>;
-
-        //! \brief Implements the allocator for new correlate priors.
-        class CTimeSeriesCorrelateModelAllocator : public maths::CTimeSeriesCorrelateModelAllocator
-        {
-            public:
-                using TMemoryUsage = std::function<std::size_t()>;
-
-            public:
-                CTimeSeriesCorrelateModelAllocator(CResourceMonitor &resourceMonitor,
-                                                   TMemoryUsage memoryUsage,
-                                                   std::size_t resourceLimit,
-                                                   std::size_t maxNumberCorrelations);
-
-                //! Check if we can still allocate any correlations.
-                virtual bool areAllocationsAllowed() const;
-
-                //! Check if \p correlations exceeds the memory limit.
-                virtual bool exceedsLimit(std::size_t correlations) const;
-
-                //! Get the maximum number of correlations we should model.
-                virtual std::size_t maxNumberCorrelations() const;
-
-                //! Get the chunk size in which to allocate correlations.
-                virtual std::size_t chunkSize() const;
-
-                //! Create a new prior for a correlation model.
-                virtual TMultivariatePriorPtr newPrior() const;
-
-                //! Set the prototype prior.
-                void prototypePrior(const TMultivariatePriorPtr &prior);
-
-            private:
-                //! The global resource monitor.
-                CResourceMonitor *m_ResourceMonitor;
-                //! Computes the current memory usage.
-                TMemoryUsage m_MemoryUsage;
-                //! The number of correlations which can still be modeled.
-                std::size_t m_ResourceLimit;
-                //! The maximum permitted number of correlations which can be modeled.
-                std::size_t m_MaxNumberCorrelations;
-                //! The prototype correlate prior.
-                TMultivariatePriorPtr m_PrototypePrior;
-        };
-
-    protected:
-        //! The maximum time a person or attribute is allowed to live
-        //! without update.
-        static const std::size_t MAXIMUM_PERMITTED_AGE;
-
-        //! Convenience for persistence.
-        static const std::string EMPTY_STRING;
-
-    protected:
-        //! Remove heavy hitting people from the \p data if necessary.
-        template<typename FILTER, typename T>
-        void applyFilter(model_t::EExcludeFrequent exclude,
-                         bool updateStatistics,
-                         const FILTER &filter,
-                         T &data) const
-        {
-            if (this->params().s_ExcludeFrequent & exclude)
-            {
-                std::size_t initialSize = data.size();
-                data.erase(std::remove_if(data.begin(), data.end(), filter), data.end());
-                if (updateStatistics && data.size() != initialSize)
-                {
-                    core::CStatistics::stat(stat_t::E_NumberExcludedFrequentInvocations).increment(1);
-                }
-            }
-        }
+
+        //! Check if we can still allocate any correlations.
+        virtual bool areAllocationsAllowed() const;
+
+        //! Check if \p correlations exceeds the memory limit.
+        virtual bool exceedsLimit(std::size_t correlations) const;
+
+        //! Get the maximum number of correlations we should model.
+        virtual std::size_t maxNumberCorrelations() const;
+
+        //! Get the chunk size in which to allocate correlations.
+        virtual std::size_t chunkSize() const;
+
+        //! Create a new prior for a correlation model.
+        virtual TMultivariatePriorPtr newPrior() const;
+
+        //! Set the prototype prior.
+        void prototypePrior(const TMultivariatePriorPtr& prior);
+
+    private:
+        //! The global resource monitor.
+        CResourceMonitor* m_ResourceMonitor;
+        //! Computes the current memory usage.
+        TMemoryUsage m_MemoryUsage;
+        //! The number of correlations which can still be modeled.
+        std::size_t m_ResourceLimit;
+        //! The maximum permitted number of correlations which can be modeled.
+        std::size_t m_MaxNumberCorrelations;
+        //! The prototype correlate prior.
+        TMultivariatePriorPtr m_PrototypePrior;
+    };
+
+protected:
+    //! The maximum time a person or attribute is allowed to live
+    //! without update.
+    static const std::size_t MAXIMUM_PERMITTED_AGE;
+
+    //! Convenience for persistence.
+    static const std::string EMPTY_STRING;
+
+protected:
+    //! Remove heavy hitting people from the \p data if necessary.
+    template<typename FILTER, typename T>
+    void applyFilter(model_t::EExcludeFrequent exclude, bool updateStatistics, const FILTER& filter, T& data) const {
+        if (this->params().s_ExcludeFrequent & exclude) {
+            std::size_t initialSize = data.size();
+            data.erase(std::remove_if(data.begin(), data.end(), filter), data.end());
+            if (updateStatistics && data.size() != initialSize) {
+                core::CStatistics::stat(stat_t::E_NumberExcludedFrequentInvocations).increment(1);
+            }
+        }
+    }
-
-        //! Get the predicate used for removing heavy hitting people.
-        CPersonFrequencyGreaterThan personFilter() const;
+
+    //! Get the predicate used for removing heavy hitting people.
+    CPersonFrequencyGreaterThan personFilter() const;
-
-        //! Get the predicate used for removing heavy hitting attributes.
-        CAttributeFrequencyGreaterThan attributeFilter() const;
+
+    //! Get the predicate used for removing heavy hitting attributes.
+    CAttributeFrequencyGreaterThan attributeFilter() const;
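[Editor's note: a hypothetical derived-class fragment, not taken from the codebase, showing the intended use of applyFilter() with the predicate getters above. The element type the predicate is applied to depends on the caller's data, so treat this as a shape rather than a contract.]

    // Sketch: drop heavy-hitter people from this bucket's data before scoring.
    template<typename T>
    void excludeFrequentPeople(T& data) const {
        // personFilter() returns a predicate which is true for people whose
        // observed frequency exceeds the configured ExcludeFrequent threshold.
        this->applyFilter(ml::model_t::E_XF_Over, true, this->personFilter(), data);
    }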
-
-        //! Get the global configuration parameters.
-        const SModelParams &params() const;
+
+    //! Get the global configuration parameters.
+    const SModelParams& params() const;
-
-        //! Get the LearnRate parameter from the model configuration -
-        //! this may be affected by the current feature being used.
-        virtual double learnRate(model_t::EFeature feature) const;
+
+    //! Get the LearnRate parameter from the model configuration -
+    //! this may be affected by the current feature being used.
+    virtual double learnRate(model_t::EFeature feature) const;
-
-        //! Get the start time of the current bucket.
-        virtual core_t::TTime currentBucketStartTime() const = 0;
+
+    //! Get the start time of the current bucket.
+    virtual core_t::TTime currentBucketStartTime() const = 0;
-
-        //! Set the start time of the current bucket.
-        virtual void currentBucketStartTime(core_t::TTime time) = 0;
+
+    //! Set the start time of the current bucket.
+    virtual void currentBucketStartTime(core_t::TTime time) = 0;
-
-        //! Get the influence calculator for the influencer field identified
-        //! by \p iid and the \p feature.
-        const CInfluenceCalculator *influenceCalculator(model_t::EFeature feature,
-                                                        std::size_t iid) const;
+
+    //! Get the influence calculator for the influencer field identified
+    //! by \p iid and the \p feature.
+    const CInfluenceCalculator* influenceCalculator(model_t::EFeature feature, std::size_t iid) const;
-
-        //! Get the person bucket counts.
-        const TDoubleVec &personBucketCounts() const;
-        //! Writable access to the person bucket counts.
-        TDoubleVec &personBucketCounts();
-        //! Set the total count of buckets in the window.
-        void windowBucketCount(double windowBucketCount);
-        //! Get the total count of buckets in the window.
-        double windowBucketCount() const;
+
+    //! Get the person bucket counts.
+    const TDoubleVec& personBucketCounts() const;
+    //! Writable access to the person bucket counts.
+    TDoubleVec& personBucketCounts();
+    //! Set the total count of buckets in the window.
+    void windowBucketCount(double windowBucketCount);
+    //! Get the total count of buckets in the window.
+    double windowBucketCount() const;
-
-        //! Create the time series models for "n" newly observed people
-        //! and "m" newly observed attributes.
-        virtual void createNewModels(std::size_t n, std::size_t m) = 0;
+
+    //! Create the time series models for "n" newly observed people
+    //! and "m" newly observed attributes.
+    virtual void createNewModels(std::size_t n, std::size_t m) = 0;
-
-        //! Reinitialize the time series models for recycled people and/or
-        //! attributes.
-        virtual void updateRecycledModels() = 0;
+
+    //! Reinitialize the time series models for recycled people and/or
+    //! attributes.
+    virtual void updateRecycledModels() = 0;
-
-        //! Clear out large state objects for people/attributes that are pruned.
-        virtual void clearPrunedResources(const TSizeVec &people,
-                                          const TSizeVec &attributes) = 0;
+
+    //! Clear out large state objects for people/attributes that are pruned.
+    virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes) = 0;
-
-        //! Get the object which calculates corrections for interim buckets.
-        const CInterimBucketCorrector &interimValueCorrector() const;
+
+    //! Get the object which calculates corrections for interim buckets.
+    const CInterimBucketCorrector& interimValueCorrector() const;
-
-        //! Check if any of the sample-filtering detection rules apply to this series.
-        bool shouldIgnoreSample(model_t::EFeature feature,
-                                std::size_t pid,
-                                std::size_t cid,
-                                core_t::TTime time) const;
+
+    //! Check if any of the sample-filtering detection rules apply to this series.
+    bool shouldIgnoreSample(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const;
-
-        //! Check if any of the result-filtering detection rules apply to this series.
-        bool shouldIgnoreResult(model_t::EFeature feature,
-                                const model_t::CResultType &resultType,
-                                std::size_t pid,
-                                std::size_t cid,
-                                core_t::TTime time) const;
+
+    //! Check if any of the result-filtering detection rules apply to this series.
+    bool shouldIgnoreResult(model_t::EFeature feature,
+                            const model_t::CResultType& resultType,
+                            std::size_t pid,
+                            std::size_t cid,
+                            core_t::TTime time) const;
-
-        //! Get the non-estimated value of the memory used by this model.
-        virtual std::size_t computeMemoryUsage() const = 0;
+
+    //! Get the non-estimated value of the memory used by this model.
+    virtual std::size_t computeMemoryUsage() const = 0;
-
-        //! Restore the interim bucket corrector.
-        bool interimBucketCorrectorAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
+
+    //! Restore the interim bucket corrector.
+    bool interimBucketCorrectorAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
-
-        //! Persist the interim bucket corrector.
-        void interimBucketCorrectorAcceptPersistInserter(const std::string &tag,
-                                                         core::CStatePersistInserter &inserter) const;
+
+    //! Persist the interim bucket corrector.
+    void interimBucketCorrectorAcceptPersistInserter(const std::string& tag, core::CStatePersistInserter& inserter) const;
-
-        //! Create a stub version of maths::CModel for use when pruning people
-        //! or attributes to free memory resource.
-        static maths::CModel *tinyModel();
+
+    //! Create a stub version of maths::CModel for use when pruning people
+    //! or attributes to free memory resource.
+    static maths::CModel* tinyModel();
-
-    private:
-        using TModelParamsCRef = boost::reference_wrapper<const SModelParams>;
-        using TInterimBucketCorrectorPtr = boost::shared_ptr<CInterimBucketCorrector>;
+
+private:
+    using TModelParamsCRef = boost::reference_wrapper<const SModelParams>;
+    using TInterimBucketCorrectorPtr = boost::shared_ptr<CInterimBucketCorrector>;
-
-    private:
-        //! Set the current bucket total count.
-        virtual void currentBucketTotalCount(uint64_t totalCount) = 0;
+
+private:
+    //! Set the current bucket total count.
+    virtual void currentBucketTotalCount(uint64_t totalCount) = 0;
-
-        //! Skip sampling the interval \p endTime - \p startTime.
-        virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) = 0;
+
+    //! Skip sampling the interval \p endTime - \p startTime.
+    virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) = 0;
-
-        //! Get the model memory usage estimator.
-        virtual CMemoryUsageEstimator *memoryUsageEstimator() const = 0;
+
+    //! Get the model memory usage estimator.
+    virtual CMemoryUsageEstimator* memoryUsageEstimator() const = 0;
-
-    private:
-        //! The global configuration parameters.
-        TModelParamsCRef m_Params;
+
+private:
+    //! The global configuration parameters.
+    TModelParamsCRef m_Params;
-
-        //! The data gatherer. (This is not persisted by the model hierarchy.)
-        TDataGathererPtr m_DataGatherer;
+
+    //! The data gatherer. (This is not persisted by the model hierarchy.)
+    TDataGathererPtr m_DataGatherer;
-
-        //! The bucket count of each person in the exponentially decaying
-        //! window with decay rate equal to m_DecayRate.
-        TDoubleVec m_PersonBucketCounts;
+
+    //! The bucket count of each person in the exponentially decaying
+    //! window with decay rate equal to m_DecayRate.
+    TDoubleVec m_PersonBucketCounts;
-
-        //! The total number of buckets in the exponentially decaying window
-        //! with decay rate equal to m_DecayRate.
-        double m_BucketCount;
+
+    //! The total number of buckets in the exponentially decaying window
+    //! with decay rate equal to m_DecayRate.
+    double m_BucketCount;
-
-        //! The influence calculators to use for each feature which is being
-        //! modeled.
-        TFeatureInfluenceCalculatorCPtrPrVecVec m_InfluenceCalculators;
+
+    //! The influence calculators to use for each feature which is being
+    //! modeled.
+    TFeatureInfluenceCalculatorCPtrPrVecVec m_InfluenceCalculators;
-
-        //! A corrector that calculates adjustments for values of interim buckets.
-        TInterimBucketCorrectorPtr m_InterimBucketCorrector;
+
+    //! A corrector that calculates adjustments for values of interim buckets.
+    TInterimBucketCorrectorPtr m_InterimBucketCorrector;
 };
-
 }
 }
diff --git a/include/model/CAnomalyDetectorModelConfig.h b/include/model/CAnomalyDetectorModelConfig.h
index 82c1c00ee8..e06eaed75c 100644
--- a/include/model/CAnomalyDetectorModelConfig.h
+++ b/include/model/CAnomalyDetectorModelConfig.h
@@ -26,10 +26,8 @@
 #include
 #include
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 class CDetectionRule;
 class CSearchKey;
 class CModelAutoConfigurer;
@@ -47,469 +45,460 @@ class CModelFactory;
 //! -# Some of this information will be exposed to the user via a
 //!    configuration file,
 //! -# Some may be calculated from data characteristics and so on.
-class MODEL_EXPORT CAnomalyDetectorModelConfig
-{
-    public:
-        //! The possible factory types.
-        enum EFactoryType
-        {
-            E_EventRateFactory = 0,
-            E_MetricFactory = 1,
-            E_EventRatePopulationFactory = 2,
-            E_MetricPopulationFactory = 3,
-            E_CountingFactory = 4,
-            E_EventRatePeersFactory = 5,
-            E_UnknownFactory,
-            E_BadFactory
-        };
-
-        using TStrSet = std::set<std::string>;
-        using TSizeVec = std::vector<std::size_t>;
-        using TTimeVec = std::vector<core_t::TTime>;
-        using TTimeVecCItr = TTimeVec::const_iterator;
-        using TDoubleDoublePr = std::pair<double, double>;
-        using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
-        using TFeatureVec = model_t::TFeatureVec;
-        using TStrVec = std::vector<std::string>;
-        using TStrVecCItr = TStrVec::const_iterator;
-        using TModelFactoryPtr = boost::shared_ptr<CModelFactory>;
-        using TModelFactoryCPtr = boost::shared_ptr<const CModelFactory>;
-        using TFactoryTypeFactoryPtrMap = std::map<EFactoryType, TModelFactoryPtr>;
-        using TFactoryTypeFactoryPtrMapItr = TFactoryTypeFactoryPtrMap::iterator;
-        using TFactoryTypeFactoryPtrMapCItr = TFactoryTypeFactoryPtrMap::const_iterator;
-        using TSearchKeyFactoryCPtrMap = std::map<CSearchKey, TModelFactoryCPtr>;
-
-        // Const ref to detection rules map
-        using TDetectionRuleVec = std::vector<CDetectionRule>;
-        using TDetectionRuleVecCRef = boost::reference_wrapper<const TDetectionRuleVec>;
-        using TIntDetectionRuleVecUMap = boost::unordered_map<int, TDetectionRuleVec>;
-        using TIntDetectionRuleVecUMapCRef = boost::reference_wrapper<const TIntDetectionRuleVecUMap>;
-        using TIntDetectionRuleVecUMapCItr = TIntDetectionRuleVecUMap::const_iterator;
-
-        using TStrDetectionRulePr = std::pair<std::string, CDetectionRule>;
-        using TStrDetectionRulePrVec = std::vector<TStrDetectionRulePr>;
-        using TStrDetectionRulePrVecCRef = boost::reference_wrapper<const TStrDetectionRulePrVec>;
-
-    public:
-        //! \name Data Gathering
-        //@{
-        //! The default value used to separate components of a multivariate feature
-        //! in its string value.
-        static const std::string DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER;
-
-        //! Bucket length if none is specified on the command line.
-        static const core_t::TTime DEFAULT_BUCKET_LENGTH;
-
-        //! Default maximum number of buckets for receiving out of order records.
-        static const std::size_t DEFAULT_LATENCY_BUCKETS;
-
-        //! Default amount by which metric sample count is reduced for fine-grained
-        //! sampling when there is no latency.
-        static const std::size_t DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY;
-
-        //! Default amount by which metric sample count is reduced for fine-grained
-        //! sampling when there is latency.
-        static const std::size_t DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY;
-
-        //! Default amount by which the metric sample queue expands when it is full.
-        static const double DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR;
-
-        //! Bucket length corresponding to the default decay and learn rates.
-        static const core_t::TTime STANDARD_BUCKET_LENGTH;
-
-        //! The default number of half buckets to store before choosing which
-        //! overlapping bucket has the biggest anomaly.
-        static const std::size_t DEFAULT_BUCKET_RESULTS_DELAY;
-        //@}
-
-        //! \name Modelling
-        //@{
-        //! The default rate at which the model priors decay to non-informative
-        //! per standard bucket length.
-        static const double DEFAULT_DECAY_RATE;
-
-        //! The initial rate, as a multiple of the default decay rate, at which
-        //! the model priors decay to non-informative per standard bucket length.
-        static const double DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER;
-
-        //! The rate at which information accrues in the model per standard
-        //! bucket length elapsed.
-        static const double DEFAULT_LEARN_RATE;
-
-        //! The default minimum permitted fraction of points in a distribution
-        //! mode for individual modeling.
-        static const double DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION;
-
-        //! The default minimum permitted fraction of points in a distribution
-        //! mode for population modeling.
-        static const double DEFAULT_POPULATION_MINIMUM_MODE_FRACTION;
-
-        //! The default minimum count we'll permit in a cluster.
-        static const double DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT;
-
-        //! The default proportion of initial count at which we'll delete a
-        //! category from the sketch to cluster.
-        static const double DEFAULT_CATEGORY_DELETE_FRACTION;
-
-        //! The default minimum frequency of non-empty buckets at which we model
-        //! all buckets.
-        static const double DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS;
-
-        //! The default size of the seasonal components we will model.
-        static const std::size_t DEFAULT_COMPONENT_SIZE;
-
-        //! The default minimum time to detect a change point in a time series.
-        static const core_t::TTime DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE;
-
-        //! The default maximum time to test for a change point in a time series.
-        static const core_t::TTime DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE;
-
-        //! The maximum number of times we'll update a model in a bucketing
-        //! interval. This only applies to our metric statistics, which are
-        //! computed on a fixed number of measurements rather than a fixed
-        //! time interval. A value of zero implies no constraint.
-        static const double DEFAULT_MAXIMUM_UPDATES_PER_BUCKET;
-
-        //! The default minimum value for the influence for which an influencing
-        //! field value is judged to have any influence on a feature value.
-        static const double DEFAULT_INFLUENCE_CUTOFF;
-
-        //! The default scale factor of the decayRate that determines the minimum
-        //! size of the sliding prune window for purging older entries from the
-        //! model.
-        static const double DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM;
-
-        //! The default scale factor of the decayRate that determines the maximum
-        //! size of the sliding prune window for purging older entries from the
-        //! model.
-        static const double DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM;
-
-        //! The default factor increase in priors used to model correlations.
-        static const double DEFAULT_CORRELATION_MODELS_OVERHEAD;
-
-        //! The default threshold for the Pearson correlation coefficient at
-        //! which a correlate will be modeled.
-        static const double DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION;
-        //@}
-
-        //! \name Anomaly Score Calculation
-        //@{
-        //! The default values for the aggregation styles' parameters.
-        static const double DEFAULT_AGGREGATION_STYLE_PARAMS[model_t::NUMBER_AGGREGATION_STYLES][model_t::NUMBER_AGGREGATION_PARAMS];
-
-        //! The default maximum probability which is deemed to be anomalous.
-        static const double DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY;
-        //@}
-
-        //! \name Anomaly Score Normalization
-        //@{
-        //! The default historic anomaly score percentile for which lower
-        //! values are classified as noise.
-        static const double DEFAULT_NOISE_PERCENTILE;
-
-        //! The default multiplier applied to the noise level score in
-        //! order to be classified as anomalous.
-        static const double DEFAULT_NOISE_MULTIPLIER;
-
-        //! We use a piecewise linear mapping between the raw anomaly score
-        //! and the normalized anomaly score with these default knot points.
-        //! In particular, if we define the percentile of a raw score \f$s\f$
-        //! as \f$f_q(s)\f$ and \f$a = \max\{x \le f_q(s)\}\f$ and
-        //! \f$b = \min\{x \ge f_q(s)\}\f$ where \f$x\f$ ranges over the knot point
-        //! X-values then the normalized score would be:\n
-        //! <pre class="fragment">
-        //!   \f$\displaystyle \bar{s} = y(a) + \frac{(y(b) - y(a))(f_q(s) - a)}{b - a}\f$
-        //! </pre>
-        //! Here, \f$y(.)\f$ denotes the corresponding knot point Y-values.
-        static const TDoubleDoublePr DEFAULT_NORMALIZED_SCORE_KNOT_POINTS[9];
-        //@}
-
-    public:
-        //! Create the default configuration.
-        //!
-        //! \param[in] bucketLength The bucketing interval length.
-        //! \param[in] summaryMode Indicates whether the data being gathered
-        //! are already summarized by an external aggregation process.
-        //! \param[in] summaryCountFieldName If \p summaryMode is E_Manual
-        //! then this is the name of the field holding the summary count.
-        //! \param[in] latency The amount of time records are buffered for, to
-        //! allow out-of-order records to be seen by the models in order.
-        //! \param[in] bucketResultsDelay The number of half-bucket results
-        //! to sit on before giving a definitive result.
-        //! \param[in] multivariateByFields Should multivariate analysis of
-        //! correlated 'by' fields be performed?
-        //! \param[in] multipleBucketLengths If specified, set multiple bucket
-        //! lengths to be analysed (CSV string of time values)
-        static CAnomalyDetectorModelConfig defaultConfig(core_t::TTime bucketLength,
-                                                         model_t::ESummaryMode summaryMode,
-                                                         const std::string &summaryCountFieldName,
-                                                         core_t::TTime latency,
-                                                         std::size_t bucketResultsDelay,
-                                                         bool multivariateByFields,
-                                                         const std::string &multipleBucketLengths);
-
-        //! Overload using defaults.
-        static CAnomalyDetectorModelConfig defaultConfig(core_t::TTime bucketLength = DEFAULT_BUCKET_LENGTH,
-                                                         model_t::ESummaryMode summaryMode = model_t::E_None,
-                                                         const std::string &summaryCountFieldName = "")
-        {
-            return defaultConfig(bucketLength,
-                                 summaryMode,
-                                 summaryCountFieldName,
-                                 DEFAULT_LATENCY_BUCKETS * bucketLength,
-                                 DEFAULT_BUCKET_RESULTS_DELAY,
-                                 false,
-                                 "");
-        }
-
-        //! Get the factor to normalize all bucket lengths to the default
-        //! bucket length.
-        static double bucketNormalizationFactor(core_t::TTime bucketLength);
-
-        //! Get the decay rate to use for the time series decomposition given
-        //! the model decay rate \p modelDecayRate.
-        static double trendDecayRate(double modelDecayRate, core_t::TTime bucketLength);
-
-        //! Parse and verify the multiple bucket lengths - these should all be
-        //! multiples of the standard bucket length.
-        static TTimeVec multipleBucketLengths(core_t::TTime bucketLength,
-                                              const std::string &multipleBucketLengths);
-
-    public:
-        CAnomalyDetectorModelConfig();
-
-        //! Set the data bucketing interval.
-        void bucketLength(core_t::TTime length);
-        //! Set the number of buckets to delay finalizing out-of-phase buckets.
-        void bucketResultsDelay(std::size_t delay);
-
-        //! Set whether multivariate analysis of correlated 'by' fields should
-        //! be performed.
-        void multivariateByFields(bool enabled);
-        //! Set the model factories.
-        void factories(const TFactoryTypeFactoryPtrMap &factories);
-        //! Set the style and parameter value for raw score aggregation.
-        bool aggregationStyleParams(model_t::EAggregationStyle style,
-                                    model_t::EAggregationParam param,
-                                    double value);
-        //! Set the maximum anomalous probability.
-        void maximumAnomalousProbability(double probability);
-        //! Set the noise level as a percentile of historic raw anomaly scores.
-        bool noisePercentile(double percentile);
-        //! Set the noise multiplier to use when derating normalized scores
-        //! based on the noise score level.
-        bool noiseMultiplier(double multiplier);
-        //! Set the normalized score knot points for the piecewise linear curve
-        //! between historic raw score percentiles and normalized scores.
-        bool normalizedScoreKnotPoints(const TDoubleDoublePrVec &points);
-
-        //! Populate the parameters from a configuration file.
-        bool init(const std::string &configFile);
-
-        //! Populate the parameters from a configuration file, also retrieving
-        //! the raw property tree created from the config file. (The raw
-        //! property tree is only valid if the method returns true.)
-        bool init(const std::string &configFile,
-                  boost::property_tree::ptree &propTree);
-
-        //! Populate the parameters from a property tree.
-        bool init(const boost::property_tree::ptree &propTree);
-
-        //! Configure modelPlotConfig params from file.
-        bool configureModelPlot(const std::string &modelPlotConfigFile);
-
-        //! Configure modelPlotConfig params from a property tree
-        //! expected to contain two properties: 'boundsPercentile' and 'terms'.
-        bool configureModelPlot(const boost::property_tree::ptree &propTree);
-
-        //! Get the factory for new models.
-        //!
-        //! \param[in] key The key of the detector for which the factory will be
-        //! used.
-        TModelFactoryCPtr factory(const CSearchKey &key) const;
-
-        //! Get the factory for new models.
-        //!
-        //! \param[in] identifier The identifier of the search for which to get a model
-        //! factory.
-        //! \param[in] function The function being invoked.
-        //! \param[in] useNull If true then we will process missing fields as if their
-        //! value is equal to the empty string where possible.
-        //! \param[in] excludeFrequent Whether to discard frequent results.
-        //! \param[in] personFieldName The name of the over field.
-        //! \param[in] attributeFieldName The name of the by field.
-        //! \param[in] valueFieldName The name of the field containing metric values.
-        //! \param[in] influenceFieldNames The list of influence field names.
-        TModelFactoryCPtr factory(int identifier,
-                                  function_t::EFunction function,
-                                  bool useNull = false,
-                                  model_t::EExcludeFrequent excludeFrequent = model_t::E_XF_None,
-                                  const std::string &partitionFieldName = std::string(),
-                                  const std::string &personFieldName = std::string(),
-                                  const std::string &attributeFieldName = std::string(),
-                                  const std::string &valueFieldName = std::string(),
-                                  const CSearchKey::TStoredStringPtrVec &influenceFieldNames = CSearchKey::TStoredStringPtrVec()) const;
-
-        //! Set the rate at which the models lose information.
-        void decayRate(double value);
-
-        //! Get the rate at which the models lose information.
-        double decayRate() const;
-
-        //! Get the length of the baseline.
-        core_t::TTime baselineLength() const;
-
-        //! Get the bucket length.
-        core_t::TTime bucketLength() const;
-
-        //! Get the maximum latency in the arrival of out of order data.
-        core_t::TTime latency() const;
-
-        //! Get the maximum latency in the arrival of out of order data in
-        //! numbers of buckets.
-        std::size_t latencyBuckets() const;
-
-        //! Get the bucket result delay window.
-        std::size_t bucketResultsDelay() const;
-
-        //! Get the multiple bucket lengths.
-        const TTimeVec &multipleBucketLengths() const;
-
-        //! Should multivariate analysis of correlated 'by' fields be performed?
-        bool multivariateByFields() const;
-
-        //! Set the central confidence interval for the model debug plot
-        //! to \p percentile.
-        //!
-        //! This controls upper and lower confidence interval error bars
-        //! returned by the model debug plot.
-        //! \note \p percentile should be in the range [0.0, 100.0).
-        void modelPlotBoundsPercentile(double percentile);
-
-        //! Get the central confidence interval for the model debug plot.
-        double modelPlotBoundsPercentile() const;
-
-        //! Set terms (by, over, or partition field values) to filter
-        //! model debug data. When empty, no filtering is applied.
-        void modelPlotTerms(TStrSet terms);
-
-        //! Get the terms (by, over, or partition field values)
-        //! used to filter model debug data. Empty when no filtering applies.
-        const TStrSet &modelPlotTerms() const;
-        //@}
-
-        //! \name Anomaly Score Calculation
-        //@{
-        //! Get the value of the aggregation style parameter identified by
-        //! \p style and \p param.
-        double aggregationStyleParam(model_t::EAggregationStyle style,
-                                     model_t::EAggregationParam param) const;
-
-        //! Get the maximum anomalous probability.
-        double maximumAnomalousProbability() const;
-        //@}
-
-        //! \name Anomaly Score Normalization
-        //@{
-        //! Get the historic anomaly score percentile for which lower
-        //! values are classified as noise.
-        double noisePercentile() const;
-
-        //! Get the multiplier applied to the noise level score in order
-        //! to be classified as anomalous.
-        double noiseMultiplier() const;
-
-        //! Get the normalized anomaly score knot points.
-        const TDoubleDoublePrVec &normalizedScoreKnotPoints() const;
-        //@}
-
-        //! Check if we should create one normalizer per partition field value.
-        bool perPartitionNormalization() const;
-
-        //! Set whether we should create one normalizer per partition field value.
-        void perPartitionNormalization(bool value);
-
-        //! Sets the reference to the detection rules map.
-        void detectionRules(TIntDetectionRuleVecUMapCRef detectionRules);
-
-        //! Sets the reference to the scheduled events vector.
-        void scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents);
-
-        //! Process the stanza properties corresponding to \p stanzaName.
-        //!
-        //! \param[in] propertyTree The properties of the stanza called
-        //! \p stanzaName.
-        bool processStanza(const boost::property_tree::ptree &propertyTree);
-
-        //! Get the factor to normalize all bucket lengths to the default
-        //! bucket length.
-        double bucketNormalizationFactor() const;
-
-        //! The time window during which samples are accepted.
-        core_t::TTime samplingAgeCutoff() const;
+class MODEL_EXPORT CAnomalyDetectorModelConfig {
+public:
+    //! The possible factory types.
+    enum EFactoryType {
+        E_EventRateFactory = 0,
+        E_MetricFactory = 1,
+        E_EventRatePopulationFactory = 2,
+        E_MetricPopulationFactory = 3,
+        E_CountingFactory = 4,
+        E_EventRatePeersFactory = 5,
+        E_UnknownFactory,
+        E_BadFactory
+    };
+
+    using TStrSet = std::set<std::string>;
+    using TSizeVec = std::vector<std::size_t>;
+    using TTimeVec = std::vector<core_t::TTime>;
+    using TTimeVecCItr = TTimeVec::const_iterator;
+    using TDoubleDoublePr = std::pair<double, double>;
+    using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
+    using TFeatureVec = model_t::TFeatureVec;
+    using TStrVec = std::vector<std::string>;
+    using TStrVecCItr = TStrVec::const_iterator;
+    using TModelFactoryPtr = boost::shared_ptr<CModelFactory>;
+    using TModelFactoryCPtr = boost::shared_ptr<const CModelFactory>;
+    using TFactoryTypeFactoryPtrMap = std::map<EFactoryType, TModelFactoryPtr>;
+    using TFactoryTypeFactoryPtrMapItr = TFactoryTypeFactoryPtrMap::iterator;
+    using TFactoryTypeFactoryPtrMapCItr = TFactoryTypeFactoryPtrMap::const_iterator;
+    using TSearchKeyFactoryCPtrMap = std::map<CSearchKey, TModelFactoryCPtr>;
+
+    // Const ref to detection rules map
+    using TDetectionRuleVec = std::vector<CDetectionRule>;
+    using TDetectionRuleVecCRef = boost::reference_wrapper<const TDetectionRuleVec>;
+    using TIntDetectionRuleVecUMap = boost::unordered_map<int, TDetectionRuleVec>;
+    using TIntDetectionRuleVecUMapCRef = boost::reference_wrapper<const TIntDetectionRuleVecUMap>;
+    using TIntDetectionRuleVecUMapCItr = TIntDetectionRuleVecUMap::const_iterator;
+
+    using TStrDetectionRulePr = std::pair<std::string, CDetectionRule>;
+    using TStrDetectionRulePrVec = std::vector<TStrDetectionRulePr>;
+    using TStrDetectionRulePrVecCRef = boost::reference_wrapper<const TStrDetectionRulePrVec>;
+
+public:
+    //! \name Data Gathering
+    //@{
+    //! The default value used to separate components of a multivariate feature
+    //! in its string value.
+    static const std::string DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER;
+
+    //! Bucket length if none is specified on the command line.
+    static const core_t::TTime DEFAULT_BUCKET_LENGTH;
+
+    //! Default maximum number of buckets for receiving out of order records.
+    static const std::size_t DEFAULT_LATENCY_BUCKETS;
+
+    //! Default amount by which metric sample count is reduced for fine-grained
+    //! sampling when there is no latency.
+    static const std::size_t DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY;
+
+    //! Default amount by which metric sample count is reduced for fine-grained
+    //! sampling when there is latency.
+    static const std::size_t DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY;
+
+    //! Default amount by which the metric sample queue expands when it is full.
+    static const double DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR;
+
+    //! Bucket length corresponding to the default decay and learn rates.
+    static const core_t::TTime STANDARD_BUCKET_LENGTH;
+
+    //! The default number of half buckets to store before choosing which
+    //! overlapping bucket has the biggest anomaly.
+    static const std::size_t DEFAULT_BUCKET_RESULTS_DELAY;
+    //@}
+
+    //! \name Modelling
+    //@{
+    //! The default rate at which the model priors decay to non-informative
+    //! per standard bucket length.
+    static const double DEFAULT_DECAY_RATE;
+
+    //! The initial rate, as a multiple of the default decay rate, at which
+    //! the model priors decay to non-informative per standard bucket length.
+    static const double DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER;
+
+    //! The rate at which information accrues in the model per standard
+    //! bucket length elapsed.
+    static const double DEFAULT_LEARN_RATE;
+
+    //! The default minimum permitted fraction of points in a distribution
+    //! mode for individual modeling.
+    static const double DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION;
+
+    //! The default minimum permitted fraction of points in a distribution
+    //! mode for population modeling.
+    static const double DEFAULT_POPULATION_MINIMUM_MODE_FRACTION;
+
+    //! The default minimum count we'll permit in a cluster.
+    static const double DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT;
+
+    //! The default proportion of initial count at which we'll delete a
+    //! category from the sketch to cluster.
+    static const double DEFAULT_CATEGORY_DELETE_FRACTION;
+
+    //! The default minimum frequency of non-empty buckets at which we model
+    //! all buckets.
+    static const double DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS;
+
+    //! The default size of the seasonal components we will model.
+    static const std::size_t DEFAULT_COMPONENT_SIZE;
+
+    //! The default minimum time to detect a change point in a time series.
+    static const core_t::TTime DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE;
+
+    //! The default maximum time to test for a change point in a time series.
+    static const core_t::TTime DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE;
+
+    //! The maximum number of times we'll update a model in a bucketing
+    //! interval. This only applies to our metric statistics, which are
+    //! computed on a fixed number of measurements rather than a fixed
+    //! time interval. A value of zero implies no constraint.
+    static const double DEFAULT_MAXIMUM_UPDATES_PER_BUCKET;
+
+    //! The default minimum value for the influence for which an influencing
+    //! field value is judged to have any influence on a feature value.
+    static const double DEFAULT_INFLUENCE_CUTOFF;
+
+    //! The default scale factor of the decayRate that determines the minimum
+    //! size of the sliding prune window for purging older entries from the
+    //! model.
+    static const double DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM;
+
+    //! The default scale factor of the decayRate that determines the maximum
+    //! size of the sliding prune window for purging older entries from the
+    //! model.
+    static const double DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM;
+
+    //! The default factor increase in priors used to model correlations.
+    static const double DEFAULT_CORRELATION_MODELS_OVERHEAD;
+
+    //! The default threshold for the Pearson correlation coefficient at
+    //! which a correlate will be modeled.
+    static const double DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION;
+    //@}
+
+    //! \name Anomaly Score Calculation
+    //@{
+    //! The default values for the aggregation styles' parameters.
+    static const double DEFAULT_AGGREGATION_STYLE_PARAMS[model_t::NUMBER_AGGREGATION_STYLES][model_t::NUMBER_AGGREGATION_PARAMS];
+
+    //! The default maximum probability which is deemed to be anomalous.
+    static const double DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY;
+    //@}
+
+    //! \name Anomaly Score Normalization
+    //@{
+    //! The default historic anomaly score percentile for which lower
+    //! values are classified as noise.
+    static const double DEFAULT_NOISE_PERCENTILE;
+
+    //! The default multiplier applied to the noise level score in
+    //! order to be classified as anomalous.
+    static const double DEFAULT_NOISE_MULTIPLIER;
+
+    //! We use a piecewise linear mapping between the raw anomaly score
+    //! and the normalized anomaly score with these default knot points.
+    //! In particular, if we define the percentile of a raw score \f$s\f$
+    //! as \f$f_q(s)\f$ and \f$a = \max\{x \le f_q(s)\}\f$ and
+    //! \f$b = \min\{x \ge f_q(s)\}\f$ where \f$x\f$ ranges over the knot point
+    //! X-values then the normalized score would be:\n
+    //! <pre class="fragment">
+    //!   \f$\displaystyle \bar{s} = y(a) + \frac{(y(b) - y(a))(f_q(s) - a)}{b - a}\f$
+    //! </pre>
+    //! Here, \f$y(.)\f$ denotes the corresponding knot point Y-values.
+    static const TDoubleDoublePr DEFAULT_NORMALIZED_SCORE_KNOT_POINTS[9];
+    //@}
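[Editor's note: to make the mapping above concrete, a self-contained sketch evaluating the piecewise-linear curve at a raw score percentile; the real normalizer lives elsewhere in the codebase, and the function name here is illustrative.]

    // Sketch: evaluate the knot-point curve at rawPercentile.
    double normalizedScore(const std::pair<double, double> (&knots)[9], double rawPercentile) {
        std::size_t i{1};
        while (i + 1 < 9 && knots[i].first < rawPercentile) {
            ++i;
        }
        double a{knots[i - 1].first}, ya{knots[i - 1].second};
        double b{knots[i].first}, yb{knots[i].second};
        // Linearly interpolate between the bracketing knots (a, ya) and (b, yb).
        return ya + (yb - ya) * (rawPercentile - a) / (b - a);
    }

For example, with knots (0, 0), (50, 10) and (100, 100), a raw score at the 75th percentile maps to 10 + 90 * 25 / 50 = 55.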
+ bool normalizedScoreKnotPoints(const TDoubleDoublePrVec& points); + + //! Populate the parameters from a configuration file. + bool init(const std::string& configFile); + + //! Populate the parameters from a configuration file, also retrieving + //! the raw property tree created from the config file. (The raw + //! property tree is only valid if the method returns true.) + bool init(const std::string& configFile, boost::property_tree::ptree& propTree); + + //! Populate the parameters from a property tree. + bool init(const boost::property_tree::ptree& propTree); + + //! Configure modelPlotConfig params from file + bool configureModelPlot(const std::string& modelPlotConfigFile); + + //! Configure modelPlotConfig params from a property tree + //! expected to contain two properties: 'boundsPercentile' and 'terms' + bool configureModelPlot(const boost::property_tree::ptree& propTree); + + //! Get the factory for new models. + //! + //! \param[in] key The key of the detector for which the factory will be + //! used. + TModelFactoryCPtr factory(const CSearchKey& key) const; + + //! Get the factory for new models. + //! + //! \param[in] identifier The identifier of the search for which to get a model + //! factory. + //! \param[in] function The function being invoked. + //! \param[in] useNull If true then we will process missing fields as if their + //! value is equal to the empty string where possible. + //! \param[in] excludeFrequent Whether to discard frequent results + //! \param[in] personFieldName The name of the over field. + //! \param[in] attributeFieldName The name of the by field. + //! \param[in] valueFieldName The name of the field containing metric values. + //! \param[in] influenceFieldNames The list of influence field names. + TModelFactoryCPtr factory(int identifier, + function_t::EFunction function, + bool useNull = false, + model_t::EExcludeFrequent excludeFrequent = model_t::E_XF_None, + const std::string& partitionFieldName = std::string(), + const std::string& personFieldName = std::string(), + const std::string& attributeFieldName = std::string(), + const std::string& valueFieldName = std::string(), + const CSearchKey::TStoredStringPtrVec& influenceFieldNames = CSearchKey::TStoredStringPtrVec()) const; + + //! Set the rate at which the models lose information. + void decayRate(double value); + + //! Get the rate at which the models lose information. + double decayRate() const; + + //! Get the length of the baseline. + core_t::TTime baselineLength() const; + + //! Get the bucket length. + core_t::TTime bucketLength() const; + + //! Get the maximum latency in the arrival of out of order data. + core_t::TTime latency() const; + + //! Get the maximum latency in the arrival of out of order data in + //! numbers of buckets. + std::size_t latencyBuckets() const; + + //! Get the bucket result delay window. + std::size_t bucketResultsDelay() const; + + //! Get the multiple bucket lengths. + const TTimeVec& multipleBucketLengths() const; + + //! Should multivariate analysis of correlated 'by' fields be performed? + bool multivariateByFields() const; + + //! Set the central confidence interval for the model debug plot + //! to \p percentage. + //! + //! This controls upper and lower confidence interval error bars + //! returned by the model debug plot. + //! \note \p percentile should be in the range [0.0, 100.0). + void modelPlotBoundsPercentile(double percentile); + + //! Get the central confidence interval for the model debug plot. 
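As a usage illustration for the init() overloads declared above, the following sketch reads an INI-style file with Boost.PropertyTree. The file name and the property path are assumptions for the example, not the product's actual schema.

    #include <boost/property_tree/ini_parser.hpp>
    #include <boost/property_tree/ptree.hpp>

    #include <exception>
    #include <iostream>

    int main() {
        boost::property_tree::ptree propTree;
        try {
            // Hypothetical file name; init(configFile, propTree) plays a
            // similar role and additionally hands back the parsed tree.
            boost::property_tree::ini_parser::read_ini("mlmodel.conf", propTree);
        } catch (const std::exception& e) {
            std::cerr << "config parse failed: " << e.what() << std::endl;
            return 1;
        }
        // get() with a default captures the fall-back-to-defaults behaviour.
        double boundsPercentile = propTree.get("modelplot.boundspercentile", 95.0);
        std::cout << "bounds percentile = " << boundsPercentile << std::endl;
        return 0;
    }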
+ double modelPlotBoundsPercentile() const; + + //! Set terms (by, over, or partition field values) to filter + //! model debug data. When empty, no filtering is applied. + void modelPlotTerms(TStrSet terms); + + //! Get the terms (by, over, or partition field values) + //! used to filter model debug data. Empty when no filtering applies. + const TStrSet& modelPlotTerms() const; + //@} + + //! \name Anomaly Score Calculation + //@{ + //! Get the value of the aggregation style parameter identified by + //! \p style and \p param. + double aggregationStyleParam(model_t::EAggregationStyle style, model_t::EAggregationParam param) const; + + //! Get the maximum anomalous probability. + double maximumAnomalousProbability() const; + //@} + + //! \name Anomaly Score Normalization + //@{ + //! Get the historic anomaly score percentile for which lower + //! values are classified as noise. + double noisePercentile() const; + + //! Get the multiplier applied to the noise level score in order + //! to be classified as anomalous. + double noiseMultiplier() const; + + //! Get the normalized anomaly score knot points. + const TDoubleDoublePrVec& normalizedScoreKnotPoints() const; + //@} + + //! Check if we should create one normalizer per partition field value. + bool perPartitionNormalization() const; + + //! Set whether we should create one normalizer per partition field value. + void perPartitionNormalization(bool value); + + //! Sets the reference to the detection rules map + void detectionRules(TIntDetectionRuleVecUMapCRef detectionRules); + + //! Sets the reference to the scheduled events vector + void scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents); + + //! Process the stanza properties corresponding \p stanzaName. + //! + //! \param[in] propertyTree The properties of the stanza called + //! \p stanzaName. + bool processStanza(const boost::property_tree::ptree& propertyTree); + + //! Get the factor to normalize all bucket lengths to the default + //! bucket length. + double bucketNormalizationFactor() const; + + //! The time window during which samples are accepted. + core_t::TTime samplingAgeCutoff() const; - private: - //! Bucket length. - core_t::TTime m_BucketLength; +private: + //! Bucket length. + core_t::TTime m_BucketLength; - //! Get the bucket result delay window: The numer of half buckets to - //! store before choosing which overlapping bucket has the biggest anomaly - std::size_t m_BucketResultsDelay; + //! Get the bucket result delay window: The numer of half buckets to + //! store before choosing which overlapping bucket has the biggest anomaly + std::size_t m_BucketResultsDelay; - //! Should multivariate analysis of correlated 'by' fields be performed? - bool m_MultivariateByFields; + //! Should multivariate analysis of correlated 'by' fields be performed? + bool m_MultivariateByFields; - //! The new model factories for each data type. - TFactoryTypeFactoryPtrMap m_Factories; + //! The new model factories for each data type. + TFactoryTypeFactoryPtrMap m_Factories; - //! A cache of customized factories requested from this config. - mutable TSearchKeyFactoryCPtrMap m_FactoryCache; - - //! The central confidence interval for the model debug plot. - double m_ModelPlotBoundsPercentile; - - //! Terms (by, over, or partition field values) used to filter model - //! debug data. Empty when no filtering applies. - TStrSet m_ModelPlotTerms; - //@} - - //! \name Anomaly Score Calculation - //@{ - //! The values for the aggregation styles' parameters. 
- double m_AggregationStyleParams[model_t::NUMBER_AGGREGATION_STYLES][model_t::NUMBER_AGGREGATION_PARAMS]; - - //! The maximum probability which is deemed to be anomalous. - double m_MaximumAnomalousProbability; - //@} - - //! \name Anomaly Score Normalization - //@{ - //! The historic anomaly score percentile for which lower values - //! are classified as noise. - double m_NoisePercentile; - - //! The multiplier applied to the noise level score in order to - //! be classified as anomalous. - double m_NoiseMultiplier; - - //! We use a piecewise linear mapping between the raw anomaly score - //! and the normalized anomaly score with these knot points. - //! \see DEFAULT_NORMALIZED_SCORE_KNOT_POINTS for details. - TDoubleDoublePrVec m_NormalizedScoreKnotPoints; - - //! If true then create one normalizer per partition field value. - bool m_PerPartitionNormalisation; - //@} - - //! A reference to the map containing detection rules per - //! detector key. Note that the owner of the map is CFieldConfig. - TIntDetectionRuleVecUMapCRef m_DetectionRules; + //! A cache of customized factories requested from this config. + mutable TSearchKeyFactoryCPtrMap m_FactoryCache; + + //! The central confidence interval for the model debug plot. + double m_ModelPlotBoundsPercentile; + + //! Terms (by, over, or partition field values) used to filter model + //! debug data. Empty when no filtering applies. + TStrSet m_ModelPlotTerms; + //@} + + //! \name Anomaly Score Calculation + //@{ + //! The values for the aggregation styles' parameters. + double m_AggregationStyleParams[model_t::NUMBER_AGGREGATION_STYLES][model_t::NUMBER_AGGREGATION_PARAMS]; + + //! The maximum probability which is deemed to be anomalous. + double m_MaximumAnomalousProbability; + //@} + + //! \name Anomaly Score Normalization + //@{ + //! The historic anomaly score percentile for which lower values + //! are classified as noise. + double m_NoisePercentile; + + //! The multiplier applied to the noise level score in order to + //! be classified as anomalous. + double m_NoiseMultiplier; + + //! We use a piecewise linear mapping between the raw anomaly score + //! and the normalized anomaly score with these knot points. + //! \see DEFAULT_NORMALIZED_SCORE_KNOT_POINTS for details. + TDoubleDoublePrVec m_NormalizedScoreKnotPoints; + + //! If true then create one normalizer per partition field value. + bool m_PerPartitionNormalisation; + //@} + + //! A reference to the map containing detection rules per + //! detector key. Note that the owner of the map is CFieldConfig. + TIntDetectionRuleVecUMapCRef m_DetectionRules; - //! A reference to the vector of scheduled events. - //! The owner of the vector is CFieldConfig - TStrDetectionRulePrVecCRef m_ScheduledEvents; + //! A reference to the vector of scheduled events. + //! The owner of the vector is CFieldConfig + TStrDetectionRulePrVecCRef m_ScheduledEvents; }; - } } diff --git a/include/model/CAnomalyScore.h b/include/model/CAnomalyScore.h index 9274689b38..782d153117 100644 --- a/include/model/CAnomalyScore.h +++ b/include/model/CAnomalyScore.h @@ -24,20 +24,15 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CPrior; } -namespace model -{ +namespace model { class CAnomalyDetectorModelConfig; class CLimits; @@ -46,266 +41,251 @@ class CLimits; //! DESCRIPTION:\n //! A collection of utility functions for computing and normalizing //! anomaly scores. 
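The normalization members above (noise percentile, noise multiplier, knot points) suggest a derating step for scores near the noise floor. The following is only a sketch of that idea with invented names and an assumed quadratic damping rule; it is not this class's actual algorithm.

    // noiseLevel would be the historic raw score at the configured noise
    // percentile; scores below noiseLevel * noiseMultiplier are damped
    // towards zero, scores above it pass through unchanged.
    double derateScoreByNoise(double score, double noiseLevel, double noiseMultiplier) {
        double threshold = noiseLevel * noiseMultiplier;
        if (threshold <= 0.0 || score >= threshold) {
            return score;
        }
        return score * (score / threshold);
    }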
-class MODEL_EXPORT CAnomalyScore -{ +class MODEL_EXPORT CAnomalyScore { +public: + using TDoubleVec = std::vector; + using TDoubleVecItr = TDoubleVec::iterator; + using TDoubleVecCItr = TDoubleVec::const_iterator; + using TOptionalDouble = boost::optional; + using TOptionalDoubleVec = std::vector; + using TStrVec = std::vector; + + //! Attributes for a persisted normalizer + static const std::string MLCUE_ATTRIBUTE; + static const std::string MLKEY_ATTRIBUTE; + static const std::string MLQUANTILESDESCRIPTION_ATTRIBUTE; + static const std::string MLVERSION_ATTRIBUTE; + static const std::string TIME_ATTRIBUTE; + + static const std::string CURRENT_FORMAT_VERSION; + + //! Severities + static const std::string WARNING_SEVERITY; + static const std::string MINOR_SEVERITY; + static const std::string MAJOR_SEVERITY; + static const std::string CRITICAL_SEVERITY; + +public: + //! \brief Wrapper around CAnomalyScore::compute. + class MODEL_EXPORT CComputer { public: - using TDoubleVec = std::vector; - using TDoubleVecItr = TDoubleVec::iterator; - using TDoubleVecCItr = TDoubleVec::const_iterator; - using TOptionalDouble = boost::optional; - using TOptionalDoubleVec = std::vector; - using TStrVec = std::vector; - - //! Attributes for a persisted normalizer - static const std::string MLCUE_ATTRIBUTE; - static const std::string MLKEY_ATTRIBUTE; - static const std::string MLQUANTILESDESCRIPTION_ATTRIBUTE; - static const std::string MLVERSION_ATTRIBUTE; - static const std::string TIME_ATTRIBUTE; - - static const std::string CURRENT_FORMAT_VERSION; - - //! Severities - static const std::string WARNING_SEVERITY; - static const std::string MINOR_SEVERITY; - static const std::string MAJOR_SEVERITY; - static const std::string CRITICAL_SEVERITY; - + CComputer(double jointProbabilityWeight, + double extremeProbabilityWeight, + std::size_t minExtremeSamples, + std::size_t maxExtremeSamples, + double maximumAnomalousProbability); + + //! Compute the overall anomaly score and aggregate probability. + bool operator()(const TDoubleVec& probabilities, double& overallAnomalyScore, double& overallProbability) const; + + private: + //! The weight to assign the joint probability. + double m_JointProbabilityWeight; + //! The weight to assign the extreme probability. + double m_ExtremeProbabilityWeight; + //! The minimum number of samples to include in the extreme + //! probability calculation. + std::size_t m_MinExtremeSamples; + //! The maximum number of samples to include in the extreme + //! probability calculation. + std::size_t m_MaxExtremeSamples; + //! The maximum probability which is deemed to be anomalous. + double m_MaximumAnomalousProbability; + }; + + //! \brief Manages the normalization of aggregate anomaly scores + //! based on historic values percentiles. + class MODEL_EXPORT CNormalizer : private core::CNonCopyable { public: - //! \brief Wrapper around CAnomalyScore::compute. - class MODEL_EXPORT CComputer - { - public: - CComputer(double jointProbabilityWeight, - double extremeProbabilityWeight, - std::size_t minExtremeSamples, - std::size_t maxExtremeSamples, - double maximumAnomalousProbability); - - //! Compute the overall anomaly score and aggregate probability. - bool operator()(const TDoubleVec &probabilities, - double &overallAnomalyScore, - double &overallProbability) const; - - private: - //! The weight to assign the joint probability. - double m_JointProbabilityWeight; - //! The weight to assign the extreme probability. - double m_ExtremeProbabilityWeight; - //! 
The minimum number of samples to include in the extreme - //! probability calculation. - std::size_t m_MinExtremeSamples; - //! The maximum number of samples to include in the extreme - //! probability calculation. - std::size_t m_MaxExtremeSamples; - //! The maximum probability which is deemed to be anomalous. - double m_MaximumAnomalousProbability; - }; - - //! \brief Manages the normalization of aggregate anomaly scores - //! based on historic values percentiles. - class MODEL_EXPORT CNormalizer : private core::CNonCopyable - { - public: - explicit CNormalizer(const CAnomalyDetectorModelConfig &config); - - //! Does this normalizer have enough information to normalize - //! anomaly scores? - bool canNormalize() const; - - //! This normalizes the aggregate scores, i.e. the sum - //! of \p scores, and scales all the scores by a constant - //! s.t. they sum to the normalized score. The normalized - //! score is in the range [0, 100]. - //! - //! \param[in,out] scores The raw scores to normalize. - //! Filled in with the normalized scores. - bool normalize(TDoubleVec &scores) const; - - //! As above but taking a single pre-aggregated \p score instead - //! of a vector of scores to be aggregated. - bool normalize(double &score) const; - - //! Estimate the quantile range including the \p score. - //! - //! \param[in] score The score to estimate. - //! \param[in] confidence The quantile central confidence interval. - //! \param[out] lowerBound The quantile lower bound of \p score. - //! \param[out] upperBound The quantile upper bound of \p score. - void quantile(double score, - double confidence, - double &lowerBound, - double &upperBound) const; - - //! Updates the quantile summaries with the total of - //! \p scores. - //! \return true if a big change occurred, otherwise false - bool updateQuantiles(const TDoubleVec &scores); - - //! Updates the quantile summaries with \p score. - //! \return true if a big change occurred, otherwise false - bool updateQuantiles(double score); - - //! Age the maximum score and quantile summary. - void propagateForwardByTime(double time); - - //! Report whether it would be possible to upgrade one version - //! of the quantiles to another. - static bool isUpgradable(const std::string &fromVersion, - const std::string &toVersion); - - //! Scale the maximum score and quantile summary. To be used - //! after upgrades if different versions of the product produce - //! different raw anomaly scores. - bool upgrade(const std::string &loadedVersion, - const std::string ¤tVersion); - - //! Set the normalizer back to how it was immediately after - //! construction - void clear(); - - //! \name Serialization - //@{ - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Create from a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - //@} - - public: - //! Get a checksum of the object. - uint64_t checksum() const; - - private: - using TDoubleDoublePr = std::pair; - using TDoubleDoublePrVec = std::vector; - using TDoubleDoublePrVecCItr = TDoubleDoublePrVec::const_iterator; - using TGreaterDouble = std::greater; - using TMaxValueAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - - private: - //! Used to convert raw scores in to integers so that we - //! can use the q-digest. - static const double DISCRETIZATION_FACTOR; - - //! We maintain a separate digest for the scores greater - //! 
than some high percentile (specified by this constant). - //! This is because we want the highest resolution in the - //! scores for the extreme (high quantile) raw scores. - static const double HIGH_PERCENTILE; - - //! The time between aging quantiles. These age at a slower - //! rate which we achieve by only aging them after a certain - //! period has elapsed. - static const double QUANTILE_DECAY_TIME; - - //! The increase in maximum score that will be considered a - //! big change when updating the quantiles. - static const double BIG_CHANGE_FACTOR; - - private: - //! Compute the discrete score from a raw score. - uint32_t discreteScore(double rawScore) const; - - //! Extract the raw score from a discrete score. - double rawScore(uint32_t discreteScore) const; - - private: - //! The percentile defining the largest noise score. - double m_NoisePercentile; - //! The multiplier used to estimate the anomaly threshold. - double m_NoiseMultiplier; - //! The normalized anomaly score knot points. - TDoubleDoublePrVec m_NormalizedScoreKnotPoints; - //! The maximum possible normalized score. - double m_MaximumNormalizedScore; - - //! The approximate HIGH_PERCENTILE percentile raw score. - uint32_t m_HighPercentileScore; - //! The number of scores less than the approximate - //! HIGH_PERCENTILE percentile raw score. - uint64_t m_HighPercentileCount; - - //! The maximum score ever received. - TMaxValueAccumulator m_MaxScore; - - //! The factor used to scale the quantile scores to convert - //! values per bucket length to values in absolute time. We - //! scale all values to an effective bucket length 30 mins. - //! So, a percentile of 99% would correspond to a 1 in 50 - //! hours event. - double m_BucketNormalizationFactor; - - //! A quantile summary of the raw scores. - maths::CQDigest m_RawScoreQuantileSummary; - //! A quantile summary of the raw score greater than the - //! approximate HIGH_PERCENTILE percentile raw score. - maths::CQDigest m_RawScoreHighQuantileSummary; - - //! The rate at which information is lost. - double m_DecayRate; - //! The time to when we next age the quantiles. - double m_TimeToQuantileDecay; - }; - - using TNormalizerP = boost::shared_ptr; + explicit CNormalizer(const CAnomalyDetectorModelConfig& config); - public: - //! Compute a joint anomaly score for a collection of probabilities. - //! - //! The joint anomaly score is assigned pro-rata to each member - //! of the collection based on its anomalousness. + //! Does this normalizer have enough information to normalize + //! anomaly scores? + bool canNormalize() const; + + //! This normalizes the aggregate scores, i.e. the sum + //! of \p scores, and scales all the scores by a constant + //! s.t. they sum to the normalized score. The normalized + //! score is in the range [0, 100]. //! - //! \param[in] jointProbabilityWeight The weight to assign the - //! joint probability. - //! \param[in] extremeProbabilityWeight The weight to assign the - //! extreme probability. - //! \param[in] minExtremeSamples The minimum number of samples to - //! include in the extreme probability calculation. - //! \param[in] maxExtremeSamples The maximum number of samples to - //! include in the extreme probability calculation. - //! \param[in] maximumAnomalousProbability The largest probability - //! with non-zero anomaly score. - //! \param[in] probabilities A collection of probabilities for - //! which to compute an aggregate probability and total score. - //! \param[out] overallAnomalyScore Filled in with the overall - //! 
anomaly score. - //! \param[out] overallProbability Filled in with the overall - //! probability. - static bool compute(double jointProbabilityWeight, - double extremeProbabilityWeight, - std::size_t minExtremeSamples, - std::size_t maxExtremeSamples, - double maximumAnomalousProbability, - const TDoubleVec &probabilities, - double &overallAnomalyScore, - double &overallProbability); - - //! Given a normalized score, find the most appropriate severity string - static const std::string &normalizedScoreToSeverity(double normalizedScore); - - //! Populate \p normalizer from its JSON representation - static bool normalizerFromJson(const std::string &json, - CNormalizer &normalizer); - - //! Populate \p normalizer from the restore traverser - static bool normalizerFromJson(core::CStateRestoreTraverser &traverser, - CNormalizer &normalizer); - - //! Convert \p normalizer to its JSON representation with a restoration - //! cue and description specified by the caller - static void normalizerToJson(const CNormalizer &normalizer, - const std::string &searchKey, - const std::string &cue, - const std::string &description, - core_t::TTime time, - std::string &json); -}; + //! \param[in,out] scores The raw scores to normalize. + //! Filled in with the normalized scores. + bool normalize(TDoubleVec& scores) const; + //! As above but taking a single pre-aggregated \p score instead + //! of a vector of scores to be aggregated. + bool normalize(double& score) const; + //! Estimate the quantile range including the \p score. + //! + //! \param[in] score The score to estimate. + //! \param[in] confidence The quantile central confidence interval. + //! \param[out] lowerBound The quantile lower bound of \p score. + //! \param[out] upperBound The quantile upper bound of \p score. + void quantile(double score, double confidence, double& lowerBound, double& upperBound) const; + + //! Updates the quantile summaries with the total of + //! \p scores. + //! \return true if a big change occurred, otherwise false + bool updateQuantiles(const TDoubleVec& scores); + + //! Updates the quantile summaries with \p score. + //! \return true if a big change occurred, otherwise false + bool updateQuantiles(double score); + + //! Age the maximum score and quantile summary. + void propagateForwardByTime(double time); + + //! Report whether it would be possible to upgrade one version + //! of the quantiles to another. + static bool isUpgradable(const std::string& fromVersion, const std::string& toVersion); + + //! Scale the maximum score and quantile summary. To be used + //! after upgrades if different versions of the product produce + //! different raw anomaly scores. + bool upgrade(const std::string& loadedVersion, const std::string& currentVersion); + + //! Set the normalizer back to how it was immediately after + //! construction + void clear(); + + //! \name Serialization + //@{ + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Create from a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + //@} + + public: + //! Get a checksum of the object. + uint64_t checksum() const; + + private: + using TDoubleDoublePr = std::pair; + using TDoubleDoublePrVec = std::vector; + using TDoubleDoublePrVecCItr = TDoubleDoublePrVec::const_iterator; + using TGreaterDouble = std::greater; + using TMaxValueAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + + private: + //! 
Used to convert raw scores in to integers so that we + //! can use the q-digest. + static const double DISCRETIZATION_FACTOR; + + //! We maintain a separate digest for the scores greater + //! than some high percentile (specified by this constant). + //! This is because we want the highest resolution in the + //! scores for the extreme (high quantile) raw scores. + static const double HIGH_PERCENTILE; + + //! The time between aging quantiles. These age at a slower + //! rate which we achieve by only aging them after a certain + //! period has elapsed. + static const double QUANTILE_DECAY_TIME; + + //! The increase in maximum score that will be considered a + //! big change when updating the quantiles. + static const double BIG_CHANGE_FACTOR; + + private: + //! Compute the discrete score from a raw score. + uint32_t discreteScore(double rawScore) const; + + //! Extract the raw score from a discrete score. + double rawScore(uint32_t discreteScore) const; + + private: + //! The percentile defining the largest noise score. + double m_NoisePercentile; + //! The multiplier used to estimate the anomaly threshold. + double m_NoiseMultiplier; + //! The normalized anomaly score knot points. + TDoubleDoublePrVec m_NormalizedScoreKnotPoints; + //! The maximum possible normalized score. + double m_MaximumNormalizedScore; + + //! The approximate HIGH_PERCENTILE percentile raw score. + uint32_t m_HighPercentileScore; + //! The number of scores less than the approximate + //! HIGH_PERCENTILE percentile raw score. + uint64_t m_HighPercentileCount; + + //! The maximum score ever received. + TMaxValueAccumulator m_MaxScore; + + //! The factor used to scale the quantile scores to convert + //! values per bucket length to values in absolute time. We + //! scale all values to an effective bucket length 30 mins. + //! So, a percentile of 99% would correspond to a 1 in 50 + //! hours event. + double m_BucketNormalizationFactor; + + //! A quantile summary of the raw scores. + maths::CQDigest m_RawScoreQuantileSummary; + //! A quantile summary of the raw score greater than the + //! approximate HIGH_PERCENTILE percentile raw score. + maths::CQDigest m_RawScoreHighQuantileSummary; + + //! The rate at which information is lost. + double m_DecayRate; + //! The time to when we next age the quantiles. + double m_TimeToQuantileDecay; + }; + + using TNormalizerP = boost::shared_ptr; + +public: + //! Compute a joint anomaly score for a collection of probabilities. + //! + //! The joint anomaly score is assigned pro-rata to each member + //! of the collection based on its anomalousness. + //! + //! \param[in] jointProbabilityWeight The weight to assign the + //! joint probability. + //! \param[in] extremeProbabilityWeight The weight to assign the + //! extreme probability. + //! \param[in] minExtremeSamples The minimum number of samples to + //! include in the extreme probability calculation. + //! \param[in] maxExtremeSamples The maximum number of samples to + //! include in the extreme probability calculation. + //! \param[in] maximumAnomalousProbability The largest probability + //! with non-zero anomaly score. + //! \param[in] probabilities A collection of probabilities for + //! which to compute an aggregate probability and total score. + //! \param[out] overallAnomalyScore Filled in with the overall + //! anomaly score. + //! \param[out] overallProbability Filled in with the overall + //! probability. 
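An illustrative call of the compute() declared just below. The weights and sample limits are invented values, not the library defaults, and the snippet assumes model/CAnomalyScore.h is included.

    #include <iostream>
    #include <vector>
    // #include <model/CAnomalyScore.h> // assumed

    void computeExample() {
        std::vector<double> probabilities{1e-3, 0.2, 5e-5, 0.4};
        double overallAnomalyScore{0.0};
        double overallProbability{1.0};
        if (ml::model::CAnomalyScore::compute(0.5,  // jointProbabilityWeight (invented)
                                              0.5,  // extremeProbabilityWeight (invented)
                                              1,    // minExtremeSamples
                                              4,    // maxExtremeSamples
                                              0.05, // maximumAnomalousProbability
                                              probabilities,
                                              overallAnomalyScore,
                                              overallProbability)) {
            std::cout << "score = " << overallAnomalyScore
                      << ", probability = " << overallProbability << std::endl;
        }
    }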
+ static bool compute(double jointProbabilityWeight, + double extremeProbabilityWeight, + std::size_t minExtremeSamples, + std::size_t maxExtremeSamples, + double maximumAnomalousProbability, + const TDoubleVec& probabilities, + double& overallAnomalyScore, + double& overallProbability); + + //! Given a normalized score, find the most appropriate severity string + static const std::string& normalizedScoreToSeverity(double normalizedScore); + + //! Populate \p normalizer from its JSON representation + static bool normalizerFromJson(const std::string& json, CNormalizer& normalizer); + + //! Populate \p normalizer from the restore traverser + static bool normalizerFromJson(core::CStateRestoreTraverser& traverser, CNormalizer& normalizer); + + //! Convert \p normalizer to its JSON representation with a restoration + //! cue and description specified by the caller + static void normalizerToJson(const CNormalizer& normalizer, + const std::string& searchKey, + const std::string& cue, + const std::string& description, + core_t::TTime time, + std::string& json); +}; } } #endif // INCLUDED_ml_model_CAnomalyScore_h - diff --git a/include/model/CBucketGatherer.h b/include/model/CBucketGatherer.h index 6d814781c8..1907aab1b1 100644 --- a/include/model/CBucketGatherer.h +++ b/include/model/CBucketGatherer.h @@ -7,12 +7,12 @@ #ifndef INCLUDED_ml_model_CBucketGatherer_h #define INCLUDED_ml_model_CBucketGatherer_h -#include -#include #include #include -#include +#include +#include #include +#include #include #include @@ -22,10 +22,10 @@ #include #include +#include #include #include #include -#include #include #include @@ -34,16 +34,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CDataGatherer; class CEventData; class CSearchKey; @@ -61,431 +57,396 @@ class CResourceMonitor; //! This functionality has been separated from the CDataGatherer in order //! to allow the CDataGatherer to support multiple overlapping buckets and //! buckets with different time spans. 
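Before the class itself, a short sketch of the bucket arithmetic implied by its time accessors (currentBucketStartTime, earliestBucketStartTime, dataAvailable). The helper names are invented and non-negative timestamps are assumed.

    #include <cstddef>
    #include <cstdint>

    using TTime = std::int64_t; // stands in for core_t::TTime

    // Start of the bucket containing a timestamp.
    TTime bucketStartOf(TTime time, TTime bucketLength) {
        return time - time % bucketLength;
    }

    // Oldest bucket start still inside the latency window.
    TTime earliestBucketStart(TTime latestBucketStart, TTime bucketLength, std::size_t latencyBuckets) {
        return latestBucketStart - static_cast<TTime>(latencyBuckets) * bucketLength;
    }

    // A record can still be absorbed iff it is no older than that window.
    bool isDataAvailable(TTime time, TTime latestBucketStart, TTime bucketLength, std::size_t latencyBuckets) {
        return time >= earliestBucketStart(latestBucketStart, bucketLength, latencyBuckets);
    }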
-class MODEL_EXPORT CBucketGatherer -{ - public: - using TDoubleVec = std::vector; - using TDouble1Vec = core::CSmallVector; - using TSizeVec = std::vector; - using TStrVec = std::vector; - using TStrVecCItr = TStrVec::const_iterator; - using TStrCPtrVec = std::vector; - using TSizeUInt64Pr = std::pair; - using TSizeUInt64PrVec = std::vector; - using TFeatureVec = model_t::TFeatureVec; - using TOptionalDouble = boost::optional; - using TSizeSizePr = std::pair; - using TSizeSizePrUInt64Pr = std::pair; - using TSizeSizePrUInt64PrVec = std::vector; - using TDictionary = core::CCompressedDictionary<2>; - using TWordSizeUMap = TDictionary::CWordUMap::Type; - using TWordSizeUMapItr = TWordSizeUMap::iterator; - using TWordSizeUMapCItr = TWordSizeUMap::const_iterator; - using TSizeSizePrUInt64UMap = boost::unordered_map; - using TSizeSizePrUInt64UMapItr = TSizeSizePrUInt64UMap::iterator; - using TSizeSizePrUInt64UMapCItr = TSizeSizePrUInt64UMap::const_iterator; - using TSizeSizePrUInt64UMapQueue = CBucketQueue; - using TTimeSizeSizePrUInt64UMapMap = std::map; - using TSizeSizePrUInt64UMapQueueItr = TSizeSizePrUInt64UMapQueue::iterator; - using TSizeSizePrUInt64UMapQueueCItr = TSizeSizePrUInt64UMapQueue::const_iterator; - using TSizeSizePrUInt64UMapQueueCRItr = TSizeSizePrUInt64UMapQueue::const_reverse_iterator; - using TSizeSizePrUSet = boost::unordered_set; - using TSizeSizePrUSetCItr = TSizeSizePrUSet::const_iterator; - using TSizeSizePrUSetQueue = CBucketQueue; - using TTimeSizeSizePrUSetMap = std::map; - using TSizeSizePrUSetQueueCItr = TSizeSizePrUSetQueue::const_iterator; - using TStoredStringPtrVec = std::vector; - using TSizeSizePrStoredStringPtrPr = std::pair; - - //! \brief Hashes a ((size_t, size_t), string*) pair. - struct MODEL_EXPORT SSizeSizePrStoredStringPtrPrHash - { - std::size_t operator()(const TSizeSizePrStoredStringPtrPr &key) const - { - uint64_t seed = core::CHashing::hashCombine(static_cast(key.first.first), - static_cast(key.first.second)); - return core::CHashing::hashCombine(seed, s_Hasher(*key.second)); - } - core::CHashing::CMurmurHash2String s_Hasher; - }; - - //! \brief Checks two ((size_t, size_t), string*) pairs for equality. - struct MODEL_EXPORT SSizeSizePrStoredStringPtrPrEqual - { - bool operator()(const TSizeSizePrStoredStringPtrPr &lhs, - const TSizeSizePrStoredStringPtrPr &rhs) const - { - return lhs.first == rhs.first && *lhs.second == *rhs.second; - } - }; - - using TSizeSizePrStoredStringPtrPrUInt64UMap = - boost::unordered_map; - using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator; - using TSizeSizePrStoredStringPtrPrUInt64UMapItr = TSizeSizePrStoredStringPtrPrUInt64UMap::iterator; - using TSizeSizePrStoredStringPtrPrUInt64UMapVec = std::vector; - using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue = CBucketQueue; - using TSizeSizePrStoredStringPtrPrUInt64UMapVecCItr = TSizeSizePrStoredStringPtrPrUInt64UMapVec::const_iterator; - using TTimeSizeSizePrStoredStringPtrPrUInt64UMapVecMap = std::map; - using TSearchKeyCRef = boost::reference_wrapper; - using TFeatureAnyPr = std::pair; - using TFeatureAnyPrVec = std::vector; - using TMetricCategoryVec = std::vector; - using TTimeVec = std::vector; - using TTimeVecCItr = TTimeVec::const_iterator; - - public: - static const std::string EVENTRATE_BUCKET_GATHERER_TAG; - static const std::string METRIC_BUCKET_GATHERER_TAG; - - public: - //! \name Life-cycle - //@{ - //! Create a new data series gatherer. - //! - //! 
\param[in] dataGatherer The owning data gatherer. - //! \param[in] startTime The start of the time interval - //! for which to gather data. - CBucketGatherer(CDataGatherer &dataGatherer, - core_t::TTime startTime); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken for - //! a general purpose copy constructor. - CBucketGatherer(bool isForPersistence, - const CBucketGatherer &other); - - virtual ~CBucketGatherer() = default; - //@} - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter - virtual void baseAcceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Restore the state - virtual bool baseAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Create a clone of this data gatherer that will result in the same - //! persisted state. The clone may be incomplete in ways that do not - //! affect the persisted representation, and must not be used for any - //! other purpose. - //! \warning The caller owns the object returned. - virtual CBucketGatherer *cloneForPersistence() const = 0; - - //! The persistence tag name of the subclass. - virtual const std::string& persistenceTag() const = 0; - //@} - - //! \name Fields - //@{ - //! This is the common field in all searches "along" which the - //! probabilities are aggregated, i.e. the "by" field name for - //! individual models and the "over" field name for population - //! models. - virtual const std::string &personFieldName() const = 0; - - //! Get the attribute field name if one exists. - virtual const std::string &attributeFieldName() const = 0; - - //! Get the name of the field containing the metric value. - virtual const std::string &valueFieldName() const = 0; - - //! Get an iterator at the beginning the influencing field names. - virtual TStrVecCItr beginInfluencers() const = 0; - - //! Get an iterator at the end of the influencing field names. - virtual TStrVecCItr endInfluencers() const = 0; - - //! Get the fields for which to gather data. - //! - //! This defines the fields to extract from a record. These include - //! the fields which define the categories whose counts are being - //! analyzed, the fields containing metric series names and values - //! and the fields defining a population. - virtual const TStrVec &fieldsOfInterest() const = 0; - //@} - - //! Get a description of the component searches. - virtual std::string description() const = 0; - - //! \name Update - //@{ - //! Process the specified fields. - //! - //! This adds people and attributes as necessary and fills out the - //! event data from \p fieldValues. - virtual bool processFields(const TStrCPtrVec &fieldValues, - CEventData &result, - CResourceMonitor &resourceMonitor) = 0; - - //! Record the arrival of \p data at \p time. - bool addEventData(CEventData &data); - - //! Roll time forwards to \p time. - void timeNow(core_t::TTime time); - - //! Roll time to the end of the bucket that is latency after the sampled bucket. - void sampleNow(core_t::TTime sampleBucketStart); - - //! Roll time to the end of the bucket that is latency after the sampled bucket - //! without performing any updates that impact the model. - void skipSampleNow(core_t::TTime sampleBucketStart); - //@} - - //! \name People - //@{ - //! 
Get the non-zero counts by person for the bucketing interval - //! containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the non-zero counts by person. - //! The first element is the person identifier and the second their - //! count in the bucketing interval. The result is sorted by person. - //! \note We expect the non-zero counts to be sparse on the space - //! of people so use a sparse encoding: - //!
-        //!   \f$ pid \leftarrow c\f$
-        //! </pre>
- //! where,\n - //! \f$pid\f$ is the person identifier,\n - //! \f$c\f$ is the count for the person. - void personNonZeroCounts(core_t::TTime time, - TSizeUInt64PrVec &result) const; - - //! Stop gathering data on the people identified by \p peopleToRemove. - virtual void recyclePeople(const TSizeVec &peopleToRemove) = 0; - - //! Remove all traces of people whose identifiers are greater than - //! or equal to \p lowestPersonToRemove. - virtual void removePeople(std::size_t lowestPersonToRemove) = 0; - //@} - - //! \name Attribute - //@{ - //! Stop gathering data on the attributes identified by \p attributesToRemove. - virtual void recycleAttributes(const TSizeVec &attributesToRemove) = 0; - - //! Remove all traces of attributes whose identifiers are greater than - //! or equal to \p lowestAttributeToRemove. - virtual void removeAttributes(std::size_t lowestAttributeToRemove) = 0; - //@} - - //! \name Time - //@{ - //! Get the start of the current bucketing time interval. - core_t::TTime currentBucketStartTime() const; - - //! Set the start of the current bucketing time interval. - void currentBucketStartTime(core_t::TTime time); - - //! The earliest time for which data can still arrive. - core_t::TTime earliestBucketStartTime() const; - - //! Get the length of the bucketing time interval. - core_t::TTime bucketLength() const; - - //! Check if data is available at \p time. - bool dataAvailable(core_t::TTime time) const; - - //! For each bucket in the interval [\p startTime, \p endTime], - //! validate that it can be sampled and increment \p startTime - //! to the first valid bucket or \p endTime if no valid buckets - //! exist. - //! - //! \param[in,out] startTime The start of the interval to sample. - //! \param[in] endTime The end of the interval to sample. - bool validateSampleTimes(core_t::TTime &startTime, - core_t::TTime endTime) const; - - //! Print the current bucket. - std::string printCurrentBucket() const; - //@} - - //! \name Counts - //@{ - //! Get the non-zero (person, attribute) pair counts in the - //! bucketing interval corresponding to the given time. - const TSizeSizePrUInt64UMap &bucketCounts(core_t::TTime time) const; - - //! Get the non-zero (person, attribute) pair counts for each - //! value of influencing field. - const TSizeSizePrStoredStringPtrPrUInt64UMapVec &influencerCounts(core_t::TTime time) const; - //@} - - //! Get the checksum of this gatherer. - virtual uint64_t checksum() const = 0; - - //! Debug the memory used by this component. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; - - //! Get the memory used by this component. - virtual std::size_t memoryUsage() const = 0; - - //! Get the static size of this object. - virtual std::size_t staticSize() const = 0; - - //! Clear this data gatherer. - virtual void clear() = 0; - - //! Reset bucket and return true if bucket was successfully - //! reset or false otherwise. - virtual bool resetBucket(core_t::TTime bucketStart) = 0; - - //! Release memory that is no longer needed - virtual void releaseMemory(core_t::TTime samplingCutoffTime) = 0; - - //! Remove the values in queue for the people or attributes - //! in \p toRemove. - //! - //! \tparam T This must be an associative array from person - //! id and/or attribute id to some corresponding value. 
- template - static void remove(const TSizeVec &toRemove, - const F &extractId, - CBucketQueue &queue) - { - for (auto bucketItr = queue.begin(); bucketItr != queue.end(); ++bucketItr) - { - T &bucket = *bucketItr; - for (auto i = bucket.begin(); i != bucket.end(); /**/) - { - if (std::binary_search(toRemove.begin(), toRemove.end(), extractId(*i))) - { - i = bucket.erase(i); - } - else - { - ++i; - } +class MODEL_EXPORT CBucketGatherer { +public: + using TDoubleVec = std::vector; + using TDouble1Vec = core::CSmallVector; + using TSizeVec = std::vector; + using TStrVec = std::vector; + using TStrVecCItr = TStrVec::const_iterator; + using TStrCPtrVec = std::vector; + using TSizeUInt64Pr = std::pair; + using TSizeUInt64PrVec = std::vector; + using TFeatureVec = model_t::TFeatureVec; + using TOptionalDouble = boost::optional; + using TSizeSizePr = std::pair; + using TSizeSizePrUInt64Pr = std::pair; + using TSizeSizePrUInt64PrVec = std::vector; + using TDictionary = core::CCompressedDictionary<2>; + using TWordSizeUMap = TDictionary::CWordUMap::Type; + using TWordSizeUMapItr = TWordSizeUMap::iterator; + using TWordSizeUMapCItr = TWordSizeUMap::const_iterator; + using TSizeSizePrUInt64UMap = boost::unordered_map; + using TSizeSizePrUInt64UMapItr = TSizeSizePrUInt64UMap::iterator; + using TSizeSizePrUInt64UMapCItr = TSizeSizePrUInt64UMap::const_iterator; + using TSizeSizePrUInt64UMapQueue = CBucketQueue; + using TTimeSizeSizePrUInt64UMapMap = std::map; + using TSizeSizePrUInt64UMapQueueItr = TSizeSizePrUInt64UMapQueue::iterator; + using TSizeSizePrUInt64UMapQueueCItr = TSizeSizePrUInt64UMapQueue::const_iterator; + using TSizeSizePrUInt64UMapQueueCRItr = TSizeSizePrUInt64UMapQueue::const_reverse_iterator; + using TSizeSizePrUSet = boost::unordered_set; + using TSizeSizePrUSetCItr = TSizeSizePrUSet::const_iterator; + using TSizeSizePrUSetQueue = CBucketQueue; + using TTimeSizeSizePrUSetMap = std::map; + using TSizeSizePrUSetQueueCItr = TSizeSizePrUSetQueue::const_iterator; + using TStoredStringPtrVec = std::vector; + using TSizeSizePrStoredStringPtrPr = std::pair; + + //! \brief Hashes a ((size_t, size_t), string*) pair. + struct MODEL_EXPORT SSizeSizePrStoredStringPtrPrHash { + std::size_t operator()(const TSizeSizePrStoredStringPtrPr& key) const { + uint64_t seed = core::CHashing::hashCombine(static_cast(key.first.first), static_cast(key.first.second)); + return core::CHashing::hashCombine(seed, s_Hasher(*key.second)); + } + core::CHashing::CMurmurHash2String s_Hasher; + }; + + //! \brief Checks two ((size_t, size_t), string*) pairs for equality. 
+ struct MODEL_EXPORT SSizeSizePrStoredStringPtrPrEqual { + bool operator()(const TSizeSizePrStoredStringPtrPr& lhs, const TSizeSizePrStoredStringPtrPr& rhs) const { + return lhs.first == rhs.first && *lhs.second == *rhs.second; + } + }; + + using TSizeSizePrStoredStringPtrPrUInt64UMap = + boost::unordered_map; + using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator; + using TSizeSizePrStoredStringPtrPrUInt64UMapItr = TSizeSizePrStoredStringPtrPrUInt64UMap::iterator; + using TSizeSizePrStoredStringPtrPrUInt64UMapVec = std::vector; + using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue = CBucketQueue; + using TSizeSizePrStoredStringPtrPrUInt64UMapVecCItr = TSizeSizePrStoredStringPtrPrUInt64UMapVec::const_iterator; + using TTimeSizeSizePrStoredStringPtrPrUInt64UMapVecMap = std::map; + using TSearchKeyCRef = boost::reference_wrapper; + using TFeatureAnyPr = std::pair; + using TFeatureAnyPrVec = std::vector; + using TMetricCategoryVec = std::vector; + using TTimeVec = std::vector; + using TTimeVecCItr = TTimeVec::const_iterator; + +public: + static const std::string EVENTRATE_BUCKET_GATHERER_TAG; + static const std::string METRIC_BUCKET_GATHERER_TAG; + +public: + //! \name Life-cycle + //@{ + //! Create a new data series gatherer. + //! + //! \param[in] dataGatherer The owning data gatherer. + //! \param[in] startTime The start of the time interval + //! for which to gather data. + CBucketGatherer(CDataGatherer& dataGatherer, core_t::TTime startTime); + + //! Create a copy that will result in the same persisted state as the + //! original. This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken for + //! a general purpose copy constructor. + CBucketGatherer(bool isForPersistence, const CBucketGatherer& other); + + virtual ~CBucketGatherer() = default; + //@} + + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter + virtual void baseAcceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Restore the state + virtual bool baseAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Create a clone of this data gatherer that will result in the same + //! persisted state. The clone may be incomplete in ways that do not + //! affect the persisted representation, and must not be used for any + //! other purpose. + //! \warning The caller owns the object returned. + virtual CBucketGatherer* cloneForPersistence() const = 0; + + //! The persistence tag name of the subclass. + virtual const std::string& persistenceTag() const = 0; + //@} + + //! \name Fields + //@{ + //! This is the common field in all searches "along" which the + //! probabilities are aggregated, i.e. the "by" field name for + //! individual models and the "over" field name for population + //! models. + virtual const std::string& personFieldName() const = 0; + + //! Get the attribute field name if one exists. + virtual const std::string& attributeFieldName() const = 0; + + //! Get the name of the field containing the metric value. + virtual const std::string& valueFieldName() const = 0; + + //! Get an iterator at the beginning the influencing field names. + virtual TStrVecCItr beginInfluencers() const = 0; + + //! Get an iterator at the end of the influencing field names. + virtual TStrVecCItr endInfluencers() const = 0; + + //! Get the fields for which to gather data. 
+ //! + //! This defines the fields to extract from a record. These include + //! the fields which define the categories whose counts are being + //! analyzed, the fields containing metric series names and values + //! and the fields defining a population. + virtual const TStrVec& fieldsOfInterest() const = 0; + //@} + + //! Get a description of the component searches. + virtual std::string description() const = 0; + + //! \name Update + //@{ + //! Process the specified fields. + //! + //! This adds people and attributes as necessary and fills out the + //! event data from \p fieldValues. + virtual bool processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) = 0; + + //! Record the arrival of \p data at \p time. + bool addEventData(CEventData& data); + + //! Roll time forwards to \p time. + void timeNow(core_t::TTime time); + + //! Roll time to the end of the bucket that is latency after the sampled bucket. + void sampleNow(core_t::TTime sampleBucketStart); + + //! Roll time to the end of the bucket that is latency after the sampled bucket + //! without performing any updates that impact the model. + void skipSampleNow(core_t::TTime sampleBucketStart); + //@} + + //! \name People + //@{ + //! Get the non-zero counts by person for the bucketing interval + //! containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the non-zero counts by person. + //! The first element is the person identifier and the second their + //! count in the bucketing interval. The result is sorted by person. + //! \note We expect the non-zero counts to be sparse on the space + //! of people so use a sparse encoding: + //!
+    //!   \f$ pid \leftarrow c\f$
+    //! </pre>
+ //! where,\n + //! \f$pid\f$ is the person identifier,\n + //! \f$c\f$ is the count for the person. + void personNonZeroCounts(core_t::TTime time, TSizeUInt64PrVec& result) const; + + //! Stop gathering data on the people identified by \p peopleToRemove. + virtual void recyclePeople(const TSizeVec& peopleToRemove) = 0; + + //! Remove all traces of people whose identifiers are greater than + //! or equal to \p lowestPersonToRemove. + virtual void removePeople(std::size_t lowestPersonToRemove) = 0; + //@} + + //! \name Attribute + //@{ + //! Stop gathering data on the attributes identified by \p attributesToRemove. + virtual void recycleAttributes(const TSizeVec& attributesToRemove) = 0; + + //! Remove all traces of attributes whose identifiers are greater than + //! or equal to \p lowestAttributeToRemove. + virtual void removeAttributes(std::size_t lowestAttributeToRemove) = 0; + //@} + + //! \name Time + //@{ + //! Get the start of the current bucketing time interval. + core_t::TTime currentBucketStartTime() const; + + //! Set the start of the current bucketing time interval. + void currentBucketStartTime(core_t::TTime time); + + //! The earliest time for which data can still arrive. + core_t::TTime earliestBucketStartTime() const; + + //! Get the length of the bucketing time interval. + core_t::TTime bucketLength() const; + + //! Check if data is available at \p time. + bool dataAvailable(core_t::TTime time) const; + + //! For each bucket in the interval [\p startTime, \p endTime], + //! validate that it can be sampled and increment \p startTime + //! to the first valid bucket or \p endTime if no valid buckets + //! exist. + //! + //! \param[in,out] startTime The start of the interval to sample. + //! \param[in] endTime The end of the interval to sample. + bool validateSampleTimes(core_t::TTime& startTime, core_t::TTime endTime) const; + + //! Print the current bucket. + std::string printCurrentBucket() const; + //@} + + //! \name Counts + //@{ + //! Get the non-zero (person, attribute) pair counts in the + //! bucketing interval corresponding to the given time. + const TSizeSizePrUInt64UMap& bucketCounts(core_t::TTime time) const; + + //! Get the non-zero (person, attribute) pair counts for each + //! value of influencing field. + const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencerCounts(core_t::TTime time) const; + //@} + + //! Get the checksum of this gatherer. + virtual uint64_t checksum() const = 0; + + //! Debug the memory used by this component. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; + + //! Get the memory used by this component. + virtual std::size_t memoryUsage() const = 0; + + //! Get the static size of this object. + virtual std::size_t staticSize() const = 0; + + //! Clear this data gatherer. + virtual void clear() = 0; + + //! Reset bucket and return true if bucket was successfully + //! reset or false otherwise. + virtual bool resetBucket(core_t::TTime bucketStart) = 0; + + //! Release memory that is no longer needed + virtual void releaseMemory(core_t::TTime samplingCutoffTime) = 0; + + //! Remove the values in queue for the people or attributes + //! in \p toRemove. + //! + //! \tparam T This must be an associative array from person + //! id and/or attribute id to some corresponding value. 
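One precondition worth noting for the remove() helper that follows: membership is tested with std::binary_search, so the ids in toRemove must be sorted ascending (and ideally deduplicated) before the call. A caller-side sketch with an invented name:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    void prepareIdsForRemove(std::vector<std::size_t>& peopleToRemove) {
        std::sort(peopleToRemove.begin(), peopleToRemove.end());
        peopleToRemove.erase(std::unique(peopleToRemove.begin(), peopleToRemove.end()),
                             peopleToRemove.end());
    }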
+ template + static void remove(const TSizeVec& toRemove, const F& extractId, CBucketQueue& queue) { + for (auto bucketItr = queue.begin(); bucketItr != queue.end(); ++bucketItr) { + T& bucket = *bucketItr; + for (auto i = bucket.begin(); i != bucket.end(); /**/) { + if (std::binary_search(toRemove.begin(), toRemove.end(), extractId(*i))) { + i = bucket.erase(i); + } else { + ++i; } } } - - //! Remove the values in queue for the people or attributes - //! in \p toRemove. - //! - //! \tparam T This must be a vector of associative array from person - //! id and/or attribute id to some corresponding value. - template - static void remove(const TSizeVec &toRemove, - const F &extractId, - CBucketQueue > &queue) - { - for (auto bucketItr = queue.begin(); bucketItr != queue.end(); ++bucketItr) - { - for (std::size_t i = 0u; i < bucketItr->size(); ++i) - { - T &bucket = (*bucketItr)[i]; - for (auto j = bucket.begin(); j != bucket.end(); /**/) - { - if (std::binary_search(toRemove.begin(), toRemove.end(), extractId(j->first))) - { - j = bucket.erase(j); - } - else - { - ++j; - } + } + + //! Remove the values in queue for the people or attributes + //! in \p toRemove. + //! + //! \tparam T This must be a vector of associative array from person + //! id and/or attribute id to some corresponding value. + template + static void remove(const TSizeVec& toRemove, const F& extractId, CBucketQueue>& queue) { + for (auto bucketItr = queue.begin(); bucketItr != queue.end(); ++bucketItr) { + for (std::size_t i = 0u; i < bucketItr->size(); ++i) { + T& bucket = (*bucketItr)[i]; + for (auto j = bucket.begin(); j != bucket.end(); /**/) { + if (std::binary_search(toRemove.begin(), toRemove.end(), extractId(j->first))) { + j = bucket.erase(j); + } else { + ++j; } } } } - - //! Get the raw data for all features for the bucketing time interval - //! containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the feature data at \p time. - virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, - TFeatureAnyPrVec &result) const = 0; - - //! Get a reference to the owning data gatherer. - const CDataGatherer &dataGatherer() const; - - //! Has this pid/cid pair had only explicit null records? - bool hasExplicitNullsOnly(core_t::TTime time, std::size_t pid, std::size_t cid) const; - - //! Create samples if possible for the bucket pointed out by \p time. - virtual void sample(core_t::TTime time); - - private: - //! Resize the necessary data structures so they can hold values - //! for the person and/or attribute identified by \p pid and \p cid, - //! respectively. - //! - //! \param[in] pid The identifier of the person to accommodate. - //! \param[in] cid The identifier of the attribute to accommodate. - virtual void resize(std::size_t pid, std::size_t cid) = 0; - - //! Record the arrival of \p values for attribute identified - //! by \p cid and person identified by \p pid. - //! - //! \param[in] pid The identifier of the person who generated - //! the value. - //! \param[in] cid The identifier of the value's attribute. - //! \param[in] time The time of the \p values. - //! \param[in] values The metric statistic value(s). - //! \param[in] count The number of measurements in the metric - //! statistic. - //! \param[in] stringValue The value for the function string argument - //! if there is one or null. - //! \param[in] influences The influencing field values which label - //! the value. 
- virtual void addValue(std::size_t pid, - std::size_t cid, - core_t::TTime time, - const CEventData::TDouble1VecArray &values, - std::size_t count, - const CEventData::TOptionalStr &stringValue, - const TStoredStringPtrVec &influences) = 0; - - //! Handle the start of a new bucketing interval. - virtual void startNewBucket(core_t::TTime time, bool skipUpdates) = 0; - - //! Roll time forwards to \p time and update depending on \p skipUpdates - void hiddenTimeNow(core_t::TTime time, bool skipUpdates); - - protected: - //! Reference to the owning data gatherer - CDataGatherer &m_DataGatherer; - - private: - //! The earliest time of any record that has arrived. - core_t::TTime m_EarliestTime; - - //! The start of the current bucketing interval. - core_t::TTime m_BucketStart; - - //! The non-zero (person, attribute) pair counts in the current - //! bucketing interval. - TSizeSizePrUInt64UMapQueue m_PersonAttributeCounts; - - //! The counts for longer bucketing intervals. - // TODO This is not queued so can't handle out of order data. - TTimeSizeSizePrUInt64UMapMap m_MultiBucketPersonAttributeCounts; - - //! A set per bucket that contains a (pid,cid) pair if at least - //! one explicit null record has been seen. - TSizeSizePrUSetQueue m_PersonAttributeExplicitNulls; - - //! The explicit nulls for longer bucketing intervals. - // TODO This is not queued so can't handle out of order data. - TTimeSizeSizePrUSetMap m_MultiBucketPersonAttributeExplicitNulls; - - //! The influencing field value counts per person and/or attribute. - TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue m_InfluencerCounts; - - //! The influencing field value counts for longer bucketing intervals. - // TODO This is not queued so can't handle out of order data. - TTimeSizeSizePrStoredStringPtrPrUInt64UMapVecMap m_MultiBucketInfluencerCounts; + } + + //! Get the raw data for all features for the bucketing time interval + //! containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the feature data at \p time. + virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec& result) const = 0; + + //! Get a reference to the owning data gatherer. + const CDataGatherer& dataGatherer() const; + + //! Has this pid/cid pair had only explicit null records? + bool hasExplicitNullsOnly(core_t::TTime time, std::size_t pid, std::size_t cid) const; + + //! Create samples if possible for the bucket pointed out by \p time. + virtual void sample(core_t::TTime time); + +private: + //! Resize the necessary data structures so they can hold values + //! for the person and/or attribute identified by \p pid and \p cid, + //! respectively. + //! + //! \param[in] pid The identifier of the person to accommodate. + //! \param[in] cid The identifier of the attribute to accommodate. + virtual void resize(std::size_t pid, std::size_t cid) = 0; + + //! Record the arrival of \p values for attribute identified + //! by \p cid and person identified by \p pid. + //! + //! \param[in] pid The identifier of the person who generated + //! the value. + //! \param[in] cid The identifier of the value's attribute. + //! \param[in] time The time of the \p values. + //! \param[in] values The metric statistic value(s). + //! \param[in] count The number of measurements in the metric + //! statistic. + //! \param[in] stringValue The value for the function string argument + //! if there is one or null. + //! \param[in] influences The influencing field values which label + //! the value. 
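To illustrate the shape of the data the addValue() declared just below receives, a caller collapses raw measurements into a statistic value plus a measurement count. The mean is just one possible statistic and the helper name is invented.

    #include <cstddef>
    #include <vector>

    // Returns the statistic value; count reports how many measurements
    // it summarizes, which addValue() receives alongside the value.
    double meanStatistic(const std::vector<double>& measurements, std::size_t& count) {
        count = measurements.size();
        if (count == 0) {
            return 0.0;
        }
        double sum = 0.0;
        for (double measurement : measurements) {
            sum += measurement;
        }
        return sum / static_cast<double>(count);
    }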
+    virtual void addValue(std::size_t pid,
+                          std::size_t cid,
+                          core_t::TTime time,
+                          const CEventData::TDouble1VecArray& values,
+                          std::size_t count,
+                          const CEventData::TOptionalStr& stringValue,
+                          const TStoredStringPtrVec& influences) = 0;
+
+    //! Handle the start of a new bucketing interval.
+    virtual void startNewBucket(core_t::TTime time, bool skipUpdates) = 0;
+
+    //! Roll time forwards to \p time and update depending on \p skipUpdates
+    void hiddenTimeNow(core_t::TTime time, bool skipUpdates);
+
+protected:
+    //! Reference to the owning data gatherer
+    CDataGatherer& m_DataGatherer;
+
+private:
+    //! The earliest time of any record that has arrived.
+    core_t::TTime m_EarliestTime;
+
+    //! The start of the current bucketing interval.
+    core_t::TTime m_BucketStart;
+
+    //! The non-zero (person, attribute) pair counts in the current
+    //! bucketing interval.
+    TSizeSizePrUInt64UMapQueue m_PersonAttributeCounts;
+
+    //! The counts for longer bucketing intervals.
+    // TODO This is not queued so can't handle out of order data.
+    TTimeSizeSizePrUInt64UMapMap m_MultiBucketPersonAttributeCounts;
+
+    //! A set per bucket that contains a (pid,cid) pair if at least
+    //! one explicit null record has been seen.
+    TSizeSizePrUSetQueue m_PersonAttributeExplicitNulls;
+
+    //! The explicit nulls for longer bucketing intervals.
+    // TODO This is not queued so can't handle out of order data.
+    TTimeSizeSizePrUSetMap m_MultiBucketPersonAttributeExplicitNulls;
+
+    //! The influencing field value counts per person and/or attribute.
+    TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue m_InfluencerCounts;
+
+    //! The influencing field value counts for longer bucketing intervals.
+    // TODO This is not queued so can't handle out of order data.
+    TTimeSizeSizePrStoredStringPtrPrUInt64UMapVecMap m_MultiBucketInfluencerCounts;
 };
-
 }
 }
diff --git a/include/model/CBucketQueue.h b/include/model/CBucketQueue.h
index 279c65e817..f2e3d32ca8 100644
--- a/include/model/CBucketQueue.h
+++ b/include/model/CBucketQueue.h
@@ -8,20 +8,18 @@
 #define INCLUDED_ml_model_CBucketQueue_h
 
 #include
-#include
 #include
-#include
 #include
+#include
 #include
 #include
 #include
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
 //! \brief A fixed size queue with the purpose of storing bucket-associated data.
 //!
@@ -32,380 +30,254 @@ namespace model
 //! The queue manages the matching of the time to the corresponding
 //! bucket.
 template<typename T>
-class CBucketQueue
-{
+class CBucketQueue {
+public:
+    using TQueue = boost::circular_buffer<T>;
+    using value_type = typename TQueue::value_type;
+    using iterator = typename TQueue::iterator;
+    using const_iterator = typename TQueue::const_iterator;
+    using const_reverse_iterator = typename TQueue::const_reverse_iterator;
+
+public:
+    static const std::string BUCKET_TAG;
+    static const std::string INDEX_TAG;
+
+    //! \brief Wraps persist and restore so they can be used with boost::bind.
+    template<typename F>
+    class CSerializer {
     public:
-        using TQueue = boost::circular_buffer<T>;
-        using value_type = typename TQueue::value_type;
-        using iterator = typename TQueue::iterator;
-        using const_iterator = typename TQueue::const_iterator;
-        using const_reverse_iterator = typename TQueue::const_reverse_iterator;
-
-    public:
-        static const std::string BUCKET_TAG;
-        static const std::string INDEX_TAG;
-
-        //! \brief Wraps persist and restore so they can be used with boost::bind. 
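The CSerializer wrapper just declared exists so that a single object can be handed to boost::bind for both directions of serialization: the const operator() persists a queue, the non-const operator() restores one. A standalone sketch of that two-operator() shape, using plain streams as stand-ins for the real inserter and traverser types (illustrative only, not part of the patch):

    #include <iostream>
    #include <sstream>

    struct SSerializerSketch {
        // Persist overload: write the value to the "inserter".
        void operator()(int value, std::ostream& inserter) const { inserter << value << ' '; }
        // Restore overload: read the value back from the "traverser".
        bool operator()(int& value, std::istream& traverser) const { return static_cast<bool>(traverser >> value); }
    };

    int main() {
        SSerializerSketch serializer;
        std::stringstream state;
        serializer(42, state);                 // persist
        int restored = 0;
        bool ok = serializer(restored, state); // restore
        std::cout << ok << ' ' << restored << '\n'; // prints: 1 42
        return 0;
    }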
-        template<typename F>
-        class CSerializer
-        {
-        public:
-            CSerializer(const T &initial = T(),
-                        const F &serializer = F()) :
-                m_InitialValue(initial),
-                m_Serializer(serializer)
-            {}
-
-            void operator()(const CBucketQueue &queue,
-                            core::CStatePersistInserter &inserter) const
-            {
-                queue.persist(m_Serializer, inserter);
-            }
-
-            bool operator()(CBucketQueue &queue,
-                            core::CStateRestoreTraverser &traverser) const
-            {
-                return queue.restore(m_Serializer, m_InitialValue, traverser);
-            }
-
-        private:
-            T m_InitialValue;
-            F m_Serializer;
-        };
+        CSerializer(const T& initial = T(), const F& serializer = F()) : m_InitialValue(initial), m_Serializer(serializer) {}
+
+        void operator()(const CBucketQueue& queue, core::CStatePersistInserter& inserter) const { queue.persist(m_Serializer, inserter); }
+
+        bool operator()(CBucketQueue& queue, core::CStateRestoreTraverser& traverser) const {
+            return queue.restore(m_Serializer, m_InitialValue, traverser);
+        }
+
+    private:
+        T m_InitialValue;
+        F m_Serializer;
+    };
-
-    public:
-        //! Constructs a new queue of size \p latencyBuckets, with bucket-length
-        //! set to \p bucketLength and initialises it with \p latencyBuckets + 1
-        //! up to the bucket corresponding to the supplied \p latestBucketStart.
-        //!
-        //! \param[in] latencyBuckets The number of buckets that are within
-        //! the latency window.
-        //! \param[in] bucketLength The bucket length.
-        //! \param[in] latestBucketStart The start time of the latest bucket.
-        CBucketQueue(std::size_t latencyBuckets,
-                     core_t::TTime bucketLength,
-                     core_t::TTime latestBucketStart,
-                     T initial = T()) :
-            m_Queue(latencyBuckets + 1),
-            m_LatestBucketEnd(latestBucketStart + bucketLength - 1),
-            m_BucketLength(bucketLength)
-        {
-            this->fill(initial);
-            LOG_TRACE("Queue created :");
-            LOG_TRACE("Bucket length = " << m_BucketLength);
-            LOG_TRACE("LatestBucketEnd = " << m_LatestBucketEnd);
-        }
-
-        //! Pushes an item to the queue and moves the time forward
-        //! by bucket length. If the \p time is earlier than the
-        //! latest bucket end, the push operation is ignored.
-        //!
-        //! \param[in] item The item to be pushed in the queue.
-        //! \param[in] time The time to which the item corresponds.
-        void push(const T &item, core_t::TTime time)
-        {
-            if (time <= m_LatestBucketEnd)
-            {
-                LOG_ERROR("Push was called with early time = " << time << ", latest bucket end time = " << m_LatestBucketEnd);
-                return;
-            }
-            m_LatestBucketEnd += m_BucketLength;
-            this->push(item);
-        }
+
+public:
+    //! Constructs a new queue of size \p latencyBuckets, with bucket-length
+    //! set to \p bucketLength and initialises it with \p latencyBuckets + 1
+    //! up to the bucket corresponding to the supplied \p latestBucketStart.
+    //!
+    //! \param[in] latencyBuckets The number of buckets that are within
+    //! the latency window.
+    //! \param[in] bucketLength The bucket length.
+    //! \param[in] latestBucketStart The start time of the latest bucket.
+    CBucketQueue(std::size_t latencyBuckets, core_t::TTime bucketLength, core_t::TTime latestBucketStart, T initial = T())
+        : m_Queue(latencyBuckets + 1), m_LatestBucketEnd(latestBucketStart + bucketLength - 1), m_BucketLength(bucketLength) {
+        this->fill(initial);
+        LOG_TRACE("Queue created :");
+        LOG_TRACE("Bucket length = " << m_BucketLength);
+        LOG_TRACE("LatestBucketEnd = " << m_LatestBucketEnd);
+    }
+
+    //! Pushes an item to the queue and moves the time forward
+    //! by bucket length. If the \p time is earlier than the
+    //! latest bucket end, the push operation is ignored.
+    //!
+    //! 
\param[in] item The item to be pushed in the queue. + //! \param[in] time The time to which the item corresponds. + void push(const T& item, core_t::TTime time) { + if (time <= m_LatestBucketEnd) { + LOG_ERROR("Push was called with early time = " << time << ", latest bucket end time = " << m_LatestBucketEnd); + return; } + m_LatestBucketEnd += m_BucketLength; + this->push(item); + } - //! Pushes an item to the queue. This is only intended to be used - //! internally and from clients that perform restoration of the queue. - void push(const T &item) - { - m_Queue.push_front(item); - LOG_TRACE("Queue after push -> " << core::CContainerPrinter::print(*this)); - } + //! Pushes an item to the queue. This is only intended to be used + //! internally and from clients that perform restoration of the queue. + void push(const T& item) { + m_Queue.push_front(item); + LOG_TRACE("Queue after push -> " << core::CContainerPrinter::print(*this)); + } - //! Returns the item in the queue that corresponds to the bucket - //! indicated by \p time. - T &get(core_t::TTime time) - { - return m_Queue[this->index(time)]; - } + //! Returns the item in the queue that corresponds to the bucket + //! indicated by \p time. + T& get(core_t::TTime time) { return m_Queue[this->index(time)]; } - //! Returns the item in the queue that corresponds to the bucket - //! indicated by \p time. - const T &get(core_t::TTime time) const - { - return m_Queue[this->index(time)]; - } + //! Returns the item in the queue that corresponds to the bucket + //! indicated by \p time. + const T& get(core_t::TTime time) const { return m_Queue[this->index(time)]; } - //! Returns the size of the queue. - std::size_t size() const - { - return m_Queue.size(); - } + //! Returns the size of the queue. + std::size_t size() const { return m_Queue.size(); } - //! Is the queue empty? - bool empty() const - { - return m_Queue.empty(); - } + //! Is the queue empty? + bool empty() const { return m_Queue.empty(); } - //! Removes all items from the queue and fills with initial values - //! Note, the queue should never be empty. - void clear(const T &initial = T()) - { - this->fill(initial); - } + //! Removes all items from the queue and fills with initial values + //! Note, the queue should never be empty. + void clear(const T& initial = T()) { this->fill(initial); } - //! Resets the queue to \p startTime. - //! This will clear the queue and will fill it with default items. - void reset(core_t::TTime startTime, - const T &initial = T()) - { - m_LatestBucketEnd = startTime + m_BucketLength - 1; - this->fill(initial); - } + //! Resets the queue to \p startTime. + //! This will clear the queue and will fill it with default items. + void reset(core_t::TTime startTime, const T& initial = T()) { + m_LatestBucketEnd = startTime + m_BucketLength - 1; + this->fill(initial); + } - //! Returns an iterator pointing to the latest bucket and directed - //! towards the earlier buckets. - iterator begin() - { - return m_Queue.begin(); - } + //! Returns an iterator pointing to the latest bucket and directed + //! towards the earlier buckets. + iterator begin() { return m_Queue.begin(); } - //! Returns an iterator pointing to the end of the queue. - iterator end() - { - return m_Queue.end(); - } + //! Returns an iterator pointing to the end of the queue. + iterator end() { return m_Queue.end(); } - //! Returns an iterator pointing to the latest bucket and directed - //! towards the earlier buckets. - const_iterator begin() const - { - return m_Queue.begin(); - } + //! 
Returns an iterator pointing to the latest bucket and directed + //! towards the earlier buckets. + const_iterator begin() const { return m_Queue.begin(); } - //! Returns an iterator pointing to the end of the queue. - const_iterator end() const - { - return m_Queue.end(); - } + //! Returns an iterator pointing to the end of the queue. + const_iterator end() const { return m_Queue.end(); } - //! Returns a reverse_iterator pointing to the earliest bucket and directed - //! towards the later buckets. - const_reverse_iterator rbegin() const - { - return m_Queue.rbegin(); - } + //! Returns a reverse_iterator pointing to the earliest bucket and directed + //! towards the later buckets. + const_reverse_iterator rbegin() const { return m_Queue.rbegin(); } - //! Returns an iterator pointing to the end of the "reversed" queue. - const_reverse_iterator rend() const - { - return m_Queue.rend(); - } + //! Returns an iterator pointing to the end of the "reversed" queue. + const_reverse_iterator rend() const { return m_Queue.rend(); } - //! Returns the item in the queue corresponding to the earliest bucket. - T &earliest() - { - return m_Queue.back(); - } + //! Returns the item in the queue corresponding to the earliest bucket. + T& earliest() { return m_Queue.back(); } - //! Returns the item corresponding to the latest bucket. - T &latest() - { - return m_Queue.front(); - } + //! Returns the item corresponding to the latest bucket. + T& latest() { return m_Queue.front(); } - //! Returns the latest bucket end time, as tracked by the queue. - core_t::TTime latestBucketEnd() const - { - return m_LatestBucketEnd; - } + //! Returns the latest bucket end time, as tracked by the queue. + core_t::TTime latestBucketEnd() const { return m_LatestBucketEnd; } - //! Debug the memory used by this component. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CBucketQueue"); - core::CMemoryDebug::dynamicSize("m_Queue", m_Queue, mem); - } + //! Debug the memory used by this component. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CBucketQueue"); + core::CMemoryDebug::dynamicSize("m_Queue", m_Queue, mem); + } - //! Get the memory used by this component. - std::size_t memoryUsage() const - { - return core::CMemory::dynamicSize(m_Queue); - } + //! Get the memory used by this component. + std::size_t memoryUsage() const { return core::CMemory::dynamicSize(m_Queue); } - //! Prints the contents of the queue. - std::string print() const - { - return core::CContainerPrinter::print(m_Queue); - } + //! Prints the contents of the queue. + std::string print() const { return core::CContainerPrinter::print(m_Queue); } - //! Return the configured bucketlength of this queue - core_t::TTime bucketLength() const - { - return m_BucketLength; - } + //! Return the configured bucketlength of this queue + core_t::TTime bucketLength() const { return m_BucketLength; } - //! Generic persist interface that assumes the bucket items can - //! be persisted by core::CPersistUtils - void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - for (std::size_t i = 0; i < m_Queue.size(); i++) - { - inserter.insertValue(INDEX_TAG, i); - core::CPersistUtils::persist(BUCKET_TAG, m_Queue[i], inserter); - } + //! Generic persist interface that assumes the bucket items can + //! 
be persisted by core::CPersistUtils
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+        for (std::size_t i = 0; i < m_Queue.size(); i++) {
+            inserter.insertValue(INDEX_TAG, i);
+            core::CPersistUtils::persist(BUCKET_TAG, m_Queue[i], inserter);
+        }
+    }
-
-        //! Generic restore interface that assumes the bucket items can
-        //! be restored by core::CPersistUtils
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-        {
-            std::size_t i = 0;
-            do
-            {
-                if (traverser.name() == INDEX_TAG)
-                {
-                    if (core::CStringUtils::stringToType(traverser.value(), i) == false)
-                    {
-                        LOG_ERROR("Bad index in " << traverser.value());
-                        return false;
-                    }
-                }
-                else if (traverser.name() == BUCKET_TAG)
-                {
-                    if (i >= m_Queue.size())
-                    {
-                        LOG_WARN("Bucket queue is smaller on restore than on persist: " <<
-                                 i << " >= " << m_Queue.size() <<
-                                 ". Extra buckets will be ignored.");
-                        // Restore into a temporary
-                        T dummy;
-                        if (!(core::CPersistUtils::restore(BUCKET_TAG, dummy, traverser)))
-                        {
-                            LOG_ERROR("Invalid bucket");
-                        }
-                    }
-                    else
-                    {
-                        if (!(core::CPersistUtils::restore(BUCKET_TAG, m_Queue[i], traverser)))
-                        {
-                            LOG_ERROR("Invalid bucket");
-                            return false;
-                        }
-                    }
-                }
-            }
-            while (traverser.next());
-            return true;
-        }
+
+    //! Generic restore interface that assumes the bucket items can
+    //! be restored by core::CPersistUtils
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+        std::size_t i = 0;
+        do {
+            if (traverser.name() == INDEX_TAG) {
+                if (core::CStringUtils::stringToType(traverser.value(), i) == false) {
+                    LOG_ERROR("Bad index in " << traverser.value());
+                    return false;
+                }
+            } else if (traverser.name() == BUCKET_TAG) {
+                if (i >= m_Queue.size()) {
+                    LOG_WARN("Bucket queue is smaller on restore than on persist: " << i << " >= " << m_Queue.size()
+                                                                                    << ". Extra buckets will be ignored.");
+                    // Restore into a temporary
+                    T dummy;
+                    if (!(core::CPersistUtils::restore(BUCKET_TAG, dummy, traverser))) {
+                        LOG_ERROR("Invalid bucket");
+                    }
+                } else {
+                    if (!(core::CPersistUtils::restore(BUCKET_TAG, m_Queue[i], traverser))) {
+                        LOG_ERROR("Invalid bucket");
+                        return false;
+                    }
+                }
+            }
+        } while (traverser.next());
+        return true;
+    }
-
-    private:
-        //! Persist the buckets in the queue using \p bucketPersist.
-        template<typename F>
-        void persist(F bucketPersist,
-                     core::CStatePersistInserter &inserter) const
-        {
-            for (std::size_t i = 0; i < m_Queue.size(); i++)
-            {
-                inserter.insertValue(INDEX_TAG, i);
-                inserter.insertLevel(BUCKET_TAG,
-                                     boost::bind(bucketPersist,
-                                                 boost::cref(m_Queue[i]),
-                                                 _1));
-            }
-        }
+
+private:
+    //! Persist the buckets in the queue using \p bucketPersist.
+    template<typename F>
+    void persist(F bucketPersist, core::CStatePersistInserter& inserter) const {
+        for (std::size_t i = 0; i < m_Queue.size(); i++) {
+            inserter.insertValue(INDEX_TAG, i);
+            inserter.insertLevel(BUCKET_TAG, boost::bind(bucketPersist, boost::cref(m_Queue[i]), _1));
+        }
+    }
+
+    //! Restore the buckets in the queue using \p bucketRestore.
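Before the restore() implementation below, the queue's time arithmetic is worth seeing in one place: push() advances m_LatestBucketEnd by one bucket and push_front() makes slot 0 the latest bucket, so index(time) is simply the whole number of buckets between \p time and the latest bucket end. A standalone sketch with boost::circular_buffer and invented numbers (not part of the patch):

    #include <boost/circular_buffer.hpp>
    #include <cstddef>
    #include <iostream>

    int main() {
        const long bucketLength = 600;                 // 10 minute buckets
        long latestBucketEnd = 600 + bucketLength - 1; // queue seeded at start time 600
        boost::circular_buffer<int> queue(3);          // 2 latency buckets + 1

        for (std::size_t i = 0; i < queue.capacity(); ++i) {
            queue.push_front(0); // fill(initial)
        }

        // push(item, time) for a time after latestBucketEnd: the window advances.
        latestBucketEnd += bucketLength;
        queue.push_front(42); // slot 0 now covers [1200, 1799]

        // index(time): whole buckets back from the latest bucket end.
        long time = 700;
        std::size_t index = static_cast<std::size_t>((latestBucketEnd - time) / bucketLength);
        std::cout << "slot " << index << " holds " << queue[index] << '\n'; // slot 1 holds 0
        return 0;
    }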
+    template<typename F>
+    bool restore(F bucketRestore, const T& initial, core::CStateRestoreTraverser& traverser) {
+        std::size_t i = 0;
+        do {
+            if (traverser.name() == INDEX_TAG) {
+                if (core::CStringUtils::stringToType(traverser.value(), i) == false) {
+                    LOG_DEBUG("Bad index in " << traverser.value());
+                    return false;
+                }
-            }
-            else if (traverser.name() == BUCKET_TAG)
-            {
-                if (i >= m_Queue.size())
-                {
-                    LOG_WARN("Bucket queue is smaller on restore than on persist: " <<
-                             i << " >= " << m_Queue.size() <<
-                             ". Extra buckets will be ignored.");
-                    if (traverser.hasSubLevel())
-                    {
-                        // Restore into a temporary
-                        T dummy = initial;
-                        if (traverser.traverseSubLevel(
-                                boost::bind(bucketRestore,
-                                            dummy,
-                                            _1)) == false)
-                        {
-                            LOG_ERROR("Invalid bucket");
-                        }
-                    }
+            } else if (traverser.name() == BUCKET_TAG) {
+                if (i >= m_Queue.size()) {
+                    LOG_WARN("Bucket queue is smaller on restore than on persist: " << i << " >= " << m_Queue.size()
+                                                                                    << ". Extra buckets will be ignored.");
+                    if (traverser.hasSubLevel()) {
+                        // Restore into a temporary
+                        T dummy = initial;
+                        if (traverser.traverseSubLevel(boost::bind(bucketRestore, dummy, _1)) == false) {
+                            LOG_ERROR("Invalid bucket");
+                        }
+                    }
-                else
-                {
-                    m_Queue[i] = initial;
-                    if (traverser.hasSubLevel())
-                    {
-                        if (traverser.traverseSubLevel(
-                                boost::bind(bucketRestore,
-                                            boost::ref(m_Queue[i]),
-                                            _1)) == false)
-                        {
-                            LOG_ERROR("Invalid bucket");
-                            return false;
-                        }
-                    }
-                }
-            }
-            while (traverser.next());
-            return true;
+                } else {
+                    m_Queue[i] = initial;
+                    if (traverser.hasSubLevel()) {
+                        if (traverser.traverseSubLevel(boost::bind(bucketRestore, boost::ref(m_Queue[i]), _1)) == false) {
+                            LOG_ERROR("Invalid bucket");
+                            return false;
+                        }
+                    }
+                }
+            }
+        } while (traverser.next());
+        return true;
+    }
-
-    //! Fill the queue with default constructed bucket values.
-    void fill(const T &initial)
-    {
-        for (std::size_t i = 0; i < m_Queue.capacity(); ++i)
-        {
-            this->push(initial);
-        }
-    }
-
-    //! Get the index of the bucket containing \p time.
-    std::size_t index(core_t::TTime time) const
-    {
-        if (m_BucketLength == 0)
-        {
-            LOG_ERROR("Invalid bucketLength for queue!");
-            return 0;
-        }
-        std::size_t index = static_cast<std::size_t>(
-                                (m_LatestBucketEnd - time) / m_BucketLength);
-        std::size_t size = this->size();
-        if (index >= size)
-        {
-            LOG_ERROR("Time " << time << " is out of range. Returning earliest bucket index.");
-            return size - 1;
-        }
-        return index;
-    }
+
+    //! Fill the queue with default constructed bucket values.
+    void fill(const T& initial) {
+        for (std::size_t i = 0; i < m_Queue.capacity(); ++i) {
+            this->push(initial);
+        }
+    }
+
+    //! Get the index of the bucket containing \p time.
+    std::size_t index(core_t::TTime time) const {
+        if (m_BucketLength == 0) {
+            LOG_ERROR("Invalid bucketLength for queue!");
+            return 0;
+        }
+        std::size_t index = static_cast<std::size_t>((m_LatestBucketEnd - time) / m_BucketLength);
+        std::size_t size = this->size();
+        if (index >= size) {
+            LOG_ERROR("Time " << time << " is out of range. Returning earliest bucket index.");
+            return size - 1;
+        }
+        return index;
+    }
-
-    private:
-        TQueue m_Queue;
-        core_t::TTime m_LatestBucketEnd;
-        core_t::TTime m_BucketLength;
+
+private:
+    TQueue m_Queue;
+    core_t::TTime m_LatestBucketEnd;
+    core_t::TTime m_BucketLength;
 };
 
 template<typename T>
@@ -413,7 +285,6 @@ const std::string CBucketQueue<T>::BUCKET_TAG("a");
 template<typename T>
 const std::string CBucketQueue<T>::INDEX_TAG("b");
-
 }
 }
diff --git a/include/model/CCountingModel.h b/include/model/CCountingModel.h
index adac3872a5..49784e41bc 100644
--- a/include/model/CCountingModel.h
+++ b/include/model/CCountingModel.h
@@ -15,10 +15,8 @@
 
 class CCountingModelTest;
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
 //! \brief A very simple model for counting events in the sampled bucket.
 //!
@@ -31,271 +29,253 @@ namespace model
 //! any time series models. This avoids a potential pathology where
 //! memory limiting can cause us to stop getting counts and also makes
 //! interpreting the maths library logging easier.
-class MODEL_EXPORT CCountingModel : public CAnomalyDetectorModel
-{
-    public:
-        //! \name Life-cycle.
-        //@{
-        //! \param[in] params The global configuration parameters.
-        //! \param[in] dataGatherer The object that gathers time series data.
-        CCountingModel(const SModelParams &params,
-                       const TDataGathererPtr &dataGatherer);
-
-        //! Constructor used for restoring persisted models.
-        //!
-        //! \note The current bucket statistics are left default initialized
-        //! and so must be sampled for before this model can be used.
-        CCountingModel(const SModelParams &params,
-                       const TDataGathererPtr &dataGatherer,
-                       core::CStateRestoreTraverser &traverser);
-
-        //! Create a copy that will result in the same persisted state as the
-        //! original. This is effectively a copy constructor that creates a
-        //! copy that's only valid for a single purpose. The boolean flag is
-        //! redundant except to create a signature that will not be mistaken for
-        //! a general purpose copy constructor.
-        CCountingModel(bool isForPersistence, const CCountingModel &other);
-        //@}
-
-        //! Returns event rate online.
-        virtual model_t::EModelType category() const;
-
-        //! Returns false.
-        virtual bool isPopulation() const;
-
-        //! Returns false.
-        virtual bool isEventRate() const;
-
-        //! Returns false.
-        virtual bool isMetric() const;
-
-        //! \name Persistence
-        //@{
-        //! Persist state by passing information to the supplied inserter
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-        //! Add to the contents of the object.
-        virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-        //! Create a clone of this model that will result in the same persisted
-        //! state. The clone may be incomplete in ways that do not affect the
-        //! persisted representation, and must not be used for any other
-        //! purpose.
-        //! \warning The caller owns the object returned.
-        virtual CAnomalyDetectorModel *cloneForPersistence() const;
-        //@}
-
-        //! \name Bucket Statistics
-        //!@{
-        //! Get the count of the bucketing interval containing \p time
-        //! for the person identified by \p pid.
-        //!
-        //! \param[in] pid The identifier of the person of interest.
-        //! \param[in] time The time of interest.
-        //! \return The count in the bucketing interval at \p time for the
-        //! person identified by \p pid if available and null otherwise.
-        virtual TOptionalUInt64 currentBucketCount(std::size_t pid,
-                                                   core_t::TTime time) const;
-
-        //! 
Get the mean bucket count or the reference model mean bucket - //! count if one is defined for the person identified by \p pid. - //! - //! \param[in] pid The identifier of the person of interest. - virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; - - //! Get the count of the bucketing interval containing \p time - //! for the person identified by \p pid. - //! - //! \param[in] feature Ignored. - //! \param[in] pid The identifier of the person of interest. - //! \param[in] cid Ignored. - //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - - //! Get the mean bucket count or the reference model mean bucket - //! count if one is defined for the person identified by \p pid. - //! - //! \param[in] feature Ignored. - //! \param[in] pid The identifier of the person of interest. - //! \param[in] cid Ignored. - //! \param[in] type Ignored. - //! \param[in] correlated Ignored. - //! \param[in] time The time of interest. - virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const; - //@} - - //! \name Person - //@{ - //! Get the person unique identifiers which have a feature value - //! in the bucketing time interval including \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the person identifiers - //! in the bucketing time interval of interest. - virtual void currentBucketPersonIds(core_t::TTime time, - TSizeVec &result) const; - //@} - - //! \name Update - //@{ - //! This samples the bucket statistics, in the time interval - //! [\p startTime, \p endTime]. This is needed by the results - //! preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! This samples the bucket statistics, and any state needed - //! by computeProbablity, in the time interval [\p startTime, - //! \p endTime], but does not update the model. This is needed - //! by the results preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! This samples the bucket statistics, in the time interval - //! [\p startTime, \p endTime]. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - //! \param[in] resourceMonitor The resourceMonitor. - virtual void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! No-op. - virtual void prune(std::size_t maximumAge); - //@} - - //! \name Probability - //@{ - //! Sets \p probability to 1. - virtual bool computeProbability(std::size_t pid, - core_t::TTime startTime, - core_t::TTime endTime, - CPartitioningFields &partitioningFields, - std::size_t numberAttributeProbabilities, - SAnnotatedProbability &result) const; - - //! Sets \p probability to 1. 
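The baseline this model reports is just the running mean of the counts seen in each bucket (m_MeanCounts further down). The accumulator behind it, maths::CBasicStatistics::SSampleMean, reduces to a sample count plus an incrementally updated mean; a simplified standalone sketch, assuming unweighted samples (the real accumulator also supports weighting; not part of the patch):

    #include <cstdint>
    #include <iostream>

    struct SMeanAccumulatorSketch {
        double count = 0.0;
        double mean = 0.0;
        void add(double x) {
            count += 1.0;
            mean += (x - mean) / count; // incremental mean update
        }
    };

    int main() {
        SMeanAccumulatorSketch baseline;
        for (std::uint64_t bucketCount : {10, 12, 11}) {
            baseline.add(static_cast<double>(bucketCount));
        }
        std::cout << baseline.mean << '\n'; // prints 11
        return 0;
    }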
-        virtual bool computeTotalProbability(const std::string &person,
-                                             std::size_t numberAttributeProbabilities,
-                                             TOptionalDouble &probability,
-                                             TAttributeProbability1Vec &attributeProbabilities) const;
-        //@}
-
-        //! Get the checksum of this model.
-        //!
-        //! \param[in] includeCurrentBucketStats If true then include
-        //! the current bucket statistics. (This is designed to handle
-        //! serialization, for which we don't serialize the current
-        //! bucket statistics.)
-        virtual uint64_t checksum(bool includeCurrentBucketStats = true) const;
-
-        //! Get the memory used by this model
-        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this model
-        virtual std::size_t memoryUsage() const;
-
-        //! Get the static size of this object - used for virtual hierarchies
-        virtual std::size_t staticSize() const;
-
-        //! Returns null.
-        virtual CModelDetailsViewPtr details() const;
-
-        //! Get the descriptions of any scheduled events occurring for the bucket time
-        virtual const TStr1Vec &scheduledEventDescriptions(core_t::TTime time) const;
-
-    public:
-        using TSizeUInt64Pr = std::pair<std::size_t, uint64_t>;
-        using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>;
-        using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
-        using TMeanAccumulatorVec = std::vector<TMeanAccumulator>;
-
-    protected:
-        //! Get the start time of the current bucket.
-        virtual core_t::TTime currentBucketStartTime() const;
-
-        //! Set the start time of the current bucket.
-        virtual void currentBucketStartTime(core_t::TTime time);
-
-        //! Get the non-estimated value of the memory used by this model.
-        virtual std::size_t computeMemoryUsage() const;
-
-    private:
-        //! Get the scheduled events that match at sampleTime.
-        SModelParams::TStrDetectionRulePrVec
-            checkScheduledEvents(core_t::TTime sampleTime) const;
-
-        //! Check for scheduled events and append the descriptions of
-        //! matched events to the scheduled event descriptions.
-        void setMatchedEventsDescriptions(core_t::TTime sampleTime, core_t::TTime bucketStartTime);
-
-        //! Returns one.
-        virtual double attributeFrequency(std::size_t cid) const;
-
-        //! Monitor the resource usage while creating new models.
-        void createUpdateNewModels(core_t::TTime,
-                                   CResourceMonitor &resourceMonitor);
-
-        //! Create the mean counts for "n" newly observed people.
-        virtual void createNewModels(std::size_t n, std::size_t m);
-
-        //! Update start time and counts for the given bucket.
-        void updateCurrentBucketsStats(core_t::TTime time);
-
-        //! Reinitialize the time series models for recycled people.
-        virtual void updateRecycledModels();
-
-        //! Initialize the time series models for newly observed people.
-        virtual void clearPrunedResources(const TSizeVec &people,
-                                          const TSizeVec &attributes);
+class MODEL_EXPORT CCountingModel : public CAnomalyDetectorModel {
+public:
+    //! \name Life-cycle.
+    //@{
+    //! \param[in] params The global configuration parameters.
+    //! \param[in] dataGatherer The object that gathers time series data.
+    CCountingModel(const SModelParams& params, const TDataGathererPtr& dataGatherer);
+
+    //! Constructor used for restoring persisted models.
+    //!
+    //! \note The current bucket statistics are left default initialized
+    //! and so must be sampled for before this model can be used.
+    CCountingModel(const SModelParams& params, const TDataGathererPtr& dataGatherer, core::CStateRestoreTraverser& traverser);
+
+    //! Create a copy that will result in the same persisted state as the
+    //! original. 
This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken for + //! a general purpose copy constructor. + CCountingModel(bool isForPersistence, const CCountingModel& other); + //@} + + //! Returns event rate online. + virtual model_t::EModelType category() const; + + //! Returns false. + virtual bool isPopulation() const; + + //! Returns false. + virtual bool isEventRate() const; + + //! Returns false. + virtual bool isMetric() const; + + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Add to the contents of the object. + virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Create a clone of this model that will result in the same persisted + //! state. The clone may be incomplete in ways that do not affect the + //! persisted representation, and must not be used for any other + //! purpose. + //! \warning The caller owns the object returned. + virtual CAnomalyDetectorModel* cloneForPersistence() const; + //@} + + //! \name Bucket Statistics + //!@{ + //! Get the count of the bucketing interval containing \p time + //! for the person identified by \p pid. + //! + //! \param[in] pid The identifier of the person of interest. + //! \param[in] time The time of interest. + //! \return The count in the bucketing interval at \p time for the + //! person identified by \p pid if available and null otherwise. + virtual TOptionalUInt64 currentBucketCount(std::size_t pid, core_t::TTime time) const; + + //! Get the mean bucket count or the reference model mean bucket + //! count if one is defined for the person identified by \p pid. + //! + //! \param[in] pid The identifier of the person of interest. + virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; + + //! Get the count of the bucketing interval containing \p time + //! for the person identified by \p pid. + //! + //! \param[in] feature Ignored. + //! \param[in] pid The identifier of the person of interest. + //! \param[in] cid Ignored. + //! \param[in] time The time of interest. + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + + //! Get the mean bucket count or the reference model mean bucket + //! count if one is defined for the person identified by \p pid. + //! + //! \param[in] feature Ignored. + //! \param[in] pid The identifier of the person of interest. + //! \param[in] cid Ignored. + //! \param[in] type Ignored. + //! \param[in] correlated Ignored. + //! \param[in] time The time of interest. + virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + model_t::CResultType type, + const TSizeDoublePr1Vec& correlated, + core_t::TTime time) const; + //@} + + //! \name Person + //@{ + //! Get the person unique identifiers which have a feature value + //! in the bucketing time interval including \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the person identifiers + //! in the bucketing time interval of interest. + virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const; + //@} + + //! \name Update + //@{ + //! This samples the bucket statistics, in the time interval + //! [\p startTime, \p endTime]. 
This is needed by the results
+    //! preview.
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+    //! This samples the bucket statistics, and any state needed
+    //! by computeProbability, in the time interval [\p startTime,
+    //! \p endTime], but does not update the model. This is needed
+    //! by the results preview.
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+    //! This samples the bucket statistics, in the time interval
+    //! [\p startTime, \p endTime].
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    //! \param[in] resourceMonitor The resourceMonitor.
+    virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+    //! No-op.
+    virtual void prune(std::size_t maximumAge);
+    //@}
+
+    //! \name Probability
+    //@{
+    //! Sets \p probability to 1.
+    virtual bool computeProbability(std::size_t pid,
+                                    core_t::TTime startTime,
+                                    core_t::TTime endTime,
+                                    CPartitioningFields& partitioningFields,
+                                    std::size_t numberAttributeProbabilities,
+                                    SAnnotatedProbability& result) const;
+
+    //! Sets \p probability to 1.
+    virtual bool computeTotalProbability(const std::string& person,
+                                         std::size_t numberAttributeProbabilities,
+                                         TOptionalDouble& probability,
+                                         TAttributeProbability1Vec& attributeProbabilities) const;
+    //@}
+
+    //! Get the checksum of this model.
+    //!
+    //! \param[in] includeCurrentBucketStats If true then include
+    //! the current bucket statistics. (This is designed to handle
+    //! serialization, for which we don't serialize the current
+    //! bucket statistics.)
+    virtual uint64_t checksum(bool includeCurrentBucketStats = true) const;
+
+    //! Get the memory used by this model
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this model
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const;
+
+    //! Returns null.
+    virtual CModelDetailsViewPtr details() const;
+
+    //! Get the descriptions of any scheduled events occurring for the bucket time
+    virtual const TStr1Vec& scheduledEventDescriptions(core_t::TTime time) const;
+
+public:
+    using TSizeUInt64Pr = std::pair<std::size_t, uint64_t>;
+    using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>;
+    using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
+    using TMeanAccumulatorVec = std::vector<TMeanAccumulator>;
+
+protected:
+    //! Get the start time of the current bucket.
+    virtual core_t::TTime currentBucketStartTime() const;
+
+    //! Set the start time of the current bucket.
+    virtual void currentBucketStartTime(core_t::TTime time);
+
+    //! Get the non-estimated value of the memory used by this model.
+    virtual std::size_t computeMemoryUsage() const;
+
+private:
+    //! Get the scheduled events that match at sampleTime.
+    SModelParams::TStrDetectionRulePrVec checkScheduledEvents(core_t::TTime sampleTime) const;
+
+    //! Check for scheduled events and append the descriptions of
+    //! matched events to the scheduled event descriptions.
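setMatchedEventsDescriptions(), declared next, records per bucket the descriptions of every scheduled event whose window covers the sample time. A standalone sketch of that matching step, with an invented [start, end) interval representation (the real detection rules are richer than a plain window; not part of the patch):

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    using TTimeTimePr = std::pair<long, long>;                // [start, end)
    using TStrWindowPr = std::pair<std::string, TTimeTimePr>; // description + window

    std::vector<std::string> matchedEvents(long sampleTime, const std::vector<TStrWindowPr>& events) {
        std::vector<std::string> result;
        for (const auto& event : events) {
            if (sampleTime >= event.second.first && sampleTime < event.second.second) {
                result.push_back(event.first);
            }
        }
        return result;
    }

    int main() {
        std::vector<TStrWindowPr> events{{"maintenance", {1000, 2000}}, {"failover test", {1500, 1600}}};
        for (const auto& description : matchedEvents(1550, events)) {
            std::cout << description << '\n'; // prints both descriptions
        }
        return 0;
    }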
+ void setMatchedEventsDescriptions(core_t::TTime sampleTime, core_t::TTime bucketStartTime); + + //! Returns one. + virtual double attributeFrequency(std::size_t cid) const; + + //! Monitor the resource usage while creating new models. + void createUpdateNewModels(core_t::TTime, CResourceMonitor& resourceMonitor); + + //! Create the mean counts for "n" newly observed people. + virtual void createNewModels(std::size_t n, std::size_t m); + + //! Update start time and counts for the given bucket. + void updateCurrentBucketsStats(core_t::TTime time); + + //! Reinitialize the time series models for recycled people. + virtual void updateRecycledModels(); - //! Check if bucket statistics are available for the specified time. - bool bucketStatsAvailable(core_t::TTime time) const; - - //! Print the current bucketing interval. - std::string printCurrentBucket() const; + //! Initialize the time series models for newly observed people. + virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes); - //! Set the current bucket total count. - virtual void currentBucketTotalCount(uint64_t totalCount); + //! Check if bucket statistics are available for the specified time. + bool bucketStatsAvailable(core_t::TTime time) const; - //! Perform derived class specific operations to accomplish skipping sampling - virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime); + //! Print the current bucketing interval. + std::string printCurrentBucket() const; - //! Get the model memory usage estimator - virtual CMemoryUsageEstimator *memoryUsageEstimator() const; + //! Set the current bucket total count. + virtual void currentBucketTotalCount(uint64_t totalCount); - private: - using TTimeStr1VecUMap = boost::unordered_map; + //! Perform derived class specific operations to accomplish skipping sampling + virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime); - private: - //! The start time of the last sampled bucket. - core_t::TTime m_StartTime; + //! Get the model memory usage estimator + virtual CMemoryUsageEstimator* memoryUsageEstimator() const; - //! The current bucket counts. - TSizeUInt64PrVec m_Counts; +private: + using TTimeStr1VecUMap = boost::unordered_map; - //! The baseline bucket counts. - TMeanAccumulatorVec m_MeanCounts; +private: + //! The start time of the last sampled bucket. + core_t::TTime m_StartTime; - //! Map of matched scheduled event descriptions by bucket time - TTimeStr1VecUMap m_ScheduledEventDescriptions; + //! The current bucket counts. + TSizeUInt64PrVec m_Counts; + + //! The baseline bucket counts. + TMeanAccumulatorVec m_MeanCounts; + + //! Map of matched scheduled event descriptions by bucket time + TTimeStr1VecUMap m_ScheduledEventDescriptions; friend class ::CCountingModelTest; }; diff --git a/include/model/CCountingModelFactory.h b/include/model/CCountingModelFactory.h index 3ed6d6ad83..b28f72fd11 100644 --- a/include/model/CCountingModelFactory.h +++ b/include/model/CCountingModelFactory.h @@ -10,14 +10,11 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief A factory class implementation for CCountingModel. //! @@ -25,142 +22,135 @@ namespace model //! This concrete factory implements the methods to make new models //! and data gatherers, and create default priors suitable for the //! CCountingModel class. -class MODEL_EXPORT CCountingModelFactory : public CModelFactory -{ - public: - //! 
Lift all overloads of the defaultPrior function into scope. - using CModelFactory::defaultPrior; - - public: - //! \note The default arguments supplied to the constructor are - //! intended for unit testing and are not necessarily good defaults. - //! The CModelConfig class is responsible for providing sensible - //! default values for the factory for use within our products. - explicit CCountingModelFactory(const SModelParams ¶ms, - model_t::ESummaryMode summaryMode = model_t::E_None, - const std::string &summaryCountFieldName = ""); - - //! Create a copy of the factory owned by the calling code. - virtual CCountingModelFactory *clone() const; - - //! \name Factory Methods - //@{ - //! Make a new counting model. - //! - //! \param[in] initData The parameters needed to initialize the model. - //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData) const; - - //! Make a new counting model from part of a state document. - //! - //! \param[in] initData Additional parameters needed to initialize - //! the model. - //! \param[in,out] traverser A state document traverser. - //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData, - core::CStateRestoreTraverser &traverser) const; - - //! Make a new event rate data gatherer. - //! - //! \param[in] initData The parameters needed to initialize the data - //! gatherer. - //! \warning It is owned by the calling code. - virtual CDataGatherer *makeDataGatherer(const SGathererInitializationData &initData) const; - - //! Make a new event rate data gatherer from part of a state document. - //! - //! \param[in] partitionFieldValue The partition field value. - //! \param[in,out] traverser A state document traverser. - //! \warning It is owned by the calling code. - virtual CDataGatherer *makeDataGatherer(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser) const; - //@} - - //! \name Defaults - //@{ - //! Get the default prior for \p feature which is a stub. - virtual TPriorPtr defaultPrior(model_t::EFeature feature, - const SModelParams ¶ms) const; - - //! Get the default prior for \p feature which is a stub. - virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, - const SModelParams ¶ms) const; - - //! Get the default prior for pairs of correlated time series - //! of \p feature which is a stub. - virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, - const SModelParams ¶ms) const; - //@} - - //! Get the search key corresponding to this factory. - virtual const CSearchKey &searchKey() const; - - //! Check if this makes the model used for a simple counting search. - virtual bool isSimpleCount() const; - - //! Check the pre-summarisation mode for this factory. - virtual model_t::ESummaryMode summaryMode() const; - - //! Get the default data type for models from this factory. - virtual maths_t::EDataType dataType() const; - - //! \name Customization by a specific search - //@{ - //! Set the identifier of the search for which this generates models. - virtual void identifier(int identifier); - - //! Set the name of the field whose values will be counted. - virtual void fieldNames(const std::string &partitionFieldName, - const std::string &overFieldName, - const std::string &byFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames); - - //! Set whether the models should process missing person fields. 
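The \warning notes on the factory methods above all encode the same ownership rule: makeModel() and makeDataGatherer() return raw pointers that the calling code must manage. A standalone sketch of that convention with invented class names, showing the usual wrap-immediately pattern at the call site (not part of the patch):

    #include <iostream>
    #include <memory>

    class CModelSketch {
    public:
        virtual ~CModelSketch() = default;
        virtual const char* name() const = 0;
    };

    class CCountingModelSketch : public CModelSketch {
    public:
        const char* name() const override { return "counting"; }
    };

    class CFactorySketch {
    public:
        //! \warning The caller owns the returned object.
        CModelSketch* makeModel() const { return new CCountingModelSketch; }
    };

    int main() {
        CFactorySketch factory;
        // Wrap the raw pointer immediately so ownership is explicit.
        std::unique_ptr<CModelSketch> model(factory.makeModel());
        std::cout << model->name() << '\n';
        return 0;
    }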
- virtual void useNull(bool useNull); - - //! Set the features which will be modeled. - virtual void features(const TFeatureVec &features); - - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay); - //@} - - private: - //! Get the field values which partition the data for modeling. - virtual TStrCRefVec partitioningFields() const; - - private: - //! The identifier of the search for which this generates models. - int m_Identifier; - - //! Indicates whether the data being gathered are already summarized - //! by an external aggregation process. - model_t::ESummaryMode m_SummaryMode; - - //! If m_SummaryMode is E_Manual then this is the name of the field - //! holding the summary count. - std::string m_SummaryCountFieldName; - - //! The name of the field which splits the data. - std::string m_PartitionFieldName; - - //! The name of the field whose values will be counted. - std::string m_PersonFieldName; - - //! If true the models will process missing person fields. - bool m_UseNull; - - //! The count features which will be modeled. - TFeatureVec m_Features; - - //! The bucket results delay. - std::size_t m_BucketResultsDelay; - - //! A cached search key. - mutable TOptionalSearchKey m_SearchKeyCache; +class MODEL_EXPORT CCountingModelFactory : public CModelFactory { +public: + //! Lift all overloads of the defaultPrior function into scope. + using CModelFactory::defaultPrior; + +public: + //! \note The default arguments supplied to the constructor are + //! intended for unit testing and are not necessarily good defaults. + //! The CModelConfig class is responsible for providing sensible + //! default values for the factory for use within our products. + explicit CCountingModelFactory(const SModelParams& params, + model_t::ESummaryMode summaryMode = model_t::E_None, + const std::string& summaryCountFieldName = ""); + + //! Create a copy of the factory owned by the calling code. + virtual CCountingModelFactory* clone() const; + + //! \name Factory Methods + //@{ + //! Make a new counting model. + //! + //! \param[in] initData The parameters needed to initialize the model. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData) const; + + //! Make a new counting model from part of a state document. + //! + //! \param[in] initData Additional parameters needed to initialize + //! the model. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + + //! Make a new event rate data gatherer. + //! + //! \param[in] initData The parameters needed to initialize the data + //! gatherer. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const SGathererInitializationData& initData) const; + + //! Make a new event rate data gatherer from part of a state document. + //! + //! \param[in] partitionFieldValue The partition field value. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + //@} + + //! \name Defaults + //@{ + //! Get the default prior for \p feature which is a stub. + virtual TPriorPtr defaultPrior(model_t::EFeature feature, const SModelParams& params) const; + + //! 
Get the default prior for \p feature which is a stub. + virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default prior for pairs of correlated time series + //! of \p feature which is a stub. + virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + //@} + + //! Get the search key corresponding to this factory. + virtual const CSearchKey& searchKey() const; + + //! Check if this makes the model used for a simple counting search. + virtual bool isSimpleCount() const; + + //! Check the pre-summarisation mode for this factory. + virtual model_t::ESummaryMode summaryMode() const; + + //! Get the default data type for models from this factory. + virtual maths_t::EDataType dataType() const; + + //! \name Customization by a specific search + //@{ + //! Set the identifier of the search for which this generates models. + virtual void identifier(int identifier); + + //! Set the name of the field whose values will be counted. + virtual void fieldNames(const std::string& partitionFieldName, + const std::string& overFieldName, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames); + + //! Set whether the models should process missing person fields. + virtual void useNull(bool useNull); + + //! Set the features which will be modeled. + virtual void features(const TFeatureVec& features); + + //! Set the bucket results delay + virtual void bucketResultsDelay(std::size_t bucketResultsDelay); + //@} + +private: + //! Get the field values which partition the data for modeling. + virtual TStrCRefVec partitioningFields() const; + +private: + //! The identifier of the search for which this generates models. + int m_Identifier; + + //! Indicates whether the data being gathered are already summarized + //! by an external aggregation process. + model_t::ESummaryMode m_SummaryMode; + + //! If m_SummaryMode is E_Manual then this is the name of the field + //! holding the summary count. + std::string m_SummaryCountFieldName; + + //! The name of the field which splits the data. + std::string m_PartitionFieldName; + + //! The name of the field whose values will be counted. + std::string m_PersonFieldName; + + //! If true the models will process missing person fields. + bool m_UseNull; + + //! The count features which will be modeled. + TFeatureVec m_Features; + + //! The bucket results delay. + std::size_t m_BucketResultsDelay; + + //! A cached search key. + mutable TOptionalSearchKey m_SearchKeyCache; }; - } } diff --git a/include/model/CDataClassifier.h b/include/model/CDataClassifier.h index 8590ec0009..6ea14b98ec 100644 --- a/include/model/CDataClassifier.h +++ b/include/model/CDataClassifier.h @@ -15,65 +15,56 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief Classifies a collection of values. //! //! DESCRIPTION:\n //! Currently, this checks whether the values are all integers. -class MODEL_EXPORT CDataClassifier -{ - public: - using TDouble1Vec = core::CSmallVector; - - public: - //! Update the classification with \p value. - void add(model_t::EFeature feature, - double value, - unsigned int count); - - //! Update the classification with \p value. - void add(model_t::EFeature feature, - const TDouble1Vec &value, - unsigned int count); - - //! 
Check if the values are all integers. - bool isInteger() const; - - //! Check if the values are all positive. - bool isNonNegative() const; - - // Consider adding function to check if the values live - // on a lattice: i.e. x = {a + b*i} for integer i. This - // would need to convert x(i) to integers and find the - // g.c.d. of x(2) - x(1), x(3) - x(2) and so on. - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Create from part of an XML document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - //@} - - private: - //! Set to false if the series contains non-integer values. - bool m_IsInteger = true; - - //! Set to false if the series contains negative values. - bool m_IsNonNegative = true; -}; +class MODEL_EXPORT CDataClassifier { +public: + using TDouble1Vec = core::CSmallVector; + +public: + //! Update the classification with \p value. + void add(model_t::EFeature feature, double value, unsigned int count); + + //! Update the classification with \p value. + void add(model_t::EFeature feature, const TDouble1Vec& value, unsigned int count); + + //! Check if the values are all integers. + bool isInteger() const; + + //! Check if the values are all positive. + bool isNonNegative() const; + // Consider adding function to check if the values live + // on a lattice: i.e. x = {a + b*i} for integer i. This + // would need to convert x(i) to integers and find the + // g.c.d. of x(2) - x(1), x(3) - x(2) and so on. + + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Create from part of an XML document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + //@} + +private: + //! Set to false if the series contains non-integer values. + bool m_IsInteger = true; + + //! Set to false if the series contains negative values. + bool m_IsNonNegative = true; +}; } } diff --git a/include/model/CDataGatherer.h b/include/model/CDataGatherer.h index ae2d07418a..3e515867ff 100644 --- a/include/model/CDataGatherer.h +++ b/include/model/CDataGatherer.h @@ -9,8 +9,8 @@ #include #include -#include #include +#include #include #include @@ -32,16 +32,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CEventData; class CMetricBucketGatherer; class CResourceMonitor; @@ -101,715 +97,679 @@ class CSearchKey; //! //! Time-based data gathering is handled by further classes derived from //! CBucketGatherer, for Metrics and EventRates accordingly. 
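Before the CDataGatherer declaration, one aside on CDataClassifier above: it maintains two monotone flags, so once a non-integer or a negative value is seen the corresponding property is false for good. A standalone sketch of that rule (simplified; the real add() also takes the feature and a measurement count; not part of the patch):

    #include <cmath>
    #include <iostream>

    class CDataClassifierSketch {
    public:
        void add(double value) {
            m_IsInteger = m_IsInteger && (std::floor(value) == value);
            m_IsNonNegative = m_IsNonNegative && (value >= 0.0);
        }
        bool isInteger() const { return m_IsInteger; }
        bool isNonNegative() const { return m_IsNonNegative; }

    private:
        bool m_IsInteger = true;
        bool m_IsNonNegative = true;
    };

    int main() {
        CDataClassifierSketch classifier;
        for (double value : {1.0, 3.0, 2.5}) {
            classifier.add(value);
        }
        std::cout << classifier.isInteger() << ' ' << classifier.isNonNegative() << '\n'; // prints: 0 1
        return 0;
    }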
-class MODEL_EXPORT CDataGatherer
-{
-    public:
-        using TDoubleVec = std::vector<double>;
-        using TDouble1Vec = core::CSmallVector<double, 1>;
-        using TSizeVec = std::vector<std::size_t>;
-        using TStrVec = std::vector<std::string>;
-        using TStrVecCItr = TStrVec::const_iterator;
-        using TStrCPtrVec = std::vector<const std::string*>;
-        using TSizeUInt64Pr = std::pair<std::size_t, uint64_t>;
-        using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>;
-        using TFeatureVec = model_t::TFeatureVec;
-        using TFeatureVecCItr = TFeatureVec::const_iterator;
-        using TSizeSizePr = std::pair<std::size_t, std::size_t>;
-        using TSizeSizePrUInt64Pr = std::pair<TSizeSizePr, uint64_t>;
-        using TSizeSizePrUInt64PrVec = std::vector<TSizeSizePrUInt64Pr>;
-        using TSizeSizePrUInt64UMap = boost::unordered_map<TSizeSizePr, uint64_t>;
-        using TSizeSizePrUInt64UMapItr = TSizeSizePrUInt64UMap::iterator;
-        using TSizeSizePrUInt64UMapCItr = TSizeSizePrUInt64UMap::const_iterator;
-        using TSizeSizePrUInt64UMapQueue = CBucketQueue<TSizeSizePrUInt64UMap>;
-        using TSizeSizePrUInt64UMapQueueItr = TSizeSizePrUInt64UMapQueue::iterator;
-        using TSizeSizePrUInt64UMapQueueCItr = TSizeSizePrUInt64UMapQueue::const_iterator;
-        using TSizeSizePrUInt64UMapQueueCRItr = TSizeSizePrUInt64UMapQueue::const_reverse_iterator;
-        using TSizeSizePrStoredStringPtrPrUInt64UMap = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMap;
-        using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator;
-        using TSizeSizePrStoredStringPtrPrUInt64UMapItr = TSizeSizePrStoredStringPtrPrUInt64UMap::iterator;
-        using TSizeSizePrStoredStringPtrPrUInt64UMapVec = std::vector<TSizeSizePrStoredStringPtrPrUInt64UMap>;
-        using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue = CBucketQueue<TSizeSizePrStoredStringPtrPrUInt64UMapVec>;
-        using TSearchKeyCRef = boost::reference_wrapper<const CSearchKey>;
-        using TBucketGathererPVec = std::vector<CBucketGatherer*>;
-        using TBucketGathererPVecItr = TBucketGathererPVec::iterator;
-        using TBucketGathererPVecCItr = TBucketGathererPVec::const_iterator;
-        using TFeatureAnyPr = std::pair<model_t::EFeature, boost::any>;
-        using TFeatureAnyPrVec = std::vector<TFeatureAnyPr>;
-        using TMetricCategoryVec = std::vector<model_t::EMetricCategory>;
-        using TSampleCountsPtr = boost::shared_ptr<CSampleCounts>;
-        using TTimeVec = std::vector<core_t::TTime>;
-        using TTimeVecCItr = TTimeVec::const_iterator;
-
-    public:
-        //! The summary count indicating an explicit null record.
-        static const std::size_t EXPLICIT_NULL_SUMMARY_COUNT;
-
-        //! The expected memory usage per by field
-        static const std::size_t ESTIMATED_MEM_USAGE_PER_BY_FIELD;
-
-        //! The expected memory usage per over field
-        static const std::size_t ESTIMATED_MEM_USAGE_PER_OVER_FIELD;
-
-    public:
-        //! \name Life-cycle
-        //@{
-        //! Create a new data series gatherer.
-        //!
-        //! \param[in] gathererType Indicates what sort of bucket data to gather:
-        //! EventRate/Metric, Population/Individual
-        //! \param[in] summaryMode Indicates whether the data being gathered
-        //! are already summarized by an external aggregation process.
-        //! \param[in] modelParams The global configuration parameters.
-        //! \param[in] summaryCountFieldName If \p summaryMode is E_Manual
-        //! then this is the name of the field holding the summary count.
-        //! \param[in] partitionFieldName The name of the field which splits
-        //! the data.
-        //! \param[in] partitionFieldValue The value of the field which splits
-        //! the data.
-        //! \param[in] personFieldName The name of the field which identifies
-        //! people.
-        //! \param[in] attributeFieldName The name of the field which defines
-        //! the person attributes.
-        //! \param[in] valueFieldName The name of the field which contains
-        //! the metric values.
-        //! \param[in] influenceFieldNames The field names for which we will
-        //! compute influences.
-        //! \param[in] useNull If true the gatherer will process missing
-        //! 
person and attribute field values (assuming they are empty). - //! \param[in] key The key of the search for which to gatherer data. - //! \param[in] features The features of the data to model. - //! \param[in] startTime The start of the time interval for which - //! to gather data. - //! \param[in] sampleCountOverride for the number of measurements - //! in a statistic. (Note that this is intended for testing only.) - //! A zero value means that the data gatherer class will determine - //! an appropriate value for the bucket length and data rate. - CDataGatherer(model_t::EAnalysisCategory gathererType, - model_t::ESummaryMode summaryMode, - const SModelParams &modelParams, - const std::string &summaryCountFieldName, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - bool useNull, - const CSearchKey &key, - const TFeatureVec &features, - core_t::TTime startTime, - int sampleCountOverride); - - //! Construct from a state document. - CDataGatherer(model_t::EAnalysisCategory gathererType, - model_t::ESummaryMode summaryMode, - const SModelParams &modelParams, - const std::string &summaryCountFieldName, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - bool useNull, - const CSearchKey &key, - core::CStateRestoreTraverser &traverser); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken for - //! a general purpose copy constructor. - CDataGatherer(bool isForPersistence, const CDataGatherer &other); - - ~CDataGatherer(); - //@} - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Create a clone of this data gatherer that will result in the same - //! persisted state. The clone may be incomplete in ways that do not - //! affect the persisted representation, and must not be used for any - //! other purpose. - //! - //! \warning The caller owns the object returned. - CDataGatherer *cloneForPersistence() const; - //@} - - //! Check if the data being gathered are already summarized by an - //! external aggregation process. - model_t::ESummaryMode summaryMode() const; - - //! Get the function. - model::function_t::EFunction function() const; - - //! Get a description of the component searches. - std::string description() const; - - //! Is this a population data gatherer? - bool isPopulation() const; - - //! Get the maximum size of all the member containers. - std::size_t maxDimension() const; - - //! \name Fields - //@{ - //! Get the partition field name. - //! - //! The name of the partitioning field. - const std::string &partitionFieldName() const; - - //! Get the partition field value. - //! - //! The value of the partitioning field. - const std::string &partitionFieldValue() const; - - //! This is the common field in all searches "along" which the - //! probabilities are aggregated, i.e. the "by" field name for - //! 
individual models and the "over" field name for population - //! models. - const std::string &personFieldName() const; - - //! Get the attribute field name if one exists. - const std::string &attributeFieldName() const; - - //! Get the name of the field containing the metric value. - const std::string &valueFieldName() const; - - //! Get an iterator at the beginning the influencing field names. - TStrVecCItr beginInfluencers() const; - - //! Get an iterator at the end of the influencing field names. - TStrVecCItr endInfluencers() const; - - //! Return the search key for which this is gathering data. - const CSearchKey &searchKey() const; - - //! Get the fields for which to gather data. - //! - //! This defines the fields to extract from a record. These include - //! the fields which define the categories whose counts are being - //! analyzed, the fields containing metric series names and values - //! and the fields defining a population. - const TStrVec &fieldsOfInterest() const; - - //! Get the number of by field values. For a population model this will - //! be equal to numberActiveAttributes(); for an individual model - //! numberActivePeople(). - std::size_t numberByFieldValues() const; - - //! Get the number of over field values. For a population model this - //! will be equal to numberActivePeople(); for an individual model 0. - std::size_t numberOverFieldValues() const; - - //! Have we been configured to use NULL values? - bool useNull() const; - //@} - - //! \name Update - //@{ - //! Process the specified fields. - //! - //! This adds people and attributes as necessary and fills out the - //! event data from \p fieldValues. - bool processFields(const TStrCPtrVec &fieldValues, - CEventData &result, - CResourceMonitor &resourceMonitor); - - //! Record the arrival of \p data at \p time. - bool addArrival(const TStrCPtrVec &fieldValues, - CEventData &data, - CResourceMonitor &resourceMonitor); - - //! Roll time to the end of the bucket that is latency after the sampled bucket. - void sampleNow(core_t::TTime sampleBucketStart); - - //! Roll time to the end of the bucket that is latency after the sampled bucket - //! without performing any updates that impact the model. - void skipSampleNow(core_t::TTime sampleBucketStart); - //@} - - //! \name Features - //@{ - //! Get the number of features on which this is gathering data. - std::size_t numberFeatures() const; - - //! Check if this is gathering data on \p feature. - bool hasFeature(model_t::EFeature feature) const; - - //! Get the feature corresponding to \p i. - //! - //! \warning \p i must be in range for the features this gatherer - //! is collecting, i.e. it must be less than numberFeatures. - model_t::EFeature feature(std::size_t i) const; - - //! Get the collection of features for which data is being gathered. - const TFeatureVec &features() const; - - //! Get the data for all features for the bucketing time interval - //! containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the feature data at \p time. - //! \tparam T The type of the feature data. 
- template - bool featureData(core_t::TTime time, core_t::TTime bucketLength, - std::vector > &result) const - { - TFeatureAnyPrVec rawFeatureData; - this->chooseBucketGatherer(time).featureData(time, bucketLength, rawFeatureData); - - bool succeeded = true; - - result.clear(); - result.reserve(rawFeatureData.size()); - for (std::size_t i = 0u; i < rawFeatureData.size(); ++i) - { - TFeatureAnyPr &feature = rawFeatureData[i]; - - // Check the typeid before attempting the cast so we - // don't use throw to handle failure, which is slow. - if (feature.second.type() != typeid(T)) - { - LOG_ERROR("Bad type for feature = " << model_t::print(feature.first) - << ", expected " << typeid(T).name() - << " got " << feature.second.type().name()); - succeeded = false; - continue; - } - - // We emulate move semantics here to avoid the expensive - // copy if T is large (as we expect it might be sometimes). - // We have to adopt the using std::swap idiom (contravening - // coding guidelines) because T can be a built in type. - // Unfortunately, this implementation requires T to be - // default constructible. - using std::swap; - result.push_back(std::pair(feature.first, T())); - T &tmp = boost::any_cast(feature.second); - swap(result.back().second, tmp); +class MODEL_EXPORT CDataGatherer { +public: + using TDoubleVec = std::vector; + using TDouble1Vec = core::CSmallVector; + using TSizeVec = std::vector; + using TStrVec = std::vector; + using TStrVecCItr = TStrVec::const_iterator; + using TStrCPtrVec = std::vector; + using TSizeUInt64Pr = std::pair; + using TSizeUInt64PrVec = std::vector; + using TFeatureVec = model_t::TFeatureVec; + using TFeatureVecCItr = TFeatureVec::const_iterator; + using TSizeSizePr = std::pair; + using TSizeSizePrUInt64Pr = std::pair; + using TSizeSizePrUInt64PrVec = std::vector; + using TSizeSizePrUInt64UMap = boost::unordered_map; + using TSizeSizePrUInt64UMapItr = TSizeSizePrUInt64UMap::iterator; + using TSizeSizePrUInt64UMapCItr = TSizeSizePrUInt64UMap::const_iterator; + using TSizeSizePrUInt64UMapQueue = CBucketQueue; + using TSizeSizePrUInt64UMapQueueItr = TSizeSizePrUInt64UMapQueue::iterator; + using TSizeSizePrUInt64UMapQueueCItr = TSizeSizePrUInt64UMapQueue::const_iterator; + using TSizeSizePrUInt64UMapQueueCRItr = TSizeSizePrUInt64UMapQueue::const_reverse_iterator; + using TSizeSizePrStoredStringPtrPrUInt64UMap = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMap; + using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator; + using TSizeSizePrStoredStringPtrPrUInt64UMapItr = TSizeSizePrStoredStringPtrPrUInt64UMap::iterator; + using TSizeSizePrStoredStringPtrPrUInt64UMapVec = std::vector; + using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue = CBucketQueue; + using TSearchKeyCRef = boost::reference_wrapper; + using TBucketGathererPVec = std::vector; + using TBucketGathererPVecItr = TBucketGathererPVec::iterator; + using TBucketGathererPVecCItr = TBucketGathererPVec::const_iterator; + using TFeatureAnyPr = std::pair; + using TFeatureAnyPrVec = std::vector; + using TMetricCategoryVec = std::vector; + using TSampleCountsPtr = boost::shared_ptr; + using TTimeVec = std::vector; + using TTimeVecCItr = TTimeVec::const_iterator; + +public: + //! The summary count indicating an explicit null record. + static const std::size_t EXPLICIT_NULL_SUMMARY_COUNT; + + //! The expected memory usage per by field + static const std::size_t ESTIMATED_MEM_USAGE_PER_BY_FIELD; + + //! 
The expected memory usage per over field
+    static const std::size_t ESTIMATED_MEM_USAGE_PER_OVER_FIELD;
+
+public:
+    //! \name Life-cycle
+    //@{
+    //! Create a new data series gatherer.
+    //!
+    //! \param[in] gathererType Indicates what sort of bucket data to gather:
+    //! EventRate/Metric, Population/Individual
+    //! \param[in] summaryMode Indicates whether the data being gathered
+    //! are already summarized by an external aggregation process.
+    //! \param[in] modelParams The global configuration parameters.
+    //! \param[in] summaryCountFieldName If \p summaryMode is E_Manual
+    //! then this is the name of the field holding the summary count.
+    //! \param[in] partitionFieldName The name of the field which splits
+    //! the data.
+    //! \param[in] partitionFieldValue The value of the field which splits
+    //! the data.
+    //! \param[in] personFieldName The name of the field which identifies
+    //! people.
+    //! \param[in] attributeFieldName The name of the field which defines
+    //! the person attributes.
+    //! \param[in] valueFieldName The name of the field which contains
+    //! the metric values.
+    //! \param[in] influenceFieldNames The field names for which we will
+    //! compute influences.
+    //! \param[in] useNull If true the gatherer will process missing
+    //! person and attribute field values (assuming they are empty).
+    //! \param[in] key The key of the search for which to gather data.
+    //! \param[in] features The features of the data to model.
+    //! \param[in] startTime The start of the time interval for which
+    //! to gather data.
+    //! \param[in] sampleCountOverride An override for the number of measurements
+    //! in a statistic. (Note that this is intended for testing only.)
+    //! A zero value means that the data gatherer class will determine
+    //! an appropriate value for the bucket length and data rate.
+    CDataGatherer(model_t::EAnalysisCategory gathererType,
+                  model_t::ESummaryMode summaryMode,
+                  const SModelParams& modelParams,
+                  const std::string& summaryCountFieldName,
+                  const std::string& partitionFieldName,
+                  const std::string& partitionFieldValue,
+                  const std::string& personFieldName,
+                  const std::string& attributeFieldName,
+                  const std::string& valueFieldName,
+                  const TStrVec& influenceFieldNames,
+                  bool useNull,
+                  const CSearchKey& key,
+                  const TFeatureVec& features,
+                  core_t::TTime startTime,
+                  int sampleCountOverride);
+
+    //! Construct from a state document.
+    CDataGatherer(model_t::EAnalysisCategory gathererType,
+                  model_t::ESummaryMode summaryMode,
+                  const SModelParams& modelParams,
+                  const std::string& summaryCountFieldName,
+                  const std::string& partitionFieldName,
+                  const std::string& partitionFieldValue,
+                  const std::string& personFieldName,
+                  const std::string& attributeFieldName,
+                  const std::string& valueFieldName,
+                  const TStrVec& influenceFieldNames,
+                  bool useNull,
+                  const CSearchKey& key,
+                  core::CStateRestoreTraverser& traverser);
+
+    //! Create a copy that will result in the same persisted state as the
+    //! original. This is effectively a copy constructor that creates a
+    //! copy that's only valid for a single purpose. The boolean flag is
+    //! redundant except to create a signature that will not be mistaken for
+    //! a general purpose copy constructor.
+    CDataGatherer(bool isForPersistence, const CDataGatherer& other);
+
+    ~CDataGatherer();
+    //@}
+
+    //! \name Persistence
+    //@{
+    //! Persist state by passing information to the supplied inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Create a clone of this data gatherer that will result in the same
+    //! persisted state. The clone may be incomplete in ways that do not
+    //! affect the persisted representation, and must not be used for any
+    //! other purpose.
+    //!
+    //! \warning The caller owns the object returned.
+    CDataGatherer* cloneForPersistence() const;
+    //@}
+
+    //! Check if the data being gathered are already summarized by an
+    //! external aggregation process.
+    model_t::ESummaryMode summaryMode() const;
+
+    //! Get the function.
+    model::function_t::EFunction function() const;
+
+    //! Get a description of the component searches.
+    std::string description() const;
+
+    //! Is this a population data gatherer?
+    bool isPopulation() const;
+
+    //! Get the maximum size of all the member containers.
+    std::size_t maxDimension() const;
+
+    //! \name Fields
+    //@{
+    //! Get the partition field name.
+    //!
+    //! The name of the partitioning field.
+    const std::string& partitionFieldName() const;
+
+    //! Get the partition field value.
+    //!
+    //! The value of the partitioning field.
+    const std::string& partitionFieldValue() const;
+
+    //! This is the common field in all searches "along" which the
+    //! probabilities are aggregated, i.e. the "by" field name for
+    //! individual models and the "over" field name for population
+    //! models.
+    const std::string& personFieldName() const;
+
+    //! Get the attribute field name if one exists.
+    const std::string& attributeFieldName() const;
+
+    //! Get the name of the field containing the metric value.
+    const std::string& valueFieldName() const;
+
+    //! Get an iterator at the beginning of the influencing field names.
+    TStrVecCItr beginInfluencers() const;
+
+    //! Get an iterator at the end of the influencing field names.
+    TStrVecCItr endInfluencers() const;
+
+    //! Return the search key for which this is gathering data.
+    const CSearchKey& searchKey() const;
+
+    //! Get the fields for which to gather data.
+    //!
+    //! This defines the fields to extract from a record. These include
+    //! the fields which define the categories whose counts are being
+    //! analyzed, the fields containing metric series names and values
+    //! and the fields defining a population.
+    const TStrVec& fieldsOfInterest() const;
+
+    //! Get the number of by field values. For a population model this will
+    //! be equal to numberActiveAttributes(); for an individual model
+    //! numberActivePeople().
+    std::size_t numberByFieldValues() const;
+
+    //! Get the number of over field values. For a population model this
+    //! will be equal to numberActivePeople(); for an individual model 0.
+    std::size_t numberOverFieldValues() const;
+
+    //! Have we been configured to use NULL values?
+    bool useNull() const;
+    //@}
+
+    //! \name Update
+    //@{
+    //! Process the specified fields.
+    //!
+    //! This adds people and attributes as necessary and fills out the
+    //! event data from \p fieldValues.
+    bool processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor);
+
+    //! Record the arrival of \p data at \p time.
+    bool addArrival(const TStrCPtrVec& fieldValues, CEventData& data, CResourceMonitor& resourceMonitor);
+
+    //! Roll time to the end of the bucket that is latency after the sampled bucket.
+    void sampleNow(core_t::TTime sampleBucketStart);
+
+    //! Roll time to the end of the bucket that is latency after the sampled bucket
+    //! without performing any updates that impact the model.
+    void skipSampleNow(core_t::TTime sampleBucketStart);
+    //@}
+
+    //!
\name Features + //@{ + //! Get the number of features on which this is gathering data. + std::size_t numberFeatures() const; + + //! Check if this is gathering data on \p feature. + bool hasFeature(model_t::EFeature feature) const; + + //! Get the feature corresponding to \p i. + //! + //! \warning \p i must be in range for the features this gatherer + //! is collecting, i.e. it must be less than numberFeatures. + model_t::EFeature feature(std::size_t i) const; + + //! Get the collection of features for which data is being gathered. + const TFeatureVec& features() const; + + //! Get the data for all features for the bucketing time interval + //! containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the feature data at \p time. + //! \tparam T The type of the feature data. + template + bool featureData(core_t::TTime time, core_t::TTime bucketLength, std::vector>& result) const { + TFeatureAnyPrVec rawFeatureData; + this->chooseBucketGatherer(time).featureData(time, bucketLength, rawFeatureData); + + bool succeeded = true; + + result.clear(); + result.reserve(rawFeatureData.size()); + for (std::size_t i = 0u; i < rawFeatureData.size(); ++i) { + TFeatureAnyPr& feature = rawFeatureData[i]; + + // Check the typeid before attempting the cast so we + // don't use throw to handle failure, which is slow. + if (feature.second.type() != typeid(T)) { + LOG_ERROR("Bad type for feature = " << model_t::print(feature.first) << ", expected " << typeid(T).name() << " got " + << feature.second.type().name()); + succeeded = false; + continue; } - return succeeded; - } - //@} - - //! \name Person - //@{ - //! Get the number of active people (not pruned). - std::size_t numberActivePeople() const; - - //! Get the maximum person identifier seen so far - //! (some of which might have been pruned). - std::size_t numberPeople() const; - - //! Get the unique identifier of a person if it exists. - //! - //! \param[in] person The person of interest. - //! \param[out] result Filled in with the identifier of \p person - //! if they exist otherwise max std::size_t. - //! \return True if the person exists and false otherwise. - bool personId(const std::string &person, std::size_t &result) const; - - //! Get the unique identifier of an arbitrary known person. - //! \param[out] result Filled in with the identifier of a person - //! \return True if a person exists and false otherwise. - bool anyPersonId(std::size_t &result) const; - - //! Get the name of the person identified by \p pid if they exist. - //! - //! \param[in] pid The unique identifier of the person of interest. - //! \return The person name if they exist and a fallback otherwise. - const std::string &personName(std::size_t pid) const; - - //! Get the name of the person identified by \p pid if they exist. - //! - //! \param[in] pid The unique identifier of the person of interest. - //! \return The person name if they exist and a fallback otherwise. - const core::CStoredStringPtr &personNamePtr(std::size_t pid) const; - - //! Get the name of the person identified by \p pid if they exist. - //! - //! \param[in] pid The unique identifier of the person of interest. - //! \param[in] fallback The fall back name. - //! \return The person name if they exist and \p fallback otherwise. - const std::string &personName(std::size_t pid, const std::string &fallback) const; - - //! Get the non-zero counts by person for the bucketing interval - //! containing \p time. - //! - //! \param[in] time The time of interest. - //! 
\param[out] result Filled in with the non-zero counts by person.
-        //! The first element is the person identifier and the second their
-        //! count in the bucketing interval. The result is sorted by person.
-        //! \note We expect the non-zero counts to be sparse on the space
-        //! of people so use a sparse encoding:
-        //! <pre class="fragment">
-        //!   \f$ pid \leftarrow c\f$
-        //! </pre>
- //! where,\n - //! \f$pid\f$ is the person identifier,\n - //! \f$c\f$ is the count for the person. - void personNonZeroCounts(core_t::TTime time, TSizeUInt64PrVec &result) const; - - //! Stop gathering data on the people identified by \p peopleToRemove. - void recyclePeople(const TSizeVec &peopleToRemove); - - //! Remove all traces of people whose identifiers are greater than - //! or equal to \p lowestPersonToRemove. - void removePeople(std::size_t lowestPersonToRemove); - - //! Get unique identifiers of any people that have been recycled. - TSizeVec &recycledPersonIds(); - - //! Check that the person is no longer being modeled. - bool isPersonActive(std::size_t pid) const; - - //! Record a person called \p person. - std::size_t addPerson(const std::string &person, - CResourceMonitor &resourceMonitor, - bool &addedPerson); - //@} - - //! \name Attribute - //@{ - //! Get the number of active attributes (not pruned). - std::size_t numberActiveAttributes() const; - - //! Get the maximum attribute identifier seen so far - //! (some of which might have been pruned). - std::size_t numberAttributes() const; - - //! Get the unique identifier of an attribute if it exists. - //! - //! \param[in] attribute The person of interest. - //! \param[out] result Filled in with the identifier of \p attribute - //! if they exist otherwise max std::size_t. - //! \return True if the attribute exists and false otherwise. - bool attributeId(const std::string &attribute, std::size_t &result) const; - - //! Get the name of the attribute identified by \p cid if they exist. - //! - //! \param[in] cid The unique identifier of the attribute of interest. - //! \return The attribute name if it exists anda fallback otherwise. - const std::string &attributeName(std::size_t cid) const; - - //! Get the name of the attribute identified by \p pid if they exist. - //! - //! \param[in] cid The unique identifier of the attribute of interest. - //! \return The attribute name if they exist and a fallback otherwise. - const core::CStoredStringPtr &attributeNamePtr(std::size_t cid) const; - - //! Get the name of the attribute identified by \p cid if they exist. - //! - //! \param[in] cid The unique identifier of the attribute of interest. - //! \param[in] fallback The fall back name. - //! \return The attribute name if it exists and \p fallback otherwise. - const std::string &attributeName(std::size_t cid, const std::string &fallback) const; - - //! Stop gathering data on the attributes identified by \p attributesToRemove. - void recycleAttributes(const TSizeVec &attributesToRemove); - - //! Remove all traces of attributes whose identifiers are greater than - //! or equal to \p lowestAttributeToRemove. - void removeAttributes(std::size_t lowestAttributeToRemove); - - //! Get unique identifiers of any attributes that have been recycled. - TSizeVec &recycledAttributeIds(); - - //! Check that the person is no longer being modeled. - bool isAttributeActive(std::size_t cid) const; - //@} - - //! \name Metric - //@{ - //! Get the current number of measurements in a sample for - //! the model of the entity identified by \p id. - //! - //! If we are performing temporal analysis we have one sample - //! count per person and if we are performing population analysis - //! we have one sample count per attribute. - double sampleCount(std::size_t id) const; - - //! Get the effective number of measurements in a sample for - //! the model of the entity identified by \p id. - //! - //! 
If we are performing temporal analysis we have one sample - //! count per person and if we are performing population analysis - //! we have one sample count per attribute. - double effectiveSampleCount(std::size_t id) const; - - //! Reset the number of measurements in a sample for the entity - //! identified \p id. - //! - //! If we are performing individual analysis we have one sample - //! count per person and if we are performing population analysis - //! we have one sample count per attribute. - void resetSampleCount(std::size_t id); - - //! Get the sample counts. - TSampleCountsPtr sampleCounts() const; - //@} - - //! \name Time - //@{ - //! Get the start of the current bucketing time interval. - core_t::TTime currentBucketStartTime() const; - - //! Reset the current bucketing interval start time. - void currentBucketStartTime(core_t::TTime bucketStart); - - //! Get the length of the bucketing time interval. - core_t::TTime bucketLength() const; - - //! Check if data is available at \p time. - bool dataAvailable(core_t::TTime time) const; - - //! For each bucket in the interval [\p startTime, \p endTime], - //! validate that it can be sampled and increment \p startTime - //! to the first valid bucket or \p endTime if no valid buckets - //! exist. - //! - //! \param[in,out] startTime The start of the interval to sample. - //! \param[in] endTime The end of the interval to sample. - bool validateSampleTimes(core_t::TTime &startTime, - core_t::TTime endTime) const; - - //! Roll time forwards to \p time. Note this method is only supported - //! for testing purposes and should not normally be called. - void timeNow(core_t::TTime time); - - //! Print the current bucket. - std::string printCurrentBucket(core_t::TTime time) const; - - //! Record a attribute called \p attribute. - std::size_t addAttribute(const std::string &attribute, - CResourceMonitor &resourceMonitor, - bool &addedAttribute); - //@} - - //! \name Counts - //@{ - //! Get the non-zero (person, attribute) pair counts in the - //! bucketing interval corresponding to the given time. - const TSizeSizePrUInt64UMap &bucketCounts(core_t::TTime time) const; - - //! Get the non-zero (person, attribute) pair counts for each - //! value of influencing field. - const TSizeSizePrStoredStringPtrPrUInt64UMapVec &influencerCounts(core_t::TTime time) const; - //@} - - //! Get the checksum of this gatherer. - uint64_t checksum() const; - - //! Debug the memory used by this component. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component. - std::size_t memoryUsage() const; - - //! Clear this data gatherer. - void clear(); - - //! Reset bucket and return true if bucket was successfully - //! reset or false otherwise. - //! Note that this should not be used in conjunction with out-of-phase buckets - //! where the concept of resetting a specific bucketed period of time is - //! not valid. - bool resetBucket(core_t::TTime bucketStart); - - //! Release memory that is no longer needed - void releaseMemory(core_t::TTime samplingCutoffTime); - - //! Get the global configuration parameters. - const SModelParams ¶ms() const; - - // \name Tuple - //@{ - //! Extract the person identifier from a tuple. - template - static inline std::size_t extractPersonId(const std::pair &tuple) - { - return tuple.first.first; + // We emulate move semantics here to avoid the expensive + // copy if T is large (as we expect it might be sometimes). 
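// Editor's note (not part of the patch): the "using std::swap" idiom referred
// to in the surrounding comments looks like this in isolation; Foo is a
// hypothetical type standing in for T.
//
//     Foo source = makeFoo(); // expensive-to-copy value we want to hand over
//     Foo target;             // requires the type to be default constructible
//     using std::swap;        // fall back to std::swap for built-in types...
//     swap(target, source);   // ...but let ADL pick up any swap overload for Foo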
+            // We have to adopt the using std::swap idiom (contravening
+            // coding guidelines) because T can be a built in type.
+            // Unfortunately, this implementation requires T to be
+            // default constructible.
+            using std::swap;
+            result.push_back(std::pair<model_t::EFeature, T>(feature.first, T()));
+            T& tmp = boost::any_cast<T&>(feature.second);
+            swap(result.back().second, tmp);
+        }
+
+        return succeeded;
+    }
+    //@}
+
+    //! \name Person
+    //@{
+    //! Get the number of active people (not pruned).
+    std::size_t numberActivePeople() const;
+
+    //! Get the maximum person identifier seen so far
+    //! (some of which might have been pruned).
+    std::size_t numberPeople() const;
+
+    //! Get the unique identifier of a person if it exists.
+    //!
+    //! \param[in] person The person of interest.
+    //! \param[out] result Filled in with the identifier of \p person
+    //! if they exist otherwise max std::size_t.
+    //! \return True if the person exists and false otherwise.
+    bool personId(const std::string& person, std::size_t& result) const;
+
+    //! Get the unique identifier of an arbitrary known person.
+    //! \param[out] result Filled in with the identifier of a person
+    //! \return True if a person exists and false otherwise.
+    bool anyPersonId(std::size_t& result) const;
+
+    //! Get the name of the person identified by \p pid if they exist.
+    //!
+    //! \param[in] pid The unique identifier of the person of interest.
+    //! \return The person name if they exist and a fallback otherwise.
+    const std::string& personName(std::size_t pid) const;
+
+    //! Get the name of the person identified by \p pid if they exist.
+    //!
+    //! \param[in] pid The unique identifier of the person of interest.
+    //! \return The person name if they exist and a fallback otherwise.
+    const core::CStoredStringPtr& personNamePtr(std::size_t pid) const;
+
+    //! Get the name of the person identified by \p pid if they exist.
+    //!
+    //! \param[in] pid The unique identifier of the person of interest.
+    //! \param[in] fallback The fall back name.
+    //! \return The person name if they exist and \p fallback otherwise.
+    const std::string& personName(std::size_t pid, const std::string& fallback) const;
+
+    //! Get the non-zero counts by person for the bucketing interval
+    //! containing \p time.
+    //!
+    //! \param[in] time The time of interest.
+    //! \param[out] result Filled in with the non-zero counts by person.
+    //! The first element is the person identifier and the second their
+    //! count in the bucketing interval. The result is sorted by person.
+    //! \note We expect the non-zero counts to be sparse on the space
+    //! of people so use a sparse encoding:
+    //! <pre class="fragment">
+    //!   \f$ pid \leftarrow c\f$
+    //! </pre>
+    //! where,\n
+    //! \f$pid\f$ is the person identifier,\n
+    //! \f$c\f$ is the count for the person.
+    void personNonZeroCounts(core_t::TTime time, TSizeUInt64PrVec& result) const;
+
+    //! Stop gathering data on the people identified by \p peopleToRemove.
+    void recyclePeople(const TSizeVec& peopleToRemove);
+
+    //! Remove all traces of people whose identifiers are greater than
+    //! or equal to \p lowestPersonToRemove.
+    void removePeople(std::size_t lowestPersonToRemove);
+
+    //! Get unique identifiers of any people that have been recycled.
+    TSizeVec& recycledPersonIds();
+
+    //! Check that the person is no longer being modeled.
+    bool isPersonActive(std::size_t pid) const;
+
+    //! Record a person called \p person.
+    std::size_t addPerson(const std::string& person, CResourceMonitor& resourceMonitor, bool& addedPerson);
+    //@}
+
+    //! \name Attribute
+    //@{
+    //! Get the number of active attributes (not pruned).
+    std::size_t numberActiveAttributes() const;
+
+    //! Get the maximum attribute identifier seen so far
+    //! (some of which might have been pruned).
+    std::size_t numberAttributes() const;
+
+    //! Get the unique identifier of an attribute if it exists.
+    //!
+    //! \param[in] attribute The attribute of interest.
+    //! \param[out] result Filled in with the identifier of \p attribute
+    //! if it exists otherwise max std::size_t.
+    //! \return True if the attribute exists and false otherwise.
+    bool attributeId(const std::string& attribute, std::size_t& result) const;
+
+    //! Get the name of the attribute identified by \p cid if it exists.
+    //!
+    //! \param[in] cid The unique identifier of the attribute of interest.
+    //! \return The attribute name if it exists and a fallback otherwise.
+    const std::string& attributeName(std::size_t cid) const;
+
+    //! Get the name of the attribute identified by \p cid if it exists.
+    //!
+    //! \param[in] cid The unique identifier of the attribute of interest.
+    //! \return The attribute name if it exists and a fallback otherwise.
+    const core::CStoredStringPtr& attributeNamePtr(std::size_t cid) const;
+
+    //! Get the name of the attribute identified by \p cid if it exists.
+    //!
+    //! \param[in] cid The unique identifier of the attribute of interest.
+    //! \param[in] fallback The fall back name.
+    //! \return The attribute name if it exists and \p fallback otherwise.
+    const std::string& attributeName(std::size_t cid, const std::string& fallback) const;
+
+    //! Stop gathering data on the attributes identified by \p attributesToRemove.
+    void recycleAttributes(const TSizeVec& attributesToRemove);
+
+    //! Remove all traces of attributes whose identifiers are greater than
+    //! or equal to \p lowestAttributeToRemove.
+    void removeAttributes(std::size_t lowestAttributeToRemove);
+
+    //! Get unique identifiers of any attributes that have been recycled.
+    TSizeVec& recycledAttributeIds();
+
+    //! Check that the attribute is no longer being modeled.
+    bool isAttributeActive(std::size_t cid) const;
+    //@}
+
+    //! \name Metric
+    //@{
+    //! Get the current number of measurements in a sample for
+    //! the model of the entity identified by \p id.
+    //!
+    //! If we are performing temporal analysis we have one sample
+    //! count per person and if we are performing population analysis
+    //! we have one sample count per attribute.
+    double sampleCount(std::size_t id) const;
+
+    //! Get the effective number of measurements in a sample for
+    //! the model of the entity identified by \p id.
+    //!
+    //! If we are performing temporal analysis we have one sample
+    //! count per person and if we are performing population analysis
+    //! we have one sample count per attribute.
+    double effectiveSampleCount(std::size_t id) const;
+
+    //! Reset the number of measurements in a sample for the entity
+    //! identified by \p id.
+    //!
+    //! If we are performing individual analysis we have one sample
+    //! count per person and if we are performing population analysis
+    //! we have one sample count per attribute.
+    void resetSampleCount(std::size_t id);
+
+    //! Get the sample counts.
+    TSampleCountsPtr sampleCounts() const;
+    //@}
+
+    //! \name Time
+    //@{
+    //! Get the start of the current bucketing time interval.
+    core_t::TTime currentBucketStartTime() const;
+
+    //! Reset the current bucketing interval start time.
+    void currentBucketStartTime(core_t::TTime bucketStart);
+
+    //! Get the length of the bucketing time interval.
+    core_t::TTime bucketLength() const;
+
+    //! Check if data is available at \p time.
+    bool dataAvailable(core_t::TTime time) const;
+
+    //! For each bucket in the interval [\p startTime, \p endTime],
+    //! validate that it can be sampled and increment \p startTime
+    //! to the first valid bucket or \p endTime if no valid buckets
+    //! exist.
+    //!
+    //! \param[in,out] startTime The start of the interval to sample.
+    //! \param[in] endTime The end of the interval to sample.
+    bool validateSampleTimes(core_t::TTime& startTime, core_t::TTime endTime) const;
+
+    //! Roll time forwards to \p time. Note this method is only supported
+    //! for testing purposes and should not normally be called.
+    void timeNow(core_t::TTime time);
+
+    //! Print the current bucket.
+    std::string printCurrentBucket(core_t::TTime time) const;
+
+    //! Record an attribute called \p attribute.
+    std::size_t addAttribute(const std::string& attribute, CResourceMonitor& resourceMonitor, bool& addedAttribute);
+    //@}
+
+    //! \name Counts
+    //@{
+    //! Get the non-zero (person, attribute) pair counts in the
+    //! bucketing interval corresponding to the given time.
+    const TSizeSizePrUInt64UMap& bucketCounts(core_t::TTime time) const;
+
+    //! Get the non-zero (person, attribute) pair counts for each
+    //! value of influencing field.
+    const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencerCounts(core_t::TTime time) const;
+    //@}
+
+    //! Get the checksum of this gatherer.
+    uint64_t checksum() const;
+
+    //! Debug the memory used by this component.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this component.
+    std::size_t memoryUsage() const;
+
+    //! Clear this data gatherer.
+    void clear();
+
+    //! Reset bucket and return true if bucket was successfully
+    //! reset or false otherwise.
+    //! Note that this should not be used in conjunction with out-of-phase buckets
+    //! where the concept of resetting a specific bucketed period of time is
+    //! not valid.
+    bool resetBucket(core_t::TTime bucketStart);
+
+    //! Release memory that is no longer needed
+    void releaseMemory(core_t::TTime samplingCutoffTime);
+
+    //! Get the global configuration parameters.
+    const SModelParams& params() const;
+
+    // \name Tuple
+    //@{
+    //! Extract the person identifier from a tuple.
+    template
+    static inline std::size_t extractPersonId(const std::pair& tuple) {
+        return tuple.first.first;
+    }
+    //! Extract the person identifier from a tuple.
+    template
+    static inline std::size_t extractPersonId(const std::pair& tuple) {
+        return tuple.first.first;
+    }
+    //! Extract the person identifier from a tuple.
+ static inline std::size_t extractPersonId(const TSizeSizePr& tuple) { return tuple.first; } + //! Extracts the person identifier from a tuple. + struct SExtractPersonId { + template + std::size_t operator()(const TUPLE& t) const { + return CDataGatherer::extractPersonId(t); } - //! Extract the attribute identifier from a tuple. - template - static inline std::size_t extractAttributeId(const std::pair &tuple) - { - return tuple.first.second; + }; + + //! Extract the attribute identifier from a tuple. + template + static inline std::size_t extractAttributeId(const std::pair& tuple) { + return tuple.first.second; + } + //! Extract the attribute identifier from a tuple. + template + static inline std::size_t extractAttributeId(const std::pair& tuple) { + return tuple.first.second; + } + //! Extract the attribute identifier from a tuple. + static inline std::size_t extractAttributeId(const TSizeSizePr& tuple) { return tuple.second; } + //! Extracts the attribute identifier from a tuple. + struct SExtractAttributeId { + template + std::size_t operator()(const TUPLE& t) const { + return CDataGatherer::extractAttributeId(t); } - //! Extract the attribute identifier from a tuple. - static inline std::size_t extractAttributeId(const TSizeSizePr &tuple) - { - return tuple.second; - } - //! Extracts the attribute identifier from a tuple. - struct SExtractAttributeId - { - template - std::size_t operator()(const TUPLE &t) const - { - return CDataGatherer::extractAttributeId(t); - } - }; - - //! Extract the data from a tuple. - template - static inline const T &extractData(const std::pair &tuple) - { - return tuple.second; - } - //! Extract the data from a tuple. - template - static inline const T &extractData(const std::pair &tuple) - { - return tuple.second; - } - //@} - - //! In the case of manually named summarized statistics, map the first - //! feature to a metric category. - bool determineMetricCategory(TMetricCategoryVec &fieldMetricCategories) const; - - //! Helper to avoid code duplication when getting a count from a - //! field. Logs different errors for missing value and invalid value. - bool extractCountFromField(const std::string &fieldName, - const std::string *fieldValue, - std::size_t &count) const; - - //! Helper to avoid code duplication when getting a metric value from a - //! field. Logs different errors for missing value and invalid value. - bool extractMetricFromField(const std::string &fieldName, - std::string fieldValue, - TDouble1Vec &metricValue) const; - - //! Returns the startTime of the earliest bucket for which data are still - //! accepted. - core_t::TTime earliestBucketStartTime() const; - - //! Check the class invariants. - bool checkInvariants() const; - - private: - //! The summary count field value to indicate that the record should - //! be ignored. - static const std::string EXPLICIT_NULL; - - private: - using TModelParamsCRef = boost::reference_wrapper; - - private: - //! Select the correct bucket gatherer based on the time: if we have - //! out-of-phase buckets, select either in-phase or out-of-phase. - const CBucketGatherer &chooseBucketGatherer(core_t::TTime time) const; - - //! Select the correct bucket gatherer based on the time: if we have - //! out-of-phase buckets, select either in-phase or out-of-phase. - CBucketGatherer &chooseBucketGatherer(core_t::TTime time); - - //! Restore state from supplied traverser. 
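// Editor's illustration (not part of the patch): how the extractor helpers
// above are meant to be used. The entry contents are hypothetical, and this
// assumes TSizeSizePrUInt64Pr pairs a (person id, attribute id) key with a
// count, as the surrounding declarations suggest.
//
//     CDataGatherer::TSizeSizePrUInt64Pr entry{{3 /*pid*/, 7 /*cid*/}, 42 /*count*/};
//     std::size_t pid = CDataGatherer::extractPersonId(entry);    // == 3
//     std::size_t cid = CDataGatherer::extractAttributeId(entry); // == 7
//     uint64_t count = CDataGatherer::extractData(entry);         // == 42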
- bool acceptRestoreTraverser(const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core::CStateRestoreTraverser &traverser); - - //! Restore a bucket gatherer from the supplied traverser. - bool restoreBucketGatherer(const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core::CStateRestoreTraverser &traverser); - - //! Persist a bucket gatherer by passing information to the supplied - //! inserter. - void persistBucketGatherers(core::CStatePersistInserter &inserter) const; - - //! Create the bucket specific data gatherer. - void createBucketGatherer(model_t::EAnalysisCategory gathererType, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core_t::TTime startTime, - unsigned int sampleCountOverride); - - private: - //! The type of the bucket gatherer(s) used. - model_t::EAnalysisCategory m_GathererType; - - //! The collection of features on which to gather data. - TFeatureVec m_Features; - - //! The collection of bucket gatherers which contain the bucket-specific - //! metrics and counts. - TBucketGathererPVec m_Gatherers; - - //! Indicates whether the data being gathered are already summarized - //! by an external aggregation process. - model_t::ESummaryMode m_SummaryMode; - - //! The global configuration parameters. - TModelParamsCRef m_Params; - - //! The partition field name or an empty string if there isn't one. - std::string m_PartitionFieldName; - - //! The value of the partition field for this detector. - core::CStoredStringPtr m_PartitionFieldValue; - - //! The key of the search for which data is being gathered. - TSearchKeyCRef m_SearchKey; - - //! A registry where person names are mapped to unique IDs. - CDynamicStringIdRegistry m_PeopleRegistry; - - //! A registry where attribute names are mapped to unique IDs. - CDynamicStringIdRegistry m_AttributesRegistry; - - //! True if this is a population data gatherer and false otherwise. - bool m_Population; - - //! If true the gatherer will process missing person field values. - bool m_UseNull; - - //! The object responsible for managing sample counts. - TSampleCountsPtr m_SampleCounts; + }; + + //! Extract the data from a tuple. + template + static inline const T& extractData(const std::pair& tuple) { + return tuple.second; + } + //! Extract the data from a tuple. + template + static inline const T& extractData(const std::pair& tuple) { + return tuple.second; + } + //@} + + //! In the case of manually named summarized statistics, map the first + //! feature to a metric category. + bool determineMetricCategory(TMetricCategoryVec& fieldMetricCategories) const; + + //! Helper to avoid code duplication when getting a count from a + //! field. Logs different errors for missing value and invalid value. + bool extractCountFromField(const std::string& fieldName, const std::string* fieldValue, std::size_t& count) const; + + //! Helper to avoid code duplication when getting a metric value from a + //! field. Logs different errors for missing value and invalid value. + bool extractMetricFromField(const std::string& fieldName, std::string fieldValue, TDouble1Vec& metricValue) const; + + //! 
Returns the startTime of the earliest bucket for which data are still + //! accepted. + core_t::TTime earliestBucketStartTime() const; + + //! Check the class invariants. + bool checkInvariants() const; + +private: + //! The summary count field value to indicate that the record should + //! be ignored. + static const std::string EXPLICIT_NULL; + +private: + using TModelParamsCRef = boost::reference_wrapper; + +private: + //! Select the correct bucket gatherer based on the time: if we have + //! out-of-phase buckets, select either in-phase or out-of-phase. + const CBucketGatherer& chooseBucketGatherer(core_t::TTime time) const; + + //! Select the correct bucket gatherer based on the time: if we have + //! out-of-phase buckets, select either in-phase or out-of-phase. + CBucketGatherer& chooseBucketGatherer(core_t::TTime time); + + //! Restore state from supplied traverser. + bool acceptRestoreTraverser(const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core::CStateRestoreTraverser& traverser); + + //! Restore a bucket gatherer from the supplied traverser. + bool restoreBucketGatherer(const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core::CStateRestoreTraverser& traverser); + + //! Persist a bucket gatherer by passing information to the supplied + //! inserter. + void persistBucketGatherers(core::CStatePersistInserter& inserter) const; + + //! Create the bucket specific data gatherer. + void createBucketGatherer(model_t::EAnalysisCategory gathererType, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core_t::TTime startTime, + unsigned int sampleCountOverride); + +private: + //! The type of the bucket gatherer(s) used. + model_t::EAnalysisCategory m_GathererType; + + //! The collection of features on which to gather data. + TFeatureVec m_Features; + + //! The collection of bucket gatherers which contain the bucket-specific + //! metrics and counts. + TBucketGathererPVec m_Gatherers; + + //! Indicates whether the data being gathered are already summarized + //! by an external aggregation process. + model_t::ESummaryMode m_SummaryMode; + + //! The global configuration parameters. + TModelParamsCRef m_Params; + + //! The partition field name or an empty string if there isn't one. + std::string m_PartitionFieldName; + + //! The value of the partition field for this detector. + core::CStoredStringPtr m_PartitionFieldValue; + + //! The key of the search for which data is being gathered. + TSearchKeyCRef m_SearchKey; + + //! A registry where person names are mapped to unique IDs. + CDynamicStringIdRegistry m_PeopleRegistry; + + //! A registry where attribute names are mapped to unique IDs. + CDynamicStringIdRegistry m_AttributesRegistry; + + //! True if this is a population data gatherer and false otherwise. + bool m_Population; + + //! If true the gatherer will process missing person field values. + bool m_UseNull; + + //! The object responsible for managing sample counts. 
+ TSampleCountsPtr m_SampleCounts; }; - } } diff --git a/include/model/CDetectionRule.h b/include/model/CDetectionRule.h index f3467f4e49..e58ea6ba85 100644 --- a/include/model/CDetectionRule.h +++ b/include/model/CDetectionRule.h @@ -14,13 +14,10 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { class CAnomalyDetectorModel; - //! \brief A rule that dictates an action to be taken when certain conditions occur. //! //! DESCRIPTION:\n @@ -35,86 +32,75 @@ class CAnomalyDetectorModel; //! conditions trigger the rule, the rule will apply to all series within the //! partition. However, when no target is specified, the rule will trigger only //! for series that are described in the conditions themselves. -class MODEL_EXPORT CDetectionRule -{ - public: - using TRuleConditionVec = std::vector; - using TDouble1Vec = core::CSmallVector; - - //! Rule actions can apply to filtering results, skipping sampling or both. - //! This is meant to work as a bit mask so added values should be powers of 2. - enum ERuleAction - { - E_FilterResults = 1, - E_SkipSampling = 2 - }; - - enum EConditionsConnective - { - E_Or, - E_And - }; - public: - - //! Default constructor. - //! The rule's action defaults to FILTER_RESULTS and the connective to OR. - CDetectionRule(); - - //! Set the rule's action. - void action(int ruleAction); - - //! Set the conditions' connective. - void conditionsConnective(EConditionsConnective connective); - - //! Add a condition. - void addCondition(const CRuleCondition &condition); - - //! Set the target field name. - void targetFieldName(const std::string &targetFieldName); - - //! Set the target field value. - void targetFieldValue(const std::string &targetFieldValue); - - //! Check whether the rule applies on a series. - //! \p action is bitwise and'ed with the m_Action member - bool apply(ERuleAction action, - const CAnomalyDetectorModel &model, - model_t::EFeature feature, - const model_t::CResultType &resultType, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - - //! Pretty-print the rule. - std::string print() const; - - private: - //! Check whether the given series is in the scope - //! of the rule's target. - bool isInScope(const CAnomalyDetectorModel &model, - std::size_t pid, - std::size_t cid) const; - - std::string printAction() const; - std::string printConditionsConnective() const; - - private: - //! The rule action. It works as a bit mask so its value - //! may not match any of the declared enum values but the - //! corresponding bit will be 1 when an action is enabled. - int m_Action; - - //! The conditions that trigger the rule. - TRuleConditionVec m_Conditions; - - //! The way the rule's conditions are logically connected (i.e. OR, AND). - EConditionsConnective m_ConditionsConnective; - - //! The optional target field name. Empty when not specified. - std::string m_TargetFieldName; - - //! The optional target field value. Empty when not specified. - std::string m_TargetFieldValue; +class MODEL_EXPORT CDetectionRule { +public: + using TRuleConditionVec = std::vector; + using TDouble1Vec = core::CSmallVector; + + //! Rule actions can apply to filtering results, skipping sampling or both. + //! This is meant to work as a bit mask so added values should be powers of 2. + enum ERuleAction { E_FilterResults = 1, E_SkipSampling = 2 }; + + enum EConditionsConnective { E_Or, E_And }; + +public: + //! Default constructor. + //! The rule's action defaults to FILTER_RESULTS and the connective to OR. 
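// Editor's illustration (not part of the patch): assembling a rule from the
// setters declared above. CRuleCondition's own configuration is elided because
// its header is not part of this hunk; 'condition' is a hypothetical placeholder.
//
//     model::CDetectionRule rule; // E_FilterResults and E_Or by default
//     rule.action(model::CDetectionRule::E_FilterResults |
//                 model::CDetectionRule::E_SkipSampling); // actions form a bit mask
//     rule.conditionsConnective(model::CDetectionRule::E_And);
//     model::CRuleCondition condition; // ...configured elsewhere
//     rule.addCondition(condition);
//     rule.targetFieldName("airline"); // scope the rule to a whole partition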
+ CDetectionRule(); + + //! Set the rule's action. + void action(int ruleAction); + + //! Set the conditions' connective. + void conditionsConnective(EConditionsConnective connective); + + //! Add a condition. + void addCondition(const CRuleCondition& condition); + + //! Set the target field name. + void targetFieldName(const std::string& targetFieldName); + + //! Set the target field value. + void targetFieldValue(const std::string& targetFieldValue); + + //! Check whether the rule applies on a series. + //! \p action is bitwise and'ed with the m_Action member + bool apply(ERuleAction action, + const CAnomalyDetectorModel& model, + model_t::EFeature feature, + const model_t::CResultType& resultType, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; + + //! Pretty-print the rule. + std::string print() const; + +private: + //! Check whether the given series is in the scope + //! of the rule's target. + bool isInScope(const CAnomalyDetectorModel& model, std::size_t pid, std::size_t cid) const; + + std::string printAction() const; + std::string printConditionsConnective() const; + +private: + //! The rule action. It works as a bit mask so its value + //! may not match any of the declared enum values but the + //! corresponding bit will be 1 when an action is enabled. + int m_Action; + + //! The conditions that trigger the rule. + TRuleConditionVec m_Conditions; + + //! The way the rule's conditions are logically connected (i.e. OR, AND). + EConditionsConnective m_ConditionsConnective; + + //! The optional target field name. Empty when not specified. + std::string m_TargetFieldName; + + //! The optional target field value. Empty when not specified. + std::string m_TargetFieldValue; }; } } diff --git a/include/model/CDetectorEqualizer.h b/include/model/CDetectorEqualizer.h index 246f10e9d7..4a8b2fa0a6 100644 --- a/include/model/CDetectorEqualizer.h +++ b/include/model/CDetectorEqualizer.h @@ -16,16 +16,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CLimits; class CModelConfig; @@ -36,55 +32,53 @@ class CModelConfig; //! for each detector. A corrected probability is obtained by converting //! raw probabilities to a rank and then reading off median probability //! for that rank over all detectors. -class MODEL_EXPORT CDetectorEqualizer -{ - public: - using TIntQuantileSketchPr = std::pair; - using TIntQuantileSketchPrVec = std::vector; +class MODEL_EXPORT CDetectorEqualizer { +public: + using TIntQuantileSketchPr = std::pair; + using TIntQuantileSketchPrVec = std::vector; - public: - //! Add \p probability to the detector's quantile sketch. - void add(int detector, double probability); +public: + //! Add \p probability to the detector's quantile sketch. + void add(int detector, double probability); - //! Correct \p probability to account for detector differences. - double correct(int detector, double probability); + //! Correct \p probability to account for detector differences. + double correct(int detector, double probability); - //! Clear all sketches. - void clear(); + //! Clear all sketches. + void clear(); - //! Age the sketches by reducing the count. - void age(double factor); + //! Age the sketches by reducing the count. + void age(double factor); - //! Persist state by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! 
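// Editor's illustration (not part of the patch): the add-then-correct flow of
// CDetectorEqualizer described in the class comment above. Detector ids and
// probabilities are hypothetical.
//
//     model::CDetectorEqualizer equalizer;
//     equalizer.add(0 /*detector*/, 1e-5 /*raw probability*/);
//     equalizer.add(1, 3e-4);
//     // Convert the raw probability to a rank in detector 0's sketch and read
//     // off the median probability for that rank over all detectors.
//     double corrected = equalizer.correct(0, 1e-5);
//     equalizer.age(0.98); // periodically decay the sketch counts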
Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Restore reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Restore reading state from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Get a checksum for the equalizer. - uint64_t checksum() const; + //! Get a checksum for the equalizer. + uint64_t checksum() const; - //! Get the largest probability that will be corrected. - static double largestProbabilityToCorrect(); + //! Get the largest probability that will be corrected. + static double largestProbabilityToCorrect(); - private: - //! Get the sketch for \p detector. - maths::CQuantileSketch &sketch(int detector); +private: + //! Get the sketch for \p detector. + maths::CQuantileSketch& sketch(int detector); - private: - //! The style of interpolation to use for the sketch. - static const maths::CQuantileSketch::EInterpolation SKETCH_INTERPOLATION; - //! The maximum size of the quantile sketch. - static const std::size_t SKETCH_SIZE; - //! The minimum count in a detector's sketch for which we'll - //! apply a correction to the probability. - static const double MINIMUM_COUNT_FOR_CORRECTION; +private: + //! The style of interpolation to use for the sketch. + static const maths::CQuantileSketch::EInterpolation SKETCH_INTERPOLATION; + //! The maximum size of the quantile sketch. + static const std::size_t SKETCH_SIZE; + //! The minimum count in a detector's sketch for which we'll + //! apply a correction to the probability. + static const double MINIMUM_COUNT_FOR_CORRECTION; - private: - //! The sketches (one for each detector). - TIntQuantileSketchPrVec m_Sketches; +private: + //! The sketches (one for each detector). + TIntQuantileSketchPrVec m_Sketches; }; - } } diff --git a/include/model/CDynamicStringIdRegistry.h b/include/model/CDynamicStringIdRegistry.h index 947327f5f5..db6979ca3d 100644 --- a/include/model/CDynamicStringIdRegistry.h +++ b/include/model/CDynamicStringIdRegistry.h @@ -7,11 +7,11 @@ #ifndef INCLUDED_ml_model_CDynamicStringIdRegistry_h #define INCLUDED_ml_model_CDynamicStringIdRegistry_h -#include #include #include #include #include +#include #include @@ -20,15 +20,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CResourceMonitor; //! \brief Register string names and map them to unique identifiers. @@ -38,144 +35,139 @@ class CResourceMonitor; //! The registry provides mapping from a registered string to its id and //! vice versa. In addition, the registry provides a recycling mechanism //! in order to reuse IDs whose mapped string is no longer relevant. -class MODEL_EXPORT CDynamicStringIdRegistry -{ - public: - using TDictionary = core::CCompressedDictionary<2>; - using TWordSizeUMap = TDictionary::CWordUMap::Type; - using TWordSizeUMapItr = TWordSizeUMap::iterator; - using TWordSizeUMapCItr = TWordSizeUMap::const_iterator; - using TSizeVec = std::vector; - using TStrVec = std::vector; - using TStoredStringPtrVec = std::vector; - - public: - //! An identifier which will never be used for a real string. - static const std::size_t INVALID_ID; - - public: - //! Constructs a registry for that expects names of type \p nameType. - //! - //! \param[in] nameType The type of the names expected to be registered. - //! 
Mainly used to disambiguate log messages. - //! \param[in] addedStat The statistic to be increased when a new name is registered. - //! \param[in] addNotAllowedStat The statistic to be increased when a new name failed - //! to register because no more additions are allowed. - //! \param[in] recycledStat The statistic to be increased when an ID is recycled. - CDynamicStringIdRegistry(const std::string &nameType, - stat_t::EStatTypes addedStat, - stat_t::EStatTypes addNotAllowedStat, - stat_t::EStatTypes recycledStat); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken for - //! a general purpose copy constructor. - CDynamicStringIdRegistry(bool isForPersistence, const CDynamicStringIdRegistry &other); - - //! Get the name identified by \p id if it exists. - //! - //! \param[in] id The unique identifier of the name of interest. - //! \return The name if \p exists or \p fallback otherwise. - const std::string &name(std::size_t id, const std::string &fallback) const; - - //! Get the name identified by \p id if it exists, as a shared pointer - //! - //! \param[in] id The unique identifier of the name of interest. - //! \return The name as a string pointer - const core::CStoredStringPtr &namePtr(size_t id) const; - - //! Get the unique identifier of a name if it exists. - //! - //! \param[in] name The name of interest. - //! \param[out] result Filled in with the identifier of \p name - //! if it exists otherwise max std::size_t. - //! \return True if the name exists and false otherwise. - bool id(const std::string &name, std::size_t &result) const; - - //! Get the unique identifier of an arbitrary known name. - //! \param[out] result Filled in with the identifier of a name - //! \return True if a name exists and false otherwise. - bool anyId(std::size_t &result) const; - - //! Get the number of active names (not pruned). - std::size_t numberActiveNames() const; - - //! Get the maximum identifier seen so far - //! (some of which might have been pruned). - std::size_t numberNames() const; - - //! Check whether an identifier is active. - bool isIdActive(std::size_t id) const; - - //! Register a \p name and return its unique identifier. - std::size_t addName(const std::string &name, - core_t::TTime time, - CResourceMonitor &resourceMonitor, - bool &addedPerson); - - //! Remove all traces of names whose identifiers are greater than - //! or equal to \p lowestNameToRemove. - void removeNames(std::size_t lowestNameToRemove); - - //! Recycle the unique identifiers used by the names - //! identified by \p namesToRemove. - void recycleNames(const TSizeVec &namesToRemove, const std::string &defaultName); - - //! Get unique identifiers of any names that have been recycled. - TSizeVec &recycledIds(); - - //! Check the class invariants. - bool checkInvariants() const; - - //! Clear this registry. - void clear(); - - //! Get the checksum of this registry. - uint64_t checksum() const; - - //! Debug the memory used by this registry. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this registry. - std::size_t memoryUsage() const; - - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - private: - //! 
The type of the names expected to be registered. - std::string m_NameType; - - //! The statistic to be increased when a new name is registered. - stat_t::EStatTypes m_AddedStat; - - //! The statistic to be increased when a new name failed - //! to register because no more additions are allowed. - stat_t::EStatTypes m_AddNotAllowedStat; - - //! The statistic to be increased when an ID is recycled. - stat_t::EStatTypes m_RecycledStat; - - //! A compressed dictionary. - TDictionary m_Dictionary; - - //! Holds a unique identifier for each registered name which means - //! we can use direct address tables and fast hash maps and - //! sets keyed by names. - TWordSizeUMap m_Uids; +class MODEL_EXPORT CDynamicStringIdRegistry { +public: + using TDictionary = core::CCompressedDictionary<2>; + using TWordSizeUMap = TDictionary::CWordUMap::Type; + using TWordSizeUMapItr = TWordSizeUMap::iterator; + using TWordSizeUMapCItr = TWordSizeUMap::const_iterator; + using TSizeVec = std::vector; + using TStrVec = std::vector; + using TStoredStringPtrVec = std::vector; + +public: + //! An identifier which will never be used for a real string. + static const std::size_t INVALID_ID; + +public: + //! Constructs a registry that expects names of type \p nameType. + //! + //! \param[in] nameType The type of the names expected to be registered. + //! Mainly used to disambiguate log messages. + //! \param[in] addedStat The statistic to be increased when a new name is registered. + //! \param[in] addNotAllowedStat The statistic to be increased when a new name failed + //! to register because no more additions are allowed. + //! \param[in] recycledStat The statistic to be increased when an ID is recycled. + CDynamicStringIdRegistry(const std::string& nameType, + stat_t::EStatTypes addedStat, + stat_t::EStatTypes addNotAllowedStat, + stat_t::EStatTypes recycledStat); + + //! Create a copy that will result in the same persisted state as the + //! original. This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken for + //! a general purpose copy constructor. + CDynamicStringIdRegistry(bool isForPersistence, const CDynamicStringIdRegistry& other); + + //! Get the name identified by \p id if it exists. + //! + //! \param[in] id The unique identifier of the name of interest. + //! \return The name if \p id exists or \p fallback otherwise. + const std::string& name(std::size_t id, const std::string& fallback) const; + + //! Get the name identified by \p id if it exists, as a shared pointer. + //! + //! \param[in] id The unique identifier of the name of interest. + //! \return The name as a string pointer. + const core::CStoredStringPtr& namePtr(size_t id) const; + + //! Get the unique identifier of a name if it exists. + //! + //! \param[in] name The name of interest. + //! \param[out] result Filled in with the identifier of \p name + //! if it exists, otherwise max std::size_t. + //! \return True if the name exists and false otherwise. + bool id(const std::string& name, std::size_t& result) const; + + //! Get the unique identifier of an arbitrary known name. + //! \param[out] result Filled in with the identifier of a name. + //! \return True if a name exists and false otherwise. + bool anyId(std::size_t& result) const; + + //! Get the number of active names (not pruned). + std::size_t numberActiveNames() const; + + //! Get the maximum identifier seen so far + //!
(some of which might have been pruned). + std::size_t numberNames() const; + + //! Check whether an identifier is active. + bool isIdActive(std::size_t id) const; + + //! Register a \p name and return its unique identifier. + std::size_t addName(const std::string& name, core_t::TTime time, CResourceMonitor& resourceMonitor, bool& addedPerson); + + //! Remove all traces of names whose identifiers are greater than + //! or equal to \p lowestNameToRemove. + void removeNames(std::size_t lowestNameToRemove); + + //! Recycle the unique identifiers used by the names + //! identified by \p namesToRemove. + void recycleNames(const TSizeVec& namesToRemove, const std::string& defaultName); + + //! Get unique identifiers of any names that have been recycled. + TSizeVec& recycledIds(); + + //! Check the class invariants. + bool checkInvariants() const; + + //! Clear this registry. + void clear(); + + //! Get the checksum of this registry. + uint64_t checksum() const; + + //! Debug the memory used by this registry. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this registry. + std::size_t memoryUsage() const; + + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + +private: + //! The type of the names expected to be registered. + std::string m_NameType; + + //! The statistic to be increased when a new name is registered. + stat_t::EStatTypes m_AddedStat; + + //! The statistic to be increased when a new name failed + //! to register because no more additions are allowed. + stat_t::EStatTypes m_AddNotAllowedStat; + + //! The statistic to be increased when an ID is recycled. + stat_t::EStatTypes m_RecycledStat; + + //! A compressed dictionary. + TDictionary m_Dictionary; + + //! Holds a unique identifier for each registered name which means + //! we can use direct address tables and fast hash maps and + //! sets keyed by names. + TWordSizeUMap m_Uids; - //! Holds the name of each unique identifier. - TStoredStringPtrVec m_Names; + //! Holds the name of each unique identifier. + TStoredStringPtrVec m_Names; - //! A list of unique identifiers which are free to reuse. - TSizeVec m_FreeUids; + //! A list of unique identifiers which are free to reuse. + TSizeVec m_FreeUids; - //! A list of recycled unique identifiers. - TSizeVec m_RecycledUids; + //! A list of recycled unique identifiers. + TSizeVec m_RecycledUids; }; - } } diff --git a/include/model/CEventData.h b/include/model/CEventData.h index 6f3cf08db7..d35a430d51 100644 --- a/include/model/CEventData.h +++ b/include/model/CEventData.h @@ -7,8 +7,8 @@ #ifndef INCLUDED_ml_model_CEventData_h #define INCLUDED_ml_model_CEventData_h -#include #include +#include #include #include @@ -21,10 +21,8 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief The description of the event data corresponding to //! a single record. @@ -44,115 +42,113 @@ namespace model //! Finally, each record has a unique time and person, although //! with different semantics when an over field is and isn't //! present, so this always holds the time and person identifier. 
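[Illustration, not part of this patch: to make the record semantics above concrete, the sketch below packages a single metric record using the setters CEventData declares. All field values are invented and the include path is an assumption.]

```cpp
#include <model/CEventData.h>

// Hypothetical sketch: fill in one metric record before it is handed
// to a bucket gatherer. Every value here is invented.
void fillEventDataExample() {
    ml::model::CEventData event;
    event.time(1520592000);             // the record's timestamp
    event.person(42);                   // exactly one person identifier per record
    event.addAttribute(std::size_t{7}); // optional attribute identifier

    ml::model::CEventData::TDouble1Vec value;
    value.push_back(250.0);             // the function argument metric value
    event.addValue(value);

    event.addInfluence(std::string{"status_code"}); // an influencing field value
}
```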
-class MODEL_EXPORT CEventData -{ - public: - using TDouble1Vec = core::CSmallVector; - using TOptionalSize = boost::optional; - using TOptionalSizeVec = std::vector; - using TOptionalDouble = boost::optional; - // Fixed size array - one element per metric category - using TDouble1VecArray = boost::array; - // Second element in pair stores count - using TDouble1VecArraySizePr = std::pair; - using TOptionalDouble1VecArraySizePr = boost::optional; - using TOptionalDouble1VecArraySizePrVec = std::vector; - using TOptionalStr = boost::optional; - using TOptionalStrVec = std::vector; - - public: - //! Create uninitialized event data. - CEventData(); - - //! Efficiently swap the contents with \p other. - void swap(CEventData &other); - - //! Reset to uninitialized state. - void clear(); - - //! Set the time. - void time(core_t::TTime time); - - //! Set the person identifier. - //! - //! \warning There should only ever be one person identifier. - bool person(std::size_t pid); - - //! Add an attribute identifier. - void addAttribute(TOptionalSize cid = TOptionalSize()); - - //! Add a function argument metric statistic value. - void addValue(const TDouble1Vec &value = TDouble1Vec()); - - //! Set the function argument string value. - void stringValue(const std::string &value); - - //! Add an influencing field value. - void addInfluence(const TOptionalStr &influence); - - //! Add a count only statistic. - void addCountStatistic(size_t count); - - //! Add metric statistics. - void addStatistics(const TDouble1VecArraySizePr &statistics); - - //! Get the event time. - core_t::TTime time() const; - - //! Get the event person identifier. - TOptionalSize personId() const; - - //! Get the unique attribute identifier. - TOptionalSize attributeId() const; - - //! Get the function argument metric statistic value(s). - const TDouble1VecArray &values() const; - - //! Get the function argument string value. - const TOptionalStr &stringValue() const; - - //! Get the influencing field values. - const TOptionalStrVec &influences() const; - - //! Sets the data to be explicit null - void setExplicitNull(); - - //! Is explicit null? - bool isExplicitNull() const; - - //! Get the unique count of measurements comprising the statistic. - TOptionalSize count() const; - - //! Get a description of the event data for debug. - std::string print() const; - - private: - //! Read the \p i'th attribute identifier. - TOptionalSize attributeId(std::size_t i) const; - - //! Read the \p i'th statistic value(s). - const TDouble1VecArray &values(std::size_t i) const; - - //! Read the \p i'th statistic count. - TOptionalSize count(std::size_t i) const; - - private: - //! The event time. - core_t::TTime m_Time; - //! The event person identifier. - TOptionalSize m_Pid; - //! The event attribute identifier(s). - TOptionalSizeVec m_Cids; - //! The event value(s). - TOptionalDouble1VecArraySizePrVec m_Values; - //! The function argument string value for this event. - TOptionalStr m_StringValue; - //! The influencing field values. - TOptionalStrVec m_Influences; - //! Is it an explicit null record? 
- bool m_IsExplicitNull; +class MODEL_EXPORT CEventData { +public: + using TDouble1Vec = core::CSmallVector; + using TOptionalSize = boost::optional; + using TOptionalSizeVec = std::vector; + using TOptionalDouble = boost::optional; + // Fixed size array - one element per metric category + using TDouble1VecArray = boost::array; + // Second element in pair stores count + using TDouble1VecArraySizePr = std::pair; + using TOptionalDouble1VecArraySizePr = boost::optional; + using TOptionalDouble1VecArraySizePrVec = std::vector; + using TOptionalStr = boost::optional; + using TOptionalStrVec = std::vector; + +public: + //! Create uninitialized event data. + CEventData(); + + //! Efficiently swap the contents with \p other. + void swap(CEventData& other); + + //! Reset to uninitialized state. + void clear(); + + //! Set the time. + void time(core_t::TTime time); + + //! Set the person identifier. + //! + //! \warning There should only ever be one person identifier. + bool person(std::size_t pid); + + //! Add an attribute identifier. + void addAttribute(TOptionalSize cid = TOptionalSize()); + + //! Add a function argument metric statistic value. + void addValue(const TDouble1Vec& value = TDouble1Vec()); + + //! Set the function argument string value. + void stringValue(const std::string& value); + + //! Add an influencing field value. + void addInfluence(const TOptionalStr& influence); + + //! Add a count only statistic. + void addCountStatistic(size_t count); + + //! Add metric statistics. + void addStatistics(const TDouble1VecArraySizePr& statistics); + + //! Get the event time. + core_t::TTime time() const; + + //! Get the event person identifier. + TOptionalSize personId() const; + + //! Get the unique attribute identifier. + TOptionalSize attributeId() const; + + //! Get the function argument metric statistic value(s). + const TDouble1VecArray& values() const; + + //! Get the function argument string value. + const TOptionalStr& stringValue() const; + + //! Get the influencing field values. + const TOptionalStrVec& influences() const; + + //! Sets the data to be explicit null + void setExplicitNull(); + + //! Is explicit null? + bool isExplicitNull() const; + + //! Get the unique count of measurements comprising the statistic. + TOptionalSize count() const; + + //! Get a description of the event data for debug. + std::string print() const; + +private: + //! Read the \p i'th attribute identifier. + TOptionalSize attributeId(std::size_t i) const; + + //! Read the \p i'th statistic value(s). + const TDouble1VecArray& values(std::size_t i) const; + + //! Read the \p i'th statistic count. + TOptionalSize count(std::size_t i) const; + +private: + //! The event time. + core_t::TTime m_Time; + //! The event person identifier. + TOptionalSize m_Pid; + //! The event attribute identifier(s). + TOptionalSizeVec m_Cids; + //! The event value(s). + TOptionalDouble1VecArraySizePrVec m_Values; + //! The function argument string value for this event. + TOptionalStr m_StringValue; + //! The influencing field values. + TOptionalStrVec m_Influences; + //! Is it an explicit null record? 
+ bool m_IsExplicitNull; }; - } } diff --git a/include/model/CEventRateBucketGatherer.h b/include/model/CEventRateBucketGatherer.h index 15a75faee5..70860e8c5a 100644 --- a/include/model/CEventRateBucketGatherer.h +++ b/include/model/CEventRateBucketGatherer.h @@ -7,10 +7,10 @@ #ifndef INCLUDED_ml_model_CEventRateBucketGatherer_h #define INCLUDED_ml_model_CEventRateBucketGatherer_h -#include #include -#include +#include #include +#include #include @@ -27,65 +27,62 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief A structure to handle storing unique strings per person, //! attribute and influencer, used for the analytic functions //! "distinct_count" and "info_content" -class MODEL_EXPORT CUniqueStringFeatureData -{ - public: - using TDictionary1 = core::CCompressedDictionary<1>; - using TWord = TDictionary1::CWord; - using TWordSet = TDictionary1::TWordSet; - using TWordSetCItr = TWordSet::const_iterator; - using TWordStringUMap = TDictionary1::CWordUMap::Type; - using TWordStringUMapCItr = TWordStringUMap::const_iterator; - using TStoredStringPtrWordSetUMap = boost::unordered_map; - using TStoredStringPtrWordSetUMapCItr = TStoredStringPtrWordSetUMap::const_iterator; - using TStoredStringPtrWordSetUMapVec = std::vector; - using TStrCRef = SEventRateFeatureData::TStrCRef; - using TDouble1Vec = SEventRateFeatureData::TDouble1Vec; - using TDouble1VecDoublePr = SEventRateFeatureData::TDouble1VecDoublePr; - using TStrCRefDouble1VecDoublePrPr = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPr; - using TStrCRefDouble1VecDoublePrPrVec = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPrVec; - using TStoredStringPtrVec = CBucketGatherer::TStoredStringPtrVec; - - public: - //! Add a string into the collection - void insert(const std::string &value, const TStoredStringPtrVec &influences); - - //! Fill in a FeatureData structure with the influence strings and counts - void populateDistinctCountFeatureData(SEventRateFeatureData &featureData) const; - - //! Fill in a FeatureData structure with the influence info_content - void populateInfoContentFeatureData(SEventRateFeatureData &featureData) const; - - //! Persist state by passing information \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Initialize state reading from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Get the checksum of this object. - uint64_t checksum() const; - - //! Get the memory usage of this object in a tree structure. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory usage of this object. - std::size_t memoryUsage() const; - - //! Print the unique strings for debug. 
- std::string print() const; - - private: - TDictionary1 m_Dictionary1; - TWordStringUMap m_UniqueStrings; - TStoredStringPtrWordSetUMapVec m_InfluencerUniqueStrings; +class MODEL_EXPORT CUniqueStringFeatureData { +public: + using TDictionary1 = core::CCompressedDictionary<1>; + using TWord = TDictionary1::CWord; + using TWordSet = TDictionary1::TWordSet; + using TWordSetCItr = TWordSet::const_iterator; + using TWordStringUMap = TDictionary1::CWordUMap::Type; + using TWordStringUMapCItr = TWordStringUMap::const_iterator; + using TStoredStringPtrWordSetUMap = boost::unordered_map; + using TStoredStringPtrWordSetUMapCItr = TStoredStringPtrWordSetUMap::const_iterator; + using TStoredStringPtrWordSetUMapVec = std::vector; + using TStrCRef = SEventRateFeatureData::TStrCRef; + using TDouble1Vec = SEventRateFeatureData::TDouble1Vec; + using TDouble1VecDoublePr = SEventRateFeatureData::TDouble1VecDoublePr; + using TStrCRefDouble1VecDoublePrPr = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPr; + using TStrCRefDouble1VecDoublePrPrVec = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPrVec; + using TStoredStringPtrVec = CBucketGatherer::TStoredStringPtrVec; + +public: + //! Add a string into the collection + void insert(const std::string& value, const TStoredStringPtrVec& influences); + + //! Fill in a FeatureData structure with the influence strings and counts + void populateDistinctCountFeatureData(SEventRateFeatureData& featureData) const; + + //! Fill in a FeatureData structure with the influence info_content + void populateInfoContentFeatureData(SEventRateFeatureData& featureData) const; + + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Initialize state reading from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Get the checksum of this object. + uint64_t checksum() const; + + //! Get the memory usage of this object in a tree structure. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory usage of this object. + std::size_t memoryUsage() const; + + //! Print the unique strings for debug. + std::string print() const; + +private: + TDictionary1 m_Dictionary1; + TWordStringUMap m_UniqueStrings; + TStoredStringPtrWordSetUMapVec m_InfluencerUniqueStrings; }; //! \brief Event rate data gathering class. @@ -95,437 +92,403 @@ class MODEL_EXPORT CUniqueStringFeatureData //! to model the event rate in an arbitrary time series. //! //! \sa CDataGatherer. -class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer -{ - public: - using TCategoryAnyMap = std::map; - using TStrCRef = SEventRateFeatureData::TStrCRef; - using TDouble1Vec = SEventRateFeatureData::TDouble1Vec; - using TDouble1VecDoublePr = SEventRateFeatureData::TDouble1VecDoublePr; - using TStrCRefDouble1VecDoublePrPr = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPr; - using TStrCRefDouble1VecDoublePrPrVec = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPrVec; - using TStrCRefDouble1VecDoublePrPrVecVec = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPrVecVec; - using TSizeFeatureDataPr = std::pair; - using TSizeFeatureDataPrVec = std::vector; - using TSizeSizePrFeatureDataPr = std::pair; - using TSizeSizePrFeatureDataPrVec = std::vector; - - public: - //! \name Life-cycle - //@{ - //! Create an event rate bucket gatherer. - //! - //! \param[in] dataGatherer The owning data gatherer. - //!
\param[in] summaryCountFieldName If summaryMode is E_Manual - //! then this is the name of the field holding the summary count. - //! \param[in] personFieldName The name of the field which identifies - //! people. - //! \param[in] attributeFieldName The name of the field which defines - //! the person attributes. - //! \param[in] valueFieldName The name of the field which contains - //! the metric values. - //! \param[in] influenceFieldNames The field names for which we will - //! compute influences. - //! \param[in] startTime The start of the time interval for which - //! to gather data. - CEventRateBucketGatherer(CDataGatherer &dataGatherer, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core_t::TTime startTime); - - //! Construct from a state document. - CEventRateBucketGatherer(CDataGatherer &dataGatherer, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core::CStateRestoreTraverser &traverser); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken - //! for a general purpose copy constructor. - CEventRateBucketGatherer(bool isForPersistence, - const CEventRateBucketGatherer &other); - //@} - - //! \name Persistence - //@{ - //! Fill in the state from \p traverser. - virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Create a clone of this data gatherer that will result in the same - //! persisted state. The clone may be incomplete in ways that do not - //! affect the persisted representation, and must not be used for any - //! other purpose. - //! - //! \warning The caller owns the object returned. - virtual CBucketGatherer *cloneForPersistence() const; - - //! The persistence tag name of this derived class. - virtual const std::string& persistenceTag() const; - //@} - - //! \name Fields - //@{ - //! Get the person field name. - //! - //! This is the common field in all searches "along" which the - //! probabilities are aggregated, i.e. the "over" field name for - //! population searches and the "by" field name for individual - //! searches. - virtual const std::string &personFieldName() const; - - //! Get the attribute field name if one exists, i.e. the "by" for - //! population searches, field name and returns empty otherwise. - virtual const std::string &attributeFieldName() const; - - //! Returns an empty string. - virtual const std::string &valueFieldName() const; - - //! Get an iterator at the beginning the influencing field names. - virtual TStrVecCItr beginInfluencers() const; - - //! Get an iterator at the end of the influencing field names. - virtual TStrVecCItr endInfluencers() const; - - //! Get the fields for which to gather data. - //! - //! For individual searches this gets the field which defines the - //! categories whose counts are being analyzed. For population - //! searches this gets the fields identifying the people and person - //! 
attributes which are being analyzed. An empty string acts like - //! a wild card and matches all records. This is used for analysis - //! which is attribute independent such as total count. - virtual const TStrVec &fieldsOfInterest() const; - //@} - - //! Get a description of the search. - virtual std::string description() const; - - //! \name Update - //@{ - //! Process the specified fields. - //! - //! \note For individual searches \p fieldValues should contain one - //! field containing the by clause field value or a generic name if - //! none was specified. For population searches \p fieldValues should - //! contain two fields. The first field should contain the over clause - //! field value. The second field should the by clause field value - //! or a generic name if none was specified. - virtual bool processFields(const TStrCPtrVec &fieldValues, - CEventData &result, - CResourceMonitor &resourceMonitor); - //@} - - //! \name Person - //@{ - //! Stop gathering data on the people identified by \p peopleToRemove. - virtual void recyclePeople(const TSizeVec &peopleToRemove); - - //! Remove all traces of people whose identifiers are greater than - //! or equal to \p lowestPersonToRemove. - virtual void removePeople(std::size_t lowestPersonToRemove); - //@} - - //! \name Attribute - //@{ - //! Stop gathering data on the attributes identified by \p attributesToRemove. - virtual void recycleAttributes(const TSizeVec &attributesToRemove); - - //! Remove all traces of attributes whose identifiers are greater than - //! or equal to \p lowestAttributeToRemove. - virtual void removeAttributes(std::size_t lowestAttributeToRemove); - //@} - - //! Get the checksum of this gatherer. - virtual uint64_t checksum() const; - - //! Get the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Clear this data gatherer. - virtual void clear(); - - //! Reset bucket and return true if bucket was successfully reset or false otherwise. - virtual bool resetBucket(core_t::TTime bucketStart); - - //! Release memory that is no longer needed - virtual void releaseMemory(core_t::TTime samplingCutoffTime); - - //! \name Features - //@{ - //! Get the raw data for all features for the bucketing time interval - //! containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the feature data at \p time. - virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec &result) const; - //@} - - private: - //! No-op. - virtual void sample(core_t::TTime time); - - //! Append the counts by person for the bucketing interval containing - //! \p time. - //! - //! \param[in] time The time of interest. - //! \param[in,out] result Append (person identifier, count) for each - //! person. The collection is sorted by person. 
- void personCounts(model_t::EFeature feature, +class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { +public: + using TCategoryAnyMap = std::map; + using TStrCRef = SEventRateFeatureData::TStrCRef; + using TDouble1Vec = SEventRateFeatureData::TDouble1Vec; + using TDouble1VecDoublePr = SEventRateFeatureData::TDouble1VecDoublePr; + using TStrCRefDouble1VecDoublePrPr = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPr; + using TStrCRefDouble1VecDoublePrPrVec = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPrVec; + using TStrCRefDouble1VecDoublePrPrVecVec = SEventRateFeatureData::TStrCRefDouble1VecDoublePrPrVecVec; + using TSizeFeatureDataPr = std::pair; + using TSizeFeatureDataPrVec = std::vector; + using TSizeSizePrFeatureDataPr = std::pair; + using TSizeSizePrFeatureDataPrVec = std::vector; + +public: + //! \name Life-cycle + //@{ + //! Create an event rate bucket gatherer. + //! + //! \param[in] dataGatherer The owning data gatherer. + //! \param[in] summaryCountFieldName If summaryMode is E_Manual + //! then this is the name of the field holding the summary count. + //! \param[in] personFieldName The name of the field which identifies + //! people. + //! \param[in] attributeFieldName The name of the field which defines + //! the person attributes. + //! \param[in] valueFieldName The name of the field which contains + //! the metric values. + //! \param[in] influenceFieldNames The field names for which we will + //! compute influences. + //! \param[in] startTime The start of the time interval for which + //! to gather data. + CEventRateBucketGatherer(CDataGatherer& dataGatherer, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core_t::TTime startTime); + + //! Construct from a state document. + CEventRateBucketGatherer(CDataGatherer& dataGatherer, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core::CStateRestoreTraverser& traverser); + + //! Create a copy that will result in the same persisted state as the + //! original. This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken + //! for a general purpose copy constructor. + CEventRateBucketGatherer(bool isForPersistence, const CEventRateBucketGatherer& other); + //@} + + //! \name Persistence + //@{ + //! Fill in the state from \p traverser. + virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Create a clone of this data gatherer that will result in the same + //! persisted state. The clone may be incomplete in ways that do not + //! affect the persisted representation, and must not be used for any + //! other purpose. + //! + //! \warning The caller owns the object returned. + virtual CBucketGatherer* cloneForPersistence() const; + + //! The persistence tag name of this derived class. + virtual const std::string& persistenceTag() const; + //@} + + //! \name Fields + //@{ + //! Get the person field name. + //! + //! This is the common field in all searches "along" which the + //! 
probabilities are aggregated, i.e. the "over" field name for + //! population searches and the "by" field name for individual + //! searches. + virtual const std::string& personFieldName() const; + + //! Get the attribute field name if one exists, i.e. the "by" field + //! name for population searches, and returns the empty string otherwise. + virtual const std::string& attributeFieldName() const; + + //! Returns an empty string. + virtual const std::string& valueFieldName() const; + + //! Get an iterator at the beginning of the influencing field names. + virtual TStrVecCItr beginInfluencers() const; + + //! Get an iterator at the end of the influencing field names. + virtual TStrVecCItr endInfluencers() const; + + //! Get the fields for which to gather data. + //! + //! For individual searches this gets the field which defines the + //! categories whose counts are being analyzed. For population + //! searches this gets the fields identifying the people and person + //! attributes which are being analyzed. An empty string acts like + //! a wild card and matches all records. This is used for analysis + //! which is attribute independent such as total count. + virtual const TStrVec& fieldsOfInterest() const; + //@} + + //! Get a description of the search. + virtual std::string description() const; + + //! \name Update + //@{ + //! Process the specified fields. + //! + //! \note For individual searches \p fieldValues should contain one + //! field containing the by clause field value or a generic name if + //! none was specified. For population searches \p fieldValues should + //! contain two fields. The first field should contain the over clause + //! field value. The second field should contain the by clause field value + //! or a generic name if none was specified. + virtual bool processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor); + //@} + + //! \name Person + //@{ + //! Stop gathering data on the people identified by \p peopleToRemove. + virtual void recyclePeople(const TSizeVec& peopleToRemove); + + //! Remove all traces of people whose identifiers are greater than + //! or equal to \p lowestPersonToRemove. + virtual void removePeople(std::size_t lowestPersonToRemove); + //@} + + //! \name Attribute + //@{ + //! Stop gathering data on the attributes identified by \p attributesToRemove. + virtual void recycleAttributes(const TSizeVec& attributesToRemove); + + //! Remove all traces of attributes whose identifiers are greater than + //! or equal to \p lowestAttributeToRemove. + virtual void removeAttributes(std::size_t lowestAttributeToRemove); + //@} + + //! Get the checksum of this gatherer. + virtual uint64_t checksum() const; + + //! Get the memory used by this object. + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const; + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const; + + //! Clear this data gatherer. + virtual void clear(); + + //! Reset bucket and return true if bucket was successfully reset or false otherwise. + virtual bool resetBucket(core_t::TTime bucketStart); + + //! Release memory that is no longer needed + virtual void releaseMemory(core_t::TTime samplingCutoffTime); + + //! \name Features + //@{ + //! Get the raw data for all features for the bucketing time interval + //! containing \p time. + //! + //! \param[in] time The time of interest. + //!
\param[out] result Filled in with the feature data at \p time. + virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec& result) const; + //@} + +private: + //! No-op. + virtual void sample(core_t::TTime time); + + //! Append the counts by person for the bucketing interval containing + //! \p time. + //! + //! \param[in] time The time of interest. + //! \param[in,out] result Append (person identifier, count) for each + //! person. The collection is sorted by person. + void personCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the non-zero counts by person for the bucketing interval + //! containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[in,out] result Append (person identifier, count) for each + //! person present in the bucketing interval containing \p time. The + //! collection is sorted by person. + void nonZeroPersonCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append an indicator function for people present in the bucketing + //! interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[in,out] result Append (person identifier, 1) for each person + //! present in the bucketing interval containing \p time. The collection + //! is sorted by person identifier. + void personIndicator(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the mean arrival times for people present in the current + //! bucketing interval. + //! + //! \param[in] time The time of interest. + //! \param[in,out] result Append (person identifier, mean arrival time) + //! for each person present in the bucketing interval containing \p time. + //! The collection is sorted by person identifier. + void personArrivalTimes(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the non-zero counts for each attribute by person for the + //! bucketing interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[in,out] result Append the non-zero attribute counts by + //! person. The first element of the key is person and the second + //! attribute. The collection is sorted lexicographically by key. + //! \note We expect the pairs present to be sparse on the full outer + //! product space of attribute and person so use a sparse encoding. + void nonZeroAttributeCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the number of unique people hitting each attribute. + //! + //! \param[in,out] result Append the count of people per attribute. + //! The person identifier is dummied to zero so that the result + //! type matches other population features. + void peoplePerAttribute(model_t::EFeature feature, TFeatureAnyPrVec& result) const; + + //! Append an indicator function for (person, attribute) pairs + //! present in the bucketing interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[in,out] result Append one for each (person, attribute) + //! pair present in the bucketing interval containing \p time. The + //! first element of the key is person and the second attribute. The + //! collection is sorted lexicographically by key. + //! \note We expect the pairs present to be sparse on the full outer + //! product space of attribute and person so use a sparse encoding.
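[Illustration, not part of this patch: the sparse encoding referred to in the notes above stores only the (person, attribute) pairs actually observed, as a vector of ((pid, cid), feature data) pairs sorted lexicographically by key. The self-contained sketch below, with an assumed generic element type, shows how a single pair is then found by binary search rather than by indexing a dense pid x cid matrix.]

```cpp
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Look up the feature data for one (pid, cid) pair in a sorted sparse
// encoding. Storage and search cost depend only on the pairs present.
template<typename FEATURE_DATA>
const FEATURE_DATA* findPair(
    const std::vector<std::pair<std::pair<std::size_t, std::size_t>, FEATURE_DATA>>& data,
    std::size_t pid, std::size_t cid) {
    std::pair<std::size_t, std::size_t> key{pid, cid};
    auto pos = std::lower_bound(data.begin(), data.end(), key,
                                [](const auto& entry, const auto& k) { return entry.first < k; });
    return pos != data.end() && pos->first == key ? &pos->second : nullptr;
}
```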
+ void attributeIndicator(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the number of unique values for each person + //! in the bucketing interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the unique value counts + //! by person + void bucketUniqueValuesPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the number of unique values for each person and attribute + //! in the bucketing interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the unique value counts + //! by person and attribute + void bucketUniqueValuesPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the compressed length of the unique attributes each person + //! hits in the bucketing interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the compressed length of the + //! unique values by person + void bucketCompressedLengthPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the compressed length of the unique attributes each person + //! hits in the bucketing interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the compressed length of the + //! unique values by person and attribute + void bucketCompressedLengthPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the time-of-day/week values for each person in the + //! bucketing interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the arrival time values + //! by person. + void bucketMeanTimesPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Append the time-of-day/week values of each attribute and person + //! in the bucketing interval containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the arrival time values + //! by attribute and person + void bucketMeanTimesPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + + //! Resize the necessary data structures so they can accommodate + //! the person and attribute identified by \p pid and \p cid, + //! respectively. + //! + //! \param[in] pid The identifier of the person to accommodate. + //! \param[in] cid The identifier of the attribute to accommodate. + virtual void resize(std::size_t pid, std::size_t cid); + + //! Record the arrival of the \p values for the person identified + //! by \p pid. + //! + //! \param[in] pid The identifier of the person who generated the + //! record(s). + //! \param[in] cid The identifier of the attribute which generated + //! the record(s). + //! \param[in] time The end time of the record(s). + //! \param[in] values Ignored. + //! \param[in] count The number of records. + //! \param[in] stringValue The value for the function string argument + //! if there is one or null. + //! \param[in] influences The influencing field values which label + //! the value. + virtual void addValue(std::size_t pid, + std::size_t cid, core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the non-zero counts by person for bucketing interval - //!
containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[in,out] result Append (person identifier, count) for each - //! person present in the bucketing interval containing \p time. The - //! collection is sorted by person. - void nonZeroPersonCounts(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append an indicator function for people present in the bucketing - //! interval containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[in,out] result Append (person identifier, 1) for each person - //! present in the bucketing interval containing \p time. The collection - //! is sorted by person identifier. - void personIndicator(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the mean arrival times for people present in the current - //! bucketing interval. - //! - //! \param[in] time The time of interest. - //! \param[in,out] result Append (person identifier, mean arrival time) - //! for each person present in the bucketing interval containing \p time. - //! The collection is sorted by person identifier. - void personArrivalTimes(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the non-zero counts for each attribute by person for the - //! bucketing interval containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[in,out] result Append the non-zero attribute counts by - //! person. The first element of the key is person and the second - //! attribute. The collection is sorted lexicographically by key. - //! \note We expect the pairs present to be sparse on the full outer - //! product space of attribute and person so use a sparse encoding. - void nonZeroAttributeCounts(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the number of unique people hitting each attribute. - //! - //! \param[in,out] result Append the count of people per attribute. - //! The person identifier is dummied to zero so that the result - //! type matches other population features. - void peoplePerAttribute(model_t::EFeature feature, - TFeatureAnyPrVec &result) const; - - //! Append an indicator function for (person, attribute) pairs - //! present in the bucketing interval containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[in,out] result Append one for each (person, attribute) - //! pair present in the bucketing interval containing \p time. The - //! first element of the key is person and the second attribute. The - //! collection is sorted lexicographically by key. - //! \note We expect the pairs present to be sparse on the full outer - //! product space of attribute and person so use a sparse encoding. - void attributeIndicator(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the number of unique values for each person - //! in the bucketing interval containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the unique value counts - //! by person - void bucketUniqueValuesPerPerson(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the number of unique values for each person and attribute - //! in the bucketing interval containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the unique value counts - //! 
by person and attribute - void bucketUniqueValuesPerPersonAttribute(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the compressed length of the unique attributes each person - //! hits in the bucketing interval containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the compressed length of the - //! unique values by person and attribute - void bucketCompressedLengthPerPerson(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the compressed length of the unique attributes each person - //! hits in the bucketing interval containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the compressed length of the - //! unique values by person and attribute - void bucketCompressedLengthPerPersonAttribute(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the time-of-day/week values for each person in the - //! bucketing interval \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the arrival time values - //! by person. - void bucketMeanTimesPerPerson(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Append the time-of-day/week values of each attribute and person - //! in the bucketing interval \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the arrival time values - //! by attribute and person - void bucketMeanTimesPerPersonAttribute(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result) const; - - //! Resize the necessary data structures so they can accommodate - //! the person and attribute identified by \p pid and \p cid, - //! respectively. - //! - //! \param[in] pid The identifier of the person to accommodate. - //! \param[in] cid The identifier of the attribute to accommodate. - virtual void resize(std::size_t pid, std::size_t cid); - - //! Record the arrival of the \p values for the person identified - //! by \p pid. - //! - //! \param[in] pid The identifier of the person who generated the - //! record(s). - //! \param[in] cid The identifier of the attribute who generated - //! the record(s). - //! \param[in] time The end time of the record(s). - //! \param[in] values Ignored. - //! \param[in] count The number of records. - //! \param[in] stringValue The value for the function string argument - //! if there is one or null. - //! \param[in] influences The influencing field values which label - //! the value. - virtual void addValue(std::size_t pid, - std::size_t cid, - core_t::TTime time, - const CEventData::TDouble1VecArray &values, - std::size_t count, - const CEventData::TOptionalStr &stringValue, - const TStoredStringPtrVec &influences); - - //! Start a new bucket. - virtual void startNewBucket(core_t::TTime time, bool skipUpdates); - - //! Initialize the field names collection. - void initializeFieldNames(const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const std::string &summaryCountFieldName, - const TStrVec &influenceFieldNames); - - //! Initialize the feature data gatherers. - void initializeFeatureData(); - - //! Copy the influencer person counts to \p results. - //! - //! \warning This assumes that \p result is sorted by person - //! identifier. 
- void addInfluencerCounts(core_t::TTime time, TSizeFeatureDataPrVec &result) const; - - //! Copy the influencer person and attribute counts to \p results. - //! - //! \warning This assumes that \p result is sorted by person - //! and attribute identifier. - void addInfluencerCounts(core_t::TTime time, TSizeSizePrFeatureDataPrVec &result) const; - - private: - //! The name of the field value of interest for keyed functions - std::string m_ValueFieldName; - - //! The names of the fields of interest. - //! - //! This is of the form: - //! -# The name of the field which identifies people, - //! -# [The name of the field which identifies people's attributes], - //! -# [The names of the influencing fields], - //! -# [The name of the field which identifies a function to key off], - //! -# [The name of the field containing the person(/attribute) count - //! if summarized data are being gathered] - TStrVec m_FieldNames; - - //! The position of the first influencer field - std::size_t m_BeginInfluencingFields; - - //! The position of the first count/value field. - std::size_t m_BeginValueField; - - //! The position of the field holding the summarised count. - std::size_t m_BeginSummaryFields; - - //! The data features we are gathering. - TCategoryAnyMap m_FeatureData; + const CEventData::TDouble1VecArray& values, + std::size_t count, + const CEventData::TOptionalStr& stringValue, + const TStoredStringPtrVec& influences); + + //! Start a new bucket. + virtual void startNewBucket(core_t::TTime time, bool skipUpdates); + + //! Initialize the field names collection. + void initializeFieldNames(const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const std::string& summaryCountFieldName, + const TStrVec& influenceFieldNames); + + //! Initialize the feature data gatherers. + void initializeFeatureData(); + + //! Copy the influencer person counts to \p result. + //! + //! \warning This assumes that \p result is sorted by person + //! identifier. + void addInfluencerCounts(core_t::TTime time, TSizeFeatureDataPrVec& result) const; + + //! Copy the influencer person and attribute counts to \p result. + //! + //! \warning This assumes that \p result is sorted by person + //! and attribute identifier. + void addInfluencerCounts(core_t::TTime time, TSizeSizePrFeatureDataPrVec& result) const; + +private: + //! The name of the field value of interest for keyed functions + std::string m_ValueFieldName; + + //! The names of the fields of interest. + //! + //! This is of the form: + //! -# The name of the field which identifies people, + //! -# [The name of the field which identifies people's attributes], + //! -# [The names of the influencing fields], + //! -# [The name of the field which identifies a function to key off], + //! -# [The name of the field containing the person(/attribute) count + //! if summarized data are being gathered] + TStrVec m_FieldNames; + + //! The position of the first influencer field + std::size_t m_BeginInfluencingFields; + + //! The position of the first count/value field. + std::size_t m_BeginValueField; + + //! The position of the field holding the summarised count. + std::size_t m_BeginSummaryFields; + + //! The data features we are gathering. + TCategoryAnyMap m_FeatureData; }; - } } -namespace std -{ +namespace std { //! Overload pair swap so that we use efficient swap of the feature data //! when sorting.
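[Illustration, not part of this patch: the swap overloads below matter because std::sort exchanges elements through unqualified calls to swap, so with these overloads visible an exchange swaps the feature data's internals rather than copying them. A hedged usage sketch, with an assumed include path:]

```cpp
#include <model/CEventRateBucketGatherer.h>

#include <algorithm>

// Sort gathered (person id, feature data) pairs by person id. Element
// exchanges performed by std::sort resolve to the overload below, so
// the feature data are exchanged cheaply while sorting.
void sortByPerson(ml::model::CEventRateBucketGatherer::TSizeFeatureDataPrVec& featureData) {
    std::sort(featureData.begin(), featureData.end(),
              [](const auto& lhs, const auto& rhs) { return lhs.first < rhs.first; });
}
```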
-inline void swap(ml::model::CEventRateBucketGatherer::TSizeFeatureDataPr &lhs, - ml::model::CEventRateBucketGatherer::TSizeFeatureDataPr &rhs) -{ +inline void swap(ml::model::CEventRateBucketGatherer::TSizeFeatureDataPr& lhs, + ml::model::CEventRateBucketGatherer::TSizeFeatureDataPr& rhs) { swap(lhs.first, rhs.first); lhs.second.swap(rhs.second); } //! Overload pair swap so that we use efficient swap of the feature data //! when sorting. -inline void swap(ml::model::CEventRateBucketGatherer::TSizeSizePrFeatureDataPr &lhs, - ml::model::CEventRateBucketGatherer::TSizeSizePrFeatureDataPr &rhs) -{ +inline void swap(ml::model::CEventRateBucketGatherer::TSizeSizePrFeatureDataPr& lhs, + ml::model::CEventRateBucketGatherer::TSizeSizePrFeatureDataPr& rhs) { swap(lhs.first, rhs.first); lhs.second.swap(rhs.second); } - } #endif // INCLUDED_ml_model_CEventRateBucketGatherer_h diff --git a/include/model/CEventRateModel.h b/include/model/CEventRateModel.h index 70e5a2e238..e072a623ac 100644 --- a/include/model/CEventRateModel.h +++ b/include/model/CEventRateModel.h @@ -25,20 +25,15 @@ #include - -namespace -{ +namespace { class CMockEventRateModel; } -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief The event rate model common functionality. //! @@ -58,292 +53,279 @@ namespace model //! //! It assumes data are supplied in time order since this means minimal //! state can be maintained. -class MODEL_EXPORT CEventRateModel : public CIndividualModel -{ - public: - using TFeatureData = SEventRateFeatureData; - using TSizeFeatureDataPr = std::pair; - using TSizeFeatureDataPrVec = std::vector; - using TFeatureSizeFeatureDataPrVecPr = std::pair; - using TFeatureSizeFeatureDataPrVecPrVec = std::vector; - using TCategoryProbabilityCache = CModelTools::CCategoryProbabilityCache; - - //! The statistics we maintain about a bucketing interval. - struct MODEL_EXPORT SBucketStats - { - explicit SBucketStats(core_t::TTime startTime); - - //! The start time of this bucket. - core_t::TTime s_StartTime; - //! The non-zero person counts in the current bucket. - TSizeUInt64PrVec s_PersonCounts; - //! The total count in the current bucket. - uint64_t s_TotalCount; - //! The feature data samples for the current bucketing interval. - TFeatureSizeFeatureDataPrVecPrVec s_FeatureData; - //! A cache of the corrections applied to interim results. - //! The key is for non-correlated corrections - //! or for correlated corrections - mutable TFeatureSizeSizeTripleDouble1VecUMap s_InterimCorrections; - }; - - public: - //! \name Life-cycle - //@{ - //! \param[in] params The global configuration parameters. - //! \param[in] dataGatherer The object that gathers time series data. - //! \param[in] newFeatureModels The new models to use for each feature. - //! \param[in] newFeatureCorrelateModelPriors The prior to use for the - //! new model of correlates for each feature. - //! \param[in] featureCorrelatesModels The model of all correlates for - //! each feature. - //! \param[in] probabilityPrior The prior used for the joint probabilities - //! of seeing the people we are modeling. - //! \param[in] influenceCalculators The influence calculators to use - //! for each feature. 
- CEventRateModel(const SModelParams &params,
- const TDataGathererPtr &dataGatherer,
- const TFeatureMathsModelPtrPrVec &newFeatureModels,
- const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
- const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
- const maths::CMultinomialConjugate &probabilityPrior,
- const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators);
-
- //! Constructor used for restoring persisted models.
- //!
- //! \note The current bucket statistics are left default initialized
- //! and so must be sampled for before this model can be used.
- CEventRateModel(const SModelParams &params,
- const TDataGathererPtr &dataGatherer,
- const TFeatureMathsModelPtrPrVec &newFeatureModels,
- const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
- const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
- const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators,
- core::CStateRestoreTraverser &traverser);
-
- //! Create a copy that will result in the same persisted state as the
- //! original. This is effectively a copy constructor that creates a
- //! copy that's only valid for a single purpose. The boolean flag is
- //! redundant except to create a signature that will not be mistaken
- //! for a general purpose copy constructor.
- CEventRateModel(bool isForPersistence, const CEventRateModel &other);
- //@}
-
- //! \name Persistence
- //@{
- //! Persist state by passing information to \p inserter.
- virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
- //! Restore reading state from \p traverser.
- virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
- //! Create a clone of this model that will result in the same persisted
- //! state. The clone may be incomplete in ways that do not affect the
- //! persisted representation, and must not be used for any other
- //! purpose.
- //! \warning The caller owns the object returned.
- virtual CAnomalyDetectorModel *cloneForPersistence() const;
- //@}
-
- //! Get the model category.
- virtual model_t::EModelType category() const;
-
- //! Returns true.
- virtual bool isEventRate() const;
-
- //! Returns false.
- virtual bool isMetric() const;
-
- //! \name Bucket Statistics
- //@{
- //! Returns null.
- virtual TOptionalDouble baselineBucketCount(std::size_t pid) const;
-
- //! Get the value of \p feature for the person identified
- //! by \p pid in the bucketing interval containing \p time.
- //!
- //! \param[in] feature The feature of interest.
- //! \param[in] pid The identifier of the person of interest.
- //! \param[in] cid Ignored.
- //! \param[in] time The time of interest.
- virtual TDouble1Vec currentBucketValue(model_t::EFeature feature,
- std::size_t pid,
- std::size_t cid,
- core_t::TTime time) const;
-
- //! Get the baseline bucket value of \p feature for the person
- //! identified by \p pid as of the start of the current bucketing
- //! interval.
- //!
- //! \param[in] feature The feature of interest.
- //! \param[in] pid The identifier of the person of interest.
- //! \param[in] cid Ignored.
- //! \param[in] type A description of the type of result for which
- //! to get the baseline. See CResultType for more details.
- //! \param[in] correlated The correlated series' identifiers and
- //! their values if any.
- //! \param[in] time The time of interest.
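The (bool isForPersistence, const CEventRateModel&) constructor documented above is a recurring idiom in these headers: the flag exists only to give the constructor a signature distinct from a general purpose copy constructor. A hedged sketch of the shape with illustrative names (CExampleModel is not a class in this patch):

    #include <cassert>

    class CExampleModel {
    public:
        CExampleModel() = default;
        // Force callers through the explicit persistence path.
        CExampleModel(const CExampleModel&) = delete;

        // Deliberately not a copy constructor: the flag disambiguates
        // the signature so general purpose copying stays unavailable.
        CExampleModel(bool isForPersistence, const CExampleModel& other)
            : m_State(other.m_State) {
            assert(isForPersistence && "only valid for persistence");
            // Volatile, non-persisted members (e.g. current bucket
            // statistics) are intentionally left default initialized.
        }

        // Caller owns the result, matching the \warning in the header.
        CExampleModel* cloneForPersistence() const {
            return new CExampleModel(true, *this);
        }

    private:
        int m_State = 0;
    };

    int main() {
        CExampleModel model;
        CExampleModel* copy = model.cloneForPersistence();
        delete copy;
        return 0;
    }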
- virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const; - - //@} - - //! \name Person - //@{ - //! Get the person unique identifiers which have a feature value - //! in the bucketing time interval including \p time. - virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec &result) const; - //@} - - //! \name Update - //@{ - //! Sample any state needed by computeProbablity in the time - //! interval [\p startTime, \p endTime] but do not update the - //! model. This is needed by the results preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! Update the model with features samples from the time interval - //! [\p startTime, \p endTime]. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - //! \param[in] resourceMonitor The resourceMonitor. - virtual void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - //@} - - //! \name Probability - //@{ - //! Compute the probability of seeing the event counts in the - //! time interval [\p startTime, \p endTime] for the person - //! identified by \p pid. - //! - //! \param[in] pid The identifier of the person of interest. - //! \param[in] startTime The start of the time interval of interest. - //! \param[in] endTime The end of the time interval of interest. - //! \param[in] partitioningFields The partitioning field (name, value) - //! pairs for which to compute the the probability. - //! \param[in] numberAttributeProbabilities Ignored. - //! \param[out] result A structure containing the probability, - //! the smallest \p numberAttributeProbabilities attribute - //! probabilities, the influences and any extra descriptive data - virtual bool computeProbability(std::size_t pid, - core_t::TTime startTime, - core_t::TTime endTime, - CPartitioningFields &partitioningFields, - std::size_t numberAttributeProbabilities, - SAnnotatedProbability &result) const; - //@} - - //! Get the checksum of this model. - //! - //! \param[in] includeCurrentBucketStats If true then include - //! the current bucket statistics. (This is designed to handle - //! serialization, for which we don't serialize the current - //! bucket statistics.) - virtual uint64_t checksum(bool includeCurrentBucketStats = true) const; - - //! Debug the memory used by this model. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this model. - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies. - virtual std::size_t staticSize() const; - - //! Get the non-estimated value of the the memory used by this model. - virtual std::size_t computeMemoryUsage() const; - - //! Get a view of the internals of the model for visualization. - virtual CModelDetailsViewPtr details() const; - - //! Get the value of the \p feature of the person identified - //! by \p pid for the bucketing interval containing \p time. - const TFeatureData *featureData(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) const; - - private: - //! Get the start time of the current bucket. 
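The checksum(includeCurrentBucketStats) flag documented above lets callers compare durable state across a persist/restore round trip while excluding the current bucket's transient statistics, which are not serialized. A toy sketch of that split, with hypothetical members and a Boost-style hash combiner:

    #include <cstdint>
    #include <functional>

    class CExampleChecksum {
    public:
        // Combine hashes of persisted members always; fold in the current
        // bucket statistics only when asked, since they are not serialized.
        std::uint64_t checksum(bool includeCurrentBucketStats = true) const {
            std::uint64_t seed = std::hash<std::uint64_t>{}(m_PersistedCount);
            if (includeCurrentBucketStats) {
                seed = combine(seed, std::hash<std::uint64_t>{}(m_CurrentBucketTotalCount));
            }
            return seed;
        }

    private:
        static std::uint64_t combine(std::uint64_t seed, std::uint64_t h) {
            // Boost-style hash combine; any mixing function works here.
            return seed ^ (h + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2));
        }

        std::uint64_t m_PersistedCount = 0;          // survives persist/restore
        std::uint64_t m_CurrentBucketTotalCount = 0; // rebuilt by sampling
    };

    int main() {
        CExampleChecksum model;
        // A checksum taken before persisting (without bucket stats) can be
        // compared with one taken after restoring.
        return model.checksum(false) == model.checksum(false) ? 0 : 1;
    }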
- virtual core_t::TTime currentBucketStartTime() const; - - //! Set the start time of the current bucket. - virtual void currentBucketStartTime(core_t::TTime time); - - //! Get the person counts in the current bucket. - virtual const TSizeUInt64PrVec ¤tBucketPersonCounts() const; - - //! Get writable person counts in the current bucket. - virtual TSizeUInt64PrVec ¤tBucketPersonCounts(); - - //! Set the current bucket total count. - virtual void currentBucketTotalCount(uint64_t totalCount); - - //! Get the total count of the current bucket. - uint64_t currentBucketTotalCount() const; - - //! Get the interim corrections of the current bucket. - TFeatureSizeSizeTripleDouble1VecUMap ¤tBucketInterimCorrections() const; - - //! Create the time series models for "n" newly observed people. - virtual void createNewModels(std::size_t n, std::size_t m); - - //! Reinitialize the time series models for recycled people. - virtual void updateRecycledModels(); - - //! Clear out large state objects for people that are pruned. - virtual void clearPrunedResources(const TSizeVec &people, - const TSizeVec &attributes); - - //! Check if there are correlates for \p feature and the person - //! identified by \p pid. - bool correlates(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const; - - //! Fill in the probability calculation parameters for \p feature - //! and person identified by \p pid. - void fill(model_t::EFeature feature, - std::size_t pid, - core_t::TTime bucketTime, - bool interim, - CProbabilityAndInfluenceCalculator::SParams ¶ms) const; - - //! Fill in the probability calculation parameters for the correlates - //! of \p feature and the person identified by \p pid. - void fill(model_t::EFeature feature, - std::size_t pid, - core_t::TTime bucketTime, - bool interim, - CProbabilityAndInfluenceCalculator::SCorrelateParams ¶ms, - TStrCRefDouble1VecDouble1VecPrPrVecVecVec &correlateInfluenceValues) const; - - private: - //! The statistics we maintain about the bucket. - SBucketStats m_CurrentBucketStats; - - //! The prior for the joint probabilities of seeing the people - //! we are modeling (this captures information about the person - //! rarity). - maths::CMultinomialConjugate m_ProbabilityPrior; - - //! A cache of the person probabilities as of the start of the - //! for the bucketing interval. - TCategoryProbabilityCache m_Probabilities; - - friend class CEventRateModelDetailsView; - friend class ::CMockEventRateModel; +class MODEL_EXPORT CEventRateModel : public CIndividualModel { +public: + using TFeatureData = SEventRateFeatureData; + using TSizeFeatureDataPr = std::pair; + using TSizeFeatureDataPrVec = std::vector; + using TFeatureSizeFeatureDataPrVecPr = std::pair; + using TFeatureSizeFeatureDataPrVecPrVec = std::vector; + using TCategoryProbabilityCache = CModelTools::CCategoryProbabilityCache; + + //! The statistics we maintain about a bucketing interval. + struct MODEL_EXPORT SBucketStats { + explicit SBucketStats(core_t::TTime startTime); + + //! The start time of this bucket. + core_t::TTime s_StartTime; + //! The non-zero person counts in the current bucket. + TSizeUInt64PrVec s_PersonCounts; + //! The total count in the current bucket. + uint64_t s_TotalCount; + //! The feature data samples for the current bucketing interval. + TFeatureSizeFeatureDataPrVecPrVec s_FeatureData; + //! A cache of the corrections applied to interim results. + //! The key is for non-correlated corrections + //! 
or for correlated corrections + mutable TFeatureSizeSizeTripleDouble1VecUMap s_InterimCorrections; + }; + +public: + //! \name Life-cycle + //@{ + //! \param[in] params The global configuration parameters. + //! \param[in] dataGatherer The object that gathers time series data. + //! \param[in] newFeatureModels The new models to use for each feature. + //! \param[in] newFeatureCorrelateModelPriors The prior to use for the + //! new model of correlates for each feature. + //! \param[in] featureCorrelatesModels The model of all correlates for + //! each feature. + //! \param[in] probabilityPrior The prior used for the joint probabilities + //! of seeing the people we are modeling. + //! \param[in] influenceCalculators The influence calculators to use + //! for each feature. + CEventRateModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const maths::CMultinomialConjugate& probabilityPrior, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators); + + //! Constructor used for restoring persisted models. + //! + //! \note The current bucket statistics are left default initialized + //! and so must be sampled for before this model can be used. + CEventRateModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + core::CStateRestoreTraverser& traverser); + + //! Create a copy that will result in the same persisted state as the + //! original. This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken + //! for a general purpose copy constructor. + CEventRateModel(bool isForPersistence, const CEventRateModel& other); + //@} + + //! \name Persistence + //@{ + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Restore reading state from \p traverser. + virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Create a clone of this model that will result in the same persisted + //! state. The clone may be incomplete in ways that do not affect the + //! persisted representation, and must not be used for any other + //! purpose. + //! \warning The caller owns the object returned. + virtual CAnomalyDetectorModel* cloneForPersistence() const; + //@} + + //! Get the model category. + virtual model_t::EModelType category() const; + + //! Returns true. + virtual bool isEventRate() const; + + //! Returns false. + virtual bool isMetric() const; + + //! \name Bucket Statistics + //@{ + //! Returns null. + virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; + + //! Get the value of \p feature for the person identified + //! by \p pid in the bucketing interval containing \p time. + //! + //! \param[in] feature The feature of interest. + //! \param[in] pid The identifier of the person of interest. + //! \param[in] cid Ignored. + //! \param[in] time The time of interest. 
+ virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const;
+
+ //! Get the baseline bucket value of \p feature for the person
+ //! identified by \p pid as of the start of the current bucketing
+ //! interval.
+ //!
+ //! \param[in] feature The feature of interest.
+ //! \param[in] pid The identifier of the person of interest.
+ //! \param[in] cid Ignored.
+ //! \param[in] type A description of the type of result for which
+ //! to get the baseline. See CResultType for more details.
+ //! \param[in] correlated The correlated series' identifiers and
+ //! their values if any.
+ //! \param[in] time The time of interest.
+ virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature,
+ std::size_t pid,
+ std::size_t cid,
+ model_t::CResultType type,
+ const TSizeDoublePr1Vec& correlated,
+ core_t::TTime time) const;
+
+ //@}
+
+ //! \name Person
+ //@{
+ //! Get the person unique identifiers which have a feature value
+ //! in the bucketing time interval including \p time.
+ virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const;
+ //@}
+
+ //! \name Update
+ //@{
+ //! Sample any state needed by computeProbability in the time
+ //! interval [\p startTime, \p endTime] but do not update the
+ //! model. This is needed by the results preview.
+ //!
+ //! \param[in] startTime The start of the time interval to sample.
+ //! \param[in] endTime The end of the time interval to sample.
+ virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+ //! Update the model with feature samples from the time interval
+ //! [\p startTime, \p endTime].
+ //!
+ //! \param[in] startTime The start of the time interval to sample.
+ //! \param[in] endTime The end of the time interval to sample.
+ //! \param[in] resourceMonitor The resourceMonitor.
+ virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+ //@}
+
+ //! \name Probability
+ //@{
+ //! Compute the probability of seeing the event counts in the
+ //! time interval [\p startTime, \p endTime] for the person
+ //! identified by \p pid.
+ //!
+ //! \param[in] pid The identifier of the person of interest.
+ //! \param[in] startTime The start of the time interval of interest.
+ //! \param[in] endTime The end of the time interval of interest.
+ //! \param[in] partitioningFields The partitioning field (name, value)
+ //! pairs for which to compute the probability.
+ //! \param[in] numberAttributeProbabilities Ignored.
+ //! \param[out] result A structure containing the probability,
+ //! the smallest \p numberAttributeProbabilities attribute
+ //! probabilities, the influences and any extra descriptive data
+ virtual bool computeProbability(std::size_t pid,
+ core_t::TTime startTime,
+ core_t::TTime endTime,
+ CPartitioningFields& partitioningFields,
+ std::size_t numberAttributeProbabilities,
+ SAnnotatedProbability& result) const;
+ //@}
+
+ //! Get the checksum of this model.
+ //!
+ //! \param[in] includeCurrentBucketStats If true then include
+ //! the current bucket statistics. (This is designed to handle
+ //! serialization, for which we don't serialize the current
+ //! bucket statistics.)
+ virtual uint64_t checksum(bool includeCurrentBucketStats = true) const;
+
+ //! Debug the memory used by this model.
+ virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+ //! Get the memory used by this model.
+ virtual std::size_t memoryUsage() const;
+
+ //! Get the static size of this object - used for virtual hierarchies.
+ virtual std::size_t staticSize() const;
+
+ //! Get the non-estimated value of the memory used by this model.
+ virtual std::size_t computeMemoryUsage() const;
+
+ //! Get a view of the internals of the model for visualization.
+ virtual CModelDetailsViewPtr details() const;
+
+ //! Get the value of the \p feature of the person identified
+ //! by \p pid for the bucketing interval containing \p time.
+ const TFeatureData* featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const;
+
+private:
+ //! Get the start time of the current bucket.
+ virtual core_t::TTime currentBucketStartTime() const;
+
+ //! Set the start time of the current bucket.
+ virtual void currentBucketStartTime(core_t::TTime time);
+
+ //! Get the person counts in the current bucket.
+ virtual const TSizeUInt64PrVec& currentBucketPersonCounts() const;
+
+ //! Get writable person counts in the current bucket.
+ virtual TSizeUInt64PrVec& currentBucketPersonCounts();
+
+ //! Set the current bucket total count.
+ virtual void currentBucketTotalCount(uint64_t totalCount);
+
+ //! Get the total count of the current bucket.
+ uint64_t currentBucketTotalCount() const;
+
+ //! Get the interim corrections of the current bucket.
+ TFeatureSizeSizeTripleDouble1VecUMap& currentBucketInterimCorrections() const;
+
+ //! Create the time series models for "n" newly observed people.
+ virtual void createNewModels(std::size_t n, std::size_t m);
+
+ //! Reinitialize the time series models for recycled people.
+ virtual void updateRecycledModels();
+
+ //! Clear out large state objects for people that are pruned.
+ virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes);
+
+ //! Check if there are correlates for \p feature and the person
+ //! identified by \p pid.
+ bool correlates(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const;
+
+ //! Fill in the probability calculation parameters for \p feature
+ //! and person identified by \p pid.
+ void fill(model_t::EFeature feature,
+ std::size_t pid,
+ core_t::TTime bucketTime,
+ bool interim,
+ CProbabilityAndInfluenceCalculator::SParams& params) const;
+
+ //! Fill in the probability calculation parameters for the correlates
+ //! of \p feature and the person identified by \p pid.
+ void fill(model_t::EFeature feature,
+ std::size_t pid,
+ core_t::TTime bucketTime,
+ bool interim,
+ CProbabilityAndInfluenceCalculator::SCorrelateParams& params,
+ TStrCRefDouble1VecDouble1VecPrPrVecVecVec& correlateInfluenceValues) const;
+
+private:
+ //! The statistics we maintain about the bucket.
+ SBucketStats m_CurrentBucketStats;
+
+ //! The prior for the joint probabilities of seeing the people
+ //! we are modeling (this captures information about the person
+ //! rarity).
+ maths::CMultinomialConjugate m_ProbabilityPrior;
+
+ //! A cache of the person probabilities as of the start of
+ //! the bucketing interval.
+ TCategoryProbabilityCache m_Probabilities;
+
+ friend class CEventRateModelDetailsView;
+ friend class ::CMockEventRateModel;
};
-
}
}
diff --git a/include/model/CEventRateModelFactory.h b/include/model/CEventRateModelFactory.h
index 961630f901..5a1f234531 100644
--- a/include/model/CEventRateModelFactory.h
+++ b/include/model/CEventRateModelFactory.h
@@ -10,14 +10,11 @@
#include
#include

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
class CStateRestoreTraverser;
}
-namespace model
-{
+namespace model {

//! \brief A factory class implementation for CEventRateModel.
//!
@@ -25,160 +22,153 @@ namespace model
//! This concrete factory implements the methods to make new models
//! and data gatherers, and create default priors suitable for the
//! CEventRateModel class.
-class MODEL_EXPORT CEventRateModelFactory : public CModelFactory
-{
- public:
- //! Lift all overloads into scope.
- using CModelFactory::defaultMultivariatePrior;
- using CModelFactory::defaultPrior;
-
- public:
- //! \note The default arguments supplied to the constructor are
- //! intended for unit testing and are not necessarily good defaults.
- //! The CModelConfig class is responsible for providing sensible
- //! default values for the factory for use within our products.
- explicit CEventRateModelFactory(const SModelParams &params,
- model_t::ESummaryMode summaryMode = model_t::E_None,
- const std::string &summaryCountFieldName = "");
-
- //! Create a copy of the factory owned by the calling code.
- virtual CEventRateModelFactory *clone() const;
-
- //! \name Factory Methods
- //@{
- //! Make a new event rate model.
- //!
- //! \param[in] initData The parameters needed to initialize the model.
- //! \warning It is owned by the calling code.
- virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData) const;
-
- //! Make a new event rate model from part of a state document.
- //!
- //! \param[in] initData Additional parameters needed to initialize
- //! the model.
- //! \param[in,out] traverser A state document traverser.
- //! \warning It is owned by the calling code.
- virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData,
- core::CStateRestoreTraverser &traverser) const;
-
- //! Make a new event rate data gatherer.
- //!
- //! \param[in] initData The parameters needed to initialize the data
- //! gatherer.
- //! \warning It is owned by the calling code.
- virtual CDataGatherer *makeDataGatherer(const SGathererInitializationData &initData) const;
-
- //! Make a new event rate data gatherer from part of a state document.
- //!
- //! \param[in] partitionFieldValue The partition field value.
- //! \param[in,out] traverser A state document traverser.
- //! \warning It is owned by the calling code.
- virtual CDataGatherer *makeDataGatherer(const std::string &partitionFieldValue,
- core::CStateRestoreTraverser &traverser) const;
- //@}
-
- //! \name Defaults
- //@{
- //! Get the default prior for \p feature.
- //!
- //! \param[in] feature The feature for which to get the prior.
- //! \param[in] params The model parameters.
- virtual TPriorPtr defaultPrior(model_t::EFeature feature,
- const SModelParams &params) const;
-
- //! Get the default multivariate prior for \p feature.
- //!
- //! \param[in] feature The feature for which to get the prior.
- //! \param[in] params The model parameters.
- virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature,
- const SModelParams &params) const;
-
- //!
Get the default prior for pairs of correlated time series
- //! of \p feature.
- //!
- //! \param[in] feature The feature for which to get the prior.
- //! \param[in] params The model parameters.
- virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature,
- const SModelParams &params) const;
- //@}
-
- //! Get the search key corresponding to this factory.
- virtual const CSearchKey &searchKey() const;
-
- //! Check if this makes the model used for a simple counting search.
- virtual bool isSimpleCount() const;
-
- //! Check the pre-summarisation mode for this factory.
- virtual model_t::ESummaryMode summaryMode() const;
-
- //! Get the default data type for models from this factory.
- virtual maths_t::EDataType dataType() const;
-
- //! \name Customization by a specific search
- //@{
- //! Set the identifier of the search for which this generates models.
- virtual void identifier(int identifier);
-
- //! Set the name of the field whose values will be counted.
- virtual void fieldNames(const std::string &partitionFieldName,
- const std::string &overFieldName,
- const std::string &byFieldName,
- const std::string &valueFieldName,
- const TStrVec &influenceFieldNames);
-
- //! Set whether the models should process missing person fields.
- virtual void useNull(bool useNull);
-
- //! Set the features which will be modeled.
- virtual void features(const TFeatureVec &features);
-
- //! Set the bucket results delay
- virtual void bucketResultsDelay(std::size_t bucketResultsDelay);
- //@}
-
- private:
- //! Get the field values which partition the data for modeling.
- virtual TStrCRefVec partitioningFields() const;
-
- private:
- //! The identifier of the search for which this generates models.
- int m_Identifier;
-
- //! Indicates whether the data being gathered are already summarized
- //! by an external aggregation process
- model_t::ESummaryMode m_SummaryMode;
-
- //! If m_SummaryMode is E_Manual then this is the name of the field
- //! holding the summary count.
- std::string m_SummaryCountFieldName;
-
- //! The name of the field which splits the data.
- std::string m_PartitionFieldName;
-
- //! The name of the field whose values will be counted.
- std::string m_PersonFieldName;
-
- //! The name of the field value of interest for keyed functions
- std::string m_ValueFieldName;
-
- //! The field names for which we are computing influence. These are
- //! the fields which can be used to join results across different
- //! searches.
- TStrVec m_InfluenceFieldNames;
-
- //! If true the models will process missing person fields.
- bool m_UseNull;
-
- //! The count features which will be modeled.
- TFeatureVec m_Features;
-
- //! The bucket results delay.
- std::size_t m_BucketResultsDelay;
-
- //! A cached search key.
- mutable TOptionalSearchKey m_SearchKeyCache;
+class MODEL_EXPORT CEventRateModelFactory : public CModelFactory {
+public:
+ //! Lift all overloads into scope.
+ using CModelFactory::defaultMultivariatePrior;
+ using CModelFactory::defaultPrior;
+
+public:
+ //! \note The default arguments supplied to the constructor are
+ //! intended for unit testing and are not necessarily good defaults.
+ //! The CModelConfig class is responsible for providing sensible
+ //! default values for the factory for use within our products.
+ explicit CEventRateModelFactory(const SModelParams& params,
+ model_t::ESummaryMode summaryMode = model_t::E_None,
+ const std::string& summaryCountFieldName = "");
+
+ //! Create a copy of the factory owned by the calling code.
+ virtual CEventRateModelFactory* clone() const; + + //! \name Factory Methods + //@{ + //! Make a new event rate model. + //! + //! \param[in] initData The parameters needed to initialize the model. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData) const; + + //! Make a new event rate model from part of a state document. + //! + //! \param[in] initData Additional parameters needed to initialize + //! the model. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + + //! Make a new event rate data gatherer. + //! + //! \param[in] initData The parameters needed to initialize the data + //! gatherer. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const SGathererInitializationData& initData) const; + + //! Make a new event rate data gatherer from part of a state document. + //! + //! \param[in] partitionFieldValue The partition field value. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + //@} + + //! \name Defaults + //@{ + //! Get the default prior for \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TPriorPtr defaultPrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default multivariate prior for \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default prior for pairs of correlated time series + //! of \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + //@} + + //! Get the search key corresponding to this factory. + virtual const CSearchKey& searchKey() const; + + //! Check if this makes the model used for a simple counting search. + virtual bool isSimpleCount() const; + + //! Check the pre-summarisation mode for this factory. + virtual model_t::ESummaryMode summaryMode() const; + + //! Get the default data type for models from this factory. + virtual maths_t::EDataType dataType() const; + + //! \name Customization by a specific search + //@{ + //! Set the identifier of the search for which this generates models. + virtual void identifier(int identifier); + + //! Set the name of the field whose values will be counted. + virtual void fieldNames(const std::string& partitionFieldName, + const std::string& overFieldName, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames); + + //! Set whether the models should process missing person fields. + virtual void useNull(bool useNull); + + //! Set the features which will be modeled. + virtual void features(const TFeatureVec& features); + + //! Set the bucket results delay + virtual void bucketResultsDelay(std::size_t bucketResultsDelay); + //@} + +private: + //! 
Get the field values which partition the data for modeling. + virtual TStrCRefVec partitioningFields() const; + +private: + //! The identifier of the search for which this generates models. + int m_Identifier; + + //! Indicates whether the data being gathered are already summarized + //! by an external aggregation process + model_t::ESummaryMode m_SummaryMode; + + //! If m_SummaryMode is E_Manual then this is the name of the field + //! holding the summary count. + std::string m_SummaryCountFieldName; + + //! The name of the field which splits the data. + std::string m_PartitionFieldName; + + //! The name of the field whose values will be counted. + std::string m_PersonFieldName; + + //! The name of the field value of interest for keyed functions + std::string m_ValueFieldName; + + //! The field names for which we are computing influence. These are + //! the fields which can be used to join results across different + //! searches. + TStrVec m_InfluenceFieldNames; + + //! If true the models will process missing person fields. + bool m_UseNull; + + //! The count features which will be modeled. + TFeatureVec m_Features; + + //! The bucket results delay. + std::size_t m_BucketResultsDelay; + + //! A cached search key. + mutable TOptionalSearchKey m_SearchKeyCache; }; - } } diff --git a/include/model/CEventRatePopulationModel.h b/include/model/CEventRatePopulationModel.h index 9ed3723be8..a003c502c5 100644 --- a/include/model/CEventRatePopulationModel.h +++ b/include/model/CEventRatePopulationModel.h @@ -12,8 +12,8 @@ #include #include -#include #include +#include #include #include #include @@ -22,19 +22,15 @@ #include #include -namespace -{ +namespace { class CMockPopulationEventRateModel; } -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief The model for computing the anomalousness of the rate at which //! each person in a population generates events in a data stream. @@ -76,344 +72,327 @@ namespace model //! //! It assumes data are supplied in time order since this means minimal //! state can be maintained. -class MODEL_EXPORT CEventRatePopulationModel : public CPopulationModel -{ - public: - using TFeatureMathsModelPtrPr = std::pair; - using TFeatureMathsModelPtrPrVec = std::vector; - using TFeatureMathsModelPtrVecPr = std::pair; - using TFeatureMathsModelPtrVecPrVec = std::vector; - using TFeatureCorrelationsPtrPr = std::pair; - using TFeatureCorrelationsPtrPrVec = std::vector; - using TSizeSizePrUInt64Pr = std::pair; - using TSizeSizePrUInt64PrVec = std::vector; - using TFeatureData = SEventRateFeatureData; - using TSizeSizePrFeatureDataPr = std::pair; - using TSizeSizePrFeatureDataPrVec = std::vector; - using TFeatureSizeSizePrFeatureDataPrVecMap = std::map; - using TCategoryProbabilityCache = CModelTools::CCategoryProbabilityCache; - using TProbabilityCache = CModelTools::CProbabilityCache; - - //! The statistics we maintain about a bucketing interval. - struct MODEL_EXPORT SBucketStats - { - explicit SBucketStats(core_t::TTime startTime); - - //! The start time of this bucket. - core_t::TTime s_StartTime; - //! The non-zero counts of messages by people in the bucketing - //! interval. - TSizeUInt64PrVec s_PersonCounts; - //! The total count in the current bucket. - uint64_t s_TotalCount; - //! The count features we are modeling. - TFeatureSizeSizePrFeatureDataPrVecMap s_FeatureData; - //! A cache of the corrections applied to interim results. 
- mutable TCorrectionKeyDouble1VecUMap s_InterimCorrections;
- };
-
- //! Lift the overloads of currentBucketValue into the class scope.
- using CPopulationModel::currentBucketValue;
-
- //! Lift the overloads of baselineBucketMean into the class scope.
- using CAnomalyDetectorModel::baselineBucketMean;
-
- //! Lift the overloads of acceptPersistInserter into the class scope.
- using CPopulationModel::acceptPersistInserter;
-
- public:
- //! \name Life-cycle
- //@{
- //! \param[in] params The global configuration parameters.
- //! \param[in] dataGatherer The object that gathers time series data.
- //! \param[in] newFeatureModels The new models to use for each feature.
- //! \param[in] newFeatureCorrelateModelPriors The prior to use for the
- //! new model of correlates for each feature.
- //! \param[in] featureCorrelatesModels The model of all correlates for
- //! each feature.
- //! \param[in] influenceCalculators The influence calculators to use
- //! for each feature.
- //! \note The current bucket statistics are left default initialized
- //! and so must be sampled for before this model can be used.
- CEventRatePopulationModel(const SModelParams &params,
- const TDataGathererPtr &dataGatherer,
- const TFeatureMathsModelPtrPrVec &newFeatureModels,
- const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
- const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
- const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators);
-
- //! Constructor used for restoring persisted models.
- //!
- //! \note The current bucket statistics are left default initialized
- //! and so must be sampled for before this model can be used.
- CEventRatePopulationModel(const SModelParams &params,
- const TDataGathererPtr &dataGatherer,
- const TFeatureMathsModelPtrPrVec &newFeatureModels,
- const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
- const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
- const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators,
- core::CStateRestoreTraverser &traverser);
-
- //! Create a copy that will result in the same persisted state as the
- //! original. This is effectively a copy constructor that creates a
- //! copy that's only valid for a single purpose. The boolean flag is
- //! redundant except to create a signature that will not be mistaken
- //! for a general purpose copy constructor.
- CEventRatePopulationModel(bool isForPersistence,
- const CEventRatePopulationModel &other);
- //@}
-
- //! \name Persistence
- //@{
- //! Persist state by passing information to the supplied inserter
- virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
- //! Add to the contents of the object.
- virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
- //! Create a clone of this model that will result in the same persisted
- //! state. The clone may be incomplete in ways that do not affect the
- //! persisted representation, and must not be used for any other
- //! purpose.
- //! \warning The caller owns the object returned.
- virtual CAnomalyDetectorModel *cloneForPersistence() const;
- //@}
-
- //! Get the model category.
- virtual model_t::EModelType category() const;
-
- //! Returns true.
- virtual bool isEventRate() const;
-
- //! Returns false.
- virtual bool isMetric() const;
-
- //! \name Bucket Statistics
- //@{
- //! Get the value of \p feature for the person identified
- //! by \p pid and the attribute identified by \p cid in the
- //!
bucketing interval containing \p time. - //! - //! \param[in] feature The feature of interest. - //! \param[in] pid The identifier of the person of interest. - //! \param[in] cid The identifier of the attribute of interest. - //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - - //! Get the population baseline mean of \p feature for the - //! attribute identified by \p cid as of the start of the - //! current bucketing interval. - //! - //! \param[in] feature The feature of interest - //! \param[in] pid The identifier of the person of interest. - //! \param[in] cid The identifier of the attribute of interest. - //! \param[in] type A description of the type of result for which - //! to get the baseline. See CResultType for more details. - //! \param[in] correlated The correlated series' identifiers and - //! their values if any. - //! \param[in] time The time of interest. - virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const; - - //! Check if bucket statistics are available for the specified time. - virtual bool bucketStatsAvailable(core_t::TTime time) const; - //@} - - //! \name Update - //@{ - //! This samples the bucket statistics, and any state needed - //! by computeProbablity, in the time interval [\p startTime, - //! \p endTime], but does not update the model. This is needed - //! by the results preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! Update the model with the samples of the various processes - //! in the time interval [\p startTime, \p endTime]. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - //! \param[in] resourceMonitor The resourceMonitor. - virtual void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! Prune any data for people and attributes which haven't been - //! seen for a sufficiently long period. This is based on the - //! prior decay rates and the number of batches into which we - //! are partitioning time. - virtual void prune(std::size_t maximumAge); - //@} - - //! \name Probability - //@{ - //! Compute the probability of seeing \p person's attribute values - //! for the buckets in the interval [\p startTime, \p endTime]. - //! - //! \param[in] pid The identifier of the person of interest. - //! \param[in] startTime The start of the interval of interest. - //! \param[in] endTime The end of the interval of interest. - //! \param[in] partitioningFields The partitioning field (name, value) - //! pairs for which to compute the the probability. - //! \param[in] numberAttributeProbabilities The maximum number of - //! attribute probabilities to retrieve. - //! \param[out] result A structure containing the probability, - //! the smallest \p numberAttributeProbabilities attribute - //! 
probabilities, the influences and any extra descriptive data - virtual bool computeProbability(std::size_t pid, - core_t::TTime startTime, - core_t::TTime endTime, - CPartitioningFields &partitioningFields, - std::size_t numberAttributeProbabilities, - SAnnotatedProbability &result) const; - - //! Clears \p probability and \p attributeProbabilities. - virtual bool computeTotalProbability(const std::string &person, - std::size_t numberAttributeProbabilities, - TOptionalDouble &probability, - TAttributeProbability1Vec &attributeProbabilities) const; - //@} - - //! Get the checksum of this model. - //! - //! \param[in] includeCurrentBucketStats If true then include - //! the current bucket statistics. (This is designed to handle - //! serialization, for which we don't serialize the current - //! bucket statistics.) - virtual uint64_t checksum(bool includeCurrentBucketStats = true) const; - - //! Debug the memory used by this model. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this model. - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Get the non-estimated memory used by this model. - virtual std::size_t computeMemoryUsage() const; - - //! Get a view of the internals of the model for visualization. - virtual CModelDetailsViewPtr details() const; - - //! Get the feature data corresponding to \p feature at \p time. - const TSizeSizePrFeatureDataPrVec &featureData(model_t::EFeature feature, - core_t::TTime time) const; - - private: - //! Initialize the feature models. - void initialize(const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels); - - //! Get the start time of the current bucket. - virtual core_t::TTime currentBucketStartTime() const; - - //! Set the start time of the current bucket. - virtual void currentBucketStartTime(core_t::TTime time); - - //! Set the current bucket total count. - virtual void currentBucketTotalCount(uint64_t totalCount); - - //! Get the total count of the current bucket. - uint64_t currentBucketTotalCount() const; - - //! Get the current bucket person counts. - virtual const TSizeUInt64PrVec &personCounts() const; - - //! Get the interim corrections of the current bucket. - TCorrectionKeyDouble1VecUMap ¤tBucketInterimCorrections() const; - - //! Initialize the time series models for "n" newly observed people - //! and "m" attributes. - virtual void createNewModels(std::size_t n, std::size_t m); - - //! Initialize the time series models for recycled attributes and/or people. - virtual void updateRecycledModels(); - - //! Update the correlation models. - virtual void refreshCorrelationModels(std::size_t resourceLimit, - CResourceMonitor &resourceMonitor); - - //! Clear out large state objects for people/attributes that are pruned - virtual void clearPrunedResources(const TSizeVec &people, - const TSizeVec &attributes); - - //! Skip sampling the interval \p endTime - \p startTime. - virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime); - - //! Get a read only model for \p feature and the attribute identified - //! by \p cid. - const maths::CModel *model(model_t::EFeature feature, std::size_t cid) const; - - //! Get a writable model for \p feature and the attribute identified - //! by \p cid. 
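Both model classes in this patch keep a mutable s_InterimCorrections cache so that the correction applied to an interim result is computed once per (feature, person, attribute) key and reused until the bucket is finalized. Below is a self-contained sketch of such a cache with a hand-rolled tuple hash; the types are illustrative stand-ins, not the real TFeatureSizeSizeTripleDouble1VecUMap:

    #include <cstddef>
    #include <functional>
    #include <tuple>
    #include <unordered_map>
    #include <vector>

    namespace example {
    // Stand-in for the (feature, pid, cid) correction key.
    using TKey = std::tuple<int, std::size_t, std::size_t>;

    struct SKeyHash {
        std::size_t operator()(const TKey& key) const {
            // Combine the three component hashes, Boost style.
            std::size_t seed = std::hash<int>{}(std::get<0>(key));
            seed ^= std::hash<std::size_t>{}(std::get<1>(key)) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            seed ^= std::hash<std::size_t>{}(std::get<2>(key)) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };

    // Analogue of the interim corrections map: a small vector of
    // correction values per key.
    using TCorrectionsMap = std::unordered_map<TKey, std::vector<double>, SKeyHash>;
    }

    int main() {
        example::TCorrectionsMap corrections;
        // Cache the correction applied to an interim result so the same
        // adjustment is reused until the bucket is finalized.
        corrections[{0 /*feature*/, 3 /*pid*/, 7 /*cid*/}] = {0.25};
        return corrections.size() == 1 ? 0 : 1;
    }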
- maths::CModel *model(model_t::EFeature feature, std::size_t pid); - - //! Check if there are correlates for \p feature and the person and - //! attribute identified by \p pid and \p cid, respectively. - bool correlates(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - - //! Fill in the probability calculation parameters for \p feature and - //! person and attribute identified by \p pid and \p cid, respectively. - void fill(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime bucketTime, - bool interim, - CProbabilityAndInfluenceCalculator::SParams ¶ms) const; +class MODEL_EXPORT CEventRatePopulationModel : public CPopulationModel { +public: + using TFeatureMathsModelPtrPr = std::pair; + using TFeatureMathsModelPtrPrVec = std::vector; + using TFeatureMathsModelPtrVecPr = std::pair; + using TFeatureMathsModelPtrVecPrVec = std::vector; + using TFeatureCorrelationsPtrPr = std::pair; + using TFeatureCorrelationsPtrPrVec = std::vector; + using TSizeSizePrUInt64Pr = std::pair; + using TSizeSizePrUInt64PrVec = std::vector; + using TFeatureData = SEventRateFeatureData; + using TSizeSizePrFeatureDataPr = std::pair; + using TSizeSizePrFeatureDataPrVec = std::vector; + using TFeatureSizeSizePrFeatureDataPrVecMap = std::map; + using TCategoryProbabilityCache = CModelTools::CCategoryProbabilityCache; + using TProbabilityCache = CModelTools::CProbabilityCache; + + //! The statistics we maintain about a bucketing interval. + struct MODEL_EXPORT SBucketStats { + explicit SBucketStats(core_t::TTime startTime); + + //! The start time of this bucket. + core_t::TTime s_StartTime; + //! The non-zero counts of messages by people in the bucketing + //! interval. + TSizeUInt64PrVec s_PersonCounts; + //! The total count in the current bucket. + uint64_t s_TotalCount; + //! The count features we are modeling. + TFeatureSizeSizePrFeatureDataPrVecMap s_FeatureData; + //! A cache of the corrections applied to interim results. + mutable TCorrectionKeyDouble1VecUMap s_InterimCorrections; + }; + + //! Lift the overloads of currentBucketValue into the class scope. + using CPopulationModel::currentBucketValue; + + //! Lift the overloads of baselineBucketMean into the class scope. + using CAnomalyDetectorModel::baselineBucketMean; + + //! Lift the overloads of acceptPersistInserter into the class scope. + using CPopulationModel::acceptPersistInserter; + +public: + //! \name Life-cycle + //@{ + //! \param[in] params The global configuration parameters. + //! \param[in] dataGatherer The object that gathers time series data. + //! \param[in] newFeatureModels The new models to use for each feature. + //! \param[in] newFeatureCorrelateModelPriors The prior to use for the + //! new model of correlates for each feature. + //! \param[in] featureCorrelatesModels The model of all correlates for + //! each feature. + //! \param[in] influenceCalculators The influence calculators to use + //! for each feature. + //! \note The current bucket statistics are left default initialized + //! and so must be sampled for before this model can be used. + CEventRatePopulationModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators); + + //! Constructor used for restoring persisted models. + //! + //! 
\note The current bucket statistics are left default initialized
+ //! and so must be sampled for before this model can be used.
+ CEventRatePopulationModel(const SModelParams& params,
+ const TDataGathererPtr& dataGatherer,
+ const TFeatureMathsModelPtrPrVec& newFeatureModels,
+ const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+ const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels,
+ const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators,
+ core::CStateRestoreTraverser& traverser);
+
+ //! Create a copy that will result in the same persisted state as the
+ //! original. This is effectively a copy constructor that creates a
+ //! copy that's only valid for a single purpose. The boolean flag is
+ //! redundant except to create a signature that will not be mistaken
+ //! for a general purpose copy constructor.
+ CEventRatePopulationModel(bool isForPersistence, const CEventRatePopulationModel& other);
+ //@}
+
+ //! \name Persistence
+ //@{
+ //! Persist state by passing information to the supplied inserter
+ virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+ //! Add to the contents of the object.
+ virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+ //! Create a clone of this model that will result in the same persisted
+ //! state. The clone may be incomplete in ways that do not affect the
+ //! persisted representation, and must not be used for any other
+ //! purpose.
+ //! \warning The caller owns the object returned.
+ virtual CAnomalyDetectorModel* cloneForPersistence() const;
+ //@}
+
+ //! Get the model category.
+ virtual model_t::EModelType category() const;
+
+ //! Returns true.
+ virtual bool isEventRate() const;
+
+ //! Returns false.
+ virtual bool isMetric() const;
+
+ //! \name Bucket Statistics
+ //@{
+ //! Get the value of \p feature for the person identified
+ //! by \p pid and the attribute identified by \p cid in the
+ //! bucketing interval containing \p time.
+ //!
+ //! \param[in] feature The feature of interest.
+ //! \param[in] pid The identifier of the person of interest.
+ //! \param[in] cid The identifier of the attribute of interest.
+ //! \param[in] time The time of interest.
+ virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const;
+
+ //! Get the population baseline mean of \p feature for the
+ //! attribute identified by \p cid as of the start of the
+ //! current bucketing interval.
+ //!
+ //! \param[in] feature The feature of interest.
+ //! \param[in] pid The identifier of the person of interest.
+ //! \param[in] cid The identifier of the attribute of interest.
+ //! \param[in] type A description of the type of result for which
+ //! to get the baseline. See CResultType for more details.
+ //! \param[in] correlated The correlated series' identifiers and
+ //! their values if any.
+ //! \param[in] time The time of interest.
+ virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature,
+ std::size_t pid,
+ std::size_t cid,
+ model_t::CResultType type,
+ const TSizeDoublePr1Vec& correlated,
+ core_t::TTime time) const;
+
+ //! Check if bucket statistics are available for the specified time.
+ virtual bool bucketStatsAvailable(core_t::TTime time) const;
+ //@}
+
+ //! \name Update
+ //@{
+ //! This samples the bucket statistics, and any state needed
+ //! by computeProbability, in the time interval [\p startTime,
+ //! \p endTime], but does not update the model. This is needed
+ //! by the results preview.
+ //!
+ //! \param[in] startTime The start of the time interval to sample.
+ //! \param[in] endTime The end of the time interval to sample.
+ virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+ //! Update the model with the samples of the various processes
+ //! in the time interval [\p startTime, \p endTime].
+ //!
+ //! \param[in] startTime The start of the time interval to sample.
+ //! \param[in] endTime The end of the time interval to sample.
+ //! \param[in] resourceMonitor The resourceMonitor.
+ virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+ //! Prune any data for people and attributes which haven't been
+ //! seen for a sufficiently long period. This is based on the
+ //! prior decay rates and the number of batches into which we
+ //! are partitioning time.
+ virtual void prune(std::size_t maximumAge);
+ //@}
+
+ //! \name Probability
+ //@{
+ //! Compute the probability of seeing \p person's attribute values
+ //! for the buckets in the interval [\p startTime, \p endTime].
+ //!
+ //! \param[in] pid The identifier of the person of interest.
+ //! \param[in] startTime The start of the interval of interest.
+ //! \param[in] endTime The end of the interval of interest.
+ //! \param[in] partitioningFields The partitioning field (name, value)
+ //! pairs for which to compute the probability.
+ //! \param[in] numberAttributeProbabilities The maximum number of
+ //! attribute probabilities to retrieve.
+ //! \param[out] result A structure containing the probability,
+ //! the smallest \p numberAttributeProbabilities attribute
+ //! probabilities, the influences and any extra descriptive data
+ virtual bool computeProbability(std::size_t pid,
+ core_t::TTime startTime,
+ core_t::TTime endTime,
+ CPartitioningFields& partitioningFields,
+ std::size_t numberAttributeProbabilities,
+ SAnnotatedProbability& result) const;
+
+ //! Clears \p probability and \p attributeProbabilities.
+ virtual bool computeTotalProbability(const std::string& person,
+ std::size_t numberAttributeProbabilities,
+ TOptionalDouble& probability,
+ TAttributeProbability1Vec& attributeProbabilities) const;
+ //@}
+
+ //! Get the checksum of this model.
+ //!
+ //! \param[in] includeCurrentBucketStats If true then include
+ //! the current bucket statistics. (This is designed to handle
+ //! serialization, for which we don't serialize the current
+ //! bucket statistics.)
+ virtual uint64_t checksum(bool includeCurrentBucketStats = true) const;
+
+ //! Debug the memory used by this model.
+ virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+ //! Get the memory used by this model.
+ virtual std::size_t memoryUsage() const;
+
+ //! Get the static size of this object - used for virtual hierarchies
+ virtual std::size_t staticSize() const;
+
+ //! Get the non-estimated memory used by this model.
+ virtual std::size_t computeMemoryUsage() const;
+
+ //! Get a view of the internals of the model for visualization.
+ virtual CModelDetailsViewPtr details() const;
+
+ //! Get the feature data corresponding to \p feature at \p time.
+ const TSizeSizePrFeatureDataPrVec& featureData(model_t::EFeature feature, core_t::TTime time) const;
+
+private:
+ //! Initialize the feature models.
+ void initialize(const TFeatureMathsModelPtrPrVec& newFeatureModels,
+ const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+ const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels);
+
+ //! Get the start time of the current bucket.
+ virtual core_t::TTime currentBucketStartTime() const;
+
+ //! Set the start time of the current bucket.
+ virtual void currentBucketStartTime(core_t::TTime time);
+
+ //! Set the current bucket total count.
+ virtual void currentBucketTotalCount(uint64_t totalCount);
+
+ //! Get the total count of the current bucket.
+ uint64_t currentBucketTotalCount() const;
+
+ //! Get the current bucket person counts.
+ virtual const TSizeUInt64PrVec& personCounts() const;
+
+ //! Get the interim corrections of the current bucket.
+ TCorrectionKeyDouble1VecUMap& currentBucketInterimCorrections() const;
+
+ //! Initialize the time series models for "n" newly observed people
+ //! and "m" attributes.
+ virtual void createNewModels(std::size_t n, std::size_t m);
+
+ //! Initialize the time series models for recycled attributes and/or people.
+ virtual void updateRecycledModels();
+
+ //! Update the correlation models.
+ virtual void refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor);
+
+ //! Clear out large state objects for people/attributes that are pruned
+ virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes);
+
+ //! Skip sampling the interval \p endTime - \p startTime.
+ virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime);
+
+ //! Get a read only model for \p feature and the attribute identified
+ //! by \p cid.
+ const maths::CModel* model(model_t::EFeature feature, std::size_t cid) const;
+
+ //! Get a writable model for \p feature and the attribute identified
+ //! by \p cid.
+ maths::CModel* model(model_t::EFeature feature, std::size_t cid);
+
+ //! Check if there are correlates for \p feature and the person and
+ //! attribute identified by \p pid and \p cid, respectively.
+ bool correlates(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const;
+
+ //! Fill in the probability calculation parameters for \p feature and
+ //! person and attribute identified by \p pid and \p cid, respectively.
+ void fill(model_t::EFeature feature,
+ std::size_t pid,
+ std::size_t cid,
+ core_t::TTime bucketTime,
+ bool interim,
+ CProbabilityAndInfluenceCalculator::SParams& params) const;

- //! Get the model memory usage estimator
- virtual CMemoryUsageEstimator *memoryUsageEstimator() const;
+ //! Get the model memory usage estimator
+ virtual CMemoryUsageEstimator* memoryUsageEstimator() const;

- private:
- //! The statistics we maintain about the bucket.
- SBucketStats m_CurrentBucketStats;
+private:
+ //! The statistics we maintain about the bucket.
+ SBucketStats m_CurrentBucketStats;

- //! The initial prior for attributes' probabilities.
- maths::CMultinomialConjugate m_NewAttributeProbabilityPrior;
+ //! The initial prior for attributes' probabilities.
+ maths::CMultinomialConjugate m_NewAttributeProbabilityPrior;

- //! The prior for the probabilities of the attributes we are modeling.
- maths::CMultinomialConjugate m_AttributeProbabilityPrior;
+ //! The prior for the probabilities of the attributes we are modeling.
+ maths::CMultinomialConjugate m_AttributeProbabilityPrior;

- //! A cache of the attribute probabilities.
- TCategoryProbabilityCache m_AttributeProbabilities;
+ //! A cache of the attribute probabilities.
+ TCategoryProbabilityCache m_AttributeProbabilities; - //! The models of all the attribute correlates for each feature. - //! - //! IMPORTANT this must come before m_FeatureModels in the class declaration - //! so its destructor is called afterwards (12.6.2) because feature models - //! unregister themselves from correlation models. - TFeatureCorrelateModelsVec m_FeatureCorrelatesModels; + //! The models of all the attribute correlates for each feature. + //! + //! IMPORTANT this must come before m_FeatureModels in the class declaration + //! so its destructor is called afterwards (12.6.2) because feature models + //! unregister themselves from correlation models. + TFeatureCorrelateModelsVec m_FeatureCorrelatesModels; - //! The population attribute models for each feature. - TFeatureModelsVec m_FeatureModels; + //! The population attribute models for each feature. + TFeatureModelsVec m_FeatureModels; - //! A cache of the probability calculation results. - mutable TProbabilityCache m_Probabilities; + //! A cache of the probability calculation results. + mutable TProbabilityCache m_Probabilities; - //! The memory estimator. - mutable CMemoryUsageEstimator m_MemoryEstimator; + //! The memory estimator. + mutable CMemoryUsageEstimator m_MemoryEstimator; - friend class CEventRatePopulationModelDetailsView; - friend class ::CMockPopulationEventRateModel; + friend class CEventRatePopulationModelDetailsView; + friend class ::CMockPopulationEventRateModel; }; - } } diff --git a/include/model/CEventRatePopulationModelFactory.h b/include/model/CEventRatePopulationModelFactory.h index f366b5901e..d8e50a4cb3 100644 --- a/include/model/CEventRatePopulationModelFactory.h +++ b/include/model/CEventRatePopulationModelFactory.h @@ -10,14 +10,11 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief A factory class implementation for CEventRatePopulationModel. //! @@ -25,168 +22,161 @@ namespace model //! This concrete factory implements the methods to make new models //! and data gatherers, and create default priors suitable for the //! CEventRatePopulationModel class. -class MODEL_EXPORT CEventRatePopulationModelFactory : public CModelFactory -{ - public: - //! Lift all overloads into scope. - using CModelFactory::defaultMultivariatePrior; - using CModelFactory::defaultPrior; - - public: - //! \note The default arguments supplied to the constructor are - //! intended for unit testing and are not necessarily good defaults. - //! The CModelConfig class is responsible for providing sensible - //! default values for the factory for use within our products. - explicit CEventRatePopulationModelFactory(const SModelParams ¶ms, - model_t::ESummaryMode summaryMode = model_t::E_None, - const std::string &summaryCountFieldName = ""); - - //! Create a copy of the factory owned by the calling code. - virtual CEventRatePopulationModelFactory *clone() const; - - //! \name Factory Methods - //@{ - //! Make a new population model for event rates. - //! - //! \param[in] initData The parameters needed to initialize the model. - //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData) const; - - //! Make a new population event rate model from part of a state - //! document. - //! - //! \param[in] initData Additional parameters needed to initialize - //! the model. - //! \param[in,out] traverser A state document traverser. - //! 
\warning It is owned by the calling code. - virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData, - core::CStateRestoreTraverser &traverser) const; - - //! Make a new event rate population data gatherer. - //! \param[in] initData The parameters needed to initialize the - //! data gatherer. - //! \warning It is owned by the calling code. - virtual CDataGatherer *makeDataGatherer(const SGathererInitializationData &initData) const; - - //! Make a new population event rate data gatherer from part of - //! a state document. - //! - //! \param[in] partitionFieldValue The partition field value. - //! \param[in,out] traverser A state document traverser. - //! \warning It is owned by the calling code. - virtual CDataGatherer *makeDataGatherer(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser) const; - //@} - - //! \name Defaults - //@{ - //! Get the default prior for \p feature. - //! - //! \param[in] feature The feature for which to get the prior. - //! \param[in] params The model parameters. - virtual TPriorPtr defaultPrior(model_t::EFeature feature, - const SModelParams ¶ms) const; - - //! Get the default multivariate prior for \p feature. - //! - //! \param[in] feature The feature for which to get the prior. - //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, - const SModelParams ¶ms) const; - - //! Get the default prior for pairs of correlated time series - //! of \p feature. - //! - //! \param[in] feature The feature for which to get the prior. - //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, - const SModelParams ¶ms) const; - //@} - - //! Get the search key corresponding to this factory. - virtual const CSearchKey &searchKey() const; - - //! Returns false. - virtual bool isSimpleCount() const; - - //! Check the pre-summarisation mode for this factory. - virtual model_t::ESummaryMode summaryMode() const; - - //! Get the default data type for models from this factory. - virtual maths_t::EDataType dataType() const; - - //! \name Customization by a specific search - //@{ - //! Set the identifier of the search for which this generates models. - virtual void identifier(int identifier); - - //! Set the name of the field whose values will be counted. - virtual void fieldNames(const std::string &partitionFieldName, - const std::string &overFieldName, - const std::string &byFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames); - - //! Set whether the models should process missing person and - //! attribute fields. - virtual void useNull(bool useNull); - - //! Set the features which will be modeled. - virtual void features(const TFeatureVec &features); - - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay); - //@} - - private: - //! Get the field values which partition the data for modeling. - virtual TStrCRefVec partitioningFields() const; - - private: - //! The identifier of the search for which this generates models. - int m_Identifier; - - //! Indicates whether the data being gathered are already summarized - //! by an external aggregation process. - model_t::ESummaryMode m_SummaryMode; - - //! If m_SummaryMode is E_Manual then this is the name of the field - //! holding the summary count. - std::string m_SummaryCountFieldName; - - //! The name of the field which splits the data. 
- std::string m_PartitionFieldName; - - //! The name of the field which defines the population which - //! will be analyzed. - std::string m_PersonFieldName; - - //! The name of the field which defines the person attributes - //! which will be analyzed. - std::string m_AttributeFieldName; - - //! The name of the field value of interest for keyed functions - std::string m_ValueFieldName; - - //! The field names for which we are computing influence. These are - //! the fields which can be used to join results across different - //! searches. - TStrVec m_InfluenceFieldNames; - - //! If true the models will process missing person and attribute - //! fields. - bool m_UseNull; - - //! The count features which will be modeled. - TFeatureVec m_Features; - - //! The bucket results delay. - std::size_t m_BucketResultsDelay; - - //! A cached search key. - mutable TOptionalSearchKey m_SearchKeyCache; +class MODEL_EXPORT CEventRatePopulationModelFactory : public CModelFactory { +public: + //! Lift all overloads into scope. + using CModelFactory::defaultMultivariatePrior; + using CModelFactory::defaultPrior; + +public: + //! \note The default arguments supplied to the constructor are + //! intended for unit testing and are not necessarily good defaults. + //! The CModelConfig class is responsible for providing sensible + //! default values for the factory for use within our products. + explicit CEventRatePopulationModelFactory(const SModelParams& params, + model_t::ESummaryMode summaryMode = model_t::E_None, + const std::string& summaryCountFieldName = ""); + + //! Create a copy of the factory owned by the calling code. + virtual CEventRatePopulationModelFactory* clone() const; + + //! \name Factory Methods + //@{ + //! Make a new population model for event rates. + //! + //! \param[in] initData The parameters needed to initialize the model. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData) const; + + //! Make a new population event rate model from part of a state + //! document. + //! + //! \param[in] initData Additional parameters needed to initialize + //! the model. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + + //! Make a new event rate population data gatherer. + //! \param[in] initData The parameters needed to initialize the + //! data gatherer. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const SGathererInitializationData& initData) const; + + //! Make a new population event rate data gatherer from part of + //! a state document. + //! + //! \param[in] partitionFieldValue The partition field value. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + //@} + + //! \name Defaults + //@{ + //! Get the default prior for \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TPriorPtr defaultPrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default multivariate prior for \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! 
\param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default prior for pairs of correlated time series + //! of \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + //@} + + //! Get the search key corresponding to this factory. + virtual const CSearchKey& searchKey() const; + + //! Returns false. + virtual bool isSimpleCount() const; + + //! Check the pre-summarisation mode for this factory. + virtual model_t::ESummaryMode summaryMode() const; + + //! Get the default data type for models from this factory. + virtual maths_t::EDataType dataType() const; + + //! \name Customization by a specific search + //@{ + //! Set the identifier of the search for which this generates models. + virtual void identifier(int identifier); + + //! Set the name of the field whose values will be counted. + virtual void fieldNames(const std::string& partitionFieldName, + const std::string& overFieldName, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames); + + //! Set whether the models should process missing person and + //! attribute fields. + virtual void useNull(bool useNull); + + //! Set the features which will be modeled. + virtual void features(const TFeatureVec& features); + + //! Set the bucket results delay + virtual void bucketResultsDelay(std::size_t bucketResultsDelay); + //@} + +private: + //! Get the field values which partition the data for modeling. + virtual TStrCRefVec partitioningFields() const; + +private: + //! The identifier of the search for which this generates models. + int m_Identifier; + + //! Indicates whether the data being gathered are already summarized + //! by an external aggregation process. + model_t::ESummaryMode m_SummaryMode; + + //! If m_SummaryMode is E_Manual then this is the name of the field + //! holding the summary count. + std::string m_SummaryCountFieldName; + + //! The name of the field which splits the data. + std::string m_PartitionFieldName; + + //! The name of the field which defines the population which + //! will be analyzed. + std::string m_PersonFieldName; + + //! The name of the field which defines the person attributes + //! which will be analyzed. + std::string m_AttributeFieldName; + + //! The name of the field value of interest for keyed functions + std::string m_ValueFieldName; + + //! The field names for which we are computing influence. These are + //! the fields which can be used to join results across different + //! searches. + TStrVec m_InfluenceFieldNames; + + //! If true the models will process missing person and attribute + //! fields. + bool m_UseNull; + + //! The count features which will be modeled. + TFeatureVec m_Features; + + //! The bucket results delay. + std::size_t m_BucketResultsDelay; + + //! A cached search key. 
+ mutable TOptionalSearchKey m_SearchKeyCache; }; - } } diff --git a/include/model/CFeatureData.h b/include/model/CFeatureData.h index 756c574fc6..af0952eeca 100644 --- a/include/model/CFeatureData.h +++ b/include/model/CFeatureData.h @@ -8,8 +8,8 @@ #define INCLUDED_ml_model_CFeatureData_h #include -#include #include +#include #include #include @@ -22,27 +22,23 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief Manages the indexing for the feature values in the statistics //! vectors passed from data gatherers to the model classes. -class MODEL_EXPORT CFeatureDataIndexing -{ - public: - using TSizeVec = std::vector; - - public: - //! Get the indices of the actual feature value(s) in the feature - //! data vector. - static const TSizeVec &valueIndices(std::size_t dimension); +class MODEL_EXPORT CFeatureDataIndexing { +public: + using TSizeVec = std::vector; + +public: + //! Get the indices of the actual feature value(s) in the feature + //! data vector. + static const TSizeVec& valueIndices(std::size_t dimension); }; //! \brief The data for an event rate series feature. -struct MODEL_EXPORT SEventRateFeatureData -{ +struct MODEL_EXPORT SEventRateFeatureData { using TDouble1Vec = core::CSmallVector; using TStrCRef = boost::reference_wrapper; using TDouble1VecDoublePr = std::pair; @@ -53,7 +49,7 @@ struct MODEL_EXPORT SEventRateFeatureData SEventRateFeatureData(uint64_t count); //! Efficiently swap the contents of this and \p other. - void swap(SEventRateFeatureData &other); + void swap(SEventRateFeatureData& other); //! Print the data for debug. std::string print() const; @@ -69,8 +65,7 @@ struct MODEL_EXPORT SEventRateFeatureData }; //! \brief The data for a metric series feature. -struct MODEL_EXPORT SMetricFeatureData -{ +struct MODEL_EXPORT SMetricFeatureData { using TDouble1Vec = core::CSmallVector; using TOptionalSample = boost::optional; using TSampleVec = std::vector; @@ -81,17 +76,15 @@ struct MODEL_EXPORT SMetricFeatureData using TStrCRefDouble1VecDoublePrPrVecVec = std::vector; SMetricFeatureData(core_t::TTime bucketTime, - const TDouble1Vec &bucketValue, + const TDouble1Vec& bucketValue, double bucketVarianceScale, double bucketCount, - TStrCRefDouble1VecDoublePrPrVecVec &influenceValues, + TStrCRefDouble1VecDoublePrPrVecVec& influenceValues, bool isInteger, bool isNonNegative, - const TSampleVec &samples); + const TSampleVec& samples); - SMetricFeatureData(bool isInteger, - bool isNonNegative, - const TSampleVec &samples); + SMetricFeatureData(bool isInteger, bool isNonNegative, const TSampleVec& samples); //! Print the data for debug. std::string print() const; @@ -113,7 +106,6 @@ struct MODEL_EXPORT SMetricFeatureData //! The samples. TSampleVec s_Samples; }; - } } diff --git a/include/model/CForecastDataSink.h b/include/model/CForecastDataSink.h index dac41387f4..4eda225718 100644 --- a/include/model/CForecastDataSink.h +++ b/include/model/CForecastDataSink.h @@ -8,9 +8,9 @@ #include #include -#include #include #include +#include #include @@ -22,177 +22,168 @@ #include -#include #include +#include #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief //! Sink for data created from forecasting //! //! NOTE: Except for push, this is a stub implementation and going //! to change (e.g. the json writing should not happen in this class). 
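For a concrete picture of how the sink declared below is meant to be driven, here is a hedged sketch; every literal value is invented, and only the constructor and push() signatures from this header are relied on.

    // Sketch only: job/forecast identifiers, times and field values are
    // hypothetical; the error bar is produced elsewhere by the forecaster.
    void writeOnePoint(ml::core::CJsonOutputStreamWrapper& outStream,
                       const ml::maths::SErrorBar& errorBar) {
        ml::model::CForecastDataSink sink("job-1", "forecast-1", "" /*no alias*/,
                                          1520590000 /*create*/, 1520590000 /*start*/,
                                          1520676400 /*end*/, 1521195000 /*expiry*/,
                                          20971520 /*model memory*/, outStream);
        // Models with an over field are not forecast, hence no over field here.
        sink.push(errorBar, "mean", "" /*partition field name*/, "" /*partition field value*/,
                  "airline" /*by field name*/, "AAL" /*by field value*/, 0 /*detector index*/);
    }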
-class MODEL_EXPORT CForecastDataSink final : private core::CNonCopyable -{ - public: - using TMathsModelPtr = std::unique_ptr; - using TStrUMap = boost::unordered_set; - - //! Wrapper for 1 timeseries model, its feature and by Field - struct MODEL_EXPORT SForecastModelWrapper - { - SForecastModelWrapper(model_t::EFeature feature, - TMathsModelPtr &&forecastModel, - const std::string &byFieldValue); - - SForecastModelWrapper(SForecastModelWrapper &&other); - - SForecastModelWrapper(const SForecastModelWrapper &that) = delete; - SForecastModelWrapper & operator=(const SForecastModelWrapper &) = delete; - - model_t::EFeature s_Feature; - TMathsModelPtr s_ForecastModel; - std::string s_ByFieldValue; - }; - - //! Everything that defines 1 series of forecasts - struct MODEL_EXPORT SForecastResultSeries - { - SForecastResultSeries(); - - SForecastResultSeries(SForecastResultSeries &&other); - - SForecastResultSeries(const SForecastResultSeries &that) = delete; - SForecastResultSeries & operator=(const SForecastResultSeries &) = delete; - - int s_DetectorIndex; - std::vector s_ToForecast; - std::string s_PartitionFieldName; - std::string s_PartitionFieldValue; - std::string s_ByFieldName; - }; - - //! \brief Data describing prerequisites prior predictions - struct MODEL_EXPORT SForecastModelPrerequisites - { - std::size_t s_NumberOfModels; - std::size_t s_NumberOfForecastableModels; - std::size_t s_MemoryUsageForDetector; - bool s_IsPopulation; - bool s_IsSupportedFunction; - }; - - private: - static const std::string JOB_ID; - static const std::string FORECAST_ID; - static const std::string FORECAST_ALIAS; - static const std::string DETECTOR_INDEX; - static const std::string MODEL_FORECAST; - static const std::string MODEL_FORECAST_STATS; - static const std::string PARTITION_FIELD_NAME; - static const std::string PARTITION_FIELD_VALUE; - static const std::string FEATURE; - static const std::string BY_FIELD_NAME; - static const std::string BY_FIELD_VALUE; - static const std::string LOWER; - static const std::string UPPER; - static const std::string PREDICTION; - static const std::string BUCKET_SPAN; - static const std::string PROCESSED_RECORD_COUNT; - static const std::string CREATE_TIME; - static const std::string TIMESTAMP; - static const std::string START_TIME; - static const std::string END_TIME; - static const std::string EXPIRY_TIME; - static const std::string MEMORY_USAGE; - static const std::string MESSAGES; - static const std::string PROCESSING_TIME_MS; - static const std::string PROGRESS; - static const std::string STATUS; - - public: - //! Create a DataSink instance - CForecastDataSink(const std::string &jobId, - const std::string &forecastId, - const std::string &forecastAlias, - core_t::TTime createTime, - core_t::TTime startTime, - core_t::TTime endTime, - core_t::TTime expiryTime, - size_t memoryUsage, - core::CJsonOutputStreamWrapper &outStream); - - //! Push a forecast datapoint - //! Note: No forecasting for models with over field, therefore no over field - void push(const maths::SErrorBar errorBar, - const std::string &feature, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &byFieldName, - const std::string &byFieldValue, - int detectorIndex); - - //! Write Statistics about the forecast, also marks the ending - void writeStats(const double progress, uint64_t runtime, const TStrUMap &messages, bool successful = true); - - //! 
Write a scheduled message to signal that validation was successful - void writeScheduledMessage(); - - //! Write an error message to signal a problem with forecasting - void writeErrorMessage(const std::string &message); - - //! Write a message to signal that forecasting is complete - //! - //! This is used when exiting early but not as a result of an error - void writeFinalMessage(const std::string &message); - - //! get the number of forecast records written - uint64_t numRecordsWritten() const; - - private: - void writeCommonStatsFields(rapidjson::Value &doc); - void push(bool flush, rapidjson::Value &doc); - - private: - //! The job ID - std::string m_JobId; - - //! The forecast ID - std::string m_ForecastId; - - //! The forecast alias - std::string m_ForecastAlias; - - //! JSON line writer - core::CRapidJsonConcurrentLineWriter m_Writer; - - //! count of how many records written - uint64_t m_NumRecordsWritten; - - //! Forecast create time - core_t::TTime m_CreateTime; - - //! Forecast start time - core_t::TTime m_StartTime; - - //! Forecast end time - core_t::TTime m_EndTime; - - //! Forecast expiry time - core_t::TTime m_ExpiryTime; - - //! Forecast memory usage for models - size_t m_MemoryUsage; +class MODEL_EXPORT CForecastDataSink final : private core::CNonCopyable { +public: + using TMathsModelPtr = std::unique_ptr; + using TStrUMap = boost::unordered_set; + + //! Wrapper for 1 timeseries model, its feature and by Field + struct MODEL_EXPORT SForecastModelWrapper { + SForecastModelWrapper(model_t::EFeature feature, TMathsModelPtr&& forecastModel, const std::string& byFieldValue); + + SForecastModelWrapper(SForecastModelWrapper&& other); + + SForecastModelWrapper(const SForecastModelWrapper& that) = delete; + SForecastModelWrapper& operator=(const SForecastModelWrapper&) = delete; + + model_t::EFeature s_Feature; + TMathsModelPtr s_ForecastModel; + std::string s_ByFieldValue; + }; + + //! Everything that defines 1 series of forecasts + struct MODEL_EXPORT SForecastResultSeries { + SForecastResultSeries(); + + SForecastResultSeries(SForecastResultSeries&& other); + + SForecastResultSeries(const SForecastResultSeries& that) = delete; + SForecastResultSeries& operator=(const SForecastResultSeries&) = delete; + + int s_DetectorIndex; + std::vector s_ToForecast; + std::string s_PartitionFieldName; + std::string s_PartitionFieldValue; + std::string s_ByFieldName; + }; + + //! 
\brief Data describing prerequisites prior to prediction
+    struct MODEL_EXPORT SForecastModelPrerequisites {
+        std::size_t s_NumberOfModels;
+        std::size_t s_NumberOfForecastableModels;
+        std::size_t s_MemoryUsageForDetector;
+        bool s_IsPopulation;
+        bool s_IsSupportedFunction;
+    };
+
+private:
+    static const std::string JOB_ID;
+    static const std::string FORECAST_ID;
+    static const std::string FORECAST_ALIAS;
+    static const std::string DETECTOR_INDEX;
+    static const std::string MODEL_FORECAST;
+    static const std::string MODEL_FORECAST_STATS;
+    static const std::string PARTITION_FIELD_NAME;
+    static const std::string PARTITION_FIELD_VALUE;
+    static const std::string FEATURE;
+    static const std::string BY_FIELD_NAME;
+    static const std::string BY_FIELD_VALUE;
+    static const std::string LOWER;
+    static const std::string UPPER;
+    static const std::string PREDICTION;
+    static const std::string BUCKET_SPAN;
+    static const std::string PROCESSED_RECORD_COUNT;
+    static const std::string CREATE_TIME;
+    static const std::string TIMESTAMP;
+    static const std::string START_TIME;
+    static const std::string END_TIME;
+    static const std::string EXPIRY_TIME;
+    static const std::string MEMORY_USAGE;
+    static const std::string MESSAGES;
+    static const std::string PROCESSING_TIME_MS;
+    static const std::string PROGRESS;
+    static const std::string STATUS;
+
+public:
+    //! Create a DataSink instance
+    CForecastDataSink(const std::string& jobId,
+                      const std::string& forecastId,
+                      const std::string& forecastAlias,
+                      core_t::TTime createTime,
+                      core_t::TTime startTime,
+                      core_t::TTime endTime,
+                      core_t::TTime expiryTime,
+                      size_t memoryUsage,
+                      core::CJsonOutputStreamWrapper& outStream);
+
+    //! Push a forecast datapoint
+    //! Note: no forecasting for models with an over field, therefore no over field
+    void push(const maths::SErrorBar errorBar,
+              const std::string& feature,
+              const std::string& partitionFieldName,
+              const std::string& partitionFieldValue,
+              const std::string& byFieldName,
+              const std::string& byFieldValue,
+              int detectorIndex);
+
+    //! Write statistics about the forecast; this also marks the ending
+    void writeStats(const double progress, uint64_t runtime, const TStrUMap& messages, bool successful = true);
+
+    //! Write a scheduled message to signal that validation was successful
+    void writeScheduledMessage();
+
+    //! Write an error message to signal a problem with forecasting
+    void writeErrorMessage(const std::string& message);
+
+    //! Write a message to signal that forecasting is complete
+    //!
+    //! This is used when exiting early but not as a result of an error
+    void writeFinalMessage(const std::string& message);
+
+    //! Get the number of forecast records written
+    uint64_t numRecordsWritten() const;
+
+private:
+    void writeCommonStatsFields(rapidjson::Value& doc);
+    void push(bool flush, rapidjson::Value& doc);
+
+private:
+    //! The job ID
+    std::string m_JobId;
+
+    //! The forecast ID
+    std::string m_ForecastId;
+
+    //! The forecast alias
+    std::string m_ForecastAlias;
+
+    //! JSON line writer
+    core::CRapidJsonConcurrentLineWriter m_Writer;
+
+    //! Count of how many records have been written
+    uint64_t m_NumRecordsWritten;
+
+    //! Forecast create time
+    core_t::TTime m_CreateTime;
+
+    //! Forecast start time
+    core_t::TTime m_StartTime;
+
+    //! Forecast end time
+    core_t::TTime m_EndTime;
+
+    //! Forecast expiry time
+    core_t::TTime m_ExpiryTime;
+
+    //! 
Forecast memory usage for models + size_t m_MemoryUsage; }; } /* namespace model */ } /* namespace ml */ - #endif /* INCLUDED_ml_model_CForecastDataSink_h */ diff --git a/include/model/CGathererTools.h b/include/model/CGathererTools.h index 75312e7219..350de3e170 100644 --- a/include/model/CGathererTools.h +++ b/include/model/CGathererTools.h @@ -8,9 +8,9 @@ #define INCLUDED_ml_model_CGathererTools_h #include -#include #include #include +#include #include #include @@ -31,16 +31,12 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief A collection of utility functionality for the CDataGatherer //! and CBucketGatherer hierarchies. @@ -56,284 +52,259 @@ namespace model //! all member functions should be static and it should be state-less. //! If your functionality doesn't fit this pattern just make it a nested //! class. -class MODEL_EXPORT CGathererTools -{ +class MODEL_EXPORT CGathererTools { +public: + using TDoubleVec = std::vector; + using TOptionalDouble = boost::optional; + using TSampleVec = std::vector; + using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; + using TMedianAccumulator = maths::CFixedQuantileSketch; + using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; + using TVarianceAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; + using TMultivariateMeanAccumulator = CMetricMultivariateStatistic; + using TMultivariateMinAccumulator = CMetricMultivariateStatistic; + using TMultivariateMaxAccumulator = CMetricMultivariateStatistic; + + //! \brief Mean arrival time gatherer. + //! + //! DESCRIPTION:\n + //! Wraps up the functionality to sample the mean time between + //! measurements. + class MODEL_EXPORT CArrivalTimeGatherer { public: - using TDoubleVec = std::vector; - using TOptionalDouble = boost::optional; - using TSampleVec = std::vector; - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; - using TMedianAccumulator = maths::CFixedQuantileSketch; - using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack >; - using TVarianceAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMultivariateMeanAccumulator = CMetricMultivariateStatistic; - using TMultivariateMinAccumulator = CMetricMultivariateStatistic; - using TMultivariateMaxAccumulator = CMetricMultivariateStatistic; - - //! \brief Mean arrival time gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the mean time between - //! measurements. - class MODEL_EXPORT CArrivalTimeGatherer - { - public: - using TAccumulator = TMeanAccumulator; - - public: - //! The earliest possible time. - static const core_t::TTime FIRST_TIME; - - public: - CArrivalTimeGatherer(); - - //! Get the mean arrival time in this bucketing interval. - TOptionalDouble featureData() const; - - //! Update the state with a new measurement. - //! - //! \param[in] time The time of the measurement. - inline void add(core_t::TTime time) - { - this->add(time, 1); - } - - //! Update the state with a measurement count. - //! - //! \param[in] time The end time of the \p count messages. - //! \param[in] count The count of measurements. 
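The add() overloads declared above maintain a running mean of the gaps between messages; when \p count messages arrive together, the elapsed time is spread evenly across them. A standalone sketch of the same arithmetic, with the ml accumulator types replaced by plain members:

    // Sketch only: mirrors the update in CArrivalTimeGatherer::add with
    // simplified types; one mean update is made per call, not per message.
    #include <cstdint>

    struct ArrivalTimeMean {
        std::int64_t lastTime = -1; // stands in for FIRST_TIME
        double mean = 0.0;          // mean inter-arrival gap so far
        std::uint64_t samples = 0;

        void add(std::int64_t time, unsigned int count) {
            if (lastTime < 0) {
                lastTime = time; // first message: nothing to measure yet
                return;
            }
            double gap = static_cast<double>(time - lastTime) / static_cast<double>(count);
            mean += (gap - mean) / static_cast<double>(++samples);
            lastTime = time;
        }
    };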
- inline void add(core_t::TTime time, unsigned int count) - { - if (m_LastTime == FIRST_TIME) - { - m_LastTime = time; - } - else - { - m_Value.add( static_cast(time - m_LastTime) - / static_cast(count)); - m_LastTime = time; - } - } - - //! Update the state to represent the start of a new bucket. - void startNewBucket(); - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + using TAccumulator = TMeanAccumulator; - //! Create from part of an XML document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - //@} - - //! Get the checksum of this gatherer. - uint64_t checksum() const; - - //! Print this gatherer for debug. - std::string print() const; - - private: - //! The last time a message was added. - core_t::TTime m_LastTime; - - //! The mean time between messages received in the current - //! bucketing interval. - TAccumulator m_Value; - }; + public: + //! The earliest possible time. + static const core_t::TTime FIRST_TIME; - //! \brief Mean statistic gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the arithmetic mean of - //! a fixed number of measurements, which are supplied to the add - //! function. - //! - //! This also computes the mean of all measurements in the current - //! bucketing interval. - using TMeanGatherer = CSampleGatherer; + public: + CArrivalTimeGatherer(); - //! \brief Multivariate mean statistic gatherer. - //! - //! See TMeanGatherer for details. - using TMultivariateMeanGatherer = CSampleGatherer; + //! Get the mean arrival time in this bucketing interval. + TOptionalDouble featureData() const; - //! \brief Median statistic gatherer. + //! Update the state with a new measurement. //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the median of a fixed number - //! of measurements, which are supplied to the add function. - using TMedianGatherer = CSampleGatherer; - - // TODO Add multivariate median. + //! \param[in] time The time of the measurement. + inline void add(core_t::TTime time) { this->add(time, 1); } - //! \brief Minimum statistic gatherer. + //! Update the state with a measurement count. //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the minimum of a fixed number - //! of measurements, which are supplied to the add function. - //! - //! This also computes the minimum of all measurements in the current + //! \param[in] time The end time of the \p count messages. + //! \param[in] count The count of measurements. + inline void add(core_t::TTime time, unsigned int count) { + if (m_LastTime == FIRST_TIME) { + m_LastTime = time; + } else { + m_Value.add(static_cast(time - m_LastTime) / static_cast(count)); + m_LastTime = time; + } + } + + //! Update the state to represent the start of a new bucket. + void startNewBucket(); + + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Create from part of an XML document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + //@} + + //! Get the checksum of this gatherer. + uint64_t checksum() const; + + //! Print this gatherer for debug. + std::string print() const; + + private: + //! The last time a message was added. + core_t::TTime m_LastTime; + + //! The mean time between messages received in the current //! bucketing interval. 
- using TMinGatherer = CSampleGatherer; - - //! \brief Multivariate minimum statistic gatherer. - //! - //! See TMinGatherer for details. - using TMultivariateMinGatherer = CSampleGatherer; + TAccumulator m_Value; + }; + + //! \brief Mean statistic gatherer. + //! + //! DESCRIPTION:\n + //! Wraps up the functionality to sample the arithmetic mean of + //! a fixed number of measurements, which are supplied to the add + //! function. + //! + //! This also computes the mean of all measurements in the current + //! bucketing interval. + using TMeanGatherer = CSampleGatherer; + + //! \brief Multivariate mean statistic gatherer. + //! + //! See TMeanGatherer for details. + using TMultivariateMeanGatherer = CSampleGatherer; + + //! \brief Median statistic gatherer. + //! + //! DESCRIPTION:\n + //! Wraps up the functionality to sample the median of a fixed number + //! of measurements, which are supplied to the add function. + using TMedianGatherer = CSampleGatherer; + + // TODO Add multivariate median. + + //! \brief Minimum statistic gatherer. + //! + //! DESCRIPTION:\n + //! Wraps up the functionality to sample the minimum of a fixed number + //! of measurements, which are supplied to the add function. + //! + //! This also computes the minimum of all measurements in the current + //! bucketing interval. + using TMinGatherer = CSampleGatherer; + + //! \brief Multivariate minimum statistic gatherer. + //! + //! See TMinGatherer for details. + using TMultivariateMinGatherer = CSampleGatherer; + + //! \brief Maximum statistic gatherer. + //! + //! DESCRIPTION:\n + //! Wraps up the functionality to sample the maximum of a fixed number + //! of measurements, which are supplied to the add function. + //! + //! This also computes the maximum of all measurements in the current + //! bucketing interval. + using TMaxGatherer = CSampleGatherer; + + //! \brief Multivariate maximum statistic gatherer. + //! + //! See TMaxGatherer for details. + using TMultivariateMaxGatherer = CSampleGatherer; + + //! \brief Variance statistic gatherer. + //! + //! DESCRIPTION:\n + //! Wraps up the functionality to sample the variance of a fixed number + //! of measurements, which are supplied to the add function. + //! + //! This also computes the variance of all measurements in the current + //! bucketing interval. + using TVarianceGatherer = CSampleGatherer; + + // TODO Add multivariate variance. + + //! \brief Bucket sum gatherer. + //! + //! DESCRIPTION:\n + //! Wraps up the functionality to sample the sum of a metric quantity + //! in a bucketing interval. + class MODEL_EXPORT CSumGatherer { + public: + using TDouble1Vec = core::CSmallVector; + using TStrVec = std::vector; + using TStrVecCItr = TStrVec::const_iterator; + using TOptionalStr = boost::optional; + using TOptionalStrVec = std::vector; + using TSampleVecQueue = CBucketQueue; + using TSampleVecQueueItr = TSampleVecQueue::iterator; + using TSampleVecQueueCItr = TSampleVecQueue::const_iterator; + using TStoredStringPtrDoubleUMap = boost::unordered_map; + using TStoredStringPtrDoubleUMapCItr = TStoredStringPtrDoubleUMap::const_iterator; + using TStoredStringPtrDoubleUMapQueue = CBucketQueue; + using TStoredStringPtrDoubleUMapQueueCRItr = TStoredStringPtrDoubleUMapQueue::const_reverse_iterator; + using TStoredStringPtrDoubleUMapQueueVec = std::vector; + using TStoredStringPtrVec = std::vector; - //! \brief Maximum statistic gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the maximum of a fixed number - //! 
of measurements, which are supplied to the add function. - //! - //! This also computes the maximum of all measurements in the current - //! bucketing interval. - using TMaxGatherer = CSampleGatherer; + public: + CSumGatherer(const SModelParams& params, + std::size_t dimension, + core_t::TTime startTime, + core_t::TTime bucketLength, + TStrVecCItr beginInfluencers, + TStrVecCItr endInfluencers); - //! \brief Multivariate maximum statistic gatherer. - //! - //! See TMaxGatherer for details. - using TMultivariateMaxGatherer = CSampleGatherer; + //! Get the dimension of the underlying statistic. + std::size_t dimension() const; - //! \brief Variance statistic gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the variance of a fixed number - //! of measurements, which are supplied to the add function. - //! - //! This also computes the variance of all measurements in the current - //! bucketing interval. - using TVarianceGatherer = CSampleGatherer; + //! Get the feature data for the current bucketing interval. + SMetricFeatureData featureData(core_t::TTime time, core_t::TTime bucketLength, const TSampleVec& emptySample) const; - // TODO Add multivariate variance. + //! Returns false. + bool sample(core_t::TTime time, unsigned int sampleCount); - //! \brief Bucket sum gatherer. + //! Update the state with a new measurement. //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the sum of a metric quantity - //! in a bucketing interval. - class MODEL_EXPORT CSumGatherer - { - public: - using TDouble1Vec = core::CSmallVector; - using TStrVec = std::vector; - using TStrVecCItr = TStrVec::const_iterator; - using TOptionalStr = boost::optional; - using TOptionalStrVec = std::vector; - using TSampleVecQueue = CBucketQueue; - using TSampleVecQueueItr = TSampleVecQueue::iterator; - using TSampleVecQueueCItr = TSampleVecQueue::const_iterator; - using TStoredStringPtrDoubleUMap = boost::unordered_map; - using TStoredStringPtrDoubleUMapCItr = TStoredStringPtrDoubleUMap::const_iterator; - using TStoredStringPtrDoubleUMapQueue = CBucketQueue; - using TStoredStringPtrDoubleUMapQueueCRItr = TStoredStringPtrDoubleUMapQueue::const_reverse_iterator; - using TStoredStringPtrDoubleUMapQueueVec = std::vector; - using TStoredStringPtrVec = std::vector; - - public: - CSumGatherer(const SModelParams ¶ms, - std::size_t dimension, - core_t::TTime startTime, - core_t::TTime bucketLength, - TStrVecCItr beginInfluencers, - TStrVecCItr endInfluencers); - - //! Get the dimension of the underlying statistic. - std::size_t dimension() const; - - //! Get the feature data for the current bucketing interval. - SMetricFeatureData featureData(core_t::TTime time, core_t::TTime bucketLength, - const TSampleVec &emptySample) const; - - //! Returns false. - bool sample(core_t::TTime time, unsigned int sampleCount); - - //! Update the state with a new measurement. - //! - //! \param[in] time The time of \p value. - //! \param[in] value The measurement value. - //! \param[in] influences The influencing field values which - //! label \p value. 
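The add() implementation in the hunk below keys each measurement to its bucket by rounding the timestamp down to a bucket boundary, via maths::CIntegerTools::floor(time, bucketLength), and accumulates one sum per bucket and per influencing field value. A standalone sketch of just the bucketing arithmetic:

    // Sketch only: simplified stand-in for the per-bucket sums kept by
    // CSumGatherer; integer division floors correctly for non-negative times.
    #include <cstdint>
    #include <map>

    using Time = std::int64_t;

    void addToBucket(std::map<Time, double>& bucketSums,
                     Time time, double value, Time bucketLength) {
        Time bucketStart = (time / bucketLength) * bucketLength;
        bucketSums[bucketStart] += value;
    }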
- void add(core_t::TTime time, - const TDouble1Vec &value, - unsigned int count, - unsigned int /*sampleCount*/, - const TStoredStringPtrVec &influences) - { - TSampleVec &sum = m_BucketSums.get(time); - if (sum.empty()) - { - core_t::TTime bucketLength = m_BucketSums.bucketLength(); - sum.push_back(CSample(maths::CIntegerTools::floor(time, bucketLength), - TDoubleVec(1, 0.0), 1.0, 0.0)); - } - (sum[0].value())[0] += value[0]; - sum[0].count() += static_cast(count); - for (std::size_t i = 0u; i < influences.size(); ++i) - { - if (!influences[i]) - { - continue; - } - TStoredStringPtrDoubleUMap &sums = m_InfluencerBucketSums[i].get(time); - sums[influences[i]] += value[0]; - } + //! \param[in] time The time of \p value. + //! \param[in] value The measurement value. + //! \param[in] influences The influencing field values which + //! label \p value. + void add(core_t::TTime time, + const TDouble1Vec& value, + unsigned int count, + unsigned int /*sampleCount*/, + const TStoredStringPtrVec& influences) { + TSampleVec& sum = m_BucketSums.get(time); + if (sum.empty()) { + core_t::TTime bucketLength = m_BucketSums.bucketLength(); + sum.push_back(CSample(maths::CIntegerTools::floor(time, bucketLength), TDoubleVec(1, 0.0), 1.0, 0.0)); + } + (sum[0].value())[0] += value[0]; + sum[0].count() += static_cast(count); + for (std::size_t i = 0u; i < influences.size(); ++i) { + if (!influences[i]) { + continue; } + TStoredStringPtrDoubleUMap& sums = m_InfluencerBucketSums[i].get(time); + sums[influences[i]] += value[0]; + } + } - //! Update the state to represent the start of a new bucket. - void startNewBucket(core_t::TTime time); + //! Update the state to represent the start of a new bucket. + void startNewBucket(core_t::TTime time); - //! Reset bucket. - void resetBucket(core_t::TTime bucketStart); + //! Reset bucket. + void resetBucket(core_t::TTime bucketStart); - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Create from part of a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - //@} + //! Create from part of a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + //@} - //! Get the checksum of this gatherer. - uint64_t checksum() const; + //! Get the checksum of this gatherer. + uint64_t checksum() const; - //! Debug the memory used by this gatherer. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this gatherer. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this gatherer. - std::size_t memoryUsage() const; + //! Get the memory used by this gatherer. + std::size_t memoryUsage() const; - //! Print this gatherer for debug. - std::string print() const; + //! Print this gatherer for debug. + std::string print() const; - //! Is the gatherer holding redundant data? - bool isRedundant(core_t::TTime samplingCutoffTime) const; - private: - //! Classifies the sum series. - CDataClassifier m_Classifier; + //! Is the gatherer holding redundant data? + bool isRedundant(core_t::TTime samplingCutoffTime) const; - //! The sum for each bucket within the latency window. - TSampleVecQueue m_BucketSums; + private: + //! 
Classifies the sum series. + CDataClassifier m_Classifier; - //! The sum for each influencing field value and bucket within - //! the latency window. - TStoredStringPtrDoubleUMapQueueVec m_InfluencerBucketSums; - }; -}; + //! The sum for each bucket within the latency window. + TSampleVecQueue m_BucketSums; + //! The sum for each influencing field value and bucket within + //! the latency window. + TStoredStringPtrDoubleUMapQueueVec m_InfluencerBucketSums; + }; +}; } } diff --git a/include/model/CHierarchicalResults.h b/include/model/CHierarchicalResults.h index a5bf3d8e36..2b0d131c86 100644 --- a/include/model/CHierarchicalResults.h +++ b/include/model/CHierarchicalResults.h @@ -28,20 +28,16 @@ class CHierarchicalResultsTest; -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CAnomalyDetectorModel; class CLimits; -namespace hierarchical_results_detail -{ +namespace hierarchical_results_detail { using TStoredStringPtrVec = std::vector; using TStoredStringPtrStoredStringPtrPr = std::pair; @@ -70,15 +66,14 @@ using TStr1Vec = core::CSmallVector; //! -# autodetect sum(bytes) over host //! -# autodetect rare by uri_path over clientip //! -# and so on. -struct MODEL_EXPORT SResultSpec -{ +struct MODEL_EXPORT SResultSpec { SResultSpec(); //! Persist the result specification by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Restore the result specification reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); //! Print of the specification for debugging. std::string print() const; @@ -134,8 +129,7 @@ struct MODEL_EXPORT SResultSpec //! aggregation process. //! //! \see buildHierarchicalResults for more details. -struct MODEL_EXPORT SNode -{ +struct MODEL_EXPORT SNode { using TAttributeProbabilityVec = std::vector; using TNodeCPtr = const SNode*; using TNodeCPtrVec = std::vector; @@ -143,7 +137,7 @@ struct MODEL_EXPORT SNode using TSizeNodePtrUMap = boost::unordered_map; SNode(); - SNode(const SResultSpec &simpleSearch, SAnnotatedProbability &annotatedProbability); + SNode(const SResultSpec& simpleSearch, SAnnotatedProbability& annotatedProbability); //! Returns the aggregate probability for the node double probability() const; @@ -155,21 +149,17 @@ struct MODEL_EXPORT SNode std::string print() const; //! Efficient swap - void swap(SNode &other); + void swap(SNode& other); //! Persist the node state by passing information to \p inserter. - void acceptPersistInserter1(core::CStatePersistInserter &inserter, - TNodePtrSizeUMap &nodePointers) const; + void acceptPersistInserter1(core::CStatePersistInserter& inserter, TNodePtrSizeUMap& nodePointers) const; //! Persist the node connectivity by passing information to \p inserter. - void acceptPersistInserter2(core::CStatePersistInserter &inserter, - const TNodePtrSizeUMap &nodePointers) const; + void acceptPersistInserter2(core::CStatePersistInserter& inserter, const TNodePtrSizeUMap& nodePointers) const; //! Restore the node state reading state from \p traverser. 
- bool acceptRestoreTraverser1(core::CStateRestoreTraverser &traverser, - TSizeNodePtrUMap &nodePointers); + bool acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser, TSizeNodePtrUMap& nodePointers); //! Restore the node connectivity reading state from \p traverser. - bool acceptRestoreTraverser2(core::CStateRestoreTraverser &traverser, - const TSizeNodePtrUMap &nodePointers); + bool acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser, const TSizeNodePtrUMap& nodePointers); //! \name Connectivity //@{ @@ -208,19 +198,19 @@ struct MODEL_EXPORT SNode //! \name Extra State for Results Output //@{ //! The model which generated the result. - const CAnomalyDetectorModel *s_Model; + const CAnomalyDetectorModel* s_Model; //! The start time of the bucket generating the anomaly. core_t::TTime s_BucketStartTime; - //! The length of the bucket for this result. + //! The length of the bucket for this result. core_t::TTime s_BucketLength; //@} }; //! Non-member node swap to work with standard algorithms MODEL_EXPORT -void swap(SNode &node1, SNode &node2); +void swap(SNode& node1, SNode& node2); } // hierarchical_results_detail:: @@ -252,231 +242,222 @@ class CHierarchicalResultsVisitor; //! invalid if it is kept longer than to output a single result. This is //! to minimize the amount of state that needs to be copied when outputting //! results (to minimize both runtime and transient memory usage). -class MODEL_EXPORT CHierarchicalResults -{ - public: - using TDoubleVec = std::vector; - using TAttributeProbabilityVec = std::vector; - using TResultSpec = hierarchical_results_detail::SResultSpec; - using TStoredStringPtr = core::CStoredStringPtr; - using TStoredStringPtrStoredStringPtrPr = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPr; - using TStoredStringPtrStoredStringPtrPrDoublePr = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPrDoublePr; - using TStoredStringPtrStoredStringPtrPrDoublePrVec = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPrDoublePrVec; - using TNode = hierarchical_results_detail::SNode; - using TNodePtrSizeUMap = hierarchical_results_detail::SNode::TNodePtrSizeUMap; - using TSizeNodePtrUMap = hierarchical_results_detail::SNode::TSizeNodePtrUMap; - using TNodeDeque = std::deque; - using TStoredStringPtrStoredStringPtrPrNodeMap = - std::map; - using TStoredStringPtrNodeMap = std::map; - - public: - CHierarchicalResults(); - - //! Add a dummy result for a simple count detector. - void addSimpleCountResult(SAnnotatedProbability &annotatedProbability, - const CAnomalyDetectorModel *model = 0, - core_t::TTime bucketStartTime = 0); - - //! Add a simple search result. - //! - //! The general syntax for a simple search is - //!
-        //!   [partitionfield = w] function[(x)] [by y] [over z]
-        //! 
- //! - //! Examples include: - //! -# count - //! -# rare by x - //! -# partitionfield = x mean(y) - //! -# min(x) over z - //! -# partitionfield = x dc(y) over z - //! -# partitionfield = w max(x) by y over z - //! - //! If a given search doesn't have a field pass the empty string. - //! - //! \param[in] detector An identifier of the detector generating this - //! result. - //! \param[in] isPopulation True if this is a population result and - //! false otherwise. - //! \param[in] functionName The name of the function of the model's search. - //! \param[in] function The function of the model's search. - //! \param[in] partitionFieldName The partition field name or empty. - //! \param[in] partitionFieldValue The partition field value or empty. - //! \param[in] personFieldName The over field name or empty. - //! \param[in] personFieldValue The over field value or empty. - //! \param[in] valueFieldName The name of the field containing the - //! metric value. - //! \param[out] annotatedProbability A struct containing the probability, - //! the smallest attribute probabilities, the influencers, - //! and any extra descriptive data - //! \param[in] model The model which generated the result. - //! \note Values which are passed by non-constant reference are swapped - //! in to place. - void addModelResult(int detector, - bool isPopulation, - const std::string &functionName, - function_t::EFunction function, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &personFieldName, - const std::string &personFieldValue, - const std::string &valueFieldName, - SAnnotatedProbability &annotatedProbability, - const CAnomalyDetectorModel *model = 0, - core_t::TTime bucketStartTime = 0); - - //! Add the influencer called \p name. - void addInfluencer(const std::string &name); - - //! Build a hierarchy from the current flat node list using the - //! default aggregation rules. - //! - //! The aggregation rules in priority order are: - //! -# Only aggregate searches with the same partition field name - //! and value. - //! -# Subject to 1, aggregate searches with the same person field - //! name and value: this is the by field name and value if no - //! over field is specified otherwise it is the over field name - //! name and value. - void buildHierarchy(); - - //! Creates the pivot nodes for influencing field values. - void createPivots(); - - //! Get the root node of the hierarchy. - const TNode *root() const; - - //! Get the influencer identified by \p influencerName and - //! \p influencerValue if one exists. - const TNode *influencer(const TStoredStringPtr &influencerName, - const TStoredStringPtr &influencerValue) const; - - //! Bottom up first visit the tree. - void bottomUpBreadthFirst(CHierarchicalResultsVisitor &visitor) const; - - //! Top down first visit the tree. - void topDownBreadthFirst(CHierarchicalResultsVisitor &visitor) const; - - //! Post-order depth first visit the tree. - void postorderDepthFirst(CHierarchicalResultsVisitor &visitor) const; - - //! Visit all the pivot nodes bottom up first. - void pivotsBottomUpBreadthFirst(CHierarchicalResultsVisitor &visitor) const; - - //! Visit all the pivot nodes top down first. - void pivotsTopDownBreadthFirst(CHierarchicalResultsVisitor &visitor) const; - - //! Check if there are no results at all including the simple - //! count result. - bool empty() const; - - //! Get the count of leaf (search) results, i.e. excluding the - //! simple count result. - std::size_t resultCount() const; - - //! 
Sets the result to be interm - void setInterim(); - - //! Get type of result - model_t::CResultType resultType() const; - - //! Persist the results by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Restore the results reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Print the results for debug. - std::string print() const; - - private: - //! Create a new node. - TNode &newNode(); - - //! Create a new leaf node for the simple search \p simpleSearch. - TNode &newLeaf(const TResultSpec &simpleSearch, SAnnotatedProbability &annotatedProbability); - - //! Create or retrieve a pivot node for the \p key. - TNode &newPivot(TStoredStringPtrStoredStringPtrPr key); - - //! Create or retrieve a pivot root node for the \p key. - TNode &newPivotRoot(const TStoredStringPtr &key); - - //! Post-order depth first visit the tree. - void postorderDepthFirst(const TNode *node, CHierarchicalResultsVisitor &visitor) const; - - private: - //! Storage for the nodes. - TNodeDeque m_Nodes; - - //! Storage for the pivot nodes. - TStoredStringPtrStoredStringPtrPrNodeMap m_PivotNodes; - - //! Pivot root nodes. - TStoredStringPtrNodeMap m_PivotRootNodes; +class MODEL_EXPORT CHierarchicalResults { +public: + using TDoubleVec = std::vector; + using TAttributeProbabilityVec = std::vector; + using TResultSpec = hierarchical_results_detail::SResultSpec; + using TStoredStringPtr = core::CStoredStringPtr; + using TStoredStringPtrStoredStringPtrPr = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPr; + using TStoredStringPtrStoredStringPtrPrDoublePr = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPrDoublePr; + using TStoredStringPtrStoredStringPtrPrDoublePrVec = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPrDoublePrVec; + using TNode = hierarchical_results_detail::SNode; + using TNodePtrSizeUMap = hierarchical_results_detail::SNode::TNodePtrSizeUMap; + using TSizeNodePtrUMap = hierarchical_results_detail::SNode::TSizeNodePtrUMap; + using TNodeDeque = std::deque; + using TStoredStringPtrStoredStringPtrPrNodeMap = + std::map; + using TStoredStringPtrNodeMap = std::map; + +public: + CHierarchicalResults(); + + //! Add a dummy result for a simple count detector. + void addSimpleCountResult(SAnnotatedProbability& annotatedProbability, + const CAnomalyDetectorModel* model = 0, + core_t::TTime bucketStartTime = 0); + + //! Add a simple search result. + //! + //! The general syntax for a simple search is + //!
+    //!   [partitionfield = w] function[(x)] [by y] [over z]
+    //! 
+    //!
+    //! Examples include:
+    //! -# count
+    //! -# rare by x
+    //! -# partitionfield = x mean(y)
+    //! -# min(x) over z
+    //! -# partitionfield = x dc(y) over z
+    //! -# partitionfield = w max(x) by y over z
+    //!
+    //! If a given search doesn't have a field, pass the empty string.
+    //!
+    //! \param[in] detector An identifier of the detector generating this
+    //! result.
+    //! \param[in] isPopulation True if this is a population result and
+    //! false otherwise.
+    //! \param[in] functionName The name of the function of the model's search.
+    //! \param[in] function The function of the model's search.
+    //! \param[in] partitionFieldName The partition field name or empty.
+    //! \param[in] partitionFieldValue The partition field value or empty.
+    //! \param[in] personFieldName The over field name or empty.
+    //! \param[in] personFieldValue The over field value or empty.
+    //! \param[in] valueFieldName The name of the field containing the
+    //! metric value.
+    //! \param[out] annotatedProbability A struct containing the probability,
+    //! the smallest attribute probabilities, the influencers,
+    //! and any extra descriptive data.
+    //! \param[in] model The model which generated the result.
+    //! \note Values which are passed by non-constant reference are swapped
+    //! into place.
+    void addModelResult(int detector,
+                        bool isPopulation,
+                        const std::string& functionName,
+                        function_t::EFunction function,
+                        const std::string& partitionFieldName,
+                        const std::string& partitionFieldValue,
+                        const std::string& personFieldName,
+                        const std::string& personFieldValue,
+                        const std::string& valueFieldName,
+                        SAnnotatedProbability& annotatedProbability,
+                        const CAnomalyDetectorModel* model = 0,
+                        core_t::TTime bucketStartTime = 0);
+
+    //! Add the influencer called \p name.
+    void addInfluencer(const std::string& name);
+
+    //! Build a hierarchy from the current flat node list using the
+    //! default aggregation rules.
+    //!
+    //! The aggregation rules in priority order are:
+    //! -# Only aggregate searches with the same partition field name
+    //! and value.
+    //! -# Subject to 1, aggregate searches with the same person field
+    //! name and value: this is the by field name and value if no
+    //! over field is specified, otherwise it is the over field
+    //! name and value.
+    void buildHierarchy();
+
+    //! Creates the pivot nodes for influencing field values.
+    void createPivots();
+
+    //! Get the root node of the hierarchy.
+    const TNode* root() const;
+
+    //! Get the influencer identified by \p influencerName and
+    //! \p influencerValue if one exists.
+    const TNode* influencer(const TStoredStringPtr& influencerName, const TStoredStringPtr& influencerValue) const;
+
+    //! Visit the tree breadth first, bottom up.
+    void bottomUpBreadthFirst(CHierarchicalResultsVisitor& visitor) const;
+
+    //! Visit the tree breadth first, top down.
+    void topDownBreadthFirst(CHierarchicalResultsVisitor& visitor) const;
+
+    //! Post-order depth first visit the tree.
+    void postorderDepthFirst(CHierarchicalResultsVisitor& visitor) const;
+
+    //! Visit all the pivot nodes breadth first, bottom up.
+    void pivotsBottomUpBreadthFirst(CHierarchicalResultsVisitor& visitor) const;
+
+    //! Visit all the pivot nodes breadth first, top down.
+    void pivotsTopDownBreadthFirst(CHierarchicalResultsVisitor& visitor) const;
+
+    //! Check if there are no results at all including the simple
+    //! count result.
+    bool empty() const;
+
+    //! Get the count of leaf (search) results, i.e. excluding the
+    //! simple count result.
+    std::size_t resultCount() const;
+
+    //! Sets the result to be interim.
+    void setInterim();
+
+    //! Get the type of result.
+    model_t::CResultType resultType() const;
+
+    //! Persist the results by passing information to \p inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Restore the results reading state from \p traverser.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Print the results for debug.
+    std::string print() const;
+
+private:
+    //! Create a new node.
+    TNode& newNode();
+
+    //! Create a new leaf node for the simple search \p simpleSearch.
+    TNode& newLeaf(const TResultSpec& simpleSearch, SAnnotatedProbability& annotatedProbability);
+
+    //! Create or retrieve a pivot node for the \p key.
+    TNode& newPivot(TStoredStringPtrStoredStringPtrPr key);
+
+    //! Create or retrieve a pivot root node for the \p key.
+    TNode& newPivotRoot(const TStoredStringPtr& key);
-        //! Is the result final or interim?
-        //! This field is transient - does not get persisted because interim results
-        //! never get persisted.
-        model_t::CResultType m_ResultType;
+    //! Post-order depth first visit the tree.
+    void postorderDepthFirst(const TNode* node, CHierarchicalResultsVisitor& visitor) const;
+
+private:
+    //! Storage for the nodes.
+    TNodeDeque m_Nodes;
+
+    //! Storage for the pivot nodes.
+    TStoredStringPtrStoredStringPtrPrNodeMap m_PivotNodes;
+
+    //! Pivot root nodes.
+    TStoredStringPtrNodeMap m_PivotRootNodes;
+
+    //! Is the result final or interim?
+    //! This field is transient - does not get persisted because interim results
+    //! never get persisted.
+    model_t::CResultType m_ResultType;
 };

 //! \brief Interface for visiting the results.
-class MODEL_EXPORT CHierarchicalResultsVisitor
-{
-    public:
-        using TNode = CHierarchicalResults::TNode;
+class MODEL_EXPORT CHierarchicalResultsVisitor {
+public:
+    using TNode = CHierarchicalResults::TNode;

-    public:
-        virtual ~CHierarchicalResultsVisitor();
+public:
+    virtual ~CHierarchicalResultsVisitor();

-        //! Visit a node.
-        virtual void visit(const CHierarchicalResults &results,
-                           const TNode &node,
-                           bool pivot) = 0;
+    //! Visit a node.
+    virtual void visit(const CHierarchicalResults& results, const TNode& node, bool pivot) = 0;
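+
+    //! For example (an illustrative sketch only, not part of this interface),
+    //! a visitor that counts the leaf results it sees might look like:
+    //! \code
+    //! class CLeafCounter : public CHierarchicalResultsVisitor {
+    //! public:
+    //!     virtual void visit(const CHierarchicalResults& /*results*/,
+    //!                        const TNode& node, bool /*pivot*/) {
+    //!         if (isLeaf(node) && !isSimpleCount(node)) {
+    //!             ++m_Count;
+    //!         }
+    //!     }
+    //!     std::size_t m_Count = 0;
+    //! };
+    //! \endcode
+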
-    protected:
-        //! Check if this node is the root node.
-        static bool isRoot(const TNode &node);
+protected:
+    //! Check if this node is the root node.
+    static bool isRoot(const TNode& node);

-        //! Check if the node is a leaf.
-        static bool isLeaf(const TNode &node);
+    //! Check if the node is a leaf.
+    static bool isLeaf(const TNode& node);

-        //! Check if the node is partition, i.e. if its children are
-        //! one or more partitions.
-        static bool isPartitioned(const TNode &node);
+    //! Check if the node is partitioned, i.e. if its children are
+    //! one or more partitions.
+    static bool isPartitioned(const TNode& node);

-        //! Check if this is a named partition.
-        static bool isPartition(const TNode &node);
+    //! Check if this is a named partition.
+    static bool isPartition(const TNode& node);

-        //! Check if the node is a named person.
-        static bool isPerson(const TNode &node);
+    //! Check if the node is a named person.
+    static bool isPerson(const TNode& node);

-        //! Check if the node is an attribute of a person.
-        static bool isAttribute(const TNode &node);
+    //! Check if the node is an attribute of a person.
+    static bool isAttribute(const TNode& node);

-        //! Check if the node is simple counting result.
-        static bool isSimpleCount(const TNode &node);
+    //! Check if the node is a simple counting result.
+    static bool isSimpleCount(const TNode& node);

-        //! Check if the node is a population result.
-        static bool isPopulation(const TNode &node);
+    //! Check if the node is a population result.
+    static bool isPopulation(const TNode& node);

-        //! Check if we can ever write a result for the node.
-        static bool isTypeForWhichWeWriteResults(const TNode &node, bool pivot);
+    //! Check if we can ever write a result for the node.
+    static bool isTypeForWhichWeWriteResults(const TNode& node, bool pivot);

-        //! Get the nearest ancestor of the node for which we write results.
-        static const TNode *nearestAncestorForWhichWeWriteResults(const TNode &node);
+    //! Get the nearest ancestor of the node for which we write results.
+    static const TNode* nearestAncestorForWhichWeWriteResults(const TNode& node);

-        //! Check if we'll write a result for the node.
-        static bool shouldWriteResult(const CLimits &limits,
-                                      const CHierarchicalResults &results,
-                                      const TNode &node,
-                                      bool pivot);
+    //! Check if we'll write a result for the node.
+    static bool shouldWriteResult(const CLimits& limits, const CHierarchicalResults& results, const TNode& node, bool pivot);

     friend class ::CHierarchicalResultsTest;
 };
-
 }
 }
diff --git a/include/model/CHierarchicalResultsAggregator.h b/include/model/CHierarchicalResultsAggregator.h
index 8c8dab71f3..8b8b9dca0f 100644
--- a/include/model/CHierarchicalResultsAggregator.h
+++ b/include/model/CHierarchicalResultsAggregator.h
@@ -16,15 +16,12 @@
 #include

-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace model
-{
+namespace model {
 class CAnomalyDetectorModelConfig;
 class CLimits;

@@ -40,104 +37,95 @@ class CLimits;
 //! people in a population and collections of individual results in
 //! system change analysis. Special logic is used for named people,
 //! i.e. aggregations of multiple compatible simple searches.
-class MODEL_EXPORT CHierarchicalResultsAggregator : public CHierarchicalResultsLevelSet<CDetectorEqualizer>
-{
-    public:
-        //! Enumeration of the possible jobs that the aggregator can
-        //! perform when invoked.
-        enum EJob
-        {
-            E_UpdateAndCorrect,
-            E_Correct,
-            E_NoOp
-        };
-
-    public:
-        CHierarchicalResultsAggregator(const CAnomalyDetectorModelConfig &modelConfig);
-
-        //! Add a job for the subsequent invocations of the normalizer.
-        void setJob(EJob job);
-
-        //! Update the parameters to reflect changes to the model configuration.
-        void refresh(const CAnomalyDetectorModelConfig &modelConfig);
-
-        //! Clear all state such that all equalizers restart from scratch.
-        void clear();
-
-        //! Compute the aggregate probability for \p node.
-        virtual void visit(const CHierarchicalResults &results, const TNode &node, bool pivot);
-
-        //! Age the quantile sketches.
-        void propagateForwardByTime(double time);
-
-        //! \name Persistence
-        //@{
-        //! Persist state by passing information to \p inserter.
-        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-        //! Restore reading state from \p traverser.
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-        // Clone for persistence?
-        //@}
-
-        //! Get a checksum of this object.
-        uint64_t checksum() const;
-
-    private:
-        using TBase = CHierarchicalResultsLevelSet<CDetectorEqualizer>;
-        using TDetectorEqualizer = TBase::Type;
-        using TDetectorEqualizerPtrVec = TBase::TTypePtrVec;
-        using TIntSizePr = std::pair<int, std::size_t>;
-        using TDouble1Vec = core::CSmallVector<double, 1>;
-        using TIntSizePrDouble1VecUMap = boost::unordered_map<TIntSizePr, TDouble1Vec>;
-
-    private:
-        static const std::size_t N = model_t::E_AggregateAttributes + 1;
-
-    private:
-        //! Aggregate at a leaf node.
-        void aggregateLeaf(const TNode &node);
-
-        //! Aggregate at internal node.
-        void aggregateNode(const TNode &node, bool pivot);
-
-        //! Partition the child probabilities into groups to aggregate together.
-        bool partitionChildProbabilities(const TNode &node, bool pivot,
-                                         std::size_t &numberDetectors,
-                                         TIntSizePrDouble1VecUMap (&partition)[N]);
-
-        //! Compute the probability for each of the detectors.
-        void detectorProbabilities(const TNode &node, bool pivot,
-                                   std::size_t numberDetectors,
-                                   const TIntSizePrDouble1VecUMap (&partition)[N],
-                                   int &detector,
-                                   int &aggregation,
-                                   TDouble1Vec &probabilities);
-
-        //! Compute a hash of \p node for gathering up related results.
-        std::size_t hash(const TNode &node) const;
-
-        //! Correct the probability for \p node to equalize probabilities
-        //! across detectors.
-        double correctProbability(const TNode &node, bool pivot,
-                                  int detector, double probability);
-
-    private:
-        //! The jobs that the aggregator will perform when invoked can be:
-        //! update or update + correct.
-        EJob m_Job;
-
-        //! The rate information is lost from the quantile sketches.
-        double m_DecayRate;
-
-        //! The weights to use for the different aggregation styles.
-        double m_Parameters[model_t::NUMBER_AGGREGATION_STYLES][model_t::NUMBER_AGGREGATION_PARAMS];
-
-        //! The maximum anomalous probability.
-        double m_MaximumAnomalousProbability;
-};
+class MODEL_EXPORT CHierarchicalResultsAggregator : public CHierarchicalResultsLevelSet<CDetectorEqualizer> {
+public:
+    //! Enumeration of the possible jobs that the aggregator can
+    //! perform when invoked.
+    enum EJob { E_UpdateAndCorrect, E_Correct, E_NoOp };
+
+public:
+    CHierarchicalResultsAggregator(const CAnomalyDetectorModelConfig& modelConfig);
+
+    //! Add a job for the subsequent invocations of the aggregator.
+    void setJob(EJob job);
+
+    //! Update the parameters to reflect changes to the model configuration.
+    void refresh(const CAnomalyDetectorModelConfig& modelConfig);
+
+    //! Clear all state such that all equalizers restart from scratch.
+    void clear();
+
+    //! Compute the aggregate probability for \p node.
+    virtual void visit(const CHierarchicalResults& results, const TNode& node, bool pivot);
+
+    //! Age the quantile sketches.
+    void propagateForwardByTime(double time);
+
+    //! \name Persistence
+    //@{
+    //! Persist state by passing information to \p inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Restore reading state from \p traverser.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    // Clone for persistence?
+    //@}
+
+    //! Get a checksum of this object.
+    uint64_t checksum() const;
+
+private:
+    using TBase = CHierarchicalResultsLevelSet<CDetectorEqualizer>;
+    using TDetectorEqualizer = TBase::Type;
+    using TDetectorEqualizerPtrVec = TBase::TTypePtrVec;
+    using TIntSizePr = std::pair<int, std::size_t>;
+    using TDouble1Vec = core::CSmallVector<double, 1>;
+    using TIntSizePrDouble1VecUMap = boost::unordered_map<TIntSizePr, TDouble1Vec>;
+
+private:
+    static const std::size_t N = model_t::E_AggregateAttributes + 1;
+
+private:
+    //! Aggregate at a leaf node.
+    void aggregateLeaf(const TNode& node);
+
+    //! Aggregate at internal node.
+    void aggregateNode(const TNode& node, bool pivot);
+
+    //! Partition the child probabilities into groups to aggregate together.
+    bool partitionChildProbabilities(const TNode& node, bool pivot, std::size_t& numberDetectors, TIntSizePrDouble1VecUMap (&partition)[N]);
+
+    //! Compute the probability for each of the detectors.
+    void detectorProbabilities(const TNode& node,
+                               bool pivot,
+                               std::size_t numberDetectors,
+                               const TIntSizePrDouble1VecUMap (&partition)[N],
+                               int& detector,
+                               int& aggregation,
+                               TDouble1Vec& probabilities);
+
+    //! Compute a hash of \p node for gathering up related results.
+    std::size_t hash(const TNode& node) const;
+
+    //! Correct the probability for \p node to equalize probabilities
+    //! across detectors.
+    double correctProbability(const TNode& node, bool pivot, int detector, double probability);
+
+private:
+    //! The jobs that the aggregator will perform when invoked can be:
+    //! update or update + correct.
+    EJob m_Job;
+
+    //! The rate at which information is lost from the quantile sketches.
+    double m_DecayRate;
+
+    //! The weights to use for the different aggregation styles.
+    double m_Parameters[model_t::NUMBER_AGGREGATION_STYLES][model_t::NUMBER_AGGREGATION_PARAMS];
+
+    //! The maximum anomalous probability.
+    double m_MaximumAnomalousProbability;
+};
 }
 }
diff --git a/include/model/CHierarchicalResultsLevelSet.h b/include/model/CHierarchicalResultsLevelSet.h
index f94fa99ce3..9dcd2c1427 100644
--- a/include/model/CHierarchicalResultsLevelSet.h
+++ b/include/model/CHierarchicalResultsLevelSet.h
@@ -16,11 +16,8 @@
 #include
-
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

 //! \brief Defines a set of objects with one set for each distinct level
 //! in the hierarchical results.
@@ -42,307 +39,254 @@ namespace model
 //! the level. T must have a clear function and propagateForwardByTime
 //! functions.
 template<typename T>
-class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor
-{
-    protected:
-        using Type = T;
-        using TTypePtrVec = std::vector;
-        using TDictionary = core::CCompressedDictionary<1>;
-        using TWord = TDictionary::CWord;
-        using TWordTypePr = std::pair;
-        using TWordTypePrVec = std::vector;
-        using TWordTypePrVecItr = typename TWordTypePrVec::iterator;
-        using TWordTypePrVecCItr = typename TWordTypePrVec::const_iterator;
-
-    protected:
-        explicit CHierarchicalResultsLevelSet(const T &bucketElement) :
-                m_BucketElement(bucketElement)
-        {
-        }
-
-        //! Get the root unique element.
-        const T &bucketElement() const { return m_BucketElement; }
-        //! Get a writable root unique element.
-        T &bucketElement() { return m_BucketElement; }
-
-        //! Get an influencer bucket element for \p influencerFieldName.
-        //!
-        //! \note Returns NULL if there isn't a matching one.
-        const T *influencerBucketElement(const std::string &influencerFieldName) const
-        {
-            return element(m_InfluencerBucketSet, influencerFieldName);
-        }
-
-        //! Get an influencer element for \p influencerFieldName.
-        //!
-        //! \note Returns NULL if there isn't a matching one.
-        const T *influencerElement(const std::string &influencerFieldName) const
-        {
-            return element(m_InfluencerSet, influencerFieldName);
-        }
-
-        //! Get a partition element for \p partitionFieldName.
-        //!
-        //! \note Returns NULL if there isn't a matching one.
-        const T *partitionElement(const std::string &partitionFieldName) const
-        {
-            return element(m_PartitionSet, partitionFieldName);
-        }
-
-        //! Get a person element.
-        //!
- //! \note Returns NULL if there isn't a matching one. - const T *personElement(const std::string &partitionFieldName, - const std::string &personFieldName) const - { - TWord word = ms_Dictionary.word(partitionFieldName, personFieldName); - TWordTypePrVecCItr i = element(m_PersonSet, word); - return (i != m_PersonSet.end() && i->first == word) ? &i->second : 0; +class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { +protected: + using Type = T; + using TTypePtrVec = std::vector; + using TDictionary = core::CCompressedDictionary<1>; + using TWord = TDictionary::CWord; + using TWordTypePr = std::pair; + using TWordTypePrVec = std::vector; + using TWordTypePrVecItr = typename TWordTypePrVec::iterator; + using TWordTypePrVecCItr = typename TWordTypePrVec::const_iterator; + +protected: + explicit CHierarchicalResultsLevelSet(const T& bucketElement) : m_BucketElement(bucketElement) {} + + //! Get the root unique element. + const T& bucketElement() const { return m_BucketElement; } + //! Get a writable root unique element. + T& bucketElement() { return m_BucketElement; } + + //! Get an influencer bucket element for \p influencerFieldName. + //! + //! \note Returns NULL if there isn't a matching one. + const T* influencerBucketElement(const std::string& influencerFieldName) const { + return element(m_InfluencerBucketSet, influencerFieldName); + } + + //! Get an influencer element for \p influencerFieldName. + //! + //! \note Returns NULL if there isn't a matching one. + const T* influencerElement(const std::string& influencerFieldName) const { return element(m_InfluencerSet, influencerFieldName); } + + //! Get a partition element for \p partitionFieldName. + //! + //! \note Returns NULL if there isn't a matching one. + const T* partitionElement(const std::string& partitionFieldName) const { return element(m_PartitionSet, partitionFieldName); } + + //! Get a person element. + //! + //! \note Returns NULL if there isn't a matching one. + const T* personElement(const std::string& partitionFieldName, const std::string& personFieldName) const { + TWord word = ms_Dictionary.word(partitionFieldName, personFieldName); + TWordTypePrVecCItr i = element(m_PersonSet, word); + return (i != m_PersonSet.end() && i->first == word) ? &i->second : 0; + } + + //! Get a leaf element. + //! + //! \note Returns NULL if there isn't a matching one. + const T* leafElement(const std::string& partitionFieldName, + const std::string& personFieldName, + const std::string& functionName, + const std::string& valueFieldName) const { + TWord word = ms_Dictionary.word(partitionFieldName, personFieldName, functionName, valueFieldName); + TWordTypePrVecCItr i = element(m_LeafSet, word); + return (i != m_LeafSet.end() && i->first == word) ? &i->second : 0; + } + + //! Get the influencer bucket set. + const TWordTypePrVec& influencerBucketSet() const { return m_InfluencerBucketSet; } + //! Get a writable influencer bucket set. + TWordTypePrVec& influencerBucketSet() { return m_InfluencerBucketSet; } + + //! Get the influencer set. + const TWordTypePrVec& influencerSet() const { return m_InfluencerSet; } + //! Get a writable influencer set. + TWordTypePrVec& influencerSet() { return m_InfluencerSet; } + + //! Get the partition set. + const TWordTypePrVec& partitionSet() const { return m_PartitionSet; } + //! Get a writable partition set. + TWordTypePrVec& partitionSet() { return m_PartitionSet; } + + //! Get the person set. + const TWordTypePrVec& personSet() const { return m_PersonSet; } + //! 
Get a writable person set. + TWordTypePrVec& personSet() { return m_PersonSet; } + + //! Get the leaf set. + const TWordTypePrVec& leafSet() const { return m_LeafSet; } + //! Get a writable leaf set. + TWordTypePrVec& leafSet() { return m_LeafSet; } + + //! Clear all the sets. + void clear() { + m_BucketElement.clear(); + m_InfluencerBucketSet.clear(); + m_InfluencerSet.clear(); + m_PartitionSet.clear(); + m_PersonSet.clear(); + m_LeafSet.clear(); + } + + //! Sort all the sets. + void sort() { + sort(m_InfluencerBucketSet); + sort(m_InfluencerSet); + sort(m_PartitionSet); + sort(m_PersonSet); + sort(m_LeafSet); + } + + //! Age the level set elements. + template + void age(F doAge) { + doAge(m_BucketElement); + age(m_InfluencerBucketSet, doAge); + age(m_InfluencerSet, doAge); + age(m_PartitionSet, doAge); + age(m_PersonSet, doAge); + age(m_LeafSet, doAge); + } + + //! Get and possibly add a normalizer for \p node. + template + void elements(const TNode& node, bool pivot, const FACTORY& factory, TTypePtrVec& result, bool distinctLeavesPerPartition = false) { + result.clear(); + if (this->isSimpleCount(node)) { + return; } - //! Get a leaf element. - //! - //! \note Returns NULL if there isn't a matching one. - const T *leafElement(const std::string &partitionFieldName, - const std::string &personFieldName, - const std::string &functionName, - const std::string &valueFieldName) const - { - TWord word = ms_Dictionary.word(partitionFieldName, - personFieldName, - functionName, - valueFieldName); - TWordTypePrVecCItr i = element(m_LeafSet, word); - return (i != m_LeafSet.end() && i->first == word) ? &i->second : 0; - } - - //! Get the influencer bucket set. - const TWordTypePrVec &influencerBucketSet() const { return m_InfluencerBucketSet; } - //! Get a writable influencer bucket set. - TWordTypePrVec &influencerBucketSet() { return m_InfluencerBucketSet; } - - //! Get the influencer set. - const TWordTypePrVec &influencerSet() const { return m_InfluencerSet; } - //! Get a writable influencer set. - TWordTypePrVec &influencerSet() { return m_InfluencerSet; } - - //! Get the partition set. - const TWordTypePrVec &partitionSet() const { return m_PartitionSet; } - //! Get a writable partition set. - TWordTypePrVec &partitionSet() { return m_PartitionSet; } - - //! Get the person set. - const TWordTypePrVec &personSet() const { return m_PersonSet; } - //! Get a writable person set. - TWordTypePrVec &personSet() { return m_PersonSet; } - - //! Get the leaf set. - const TWordTypePrVec &leafSet() const { return m_LeafSet; } - //! Get a writable leaf set. - TWordTypePrVec &leafSet() { return m_LeafSet; } - - //! Clear all the sets. - void clear() - { - m_BucketElement.clear(); - m_InfluencerBucketSet.clear(); - m_InfluencerSet.clear(); - m_PartitionSet.clear(); - m_PersonSet.clear(); - m_LeafSet.clear(); - } - - //! Sort all the sets. - void sort() - { - sort(m_InfluencerBucketSet); - sort(m_InfluencerSet); - sort(m_PartitionSet); - sort(m_PersonSet); - sort(m_LeafSet); - } - - //! Age the level set elements. - template - void age(F doAge) - { - doAge(m_BucketElement); - age(m_InfluencerBucketSet, doAge); - age(m_InfluencerSet, doAge); - age(m_PartitionSet, doAge); - age(m_PersonSet, doAge); - age(m_LeafSet, doAge); - } - - //! Get and possibly add a normalizer for \p node. 
- template - void elements(const TNode &node, bool pivot, - const FACTORY &factory, TTypePtrVec &result, - bool distinctLeavesPerPartition = false) - { - result.clear(); - if (this->isSimpleCount(node)) - { - return; - } - - if (pivot && this->isRoot(node)) - { - TWord word = ms_Dictionary.word(*node.s_Spec.s_PersonFieldName); - TWordTypePrVecItr i = element(m_InfluencerBucketSet, word); - if (i == m_InfluencerBucketSet.end() || i->first != word) - { - i = m_InfluencerBucketSet.insert( - i, TWordTypePr(word, factory.make(*node.s_Spec.s_PersonFieldName))); - } - result.push_back(&i->second); - return; - } - if (pivot && !this->isRoot(node)) - { - TWord word = ms_Dictionary.word(*node.s_Spec.s_PersonFieldName); - TWordTypePrVecItr i = element(m_InfluencerSet, word); - if (i == m_InfluencerSet.end() || i->first != word) - { - i = m_InfluencerSet.insert( - i, TWordTypePr(word, factory.make(*node.s_Spec.s_PersonFieldName))); - } - result.push_back(&i->second); - return; - } - - std::string partitionKey = distinctLeavesPerPartition ? - *node.s_Spec.s_PartitionFieldName + *node.s_Spec.s_PartitionFieldValue - : *node.s_Spec.s_PartitionFieldName; - - if (this->isLeaf(node)) - { - TWord word = ms_Dictionary.word(partitionKey, - *node.s_Spec.s_PersonFieldName, - *node.s_Spec.s_FunctionName, - *node.s_Spec.s_ValueFieldName); - TWordTypePrVecItr i = element(m_LeafSet, word); - if (i == m_LeafSet.end() || i->first != word) - { - i = m_LeafSet.insert( - i, TWordTypePr(word, factory.make(partitionKey, - *node.s_Spec.s_PersonFieldName, - *node.s_Spec.s_FunctionName, - *node.s_Spec.s_ValueFieldName))); - } - result.push_back(&i->second); - } - if (this->isPerson(node)) - { - TWord word = ms_Dictionary.word(partitionKey, - *node.s_Spec.s_PersonFieldName); - TWordTypePrVecItr i = element(m_PersonSet, word); - if (i == m_PersonSet.end() || i->first != word) - { - i = m_PersonSet.insert( - i, TWordTypePr(word, factory.make(partitionKey, - *node.s_Spec.s_PersonFieldName))); - } - result.push_back(&i->second); - } - if (this->isPartition(node)) - { - TWord word = ms_Dictionary.word(partitionKey); - - TWordTypePrVecItr i = element(m_PartitionSet, word); - if (i == m_PartitionSet.end() || i->first != word) - { - i = m_PartitionSet.insert(i, TWordTypePr(word, factory.make(partitionKey))); - } - result.push_back(&i->second); - } - if (this->isRoot(node)) - { - result.push_back(&m_BucketElement); + if (pivot && this->isRoot(node)) { + TWord word = ms_Dictionary.word(*node.s_Spec.s_PersonFieldName); + TWordTypePrVecItr i = element(m_InfluencerBucketSet, word); + if (i == m_InfluencerBucketSet.end() || i->first != word) { + i = m_InfluencerBucketSet.insert(i, TWordTypePr(word, factory.make(*node.s_Spec.s_PersonFieldName))); } + result.push_back(&i->second); + return; } - - //! Get a checksum of the set data. 
- uint64_t checksum(uint64_t seed) const - { - seed = maths::CChecksum::calculate(seed, m_BucketElement); - seed = maths::CChecksum::calculate(seed, m_InfluencerBucketSet); - seed = maths::CChecksum::calculate(seed, m_InfluencerSet); - seed = maths::CChecksum::calculate(seed, m_PartitionSet); - seed = maths::CChecksum::calculate(seed, m_PersonSet); - return maths::CChecksum::calculate(seed, m_LeafSet); + if (pivot && !this->isRoot(node)) { + TWord word = ms_Dictionary.word(*node.s_Spec.s_PersonFieldName); + TWordTypePrVecItr i = element(m_InfluencerSet, word); + if (i == m_InfluencerSet.end() || i->first != word) { + i = m_InfluencerSet.insert(i, TWordTypePr(word, factory.make(*node.s_Spec.s_PersonFieldName))); + } + result.push_back(&i->second); + return; } - private: - //! Get an element of \p set by name. - static const T *element(const TWordTypePrVec &set, const std::string &name) - { - TWord word = ms_Dictionary.word(name); - TWordTypePrVecCItr i = element(set, word); - return (i != set.end() && i->first == word) ? &i->second : 0; + std::string partitionKey = distinctLeavesPerPartition ? *node.s_Spec.s_PartitionFieldName + *node.s_Spec.s_PartitionFieldValue + : *node.s_Spec.s_PartitionFieldName; + + if (this->isLeaf(node)) { + TWord word = ms_Dictionary.word( + partitionKey, *node.s_Spec.s_PersonFieldName, *node.s_Spec.s_FunctionName, *node.s_Spec.s_ValueFieldName); + TWordTypePrVecItr i = element(m_LeafSet, word); + if (i == m_LeafSet.end() || i->first != word) { + i = m_LeafSet.insert( + i, + TWordTypePr( + word, + factory.make( + partitionKey, *node.s_Spec.s_PersonFieldName, *node.s_Spec.s_FunctionName, *node.s_Spec.s_ValueFieldName))); + } + result.push_back(&i->second); } - - //! Get the element corresponding to \p word if it exists - //! and return the end iterator otherwise. - static TWordTypePrVecCItr element(const TWordTypePrVec &set, const TWord &word) - { - return element(const_cast(set), word); + if (this->isPerson(node)) { + TWord word = ms_Dictionary.word(partitionKey, *node.s_Spec.s_PersonFieldName); + TWordTypePrVecItr i = element(m_PersonSet, word); + if (i == m_PersonSet.end() || i->first != word) { + i = m_PersonSet.insert(i, TWordTypePr(word, factory.make(partitionKey, *node.s_Spec.s_PersonFieldName))); + } + result.push_back(&i->second); } + if (this->isPartition(node)) { + TWord word = ms_Dictionary.word(partitionKey); - //! Get the element corresponding to \p word if it exists - //! and return the end iterator otherwise. - static TWordTypePrVecItr element(TWordTypePrVec &set, const TWord &word) - { - return std::lower_bound(set.begin(), set.end(), - word, maths::COrderings::SFirstLess()); + TWordTypePrVecItr i = element(m_PartitionSet, word); + if (i == m_PartitionSet.end() || i->first != word) { + i = m_PartitionSet.insert(i, TWordTypePr(word, factory.make(partitionKey))); + } + result.push_back(&i->second); } - - //! Sort \p set on its key. - static void sort(TWordTypePrVec &set) - { - std::sort(set.begin(), set.end(), maths::COrderings::SFirstLess()); + if (this->isRoot(node)) { + result.push_back(&m_BucketElement); } - - //! Propagate the set elements forwards by \p time. - template - static void age(TWordTypePrVec &set, F doAge) - { - for (std::size_t i = 0u; i < set.size(); ++i) - { - doAge(set[i].second); - } + } + + //! Get a checksum of the set data. 
+ uint64_t checksum(uint64_t seed) const { + seed = maths::CChecksum::calculate(seed, m_BucketElement); + seed = maths::CChecksum::calculate(seed, m_InfluencerBucketSet); + seed = maths::CChecksum::calculate(seed, m_InfluencerSet); + seed = maths::CChecksum::calculate(seed, m_PartitionSet); + seed = maths::CChecksum::calculate(seed, m_PersonSet); + return maths::CChecksum::calculate(seed, m_LeafSet); + } + +private: + //! Get an element of \p set by name. + static const T* element(const TWordTypePrVec& set, const std::string& name) { + TWord word = ms_Dictionary.word(name); + TWordTypePrVecCItr i = element(set, word); + return (i != set.end() && i->first == word) ? &i->second : 0; + } + + //! Get the element corresponding to \p word if it exists + //! and return the end iterator otherwise. + static TWordTypePrVecCItr element(const TWordTypePrVec& set, const TWord& word) { + return element(const_cast(set), word); + } + + //! Get the element corresponding to \p word if it exists + //! and return the end iterator otherwise. + static TWordTypePrVecItr element(TWordTypePrVec& set, const TWord& word) { + return std::lower_bound(set.begin(), set.end(), word, maths::COrderings::SFirstLess()); + } + + //! Sort \p set on its key. + static void sort(TWordTypePrVec& set) { std::sort(set.begin(), set.end(), maths::COrderings::SFirstLess()); } + + //! Propagate the set elements forwards by \p time. + template + static void age(TWordTypePrVec& set, F doAge) { + for (std::size_t i = 0u; i < set.size(); ++i) { + doAge(set[i].second); } + } - private: - //! The word dictionary. This is static on the assumption that the - //! methods of this class will not be used before main() runs or - //! after it returns. - static TDictionary ms_Dictionary; +private: + //! The word dictionary. This is static on the assumption that the + //! methods of this class will not be used before main() runs or + //! after it returns. + static TDictionary ms_Dictionary; - private: - //! The value for the bucket. - T m_BucketElement; +private: + //! The value for the bucket. + T m_BucketElement; - //! The container for named influencer buckets. - TWordTypePrVec m_InfluencerBucketSet; + //! The container for named influencer buckets. + TWordTypePrVec m_InfluencerBucketSet; - //! The container for named influencers. - TWordTypePrVec m_InfluencerSet; + //! The container for named influencers. + TWordTypePrVec m_InfluencerSet; - //! The container for named partitions. - TWordTypePrVec m_PartitionSet; + //! The container for named partitions. + TWordTypePrVec m_PartitionSet; - //! The container for named people. - TWordTypePrVec m_PersonSet; + //! The container for named people. + TWordTypePrVec m_PersonSet; - //! The container for leaves comprising distinct named - //! (partition, person) field name pairs. - TWordTypePrVec m_LeafSet; + //! The container for leaves comprising distinct named + //! (partition, person) field name pairs. 
+ TWordTypePrVec m_LeafSet; }; template typename CHierarchicalResultsLevelSet::TDictionary CHierarchicalResultsLevelSet::ms_Dictionary; - } } diff --git a/include/model/CHierarchicalResultsNormalizer.h b/include/model/CHierarchicalResultsNormalizer.h index 37bc254747..74ed6083c6 100644 --- a/include/model/CHierarchicalResultsNormalizer.h +++ b/include/model/CHierarchicalResultsNormalizer.h @@ -17,22 +17,17 @@ #include #include - -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { class CAnomalyDetectorModelConfig; -namespace hierarchical_results_normalizer_detail -{ +namespace hierarchical_results_normalizer_detail { using TNormalizerPtr = boost::shared_ptr; //! \brief A normalizer instance and a descriptive string. -struct MODEL_EXPORT SNormalizer -{ - SNormalizer(const std::string &description, const TNormalizerPtr &normalizer); +struct MODEL_EXPORT SNormalizer { + SNormalizer(const std::string& description, const TNormalizerPtr& normalizer); //! Clear the normalizer. void clear(); @@ -46,7 +41,6 @@ struct MODEL_EXPORT SNormalizer std::string s_Description; TNormalizerPtr s_Normalizer; }; - } //! \brief A collection of normalizers for some hierarchical results. @@ -82,141 +76,120 @@ struct MODEL_EXPORT SNormalizer //! present for the bucket, because a field value wasn't present in the //! bucket. The wasted memory in creating a small number of redundant //! normalizers is negligible. -class MODEL_EXPORT CHierarchicalResultsNormalizer : - public CHierarchicalResultsLevelSet, - private core::CNonCopyable -{ - public: - using TBase = CHierarchicalResultsLevelSet; - using TNormalizer = TBase::Type; - using TNormalizerPtrVec = TBase::TTypePtrVec; - using TWordNormalizerPr = TBase::TWordTypePr; - using TWordNormalizerPrVec = TBase::TWordTypePrVec; - using TStrVec = std::vector; - - //! Enumeration of the possible jobs that the normalizer can - //! perform when invoked. - enum EJob - { - E_Update, - E_Normalize, - E_NoOp - }; - - //! Enumeration of possible outcomes of restoring from XML. - enum ERestoreOutcome - { - E_Ok = 0, - E_Corrupt = 1, - E_Incomplete = 2 - }; - - public: - CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig &modelConfig); - - //! Add a job for the subsequent invocations of the normalizer. - void setJob(EJob job); - - //! Clear all state such that all normalizers restart from scratch. - void clear(); - - //! Reset the hasLastUpdateCausedBigChange() flag - void resetBigChange(); - - //! Update the normalizer with the node's anomaly score. - virtual void visit(const CHierarchicalResults &results, const TNode &node, bool pivot); - - //! Age the maximum scores and quantile summaries. - void propagateForwardByTime(double time); - - //! Returns true if the last update caused a big change to the quantiles - //! or false otherwise. - bool hasLastUpdateCausedBigChange() const; - - //! Convert each normalizer to a JSON document and store these as an - //! array to the string provided. - void toJson(core_t::TTime time, const std::string &key, std::string &json, bool makeArray) const; - - //! Replace the state of this object with normalizers restored from - //! the JSON documents in the stream. - ERestoreOutcome fromJsonStream(std::istream &inputStream); - - //! Access to the root normalizer. - const CAnomalyScore::CNormalizer &bucketNormalizer() const; - - //! Get the influencer bucket normalizer for \p influencerFieldName. - //! - //! \note Returns NULL if there isn't a matching one. 
- const CAnomalyScore::CNormalizer * - influencerBucketNormalizer(const std::string &influencerFieldName) const; - - //! Get the influencer normalizer for \p influencerFieldName. - //! - //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer * - influencerNormalizer(const std::string &influencerFieldName) const; - - //! Get a partition normalizer. - //! - //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer * - partitionNormalizer(const std::string &partitionFieldName) const; - - //! Get a person normalizer. - //! - //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer * - personNormalizer(const std::string &partitionFieldName, - const std::string &personFieldName) const; - - //! Get a leaf normalizer. - //! - //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer * - leafNormalizer(const std::string &partitionFieldName, - const std::string &personFieldName, - const std::string &functionName, - const std::string &valueFieldName) const; - - private: - //! Get the normalizer corresponding to \p cue if they exist - //! and return NULL if it doesn't have an appropriate prefix. - //! Also, extract the hash value. - bool parseCue(const std::string &cue, - TWordNormalizerPrVec *&normalizers, - TDictionary::TUInt64Array &hashArray); - - //! Get the persistence cue for the root normalizer. - static const std::string &bucketCue(); - - //! Get the persistence cue for a influencer bucket normalizer. - static std::string influencerBucketCue(const TWord &word); - - //! Get the persistence cue for an influencer normalizer. - static std::string influencerCue(const TWord &word); - - //! Get the persistence cue for a partition normalizer. - static std::string partitionCue(const TWord &word); - - //! Get the persistence cue for a person normalizer. - static std::string personCue(const TWord &word); - - //! Get the persistence cue for a leaf normalizer. - static std::string leafCue(const TWord &word); - - private: - //! The jobs that the normalizer will perform when invoked - //! can be: update, normalize or update + normalize. - EJob m_Job; +class MODEL_EXPORT CHierarchicalResultsNormalizer + : public CHierarchicalResultsLevelSet, + private core::CNonCopyable { +public: + using TBase = CHierarchicalResultsLevelSet; + using TNormalizer = TBase::Type; + using TNormalizerPtrVec = TBase::TTypePtrVec; + using TWordNormalizerPr = TBase::TWordTypePr; + using TWordNormalizerPrVec = TBase::TWordTypePrVec; + using TStrVec = std::vector; + + //! Enumeration of the possible jobs that the normalizer can + //! perform when invoked. + enum EJob { E_Update, E_Normalize, E_NoOp }; + + //! Enumeration of possible outcomes of restoring from XML. + enum ERestoreOutcome { E_Ok = 0, E_Corrupt = 1, E_Incomplete = 2 }; + +public: + CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig& modelConfig); + + //! Add a job for the subsequent invocations of the normalizer. + void setJob(EJob job); + + //! Clear all state such that all normalizers restart from scratch. + void clear(); - //! The model configuration file. - const CAnomalyDetectorModelConfig &m_ModelConfig; - - //! Whether the last update of the quantiles has caused a big change. - bool m_HasLastUpdateCausedBigChange; + //! Reset the hasLastUpdateCausedBigChange() flag + void resetBigChange(); -}; + //! Update the normalizer with the node's anomaly score. 
+    virtual void visit(const CHierarchicalResults& results, const TNode& node, bool pivot);
+
+    //! Age the maximum scores and quantile summaries.
+    void propagateForwardByTime(double time);
+
+    //! Returns true if the last update caused a big change to the quantiles
+    //! or false otherwise.
+    bool hasLastUpdateCausedBigChange() const;
+
+    //! Convert each normalizer to a JSON document and store these as an
+    //! array in the string provided.
+    void toJson(core_t::TTime time, const std::string& key, std::string& json, bool makeArray) const;
+
+    //! Replace the state of this object with normalizers restored from
+    //! the JSON documents in the stream.
+    ERestoreOutcome fromJsonStream(std::istream& inputStream);
+
+    //! Access to the root normalizer.
+    const CAnomalyScore::CNormalizer& bucketNormalizer() const;
+
+    //! Get the influencer bucket normalizer for \p influencerFieldName.
+    //!
+    //! \note Returns NULL if there isn't a matching one.
+    const CAnomalyScore::CNormalizer* influencerBucketNormalizer(const std::string& influencerFieldName) const;
+
+    //! Get the influencer normalizer for \p influencerFieldName.
+    //!
+    //! \note Returns NULL if there isn't a matching one.
+    const CAnomalyScore::CNormalizer* influencerNormalizer(const std::string& influencerFieldName) const;
+
+    //! Get a partition normalizer.
+    //!
+    //! \note Returns NULL if there isn't a matching one.
+    const CAnomalyScore::CNormalizer* partitionNormalizer(const std::string& partitionFieldName) const;
+
+    //! Get a person normalizer.
+    //!
+    //! \note Returns NULL if there isn't a matching one.
+    const CAnomalyScore::CNormalizer* personNormalizer(const std::string& partitionFieldName, const std::string& personFieldName) const;
+
+    //! Get a leaf normalizer.
+    //!
+    //! \note Returns NULL if there isn't a matching one.
+    const CAnomalyScore::CNormalizer* leafNormalizer(const std::string& partitionFieldName,
+                                                     const std::string& personFieldName,
+                                                     const std::string& functionName,
+                                                     const std::string& valueFieldName) const;
+
+private:
+    //! Get the normalizer corresponding to \p cue if it exists
+    //! and return NULL if it doesn't have an appropriate prefix.
+    //! Also, extract the hash value.
+    bool parseCue(const std::string& cue, TWordNormalizerPrVec*& normalizers, TDictionary::TUInt64Array& hashArray);
+
+    //! Get the persistence cue for the root normalizer.
+    static const std::string& bucketCue();
+
+    //! Get the persistence cue for an influencer bucket normalizer.
+    static std::string influencerBucketCue(const TWord& word);
+
+    //! Get the persistence cue for an influencer normalizer.
+    static std::string influencerCue(const TWord& word);
+
+    //! Get the persistence cue for a partition normalizer.
+    static std::string partitionCue(const TWord& word);
+
+    //! Get the persistence cue for a person normalizer.
+    static std::string personCue(const TWord& word);
+
+    //! Get the persistence cue for a leaf normalizer.
+    static std::string leafCue(const TWord& word);
+
+private:
+    //! The jobs that the normalizer will perform when invoked
+    //! can be: update, normalize or update + normalize.
+    EJob m_Job;
+
+    //! The model configuration file.
+    const CAnomalyDetectorModelConfig& m_ModelConfig;
+
+    //! Whether the last update of the quantiles has caused a big change.
+    bool m_HasLastUpdateCausedBigChange;
+};
 }
 }
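For orientation, the persistence API above can be exercised as below. This is
an illustrative sketch only, not part of the patch: the key "quantiles" and
the surrounding function are hypothetical, and a configured normalizer is
assumed to exist.

    #include <model/CHierarchicalResultsNormalizer.h>

    #include <sstream>
    #include <string>

    void exampleJsonRoundTrip(ml::model::CHierarchicalResultsNormalizer& normalizer,
                              ml::core_t::TTime bucketTime) {
        // Serialize every normalizer in the level set as a JSON array.
        std::string json;
        normalizer.toJson(bucketTime, "quantiles", json, /*makeArray =*/true);

        // Later, rebuild the normalizers from the persisted documents.
        std::istringstream stream(json);
        if (normalizer.fromJsonStream(stream) !=
            ml::model::CHierarchicalResultsNormalizer::E_Ok) {
            // Handle E_Corrupt or E_Incomplete as appropriate.
        }
    }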
diff --git a/include/model/CHierarchicalResultsPopulator.h b/include/model/CHierarchicalResultsPopulator.h
index d0c4912ca3..e2f0194ab6 100644
--- a/include/model/CHierarchicalResultsPopulator.h
+++ b/include/model/CHierarchicalResultsPopulator.h
@@ -10,30 +10,25 @@
 #include
 #include

-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 class CLimits;

 //! \brief FIXME
 //!
 //! DESCRIPTION:\n
 //! FIXME
-class MODEL_EXPORT CHierarchicalResultsPopulator : public CHierarchicalResultsVisitor
-{
-    public:
-        //! Constructor
-        CHierarchicalResultsPopulator(const CLimits &limits);
+class MODEL_EXPORT CHierarchicalResultsPopulator : public CHierarchicalResultsVisitor {
+public:
+    //! Constructor
+    CHierarchicalResultsPopulator(const CLimits& limits);

-        //! Visit \p node.
-        virtual void visit(const CHierarchicalResults &results, const TNode &node, bool pivot);
-
-    private:
-        const CLimits &m_Limits;
+    //! Visit \p node.
+    virtual void visit(const CHierarchicalResults& results, const TNode& node, bool pivot);
+
+private:
+    const CLimits& m_Limits;
 };
-
 }
 }
diff --git a/include/model/CHierarchicalResultsProbabilityFinalizer.h b/include/model/CHierarchicalResultsProbabilityFinalizer.h
index d190891539..8a796fa7fd 100644
--- a/include/model/CHierarchicalResultsProbabilityFinalizer.h
+++ b/include/model/CHierarchicalResultsProbabilityFinalizer.h
@@ -10,10 +10,8 @@
 #include
 #include

-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

 //! \brief Ensures that all probabilities are equal to the inverse
 //! deviation of the anomaly scores.
@@ -26,13 +24,11 @@ namespace model
 //! when we write them out for normalization to work as expected.
 //! This visitor ensures this invariant holds in a bottom up
 //! breadth first pass over the results.
-class MODEL_EXPORT CHierarchicalResultsProbabilityFinalizer : public CHierarchicalResultsVisitor
-{
-    public:
-        //! Finalize the probability of \p node.
-        virtual void visit(const CHierarchicalResults &results, const TNode &node, bool pivot);
+class MODEL_EXPORT CHierarchicalResultsProbabilityFinalizer : public CHierarchicalResultsVisitor {
+public:
+    //! Finalize the probability of \p node.
+    virtual void visit(const CHierarchicalResults& results, const TNode& node, bool pivot);
 };
-
 }
 }
diff --git a/include/model/CIndividualModel.h b/include/model/CIndividualModel.h
index 4257520e9d..7b25e8b864 100644
--- a/include/model/CIndividualModel.h
+++ b/include/model/CIndividualModel.h
@@ -8,9 +8,9 @@
 #define INCLUDED_ml_model_CIndividualModel_h

 #include
-#include
 #include
 #include
+#include

 #include
 #include
@@ -24,11 +24,8 @@
 #include

-
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

 class CAnnotatedProbabilityBuilder;
 class CProbabilityAndInfluenceCalculator;

@@ -47,281 +44,267 @@ class CProbabilityAndInfluenceCalculator;
 //!
 //! It assumes data are supplied in time order since this means minimal
 //! state can be maintained.
-class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel -{ - public: - using TSizeTimeUMap = boost::unordered_map; - using TTimeVec = std::vector; - using TSizeUInt64Pr = std::pair; - using TSizeUInt64PrVec = std::vector; - using TFeatureSizeSizeTriple = core::CTriple; - using TFeatureSizeSizeTripleDouble1VecUMap = boost::unordered_map; - using TFeatureMathsModelPtrPr = std::pair; - using TFeatureMathsModelPtrPrVec = std::vector; - using TFeatureMathsModelPtrVecPr = std::pair; - using TFeatureMathsModelPtrVecPrVec = std::vector; - using TFeatureCorrelationsPtrPr = std::pair; - using TFeatureCorrelationsPtrPrVec = std::vector; - - public: - //! \name Life-cycle - //@{ - //! \param[in] params The global configuration parameters. - //! \param[in] dataGatherer The object to gather time series data. - //! \param[in] newFeatureModels The new models to use for each feature. - //! \param[in] newFeatureCorrelateModelPriors The prior to use for the - //! new model of correlates for each feature. - //! \param[in] featureCorrelatesModels The model of all correlates for - //! each feature. - //! \param[in] influenceCalculators The influence calculators to use - //! for each feature. - //! \note The current bucket statistics are left default initialized - //! and so must be sampled for before this model can be used. - CIndividualModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken - //! for a general purpose copy constructor. - CIndividualModel(bool isForPersistence, const CIndividualModel &other); - //@} - - //! Returns false. - virtual bool isPopulation() const; - - //! \name Bucket Statistics - //@{ - //! Get the count in the bucketing interval containing \p time - //! for the person identified by \p pid. - //! - //! \param[in] pid The person of interest. - //! \param[in] time The time of interest. - virtual TOptionalUInt64 currentBucketCount(std::size_t pid, core_t::TTime time) const; - - //! Check if bucket statistics are available for the specified time. - virtual bool bucketStatsAvailable(core_t::TTime time) const; - //@} - - //! \name Update - //@{ - //! Sample any state needed by computeProbablity in the time - //! interval [\p startTime, \p endTime] but do not update the - //! model. This is needed by the results preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) = 0; - - //! Sample any state needed by computeProbablity for the out- - //! of-phase bucket in the time interval [\p startTime, \p endTime] - //! but do not update the model. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! 
Update the model with features samples from the time interval - //! [\p startTime, \p endTime]. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - //! \param[in] resourceMonitor The resourceMonitor. - virtual void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) = 0; - - //! Prune any person models which haven't been updated for a - //! specified period. - virtual void prune(std::size_t maximumAge); - //@} - - //! \name Probability - //@{ - //! Clears \p probability and \p attributeProbabilities. - virtual bool computeTotalProbability(const std::string &person, - std::size_t numberAttributeProbabilities, - TOptionalDouble &probability, - TAttributeProbability1Vec &attributeProbabilities) const; - //@} - - //! Get the checksum of this model. - //! - //! \param[in] includeCurrentBucketStats If true then include - //! the current bucket statistics. (This is designed to handle - //! serialization, for which we don't serialize the current - //! bucket statistics.) - virtual uint64_t checksum(bool includeCurrentBucketStats = true) const = 0; - - //! Debug the memory used by this model. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; - - //! Get the memory used by this model. - virtual std::size_t memoryUsage() const = 0; - - //! Get the static size of this object - used for virtual hierarchies. - virtual std::size_t staticSize() const = 0; - - //! Get the non-estimated value of the the memory used by this model. - virtual std::size_t computeMemoryUsage() const = 0; - - protected: - using TStrCRefDouble1VecDouble1VecPrPr = std::pair; - using TStrCRefDouble1VecDouble1VecPrPrVec = std::vector; - using TStrCRefDouble1VecDouble1VecPrPrVecVec = std::vector; - using TStrCRefDouble1VecDouble1VecPrPrVecVecVec = std::vector; - - protected: - //! Persist state by passing information to the supplied inserter. - void doAcceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Restore the model reading state from the supplied traverser. - bool doAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Get the start time of the current bucket. - virtual core_t::TTime currentBucketStartTime() const = 0; - - //! Set the start time of the current bucket. - virtual void currentBucketStartTime(core_t::TTime time) = 0; - - //! Monitor the resource usage while creating new models. - void createUpdateNewModels(core_t::TTime time, - CResourceMonitor &resourceMonitor); - - //! Create the time series models for "n" newly observed people. - virtual void createNewModels(std::size_t n, std::size_t m) = 0; - - //! Reinitialize the time series models for recycled people. - virtual void updateRecycledModels() = 0; - - //! Update the correlation models. - void refreshCorrelationModels(std::size_t resourceLimit, - CResourceMonitor &resourceMonitor); - - //! Clear out large state objects for people that are pruned. - virtual void clearPrunedResources(const TSizeVec &people, - const TSizeVec &attributes) = 0; - - //! Get the person unique identifiers which have a feature value - //! in the bucketing time interval including \p time. - template - void currentBucketPersonIds(core_t::TTime time, const T &featureData, TSizeVec &result) const; - - //! Get the value of the \p feature of the person identified - //! by \p pid for the bucketing interval containing \p time. 
- template - const T *featureData(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time, - const std::vector > > > &featureData) const; - - //! Sample the bucket statistics and write the results in to - //! \p featureData. - template - void sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - const FILTER &filter, - std::vector > &featureData, - CResourceMonitor &resourceMonitor); - - //! Add the probability and influences for \p feature and \p pid. - template - bool addProbabilityAndInfluences(std::size_t pid, - PARAMS ¶ms, - const INFLUENCES &influences, - CProbabilityAndInfluenceCalculator &pJoint, - CAnnotatedProbabilityBuilder &builder) const; - - //! Get the weight associated with an update to the prior from an empty bucket - //! for features which count empty buckets. - double emptyBucketWeight(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) const; - - //! Get the "probability the bucket is empty" to use to correct probabilities - //! for features which count empty buckets. - double probabilityBucketEmpty(model_t::EFeature feature, std::size_t pid) const; - - //! Get a read only model corresponding to \p feature of the person \p pid. - const maths::CModel *model(model_t::EFeature feature, std::size_t pid) const; - - //! Get a writable model corresponding to \p feature of the person \p pid. - maths::CModel *model(model_t::EFeature feature, std::size_t pid); - - //! Sample the correlate models. - void sampleCorrelateModels(const maths_t::TWeightStyleVec &weightStyles); - - //! Correct \p baseline with \p corrections for interim results. - void correctBaselineForInterim(model_t::EFeature feature, - std::size_t pid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - const TFeatureSizeSizeTripleDouble1VecUMap &corrections, - TDouble1Vec &baseline) const; - - //! Get the first time each person was seen. - const TTimeVec &firstBucketTimes() const; - - //! Get the last time each persion was seen - const TTimeVec &lastBucketTimes() const; - - //! Get the amount by which to derate the initial decay rate - //! and the minimum Winsorisation weight for \p pid at \p time. - double derate(std::size_t pid, core_t::TTime time) const; - - //! Print the current bucketing interval. - std::string printCurrentBucket() const; - - private: - //! Get the person counts in the current bucket. - virtual const TSizeUInt64PrVec ¤tBucketPersonCounts() const = 0; - - //! Get writable person counts in the current bucket. - virtual TSizeUInt64PrVec ¤tBucketPersonCounts() = 0; - - //! Get the total number of correlation models. - std::size_t numberCorrelations() const; - - //! Returns one. - virtual double attributeFrequency(std::size_t cid) const; - - //! Perform derived class specific operations to accomplish skipping sampling - virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime); - - //! Get the model memory usage estimator - virtual CMemoryUsageEstimator *memoryUsageEstimator() const; - - private: - //! The time that each person was first seen. - TTimeVec m_FirstBucketTimes; - - //! The last time that each person was seen. - TTimeVec m_LastBucketTimes; - - //! The models of all the correlates for each feature. - //! - //! IMPORTANT this must come before m_FeatureModels in the class declaration - //! so its destructor is called afterwards (12.6.2) because feature models - //! unregister themselves from correlation models. - TFeatureCorrelateModelsVec m_FeatureCorrelatesModels; - - //! 
The individual person models for each feature.
-        TFeatureModelsVec m_FeatureModels;
-
-        //! The memory estimator.
-        mutable CMemoryUsageEstimator m_MemoryEstimator;
-};
+class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel {
+public:
+    using TSizeTimeUMap = boost::unordered_map<std::size_t, core_t::TTime>;
+    using TTimeVec = std::vector<core_t::TTime>;
+    using TSizeUInt64Pr = std::pair<std::size_t, uint64_t>;
+    using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>;
+    using TFeatureSizeSizeTriple = core::CTriple<model_t::EFeature, std::size_t, std::size_t>;
+    using TFeatureSizeSizeTripleDouble1VecUMap = boost::unordered_map<TFeatureSizeSizeTriple, TDouble1Vec>;
+    using TFeatureMathsModelPtrPr = std::pair;
+    using TFeatureMathsModelPtrPrVec = std::vector<TFeatureMathsModelPtrPr>;
+    using TFeatureMathsModelPtrVecPr = std::pair;
+    using TFeatureMathsModelPtrVecPrVec = std::vector<TFeatureMathsModelPtrVecPr>;
+    using TFeatureCorrelationsPtrPr = std::pair;
+    using TFeatureCorrelationsPtrPrVec = std::vector<TFeatureCorrelationsPtrPr>;
+
+public:
+    //! \name Life-cycle
+    //@{
+    //! \param[in] params The global configuration parameters.
+    //! \param[in] dataGatherer The object to gather time series data.
+    //! \param[in] newFeatureModels The new models to use for each feature.
+    //! \param[in] newFeatureCorrelateModelPriors The prior to use for the
+    //! new model of correlates for each feature.
+    //! \param[in] featureCorrelatesModels The model of all correlates for
+    //! each feature.
+    //! \param[in] influenceCalculators The influence calculators to use
+    //! for each feature.
+    //! \note The current bucket statistics are left default initialized
+    //! and so must be sampled for before this model can be used.
+    CIndividualModel(const SModelParams& params,
+                     const TDataGathererPtr& dataGatherer,
+                     const TFeatureMathsModelPtrPrVec& newFeatureModels,
+                     const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+                     const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels,
+                     const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators);
+
+    //! Create a copy that will result in the same persisted state as the
+    //! original. This is effectively a copy constructor that creates a
+    //! copy that's only valid for a single purpose. The boolean flag is
+    //! redundant except to create a signature that will not be mistaken
+    //! for a general purpose copy constructor.
+    CIndividualModel(bool isForPersistence, const CIndividualModel& other);
+    //@}
+
+    //! Returns false.
+    virtual bool isPopulation() const;
+
+    //! \name Bucket Statistics
+    //@{
+    //! Get the count in the bucketing interval containing \p time
+    //! for the person identified by \p pid.
+    //!
+    //! \param[in] pid The person of interest.
+    //! \param[in] time The time of interest.
+    virtual TOptionalUInt64 currentBucketCount(std::size_t pid, core_t::TTime time) const;
+
+    //! Check if bucket statistics are available for the specified time.
+    virtual bool bucketStatsAvailable(core_t::TTime time) const;
+    //@}
+
+    //! \name Update
+    //@{
+    //! Sample any state needed by computeProbability in the time
+    //! interval [\p startTime, \p endTime] but do not update the
+    //! model. This is needed by the results preview.
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0;
+
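+    //! For example (an illustrative sketch only, assuming the job's bucket
+    //! length is available), a results preview might drive this bucket by
+    //! bucket:
+    //! \code
+    //! for (core_t::TTime time = startTime; time < endTime; time += bucketLength) {
+    //!     model.sampleBucketStatistics(time, time + bucketLength, resourceMonitor);
+    //! }
+    //! \endcode
+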
\param[in] endTime The end of the time interval to sample.
+    virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+    //! Update the model with feature samples from the time interval
+    //! [\p startTime, \p endTime].
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    //! \param[in] resourceMonitor The resourceMonitor.
+    virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0;
+
+    //! Prune any person models which haven't been updated for a
+    //! specified period.
+    virtual void prune(std::size_t maximumAge);
+    //@}
+
+    //! \name Probability
+    //@{
+    //! Clears \p probability and \p attributeProbabilities.
+    virtual bool computeTotalProbability(const std::string& person,
+                                         std::size_t numberAttributeProbabilities,
+                                         TOptionalDouble& probability,
+                                         TAttributeProbability1Vec& attributeProbabilities) const;
+    //@}
+
+    //! Get the checksum of this model.
+    //!
+    //! \param[in] includeCurrentBucketStats If true then include
+    //! the current bucket statistics. (This is designed to handle
+    //! serialization, for which we don't serialize the current
+    //! bucket statistics.)
+    virtual uint64_t checksum(bool includeCurrentBucketStats = true) const = 0;
+
+    //! Debug the memory used by this model.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
+
+    //! Get the memory used by this model.
+    virtual std::size_t memoryUsage() const = 0;
+
+    //! Get the static size of this object - used for virtual hierarchies.
+    virtual std::size_t staticSize() const = 0;
+
+    //! Get the non-estimated value of the memory used by this model.
+    virtual std::size_t computeMemoryUsage() const = 0;
+
+protected:
+    using TStrCRefDouble1VecDouble1VecPrPr = std::pair;
+    using TStrCRefDouble1VecDouble1VecPrPrVec = std::vector;
+    using TStrCRefDouble1VecDouble1VecPrPrVecVec = std::vector;
+    using TStrCRefDouble1VecDouble1VecPrPrVecVecVec = std::vector;
+
+protected:
+    //! Persist state by passing information to the supplied inserter.
+    void doAcceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Restore the model reading state from the supplied traverser.
+    bool doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Get the start time of the current bucket.
+    virtual core_t::TTime currentBucketStartTime() const = 0;
+
+    //! Set the start time of the current bucket.
+    virtual void currentBucketStartTime(core_t::TTime time) = 0;
+
+    //! Monitor the resource usage while creating new models.
+    void createUpdateNewModels(core_t::TTime time, CResourceMonitor& resourceMonitor);
+
+    //! Create the time series models for "n" newly observed people.
+    virtual void createNewModels(std::size_t n, std::size_t m) = 0;
+
+    //! Reinitialize the time series models for recycled people.
+    virtual void updateRecycledModels() = 0;
+
+    //! Update the correlation models.
+    void refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor);
+
+    //! Clear out large state objects for people that are pruned.
+    virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes) = 0;
+
+    //! Get the person unique identifiers which have a feature value
+    //! in the bucketing time interval including \p time.
+    template
+    void currentBucketPersonIds(core_t::TTime time, const T& featureData, TSizeVec& result) const;
+
+    //! 
Get the value of the \p feature of the person identified
+    //! by \p pid for the bucketing interval containing \p time.
+    template
+    const T* featureData(model_t::EFeature feature,
+                         std::size_t pid,
+                         core_t::TTime time,
+                         const std::vector>>>& featureData) const;
+
+    //! Sample the bucket statistics and write the results into
+    //! \p featureData.
+    template
+    void sampleBucketStatistics(core_t::TTime startTime,
+                                core_t::TTime endTime,
+                                const FILTER& filter,
+                                std::vector>& featureData,
+                                CResourceMonitor& resourceMonitor);
+
+    //! Add the probability and influences for \p feature and \p pid.
+    template
+    bool addProbabilityAndInfluences(std::size_t pid,
+                                     PARAMS& params,
+                                     const INFLUENCES& influences,
+                                     CProbabilityAndInfluenceCalculator& pJoint,
+                                     CAnnotatedProbabilityBuilder& builder) const;
+
+    //! Get the weight associated with an update to the prior from an empty bucket
+    //! for features which count empty buckets.
+    double emptyBucketWeight(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const;
+
+    //! Get the "probability the bucket is empty" to use to correct probabilities
+    //! for features which count empty buckets.
+    double probabilityBucketEmpty(model_t::EFeature feature, std::size_t pid) const;
+
+    //! Get a read only model corresponding to \p feature of the person \p pid.
+    const maths::CModel* model(model_t::EFeature feature, std::size_t pid) const;
+
+    //! Get a writable model corresponding to \p feature of the person \p pid.
+    maths::CModel* model(model_t::EFeature feature, std::size_t pid);
+
+    //! Sample the correlate models.
+    void sampleCorrelateModels(const maths_t::TWeightStyleVec& weightStyles);
+
+    //! Correct \p baseline with \p corrections for interim results.
+    void correctBaselineForInterim(model_t::EFeature feature,
+                                   std::size_t pid,
+                                   model_t::CResultType type,
+                                   const TSizeDoublePr1Vec& correlated,
+                                   const TFeatureSizeSizeTripleDouble1VecUMap& corrections,
+                                   TDouble1Vec& baseline) const;
+
+    //! Get the first time each person was seen.
+    const TTimeVec& firstBucketTimes() const;
+
+    //! Get the last time each person was seen.
+    const TTimeVec& lastBucketTimes() const;
+
+    //! Get the amount by which to derate the initial decay rate
+    //! and the minimum Winsorisation weight for \p pid at \p time.
+    double derate(std::size_t pid, core_t::TTime time) const;
+    //! Print the current bucketing interval.
+    std::string printCurrentBucket() const;
+
+private:
+    //! Get the person counts in the current bucket.
+    virtual const TSizeUInt64PrVec& currentBucketPersonCounts() const = 0;
+
+    //! Get writable person counts in the current bucket.
+    virtual TSizeUInt64PrVec& currentBucketPersonCounts() = 0;
+
+    //! Get the total number of correlation models.
+    std::size_t numberCorrelations() const;
+
+    //! Returns one.
+    virtual double attributeFrequency(std::size_t cid) const;
+
+    //! Perform derived class specific operations to accomplish skipping sampling
+    virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime);
+
+    //! Get the model memory usage estimator
+    virtual CMemoryUsageEstimator* memoryUsageEstimator() const;
+
+private:
+    //! The time that each person was first seen.
+    TTimeVec m_FirstBucketTimes;
+
+    //! The last time that each person was seen.
+    TTimeVec m_LastBucketTimes;
+
+    //! The models of all the correlates for each feature.
+    //!
+    //! IMPORTANT this must come before m_FeatureModels in the class declaration
+    //! so its destructor is called afterwards (12.6.2) because feature models
+    //! 
unregister themselves from correlation models. + TFeatureCorrelateModelsVec m_FeatureCorrelatesModels; + + //! The individual person models for each feature. + TFeatureModelsVec m_FeatureModels; + + //! The memory estimator. + mutable CMemoryUsageEstimator m_MemoryEstimator; +}; } } diff --git a/include/model/CIndividualModelDetail.h b/include/model/CIndividualModelDetail.h index fcfcd3b65b..160e763158 100644 --- a/include/model/CIndividualModelDetail.h +++ b/include/model/CIndividualModelDetail.h @@ -12,32 +12,23 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { template -void CIndividualModel::currentBucketPersonIds(core_t::TTime time, - const T &featureData, - TSizeVec &result) const -{ +void CIndividualModel::currentBucketPersonIds(core_t::TTime time, const T& featureData, TSizeVec& result) const { using TSizeUSet = boost::unordered_set; result.clear(); - if (!this->bucketStatsAvailable(time)) - { - LOG_ERROR("No statistics at " << time - << ", current bucket = " << this->printCurrentBucket()); + if (!this->bucketStatsAvailable(time)) { + LOG_ERROR("No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); return; } TSizeUSet people; - for (const auto &feature : featureData) - { - for (const auto &data : feature.second) - { + for (const auto& feature : featureData) { + for (const auto& data : feature.second) { people.insert(data.first); } } @@ -46,58 +37,44 @@ void CIndividualModel::currentBucketPersonIds(core_t::TTime time, } template -const T *CIndividualModel::featureData(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time, - const std::vector > > > &featureData) const -{ - if (!this->bucketStatsAvailable(time)) - { - LOG_ERROR("No statistics at " << time - << ", current bucket = " << this->printCurrentBucket()); +const T* +CIndividualModel::featureData(model_t::EFeature feature, + std::size_t pid, + core_t::TTime time, + const std::vector>>>& featureData) const { + if (!this->bucketStatsAvailable(time)) { + LOG_ERROR("No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); return 0; } - auto i = std::lower_bound(featureData.begin(), - featureData.end(), - feature, maths::COrderings::SFirstLess()); - if (i == featureData.end() || i->first != feature) - { + auto i = std::lower_bound(featureData.begin(), featureData.end(), feature, maths::COrderings::SFirstLess()); + if (i == featureData.end() || i->first != feature) { LOG_ERROR("No data for feature " << model_t::print(feature)); return 0; } - auto j = std::lower_bound(i->second.begin(), - i->second.end(), - pid, maths::COrderings::SFirstLess()); + auto j = std::lower_bound(i->second.begin(), i->second.end(), pid, maths::COrderings::SFirstLess()); return (j != i->second.end() && j->first == pid) ? 
&j->second : 0; } template void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, - const FILTER &filter, - std::vector > &featureData, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); - - if (!gatherer.dataAvailable(startTime)) - { + const FILTER& filter, + std::vector>& featureData, + CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); + + if (!gatherer.dataAvailable(startTime)) { return; } - for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); - time < endTime; - time += bucketLength) - { + for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) { this->CIndividualModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor); gatherer.featureData(time, bucketLength, featureData); - for (auto &feature_ : featureData) - { - T &data = feature_.second; + for (auto& feature_ : featureData) { + T& data = feature_.second; LOG_TRACE(model_t::print(feature_.first) << " data = " << core::CContainerPrinter::print(data)); this->applyFilter(model_t::E_XF_By, false, filter, data); } @@ -106,33 +83,25 @@ void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime, template bool CIndividualModel::addProbabilityAndInfluences(std::size_t pid, - PARAMS ¶ms, - const INFLUENCES &influences, - CProbabilityAndInfluenceCalculator &pJoint, - CAnnotatedProbabilityBuilder &builder) const -{ + PARAMS& params, + const INFLUENCES& influences, + CProbabilityAndInfluenceCalculator& pJoint, + CAnnotatedProbabilityBuilder& builder) const { if (!pJoint.addAttributeProbability(CStringStore::names().get(EMPTY_STRING), model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, 1.0, // attribute probability - params, builder)) - { - LOG_ERROR("Failed to compute P(" << params.describe() - << ", person = " << this->personName(pid) << ")"); + params, + builder)) { + LOG_ERROR("Failed to compute P(" << params.describe() << ", person = " << this->personName(pid) << ")"); return false; - } - else - { - LOG_TRACE("P(" << params.describe() - << ", person = " << this->personName(pid) << ") = " << params.s_Probability); + } else { + LOG_TRACE("P(" << params.describe() << ", person = " << this->personName(pid) << ") = " << params.s_Probability); } - if (!influences.empty()) - { - const CDataGatherer &gatherer = this->dataGatherer(); - for (std::size_t j = 0u; j < influences.size(); ++j) - { - if (const CInfluenceCalculator *influenceCalculator = this->influenceCalculator(params.s_Feature, j)) - { + if (!influences.empty()) { + const CDataGatherer& gatherer = this->dataGatherer(); + for (std::size_t j = 0u; j < influences.size(); ++j) { + if (const CInfluenceCalculator* influenceCalculator = this->influenceCalculator(params.s_Feature, j)) { pJoint.plugin(*influenceCalculator); pJoint.addInfluences(*(gatherer.beginInfluencers() + j), influences[j], params); } @@ -140,7 +109,6 @@ bool CIndividualModel::addProbabilityAndInfluences(std::size_t pid, } return true; } - } } diff --git a/include/model/CInterimBucketCorrector.h b/include/model/CInterimBucketCorrector.h index c533d49231..2a0ff6eff4 100644 --- a/include/model/CInterimBucketCorrector.h +++ b/include/model/CInterimBucketCorrector.h @@ -15,15 +15,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! 
\brief Calculate prediction-based corrections for interim results. //! @@ -41,76 +38,68 @@ namespace model //! distribution of events over time. The bucket count is modelled via a time //! series decomposition. While the decomposition is not initialiased, a mean //! accumulator is used. -class MODEL_EXPORT CInterimBucketCorrector -{ - private: - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; - using TDouble1Vec = core::CSmallVector; - using TDouble10Vec = core::CSmallVector; - - public: - //! Constructs an interim bucket corrector for buckets of length \p bucketLength - CInterimBucketCorrector(core_t::TTime bucketLength); - - //! Copy constructor - CInterimBucketCorrector(const CInterimBucketCorrector &other); - - //! Updates the model of the bucket count with a new measurement - void update(core_t::TTime time, std::size_t bucketCount); - - //! Calculates corrections for the \p value based on the given \p mode - //! and the estimated bucket completeness. - //! - //! \param[in] time The time of interest. - //! \param[in] currentCount The total count in the bucket of interest. - //! \param[in] mode The mode that corresponds to the given \p value. - //! \param[in] value The value to be corrected. - double corrections(core_t::TTime time, - std::size_t currentCount, - double mode, - double value) const; - - //! Calculates corrections for the \p values based on the given \p modes - //! and the estimated bucket completeness. - //! - //! \param[in] time The time of interest. - //! \param[in] currentCount The total count in the bucket of interest. - //! \param[in] modes The modes that map to the given \p values. - //! \param[in] values The values to be corrected. - TDouble10Vec corrections(core_t::TTime time, - std::size_t currentCount, - const TDouble10Vec &modes, - const TDouble10Vec &values) const; - - //! Get the memory used by the corrector - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by the corrector - std::size_t memoryUsage() const; - - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - private: - //! Returns the mid point in the bucket that contains \p time. - core_t::TTime calcBucketMidPoint(core_t::TTime time) const; - - //! Calculates an estimate of completeness for a bucket that contains - //! \p time and whose current count is \p currentCount. The returned - //! value is within [0.0, 1.0]. - double estimateBucketCompleteness(core_t::TTime time, std::size_t currentCount) const; - - private: - //! The bucket length - core_t::TTime m_BucketLength; - - //! The decomposition of the overall bucket count - maths::CTimeSeriesDecomposition m_CountTrend; - - //! The mean statistic for the overall bucket count - TMeanAccumulator m_CountMean; +class MODEL_EXPORT CInterimBucketCorrector { +private: + using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; + using TDouble1Vec = core::CSmallVector; + using TDouble10Vec = core::CSmallVector; + +public: + //! Constructs an interim bucket corrector for buckets of length \p bucketLength + CInterimBucketCorrector(core_t::TTime bucketLength); + + //! Copy constructor + CInterimBucketCorrector(const CInterimBucketCorrector& other); + + //! Updates the model of the bucket count with a new measurement + void update(core_t::TTime time, std::size_t bucketCount); + + //! Calculates corrections for the \p value based on the given \p mode + //! 
and the estimated bucket completeness. + //! + //! \param[in] time The time of interest. + //! \param[in] currentCount The total count in the bucket of interest. + //! \param[in] mode The mode that corresponds to the given \p value. + //! \param[in] value The value to be corrected. + double corrections(core_t::TTime time, std::size_t currentCount, double mode, double value) const; + + //! Calculates corrections for the \p values based on the given \p modes + //! and the estimated bucket completeness. + //! + //! \param[in] time The time of interest. + //! \param[in] currentCount The total count in the bucket of interest. + //! \param[in] modes The modes that map to the given \p values. + //! \param[in] values The values to be corrected. + TDouble10Vec corrections(core_t::TTime time, std::size_t currentCount, const TDouble10Vec& modes, const TDouble10Vec& values) const; + + //! Get the memory used by the corrector + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by the corrector + std::size_t memoryUsage() const; + + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + +private: + //! Returns the mid point in the bucket that contains \p time. + core_t::TTime calcBucketMidPoint(core_t::TTime time) const; + + //! Calculates an estimate of completeness for a bucket that contains + //! \p time and whose current count is \p currentCount. The returned + //! value is within [0.0, 1.0]. + double estimateBucketCompleteness(core_t::TTime time, std::size_t currentCount) const; + +private: + //! The bucket length + core_t::TTime m_BucketLength; + + //! The decomposition of the overall bucket count + maths::CTimeSeriesDecomposition m_CountTrend; + + //! The mean statistic for the overall bucket count + TMeanAccumulator m_CountMean; }; - } } diff --git a/include/model/CLimits.h b/include/model/CLimits.h index 976a36f5c7..624c423730 100644 --- a/include/model/CLimits.h +++ b/include/model/CLimits.h @@ -9,8 +9,8 @@ #include #include -#include #include +#include #include #include @@ -18,11 +18,8 @@ #include #include - -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief //! Holds configurable limits for the models. @@ -33,7 +30,7 @@ namespace model //! //! IMPLEMENTATION DECISIONS:\n //! Configuration of Ml's analytics commands is stored in config -//! files which are similar in format to Windows .ini files but +//! files which are similar in format to Windows .ini files but //! with hash as the comment character instead of semi-colon. //! //! Boost's property_tree package can parse such config files, as @@ -45,111 +42,99 @@ namespace model //! the boost property_tree is copied into separate member //! variables. //! -class MODEL_EXPORT CLimits -{ - public: - //! Default number of events to consume during auto-config - static const size_t DEFAULT_AUTOCONFIG_EVENTS; - - //! Default maximum number of distinct values of a single field before - //! analysis of that field will be halted - static const size_t DEFAULT_ANOMALY_MAX_FIELD_VALUES; - - //! Default maximum number of time buckets to process during anomaly - //! detection before ceasing to output results - static const size_t DEFAULT_ANOMALY_MAX_TIME_BUCKETS; - - //! Default number of examples to display in results tables - static const size_t DEFAULT_RESULTS_MAX_EXAMPLES; - - //! Default threshold for unusual probabilities to be output even if - //! 
nothing is anomalous on a whole-system basis - static const double DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD; - - public: - //! Default constructor - CLimits(); - - //! Default destructor - ~CLimits(); - - //! Initialise from a config file. This overwrites current settings - //! with any found in the config file. Settings that are not present - //! in the config file will be reset to their default values. - bool init(const std::string &configFile); - - //! Access to settings - size_t autoConfigEvents() const; - size_t anomalyMaxTimeBuckets() const; - size_t maxExamples() const; - double unusualProbabilityThreshold() const; - size_t memoryLimitMB() const; - - //! Access to the resource monitor - CResourceMonitor &resourceMonitor(); - - //! boost::ini_parser doesn't like UTF-8 ini files that begin with byte - //! order markers. This function advances the seek pointer of the - //! stream over a UTF-8 BOM, but only if one exists. - static void skipUtf8Bom(std::ifstream &strm); - - private: - //! Helper method for init(). - template - static bool processSetting(const boost::property_tree::ptree &propTree, - const std::string &iniPath, - const FIELDTYPE &defaultValue, - FIELDTYPE &value) - { - try - { - // This get() will throw an exception if the path isn't found - std::string valueStr(propTree.template get(iniPath)); - - // Use our own string-to-type conversion, because what's built - // into the boost::property_tree is too lax - if (core::CStringUtils::stringToType(valueStr, - value) == false) - { - LOG_ERROR("Invalid value for setting " << iniPath << - " : " << valueStr); - return false; - } +class MODEL_EXPORT CLimits { +public: + //! Default number of events to consume during auto-config + static const size_t DEFAULT_AUTOCONFIG_EVENTS; + + //! Default maximum number of distinct values of a single field before + //! analysis of that field will be halted + static const size_t DEFAULT_ANOMALY_MAX_FIELD_VALUES; + + //! Default maximum number of time buckets to process during anomaly + //! detection before ceasing to output results + static const size_t DEFAULT_ANOMALY_MAX_TIME_BUCKETS; + + //! Default number of examples to display in results tables + static const size_t DEFAULT_RESULTS_MAX_EXAMPLES; + + //! Default threshold for unusual probabilities to be output even if + //! nothing is anomalous on a whole-system basis + static const double DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD; + +public: + //! Default constructor + CLimits(); + + //! Default destructor + ~CLimits(); + + //! Initialise from a config file. This overwrites current settings + //! with any found in the config file. Settings that are not present + //! in the config file will be reset to their default values. + bool init(const std::string& configFile); + + //! Access to settings + size_t autoConfigEvents() const; + size_t anomalyMaxTimeBuckets() const; + size_t maxExamples() const; + double unusualProbabilityThreshold() const; + size_t memoryLimitMB() const; + + //! Access to the resource monitor + CResourceMonitor& resourceMonitor(); + + //! boost::ini_parser doesn't like UTF-8 ini files that begin with byte + //! order markers. This function advances the seek pointer of the + //! stream over a UTF-8 BOM, but only if one exists. + static void skipUtf8Bom(std::ifstream& strm); + +private: + //! Helper method for init(). 
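The helper declared next is the pattern the class description above refers to:
parse the .ini file into a boost::property_tree, then read each setting with a
fallback to its default. A minimal standalone sketch of the same pattern, in
which the file name, the "results.maxexamples" path and the setting type are
all illustrative rather than the real CLimits settings:

    #include <boost/property_tree/ini_parser.hpp>
    #include <boost/property_tree/ptree.hpp>

    #include <cstddef>
    #include <iostream>
    #include <string>

    int main() {
        std::size_t maxExamples = 4; // the default survives a missing setting
        try {
            boost::property_tree::ptree propTree;
            boost::property_tree::ini_parser::read_ini("mllimits.conf", propTree);

            // Read the raw string and convert it separately, mirroring
            // processSetting's use of a custom string-to-type conversion.
            std::string valueStr = propTree.get<std::string>("results.maxexamples");
            maxExamples = std::stoul(valueStr);
        } catch (const boost::property_tree::ptree_error&) {
            // Missing file or missing path: keep the default.
        }
        std::cout << "maxexamples = " << maxExamples << '\n';
        return 0;
    }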
+ template + static bool processSetting(const boost::property_tree::ptree& propTree, + const std::string& iniPath, + const FIELDTYPE& defaultValue, + FIELDTYPE& value) { + try { + // This get() will throw an exception if the path isn't found + std::string valueStr(propTree.template get(iniPath)); + + // Use our own string-to-type conversion, because what's built + // into the boost::property_tree is too lax + if (core::CStringUtils::stringToType(valueStr, value) == false) { + LOG_ERROR("Invalid value for setting " << iniPath << " : " << valueStr); + return false; } - catch (boost::property_tree::ptree_error &) - { - LOG_DEBUG("Using default value (" << defaultValue << - ") for unspecified setting " << iniPath); - value = defaultValue; - } - - return true; + } catch (boost::property_tree::ptree_error&) { + LOG_DEBUG("Using default value (" << defaultValue << ") for unspecified setting " << iniPath); + value = defaultValue; } - private: - //! Number of events to consume during auto-config - size_t m_AutoConfigEvents; + return true; + } - //! Maximum number of time buckets to process during anomaly detection - //! before ceasing to output results - size_t m_AnomalyMaxTimeBuckets; +private: + //! Number of events to consume during auto-config + size_t m_AutoConfigEvents; - //! How many examples should we display in results tables? - size_t m_MaxExamples; + //! Maximum number of time buckets to process during anomaly detection + //! before ceasing to output results + size_t m_AnomalyMaxTimeBuckets; - //! Probability threshold for results to be output - double m_UnusualProbabilityThreshold; + //! How many examples should we display in results tables? + size_t m_MaxExamples; - //! Size of the memory limit for the resource monitor, in MB - size_t m_MemoryLimitMB; - - //! Resource monitor instance - CResourceMonitor m_ResourceMonitor; -}; + //! Probability threshold for results to be output + double m_UnusualProbabilityThreshold; + //! Size of the memory limit for the resource monitor, in MB + size_t m_MemoryLimitMB; + //! Resource monitor instance + CResourceMonitor m_ResourceMonitor; +}; } } #endif // INCLUDED_ml_model_CLimits_h - diff --git a/include/model/CMemoryUsageEstimator.h b/include/model/CMemoryUsageEstimator.h index 017dec7131..e98d0707cd 100644 --- a/include/model/CMemoryUsageEstimator.h +++ b/include/model/CMemoryUsageEstimator.h @@ -17,15 +17,12 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief Estimate memory usage based on previous model parameters. //! //! DESCRIPTION:\n @@ -40,64 +37,57 @@ namespace model //! forming the input matrix A, which is solved for the memory usage //! calculations in vector B. //! See http://eigen.tuxfamily.org/dox-devel/group__LeastSquares.html -class MODEL_EXPORT CMemoryUsageEstimator -{ - public: - //! Enumeration of the components included in the memory estimate. - enum EComponent - { - E_People = 0, - E_Attributes, - E_Correlations, - E_NumberPredictors - }; - using TSizeArray = boost::array; - using TOptionalSize = boost::optional; - - public: - //! Constructor - CMemoryUsageEstimator(); - - //! Get an estimate of the memory usage based on the given number - //! of different factors which contribute. - //! - //! This can fail, for example if too many estimations have taken - //! place, in which case a TOptionalSize() will be returned, indicating - //! that the caller must add a real value. 
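The least-squares fit described in the class comments above can be sketched
with Eigen directly. All the measurements below are invented, and the real
class additionally limits how far it will extrapolate from the values it has
stored:

    #include <Eigen/Dense>

    #include <iostream>

    int main() {
        // One row per remembered observation: (people, attributes, correlations).
        Eigen::MatrixXd A(4, 3);
        A << 10, 5, 2,
             20, 9, 4,
             40, 20, 8,
             80, 41, 15;

        // The measured memory usage matching each row of A.
        Eigen::VectorXd b(4);
        b << 1000, 1950, 3900, 7800;

        // Solve A * x = b in the least-squares sense; x estimates the memory
        // cost per person, per attribute and per correlation.
        Eigen::VectorXd x = A.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(b);

        // Extrapolate to a predictor combination that was never measured.
        Eigen::RowVector3d predictors(100.0, 50.0, 20.0);
        std::cout << "estimated memory = " << (predictors * x).value() << '\n';
        return 0;
    }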
- TOptionalSize estimate(const TSizeArray &predictors); - - //! Add an actual memory calculation value, along with the values of - //! the predictors. - void addValue(const TSizeArray &predictors, std::size_t memory); - - //! Debug the memory used by this component. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this component. - std::size_t memoryUsage() const; - - //! Persist this component. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Restore this component. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - private: - using TSizeArraySizePr = std::pair; - using TSizeArraySizePrBuf = boost::circular_buffer; - using TSizeArraySizePrBufCItr = TSizeArraySizePrBuf::const_iterator; - - private: - //! Get the maximum amount by which we'll extrapolate the memory usage. - std::size_t maximumExtrapolation(EComponent component) const; - - private: - //! The map of memory component values -> memory usage values - TSizeArraySizePrBuf m_Values; - - //! The number of times estimate has been called since the last - //! real value was added - std::size_t m_NumEstimatesSinceValue; +class MODEL_EXPORT CMemoryUsageEstimator { +public: + //! Enumeration of the components included in the memory estimate. + enum EComponent { E_People = 0, E_Attributes, E_Correlations, E_NumberPredictors }; + using TSizeArray = boost::array; + using TOptionalSize = boost::optional; + +public: + //! Constructor + CMemoryUsageEstimator(); + + //! Get an estimate of the memory usage based on the given number + //! of different factors which contribute. + //! + //! This can fail, for example if too many estimations have taken + //! place, in which case a TOptionalSize() will be returned, indicating + //! that the caller must add a real value. + TOptionalSize estimate(const TSizeArray& predictors); + + //! Add an actual memory calculation value, along with the values of + //! the predictors. + void addValue(const TSizeArray& predictors, std::size_t memory); + + //! Debug the memory used by this component. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this component. + std::size_t memoryUsage() const; + + //! Persist this component. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Restore this component. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + +private: + using TSizeArraySizePr = std::pair; + using TSizeArraySizePrBuf = boost::circular_buffer; + using TSizeArraySizePrBufCItr = TSizeArraySizePrBuf::const_iterator; + +private: + //! Get the maximum amount by which we'll extrapolate the memory usage. + std::size_t maximumExtrapolation(EComponent component) const; + +private: + //! The map of memory component values -> memory usage values + TSizeArraySizePrBuf m_Values; + + //! The number of times estimate has been called since the last + //! 
real value was added + std::size_t m_NumEstimatesSinceValue; }; } // model diff --git a/include/model/CMetricBucketGatherer.h b/include/model/CMetricBucketGatherer.h index 320f5dca44..566f6bed08 100644 --- a/include/model/CMetricBucketGatherer.h +++ b/include/model/CMetricBucketGatherer.h @@ -21,15 +21,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CDataGatherer; class CResourceMonitor; @@ -40,289 +37,280 @@ class CResourceMonitor; //! to characterize metric time series. //! //! \sa CDataGatherer. -class MODEL_EXPORT CMetricBucketGatherer : public CBucketGatherer -{ - public: - using TCategorySizePr = std::pair; - using TCategorySizePrAnyMap = std::map; - using TCategorySizePrAnyMapItr = TCategorySizePrAnyMap::iterator; - using TCategorySizePrAnyMapCItr = TCategorySizePrAnyMap::const_iterator; - - public: - //! \name Life-cycle - //@{ - //! Create a new population metric data gatherer. - //! - //! \param[in] dataGatherer The owning data gatherer. - //! \param[in] summaryCountFieldName If \p summaryMode is E_Manual - //! then this is the name of the field holding the summary count. - //! \param[in] personFieldName The name of the field which identifies - //! people. - //! \param[in] attributeFieldName The name of the field which defines - //! the person attributes. - //! \param[in] valueFieldName The name of the field which contains - //! the metric values. - //! \param[in] influenceFieldNames The field names for which we will - //! compute influences. - //! \param[in] startTime The start of the time interval for which - //! to gather data. - CMetricBucketGatherer(CDataGatherer &dataGatherer, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core_t::TTime startTime); - - //! Construct from a state document. - CMetricBucketGatherer(CDataGatherer &dataGatherer, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core::CStateRestoreTraverser &traverser); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken for - //! a general purpose copy constructor. - CMetricBucketGatherer(bool isForPersistence, - const CMetricBucketGatherer &other); - //@} - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Fill in the state from \p traverser. - virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Create a clone of this data gatherer that will result in the same - //! persisted state. The clone may be incomplete in ways that do not - //! affect the persisted representation, and must not be used for any - //! other purpose. - //! \warning The caller owns the object returned. - virtual CBucketGatherer *cloneForPersistence() const; - - //! The persistence tag name of this derived class. - virtual const std::string& persistenceTag() const; - - private: - //! 
Internal restore function. - bool acceptRestoreTraverserInternal(core::CStateRestoreTraverser &traverser, - bool isCurrentVersion); - //@} - - public: - //! \name Fields - //@{ - //! Get the person field name. - //! - //! This is the common field in all searches "along" which the - //! probabilities are aggregated, i.e. the "over" field name for - //! population searches and the "by" field name for individual - //! searches. - virtual const std::string &personFieldName() const; - - //! Get the attribute field name if one exists, i.e. the "by" for - //! population searches, field name and returns empty otherwise. - virtual const std::string &attributeFieldName() const; - - //! Returns an empty string. - virtual const std::string &valueFieldName() const; - - //! Get an iterator at the beginning the influencing field names. - virtual TStrVecCItr beginInfluencers() const; - - //! Get an iterator at the end of the influencing field names. - virtual TStrVecCItr endInfluencers() const; - - //! Get the fields for which to gather data. - //! - //! For individual searches this gets the field which defines the - //! categories whose counts are being analyzed. For population - //! searches this gets the fields identifying the people and person - //! attributes which are being analyzed. An empty string acts like - //! a wild card and matches all records. This is used for analysis - //! which is attribute independent such as total count. - virtual const TStrVec &fieldsOfInterest() const; - //@} - - //! Get a description of the search. - virtual std::string description() const; - - //! \name Update - //@{ - //! Process the specified fields. - //! - //! \note For individual searches \p fieldValues should contain two - //! fields. The first field should contain the by clause field value - //! or a generic name if none was specified. The second field should - //! contain a number corresponding to the metric value. For population - //! searches \p fieldValues should contain three fields. The first - //! field should contain the over clause field value. The second field - //! should the by clause field value or a generic name if none was - //! specified. The third field should contain a number corresponding - //! to the metric value. - virtual bool processFields(const TStrCPtrVec &fieldValues, - CEventData &result, - CResourceMonitor &resourceMonitor); - //@} - - //! \name Person - //@{ - //! Stop gathering data on the people identified by \p peopleToRemove. - virtual void recyclePeople(const TSizeVec &peopleToRemove); - - //! Remove all traces of people whose identifiers are greater than - //! or equal to \p lowestPersonToRemove. - virtual void removePeople(std::size_t lowestPersonToRemove); - //@} - - //! \name Attribute - //@{ - //! Stop gathering data on the attributes identified by \p attributesToRemove. - virtual void recycleAttributes(const TSizeVec &attributesToRemove); - - //! Remove all traces of attributes whose identifiers are greater - //! than or equal to \p lowestAttributeToRemove. - virtual void removeAttributes(std::size_t lowestAttributeToRemove); - //@} - - //! Get the checksum of this gatherer. - virtual uint64_t checksum() const; - - //! Debug the memory used by this object. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this object. - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const; - - //! Clear this data gatherer. 
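The persist-only copy constructors declared above (the redundant boolean whose
only job is to keep the signature distinct from a general purpose copy
constructor) reduce to the following shape; CGatherer and its member are
invented for illustration:

    #include <string>

    class CGatherer {
    public:
        explicit CGatherer(const std::string& name) : m_Name(name) {}

        // The bool only disambiguates this from a general purpose copy
        // constructor; the copy is valid solely for persistence.
        CGatherer(bool /*isForPersistence*/, const CGatherer& other) : m_Name(other.m_Name) {}

        // The caller owns the returned object.
        CGatherer* cloneForPersistence() const { return new CGatherer(true, *this); }

    private:
        CGatherer(const CGatherer&) = delete;
        CGatherer& operator=(const CGatherer&) = delete;

    private:
        std::string m_Name;
    };

    int main() {
        CGatherer gatherer("example");
        CGatherer* forPersistence = gatherer.cloneForPersistence();
        delete forPersistence;
        return 0;
    }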
- virtual void clear(); - - //! Reset bucket and return true if bucket was successfully reset or false otherwise. - virtual bool resetBucket(core_t::TTime bucketStart); - - //! Release memory that is no longer needed - virtual void releaseMemory(core_t::TTime samplingCutoffTime); - - //! \name Features - //@{ - //! Get the raw data for all features for the bucketing time interval - //! containing \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the feature data at \p time. - virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, - TFeatureAnyPrVec &result) const; - //@} - - private: - //! Create samples if possible for the bucket pointed out by \p time. - virtual void sample(core_t::TTime time); - - //! Resize the necessary data structures so they can accommodate - //! the person and attribute identified by \p pid and \p cid, - //! respectively. - //! - //! \param[in] pid The identifier of the person to accommodate. - //! \param[in] cid The identifier of the attribute to accommodate. - virtual void resize(std::size_t pid, std::size_t cid); - - //! Record the arrival of \p values for attribute identified by - //! \p cid and person identified by \p pid. - //! - //! \param[in] pid The identifier of the person who generated - //! the value. - //! \param[in] cid The identifier of the value's attribute. - //! \param[in] time The time of the \p values. - //! \param[in] values The metric statistic value(s) - //! \param[in] count The number of measurements in the metric - //! statistic. - //! \param[in] stringValue Ignored. - //! \param[in] influences The influencing field values which - //! label the value. - virtual void addValue(std::size_t pid, - std::size_t cid, - core_t::TTime time, - const CEventData::TDouble1VecArray &values, - std::size_t count, - const CEventData::TOptionalStr &stringValue, - const TStoredStringPtrVec &influences); - - //! Start a new bucket. - virtual void startNewBucket(core_t::TTime time, bool skipUpdates); - - //! Initialize the field names collection. - //! initializeFieldNamesPart2() must be called after this. - //! In the event that the data gatherer is being restored from persisted - //! state, the sequence must be: - //! 1) initializeFieldNamesPart1() - //! 2) restore state - //! 3) initializeFieldNamesPart2() - void initializeFieldNamesPart1(const std::string &personFieldName, - const std::string &attributeFieldName, - const TStrVec &influenceFieldNames); - - //! Initialize the field names collection. - //! initializeFieldNamesPart1() must be called before this. - //! In the event that the data gatherer is being restored from persisted - //! state, the sequence must be: - //! 1) initializeFieldNamesPart1() - //! 2) restore state - //! 3) initializeFieldNamesPart2() - void initializeFieldNamesPart2(const std::string &valueFieldName, - const std::string &summaryCountFieldName); - - //! Initialize the feature data gatherers. - void initializeFeatureData(); - - private: - - //! The metric value field name. This is held separately to - //! m_FieldNames because in the case of summarization the field - //! names holding the summarized values will be mangled. - std::string m_ValueFieldName; - - //! The names of the fields of interest. - //! - //! The entries in order are: - //! -# The name of the field which identifies people, - //! -# For population models only, the name of the field which - //! identifies people's attributes, - //! -# The name of zero or more influencing fields, - //! 
-# The name of the field holding the count followed by the - //! field name(s) of the field(s) which hold the statistics - //! themselves, which must (for those that are present) be - //! ordered mean, min, max, sum. - //! -# For the API with user defined pre-summarisation, the name - //! of the field which holds the count then the name of the field - //! which holds the statistic value, - //! -# Otherwise the name of the field which holds the metric value. - TStrVec m_FieldNames; - - //! The position of the first influencing field. - std::size_t m_BeginInfluencingFields; - - //! The position of the first count/value field. - std::size_t m_BeginValueFields; - - //! For summarized values, this stores the metric categories - //! corresponding to the summarized field names in m_FieldNames; - //! for non-summarized input this will be empty - TMetricCategoryVec m_FieldMetricCategories; - - //! The data features we are gathering. - TCategorySizePrAnyMap m_FeatureData; +class MODEL_EXPORT CMetricBucketGatherer : public CBucketGatherer { +public: + using TCategorySizePr = std::pair; + using TCategorySizePrAnyMap = std::map; + using TCategorySizePrAnyMapItr = TCategorySizePrAnyMap::iterator; + using TCategorySizePrAnyMapCItr = TCategorySizePrAnyMap::const_iterator; + +public: + //! \name Life-cycle + //@{ + //! Create a new population metric data gatherer. + //! + //! \param[in] dataGatherer The owning data gatherer. + //! \param[in] summaryCountFieldName If \p summaryMode is E_Manual + //! then this is the name of the field holding the summary count. + //! \param[in] personFieldName The name of the field which identifies + //! people. + //! \param[in] attributeFieldName The name of the field which defines + //! the person attributes. + //! \param[in] valueFieldName The name of the field which contains + //! the metric values. + //! \param[in] influenceFieldNames The field names for which we will + //! compute influences. + //! \param[in] startTime The start of the time interval for which + //! to gather data. + CMetricBucketGatherer(CDataGatherer& dataGatherer, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core_t::TTime startTime); + + //! Construct from a state document. + CMetricBucketGatherer(CDataGatherer& dataGatherer, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core::CStateRestoreTraverser& traverser); + + //! Create a copy that will result in the same persisted state as the + //! original. This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken for + //! a general purpose copy constructor. + CMetricBucketGatherer(bool isForPersistence, const CMetricBucketGatherer& other); + //@} + + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Fill in the state from \p traverser. + virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Create a clone of this data gatherer that will result in the same + //! persisted state. The clone may be incomplete in ways that do not + //! 
affect the persisted representation, and must not be used for any
+    //! other purpose.
+    //! \warning The caller owns the object returned.
+    virtual CBucketGatherer* cloneForPersistence() const;
+
+    //! The persistence tag name of this derived class.
+    virtual const std::string& persistenceTag() const;
+
+private:
+    //! Internal restore function.
+    bool acceptRestoreTraverserInternal(core::CStateRestoreTraverser& traverser, bool isCurrentVersion);
+    //@}
+
+public:
+    //! \name Fields
+    //@{
+    //! Get the person field name.
+    //!
+    //! This is the common field in all searches "along" which the
+    //! probabilities are aggregated, i.e. the "over" field name for
+    //! population searches and the "by" field name for individual
+    //! searches.
+    virtual const std::string& personFieldName() const;
+
+    //! Get the attribute field name if one exists, i.e. the "by" field
+    //! name for population searches, and returns empty otherwise.
+    virtual const std::string& attributeFieldName() const;
+
+    //! Returns an empty string.
+    virtual const std::string& valueFieldName() const;
+
+    //! Get an iterator at the beginning of the influencing field names.
+    virtual TStrVecCItr beginInfluencers() const;
+
+    //! Get an iterator at the end of the influencing field names.
+    virtual TStrVecCItr endInfluencers() const;
+
+    //! Get the fields for which to gather data.
+    //!
+    //! For individual searches this gets the field which defines the
+    //! categories whose counts are being analyzed. For population
+    //! searches this gets the fields identifying the people and person
+    //! attributes which are being analyzed. An empty string acts like
+    //! a wild card and matches all records. This is used for analysis
+    //! which is attribute independent such as total count.
+    virtual const TStrVec& fieldsOfInterest() const;
+    //@}
+
+    //! Get a description of the search.
+    virtual std::string description() const;
+
+    //! \name Update
+    //@{
+    //! Process the specified fields.
+    //!
+    //! \note For individual searches \p fieldValues should contain two
+    //! fields. The first field should contain the by clause field value
+    //! or a generic name if none was specified. The second field should
+    //! contain a number corresponding to the metric value. For population
+    //! searches \p fieldValues should contain three fields. The first
+    //! field should contain the over clause field value. The second field
+    //! should contain the by clause field value or a generic name if none was
+    //! specified. The third field should contain a number corresponding
+    //! to the metric value.
+    virtual bool processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor);
+    //@}
+
+    //! \name Person
+    //@{
+    //! Stop gathering data on the people identified by \p peopleToRemove.
+    virtual void recyclePeople(const TSizeVec& peopleToRemove);
+
+    //! Remove all traces of people whose identifiers are greater than
+    //! or equal to \p lowestPersonToRemove.
+    virtual void removePeople(std::size_t lowestPersonToRemove);
+    //@}
+
+    //! \name Attribute
+    //@{
+    //! Stop gathering data on the attributes identified by \p attributesToRemove.
+    virtual void recycleAttributes(const TSizeVec& attributesToRemove);
+
+    //! Remove all traces of attributes whose identifiers are greater
+    //! than or equal to \p lowestAttributeToRemove.
+    virtual void removeAttributes(std::size_t lowestAttributeToRemove);
+    //@}
+
+    //! Get the checksum of this gatherer.
+    virtual uint64_t checksum() const;
+
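To make the processFields contract above concrete, the two layouts it
describes look as follows. The field values are invented, and the real call
passes a vector of string pointers (TStrCPtrVec) rather than strings:

    #include <string>
    #include <vector>

    int main() {
        // Individual metric search: by clause field value, then the metric value.
        std::vector<std::string> individual{"AAL", "372.5"};

        // Population metric search: over clause field value, by clause field
        // value, then the metric value.
        std::vector<std::string> population{"client-17", "AAL", "372.5"};

        return individual.size() == 2 && population.size() == 3 ? 0 : 1;
    }

+    //! Debug the memory used by this object.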
+ virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this object. + virtual std::size_t memoryUsage() const; + + //! Get the static size of this object - used for virtual hierarchies + virtual std::size_t staticSize() const; + + //! Clear this data gatherer. + virtual void clear(); + + //! Reset bucket and return true if bucket was successfully reset or false otherwise. + virtual bool resetBucket(core_t::TTime bucketStart); + + //! Release memory that is no longer needed + virtual void releaseMemory(core_t::TTime samplingCutoffTime); + + //! \name Features + //@{ + //! Get the raw data for all features for the bucketing time interval + //! containing \p time. + //! + //! \param[in] time The time of interest. + //! \param[out] result Filled in with the feature data at \p time. + virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec& result) const; + //@} + +private: + //! Create samples if possible for the bucket pointed out by \p time. + virtual void sample(core_t::TTime time); + + //! Resize the necessary data structures so they can accommodate + //! the person and attribute identified by \p pid and \p cid, + //! respectively. + //! + //! \param[in] pid The identifier of the person to accommodate. + //! \param[in] cid The identifier of the attribute to accommodate. + virtual void resize(std::size_t pid, std::size_t cid); + + //! Record the arrival of \p values for attribute identified by + //! \p cid and person identified by \p pid. + //! + //! \param[in] pid The identifier of the person who generated + //! the value. + //! \param[in] cid The identifier of the value's attribute. + //! \param[in] time The time of the \p values. + //! \param[in] values The metric statistic value(s) + //! \param[in] count The number of measurements in the metric + //! statistic. + //! \param[in] stringValue Ignored. + //! \param[in] influences The influencing field values which + //! label the value. + virtual void addValue(std::size_t pid, + std::size_t cid, + core_t::TTime time, + const CEventData::TDouble1VecArray& values, + std::size_t count, + const CEventData::TOptionalStr& stringValue, + const TStoredStringPtrVec& influences); + + //! Start a new bucket. + virtual void startNewBucket(core_t::TTime time, bool skipUpdates); + + //! Initialize the field names collection. + //! initializeFieldNamesPart2() must be called after this. + //! In the event that the data gatherer is being restored from persisted + //! state, the sequence must be: + //! 1) initializeFieldNamesPart1() + //! 2) restore state + //! 3) initializeFieldNamesPart2() + void initializeFieldNamesPart1(const std::string& personFieldName, + const std::string& attributeFieldName, + const TStrVec& influenceFieldNames); + + //! Initialize the field names collection. + //! initializeFieldNamesPart1() must be called before this. + //! In the event that the data gatherer is being restored from persisted + //! state, the sequence must be: + //! 1) initializeFieldNamesPart1() + //! 2) restore state + //! 3) initializeFieldNamesPart2() + void initializeFieldNamesPart2(const std::string& valueFieldName, const std::string& summaryCountFieldName); + + //! Initialize the feature data gatherers. + void initializeFeatureData(); + +private: + //! The metric value field name. This is held separately to + //! m_FieldNames because in the case of summarization the field + //! names holding the summarized values will be mangled. + std::string m_ValueFieldName; + + //! 
The names of the fields of interest. + //! + //! The entries in order are: + //! -# The name of the field which identifies people, + //! -# For population models only, the name of the field which + //! identifies people's attributes, + //! -# The name of zero or more influencing fields, + //! -# The name of the field holding the count followed by the + //! field name(s) of the field(s) which hold the statistics + //! themselves, which must (for those that are present) be + //! ordered mean, min, max, sum. + //! -# For the API with user defined pre-summarisation, the name + //! of the field which holds the count then the name of the field + //! which holds the statistic value, + //! -# Otherwise the name of the field which holds the metric value. + TStrVec m_FieldNames; + + //! The position of the first influencing field. + std::size_t m_BeginInfluencingFields; + + //! The position of the first count/value field. + std::size_t m_BeginValueFields; + + //! For summarized values, this stores the metric categories + //! corresponding to the summarized field names in m_FieldNames; + //! for non-summarized input this will be empty + TMetricCategoryVec m_FieldMetricCategories; + + //! The data features we are gathering. + TCategorySizePrAnyMap m_FeatureData; }; - } } diff --git a/include/model/CMetricModel.h b/include/model/CMetricModel.h index 1e365ddb04..eecae258e3 100644 --- a/include/model/CMetricModel.h +++ b/include/model/CMetricModel.h @@ -22,20 +22,15 @@ #include - -namespace -{ +namespace { class CMockMetricModel; } -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { class CModelFactory; //! \brief The metric model common functionality. @@ -55,285 +50,272 @@ class CModelFactory; //! //! It assumes data are supplied in time order since this means minimal //! state can be maintained. -class MODEL_EXPORT CMetricModel : public CIndividualModel -{ - public: - using TFeatureData = SMetricFeatureData; - using TSizeFeatureDataPr = std::pair; - using TSizeFeatureDataPrVec = std::vector; - using TFeatureSizeFeatureDataPrVecPr = std::pair; - using TFeatureSizeFeatureDataPrVecPrVec = std::vector; - - //! The statistics we maintain about a bucketing interval. - struct MODEL_EXPORT SBucketStats - { - explicit SBucketStats(core_t::TTime startTime); - - //! The start time of this bucket. - core_t::TTime s_StartTime; - //! The non-zero person counts in the current bucket. - TSizeUInt64PrVec s_PersonCounts; - //! The total count in the current bucket. - uint64_t s_TotalCount; - //! The feature data samples for the current bucketing interval. - TFeatureSizeFeatureDataPrVecPrVec s_FeatureData; - //! A cache of the corrections applied to interim results. - //! The key is for non-correlated corrections - //! or for correlated corrections - mutable TFeatureSizeSizeTripleDouble1VecUMap s_InterimCorrections; - }; - - public: - //! \name Life-cycle - //@{ - //! \param[in] params The global configuration parameters. - //! \param[in] dataGatherer The object that gathers time series data. - //! \param[in] newFeatureModels The new models to use for each feature. - //! \param[in] newFeatureCorrelateModelPriors The prior to use for the - //! new model of correlates for each feature. - //! \param[in] featureCorrelatesModels The model of all correlates for - //! each feature. - //! \param[in] influenceCalculators The influence calculators to use - //! for each feature. 
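To illustrate the m_FieldNames ordering listed above, a population metric
search with one influencer and manual pre-summarisation might gather the
following names; every value here is hypothetical:

    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> fieldNames{
            "client",     // identifies people
            "airline",    // identifies attributes (population searches only)
            "geo.src",    // an influencing field
            "doc_count",  // the summary count field
            "mean_bytes"  // the pre-summarised statistic (mean)
        };
        return fieldNames.size() == 5 ? 0 : 1;
    }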
- CMetricModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators); - - //! Constructor used for restoring persisted models. - //! - //! \note The current bucket statistics are left default initialized - //! and so must be sampled for before this model can be used. - CMetricModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators, - core::CStateRestoreTraverser &traverser); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken - //! for a general purpose copy constructor. - CMetricModel(bool isForPersistence, const CMetricModel &other); - //@} - - //! \name Persistence - //@{ - //! Persist state by passing information to \p inserter. - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Restore reading state from \p traverser. - virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Create a clone of this model that will result in the same persisted - //! state. The clone may be incomplete in ways that do not affect the - //! persisted representation, and must not be used for any other - //! purpose. - //! \warning The caller owns the object returned. - virtual CAnomalyDetectorModel *cloneForPersistence() const; - //@} - - //! Get the model category. - virtual model_t::EModelType category() const; - - //! Returns false. - virtual bool isEventRate() const; - - //! Returns true. - virtual bool isMetric() const; - - //! \name Bucket Statistics - //@{ - //! Returns null. - virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; - - //! Get the value of \p feature for the person identified - //! by \p pid in the bucketing interval containing \p time. - //! - //! \param[in] feature The feature of interest. - //! \param[in] pid The identifier of the person of interest. - //! \param[in] cid Ignored. - //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - - //! Get the baseline bucket value of \p feature for the person - //! identified by \p pid as of the start of the current bucketing - //! interval. - //! - //! \param[in] feature The feature of interest. - //! \param[in] pid The identifier of the person of interest. - //! \param[in] cid Ignored. - //! \param[in] type A description of the type of result for which - //! to get the baseline. See CResultType for more details. - //! \param[in] correlated The correlated series' identifiers and - //! their values if any. - //! \param[in] time The time of interest. - virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const; - - //@} - - //! 
\name Person - //@{ - //! Get the person unique identifiers which have a feature value - //! in the bucketing time interval including \p time. - virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec &result) const; - //@} - - //! \name Update - //@{ - //! Sample any state needed by computeProbablity in the time - //! interval [\p startTime, \p endTime] but do not update the - //! model. This is needed by the results preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! Update the model with features samples from the time interval - //! [\p startTime, \p endTime]. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - //! \param[in] resourceMonitor The resourceMonitor. - virtual void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - //@} - - //! \name Probability - //@{ - //! Compute the probability of seeing the metric values in the - //! time interval [\p startTime, \p endTime] for the person - //! identified by \p pid. - //! - //! \param[in] pid The identifier of the person of interest. - //! \param[in] startTime The start of the time interval of interest. - //! \param[in] endTime The end of the time interval of interest. - //! \param[in] partitioningFields The partitioning field (name, value) - //! pairs for which to compute the the probability. - //! \param[in] numberAttributeProbabilities Ignored. - //! \param[out] result A structure containing the probability, - //! the smallest \p numberAttributeProbabilities attribute - //! probabilities, the influences and any extra descriptive data - virtual bool computeProbability(std::size_t pid, - core_t::TTime startTime, - core_t::TTime endTime, - CPartitioningFields &partitioningFields, - std::size_t numberAttributeProbabilities, - SAnnotatedProbability &result) const; - //@} - - //! Get the checksum of this model. - //! - //! \param[in] includeCurrentBucketStats If true then include - //! the current bucket statistics. (This is designed to handle - //! serialization, for which we don't serialize the current - //! bucket statistics.) - virtual uint64_t checksum(bool includeCurrentBucketStats = true) const; - - //! Debug the memory used by this model. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this model. - virtual std::size_t memoryUsage() const; - - //! Get the static size of this object - used for virtual hierarchies. - virtual std::size_t staticSize() const; - - //! Get the non-estimated value of the the memory used by this model. - virtual std::size_t computeMemoryUsage() const; - - //! Get a view of the internals of the model for visualization. - virtual CModelDetailsViewPtr details() const; - - //! Get the value of the \p feature of the person identified - //! by \p pid for the bucketing interval containing \p time. - const TFeatureData *featureData(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) const; - - private: - using TOptionalSample = boost::optional; - using TTime2Vec = core::CSmallVector; - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; - using TMeanAccumulator1Vec = core::CSmallVector; - - private: - //! 
Get the start time of the current bucket.
-        virtual core_t::TTime currentBucketStartTime() const;
-
-        //! Set the start time of the current bucket.
-        virtual void currentBucketStartTime(core_t::TTime time);
-
-        //! Get the total count of the current bucket.
-        uint64_t currentBucketTotalCount() const;
-
-        //! Get the interim corrections of the current bucket.
-        TFeatureSizeSizeTripleDouble1VecUMap &currentBucketInterimCorrections() const;
-
-        //! Get the person counts in the current bucket.
-        virtual const TSizeUInt64PrVec &currentBucketPersonCounts() const;
-
-        //! Get writable person counts in the current bucket.
-        virtual TSizeUInt64PrVec &currentBucketPersonCounts();
-
-        //! Set the current bucket total count.
-        virtual void currentBucketTotalCount(uint64_t totalCount);
-
-        //! Create the time series models for "n" newly observed people.
-        virtual void createNewModels(std::size_t n, std::size_t m);
-
-        //! Reinitialize the time series models for recycled people.
-        virtual void updateRecycledModels();
-
-        //! Clear out large state objects for people that are pruned.
-        virtual void clearPrunedResources(const TSizeVec &people,
-                                          const TSizeVec &attributes);
-
-        //! Check if there are correlates for \p feature and the person
-        //! identified by \p pid.
-        bool correlates(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const;
-
-        //! Fill in the probability calculation parameters for \p feature
-        //! and person identified by \p pid.
-        void fill(model_t::EFeature feature,
-                  std::size_t pid,
-                  core_t::TTime bucketTime,
-                  bool interim,
-                  CProbabilityAndInfluenceCalculator::SParams &params) const;
-
-        //! Fill in the probability calculation parameters for the correlates
-        //! of \p feature and the person identified by \p pid.
-        void fill(model_t::EFeature feature,
-                  std::size_t pid,
-                  core_t::TTime bucketTime,
-                  bool interim,
-                  CProbabilityAndInfluenceCalculator::SCorrelateParams &params,
-                  TStrCRefDouble1VecDouble1VecPrPrVecVecVec &correlateInfluenceValues) const;
-
-    private:
-        //! The statistics we maintain about the bucket.
-        SBucketStats m_CurrentBucketStats;
-
-        friend class CMetricModelDetailsView;
-        friend class ::CMockMetricModel;
+class MODEL_EXPORT CMetricModel : public CIndividualModel {
+public:
+    using TFeatureData = SMetricFeatureData;
+    using TSizeFeatureDataPr = std::pair<std::size_t, TFeatureData>;
+    using TSizeFeatureDataPrVec = std::vector<TSizeFeatureDataPr>;
+    using TFeatureSizeFeatureDataPrVecPr = std::pair<model_t::EFeature, TSizeFeatureDataPrVec>;
+    using TFeatureSizeFeatureDataPrVecPrVec = std::vector<TFeatureSizeFeatureDataPrVecPr>;
+
+    //! The statistics we maintain about a bucketing interval.
+    struct MODEL_EXPORT SBucketStats {
+        explicit SBucketStats(core_t::TTime startTime);
+
+        //! The start time of this bucket.
+        core_t::TTime s_StartTime;
+        //! The non-zero person counts in the current bucket.
+        TSizeUInt64PrVec s_PersonCounts;
+        //! The total count in the current bucket.
+        uint64_t s_TotalCount;
+        //! The feature data samples for the current bucketing interval.
+        TFeatureSizeFeatureDataPrVecPrVec s_FeatureData;
+        //! A cache of the corrections applied to interim results.
+        //! The key is <feature, pid, pid> for non-correlated corrections
+        //! or <feature, pid, correlate pid> for correlated corrections
+        mutable TFeatureSizeSizeTripleDouble1VecUMap s_InterimCorrections;
+    };
+
+public:
+    //! \name Life-cycle
+    //@{
+    //! \param[in] params The global configuration parameters.
+    //! \param[in] dataGatherer The object that gathers time series data.
+    //! \param[in] newFeatureModels The new models to use for each feature.
+    //! \param[in] newFeatureCorrelateModelPriors The prior to use for the
+    //! new model of correlates for each feature.
+ //! \param[in] featureCorrelatesModels The model of all correlates for + //! each feature. + //! \param[in] influenceCalculators The influence calculators to use + //! for each feature. + CMetricModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators); + + //! Constructor used for restoring persisted models. + //! + //! \note The current bucket statistics are left default initialized + //! and so must be sampled for before this model can be used. + CMetricModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + core::CStateRestoreTraverser& traverser); + + //! Create a copy that will result in the same persisted state as the + //! original. This is effectively a copy constructor that creates a + //! copy that's only valid for a single purpose. The boolean flag is + //! redundant except to create a signature that will not be mistaken + //! for a general purpose copy constructor. + CMetricModel(bool isForPersistence, const CMetricModel& other); + //@} + + //! \name Persistence + //@{ + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Restore reading state from \p traverser. + virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Create a clone of this model that will result in the same persisted + //! state. The clone may be incomplete in ways that do not affect the + //! persisted representation, and must not be used for any other + //! purpose. + //! \warning The caller owns the object returned. + virtual CAnomalyDetectorModel* cloneForPersistence() const; + //@} + + //! Get the model category. + virtual model_t::EModelType category() const; + + //! Returns false. + virtual bool isEventRate() const; + + //! Returns true. + virtual bool isMetric() const; + + //! \name Bucket Statistics + //@{ + //! Returns null. + virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; + + //! Get the value of \p feature for the person identified + //! by \p pid in the bucketing interval containing \p time. + //! + //! \param[in] feature The feature of interest. + //! \param[in] pid The identifier of the person of interest. + //! \param[in] cid Ignored. + //! \param[in] time The time of interest. + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + + //! Get the baseline bucket value of \p feature for the person + //! identified by \p pid as of the start of the current bucketing + //! interval. + //! + //! \param[in] feature The feature of interest. + //! \param[in] pid The identifier of the person of interest. + //! \param[in] cid Ignored. + //! \param[in] type A description of the type of result for which + //! to get the baseline. See CResultType for more details. + //! \param[in] correlated The correlated series' identifiers and + //! their values if any. + //! \param[in] time The time of interest. 
+    virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature,
+                                           std::size_t pid,
+                                           std::size_t cid,
+                                           model_t::CResultType type,
+                                           const TSizeDoublePr1Vec& correlated,
+                                           core_t::TTime time) const;
+
+    //@}
+
+    //! \name Person
+    //@{
+    //! Get the person unique identifiers which have a feature value
+    //! in the bucketing time interval including \p time.
+    virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const;
+    //@}
+
+    //! \name Update
+    //@{
+    //! Sample any state needed by computeProbability in the time
+    //! interval [\p startTime, \p endTime] but do not update the
+    //! model. This is needed by the results preview.
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+    //! Update the model with feature samples from the time interval
+    //! [\p startTime, \p endTime].
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    //! \param[in] resourceMonitor The resourceMonitor.
+    virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+    //@}
+
+    //! \name Probability
+    //@{
+    //! Compute the probability of seeing the metric values in the
+    //! time interval [\p startTime, \p endTime] for the person
+    //! identified by \p pid.
+    //!
+    //! \param[in] pid The identifier of the person of interest.
+    //! \param[in] startTime The start of the time interval of interest.
+    //! \param[in] endTime The end of the time interval of interest.
+    //! \param[in] partitioningFields The partitioning field (name, value)
+    //! pairs for which to compute the probability.
+    //! \param[in] numberAttributeProbabilities Ignored.
+    //! \param[out] result A structure containing the probability,
+    //! the smallest \p numberAttributeProbabilities attribute
+    //! probabilities, the influences and any extra descriptive data
+    virtual bool computeProbability(std::size_t pid,
+                                    core_t::TTime startTime,
+                                    core_t::TTime endTime,
+                                    CPartitioningFields& partitioningFields,
+                                    std::size_t numberAttributeProbabilities,
+                                    SAnnotatedProbability& result) const;
+    //@}
+
+    //! Get the checksum of this model.
+    //!
+    //! \param[in] includeCurrentBucketStats If true then include
+    //! the current bucket statistics. (This is designed to handle
+    //! serialization, for which we don't serialize the current
+    //! bucket statistics.)
+    virtual uint64_t checksum(bool includeCurrentBucketStats = true) const;
+
+    //! Debug the memory used by this model.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this model.
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object - used for virtual hierarchies.
+    virtual std::size_t staticSize() const;
+
+    //! Get the non-estimated value of the memory used by this model.
+    virtual std::size_t computeMemoryUsage() const;
+
+    //! Get a view of the internals of the model for visualization.
+    virtual CModelDetailsViewPtr details() const;
+
+    //! Get the value of the \p feature of the person identified
+    //! by \p pid for the bucketing interval containing \p time.
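Editorial note: a minimal sketch of how a caller might combine the bucket value and
baseline accessors declared above. Only the two member functions are taken from this
header; the helper name, include path, and argument choices are hypothetical.

#include <model/CMetricModel.h>

namespace {
// Hypothetical helper: report how far the current bucket value of a feature
// is from its baseline for one person. Returns 0.0 when either is unavailable.
double deviationFromBaseline(const ml::model::CMetricModel& model,
                             ml::model_t::EFeature feature,
                             std::size_t pid,
                             ml::model_t::CResultType type,
                             ml::core_t::TTime time) {
    // cid is documented as ignored by this model, so any value works here.
    auto current = model.currentBucketValue(feature, pid, 0, time);
    auto baseline = model.baselineBucketMean(feature, pid, 0, type, {}, time);
    if (current.empty() || baseline.empty()) {
        return 0.0;
    }
    return current[0] - baseline[0];
}
}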
+ const TFeatureData* featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const; + +private: + using TOptionalSample = boost::optional; + using TTime2Vec = core::CSmallVector; + using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; + using TMeanAccumulator1Vec = core::CSmallVector; + +private: + //! Get the start time of the current bucket. + virtual core_t::TTime currentBucketStartTime() const; + + //! Set the start time of the current bucket. + virtual void currentBucketStartTime(core_t::TTime time); + + //! Get the total count of the current bucket. + uint64_t currentBucketTotalCount() const; + + //! Get the interim corrections of the current bucket. + TFeatureSizeSizeTripleDouble1VecUMap& currentBucketInterimCorrections() const; + + //! Get the person counts in the current bucket. + virtual const TSizeUInt64PrVec& currentBucketPersonCounts() const; + + //! Get writable person counts in the current bucket. + virtual TSizeUInt64PrVec& currentBucketPersonCounts(); + + //! Set the current bucket total count. + virtual void currentBucketTotalCount(uint64_t totalCount); + + //! Create the time series models for "n" newly observed people. + virtual void createNewModels(std::size_t n, std::size_t m); + + //! Reinitialize the time series models for recycled people. + virtual void updateRecycledModels(); + + //! Clear out large state objects for people that are pruned. + virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes); + + //! Check if there are correlates for \p feature and the person + //! identified by \p pid. + bool correlates(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const; + + //! Fill in the probability calculation parameters for \p feature + //! and person identified by \p pid. + void fill(model_t::EFeature feature, + std::size_t pid, + core_t::TTime bucketTime, + bool interim, + CProbabilityAndInfluenceCalculator::SParams& params) const; + + //! Fill in the probability calculation parameters for the correlates + //! of \p feature and the person identified by \p pid. + void fill(model_t::EFeature feature, + std::size_t pid, + core_t::TTime bucketTime, + bool interim, + CProbabilityAndInfluenceCalculator::SCorrelateParams& params, + TStrCRefDouble1VecDouble1VecPrPrVecVecVec& correlateInfluenceValues) const; + +private: + //! The statistics we maintain about the bucket. + SBucketStats m_CurrentBucketStats; + + friend class CMetricModelDetailsView; + friend class ::CMockMetricModel; }; - } } diff --git a/include/model/CMetricModelFactory.h b/include/model/CMetricModelFactory.h index ae747df2f5..3843e94672 100644 --- a/include/model/CMetricModelFactory.h +++ b/include/model/CMetricModelFactory.h @@ -10,14 +10,11 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief A factory class implementation for CMetricModel. //! @@ -25,168 +22,161 @@ namespace model //! This concrete factory implements the methods to make new models //! and data gatherers, and create default priors suitable for the //! CMetricModel class. -class MODEL_EXPORT CMetricModelFactory : public CModelFactory -{ - public: - //! Lift all overloads into scope. - using CModelFactory::defaultMultivariatePrior; - using CModelFactory::defaultPrior; - - public: - //! \note The default arguments supplied to the constructor are - //! intended for unit testing and are not necessarily good defaults. - //! 
The CModelConfig class is responsible for providing sensible
-        //! default values for the factory for use within our products.
-        explicit CMetricModelFactory(const SModelParams &params,
-                                     model_t::ESummaryMode summaryMode = model_t::E_None,
-                                     const std::string &summaryCountFieldName = "");
-
-        //! Create a copy of the factory owned by the calling code.
-        virtual CMetricModelFactory *clone() const;
-
-        //! \name Factory Methods
-        //@{
-        //! Make a new metric model.
-        //!
-        //! \param[in] initData The parameters needed to initialize the model.
-        //! \warning It is owned by the calling code.
-        virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData) const;
-
-        //! Make a new metric model from part of a state document.
-        //!
-        //! \param[in] initData Additional parameters needed to initialize
-        //! the model.
-        //! \param[in,out] traverser A state document traverser.
-        //! \warning It is owned by the calling code.
-        virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData,
-                                                 core::CStateRestoreTraverser &traverser) const;
-
-        //! Make a new metric data gatherer.
-        //!
-        //! \param[in] initData The parameters needed to initialize the
-        //! data gatherer.
-        //! \warning It is owned by the calling code.
-        virtual CDataGatherer *makeDataGatherer(const SGathererInitializationData &initData) const;
-
-        //! Make a new metric data gatherer from part of a state document.
-        //!
-        //! \param[in] partitionFieldValue The partition field value.
-        //! \param[in,out] traverser A state document traverser.
-        //! \warning It is owned by the calling code.
-        virtual CDataGatherer *makeDataGatherer(const std::string &partitionFieldValue,
-                                                core::CStateRestoreTraverser &traverser) const;
-        //@}
-
-        //! \name Defaults
-        //@{
-        //! Get the default prior for \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TPriorPtr defaultPrior(model_t::EFeature feature,
-                                       const SModelParams &params) const;
-
-        //! Get the default multivariate prior for \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature,
                                                               const SModelParams &params) const;
-
-        //! Get the default prior for pairs of correlated time series
-        //! of \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature,
-                                                            const SModelParams &params) const;
-        //@}
-
-        //! Get the search key corresponding to this factory.
-        virtual const CSearchKey &searchKey() const;
-
-        //! Returns false.
-        virtual bool isSimpleCount() const;
-
-        //! Check the pre-summarisation mode for this factory.
-        virtual model_t::ESummaryMode summaryMode() const;
-
-        //! Get the default data type for models from this factory.
-        virtual maths_t::EDataType dataType() const;
-
-        //! \name Customization by a specific search
-        //@{
-        //! Set the identifier of the search for which this generates models.
-        virtual void identifier(int identifier);
-
-        //! Set the name of the field whose values will be counted.
-        virtual void fieldNames(const std::string &partitionFieldName,
-                                const std::string &overFieldName,
-                                const std::string &byFieldName,
-                                const std::string &valueFieldName,
-                                const TStrVec &influenceFieldNames);
-
-        //! Set whether the models should process missing person fields.
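Editorial note: a sketch of the customization sequence these setters support. Only
the member functions shown in this header are used; the identifier and field values
below are hypothetical.

#include <model/CMetricModelFactory.h>

// Configure a factory for a hypothetical individual metric search which
// analyses a "cpu_util" value per host, partitioned by data centre.
void configure(ml::model::CMetricModelFactory& factory) {
    factory.identifier(1);
    // Partition, over, by and value field names plus influencers; an
    // individual (non-population) search leaves the over field empty.
    factory.fieldNames("datacenter", "", "host", "cpu_util", {"host"});
    factory.useNull(false);
    factory.bucketLength(300);
}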
- virtual void useNull(bool useNull); - - //! Set the features which will be modeled. - virtual void features(const TFeatureVec &features); - - //! Set the modeled bucket length. - virtual void bucketLength(core_t::TTime bucketLength); - - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay); - //@} - - private: - //! Get the field values which partition the data for modeling. - virtual TStrCRefVec partitioningFields() const; - - private: - //! The identifier of the search for which this generates models. - int m_Identifier; - - //! Indicates whether the data being gathered are already summarized - //! by an external aggregation process. - model_t::ESummaryMode m_SummaryMode; - - //! If m_SummaryMode is E_Manual then this is the name of the field - //! holding the summary count. - std::string m_SummaryCountFieldName; - - //! The name of the field which splits the data. - std::string m_PartitionFieldName; - - //! The name of field whose values define the metric series' names - //! which will be analyzed. - std::string m_PersonFieldName; - - //! The name of field whose values define the metric series' values - //! which will be analyzed. - std::string m_ValueFieldName; - - //! The field names for which we are computing influence. These are - //! the fields which can be used to join results across different - //! searches. - TStrVec m_InfluenceFieldNames; - - //! If true the models will process missing person fields. - bool m_UseNull; - - //! The count features which will be modeled. - TFeatureVec m_Features; - - //! The bucket length to analyze. - core_t::TTime m_BucketLength; - - //! The bucket results delay. - std::size_t m_BucketResultsDelay; +class MODEL_EXPORT CMetricModelFactory : public CModelFactory { +public: + //! Lift all overloads into scope. + using CModelFactory::defaultMultivariatePrior; + using CModelFactory::defaultPrior; + +public: + //! \note The default arguments supplied to the constructor are + //! intended for unit testing and are not necessarily good defaults. + //! The CModelConfig class is responsible for providing sensible + //! default values for the factory for use within our products. + explicit CMetricModelFactory(const SModelParams& params, + model_t::ESummaryMode summaryMode = model_t::E_None, + const std::string& summaryCountFieldName = ""); + + //! Create a copy of the factory owned by the calling code. + virtual CMetricModelFactory* clone() const; + + //! \name Factory Methods + //@{ + //! Make a new metric model. + //! + //! \param[in] initData The parameters needed to initialize the model. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData) const; + + //! Make a new metric model from part of a state document. + //! + //! \param[in] initData Additional parameters needed to initialize + //! the model. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + + //! Make a new metric data gatherer. + //! + //! \param[in] initData The parameters needed to initialize the + //! data gatherer. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const SGathererInitializationData& initData) const; + + //! Make a new metric data gatherer from part of a state document. + //! + //! 
\param[in] partitionFieldValue The partition field value. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + //@} + + //! \name Defaults + //@{ + //! Get the default prior for \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TPriorPtr defaultPrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default multivariate prior for \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default prior for pairs of correlated time series + //! of \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + //@} + + //! Get the search key corresponding to this factory. + virtual const CSearchKey& searchKey() const; + + //! Returns false. + virtual bool isSimpleCount() const; + + //! Check the pre-summarisation mode for this factory. + virtual model_t::ESummaryMode summaryMode() const; + + //! Get the default data type for models from this factory. + virtual maths_t::EDataType dataType() const; + + //! \name Customization by a specific search + //@{ + //! Set the identifier of the search for which this generates models. + virtual void identifier(int identifier); + + //! Set the name of the field whose values will be counted. + virtual void fieldNames(const std::string& partitionFieldName, + const std::string& overFieldName, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames); + + //! Set whether the models should process missing person fields. + virtual void useNull(bool useNull); + + //! Set the features which will be modeled. + virtual void features(const TFeatureVec& features); + + //! Set the modeled bucket length. + virtual void bucketLength(core_t::TTime bucketLength); + + //! Set the bucket results delay + virtual void bucketResultsDelay(std::size_t bucketResultsDelay); + //@} + +private: + //! Get the field values which partition the data for modeling. + virtual TStrCRefVec partitioningFields() const; + +private: + //! The identifier of the search for which this generates models. + int m_Identifier; + + //! Indicates whether the data being gathered are already summarized + //! by an external aggregation process. + model_t::ESummaryMode m_SummaryMode; + + //! If m_SummaryMode is E_Manual then this is the name of the field + //! holding the summary count. + std::string m_SummaryCountFieldName; + + //! The name of the field which splits the data. + std::string m_PartitionFieldName; + + //! The name of field whose values define the metric series' names + //! which will be analyzed. + std::string m_PersonFieldName; + + //! The name of field whose values define the metric series' values + //! which will be analyzed. + std::string m_ValueFieldName; + + //! The field names for which we are computing influence. These are + //! the fields which can be used to join results across different + //! searches. + TStrVec m_InfluenceFieldNames; + + //! 
If true the models will process missing person fields. + bool m_UseNull; + + //! The count features which will be modeled. + TFeatureVec m_Features; + + //! The bucket length to analyze. + core_t::TTime m_BucketLength; - //! A cached search key. - mutable TOptionalSearchKey m_SearchKeyCache; -}; + //! The bucket results delay. + std::size_t m_BucketResultsDelay; + //! A cached search key. + mutable TOptionalSearchKey m_SearchKeyCache; +}; } } diff --git a/include/model/CMetricMultivariateStatistic.h b/include/model/CMetricMultivariateStatistic.h index 8db155ab3b..4e1a281652 100644 --- a/include/model/CMetricMultivariateStatistic.h +++ b/include/model/CMetricMultivariateStatistic.h @@ -20,10 +20,8 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief Wraps up one of our basic statistic objects for vector valued //! quantities. @@ -42,166 +40,130 @@ namespace model //! -# Supported by core::CMemory::dynamicSize //! -# Have overload of operator<< template -class CMetricMultivariateStatistic -{ - public: - using TDouble1Vec = core::CSmallVector; - - public: - static const std::string VALUE_TAG; - - public: - CMetricMultivariateStatistic(std::size_t n) : m_Values(n) {} - - //! Persist to a state document. - void persist(core::CStatePersistInserter &inserter) const - { - for (std::size_t i = 0u; i < m_Values.size(); ++i) - { - CMetricStatisticWrappers::persist(m_Values[i], VALUE_TAG, inserter); - } - } +class CMetricMultivariateStatistic { +public: + using TDouble1Vec = core::CSmallVector; + +public: + static const std::string VALUE_TAG; - //! Restore from the supplied state document traverser. - bool restore(core::CStateRestoreTraverser &traverser) - { - std::size_t i = 0u; - do - { - const std::string &name = traverser.name(); - if (name == VALUE_TAG) - { - if (CMetricStatisticWrappers::restore(traverser, m_Values[i++]) == false) - { - LOG_ERROR("Invalid statistic in " << traverser.value()); - return false; - } +public: + CMetricMultivariateStatistic(std::size_t n) : m_Values(n) {} + + //! Persist to a state document. + void persist(core::CStatePersistInserter& inserter) const { + for (std::size_t i = 0u; i < m_Values.size(); ++i) { + CMetricStatisticWrappers::persist(m_Values[i], VALUE_TAG, inserter); + } + } + + //! Restore from the supplied state document traverser. + bool restore(core::CStateRestoreTraverser& traverser) { + std::size_t i = 0u; + do { + const std::string& name = traverser.name(); + if (name == VALUE_TAG) { + if (CMetricStatisticWrappers::restore(traverser, m_Values[i++]) == false) { + LOG_ERROR("Invalid statistic in " << traverser.value()); + return false; } } - while (traverser.next()); - return true; + } while (traverser.next()); + return true; + } + + //! Add a new measurement. + //! + //! \param[in] value The value of the statistic. + //! \param[in] count The number of measurements in the statistic. + void add(const TDouble1Vec& value, unsigned int count) { + if (value.size() != m_Values.size()) { + LOG_ERROR("Inconsistent input data:" + << " # values = " << value.size() << ", expected " << m_Values.size()); + return; } - - //! Add a new measurement. - //! - //! \param[in] value The value of the statistic. - //! \param[in] count The number of measurements in the statistic. 
- void add(const TDouble1Vec &value, unsigned int count) - { - if (value.size() != m_Values.size()) - { - LOG_ERROR("Inconsistent input data:" - << " # values = " << value.size() - << ", expected " << m_Values.size()); - return; + for (std::size_t i = 0u; i < value.size(); ++i) { + m_Values[i].add(value[i], count); + } + } + + //! Returns the aggregated value of all the measurements. + TDouble1Vec value() const { + std::size_t dimension = m_Values.size(); + TDouble1Vec result(dimension); + for (std::size_t i = 0u; i < dimension; ++i) { + TDouble1Vec vi = CMetricStatisticWrappers::value(m_Values[i]); + if (vi.size() > 1) { + result.resize(vi.size() * dimension); } - for (std::size_t i = 0u; i < value.size(); ++i) - { - m_Values[i].add(value[i], count); + for (std::size_t j = 0u; j < vi.size(); ++j) { + result[i + j * dimension] = vi[j]; } } - - //! Returns the aggregated value of all the measurements. - TDouble1Vec value() const - { - std::size_t dimension = m_Values.size(); - TDouble1Vec result(dimension); - for (std::size_t i = 0u; i < dimension; ++i) - { - TDouble1Vec vi = CMetricStatisticWrappers::value(m_Values[i]); - if (vi.size() > 1) - { - result.resize(vi.size() * dimension); - } - for (std::size_t j = 0u; j < vi.size(); ++j) - { - result[i + j * dimension] = vi[j]; - } + return result; + } + + //! Returns the aggregated value of all the measurements suitable + //! for computing influence. + TDouble1Vec influencerValue() const { + std::size_t dimension = m_Values.size(); + TDouble1Vec result(dimension); + for (std::size_t i = 0u; i < dimension; ++i) { + TDouble1Vec vi = CMetricStatisticWrappers::influencerValue(m_Values[i]); + if (vi.size() > 1) { + result.resize(vi.size() * dimension); } - return result; - } - - //! Returns the aggregated value of all the measurements suitable - //! for computing influence. - TDouble1Vec influencerValue() const - { - std::size_t dimension = m_Values.size(); - TDouble1Vec result(dimension); - for (std::size_t i = 0u; i < dimension; ++i) - { - TDouble1Vec vi = CMetricStatisticWrappers::influencerValue(m_Values[i]); - if (vi.size() > 1) - { - result.resize(vi.size() * dimension); - } - for (std::size_t j = 0u; j < vi.size(); ++j) - { - result[i + j * dimension] = vi[j]; - } + for (std::size_t j = 0u; j < vi.size(); ++j) { + result[i + j * dimension] = vi[j]; } - return result; } + return result; + } - //! Returns the count of all the measurements. - double count() const - { - return CMetricStatisticWrappers::count(m_Values[0]); - } + //! Returns the count of all the measurements. + double count() const { return CMetricStatisticWrappers::count(m_Values[0]); } - //! Combine two partial statistics. - const CMetricMultivariateStatistic &operator+=(const CMetricMultivariateStatistic &rhs) - { - for (std::size_t i = 0u; i < m_Values.size(); ++i) - { - m_Values[i] += rhs.m_Values[i]; - } - return *this; + //! Combine two partial statistics. + const CMetricMultivariateStatistic& operator+=(const CMetricMultivariateStatistic& rhs) { + for (std::size_t i = 0u; i < m_Values.size(); ++i) { + m_Values[i] += rhs.m_Values[i]; } + return *this; + } - //! Get the checksum of the partial statistic - uint64_t checksum(uint64_t seed) const - { - return maths::CChecksum::calculate(seed, m_Values); - } + //! Get the checksum of the partial statistic + uint64_t checksum(uint64_t seed) const { return maths::CChecksum::calculate(seed, m_Values); } - //! Debug the memory used by the statistic. 
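Editorial note: the indexing in value() and influencerValue() above flattens the
per-coordinate statistics value-major, i.e. coordinate i's j'th value lands at index
i + j * dimension. A standalone toy (hypothetical data, plain std::vector) showing
the same arithmetic:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    std::size_t dimension = 2;
    // v[i][j] is the j'th value reported by coordinate i's statistic.
    std::vector<std::vector<double>> v{{1.0, 10.0}, {2.0, 20.0}};
    std::vector<double> result(dimension);
    for (std::size_t i = 0; i < dimension; ++i) {
        if (v[i].size() > 1) {
            // Grow the result to hold all values of all coordinates.
            result.resize(v[i].size() * dimension);
        }
        for (std::size_t j = 0; j < v[i].size(); ++j) {
            result[i + j * dimension] = v[i][j];
        }
    }
    for (double x : result) {
        std::cout << x << ' '; // prints "1 2 10 20": first values, then second values
    }
    std::cout << '\n';
    return 0;
}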
- void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CMetricPartialStatistic", sizeof(*this)); - core::CMemoryDebug::dynamicSize("m_Value", m_Values, mem); - } + //! Debug the memory used by the statistic. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CMetricPartialStatistic", sizeof(*this)); + core::CMemoryDebug::dynamicSize("m_Value", m_Values, mem); + } - //! Get the memory used by the statistic. - std::size_t memoryUsage() const - { - return sizeof(*this) + core::CMemory::dynamicSize(m_Values); - } + //! Get the memory used by the statistic. + std::size_t memoryUsage() const { return sizeof(*this) + core::CMemory::dynamicSize(m_Values); } - //! Print partial statistic - std::string print() const - { - std::ostringstream result; - result << core::CContainerPrinter::print(m_Values); - return result.str(); - } + //! Print partial statistic + std::string print() const { + std::ostringstream result; + result << core::CContainerPrinter::print(m_Values); + return result.str(); + } - private: - using TStatistic2Vec = core::CSmallVector; +private: + using TStatistic2Vec = core::CSmallVector; - private: - TStatistic2Vec m_Values; +private: + TStatistic2Vec m_Values; }; template const std::string CMetricMultivariateStatistic::VALUE_TAG("a"); template -std::ostream &operator<<(std::ostream &o, - const CMetricMultivariateStatistic &statistic) -{ +std::ostream& operator<<(std::ostream& o, const CMetricMultivariateStatistic& statistic) { return o << statistic.print(); } - } } diff --git a/include/model/CMetricPartialStatistic.h b/include/model/CMetricPartialStatistic.h index 2975d63c6b..54c6bcf15b 100644 --- a/include/model/CMetricPartialStatistic.h +++ b/include/model/CMetricPartialStatistic.h @@ -9,10 +9,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -24,10 +24,8 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief A partial metric statistic. //! @@ -47,136 +45,103 @@ namespace model //! -# Supported by core::CMemory::dynamicSize //! -# Have overload of operator<< template -class CMetricPartialStatistic -{ - public: - using TDouble1Vec = core::CSmallVector; - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; - - public: - static const std::string VALUE_TAG; - static const std::string TIME_TAG; - - public: - CMetricPartialStatistic(std::size_t dimension) : - m_Value(CMetricStatisticWrappers::template make(dimension)) - {} - - //! Persist to a state document. - void persist(core::CStatePersistInserter &inserter) const - { - CMetricStatisticWrappers::persist(m_Value, VALUE_TAG, inserter); - inserter.insertValue(TIME_TAG, m_Time.toDelimited()); - } - - //! Restore from the supplied state document traverser. - bool restore(core::CStateRestoreTraverser &traverser) - { - do - { - const std::string &name = traverser.name(); - if (name == VALUE_TAG) - { - if (CMetricStatisticWrappers::restore(traverser, m_Value) == false) - { - LOG_ERROR("Invalid statistic in " << traverser.value()); - return false; - } +class CMetricPartialStatistic { +public: + using TDouble1Vec = core::CSmallVector; + using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; + +public: + static const std::string VALUE_TAG; + static const std::string TIME_TAG; + +public: + CMetricPartialStatistic(std::size_t dimension) : m_Value(CMetricStatisticWrappers::template make(dimension)) {} + + //! Persist to a state document. 
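Editorial note: a toy stand-in for the STATISTIC template argument, sketching the
add/combine/value surface the class documentation above requires. Persistence,
memory accounting and operator<< are omitted for brevity; this is not the
maths::CBasicStatistics implementation.

#include <iostream>

struct ToyMean {
    double sum = 0.0;
    double count = 0.0;
    // Add a measurement representing "n" pre-summarised values.
    void add(double value, unsigned int n) {
        sum += value * n;
        count += n;
    }
    // Combine two partial statistics.
    ToyMean& operator+=(const ToyMean& rhs) {
        sum += rhs.sum;
        count += rhs.count;
        return *this;
    }
    double value() const { return count > 0.0 ? sum / count : 0.0; }
};

int main() {
    ToyMean a, b;
    a.add(1.0, 1);
    b.add(4.0, 2);
    a += b;
    std::cout << a.value() << '\n'; // prints 3: (1 + 4 + 4) / 3
    return 0;
}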
+ void persist(core::CStatePersistInserter& inserter) const { + CMetricStatisticWrappers::persist(m_Value, VALUE_TAG, inserter); + inserter.insertValue(TIME_TAG, m_Time.toDelimited()); + } + + //! Restore from the supplied state document traverser. + bool restore(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == VALUE_TAG) { + if (CMetricStatisticWrappers::restore(traverser, m_Value) == false) { + LOG_ERROR("Invalid statistic in " << traverser.value()); + return false; } - else if (name == TIME_TAG) - { - if (m_Time.fromDelimited(traverser.value()) == false) - { - LOG_ERROR("Invalid time in " << traverser.value()); - return false; - } + } else if (name == TIME_TAG) { + if (m_Time.fromDelimited(traverser.value()) == false) { + LOG_ERROR("Invalid time in " << traverser.value()); + return false; } } - while (traverser.next()); - return true; - } - - //! Add a new measurement. - //! - //! \param[in] value The value of the statistic. - //! \param[in] time The time of the statistic. - //! \param[in] count The number of measurements in the statistic. - inline void add(const TDouble1Vec &value, - core_t::TTime time, - unsigned int count) - { - CMetricStatisticWrappers::add(value, count, m_Value); - m_Time.add(static_cast(time), count); - } - - //! Returns the aggregated value of all the measurements. - inline TDouble1Vec value() const - { - return CMetricStatisticWrappers::value(m_Value); - } - - //! Returns the combined count of all the measurements. - inline double count() const - { - return maths::CBasicStatistics::count(m_Time); - } - - //! Returns the mean time of all the measurements. - inline core_t::TTime time() const - { - return static_cast( - maths::CBasicStatistics::mean(m_Time) + 0.5); - } - - //! Combine two partial statistics. - inline const CMetricPartialStatistic &operator+=(const CMetricPartialStatistic &rhs) - { - m_Value += rhs.m_Value; - m_Time += rhs.m_Time; - return *this; - } - - //! Get the checksum of the partial statistic - inline uint64_t checksum(uint64_t seed) const - { - seed = maths::CChecksum::calculate(seed, m_Value); - return maths::CChecksum::calculate(seed, m_Time); - } - - //! Debug the memory used by the statistic. - inline void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CMetricPartialStatistic", sizeof(*this)); - core::CMemoryDebug::dynamicSize("m_Value", m_Value, mem); - core::CMemoryDebug::dynamicSize("m_Time", m_Time, mem); - } - - //! Get the memory used by the statistic. - inline std::size_t memoryUsage() const - { - return sizeof(*this) - + core::CMemory::dynamicSize(m_Value) - + core::CMemory::dynamicSize(m_Time); - } - - //! Print partial statistic - inline std::string print() const - { - std::ostringstream result; - result << m_Value << ' ' << maths::CBasicStatistics::mean(m_Time); - return result.str(); - } - - private: - STATISTIC m_Value; - TMeanAccumulator m_Time; + } while (traverser.next()); + return true; + } + + //! Add a new measurement. + //! + //! \param[in] value The value of the statistic. + //! \param[in] time The time of the statistic. + //! \param[in] count The number of measurements in the statistic. + inline void add(const TDouble1Vec& value, core_t::TTime time, unsigned int count) { + CMetricStatisticWrappers::add(value, count, m_Value); + m_Time.add(static_cast(time), count); + } + + //! Returns the aggregated value of all the measurements. 
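Editorial note: add() above maintains a count-weighted mean of the measurement times
and time() rounds that mean to the nearest whole time value by adding 0.5 before the
cast. The same arithmetic as a standalone toy (values are hypothetical):

#include <cstdint>
#include <iostream>

int main() {
    // Measurements at t=100 with count 1 and t=103 with count 2.
    double meanTime = (1.0 * 100.0 + 2.0 * 103.0) / (1.0 + 2.0); // 102.0
    auto rounded = static_cast<std::int64_t>(meanTime + 0.5);
    std::cout << rounded << '\n'; // prints 102
    return 0;
}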
+ inline TDouble1Vec value() const { return CMetricStatisticWrappers::value(m_Value); } + + //! Returns the combined count of all the measurements. + inline double count() const { return maths::CBasicStatistics::count(m_Time); } + + //! Returns the mean time of all the measurements. + inline core_t::TTime time() const { return static_cast(maths::CBasicStatistics::mean(m_Time) + 0.5); } + + //! Combine two partial statistics. + inline const CMetricPartialStatistic& operator+=(const CMetricPartialStatistic& rhs) { + m_Value += rhs.m_Value; + m_Time += rhs.m_Time; + return *this; + } + + //! Get the checksum of the partial statistic + inline uint64_t checksum(uint64_t seed) const { + seed = maths::CChecksum::calculate(seed, m_Value); + return maths::CChecksum::calculate(seed, m_Time); + } + + //! Debug the memory used by the statistic. + inline void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CMetricPartialStatistic", sizeof(*this)); + core::CMemoryDebug::dynamicSize("m_Value", m_Value, mem); + core::CMemoryDebug::dynamicSize("m_Time", m_Time, mem); + } + + //! Get the memory used by the statistic. + inline std::size_t memoryUsage() const { + return sizeof(*this) + core::CMemory::dynamicSize(m_Value) + core::CMemory::dynamicSize(m_Time); + } + + //! Print partial statistic + inline std::string print() const { + std::ostringstream result; + result << m_Value << ' ' << maths::CBasicStatistics::mean(m_Time); + return result.str(); + } + +private: + STATISTIC m_Value; + TMeanAccumulator m_Time; }; template const std::string CMetricPartialStatistic::VALUE_TAG("a"); template const std::string CMetricPartialStatistic::TIME_TAG("b"); - } } diff --git a/include/model/CMetricPopulationModel.h b/include/model/CMetricPopulationModel.h index 78437bd4be..4482cd38fa 100644 --- a/include/model/CMetricPopulationModel.h +++ b/include/model/CMetricPopulationModel.h @@ -20,15 +20,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief The model for computing the anomalousness of the values //! each person in a population generates in a data stream. //! @@ -54,330 +51,314 @@ namespace model //! //! It assumes data are supplied in time order since this means minimal //! state can be maintained. -class MODEL_EXPORT CMetricPopulationModel : public CPopulationModel -{ - public: - using TFeatureMathsModelPtrPr = std::pair; - using TFeatureMathsModelPtrPrVec = std::vector; - using TFeatureMathsModelPtrVecPr = std::pair; - using TFeatureMathsModelPtrVecPrVec = std::vector; - using TFeatureCorrelationsPtrPr = std::pair; - using TFeatureCorrelationsPtrPrVec = std::vector; - using TFeatureData = SMetricFeatureData; - using TSizeSizePrFeatureDataPr = std::pair; - using TSizeSizePrFeatureDataPrVec = std::vector; - using TFeatureSizeSizePrFeatureDataPrVecMap = std::map; - using TProbabilityCache = CModelTools::CProbabilityCache; - - //! The statistics we maintain about a bucketing interval. - struct MODEL_EXPORT SBucketStats - { - explicit SBucketStats(core_t::TTime startTime); - - //! The start time of this bucket. - core_t::TTime s_StartTime; - //! The non-zero counts of messages by people in the bucketing - //! interval. - TSizeUInt64PrVec s_PersonCounts; - //! The total count in the current bucket. - uint64_t s_TotalCount; - //! The metric features we are modeling. - TFeatureSizeSizePrFeatureDataPrVecMap s_FeatureData; - //! 
A cache of the corrections applied to interim results.
-            mutable TCorrectionKeyDouble1VecUMap s_InterimCorrections;
-        };
-
-        //! Lift the overloads of currentBucketValue into the class scope.
-        using CPopulationModel::currentBucketValue;
-
-        //! Lift the overloads of baselineBucketMean into the class scope.
-        using CAnomalyDetectorModel::baselineBucketMean;
-
-        //! Lift the overloads of acceptPersistInserter into the class scope.
-        using CPopulationModel::acceptPersistInserter;
-
-    public:
-        //! \name Life-cycle
-        //@{
-        //! \param[in] params The global configuration parameters.
-        //! \param[in] dataGatherer The object that gathers time series data.
-        //! \param[in] newFeatureModels The new models to use for each feature.
-        //! \param[in] newFeatureCorrelateModelPriors The prior to use for the
-        //! new model of correlates for each feature.
-        //! \param[in] featureCorrelatesModels The model of all correlates for
-        //! each feature.
-        //! \param[in] influenceCalculators The influence calculators to use
-        //! for each feature.
-        //! \note The current bucket statistics are left default initialized
-        //! and so must be sampled for before this model can be used.
-        CMetricPopulationModel(const SModelParams &params,
-                               const TDataGathererPtr &dataGatherer,
-                               const TFeatureMathsModelPtrPrVec &newFeatureModels,
-                               const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
-                               const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
-                               const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators);
-
-        //! Constructor used for restoring persisted models.
-        //!
-        //! \note The current bucket statistics are left default initialized
-        //! and so must be sampled for before this model can be used.
-        CMetricPopulationModel(const SModelParams &params,
-                               const TDataGathererPtr &dataGatherer,
-                               const TFeatureMathsModelPtrPrVec &newFeatureModels,
-                               const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
-                               const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
-                               const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators,
-                               core::CStateRestoreTraverser &traverser);
-
-        //! Create a copy that will result in the same persisted state as the
-        //! original. This is effectively a copy constructor that creates a
-        //! copy that's only valid for a single purpose. The boolean flag is
-        //! redundant except to create a signature that will not be mistaken
-        //! for a general purpose copy constructor.
-        CMetricPopulationModel(bool isForPersistence, const CMetricPopulationModel &other);
-        //@}
-
-        //! Returns false.
-        virtual bool isEventRate() const;
-
-        //! Returns true.
-        virtual bool isMetric() const;
-
-        //! \name Persistence
-        //@{
-        //! Persist state by passing information to the supplied inserter
-        virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
-
-        //! Add to the contents of the object.
-        virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
-
-        //! Create a clone of this model that will result in the same persisted
-        //! state. The clone may be incomplete in ways that do not affect the
-        //! persisted representation, and must not be used for any other
-        //! purpose.
-        //! \warning The caller owns the object returned.
-        virtual CAnomalyDetectorModel *cloneForPersistence() const;
-        //@}
-
-        //! Get the model category.
-        virtual model_t::EModelType category() const;
-
-        //! \name Bucket Statistics
-        //@{
-        //! Get the value of \p feature for the person identified
-        //! 
by \p pid and the attribute identified by \p cid in the - //! bucketing interval containing \p time. - //! - //! \param[in] feature The feature of interest - //! \param[in] pid The identifier of the person of interest. - //! \param[in] cid The identifier of the attribute of interest. - //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - - //! Get the population baseline mean of \p feature for the - //! attribute identified by \p cid as of the start of the - //! current bucketing interval. - //! - //! \param[in] feature The feature of interest - //! \param[in] pid The identifier of the person of interest. - //! \param[in] cid The identifier of the attribute of interest. - //! \param[in] type A description of the type of result for which - //! to get the baseline. See CResultType for more details. - //! \param[in] correlated The correlated series' identifiers and - //! their values if any. - //! \param[in] time The time of interest. - virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const; - - //! Check if bucket statistics are available for the specified time. - virtual bool bucketStatsAvailable(core_t::TTime time) const; - //@} - - //! \name Update - //@{ - //! This samples the bucket statistics, and any state needed - //! by computeProbablity, in the time interval [\p startTime, - //! \p endTime], but does not update the model. This is needed - //! by the results preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! Update the model with the samples of the various processes - //! in the time interval [\p startTime, \p endTime]. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - //! \param[in] resourceMonitor The resourceMonitor. - virtual void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! Prune any data for people and attributes which haven't been - //! seen for a sufficiently long period. This is based on the - //! prior decay rates and the number of batches into which we - //! are partitioning time. - virtual void prune(std::size_t maximumAge); - //@} - - //! \name Probability - //@{ - //! Compute the probability of seeing \p person's attribute values - //! for the buckets in the interval [\p startTime, \p endTime]. - //! - //! \param[in] pid The identifier of the person of interest. - //! \param[in] startTime The start of the interval of interest. - //! \param[in] endTime The end of the interval of interest. - //! \param[in] partitioningFields The partitioning field (name, value) - //! pairs for which to compute the the probability. - //! \param[in] numberAttributeProbabilities The maximum number of - //! attribute probabilities to retrieve. - //! \param[out] result A structure containing the probability, - //! the smallest \p numberAttributeProbabilities attribute - //! 
probabilities, the influences and any extra descriptive data
-        virtual bool computeProbability(std::size_t pid,
-                                        core_t::TTime startTime,
-                                        core_t::TTime endTime,
-                                        CPartitioningFields &partitioningFields,
-                                        std::size_t numberAttributeProbabilities,
-                                        SAnnotatedProbability &result) const;
-
-        //! Clears \p probability and \p attributeProbabilities.
-        virtual bool computeTotalProbability(const std::string &person,
-                                             std::size_t numberAttributeProbabilities,
-                                             TOptionalDouble &probability,
-                                             TAttributeProbability1Vec &attributeProbabilities) const;
-        //@}
-
-        //! Get the checksum of this model.
-        //!
-        //! \param[in] includeCurrentBucketStats If true then include
-        //! the current bucket statistics. (This is designed to handle
-        //! serialization, for which we don't serialize the current
-        //! bucket statistics.)
-        virtual uint64_t checksum(bool includeCurrentBucketStats = true) const;
-
-        //! Get a view of the internals of the model for visualization.
-        virtual CModelDetailsViewPtr details() const;
-
-        //! Get the feature data corresponding to \p feature at \p time.
-        const TSizeSizePrFeatureDataPrVec &featureData(model_t::EFeature feature,
-                                                       core_t::TTime time) const;
-
-        //! Debug the memory used by this model.
-        virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
-
-        //! Get the memory used by this model.
-        virtual std::size_t memoryUsage() const;
-
-        //! Get the static size of this object - used for virtual hierarchies
-        virtual std::size_t staticSize() const;
-
-        //! Get the non-estimated memory used by this model.
-        virtual std::size_t computeMemoryUsage() const;
-
-    private:
-        //! Initialize the feature models.
-        void initialize(const TFeatureMathsModelPtrPrVec &newFeatureModels,
-                        const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
-                        const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels);
-
-        //! Get the start time of the current bucket.
-        virtual core_t::TTime currentBucketStartTime() const;
-
-        //! Set the start time of the current bucket.
-        virtual void currentBucketStartTime(core_t::TTime time);
-
-        //! Get the total count of the current bucket.
-        uint64_t currentBucketTotalCount() const;
-
-        //! Set the current bucket total count.
-        virtual void currentBucketTotalCount(uint64_t totalCount);
-
-        //! Get the current bucket person counts.
-        virtual const TSizeUInt64PrVec &personCounts() const;
-
-        //! Get the interim corrections of the current bucket.
-        TCorrectionKeyDouble1VecUMap &currentBucketInterimCorrections() const;
-
-        //! Initialize the time series models for "n" newly observed people
-        //! and "m" attributes.
-        virtual void createNewModels(std::size_t n, std::size_t m);
-
-        //! Initialize the time series models for recycled attributes and/or people
-        virtual void updateRecycledModels();
-
-        //! Update the correlation models.
-        virtual void refreshCorrelationModels(std::size_t resourceLimit,
-                                              CResourceMonitor &resourceMonitor);
-
-        //! Clear out large state objects for people/attributes that are pruned
-        virtual void clearPrunedResources(const TSizeVec &people,
-                                          const TSizeVec &attributes);
-
-        //! Skip sampling the interval \p endTime - \p startTime.
-        virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime);
-
-        //! Get a read only model for \p feature and the attribute identified
-        //! by \p cid.
-        const maths::CModel *model(model_t::EFeature feature, std::size_t cid) const;
-
-        //! Get a writable model for \p feature and the attribute identified
-        //! by \p cid. 
- maths::CModel *model(model_t::EFeature feature, std::size_t pid); - - //! Check if there are correlates for \p feature and the person and - //! attribute identified by \p pid and \p cid, respectively. - bool correlates(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - - //! Fill in the probability calculation parameters for \p feature and - //! person and attribute identified by \p pid and \p cid, respectively. - void fill(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime bucketTime, - bool interim, - CProbabilityAndInfluenceCalculator::SParams ¶ms) const; +class MODEL_EXPORT CMetricPopulationModel : public CPopulationModel { +public: + using TFeatureMathsModelPtrPr = std::pair; + using TFeatureMathsModelPtrPrVec = std::vector; + using TFeatureMathsModelPtrVecPr = std::pair; + using TFeatureMathsModelPtrVecPrVec = std::vector; + using TFeatureCorrelationsPtrPr = std::pair; + using TFeatureCorrelationsPtrPrVec = std::vector; + using TFeatureData = SMetricFeatureData; + using TSizeSizePrFeatureDataPr = std::pair; + using TSizeSizePrFeatureDataPrVec = std::vector; + using TFeatureSizeSizePrFeatureDataPrVecMap = std::map; + using TProbabilityCache = CModelTools::CProbabilityCache; + + //! The statistics we maintain about a bucketing interval. + struct MODEL_EXPORT SBucketStats { + explicit SBucketStats(core_t::TTime startTime); + + //! The start time of this bucket. + core_t::TTime s_StartTime; + //! The non-zero counts of messages by people in the bucketing + //! interval. + TSizeUInt64PrVec s_PersonCounts; + //! The total count in the current bucket. + uint64_t s_TotalCount; + //! The metric features we are modeling. + TFeatureSizeSizePrFeatureDataPrVecMap s_FeatureData; + //! A cache of the corrections applied to interim results. + mutable TCorrectionKeyDouble1VecUMap s_InterimCorrections; + }; + + //! Lift the overloads of currentBucketValue into the class scope. + using CPopulationModel::currentBucketValue; + + //! Lift the overloads of baselineBucketMean into the class scope. + using CAnomalyDetectorModel::baselineBucketMean; + + //! Lift the overloads of acceptPersistInserter into the class scope. + using CPopulationModel::acceptPersistInserter; + +public: + //! \name Life-cycle + //@{ + //! \param[in] params The global configuration parameters. + //! \param[in] dataGatherer The object that gathers time series data. + //! \param[in] newFeatureModels The new models to use for each feature. + //! \param[in] newFeatureCorrelateModelPriors The prior to use for the + //! new model of correlates for each feature. + //! \param[in] featureCorrelatesModels The model of all correlates for + //! each feature. + //! \param[in] influenceCalculators The influence calculators to use + //! for each feature. + //! \note The current bucket statistics are left default initialized + //! and so must be sampled for before this model can be used. + CMetricPopulationModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators); + + //! Constructor used for restoring persisted models. + //! + //! \note The current bucket statistics are left default initialized + //! and so must be sampled for before this model can be used. 
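Editorial note: a sketch of this class's persistence-only copy contract (the
isForPersistence constructor and cloneForPersistence()). The driver function and
include paths are hypothetical; ownership of the returned pointer is the caller's,
hence the smart pointer.

#include <core/CStatePersistInserter.h>
#include <model/CAnomalyDetectorModel.h>
#include <model/CMetricPopulationModel.h>

#include <memory>

void persistSnapshot(const ml::model::CMetricPopulationModel& live,
                     ml::core::CStatePersistInserter& inserter) {
    // Take a persistence-only copy; it must not be used for anything else.
    std::unique_ptr<ml::model::CAnomalyDetectorModel> snapshot{live.cloneForPersistence()};
    snapshot->acceptPersistInserter(inserter);
}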
+
+    //! Returns false.
+    virtual bool isEventRate() const;
+
+    //! Returns true.
+    virtual bool isMetric() const;
+
+    //! \name Persistence
+    //@{
+    //! Persist state by passing information to the supplied inserter
+    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
+
+    //! Add to the contents of the object.
+    virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
+
+    //! Create a clone of this model that will result in the same persisted
+    //! state. The clone may be incomplete in ways that do not affect the
+    //! persisted representation, and must not be used for any other
+    //! purpose.
+    //! \warning The caller owns the object returned.
+    virtual CAnomalyDetectorModel* cloneForPersistence() const;
+    //@}
+
+    //! Get the model category.
+    virtual model_t::EModelType category() const;
+
+    //! \name Bucket Statistics
+    //@{
+    //! Get the value of \p feature for the person identified
+    //! by \p pid and the attribute identified by \p cid in the
+    //! bucketing interval containing \p time.
+    //!
+    //! \param[in] feature The feature of interest
+    //! \param[in] pid The identifier of the person of interest.
+    //! \param[in] cid The identifier of the attribute of interest.
+    //! \param[in] time The time of interest.
+    virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const;
+
+    //! Get the population baseline mean of \p feature for the
+    //! attribute identified by \p cid as of the start of the
+    //! current bucketing interval.
+    //!
+    //! \param[in] feature The feature of interest
+    //! \param[in] pid The identifier of the person of interest.
+    //! \param[in] cid The identifier of the attribute of interest.
+    //! \param[in] type A description of the type of result for which
+    //! to get the baseline. See CResultType for more details.
+    //! \param[in] correlated The correlated series' identifiers and
+    //! their values if any.
+    //! \param[in] time The time of interest.
+    virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature,
+                                           std::size_t pid,
+                                           std::size_t cid,
+                                           model_t::CResultType type,
+                                           const TSizeDoublePr1Vec& correlated,
+                                           core_t::TTime time) const;
+
+    //! Check if bucket statistics are available for the specified time.
+    virtual bool bucketStatsAvailable(core_t::TTime time) const;
+    //@}
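// Editorial aside: the currentBucketValue overload declared just above is
// why the class needs "using CPopulationModel::currentBucketValue;"
// earlier in this hunk - declaring any overload of a name in a derived
// class hides every base class overload of that name. A minimal sketch
// (names invented for illustration):
//
//     #include <iostream>
//
//     struct Base {
//         void value(int) { std::cout << "Base::value(int)\n"; }
//     };
//
//     struct Derived : Base {
//         using Base::value; // without this, value(double) hides value(int)
//         void value(double) { std::cout << "Derived::value(double)\n"; }
//     };
//
//     int main() {
//         Derived d;
//         d.value(42);  // Base::value(int), visible via the using-declaration
//         d.value(4.2); // Derived::value(double)
//     }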
+
+    //! \name Update
+    //@{
+    //! This samples the bucket statistics, and any state needed
+    //! by computeProbability, in the time interval [\p startTime,
+    //! \p endTime], but does not update the model. This is needed
+    //! by the results preview.
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+    //! Update the model with the samples of the various processes
+    //! in the time interval [\p startTime, \p endTime].
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    //! \param[in] resourceMonitor The resource monitor.
+    virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+    //! Prune any data for people and attributes which haven't been
+    //! seen for a sufficiently long period. This is based on the
+    //! prior decay rates and the number of batches into which we
+    //! are partitioning time.
+    virtual void prune(std::size_t maximumAge);
+    //@}
+
+    //! \name Probability
+    //@{
+    //! Compute the probability of seeing \p person's attribute values
+    //! for the buckets in the interval [\p startTime, \p endTime].
+    //!
+    //! \param[in] pid The identifier of the person of interest.
+    //! \param[in] startTime The start of the interval of interest.
+    //! \param[in] endTime The end of the interval of interest.
+    //! \param[in] partitioningFields The partitioning field (name, value)
+    //! pairs for which to compute the probability.
+    //! \param[in] numberAttributeProbabilities The maximum number of
+    //! attribute probabilities to retrieve.
+    //! \param[out] result A structure containing the probability,
+    //! the smallest \p numberAttributeProbabilities attribute
+    //! probabilities, the influences and any extra descriptive data
+    virtual bool computeProbability(std::size_t pid,
+                                    core_t::TTime startTime,
+                                    core_t::TTime endTime,
+                                    CPartitioningFields& partitioningFields,
+                                    std::size_t numberAttributeProbabilities,
+                                    SAnnotatedProbability& result) const;
+
+    //! Clears \p probability and \p attributeProbabilities.
+    virtual bool computeTotalProbability(const std::string& person,
+                                         std::size_t numberAttributeProbabilities,
+                                         TOptionalDouble& probability,
+                                         TAttributeProbability1Vec& attributeProbabilities) const;
+    //@}
+
+    //! Get the checksum of this model.
+    //!
+    //! \param[in] includeCurrentBucketStats If true then include
+    //! the current bucket statistics. (This is designed to handle
+    //! serialization, for which we don't serialize the current
+    //! bucket statistics.)
+    virtual uint64_t checksum(bool includeCurrentBucketStats = true) const;
+
+    //! Get a view of the internals of the model for visualization.
+    virtual CModelDetailsViewPtr details() const;
+
+    //! Get the feature data corresponding to \p feature at \p time.
+    const TSizeSizePrFeatureDataPrVec& featureData(model_t::EFeature feature, core_t::TTime time) const;
+
+    //! Debug the memory used by this model.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+    //! Get the memory used by this model.
+    virtual std::size_t memoryUsage() const;
+
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const;
+
+    //! Get the non-estimated memory used by this model.
+    virtual std::size_t computeMemoryUsage() const;
+
+private:
+    //! Initialize the feature models.
+    void initialize(const TFeatureMathsModelPtrPrVec& newFeatureModels,
+                    const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+                    const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels);
+
+    //! Get the start time of the current bucket.
+ virtual core_t::TTime currentBucketStartTime() const; + + //! Set the start time of the current bucket. + virtual void currentBucketStartTime(core_t::TTime time); + + //! Get the total count of the current bucket. + uint64_t currentBucketTotalCount() const; + + //! Set the current bucket total count. + virtual void currentBucketTotalCount(uint64_t totalCount); + + //! Get the current bucket person counts. + virtual const TSizeUInt64PrVec& personCounts() const; + + //! Get the interim corrections of the current bucket. + TCorrectionKeyDouble1VecUMap& currentBucketInterimCorrections() const; + + //! Initialize the time series models for "n" newly observed people + //! and "m" attributes. + virtual void createNewModels(std::size_t n, std::size_t m); + + //! Initialize the time series models for recycled attributes and/or people + virtual void updateRecycledModels(); + + //! Update the correlation models. + virtual void refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor); + + //! Clear out large state objects for people/attributes that are pruned + virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes); + + //! Skip sampling the interval \p endTime - \p startTime. + virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime); + + //! Get a read only model for \p feature and the attribute identified + //! by \p cid. + const maths::CModel* model(model_t::EFeature feature, std::size_t cid) const; + + //! Get a writable model for \p feature and the attribute identified + //! by \p cid. + maths::CModel* model(model_t::EFeature feature, std::size_t pid); + + //! Check if there are correlates for \p feature and the person and + //! attribute identified by \p pid and \p cid, respectively. + bool correlates(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + + //! Fill in the probability calculation parameters for \p feature and + //! person and attribute identified by \p pid and \p cid, respectively. + void fill(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime bucketTime, + bool interim, + CProbabilityAndInfluenceCalculator::SParams& params) const; - //! Get the model memory usage estimator - virtual CMemoryUsageEstimator *memoryUsageEstimator() const; + //! Get the model memory usage estimator + virtual CMemoryUsageEstimator* memoryUsageEstimator() const; - private: - //! The statistics we maintain about the bucket. - SBucketStats m_CurrentBucketStats; +private: + //! The statistics we maintain about the bucket. + SBucketStats m_CurrentBucketStats; - //! The models of all the attribute correlates for each feature. - //! - //! IMPORTANT this must come before m_FeatureModels in the class declaration - //! so its destructor is called afterwards (12.6.2) because feature models - //! unregister themselves from correlation models. - TFeatureCorrelateModelsVec m_FeatureCorrelatesModels; + //! The models of all the attribute correlates for each feature. + //! + //! IMPORTANT this must come before m_FeatureModels in the class declaration + //! so its destructor is called afterwards (12.6.2) because feature models + //! unregister themselves from correlation models. + TFeatureCorrelateModelsVec m_FeatureCorrelatesModels; - //! The population attribute models for each feature. - TFeatureModelsVec m_FeatureModels; + //! The population attribute models for each feature. + TFeatureModelsVec m_FeatureModels; - //! A cache of the probability calculation results. 
-        mutable TProbabilityCache m_Probabilities;
+    //! A cache of the probability calculation results.
+    mutable TProbabilityCache m_Probabilities;
 
-        //! The memory estimator.
-        mutable CMemoryUsageEstimator m_MemoryEstimator;
+    //! The memory estimator.
+    mutable CMemoryUsageEstimator m_MemoryEstimator;
 
-        friend class CMetricPopulationModelDetailsView;
+    friend class CMetricPopulationModelDetailsView;
 };
-
 }
 }
diff --git a/include/model/CMetricPopulationModelFactory.h b/include/model/CMetricPopulationModelFactory.h
index 2b5d96d819..11fb1d4b91 100644
--- a/include/model/CMetricPopulationModelFactory.h
+++ b/include/model/CMetricPopulationModelFactory.h
@@ -10,14 +10,11 @@
 #include
 #include
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStateRestoreTraverser;
 }
-namespace model
-{
+namespace model {
 
 //! \brief A factory class implementation for CMetricPopulationModel.
 //!
@@ -25,174 +22,167 @@ namespace model
 //! This concrete factory implements the methods to make new models
 //! and data gatherers, and create default priors suitable for the
 //! CMetricPopulationModel class.
-class MODEL_EXPORT CMetricPopulationModelFactory : public CModelFactory
-{
-    public:
-        //! Lift all overloads into scope.
-        using CModelFactory::defaultMultivariatePrior;
-        using CModelFactory::defaultPrior;
-
-    public:
-        //! \note The default arguments supplied to the constructor are
-        //! intended for unit testing and are not necessarily good defaults.
-        //! The CModelConfig class is responsible for providing sensible
-        //! default values for the factory for use within our products.
-        explicit CMetricPopulationModelFactory(const SModelParams &params,
-                                               model_t::ESummaryMode summaryMode = model_t::E_None,
-                                               const std::string &summaryCountFieldName = "");
-
-        //! Create a copy of the factory owned by the calling code.
-        virtual CMetricPopulationModelFactory *clone() const;
-
-        //! \name Factory Methods
-        //@{
-        //! Make a new population model for metric values.
-        //!
-        //! \param[in] initData The parameters needed to initialize the model.
-        //! \warning It is owned by the calling code.
-        virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData) const;
-
-        //! Make a new metric population model from part of a state document.
-        //!
-        //! \param[in] initData Additional parameters needed to initialize
-        //! the model.
-        //! \param[in,out] traverser A state document traverser.
-        //! \warning It is owned by the calling code.
-        virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData,
-                                                 core::CStateRestoreTraverser &traverser) const;
-
-        //! Make a new metric population data gatherer.
-        //!
-        //! \param[in] initData The parameters needed to initialize the data
-        //! gatherer.
-        //! \warning It is owned by the calling code.
-        virtual CDataGatherer *makeDataGatherer(const SGathererInitializationData &initData) const;
-
-        //! Make a new metric population data gatherer from part of a state
-        //! document.
-        //!
-        //! \param[in] partitionFieldValue The partition field value.
-        //! \param[in,out] traverser A state document traverser.
-        //! \warning It is owned by the calling code.
-        virtual CDataGatherer *makeDataGatherer(const std::string &partitionFieldValue,
-                                                core::CStateRestoreTraverser &traverser) const;
-        //@}
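// Editorial aside: the \warning notes on the factory methods above mean
// the caller receives a raw owning pointer. A self-contained sketch of
// taking ownership at the call site (types invented for illustration):
//
//     #include <memory>
//
//     struct Model { virtual ~Model() = default; };
//
//     struct Factory {
//         // Mirrors the contract above: the returned object is owned by
//         // the calling code.
//         Model* makeModel() const { return new Model; }
//     };
//
//     int main() {
//         Factory factory;
//         // Wrapping the raw pointer immediately ensures it is released
//         // on every code path.
//         std::unique_ptr<Model> model{factory.makeModel()};
//     }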
-
-        //! \name Defaults
-        //@{
-        //! Get the default prior for \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TPriorPtr defaultPrior(model_t::EFeature feature,
-                                       const SModelParams &params) const;
-
-        //! Get the default multivariate prior for \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature,
-                                                               const SModelParams &params) const;
-
-        //! Get the default prior for pairs of correlated time series
-        //! of \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature,
-                                                            const SModelParams &params) const;
-        //@}
-
-        //! Get the search key corresponding to this factory.
-        virtual const CSearchKey &searchKey() const;
-
-        //! Returns false.
-        virtual bool isSimpleCount() const;
-
-        //! Check the pre-summarisation mode for this factory.
-        virtual model_t::ESummaryMode summaryMode() const;
-
-        //! Get the default data type for models from this factory.
-        virtual maths_t::EDataType dataType() const;
-
-        //! \name Customization
-        //@{
-        //! Set the identifier of the search for which this generates models.
-        virtual void identifier(int identifier);
-
-        //! Set the name of the field whose values will be counted.
-        virtual void fieldNames(const std::string &partitionFieldName,
-                                const std::string &overFieldName,
-                                const std::string &byFieldName,
-                                const std::string &valueFieldName,
-                                const TStrVec &influenceFieldNames);
-
-        //! Set whether the models should process missing person and
-        //! attribute fields.
-        virtual void useNull(bool useNull);
-
-        //! Set the features which will be modeled.
-        virtual void features(const TFeatureVec &features);
-
-        //! Set the bucket results delay
-        virtual void bucketResultsDelay(std::size_t bucketResultsDelay);
-        //@}
-
-    private:
-        //! Get the field values which partition the data for modeling.
-        virtual TStrCRefVec partitioningFields() const;
-
-        //! Restores a single population metric model.
-        bool modelAcceptRestoreTraverser(const SModelInitializationData &initData,
-                                         core::CStateRestoreTraverser &traverser,
-                                         CAnomalyDetectorModel *&model) const;
-
-    private:
-        //! The identifier of the search for which this generates models.
-        int m_Identifier;
-
-        //! Indicates whether the data being gathered are already summarized
-        //! by an external aggregation process
-        model_t::ESummaryMode m_SummaryMode;
-
-        //! If m_SummaryMode is E_Manual then this is the name of the field
-        //! holding the summary count.
-        std::string m_SummaryCountFieldName;
-
-        //! The name of the field which splits the data.
-        std::string m_PartitionFieldName;
-
-        //! The name of the field which defines the population which
-        //! will be analyzed.
-        std::string m_PersonFieldName;
-
-        //! The name of the field which defines the person attributes
-        //! which will be analyzed.
-        std::string m_AttributeFieldName;
-
-        //! The name of field whose values define the metric series'
-        //! values which will be analyzed.
-        std::string m_ValueFieldName;
-
-        //! The field names for which we are computing influence. These are
-        //! the fields which can be used to join results across different
-        //! searches.
-        TStrVec m_InfluenceFieldNames;
-
-        //! If true the models will process missing person and attribute
-        //! fields.
-        bool m_UseNull;
-
-        //! The count features which will be modeled.
-        TFeatureVec m_Features;
-
-        //! The bucket results delay.
-        std::size_t m_BucketResultsDelay;
-
-        //! A cached search key.
- mutable TOptionalSearchKey m_SearchKeyCache; -}; +class MODEL_EXPORT CMetricPopulationModelFactory : public CModelFactory { +public: + //! Lift all overloads into scope. + using CModelFactory::defaultMultivariatePrior; + using CModelFactory::defaultPrior; + +public: + //! \note The default arguments supplied to the constructor are + //! intended for unit testing and are not necessarily good defaults. + //! The CModelConfig class is responsible for providing sensible + //! default values for the factory for use within our products. + explicit CMetricPopulationModelFactory(const SModelParams& params, + model_t::ESummaryMode summaryMode = model_t::E_None, + const std::string& summaryCountFieldName = ""); + + //! Create a copy of the factory owned by the calling code. + virtual CMetricPopulationModelFactory* clone() const; + + //! \name Factory Methods + //@{ + //! Make a new population model for metric values. + //! + //! \param[in] initData The parameters needed to initialize the model. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData) const; + + //! Make a new metric population model from part of a state document. + //! + //! \param[in] initData Additional parameters needed to initialize + //! the model. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + + //! Make a new metric population data gatherer. + //! + //! \param[in] initData The parameters needed to initialize the data + //! gatherer. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const SGathererInitializationData& initData) const; + + //! Make a new metric population data gatherer from part of a state + //! document. + //! + //! \param[in] partitionFieldValue The partition field value. + //! \param[in,out] traverser A state document traverser. + //! \warning It is owned by the calling code. + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + //@} + + //! \name Defaults + //@{ + //! Get the default prior for \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TPriorPtr defaultPrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default multivariate prior for \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + + //! Get the default prior for pairs of correlated time series + //! of \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + //@} + + //! Get the search key corresponding to this factory. + virtual const CSearchKey& searchKey() const; + + //! Returns false. + virtual bool isSimpleCount() const; + + //! Check the pre-summarisation mode for this factory. + virtual model_t::ESummaryMode summaryMode() const; + + //! Get the default data type for models from this factory. + virtual maths_t::EDataType dataType() const; + + //! 
\name Customization + //@{ + //! Set the identifier of the search for which this generates models. + virtual void identifier(int identifier); + + //! Set the name of the field whose values will be counted. + virtual void fieldNames(const std::string& partitionFieldName, + const std::string& overFieldName, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames); + + //! Set whether the models should process missing person and + //! attribute fields. + virtual void useNull(bool useNull); + + //! Set the features which will be modeled. + virtual void features(const TFeatureVec& features); + + //! Set the bucket results delay + virtual void bucketResultsDelay(std::size_t bucketResultsDelay); + //@} + +private: + //! Get the field values which partition the data for modeling. + virtual TStrCRefVec partitioningFields() const; + + //! Restores a single population metric model. + bool modelAcceptRestoreTraverser(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser, + CAnomalyDetectorModel*& model) const; + +private: + //! The identifier of the search for which this generates models. + int m_Identifier; + + //! Indicates whether the data being gathered are already summarized + //! by an external aggregation process + model_t::ESummaryMode m_SummaryMode; + + //! If m_SummaryMode is E_Manual then this is the name of the field + //! holding the summary count. + std::string m_SummaryCountFieldName; + + //! The name of the field which splits the data. + std::string m_PartitionFieldName; + + //! The name of the field which defines the population which + //! will be analyzed. + std::string m_PersonFieldName; + + //! The name of the field which defines the person attributes + //! which will be analyzed. + std::string m_AttributeFieldName; + + //! The name of field whose values define the metric series' + //! values which will be analyzed. + std::string m_ValueFieldName; + + //! The field names for which we are computing influence. These are + //! the fields which can be used to join results across different + //! searches. + TStrVec m_InfluenceFieldNames; + + //! If true the models will process missing person and attribute + //! fields. + bool m_UseNull; + + //! The count features which will be modeled. + TFeatureVec m_Features; + + //! The bucket results delay. + std::size_t m_BucketResultsDelay; + //! A cached search key. + mutable TOptionalSearchKey m_SearchKeyCache; +}; } } diff --git a/include/model/CMetricStatisticWrappers.h b/include/model/CMetricStatisticWrappers.h index 6e02006987..92a310f57e 100644 --- a/include/model/CMetricStatisticWrappers.h +++ b/include/model/CMetricStatisticWrappers.h @@ -20,33 +20,24 @@ #include -namespace ml -{ -namespace model -{ -template class CMetricMultivariateStatistic; +namespace ml { +namespace model { +template +class CMetricMultivariateStatistic; -namespace metric_statistic_wrapper_detail -{ +namespace metric_statistic_wrapper_detail { //! \brief Makes a univariate metric statistic. template -struct SMake -{ - static STATISTIC dispatch(std::size_t /*dimension*/) - { - return STATISTIC(); - } +struct SMake { + static STATISTIC dispatch(std::size_t /*dimension*/) { return STATISTIC(); } }; //! \brief Makes a multivariate metric statistic. 
template -struct SMake > -{ - static CMetricMultivariateStatistic dispatch(std::size_t dimension) - { +struct SMake> { + static CMetricMultivariateStatistic dispatch(std::size_t dimension) { return CMetricMultivariateStatistic(dimension); } - }; } // metric_statistic_wrapper_detail:: @@ -62,8 +53,7 @@ struct SMake > //! It provides static functions for getting the statistic value //! and count if possible, and persisting and restoring them all //! of which delegate to the appropriate statistic functions. -struct MODEL_EXPORT CMetricStatisticWrappers -{ +struct MODEL_EXPORT CMetricStatisticWrappers { using TDouble1Vec = core::CSmallVector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TVarianceAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; @@ -71,91 +61,63 @@ struct MODEL_EXPORT CMetricStatisticWrappers //! Make a statistic. template - static STATISTIC make(std::size_t dimension) - { + static STATISTIC make(std::size_t dimension) { return metric_statistic_wrapper_detail::SMake::dispatch(dimension); } //! Add \p value to an order statistic. template - static void add(const TDouble1Vec &value, - unsigned int count, - maths::CBasicStatistics::COrderStatisticsStack &stat) - { + static void add(const TDouble1Vec& value, unsigned int count, maths::CBasicStatistics::COrderStatisticsStack& stat) { stat.add(value[0], count); } //! Add \p value to a mean statistic. - static void add(const TDouble1Vec &value, unsigned int count, TMeanAccumulator &stat) - { - stat.add(value[0], count); - } + static void add(const TDouble1Vec& value, unsigned int count, TMeanAccumulator& stat) { stat.add(value[0], count); } //! Add \p value to a variance statistic. - static void add(const TDouble1Vec &value, unsigned int count, TVarianceAccumulator &stat) - { - stat.add(value[0], count); - } + static void add(const TDouble1Vec& value, unsigned int count, TVarianceAccumulator& stat) { stat.add(value[0], count); } //! Add \p value to a median statistic. - static void add(const TDouble1Vec &value, unsigned int count, TMedianAccumulator &stat) - { - stat.add(value[0], count); - } + static void add(const TDouble1Vec& value, unsigned int count, TMedianAccumulator& stat) { stat.add(value[0], count); } //! Add \p value to a multivariate statistic. template - static void add(const TDouble1Vec &value, - unsigned int count, - CMetricMultivariateStatistic &stat) - { + static void add(const TDouble1Vec& value, unsigned int count, CMetricMultivariateStatistic& stat) { stat.add(value, count); } //! Get the median value of an order statistic. template - static TDouble1Vec value(const maths::CBasicStatistics::COrderStatisticsStack & stat) - { + static TDouble1Vec value(const maths::CBasicStatistics::COrderStatisticsStack& stat) { return TDouble1Vec{stat[0]}; } //! Get the value of a mean statistic. - static TDouble1Vec value(const TMeanAccumulator &stat) - { - return TDouble1Vec{maths::CBasicStatistics::mean(stat)}; - } + static TDouble1Vec value(const TMeanAccumulator& stat) { return TDouble1Vec{maths::CBasicStatistics::mean(stat)}; } //! Get the value of a variance statistic. 
- static TDouble1Vec value(const TVarianceAccumulator &stat) - { + static TDouble1Vec value(const TVarianceAccumulator& stat) { TDouble1Vec result; - if (maths::CBasicStatistics::count(stat) >= 2.0) - { - result.assign({maths::CBasicStatistics::maximumLikelihoodVariance(stat), - maths::CBasicStatistics::mean(stat)}); + if (maths::CBasicStatistics::count(stat) >= 2.0) { + result.assign({maths::CBasicStatistics::maximumLikelihoodVariance(stat), maths::CBasicStatistics::mean(stat)}); } return result; } //! Get the value of a median statistic. - static TDouble1Vec value(const TMedianAccumulator &stat) - { + static TDouble1Vec value(const TMedianAccumulator& stat) { double result; - if (!stat.quantile(50.0, result)) - { + if (!stat.quantile(50.0, result)) { return TDouble1Vec{0.0}; } return TDouble1Vec{result}; } //! Get the value of a multivariate statistic. template - static TDouble1Vec value(const CMetricMultivariateStatistic &stat) - { + static TDouble1Vec value(const CMetricMultivariateStatistic& stat) { return stat.value(); } //! Forward to the value function. template - static TDouble1Vec influencerValue(const STATISTIC &stat) - { + static TDouble1Vec influencerValue(const STATISTIC& stat) { return value(stat); } //! Get the variance influence value. - static TDouble1Vec influencerValue(const TVarianceAccumulator &stat) - { + static TDouble1Vec influencerValue(const TVarianceAccumulator& stat) { // We always return an influence value (independent of the count) // because this is not used to directly compute a variance only // to adjust the bucket variance. @@ -167,123 +129,89 @@ struct MODEL_EXPORT CMetricStatisticWrappers //! Get the value suitable for computing influence of a multivariate //! statistic. template - static TDouble1Vec influencerValue(const CMetricMultivariateStatistic &stat) - { + static TDouble1Vec influencerValue(const CMetricMultivariateStatistic& stat) { return stat.influencerValue(); } //! Returns 1.0 since this is not available. template - static double count(const maths::CBasicStatistics::COrderStatisticsStack &/*stat*/) - { + static double count(const maths::CBasicStatistics::COrderStatisticsStack& /*stat*/) { return 1.0; } //! Get the count of the statistic. - static double count(const TMeanAccumulator &stat) - { - return static_cast(maths::CBasicStatistics::count(stat)); - } + static double count(const TMeanAccumulator& stat) { return static_cast(maths::CBasicStatistics::count(stat)); } //! Get the count of the statistic. - static double count(const TVarianceAccumulator &stat) - { - return static_cast(maths::CBasicStatistics::count(stat)); - } + static double count(const TVarianceAccumulator& stat) { return static_cast(maths::CBasicStatistics::count(stat)); } //! Get the count of the statistic. - static double count(const TMedianAccumulator &stat) - { - return stat.count(); - } + static double count(const TMedianAccumulator& stat) { return stat.count(); } //! Get the count of a multivariate statistic. template - static double count(const CMetricMultivariateStatistic &stat) - { + static double count(const CMetricMultivariateStatistic& stat) { return stat.count(); } //! Persist an order statistic. template - static void persist(const maths::CBasicStatistics::COrderStatisticsStack &stat, - const std::string &tag, - core::CStatePersistInserter &inserter) - { + static void persist(const maths::CBasicStatistics::COrderStatisticsStack& stat, + const std::string& tag, + core::CStatePersistInserter& inserter) { inserter.insertValue(tag, stat.toDelimited()); } //! 
Persist a mean statistic. - static void persist(const TMeanAccumulator &stat, - const std::string &tag, - core::CStatePersistInserter &inserter) - { + static void persist(const TMeanAccumulator& stat, const std::string& tag, core::CStatePersistInserter& inserter) { inserter.insertValue(tag, stat.toDelimited()); } //! Persist a variance statistic. - static void persist(const TVarianceAccumulator &stat, - const std::string &tag, - core::CStatePersistInserter &inserter) - { + static void persist(const TVarianceAccumulator& stat, const std::string& tag, core::CStatePersistInserter& inserter) { inserter.insertValue(tag, stat.toDelimited()); } //! Persist a median statistic. - static void persist(const TMedianAccumulator &stat, - const std::string &tag, - core::CStatePersistInserter &inserter) - { + static void persist(const TMedianAccumulator& stat, const std::string& tag, core::CStatePersistInserter& inserter) { inserter.insertLevel(tag, boost::bind(&TMedianAccumulator::acceptPersistInserter, &stat, _1)); } //! Persist a multivariate statistic. template - static void persist(const CMetricMultivariateStatistic &stat, - const std::string &tag, - core::CStatePersistInserter &inserter) - { + static void + persist(const CMetricMultivariateStatistic& stat, const std::string& tag, core::CStatePersistInserter& inserter) { inserter.insertLevel(tag, boost::bind(&CMetricMultivariateStatistic::persist, &stat, _1)); } //! Restore an order statistic. template - static inline bool restore(core::CStateRestoreTraverser &traverser, - maths::CBasicStatistics::COrderStatisticsStack &stat) - { - if (stat.fromDelimited(traverser.value()) == false) - { + static inline bool restore(core::CStateRestoreTraverser& traverser, + maths::CBasicStatistics::COrderStatisticsStack& stat) { + if (stat.fromDelimited(traverser.value()) == false) { LOG_ERROR("Invalid statistic in " << traverser.value()); return false; } return true; } //! Restore a mean statistic. - static bool restore(core::CStateRestoreTraverser &traverser, TMeanAccumulator &stat) - { - if (stat.fromDelimited(traverser.value()) == false) - { + static bool restore(core::CStateRestoreTraverser& traverser, TMeanAccumulator& stat) { + if (stat.fromDelimited(traverser.value()) == false) { LOG_ERROR("Invalid mean in " << traverser.value()); return false; } return true; } //! Restore a variance statistic. - static bool restore(core::CStateRestoreTraverser &traverser, TVarianceAccumulator &stat) - { - if (stat.fromDelimited(traverser.value()) == false) - { + static bool restore(core::CStateRestoreTraverser& traverser, TVarianceAccumulator& stat) { + if (stat.fromDelimited(traverser.value()) == false) { LOG_ERROR("Invalid variance in " << traverser.value()); return false; } return true; } //! Restore a median statistic. - static bool restore(core::CStateRestoreTraverser &traverser, TMedianAccumulator &stat) - { + static bool restore(core::CStateRestoreTraverser& traverser, TMedianAccumulator& stat) { return traverser.traverseSubLevel(boost::bind(&TMedianAccumulator::acceptRestoreTraverser, &stat, _1)); } //! Restore a multivariate statistic. 
template - static bool restore(core::CStateRestoreTraverser &traverser, - CMetricMultivariateStatistic &stat) - { + static bool restore(core::CStateRestoreTraverser& traverser, CMetricMultivariateStatistic& stat) { return traverser.traverseSubLevel(boost::bind(&CMetricMultivariateStatistic::restore, &stat, _1)); } }; - } } diff --git a/include/model/CModelDetailsView.h b/include/model/CModelDetailsView.h index 071e85b74c..4cbc550224 100644 --- a/include/model/CModelDetailsView.h +++ b/include/model/CModelDetailsView.h @@ -17,10 +17,8 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { class CEventRateModel; class CEventRateOnlineModel; class CEventRatePopulationModel; @@ -35,169 +33,142 @@ class CMetricPopulationModel; //! avoid cluttering the CAnomalyDetectorModel interface. The intention is to expose all //! aspects of the mathematical models of both individual and population //! models for visualization purposes. -class MODEL_EXPORT CModelDetailsView -{ - public: - using TFeatureVec = std::vector; - using TStrSet = std::set; - - public: - virtual ~CModelDetailsView() = default; - - //! Get the identifier of the person called \p name if they exist. - bool personId(const std::string &person, std::size_t &result) const; - - //! Get the identifier of the person called \p name if they exist. - bool categoryId(const std::string &attribute, std::size_t &result) const; - - //! Get the collection of features for which data is being gathered. - const TFeatureVec &features() const; - - //! Get data for creating a model plot error bar at \p time for the - //! confidence interval \p boundsPercentile and the by fields identified - //! by \p terms. - //! - //! \note If \p terms is empty all by field error bars are returned. - void modelPlot(core_t::TTime time, - double boundsPercentile, - const TStrSet &terms, - CModelPlotData &modelPlotData) const; - - //! Get the feature prior for the specified by field \p byFieldId. - virtual const maths::CModel *model(model_t::EFeature feature, - std::size_t byFieldId) const = 0; - - private: - //! Add the model plot data for all by field values which match \p terms. - void addCurrentBucketValues(core_t::TTime time, - model_t::EFeature feature, - const TStrSet &terms, - CModelPlotData &modelPlotData) const; - - //! Get the model plot data for the specified by field value. - void modelPlotForByFieldId(core_t::TTime, - double boundsPercentile, - model_t::EFeature feature, - std::size_t byFieldId, - CModelPlotData &modelPlotData) const; - - //! Get the underlying model. - virtual const CAnomalyDetectorModel &base() const = 0; - - //! Get the count variance scale. - virtual double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const = 0; - - //! Returns true if the terms are empty or they contain the key. - static bool contains(const TStrSet &terms, const std::string &key); - - //! Check if the model has a by field. - bool hasByField() const; - //! Get the maximum by field identifier. - std::size_t maxByFieldId() const; - //! Try to get the by field identifier corresponding to \p byFieldValue. - bool byFieldId(const std::string &byFieldValue, std::size_t &result) const; - //! Get the by field value corresponding to \p byFieldId. - const std::string &byFieldValue(std::size_t byFieldId) const; - //! Get the by field corresponding to (\p pid, \p cid). - const std::string &byFieldValue(std::size_t pid, std::size_t cid) const; - //! 
Check if the by field identified by \p byFieldId is currently in use. - bool isByFieldIdActive(std::size_t byFieldId) const; +class MODEL_EXPORT CModelDetailsView { +public: + using TFeatureVec = std::vector; + using TStrSet = std::set; + +public: + virtual ~CModelDetailsView() = default; + + //! Get the identifier of the person called \p name if they exist. + bool personId(const std::string& person, std::size_t& result) const; + + //! Get the identifier of the person called \p name if they exist. + bool categoryId(const std::string& attribute, std::size_t& result) const; + + //! Get the collection of features for which data is being gathered. + const TFeatureVec& features() const; + + //! Get data for creating a model plot error bar at \p time for the + //! confidence interval \p boundsPercentile and the by fields identified + //! by \p terms. + //! + //! \note If \p terms is empty all by field error bars are returned. + void modelPlot(core_t::TTime time, double boundsPercentile, const TStrSet& terms, CModelPlotData& modelPlotData) const; + + //! Get the feature prior for the specified by field \p byFieldId. + virtual const maths::CModel* model(model_t::EFeature feature, std::size_t byFieldId) const = 0; + +private: + //! Add the model plot data for all by field values which match \p terms. + void addCurrentBucketValues(core_t::TTime time, model_t::EFeature feature, const TStrSet& terms, CModelPlotData& modelPlotData) const; + + //! Get the model plot data for the specified by field value. + void modelPlotForByFieldId(core_t::TTime, + double boundsPercentile, + model_t::EFeature feature, + std::size_t byFieldId, + CModelPlotData& modelPlotData) const; + + //! Get the underlying model. + virtual const CAnomalyDetectorModel& base() const = 0; + + //! Get the count variance scale. + virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const = 0; + + //! Returns true if the terms are empty or they contain the key. + static bool contains(const TStrSet& terms, const std::string& key); + + //! Check if the model has a by field. + bool hasByField() const; + //! Get the maximum by field identifier. + std::size_t maxByFieldId() const; + //! Try to get the by field identifier corresponding to \p byFieldValue. + bool byFieldId(const std::string& byFieldValue, std::size_t& result) const; + //! Get the by field value corresponding to \p byFieldId. + const std::string& byFieldValue(std::size_t byFieldId) const; + //! Get the by field corresponding to (\p pid, \p cid). + const std::string& byFieldValue(std::size_t pid, std::size_t cid) const; + //! Check if the by field identified by \p byFieldId is currently in use. + bool isByFieldIdActive(std::size_t byFieldId) const; }; //! \brief A view into the details of a CEventRateModel object. //! //! \sa CModelDetailsView. -class MODEL_EXPORT CEventRateModelDetailsView : public CModelDetailsView -{ - public: - CEventRateModelDetailsView(const CEventRateModel &model); - - //! Get the feature model for the specified by field id. - virtual const maths::CModel *model(model_t::EFeature feature, - std::size_t byFieldId) const; - - private: - virtual const CAnomalyDetectorModel &base() const; - virtual double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const; - - private: - //! The model. 
- const CEventRateModel *m_Model; +class MODEL_EXPORT CEventRateModelDetailsView : public CModelDetailsView { +public: + CEventRateModelDetailsView(const CEventRateModel& model); + + //! Get the feature model for the specified by field id. + virtual const maths::CModel* model(model_t::EFeature feature, std::size_t byFieldId) const; + +private: + virtual const CAnomalyDetectorModel& base() const; + virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + +private: + //! The model. + const CEventRateModel* m_Model; }; //! \brief A view into the details of a CEventRatePopulationModel object. //! //! \sa CModelDetailsView. -class MODEL_EXPORT CEventRatePopulationModelDetailsView : public CModelDetailsView -{ - public: - CEventRatePopulationModelDetailsView(const CEventRatePopulationModel &model); - - //! Get the feature model for the specified by field id. - virtual const maths::CModel *model(model_t::EFeature feature, - std::size_t byFieldId) const; - - private: - virtual const CAnomalyDetectorModel &base() const; - virtual double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const; - - private: - //! The model. - const CEventRatePopulationModel *m_Model; +class MODEL_EXPORT CEventRatePopulationModelDetailsView : public CModelDetailsView { +public: + CEventRatePopulationModelDetailsView(const CEventRatePopulationModel& model); + + //! Get the feature model for the specified by field id. + virtual const maths::CModel* model(model_t::EFeature feature, std::size_t byFieldId) const; + +private: + virtual const CAnomalyDetectorModel& base() const; + virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + +private: + //! The model. + const CEventRatePopulationModel* m_Model; }; //! \brief A view into the details of a CMetricModel object. //! //! \sa CModelDetailsView. -class MODEL_EXPORT CMetricModelDetailsView : public CModelDetailsView -{ - public: - CMetricModelDetailsView(const CMetricModel &model); - - //! Get the feature model for the specified by field id. - virtual const maths::CModel *model(model_t::EFeature feature, - std::size_t byFieldId) const; - - private: - virtual const CAnomalyDetectorModel &base() const; - virtual double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const; - - private: - //! The model. - const CMetricModel *m_Model; +class MODEL_EXPORT CMetricModelDetailsView : public CModelDetailsView { +public: + CMetricModelDetailsView(const CMetricModel& model); + + //! Get the feature model for the specified by field id. + virtual const maths::CModel* model(model_t::EFeature feature, std::size_t byFieldId) const; + +private: + virtual const CAnomalyDetectorModel& base() const; + virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + +private: + //! The model. + const CMetricModel* m_Model; }; //! \brief A view into the details of a CMetricPopulationModel object. //! //! \sa CModelDetailsView. -class MODEL_EXPORT CMetricPopulationModelDetailsView : public CModelDetailsView -{ - public: - CMetricPopulationModelDetailsView(const CMetricPopulationModel &model); - - //! Get the feature model for the specified by field id. 
- virtual const maths::CModel *model(model_t::EFeature feature, - std::size_t byFieldId) const; - - private: - virtual const CAnomalyDetectorModel &base() const; - virtual double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const; - - private: - //! The model. - const CMetricPopulationModel *m_Model; -}; +class MODEL_EXPORT CMetricPopulationModelDetailsView : public CModelDetailsView { +public: + CMetricPopulationModelDetailsView(const CMetricPopulationModel& model); + //! Get the feature model for the specified by field id. + virtual const maths::CModel* model(model_t::EFeature feature, std::size_t byFieldId) const; + +private: + virtual const CAnomalyDetectorModel& base() const; + virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + +private: + //! The model. + const CMetricPopulationModel* m_Model; +}; } } diff --git a/include/model/CModelFactory.h b/include/model/CModelFactory.h index 8c208733eb..1ebd4b9c7b 100644 --- a/include/model/CModelFactory.h +++ b/include/model/CModelFactory.h @@ -7,8 +7,8 @@ #ifndef INCLUDED_ml_model_CModelFactory_h #define INCLUDED_ml_model_CModelFactory_h -#include #include +#include #include #include @@ -27,15 +27,12 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CModel; class CMultinomialConjugate; class CMultivariatePrior; @@ -44,8 +41,7 @@ class CTimeSeriesCorrelations; class CTimeSeriesDecompositionInterface; } -namespace model -{ +namespace model { class CAnomalyDetectorModel; class CDataGatherer; class CDetectionRule; @@ -68,369 +64,349 @@ class CSearchKey; //! to either compute online or delta probabilities for log messages, //! metric values, etc. This hierarchy implements the factory pattern //! for the CAnomalyDetectorModel hierarchy for this purpose. -class MODEL_EXPORT CModelFactory -{ - public: - using TFeatureVec = std::vector; - using TStrVec = std::vector; - using TOptionalUInt = boost::optional; - using TStrCRef = boost::reference_wrapper; - using TStrCRefVec = std::vector; - using TDataGathererPtr = boost::shared_ptr; - using TPriorPtr = boost::shared_ptr; - using TMultivariatePriorPtr = boost::shared_ptr; - using TFeatureMultivariatePriorPtrPr = std::pair; - using TFeatureMultivariatePriorPtrPrVec = std::vector; - using TDecompositionCPtr = boost::shared_ptr; - using TMathsModelPtr = boost::shared_ptr; - using TCorrelationsPtr = boost::shared_ptr; - using TFeatureCorrelationsPtrPr = std::pair; - using TFeatureCorrelationsPtrPrVec = std::vector; - using TFeatureMathsModelPtrPr = std::pair; - using TFeatureMathsModelPtrPrVec = std::vector; - using TModelPtr = boost::shared_ptr; - using TModelCPtr = boost::shared_ptr; - using TInfluenceCalculatorCPtr = boost::shared_ptr; - using TFeatureInfluenceCalculatorCPtrPr = std::pair; - using TFeatureInfluenceCalculatorCPtrPrVec = std::vector; - using TFeatureInfluenceCalculatorCPtrPrVecVec = std::vector; - using TDetectionRuleVec = std::vector; - using TDetectionRuleVecCRef = boost::reference_wrapper; - using TStrDetectionRulePr = std::pair; - using TStrDetectionRulePrVec = std::vector; - using TStrDetectionRulePrVecCRef = boost::reference_wrapper; - - - - public: - //! Wrapper around the model initialization data. - //! - //! IMPLEMENTATION DECISIONS:\n - //! We wrap up the initialization data in an object so we don't - //! 
need to change the signature of every factory function each
-        //! time we need extra data to initialize a model.
-        struct MODEL_EXPORT SModelInitializationData
-        {
-            explicit SModelInitializationData(const TDataGathererPtr &dataGatherer);
-
-            TDataGathererPtr s_DataGatherer;
-        };
-
-        //! Wrapper around the data gatherer initialization data.
-        //!
-        //! IMPLEMENTATION DECISIONS:\n
-        //! We wrap up the initialization data in an object so we don't
-        //! need to change the signature of every factory function each
-        //! time we need extra data to initialize a data gatherer.
-        struct MODEL_EXPORT SGathererInitializationData
-        {
-            SGathererInitializationData(core_t::TTime startTime,
-                                        const std::string &partitionFieldValue,
-                                        unsigned int sampleOverrideCount = 0u);
-
-            //! This constructor is meant to simplify unit tests
-            SGathererInitializationData(const core_t::TTime startTime);
-
-            core_t::TTime s_StartTime;
-            const std::string &s_PartitionFieldValue;
-            unsigned int s_SampleOverrideCount;
-        };
-
-    public:
-        static const std::string EMPTY_STRING;
-
-    public:
-        CModelFactory(const SModelParams &params);
-        virtual ~CModelFactory() = default;
-
-        //! Create a copy of the factory owned by the calling code.
-        virtual CModelFactory *clone() const = 0;
-
-        //! \name Factory Methods
-        //@{
-        //! Make a new model.
-        //!
-        //! \param[in] initData The parameters needed to initialize the model.
-        //! \warning It is owned by the calling code.
-        virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData) const = 0;
-
-        //! Make a new model from part of a state document.
-        //!
-        //! \param[in] initData Additional parameters needed to initialize
-        //! the model.
-        //! \param[in,out] traverser A state document traverser.
-        //! \warning It is owned by the calling code.
-        virtual CAnomalyDetectorModel *makeModel(const SModelInitializationData &initData,
-                                                 core::CStateRestoreTraverser &traverser) const = 0;
-
-        //! Make a new data gatherer.
-        //!
-        //! \param[in] initData The parameters needed to initialize the
-        //! data gatherer.
-        //! \warning It is owned by the calling code.
-        virtual CDataGatherer *makeDataGatherer(const SGathererInitializationData &initData) const = 0;
-
-        //! Make a new data gatherer from part of a state document.
-        //!
-        //! \param[in,out] traverser A state document traverser.
-        //! \param[in] partitionFieldValue The partition field value.
-        //! \warning It is owned by the calling code.
-        virtual CDataGatherer *makeDataGatherer(const std::string &partitionFieldValue,
-                                                core::CStateRestoreTraverser &traverser) const = 0;
-        //@}
-
-        //! \name Defaults
-        //@{
-        //! Get the default models to use for \p features and \p bucketLength.
-        const TFeatureMathsModelPtrPrVec &defaultFeatureModels(const TFeatureVec &features,
-                                                               core_t::TTime bucketLength,
-                                                               double minimumSeasonalVarianceScale,
-                                                               bool modelAnomalies) const;
-
-        //! Get the default model to use for \p feature and \p bucketLength.
-        TMathsModelPtr defaultFeatureModel(model_t::EFeature feature,
-                                           core_t::TTime bucketLength,
-                                           double minimumSeasonalVarianceScale,
-                                           bool modelAnomalies) const;
-
-        //! Get the default correlate priors to use for correlated pairs of time
-        //! series of \p features.
-        const TFeatureMultivariatePriorPtrPrVec &defaultCorrelatePriors(const TFeatureVec &features) const;
-
-        //! Get the default models for correlations of \p features.
-        const TFeatureCorrelationsPtrPrVec &defaultCorrelates(const TFeatureVec &features) const;
-
-        //! Get the default prior to use for \p feature.
-        TPriorPtr defaultPrior(model_t::EFeature feature) const;
-
-        //! Get the default prior to use for multivariate \p feature.
-        TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature) const;
-
-        //! Get the default prior to use for correlated pairs of time
-        //! series for univariate \p feature.
-        TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature) const;
-
-        //! Get the default prior for \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TPriorPtr defaultPrior(model_t::EFeature feature,
-                                       const SModelParams &params) const = 0;
-
-        //! Get the default prior for multivariate \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature,
-                                                               const SModelParams &params) const = 0;
-
-        //! Get the default prior for pairs of correlated time series
-        //! of \p feature.
-        //!
-        //! \param[in] feature The feature for which to get the prior.
-        //! \param[in] params The model parameters.
-        virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature,
-                                                            const SModelParams &params) const = 0;
-
-        //! Get the default prior to use for categorical data.
-        maths::CMultinomialConjugate defaultCategoricalPrior() const;
-
-        //! Get the default time series decomposition.
-        //!
-        //! \param[in] feature The feature for which to get the decomposition.
-        //! \param[in] bucketLength The data bucketing length.
-        TDecompositionCPtr defaultDecomposition(model_t::EFeature feature,
-                                                core_t::TTime bucketLength) const;
-
-        //! Get the influence calculators to use for each feature in \p features.
-        const TFeatureInfluenceCalculatorCPtrPrVec &
-            defaultInfluenceCalculators(const std::string &influencerName,
-                                        const TFeatureVec &features) const;
-        //@}
-
-        //! Get the search key corresponding to this factory.
-        virtual const CSearchKey &searchKey() const = 0;
-
-        //! Check if this makes the model used for a simple counting search.
-        virtual bool isSimpleCount() const = 0;
-
-        //! Check the pre-summarisation mode for this factory.
-        virtual model_t::ESummaryMode summaryMode() const = 0;
-
-        //! Get the default data type for models from this factory.
-        virtual maths_t::EDataType dataType() const = 0;
-
-        //! \name Customization by a specific search
-        //@{
-        //! Set the identifier of the search for which this generates models.
-        virtual void identifier(int identifier) = 0;
-
-        //! Set the record field names which will be modeled.
-        virtual void fieldNames(const std::string &partitionFieldName,
-                                const std::string &overFieldName,
-                                const std::string &byFieldName,
-                                const std::string &valueFieldName,
-                                const TStrVec &influenceFieldNames) = 0;
-
-        //! Set whether the model should process missing field values.
-        virtual void useNull(bool useNull) = 0;
-
-        //! Set the features which will be modeled.
-        virtual void features(const TFeatureVec &features) = 0;
-
-        //! Set the amount by which metric sample count is reduced for
-        //! fine-grained sampling when there is latency.
-        void sampleCountFactor(std::size_t sampleCountFactor);
-
-        //! Set the bucket results delay
-        virtual void bucketResultsDelay(std::size_t bucketResultsDelay) = 0;
-
-        //! Set whether the model should exclude frequent hitters from the
-        //! calculations.
-        void excludeFrequent(model_t::EExcludeFrequent excludeFrequent);
-
-        //! Set the detection rules for a detector.
-        void detectionRules(TDetectionRuleVecCRef detectionRules);
-        //@}
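// Editorial aside: a self-contained sketch of how a caller might drive
// the customization setters declared above; the stand-in type, field
// names and values are all invented for illustration.
//
//     #include <string>
//     #include <vector>
//
//     struct FactoryLike {
//         void identifier(int id) {}
//         void fieldNames(const std::string& partition, const std::string& over,
//                         const std::string& by, const std::string& value,
//                         const std::vector<std::string>& influences) {}
//         void useNull(bool useNull) {}
//     };
//
//     int main() {
//         FactoryLike factory;
//         factory.identifier(1);
//         // over field = the population ("customer"); by field = the
//         // attribute modeled per population member ("process").
//         factory.fieldNames("datacenter", "customer", "process", "bytes_sent", {"region"});
//         factory.useNull(false);
//     }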
-
-        //! Set the scheduled events
-        void scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents);
-
-        //! \name Customization by mlmodel.conf
-        //@{
-        //! Set the learn rate used for initializing models.
-        void learnRate(double learnRate);
-
-        //! Set the decay rate used for initializing the models.
-        void decayRate(double decayRate);
-
-        //! Set the initial decay rate multiplier used for initializing
-        //! models.
-        void initialDecayRateMultiplier(double multiplier);
-
-        //! Set the maximum number of times we'll update a person's model
-        //! in a bucketing interval.
-        void maximumUpdatesPerBucket(double maximumUpdatesPerBucket);
-
-        //! Set the prune window scale factor minimum
-        void pruneWindowScaleMinimum(double factor);
-
-        //! Set the prune window scale factor maximum
-        void pruneWindowScaleMaximum(double factor);
-
-        //! Set whether multivariate analysis of correlated 'by' fields should
-        //! be performed.
-        void multivariateByFields(bool enabled);
-
-        //! Set the minimum mode fraction used for initializing the models.
-        void minimumModeFraction(double minimumModeFraction);
-
-        //! Set the minimum mode count used for initializing the models.
-        void minimumModeCount(double minimumModeCount);
-
-        //! Set the periods and the number of points we'll use to model
-        //! each of the seasonal components in the data.
-        void componentSize(std::size_t componentSize);
-        //@}
-
-        //! Update the bucket length, for ModelAutoConfig's benefit
-        void updateBucketLength(core_t::TTime length);
-
-        //! Get global model configuration parameters.
-        const SModelParams &modelParams() const;
-
-        //! Get the minimum mode fraction used for initializing the models.
-        double minimumModeFraction() const;
-
-        //! Get the minimum mode count used for initializing the models.
-        double minimumModeCount() const;
-
-        //! Get the number of points to use for approximating each seasonal
-        //! component.
-        std::size_t componentSize() const;
-
-    protected:
-        using TMultivariatePriorPtrVec = std::vector<TMultivariatePriorPtr>;
-        using TOptionalSearchKey = boost::optional<CSearchKey>;
-
-    protected:
-        //! Efficiently swap the contents of this and other.
-        //!
-        //! \note This only swaps the state held on this base class.
-        void swap(CModelFactory &other);
-
-        //! Get a multivariate normal prior with dimension \p dimension.
-        //!
-        //! \param[in] dimension The dimension.
-        //! \param[in] params The model parameters.
-        //! \warning Up to ten dimensions are supported.
-        TMultivariatePriorPtr multivariateNormalPrior(std::size_t dimension,
-                                                      const SModelParams &params) const;
-
-        //! Get a multivariate multimodal prior with dimension \p dimension.
-        //!
-        //! \param[in] dimension The dimension.
-        //! \param[in] params The model parameters.
-        //! \warning Up to ten dimensions are supported.
-        TMultivariatePriorPtr multivariateMultimodalPrior(std::size_t dimension,
-                                                          const SModelParams &params,
-                                                          const maths::CMultivariatePrior &modePrior) const;
-
-        //! Get a multivariate 1-of-n prior with dimension \p dimension.
-        //!
-        //! \param[in] dimension The dimension.
-        //! \param[in] params The model parameters.
-        //! \param[in] models The component models to select between.
-        TMultivariatePriorPtr multivariateOneOfNPrior(std::size_t dimension,
-                                                      const SModelParams &params,
-                                                      const TMultivariatePriorPtrVec &models) const;
-
-        //! Get the default prior for time-of-day and time-of-week modeling.
-        //! This is just a mixture of normals which allows more modes than
-        //! we typically do.
-        //!
-        //! \param[in] params The model parameters.
-        TPriorPtr timeOfDayPrior(const SModelParams &params) const;
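// Editorial aside: "a mixture of normals" above denotes the standard
// mixture density (illustrative notation, not taken from the code):
//
//     p(t) = sum_{k=1}^{K} w_k * N(t; mu_k, sigma_k^2),   sum_k w_k = 1,
//
// where each mode (mu_k, sigma_k) can capture one recurring pattern,
// e.g. separate morning and evening peaks in time-of-day data.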
\param[in] params The model parameters.
-        TPriorPtr timeOfDayPrior(const SModelParams &params) const;
-
-        //! Get the default prior for latitude and longitude modeling.
-        //! This is just a mixture of correlate normals which allows more
-        //! modes than we typically do.
-        //!
-        //! \param[in] params The model parameters.
-        TMultivariatePriorPtr latLongPrior(const SModelParams &params) const;
-
-    private:
-        using TFeatureVecMathsModelMap = std::map<TFeatureVec, TFeatureMathsModelPtrPrVec>;
-        using TFeatureVecMultivariatePriorMap = std::map<TFeatureVec, TFeatureMultivariatePriorPtrPrVec>;
-        using TFeatureVecCorrelationsMap = std::map<TFeatureVec, TFeatureCorrelationsPtrPrVec>;
-        using TStrFeatureVecPr = std::pair<std::string, TFeatureVec>;
-        using TStrFeatureVecPrInfluenceCalculatorCPtrMap = std::map<TStrFeatureVecPr, TFeatureInfluenceCalculatorCPtrPrVec>;
-
-    private:
-        //! Get the field values which partition the data for modeling.
-        virtual TStrCRefVec partitioningFields() const = 0;
-
-    private:
-        //! The global model configuration parameters.
-        SModelParams m_ModelParams;
-
-        //! A cache of models for collections of features.
-        mutable TFeatureVecMathsModelMap m_MathsModelCache;
-
-        //! A cache of priors for correlate pairs of collections of features.
-        mutable TFeatureVecMultivariatePriorMap m_CorrelatePriorCache;
-
-        //! A cache of models of the correlations of collections of features.
-        mutable TFeatureVecCorrelationsMap m_CorrelationsCache;
-
-        //! A cache of influence calculators for collections of features.
-        mutable TStrFeatureVecPrInfluenceCalculatorCPtrMap m_InfluenceCalculatorCache;
+class MODEL_EXPORT CModelFactory {
+public:
+    using TFeatureVec = std::vector<model_t::EFeature>;
+    using TStrVec = std::vector<std::string>;
+    using TOptionalUInt = boost::optional<unsigned int>;
+    using TStrCRef = boost::reference_wrapper<const std::string>;
+    using TStrCRefVec = std::vector<TStrCRef>;
+    using TDataGathererPtr = boost::shared_ptr<CDataGatherer>;
+    using TPriorPtr = boost::shared_ptr<maths::CPrior>;
+    using TMultivariatePriorPtr = boost::shared_ptr<maths::CMultivariatePrior>;
+    using TFeatureMultivariatePriorPtrPr = std::pair<model_t::EFeature, TMultivariatePriorPtr>;
+    using TFeatureMultivariatePriorPtrPrVec = std::vector<TFeatureMultivariatePriorPtrPr>;
+    using TDecompositionCPtr = boost::shared_ptr<const maths::CTimeSeriesDecompositionInterface>;
+    using TMathsModelPtr = boost::shared_ptr<maths::CModel>;
+    using TCorrelationsPtr = boost::shared_ptr<maths::CTimeSeriesCorrelations>;
+    using TFeatureCorrelationsPtrPr = std::pair<model_t::EFeature, TCorrelationsPtr>;
+    using TFeatureCorrelationsPtrPrVec = std::vector<TFeatureCorrelationsPtrPr>;
+    using TFeatureMathsModelPtrPr = std::pair<model_t::EFeature, TMathsModelPtr>;
+    using TFeatureMathsModelPtrPrVec = std::vector<TFeatureMathsModelPtrPr>;
+    using TModelPtr = boost::shared_ptr<CAnomalyDetectorModel>;
+    using TModelCPtr = boost::shared_ptr<const CAnomalyDetectorModel>;
+    using TInfluenceCalculatorCPtr = boost::shared_ptr<const CInfluenceCalculator>;
+    using TFeatureInfluenceCalculatorCPtrPr = std::pair<model_t::EFeature, TInfluenceCalculatorCPtr>;
+    using TFeatureInfluenceCalculatorCPtrPrVec = std::vector<TFeatureInfluenceCalculatorCPtrPr>;
+    using TFeatureInfluenceCalculatorCPtrPrVecVec = std::vector<TFeatureInfluenceCalculatorCPtrPrVec>;
+    using TDetectionRuleVec = std::vector<CDetectionRule>;
+    using TDetectionRuleVecCRef = boost::reference_wrapper<const TDetectionRuleVec>;
+    using TStrDetectionRulePr = std::pair<std::string, CDetectionRule>;
+    using TStrDetectionRulePrVec = std::vector<TStrDetectionRulePr>;
+    using TStrDetectionRulePrVecCRef = boost::reference_wrapper<const TStrDetectionRulePrVec>;
+
+public:
+    //! Wrapper around the model initialization data.
+    //!
+    //! IMPLEMENTATION DECISIONS:\n
+    //! We wrap up the initialization data in an object so we don't
+    //! need to change the signature of every factory function each
+    //! time we need extra data to initialize a model.
+    struct MODEL_EXPORT SModelInitializationData {
+        explicit SModelInitializationData(const TDataGathererPtr& dataGatherer);
+
+        TDataGathererPtr s_DataGatherer;
+    };
+
+    //! Wrapper around the data gatherer initialization data.
+    //!
+    //! IMPLEMENTATION DECISIONS:\n
+    //! We wrap up the initialization data in an object so we don't
+    //! need to change the signature of every factory function each
+    //! time we need extra data to initialize a data gatherer.
+    struct MODEL_EXPORT SGathererInitializationData {
+        SGathererInitializationData(core_t::TTime startTime, const std::string& partitionFieldValue, unsigned int sampleOverrideCount = 0u);
+
+        //! This constructor is meant to simplify unit tests
+        SGathererInitializationData(const core_t::TTime startTime);
+
+        core_t::TTime s_StartTime;
+        const std::string& s_PartitionFieldValue;
+        unsigned int s_SampleOverrideCount;
+    };
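
The two initialization-data structs above are a textbook parameter object: every factory entry point takes a single struct, so threading a new piece of state through to all concrete factories means adding one field rather than touching every signature. A minimal self-contained sketch of the pattern, with hypothetical stand-in types rather than the real model classes:

    #include <iostream>
    #include <memory>
    #include <string>

    // Hypothetical parameter object; future fields are added here and
    // no factory signature has to change.
    struct SInitData {
        std::string s_PartitionFieldValue;
    };

    struct CModel {
        explicit CModel(const SInitData& initData)
            : s_Partition{initData.s_PartitionFieldValue} {}
        std::string s_Partition;
    };

    struct CFactory {
        virtual ~CFactory() = default;
        // One stable signature for every concrete factory to implement.
        virtual std::unique_ptr<CModel> makeModel(const SInitData& initData) const {
            return std::make_unique<CModel>(initData);
        }
    };

    int main() {
        CFactory factory;
        std::cout << factory.makeModel(SInitData{"my_partition"})->s_Partition << '\n';
        return 0;
    }
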
+
+public:
+    static const std::string EMPTY_STRING;
+
+public:
+    CModelFactory(const SModelParams& params);
+    virtual ~CModelFactory() = default;
+
+    //! Create a copy of the factory owned by the calling code.
+    virtual CModelFactory* clone() const = 0;
+
+    //! \name Factory Methods
+    //@{
+    //! Make a new model.
+    //!
+    //! \param[in] initData The parameters needed to initialize the model.
+    //! \warning It is owned by the calling code.
+    virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData) const = 0;
+
+    //! Make a new model from part of a state document.
+    //!
+    //! \param[in] initData Additional parameters needed to initialize
+    //! the model.
+    //! \param[in,out] traverser A state document traverser.
+    //! \warning It is owned by the calling code.
+    virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const = 0;
+
+    //! Make a new data gatherer.
+    //!
+    //! \param[in] initData The parameters needed to initialize the
+    //! data gatherer.
+    //! \warning It is owned by the calling code.
+    virtual CDataGatherer* makeDataGatherer(const SGathererInitializationData& initData) const = 0;
+
+    //! Make a new data gatherer from part of a state document.
+    //!
+    //! \param[in,out] traverser A state document traverser.
+    //! \param[in] partitionFieldValue The partition field value.
+    //! \warning It is owned by the calling code.
+    virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const = 0;
+    //@}
+
+    //! \name Defaults
+    //@{
+    //! Get the default models to use for \p features and \p bucketLength.
+    const TFeatureMathsModelPtrPrVec& defaultFeatureModels(const TFeatureVec& features,
+                                                           core_t::TTime bucketLength,
+                                                           double minimumSeasonalVarianceScale,
+                                                           bool modelAnomalies) const;
+
+    //! Get the default model to use for \p features and \p bucketLength.
+    TMathsModelPtr defaultFeatureModel(model_t::EFeature feature,
+                                       core_t::TTime bucketLength,
+                                       double minimumSeasonalVarianceScale,
+                                       bool modelAnomalies) const;
+
+    //! Get the default correlate priors to use for correlated pairs of time
+    //! series of \p features.
+    const TFeatureMultivariatePriorPtrPrVec& defaultCorrelatePriors(const TFeatureVec& features) const;
+
+    //! Get the default models for correlations of \p features.
+    const TFeatureCorrelationsPtrPrVec& defaultCorrelates(const TFeatureVec& features) const;
+
+    //! Get the default prior to use for \p feature.
+    TPriorPtr defaultPrior(model_t::EFeature feature) const;
+
+    //! Get the default prior to use for multivariate \p feature.
+    TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature) const;
+
+    //! Get the default prior to use for correlated pairs of time
+    //! series for univariate \p feature.
+    TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature) const;
+
+    //! Get the default prior for \p feature.
+    //!
+    //! \param[in] feature The feature for which to get the prior.
+    //! \param[in] params The model parameters.
+ virtual TPriorPtr defaultPrior(model_t::EFeature feature, const SModelParams& params) const = 0; + + //! Get the default prior for multivariate \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const = 0; + + //! Get the default prior for pairs of correlated time series + //! of \p feature. + //! + //! \param[in] feature The feature for which to get the prior. + //! \param[in] params The model parameters. + virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const = 0; + + //! Get the default prior to use for categorical data. + maths::CMultinomialConjugate defaultCategoricalPrior() const; + + //! Get the default time series decomposition. + //! + //! \param[in] feature The feature for which to get the decomposition. + //! \param[in] bucketLength The data bucketing length. + TDecompositionCPtr defaultDecomposition(model_t::EFeature feature, core_t::TTime bucketLength) const; + + //! Get the influence calculators to use for each feature in \p features. + const TFeatureInfluenceCalculatorCPtrPrVec& defaultInfluenceCalculators(const std::string& influencerName, + const TFeatureVec& features) const; + //@} + + //! Get the search key corresponding to this factory. + virtual const CSearchKey& searchKey() const = 0; + + //! Check if this makes the model used for a simple counting search. + virtual bool isSimpleCount() const = 0; + + //! Check the pre-summarisation mode for this factory. + virtual model_t::ESummaryMode summaryMode() const = 0; + + //! Get the default data type for models from this factory. + virtual maths_t::EDataType dataType() const = 0; + + //! \name Customization by a specific search + //@{ + //! Set the identifier of the search for which this generates models. + virtual void identifier(int identifier) = 0; + + //! Set the record field names which will be modeled. + virtual void fieldNames(const std::string& partitionFieldName, + const std::string& overFieldName, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames) = 0; + + //! Set whether the model should process missing field values. + virtual void useNull(bool useNull) = 0; + + //! Set the features which will be modeled. + virtual void features(const TFeatureVec& features) = 0; + + //! Set the amount by which metric sample count is reduced for + //! fine-grained sampling when there is latency. + void sampleCountFactor(std::size_t sampleCountFactor); + + //! Set the bucket results delay + virtual void bucketResultsDelay(std::size_t bucketResultsDelay) = 0; + + //! Set whether the model should exclude frequent hitters from the + //! calculations. + void excludeFrequent(model_t::EExcludeFrequent excludeFrequent); + + //! Set the detection rules for a detector. + void detectionRules(TDetectionRuleVecCRef detectionRules); + //@} + + //! Set the scheduled events + void scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents); + + //! \name Customization by mlmodel.conf + //@{ + //! Set the learn rate used for initializing models. + void learnRate(double learnRate); + + //! Set the decay rate used for initializing the models. + void decayRate(double decayRate); + + //! Set the initial decay rate multiplier used for initializing + //! models. + void initialDecayRateMultiplier(double multiplier); + + //! 
Set the maximum number of times we'll update a person's model
+    //! in a bucketing interval.
+    void maximumUpdatesPerBucket(double maximumUpdatesPerBucket);
+
+    //! Set the prune window scale factor minimum
+    void pruneWindowScaleMinimum(double factor);
+
+    //! Set the prune window scale factor maximum
+    void pruneWindowScaleMaximum(double factor);
+
+    //! Set whether multivariate analysis of correlated 'by' fields should
+    //! be performed.
+    void multivariateByFields(bool enabled);
+
+    //! Set the minimum mode fraction used for initializing the models.
+    void minimumModeFraction(double minimumModeFraction);
+
+    //! Set the minimum mode count used for initializing the models.
+    void minimumModeCount(double minimumModeCount);
+
+    //! Set the periods and the number of points we'll use to model each
+    //! of the seasonal components in the data.
+    void componentSize(std::size_t componentSize);
+    //@}
+
+    //! Update the bucket length, for ModelAutoConfig's benefit
+    void updateBucketLength(core_t::TTime length);
+
+    //! Get global model configuration parameters.
+    const SModelParams& modelParams() const;
+
+    //! Get the minimum mode fraction used for initializing the models.
+    double minimumModeFraction() const;
+
+    //! Get the minimum mode count used for initializing the models.
+    double minimumModeCount() const;
+
+    //! Get the number of points to use for approximating each seasonal
+    //! component.
+    std::size_t componentSize() const;
+
+protected:
+    using TMultivariatePriorPtrVec = std::vector<TMultivariatePriorPtr>;
+    using TOptionalSearchKey = boost::optional<CSearchKey>;
+
+protected:
+    //! Efficiently swap the contents of this and other.
+    //!
+    //! \note This only swaps the state held on this base class.
+    void swap(CModelFactory& other);
+
+    //! Get a multivariate normal prior with dimension \p dimension.
+    //!
+    //! \param[in] dimension The dimension.
+    //! \param[in] params The model parameters.
+    //! \warning Up to ten dimensions are supported.
+    TMultivariatePriorPtr multivariateNormalPrior(std::size_t dimension, const SModelParams& params) const;
+
+    //! Get a multivariate multimodal prior with dimension \p dimension.
+    //!
+    //! \param[in] dimension The dimension.
+    //! \param[in] params The model parameters.
+    //! \warning Up to ten dimensions are supported.
+    TMultivariatePriorPtr
+    multivariateMultimodalPrior(std::size_t dimension, const SModelParams& params, const maths::CMultivariatePrior& modePrior) const;
+
+    //! Get a multivariate 1-of-n prior with dimension \p dimension.
+    //!
+    //! \param[in] dimension The dimension.
+    //! \param[in] params The model parameters.
+    //! \param[in] models The component models to select between.
+    TMultivariatePriorPtr
+    multivariateOneOfNPrior(std::size_t dimension, const SModelParams& params, const TMultivariatePriorPtrVec& models) const;
+
+    //! Get the default prior for time-of-day and time-of-week modeling.
+    //! This is just a mixture of normals which allows more modes than
+    //! we typically do.
+    //!
+    //! \param[in] params The model parameters.
+    TPriorPtr timeOfDayPrior(const SModelParams& params) const;
+
+    //! Get the default prior for latitude and longitude modeling.
+    //! This is just a mixture of correlate normals which allows more
+    //! modes than we typically do.
+    //!
+    //! \param[in] params The model parameters.
+    TMultivariatePriorPtr latLongPrior(const SModelParams& params) const;
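
The time-of-day and lat/long priors above are both plain normal mixtures that simply admit more modes than the defaults. For intuition only: a mixture density is the weighted sum of its component densities, f(x) = sum_i w_i N(x | m_i, s_i). A small Boost.Math sketch of evaluating such a density, with invented weights and modes (activity peaks at 9am and 6pm, in seconds of day):

    #include <boost/math/distributions/normal.hpp>
    #include <iostream>
    #include <vector>

    int main() {
        // Invented two-mode time-of-day model.
        std::vector<double> weights{0.4, 0.6};
        std::vector<boost::math::normal> modes{
            boost::math::normal{9.0 * 3600.0, 1800.0},
            boost::math::normal{18.0 * 3600.0, 3600.0}};

        double x = 10.0 * 3600.0; // 10am
        double density = 0.0;
        for (std::size_t i = 0; i < modes.size(); ++i) {
            // Mixture density is the weighted sum of component densities.
            density += weights[i] * boost::math::pdf(modes[i], x);
        }
        std::cout << "f(10am) = " << density << '\n';
        return 0;
    }
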
+
+private:
+    using TFeatureVecMathsModelMap = std::map<TFeatureVec, TFeatureMathsModelPtrPrVec>;
+    using TFeatureVecMultivariatePriorMap = std::map<TFeatureVec, TFeatureMultivariatePriorPtrPrVec>;
+    using TFeatureVecCorrelationsMap = std::map<TFeatureVec, TFeatureCorrelationsPtrPrVec>;
+    using TStrFeatureVecPr = std::pair<std::string, TFeatureVec>;
+    using TStrFeatureVecPrInfluenceCalculatorCPtrMap =
+        std::map<TStrFeatureVecPr, TFeatureInfluenceCalculatorCPtrPrVec>;
+
+private:
+    //! Get the field values which partition the data for modeling.
+    virtual TStrCRefVec partitioningFields() const = 0;
+
+private:
+    //! The global model configuration parameters.
+    SModelParams m_ModelParams;
+
+    //! A cache of models for collections of features.
+    mutable TFeatureVecMathsModelMap m_MathsModelCache;
+
+    //! A cache of priors for correlate pairs of collections of features.
+    mutable TFeatureVecMultivariatePriorMap m_CorrelatePriorCache;
+
+    //! A cache of models of the correlations of collections of features.
+    mutable TFeatureVecCorrelationsMap m_CorrelationsCache;
+
+    //! A cache of influence calculators for collections of features.
+    mutable TStrFeatureVecPrInfluenceCalculatorCPtrMap m_InfluenceCalculatorCache;
 };
-
 }
 }
 #endif // INCLUDED_ml_model_CModelFactory_h
-
diff --git a/include/model/CModelParams.h b/include/model/CModelParams.h
index 6e7f02590a..f78131f739 100644
--- a/include/model/CModelParams.h
+++ b/include/model/CModelParams.h
@@ -12,9 +12,9 @@
 #include
+#include
 #include
-#include
 #include
@@ -22,15 +22,12 @@
 #include
 #include
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {
 struct SDistributionRestoreParams;
 struct STimeSeriesDecompositionRestoreParams;
 }
-namespace model
-{
+namespace model {
 //! \brief Wraps up model global parameters.
 //!
 //! DESCRIPTION:\n
@@ -41,8 +38,7 @@
 //! IMPLEMENTATION:\n
 //! This is purposely not implemented as a nested class so that it can
 //! be forward declared.
-struct MODEL_EXPORT SModelParams
-{
+struct MODEL_EXPORT SModelParams {
     using TDetectionRuleVec = std::vector<CDetectionRule>;
     using TDetectionRuleVecCRef = boost::reference_wrapper<const TDetectionRuleVec>;
     using TStrDetectionRulePr = std::pair<std::string, CDetectionRule>;
@@ -171,7 +167,6 @@
     //! The time window during which samples are accepted.
     core_t::TTime s_SamplingAgeCutoff;
 };
-
 }
 }
diff --git a/include/model/CModelPlotData.h b/include/model/CModelPlotData.h
index debf26b558..0a7599670d 100644
--- a/include/model/CModelPlotData.h
+++ b/include/model/CModelPlotData.h
@@ -7,7 +7,6 @@
 #ifndef INCLUDED_ml_model_CModelPlotData_h
 #define INCLUDED_ml_model_CModelPlotData_h
-
 #include
 #include
@@ -17,83 +16,76 @@
 #include
 #include
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace model
-{
+namespace model {
 //!
\brief Data necessary to create a model plot -class MODEL_EXPORT CModelPlotData -{ - public: - using TStrDoublePr = std::pair; - using TStrDoublePrVec = std::vector; - - public: - struct MODEL_EXPORT SByFieldData - { - SByFieldData(); - SByFieldData(double lowerBound, double upperBound, double median); - - void addValue(const std::string &personName, double value); - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - double s_LowerBound; - double s_UpperBound; - double s_Median; - - // Change to vector of pair str->double - TStrDoublePrVec s_ValuesPerOverField; - }; - - public: - using TStrByFieldDataUMap = boost::unordered_map; - using TFeatureStrByFieldDataUMapPr = std::pair; - using TFeatureStrByFieldDataUMapUMap = boost::unordered_map; - using TIntStrByFieldDataUMapUMap = boost::unordered_map; - using TFeatureStrByFieldDataUMapUMapCItr = TFeatureStrByFieldDataUMapUMap::const_iterator; - - public: - CModelPlotData(); - CModelPlotData(core_t::TTime time, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &overFieldName, - const std::string &byFieldName, - core_t::TTime bucketSpan, - int detectorIndex); - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - TFeatureStrByFieldDataUMapUMapCItr begin() const; - TFeatureStrByFieldDataUMapUMapCItr end() const; - SByFieldData &get(const model_t::EFeature &feature, const std::string &byFieldValue); - const std::string &partitionFieldName() const; - const std::string &partitionFieldValue() const; - const std::string &overFieldName() const; - const std::string &byFieldName() const; - core_t::TTime time() const; - core_t::TTime bucketSpan() const; - int detectorIndex() const; - std::string print() const; - - private: - TFeatureStrByFieldDataUMapUMap m_DataPerFeature; - core_t::TTime m_Time; - std::string m_PartitionFieldName; - std::string m_PartitionFieldValue; - std::string m_OverFieldName; - std::string m_ByFieldName; - core_t::TTime m_BucketSpan; - int m_DetectorIndex; +class MODEL_EXPORT CModelPlotData { +public: + using TStrDoublePr = std::pair; + using TStrDoublePrVec = std::vector; + +public: + struct MODEL_EXPORT SByFieldData { + SByFieldData(); + SByFieldData(double lowerBound, double upperBound, double median); + + void addValue(const std::string& personName, double value); + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + double s_LowerBound; + double s_UpperBound; + double s_Median; + + // Change to vector of pair str->double + TStrDoublePrVec s_ValuesPerOverField; + }; + +public: + using TStrByFieldDataUMap = boost::unordered_map; + using TFeatureStrByFieldDataUMapPr = std::pair; + using TFeatureStrByFieldDataUMapUMap = boost::unordered_map; + using TIntStrByFieldDataUMapUMap = boost::unordered_map; + using TFeatureStrByFieldDataUMapUMapCItr = TFeatureStrByFieldDataUMapUMap::const_iterator; + +public: + CModelPlotData(); + CModelPlotData(core_t::TTime time, + const std::string& partitionFieldName, + const std::string& partitionFieldValue, + const std::string& overFieldName, + const std::string& byFieldName, + core_t::TTime bucketSpan, + int detectorIndex); + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + bool acceptRestoreTraverser(core::CStateRestoreTraverser& 
traverser); + TFeatureStrByFieldDataUMapUMapCItr begin() const; + TFeatureStrByFieldDataUMapUMapCItr end() const; + SByFieldData& get(const model_t::EFeature& feature, const std::string& byFieldValue); + const std::string& partitionFieldName() const; + const std::string& partitionFieldValue() const; + const std::string& overFieldName() const; + const std::string& byFieldName() const; + core_t::TTime time() const; + core_t::TTime bucketSpan() const; + int detectorIndex() const; + std::string print() const; + +private: + TFeatureStrByFieldDataUMapUMap m_DataPerFeature; + core_t::TTime m_Time; + std::string m_PartitionFieldName; + std::string m_PartitionFieldValue; + std::string m_OverFieldName; + std::string m_ByFieldName; + core_t::TTime m_BucketSpan; + int m_DetectorIndex; }; - - } } diff --git a/include/model/CModelTools.h b/include/model/CModelTools.h index fcc60b6195..176fd91b17 100644 --- a/include/model/CModelTools.h +++ b/include/model/CModelTools.h @@ -13,8 +13,8 @@ #include #include -#include #include +#include #include #include @@ -29,15 +29,12 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { class CModel; class CMultinomialConjugate; } -namespace model -{ +namespace model { class CSample; struct SModelParams; @@ -54,274 +51,255 @@ struct SModelParams; //! all member functions should be static and it should be state-less. //! If your functionality doesn't fit this pattern just make it a nested //! class. -class MODEL_EXPORT CModelTools -{ +class MODEL_EXPORT CModelTools { +public: + using TDoubleVec = std::vector; + using TDouble2Vec = core::CSmallVector; + using TDouble2Vec1Vec = core::CSmallVector; + using TTimeDouble2VecPr = std::pair; + using TSizeSizePr = std::pair; + using TStoredStringPtrStoredStringPtrPr = std::pair; + using TSampleVec = std::vector; + + //! \brief De-duplicates nearly equal values. + class MODEL_EXPORT CFuzzyDeduplicate { public: - using TDoubleVec = std::vector; - using TDouble2Vec = core::CSmallVector; - using TDouble2Vec1Vec = core::CSmallVector; - using TTimeDouble2VecPr = std::pair; - using TSizeSizePr = std::pair; - using TStoredStringPtrStoredStringPtrPr = std::pair; - using TSampleVec = std::vector; - - //! \brief De-duplicates nearly equal values. - class MODEL_EXPORT CFuzzyDeduplicate - { - public: - //! Add a value. - void add(TDouble2Vec value); - //! Compute quantization epsilons. - void computeEpsilons(core_t::TTime bucketLength, - std::size_t desiredNumberSamples); - //! Check if we've got a near duplicate of \p value. - std::size_t duplicate(core_t::TTime time, TDouble2Vec value); - - private: - using TDouble2VecVec = std::vector; - struct MODEL_EXPORT SDuplicateValueHash - { - std::size_t operator()(const TTimeDouble2VecPr &value) const; - }; - using TTimeDouble2VecPrSizeUMap = boost::unordered_map; - - private: - //! Quantize \p value. - TDouble2Vec quantize(TDouble2Vec value) const; - //! Quantize \p time. - core_t::TTime quantize(core_t::TTime time) const; - - private: - //! If false then quantization is disabled. - bool m_Quantize = true; - //! The count of values added. - std::size_t m_Count = 0; - //! The time quantization interval. - core_t::TTime m_TimeEps = 0; - //! The value quantization interval. - TDouble2Vec m_ValueEps; - //! A random number generator used to sample added values. - maths::CPRNG::CXorOShiro128Plus m_Rng; - //! A random sample of the added values. - TDouble2VecVec m_RandomSample; - //! A collection of quantized values and their unique ids. 
- TTimeDouble2VecPrSizeUMap m_QuantizedValues; + //! Add a value. + void add(TDouble2Vec value); + //! Compute quantization epsilons. + void computeEpsilons(core_t::TTime bucketLength, std::size_t desiredNumberSamples); + //! Check if we've got a near duplicate of \p value. + std::size_t duplicate(core_t::TTime time, TDouble2Vec value); + + private: + using TDouble2VecVec = std::vector; + struct MODEL_EXPORT SDuplicateValueHash { + std::size_t operator()(const TTimeDouble2VecPr& value) const; }; + using TTimeDouble2VecPrSizeUMap = boost::unordered_map; + + private: + //! Quantize \p value. + TDouble2Vec quantize(TDouble2Vec value) const; + //! Quantize \p time. + core_t::TTime quantize(core_t::TTime time) const; + + private: + //! If false then quantization is disabled. + bool m_Quantize = true; + //! The count of values added. + std::size_t m_Count = 0; + //! The time quantization interval. + core_t::TTime m_TimeEps = 0; + //! The value quantization interval. + TDouble2Vec m_ValueEps; + //! A random number generator used to sample added values. + maths::CPRNG::CXorOShiro128Plus m_Rng; + //! A random sample of the added values. + TDouble2VecVec m_RandomSample; + //! A collection of quantized values and their unique ids. + TTimeDouble2VecPrSizeUMap m_QuantizedValues; + }; + + //! \brief Hashes a string pointer pair. + struct MODEL_EXPORT SStoredStringPtrStoredStringPtrPrHash { + std::size_t operator()(const TStoredStringPtrStoredStringPtrPr& target) const { + return static_cast(core::CHashing::hashCombine(static_cast(s_Hasher(*target.first)), + static_cast(s_Hasher(*target.second)))); + } + core::CHashing::CMurmurHash2String s_Hasher; + }; + + //! \brief Compares two string pointer pairs. + struct MODEL_EXPORT SStoredStringPtrStoredStringPtrPrEqual { + std::size_t operator()(const TStoredStringPtrStoredStringPtrPr& lhs, const TStoredStringPtrStoredStringPtrPr& rhs) const { + return *lhs.first == *rhs.first && *lhs.second == *rhs.second; + } + }; + + //! \brief Manages the aggregation of probabilities. + //! + //! DESCRIPTION:\n + //! This allows one to register either one of or both the joint + //! probability and extreme aggregation styles. The resulting + //! aggregate probability is the minimum of the aggregates of + //! the probabilities added so far for any of the registered + //! aggregation styles. + class MODEL_EXPORT CProbabilityAggregator { + public: + using TAggregator = boost::variant; + using TAggregatorDoublePr = std::pair; + using TAggregatorDoublePrVec = std::vector; - //! \brief Hashes a string pointer pair. - struct MODEL_EXPORT SStoredStringPtrStoredStringPtrPrHash - { - std::size_t operator()(const TStoredStringPtrStoredStringPtrPr &target) const - { - return static_cast( - core::CHashing::hashCombine(static_cast(s_Hasher(*target.first)), - static_cast(s_Hasher(*target.second)))); - } - core::CHashing::CMurmurHash2String s_Hasher; - }; + enum EStyle { E_Sum, E_Min }; - //! \brief Compares two string pointer pairs. - struct MODEL_EXPORT SStoredStringPtrStoredStringPtrPrEqual - { - std::size_t operator()(const TStoredStringPtrStoredStringPtrPr &lhs, - const TStoredStringPtrStoredStringPtrPr &rhs) const - { - return *lhs.first == *rhs.first && *lhs.second == *rhs.second; - } - }; + public: + CProbabilityAggregator(EStyle style); - //! \brief Manages the aggregation of probabilities. - //! - //! DESCRIPTION:\n - //! This allows one to register either one of or both the joint - //! probability and extreme aggregation styles. The resulting - //! 
aggregate probability is the minimum of the aggregates of - //! the probabilities added so far for any of the registered - //! aggregation styles. - class MODEL_EXPORT CProbabilityAggregator - { - public: - using TAggregator = boost::variant; - using TAggregatorDoublePr = std::pair; - using TAggregatorDoublePrVec = std::vector; - - enum EStyle - { - E_Sum, - E_Min - }; - - public: - CProbabilityAggregator(EStyle style); - - //! Check if any probabilities have been added. - bool empty() const; - - //! Add an aggregation style \p aggregator with weight \p weight. - void add(const TAggregator &aggregator, double weight = 1.0); - - //! Add \p probability. - void add(double probability, double weight = 1.0); - - //! Calculate the probability if possible. - bool calculate(double &result) const; - - private: - //! The style of aggregation to use. - EStyle m_Style; - - //! The total weight of all samples. - double m_TotalWeight; - - //! The collection of objects for computing "joint" probabilities. - TAggregatorDoublePrVec m_Aggregators; - }; + //! Check if any probabilities have been added. + bool empty() const; - using TStoredStringPtrStoredStringPtrPrProbabilityAggregatorUMap = - boost::unordered_map; + //! Add an aggregation style \p aggregator with weight \p weight. + void add(const TAggregator& aggregator, double weight = 1.0); - //! Wraps up the calculation of less likely probabilities for a - //! multinomial distribution. - //! - //! DESCRIPTION:\n - //! This caches the probabilities for each category, in the multinomial - //! distribution, since they can't be computed independently and for - //! a large number of categories it is very wasteful to repeatedly - //! compute them all. - class MODEL_EXPORT CCategoryProbabilityCache - { - public: - CCategoryProbabilityCache(); - CCategoryProbabilityCache(const maths::CMultinomialConjugate &prior); - - //! Calculate the probability of less likely categories than - //! \p attribute. - bool lookup(std::size_t category, double &result) const; - - //! Get the memory usage of the component - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory usage of the component - std::size_t memoryUsage() const; - - private: - //! The prior. - const maths::CMultinomialConjugate *m_Prior; - //! The cached probabilities. - mutable TDoubleVec m_Cache; - //! The smallest possible category probability. - mutable double m_SmallestProbability; - }; + //! Add \p probability. + void add(double probability, double weight = 1.0); + + //! Calculate the probability if possible. + bool calculate(double& result) const; + + private: + //! The style of aggregation to use. + EStyle m_Style; + + //! The total weight of all samples. + double m_TotalWeight; + + //! The collection of objects for computing "joint" probabilities. + TAggregatorDoublePrVec m_Aggregators; + }; + + using TStoredStringPtrStoredStringPtrPrProbabilityAggregatorUMap = boost::unordered_map; - //! \brief A cache of the probability calculation to use in cases that many - //! probabilities are being computed from the same model. + //! Wraps up the calculation of less likely probabilities for a + //! multinomial distribution. + //! + //! DESCRIPTION:\n + //! This caches the probabilities for each category, in the multinomial + //! distribution, since they can't be computed independently and for + //! a large number of categories it is very wasteful to repeatedly + //! compute them all. 
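
Concretely, the CProbabilityAggregator described a little further up reduces the stream of added probabilities once per registered style and reports the minimum over those aggregates. A simplified, self-contained sketch of that contract, hard-coding a "min" style and a naive product in place of the real joint-probability calculators (all names here are invented):

    #include <algorithm>
    #include <iostream>

    // Simplified stand-in for CProbabilityAggregator with two fixed styles.
    class CProbabilityAggregatorSketch {
    public:
        void add(double probability) {
            m_Min = std::min(m_Min, probability);
            m_Product *= probability; // naive "joint" aggregate
            m_Empty = false;
        }
        bool calculate(double& result) const {
            if (m_Empty) {
                return false; // nothing has been added
            }
            // The overall probability is the minimum over the styles.
            result = std::min(m_Min, m_Product);
            return true;
        }

    private:
        bool m_Empty = true;
        double m_Min = 1.0;
        double m_Product = 1.0;
    };

    int main() {
        CProbabilityAggregatorSketch aggregator;
        for (double p : {0.2, 0.9, 0.5}) {
            aggregator.add(p);
        }
        double result;
        if (aggregator.calculate(result)) {
            std::cout << "aggregate probability = " << result << '\n'; // 0.09
        }
        return 0;
    }
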
+ class MODEL_EXPORT CCategoryProbabilityCache { + public: + CCategoryProbabilityCache(); + CCategoryProbabilityCache(const maths::CMultinomialConjugate& prior); + + //! Calculate the probability of less likely categories than + //! \p attribute. + bool lookup(std::size_t category, double& result) const; + + //! Get the memory usage of the component + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory usage of the component + std::size_t memoryUsage() const; + + private: + //! The prior. + const maths::CMultinomialConjugate* m_Prior; + //! The cached probabilities. + mutable TDoubleVec m_Cache; + //! The smallest possible category probability. + mutable double m_SmallestProbability; + }; + + //! \brief A cache of the probability calculation to use in cases that many + //! probabilities are being computed from the same model. + //! + //! DESCRIPTION:\n + //! This caches the probabilities for each feature and attribute since they + //! can be expensive to compute and for large populations we can repeatedly + //! calculate probabilities for the same model and similar parameters. + //! + //! This bounds the maximum relative error it'll introduce by only interpolating + //! an interval if the difference in the probability at its end points satisfy + //! \f$|P(b) - P(a)| < threshold \times min(P(A), P(b))\f$. + class MODEL_EXPORT CProbabilityCache { + public: + using TTail2Vec = core::CSmallVector; + using TSize1Vec = core::CSmallVector; + + public: + explicit CProbabilityCache(double maximumError); + + //! Clear the cache. + void clear(); + + //! Maybe add the modes of \p model. + void addModes(model_t::EFeature feature, std::size_t id, const maths::CModel& model); + + //! Add a new ("value", "probability") result. //! - //! DESCRIPTION:\n - //! This caches the probabilities for each feature and attribute since they - //! can be expensive to compute and for large populations we can repeatedly - //! calculate probabilities for the same model and similar parameters. + //! \param[in] id The unique model identifier. + //! \param[in] value The value. + //! \param[in] probability The result of running the probability + //! calculation for \p value. + //! \param[in] tail The tail of \p value. + //! \param[in] conditional True if the probability depends on the + //! correlation structure and false otherwise. + //! \param[in] mostAnomalousCorrelate The identifier of the most + //! anomalous correlate (or empty if there isn't one). + void addProbability(model_t::EFeature feature, + std::size_t id, + const TDouble2Vec1Vec& value, + double probability, + const TTail2Vec& tail, + bool conditional, + const TSize1Vec& mostAnomalousCorrelate); + + //! Try to lookup the probability of \p value in cache. //! - //! This bounds the maximum relative error it'll introduce by only interpolating - //! an interval if the difference in the probability at its end points satisfy - //! \f$|P(b) - P(a)| < threshold \times min(P(A), P(b))\f$. - class MODEL_EXPORT CProbabilityCache - { - public: - using TTail2Vec = core::CSmallVector; - using TSize1Vec = core::CSmallVector; - - public: - explicit CProbabilityCache(double maximumError); - - //! Clear the cache. - void clear(); - - //! Maybe add the modes of \p model. - void addModes(model_t::EFeature feature, std::size_t id, - const maths::CModel &model); - - //! Add a new ("value", "probability") result. - //! - //! \param[in] id The unique model identifier. - //! \param[in] value The value. - //! 
\param[in] probability The result of running the probability - //! calculation for \p value. - //! \param[in] tail The tail of \p value. - //! \param[in] conditional True if the probability depends on the - //! correlation structure and false otherwise. - //! \param[in] mostAnomalousCorrelate The identifier of the most - //! anomalous correlate (or empty if there isn't one). - void addProbability(model_t::EFeature feature, std::size_t id, - const TDouble2Vec1Vec &value, - double probability, const TTail2Vec &tail, - bool conditional, const TSize1Vec &mostAnomalousCorrelate); - - //! Try to lookup the probability of \p value in cache. - //! - //! \param[in] id The unique model identifier. - //! \param[in] value The value. - //! \param[out] probability An estimate of the probability - //! corresponding to \p likelihood. - //! \param[out] tail The tail of \p value. - //! \param[out] conditional True if the probability depends on the - //! correlation structure and false otherwise. - //! \param[out] mostAnomalousCorrelate The identifier of the most - //! anomalous correlate (or empty if there isn't one). - //! \return True if the probability can be estimated within an - //! acceptable error and false otherwise. - bool lookup(model_t::EFeature feature, std::size_t id, - const TDouble2Vec1Vec &value, - double &probability, TTail2Vec &tail, - bool &conditional, TSize1Vec &mostAnomalousCorrelate) const; - - private: - using TDouble1Vec = core::CSmallVector; - - //! \brief A cache of the result of a probability calculation. - struct MODEL_EXPORT SProbability - { - //! The value's probability. - double s_Probability; - //! The tail the value is in. - TTail2Vec s_Tail; - //! True if the probability depends on correlation structure. - bool s_Conditional; - //! The pairwise correlation with lowest probability. - TSize1Vec s_MostAnomalousCorrelate; - }; - - using TDoubleProbabilityFMap = boost::container::flat_map; - - //! \brief A cache of all the results of a probability calculation - //! for a specific model. - struct MODEL_EXPORT SProbabilityCache - { - //! The modes of the model's residual distribution for which - //! this is caching the result of the probability calculation. - TDouble1Vec s_Modes; - - //! The probability cache. - TDoubleProbabilityFMap s_Probabilities; - }; - - using TFeatureSizePr = std::pair; - using TFeatureSizePrProbabilityCacheUMap = - boost::unordered_map; - - private: - //! The maximum relative error we'll tolerate in the probability. - double m_MaximumError; - - //! The univariate probability cache. - TFeatureSizePrProbabilityCacheUMap m_Caches; + //! \param[in] id The unique model identifier. + //! \param[in] value The value. + //! \param[out] probability An estimate of the probability + //! corresponding to \p likelihood. + //! \param[out] tail The tail of \p value. + //! \param[out] conditional True if the probability depends on the + //! correlation structure and false otherwise. + //! \param[out] mostAnomalousCorrelate The identifier of the most + //! anomalous correlate (or empty if there isn't one). + //! \return True if the probability can be estimated within an + //! acceptable error and false otherwise. + bool lookup(model_t::EFeature feature, + std::size_t id, + const TDouble2Vec1Vec& value, + double& probability, + TTail2Vec& tail, + bool& conditional, + TSize1Vec& mostAnomalousCorrelate) const; + + private: + using TDouble1Vec = core::CSmallVector; + + //! \brief A cache of the result of a probability calculation. + struct MODEL_EXPORT SProbability { + //! 
The value's probability. + double s_Probability; + //! The tail the value is in. + TTail2Vec s_Tail; + //! True if the probability depends on correlation structure. + bool s_Conditional; + //! The pairwise correlation with lowest probability. + TSize1Vec s_MostAnomalousCorrelate; }; -}; + using TDoubleProbabilityFMap = boost::container::flat_map; + + //! \brief A cache of all the results of a probability calculation + //! for a specific model. + struct MODEL_EXPORT SProbabilityCache { + //! The modes of the model's residual distribution for which + //! this is caching the result of the probability calculation. + TDouble1Vec s_Modes; + + //! The probability cache. + TDoubleProbabilityFMap s_Probabilities; + }; + + using TFeatureSizePr = std::pair; + using TFeatureSizePrProbabilityCacheUMap = boost::unordered_map; + + private: + //! The maximum relative error we'll tolerate in the probability. + double m_MaximumError; + + //! The univariate probability cache. + TFeatureSizePrProbabilityCacheUMap m_Caches; + }; +}; } } diff --git a/include/model/CPartitioningFields.h b/include/model/CPartitioningFields.h index 7ecb676e00..724077e063 100644 --- a/include/model/CPartitioningFields.h +++ b/include/model/CPartitioningFields.h @@ -15,53 +15,47 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief A wrapper around the partitioning fields of a model. //! //! DESCTIPTION:\n //! This wraps a list of field (name, value) pairs and encapsulates //! constant cost access to the partition field. -class MODEL_EXPORT CPartitioningFields -{ - public: - using TStrCRef = boost::reference_wrapper; - using TStrCRefStrCRefPr = std::pair; - using TStrCRefStrCRefPrVec = std::vector; +class MODEL_EXPORT CPartitioningFields { +public: + using TStrCRef = boost::reference_wrapper; + using TStrCRefStrCRefPr = std::pair; + using TStrCRefStrCRefPrVec = std::vector; - public: - CPartitioningFields(const std::string &partitionFieldName, - const std::string &partitionFieldValue); +public: + CPartitioningFields(const std::string& partitionFieldName, const std::string& partitionFieldValue); - //! Append the field (name, value) pair (\p fieldName, \p fieldValue). - void add(const std::string &fieldName, const std::string &fieldValue); + //! Append the field (name, value) pair (\p fieldName, \p fieldValue). + void add(const std::string& fieldName, const std::string& fieldValue); - //! Get the number of partitioning fields. - std::size_t size() const; + //! Get the number of partitioning fields. + std::size_t size() const; - //! Get a read only reference to the i'th field (name, value) pair. - const TStrCRefStrCRefPr &operator[](std::size_t i) const; - //! Get the i'th field (name, value) pair. - TStrCRefStrCRefPr &operator[](std::size_t i); + //! Get a read only reference to the i'th field (name, value) pair. + const TStrCRefStrCRefPr& operator[](std::size_t i) const; + //! Get the i'th field (name, value) pair. + TStrCRefStrCRefPr& operator[](std::size_t i); - //! Get a read only reference to the last field (name, value) pair. - const TStrCRefStrCRefPr &back() const; - //! Get the last field (name, value) pair. - TStrCRefStrCRefPr &back(); + //! Get a read only reference to the last field (name, value) pair. + const TStrCRefStrCRefPr& back() const; + //! Get the last field (name, value) pair. + TStrCRefStrCRefPr& back(); - //! Get the partition field value. - const std::string &partitionFieldValue() const; + //! Get the partition field value. 
+    const std::string& partitionFieldValue() const;
+
+private:
+    //! The partitioning fields (name, value) pairs.
+    TStrCRefStrCRefPrVec m_PartitioningFields;
 };
-
 }
 }
 #endif // INCLUDED_ml_model_CPartitioningFields_h
-
diff --git a/include/model/CPopulationModel.h b/include/model/CPopulationModel.h
index 03fe221556..6e0a35a4be 100644
--- a/include/model/CPopulationModel.h
+++ b/include/model/CPopulationModel.h
@@ -9,9 +9,9 @@
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
@@ -28,19 +28,15 @@
 #include
 #include
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace maths
-{
+namespace maths {
 class CPrior;
 }
-namespace model
-{
+namespace model {
 //! \brief The most basic population model interface.
 //!
@@ -60,258 +56,233 @@ namespace model
 //!
 //! It assumes data are supplied in time order since this means minimal
 //! state can be maintained.
-class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel
-{
+class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel {
+public:
+    using TTimeVec = std::vector<core_t::TTime>;
+    using TSizeUInt64Pr = std::pair<std::size_t, uint64_t>;
+    using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>;
+    using TCountMinSketchVec = std::vector<maths::CCountMinSketch>;
+    using TBjkstUniqueValuesVec = std::vector<maths::CBjkstUniqueValues>;
+    using TSizeTimeUMap = boost::unordered_map<std::size_t, core_t::TTime>;
+
+    //! Lift the overloads of baselineBucketMean into the class scope.
+    using CAnomalyDetectorModel::baselineBucketMean;
+
+    //! Lift the overloads of acceptPersistInserter into the class scope.
+    using CAnomalyDetectorModel::acceptPersistInserter;
+
+public:
+    //! \name Life-cycle.
+    //@{
+    //! \param[in] params The global configuration parameters.
+    //! \param[in] dataGatherer The object that gathers time series data.
+    //! \param[in] influenceCalculators The influence calculators to use
+    //! for each feature.
+    CPopulationModel(const SModelParams& params,
+                     const TDataGathererPtr& dataGatherer,
+                     const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators);
+
+    //! Create a copy that will result in the same persisted state as the
+    //! original. This is effectively a copy constructor that creates a
+    //! copy that's only valid for a single purpose. The boolean flag is
+    //! redundant except to create a signature that will not be mistaken
+    //! for a general purpose copy constructor.
+    CPopulationModel(bool isForPersistence, const CPopulationModel& other);
+    //@}
+
+    //! Returns true.
+    virtual bool isPopulation() const;
+
+    //! \name Bucket Statistics
+    //@{
+    //! Get the count of the bucketing interval containing \p time
+    //! for the person identified by \p pid.
+    virtual TOptionalUInt64 currentBucketCount(std::size_t pid, core_t::TTime time) const;
+
+    //! Returns null.
+    virtual TOptionalDouble baselineBucketCount(std::size_t pid) const;
+
+protected:
+    //! Get the index range [begin, end) of the person corresponding to
+    //! \p pid in the vector \p data. This relies on the fact that \p data
+    //! is sorted lexicographically by person then attribute identifier.
+    //! This will return an empty range if the person is not present.
+    template<typename T>
+    static TSizeSizePr personRange(const T& data, std::size_t pid);
+
+    //! Find the person attribute pair identified by \p pid and \p cid,
+    //! respectively, in \p data if it exists. Returns the end of the
+    //! vector if it doesn't.
+    template<typename T>
+    static typename T::const_iterator find(const T& data, std::size_t pid, std::size_t cid);
+
+    //! Extract the bucket value for count feature data.
+    static inline TDouble1Vec extractValue(model_t::EFeature /*feature*/, const std::pair& data);
+    //! Extract the bucket value for metric feature data.
+    static inline TDouble1Vec extractValue(model_t::EFeature feature, const std::pair& data);
+    //@}
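
The personRange contract above depends on the feature data being sorted lexicographically by (person, attribute) identifier, so one person's entries form a contiguous run that two binary searches can bracket; the real implementation appears in the reformatted CPopulationModelDetail.h at the end of this patch. A stripped-down standalone illustration of the same idea:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <utility>
    #include <vector>

    using TSizeSizePr = std::pair<std::size_t, std::size_t>;

    int main() {
        // (person id, attribute id) pairs, sorted lexicographically.
        std::vector<TSizeSizePr> data{{0, 1}, {0, 3}, {2, 0}, {2, 2}, {2, 7}, {5, 1}};

        std::size_t pid = 2;
        // The person's run is bounded below by (pid, 0) and above by
        // (pid, max), so lower_bound/upper_bound bracket it exactly.
        auto begin = std::lower_bound(data.begin(), data.end(),
                                      std::make_pair(pid, std::size_t{0}));
        auto end = std::upper_bound(begin, data.end(),
                                    std::make_pair(pid, std::numeric_limits<std::size_t>::max()));

        for (auto i = begin; i != end; ++i) {
            std::cout << "person " << i->first << ", attribute " << i->second << '\n';
        }
        return 0;
    }
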
+
+public:
+    //! \name Person
+    //@{
+    //! Get the person unique identifiers which are present in the
+    //! bucketing time interval including \p time.
+    //!
+    //! \param[in] time The time of interest.
+    //! \param[out] result Filled in with the person identifiers
+    //! in the bucketing time interval of interest.
+    virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const;
+    //@}
+
+    //! \name Update
+    //@{
+    //! Sample any state needed by computeProbability for the out-
+    //! of-phase bucket in the time interval [\p startTime, \p endTime]
+    //! but do not update the model.
+    //!
+    //! \param[in] startTime The start of the time interval to sample.
+    //! \param[in] endTime The end of the time interval to sample.
+    virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+
+    //! Update the rates for \p feature and \p people.
+    virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0;
+    //@}
+
+    //! Get the checksum of this model.
+    //!
+    //! \param[in] includeCurrentBucketStats If true then include the
+    //! current bucket statistics. (This is designed to handle serialization,
+    //! for which we don't serialize the current bucket statistics.)
+    virtual uint64_t checksum(bool includeCurrentBucketStats = true) const = 0;
+
+    //! Debug the memory used by this model.
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
+
+    //! Get the memory used by this model.
+    virtual std::size_t memoryUsage() const = 0;
+
+    //! Get the static size of this object - used for virtual hierarchies
+    virtual std::size_t staticSize() const = 0;
+
+    //! Get the non-estimated value of the memory used by this model.
+    virtual std::size_t computeMemoryUsage() const = 0;
+
+    //! Get the frequency of the attribute identified by \p cid.
+    virtual double attributeFrequency(std::size_t cid) const;
+
+    //! Get the weight for \p feature and the person identified by
+    //! \p pid based on their sample rate.
+    double sampleRateWeight(std::size_t pid, std::size_t cid) const;
+
+protected:
+    //! \brief A key for the partial bucket corrections map.
+    class MODEL_EXPORT CCorrectionKey {
+    public:
+        CCorrectionKey(model_t::EFeature feature, std::size_t pid, std::size_t cid, std::size_t correlated = 0);
+        bool operator==(const CCorrectionKey& rhs) const;
+        std::size_t hash() const;
+
+    private:
+        model_t::EFeature m_Feature;
+        std::size_t m_Pid;
+        std::size_t m_Cid;
+        std::size_t m_Correlate;
+    };
+
+    //! \brief A hasher for the partial bucket corrections map key.
+ struct MODEL_EXPORT CHashCorrectionKey { + std::size_t operator()(const CCorrectionKey& key) const { return key.hash(); } + }; + using TCorrectionKeyDouble1VecUMap = boost::unordered_map; - public: - //! \name Life-cycle. - //@{ - //! \param[in] params The global configuration parameters. - //! \param[in] dataGatherer The object that gathers time series data. - //! \param[in] influenceCalculators The influence calculators to use - //! for each feature. - CPopulationModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators); - - //! Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken - //! for a general purpose copy constructor. - CPopulationModel(bool isForPersistence, const CPopulationModel &other); - //@} - - //! Returns true. - virtual bool isPopulation() const; - - //! \name Bucket Statistics - //@{ - //! Get the count of the bucketing interval containing \p time - //! for the person identified by \p pid. - virtual TOptionalUInt64 currentBucketCount(std::size_t pid, core_t::TTime time) const; - - //! Returns null. - virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; - - protected: - //! Get the index range [begin, end) of the person corresponding to - //! \p pid in the vector \p data. This relies on the fact that \p data - //! is sort lexicographically by person then attribute identifier. - //! This will return an empty range if the person is not present. - template - static TSizeSizePr personRange(const T &data, std::size_t pid); - - //! Find the person attribute pair identified by \p pid and \p cid, - //! respectively, in \p data if it exists. Returns the end of the - //! vector if it doesn't. - template - static typename T::const_iterator find(const T &data, std::size_t pid, std::size_t cid); - - //! Extract the bucket value for count feature data. - static inline TDouble1Vec extractValue(model_t::EFeature /*feature*/, - const std::pair &data); - //! Extract the bucket value for metric feature data. - static inline TDouble1Vec extractValue(model_t::EFeature feature, - const std::pair &data); - //@} +protected: + //! Persist state by passing information to the supplied inserter. + void doAcceptPersistInserter(core::CStatePersistInserter& inserter) const; - public: - //! \name Person - //@{ - //! Get the person unique identifiers which are present in the - //! bucketing time interval including \p time. - //! - //! \param[in] time The time of interest. - //! \param[out] result Filled in with the person identifiers - //! in the bucketing time interval of interest. - virtual void currentBucketPersonIds(core_t::TTime time, - TSizeVec &result) const; - //@} - - //! \name Update - //@{ - //! Sample any state needed by computeProbablity for the out- - //! of-phase bucket in the time interval [\p startTime, \p endTime] - //! but do not update the model. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); - - //! Update the rates for \p feature and \p people. 
- virtual void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) = 0; - //@} - - //! Get the checksum of this model. - //! - //! \param[in] includeCurrentBucketStats If true then include the - //! current bucket statistics. (This is designed to handle serialization, - //! for which we don't serialize the current bucket statistics.) - virtual uint64_t checksum(bool includeCurrentBucketStats = true) const = 0; - - //! Debug the memory used by this model. - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0; - - //! Get the memory used by this model. - virtual std::size_t memoryUsage() const = 0; - - //! Get the static size of this object - used for virtual hierarchies - virtual std::size_t staticSize() const = 0; - - //! Get the non-estimated value of the the memory used by this model. - virtual std::size_t computeMemoryUsage() const = 0; - - //! Get the frequency of the attribute identified by \p cid. - virtual double attributeFrequency(std::size_t cid) const; - - //! Get the weight for \p feature and the person identified by - //! \p pid based on their sample rate. - double sampleRateWeight(std::size_t pid, std::size_t cid) const; - - protected: - //! \brief A key for the partial bucket corrections map. - class MODEL_EXPORT CCorrectionKey - { - public: - CCorrectionKey(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - std::size_t correlated = 0); - bool operator==(const CCorrectionKey &rhs) const; - std::size_t hash() const; - - private: - model_t::EFeature m_Feature; - std::size_t m_Pid; - std::size_t m_Cid; - std::size_t m_Correlate; - }; - - //! \brief A hasher for the partial bucket corrections map key. - struct MODEL_EXPORT CHashCorrectionKey - { - std::size_t operator()(const CCorrectionKey &key) const - { - return key.hash(); - } - }; - using TCorrectionKeyDouble1VecUMap = - boost::unordered_map; - - protected: - //! Persist state by passing information to the supplied inserter. - void doAcceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Restore the model reading state from the supplied traverser. - bool doAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - //! Get the current bucket person counts. - virtual const TSizeUInt64PrVec &personCounts() const = 0; - - //! Check if bucket statistics are available for the specified time. - virtual bool bucketStatsAvailable(core_t::TTime time) const = 0; - - //! Monitor the resource usage while creating new models - void createUpdateNewModels(core_t::TTime time, CResourceMonitor &resourceMonitor); - - //! Initialize the time series models for "n" newly observed people - //! and "m" newly observed attributes. - virtual void createNewModels(std::size_t n, std::size_t m) = 0; - - //! Initialize the time series models for recycled attributes - //! and/or people. - virtual void updateRecycledModels() = 0; - - //! Update the correlation models. - virtual void refreshCorrelationModels(std::size_t resourceLimit, - CResourceMonitor &resourceMonitor) = 0; - - //! Clear out large state objects for people/attributes that are pruned. - virtual void clearPrunedResources(const TSizeVec &people, - const TSizeVec &attributes) = 0; - - //! Correct \p baseline with \p corrections for interim results. 
- void correctBaselineForInterim(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - const TCorrectionKeyDouble1VecUMap &corrections, - TDouble1Vec &baseline) const; - - //! Get the time by which to propagate the priors on a sample. - double propagationTime(std::size_t cid, core_t::TTime) const; - - //! Remove heavy hitting people and attributes from the feature - //! data if necessary. - template - void applyFilters(bool updateStatistics, - const PERSON_FILTER &personFilter, - const ATTRIBUTE_FILTER &attributeFilter, - T &data) const; - - //! Get the first time each attribute was seen. - const TTimeVec &attributeFirstBucketTimes() const; - //! Get the last time each attribute was seen. - const TTimeVec &attributeLastBucketTimes() const; - - //! Get the people and attributes to remove if any. - void peopleAndAttributesToRemove(core_t::TTime time, - std::size_t maximumAge, - TSizeVec &peopleToRemove, - TSizeVec &attributesToRemove) const; - - //! Remove the \p people. - void removePeople(const TSizeVec &peopleToRemove); - - //! Skip sampling the interval \p endTime - \p startTime. - virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) = 0; + //! Restore the model reading state from the supplied traverser. + bool doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - private: - using TOptionalCountMinSketch = boost::optional; + //! Get the current bucket person counts. + virtual const TSizeUInt64PrVec& personCounts() const = 0; - private: - //! The last time each person was seen. - TTimeVec m_PersonLastBucketTimes; + //! Check if bucket statistics are available for the specified time. + virtual bool bucketStatsAvailable(core_t::TTime time) const = 0; - //! The first time each attribute was seen. - TTimeVec m_AttributeFirstBucketTimes; + //! Monitor the resource usage while creating new models + void createUpdateNewModels(core_t::TTime time, CResourceMonitor& resourceMonitor); - //! The last time each attribute was seen. - TTimeVec m_AttributeLastBucketTimes; + //! Initialize the time series models for "n" newly observed people + //! and "m" newly observed attributes. + virtual void createNewModels(std::size_t n, std::size_t m) = 0; - //! The initial sketch to use for estimating the number of distinct people. - maths::CBjkstUniqueValues m_NewDistinctPersonCounts; + //! Initialize the time series models for recycled attributes + //! and/or people. + virtual void updateRecycledModels() = 0; - //! The number of distinct people generating each attribute. - TBjkstUniqueValuesVec m_DistinctPersonCounts; + //! Update the correlation models. + virtual void refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor) = 0; - //! The initial sketch to use for estimating person bucket counts. - TOptionalCountMinSketch m_NewPersonBucketCounts; + //! Clear out large state objects for people/attributes that are pruned. + virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes) = 0; - //! The bucket count of each (person, attribute) pair in the exponentially - //! decaying window with decay rate equal to CAnomalyDetectorModel::m_DecayRate. - TCountMinSketchVec m_PersonAttributeBucketCounts; -}; + //! Correct \p baseline with \p corrections for interim results. 
+    void correctBaselineForInterim(model_t::EFeature feature,
+                                   std::size_t pid,
+                                   std::size_t cid,
+                                   model_t::CResultType type,
+                                   const TSizeDoublePr1Vec& correlated,
+                                   const TCorrectionKeyDouble1VecUMap& corrections,
+                                   TDouble1Vec& baseline) const;
+
+    //! Get the time by which to propagate the priors on a sample.
+    double propagationTime(std::size_t cid, core_t::TTime) const;
+
+    //! Remove heavy hitting people and attributes from the feature
+    //! data if necessary.
+    template<typename PERSON_FILTER, typename ATTRIBUTE_FILTER, typename T>
+    void applyFilters(bool updateStatistics, const PERSON_FILTER& personFilter, const ATTRIBUTE_FILTER& attributeFilter, T& data) const;
+
+    //! Get the first time each attribute was seen.
+    const TTimeVec& attributeFirstBucketTimes() const;
+    //! Get the last time each attribute was seen.
+    const TTimeVec& attributeLastBucketTimes() const;
+    //! Get the people and attributes to remove if any.
+    void
+    peopleAndAttributesToRemove(core_t::TTime time, std::size_t maximumAge, TSizeVec& peopleToRemove, TSizeVec& attributesToRemove) const;
+
+    //! Remove the \p people.
+    void removePeople(const TSizeVec& peopleToRemove);
+
+    //! Skip sampling the interval \p endTime - \p startTime.
+    virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) = 0;
+
+private:
+    using TOptionalCountMinSketch = boost::optional<maths::CCountMinSketch>;
+
+private:
+    //! The last time each person was seen.
+    TTimeVec m_PersonLastBucketTimes;
+
+    //! The first time each attribute was seen.
+    TTimeVec m_AttributeFirstBucketTimes;
+
+    //! The last time each attribute was seen.
+    TTimeVec m_AttributeLastBucketTimes;
+
+    //! The initial sketch to use for estimating the number of distinct people.
+    maths::CBjkstUniqueValues m_NewDistinctPersonCounts;
+
+    //! The number of distinct people generating each attribute.
+    TBjkstUniqueValuesVec m_DistinctPersonCounts;
+
+    //! The initial sketch to use for estimating person bucket counts.
+    TOptionalCountMinSketch m_NewPersonBucketCounts;
+
+    //! The bucket count of each (person, attribute) pair in the exponentially
+    //! decaying window with decay rate equal to CAnomalyDetectorModel::m_DecayRate.
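A count held in an exponentially decaying window of this kind is typically aged by multiplying it by exp(-decayRate * elapsedTime) whenever time is propagated forwards, so a bucket observed dt seconds ago contributes weight e^(-rate * dt). A minimal sketch of that ageing rule; the flat vector of counts is an illustrative assumption, not this class's sketch data structure:

    #include <cmath>
    #include <vector>

    // Age a vector of counts forward by `elapsed` seconds with rate
    // `decayRate`: every count is damped by the same exponential factor,
    // so recent buckets dominate and stale pairs fade away.
    void propagateForwards(std::vector<double>& counts, double decayRate, double elapsed) {
        double factor = std::exp(-decayRate * elapsed);
        for (auto& count : counts) {
            count *= factor;
        }
    }

For example, with decayRate = 0.001 and elapsed = 3600 seconds, each count retains e^-3.6, roughly 2.7% of its value.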
+    TCountMinSketchVec m_PersonAttributeBucketCounts;
+};
 }
 }
diff --git a/include/model/CPopulationModelDetail.h b/include/model/CPopulationModelDetail.h
index 416f40641b..02aab01735 100644
--- a/include/model/CPopulationModelDetail.h
+++ b/include/model/CPopulationModelDetail.h
@@ -11,78 +11,55 @@
 #include <model/CPopulationModel.h>

-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

 template<typename T>
-CPopulationModel::TSizeSizePr CPopulationModel::personRange(const T &data, std::size_t pid)
-{
+CPopulationModel::TSizeSizePr CPopulationModel::personRange(const T& data, std::size_t pid) {
     const std::size_t minCid = 0u;
     const std::size_t maxCid = std::numeric_limits<std::size_t>::max();
-    auto begin = std::lower_bound(data.begin(), data.end(),
-                                  std::make_pair(pid, minCid),
-                                  maths::COrderings::SFirstLess());
-    auto end = std::upper_bound(begin, data.end(),
-                                std::make_pair(pid, maxCid),
-                                maths::COrderings::SFirstLess());
-    return {static_cast<std::size_t>(begin - data.begin()),
-            static_cast<std::size_t>(end - data.begin())};
+    auto begin = std::lower_bound(data.begin(), data.end(), std::make_pair(pid, minCid), maths::COrderings::SFirstLess());
+    auto end = std::upper_bound(begin, data.end(), std::make_pair(pid, maxCid), maths::COrderings::SFirstLess());
+    return {static_cast<std::size_t>(begin - data.begin()), static_cast<std::size_t>(end - data.begin())};
 }

 template<typename T>
-typename T::const_iterator CPopulationModel::find(const T &data, std::size_t pid, std::size_t cid)
-{
-    auto i = std::lower_bound(data.begin(), data.end(),
-                              std::make_pair(pid, cid),
-                              maths::COrderings::SFirstLess());
-    if (   i != data.end()
-        && (   CDataGatherer::extractPersonId(*i) != pid
-            || CDataGatherer::extractAttributeId(*i) != cid))
-    {
+typename T::const_iterator CPopulationModel::find(const T& data, std::size_t pid, std::size_t cid) {
+    auto i = std::lower_bound(data.begin(), data.end(), std::make_pair(pid, cid), maths::COrderings::SFirstLess());
+    if (i != data.end() && (CDataGatherer::extractPersonId(*i) != pid || CDataGatherer::extractAttributeId(*i) != cid)) {
         i = data.end();
     }
     return i;
 }

-inline CPopulationModel::TDouble1Vec
-CPopulationModel::extractValue(model_t::EFeature /*feature*/,
-                               const std::pair<TSizeSizePr, SEventRateFeatureData> &data)
-{
+inline CPopulationModel::TDouble1Vec CPopulationModel::extractValue(model_t::EFeature /*feature*/,
+                                                                    const std::pair<TSizeSizePr, SEventRateFeatureData>& data) {
     return TDouble1Vec{static_cast<double>(CDataGatherer::extractData(data).s_Count)};
 }

-inline CPopulationModel::TDouble1Vec
-CPopulationModel::extractValue(model_t::EFeature feature,
-                               const std::pair<TSizeSizePr, SMetricFeatureData> &data)
-{
-    return CDataGatherer::extractData(data).s_BucketValue ?
-           CDataGatherer::extractData(data).s_BucketValue->value(model_t::dimension(feature)) :
-           TDouble1Vec();
+inline CPopulationModel::TDouble1Vec CPopulationModel::extractValue(model_t::EFeature feature,
+                                                                    const std::pair<TSizeSizePr, SMetricFeatureData>& data) {
+    return CDataGatherer::extractData(data).s_BucketValue
+               ? CDataGatherer::extractData(data).s_BucketValue->value(model_t::dimension(feature))
+               : TDouble1Vec();
 }

 template<typename PERSON_FILTER, typename ATTRIBUTE_FILTER, typename T>
 void CPopulationModel::applyFilters(bool updateStatistics,
-                                    const PERSON_FILTER &personFilter,
-                                    const ATTRIBUTE_FILTER &attributeFilter,
-                                    T &data) const
-{
+                                    const PERSON_FILTER& personFilter,
+                                    const ATTRIBUTE_FILTER& attributeFilter,
+                                    T& data) const {
     std::size_t initialSize = data.size();
-    if (this->params().s_ExcludeFrequent & model_t::E_XF_Over)
-    {
+    if (this->params().s_ExcludeFrequent & model_t::E_XF_Over) {
         data.erase(std::remove_if(data.begin(), data.end(), personFilter), data.end());
     }
-    if (this->params().s_ExcludeFrequent & model_t::E_XF_By)
-    {
+    if (this->params().s_ExcludeFrequent & model_t::E_XF_By) {
         data.erase(std::remove_if(data.begin(), data.end(), attributeFilter), data.end());
     }
-    if (updateStatistics && data.size() != initialSize)
-    {
+    if (updateStatistics && data.size() != initialSize) {
         core::CStatistics::stat(stat_t::E_NumberExcludedFrequentInvocations).increment(1);
     }
 }
-
 }
 }
diff --git a/include/model/CProbabilityAndInfluenceCalculator.h b/include/model/CProbabilityAndInfluenceCalculator.h
index 0ed46dc81a..30c68586ef 100644
--- a/include/model/CProbabilityAndInfluenceCalculator.h
+++ b/include/model/CProbabilityAndInfluenceCalculator.h
@@ -22,10 +22,8 @@
 #include
 #include
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

 class CAnnotatedProbabilityBuilder;
 class CInfluenceCalculator;
@@ -50,293 +48,289 @@ class CInfluenceCalculator;
 //! which are selected at runtime. This is necessary because different features
 //! use different influence calculations, but the features are selected based
 //! on the commands the user runs.
-class MODEL_EXPORT CProbabilityAndInfluenceCalculator
-{
-    public:
-        using TDouble1Vec = core::CSmallVector<double, 1>;
-        using TDouble2Vec = core::CSmallVector<double, 2>;
-        using TDouble4Vec = core::CSmallVector<double, 4>;
-        using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>;
-        using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
-        using TDouble1VecDoublePr = std::pair<TDouble1Vec, double>;
-        using TDouble1VecDouble1VecPr = std::pair<TDouble1Vec, TDouble1Vec>;
-        using TBool2Vec = core::CSmallVector<bool, 2>;
-        using TSize1Vec = core::CSmallVector<std::size_t, 1>;
-        using TSize2Vec = core::CSmallVector<std::size_t, 2>;
-        using TSize2Vec1Vec = core::CSmallVector<TSize2Vec, 1>;
-        using TTime2Vec = core::CSmallVector<core_t::TTime, 2>;
-        using TTime2Vec1Vec = core::CSmallVector<TTime2Vec, 1>;
-        using TTail2Vec = core::CSmallVector<maths_t::ETail, 2>;
-        using TStrCRef = boost::reference_wrapper<const std::string>;
-        using TStrCRefDouble1VecDoublePrPr = std::pair<TStrCRef, TDouble1VecDoublePr>;
-        using TStrCRefDouble1VecDoublePrPrVec = std::vector<TStrCRefDouble1VecDoublePrPr>;
-        using TStrCRefDouble1VecDouble1VecPrPr = std::pair<TStrCRef, TDouble1VecDouble1VecPr>;
-        using TStrCRefDouble1VecDouble1VecPrPrVec = std::vector<TStrCRefDouble1VecDouble1VecPrPr>;
-        using TStrCRefDouble1VecDouble1VecPrPrVecVec = std::vector<TStrCRefDouble1VecDouble1VecPrPrVec>;
-        using TStoredStringPtrStoredStringPtrPr = std::pair<core::CStoredStringPtr, core::CStoredStringPtr>;
-        using TStoredStringPtrStoredStringPtrPrVec = std::vector<TStoredStringPtrStoredStringPtrPr>;
-        using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair<TStoredStringPtrStoredStringPtrPr, double>;
-        using TStoredStringPtrStoredStringPtrPrDoublePrVec = std::vector<TStoredStringPtrStoredStringPtrPrDoublePr>;
-        using TStoredStringPtr1Vec = core::CSmallVector<core::CStoredStringPtr, 1>;
-
-        //! \brief Wraps up the parameters to the influence calculation.
-        struct MODEL_EXPORT SParams : private core::CNonCopyable
-        {
-            SParams(const CPartitioningFields &partitioningFields);
-
-            //! Helper to print a description of the parameters.
-            std::string describe() const;
-
-            //! The feature of s_Value.
-            model_t::EFeature s_Feature;
-            //! The model of s_Value.
-            const maths::CModel *s_Model;
-            //! The time after the creation of the prior.
-            core_t::TTime s_ElapsedTime;
-            //! The time of s_Value.
-            TTime2Vec1Vec s_Time;
-            //!
The feature value. - TDouble2Vec1Vec s_Value; - //! The count of measurements in s_Value. - double s_Count; - //! The parameters needed to compute probabilities. - maths::CModelProbabilityParams s_ComputeProbabilityParams; - //! The probability of the s_Value. - double s_Probability; - //! The tail that the s_Value is in. - TTail2Vec s_Tail; - //! The name of the field for which to compute and influences. - core::CStoredStringPtr s_InfluencerName; - //! The influencer field values, and corresponding feature - //! value and count of measurements in the restrictions of - //! records to those influencer field values. - TStrCRefDouble1VecDoublePrPrVec s_InfluencerValues; - //! The partitioning field (name, value) pairs. - const CPartitioningFields &s_PartitioningFields; - //! The level at which influence occurs. - double s_Cutoff; - //! If true then add in influences greater than the cutoff. - bool s_IncludeCutoff; - //! Filled in with the influences of s_Value if any. - TStoredStringPtrStoredStringPtrPrDoublePrVec s_Influences; - }; - - //! \brief Wraps up the parameters to the influence calculation - //! for correlates. - struct MODEL_EXPORT SCorrelateParams : private core::CNonCopyable - { - SCorrelateParams(const CPartitioningFields &partitioningFields); - - //! Helper to print a description of the parameters. - std::string describe() const; - - //! The feature of s_Values. - model_t::EFeature s_Feature; - //! The model of s_Values. - const maths::CModel *s_Model; - //! The time after the creation of the s_Priors. - core_t::TTime s_ElapsedTime; - //! The times of s_Values. - TTime2Vec1Vec s_Times; - //! The feature values. - TDouble2Vec1Vec s_Values; - //! The count of measurements in s_Values. - TDouble2Vec1Vec s_Counts; - //! The variable identifiers for the primary and correlated - //! time series. - TSize2Vec1Vec s_Variables; - //! The correlated time series labels. - TStoredStringPtr1Vec s_CorrelatedLabels; - //! The correlated time series identifiers. - TSize1Vec s_Correlated; - //! The parameters needed to compute probabilities. - maths::CModelProbabilityParams s_ComputeProbabilityParams; - //! The probability of the s_Value. - double s_Probability; - //! The tail that the s_Value is in. - TTail2Vec s_Tail; - //! The index of the most anomalous correlate. - TSize1Vec s_MostAnomalousCorrelate; - //! The name of the field for which to compute and influences. - core::CStoredStringPtr s_InfluencerName; - //! The influencer field values, and corresponding feature - //! value and count of measurements in the restrictions of - //! records to those influencer field values. - TStrCRefDouble1VecDouble1VecPrPrVec s_InfluencerValues; - //! The partitioning field (name, value) pairs. - const CPartitioningFields &s_PartitioningFields; - //! The level at which influence occurs. - double s_Cutoff; - //! If true then add in influences greater than the cutoff. - bool s_IncludeCutoff; - //! Filled in with the influences of s_Values if any. - TStoredStringPtrStoredStringPtrPrDoublePrVec s_Influences; - }; - - public: - explicit CProbabilityAndInfluenceCalculator(double cutoff); - - //! Check if any probabilities have been added to the calculator. - bool empty() const; - - //! Get the minimum value for the influence for which an influencing - //! field value is judged to have any influence on a feature value. - double cutoff() const; - - //! Plug-in the influence calculation to use. - void plugin(const CInfluenceCalculator &influence); - - //! Add the joint probability aggregation style. 
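The two aggregation styles referred to here reduce a collection of per-value probabilities to a single number; the exact schemes live in maths::CJointProbabilityOfLessLikelySamples and maths::CProbabilityOfExtremeSample. For intuition only, a classical way to combine independent p-values is Fisher's method, sketched below; this illustrates the idea of joint probability aggregation and is not the library's implementation:

    #include <boost/math/distributions/chi_squared.hpp>
    #include <cmath>
    #include <vector>

    // Fisher's method: under the null hypothesis, -2 * sum(log p_i) for n
    // independent p-values follows a chi-squared distribution with 2n
    // degrees of freedom. Assumes every p is in (0, 1].
    double fisherCombinedProbability(const std::vector<double>& probabilities) {
        double statistic = 0.0;
        for (double p : probabilities) {
            statistic += -2.0 * std::log(p);
        }
        boost::math::chi_squared_distribution<double> chi2(2.0 * static_cast<double>(probabilities.size()));
        // Upper tail P(X >= statistic): small when the individual
        // probabilities are collectively unlikely.
        return boost::math::cdf(boost::math::complement(chi2, statistic));
    }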
- void addAggregator(const maths::CJointProbabilityOfLessLikelySamples &aggregator); - - //! Add the extreme probability aggregation style. - void addAggregator(const maths::CProbabilityOfExtremeSample &aggregator); - - //! Add a cache for the two probability calculations. - void addCache(CModelTools::CProbabilityCache &cache); - - //! Add the probabilities and influences from \p other. - void add(const CProbabilityAndInfluenceCalculator &other, double weight = 1.0); - - //! Add an attribute probability for \p value of the univariate - //! feature \p feature. - //! - //! This is a wrapper around addProbability which fills in an attribute - //! probability on \p builder. - //! - //! \param[in] attribute The attribute. - //! \param[in] cid The attribute identifier. - //! \param[in] pAttribute The probability of attribute. - //! \param[in,out] params The parameters used in the probability calculation. - //! \param[out] builder An attribute probability for \p attribute and - //! \p value is added to this builder if it can be computed. - //! \param[in] weight The weight to use when updating the aggregate - //! probabilities. - bool addAttributeProbability(const core::CStoredStringPtr &attribute, - std::size_t cid, - double pAttribute, - SParams ¶ms, - CAnnotatedProbabilityBuilder &builder, - double weight = 1.0); - - //! Add an attribute probability for \p values of the correlates - //! of the univariate feature \p feature. - bool addAttributeProbability(const core::CStoredStringPtr &attribute, - std::size_t cid, - double pAttribute, - SCorrelateParams ¶ms, - CAnnotatedProbabilityBuilder &builder, - double weight = 1.0); - - //! Compute and add the probability for \p value of the univariate - //! feature \p feature. - //! - //! \param[in] feature The value's feature. - //! \param[in] id A unique identifier of the value's model. - //! \param[in] model The value's model. - //! \param[in] elapsedTime The time that has elapsed since the - //! model was created. - //! \param[in] params Extra parameters needed by \p model to compute - //! the probability. - //! \param[in] time The value's time. - //! \param[in] value The value for which to compute the probability. - //! \param[out] probability Set to the probability of \p value - //! if one could be calculated. - //! \param[out] tail Set to the tail that \p value is in. - //! \param[out] type Filled in with the type of anomaly, i.e. is the - //! value anomalous in its own right or as a result of conditioning - //! on a correlated variable. - //! \param[out] mostAnomalousCorrelate Filled in with the index of the - //! most anomalous correlated time series. - //! \param[in] weight The weight to use when updating the aggregate - //! probabilities. - bool addProbability(model_t::EFeature feature, - std::size_t id, - const maths::CModel &model, - core_t::TTime elapsedTime, - const maths::CModelProbabilityParams ¶ms, - const TTime2Vec1Vec &time, - const TDouble2Vec1Vec &value, - double &probability, - TTail2Vec &tail, - model_t::CResultType &type, - TSize1Vec &mostAnomalousCorrelate, - double weight = 1.0); - - //! Add the probability to the overall aggregate probability and - //! all influencer aggregate probabilities. - //! - //! \param[in] probability The probability to add. - //! \param[in] weight The weight to use when updating the aggregate - //! probabilities. - void addProbability(double probability, double weight = 1.0); - - //! Compute and add the influences from \p influencerValues. - //! - //! 
\param[in] influencerName The name of the field for which - //! to compute and influences. - //! \param[in] influencerValues The influencer field values and - //! feature values and counts of measurements in the restrictions - //! of records to the corresponding influencer field values. - //! \param[in,out] params The parameters used in the probability calculation. - //! \param[in] weight The weight to use when updating the aggregate - //! probabilities. - void addInfluences(const std::string &influencerName, - const TStrCRefDouble1VecDoublePrPrVec &influencerValues, - SParams ¶ms, - double weight = 1.0); - - //! Compute and add the influences from \p influencerValues for - //! the correlates of a univariate feature. - void addInfluences(const std::string &influencerName, - const TStrCRefDouble1VecDouble1VecPrPrVecVec &influencerValues, - SCorrelateParams ¶ms, - double weight = 1.0); - - //! Calculate the overall probability of all values added. - //! - //! \param[out] probability Filled in with the overall probability - //! of all values added via addProbability. - bool calculate(double &probability) const; - - //! Calculate the overall probability of all values added and - //! any influences and their weights. - //! - //! \param[out] probability Filled in with the overall probability - //! of all values added via addProbability. - //! \param[out] influences Filled in with all influences of the - //! overall probability. - bool calculate(double &probability, - TStoredStringPtrStoredStringPtrPrDoublePrVec &influences) const; - - private: - //! Actually commit any influences we've found. - void commitInfluences(model_t::EFeature feature, double logp, double weight); - - private: - //! The minimum value for the influence for which an influencing - //! field value is judged to have any influence on a feature value. - double m_Cutoff; - - //! The plug-in used to adapt the influence calculation for - //! different features. - const CInfluenceCalculator *m_InfluenceCalculator; - - //! The template probability calculator. - CModelTools::CProbabilityAggregator m_ProbabilityTemplate; - - //! The probability calculator. - CModelTools::CProbabilityAggregator m_Probability; - - //! The probability calculation cache if there is one. - CModelTools::CProbabilityCache *m_ProbabilityCache; - - //! The influence probability calculator. - CModelTools::TStoredStringPtrStoredStringPtrPrProbabilityAggregatorUMap m_InfluencerProbabilities; - - //! Placeholder for the influence weights so that it isn't - //! allocated in a loop. 
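Holding the influence weights in a member, as the comment above describes, is a standard scratch-buffer optimisation: clearing a vector keeps its capacity, so a hot path that refills it on every call stops reallocating once it reaches its high-water mark. Schematically, with hypothetical names and simplified element types:

    #include <cstddef>
    #include <string>
    #include <utility>
    #include <vector>

    class CInfluenceScratchExample {
    public:
        // Called once per (feature, value); reuses m_Influences' capacity
        // rather than constructing a fresh vector on every call.
        void computeInfluences(std::size_t n) {
            m_Influences.clear(); // keeps capacity from previous calls
            for (std::size_t i = 0; i < n; ++i) {
                m_Influences.emplace_back("influencer_" + std::to_string(i), 1.0);
            }
            // ... consume m_Influences ...
        }

    private:
        // Scratch buffer: grows to a high-water mark once, then is reused.
        std::vector<std::pair<std::string, double>> m_Influences;
    };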
-        TStoredStringPtrStoredStringPtrPrDoublePrVec m_Influences;
+class MODEL_EXPORT CProbabilityAndInfluenceCalculator {
+public:
+    using TDouble1Vec = core::CSmallVector<double, 1>;
+    using TDouble2Vec = core::CSmallVector<double, 2>;
+    using TDouble4Vec = core::CSmallVector<double, 4>;
+    using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>;
+    using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
+    using TDouble1VecDoublePr = std::pair<TDouble1Vec, double>;
+    using TDouble1VecDouble1VecPr = std::pair<TDouble1Vec, TDouble1Vec>;
+    using TBool2Vec = core::CSmallVector<bool, 2>;
+    using TSize1Vec = core::CSmallVector<std::size_t, 1>;
+    using TSize2Vec = core::CSmallVector<std::size_t, 2>;
+    using TSize2Vec1Vec = core::CSmallVector<TSize2Vec, 1>;
+    using TTime2Vec = core::CSmallVector<core_t::TTime, 2>;
+    using TTime2Vec1Vec = core::CSmallVector<TTime2Vec, 1>;
+    using TTail2Vec = core::CSmallVector<maths_t::ETail, 2>;
+    using TStrCRef = boost::reference_wrapper<const std::string>;
+    using TStrCRefDouble1VecDoublePrPr = std::pair<TStrCRef, TDouble1VecDoublePr>;
+    using TStrCRefDouble1VecDoublePrPrVec = std::vector<TStrCRefDouble1VecDoublePrPr>;
+    using TStrCRefDouble1VecDouble1VecPrPr = std::pair<TStrCRef, TDouble1VecDouble1VecPr>;
+    using TStrCRefDouble1VecDouble1VecPrPrVec = std::vector<TStrCRefDouble1VecDouble1VecPrPr>;
+    using TStrCRefDouble1VecDouble1VecPrPrVecVec = std::vector<TStrCRefDouble1VecDouble1VecPrPrVec>;
+    using TStoredStringPtrStoredStringPtrPr = std::pair<core::CStoredStringPtr, core::CStoredStringPtr>;
+    using TStoredStringPtrStoredStringPtrPrVec = std::vector<TStoredStringPtrStoredStringPtrPr>;
+    using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair<TStoredStringPtrStoredStringPtrPr, double>;
+    using TStoredStringPtrStoredStringPtrPrDoublePrVec = std::vector<TStoredStringPtrStoredStringPtrPrDoublePr>;
+    using TStoredStringPtr1Vec = core::CSmallVector<core::CStoredStringPtr, 1>;
+
+    //! \brief Wraps up the parameters to the influence calculation.
+    struct MODEL_EXPORT SParams : private core::CNonCopyable {
+        SParams(const CPartitioningFields& partitioningFields);
+
+        //! Helper to print a description of the parameters.
+        std::string describe() const;
+
+        //! The feature of s_Value.
+        model_t::EFeature s_Feature;
+        //! The model of s_Value.
+        const maths::CModel* s_Model;
+        //! The time after the creation of the prior.
+        core_t::TTime s_ElapsedTime;
+        //! The time of s_Value.
+        TTime2Vec1Vec s_Time;
+        //! The feature value.
+        TDouble2Vec1Vec s_Value;
+        //! The count of measurements in s_Value.
+        double s_Count;
+        //! The parameters needed to compute probabilities.
+        maths::CModelProbabilityParams s_ComputeProbabilityParams;
+        //! The probability of the s_Value.
+        double s_Probability;
+        //! The tail that the s_Value is in.
+        TTail2Vec s_Tail;
+        //! The name of the field for which to compute the influences.
+        core::CStoredStringPtr s_InfluencerName;
+        //! The influencer field values, and corresponding feature
+        //! value and count of measurements in the restrictions of
+        //! records to those influencer field values.
+        TStrCRefDouble1VecDoublePrPrVec s_InfluencerValues;
+        //! The partitioning field (name, value) pairs.
+        const CPartitioningFields& s_PartitioningFields;
+        //! The level at which influence occurs.
+        double s_Cutoff;
+        //! If true then add in influences greater than the cutoff.
+        bool s_IncludeCutoff;
+        //! Filled in with the influences of s_Value if any.
+        TStoredStringPtrStoredStringPtrPrDoublePrVec s_Influences;
+    };
+
+    //! \brief Wraps up the parameters to the influence calculation
+    //! for correlates.
+    struct MODEL_EXPORT SCorrelateParams : private core::CNonCopyable {
+        SCorrelateParams(const CPartitioningFields& partitioningFields);
+
+        //! Helper to print a description of the parameters.
+        std::string describe() const;
+
+        //! The feature of s_Values.
+        model_t::EFeature s_Feature;
+        //! The model of s_Values.
+        const maths::CModel* s_Model;
+        //! The time after the creation of the s_Priors.
+        core_t::TTime s_ElapsedTime;
+        //! The times of s_Values.
+        TTime2Vec1Vec s_Times;
+        //! The feature values.
+ TDouble2Vec1Vec s_Values; + //! The count of measurements in s_Values. + TDouble2Vec1Vec s_Counts; + //! The variable identifiers for the primary and correlated + //! time series. + TSize2Vec1Vec s_Variables; + //! The correlated time series labels. + TStoredStringPtr1Vec s_CorrelatedLabels; + //! The correlated time series identifiers. + TSize1Vec s_Correlated; + //! The parameters needed to compute probabilities. + maths::CModelProbabilityParams s_ComputeProbabilityParams; + //! The probability of the s_Value. + double s_Probability; + //! The tail that the s_Value is in. + TTail2Vec s_Tail; + //! The index of the most anomalous correlate. + TSize1Vec s_MostAnomalousCorrelate; + //! The name of the field for which to compute and influences. + core::CStoredStringPtr s_InfluencerName; + //! The influencer field values, and corresponding feature + //! value and count of measurements in the restrictions of + //! records to those influencer field values. + TStrCRefDouble1VecDouble1VecPrPrVec s_InfluencerValues; + //! The partitioning field (name, value) pairs. + const CPartitioningFields& s_PartitioningFields; + //! The level at which influence occurs. + double s_Cutoff; + //! If true then add in influences greater than the cutoff. + bool s_IncludeCutoff; + //! Filled in with the influences of s_Values if any. + TStoredStringPtrStoredStringPtrPrDoublePrVec s_Influences; + }; + +public: + explicit CProbabilityAndInfluenceCalculator(double cutoff); + + //! Check if any probabilities have been added to the calculator. + bool empty() const; + + //! Get the minimum value for the influence for which an influencing + //! field value is judged to have any influence on a feature value. + double cutoff() const; + + //! Plug-in the influence calculation to use. + void plugin(const CInfluenceCalculator& influence); + + //! Add the joint probability aggregation style. + void addAggregator(const maths::CJointProbabilityOfLessLikelySamples& aggregator); + + //! Add the extreme probability aggregation style. + void addAggregator(const maths::CProbabilityOfExtremeSample& aggregator); + + //! Add a cache for the two probability calculations. + void addCache(CModelTools::CProbabilityCache& cache); + + //! Add the probabilities and influences from \p other. + void add(const CProbabilityAndInfluenceCalculator& other, double weight = 1.0); + + //! Add an attribute probability for \p value of the univariate + //! feature \p feature. + //! + //! This is a wrapper around addProbability which fills in an attribute + //! probability on \p builder. + //! + //! \param[in] attribute The attribute. + //! \param[in] cid The attribute identifier. + //! \param[in] pAttribute The probability of attribute. + //! \param[in,out] params The parameters used in the probability calculation. + //! \param[out] builder An attribute probability for \p attribute and + //! \p value is added to this builder if it can be computed. + //! \param[in] weight The weight to use when updating the aggregate + //! probabilities. + bool addAttributeProbability(const core::CStoredStringPtr& attribute, + std::size_t cid, + double pAttribute, + SParams& params, + CAnnotatedProbabilityBuilder& builder, + double weight = 1.0); + + //! Add an attribute probability for \p values of the correlates + //! of the univariate feature \p feature. + bool addAttributeProbability(const core::CStoredStringPtr& attribute, + std::size_t cid, + double pAttribute, + SCorrelateParams& params, + CAnnotatedProbabilityBuilder& builder, + double weight = 1.0); + + //! 
Compute and add the probability for \p value of the univariate + //! feature \p feature. + //! + //! \param[in] feature The value's feature. + //! \param[in] id A unique identifier of the value's model. + //! \param[in] model The value's model. + //! \param[in] elapsedTime The time that has elapsed since the + //! model was created. + //! \param[in] params Extra parameters needed by \p model to compute + //! the probability. + //! \param[in] time The value's time. + //! \param[in] value The value for which to compute the probability. + //! \param[out] probability Set to the probability of \p value + //! if one could be calculated. + //! \param[out] tail Set to the tail that \p value is in. + //! \param[out] type Filled in with the type of anomaly, i.e. is the + //! value anomalous in its own right or as a result of conditioning + //! on a correlated variable. + //! \param[out] mostAnomalousCorrelate Filled in with the index of the + //! most anomalous correlated time series. + //! \param[in] weight The weight to use when updating the aggregate + //! probabilities. + bool addProbability(model_t::EFeature feature, + std::size_t id, + const maths::CModel& model, + core_t::TTime elapsedTime, + const maths::CModelProbabilityParams& params, + const TTime2Vec1Vec& time, + const TDouble2Vec1Vec& value, + double& probability, + TTail2Vec& tail, + model_t::CResultType& type, + TSize1Vec& mostAnomalousCorrelate, + double weight = 1.0); + + //! Add the probability to the overall aggregate probability and + //! all influencer aggregate probabilities. + //! + //! \param[in] probability The probability to add. + //! \param[in] weight The weight to use when updating the aggregate + //! probabilities. + void addProbability(double probability, double weight = 1.0); + + //! Compute and add the influences from \p influencerValues. + //! + //! \param[in] influencerName The name of the field for which + //! to compute and influences. + //! \param[in] influencerValues The influencer field values and + //! feature values and counts of measurements in the restrictions + //! of records to the corresponding influencer field values. + //! \param[in,out] params The parameters used in the probability calculation. + //! \param[in] weight The weight to use when updating the aggregate + //! probabilities. + void addInfluences(const std::string& influencerName, + const TStrCRefDouble1VecDoublePrPrVec& influencerValues, + SParams& params, + double weight = 1.0); + + //! Compute and add the influences from \p influencerValues for + //! the correlates of a univariate feature. + void addInfluences(const std::string& influencerName, + const TStrCRefDouble1VecDouble1VecPrPrVecVec& influencerValues, + SCorrelateParams& params, + double weight = 1.0); + + //! Calculate the overall probability of all values added. + //! + //! \param[out] probability Filled in with the overall probability + //! of all values added via addProbability. + bool calculate(double& probability) const; + + //! Calculate the overall probability of all values added and + //! any influences and their weights. + //! + //! \param[out] probability Filled in with the overall probability + //! of all values added via addProbability. + //! \param[out] influences Filled in with all influences of the + //! overall probability. + bool calculate(double& probability, TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) const; + +private: + //! Actually commit any influences we've found. 
+ void commitInfluences(model_t::EFeature feature, double logp, double weight); + +private: + //! The minimum value for the influence for which an influencing + //! field value is judged to have any influence on a feature value. + double m_Cutoff; + + //! The plug-in used to adapt the influence calculation for + //! different features. + const CInfluenceCalculator* m_InfluenceCalculator; + + //! The template probability calculator. + CModelTools::CProbabilityAggregator m_ProbabilityTemplate; + + //! The probability calculator. + CModelTools::CProbabilityAggregator m_Probability; + + //! The probability calculation cache if there is one. + CModelTools::CProbabilityCache* m_ProbabilityCache; + + //! The influence probability calculator. + CModelTools::TStoredStringPtrStoredStringPtrPrProbabilityAggregatorUMap m_InfluencerProbabilities; + + //! Placeholder for the influence weights so that it isn't + //! allocated in a loop. + TStoredStringPtrStoredStringPtrPrDoublePrVec m_Influences; }; //! \brief Interface for influence calculations. @@ -360,51 +354,45 @@ class MODEL_EXPORT CProbabilityAndInfluenceCalculator //! be an anomaly if only the records labeled with the field value //! were present", or 2) "would there still be an anomaly if none //! of the records labeled with the field value were present". -class MODEL_EXPORT CInfluenceCalculator : private core::CNonCopyable -{ - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() - { - return true; - } - using TParams = CProbabilityAndInfluenceCalculator::SParams; - using TCorrelateParams = CProbabilityAndInfluenceCalculator::SCorrelateParams; - - public: - virtual ~CInfluenceCalculator(); - - //! Compute the influence from the probability of set difference - //! statistics. - static double intersectionInfluence(double logp, double logpi); - - //! Compute the influence from the probability of set intersection - //! statistics. - static double complementInfluence(double logp, double logpi); - - //! Compute the influences on a univariate value. - virtual void computeInfluences(TParams ¶ms) const = 0; - - //! Compute the influences on a correlate value. - virtual void computeInfluences(TCorrelateParams ¶ms) const = 0; +class MODEL_EXPORT CInfluenceCalculator : private core::CNonCopyable { +public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } + using TParams = CProbabilityAndInfluenceCalculator::SParams; + using TCorrelateParams = CProbabilityAndInfluenceCalculator::SCorrelateParams; + +public: + virtual ~CInfluenceCalculator(); + + //! Compute the influence from the probability of set difference + //! statistics. + static double intersectionInfluence(double logp, double logpi); + + //! Compute the influence from the probability of set intersection + //! statistics. + static double complementInfluence(double logp, double logpi); + + //! Compute the influences on a univariate value. + virtual void computeInfluences(TParams& params) const = 0; + + //! Compute the influences on a correlate value. + virtual void computeInfluences(TCorrelateParams& params) const = 0; }; //! \brief A stub implementation for the case that the influence //! can't be calculated. 
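The intersectionInfluence and complementInfluence helpers declared above turn the two set-based questions (would the anomaly persist with only, or without, the influencer's records) into scores computed from log-probabilities. The actual formulas are not shown in this header; one plausible shape for such a score, given purely as an assumed illustration rather than the library's code, compares the restricted log-probability with the full one:

    #include <algorithm>

    // logp  = log-probability of the full feature value (<= 0),
    // logpi = log-probability with the influencer's records restricted or
    //         removed. Both sketches map to [0, 1]: 1 means the influencer
    //         fully explains the anomaly, 0 means it contributes nothing.

    // "Would there still be an anomaly with ONLY this influencer's records?"
    double intersectionInfluenceSketch(double logp, double logpi) {
        return std::min(logpi / logp, 1.0);
    }

    // "Would there still be an anomaly WITHOUT this influencer's records?"
    // If removing the records makes the remainder likely again (logpi near
    // zero), the influencer gets the credit.
    double complementInfluenceSketch(double logp, double logpi) {
        return std::max(1.0 - logpi / logp, 0.0);
    }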
-class MODEL_EXPORT CInfluenceUnavailableCalculator : public CInfluenceCalculator -{ - public: - virtual void computeInfluences(TParams ¶ms) const; - virtual void computeInfluences(TCorrelateParams ¶ms) const; +class MODEL_EXPORT CInfluenceUnavailableCalculator : public CInfluenceCalculator { +public: + virtual void computeInfluences(TParams& params) const; + virtual void computeInfluences(TCorrelateParams& params) const; }; //! \brief A stub implementation for the case that every influence //! is 1, irrespective of the feature value and influence values. -class MODEL_EXPORT CIndicatorInfluenceCalculator : public CInfluenceCalculator -{ - public: - virtual void computeInfluences(TParams ¶ms) const; - virtual void computeInfluences(TCorrelateParams ¶ms) const; +class MODEL_EXPORT CIndicatorInfluenceCalculator : public CInfluenceCalculator { +public: + virtual void computeInfluences(TParams& params) const; + virtual void computeInfluences(TCorrelateParams& params) const; }; //! \brief Computes the influences for sum like features. @@ -428,11 +416,10 @@ class MODEL_EXPORT CIndicatorInfluenceCalculator : public CInfluenceCalculator //! Otherwise, the anomaly is likely due to the absence of counts for //! one of the influencing field values, in which case we'd need to //! know what its typical count is and we don't have this information. -class MODEL_EXPORT CLogProbabilityComplementInfluenceCalculator : public CInfluenceCalculator -{ - public: - virtual void computeInfluences(TParams ¶ms) const; - virtual void computeInfluences(TCorrelateParams ¶ms) const; +class MODEL_EXPORT CLogProbabilityComplementInfluenceCalculator : public CInfluenceCalculator { +public: + virtual void computeInfluences(TParams& params) const; + virtual void computeInfluences(TCorrelateParams& params) const; }; //! \brief Computes the influences for minimum like features. @@ -460,11 +447,10 @@ class MODEL_EXPORT CLogProbabilityComplementInfluenceCalculator : public CInflue //! field value were present". Note, we can determine whether there //! is influence in this case if the anomalous value is in either //! the left or right tail. -class MODEL_EXPORT CLogProbabilityInfluenceCalculator : public CInfluenceCalculator -{ - public: - virtual void computeInfluences(TParams ¶ms) const; - virtual void computeInfluences(TCorrelateParams ¶ms) const; +class MODEL_EXPORT CLogProbabilityInfluenceCalculator : public CInfluenceCalculator { +public: + virtual void computeInfluences(TParams& params) const; + virtual void computeInfluences(TCorrelateParams& params) const; }; //! \brief Computes the influences for the mean feature. @@ -482,11 +468,10 @@ class MODEL_EXPORT CLogProbabilityInfluenceCalculator : public CInfluenceCalcula //! //! \see CLogProbabilityComplementInfluenceCalculator for more details //! on the calculation. -class MODEL_EXPORT CMeanInfluenceCalculator : public CInfluenceCalculator -{ - public: - virtual void computeInfluences(TParams ¶ms) const; - virtual void computeInfluences(TCorrelateParams ¶ms) const; +class MODEL_EXPORT CMeanInfluenceCalculator : public CInfluenceCalculator { +public: + virtual void computeInfluences(TParams& params) const; + virtual void computeInfluences(TCorrelateParams& params) const; }; //! \brief Computes the influences for the mean feature. @@ -504,13 +489,11 @@ class MODEL_EXPORT CMeanInfluenceCalculator : public CInfluenceCalculator //! //! \see CLogProbabilityComplementInfluenceCalculator for more details //! on the calculation. 
-class MODEL_EXPORT CVarianceInfluenceCalculator : public CInfluenceCalculator -{ - public: - virtual void computeInfluences(TParams ¶ms) const; - virtual void computeInfluences(TCorrelateParams ¶ms) const; +class MODEL_EXPORT CVarianceInfluenceCalculator : public CInfluenceCalculator { +public: + virtual void computeInfluences(TParams& params) const; + virtual void computeInfluences(TCorrelateParams& params) const; }; - } } diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index e3a21438c7..b9749a0510 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -18,10 +18,8 @@ class CResourceMonitorTest; class CResourceLimitTest; class CAnomalyJobLimitTest; -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { class CAnomalyDetector; class CAnomalyDetectorModel; @@ -31,194 +29,190 @@ class CResourcePruner; //! //! DESCRIPTION:\n //! Assess memory used by models and decide on further memory allocations. -class MODEL_EXPORT CResourceMonitor -{ - public: - struct MODEL_EXPORT SResults - { - std::size_t s_Usage; - std::size_t s_ByFields; - std::size_t s_PartitionFields; - std::size_t s_OverFields; - std::size_t s_AllocationFailures; - model_t::EMemoryStatus s_MemoryStatus; - core_t::TTime s_BucketStartTime; - }; - - public: - using TModelPtrSizePr = std::pair; - using TModelPtrSizeMap = std::map; - using TMemoryUsageReporterFunc = std::function; - using TTimeSizeMap = std::map; - - //! The minimum time between prunes - static const core_t::TTime MINIMUM_PRUNE_FREQUENCY; +class MODEL_EXPORT CResourceMonitor { +public: + struct MODEL_EXPORT SResults { + std::size_t s_Usage; + std::size_t s_ByFields; + std::size_t s_PartitionFields; + std::size_t s_OverFields; + std::size_t s_AllocationFailures; + model_t::EMemoryStatus s_MemoryStatus; + core_t::TTime s_BucketStartTime; + }; + +public: + using TModelPtrSizePr = std::pair; + using TModelPtrSizeMap = std::map; + using TMemoryUsageReporterFunc = std::function; + using TTimeSizeMap = std::map; - //! Default memory limit for resource monitor - static const std::size_t DEFAULT_MEMORY_LIMIT_MB; + //! The minimum time between prunes + static const core_t::TTime MINIMUM_PRUNE_FREQUENCY; - public: - //! Default constructor - CResourceMonitor(); + //! Default memory limit for resource monitor + static const std::size_t DEFAULT_MEMORY_LIMIT_MB; - //! Query the resource monitor to find out if the models are - //! taking up too much memory and further allocations should be banned - bool areAllocationsAllowed() const; +public: + //! Default constructor + CResourceMonitor(); - //! Query the resource monitor to found out if it's Ok to - //! create structures of a certain size - bool areAllocationsAllowed(std::size_t size) const; + //! Query the resource monitor to find out if the models are + //! taking up too much memory and further allocations should be banned + bool areAllocationsAllowed() const; - //! Return the amount of remaining space for allocations - std::size_t allocationLimit() const; + //! Query the resource monitor to found out if it's Ok to + //! create structures of a certain size + bool areAllocationsAllowed(std::size_t size) const; - //! Tell this resource monitor about a CAnomalyDetector class - - //! these classes contain all the model memory and are used - //! to query the current overall usage - void registerComponent(CAnomalyDetector &detector); + //! Return the amount of remaining space for allocations + std::size_t allocationLimit() const; - //! 
Tell this resource monitor that a CAnomalyDetector class is - //! going to be deleted. - void unRegisterComponent(CAnomalyDetector &detector); + //! Tell this resource monitor about a CAnomalyDetector class - + //! these classes contain all the model memory and are used + //! to query the current overall usage + void registerComponent(CAnomalyDetector& detector); - //! Set a callback used when the memory usage grows - void memoryUsageReporter(const TMemoryUsageReporterFunc &reporter); + //! Tell this resource monitor that a CAnomalyDetector class is + //! going to be deleted. + void unRegisterComponent(CAnomalyDetector& detector); - //! Recalculate the memory usage if there is a memory limit - void refresh(CAnomalyDetector &detector); + //! Set a callback used when the memory usage grows + void memoryUsageReporter(const TMemoryUsageReporterFunc& reporter); - //! Recalculate the memory usage regardless of whether there is a memory limit - void forceRefresh(CAnomalyDetector &detector); + //! Recalculate the memory usage if there is a memory limit + void refresh(CAnomalyDetector& detector); - //! Set the internal memory limit, as specified in a limits config file - void memoryLimit(std::size_t limitMBs); + //! Recalculate the memory usage regardless of whether there is a memory limit + void forceRefresh(CAnomalyDetector& detector); - //! Get the memory status - model_t::EMemoryStatus getMemoryStatus(); + //! Set the internal memory limit, as specified in a limits config file + void memoryLimit(std::size_t limitMBs); - //! Send a memory usage report if it's changed by more than a certain percentage - void sendMemoryUsageReportIfSignificantlyChanged(core_t::TTime bucketStartTime); + //! Get the memory status + model_t::EMemoryStatus getMemoryStatus(); - //! Send a memory usage report - void sendMemoryUsageReport(core_t::TTime bucketStartTime); + //! Send a memory usage report if it's changed by more than a certain percentage + void sendMemoryUsageReportIfSignificantlyChanged(core_t::TTime bucketStartTime); - //! Create a memory usage report - SResults createMemoryUsageReport(core_t::TTime bucketStartTime); + //! Send a memory usage report + void sendMemoryUsageReport(core_t::TTime bucketStartTime); - //! We are being told that a class has failed to allocate memory - //! based on the resource limits, and we will report this to the - //! user when we can - void acceptAllocationFailureResult(core_t::TTime time); + //! Create a memory usage report + SResults createMemoryUsageReport(core_t::TTime bucketStartTime); - //! We are being told that aggressive pruning has taken place - //! to avoid hitting the resource limit, and we should report this - //! to the user when we can - void acceptPruningResult(); + //! We are being told that a class has failed to allocate memory + //! based on the resource limits, and we will report this to the + //! user when we can + void acceptAllocationFailureResult(core_t::TTime time); - //! Accessor for no limit flag - bool haveNoLimit() const; + //! We are being told that aggressive pruning has taken place + //! to avoid hitting the resource limit, and we should report this + //! to the user when we can + void acceptPruningResult(); - //! Prune models where necessary - bool pruneIfRequired(core_t::TTime endTime); + //! Accessor for no limit flag + bool haveNoLimit() const; - //! Accounts for any extra memory to the one - //! reported by the components. - //! Used in conjunction with clearExtraMemory() - //! in order to ensure enough memory remains - //! 
for model's parts that have not been fully allocated yet. - void addExtraMemory(std::size_t reserved); + //! Prune models where necessary + bool pruneIfRequired(core_t::TTime endTime); - //! Clears all extra memory - void clearExtraMemory(); - private: + //! Accounts for any extra memory to the one + //! reported by the components. + //! Used in conjunction with clearExtraMemory() + //! in order to ensure enough memory remains + //! for model's parts that have not been fully allocated yet. + void addExtraMemory(std::size_t reserved); - //! Updates the memory limit fields and the prune threshold - //! to the given value. - void updateMemoryLimitsAndPruneThreshold(std::size_t limitMBs); + //! Clears all extra memory + void clearExtraMemory(); - //! Update the given model and recalculate the total usage - void memUsage(CAnomalyDetectorModel *model); +private: + //! Updates the memory limit fields and the prune threshold + //! to the given value. + void updateMemoryLimitsAndPruneThreshold(std::size_t limitMBs); - //! Determine if we need to send a usage report, based on - //! increased usage, or increased errors - bool needToSendReport(); + //! Update the given model and recalculate the total usage + void memUsage(CAnomalyDetectorModel* model); - //! After a change in memory usage, check whether allocations - //! shoule be allowed or not - void updateAllowAllocations(); + //! Determine if we need to send a usage report, based on + //! increased usage, or increased errors + bool needToSendReport(); - //! Returns the sum of used memory plus any extra memory - std::size_t totalMemory() const; + //! After a change in memory usage, check whether allocations + //! shoule be allowed or not + void updateAllowAllocations(); - private: - //! The registered collection of components - TModelPtrSizeMap m_Models; + //! Returns the sum of used memory plus any extra memory + std::size_t totalMemory() const; - //! Is there enough free memory to allow creating new components - bool m_AllowAllocations; +private: + //! The registered collection of components + TModelPtrSizeMap m_Models; - //! The upper limit for memory usage, checked on increasing values - std::size_t m_ByteLimitHigh; + //! Is there enough free memory to allow creating new components + bool m_AllowAllocations; - //! The lower limit for memory usage, checked on decreasing values - std::size_t m_ByteLimitLow; + //! The upper limit for memory usage, checked on increasing values + std::size_t m_ByteLimitHigh; - //! Memory usage by anomaly detectors on the most recent calculation - std::size_t m_CurrentAnomalyDetectorMemory; + //! The lower limit for memory usage, checked on decreasing values + std::size_t m_ByteLimitLow; - //! Extra memory to enable accounting of soon to be allocated memory - std::size_t m_ExtraMemory; + //! Memory usage by anomaly detectors on the most recent calculation + std::size_t m_CurrentAnomalyDetectorMemory; - //! The total memory usage on the previous usage report - std::size_t m_PreviousTotal; + //! Extra memory to enable accounting of soon to be allocated memory + std::size_t m_ExtraMemory; - //! The highest known value for total memory usage - std::size_t m_Peak; + //! The total memory usage on the previous usage report + std::size_t m_PreviousTotal; - //! Callback function to fire when memory usage increases by 1% - TMemoryUsageReporterFunc m_MemoryUsageReporter; + //! The highest known value for total memory usage + std::size_t m_Peak; - //! 
Keep track of classes telling us about allocation failures - TTimeSizeMap m_AllocationFailures; + //! Callback function to fire when memory usage increases by 1% + TMemoryUsageReporterFunc m_MemoryUsageReporter; - //! The time at which the last allocation failure was reported - core_t::TTime m_LastAllocationFailureReport; + //! Keep track of classes telling us about allocation failures + TTimeSizeMap m_AllocationFailures; - //! Keep track of the model memory status - model_t::EMemoryStatus m_MemoryStatus; + //! The time at which the last allocation failure was reported + core_t::TTime m_LastAllocationFailureReport; - //! Keep track of whether pruning has started, for efficiency in most cases - bool m_HasPruningStarted; + //! Keep track of the model memory status + model_t::EMemoryStatus m_MemoryStatus; - //! The threshold at which pruning should kick in and head - //! towards for the sweet spot - std::size_t m_PruneThreshold; + //! Keep track of whether pruning has started, for efficiency in most cases + bool m_HasPruningStarted; - //! The last time we did a full prune of all the models - core_t::TTime m_LastPruneTime; + //! The threshold at which pruning should kick in and head + //! towards for the sweet spot + std::size_t m_PruneThreshold; - //! Number of buckets to go back when pruning - std::size_t m_PruneWindow; + //! The last time we did a full prune of all the models + core_t::TTime m_LastPruneTime; - //! The largest that the prune window can grow to - determined from the models - std::size_t m_PruneWindowMaximum; + //! Number of buckets to go back when pruning + std::size_t m_PruneWindow; - //! The smallest that the prune window can shrink to - 4 weeks - std::size_t m_PruneWindowMinimum; + //! The largest that the prune window can grow to - determined from the models + std::size_t m_PruneWindowMaximum; - //! Don't do any sort of memory checking if this is set - bool m_NoLimit; + //! The smallest that the prune window can shrink to - 4 weeks + std::size_t m_PruneWindowMinimum; - //! Test friends - friend class ::CResourceMonitorTest; - friend class ::CResourceLimitTest; - friend class ::CAnomalyJobLimitTest; -}; + //! Don't do any sort of memory checking if this is set + bool m_NoLimit; + //! Test friends + friend class ::CResourceMonitorTest; + friend class ::CResourceLimitTest; + friend class ::CAnomalyJobLimitTest; +}; } // model } // ml - #endif // INCLUDED_ml_model_CResourceMonitor_h diff --git a/include/model/CResultsQueue.h b/include/model/CResultsQueue.h index b733e1e0ee..9285fe66bf 100644 --- a/include/model/CResultsQueue.h +++ b/include/model/CResultsQueue.h @@ -9,10 +9,8 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { class CHierarchicalResults; //! \brief A queue for CHierarchicalResults objects. @@ -20,65 +18,60 @@ class CHierarchicalResults; //! DESCRIPTION:\n //! A queue for CHierarchicalResults objects that handles //! overlapping bucket result selection -class MODEL_EXPORT CResultsQueue -{ - public: - using THierarchicalResultsQueue = CBucketQueue; +class MODEL_EXPORT CResultsQueue { +public: + using THierarchicalResultsQueue = CBucketQueue; - public: - //! Constructor - CResultsQueue(std::size_t delayBuckets, - core_t::TTime bucketLength); +public: + //! Constructor + CResultsQueue(std::size_t delayBuckets, core_t::TTime bucketLength); - //! Reset the underlying queue - void reset(core_t::TTime time); + //! Reset the underlying queue + void reset(core_t::TTime time); - //! Have we got unsent items in the queue? 
- bool hasInterimResults() const; + //! Have we got unsent items in the queue? + bool hasInterimResults() const; - //! Push to the underlying queue - void push(const CHierarchicalResults &item, core_t::TTime time); + //! Push to the underlying queue + void push(const CHierarchicalResults& item, core_t::TTime time); - //! Push to the underlying queue - void push(const CHierarchicalResults &item); + //! Push to the underlying queue + void push(const CHierarchicalResults& item); - //! Get a result from the queue - const CHierarchicalResults &get(core_t::TTime time) const; + //! Get a result from the queue + const CHierarchicalResults& get(core_t::TTime time) const; - //! Get a result from the queue - CHierarchicalResults &get(core_t::TTime time); + //! Get a result from the queue + CHierarchicalResults& get(core_t::TTime time); - //! Returns the size of the queue. - std::size_t size() const; + //! Returns the size of the queue. + std::size_t size() const; - //! Get the latest result from the queue - CHierarchicalResults &latest(); + //! Get the latest result from the queue + CHierarchicalResults& latest(); - //! Returns the latest bucket end time, as tracked by the queue - core_t::TTime latestBucketEnd() const; + //! Returns the latest bucket end time, as tracked by the queue + core_t::TTime latestBucketEnd() const; - //! Select which queued result object to output, based on anomaly score - //! and which have been output most recently - core_t::TTime chooseResultTime(core_t::TTime bucketStartTime, - core_t::TTime bucketLength, - model::CHierarchicalResults &results); + //! Select which queued result object to output, based on anomaly score + //! and which have been output most recently + core_t::TTime chooseResultTime(core_t::TTime bucketStartTime, core_t::TTime bucketLength, model::CHierarchicalResults& results); - //! Standard persistence - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Standard persistence + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - //! Standard restoration - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + //! Standard restoration + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - private: - //! The collection of results objects - THierarchicalResultsQueue m_Results; +private: + //! The collection of results objects + THierarchicalResultsQueue m_Results; - //! Which of the previous results did we output? - size_t m_LastResultsIndex; + //! Which of the previous results did we output? + size_t m_LastResultsIndex; }; } // model } // ml - #endif // INCLUDED_ml_model_CResultsQueue_h diff --git a/include/model/CRuleCondition.h b/include/model/CRuleCondition.h index fedff8766e..282f4673b5 100644 --- a/include/model/CRuleCondition.h +++ b/include/model/CRuleCondition.h @@ -7,21 +7,18 @@ #ifndef INCLUDED_ml_model_CRuleCondition_h #define INCLUDED_ml_model_CRuleCondition_h -#include #include +#include #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CPatternSet; } -namespace model -{ +namespace model { class CAnomalyDetectorModel; //! \brief A condition that may trigger a rule. @@ -32,102 +29,86 @@ class CAnomalyDetectorModel; //! that will be performed. The specified fieldName/fieldValue, //! when present, determines the series against which the //! condition is checked. 
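Stripped of the categorical and scoping machinery, a numerical rule condition of the kind declared below reduces to an operator and a threshold applied to a value extracted from the series. A minimal sketch of the SCondition::test logic, under the assumption that each operator simply compares the extracted value against the threshold:

    enum EConditionOperator { E_LT, E_LTE, E_GT, E_GTE };

    struct SCondition {
        EConditionOperator s_Op;
        double s_Threshold;

        // True when `value` satisfies the operator/threshold pair, e.g.
        // SCondition{E_GT, 100.0}.test(250.0) == true.
        bool test(double value) const {
            switch (s_Op) {
            case E_LT:
                return value < s_Threshold;
            case E_LTE:
                return value <= s_Threshold;
            case E_GT:
                return value > s_Threshold;
            case E_GTE:
                return value >= s_Threshold;
            }
            return false;
        }
    };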
-class MODEL_EXPORT CRuleCondition -{ - public: - using TPatternSetCRef = boost::reference_wrapper; - - public: - enum ERuleConditionType - { - E_Categorical, - E_NumericalActual, - E_NumericalTypical, - E_NumericalDiffAbs, - E_Time - }; - - enum EConditionOperator - { - E_LT, - E_LTE, - E_GT, - E_GTE - }; - - struct SCondition - { - SCondition(EConditionOperator op, double threshold); - - bool test(double value) const; - - EConditionOperator s_Op; - double s_Threshold; - }; - - public: - //! Default constructor. - CRuleCondition(); - - //! Set the condition type. - void type(ERuleConditionType ruleType); - - //! Set the field name. Empty means it is not specified. - void fieldName(const std::string &fieldName); - - //! Set the field value. Empty means it is not specified. - void fieldValue(const std::string &fieldValue); - - //! Get the numerical condition. - SCondition &condition(); - - //! Set the value filter (used for categorical only). - void valueFilter(const core::CPatternSet &valueFilter); - - //! Is the condition categorical? - bool isCategorical() const; - - //! Is the condition numerical? - bool isNumerical() const; - - //! Pretty-print the condition. - std::string print() const; - - //! Test the condition against a series. - bool test(const CAnomalyDetectorModel &model, - model_t::EFeature feature, - const model_t::CResultType &resultType, - bool isScoped, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - - private: - bool checkCondition(const CAnomalyDetectorModel &model, - model_t::EFeature feature, - model_t::CResultType resultType, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; - std::string print(ERuleConditionType type) const; - std::string print(EConditionOperator op) const; - - private: - //! The condition type. - ERuleConditionType m_Type; - - //! The numerical condition. - SCondition m_Condition; - - //! The field name. Empty when not specified. - std::string m_FieldName; - - //! The field value. Empty when not specified. - std::string m_FieldValue; - - TPatternSetCRef m_ValueFilter; -}; +class MODEL_EXPORT CRuleCondition { +public: + using TPatternSetCRef = boost::reference_wrapper; + +public: + enum ERuleConditionType { E_Categorical, E_NumericalActual, E_NumericalTypical, E_NumericalDiffAbs, E_Time }; + + enum EConditionOperator { E_LT, E_LTE, E_GT, E_GTE }; + + struct SCondition { + SCondition(EConditionOperator op, double threshold); + + bool test(double value) const; + + EConditionOperator s_Op; + double s_Threshold; + }; + +public: + //! Default constructor. + CRuleCondition(); + + //! Set the condition type. + void type(ERuleConditionType ruleType); + + //! Set the field name. Empty means it is not specified. + void fieldName(const std::string& fieldName); + + //! Set the field value. Empty means it is not specified. + void fieldValue(const std::string& fieldValue); + //! Get the numerical condition. + SCondition& condition(); + + //! Set the value filter (used for categorical only). + void valueFilter(const core::CPatternSet& valueFilter); + + //! Is the condition categorical? + bool isCategorical() const; + + //! Is the condition numerical? + bool isNumerical() const; + + //! Pretty-print the condition. + std::string print() const; + + //! Test the condition against a series. 
+ bool test(const CAnomalyDetectorModel& model, + model_t::EFeature feature, + const model_t::CResultType& resultType, + bool isScoped, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; + +private: + bool checkCondition(const CAnomalyDetectorModel& model, + model_t::EFeature feature, + model_t::CResultType resultType, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; + std::string print(ERuleConditionType type) const; + std::string print(EConditionOperator op) const; + +private: + //! The condition type. + ERuleConditionType m_Type; + + //! The numerical condition. + SCondition m_Condition; + + //! The field name. Empty when not specified. + std::string m_FieldName; + + //! The field value. Empty when not specified. + std::string m_FieldValue; + + TPatternSetCRef m_ValueFilter; +}; } } diff --git a/include/model/CSample.h b/include/model/CSample.h index cf9910d985..b5e2dbc58a 100644 --- a/include/model/CSample.h +++ b/include/model/CSample.h @@ -8,81 +8,75 @@ #define INCLUDED_ml_model_CSample_h #include -#include #include +#include #include #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief A metric sample statistic. -class MODEL_EXPORT CSample -{ - public: - using TDouble1Vec = core::CSmallVector; +class MODEL_EXPORT CSample { +public: + using TDouble1Vec = core::CSmallVector; - struct MODEL_EXPORT SToString - { - std::string operator()(const CSample &sample) const; - }; + struct MODEL_EXPORT SToString { + std::string operator()(const CSample& sample) const; + }; - struct MODEL_EXPORT SFromString - { - bool operator()(const std::string &token, CSample &value) const; - }; + struct MODEL_EXPORT SFromString { + bool operator()(const std::string& token, CSample& value) const; + }; - public: - CSample(); - CSample(core_t::TTime time, const TDouble1Vec &value, double varianceScale, double count); +public: + CSample(); + CSample(core_t::TTime time, const TDouble1Vec& value, double varianceScale, double count); - //! Get the time. - core_t::TTime time() const { return m_Time; } + //! Get the time. + core_t::TTime time() const { return m_Time; } - //! Get the variance scale. - double varianceScale() const { return m_VarianceScale; } + //! Get the variance scale. + double varianceScale() const { return m_VarianceScale; } - //! Get the count. - double count() const { return m_Count; } + //! Get the count. + double count() const { return m_Count; } - //! Get a writable count. - double &count() { return m_Count; } + //! Get a writable count. + double& count() { return m_Count; } - //! Get the value and any ancillary statistics needed to calculate - //! influence. - const TDouble1Vec &value() const { return m_Value; } + //! Get the value and any ancillary statistics needed to calculate + //! influence. + const TDouble1Vec& value() const { return m_Value; } - //! Get a writable value and any ancillary statistics needed to - //! calculate influence. - TDouble1Vec &value() { return m_Value; } + //! Get a writable value and any ancillary statistics needed to + //! calculate influence. + TDouble1Vec& value() { return m_Value; } - //! Get the value of the feature. - TDouble1Vec value(std::size_t dimension) const; + //! Get the value of the feature. + TDouble1Vec value(std::size_t dimension) const; - //! Get a checksum. - uint64_t checksum() const; + //! Get a checksum. + uint64_t checksum() const; - //! Print the sample for debug. - std::string print() const; + //! Print the sample for debug. 
+    std::string print() const;

-        //! Debug the memory used by this object.
-        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+    //! Debug the memory used by this object.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;

-        //! Get the memory used by this object.
-        std::size_t memoryUsage() const;
+    //! Get the memory used by this object.
+    std::size_t memoryUsage() const;

-    private:
-        core_t::TTime m_Time;
-        TDouble1Vec m_Value;
-        double m_VarianceScale;
-        double m_Count;
+private:
+    core_t::TTime m_Time;
+    TDouble1Vec m_Value;
+    double m_VarianceScale;
+    double m_Count;
 };
-
 }
 }

diff --git a/include/model/CSampleCounts.h b/include/model/CSampleCounts.h
index 8f77d64677..89f9673cb1 100644
--- a/include/model/CSampleCounts.h
+++ b/include/model/CSampleCounts.h
@@ -17,16 +17,12 @@
 #include
 #include

-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace model
-{
+namespace model {
 class CDataGatherer;

 //! \brief Manages setting of sample counts.
@@ -39,99 +35,93 @@ class CDataGatherer;
 //! likelihood function is approximate and so if the mean
 //! bucket count wanders too far from the sample count we reset
 //! the sample count.
-class MODEL_EXPORT CSampleCounts
-{
-    public:
-        using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
-        using TMeanAccumulatorVec = std::vector<TMeanAccumulator>;
-        using TSizeVec = std::vector<std::size_t>;
+class MODEL_EXPORT CSampleCounts {
+public:
+    using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
+    using TMeanAccumulatorVec = std::vector<TMeanAccumulator>;
+    using TSizeVec = std::vector<std::size_t>;

-    public:
-        explicit CSampleCounts(unsigned int sampleCountOverride = 0);
+public:
+    explicit CSampleCounts(unsigned int sampleCountOverride = 0);

-        //! Create a copy that will result in the same persisted state as the
-        //! original. This is effectively a copy constructor that creates a
-        //! copy that's only valid for a single purpose. The boolean flag is
-        //! redundant except to create a signature that will not be mistaken for
-        //! a general purpose copy constructor.
-        CSampleCounts(bool isForPersistence,
-                      const CSampleCounts &other);
+    //! Create a copy that will result in the same persisted state as the
+    //! original. This is effectively a copy constructor that creates a
+    //! copy that's only valid for a single purpose. The boolean flag is
+    //! redundant except to create a signature that will not be mistaken for
+    //! a general purpose copy constructor.
+    CSampleCounts(bool isForPersistence, const CSampleCounts& other);

-        CSampleCounts *cloneForPersistence() const;
+    CSampleCounts* cloneForPersistence() const;

-        //! Persist the sample counts to a state document.
-        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+    //! Persist the sample counts to a state document.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;

-        //! Restore some sample counts from a state document traverser.
-        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
+    //! Restore some sample counts from a state document traverser.
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);

-        //! Get the sample count identified by \p id.
-        unsigned int count(std::size_t id) const;
+    //! Get the sample count identified by \p id.
+    unsigned int count(std::size_t id) const;

-        //! Get the effective sample count identified by \p id.
-        double effectiveSampleCount(std::size_t id) const;
+    //!
Get the effective sample count identified by \p id. + double effectiveSampleCount(std::size_t id) const; - //! Reset the sample count identified by \p id. - void resetSampleCount(const CDataGatherer &gatherer, - std::size_t id); + //! Reset the sample count identified by \p id. + void resetSampleCount(const CDataGatherer& gatherer, std::size_t id); - //! Update the effective sample variances to reflect new sample for \p id. - void updateSampleVariance(std::size_t id); + //! Update the effective sample variances to reflect new sample for \p id. + void updateSampleVariance(std::size_t id); - //! Update the mean non-zero bucket counts and age the count data. - void updateMeanNonZeroBucketCount(std::size_t id, - double count, - double alpha); + //! Update the mean non-zero bucket counts and age the count data. + void updateMeanNonZeroBucketCount(std::size_t id, double count, double alpha); - //! Refresh the sample count identified by \p id. - void refresh(const CDataGatherer &gatherer); + //! Refresh the sample count identified by \p id. + void refresh(const CDataGatherer& gatherer); - //! Recycle the sample counts identified by \p idsToRemove. - void recycle(const TSizeVec &idsToRemove); + //! Recycle the sample counts identified by \p idsToRemove. + void recycle(const TSizeVec& idsToRemove); - //! Remove all traces of attributes whose identifiers are - //! greater than or equal to \p lowestIdToRemove. - void remove(std::size_t lowestIdToRemove); + //! Remove all traces of attributes whose identifiers are + //! greater than or equal to \p lowestIdToRemove. + void remove(std::size_t lowestIdToRemove); - //! Resize the sample counts so they can accommodate \p id. - void resize(std::size_t id); + //! Resize the sample counts so they can accommodate \p id. + void resize(std::size_t id); - //! Get the sample counts checksum. - uint64_t checksum(const CDataGatherer &gatherer) const; + //! Get the sample counts checksum. + uint64_t checksum(const CDataGatherer& gatherer) const; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! Get the memory used by this object. + std::size_t memoryUsage() const; - //! Clear the sample counts. - void clear(); + //! Clear the sample counts. + void clear(); - private: - using TUIntVec = std::vector; +private: + using TUIntVec = std::vector; - private: - //! Get the name of the entity identified by \p id. - const std::string &name(const CDataGatherer &gatherer, - std::size_t id) const; +private: + //! Get the name of the entity identified by \p id. + const std::string& name(const CDataGatherer& gatherer, std::size_t id) const; - private: - //! This overrides the sample counts if non-zero. - unsigned int m_SampleCountOverride; +private: + //! This overrides the sample counts if non-zero. + unsigned int m_SampleCountOverride; - //! The "fixed" number of measurements in a sample. - TUIntVec m_SampleCounts; + //! The "fixed" number of measurements in a sample. + TUIntVec m_SampleCounts; - //! The mean number of measurements per bucket. - TMeanAccumulatorVec m_MeanNonZeroBucketCounts; + //! The mean number of measurements per bucket. + TMeanAccumulatorVec m_MeanNonZeroBucketCounts; - //! The effective sample variance in the data supplied to the - //! model. The sample count is reset if the mean bucket count - //! 
moves too far from the current value. This is an approximate - //! estimate of the effective variance, due to the averaging - //! process, of the samples with which the model has been updated. - TMeanAccumulatorVec m_EffectiveSampleVariances; + //! The effective sample variance in the data supplied to the + //! model. The sample count is reset if the mean bucket count + //! moves too far from the current value. This is an approximate + //! estimate of the effective variance, due to the averaging + //! process, of the samples with which the model has been updated. + TMeanAccumulatorVec m_EffectiveSampleVariances; }; } // model diff --git a/include/model/CSampleGatherer.h b/include/model/CSampleGatherer.h index e0ca5afc4c..813798bea4 100644 --- a/include/model/CSampleGatherer.h +++ b/include/model/CSampleGatherer.h @@ -10,10 +10,10 @@ #include #include #include -#include #include #include #include +#include #include #include @@ -35,10 +35,8 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief Metric statistic gatherer. //! @@ -54,413 +52,327 @@ namespace model //! \tparam STATISTIC This must satisfy the requirements imposed //! by CMetricPartialStatistic. template -class CSampleGatherer -{ - public: - using TDouble1Vec = core::CSmallVector; - using TStrVec = std::vector; - using TStrVecCItr = TStrVec::const_iterator; - using TStrCRef = boost::reference_wrapper; - using TDouble1VecDoublePr = std::pair; - using TStrCRefDouble1VecDoublePrPr = std::pair; - using TStrCRefDouble1VecDoublePrPrVec = std::vector; - using TStrCRefDouble1VecDoublePrPrVecVec = std::vector; - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; - using TSampleQueue = CSampleQueue; - using TSampleVec = typename TSampleQueue::TSampleVec; - using TMetricPartialStatistic = CMetricPartialStatistic; - using TStatBucketQueue = CBucketQueue; - using TStoredStringPtrVec = std::vector; - using TStoredStringPtrStatUMap = boost::unordered_map; - using TStoredStringPtrStatUMapBucketQueue = CBucketQueue; - using TStoredStringPtrStatUMapBucketQueueVec = std::vector; - - public: - static const std::string CLASSIFIER_TAG; - static const std::string SAMPLE_STATS_TAG; - static const std::string BUCKET_STATS_TAG; - static const std::string INFLUENCER_BUCKET_STATS_TAG; - static const std::string DIMENSION_TAG; - - public: - CSampleGatherer(const SModelParams ¶ms, - std::size_t dimension, - core_t::TTime startTime, - core_t::TTime bucketLength, - TStrVecCItr beginInfluencers, - TStrVecCItr endInfluencers) : - m_Dimension(dimension), - m_SampleStats(dimension, - params.s_SampleCountFactor, - params.s_LatencyBuckets, - params.s_SampleQueueGrowthFactor, - bucketLength), - m_BucketStats(params.s_LatencyBuckets, - bucketLength, - startTime, - TMetricPartialStatistic(dimension)), - m_InfluencerBucketStats(std::distance(beginInfluencers, endInfluencers), - TStoredStringPtrStatUMapBucketQueue(params.s_LatencyBuckets + 3, - bucketLength, - startTime, - TStoredStringPtrStatUMap(1))) - {} - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter. 
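The mean non-zero bucket counts above are aged as well as updated: updateMeanNonZeroBucketCount takes an alpha that discounts history, so the mean can track drifting bucket rates. A toy exponentially aged mean showing the behaviour this relies on (our own accumulator, not maths::CBasicStatistics, and the age-then-add ordering is an assumption made for illustration):

    #include <iostream>

    // Ageing multiplies the effective sample count by alpha < 1, so old
    // buckets contribute progressively less to the mean.
    class AgedMean {
    public:
        void age(double alpha) { m_Count *= alpha; m_Sum *= alpha; }
        void add(double x) { m_Count += 1.0; m_Sum += x; }
        double mean() const { return m_Count > 0.0 ? m_Sum / m_Count : 0.0; }
    private:
        double m_Count = 0.0;
        double m_Sum = 0.0;
    };

    int main() {
        AgedMean meanBucketCount;
        for (double count : {10.0, 12.0, 50.0}) {
            meanBucketCount.age(0.9); // discount history before each update
            meanBucketCount.add(count);
        }
        // Recent buckets dominate, so the mean tracks the jump towards 50.
        std::cout << meanBucketCount.mean() << '\n';
    }
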
- void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - inserter.insertValue(DIMENSION_TAG, m_Dimension); - inserter.insertLevel(CLASSIFIER_TAG, - boost::bind(&CDataClassifier::acceptPersistInserter, - &m_Classifier, - _1)); - if (m_SampleStats.size() > 0) - { - inserter.insertLevel(SAMPLE_STATS_TAG, - boost::bind(&TSampleQueue::acceptPersistInserter, - &m_SampleStats, - _1)); - } - if (m_BucketStats.size() > 0) - { - inserter.insertLevel(BUCKET_STATS_TAG, - boost::bind(TStatBucketQueueSerializer( - TMetricPartialStatistic(m_Dimension)), - boost::cref(m_BucketStats), - _1)); - } - for (const auto &stats : m_InfluencerBucketStats) - { - inserter.insertLevel(INFLUENCER_BUCKET_STATS_TAG, - boost::bind(TStoredStringPtrStatUMapBucketQueueSerializer( - TStoredStringPtrStatUMap(1), - CStoredStringPtrStatUMapSerializer(m_Dimension)), - boost::cref(stats), - _1)); - } +class CSampleGatherer { +public: + using TDouble1Vec = core::CSmallVector; + using TStrVec = std::vector; + using TStrVecCItr = TStrVec::const_iterator; + using TStrCRef = boost::reference_wrapper; + using TDouble1VecDoublePr = std::pair; + using TStrCRefDouble1VecDoublePrPr = std::pair; + using TStrCRefDouble1VecDoublePrPrVec = std::vector; + using TStrCRefDouble1VecDoublePrPrVecVec = std::vector; + using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; + using TSampleQueue = CSampleQueue; + using TSampleVec = typename TSampleQueue::TSampleVec; + using TMetricPartialStatistic = CMetricPartialStatistic; + using TStatBucketQueue = CBucketQueue; + using TStoredStringPtrVec = std::vector; + using TStoredStringPtrStatUMap = boost::unordered_map; + using TStoredStringPtrStatUMapBucketQueue = CBucketQueue; + using TStoredStringPtrStatUMapBucketQueueVec = std::vector; + +public: + static const std::string CLASSIFIER_TAG; + static const std::string SAMPLE_STATS_TAG; + static const std::string BUCKET_STATS_TAG; + static const std::string INFLUENCER_BUCKET_STATS_TAG; + static const std::string DIMENSION_TAG; + +public: + CSampleGatherer(const SModelParams& params, + std::size_t dimension, + core_t::TTime startTime, + core_t::TTime bucketLength, + TStrVecCItr beginInfluencers, + TStrVecCItr endInfluencers) + : m_Dimension(dimension), + m_SampleStats(dimension, params.s_SampleCountFactor, params.s_LatencyBuckets, params.s_SampleQueueGrowthFactor, bucketLength), + m_BucketStats(params.s_LatencyBuckets, bucketLength, startTime, TMetricPartialStatistic(dimension)), + m_InfluencerBucketStats( + std::distance(beginInfluencers, endInfluencers), + TStoredStringPtrStatUMapBucketQueue(params.s_LatencyBuckets + 3, bucketLength, startTime, TStoredStringPtrStatUMap(1))) {} + + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertValue(DIMENSION_TAG, m_Dimension); + inserter.insertLevel(CLASSIFIER_TAG, boost::bind(&CDataClassifier::acceptPersistInserter, &m_Classifier, _1)); + if (m_SampleStats.size() > 0) { + inserter.insertLevel(SAMPLE_STATS_TAG, boost::bind(&TSampleQueue::acceptPersistInserter, &m_SampleStats, _1)); } - - //! Create from part of a state document. 
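acceptPersistInserter builds nested state documents by handing the inserter callbacks, which is what all the boost::bind expressions construct. The general shape, sketched with a toy inserter (illustrative only; this is not the real core::CStatePersistInserter API):

    #include <functional>
    #include <iostream>
    #include <string>

    // A level is persisted by passing a callback which receives a nested
    // inserter; the callee then persists its own members.
    class Inserter {
    public:
        void insertValue(const std::string& tag, long value) {
            std::cout << m_Indent << tag << " = " << value << '\n';
        }
        void insertLevel(const std::string& tag, const std::function<void(Inserter&)>& persist) {
            std::cout << m_Indent << tag << " {\n";
            Inserter nested{m_Indent + "  "};
            persist(nested);
            std::cout << m_Indent << "}\n";
        }
        std::string m_Indent;
    };

    struct SubSample {
        long start = 100, end = 160;
        void acceptPersistInserter(Inserter& inserter) const {
            inserter.insertValue("a", start);
            inserter.insertValue("b", end);
        }
    };

    int main() {
        Inserter inserter;
        SubSample s;
        // Equivalent in spirit to boost::bind(&SubSample::acceptPersistInserter, &s, _1).
        inserter.insertLevel("sub_sample", [&s](Inserter& i) { s.acceptPersistInserter(i); });
    }
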
- bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) - { - std::size_t i = 0u; - do - { - const std::string &name = traverser.name(); - TMetricPartialStatistic stat(m_Dimension); - RESTORE_BUILT_IN(DIMENSION_TAG, m_Dimension) - RESTORE(CLASSIFIER_TAG, traverser.traverseSubLevel( - boost::bind(&CDataClassifier::acceptRestoreTraverser, - &m_Classifier, _1))) - RESTORE(SAMPLE_STATS_TAG, traverser.traverseSubLevel( - boost::bind(&TSampleQueue::acceptRestoreTraverser, - &m_SampleStats, _1))) - RESTORE(BUCKET_STATS_TAG, traverser.traverseSubLevel( - boost::bind(TStatBucketQueueSerializer( - TMetricPartialStatistic(m_Dimension)), - boost::ref(m_BucketStats), _1))) - RESTORE(INFLUENCER_BUCKET_STATS_TAG, - i < m_InfluencerBucketStats.size() - && traverser.traverseSubLevel(boost::bind(TStoredStringPtrStatUMapBucketQueueSerializer( - TStoredStringPtrStatUMap(1), - CStoredStringPtrStatUMapSerializer(m_Dimension)), - boost::ref(m_InfluencerBucketStats[i++]), _1))) - } - while (traverser.next()); - return true; + if (m_BucketStats.size() > 0) { + inserter.insertLevel( + BUCKET_STATS_TAG, + boost::bind(TStatBucketQueueSerializer(TMetricPartialStatistic(m_Dimension)), boost::cref(m_BucketStats), _1)); } - //@} - - //! Get the dimension of the underlying statistic. - std::size_t dimension() const - { - return m_Dimension; + for (const auto& stats : m_InfluencerBucketStats) { + inserter.insertLevel(INFLUENCER_BUCKET_STATS_TAG, + boost::bind(TStoredStringPtrStatUMapBucketQueueSerializer( + TStoredStringPtrStatUMap(1), CStoredStringPtrStatUMapSerializer(m_Dimension)), + boost::cref(stats), + _1)); } - - //! Get the feature data for the bucketing interval containing - //! \p time. - //! - //! \param[in] time The start time of the sampled bucket. - //! \param[in] effectiveSampleCount The effective historical - //! number of measurements in a sample. - SMetricFeatureData featureData(core_t::TTime time, - core_t::TTime /*bucketLength*/, - double effectiveSampleCount) const - { - const TMetricPartialStatistic &bucketPartial = m_BucketStats.get(time); - double count = bucketPartial.count(); - if (count > 0.0) - { - core_t::TTime bucketTime = bucketPartial.time(); - TDouble1Vec bucketValue = bucketPartial.value(); - if (bucketValue.size() > 0) - { - TStrCRefDouble1VecDoublePrPrVecVec influenceValues(m_InfluencerBucketStats.size()); - for (std::size_t i = 0u; i < m_InfluencerBucketStats.size(); ++i) - { - const TStoredStringPtrStatUMap &influencerStats = m_InfluencerBucketStats[i].get(time); - influenceValues[i].reserve(influencerStats.size()); - for (const auto &stat : influencerStats) - { - influenceValues[i].emplace_back( - boost::cref(*stat.first), - std::make_pair(CMetricStatisticWrappers::influencerValue(stat.second), - CMetricStatisticWrappers::count(stat.second))); - } + } + + //! Create from part of a state document. 
+ bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + std::size_t i = 0u; + do { + const std::string& name = traverser.name(); + TMetricPartialStatistic stat(m_Dimension); + RESTORE_BUILT_IN(DIMENSION_TAG, m_Dimension) + RESTORE(CLASSIFIER_TAG, traverser.traverseSubLevel(boost::bind(&CDataClassifier::acceptRestoreTraverser, &m_Classifier, _1))) + RESTORE(SAMPLE_STATS_TAG, traverser.traverseSubLevel(boost::bind(&TSampleQueue::acceptRestoreTraverser, &m_SampleStats, _1))) + RESTORE(BUCKET_STATS_TAG, + traverser.traverseSubLevel( + boost::bind(TStatBucketQueueSerializer(TMetricPartialStatistic(m_Dimension)), boost::ref(m_BucketStats), _1))) + RESTORE(INFLUENCER_BUCKET_STATS_TAG, + i < m_InfluencerBucketStats.size() && + traverser.traverseSubLevel( + boost::bind(TStoredStringPtrStatUMapBucketQueueSerializer( + TStoredStringPtrStatUMap(1), CStoredStringPtrStatUMapSerializer(m_Dimension)), + boost::ref(m_InfluencerBucketStats[i++]), + _1))) + } while (traverser.next()); + return true; + } + //@} + + //! Get the dimension of the underlying statistic. + std::size_t dimension() const { return m_Dimension; } + + //! Get the feature data for the bucketing interval containing + //! \p time. + //! + //! \param[in] time The start time of the sampled bucket. + //! \param[in] effectiveSampleCount The effective historical + //! number of measurements in a sample. + SMetricFeatureData featureData(core_t::TTime time, core_t::TTime /*bucketLength*/, double effectiveSampleCount) const { + const TMetricPartialStatistic& bucketPartial = m_BucketStats.get(time); + double count = bucketPartial.count(); + if (count > 0.0) { + core_t::TTime bucketTime = bucketPartial.time(); + TDouble1Vec bucketValue = bucketPartial.value(); + if (bucketValue.size() > 0) { + TStrCRefDouble1VecDoublePrPrVecVec influenceValues(m_InfluencerBucketStats.size()); + for (std::size_t i = 0u; i < m_InfluencerBucketStats.size(); ++i) { + const TStoredStringPtrStatUMap& influencerStats = m_InfluencerBucketStats[i].get(time); + influenceValues[i].reserve(influencerStats.size()); + for (const auto& stat : influencerStats) { + influenceValues[i].emplace_back(boost::cref(*stat.first), + std::make_pair(CMetricStatisticWrappers::influencerValue(stat.second), + CMetricStatisticWrappers::count(stat.second))); } - return {bucketTime, bucketValue, - model_t::varianceScale(FEATURE, effectiveSampleCount, count), - count, influenceValues, m_Classifier.isInteger(), - m_Classifier.isNonNegative(), m_Samples}; } + return {bucketTime, + bucketValue, + model_t::varianceScale(FEATURE, effectiveSampleCount, count), + count, + influenceValues, + m_Classifier.isInteger(), + m_Classifier.isNonNegative(), + m_Samples}; } - return {m_Classifier.isInteger(), m_Classifier.isNonNegative(), m_Samples}; } - - //! Create samples if possible for the given bucket. - //! - //! \param[in] time The start time of the sampled bucket. - //! \param[in] sampleCount The measurement count in a sample. - //! \return True if there are new samples and false otherwise. - bool sample(core_t::TTime time, unsigned int sampleCount) - { - if (sampleCount > 0 && m_SampleStats.canSample(time)) - { - TSampleVec newSamples; - m_SampleStats.sample(time, sampleCount, FEATURE, newSamples); - m_Samples.insert(m_Samples.end(), newSamples.begin(), newSamples.end()); - return !newSamples.empty(); - } - return false; + return {m_Classifier.isInteger(), m_Classifier.isNonNegative(), m_Samples}; + } + + //! Create samples if possible for the given bucket. + //! + //! 
\param[in] time The start time of the sampled bucket. + //! \param[in] sampleCount The measurement count in a sample. + //! \return True if there are new samples and false otherwise. + bool sample(core_t::TTime time, unsigned int sampleCount) { + if (sampleCount > 0 && m_SampleStats.canSample(time)) { + TSampleVec newSamples; + m_SampleStats.sample(time, sampleCount, FEATURE, newSamples); + m_Samples.insert(m_Samples.end(), newSamples.begin(), newSamples.end()); + return !newSamples.empty(); } - - //! Update the state with a new measurement. - //! - //! \param[in] time The time of \p value. - //! \param[in] value The measurement value. - //! \param[in] sampleCount The measurement count in a sample. - //! \param[in] influences The influencing field values which - //! label \p value. - inline void add(core_t::TTime time, - const TDouble1Vec &value, - unsigned int sampleCount, - const TStoredStringPtrVec &influences) - { - this->add(time, value, 1, sampleCount, influences); + return false; + } + + //! Update the state with a new measurement. + //! + //! \param[in] time The time of \p value. + //! \param[in] value The measurement value. + //! \param[in] sampleCount The measurement count in a sample. + //! \param[in] influences The influencing field values which + //! label \p value. + inline void add(core_t::TTime time, const TDouble1Vec& value, unsigned int sampleCount, const TStoredStringPtrVec& influences) { + this->add(time, value, 1, sampleCount, influences); + } + + //! Update the state with a new mean statistic. + //! + //! \param[in] time The approximate time of \p statistic. + //! \param[in] statistic The statistic value. + //! \param[in] count The number of measurements in \p statistic. + //! \param[in] sampleCount The measurement count in a sample. + //! \param[in] influences The influencing field values which + //! label \p value. + void add(core_t::TTime time, + const TDouble1Vec& statistic, + unsigned int count, + unsigned int sampleCount, + const TStoredStringPtrVec& influences) { + if (sampleCount > 0) { + m_SampleStats.add(time, statistic, count, sampleCount); } - - //! Update the state with a new mean statistic. - //! - //! \param[in] time The approximate time of \p statistic. - //! \param[in] statistic The statistic value. - //! \param[in] count The number of measurements in \p statistic. - //! \param[in] sampleCount The measurement count in a sample. - //! \param[in] influences The influencing field values which - //! label \p value. 
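featureData hands the model a variance scale via model_t::varianceScale. For a mean-type statistic the intuition is that a mean of count measurements has variance inversely proportional to count, so relative to a standard sample of effectiveSampleCount measurements the variance is scaled by their ratio. A sketch of that ratio (an assumption for illustration; the real function also depends on the feature type):

    #include <iostream>

    // Variance of a mean of n measurements ~ 1/n, so relative to the
    // "standard" effective sample count the scale is their ratio.
    double varianceScale(double effectiveSampleCount, double count) {
        return effectiveSampleCount / count;
    }

    int main() {
        // A bucket mean built from 20 measurements is tighter than the
        // usual 10-measurement sample: its variance is halved.
        std::cout << varianceScale(10.0, 20.0) << '\n'; // 0.5
        // A bucket with only 5 measurements is noisier: variance doubled.
        std::cout << varianceScale(10.0, 5.0) << '\n';  // 2
    }
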
- void add(core_t::TTime time, - const TDouble1Vec &statistic, - unsigned int count, - unsigned int sampleCount, - const TStoredStringPtrVec &influences) - { - if (sampleCount > 0) - { - m_SampleStats.add(time, statistic, count, sampleCount); - } - m_BucketStats.get(time).add(statistic, time, count); - m_Classifier.add(FEATURE, statistic, count); - std::size_t n = std::min(influences.size(), m_InfluencerBucketStats.size()); - for (std::size_t i = 0u; i < n; ++i) - { - if (!influences[i]) - { - continue; - } - TStoredStringPtrStatUMap &stats = m_InfluencerBucketStats[i].get(time); - auto j = stats.emplace(influences[i], - CMetricStatisticWrappers::template make(m_Dimension)).first; - CMetricStatisticWrappers::add(statistic, count, j->second); + m_BucketStats.get(time).add(statistic, time, count); + m_Classifier.add(FEATURE, statistic, count); + std::size_t n = std::min(influences.size(), m_InfluencerBucketStats.size()); + for (std::size_t i = 0u; i < n; ++i) { + if (!influences[i]) { + continue; } + TStoredStringPtrStatUMap& stats = m_InfluencerBucketStats[i].get(time); + auto j = stats.emplace(influences[i], CMetricStatisticWrappers::template make(m_Dimension)).first; + CMetricStatisticWrappers::add(statistic, count, j->second); } + } - //! Update the state to represent the start of a new bucket. - void startNewBucket(core_t::TTime time) - { - m_BucketStats.push(TMetricPartialStatistic(m_Dimension), time); - for (auto &stats : m_InfluencerBucketStats) - { - stats.push(TStoredStringPtrStatUMap(1), time); - } - m_Samples.clear(); + //! Update the state to represent the start of a new bucket. + void startNewBucket(core_t::TTime time) { + m_BucketStats.push(TMetricPartialStatistic(m_Dimension), time); + for (auto& stats : m_InfluencerBucketStats) { + stats.push(TStoredStringPtrStatUMap(1), time); } - - //! Reset the bucket state for the bucket containing \p bucketStart. - void resetBucket(core_t::TTime bucketStart) - { - m_BucketStats.get(bucketStart) = TMetricPartialStatistic(m_Dimension); - for (auto &stats : m_InfluencerBucketStats) - { - stats.get(bucketStart) = TStoredStringPtrStatUMap(1); - } - m_SampleStats.resetBucket(bucketStart); + m_Samples.clear(); + } + + //! Reset the bucket state for the bucket containing \p bucketStart. + void resetBucket(core_t::TTime bucketStart) { + m_BucketStats.get(bucketStart) = TMetricPartialStatistic(m_Dimension); + for (auto& stats : m_InfluencerBucketStats) { + stats.get(bucketStart) = TStoredStringPtrStatUMap(1); } + m_SampleStats.resetBucket(bucketStart); + } - //! Is the gatherer holding redundant data? - bool isRedundant(core_t::TTime samplingCutoffTime) const - { - if (m_SampleStats.latestEnd() >= samplingCutoffTime) - { + //! Is the gatherer holding redundant data? + bool isRedundant(core_t::TTime samplingCutoffTime) const { + if (m_SampleStats.latestEnd() >= samplingCutoffTime) { + return false; + } + for (const auto& bucket : m_BucketStats) { + if (bucket.count() > 0.0) { return false; } - for (const auto &bucket : m_BucketStats) - { - if (bucket.count() > 0.0) - { - return false; - } - } - return true; - } - - //! Get the checksum of this gatherer. - uint64_t checksum() const - { - uint64_t seed = static_cast(m_Classifier.isInteger()); - seed = maths::CChecksum::calculate(seed, m_Classifier.isNonNegative()); - seed = maths::CChecksum::calculate(seed, m_SampleStats); - seed = maths::CChecksum::calculate(seed, m_BucketStats); - return maths::CChecksum::calculate(seed, m_InfluencerBucketStats); - } - - //! Debug the memory used by this gatherer. 
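The influencer book-keeping in add() creates one statistic per influencing field value on first sight and updates it in place thereafter, using the emplace-then-add idiom. The same pattern with a plain map and a (sum, count) statistic standing in for the templated STATISTIC:

    #include <iostream>
    #include <string>
    #include <unordered_map>

    struct Stat {
        double sum = 0.0, count = 0.0;
        void add(double value, double n) { sum += n * value; count += n; }
    };

    int main() {
        std::unordered_map<std::string, Stat> influencerStats;
        // emplace is a no-op when the key already exists, so `first` always
        // points at the entry to update, whether new or old.
        influencerStats.emplace("status=200", Stat{}).first->second.add(12.0, 1.0);
        influencerStats.emplace("status=500", Stat{}).first->second.add(80.0, 1.0);
        influencerStats.emplace("status=200", Stat{}).first->second.add(14.0, 1.0);
        for (const auto& entry : influencerStats) {
            std::cout << entry.first << " -> " << entry.second.sum / entry.second.count << '\n';
        }
    }
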
- void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CSampleGatherer", sizeof(*this)); - core::CMemoryDebug::dynamicSize("m_SampleStats", m_SampleStats, mem); - core::CMemoryDebug::dynamicSize("m_BucketStats", m_BucketStats, mem); - core::CMemoryDebug::dynamicSize("m_InfluencerBucketStats", - m_InfluencerBucketStats, mem); - core::CMemoryDebug::dynamicSize("m_Samples", m_Samples, mem); } - - //! Get the memory used by this gatherer. - std::size_t memoryUsage() const - { - return sizeof(*this) - + core::CMemory::dynamicSize(m_SampleStats) - + core::CMemory::dynamicSize(m_BucketStats) - + core::CMemory::dynamicSize(m_InfluencerBucketStats) - + core::CMemory::dynamicSize(m_Samples); + return true; + } + + //! Get the checksum of this gatherer. + uint64_t checksum() const { + uint64_t seed = static_cast(m_Classifier.isInteger()); + seed = maths::CChecksum::calculate(seed, m_Classifier.isNonNegative()); + seed = maths::CChecksum::calculate(seed, m_SampleStats); + seed = maths::CChecksum::calculate(seed, m_BucketStats); + return maths::CChecksum::calculate(seed, m_InfluencerBucketStats); + } + + //! Debug the memory used by this gatherer. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CSampleGatherer", sizeof(*this)); + core::CMemoryDebug::dynamicSize("m_SampleStats", m_SampleStats, mem); + core::CMemoryDebug::dynamicSize("m_BucketStats", m_BucketStats, mem); + core::CMemoryDebug::dynamicSize("m_InfluencerBucketStats", m_InfluencerBucketStats, mem); + core::CMemoryDebug::dynamicSize("m_Samples", m_Samples, mem); + } + + //! Get the memory used by this gatherer. + std::size_t memoryUsage() const { + return sizeof(*this) + core::CMemory::dynamicSize(m_SampleStats) + core::CMemory::dynamicSize(m_BucketStats) + + core::CMemory::dynamicSize(m_InfluencerBucketStats) + core::CMemory::dynamicSize(m_Samples); + } + + //! Print this gatherer for debug. + std::string print() const { + std::ostringstream result; + result << m_Classifier.isInteger() << ' ' << m_Classifier.isNonNegative() << ' ' << m_BucketStats.print() << ' ' + << m_SampleStats.print() << ' ' << core::CContainerPrinter::print(m_Samples) << ' ' + << core::CContainerPrinter::print(m_InfluencerBucketStats); + return result.str(); + } + +private: + static const std::string MAP_KEY_TAG; + static const std::string MAP_VALUE_TAG; + +private: + //! \brief Manages persistence of bucket statistics. + struct SStatSerializer { + void operator()(const TMetricPartialStatistic& stat, core::CStatePersistInserter& inserter) const { stat.persist(inserter); } + + bool operator()(TMetricPartialStatistic& stat, core::CStateRestoreTraverser& traverser) const { return stat.restore(traverser); } + }; + using TStatBucketQueueSerializer = typename TStatBucketQueue::template CSerializer; + + //! \brief Manages persistence of influence bucket statistics. 
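The bucket queue's CSerializer is parameterised on a functor exposing one call operator for persisting and one for restoring, which is the contract SStatSerializer satisfies above. A self-contained sketch of that two-overload shape, using text streams as stand-in state documents:

    #include <iostream>
    #include <sstream>

    // Persist takes the statistic by const reference plus a writer; restore
    // takes it by mutable reference plus a reader and reports success.
    struct MeanSerializer {
        void operator()(const double& mean, std::ostream& inserter) const {
            inserter << mean;
        }
        bool operator()(double& mean, std::istream& traverser) const {
            return static_cast<bool>(traverser >> mean);
        }
    };

    int main() {
        MeanSerializer serializer;
        std::ostringstream persisted;
        serializer(42.5, persisted);                     // persist overload
        std::istringstream restoredState(persisted.str());
        double restored = 0.0;
        if (serializer(restored, restoredState)) {       // restore overload
            std::cout << restored << '\n';               // 42.5
        }
    }
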
+ class CStoredStringPtrStatUMapSerializer { + public: + CStoredStringPtrStatUMapSerializer(std::size_t dimension) + : m_Initial(CMetricStatisticWrappers::template make(dimension)) {} + + void operator()(const TStoredStringPtrStatUMap& map, core::CStatePersistInserter& inserter) const { + using TStatCRef = boost::reference_wrapper; + using TStrCRefStatCRefPr = std::pair; + using TStrCRefStatCRefPrVec = std::vector; + TStrCRefStatCRefPrVec ordered; + ordered.reserve(map.size()); + for (const auto& stat : map) { + ordered.emplace_back(TStrCRef(*stat.first), TStatCRef(stat.second)); + } + std::sort(ordered.begin(), ordered.end(), maths::COrderings::SFirstLess()); + for (const auto& stat : ordered) { + inserter.insertValue(MAP_KEY_TAG, stat.first); + CMetricStatisticWrappers::persist(stat.second.get(), MAP_VALUE_TAG, inserter); + } } - //! Print this gatherer for debug. - std::string print() const - { - std::ostringstream result; - result << m_Classifier.isInteger() - << ' ' << m_Classifier.isNonNegative() - << ' ' << m_BucketStats.print() - << ' ' << m_SampleStats.print() - << ' ' << core::CContainerPrinter::print(m_Samples) - << ' ' << core::CContainerPrinter::print(m_InfluencerBucketStats); - return result.str(); + bool operator()(TStoredStringPtrStatUMap& map, core::CStateRestoreTraverser& traverser) const { + std::string key; + do { + const std::string& name = traverser.name(); + RESTORE_NO_ERROR(MAP_KEY_TAG, key = traverser.value()) + RESTORE(MAP_VALUE_TAG, + CMetricStatisticWrappers::restore(traverser, + map.insert({CStringStore::influencers().get(key), m_Initial}).first->second)) + } while (traverser.next()); + return true; } private: - static const std::string MAP_KEY_TAG; - static const std::string MAP_VALUE_TAG; + STATISTIC m_Initial; + }; + using TStoredStringPtrStatUMapBucketQueueSerializer = + typename TStoredStringPtrStatUMapBucketQueue::template CSerializer; - private: - //! \brief Manages persistence of bucket statistics. - struct SStatSerializer - { - void operator()(const TMetricPartialStatistic &stat, - core::CStatePersistInserter &inserter) const - { - stat.persist(inserter); - } +private: + //! The dimension of the statistic being gathered. + std::size_t m_Dimension; - bool operator()(TMetricPartialStatistic &stat, - core::CStateRestoreTraverser &traverser) const - { - return stat.restore(traverser); - } - }; - using TStatBucketQueueSerializer = typename TStatBucketQueue::template CSerializer; - - //! \brief Manages persistence of influence bucket statistics. - class CStoredStringPtrStatUMapSerializer - { - public: - CStoredStringPtrStatUMapSerializer(std::size_t dimension) : - m_Initial(CMetricStatisticWrappers::template make(dimension)) - {} - - void operator()(const TStoredStringPtrStatUMap &map, core::CStatePersistInserter &inserter) const - { - using TStatCRef = boost::reference_wrapper; - using TStrCRefStatCRefPr = std::pair; - using TStrCRefStatCRefPrVec = std::vector; - TStrCRefStatCRefPrVec ordered; - ordered.reserve(map.size()); - for (const auto &stat : map) - { - ordered.emplace_back(TStrCRef(*stat.first), TStatCRef(stat.second)); - } - std::sort(ordered.begin(), ordered.end(), maths::COrderings::SFirstLess()); - for (const auto &stat : ordered) - { - inserter.insertValue(MAP_KEY_TAG, stat.first); - CMetricStatisticWrappers::persist(stat.second.get(), MAP_VALUE_TAG, inserter); - } - } + //! Classifies the sampled statistics. 
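Note that the persist path above sorts the map entries before writing them. Since unordered_map iteration order is unspecified, two logically identical maps could otherwise persist to different byte streams, which would break checksums and state comparison. The idea in isolation:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    int main() {
        std::unordered_map<std::string, double> stats{
            {"web-01", 3.0}, {"web-02", 5.0}, {"db-01", 7.0}};

        // Take an ordered view of the entries, then write in that order so
        // the persisted form is deterministic regardless of hashing.
        std::vector<std::pair<std::string, double>> ordered(stats.begin(), stats.end());
        std::sort(ordered.begin(), ordered.end()); // lexicographic on the key

        for (const auto& entry : ordered) {
            std::cout << entry.first << '=' << entry.second << ';';
        }
        std::cout << '\n'; // db-01=7;web-01=3;web-02=5;
    }
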
+ CDataClassifier m_Classifier; - bool operator()(TStoredStringPtrStatUMap &map, core::CStateRestoreTraverser &traverser) const - { - std::string key; - do - { - const std::string &name = traverser.name(); - RESTORE_NO_ERROR(MAP_KEY_TAG, key = traverser.value()) - RESTORE(MAP_VALUE_TAG, CMetricStatisticWrappers::restore( - traverser, - map.insert({CStringStore::influencers().get(key), - m_Initial}).first->second)) - } - while (traverser.next()); - return true; - } + //! The queue holding the partial aggregate statistics within + //! latency window used for building samples. + TSampleQueue m_SampleStats; - private: - STATISTIC m_Initial; - }; - using TStoredStringPtrStatUMapBucketQueueSerializer = - typename TStoredStringPtrStatUMapBucketQueue::template CSerializer; + //! The aggregation of the measurements received for each + //! bucket within latency window. + TStatBucketQueue m_BucketStats; - private: - //! The dimension of the statistic being gathered. - std::size_t m_Dimension; + //! The aggregation of the measurements received for each + //! bucket and influencing field within latency window. + TStoredStringPtrStatUMapBucketQueueVec m_InfluencerBucketStats; - //! Classifies the sampled statistics. - CDataClassifier m_Classifier; - - //! The queue holding the partial aggregate statistics within - //! latency window used for building samples. - TSampleQueue m_SampleStats; - - //! The aggregation of the measurements received for each - //! bucket within latency window. - TStatBucketQueue m_BucketStats; - - //! The aggregation of the measurements received for each - //! bucket and influencing field within latency window. - TStoredStringPtrStatUMapBucketQueueVec m_InfluencerBucketStats; - - //! The samples of the aggregate statistic in the current - //! bucketing interval. - TSampleVec m_Samples; + //! The samples of the aggregate statistic in the current + //! bucketing interval. + TSampleVec m_Samples; }; template @@ -478,15 +390,11 @@ const std::string CSampleGatherer::MAP_KEY_TAG("a"); template const std::string CSampleGatherer::MAP_VALUE_TAG("b"); - //! Overload print operator for feature data. MODEL_EXPORT -inline std::ostream &operator<<(std::ostream &o, - const SMetricFeatureData &fd) -{ +inline std::ostream& operator<<(std::ostream& o, const SMetricFeatureData& fd) { return o << fd.print(); } - } } diff --git a/include/model/CSampleQueue.h b/include/model/CSampleQueue.h index a448ca8699..b3ce95d56b 100644 --- a/include/model/CSampleQueue.h +++ b/include/model/CSampleQueue.h @@ -9,14 +9,13 @@ #include #include -#include -#include -#include -#include #include +#include +#include #include #include #include +#include #include #include @@ -34,10 +33,8 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief A queue that manages the sampling of statistics //! @@ -53,561 +50,398 @@ namespace model //! The template STATISTIC has to comply with the requirements of //! the CMetricPartialStatistic template. template -class CSampleQueue -{ - private: - using TDouble1Vec = core::CSmallVector; - using TMetricPartialStatistic = CMetricPartialStatistic; - - private: - //! A struct grouping together the data that form a sub-sample. - //! A sub-sample is comprised of a partial statistic and a start - //! and an end time marking the interval range for the sub-sample. 
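A sub-sample pairs a running statistic with the closed time interval its measurements span; adding a measurement updates both. A toy version with a simple mean in place of the templated partial statistic:

    #include <algorithm>
    #include <iostream>

    struct SubSample {
        double sum = 0.0;
        double count = 0.0;
        long start, end;

        explicit SubSample(long time) : start(time), end(time) {}

        // Update the statistic and stretch [start, end] to include time.
        void add(double measurement, long time) {
            sum += measurement;
            count += 1.0;
            start = std::min(start, time);
            end = std::max(end, time);
        }
    };

    int main() {
        SubSample s(100);
        s.add(4.0, 100);
        s.add(6.0, 130);
        s.add(5.0, 110);
        std::cout << "[" << s.start << ", " << s.end << "] mean = "
                  << s.sum / s.count << '\n'; // [100, 130] mean = 5
    }
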
- struct SSubSample - { - static const std::string SAMPLE_START_TAG; - static const std::string SAMPLE_END_TAG; - static const std::string SAMPLE_TAG; - - SSubSample(std::size_t dimension, core_t::TTime time) : - s_Statistic(dimension), - s_Start(time), - s_End(time) - { +class CSampleQueue { +private: + using TDouble1Vec = core::CSmallVector; + using TMetricPartialStatistic = CMetricPartialStatistic; + +private: + //! A struct grouping together the data that form a sub-sample. + //! A sub-sample is comprised of a partial statistic and a start + //! and an end time marking the interval range for the sub-sample. + struct SSubSample { + static const std::string SAMPLE_START_TAG; + static const std::string SAMPLE_END_TAG; + static const std::string SAMPLE_TAG; + + SSubSample(std::size_t dimension, core_t::TTime time) : s_Statistic(dimension), s_Start(time), s_End(time) {} + + void add(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) { + s_Statistic.add(measurement, time, count); + // Using explicit tests instead of std::min and std::max to work + // around g++ 4.1 optimiser bug + if (time < s_Start) { + s_Start = time; } - - void add(const TDouble1Vec &measurement, - core_t::TTime time, - unsigned int count) - { - s_Statistic.add(measurement, time, count); - // Using explicit tests instead of std::min and std::max to work - // around g++ 4.1 optimiser bug - if (time < s_Start) - { - s_Start = time; - } - if (time > s_End) - { - s_End = time; - } + if (time > s_End) { + s_End = time; } + } - //! Check if \p time overlaps the interval or doesn't extend - //! it to be more than \p targetSpan long. - bool isClose(core_t::TTime time, - core_t::TTime targetSpan) const - { - if (time > s_End) - { - return time < s_Start + targetSpan; - } - if (time >= s_Start) - { - return true; - } - return time > s_End - targetSpan; + //! Check if \p time overlaps the interval or doesn't extend + //! it to be more than \p targetSpan long. + bool isClose(core_t::TTime time, core_t::TTime targetSpan) const { + if (time > s_End) { + return time < s_Start + targetSpan; } - - // This assumes that buckets are aligned to n * bucketLength - bool isInSameBucket(core_t::TTime time, core_t::TTime bucketLength) const - { - core_t::TTime timeBucket = maths::CIntegerTools::floor(time, bucketLength); - core_t::TTime subSampleBucket = maths::CIntegerTools::floor(s_Start, bucketLength); - return timeBucket == subSampleBucket; + if (time >= s_Start) { + return true; } + return time > s_End - targetSpan; + } - //! Combine the statistic and construct the union interval. - const SSubSample &operator+=(const SSubSample &rhs) - { - s_Statistic += rhs.s_Statistic; - s_Start = std::min(s_Start, rhs.s_Start); - s_End = std::max(s_End, rhs.s_End); - return *this; - } + // This assumes that buckets are aligned to n * bucketLength + bool isInSameBucket(core_t::TTime time, core_t::TTime bucketLength) const { + core_t::TTime timeBucket = maths::CIntegerTools::floor(time, bucketLength); + core_t::TTime subSampleBucket = maths::CIntegerTools::floor(s_Start, bucketLength); + return timeBucket == subSampleBucket; + } - //! Persist to a state document. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - inserter.insertLevel(SAMPLE_TAG, - boost::bind(&TMetricPartialStatistic::persist, - &s_Statistic, - _1)); - inserter.insertValue(SAMPLE_START_TAG, s_Start); - inserter.insertValue(SAMPLE_END_TAG, s_End); - } + //! Combine the statistic and construct the union interval. 
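isInSameBucket relies on buckets being aligned to multiples of the bucket length, so two times share a bucket exactly when they floor to the same multiple. With non-negative times, plain integer division reproduces the check (maths::CIntegerTools::floor generalises it to negative times):

    #include <iostream>

    long bucketStart(long time, long bucketLength) {
        // Round down to the containing bucket's start; valid for time >= 0.
        return (time / bucketLength) * bucketLength;
    }

    int main() {
        const long bucketLength = 300;
        long subSampleStart = 610; // floors to 600
        std::cout << (bucketStart(905, bucketLength) == bucketStart(subSampleStart, bucketLength))
                  << '\n'; // 0: buckets 900 vs 600
        std::cout << (bucketStart(899, bucketLength) == bucketStart(subSampleStart, bucketLength))
                  << '\n'; // 1: both in the bucket starting at 600
    }
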
+ const SSubSample& operator+=(const SSubSample& rhs) { + s_Statistic += rhs.s_Statistic; + s_Start = std::min(s_Start, rhs.s_Start); + s_End = std::max(s_End, rhs.s_End); + return *this; + } + + //! Persist to a state document. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(SAMPLE_TAG, boost::bind(&TMetricPartialStatistic::persist, &s_Statistic, _1)); + inserter.insertValue(SAMPLE_START_TAG, s_Start); + inserter.insertValue(SAMPLE_END_TAG, s_End); + } - //! Restore from a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) - { - do - { - const std::string &name = traverser.name(); - if (name == SAMPLE_TAG) - { - if (traverser.traverseSubLevel(boost::bind(&TMetricPartialStatistic::restore, - &s_Statistic, - _1)) == false) - { - LOG_ERROR("Invalid sample value"); - return false; - } + //! Restore from a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == SAMPLE_TAG) { + if (traverser.traverseSubLevel(boost::bind(&TMetricPartialStatistic::restore, &s_Statistic, _1)) == false) { + LOG_ERROR("Invalid sample value"); + return false; } - else if (name == SAMPLE_START_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), s_Start) == false) - { - LOG_ERROR("Invalid attribute identifier in " << traverser.value()); - return false; - } + } else if (name == SAMPLE_START_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), s_Start) == false) { + LOG_ERROR("Invalid attribute identifier in " << traverser.value()); + return false; } - else if (name == SAMPLE_END_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), s_End) == false) - { - LOG_ERROR("Invalid attribute identifier in " << traverser.value()); - return false; - } + } else if (name == SAMPLE_END_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), s_End) == false) { + LOG_ERROR("Invalid attribute identifier in " << traverser.value()); + return false; } } - while (traverser.next()); - return true; - } - - //! Get a checksum of this sub-sample. - uint64_t checksum() const - { - uint64_t seed = maths::CChecksum::calculate(0, s_Statistic); - seed = maths::CChecksum::calculate(seed, s_Start); - return maths::CChecksum::calculate(seed, s_End); - } - - //! Debug the memory used by the sub-sample. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("SSubSample", sizeof(*this)); - core::CMemoryDebug::dynamicSize("s_Statistic", s_Statistic, mem); - } - - //! Get the memory used by the sub-sample. - std::size_t memoryUsage() const - { - return sizeof(*this) + core::CMemory::dynamicSize(s_Statistic); - } - - //! Print the sub-sample for debug. - std::string print() const - { - return "{[" + core::CStringUtils::typeToString(s_Start) + ", " - + core::CStringUtils::typeToString(s_End) + "] -> " - + s_Statistic.print() + "}"; - } - - TMetricPartialStatistic s_Statistic; - core_t::TTime s_Start; - core_t::TTime s_End; - }; - - public: - using TQueue = boost::circular_buffer; - using iterator = typename TQueue::iterator; - using reverse_iterator = typename TQueue::reverse_iterator; - using const_reverse_iterator = typename TQueue::const_reverse_iterator; - using TSampleVec = std::vector; - using TOptionalSubSample = boost::optional; - - public: - static const std::string SUB_SAMPLE_TAG; - - public: - //! Constructs a new queue. - //! - //! 
\param[in] dimension The dimension of the metric statistic. - //! \param[in] sampleCountFactor The queue attempts to keep the sub-samples - //! size to the current sample count divided by the sampleCountFactor. - //! \param[in] latencyBuckets The number of buckets that are in the latency window. - //! \param[in] growthFactor The factor with which the queue's size grows whenever - //! a new item is inserted and the queue is full. - //! \param[in] bucketLength The bucket length. - CSampleQueue(std::size_t dimension, - std::size_t sampleCountFactor, - std::size_t latencyBuckets, - double growthFactor, - core_t::TTime bucketLength) : - m_Dimension(dimension), - m_Queue(std::max(sampleCountFactor * latencyBuckets, std::size_t(1))), - m_SampleCountFactor(sampleCountFactor), - m_GrowthFactor(growthFactor), - m_BucketLength(bucketLength), - m_Latency(static_cast(latencyBuckets) * bucketLength) - { + } while (traverser.next()); + return true; } - //! Adds a measurement to the queue. - //! - //! \param[in] time The time of the measurement. - //! \param[in] measurement The value of the measurement. - //! \param[in] count The count of the measurement. - //! \param[in] sampleCount The target sample count. - void add(core_t::TTime time, - const TDouble1Vec &measurement, - unsigned int count, - unsigned int sampleCount) - { - if (m_Queue.empty()) - { - this->pushFrontNewSubSample(measurement, time, count); - } - else if (time >= m_Queue[0].s_Start) - { - this->addAfterLatestStartTime(measurement, time, count, sampleCount); - } - else - { - this->addHistorical(measurement, time, count, sampleCount); - } + //! Get a checksum of this sub-sample. + uint64_t checksum() const { + uint64_t seed = maths::CChecksum::calculate(0, s_Statistic); + seed = maths::CChecksum::calculate(seed, s_Start); + return maths::CChecksum::calculate(seed, s_End); } - //! Can the queue possible create samples? - bool canSample(core_t::TTime bucketStart) const - { - core_t::TTime bucketEnd = bucketStart + m_BucketLength - 1; - return m_Queue.empty() ? false : m_Queue.back().s_End <= bucketEnd; + //! Debug the memory used by the sub-sample. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("SSubSample", sizeof(*this)); + core::CMemoryDebug::dynamicSize("s_Statistic", s_Statistic, mem); } - //! Combines as many sub-samples as possible in order to create samples. - //! - //! \param[in] bucketStart The start time of the bucket to sample. - //! \param[in] sampleCount The target sample count. - //! \param[in] feature The feature to which the measurements correspond. - //! \param[out] samples The newly created samples. - void sample(core_t::TTime bucketStart, - unsigned int sampleCount, - model_t::EFeature feature, - TSampleVec &samples) - { - core_t::TTime latencyCutoff = bucketStart + m_BucketLength - 1; - TOptionalSubSample combinedSubSample; - - while (m_Queue.empty() == false && m_Queue.back().s_End <= latencyCutoff) - { - if (combinedSubSample) - { - *combinedSubSample += m_Queue.back(); - } - else - { - combinedSubSample = TOptionalSubSample(m_Queue.back()); - } - - m_Queue.pop_back(); - - double count = combinedSubSample->s_Statistic.count(); - double countIncludingNext = (m_Queue.empty()) ? 
count - : count + m_Queue.back().s_Statistic.count(); - double countRatio = sampleCount / count; - double countRatioIncludingNext = sampleCount / countIncludingNext; - - if (countIncludingNext >= sampleCount - && (std::abs(1.0 - countRatio) <= std::abs(1.0 - countRatioIncludingNext))) - { - TDouble1Vec sample = combinedSubSample->s_Statistic.value(); - core_t::TTime sampleTime = combinedSubSample->s_Statistic.time(); - double vs = model_t::varianceScale(feature, sampleCount, count); - samples.push_back(CSample(sampleTime, sample, vs, count)); - combinedSubSample = TOptionalSubSample(); - } - } + //! Get the memory used by the sub-sample. + std::size_t memoryUsage() const { return sizeof(*this) + core::CMemory::dynamicSize(s_Statistic); } - if (combinedSubSample) - { - m_Queue.push_back(*combinedSubSample); - } + //! Print the sub-sample for debug. + std::string print() const { + return "{[" + core::CStringUtils::typeToString(s_Start) + ", " + core::CStringUtils::typeToString(s_End) + "] -> " + + s_Statistic.print() + "}"; } - void resetBucket(core_t::TTime bucketStart) - { - // The queue is ordered in descending sub-sample start time. - - iterator firstEarlierThanBucket = std::upper_bound(m_Queue.begin(), - m_Queue.end(), - bucketStart, - timeLater); - - // This is equivalent to lower_bound(., ., bucketStart + m_BucketLength - 1, .); - iterator latestWithinBucket = std::upper_bound(m_Queue.begin(), - m_Queue.end(), - bucketStart + m_BucketLength, - timeLater); - - m_Queue.erase(latestWithinBucket, firstEarlierThanBucket); + TMetricPartialStatistic s_Statistic; + core_t::TTime s_Start; + core_t::TTime s_End; + }; + +public: + using TQueue = boost::circular_buffer; + using iterator = typename TQueue::iterator; + using reverse_iterator = typename TQueue::reverse_iterator; + using const_reverse_iterator = typename TQueue::const_reverse_iterator; + using TSampleVec = std::vector; + using TOptionalSubSample = boost::optional; + +public: + static const std::string SUB_SAMPLE_TAG; + +public: + //! Constructs a new queue. + //! + //! \param[in] dimension The dimension of the metric statistic. + //! \param[in] sampleCountFactor The queue attempts to keep the sub-samples + //! size to the current sample count divided by the sampleCountFactor. + //! \param[in] latencyBuckets The number of buckets that are in the latency window. + //! \param[in] growthFactor The factor with which the queue's size grows whenever + //! a new item is inserted and the queue is full. + //! \param[in] bucketLength The bucket length. + CSampleQueue(std::size_t dimension, + std::size_t sampleCountFactor, + std::size_t latencyBuckets, + double growthFactor, + core_t::TTime bucketLength) + : m_Dimension(dimension), + m_Queue(std::max(sampleCountFactor * latencyBuckets, std::size_t(1))), + m_SampleCountFactor(sampleCountFactor), + m_GrowthFactor(growthFactor), + m_BucketLength(bucketLength), + m_Latency(static_cast(latencyBuckets) * bucketLength) {} + + //! Adds a measurement to the queue. + //! + //! \param[in] time The time of the measurement. + //! \param[in] measurement The value of the measurement. + //! \param[in] count The count of the measurement. + //! \param[in] sampleCount The target sample count. 
+    void add(core_t::TTime time, const TDouble1Vec& measurement, unsigned int count, unsigned int sampleCount) {
+        if (m_Queue.empty()) {
+            this->pushFrontNewSubSample(measurement, time, count);
+        } else if (time >= m_Queue[0].s_Start) {
+            this->addAfterLatestStartTime(measurement, time, count, sampleCount);
+        } else {
+            this->addHistorical(measurement, time, count, sampleCount);
+        }
+    }
+
+    //! Can the queue possibly create samples?
+    bool canSample(core_t::TTime bucketStart) const {
+        core_t::TTime bucketEnd = bucketStart + m_BucketLength - 1;
+        return m_Queue.empty() ? false : m_Queue.back().s_End <= bucketEnd;
+    }
+
+    //! Combines as many sub-samples as possible in order to create samples.
+    //!
+    //! \param[in] bucketStart The start time of the bucket to sample.
+    //! \param[in] sampleCount The target sample count.
+    //! \param[in] feature The feature to which the measurements correspond.
+    //! \param[out] samples The newly created samples.
+    void sample(core_t::TTime bucketStart, unsigned int sampleCount, model_t::EFeature feature, TSampleVec& samples) {
+        core_t::TTime latencyCutoff = bucketStart + m_BucketLength - 1;
+        TOptionalSubSample combinedSubSample;
+
+        while (m_Queue.empty() == false && m_Queue.back().s_End <= latencyCutoff) {
+            if (combinedSubSample) {
+                *combinedSubSample += m_Queue.back();
+            } else {
+                combinedSubSample = TOptionalSubSample(m_Queue.back());
+            }
+
+            m_Queue.pop_back();
+
+            double count = combinedSubSample->s_Statistic.count();
+            double countIncludingNext = (m_Queue.empty()) ? count : count + m_Queue.back().s_Statistic.count();
+            double countRatio = sampleCount / count;
+            double countRatioIncludingNext = sampleCount / countIncludingNext;
+
+            if (countIncludingNext >= sampleCount && (std::abs(1.0 - countRatio) <= std::abs(1.0 - countRatioIncludingNext))) {
+                TDouble1Vec sample = combinedSubSample->s_Statistic.value();
+                core_t::TTime sampleTime = combinedSubSample->s_Statistic.time();
+                double vs = model_t::varianceScale(feature, sampleCount, count);
+                samples.push_back(CSample(sampleTime, sample, vs, count));
+                combinedSubSample = TOptionalSubSample();
+            }
+        }
+
+        if (combinedSubSample) {
+            m_Queue.push_back(*combinedSubSample);
+        }
+    }
+
+    void resetBucket(core_t::TTime bucketStart) {
+        // The queue is ordered in descending sub-sample start time.
+
+        iterator firstEarlierThanBucket = std::upper_bound(m_Queue.begin(), m_Queue.end(), bucketStart, timeLater);
+
+        // This is equivalent to lower_bound(., ., bucketStart + m_BucketLength - 1, .);
+        iterator latestWithinBucket = std::upper_bound(m_Queue.begin(), m_Queue.end(), bucketStart + m_BucketLength, timeLater);
+
+        m_Queue.erase(latestWithinBucket, firstEarlierThanBucket);
+    }
+
+    //! Returns the item in the queue at position \p index.
+    const SSubSample& operator[](std::size_t index) const { return m_Queue[index]; }
+
+    //! Returns the size of the queue.
+    std::size_t size() const { return m_Queue.size(); }
+
+    //! Returns the capacity of the queue.
+    std::size_t capacity() const { return m_Queue.capacity(); }
+
+    //! Is the queue empty?
+    bool empty() const { return m_Queue.empty(); }
+
+    core_t::TTime latestEnd() const { return m_Queue.empty() ? 0 : m_Queue.front().s_End; }
+
+    //! \name Persistence
+    //@{
+    //! Persist state by passing information to the supplied inserter.
+    void acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+        for (const_reverse_iterator itr = m_Queue.rbegin(); itr != m_Queue.rend(); ++itr) {
+            inserter.insertLevel(SUB_SAMPLE_TAG, boost::bind(&SSubSample::acceptPersistInserter, *itr, _1));
+        }
+    }
+
+    //! Restore by getting information from the state document traverser.
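The loop in sample() above merges sub-samples from the back of the queue and emits a sample once the accumulated count reaches the target, unless absorbing the next sub-sample would leave the count ratio closer to one. The stopping rule on its own, with made-up counts:

    #include <cmath>
    #include <iostream>
    #include <vector>

    int main() {
        const double target = 10.0;
        std::vector<double> subSampleCounts{4.0, 3.0, 2.0, 5.0}; // oldest first
        double count = 0.0;
        for (std::size_t i = 0; i < subSampleCounts.size(); ++i) {
            count += subSampleCounts[i];
            // What the accumulated count would be if we also absorbed the next
            // sub-sample (or just the current count at the end of the queue).
            double next = i + 1 < subSampleCounts.size() ? count + subSampleCounts[i + 1] : count;
            // Emit when the target is reachable and stopping now leaves the
            // ratio target/count at least as close to 1 as continuing would.
            if (next >= target && std::abs(1.0 - target / count) <= std::abs(1.0 - target / next)) {
                std::cout << "emit sample with count " << count << '\n'; // emits at 9
                count = 0.0;
            }
        }
    }
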
- bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) - { - do - { - const std::string &name = traverser.name(); - if (name == SUB_SAMPLE_TAG) - { - SSubSample subSample(m_Dimension, 0); - if (traverser.traverseSubLevel(boost::bind(&SSubSample::acceptRestoreTraverser, - &subSample, - _1)) == false) - { - LOG_ERROR("Invalid sub-sample in " << traverser.value()); - return false; - } - this->resizeIfFull(); - m_Queue.push_front(subSample); - } - } - while (traverser.next()); + // This is equivalent to lower_bound(., ., bucketStart + m_BucketLength - 1, .); + iterator latestWithinBucket = std::upper_bound(m_Queue.begin(), m_Queue.end(), bucketStart + m_BucketLength, timeLater); - return true; - } - //@} + m_Queue.erase(latestWithinBucket, firstEarlierThanBucket); + } - //! Returns the checksum of the queue. - uint64_t checksum() const - { - return maths::CChecksum::calculate(0, m_Queue); - } + //! Returns the item in the queue at position \p index. + const SSubSample& operator[](std::size_t index) const { return m_Queue[index]; } - //! Debug the memory used by the queue. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CSampleQueue", sizeof(*this)); - core::CMemoryDebug::dynamicSize("m_Queue", m_Queue, mem); - } + //! Returns the size of the queue. + std::size_t size() const { return m_Queue.size(); } - //! Get the memory used by the queue. - std::size_t memoryUsage() const - { - return sizeof(*this) + core::CMemory::dynamicSize(m_Queue); - } + //! Returns the capacity of the queue. + std::size_t capacity() const { return m_Queue.capacity(); } - //! Prints the contents of the queue. - std::string print() const - { - return core::CContainerPrinter::print(m_Queue); - } + //! Is the queue empty? + bool empty() const { return m_Queue.empty(); } - private: - void pushFrontNewSubSample(const TDouble1Vec &measurement, - core_t::TTime time, - unsigned int count) - { - this->resizeIfFull(); - SSubSample newSubSample(m_Dimension, time); - newSubSample.s_Statistic.add(measurement, time, count); - m_Queue.push_front(newSubSample); - } + core_t::TTime latestEnd() const { return m_Queue.empty() ? 0 : m_Queue.front().s_End; } - void pushBackNewSubSample(const TDouble1Vec &measurement, - core_t::TTime time, - unsigned int count) - { - this->resizeIfFull(); - SSubSample newSubSample(m_Dimension, time); - newSubSample.s_Statistic.add(measurement, time, count); - m_Queue.push_back(newSubSample); + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + for (const_reverse_iterator itr = m_Queue.rbegin(); itr != m_Queue.rend(); ++itr) { + inserter.insertLevel(SUB_SAMPLE_TAG, boost::bind(&SSubSample::acceptPersistInserter, *itr, _1)); } - - void insertNewSubSample(iterator pos, - const TDouble1Vec &measurement, - core_t::TTime time, - unsigned int count) - { - this->resizeIfFull(); - SSubSample newSubSample(m_Dimension, time); - newSubSample.s_Statistic.add(measurement, time, count); - m_Queue.insert(pos, newSubSample); + } + + //! Restore by getting information from the state document traverser. 
+    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+        do {
+            const std::string& name = traverser.name();
+            if (name == SUB_SAMPLE_TAG) {
+                SSubSample subSample(m_Dimension, 0);
+                if (traverser.traverseSubLevel(boost::bind(&SSubSample::acceptRestoreTraverser, &subSample, _1)) == false) {
+                    LOG_ERROR("Invalid sub-sample in " << traverser.value());
+                    return false;
+                }
+                this->resizeIfFull();
+                m_Queue.push_front(subSample);
+            }
+        } while (traverser.next());
+
+        return true;
+    }
+    //@}
+
+    //! Returns the checksum of the queue.
+    uint64_t checksum() const { return maths::CChecksum::calculate(0, m_Queue); }
+
+    //! Debug the memory used by the queue.
+    void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
+        mem->setName("CSampleQueue", sizeof(*this));
+        core::CMemoryDebug::dynamicSize("m_Queue", m_Queue, mem);
+    }
+
+    //! Get the memory used by the queue.
+    std::size_t memoryUsage() const { return sizeof(*this) + core::CMemory::dynamicSize(m_Queue); }
+
+    //! Prints the contents of the queue.
+    std::string print() const { return core::CContainerPrinter::print(m_Queue); }
+
+private:
+    void pushFrontNewSubSample(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) {
+        this->resizeIfFull();
+        SSubSample newSubSample(m_Dimension, time);
+        newSubSample.s_Statistic.add(measurement, time, count);
+        m_Queue.push_front(newSubSample);
+    }
+
+    void pushBackNewSubSample(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) {
+        this->resizeIfFull();
+        SSubSample newSubSample(m_Dimension, time);
+        newSubSample.s_Statistic.add(measurement, time, count);
+        m_Queue.push_back(newSubSample);
+    }
+
+    void insertNewSubSample(iterator pos, const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) {
+        this->resizeIfFull();
+        SSubSample newSubSample(m_Dimension, time);
+        newSubSample.s_Statistic.add(measurement, time, count);
+        m_Queue.insert(pos, newSubSample);
+    }
+
-    void resizeIfFull()
-    {
-        if (m_Queue.full())
-        {
-            std::size_t currentSize = m_Queue.size();
-            std::size_t newSize = static_cast<std::size_t>(
-                                      static_cast<double>(currentSize)
-                                      * (1.0 + m_GrowthFactor));
-            m_Queue.set_capacity(std::max(newSize, currentSize + 1));
-        }
-    }
+    void resizeIfFull() {
+        if (m_Queue.full()) {
+            std::size_t currentSize = m_Queue.size();
+            std::size_t newSize = static_cast<std::size_t>(static_cast<double>(currentSize) * (1.0 + m_GrowthFactor));
+            m_Queue.set_capacity(std::max(newSize, currentSize + 1));
+        }
+    }
 
-    void addAfterLatestStartTime(const TDouble1Vec &measurement,
-                                 core_t::TTime time,
-                                 unsigned int count,
-                                 unsigned int sampleCount)
-    {
-        if (time >= m_Queue[0].s_End &&
-            this->shouldCreateNewSubSampleAfterLatest(time, sampleCount))
-        {
-            this->pushFrontNewSubSample(measurement, time, count);
-        }
-        else
-        {
-            m_Queue[0].add(measurement, time, count);
-        }
-    }
+    void addAfterLatestStartTime(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count, unsigned int sampleCount) {
+        if (time >= m_Queue[0].s_End && this->shouldCreateNewSubSampleAfterLatest(time, sampleCount)) {
+            this->pushFrontNewSubSample(measurement, time, count);
+        } else {
+            m_Queue[0].add(measurement, time, count);
+        }
+    }
 
-    bool shouldCreateNewSubSampleAfterLatest(core_t::TTime time,
-                                             unsigned int sampleCount)
-    {
-        if (m_Queue[0].s_Statistic.count() >=
-                static_cast<double>(this->targetSubSampleCount(sampleCount)))
-        {
-            return true;
-        }
-
-        // If latency is non-zero, we also want to check whether the new measurement
-        // is too far from the latest sub-sample or whether they belong in different buckets.
-        if (m_Latency > 0)
-        {
-            if (!m_Queue[0].isClose(time, this->targetSubSampleSpan())
-                || !m_Queue[0].isInSameBucket(time, m_BucketLength))
-            {
-                return true;
-            }
-        }
-        return false;
-    }
+    bool shouldCreateNewSubSampleAfterLatest(core_t::TTime time, unsigned int sampleCount) {
+        if (m_Queue[0].s_Statistic.count() >= static_cast<double>(this->targetSubSampleCount(sampleCount))) {
+            return true;
+        }
+        // If latency is non-zero, we also want to check whether the new measurement
+        // is too far from the latest sub-sample or whether they belong in different buckets.
+        if (m_Latency > 0) {
+            if (!m_Queue[0].isClose(time, this->targetSubSampleSpan()) || !m_Queue[0].isInSameBucket(time, m_BucketLength)) {
+                return true;
+            }
+        }
+        return false;
+    }
 
-    core_t::TTime targetSubSampleSpan() const
-    {
-        return (m_BucketLength + static_cast<core_t::TTime>(m_SampleCountFactor) - 1)
-               / static_cast<core_t::TTime>(m_SampleCountFactor);
-    }
+    core_t::TTime targetSubSampleSpan() const {
+        return (m_BucketLength + static_cast<core_t::TTime>(m_SampleCountFactor) - 1) / static_cast<core_t::TTime>(m_SampleCountFactor);
+    }
 
-    std::size_t targetSubSampleCount(unsigned int sampleCount) const
-    {
-        return static_cast<std::size_t>(sampleCount) / m_SampleCountFactor;
-    }
+    std::size_t targetSubSampleCount(unsigned int sampleCount) const { return static_cast<std::size_t>(sampleCount) / m_SampleCountFactor; }
 
-    void addHistorical(const TDouble1Vec &measurement,
-                       core_t::TTime time,
-                       unsigned int count,
-                       unsigned int sampleCount)
-    {
-        // We have to resize before we do the search of the upper bound. Otherwise,
-        // a later resize will invalidate the upper bound iterator.
-        this->resizeIfFull();
-
-        reverse_iterator upperBound =
-            std::upper_bound(m_Queue.rbegin(), m_Queue.rend(), time, timeEarlier);
-        core_t::TTime targetSubSampleSpan = this->targetSubSampleSpan();
-
-        if (upperBound == m_Queue.rbegin())
-        {
-            if ((upperBound->s_Statistic.count() >= static_cast<double>(this->targetSubSampleCount(sampleCount)))
-                || !upperBound->isClose(time, targetSubSampleSpan)
-                || !(*upperBound).isInSameBucket(time, m_BucketLength))
-            {
-                this->pushBackNewSubSample(measurement, time, count);
-            }
-            else
-            {
-                upperBound->add(measurement, time, count);
-            }
-            return;
-        }
-
-        SSubSample &left = *(upperBound - 1);
-        SSubSample &right = *upperBound;
-        if (time <= left.s_End)
-        {
-            left.add(measurement, time, count);
-            return;
-        }
-        bool sameBucketWithLeft = left.isInSameBucket(time, m_BucketLength);
-        bool sameBucketWithRight = right.isInSameBucket(time, m_BucketLength);
-        std::size_t spaceLimit = this->targetSubSampleCount(sampleCount);
-        bool leftHasSpace = static_cast<std::size_t>(left.s_Statistic.count()) < spaceLimit;
-        bool rightHasSpace = static_cast<std::size_t>(right.s_Statistic.count()) < spaceLimit;
-        core_t::TTime leftDistance = time - left.s_End;
-        core_t::TTime rightDistance = right.s_Start - time;
-        SSubSample &candidate = maths::COrderings::lexicographical_compare(
-                                    -static_cast<int>(sameBucketWithLeft),
-                                    -static_cast<int>(leftHasSpace),
-                                    leftDistance,
-                                    -static_cast<int>(sameBucketWithRight),
-                                    -static_cast<int>(rightHasSpace),
-                                    rightDistance) ? left : right;
-
-        if (candidate.isInSameBucket(time, m_BucketLength) &&
-            (candidate.isClose(time, targetSubSampleSpan) ||
-             right.s_Start <= left.s_End + targetSubSampleSpan))
-        {
-            candidate.add(measurement, time, count);
-            return;
-        }
-        this->insertNewSubSample(upperBound.base(), measurement, time, count);
-    }
+    void addHistorical(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count, unsigned int sampleCount) {
+        // We have to resize before we do the search for the upper bound. Otherwise,
+        // a later resize will invalidate the upper bound iterator.
+        this->resizeIfFull();
+
+        reverse_iterator upperBound = std::upper_bound(m_Queue.rbegin(), m_Queue.rend(), time, timeEarlier);
+        core_t::TTime targetSubSampleSpan = this->targetSubSampleSpan();
+
+        if (upperBound == m_Queue.rbegin()) {
+            if ((upperBound->s_Statistic.count() >= static_cast<double>(this->targetSubSampleCount(sampleCount))) ||
+                !upperBound->isClose(time, targetSubSampleSpan) || !(*upperBound).isInSameBucket(time, m_BucketLength)) {
+                this->pushBackNewSubSample(measurement, time, count);
+            } else {
+                upperBound->add(measurement, time, count);
+            }
+            return;
+        }
+
+        SSubSample& left = *(upperBound - 1);
+        SSubSample& right = *upperBound;
+        if (time <= left.s_End) {
+            left.add(measurement, time, count);
+            return;
+        }
+        bool sameBucketWithLeft = left.isInSameBucket(time, m_BucketLength);
+        bool sameBucketWithRight = right.isInSameBucket(time, m_BucketLength);
+        std::size_t spaceLimit = this->targetSubSampleCount(sampleCount);
+        bool leftHasSpace = static_cast<std::size_t>(left.s_Statistic.count()) < spaceLimit;
+        bool rightHasSpace = static_cast<std::size_t>(right.s_Statistic.count()) < spaceLimit;
+        core_t::TTime leftDistance = time - left.s_End;
+        core_t::TTime rightDistance = right.s_Start - time;
+        SSubSample& candidate = maths::COrderings::lexicographical_compare(-static_cast<int>(sameBucketWithLeft),
+                                                                           -static_cast<int>(leftHasSpace),
+                                                                           leftDistance,
+                                                                           -static_cast<int>(sameBucketWithRight),
+                                                                           -static_cast<int>(rightHasSpace),
+                                                                           rightDistance)
+                                    ? left
+                                    : right;
+
+        if (candidate.isInSameBucket(time, m_BucketLength) &&
+            (candidate.isClose(time, targetSubSampleSpan) || right.s_Start <= left.s_End + targetSubSampleSpan)) {
+            candidate.add(measurement, time, count);
+            return;
+        }
+        this->insertNewSubSample(upperBound.base(), measurement, time, count);
+    }
 
-    static bool timeEarlier(core_t::TTime time,
-                            const SSubSample &subSample)
-    {
-        return time < subSample.s_Start;
-    }
-
-    static bool timeLater(core_t::TTime time,
-                          const SSubSample &subSample)
-    {
-        return time > subSample.s_Start;
-    }
+    static bool timeEarlier(core_t::TTime time, const SSubSample& subSample) { return time < subSample.s_Start; }
+
+    static bool timeLater(core_t::TTime time, const SSubSample& subSample) { return time > subSample.s_Start; }
 
-    private:
-        std::size_t m_Dimension;
-        TQueue m_Queue;
-        std::size_t m_SampleCountFactor;
-        double m_GrowthFactor;
-        core_t::TTime m_BucketLength;
-        core_t::TTime m_Latency;
+private:
+    std::size_t m_Dimension;
+    TQueue m_Queue;
+    std::size_t m_SampleCountFactor;
+    double m_GrowthFactor;
+    core_t::TTime m_BucketLength;
+    core_t::TTime m_Latency;
 };
 
 template<typename STATISTIC>
@@ -618,7 +452,6 @@ template<typename STATISTIC>
 const std::string CSampleQueue<STATISTIC>::SSubSample::SAMPLE_TAG("c");
 template<typename STATISTIC>
 const std::string CSampleQueue<STATISTIC>::SUB_SAMPLE_TAG("a");
-
 }
 }
 
diff --git a/include/model/CSearchKey.h b/include/model/CSearchKey.h
index 21f466866e..b584ff31d2 100644
--- a/include/model/CSearchKey.h
+++ b/include/model/CSearchKey.h
@@ -23,16 +23,12 @@
 
 #include
 
-
-namespace ml
-{
-namespace core
-{
+namespace ml {
+namespace core {
 class CStatePersistInserter;
 class CStateRestoreTraverser;
 }
-namespace model
-{
+namespace model {
 
 //! \brief
 //! Associative store key for simple searches.
@@ -72,212 +68,186 @@ namespace model
 //! class is intended purely to store the information and be used as
 //!
a key in associative containers. //! -class MODEL_EXPORT CSearchKey -{ - public: - using TStrVec = std::vector; - using TStoredStringPtrVec = std::vector; - - //! The type of a search key which mixes in the partition field - //! value. - using TStrKeyPr = std::pair; - - //! The type of a constant reference string search key pair. - //! - //! \note This is intended for map lookups when one doesn't want - //! to copy the strings. - using TStrCRefKeyCRefPr = std::pair, - boost::reference_wrapper>; - - public: - //! If the "by" field name is "count" then the key represents - //! a simple count detector - static const std::string COUNT_NAME; - - //! Character used to delimit the "cue" representation of the key - static const char CUE_DELIMITER; - - //! An empty string. - static const std::string EMPTY_STRING; - - public: - //! Construct with an over field and a partitioning field - //! - //! \note Use the pass-by-value-and-swap trick to improve performance - //! when the arguments are temporaries. - explicit CSearchKey(int identifier = 0, - function_t::EFunction function = function_t::E_IndividualCount, - bool useNull = false, - model_t::EExcludeFrequent excludeFrequent = model_t::E_XF_None, - std::string fieldName = EMPTY_STRING, - std::string byFieldName = EMPTY_STRING, - std::string overFieldName = EMPTY_STRING, - std::string partitionFieldName = EMPTY_STRING, - const TStrVec &influenceFieldNames = TStrVec()); - - //! Create the key from part of an state document. - //! - //! \param[in,out] traverser A state document traverser. - //! \param[out] successful Set to true if the state could be fully - //! deserialised and false otherwise. - CSearchKey(core::CStateRestoreTraverser &traverser, bool &successful); - - private: - //! Initialise by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); - - public: - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; - - //! Efficiently swap the contents of two objects of this class. - void swap(CSearchKey &other); - - //! Check if this and \p rhs are equal. - bool operator==(const CSearchKey &rhs) const; - - //! Check if this is less than \p rhs. - bool operator<(const CSearchKey &rhs) const; - - //! Get an identifier for this search. - int identifier() const; - - //! Get the unique simple counting search key. - //! - //! Definition: the function is individual count and the "by" - //! field name is "count". - static const CSearchKey &simpleCountKey(); - - //! Does this key represent a simple counting search? - bool isSimpleCount() const; - - //! Do the function and by field name identify a simple - //! counting search. - static bool isSimpleCount(function_t::EFunction function, - const std::string &byFieldName); - - //! Is the function type for use with the individual models? - bool isMetric() const; - - //! Is the function type for use with the population models? - bool isPopulation() const; - - //! Create a "cue" suitable to be used in persisted state. - std::string toCue() const; - - //! Debug representation. Note that operator<<() is more efficient than - //! generating this debug string and immediately outputting it to a - //! stream. - std::string debug() const; - - //! Get the function. - function_t::EFunction function() const; - - //! Get whether to use null field values. - bool useNull() const; - - //! Get the ExcludeFrequent setting - model_t::EExcludeFrequent excludeFrequent() const; - - //! 
Check if there is a field called \p name. - bool hasField(const std::string &name) const; - - //! Get the value field name. - const std::string &fieldName() const; - - //! Get the by field name. - const std::string &byFieldName() const; +class MODEL_EXPORT CSearchKey { +public: + using TStrVec = std::vector; + using TStoredStringPtrVec = std::vector; + + //! The type of a search key which mixes in the partition field + //! value. + using TStrKeyPr = std::pair; + + //! The type of a constant reference string search key pair. + //! + //! \note This is intended for map lookups when one doesn't want + //! to copy the strings. + using TStrCRefKeyCRefPr = std::pair, boost::reference_wrapper>; + +public: + //! If the "by" field name is "count" then the key represents + //! a simple count detector + static const std::string COUNT_NAME; + + //! Character used to delimit the "cue" representation of the key + static const char CUE_DELIMITER; + + //! An empty string. + static const std::string EMPTY_STRING; + +public: + //! Construct with an over field and a partitioning field + //! + //! \note Use the pass-by-value-and-swap trick to improve performance + //! when the arguments are temporaries. + explicit CSearchKey(int identifier = 0, + function_t::EFunction function = function_t::E_IndividualCount, + bool useNull = false, + model_t::EExcludeFrequent excludeFrequent = model_t::E_XF_None, + std::string fieldName = EMPTY_STRING, + std::string byFieldName = EMPTY_STRING, + std::string overFieldName = EMPTY_STRING, + std::string partitionFieldName = EMPTY_STRING, + const TStrVec& influenceFieldNames = TStrVec()); + + //! Create the key from part of an state document. + //! + //! \param[in,out] traverser A state document traverser. + //! \param[out] successful Set to true if the state could be fully + //! deserialised and false otherwise. + CSearchKey(core::CStateRestoreTraverser& traverser, bool& successful); + +private: + //! Initialise by traversing a state document. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + +public: + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Efficiently swap the contents of two objects of this class. + void swap(CSearchKey& other); + + //! Check if this and \p rhs are equal. + bool operator==(const CSearchKey& rhs) const; + + //! Check if this is less than \p rhs. + bool operator<(const CSearchKey& rhs) const; + + //! Get an identifier for this search. + int identifier() const; + + //! Get the unique simple counting search key. + //! + //! Definition: the function is individual count and the "by" + //! field name is "count". + static const CSearchKey& simpleCountKey(); + + //! Does this key represent a simple counting search? + bool isSimpleCount() const; + + //! Do the function and by field name identify a simple + //! counting search. + static bool isSimpleCount(function_t::EFunction function, const std::string& byFieldName); + + //! Is the function type for use with the individual models? + bool isMetric() const; + + //! Is the function type for use with the population models? + bool isPopulation() const; + + //! Create a "cue" suitable to be used in persisted state. + std::string toCue() const; + + //! Debug representation. Note that operator<<() is more efficient than + //! generating this debug string and immediately outputting it to a + //! stream. + std::string debug() const; + + //! Get the function. 
+ function_t::EFunction function() const; + + //! Get whether to use null field values. + bool useNull() const; + + //! Get the ExcludeFrequent setting + model_t::EExcludeFrequent excludeFrequent() const; + + //! Check if there is a field called \p name. + bool hasField(const std::string& name) const; + + //! Get the value field name. + const std::string& fieldName() const; + + //! Get the by field name. + const std::string& byFieldName() const; - //! Get the over field name. - const std::string &overFieldName() const; - - //! Get the partition field name. - const std::string &partitionFieldName() const; + //! Get the over field name. + const std::string& overFieldName() const; + + //! Get the partition field name. + const std::string& partitionFieldName() const; - //! Get the influence field names. - const TStoredStringPtrVec &influenceFieldNames() const; + //! Get the influence field names. + const TStoredStringPtrVec& influenceFieldNames() const; - //! Get a hash of the contents of this key. - uint64_t hash() const; + //! Get a hash of the contents of this key. + uint64_t hash() const; - private: - int m_Identifier; - function_t::EFunction m_Function; - bool m_UseNull; - model_t::EExcludeFrequent m_ExcludeFrequent; - core::CStoredStringPtr m_FieldName; - core::CStoredStringPtr m_ByFieldName; - core::CStoredStringPtr m_OverFieldName; - core::CStoredStringPtr m_PartitionFieldName; - TStoredStringPtrVec m_InfluenceFieldNames; +private: + int m_Identifier; + function_t::EFunction m_Function; + bool m_UseNull; + model_t::EExcludeFrequent m_ExcludeFrequent; + core::CStoredStringPtr m_FieldName; + core::CStoredStringPtr m_ByFieldName; + core::CStoredStringPtr m_OverFieldName; + core::CStoredStringPtr m_PartitionFieldName; + TStoredStringPtrVec m_InfluenceFieldNames; - //! Used for efficient comparison. - mutable uint64_t m_Hash; + //! Used for efficient comparison. + mutable uint64_t m_Hash; // For debug output - friend MODEL_EXPORT std::ostream &operator<<(std::ostream &, - const CSearchKey &); + friend MODEL_EXPORT std::ostream& operator<<(std::ostream&, const CSearchKey&); }; MODEL_EXPORT -std::ostream &operator<<(std::ostream &strm, const CSearchKey &key); +std::ostream& operator<<(std::ostream& strm, const CSearchKey& key); //! Hashes a (string, search key) pair. -class CStrKeyPrHash -{ - public: - std::size_t operator()(const CSearchKey::TStrKeyPr &key) const - { - return this->hash(key); - } - std::size_t operator()(const CSearchKey::TStrCRefKeyCRefPr &key) const - { - return this->hash(key); - } - - private: - template - std::size_t hash(const T &key) const - { - core::CHashing::CSafeMurmurHash2String64 stringHasher; - uint64_t result = stringHasher(boost::unwrap_ref(key.first)); - core::CHashing::hashCombine(boost::unwrap_ref(key.second).hash(), result); - return static_cast(result); - } +class CStrKeyPrHash { +public: + std::size_t operator()(const CSearchKey::TStrKeyPr& key) const { return this->hash(key); } + std::size_t operator()(const CSearchKey::TStrCRefKeyCRefPr& key) const { return this->hash(key); } + +private: + template + std::size_t hash(const T& key) const { + core::CHashing::CSafeMurmurHash2String64 stringHasher; + uint64_t result = stringHasher(boost::unwrap_ref(key.first)); + core::CHashing::hashCombine(boost::unwrap_ref(key.second).hash(), result); + return static_cast(result); + } }; //! Checks if two (string, search key) pairs are equal. 
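The hash functor above and the equality functor below are deliberately overloaded for both the owning pair type and the reference-wrapper pair type: that is what lets callers probe a container keyed on TStrKeyPr using boost::cref wrappers, with no string copies. A hedged sketch of the idiom with simplified stand-ins (SKey is hypothetical, and boost::hash_combine replaces the real code's core::CHashing machinery):

    #include <boost/functional/hash.hpp>
    #include <boost/ref.hpp>
    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <utility>

    // SKey is a hypothetical stand-in for CSearchKey: anything exposing hash().
    struct SKey {
        std::uint64_t hash() const { return m_Id; }
        std::uint64_t m_Id;
    };

    using TStrKeyPr = std::pair<std::string, SKey>;
    using TStrCRefKeyCRefPr =
        std::pair<boost::reference_wrapper<const std::string>, boost::reference_wrapper<const SKey>>;

    struct SHash {
        std::size_t operator()(const TStrKeyPr& key) const { return this->hash(key); }
        std::size_t operator()(const TStrCRefKeyCRefPr& key) const { return this->hash(key); }

        template<typename T>
        std::size_t hash(const T& key) const {
            // boost::unwrap_ref is a no-op on plain values and dereferences
            // reference_wrappers, so one implementation serves both overloads.
            std::size_t result = boost::hash<std::string>()(boost::unwrap_ref(key.first));
            boost::hash_combine(result, boost::unwrap_ref(key.second).hash());
            return result;
        }
    };

    int main() {
        std::string partition{"region"};
        SKey key{42};
        TStrKeyPr owning{partition, key};
        TStrCRefKeyCRefPr views{boost::cref(partition), boost::cref(key)};
        // Equivalent keys hash identically, so the reference form can probe a
        // container keyed on the owning form without constructing it.
        assert(SHash()(owning) == SHash()(views));
        return 0;
    }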
-class CStrKeyPrEqual
-{
-    public:
-        bool operator()(const CSearchKey::TStrKeyPr &lhs, const CSearchKey::TStrKeyPr &rhs) const
-        {
-            return this->equal(lhs, rhs);
-        }
-        bool operator()(const CSearchKey::TStrCRefKeyCRefPr &lhs, const CSearchKey::TStrKeyPr &rhs) const
-        {
-            return this->equal(lhs, rhs);
-        }
-        bool operator()(const CSearchKey::TStrKeyPr &lhs, const CSearchKey::TStrCRefKeyCRefPr &rhs) const
-        {
-            return this->equal(lhs, rhs);
-        }
-        bool operator()(const CSearchKey::TStrCRefKeyCRefPr &lhs, const CSearchKey::TStrCRefKeyCRefPr &rhs) const
-        {
-            return this->equal(lhs, rhs);
-        }
-
-    private:
-        template<typename U, typename V>
-        bool equal(const U &lhs, const V &rhs) const
-        {
-            return boost::unwrap_ref(lhs.second) == boost::unwrap_ref(rhs.second)
-                   && boost::unwrap_ref(lhs.first) == boost::unwrap_ref(rhs.first);
-        }
+class CStrKeyPrEqual {
+public:
+    bool operator()(const CSearchKey::TStrKeyPr& lhs, const CSearchKey::TStrKeyPr& rhs) const { return this->equal(lhs, rhs); }
+    bool operator()(const CSearchKey::TStrCRefKeyCRefPr& lhs, const CSearchKey::TStrKeyPr& rhs) const { return this->equal(lhs, rhs); }
+    bool operator()(const CSearchKey::TStrKeyPr& lhs, const CSearchKey::TStrCRefKeyCRefPr& rhs) const { return this->equal(lhs, rhs); }
+    bool operator()(const CSearchKey::TStrCRefKeyCRefPr& lhs, const CSearchKey::TStrCRefKeyCRefPr& rhs) const {
+        return this->equal(lhs, rhs);
+    }
+
+private:
+    template<typename U, typename V>
+    bool equal(const U& lhs, const V& rhs) const {
+        return boost::unwrap_ref(lhs.second) == boost::unwrap_ref(rhs.second) &&
+               boost::unwrap_ref(lhs.first) == boost::unwrap_ref(rhs.first);
+    }
 };
-
 }
 }
 
 #endif // INCLUDED_ml_model_CSearchKey_h
-
diff --git a/include/model/CSimpleCountDetector.h b/include/model/CSimpleCountDetector.h
index fb31bdebea..dd06402c1d 100644
--- a/include/model/CSimpleCountDetector.h
+++ b/include/model/CSimpleCountDetector.h
@@ -13,11 +13,8 @@
 
 #include
 
-
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
 class CAnomalyDetectorModelConfig;
 class CLimits;
@@ -40,42 +37,37 @@ class CLimits;
 //! passes through the same data path as the output of the real
 //! anomaly detector classes.
 //!
-class MODEL_EXPORT CSimpleCountDetector : public CAnomalyDetector
-{
-    public:
-        CSimpleCountDetector(int identifier,
-                             model_t::ESummaryMode summaryMode,
-                             const CAnomalyDetectorModelConfig &modelConfig,
-                             CLimits &limits,
-                             const std::string &partitionFieldValue,
-                             core_t::TTime firstTime,
-                             const TModelFactoryCPtr &modelFactory);
+class MODEL_EXPORT CSimpleCountDetector : public CAnomalyDetector {
+public:
+    CSimpleCountDetector(int identifier,
+                         model_t::ESummaryMode summaryMode,
+                         const CAnomalyDetectorModelConfig& modelConfig,
+                         CLimits& limits,
+                         const std::string& partitionFieldValue,
+                         core_t::TTime firstTime,
+                         const TModelFactoryCPtr& modelFactory);
 
-        CSimpleCountDetector(bool isForPersistence,
-                             const CAnomalyDetector &other);
+    CSimpleCountDetector(bool isForPersistence, const CAnomalyDetector& other);
 
-        //! Returns true.
-        virtual bool isSimpleCount() const;
+    //! Returns true.
+    virtual bool isSimpleCount() const;
 
-        //! Don't prune the simple count detector!
-        virtual void pruneModels();
+    //! Don't prune the simple count detector!
+    virtual void pruneModels();
 
-    private:
-        //! This function is called before adding a record allowing
-        //! for varied preprocessing.
+ virtual const TStrCPtrVec& preprocessFieldValues(const TStrCPtrVec& fieldValues); - private: - //! Field values are strange compared to other anomaly detectors, - //! because the "count" field always has value "count". We need - //! a vector to override the real value of any "count" field that - //! might be present in the data. - TStrCPtrVec m_FieldValues; +private: + //! Field values are strange compared to other anomaly detectors, + //! because the "count" field always has value "count". We need + //! a vector to override the real value of any "count" field that + //! might be present in the data. + TStrCPtrVec m_FieldValues; }; - - } } #endif // INCLUDED_ml_model_CSimpleCountDetector_h - diff --git a/include/model/CStringStore.h b/include/model/CStringStore.h index 3d92ecd2c5..631263f553 100644 --- a/include/model/CStringStore.h +++ b/include/model/CStringStore.h @@ -6,8 +6,8 @@ #ifndef INCLUDED_ml_model_CStringStore_h #define INCLUDED_ml_model_CStringStore_h -#include #include +#include #include #include @@ -22,17 +22,14 @@ class CResourceMonitorTest; class CStringStoreTest; -namespace ml -{ +namespace ml { -namespace core -{ +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace model -{ +namespace model { //! \brief //! DESCRIPTION:\n @@ -52,100 +49,89 @@ namespace model //! strings. //! Write access is locked for the benefit of future threading. //! -class MODEL_EXPORT CStringStore : private core::CNonCopyable -{ - public: - struct MODEL_EXPORT SHashStoredStringPtr - { - std::size_t operator()(const core::CStoredStringPtr &key) const - { - boost::hash hasher; - return hasher(*key); - } - }; - struct MODEL_EXPORT SStoredStringPtrEqual - { - bool operator()(const core::CStoredStringPtr &lhs, - const core::CStoredStringPtr &rhs) const - { - return *lhs == *rhs; - } - }; - - public: - //! Call this to tidy up any strings no longer needed. - static void tidyUpNotThreadSafe(); - - //! Singleton pattern for person/attribute names. - static CStringStore &names(); - - //! Singleton pattern for influencer names. - static CStringStore &influencers(); - - //! Fast method to get the pointer for an empty string. - const core::CStoredStringPtr &getEmpty() const; - - //! (Possibly) add \p value to the store and get back a pointer to it. - core::CStoredStringPtr get(const std::string &value); - - //! (Possibly) remove \p value from the store. - void remove(const std::string &value); - - //! Prune strings which have been removed. - void pruneRemovedNotThreadSafe(); - - //! Iterate over the string store and remove unused entries. - void pruneNotThreadSafe(); - - //! Get the memory used by this string store - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - - //! Get the memory used by this string store - std::size_t memoryUsage() const; - - private: - using TStoredStringPtrUSet = boost::unordered_set; - using TStrVec = std::vector; - - private: - //! Constructor of a Singleton is private. - CStringStore(); - - //! Bludgeoning device to delete all objects in store. - void clearEverythingTestOnly(); - - private: - //! Fence for reading operations (in which case we "leak" a string - //! if we try to write at the same time). See get for details. - std::atomic_int m_Reading; - - //! Fence for writing operations (in which case we "leak" a string - //! if we try to read at the same time). See get for details. - std::atomic_int m_Writing; - - //! The empty string is often used so we store it outside the set. 
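As the members around here suggest, CStringStore is an interning store: get() hands out shared pointers into a set that deduplicates by string value (via SHashStoredStringPtr and SStoredStringPtrEqual), remove() merely records candidates, and the prune*NotThreadSafe() calls reclaim entries later, outside the hot path. A deliberately simplified, single-threaded analogue of that contract (CToyStringStore is hypothetical, not the real class):

    #include <cassert>
    #include <memory>
    #include <string>
    #include <unordered_set>

    using TStrPtr = std::shared_ptr<const std::string>;

    // Hash and equality look through the pointer at the string value,
    // mirroring SHashStoredStringPtr / SStoredStringPtrEqual above.
    struct SHashPtr {
        std::size_t operator()(const TStrPtr& key) const { return std::hash<std::string>()(*key); }
    };
    struct SEqualPtr {
        bool operator()(const TStrPtr& lhs, const TStrPtr& rhs) const { return *lhs == *rhs; }
    };

    class CToyStringStore {
    public:
        // Equal strings always map to one shared instance.
        TStrPtr get(const std::string& value) {
            return *m_Strings.insert(std::make_shared<const std::string>(value)).first;
        }

        // The real store defers this work: remove() only queues candidates and
        // pruning runs separately. Here we just reclaim anything only we hold.
        void prune() {
            for (auto itr = m_Strings.begin(); itr != m_Strings.end();) {
                if (itr->use_count() == 1) {
                    itr = m_Strings.erase(itr);
                } else {
                    ++itr;
                }
            }
        }

    private:
        std::unordered_set<TStrPtr, SHashPtr, SEqualPtr> m_Strings;
    };

    int main() {
        CToyStringStore store;
        TStrPtr a = store.get("host-1");
        TStrPtr b = store.get("host-1");
        assert(a == b); // deduplicated: one allocation per distinct string
        return 0;
    }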
- core::CStoredStringPtr m_EmptyString; - - //! Set to keep the person/attribute string pointers - TStoredStringPtrUSet m_Strings; - - //! A list of the strings to remove. - TStrVec m_Removed; - - //! Running count of memory usage by stored strings. Avoids the need to - //! recalculate repeatedly. - std::size_t m_StoredStringsMemUse; - - //! Locking primitive - mutable core::CFastMutex m_Mutex; +class MODEL_EXPORT CStringStore : private core::CNonCopyable { +public: + struct MODEL_EXPORT SHashStoredStringPtr { + std::size_t operator()(const core::CStoredStringPtr& key) const { + boost::hash hasher; + return hasher(*key); + } + }; + struct MODEL_EXPORT SStoredStringPtrEqual { + bool operator()(const core::CStoredStringPtr& lhs, const core::CStoredStringPtr& rhs) const { return *lhs == *rhs; } + }; + +public: + //! Call this to tidy up any strings no longer needed. + static void tidyUpNotThreadSafe(); + + //! Singleton pattern for person/attribute names. + static CStringStore& names(); + + //! Singleton pattern for influencer names. + static CStringStore& influencers(); + + //! Fast method to get the pointer for an empty string. + const core::CStoredStringPtr& getEmpty() const; + + //! (Possibly) add \p value to the store and get back a pointer to it. + core::CStoredStringPtr get(const std::string& value); + + //! (Possibly) remove \p value from the store. + void remove(const std::string& value); + + //! Prune strings which have been removed. + void pruneRemovedNotThreadSafe(); + + //! Iterate over the string store and remove unused entries. + void pruneNotThreadSafe(); + + //! Get the memory used by this string store + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + + //! Get the memory used by this string store + std::size_t memoryUsage() const; + +private: + using TStoredStringPtrUSet = boost::unordered_set; + using TStrVec = std::vector; + +private: + //! Constructor of a Singleton is private. + CStringStore(); + + //! Bludgeoning device to delete all objects in store. + void clearEverythingTestOnly(); + +private: + //! Fence for reading operations (in which case we "leak" a string + //! if we try to write at the same time). See get for details. + std::atomic_int m_Reading; + + //! Fence for writing operations (in which case we "leak" a string + //! if we try to read at the same time). See get for details. + std::atomic_int m_Writing; + + //! The empty string is often used so we store it outside the set. + core::CStoredStringPtr m_EmptyString; + + //! Set to keep the person/attribute string pointers + TStoredStringPtrUSet m_Strings; + + //! A list of the strings to remove. + TStrVec m_Removed; + + //! Running count of memory usage by stored strings. Avoids the need to + //! recalculate repeatedly. + std::size_t m_StoredStringsMemUse; + + //! Locking primitive + mutable core::CFastMutex m_Mutex; friend class ::CResourceMonitorTest; friend class ::CStringStoreTest; }; - } // model } // ml diff --git a/include/model/FrequencyPredicates.h b/include/model/FrequencyPredicates.h index 575ef896b9..afd07f8f2c 100644 --- a/include/model/FrequencyPredicates.h +++ b/include/model/FrequencyPredicates.h @@ -10,66 +10,58 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief Wrapper around personFrequency to test whether //! frequency is greater than a specified frequency. 
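Both predicate classes in this header are shaped so they can be handed directly to standard algorithms over the gatherer's (identifier, data) pairs. A hedged sketch of that usage (CToyModel is a hypothetical stand-in for CAnomalyDetectorModel, and the std::size_t key type is an assumption about the stripped template arguments):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in: just enough model to answer personFrequency().
    struct CToyModel {
        double personFrequency(std::size_t pid) const { return pid == 0 ? 0.9 : 0.1; }
    };

    class CPersonFrequencyGreaterThan {
    public:
        CPersonFrequencyGreaterThan(const CToyModel& model, double threshold)
            : m_Model(&model), m_Threshold(threshold) {}

        // Matches pairs whose first element is the person identifier.
        template<typename T>
        bool operator()(const std::pair<std::size_t, T>& t) const {
            return m_Model->personFrequency(t.first) > m_Threshold;
        }

    private:
        const CToyModel* m_Model;
        double m_Threshold;
    };

    int main() {
        CToyModel model;
        std::vector<std::pair<std::size_t, int>> data{{0, 10}, {1, 20}, {2, 30}};
        // Only person 0 (frequency 0.9) clears the 0.5 threshold.
        auto matches = std::count_if(data.begin(), data.end(),
                                     CPersonFrequencyGreaterThan(model, 0.5));
        assert(matches == 1);
        return 0;
    }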
-class MODEL_EXPORT CPersonFrequencyGreaterThan -{ - public: - CPersonFrequencyGreaterThan(const CAnomalyDetectorModel &model, double threshold); +class MODEL_EXPORT CPersonFrequencyGreaterThan { +public: + CPersonFrequencyGreaterThan(const CAnomalyDetectorModel& model, double threshold); - //! Test whether the person's frequency, whose identifier - //! is the first element of \p t, is greater than the - //! threshold supplied to the constructor. - template - bool operator()(const std::pair &t) - { - return m_Model->personFrequency(t.first) > m_Threshold; - } + //! Test whether the person's frequency, whose identifier + //! is the first element of \p t, is greater than the + //! threshold supplied to the constructor. + template + bool operator()(const std::pair& t) { + return m_Model->personFrequency(t.first) > m_Threshold; + } - //! Test whether the person's frequency, whose identifier - //! is encoded in the first element of \p t, is greater - //! than the threshold supplied to the constructor. - template - bool operator()(const std::pair, T> &t) - { - return m_Model->personFrequency(CDataGatherer::extractPersonId(t)) > m_Threshold; - } + //! Test whether the person's frequency, whose identifier + //! is encoded in the first element of \p t, is greater + //! than the threshold supplied to the constructor. + template + bool operator()(const std::pair, T>& t) { + return m_Model->personFrequency(CDataGatherer::extractPersonId(t)) > m_Threshold; + } - private: - //! The model containing the person frequencies. - const CAnomalyDetectorModel *m_Model; - //! The test threshold frequency. - double m_Threshold; +private: + //! The model containing the person frequencies. + const CAnomalyDetectorModel* m_Model; + //! The test threshold frequency. + double m_Threshold; }; //! \brief Wrapper around personFrequency to test whether //! frequency is greater than a specified frequency. -class MODEL_EXPORT CAttributeFrequencyGreaterThan -{ - public: - CAttributeFrequencyGreaterThan(const CAnomalyDetectorModel &model, double threshold); +class MODEL_EXPORT CAttributeFrequencyGreaterThan { +public: + CAttributeFrequencyGreaterThan(const CAnomalyDetectorModel& model, double threshold); - //! Test whether the person's frequency, whose identifier - //! is encoded in the first element of \p t, is greater - //! than the threshold supplied to the constructor. - template - bool operator()(const std::pair, T> &t) - { - return m_Model->attributeFrequency(CDataGatherer::extractAttributeId(t)) > m_Threshold; - } + //! Test whether the person's frequency, whose identifier + //! is encoded in the first element of \p t, is greater + //! than the threshold supplied to the constructor. + template + bool operator()(const std::pair, T>& t) { + return m_Model->attributeFrequency(CDataGatherer::extractAttributeId(t)) > m_Threshold; + } - private: - //! The model containing the person frequencies. - const CAnomalyDetectorModel *m_Model; - //! The test threshold frequency. - double m_Threshold; +private: + //! The model containing the person frequencies. + const CAnomalyDetectorModel* m_Model; + //! The test threshold frequency. + double m_Threshold; }; - } } diff --git a/include/model/FunctionTypes.h b/include/model/FunctionTypes.h index 67fdd09d21..5dee32a319 100644 --- a/include/model/FunctionTypes.h +++ b/include/model/FunctionTypes.h @@ -14,12 +14,9 @@ #include #include -namespace ml -{ -namespace model -{ -namespace function_t -{ +namespace ml { +namespace model { +namespace function_t { //! 
An enumeration of possible functions we can run on a data stream //! on which we do anomaly detection. These map to a set of data @@ -101,8 +98,7 @@ namespace function_t //! -# Population metric sum: for which we look at the sum of the metric //! values each person generates in a bucket optionally partitioned by //! a category. This is used for analyzing metric data as a population. -enum EFunction -{ +enum EFunction { // IMPORTANT: The integer values associated with these enum values are // stored in persisted state. DO NOT CHANGE EXISTING NUMBERS, as this // will invalidate persisted state. Any new enum values that are added @@ -233,15 +229,15 @@ bool isForecastSupported(EFunction function); //! Get the mapping from function to data features. MODEL_EXPORT -const model_t::TFeatureVec &features(EFunction function); +const model_t::TFeatureVec& features(EFunction function); //! The inverse mapping from features to function. MODEL_EXPORT -EFunction function(const model_t::TFeatureVec &features); +EFunction function(const model_t::TFeatureVec& features); //! Get the name of \p function. MODEL_EXPORT -const std::string &name(EFunction function); +const std::string& name(EFunction function); //! Get a string description of \p function. MODEL_EXPORT @@ -249,11 +245,9 @@ std::string print(EFunction function); //! Overload std stream << operator. MODEL_EXPORT -std::ostream &operator<<(std::ostream &o, EFunction function); - +std::ostream& operator<<(std::ostream& o, EFunction function); } } } #endif // INCLUDED_ml_model_function_t_FunctionTypes_h - diff --git a/include/model/ImportExport.h b/include/model/ImportExport.h index f0d76d7707..89d74697e7 100644 --- a/include/model/ImportExport.h +++ b/include/model/ImportExport.h @@ -36,4 +36,3 @@ #endif #endif // INCLUDED_ml_model_ImportExport_h - diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index d468bc4916..de979e346d 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -8,8 +8,8 @@ #define INCLUDED_ml_model_t_ModelTypes_h #include -#include #include +#include #include @@ -21,28 +21,22 @@ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } -namespace maths -{ +namespace maths { class CMultivariatePrior; class CPrior; class CTimeSeriesDecompositionInterface; } -namespace model -{ +namespace model { class CInfluenceCalculator; struct SModelParams; } -namespace model_t -{ +namespace model_t { using TDouble1Vec = core::CSmallVector; using TDouble2Vec = core::CSmallVector; @@ -56,12 +50,7 @@ using TInfluenceCalculatorCPtr = boost::shared_ptr(E_Interim)) != 0; - } - - //! Get as interim or final enumeration. - EInterimOrFinal asInterimOrFinal() const - { - return this->isInterim() ? E_Interim : E_Final; - } - - //! Check if this is unconditional. - bool isUnconditional() const - { - return (m_Type & static_cast(E_Unconditional)) != 0; - } - - //! Get as conditional or unconditional enumeration. - EConditionalOrUnconditional asConditionalOrUnconditional() const - { - return this->isUnconditional() ? E_Unconditional : E_Conditional; - } - - //! Get as an unsigned integer. - unsigned int asUint() const { return m_Type; } - - private: - //! Encodes the result type. 
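The reformatted CResultType below is easier to follow once the encoding is spelled out: bits 1|2 carry the interim/final axis and bits 4|8 carry the conditional/unconditional axis, so each set() overload first masks off its own axis and then ORs in the new flag, leaving the other axis untouched. The same arithmetic in a standalone sketch:

    #include <cassert>

    int main() {
        enum { E_Interim = 1, E_Final = 2, E_Unconditional = 4, E_Conditional = 8 };

        unsigned int type = E_Interim | E_Conditional;

        // Flip interim -> final: mask off the interim/final axis, keep the other.
        type = (type & (E_Unconditional | E_Conditional)) | E_Final;

        assert((type & E_Interim) == 0);
        assert((type & E_Final) != 0);
        assert((type & E_Conditional) != 0); // untouched by the update
        return 0;
    }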
-        unsigned int m_Type;
+class MODEL_EXPORT CResultType {
+public:
+    enum EInterimOrFinal { E_Interim = 1, E_Final = 2 };
+
+    enum EConditionalOrUnconditional { E_Unconditional = 4, E_Conditional = 8 };
+
+public:
+    CResultType() : m_Type(0) {}
+    CResultType(EInterimOrFinal type) : m_Type(type) {}
+    CResultType(EConditionalOrUnconditional type) : m_Type(type) {}
+    explicit CResultType(unsigned int type) : m_Type(type) {}
+
+    //! Set whether or not this is interim.
+    void set(EInterimOrFinal type) {
+        m_Type = m_Type & (E_Unconditional | E_Conditional);
+        m_Type = m_Type | type;
+    }
+
+    //! Set whether or not this is conditional.
+    void set(EConditionalOrUnconditional type) {
+        m_Type = m_Type & (E_Interim | E_Final);
+        m_Type = m_Type | type;
+    }
+
+    //! Check if this is interim.
+    bool isInterim() const { return (m_Type & static_cast<unsigned int>(E_Interim)) != 0; }
+
+    //! Get as interim or final enumeration.
+    EInterimOrFinal asInterimOrFinal() const { return this->isInterim() ? E_Interim : E_Final; }
+
+    //! Check if this is unconditional.
+    bool isUnconditional() const { return (m_Type & static_cast<unsigned int>(E_Unconditional)) != 0; }
+
+    //! Get as conditional or unconditional enumeration.
+    EConditionalOrUnconditional asConditionalOrUnconditional() const { return this->isUnconditional() ? E_Unconditional : E_Conditional; }
+
+    //! Get as an unsigned integer.
+    unsigned int asUint() const { return m_Type; }
+
+private:
+    //! Encodes the result type.
+    unsigned int m_Type;
 };
 
 //! The feature naming is systematic and all subsequent feature
@@ -328,8 +294,7 @@ class MODEL_EXPORT CResultType
 //! -# PopulationHighInfoContentByBucketAndPerson: is for detecting
 //!    unusually high information content (or compressibility) of values
 //!    for each person and attribute in a bucketing interval.
-enum EFeature
-{
+enum EFeature {
     // IMPORTANT: The integer values associated with these enum values are
     // stored in persisted state. DO NOT CHANGE EXISTING NUMBERS, as this
    // will invalidate persisted state. Any new enum values that are added
@@ -454,8 +419,7 @@ std::size_t dimension(EFeature feature);
 
 //! For features which have extra statistics in order to compute
 //! influence remove those extra statistics.
 MODEL_EXPORT
-TDouble2Vec1Vec stripExtraStatistics(EFeature feature,
-                                     const TDouble2Vec1Vec &values);
+TDouble2Vec1Vec stripExtraStatistics(EFeature feature, const TDouble2Vec1Vec& values);
 
 //! Check if \p feature is categorical, i.e. we should create
 //! a distribution over the distinct values observed for the
@@ -522,7 +486,7 @@ double offsetCountToZero(EFeature feature, double count);
 
 //! Offset count features so that their range starts at zero.
 MODEL_EXPORT
-void offsetCountToZero(EFeature feature, TDouble1Vec &count);
+void offsetCountToZero(EFeature feature, TDouble1Vec& count);
 
 //! The inverse of offsetCountToZero.
 MODEL_EXPORT
@@ -530,7 +494,7 @@ double inverseOffsetCountToZero(EFeature feature, double count);
 
 //! The inverse of offsetCountToZero.
 MODEL_EXPORT
-void inverseOffsetCountToZero(EFeature feature, TDouble1Vec &count);
+void inverseOffsetCountToZero(EFeature feature, TDouble1Vec& count);
 
 //! Check if the feature counts empty buckets.
 MODEL_EXPORT
@@ -544,7 +508,7 @@ double emptyBucketCountWeight(EFeature feature, double frequency, double cutoff)
 
 //! Get the rate at which \p feature learns.
 MODEL_EXPORT
-double learnRate(EFeature feature, const model::SModelParams &params);
+double learnRate(EFeature feature, const model::SModelParams& params);
 
 //!
Get the type of probability calculation to use for \p feature. MODEL_EXPORT @@ -556,10 +520,7 @@ maths_t::EProbabilityCalculation probabilityCalculation(EFeature feature); //! some metric features the time is deduced in which case \p time //! is used. MODEL_EXPORT -core_t::TTime sampleTime(EFeature feature, - core_t::TTime bucketStartTime, - core_t::TTime bucketLength, - core_t::TTime time = 0); +core_t::TTime sampleTime(EFeature feature, core_t::TTime bucketStartTime, core_t::TTime bucketLength, core_t::TTime time = 0); //! Get the support for \p feature. MODEL_EXPORT @@ -567,9 +528,7 @@ TDouble1VecDouble1VecPr support(EFeature feature); //! Get the adjusted probability for \p feature. MODEL_EXPORT -double adjustProbability(model_t::EFeature feature, - core_t::TTime elapsedTime, - double probability); +double adjustProbability(model_t::EFeature feature, core_t::TTime elapsedTime, double probability); //! Get the influence calculator for \p feature. MODEL_EXPORT @@ -583,7 +542,7 @@ bool requiresInterimResultAdjustment(EFeature feature); //! Maps internal feature names to human readable output function //! names that can be included in the results. MODEL_EXPORT -const std::string &outputFunctionName(EFeature feature); +const std::string& outputFunctionName(EFeature feature); //! Get a string description of \p feature. MODEL_EXPORT @@ -599,118 +558,123 @@ std::string print(EFeature feature); //! generally a bad idea so don't take this as a precedent for //! crazy macro magic (see item 1.14 of our coding standards for //! guidelines). -#define CASE_INDIVIDUAL_COUNT case model_t::E_IndividualCountByBucketAndPerson: \ - case model_t::E_IndividualNonZeroCountByBucketAndPerson: \ - case model_t::E_IndividualTotalBucketCountByPerson: \ - case model_t::E_IndividualIndicatorOfBucketPerson: \ - case model_t::E_IndividualLowCountsByBucketAndPerson: \ - case model_t::E_IndividualHighCountsByBucketAndPerson: \ - case model_t::E_IndividualArrivalTimesByPerson: \ - case model_t::E_IndividualLongArrivalTimesByPerson: \ - case model_t::E_IndividualShortArrivalTimesByPerson: \ - case model_t::E_IndividualLowNonZeroCountByBucketAndPerson: \ - case model_t::E_IndividualHighNonZeroCountByBucketAndPerson: \ - case model_t::E_IndividualUniqueCountByBucketAndPerson: \ - case model_t::E_IndividualLowUniqueCountByBucketAndPerson: \ - case model_t::E_IndividualHighUniqueCountByBucketAndPerson: \ - case model_t::E_IndividualInfoContentByBucketAndPerson: \ - case model_t::E_IndividualHighInfoContentByBucketAndPerson: \ - case model_t::E_IndividualLowInfoContentByBucketAndPerson: \ - case model_t::E_IndividualTimeOfDayByBucketAndPerson: \ - case model_t::E_IndividualTimeOfWeekByBucketAndPerson +#define CASE_INDIVIDUAL_COUNT \ + case model_t::E_IndividualCountByBucketAndPerson: \ + case model_t::E_IndividualNonZeroCountByBucketAndPerson: \ + case model_t::E_IndividualTotalBucketCountByPerson: \ + case model_t::E_IndividualIndicatorOfBucketPerson: \ + case model_t::E_IndividualLowCountsByBucketAndPerson: \ + case model_t::E_IndividualHighCountsByBucketAndPerson: \ + case model_t::E_IndividualArrivalTimesByPerson: \ + case model_t::E_IndividualLongArrivalTimesByPerson: \ + case model_t::E_IndividualShortArrivalTimesByPerson: \ + case model_t::E_IndividualLowNonZeroCountByBucketAndPerson: \ + case model_t::E_IndividualHighNonZeroCountByBucketAndPerson: \ + case model_t::E_IndividualUniqueCountByBucketAndPerson: \ + case model_t::E_IndividualLowUniqueCountByBucketAndPerson: \ + case 
model_t::E_IndividualHighUniqueCountByBucketAndPerson: \ + case model_t::E_IndividualInfoContentByBucketAndPerson: \ + case model_t::E_IndividualHighInfoContentByBucketAndPerson: \ + case model_t::E_IndividualLowInfoContentByBucketAndPerson: \ + case model_t::E_IndividualTimeOfDayByBucketAndPerson: \ + case model_t::E_IndividualTimeOfWeekByBucketAndPerson //! Individual metric feature case statement block. -#define CASE_INDIVIDUAL_METRIC case model_t::E_IndividualMeanByPerson: \ - case model_t::E_IndividualMedianByPerson: \ - case model_t::E_IndividualMinByPerson: \ - case model_t::E_IndividualMaxByPerson: \ - case model_t::E_IndividualSumByBucketAndPerson: \ - case model_t::E_IndividualLowMeanByPerson: \ - case model_t::E_IndividualHighMeanByPerson: \ - case model_t::E_IndividualLowSumByBucketAndPerson: \ - case model_t::E_IndividualHighSumByBucketAndPerson: \ - case model_t::E_IndividualNonNullSumByBucketAndPerson: \ - case model_t::E_IndividualLowNonNullSumByBucketAndPerson: \ - case model_t::E_IndividualHighNonNullSumByBucketAndPerson: \ - case model_t::E_IndividualMeanLatLongByPerson: \ - case model_t::E_IndividualMaxVelocityByPerson: \ - case model_t::E_IndividualMinVelocityByPerson: \ - case model_t::E_IndividualMeanVelocityByPerson: \ - case model_t::E_IndividualSumVelocityByPerson: \ - case model_t::E_IndividualVarianceByPerson: \ - case model_t::E_IndividualLowVarianceByPerson: \ - case model_t::E_IndividualHighVarianceByPerson: \ - case model_t::E_IndividualLowMedianByPerson: \ - case model_t::E_IndividualHighMedianByPerson +#define CASE_INDIVIDUAL_METRIC \ + case model_t::E_IndividualMeanByPerson: \ + case model_t::E_IndividualMedianByPerson: \ + case model_t::E_IndividualMinByPerson: \ + case model_t::E_IndividualMaxByPerson: \ + case model_t::E_IndividualSumByBucketAndPerson: \ + case model_t::E_IndividualLowMeanByPerson: \ + case model_t::E_IndividualHighMeanByPerson: \ + case model_t::E_IndividualLowSumByBucketAndPerson: \ + case model_t::E_IndividualHighSumByBucketAndPerson: \ + case model_t::E_IndividualNonNullSumByBucketAndPerson: \ + case model_t::E_IndividualLowNonNullSumByBucketAndPerson: \ + case model_t::E_IndividualHighNonNullSumByBucketAndPerson: \ + case model_t::E_IndividualMeanLatLongByPerson: \ + case model_t::E_IndividualMaxVelocityByPerson: \ + case model_t::E_IndividualMinVelocityByPerson: \ + case model_t::E_IndividualMeanVelocityByPerson: \ + case model_t::E_IndividualSumVelocityByPerson: \ + case model_t::E_IndividualVarianceByPerson: \ + case model_t::E_IndividualLowVarianceByPerson: \ + case model_t::E_IndividualHighVarianceByPerson: \ + case model_t::E_IndividualLowMedianByPerson: \ + case model_t::E_IndividualHighMedianByPerson //! Population count feature case statement block. 
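A note on how these CASE_* blocks are consumed, since the expansion is unusual: each macro expands to a run of case labels with the final colon supplied at the use site, so a switch over EFeature can dispatch a whole feature family to one body while the compiler still checks the enum for exhaustiveness. A reduced, self-contained analogue:

    #include <cassert>

    // Reduced analogue: one macro expands to several case labels, with the
    // trailing colon supplied where the macro is used.
    enum EFeature { E_IndividualCount, E_IndividualNonZeroCount, E_IndividualMean };

    #define CASE_INDIVIDUAL_COUNT                                              \
        case E_IndividualCount:                                                \
        case E_IndividualNonZeroCount

    bool isCount(EFeature feature) {
        switch (feature) {
        CASE_INDIVIDUAL_COUNT: // expands to both count case labels
            return true;
        case E_IndividualMean:
            return false;
        }
        return false;
    }

    int main() {
        assert(isCount(E_IndividualNonZeroCount));
        assert(!isCount(E_IndividualMean));
        return 0;
    }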
-#define CASE_POPULATION_COUNT case model_t::E_PopulationAttributeTotalCountByPerson: \ - case model_t::E_PopulationCountByBucketPersonAndAttribute: \ - case model_t::E_PopulationIndicatorOfBucketPersonAndAttribute: \ - case model_t::E_PopulationUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PopulationUniquePersonCountByAttribute: \ - case model_t::E_PopulationLowCountsByBucketPersonAndAttribute: \ - case model_t::E_PopulationHighCountsByBucketPersonAndAttribute: \ - case model_t::E_PopulationInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute: \ - case model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute +#define CASE_POPULATION_COUNT \ + case model_t::E_PopulationAttributeTotalCountByPerson: \ + case model_t::E_PopulationCountByBucketPersonAndAttribute: \ + case model_t::E_PopulationIndicatorOfBucketPersonAndAttribute: \ + case model_t::E_PopulationUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PopulationUniquePersonCountByAttribute: \ + case model_t::E_PopulationLowCountsByBucketPersonAndAttribute: \ + case model_t::E_PopulationHighCountsByBucketPersonAndAttribute: \ + case model_t::E_PopulationInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute: \ + case model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute //! Population metric feature case statement block. 
-#define CASE_POPULATION_METRIC case model_t::E_PopulationMeanByPersonAndAttribute: \ - case model_t::E_PopulationMedianByPersonAndAttribute: \ - case model_t::E_PopulationMinByPersonAndAttribute: \ - case model_t::E_PopulationMaxByPersonAndAttribute: \ - case model_t::E_PopulationSumByBucketPersonAndAttribute: \ - case model_t::E_PopulationLowMeanByPersonAndAttribute: \ - case model_t::E_PopulationHighMeanByPersonAndAttribute: \ - case model_t::E_PopulationLowSumByBucketPersonAndAttribute: \ - case model_t::E_PopulationHighSumByBucketPersonAndAttribute: \ - case model_t::E_PopulationMeanLatLongByPersonAndAttribute: \ - case model_t::E_PopulationMaxVelocityByPersonAndAttribute: \ - case model_t::E_PopulationMinVelocityByPersonAndAttribute: \ - case model_t::E_PopulationMeanVelocityByPersonAndAttribute: \ - case model_t::E_PopulationSumVelocityByPersonAndAttribute: \ - case model_t::E_PopulationVarianceByPersonAndAttribute: \ - case model_t::E_PopulationLowVarianceByPersonAndAttribute: \ - case model_t::E_PopulationHighVarianceByPersonAndAttribute: \ - case model_t::E_PopulationLowMedianByPersonAndAttribute: \ - case model_t::E_PopulationHighMedianByPersonAndAttribute +#define CASE_POPULATION_METRIC \ + case model_t::E_PopulationMeanByPersonAndAttribute: \ + case model_t::E_PopulationMedianByPersonAndAttribute: \ + case model_t::E_PopulationMinByPersonAndAttribute: \ + case model_t::E_PopulationMaxByPersonAndAttribute: \ + case model_t::E_PopulationSumByBucketPersonAndAttribute: \ + case model_t::E_PopulationLowMeanByPersonAndAttribute: \ + case model_t::E_PopulationHighMeanByPersonAndAttribute: \ + case model_t::E_PopulationLowSumByBucketPersonAndAttribute: \ + case model_t::E_PopulationHighSumByBucketPersonAndAttribute: \ + case model_t::E_PopulationMeanLatLongByPersonAndAttribute: \ + case model_t::E_PopulationMaxVelocityByPersonAndAttribute: \ + case model_t::E_PopulationMinVelocityByPersonAndAttribute: \ + case model_t::E_PopulationMeanVelocityByPersonAndAttribute: \ + case model_t::E_PopulationSumVelocityByPersonAndAttribute: \ + case model_t::E_PopulationVarianceByPersonAndAttribute: \ + case model_t::E_PopulationLowVarianceByPersonAndAttribute: \ + case model_t::E_PopulationHighVarianceByPersonAndAttribute: \ + case model_t::E_PopulationLowMedianByPersonAndAttribute: \ + case model_t::E_PopulationHighMedianByPersonAndAttribute //! Peers count feature case statement block. 
-#define CASE_PEERS_COUNT case model_t::E_PeersAttributeTotalCountByPerson: \ - case model_t::E_PeersCountByBucketPersonAndAttribute: \ - case model_t::E_PeersUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PeersLowCountsByBucketPersonAndAttribute: \ - case model_t::E_PeersHighCountsByBucketPersonAndAttribute: \ - case model_t::E_PeersInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PeersLowInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PeersHighInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PeersLowUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PeersHighUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PeersTimeOfDayByBucketPersonAndAttribute: \ - case model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute +#define CASE_PEERS_COUNT \ + case model_t::E_PeersAttributeTotalCountByPerson: \ + case model_t::E_PeersCountByBucketPersonAndAttribute: \ + case model_t::E_PeersUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PeersLowCountsByBucketPersonAndAttribute: \ + case model_t::E_PeersHighCountsByBucketPersonAndAttribute: \ + case model_t::E_PeersInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PeersLowInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PeersHighInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PeersLowUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PeersHighUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PeersTimeOfDayByBucketPersonAndAttribute: \ + case model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute // Peers metric features case statement block. -#define CASE_PEERS_METRIC case model_t::E_PeersMeanByPersonAndAttribute: \ - case model_t::E_PeersMedianByPersonAndAttribute: \ - case model_t::E_PeersMinByPersonAndAttribute: \ - case model_t::E_PeersMaxByPersonAndAttribute: \ - case model_t::E_PeersSumByBucketPersonAndAttribute: \ - case model_t::E_PeersLowMeanByPersonAndAttribute: \ - case model_t::E_PeersHighMeanByPersonAndAttribute: \ - case model_t::E_PeersLowSumByBucketPersonAndAttribute: \ - case model_t::E_PeersHighSumByBucketPersonAndAttribute +#define CASE_PEERS_METRIC \ + case model_t::E_PeersMeanByPersonAndAttribute: \ + case model_t::E_PeersMedianByPersonAndAttribute: \ + case model_t::E_PeersMinByPersonAndAttribute: \ + case model_t::E_PeersMaxByPersonAndAttribute: \ + case model_t::E_PeersSumByBucketPersonAndAttribute: \ + case model_t::E_PeersLowMeanByPersonAndAttribute: \ + case model_t::E_PeersHighMeanByPersonAndAttribute: \ + case model_t::E_PeersLowSumByBucketPersonAndAttribute: \ + case model_t::E_PeersHighSumByBucketPersonAndAttribute //! The categories of metric feature. //! //! These enumerate the distinct types of metric statistic //! which we gather. -enum EMetricCategory -{ +enum EMetricCategory { E_Mean, E_Min, E_Max, @@ -730,7 +694,7 @@ static const size_t NUM_METRIC_CATEGORIES = 9; //! Get the metric feature data corresponding to \p feature //! if there is one. MODEL_EXPORT -bool metricCategory(EFeature feature, EMetricCategory &result); +bool metricCategory(EFeature feature, EMetricCategory& result); //! Get a string description of \p category. MODEL_EXPORT @@ -740,13 +704,7 @@ std::string print(EMetricCategory category); //! //! The enumerate the distinct type of event rate statistics //! which we gather. -enum EEventRateCategory -{ - E_MeanArrivalTimes, - E_AttributePeople, - E_UniqueValues, - E_DiurnalTimes -}; +enum EEventRateCategory { E_MeanArrivalTimes, E_AttributePeople, E_UniqueValues, E_DiurnalTimes }; //! 
Get a string description of \p category. MODEL_EXPORT @@ -763,15 +721,7 @@ std::string print(EEventRateCategory category); //! peer groups. //! -# Population metric: analysis of message values in //! peer groups. -enum EAnalysisCategory -{ - E_EventRate, - E_Metric, - E_PopulationEventRate, - E_PopulationMetric, - E_PeersEventRate, - E_PeersMetric -}; +enum EAnalysisCategory { E_EventRate, E_Metric, E_PopulationEventRate, E_PopulationMetric, E_PeersEventRate, E_PeersMetric }; //! Get the category of analysis to which \p feature belongs. MODEL_EXPORT @@ -783,8 +733,7 @@ std::string print(EAnalysisCategory category); //! The different ways we might be told the fields for receiving pre-summarised //! data. -enum ESummaryMode -{ +enum ESummaryMode { E_None, //!< No pre-summarisation of input E_Manual //!< Config defines the field names for pre-summarised input }; @@ -795,20 +744,13 @@ enum ESummaryMode //! -# E_XF_By: remove popular "attributes" from populations //! -# E_XF_Over: remove popular "people" from populations //! -# E_XF_Both: remove popular "people" and "attributes" from populations -enum EExcludeFrequent -{ - E_XF_None = 0, - E_XF_By = 1, - E_XF_Over = 2, - E_XF_Both = 3 -}; +enum EExcludeFrequent { E_XF_None = 0, E_XF_By = 1, E_XF_Over = 2, E_XF_Both = 3 }; //! An enumeration of the ResourceMonitor memory status - //! Start in the OK state. Moves into soft limit if aggressive pruning //! has taken place to avoid hitting the memory limit, //! and goes to hard limit if samples have been dropped -enum EMemoryStatus -{ +enum EMemoryStatus { E_MemoryStatusOk = 0, //!< Memory usage normal E_MemoryStatusSoftLimit = 1, //!< Pruning has taken place to reduce usage E_MemoryStatusHardLimit = 2 //!< Samples have been dropped @@ -825,12 +767,7 @@ std::string print(EMemoryStatus memoryStatus); //! values of the by field. //! -# AggregateDetectors: the style used to aggregate distinct detector //! results. -enum EAggregationStyle -{ - E_AggregatePeople = 0, - E_AggregateAttributes = 1, - E_AggregateDetectors = 2 -}; +enum EAggregationStyle { E_AggregatePeople = 0, E_AggregateAttributes = 1, E_AggregateDetectors = 2 }; const std::size_t NUMBER_AGGREGATION_STYLES = E_AggregateDetectors + 1; //! Controllable aggregation parameters: @@ -842,18 +779,11 @@ const std::size_t NUMBER_AGGREGATION_STYLES = E_AggregateDetectors + 1; //! m from n probability calculation. //! -# MaxExtremeSamples: the maximum number m of samples to consider in the //! m from n probability calculation. -enum EAggregationParam -{ - E_JointProbabilityWeight = 0, - E_ExtremeProbabilityWeight = 1, - E_MinExtremeSamples = 2, - E_MaxExtremeSamples = 3 -}; +enum EAggregationParam { E_JointProbabilityWeight = 0, E_ExtremeProbabilityWeight = 1, E_MinExtremeSamples = 2, E_MaxExtremeSamples = 3 }; const std::size_t NUMBER_AGGREGATION_PARAMS = E_MaxExtremeSamples + 1; //! The dummy attribute identifier used for modeling individual features. const std::size_t INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID = 0u; - } } diff --git a/include/test/CMultiFileDataAdder.h b/include/test/CMultiFileDataAdder.h index 29d6cd2cb2..57b0ab0086 100644 --- a/include/test/CMultiFileDataAdder.h +++ b/include/test/CMultiFileDataAdder.h @@ -12,10 +12,8 @@ #include -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { //! \brief //! A file based persister for writing Ml models. @@ -35,44 +33,36 @@ namespace test //! file. For an option to persist an entire model to a single //! file, use the CSingleStreamDataAdder class. //! 
-class TEST_EXPORT CMultiFileDataAdder : public core::CDataAdder -{ - public: - //! Default file extension for persisted files. - static const std::string JSON_FILE_EXT; +class TEST_EXPORT CMultiFileDataAdder : public core::CDataAdder { +public: + //! Default file extension for persisted files. + static const std::string JSON_FILE_EXT; - public: - //! Constructor uses the pass-by-value-and-swap idiom - CMultiFileDataAdder(std::string baseFilename, - std::string fileExtension = JSON_FILE_EXT); +public: + //! Constructor uses the pass-by-value-and-swap idiom + CMultiFileDataAdder(std::string baseFilename, std::string fileExtension = JSON_FILE_EXT); - //! Add streamed data - //! \param index Sub-directory name - //! \param id File name (without extension) - virtual TOStreamP addStreamed(const std::string &index, - const std::string &id); + //! Add streamed data + //! \param index Sub-directory name + //! \param id File name (without extension) + virtual TOStreamP addStreamed(const std::string& index, const std::string& id); - //! Clients that get a stream using addStreamed() must call this - //! method one they've finished sending data to the stream. - virtual bool streamComplete(TOStreamP &strm, - bool force); + //! Clients that get a stream using addStreamed() must call this + //! method one they've finished sending data to the stream. + virtual bool streamComplete(TOStreamP& strm, bool force); - private: - //! Make a file name of the form base/_index/id.extension - std::string makeFilename(const std::string &index, - const std::string &id) const; +private: + //! Make a file name of the form base/_index/id.extension + std::string makeFilename(const std::string& index, const std::string& id) const; - private: - //! Name of the file to serialise models to - std::string m_BaseFilename; +private: + //! Name of the file to serialise models to + std::string m_BaseFilename; - //! The extension for the peristed files - std::string m_FileExtension; + //! The extension for the peristed files + std::string m_FileExtension; }; - - } } #endif // INCLUDED_ml_test_CMultiFileDataAdder_h - diff --git a/include/test/CMultiFileSearcher.h b/include/test/CMultiFileSearcher.h index 99a8d25eca..d47586a77b 100644 --- a/include/test/CMultiFileSearcher.h +++ b/include/test/CMultiFileSearcher.h @@ -12,11 +12,8 @@ #include - -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { //! \brief //! Retrieves data previously persisted to file. @@ -33,36 +30,30 @@ namespace test //! class. For data persisted by the CSingleStreamDataAdder class, use the //! CSingleStreamSearcher. //! -class TEST_EXPORT CMultiFileSearcher : public core::CDataSearcher -{ - public: - //! File extension for persisted files. - static const std::string JSON_FILE_EXT; +class TEST_EXPORT CMultiFileSearcher : public core::CDataSearcher { +public: + //! File extension for persisted files. + static const std::string JSON_FILE_EXT; - public: - //! Constructor uses the pass-by-value-and-move idiom - CMultiFileSearcher(std::string baseFilename, - std::string baseDocId, - std::string fileExtension = JSON_FILE_EXT); +public: + //! Constructor uses the pass-by-value-and-move idiom + CMultiFileSearcher(std::string baseFilename, std::string baseDocId, std::string fileExtension = JSON_FILE_EXT); - //! Load the file - //! \return Pointer to the input stream - may be NULL - virtual TIStreamP search(size_t currentDocNum, size_t limit); + //! Load the file + //! 
\return Pointer to the input stream - may be NULL + virtual TIStreamP search(size_t currentDocNum, size_t limit); - private: - //! Name of the file to serialise models to - std::string m_BaseFilename; +private: + //! Name of the file to serialise models to + std::string m_BaseFilename; - //! Base ID for stored documents - std::string m_BaseDocId; + //! Base ID for stored documents + std::string m_BaseDocId; - //! The extension for the peristed files - std::string m_FileExtension; + //! The extension for the peristed files + std::string m_FileExtension; }; - - } } #endif // INCLUDED_ml_test_CMultiFileSearcher_h - diff --git a/include/test/CRandomNumbers.h b/include/test/CRandomNumbers.h index cc1de472af..0008d491fa 100644 --- a/include/test/CRandomNumbers.h +++ b/include/test/CRandomNumbers.h @@ -17,165 +17,131 @@ #include -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { //! \brief Creates random numbers from a variety of distributions. -class TEST_EXPORT CRandomNumbers -{ +class TEST_EXPORT CRandomNumbers { +public: + using TDoubleVec = std::vector; + using TDoubleVecVec = std::vector; + using TUIntVec = std::vector; + using TSizeVec = std::vector; + using TStrVec = std::vector; + using TGenerator = maths::CPRNG::CXorShift1024Mult; + using TGeneratorPtr = boost::shared_ptr; + +public: + //! A uniform generator on the interval [a,b]. + class TEST_EXPORT CUniform0nGenerator { public: - using TDoubleVec = std::vector; - using TDoubleVecVec = std::vector; - using TUIntVec = std::vector; - using TSizeVec = std::vector; - using TStrVec = std::vector; - using TGenerator = maths::CPRNG::CXorShift1024Mult; - using TGeneratorPtr = boost::shared_ptr; + CUniform0nGenerator(const TGenerator& generator); - public: - //! A uniform generator on the interval [a,b]. - class TEST_EXPORT CUniform0nGenerator - { - public: - CUniform0nGenerator(const TGenerator &generator); - - std::size_t operator()(std::size_t n) const; - - private: - TGeneratorPtr m_Generator; - }; - - public: - //! \brief Generate random samples from the specified distribution - //! using a custom random number generator. - template - static void generateSamples(RNG &randomNumberGenerator, - const Distribution &distribution, - std::size_t numberSamples, - Container &samples); - - //! Shuffle the elements of a sequence using a random number generator. - //! - //! Reorders the elements in the range \p [first,last) using the - //! internal random number generator to provide a random distribution. - //! - //! \note We provide our own implementation of std::random_shuffle - //! based on the libc++ implementation because this is different from - //! the libstdc++ implementation which can cause platform specific test - //! failures. - template - void random_shuffle(ITR first, ITR last); - - //! Generate normal random samples with the specified mean and - //! variance using the default random number generator. - void generateNormalSamples(double mean, - double variance, - std::size_t numberSamples, - TDoubleVec &samples); - - //! Generate multivariate normal random samples with the specified - //! mean and covariance matrix the default random number generator. - void generateMultivariateNormalSamples(const TDoubleVec &mean, - const TDoubleVecVec &covariances, - std::size_t numberSamples, - TDoubleVecVec &samples); - - //! Generate Poisson random samples with the specified rate using - //! the default random number generator. - void generatePoissonSamples(double rate, - std::size_t numberSamples, - TUIntVec &samples); - - //! 
Generate Student's t random samples with the specified degrees - //! freedom using the default random number generator. - void generateStudentsSamples(double degreesFreedom, - std::size_t numberSamples, - TDoubleVec &samples); - - //! Generate log-normal random samples with the specified location - //! and scale using the default random number generator. - void generateLogNormalSamples(double location, - double squareScale, - std::size_t numberSamples, - TDoubleVec &samples); - - //! Generate uniform random samples in the interval [a,b) using - //! the default random number generator. - void generateUniformSamples(double a, - double b, - std::size_t numberSamples, - TDoubleVec &samples); - - //! Generate uniform integer samples from the the set [a, a+1, ..., b) - //! using the default random number generator. - void generateUniformSamples(std::size_t a, - std::size_t b, - std::size_t numberSamples, - TSizeVec &samples); - - //! Generate gamma random samples with the specified shape and rate - //! using the default random number generator. - void generateGammaSamples(double shape, - double scale, - std::size_t numberSamples, - TDoubleVec &samples); - - //! Generate multinomial random samples on the specified categories - //! using the default random number generator. - void generateMultinomialSamples(const TDoubleVec &categories, - const TDoubleVec &probabilities, - std::size_t numberSamples, - TDoubleVec &samples); - - //! Generate random samples from a Diriclet distribution with - //! concentration parameters \p concentrations. - void generateDirichletSamples(const TDoubleVec &concentrations, - std::size_t numberSamples, - TDoubleVecVec &samples); - - //! Generate a collection of random words of specified length using - //! the default random number generator. - void generateWords(std::size_t length, - std::size_t numberSamples, - TStrVec &samples); - - //! Generate a collection of |\p sizes| random mean vectors and - //! covariance matrices and a collection of samples from those - //! distributions. - //! - //! \param[in] sizes The number of points to generate from each - //! cluster. - //! \param[out] means Filled in with the distribution mean for - //! each cluster. - //! \param[out] covariances Filled in with the distribution covariance - //! matrix for each cluster. - //! \param[out] points Filled in with the samples from each cluster. - template - void generateRandomMultivariateNormals(const TSizeVec &sizes, - std::vector > &means, - std::vector > &covariances, - std::vector > > &points); - - //! Get a uniform generator in the range [0, n). This can be used - //! in conjunction with std::random_shuffle if you want a seeded - //! platform independent implementation. - CUniform0nGenerator uniformGenerator(); - - //! Throw away \p n random numbers. - void discard(std::size_t n); + std::size_t operator()(std::size_t n) const; private: - //! The random number generator. - TGenerator m_Generator; + TGeneratorPtr m_Generator; + }; + +public: + //! \brief Generate random samples from the specified distribution + //! using a custom random number generator. + template + static void + generateSamples(RNG& randomNumberGenerator, const Distribution& distribution, std::size_t numberSamples, Container& samples); + + //! Shuffle the elements of a sequence using a random number generator. + //! + //! Reorders the elements in the range \p [first,last) using the + //! internal random number generator to provide a random distribution. + //! + //! 
\note We provide our own implementation of std::random_shuffle
+    //! based on the libc++ implementation because this is different from
+    //! the libstdc++ implementation which can cause platform specific test
+    //! failures.
+    template<typename ITR>
+    void random_shuffle(ITR first, ITR last);
+
+    //! Generate normal random samples with the specified mean and
+    //! variance using the default random number generator.
+    void generateNormalSamples(double mean, double variance, std::size_t numberSamples, TDoubleVec& samples);
+
+    //! Generate multivariate normal random samples with the specified
+    //! mean and covariance matrix using the default random number generator.
+    void generateMultivariateNormalSamples(const TDoubleVec& mean,
+                                           const TDoubleVecVec& covariances,
+                                           std::size_t numberSamples,
+                                           TDoubleVecVec& samples);
+
+    //! Generate Poisson random samples with the specified rate using
+    //! the default random number generator.
+    void generatePoissonSamples(double rate, std::size_t numberSamples, TUIntVec& samples);
+
+    //! Generate Student's t random samples with the specified degrees of
+    //! freedom using the default random number generator.
+    void generateStudentsSamples(double degreesFreedom, std::size_t numberSamples, TDoubleVec& samples);
+
+    //! Generate log-normal random samples with the specified location
+    //! and scale using the default random number generator.
+    void generateLogNormalSamples(double location, double squareScale, std::size_t numberSamples, TDoubleVec& samples);
+
+    //! Generate uniform random samples in the interval [a,b) using
+    //! the default random number generator.
+    void generateUniformSamples(double a, double b, std::size_t numberSamples, TDoubleVec& samples);
+
+    //! Generate uniform integer samples from the set [a, a+1, ..., b)
+    //! using the default random number generator.
+    void generateUniformSamples(std::size_t a, std::size_t b, std::size_t numberSamples, TSizeVec& samples);
+
+    //! Generate gamma random samples with the specified shape and scale
+    //! using the default random number generator.
+    void generateGammaSamples(double shape, double scale, std::size_t numberSamples, TDoubleVec& samples);
+
+    //! Generate multinomial random samples on the specified categories
+    //! using the default random number generator.
+    void generateMultinomialSamples(const TDoubleVec& categories,
+                                    const TDoubleVec& probabilities,
+                                    std::size_t numberSamples,
+                                    TDoubleVec& samples);
+
+    //! Generate random samples from a Dirichlet distribution with
+    //! concentration parameters \p concentrations.
+    void generateDirichletSamples(const TDoubleVec& concentrations, std::size_t numberSamples, TDoubleVecVec& samples);
+
+    //! Generate a collection of random words of specified length using
+    //! the default random number generator.
+    void generateWords(std::size_t length, std::size_t numberSamples, TStrVec& samples);
+
+    //! Generate a collection of |\p sizes| random mean vectors and
+    //! covariance matrices and a collection of samples from those
+    //! distributions.
+    //!
+    //! \param[in] sizes The number of points to generate from each
+    //! cluster.
+    //! \param[out] means Filled in with the distribution mean for
+    //! each cluster.
+    //! \param[out] covariances Filled in with the distribution covariance
+    //! matrix for each cluster.
+    //! \param[out] points Filled in with the samples from each cluster.
+    template<std::size_t N>
+    void generateRandomMultivariateNormals(const TSizeVec& sizes,
+                                           std::vector<maths::CVectorNx1<double, N>>& means,
+                                           std::vector<maths::CSymmetricMatrixNxN<double, N>>& covariances,
+                                           std::vector<std::vector<maths::CVectorNx1<double, N>>>& points);
+
+    //!
Get a uniform generator in the range [0, n). This can be used + //! in conjunction with std::random_shuffle if you want a seeded + //! platform independent implementation. + CUniform0nGenerator uniformGenerator(); + + //! Throw away \p n random numbers. + void discard(std::size_t n); + +private: + //! The random number generator. + TGenerator m_Generator; }; - - } } #endif // INCLUDED_ml_test_CRandomNumbers_h - diff --git a/include/test/CRandomNumbersDetail.h b/include/test/CRandomNumbersDetail.h index 5da7f45822..437dafe96b 100644 --- a/include/test/CRandomNumbersDetail.h +++ b/include/test/CRandomNumbersDetail.h @@ -21,38 +21,27 @@ #include #include -namespace ml -{ -namespace test -{ - -template -void CRandomNumbers::generateSamples(RNG &randomNumberGenerator, - const Distribution &distribution, +namespace ml { +namespace test { + +template +void CRandomNumbers::generateSamples(RNG& randomNumberGenerator, + const Distribution& distribution, std::size_t numberSamples, - Container &samples) -{ + Container& samples) { samples.clear(); samples.reserve(numberSamples); - std::generate_n(std::back_inserter(samples), - numberSamples, - boost::bind(distribution, boost::ref(randomNumberGenerator))); + std::generate_n(std::back_inserter(samples), numberSamples, boost::bind(distribution, boost::ref(randomNumberGenerator))); } template -void CRandomNumbers::random_shuffle(ITR first, ITR last) -{ +void CRandomNumbers::random_shuffle(ITR first, ITR last) { CUniform0nGenerator rand(m_Generator); auto d = last - first; - if (d > 1) - { - for (--last; first < last; ++first, --d) - { + if (d > 1) { + for (--last; first < last; ++first, --d) { auto i = rand(d); - if (i > 0) - { + if (i > 0) { std::iter_swap(first, first + i); } } @@ -60,11 +49,10 @@ void CRandomNumbers::random_shuffle(ITR first, ITR last) } template -void CRandomNumbers::generateRandomMultivariateNormals(const TSizeVec &sizes, - std::vector > &means, - std::vector > &covariances, - std::vector > > &points) -{ +void CRandomNumbers::generateRandomMultivariateNormals(const TSizeVec& sizes, + std::vector>& means, + std::vector>& covariances, + std::vector>>& points) { means.clear(); covariances.clear(); points.clear(); @@ -73,20 +61,17 @@ void CRandomNumbers::generateRandomMultivariateNormals(const TSizeVec &sizes, TDoubleVec means_; this->generateUniformSamples(-100.0, 100.0, N * k, means_); - for (std::size_t i = 0; i < N * k; i += N) - { + for (std::size_t i = 0; i < N * k; i += N) { maths::CVectorNx1 mean(&means_[i], &means_[i + N]); means.push_back(mean); } TDoubleVec variances; this->generateUniformSamples(10.0, 100.0, N * k, variances); - for (std::size_t i = 0; i < k; ++i) - { + for (std::size_t i = 0; i < k; ++i) { Eigen::Matrix covariance = Eigen::Matrix::Zero(); - for (std::size_t j = 0u; j < N; ++j) - { + for (std::size_t j = 0u; j < N; ++j) { covariance(j, j) = variances[i * N + j]; } @@ -94,23 +79,21 @@ void CRandomNumbers::generateRandomMultivariateNormals(const TSizeVec &sizes, TSizeVec coordinates; this->generateUniformSamples(0, N, 4, coordinates); std::sort(coordinates.begin(), coordinates.end()); - coordinates.erase(std::unique(coordinates.begin(), - coordinates.end()), coordinates.end()); + coordinates.erase(std::unique(coordinates.begin(), coordinates.end()), coordinates.end()); TDoubleVec thetas; this->generateUniformSamples(0.0, boost::math::constants::two_pi(), 2, thetas); Eigen::Matrix rotation = Eigen::Matrix::Identity(); - for (std::size_t j = 1u; j < coordinates.size(); j += 2) - { - double ct = 
std::cos(thetas[j/2]); - double st = std::sin(thetas[j/2]); + for (std::size_t j = 1u; j < coordinates.size(); j += 2) { + double ct = std::cos(thetas[j / 2]); + double st = std::sin(thetas[j / 2]); Eigen::Matrix r = Eigen::Matrix::Identity(); - r(coordinates[j/2], coordinates[j/2]) = ct; - r(coordinates[j/2], coordinates[j/2+1]) = -st; - r(coordinates[j/2+1], coordinates[j/2]) = st; - r(coordinates[j/2+1], coordinates[j/2+1]) = ct; + r(coordinates[j / 2], coordinates[j / 2]) = ct; + r(coordinates[j / 2], coordinates[j / 2 + 1]) = -st; + r(coordinates[j / 2 + 1], coordinates[j / 2]) = st; + r(coordinates[j / 2 + 1], coordinates[j / 2 + 1]) = ct; rotation *= r; } covariance = rotation.transpose() * covariance * rotation; @@ -120,20 +103,16 @@ void CRandomNumbers::generateRandomMultivariateNormals(const TSizeVec &sizes, points.resize(k); TDoubleVecVec pointsi; - for (std::size_t i = 0u; i < k; ++i) - { + for (std::size_t i = 0u; i < k; ++i) { LOG_TRACE("mean = " << means[i]); LOG_TRACE("covariance = " << covariances[i]); - this->generateMultivariateNormalSamples(means[i].template toVector(), - covariances[i].template toVectors(), - sizes[i], pointsi); - for (std::size_t j = 0u; j < pointsi.size(); ++j) - { + this->generateMultivariateNormalSamples( + means[i].template toVector(), covariances[i].template toVectors(), sizes[i], pointsi); + for (std::size_t j = 0u; j < pointsi.size(); ++j) { points[i].emplace_back(pointsi[j]); } } } - } } diff --git a/include/test/CShellCmdEscape.h b/include/test/CShellCmdEscape.h index 0819a941c0..23c304bc19 100644 --- a/include/test/CShellCmdEscape.h +++ b/include/test/CShellCmdEscape.h @@ -12,12 +12,8 @@ #include - -namespace ml -{ -namespace test -{ - +namespace ml { +namespace test { //! \brief //! Escape special characters in a shell command @@ -29,17 +25,13 @@ namespace test //! On Unix characters are escaped for sh/ksh/bash. On Windows, //! for cmd.exe. //! -class TEST_EXPORT CShellCmdEscape : private core::CNonInstantiatable -{ - public: - //! Modifies the command such that special characters are appropriately - //! escaped - static void escapeCmd(std::string &cmd); +class TEST_EXPORT CShellCmdEscape : private core::CNonInstantiatable { +public: + //! Modifies the command such that special characters are appropriately + //! escaped + static void escapeCmd(std::string& cmd); }; - - } } #endif // INCLUDED_ml_test_CShellCmdEscape_h - diff --git a/include/test/CTestRunner.h b/include/test/CTestRunner.h index 531ad36e48..4929edafb9 100644 --- a/include/test/CTestRunner.h +++ b/include/test/CTestRunner.h @@ -13,11 +13,8 @@ #include #include - -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { //! \brief //! A class to wrap cppunit tests. @@ -71,53 +68,46 @@ namespace test //! for interactive development where changes to the code are likely to //! alter the test results. //! -class TEST_EXPORT CTestRunner : public CppUnit::TextTestRunner -{ - public: - //! Name of a file storing directories in which tests should be skipped - //! together with the previous test result - static const std::string SKIP_FILE_NAME; +class TEST_EXPORT CTestRunner : public CppUnit::TextTestRunner { +public: + //! Name of a file storing directories in which tests should be skipped + //! together with the previous test result + static const std::string SKIP_FILE_NAME; - //! Name of file storing results in XML format (to allow display by a - //! continuous integration system) - static const std::string XML_RESULT_FILE_NAME; + //! 
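A one-line sketch of the escaper declared below; the command string is illustrative:

    #include <test/CShellCmdEscape.h>
    #include <string>

    std::string cmd("cat 'results file.txt'");
    ml::test::CShellCmdEscape::escapeCmd(cmd); // special characters escaped in place
    // cmd is now safe to pass to sh/ksh/bash on Unix or cmd.exe on Windows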
Name of file storing results in XML format (to allow display by a + //! continuous integration system) + static const std::string XML_RESULT_FILE_NAME; - public: - CTestRunner(int argc, const char **argv); - virtual ~CTestRunner(); +public: + CTestRunner(int argc, const char** argv); + virtual ~CTestRunner(); - //! The command to run tests - DO NOT CALL run() - virtual bool runTests(); + //! The command to run tests - DO NOT CALL run() + virtual bool runTests(); - protected: - //! Time the unit tests - bool timeTests(const std::string &topPath, - const std::string &testPath); +protected: + //! Time the unit tests + bool timeTests(const std::string& topPath, const std::string& testPath); - //! Is the current directory listed in the skip file? If so, did the - //! previously run tests pass? - bool checkSkipFile(const std::string &cwd, - bool &passed) const; + //! Is the current directory listed in the skip file? If so, did the + //! previously run tests pass? + bool checkSkipFile(const std::string& cwd, bool& passed) const; - //! Add the current directory to the skip file (if it exists) so that - //! tests for the same directory aren't re-run. - bool updateSkipFile(const std::string &cwd, - bool passed) const; + //! Add the current directory to the skip file (if it exists) so that + //! tests for the same directory aren't re-run. + bool updateSkipFile(const std::string& cwd, bool passed) const; - private: - void processCmdLine(int argc, const char **argv); +private: + void processCmdLine(int argc, const char** argv); - private: - using TStrVec = std::vector; - using TStrVecItr = TStrVec::iterator; +private: + using TStrVec = std::vector; + using TStrVecItr = TStrVec::iterator; - TStrVec m_TestCases; - std::string m_ExeName; + TStrVec m_TestCases; + std::string m_ExeName; }; - - } } #endif // INCLUDED_ml_test_CTestRunner_h - diff --git a/include/test/CTestTimer.h b/include/test/CTestTimer.h index a1b8dd4304..f93e7b5505 100644 --- a/include/test/CTestTimer.h +++ b/include/test/CTestTimer.h @@ -17,11 +17,8 @@ #include - -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { //! \brief //! A class to time cppunit tests. @@ -39,38 +36,34 @@ namespace test //! is added to cppunit, then this class will need to be //! rewritten. //! -class TEST_EXPORT CTestTimer : public CppUnit::TestListener -{ - public: - //! Called at the start of each test - virtual void startTest(CppUnit::Test *test); +class TEST_EXPORT CTestTimer : public CppUnit::TestListener { +public: + //! Called at the start of each test + virtual void startTest(CppUnit::Test* test); - //! Called at the end of each test - virtual void endTest(CppUnit::Test *test); + //! Called at the end of each test + virtual void endTest(CppUnit::Test* test); - //! Get the time taken for a given test - uint64_t timeForTest(const std::string &testName) const; + //! Get the time taken for a given test + uint64_t timeForTest(const std::string& testName) const; - //! Get the total time taken for all tests - uint64_t totalTime() const; + //! Get the total time taken for all tests + uint64_t totalTime() const; - //! Get the average time taken for the tests - uint64_t averageTime() const; + //! Get the average time taken for the tests + uint64_t averageTime() const; - private: - //! Used to time each test - core::CStopWatch m_StopWatch; +private: + //! 
Used to time each test + core::CStopWatch m_StopWatch; - using TStrUInt64Map = std::map; - using TStrUInt64MapCItr = TStrUInt64Map::const_iterator; + using TStrUInt64Map = std::map; + using TStrUInt64MapCItr = TStrUInt64Map::const_iterator; - //! Map of test name to time taken (in ms) - TStrUInt64Map m_TestTimes; + //! Map of test name to time taken (in ms) + TStrUInt64Map m_TestTimes; }; - - } } #endif // INCLUDED_ml_test_CTestTimer_h - diff --git a/include/test/CTestTmpDir.h b/include/test/CTestTmpDir.h index cd82646061..b4aff8bc15 100644 --- a/include/test/CTestTmpDir.h +++ b/include/test/CTestTmpDir.h @@ -12,12 +12,8 @@ #include - -namespace ml -{ -namespace test -{ - +namespace ml { +namespace test { //! \brief //! Return the name of the temporary directory for the system. @@ -29,16 +25,12 @@ namespace test //! On Unix the temporary directory is /tmp. On Windows it's a //! sub-directory of the current user's home directory. //! -class TEST_EXPORT CTestTmpDir : private core::CNonInstantiatable -{ - public: - //! Returns /tmp on Unix or an expansion of %TEMP% on Windows - static std::string tmpDir(); +class TEST_EXPORT CTestTmpDir : private core::CNonInstantiatable { +public: + //! Returns /tmp on Unix or an expansion of %TEMP% on Windows + static std::string tmpDir(); }; - - } } #endif // INCLUDED_ml_test_CTestTmpDir_h - diff --git a/include/test/CTimeSeriesTestData.h b/include/test/CTimeSeriesTestData.h index 8e0d363b42..74efb355fe 100644 --- a/include/test/CTimeSeriesTestData.h +++ b/include/test/CTimeSeriesTestData.h @@ -14,122 +14,109 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CRegex; } -namespace test -{ - -class TEST_EXPORT CTimeSeriesTestData -{ - public: - using TDoubleVec = std::vector; - using TDoubleVecItr = TDoubleVec::iterator; - using TTimeDoublePr = std::pair; - using TTimeDoublePrVec = std::vector; - using TTimeDoublePrVecItr = TTimeDoublePrVec::iterator; - using TTimeDoublePrVecRItr = TTimeDoublePrVec::reverse_iterator; - using TTimeDoublePrVecCItr = TTimeDoublePrVec::const_iterator; - using TTimeDoubleVecPr = std::pair; - using TTimeDoubleVecPrVec = std::vector; - - public: - //! The default regular expression to extract the date - //! time and value. - static const std::string DEFAULT_REGEX; - //! The default regular expression to extract the date - //! time and bivalued quantities. - static const std::string DEFAULT_BIVALUED_REGEX; - //! Empty these don't use strptime. - static const std::string DEFAULT_DATE_FORMAT; - //! A regular expression suitable for csv with unix time. - static const std::string CSV_UNIX_REGEX; - //! A regular expression suitable for bivalued quantities - //! for csv fields with unix time. - static const std::string CSV_UNIX_BIVALUED_REGEX; - //! Empty these don't use strptime. - static const std::string CSV_UNIX_DATE_FORMAT; - //! A regular expression suitable for csv ISO8601 date & time format. - static const std::string CSV_ISO8601_REGEX; - //! A regular expression suitable for bivalued quantities - //! for csv ISO8601 date & time format. - static const std::string CSV_ISO8601_BIVALUED_REGEX; - //! The date format for csv ISO8601 date & time format. - static const std::string CSV_ISO8601_DATE_FORMAT; - - public: - //! Initialise from a text file - static bool parse(const std::string &fileName, - TTimeDoublePrVec &results, - const std::string ®ex = DEFAULT_REGEX, - const std::string &dateFormat = DEFAULT_DATE_FORMAT); - - //! 
Initialise from a text file (return min/max time) - static bool parse(const std::string &fileName, - TTimeDoublePrVec &results, - core_t::TTime &minTime, - core_t::TTime &maxTime, - const std::string ®ex = DEFAULT_REGEX, - const std::string &dateFormat = DEFAULT_DATE_FORMAT); - - //! Initialise multivalued from a text file - static bool parse(const std::string &fileName, - TTimeDoubleVecPrVec &results, - const std::string ®ex = DEFAULT_BIVALUED_REGEX, - const std::string &dateFormat = DEFAULT_DATE_FORMAT); - - //! Initialise multivalued from a text file (return min/max time) - static bool parse(const std::string &fileName, - TTimeDoubleVecPrVec &results, - core_t::TTime &minTime, - core_t::TTime &maxTime, - const std::string ®ex = DEFAULT_BIVALUED_REGEX, - const std::string &dateFormat = DEFAULT_DATE_FORMAT); - - //! Initialise from a text file and parse to counter - static bool parseCounter(const std::string &fileName, - TTimeDoublePrVec &results); - - //! Transform results just to 'value' - static void transform(const TTimeDoublePrVec &data, - TDoubleVec &results); - - //! 1st derivative - static void derive(const TTimeDoublePrVec &data, - TTimeDoublePrVec &results); - - //! Pad a vector from minTime to maxTime with zeros - static bool pad(const TTimeDoublePrVec &data, - core_t::TTime minTime, - core_t::TTime maxTime, - TTimeDoublePrVec &results); - - private: - template - static bool parse(const std::string &fileName, - const std::string ®ex, - const std::string &dateFormat, - std::vector > &results, - core_t::TTime &minTime, - core_t::TTime &maxTime); - - template - static bool parseLine(const core::CRegex &tokenRegex, - const std::string &dateFormat, - const std::string &line, - std::vector > &results); - - //! Prevent construction of this static class - CTimeSeriesTestData(); - CTimeSeriesTestData(const CTimeSeriesTestData &); +namespace test { + +class TEST_EXPORT CTimeSeriesTestData { +public: + using TDoubleVec = std::vector; + using TDoubleVecItr = TDoubleVec::iterator; + using TTimeDoublePr = std::pair; + using TTimeDoublePrVec = std::vector; + using TTimeDoublePrVecItr = TTimeDoublePrVec::iterator; + using TTimeDoublePrVecRItr = TTimeDoublePrVec::reverse_iterator; + using TTimeDoublePrVecCItr = TTimeDoublePrVec::const_iterator; + using TTimeDoubleVecPr = std::pair; + using TTimeDoubleVecPrVec = std::vector; + +public: + //! The default regular expression to extract the date + //! time and value. + static const std::string DEFAULT_REGEX; + //! The default regular expression to extract the date + //! time and bivalued quantities. + static const std::string DEFAULT_BIVALUED_REGEX; + //! Empty these don't use strptime. + static const std::string DEFAULT_DATE_FORMAT; + //! A regular expression suitable for csv with unix time. + static const std::string CSV_UNIX_REGEX; + //! A regular expression suitable for bivalued quantities + //! for csv fields with unix time. + static const std::string CSV_UNIX_BIVALUED_REGEX; + //! Empty these don't use strptime. + static const std::string CSV_UNIX_DATE_FORMAT; + //! A regular expression suitable for csv ISO8601 date & time format. + static const std::string CSV_ISO8601_REGEX; + //! A regular expression suitable for bivalued quantities + //! for csv ISO8601 date & time format. + static const std::string CSV_ISO8601_BIVALUED_REGEX; + //! The date format for csv ISO8601 date & time format. + static const std::string CSV_ISO8601_DATE_FORMAT; + +public: + //! 
Initialise from a text file + static bool parse(const std::string& fileName, + TTimeDoublePrVec& results, + const std::string& regex = DEFAULT_REGEX, + const std::string& dateFormat = DEFAULT_DATE_FORMAT); + + //! Initialise from a text file (return min/max time) + static bool parse(const std::string& fileName, + TTimeDoublePrVec& results, + core_t::TTime& minTime, + core_t::TTime& maxTime, + const std::string& regex = DEFAULT_REGEX, + const std::string& dateFormat = DEFAULT_DATE_FORMAT); + + //! Initialise multivalued from a text file + static bool parse(const std::string& fileName, + TTimeDoubleVecPrVec& results, + const std::string& regex = DEFAULT_BIVALUED_REGEX, + const std::string& dateFormat = DEFAULT_DATE_FORMAT); + + //! Initialise multivalued from a text file (return min/max time) + static bool parse(const std::string& fileName, + TTimeDoubleVecPrVec& results, + core_t::TTime& minTime, + core_t::TTime& maxTime, + const std::string& regex = DEFAULT_BIVALUED_REGEX, + const std::string& dateFormat = DEFAULT_DATE_FORMAT); + + //! Initialise from a text file and parse to counter + static bool parseCounter(const std::string& fileName, TTimeDoublePrVec& results); + + //! Transform results just to 'value' + static void transform(const TTimeDoublePrVec& data, TDoubleVec& results); + + //! 1st derivative + static void derive(const TTimeDoublePrVec& data, TTimeDoublePrVec& results); + + //! Pad a vector from minTime to maxTime with zeros + static bool pad(const TTimeDoublePrVec& data, core_t::TTime minTime, core_t::TTime maxTime, TTimeDoublePrVec& results); + +private: + template + static bool parse(const std::string& fileName, + const std::string& regex, + const std::string& dateFormat, + std::vector>& results, + core_t::TTime& minTime, + core_t::TTime& maxTime); + + template + static bool parseLine(const core::CRegex& tokenRegex, + const std::string& dateFormat, + const std::string& line, + std::vector>& results); + + //! Prevent construction of this static class + CTimeSeriesTestData(); + CTimeSeriesTestData(const CTimeSeriesTestData&); }; - } } #endif // INCLUDED_ml_test_CTimeSeriesTestData_h - diff --git a/include/test/CTimingXmlOutputterHook.h b/include/test/CTimingXmlOutputterHook.h index 3e61f55606..a7681f10a9 100644 --- a/include/test/CTimingXmlOutputterHook.h +++ b/include/test/CTimingXmlOutputterHook.h @@ -14,11 +14,8 @@ #include - -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { class CTestTimer; //! \brief @@ -33,43 +30,32 @@ class CTestTimer; //! because the xUnit Jenkins plugin's default XSL transform //! knows this format. //! 
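A sketch of how a hook such as the one declared below is registered with cppunit's XML outputter; the collector, stream and path arguments are illustrative, and the real wiring (in CTestRunner) is not shown in this patch:

    #include <cppunit/TestResultCollector.h>
    #include <cppunit/XmlOutputter.h>
    #include <test/CTestTimer.h>
    #include <test/CTimingXmlOutputterHook.h>
    #include <fstream>

    CppUnit::TestResultCollector collector;
    std::ofstream xmlFile("cppunit_results.xml");
    CppUnit::XmlOutputter outputter(&collector, xmlFile);
    ml::test::CTestTimer testTimer;
    ml::test::CTimingXmlOutputterHook hook(testTimer, "lib", "maths");
    outputter.addHook(&hook); // timing info is injected as each test element is written
    outputter.write();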
-class TEST_EXPORT CTimingXmlOutputterHook : public CppUnit::XmlOutputterHook -{ - public: - CTimingXmlOutputterHook(const CTestTimer &testTimer, - const std::string &topPath, - const std::string &testPath); +class TEST_EXPORT CTimingXmlOutputterHook : public CppUnit::XmlOutputterHook { +public: + CTimingXmlOutputterHook(const CTestTimer& testTimer, const std::string& topPath, const std::string& testPath); - virtual void failTestAdded(CppUnit::XmlDocument *document, - CppUnit::XmlElement *testElement, - CppUnit::Test *test, - CppUnit::TestFailure *failure); + virtual void + failTestAdded(CppUnit::XmlDocument* document, CppUnit::XmlElement* testElement, CppUnit::Test* test, CppUnit::TestFailure* failure); - virtual void successfulTestAdded(CppUnit::XmlDocument *document, - CppUnit::XmlElement *testElement, - CppUnit::Test *test); + virtual void successfulTestAdded(CppUnit::XmlDocument* document, CppUnit::XmlElement* testElement, CppUnit::Test* test); - virtual void statisticsAdded(CppUnit::XmlDocument *document, - CppUnit::XmlElement *statisticsElement); + virtual void statisticsAdded(CppUnit::XmlDocument* document, CppUnit::XmlElement* statisticsElement); - private: - //! Convert a time in ms to a time in seconds in string form - static std::string toSecondsStr(uint64_t ms); +private: + //! Convert a time in ms to a time in seconds in string form + static std::string toSecondsStr(uint64_t ms); - private: - //! Reference to test timer that we can query - const CTestTimer &m_TestTimer; +private: + //! Reference to test timer that we can query + const CTestTimer& m_TestTimer; - //! "bin" or "lib", to make the Jenkins output nicer - const std::string &m_TopPath; + //! "bin" or "lib", to make the Jenkins output nicer + const std::string& m_TopPath; - //! Name of the directory above the "unittest" directory being tested - const std::string &m_TestPath; + //! Name of the directory above the "unittest" directory being tested + const std::string& m_TestPath; }; - - } } #endif // INCLUDED_ml_test_CTimingXmlOutputterHook_h - diff --git a/include/test/ImportExport.h b/include/test/ImportExport.h index d7b87914b1..7220ebffb5 100644 --- a/include/test/ImportExport.h +++ b/include/test/ImportExport.h @@ -36,4 +36,3 @@ #endif #endif // INCLUDED_ml_test_ImportExport_h - diff --git a/include/ver/CBuildInfo.h b/include/ver/CBuildInfo.h index 349ea20e67..ebf9c8528e 100644 --- a/include/ver/CBuildInfo.h +++ b/include/ver/CBuildInfo.h @@ -10,12 +10,8 @@ #include - -namespace ml -{ -namespace ver -{ - +namespace ml { +namespace ver { //! \brief //! Wrapper for version/build numbers @@ -40,31 +36,27 @@ namespace ver //! have been mixed up - each program will have its own distinct copy of the //! version library embedded in it. //! -class CBuildInfo : private core::CNonInstantiatable -{ - public: - //! Get the version number to be printed out - static const std::string &versionNumber(); +class CBuildInfo : private core::CNonInstantiatable { +public: + //! Get the version number to be printed out + static const std::string& versionNumber(); - //! Get the build number to be printed out - static const std::string &buildNumber(); + //! Get the build number to be printed out + static const std::string& buildNumber(); - //! Get the copyright message to be printed out - static const std::string ©right(); + //! Get the copyright message to be printed out + static const std::string& copyright(); - //! Get the full information to be printed out (this includes the name - //! 
of the program, plus the version number, build number and copyright) - static std::string fullInfo(); + //! Get the full information to be printed out (this includes the name + //! of the program, plus the version number, build number and copyright) + static std::string fullInfo(); - private: - static const std::string VERSION_NUMBER; - static const std::string BUILD_NUMBER; - static const std::string COPYRIGHT; +private: + static const std::string VERSION_NUMBER; + static const std::string BUILD_NUMBER; + static const std::string COPYRIGHT; }; - - } } #endif // INCLUDED_ml_core_CBuildInfo_h - diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 8eeac44c31..118a678bd8 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -8,16 +8,16 @@ #include #include #include -#include #include #include -#include +#include #include #include #include #include #include #include +#include #include #include @@ -42,27 +42,23 @@ #include #include -#include #include +#include #include #include #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { // We use short field names to reduce the state size -namespace -{ +namespace { using TStrCRef = boost::reference_wrapper; //! Convert a (string, key) pair to something readable. template -inline std::string pairDebug(const T &t) -{ +inline std::string pairDebug(const T& t) { return boost::unwrap_ref(t.second).debug() + '/' + boost::unwrap_ref(t.first); } @@ -91,17 +87,16 @@ const std::string CAnomalyJob::EMPTY_STRING; const CAnomalyJob::TAnomalyDetectorPtr CAnomalyJob::NULL_DETECTOR; - -CAnomalyJob::CAnomalyJob(const std::string &jobId, - model::CLimits &limits, - CFieldConfig &fieldConfig, - model::CAnomalyDetectorModelConfig &modelConfig, - core::CJsonOutputStreamWrapper &outputStream, - const TPersistCompleteFunc &persistCompleteFunc, - CBackgroundPersister *periodicPersister, +CAnomalyJob::CAnomalyJob(const std::string& jobId, + model::CLimits& limits, + CFieldConfig& fieldConfig, + model::CAnomalyDetectorModelConfig& modelConfig, + core::CJsonOutputStreamWrapper& outputStream, + const TPersistCompleteFunc& persistCompleteFunc, + CBackgroundPersister* periodicPersister, core_t::TTime maxQuantileInterval, - const std::string &timeFieldName, - const std::string &timeFieldFormat, + const std::string& timeFieldName, + const std::string& timeFieldFormat, size_t maxAnomalyRecords) : m_JobId(jobId), m_Limits(limits), @@ -123,71 +118,53 @@ CAnomalyJob::CAnomalyJob(const std::string &jobId, m_LastResultsTime(0), m_Aggregator(modelConfig), m_Normalizer(modelConfig), - m_ResultsQueue(m_ModelConfig.bucketResultsDelay(), - this->effectiveBucketLength()), - m_ModelPlotQueue(m_ModelConfig.bucketResultsDelay(), - this->effectiveBucketLength(), - 0) -{ + m_ResultsQueue(m_ModelConfig.bucketResultsDelay(), this->effectiveBucketLength()), + m_ModelPlotQueue(m_ModelConfig.bucketResultsDelay(), this->effectiveBucketLength(), 0) { m_JsonOutputWriter.limitNumberRecords(maxAnomalyRecords); - m_Limits.resourceMonitor().memoryUsageReporter( - boost::bind(&CJsonOutputWriter::reportMemoryUsage, &m_JsonOutputWriter, _1)); + m_Limits.resourceMonitor().memoryUsageReporter(boost::bind(&CJsonOutputWriter::reportMemoryUsage, &m_JsonOutputWriter, _1)); } -CAnomalyJob::~CAnomalyJob() -{ +CAnomalyJob::~CAnomalyJob() { m_ForecastRunner.finishForecasts(); } -void CAnomalyJob::newOutputStream() -{ +void CAnomalyJob::newOutputStream() { m_JsonOutputWriter.newOutputStream(); } -COutputHandler &CAnomalyJob::outputHandler() -{ +COutputHandler& 
CAnomalyJob::outputHandler() { return m_JsonOutputWriter; } -bool CAnomalyJob::handleRecord(const TStrStrUMap &dataRowFields) -{ +bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields) { // Non-empty control fields take precedence over everything else TStrStrUMapCItr iter = dataRowFields.find(CONTROL_FIELD_NAME); - if (iter != dataRowFields.end() && !iter->second.empty()) - { + if (iter != dataRowFields.end() && !iter->second.empty()) { return this->handleControlMessage(iter->second); } core_t::TTime time(0); iter = dataRowFields.find(m_TimeFieldName); - if (iter == dataRowFields.end()) - { + if (iter == dataRowFields.end()) { core::CStatistics::stat(stat_t::E_NumberRecordsNoTimeField).increment(); - LOG_ERROR("Found record with no " << m_TimeFieldName << " field:" << core_t::LINE_ENDING << - this->debugPrintRecord(dataRowFields)); + LOG_ERROR("Found record with no " << m_TimeFieldName << " field:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); return true; } - if (m_TimeFieldFormat.empty()) - { - if (core::CStringUtils::stringToType(iter->second, time) == false) - { + if (m_TimeFieldFormat.empty()) { + if (core::CStringUtils::stringToType(iter->second, time) == false) { core::CStatistics::stat(stat_t::E_NumberTimeFieldConversionErrors).increment(); - LOG_ERROR("Cannot interpret " << m_TimeFieldName << " field in record:" << core_t::LINE_ENDING << - this->debugPrintRecord(dataRowFields)); + LOG_ERROR("Cannot interpret " << m_TimeFieldName << " field in record:" << core_t::LINE_ENDING + << this->debugPrintRecord(dataRowFields)); return true; } - } - else - { + } else { // Use this library function instead of raw strptime() as it works // around many operating system specific issues. - if (core::CTimeUtils::strptime(m_TimeFieldFormat, iter->second, time) == false) - { + if (core::CTimeUtils::strptime(m_TimeFieldFormat, iter->second, time) == false) { core::CStatistics::stat(stat_t::E_NumberTimeFieldConversionErrors).increment(); - LOG_ERROR("Cannot interpret " << m_TimeFieldName << - " field using format " << m_TimeFieldFormat << " in record:" << core_t::LINE_ENDING << - this->debugPrintRecord(dataRowFields)); + LOG_ERROR("Cannot interpret " << m_TimeFieldName << " field using format " << m_TimeFieldFormat + << " in record:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); return true; } } @@ -196,44 +173,37 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap &dataRowFields) // is zero, then it should be after the current bucket end. If // latency is non-zero, then it should be after the current bucket // end minus the latency. - if (time < m_LastFinalisedBucketEndTime) - { + if (time < m_LastFinalisedBucketEndTime) { core::CStatistics::stat(stat_t::E_NumberTimeOrderErrors).increment(); std::ostringstream ss; ss << "Records must be in ascending time order. 
" - << "Record '" << this->debugPrintRecord(dataRowFields) - << "' time " << time << " is before bucket time " << m_LastFinalisedBucketEndTime; + << "Record '" << this->debugPrintRecord(dataRowFields) << "' time " << time << " is before bucket time " + << m_LastFinalisedBucketEndTime; LOG_ERROR(ss.str()); return true; } this->outputBucketResultsUntil(time); - if (m_DetectorKeys.empty()) - { + if (m_DetectorKeys.empty()) { this->populateDetectorKeys(m_FieldConfig, m_DetectorKeys); } - for (std::size_t i = 0u; i < m_DetectorKeys.size(); ++i) - { - const std::string &partitionFieldName(m_DetectorKeys[i].partitionFieldName()); + for (std::size_t i = 0u; i < m_DetectorKeys.size(); ++i) { + const std::string& partitionFieldName(m_DetectorKeys[i].partitionFieldName()); // An empty partitionFieldName means no partitioning - TStrStrUMapCItr itr = partitionFieldName.empty() ? - dataRowFields.end() : - dataRowFields.find(partitionFieldName); - const std::string &partitionFieldValue(itr == dataRowFields.end() ? - EMPTY_STRING : itr->second); + TStrStrUMapCItr itr = partitionFieldName.empty() ? dataRowFields.end() : dataRowFields.find(partitionFieldName); + const std::string& partitionFieldValue(itr == dataRowFields.end() ? EMPTY_STRING : itr->second); // TODO - should usenull apply to the partition field too? - const TAnomalyDetectorPtr &detector = this->detectorForKey(false, // not restoring + const TAnomalyDetectorPtr& detector = this->detectorForKey(false, // not restoring time, m_DetectorKeys[i], partitionFieldValue, m_Limits.resourceMonitor()); - if (detector == 0) - { + if (detector == 0) { // There wasn't enough memory to create the detector continue; } @@ -249,8 +219,7 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap &dataRowFields) return true; } -void CAnomalyJob::finalise() -{ +void CAnomalyJob::finalise() { // Persist final state of normalizer m_JsonOutputWriter.persistNormalizer(m_Normalizer, m_LastNormalizerPersistTime); @@ -262,27 +231,22 @@ void CAnomalyJob::finalise() // Wait for any ongoing periodic persist to complete, so that the data adder // is not used by both a periodic periodic persist and final persist at the // same time - if (m_PeriodicPersister != nullptr) - { + if (m_PeriodicPersister != nullptr) { m_PeriodicPersister->waitForIdle(); } } -bool CAnomalyJob::initNormalizer(const std::string &quantilesStateFile) -{ +bool CAnomalyJob::initNormalizer(const std::string& quantilesStateFile) { std::ifstream inputStream(quantilesStateFile.c_str()); return m_Normalizer.fromJsonStream(inputStream) == model::CHierarchicalResultsNormalizer::E_Ok; } -uint64_t CAnomalyJob::numRecordsHandled() const -{ +uint64_t CAnomalyJob::numRecordsHandled() const { return m_NumRecordsHandled; } -void CAnomalyJob::description() const -{ - if (m_Detectors.empty()) - { +void CAnomalyJob::description() const { + if (m_Detectors.empty()) { return; } @@ -294,10 +258,8 @@ void CAnomalyJob::description() const LOG_INFO("\tpartition " << partition.get()); LOG_INFO("\t\tkey " << detectors[0].first.second.get()); LOG_INFO("\t\t\t" << detectors[0].second->description()); - for (std::size_t i = 1u; i < detectors.size(); ++i) - { - if (detectors[i].first.first.get() != partition.get()) - { + for (std::size_t i = 1u; i < detectors.size(); ++i) { + if (detectors[i].first.first.get() != partition.get()) { partition = detectors[i].first.first; LOG_INFO("\tpartition " << partition.get()); } @@ -306,10 +268,8 @@ void CAnomalyJob::description() const } } -void CAnomalyJob::descriptionAndDebugMemoryUsage() const -{ - 
if (m_Detectors.empty()) - { +void CAnomalyJob::descriptionAndDebugMemoryUsage() const { + if (m_Detectors.empty()) { LOG_INFO("No detectors"); return; } @@ -325,11 +285,9 @@ void CAnomalyJob::descriptionAndDebugMemoryUsage() const ss << "\t\t\t" << detectors[0].second->description() << std::endl; detectors[0].second->showMemoryUsage(ss); - for (std::size_t i = 1u; i < detectors.size(); ++i) - { + for (std::size_t i = 1u; i < detectors.size(); ++i) { ss << std::endl; - if (detectors[i].first.first.get() != partition.get()) - { + if (detectors[i].first.first.get() != partition.get()) { partition = detectors[i].first.first; ss << "\tpartition " << partition.get() << std::endl; } @@ -340,119 +298,97 @@ void CAnomalyJob::descriptionAndDebugMemoryUsage() const LOG_INFO(ss.str()); } -const CAnomalyJob::SRestoredStateDetail &CAnomalyJob::restoreStateStatus() const -{ +const CAnomalyJob::SRestoredStateDetail& CAnomalyJob::restoreStateStatus() const { return m_RestoredStateDetail; } -bool CAnomalyJob::handleControlMessage(const std::string &controlMessage) -{ - if (controlMessage.empty()) - { +bool CAnomalyJob::handleControlMessage(const std::string& controlMessage) { + if (controlMessage.empty()) { LOG_ERROR("Programmatic error - handleControlMessage should only be " "called with non-empty control messages"); return false; } - switch (controlMessage[0]) - { - case ' ': - // Spaces are just used to fill the buffers and force prior messages - // through the system - we don't need to do anything else - LOG_TRACE("Received space control message of length " << - controlMessage.length()); - break; - case CONTROL_FIELD_NAME_CHAR: - // Silent no-op. This is a simple way to ignore repeated header - // rows in input. - break; - case 'f': - // Flush ID comes after the initial f - this->acknowledgeFlush(controlMessage.substr(1)); - break; - case 'i': - this->generateInterimResults(controlMessage); - break; - case 'r': - this->resetBuckets(controlMessage); - break; - case 's': - this->skipTime(controlMessage.substr(1)); - break; - case 't': - this->advanceTime(controlMessage.substr(1)); - break; - case 'u': - this->updateConfig(controlMessage.substr(1)); - break; - case 'p': - this->doForecast(controlMessage); - break; - case 'w': - { - if (m_PeriodicPersister != nullptr) - { - m_PeriodicPersister->startBackgroundPersist(); - } - } - break; - default: - LOG_WARN("Ignoring unknown control message of length " << - controlMessage.length() << " beginning with '" << - controlMessage[0] << '\''); - // Don't return false here (for the time being at least), as it - // seems excessive to cause the entire job to fail - break; + switch (controlMessage[0]) { + case ' ': + // Spaces are just used to fill the buffers and force prior messages + // through the system - we don't need to do anything else + LOG_TRACE("Received space control message of length " << controlMessage.length()); + break; + case CONTROL_FIELD_NAME_CHAR: + // Silent no-op. This is a simple way to ignore repeated header + // rows in input. 
+ break; + case 'f': + // Flush ID comes after the initial f + this->acknowledgeFlush(controlMessage.substr(1)); + break; + case 'i': + this->generateInterimResults(controlMessage); + break; + case 'r': + this->resetBuckets(controlMessage); + break; + case 's': + this->skipTime(controlMessage.substr(1)); + break; + case 't': + this->advanceTime(controlMessage.substr(1)); + break; + case 'u': + this->updateConfig(controlMessage.substr(1)); + break; + case 'p': + this->doForecast(controlMessage); + break; + case 'w': { + if (m_PeriodicPersister != nullptr) { + m_PeriodicPersister->startBackgroundPersist(); + } + } break; + default: + LOG_WARN("Ignoring unknown control message of length " << controlMessage.length() << " beginning with '" << controlMessage[0] + << '\''); + // Don't return false here (for the time being at least), as it + // seems excessive to cause the entire job to fail + break; } return true; } -void CAnomalyJob::acknowledgeFlush(const std::string &flushId) -{ - if (flushId.empty()) - { +void CAnomalyJob::acknowledgeFlush(const std::string& flushId) { + if (flushId.empty()) { LOG_ERROR("Received flush control message with no ID"); - } - else - { + } else { LOG_TRACE("Received flush control message with ID " << flushId); } m_JsonOutputWriter.acknowledgeFlush(flushId, m_LastFinalisedBucketEndTime); } -void CAnomalyJob::updateConfig(const std::string &config) -{ +void CAnomalyJob::updateConfig(const std::string& config) { LOG_DEBUG("Received update config request: " << config); CConfigUpdater configUpdater(m_FieldConfig, m_ModelConfig); - if (configUpdater.update(config) == false) - { + if (configUpdater.update(config) == false) { LOG_ERROR("Failed to update configuration"); } } -void CAnomalyJob::advanceTime(const std::string &time_) -{ - if (time_.empty()) - { +void CAnomalyJob::advanceTime(const std::string& time_) { + if (time_.empty()) { LOG_ERROR("Received request to advance time with no time"); return; } core_t::TTime time(0); - if (core::CStringUtils::stringToType(time_, time) == false) - { + if (core::CStringUtils::stringToType(time_, time) == false) { LOG_ERROR("Received request to advance time to invalid time " << time_); return; } - if (m_LastFinalisedBucketEndTime == 0) - { - LOG_DEBUG("Manually advancing time to " << time << - " before any valid data has been seen"); - } - else - { + if (m_LastFinalisedBucketEndTime == 0) { + LOG_DEBUG("Manually advancing time to " << time << " before any valid data has been seen"); + } else { LOG_TRACE("Received request to advance time to " << time); } @@ -461,24 +397,19 @@ void CAnomalyJob::advanceTime(const std::string &time_) this->timeNow(time); } -void CAnomalyJob::outputBucketResultsUntil(core_t::TTime time) -{ +void CAnomalyJob::outputBucketResultsUntil(core_t::TTime time) { // If the bucket time has increased, output results for all field names core_t::TTime bucketLength = m_ModelConfig.bucketLength(); core_t::TTime effectiveBucketLength = this->effectiveBucketLength(); core_t::TTime latency = m_ModelConfig.latency(); - if (m_LastFinalisedBucketEndTime == 0) - { + if (m_LastFinalisedBucketEndTime == 0) { m_LastFinalisedBucketEndTime = - std::max(m_LastFinalisedBucketEndTime, - maths::CIntegerTools::floor(time, effectiveBucketLength) - latency); + std::max(m_LastFinalisedBucketEndTime, maths::CIntegerTools::floor(time, effectiveBucketLength) - latency); } - for (core_t::TTime lastBucketEndTime = m_LastFinalisedBucketEndTime; - lastBucketEndTime + bucketLength + latency <= time; - lastBucketEndTime += effectiveBucketLength) 
- { + for (core_t::TTime lastBucketEndTime = m_LastFinalisedBucketEndTime; lastBucketEndTime + bucketLength + latency <= time; + lastBucketEndTime += effectiveBucketLength) { this->outputResults(lastBucketEndTime); m_Limits.resourceMonitor().sendMemoryUsageReportIfSignificantlyChanged(lastBucketEndTime); m_LastFinalisedBucketEndTime = lastBucketEndTime + effectiveBucketLength; @@ -486,24 +417,20 @@ void CAnomalyJob::outputBucketResultsUntil(core_t::TTime time) // Check for periodic persistence immediately after calculating results // for the last bucket but before adding the first piece of data for the // next bucket - if (m_PeriodicPersister != nullptr) - { + if (m_PeriodicPersister != nullptr) { m_PeriodicPersister->startBackgroundPersistIfAppropriate(); } } } -void CAnomalyJob::skipTime(const std::string &time_) -{ - if (time_.empty()) - { +void CAnomalyJob::skipTime(const std::string& time_) { + if (time_.empty()) { LOG_ERROR("Received request to skip time with no time"); return; } core_t::TTime time(0); - if (core::CStringUtils::stringToType(time_, time) == false) - { + if (core::CStringUtils::stringToType(time_, time) == false) { LOG_ERROR("Received request to skip time to invalid time " << time_); return; } @@ -511,17 +438,14 @@ void CAnomalyJob::skipTime(const std::string &time_) this->skipSampling(maths::CIntegerTools::ceil(time, m_ModelConfig.bucketLength())); } -void CAnomalyJob::skipSampling(core_t::TTime endTime) -{ +void CAnomalyJob::skipSampling(core_t::TTime endTime) { LOG_INFO("Skipping time to: " << endTime); this->flushAndResetResultsQueue(endTime); - for (const auto &detector_ : m_Detectors) - { - model::CAnomalyDetector *detector(detector_.second.get()); - if (detector == 0) - { + for (const auto& detector_ : m_Detectors) { + model::CAnomalyDetector* detector(detector_.second.get()); + if (detector == 0) { LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); continue; } @@ -531,24 +455,18 @@ void CAnomalyJob::skipSampling(core_t::TTime endTime) m_LastFinalisedBucketEndTime = endTime; } -void CAnomalyJob::flushAndResetResultsQueue(core_t::TTime startTime) -{ +void CAnomalyJob::flushAndResetResultsQueue(core_t::TTime startTime) { LOG_DEBUG("Flush & reset results queue: " << startTime); - if (m_ModelConfig.bucketResultsDelay() != 0) - { + if (m_ModelConfig.bucketResultsDelay() != 0) { core_t::TTime effectiveBucketLength = this->effectiveBucketLength(); - core_t::TTime earliestResultTime = - m_LastFinalisedBucketEndTime - m_ResultsQueue.size() * effectiveBucketLength; - for (core_t::TTime bucketStart = earliestResultTime; - bucketStart < m_LastFinalisedBucketEndTime; - bucketStart += effectiveBucketLength) - { - model::CHierarchicalResults &results = m_ResultsQueue.latest(); + core_t::TTime earliestResultTime = m_LastFinalisedBucketEndTime - m_ResultsQueue.size() * effectiveBucketLength; + for (core_t::TTime bucketStart = earliestResultTime; bucketStart < m_LastFinalisedBucketEndTime; + bucketStart += effectiveBucketLength) { + model::CHierarchicalResults& results = m_ResultsQueue.latest(); core_t::TTime resultsTime = m_ResultsQueue.chooseResultTime(bucketStart, m_ModelConfig.bucketLength(), results); - if (resultsTime != 0) - { + if (resultsTime != 0) { core::CStopWatch timer(true); - model::CHierarchicalResults &resultsToOutput = m_ResultsQueue.get(resultsTime); + model::CHierarchicalResults& resultsToOutput = m_ResultsQueue.get(resultsTime); uint64_t processingTime = timer.stop(); // Model plots must be written first so the Java persists 
them // once the bucket result is processed @@ -566,13 +484,10 @@ void CAnomalyJob::flushAndResetResultsQueue(core_t::TTime startTime) m_ModelPlotQueue.reset(resetTime); } -void CAnomalyJob::timeNow(core_t::TTime time) -{ - for (const auto &detector_ : m_Detectors) - { - model::CAnomalyDetector *detector(detector_.second.get()); - if (detector == 0) - { +void CAnomalyJob::timeNow(core_t::TTime time) { + for (const auto& detector_ : m_Detectors) { + model::CAnomalyDetector* detector(detector_.second.get()); + if (detector == 0) { LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); continue; } @@ -580,82 +495,64 @@ void CAnomalyJob::timeNow(core_t::TTime time) } } -core_t::TTime CAnomalyJob::effectiveBucketLength() const -{ - return m_ModelConfig.bucketResultsDelay() ? m_ModelConfig.bucketLength() / 2 : - m_ModelConfig.bucketLength(); +core_t::TTime CAnomalyJob::effectiveBucketLength() const { + return m_ModelConfig.bucketResultsDelay() ? m_ModelConfig.bucketLength() / 2 : m_ModelConfig.bucketLength(); } -void CAnomalyJob::generateInterimResults(const std::string &controlMessage) -{ +void CAnomalyJob::generateInterimResults(const std::string& controlMessage) { LOG_TRACE("Generating interim results"); - if (m_LastFinalisedBucketEndTime == 0) - { + if (m_LastFinalisedBucketEndTime == 0) { LOG_TRACE("Cannot create interim results having seen data for less than one bucket ever"); return; } core_t::TTime start = m_LastFinalisedBucketEndTime; - core_t::TTime end = m_LastFinalisedBucketEndTime + - (m_ModelConfig.latencyBuckets() + 1) * this->effectiveBucketLength(); + core_t::TTime end = m_LastFinalisedBucketEndTime + (m_ModelConfig.latencyBuckets() + 1) * this->effectiveBucketLength(); - if (this->parseTimeRangeInControlMessage(controlMessage, start, end)) - { + if (this->parseTimeRangeInControlMessage(controlMessage, start, end)) { LOG_TRACE("Time range for results: " << start << " : " << end); this->outputResultsWithinRange(true, start, end); } } -bool CAnomalyJob::parseTimeRangeInControlMessage(const std::string &controlMessage, - core_t::TTime &start, - core_t::TTime &end) -{ +bool CAnomalyJob::parseTimeRangeInControlMessage(const std::string& controlMessage, core_t::TTime& start, core_t::TTime& end) { using TStrVec = core::CStringUtils::TStrVec; TStrVec tokens; std::string remainder; core::CStringUtils::tokenise(" ", controlMessage.substr(1, std::string::npos), tokens, remainder); - if (!remainder.empty()) - { + if (!remainder.empty()) { tokens.push_back(remainder); } std::size_t tokensSize = tokens.size(); - if (tokensSize == 0) - { + if (tokensSize == 0) { // Default range return true; } - if (tokensSize != 2) - { - LOG_ERROR("Control message " << controlMessage << " has " << tokensSize << - " parameters when only zero or two are allowed."); + if (tokensSize != 2) { + LOG_ERROR("Control message " << controlMessage << " has " << tokensSize << " parameters when only zero or two are allowed."); return false; } - if (core::CStringUtils::stringToType(tokens[0], start) - && core::CStringUtils::stringToType(tokens[1], end)) - { + if (core::CStringUtils::stringToType(tokens[0], start) && core::CStringUtils::stringToType(tokens[1], end)) { return true; } LOG_ERROR("Cannot parse control message: " << controlMessage); return false; } -void CAnomalyJob::doForecast(const std::string &controlMessage) -{ +void CAnomalyJob::doForecast(const std::string& controlMessage) { // make a copy of the detectors vector, note: this is a shallow, not a deep copy TAnomalyDetectorPtrVec 
-void CAnomalyJob::doForecast(const std::string &controlMessage)
-{
+void CAnomalyJob::doForecast(const std::string& controlMessage) {
     // make a copy of the detectors vector, note: this is a shallow, not a deep copy
     TAnomalyDetectorPtrVec detectorVector;
     this->detectors(detectorVector);
 
     // push request into forecast queue, validates
-    if (!m_ForecastRunner.pushForecastJob(controlMessage, detectorVector, m_LastResultsTime))
-    {
+    if (!m_ForecastRunner.pushForecastJob(controlMessage, detectorVector, m_LastResultsTime)) {
         // ForecastRunner has already logged about it and sent a status, so no need to log at info here
         LOG_DEBUG("Forecast request failed");
     }
 }
 
-void CAnomalyJob::outputResults(core_t::TTime bucketStartTime)
-{
+void CAnomalyJob::outputResults(core_t::TTime bucketStartTime) {
     using TKeyAnomalyDetectorPtrUMapCItr = TKeyAnomalyDetectorPtrUMap::const_iterator;
     using TKeyAnomalyDetectorPtrUMapCItrVec = std::vector<TKeyAnomalyDetectorPtrUMapCItr>;
 
@@ -665,29 +562,24 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime)
 
     core_t::TTime bucketLength = m_ModelConfig.bucketLength();
 
-    if (m_ModelPlotQueue.latestBucketEnd() < bucketLength)
-    {
+    if (m_ModelPlotQueue.latestBucketEnd() < bucketLength) {
         m_ModelPlotQueue.reset(bucketStartTime - m_ModelPlotQueue.bucketLength());
     }
 
     m_ResultsQueue.push(model::CHierarchicalResults(), bucketStartTime);
-    model::CHierarchicalResults &results = m_ResultsQueue.get(bucketStartTime);
+    model::CHierarchicalResults& results = m_ResultsQueue.get(bucketStartTime);
     m_ModelPlotQueue.push(TModelPlotDataVec(), bucketStartTime);
 
     TKeyAnomalyDetectorPtrUMapCItrVec iterators;
     iterators.reserve(m_Detectors.size());
-    for (TKeyAnomalyDetectorPtrUMapCItr itr = m_Detectors.begin(); itr != m_Detectors.end(); ++itr)
-    {
+    for (TKeyAnomalyDetectorPtrUMapCItr itr = m_Detectors.begin(); itr != m_Detectors.end(); ++itr) {
         iterators.push_back(itr);
     }
-    std::sort(iterators.begin(), iterators.end(),
-              core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
+    std::sort(iterators.begin(), iterators.end(), core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
 
-    for (std::size_t i = 0u; i < iterators.size(); ++i)
-    {
-        model::CAnomalyDetector *detector(iterators[i]->second.get());
-        if (detector == 0)
-        {
+    for (std::size_t i = 0u; i < iterators.size(); ++i) {
+        model::CAnomalyDetector* detector(iterators[i]->second.get());
+        if (detector == 0) {
             LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(iterators[i]->first) << '\'');
             continue;
         }
@@ -697,8 +589,7 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime)
         this->generateModelPlot(bucketStartTime, bucketStartTime + bucketLength, *detector);
     }
 
-    if (!results.empty())
-    {
+    if (!results.empty()) {
         results.buildHierarchy();
 
         this->updateAggregatorAndAggregate(false, results);
@@ -715,18 +606,15 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime)
     }
 
     core_t::TTime resultsTime = m_ResultsQueue.chooseResultTime(bucketStartTime, bucketLength, results);
-    if (resultsTime != 0)
-    {
-        model::CHierarchicalResults &resultsToOutput = m_ResultsQueue.get(resultsTime);
+    if (resultsTime != 0) {
+        model::CHierarchicalResults& resultsToOutput = m_ResultsQueue.get(resultsTime);
         uint64_t processingTime = timer.stop();
         // Model plots must be written first so the Java persists them
         // once the bucket result is processed
         this->writeOutModelPlot(resultsTime);
         this->writeOutResults(false, resultsToOutput, resultsTime, processingTime, cumulativeTime);
         cumulativeTime = 0;
-    }
-    else
-    {
+    } else {
         cumulativeTime += timer.stop();
     }
 
@@ -734,8 +622,7 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime)
     model::CStringStore::tidyUpNotThreadSafe();
 }
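outputResults above visits m_Detectors, an unordered map, in a deterministic order by sorting cheap const iterators through a dereferencing comparator rather than copying the (key, detector) pairs themselves. A std-only sketch of the pattern behind core::CFunctional::SDereference; the map contents are made up:

#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
    std::unordered_map<std::string, int> detectors{{"bytes", 1}, {"airline", 2}, {"status", 3}};

    using TCItr = std::unordered_map<std::string, int>::const_iterator;
    std::vector<TCItr> iterators;
    iterators.reserve(detectors.size());
    for (TCItr itr = detectors.begin(); itr != detectors.end(); ++itr) {
        iterators.push_back(itr);
    }

    // Dereference each iterator, then compare on the pair's first member.
    std::sort(iterators.begin(), iterators.end(),
              [](TCItr lhs, TCItr rhs) { return lhs->first < rhs->first; });

    for (TCItr itr : iterators) {
        std::cout << itr->first << '\n'; // airline, bytes, status
    }
}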
-void CAnomalyJob::outputInterimResults(core_t::TTime bucketStartTime)
-{
+void CAnomalyJob::outputInterimResults(core_t::TTime bucketStartTime) {
     core::CStopWatch timer(true);
 
     core_t::TTime bucketLength = m_ModelConfig.bucketLength();
 
@@ -743,21 +630,16 @@ void CAnomalyJob::outputInterimResults(core_t::TTime bucketStartTime)
     model::CHierarchicalResults results;
     results.setInterim();
 
-    for (const auto &detector_ : m_Detectors)
-    {
-        model::CAnomalyDetector *detector(detector_.second.get());
-        if (detector == 0)
-        {
+    for (const auto& detector_ : m_Detectors) {
+        model::CAnomalyDetector* detector(detector_.second.get());
+        if (detector == 0) {
             LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\'');
             continue;
         }
-        detector->buildInterimResults(bucketStartTime,
-                                      bucketStartTime + bucketLength,
-                                      results);
+        detector->buildInterimResults(bucketStartTime, bucketStartTime + bucketLength, results);
     }
 
-    if (!results.empty())
-    {
+    if (!results.empty()) {
         results.buildHierarchy();
 
         this->updateAggregatorAndAggregate(true, results);
@@ -776,78 +658,63 @@ void CAnomalyJob::outputInterimResults(core_t::TTime bucketStartTime)
     // For the case where there are out-of-phase buckets, and there is a gap for an
     // intermediate bucket, output it as interim too.
     uint64_t processingTime = timer.stop();
-    if (m_ResultsQueue.hasInterimResults())
-    {
+    if (m_ResultsQueue.hasInterimResults()) {
         core_t::TTime olderTime = bucketStartTime - bucketLength;
-        model::CHierarchicalResults &olderResult = m_ResultsQueue.get(olderTime);
+        model::CHierarchicalResults& olderResult = m_ResultsQueue.get(olderTime);
         this->writeOutResults(true, olderResult, olderTime, processingTime, 0l);
     }
     this->writeOutResults(true, results, bucketStartTime, processingTime, 0l);
 }
 
-void CAnomalyJob::writeOutResults(bool interim, model::CHierarchicalResults &results,
-                                  core_t::TTime bucketTime, uint64_t processingTime,
-                                  uint64_t sumPastProcessingTime)
-{
-    if (!results.empty())
-    {
-        LOG_TRACE("Got results object here: " << results.root()->s_RawAnomalyScore << " / " <<
-                  results.root()->s_NormalizedAnomalyScore << ", count " << results.resultCount()
-                  << " at " << bucketTime);
+void CAnomalyJob::writeOutResults(bool interim,
+                                  model::CHierarchicalResults& results,
+                                  core_t::TTime bucketTime,
+                                  uint64_t processingTime,
+                                  uint64_t sumPastProcessingTime) {
+    if (!results.empty()) {
+        LOG_TRACE("Got results object here: " << results.root()->s_RawAnomalyScore << " / " << results.root()->s_NormalizedAnomalyScore
+                                              << ", count " << results.resultCount() << " at " << bucketTime);
 
         using TScopedAllocator = ml::core::CScopedRapidJsonPoolAllocator<CJsonOutputWriter>;
         static const std::string ALLOCATOR_ID("CAnomalyJob::writeOutResults");
         TScopedAllocator scopedAllocator(ALLOCATOR_ID, m_JsonOutputWriter);
 
-        api::CHierarchicalResultsWriter writer(m_Limits, m_ModelConfig,
-                                               boost::bind(&CJsonOutputWriter::acceptResult,
-                                                           &m_JsonOutputWriter,
-                                                           _1),
-                                               boost::bind(&CJsonOutputWriter::acceptInfluencer,
-                                                           &m_JsonOutputWriter,
-                                                           _1, _2, _3));
+        api::CHierarchicalResultsWriter writer(m_Limits,
+                                               m_ModelConfig,
+                                               boost::bind(&CJsonOutputWriter::acceptResult, &m_JsonOutputWriter, _1),
+                                               boost::bind(&CJsonOutputWriter::acceptInfluencer, &m_JsonOutputWriter, _1, _2, _3));
         results.bottomUpBreadthFirst(writer);
         results.pivotsBottomUpBreadthFirst(writer);
 
         // Add the bucketTime bucket influencer.
         // Note that the influencer will only be accepted if there are records.
-        m_JsonOutputWriter.acceptBucketTimeInfluencer(
-                bucketTime,
-                results.root()->s_AnnotatedProbability.s_Probability,
-                results.root()->s_RawAnomalyScore,
-                results.root()->s_NormalizedAnomalyScore);
-
-        if (m_JsonOutputWriter.endOutputBatch(interim,
-                                              sumPastProcessingTime + processingTime) == false)
-        {
+        m_JsonOutputWriter.acceptBucketTimeInfluencer(bucketTime,
                                                       results.root()->s_AnnotatedProbability.s_Probability,
                                                       results.root()->s_RawAnomalyScore,
                                                       results.root()->s_NormalizedAnomalyScore);
+
+        if (m_JsonOutputWriter.endOutputBatch(interim, sumPastProcessingTime + processingTime) == false) {
             LOG_ERROR("Problem writing anomaly output");
         }
         m_LastResultsTime = bucketTime;
     }
 }
 
-
-void CAnomalyJob::resetBuckets(const std::string &controlMessage)
-{
-    if (controlMessage.length() == 1)
-    {
+void CAnomalyJob::resetBuckets(const std::string& controlMessage) {
+    if (controlMessage.length() == 1) {
         LOG_ERROR("Received reset buckets control message without time range");
         return;
     }
     core_t::TTime start = 0;
     core_t::TTime end = 0;
-    if (this->parseTimeRangeInControlMessage(controlMessage, start, end))
-    {
+    if (this->parseTimeRangeInControlMessage(controlMessage, start, end)) {
         core_t::TTime bucketLength = m_ModelConfig.bucketLength();
         core_t::TTime time = maths::CIntegerTools::floor(start, bucketLength);
         core_t::TTime bucketEnd = maths::CIntegerTools::ceil(end, bucketLength);
 
-        while (time < bucketEnd)
-        {
-            for (const auto &detector_ : m_Detectors)
-            {
-                model::CAnomalyDetector *detector = detector_.second.get();
-                if (detector == 0)
-                {
+        while (time < bucketEnd) {
+            for (const auto& detector_ : m_Detectors) {
+                model::CAnomalyDetector* detector = detector_.second.get();
+                if (detector == 0) {
                     LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\'');
                     continue;
                 }
@@ -859,38 +726,30 @@ void CAnomalyJob::resetBuckets(const std::string &controlMessage)
         }
     }
 }
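resetBuckets above, like outputResultsWithinRange later in this file, first widens the requested range outward to whole buckets and then walks it one bucket at a time. A minimal sketch of that alignment, assuming non-negative times; bucketFloor and bucketCeil are illustrative stand-ins for maths::CIntegerTools::floor and ceil:

#include <cstdint>
#include <iostream>

using TTime = std::int64_t;

// Round down/up to a multiple of bucketLength (non-negative times assumed).
TTime bucketFloor(TTime time, TTime bucketLength) {
    return (time / bucketLength) * bucketLength;
}
TTime bucketCeil(TTime time, TTime bucketLength) {
    return bucketFloor(time + bucketLength - 1, bucketLength);
}

int main() {
    const TTime bucketLength = 3600;
    TTime time = bucketFloor(5000, bucketLength);     // 3600
    TTime bucketEnd = bucketCeil(9500, bucketLength); // 10800
    for (; time < bucketEnd; time += bucketLength) {
        std::cout << "reset bucket [" << time << ", " << time + bucketLength << ")\n";
    }
}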
-bool CAnomalyJob::restoreState(core::CDataSearcher &restoreSearcher,
-                               core_t::TTime &completeToTime)
-{
+bool CAnomalyJob::restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime) {
     // Pass on the request in case we're chained
-    if (this->outputHandler().restoreState(restoreSearcher,
-                                           completeToTime) == false)
-    {
+    if (this->outputHandler().restoreState(restoreSearcher, completeToTime) == false) {
         return false;
     }
 
     size_t numDetectors(0);
-    try
-    {
+    try {
         // Restore from Elasticsearch compressed data
         core::CStateDecompressor decompressor(restoreSearcher);
         decompressor.setStateRestoreSearch(ML_STATE_INDEX);
 
         core::CDataSearcher::TIStreamP strm(decompressor.search(1, 1));
-        if (strm == 0)
-        {
+        if (strm == 0) {
             LOG_ERROR("Unable to connect to data store");
             return false;
         }
 
-        if (strm->bad())
-        {
+        if (strm->bad()) {
             LOG_ERROR("State restoration search returned bad stream");
             return false;
         }
 
-        if (strm->fail())
-        {
+        if (strm->fail()) {
             // This is fatal. If the stream exists and has failed then state is missing
             LOG_ERROR("State restoration search returned failed stream");
             return false;
@@ -899,52 +758,38 @@ bool CAnomalyJob::restoreState(core::CDataSearcher &restoreSearcher,
 
         // We're dealing with streaming JSON state
         core::CJsonStateRestoreTraverser traverser(*strm);
 
-        if (this->restoreState(traverser, completeToTime, numDetectors) == false)
-        {
+        if (this->restoreState(traverser, completeToTime, numDetectors) == false) {
             LOG_ERROR("Failed to restore detectors");
             return false;
         }
         LOG_DEBUG("Finished restoration, with " << numDetectors << " detectors");
 
-        if (numDetectors == 1 && m_Detectors.empty())
-        {
+        if (numDetectors == 1 && m_Detectors.empty()) {
             // non-fatal error
             m_RestoredStateDetail.s_RestoredStateStatus = E_NoDetectorsRecovered;
             return true;
         }
 
-        if (completeToTime > 0)
-        {
-            core_t::TTime lastBucketEndTime(maths::CIntegerTools::ceil(completeToTime,
-                                                                       m_ModelConfig.bucketLength()));
+        if (completeToTime > 0) {
+            core_t::TTime lastBucketEndTime(maths::CIntegerTools::ceil(completeToTime, m_ModelConfig.bucketLength()));
 
-            for (const auto &detector_ : m_Detectors)
-            {
-                model::CAnomalyDetector *detector(detector_.second.get());
-                if (detector == 0)
-                {
+            for (const auto& detector_ : m_Detectors) {
+                model::CAnomalyDetector* detector(detector_.second.get());
+                if (detector == 0) {
                     LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\'');
                     continue;
                 }
-                LOG_DEBUG("Setting lastBucketEndTime to "
-                          << lastBucketEndTime << " in detector for '"
-                          << detector->description() << '\'');
+                LOG_DEBUG("Setting lastBucketEndTime to " << lastBucketEndTime << " in detector for '" << detector->description() << '\'');
                 detector->lastBucketEndTime() = lastBucketEndTime;
             }
-        }
-        else
-        {
-            if (!m_Detectors.empty())
-            {
-                LOG_ERROR("Inconsistency - " << m_Detectors.size() <<
-                          " detectors have been restored but completeToTime is " <<
-                          completeToTime);
+        } else {
+            if (!m_Detectors.empty()) {
+                LOG_ERROR("Inconsistency - " << m_Detectors.size() << " detectors have been restored but completeToTime is "
                                              << completeToTime);
             }
         }
-    }
-    catch (std::exception &e)
-    {
+    } catch (std::exception& e) {
         LOG_ERROR("Failed to restore state! " << e.what());
         return false;
     }
@@ -952,100 +797,73 @@ bool CAnomalyJob::restoreState(core::CDataSearcher &restoreSearcher,
 
     return true;
 }
 
-bool CAnomalyJob::restoreState(core::CStateRestoreTraverser &traverser,
-                               core_t::TTime &completeToTime,
-                               std::size_t &numDetectors)
-{
+bool CAnomalyJob::restoreState(core::CStateRestoreTraverser& traverser, core_t::TTime& completeToTime, std::size_t& numDetectors) {
     m_RestoredStateDetail.s_RestoredStateStatus = E_Failure;
     m_RestoredStateDetail.s_Extra = boost::none;
 
     // Call name() to prime the traverser if it hasn't started
     traverser.name();
-    if (traverser.isEof())
-    {
+    if (traverser.isEof()) {
         m_RestoredStateDetail.s_RestoredStateStatus = E_NoDetectorsRecovered;
         LOG_ERROR("Expected persisted state but no state exists");
         return false;
     }
 
     core_t::TTime lastBucketEndTime(0);
-    if (traverser.name() != TIME_TAG ||
-        core::CStringUtils::stringToType(traverser.value(),
-                                         lastBucketEndTime) == false)
-    {
+    if (traverser.name() != TIME_TAG || core::CStringUtils::stringToType(traverser.value(), lastBucketEndTime) == false) {
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
-        LOG_ERROR("Cannot restore anomaly detector - '" << TIME_TAG <<
-                  "' element expected but found " <<
-                  traverser.name() << '=' << traverser.value());
+        LOG_ERROR("Cannot restore anomaly detector - '" << TIME_TAG << "' element expected but found " << traverser.name() << '='
                                                         << traverser.value());
         return false;
     }
     m_LastFinalisedBucketEndTime = lastBucketEndTime;
 
-    if (lastBucketEndTime > completeToTime)
-    {
+    if (lastBucketEndTime > completeToTime) {
         LOG_INFO("Processing is already complete to time " << lastBucketEndTime);
         completeToTime = lastBucketEndTime;
     }
 
-    if ((traverser.next() == false) || (traverser.name() != VERSION_TAG))
-    {
+    if ((traverser.next() == false) || (traverser.name() != VERSION_TAG)) {
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
-        LOG_ERROR("Cannot restore anomaly detector " <<
-                  VERSION_TAG << " was expected");
+        LOG_ERROR("Cannot restore anomaly detector " << VERSION_TAG << " was expected");
         return false;
     }
 
-    const std::string &stateVersion = traverser.value();
-    if (stateVersion != model::CAnomalyDetector::STATE_VERSION)
-    {
+    const std::string& stateVersion = traverser.value();
+    if (stateVersion != model::CAnomalyDetector::STATE_VERSION) {
         m_RestoredStateDetail.s_RestoredStateStatus = E_IncorrectVersion;
-        LOG_ERROR("Restored anomaly detector state version is " << stateVersion <<
-                  " - ignoring it as current state version is " <<
-                  model::CAnomalyDetector::STATE_VERSION);
+        LOG_ERROR("Restored anomaly detector state version is " << stateVersion << " - ignoring it as current state version is "
                                                                 << model::CAnomalyDetector::STATE_VERSION);
 
         // This counts as successful restoration
         return true;
     }
 
-    while (traverser.next())
-    {
-        const std::string &name = traverser.name();
-        if (name == TOP_LEVEL_DETECTOR_TAG)
-        {
-            if (traverser.traverseSubLevel(boost::bind(&CAnomalyJob::restoreSingleDetector, this, _1)) == false)
-            {
+    while (traverser.next()) {
+        const std::string& name = traverser.name();
+        if (name == TOP_LEVEL_DETECTOR_TAG) {
+            if (traverser.traverseSubLevel(boost::bind(&CAnomalyJob::restoreSingleDetector, this, _1)) == false) {
                 LOG_ERROR("Cannot restore anomaly detector");
                 return false;
             }
             ++numDetectors;
-        }
-        else if (name == RESULTS_AGGREGATOR_TAG)
-        {
-            if (traverser.traverseSubLevel(boost::bind(&model::CHierarchicalResultsAggregator::acceptRestoreTraverser,
-                                                       &m_Aggregator, _1)) == false)
-            {
+        } else if (name == RESULTS_AGGREGATOR_TAG) {
+            if (traverser.traverseSubLevel(
+                    boost::bind(&model::CHierarchicalResultsAggregator::acceptRestoreTraverser, &m_Aggregator, _1)) == false) {
                 LOG_ERROR("Cannot restore results aggregator");
                 return false;
             }
-        }
-        else if (name == HIERARCHICAL_RESULTS_TAG)
-        {
+        } else if (name == HIERARCHICAL_RESULTS_TAG) {
             core::CPersistUtils::restore(HIERARCHICAL_RESULTS_TAG, m_ResultsQueue, traverser);
-        }
-        else if (name == MODEL_PLOT_TAG)
-        {
-            core_t::TTime resultsQueueResetTime = m_ModelConfig.bucketResultsDelay() == 0 ?
-                                                  m_LastFinalisedBucketEndTime :
-                                                  m_LastFinalisedBucketEndTime - this->effectiveBucketLength();
+        } else if (name == MODEL_PLOT_TAG) {
+            core_t::TTime resultsQueueResetTime = m_ModelConfig.bucketResultsDelay() == 0
                                                      ? m_LastFinalisedBucketEndTime
                                                      : m_LastFinalisedBucketEndTime - this->effectiveBucketLength();
             m_ModelPlotQueue.reset(resultsQueueResetTime);
             core::CPersistUtils::restore(MODEL_PLOT_TAG, m_ModelPlotQueue, traverser);
-        }
-        else if (name == LATEST_RECORD_TIME_TAG)
-        {
+        } else if (name == LATEST_RECORD_TIME_TAG) {
             core::CPersistUtils::restore(LATEST_RECORD_TIME_TAG, m_LatestRecordTime, traverser);
-        }
-        else if (name == LAST_RESULTS_TIME_TAG)
-        {
+        } else if (name == LAST_RESULTS_TIME_TAG) {
             core::CPersistUtils::restore(LAST_RESULTS_TIME_TAG, m_LastResultsTime, traverser);
         }
     }
 
@@ -1055,80 +873,65 @@ bool CAnomalyJob::restoreState(core::CStateRestoreTraverser &traverser,
 
     return true;
 }
 
-bool CAnomalyJob::restoreSingleDetector(core::CStateRestoreTraverser &traverser)
-{
-    if (traverser.name() != KEY_TAG)
-    {
-        LOG_ERROR("Cannot restore anomaly detector - " << KEY_TAG <<
-                  " element expected but found " << traverser.name() << '=' << traverser.value());
+bool CAnomalyJob::restoreSingleDetector(core::CStateRestoreTraverser& traverser) {
+    if (traverser.name() != KEY_TAG) {
+        LOG_ERROR("Cannot restore anomaly detector - " << KEY_TAG << " element expected but found " << traverser.name() << '='
                                                        << traverser.value());
 
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
         return false;
     }
 
     model::CSearchKey key;
-    if (traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::keyAcceptRestoreTraverser,
-                                               _1,
-                                               boost::ref(key))) == false)
-    {
+    if (traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::keyAcceptRestoreTraverser, _1, boost::ref(key))) == false) {
         LOG_ERROR("Cannot restore anomaly detector - no key found in " << KEY_TAG);
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
         return false;
     }
 
-    if (traverser.next() == false)
-    {
-        LOG_ERROR("Cannot restore anomaly detector - end of object reached when " <<
-                  PARTITION_FIELD_TAG << " was expected");
+    if (traverser.next() == false) {
+        LOG_ERROR("Cannot restore anomaly detector - end of object reached when " << PARTITION_FIELD_TAG << " was expected");
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
         return false;
     }
 
-    if (traverser.name() != PARTITION_FIELD_TAG)
-    {
-        LOG_ERROR("Cannot restore anomaly detector - " << PARTITION_FIELD_TAG <<
-                  " element expected but found " << traverser.name() << '=' << traverser.value());
+    if (traverser.name() != PARTITION_FIELD_TAG) {
+        LOG_ERROR("Cannot restore anomaly detector - " << PARTITION_FIELD_TAG << " element expected but found " << traverser.name() << '='
                                                        << traverser.value());
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
         return false;
     }
 
     std::string partitionFieldValue;
-    if (traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::partitionFieldAcceptRestoreTraverser,
-                                               _1,
-                                               boost::ref(partitionFieldValue))) == false)
-    {
+    if (traverser.traverseSubLevel(
+            boost::bind(&model::CAnomalyDetector::partitionFieldAcceptRestoreTraverser, _1, boost::ref(partitionFieldValue))) == false) {
         LOG_ERROR("Cannot restore anomaly detector - "
-                  "no partition field value found in " << PARTITION_FIELD_TAG);
+                  "no partition field value found in "
                  << PARTITION_FIELD_TAG);
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
         return false;
     }
 
-    if (traverser.next() == false)
-    {
-        LOG_ERROR("Cannot restore anomaly detector - end of object reached when " <<
-                  DETECTOR_TAG << " was expected");
+    if (traverser.next() == false) {
+        LOG_ERROR("Cannot restore anomaly detector - end of object reached when " << DETECTOR_TAG << " was expected");
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
         return false;
     }
 
-    if (traverser.name() != DETECTOR_TAG)
-    {
-        LOG_ERROR("Cannot restore anomaly detector - " << DETECTOR_TAG <<
-                  " element expected but found " <<
-                  traverser.name() << '=' << traverser.value());
+    if (traverser.name() != DETECTOR_TAG) {
+        LOG_ERROR("Cannot restore anomaly detector - " << DETECTOR_TAG << " element expected but found " << traverser.name() << '='
                                                        << traverser.value());
         m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag;
         return false;
     }
 
-    if (this->restoreDetectorState(key, partitionFieldValue, traverser) == false ||
-        traverser.haveBadState())
-    {
+    if (this->restoreDetectorState(key, partitionFieldValue, traverser) == false || traverser.haveBadState()) {
         LOG_ERROR("Delegated portion of anomaly detector restore failed");
         m_RestoredStateDetail.s_RestoredStateStatus = E_Failure;
         return false;
@@ -1138,49 +941,39 @@ bool CAnomalyJob::restoreSingleDetector(core::CStateRestoreTraverser &traverser)
 
     return true;
 }
 
-bool CAnomalyJob::restoreDetectorState(const model::CSearchKey &key,
-                                       const std::string &partitionFieldValue,
-                                       core::CStateRestoreTraverser &traverser)
-{
-    const TAnomalyDetectorPtr &detector = this->detectorForKey(true, // for restoring
-                                                               0,    // time reset later
+bool CAnomalyJob::restoreDetectorState(const model::CSearchKey& key,
+                                       const std::string& partitionFieldValue,
+                                       core::CStateRestoreTraverser& traverser) {
+    const TAnomalyDetectorPtr& detector = this->detectorForKey(true, // for restoring
+                                                               0,    // time reset later
                                                                key,
                                                                partitionFieldValue,
                                                                m_Limits.resourceMonitor());
-    if (!detector)
-    {
-        LOG_ERROR("Detector with key '" <<
-                  key.debug() << '/' << partitionFieldValue << "' "
-                  "was not recreated on restore - "
-                  "memory limit is too low to continue this job");
+    if (!detector) {
+        LOG_ERROR("Detector with key '" << key.debug() << '/' << partitionFieldValue
                                         << "' "
                                            "was not recreated on restore - "
                                           "memory limit is too low to continue this job");
         m_RestoredStateDetail.s_RestoredStateStatus = E_MemoryLimitReached;
         return false;
     }
 
-    LOG_DEBUG("Restoring state for detector with key '"
-              << key.debug() << '/' << partitionFieldValue << '\'');
+    LOG_DEBUG("Restoring state for detector with key '" << key.debug() << '/' << partitionFieldValue << '\'');
 
-    if (traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::acceptRestoreTraverser,
-                                               detector.get(),
-                                               boost::cref(partitionFieldValue),
-                                               _1)) == false)
-    {
-        LOG_ERROR("Error restoring anomaly detector for key '" <<
-                  key.debug() << '/' << partitionFieldValue << '\'');
+    if (traverser.traverseSubLevel(
+            boost::bind(&model::CAnomalyDetector::acceptRestoreTraverser, detector.get(), boost::cref(partitionFieldValue), _1)) == false) {
+        LOG_ERROR("Error restoring anomaly detector for key '" << key.debug() << '/' << partitionFieldValue << '\'');
         return false;
     }
 
     return true;
 }
 
-bool CAnomalyJob::persistState(core::CDataAdder &persister)
-{
-    if (m_PeriodicPersister != nullptr)
-    {
+bool CAnomalyJob::persistState(core::CDataAdder& persister) {
+    if (m_PeriodicPersister != nullptr) {
         // This will not happen if finalise() was called before persisting state
-        if (m_PeriodicPersister->isBusy())
-        {
+        if (m_PeriodicPersister->isBusy()) {
             LOG_ERROR("Cannot do final persistence of state - periodic "
                       "persister still busy");
             return false;
@@ -1188,13 +981,11 @@ bool CAnomalyJob::persistState(core::CDataAdder &persister)
     }
 
     // Pass on the request in case we're chained
-    if (this->outputHandler().persistState(persister) == false)
-    {
+    if (this->outputHandler().persistState(persister) == false) {
         return false;
     }
 
-    if (m_LastFinalisedBucketEndTime == 0)
-    {
+    if (m_LastFinalisedBucketEndTime == 0) {
         LOG_INFO("Will not persist detectors as no results have been output");
         return true;
     }
@@ -1204,22 +995,21 @@ bool CAnomalyJob::persistState(core::CDataAdder &persister)
 
     std::string normaliserState;
     m_Normalizer.toJson(m_LastResultsTime, "api", normaliserState, true);
 
-    return this->persistState("State persisted due to job close at ",
-                              m_ResultsQueue,
-                              m_ModelPlotQueue,
-                              m_LastFinalisedBucketEndTime,
-                              detectors,
-                              m_Limits.resourceMonitor().createMemoryUsageReport(
-                                  m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()),
-                              m_Aggregator,
-                              normaliserState,
-                              m_LatestRecordTime,
-                              m_LastResultsTime,
-                              persister);
+    return this->persistState(
+        "State persisted due to job close at ",
+        m_ResultsQueue,
+        m_ModelPlotQueue,
+        m_LastFinalisedBucketEndTime,
+        detectors,
+        m_Limits.resourceMonitor().createMemoryUsageReport(m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()),
+        m_Aggregator,
+        normaliserState,
+        m_LatestRecordTime,
+        m_LastResultsTime,
+        persister);
 }
 
-bool CAnomalyJob::backgroundPersistState(CBackgroundPersister &backgroundPersister)
-{
+bool CAnomalyJob::backgroundPersistState(CBackgroundPersister& backgroundPersister) {
     LOG_INFO("Background persist starting data copy");
 
     // Pass arguments by value: this is what we want for
@@ -1227,50 +1017,38 @@ bool CAnomalyJob::backgroundPersistState(CBackgroundPersister &backgroundPersist
     // Do NOT add boost::ref wrappers around these arguments - they
     // MUST be copied for thread safety
     TBackgroundPersistArgsPtr args = boost::make_shared<SBackgroundPersistArgs>(
-            m_ResultsQueue,
-            m_ModelPlotQueue,
-            m_LastFinalisedBucketEndTime,
-            m_Limits.resourceMonitor().createMemoryUsageReport(
-                m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()),
-            m_Aggregator,
-            m_LatestRecordTime,
-            m_LastResultsTime);
+        m_ResultsQueue,
+        m_ModelPlotQueue,
+        m_LastFinalisedBucketEndTime,
+        m_Limits.resourceMonitor().createMemoryUsageReport(m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()),
+        m_Aggregator,
+        m_LatestRecordTime,
+        m_LastResultsTime);
 
     // The normaliser is non-copyable, so we have to make do with JSONifying it now;
     // it should be relatively fast though
     m_Normalizer.toJson(m_LastResultsTime, "api", args->s_NormalizerState, true);
 
-    TKeyCRefAnomalyDetectorPtrPrVec &copiedDetectors = args->s_Detectors;
+    TKeyCRefAnomalyDetectorPtrPrVec& copiedDetectors = args->s_Detectors;
     copiedDetectors.reserve(m_Detectors.size());
 
-    for (const auto &detector_ : m_Detectors)
-    {
-        model::CAnomalyDetector *detector(detector_.second.get());
-        if (detector == 0)
-        {
+    for (const auto& detector_ : m_Detectors) {
+        model::CAnomalyDetector* detector(detector_.second.get());
+        if (detector == 0) {
             LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\'');
             continue;
         }
-        model::CSearchKey::TStrCRefKeyCRefPr key(boost::cref(detector_.first.first),
-                                                 boost::cref(detector_.first.second));
-        if (detector->isSimpleCount())
-        {
-            copiedDetectors.push_back(TKeyCRefAnomalyDetectorPtrPr(
-                    key, TAnomalyDetectorPtr(new model::CSimpleCountDetector(true, *detector))));
-        }
-        else
-        {
-            copiedDetectors.push_back(TKeyCRefAnomalyDetectorPtrPr(
-                    key, TAnomalyDetectorPtr(new model::CAnomalyDetector(true, *detector))));
+        model::CSearchKey::TStrCRefKeyCRefPr key(boost::cref(detector_.first.first), boost::cref(detector_.first.second));
+        if (detector->isSimpleCount()) {
+            copiedDetectors.push_back(
+                TKeyCRefAnomalyDetectorPtrPr(key, TAnomalyDetectorPtr(new model::CSimpleCountDetector(true, *detector))));
+        } else {
+            copiedDetectors.push_back(TKeyCRefAnomalyDetectorPtrPr(key, TAnomalyDetectorPtr(new model::CAnomalyDetector(true, *detector))));
         }
     }
     std::sort(copiedDetectors.begin(), copiedDetectors.end(), maths::COrderings::SFirstLess());
 
-    if (backgroundPersister.addPersistFunc(boost::bind(&CAnomalyJob::runBackgroundPersist,
-                                                       this,
-                                                       args,
-                                                       _1)) == false)
-    {
+    if (backgroundPersister.addPersistFunc(boost::bind(&CAnomalyJob::runBackgroundPersist, this, args, _1)) == false) {
         LOG_ERROR("Failed to add anomaly detector background persistence function");
         return false;
     }
@@ -1278,11 +1056,8 @@ bool CAnomalyJob::backgroundPersistState(CBackgroundPersister &backgroundPersist
 
     return true;
 }
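The copy rules spelled out in backgroundPersistState above are the crux: everything the background thread reads is snapshotted on the main thread first, which is why the detectors are deep-copied and the non-copyable normalizer is serialised to JSON up front. A std::thread sketch of the same ownership discipline; the types and values are illustrative:

#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>

struct SPersistArgs {
    // Deep copies taken on the main thread; the worker owns them via shared_ptr.
    std::vector<std::string> s_Detectors;
    long s_LastFinalisedBucketEnd = 0;
};

void runBackgroundPersist(std::shared_ptr<const SPersistArgs> args) {
    // Only ever touches the snapshot, never the live job state.
    std::cout << "persisting " << args->s_Detectors.size() << " detectors up to "
              << args->s_LastFinalisedBucketEnd << '\n';
}

int main() {
    std::vector<std::string> liveDetectors{"count", "mean(bytes)"};

    auto args = std::make_shared<SPersistArgs>();
    args->s_Detectors = liveDetectors; // copy, NOT a reference to live state
    args->s_LastFinalisedBucketEnd = 1520003600;

    std::thread worker(runBackgroundPersist, args); // args passed by value
    liveDetectors.emplace_back("rare(status)");     // main thread moves on safely
    worker.join();
}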
 
-bool CAnomalyJob::runBackgroundPersist(TBackgroundPersistArgsPtr args,
-                                       core::CDataAdder &persister)
-{
-    if (!args)
-    {
+bool CAnomalyJob::runBackgroundPersist(TBackgroundPersistArgsPtr args, core::CDataAdder& persister) {
+    if (!args) {
         LOG_ERROR("Unexpected NULL pointer passed to background persist");
         return false;
     }
@@ -1300,29 +1075,25 @@ bool CAnomalyJob::runBackgroundPersist(TBackgroundPersistArgsPtr args,
                               persister);
 }
 
-bool CAnomalyJob::persistState(const std::string &descriptionPrefix,
-                               const model::CResultsQueue &resultsQueue,
-                               const TModelPlotDataVecQueue &modelPlotQueue,
+bool CAnomalyJob::persistState(const std::string& descriptionPrefix,
+                               const model::CResultsQueue& resultsQueue,
+                               const TModelPlotDataVecQueue& modelPlotQueue,
                                core_t::TTime lastFinalisedBucketEnd,
-                               const TKeyCRefAnomalyDetectorPtrPrVec &detectors,
-                               const model::CResourceMonitor::SResults &modelSizeStats,
-                               const model::CHierarchicalResultsAggregator &aggregator,
-                               const std::string &normalizerState,
+                               const TKeyCRefAnomalyDetectorPtrPrVec& detectors,
+                               const model::CResourceMonitor::SResults& modelSizeStats,
+                               const model::CHierarchicalResultsAggregator& aggregator,
+                               const std::string& normalizerState,
                                core_t::TTime latestRecordTime,
                                core_t::TTime lastResultsTime,
-                               core::CDataAdder &persister)
-{
+                               core::CDataAdder& persister) {
     // Persist state for each detector separately by streaming
-    try
-    {
+    try {
         core::CStateCompressor compressor(persister);
 
         core_t::TTime snapshotTimestamp(core::CTimeUtils::now());
         const std::string snapShotId(core::CStringUtils::typeToString(snapshotTimestamp));
-        core::CDataAdder::TOStreamP strm = compressor.addStreamed(ML_STATE_INDEX,
-                                                                  m_JobId + '_' + STATE_TYPE + '_' + snapShotId);
-        if (strm != 0)
-        {
+        core::CDataAdder::TOStreamP strm = compressor.addStreamed(ML_STATE_INDEX, m_JobId + '_' + STATE_TYPE + '_' + snapShotId);
+        if (strm != 0) {
             // IMPORTANT - this method can run in a background thread while the
             // analytics carries on processing new buckets in the main thread.
             // Therefore, this method must NOT access any member variables whose
@@ -1334,66 +1105,56 @@ bool CAnomalyJob::persistState(const std::string &descriptionPrefix,
             inserter.insertValue(TIME_TAG, lastFinalisedBucketEnd);
             inserter.insertValue(VERSION_TAG, model::CAnomalyDetector::STATE_VERSION);
 
-            if (resultsQueue.size() > 1)
-            {
+            if (resultsQueue.size() > 1) {
                 core::CPersistUtils::persist(HIERARCHICAL_RESULTS_TAG, resultsQueue, inserter);
             }
-            if (modelPlotQueue.size() > 1)
-            {
+            if (modelPlotQueue.size() > 1) {
                 core::CPersistUtils::persist(MODEL_PLOT_TAG, modelPlotQueue, inserter);
             }
 
-            for (const auto &detector_ : detectors)
-            {
-                const model::CAnomalyDetector *detector(detector_.second.get());
-                if (detector == 0)
-                {
+            for (const auto& detector_ : detectors) {
+                const model::CAnomalyDetector* detector(detector_.second.get());
+                if (detector == 0) {
                     LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\'');
                     continue;
                 }
                 inserter.insertLevel(TOP_LEVEL_DETECTOR_TAG,
-                                     boost::bind(&CAnomalyJob::persistIndividualDetector,
-                                                 boost::cref(*detector), _1));
+                                     boost::bind(&CAnomalyJob::persistIndividualDetector, boost::cref(*detector), _1));
 
                 LOG_DEBUG("Persisted state for '" << detector->description() << "'");
             }
 
             inserter.insertLevel(RESULTS_AGGREGATOR_TAG,
-                                 boost::bind(&model::CHierarchicalResultsAggregator::acceptPersistInserter,
-                                             &aggregator, _1));
+                                 boost::bind(&model::CHierarchicalResultsAggregator::acceptPersistInserter, &aggregator, _1));
 
             core::CPersistUtils::persist(LATEST_RECORD_TIME_TAG, latestRecordTime, inserter);
             core::CPersistUtils::persist(LAST_RESULTS_TIME_TAG, lastResultsTime, inserter);
         }
 
-        if (compressor.streamComplete(strm, true) == false || strm->bad())
-        {
+        if (compressor.streamComplete(strm, true) == false || strm->bad()) {
             LOG_ERROR("Failed to complete last persistence stream");
             return false;
         }
 
-        if (m_PersistCompleteFunc)
-        {
+        if (m_PersistCompleteFunc) {
             CModelSnapshotJsonWriter::SModelSnapshotReport modelSnapshotReport{
-                    MODEL_SNAPSHOT_MIN_VERSION,
-                    snapshotTimestamp,
-                    descriptionPrefix + core::CTimeUtils::toIso8601(snapshotTimestamp),
-                    snapShotId,
-                    compressor.numCompressedDocs(),
-                    modelSizeStats,
-                    normalizerState,
-                    latestRecordTime,
-                    // This needs to be the last final result time as it serves
-                    // as the time after which all results are deleted when a
-                    // model snapshot is reverted
-                    lastFinalisedBucketEnd - m_ModelConfig.bucketLength()};
+                MODEL_SNAPSHOT_MIN_VERSION,
+                snapshotTimestamp,
+                descriptionPrefix + core::CTimeUtils::toIso8601(snapshotTimestamp),
+                snapShotId,
+                compressor.numCompressedDocs(),
+                modelSizeStats,
+                normalizerState,
+                latestRecordTime,
+                // This needs to be the last final result time as it serves
+                // as the time after which all results are deleted when a
+                // model snapshot is reverted
+                lastFinalisedBucketEnd - m_ModelConfig.bucketLength()};
 
             m_PersistCompleteFunc(modelSnapshotReport);
         }
-    }
-    catch (std::exception &e)
-    {
+    } catch (std::exception& e) {
         LOG_ERROR("Failed to persist state! " << e.what());
         return false;
     }
@@ -1401,11 +1162,9 @@ bool CAnomalyJob::persistState(const std::string &descriptionPrefix,
 
     return true;
 }
 
-bool CAnomalyJob::periodicPersistState(CBackgroundPersister &persister)
-{
+bool CAnomalyJob::periodicPersistState(CBackgroundPersister& persister) {
     // Pass on the request in case we're chained
-    if (this->outputHandler().periodicPersistState(persister) == false)
-    {
+    if (this->outputHandler().periodicPersistState(persister) == false) {
         return false;
     }
 
@@ -1413,11 +1172,9 @@ bool CAnomalyJob::periodicPersistState(CBackgroundPersister &persister)
     this->pruneAllModels();
 
     // Make sure model size stats are up to date
-    for (const auto &detector_ : m_Detectors)
-    {
-        model::CAnomalyDetector *detector = detector_.second.get();
-        if (detector == 0)
-        {
+    for (const auto& detector_ : m_Detectors) {
+        model::CAnomalyDetector* detector = detector_.second.get();
+        if (detector == 0) {
             LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\'');
             continue;
         }
@@ -1427,16 +1184,13 @@ bool CAnomalyJob::periodicPersistState(CBackgroundPersister &persister)
     return this->backgroundPersistState(persister);
 }
 
-void CAnomalyJob::updateAggregatorAndAggregate(bool isInterim,
-                                               model::CHierarchicalResults &results)
-{
+void CAnomalyJob::updateAggregatorAndAggregate(bool isInterim, model::CHierarchicalResults& results) {
     m_Aggregator.refresh(m_ModelConfig);
 
     m_Aggregator.setJob(model::CHierarchicalResultsAggregator::E_Correct);
 
     // The equalizers are NOT updated with interim results.
-    if (isInterim == false)
-    {
+    if (isInterim == false) {
         m_Aggregator.setJob(model::CHierarchicalResultsAggregator::E_UpdateAndCorrect);
         m_Aggregator.propagateForwardByTime(1.0);
     }
@@ -1446,16 +1200,13 @@ void CAnomalyJob::updateAggregatorAndAggregate(bool isInterim,
     results.pivotsBottomUpBreadthFirst(m_Aggregator);
 }
 
-void CAnomalyJob::updateQuantilesAndNormalize(bool isInterim,
-                                              model::CHierarchicalResults &results)
-{
+void CAnomalyJob::updateQuantilesAndNormalize(bool isInterim, model::CHierarchicalResults& results) {
     m_Normalizer.resetBigChange();
 
     // The normalizers are NOT updated with interim results, in other
     // words interim results are normalized with respect to previous
     // final results.
-    if (isInterim == false)
-    {
+    if (isInterim == false) {
         m_Normalizer.propagateForwardByTime(1.0);
         m_Normalizer.setJob(model::CHierarchicalResultsNormalizer::E_Update);
         results.bottomUpBreadthFirst(m_Normalizer);
@@ -1466,47 +1217,33 @@ void CAnomalyJob::updateQuantilesAndNormalize(bool isInterim,
     results.bottomUpBreadthFirst(m_Normalizer);
     results.pivotsBottomUpBreadthFirst(m_Normalizer);
 
-    if ((isInterim == false &&
-         m_Normalizer.hasLastUpdateCausedBigChange()) ||
-        (m_MaxQuantileInterval > 0 &&
-         core::CTimeUtils::now() > m_LastNormalizerPersistTime + m_MaxQuantileInterval))
-    {
+    if ((isInterim == false && m_Normalizer.hasLastUpdateCausedBigChange()) ||
+        (m_MaxQuantileInterval > 0 && core::CTimeUtils::now() > m_LastNormalizerPersistTime + m_MaxQuantileInterval)) {
        m_JsonOutputWriter.persistNormalizer(m_Normalizer, m_LastNormalizerPersistTime);
     }
 }
 
-void CAnomalyJob::outputResultsWithinRange(bool isInterim,
-                                           core_t::TTime start,
-                                           core_t::TTime end)
-{
-    if (m_LastFinalisedBucketEndTime <= 0)
-    {
+void CAnomalyJob::outputResultsWithinRange(bool isInterim, core_t::TTime start, core_t::TTime end) {
+    if (m_LastFinalisedBucketEndTime <= 0) {
         return;
     }
-    if (start < m_LastFinalisedBucketEndTime)
-    {
-        LOG_WARN("Cannot output results for range (" << start << ", " << m_LastFinalisedBucketEndTime <<
-                 "): Start time is before last finalized bucket end time " <<
-                 m_LastFinalisedBucketEndTime << '.');
+    if (start < m_LastFinalisedBucketEndTime) {
+        LOG_WARN("Cannot output results for range (" << start << ", " << m_LastFinalisedBucketEndTime
                                                      << "): Start time is before last finalized bucket end time "
                                                      << m_LastFinalisedBucketEndTime << '.');
         start = m_LastFinalisedBucketEndTime;
     }
-    if (start > end)
-    {
-        LOG_ERROR("Cannot output results for range (" << start << ", " << end <<
-                  "): Start time is later than end time.");
+    if (start > end) {
+        LOG_ERROR("Cannot output results for range (" << start << ", " << end << "): Start time is later than end time.");
        return;
    }
     core_t::TTime bucketLength = m_ModelConfig.bucketLength();
     core_t::TTime time = maths::CIntegerTools::floor(start, bucketLength);
     core_t::TTime bucketEnd = maths::CIntegerTools::ceil(end, bucketLength);
-    while (time < bucketEnd)
-    {
-        if (isInterim)
-        {
+    while (time < bucketEnd) {
+        if (isInterim) {
             this->outputInterimResults(time);
-        }
-        else
-        {
+        } else {
             this->outputResults(time);
         }
         m_Limits.resourceMonitor().sendMemoryUsageReportIfSignificantlyChanged(time);
@@ -1514,50 +1251,36 @@ void CAnomalyJob::outputResultsWithinRange(bool isInterim,
     }
 }
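The persistNormalizer trigger in updateQuantilesAndNormalize above combines two conditions: a big normalizer change caused by a final (never interim) result, or an optional maximum interval since the quantiles were last written. Distilled into a self-contained sketch; the function name and sample values are illustrative:

#include <cstdint>
#include <iostream>

using TTime = std::int64_t;

// Persist on a big change caused by a *final* result, or on a timeout
// if one is configured (maxInterval > 0).
bool shouldPersistQuantiles(bool isInterim, bool bigChange, TTime now, TTime lastPersist, TTime maxInterval) {
    return (!isInterim && bigChange) || (maxInterval > 0 && now > lastPersist + maxInterval);
}

int main() {
    // Interim results never trigger the big-change path...
    std::cout << shouldPersistQuantiles(true, true, 1000, 0, 0) << '\n';    // 0
    // ...but the elapsed-interval path fires regardless of result type.
    std::cout << shouldPersistQuantiles(true, false, 1000, 0, 900) << '\n'; // 1
}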
 
-void CAnomalyJob::generateModelPlot(core_t::TTime startTime,
-                                    core_t::TTime endTime,
-                                    const model::CAnomalyDetector &detector)
-{
+void CAnomalyJob::generateModelPlot(core_t::TTime startTime, core_t::TTime endTime, const model::CAnomalyDetector& detector) {
     double modelPlotBoundsPercentile(m_ModelConfig.modelPlotBoundsPercentile());
-    if (modelPlotBoundsPercentile > 0.0)
-    {
+    if (modelPlotBoundsPercentile > 0.0) {
         LOG_TRACE("Generating model debug data at " << startTime);
-        detector.generateModelPlot(startTime, endTime,
-                                   m_ModelConfig.modelPlotBoundsPercentile(),
-                                   m_ModelConfig.modelPlotTerms(),
-                                   m_ModelPlotQueue.get(startTime));
+        detector.generateModelPlot(
+            startTime, endTime, m_ModelConfig.modelPlotBoundsPercentile(), m_ModelConfig.modelPlotTerms(), m_ModelPlotQueue.get(startTime));
     }
 }
 
-void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime)
-{
+void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime) {
     double modelPlotBoundsPercentile(m_ModelConfig.modelPlotBoundsPercentile());
-    if (modelPlotBoundsPercentile > 0.0)
-    {
+    if (modelPlotBoundsPercentile > 0.0) {
         LOG_TRACE("Writing debug data at time " << resultsTime);
         CModelPlotDataJsonWriter modelPlotWriter(m_OutputStream);
         this->writeOutModelPlot(resultsTime, modelPlotWriter);
     }
 }
 
-void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime,
-                                    CModelPlotDataJsonWriter &writer)
-{
-    for (const auto &plot : m_ModelPlotQueue.get(resultsTime))
-    {
+void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime, CModelPlotDataJsonWriter& writer) {
+    for (const auto& plot : m_ModelPlotQueue.get(resultsTime)) {
         writer.writeFlat(m_JobId, plot);
     }
 }
 
-void CAnomalyJob::refreshMemoryAndReport()
-{
+void CAnomalyJob::refreshMemoryAndReport() {
     // Make sure model size stats are up to date and then send a final memory
     // usage report
-    for (const auto &detector_ : m_Detectors)
-    {
-        model::CAnomalyDetector *detector = detector_.second.get();
-        if (detector == 0)
-        {
+    for (const auto& detector_ : m_Detectors) {
+        model::CAnomalyDetector* detector = detector_.second.get();
+        if (detector == 0) {
             LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\'');
             continue;
         }
@@ -1566,79 +1289,56 @@ void CAnomalyJob::refreshMemoryAndReport()
     m_Limits.resourceMonitor().sendMemoryUsageReport(m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength());
 }
 
-void CAnomalyJob::persistIndividualDetector(const model::CAnomalyDetector &detector,
-                                            core::CStatePersistInserter &inserter)
-{
+void CAnomalyJob::persistIndividualDetector(const model::CAnomalyDetector& detector, core::CStatePersistInserter& inserter) {
     inserter.insertLevel(KEY_TAG, boost::bind(&model::CAnomalyDetector::keyAcceptPersistInserter, &detector, _1));
-    inserter.insertLevel(PARTITION_FIELD_TAG,
-                         boost::bind(&model::CAnomalyDetector::partitionFieldAcceptPersistInserter, &detector, _1));
+    inserter.insertLevel(PARTITION_FIELD_TAG, boost::bind(&model::CAnomalyDetector::partitionFieldAcceptPersistInserter, &detector, _1));
     inserter.insertLevel(DETECTOR_TAG, boost::bind(&model::CAnomalyDetector::acceptPersistInserter, &detector, _1));
 }
 
-void CAnomalyJob::detectors(TAnomalyDetectorPtrVec &detectors) const
-{
+void CAnomalyJob::detectors(TAnomalyDetectorPtrVec& detectors) const {
     detectors.clear();
     detectors.reserve(m_Detectors.size());
-    for (const auto &detector : m_Detectors)
-    {
+    for (const auto& detector : m_Detectors) {
         detectors.push_back(detector.second);
     }
 }
 
-void CAnomalyJob::sortedDetectors(TKeyCRefAnomalyDetectorPtrPrVec &detectors) const
-{
+void CAnomalyJob::sortedDetectors(TKeyCRefAnomalyDetectorPtrPrVec& detectors) const {
     detectors.reserve(m_Detectors.size());
-    for (const auto &detector : m_Detectors)
-    {
+    for (const auto& detector : m_Detectors) {
         detectors.push_back(TKeyCRefAnomalyDetectorPtrPr(
-                model::CSearchKey::TStrCRefKeyCRefPr(boost::cref(detector.first.first),
-                                                     boost::cref(detector.first.second)),
-                detector.second));
+            model::CSearchKey::TStrCRefKeyCRefPr(boost::cref(detector.first.first), boost::cref(detector.first.second)), detector.second));
     }
     std::sort(detectors.begin(), detectors.end(), maths::COrderings::SFirstLess());
 }
 
-const CAnomalyJob::TKeyAnomalyDetectorPtrUMap &CAnomalyJob::detectorPartitionMap() const
-{
+const CAnomalyJob::TKeyAnomalyDetectorPtrUMap& CAnomalyJob::detectorPartitionMap() const {
     return m_Detectors;
 }
 
-const CAnomalyJob::TAnomalyDetectorPtr &
-CAnomalyJob::detectorForKey(bool isRestoring,
-                            core_t::TTime time,
-                            const model::CSearchKey &key,
-                            const std::string &partitionFieldValue,
-                            model::CResourceMonitor &resourceMonitor)
-{
+const CAnomalyJob::TAnomalyDetectorPtr& CAnomalyJob::detectorForKey(bool isRestoring,
                                                                    core_t::TTime time,
                                                                    const model::CSearchKey& key,
                                                                    const std::string& partitionFieldValue,
                                                                    model::CResourceMonitor& resourceMonitor) {
     // The simple count detector always lives in a special null partition.
-    const std::string &partition = key.isSimpleCount() ? EMPTY_STRING : partitionFieldValue;
+    const std::string& partition = key.isSimpleCount() ? EMPTY_STRING : partitionFieldValue;
 
     // Try and get the detector.
-    auto itr = m_Detectors.find(model::CSearchKey::TStrCRefKeyCRefPr(boost::cref(partition),
-                                                                     boost::cref(key)),
-                                model::CStrKeyPrHash(),
-                                model::CStrKeyPrEqual());
+    auto itr = m_Detectors.find(
        model::CSearchKey::TStrCRefKeyCRefPr(boost::cref(partition), boost::cref(key)), model::CStrKeyPrHash(), model::CStrKeyPrEqual());
 
     // Check if we need to and are allowed to create a new detector.
-    if (itr == m_Detectors.end() && resourceMonitor.areAllocationsAllowed())
-    {
+    if (itr == m_Detectors.end() && resourceMonitor.areAllocationsAllowed()) {
         // Create a placeholder for the anomaly detector.
-        model::CAnomalyDetector::TAnomalyDetectorPtr &detector =
-                m_Detectors.emplace(model::CSearchKey::TStrKeyPr(partition, key),
-                                    TAnomalyDetectorPtr()).first->second;
+        model::CAnomalyDetector::TAnomalyDetectorPtr& detector =
+            m_Detectors.emplace(model::CSearchKey::TStrKeyPr(partition, key), TAnomalyDetectorPtr()).first->second;
 
-        LOG_TRACE("Creating new detector for key '"
-                  << key.debug() << '/' << partition << '\'' << ", time " << time);
+        LOG_TRACE("Creating new detector for key '" << key.debug() << '/' << partition << '\'' << ", time " << time);
         LOG_TRACE("Detector count " << m_Detectors.size())
 
-        detector = this->makeDetector(key.identifier(),
-                                      m_ModelConfig,
-                                      m_Limits,
-                                      partition,
-                                      time,
-                                      m_ModelConfig.factory(key));
-        if (detector == 0)
-        {
+        detector = this->makeDetector(key.identifier(), m_ModelConfig, m_Limits, partition, time, m_ModelConfig.factory(key));
+        if (detector == 0) {
             // This should never happen as CAnomalyDetectorUtils::makeDetector()
             // contracts to never return NULL
             LOG_ABORT("Failed to create anomaly detector for key '" << key.debug() << '\'');
@@ -1646,31 +1346,24 @@ CAnomalyJob::detectorForKey(bool isRestoring,
 
         detector->zeroModelsToTime(time - m_ModelConfig.latency());
 
-        if (isRestoring == false)
-        {
+        if (isRestoring == false) {
            m_Limits.resourceMonitor().forceRefresh(*detector);
         }
         return detector;
-    }
-    else if (itr == m_Detectors.end())
-    {
-        LOG_TRACE("No memory to create new detector for key '"
-                  << key.debug() << '/' << partition << '\'');
+    } else if (itr == m_Detectors.end()) {
+        LOG_TRACE("No memory to create new detector for key '" << key.debug() << '/' << partition << '\'');
         return NULL_DETECTOR;
     }
 
     return itr->second;
 }
 
-void CAnomalyJob::pruneAllModels()
-{
+void CAnomalyJob::pruneAllModels() {
     LOG_INFO("Pruning all models");
 
-    for (const auto &detector_ : m_Detectors)
-    {
-        model::CAnomalyDetector *detector = detector_.second.get();
-        if (detector == 0)
-        {
+    for (const auto& detector_ : m_Detectors) {
+        model::CAnomalyDetector* detector = detector_.second.get();
+        if (detector == 0) {
             LOG_ERROR("Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\'');
             continue;
         }
@@ -1678,39 +1371,26 @@ void CAnomalyJob::pruneAllModels()
     }
 }
 
-model::CAnomalyDetector::TAnomalyDetectorPtr
-CAnomalyJob::makeDetector(int identifier,
-                          const model::CAnomalyDetectorModelConfig &modelConfig,
-                          model::CLimits &limits,
-                          const std::string &partitionFieldValue,
-                          core_t::TTime firstTime,
-                          const model::CAnomalyDetector::TModelFactoryCPtr &modelFactory)
-{
-    return modelFactory->isSimpleCount() ?
-           boost::make_shared<model::CSimpleCountDetector>(identifier,
-                                                           modelFactory->summaryMode(),
-                                                           modelConfig,
-                                                           boost::ref(limits),
-                                                           partitionFieldValue,
-                                                           firstTime,
-                                                           modelFactory) :
-           boost::make_shared<model::CAnomalyDetector>(identifier,
-                                                       boost::ref(limits),
-                                                       modelConfig,
-                                                       partitionFieldValue,
-                                                       firstTime,
-                                                       modelFactory);
+model::CAnomalyDetector::TAnomalyDetectorPtr CAnomalyJob::makeDetector(int identifier,
                                                                       const model::CAnomalyDetectorModelConfig& modelConfig,
                                                                       model::CLimits& limits,
                                                                       const std::string& partitionFieldValue,
                                                                       core_t::TTime firstTime,
                                                                       const model::CAnomalyDetector::TModelFactoryCPtr& modelFactory) {
+    return modelFactory->isSimpleCount()
+               ? boost::make_shared<model::CSimpleCountDetector>(
+                     identifier, modelFactory->summaryMode(), modelConfig, boost::ref(limits), partitionFieldValue, firstTime, modelFactory)
+               : boost::make_shared<model::CAnomalyDetector>(
+                     identifier, boost::ref(limits), modelConfig, partitionFieldValue, firstTime, modelFactory);
 }
 
-void CAnomalyJob::populateDetectorKeys(const CFieldConfig &fieldConfig, TKeyVec &keys)
-{
+void CAnomalyJob::populateDetectorKeys(const CFieldConfig& fieldConfig, TKeyVec& keys) {
     keys.clear();
 
     // Add a key for the simple count detector.
     keys.push_back(model::CSearchKey::simpleCountKey());
 
-    for (const auto &fieldOptions : fieldConfig.fieldOptions())
-    {
+    for (const auto& fieldOptions : fieldConfig.fieldOptions()) {
         keys.emplace_back(fieldOptions.configKey(),
                           fieldOptions.function(),
                           fieldOptions.useNull(),
@@ -1723,50 +1403,37 @@ void CAnomalyJob::populateDetectorKeys(const CFieldConfig &fieldConfig, TKeyVec
     }
 }
 
-const std::string *CAnomalyJob::fieldValue(const std::string &fieldName, const TStrStrUMap &dataRowFields)
-{
-    TStrStrUMapCItr itr = fieldName.empty() ?
-                          dataRowFields.end() :
-                          dataRowFields.find(fieldName);
-    const std::string &fieldValue(itr == dataRowFields.end() ?
-                                  EMPTY_STRING :
-                                  itr->second);
-    return !fieldName.empty() && fieldValue.empty() ?
-           static_cast<const std::string*>(0) : &fieldValue;
+const std::string* CAnomalyJob::fieldValue(const std::string& fieldName, const TStrStrUMap& dataRowFields) {
+    TStrStrUMapCItr itr = fieldName.empty() ? dataRowFields.end() : dataRowFields.find(fieldName);
+    const std::string& fieldValue(itr == dataRowFields.end() ? EMPTY_STRING : itr->second);
+    return !fieldName.empty() && fieldValue.empty() ? static_cast<const std::string*>(0) : &fieldValue;
 }
 
-void CAnomalyJob::addRecord(const TAnomalyDetectorPtr detector,
-                            core_t::TTime time,
-                            const TStrStrUMap &dataRowFields)
-{
+void CAnomalyJob::addRecord(const TAnomalyDetectorPtr detector, core_t::TTime time, const TStrStrUMap& dataRowFields) {
     model::CAnomalyDetector::TStrCPtrVec fieldValues;
-    const TStrVec &fieldNames = detector->fieldsOfInterest();
+    const TStrVec& fieldNames = detector->fieldsOfInterest();
     fieldValues.reserve(fieldNames.size());
-    for (std::size_t i = 0u; i < fieldNames.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < fieldNames.size(); ++i) {
         fieldValues.push_back(fieldValue(fieldNames[i], dataRowFields));
     }
 
     detector->addRecord(time, fieldValues);
 }
 
-CAnomalyJob::SBackgroundPersistArgs::SBackgroundPersistArgs(const model::CResultsQueue &resultsQueue,
-                                                            const TModelPlotDataVecQueue &modelPlotQueue,
+CAnomalyJob::SBackgroundPersistArgs::SBackgroundPersistArgs(const model::CResultsQueue& resultsQueue,
+                                                            const TModelPlotDataVecQueue& modelPlotQueue,
                                                             core_t::TTime time,
-                                                            const model::CResourceMonitor::SResults &modelSizeStats,
-                                                            const model::CHierarchicalResultsAggregator &aggregator,
+                                                            const model::CResourceMonitor::SResults& modelSizeStats,
+                                                            const model::CHierarchicalResultsAggregator& aggregator,
                                                             core_t::TTime latestRecordTime,
-                                                            core_t::TTime lastResultsTime) :
-        s_ResultsQueue(resultsQueue),
-        s_ModelPlotQueue(modelPlotQueue),
-        s_Time(time),
-        s_ModelSizeStats(modelSizeStats),
-        s_Aggregator(aggregator),
-        s_LatestRecordTime(latestRecordTime),
-        s_LastResultsTime(lastResultsTime)
-{
+                                                            core_t::TTime lastResultsTime)
+    : s_ResultsQueue(resultsQueue),
+      s_ModelPlotQueue(modelPlotQueue),
+      s_Time(time),
+      s_ModelSizeStats(modelSizeStats),
+      s_Aggregator(aggregator),
+      s_LatestRecordTime(latestRecordTime),
+      s_LastResultsTime(lastResultsTime) {
 }
-
-
 }
 }
diff --git a/lib/api/CBackgroundPersister.cc b/lib/api/CBackgroundPersister.cc
index 3341bd0c42..44677e68a9 100644
--- a/lib/api/CBackgroundPersister.cc
+++ b/lib/api/CBackgroundPersister.cc
@@ -12,27 +12,21 @@
 #include
 #include
 
-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {
 
-namespace
-{
+namespace {
 const core_t::TTime PERSIST_INTERVAL_INCREMENT(300); // 5 minutes
 }
 
-CBackgroundPersister::CBackgroundPersister(core_t::TTime periodicPersistInterval,
-                                           core::CDataAdder &dataAdder)
+CBackgroundPersister::CBackgroundPersister(core_t::TTime periodicPersistInterval, core::CDataAdder& dataAdder)
     : m_PeriodicPersistInterval(periodicPersistInterval),
       m_LastPeriodicPersistTime(core::CTimeUtils::now()),
       m_DataAdder(dataAdder),
       m_IsBusy(false),
       m_IsShutdown(false),
-      m_BackgroundThread(*this)
-{
-    if (m_PeriodicPersistInterval < PERSIST_INTERVAL_INCREMENT)
-    {
+      m_BackgroundThread(*this) {
+    if (m_PeriodicPersistInterval < PERSIST_INTERVAL_INCREMENT) {
         // This may be dynamically increased further depending on how long
         // persistence takes
         m_PeriodicPersistInterval = PERSIST_INTERVAL_INCREMENT;
@@ -40,41 +34,35 @@ CBackgroundPersister::CBackgroundPersister(core_t::TTime periodicPersistInterval
 }
 
 CBackgroundPersister::CBackgroundPersister(core_t::TTime periodicPersistInterval,
-                                           const TFirstProcessorPeriodicPersistFunc &firstProcessorPeriodicPersistFunc,
-                                           core::CDataAdder &dataAdder)
+                                           const TFirstProcessorPeriodicPersistFunc& firstProcessorPeriodicPersistFunc,
+                                           core::CDataAdder& dataAdder)
     : m_PeriodicPersistInterval(periodicPersistInterval),
       m_LastPeriodicPersistTime(core::CTimeUtils::now()),
       m_FirstProcessorPeriodicPersistFunc(firstProcessorPeriodicPersistFunc),
       m_DataAdder(dataAdder),
       m_IsBusy(false),
       m_IsShutdown(false),
-      m_BackgroundThread(*this)
-{
-    if (m_PeriodicPersistInterval < PERSIST_INTERVAL_INCREMENT)
-    {
+      m_BackgroundThread(*this) {
+    if (m_PeriodicPersistInterval < PERSIST_INTERVAL_INCREMENT) {
         // This may be dynamically increased further depending on how long
         // persistence takes
         m_PeriodicPersistInterval = PERSIST_INTERVAL_INCREMENT;
     }
 }
 
-CBackgroundPersister::~CBackgroundPersister()
-{
+CBackgroundPersister::~CBackgroundPersister() {
     this->waitForIdle();
 }
 
-bool CBackgroundPersister::isBusy() const
-{
+bool CBackgroundPersister::isBusy() const {
     return m_IsBusy;
 }
 
-bool CBackgroundPersister::waitForIdle()
-{
+bool CBackgroundPersister::waitForIdle() {
     {
         core::CScopedFastLock lock(m_Mutex);
 
-        if (!m_BackgroundThread.isStarted())
-        {
+        if (!m_BackgroundThread.isStarted()) {
             return true;
         }
     }
@@ -82,26 +70,21 @@ bool CBackgroundPersister::waitForIdle()
     return m_BackgroundThread.waitForFinish();
 }
 
-bool CBackgroundPersister::addPersistFunc(core::CDataAdder::TPersistFunc persistFunc)
-{
-    if (!persistFunc)
-    {
+bool CBackgroundPersister::addPersistFunc(core::CDataAdder::TPersistFunc persistFunc) {
+    if (!persistFunc) {
         return false;
     }
 
     core::CScopedFastLock lock(m_Mutex);
 
-    if (this->isBusy())
-    {
+    if (this->isBusy()) {
         return false;
     }
 
-    if (m_BackgroundThread.isStarted())
-    {
+    if (m_BackgroundThread.isStarted()) {
         // This join should be fast as the busy flag is false so the thread
         // should either have already exited or be on the verge of exiting
-        if (m_BackgroundThread.waitForFinish() == false)
-        {
+        if (m_BackgroundThread.waitForFinish() == false) {
             return false;
         }
     }
@@ -111,26 +94,21 @@ bool CBackgroundPersister::addPersistFunc(core::CDataAdder::TPersistFunc persist
     return true;
 }
 
-bool CBackgroundPersister::startPersist()
-{
+bool CBackgroundPersister::startPersist() {
     core::CScopedFastLock lock(m_Mutex);
 
-    if (this->isBusy())
-    {
+    if (this->isBusy()) {
        return false;
    }
 
-    if (m_PersistFuncs.empty())
-    {
+    if (m_PersistFuncs.empty()) {
        return false;
    }
 
-    if (m_BackgroundThread.isStarted())
-    {
+    if (m_BackgroundThread.isStarted()) {
         // This join should be fast as the busy flag is false so the thread
         // should either have already exited or be on the verge of exiting
-        if (m_BackgroundThread.waitForFinish() == false)
-        {
+        if (m_BackgroundThread.waitForFinish() == false) {
             return false;
         }
     }
@@ -141,12 +119,10 @@ bool CBackgroundPersister::startPersist()
     return m_IsBusy;
 }
 
-bool CBackgroundPersister::clear()
-{
+bool CBackgroundPersister::clear() {
     core::CScopedFastLock lock(m_Mutex);
 
-    if (this->isBusy())
-    {
+    if (this->isBusy()) {
         return false;
     }
 
@@ -155,12 +131,10 @@ bool CBackgroundPersister::clear()
     return true;
 }
 
-bool CBackgroundPersister::firstProcessorPeriodicPersistFunc(const TFirstProcessorPeriodicPersistFunc &firstProcessorPeriodicPersistFunc)
-{
+bool CBackgroundPersister::firstProcessorPeriodicPersistFunc(const TFirstProcessorPeriodicPersistFunc& firstProcessorPeriodicPersistFunc) {
     core::CScopedFastLock lock(m_Mutex);
 
-    if (this->isBusy())
-    {
+    if (this->isBusy()) {
         return false;
     }
 
@@ -169,36 +143,29 @@ bool CBackgroundPersister::firstProcessorPeriodicPersistFunc(const TFirstProcess
     return true;
 }
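addPersistFunc and startPersist above share one guard discipline: refuse work while the busy flag is set, and join the finished thread before reusing it. A condensed std::thread sketch of that lifecycle; class and member names are illustrative, and the worker may touch the queue without the mutex only because the busy flag serialises access, mirroring the comment in CBackgroundThread::run():

#include <atomic>
#include <deque>
#include <functional>
#include <iostream>
#include <mutex>
#include <thread>

class BackgroundRunner {
public:
    bool addFunc(std::function<void()> func) {
        std::lock_guard<std::mutex> lock(m_Mutex);
        if (m_IsBusy) {
            return false; // previous persist still in progress
        }
        if (m_Worker.joinable()) {
            m_Worker.join(); // fast: the busy flag is already false
        }
        m_Funcs.push_back(std::move(func));
        return true;
    }

    bool start() {
        std::lock_guard<std::mutex> lock(m_Mutex);
        if (m_IsBusy || m_Funcs.empty()) {
            return false;
        }
        if (m_Worker.joinable()) {
            m_Worker.join();
        }
        m_IsBusy = true;
        m_Worker = std::thread([this] {
            // The busy flag prevents concurrent access to m_Funcs here.
            while (!m_Funcs.empty()) {
                m_Funcs.front()();
                m_Funcs.pop_front();
            }
            m_IsBusy = false;
        });
        return true;
    }

    ~BackgroundRunner() {
        if (m_Worker.joinable()) {
            m_Worker.join();
        }
    }

private:
    std::mutex m_Mutex;
    std::atomic<bool> m_IsBusy{false};
    std::deque<std::function<void()>> m_Funcs;
    std::thread m_Worker;
};

int main() {
    BackgroundRunner runner;
    runner.addFunc([] { std::cout << "persisting state\n"; });
    runner.start();
}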
this->startBackgroundPersist(core::CTimeUtils::now()); } -bool CBackgroundPersister::startBackgroundPersistIfAppropriate() -{ +bool CBackgroundPersister::startBackgroundPersistIfAppropriate() { core_t::TTime due(m_LastPeriodicPersistTime + m_PeriodicPersistInterval); core_t::TTime now(core::CTimeUtils::now()); - if (now < due) - { + if (now < due) { // Persist is not due return false; } - if (this->isBusy()) - { + if (this->isBusy()) { m_PeriodicPersistInterval += PERSIST_INTERVAL_INCREMENT; - LOG_WARN("Periodic persist is due at " << due << - " but previous persist started at " << - core::CTimeUtils::toIso8601(m_LastPeriodicPersistTime) << - " is still in progress - increased persistence interval to " << - m_PeriodicPersistInterval << " seconds"); + LOG_WARN("Periodic persist is due at " + << due << " but previous persist started at " << core::CTimeUtils::toIso8601(m_LastPeriodicPersistTime) + << " is still in progress - increased persistence interval to " << m_PeriodicPersistInterval << " seconds"); return false; } @@ -206,11 +173,9 @@ bool CBackgroundPersister::startBackgroundPersistIfAppropriate() return this->startBackgroundPersist(now); } -bool CBackgroundPersister::startBackgroundPersist(core_t::TTime timeOfPersistence) -{ +bool CBackgroundPersister::startBackgroundPersist(core_t::TTime timeOfPersistence) { bool backgroundPersistSetupOk = m_FirstProcessorPeriodicPersistFunc(*this); - if (!backgroundPersistSetupOk) - { + if (!backgroundPersistSetupOk) { LOG_ERROR("Failed to create background persistence functions"); // It's possible that some functions were added before the failure, so // remove these @@ -222,8 +187,7 @@ bool CBackgroundPersister::startBackgroundPersist(core_t::TTime timeOfPersistenc LOG_INFO("Background persist starting background thread"); - if (this->startPersist() == false) - { + if (this->startPersist() == false) { LOG_ERROR("Failed to start background persistence"); this->clear(); return false; @@ -232,19 +196,14 @@ bool CBackgroundPersister::startBackgroundPersist(core_t::TTime timeOfPersistenc return true; } -CBackgroundPersister::CBackgroundThread::CBackgroundThread(CBackgroundPersister &owner) - : m_Owner(owner) -{ +CBackgroundPersister::CBackgroundThread::CBackgroundThread(CBackgroundPersister& owner) : m_Owner(owner) { } -void CBackgroundPersister::CBackgroundThread::run() -{ +void CBackgroundPersister::CBackgroundThread::run() { // The isBusy check will prevent concurrent access to // m_Owner.m_PersistFuncs here - while (!m_Owner.m_PersistFuncs.empty()) - { - if (!m_Owner.m_IsShutdown) - { + while (!m_Owner.m_PersistFuncs.empty()) { + if (!m_Owner.m_IsShutdown) { m_Owner.m_PersistFuncs.front()(m_Owner.m_DataAdder); } m_Owner.m_PersistFuncs.pop_front(); @@ -254,12 +213,8 @@ void CBackgroundPersister::CBackgroundThread::run() m_Owner.m_IsBusy = false; } -void CBackgroundPersister::CBackgroundThread::shutdown() -{ +void CBackgroundPersister::CBackgroundThread::shutdown() { m_Owner.m_IsShutdown = true; } - - } } - diff --git a/lib/api/CBaseTokenListDataTyper.cc b/lib/api/CBaseTokenListDataTyper.cc index 2887f0d37c..10cc45bcb4 100644 --- a/lib/api/CBaseTokenListDataTyper.cc +++ b/lib/api/CBaseTokenListDataTyper.cc @@ -20,17 +20,14 @@ #include #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { // Initialise statics const std::string CBaseTokenListDataTyper::PRETOKENISED_TOKEN_FIELD("..."); // We use short field names to reduce the state size -namespace -{ +namespace { const std::string TOKEN_TAG("a"); const std::string 
TOKEN_TYPE_COUNT_TAG("b"); const std::string TYPE_TAG("c"); @@ -40,75 +37,50 @@ const std::string TIME_ATTRIBUTE("time"); const std::string EMPTY_STRING; } - -CBaseTokenListDataTyper::CBaseTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr &reverseSearchCreator, +CBaseTokenListDataTyper::CBaseTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr& reverseSearchCreator, double threshold, - const std::string &fieldName) + const std::string& fieldName) : CDataTyper(fieldName), m_ReverseSearchCreator(reverseSearchCreator), m_LowerThreshold(std::min(0.99, std::max(0.01, threshold))), // Upper threshold is half way between the lower threshold and 1 m_UpperThreshold((1.0 + m_LowerThreshold) / 2.0), - m_HasChanged(false) -{ + m_HasChanged(false) { } -void CBaseTokenListDataTyper::dumpStats() const -{ +void CBaseTokenListDataTyper::dumpStats() const { // Type number is vector index plus one int typeNum(1); - for (const auto &type : m_Types) - { - LOG_DEBUG("Type=" << typeNum << '-' << type.numMatches() << - ' ' << type.baseString()); + for (const auto& type : m_Types) { + LOG_DEBUG("Type=" << typeNum << '-' << type.numMatches() << ' ' << type.baseString()); ++typeNum; } } -int CBaseTokenListDataTyper::computeType(bool isDryRun, - const TStrStrUMap &fields, - const std::string &str, - size_t rawStringLen) -{ +int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& fields, const std::string& str, size_t rawStringLen) { // First tokenise string size_t workWeight(0); auto preTokenisedIter = fields.find(PRETOKENISED_TOKEN_FIELD); - if (preTokenisedIter != fields.end()) - { - if (this->addPretokenisedTokens(preTokenisedIter->second, - m_WorkTokenIds, - m_WorkTokenUniqueIds, - workWeight) == false) - { + if (preTokenisedIter != fields.end()) { + if (this->addPretokenisedTokens(preTokenisedIter->second, m_WorkTokenIds, m_WorkTokenUniqueIds, workWeight) == false) { return -1; } - } - else - { - this->tokeniseString(fields, - str, - m_WorkTokenIds, - m_WorkTokenUniqueIds, - workWeight); + } else { + this->tokeniseString(fields, str, m_WorkTokenIds, m_WorkTokenUniqueIds, workWeight); } // Determine the minimum and maximum token weight that could possibly // match the weight we've got - size_t minWeight(CBaseTokenListDataTyper::minMatchingWeight(workWeight, - m_LowerThreshold)); - size_t maxWeight(CBaseTokenListDataTyper::maxMatchingWeight(workWeight, - m_LowerThreshold)); + size_t minWeight(CBaseTokenListDataTyper::minMatchingWeight(workWeight, m_LowerThreshold)); + size_t maxWeight(CBaseTokenListDataTyper::maxMatchingWeight(workWeight, m_LowerThreshold)); // We search previous types in descending order of the number of matches // we've seen for them TSizeSizePrListItr bestSoFarIter(m_TypesByCount.end()); double bestSoFarSimilarity(m_LowerThreshold); - for (TSizeSizePrListItr iter = m_TypesByCount.begin(); - iter != m_TypesByCount.end(); - ++iter) - { - const CTokenListType &compType = m_Types[iter->second]; - const TSizeSizePrVec &baseTokenIds = compType.baseTokenIds(); + for (TSizeSizePrListItr iter = m_TypesByCount.begin(); iter != m_TypesByCount.end(); ++iter) { + const CTokenListType& compType = m_Types[iter->second]; + const TSizeSizePrVec& baseTokenIds = compType.baseTokenIds(); size_t baseWeight(compType.baseWeight()); // Check whether the current record matches the search for the existing @@ -116,16 +88,13 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, // further checks. 
The first condition here ensures that we never say // a string with tokens matches the reverse search of a string with no // tokens (which the other criteria alone might say matched). - bool matchesSearch((baseWeight == 0) == (workWeight == 0) && - compType.maxMatchingStringLen() >= rawStringLen && + bool matchesSearch((baseWeight == 0) == (workWeight == 0) && compType.maxMatchingStringLen() >= rawStringLen && compType.isMissingCommonTokenWeightZero(m_WorkTokenUniqueIds) && compType.containsCommonTokensInOrder(m_WorkTokenIds)); - if (!matchesSearch) - { + if (!matchesSearch) { // Quickly rule out wildly different token weights prior to doing // the expensive similarity calculations - if (baseWeight < minWeight || baseWeight > maxWeight) - { + if (baseWeight < minWeight || baseWeight > maxWeight) { continue; } @@ -134,46 +103,31 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, size_t origUniqueTokenWeight(compType.origUniqueTokenWeight()); size_t commonUniqueTokenWeight(compType.commonUniqueTokenWeight()); size_t missingCommonTokenWeight(compType.missingCommonTokenWeight(m_WorkTokenUniqueIds)); - double proportionOfOrig(double(commonUniqueTokenWeight - missingCommonTokenWeight) / - double(origUniqueTokenWeight)); - if (proportionOfOrig < m_LowerThreshold) - { + double proportionOfOrig(double(commonUniqueTokenWeight - missingCommonTokenWeight) / double(origUniqueTokenWeight)); + if (proportionOfOrig < m_LowerThreshold) { continue; } } - double similarity(this->similarity(m_WorkTokenIds, - workWeight, - baseTokenIds, - baseWeight)); + double similarity(this->similarity(m_WorkTokenIds, workWeight, baseTokenIds, baseWeight)); LOG_TRACE(similarity << '-' << compType.baseString() << '|' << str); - if (matchesSearch || similarity > m_UpperThreshold) - { - if (similarity <= m_LowerThreshold) - { + if (matchesSearch || similarity > m_UpperThreshold) { + if (similarity <= m_LowerThreshold) { // Not an ideal situation, but log at trace level to avoid // excessive log file spam - LOG_TRACE("Reverse search match below threshold : " << - similarity << '-' << compType.baseString() << '|' << str); + LOG_TRACE("Reverse search match below threshold : " << similarity << '-' << compType.baseString() << '|' << str); } // This is a strong match, so accept it immediately and stop // looking for better matches - use vector index plus one as type int type(1 + int(iter->second)); - this->addTypeMatch(isDryRun, - str, - rawStringLen, - m_WorkTokenIds, - m_WorkTokenUniqueIds, - similarity, - iter); + this->addTypeMatch(isDryRun, str, rawStringLen, m_WorkTokenIds, m_WorkTokenUniqueIds, similarity, iter); return type; } - if (similarity > bestSoFarSimilarity) - { + if (similarity > bestSoFarSimilarity) { // This is a weak match, but remember it because it's the best we've // seen bestSoFarIter = iter; @@ -181,44 +135,29 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, // Recalculate the minimum and maximum token counts that might // produce a better match - minWeight = CBaseTokenListDataTyper::minMatchingWeight(workWeight, - similarity); - maxWeight = CBaseTokenListDataTyper::maxMatchingWeight(workWeight, - similarity); + minWeight = CBaseTokenListDataTyper::minMatchingWeight(workWeight, similarity); + maxWeight = CBaseTokenListDataTyper::maxMatchingWeight(workWeight, similarity); } } - if (bestSoFarIter != m_TypesByCount.end()) - { + if (bestSoFarIter != m_TypesByCount.end()) { // Return the best match - use vector index plus one as type int type(1 + int(bestSoFarIter->second)); - 
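
The control flow above is a two-threshold scheme: a similarity above the upper threshold (half way between the user's lower threshold and 1.0, per the constructor earlier in this file) is accepted immediately, while anything between the two thresholds is only remembered as the best candidate so far. Stripped of the weight pruning and reverse-search checks, the scheme reduces to this sketch (illustrative only, not the class's API):

    #include <cstddef>
    #include <vector>

    // Two-threshold category search: immediate accept on a strong match,
    // otherwise track the best weak match above the lower threshold.
    int bestCategory(const std::vector<double>& similarities, double lower) {
        double upper = (1.0 + lower) / 2.0;
        int best = -1;          // -1 means "create a new category"
        double bestSim = lower; // only candidates above the lower threshold count
        for (std::size_t i = 0; i < similarities.size(); ++i) {
            if (similarities[i] > upper) {
                return static_cast<int>(i); // strong match: accept immediately
            }
            if (similarities[i] > bestSim) {
                bestSim = similarities[i];  // weak match: best so far
                best = static_cast<int>(i);
            }
        }
        return best;
    }
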
this->addTypeMatch(isDryRun, - str, - rawStringLen, - m_WorkTokenIds, - m_WorkTokenUniqueIds, - bestSoFarSimilarity, - bestSoFarIter); + this->addTypeMatch(isDryRun, str, rawStringLen, m_WorkTokenIds, m_WorkTokenUniqueIds, bestSoFarSimilarity, bestSoFarIter); return type; } // If we get here we haven't matched, so create a new type - CTokenListType obj(isDryRun, - str, - rawStringLen, - m_WorkTokenIds, - workWeight, - m_WorkTokenUniqueIds); + CTokenListType obj(isDryRun, str, rawStringLen, m_WorkTokenIds, workWeight, m_WorkTokenUniqueIds); m_TypesByCount.push_back(TSizeSizePr(1, m_Types.size())); m_Types.push_back(obj); m_HasChanged = true; // Increment the counts of types that use a given token - for (const auto &workTokenId : m_WorkTokenIds) - { + for (const auto& workTokenId : m_WorkTokenIds) { // We get away with casting away constness ONLY because the type count // is not used in any of the multi-index keys - const_cast<CTokenInfoItem &>(m_TokenIdLookup[workTokenId.first]).incTypeCount(); + const_cast<CTokenInfoItem&>(m_TokenIdLookup[workTokenId.first]).incTypeCount(); } // Type is vector index plus one @@ -226,13 +165,11 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, } bool CBaseTokenListDataTyper::createReverseSearch(int type, - std::string &part1, - std::string &part2, - size_t &maxMatchingLength, - bool &wasCached) -{ - if (m_ReverseSearchCreator == 0) - { + std::string& part1, + std::string& part2, + size_t& maxMatchingLength, + bool& wasCached) { + if (m_ReverseSearchCreator == 0) { LOG_ERROR("Cannot create reverse search - no reverse search creator"); part1.clear(); @@ -242,11 +179,9 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type, // Find the correct type object - type is vector index plus one - if (type < 1 || static_cast<size_t>(type) > m_Types.size()) - { + if (type < 1 || static_cast<size_t>(type) > m_Types.size()) { // -1 is a special case for a NULL/empty field - if (type != -1) - { + if (type != -1) { LOG_ERROR("Programmatic error - invalid type: " << type); part1.clear(); @@ -258,27 +193,21 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type, return m_ReverseSearchCreator->createNullSearch(part1, part2); } - CTokenListType &typeObj = m_Types[type - 1]; + CTokenListType& typeObj = m_Types[type - 1]; maxMatchingLength = typeObj.maxMatchingStringLen(); // If we can retrieve cached reverse search terms we'll save a lot of time - if (typeObj.cachedReverseSearch(part1, part2) == true) - { + if (typeObj.cachedReverseSearch(part1, part2) == true) { wasCached = true; return true; } - const TSizeSizePrVec &baseTokenIds = typeObj.baseTokenIds(); - const TSizeSizePrVec &commonUniqueTokenIds = typeObj.commonUniqueTokenIds(); - if (commonUniqueTokenIds.empty()) - { + const TSizeSizePrVec& baseTokenIds = typeObj.baseTokenIds(); + const TSizeSizePrVec& commonUniqueTokenIds = typeObj.commonUniqueTokenIds(); + if (commonUniqueTokenIds.empty()) { // There's quite a high chance this call will return false - if (m_ReverseSearchCreator->createNoUniqueTokenSearch(type, - typeObj.baseString(), - typeObj.maxMatchingStringLen(), - part1, - part2) == false) - { + if (m_ReverseSearchCreator->createNoUniqueTokenSearch(type, typeObj.baseString(), typeObj.maxMatchingStringLen(), part1, part2) == + false) { // More detail should have been logged by the failed call LOG_ERROR("Could not create reverse search"); @@ -300,18 +229,12 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type, using TSizeSizeSizePrMMap = std::multimap<size_t, TSizeSizePr>; TSizeSizeSizePrMMap rareIdsWithCost; size_t 
lowestCost(std::numeric_limits<size_t>::max()); - for (const auto &commonUniqueTokenId : commonUniqueTokenIds) - { + for (const auto& commonUniqueTokenId : commonUniqueTokenIds) { size_t tokenId(commonUniqueTokenId.first); - size_t occurrences(std::count_if(baseTokenIds.begin(), - baseTokenIds.end(), - CSizePairFirstElementEquals(tokenId))); - const CTokenInfoItem &info = m_TokenIdLookup[tokenId]; - size_t cost(m_ReverseSearchCreator->costOfToken(info.str(), - occurrences)); - rareIdsWithCost.insert(TSizeSizeSizePrMMap::value_type(info.typeCount(), - TSizeSizePr(tokenId, - cost))); + size_t occurrences(std::count_if(baseTokenIds.begin(), baseTokenIds.end(), CSizePairFirstElementEquals(tokenId))); + const CTokenInfoItem& info = m_TokenIdLookup[tokenId]; + size_t cost(m_ReverseSearchCreator->costOfToken(info.str(), occurrences)); + rareIdsWithCost.insert(TSizeSizeSizePrMMap::value_type(info.typeCount(), TSizeSizePr(tokenId, cost))); lowestCost = std::min(cost, lowestCost); } @@ -319,18 +242,13 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type, TSizeSet costedCommonUniqueTokenIds; size_t cheapestCost(std::numeric_limits<size_t>::max()); auto cheapestIter = rareIdsWithCost.end(); - for (auto iter = rareIdsWithCost.begin(); - iter != rareIdsWithCost.end() && availableCost > lowestCost; - ++iter) - { - if (iter->second.second < cheapestCost) - { + for (auto iter = rareIdsWithCost.begin(); iter != rareIdsWithCost.end() && availableCost > lowestCost; ++iter) { + if (iter->second.second < cheapestCost) { cheapestCost = iter->second.second; cheapestIter = iter; } - if (availableCost < iter->second.second) - { + if (availableCost < iter->second.second) { // We can't afford this token continue; } @@ -340,19 +258,15 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type, availableCost -= iter->second.second; } - if (costedCommonUniqueTokenIds.empty()) - { - if (cheapestIter == rareIdsWithCost.end()) - { + if (costedCommonUniqueTokenIds.empty()) { + if (cheapestIter == rareIdsWithCost.end()) { LOG_ERROR("Inconsistency - rareIdsWithCost is empty but " - "commonUniqueTokenIds wasn't for " << type); - } - else - { + "commonUniqueTokenIds wasn't for " + << type); + } else { LOG_ERROR("No token was short enough to include in reverse search " - "for " << type << - " - cheapest token was " << cheapestIter->second.first << - " with cost " << cheapestCost); + "for " + << type << " - cheapest token was " << cheapestIter->second.first << " with cost " << cheapestCost); } part1.clear(); @@ -364,30 +278,18 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type, // If we get here we're going to create a search in the standard way - there // shouldn't be any more errors after this point - m_ReverseSearchCreator->initStandardSearch(type, - typeObj.baseString(), - typeObj.maxMatchingStringLen(), - part1, - part2); - - for (auto costedCommonUniqueTokenId : costedCommonUniqueTokenIds) - { - m_ReverseSearchCreator->addCommonUniqueToken(m_TokenIdLookup[costedCommonUniqueTokenId].str(), - part1, - part2); + m_ReverseSearchCreator->initStandardSearch(type, typeObj.baseString(), typeObj.maxMatchingStringLen(), part1, part2); + + for (auto costedCommonUniqueTokenId : costedCommonUniqueTokenIds) { + m_ReverseSearchCreator->addCommonUniqueToken(m_TokenIdLookup[costedCommonUniqueTokenId].str(), part1, part2); } bool first(true); size_t end(typeObj.outOfOrderCommonTokenIndex()); - for (size_t index = 0; index < end; ++index) - { + for (size_t index = 0; index < end; ++index) { size_t tokenId(baseTokenIds[index].first); - 
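
The token selection above is a greedy budgeted choice: tokens arrive ordered by how many categories they occur in (rarest first, because the multimap is keyed on type count), and each is included in the reverse search only while its cost fits the remaining budget. The same idea in isolation (illustrative names, a sketch rather than the class's method):

    #include <cstddef>
    #include <map>
    #include <set>
    #include <utility>

    // Greedy budgeted pick: visit (typeCount -> (tokenId, cost)) entries in
    // ascending typeCount order, spend the budget on affordable tokens.
    std::set<std::size_t> pickTokens(const std::multimap<std::size_t, std::pair<std::size_t, std::size_t>>& rareIdsWithCost,
                                     std::size_t budget,
                                     std::size_t lowestCost) {
        std::set<std::size_t> chosen;
        for (auto i = rareIdsWithCost.begin(); i != rareIdsWithCost.end() && budget > lowestCost; ++i) {
            std::size_t cost = i->second.second;
            if (cost > budget) {
                continue; // cannot afford this token, but cheaper ones may follow
            }
            chosen.insert(i->second.first);
            budget -= cost;
        }
        return chosen;
    }
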
if (costedCommonUniqueTokenIds.find(tokenId) != costedCommonUniqueTokenIds.end()) - { - m_ReverseSearchCreator->addInOrderCommonToken(m_TokenIdLookup[tokenId].str(), - first, - part1, - part2); + if (costedCommonUniqueTokenIds.find(tokenId) != costedCommonUniqueTokenIds.end()) { + m_ReverseSearchCreator->addInOrderCommonToken(m_TokenIdLookup[tokenId].str(), first, part1, part2); first = false; } } @@ -399,30 +301,24 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type, return true; } -namespace -{ - -class CPairFirstElementGreater -{ - public: - //! This operator is designed for pairs that are small enough for - //! passing by value to be most efficient - template <typename PAIR> - bool operator()(const PAIR pr1, const PAIR pr2) - { - return pr1.first > pr2.first; - } -}; +namespace { +class CPairFirstElementGreater { +public: + //! This operator is designed for pairs that are small enough for + //! passing by value to be most efficient + template <typename PAIR> + bool operator()(const PAIR pr1, const PAIR pr2) { + return pr1.first > pr2.first; + } +}; } -bool CBaseTokenListDataTyper::hasChanged() const -{ +bool CBaseTokenListDataTyper::hasChanged() const { return m_HasChanged; } -bool CBaseTokenListDataTyper::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CBaseTokenListDataTyper::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { m_Types.clear(); m_TypesByCount.clear(); m_TokenIdLookup.clear(); @@ -430,44 +326,33 @@ bool CBaseTokenListDataTyper::acceptRestoreTraverse m_WorkTokenUniqueIds.clear(); m_HasChanged = false; - do - { - const std::string &name = traverser.name(); - if (name == TOKEN_TAG) - { + do { + const std::string& name = traverser.name(); + if (name == TOKEN_TAG) { size_t nextIndex(m_TokenIdLookup.size()); m_TokenIdLookup.push_back(CTokenInfoItem(traverser.value(), nextIndex)); - } - else if (name == TOKEN_TYPE_COUNT_TAG) - { - if (m_TokenIdLookup.empty()) - { - LOG_ERROR("Token type count precedes token string in " << - traverser.value()); + } else if (name == TOKEN_TYPE_COUNT_TAG) { + if (m_TokenIdLookup.empty()) { + LOG_ERROR("Token type count precedes token string in " << traverser.value()); return false; } size_t typeCount(0); - if (core::CStringUtils::stringToType(traverser.value(), - typeCount) == false) - { + if (core::CStringUtils::stringToType(traverser.value(), typeCount) == false) { LOG_ERROR("Invalid token type count in " << traverser.value()); return false; } // We get away with casting away constness ONLY because the type // count is not used in any of the multi-index keys - const_cast<CTokenInfoItem &>(m_TokenIdLookup.back()).typeCount(typeCount); - } - else if (name == TYPE_TAG) - { + const_cast<CTokenInfoItem&>(m_TokenIdLookup.back()).typeCount(typeCount); + } else if (name == TYPE_TAG) { CTokenListType type(traverser); TSizeSizePr countAndIndex(type.numMatches(), m_Types.size()); m_Types.push_back(type); m_TypesByCount.push_back(countAndIndex); } - } - while (traverser.next()); + } while (traverser.next()); // Types are persisted in order of creation, but this list needs to be // sorted by count instead @@ -476,69 +361,47 @@ bool CBaseTokenListDataTyper::acceptRestoreTraverse return true; } -void CBaseTokenListDataTyper::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - CBaseTokenListDataTyper::acceptPersistInserter(m_TokenIdLookup, - m_Types, - inserter); +void CBaseTokenListDataTyper::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + 
CBaseTokenListDataTyper::acceptPersistInserter(m_TokenIdLookup, m_Types, inserter); } -void CBaseTokenListDataTyper::acceptPersistInserter(const TTokenMIndex &tokenIdLookup, - const TTokenListTypeVec &types, - core::CStatePersistInserter &inserter) -{ - for (const CTokenInfoItem &item : tokenIdLookup) - { +void CBaseTokenListDataTyper::acceptPersistInserter(const TTokenMIndex& tokenIdLookup, + const TTokenListTypeVec& types, + core::CStatePersistInserter& inserter) { + for (const CTokenInfoItem& item : tokenIdLookup) { inserter.insertValue(TOKEN_TAG, item.str()); inserter.insertValue(TOKEN_TYPE_COUNT_TAG, item.typeCount()); } - for (const CTokenListType &type : types) - { - inserter.insertLevel(TYPE_TAG, - boost::bind(&CTokenListType::acceptPersistInserter, - &type, - _1)); + for (const CTokenListType& type : types) { + inserter.insertLevel(TYPE_TAG, boost::bind(&CTokenListType::acceptPersistInserter, &type, _1)); } } -CDataTyper::TPersistFunc CBaseTokenListDataTyper::makePersistFunc() const -{ - return boost::bind(&CBaseTokenListDataTyper::acceptPersistInserter, - m_TokenIdLookup, - m_Types, - _1); +CDataTyper::TPersistFunc CBaseTokenListDataTyper::makePersistFunc() const { + return boost::bind(&CBaseTokenListDataTyper::acceptPersistInserter, m_TokenIdLookup, m_Types, _1); } void CBaseTokenListDataTyper::addTypeMatch(bool isDryRun, - const std::string &str, + const std::string& str, size_t rawStringLen, - const TSizeSizePrVec &tokenIds, - const TSizeSizeMap &tokenUniqueIds, + const TSizeSizePrVec& tokenIds, + const TSizeSizeMap& tokenUniqueIds, double similarity, - TSizeSizePrListItr &iter) -{ - if (m_Types[iter->second].addString(isDryRun, - str, - rawStringLen, - tokenIds, - tokenUniqueIds, - similarity) == true) - { + TSizeSizePrListItr& iter) { + if (m_Types[iter->second].addString(isDryRun, str, rawStringLen, tokenIds, tokenUniqueIds, similarity) == true) { m_HasChanged = true; } - size_t &count = iter->first; + size_t& count = iter->first; ++count; // Search backwards for the point where the incremented count belongs TSizeSizePrListItr swapIter(m_TypesByCount.end()); TSizeSizePrListItr checkIter(iter); - while (checkIter != m_TypesByCount.begin()) - { + while (checkIter != m_TypesByCount.begin()) { --checkIter; - if (count <= checkIter->first) - { + if (count <= checkIter->first) { break; } swapIter = checkIter; @@ -546,16 +409,13 @@ void CBaseTokenListDataTyper::addTypeMatch(bool isDryRun, // Move the iterator we've matched nearer the front of the list if it // deserves this - if (swapIter != m_TypesByCount.end()) - { + if (swapIter != m_TypesByCount.end()) { std::iter_swap(swapIter, iter); } } -size_t CBaseTokenListDataTyper::minMatchingWeight(size_t weight, double threshold) -{ - if (weight == 0) - { +size_t CBaseTokenListDataTyper::minMatchingWeight(size_t weight, double threshold) { + if (weight == 0) { return 0; } @@ -571,10 +431,8 @@ size_t CBaseTokenListDataTyper::minMatchingWeight(size_t weight, double threshol return static_cast<size_t>(std::floor(double(weight) * threshold + EPSILON)) + 1; } -size_t CBaseTokenListDataTyper::maxMatchingWeight(size_t weight, double threshold) -{ - if (weight == 0) - { +size_t CBaseTokenListDataTyper::maxMatchingWeight(size_t weight, double threshold) { + if (weight == 0) { return 0; } @@ -590,11 +448,9 @@ size_t CBaseTokenListDataTyper::maxMatchingWeight(size_t weight, double threshol return static_cast<size_t>(std::ceil(double(weight) / threshold - EPSILON)) - 1; } -size_t CBaseTokenListDataTyper::idForToken(const std::string &token) -{ +size_t 
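
minMatchingWeight() and maxMatchingWeight() above bound the token weights that could still reach a given similarity threshold: min = floor(weight * threshold + eps) + 1 and max = ceil(weight / threshold - eps) - 1, and any candidate category whose base weight falls outside [min, max] is skipped before the expensive similarity computation. A worked example (EPSILON's value is an assumption here; it only guards against floating-point rounding):

    #include <cmath>
    #include <cstddef>
    #include <iostream>

    // Worked example of the weight window used to prune candidates above.
    int main() {
        const double EPSILON = 0.0000001; // assumed; same role as in the functions above
        std::size_t weight = 100;
        double threshold = 0.7;
        std::size_t minW = static_cast<std::size_t>(std::floor(double(weight) * threshold + EPSILON)) + 1;
        std::size_t maxW = static_cast<std::size_t>(std::ceil(double(weight) / threshold - EPSILON)) - 1;
        std::cout << minW << ' ' << maxW << '\n'; // prints "71 142"
    }
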
CBaseTokenListDataTyper::idForToken(const std::string& token) { auto iter = boost::multi_index::get(m_TokenIdLookup).find(token); - if (iter != boost::multi_index::get(m_TokenIdLookup).end()) - { + if (iter != boost::multi_index::get(m_TokenIdLookup).end()) { return iter->index(); } @@ -603,21 +459,18 @@ size_t CBaseTokenListDataTyper::idForToken(const std::string &token) return nextIndex; } -bool CBaseTokenListDataTyper::addPretokenisedTokens(const std::string &tokensCsv, - TSizeSizePrVec &tokenIds, - TSizeSizeMap &tokenUniqueIds, - size_t &totalWeight) -{ +bool CBaseTokenListDataTyper::addPretokenisedTokens(const std::string& tokensCsv, + TSizeSizePrVec& tokenIds, + TSizeSizeMap& tokenUniqueIds, + size_t& totalWeight) { tokenIds.clear(); tokenUniqueIds.clear(); totalWeight = 0; m_CsvLineParser.reset(tokensCsv); std::string token; - while (!m_CsvLineParser.atEnd()) - { - if (m_CsvLineParser.parseNext(token) == false) - { + while (!m_CsvLineParser.atEnd()) { + if (m_CsvLineParser.parseNext(token) == false) { return false; } @@ -627,79 +480,50 @@ bool CBaseTokenListDataTyper::addPretokenisedTokens(const std::string &tokensCsv return true; } -CBaseTokenListDataTyper::CTokenInfoItem::CTokenInfoItem(const std::string &str, - size_t index) - : m_Str(str), - m_Index(index), - m_TypeCount(0) -{ +CBaseTokenListDataTyper::CTokenInfoItem::CTokenInfoItem(const std::string& str, size_t index) : m_Str(str), m_Index(index), m_TypeCount(0) { } -const std::string &CBaseTokenListDataTyper::CTokenInfoItem::str() const -{ +const std::string& CBaseTokenListDataTyper::CTokenInfoItem::str() const { return m_Str; } -size_t CBaseTokenListDataTyper::CTokenInfoItem::index() const -{ +size_t CBaseTokenListDataTyper::CTokenInfoItem::index() const { return m_Index; } -size_t CBaseTokenListDataTyper::CTokenInfoItem::typeCount() const -{ +size_t CBaseTokenListDataTyper::CTokenInfoItem::typeCount() const { return m_TypeCount; } -void CBaseTokenListDataTyper::CTokenInfoItem::typeCount(size_t typeCount) -{ +void CBaseTokenListDataTyper::CTokenInfoItem::typeCount(size_t typeCount) { m_TypeCount = typeCount; } -void CBaseTokenListDataTyper::CTokenInfoItem::incTypeCount() -{ +void CBaseTokenListDataTyper::CTokenInfoItem::incTypeCount() { ++m_TypeCount; } -CBaseTokenListDataTyper::CSizePairFirstElementEquals::CSizePairFirstElementEquals(size_t value) - : m_Value(value) -{ +CBaseTokenListDataTyper::CSizePairFirstElementEquals::CSizePairFirstElementEquals(size_t value) : m_Value(value) { } -CBaseTokenListDataTyper::SIdTranslater::SIdTranslater(const CBaseTokenListDataTyper &typer, - const TSizeSizePrVec &tokenIds, - char separator) - : s_Typer(typer), - s_TokenIds(tokenIds), - s_Separator(separator) -{ +CBaseTokenListDataTyper::SIdTranslater::SIdTranslater(const CBaseTokenListDataTyper& typer, const TSizeSizePrVec& tokenIds, char separator) + : s_Typer(typer), s_TokenIds(tokenIds), s_Separator(separator) { } -std::ostream &operator<<(std::ostream &strm, - const CBaseTokenListDataTyper::SIdTranslater &translator) -{ - for (auto iter = translator.s_TokenIds.begin(); - iter != translator.s_TokenIds.end(); - ++iter) - { - if (iter != translator.s_TokenIds.begin()) - { +std::ostream& operator<<(std::ostream& strm, const CBaseTokenListDataTyper::SIdTranslater& translator) { + for (auto iter = translator.s_TokenIds.begin(); iter != translator.s_TokenIds.end(); ++iter) { + if (iter != translator.s_TokenIds.begin()) { strm << translator.s_Separator; } - if (iter->first < translator.s_Typer.m_TokenIdLookup.size()) - { + if (iter->first < 
translator.s_Typer.m_TokenIdLookup.size()) { strm << translator.s_Typer.m_TokenIdLookup[iter->first].str(); - } - else - { + } else { strm << "Out of bounds!"; } } return strm; } - - } } - diff --git a/lib/api/CBenchMarker.cc b/lib/api/CBenchMarker.cc index 490061fd3a..6d90d943b7 100644 --- a/lib/api/CBenchMarker.cc +++ b/lib/api/CBenchMarker.cc @@ -14,74 +14,51 @@ #include #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ - - -CBenchMarker::CBenchMarker() - : m_TotalMessages(0), - m_ScoredMessages(0) -{ +CBenchMarker::CBenchMarker() : m_TotalMessages(0), m_ScoredMessages(0) { } -bool CBenchMarker::init(const std::string &regexFilename) -{ +bool CBenchMarker::init(const std::string& regexFilename) { // Reset in case of reinitialisation m_TotalMessages = 0; m_ScoredMessages = 0; m_Measures.clear(); std::ifstream ifs(regexFilename.c_str()); - if (!ifs.is_open()) - { + if (!ifs.is_open()) { return false; } std::string line; - while (std::getline(ifs, line)) - { - if (line.empty()) - { + while (std::getline(ifs, line)) { + if (line.empty()) { continue; } core::CRegex regex; - if (regex.init(line) == false) - { + if (regex.init(line) == false) { return false; } - m_Measures.push_back(TRegexIntSizeStrPrMapPr(regex, - TIntSizeStrPrMap())); + m_Measures.push_back(TRegexIntSizeStrPrMapPr(regex, TIntSizeStrPrMap())); } return (m_Measures.size() > 0); } -void CBenchMarker::addResult(const std::string &message, - int type) -{ +void CBenchMarker::addResult(const std::string& message, int type) { bool scored(false); size_t position(0); - for (TRegexIntSizeStrPrMapPrVecItr measureVecIter = m_Measures.begin(); - measureVecIter != m_Measures.end(); - ++measureVecIter) - { - const core::CRegex &regex = measureVecIter->first; - if (regex.search(message, position) == true) - { - TIntSizeStrPrMap &counts = measureVecIter->second; + for (TRegexIntSizeStrPrMapPrVecItr measureVecIter = m_Measures.begin(); measureVecIter != m_Measures.end(); ++measureVecIter) { + const core::CRegex& regex = measureVecIter->first; + if (regex.search(message, position) == true) { + TIntSizeStrPrMap& counts = measureVecIter->second; TIntSizeStrPrMapItr mapIter = counts.find(type); - if (mapIter == counts.end()) - { - counts.insert(TIntSizeStrPrMap::value_type(type, - TSizeStrPr(1, message))); - } - else - { + if (mapIter == counts.end()) { + counts.insert(TIntSizeStrPrMap::value_type(type, TSizeStrPr(1, message))); + } else { ++(mapIter->second.first); } ++m_ScoredMessages; @@ -92,14 +69,12 @@ void CBenchMarker::addResult(const std::string &message, ++m_TotalMessages; - if (!scored) - { + if (!scored) { LOG_TRACE("Message not included in scoring: " << message); } } -void CBenchMarker::dumpResults() const -{ +void CBenchMarker::dumpResults() const { // Sort the results in descending order of actual type occurrence using TSizeRegexIntSizeStrPrMapPrVecCItrPr = std::pair<size_t, TRegexIntSizeStrPrMapPrVecCItr>; using TSizeRegexIntSizeStrPrMapPrVecCItrPrVec = std::vector<TSizeRegexIntSizeStrPrMapPrVecCItrPr>; @@ -108,17 +83,11 @@ void CBenchMarker::dumpResults() const TSizeRegexIntSizeStrPrMapPrVecCItrPrVec sortVec; sortVec.reserve(m_Measures.size()); - for (TRegexIntSizeStrPrMapPrVecCItr measureVecIter = m_Measures.begin(); - measureVecIter != m_Measures.end(); - ++measureVecIter) - { - const TIntSizeStrPrMap &counts = measureVecIter->second; + for (TRegexIntSizeStrPrMapPrVecCItr measureVecIter = m_Measures.begin(); measureVecIter != m_Measures.end(); ++measureVecIter) { + const TIntSizeStrPrMap& counts = measureVecIter->second; size_t total(0); - for (TIntSizeStrPrMapCItr mapIter = 
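
CBenchMarker's measurement above reduces to: attribute each message to the first manual category (regex) that matches it, then count which machine-assigned type that message received. A condensed sketch using std::regex in place of core::CRegex (an assumption made only to keep the example self-contained):

    #include <cstddef>
    #include <map>
    #include <regex>
    #include <string>
    #include <utility>
    #include <vector>

    using TypeCounts = std::map<int, std::size_t>;

    // Attribute 'message' to the first matching manual category and record
    // the machine-assigned 'type' against it; unmatched messages are unscored.
    void addResult(std::vector<std::pair<std::regex, TypeCounts>>& measures, const std::string& message, int type) {
        for (auto& measure : measures) {
            if (std::regex_search(message, measure.first)) {
                ++measure.second[type]; // count this ML type for the category
                return;
            }
        }
    }
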
counts.begin(); - mapIter != counts.end(); - ++mapIter) - { + for (TIntSizeStrPrMapCItr mapIter = counts.begin(); mapIter != counts.end(); ++mapIter) { total += mapIter->second.first; } @@ -141,43 +110,31 @@ void CBenchMarker::dumpResults() const // Iterate backwards through the sorted vector, so that the most common // actual types are looked at first - for (TSizeRegexIntSizeStrPrMapPrVecCItrPrVecCItr sortedVecIter = sortVec.begin(); - sortedVecIter != sortVec.end(); - ++sortedVecIter) - { + for (TSizeRegexIntSizeStrPrMapPrVecCItrPrVecCItr sortedVecIter = sortVec.begin(); sortedVecIter != sortVec.end(); ++sortedVecIter) { size_t total(sortedVecIter->first); - if (total > 0) - { + if (total > 0) { ++observedActuals; } TRegexIntSizeStrPrMapPrVecCItr measureVecIter = sortedVecIter->second; - const core::CRegex ®ex = measureVecIter->first; - strm << "Manual category defined by regex " << regex.str() << core_t::LINE_ENDING - << "\tNumber of messages in manual category " << total << core_t::LINE_ENDING; + const core::CRegex& regex = measureVecIter->first; + strm << "Manual category defined by regex " << regex.str() << core_t::LINE_ENDING << "\tNumber of messages in manual category " + << total << core_t::LINE_ENDING; - const TIntSizeStrPrMap &counts = measureVecIter->second; - strm << "\tNumber of Ml categories that include this manual category " - << counts.size() << core_t::LINE_ENDING; + const TIntSizeStrPrMap& counts = measureVecIter->second; + strm << "\tNumber of Ml categories that include this manual category " << counts.size() << core_t::LINE_ENDING; - if (counts.size() == 1) - { + if (counts.size() == 1) { size_t count(counts.begin()->second.first); int type(counts.begin()->first); - if (usedTypes.find(type) != usedTypes.end()) - { - strm << "\t\t" << count << "\t(CATEGORY ALREADY USED)\t" - << counts.begin()->second.second << core_t::LINE_ENDING; - } - else - { + if (usedTypes.find(type) != usedTypes.end()) { + strm << "\t\t" << count << "\t(CATEGORY ALREADY USED)\t" << counts.begin()->second.second << core_t::LINE_ENDING; + } else { good += count; usedTypes.insert(type); } - } - else if (counts.size() > 1) - { + } else if (counts.size() > 1) { strm << "\tBreakdown:" << core_t::LINE_ENDING; // Assume the category with the count closest to the actual count is @@ -185,51 +142,36 @@ void CBenchMarker::dumpResults() const // are bad. 
size_t max(0); int maxType(-1); - for (TIntSizeStrPrMapCItr mapIter = counts.begin(); - mapIter != counts.end(); - ++mapIter) - { + for (TIntSizeStrPrMapCItr mapIter = counts.begin(); mapIter != counts.end(); ++mapIter) { int type(mapIter->first); size_t count(mapIter->second.first); - const std::string &example = mapIter->second.second; + const std::string& example = mapIter->second.second; strm << "\t\t" << count; - if (usedTypes.find(type) != usedTypes.end()) - { + if (usedTypes.find(type) != usedTypes.end()) { strm << "\t(CATEGORY ALREADY USED)"; - } - else - { - if (count > max) - { + } else { + if (count > max) { max = count; maxType = type; } } strm << '\t' << example << core_t::LINE_ENDING; } - if (maxType > -1) - { + if (maxType > -1) { good += max; usedTypes.insert(maxType); } } } - strm << "Total number of messages passed to benchmarker " - << m_TotalMessages << core_t::LINE_ENDING + strm << "Total number of messages passed to benchmarker " << m_TotalMessages << core_t::LINE_ENDING << "Total number of scored messages " << m_ScoredMessages << core_t::LINE_ENDING - << "Number of scored messages correctly categorised by Ml " - << good << core_t::LINE_ENDING - << "Overall accuracy for scored messages " - << (double(good) / double(m_ScoredMessages)) * 100.0 << '%' << core_t::LINE_ENDING - << "Percentage of manual categories detected at all " - << (double(usedTypes.size()) / double(observedActuals)) * 100.0 << '%'; + << "Number of scored messages correctly categorised by Ml " << good << core_t::LINE_ENDING + << "Overall accuracy for scored messages " << (double(good) / double(m_ScoredMessages)) * 100.0 << '%' << core_t::LINE_ENDING + << "Percentage of manual categories detected at all " << (double(usedTypes.size()) / double(observedActuals)) * 100.0 << '%'; LOG_DEBUG(strm.str()); } - - } } - diff --git a/lib/api/CCategoryExamplesCollector.cc b/lib/api/CCategoryExamplesCollector.cc index 8b4928cde2..23764e93ad 100644 --- a/lib/api/CCategoryExamplesCollector.cc +++ b/lib/api/CCategoryExamplesCollector.cc @@ -15,14 +15,10 @@ #include #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ - -namespace -{ +namespace { const std::string EXAMPLES_BY_CATEGORY_TAG("a"); const std::string CATEGORY_TAG("b"); @@ -34,159 +30,119 @@ const std::string ELLIPSIS(3, '.'); } // unnamed - const size_t CCategoryExamplesCollector::MAX_EXAMPLE_LENGTH(1000); - -CCategoryExamplesCollector::CCategoryExamplesCollector(std::size_t maxExamples) - : m_MaxExamples(maxExamples) -{ +CCategoryExamplesCollector::CCategoryExamplesCollector(std::size_t maxExamples) : m_MaxExamples(maxExamples) { } -CCategoryExamplesCollector::CCategoryExamplesCollector(std::size_t maxExamples, core::CStateRestoreTraverser &traverser) - : m_MaxExamples(maxExamples) -{ - traverser.traverseSubLevel( - boost::bind(&CCategoryExamplesCollector::acceptRestoreTraverser, this, _1)); +CCategoryExamplesCollector::CCategoryExamplesCollector(std::size_t maxExamples, core::CStateRestoreTraverser& traverser) + : m_MaxExamples(maxExamples) { + traverser.traverseSubLevel(boost::bind(&CCategoryExamplesCollector::acceptRestoreTraverser, this, _1)); } -bool CCategoryExamplesCollector::add(std::size_t category, const std::string &example) -{ - if (m_MaxExamples == 0) - { +bool CCategoryExamplesCollector::add(std::size_t category, const std::string& example) { + if (m_MaxExamples == 0) { return false; } - TStrSet &examplesForCategory = m_ExamplesByCategory[category]; - if (examplesForCategory.size() >= m_MaxExamples) - { + TStrSet& 
examplesForCategory = m_ExamplesByCategory[category]; + if (examplesForCategory.size() >= m_MaxExamples) { return false; } return examplesForCategory.insert(truncateExample(example)).second; } -std::size_t CCategoryExamplesCollector::numberOfExamplesForCategory(std::size_t category) const -{ +std::size_t CCategoryExamplesCollector::numberOfExamplesForCategory(std::size_t category) const { auto iterator = m_ExamplesByCategory.find(category); return (iterator == m_ExamplesByCategory.end()) ? 0 : iterator->second.size(); } -const CCategoryExamplesCollector::TStrSet &CCategoryExamplesCollector::examples( - std::size_t category) const -{ +const CCategoryExamplesCollector::TStrSet& CCategoryExamplesCollector::examples(std::size_t category) const { auto iterator = m_ExamplesByCategory.find(category); - if (iterator == m_ExamplesByCategory.end()) - { + if (iterator == m_ExamplesByCategory.end()) { return EMPTY_EXAMPLES; } return iterator->second; } -void CCategoryExamplesCollector::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CCategoryExamplesCollector::acceptPersistInserter(core::CStatePersistInserter& inserter) const { // Persist the examples sorted by category ID to make it easier to compare // persisted state - using TSizeStrSetCPtrPr = std::pair<std::size_t, const TStrSet*>; + using TSizeStrSetCPtrPr = std::pair<std::size_t, const TStrSet*>; using TSizeStrSetCPtrPrVec = std::vector<TSizeStrSetCPtrPr>; TSizeStrSetCPtrPrVec orderedData; orderedData.reserve(m_ExamplesByCategory.size()); - for (const auto &exampleByCategory : m_ExamplesByCategory) - { - orderedData.emplace_back(exampleByCategory.first, - &exampleByCategory.second); + for (const auto& exampleByCategory : m_ExamplesByCategory) { + orderedData.emplace_back(exampleByCategory.first, &exampleByCategory.second); } std::sort(orderedData.begin(), orderedData.end()); - for (const auto &exampleByCategory : orderedData) - { - inserter.insertLevel(EXAMPLES_BY_CATEGORY_TAG, - boost::bind(&CCategoryExamplesCollector::persistExamples, - this, - exampleByCategory.first, - boost::cref(*exampleByCategory.second), - _1)); + for (const auto& exampleByCategory : orderedData) { + inserter.insertLevel( + EXAMPLES_BY_CATEGORY_TAG, + boost::bind( + &CCategoryExamplesCollector::persistExamples, this, exampleByCategory.first, boost::cref(*exampleByCategory.second), _1)); } } void CCategoryExamplesCollector::persistExamples(std::size_t category, - const TStrSet &examples, - core::CStatePersistInserter &inserter) const -{ + const TStrSet& examples, + core::CStatePersistInserter& inserter) const { inserter.insertValue(CATEGORY_TAG, category); - for (TStrSetCItr itr = examples.begin(); itr != examples.end(); ++itr) - { + for (TStrSetCItr itr = examples.begin(); itr != examples.end(); ++itr) { inserter.insertValue(EXAMPLE_TAG, *itr); } } -bool CCategoryExamplesCollector::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CCategoryExamplesCollector::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { m_ExamplesByCategory.clear(); - do - { - const std::string &name = traverser.name(); - if (name == EXAMPLES_BY_CATEGORY_TAG) - { - if (traverser.traverseSubLevel( - boost::bind(&CCategoryExamplesCollector::restoreExamples, this, _1)) == false) - { + do { + const std::string& name = traverser.name(); + if (name == EXAMPLES_BY_CATEGORY_TAG) { + if (traverser.traverseSubLevel(boost::bind(&CCategoryExamplesCollector::restoreExamples, this, _1)) == false) { LOG_ERROR("Error restoring examples by category"); return false; } } - } - while (traverser.next()); + } while 
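
The sort in acceptPersistInserter() above exists because m_ExamplesByCategory is an unordered map: iterating it directly would persist categories in an unspecified order, making two state dumps of identical collectors differ textually. Building cheap (key, pointer) pairs and sorting those fixes the order without copying the example sets. The same trick in isolation (a sketch under assumed types):

    #include <algorithm>
    #include <cstddef>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    // Deterministic view of an unordered_map for persistence: sort
    // (key, pointer-to-value) pairs; keys are unique, so the order is total.
    std::vector<std::pair<std::size_t, const std::string*>> orderedView(const std::unordered_map<std::size_t, std::string>& m) {
        std::vector<std::pair<std::size_t, const std::string*>> ordered;
        ordered.reserve(m.size());
        for (const auto& entry : m) {
            ordered.emplace_back(entry.first, &entry.second); // no value copies
        }
        std::sort(ordered.begin(), ordered.end()); // pairs compare by key first
        return ordered;
    }
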
(traverser.next()); return true; } -bool CCategoryExamplesCollector::restoreExamples(core::CStateRestoreTraverser &traverser) -{ +bool CCategoryExamplesCollector::restoreExamples(core::CStateRestoreTraverser& traverser) { std::size_t category = 0; TStrSet examples; - do - { - const std::string &name = traverser.name(); - if (name == CATEGORY_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), category) == false) - { + do { + const std::string& name = traverser.name(); + if (name == CATEGORY_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), category) == false) { LOG_ERROR("Error restoring category: " << traverser.value()); return false; } - } - else if (name == EXAMPLE_TAG) - { + } else if (name == EXAMPLE_TAG) { examples.insert(traverser.value()); } - } - while (traverser.next()); + } while (traverser.next()); - LOG_TRACE("Restoring examples for category " << category << ": " << - core::CContainerPrinter::print(examples)); + LOG_TRACE("Restoring examples for category " << category << ": " << core::CContainerPrinter::print(examples)); m_ExamplesByCategory[category].swap(examples); return true; } -void CCategoryExamplesCollector::clear() -{ +void CCategoryExamplesCollector::clear() { m_ExamplesByCategory.clear(); } -std::string CCategoryExamplesCollector::truncateExample(std::string example) -{ - if (example.length() > MAX_EXAMPLE_LENGTH) - { +std::string CCategoryExamplesCollector::truncateExample(std::string example) { + if (example.length() > MAX_EXAMPLE_LENGTH) { size_t replacePos(MAX_EXAMPLE_LENGTH - ELLIPSIS.length()); // Ensure truncation doesn't result in a partial UTF-8 character - while (replacePos > 0 && - core::CStringUtils::utf8ByteType(example[replacePos]) == -1) - { + while (replacePos > 0 && core::CStringUtils::utf8ByteType(example[replacePos]) == -1) { --replacePos; } example.replace(replacePos, example.length() - replacePos, ELLIPSIS); @@ -196,7 +152,5 @@ std::string CCategoryExamplesCollector::truncateExample(std::string example) // semantics on return return example; } - - } } diff --git a/lib/api/CCmdSkeleton.cc b/lib/api/CCmdSkeleton.cc index a7e01cd437..5dd80ee684 100644 --- a/lib/api/CCmdSkeleton.cc +++ b/lib/api/CCmdSkeleton.cc @@ -14,45 +14,28 @@ #include - -namespace ml -{ -namespace api -{ - - -CCmdSkeleton::CCmdSkeleton(core::CDataSearcher *restoreSearcher, - core::CDataAdder *persister, - CInputParser &inputParser, - CDataProcessor &processor) - : m_RestoreSearcher(restoreSearcher), - m_Persister(persister), - m_InputParser(inputParser), - m_Processor(processor) -{ +namespace ml { +namespace api { + +CCmdSkeleton::CCmdSkeleton(core::CDataSearcher* restoreSearcher, + core::CDataAdder* persister, + CInputParser& inputParser, + CDataProcessor& processor) + : m_RestoreSearcher(restoreSearcher), m_Persister(persister), m_InputParser(inputParser), m_Processor(processor) { } -bool CCmdSkeleton::ioLoop() -{ - if (m_RestoreSearcher == 0) - { +bool CCmdSkeleton::ioLoop() { + if (m_RestoreSearcher == 0) { LOG_DEBUG("No restoration source specified - will not attempt to restore state"); - } - else - { + } else { core_t::TTime completeToTime(0); - if (m_Processor.restoreState(*m_RestoreSearcher, - completeToTime) == false) - { + if (m_Processor.restoreState(*m_RestoreSearcher, completeToTime) == false) { LOG_FATAL("Failed to restore state"); return false; } } - if (m_InputParser.readStream(boost::bind(&CDataProcessor::handleRecord, - &m_Processor, - _1)) == false) - { + if (m_InputParser.readStream(boost::bind(&CDataProcessor::handleRecord, 
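
truncateExample() above must not cut a multi-byte UTF-8 character in half: continuation bytes have the bit pattern 10xxxxxx, which is what core::CStringUtils::utf8ByteType() returning -1 identifies, so the replacement position is walked backwards past them before the ellipsis is spliced in. The same logic with the byte test written out explicitly (a sketch, not the library routine):

    #include <cstddef>
    #include <string>

    // UTF-8-safe truncation: back up past continuation bytes (10xxxxxx) so
    // the ellipsis never replaces a partial character.
    std::string truncateUtf8(std::string s, std::size_t maxLen) {
        static const std::string ELLIPSIS(3, '.');
        if (s.length() > maxLen) {
            std::size_t pos = maxLen - ELLIPSIS.length();
            while (pos > 0 && (static_cast<unsigned char>(s[pos]) & 0xC0) == 0x80) {
                --pos; // still inside a multi-byte character
            }
            s.replace(pos, s.length() - pos, ELLIPSIS);
        }
        return s;
    }
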
&m_Processor, _1)) == false) { LOG_FATAL("Failed to handle all input data"); return false; } @@ -65,30 +48,24 @@ bool CCmdSkeleton::ioLoop() return this->persistState(); } -bool CCmdSkeleton::persistState() -{ - if (m_Persister == 0) - { +bool CCmdSkeleton::persistState() { + if (m_Persister == 0) { LOG_DEBUG("No persistence sink specified - will not attempt to persist state"); return true; } - if (m_Processor.numRecordsHandled() == 0) - { + if (m_Processor.numRecordsHandled() == 0) { LOG_DEBUG("Zero records were handled - will not attempt to persist state"); return true; } // Attempt to persist state - if (m_Processor.persistState(*m_Persister) == false) - { + if (m_Processor.persistState(*m_Persister) == false) { LOG_FATAL("Failed to persist state"); return false; } return true; } - - } } diff --git a/lib/api/CConfigUpdater.cc b/lib/api/CConfigUpdater.cc index 6bec1b8ca5..b8d73396b1 100644 --- a/lib/api/CConfigUpdater.cc +++ b/lib/api/CConfigUpdater.cc @@ -10,10 +10,8 @@ #include #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { const std::string CConfigUpdater::MODEL_DEBUG_CONFIG("modelPlotConfig"); const std::string CConfigUpdater::DETECTOR_RULES("detectorRules"); @@ -22,76 +20,53 @@ const std::string CConfigUpdater::RULES_JSON("rulesJson"); const std::string CConfigUpdater::FILTERS("filters"); const std::string CConfigUpdater::SCHEDULED_EVENTS("scheduledEvents"); -CConfigUpdater::CConfigUpdater(CFieldConfig &fieldConfig, model::CAnomalyDetectorModelConfig &modelConfig) - : m_FieldConfig(fieldConfig), - m_ModelConfig(modelConfig) -{ +CConfigUpdater::CConfigUpdater(CFieldConfig& fieldConfig, model::CAnomalyDetectorModelConfig& modelConfig) + : m_FieldConfig(fieldConfig), m_ModelConfig(modelConfig) { } -bool CConfigUpdater::update(const std::string &config) -{ +bool CConfigUpdater::update(const std::string& config) { boost::property_tree::ptree propTree; - try - { + try { std::istringstream strm(config); boost::property_tree::ini_parser::read_ini(strm, propTree); - } - catch (boost::property_tree::ptree_error &e) - { + } catch (boost::property_tree::ptree_error& e) { LOG_ERROR("Error parsing config from '" << config << "' : " << e.what()); return false; } - for (boost::property_tree::ptree::const_iterator stanzaItr = propTree.begin(); - stanzaItr != propTree.end(); - ++stanzaItr) - { - const std::string &stanzaName = stanzaItr->first; - const boost::property_tree::ptree &subTree = stanzaItr->second; + for (boost::property_tree::ptree::const_iterator stanzaItr = propTree.begin(); stanzaItr != propTree.end(); ++stanzaItr) { + const std::string& stanzaName = stanzaItr->first; + const boost::property_tree::ptree& subTree = stanzaItr->second; - if (stanzaName == MODEL_DEBUG_CONFIG) - { - if (m_ModelConfig.configureModelPlot(subTree) == false) - { + if (stanzaName == MODEL_DEBUG_CONFIG) { + if (m_ModelConfig.configureModelPlot(subTree) == false) { LOG_ERROR("Could not parse modelPlotConfig"); return false; } - } - else if (stanzaName == DETECTOR_RULES) - { + } else if (stanzaName == DETECTOR_RULES) { std::string detectorIndexString = subTree.get(DETECTOR_INDEX, std::string()); int detectorIndex; - if (core::CStringUtils::stringToType(detectorIndexString, detectorIndex) == false) - { + if (core::CStringUtils::stringToType(detectorIndexString, detectorIndex) == false) { LOG_ERROR("Invalid detector index: " << detectorIndexString); return false; } std::string rulesJson = subTree.get(RULES_JSON, std::string()); - if (m_FieldConfig.parseRules(detectorIndex, rulesJson) == 
false) - { + if (m_FieldConfig.parseRules(detectorIndex, rulesJson) == false) { LOG_ERROR("Failed to update detector rules for detector: " << detectorIndex); return false; } - } - else if (stanzaName == FILTERS) - { - if (m_FieldConfig.updateFilters(subTree) == false) - { + } else if (stanzaName == FILTERS) { + if (m_FieldConfig.updateFilters(subTree) == false) { LOG_ERROR("Failed to update filters"); return false; } - } - else if (stanzaName == SCHEDULED_EVENTS) - { - if (m_FieldConfig.updateScheduledEvents(subTree) == false) - { + } else if (stanzaName == SCHEDULED_EVENTS) { + if (m_FieldConfig.updateScheduledEvents(subTree) == false) { LOG_ERROR("Failed to update scheduled events"); return false; } - } - else - { + } else { LOG_WARN("Ignoring unknown update config stanza: " << stanzaName); return false; } @@ -99,6 +74,5 @@ bool CConfigUpdater::update(const std::string &config) return true; } - } } diff --git a/lib/api/CCsvInputParser.cc b/lib/api/CCsvInputParser.cc index 351e411a8b..e10304fcb4 100644 --- a/lib/api/CCsvInputParser.cc +++ b/lib/api/CCsvInputParser.cc @@ -13,23 +13,17 @@ #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { // Initialise statics -const char CCsvInputParser::COMMA(','); -const char CCsvInputParser::QUOTE('"'); -const char CCsvInputParser::RECORD_END('\n'); -const char CCsvInputParser::STRIP_BEFORE_END('\r'); +const char CCsvInputParser::COMMA(','); +const char CCsvInputParser::QUOTE('"'); +const char CCsvInputParser::RECORD_END('\n'); +const char CCsvInputParser::STRIP_BEFORE_END('\r'); const size_t CCsvInputParser::WORK_BUFFER_SIZE(131072); // 128kB - -CCsvInputParser::CCsvInputParser(const std::string &input, - char separator) +CCsvInputParser::CCsvInputParser(const std::string& input, char separator) : CInputParser(), m_StringInputBuf(input), m_StrmIn(m_StringInputBuf), @@ -37,44 +31,36 @@ CCsvInputParser::CCsvInputParser(const std::string &input, m_WorkBufferPtr(0), m_WorkBufferEnd(0), m_NoMoreRecords(false), - m_LineParser(separator) -{ + m_LineParser(separator) { } -CCsvInputParser::CCsvInputParser(std::istream &strmIn, - char separator) +CCsvInputParser::CCsvInputParser(std::istream& strmIn, char separator) : CInputParser(), m_StrmIn(strmIn), m_WorkBuffer(0), m_WorkBufferPtr(0), m_WorkBufferEnd(0), m_NoMoreRecords(false), - m_LineParser(separator) -{ + m_LineParser(separator) { } -const std::string &CCsvInputParser::fieldNameStr() const -{ +const std::string& CCsvInputParser::fieldNameStr() const { return m_FieldNameStr; } -bool CCsvInputParser::readStream(const TReaderFunc &readerFunc) -{ +bool CCsvInputParser::readStream(const TReaderFunc& readerFunc) { // Reset the record buffer pointers in case we're reading a new stream m_WorkBufferEnd = m_WorkBufferPtr; m_NoMoreRecords = false; - TStrVec &fieldNames = this->fieldNames(); + TStrVec& fieldNames = this->fieldNames(); - if (!this->gotFieldNames()) - { - if (this->parseCsvRecordFromStream() == false) - { + if (!this->gotFieldNames()) { + if (this->parseCsvRecordFromStream() == false) { LOG_ERROR("Failed to parse CSV record from stream"); return false; } - if (this->parseFieldNames() == false) - { + if (this->parseFieldNames() == false) { LOG_ERROR("Failed to parse field names from stream"); return false; } @@ -86,34 +72,26 @@ bool CCsvInputParser::readStream(const TReaderFunc &readerFunc) // name - this avoids the need to repeatedly compute the same hashes TStrRefVec fieldValRefs; fieldValRefs.reserve(fieldNames.size()); - for (TStrVecCItr iter = fieldNames.begin(); - iter 
!= fieldNames.end(); - ++iter) - { + for (TStrVecCItr iter = fieldNames.begin(); iter != fieldNames.end(); ++iter) { fieldValRefs.push_back(boost::ref(recordFields[*iter])); } - while (!m_NoMoreRecords) - { - if (this->parseCsvRecordFromStream() == false) - { + while (!m_NoMoreRecords) { + if (this->parseCsvRecordFromStream() == false) { LOG_ERROR("Failed to parse CSV record from stream"); return false; } - if (m_NoMoreRecords) - { + if (m_NoMoreRecords) { break; } - if (this->parseDataRecord(fieldValRefs) == false) - { + if (this->parseDataRecord(fieldValRefs) == false) { LOG_ERROR("Failed to parse data record from stream"); return false; } - if (readerFunc(recordFields) == false) - { + if (readerFunc(recordFields) == false) { LOG_ERROR("Record handler function forced exit"); return false; } @@ -122,8 +100,7 @@ bool CCsvInputParser::readStream(const TReaderFunc &readerFunc) return true; } -bool CCsvInputParser::parseCsvRecordFromStream() -{ +bool CCsvInputParser::parseCsvRecordFromStream() { // For maximum performance, read the stream in large chunks that can be // moved around by memcpy(). Using memcpy() is an order of magnitude faster // than the naive approach of checking and copying one character at a time. @@ -131,8 +108,7 @@ bool CCsvInputParser::parseCsvRecordFromStream() // for the delimiter and then memcpy() to transfer data to the target // std::string, but sadly this is not the case for the Microsoft and Apache // STLs. - if (m_WorkBuffer == nullptr) - { + if (m_WorkBuffer == nullptr) { m_WorkBuffer.reset(new char[WORK_BUFFER_SIZE]); m_WorkBufferPtr = m_WorkBuffer.get(); m_WorkBufferEnd = m_WorkBufferPtr; @@ -140,23 +116,18 @@ bool CCsvInputParser::parseCsvRecordFromStream() bool startOfRecord(true); size_t quoteCount(0); - for (;;) - { + for (;;) { size_t avail(m_WorkBufferEnd - m_WorkBufferPtr); - if (avail == 0) - { - if (m_StrmIn.eof()) - { + if (avail == 0) { + if (m_StrmIn.eof()) { // We have no buffered data and there's no more to read, so stop m_NoMoreRecords = true; break; } m_WorkBufferPtr = m_WorkBuffer.get(); - m_StrmIn.read(m_WorkBuffer.get(), - static_cast<std::streamsize>(WORK_BUFFER_SIZE)); - if (m_StrmIn.bad()) - { + m_StrmIn.read(m_WorkBuffer.get(), static_cast<std::streamsize>(WORK_BUFFER_SIZE)); + if (m_StrmIn.bad()) { LOG_ERROR("Input stream is bad"); m_CurrentRowStr.clear(); m_WorkBufferEnd = m_WorkBufferPtr; @@ -167,56 +138,41 @@ bool CCsvInputParser::parseCsvRecordFromStream() m_WorkBufferEnd = m_WorkBufferPtr + avail; } - const char *delimPtr(reinterpret_cast<const char *>(::memchr(m_WorkBufferPtr, - RECORD_END, - avail))); - const char *endPtr(m_WorkBufferEnd); - if (delimPtr != nullptr) - { + const char* delimPtr(reinterpret_cast<const char*>(::memchr(m_WorkBufferPtr, RECORD_END, avail))); + const char* endPtr(m_WorkBufferEnd); + if (delimPtr != nullptr) { endPtr = delimPtr; - if (endPtr > m_WorkBufferPtr && *(endPtr - 1) == STRIP_BEFORE_END) - { + if (endPtr > m_WorkBufferPtr && *(endPtr - 1) == STRIP_BEFORE_END) { --endPtr; } } - if (startOfRecord) - { + if (startOfRecord) { m_CurrentRowStr.assign(m_WorkBufferPtr, endPtr - m_WorkBufferPtr); startOfRecord = false; - } - else - { - if (endPtr == m_WorkBufferPtr) - { + } else { + if (endPtr == m_WorkBufferPtr) { size_t strLen(m_CurrentRowStr.length()); - if (strLen > 0 && m_CurrentRowStr[strLen - 1] == STRIP_BEFORE_END) - { + if (strLen > 0 && m_CurrentRowStr[strLen - 1] == STRIP_BEFORE_END) { m_CurrentRowStr.erase(strLen - 1); } - } - else - { + } else { m_CurrentRowStr.append(m_WorkBufferPtr, endPtr - m_WorkBufferPtr); } } quoteCount += 
std::count(m_WorkBufferPtr, endPtr, QUOTE); - if (delimPtr != nullptr) - { + if (delimPtr != nullptr) { m_WorkBufferPtr = delimPtr + 1; // In Excel style CSV, quote characters are escaped by doubling them // up. Therefore, if what we've read of a record up to now contains // an odd number of quote characters then we need to read more. - if ((quoteCount % 2) == 0) - { + if ((quoteCount % 2) == 0) { break; } m_CurrentRowStr += RECORD_END; - } - else - { + } else { m_WorkBufferPtr = m_WorkBufferEnd; } } @@ -226,20 +182,17 @@ bool CCsvInputParser::parseCsvRecordFromStream() return true; } -bool CCsvInputParser::parseFieldNames() -{ +bool CCsvInputParser::parseFieldNames() { LOG_TRACE("Parse field names"); m_FieldNameStr.clear(); - TStrVec &fieldNames = this->fieldNames(); + TStrVec& fieldNames = this->fieldNames(); fieldNames.clear(); m_LineParser.reset(m_CurrentRowStr); - while (!m_LineParser.atEnd()) - { + while (!m_LineParser.atEnd()) { std::string fieldName; - if (m_LineParser.parseNext(fieldName) == false) - { + if (m_LineParser.parseNext(fieldName) == false) { LOG_ERROR("Failed to get next CSV token"); return false; } @@ -247,16 +200,12 @@ bool CCsvInputParser::parseFieldNames() fieldNames.emplace_back(std::move(fieldName)); } - if (fieldNames.empty()) - { + if (fieldNames.empty()) { // Don't scare the user with error messages if we've just received an // empty input - if (m_NoMoreRecords) - { + if (m_NoMoreRecords) { LOG_DEBUG("Received input with settings only"); - } - else - { + } else { LOG_ERROR("No field names found in:" << core_t::LINE_ENDING << m_CurrentRowStr); } return false; @@ -270,29 +219,22 @@ bool CCsvInputParser::parseFieldNames() return true; } -bool CCsvInputParser::parseDataRecord(const TStrRefVec &fieldValRefs) -{ - for (TStrRefVecCItr iter = fieldValRefs.begin(); - iter != fieldValRefs.end(); - ++iter) - { - if (m_LineParser.parseNext(iter->get()) == false) - { +bool CCsvInputParser::parseDataRecord(const TStrRefVec& fieldValRefs) { + for (TStrRefVecCItr iter = fieldValRefs.begin(); iter != fieldValRefs.end(); ++iter) { + if (m_LineParser.parseNext(iter->get()) == false) { LOG_ERROR("Failed to get next CSV token"); return false; } } - if (!m_LineParser.atEnd()) - { + if (!m_LineParser.atEnd()) { std::string extraField; size_t numExtraFields(0); - while (m_LineParser.parseNext(extraField) == true) - { + while (m_LineParser.parseNext(extraField) == true) { ++numExtraFields; } - LOG_ERROR("Data record contains " << numExtraFields << " more fields than header:" << core_t::LINE_ENDING - << m_CurrentRowStr << core_t::LINE_ENDING << "and:" << core_t::LINE_ENDING << m_FieldNameStr); + LOG_ERROR("Data record contains " << numExtraFields << " more fields than header:" << core_t::LINE_ENDING << m_CurrentRowStr + << core_t::LINE_ENDING << "and:" << core_t::LINE_ENDING << m_FieldNameStr); return false; } @@ -308,12 +250,10 @@ CCsvInputParser::CCsvLineParser::CCsvLineParser(char separator) m_LineCurrent(nullptr), m_LineEnd(nullptr), m_WorkFieldEnd(nullptr), - m_WorkFieldCapacity(0) -{ + m_WorkFieldCapacity(0) { } -void CCsvInputParser::CCsvLineParser::reset(const std::string &line) -{ +void CCsvInputParser::CCsvLineParser::reset(const std::string& line) { m_SeparatorAfterLastField = false; m_Line = &line; @@ -324,45 +264,35 @@ void CCsvInputParser::CCsvLineParser::reset(const std::string &line) // it turns out to be a single field - this avoids the need to check if it's // big enough when it's populated (unlike std::vector or std::string) size_t minCapacity(line.length() + 1); - 
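
The quote handling in this parser (the odd-quote-count continuation rule above, and the doubled-quote unescaping in parseNextToken() below) follows Excel-style CSV rules: a quote toggles quoted state, a doubled quote inside a quoted section is a literal quote, and separators inside quotes do not split fields. A compact single-line splitter showing just the field-level rules (illustrative sketch; the class itself parses from a buffered stream):

    #include <cstddef>
    #include <string>
    #include <vector>

    // Split one CSV line into fields, honouring Excel-style quoting.
    std::vector<std::string> splitCsvLine(const std::string& line, char sep = ',') {
        std::vector<std::string> fields(1);
        bool insideQuotes = false;
        for (std::size_t i = 0; i < line.size(); ++i) {
            char c = line[i];
            if (c == '"') {
                if (insideQuotes && i + 1 < line.size() && line[i + 1] == '"') {
                    fields.back() += '"'; // doubled quote: literal quote
                    ++i;
                } else {
                    insideQuotes = !insideQuotes; // opening or closing quote
                }
            } else if (c == sep && !insideQuotes) {
                fields.emplace_back(); // unquoted separator: field boundary
            } else {
                fields.back() += c;
            }
        }
        return fields; // e.g. a,"b,""c""",d -> {a, b,"c", d}
    }
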
if (m_WorkFieldCapacity < minCapacity) - { + if (m_WorkFieldCapacity < minCapacity) { m_WorkFieldCapacity = minCapacity; m_WorkField.reset(new char[minCapacity]); } m_WorkFieldEnd = m_WorkField.get(); } -bool CCsvInputParser::CCsvLineParser::parseNext(std::string &value) -{ - if (m_Line == nullptr) - { +bool CCsvInputParser::CCsvLineParser::parseNext(std::string& value) { + if (m_Line == nullptr) { return false; } - if (this->parseNextToken(m_LineEnd, m_LineCurrent) == false) - { + if (this->parseNextToken(m_LineEnd, m_LineCurrent) == false) { return false; } value.assign(m_WorkField.get(), m_WorkFieldEnd - m_WorkField.get()); return true; } -bool CCsvInputParser::CCsvLineParser::atEnd() const -{ +bool CCsvInputParser::CCsvLineParser::atEnd() const { return m_LineCurrent == m_LineEnd; } -bool CCsvInputParser::CCsvLineParser::parseNextToken(const char *end, - const char *¤t) -{ +bool CCsvInputParser::CCsvLineParser::parseNextToken(const char* end, const char*& current) { m_WorkFieldEnd = m_WorkField.get(); - if (current == end) - { + if (current == end) { // Allow one empty token at the end of a line - if (!m_SeparatorAfterLastField) - { - LOG_ERROR("Trying to read too many fields from record:" << - core_t::LINE_ENDING << *m_Line); + if (!m_SeparatorAfterLastField) { + LOG_ERROR("Trying to read too many fields from record:" << core_t::LINE_ENDING << *m_Line); return false; } m_SeparatorAfterLastField = false; @@ -370,30 +300,24 @@ bool CCsvInputParser::CCsvLineParser::parseNextToken(const char *end, } bool insideQuotes(false); - do - { - if (insideQuotes) - { - if (*current == QUOTE) - { + do { + if (insideQuotes) { + if (*current == QUOTE) { // We need to look at the character after the quote ++current; - if (current == end) - { + if (current == end) { m_SeparatorAfterLastField = false; return true; } // The quoting state needs to be reversed UNLESS there are two // adjacent quotes - if (*current != QUOTE) - { + if (*current != QUOTE) { insideQuotes = false; // Cater for the case where the character after the quote is // the separator - if (*current == m_Separator) - { + if (*current == m_Separator) { ++current; m_SeparatorAfterLastField = true; return true; @@ -402,45 +326,33 @@ bool CCsvInputParser::CCsvLineParser::parseNextToken(const char *end, } *(m_WorkFieldEnd++) = *current; - } - else - { - if (*current == m_Separator) - { + } else { + if (*current == m_Separator) { ++current; m_SeparatorAfterLastField = true; return true; } - if (*current == QUOTE) - { + if (*current == QUOTE) { // We're not currently inside quotes so a quote puts us inside // quotes regardless of the next character, and we never want to // include this quote in the field value insideQuotes = true; - } - else - { + } else { *(m_WorkFieldEnd++) = *current; } } - } - while (++current != end); + } while (++current != end); m_SeparatorAfterLastField = false; // Inconsistency if the last character of the string is an unmatched quote - if (insideQuotes) - { - LOG_ERROR("Unmatched final quote in record:" << core_t::LINE_ENDING << - *m_Line); + if (insideQuotes) { + LOG_ERROR("Unmatched final quote in record:" << core_t::LINE_ENDING << *m_Line); return false; } return true; } - - } } - diff --git a/lib/api/CCsvOutputWriter.cc b/lib/api/CCsvOutputWriter.cc index e13f40f0e7..23d3a999fc 100644 --- a/lib/api/CCsvOutputWriter.cc +++ b/lib/api/CCsvOutputWriter.cc @@ -11,61 +11,41 @@ #include #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { // Initialise statics const char 
CCsvOutputWriter::COMMA(','); const char CCsvOutputWriter::QUOTE('"'); const char CCsvOutputWriter::RECORD_END('\n'); - -CCsvOutputWriter::CCsvOutputWriter(bool outputMessages, - bool outputHeader, - char escape, - char separator) +CCsvOutputWriter::CCsvOutputWriter(bool outputMessages, bool outputHeader, char escape, char separator) : m_StrmOut(m_StringOutputBuf), m_OutputMessages(outputMessages), m_OutputHeader(outputHeader), m_Escape(escape), - m_Separator(separator) -{ - if (m_Separator == QUOTE || - m_Separator == m_Escape || - m_Separator == RECORD_END) - { + m_Separator(separator) { + if (m_Separator == QUOTE || m_Separator == m_Escape || m_Separator == RECORD_END) { LOG_ERROR("CSV output writer will not generate parsable output because " - "separator character (" << m_Separator << ") is the same as " - "the quote, escape and/or record end characters"); + "separator character (" + << m_Separator + << ") is the same as " + "the quote, escape and/or record end characters"); } } -CCsvOutputWriter::CCsvOutputWriter(std::ostream &strmOut, - bool outputMessages, - bool outputHeader, - char escape, - char separator) - : m_StrmOut(strmOut), - m_OutputMessages(outputMessages), - m_OutputHeader(outputHeader), - m_Escape(escape), - m_Separator(separator) -{ - if (m_Separator == QUOTE || - m_Separator == m_Escape || - m_Separator == RECORD_END) - { +CCsvOutputWriter::CCsvOutputWriter(std::ostream& strmOut, bool outputMessages, bool outputHeader, char escape, char separator) + : m_StrmOut(strmOut), m_OutputMessages(outputMessages), m_OutputHeader(outputHeader), m_Escape(escape), m_Separator(separator) { + if (m_Separator == QUOTE || m_Separator == m_Escape || m_Separator == RECORD_END) { LOG_ERROR("CSV output writer will not generate parsable output because " - "separator character (" << m_Separator << ") is the same as " - "the quote, escape and/or record end characters"); + "separator character (" + << m_Separator + << ") is the same as " + "the quote, escape and/or record end characters"); } } -CCsvOutputWriter::~CCsvOutputWriter() -{ +CCsvOutputWriter::~CCsvOutputWriter() { // Since we didn't flush the stream whilst working, we flush it on // destruction m_StrmOut.flush(); @@ -76,28 +56,19 @@ CCsvOutputWriter::~CCsvOutputWriter() core::CSleep::sleep(20); } -bool CCsvOutputWriter::fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames) -{ +bool CCsvOutputWriter::fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames) { m_FieldNames = fieldNames; // Only add extra field names if they're not already present - for (TStrVecCItr iter = extraFieldNames.begin(); - iter != extraFieldNames.end(); - ++iter) - { - if (std::find(m_FieldNames.begin(), - m_FieldNames.end(), - *iter) == m_FieldNames.end()) - { + for (TStrVecCItr iter = extraFieldNames.begin(); iter != extraFieldNames.end(); ++iter) { + if (std::find(m_FieldNames.begin(), m_FieldNames.end(), *iter) == m_FieldNames.end()) { m_FieldNames.push_back(*iter); } } m_Hashes.clear(); - if (m_FieldNames.empty()) - { + if (m_FieldNames.empty()) { LOG_ERROR("Attempt to set empty field names"); return false; } @@ -114,8 +85,7 @@ bool CCsvOutputWriter::fieldNames(const TStrVec &fieldNames, this->appendField(*iter); m_Hashes.push_back(EMPTY_FIELD_OVERRIDES.hash_function()(*iter)); - for (++iter; iter != m_FieldNames.end(); ++iter) - { + for (++iter; iter != m_FieldNames.end(); ++iter) { m_WorkRecord += m_Separator; this->appendField(*iter); m_Hashes.push_back(EMPTY_FIELD_OVERRIDES.hash_function()(*iter)); @@ -124,12 +94,8 @@ 
bool CCsvOutputWriter::fieldNames(const TStrVec &fieldNames, m_WorkRecord += RECORD_END; // Messages are output in arrears - this is not ideal - TODO - if (m_OutputMessages) - { - for (TStrStrPrSetCItr msgIter = m_Messages.begin(); - msgIter != m_Messages.end(); - ++msgIter) - { + if (m_OutputMessages) { + for (TStrStrPrSetCItr msgIter = m_Messages.begin(); msgIter != m_Messages.end(); ++msgIter) { m_StrmOut << msgIter->first << '=' << msgIter->second << RECORD_END; LOG_DEBUG("Forwarded " << msgIter->first << '=' << msgIter->second); } @@ -140,24 +106,19 @@ bool CCsvOutputWriter::fieldNames(const TStrVec &fieldNames, m_StrmOut << RECORD_END; } - if (m_OutputHeader) - { + if (m_OutputHeader) { m_StrmOut << m_WorkRecord; } return true; } -const COutputHandler::TStrVec &CCsvOutputWriter::fieldNames() const -{ +const COutputHandler::TStrVec& CCsvOutputWriter::fieldNames() const { return m_FieldNames; } -bool CCsvOutputWriter::writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields) -{ - if (m_FieldNames.empty()) - { +bool CCsvOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { + if (m_FieldNames.empty()) { LOG_ERROR("Attempt to write data before field names"); return false; } @@ -171,42 +132,29 @@ bool CCsvOutputWriter::writeRow(const TStrStrUMap &dataRowFields, TStrVecCItr fieldNameIter = m_FieldNames.begin(); TPreComputedHashVecCItr preComputedHashIter = m_Hashes.begin(); - TStrStrUMapCItr fieldValueIter = overrideDataRowFields.find(*fieldNameIter, - *preComputedHashIter, - pred); - if (fieldValueIter == overrideDataRowFields.end()) - { - fieldValueIter = dataRowFields.find(*fieldNameIter, - *preComputedHashIter, - pred); - if (fieldValueIter == dataRowFields.end()) - { + TStrStrUMapCItr fieldValueIter = overrideDataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); + if (fieldValueIter == overrideDataRowFields.end()) { + fieldValueIter = dataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); + if (fieldValueIter == dataRowFields.end()) { LOG_ERROR("Data fields to be written do not include a value for " - "field " << *fieldNameIter); + "field " + << *fieldNameIter); return false; } } this->appendField(fieldValueIter->second); - for (++fieldNameIter, ++preComputedHashIter; - fieldNameIter != m_FieldNames.end() && - preComputedHashIter != m_Hashes.end(); - ++fieldNameIter, ++preComputedHashIter) - { + for (++fieldNameIter, ++preComputedHashIter; fieldNameIter != m_FieldNames.end() && preComputedHashIter != m_Hashes.end(); + ++fieldNameIter, ++preComputedHashIter) { m_WorkRecord += m_Separator; - fieldValueIter = overrideDataRowFields.find(*fieldNameIter, - *preComputedHashIter, - pred); - if (fieldValueIter == overrideDataRowFields.end()) - { - fieldValueIter = dataRowFields.find(*fieldNameIter, - *preComputedHashIter, - pred); - if (fieldValueIter == dataRowFields.end()) - { + fieldValueIter = overrideDataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); + if (fieldValueIter == overrideDataRowFields.end()) { + fieldValueIter = dataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); + if (fieldValueIter == dataRowFields.end()) { LOG_ERROR("Data fields to be written do not include a value for " - "field " << *fieldNameIter); + "field " + << *fieldNameIter); return false; } } @@ -220,17 +168,15 @@ bool CCsvOutputWriter::writeRow(const TStrStrUMap &dataRowFields, return true; } -std::string CCsvOutputWriter::internalString() const -{ - const_cast<std::ostream &>(m_StrmOut).flush(); +std::string 
CCsvOutputWriter::internalString() const { + const_cast<std::ostream&>(m_StrmOut).flush(); // This is only of any value if the first constructor was used - it's up to // the caller to know this return m_StringOutputBuf.str(); } -void CCsvOutputWriter::appendField(const std::string &field) -{ +void CCsvOutputWriter::appendField(const std::string& field) { // Note: although std::string::find_first_of() would be less verbose, it's // also considerably less efficient (at least on Linux) than this hardcoded // loop. The reason is that it flips the find around, calling memchr() once @@ -238,51 +184,33 @@ void CCsvOutputWriter::appendField(const std::string &field) // called. This is not sensible when we're only checking for a small number // of possible characters. bool needOuterQuotes(false); - for (std::string::const_iterator iter = field.begin(); - iter != field.end(); - ++iter) - { + for (std::string::const_iterator iter = field.begin(); iter != field.end(); ++iter) { char curChar(*iter); - if (curChar == m_Separator || - curChar == QUOTE || - curChar == RECORD_END || - curChar == m_Escape) - { + if (curChar == m_Separator || curChar == QUOTE || curChar == RECORD_END || curChar == m_Escape) { needOuterQuotes = true; break; } } - if (needOuterQuotes) - { + if (needOuterQuotes) { m_WorkRecord += QUOTE; - for (std::string::const_iterator iter = field.begin(); - iter != field.end(); - ++iter) - { + for (std::string::const_iterator iter = field.begin(); iter != field.end(); ++iter) { char curChar(*iter); - if (curChar == QUOTE || curChar == m_Escape) - { + if (curChar == QUOTE || curChar == m_Escape) { m_WorkRecord += m_Escape; } m_WorkRecord += curChar; } m_WorkRecord += QUOTE; - } - else - { + } else { m_WorkRecord += field; } } -std::ostream &CCsvOutputWriter::outputStream() -{ +std::ostream& CCsvOutputWriter::outputStream() { return m_StrmOut; } - - } } - diff --git a/lib/api/CDataProcessor.cc b/lib/api/CDataProcessor.cc index 820714d1cc..eca12db80a 100644 --- a/lib/api/CDataProcessor.cc +++ b/lib/api/CDataProcessor.cc @@ -7,29 +7,23 @@ #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { // statics const std::string CDataProcessor::CONTROL_FIELD_NAME(1, CONTROL_FIELD_NAME_CHAR); -CDataProcessor::CDataProcessor() -{ +CDataProcessor::CDataProcessor() { } -CDataProcessor::~CDataProcessor() -{ +CDataProcessor::~CDataProcessor() { // Most compilers put the vtable in the object file containing the // definition of the first non-inlined virtual function, so DON'T move this // empty definition to the header file!
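    // [Editor's note: a general illustration, not part of this change] This
    // is the "key function" rule from the Itanium C++ ABI: for a class like
    //     struct B { virtual ~B(); };
    // the vtable and RTTI are emitted in the translation unit that defines
    // ~B(), its first non-inline virtual function. Defining the destructor
    // inline in the header would instead emit weak copies of the vtable in
    // every translation unit that uses the class.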
} -std::string CDataProcessor::debugPrintRecord(const TStrStrUMap &dataRowFields) -{ - if (dataRowFields.empty()) - { +std::string CDataProcessor::debugPrintRecord(const TStrStrUMap& dataRowFields) { + if (dataRowFields.empty()) { return ""; } @@ -40,12 +34,8 @@ std::string CDataProcessor::debugPrintRecord(const TStrStrUMap &dataRowFields) // We want to print the field names on one line, followed by the field // values on the next line - for (TStrStrUMapCItr rowIter = dataRowFields.begin(); - rowIter != dataRowFields.end(); - ++rowIter) - { - if (rowIter != dataRowFields.begin()) - { + for (TStrStrUMapCItr rowIter = dataRowFields.begin(); rowIter != dataRowFields.end(); ++rowIter) { + if (rowIter != dataRowFields.begin()) { fieldNames.push_back(','); fieldValues.push_back(','); } @@ -58,13 +48,9 @@ std::string CDataProcessor::debugPrintRecord(const TStrStrUMap &dataRowFields) return result.str(); } -bool CDataProcessor::periodicPersistState(CBackgroundPersister &/*persister*/) -{ +bool CDataProcessor::periodicPersistState(CBackgroundPersister& /*persister*/) { // No-op return true; } - - } } - diff --git a/lib/api/CDataTyper.cc b/lib/api/CDataTyper.cc index de80659d1f..13364db450 100644 --- a/lib/api/CDataTyper.cc +++ b/lib/api/CDataTyper.cc @@ -5,50 +5,32 @@ */ #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { // Initialise statics const CDataTyper::TStrStrUMap CDataTyper::EMPTY_FIELDS; - -CDataTyper::CDataTyper(const std::string &fieldName) - : m_FieldName(fieldName), - m_LastPersistTime(0) -{ +CDataTyper::CDataTyper(const std::string& fieldName) : m_FieldName(fieldName), m_LastPersistTime(0) { } -CDataTyper::~CDataTyper() -{ +CDataTyper::~CDataTyper() { } -int CDataTyper::computeType(bool isDryRun, - const std::string &str, - size_t rawStringLen) -{ +int CDataTyper::computeType(bool isDryRun, const std::string& str, size_t rawStringLen) { return this->computeType(isDryRun, EMPTY_FIELDS, str, rawStringLen); } -const std::string &CDataTyper::fieldName() const -{ +const std::string& CDataTyper::fieldName() const { return m_FieldName; } -core_t::TTime CDataTyper::lastPersistTime() const -{ +core_t::TTime CDataTyper::lastPersistTime() const { return m_LastPersistTime; } -void CDataTyper::lastPersistTime(core_t::TTime lastPersistTime) -{ +void CDataTyper::lastPersistTime(core_t::TTime lastPersistTime) { m_LastPersistTime = lastPersistTime; } - - } } - diff --git a/lib/api/CDetectionRulesJsonParser.cc b/lib/api/CDetectionRulesJsonParser.cc index cd061cdbae..4ccd3d23bb 100644 --- a/lib/api/CDetectionRulesJsonParser.cc +++ b/lib/api/CDetectionRulesJsonParser.cc @@ -8,13 +8,10 @@ #include #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { -namespace -{ +namespace { const std::string ACTIONS("actions"); const std::string FILTER_RESULTS("filter_results"); const std::string SKIP_SAMPLING("skip_sampling"); @@ -42,50 +39,41 @@ const std::string FIELD_VALUE("field_value"); const std::string FILTER_ID("filter_id"); } -CDetectionRulesJsonParser::CDetectionRulesJsonParser(TStrPatternSetUMap &filtersByIdMap) - : m_FiltersByIdMap(filtersByIdMap) -{ +CDetectionRulesJsonParser::CDetectionRulesJsonParser(TStrPatternSetUMap& filtersByIdMap) : m_FiltersByIdMap(filtersByIdMap) { } -bool CDetectionRulesJsonParser::parseRules(const std::string &json, TDetectionRuleVec &rules) -{ +bool CDetectionRulesJsonParser::parseRules(const std::string& json, TDetectionRuleVec& rules) { LOG_DEBUG("Parsing detection rules"); rules.clear(); rapidjson::Document doc; - if 
(doc.Parse<0>(json.c_str()).HasParseError()) - { - LOG_ERROR("An error occurred while parsing detection rules from JSON: " << - doc.GetParseError()); + if (doc.Parse<0>(json.c_str()).HasParseError()) { + LOG_ERROR("An error occurred while parsing detection rules from JSON: " << doc.GetParseError()); return false; } - if (!doc.IsArray()) - { + if (!doc.IsArray()) { LOG_ERROR("Could not parse detection rules from non-array JSON object: " << json); return false; } - if (doc.Empty()) - { + if (doc.Empty()) { return true; } rules.resize(doc.Size()); - for (unsigned int i = 0; i < doc.Size(); ++i) - { - if (!doc[i].IsObject()) - { + for (unsigned int i = 0; i < doc.Size(); ++i) { + if (!doc[i].IsObject()) { LOG_ERROR("Could not parse detection rules: " - << "expected detection rules array to contain objects. JSON: " << json); + << "expected detection rules array to contain objects. JSON: " << json); rules.clear(); return false; } - model::CDetectionRule &rule = rules[i]; + model::CDetectionRule& rule = rules[i]; - rapidjson::Value &ruleObject = doc[i]; + rapidjson::Value& ruleObject = doc[i]; bool isValid = true; @@ -94,20 +82,17 @@ bool CDetectionRulesJsonParser::parseRules(const std::string &json, TDetectionRu isValid &= parseConditionsConnective(ruleObject, rule); isValid &= parseRuleConditions(ruleObject, rule); - if (isValid == false) - { + if (isValid == false) { LOG_ERROR("Failed to parse detection rules from JSON: " << json); rules.clear(); return false; } // Optional fields - if (hasStringMember(ruleObject, TARGET_FIELD_NAME)) - { + if (hasStringMember(ruleObject, TARGET_FIELD_NAME)) { rule.targetFieldName(ruleObject[TARGET_FIELD_NAME.c_str()].GetString()); } - if (hasStringMember(ruleObject, TARGET_FIELD_VALUE)) - { + if (hasStringMember(ruleObject, TARGET_FIELD_VALUE)) { rule.targetFieldValue(ruleObject[TARGET_FIELD_VALUE.c_str()].GetString()); } } @@ -115,51 +100,37 @@ bool CDetectionRulesJsonParser::parseRules(const std::string &json, TDetectionRu return true; } -bool CDetectionRulesJsonParser::hasStringMember(const rapidjson::Value &object, - const std::string &name) -{ - const char *nameAsCStr = name.c_str(); +bool CDetectionRulesJsonParser::hasStringMember(const rapidjson::Value& object, const std::string& name) { + const char* nameAsCStr = name.c_str(); return object.HasMember(nameAsCStr) && object[nameAsCStr].IsString(); } -bool CDetectionRulesJsonParser::hasArrayMember(const rapidjson::Value &object, - const std::string &name) -{ - const char *nameAsCStr = name.c_str(); +bool CDetectionRulesJsonParser::hasArrayMember(const rapidjson::Value& object, const std::string& name) { + const char* nameAsCStr = name.c_str(); return object.HasMember(nameAsCStr) && object[nameAsCStr].IsArray(); } -bool CDetectionRulesJsonParser::parseRuleActions(const rapidjson::Value &ruleObject, - model::CDetectionRule &rule) -{ - if (!hasArrayMember(ruleObject, ACTIONS)) - { +bool CDetectionRulesJsonParser::parseRuleActions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule) { + if (!hasArrayMember(ruleObject, ACTIONS)) { LOG_ERROR("Missing rule field: " << ACTIONS); return false; } - const rapidjson::Value &array = ruleObject[ACTIONS.c_str()]; - if (array.Empty()) - { + const rapidjson::Value& array = ruleObject[ACTIONS.c_str()]; + if (array.Empty()) { LOG_ERROR("At least one rule action is required"); return false; } int action = 0; - for (unsigned int i = 0; i < array.Size(); ++i) - { + for (unsigned int i = 0; i < array.Size(); ++i) { model::CRuleCondition ruleCondition; - const 
std::string &parsedAction = array[i].GetString(); - if (parsedAction == FILTER_RESULTS) - { + const std::string& parsedAction = array[i].GetString(); + if (parsedAction == FILTER_RESULTS) { action |= model::CDetectionRule::E_FilterResults; - } - else if (parsedAction == SKIP_SAMPLING) - { + } else if (parsedAction == SKIP_SAMPLING) { action |= model::CDetectionRule::E_SkipSampling; - } - else - { + } else { LOG_ERROR("Invalid rule action: " << parsedAction); return false; } @@ -169,55 +140,41 @@ bool CDetectionRulesJsonParser::parseRuleActions(const rapidjson::Value &ruleObj return true; } -bool CDetectionRulesJsonParser::parseConditionsConnective(const rapidjson::Value &ruleObject, - model::CDetectionRule &rule) -{ - if (!hasStringMember(ruleObject, CONDITIONS_CONNECTIVE)) - { +bool CDetectionRulesJsonParser::parseConditionsConnective(const rapidjson::Value& ruleObject, model::CDetectionRule& rule) { + if (!hasStringMember(ruleObject, CONDITIONS_CONNECTIVE)) { LOG_ERROR("Missing rule field: " << CONDITIONS_CONNECTIVE); return false; } - const std::string &connective = ruleObject[CONDITIONS_CONNECTIVE.c_str()].GetString(); - if (connective == OR) - { + const std::string& connective = ruleObject[CONDITIONS_CONNECTIVE.c_str()].GetString(); + if (connective == OR) { rule.conditionsConnective(model::CDetectionRule::E_Or); - } - else if (connective == AND) - { + } else if (connective == AND) { rule.conditionsConnective(model::CDetectionRule::E_And); - } - else - { + } else { LOG_ERROR("Invalid conditionsConnective: " << connective); return false; } return true; } -bool CDetectionRulesJsonParser::parseRuleConditions(const rapidjson::Value &ruleObject, - model::CDetectionRule &rule) -{ - if (!hasArrayMember(ruleObject, CONDITIONS)) - { +bool CDetectionRulesJsonParser::parseRuleConditions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule) { + if (!hasArrayMember(ruleObject, CONDITIONS)) { LOG_ERROR("Missing rule field: " << CONDITIONS); return false; } - const rapidjson::Value &array = ruleObject[CONDITIONS.c_str()]; - if (array.Empty()) - { + const rapidjson::Value& array = ruleObject[CONDITIONS.c_str()]; + if (array.Empty()) { LOG_ERROR("At least one condition is required"); return false; } - for (unsigned int i = 0; i < array.Size(); ++i) - { + for (unsigned int i = 0; i < array.Size(); ++i) { model::CRuleCondition condition; - const rapidjson::Value &conditionObject = array[i]; + const rapidjson::Value& conditionObject = array[i]; - if (!conditionObject.IsObject()) - { + if (!conditionObject.IsObject()) { LOG_ERROR("Unexpected condition type: array conditions is expected to contain objects"); return false; } @@ -226,27 +183,21 @@ bool CDetectionRulesJsonParser::parseRuleConditions(const rapidjson::Value &rule // Required fields isValid &= parseRuleConditionType(conditionObject, condition); - if (condition.isNumerical()) - { + if (condition.isNumerical()) { isValid &= parseCondition(conditionObject, condition); - } - else if (condition.isCategorical()) - { + } else if (condition.isCategorical()) { isValid &= this->parseFilterId(conditionObject, condition); } - if (isValid == false) - { + if (isValid == false) { return false; } // Optional fields - if (hasStringMember(conditionObject, FIELD_NAME)) - { + if (hasStringMember(conditionObject, FIELD_NAME)) { condition.fieldName(conditionObject[FIELD_NAME.c_str()].GetString()); } - if (hasStringMember(conditionObject, FIELD_VALUE)) - { + if (hasStringMember(conditionObject, FIELD_VALUE)) { 
condition.fieldValue(conditionObject[FIELD_VALUE.c_str()].GetString()); } @@ -255,18 +206,14 @@ bool CDetectionRulesJsonParser::parseRuleConditions(const rapidjson::Value &rule return true; } -bool CDetectionRulesJsonParser::parseFilterId(const rapidjson::Value &conditionObject, - model::CRuleCondition &ruleCondition) -{ - if (!hasStringMember(conditionObject, FILTER_ID)) - { +bool CDetectionRulesJsonParser::parseFilterId(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition) { + if (!hasStringMember(conditionObject, FILTER_ID)) { LOG_ERROR("Missing condition field: " << FILTER_ID); return false; } - const std::string &filterId = conditionObject[FILTER_ID.c_str()].GetString(); + const std::string& filterId = conditionObject[FILTER_ID.c_str()].GetString(); auto filterEntry = m_FiltersByIdMap.find(filterId); - if (filterEntry == m_FiltersByIdMap.end()) - { + if (filterEntry == m_FiltersByIdMap.end()) { LOG_ERROR("Filter with id [" << filterId << "] could not be found"); return false; } @@ -274,114 +221,78 @@ bool CDetectionRulesJsonParser::parseFilterId(const rapidjson::Value &conditionO return true; } -bool CDetectionRulesJsonParser::parseRuleConditionType(const rapidjson::Value &ruleConditionObject, - model::CRuleCondition &ruleCondition) -{ - if (!hasStringMember(ruleConditionObject, TYPE)) - { +bool CDetectionRulesJsonParser::parseRuleConditionType(const rapidjson::Value& ruleConditionObject, model::CRuleCondition& ruleCondition) { + if (!hasStringMember(ruleConditionObject, TYPE)) { LOG_ERROR("Missing ruleCondition field: " << TYPE); return false; } - const std::string &type = ruleConditionObject[TYPE.c_str()].GetString(); - if (type == CATEGORICAL) - { + const std::string& type = ruleConditionObject[TYPE.c_str()].GetString(); + if (type == CATEGORICAL) { ruleCondition.type(model::CRuleCondition::E_Categorical); - } - else if (type == NUMERICAL_ACTUAL) - { + } else if (type == NUMERICAL_ACTUAL) { ruleCondition.type(model::CRuleCondition::E_NumericalActual); - } - else if (type == NUMERICAL_TYPICAL) - { + } else if (type == NUMERICAL_TYPICAL) { ruleCondition.type(model::CRuleCondition::E_NumericalTypical); - } - else if (type == NUMERICAL_DIFF_ABS) - { + } else if (type == NUMERICAL_DIFF_ABS) { ruleCondition.type(model::CRuleCondition::E_NumericalDiffAbs); - } - else if (type == TIME) - { + } else if (type == TIME) { ruleCondition.type(model::CRuleCondition::E_Time); - } - else - { + } else { LOG_ERROR("Invalid conditionType: " << type); return false; } return true; } -bool CDetectionRulesJsonParser::parseCondition(const rapidjson::Value &ruleConditionObject, - model::CRuleCondition &ruleCondition) -{ - if (!ruleConditionObject.HasMember(CONDITION.c_str())) - { +bool CDetectionRulesJsonParser::parseCondition(const rapidjson::Value& ruleConditionObject, model::CRuleCondition& ruleCondition) { + if (!ruleConditionObject.HasMember(CONDITION.c_str())) { LOG_ERROR("Missing ruleCondition field: " << CONDITION); return false; } - const rapidjson::Value &conditionObject = ruleConditionObject[CONDITION.c_str()]; - if (!conditionObject.IsObject()) - { + const rapidjson::Value& conditionObject = ruleConditionObject[CONDITION.c_str()]; + if (!conditionObject.IsObject()) { LOG_ERROR("Unexpected type for condition; object was expected"); return false; } - return parseConditionOperator(conditionObject, ruleCondition) - && parseConditionThreshold(conditionObject, ruleCondition); + return parseConditionOperator(conditionObject, ruleCondition) && 
parseConditionThreshold(conditionObject, ruleCondition); } -bool CDetectionRulesJsonParser::parseConditionOperator(const rapidjson::Value &conditionObject, - model::CRuleCondition &ruleCondition) -{ - if (!hasStringMember(conditionObject, OPERATOR)) - { +bool CDetectionRulesJsonParser::parseConditionOperator(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition) { + if (!hasStringMember(conditionObject, OPERATOR)) { LOG_ERROR("Missing condition field: " << OPERATOR); return false; } - const std::string &operatorString = conditionObject[OPERATOR.c_str()].GetString(); - if (operatorString == LT) - { + const std::string& operatorString = conditionObject[OPERATOR.c_str()].GetString(); + if (operatorString == LT) { ruleCondition.condition().s_Op = model::CRuleCondition::E_LT; - } - else if (operatorString == LTE) - { + } else if (operatorString == LTE) { ruleCondition.condition().s_Op = model::CRuleCondition::E_LTE; - } - else if (operatorString == GT) - { + } else if (operatorString == GT) { ruleCondition.condition().s_Op = model::CRuleCondition::E_GT; - } - else if (operatorString == GTE) - { + } else if (operatorString == GTE) { ruleCondition.condition().s_Op = model::CRuleCondition::E_GTE; - } - else - { + } else { LOG_ERROR("Invalid operator value: " << operatorString); return false; } return true; } -bool CDetectionRulesJsonParser::parseConditionThreshold(const rapidjson::Value &conditionObject, - model::CRuleCondition &ruleCondition) -{ - if (!hasStringMember(conditionObject, VALUE)) - { +bool CDetectionRulesJsonParser::parseConditionThreshold(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition) { + if (!hasStringMember(conditionObject, VALUE)) { LOG_ERROR("Missing condition field: " << VALUE); return false; } const std::string valueString = conditionObject[VALUE.c_str()].GetString(); - if (core::CStringUtils::stringToType(valueString, ruleCondition.condition().s_Threshold) == false) - { + if (core::CStringUtils::stringToType(valueString, ruleCondition.condition().s_Threshold) == false) { LOG_ERROR("Invalid operator value: " << valueString); return false; } return true; } - } } diff --git a/lib/api/CFieldConfig.cc b/lib/api/CFieldConfig.cc index 99ba8fd401..06cb961743 100644 --- a/lib/api/CFieldConfig.cc +++ b/lib/api/CFieldConfig.cc @@ -7,10 +7,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -29,12 +29,8 @@ #include #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { // Initialise statics const std::string CFieldConfig::DETECTOR_PREFIX("detector."); @@ -47,8 +43,8 @@ const std::string CFieldConfig::DESCRIPTION_SUFFIX(".description"); const std::string CFieldConfig::RULES_SUFFIX(".rules"); const std::string CFieldConfig::CATEGORIZATION_FIELD_OPTION("categorizationfield"); const std::string CFieldConfig::SUMMARY_COUNT_FIELD_OPTION("summarycountfield"); -const char CFieldConfig::SUFFIX_SEPARATOR('.'); -const char CFieldConfig::FIELDNAME_SEPARATOR('-'); +const char CFieldConfig::SUFFIX_SEPARATOR('.'); +const char CFieldConfig::FIELDNAME_SEPARATOR('-'); const std::string CFieldConfig::IS_ENABLED_SUFFIX(".isEnabled"); const std::string CFieldConfig::BY_SUFFIX(".by"); const std::string CFieldConfig::OVER_SUFFIX(".over"); @@ -139,82 +135,60 @@ const std::string CFieldConfig::NONE_TOKEN("none"); const std::string CFieldConfig::CLEAR("clear"); -CFieldConfig::CFieldConfig() -{ +CFieldConfig::CFieldConfig() { } -CFieldConfig::CFieldConfig(const std::string 
&categorizationFieldName) - : m_CategorizationFieldName(categorizationFieldName) -{ +CFieldConfig::CFieldConfig(const std::string& categorizationFieldName) : m_CategorizationFieldName(categorizationFieldName) { this->seenField(categorizationFieldName); } -CFieldConfig::CFieldConfig(const std::string &fieldName, - const std::string &byFieldName, +CFieldConfig::CFieldConfig(const std::string& fieldName, + const std::string& byFieldName, bool useNull, - const std::string &summaryCountFieldName) - : m_SummaryCountFieldName(summaryCountFieldName) -{ - CFieldOptions options(fieldName, - 1, - byFieldName, - false, - useNull); + const std::string& summaryCountFieldName) + : m_SummaryCountFieldName(summaryCountFieldName) { + CFieldOptions options(fieldName, 1, byFieldName, false, useNull); m_FieldOptions.insert(options); // For historical reasons, the only function name we interpret in this // constructor is "count" - every other word is considered to be a metric // field name - if (fieldName != COUNT_NAME) - { + if (fieldName != COUNT_NAME) { this->seenField(fieldName); } this->seenField(byFieldName); } -CFieldConfig::CFieldConfig(const std::string &fieldName, - const std::string &byFieldName, - const std::string &partitionFieldName, - bool useNull) -{ - CFieldOptions options(fieldName, - 1, - byFieldName, - partitionFieldName, - false, - false, - useNull); +CFieldConfig::CFieldConfig(const std::string& fieldName, + const std::string& byFieldName, + const std::string& partitionFieldName, + bool useNull) { + CFieldOptions options(fieldName, 1, byFieldName, partitionFieldName, false, false, useNull); m_FieldOptions.insert(options); // For historical reasons, the only function name we interpret in this // constructor is "count" - every other word is considered to be a metric // field name - if (fieldName != COUNT_NAME) - { + if (fieldName != COUNT_NAME) { this->seenField(fieldName); } this->seenField(byFieldName); this->seenField(partitionFieldName); } -bool CFieldConfig::initFromCmdLine(const std::string &configFile, - const TStrVec &tokens) -{ - if (tokens.empty() && configFile.empty()) - { +bool CFieldConfig::initFromCmdLine(const std::string& configFile, const TStrVec& tokens) { + if (tokens.empty() && configFile.empty()) { LOG_ERROR("Neither a fieldname clause nor a field config file was specified"); return false; } - if (tokens.empty()) - { + if (tokens.empty()) { return this->initFromFile(configFile); } - if (!configFile.empty()) - { + if (!configFile.empty()) { LOG_ERROR("Cannot specify both a fieldname clause and a field config file"); return false; } @@ -222,8 +196,7 @@ bool CFieldConfig::initFromCmdLine(const std::string &configFile, return this->initFromClause(tokens); } -bool CFieldConfig::initFromFile(const std::string &configFile) -{ +bool CFieldConfig::initFromFile(const std::string& configFile) { LOG_DEBUG("Reading config file " << configFile); m_FieldOptions.clear(); @@ -235,20 +208,16 @@ bool CFieldConfig::initFromFile(const std::string &configFile) m_ScheduledEvents.clear(); boost::property_tree::ptree propTree; - try - { + try { std::ifstream strm(configFile.c_str()); - if (!strm.is_open()) - { + if (!strm.is_open()) { LOG_ERROR("Error opening config file " << configFile); return false; } model::CLimits::skipUtf8Bom(strm); boost::property_tree::ini_parser::read_ini(strm, propTree); - } - catch (boost::property_tree::ptree_error &e) - { + } catch (boost::property_tree::ptree_error& e) { LOG_ERROR("Error reading config file " << configFile << ": " << e.what()); return false; } @@ 
-256,46 +225,26 @@ bool CFieldConfig::initFromFile(const std::string &configFile) TIntSet handledConfigs; TIntSet handledScheduledEvents; - for (boost::property_tree::ptree::iterator level1Iter = propTree.begin(); - level1Iter != propTree.end(); - ++level1Iter) - { - const std::string &level1Key = level1Iter->first; - const std::string &value = level1Iter->second.data(); - if (level1Key.length() > DETECTOR_PREFIX.length() && - level1Key.compare(0, DETECTOR_PREFIX.length(), DETECTOR_PREFIX) == 0) - { - if (this->processDetector(propTree, - level1Key, - value, - handledConfigs) == false) - { + for (boost::property_tree::ptree::iterator level1Iter = propTree.begin(); level1Iter != propTree.end(); ++level1Iter) { + const std::string& level1Key = level1Iter->first; + const std::string& value = level1Iter->second.data(); + if (level1Key.length() > DETECTOR_PREFIX.length() && level1Key.compare(0, DETECTOR_PREFIX.length(), DETECTOR_PREFIX) == 0) { + if (this->processDetector(propTree, level1Key, value, handledConfigs) == false) { LOG_ERROR("Error reading config file " << configFile); return false; } - } - else if (level1Key.length() > CATEGORIZATION_FILTER_PREFIX.length() && - level1Key.compare(0, CATEGORIZATION_FILTER_PREFIX.length(), CATEGORIZATION_FILTER_PREFIX) == 0) - { + } else if (level1Key.length() > CATEGORIZATION_FILTER_PREFIX.length() && + level1Key.compare(0, CATEGORIZATION_FILTER_PREFIX.length(), CATEGORIZATION_FILTER_PREFIX) == 0) { this->addCategorizationFilter(value); - } - else if (level1Key.length() > INFLUENCER_PREFIX.length() && - level1Key.compare(0, INFLUENCER_PREFIX.length(), INFLUENCER_PREFIX) == 0) - { + } else if (level1Key.length() > INFLUENCER_PREFIX.length() && + level1Key.compare(0, INFLUENCER_PREFIX.length(), INFLUENCER_PREFIX) == 0) { this->addInfluencerFieldName(value); - } - else if (level1Key.length() > FILTER_PREFIX.length() && - level1Key.compare(0, FILTER_PREFIX.length(), FILTER_PREFIX) == 0) - { + } else if (level1Key.length() > FILTER_PREFIX.length() && level1Key.compare(0, FILTER_PREFIX.length(), FILTER_PREFIX) == 0) { this->processFilter(level1Key, value); - } - else if (level1Key.length() > SCHEDULED_EVENT_PREFIX.length() && - level1Key.compare(0, SCHEDULED_EVENT_PREFIX.length(), SCHEDULED_EVENT_PREFIX) == 0) - { + } else if (level1Key.length() > SCHEDULED_EVENT_PREFIX.length() && + level1Key.compare(0, SCHEDULED_EVENT_PREFIX.length(), SCHEDULED_EVENT_PREFIX) == 0) { this->processScheduledEvent(propTree, level1Key, value, handledScheduledEvents); - } - else - { + } else { LOG_ERROR("Invalid setting " << level1Key << " = " << value << " in config file " << configFile); return false; } @@ -306,27 +255,18 @@ bool CFieldConfig::initFromFile(const std::string &configFile) return true; } -bool CFieldConfig::tokenise(const std::string &clause, - TStrVec ©Tokens) -{ +bool CFieldConfig::tokenise(const std::string& clause, TStrVec& copyTokens) { // Tokenise on spaces or commas. Double quotes are used // for quoting, and the escape character is a backslash. 
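    // [Editor's sketch of the configured behaviour on a hypothetical clause,
    // not part of the original change] With backslash escaping, whitespace
    // or comma as separators and double quotes for grouping, a clause such as
    //     count by "airline code",partitionfield=host
    // tokenises to
    //     count | by | airline code | partitionfield=host
    // Runs of adjacent separators (e.g. a space followed by a comma) produce
    // empty tokens, which the loop below discards.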
using TCharEscapedListSeparator = boost::escaped_list_separator<char>; - TCharEscapedListSeparator els("\\", - core::CStringUtils::WHITESPACE_CHARS + ',', - "\""); - try - { + TCharEscapedListSeparator els("\\", core::CStringUtils::WHITESPACE_CHARS + ',', "\""); + try { using TCharEscapedListSeparatorTokenizer = boost::tokenizer<TCharEscapedListSeparator>; TCharEscapedListSeparatorTokenizer tokenizer(clause, els); - for (TCharEscapedListSeparatorTokenizer::iterator iter = tokenizer.begin(); - iter != tokenizer.end(); - ++iter) - { - const std::string &token = *iter; - if (token.empty()) - { + for (TCharEscapedListSeparatorTokenizer::iterator iter = tokenizer.begin(); iter != tokenizer.end(); ++iter) { + const std::string& token = *iter; + if (token.empty()) { // boost::escaped_list_separator creates empty tokens for // multiple adjacent separators, but we don't want these continue; @@ -335,9 +275,7 @@ bool CFieldConfig::tokenise(const std::string &clause, copyTokens.push_back(token); LOG_TRACE(token); } - } - catch (boost::escaped_list_error &e) - { + } catch (boost::escaped_list_error& e) { LOG_ERROR("Cannot parse clause " << clause << ": " << e.what()); return false; } @@ -345,66 +283,43 @@ bool CFieldConfig::tokenise(const std::string &clause, return true; } -void CFieldConfig::retokenise(const TStrVec &tokens, - TStrVec &copyTokens) -{ - for (const auto &token : tokens) - { +void CFieldConfig::retokenise(const TStrVec& tokens, TStrVec& copyTokens) { + for (const auto& token : tokens) { size_t commaPos(token.find(',')); - if (commaPos == std::string::npos) - { + if (commaPos == std::string::npos) { copyTokens.push_back(token); - } - else - { + } else { size_t startPos(0); - do - { - if (commaPos > startPos) - { + do { + if (commaPos > startPos) { copyTokens.resize(copyTokens.size() + 1); - copyTokens.back().assign(token, - startPos, - commaPos - startPos); + copyTokens.back().assign(token, startPos, commaPos - startPos); } startPos = commaPos + 1; commaPos = token.find(',', startPos); - } - while (commaPos != std::string::npos); + } while (commaPos != std::string::npos); - if (startPos < token.length()) - { + if (startPos < token.length()) { copyTokens.resize(copyTokens.size() + 1); - copyTokens.back().assign(token, - startPos, - token.length() - startPos); + copyTokens.back().assign(token, startPos, token.length() - startPos); } } } - for (const auto &copyToken : copyTokens) - { + for (const auto& copyToken : copyTokens) { LOG_DEBUG(copyToken); } } -bool CFieldConfig::findLastByOverTokens(const TStrVec &copyTokens, - std::size_t &lastByTokenIndex, - std::size_t &lastOverTokenIndex) -{ - for (size_t index = 0; index < copyTokens.size(); ++index) - { +bool CFieldConfig::findLastByOverTokens(const TStrVec& copyTokens, std::size_t& lastByTokenIndex, std::size_t& lastOverTokenIndex) { + for (size_t index = 0; index < copyTokens.size(); ++index) { if (copyTokens[index].length() == BY_TOKEN.length() && - core::CStrCaseCmp::strCaseCmp(copyTokens[index].c_str(), BY_TOKEN.c_str()) == 0) - { - if (lastByTokenIndex != copyTokens.size()) - { - LOG_ERROR("Multiple '" << copyTokens[lastByTokenIndex] << - "' tokens in analysis clause - tokens " << - core::CStringUtils::typeToString(1 + lastByTokenIndex) << - " and " << - core::CStringUtils::typeToString(1 + index)); + core::CStrCaseCmp::strCaseCmp(copyTokens[index].c_str(), BY_TOKEN.c_str()) == 0) { + if (lastByTokenIndex != copyTokens.size()) { + LOG_ERROR("Multiple '" << copyTokens[lastByTokenIndex] << "' tokens in analysis clause - tokens " + << core::CStringUtils::typeToString(1 + 
lastByTokenIndex) << " and " + << core::CStringUtils::typeToString(1 + index)); return false; } @@ -412,14 +327,11 @@ bool CFieldConfig::findLastByOverTokens(const TStrVec ©Tokens, } if (copyTokens[index].length() == OVER_TOKEN.length() && - core::CStrCaseCmp::strCaseCmp(copyTokens[index].c_str(), OVER_TOKEN.c_str()) == 0) - { - if (lastOverTokenIndex != copyTokens.size()) - { - LOG_ERROR("Multiple '" << copyTokens[lastOverTokenIndex] << - "' tokens in analysis clause - tokens " << - core::CStringUtils::typeToString(1 + lastOverTokenIndex) << - " and " << core::CStringUtils::typeToString(1 + index)); + core::CStrCaseCmp::strCaseCmp(copyTokens[index].c_str(), OVER_TOKEN.c_str()) == 0) { + if (lastOverTokenIndex != copyTokens.size()) { + LOG_ERROR("Multiple '" << copyTokens[lastOverTokenIndex] << "' tokens in analysis clause - tokens " + << core::CStringUtils::typeToString(1 + lastOverTokenIndex) << " and " + << core::CStringUtils::typeToString(1 + index)); return false; } @@ -429,46 +341,31 @@ bool CFieldConfig::findLastByOverTokens(const TStrVec ©Tokens, return true; } -bool CFieldConfig::validateByOverField(const TStrVec ©Tokens, +bool CFieldConfig::validateByOverField(const TStrVec& copyTokens, const std::size_t thisIndex, const std::size_t otherIndex, - const TStrVec &clashingNames, - std::string &fieldName) -{ - if (thisIndex != copyTokens.size()) - { - if (thisIndex == 0) - { - LOG_ERROR("Analysis clause begins with a '" << - copyTokens[thisIndex] << "' token"); + const TStrVec& clashingNames, + std::string& fieldName) { + if (thisIndex != copyTokens.size()) { + if (thisIndex == 0) { + LOG_ERROR("Analysis clause begins with a '" << copyTokens[thisIndex] << "' token"); return false; } - if (thisIndex + 1 == copyTokens.size() || - thisIndex + 1 == otherIndex) - { - LOG_ERROR("No field name follows the '" << - copyTokens[thisIndex] << - "' token in the analysis clause"); + if (thisIndex + 1 == copyTokens.size() || thisIndex + 1 == otherIndex) { + LOG_ERROR("No field name follows the '" << copyTokens[thisIndex] << "' token in the analysis clause"); return false; } - if (thisIndex + 2 < copyTokens.size() && - thisIndex + 2 < otherIndex) - { - LOG_ERROR("Only one field name may follow the '" << - copyTokens[thisIndex] << - "' token in the analysis clause"); + if (thisIndex + 2 < copyTokens.size() && thisIndex + 2 < otherIndex) { + LOG_ERROR("Only one field name may follow the '" << copyTokens[thisIndex] << "' token in the analysis clause"); return false; } fieldName = copyTokens[thisIndex + 1]; - for (const auto &clashingName : clashingNames) - { - if (fieldName == clashingName) - { - LOG_ERROR("The '" << copyTokens[thisIndex] << - "' field cannot be " << fieldName); + for (const auto& clashingName : clashingNames) { + if (fieldName == clashingName) { + LOG_ERROR("The '" << copyTokens[thisIndex] << "' field cannot be " << fieldName); return false; } } @@ -477,18 +374,11 @@ bool CFieldConfig::validateByOverField(const TStrVec ©Tokens, return true; } -std::string CFieldConfig::findParameter(const std::string ¶meter, - TStrVec ©Tokens) -{ - for (TStrVecItr iter = copyTokens.begin(); iter != copyTokens.end(); ++iter) - { - const std::string &token = *iter; +std::string CFieldConfig::findParameter(const std::string& parameter, TStrVec& copyTokens) { + for (TStrVecItr iter = copyTokens.begin(); iter != copyTokens.end(); ++iter) { + const std::string& token = *iter; std::size_t equalPos = token.find('='); - if (equalPos == parameter.length() && - core::CStrCaseCmp::strNCaseCmp(parameter.c_str(), - 
token.c_str(), - equalPos) == 0) - { + if (equalPos == parameter.length() && core::CStrCaseCmp::strNCaseCmp(parameter.c_str(), token.c_str(), equalPos) == 0) { std::string value(token, equalPos + 1, token.length() - equalPos); LOG_TRACE("Found parameter " << parameter << " : " << value); copyTokens.erase(iter); @@ -498,8 +388,7 @@ std::string CFieldConfig::findParameter(const std::string &parameter, return std::string(); } -bool CFieldConfig::initFromClause(const TStrVec &tokens) -{ +bool CFieldConfig::initFromClause(const TStrVec& tokens) { m_FieldOptions.clear(); m_FieldNameSuperset.clear(); m_CategorizationFilters.clear(); @@ -512,8 +401,7 @@ bool CFieldConfig::initFromClause(const TStrVec &tokens) // and/or commas, so here we split them again on the commas. TStrVec copyTokens; this->retokenise(tokens, copyTokens); - if (copyTokens.empty()) - { + if (copyTokens.empty()) { LOG_ERROR("No fields specified for analysis"); return false; } @@ -521,24 +409,16 @@ bool CFieldConfig::initFromClause(const TStrVec &tokens) std::string defaultCategorizationFieldName; std::string summaryCountFieldName; - if (this->parseClause(true, - 0, - EMPTY_STRING, - copyTokens, - m_FieldOptions, - defaultCategorizationFieldName, - summaryCountFieldName) == false) - { + if (this->parseClause(true, 0, EMPTY_STRING, copyTokens, m_FieldOptions, defaultCategorizationFieldName, summaryCountFieldName) == + false) { // parseClause() will have logged the problem return false; } - if (!defaultCategorizationFieldName.empty()) - { + if (!defaultCategorizationFieldName.empty()) { m_CategorizationFieldName.swap(defaultCategorizationFieldName); } - if (!summaryCountFieldName.empty()) - { + if (!summaryCountFieldName.empty()) { m_SummaryCountFieldName.swap(summaryCountFieldName); } @@ -547,14 +427,11 @@ bool CFieldConfig::initFromClause(const TStrVec &tokens) return true; } -bool CFieldConfig::addOptions(const CFieldOptions &options) -{ +bool CFieldConfig::addOptions(const CFieldOptions& options) { using TFieldOptionsMIndexItrBoolPr = std::pair<TFieldOptionsMIndex::iterator, bool>; TFieldOptionsMIndexItrBoolPr result(m_FieldOptions.insert(options)); - if (result.second == false) - { - LOG_ERROR("Duplicate config found: " << options << core_t::LINE_ENDING - << "It clashes with config " << *result.first); + if (result.second == false) { + LOG_ERROR("Duplicate config found: " << options << core_t::LINE_ENDING << "It clashes with config " << *result.first); return false; } @@ -568,42 +445,32 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions, int configKey, - const std::string &description, - TStrVec &copyTokens, - TFieldOptionsMIndex &optionsIndex, - std::string &categorizationFieldName, - std::string &summaryCountFieldName) -{ - std::string partitionFieldName = this->findParameter(PARTITION_FIELD_OPTION, - copyTokens); + const std::string& description, + TStrVec& copyTokens, + TFieldOptionsMIndex& optionsIndex, + std::string& categorizationFieldName, + std::string& summaryCountFieldName) { + std::string partitionFieldName = this->findParameter(PARTITION_FIELD_OPTION, copyTokens); // Allow any number of influencerfield arguments - std::string influencerFieldName = this->findParameter(INFLUENCER_FIELD_OPTION, - copyTokens); - while (!influencerFieldName.empty()) - { + std::string influencerFieldName = this->findParameter(INFLUENCER_FIELD_OPTION, copyTokens); + while (!influencerFieldName.empty()) { this->addInfluencerFieldName(influencerFieldName); - influencerFieldName = 
this->findParameter(INFLUENCER_FIELD_OPTION, - copyTokens); + influencerFieldName = this->findParameter(INFLUENCER_FIELD_OPTION, copyTokens); } - categorizationFieldName = - this->findParameter(CATEGORIZATION_FIELD_OPTION, copyTokens); + categorizationFieldName = this->findParameter(CATEGORIZATION_FIELD_OPTION, copyTokens); - if (!categorizationFieldName.empty()) - { + if (!categorizationFieldName.empty()) { this->seenField(categorizationFieldName); } - summaryCountFieldName = - this->findParameter(SUMMARY_COUNT_FIELD_OPTION, copyTokens); + summaryCountFieldName = this->findParameter(SUMMARY_COUNT_FIELD_OPTION, copyTokens); this->seenField(summaryCountFieldName); std::string useNullStr = this->findParameter(USE_NULL_OPTION, copyTokens); bool useNull(false); - if (!useNullStr.empty() && - core::CStringUtils::stringToType(useNullStr, useNull) == false) - { + if (!useNullStr.empty() && core::CStringUtils::stringToType(useNullStr, useNull) == false) { LOG_ERROR("Cannot convert usenull value to boolean: " << useNullStr); return false; } @@ -614,10 +481,7 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions, // check all tokens so that we can report errors) size_t lastByTokenIndex(copyTokens.size()); size_t lastOverTokenIndex(copyTokens.size()); - if (!this->findLastByOverTokens(copyTokens, - lastByTokenIndex, - lastOverTokenIndex)) - { + if (!this->findLastByOverTokens(copyTokens, lastByTokenIndex, lastOverTokenIndex)) { return false; } @@ -625,23 +489,13 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions, clashingNames.push_back(COUNT_NAME); clashingNames.push_back(partitionFieldName); std::string byFieldName; - if (!this->validateByOverField(copyTokens, - lastByTokenIndex, - lastOverTokenIndex, - clashingNames, - byFieldName)) - { + if (!this->validateByOverField(copyTokens, lastByTokenIndex, lastOverTokenIndex, clashingNames, byFieldName)) { return false; } std::string overFieldName; clashingNames.push_back(byFieldName); - if (!this->validateByOverField(copyTokens, - lastOverTokenIndex, - lastByTokenIndex, - clashingNames, - overFieldName)) - { + if (!this->validateByOverField(copyTokens, lastOverTokenIndex, lastByTokenIndex, clashingNames, overFieldName)) { return false; } @@ -651,37 +505,25 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions, //! 
Validate the "excludefrequent" flag if it has been set bool byExcludeFrequent(false); bool overExcludeFrequent(false); - if (this->decipherExcludeFrequentSetting(excludeFrequentString, - hasByField, - isPopulation, - byExcludeFrequent, - overExcludeFrequent) == false) - { + if (this->decipherExcludeFrequentSetting(excludeFrequentString, hasByField, isPopulation, byExcludeFrequent, overExcludeFrequent) == + false) { LOG_ERROR("Unknown setting for excludefrequent: " << excludeFrequentString); return false; } int tokenNum(0); size_t stop(std::min(lastByTokenIndex, lastOverTokenIndex)); - if (stop > 1 && !allowMultipleFunctions) - { - LOG_ERROR("Only one analysis function is allowed in this context but " << - core::CStringUtils::typeToString(stop) << - " were specified"); + if (stop > 1 && !allowMultipleFunctions) { + LOG_ERROR("Only one analysis function is allowed in this context but " << core::CStringUtils::typeToString(stop) + << " were specified"); return false; } - for (size_t index = 0; index < stop; ++index) - { + for (size_t index = 0; index < stop; ++index) { model::function_t::EFunction function; std::string fieldName; - if (this->parseFieldString(!summaryCountFieldName.empty(), - isPopulation, - hasByField, - copyTokens[index], - function, - fieldName) == false) - { + if (this->parseFieldString(!summaryCountFieldName.empty(), isPopulation, hasByField, copyTokens[index], function, fieldName) == + false) { LOG_ERROR("Failed to process token '" << copyTokens[index] << "'"); // External error reporting is done within parseFieldString() so @@ -699,19 +541,15 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions, byExcludeFrequent, overExcludeFrequent, useNull); - if (!description.empty()) - { + if (!description.empty()) { options.description(description); } using TFieldOptionsMIndexItrBoolPr = std::pair<TFieldOptionsMIndex::iterator, bool>; TFieldOptionsMIndexItrBoolPr result(optionsIndex.insert(options)); - if (result.second == false) - { - LOG_ERROR("Token " << - core::CStringUtils::typeToString(options.configKey()) << - " in the analysis clause is a duplicate of token " << - result.first->configKey()); + if (result.second == false) { + LOG_ERROR("Token " << core::CStringUtils::typeToString(options.configKey()) + << " in the analysis clause is a duplicate of token " << result.first->configKey()); return false; } @@ -723,15 +561,12 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions, return true; } -bool CFieldConfig::parseRules(int detectorIndex, const std::string &rules) -{ +bool CFieldConfig::parseRules(int detectorIndex, const std::string& rules) { return parseRules(m_DetectorRules[detectorIndex], rules); } -bool CFieldConfig::parseRules(TDetectionRuleVec &detectionRules, const std::string &rules) -{ - if (rules.empty()) - { +bool CFieldConfig::parseRules(TDetectionRuleVec& detectionRules, const std::string& rules) { + if (rules.empty()) { return true; } @@ -739,142 +574,107 @@ bool CFieldConfig::parseRules(TDetectionRuleVec &detectionRules, const std::stri return rulesParser.parseRules(rules, detectionRules); } -const CFieldConfig::TFieldOptionsMIndex &CFieldConfig::fieldOptions() const -{ +const CFieldConfig::TFieldOptionsMIndex& CFieldConfig::fieldOptions() const { return m_FieldOptions; } -const std::string &CFieldConfig::categorizationFieldName() const -{ +const std::string& CFieldConfig::categorizationFieldName() const { return m_CategorizationFieldName; } -const CFieldConfig::TStrPatternSetUMap &CFieldConfig::ruleFilters() const -{ +const CFieldConfig::TStrPatternSetUMap& 
CFieldConfig::ruleFilters() const { return m_RuleFilters; } -const CFieldConfig::TStrVec &CFieldConfig::categorizationFilters() const -{ +const CFieldConfig::TStrVec& CFieldConfig::categorizationFilters() const { return m_CategorizationFilters; } -const std::string &CFieldConfig::summaryCountFieldName() const -{ +const std::string& CFieldConfig::summaryCountFieldName() const { return m_SummaryCountFieldName; } -bool CFieldConfig::havePartitionFields() const -{ - for (const auto &fieldOption : m_FieldOptions) - { - if (!fieldOption.partitionFieldName().empty()) - { +bool CFieldConfig::havePartitionFields() const { + for (const auto& fieldOption : m_FieldOptions) { + if (!fieldOption.partitionFieldName().empty()) { return true; } } return false; } -const CFieldConfig::TStrSet &CFieldConfig::fieldNameSuperset() const -{ +const CFieldConfig::TStrSet& CFieldConfig::fieldNameSuperset() const { return m_FieldNameSuperset; } -bool CFieldConfig::processDetector(const boost::property_tree::ptree &propTree, - const std::string &key, - const std::string &value, - TIntSet &handledConfigs) -{ +bool CFieldConfig::processDetector(const boost::property_tree::ptree& propTree, + const std::string& key, + const std::string& value, + TIntSet& handledConfigs) { // Drive the map population off the first setting in the file that is for a // particular detector // Here we pull out the "1" in "detector.1.clause" size_t sepPos(key.rfind(SUFFIX_SEPARATOR)); - if (sepPos == std::string::npos || - sepPos <= DETECTOR_PREFIX.length() || - sepPos == key.length() - 1) - { + if (sepPos == std::string::npos || sepPos <= DETECTOR_PREFIX.length() || sepPos == key.length() - 1) { LOG_ERROR("Unrecognised configuration option " << key << " = " << value); return false; } - std::string configKeyString(key, - DETECTOR_PREFIX.length(), - sepPos - DETECTOR_PREFIX.length()); + std::string configKeyString(key, DETECTOR_PREFIX.length(), sepPos - DETECTOR_PREFIX.length()); int configKey; - if (core::CStringUtils::stringToType(configKeyString, configKey) == false) - { + if (core::CStringUtils::stringToType(configKeyString, configKey) == false) { LOG_ERROR("Cannot convert config key to integer: " << configKeyString); return false; } // Check if we've already seen this key - if (handledConfigs.insert(configKey).second == false) - { + if (handledConfigs.insert(configKey).second == false) { // Not an error return true; } - std::string description(propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + DESCRIPTION_SUFFIX, '\t'), - EMPTY_STRING)); + std::string description( + propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + DESCRIPTION_SUFFIX, '\t'), EMPTY_STRING)); - std::string clause(propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + CLAUSE_SUFFIX, '\t'), - EMPTY_STRING)); + std::string clause( + propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + CLAUSE_SUFFIX, '\t'), EMPTY_STRING)); - std::string rules(propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + RULES_SUFFIX, '\t'), - EMPTY_STRING)); + std::string rules( + propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + RULES_SUFFIX, '\t'), EMPTY_STRING)); TStrVec tokens; - if (this->tokenise(clause, tokens) == false) - { + if (this->tokenise(clause, tokens) == false) { // tokenise() will already have logged the error return false; } // This is an active configuration - if 
(this->addActiveDetector(configKey, - description, - rules, - tokens) == false) - { + if (this->addActiveDetector(configKey, description, rules, tokens) == false) { return false; } return true; } -bool CFieldConfig::addActiveDetector(int configKey, - const std::string &description, - const std::string &rules, - TStrVec ©Tokens) -{ +bool CFieldConfig::addActiveDetector(int configKey, const std::string& description, const std::string& rules, TStrVec& copyTokens) { std::string categorizationFieldName; std::string summaryCountFieldName; - if (this->parseClause(false, - configKey, - description, - copyTokens, - m_FieldOptions, - categorizationFieldName, - summaryCountFieldName) == false) - { + if (this->parseClause(false, configKey, description, copyTokens, m_FieldOptions, categorizationFieldName, summaryCountFieldName) == + false) { // parseClause() will have logged the error return false; } - if (!categorizationFieldName.empty()) - { + if (!categorizationFieldName.empty()) { m_CategorizationFieldName.swap(categorizationFieldName); } - if (!summaryCountFieldName.empty()) - { + if (!summaryCountFieldName.empty()) { m_SummaryCountFieldName.swap(summaryCountFieldName); } - TDetectionRuleVec &detectionRules = m_DetectorRules[configKey]; - if (this->parseRules(detectionRules, rules) == false) - { + TDetectionRuleVec& detectionRules = m_DetectorRules[configKey]; + if (this->parseRules(detectionRules, rules) == false) { // parseRules() will have logged the error return false; } @@ -885,10 +685,9 @@ bool CFieldConfig::addActiveDetector(int configKey, bool CFieldConfig::parseFieldString(bool haveSummaryCountField, bool isPopulation, bool hasByField, - const std::string &str, - model::function_t::EFunction &function, - std::string &fieldName) -{ + const std::string& str, + model::function_t::EFunction& function, + std::string& fieldName) { // Parse using a regex core::CRegex regex; @@ -901,22 +700,19 @@ bool CFieldConfig::parseFieldString(bool haveSummaryCountField, // a metric field name // etc. std::string regexStr("([^()]+)(?:\\((.*)\\))?"); - if (!regex.init(regexStr)) - { + if (!regex.init(regexStr)) { LOG_FATAL("Unable to init regex " << regexStr); return false; } core::CRegex::TStrVec tokens; - if (regex.tokenise(str, tokens) == false) - { + if (regex.tokenise(str, tokens) == false) { LOG_ERROR("Unable to parse a function from " << str); return false; } - if (tokens.size() != 2) - { + if (tokens.size() != 2) { LOG_INFO("Got wrong number of tokens:: " << tokens.size()); return false; } @@ -924,8 +720,8 @@ bool CFieldConfig::parseFieldString(bool haveSummaryCountField, // Overall string "x(y)" => outerToken is "x" and innerToken is "y" // Overall string "x" => outerToken is "x" and innerToken is empty // Overall string "x()" => outerToken is "x" and innerToken is empty - const std::string &outerToken = tokens[0]; - const std::string &innerToken = tokens[1]; + const std::string& outerToken = tokens[0]; + const std::string& innerToken = tokens[1]; // Some functions must take an argument, some mustn't and for the rest it's // optional. Validate this based on the contents of these flags after @@ -935,333 +731,160 @@ bool CFieldConfig::parseFieldString(bool haveSummaryCountField, bool byFieldRequired(false); bool byFieldInvalid(false); - if (outerToken == FUNCTION_COUNT || - outerToken == FUNCTION_COUNT_ABBREV) - { - function = isPopulation ? 
-                   model::function_t::E_PopulationCount :
-                   model::function_t::E_IndividualRareCount;
+    if (outerToken == FUNCTION_COUNT || outerToken == FUNCTION_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationCount : model::function_t::E_IndividualRareCount;
         argumentInvalid = true;
-    }
-    else if (outerToken == FUNCTION_DISTINCT_COUNT ||
-             outerToken == FUNCTION_DISTINCT_COUNT_ABBREV)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationDistinctCount :
-                   model::function_t::E_IndividualDistinctCount;
+    } else if (outerToken == FUNCTION_DISTINCT_COUNT || outerToken == FUNCTION_DISTINCT_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationDistinctCount : model::function_t::E_IndividualDistinctCount;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_LOW_DISTINCT_COUNT ||
-             outerToken == FUNCTION_LOW_DISTINCT_COUNT_ABBREV)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationLowDistinctCount :
-                   model::function_t::E_IndividualLowDistinctCount;
+    } else if (outerToken == FUNCTION_LOW_DISTINCT_COUNT || outerToken == FUNCTION_LOW_DISTINCT_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationLowDistinctCount : model::function_t::E_IndividualLowDistinctCount;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_DISTINCT_COUNT ||
-             outerToken == FUNCTION_HIGH_DISTINCT_COUNT_ABBREV)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationHighDistinctCount :
-                   model::function_t::E_IndividualHighDistinctCount;
+    } else if (outerToken == FUNCTION_HIGH_DISTINCT_COUNT || outerToken == FUNCTION_HIGH_DISTINCT_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationHighDistinctCount : model::function_t::E_IndividualHighDistinctCount;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_NON_ZERO_COUNT ||
-             outerToken == FUNCTION_NON_ZERO_COUNT_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_NON_ZERO_COUNT || outerToken == FUNCTION_NON_ZERO_COUNT_ABBREV) {
         function = model::function_t::E_IndividualNonZeroCount;
         argumentInvalid = true;
-    }
-    else if (outerToken == FUNCTION_RARE_NON_ZERO_COUNT ||
-             outerToken == FUNCTION_RARE_NON_ZERO_COUNT_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_RARE_NON_ZERO_COUNT || outerToken == FUNCTION_RARE_NON_ZERO_COUNT_ABBREV) {
         function = model::function_t::E_IndividualRareNonZeroCount;
         argumentInvalid = true;
         byFieldRequired = true;
-    }
-    else if (outerToken == FUNCTION_RARE)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationRare :
-                   model::function_t::E_IndividualRare;
+    } else if (outerToken == FUNCTION_RARE) {
+        function = isPopulation ? model::function_t::E_PopulationRare : model::function_t::E_IndividualRare;
         argumentInvalid = true;
         byFieldRequired = true;
-    }
-    else if (outerToken == FUNCTION_RARE_COUNT)
-    {
+    } else if (outerToken == FUNCTION_RARE_COUNT) {
         function = model::function_t::E_PopulationRareCount;
         argumentInvalid = true;
         byFieldRequired = true;
-    }
-    else if (outerToken == FUNCTION_LOW_COUNT ||
-             outerToken == FUNCTION_LOW_COUNT_ABBREV)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationLowCounts :
-                   model::function_t::E_IndividualLowCounts;
+    } else if (outerToken == FUNCTION_LOW_COUNT || outerToken == FUNCTION_LOW_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationLowCounts : model::function_t::E_IndividualLowCounts;
         argumentInvalid = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_COUNT ||
-             outerToken == FUNCTION_HIGH_COUNT_ABBREV)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationHighCounts :
-                   model::function_t::E_IndividualHighCounts;
+    } else if (outerToken == FUNCTION_HIGH_COUNT || outerToken == FUNCTION_HIGH_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationHighCounts : model::function_t::E_IndividualHighCounts;
         argumentInvalid = true;
-    }
-    else if (outerToken == FUNCTION_LOW_NON_ZERO_COUNT ||
-             outerToken == FUNCTION_LOW_NON_ZERO_COUNT_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_LOW_NON_ZERO_COUNT || outerToken == FUNCTION_LOW_NON_ZERO_COUNT_ABBREV) {
        function = model::function_t::E_IndividualLowNonZeroCount;
         argumentInvalid = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_NON_ZERO_COUNT ||
-             outerToken == FUNCTION_HIGH_NON_ZERO_COUNT_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_HIGH_NON_ZERO_COUNT || outerToken == FUNCTION_HIGH_NON_ZERO_COUNT_ABBREV) {
         function = model::function_t::E_IndividualHighNonZeroCount;
         argumentInvalid = true;
-    }
-    else if (outerToken == FUNCTION_FREQ_RARE ||
-             outerToken == FUNCTION_FREQ_RARE_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_FREQ_RARE || outerToken == FUNCTION_FREQ_RARE_ABBREV) {
         function = model::function_t::E_PopulationFreqRare;
         argumentInvalid = true;
         byFieldRequired = true;
-    }
-    else if (outerToken == FUNCTION_FREQ_RARE_COUNT ||
-             outerToken == FUNCTION_FREQ_RARE_COUNT_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_FREQ_RARE_COUNT || outerToken == FUNCTION_FREQ_RARE_COUNT_ABBREV) {
         function = model::function_t::E_PopulationFreqRareCount;
         argumentInvalid = true;
         byFieldRequired = true;
-    }
-    else if (outerToken == FUNCTION_INFO_CONTENT)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationInfoContent :
-                   model::function_t::E_IndividualInfoContent;
+    } else if (outerToken == FUNCTION_INFO_CONTENT) {
+        function = isPopulation ? model::function_t::E_PopulationInfoContent : model::function_t::E_IndividualInfoContent;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_LOW_INFO_CONTENT)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationLowInfoContent :
-                   model::function_t::E_IndividualLowInfoContent;
+    } else if (outerToken == FUNCTION_LOW_INFO_CONTENT) {
+        function = isPopulation ? model::function_t::E_PopulationLowInfoContent : model::function_t::E_IndividualLowInfoContent;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_INFO_CONTENT)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationHighInfoContent :
-                   model::function_t::E_IndividualHighInfoContent;
+    } else if (outerToken == FUNCTION_HIGH_INFO_CONTENT) {
+        function = isPopulation ? model::function_t::E_PopulationHighInfoContent : model::function_t::E_IndividualHighInfoContent;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_METRIC)
-    {
-        if (haveSummaryCountField)
-        {
-            LOG_ERROR("Function " << outerToken <<
-                      "() cannot be used with a summary count field");
+    } else if (outerToken == FUNCTION_METRIC) {
+        if (haveSummaryCountField) {
+            LOG_ERROR("Function " << outerToken << "() cannot be used with a summary count field");
             return false;
         }
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetric :
-                   model::function_t::E_IndividualMetric;
+        function = isPopulation ? model::function_t::E_PopulationMetric : model::function_t::E_IndividualMetric;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_AVERAGE ||
-             outerToken == FUNCTION_MEAN)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricMean :
-                   model::function_t::E_IndividualMetricMean;
+    } else if (outerToken == FUNCTION_AVERAGE || outerToken == FUNCTION_MEAN) {
+        function = isPopulation ? model::function_t::E_PopulationMetricMean : model::function_t::E_IndividualMetricMean;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_LOW_AVERAGE ||
-             outerToken == FUNCTION_LOW_MEAN)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricLowMean :
-                   model::function_t::E_IndividualMetricLowMean;
+    } else if (outerToken == FUNCTION_LOW_AVERAGE || outerToken == FUNCTION_LOW_MEAN) {
+        function = isPopulation ? model::function_t::E_PopulationMetricLowMean : model::function_t::E_IndividualMetricLowMean;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_AVERAGE ||
-             outerToken == FUNCTION_HIGH_MEAN)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricHighMean :
-                   model::function_t::E_IndividualMetricHighMean;
+    } else if (outerToken == FUNCTION_HIGH_AVERAGE || outerToken == FUNCTION_HIGH_MEAN) {
+        function = isPopulation ? model::function_t::E_PopulationMetricHighMean : model::function_t::E_IndividualMetricHighMean;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_MEDIAN)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricMedian :
-                   model::function_t::E_IndividualMetricMedian;
+    } else if (outerToken == FUNCTION_MEDIAN) {
+        function = isPopulation ? model::function_t::E_PopulationMetricMedian : model::function_t::E_IndividualMetricMedian;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_LOW_MEDIAN)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricLowMedian :
-                   model::function_t::E_IndividualMetricLowMedian;
+    } else if (outerToken == FUNCTION_LOW_MEDIAN) {
+        function = isPopulation ? model::function_t::E_PopulationMetricLowMedian : model::function_t::E_IndividualMetricLowMedian;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_MEDIAN)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricHighMedian :
-                   model::function_t::E_IndividualMetricHighMedian;
+    } else if (outerToken == FUNCTION_HIGH_MEDIAN) {
+        function = isPopulation ? model::function_t::E_PopulationMetricHighMedian : model::function_t::E_IndividualMetricHighMedian;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_MIN)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricMin :
-                   model::function_t::E_IndividualMetricMin;
+    } else if (outerToken == FUNCTION_MIN) {
+        function = isPopulation ? model::function_t::E_PopulationMetricMin : model::function_t::E_IndividualMetricMin;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_MAX)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricMax :
-                   model::function_t::E_IndividualMetricMax;
+    } else if (outerToken == FUNCTION_MAX) {
+        function = isPopulation ? model::function_t::E_PopulationMetricMax : model::function_t::E_IndividualMetricMax;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_VARIANCE)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricVariance :
-                   model::function_t::E_IndividualMetricVariance;
+    } else if (outerToken == FUNCTION_VARIANCE) {
+        function = isPopulation ? model::function_t::E_PopulationMetricVariance : model::function_t::E_IndividualMetricVariance;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_LOW_VARIANCE)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricLowVariance :
-                   model::function_t::E_IndividualMetricLowVariance;
+    } else if (outerToken == FUNCTION_LOW_VARIANCE) {
+        function = isPopulation ? model::function_t::E_PopulationMetricLowVariance : model::function_t::E_IndividualMetricLowVariance;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_VARIANCE)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricHighVariance :
-                   model::function_t::E_IndividualMetricHighVariance;
+    } else if (outerToken == FUNCTION_HIGH_VARIANCE) {
+        function = isPopulation ? model::function_t::E_PopulationMetricHighVariance : model::function_t::E_IndividualMetricHighVariance;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_SUM)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricSum :
-                   model::function_t::E_IndividualMetricSum;
+    } else if (outerToken == FUNCTION_SUM) {
+        function = isPopulation ? model::function_t::E_PopulationMetricSum : model::function_t::E_IndividualMetricSum;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_LOW_SUM)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricLowSum :
-                   model::function_t::E_IndividualMetricLowSum;
+    } else if (outerToken == FUNCTION_LOW_SUM) {
+        function = isPopulation ? model::function_t::E_PopulationMetricLowSum : model::function_t::E_IndividualMetricLowSum;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_SUM)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetricHighSum :
-                   model::function_t::E_IndividualMetricHighSum;
+    } else if (outerToken == FUNCTION_HIGH_SUM) {
+        function = isPopulation ? model::function_t::E_PopulationMetricHighSum : model::function_t::E_IndividualMetricHighSum;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_NON_NULL_SUM ||
-             outerToken == FUNCTION_NON_NULL_SUM_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_NON_NULL_SUM || outerToken == FUNCTION_NON_NULL_SUM_ABBREV) {
         function = model::function_t::E_IndividualMetricNonNullSum;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_LOW_NON_NULL_SUM ||
-             outerToken == FUNCTION_LOW_NON_NULL_SUM_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_LOW_NON_NULL_SUM || outerToken == FUNCTION_LOW_NON_NULL_SUM_ABBREV) {
         function = model::function_t::E_IndividualMetricLowNonNullSum;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_HIGH_NON_NULL_SUM ||
-             outerToken == FUNCTION_HIGH_NON_NULL_SUM_ABBREV)
-    {
+    } else if (outerToken == FUNCTION_HIGH_NON_NULL_SUM || outerToken == FUNCTION_HIGH_NON_NULL_SUM_ABBREV) {
         function = model::function_t::E_IndividualMetricHighNonNullSum;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_TIME_OF_DAY)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationTimeOfDay :
-                   model::function_t::E_IndividualTimeOfDay;
+    } else if (outerToken == FUNCTION_TIME_OF_DAY) {
+        function = isPopulation ? model::function_t::E_PopulationTimeOfDay : model::function_t::E_IndividualTimeOfDay;
         argumentRequired = false;
         argumentInvalid = true;
-    }
-    else if (outerToken == FUNCTION_TIME_OF_WEEK)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationTimeOfWeek :
-                   model::function_t::E_IndividualTimeOfWeek;
+    } else if (outerToken == FUNCTION_TIME_OF_WEEK) {
+        function = isPopulation ? model::function_t::E_PopulationTimeOfWeek : model::function_t::E_IndividualTimeOfWeek;
         argumentRequired = false;
         argumentInvalid = true;
-    }
-    else if (outerToken == FUNCTION_LAT_LONG)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationLatLong:
-                   model::function_t::E_IndividualLatLong;
+    } else if (outerToken == FUNCTION_LAT_LONG) {
+        function = isPopulation ? model::function_t::E_PopulationLatLong : model::function_t::E_IndividualLatLong;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_MAX_VELOCITY)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMaxVelocity:
-                   model::function_t::E_IndividualMaxVelocity;
+    } else if (outerToken == FUNCTION_MAX_VELOCITY) {
+        function = isPopulation ? model::function_t::E_PopulationMaxVelocity : model::function_t::E_IndividualMaxVelocity;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_MIN_VELOCITY)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMinVelocity:
-                   model::function_t::E_IndividualMinVelocity;
+    } else if (outerToken == FUNCTION_MIN_VELOCITY) {
+        function = isPopulation ? model::function_t::E_PopulationMinVelocity : model::function_t::E_IndividualMinVelocity;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_MEAN_VELOCITY)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationMeanVelocity:
-                   model::function_t::E_IndividualMeanVelocity;
+    } else if (outerToken == FUNCTION_MEAN_VELOCITY) {
        function = isPopulation ? model::function_t::E_PopulationMeanVelocity : model::function_t::E_IndividualMeanVelocity;
         argumentRequired = true;
-    }
-    else if (outerToken == FUNCTION_SUM_VELOCITY)
-    {
-        function = isPopulation ?
-                   model::function_t::E_PopulationSumVelocity:
-                   model::function_t::E_IndividualSumVelocity;
+    } else if (outerToken == FUNCTION_SUM_VELOCITY) {
+        function = isPopulation ? model::function_t::E_PopulationSumVelocity : model::function_t::E_IndividualSumVelocity;
         argumentRequired = true;
-    }
-    else
-    {
+    } else {
         // We expect an individual metric here, but if the original string
         // contained brackets then there's probably been a typo because a metric
         // name should not be followed by brackets
-        if (str.find('(') != std::string::npos)
-        {
+        if (str.find('(') != std::string::npos) {
             LOG_ERROR(outerToken << "() is not a known function");
             return false;
         }
-        if (haveSummaryCountField)
-        {
+        if (haveSummaryCountField) {
             LOG_ERROR("Implicit function metric() cannot be "
                       "used with a summary count field");
             return false;
         }
-        function = isPopulation ?
-                   model::function_t::E_PopulationMetric :
-                   model::function_t::E_IndividualMetric;
+        function = isPopulation ? model::function_t::E_PopulationMetric : model::function_t::E_IndividualMetric;
 
         // This is inconsistent notation, but kept for backwards compatibility
         fieldName = outerToken;
@@ -1270,41 +893,32 @@ bool CFieldConfig::parseFieldString(bool haveSummaryCountField,
     // Validate
-    if (model::function_t::isPopulation(function) && !isPopulation)
-    {
-        LOG_ERROR("Function " << outerToken <<
-                  "() requires an 'over' field");
+    if (model::function_t::isPopulation(function) && !isPopulation) {
+        LOG_ERROR("Function " << outerToken << "() requires an 'over' field");
         return false;
     }
 
-    if (isPopulation && !model::function_t::isPopulation(function))
-    {
-        LOG_ERROR("Function " << outerToken <<
-                  "() cannot be used with an 'over' field");
+    if (isPopulation && !model::function_t::isPopulation(function)) {
+        LOG_ERROR("Function " << outerToken << "() cannot be used with an 'over' field");
         return false;
     }
 
-    if (byFieldRequired && !hasByField)
-    {
+    if (byFieldRequired && !hasByField) {
         LOG_ERROR("Function " << outerToken << "() requires a 'by' field");
         return false;
     }
 
-    if (byFieldInvalid && hasByField)
-    {
-        LOG_ERROR("Function " << outerToken <<
-                  "() cannot be used with a 'by' field");
+    if (byFieldInvalid && hasByField) {
+        LOG_ERROR("Function " << outerToken << "() cannot be used with a 'by' field");
         return false;
     }
 
-    if (argumentRequired && innerToken.empty())
-    {
+    if (argumentRequired && innerToken.empty()) {
         LOG_ERROR("Function " << outerToken << "() requires an argument");
         return false;
     }
 
-    if (argumentInvalid && !innerToken.empty())
-    {
+    if (argumentInvalid && !innerToken.empty()) {
         LOG_ERROR("Function " << outerToken << "() does not take an argument");
         return false;
     }
@@ -1314,10 +928,8 @@ bool CFieldConfig::parseFieldString(bool haveSummaryCountField,
     return true;
 }
 
-void CFieldConfig::seenField(const std::string &fieldName)
-{
-    if (fieldName.empty())
-    {
+void CFieldConfig::seenField(const std::string& fieldName) {
+    if (fieldName.empty()) {
         return;
     }
@@ -1325,16 +937,13 @@ void CFieldConfig::seenField(const std::string &fieldName)
     m_FieldNameSuperset.insert(fieldName);
 }
 
-std::string CFieldConfig::debug() const
-{
+std::string CFieldConfig::debug() const {
     std::ostringstream strm;
 
     bool needLineBreak(false);
 
-    if (!m_FieldOptions.empty())
-    {
-        if (needLineBreak)
-        {
+    if (!m_FieldOptions.empty()) {
+        if (needLineBreak) {
             strm << core_t::LINE_ENDING;
         }
         this->debug(m_FieldOptions, strm);
@@ -1343,47 +952,34 @@ std::string CFieldConfig::debug() const
     return strm.str();
 }
 
-void CFieldConfig::debug(const TFieldOptionsMIndex &fieldOptions,
-                         std::ostream &strm) const
-{
-    for (const auto &fieldOption : fieldOptions)
-    {
+void CFieldConfig::debug(const TFieldOptionsMIndex& fieldOptions, std::ostream& strm) const {
+    for (const auto& fieldOption : fieldOptions) {
         strm << fieldOption << '|';
     }
 }
 
-bool CFieldConfig::decipherExcludeFrequentSetting(const std::string &excludeFrequentString,
+bool CFieldConfig::decipherExcludeFrequentSetting(const std::string& excludeFrequentString,
                                                   bool hasByField,
                                                   bool isPopulation,
-                                                  bool &byExcludeFrequent,
-                                                  bool &overExcludeFrequent)
-{
+                                                  bool& byExcludeFrequent,
+                                                  bool& overExcludeFrequent) {
     byExcludeFrequent = false;
     overExcludeFrequent = false;
 
-    if (!excludeFrequentString.empty())
-    {
+    if (!excludeFrequentString.empty()) {
         if (excludeFrequentString.length() == ALL_TOKEN.length() &&
-            core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), ALL_TOKEN.c_str()) == 0)
-        {
+            core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), ALL_TOKEN.c_str()) == 0) {
             byExcludeFrequent = hasByField;
             overExcludeFrequent = isPopulation;
-        }
-        else if (excludeFrequentString.length() == BY_TOKEN.length() &&
-                 core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), BY_TOKEN.c_str()) == 0)
-        {
+        } else if (excludeFrequentString.length() == BY_TOKEN.length() &&
+                   core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), BY_TOKEN.c_str()) == 0) {
             byExcludeFrequent = hasByField;
-        }
-        else if (excludeFrequentString.length() == OVER_TOKEN.length() &&
-                 core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), OVER_TOKEN.c_str()) == 0)
-        {
+        } else if (excludeFrequentString.length() == OVER_TOKEN.length() &&
+                   core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), OVER_TOKEN.c_str()) == 0) {
             overExcludeFrequent = isPopulation;
-        }
-        else
-        {
+        } else {
             if (excludeFrequentString.length() != NONE_TOKEN.length() ||
-                core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), NONE_TOKEN.c_str()) != 0)
-            {
+                core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), NONE_TOKEN.c_str()) != 0) {
                 LOG_ERROR("Unexpected excludeFrequent value = " << excludeFrequentString);
                 return false;
             }
@@ -1393,76 +989,57 @@ bool CFieldConfig::decipherExcludeFrequentSetting(const std::string &excludeFreq
     return true;
 }
 
-void CFieldConfig::addInfluencerFieldsFromByOverPartitionFields()
-{
+void CFieldConfig::addInfluencerFieldsFromByOverPartitionFields() {
     this->addInfluencerFieldsFromByOverPartitionFields(m_FieldOptions);
 }
 
-void CFieldConfig::addInfluencerFieldsFromByOverPartitionFields(const TFieldOptionsMIndex &fieldOptions)
-{
-    for (const auto &fieldOption : fieldOptions)
-    {
+void CFieldConfig::addInfluencerFieldsFromByOverPartitionFields(const TFieldOptionsMIndex& fieldOptions) {
+    for (const auto& fieldOption : fieldOptions) {
         this->addInfluencerFieldName(fieldOption.byFieldName(), true);
         this->addInfluencerFieldName(fieldOption.overFieldName(), true);
         this->addInfluencerFieldName(fieldOption.partitionFieldName(), true);
     }
 }
 
-const CFieldConfig::TStrVec &CFieldConfig::influencerFieldNames() const
-{
+const CFieldConfig::TStrVec& CFieldConfig::influencerFieldNames() const {
     return m_Influencers;
 }
 
-const CFieldConfig::TIntDetectionRuleVecUMap &CFieldConfig::detectionRules() const
-{
+const CFieldConfig::TIntDetectionRuleVecUMap& CFieldConfig::detectionRules() const {
    return m_DetectorRules;
 }
 
-const CFieldConfig::TStrDetectionRulePrVec &CFieldConfig::scheduledEvents() const
-{
+const CFieldConfig::TStrDetectionRulePrVec& CFieldConfig::scheduledEvents() const {
     return m_ScheduledEvents;
 }
 
-void CFieldConfig::influencerFieldNames(TStrVec influencers)
-{
+void CFieldConfig::influencerFieldNames(TStrVec influencers) {
     LOG_DEBUG("Set influencers : " << core::CContainerPrinter::print(influencers));
 
-    std::for_each(influencers.begin(),
-                  influencers.end(),
-                  boost::bind(&CFieldConfig::seenField, this, _1));
+    std::for_each(influencers.begin(), influencers.end(), boost::bind(&CFieldConfig::seenField, this, _1));
     m_Influencers.swap(influencers);
 }
 
-void CFieldConfig::addInfluencerFieldName(const std::string &influencer,
-                                          bool quiet)
-{
-    if (influencer.empty())
-    {
-        if (!quiet)
-        {
+void CFieldConfig::addInfluencerFieldName(const std::string& influencer, bool quiet) {
+    if (influencer.empty()) {
+        if (!quiet) {
             LOG_WARN("Ignoring blank influencer field");
         }
         return;
    }
 
-    if (std::find(m_Influencers.begin(),
-                  m_Influencers.end(),
-                  influencer) == m_Influencers.end())
-    {
+    if (std::find(m_Influencers.begin(), m_Influencers.end(), influencer) == m_Influencers.end()) {
LOG_TRACE("Add influencer : " << influencer); this->seenField(influencer); m_Influencers.push_back(influencer); } } -void CFieldConfig::sortInfluencers() -{ +void CFieldConfig::sortInfluencers() { std::sort(m_Influencers.begin(), m_Influencers.end()); } -void CFieldConfig::addCategorizationFilter(const std::string &filter) -{ - if (filter.empty()) - { +void CFieldConfig::addCategorizationFilter(const std::string& filter) { + if (filter.empty()) { LOG_WARN("Ignoring blank categorization filter"); return; } @@ -1470,90 +1047,75 @@ void CFieldConfig::addCategorizationFilter(const std::string &filter) TStrVec tokens; this->tokenise(filter, tokens); - if (tokens.size() != 1) - { - LOG_ERROR("Unexpected number of tokens: " << tokens.size() - << "; ignoring categorization filter: " << filter); + if (tokens.size() != 1) { + LOG_ERROR("Unexpected number of tokens: " << tokens.size() << "; ignoring categorization filter: " << filter); return; } m_CategorizationFilters.push_back(tokens[0]); } -bool CFieldConfig::processFilter(const std::string &key, - const std::string &value) -{ +bool CFieldConfig::processFilter(const std::string& key, const std::string& value) { // expected format is filter.=[json, array] size_t sepPos(key.find(SUFFIX_SEPARATOR)); - if (sepPos == std::string::npos) - { + if (sepPos == std::string::npos) { LOG_ERROR("Unrecognised filter key: " + key); return false; } std::string filterId = key.substr(sepPos + 1); - core::CPatternSet &filter = m_RuleFilters[filterId]; + core::CPatternSet& filter = m_RuleFilters[filterId]; return filter.initFromJson(value); } -bool CFieldConfig::updateFilters(const boost::property_tree::ptree &propTree) -{ - for (const auto &filterEntry : propTree) - { - const std::string &key = filterEntry.first; - const std::string &value = filterEntry.second.data(); - if (this->processFilter(key, value) == false) - { +bool CFieldConfig::updateFilters(const boost::property_tree::ptree& propTree) { + for (const auto& filterEntry : propTree) { + const std::string& key = filterEntry.first; + const std::string& value = filterEntry.second.data(); + if (this->processFilter(key, value) == false) { return false; } } return true; } -bool CFieldConfig::processScheduledEvent(const boost::property_tree::ptree &propTree, - const std::string &key, - const std::string &value, - TIntSet &handledScheduledEvents) -{ +bool CFieldConfig::processScheduledEvent(const boost::property_tree::ptree& propTree, + const std::string& key, + const std::string& value, + TIntSet& handledScheduledEvents) { // Here we pull out the "1" in "scheduledevent.1.description" // description may contain a '.' 
     size_t sepPos(key.find(SUFFIX_SEPARATOR, SCHEDULED_EVENT_PREFIX.length() + 1));
-    if (sepPos == std::string::npos ||
-        sepPos == key.length() - 1)
-    {
+    if (sepPos == std::string::npos || sepPos == key.length() - 1) {
         LOG_ERROR("Unrecognised configuration option " << key << " = " << value);
         return false;
     }
 
     std::string indexString(key, SCHEDULED_EVENT_PREFIX.length(), sepPos - SCHEDULED_EVENT_PREFIX.length());
 
     int indexKey;
-    if (core::CStringUtils::stringToType(indexString, indexKey) == false)
-    {
+    if (core::CStringUtils::stringToType(indexString, indexKey) == false) {
         LOG_ERROR("Cannot convert config key to integer: " << indexString);
         return false;
     }
 
     // Check if we've already seen this key
-    if (handledScheduledEvents.insert(indexKey).second == false)
-    {
+    if (handledScheduledEvents.insert(indexKey).second == false) {
         // Not an error
         return true;
     }
 
-    std::string description(propTree.get(boost::property_tree::ptree::path_type(SCHEDULED_EVENT_PREFIX + indexString + DESCRIPTION_SUFFIX, '\t'),
-                                         EMPTY_STRING));
+    std::string description(propTree.get(
+        boost::property_tree::ptree::path_type(SCHEDULED_EVENT_PREFIX + indexString + DESCRIPTION_SUFFIX, '\t'), EMPTY_STRING));
 
-    std::string rules(propTree.get(boost::property_tree::ptree::path_type(SCHEDULED_EVENT_PREFIX + indexString + RULES_SUFFIX, '\t'),
-                                   EMPTY_STRING));
+    std::string rules(
+        propTree.get(boost::property_tree::ptree::path_type(SCHEDULED_EVENT_PREFIX + indexString + RULES_SUFFIX, '\t'), EMPTY_STRING));
 
     TDetectionRuleVec detectionRules;
-    if (this->parseRules(detectionRules, rules) == false)
-    {
+    if (this->parseRules(detectionRules, rules) == false) {
         // parseRules() will have logged the error
         return false;
     }
 
-    if (detectionRules.size() != 1)
-    {
+    if (detectionRules.size() != 1) {
         LOG_ERROR("Scheduled events must have exactly 1 rule");
         return false;
     }
@@ -1563,96 +1125,78 @@ bool CFieldConfig::processScheduledEvent(const boost::property_tree::ptree &prop
     return true;
 }
 
-bool CFieldConfig::updateScheduledEvents(const boost::property_tree::ptree &propTree)
-{
+bool CFieldConfig::updateScheduledEvents(const boost::property_tree::ptree& propTree) {
     m_ScheduledEvents.clear();
 
     bool isClear = propTree.get(CLEAR, false);
-    if (isClear)
-    {
+    if (isClear) {
         return true;
     }
 
     TIntSet handledScheduledEvents;
-    for (const auto &scheduledEventEntry : propTree)
-    {
-        const std::string &key = scheduledEventEntry.first;
-        const std::string &value = scheduledEventEntry.second.data();
-        if (this->processScheduledEvent(propTree, key, value, handledScheduledEvents) == false)
-        {
+    for (const auto& scheduledEventEntry : propTree) {
+        const std::string& key = scheduledEventEntry.first;
+        const std::string& value = scheduledEventEntry.second.data();
+        if (this->processScheduledEvent(propTree, key, value, handledScheduledEvents) == false) {
             return false;
         }
     }
     return true;
 }
 
-CFieldConfig::CFieldOptions::CFieldOptions(const std::string &fieldName,
-                                           int configKey)
-    : m_Function(fieldName == COUNT_NAME ?
-                 model::function_t::E_IndividualRareCount :
-                 model::function_t::E_IndividualMetric),
+CFieldConfig::CFieldOptions::CFieldOptions(const std::string& fieldName, int configKey)
+    : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount : model::function_t::E_IndividualMetric),
       m_FieldName(fieldName),
       m_ConfigKey(configKey),
       m_ByHasExcludeFrequent(false),
       m_OverHasExcludeFrequent(false),
-      m_UseNull(true)
-{
+      m_UseNull(true) {
 }
 
-CFieldConfig::CFieldOptions::CFieldOptions(const std::string &fieldName,
+CFieldConfig::CFieldOptions::CFieldOptions(const std::string& fieldName,
                                            int configKey,
-                                           const std::string &byFieldName,
+                                           const std::string& byFieldName,
                                            bool byHasExcludeFrequent,
                                            bool useNull)
 // For historical reasons, the only function name we interpret in this
 // constructor is "count" - every other word is considered to be a metric
 // field name.
-    : m_Function(fieldName == COUNT_NAME ?
-                 model::function_t::E_IndividualRareCount :
-                 model::function_t::E_IndividualMetric),
-      m_FieldName(fieldName == COUNT_NAME ?
-                  EMPTY_STRING :
-                  fieldName),
+    : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount : model::function_t::E_IndividualMetric),
+      m_FieldName(fieldName == COUNT_NAME ? EMPTY_STRING : fieldName),
       m_ConfigKey(configKey),
       m_ByFieldName(byFieldName),
       m_ByHasExcludeFrequent(byHasExcludeFrequent),
       m_OverHasExcludeFrequent(false),
-      m_UseNull(useNull)
-{
+      m_UseNull(useNull) {
 }
 
-CFieldConfig::CFieldOptions::CFieldOptions(const std::string &fieldName,
+CFieldConfig::CFieldOptions::CFieldOptions(const std::string& fieldName,
                                            int configKey,
-                                           const std::string &byFieldName,
-                                           const std::string &partitionFieldName,
+                                           const std::string& byFieldName,
+                                           const std::string& partitionFieldName,
                                            bool byHasExcludeFrequent,
                                            bool overHasExcludeFrequent,
                                            bool useNull)
 // For historical reasons, the only function name we interpret in this
 // constructor is "count" - every other word is considered to be a metric
 // field name.
-    : m_Function(fieldName == COUNT_NAME ?
-                 model::function_t::E_IndividualRareCount :
-                 model::function_t::E_IndividualMetric),
-      m_FieldName(fieldName == COUNT_NAME ?
-                  EMPTY_STRING :
-                  fieldName),
+    : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount : model::function_t::E_IndividualMetric),
+      m_FieldName(fieldName == COUNT_NAME ? EMPTY_STRING : fieldName),
       m_ConfigKey(configKey),
       m_ByFieldName(byFieldName),
       m_PartitionFieldName(partitionFieldName),
       m_ByHasExcludeFrequent(byHasExcludeFrequent),
       m_OverHasExcludeFrequent(overHasExcludeFrequent),
-      m_UseNull(useNull)
-{
+      m_UseNull(useNull) {
 }
 
 CFieldConfig::CFieldOptions::CFieldOptions(model::function_t::EFunction function,
-                                           const std::string &fieldName,
+                                           const std::string& fieldName,
                                            int configKey,
-                                           const std::string &byFieldName,
-                                           const std::string &overFieldName,
-                                           const std::string &partitionFieldName,
+                                           const std::string& byFieldName,
+                                           const std::string& overFieldName,
+                                           const std::string& partitionFieldName,
                                            bool byHasExcludeFrequent,
                                            bool overHasExcludeFrequent,
                                            bool useNull)
@@ -1664,491 +1208,456 @@ CFieldConfig::CFieldOptions::CFieldOptions(model::function_t::EFunction function
       m_PartitionFieldName(partitionFieldName),
       m_ByHasExcludeFrequent(byHasExcludeFrequent),
       m_OverHasExcludeFrequent(overHasExcludeFrequent),
-      m_UseNull(useNull)
-{
+      m_UseNull(useNull) {
 }
 
-void CFieldConfig::CFieldOptions::description(std::string description)
-{
+void CFieldConfig::CFieldOptions::description(std::string description) {
     m_Description.swap(description);
 }
 
-const std::string &CFieldConfig::CFieldOptions::description() const
-{
+const std::string& CFieldConfig::CFieldOptions::description() const {
     return m_Description;
 }
 
-model::function_t::EFunction CFieldConfig::CFieldOptions::function() const
-{
+model::function_t::EFunction CFieldConfig::CFieldOptions::function() const {
     return m_Function;
 }
 
-const std::string &CFieldConfig::CFieldOptions::fieldName() const
-{
+const std::string& CFieldConfig::CFieldOptions::fieldName() const {
     return m_FieldName;
 }
 
-int CFieldConfig::CFieldOptions::configKey() const
-{
+int CFieldConfig::CFieldOptions::configKey() const {
     return m_ConfigKey;
 }
 
-const std::string &CFieldConfig::CFieldOptions::byFieldName() const
-{
+const std::string& CFieldConfig::CFieldOptions::byFieldName() const {
     return m_ByFieldName;
 }
 
-const std::string &CFieldConfig::CFieldOptions::overFieldName() const
-{
+const std::string& CFieldConfig::CFieldOptions::overFieldName() const {
     return m_OverFieldName;
 }
 
-const std::string &CFieldConfig::CFieldOptions::partitionFieldName() const
-{
+const std::string& CFieldConfig::CFieldOptions::partitionFieldName() const {
     return m_PartitionFieldName;
 }
 
-bool CFieldConfig::CFieldOptions::useNull() const
-{
+bool CFieldConfig::CFieldOptions::useNull() const {
     return m_UseNull;
 }
 
-model_t::EExcludeFrequent CFieldConfig::CFieldOptions::excludeFrequent() const
-{
-    if (m_OverHasExcludeFrequent)
-    {
-        if (m_ByHasExcludeFrequent)
-        {
+model_t::EExcludeFrequent CFieldConfig::CFieldOptions::excludeFrequent() const {
+    if (m_OverHasExcludeFrequent) {
+        if (m_ByHasExcludeFrequent) {
             return model_t::E_XF_Both;
-        }
-        else
-        {
+        } else {
             return model_t::E_XF_Over;
         }
-    }
-    else
-    {
-        if (m_ByHasExcludeFrequent)
-        {
+    } else {
+        if (m_ByHasExcludeFrequent) {
             return model_t::E_XF_By;
         }
     }
     return model_t::E_XF_None;
 }
 
-const std::string &CFieldConfig::CFieldOptions::terseFunctionName() const
-{
-    switch (m_Function)
-    {
-        case model::function_t::E_IndividualCount:
-            // For backwards compatibility the "count" function name maps to
-            // E_IndividualRareCount, not E_IndividualCount as you might think
-            return EMPTY_STRING;
-        case model::function_t::E_IndividualNonZeroCount:
-            return FUNCTION_NON_ZERO_COUNT_ABBREV;
-        case model::function_t::E_IndividualRareCount:
-            // For backwards compatibility the "count" function name maps to
-            // E_IndividualRareCount, not E_IndividualCount as you might think
-            return FUNCTION_COUNT_ABBREV;
-        case model::function_t::E_IndividualRareNonZeroCount:
-            return FUNCTION_RARE_NON_ZERO_COUNT_ABBREV;
-        case model::function_t::E_IndividualRare:
-            return FUNCTION_RARE;
-        case model::function_t::E_IndividualLowCounts:
-            return FUNCTION_LOW_COUNT_ABBREV;
-        case model::function_t::E_IndividualHighCounts:
-            return FUNCTION_HIGH_COUNT_ABBREV;
-        case model::function_t::E_IndividualLowNonZeroCount:
-            return FUNCTION_LOW_NON_ZERO_COUNT;
-        case model::function_t::E_IndividualHighNonZeroCount:
-            return FUNCTION_HIGH_NON_ZERO_COUNT;
-        case model::function_t::E_IndividualDistinctCount:
-            return FUNCTION_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_IndividualLowDistinctCount:
-            return FUNCTION_LOW_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_IndividualHighDistinctCount:
-            return FUNCTION_HIGH_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_IndividualInfoContent:
-            return FUNCTION_INFO_CONTENT;
-        case model::function_t::E_IndividualLowInfoContent:
-            return FUNCTION_LOW_INFO_CONTENT;
-        case model::function_t::E_IndividualHighInfoContent:
-            return FUNCTION_HIGH_INFO_CONTENT;
-        case model::function_t::E_IndividualTimeOfDay:
-            return FUNCTION_TIME_OF_DAY;
-        case model::function_t::E_IndividualTimeOfWeek:
-            return FUNCTION_TIME_OF_WEEK;
-        case model::function_t::E_IndividualMetric:
-            return FUNCTION_METRIC;
-        case model::function_t::E_IndividualMetricMean:
-            return FUNCTION_AVERAGE;
-        case model::function_t::E_IndividualMetricLowMean:
-            return FUNCTION_LOW_MEAN;
-        case model::function_t::E_IndividualMetricHighMean:
-            return FUNCTION_HIGH_MEAN;
-        case model::function_t::E_IndividualMetricMedian:
-            return FUNCTION_MEDIAN;
-        case model::function_t::E_IndividualMetricLowMedian:
-            return FUNCTION_LOW_MEDIAN;
-        case model::function_t::E_IndividualMetricHighMedian:
-            return FUNCTION_HIGH_MEDIAN;
-        case model::function_t::E_IndividualMetricMin:
-            return FUNCTION_MIN;
-        case model::function_t::E_IndividualMetricMax:
-            return FUNCTION_MAX;
-        case model::function_t::E_IndividualMetricVariance:
-            return FUNCTION_VARIANCE;
-        case model::function_t::E_IndividualMetricLowVariance:
-            return FUNCTION_LOW_VARIANCE;
-        case model::function_t::E_IndividualMetricHighVariance:
-            return FUNCTION_HIGH_VARIANCE;
-        case model::function_t::E_IndividualMetricSum:
-            return FUNCTION_SUM;
-        case model::function_t::E_IndividualMetricLowSum:
-            return FUNCTION_LOW_SUM;
-        case model::function_t::E_IndividualMetricHighSum:
-            return FUNCTION_HIGH_SUM;
-        case model::function_t::E_IndividualMetricNonNullSum:
-            return FUNCTION_NON_NULL_SUM_ABBREV;
-        case model::function_t::E_IndividualMetricLowNonNullSum:
-            return FUNCTION_LOW_NON_NULL_SUM_ABBREV;
-        case model::function_t::E_IndividualMetricHighNonNullSum:
-            return FUNCTION_HIGH_NON_NULL_SUM_ABBREV;
-        case model::function_t::E_IndividualLatLong:
-            return FUNCTION_LAT_LONG;
-        case model::function_t::E_IndividualMinVelocity:
-            return FUNCTION_MIN_VELOCITY;
-        case model::function_t::E_IndividualMaxVelocity:
-            return FUNCTION_MAX_VELOCITY;
-        case model::function_t::E_IndividualMeanVelocity:
-            return FUNCTION_MEAN_VELOCITY;
-        case model::function_t::E_IndividualSumVelocity:
-            return FUNCTION_SUM_VELOCITY;
-        case model::function_t::E_PopulationCount:
-            return FUNCTION_COUNT_ABBREV;
-        case model::function_t::E_PopulationDistinctCount:
-            return FUNCTION_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_PopulationLowDistinctCount:
-            return FUNCTION_LOW_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_PopulationHighDistinctCount:
-            return FUNCTION_HIGH_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_PopulationRare:
-            return FUNCTION_RARE;
-        case model::function_t::E_PopulationRareCount:
-            return FUNCTION_RARE_COUNT;
-        case model::function_t::E_PopulationFreqRare:
-            return FUNCTION_FREQ_RARE_ABBREV;
-        case model::function_t::E_PopulationFreqRareCount:
-            return FUNCTION_FREQ_RARE_COUNT_ABBREV;
-        case model::function_t::E_PopulationLowCounts:
-            return FUNCTION_LOW_COUNT_ABBREV;
-        case model::function_t::E_PopulationHighCounts:
-            return FUNCTION_HIGH_COUNT_ABBREV;
-        case model::function_t::E_PopulationInfoContent:
-            return FUNCTION_INFO_CONTENT;
-        case model::function_t::E_PopulationLowInfoContent:
-            return FUNCTION_LOW_INFO_CONTENT;
-        case model::function_t::E_PopulationHighInfoContent:
-            return FUNCTION_HIGH_INFO_CONTENT;
-        case model::function_t::E_PopulationTimeOfDay:
-            return FUNCTION_TIME_OF_DAY;
-        case model::function_t::E_PopulationTimeOfWeek:
-            return FUNCTION_TIME_OF_WEEK;
-        case model::function_t::E_PopulationMetric:
-            return FUNCTION_METRIC;
-        case model::function_t::E_PopulationMetricMean:
-            return FUNCTION_AVERAGE;
-        case model::function_t::E_PopulationMetricLowMean:
-            return FUNCTION_LOW_MEAN;
-        case model::function_t::E_PopulationMetricHighMean:
-            return FUNCTION_HIGH_MEAN;
-        case model::function_t::E_PopulationMetricMedian:
-            return FUNCTION_MEDIAN;
-        case model::function_t::E_PopulationMetricLowMedian:
-            return FUNCTION_LOW_MEDIAN;
-        case model::function_t::E_PopulationMetricHighMedian:
-            return FUNCTION_HIGH_MEDIAN;
-        case model::function_t::E_PopulationMetricMin:
-            return FUNCTION_MIN;
-        case model::function_t::E_PopulationMetricMax:
-            return FUNCTION_MAX;
-        case model::function_t::E_PopulationMetricSum:
-            return FUNCTION_SUM;
-        case model::function_t::E_PopulationMetricVariance:
-            return FUNCTION_VARIANCE;
-        case model::function_t::E_PopulationMetricLowVariance:
-            return FUNCTION_LOW_VARIANCE;
-        case model::function_t::E_PopulationMetricHighVariance:
-            return FUNCTION_HIGH_VARIANCE;
-        case model::function_t::E_PopulationMetricLowSum:
-            return FUNCTION_LOW_SUM;
-        case model::function_t::E_PopulationMetricHighSum:
-            return FUNCTION_HIGH_SUM;
-        case model::function_t::E_PopulationLatLong:
-            return FUNCTION_LAT_LONG;
-        case model::function_t::E_PopulationMinVelocity:
-            return FUNCTION_MIN_VELOCITY;
-        case model::function_t::E_PopulationMaxVelocity:
-            return FUNCTION_MAX_VELOCITY;
-        case model::function_t::E_PopulationMeanVelocity:
-            return FUNCTION_MEAN_VELOCITY;
-        case model::function_t::E_PopulationSumVelocity:
-            return FUNCTION_SUM_VELOCITY;
-        case model::function_t::E_PeersCount:
-            return FUNCTION_COUNT_ABBREV;
-        case model::function_t::E_PeersLowCounts:
-            return FUNCTION_LOW_COUNT_ABBREV;
-        case model::function_t::E_PeersHighCounts:
-            return FUNCTION_HIGH_COUNT_ABBREV;
-        case model::function_t::E_PeersDistinctCount:
-            return FUNCTION_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_PeersLowDistinctCount:
-            return FUNCTION_LOW_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_PeersHighDistinctCount:
-            return FUNCTION_HIGH_DISTINCT_COUNT_ABBREV;
-        case model::function_t::E_PeersInfoContent:
-            return FUNCTION_INFO_CONTENT;
-        case model::function_t::E_PeersLowInfoContent:
-            return FUNCTION_LOW_INFO_CONTENT;
-        case model::function_t::E_PeersHighInfoContent:
-            return FUNCTION_HIGH_INFO_CONTENT;
-        case model::function_t::E_PeersTimeOfDay:
-            return FUNCTION_TIME_OF_DAY;
-        case model::function_t::E_PeersTimeOfWeek:
-            return FUNCTION_TIME_OF_WEEK;
+const std::string& CFieldConfig::CFieldOptions::terseFunctionName() const {
+    switch (m_Function) {
+    case model::function_t::E_IndividualCount:
+        // For backwards compatibility the "count" function name maps to
+        // E_IndividualRareCount, not E_IndividualCount as you might think
+        return EMPTY_STRING;
+    case model::function_t::E_IndividualNonZeroCount:
+        return FUNCTION_NON_ZERO_COUNT_ABBREV;
+    case model::function_t::E_IndividualRareCount:
+        // For backwards compatibility the "count" function name maps to
+        // E_IndividualRareCount, not E_IndividualCount as you might think
+        return FUNCTION_COUNT_ABBREV;
+    case model::function_t::E_IndividualRareNonZeroCount:
+        return FUNCTION_RARE_NON_ZERO_COUNT_ABBREV;
+    case model::function_t::E_IndividualRare:
+        return FUNCTION_RARE;
+    case model::function_t::E_IndividualLowCounts:
+        return FUNCTION_LOW_COUNT_ABBREV;
+    case model::function_t::E_IndividualHighCounts:
+        return FUNCTION_HIGH_COUNT_ABBREV;
+    case model::function_t::E_IndividualLowNonZeroCount:
+        return FUNCTION_LOW_NON_ZERO_COUNT;
+    case model::function_t::E_IndividualHighNonZeroCount:
+        return FUNCTION_HIGH_NON_ZERO_COUNT;
+    case model::function_t::E_IndividualDistinctCount:
+        return FUNCTION_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_IndividualLowDistinctCount:
+        return FUNCTION_LOW_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_IndividualHighDistinctCount:
+        return FUNCTION_HIGH_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_IndividualInfoContent:
+        return FUNCTION_INFO_CONTENT;
+    case model::function_t::E_IndividualLowInfoContent:
+        return FUNCTION_LOW_INFO_CONTENT;
+    case model::function_t::E_IndividualHighInfoContent:
+        return FUNCTION_HIGH_INFO_CONTENT;
+    case model::function_t::E_IndividualTimeOfDay:
+        return FUNCTION_TIME_OF_DAY;
+    case model::function_t::E_IndividualTimeOfWeek:
+        return FUNCTION_TIME_OF_WEEK;
+    case model::function_t::E_IndividualMetric:
+        return FUNCTION_METRIC;
+    case model::function_t::E_IndividualMetricMean:
+        return FUNCTION_AVERAGE;
+    case model::function_t::E_IndividualMetricLowMean:
+        return FUNCTION_LOW_MEAN;
+    case model::function_t::E_IndividualMetricHighMean:
+        return FUNCTION_HIGH_MEAN;
+    case model::function_t::E_IndividualMetricMedian:
+        return FUNCTION_MEDIAN;
+    case model::function_t::E_IndividualMetricLowMedian:
+        return FUNCTION_LOW_MEDIAN;
+    case model::function_t::E_IndividualMetricHighMedian:
+        return FUNCTION_HIGH_MEDIAN;
+    case model::function_t::E_IndividualMetricMin:
+        return FUNCTION_MIN;
+    case model::function_t::E_IndividualMetricMax:
+        return FUNCTION_MAX;
+    case model::function_t::E_IndividualMetricVariance:
+        return FUNCTION_VARIANCE;
+    case model::function_t::E_IndividualMetricLowVariance:
+        return FUNCTION_LOW_VARIANCE;
+    case model::function_t::E_IndividualMetricHighVariance:
+        return FUNCTION_HIGH_VARIANCE;
+    case model::function_t::E_IndividualMetricSum:
+        return FUNCTION_SUM;
+    case model::function_t::E_IndividualMetricLowSum:
+        return FUNCTION_LOW_SUM;
+    case model::function_t::E_IndividualMetricHighSum:
+        return FUNCTION_HIGH_SUM;
+    case model::function_t::E_IndividualMetricNonNullSum:
+        return FUNCTION_NON_NULL_SUM_ABBREV;
+    case model::function_t::E_IndividualMetricLowNonNullSum:
+        return FUNCTION_LOW_NON_NULL_SUM_ABBREV;
+    case model::function_t::E_IndividualMetricHighNonNullSum:
+        return FUNCTION_HIGH_NON_NULL_SUM_ABBREV;
+    case model::function_t::E_IndividualLatLong:
+        return FUNCTION_LAT_LONG;
+    case model::function_t::E_IndividualMinVelocity:
+        return FUNCTION_MIN_VELOCITY;
+    case model::function_t::E_IndividualMaxVelocity:
+        return FUNCTION_MAX_VELOCITY;
+    case model::function_t::E_IndividualMeanVelocity:
+        return FUNCTION_MEAN_VELOCITY;
+    case model::function_t::E_IndividualSumVelocity:
+        return FUNCTION_SUM_VELOCITY;
+    case model::function_t::E_PopulationCount:
+        return FUNCTION_COUNT_ABBREV;
+    case model::function_t::E_PopulationDistinctCount:
+        return FUNCTION_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_PopulationLowDistinctCount:
+        return FUNCTION_LOW_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_PopulationHighDistinctCount:
+        return FUNCTION_HIGH_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_PopulationRare:
+        return FUNCTION_RARE;
+    case model::function_t::E_PopulationRareCount:
+        return FUNCTION_RARE_COUNT;
+    case model::function_t::E_PopulationFreqRare:
+        return FUNCTION_FREQ_RARE_ABBREV;
+    case model::function_t::E_PopulationFreqRareCount:
+        return FUNCTION_FREQ_RARE_COUNT_ABBREV;
+    case model::function_t::E_PopulationLowCounts:
+        return FUNCTION_LOW_COUNT_ABBREV;
+    case model::function_t::E_PopulationHighCounts:
+        return FUNCTION_HIGH_COUNT_ABBREV;
+    case model::function_t::E_PopulationInfoContent:
+        return FUNCTION_INFO_CONTENT;
+    case model::function_t::E_PopulationLowInfoContent:
+        return FUNCTION_LOW_INFO_CONTENT;
+    case model::function_t::E_PopulationHighInfoContent:
+        return FUNCTION_HIGH_INFO_CONTENT;
+    case model::function_t::E_PopulationTimeOfDay:
+        return FUNCTION_TIME_OF_DAY;
+    case model::function_t::E_PopulationTimeOfWeek:
+        return FUNCTION_TIME_OF_WEEK;
+    case model::function_t::E_PopulationMetric:
+        return FUNCTION_METRIC;
+    case model::function_t::E_PopulationMetricMean:
+        return FUNCTION_AVERAGE;
+    case model::function_t::E_PopulationMetricLowMean:
+        return FUNCTION_LOW_MEAN;
+    case model::function_t::E_PopulationMetricHighMean:
+        return FUNCTION_HIGH_MEAN;
+    case model::function_t::E_PopulationMetricMedian:
+        return FUNCTION_MEDIAN;
+    case model::function_t::E_PopulationMetricLowMedian:
+        return FUNCTION_LOW_MEDIAN;
+    case model::function_t::E_PopulationMetricHighMedian:
+        return FUNCTION_HIGH_MEDIAN;
+    case model::function_t::E_PopulationMetricMin:
+        return FUNCTION_MIN;
+    case model::function_t::E_PopulationMetricMax:
+        return FUNCTION_MAX;
+    case model::function_t::E_PopulationMetricSum:
+        return FUNCTION_SUM;
+    case model::function_t::E_PopulationMetricVariance:
+        return FUNCTION_VARIANCE;
+    case model::function_t::E_PopulationMetricLowVariance:
+        return FUNCTION_LOW_VARIANCE;
+    case model::function_t::E_PopulationMetricHighVariance:
+        return FUNCTION_HIGH_VARIANCE;
+    case model::function_t::E_PopulationMetricLowSum:
+        return FUNCTION_LOW_SUM;
+    case model::function_t::E_PopulationMetricHighSum:
+        return FUNCTION_HIGH_SUM;
+    case model::function_t::E_PopulationLatLong:
+        return FUNCTION_LAT_LONG;
+    case model::function_t::E_PopulationMinVelocity:
+        return FUNCTION_MIN_VELOCITY;
+    case model::function_t::E_PopulationMaxVelocity:
+        return FUNCTION_MAX_VELOCITY;
+    case model::function_t::E_PopulationMeanVelocity:
+        return FUNCTION_MEAN_VELOCITY;
+    case model::function_t::E_PopulationSumVelocity:
+        return FUNCTION_SUM_VELOCITY;
+    case model::function_t::E_PeersCount:
+        return FUNCTION_COUNT_ABBREV;
+    case model::function_t::E_PeersLowCounts:
+        return FUNCTION_LOW_COUNT_ABBREV;
+    case model::function_t::E_PeersHighCounts:
+        return FUNCTION_HIGH_COUNT_ABBREV;
+    case model::function_t::E_PeersDistinctCount:
+        return FUNCTION_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_PeersLowDistinctCount:
+        return FUNCTION_LOW_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_PeersHighDistinctCount:
+        return FUNCTION_HIGH_DISTINCT_COUNT_ABBREV;
+    case model::function_t::E_PeersInfoContent:
+        return FUNCTION_INFO_CONTENT;
+    case model::function_t::E_PeersLowInfoContent:
+        return FUNCTION_LOW_INFO_CONTENT;
+    case model::function_t::E_PeersHighInfoContent:
+        return FUNCTION_HIGH_INFO_CONTENT;
+    case model::function_t::E_PeersTimeOfDay:
+        return FUNCTION_TIME_OF_DAY;
+    case model::function_t::E_PeersTimeOfWeek:
+        return FUNCTION_TIME_OF_WEEK;
     }
 
     LOG_ERROR("Unexpected function = " << m_Function);
     return EMPTY_STRING;
 }
 
-const std::string &CFieldConfig::CFieldOptions::verboseFunctionName() const
-{
-    switch (m_Function)
-    {
-        case model::function_t::E_IndividualCount:
-            // For backwards compatibility the "count" function name maps to
-            // E_IndividualRareCount, not E_IndividualCount as you might think
-            return EMPTY_STRING;
-        case model::function_t::E_IndividualNonZeroCount:
-            return FUNCTION_NON_ZERO_COUNT;
-        case model::function_t::E_IndividualRareCount:
-            // For backwards compatibility the "count" function name maps to
-            // E_IndividualRareCount, not E_IndividualCount as you might think
-            return FUNCTION_COUNT;
-        case model::function_t::E_IndividualRareNonZeroCount:
-            return FUNCTION_RARE_NON_ZERO_COUNT;
-        case model::function_t::E_IndividualRare:
-            return FUNCTION_RARE;
-        case model::function_t::E_IndividualLowCounts:
-            return FUNCTION_LOW_COUNT;
-        case model::function_t::E_IndividualHighCounts:
-            return FUNCTION_HIGH_COUNT;
-        case model::function_t::E_IndividualLowNonZeroCount:
-            return FUNCTION_LOW_NON_ZERO_COUNT;
-        case model::function_t::E_IndividualHighNonZeroCount:
-            return FUNCTION_HIGH_NON_ZERO_COUNT;
-        case model::function_t::E_IndividualDistinctCount:
-            return FUNCTION_DISTINCT_COUNT;
-        case model::function_t::E_IndividualLowDistinctCount:
-            return FUNCTION_LOW_DISTINCT_COUNT;
-        case model::function_t::E_IndividualHighDistinctCount:
-            return FUNCTION_HIGH_DISTINCT_COUNT;
-        case model::function_t::E_IndividualInfoContent:
-            return FUNCTION_INFO_CONTENT;
-        case model::function_t::E_IndividualLowInfoContent:
-            return FUNCTION_LOW_INFO_CONTENT;
-        case model::function_t::E_IndividualHighInfoContent:
-            return FUNCTION_HIGH_INFO_CONTENT;
-        case model::function_t::E_IndividualTimeOfDay:
-            return FUNCTION_TIME_OF_DAY;
-        case model::function_t::E_IndividualTimeOfWeek:
-            return FUNCTION_TIME_OF_WEEK;
-        case model::function_t::E_IndividualMetric:
-            return FUNCTION_METRIC;
-        case model::function_t::E_IndividualMetricMean:
-            return FUNCTION_AVERAGE;
-        case model::function_t::E_IndividualMetricLowMean:
-            return FUNCTION_LOW_MEAN;
-        case model::function_t::E_IndividualMetricHighMean:
-            return FUNCTION_HIGH_MEAN;
-        case model::function_t::E_IndividualMetricMedian:
-            return FUNCTION_MEDIAN;
-        case model::function_t::E_IndividualMetricLowMedian:
-            return FUNCTION_LOW_MEDIAN;
-        case model::function_t::E_IndividualMetricHighMedian:
-            return FUNCTION_HIGH_MEDIAN;
-        case model::function_t::E_IndividualMetricMin:
-            return FUNCTION_MIN;
-        case model::function_t::E_IndividualMetricMax:
-            return FUNCTION_MAX;
-        case model::function_t::E_IndividualMetricVariance:
-            return FUNCTION_VARIANCE;
-        case model::function_t::E_IndividualMetricLowVariance:
-            return FUNCTION_LOW_VARIANCE;
-        case model::function_t::E_IndividualMetricHighVariance:
-            return FUNCTION_HIGH_VARIANCE;
-        case model::function_t::E_IndividualMetricSum:
-            return FUNCTION_SUM;
-        case model::function_t::E_IndividualMetricLowSum:
-            return FUNCTION_LOW_SUM;
-        case model::function_t::E_IndividualMetricHighSum:
-            return FUNCTION_HIGH_SUM;
-        case model::function_t::E_IndividualMetricNonNullSum:
-            return FUNCTION_NON_NULL_SUM;
-        case model::function_t::E_IndividualMetricLowNonNullSum:
-            return FUNCTION_LOW_NON_NULL_SUM;
-        case model::function_t::E_IndividualMetricHighNonNullSum:
-            return FUNCTION_HIGH_NON_NULL_SUM;
-        case model::function_t::E_IndividualLatLong:
-            return FUNCTION_LAT_LONG;
-        case model::function_t::E_IndividualMaxVelocity:
-            return FUNCTION_MAX_VELOCITY;
-        case model::function_t::E_IndividualMinVelocity:
-            return FUNCTION_MIN_VELOCITY;
-        case model::function_t::E_IndividualMeanVelocity:
-            return FUNCTION_MEAN_VELOCITY;
-        case model::function_t::E_IndividualSumVelocity:
-            return FUNCTION_SUM_VELOCITY;
-        case model::function_t::E_PopulationCount:
-            return FUNCTION_COUNT;
-        case model::function_t::E_PopulationDistinctCount:
-            return FUNCTION_DISTINCT_COUNT;
-        case model::function_t::E_PopulationLowDistinctCount:
-            return FUNCTION_LOW_DISTINCT_COUNT;
-        case model::function_t::E_PopulationHighDistinctCount:
-            return FUNCTION_HIGH_DISTINCT_COUNT;
-        case model::function_t::E_PopulationRare:
-            return FUNCTION_RARE;
-        case model::function_t::E_PopulationRareCount:
-            return FUNCTION_RARE_COUNT;
-        case model::function_t::E_PopulationFreqRare:
-            return FUNCTION_FREQ_RARE;
-        case model::function_t::E_PopulationFreqRareCount:
-            return FUNCTION_FREQ_RARE_COUNT;
-        case model::function_t::E_PopulationLowCounts:
-            return FUNCTION_LOW_COUNT;
-        case model::function_t::E_PopulationHighCounts:
-            return FUNCTION_HIGH_COUNT;
-        case model::function_t::E_PopulationInfoContent:
-            return FUNCTION_INFO_CONTENT;
-        case model::function_t::E_PopulationLowInfoContent:
-            return FUNCTION_LOW_INFO_CONTENT;
-        case model::function_t::E_PopulationHighInfoContent:
-            return FUNCTION_HIGH_INFO_CONTENT;
-        case model::function_t::E_PopulationTimeOfDay:
-            return FUNCTION_TIME_OF_DAY;
-        case model::function_t::E_PopulationTimeOfWeek:
-            return FUNCTION_TIME_OF_WEEK;
-        case model::function_t::E_PopulationMetric:
-            return FUNCTION_METRIC;
-        case model::function_t::E_PopulationMetricMean:
-            return FUNCTION_AVERAGE;
-        case model::function_t::E_PopulationMetricLowMean:
-            return FUNCTION_LOW_MEAN;
-        case model::function_t::E_PopulationMetricHighMean:
-            return FUNCTION_HIGH_MEAN;
-        case model::function_t::E_PopulationMetricMedian:
-            return FUNCTION_MEDIAN;
-        case model::function_t::E_PopulationMetricLowMedian:
-            return FUNCTION_LOW_MEDIAN;
-        case model::function_t::E_PopulationMetricHighMedian:
-            return FUNCTION_HIGH_MEDIAN;
-        case model::function_t::E_PopulationMetricMin:
-            return FUNCTION_MIN;
-        case model::function_t::E_PopulationMetricMax:
-            return FUNCTION_MAX;
-        case model::function_t::E_PopulationMetricVariance:
-            return FUNCTION_VARIANCE;
-        case model::function_t::E_PopulationMetricLowVariance:
-            return FUNCTION_LOW_VARIANCE;
-        case model::function_t::E_PopulationMetricHighVariance:
-            return FUNCTION_HIGH_VARIANCE;
-        case model::function_t::E_PopulationMetricSum:
-            return FUNCTION_SUM;
-        case model::function_t::E_PopulationMetricLowSum:
-            return FUNCTION_LOW_SUM;
-        case model::function_t::E_PopulationMetricHighSum:
-            return FUNCTION_HIGH_SUM;
-        case model::function_t::E_PopulationLatLong:
-            return FUNCTION_LAT_LONG;
-        case model::function_t::E_PopulationMaxVelocity:
-            return FUNCTION_MAX_VELOCITY;
-        case model::function_t::E_PopulationMinVelocity:
-            return FUNCTION_MIN_VELOCITY;
-        case model::function_t::E_PopulationMeanVelocity:
-            return FUNCTION_MEAN_VELOCITY;
-        case model::function_t::E_PopulationSumVelocity:
-            return FUNCTION_SUM_VELOCITY;
-        case model::function_t::E_PeersCount:
-            return FUNCTION_COUNT;
-        case model::function_t::E_PeersLowCounts:
-            return FUNCTION_LOW_COUNT;
-        case model::function_t::E_PeersHighCounts:
-            return FUNCTION_HIGH_COUNT;
-        case model::function_t::E_PeersDistinctCount:
-            return FUNCTION_DISTINCT_COUNT;
-        case model::function_t::E_PeersLowDistinctCount:
-            return FUNCTION_LOW_DISTINCT_COUNT;
-        case model::function_t::E_PeersHighDistinctCount:
-            return FUNCTION_HIGH_DISTINCT_COUNT;
-        case model::function_t::E_PeersInfoContent:
-            return FUNCTION_INFO_CONTENT;
-        case model::function_t::E_PeersLowInfoContent:
-            return FUNCTION_LOW_INFO_CONTENT;
-        case model::function_t::E_PeersHighInfoContent:
-            return FUNCTION_HIGH_INFO_CONTENT;
-        case model::function_t::E_PeersTimeOfDay:
-            return FUNCTION_TIME_OF_DAY;
-        case model::function_t::E_PeersTimeOfWeek:
-            return FUNCTION_TIME_OF_WEEK;
+const std::string& CFieldConfig::CFieldOptions::verboseFunctionName() const {
+    switch (m_Function) {
+    case model::function_t::E_IndividualCount:
+        // For backwards compatibility the "count" function name maps to
+        // E_IndividualRareCount, not E_IndividualCount as you might think
+        return EMPTY_STRING;
+    case model::function_t::E_IndividualNonZeroCount:
+        return FUNCTION_NON_ZERO_COUNT;
+    case model::function_t::E_IndividualRareCount:
+        // For backwards compatibility the "count" function name maps to
+        // E_IndividualRareCount, not E_IndividualCount as you might think
+        return FUNCTION_COUNT;
+    case model::function_t::E_IndividualRareNonZeroCount:
+        return FUNCTION_RARE_NON_ZERO_COUNT;
+    case model::function_t::E_IndividualRare:
+        return FUNCTION_RARE;
+    case model::function_t::E_IndividualLowCounts:
+        return FUNCTION_LOW_COUNT;
+    case model::function_t::E_IndividualHighCounts:
+        return FUNCTION_HIGH_COUNT;
+    case model::function_t::E_IndividualLowNonZeroCount:
+        return FUNCTION_LOW_NON_ZERO_COUNT;
+    case model::function_t::E_IndividualHighNonZeroCount:
+        return FUNCTION_HIGH_NON_ZERO_COUNT;
+    case model::function_t::E_IndividualDistinctCount:
+        return FUNCTION_DISTINCT_COUNT;
+    case model::function_t::E_IndividualLowDistinctCount:
+        return FUNCTION_LOW_DISTINCT_COUNT;
+    case model::function_t::E_IndividualHighDistinctCount:
+        return FUNCTION_HIGH_DISTINCT_COUNT;
+    case model::function_t::E_IndividualInfoContent:
+        return FUNCTION_INFO_CONTENT;
+    case model::function_t::E_IndividualLowInfoContent:
+        return FUNCTION_LOW_INFO_CONTENT;
+    case model::function_t::E_IndividualHighInfoContent:
+        return FUNCTION_HIGH_INFO_CONTENT;
+    case model::function_t::E_IndividualTimeOfDay:
+        return FUNCTION_TIME_OF_DAY;
+    case model::function_t::E_IndividualTimeOfWeek:
+        return FUNCTION_TIME_OF_WEEK;
+    case model::function_t::E_IndividualMetric:
+        return FUNCTION_METRIC;
+    case model::function_t::E_IndividualMetricMean:
+        return FUNCTION_AVERAGE;
+    case model::function_t::E_IndividualMetricLowMean:
+        return FUNCTION_LOW_MEAN;
+    case model::function_t::E_IndividualMetricHighMean:
+        return FUNCTION_HIGH_MEAN;
+    case model::function_t::E_IndividualMetricMedian:
+        return FUNCTION_MEDIAN;
+    case model::function_t::E_IndividualMetricLowMedian:
+        return FUNCTION_LOW_MEDIAN;
+    case model::function_t::E_IndividualMetricHighMedian:
+        return FUNCTION_HIGH_MEDIAN;
+    case model::function_t::E_IndividualMetricMin:
+        return FUNCTION_MIN;
+    case model::function_t::E_IndividualMetricMax:
+        return FUNCTION_MAX;
+    case model::function_t::E_IndividualMetricVariance:
+        return FUNCTION_VARIANCE;
+    case model::function_t::E_IndividualMetricLowVariance:
+        return FUNCTION_LOW_VARIANCE;
+    case model::function_t::E_IndividualMetricHighVariance:
+        return FUNCTION_HIGH_VARIANCE;
+    case model::function_t::E_IndividualMetricSum:
+        return FUNCTION_SUM;
+    case model::function_t::E_IndividualMetricLowSum:
+        return FUNCTION_LOW_SUM;
+    case model::function_t::E_IndividualMetricHighSum:
+        return FUNCTION_HIGH_SUM;
+    case model::function_t::E_IndividualMetricNonNullSum:
+        return FUNCTION_NON_NULL_SUM;
+    case model::function_t::E_IndividualMetricLowNonNullSum:
+        return FUNCTION_LOW_NON_NULL_SUM;
+    case model::function_t::E_IndividualMetricHighNonNullSum:
+        return FUNCTION_HIGH_NON_NULL_SUM;
+    case model::function_t::E_IndividualLatLong:
+        return FUNCTION_LAT_LONG;
+    case model::function_t::E_IndividualMaxVelocity:
+        return FUNCTION_MAX_VELOCITY;
+    case model::function_t::E_IndividualMinVelocity:
+        return FUNCTION_MIN_VELOCITY;
+    case model::function_t::E_IndividualMeanVelocity:
+        return FUNCTION_MEAN_VELOCITY;
+    case model::function_t::E_IndividualSumVelocity:
+        return FUNCTION_SUM_VELOCITY;
+    case model::function_t::E_PopulationCount:
+        return FUNCTION_COUNT;
+    case model::function_t::E_PopulationDistinctCount:
+        return FUNCTION_DISTINCT_COUNT;
+    case model::function_t::E_PopulationLowDistinctCount:
+        return FUNCTION_LOW_DISTINCT_COUNT;
+    case model::function_t::E_PopulationHighDistinctCount:
+        return FUNCTION_HIGH_DISTINCT_COUNT;
+    case model::function_t::E_PopulationRare:
+        return FUNCTION_RARE;
+    case model::function_t::E_PopulationRareCount:
+        return FUNCTION_RARE_COUNT;
+    case model::function_t::E_PopulationFreqRare:
+        return FUNCTION_FREQ_RARE;
+    case model::function_t::E_PopulationFreqRareCount:
+        return FUNCTION_FREQ_RARE_COUNT;
+    case model::function_t::E_PopulationLowCounts:
+        return FUNCTION_LOW_COUNT;
+    case model::function_t::E_PopulationHighCounts:
+        return FUNCTION_HIGH_COUNT;
+    case model::function_t::E_PopulationInfoContent:
+        return FUNCTION_INFO_CONTENT;
+    case model::function_t::E_PopulationLowInfoContent:
+        return FUNCTION_LOW_INFO_CONTENT;
+    case model::function_t::E_PopulationHighInfoContent:
+        return FUNCTION_HIGH_INFO_CONTENT;
+    case model::function_t::E_PopulationTimeOfDay:
+        return FUNCTION_TIME_OF_DAY;
+    case model::function_t::E_PopulationTimeOfWeek:
+        return FUNCTION_TIME_OF_WEEK;
+    case model::function_t::E_PopulationMetric:
+        return FUNCTION_METRIC;
+    case model::function_t::E_PopulationMetricMean:
+        return FUNCTION_AVERAGE;
+    case model::function_t::E_PopulationMetricLowMean:
+        return FUNCTION_LOW_MEAN;
+    case model::function_t::E_PopulationMetricHighMean:
+        return FUNCTION_HIGH_MEAN;
+    case model::function_t::E_PopulationMetricMedian:
+        return FUNCTION_MEDIAN;
+    case model::function_t::E_PopulationMetricLowMedian:
+        return FUNCTION_LOW_MEDIAN;
+    case model::function_t::E_PopulationMetricHighMedian:
+        return FUNCTION_HIGH_MEDIAN;
+    case model::function_t::E_PopulationMetricMin:
+        return FUNCTION_MIN;
+    case model::function_t::E_PopulationMetricMax:
+        return FUNCTION_MAX;
+    case model::function_t::E_PopulationMetricVariance:
+        return FUNCTION_VARIANCE;
+    case model::function_t::E_PopulationMetricLowVariance:
+        return FUNCTION_LOW_VARIANCE;
+    case model::function_t::E_PopulationMetricHighVariance:
+        return FUNCTION_HIGH_VARIANCE;
+    case model::function_t::E_PopulationMetricSum:
+        return FUNCTION_SUM;
+    case model::function_t::E_PopulationMetricLowSum:
+        return FUNCTION_LOW_SUM;
+    case model::function_t::E_PopulationMetricHighSum:
+        return FUNCTION_HIGH_SUM;
+    case model::function_t::E_PopulationLatLong:
+        return FUNCTION_LAT_LONG;
+    case model::function_t::E_PopulationMaxVelocity:
+        return FUNCTION_MAX_VELOCITY;
+    case model::function_t::E_PopulationMinVelocity:
+        return FUNCTION_MIN_VELOCITY;
+    case model::function_t::E_PopulationMeanVelocity:
+        return FUNCTION_MEAN_VELOCITY;
+    case model::function_t::E_PopulationSumVelocity:
+        return FUNCTION_SUM_VELOCITY;
+    case model::function_t::E_PeersCount:
+        return FUNCTION_COUNT;
+    case model::function_t::E_PeersLowCounts:
+        return FUNCTION_LOW_COUNT;
+    case model::function_t::E_PeersHighCounts:
+        return FUNCTION_HIGH_COUNT;
+    case model::function_t::E_PeersDistinctCount:
+        return FUNCTION_DISTINCT_COUNT;
+    case model::function_t::E_PeersLowDistinctCount:
+        return FUNCTION_LOW_DISTINCT_COUNT;
+    case model::function_t::E_PeersHighDistinctCount:
+        return FUNCTION_HIGH_DISTINCT_COUNT;
+    case model::function_t::E_PeersInfoContent:
+        return FUNCTION_INFO_CONTENT;
+    case model::function_t::E_PeersLowInfoContent:
+        return FUNCTION_LOW_INFO_CONTENT;
+    case model::function_t::E_PeersHighInfoContent:
+        return FUNCTION_HIGH_INFO_CONTENT;
+    case model::function_t::E_PeersTimeOfDay:
+        return FUNCTION_TIME_OF_DAY;
+    case model::function_t::E_PeersTimeOfWeek:
+        return FUNCTION_TIME_OF_WEEK;
     }
 
     LOG_ERROR("Unexpected function = " << m_Function);
     return EMPTY_STRING;
 }
 
-std::ostream &CFieldConfig::CFieldOptions::debugPrintClause(std::ostream &strm) const
-{
+std::ostream& CFieldConfig::CFieldOptions::debugPrintClause(std::ostream& strm) const {
     strm << this->verboseFunctionName();
-    if (!m_FieldName.empty())
-    {
+    if (!m_FieldName.empty()) {
         strm << '(' << m_FieldName << ')';
     }
 
     bool considerUseNull(false);
-    if (!m_ByFieldName.empty())
-    {
+    if (!m_ByFieldName.empty()) {
         strm << ' ' << BY_TOKEN << ' ' << m_ByFieldName;
         considerUseNull = true;
     }
-    if (!m_OverFieldName.empty())
-    {
+    if (!m_OverFieldName.empty()) {
         strm << ' ' << OVER_TOKEN << ' ' << m_OverFieldName;
         considerUseNull = true;
     }
-    if (!m_PartitionFieldName.empty())
-    {
+    if (!m_PartitionFieldName.empty()) {
         strm << ' ' << PARTITION_FIELD_OPTION << '=' << m_PartitionFieldName;
     }
-    if (m_UseNull && considerUseNull)
-    {
+    if (m_UseNull && considerUseNull) {
         strm << ' ' << USE_NULL_OPTION << "=1";
     }
-    if (m_OverHasExcludeFrequent)
-    {
-        if (m_ByHasExcludeFrequent)
-        {
+    if (m_OverHasExcludeFrequent) {
+        if (m_ByHasExcludeFrequent) {
             strm << ' ' << EXCLUDE_FREQUENT_OPTION << '=' << ALL_TOKEN;
-        }
-        else
-        {
+        } else {
             strm << ' ' << EXCLUDE_FREQUENT_OPTION << '=' << OVER_TOKEN;
         }
-    }
-    else
-    {
-        if (m_ByHasExcludeFrequent)
-        {
+    } else {
+        if (m_ByHasExcludeFrequent) {
             strm << ' ' << EXCLUDE_FREQUENT_OPTION << '=' << BY_TOKEN;
         }
     }
@@ -2156,8 +1665,7 @@ std::ostream &CFieldConfig::CFieldOptions::debugPrintClause(std::ostream &strm)
     return strm;
 }
 
-void CFieldConfig::CFieldOptions::swap(CFieldOptions &other)
-{
+void CFieldConfig::CFieldOptions::swap(CFieldOptions& other) {
     m_Description.swap(other.m_Description);
     std::swap(m_Function, other.m_Function);
     m_FieldName.swap(other.m_FieldName);
@@ -2170,20 +1678,14 @@ void CFieldConfig::CFieldOptions::swap(CFieldOptions &other)
     std::swap(m_UseNull, other.m_UseNull);
 }
 
-void swap(CFieldConfig::CFieldOptions &lhs, CFieldConfig::CFieldOptions &rhs)
-{
+void swap(CFieldConfig::CFieldOptions& lhs, CFieldConfig::CFieldOptions& rhs) {
     lhs.swap(rhs);
 }
 
-std::ostream &operator<<(std::ostream &strm,
-                         const CFieldConfig::CFieldOptions &options)
-{
+std::ostream& operator<<(std::ostream& strm, const
     options.debugPrintClause(strm);
     strm << " (config key: " << options.m_ConfigKey << " description: " << options.m_Description << ')';
     return strm;
 }
-
-
 }
 }
-
diff --git a/lib/api/CFieldDataTyper.cc b/lib/api/CFieldDataTyper.cc
index 3db024470a..24353cc640 100644
--- a/lib/api/CFieldDataTyper.cc
+++ b/lib/api/CFieldDataTyper.cc
@@ -7,12 +7,12 @@
 #include
 #include

-#include
 #include
 #include
 #include
 #include
 #include
+#include

 #include
 #include
@@ -25,14 +25,10 @@

 #include

+namespace ml {
+namespace api {

-namespace ml
-{
-namespace api
-{
-
-namespace
-{
+namespace {

 const std::string VERSION_TAG("a");
 const std::string TYPER_TAG("b");
@@ -42,17 +38,16 @@ const std::string EXAMPLES_COLLECTOR_TAG("c");

 // Initialise statics
 const std::string CFieldDataTyper::ML_STATE_INDEX(".ml-state");
 const std::string CFieldDataTyper::MLCATEGORY_NAME("mlcategory");
-const double CFieldDataTyper::SIMILARITY_THRESHOLD(0.7);
+const double CFieldDataTyper::SIMILARITY_THRESHOLD(0.7);
 const std::string CFieldDataTyper::STATE_TYPE("categorizer_state");
 const std::string CFieldDataTyper::STATE_VERSION("1");
-
-CFieldDataTyper::CFieldDataTyper(const std::string &jobId,
-                                 const CFieldConfig &config,
-                                 const model::CLimits &limits,
-                                 COutputHandler &outputHandler,
-                                 CJsonOutputWriter &jsonOutputWriter,
-                                 CBackgroundPersister *periodicPersister)
+CFieldDataTyper::CFieldDataTyper(const std::string& jobId,
+                                 const CFieldConfig& config,
+                                 const model::CLimits& limits,
+                                 COutputHandler& outputHandler,
+                                 CJsonOutputWriter& jsonOutputWriter,
+                                 CBackgroundPersister* periodicPersister)
     : m_JobId(jobId),
       m_OutputHandler(outputHandler),
       m_ExtraFieldNames(1, MLCATEGORY_NAME),
@@ -64,41 +59,33 @@ CFieldDataTyper::CFieldDataTyper(const std::string &jobId,
       m_ExamplesCollector(limits.maxExamples()),
       m_CategorizationFieldName(config.categorizationFieldName()),
       m_CategorizationFilter(),
-      m_PeriodicPersister(periodicPersister)
-{
+      m_PeriodicPersister(periodicPersister) {
     this->createTyper(m_CategorizationFieldName);

     LOG_DEBUG("Configuring categorization filtering");
     m_CategorizationFilter.configure(config.categorizationFilters());
 }

-CFieldDataTyper::~CFieldDataTyper()
-{
+CFieldDataTyper::~CFieldDataTyper() {
     m_DataTyper->dumpStats();
 }

-void CFieldDataTyper::newOutputStream()
-{
+void CFieldDataTyper::newOutputStream() {
     m_WriteFieldNames = true;
     m_OutputHandler.newOutputStream();
 }

-bool CFieldDataTyper::handleRecord(const TStrStrUMap &dataRowFields)
-{
+bool CFieldDataTyper::handleRecord(const TStrStrUMap& dataRowFields) {
     // First time through we output the field names
-    if (m_WriteFieldNames)
-    {
+    if (m_WriteFieldNames) {
         TStrVec fieldNames;
         fieldNames.reserve(dataRowFields.size() + 1);
-        for (const auto &entry : dataRowFields)
-        {
+        for (const auto& entry : dataRowFields) {
             fieldNames.push_back(entry.first);
         }

-        if (m_OutputHandler.fieldNames(fieldNames, m_ExtraFieldNames) == false)
-        {
-            LOG_ERROR("Unable to set field names for output:" << core_t::LINE_ENDING <<
-                      this->debugPrintRecord(dataRowFields));
+        if (m_OutputHandler.fieldNames(fieldNames, m_ExtraFieldNames) == false) {
+            LOG_ERROR("Unable to set field names for output:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields));
             return false;
         }
         m_WriteFieldNames = false;
@@ -106,10 +93,8 @@ bool CFieldDataTyper::handleRecord(const TStrStrUMap &dataRowFields)

     // Non-empty control fields take precedence over everything else
     TStrStrUMapCItr iter = dataRowFields.find(CONTROL_FIELD_NAME);
-    if (iter != dataRowFields.end() && !iter->second.empty())
-    {
-        if (m_OutputHandler.consumesControlMessages())
-        {
+    if (iter != dataRowFields.end() && !iter->second.empty()) {
+        if (m_OutputHandler.consumesControlMessages()) {
             return m_OutputHandler.writeRow(dataRowFields, m_Overrides);
         }
         return this->handleControlMessage(iter->second);
@@ -117,159 +102,121 @@ bool CFieldDataTyper::handleRecord(const TStrStrUMap &dataRowFields)

     m_OutputFieldCategory = core::CStringUtils::typeToString(this->computeType(dataRowFields));

-    if (m_OutputHandler.writeRow(dataRowFields, m_Overrides) == false)
-    {
-        LOG_ERROR("Unable to write output with type " <<
-                  m_OutputFieldCategory << " for input:" << core_t::LINE_ENDING <<
-                  this->debugPrintRecord(dataRowFields));
+    if (m_OutputHandler.writeRow(dataRowFields, m_Overrides) == false) {
+        LOG_ERROR("Unable to write output with type " << m_OutputFieldCategory << " for input:" << core_t::LINE_ENDING
+                                                      << this->debugPrintRecord(dataRowFields));
         return false;
     }
     ++m_NumRecordsHandled;
     return true;
 }

-void CFieldDataTyper::finalise()
-{
+void CFieldDataTyper::finalise() {
     // Pass on the request in case we're chained
     m_OutputHandler.finalise();

     // Wait for any ongoing periodic persist to complete, so that the data adder
     // is not used by both a periodic persist and final persist at the
     // same time
-    if (m_PeriodicPersister != nullptr)
-    {
+    if (m_PeriodicPersister != nullptr) {
         m_PeriodicPersister->waitForIdle();
     }
 }

-uint64_t CFieldDataTyper::numRecordsHandled() const
-{
+uint64_t CFieldDataTyper::numRecordsHandled() const {
     return m_NumRecordsHandled;
 }

-COutputHandler &CFieldDataTyper::outputHandler()
-{
+COutputHandler& CFieldDataTyper::outputHandler() {
     return m_OutputHandler;
 }

-int CFieldDataTyper::computeType(const TStrStrUMap &dataRowFields)
-{
-    const std::string &categorizationFieldName = m_DataTyper->fieldName();
+int CFieldDataTyper::computeType(const TStrStrUMap& dataRowFields) {
+    const std::string& categorizationFieldName = m_DataTyper->fieldName();
     TStrStrUMapCItr fieldIter = dataRowFields.find(categorizationFieldName);
-    if (fieldIter == dataRowFields.end())
-    {
-        LOG_WARN("Assigning type -1 to record with no " <<
-                 categorizationFieldName << " field:" << core_t::LINE_ENDING <<
-                 this->debugPrintRecord(dataRowFields));
+    if (fieldIter == dataRowFields.end()) {
+        LOG_WARN("Assigning type -1 to record with no " << categorizationFieldName << " field:" << core_t::LINE_ENDING
+                                                        << this->debugPrintRecord(dataRowFields));
         return -1;
     }

-    const std::string &fieldValue = fieldIter->second;
-    if (fieldValue.empty())
-    {
-        LOG_WARN("Assigning type -1 to record with blank " <<
-                 categorizationFieldName << " field:" << core_t::LINE_ENDING <<
-                 this->debugPrintRecord(dataRowFields));
+    const std::string& fieldValue = fieldIter->second;
+    if (fieldValue.empty()) {
+        LOG_WARN("Assigning type -1 to record with blank " << categorizationFieldName << " field:" << core_t::LINE_ENDING
+                                                           << this->debugPrintRecord(dataRowFields));
        return -1;
    }

     int type = -1;
-    if (m_CategorizationFilter.empty())
-    {
+    if (m_CategorizationFilter.empty()) {
         type = m_DataTyper->computeType(false, dataRowFields, fieldValue, fieldValue.length());
-    }
-    else
-    {
+    } else {
         std::string filtered = m_CategorizationFilter.apply(fieldValue);
         type = m_DataTyper->computeType(false, dataRowFields, filtered, fieldValue.length());
     }

-    if (type < 1)
-    {
+    if (type < 1) {
         return -1;
     }

     bool exampleAdded = m_ExamplesCollector.add(static_cast(type), fieldValue);
     bool searchTermsChanged = this->createReverseSearch(type);
-    if (exampleAdded || searchTermsChanged)
-    {
-        const TStrSet &examples = m_ExamplesCollector.examples(static_cast(type));
-        m_JsonOutputWriter.writeCategoryDefinition(type,
-                                                   m_SearchTerms,
-                                                   m_SearchTermsRegex,
-                                                   m_MaxMatchingLength,
-                                                   examples);
+    if (exampleAdded || searchTermsChanged) {
+        const TStrSet& examples = m_ExamplesCollector.examples(static_cast(type));
+        m_JsonOutputWriter.writeCategoryDefinition(type, m_SearchTerms, m_SearchTermsRegex, m_MaxMatchingLength, examples);
     }

     // Check if a periodic persist is due.
-    if (m_PeriodicPersister != nullptr)
-    {
+    if (m_PeriodicPersister != nullptr) {
         m_PeriodicPersister->startBackgroundPersistIfAppropriate();
     }

     return type;
 }

-void CFieldDataTyper::createTyper(const std::string &fieldName)
-{
+void CFieldDataTyper::createTyper(const std::string& fieldName) {
     // TODO - if we ever have more than one data typer class, this should be
     // replaced with a factory
-    TTokenListDataTyperKeepsFields::TTokenListReverseSearchCreatorIntfCPtr
-        reverseSearchCreator(new CTokenListReverseSearchCreator(fieldName));
-    m_DataTyper.reset(new TTokenListDataTyperKeepsFields(reverseSearchCreator,
-                                                         SIMILARITY_THRESHOLD,
-                                                         fieldName));
+    TTokenListDataTyperKeepsFields::TTokenListReverseSearchCreatorIntfCPtr reverseSearchCreator(
+        new CTokenListReverseSearchCreator(fieldName));
+    m_DataTyper.reset(new TTokenListDataTyperKeepsFields(reverseSearchCreator, SIMILARITY_THRESHOLD, fieldName));

     LOG_TRACE("Created new categorizer for field '" << fieldName << "'");
 }

-bool CFieldDataTyper::createReverseSearch(int type)
-{
+bool CFieldDataTyper::createReverseSearch(int type) {
     bool wasCached(false);
-    if (m_DataTyper->createReverseSearch(type,
-                                         m_SearchTerms,
-                                         m_SearchTermsRegex,
-                                         m_MaxMatchingLength,
-                                         wasCached) == false)
-    {
+    if (m_DataTyper->createReverseSearch(type, m_SearchTerms, m_SearchTermsRegex, m_MaxMatchingLength, wasCached) == false) {
         m_SearchTerms.clear();
         m_SearchTermsRegex.clear();
     }
     return !wasCached;
 }

-bool CFieldDataTyper::restoreState(core::CDataSearcher &restoreSearcher,
-                                   core_t::TTime &completeToTime)
-{
+bool CFieldDataTyper::restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime) {
     // Pass on the request in case we're chained
-    if (m_OutputHandler.restoreState(restoreSearcher,
-                                     completeToTime) == false)
-    {
+    if (m_OutputHandler.restoreState(restoreSearcher, completeToTime) == false) {
         return false;
     }

     LOG_DEBUG("Restore typer state");

-    try
-    {
+    try {
         // Restore from Elasticsearch compressed data
         core::CStateDecompressor decompressor(restoreSearcher);
         decompressor.setStateRestoreSearch(ML_STATE_INDEX);

         core::CDataSearcher::TIStreamP strm(decompressor.search(1, 1));
-        if (strm == 0)
-        {
+        if (strm == 0) {
             LOG_ERROR("Unable to connect to data store");
             return false;
         }

-        if (strm->bad())
-        {
+        if (strm->bad()) {
             LOG_ERROR("Categorizer state restoration returned a bad stream");
             return false;
         }

-        if (strm->fail())
-        {
+        if (strm->fail()) {
             // This is fatal. If the stream exists and has failed then state is missing
             LOG_ERROR("Categorizer state restoration returned a failed stream");
             return false;
@@ -278,14 +225,11 @@ bool CFieldDataTyper::restoreState(core::CDataSearcher &restoreSearcher,

         // We're dealing with streaming JSON state
         core::CJsonStateRestoreTraverser traverser(*strm);

-        if (this->acceptRestoreTraverser(traverser) == false)
-        {
+        if (this->acceptRestoreTraverser(traverser) == false) {
             LOG_ERROR("JSON restore failed");
             return false;
         }
-    }
-    catch (std::exception &e)
-    {
+    } catch (std::exception& e) {
         LOG_ERROR("Failed to restore state! " << e.what());
         // This is fatal in terms of the categorizer we attempted to restore,
         // but returning false here can throw the system into a repeated cycle
@@ -298,97 +242,70 @@ bool CFieldDataTyper::restoreState(core::CDataSearcher &restoreSearcher,
     return true;
 }

-bool CFieldDataTyper::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    const std::string &firstFieldName = traverser.name();
-    if (traverser.isEof())
-    {
+bool CFieldDataTyper::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    const std::string& firstFieldName = traverser.name();
+    if (traverser.isEof()) {
         LOG_ERROR("Expected categorizer persisted state but no state exists");
         return false;
     }

-    if (firstFieldName == VERSION_TAG)
-    {
+    if (firstFieldName == VERSION_TAG) {
         std::string version;
-        if (core::CStringUtils::stringToType(traverser.value(), version) == false)
-        {
+        if (core::CStringUtils::stringToType(traverser.value(), version) == false) {
             LOG_ERROR("Cannot restore categorizer, invalid version: " << traverser.value());
             return false;
         }

-        if (version != STATE_VERSION)
-        {
+        if (version != STATE_VERSION) {
             LOG_DEBUG("Categorizer has not been restored as the version has changed");
             return true;
         }
-    }
-    else
-    {
-        LOG_ERROR("Cannot restore categorizer - " << VERSION_TAG
-                  << " element expected but found "
-                  << traverser.name() << '=' << traverser.value());
+    } else {
+        LOG_ERROR("Cannot restore categorizer - " << VERSION_TAG << " element expected but found " << traverser.name() << '='
+                                                  << traverser.value());
         return false;
     }

-    if (traverser.next() == false)
-    {
-        LOG_ERROR("Cannot restore categorizer - end of object reached when "
-                  << TYPER_TAG << " was expected");
+    if (traverser.next() == false) {
+        LOG_ERROR("Cannot restore categorizer - end of object reached when " << TYPER_TAG << " was expected");
         return false;
     }

-    if (traverser.name() == TYPER_TAG)
-    {
-        if (traverser.traverseSubLevel(boost::bind(
-                &CDataTyper::acceptRestoreTraverser, m_DataTyper, _1)) == false)
-        {
+    if (traverser.name() == TYPER_TAG) {
+        if (traverser.traverseSubLevel(boost::bind(&CDataTyper::acceptRestoreTraverser, m_DataTyper, _1)) == false) {
             LOG_ERROR("Cannot restore categorizer, unexpected element: " << traverser.value());
             return false;
         }
-    }
-    else
-    {
-        LOG_ERROR("Cannot restore categorizer - " << TYPER_TAG
-                  << " element expected but found "
-                  << traverser.name() << '=' << traverser.value());
+    } else {
+        LOG_ERROR("Cannot restore categorizer - " << TYPER_TAG << " element expected but found " << traverser.name() << '='
+                                                  << traverser.value());
         return false;
     }

-    if (traverser.next() == false)
-    {
-        LOG_ERROR("Cannot restore categorizer - end of object reached when "
-                  << EXAMPLES_COLLECTOR_TAG << " was expected");
+    if (traverser.next() == false) {
+        LOG_ERROR("Cannot restore categorizer - end of object reached when " << EXAMPLES_COLLECTOR_TAG << " was expected");
         return false;
     }

-    if (traverser.name() == EXAMPLES_COLLECTOR_TAG)
-    {
-        if (traverser.traverseSubLevel(boost::bind(&CCategoryExamplesCollector::acceptRestoreTraverser,
-                                                   boost::ref(m_ExamplesCollector),
-                                                   _1)) == false ||
-            traverser.haveBadState())
-        {
+    if (traverser.name() == EXAMPLES_COLLECTOR_TAG) {
+        if (traverser.traverseSubLevel(
+                boost::bind(&CCategoryExamplesCollector::acceptRestoreTraverser, boost::ref(m_ExamplesCollector), _1)) == false ||
+            traverser.haveBadState()) {
             LOG_ERROR("Cannot restore categorizer, unexpected element: " << traverser.value());
             return false;
         }
-    }
-    else
-    {
-        LOG_ERROR("Cannot restore categorizer - " << EXAMPLES_COLLECTOR_TAG
-                  << " element expected but found "
-                  << traverser.name() << '=' << traverser.value());
+    } else {
+        LOG_ERROR("Cannot restore categorizer - " << EXAMPLES_COLLECTOR_TAG << " element expected but found " << traverser.name() << '='
+                                                  << traverser.value());
         return false;
     }

     return true;
 }

-bool CFieldDataTyper::persistState(core::CDataAdder &persister)
-{
-    if (m_PeriodicPersister != nullptr)
-    {
+bool CFieldDataTyper::persistState(core::CDataAdder& persister) {
+    if (m_PeriodicPersister != nullptr) {
         // This will not happen if finalise() was called before persisting state
-        if (m_PeriodicPersister->isBusy())
-        {
+        if (m_PeriodicPersister->isBusy()) {
             LOG_ERROR("Cannot do final persistence of state - periodic "
                       "persister still busy");
             return false;
@@ -396,37 +313,29 @@ bool CFieldDataTyper::persistState(core::CDataAdder &persister)
     }

     // Pass on the request in case we're chained
-    if (m_OutputHandler.persistState(persister) == false)
-    {
+    if (m_OutputHandler.persistState(persister) == false) {
         return false;
     }

     LOG_DEBUG("Persist typer state");

-    return this->doPersistState(m_DataTyper->makePersistFunc(),
-                                m_ExamplesCollector,
-                                persister);
+    return this->doPersistState(m_DataTyper->makePersistFunc(), m_ExamplesCollector, persister);
 }

-bool CFieldDataTyper::doPersistState(const CDataTyper::TPersistFunc &dataTyperPersistFunc,
-                                     const CCategoryExamplesCollector &examplesCollector,
-                                     core::CDataAdder &persister)
-{
-    try
-    {
+bool CFieldDataTyper::doPersistState(const CDataTyper::TPersistFunc& dataTyperPersistFunc,
+                                     const CCategoryExamplesCollector& examplesCollector,
+                                     core::CDataAdder& persister) {
+    try {
         core::CStateCompressor compressor(persister);

-        core::CDataAdder::TOStreamP
-            strm = compressor.addStreamed(ML_STATE_INDEX, m_JobId + '_' + STATE_TYPE);
+        core::CDataAdder::TOStreamP strm = compressor.addStreamed(ML_STATE_INDEX, m_JobId + '_' + STATE_TYPE);

-        if (strm == 0)
-        {
+        if (strm == 0) {
             LOG_ERROR("Failed to create persistence stream");
             return false;
         }

-        if (!strm->good())
-        {
+        if (!strm->good()) {
             LOG_ERROR("Persistence stream is bad before stream of "
                       "state for the categorizer");
             return false;
@@ -439,46 +348,36 @@ bool CFieldDataTyper::doPersistState(const CDataTyper::TPersistFunc &dataTyperPe
             this->acceptPersistInserter(dataTyperPersistFunc, examplesCollector, inserter);
         }

-        if (strm->bad())
-        {
+        if (strm->bad()) {
             LOG_ERROR("Persistence stream went bad during stream of "
                       "state for the categorizer");
             return false;
         }

-        if (compressor.streamComplete(strm, true) == false || strm->bad())
-        {
+        if (compressor.streamComplete(strm, true) == false || strm->bad()) {
             LOG_ERROR("Failed to complete last persistence stream");
             return false;
         }
-    }
-    catch (std::exception &e)
-    {
+    } catch (std::exception& e) {
         LOG_ERROR("Failed to persist state! " << e.what());
         return false;
     }

     return true;
 }

-void CFieldDataTyper::acceptPersistInserter(const CDataTyper::TPersistFunc &dataTyperPersistFunc,
-                                            const CCategoryExamplesCollector &examplesCollector,
-                                            core::CStatePersistInserter &inserter) const
-{
+void CFieldDataTyper::acceptPersistInserter(const CDataTyper::TPersistFunc& dataTyperPersistFunc,
+                                            const CCategoryExamplesCollector& examplesCollector,
+                                            core::CStatePersistInserter& inserter) const {
     inserter.insertValue(VERSION_TAG, STATE_VERSION);
     inserter.insertLevel(TYPER_TAG, dataTyperPersistFunc);
-    inserter.insertLevel(EXAMPLES_COLLECTOR_TAG, boost::bind(
-                             &CCategoryExamplesCollector::acceptPersistInserter,
-                             &examplesCollector,
-                             _1));
+    inserter.insertLevel(EXAMPLES_COLLECTOR_TAG, boost::bind(&CCategoryExamplesCollector::acceptPersistInserter, &examplesCollector, _1));
 }

-bool CFieldDataTyper::periodicPersistState(CBackgroundPersister &persister)
-{
+bool CFieldDataTyper::periodicPersistState(CBackgroundPersister& persister) {
     LOG_DEBUG("Periodic persist typer state");

     // Pass on the request in case we're chained
-    if (m_OutputHandler.periodicPersistState(persister) == false)
-    {
+    if (m_OutputHandler.periodicPersistState(persister) == false) {
         return false;
     }

@@ -489,8 +388,7 @@ bool CFieldDataTyper::periodicPersistState(CBackgroundPersister &persister)
                                               // MUST be copied for thread safety
                                               m_DataTyper->makePersistFunc(),
                                               m_ExamplesCollector,
-                                              _1)) == false)
-    {
+                                              _1)) == false) {
         LOG_ERROR("Failed to add categorizer background persistence function");
         return false;
     }
@@ -498,8 +396,7 @@ bool CFieldDataTyper::periodicPersistState(CBackgroundPersister &persister)
     return true;
 }

-void CFieldDataTyper::resetAfterCorruptRestore()
-{
+void CFieldDataTyper::resetAfterCorruptRestore() {
     LOG_WARN("Discarding corrupt categorizer state - will re-categorize from scratch");

     m_SearchTerms.clear();
@@ -508,57 +405,45 @@ void CFieldDataTyper::resetAfterCorruptRestore()
     m_ExamplesCollector.clear();
 }

-bool CFieldDataTyper::handleControlMessage(const std::string &controlMessage)
-{
-    if (controlMessage.empty())
-    {
+bool CFieldDataTyper::handleControlMessage(const std::string& controlMessage) {
+    if (controlMessage.empty()) {
         LOG_ERROR("Programmatic error - handleControlMessage should only be "
                   "called with non-empty control messages");
         return false;
     }

-    switch (controlMessage[0])
-    {
-        case ' ':
-            // Spaces are just used to fill the buffers and force prior messages
-            // through the system - we don't need to do anything else
-            LOG_TRACE("Received space control message of length " <<
-                      controlMessage.length());
-            break;
-        case CONTROL_FIELD_NAME_CHAR:
-            // Silent no-op. This is a simple way to ignore repeated header
-            // rows in input.
-            break;
-        case 'f':
-            // Flush ID comes after the initial f
-            this->acknowledgeFlush(controlMessage.substr(1));
-            break;
-        default:
-            LOG_WARN("Ignoring unknown control message of length " <<
-                     controlMessage.length() << " beginning with '" <<
-                     controlMessage[0] << '\'');
-            // Don't return false here (for the time being at least), as it
-            // seems excessive to cause the entire job to fail
-            break;
+    switch (controlMessage[0]) {
+    case ' ':
+        // Spaces are just used to fill the buffers and force prior messages
+        // through the system - we don't need to do anything else
+        LOG_TRACE("Received space control message of length " << controlMessage.length());
+        break;
+    case CONTROL_FIELD_NAME_CHAR:
+        // Silent no-op. This is a simple way to ignore repeated header
+        // rows in input.
+        break;
+    case 'f':
+        // Flush ID comes after the initial f
+        this->acknowledgeFlush(controlMessage.substr(1));
+        break;
+    default:
+        LOG_WARN("Ignoring unknown control message of length " << controlMessage.length() << " beginning with '" << controlMessage[0]
+                                                               << '\'');
+        // Don't return false here (for the time being at least), as it
+        // seems excessive to cause the entire job to fail
+        break;
     }

     return true;
 }

-void CFieldDataTyper::acknowledgeFlush(const std::string &flushId)
-{
-    if (flushId.empty())
-    {
+void CFieldDataTyper::acknowledgeFlush(const std::string& flushId) {
+    if (flushId.empty()) {
         LOG_ERROR("Received flush control message with no ID");
-    }
-    else
-    {
+    } else {
         LOG_TRACE("Received flush control message with ID " << flushId);
     }
     m_JsonOutputWriter.acknowledgeFlush(flushId, 0);
 }
-
-
 }
 }
-
diff --git a/lib/api/CForecastRunner.cc b/lib/api/CForecastRunner.cc
index c50228d11b..8274283a66 100644
--- a/lib/api/CForecastRunner.cc
+++ b/lib/api/CForecastRunner.cc
@@ -14,26 +14,26 @@
 #include
 #include

-#include
 #include
+#include
 #include

-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {

-namespace
-{
+namespace {
 const std::string EMPTY_STRING;
 }

 const std::string CForecastRunner::ERROR_FORECAST_REQUEST_FAILED_TO_PARSE("Failed to parse forecast request: ");
 const std::string CForecastRunner::ERROR_NO_FORECAST_ID("forecast ID must be specified and non empty");
-const std::string CForecastRunner::ERROR_TOO_MANY_JOBS("Forecast cannot be executed due to queue limit. Please wait for requests to finish and try again");
-const std::string CForecastRunner::ERROR_NO_MODELS("Forecast cannot be executed as model is not yet established. Job requires more time to learn");
-const std::string CForecastRunner::ERROR_NO_DATA_PROCESSED("Forecast cannot be executed as job requires data to have been processed and modeled");
+const std::string CForecastRunner::ERROR_TOO_MANY_JOBS(
+    "Forecast cannot be executed due to queue limit. Please wait for requests to finish and try again");
+const std::string
+    CForecastRunner::ERROR_NO_MODELS("Forecast cannot be executed as model is not yet established. Job requires more time to learn");
+const std::string
+    CForecastRunner::ERROR_NO_DATA_PROCESSED("Forecast cannot be executed as job requires data to have been processed and modeled");
 const std::string CForecastRunner::ERROR_NO_CREATE_TIME("Forecast create time must be specified and non zero");
 const std::string CForecastRunner::ERROR_BAD_MEMORY_STATUS("Forecast cannot be executed as model memory status is not OK");
 const std::string CForecastRunner::ERROR_MEMORY_LIMIT("Forecast cannot be executed as forecast memory usage is predicted to exceed 20MB");
@@ -46,39 +46,36 @@ const std::string CForecastRunner::INFO_DEFAULT_EXPIRY("Forecast expires_in not
 const std::string CForecastRunner::INFO_NO_MODELS_CAN_CURRENTLY_BE_FORECAST("Insufficient history to forecast for all models");

 CForecastRunner::SForecast::SForecast()
-    :s_ForecastId(),
-     s_ForecastAlias(),
-     s_ForecastSeries(),
-     s_CreateTime(0),
-     s_StartTime(0),
-     s_Duration(0),
-     s_ExpiryTime(0),
-     s_BoundsPercentile(0),
-     s_NumberOfModels(0),
-     s_NumberOfForecastableModels(0),
-     s_MemoryUsage(0),
-     s_Messages()
-{
+    : s_ForecastId(),
+      s_ForecastAlias(),
+      s_ForecastSeries(),
+      s_CreateTime(0),
+      s_StartTime(0),
+      s_Duration(0),
+      s_ExpiryTime(0),
+      s_BoundsPercentile(0),
+      s_NumberOfModels(0),
+      s_NumberOfForecastableModels(0),
+      s_MemoryUsage(0),
+      s_Messages() {
 }

-CForecastRunner::SForecast::SForecast(SForecast &&other)
-    :s_ForecastId(std::move(other.s_ForecastId)),
-     s_ForecastAlias(std::move(other.s_ForecastAlias)),
-     s_ForecastSeries(std::move(other.s_ForecastSeries)),
-     s_CreateTime(other.s_CreateTime),
-     s_StartTime(other.s_StartTime),
-     s_Duration(other.s_Duration),
-     s_ExpiryTime(other.s_ExpiryTime),
-     s_BoundsPercentile(other.s_BoundsPercentile),
-     s_NumberOfModels(other.s_NumberOfModels),
-     s_NumberOfForecastableModels(other.s_NumberOfForecastableModels),
-     s_MemoryUsage(other.s_MemoryUsage),
-     s_Messages(other.s_Messages)
-{
+CForecastRunner::SForecast::SForecast(SForecast&& other)
+    : s_ForecastId(std::move(other.s_ForecastId)),
+      s_ForecastAlias(std::move(other.s_ForecastAlias)),
+      s_ForecastSeries(std::move(other.s_ForecastSeries)),
+      s_CreateTime(other.s_CreateTime),
+      s_StartTime(other.s_StartTime),
+      s_Duration(other.s_Duration),
+      s_ExpiryTime(other.s_ExpiryTime),
+      s_BoundsPercentile(other.s_BoundsPercentile),
+      s_NumberOfModels(other.s_NumberOfModels),
+      s_NumberOfForecastableModels(other.s_NumberOfForecastableModels),
+      s_MemoryUsage(other.s_MemoryUsage),
+      s_Messages(other.s_Messages) {
 }

-CForecastRunner::SForecast &CForecastRunner::SForecast::operator=(SForecast &&other)
-{
+CForecastRunner::SForecast& CForecastRunner::SForecast::operator=(SForecast&& other) {
     s_ForecastId = std::move(other.s_ForecastId);
     s_ForecastAlias = std::move(other.s_ForecastAlias);
     s_ForecastSeries = std::move(other.s_ForecastSeries);
@@ -95,14 +92,14 @@ CForecastRunner::SForecast &CForecastRunner::SForecast::operator=(SForecast &&ot
     return *this;
 }

-CForecastRunner::CForecastRunner(const std::string &jobId, core::CJsonOutputStreamWrapper &strmOut, model::CResourceMonitor &resourceMonitor)
-    :m_JobId(jobId), m_ConcurrentOutputStream(strmOut), m_ResourceMonitor(resourceMonitor), m_Shutdown(false)
-{
+CForecastRunner::CForecastRunner(const std::string& jobId,
+                                 core::CJsonOutputStreamWrapper& strmOut,
+                                 model::CResourceMonitor& resourceMonitor)
+    : m_JobId(jobId), m_ConcurrentOutputStream(strmOut), m_ResourceMonitor(resourceMonitor), m_Shutdown(false) {
     m_Worker = std::thread([this] { this->forecastWorker(); });
 }

-CForecastRunner::~CForecastRunner()
-{
+CForecastRunner::~CForecastRunner() {
     // shutdown
     m_Shutdown = true;
     // signal the worker
@@ -113,26 +110,21 @@ CForecastRunner::~CForecastRunner()
     m_Worker.join();
 }

-void CForecastRunner::finishForecasts()
-{
+void CForecastRunner::finishForecasts() {
     std::unique_lock lock(m_Mutex);
     // note: forecast could still be active
-    while (!m_Shutdown && !m_ForecastJobs.empty())
-    {
+    while (!m_Shutdown && !m_ForecastJobs.empty()) {
         // items in the queue, wait
         m_WorkCompleteCondition.wait(lock);
     }
 }

-void CForecastRunner::forecastWorker()
-{
+void CForecastRunner::forecastWorker() {
     SForecast forecastJob;
-    while (!m_Shutdown)
-    {
-        if (this->tryGetJob(forecastJob))
-        {
-            LOG_INFO("Start forecasting from " << core::CTimeUtils::toIso8601(forecastJob.s_StartTime)
-                     << " to " << core::CTimeUtils::toIso8601(forecastJob.forecastEnd()));
+    while (!m_Shutdown) {
+        if (this->tryGetJob(forecastJob)) {
+            LOG_INFO("Start forecasting from " << core::CTimeUtils::toIso8601(forecastJob.s_StartTime) << " to "
+                                               << core::CTimeUtils::toIso8601(forecastJob.forecastEnd()));

             core::CStopWatch timer(true);
             uint64_t lastStatsUpdate = 0;
@@ -158,18 +150,17 @@ void CForecastRunner::forecastWorker()
             sink.writeStats(0.0, 0, forecastJob.s_Messages);

             // while loops allow us to free up memory for every model right after each forecast is done
-            while (!forecastJob.s_ForecastSeries.empty())
-            {
-                TForecastResultSeries &series = forecastJob.s_ForecastSeries.back();
+            while (!forecastJob.s_ForecastSeries.empty()) {
+                TForecastResultSeries& series = forecastJob.s_ForecastSeries.back();

-                while (!series.s_ToForecast.empty())
-                {
-                    const TForecastModelWrapper &model = series.s_ToForecast.back();
+                while (!series.s_ToForecast.empty()) {
+                    const TForecastModelWrapper& model = series.s_ToForecast.back();
                     model_t::TDouble1VecDouble1VecPr support = model_t::support(model.s_Feature);
                     bool success = model.s_ForecastModel->forecast(forecastJob.s_StartTime,
                                                                    forecastJob.forecastEnd(),
                                                                    forecastJob.s_BoundsPercentile,
-                                                                   support.first, support.second,
+                                                                   support.first,
+                                                                   support.second,
                                                                    boost::bind(&model::CForecastDataSink::push,
                                                                                &sink,
                                                                                _1,
@@ -179,29 +170,25 @@ void CForecastRunner::forecastWorker()
                                                                                series.s_ByFieldName,
                                                                                model.s_ByFieldValue,
                                                                                series.s_DetectorIndex),
-                                                                  message);
+                                                                   message);
                     series.s_ToForecast.pop_back();

-                    if (success == false)
-                    {
+                    if (success == false) {
                         LOG_DEBUG("Detector " << series.s_DetectorIndex << " failed to forecast");
                         ++failedForecasts;
                     }
-                    if (message.empty() == false)
-                    {
+                    if (message.empty() == false) {
                         messages.insert("Detector[" + std::to_string(series.s_DetectorIndex) + "]: " + message);
                         message.clear();
                     }

                     ++processedModels;
-                    if (processedModels != totalNumberOfForecastableModels)
-                    {
+                    if (processedModels != totalNumberOfForecastableModels) {
                         uint64_t elapsedTime = timer.lap();
-                        if (elapsedTime - lastStatsUpdate > MINIMUM_TIME_ELAPSED_FOR_STATS_UPDATE)
-                        {
-                            sink.writeStats(processedModels/totalNumberOfForecastableModels, elapsedTime, forecastJob.s_Messages);
+                        if (elapsedTime - lastStatsUpdate > MINIMUM_TIME_ELAPSED_FOR_STATS_UPDATE) {
+                            sink.writeStats(processedModels / totalNumberOfForecastableModels, elapsedTime, forecastJob.s_Messages);
                             lastStatsUpdate = elapsedTime;
                         }
                     }
@@ -224,27 +211,23 @@ void CForecastRunner::forecastWorker()
     this->deleteAllForecastJobs();
 }

-void CForecastRunner::deleteAllForecastJobs()
-{
+void CForecastRunner::deleteAllForecastJobs() {
     std::unique_lock lock(m_Mutex);
     m_ForecastJobs.clear();
     m_WorkAvailableCondition.notify_all();
 }

-bool CForecastRunner::tryGetJob(SForecast &forecastJob)
-{
+bool CForecastRunner::tryGetJob(SForecast& forecastJob) {
     std::unique_lock lock(m_Mutex);

-    if (!m_ForecastJobs.empty())
-    {
+    if (!m_ForecastJobs.empty()) {
         std::swap(forecastJob, m_ForecastJobs.front());
         m_ForecastJobs.pop_front();
         return true;
     }

     // m_Shutdown might have been set meanwhile
-    if (m_Shutdown)
-    {
+    if (m_Shutdown) {
         return false;
     }

@@ -252,39 +235,29 @@ bool CForecastRunner::tryGetJob(SForecast &forecastJob)
     return false;
 }

-bool CForecastRunner::pushForecastJob(const std::string &controlMessage,
-                                      const TAnomalyDetectorPtrVec &detectors,
-                                      const core_t::TTime lastResultsTime)
-{
+bool CForecastRunner::pushForecastJob(const std::string& controlMessage,
+                                      const TAnomalyDetectorPtrVec& detectors,
+                                      const core_t::TTime lastResultsTime) {
     SForecast forecastJob;
-    if (parseAndValidateForecastRequest(controlMessage,
-                                        forecastJob,
-                                        lastResultsTime,
-                                        boost::bind(&CForecastRunner::sendErrorMessage,
-                                                    this,
-                                                    _1,
-                                                    _2)) == false)
-    {
+    if (parseAndValidateForecastRequest(
+            controlMessage, forecastJob, lastResultsTime, boost::bind(&CForecastRunner::sendErrorMessage, this, _1, _2)) == false) {
         return false;
     }

-    if (m_ResourceMonitor.getMemoryStatus() != model_t::E_MemoryStatusOk)
-    {
+    if (m_ResourceMonitor.getMemoryStatus() != model_t::E_MemoryStatusOk) {
         this->sendErrorMessage(forecastJob, ERROR_BAD_MEMORY_STATUS);
         return false;
     }

     size_t totalNumberOfModels = 0;
     size_t totalNumberOfForecastModels = 0;
-    bool atLeastOneNonPopulationModel = false;
-    bool atLeastOneSupportedFunction = false;
+    bool atLeastOneNonPopulationModel = false;
+    bool atLeastOneSupportedFunction = false;
     size_t totalMemoryUsage = 0;

     // 1st loop over the detectors to check prerequisites
-    for (const auto &detector : detectors)
-    {
-        if (detector.get() == nullptr)
-        {
+    for (const auto& detector : detectors) {
+        if (detector.get() == nullptr) {
             LOG_ERROR("Unexpected empty detector found");
             continue;
         }
@@ -297,28 +270,24 @@ bool CForecastRunner::pushForecastJob(const std::string &controlMessage,
         atLeastOneSupportedFunction = atLeastOneSupportedFunction || prerequisites.s_IsSupportedFunction;
         totalMemoryUsage += prerequisites.s_MemoryUsageForDetector;

-        if (totalMemoryUsage >= MAX_FORECAST_MODEL_MEMORY)
-        {
+        if (totalMemoryUsage >= MAX_FORECAST_MODEL_MEMORY) {
             // note: for now MAX_FORECAST_MODEL_MEMORY is a static limit, a user can not change it
             this->sendErrorMessage(forecastJob, ERROR_MEMORY_LIMIT);
             return false;
         }
     }

-    if (atLeastOneNonPopulationModel == false)
-    {
+    if (atLeastOneNonPopulationModel == false) {
         this->sendErrorMessage(forecastJob, ERROR_NOT_SUPPORTED_FOR_POPULATION_MODELS);
         return false;
     }

-    if (atLeastOneSupportedFunction == false)
-    {
+    if (atLeastOneSupportedFunction == false) {
         this->sendErrorMessage(forecastJob, ERROR_NO_SUPPORTED_FUNCTIONS);
         return false;
     }

-    if (totalNumberOfForecastModels == 0)
-    {
+    if (totalNumberOfForecastModels == 0) {
         this->sendFinalMessage(forecastJob, INFO_NO_MODELS_CAN_CURRENTLY_BE_FORECAST);
         return false;
     }
@@ -333,10 +302,8 @@ bool CForecastRunner::pushForecastJob(const std::string &controlMessage,

     // 2nd loop over the detectors to clone models for forecasting
     TForecastResultSeriesVec s;
-    for (const auto &detector : detectors)
-    {
-        if (detector.get() == nullptr)
-        {
+    for (const auto& detector : detectors) {
+        if (detector.get() == nullptr) {
             LOG_ERROR("Unexpected empty detector found");
             continue;
         }
@@ -347,18 +314,15 @@ bool CForecastRunner::pushForecastJob(const std::string &controlMessage,
     return this->push(forecastJob);
 }

-bool CForecastRunner::push(SForecast &forecastJob)
-{
+bool CForecastRunner::push(SForecast& forecastJob) {
     std::unique_lock lock(m_Mutex);

-    if (m_ForecastJobs.size() == MAX_FORECAST_JOBS_IN_QUEUE)
-    {
+    if (m_ForecastJobs.size() == MAX_FORECAST_JOBS_IN_QUEUE) {
         this->sendErrorMessage(forecastJob, ERROR_TOO_MANY_JOBS);
         return false;
     }

-    if (forecastJob.s_NumberOfModels == 0)
-    {
+    if (forecastJob.s_NumberOfModels == 0) {
         this->sendErrorMessage(forecastJob, ERROR_NO_MODELS);
         return false;
     }
@@ -370,18 +334,16 @@ bool CForecastRunner::push(SForecast &forecastJob)
     return true;
 }

-bool CForecastRunner::parseAndValidateForecastRequest(const std::string &controlMessage,
-                                                      SForecast &forecastJob,
+bool CForecastRunner::parseAndValidateForecastRequest(const std::string& controlMessage,
+                                                      SForecast& forecastJob,
                                                       const core_t::TTime lastResultsTime,
-                                                      const TErrorFunc &errorFunction)
-{
+                                                      const TErrorFunc& errorFunction) {
     std::istringstream stringStream(controlMessage.substr(1));
     forecastJob.s_StartTime = lastResultsTime;
     core_t::TTime expiresIn = 0l;

     boost::property_tree::ptree properties;
-    try
-    {
+    try {
         boost::property_tree::read_json(stringStream, properties);
         forecastJob.s_ForecastId = properties.get("forecast_id", EMPTY_STRING);
@@ -394,28 +356,23 @@ bool CForecastRunner::parseAndValidateForecastRequest(const std::string &control

         // note: this is not exposed on x-pack side
         forecastJob.s_BoundsPercentile = properties.get("boundspercentile", 95.0);
-    }
-    catch (const std::exception &e)
-    {
+    } catch (const std::exception& e) {
         LOG_ERROR(ERROR_FORECAST_REQUEST_FAILED_TO_PARSE << e.what());
         return false;
     }

-    if (forecastJob.s_ForecastId.empty())
-    {
+    if (forecastJob.s_ForecastId.empty()) {
         LOG_ERROR(ERROR_NO_FORECAST_ID);
         return false;
     }

     // from now we have a forecast ID and can send error messages
-    if (lastResultsTime == 0l)
-    {
+    if (lastResultsTime == 0l) {
         errorFunction(forecastJob, ERROR_NO_DATA_PROCESSED);
         return false;
     }

-    if (forecastJob.s_CreateTime == 0)
-    {
+    if (forecastJob.s_CreateTime == 0) {
         errorFunction(forecastJob, ERROR_NO_CREATE_TIME);
         return false;
     }
@@ -426,28 +383,23 @@ bool CForecastRunner::parseAndValidateForecastRequest(const std::string &control
     // if you change this value, also change the log string
     // todo: refactor validation out from here
     core_t::TTime maxDuration = 8 * core::constants::WEEK;
-    if (forecastJob.s_Duration > maxDuration)
-    {
-       LOG_INFO(WARNING_DURATION_LIMIT);
-       forecastJob.s_Messages.insert(WARNING_DURATION_LIMIT);
-       forecastJob.s_Duration = maxDuration;
+    if (forecastJob.s_Duration > maxDuration) {
+        LOG_INFO(WARNING_DURATION_LIMIT);
+        forecastJob.s_Messages.insert(WARNING_DURATION_LIMIT);
+        forecastJob.s_Duration = maxDuration;
     }

-    if (forecastJob.s_Duration == 0)
-    {
+    if (forecastJob.s_Duration == 0) {
         // only log
         forecastJob.s_Duration = core::constants::DAY;
         LOG_INFO(INFO_DEFAULT_DURATION);
     }

-    if (expiresIn < -1l)
-    {
+    if (expiresIn < -1l) {
         // only log
         expiresIn = DEFAULT_EXPIRY_TIME;
         LOG_INFO(WARNING_INVALID_EXPIRY);
-    }
-    else if (expiresIn == -1l)
-    {
+    } else if (expiresIn == -1l) {
         // only log
         expiresIn = DEFAULT_EXPIRY_TIME;
         LOG_DEBUG(INFO_DEFAULT_EXPIRY);
@@ -455,11 +407,10 @@ bool CForecastRunner::parseAndValidateForecastRequest(const std::string &control

     forecastJob.s_ExpiryTime = forecastJob.s_CreateTime + expiresIn;

-    return true;
+    return true;
 }

-void CForecastRunner::sendScheduledMessage(const SForecast &forecastJob) const
-{
+void CForecastRunner::sendScheduledMessage(const SForecast& forecastJob) const {
     LOG_DEBUG("job passed forecast validation, scheduled for forecasting");
     model::CForecastDataSink sink(m_JobId,
                                   forecastJob.s_ForecastId,
@@ -473,20 +424,17 @@ void CForecastRunner::sendScheduledMessage(const SForecast &forecastJob) const
     sink.writeScheduledMessage();
 }

-void CForecastRunner::sendErrorMessage(const SForecast &forecastJob, const std::string &message) const
-{
+void CForecastRunner::sendErrorMessage(const SForecast& forecastJob, const std::string& message) const {
     LOG_ERROR(message);
     this->sendMessage(&model::CForecastDataSink::writeErrorMessage, forecastJob, message);
 }

-void CForecastRunner::sendFinalMessage(const SForecast &forecastJob, const std::string &message) const
-{
+void CForecastRunner::sendFinalMessage(const SForecast& forecastJob, const std::string& message) const {
     this->sendMessage(&model::CForecastDataSink::writeFinalMessage, forecastJob, message);
 }

 template
-void CForecastRunner::sendMessage(WRITE write, const SForecast &forecastJob, const std::string &message) const
-{
+void CForecastRunner::sendMessage(WRITE write, const SForecast& forecastJob, const std::string& message) const {
     model::CForecastDataSink sink(m_JobId,
                                   forecastJob.s_ForecastId,
                                   forecastJob.s_ForecastAlias,
@@ -500,16 +448,13 @@ void CForecastRunner::sendMessage(WRITE write, const SForecast &forecastJob, con
     (sink.*write)(message);
 }

-void CForecastRunner::SForecast::reset()
-{
+void CForecastRunner::SForecast::reset() {
     // clean up all non-simple types
     s_ForecastSeries.clear();
 }

-core_t::TTime CForecastRunner::SForecast::forecastEnd() const
-{
+core_t::TTime CForecastRunner::SForecast::forecastEnd() const {
     return s_StartTime + s_Duration;
 }
-
 }
 }
diff --git a/lib/api/CHierarchicalResultsWriter.cc b/lib/api/CHierarchicalResultsWriter.cc
index 8dade91da1..2f439b4e08 100644
--- a/lib/api/CHierarchicalResultsWriter.cc
+++ b/lib/api/CHierarchicalResultsWriter.cc
@@ -12,13 +12,10 @@

 #include

-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {

-namespace
-{
+namespace {
 using TOptionalDouble = boost::optional;
 using TOptionalUInt64 = boost::optional;
 using TDouble1Vec = core::CSmallVector;
@@ -28,29 +25,29 @@ const CHierarchicalResultsWriter::TStr1Vec EMPTY_STRING_LIST;
 }

 CHierarchicalResultsWriter::SResults::SResults(bool isAllTimeResult,
-                                               bool isOverallResult,
-                                               const std::string &partitionFieldName,
-                                               const std::string &partitionFieldValue,
-                                               const std::string &overFieldName,
-                                               const std::string &overFieldValue,
-                                               const std::string &byFieldName,
-                                               const std::string &byFieldValue,
-                                               const std::string &correlatedByFieldValue,
-                                               core_t::TTime bucketStartTime,
-                                               const std::string &functionName,
-                                               const std::string &functionDescription,
-                                               const TDouble1Vec &functionValue,
-                                               const TDouble1Vec &populationAverage,
-                                               double rawAnomalyScore,
-                                               double normalizedAnomalyScore,
-                                               double probability,
-                                               const TOptionalUInt64 &currentRate,
-                                               const std::string &metricValueField,
-                                               const TStoredStringPtrStoredStringPtrPrDoublePrVec &influences,
-                                               bool useNull,
-                                               bool metric,
-                                               int identifier,
-                                               core_t::TTime bucketSpan)
+                                               bool isOverallResult,
+                                               const std::string& partitionFieldName,
+                                               const std::string& partitionFieldValue,
+                                               const std::string& overFieldName,
+                                               const std::string& overFieldValue,
+                                               const std::string& byFieldName,
+                                               const std::string& byFieldValue,
+                                               const std::string& correlatedByFieldValue,
+                                               core_t::TTime bucketStartTime,
+                                               const std::string& functionName,
+                                               const std::string& functionDescription,
+                                               const TDouble1Vec& functionValue,
+                                               const TDouble1Vec& populationAverage,
+                                               double rawAnomalyScore,
+                                               double normalizedAnomalyScore,
+                                               double probability,
+                                               const TOptionalUInt64& currentRate,
+                                               const std::string& metricValueField,
+                                               const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences,
+                                               bool useNull,
+                                               bool metric,
+                                               int identifier,
+                                               core_t::TTime bucketSpan)
     : s_ResultType(E_PopulationResult),
       s_IsAllTimeResult(isAllTimeResult),
       s_IsOverallResult(isOverallResult),
@@ -78,33 +75,32 @@ CHierarchicalResultsWriter::SResults::SResults(bool isAllTimeResult,
       s_NormalizedAnomalyScore(normalizedAnomalyScore),
      s_Probability(probability),
      s_Influences(influences),
-      s_Identifier(identifier)
-{
+      s_Identifier(identifier) {
 }

 CHierarchicalResultsWriter::SResults::SResults(EResultType resultType,
-                                               const std::string &partitionFieldName,
-                                               const std::string &partitionFieldValue,
-                                               const std::string &byFieldName,
-                                               const std::string &byFieldValue,
-                                               const std::string &correlatedByFieldValue,
-                                               core_t::TTime bucketStartTime,
-                                               const std::string &functionName,
-                                               const std::string &functionDescription,
-                                               const TOptionalDouble &baselineRate,
-                                               const TOptionalUInt64 &currentRate,
-                                               const TDouble1Vec &baselineMean,
-                                               const TDouble1Vec &currentMean,
-                                               double rawAnomalyScore,
-                                               double normalizedAnomalyScore,
-                                               double probability,
-                                               const std::string &metricValueField,
-                                               const TStoredStringPtrStoredStringPtrPrDoublePrVec &influences,
-                                               bool useNull,
-                                               bool metric,
-                                               int identifier,
-                                               core_t::TTime bucketSpan,
-                                               TStr1Vec scheduledEventDescriptions)
+                                               const std::string& partitionFieldName,
+                                               const std::string& partitionFieldValue,
+                                               const std::string& byFieldName,
+                                               const std::string& byFieldValue,
+                                               const std::string& correlatedByFieldValue,
+                                               core_t::TTime bucketStartTime,
+                                               const std::string& functionName,
+                                               const std::string& functionDescription,
+                                               const TOptionalDouble& baselineRate,
+                                               const TOptionalUInt64& currentRate,
+                                               const TDouble1Vec& baselineMean,
+                                               const TDouble1Vec& currentMean,
+                                               double rawAnomalyScore,
+                                               double normalizedAnomalyScore,
+                                               double probability,
+                                               const std::string& metricValueField,
+                                               const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences,
+                                               bool useNull,
+                                               bool metric,
+                                               int identifier,
+                                               core_t::TTime bucketSpan,
+                                               TStr1Vec scheduledEventDescriptions)
     : s_ResultType(resultType),
       s_IsAllTimeResult(false),
       s_IsOverallResult(true),
@@ -135,32 +131,24 @@ CHierarchicalResultsWriter::SResults::SResults(EResultType resultType,
       s_Probability(probability),
      s_Influences(influences),
      s_Identifier(identifier),
-      s_ScheduledEventDescriptions(scheduledEventDescriptions)
-{
+      s_ScheduledEventDescriptions(scheduledEventDescriptions) {
 }

-CHierarchicalResultsWriter::CHierarchicalResultsWriter(const model::CLimits &limits,
-                                                       const model::CAnomalyDetectorModelConfig &modelConfig,
-                                                       const TResultWriterFunc &resultWriterFunc,
-                                                       const TPivotWriterFunc &pivotWriterFunc) :
-    m_Limits(limits),
-    m_ModelConfig(modelConfig),
-    m_ResultWriterFunc(resultWriterFunc),
-    m_PivotWriterFunc(pivotWriterFunc),
-    m_BucketTime(0)
-{
+CHierarchicalResultsWriter::CHierarchicalResultsWriter(const model::CLimits& limits,
+                                                       const model::CAnomalyDetectorModelConfig& modelConfig,
+                                                       const TResultWriterFunc& resultWriterFunc,
+                                                       const TPivotWriterFunc& pivotWriterFunc)
+    : m_Limits(limits),
+      m_ModelConfig(modelConfig),
+      m_ResultWriterFunc(resultWriterFunc),
+      m_PivotWriterFunc(pivotWriterFunc),
+      m_BucketTime(0) {
 }

-void CHierarchicalResultsWriter::visit(const model::CHierarchicalResults &results,
-                                       const TNode &node,
-                                       bool pivot)
-{
-    if (pivot)
-    {
+void CHierarchicalResultsWriter::visit(const model::CHierarchicalResults& results, const TNode& node, bool pivot) {
+    if (pivot) {
         this->writePivotResult(results, node);
-    }
-    else
-    {
+    } else {
         this->writePopulationResult(results, node);
         this->writeIndividualResult(results, node);
         this->writePartitionResult(results, node);
@@ -168,14 +156,9 @@ void CHierarchicalResultsWriter::visit(const model::CHierarchicalResults &result
     }
 }

-void CHierarchicalResultsWriter::writePopulationResult(const model::CHierarchicalResults &results,
-                                                       const TNode &node)
-{
-    if (   this->isSimpleCount(node)
-        || !this->isLeaf(node)
-        || !this->isPopulation(node)
-        || !this->shouldWriteResult(m_Limits, results, node, false))
-    {
+void CHierarchicalResultsWriter::writePopulationResult(const model::CHierarchicalResults& results, const TNode& node) {
+    if (this->isSimpleCount(node) || !this->isLeaf(node) || !this->isPopulation(node) ||
+        !this->shouldWriteResult(m_Limits, results, node, false)) {
         return;
     }

@@ -185,14 +168,14 @@
     // The attribute probabilities are returned in sorted order. This
     // is used to set the human readable description of the anomaly in
     // the GUI.
-    const std::string &functionDescription = node.s_AnnotatedProbability.s_AttributeProbabilities.empty() ?
-        EMPTY_STRING : model_t::outputFunctionName(node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature);
+    const std::string& functionDescription =
+        node.s_AnnotatedProbability.s_AttributeProbabilities.empty()
+            ? EMPTY_STRING
+            : model_t::outputFunctionName(node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature);

     TOptionalDouble null;
-    for (std::size_t i = 0; i < node.s_AnnotatedProbability.s_AttributeProbabilities.size(); ++i)
-    {
-        const model::SAttributeProbability &attributeProbability =
-            node.s_AnnotatedProbability.s_AttributeProbabilities[i];
+    for (std::size_t i = 0; i < node.s_AnnotatedProbability.s_AttributeProbabilities.size(); ++i) {
+        const model::SAttributeProbability& attributeProbability = node.s_AnnotatedProbability.s_AttributeProbabilities[i];

         // TODO - At present the display code can only cope with all the
         // attribute rows having the same output function name as the
@@ -207,29 +190,26 @@
        // it can't handle output from multiple different keys. This
        // needs to change at some point.
         model_t::EFeature feature = attributeProbability.s_Feature;
-        if (functionDescription != model_t::outputFunctionName(feature))
-        {
+        if (functionDescription != model_t::outputFunctionName(feature)) {
             continue;
         }
-        const std::string &attribute = *attributeProbability.s_Attribute;
-        const TDouble1Vec &personAttributeValue = attributeProbability.s_CurrentBucketValue;
-        if (personAttributeValue.empty())
-        {
+        const std::string& attribute = *attributeProbability.s_Attribute;
+        const TDouble1Vec& personAttributeValue = attributeProbability.s_CurrentBucketValue;
+        if (personAttributeValue.empty()) {
             LOG_ERROR("Failed to get current bucket value for " << attribute);
             continue;
         }
-        const TDouble1Vec &attributeMean = attributeProbability.s_BaselineBucketMean;
-        if (attributeMean.empty())
-        {
+        const TDouble1Vec& attributeMean = attributeProbability.s_BaselineBucketMean;
+        if (attributeMean.empty()) {
             LOG_ERROR("Failed to get population mean for " << attribute);
             continue;
         }

         m_ResultWriterFunc(
             TResults(false,
-                     false, // not an overall result
+                     false, // not an overall result
                      *node.s_Spec.s_PartitionFieldName,
                      *node.s_Spec.s_PartitionFieldValue,
                      *node.s_Spec.s_PersonFieldName,
@@ -251,36 +231,34 @@ void CHierarchicalResultsWriter::writePopulationResult(const model::CHierarchica
                      node.s_Spec.s_UseNull,
                      model::function_t::isMetric(node.s_Spec.s_Function),
                      node.s_Spec.s_Detector,
-                     node.s_BucketLength)
-        );
+                     node.s_BucketLength));
     }

     // Overall result for this person
-    m_ResultWriterFunc(
-        TResults(false,
-                 true, // this is an overall result
-                 *node.s_Spec.s_PartitionFieldName,
-                 *node.s_Spec.s_PartitionFieldValue,
-                 *node.s_Spec.s_PersonFieldName,
-                 *node.s_Spec.s_PersonFieldValue,
-                 *node.s_Spec.s_ByFieldName,
-                 EMPTY_STRING,
-                 EMPTY_STRING,
-                 node.s_BucketStartTime,
-                 *node.s_Spec.s_FunctionName,
-                 functionDescription,
-                 TDouble1Vec(1, 0.0), // no function value in overall result
-                 TDouble1Vec(1, 0.0), // no population average in overall result
-                 node.s_RawAnomalyScore,
-                 node.s_NormalizedAnomalyScore,
-                 node.probability(),
-                 node.s_AnnotatedProbability.s_CurrentBucketCount,
-                 *node.s_Spec.s_ValueFieldName,
-                 node.s_AnnotatedProbability.s_Influences,
-                 node.s_Spec.s_UseNull,
-                 model::function_t::isMetric(node.s_Spec.s_Function),
-                 node.s_Spec.s_Detector,
-                 node.s_BucketLength));
+    m_ResultWriterFunc(TResults(false,
+                                true, // this is an overall result
+                                *node.s_Spec.s_PartitionFieldName,
+                                *node.s_Spec.s_PartitionFieldValue,
+                                *node.s_Spec.s_PersonFieldName,
+                                *node.s_Spec.s_PersonFieldValue,
+                                *node.s_Spec.s_ByFieldName,
+                                EMPTY_STRING,
+                                EMPTY_STRING,
+                                node.s_BucketStartTime,
+                                *node.s_Spec.s_FunctionName,
+                                functionDescription,
+                                TDouble1Vec(1, 0.0), // no function value in overall result
+                                TDouble1Vec(1, 0.0), // no population average in overall result
+                                node.s_RawAnomalyScore,
+                                node.s_NormalizedAnomalyScore,
+                                node.probability(),
+                                node.s_AnnotatedProbability.s_CurrentBucketCount,
+                                *node.s_Spec.s_ValueFieldName,
+                                node.s_AnnotatedProbability.s_Influences,
+                                node.s_Spec.s_UseNull,
+                                model::function_t::isMetric(node.s_Spec.s_Function),
+                                node.s_Spec.s_Detector,
+                                node.s_BucketLength));

     // TODO - could also output "all time" results here
     // These would have the first argument to the SResults constructor
@@ -289,23 +267,17 @@
     // results)
 }

-void CHierarchicalResultsWriter::writeIndividualResult(const model::CHierarchicalResults &results,
-                                                       const TNode &node)
-{
-    if (   this->isSimpleCount(node)
-        || !this->isLeaf(node)
-        || this->isPopulation(node)
-        || !this->shouldWriteResult(m_Limits, results, node, false))
-    {
+void CHierarchicalResultsWriter::writeIndividualResult(const model::CHierarchicalResults& results, const TNode& node) {
+    if (this->isSimpleCount(node) || !this->isLeaf(node) || this->isPopulation(node) ||
+        !this->shouldWriteResult(m_Limits, results, node, false)) {
         return;
     }

-    model_t::EFeature feature = node.s_AnnotatedProbability.s_AttributeProbabilities.empty() ?
-                                model_t::E_IndividualCountByBucketAndPerson :
-                                node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature;
+    model_t::EFeature feature = node.s_AnnotatedProbability.s_AttributeProbabilities.empty()
+                                    ? model_t::E_IndividualCountByBucketAndPerson
+                                    : node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature;

-    const model::SAttributeProbability &attributeProbability =
-        node.s_AnnotatedProbability.s_AttributeProbabilities[0];
+    const model::SAttributeProbability& attributeProbability = node.s_AnnotatedProbability.s_AttributeProbabilities[0];

     m_ResultWriterFunc(
         TResults(E_Result,
@@ -330,117 +302,94 @@
                  model::function_t::isMetric(node.s_Spec.s_Function),
                  node.s_Spec.s_Detector,
                  node.s_BucketLength,
-                 EMPTY_STRING_LIST)
-        );
+                 EMPTY_STRING_LIST));
 }

-void CHierarchicalResultsWriter::writePartitionResult(const model::CHierarchicalResults &results,
-                                                      const TNode &node)
-{
-    if (   !m_ModelConfig.perPartitionNormalization()
-        || this->isSimpleCount(node)
-        || this->isPopulation(node)
-        || !this->isPartition(node)
-        || !this->shouldWriteResult(m_Limits, results, node, false)
-       )
-    {
+void CHierarchicalResultsWriter::writePartitionResult(const model::CHierarchicalResults& results, const TNode& node) {
+    if (!m_ModelConfig.perPartitionNormalization() || this->isSimpleCount(node) || this->isPopulation(node) || !this->isPartition(node) ||
+        !this->shouldWriteResult(m_Limits, results, node, false)) {
         return;
     }

-    model_t::EFeature feature = node.s_AnnotatedProbability.s_AttributeProbabilities.empty() ?
-                                model_t::E_IndividualCountByBucketAndPerson :
-                                node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature;
+    model_t::EFeature feature = node.s_AnnotatedProbability.s_AttributeProbabilities.empty()
+                                    ? model_t::E_IndividualCountByBucketAndPerson
+                                    : node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature;

     TDouble1Vec emptyDoubleVec;

-    m_ResultWriterFunc(
-        TResults(E_PartitionResult,
-                 *node.s_Spec.s_PartitionFieldName,
-                 *node.s_Spec.s_PartitionFieldValue,
-                 *node.s_Spec.s_ByFieldName,
-                 *node.s_Spec.s_PersonFieldValue,
-                 EMPTY_STRING,
-                 node.s_BucketStartTime,
-                 *node.s_Spec.s_FunctionName,
-                 model_t::outputFunctionName(feature),
-                 node.s_AnnotatedProbability.s_BaselineBucketCount,
-                 node.s_AnnotatedProbability.s_CurrentBucketCount,
-                 emptyDoubleVec,
-                 emptyDoubleVec,
-                 node.s_RawAnomalyScore,
-                 node.s_NormalizedAnomalyScore,
-                 node.probability(),
-                 *node.s_Spec.s_ValueFieldName,
-                 node.s_AnnotatedProbability.s_Influences,
-                 node.s_Spec.s_UseNull,
-                 model::function_t::isMetric(node.s_Spec.s_Function),
-                 node.s_Spec.s_Detector,
-                 node.s_BucketLength,
-                 EMPTY_STRING_LIST)
-        );
+    m_ResultWriterFunc(TResults(E_PartitionResult,
+                                *node.s_Spec.s_PartitionFieldName,
+                                *node.s_Spec.s_PartitionFieldValue,
+                                *node.s_Spec.s_ByFieldName,
+                                *node.s_Spec.s_PersonFieldValue,
+                                EMPTY_STRING,
+                                node.s_BucketStartTime,
+                                *node.s_Spec.s_FunctionName,
+                                model_t::outputFunctionName(feature),
+                                node.s_AnnotatedProbability.s_BaselineBucketCount,
+                                node.s_AnnotatedProbability.s_CurrentBucketCount,
+                                emptyDoubleVec,
+                                emptyDoubleVec,
+                                node.s_RawAnomalyScore,
+                                node.s_NormalizedAnomalyScore,
+                                node.probability(),
+                                *node.s_Spec.s_ValueFieldName,
+                                node.s_AnnotatedProbability.s_Influences,
+                                node.s_Spec.s_UseNull,
+                                model::function_t::isMetric(node.s_Spec.s_Function),
+                                node.s_Spec.s_Detector,
+                                node.s_BucketLength,
+                                EMPTY_STRING_LIST));
 }

-void CHierarchicalResultsWriter::writePivotResult(const model::CHierarchicalResults &results,
-                                                  const TNode &node)
-{
-    if (   this->isSimpleCount(node)
-        || !this->shouldWriteResult(m_Limits, results, node, true))
-    {
+void CHierarchicalResultsWriter::writePivotResult(const model::CHierarchicalResults& results, const TNode& node) {
+    if (this->isSimpleCount(node) || !this->shouldWriteResult(m_Limits, results, node, true)) {
         return;
     }

     LOG_TRACE("bucket start time " << m_BucketTime);
-    if (!m_PivotWriterFunc(m_BucketTime, node, this->isRoot(node)))
-    {
+    if (!m_PivotWriterFunc(m_BucketTime, node, this->isRoot(node))) {
         LOG_ERROR("Failed to write influencer result for " << node.s_Spec.print());
         return;
     }
 }

-void CHierarchicalResultsWriter::writeSimpleCountResult(const TNode &node)
-{
-    if (!this->isSimpleCount(node))
-    {
+void CHierarchicalResultsWriter::writeSimpleCountResult(const TNode& node) {
+    if (!this->isSimpleCount(node)) {
         return;
     }

     m_BucketTime = node.s_BucketStartTime;
     TOptionalDouble baselineCount = node.s_AnnotatedProbability.s_BaselineBucketCount;
     TOptionalUInt64 currentCount = node.s_AnnotatedProbability.s_CurrentBucketCount;
-
-    m_ResultWriterFunc(
-        TResults(E_SimpleCountResult,
-                 *node.s_Spec.s_PartitionFieldName,
-                 *node.s_Spec.s_PartitionFieldValue,
-                 *node.s_Spec.s_ByFieldName,
-                 *node.s_Spec.s_PersonFieldValue,
-                 EMPTY_STRING,
-                 m_BucketTime,
-                 EMPTY_STRING,
-                 EMPTY_STRING,
-                 baselineCount,
-                 currentCount,
-                 baselineCount ? TDouble1Vec(1, *baselineCount) : TDouble1Vec(),
-                 currentCount ?
TDouble1Vec(1, static_cast<double>(*currentCount)) : TDouble1Vec(),
-                 node.s_RawAnomalyScore,
-                 node.s_NormalizedAnomalyScore,
-                 node.probability(),
-                 *node.s_Spec.s_ValueFieldName ,
-                 node.s_AnnotatedProbability.s_Influences,
-                 node.s_Spec.s_UseNull,
-                 model::function_t::isMetric(node.s_Spec.s_Function),
-                 node.s_Spec.s_Detector,
-                 node.s_BucketLength,
-                 node.s_Spec.s_ScheduledEventDescriptions)
-        );
-
+    TOptionalUInt64 currentCount = node.s_AnnotatedProbability.s_CurrentBucketCount;
+
+    m_ResultWriterFunc(TResults(E_SimpleCountResult,
+                                *node.s_Spec.s_PartitionFieldName,
+                                *node.s_Spec.s_PartitionFieldValue,
+                                *node.s_Spec.s_ByFieldName,
+                                *node.s_Spec.s_PersonFieldValue,
+                                EMPTY_STRING,
+                                m_BucketTime,
+                                EMPTY_STRING,
+                                EMPTY_STRING,
+                                baselineCount,
+                                currentCount,
+                                baselineCount ? TDouble1Vec(1, *baselineCount) : TDouble1Vec(),
+                                currentCount ? TDouble1Vec(1, static_cast<double>(*currentCount)) : TDouble1Vec(),
+                                node.s_RawAnomalyScore,
+                                node.s_NormalizedAnomalyScore,
+                                node.probability(),
+                                *node.s_Spec.s_ValueFieldName,
+                                node.s_AnnotatedProbability.s_Influences,
+                                node.s_Spec.s_UseNull,
+                                model::function_t::isMetric(node.s_Spec.s_Function),
+                                node.s_Spec.s_Detector,
+                                node.s_BucketLength,
+                                node.s_Spec.s_ScheduledEventDescriptions));
 }

-void CHierarchicalResultsWriter::findParentProbabilities(const TNode &node,
-                                                         double &personProbability,
-                                                         double &partitionProbability)
-{
+void CHierarchicalResultsWriter::findParentProbabilities(const TNode& node, double& personProbability, double& partitionProbability) {
     // The idea is that if person doesn't exist then the person probability is
     // set to the leaf probability, and if partition doesn't exist then the
     // partition probability is set to the person probability (or if person
@@ -449,24 +398,18 @@ void CHierarchicalResultsWriter::findParentProbabilities(const TNode &node,
     personProbability = node.probability();
     partitionProbability = node.probability();

-    for (const TNode *parent = node.s_Parent;
-         parent != 0;
-         parent = parent->s_Parent)
-    {
-        if (CHierarchicalResultsWriter::isPartition(*parent))
-        {
+    for (const TNode* parent = node.s_Parent; parent != 0; parent = parent->s_Parent) {
+        if (CHierarchicalResultsWriter::isPartition(*parent)) {
             partitionProbability = parent->probability();
             // This makes the assumption that partition will be higher than
             // person in the hierarchy
             break;
         }
-        if (CHierarchicalResultsWriter::isPerson(*parent))
-        {
+        if (CHierarchicalResultsWriter::isPerson(*parent)) {
             personProbability = parent->probability();
             partitionProbability = parent->probability();
         }
     }
 }
-
 }
 }
diff --git a/lib/api/CInputParser.cc b/lib/api/CInputParser.cc
index 15a5383d55..5d90e9fe0d 100644
--- a/lib/api/CInputParser.cc
+++ b/lib/api/CInputParser.cc
@@ -5,54 +5,37 @@
  */
 #include

+namespace ml {
+namespace api {
-
-namespace ml
-{
-namespace api
-{
-
-
-CInputParser::CInputParser()
-    : m_GotFieldNames(false),
-      m_GotData(false)
-{
+CInputParser::CInputParser() : m_GotFieldNames(false), m_GotData(false) {
 }

-CInputParser::~CInputParser()
-{
+CInputParser::~CInputParser() {
 }

-bool CInputParser::gotFieldNames() const
-{
+bool CInputParser::gotFieldNames() const {
     return m_GotFieldNames;
 }

-bool CInputParser::gotData() const
-{
+bool CInputParser::gotData() const {
     return m_GotData;
 }

-const CInputParser::TStrVec &CInputParser::fieldNames() const
-{
+const CInputParser::TStrVec& CInputParser::fieldNames() const {
     return m_FieldNames;
 }

-void CInputParser::gotFieldNames(bool gotFieldNames)
-{
+void CInputParser::gotFieldNames(bool gotFieldNames) {
m_GotFieldNames = gotFieldNames; } -void CInputParser::gotData(bool gotData) -{ +void CInputParser::gotData(bool gotData) { m_GotData = gotData; } -CInputParser::TStrVec &CInputParser::fieldNames() -{ +CInputParser::TStrVec& CInputParser::fieldNames() { return m_FieldNames; } - - } } - diff --git a/lib/api/CIoManager.cc b/lib/api/CIoManager.cc index c79fb1d3b3..b477bb5136 100644 --- a/lib/api/CIoManager.cc +++ b/lib/api/CIoManager.cc @@ -11,62 +11,47 @@ #include #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ +namespace { -namespace -{ - -bool setUpIStream(const std::string &fileName, - bool isFileNamedPipe, - core::CNamedPipeFactory::TIStreamP &stream) -{ - if (fileName.empty()) - { +bool setUpIStream(const std::string& fileName, bool isFileNamedPipe, core::CNamedPipeFactory::TIStreamP& stream) { + if (fileName.empty()) { stream.reset(); return true; } - if (isFileNamedPipe) - { + if (isFileNamedPipe) { stream = core::CNamedPipeFactory::openPipeStreamRead(fileName); return stream != 0 && !stream->bad(); } - std::ifstream *fileStream(0); + std::ifstream* fileStream(0); stream.reset(fileStream = new std::ifstream(fileName.c_str())); return fileStream->is_open(); } -bool setUpOStream(const std::string &fileName, - bool isFileNamedPipe, - core::CNamedPipeFactory::TOStreamP &stream) -{ - if (fileName.empty()) - { +bool setUpOStream(const std::string& fileName, bool isFileNamedPipe, core::CNamedPipeFactory::TOStreamP& stream) { + if (fileName.empty()) { stream.reset(); return true; } - if (isFileNamedPipe) - { + if (isFileNamedPipe) { stream = core::CNamedPipeFactory::openPipeStreamWrite(fileName); return stream != 0 && !stream->bad(); } - std::ofstream *fileStream(0); + std::ofstream* fileStream(0); stream.reset(fileStream = new std::ofstream(fileName.c_str())); return fileStream->is_open(); } - } -CIoManager::CIoManager(const std::string &inputFileName, +CIoManager::CIoManager(const std::string& inputFileName, bool isInputFileNamedPipe, - const std::string &outputFileName, + const std::string& outputFileName, bool isOutputFileNamedPipe, - const std::string &restoreFileName, + const std::string& restoreFileName, bool isRestoreFileNamedPipe, - const std::string &persistFileName, + const std::string& persistFileName, bool isPersistFileNamedPipe) : m_IoInitialised(false), m_InputFileName(inputFileName), @@ -76,13 +61,11 @@ CIoManager::CIoManager(const std::string &inputFileName, m_RestoreFileName(restoreFileName), m_IsRestoreFileNamedPipe(isRestoreFileNamedPipe && !restoreFileName.empty()), m_PersistFileName(persistFileName), - m_IsPersistFileNamedPipe(isPersistFileNamedPipe && !persistFileName.empty()) -{ + m_IsPersistFileNamedPipe(isPersistFileNamedPipe && !persistFileName.empty()) { // On some platforms input/output can be considerably faster if C and C++ IO // functionality is NOT synchronised. 
bool wasSynchronised(std::ios::sync_with_stdio(false)); - if (wasSynchronised) - { + if (wasSynchronised) { LOG_TRACE("C++ streams no longer synchronised with C stdio"); } @@ -93,12 +76,10 @@ CIoManager::CIoManager(const std::string &inputFileName, std::cerr.tie(0); } -CIoManager::~CIoManager() -{ +CIoManager::~CIoManager() { } -bool CIoManager::initIo() -{ +bool CIoManager::initIo() { m_IoInitialised = setUpIStream(m_InputFileName, m_IsInputFileNamedPipe, m_InputStream) && setUpOStream(m_OutputFileName, m_IsOutputFileNamedPipe, m_OutputStream) && setUpIStream(m_RestoreFileName, m_IsRestoreFileNamedPipe, m_RestoreStream) && @@ -106,57 +87,44 @@ bool CIoManager::initIo() return m_IoInitialised; } -std::istream &CIoManager::inputStream() -{ - if (m_InputStream != 0) - { +std::istream& CIoManager::inputStream() { + if (m_InputStream != 0) { return *m_InputStream; } - if (!m_IoInitialised) - { + if (!m_IoInitialised) { LOG_ERROR("Accessing input stream before IO is initialised"); } return std::cin; } -std::ostream &CIoManager::outputStream() -{ - if (m_OutputStream != 0) - { +std::ostream& CIoManager::outputStream() { + if (m_OutputStream != 0) { return *m_OutputStream; } - if (!m_IoInitialised) - { + if (!m_IoInitialised) { LOG_ERROR("Accessing output stream before IO is initialised"); } return std::cout; } -core::CNamedPipeFactory::TIStreamP CIoManager::restoreStream() -{ - if (!m_IoInitialised) - { +core::CNamedPipeFactory::TIStreamP CIoManager::restoreStream() { + if (!m_IoInitialised) { LOG_ERROR("Accessing restore stream before IO is initialised"); } return m_RestoreStream; } -core::CNamedPipeFactory::TOStreamP CIoManager::persistStream() -{ - if (!m_IoInitialised) - { +core::CNamedPipeFactory::TOStreamP CIoManager::persistStream() { + if (!m_IoInitialised) { LOG_ERROR("Accessing persist stream before IO is initialised"); } return m_PersistStream; } - - } } - diff --git a/lib/api/CJsonOutputWriter.cc b/lib/api/CJsonOutputWriter.cc index cbdb34e1b0..3460ce4510 100644 --- a/lib/api/CJsonOutputWriter.cc +++ b/lib/api/CJsonOutputWriter.cc @@ -18,13 +18,10 @@ #include #include -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { -namespace -{ +namespace { // JSON field names const std::string JOB_ID("job_id"); @@ -80,12 +77,9 @@ const std::string QUANTILES("quantiles"); //! Assumes the document contains the field. //! The caller is responsible for ensuring this, and a //! program crash is likely if this requirement is not met. -double doubleFromDocument(const CJsonOutputWriter::TDocumentWeakPtr &weakDoc, - const std::string &field) -{ +double doubleFromDocument(const CJsonOutputWriter::TDocumentWeakPtr& weakDoc, const std::string& field) { CJsonOutputWriter::TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. JSON document unavailable."); return 0.0; } @@ -93,96 +87,67 @@ double doubleFromDocument(const CJsonOutputWriter::TDocumentWeakPtr &weakDoc, } //! 
Sort rapidjson documents by the probability lowest to highest
-class CProbabilityLess
-{
-    public:
-        bool operator()(const CJsonOutputWriter::TDocumentWeakPtrIntPr &lhs,
-                        const CJsonOutputWriter::TDocumentWeakPtrIntPr &rhs) const
-        {
-            return doubleFromDocument(lhs.first, PROBABILITY)
-                 < doubleFromDocument(rhs.first, PROBABILITY);
-        }
+class CProbabilityLess {
+public:
+    bool operator()(const CJsonOutputWriter::TDocumentWeakPtrIntPr& lhs, const CJsonOutputWriter::TDocumentWeakPtrIntPr& rhs) const {
+        return doubleFromDocument(lhs.first, PROBABILITY) < doubleFromDocument(rhs.first, PROBABILITY);
+    }
 };

 const CProbabilityLess PROBABILITY_LESS = CProbabilityLess();

-
 //! Sort rapidjson documents by detector name first then probability lowest to highest
-class CDetectorThenProbabilityLess
-{
-    public:
-        bool operator()(const CJsonOutputWriter::TDocumentWeakPtrIntPr &lhs,
-                        const CJsonOutputWriter::TDocumentWeakPtrIntPr &rhs) const
-        {
-            if (lhs.second == rhs.second)
-            {
-                return doubleFromDocument(lhs.first, PROBABILITY)
-                     < doubleFromDocument(rhs.first, PROBABILITY);
-            }
-            return lhs.second < rhs.second;
+class CDetectorThenProbabilityLess {
+public:
+    bool operator()(const CJsonOutputWriter::TDocumentWeakPtrIntPr& lhs, const CJsonOutputWriter::TDocumentWeakPtrIntPr& rhs) const {
+        if (lhs.second == rhs.second) {
+            return doubleFromDocument(lhs.first, PROBABILITY) < doubleFromDocument(rhs.first, PROBABILITY);
         }
+        return lhs.second < rhs.second;
+    }
 };

 const CDetectorThenProbabilityLess DETECTOR_PROBABILITY_LESS = CDetectorThenProbabilityLess();

 //! Sort influences from highest to lowest
-class CInfluencesLess
-{
-    public:
-        bool operator()(const std::pair<const char *, double> &lhs,
-                        const std::pair<const char *, double> &rhs) const
-        {
-            return lhs.second > rhs.second;
-        }
+class CInfluencesLess {
+public:
+    bool operator()(const std::pair<const char*, double>& lhs, const std::pair<const char*, double>& rhs) const {
+        return lhs.second > rhs.second;
+    }
 };

 const CInfluencesLess INFLUENCE_LESS = CInfluencesLess();

 //!
Sort influencers from highest to lowest by score
-class CInfluencerGreater
-{
-    public:
-        CInfluencerGreater(const std::string &field)
-            : m_Field(field)
-        {
-        }
+class CInfluencerGreater {
+public:
+    CInfluencerGreater(const std::string& field) : m_Field(field) {}

-        bool operator()(const CJsonOutputWriter::TDocumentWeakPtr &lhs,
-                        const CJsonOutputWriter::TDocumentWeakPtr &rhs) const
-        {
-            return doubleFromDocument(lhs, m_Field) > doubleFromDocument(rhs, m_Field);
-        }
+    bool operator()(const CJsonOutputWriter::TDocumentWeakPtr& lhs, const CJsonOutputWriter::TDocumentWeakPtr& rhs) const {
+        return doubleFromDocument(lhs, m_Field) > doubleFromDocument(rhs, m_Field);
+    }

-    private:
-        const std::string &m_Field;
+private:
+    const std::string& m_Field;
 };

 const CInfluencerGreater INFLUENCER_GREATER = CInfluencerGreater(INITIAL_INFLUENCER_SCORE);
 const CInfluencerGreater BUCKET_INFLUENCER_GREATER = CInfluencerGreater(INITIAL_SCORE);
-
 }
-
-CJsonOutputWriter::CJsonOutputWriter(const std::string &jobId, core::CJsonOutputStreamWrapper &strmOut)
-    : m_JobId(jobId),
-      m_Writer(strmOut),
-      m_LastNonInterimBucketTime(0),
-      m_Finalised(false),
-      m_RecordOutputLimit(0)
-{
+CJsonOutputWriter::CJsonOutputWriter(const std::string& jobId, core::CJsonOutputStreamWrapper& strmOut)
+    : m_JobId(jobId), m_Writer(strmOut), m_LastNonInterimBucketTime(0), m_Finalised(false), m_RecordOutputLimit(0) {
     // Don't write any output in the constructor because, the way things work at
     // the moment, the output stream might be redirected after construction
 }

-CJsonOutputWriter::~CJsonOutputWriter()
-{
+CJsonOutputWriter::~CJsonOutputWriter() {
     finalise();
 }

-void CJsonOutputWriter::finalise()
-{
-    if (m_Finalised)
-    {
+void CJsonOutputWriter::finalise() {
+    if (m_Finalised) {
         return;
     }
@@ -192,14 +157,11 @@ void CJsonOutputWriter::finalise()
     m_Finalised = true;
 }

-bool CJsonOutputWriter::acceptResult(const CHierarchicalResultsWriter::TResults &results)
-{
-    SBucketData &bucketData = m_BucketDataByTime[results.s_BucketStartTime];
+bool CJsonOutputWriter::acceptResult(const CHierarchicalResultsWriter::TResults& results) {
+    SBucketData& bucketData = m_BucketDataByTime[results.s_BucketStartTime];

-    if (results.s_ResultType == CHierarchicalResultsWriter::E_SimpleCountResult)
-    {
-        if (!results.s_CurrentRate)
-        {
+    if (results.s_ResultType == CHierarchicalResultsWriter::E_SimpleCountResult) {
+        if (!results.s_CurrentRate) {
             LOG_ERROR("Simple count detector has no current rate");
             return false;
         }
@@ -211,8 +173,7 @@
     }

     TDocumentWeakPtr newDoc;
-    if (!results.s_IsOverallResult)
-    {
+    if (!results.s_IsOverallResult) {
         newDoc = m_Writer.makeStorableDoc();
         this->addPopulationCauseFields(results, newDoc);
         m_NestedDocs.push_back(newDoc);
@@ -220,8 +181,7 @@
         return true;
     }

-    if (results.s_ResultType == CHierarchicalResultsWriter::E_PartitionResult)
-    {
+    if (results.s_ResultType == CHierarchicalResultsWriter::E_PartitionResult) {
         TDocumentWeakPtr partitionDoc = m_Writer.makeStorableDoc();
         this->addPartitionScores(results, partitionDoc);
         bucketData.s_PartitionScoreDocuments.push_back(partitionDoc);
@@ -231,26 +191,21 @@

     ++bucketData.s_RecordCount;

-    TDocumentWeakPtrIntPrVec &detectorDocumentsToWrite = bucketData.s_DocumentsToWrite;
+    TDocumentWeakPtrIntPrVec& detectorDocumentsToWrite =
bucketData.s_DocumentsToWrite; bool makeHeap(false); // If a max number of records to output has not been set or we haven't // reached that limit yet just append the new document to the array - if (m_RecordOutputLimit == 0 || - bucketData.s_RecordCount <= m_RecordOutputLimit) - { + if (m_RecordOutputLimit == 0 || bucketData.s_RecordCount <= m_RecordOutputLimit) { newDoc = m_Writer.makeStorableDoc(); detectorDocumentsToWrite.push_back(TDocumentWeakPtrIntPr(newDoc, results.s_Identifier)); // the document array is now full, make a max heap makeHeap = bucketData.s_RecordCount == m_RecordOutputLimit; - } - else - { + } else { // Have reached the limit of records to write so compare the new doc // to the highest probability anomaly doc and replace if more anomalous - if (results.s_Probability >= bucketData.s_HighestProbability) - { + if (results.s_Probability >= bucketData.s_HighestProbability) { // Discard any associated nested docs m_NestedDocs.clear(); return true; @@ -258,8 +213,7 @@ bool CJsonOutputWriter::acceptResult(const CHierarchicalResultsWriter::TResults newDoc = m_Writer.makeStorableDoc(); // remove the highest prob doc and insert new one - std::pop_heap(detectorDocumentsToWrite.begin(), detectorDocumentsToWrite.end(), - PROBABILITY_LESS); + std::pop_heap(detectorDocumentsToWrite.begin(), detectorDocumentsToWrite.end(), PROBABILITY_LESS); detectorDocumentsToWrite.pop_back(); detectorDocumentsToWrite.push_back(TDocumentWeakPtrIntPr(newDoc, results.s_Identifier)); @@ -269,53 +223,37 @@ bool CJsonOutputWriter::acceptResult(const CHierarchicalResultsWriter::TResults // The check for population results must come first because some population // results are also metrics - if (results.s_ResultType == CHierarchicalResultsWriter::E_PopulationResult) - { + if (results.s_ResultType == CHierarchicalResultsWriter::E_PopulationResult) { this->addPopulationFields(results, newDoc); - } - else if (results.s_IsMetric) - { + } else if (results.s_IsMetric) { this->addMetricFields(results, newDoc); - } - else - { + } else { this->addEventRateFields(results, newDoc); } - this->addInfluences(results.s_Influences, newDoc); - if (makeHeap) - { - std::make_heap(detectorDocumentsToWrite.begin(), detectorDocumentsToWrite.end(), - PROBABILITY_LESS); + if (makeHeap) { + std::make_heap(detectorDocumentsToWrite.begin(), detectorDocumentsToWrite.end(), PROBABILITY_LESS); - bucketData.s_HighestProbability = doubleFromDocument( - detectorDocumentsToWrite.front().first, PROBABILITY); + bucketData.s_HighestProbability = doubleFromDocument(detectorDocumentsToWrite.front().first, PROBABILITY); makeHeap = false; } return true; } -bool CJsonOutputWriter::acceptInfluencer(core_t::TTime time, - const model::CHierarchicalResults::TNode &node, - bool isBucketInfluencer) -{ +bool CJsonOutputWriter::acceptInfluencer(core_t::TTime time, const model::CHierarchicalResults::TNode& node, bool isBucketInfluencer) { TDocumentWeakPtr newDoc = m_Writer.makeStorableDoc(); - SBucketData &bucketData = m_BucketDataByTime[time]; - TDocumentWeakPtrVec &documents = (isBucketInfluencer) ? bucketData.s_BucketInfluencerDocuments : - bucketData.s_InfluencerDocuments; + SBucketData& bucketData = m_BucketDataByTime[time]; + TDocumentWeakPtrVec& documents = (isBucketInfluencer) ? bucketData.s_BucketInfluencerDocuments : bucketData.s_InfluencerDocuments; bool isLimitedWrite(m_RecordOutputLimit > 0); - if (isLimitedWrite && documents.size() == m_RecordOutputLimit) - { - double &lowestScore = (isBucketInfluencer) ? 
bucketData.s_LowestBucketInfluencerScore : - bucketData.s_LowestInfluencerScore; + if (isLimitedWrite && documents.size() == m_RecordOutputLimit) { + double& lowestScore = (isBucketInfluencer) ? bucketData.s_LowestBucketInfluencerScore : bucketData.s_LowestInfluencerScore; - if (node.s_NormalizedAnomalyScore < lowestScore) - { + if (node.s_NormalizedAnomalyScore < lowestScore) { // Don't write this influencer return true; } @@ -329,28 +267,19 @@ bool CJsonOutputWriter::acceptInfluencer(core_t::TTime time, bool sortVectorAfterWritingDoc = isLimitedWrite && documents.size() >= m_RecordOutputLimit; - if (sortVectorAfterWritingDoc) - { - std::sort(documents.begin(), - documents.end(), - isBucketInfluencer ? BUCKET_INFLUENCER_GREATER : INFLUENCER_GREATER); + if (sortVectorAfterWritingDoc) { + std::sort(documents.begin(), documents.end(), isBucketInfluencer ? BUCKET_INFLUENCER_GREATER : INFLUENCER_GREATER); } - if (isBucketInfluencer) - { + if (isBucketInfluencer) { bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore = - std::max(bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore, - node.s_NormalizedAnomalyScore); + std::max(bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore, node.s_NormalizedAnomalyScore); - bucketData.s_LowestBucketInfluencerScore = std::min( - bucketData.s_LowestBucketInfluencerScore, - doubleFromDocument(documents.back(), INITIAL_SCORE)); - } - else - { - bucketData.s_LowestInfluencerScore = std::min( - bucketData.s_LowestInfluencerScore, - doubleFromDocument(documents.back(), INITIAL_INFLUENCER_SCORE)); + bucketData.s_LowestBucketInfluencerScore = + std::min(bucketData.s_LowestBucketInfluencerScore, doubleFromDocument(documents.back(), INITIAL_SCORE)); + } else { + bucketData.s_LowestInfluencerScore = + std::min(bucketData.s_LowestInfluencerScore, doubleFromDocument(documents.back(), INITIAL_INFLUENCER_SCORE)); } return true; @@ -359,18 +288,15 @@ bool CJsonOutputWriter::acceptInfluencer(core_t::TTime time, void CJsonOutputWriter::acceptBucketTimeInfluencer(core_t::TTime time, double probability, double rawAnomalyScore, - double normalizedAnomalyScore) -{ - SBucketData &bucketData = m_BucketDataByTime[time]; - if (bucketData.s_RecordCount == 0) - { + double normalizedAnomalyScore) { + SBucketData& bucketData = m_BucketDataByTime[time]; + if (bucketData.s_RecordCount == 0) { return; } TDocumentWeakPtr doc = m_Writer.makeStorableDoc(); TDocumentPtr newDoc = doc.lock(); - if (!newDoc) - { + if (!newDoc) { LOG_ERROR("Failed to create new JSON document"); return; } @@ -380,20 +306,15 @@ void CJsonOutputWriter::acceptBucketTimeInfluencer(core_t::TTime time, m_Writer.addDoubleFieldToObj(INITIAL_SCORE, normalizedAnomalyScore, *newDoc); m_Writer.addDoubleFieldToObj(ANOMALY_SCORE, normalizedAnomalyScore, *newDoc); - bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore = std::max( - bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore, normalizedAnomalyScore); + bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore = + std::max(bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore, normalizedAnomalyScore); bucketData.s_BucketInfluencerDocuments.push_back(doc); } -bool CJsonOutputWriter::endOutputBatch(bool isInterim, uint64_t bucketProcessingTime) -{ - for (TTimeBucketDataMapItr iter = m_BucketDataByTime.begin(); - iter != m_BucketDataByTime.end(); - ++iter) - { +bool CJsonOutputWriter::endOutputBatch(bool isInterim, uint64_t bucketProcessingTime) { + for (TTimeBucketDataMapItr iter = m_BucketDataByTime.begin(); iter != m_BucketDataByTime.end(); ++iter) { 
this->writeBucket(isInterim, iter->first, iter->second, bucketProcessingTime); - if (!isInterim) - { + if (!isInterim) { m_LastNonInterimBucketTime = iter->first; } } @@ -403,49 +324,35 @@ bool CJsonOutputWriter::endOutputBatch(bool isInterim, uint64_t bucketProcessing m_BucketDataByTime.clear(); m_NestedDocs.clear(); - return true; } -bool CJsonOutputWriter::fieldNames(const TStrVec &/*fieldNames*/, - const TStrVec &/*extraFieldNames*/) -{ +bool CJsonOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { return true; } -const CJsonOutputWriter::TStrVec &CJsonOutputWriter::fieldNames() const -{ +const CJsonOutputWriter::TStrVec& CJsonOutputWriter::fieldNames() const { return EMPTY_FIELD_NAMES; } -bool CJsonOutputWriter::writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields) -{ +bool CJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { rapidjson::Document doc = m_Writer.makeDoc(); - // Write all the fields to the document as strings // No need to copy the strings as the doc is written straight away - for (TStrStrUMapCItr fieldValueIter = dataRowFields.begin(); - fieldValueIter != dataRowFields.end(); - ++fieldValueIter) - { - const std::string &name = fieldValueIter->first; - const std::string &value = fieldValueIter->second; + for (TStrStrUMapCItr fieldValueIter = dataRowFields.begin(); fieldValueIter != dataRowFields.end(); ++fieldValueIter) { + const std::string& name = fieldValueIter->first; + const std::string& value = fieldValueIter->second; // Only output fields that aren't overridden - if (overrideDataRowFields.find(name) == overrideDataRowFields.end()) - { + if (overrideDataRowFields.find(name) == overrideDataRowFields.end()) { m_Writer.addMemberRef(name, value, doc); } } - for (TStrStrUMapCItr fieldValueIter = overrideDataRowFields.begin(); - fieldValueIter != overrideDataRowFields.end(); - ++fieldValueIter) - { - const std::string &name = fieldValueIter->first; - const std::string &value = fieldValueIter->second; + for (TStrStrUMapCItr fieldValueIter = overrideDataRowFields.begin(); fieldValueIter != overrideDataRowFields.end(); ++fieldValueIter) { + const std::string& name = fieldValueIter->first; + const std::string& value = fieldValueIter->second; m_Writer.addMemberRef(name, value, doc); } @@ -455,19 +362,12 @@ bool CJsonOutputWriter::writeRow(const TStrStrUMap &dataRowFields, return true; } -void CJsonOutputWriter::writeBucket(bool isInterim, - core_t::TTime bucketTime, - SBucketData &bucketData, - uint64_t bucketProcessingTime) -{ +void CJsonOutputWriter::writeBucket(bool isInterim, core_t::TTime bucketTime, SBucketData& bucketData, uint64_t bucketProcessingTime) { // Write records - if (!bucketData.s_DocumentsToWrite.empty()) - { + if (!bucketData.s_DocumentsToWrite.empty()) { // Sort the results so they are grouped by detector and // ordered by probability - std::sort(bucketData.s_DocumentsToWrite.begin(), - bucketData.s_DocumentsToWrite.end(), - DETECTOR_PROBABILITY_LESS); + std::sort(bucketData.s_DocumentsToWrite.begin(), bucketData.s_DocumentsToWrite.end(), DETECTOR_PROBABILITY_LESS); m_Writer.StartObject(); m_Writer.String(RECORDS); @@ -476,14 +376,12 @@ void CJsonOutputWriter::writeBucket(bool isInterim, // Iterate over the different detectors that we have results for for (TDocumentWeakPtrIntPrVecItr detectorIter = bucketData.s_DocumentsToWrite.begin(); detectorIter != bucketData.s_DocumentsToWrite.end(); - ++detectorIter) - { + ++detectorIter) { 
// Write the document, adding some extra fields as we go int detectorIndex = detectorIter->second; TDocumentWeakPtr weakDoc = detectorIter->first; TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. JSON document unavailable."); continue; } @@ -493,8 +391,7 @@ void CJsonOutputWriter::writeBucket(bool isInterim, m_Writer.addStringFieldCopyToObj(JOB_ID, m_JobId, *docPtr); m_Writer.addTimeFieldToObj(TIMESTAMP, bucketTime, *docPtr); - if (isInterim) - { + if (isInterim) { m_Writer.addBoolFieldToObj(IS_INTERIM, isInterim, *docPtr); } m_Writer.write(*docPtr); @@ -504,27 +401,23 @@ void CJsonOutputWriter::writeBucket(bool isInterim, } // Write influencers - if (!bucketData.s_InfluencerDocuments.empty()) - { + if (!bucketData.s_InfluencerDocuments.empty()) { m_Writer.StartObject(); m_Writer.String(INFLUENCERS); m_Writer.StartArray(); for (TDocumentWeakPtrVecItr influencerIter = bucketData.s_InfluencerDocuments.begin(); influencerIter != bucketData.s_InfluencerDocuments.end(); - ++influencerIter) - { + ++influencerIter) { TDocumentWeakPtr weakDoc = *influencerIter; TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. JSON document unavailable."); continue; } m_Writer.addStringFieldCopyToObj(JOB_ID, m_JobId, *docPtr); m_Writer.addTimeFieldToObj(TIMESTAMP, bucketTime, *docPtr); - if (isInterim) - { + if (isInterim) { m_Writer.addBoolFieldToObj(IS_INTERIM, isInterim, *docPtr); } m_Writer.addIntFieldToObj(BUCKET_SPAN, bucketData.s_BucketSpan, *docPtr); @@ -550,37 +443,31 @@ void CJsonOutputWriter::writeBucket(bool isInterim, m_Writer.Double(bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore); m_Writer.String(EVENT_COUNT); m_Writer.Uint64(bucketData.s_InputEventCount); - if (isInterim) - { + if (isInterim) { m_Writer.String(IS_INTERIM); m_Writer.Bool(isInterim); } m_Writer.String(BUCKET_SPAN); m_Writer.Int64(bucketData.s_BucketSpan); - if (!bucketData.s_BucketInfluencerDocuments.empty()) - { + if (!bucketData.s_BucketInfluencerDocuments.empty()) { // Write the array of influencers m_Writer.String(BUCKET_INFLUENCERS); m_Writer.StartArray(); for (TDocumentWeakPtrVecItr influencerIter = bucketData.s_BucketInfluencerDocuments.begin(); influencerIter != bucketData.s_BucketInfluencerDocuments.end(); - ++influencerIter) - { + ++influencerIter) { TDocumentWeakPtr weakDoc = *influencerIter; TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. 
JSON document unavailable.");
                 continue;
             }
-
             m_Writer.addStringFieldCopyToObj(JOB_ID, m_JobId, *docPtr);
             m_Writer.addTimeFieldToObj(TIMESTAMP, bucketTime, *docPtr);
             m_Writer.addIntFieldToObj(BUCKET_SPAN, bucketData.s_BucketSpan, *docPtr);
-            if (isInterim)
-            {
+            if (isInterim) {
                 m_Writer.addBoolFieldToObj(IS_INTERIM, isInterim, *docPtr);
             }
             m_Writer.write(*docPtr);
@@ -588,24 +475,20 @@
         m_Writer.EndArray();
     }

-    if (!bucketData.s_PartitionScoreDocuments.empty())
-    {
+    if (!bucketData.s_PartitionScoreDocuments.empty()) {
         // Write the array of partition-anomaly score pairs
         m_Writer.String(PARTITION_SCORES);
         m_Writer.StartArray();
         for (TDocumentWeakPtrVecItr partitionScoresIter = bucketData.s_PartitionScoreDocuments.begin();
              partitionScoresIter != bucketData.s_PartitionScoreDocuments.end();
-             ++partitionScoresIter)
-        {
+             ++partitionScoresIter) {
             TDocumentWeakPtr weakDoc = *partitionScoresIter;
             TDocumentPtr docPtr = weakDoc.lock();
-            if (!docPtr)
-            {
+            if (!docPtr) {
                 LOG_ERROR("Inconsistent program state. JSON document unavailable.");
                 continue;
             }
-
             m_Writer.write(*docPtr);
         }
         m_Writer.EndArray();
@@ -614,12 +497,10 @@
     m_Writer.String(PROCESSING_TIME);
     m_Writer.Uint64(bucketProcessingTime);

-    if (bucketData.s_ScheduledEventDescriptions.empty() == false)
-    {
+    if (bucketData.s_ScheduledEventDescriptions.empty() == false) {
         m_Writer.String(SCHEDULED_EVENTS);
         m_Writer.StartArray();
-        for (const auto &it : bucketData.s_ScheduledEventDescriptions)
-        {
+        for (const auto& it : bucketData.s_ScheduledEventDescriptions) {
             m_Writer.String(it);
         }
         m_Writer.EndArray();
@@ -629,12 +510,9 @@
     m_Writer.EndObject();
 }

-void CJsonOutputWriter::addMetricFields(const CHierarchicalResultsWriter::TResults &results,
-                                        TDocumentWeakPtr weakDoc)
-{
+void CJsonOutputWriter::addMetricFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) {
     TDocumentPtr docPtr = weakDoc.lock();
-    if (!docPtr)
-    {
+    if (!docPtr) {
         LOG_ERROR("Inconsistent program state.
JSON document unavailable."); return; } @@ -645,18 +523,15 @@ void CJsonOutputWriter::addMetricFields(const CHierarchicalResultsWriter::TResul m_Writer.addDoubleFieldToObj(RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(PROBABILITY, results.s_Probability, *docPtr); m_Writer.addStringFieldCopyToObj(FIELD_NAME, results.s_MetricValueField, *docPtr); - if (!results.s_ByFieldName.empty()) - { + if (!results.s_ByFieldName.empty()) { m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, results.s_ByFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, *docPtr, true); // But allow correlatedByFieldValue to be unset if blank m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, results.s_CorrelatedByFieldValue, *docPtr); } - if (!results.s_PartitionFieldName.empty()) - { - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, - *docPtr); + if (!results.s_PartitionFieldName.empty()) { + m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); } @@ -666,12 +541,9 @@ void CJsonOutputWriter::addMetricFields(const CHierarchicalResultsWriter::TResul m_Writer.addDoubleArrayFieldToObj(ACTUAL, results.s_CurrentMean, *docPtr); } -void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TResults &results, - TDocumentWeakPtr weakDoc) -{ +void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. 
JSON document unavailable."); return; } @@ -686,14 +558,12 @@ void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TR // There are no by field values at this level for population // results - they're in the "causes" object m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, results.s_ByFieldName, *docPtr); - if (!results.s_OverFieldName.empty()) - { + if (!results.s_OverFieldName.empty()) { m_Writer.addStringFieldCopyToObj(OVER_FIELD_NAME, results.s_OverFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, results.s_OverFieldValue, *docPtr, true); } - if (!results.s_PartitionFieldName.empty()) - { + if (!results.s_PartitionFieldName.empty()) { m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); @@ -702,38 +572,30 @@ void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TR m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, results.s_FunctionDescription, *docPtr); // Add nested causes - if (m_NestedDocs.size() > 0) - { + if (m_NestedDocs.size() > 0) { rapidjson::Value causeArray = m_Writer.makeArray(m_NestedDocs.size()); - for (size_t index = 0; index < m_NestedDocs.size(); ++index) - { + for (size_t index = 0; index < m_NestedDocs.size(); ++index) { TDocumentWeakPtr nwDocPtr = m_NestedDocs[index]; TDocumentPtr nDocPtr = nwDocPtr.lock(); - if (!nDocPtr) - { + if (!nDocPtr) { LOG_ERROR("Inconsistent program state. JSON document unavailable."); continue; } - rapidjson::Value &docAsValue = *nDocPtr; + rapidjson::Value& docAsValue = *nDocPtr; m_Writer.pushBack(docAsValue, causeArray); } m_Writer.addMember(CAUSES, causeArray, *docPtr); m_NestedDocs.clear(); - } - else - { + } else { LOG_WARN("Expected some causes for a population anomaly but got none"); } } -void CJsonOutputWriter::addPopulationCauseFields(const CHierarchicalResultsWriter::TResults &results, - TDocumentWeakPtr weakDoc) -{ +void CJsonOutputWriter::addPopulationCauseFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. 
JSON document unavailable."); return; } @@ -743,22 +605,19 @@ void CJsonOutputWriter::addPopulationCauseFields(const CHierarchicalResultsWrite // function, typical, actual, influences m_Writer.addDoubleFieldToObj(PROBABILITY, results.s_Probability, *docPtr); m_Writer.addStringFieldCopyToObj(FIELD_NAME, results.s_MetricValueField, *docPtr); - if (!results.s_ByFieldName.empty()) - { + if (!results.s_ByFieldName.empty()) { m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, results.s_ByFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, *docPtr, true); // But allow correlatedByFieldValue to be unset if blank m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, results.s_CorrelatedByFieldValue, *docPtr); } - if (!results.s_OverFieldName.empty()) - { + if (!results.s_OverFieldName.empty()) { m_Writer.addStringFieldCopyToObj(OVER_FIELD_NAME, results.s_OverFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, results.s_OverFieldValue, *docPtr, true); } - if (!results.s_PartitionFieldName.empty()) - { + if (!results.s_PartitionFieldName.empty()) { m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); @@ -769,17 +628,14 @@ void CJsonOutputWriter::addPopulationCauseFields(const CHierarchicalResultsWrite m_Writer.addDoubleArrayFieldToObj(ACTUAL, results.s_FunctionValue, *docPtr); } -void CJsonOutputWriter::addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec &influenceResults, - TDocumentWeakPtr weakDoc) -{ - if (influenceResults.empty()) - { +void CJsonOutputWriter::addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec& influenceResults, + TDocumentWeakPtr weakDoc) { + if (influenceResults.empty()) { return; } TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. JSON document unavailable."); return; } @@ -788,19 +644,17 @@ void CJsonOutputWriter::addInfluences(const CHierarchicalResultsWriter::TStoredS //! influenceResults. These strings must exist up to the time the results //! 
are written
-    using TCharPtrDoublePr = std::pair<const char *, double>;
+    using TCharPtrDoublePr = std::pair<const char*, double>;
     using TCharPtrDoublePrVec = std::vector<TCharPtrDoublePr>;
     using TCharPtrDoublePrVecIter = TCharPtrDoublePrVec::iterator;
-    using TCharPtrCharPtrDoublePrVecPr = std::pair<const char *, TCharPtrDoublePrVec>;
+    using TCharPtrCharPtrDoublePrVecPr = std::pair<const char*, TCharPtrDoublePrVec>;
     using TStrCharPtrCharPtrDoublePrVecPrUMap = boost::unordered_map<std::string, TCharPtrCharPtrDoublePrVecPr>;
     using TStrCharPtrCharPtrDoublePrVecPrUMapIter = TStrCharPtrCharPtrDoublePrVecPrUMap::iterator;
-
     TStrCharPtrCharPtrDoublePrVecPrUMap influences;

     // group by influence field
-    for (const auto &influenceResult : influenceResults)
-    {
+    for (const auto& influenceResult : influenceResults) {
         TCharPtrCharPtrDoublePrVecPr infResult(influenceResult.first.first->c_str(), TCharPtrDoublePrVec());
         auto insertResult = influences.emplace(*influenceResult.first.first, infResult);
@@ -808,22 +662,17 @@
     }

     // Order by influence
-    for (TStrCharPtrCharPtrDoublePrVecPrUMapIter iter = influences.begin(); iter != influences.end(); ++iter)
-    {
+    for (TStrCharPtrCharPtrDoublePrVecPrUMapIter iter = influences.begin(); iter != influences.end(); ++iter) {
         std::sort(iter->second.second.begin(), iter->second.second.end(), INFLUENCE_LESS);
     }

     rapidjson::Value influencesDoc = m_Writer.makeArray(influences.size());

-    for (TStrCharPtrCharPtrDoublePrVecPrUMapIter iter = influences.begin(); iter != influences.end(); ++iter)
-    {
+    for (TStrCharPtrCharPtrDoublePrVecPrUMapIter iter = influences.begin(); iter != influences.end(); ++iter) {
         rapidjson::Value influenceDoc(rapidjson::kObjectType);
         rapidjson::Value values = m_Writer.makeArray(influences.size());

-        for (TCharPtrDoublePrVecIter arrayIter = iter->second.second.begin();
-             arrayIter != iter->second.second.end();
-             ++arrayIter)
-        {
+        for (TCharPtrDoublePrVecIter arrayIter = iter->second.second.begin(); arrayIter != iter->second.second.end(); ++arrayIter) {
             m_Writer.pushBack(arrayIter->first, values);
         }
@@ -832,16 +681,13 @@
         m_Writer.pushBack(influenceDoc, influencesDoc);
     }

-    // Note influences are written using the field name "influencers"
+    // Note influences are written using the field name "influencers"
     m_Writer.addMember(INFLUENCERS, influencesDoc, *docPtr);
 }

-void CJsonOutputWriter::addEventRateFields(const CHierarchicalResultsWriter::TResults &results,
-                                           TDocumentWeakPtr weakDoc)
-{
+void CJsonOutputWriter::addEventRateFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) {
     TDocumentPtr docPtr = weakDoc.lock();
-    if (!docPtr)
-    {
+    if (!docPtr) {
         LOG_ERROR("Inconsistent program state.
JSON document unavailable."); return; } @@ -853,16 +699,14 @@ void CJsonOutputWriter::addEventRateFields(const CHierarchicalResultsWriter::TRe m_Writer.addDoubleFieldToObj(RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(PROBABILITY, results.s_Probability, *docPtr); m_Writer.addStringFieldCopyToObj(FIELD_NAME, results.s_MetricValueField, *docPtr); - if (!results.s_ByFieldName.empty()) - { + if (!results.s_ByFieldName.empty()) { m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, results.s_ByFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, *docPtr, true); // But allow correlatedByFieldValue to be unset if blank m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, results.s_CorrelatedByFieldValue, *docPtr); } - if (!results.s_PartitionFieldName.empty()) - { + if (!results.s_PartitionFieldName.empty()) { m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); @@ -874,12 +718,10 @@ void CJsonOutputWriter::addEventRateFields(const CHierarchicalResultsWriter::TRe } void CJsonOutputWriter::addInfluencerFields(bool isBucketInfluencer, - const model::CHierarchicalResults::TNode &node, - TDocumentWeakPtr weakDoc) -{ + const model::CHierarchicalResults::TNode& node, + TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. JSON document unavailable."); return; } @@ -887,28 +729,21 @@ void CJsonOutputWriter::addInfluencerFields(bool isBucketInfluencer, m_Writer.addDoubleFieldToObj(PROBABILITY, node.probability(), *docPtr); m_Writer.addDoubleFieldToObj(isBucketInfluencer ? INITIAL_SCORE : INITIAL_INFLUENCER_SCORE, node.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(isBucketInfluencer ? ANOMALY_SCORE : INFLUENCER_SCORE, node.s_NormalizedAnomalyScore, *docPtr); - const std::string &personFieldName = *node.s_Spec.s_PersonFieldName; + const std::string& personFieldName = *node.s_Spec.s_PersonFieldName; m_Writer.addStringFieldCopyToObj(INFLUENCER_FIELD_NAME, personFieldName, *docPtr); - if (isBucketInfluencer) - { + if (isBucketInfluencer) { m_Writer.addDoubleFieldToObj(RAW_ANOMALY_SCORE, node.s_RawAnomalyScore, *docPtr); - } - else - { - if (!personFieldName.empty()) - { + } else { + if (!personFieldName.empty()) { // If name is present then force output of value too, even when empty m_Writer.addStringFieldCopyToObj(INFLUENCER_FIELD_VALUE, *node.s_Spec.s_PersonFieldValue, *docPtr, true); } } } -void CJsonOutputWriter::addPartitionScores(const CHierarchicalResultsWriter::TResults &results, - TDocumentWeakPtr weakDoc) -{ +void CJsonOutputWriter::addPartitionScores(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) - { + if (!docPtr) { LOG_ERROR("Inconsistent program state. 
JSON document unavailable."); return; } @@ -920,19 +755,15 @@ void CJsonOutputWriter::addPartitionScores(const CHierarchicalResultsWriter::TRe m_Writer.addDoubleFieldToObj(RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); } -void CJsonOutputWriter::limitNumberRecords(size_t count) -{ +void CJsonOutputWriter::limitNumberRecords(size_t count) { m_RecordOutputLimit = count; } -size_t CJsonOutputWriter::limitNumberRecords() const -{ +size_t CJsonOutputWriter::limitNumberRecords() const { return m_RecordOutputLimit; } -void CJsonOutputWriter::persistNormalizer(const model::CHierarchicalResultsNormalizer &normalizer, - core_t::TTime &persistTime) -{ +void CJsonOutputWriter::persistNormalizer(const model::CHierarchicalResultsNormalizer& normalizer, core_t::TTime& persistTime) { std::string quantilesState; normalizer.toJson(m_LastNonInterimBucketTime, "api", quantilesState, true); @@ -946,19 +777,15 @@ void CJsonOutputWriter::persistNormalizer(const model::CHierarchicalResultsNorma LOG_DEBUG("Wrote quantiles state at " << persistTime); } -void CJsonOutputWriter::pushAllocator(const std::string &allocatorName) -{ +void CJsonOutputWriter::pushAllocator(const std::string& allocatorName) { m_Writer.pushAllocator(allocatorName); } -void CJsonOutputWriter::popAllocator() -{ +void CJsonOutputWriter::popAllocator() { m_Writer.popAllocator(); } - -void CJsonOutputWriter::reportMemoryUsage(const model::CResourceMonitor::SResults &results) -{ +void CJsonOutputWriter::reportMemoryUsage(const model::CResourceMonitor::SResults& results) { m_Writer.StartObject(); CModelSizeStatsJsonWriter::write(m_JobId, results, m_Writer); m_Writer.EndObject(); @@ -966,8 +793,7 @@ void CJsonOutputWriter::reportMemoryUsage(const model::CResourceMonitor::SResult LOG_TRACE("Wrote memory usage results"); } -void CJsonOutputWriter::acknowledgeFlush(const std::string &flushId, core_t::TTime lastFinalizedBucketEnd) -{ +void CJsonOutputWriter::acknowledgeFlush(const std::string& flushId, core_t::TTime lastFinalizedBucketEnd) { m_Writer.StartObject(); m_Writer.String(FLUSH); m_Writer.StartObject(); @@ -986,11 +812,10 @@ void CJsonOutputWriter::acknowledgeFlush(const std::string &flushId, core_t::TTi } void CJsonOutputWriter::writeCategoryDefinition(int categoryId, - const std::string &terms, - const std::string ®ex, + const std::string& terms, + const std::string& regex, std::size_t maxMatchingFieldLength, - const TStrSet &examples) -{ + const TStrSet& examples) { m_Writer.StartObject(); m_Writer.String(CATEGORY_DEFINITION); m_Writer.StartObject(); @@ -1006,9 +831,8 @@ void CJsonOutputWriter::writeCategoryDefinition(int categoryId, m_Writer.Uint64(maxMatchingFieldLength); m_Writer.String(EXAMPLES); m_Writer.StartArray(); - for (TStrSetCItr itr = examples.begin(); itr != examples.end(); ++itr) - { - const std::string &example = *itr; + for (TStrSetCItr itr = examples.begin(); itr != examples.end(); ++itr) { + const std::string& example = *itr; m_Writer.String(example); } m_Writer.EndArray(); @@ -1023,10 +847,7 @@ CJsonOutputWriter::SBucketData::SBucketData() s_BucketSpan(0), s_HighestProbability(-1), s_LowestInfluencerScore(101.0), - s_LowestBucketInfluencerScore(101.0) -{ + s_LowestBucketInfluencerScore(101.0) { } - } } - diff --git a/lib/api/CLengthEncodedInputParser.cc b/lib/api/CLengthEncodedInputParser.cc index 8fcc8d1be6..4d37d47b17 100644 --- a/lib/api/CLengthEncodedInputParser.cc +++ b/lib/api/CLengthEncodedInputParser.cc @@ -22,59 +22,41 @@ #endif #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace 
api {

// Initialise statics
const size_t CLengthEncodedInputParser::WORK_BUFFER_SIZE(8192); // 8kB

-CLengthEncodedInputParser::CLengthEncodedInputParser(std::istream &strmIn)
-    : CInputParser(),
-      m_StrmIn(strmIn),
-      m_WorkBuffer(0),
-      m_WorkBufferPtr(0),
-      m_WorkBufferEnd(0),
-      m_NoMoreRecords(false)
-{
+CLengthEncodedInputParser::CLengthEncodedInputParser(std::istream& strmIn)
+    : CInputParser(), m_StrmIn(strmIn), m_WorkBuffer(0), m_WorkBufferPtr(0), m_WorkBufferEnd(0), m_NoMoreRecords(false) {
     // This test is not ideal because std::cin's stream buffer could have been
     // changed
-    if (strmIn.rdbuf() == std::cin.rdbuf())
-    {
+    if (strmIn.rdbuf() == std::cin.rdbuf()) {
         LOG_DEBUG("Length encoded input parser input is connected to stdin");
         int result = core::CSetMode::setBinaryMode(::fileno(stdin));
-        if (result == -1)
-        {
+        if (result == -1) {
             LOG_WARN("Cannot set the stdin to binary mode");
         }
-    }
-    else
-    {
+    } else {
         LOG_DEBUG("Length encoded input parser input is not connected to stdin");
     }
 }

-bool CLengthEncodedInputParser::readStream(const TReaderFunc &readerFunc)
-{
+bool CLengthEncodedInputParser::readStream(const TReaderFunc& readerFunc) {
     // Reset the record buffer pointers in case we're reading a new stream
     m_WorkBufferEnd = m_WorkBufferPtr;
     m_NoMoreRecords = false;

-    TStrVec &fieldNames = this->fieldNames();
+    TStrVec& fieldNames = this->fieldNames();

-    if (!this->gotFieldNames())
-    {
-        if (this->parseRecordFromStream(fieldNames) == false)
-        {
+    if (!this->gotFieldNames()) {
+        if (this->parseRecordFromStream(fieldNames) == false) {
             LOG_ERROR("Failed to parse length encoded header from stream");
             return false;
         }

-        if (fieldNames.empty())
-        {
+        if (fieldNames.empty()) {
             // If we parsed no field names at all, return true, as
             // completely empty input is technically valid
             LOG_INFO("Field names are empty")
@@ -90,30 +72,23 @@ bool CLengthEncodedInputParser::readStream(const TReaderFunc &readerFunc)
     // name - this avoids the need to repeatedly compute the same hashes
     TStrRefVec fieldValRefs;
     fieldValRefs.reserve(fieldNames.size());
-    for (TStrVecCItr iter = fieldNames.begin();
-         iter != fieldNames.end();
-         ++iter)
-    {
+    for (TStrVecCItr iter = fieldNames.begin(); iter != fieldNames.end(); ++iter) {
         fieldValRefs.push_back(boost::ref(recordFields[*iter]));
     }

-    while (!m_NoMoreRecords)
-    {
-        if (this->parseRecordFromStream(fieldValRefs) == false)
-        {
+    while (!m_NoMoreRecords) {
+        if (this->parseRecordFromStream(fieldValRefs) == false) {
             LOG_ERROR("Failed to parse length encoded data record from stream");
             return false;
         }

-        if (m_NoMoreRecords)
-        {
+        if (m_NoMoreRecords) {
             break;
         }

         this->gotData(true);

-        if (readerFunc(recordFields) == false)
-        {
+        if (readerFunc(recordFields) == false) {
             LOG_ERROR("Record handler function forced exit");
             return false;
         }
@@ -122,9 +97,8 @@
     return true;
 }

-template <typename STR_VEC, bool RESIZE_ALLOWED>
-bool CLengthEncodedInputParser::parseRecordFromStream(STR_VEC &results)
-{
+template<typename STR_VEC, bool RESIZE_ALLOWED>
+bool CLengthEncodedInputParser::parseRecordFromStream(STR_VEC& results) {
     // For maximum performance, read the stream in large chunks that can be
     // moved around by memcpy(). Using memcpy() is an order of magnitude faster
     // than the naive approach of checking and copying one character at a time.
     // for the delimiter and then memcpy() to transfer data to the target
     // std::string, but sadly this is not the case for the Microsoft and Apache
     // STLs.
- if (m_WorkBuffer.get() == 0) - { + if (m_WorkBuffer.get() == 0) { m_WorkBuffer.reset(new char[WORK_BUFFER_SIZE]); m_WorkBufferPtr = m_WorkBuffer.get(); m_WorkBufferEnd = m_WorkBufferPtr; } uint32_t numFields(0); - if (this->parseUInt32FromStream(numFields) == false) - { - if (m_StrmIn.eof()) - { + if (this->parseUInt32FromStream(numFields) == false) { + if (m_StrmIn.eof()) { // End-of-file is not an error at this point in the parsing m_NoMoreRecords = true; return true; @@ -153,12 +124,9 @@ bool CLengthEncodedInputParser::parseRecordFromStream(STR_VEC &results) return false; } - if (results.size() != numFields) - { - if (RESIZE_ALLOWED) - { - if (numFields == 0) - { + if (results.size() != numFields) { + if (RESIZE_ALLOWED) { + if (numFields == 0) { LOG_WARN("Number of fields is 0 in input"); } @@ -172,22 +140,16 @@ bool CLengthEncodedInputParser::parseRecordFromStream(STR_VEC &results) static_assert(!RESIZE_ALLOWED || !std::is_same::value, "RESIZE_ALLOWED must be false for reference vectors"); std::string temp; - results.resize(numFields, - typename STR_VEC::value_type(temp)); - } - else - { - LOG_ERROR("Incorrect number of fields in input stream record: expected " - << results.size() << " but got " << numFields); + results.resize(numFields, typename STR_VEC::value_type(temp)); + } else { + LOG_ERROR("Incorrect number of fields in input stream record: expected " << results.size() << " but got " << numFields); return false; } } - for (size_t index = 0; index < numFields; ++index) - { + for (size_t index = 0; index < numFields; ++index) { uint32_t length(0); - if (this->parseUInt32FromStream(length) == false) - { + if (this->parseUInt32FromStream(length) == false) { LOG_ERROR("Unable to read field length from input stream"); return false; } @@ -200,15 +162,12 @@ bool CLengthEncodedInputParser::parseRecordFromStream(STR_VEC &results) // which is unlikely, so assume corruption in this case. See bug 1040 // in Bugzilla for more details. 
static const uint32_t HIGH_BYTE_MASK(0xFF000000); - if ((length & HIGH_BYTE_MASK) != 0u) - { - LOG_ERROR("Parsed field length " << length - << " is suspiciously large - assuming corrupt input stream"); + if ((length & HIGH_BYTE_MASK) != 0u) { + LOG_ERROR("Parsed field length " << length << " is suspiciously large - assuming corrupt input stream"); return false; } - if (this->parseStringFromStream(length, results[index]) == false) - { + if (this->parseStringFromStream(length, results[index]) == false) { LOG_ERROR("Unable to read field data from input stream"); return false; } @@ -217,14 +176,11 @@ bool CLengthEncodedInputParser::parseRecordFromStream(STR_VEC &results) return true; } -bool CLengthEncodedInputParser::parseUInt32FromStream(uint32_t &num) -{ +bool CLengthEncodedInputParser::parseUInt32FromStream(uint32_t& num) { size_t avail(m_WorkBufferEnd - m_WorkBufferPtr); - if (avail < sizeof(uint32_t)) - { + if (avail < sizeof(uint32_t)) { avail = this->refillBuffer(); - if (avail < sizeof(uint32_t)) - { + if (avail < sizeof(uint32_t)) { return false; } } @@ -240,82 +196,62 @@ bool CLengthEncodedInputParser::parseUInt32FromStream(uint32_t &num) return true; } -bool CLengthEncodedInputParser::parseStringFromStream(size_t length, - std::string &str) -{ - if (length == 0) - { +bool CLengthEncodedInputParser::parseStringFromStream(size_t length, std::string& str) { + if (length == 0) { str.clear(); return true; } bool append(false); size_t avail(m_WorkBufferEnd - m_WorkBufferPtr); - do - { - if (avail == 0) - { + do { + if (avail == 0) { avail = this->refillBuffer(); - if (avail == 0) - { + if (avail == 0) { return false; } } size_t copyLen(std::min(length, avail)); - if (append) - { + if (append) { str.append(m_WorkBufferPtr, copyLen); - } - else - { + } else { str.assign(m_WorkBufferPtr, copyLen); append = true; } m_WorkBufferPtr += copyLen; avail -= copyLen; length -= copyLen; - } - while (length > 0); + } while (length > 0); return true; } -size_t CLengthEncodedInputParser::refillBuffer() -{ +size_t CLengthEncodedInputParser::refillBuffer() { // NB: This assumes the buffer is allocated, which is OK for a private // method. Callers are responsible for ensuring that the buffer isn't NULL // when calling this method. 
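The HIGH_BYTE_MASK test above amounts to saying that no single field may be 2^24 bytes (16MB) or longer: any length whose top byte is non-zero fails. The same cheap check also tends to catch a parser that has drifted out of alignment with the stream, because four bytes of printable ASCII misread as a length almost always have a non-zero top byte. A small worked example:

    #include <cstdint>
    #include <iostream>

    int main() {
        static const uint32_t HIGH_BYTE_MASK(0xFF000000);
        // 0x00FFFFFF (16777215) is the largest length that passes the check;
        // 0x74696D65 is the string "time" misinterpreted as a length
        for (uint32_t length : {42u, 0x00FFFFFFu, 0x01000000u, 0x74696D65u}) {
            std::cout << length << ((length & HIGH_BYTE_MASK) != 0u ? " -> rejected" : " -> accepted") << '\n';
        }
        return 0;
    }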
size_t avail(m_WorkBufferEnd - m_WorkBufferPtr); - if (m_StrmIn.eof()) - { + if (m_StrmIn.eof()) { // We can't read any more data - whatever's available now won't change return avail; } - if (avail > 0) - { + if (avail > 0) { ::memcpy(m_WorkBuffer.get(), m_WorkBufferPtr, avail); } m_WorkBufferPtr = m_WorkBuffer.get(); - m_StrmIn.read(m_WorkBuffer.get() + avail, - static_cast<std::streamsize>(WORK_BUFFER_SIZE - avail)); - if (m_StrmIn.bad()) - { + m_StrmIn.read(m_WorkBuffer.get() + avail, static_cast<std::streamsize>(WORK_BUFFER_SIZE - avail)); + if (m_StrmIn.bad()) { LOG_ERROR("Input stream is bad"); - } - else - { + } else { avail += static_cast<size_t>(m_StrmIn.gcount()); } m_WorkBufferEnd = m_WorkBufferPtr + avail; return avail; } - - } } - diff --git a/lib/api/CLineifiedInputParser.cc b/lib/api/CLineifiedInputParser.cc index 2234fd58e1..858ed080b1 100644 --- a/lib/api/CLineifiedInputParser.cc +++ b/lib/api/CLineifiedInputParser.cc @@ -11,31 +11,18 @@ #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { // Initialise statics -const char CLineifiedInputParser::LINE_END('\n'); +const char CLineifiedInputParser::LINE_END('\n'); const size_t CLineifiedInputParser::WORK_BUFFER_SIZE(131072); // 128kB - -CLineifiedInputParser::CLineifiedInputParser(std::istream &strmIn) - : CInputParser(), - m_StrmIn(strmIn), - m_WorkBuffer(0), - m_WorkBufferCapacity(0), - m_WorkBufferPtr(0), - m_WorkBufferEnd(0) -{ +CLineifiedInputParser::CLineifiedInputParser(std::istream& strmIn) + : CInputParser(), m_StrmIn(strmIn), m_WorkBuffer(0), m_WorkBufferCapacity(0), m_WorkBufferPtr(0), m_WorkBufferEnd(0) { } -CLineifiedInputParser::TCharPSizePr -CLineifiedInputParser::parseLine() -{ +CLineifiedInputParser::TCharPSizePr CLineifiedInputParser::parseLine() { // For maximum performance, read the stream in large chunks that can be // moved around by memcpy(). Using memcpy() is an order of magnitude faster // than the naive approach of checking and copying one character at a time. @@ -43,38 +30,29 @@ CLineifiedInputParser::parseLine() // for the delimiter and then memcpy() to transfer data to the target // std::string, but sadly this is not the case for the Microsoft and Apache // STLs.
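The comment above, stripped to its essentials: pull a large block into a buffer with a single read() call, then let memchr() locate the delimiter. A rough standalone sketch of that idea, with the stream contents and buffer size invented for the example (the real parseLine() below additionally shuffles any partial trailing line to the front of the buffer and refills):

    #include <cstring>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
        std::istringstream strm("alpha\nbeta\ngamma\n");
        std::vector<char> buffer(8192);

        strm.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));
        std::size_t avail(static_cast<std::size_t>(strm.gcount()));

        // Scan the whole block for newlines with memchr() rather than
        // checking character by character; each hit delimits one line
        const char* ptr = buffer.data();
        const char* end = ptr + avail;
        while (const char* delim = static_cast<const char*>(
                   ::memchr(ptr, '\n', static_cast<std::size_t>(end - ptr)))) {
            std::cout << std::string(ptr, static_cast<std::size_t>(delim - ptr)) << '\n';
            ptr = delim + 1;
        }
        return 0;
    }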
- if (m_WorkBuffer.get() == 0) - { + if (m_WorkBuffer.get() == 0) { m_WorkBuffer.reset(new char[WORK_BUFFER_SIZE]); m_WorkBufferCapacity = WORK_BUFFER_SIZE; m_WorkBufferPtr = m_WorkBuffer.get(); m_WorkBufferEnd = m_WorkBufferPtr; } - for (;;) - { + for (;;) { size_t avail(m_WorkBufferEnd - m_WorkBufferPtr); - if (avail > 0) - { - char *delimPtr(reinterpret_cast<char *>(::memchr(m_WorkBufferPtr, - LINE_END, - avail))); - if (delimPtr != 0) - { + if (avail > 0) { + char* delimPtr(reinterpret_cast<char*>(::memchr(m_WorkBufferPtr, LINE_END, avail))); + if (delimPtr != 0) { *delimPtr = '\0'; TCharPSizePr result(m_WorkBufferPtr, delimPtr - m_WorkBufferPtr); m_WorkBufferPtr = delimPtr + 1; return result; } - if (m_WorkBufferPtr > m_WorkBuffer.get()) - { + if (m_WorkBufferPtr > m_WorkBuffer.get()) { // We didn't find a line ending, but we started part way through // the buffer, so shuffle it up and refill it ::memmove(m_WorkBuffer.get(), m_WorkBufferPtr, avail); - } - else - { + } else { // We didn't find a line ending and started at the beginning of a // full buffer so expand it m_WorkBufferCapacity += WORK_BUFFER_SIZE; @@ -86,20 +64,16 @@ CLineifiedInputParser::parseLine() m_WorkBufferEnd = m_WorkBufferPtr + avail; } - if (m_StrmIn.eof()) - { + if (m_StrmIn.eof()) { // We have no lines in the buffered data and are already at the end // of the stream, so stop now break; } - m_StrmIn.read(m_WorkBufferEnd, - static_cast<std::streamsize>(m_WorkBufferCapacity - avail)); + m_StrmIn.read(m_WorkBufferEnd, static_cast<std::streamsize>(m_WorkBufferCapacity - avail)); std::streamsize bytesRead(m_StrmIn.gcount()); - if (bytesRead == 0) - { - if (m_StrmIn.bad()) - { + if (bytesRead == 0) { + if (m_StrmIn.bad()) { LOG_ERROR("Input stream is bad"); } // We needed to read more data and didn't get any, so stop @@ -108,15 +82,11 @@ m_WorkBufferEnd += bytesRead; } - return TCharPSizePr(static_cast<char *>(0), 0); + return TCharPSizePr(static_cast<char*>(0), 0); } -void CLineifiedInputParser::resetBuffer() -{ +void CLineifiedInputParser::resetBuffer() { m_WorkBufferEnd = m_WorkBufferPtr; } - - } } - diff --git a/lib/api/CLineifiedJsonInputParser.cc b/lib/api/CLineifiedJsonInputParser.cc index 461a456156..c7e6ce2e2f 100644 --- a/lib/api/CLineifiedJsonInputParser.cc +++ b/lib/api/CLineifiedJsonInputParser.cc @@ -10,23 +10,15 @@ #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ - - -CLineifiedJsonInputParser::CLineifiedJsonInputParser(std::istream &strmIn, - bool allDocsSameStructure) - : CLineifiedInputParser(strmIn), - m_AllDocsSameStructure(allDocsSameStructure) -{ +CLineifiedJsonInputParser::CLineifiedJsonInputParser(std::istream& strmIn, bool allDocsSameStructure) + : CLineifiedInputParser(strmIn), m_AllDocsSameStructure(allDocsSameStructure) { } -bool CLineifiedJsonInputParser::readStream(const TReaderFunc &readerFunc) -{ - TStrVec &fieldNames = this->fieldNames(); +bool CLineifiedJsonInputParser::readStream(const TReaderFunc& readerFunc) { + TStrVec& fieldNames = this->fieldNames(); TStrRefVec fieldValRefs; // Reset the record buffer pointers in case we're reading a new stream @@ -35,40 +27,27 @@ bool CLineifiedJsonInputParser::readStream(const TReaderFunc &readerFunc) // We reuse the same field map for every record TStrStrUMap recordFields; - char *begin(this->parseLine().first); - while (begin != 0) - { + char* begin(this->parseLine().first); + while (begin != 0) { rapidjson::Document document; - if (this->parseDocument(begin, document) == false) - {
LOG_ERROR("Failed to parse JSON document"); return false; } - if (m_AllDocsSameStructure) - { - if (this->decodeDocumentWithCommonFields(document, - fieldNames, - fieldValRefs, - recordFields) == false) - { + if (m_AllDocsSameStructure) { + if (this->decodeDocumentWithCommonFields(document, fieldNames, fieldValRefs, recordFields) == false) { LOG_ERROR("Failed to decode JSON document"); return false; } - } - else - { - if (this->decodeDocumentWithArbitraryFields(document, - fieldNames, - recordFields) == false) - { + } else { + if (this->decodeDocumentWithArbitraryFields(document, fieldNames, recordFields) == false) { LOG_ERROR("Failed to decode JSON document"); return false; } } - if (readerFunc(recordFields) == false) - { + if (readerFunc(recordFields) == false) { LOG_ERROR("Record handler function forced exit"); return false; } @@ -79,18 +58,14 @@ bool CLineifiedJsonInputParser::readStream(const TReaderFunc &readerFunc) return true; } -bool CLineifiedJsonInputParser::parseDocument(char *begin, - rapidjson::Document &document) -{ +bool CLineifiedJsonInputParser::parseDocument(char* begin, rapidjson::Document& document) { // Parse JSON string using Rapidjson - if (document.ParseInsitu(begin).HasParseError()) - { + if (document.ParseInsitu(begin).HasParseError()) { LOG_ERROR("JSON parse error: " << document.GetParseError()); return false; } - if (!document.IsObject()) - { + if (!document.IsObject()) { LOG_ERROR("Top level of JSON document must be an object: " << document.GetType()); return false; } @@ -98,28 +73,20 @@ bool CLineifiedJsonInputParser::parseDocument(char *begin, return true; } -bool CLineifiedJsonInputParser::decodeDocumentWithCommonFields(const rapidjson::Document &document, - TStrVec &fieldNames, - TStrRefVec &fieldValRefs, - TStrStrUMap &recordFields) -{ - if (fieldValRefs.empty()) - { +bool CLineifiedJsonInputParser::decodeDocumentWithCommonFields(const rapidjson::Document& document, + TStrVec& fieldNames, + TStrRefVec& fieldValRefs, + TStrStrUMap& recordFields) { + if (fieldValRefs.empty()) { // We haven't yet decoded any documents, so decode the first one long-hand - if (this->decodeDocumentWithArbitraryFields(document, - fieldNames, - recordFields) == false) - { + if (this->decodeDocumentWithArbitraryFields(document, fieldNames, recordFields) == false) { return false; } // Cache references to the strings in the map corresponding to each field // name for next time fieldValRefs.reserve(fieldNames.size()); - for (TStrVecCItr iter = fieldNames.begin(); - iter != fieldNames.end(); - ++iter) - { + for (TStrVecCItr iter = fieldNames.begin(); iter != fieldNames.end(); ++iter) { fieldValRefs.push_back(boost::ref(recordFields[*iter])); } @@ -127,85 +94,70 @@ bool CLineifiedJsonInputParser::decodeDocumentWithCommonFields(const rapidjson:: } TStrRefVecItr refIter = fieldValRefs.begin(); - for (rapidjson::Value::ConstMemberIterator iter = document.MemberBegin(); - iter != document.MemberEnd(); - ++iter, ++refIter) - { - if (refIter == fieldValRefs.end()) - { + for (rapidjson::Value::ConstMemberIterator iter = document.MemberBegin(); iter != document.MemberEnd(); ++iter, ++refIter) { + if (refIter == fieldValRefs.end()) { LOG_ERROR("More fields than field references"); return false; } - switch (iter->value.GetType()) - { - case rapidjson::kNullType: - refIter->get().clear(); - break; - case rapidjson::kFalseType: - refIter->get() = '0'; - break; - case rapidjson::kTrueType: - refIter->get() = '1'; - break; - case rapidjson::kObjectType: - case rapidjson::kArrayType: - 
LOG_ERROR("Can't handle nested objects/arrays in JSON documents: " << - fieldNames.back()); - return false; - case rapidjson::kStringType: - refIter->get().assign(iter->value.GetString(), - iter->value.GetStringLength()); - break; - case rapidjson::kNumberType: - core::CStringUtils::typeToString(iter->value.GetDouble()).swap(refIter->get()); - break; + switch (iter->value.GetType()) { + case rapidjson::kNullType: + refIter->get().clear(); + break; + case rapidjson::kFalseType: + refIter->get() = '0'; + break; + case rapidjson::kTrueType: + refIter->get() = '1'; + break; + case rapidjson::kObjectType: + case rapidjson::kArrayType: + LOG_ERROR("Can't handle nested objects/arrays in JSON documents: " << fieldNames.back()); + return false; + case rapidjson::kStringType: + refIter->get().assign(iter->value.GetString(), iter->value.GetStringLength()); + break; + case rapidjson::kNumberType: + core::CStringUtils::typeToString(iter->value.GetDouble()).swap(refIter->get()); + break; } } return true; } -bool CLineifiedJsonInputParser::decodeDocumentWithArbitraryFields(const rapidjson::Document &document, - TStrVec &fieldNames, - TStrStrUMap &recordFields) -{ +bool CLineifiedJsonInputParser::decodeDocumentWithArbitraryFields(const rapidjson::Document& document, + TStrVec& fieldNames, + TStrStrUMap& recordFields) { // The major drawback of having self-describing messages is that we can't // make assumptions about what fields exist or what order they're in fieldNames.clear(); recordFields.clear(); - for (rapidjson::Value::ConstMemberIterator iter = document.MemberBegin(); - iter != document.MemberEnd(); - ++iter) - { - fieldNames.push_back(std::string(iter->name.GetString(), - iter->name.GetStringLength())); - - switch (iter->value.GetType()) - { - case rapidjson::kNullType: - recordFields[fieldNames.back()]; - break; - case rapidjson::kFalseType: - recordFields[fieldNames.back()] = '0'; - break; - case rapidjson::kTrueType: - recordFields[fieldNames.back()] = '1'; - break; - case rapidjson::kObjectType: - case rapidjson::kArrayType: - LOG_ERROR("Can't handle nested objects/arrays in JSON documents: " << - fieldNames.back()); - fieldNames.pop_back(); - return false; - case rapidjson::kStringType: - recordFields[fieldNames.back()].assign(iter->value.GetString(), - iter->value.GetStringLength()); - break; - case rapidjson::kNumberType: - core::CStringUtils::typeToString(iter->value.GetDouble()).swap(recordFields[fieldNames.back()]); - break; + for (rapidjson::Value::ConstMemberIterator iter = document.MemberBegin(); iter != document.MemberEnd(); ++iter) { + fieldNames.push_back(std::string(iter->name.GetString(), iter->name.GetStringLength())); + + switch (iter->value.GetType()) { + case rapidjson::kNullType: + recordFields[fieldNames.back()]; + break; + case rapidjson::kFalseType: + recordFields[fieldNames.back()] = '0'; + break; + case rapidjson::kTrueType: + recordFields[fieldNames.back()] = '1'; + break; + case rapidjson::kObjectType: + case rapidjson::kArrayType: + LOG_ERROR("Can't handle nested objects/arrays in JSON documents: " << fieldNames.back()); + fieldNames.pop_back(); + return false; + case rapidjson::kStringType: + recordFields[fieldNames.back()].assign(iter->value.GetString(), iter->value.GetStringLength()); + break; + case rapidjson::kNumberType: + core::CStringUtils::typeToString(iter->value.GetDouble()).swap(recordFields[fieldNames.back()]); + break; } } @@ -214,8 +166,5 @@ bool CLineifiedJsonInputParser::decodeDocumentWithArbitraryFields(const rapidjso return true; } - - } } - diff 
--git a/lib/api/CLineifiedJsonOutputWriter.cc b/lib/api/CLineifiedJsonOutputWriter.cc index eac7d8c3de..9d2801d52f 100644 --- a/lib/api/CLineifiedJsonOutputWriter.cc +++ b/lib/api/CLineifiedJsonOutputWriter.cc @@ -10,45 +10,26 @@ #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter() - : m_OutStream(m_StringOutputBuf), - m_WriteStream(m_OutStream), - m_Writer(m_WriteStream) -{ + : m_OutStream(m_StringOutputBuf), m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { } -CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(const TStrSet &numericFields) - : m_NumericFields(numericFields), - m_OutStream(m_StringOutputBuf), - m_WriteStream(m_OutStream), - m_Writer(m_WriteStream) -{ +CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(const TStrSet& numericFields) + : m_NumericFields(numericFields), m_OutStream(m_StringOutputBuf), m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { } -CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(std::ostream &strmOut) - : m_OutStream(strmOut), - m_WriteStream(m_OutStream), - m_Writer(m_WriteStream) -{ +CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(std::ostream& strmOut) + : m_OutStream(strmOut), m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { } -CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(const TStrSet &numericFields, std::ostream &strmOut) - : m_NumericFields(numericFields), - m_OutStream(strmOut), - m_WriteStream(m_OutStream), - m_Writer(m_WriteStream) -{ +CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(const TStrSet& numericFields, std::ostream& strmOut) + : m_NumericFields(numericFields), m_OutStream(strmOut), m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { } -CLineifiedJsonOutputWriter::~CLineifiedJsonOutputWriter() -{ +CLineifiedJsonOutputWriter::~CLineifiedJsonOutputWriter() { // Since we didn't flush the stream whilst working, we flush it on // destruction m_WriteStream.Flush(); @@ -59,42 +40,34 @@ CLineifiedJsonOutputWriter::~CLineifiedJsonOutputWriter() core::CSleep::sleep(20); } -bool CLineifiedJsonOutputWriter::fieldNames(const TStrVec &/*fieldNames*/, - const TStrVec &/*extraFieldNames*/) -{ +bool CLineifiedJsonOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { return true; } -const CLineifiedJsonOutputWriter::TStrVec &CLineifiedJsonOutputWriter::fieldNames() const -{ +const CLineifiedJsonOutputWriter::TStrVec& CLineifiedJsonOutputWriter::fieldNames() const { return EMPTY_FIELD_NAMES; } -bool CLineifiedJsonOutputWriter::writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields) -{ +bool CLineifiedJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { rapidjson::Document doc = m_Writer.makeDoc(); // Write all the fields to the document as strings // No need to copy the strings as the doc is written straight away TStrStrUMapCItr fieldValueIter = dataRowFields.begin(); - for (; fieldValueIter != dataRowFields.end(); ++fieldValueIter) - { - const std::string &name = fieldValueIter->first; - const std::string &value = fieldValueIter->second; + for (; fieldValueIter != dataRowFields.end(); ++fieldValueIter) { + const std::string& name = fieldValueIter->first; + const std::string& value = fieldValueIter->second; // Only output fields that aren't overridden - if (overrideDataRowFields.find(name) == overrideDataRowFields.end()) - { + if (overrideDataRowFields.find(name) == overrideDataRowFields.end()) { 
this->writeField(name, value, doc); } } fieldValueIter = overrideDataRowFields.begin(); - for (; fieldValueIter != overrideDataRowFields.end(); ++fieldValueIter) - { - const std::string &name = fieldValueIter->first; - const std::string &value = fieldValueIter->second; + for (; fieldValueIter != overrideDataRowFields.end(); ++fieldValueIter) { + const std::string& name = fieldValueIter->first; + const std::string& value = fieldValueIter->second; this->writeField(name, value, doc); } @@ -105,36 +78,25 @@ bool CLineifiedJsonOutputWriter::writeRow(const TStrStrUMap &dataRowFields, return true; } -std::string CLineifiedJsonOutputWriter::internalString() const -{ - const_cast(m_WriteStream).Flush(); +std::string CLineifiedJsonOutputWriter::internalString() const { + const_cast(m_WriteStream).Flush(); // This is only of any value if the first constructor was used - it's up to // the caller to know this return m_StringOutputBuf.str(); } -void CLineifiedJsonOutputWriter::writeField(const std::string &name, - const std::string &value, - rapidjson::Document &doc) const -{ - if (m_NumericFields.find(name) != m_NumericFields.end()) - { +void CLineifiedJsonOutputWriter::writeField(const std::string& name, const std::string& value, rapidjson::Document& doc) const { + if (m_NumericFields.find(name) != m_NumericFields.end()) { double numericValue(0.0); - if (core::CStringUtils::stringToType(value, numericValue) == false) - { + if (core::CStringUtils::stringToType(value, numericValue) == false) { LOG_WARN("Non-numeric value output in numeric JSON document"); // Write a 0 instead of returning } m_Writer.addDoubleFieldToObj(name, numericValue, doc); - } - else - { + } else { m_Writer.addStringFieldCopyToObj(name, value, doc, true); } } - - } } - diff --git a/lib/api/CLineifiedXmlInputParser.cc b/lib/api/CLineifiedXmlInputParser.cc index c2c0dd9c35..c2f2b402dd 100644 --- a/lib/api/CLineifiedXmlInputParser.cc +++ b/lib/api/CLineifiedXmlInputParser.cc @@ -10,25 +10,15 @@ #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ - - -CLineifiedXmlInputParser::CLineifiedXmlInputParser(core::CXmlParserIntf &parser, - std::istream &strmIn, - bool allDocsSameStructure) - : CLineifiedInputParser(strmIn), - m_Parser(parser), - m_AllDocsSameStructure(allDocsSameStructure) -{ +CLineifiedXmlInputParser::CLineifiedXmlInputParser(core::CXmlParserIntf& parser, std::istream& strmIn, bool allDocsSameStructure) + : CLineifiedInputParser(strmIn), m_Parser(parser), m_AllDocsSameStructure(allDocsSameStructure) { } -bool CLineifiedXmlInputParser::readStream(const TReaderFunc &readerFunc) -{ - TStrVec &fieldNames = this->fieldNames(); +bool CLineifiedXmlInputParser::readStream(const TReaderFunc& readerFunc) { + TStrVec& fieldNames = this->fieldNames(); TStrRefVec fieldValRefs; // Reset the record buffer pointers in case we're reading a new stream @@ -38,39 +28,27 @@ bool CLineifiedXmlInputParser::readStream(const TReaderFunc &readerFunc) TStrStrUMap recordFields; TCharPSizePr beginLenPair(this->parseLine()); - while (beginLenPair.first != 0) - { - if (m_Parser.parseBufferInSitu(beginLenPair.first, - beginLenPair.second) == false) - { + while (beginLenPair.first != 0) { + if (m_Parser.parseBufferInSitu(beginLenPair.first, beginLenPair.second) == false) { LOG_ERROR("Failed to parse XML document"); return false; } - if (m_Parser.navigateRoot() == false || - m_Parser.navigateFirstChild() == false) - { + if (m_Parser.navigateRoot() == false || m_Parser.navigateFirstChild() == false) { LOG_ERROR("XML document has 
unexpected structure"); return false; } - if (m_AllDocsSameStructure) - { - if (this->decodeDocumentWithCommonFields(fieldNames, - fieldValRefs, - recordFields) == false) - { + if (m_AllDocsSameStructure) { + if (this->decodeDocumentWithCommonFields(fieldNames, fieldValRefs, recordFields) == false) { LOG_ERROR("Failed to decode XML document"); return false; } - } - else - { + } else { this->decodeDocumentWithArbitraryFields(fieldNames, recordFields); } - if (readerFunc(recordFields) == false) - { + if (readerFunc(recordFields) == false) { LOG_ERROR("Record handler function forced exit"); return false; } @@ -81,22 +59,15 @@ bool CLineifiedXmlInputParser::readStream(const TReaderFunc &readerFunc) return true; } -bool CLineifiedXmlInputParser::decodeDocumentWithCommonFields(TStrVec &fieldNames, - TStrRefVec &fieldValRefs, - TStrStrUMap &recordFields) -{ - if (fieldValRefs.empty()) - { +bool CLineifiedXmlInputParser::decodeDocumentWithCommonFields(TStrVec& fieldNames, TStrRefVec& fieldValRefs, TStrStrUMap& recordFields) { + if (fieldValRefs.empty()) { // We haven't yet decoded any documents, so decode the first one long-hand this->decodeDocumentWithArbitraryFields(fieldNames, recordFields); // Cache references to the strings in the map corresponding to each field // name for next time fieldValRefs.reserve(fieldNames.size()); - for (TStrVecCItr iter = fieldNames.begin(); - iter != fieldNames.end(); - ++iter) - { + for (TStrVecCItr iter = fieldNames.begin(); iter != fieldNames.end(); ++iter) { fieldValRefs.push_back(boost::ref(recordFields[*iter])); } @@ -105,52 +76,40 @@ bool CLineifiedXmlInputParser::decodeDocumentWithCommonFields(TStrVec &fieldName size_t i(0); bool more(true); - do - { + do { m_Parser.currentNodeValue(fieldValRefs[i]); ++i; more = m_Parser.navigateNext(); - } - while (i < fieldValRefs.size() && more); + } while (i < fieldValRefs.size() && more); - if (i < fieldValRefs.size() || more) - { - while (more) - { + if (i < fieldValRefs.size() || more) { + while (more) { ++i; more = m_Parser.navigateNext(); } - LOG_ERROR("Incorrect number of fields: expected " - << fieldValRefs.size() << ", got " << i); + LOG_ERROR("Incorrect number of fields: expected " << fieldValRefs.size() << ", got " << i); return false; } return true; } -void CLineifiedXmlInputParser::decodeDocumentWithArbitraryFields(TStrVec &fieldNames, - TStrStrUMap &recordFields) -{ +void CLineifiedXmlInputParser::decodeDocumentWithArbitraryFields(TStrVec& fieldNames, TStrStrUMap& recordFields) { // The major drawback of having self-describing messages is that we can't // make assumptions about what fields exist or what order they're in fieldNames.clear(); recordFields.clear(); - do - { + do { fieldNames.push_back(std::string()); - std::string &name = fieldNames.back(); + std::string& name = fieldNames.back(); m_Parser.currentNodeName(name); m_Parser.currentNodeValue(recordFields[name]); - } - while (m_Parser.navigateNext()); + } while (m_Parser.navigateNext()); this->gotFieldNames(true); this->gotData(true); } - - } } - diff --git a/lib/api/CLineifiedXmlOutputWriter.cc b/lib/api/CLineifiedXmlOutputWriter.cc index 9173a7d8cd..aac78821aa 100644 --- a/lib/api/CLineifiedXmlOutputWriter.cc +++ b/lib/api/CLineifiedXmlOutputWriter.cc @@ -11,33 +11,21 @@ #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ - -namespace -{ +namespace { const std::string EMPTY_STRING; } - -CLineifiedXmlOutputWriter::CLineifiedXmlOutputWriter(const std::string &rootName) - : m_RootName(rootName), - 
m_OutStream(m_StringOutputBuf) -{ +CLineifiedXmlOutputWriter::CLineifiedXmlOutputWriter(const std::string& rootName) : m_RootName(rootName), m_OutStream(m_StringOutputBuf) { } -CLineifiedXmlOutputWriter::CLineifiedXmlOutputWriter(const std::string &rootName, - std::ostream &strmOut) - : m_RootName(rootName), - m_OutStream(strmOut) -{ +CLineifiedXmlOutputWriter::CLineifiedXmlOutputWriter(const std::string& rootName, std::ostream& strmOut) + : m_RootName(rootName), m_OutStream(strmOut) { } -CLineifiedXmlOutputWriter::~CLineifiedXmlOutputWriter() -{ +CLineifiedXmlOutputWriter::~CLineifiedXmlOutputWriter() { // Since we didn't flush the stream whilst working, we flush it on // destruction m_OutStream.flush(); @@ -48,41 +36,32 @@ CLineifiedXmlOutputWriter::~CLineifiedXmlOutputWriter() core::CSleep::sleep(20); } -bool CLineifiedXmlOutputWriter::fieldNames(const TStrVec &/*fieldNames*/, - const TStrVec &/*extraFieldNames*/) -{ +bool CLineifiedXmlOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { return true; } -const CLineifiedXmlOutputWriter::TStrVec &CLineifiedXmlOutputWriter::fieldNames() const -{ +const CLineifiedXmlOutputWriter::TStrVec& CLineifiedXmlOutputWriter::fieldNames() const { return EMPTY_FIELD_NAMES; } -bool CLineifiedXmlOutputWriter::writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields) -{ - core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(m_Pool.newNode(m_RootName, - EMPTY_STRING)); +bool CLineifiedXmlOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { + core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(m_Pool.newNode(m_RootName, EMPTY_STRING)); TStrStrUMapCItr fieldValueIter = dataRowFields.begin(); - for (; fieldValueIter != dataRowFields.end(); ++fieldValueIter) - { - const std::string &name = fieldValueIter->first; - const std::string &value = fieldValueIter->second; + for (; fieldValueIter != dataRowFields.end(); ++fieldValueIter) { + const std::string& name = fieldValueIter->first; + const std::string& value = fieldValueIter->second; // Only output fields that aren't overridden - if (overrideDataRowFields.find(name) == overrideDataRowFields.end()) - { + if (overrideDataRowFields.find(name) == overrideDataRowFields.end()) { root->addChildP(m_Pool.newNode(name, value)); } } fieldValueIter = overrideDataRowFields.begin(); - for (; fieldValueIter != overrideDataRowFields.end(); ++fieldValueIter) - { - const std::string &name = fieldValueIter->first; - const std::string &value = fieldValueIter->second; + for (; fieldValueIter != overrideDataRowFields.end(); ++fieldValueIter) { + const std::string& name = fieldValueIter->first; + const std::string& value = fieldValueIter->second; root->addChildP(m_Pool.newNode(name, value)); } @@ -95,16 +74,12 @@ bool CLineifiedXmlOutputWriter::writeRow(const TStrStrUMap &dataRowFields, return true; } -std::string CLineifiedXmlOutputWriter::internalString() const -{ - const_cast(m_OutStream).flush(); +std::string CLineifiedXmlOutputWriter::internalString() const { + const_cast(m_OutStream).flush(); // This is only of any value if the first constructor was used - it's up to // the caller to know this return m_StringOutputBuf.str(); } - - } } - diff --git a/lib/api/CModelPlotDataJsonWriter.cc b/lib/api/CModelPlotDataJsonWriter.cc index 9002d15b3f..88cf3d937f 100644 --- a/lib/api/CModelPlotDataJsonWriter.cc +++ b/lib/api/CModelPlotDataJsonWriter.cc @@ -7,10 +7,8 @@ #include #include -namespace ml -{ -namespace api 
-{ +namespace ml { +namespace api { // JSON field names const std::string CModelPlotDataJsonWriter::JOB_ID("job_id"); @@ -31,51 +29,57 @@ const std::string CModelPlotDataJsonWriter::MEDIAN("model_median"); const std::string CModelPlotDataJsonWriter::ACTUAL("actual"); const std::string CModelPlotDataJsonWriter::BUCKET_SPAN("bucket_span"); -CModelPlotDataJsonWriter::CModelPlotDataJsonWriter(core::CJsonOutputStreamWrapper &outStream) - : m_Writer(outStream) -{ +CModelPlotDataJsonWriter::CModelPlotDataJsonWriter(core::CJsonOutputStreamWrapper& outStream) : m_Writer(outStream) { } -void CModelPlotDataJsonWriter::writeFlat(const std::string &jobId, const model::CModelPlotData &data) -{ - const std::string &partitionFieldName = data.partitionFieldName(); - const std::string &partitionFieldValue = data.partitionFieldValue(); - const std::string &overFieldName = data.overFieldName(); - const std::string &byFieldName = data.byFieldName(); +void CModelPlotDataJsonWriter::writeFlat(const std::string& jobId, const model::CModelPlotData& data) { + const std::string& partitionFieldName = data.partitionFieldName(); + const std::string& partitionFieldValue = data.partitionFieldValue(); + const std::string& overFieldName = data.overFieldName(); + const std::string& byFieldName = data.byFieldName(); core_t::TTime time = data.time(); int detectorIndex = data.detectorIndex(); - for (TFeatureStrByFieldDataUMapUMapCItr featureItr = data.begin(); - featureItr != data.end(); - ++featureItr) - { + for (TFeatureStrByFieldDataUMapUMapCItr featureItr = data.begin(); featureItr != data.end(); ++featureItr) { std::string feature = model_t::print(featureItr->first); - const TStrByFieldDataUMap &byDataMap = featureItr->second; - for (TStrByFieldDataUMapCItr byItr = byDataMap.begin(); byItr != byDataMap.end(); ++byItr) - { - const std::string &byFieldValue = byItr->first; - const TByFieldData &byData = byItr->second; - const TStrDoublePrVec &values = byData.s_ValuesPerOverField; - if (values.empty()) - { + const TStrByFieldDataUMap& byDataMap = featureItr->second; + for (TStrByFieldDataUMapCItr byItr = byDataMap.begin(); byItr != byDataMap.end(); ++byItr) { + const std::string& byFieldValue = byItr->first; + const TByFieldData& byData = byItr->second; + const TStrDoublePrVec& values = byData.s_ValuesPerOverField; + if (values.empty()) { rapidjson::Value doc = m_Writer.makeObject(); - this->writeFlatRow(time, jobId, detectorIndex, partitionFieldName, partitionFieldValue, feature, - byFieldName, byFieldValue, byData, data.bucketSpan(), doc); + this->writeFlatRow(time, + jobId, + detectorIndex, + partitionFieldName, + partitionFieldValue, + feature, + byFieldName, + byFieldValue, + byData, + data.bucketSpan(), + doc); rapidjson::Value wrapper = m_Writer.makeObject(); m_Writer.addMember(MODEL_PLOT, doc, wrapper); m_Writer.write(wrapper); - } - else - { - for (std::size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) - { - const TStrDoublePr &keyValue = values[valueIndex]; + } else { + for (std::size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) { + const TStrDoublePr& keyValue = values[valueIndex]; rapidjson::Value doc = m_Writer.makeObject(); - this->writeFlatRow(time, jobId, detectorIndex, partitionFieldName, partitionFieldValue, feature, - byFieldName, byFieldValue, byData, data.bucketSpan(), doc); - if (!overFieldName.empty()) - { + this->writeFlatRow(time, + jobId, + detectorIndex, + partitionFieldName, + partitionFieldValue, + feature, + byFieldName, + byFieldValue, + byData, + 
data.bucketSpan(), + doc); + if (!overFieldName.empty()) { m_Writer.addStringFieldCopyToObj(OVER_FIELD_NAME, overFieldName, doc); m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, keyValue.first, doc, true); } @@ -93,30 +97,27 @@ void CModelPlotDataJsonWriter::writeFlat(const std::string &jobId, const model:: } void CModelPlotDataJsonWriter::writeFlatRow(core_t::TTime time, - const std::string &jobId, - int detectorIndex, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &feature, - const std::string &byFieldName, - const std::string &byFieldValue, - const TByFieldData &byData, - core_t::TTime bucketSpan, - rapidjson::Value &doc) -{ + const std::string& jobId, + int detectorIndex, + const std::string& partitionFieldName, + const std::string& partitionFieldValue, + const std::string& feature, + const std::string& byFieldName, + const std::string& byFieldValue, + const TByFieldData& byData, + core_t::TTime bucketSpan, + rapidjson::Value& doc) { m_Writer.addStringFieldCopyToObj(JOB_ID, jobId, doc, true); m_Writer.addIntFieldToObj(DETECTOR_INDEX, detectorIndex, doc); m_Writer.addStringFieldCopyToObj(FEATURE, feature, doc, true); // time is in Java format - milliseconds since the epoch m_Writer.addTimeFieldToObj(TIME, time, doc); m_Writer.addIntFieldToObj(BUCKET_SPAN, bucketSpan, doc); - if (!partitionFieldName.empty()) - { + if (!partitionFieldName.empty()) { m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, partitionFieldName, doc); m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, partitionFieldValue, doc, true); } - if (!byFieldName.empty()) - { + if (!byFieldName.empty()) { m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, byFieldName, doc); m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, byFieldValue, doc, true); } @@ -124,6 +125,5 @@ void CModelPlotDataJsonWriter::writeFlatRow(core_t::TTime time, m_Writer.addDoubleFieldToObj(UPPER, byData.s_UpperBound, doc); m_Writer.addDoubleFieldToObj(MEDIAN, byData.s_Median, doc); } - } } diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 72f7dcb81e..ff2b94286d 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -8,12 +8,9 @@ #include -namespace ml -{ -namespace api -{ -namespace -{ +namespace ml { +namespace api { +namespace { // JSON field names const std::string JOB_ID("job_id"); @@ -26,13 +23,11 @@ const std::string BUCKET_ALLOCATION_FAILURES_COUNT("bucket_allocation_failures_c const std::string MEMORY_STATUS("memory_status"); const std::string TIMESTAMP("timestamp"); const std::string LOG_TIME("log_time"); - } -void CModelSizeStatsJsonWriter::write(const std::string &jobId, - const model::CResourceMonitor::SResults &results, - core::CRapidJsonConcurrentLineWriter &writer) -{ +void CModelSizeStatsJsonWriter::write(const std::string& jobId, + const model::CResourceMonitor::SResults& results, + core::CRapidJsonConcurrentLineWriter& writer) { writer.String(MODEL_SIZE_STATS); writer.StartObject(); @@ -73,6 +68,5 @@ void CModelSizeStatsJsonWriter::write(const std::string &jobId, writer.EndObject(); } - } } diff --git a/lib/api/CModelSnapshotJsonWriter.cc b/lib/api/CModelSnapshotJsonWriter.cc index 7c994b39d5..e070bfb278 100644 --- a/lib/api/CModelSnapshotJsonWriter.cc +++ b/lib/api/CModelSnapshotJsonWriter.cc @@ -8,12 +8,9 @@ #include -namespace ml -{ -namespace api -{ -namespace -{ +namespace ml { +namespace api { +namespace { // JSON field names const std::string JOB_ID("job_id"); @@ -27,19 +24,15 @@ 
const std::string LATEST_RECORD_TIME("latest_record_time_stamp"); const std::string LATEST_RESULT_TIME("latest_result_time_stamp"); const std::string QUANTILES("quantiles"); const std::string QUANTILE_STATE("quantile_state"); - } -CModelSnapshotJsonWriter::CModelSnapshotJsonWriter(const std::string &jobId, core::CJsonOutputStreamWrapper &strmOut) - : m_JobId(jobId), - m_Writer(strmOut) -{ +CModelSnapshotJsonWriter::CModelSnapshotJsonWriter(const std::string& jobId, core::CJsonOutputStreamWrapper& strmOut) + : m_JobId(jobId), m_Writer(strmOut) { // Don't write any output in the constructor because, the way things work at // the moment, the output stream might be redirected after construction } -void CModelSnapshotJsonWriter::write(const SModelSnapshotReport &report) -{ +void CModelSnapshotJsonWriter::write(const SModelSnapshotReport& report) { m_Writer.StartObject(); m_Writer.String(MODEL_SNAPSHOT); m_Writer.StartObject(); @@ -62,13 +55,11 @@ void CModelSnapshotJsonWriter::write(const SModelSnapshotReport &report) CModelSizeStatsJsonWriter::write(m_JobId, report.s_ModelSizeStats, m_Writer); - if (report.s_LatestRecordTime > 0) - { + if (report.s_LatestRecordTime > 0) { m_Writer.String(LATEST_RECORD_TIME); m_Writer.Time(report.s_LatestRecordTime); } - if (report.s_LatestFinalResultTime > 0) - { + if (report.s_LatestFinalResultTime > 0) { m_Writer.String(LATEST_RESULT_TIME); m_Writer.Time(report.s_LatestFinalResultTime); } @@ -83,15 +74,14 @@ void CModelSnapshotJsonWriter::write(const SModelSnapshotReport &report) m_Writer.flush(); - LOG_DEBUG("Wrote model snapshot report with ID " << report.s_SnapshotId << - " for: " << report.s_Description << ", latest final results at " << report.s_LatestFinalResultTime); + LOG_DEBUG("Wrote model snapshot report with ID " << report.s_SnapshotId << " for: " << report.s_Description + << ", latest final results at " << report.s_LatestFinalResultTime); } -void CModelSnapshotJsonWriter::writeQuantileState(const std::string &jobId, - const std::string &state, +void CModelSnapshotJsonWriter::writeQuantileState(const std::string& jobId, + const std::string& state, core_t::TTime time, - core::CRapidJsonConcurrentLineWriter &writer) -{ + core::CRapidJsonConcurrentLineWriter& writer) { writer.StartObject(); writer.String(JOB_ID); writer.String(jobId); @@ -101,6 +91,5 @@ void CModelSnapshotJsonWriter::writeQuantileState(const std::string &jobId, writer.Time(time); writer.EndObject(); } - } } diff --git a/lib/api/CNullOutput.cc b/lib/api/CNullOutput.cc index a97221686c..faffe35fd6 100644 --- a/lib/api/CNullOutput.cc +++ b/lib/api/CNullOutput.cc @@ -5,29 +5,19 @@ */ #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ - -bool CNullOutput::fieldNames(const TStrVec &/*fieldNames*/, - const TStrVec &/*extraFieldNames*/) -{ +bool CNullOutput::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { return true; } -const COutputHandler::TStrVec &CNullOutput::fieldNames() const -{ +const COutputHandler::TStrVec& CNullOutput::fieldNames() const { return EMPTY_FIELD_NAMES; } -bool CNullOutput::writeRow(const TStrStrUMap &/*dataRowFields*/, - const TStrStrUMap &/*overrideDataRowFields*/) -{ +bool CNullOutput::writeRow(const TStrStrUMap& /*dataRowFields*/, const TStrStrUMap& /*overrideDataRowFields*/) { return true; } - } } - diff --git a/lib/api/COutputChainer.cc b/lib/api/COutputChainer.cc index 5bf0ab9321..66da336405 100644 --- a/lib/api/COutputChainer.cc +++ b/lib/api/COutputChainer.cc @@ -9,37 +9,22 @@ #include +namespace ml { 
+namespace api { -namespace ml -{ -namespace api -{ - - -COutputChainer::COutputChainer(CDataProcessor &dataProcessor) - : m_DataProcessor(dataProcessor) -{ +COutputChainer::COutputChainer(CDataProcessor& dataProcessor) : m_DataProcessor(dataProcessor) { } -void COutputChainer::newOutputStream() -{ +void COutputChainer::newOutputStream() { m_DataProcessor.newOutputStream(); } -bool COutputChainer::fieldNames(const TStrVec &fieldNames, - const TStrVec &extraFieldNames) -{ +bool COutputChainer::fieldNames(const TStrVec& fieldNames, const TStrVec& extraFieldNames) { m_FieldNames = fieldNames; // Only add extra field names if they're not already present - for (TStrVecCItr iter = extraFieldNames.begin(); - iter != extraFieldNames.end(); - ++iter) - { - if (std::find(m_FieldNames.begin(), - m_FieldNames.end(), - *iter) == m_FieldNames.end()) - { + for (TStrVecCItr iter = extraFieldNames.begin(); iter != extraFieldNames.end(); ++iter) { + if (std::find(m_FieldNames.begin(), m_FieldNames.end(), *iter) == m_FieldNames.end()) { m_FieldNames.push_back(*iter); } } @@ -48,8 +33,7 @@ bool COutputChainer::fieldNames(const TStrVec &fieldNames, m_WorkRecordFieldRefs.clear(); m_WorkRecordFields.clear(); - if (m_FieldNames.empty()) - { + if (m_FieldNames.empty()) { LOG_ERROR("Attempt to set empty field names"); return false; } @@ -60,10 +44,7 @@ bool COutputChainer::fieldNames(const TStrVec &fieldNames, // Pre-compute the hashes for each field name (assuming the hash function is // the same for our empty overrides map as it is for the ones provided by // callers) - for (TStrVecCItr iter = m_FieldNames.begin(); - iter != m_FieldNames.end(); - ++iter) - { + for (TStrVecCItr iter = m_FieldNames.begin(); iter != m_FieldNames.end(); ++iter) { m_Hashes.push_back(EMPTY_FIELD_OVERRIDES.hash_function()(*iter)); m_WorkRecordFieldRefs.push_back(boost::ref(m_WorkRecordFields[*iter])); } @@ -71,16 +52,12 @@ bool COutputChainer::fieldNames(const TStrVec &fieldNames, return true; } -const COutputHandler::TStrVec &COutputChainer::fieldNames() const -{ +const COutputHandler::TStrVec& COutputChainer::fieldNames() const { return m_FieldNames; } -bool COutputChainer::writeRow(const TStrStrUMap &dataRowFields, - const TStrStrUMap &overrideDataRowFields) -{ - if (m_FieldNames.empty()) - { +bool COutputChainer::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { + if (m_FieldNames.empty()) { LOG_ERROR("Attempt to output data before field names"); return false; } @@ -91,23 +68,13 @@ bool COutputChainer::writeRow(const TStrStrUMap &dataRowFields, TPreComputedHashVecCItr preComputedHashIter = m_Hashes.begin(); TStrRefVecCItr fieldRefIter = m_WorkRecordFieldRefs.begin(); for (TStrVecCItr fieldNameIter = m_FieldNames.begin(); - fieldNameIter != m_FieldNames.end() && - preComputedHashIter != m_Hashes.end() && - fieldRefIter != m_WorkRecordFieldRefs.end(); - ++fieldNameIter, ++preComputedHashIter, ++fieldRefIter) - { - TStrStrUMapCItr fieldValueIter = overrideDataRowFields.find(*fieldNameIter, - *preComputedHashIter, - pred); - if (fieldValueIter == overrideDataRowFields.end()) - { - fieldValueIter = dataRowFields.find(*fieldNameIter, - *preComputedHashIter, - pred); - if (fieldValueIter == dataRowFields.end()) - { - LOG_ERROR("Output fields do not include a value for field " << - *fieldNameIter); + fieldNameIter != m_FieldNames.end() && preComputedHashIter != m_Hashes.end() && fieldRefIter != m_WorkRecordFieldRefs.end(); + ++fieldNameIter, ++preComputedHashIter, ++fieldRefIter) { + TStrStrUMapCItr 
fieldValueIter = overrideDataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); + if (fieldValueIter == overrideDataRowFields.end()) { + fieldValueIter = dataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); + if (fieldValueIter == dataRowFields.end()) { + LOG_ERROR("Output fields do not include a value for field " << *fieldNameIter); return false; } } @@ -115,49 +82,36 @@ bool COutputChainer::writeRow(const TStrStrUMap &dataRowFields, // Use the start/length version of assign to bypass GNU copy-on-write, // since we don't want the strings in m_WorkRecordFields to share // representations with strings in our input maps. - fieldRefIter->get().assign(fieldValueIter->second, - 0, - fieldValueIter->second.length()); + fieldRefIter->get().assign(fieldValueIter->second, 0, fieldValueIter->second.length()); } - if (m_DataProcessor.handleRecord(m_WorkRecordFields) == false) - { - LOG_ERROR("Chained data processor function returned false for record:" << core_t::LINE_ENDING << - CDataProcessor::debugPrintRecord(m_WorkRecordFields)); + if (m_DataProcessor.handleRecord(m_WorkRecordFields) == false) { + LOG_ERROR("Chained data processor function returned false for record:" << core_t::LINE_ENDING + << CDataProcessor::debugPrintRecord(m_WorkRecordFields)); return false; } return true; } -void COutputChainer::finalise() -{ +void COutputChainer::finalise() { m_DataProcessor.finalise(); } -bool COutputChainer::restoreState(core::CDataSearcher &restoreSearcher, - core_t::TTime &completeToTime) -{ - return m_DataProcessor.restoreState(restoreSearcher, - completeToTime); +bool COutputChainer::restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime) { + return m_DataProcessor.restoreState(restoreSearcher, completeToTime); } -bool COutputChainer::persistState(core::CDataAdder &persister) -{ +bool COutputChainer::persistState(core::CDataAdder& persister) { return m_DataProcessor.persistState(persister); } -bool COutputChainer::periodicPersistState(CBackgroundPersister &persister) -{ +bool COutputChainer::periodicPersistState(CBackgroundPersister& persister) { return m_DataProcessor.periodicPersistState(persister); } -bool COutputChainer::consumesControlMessages() -{ +bool COutputChainer::consumesControlMessages() { return true; } - - } } - diff --git a/lib/api/COutputHandler.cc b/lib/api/COutputHandler.cc index 6e37a635b4..483bf6f00c 100644 --- a/lib/api/COutputHandler.cc +++ b/lib/api/COutputHandler.cc @@ -5,82 +5,61 @@ */ #include - -namespace ml -{ -namespace api -{ +namespace ml { +namespace api { // Initialise statics -const COutputHandler::TStrVec COutputHandler::EMPTY_FIELD_NAMES; +const COutputHandler::TStrVec COutputHandler::EMPTY_FIELD_NAMES; const COutputHandler::TStrStrUMap COutputHandler::EMPTY_FIELD_OVERRIDES; - -COutputHandler::COutputHandler() -{ +COutputHandler::COutputHandler() { } -COutputHandler::~COutputHandler() -{ +COutputHandler::~COutputHandler() { } -void COutputHandler::newOutputStream() -{ +void COutputHandler::newOutputStream() { // NOOP unless overridden } -bool COutputHandler::fieldNames(const TStrVec &fieldNames) -{ +bool COutputHandler::fieldNames(const TStrVec& fieldNames) { return this->fieldNames(fieldNames, EMPTY_FIELD_NAMES); } -bool COutputHandler::writeRow(const TStrStrUMap &dataRowFields) -{ +bool COutputHandler::writeRow(const TStrStrUMap& dataRowFields) { // Since the overrides are checked first, but we know there aren't any, it's // most efficient to pretend everything's an override return this->writeRow(EMPTY_FIELD_OVERRIDES, 
dataRowFields); } -void COutputHandler::finalise() -{ +void COutputHandler::finalise() { // NOOP unless overridden } -bool COutputHandler::restoreState(core::CDataSearcher & /* restoreSearcher */, - core_t::TTime & /* completeToTime */) -{ +bool COutputHandler::restoreState(core::CDataSearcher& /* restoreSearcher */, core_t::TTime& /* completeToTime */) { // NOOP unless overridden return true; } -bool COutputHandler::persistState(core::CDataAdder & /* persister */) -{ +bool COutputHandler::persistState(core::CDataAdder& /* persister */) { // NOOP unless overridden return true; } -bool COutputHandler::periodicPersistState(CBackgroundPersister & /* persister */) -{ +bool COutputHandler::periodicPersistState(CBackgroundPersister& /* persister */) { // NOOP unless overridden return true; } -COutputHandler::CPreComputedHash::CPreComputedHash(size_t hash) - : m_Hash(hash) -{ +COutputHandler::CPreComputedHash::CPreComputedHash(size_t hash) : m_Hash(hash) { } -size_t COutputHandler::CPreComputedHash::operator()(const std::string &) const -{ +size_t COutputHandler::CPreComputedHash::operator()(const std::string&) const { return m_Hash; } -bool COutputHandler::consumesControlMessages() -{ +bool COutputHandler::consumesControlMessages() { return false; } - - } } - diff --git a/lib/api/CResultNormalizer.cc b/lib/api/CResultNormalizer.cc index 915df63623..ef83277d46 100644 --- a/lib/api/CResultNormalizer.cc +++ b/lib/api/CResultNormalizer.cc @@ -11,12 +11,8 @@ #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { // Initialise statics const std::string CResultNormalizer::LEVEL("level"); @@ -34,46 +30,36 @@ const std::string CResultNormalizer::BUCKET_INFLUENCER_LEVEL("inflb"); const std::string CResultNormalizer::INFLUENCER_LEVEL("infl"); const std::string CResultNormalizer::ZERO("0"); - -CResultNormalizer::CResultNormalizer(const model::CAnomalyDetectorModelConfig &modelConfig, - COutputHandler &outputHandler) +CResultNormalizer::CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig, COutputHandler& outputHandler) : m_ModelConfig(modelConfig), m_OutputHandler(outputHandler), m_WriteFieldNames(true), m_OutputFieldNormalizedScore(m_OutputFields[NORMALIZED_SCORE_NAME]), - m_Normalizer(m_ModelConfig) -{ + m_Normalizer(m_ModelConfig) { } -bool CResultNormalizer::initNormalizer(const std::string &stateFileName) -{ +bool CResultNormalizer::initNormalizer(const std::string& stateFileName) { std::ifstream inputStream(stateFileName.c_str()); - model::CHierarchicalResultsNormalizer::ERestoreOutcome outcome( - m_Normalizer.fromJsonStream(inputStream)); - if (outcome != model::CHierarchicalResultsNormalizer::E_Ok) - { + model::CHierarchicalResultsNormalizer::ERestoreOutcome outcome(m_Normalizer.fromJsonStream(inputStream)); + if (outcome != model::CHierarchicalResultsNormalizer::E_Ok) { LOG_ERROR("Failed to restore JSON state for quantiles"); return false; } return true; } -bool CResultNormalizer::handleRecord(const TStrStrUMap &dataRowFields) -{ - if (m_WriteFieldNames) - { +bool CResultNormalizer::handleRecord(const TStrStrUMap& dataRowFields) { + if (m_WriteFieldNames) { TStrVec fieldNames; fieldNames.reserve(dataRowFields.size()); - for (const auto &entry : dataRowFields) - { + for (const auto& entry : dataRowFields) { fieldNames.push_back(entry.first); } TStrVec extraFieldNames; extraFieldNames.push_back(NORMALIZED_SCORE_NAME); - if (m_OutputHandler.fieldNames(fieldNames, extraFieldNames) == false) - { + if (m_OutputHandler.fieldNames(fieldNames, extraFieldNames) == 
false) { LOG_ERROR("Unable to set field names for output"); return false; } @@ -89,79 +75,47 @@ bool CResultNormalizer::handleRecord(const TStrStrUMap &dataRowFields) double probability(0.0); bool isValidRecord(false); - if (m_ModelConfig.perPartitionNormalization()) - { - isValidRecord = parseDataFields(dataRowFields, level, partition, partitionValue, - person, function, valueFieldName, probability); - } - else - { - isValidRecord = parseDataFields(dataRowFields, level, partition, person, - function, valueFieldName, probability); + if (m_ModelConfig.perPartitionNormalization()) { + isValidRecord = parseDataFields(dataRowFields, level, partition, partitionValue, person, function, valueFieldName, probability); + } else { + isValidRecord = parseDataFields(dataRowFields, level, partition, person, function, valueFieldName, probability); } - std::string partitionKey = m_ModelConfig.perPartitionNormalization() - ? partition + partitionValue : partition; + std::string partitionKey = m_ModelConfig.perPartitionNormalization() ? partition + partitionValue : partition; - if (isValidRecord) - { - const model::CAnomalyScore::CNormalizer *levelNormalizer = 0; - double score = probability > m_ModelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::anomalyScore(probability); - if (level == ROOT_LEVEL) - { + if (isValidRecord) { + const model::CAnomalyScore::CNormalizer* levelNormalizer = 0; + double score = probability > m_ModelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); + if (level == ROOT_LEVEL) { levelNormalizer = &m_Normalizer.bucketNormalizer(); - } - else if (level == LEAF_LEVEL) - { + } else if (level == LEAF_LEVEL) { levelNormalizer = m_Normalizer.leafNormalizer(partitionKey, person, function, valueFieldName); - } - else if (level == PARTITION_LEVEL) - { + } else if (level == PARTITION_LEVEL) { levelNormalizer = m_Normalizer.partitionNormalizer(partitionKey); - } - else if (level == BUCKET_INFLUENCER_LEVEL) - { + } else if (level == BUCKET_INFLUENCER_LEVEL) { levelNormalizer = m_Normalizer.influencerBucketNormalizer(person); - } - else if (level == INFLUENCER_LEVEL) - { + } else if (level == INFLUENCER_LEVEL) { levelNormalizer = m_Normalizer.influencerNormalizer(person); - } - else - { + } else { LOG_ERROR("Unexpected : " << level); } - if (levelNormalizer != 0) - { - if (levelNormalizer->canNormalize() && - levelNormalizer->normalize(score) == false) - { - LOG_ERROR("Failed to normalize score " << score << - " at level " << level << - " with partition field name " << partition << - " and person field name " << person); + if (levelNormalizer != 0) { + if (levelNormalizer->canNormalize() && levelNormalizer->normalize(score) == false) { + LOG_ERROR("Failed to normalize score " << score << " at level " << level << " with partition field name " << partition + << " and person field name " << person); } - } - else - { + } else { LOG_ERROR("No normalizer available" - " at level '" << level << - "' with partition field name '" << partition << - "' and person field name '" << person << "'"); + " at level '" + << level << "' with partition field name '" << partition << "' and person field name '" << person << "'"); } - m_OutputFieldNormalizedScore = - (score > 0.0) ? - core::CStringUtils::typeToStringPretty(score) : ZERO; - } - else - { + m_OutputFieldNormalizedScore = (score > 0.0) ? 
core::CStringUtils::typeToStringPretty(score) : ZERO; + } else { m_OutputFieldNormalizedScore.clear(); } - if (m_OutputHandler.writeRow(dataRowFields, m_OutputFields) == false) - { + if (m_OutputHandler.writeRow(dataRowFields, m_OutputFields) == false) { LOG_ERROR("Unable to write normalized output"); return false; } @@ -169,34 +123,32 @@ bool CResultNormalizer::handleRecord(const TStrStrUMap &dataRowFields) return true; } -bool CResultNormalizer::parseDataFields(const TStrStrUMap &dataRowFields, - std::string &level, std::string &partition, - std::string &person, std::string &function, - std::string &valueFieldName, double &probability) -{ - return this->parseDataField(dataRowFields, LEVEL, level) - && this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) - && this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) - && this->parseDataField(dataRowFields, FUNCTION_NAME, function) - && this->parseDataField(dataRowFields, VALUE_FIELD_NAME, valueFieldName) - && this->parseDataField(dataRowFields, PROBABILITY_NAME, probability); +bool CResultNormalizer::parseDataFields(const TStrStrUMap& dataRowFields, + std::string& level, + std::string& partition, + std::string& person, + std::string& function, + std::string& valueFieldName, + double& probability) { + return this->parseDataField(dataRowFields, LEVEL, level) && this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) && + this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) && this->parseDataField(dataRowFields, FUNCTION_NAME, function) && + this->parseDataField(dataRowFields, VALUE_FIELD_NAME, valueFieldName) && + this->parseDataField(dataRowFields, PROBABILITY_NAME, probability); } -bool CResultNormalizer::parseDataFields(const TStrStrUMap &dataRowFields, - std::string &level, std::string &partition, - std::string &partitionValue, std::string &person, - std::string &function, std::string &valueFieldName, - double &probability) -{ - return this->parseDataField(dataRowFields, LEVEL, level) - && this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) - && this->parseDataField(dataRowFields, PARTITION_FIELD_VALUE, partitionValue) - && this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) - && this->parseDataField(dataRowFields, FUNCTION_NAME, function) - && this->parseDataField(dataRowFields, VALUE_FIELD_NAME, valueFieldName) - && this->parseDataField(dataRowFields, PROBABILITY_NAME, probability); +bool CResultNormalizer::parseDataFields(const TStrStrUMap& dataRowFields, + std::string& level, + std::string& partition, + std::string& partitionValue, + std::string& person, + std::string& function, + std::string& valueFieldName, + double& probability) { + return this->parseDataField(dataRowFields, LEVEL, level) && this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) && + this->parseDataField(dataRowFields, PARTITION_FIELD_VALUE, partitionValue) && + this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) && this->parseDataField(dataRowFields, FUNCTION_NAME, function) && + this->parseDataField(dataRowFields, VALUE_FIELD_NAME, valueFieldName) && + this->parseDataField(dataRowFields, PROBABILITY_NAME, probability); } - } } - diff --git a/lib/api/CSingleStreamDataAdder.cc b/lib/api/CSingleStreamDataAdder.cc index 30d4fc611f..5dfdbdf33b 100644 --- a/lib/api/CSingleStreamDataAdder.cc +++ b/lib/api/CSingleStreamDataAdder.cc @@ -9,26 +9,16 @@ #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { const size_t 
CSingleStreamDataAdder::MAX_DOCUMENT_SIZE(16 * 1024 * 1024); // 16MB

-CSingleStreamDataAdder::CSingleStreamDataAdder(const TOStreamP &stream)
-    : m_Stream(stream)
-{
+CSingleStreamDataAdder::CSingleStreamDataAdder(const TOStreamP& stream) : m_Stream(stream) {
 }

-CSingleStreamDataAdder::TOStreamP
-CSingleStreamDataAdder::addStreamed(const std::string &/*index*/,
-                                    const std::string &id)
-{
-    if (m_Stream != 0 && !m_Stream->bad())
-    {
+CSingleStreamDataAdder::TOStreamP CSingleStreamDataAdder::addStreamed(const std::string& /*index*/, const std::string& id) {
+    if (m_Stream != 0 && !m_Stream->bad()) {
         // Start with metadata, leaving the index for the receiving code to set
         (*m_Stream) << "{\"index\":{\"_id\":\"" << id << "\"}}\n";
     }
@@ -36,16 +26,13 @@ CSingleStreamDataAdder::addStreamed(const std::string &/*index*/,
     return m_Stream;
 }

-bool CSingleStreamDataAdder::streamComplete(TOStreamP &stream, bool force)
-{
-    if (stream != m_Stream)
-    {
+bool CSingleStreamDataAdder::streamComplete(TOStreamP& stream, bool force) {
+    if (stream != m_Stream) {
         LOG_ERROR("Attempt to use the single stream data adder with multiple streams");
         return false;
     }

-    if (stream != 0 && !stream->bad())
-    {
+    if (stream != 0 && !stream->bad()) {
         // Each Elasticsearch document must be followed by a newline
         stream->put('\n');

@@ -53,8 +40,7 @@ bool CSingleStreamDataAdder::streamComplete(TOStreamP &stream, bool force)
         stream->put('\0');

         // If force is set, flush to ensure all data is pushed through to the remote end
-        if (force)
-        {
+        if (force) {
             stream->flush();
         }
     }
@@ -62,12 +48,8 @@ bool CSingleStreamDataAdder::streamComplete(TOStreamP &stream, bool force)
     return stream != 0 && !stream->bad();
 }

-std::size_t CSingleStreamDataAdder::maxDocumentSize() const
-{
+std::size_t CSingleStreamDataAdder::maxDocumentSize() const {
     return MAX_DOCUMENT_SIZE;
 }
-
-
 }
 }
-
diff --git a/lib/api/CSingleStreamSearcher.cc b/lib/api/CSingleStreamSearcher.cc
index deb16da64a..f3516c5628 100644
--- a/lib/api/CSingleStreamSearcher.cc
+++ b/lib/api/CSingleStreamSearcher.cc
@@ -7,29 +7,19 @@

 #include

-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {

-CSingleStreamSearcher::CSingleStreamSearcher(const TIStreamP &stream)
-    : m_Stream(stream)
-{
+CSingleStreamSearcher::CSingleStreamSearcher(const TIStreamP& stream) : m_Stream(stream) {
 }

-CSingleStreamSearcher::TIStreamP
-CSingleStreamSearcher::search(size_t /*currentDocNum*/, size_t /*limit*/)
-{
+CSingleStreamSearcher::TIStreamP CSingleStreamSearcher::search(size_t /*currentDocNum*/, size_t /*limit*/) {
     // documents in a stream are separated by '\0'; skip over it so as not to confuse clients (see #279)
-    if (m_Stream->peek() == 0)
-    {
+    if (m_Stream->peek() == 0) {
         m_Stream->get();
     }
     return m_Stream;
 }
-
-
 }
 }
-
diff --git a/lib/api/CStateRestoreStreamFilter.cc b/lib/api/CStateRestoreStreamFilter.cc
index 68449ed5c3..35f51639da 100644
--- a/lib/api/CStateRestoreStreamFilter.cc
+++ b/lib/api/CStateRestoreStreamFilter.cc
@@ -7,18 +7,14 @@

 #include

-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {

 CStateRestoreStreamFilter::CStateRestoreStreamFilter()
-    :boost::iostreams::basic_line_filter(true), m_DocCount(0), m_RewrotePreviousLine(false)
-{
+    : boost::iostreams::basic_line_filter(true), m_DocCount(0), m_RewrotePreviousLine(false) {
 }

-CStateRestoreStreamFilter::string_type CStateRestoreStreamFilter::do_filter(const string_type &line)
-{
+CStateRestoreStreamFilter::string_type CStateRestoreStreamFilter::do_filter(const string_type& line) {
     // Persist format is:
     // { bulk
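// Hedged usage sketch (inert; not part of this patch), assuming TOStreamP can wrap a
// std::ostringstream the same way it wraps std::ofstream elsewhere in this patch; it
// shows the on-the-wire shape the adder above produces.
#if 0
ml::api::CSingleStreamDataAdder::TOStreamP stream(new std::ostringstream());
ml::api::CSingleStreamDataAdder adder(stream);
ml::api::CSingleStreamDataAdder::TOStreamP doc = adder.addStreamed("ignored", "doc-1");
(*doc) << "{\"state\":\"...\"}"; // document source follows the metadata line
adder.streamComplete(doc, true); // appends '\n', then the '\0' separator, then flushes
// stream now holds: {"index":{"_id":"doc-1"}}\n{"state":"..."}\n\0
#endif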
metadata } // { document source } @@ -28,61 +24,47 @@ CStateRestoreStreamFilter::string_type CStateRestoreStreamFilter::do_filter(cons // { Elasticsearch get response } // '\0' - if (line.empty()) - { + if (line.empty()) { return line; } size_t leftOffset = 0; size_t rightOffset = line.length() - 1; - if (line[0] == '\0') - { - if (line.length() == 1) - { + if (line[0] == '\0') { + if (line.length() == 1) { return std::string(); } leftOffset++; } - if (line.compare(leftOffset, 16, "{\"index\":{\"_id\":") == 0) - { + if (line.compare(leftOffset, 16, "{\"index\":{\"_id\":") == 0) { m_DocCount++; // Strip the leading {"index": and the two closing braces - leftOffset +=9; + leftOffset += 9; - for (size_t count = 0; count < 2; ++count) - { + for (size_t count = 0; count < 2; ++count) { size_t lastBrace(line.find_last_of('}', rightOffset)); - if (lastBrace != std::string::npos) - { + if (lastBrace != std::string::npos) { rightOffset = lastBrace - 1; } } m_RewrotePreviousLine = true; - return line.substr(leftOffset, rightOffset - leftOffset + 1) - + ",\"_version\":1,\"found\":true,\"_source\":"; + return line.substr(leftOffset, rightOffset - leftOffset + 1) + ",\"_version\":1,\"found\":true,\"_source\":"; - } - else if (m_RewrotePreviousLine) - { + } else if (m_RewrotePreviousLine) { return line + '}' + '\0' + '\n'; - } - else - { + } else { m_RewrotePreviousLine = false; return line; } } -size_t CStateRestoreStreamFilter::getDocCount() const -{ +size_t CStateRestoreStreamFilter::getDocCount() const { return m_DocCount; } - } } - diff --git a/lib/api/CTokenListReverseSearchCreator.cc b/lib/api/CTokenListReverseSearchCreator.cc index 9a580db2b7..4330056073 100644 --- a/lib/api/CTokenListReverseSearchCreator.cc +++ b/lib/api/CTokenListReverseSearchCreator.cc @@ -7,81 +7,65 @@ #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ - -CTokenListReverseSearchCreator::CTokenListReverseSearchCreator(const std::string &fieldName) - : CTokenListReverseSearchCreatorIntf(fieldName) -{ +CTokenListReverseSearchCreator::CTokenListReverseSearchCreator(const std::string& fieldName) + : CTokenListReverseSearchCreatorIntf(fieldName) { } -size_t CTokenListReverseSearchCreator::availableCost() const -{ +size_t CTokenListReverseSearchCreator::availableCost() const { // This is pretty arbitrary, but MUST be less than the maximum length of a // field in ES (currently 32766 bytes), and ideally should be quite a lot // less as a huge reverse search is pretty unwieldy return 10000; } -size_t CTokenListReverseSearchCreator::costOfToken(const std::string &token, - size_t numOccurrences) const -{ +size_t CTokenListReverseSearchCreator::costOfToken(const std::string& token, size_t numOccurrences) const { size_t tokenLength = token.length(); - return ( - 1 + tokenLength + // length of what we add to the terms (part 1) - 3 + tokenLength // length of what we add to the regex (part 2) - ) * numOccurrences; + return (1 + tokenLength + // length of what we add to the terms (part 1) + 3 + tokenLength // length of what we add to the regex (part 2) + ) * + numOccurrences; } -bool CTokenListReverseSearchCreator::createNullSearch(std::string &part1, - std::string &part2) const -{ +bool CTokenListReverseSearchCreator::createNullSearch(std::string& part1, std::string& part2) const { part1.clear(); part2.clear(); return true; } bool CTokenListReverseSearchCreator::createNoUniqueTokenSearch(int /*type*/, - const std::string &/*example*/, + const std::string& /*example*/, size_t /*maxMatchingStringLen*/, - std::string 
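// Hedged usage sketch (inert; not part of this patch): since CStateRestoreStreamFilter
// above is a boost::iostreams line filter, persisted state can be rewritten on the fly
// by pushing it into a filtering stream; 'persistedStream' is a hypothetical
// std::istream holding previously persisted state.
#if 0
boost::iostreams::filtering_istream restoreStream;
restoreStream.push(ml::api::CStateRestoreStreamFilter());
restoreStream.push(persistedStream);
// Reading restoreStream now yields simulated Elasticsearch get responses, i.e.
// {"_id":"...","_version":1,"found":true,"_source":{...}} followed by '\0'.
#endif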
&part1, - std::string &part2) const -{ + std::string& part1, + std::string& part2) const { part1.clear(); part2.clear(); return true; } void CTokenListReverseSearchCreator::initStandardSearch(int /*type*/, - const std::string &/*example*/, + const std::string& /*example*/, size_t /*maxMatchingStringLen*/, - std::string &part1, - std::string &part2) const -{ + std::string& part1, + std::string& part2) const { part1.clear(); part2.clear(); } -void CTokenListReverseSearchCreator::addCommonUniqueToken(const std::string &/*token*/, - std::string &/*part1*/, - std::string &/*part2*/) const -{ +void CTokenListReverseSearchCreator::addCommonUniqueToken(const std::string& /*token*/, + std::string& /*part1*/, + std::string& /*part2*/) const { } -void CTokenListReverseSearchCreator::addInOrderCommonToken(const std::string &token, +void CTokenListReverseSearchCreator::addInOrderCommonToken(const std::string& token, bool first, - std::string &part1, - std::string &part2) const -{ - if (first) - { + std::string& part1, + std::string& part2) const { + if (first) { part2 += ".*?"; - } - else - { + } else { part1 += ' '; part2 += ".+?"; } @@ -89,12 +73,8 @@ void CTokenListReverseSearchCreator::addInOrderCommonToken(const std::string &to part2 += core::CRegex::escapeRegexSpecial(token); } -void CTokenListReverseSearchCreator::closeStandardSearch(std::string &/*part1*/, - std::string &part2) const -{ +void CTokenListReverseSearchCreator::closeStandardSearch(std::string& /*part1*/, std::string& part2) const { part2 += ".*"; } - } } - diff --git a/lib/api/CTokenListReverseSearchCreatorIntf.cc b/lib/api/CTokenListReverseSearchCreatorIntf.cc index a4194ba89f..b8466062f3 100644 --- a/lib/api/CTokenListReverseSearchCreatorIntf.cc +++ b/lib/api/CTokenListReverseSearchCreatorIntf.cc @@ -5,34 +5,21 @@ */ #include +namespace ml { +namespace api { -namespace ml -{ -namespace api -{ - - -CTokenListReverseSearchCreatorIntf::CTokenListReverseSearchCreatorIntf(const std::string &fieldName) - : m_FieldName(fieldName) -{ +CTokenListReverseSearchCreatorIntf::CTokenListReverseSearchCreatorIntf(const std::string& fieldName) : m_FieldName(fieldName) { } -CTokenListReverseSearchCreatorIntf::~CTokenListReverseSearchCreatorIntf() -{ +CTokenListReverseSearchCreatorIntf::~CTokenListReverseSearchCreatorIntf() { } -void CTokenListReverseSearchCreatorIntf::closeStandardSearch(std::string &/*part1*/, - std::string &/*part2*/) const -{ +void CTokenListReverseSearchCreatorIntf::closeStandardSearch(std::string& /*part1*/, std::string& /*part2*/) const { // Default is to do nothing } -const std::string &CTokenListReverseSearchCreatorIntf::fieldName() const -{ +const std::string& CTokenListReverseSearchCreatorIntf::fieldName() const { return m_FieldName; } - - } } - diff --git a/lib/api/CTokenListType.cc b/lib/api/CTokenListType.cc index df8da20615..2ed9994131 100644 --- a/lib/api/CTokenListType.cc +++ b/lib/api/CTokenListType.cc @@ -15,16 +15,11 @@ #include #include - -namespace ml -{ -namespace api -{ - +namespace ml { +namespace api { // We use short field names to reduce the state size -namespace -{ +namespace { const std::string BASE_STRING("a"); const std::string BASE_TOKEN_ID("b"); const std::string BASE_TOKEN_WEIGHT("c"); @@ -38,28 +33,18 @@ const std::string NUM_MATCHES("i"); const std::string EMPTY_STRING; //! 
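// Hedged sketch (inert; not part of this patch), assuming 'creator' is the
// CTokenListReverseSearchCreator above: part 1 accumulates the space-separated terms
// (the token append itself is in a hunk elided here) while part 2 grows a lazy regex,
// and costOfToken() charges (1 + len) + (3 + len) per occurrence, e.g.
// ((1 + 3) + (3 + 3)) * 2 == 20 for "foo" seen twice, against the 10000 budget
// returned by availableCost().
#if 0
std::string part1, part2;
creator.addInOrderCommonToken("foo", true, part1, part2);  // part2 == ".*?foo"
creator.addInOrderCommonToken("bar", false, part1, part2); // part2 == ".*?foo.+?bar"
creator.closeStandardSearch(part1, part2);                 // part2 == ".*?foo.+?bar.*"
#endif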
Functor for comparing just the first element of a pair of sizes -class CSizePairFirstElementLess : public std::binary_function -{ - public: - bool operator()(CTokenListType::TSizeSizePr lhs, - CTokenListType::TSizeSizePr rhs) - { - return lhs.first < rhs.first; - } +class CSizePairFirstElementLess : public std::binary_function { +public: + bool operator()(CTokenListType::TSizeSizePr lhs, CTokenListType::TSizeSizePr rhs) { return lhs.first < rhs.first; } }; - - } - CTokenListType::CTokenListType(bool isDryRun, - const std::string &baseString, + const std::string& baseString, size_t rawStringLen, - const TSizeSizePrVec &baseTokenIds, + const TSizeSizePrVec& baseTokenIds, size_t baseWeight, - const TSizeSizeMap &uniqueTokenIds) + const TSizeSizeMap& uniqueTokenIds) : m_BaseString(baseString), m_BaseTokenIds(baseTokenIds), m_BaseWeight(baseWeight), @@ -70,155 +55,106 @@ CTokenListType::CTokenListType(bool isDryRun, m_CommonUniqueTokenIds(uniqueTokenIds.begin(), uniqueTokenIds.end()), m_CommonUniqueTokenWeight(0), m_OrigUniqueTokenWeight(0), - m_NumMatches(isDryRun ? 0 : 1) -{ - for (TSizeSizeMapCItr iter = uniqueTokenIds.begin(); - iter != uniqueTokenIds.end(); - ++iter) - { + m_NumMatches(isDryRun ? 0 : 1) { + for (TSizeSizeMapCItr iter = uniqueTokenIds.begin(); iter != uniqueTokenIds.end(); ++iter) { m_CommonUniqueTokenWeight += iter->second; } m_OrigUniqueTokenWeight = m_CommonUniqueTokenWeight; } -CTokenListType::CTokenListType(core::CStateRestoreTraverser &traverser) +CTokenListType::CTokenListType(core::CStateRestoreTraverser& traverser) : m_BaseWeight(0), m_MaxStringLen(0), m_OutOfOrderCommonTokenIndex(0), m_CommonUniqueTokenWeight(0), m_OrigUniqueTokenWeight(0), - m_NumMatches(0) -{ - traverser.traverseSubLevel(boost::bind(&CTokenListType::acceptRestoreTraverser, - this, - _1)); - + m_NumMatches(0) { + traverser.traverseSubLevel(boost::bind(&CTokenListType::acceptRestoreTraverser, this, _1)); } -bool CTokenListType::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CTokenListType::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { bool expectWeight(false); - do - { - const std::string &name = traverser.name(); - if (name == BASE_STRING) - { + do { + const std::string& name = traverser.name(); + if (name == BASE_STRING) { m_BaseString = traverser.value(); - } - else if (name == BASE_TOKEN_ID) - { + } else if (name == BASE_TOKEN_ID) { TSizeSizePr tokenAndWeight(0, 0); - if (core::CStringUtils::stringToType(traverser.value(), - tokenAndWeight.first) == false) - { + if (core::CStringUtils::stringToType(traverser.value(), tokenAndWeight.first) == false) { LOG_ERROR("Invalid base token ID in " << traverser.value()); return false; } m_BaseTokenIds.push_back(tokenAndWeight); - } - else if (name == BASE_TOKEN_WEIGHT) - { - if (m_BaseTokenIds.empty()) - { - LOG_ERROR("Base token weight precedes base token ID in " << - traverser.value()); + } else if (name == BASE_TOKEN_WEIGHT) { + if (m_BaseTokenIds.empty()) { + LOG_ERROR("Base token weight precedes base token ID in " << traverser.value()); return false; } - TSizeSizePr &tokenAndWeight = m_BaseTokenIds.back(); - if (core::CStringUtils::stringToType(traverser.value(), - tokenAndWeight.second) == false) - { + TSizeSizePr& tokenAndWeight = m_BaseTokenIds.back(); + if (core::CStringUtils::stringToType(traverser.value(), tokenAndWeight.second) == false) { LOG_ERROR("Invalid base token weight in " << traverser.value()); return false; } m_BaseWeight += tokenAndWeight.second; - } - else if (name == MAX_STRING_LEN) - 
{
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 m_MaxStringLen) == false)
-            {
+        } else if (name == MAX_STRING_LEN) {
+            if (core::CStringUtils::stringToType(traverser.value(), m_MaxStringLen) == false) {
                 LOG_ERROR("Invalid maximum string length in " << traverser.value());
                 return false;
             }
-        }
-        else if (name == OUT_OF_ORDER_COMMON_TOKEN_INDEX)
-        {
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 m_OutOfOrderCommonTokenIndex) == false)
-            {
+        } else if (name == OUT_OF_ORDER_COMMON_TOKEN_INDEX) {
+            if (core::CStringUtils::stringToType(traverser.value(), m_OutOfOrderCommonTokenIndex) == false) {
                 LOG_ERROR("Invalid out-of-order common token index in " << traverser.value());
                 return false;
             }
-        }
-        else if (name == COMMON_UNIQUE_TOKEN_ID)
-        {
+        } else if (name == COMMON_UNIQUE_TOKEN_ID) {
             TSizeSizePr tokenAndWeight(0, 0);
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 tokenAndWeight.first) == false)
-            {
+            if (core::CStringUtils::stringToType(traverser.value(), tokenAndWeight.first) == false) {
                 LOG_ERROR("Invalid common unique token ID in " << traverser.value());
                 return false;
             }
             m_CommonUniqueTokenIds.push_back(tokenAndWeight);

             expectWeight = true;
-        }
-        else if (name == COMMON_UNIQUE_TOKEN_WEIGHT)
-        {
-            if (!expectWeight)
-            {
-                LOG_ERROR("Common unique token weight precedes common unique token ID in " <<
-                          traverser.value());
+        } else if (name == COMMON_UNIQUE_TOKEN_WEIGHT) {
+            if (!expectWeight) {
+                LOG_ERROR("Common unique token weight precedes common unique token ID in " << traverser.value());
                 return false;
             }
-            TSizeSizePr &tokenAndWeight = m_CommonUniqueTokenIds.back();
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 tokenAndWeight.second) == false)
-            {
+            TSizeSizePr& tokenAndWeight = m_CommonUniqueTokenIds.back();
+            if (core::CStringUtils::stringToType(traverser.value(), tokenAndWeight.second) == false) {
                 LOG_ERROR("Invalid common unique token weight in " << traverser.value());
                 return false;
             }
             expectWeight = false;
             m_CommonUniqueTokenWeight += tokenAndWeight.second;
-        }
-        else if (name == ORIG_UNIQUE_TOKEN_WEIGHT)
-        {
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 m_OrigUniqueTokenWeight) == false)
-            {
+        } else if (name == ORIG_UNIQUE_TOKEN_WEIGHT) {
+            if (core::CStringUtils::stringToType(traverser.value(), m_OrigUniqueTokenWeight) == false) {
                 LOG_ERROR("Invalid original unique token weight in " << traverser.value());
                 return false;
             }
-        }
-        else if (name == NUM_MATCHES)
-        {
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 m_NumMatches) == false)
-            {
+        } else if (name == NUM_MATCHES) {
+            if (core::CStringUtils::stringToType(traverser.value(), m_NumMatches) == false) {
                 LOG_ERROR("Invalid number of matches in " << traverser.value());
                 return false;
             }
         }
-    }
-    while (traverser.next());
+    } while (traverser.next());

     return true;
 }

 bool CTokenListType::addString(bool isDryRun,
-                               const std::string & /* str */,
+                               const std::string& /* str */,
                                size_t rawStringLen,
-                               const TSizeSizePrVec &tokenIds,
-                               const TSizeSizeMap &uniqueTokenIds,
-                               double /* similarity */)
-{
+                               const TSizeSizePrVec& tokenIds,
+                               const TSizeSizeMap& uniqueTokenIds,
+                               double /* similarity */) {
     bool changed(false);

     // Remove any token IDs from the common unique token map that aren't present
@@ -226,25 +162,16 @@ bool CTokenListType::addString(bool isDryRun,
     // accordingly
     TSizeSizePrVecItr commonIter = m_CommonUniqueTokenIds.begin();
     TSizeSizeMapCItr newIter = uniqueTokenIds.begin();
-    while (commonIter != m_CommonUniqueTokenIds.end())
-    {
-        if (newIter == uniqueTokenIds.end()
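// Descriptive note (not part of this patch): acceptRestoreTraverser() above relies on
// pairing order in the persisted state; a base token ID (name "b") must be followed
// immediately by its weight (name "c"), and likewise for the common unique token
// pairs, which is what the expectWeight flag enforces.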
|| - commonIter->first < newIter->first) - { + while (commonIter != m_CommonUniqueTokenIds.end()) { + if (newIter == uniqueTokenIds.end() || commonIter->first < newIter->first) { m_CommonUniqueTokenWeight -= commonIter->second; commonIter = m_CommonUniqueTokenIds.erase(commonIter); changed = true; - } - else - { - if (commonIter->first == newIter->first) - { - if (commonIter->second == newIter->second) - { + } else { + if (commonIter->first == newIter->first) { + if (commonIter->second == newIter->second) { ++commonIter; - } - else - { + } else { m_CommonUniqueTokenWeight -= commonIter->second; commonIter = m_CommonUniqueTokenIds.erase(commonIter); changed = true; @@ -258,14 +185,11 @@ bool CTokenListType::addString(bool isDryRun, // aren't in the same order in the new string, and adjust the common weight // accordingly TSizeSizePrVecCItr testIter = tokenIds.begin(); - for (size_t index = 0; index < m_OutOfOrderCommonTokenIndex; ++index) - { + for (size_t index = 0; index < m_OutOfOrderCommonTokenIndex; ++index) { // Ignore tokens that are not in the common unique tokens - if (std::binary_search(m_CommonUniqueTokenIds.begin(), - m_CommonUniqueTokenIds.end(), - m_BaseTokenIds[index], - CSizePairFirstElementLess()) == false) - { + if (std::binary_search( + m_CommonUniqueTokenIds.begin(), m_CommonUniqueTokenIds.end(), m_BaseTokenIds[index], CSizePairFirstElementLess()) == + false) { continue; } @@ -274,34 +198,28 @@ bool CTokenListType::addString(bool isDryRun, // this, it means the test tokens don't contain the base tokens in the // same order, in which case the out-of-order common token index needs // to be reset. - do - { - if (testIter == tokenIds.end()) - { + do { + if (testIter == tokenIds.end()) { m_OutOfOrderCommonTokenIndex = index; changed = true; break; } - } - while ((testIter++)->first != m_BaseTokenIds[index].first); + } while ((testIter++)->first != m_BaseTokenIds[index].first); } - if (rawStringLen > m_MaxStringLen) - { + if (rawStringLen > m_MaxStringLen) { m_MaxStringLen = rawStringLen; changed = true; } // Changes up to this point invalidate the cached reverse search, whereas // simply incrementing the number of matches doesn't - if (changed) - { + if (changed) { m_ReverseSearchPart1.clear(); m_ReverseSearchPart2.clear(); } - if (!isDryRun) - { + if (!isDryRun) { ++m_NumMatches; changed = true; } @@ -309,77 +227,60 @@ bool CTokenListType::addString(bool isDryRun, return changed; } -const std::string &CTokenListType::baseString() const -{ +const std::string& CTokenListType::baseString() const { return m_BaseString; } -const CTokenListType::TSizeSizePrVec &CTokenListType::baseTokenIds() const -{ +const CTokenListType::TSizeSizePrVec& CTokenListType::baseTokenIds() const { return m_BaseTokenIds; } -size_t CTokenListType::baseWeight() const -{ +size_t CTokenListType::baseWeight() const { return m_BaseWeight; } -const CTokenListType::TSizeSizePrVec &CTokenListType::commonUniqueTokenIds() const -{ +const CTokenListType::TSizeSizePrVec& CTokenListType::commonUniqueTokenIds() const { return m_CommonUniqueTokenIds; } -size_t CTokenListType::commonUniqueTokenWeight() const -{ +size_t CTokenListType::commonUniqueTokenWeight() const { return m_CommonUniqueTokenWeight; } -size_t CTokenListType::origUniqueTokenWeight() const -{ +size_t CTokenListType::origUniqueTokenWeight() const { return m_OrigUniqueTokenWeight; } -size_t CTokenListType::maxStringLen() const -{ +size_t CTokenListType::maxStringLen() const { return m_MaxStringLen; } -size_t CTokenListType::outOfOrderCommonTokenIndex() 
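// Worked example (not part of this patch) for the intersection loop above: with
// stored common tokens {(3,2), (5,1), (9,1)} (ID, count) and a new string whose
// unique tokens are {3:2, 9:2}, token 5 is erased as absent and token 9 as a count
// mismatch, refunding 1 + 1 from m_CommonUniqueTokenWeight and leaving {(3,2)}.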
const -{ +size_t CTokenListType::outOfOrderCommonTokenIndex() const { return m_OutOfOrderCommonTokenIndex; } -size_t CTokenListType::maxMatchingStringLen() const -{ +size_t CTokenListType::maxMatchingStringLen() const { // Add a 10% margin of error return (m_MaxStringLen * 11) / 10; } -size_t CTokenListType::missingCommonTokenWeight(const TSizeSizeMap &uniqueTokenIds) const -{ +size_t CTokenListType::missingCommonTokenWeight(const TSizeSizeMap& uniqueTokenIds) const { size_t presentWeight(0); TSizeSizePrVecCItr commonIter = m_CommonUniqueTokenIds.begin(); TSizeSizeMapCItr testIter = uniqueTokenIds.begin(); - while (commonIter != m_CommonUniqueTokenIds.end() && - testIter != uniqueTokenIds.end()) - { - if (commonIter->first == testIter->first) - { + while (commonIter != m_CommonUniqueTokenIds.end() && testIter != uniqueTokenIds.end()) { + if (commonIter->first == testIter->first) { // Don't increment the weight if a given token appears a different // number of times in the two strings - if (commonIter->second == testIter->second) - { + if (commonIter->second == testIter->second) { presentWeight += commonIter->second; } ++commonIter; ++testIter; - } - else if (commonIter->first < testIter->first) - { + } else if (commonIter->first < testIter->first) { ++commonIter; - } - else // if (commonIter->first > testIter->first) + } else // if (commonIter->first > testIter->first) { ++testIter; } @@ -391,8 +292,7 @@ size_t CTokenListType::missingCommonTokenWeight(const TSizeSizeMap &uniqueTokenI return m_CommonUniqueTokenWeight - presentWeight; } -bool CTokenListType::isMissingCommonTokenWeightZero(const TSizeSizeMap &uniqueTokenIds) const -{ +bool CTokenListType::isMissingCommonTokenWeightZero(const TSizeSizeMap& uniqueTokenIds) const { // This method could be implemented as: // return this->missingCommonTokenWeight(uniqueTokenIds) == 0; // @@ -400,20 +300,15 @@ bool CTokenListType::isMissingCommonTokenWeightZero(const TSizeSizeMap &uniqueTo TSizeSizePrVecCItr commonIter = m_CommonUniqueTokenIds.begin(); TSizeSizeMapCItr testIter = uniqueTokenIds.begin(); - while (commonIter != m_CommonUniqueTokenIds.end() && - testIter != uniqueTokenIds.end()) - { - if (commonIter->first < testIter->first) - { + while (commonIter != m_CommonUniqueTokenIds.end() && testIter != uniqueTokenIds.end()) { + if (commonIter->first < testIter->first) { return false; } - if (commonIter->first == testIter->first) - { + if (commonIter->first == testIter->first) { // The tokens must appear the same number of times in the two // strings - if (commonIter->second != testIter->second) - { + if (commonIter->second != testIter->second) { return false; } ++commonIter; @@ -425,19 +320,12 @@ bool CTokenListType::isMissingCommonTokenWeightZero(const TSizeSizeMap &uniqueTo return commonIter == m_CommonUniqueTokenIds.end(); } -bool CTokenListType::containsCommonTokensInOrder(const TSizeSizePrVec &tokenIds) const -{ +bool CTokenListType::containsCommonTokensInOrder(const TSizeSizePrVec& tokenIds) const { TSizeSizePrVecCItr testIter = tokenIds.begin(); - for (TSizeSizePrVecCItr baseIter = m_BaseTokenIds.begin(); - baseIter != m_BaseTokenIds.end(); - ++baseIter) - { + for (TSizeSizePrVecCItr baseIter = m_BaseTokenIds.begin(); baseIter != m_BaseTokenIds.end(); ++baseIter) { // Ignore tokens that are not in the common unique tokens - if (std::binary_search(m_CommonUniqueTokenIds.begin(), - m_CommonUniqueTokenIds.end(), - *baseIter, - CSizePairFirstElementLess()) == false) - { + if (std::binary_search(m_CommonUniqueTokenIds.begin(), 
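// Worked example (not part of this patch) for missingCommonTokenWeight() above: with
// common tokens {(3,2), (5,1)} (total weight 3) and test tokens {3:2, 7:4}, only
// token 3 matches with an equal count, so presentWeight == 2 and the missing weight
// is 3 - 2 == 1; a count mismatch contributes nothing, exactly as in
// isMissingCommonTokenWeightZero().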
m_CommonUniqueTokenIds.end(), *baseIter, CSizePairFirstElementLess()) == + false) { continue; } @@ -445,32 +333,24 @@ bool CTokenListType::containsCommonTokensInOrder(const TSizeSizePrVec &tokenIds) // base token. If we reach the end of the test tokens whilst doing // this, it means the test tokens don't contain the base tokens in the // correct order. - do - { - if (testIter == tokenIds.end()) - { + do { + if (testIter == tokenIds.end()) { return false; } - } - while ((testIter++)->first != baseIter->first); + } while ((testIter++)->first != baseIter->first); } return true; } -size_t CTokenListType::numMatches() const -{ +size_t CTokenListType::numMatches() const { return m_NumMatches; } -void CTokenListType::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CTokenListType::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(BASE_STRING, m_BaseString); - for (TSizeSizePrVecCItr iter = m_BaseTokenIds.begin(); - iter != m_BaseTokenIds.end(); - ++iter) - { + for (TSizeSizePrVecCItr iter = m_BaseTokenIds.begin(); iter != m_BaseTokenIds.end(); ++iter) { inserter.insertValue(BASE_TOKEN_ID, iter->first); inserter.insertValue(BASE_TOKEN_WEIGHT, iter->second); } @@ -478,10 +358,7 @@ void CTokenListType::acceptPersistInserter(core::CStatePersistInserter &inserter inserter.insertValue(MAX_STRING_LEN, m_MaxStringLen); inserter.insertValue(OUT_OF_ORDER_COMMON_TOKEN_INDEX, m_OutOfOrderCommonTokenIndex); - for (TSizeSizePrVecCItr iter = m_CommonUniqueTokenIds.begin(); - iter != m_CommonUniqueTokenIds.end(); - ++iter) - { + for (TSizeSizePrVecCItr iter = m_CommonUniqueTokenIds.begin(); iter != m_CommonUniqueTokenIds.end(); ++iter) { inserter.insertValue(COMMON_UNIQUE_TOKEN_ID, iter->first); inserter.insertValue(COMMON_UNIQUE_TOKEN_WEIGHT, iter->second); } @@ -490,9 +367,7 @@ void CTokenListType::acceptPersistInserter(core::CStatePersistInserter &inserter inserter.insertValue(NUM_MATCHES, m_NumMatches); } -bool CTokenListType::cachedReverseSearch(std::string &part1, - std::string &part2) const -{ +bool CTokenListType::cachedReverseSearch(std::string& part1, std::string& part2) const { part1 = m_ReverseSearchPart1; part2 = m_ReverseSearchPart2; @@ -508,14 +383,9 @@ bool CTokenListType::cachedReverseSearch(std::string &part1, return !missed; } -void CTokenListType::cacheReverseSearch(const std::string &part1, - const std::string &part2) -{ +void CTokenListType::cacheReverseSearch(const std::string& part1, const std::string& part2) { m_ReverseSearchPart1 = part1; m_ReverseSearchPart2 = part2; } - - } } - diff --git a/lib/api/dump_state/Main.cc b/lib/api/dump_state/Main.cc index 554c54a4cc..27a5ac5e64 100644 --- a/lib/api/dump_state/Main.cc +++ b/lib/api/dump_state/Main.cc @@ -21,9 +21,9 @@ //! 
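// Hedged sketch (inert; not part of this patch): containsCommonTokensInOrder() above
// is an ordered-subsequence test over token IDs; the same idea on flat vectors,
// ignoring the common-unique-token filtering step, assuming <algorithm> and <vector>:
#if 0
bool containsInOrder(const std::vector<size_t>& base, const std::vector<size_t>& test) {
    auto testIter = test.begin();
    for (size_t token : base) {
        testIter = std::find(testIter, test.end(), token);
        if (testIter == test.end()) {
            return false; // test tokens exhausted before every base token matched
        }
        ++testIter; // the next base token must match strictly later
    }
    return true;
}
#endif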
#include #include -#include #include #include +#include #include @@ -32,9 +32,9 @@ #include #include -#include #include #include +#include #include #include #include @@ -48,35 +48,29 @@ #include #include - static std::string persistedNormalizerState; static std::vector persistedStateFiles; -std::string versionNumber() -{ +std::string versionNumber() { ml::core::CRegex regex; regex.init("\\d\\.\\d\\.\\d"); std::string longVersion = ml::ver::CBuildInfo::versionNumber(); std::size_t pos; std::string version; - if (regex.search(longVersion, pos)) - { + if (regex.search(longVersion, pos)) { version = longVersion.substr(pos, 5); } return version; } -void reportPersistComplete(ml::api::CModelSnapshotJsonWriter::SModelSnapshotReport modelSnapshotReport) -{ +void reportPersistComplete(ml::api::CModelSnapshotJsonWriter::SModelSnapshotReport modelSnapshotReport) { LOG_INFO("Persist complete with description: " << modelSnapshotReport.s_Description); persistedNormalizerState = modelSnapshotReport.s_NormalizerState; } -bool writeNormalizerState(const std::string &outputFileName) -{ +bool writeNormalizerState(const std::string& outputFileName) { std::ofstream out(outputFileName); - if (!out.is_open()) - { + if (!out.is_open()) { LOG_ERROR("Failed to open normalizer state output file " << outputFileName); return false; } @@ -88,8 +82,7 @@ bool writeNormalizerState(const std::string &outputFileName) return true; } -bool persistCategorizerStateToFile(const std::string &outputFileName) -{ +bool persistCategorizerStateToFile(const std::string& outputFileName) { ml::model::CLimits limits; ml::api::CFieldConfig config("count", "mlcategory"); @@ -107,17 +100,15 @@ bool persistCategorizerStateToFile(const std::string &outputFileName) // Persist the categorizer state to file { - std::ofstream *out = nullptr; + std::ofstream* out = nullptr; ml::api::CSingleStreamDataAdder::TOStreamP ptr(out = new std::ofstream(outputFileName)); - if (!out->is_open()) - { + if (!out->is_open()) { LOG_ERROR("Failed to open categorizer state output file " << outputFileName); return false; } ml::api::CSingleStreamDataAdder persister(ptr); - if (!typer.persistState(persister)) - { + if (!typer.persistState(persister)) { LOG_ERROR("Error persisting state to " << outputFileName); return false; } @@ -127,16 +118,14 @@ bool persistCategorizerStateToFile(const std::string &outputFileName) return true; } -bool persistAnomalyDetectorStateToFile(const std::string &configFileName, - const std::string &inputFilename, - const std::string &outputFileName, +bool persistAnomalyDetectorStateToFile(const std::string& configFileName, + const std::string& inputFilename, + const std::string& outputFileName, int latencyBuckets, - const std::string &timeFormat = std::string()) -{ + const std::string& timeFormat = std::string()) { // Open the input and output files std::ifstream inputStrm(inputFilename); - if (!inputStrm.is_open()) - { + if (!inputStrm.is_open()) { LOG_ERROR("Cannot open input file " << inputFilename); return false; } @@ -146,22 +135,15 @@ bool persistAnomalyDetectorStateToFile(const std::string &configFileName, ml::model::CLimits limits; ml::api::CFieldConfig fieldConfig; - if (!fieldConfig.initFromFile(configFileName)) - { + if (!fieldConfig.initFromFile(configFileName)) { LOG_ERROR("Failed to init field config from " << configFileName); return false; } ml::core_t::TTime bucketSize(3600); std::string jobId("foo"); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, - 
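// Hedged sketch (inert; not part of this patch) of versionNumber() above: the regex
// finds the first x.y.z digit triple and substr(pos, 5) keeps exactly those five
// characters; the long version string here is hypothetical.
#if 0
ml::core::CRegex regex;
regex.init("\\d\\.\\d\\.\\d");
std::string longVersion("Version 6.2.0-SNAPSHOT");
std::size_t pos(0);
std::string version;
if (regex.search(longVersion, pos)) {
    version = longVersion.substr(pos, 5); // "6.2.0"
}
#endif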
ml::model_t::E_None,
-                                                         "",
-                                                         bucketSize * latencyBuckets,
-                                                         0,
-                                                         false,
-                                                         "");
+    ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(
+        bucketSize, ml::model_t::E_None, "", bucketSize * latencyBuckets, 0, false, "");

     ml::api::CAnomalyJob origJob(jobId,
                                  limits,
@@ -176,36 +158,28 @@ bool persistAnomalyDetectorStateToFile(const std::string &configFileName,
     using TScopedInputParserP = boost::scoped_ptr;
     TScopedInputParserP parser;
-    if (inputFilename.rfind(".csv") == inputFilename.length() - 4)
-    {
+    if (inputFilename.rfind(".csv") == inputFilename.length() - 4) {
         parser.reset(new ml::api::CCsvInputParser(inputStrm));
-    }
-    else
-    {
+    } else {
         parser.reset(new ml::api::CLineifiedJsonInputParser(inputStrm));
     }

-    if (!parser->readStream(boost::bind(&ml::api::CAnomalyJob::handleRecord,
-                                        &origJob,
-                                        _1)))
-    {
+    if (!parser->readStream(boost::bind(&ml::api::CAnomalyJob::handleRecord, &origJob, _1))) {
         LOG_ERROR("Failed to process input");
         return false;
     }

     // Persist the job state to file
     {
-        std::ofstream *out = nullptr;
+        std::ofstream* out = nullptr;
         ml::api::CSingleStreamDataAdder::TOStreamP ptr(out = new std::ofstream(outputFileName));
-        if (!out->is_open())
-        {
+        if (!out->is_open()) {
             LOG_ERROR("Failed to open state output file " << outputFileName);
             return false;
         }
         ml::api::CSingleStreamDataAdder persister(ptr);
-        if (!origJob.persistState(persister))
-        {
+        if (!origJob.persistState(persister)) {
             LOG_ERROR("Error persisting state to " << outputFileName);
             return false;
         }
@@ -216,120 +190,103 @@ bool persistAnomalyDetectorStateToFile(const std::string &configFileName,
     return true;
 }

-bool persistByDetector(const std::string &version)
-{
+bool persistByDetector(const std::string& version) {
     return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_mlfields.conf",
-                                              "../unittest/testfiles/big_ascending.txt",
-                                              "../unittest/testfiles/state/" + version + "/by_detector_state.json",
-                                              0,
-                                              "%d/%b/%Y:%T %z");
+                                             "../unittest/testfiles/big_ascending.txt",
+                                             "../unittest/testfiles/state/" + version + "/by_detector_state.json",
+                                             0,
+                                             "%d/%b/%Y:%T %z");
 }

-bool persistOverDetector(const std::string &version)
-{
+bool persistOverDetector(const std::string& version) {
     return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_mlfields_over.conf",
-                                              "../unittest/testfiles/big_ascending.txt",
-                                              "../unittest/testfiles/state/" + version + "/over_detector_state.json",
-                                              0,
-                                              "%d/%b/%Y:%T %z");
+                                             "../unittest/testfiles/big_ascending.txt",
+                                             "../unittest/testfiles/state/" + version + "/over_detector_state.json",
+                                             0,
+                                             "%d/%b/%Y:%T %z");
 }

-bool persistPartitionDetector(const std::string &version)
-{
+bool persistPartitionDetector(const std::string& version) {
     return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_mlfields_partition.conf",
-                                              "../unittest/testfiles/big_ascending.txt",
-                                              "../unittest/testfiles/state/" + version + "/partition_detector_state.json",
-                                              0,
-                                              "%d/%b/%Y:%T %z");
+                                             "../unittest/testfiles/big_ascending.txt",
+                                             "../unittest/testfiles/state/" + version + "/partition_detector_state.json",
+                                             0,
+                                             "%d/%b/%Y:%T %z");
 }

-bool persistDcDetector(const std::string &version)
-{
+bool persistDcDetector(const std::string& version) {
     return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_persist_dc.conf",
-                                              "../unittest/testfiles/files_users_programs.csv",
-
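// Descriptive note (not part of this patch): the persistence idiom above recurs in
// persistCategorizerStateToFile() as well; the raw pointer is only retained so that
// is_open() can be checked after ownership passes to the shared stream pointer.
// Sketch with a hypothetical file name:
#if 0
std::ofstream* out = nullptr;
ml::api::CSingleStreamDataAdder::TOStreamP ptr(out = new std::ofstream("hypothetical.json"));
if (out->is_open()) {
    ml::api::CSingleStreamDataAdder persister(ptr);
    origJob.persistState(persister); // writes bulk metadata plus state documents
}
#endif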
"../unittest/testfiles/state/" + version + "/dc_detector_state.json", + 5); } -bool persistCountDetector(const std::string &version) -{ +bool persistCountDetector(const std::string& version) { return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_persist_count.conf", - "../unittest/testfiles/files_users_programs.csv", - "../unittest/testfiles/state/" + version + "/count_detector_state.json", - 5); + "../unittest/testfiles/files_users_programs.csv", + "../unittest/testfiles/state/" + version + "/count_detector_state.json", + 5); } -int main(int /*argc*/, char **/*argv*/) -{ +int main(int /*argc*/, char** /*argv*/) { ml::core::CLogger::instance().setLoggingLevel(ml::core::CLogger::E_Info); std::string version = versionNumber(); - if (version.empty()) - { + if (version.empty()) { LOG_ERROR("Cannot get version number"); return EXIT_FAILURE; } LOG_INFO("Saving model state for version: " << version); bool persisted = persistByDetector(version); - if (!persisted) - { + if (!persisted) { LOG_ERROR("Failed to persist state for by detector"); return EXIT_FAILURE; } - if (persistedNormalizerState.empty()) - { + if (persistedNormalizerState.empty()) { LOG_ERROR("Normalizer state not persisted"); return EXIT_FAILURE; } - if (!writeNormalizerState("../unittest/testfiles/state/" + version + "/normalizer_state.json")) - { + if (!writeNormalizerState("../unittest/testfiles/state/" + version + "/normalizer_state.json")) { LOG_ERROR("Error writing normalizer state file"); return EXIT_FAILURE; } persisted = persistOverDetector(version); - if (!persisted) - { + if (!persisted) { LOG_ERROR("Failed to persist state for over detector"); return EXIT_FAILURE; } persisted = persistPartitionDetector(version); - if (!persisted) - { + if (!persisted) { LOG_ERROR("Failed to persist state for partition detector"); return EXIT_FAILURE; } persisted = persistDcDetector(version); - if (!persisted) - { + if (!persisted) { LOG_ERROR("Failed to persist state for DC detector"); return EXIT_FAILURE; } persisted = persistCountDetector(version); - if (!persisted) - { + if (!persisted) { LOG_ERROR("Failed to persist state for count detector"); return EXIT_FAILURE; } persisted = persistCategorizerStateToFile("../unittest/testfiles/state/" + version + "/categorizer_state.json"); - if (!persisted) - { + if (!persisted) { LOG_ERROR("Failed to persist categorizer state"); return EXIT_FAILURE; } LOG_INFO("Written state files:"); - for (const auto &stateFile : persistedStateFiles) - { + for (const auto& stateFile : persistedStateFiles) { LOG_INFO("\t" << stateFile) } return EXIT_SUCCESS; } - diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 4b48367b82..1729ee8afd 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -12,8 +12,8 @@ #include #include #include -#include #include +#include #include #include @@ -23,15 +23,14 @@ #include +#include #include #include #include -#include using namespace ml; -std::set getUniqueValues(const std::string &key, const std::string &output) -{ +std::set getUniqueValues(const std::string& key, const std::string& output) { std::set values; rapidjson::Document doc; doc.Parse(output); @@ -40,46 +39,32 @@ std::set getUniqueValues(const std::string &key, const std::string size_t i = 0; - while(true) - { - rapidjson::Value *p1 = rapidjson::Pointer("/" + std::to_string(i)).Get(doc); - if (p1 != nullptr) - { + while (true) { + rapidjson::Value* p1 = rapidjson::Pointer("/" + 
std::to_string(i)).Get(doc); + if (p1 != nullptr) { size_t j = 0; - while(true) - { - rapidjson::Value *p2 = rapidjson::Pointer("/" + std::to_string(i) - + "/records/" + std::to_string(j)).Get(doc); - if (p2 != nullptr) - { + while (true) { + rapidjson::Value* p2 = rapidjson::Pointer("/" + std::to_string(i) + "/records/" + std::to_string(j)).Get(doc); + if (p2 != nullptr) { size_t k = 0; - while (true) - { - rapidjson::Value *p3 = rapidjson::Pointer("/" + std::to_string(i) - + "/records/" + std::to_string(j) - + "/causes/" + std::to_string(k) - + "/" + key).Get(doc); - - if (p3 != nullptr) - { + while (true) { + rapidjson::Value* p3 = rapidjson::Pointer("/" + std::to_string(i) + "/records/" + std::to_string(j) + "/causes/" + + std::to_string(k) + "/" + key) + .Get(doc); + + if (p3 != nullptr) { values.insert(p3->GetString()); - } - else - { + } else { break; } ++k; } - } - else - { + } else { break; } ++j; } - } - else - { + } else { break; } ++i; @@ -88,21 +73,17 @@ std::set getUniqueValues(const std::string &key, const std::string return values; } -CppUnit::Test* CAnomalyJobLimitTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAnomalyJobLimitTest"); +CppUnit::Test* CAnomalyJobLimitTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnomalyJobLimitTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyJobLimitTest::testLimit", - &CAnomalyJobLimitTest::testLimit) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyJobLimitTest::testAccuracy", - &CAnomalyJobLimitTest::testAccuracy) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CAnomalyJobLimitTest::testLimit", &CAnomalyJobLimitTest::testLimit)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CAnomalyJobLimitTest::testAccuracy", &CAnomalyJobLimitTest::testAccuracy)); return suiteOfTests; } -void CAnomalyJobLimitTest::testAccuracy() -{ +void CAnomalyJobLimitTest::testAccuracy() { // Check that the amount of memory used when we go over the // resource limit is close enough to the limit that we specified @@ -123,10 +104,9 @@ void CAnomalyJobLimitTest::testAccuracy() CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(3600); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(3600); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; //limits.resourceMonitor().m_ByteLimitHigh = 100000; @@ -134,20 +114,14 @@ void CAnomalyJobLimitTest::testAccuracy() { LOG_TRACE("Setting up job"); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); std::ifstream inputStrm("testfiles/resource_accuracy.csv"); CPPUNIT_ASSERT(inputStrm.is_open()); api::CCsvInputParser parser(inputStrm); LOG_TRACE("Reading file"); - CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, - &job, - _1))); + CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); LOG_TRACE("Checking results"); @@ -169,33 +143,25 @@ void CAnomalyJobLimitTest::testAccuracy() CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(3600); + 
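// Hedged sketch (inert; not part of this patch) of the probing idiom in
// getUniqueValues() above: rapidjson::Pointer::Get() returns a null pointer for a
// missing path, so each array level is walked by incrementing an index until the
// lookup fails ('doc' as in that function).
#if 0
for (size_t i = 0;; ++i) {
    const rapidjson::Value* bucket = rapidjson::Pointer("/" + std::to_string(i)).Get(doc);
    if (bucket == nullptr) {
        break; // no more top-level results
    }
    // descend into "/i/records/j/causes/k/<key>" the same way
}
#endif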
model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; std::stringstream outputStrm; { - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); limits.resourceMonitor().m_ByteLimitHigh = nonLimitedUsage / 10; - limits.resourceMonitor().m_ByteLimitLow = - limits.resourceMonitor().m_ByteLimitHigh - 1024; + limits.resourceMonitor().m_ByteLimitLow = limits.resourceMonitor().m_ByteLimitHigh - 1024; LOG_TRACE("Setting up job"); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); std::ifstream inputStrm("testfiles/resource_accuracy.csv"); CPPUNIT_ASSERT(inputStrm.is_open()); api::CCsvInputParser parser(inputStrm); LOG_TRACE("Reading file"); - CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, - &job, - _1))); + CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); LOG_TRACE("Checking results"); @@ -211,13 +177,12 @@ void CAnomalyJobLimitTest::testAccuracy() } } -void CAnomalyJobLimitTest::testLimit() -{ +void CAnomalyJobLimitTest::testLimit() { using TStrSet = std::set; std::stringstream outputStrm; { - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); // Run the data without any resource limits and check that // all the expected fields are in the results set model::CLimits limits; @@ -232,24 +197,17 @@ void CAnomalyJobLimitTest::testLimit() CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(3600); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(3600); LOG_TRACE("Setting up job"); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); std::ifstream inputStrm("testfiles/resource_limits_3_2over_3partition.csv"); CPPUNIT_ASSERT(inputStrm.is_open()); api::CCsvInputParser parser(inputStrm); LOG_TRACE("Reading file"); - CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, - &job, - _1))); + CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); LOG_TRACE("Checking results"); CPPUNIT_ASSERT_EQUAL(uint64_t(1176), job.numRecordsHandled()); } @@ -280,27 +238,20 @@ void CAnomalyJobLimitTest::testLimit() CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(3600); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(3600); //::CMockOutputWriter resultsHandler; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); LOG_TRACE("Setting up job"); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); std::ifstream inputStrm("testfiles/resource_limits_3_2over_3partition_first8.csv"); CPPUNIT_ASSERT(inputStrm.is_open()); api::CCsvInputParser parser(inputStrm); LOG_TRACE("Reading file"); - 
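// Descriptive note (not part of this patch): the limited run above budgets its memory
// from the unlimited run's measurement; one tenth of the observed usage becomes the
// hard byte limit and the soft limit sits 1024 bytes below it.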
CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, - &job, - _1))); + CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); // Now turn on the resource limiting limits.resourceMonitor().m_ByteLimitHigh = 0; limits.resourceMonitor().m_ByteLimitLow = 0; @@ -311,9 +262,7 @@ void CAnomalyJobLimitTest::testLimit() api::CCsvInputParser parser2(inputStrm2); LOG_TRACE("Reading second file"); - CPPUNIT_ASSERT(parser2.readStream(boost::bind(&api::CAnomalyJob::handleRecord, - &job, - _1))); + CPPUNIT_ASSERT(parser2.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); LOG_TRACE("Checking results"); CPPUNIT_ASSERT_EQUAL(uint64_t(1180), job.numRecordsHandled()); } @@ -327,7 +276,3 @@ void CAnomalyJobLimitTest::testLimit() CPPUNIT_ASSERT_EQUAL(std::size_t(2), people.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), attributes.size()); } - - - - diff --git a/lib/api/unittest/CAnomalyJobLimitTest.h b/lib/api/unittest/CAnomalyJobLimitTest.h index 71ac8383e1..17bc4c1487 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.h +++ b/lib/api/unittest/CAnomalyJobLimitTest.h @@ -8,16 +8,12 @@ #include -class CAnomalyJobLimitTest : public CppUnit::TestFixture -{ - public: - void testLimit(); - void testAccuracy(); - - static CppUnit::Test *suite(); - +class CAnomalyJobLimitTest : public CppUnit::TestFixture { +public: + void testLimit(); + void testAccuracy(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CAnomalyJobLimitTest_h - diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index c5f03a812a..32eff6ae7e 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -23,12 +23,11 @@ #include +#include #include #include -#include -namespace -{ +namespace { //! \brief //! Mock object for state restore unit tests. @@ -36,14 +35,10 @@ namespace //! DESCRIPTION:\n //! CDataSearcher that returns an empty stream. //! -class CEmptySearcher : public ml::core::CDataSearcher -{ - public: - //! Do a search that results in an empty input stream. - virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) - { - return TIStreamP(new std::istringstream()); - } +class CEmptySearcher : public ml::core::CDataSearcher { +public: + //! Do a search that results in an empty input stream. + virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) { return TIStreamP(new std::istringstream()); } }; //! \brief @@ -56,133 +51,92 @@ class CEmptySearcher : public ml::core::CDataSearcher //! Only the minimal set of required functions are implemented. //! 
-class CSingleResultVisitor : public ml::model::CHierarchicalResultsVisitor -{ - public: - CSingleResultVisitor() : m_LastResult(0.0) - { } +class CSingleResultVisitor : public ml::model::CHierarchicalResultsVisitor { +public: + CSingleResultVisitor() : m_LastResult(0.0) {} - virtual ~CSingleResultVisitor() - { } + virtual ~CSingleResultVisitor() {} - virtual void visit(const ml::model::CHierarchicalResults &/*results*/, - const TNode &node, - bool /*pivot*/) - { - if (!this->isSimpleCount(node) && this->isLeaf(node)) - { - if (node.s_AnnotatedProbability.s_AttributeProbabilities.size() == 0) - { - return; - } - if (!node.s_Model) - { - return; - } - const ml::model::SAttributeProbability &attribute = - node.s_AnnotatedProbability.s_AttributeProbabilities[0]; - - m_LastResult = node.s_Model->currentBucketValue(attribute.s_Feature, - 0, 0, node.s_BucketStartTime)[0]; + virtual void visit(const ml::model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) { + if (!this->isSimpleCount(node) && this->isLeaf(node)) { + if (node.s_AnnotatedProbability.s_AttributeProbabilities.size() == 0) { + return; } - } + if (!node.s_Model) { + return; + } + const ml::model::SAttributeProbability& attribute = node.s_AnnotatedProbability.s_AttributeProbabilities[0]; - double lastResults() const - { - return m_LastResult; + m_LastResult = node.s_Model->currentBucketValue(attribute.s_Feature, 0, 0, node.s_BucketStartTime)[0]; } + } - private: - double m_LastResult; + double lastResults() const { return m_LastResult; } + +private: + double m_LastResult; }; -class CMultiResultVisitor : public ml::model::CHierarchicalResultsVisitor -{ - public: - CMultiResultVisitor() : m_LastResult(0.0) - { } +class CMultiResultVisitor : public ml::model::CHierarchicalResultsVisitor { +public: + CMultiResultVisitor() : m_LastResult(0.0) {} - virtual ~CMultiResultVisitor() - { } + virtual ~CMultiResultVisitor() {} - virtual void visit(const ml::model::CHierarchicalResults &/*results*/, - const TNode &node, - bool /*pivot*/) - { - if (!this->isSimpleCount(node) && this->isLeaf(node)) - { - if (node.s_AnnotatedProbability.s_AttributeProbabilities.size() == 0) - { - return; - } - if (!node.s_Model) - { - return; - } - std::size_t pid; - const ml::model::CDataGatherer &gatherer = node.s_Model->dataGatherer(); - if (!gatherer.personId(*node.s_Spec.s_PersonFieldValue, pid)) - { - LOG_ERROR("No identifier for '" - << *node.s_Spec.s_PersonFieldValue << "'"); - return; - } - for (std::size_t i = 0; i < node.s_AnnotatedProbability.s_AttributeProbabilities.size(); ++i) - { - const ml::model::SAttributeProbability &attribute = - node.s_AnnotatedProbability.s_AttributeProbabilities[i]; - m_LastResult += node.s_Model->currentBucketValue(attribute.s_Feature, - pid, attribute.s_Cid, node.s_BucketStartTime)[0]; - } + virtual void visit(const ml::model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) { + if (!this->isSimpleCount(node) && this->isLeaf(node)) { + if (node.s_AnnotatedProbability.s_AttributeProbabilities.size() == 0) { + return; + } + if (!node.s_Model) { + return; + } + std::size_t pid; + const ml::model::CDataGatherer& gatherer = node.s_Model->dataGatherer(); + if (!gatherer.personId(*node.s_Spec.s_PersonFieldValue, pid)) { + LOG_ERROR("No identifier for '" << *node.s_Spec.s_PersonFieldValue << "'"); + return; + } + for (std::size_t i = 0; i < node.s_AnnotatedProbability.s_AttributeProbabilities.size(); ++i) { + const ml::model::SAttributeProbability& attribute = 
node.s_AnnotatedProbability.s_AttributeProbabilities[i]; + m_LastResult += node.s_Model->currentBucketValue(attribute.s_Feature, pid, attribute.s_Cid, node.s_BucketStartTime)[0]; } } + } - double lastResults() const - { - return m_LastResult; - } + double lastResults() const { return m_LastResult; } - private: - double m_LastResult; +private: + double m_LastResult; }; -class CResultsScoreVisitor : public ml::model::CHierarchicalResultsVisitor -{ - public: - CResultsScoreVisitor(int score) : m_Score(score) - { } +class CResultsScoreVisitor : public ml::model::CHierarchicalResultsVisitor { +public: + CResultsScoreVisitor(int score) : m_Score(score) {} - virtual ~CResultsScoreVisitor() - { } + virtual ~CResultsScoreVisitor() {} - virtual void visit(const ml::model::CHierarchicalResults &/*results*/, - const TNode &node, - bool /*pivot*/) - { - if (this->isRoot(node)) - { - node.s_NormalizedAnomalyScore = m_Score; - } + virtual void visit(const ml::model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) { + if (this->isRoot(node)) { + node.s_NormalizedAnomalyScore = m_Score; } + } - private: - int m_Score; +private: + int m_Score; }; -size_t countBuckets(const std::string &key, const std::string &output) -{ +size_t countBuckets(const std::string& key, const std::string& output) { size_t count = 0; rapidjson::Document doc; doc.Parse(output); CPPUNIT_ASSERT(!doc.HasParseError()); CPPUNIT_ASSERT(doc.IsArray()); - const rapidjson::Value &allRecords = doc.GetArray(); - for (auto &r : allRecords.GetArray()) - { + const rapidjson::Value& allRecords = doc.GetArray(); + for (auto& r : allRecords.GetArray()) { rapidjson::Value::ConstMemberIterator recordsIt = r.GetObject().FindMember(key); - if (recordsIt != r.GetObject().MemberEnd()) - { + if (recordsIt != r.GetObject().MemberEnd()) { ++count; } } @@ -190,30 +144,24 @@ size_t countBuckets(const std::string &key, const std::string &output) return count; } -bool findLine(const std::string ®ex, const ml::core::CRegex::TStrVec &lines) -{ +bool findLine(const std::string& regex, const ml::core::CRegex::TStrVec& lines) { ml::core::CRegex rx; rx.init(regex); std::size_t pos = 0; - for (ml::core::CRegex::TStrVecCItr i = lines.begin(); i != lines.end(); ++i) - { - if (rx.search(*i, pos)) - { + for (ml::core::CRegex::TStrVecCItr i = lines.begin(); i != lines.end(); ++i) { + if (rx.search(*i, pos)) { return true; } } return false; } - const ml::core_t::TTime BUCKET_SIZE(3600); - } using namespace ml; -void CAnomalyJobTest::testBadTimes() -{ +void CAnomalyJobTest::testBadTimes() { { // Test with no time field model::CLimits limits; @@ -222,13 +170,11 @@ void CAnomalyJobTest::testBadTimes() clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; dataRows["wibble"] = "12345678"; @@ -246,13 +192,11 @@ void CAnomalyJobTest::testBadTimes() clauses.push_back("value"); 
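// Hedged sketch (inert; not part of this patch) of how the visitors defined above are
// typically driven; the traversal entry point is an assumption, named after the usual
// CHierarchicalResults API rather than anything introduced by this patch.
#if 0
ml::model::CHierarchicalResults results;
// ... run detection so the results tree is populated ...
CSingleResultVisitor visitor;
results.bottomUpBreadthFirst(visitor); // assumed traversal method
double actual = visitor.lastResults();
#endif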
clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; dataRows["time"] = "hello"; @@ -270,14 +214,20 @@ void CAnomalyJobTest::testBadTimes() clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream, - api::CAnomalyJob::TPersistCompleteFunc(), nullptr, -1, "time", "%Y%m%m%H%M%S"); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + + api::CAnomalyJob job("job", + limits, + fieldConfig, + modelConfig, + wrappedOutputStream, + api::CAnomalyJob::TPersistCompleteFunc(), + nullptr, + -1, + "time", + "%Y%m%m%H%M%S"); api::CAnomalyJob::TStrStrUMap dataRows; dataRows["time"] = "hello world"; @@ -289,8 +239,7 @@ void CAnomalyJobTest::testBadTimes() } } -void CAnomalyJobTest::testOutOfSequence() -{ +void CAnomalyJobTest::testOutOfSequence() { { // Test out of sequence record model::CLimits limits; @@ -299,13 +248,11 @@ void CAnomalyJobTest::testOutOfSequence() clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); job.description(); job.descriptionAndDebugMemoryUsage(); @@ -327,8 +274,7 @@ void CAnomalyJobTest::testOutOfSequence() } } -void CAnomalyJobTest::testControlMessages() -{ +void CAnomalyJobTest::testControlMessages() { { // Test control messages model::CLimits limits; @@ -337,10 +283,9 @@ void CAnomalyJobTest::testControlMessages() clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); api::CAnomalyJob job("job", limits, 
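// Descriptive note (not part of this patch): control messages reach the job through
// the pseudo-field "." exercised below; a value starting with 'f' (e.g. "f1")
// requests a flush acknowledged with that ID, and one starting with 'r' (also below)
// requests a reset over the given bucket time range.
#if 0
api::CAnomalyJob::TStrStrUMap control;
control["."] = "f1"; // flush request with ID "1"
job.handleRecord(control);
#endif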
fieldConfig, modelConfig, wrappedOutputStream); @@ -369,8 +314,7 @@ void CAnomalyJobTest::testControlMessages() clauses.push_back("count"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); api::CAnomalyJob::TStrStrUMap dataRows; dataRows["value"] = "2.0"; @@ -378,25 +322,21 @@ void CAnomalyJobTest::testControlMessages() std::stringstream outputStrm; { - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); core_t::TTime time = 12345678; - for (std::size_t i = 0; i < 50; i++, time += (BUCKET_SIZE / 2)) - { + for (std::size_t i = 0; i < 50; i++, time += (BUCKET_SIZE / 2)) { std::stringstream ss; ss << time; dataRows["time"] = ss.str(); - if (i == 40) - { - for (std::size_t j = 0; j < 100; j++) - { + if (i == 40) { + for (std::size_t j = 0; j < 100; j++) { CPPUNIT_ASSERT(job.handleRecord(dataRows)); } } CPPUNIT_ASSERT(job.handleRecord(dataRows)); - if (i < 2) - { + if (i < 2) { // We haven't processed one full bucket but it should be safe to flush. dataRows["."] = "f1"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); @@ -410,17 +350,15 @@ void CAnomalyJobTest::testControlMessages() CPPUNIT_ASSERT(!doc.HasParseError()); CPPUNIT_ASSERT(doc.IsArray()); - const rapidjson::Value &allRecords = doc.GetArray(); + const rapidjson::Value& allRecords = doc.GetArray(); bool foundRecord = false; - for (auto &r : allRecords.GetArray()) - { + for (auto& r : allRecords.GetArray()) { rapidjson::Value::ConstMemberIterator recordsIt = r.GetObject().FindMember("records"); - if (recordsIt != r.GetObject().MemberEnd()) - { - auto &recordsArray = recordsIt->value.GetArray()[0]; - rapidjson::Value::ConstMemberIterator actualIt = recordsArray.FindMember("actual"); + if (recordsIt != r.GetObject().MemberEnd()) { + auto& recordsArray = recordsIt->value.GetArray()[0]; + rapidjson::Value::ConstMemberIterator actualIt = recordsArray.FindMember("actual"); CPPUNIT_ASSERT(actualIt != recordsArray.MemberEnd()); - const rapidjson::Value::ConstArray &values = actualIt->value.GetArray(); + const rapidjson::Value::ConstArray& values = actualIt->value.GetArray(); CPPUNIT_ASSERT_EQUAL(102.0, values[0].GetDouble()); foundRecord = true; @@ -430,30 +368,25 @@ void CAnomalyJobTest::testControlMessages() CPPUNIT_ASSERT(foundRecord); std::stringstream outputStrm2; { - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm2); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm2); api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); core_t::TTime time = 12345678; - for (std::size_t i = 0; i < 50; i++, time += (BUCKET_SIZE / 2)) - { + for (std::size_t i = 0; i < 50; i++, time += (BUCKET_SIZE / 2)) { std::stringstream ss; ss << time; dataRows["time"] = ss.str(); - if (i == 40) - { - for (std::size_t j = 0; j < 100; j++) - { + if (i == 40) { + for (std::size_t j = 0; j < 100; j++) { CPPUNIT_ASSERT(job.handleRecord(dataRows)); } } CPPUNIT_ASSERT(job.handleRecord(dataRows)); - if (i == 40) - { + if (i == 40) { api::CAnomalyJob::TStrStrUMap rows; rows["."] = "r" + ss.str() + " " + ss.str(); CPPUNIT_ASSERT(job.handleRecord(rows)); - for (std::size_t j = 0; j < 100; j++) - { + for 
(std::size_t j = 0; j < 100; j++) { CPPUNIT_ASSERT(job.handleRecord(dataRows)); } } @@ -465,17 +398,15 @@ void CAnomalyJobTest::testControlMessages() CPPUNIT_ASSERT(!doc2.HasParseError()); CPPUNIT_ASSERT(doc2.IsArray()); - const rapidjson::Value &allRecords2 = doc2.GetArray(); + const rapidjson::Value& allRecords2 = doc2.GetArray(); foundRecord = false; - for (auto &r : allRecords2.GetArray()) - { + for (auto& r : allRecords2.GetArray()) { rapidjson::Value::ConstMemberIterator recordsIt = r.GetObject().FindMember("records"); - if (recordsIt != r.GetObject().MemberEnd()) - { - auto &recordsArray = recordsIt->value.GetArray()[0]; - rapidjson::Value::ConstMemberIterator actualIt = recordsArray.FindMember("actual"); + if (recordsIt != r.GetObject().MemberEnd()) { + auto& recordsArray = recordsIt->value.GetArray()[0]; + rapidjson::Value::ConstMemberIterator actualIt = recordsArray.FindMember("actual"); CPPUNIT_ASSERT(actualIt != recordsArray.MemberEnd()); - const rapidjson::Value::ConstArray &values = actualIt->value.GetArray(); + const rapidjson::Value::ConstArray& values = actualIt->value.GetArray(); CPPUNIT_ASSERT_EQUAL(101.0, values[0].GetDouble()); foundRecord = true; @@ -486,26 +417,23 @@ void CAnomalyJobTest::testControlMessages() } } -void CAnomalyJobTest::testSkipTimeControlMessage() -{ +void CAnomalyJobTest::testSkipTimeControlMessage() { model::CLimits limits; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; core_t::TTime time = 3600; - for (std::size_t i = 0; i < 10; ++i, time += BUCKET_SIZE) - { + for (std::size_t i = 0; i < 10; ++i, time += BUCKET_SIZE) { std::ostringstream ss; ss << time; dataRows["time"] = ss.str(); @@ -526,8 +454,7 @@ void CAnomalyJobTest::testSkipTimeControlMessage() CPPUNIT_ASSERT_EQUAL(std::size_t(9), countBuckets("bucket", outputStrm.str() + "]")); // Let's send a few buckets after skip time - for (std::size_t i = 0; i < 3; ++i, time += BUCKET_SIZE) - { + for (std::size_t i = 0; i < 3; ++i, time += BUCKET_SIZE) { std::ostringstream ss; ss << time; dataRows["time"] = ss.str(); @@ -539,8 +466,7 @@ void CAnomalyJobTest::testSkipTimeControlMessage() CPPUNIT_ASSERT_EQUAL(std::size_t(11), countBuckets("bucket", outputStrm.str() + "]")); } -void CAnomalyJobTest::testOutOfPhase() -{ +void CAnomalyJobTest::testOutOfPhase() { // Ensure the right data ends up in the right buckets // First we test that it works as expected for non-out-of-phase, // then we crank in the out-of-phase @@ -560,14 +486,12 @@ void CAnomalyJobTest::testOutOfPhase() api::CFieldConfig::TStrVec clauses; clauses.push_back("mean(value)"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + 
core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; @@ -682,13 +606,11 @@ void CAnomalyJobTest::testOutOfPhase() api::CFieldConfig::TStrVec clauses; clauses.push_back("mean(value)"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; @@ -789,13 +711,11 @@ void CAnomalyJobTest::testOutOfPhase() api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; @@ -909,14 +829,12 @@ void CAnomalyJobTest::testOutOfPhase() api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; @@ -1032,10 +950,9 @@ void CAnomalyJobTest::testOutOfPhase() model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 2, false, ""); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; @@ -1091,7 +1008,7 @@ void CAnomalyJobTest::testOutOfPhase() dataRows["time"] = "10499"; dataRows["value"] = "5.0"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG("Result time is " << ( job.m_ResultsQueue.latestBucketEnd() - 49 )); + LOG_DEBUG("Result time is " << (job.m_ResultsQueue.latestBucketEnd() - 49)); { CSingleResultVisitor visitor; job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); @@ -1148,7 
+1065,7 @@ void CAnomalyJobTest::testOutOfPhase() dataRows["time"] = "10895"; dataRows["value"] = "6.0"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG("Result time is " << ( job.m_ResultsQueue.latestBucketEnd() )); + LOG_DEBUG("Result time is " << (job.m_ResultsQueue.latestBucketEnd())); CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); { CSingleResultVisitor visitor; @@ -1178,10 +1095,9 @@ void CAnomalyJobTest::testOutOfPhase() model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 2, false, ""); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; @@ -1236,7 +1152,7 @@ void CAnomalyJobTest::testOutOfPhase() dataRows["time"] = "10499"; dataRows["value"] = "5.0"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG("Result time is " << ( job.m_ResultsQueue.latestBucketEnd() - 49 )); + LOG_DEBUG("Result time is " << (job.m_ResultsQueue.latestBucketEnd() - 49)); { CSingleResultVisitor visitor; job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); @@ -1293,7 +1209,7 @@ void CAnomalyJobTest::testOutOfPhase() dataRows["time"] = "10895"; dataRows["value"] = "6.0"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG("Result time is " << ( job.m_ResultsQueue.latestBucketEnd() )); + LOG_DEBUG("Result time is " << (job.m_ResultsQueue.latestBucketEnd())); CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); { CSingleResultVisitor visitor; @@ -1325,10 +1241,9 @@ void CAnomalyJobTest::testOutOfPhase() model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 2, false, ""); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; @@ -1429,7 +1344,7 @@ void CAnomalyJobTest::testOutOfPhase() dataRows["time"] = "10499"; dataRows["person"] = "Cara"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG("Result time is " << ( job.m_ResultsQueue.latestBucketEnd() - 49 )); + LOG_DEBUG("Result time is " << (job.m_ResultsQueue.latestBucketEnd() - 49)); { CMultiResultVisitor visitor; job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); @@ -1486,7 +1401,7 @@ void CAnomalyJobTest::testOutOfPhase() dataRows["time"] = "10895"; dataRows["person"] = "Cara"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG("Result time is " << ( job.m_ResultsQueue.latestBucketEnd() )); + LOG_DEBUG("Result time is " << (job.m_ResultsQueue.latestBucketEnd())); CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); { CMultiResultVisitor visitor; @@ -1504,8 +1419,7 @@ void CAnomalyJobTest::testOutOfPhase() } } -void CAnomalyJobTest::testBucketSelection() -{ +void CAnomalyJobTest::testBucketSelection() { LOG_DEBUG("*** testBucketSelection ***"); core_t::TTime bucketSize = 100; model::CLimits limits; @@ -1519,18 +1433,16 @@ void CAnomalyJobTest::testBucketSelection() model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, 
model_t::E_None, "", 0, 2, false, ""); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); job.m_ResultsQueue.reset(950); { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(10); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1000); @@ -1540,8 +1452,7 @@ void CAnomalyJobTest::testBucketSelection() model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(20); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1050); @@ -1551,8 +1462,7 @@ void CAnomalyJobTest::testBucketSelection() model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(15); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1100); @@ -1563,8 +1473,7 @@ void CAnomalyJobTest::testBucketSelection() model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(20); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1150); @@ -1575,8 +1484,7 @@ void CAnomalyJobTest::testBucketSelection() model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(25); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1200); @@ -1587,8 +1495,7 @@ void CAnomalyJobTest::testBucketSelection() model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(0); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1250); @@ -1599,8 +1506,7 @@ void CAnomalyJobTest::testBucketSelection() model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - 
results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(5); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1300); @@ -1611,8 +1517,7 @@ void CAnomalyJobTest::testBucketSelection() model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(5); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1350); @@ -1623,8 +1528,7 @@ void CAnomalyJobTest::testBucketSelection() model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, 0, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, 0, 1000); CResultsScoreVisitor visitor(1); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1400); @@ -1633,8 +1537,7 @@ void CAnomalyJobTest::testBucketSelection() } } -void CAnomalyJobTest::testModelPlot() -{ +void CAnomalyJobTest::testModelPlot() { LOG_DEBUG("*** testModelPlot ***"); { // Test non-overlapping buckets @@ -1653,10 +1556,9 @@ void CAnomalyJobTest::testModelPlot() std::stringstream outputStrm; { - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; dataRows["time"] = "10000000"; @@ -1728,10 +1630,9 @@ void CAnomalyJobTest::testModelPlot() std::stringstream outputStrm; { - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; @@ -1821,11 +1722,10 @@ void CAnomalyJobTest::testModelPlot() } } -void CAnomalyJobTest::testInterimResultEdgeCases() -{ +void CAnomalyJobTest::testInterimResultEdgeCases() { LOG_DEBUG("*** testInterimResultEdgeCases ***"); - const char *logFile = "test.log"; + const char* logFile = "test.log"; core_t::TTime bucketSize = 3600; model::CLimits limits; @@ -1833,12 +1733,11 @@ void CAnomalyJobTest::testInterimResultEdgeCases() api::CFieldConfig::TStrVec clauses{"count", "by", "error"}; fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); @@ -1876,8 +1775,7 @@ void 
CAnomalyJobTest::testInterimResultEdgeCases()
     std::ifstream log(logFile);
     CPPUNIT_ASSERT(log.is_open());
     char line[256];
-    while (log.getline(line, 256))
-    {
+    while (log.getline(line, 256)) {
         LOG_DEBUG("Got '" << line << "'");
         CPPUNIT_ASSERT(false);
     }
@@ -1885,57 +1783,41 @@ void CAnomalyJobTest::testInterimResultEdgeCases()
     std::remove(logFile);
 }

-void CAnomalyJobTest::testRestoreFailsWithEmptyStream()
-{
+void CAnomalyJobTest::testRestoreFailsWithEmptyStream() {
     model::CLimits limits;
     api::CFieldConfig fieldConfig;
     api::CFieldConfig::TStrVec clauses;
     clauses.push_back("value");
     clauses.push_back("partitionfield=greenhouse");
     fieldConfig.initFromClause(clauses);
-    model::CAnomalyDetectorModelConfig modelConfig =
-        model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE);
+    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE);

     std::ostringstream outputStrm;
     core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);

-    api::CAnomalyJob job("job", limits, fieldConfig, modelConfig,
-                         wrappedOutputStream);
+    api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream);

     core_t::TTime completeToTime(0);

     CEmptySearcher restoreSearcher;
     CPPUNIT_ASSERT(job.restoreState(restoreSearcher, completeToTime) == false);
 }

-CppUnit::Test* CAnomalyJobTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAnomalyJobTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testBadTimes",
-                                   &CAnomalyJobTest::testBadTimes) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testOutOfSequence",
-                                   &CAnomalyJobTest::testOutOfSequence) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testControlMessages",
-                                   &CAnomalyJobTest::testControlMessages) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testSkipTimeControlMessage",
-                                   &CAnomalyJobTest::testSkipTimeControlMessage) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testOutOfPhase",
-                                   &CAnomalyJobTest::testOutOfPhase) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testBucketSelection",
-                                   &CAnomalyJobTest::testBucketSelection) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testModelPlot",
-                                   &CAnomalyJobTest::testModelPlot) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testInterimResultEdgeCases",
-                                   &CAnomalyJobTest::testInterimResultEdgeCases) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CAnomalyJobTest>(
-                                   "CAnomalyJobTest::testRestoreFailsWithEmptyStream",
-                                   &CAnomalyJobTest::testRestoreFailsWithEmptyStream) );
+CppUnit::Test* CAnomalyJobTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnomalyJobTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testBadTimes", &CAnomalyJobTest::testBadTimes));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testOutOfSequence", &CAnomalyJobTest::testOutOfSequence));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testControlMessages", &CAnomalyJobTest::testControlMessages));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testSkipTimeControlMessage",
+                                                                   &CAnomalyJobTest::testSkipTimeControlMessage));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testOutOfPhase", &CAnomalyJobTest::testOutOfPhase));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testBucketSelection", &CAnomalyJobTest::testBucketSelection));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testModelPlot", &CAnomalyJobTest::testModelPlot));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testInterimResultEdgeCases",
+                                                                   &CAnomalyJobTest::testInterimResultEdgeCases));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testRestoreFailsWithEmptyStream",
+                                                                   &CAnomalyJobTest::testRestoreFailsWithEmptyStream));

     return suiteOfTests;
 }
diff --git a/lib/api/unittest/CAnomalyJobTest.h b/lib/api/unittest/CAnomalyJobTest.h
index e083e278ae..84fb840f98 100644
--- a/lib/api/unittest/CAnomalyJobTest.h
+++ b/lib/api/unittest/CAnomalyJobTest.h
@@ -10,23 +10,20 @@
 #include

-class CAnomalyJobTest : public CppUnit::TestFixture
-{
-    public:
-        void testLicense();
-        void testBadTimes();
-        void testOutOfSequence();
-        void testControlMessages();
-        void testSkipTimeControlMessage();
-        void testOutOfPhase();
-        void testBucketSelection();
-        void testModelPlot();
-        void testInterimResultEdgeCases();
-        void testRestoreFailsWithEmptyStream();
-
-        static CppUnit::Test *suite();
+class CAnomalyJobTest : public CppUnit::TestFixture {
+public:
+    void testLicense();
+    void testBadTimes();
+    void testOutOfSequence();
+    void testControlMessages();
+    void testSkipTimeControlMessage();
+    void testOutOfPhase();
+    void testBucketSelection();
+    void testModelPlot();
+    void testInterimResultEdgeCases();
+    void testRestoreFailsWithEmptyStream();
+
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CAnomalyJobTest_h
-
diff --git a/lib/api/unittest/CBackgroundPersisterTest.cc b/lib/api/unittest/CBackgroundPersisterTest.cc
index 3a2722c5a9..c23e277086 100644
--- a/lib/api/unittest/CBackgroundPersisterTest.cc
+++ b/lib/api/unittest/CBackgroundPersisterTest.cc
@@ -6,9 +6,9 @@
 #include "CBackgroundPersisterTest.h"

 #include
-#include
 #include
 #include
+#include
 #include
 #include
@@ -33,56 +33,44 @@
 #include
 #include

-namespace
-{
+namespace {
 void reportPersistComplete(ml::api::CModelSnapshotJsonWriter::SModelSnapshotReport modelSnapshotReport,
-                           std::string &snapshotIdOut,
-                           size_t &numDocsOut)
-{
+                           std::string& snapshotIdOut,
+                           size_t& numDocsOut) {
     LOG_DEBUG("Persist complete with description: " << modelSnapshotReport.s_Description);

     snapshotIdOut = modelSnapshotReport.s_SnapshotId;
     numDocsOut = modelSnapshotReport.s_NumDocs;
 }
-
 }

-CppUnit::Test *CBackgroundPersisterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBackgroundPersisterTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBackgroundPersisterTest>(
-                              "CBackgroundPersisterTest::testDetectorPersistBy",
-                              &CBackgroundPersisterTest::testDetectorPersistBy) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBackgroundPersisterTest>(
-                              "CBackgroundPersisterTest::testDetectorPersistOver",
-                              &CBackgroundPersisterTest::testDetectorPersistOver) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBackgroundPersisterTest>(
-                              "CBackgroundPersisterTest::testDetectorPersistPartition",
-                              &CBackgroundPersisterTest::testDetectorPersistPartition) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBackgroundPersisterTest>(
-                              "CBackgroundPersisterTest::testCategorizationOnlyPersist",
-                              &CBackgroundPersisterTest::testCategorizationOnlyPersist) );
+CppUnit::Test* CBackgroundPersisterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBackgroundPersisterTest");
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBackgroundPersisterTest>("CBackgroundPersisterTest::testDetectorPersistBy",
+                                                                            &CBackgroundPersisterTest::testDetectorPersistBy));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBackgroundPersisterTest>("CBackgroundPersisterTest::testDetectorPersistOver",
+                                                                            &CBackgroundPersisterTest::testDetectorPersistOver));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBackgroundPersisterTest>("CBackgroundPersisterTest::testDetectorPersistPartition",
+                                                                            &CBackgroundPersisterTest::testDetectorPersistPartition));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBackgroundPersisterTest>("CBackgroundPersisterTest::testCategorizationOnlyPersist",
+                                                                            &CBackgroundPersisterTest::testCategorizationOnlyPersist));

     return suiteOfTests;
 }

-void CBackgroundPersisterTest::testDetectorPersistBy()
-{
+void CBackgroundPersisterTest::testDetectorPersistBy() {
     this->foregroundBackgroundCompCategorizationAndAnomalyDetection("testfiles/new_mlfields.conf");
 }

-void CBackgroundPersisterTest::testDetectorPersistOver()
-{
+void CBackgroundPersisterTest::testDetectorPersistOver() {
     this->foregroundBackgroundCompCategorizationAndAnomalyDetection("testfiles/new_mlfields_over.conf");
 }

-void CBackgroundPersisterTest::testDetectorPersistPartition()
-{
+void CBackgroundPersisterTest::testDetectorPersistPartition() {
     this->foregroundBackgroundCompCategorizationAndAnomalyDetection("testfiles/new_mlfields_partition.conf");
 }

-void CBackgroundPersisterTest::testCategorizationOnlyPersist()
-{
+void CBackgroundPersisterTest::testCategorizationOnlyPersist() {
     // Start by creating a categorizer with non-trivial state

     static const std::string JOB_ID("job");
@@ -99,17 +87,17 @@ void CBackgroundPersisterTest::testCategorizationOnlyPersist()
     ml::model::CLimits limits;
     ml::api::CFieldConfig fieldConfig("agent");

-    std::ostringstream *backgroundStream(nullptr);
+    std::ostringstream* backgroundStream(nullptr);
     ml::api::CSingleStreamDataAdder::TOStreamP backgroundStreamPtr(backgroundStream = new std::ostringstream());
     ml::api::CSingleStreamDataAdder backgroundDataAdder(backgroundStreamPtr);
     // The 300 second persist interval is irrelevant here - we bypass the timer
     // in this test and kick off the background persistence chain explicitly
     ml::api::CBackgroundPersister backgroundPersister(300, backgroundDataAdder);

-    std::ostringstream *foregroundStream(nullptr);
+    std::ostringstream* foregroundStream(nullptr);
     ml::api::CSingleStreamDataAdder::TOStreamP foregroundStreamPtr(foregroundStream = new std::ostringstream());
     {
-        ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm);
+        ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
         ml::api::CJsonOutputWriter outputWriter(JOB_ID, wrappedOutputStream);

         // All output we're interested in goes via the JSON output writer, so
@@ -117,18 +105,11 @@
         ml::api::CNullOutput nullOutput;

         // The typer knows how to assign categories to records
-        ml::api::CFieldDataTyper typer(JOB_ID,
-                                       fieldConfig,
-                                       limits,
-                                       nullOutput,
-                                       outputWriter,
-                                       &backgroundPersister);
+        ml::api::CFieldDataTyper typer(JOB_ID, fieldConfig, limits, nullOutput, outputWriter, &backgroundPersister);

         ml::api::CLineifiedJsonInputParser parser(inputStrm);

-        CPPUNIT_ASSERT(parser.readStream(boost::bind(&ml::api::CDataProcessor::handleRecord,
-                                                     &typer,
-                                                     _1)));
+        CPPUNIT_ASSERT(parser.readStream(boost::bind(&ml::api::CDataProcessor::handleRecord, &typer, _1)));

         // Persist the processors' state in the background
         CPPUNIT_ASSERT(typer.periodicPersistState(backgroundPersister));
@@ -154,8 +135,7 @@
     CPPUNIT_ASSERT_EQUAL(backgroundState, foregroundState);
 }

-void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyDetection(const std::string &configFileName)
-{
+void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyDetection(const std::string& configFileName) {
     // Start by creating processors with non-trivial state

     static const ml::core_t::TTime BUCKET_SIZE(3600);
@@ -174,10 +154,9 @@ void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyD
     ml::api::CFieldConfig fieldConfig;
     CPPUNIT_ASSERT(fieldConfig.initFromFile(configFileName));

-    ml::model::CAnomalyDetectorModelConfig modelConfig =
-        ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE);
+    ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE);

-    std::ostringstream *backgroundStream(0);
+    std::ostringstream* backgroundStream(0);
     ml::api::CSingleStreamDataAdder::TOStreamP backgroundStreamPtr(backgroundStream = new std::ostringstream());
     ml::api::CSingleStreamDataAdder backgroundDataAdder(backgroundStreamPtr);
     // The 300 second persist interval is irrelevant here - we bypass the timer
@@ -190,10 +169,10 @@ void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyD
     std::string backgroundSnapshotId;
     std::string foregroundSnapshotId;

-    std::ostringstream *foregroundStream(0);
+    std::ostringstream* foregroundStream(0);
     ml::api::CSingleStreamDataAdder::TOStreamP foregroundStreamPtr(foregroundStream = new std::ostringstream());
     {
-        ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm);
+        ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
         ml::api::CJsonOutputWriter outputWriter(JOB_ID, wrappedOutputStream);

         ml::api::CAnomalyJob job(JOB_ID,
@@ -201,16 +180,13 @@
                                  limits,
                                  fieldConfig,
                                  modelConfig,
                                  wrappedOutputStream,
-                                 boost::bind(&reportPersistComplete,
-                                             _1,
-                                             boost::ref(snapshotId),
-                                             boost::ref(numDocs)),
+                                 boost::bind(&reportPersistComplete, _1, boost::ref(snapshotId), boost::ref(numDocs)),
                                  &backgroundPersister,
                                  -1,
                                  "time",
                                  "%d/%b/%Y:%T %z");

-        ml::api::CDataProcessor *firstProcessor(&job);
+        ml::api::CDataProcessor* firstProcessor(&job);

         // Chain the detector's input
         ml::api::COutputChainer outputChainer(job);
@@ -218,17 +194,14 @@
         // The typer knows how to assign categories to records
         ml::api::CFieldDataTyper typer(JOB_ID, fieldConfig, limits, outputChainer, outputWriter);

-        if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0)
-        {
+        if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0) {
             LOG_DEBUG("Applying the categorization typer for anomaly detection");
             firstProcessor = &typer;
         }

         ml::api::CLineifiedJsonInputParser parser(inputStrm);

-        CPPUNIT_ASSERT(parser.readStream(boost::bind(&ml::api::CDataProcessor::handleRecord,
-                                                     firstProcessor,
-                                                     _1)));
+        CPPUNIT_ASSERT(parser.readStream(boost::bind(&ml::api::CDataProcessor::handleRecord, firstProcessor, _1)));

         // Persist the processors' state in the background
         CPPUNIT_ASSERT(firstProcessor->periodicPersistState(backgroundPersister));
@@ -250,12 +223,8 @@
     // The snapshot ID can be different between the two persists, so replace the
     // first occurrence of it (which is in the bulk metadata)
-    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(backgroundSnapshotId,
-                                                                         "snap",
-                                                                         backgroundState));
-    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(foregroundSnapshotId,
-                                                                         "snap",
-                                                                         foregroundState));
+    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(backgroundSnapshotId, "snap", backgroundState));
+    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(foregroundSnapshotId, "snap", foregroundState));

     // Replace the zero byte separators so the expected/actual strings don't get
     // truncated by CppUnit if the test fails
@@ -264,4 +233,3 @@
     CPPUNIT_ASSERT_EQUAL(backgroundState, foregroundState);
 }
-
diff --git a/lib/api/unittest/CBackgroundPersisterTest.h b/lib/api/unittest/CBackgroundPersisterTest.h
index ab6ecf46b9..224a0995e9 100644
--- a/lib/api/unittest/CBackgroundPersisterTest.h
+++ b/lib/api/unittest/CBackgroundPersisterTest.h
@@ -10,20 +10,17 @@
 #include

+class CBackgroundPersisterTest : public CppUnit::TestFixture {
+public:
+    void testDetectorPersistBy();
+    void testDetectorPersistOver();
+    void testDetectorPersistPartition();
+    void testCategorizationOnlyPersist();

-class CBackgroundPersisterTest : public CppUnit::TestFixture
-{
-    public:
-        void testDetectorPersistBy();
-        void testDetectorPersistOver();
-        void testDetectorPersistPartition();
-        void testCategorizationOnlyPersist();
+    static CppUnit::Test* suite();

-        static CppUnit::Test *suite();
-
-    private:
-        void foregroundBackgroundCompCategorizationAndAnomalyDetection(const std::string &configFileName);
+private:
+    void foregroundBackgroundCompCategorizationAndAnomalyDetection(const std::string& configFileName);
 };

 #endif // INCLUDED_CBackgroundPersisterTest_h
-
diff --git a/lib/api/unittest/CBaseTokenListDataTyperTest.cc b/lib/api/unittest/CBaseTokenListDataTyperTest.cc
index 11eb958807..3e7c422320 100644
--- a/lib/api/unittest/CBaseTokenListDataTyperTest.cc
+++ b/lib/api/unittest/CBaseTokenListDataTyperTest.cc
@@ -7,23 +7,18 @@
 #include

+CppUnit::Test* CBaseTokenListDataTyperTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBaseTokenListDataTyperTest");

-CppUnit::Test *CBaseTokenListDataTyperTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBaseTokenListDataTyperTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBaseTokenListDataTyperTest>(
-                                   "CBaseTokenListDataTyperTest::testMinMatchingWeights",
-                                   &CBaseTokenListDataTyperTest::testMinMatchingWeights) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBaseTokenListDataTyperTest>(
-                                   "CBaseTokenListDataTyperTest::testMaxMatchingWeights",
-                                   &CBaseTokenListDataTyperTest::testMaxMatchingWeights) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBaseTokenListDataTyperTest>("CBaseTokenListDataTyperTest::testMinMatchingWeights",
+                                                                               &CBaseTokenListDataTyperTest::testMinMatchingWeights));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBaseTokenListDataTyperTest>("CBaseTokenListDataTyperTest::testMaxMatchingWeights",
+                                                                               &CBaseTokenListDataTyperTest::testMaxMatchingWeights));

     return suiteOfTests;
 }

-void CBaseTokenListDataTyperTest::testMinMatchingWeights()
-{
+void CBaseTokenListDataTyperTest::testMinMatchingWeights() {
     CPPUNIT_ASSERT_EQUAL(size_t(0), ml::api::CBaseTokenListDataTyper::minMatchingWeight(0, 0.7));
     CPPUNIT_ASSERT_EQUAL(size_t(1), ml::api::CBaseTokenListDataTyper::minMatchingWeight(1, 0.7));
     CPPUNIT_ASSERT_EQUAL(size_t(2), ml::api::CBaseTokenListDataTyper::minMatchingWeight(2, 0.7));
@@ -37,8 +32,7 @@ void CBaseTokenListDataTyperTest::testMinMatchingWeights()
     CPPUNIT_ASSERT_EQUAL(size_t(8), ml::api::CBaseTokenListDataTyper::minMatchingWeight(10, 0.7));
 }

-void CBaseTokenListDataTyperTest::testMaxMatchingWeights()
-{
+void CBaseTokenListDataTyperTest::testMaxMatchingWeights() {
     CPPUNIT_ASSERT_EQUAL(size_t(0), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(0, 0.7));
     CPPUNIT_ASSERT_EQUAL(size_t(1), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(1, 0.7));
     CPPUNIT_ASSERT_EQUAL(size_t(2), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(2, 0.7));
@@ -51,4 +45,3 @@ void CBaseTokenListDataTyperTest::testMaxMatchingWeights()
     CPPUNIT_ASSERT_EQUAL(size_t(12), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(9, 0.7));
     CPPUNIT_ASSERT_EQUAL(size_t(14), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(10, 0.7));
 }
-
diff --git a/lib/api/unittest/CBaseTokenListDataTyperTest.h b/lib/api/unittest/CBaseTokenListDataTyperTest.h
index 172e0074e2..2d23715bcb 100644
--- a/lib/api/unittest/CBaseTokenListDataTyperTest.h
+++ b/lib/api/unittest/CBaseTokenListDataTyperTest.h
@@ -8,15 +8,12 @@
 #include

+class CBaseTokenListDataTyperTest : public CppUnit::TestFixture {
+public:
+    void testMinMatchingWeights();
+    void testMaxMatchingWeights();

-class CBaseTokenListDataTyperTest : public CppUnit::TestFixture
-{
-    public:
-        void testMinMatchingWeights();
-        void testMaxMatchingWeights();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CBaseTokenListDataTyperTest_h
-
diff --git a/lib/api/unittest/CCategoryExamplesCollectorTest.cc b/lib/api/unittest/CCategoryExamplesCollectorTest.cc
index 10bb92c71c..cc63403065 100644
--- a/lib/api/unittest/CCategoryExamplesCollectorTest.cc
+++ b/lib/api/unittest/CCategoryExamplesCollectorTest.cc
@@ -14,8 +14,7 @@
 using namespace ml;
 using namespace api;

-void CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero()
-{
+void CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero() {
     CCategoryExamplesCollector examplesCollector(0);
     CPPUNIT_ASSERT(examplesCollector.add(1, "foo") == false);
     CPPUNIT_ASSERT(examplesCollector.add(2, "foo") == false);
@@ -23,15 +22,13 @@ void CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero()
     CPPUNIT_ASSERT_EQUAL(examplesCollector.numberOfExamplesForCategory(2), std::size_t(0));
 }

-void CCategoryExamplesCollectorTest::testAddGivenSameCategoryExamplePairAddedTwice()
-{
+void CCategoryExamplesCollectorTest::testAddGivenSameCategoryExamplePairAddedTwice() {
     CCategoryExamplesCollector examplesCollector(4);
     CPPUNIT_ASSERT(examplesCollector.add(1, "foo") == true);
     CPPUNIT_ASSERT(examplesCollector.add(1, "foo") == false);
 }

-void CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForSameCategory()
-{
+void CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForSameCategory() {
     CCategoryExamplesCollector examplesCollector(3);
     CPPUNIT_ASSERT(examplesCollector.add(1, "foo1") == true);
     CPPUNIT_ASSERT_EQUAL(examplesCollector.numberOfExamplesForCategory(1), std::size_t(1));
@@ -43,8 +40,7 @@ void CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForS
     CPPUNIT_ASSERT_EQUAL(examplesCollector.numberOfExamplesForCategory(1), std::size_t(3));
 }

-void CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent()
-{
+void CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent() {
     CCategoryExamplesCollector examplesCollector(2);
     CPPUNIT_ASSERT(examplesCollector.add(1, "foo") == true);
     CPPUNIT_ASSERT(examplesCollector.add(3, "bar") == true);
@@ -53,8 +49,7 @@ void CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent()
     CPPUNIT_ASSERT_EQUAL(examplesCollector.numberOfExamplesForCategory(3), std::size_t(1));
 }

-void CCategoryExamplesCollectorTest::testExamples()
-{
+void CCategoryExamplesCollectorTest::testExamples() {
     CCategoryExamplesCollector examplesCollector(3);
     examplesCollector.add(1, "foo");
     examplesCollector.add(1, "bar");
@@ -70,8 +65,7 @@ void CCategoryExamplesCollectorTest::testExamples()
     CPPUNIT_ASSERT(examples2.find("invalid") == examples2.end());
 }

-void CCategoryExamplesCollectorTest::testPersist()
-{
+void CCategoryExamplesCollectorTest::testPersist() {
     CCategoryExamplesCollector examplesCollector(3);
     examplesCollector.add(1, "foo");
     examplesCollector.add(1, "bar");
@@ -104,8 +98,7 @@ void CCategoryExamplesCollectorTest::testPersist()
     CPPUNIT_ASSERT(restoredExamplesCollector.numberOfExamplesForCategory(3) == 1);
 }

-void CCategoryExamplesCollectorTest::testTruncation()
-{
+void CCategoryExamplesCollectorTest::testTruncation() {
     CPPUNIT_ASSERT(CCategoryExamplesCollector::MAX_EXAMPLE_LENGTH > 5);
     const std::string baseExample(CCategoryExamplesCollector::MAX_EXAMPLE_LENGTH - 5, 'a');
     const std::string ellipsis(3, '.');
@@ -150,31 +143,26 @@ void CCategoryExamplesCollectorTest::testTruncation()
     }
 }

-CppUnit::Test *CCategoryExamplesCollectorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCategoryExamplesCollectorTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
-                                   "CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero",
-                                   &CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
-                                   "CCategoryExamplesCollectorTest::testAddGivenSameCategoryExamplePairAddedTwice",
-                                   &CCategoryExamplesCollectorTest::testAddGivenSameCategoryExamplePairAddedTwice) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
-                                   "CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForSameCategory",
-                                   &CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForSameCategory) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
-                                   "CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent",
-                                   &CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
-                                   "CCategoryExamplesCollectorTest::testExamples",
-                                   &CCategoryExamplesCollectorTest::testExamples) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
-                                   "CCategoryExamplesCollectorTest::testPersist",
-                                   &CCategoryExamplesCollectorTest::testPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
-                                   "CCategoryExamplesCollectorTest::testTruncation",
-                                   &CCategoryExamplesCollectorTest::testTruncation) );
+CppUnit::Test* CCategoryExamplesCollectorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCategoryExamplesCollectorTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
+        "CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero", &CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
+        "CCategoryExamplesCollectorTest::testAddGivenSameCategoryExamplePairAddedTwice",
+        &CCategoryExamplesCollectorTest::testAddGivenSameCategoryExamplePairAddedTwice));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
+        "CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForSameCategory",
+        &CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForSameCategory));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCategoryExamplesCollectorTest>("CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent",
+                                                                &CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>("CCategoryExamplesCollectorTest::testExamples",
+                                                                                  &CCategoryExamplesCollectorTest::testExamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>("CCategoryExamplesCollectorTest::testPersist",
+                                                                                  &CCategoryExamplesCollectorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>("CCategoryExamplesCollectorTest::testTruncation",
+                                                                                  &CCategoryExamplesCollectorTest::testTruncation));

     return suiteOfTests;
 }
diff --git a/lib/api/unittest/CCategoryExamplesCollectorTest.h b/lib/api/unittest/CCategoryExamplesCollectorTest.h
index 5513038353..1c698a33b5 100644
--- a/lib/api/unittest/CCategoryExamplesCollectorTest.h
+++ b/lib/api/unittest/CCategoryExamplesCollectorTest.h
@@ -8,20 +8,17 @@
 #include

+class CCategoryExamplesCollectorTest : public CppUnit::TestFixture {
+public:
+    void testAddGivenMaxExamplesIsZero();
+    void testAddGivenSameCategoryExamplePairAddedTwice();
+    void testAddGivenMoreThanMaxExamplesAreAddedForSameCategory();
+    void testAddGivenCategoryAddedIsNotSubsequent();
+    void testExamples();
+    void testPersist();
+    void testTruncation();

-class CCategoryExamplesCollectorTest : public CppUnit::TestFixture
-{
-    public:
-        void testAddGivenMaxExamplesIsZero();
-        void testAddGivenSameCategoryExamplePairAddedTwice();
-        void testAddGivenMoreThanMaxExamplesAreAddedForSameCategory();
-        void testAddGivenCategoryAddedIsNotSubsequent();
-        void testExamples();
-        void testPersist();
-        void testTruncation();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CCategoryExamplesCollectorTest_h
-
diff --git a/lib/api/unittest/CConfigUpdaterTest.cc b/lib/api/unittest/CConfigUpdaterTest.cc
index 80fecbba4b..0615ed9185 100644
--- a/lib/api/unittest/CConfigUpdaterTest.cc
+++ b/lib/api/unittest/CConfigUpdaterTest.cc
@@ -16,55 +16,43 @@
 #include
 #include
-
 using namespace ml;
 using namespace api;

-CppUnit::Test *CConfigUpdaterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CConfigUpdaterTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
-                              "CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed",
-                              &CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
-                              "CConfigUpdaterTest::testUpdateGivenUnknownStanzas",
-                              &CConfigUpdaterTest::testUpdateGivenUnknownStanzas) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
-                              "CConfigUpdaterTest::testUpdateGivenModelPlotConfig",
-                              &CConfigUpdaterTest::testUpdateGivenModelPlotConfig) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
-                              "CConfigUpdaterTest::testUpdateGivenDetectorRules",
-                              &CConfigUpdaterTest::testUpdateGivenDetectorRules) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
-                              "CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex",
-                              &CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
-                              "CConfigUpdaterTest::testUpdateGivenFilters",
-                              &CConfigUpdaterTest::testUpdateGivenFilters) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
-                              "CConfigUpdaterTest::testUpdateGivenScheduledEvents",
-                              &CConfigUpdaterTest::testUpdateGivenScheduledEvents) );
+CppUnit::Test* CConfigUpdaterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CConfigUpdaterTest");
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed",
+                                                                      &CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenUnknownStanzas",
+                                                                      &CConfigUpdaterTest::testUpdateGivenUnknownStanzas));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenModelPlotConfig",
+                                                                      &CConfigUpdaterTest::testUpdateGivenModelPlotConfig));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenDetectorRules",
+                                                                      &CConfigUpdaterTest::testUpdateGivenDetectorRules));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex",
+                                                                      &CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenFilters",
+                                                                      &CConfigUpdaterTest::testUpdateGivenFilters));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenScheduledEvents",
+                                                                      &CConfigUpdaterTest::testUpdateGivenScheduledEvents));
     return suiteOfTests;
 }

-void CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed()
-{
+void CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed() {
     CFieldConfig fieldConfig;
     model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
     CConfigUpdater configUpdater(fieldConfig, modelConfig);
     CPPUNIT_ASSERT(configUpdater.update("this is invalid") == false);
 }

-void CConfigUpdaterTest::testUpdateGivenUnknownStanzas()
-{
+void CConfigUpdaterTest::testUpdateGivenUnknownStanzas() {
     CFieldConfig fieldConfig;
     model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
     CConfigUpdater configUpdater(fieldConfig, modelConfig);
     CPPUNIT_ASSERT(configUpdater.update("[unknown1]\na = 1\n[unknown2]\nb = 2\n") == false);
 }

-void CConfigUpdaterTest::testUpdateGivenModelPlotConfig()
-{
+void CConfigUpdaterTest::testUpdateGivenModelPlotConfig() {
     using TStrSet = model::CAnomalyDetectorModelConfig::TStrSet;

     CFieldConfig fieldConfig;
@@ -88,8 +76,7 @@ void CConfigUpdaterTest::testUpdateGivenModelPlotConfig()
     CPPUNIT_ASSERT(terms.find(std::string("d")) != terms.end());
 }

-void CConfigUpdaterTest::testUpdateGivenDetectorRules()
-{
+void CConfigUpdaterTest::testUpdateGivenDetectorRules() {
     CFieldConfig fieldConfig;
     std::string originalRules0("[{\"actions\":[\"filter_results\"],\"conditions_connective\":\"or\",");
     originalRules0 += "\"conditions\":[{\"type\":\"numerical_actual\",\"condition\":{\"operator\":\"lt\",\"value\":\"5\"}}]}]";
@@ -101,7 +88,9 @@ void CConfigUpdaterTest::testUpdateGivenDetectorRules()
     model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();

     std::string configUpdate0("[detectorRules]\ndetectorIndex = 0\nrulesJson = []\n");
-    std::string configUpdate1("[detectorRules]\ndetectorIndex = 1\nrulesJson = [{\"actions\":[\"filter_results\"],\"conditions_connective\":\"or\",\"conditions\":[{\"type\":\"numerical_typical\",\"condition\":{\"operator\":\"lt\",\"value\":\"15\"}}]}]");
+    std::string configUpdate1("[detectorRules]\ndetectorIndex = 1\nrulesJson = "
+                              "[{\"actions\":[\"filter_results\"],\"conditions_connective\":\"or\",\"conditions\":[{\"type\":\"numerical_"
+                              "typical\",\"condition\":{\"operator\":\"lt\",\"value\":\"15\"}}]}]");

     CConfigUpdater configUpdater(fieldConfig, modelConfig);
@@ -115,8 +104,7 @@ void CConfigUpdaterTest::testUpdateGivenDetectorRules()
CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TYPICAL < 15.000000"), itr->second[0].print()); } -void CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex() -{ +void CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex() { CFieldConfig fieldConfig; std::string originalRules("[{\"actions\":[\"filter_results\"],\"conditions_connective\":\"or\","); originalRules += "\"conditions\":[{\"type\":\"numerical_actual\",\"condition\":{\"operator\":\"lt\",\"value\":\"5\"}}]}]"; @@ -131,8 +119,7 @@ void CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex() CPPUNIT_ASSERT(configUpdater.update(configUpdate) == false); } -void CConfigUpdaterTest::testUpdateGivenFilters() -{ +void CConfigUpdaterTest::testUpdateGivenFilters() { CFieldConfig fieldConfig; fieldConfig.processFilter("filter.filter_1", "[\"aaa\",\"bbb\"]"); fieldConfig.processFilter("filter.filter_2", "[\"ccc\",\"ddd\"]"); @@ -181,14 +168,13 @@ void CConfigUpdaterTest::testUpdateGivenFilters() CPPUNIT_ASSERT(ruleFilters["filter_3"].contains("new")); } -void CConfigUpdaterTest::testUpdateGivenScheduledEvents() -{ +void CConfigUpdaterTest::testUpdateGivenScheduledEvents() { std::string validRule1 = "[{\"actions\":[\"filter_results\",\"skip_sampling\"],\"conditions_connective\":\"and\"," - "\"conditions\":[{\"type\":\"time\",\"condition\":{\"operator\":\"gte\",\"value\":\"1\"}}," - "{\"type\":\"time\",\"condition\":{\"operator\":\"lt\",\"value\":\"2\"}}]}]"; + "\"conditions\":[{\"type\":\"time\",\"condition\":{\"operator\":\"gte\",\"value\":\"1\"}}," + "{\"type\":\"time\",\"condition\":{\"operator\":\"lt\",\"value\":\"2\"}}]}]"; std::string validRule2 = "[{\"actions\":[\"filter_results\",\"skip_sampling\"],\"conditions_connective\":\"and\"," - "\"conditions\":[{\"type\":\"time\",\"condition\":{\"operator\":\"gte\",\"value\":\"3\"}}," - "{\"type\":\"time\",\"condition\":{\"operator\":\"lt\",\"value\":\"4\"}}]}]"; + "\"conditions\":[{\"type\":\"time\",\"condition\":{\"operator\":\"gte\",\"value\":\"3\"}}," + "{\"type\":\"time\",\"condition\":{\"operator\":\"lt\",\"value\":\"4\"}}]}]"; CFieldConfig fieldConfig; @@ -201,12 +187,14 @@ void CConfigUpdaterTest::testUpdateGivenScheduledEvents() propTree.put(boost::property_tree::ptree::path_type("scheduledevent.1.rules", '\t'), validRule2); fieldConfig.updateScheduledEvents(propTree); - const auto &events = fieldConfig.scheduledEvents(); + const auto& events = fieldConfig.scheduledEvents(); CPPUNIT_ASSERT_EQUAL(std::size_t(2), events.size()); CPPUNIT_ASSERT_EQUAL(std::string("old_event_1"), events[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 1.000000 AND TIME < 2.000000"), events[0].second.print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 1.000000 AND TIME < 2.000000"), + events[0].second.print()); CPPUNIT_ASSERT_EQUAL(std::string("old_event_2"), events[1].first); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 3.000000 AND TIME < 4.000000"), events[1].second.print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 3.000000 AND TIME < 4.000000"), + events[1].second.print()); } model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); @@ -215,31 +203,38 @@ void CConfigUpdaterTest::testUpdateGivenScheduledEvents() // Test an update that replaces the events { std::stringstream configUpdate; - configUpdate << "[scheduledEvents]" << "\n"; - configUpdate << 
"scheduledevent.0.description = new_event_1" << "\n"; + configUpdate << "[scheduledEvents]" + << "\n"; + configUpdate << "scheduledevent.0.description = new_event_1" + << "\n"; configUpdate << "scheduledevent.0.rules = " << validRule2 << "\n"; - configUpdate << "scheduledevent.1.description = new_event_2" << "\n"; + configUpdate << "scheduledevent.1.description = new_event_2" + << "\n"; configUpdate << "scheduledevent.1.rules = " << validRule1 << "\n"; CPPUNIT_ASSERT(configUpdater.update(configUpdate.str())); - const auto &events = fieldConfig.scheduledEvents(); + const auto& events = fieldConfig.scheduledEvents(); CPPUNIT_ASSERT_EQUAL(std::size_t(2), events.size()); CPPUNIT_ASSERT_EQUAL(std::string("new_event_1"), events[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 3.000000 AND TIME < 4.000000"), events[0].second.print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 3.000000 AND TIME < 4.000000"), + events[0].second.print()); CPPUNIT_ASSERT_EQUAL(std::string("new_event_2"), events[1].first); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 1.000000 AND TIME < 2.000000"), events[1].second.print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 1.000000 AND TIME < 2.000000"), + events[1].second.print()); } // Now test an update that clears the events { std::stringstream configUpdate; - configUpdate << "[scheduledEvents]" << "\n"; - configUpdate << "clear = true" << "\n"; + configUpdate << "[scheduledEvents]" + << "\n"; + configUpdate << "clear = true" + << "\n"; CPPUNIT_ASSERT(configUpdater.update(configUpdate.str())); - const auto &events = fieldConfig.scheduledEvents(); + const auto& events = fieldConfig.scheduledEvents(); CPPUNIT_ASSERT(events.empty()); } } diff --git a/lib/api/unittest/CConfigUpdaterTest.h b/lib/api/unittest/CConfigUpdaterTest.h index 13cf3ba3e0..a4f464e5ae 100644 --- a/lib/api/unittest/CConfigUpdaterTest.h +++ b/lib/api/unittest/CConfigUpdaterTest.h @@ -8,20 +8,17 @@ #include +class CConfigUpdaterTest : public CppUnit::TestFixture { +public: + void testUpdateGivenUpdateCannotBeParsed(); + void testUpdateGivenUnknownStanzas(); + void testUpdateGivenModelPlotConfig(); + void testUpdateGivenDetectorRules(); + void testUpdateGivenRulesWithInvalidDetectorIndex(); + void testUpdateGivenFilters(); + void testUpdateGivenScheduledEvents(); -class CConfigUpdaterTest : public CppUnit::TestFixture -{ - public: - void testUpdateGivenUpdateCannotBeParsed(); - void testUpdateGivenUnknownStanzas(); - void testUpdateGivenModelPlotConfig(); - void testUpdateGivenDetectorRules(); - void testUpdateGivenRulesWithInvalidDetectorIndex(); - void testUpdateGivenFilters(); - void testUpdateGivenScheduledEvents(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CConfigUpdaterTest_h - diff --git a/lib/api/unittest/CCsvInputParserTest.cc b/lib/api/unittest/CCsvInputParserTest.cc index 025c6ddbda..f3e12478d5 100644 --- a/lib/api/unittest/CCsvInputParserTest.cc +++ b/lib/api/unittest/CCsvInputParserTest.cc @@ -6,10 +6,10 @@ #include "CCsvInputParserTest.h" #include -#include #include #include #include +#include #include @@ -20,210 +20,152 @@ #include #include - -CppUnit::Test *CCsvInputParserTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCsvInputParserTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCsvInputParserTest::testSimpleDelims", - 
-                               &CCsvInputParserTest::testSimpleDelims) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCsvInputParserTest>(
-                               "CCsvInputParserTest::testComplexDelims",
-                               &CCsvInputParserTest::testComplexDelims) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCsvInputParserTest>(
-                               "CCsvInputParserTest::testThroughput",
-                               &CCsvInputParserTest::testThroughput) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCsvInputParserTest>(
-                               "CCsvInputParserTest::testDateParse",
-                               &CCsvInputParserTest::testDateParse) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCsvInputParserTest>(
-                               "CCsvInputParserTest::testQuoteParsing",
-                               &CCsvInputParserTest::testQuoteParsing) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCsvInputParserTest>(
-                               "CCsvInputParserTest::testLineParser",
-                               &CCsvInputParserTest::testLineParser) );
+CppUnit::Test* CCsvInputParserTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCsvInputParserTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testSimpleDelims", &CCsvInputParserTest::testSimpleDelims));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testComplexDelims", &CCsvInputParserTest::testComplexDelims));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testThroughput", &CCsvInputParserTest::testThroughput));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testDateParse", &CCsvInputParserTest::testDateParse));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testQuoteParsing", &CCsvInputParserTest::testQuoteParsing));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testLineParser", &CCsvInputParserTest::testLineParser));
 
     return suiteOfTests;
 }
 
-namespace
-{
+namespace {
+class CVisitor {
+public:
+    CVisitor() : m_Fast(true), m_RecordCount(0) {}
 
-class CVisitor
-{
-    public:
-        CVisitor()
-            : m_Fast(true),
-              m_RecordCount(0)
-        {
-        }
+    CVisitor(const ml::api::CCsvInputParser::TStrVec& expectedFieldNames)
+        : m_Fast(false), m_RecordCount(0), m_ExpectedFieldNames(expectedFieldNames) {}
 
-        CVisitor(const ml::api::CCsvInputParser::TStrVec &expectedFieldNames)
-            : m_Fast(false),
-              m_RecordCount(0),
-              m_ExpectedFieldNames(expectedFieldNames)
-        {
-        }
-
-        //! Handle a record
-        bool operator()(const ml::api::CCsvInputParser::TStrStrUMap &dataRowFields)
-        {
-            ++m_RecordCount;
-
-            // For the throughput test, the assertions below will skew the
-            // results, so bypass them
-            if (m_Fast)
-            {
-                return true;
-            }
-
-            // Check the field names
-            for (const auto &entry : dataRowFields)
-            {
-                auto iter = std::find(m_ExpectedFieldNames.begin(), m_ExpectedFieldNames.end(), entry.first);
-                CPPUNIT_ASSERT(iter != m_ExpectedFieldNames.end());
-            }
-
-            CPPUNIT_ASSERT_EQUAL(m_ExpectedFieldNames.size(), dataRowFields.size());
-
-            // Check the line count is consistent with the _raw field
-            ml::api::CCsvInputParser::TStrStrUMapCItr rawIter = dataRowFields.find("_raw");
-            CPPUNIT_ASSERT(rawIter != dataRowFields.end());
-            ml::api::CCsvInputParser::TStrStrUMapCItr lineCountIter = dataRowFields.find("linecount");
-            CPPUNIT_ASSERT(lineCountIter != dataRowFields.end());
-
-            size_t expectedLineCount(1 + std::count(rawIter->second.begin(),
-                                                    rawIter->second.end(),
-                                                    '\n'));
-            size_t lineCount(0);
-            CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(lineCountIter->second, lineCount));
-            CPPUNIT_ASSERT_EQUAL(expectedLineCount, lineCount);
+    //!
Handle a record + bool operator()(const ml::api::CCsvInputParser::TStrStrUMap& dataRowFields) { + ++m_RecordCount; + // For the throughput test, the assertions below will skew the + // results, so bypass them + if (m_Fast) { return true; } - size_t recordCount() const - { - return m_RecordCount; + // Check the field names + for (const auto& entry : dataRowFields) { + auto iter = std::find(m_ExpectedFieldNames.begin(), m_ExpectedFieldNames.end(), entry.first); + CPPUNIT_ASSERT(iter != m_ExpectedFieldNames.end()); } - private: - bool m_Fast; - size_t m_RecordCount; - ml::api::CCsvInputParser::TStrVec m_ExpectedFieldNames; -}; + CPPUNIT_ASSERT_EQUAL(m_ExpectedFieldNames.size(), dataRowFields.size()); -class CTimeCheckingVisitor -{ - public: - using TTimeVec = std::vector; - - public: - CTimeCheckingVisitor(const std::string &timeField, - const std::string &timeFormat, - const TTimeVec &expectedTimes) - : m_RecordCount(0), - m_TimeField(timeField), - m_TimeFormat(timeFormat), - m_ExpectedTimes(expectedTimes) - { - } + // Check the line count is consistent with the _raw field + ml::api::CCsvInputParser::TStrStrUMapCItr rawIter = dataRowFields.find("_raw"); + CPPUNIT_ASSERT(rawIter != dataRowFields.end()); + ml::api::CCsvInputParser::TStrStrUMapCItr lineCountIter = dataRowFields.find("linecount"); + CPPUNIT_ASSERT(lineCountIter != dataRowFields.end()); - //! Handle a record - bool operator()(const ml::api::CCsvInputParser::TStrStrUMap &dataRowFields) - { - // Check the time field exists - CPPUNIT_ASSERT(m_RecordCount < m_ExpectedTimes.size()); - - auto iter = dataRowFields.find(m_TimeField); - CPPUNIT_ASSERT(iter != dataRowFields.end()); - - // Now check the actual time - ml::api::CCsvInputParser::TStrStrUMapCItr fieldIter = dataRowFields.find(m_TimeField); - CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); - ml::core_t::TTime timeVal(0); - if (m_TimeFormat.empty()) - { - CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(fieldIter->second, - timeVal)); - } - else - { - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(m_TimeFormat, - fieldIter->second, - timeVal)); - LOG_DEBUG("Converted " << fieldIter->second << - " to " << timeVal << - " using format " << m_TimeFormat); - } - CPPUNIT_ASSERT_EQUAL(m_ExpectedTimes[m_RecordCount], timeVal); - - ++m_RecordCount; + size_t expectedLineCount(1 + std::count(rawIter->second.begin(), rawIter->second.end(), '\n')); + size_t lineCount(0); + CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(lineCountIter->second, lineCount)); + CPPUNIT_ASSERT_EQUAL(expectedLineCount, lineCount); - return true; - } + return true; + } - size_t recordCount() const - { - return m_RecordCount; - } + size_t recordCount() const { return m_RecordCount; } - private: - size_t m_RecordCount; - std::string m_TimeField; - std::string m_TimeFormat; - TTimeVec m_ExpectedTimes; +private: + bool m_Fast; + size_t m_RecordCount; + ml::api::CCsvInputParser::TStrVec m_ExpectedFieldNames; }; -class CQuoteCheckingVisitor -{ - public: - CQuoteCheckingVisitor() - : m_RecordCount(0) - { +class CTimeCheckingVisitor { +public: + using TTimeVec = std::vector; + +public: + CTimeCheckingVisitor(const std::string& timeField, const std::string& timeFormat, const TTimeVec& expectedTimes) + : m_RecordCount(0), m_TimeField(timeField), m_TimeFormat(timeFormat), m_ExpectedTimes(expectedTimes) {} + + //! 
Handle a record + bool operator()(const ml::api::CCsvInputParser::TStrStrUMap& dataRowFields) { + // Check the time field exists + CPPUNIT_ASSERT(m_RecordCount < m_ExpectedTimes.size()); + + auto iter = dataRowFields.find(m_TimeField); + CPPUNIT_ASSERT(iter != dataRowFields.end()); + + // Now check the actual time + ml::api::CCsvInputParser::TStrStrUMapCItr fieldIter = dataRowFields.find(m_TimeField); + CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); + ml::core_t::TTime timeVal(0); + if (m_TimeFormat.empty()) { + CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(fieldIter->second, timeVal)); + } else { + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(m_TimeFormat, fieldIter->second, timeVal)); + LOG_DEBUG("Converted " << fieldIter->second << " to " << timeVal << " using format " << m_TimeFormat); } + CPPUNIT_ASSERT_EQUAL(m_ExpectedTimes[m_RecordCount], timeVal); + + ++m_RecordCount; + + return true; + } - //! Handle a record - bool operator()(const ml::api::CCsvInputParser::TStrStrUMap &dataRowFields) - { - // Now check quoted fields - ml::api::CCsvInputParser::TStrStrUMapCItr fieldIter = dataRowFields.find("q1"); - CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); - CPPUNIT_ASSERT_EQUAL(std::string(""), fieldIter->second); + size_t recordCount() const { return m_RecordCount; } - fieldIter = dataRowFields.find("q2"); - CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); - CPPUNIT_ASSERT_EQUAL(std::string(""), fieldIter->second); +private: + size_t m_RecordCount; + std::string m_TimeField; + std::string m_TimeFormat; + TTimeVec m_ExpectedTimes; +}; - fieldIter = dataRowFields.find("q3"); - CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); - CPPUNIT_ASSERT_EQUAL(std::string("\""), fieldIter->second); +class CQuoteCheckingVisitor { +public: + CQuoteCheckingVisitor() : m_RecordCount(0) {} - fieldIter = dataRowFields.find("q4"); - CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); - CPPUNIT_ASSERT_EQUAL(std::string("\"\""), fieldIter->second); + //! 
Handle a record + bool operator()(const ml::api::CCsvInputParser::TStrStrUMap& dataRowFields) { + // Now check quoted fields + ml::api::CCsvInputParser::TStrStrUMapCItr fieldIter = dataRowFields.find("q1"); + CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); + CPPUNIT_ASSERT_EQUAL(std::string(""), fieldIter->second); - ++m_RecordCount; + fieldIter = dataRowFields.find("q2"); + CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); + CPPUNIT_ASSERT_EQUAL(std::string(""), fieldIter->second); - return true; - } + fieldIter = dataRowFields.find("q3"); + CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); + CPPUNIT_ASSERT_EQUAL(std::string("\""), fieldIter->second); - size_t recordCount() const - { - return m_RecordCount; - } + fieldIter = dataRowFields.find("q4"); + CPPUNIT_ASSERT(fieldIter != dataRowFields.end()); + CPPUNIT_ASSERT_EQUAL(std::string("\"\""), fieldIter->second); - private: - size_t m_RecordCount; -}; + ++m_RecordCount; + + return true; + } + size_t recordCount() const { return m_RecordCount; } +private: + size_t m_RecordCount; +}; } -void CCsvInputParserTest::testSimpleDelims() -{ +void CCsvInputParserTest::testSimpleDelims() { std::ifstream simpleStrm("testfiles/simple.txt"); CPPUNIT_ASSERT(simpleStrm.is_open()); @@ -264,8 +206,7 @@ void CCsvInputParserTest::testSimpleDelims() CPPUNIT_ASSERT_EQUAL(size_t(15), visitor.recordCount()); } -void CCsvInputParserTest::testComplexDelims() -{ +void CCsvInputParserTest::testComplexDelims() { std::ifstream complexStrm("testfiles/complex.txt"); CPPUNIT_ASSERT(complexStrm.is_open()); @@ -304,26 +245,22 @@ void CCsvInputParserTest::testComplexDelims() CPPUNIT_ASSERT(parser.readStream(std::ref(visitor))); } -void CCsvInputParserTest::testThroughput() -{ +void CCsvInputParserTest::testThroughput() { std::ifstream ifs("testfiles/simple.txt"); CPPUNIT_ASSERT(ifs.is_open()); std::string line; std::string header; - if (std::getline(ifs, line).good()) - { + if (std::getline(ifs, line).good()) { header = line; header += '\n'; } std::string restOfFile; size_t nonHeaderLines(0); - while (std::getline(ifs, line).good()) - { - if (line.empty()) - { + while (std::getline(ifs, line).good()) { + if (line.empty()) { break; } ++nonHeaderLines; @@ -338,8 +275,7 @@ void CCsvInputParserTest::testThroughput() // Construct a large test input static const size_t TEST_SIZE(10000); std::string input(header); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { input += restOfFile; } LOG_DEBUG("Input size is " << input.length()); @@ -349,58 +285,25 @@ void CCsvInputParserTest::testThroughput() CVisitor visitor; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CPPUNIT_ASSERT(parser.readStream(std::ref(visitor))); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished throughput test at " << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(recordsPerBlock * TEST_SIZE, visitor.recordCount()); - LOG_INFO("Parsing " << visitor.recordCount() << - " records took " << (end - start) << " seconds"); + LOG_INFO("Parsing " << visitor.recordCount() << " records took " << (end - start) << " seconds"); } -void CCsvInputParserTest::testDateParse() -{ +void CCsvInputParserTest::testDateParse() { static const ml::core_t::TTime 
EXPECTED_TIMES[] = { - 1359331200, - 1359331200, - 1359331207, - 1359331220, - 1359331259, - 1359331262, - 1359331269, - 1359331270, - 1359331272, - 1359331296, - 1359331301, - 1359331311, - 1359331314, - 1359331315, - 1359331316, - 1359331321, - 1359331328, - 1359331333, - 1359331349, - 1359331352, - 1359331370, - 1359331382, - 1359331385, - 1359331386, - 1359331395, - 1359331404, - 1359331416, - 1359331416, - 1359331424, - 1359331429 - }; - - CTimeCheckingVisitor::TTimeVec expectedTimes(boost::begin(EXPECTED_TIMES), - boost::end(EXPECTED_TIMES)); + 1359331200, 1359331200, 1359331207, 1359331220, 1359331259, 1359331262, 1359331269, 1359331270, 1359331272, 1359331296, + 1359331301, 1359331311, 1359331314, 1359331315, 1359331316, 1359331321, 1359331328, 1359331333, 1359331349, 1359331352, + 1359331370, 1359331382, 1359331385, 1359331386, 1359331395, 1359331404, 1359331416, 1359331416, 1359331424, 1359331429}; + + CTimeCheckingVisitor::TTimeVec expectedTimes(boost::begin(EXPECTED_TIMES), boost::end(EXPECTED_TIMES)); // Ensure we are in UK timewise CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("Europe/London")); @@ -409,9 +312,7 @@ void CCsvInputParserTest::testDateParse() std::ifstream csvStrm("testfiles/s.csv"); CPPUNIT_ASSERT(csvStrm.is_open()); - CTimeCheckingVisitor visitor("time", - "", - expectedTimes); + CTimeCheckingVisitor visitor("time", "", expectedTimes); ml::api::CCsvInputParser parser(csvStrm); @@ -421,9 +322,7 @@ void CCsvInputParserTest::testDateParse() std::ifstream csvStrm("testfiles/bdYIMSp.csv"); CPPUNIT_ASSERT(csvStrm.is_open()); - CTimeCheckingVisitor visitor("date", - "%b %d %Y %I:%M:%S %p", - expectedTimes); + CTimeCheckingVisitor visitor("date", "%b %d %Y %I:%M:%S %p", expectedTimes); ml::api::CCsvInputParser parser(csvStrm); @@ -433,9 +332,7 @@ void CCsvInputParserTest::testDateParse() std::ifstream csvStrm("testfiles/YmdHMS.csv"); CPPUNIT_ASSERT(csvStrm.is_open()); - CTimeCheckingVisitor visitor("time", - "%Y-%m-%d %H:%M:%S", - expectedTimes); + CTimeCheckingVisitor visitor("time", "%Y-%m-%d %H:%M:%S", expectedTimes); ml::api::CCsvInputParser parser(csvStrm); @@ -445,9 +342,7 @@ void CCsvInputParserTest::testDateParse() std::ifstream csvStrm("testfiles/YmdHMSZ_GMT.csv"); CPPUNIT_ASSERT(csvStrm.is_open()); - CTimeCheckingVisitor visitor("mytime", - "%Y-%m-%d %H:%M:%S %Z", - expectedTimes); + CTimeCheckingVisitor visitor("mytime", "%Y-%m-%d %H:%M:%S %Z", expectedTimes); ml::api::CCsvInputParser parser(csvStrm); @@ -461,9 +356,7 @@ void CCsvInputParserTest::testDateParse() std::ifstream csvStrm("testfiles/YmdHMSZ_EST.csv"); CPPUNIT_ASSERT(csvStrm.is_open()); - CTimeCheckingVisitor visitor("datetime", - "%Y-%m-%d %H:%M:%S %Z", - expectedTimes); + CTimeCheckingVisitor visitor("datetime", "%Y-%m-%d %H:%M:%S %Z", expectedTimes); ml::api::CCsvInputParser parser(csvStrm); @@ -475,17 +368,14 @@ void CCsvInputParserTest::testDateParse() CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("")); } -void CCsvInputParserTest::testQuoteParsing() -{ +void CCsvInputParserTest::testQuoteParsing() { // Expect: // q1 = // q2 = // q3 = " // q4 = "" - std::string input( - "b,q1,q2,q3,q4,e\n" - "x,,\"\",\"\"\"\",\"\"\"\"\"\",x\n" - ); + std::string input("b,q1,q2,q3,q4,e\n" + "x,,\"\",\"\"\"\",\"\"\"\"\"\",x\n"); ml::api::CCsvInputParser parser(input); @@ -496,8 +386,7 @@ void CCsvInputParserTest::testQuoteParsing() CPPUNIT_ASSERT_EQUAL(size_t(1), visitor.recordCount()); } -void CCsvInputParserTest::testLineParser() -{ +void CCsvInputParserTest::testLineParser() { 
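     // Exercises CCsvLineParser directly: each parseNext() call should extract one field
     // from the supplied line, and fail once no fields remain (see the final assertion).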
ml::api::CCsvInputParser::CCsvLineParser lineParser; std::string token; @@ -563,4 +452,3 @@ void CCsvInputParserTest::testLineParser() CPPUNIT_ASSERT(!lineParser.parseNext(token)); } } - diff --git a/lib/api/unittest/CCsvInputParserTest.h b/lib/api/unittest/CCsvInputParserTest.h index fcf16ce80d..417bf26412 100644 --- a/lib/api/unittest/CCsvInputParserTest.h +++ b/lib/api/unittest/CCsvInputParserTest.h @@ -8,19 +8,16 @@ #include +class CCsvInputParserTest : public CppUnit::TestFixture { +public: + void testSimpleDelims(); + void testComplexDelims(); + void testThroughput(); + void testDateParse(); + void testQuoteParsing(); + void testLineParser(); -class CCsvInputParserTest : public CppUnit::TestFixture -{ - public: - void testSimpleDelims(); - void testComplexDelims(); - void testThroughput(); - void testDateParse(); - void testQuoteParsing(); - void testLineParser(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CCsvInputParserTest_h - diff --git a/lib/api/unittest/CCsvOutputWriterTest.cc b/lib/api/unittest/CCsvOutputWriterTest.cc index 09d3b04921..1054161a72 100644 --- a/lib/api/unittest/CCsvOutputWriterTest.cc +++ b/lib/api/unittest/CCsvOutputWriterTest.cc @@ -15,32 +15,23 @@ #include #include - -CppUnit::Test *CCsvOutputWriterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCsvOutputWriterTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCsvOutputWriterTest::testAdd", - &CCsvOutputWriterTest::testAdd) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCsvOutputWriterTest::testOverwrite", - &CCsvOutputWriterTest::testOverwrite) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCsvOutputWriterTest::testThroughput", - &CCsvOutputWriterTest::testThroughput) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCsvOutputWriterTest::testExcelQuoting", - &CCsvOutputWriterTest::testExcelQuoting) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCsvOutputWriterTest::testNonExcelQuoting", - &CCsvOutputWriterTest::testNonExcelQuoting) ); +CppUnit::Test* CCsvOutputWriterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCsvOutputWriterTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CCsvOutputWriterTest::testAdd", &CCsvOutputWriterTest::testAdd)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CCsvOutputWriterTest::testOverwrite", &CCsvOutputWriterTest::testOverwrite)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CCsvOutputWriterTest::testThroughput", &CCsvOutputWriterTest::testThroughput)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CCsvOutputWriterTest::testExcelQuoting", &CCsvOutputWriterTest::testExcelQuoting)); + suiteOfTests->addTest(new CppUnit::TestCaller("CCsvOutputWriterTest::testNonExcelQuoting", + &CCsvOutputWriterTest::testNonExcelQuoting)); return suiteOfTests; } -void CCsvOutputWriterTest::testAdd() -{ +void CCsvOutputWriterTest::testAdd() { // In this test, the output is the input plus an extra field - no input // fields are changed @@ -116,41 +107,28 @@ void CCsvOutputWriterTest::testAdd() LOG_DEBUG("Output is:\n" << output); - for (ml::api::CCsvOutputWriter::TStrVecCItr iter = fieldNames.begin(); - iter != fieldNames.end(); - ++iter) - { + for (ml::api::CCsvOutputWriter::TStrVecCItr iter = fieldNames.begin(); iter != fieldNames.end(); ++iter) { LOG_DEBUG("Checking output contains '" << *iter << "'"); CPPUNIT_ASSERT(output.find(*iter) != std::string::npos); } - for (ml::api::CCsvOutputWriter::TStrVecCItr iter = 
mlFieldNames.begin(); - iter != mlFieldNames.end(); - ++iter) - { + for (ml::api::CCsvOutputWriter::TStrVecCItr iter = mlFieldNames.begin(); iter != mlFieldNames.end(); ++iter) { LOG_DEBUG("Checking output contains '" << *iter << "'"); CPPUNIT_ASSERT(output.find(*iter) != std::string::npos); } - for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = originalFields.begin(); - iter != originalFields.end(); - ++iter) - { + for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = originalFields.begin(); iter != originalFields.end(); ++iter) { LOG_DEBUG("Checking output contains '" << iter->second << "'"); CPPUNIT_ASSERT(output.find(iter->second) != std::string::npos); } - for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = mlFields.begin(); - iter != mlFields.end(); - ++iter) - { + for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = mlFields.begin(); iter != mlFields.end(); ++iter) { LOG_DEBUG("Checking output contains '" << iter->second << "'"); CPPUNIT_ASSERT(output.find(iter->second) != std::string::npos); } } -void CCsvOutputWriterTest::testOverwrite() -{ +void CCsvOutputWriterTest::testOverwrite() { // In this test, some fields from the input are changed in the output ml::api::CCsvOutputWriter writer; @@ -228,50 +206,34 @@ void CCsvOutputWriterTest::testOverwrite() LOG_DEBUG("Output is:\n" << output); - for (ml::api::CCsvOutputWriter::TStrVecCItr iter = fieldNames.begin(); - iter != fieldNames.end(); - ++iter) - { + for (ml::api::CCsvOutputWriter::TStrVecCItr iter = fieldNames.begin(); iter != fieldNames.end(); ++iter) { LOG_DEBUG("Checking output contains '" << *iter << "'"); CPPUNIT_ASSERT(output.find(*iter) != std::string::npos); } - for (ml::api::CCsvOutputWriter::TStrVecCItr iter = mlFieldNames.begin(); - iter != mlFieldNames.end(); - ++iter) - { + for (ml::api::CCsvOutputWriter::TStrVecCItr iter = mlFieldNames.begin(); iter != mlFieldNames.end(); ++iter) { LOG_DEBUG("Checking output contains '" << *iter << "'"); CPPUNIT_ASSERT(output.find(*iter) != std::string::npos); } - for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = originalFields.begin(); - iter != originalFields.end(); - ++iter) - { + for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = originalFields.begin(); iter != originalFields.end(); ++iter) { // The Ml fields should override the originals - if (mlFields.find(iter->first) == mlFields.end()) - { + if (mlFields.find(iter->first) == mlFields.end()) { LOG_DEBUG("Checking output contains '" << iter->second << "'"); CPPUNIT_ASSERT(output.find(iter->second) != std::string::npos); - } - else - { + } else { LOG_DEBUG("Checking output does not contain '" << iter->second << "'"); CPPUNIT_ASSERT(output.find(iter->second) == std::string::npos); } } - for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = mlFields.begin(); - iter != mlFields.end(); - ++iter) - { + for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = mlFields.begin(); iter != mlFields.end(); ++iter) { LOG_DEBUG("Checking output contains '" << iter->second << "'"); CPPUNIT_ASSERT(output.find(iter->second) != std::string::npos); } } -void CCsvOutputWriterTest::testThroughput() -{ +void CCsvOutputWriterTest::testThroughput() { // In this test, some fields from the input are changed in the output // Write to /dev/null (Unix) or nul (Windows) @@ -349,26 +311,21 @@ void CCsvOutputWriterTest::testThroughput() static const size_t TEST_SIZE(75000); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting 
throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CPPUNIT_ASSERT(writer.fieldNames(fieldNames, mlFieldNames)); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { CPPUNIT_ASSERT(writer.writeRow(originalFields, mlFields)); } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished throughput test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Writing " << TEST_SIZE << - " records took " << (end - start) << " seconds"); + LOG_INFO("Writing " << TEST_SIZE << " records took " << (end - start) << " seconds"); } -void CCsvOutputWriterTest::testExcelQuoting() -{ +void CCsvOutputWriterTest::testExcelQuoting() { ml::api::CCsvOutputWriter writer; ml::api::CCsvOutputWriter::TStrVec fieldNames; @@ -397,30 +354,25 @@ void CCsvOutputWriterTest::testExcelQuoting() LOG_DEBUG("Output is:\n" << output); - CPPUNIT_ASSERT_EQUAL(std::string( - "no_special," - "contains_quote," - "contains_quote_quote," - "contains_separator," - "contains_quote_separator," - "contains_newline," - "contains_quote_newline\n" - "a," - "\"\"\"\"," - "\"\"\"\"\"\"," - "\",\"," - "\"\"\",\"," - "\"\n\"," - "\"\"\"\n\"\n" - ), + CPPUNIT_ASSERT_EQUAL(std::string("no_special," + "contains_quote," + "contains_quote_quote," + "contains_separator," + "contains_quote_separator," + "contains_newline," + "contains_quote_newline\n" + "a," + "\"\"\"\"," + "\"\"\"\"\"\"," + "\",\"," + "\"\"\",\"," + "\"\n\"," + "\"\"\"\n\"\n"), output); } -void CCsvOutputWriterTest::testNonExcelQuoting() -{ - ml::api::CCsvOutputWriter writer(false, - true, - '\\'); +void CCsvOutputWriterTest::testNonExcelQuoting() { + ml::api::CCsvOutputWriter writer(false, true, '\\'); ml::api::CCsvOutputWriter::TStrVec fieldNames; fieldNames.push_back("no_special"); @@ -450,24 +402,21 @@ void CCsvOutputWriterTest::testNonExcelQuoting() LOG_DEBUG("Output is:\n" << output); - CPPUNIT_ASSERT_EQUAL(std::string( - "no_special," - "contains_quote," - "contains_escape," - "contains_escape_quote," - "contains_separator," - "contains_escape_separator," - "contains_newline," - "contains_escape_newline\n" - "a," - "\"\\\"\"," - "\"\\\\\"," - "\"\\\\\\\"\"," - "\",\"," - "\"\\\\,\"," - "\"\n\"," - "\"\\\\\n\"\n" - ), + CPPUNIT_ASSERT_EQUAL(std::string("no_special," + "contains_quote," + "contains_escape," + "contains_escape_quote," + "contains_separator," + "contains_escape_separator," + "contains_newline," + "contains_escape_newline\n" + "a," + "\"\\\"\"," + "\"\\\\\"," + "\"\\\\\\\"\"," + "\",\"," + "\"\\\\,\"," + "\"\n\"," + "\"\\\\\n\"\n"), output); } - diff --git a/lib/api/unittest/CCsvOutputWriterTest.h b/lib/api/unittest/CCsvOutputWriterTest.h index 3b5d956482..fa9881a418 100644 --- a/lib/api/unittest/CCsvOutputWriterTest.h +++ b/lib/api/unittest/CCsvOutputWriterTest.h @@ -8,18 +8,15 @@ #include +class CCsvOutputWriterTest : public CppUnit::TestFixture { +public: + void testAdd(); + void testOverwrite(); + void testThroughput(); + void testExcelQuoting(); + void testNonExcelQuoting(); -class CCsvOutputWriterTest : public CppUnit::TestFixture -{ - public: - void testAdd(); - void testOverwrite(); - void testThroughput(); - void testExcelQuoting(); - void testNonExcelQuoting(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CCsvOutputWriterTest_h - diff --git a/lib/api/unittest/CDetectionRulesJsonParserTest.cc 
b/lib/api/unittest/CDetectionRulesJsonParserTest.cc index a21467265b..7426267aa3 100644 --- a/lib/api/unittest/CDetectionRulesJsonParserTest.cc +++ b/lib/api/unittest/CDetectionRulesJsonParserTest.cc @@ -15,75 +15,69 @@ using namespace ml; using namespace api; -namespace -{ +namespace { using TStrPatternSetUMap = CDetectionRulesJsonParser::TStrPatternSetUMap; TStrPatternSetUMap EMPTY_VALUE_FILTER_MAP; } -CppUnit::Test *CDetectionRulesJsonParserTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDetectionRulesJsonParserTest"); +CppUnit::Test* CDetectionRulesJsonParserTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetectionRulesJsonParserTest"); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString", - &CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString)); + "CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString", &CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray", - &CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray)); + "CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray", &CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings", + &CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction", + &CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray", + &CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction", + &CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings", - &CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings)); + "CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnective", + &CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnective)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction", - &CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction)); + "CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnective", + &CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnective)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions", + &CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray", - &CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray)); + "CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray", + &CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction", - 
&CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction)); + "CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionOperator", + &CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionOperator)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnective", - &CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnective)); + "CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionOperator", + &CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionOperator)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnective", - &CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnective)); + "CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithConnectiveOr", + &CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithConnectiveOr)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions", - &CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions)); + "CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd", + &CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules", + &CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule", + &CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray", - &CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionOperator", - &CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionOperator)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionOperator", - &CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionOperator)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithConnectiveOr", - &CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithConnectiveOr)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd", - &CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules", - &CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule", - &CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule", - &CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule)); - suiteOfTests->addTest(new CppUnit::TestCaller( - 
"CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions", - &CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions)); + "CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule", &CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions", + &CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions)); return suiteOfTests; } -void CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString() { LOG_DEBUG("*** testParseRulesGivenEmptyString ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -95,8 +89,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString() CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray() { LOG_DEBUG("*** testParseRulesGivenEmptyArray ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -108,8 +101,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray() CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings() { LOG_DEBUG("*** testParseRulesGivenArrayContainsStrings ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -121,8 +113,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings() CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction() { LOG_DEBUG("*** testParseRulesGivenMissingRuleAction ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -140,8 +131,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction() CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray() { LOG_DEBUG("*** testParseRulesGivenRuleActionIsNotArray ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -160,8 +150,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray() CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction() { LOG_DEBUG("*** testParseRulesGivenInvalidRuleAction ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -180,8 +169,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction() CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnective() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnective() { LOG_DEBUG("*** testParseRulesGivenMissingConditionsConnective ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -198,8 +186,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnecti CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnective() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnective() { LOG_DEBUG("*** testParseRulesGivenInvalidConditionsConnective ***"); 
CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -217,8 +204,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnecti CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions() { LOG_DEBUG("*** testParseRulesGivenMissingRuleConditions ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -233,8 +219,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions() CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray() { LOG_DEBUG("*** testParseRulesGivenRuleConditionsIsNotArray ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -250,8 +235,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray( CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionOperator() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionOperator() { LOG_DEBUG("*** testParseRulesGivenMissingConditionOperator ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -268,8 +252,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionOperator( CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionOperator() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionOperator() { LOG_DEBUG("*** testParseRulesGivenInvalidConditionOperator ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -286,8 +269,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionOperator( CPPUNIT_ASSERT(rules.empty()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithConnectiveOr() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithConnectiveOr() { LOG_DEBUG("*** testParseRulesGivenNumericalActualRuleWithConnectiveOr ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -309,8 +291,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithCo CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF ACTUAL < 5.000000 OR ACTUAL(metric) <= 2.300000"), rules[0].print()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd() { LOG_DEBUG("*** testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -321,7 +302,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAb rulesJson += " \"conditions_connective\":\"and\","; rulesJson += " \"conditions\": ["; rulesJson += " {\"type\":\"numerical_typical\", \"condition\":{\"operator\":\"gt\",\"value\":\"5\"}},"; - rulesJson += " {\"type\":\"numerical_diff_abs\", \"field_name\":\"metric\", \"field_value\":\"cpu\",\"condition\":{\"operator\":\"gte\",\"value\":\"2.3\"}}"; + rulesJson += " {\"type\":\"numerical_diff_abs\", \"field_name\":\"metric\", " + "\"field_value\":\"cpu\",\"condition\":{\"operator\":\"gte\",\"value\":\"2.3\"}}"; rulesJson += " ]"; rulesJson += "}"; rulesJson += "]"; @@ -332,8 +314,7 @@ void 
CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAb CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TYPICAL > 5.000000 AND DIFF_ABS(metric:cpu) >= 2.300000"), rules[0].print()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules() { LOG_DEBUG("*** testParseRulesGivenMultipleRules ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -366,8 +347,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules() CPPUNIT_ASSERT_EQUAL(std::string("SKIP_SAMPLING (id:42) IF ACTUAL < 2.000000"), rules[1].print()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule() { LOG_DEBUG("*** testParseRulesGivenCategoricalRule ***"); TStrPatternSetUMap filtersById; @@ -393,8 +373,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule() CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), rules[0].print()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule() { LOG_DEBUG("*** testParseRulesGivenTimeRule ***"); CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP); @@ -415,8 +394,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule() CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TIME >= 5000.000000 AND TIME < 10000.000000"), rules[0].print()); } -void CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions() -{ +void CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions() { LOG_DEBUG("*** testParseRulesGivenDifferentActions ***"); { diff --git a/lib/api/unittest/CDetectionRulesJsonParserTest.h b/lib/api/unittest/CDetectionRulesJsonParserTest.h index f895495ff1..49f0bfaa7c 100644 --- a/lib/api/unittest/CDetectionRulesJsonParserTest.h +++ b/lib/api/unittest/CDetectionRulesJsonParserTest.h @@ -8,28 +8,27 @@ #include -class CDetectionRulesJsonParserTest : public CppUnit::TestFixture -{ - public: - void testParseRulesGivenEmptyString(); - void testParseRulesGivenEmptyArray(); - void testParseRulesGivenArrayContainsStrings(); - void testParseRulesGivenMissingRuleAction(); - void testParseRulesGivenRuleActionIsNotArray(); - void testParseRulesGivenInvalidRuleAction(); - void testParseRulesGivenMissingConditionsConnective(); - void testParseRulesGivenInvalidConditionsConnective(); - void testParseRulesGivenMissingRuleConditions(); - void testParseRulesGivenRuleConditionsIsNotArray(); - void testParseRulesGivenMissingConditionOperator(); - void testParseRulesGivenInvalidConditionOperator(); - void testParseRulesGivenNumericalActualRuleWithConnectiveOr(); - void testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd(); - void testParseRulesGivenMultipleRules(); - void testParseRulesGivenCategoricalRule(); - void testParseRulesGivenTimeRule(); - void testParseRulesGivenDifferentActions(); - static CppUnit::Test *suite(); +class CDetectionRulesJsonParserTest : public CppUnit::TestFixture { +public: + void testParseRulesGivenEmptyString(); + void testParseRulesGivenEmptyArray(); + void testParseRulesGivenArrayContainsStrings(); + void testParseRulesGivenMissingRuleAction(); + void testParseRulesGivenRuleActionIsNotArray(); + void testParseRulesGivenInvalidRuleAction(); + void testParseRulesGivenMissingConditionsConnective(); + void testParseRulesGivenInvalidConditionsConnective(); + void 
testParseRulesGivenMissingRuleConditions(); + void testParseRulesGivenRuleConditionsIsNotArray(); + void testParseRulesGivenMissingConditionOperator(); + void testParseRulesGivenInvalidConditionOperator(); + void testParseRulesGivenNumericalActualRuleWithConnectiveOr(); + void testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd(); + void testParseRulesGivenMultipleRules(); + void testParseRulesGivenCategoricalRule(); + void testParseRulesGivenTimeRule(); + void testParseRulesGivenDifferentActions(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CDetectionRulesJsonParserTest_h diff --git a/lib/api/unittest/CFieldConfigTest.cc b/lib/api/unittest/CFieldConfigTest.cc index a032ad346f..df0dcb9720 100644 --- a/lib/api/unittest/CFieldConfigTest.cc +++ b/lib/api/unittest/CFieldConfigTest.cc @@ -13,79 +13,50 @@ #include - -CppUnit::Test *CFieldConfigTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CFieldConfigTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testTrivial", - &CFieldConfigTest::testTrivial) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testValid", - &CFieldConfigTest::testValid) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testInvalid", - &CFieldConfigTest::testInvalid) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testValidSummaryCountFieldName", - &CFieldConfigTest::testValidSummaryCountFieldName) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testValidClauses", - &CFieldConfigTest::testValidClauses) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testInvalidClauses", - &CFieldConfigTest::testInvalidClauses) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testFieldOptions", - &CFieldConfigTest::testFieldOptions) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testValidPopulationClauses", - &CFieldConfigTest::testValidPopulationClauses) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testValidPopulation", - &CFieldConfigTest::testValidPopulation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testDefaultCategorizationField", - &CFieldConfigTest::testDefaultCategorizationField) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testCategorizationFieldWithFilters", - &CFieldConfigTest::testCategorizationFieldWithFilters) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testExcludeFrequentClauses", - &CFieldConfigTest::testExcludeFrequentClauses) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testExcludeFrequent", - &CFieldConfigTest::testExcludeFrequent) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testSlashes", - &CFieldConfigTest::testSlashes) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testBracketPercent", - &CFieldConfigTest::testBracketPercent) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testClauseTokenise", - &CFieldConfigTest::testClauseTokenise) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testUtf8Bom", - &CFieldConfigTest::testUtf8Bom) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testAddByOverPartitionInfluencers", - &CFieldConfigTest::testAddByOverPartitionInfluencers) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - 
"CFieldConfigTest::testAddOptions", - &CFieldConfigTest::testAddOptions) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFieldConfigTest::testScheduledEvents", - &CFieldConfigTest::testScheduledEvents) ); +CppUnit::Test* CFieldConfigTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFieldConfigTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testTrivial", &CFieldConfigTest::testTrivial)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testValid", &CFieldConfigTest::testValid)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testInvalid", &CFieldConfigTest::testInvalid)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testValidSummaryCountFieldName", + &CFieldConfigTest::testValidSummaryCountFieldName)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFieldConfigTest::testValidClauses", &CFieldConfigTest::testValidClauses)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFieldConfigTest::testInvalidClauses", &CFieldConfigTest::testInvalidClauses)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFieldConfigTest::testFieldOptions", &CFieldConfigTest::testFieldOptions)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testValidPopulationClauses", + &CFieldConfigTest::testValidPopulationClauses)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFieldConfigTest::testValidPopulation", &CFieldConfigTest::testValidPopulation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testDefaultCategorizationField", + &CFieldConfigTest::testDefaultCategorizationField)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testCategorizationFieldWithFilters", + &CFieldConfigTest::testCategorizationFieldWithFilters)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testExcludeFrequentClauses", + &CFieldConfigTest::testExcludeFrequentClauses)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFieldConfigTest::testExcludeFrequent", &CFieldConfigTest::testExcludeFrequent)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testSlashes", &CFieldConfigTest::testSlashes)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFieldConfigTest::testBracketPercent", &CFieldConfigTest::testBracketPercent)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFieldConfigTest::testClauseTokenise", &CFieldConfigTest::testClauseTokenise)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testUtf8Bom", &CFieldConfigTest::testUtf8Bom)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testAddByOverPartitionInfluencers", + &CFieldConfigTest::testAddByOverPartitionInfluencers)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testAddOptions", &CFieldConfigTest::testAddOptions)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFieldConfigTest::testScheduledEvents", &CFieldConfigTest::testScheduledEvents)); return suiteOfTests; } -void CFieldConfigTest::testTrivial() -{ +void CFieldConfigTest::testTrivial() { ml::api::CFieldConfig config("count", "mlcategory"); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -97,31 +68,25 @@ void CFieldConfigTest::testTrivial() 
CPPUNIT_ASSERT_EQUAL(false, ml::model::function_t::isMetric(iter->function())); CPPUNIT_ASSERT_EQUAL(false, ml::model::function_t::isPopulation(iter->function())); - const ml::api::CFieldConfig::TStrSet &superset = config.fieldNameSuperset(); + const ml::api::CFieldConfig::TStrSet& superset = config.fieldNameSuperset(); CPPUNIT_ASSERT_EQUAL(size_t(1), superset.size()); CPPUNIT_ASSERT_EQUAL(size_t(1), superset.count("mlcategory")); } -void CFieldConfigTest::testValid() -{ - this->testValidFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), - "testfiles/new_mlfields.conf"); +void CFieldConfigTest::testValid() { + this->testValidFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields.conf"); } -void CFieldConfigTest::testInvalid() -{ - this->testInvalidFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), - "testfiles/new_invalidmlfields.conf"); +void CFieldConfigTest::testInvalid() { + this->testInvalidFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_invalidmlfields.conf"); } -void CFieldConfigTest::testValidSummaryCountFieldName() -{ +void CFieldConfigTest::testValidSummaryCountFieldName() { this->testValidSummaryCountFieldNameFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields_summarycount.conf"); } -void CFieldConfigTest::testValidClauses() -{ +void CFieldConfigTest::testValidClauses() { ml::api::CFieldConfig config; { @@ -134,7 +99,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -157,7 +122,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -182,7 +147,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -211,7 +176,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -242,7 +207,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -270,7 +235,7 @@ void 
CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(2), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -307,7 +272,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(2), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -346,7 +311,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(2), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -384,7 +349,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(2), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -422,7 +387,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(2), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -462,7 +427,7 @@ void CFieldConfigTest::testValidClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(3), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -501,8 +466,7 @@ void CFieldConfigTest::testValidClauses() } } -void CFieldConfigTest::testInvalidClauses() -{ +void CFieldConfigTest::testInvalidClauses() { ml::api::CFieldConfig config; { @@ -637,8 +601,7 @@ void CFieldConfigTest::testInvalidClauses() } } -void CFieldConfigTest::testFieldOptions() -{ +void CFieldConfigTest::testFieldOptions() { { ml::api::CFieldConfig::CFieldOptions opt("count", 42); @@ -655,22 +618,9 @@ void CFieldConfigTest::testFieldOptions() { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, - true, - true, - "c", - function, - fieldName)); - - ml::api::CFieldConfig::CFieldOptions opt(function, - fieldName, - 1, - "byField", - "overField", - "partitionField", - false, - false, - true); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, true, "c", function, fieldName)); + + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", "overField", "partitionField", false, false, true); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationCount, 
opt.function()); CPPUNIT_ASSERT(opt.fieldName().empty()); @@ -687,12 +637,7 @@ void CFieldConfigTest::testFieldOptions() { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, - false, - false, - "count()", - function, - fieldName)); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, false, false, "count()", function, fieldName)); ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 3, "", "", "", false, false, false); @@ -724,22 +669,9 @@ void CFieldConfigTest::testFieldOptions() { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, - true, - false, - "dc(category)", - function, - fieldName)); - - ml::api::CFieldConfig::CFieldOptions opt(function, - fieldName, - 5, - "", - "overField", - "", - false, - false, - false); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, false, "dc(category)", function, fieldName)); + + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 5, "", "overField", "", false, false, false); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationDistinctCount, opt.function()); CPPUNIT_ASSERT(opt.byFieldName().empty()); @@ -756,22 +688,9 @@ void CFieldConfigTest::testFieldOptions() { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, - true, - false, - "info_content(mlsub)", - function, - fieldName)); - - ml::api::CFieldConfig::CFieldOptions opt(function, - fieldName, - 6, - "", - "mlhrd", - "", - false, - false, - false); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, false, "info_content(mlsub)", function, fieldName)); + + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 6, "", "mlhrd", "", false, false, false); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationInfoContent, opt.function()); CPPUNIT_ASSERT(opt.byFieldName().empty()); @@ -788,22 +707,9 @@ void CFieldConfigTest::testFieldOptions() { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, - true, - false, - "high_info_content(mlsub)", - function, - fieldName)); - - ml::api::CFieldConfig::CFieldOptions opt(function, - fieldName, - 1, - "", - "mlhrd", - "datacenter", - false, - false, - false); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, false, "high_info_content(mlsub)", function, fieldName)); + + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "", "mlhrd", "datacenter", false, false, false); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationHighInfoContent, opt.function()); CPPUNIT_ASSERT(opt.byFieldName().empty()); @@ -820,22 +726,9 @@ void CFieldConfigTest::testFieldOptions() { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, - true, - true, - "rare()", - function, - fieldName)); - - ml::api::CFieldConfig::CFieldOptions opt(function, - fieldName, - 1, - "byField", - "overField", - "", - false, - false, - false); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, true, "rare()", function, fieldName)); + + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", "overField", "", false, false, false); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationRare, opt.function()); CPPUNIT_ASSERT(opt.fieldName().empty()); @@ -852,22 +745,9 @@ void 
CFieldConfigTest::testFieldOptions() { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, - true, - true, - "rare_count", - function, - fieldName)); - - ml::api::CFieldConfig::CFieldOptions opt(function, - fieldName, - 1, - "byField", - "overField", - "partitionField", - false, - false, - true); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, true, "rare_count", function, fieldName)); + + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", "overField", "partitionField", false, false, true); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationRareCount, opt.function()); CPPUNIT_ASSERT(opt.fieldName().empty()); @@ -883,8 +763,7 @@ void CFieldConfigTest::testFieldOptions() } } -void CFieldConfigTest::testValidPopulationClauses() -{ +void CFieldConfigTest::testValidPopulationClauses() { { ml::api::CFieldConfig config; @@ -899,7 +778,7 @@ void CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -926,7 +805,7 @@ void CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -953,7 +832,7 @@ void CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -980,7 +859,7 @@ void CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1010,7 +889,7 @@ void CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(2), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1047,7 +926,7 @@ void CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1074,7 +953,7 @@ void 
CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1103,7 +982,7 @@ void CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1133,7 +1012,7 @@ void CFieldConfigTest::testValidPopulationClauses() LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(2), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1158,20 +1037,16 @@ void CFieldConfigTest::testValidPopulationClauses() } } -void CFieldConfigTest::testValidPopulation() -{ - this->testValidPopulationFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), - "testfiles/new_populationmlfields.conf"); +void CFieldConfigTest::testValidPopulation() { + this->testValidPopulationFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_populationmlfields.conf"); } -void CFieldConfigTest::testDefaultCategorizationField() -{ +void CFieldConfigTest::testDefaultCategorizationField() { this->testDefaultCategorizationFieldFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields_sos_message_cat.conf"); } -void CFieldConfigTest::testCategorizationFieldWithFilters() -{ +void CFieldConfigTest::testCategorizationFieldWithFilters() { std::string fileName("testfiles/new_mlfields_categorization_filters.conf"); ml::api::CFieldConfig config; @@ -1182,17 +1057,16 @@ void CFieldConfigTest::testCategorizationFieldWithFilters() LOG_DEBUG(config.debug()); - const std::string &categorizationFieldName = config.categorizationFieldName(); + const std::string& categorizationFieldName = config.categorizationFieldName(); CPPUNIT_ASSERT_EQUAL(std::string("message"), categorizationFieldName); - const ml::api::CFieldConfig::TStrVec &filters = config.categorizationFilters(); + const ml::api::CFieldConfig::TStrVec& filters = config.categorizationFilters(); CPPUNIT_ASSERT(filters.empty() == false); CPPUNIT_ASSERT_EQUAL(std::size_t(2), filters.size()); CPPUNIT_ASSERT_EQUAL(std::string("foo"), config.categorizationFilters()[0]); CPPUNIT_ASSERT_EQUAL(std::string(" "), config.categorizationFilters()[1]); } -void CFieldConfigTest::testExcludeFrequentClauses() -{ +void CFieldConfigTest::testExcludeFrequentClauses() { { // Basic case with no excludefrequent ml::api::CFieldConfig config; @@ -1211,7 +1085,7 @@ void CFieldConfigTest::testExcludeFrequentClauses() LOG_TRACE(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != 
fields.end()); @@ -1244,7 +1118,7 @@ void CFieldConfigTest::testExcludeFrequentClauses() LOG_TRACE(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1277,7 +1151,7 @@ void CFieldConfigTest::testExcludeFrequentClauses() LOG_TRACE(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1310,7 +1184,7 @@ void CFieldConfigTest::testExcludeFrequentClauses() LOG_TRACE(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1356,7 +1230,7 @@ void CFieldConfigTest::testExcludeFrequentClauses() LOG_TRACE(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1386,7 +1260,7 @@ void CFieldConfigTest::testExcludeFrequentClauses() LOG_TRACE(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1401,26 +1275,19 @@ void CFieldConfigTest::testExcludeFrequentClauses() } } -void CFieldConfigTest::testExcludeFrequent() -{ - this->testExcludeFrequentFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), - "testfiles/new_mlfields_excludefrequent.conf"); +void CFieldConfigTest::testExcludeFrequent() { + this->testExcludeFrequentFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields_excludefrequent.conf"); } -void CFieldConfigTest::testSlashes() -{ - this->testSlashesFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), - "testfiles/new_mlfields_slashes.conf"); +void CFieldConfigTest::testSlashes() { + this->testSlashesFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields_slashes.conf"); } -void CFieldConfigTest::testBracketPercent() -{ - this->testBracketPercentFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), - "testfiles/new_mlfields_bracket_percent.conf"); +void CFieldConfigTest::testBracketPercent() { + this->testBracketPercentFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields_bracket_percent.conf"); } -void CFieldConfigTest::testClauseTokenise() -{ +void CFieldConfigTest::testClauseTokenise() { ml::api::CFieldConfig config; { @@ -1575,15 +1442,13 @@ void CFieldConfigTest::testClauseTokenise() } } -void CFieldConfigTest::testUtf8Bom() -{ +void 
CFieldConfigTest::testUtf8Bom() { ml::api::CFieldConfig config; CPPUNIT_ASSERT(config.initFromFile("testfiles/new_mlfields_with_utf8_bom.conf")); } -void CFieldConfigTest::testAddByOverPartitionInfluencers() -{ +void CFieldConfigTest::testAddByOverPartitionInfluencers() { ml::api::CFieldConfig config; CPPUNIT_ASSERT(config.initFromFile("testfiles/new_mlfields_excludefrequent.conf")); @@ -1604,37 +1469,22 @@ void CFieldConfigTest::testAddByOverPartitionInfluencers() CPPUNIT_ASSERT_EQUAL(std::string("src_ip"), copyInfluencers[5]); } -void CFieldConfigTest::testAddOptions() -{ +void CFieldConfigTest::testAddOptions() { ml::api::CFieldConfig configFromFile; ml::api::CFieldConfig configFromScratch; CPPUNIT_ASSERT(configFromFile.initFromFile("testfiles/new_populationmlfields.conf")); - ml::api::CFieldConfig::CFieldOptions options1("count", - 1, - "SRC", - false, - false); + ml::api::CFieldConfig::CFieldOptions options1("count", 1, "SRC", false, false); CPPUNIT_ASSERT(configFromScratch.addOptions(options1)); - ml::api::CFieldConfig::CFieldOptions options2(ml::model::function_t::E_PopulationCount, - "", - 2, - "DPT", - "SRC", - "", - false, - false, - true); + ml::api::CFieldConfig::CFieldOptions options2(ml::model::function_t::E_PopulationCount, "", 2, "DPT", "SRC", "", false, false, true); CPPUNIT_ASSERT(configFromScratch.addOptions(options2)); CPPUNIT_ASSERT_EQUAL(configFromFile.debug(), configFromScratch.debug()); } -void CFieldConfigTest::testValidFile(TInitFromFileFunc initFunc, - const std::string &fileName) -{ +void CFieldConfigTest::testValidFile(TInitFromFileFunc initFunc, const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1644,7 +1494,7 @@ void CFieldConfigTest::testValidFile(TInitFromFileFunc initFunc, LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(7), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); @@ -1719,7 +1569,7 @@ void CFieldConfigTest::testValidFile(TInitFromFileFunc initFunc, iter++; } - const ml::api::CFieldConfig::TStrSet &superset = config.fieldNameSuperset(); + const ml::api::CFieldConfig::TStrSet& superset = config.fieldNameSuperset(); CPPUNIT_ASSERT_EQUAL(size_t(8), superset.size()); CPPUNIT_ASSERT_EQUAL(size_t(1), superset.count("agent")); CPPUNIT_ASSERT_EQUAL(size_t(1), superset.count("bytes")); @@ -1731,17 +1581,13 @@ void CFieldConfigTest::testValidFile(TInitFromFileFunc initFunc, CPPUNIT_ASSERT_EQUAL(size_t(1), superset.count("response")); } -void CFieldConfigTest::testInvalidFile(TInitFromFileFunc initFunc, - const std::string &fileName) -{ +void CFieldConfigTest::testInvalidFile(TInitFromFileFunc initFunc, const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(!initFunc(&config, fileName)); } -void CFieldConfigTest::testValidSummaryCountFieldNameFile(TInitFromFileFunc initFunc, - const std::string &fileName) -{ +void CFieldConfigTest::testValidSummaryCountFieldNameFile(TInitFromFileFunc initFunc, const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1749,9 +1595,7 @@ void CFieldConfigTest::testValidSummaryCountFieldNameFile(TInitFromFileFunc init CPPUNIT_ASSERT_EQUAL(std::string("count"), config.summaryCountFieldName()); } -void CFieldConfigTest::testValidPopulationFile(TInitFromFileFunc initFunc, - const std::string 
&fileName) -{ +void CFieldConfigTest::testValidPopulationFile(TInitFromFileFunc initFunc, const std::string& fileName) { { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1760,7 +1604,7 @@ void CFieldConfigTest::testValidPopulationFile(TInitFromFileFunc initFunc, LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(2), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1783,9 +1627,7 @@ void CFieldConfigTest::testValidPopulationFile(TInitFromFileFunc initFunc, } } -void CFieldConfigTest::testDefaultCategorizationFieldFile(TInitFromFileFunc initFunc, - const std::string &fileName) -{ +void CFieldConfigTest::testDefaultCategorizationFieldFile(TInitFromFileFunc initFunc, const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1794,11 +1636,11 @@ void CFieldConfigTest::testDefaultCategorizationFieldFile(TInitFromFileFunc init LOG_DEBUG(config.debug()); - const std::string &categorizationFieldName = config.categorizationFieldName(); + const std::string& categorizationFieldName = config.categorizationFieldName(); CPPUNIT_ASSERT_EQUAL(std::string("message"), categorizationFieldName); CPPUNIT_ASSERT(config.categorizationFilters().empty()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(1), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1811,16 +1653,14 @@ void CFieldConfigTest::testDefaultCategorizationFieldFile(TInitFromFileFunc init CPPUNIT_ASSERT_EQUAL(false, ml::model::function_t::isPopulation(iter->function())); } -void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, - const std::string &fileName) -{ +void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); CPPUNIT_ASSERT(config.havePartitionFields()); CPPUNIT_ASSERT(config.summaryCountFieldName().empty()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); CPPUNIT_ASSERT_EQUAL(size_t(8), fields.size()); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); @@ -1830,7 +1670,7 @@ void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, CPPUNIT_ASSERT_EQUAL(std::string("sum"), iter->verboseFunctionName()); CPPUNIT_ASSERT_EQUAL(std::string("bytes"), iter->fieldName()); CPPUNIT_ASSERT_EQUAL(std::string("dest_ip"), iter->byFieldName()); - CPPUNIT_ASSERT_EQUAL(std::string("src_ip"), iter->overFieldName()); + CPPUNIT_ASSERT_EQUAL(std::string("src_ip"), iter->overFieldName()); CPPUNIT_ASSERT(iter->partitionFieldName().empty()); iter++; } @@ -1860,7 +1700,7 @@ void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, CPPUNIT_ASSERT_EQUAL(std::string("sum"), iter->verboseFunctionName()); CPPUNIT_ASSERT_EQUAL(std::string("bytes"), iter->fieldName()); CPPUNIT_ASSERT_EQUAL(std::string("src_ip"), iter->byFieldName()); - CPPUNIT_ASSERT_EQUAL(std::string("dest_ip"), iter->overFieldName()); + 
CPPUNIT_ASSERT_EQUAL(std::string("dest_ip"), iter->overFieldName()); CPPUNIT_ASSERT(iter->partitionFieldName().empty()); iter++; } @@ -1869,9 +1709,9 @@ void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, CPPUNIT_ASSERT_EQUAL(ml::model_t::E_XF_By, iter->excludeFrequent()); CPPUNIT_ASSERT_EQUAL(std::string("sum"), iter->verboseFunctionName()); CPPUNIT_ASSERT_EQUAL(std::string("bytes"), iter->fieldName()); - CPPUNIT_ASSERT_EQUAL(std::string("src_ip"), iter->byFieldName()); + CPPUNIT_ASSERT_EQUAL(std::string("src_ip"), iter->byFieldName()); CPPUNIT_ASSERT(iter->overFieldName().empty()); - CPPUNIT_ASSERT_EQUAL(std::string("host"), iter->partitionFieldName()); + CPPUNIT_ASSERT_EQUAL(std::string("host"), iter->partitionFieldName()); iter++; } { @@ -1880,7 +1720,7 @@ void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, CPPUNIT_ASSERT_EQUAL(std::string("sum"), iter->verboseFunctionName()); CPPUNIT_ASSERT_EQUAL(std::string("bytes"), iter->fieldName()); CPPUNIT_ASSERT(iter->byFieldName().empty()); - CPPUNIT_ASSERT_EQUAL(std::string("dest_ip"), iter->overFieldName()); + CPPUNIT_ASSERT_EQUAL(std::string("dest_ip"), iter->overFieldName()); CPPUNIT_ASSERT(iter->partitionFieldName().empty()); iter++; } @@ -1906,35 +1746,28 @@ void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, } } -void CFieldConfigTest::testSlashesFile(TInitFromFileFunc initFunc, - const std::string &fileName) -{ +void CFieldConfigTest::testSlashesFile(TInitFromFileFunc initFunc, const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); - for (ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); - iter != fields.end(); - ++iter) - { + for (ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); iter != fields.end(); ++iter) { CPPUNIT_ASSERT_EQUAL(std::string("host"), iter->partitionFieldName()); } } -void CFieldConfigTest::testBracketPercentFile(TInitFromFileFunc initFunc, - const std::string &fileName) -{ +void CFieldConfigTest::testBracketPercentFile(TInitFromFileFunc initFunc, const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); LOG_DEBUG(config.debug()); - const ml::api::CFieldConfig::TFieldOptionsMIndex &fields = config.fieldOptions(); + const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); CPPUNIT_ASSERT(iter != fields.end()); @@ -1947,8 +1780,7 @@ void CFieldConfigTest::testBracketPercentFile(TInitFromFileFunc initFunc, CPPUNIT_ASSERT_EQUAL(std::string("This string should have quotes removed"), config.categorizationFieldName()); } -void CFieldConfigTest::testScheduledEvents() -{ +void CFieldConfigTest::testScheduledEvents() { ml::api::CFieldConfig config; CPPUNIT_ASSERT(config.initFromFile("testfiles/scheduled_events.conf")); @@ -1957,9 +1789,8 @@ void CFieldConfigTest::testScheduledEvents() CPPUNIT_ASSERT_EQUAL(std::size_t{2}, events.size()); CPPUNIT_ASSERT_EQUAL(std::string("May Bank Holiday"), events[0].first); CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 1525132800.000000 AND TIME < 1525219200.000000"), - events[0].second.print()); + events[0].second.print()); CPPUNIT_ASSERT_EQUAL(std::string("New Years Day"), 
events[1].first); CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF TIME >= 1514764800.000000 AND TIME < 1514851200.000000"), - events[1].second.print()); + events[1].second.print()); } - diff --git a/lib/api/unittest/CFieldConfigTest.h b/lib/api/unittest/CFieldConfigTest.h index 73458f8794..59ef9b30f7 100644 --- a/lib/api/unittest/CFieldConfigTest.h +++ b/lib/api/unittest/CFieldConfigTest.h @@ -12,56 +12,43 @@ #include - -class CFieldConfigTest : public CppUnit::TestFixture -{ - public: - using TInitFromFileFunc = std::function<bool(ml::api::CFieldConfig *, const std::string &)>; - - public: - void testTrivial(); - void testValid(); - void testInvalid(); - void testValidSummaryCountFieldName(); - void testValidClauses(); - void testInvalidClauses(); - void testFieldOptions(); - void testValidPopulationClauses(); - void testValidPopulation(); - void testDefaultCategorizationField(); - void testCategorizationFieldWithFilters(); - void testExcludeFrequentClauses(); - void testExcludeFrequent(); - void testSlashes(); - void testBracketPercent(); - void testClauseTokenise(); - void testUtf8Bom(); - void testAddByOverPartitionInfluencers(); - void testAddOptions(); - void testScheduledEvents(); - - static CppUnit::Test *suite(); - - private: - void testValidFile(TInitFromFileFunc initFunc, - const std::string &fileName); - void testInvalidFile(TInitFromFileFunc initFunc, - const std::string &fileName); - void testValidSummaryCountFieldNameFile(TInitFromFileFunc initFunc, - const std::string &fileName); - void testValidPopulationFile(TInitFromFileFunc initFunc, - const std::string &fileName); - void testDefaultCategorizationFieldFile(TInitFromFileFunc initFunc, - const std::string &fileName); - void testExcludeFrequentFile(TInitFromFileFunc initFunc, - const std::string &fileName); - void testSlashesFile(TInitFromFileFunc initFunc, - const std::string &fileName); - void testBracketPercentFile(TInitFromFileFunc initFunc, - const std::string &fileName); +class CFieldConfigTest : public CppUnit::TestFixture { +public: + using TInitFromFileFunc = std::function<bool(ml::api::CFieldConfig*, const std::string&)>; + +public: + void testTrivial(); + void testValid(); + void testInvalid(); + void testValidSummaryCountFieldName(); + void testValidClauses(); + void testInvalidClauses(); + void testFieldOptions(); + void testValidPopulationClauses(); + void testValidPopulation(); + void testDefaultCategorizationField(); + void testCategorizationFieldWithFilters(); + void testExcludeFrequentClauses(); + void testExcludeFrequent(); + void testSlashes(); + void testBracketPercent(); + void testClauseTokenise(); + void testUtf8Bom(); + void testAddByOverPartitionInfluencers(); + void testAddOptions(); + void testScheduledEvents(); + + static CppUnit::Test* suite(); + +private: + void testValidFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testInvalidFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testValidSummaryCountFieldNameFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testValidPopulationFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testDefaultCategorizationFieldFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testExcludeFrequentFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testSlashesFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testBracketPercentFile(TInitFromFileFunc initFunc, const std::string& fileName); }; #endif // INCLUDED_CFieldConfigTest_h - diff --git a/lib/api/unittest/CFieldDataTyperTest.cc 
b/lib/api/unittest/CFieldDataTyperTest.cc index 7f4f7ad89f..19491806ed 100644 --- a/lib/api/unittest/CFieldDataTyperTest.cc +++ b/lib/api/unittest/CFieldDataTyperTest.cc @@ -6,9 +6,9 @@ #include "CFieldDataTyperTest.h" -#include #include #include +#include #include @@ -26,8 +26,7 @@ using namespace ml; using namespace api; -namespace -{ +namespace { //! \brief //! Mock object for state restore unit tests. @@ -35,130 +34,73 @@ namespace //! DESCRIPTION:\n //! CDataSearcher that returns an empty stream. //! -class CEmptySearcher : public ml::core::CDataSearcher -{ - public: - //! Do a search that results in an empty input stream. - virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) - { - return TIStreamP(new std::istringstream()); - } +class CEmptySearcher : public ml::core::CDataSearcher { +public: + //! Do a search that results in an empty input stream. + virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) { return TIStreamP(new std::istringstream()); } }; -class CTestOutputHandler : public COutputHandler -{ - public: - CTestOutputHandler() : COutputHandler(), m_NewStream(false), - m_Finalised(false), m_Records(0) - { - } - - virtual ~CTestOutputHandler() - { - } - - virtual void finalise() - { - m_Finalised = true; - } - - bool hasFinalised() const - { - return m_Finalised; - } - - virtual void newOutputStream() - { - m_NewStream = true; - } - - bool isNewStream() const - { - return m_NewStream; - } - - virtual bool fieldNames(const TStrVec &/*fieldNames*/, - const TStrVec &/*extraFieldNames*/) - { - return true; - } - - virtual const TStrVec &fieldNames() const - { - return m_FieldNames; - } - - virtual bool writeRow(const TStrStrUMap &/*dataRowFields*/, - const TStrStrUMap &/*overrideDataRowFields*/) - { - m_Records++; - return true; - } - - uint64_t getNumRows() const - { - return m_Records; - } - - private: - TStrVec m_FieldNames; - - bool m_NewStream; - - bool m_Finalised; - - uint64_t m_Records; -}; +class CTestOutputHandler : public COutputHandler { +public: + CTestOutputHandler() : COutputHandler(), m_NewStream(false), m_Finalised(false), m_Records(0) {} + + virtual ~CTestOutputHandler() {} + + virtual void finalise() { m_Finalised = true; } + + bool hasFinalised() const { return m_Finalised; } + + virtual void newOutputStream() { m_NewStream = true; } + + bool isNewStream() const { return m_NewStream; } + + virtual bool fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { return true; } + + virtual const TStrVec& fieldNames() const { return m_FieldNames; } + + virtual bool writeRow(const TStrStrUMap& /*dataRowFields*/, const TStrStrUMap& /*overrideDataRowFields*/) { + m_Records++; + return true; + } -class CTestDataSearcher : public core::CDataSearcher -{ - public: - CTestDataSearcher(const std::string &data) - : m_Stream(new std::istringstream(data)) - { - } - - virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) - { - return m_Stream; - } - - private: - TIStreamP m_Stream; + uint64_t getNumRows() const { return m_Records; } + +private: + TStrVec m_FieldNames; + + bool m_NewStream; + + bool m_Finalised; + + uint64_t m_Records; }; -class CTestDataAdder : public core::CDataAdder -{ - public: - CTestDataAdder() - : m_Stream(new std::ostringstream) - { - } - - virtual TOStreamP addStreamed(const std::string &/*index*/, - const std::string &/*id*/) - { - return m_Stream; - } - - virtual bool streamComplete(TOStreamP &/*strm*/, bool /*force*/) - { - return true; - } - - TOStreamP getStream() - { - return 
m_Stream; - } - - private: - TOStreamP m_Stream; +class CTestDataSearcher : public core::CDataSearcher { +public: + CTestDataSearcher(const std::string& data) : m_Stream(new std::istringstream(data)) {} + + virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) { return m_Stream; } + +private: + TIStreamP m_Stream; }; +class CTestDataAdder : public core::CDataAdder { +public: + CTestDataAdder() : m_Stream(new std::ostringstream) {} + + virtual TOStreamP addStreamed(const std::string& /*index*/, const std::string& /*id*/) { return m_Stream; } + + virtual bool streamComplete(TOStreamP& /*strm*/, bool /*force*/) { return true; } + + TOStreamP getStream() { return m_Stream; } + +private: + TOStreamP m_Stream; +}; } -void CFieldDataTyperTest::testAll() -{ +void CFieldDataTyperTest::testAll() { model::CLimits limits; CFieldConfig config; CPPUNIT_ASSERT(config.initFromFile("testfiles/new_persist_categorization.conf")); @@ -208,7 +150,7 @@ void CFieldDataTyperTest::testAll() { CTestDataAdder adder; typer.persistState(adder); - std::ostringstream &ss = dynamic_cast<std::ostringstream &>(*adder.getStream()); + std::ostringstream& ss = dynamic_cast<std::ostringstream&>(*adder.getStream()); origJson = ss.str(); } @@ -219,7 +161,7 @@ void CFieldDataTyperTest::testAll() CFieldConfig config2("x", "y"); CTestOutputHandler handler2; std::ostringstream outputStrm2; - core::CJsonOutputStreamWrapper wrappedOutputStream2 (outputStrm2); + core::CJsonOutputStreamWrapper wrappedOutputStream2(outputStrm2); CJsonOutputWriter writer2("job", wrappedOutputStream2); CFieldDataTyper newTyper("job", config2, limits2, handler2, writer2); @@ -229,14 +171,13 @@ void CFieldDataTyperTest::testAll() CTestDataAdder adder; newTyper.persistState(adder); - std::ostringstream &ss = dynamic_cast<std::ostringstream &>(*adder.getStream()); + std::ostringstream& ss = dynamic_cast<std::ostringstream&>(*adder.getStream()); newJson = ss.str(); } CPPUNIT_ASSERT_EQUAL(origJson, newJson); } -void CFieldDataTyperTest::testNodeReverseSearch() -{ +void CFieldDataTyperTest::testNodeReverseSearch() { model::CLimits limits; CFieldConfig config; CPPUNIT_ASSERT(config.initFromFile("testfiles/new_persist_categorization.conf")); @@ -261,7 +202,7 @@ void CFieldDataTyperTest::testNodeReverseSearch() typer.finalise(); } - const std::string &output = outputStrm.str(); + const std::string& output = outputStrm.str(); LOG_DEBUG("Output is: " << output); // Assert that the reverse search contains all expected tokens when @@ -275,8 +216,7 @@ void CFieldDataTyperTest::testNodeReverseSearch() CPPUNIT_ASSERT(output.find("\"message\"") == std::string::npos); } -void CFieldDataTyperTest::testPassOnControlMessages() -{ +void CFieldDataTyperTest::testPassOnControlMessages() { model::CLimits limits; CFieldConfig config; CPPUNIT_ASSERT(config.initFromFile("testfiles/new_persist_categorization.conf")); @@ -299,13 +239,12 @@ void CFieldDataTyperTest::testPassOnControlMessages() typer.finalise(); } - const std::string &output = outputStrm.str(); + const std::string& output = outputStrm.str(); LOG_DEBUG("Output is: " << output); CPPUNIT_ASSERT_EQUAL(std::string("[]"), output); } -void CFieldDataTyperTest::testHandleControlMessages() -{ +void CFieldDataTyperTest::testHandleControlMessages() { model::CLimits limits; CFieldConfig config; CPPUNIT_ASSERT(config.initFromFile("testfiles/new_persist_categorization.conf")); @@ -326,14 +265,12 @@ void CFieldDataTyperTest::testHandleControlMessages() typer.finalise(); } - const std::string &output = outputStrm.str(); + const std::string& output = outputStrm.str(); LOG_DEBUG("Output is: " << output); - 
CPPUNIT_ASSERT_EQUAL(std::string::size_type(0), - output.find("[{\"flush\":{\"id\":\"7\",\"last_finalized_bucket_end\":0}}")); + CPPUNIT_ASSERT_EQUAL(std::string::size_type(0), output.find("[{\"flush\":{\"id\":\"7\",\"last_finalized_bucket_end\":0}}")); } -void CFieldDataTyperTest::testRestoreStateFailsWithEmptyState() -{ +void CFieldDataTyperTest::testRestoreStateFailsWithEmptyState() { model::CLimits limits; CFieldConfig config; CPPUNIT_ASSERT(config.initFromFile("testfiles/new_persist_categorization.conf")); @@ -349,25 +286,17 @@ void CFieldDataTyperTest::testRestoreStateFailsWithEmptyState() CPPUNIT_ASSERT(typer.restoreState(restoreSearcher, completeToTime) == false); } -CppUnit::Test* CFieldDataTyperTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CFieldDataTyperTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CFieldDataTyperTest>( - "CFieldDataTyperTest::testAll", - &CFieldDataTyperTest::testAll) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CFieldDataTyperTest>( - "CFieldDataTyperTest::testNodeReverseSearch", - &CFieldDataTyperTest::testNodeReverseSearch) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CFieldDataTyperTest>( - "CFieldDataTyperTest::testPassOnControlMessages", - &CFieldDataTyperTest::testPassOnControlMessages) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CFieldDataTyperTest>( - "CFieldDataTyperTest::testHandleControlMessages", - &CFieldDataTyperTest::testHandleControlMessages) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CFieldDataTyperTest>( - "CFieldDataTyperTest::testRestoreStateFailsWithEmptyState", - &CFieldDataTyperTest::testRestoreStateFailsWithEmptyState) ); +CppUnit::Test* CFieldDataTyperTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFieldDataTyperTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testAll", &CFieldDataTyperTest::testAll)); + suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testNodeReverseSearch", + &CFieldDataTyperTest::testNodeReverseSearch)); + suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testPassOnControlMessages", + &CFieldDataTyperTest::testPassOnControlMessages)); + suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testHandleControlMessages", + &CFieldDataTyperTest::testHandleControlMessages)); + suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testRestoreStateFailsWithEmptyState", + &CFieldDataTyperTest::testRestoreStateFailsWithEmptyState)); return suiteOfTests; } - diff --git a/lib/api/unittest/CFieldDataTyperTest.h b/lib/api/unittest/CFieldDataTyperTest.h index 01d7b932fa..f7d8def360 100644 --- a/lib/api/unittest/CFieldDataTyperTest.h +++ b/lib/api/unittest/CFieldDataTyperTest.h @@ -10,17 +10,15 @@ #include -class CFieldDataTyperTest : public CppUnit::TestFixture -{ - public: - void testAll(); - void testNodeReverseSearch(); - void testPassOnControlMessages(); - void testHandleControlMessages(); - void testRestoreStateFailsWithEmptyState(); - - static CppUnit::Test *suite(); +class CFieldDataTyperTest : public CppUnit::TestFixture { +public: + void testAll(); + void testNodeReverseSearch(); + void testPassOnControlMessages(); + void testHandleControlMessages(); + void testRestoreStateFailsWithEmptyState(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CFieldDataTyperTest_h diff --git a/lib/api/unittest/CForecastRunnerTest.cc b/lib/api/unittest/CForecastRunnerTest.cc index 147f493937..97737534f7 100644 --- a/lib/api/unittest/CForecastRunnerTest.cc +++ b/lib/api/unittest/CForecastRunnerTest.cc @@ -18,68 +18,50 @@ #include #include 
-#include #include +#include -namespace -{ +namespace { -using TGenerateRecord = void (*)(ml::core_t::TTime time, - ml::api::CAnomalyJob::TStrStrUMap &dataRows); +using TGenerateRecord = void (*)(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows); const ml::core_t::TTime START_TIME{12000000}; const ml::core_t::TTime BUCKET_LENGTH{3600}; -void generateRecord(ml::core_t::TTime time, - ml::api::CAnomalyJob::TStrStrUMap &dataRows) -{ +void generateRecord(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows) { dataRows["time"] = ml::core::CStringUtils::typeToString(time); } -void generateRecordWithSummaryCount(ml::core_t::TTime time, - ml::api::CAnomalyJob::TStrStrUMap &dataRows) -{ +void generateRecordWithSummaryCount(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows) { double x = static_cast(time - START_TIME) / BUCKET_LENGTH; double count = (std::sin(x / 4.0) + 1.0) * 42.0 * std::pow(1.005, x); dataRows["time"] = ml::core::CStringUtils::typeToString(time); dataRows["count"] = ml::core::CStringUtils::typeToString(count); } -void generateRecordWithStatus(ml::core_t::TTime time, - ml::api::CAnomalyJob::TStrStrUMap &dataRows) -{ +void generateRecordWithStatus(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows) { dataRows["time"] = ml::core::CStringUtils::typeToString(time); - dataRows["status"] = (time / BUCKET_LENGTH) % 919 == 0 ? "404" : "200"; + dataRows["status"] = (time / BUCKET_LENGTH) % 919 == 0 ? "404" : "200"; } -void generatePopulationRecord(ml::core_t::TTime time, - ml::api::CAnomalyJob::TStrStrUMap &dataRows) -{ - dataRows["time"] = ml::core::CStringUtils::typeToString(time); +void generatePopulationRecord(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows) { + dataRows["time"] = ml::core::CStringUtils::typeToString(time); dataRows["person"] = "jill"; } -void populateJob(TGenerateRecord generateRecord, - ml::api::CAnomalyJob &job, - std::size_t buckets = 1000) -{ +void populateJob(TGenerateRecord generateRecord, ml::api::CAnomalyJob& job, std::size_t buckets = 1000) { ml::core_t::TTime time = START_TIME; ml::api::CAnomalyJob::TStrStrUMap dataRows; - for (std::size_t bucket = 0u; - bucket < 2 * buckets; - ++bucket, time += (BUCKET_LENGTH / 2)) - { + for (std::size_t bucket = 0u; bucket < 2 * buckets; ++bucket, time += (BUCKET_LENGTH / 2)) { generateRecord(time, dataRows); CPPUNIT_ASSERT(job.handleRecord(dataRows)); } CPPUNIT_ASSERT_EQUAL(uint64_t(2 * buckets), job.numRecordsHandled()); } - } -void CForecastRunnerTest::testSummaryCount() -{ +void CForecastRunnerTest::testSummaryCount() { LOG_INFO("*** test forecast on summary count ***"); std::stringstream outputStrm; @@ -91,18 +73,15 @@ void CForecastRunnerTest::testSummaryCount() clauses.push_back("count"); clauses.push_back("summarycountfield=count"); fieldConfig.initFromClause(clauses); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper); populateJob(generateRecordWithSummaryCount, job); ml::api::CAnomalyJob::TStrStrUMap dataRows; - dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) - + ",\"forecast_id\": \"42\"" - + ",\"forecast_alias\": \"sumcount\"" - + ",\"create_time\": \"1511370819\"" - + ",\"expires_in\": \"" + std::to_string(100 * 
ml::core::constants::DAY) + "\" }"; + dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) + ",\"forecast_id\": \"42\"" + + ",\"forecast_alias\": \"sumcount\"" + ",\"create_time\": \"1511370819\"" + ",\"expires_in\": \"" + + std::to_string(100 * ml::core::constants::DAY) + "\" }"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); } @@ -112,17 +91,13 @@ void CForecastRunnerTest::testSummaryCount() CPPUNIT_ASSERT(doc.GetArray().Size() > 0); bool foundScheduledRecord = false; bool foundStartedRecord = false; - for (const auto &m : doc.GetArray()) - { - if (m.HasMember("model_forecast_request_stats")) - { - const rapidjson::Value &forecastStart = m["model_forecast_request_stats"]; - if (std::strcmp("scheduled", forecastStart["forecast_status"].GetString()) == 0) - { + for (const auto& m : doc.GetArray()) { + if (m.HasMember("model_forecast_request_stats")) { + const rapidjson::Value& forecastStart = m["model_forecast_request_stats"]; + if (std::strcmp("scheduled", forecastStart["forecast_status"].GetString()) == 0) { CPPUNIT_ASSERT(!foundStartedRecord); foundScheduledRecord = true; - } else if (std::strcmp("started", forecastStart["forecast_status"].GetString()) == 0) - { + } else if (std::strcmp("started", forecastStart["forecast_status"].GetString()) == 0) { CPPUNIT_ASSERT(foundScheduledRecord); foundStartedRecord = true; break; @@ -132,9 +107,9 @@ void CForecastRunnerTest::testSummaryCount() CPPUNIT_ASSERT(foundScheduledRecord); CPPUNIT_ASSERT(foundStartedRecord); - const rapidjson::Value &lastElement = doc[doc.GetArray().Size() - 1]; + const rapidjson::Value& lastElement = doc[doc.GetArray().Size() - 1]; CPPUNIT_ASSERT(lastElement.HasMember("model_forecast_request_stats")); - const rapidjson::Value &forecastStats = lastElement["model_forecast_request_stats"]; + const rapidjson::Value& forecastStats = lastElement["model_forecast_request_stats"]; CPPUNIT_ASSERT_EQUAL(std::string("42"), std::string(forecastStats["forecast_id"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("sumcount"), std::string(forecastStats["forecast_alias"].GetString())); @@ -146,11 +121,11 @@ void CForecastRunnerTest::testSummaryCount() CPPUNIT_ASSERT_EQUAL(15591600 * int64_t(1000), forecastStats["timestamp"].GetInt64()); CPPUNIT_ASSERT_EQUAL(15591600 * int64_t(1000), forecastStats["forecast_start_timestamp"].GetInt64()); CPPUNIT_ASSERT_EQUAL((15591600 + 13 * BUCKET_LENGTH) * int64_t(1000), forecastStats["forecast_end_timestamp"].GetInt64()); - CPPUNIT_ASSERT_EQUAL((1511370819 + 100 * ml::core::constants::DAY) * int64_t(1000), forecastStats["forecast_expiry_timestamp"].GetInt64()); + CPPUNIT_ASSERT_EQUAL((1511370819 + 100 * ml::core::constants::DAY) * int64_t(1000), + forecastStats["forecast_expiry_timestamp"].GetInt64()); } -void CForecastRunnerTest::testPopulation() -{ +void CForecastRunnerTest::testPopulation() { LOG_INFO("*** test forecast on population ***"); std::stringstream outputStrm; @@ -163,25 +138,23 @@ void CForecastRunnerTest::testPopulation() clauses.push_back("over"); clauses.push_back("person"); fieldConfig.initFromClause(clauses); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper); populateJob(generatePopulationRecord, job); ml::api::CAnomalyJob::TStrStrUMap dataRows; - dataRows["."] = "p{\"duration\":" + 
std::to_string(13 * BUCKET_LENGTH) - + ",\"forecast_id\": \"31\"" - + ",\"create_time\": \"1511370819\" }"; + dataRows["."] = + "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) + ",\"forecast_id\": \"31\"" + ",\"create_time\": \"1511370819\" }"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); } rapidjson::Document doc; doc.Parse(outputStrm.str()); CPPUNIT_ASSERT(!doc.HasParseError()); - const rapidjson::Value &lastElement = doc[doc.GetArray().Size() - 1]; + const rapidjson::Value& lastElement = doc[doc.GetArray().Size() - 1]; CPPUNIT_ASSERT(lastElement.HasMember("model_forecast_request_stats")); - const rapidjson::Value &forecastStats = lastElement["model_forecast_request_stats"]; + const rapidjson::Value& forecastStats = lastElement["model_forecast_request_stats"]; CPPUNIT_ASSERT(!doc.HasParseError()); CPPUNIT_ASSERT_EQUAL(std::string("31"), std::string(forecastStats["forecast_id"].GetString())); @@ -189,11 +162,11 @@ void CForecastRunnerTest::testPopulation() CPPUNIT_ASSERT_EQUAL(std::string("failed"), std::string(forecastStats["forecast_status"].GetString())); CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::ERROR_NOT_SUPPORTED_FOR_POPULATION_MODELS, std::string(forecastStats["forecast_messages"].GetArray()[0].GetString())); - CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000), forecastStats["forecast_expiry_timestamp"].GetInt64()); + CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000), + forecastStats["forecast_expiry_timestamp"].GetInt64()); } -void CForecastRunnerTest::testRare() -{ +void CForecastRunnerTest::testRare() { LOG_INFO("*** test forecast on rare ***"); std::stringstream outputStrm; @@ -207,25 +180,22 @@ void CForecastRunnerTest::testRare() clauses.push_back("status"); fieldConfig.initFromClause(clauses); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper); populateJob(generateRecordWithStatus, job, 5000); ml::api::CAnomalyJob::TStrStrUMap dataRows; - dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) - + ",\"forecast_id\": \"42\"" - + ",\"create_time\": \"1511370819\"" - + ",\"expires_in\": \"8640000\" }"; + dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) + ",\"forecast_id\": \"42\"" + + ",\"create_time\": \"1511370819\"" + ",\"expires_in\": \"8640000\" }"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); } rapidjson::Document doc; doc.Parse(outputStrm.str()); CPPUNIT_ASSERT(!doc.HasParseError()); - const rapidjson::Value &lastElement = doc[doc.GetArray().Size() - 1]; + const rapidjson::Value& lastElement = doc[doc.GetArray().Size() - 1]; CPPUNIT_ASSERT(lastElement.HasMember("model_forecast_request_stats")); - const rapidjson::Value &forecastStats = lastElement["model_forecast_request_stats"]; + const rapidjson::Value& forecastStats = lastElement["model_forecast_request_stats"]; CPPUNIT_ASSERT(!doc.HasParseError()); CPPUNIT_ASSERT_EQUAL(std::string("42"), std::string(forecastStats["forecast_id"].GetString())); @@ -233,11 +203,11 @@ void CForecastRunnerTest::testRare() CPPUNIT_ASSERT_EQUAL(std::string("failed"), std::string(forecastStats["forecast_status"].GetString())); CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::ERROR_NO_SUPPORTED_FUNCTIONS, 
std::string(forecastStats["forecast_messages"].GetArray()[0].GetString())); - CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000), forecastStats["forecast_expiry_timestamp"].GetInt64()); + CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000), + forecastStats["forecast_expiry_timestamp"].GetInt64()); } -void CForecastRunnerTest::testInsufficientData() -{ +void CForecastRunnerTest::testInsufficientData() { LOG_INFO("*** test insufficient data ***"); std::stringstream outputStrm; @@ -248,41 +218,38 @@ void CForecastRunnerTest::testInsufficientData() ml::api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); fieldConfig.initFromClause(clauses); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper); populateJob(generateRecord, job, 3); ml::api::CAnomalyJob::TStrStrUMap dataRows; - dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) - + ",\"forecast_id\": \"31\"" - + ",\"create_time\": \"1511370819\" }"; + dataRows["."] = + "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) + ",\"forecast_id\": \"31\"" + ",\"create_time\": \"1511370819\" }"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); } rapidjson::Document doc; doc.Parse(outputStrm.str()); CPPUNIT_ASSERT(!doc.HasParseError()); - const rapidjson::Value &lastElement = doc[doc.GetArray().Size() - 1]; + const rapidjson::Value& lastElement = doc[doc.GetArray().Size() - 1]; CPPUNIT_ASSERT(lastElement.HasMember("model_forecast_request_stats")); - const rapidjson::Value &forecastStats = lastElement["model_forecast_request_stats"]; + const rapidjson::Value& forecastStats = lastElement["model_forecast_request_stats"]; CPPUNIT_ASSERT(!doc.HasParseError()); CPPUNIT_ASSERT_EQUAL(std::string("31"), std::string(forecastStats["forecast_id"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("finished"), std::string(forecastStats["forecast_status"].GetString())); CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::INFO_NO_MODELS_CAN_CURRENTLY_BE_FORECAST, std::string(forecastStats["forecast_messages"].GetArray()[0].GetString())); - CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000), forecastStats["forecast_expiry_timestamp"].GetInt64()); + CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000), + forecastStats["forecast_expiry_timestamp"].GetInt64()); } -void CForecastRunnerTest::testValidateDuration() -{ +void CForecastRunnerTest::testValidateDuration() { ml::api::CForecastRunner::SForecast forecastJob; - std::string message ("p{\"duration\":" + std::to_string(10 * ml::core::constants::WEEK) + - ",\"forecast_id\": \"42\"" + - ",\"create_time\": \"1511370819\" }"); + std::string message("p{\"duration\":" + std::to_string(10 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" + + ",\"create_time\": \"1511370819\" }"); CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000)); CPPUNIT_ASSERT_EQUAL(8 * ml::core::constants::WEEK, forecastJob.s_Duration); @@ -290,35 +257,28 @@ void CForecastRunnerTest::testValidateDuration() CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::WARNING_DURATION_LIMIT, *forecastJob.s_Messages.begin()); } -void CForecastRunnerTest::testValidateDefaultExpiry() -{ +void 
CForecastRunnerTest::testValidateDefaultExpiry() { ml::api::CForecastRunner::SForecast forecastJob; - std::string message ("p{\"duration\":" + std::to_string(2 * ml::core::constants::WEEK) + - ",\"forecast_id\": \"42\"" + - ",\"create_time\": \"1511370819\" }"); + std::string message("p{\"duration\":" + std::to_string(2 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" + + ",\"create_time\": \"1511370819\" }"); CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000)); CPPUNIT_ASSERT_EQUAL(2 * ml::core::constants::WEEK, forecastJob.s_Duration); CPPUNIT_ASSERT_EQUAL(14 * ml::core::constants::DAY + 1511370819, forecastJob.s_ExpiryTime); - std::string message2 ("p{\"duration\":" + std::to_string(2 * ml::core::constants::WEEK) + - ",\"forecast_id\": \"42\"" + - ",\"create_time\": \"1511370819\"" + - ",\"expires_in\": -1 }"); + std::string message2("p{\"duration\":" + std::to_string(2 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" + + ",\"create_time\": \"1511370819\"" + ",\"expires_in\": -1 }"); CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message2, forecastJob, 1400000000)); CPPUNIT_ASSERT_EQUAL(2 * ml::core::constants::WEEK, forecastJob.s_Duration); CPPUNIT_ASSERT_EQUAL(14 * ml::core::constants::DAY + 1511370819, forecastJob.s_ExpiryTime); } -void CForecastRunnerTest::testValidateNoExpiry() -{ +void CForecastRunnerTest::testValidateNoExpiry() { ml::api::CForecastRunner::SForecast forecastJob; - std::string message ("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + - ",\"forecast_id\": \"42\"" + - ",\"create_time\": \"1511370819\"" + - ",\"expires_in\": 0 }"); + std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" + + ",\"create_time\": \"1511370819\"" + ",\"expires_in\": 0 }"); CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000)); CPPUNIT_ASSERT_EQUAL(3 * ml::core::constants::WEEK, forecastJob.s_Duration); @@ -326,73 +286,55 @@ void CForecastRunnerTest::testValidateNoExpiry() CPPUNIT_ASSERT_EQUAL(forecastJob.s_CreateTime, forecastJob.s_ExpiryTime); } -void CForecastRunnerTest::testValidateInvalidExpiry() -{ +void CForecastRunnerTest::testValidateInvalidExpiry() { ml::api::CForecastRunner::SForecast forecastJob; - std::string message ("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + - ",\"forecast_id\": \"42\"" + - ",\"create_time\": \"1511370819\""+ - ",\"expires_in\": -244 }"); + std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" + + ",\"create_time\": \"1511370819\"" + ",\"expires_in\": -244 }"); CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000)); CPPUNIT_ASSERT_EQUAL(3 * ml::core::constants::WEEK, forecastJob.s_Duration); CPPUNIT_ASSERT_EQUAL(14 * ml::core::constants::DAY + 1511370819, forecastJob.s_ExpiryTime); } -void CForecastRunnerTest::testValidateBrokenMessage() -{ +void CForecastRunnerTest::testValidateBrokenMessage() { ml::api::CForecastRunner::SForecast forecastJob; - std::string message ("p{\"dura"); + std::string message("p{\"dura"); CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000) == false); } -void CForecastRunnerTest::testValidateMissingId() -{ +void CForecastRunnerTest::testValidateMissingId() { ml::api::CForecastRunner::SForecast forecastJob; - 
std::string message ("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + - ",\"create_time\": \"1511370819\"}"); + std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + ",\"create_time\": \"1511370819\"}"); CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000) == false); } -CppUnit::Test *CForecastRunnerTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CForecastRunnerTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testSummaryCount", - &CForecastRunnerTest::testSummaryCount) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testPopulation", - &CForecastRunnerTest::testPopulation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testRare", - &CForecastRunnerTest::testRare) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testInsufficientData", - &CForecastRunnerTest::testInsufficientData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testValidateDuration", - &CForecastRunnerTest::testValidateDuration) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testValidateExpiry", - &CForecastRunnerTest::testValidateDefaultExpiry) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testValidateNoExpiry", - &CForecastRunnerTest::testValidateNoExpiry) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testValidateInvalidExpiry", - &CForecastRunnerTest::testValidateInvalidExpiry) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testBrokenMessage", - &CForecastRunnerTest::testValidateBrokenMessage) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CForecastRunnerTest::testMissingId", - &CForecastRunnerTest::testValidateMissingId) ); +CppUnit::Test* CForecastRunnerTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CForecastRunnerTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CForecastRunnerTest::testSummaryCount", &CForecastRunnerTest::testSummaryCount)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CForecastRunnerTest::testPopulation", &CForecastRunnerTest::testPopulation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CForecastRunnerTest::testRare", &CForecastRunnerTest::testRare)); + suiteOfTests->addTest(new CppUnit::TestCaller("CForecastRunnerTest::testInsufficientData", + &CForecastRunnerTest::testInsufficientData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CForecastRunnerTest::testValidateDuration", + &CForecastRunnerTest::testValidateDuration)); + suiteOfTests->addTest(new CppUnit::TestCaller("CForecastRunnerTest::testValidateExpiry", + &CForecastRunnerTest::testValidateDefaultExpiry)); + suiteOfTests->addTest(new CppUnit::TestCaller("CForecastRunnerTest::testValidateNoExpiry", + &CForecastRunnerTest::testValidateNoExpiry)); + suiteOfTests->addTest(new CppUnit::TestCaller("CForecastRunnerTest::testValidateInvalidExpiry", + &CForecastRunnerTest::testValidateInvalidExpiry)); + suiteOfTests->addTest(new CppUnit::TestCaller("CForecastRunnerTest::testBrokenMessage", + &CForecastRunnerTest::testValidateBrokenMessage)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CForecastRunnerTest::testMissingId", &CForecastRunnerTest::testValidateMissingId)); return suiteOfTests; } diff --git a/lib/api/unittest/CForecastRunnerTest.h 
b/lib/api/unittest/CForecastRunnerTest.h index 0dac6c04a8..1c1bb24572 100644 --- a/lib/api/unittest/CForecastRunnerTest.h +++ b/lib/api/unittest/CForecastRunnerTest.h @@ -13,22 +13,20 @@ //! //! DESCRIPTION:\n //! A couple of module tests of forecast including regression tests -class CForecastRunnerTest : public CppUnit::TestFixture -{ - public: - void testSummaryCount(); - void testPopulation(); - void testRare(); - void testInsufficientData(); - void testValidateDuration(); - void testValidateDefaultExpiry(); - void testValidateNoExpiry(); - void testValidateInvalidExpiry(); - void testValidateBrokenMessage(); - void testValidateMissingId(); +class CForecastRunnerTest : public CppUnit::TestFixture { +public: + void testSummaryCount(); + void testPopulation(); + void testRare(); + void testInsufficientData(); + void testValidateDuration(); + void testValidateDefaultExpiry(); + void testValidateNoExpiry(); + void testValidateInvalidExpiry(); + void testValidateBrokenMessage(); + void testValidateMissingId(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; - #endif // INCLUDED_CForecastRunnerTest_h diff --git a/lib/api/unittest/CIoManagerTest.cc b/lib/api/unittest/CIoManagerTest.cc index bf4f6b2e10..8d87d6c475 100644 --- a/lib/api/unittest/CIoManagerTest.cc +++ b/lib/api/unittest/CIoManagerTest.cc @@ -18,184 +18,142 @@ #include - -namespace -{ +namespace { const uint32_t SLEEP_TIME_MS = 100; const uint32_t PAUSE_TIME_MS = 10; -const size_t MAX_ATTEMPTS = 100; -const size_t TEST_SIZE = 10000; -const char TEST_CHAR = 'a'; -const char *GOOD_INPUT_FILE_NAME = "testfiles/good_input_file"; -const char *GOOD_OUTPUT_FILE_NAME = "testfiles/good_output_file"; +const size_t MAX_ATTEMPTS = 100; +const size_t TEST_SIZE = 10000; +const char TEST_CHAR = 'a'; +const char* GOOD_INPUT_FILE_NAME = "testfiles/good_input_file"; +const char* GOOD_OUTPUT_FILE_NAME = "testfiles/good_output_file"; #ifdef Windows -const char *GOOD_INPUT_PIPE_NAME = "\\\\.\\pipe\\good_input_pipe"; -const char *GOOD_OUTPUT_PIPE_NAME = "\\\\.\\pipe\\good_output_pipe"; +const char* GOOD_INPUT_PIPE_NAME = "\\\\.\\pipe\\good_input_pipe"; +const char* GOOD_OUTPUT_PIPE_NAME = "\\\\.\\pipe\\good_output_pipe"; #else -const char *GOOD_INPUT_PIPE_NAME = "testfiles/good_input_pipe"; -const char *GOOD_OUTPUT_PIPE_NAME = "testfiles/good_output_pipe"; +const char* GOOD_INPUT_PIPE_NAME = "testfiles/good_input_pipe"; +const char* GOOD_OUTPUT_PIPE_NAME = "testfiles/good_output_pipe"; #endif -const char *BAD_INPUT_FILE_NAME = "can't_create_a_file_here/bad_input_file"; -const char *BAD_OUTPUT_FILE_NAME = "can't_create_a_file_here/bad_output_file"; -const char *BAD_INPUT_PIPE_NAME = "can't_create_a_pipe_here/bad_input_pipe"; -const char *BAD_OUTPUT_PIPE_NAME = "can't_create_a_pipe_here/bad_output_pipe"; - -class CThreadDataWriter : public ml::core::CThread -{ - public: - CThreadDataWriter(const std::string &fileName, size_t size) - : m_FileName(fileName), - m_Size(size) - { +const char* BAD_INPUT_FILE_NAME = "can't_create_a_file_here/bad_input_file"; +const char* BAD_OUTPUT_FILE_NAME = "can't_create_a_file_here/bad_output_file"; +const char* BAD_INPUT_PIPE_NAME = "can't_create_a_pipe_here/bad_input_pipe"; +const char* BAD_OUTPUT_PIPE_NAME = "can't_create_a_pipe_here/bad_output_pipe"; + +class CThreadDataWriter : public ml::core::CThread { +public: + CThreadDataWriter(const std::string& fileName, size_t size) : m_FileName(fileName), m_Size(size) {} + +protected: + virtual void run() { + // Wait for the file to exist + 
ml::core::CSleep::sleep(SLEEP_TIME_MS); + + std::ofstream strm(m_FileName.c_str()); + for (size_t i = 0; i < m_Size && strm.good(); ++i) { + strm << TEST_CHAR; } + } - protected: - virtual void run() - { - // Wait for the file to exist - ml::core::CSleep::sleep(SLEEP_TIME_MS); + virtual void shutdown() {} - std::ofstream strm(m_FileName.c_str()); - for (size_t i = 0; i < m_Size && strm.good(); ++i) - { - strm << TEST_CHAR; - } - } +private: + std::string m_FileName; + size_t m_Size; +}; - virtual void shutdown() - { - } +class CThreadDataReader : public ml::core::CThread { +public: + CThreadDataReader(const std::string& fileName) : m_FileName(fileName), m_Shutdown(false) {} - private: - std::string m_FileName; - size_t m_Size; -}; + const std::string& data() const { + // The memory barriers associated with the mutex lock should ensure + // the thread calling this method has as up-to-date a view of m_Data's + // member variables as the thread that updated it + ml::core::CScopedLock lock(m_Mutex); + return m_Data; + } -class CThreadDataReader : public ml::core::CThread -{ - public: - CThreadDataReader(const std::string &fileName) - : m_FileName(fileName), - m_Shutdown(false) - { - } +protected: + virtual void run() { + m_Data.clear(); - const std::string &data() const - { - // The memory barriers associated with the mutex lock should ensure - // the thread calling this method has as up-to-date view of m_Data's - // member variables as the thread that updated it - ml::core::CScopedLock lock(m_Mutex); - return m_Data; - } + std::ifstream strm; - protected: - virtual void run() - { - m_Data.clear(); - - std::ifstream strm; - - // Try to open the file repeatedly to allow time for the other - // thread to create it - size_t attempt(1); - do - { - if (m_Shutdown) - { - return; - } - CPPUNIT_ASSERT(attempt++ <= MAX_ATTEMPTS); - ml::core::CSleep::sleep(PAUSE_TIME_MS); - strm.open(m_FileName.c_str()); + // Try to open the file repeatedly to allow time for the other + // thread to create it + size_t attempt(1); + do { + if (m_Shutdown) { + return; } - while (!strm.is_open()); - - static const std::streamsize BUF_SIZE = 512; - char buffer[BUF_SIZE]; - while (strm.good()) - { - if (m_Shutdown) - { - return; + CPPUNIT_ASSERT(attempt++ <= MAX_ATTEMPTS); + ml::core::CSleep::sleep(PAUSE_TIME_MS); + strm.open(m_FileName.c_str()); + } while (!strm.is_open()); + + static const std::streamsize BUF_SIZE = 512; + char buffer[BUF_SIZE]; + while (strm.good()) { + if (m_Shutdown) { + return; + } + strm.read(buffer, BUF_SIZE); + CPPUNIT_ASSERT(!strm.bad()); + if (strm.gcount() > 0) { + ml::core::CScopedLock lock(m_Mutex); + // This code deals with the test character we write to + // detect the short-lived connection problem on Windows + const char* copyFrom = buffer; + size_t copyLen = static_cast<size_t>(strm.gcount()); + if (m_Data.empty() && *buffer == ml::core::CNamedPipeFactory::TEST_CHAR) { + ++copyFrom; + --copyLen; } - strm.read(buffer, BUF_SIZE); - CPPUNIT_ASSERT(!strm.bad()); - if (strm.gcount() > 0) - { - ml::core::CScopedLock lock(m_Mutex); - // This code deals with the test character we write to - // detect the short-lived connection problem on Windows - const char *copyFrom = buffer; - size_t copyLen = static_cast<size_t>(strm.gcount()); - if (m_Data.empty() && - *buffer == ml::core::CNamedPipeFactory::TEST_CHAR) - { - ++copyFrom; - --copyLen; - } - if (copyLen > 0) - { - m_Data.append(copyFrom, copyLen); - } + if (copyLen > 0) { + m_Data.append(copyFrom, copyLen); } } } + } - virtual void shutdown() - { - m_Shutdown =
true; - } + virtual void shutdown() { m_Shutdown = true; } - private: - mutable ml::core::CMutex m_Mutex; - std::string m_FileName; - std::string m_Data; - volatile bool m_Shutdown; +private: + mutable ml::core::CMutex m_Mutex; + std::string m_FileName; + std::string m_Data; + volatile bool m_Shutdown; }; - } -CppUnit::Test *CIoManagerTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CIoManagerTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CIoManagerTest>( - "CIoManagerTest::testStdinStdout", - &CIoManagerTest::testStdinStdout) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CIoManagerTest>( - "CIoManagerTest::testFileIoGood", - &CIoManagerTest::testFileIoGood) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CIoManagerTest>( - "CIoManagerTest::testFileIoBad", - &CIoManagerTest::testFileIoBad) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CIoManagerTest>( - "CIoManagerTest::testNamedPipeIoGood", - &CIoManagerTest::testNamedPipeIoGood) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CIoManagerTest>( - "CIoManagerTest::testNamedPipeIoBad", - &CIoManagerTest::testNamedPipeIoBad) ); +CppUnit::Test* CIoManagerTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CIoManagerTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testStdinStdout", &CIoManagerTest::testStdinStdout)); + suiteOfTests->addTest(new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testFileIoGood", &CIoManagerTest::testFileIoGood)); + suiteOfTests->addTest(new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testFileIoBad", &CIoManagerTest::testFileIoBad)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testNamedPipeIoGood", &CIoManagerTest::testNamedPipeIoGood)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testNamedPipeIoBad", &CIoManagerTest::testNamedPipeIoBad)); return suiteOfTests; } -void CIoManagerTest::testStdinStdout() -{ +void CIoManagerTest::testStdinStdout() { ml::api::CIoManager ioMgr("", false, "", false); CPPUNIT_ASSERT(ioMgr.initIo()); // Assign to a different pointer in case of "this" pointer manipulation due // to multiple inheritance - std::istream *cinAsIStream = &std::cin; + std::istream* cinAsIStream = &std::cin; CPPUNIT_ASSERT_EQUAL(cinAsIStream, &ioMgr.inputStream()); - std::ostream *coutAsIStream = &std::cout; + std::ostream* coutAsIStream = &std::cout; CPPUNIT_ASSERT_EQUAL(coutAsIStream, &ioMgr.outputStream()); } -void CIoManagerTest::testFileIoGood() -{ +void CIoManagerTest::testFileIoGood() { // Remove output file that possibly might have been left behind by a // previous failed test - ignore the error code from this call though as // it'll generally fail @@ -207,56 +165,36 @@ void CIoManagerTest::testFileIoGood() strm << std::string(TEST_SIZE, TEST_CHAR); strm.close(); - this->testCommon(GOOD_INPUT_FILE_NAME, - false, - GOOD_OUTPUT_FILE_NAME, - false, - true); + this->testCommon(GOOD_INPUT_FILE_NAME, false, GOOD_OUTPUT_FILE_NAME, false, true); CPPUNIT_ASSERT_EQUAL(0, ::remove(GOOD_INPUT_FILE_NAME)); CPPUNIT_ASSERT_EQUAL(0, ::remove(GOOD_OUTPUT_FILE_NAME)); } -void CIoManagerTest::testFileIoBad() -{ - this->testCommon(BAD_INPUT_FILE_NAME, - false, - BAD_OUTPUT_FILE_NAME, - false, - false); +void CIoManagerTest::testFileIoBad() { + this->testCommon(BAD_INPUT_FILE_NAME, false, BAD_OUTPUT_FILE_NAME, false, false); } -void CIoManagerTest::testNamedPipeIoGood() -{ +void CIoManagerTest::testNamedPipeIoGood() { // For the named pipe test, data needs to be written to the IO manager's // input pipe after the IO manager has started CThreadDataWriter
threadWriter(GOOD_INPUT_PIPE_NAME, TEST_SIZE); CPPUNIT_ASSERT(threadWriter.start()); - this->testCommon(GOOD_INPUT_PIPE_NAME, - true, - GOOD_OUTPUT_PIPE_NAME, - true, - true); + this->testCommon(GOOD_INPUT_PIPE_NAME, true, GOOD_OUTPUT_PIPE_NAME, true, true); CPPUNIT_ASSERT(threadWriter.stop()); } -void CIoManagerTest::testNamedPipeIoBad() -{ - this->testCommon(BAD_INPUT_PIPE_NAME, - true, - BAD_OUTPUT_PIPE_NAME, - true, - false); +void CIoManagerTest::testNamedPipeIoBad() { + this->testCommon(BAD_INPUT_PIPE_NAME, true, BAD_OUTPUT_PIPE_NAME, true, false); } -void CIoManagerTest::testCommon(const std::string &inputFileName, +void CIoManagerTest::testCommon(const std::string& inputFileName, bool isInputFileNamedPipe, - const std::string &outputFileName, + const std::string& outputFileName, bool isOutputFileNamedPipe, - bool isGood) -{ + bool isGood) { // Test reader reads from the IO manager's output stream. CThreadDataReader threadReader(outputFileName); CPPUNIT_ASSERT(threadReader.start()); @@ -264,43 +202,32 @@ void CIoManagerTest::testCommon(const std::string &inputFileName, std::string processedData; { - ml::api::CIoManager ioMgr(inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe); + ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe); CPPUNIT_ASSERT_EQUAL(isGood, ioMgr.initIo()); - if (isGood) - { + if (isGood) { static const std::streamsize BUF_SIZE = 512; char buffer[BUF_SIZE]; - do - { + do { ioMgr.inputStream().read(buffer, BUF_SIZE); CPPUNIT_ASSERT(!ioMgr.inputStream().bad()); - if (ioMgr.inputStream().gcount() > 0) - { + if (ioMgr.inputStream().gcount() > 0) { processedData.append(buffer, static_cast<size_t>(ioMgr.inputStream().gcount())); } CPPUNIT_ASSERT(!ioMgr.outputStream().bad()); ioMgr.outputStream().write(buffer, static_cast<size_t>(ioMgr.inputStream().gcount())); - } - while (!ioMgr.inputStream().eof()); + } while (!ioMgr.inputStream().eof()); CPPUNIT_ASSERT(!ioMgr.outputStream().bad()); } } - if (isGood) - { + if (isGood) { CPPUNIT_ASSERT(threadReader.waitForFinish()); CPPUNIT_ASSERT_EQUAL(TEST_SIZE, processedData.length()); CPPUNIT_ASSERT_EQUAL(std::string(TEST_SIZE, TEST_CHAR), processedData); CPPUNIT_ASSERT_EQUAL(processedData.length(), threadReader.data().length()); CPPUNIT_ASSERT_EQUAL(processedData, threadReader.data()); - } - else - { + } else { CPPUNIT_ASSERT(threadReader.stop()); CPPUNIT_ASSERT(processedData.empty()); } } - diff --git a/lib/api/unittest/CIoManagerTest.h b/lib/api/unittest/CIoManagerTest.h index 0526024cde..eaaa706e00 100644 --- a/lib/api/unittest/CIoManagerTest.h +++ b/lib/api/unittest/CIoManagerTest.h @@ -8,24 +8,22 @@ #include -class CIoManagerTest : public CppUnit::TestFixture -{ - public: - void testStdinStdout(); - void testFileIoGood(); - void testFileIoBad(); - void testNamedPipeIoGood(); - void testNamedPipeIoBad(); +class CIoManagerTest : public CppUnit::TestFixture { +public: + void testStdinStdout(); + void testFileIoGood(); + void testFileIoBad(); + void testNamedPipeIoGood(); + void testNamedPipeIoBad(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); - private: - void testCommon(const std::string &inputFileName, - bool isInputFileNamedPipe, - const std::string &outputFileName, - bool isOutputFileNamedPipe, - bool isGood); +private: + void testCommon(const std::string& inputFileName, + bool isInputFileNamedPipe, + const std::string& outputFileName, + bool isOutputFileNamedPipe, + bool isGood); }; #endif // INCLUDED_CIoManagerTest_h - diff --git
a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 5c5e7700bb..b73c8df045 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -10,8 +10,8 @@ #include #include #include -#include #include +#include #include #include @@ -36,65 +36,46 @@ using TDouble1Vec = ml::core::CSmallVector<double, 1>; using TStr1Vec = ml::core::CSmallVector<std::string, 1>; const TStr1Vec EMPTY_STRING_LIST; -CppUnit::Test *CJsonOutputWriterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CJsonOutputWriterTest"); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testSimpleWrite", - &CJsonOutputWriterTest::testSimpleWrite) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testWriteNonAnomalousBucket", - &CJsonOutputWriterTest::testWriteNonAnomalousBucket) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testBucketWrite", - &CJsonOutputWriterTest::testBucketWrite) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testBucketWriteInterim", - &CJsonOutputWriterTest::testBucketWriteInterim) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testLimitedRecordsWrite", - &CJsonOutputWriterTest::testLimitedRecordsWrite) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testLimitedRecordsWriteInterim", - &CJsonOutputWriterTest::testLimitedRecordsWriteInterim) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testFlush", - &CJsonOutputWriterTest::testFlush) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testWriteCategoryDefinition", - &CJsonOutputWriterTest::testWriteCategoryDefinition) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testWriteWithInfluences", - &CJsonOutputWriterTest::testWriteWithInfluences) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testWriteInfluencers", - &CJsonOutputWriterTest::testWriteInfluencers) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testWriteInfluencersWithLimit", - &CJsonOutputWriterTest::testWriteInfluencersWithLimit) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testPersistNormalizer", - &CJsonOutputWriterTest::testPersistNormalizer) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testPartitionScores", - &CJsonOutputWriterTest::testPartitionScores) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testReportMemoryUsage", - &CJsonOutputWriterTest::testReportMemoryUsage) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testWriteScheduledEvent", - &CJsonOutputWriterTest::testWriteScheduledEvent) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testThroughputWithScopedAllocator", - &CJsonOutputWriterTest::testThroughputWithScopedAllocator) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CJsonOutputWriterTest>( - "CJsonOutputWriterTest::testThroughputWithoutScopedAllocator", - &CJsonOutputWriterTest::testThroughputWithoutScopedAllocator) ); +CppUnit::Test* CJsonOutputWriterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonOutputWriterTest"); + suiteOfTests->addTest( + new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testSimpleWrite", &CJsonOutputWriterTest::testSimpleWrite)); + suiteOfTests->addTest(new
CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testWriteNonAnomalousBucket", + &CJsonOutputWriterTest::testWriteNonAnomalousBucket)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testBucketWrite", &CJsonOutputWriterTest::testBucketWrite)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testBucketWriteInterim", + &CJsonOutputWriterTest::testBucketWriteInterim)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testLimitedRecordsWrite", + &CJsonOutputWriterTest::testLimitedRecordsWrite)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testLimitedRecordsWriteInterim", + &CJsonOutputWriterTest::testLimitedRecordsWriteInterim)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testFlush", &CJsonOutputWriterTest::testFlush)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testWriteCategoryDefinition", + &CJsonOutputWriterTest::testWriteCategoryDefinition)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testWriteWithInfluences", + &CJsonOutputWriterTest::testWriteWithInfluences)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testWriteInfluencers", + &CJsonOutputWriterTest::testWriteInfluencers)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testWriteInfluencersWithLimit", + &CJsonOutputWriterTest::testWriteInfluencersWithLimit)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testPersistNormalizer", + &CJsonOutputWriterTest::testPersistNormalizer)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testPartitionScores", + &CJsonOutputWriterTest::testPartitionScores)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testReportMemoryUsage", + &CJsonOutputWriterTest::testReportMemoryUsage)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testWriteScheduledEvent", + &CJsonOutputWriterTest::testWriteScheduledEvent)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testThroughputWithScopedAllocator", + &CJsonOutputWriterTest::testThroughputWithScopedAllocator)); + suiteOfTests->addTest(new CppUnit::TestCaller<CJsonOutputWriterTest>("CJsonOutputWriterTest::testThroughputWithoutScopedAllocator", + &CJsonOutputWriterTest::testThroughputWithoutScopedAllocator)); return suiteOfTests; } -void CJsonOutputWriterTest::testSimpleWrite() -{ +void CJsonOutputWriterTest::testSimpleWrite() { // Data isn't grouped by bucket/detector record; it // is written straight through and everything is a string ml::api::CJsonOutputWriter::TStrStrUMap dataFields; @@ -113,7 +94,7 @@ void CJsonOutputWriterTest::testSimpleWrite() // The output writer won't close the JSON structures until it is destroyed { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); writer.writeRow(emptyFields, dataFields); @@ -128,7 +109,7 @@ CPPUNIT_ASSERT(arrayDoc.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), arrayDoc.Size()); - const rapidjson::Value &object = arrayDoc[rapidjson::SizeType(0)]; + const rapidjson::Value& object = arrayDoc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(object.IsObject()); CPPUNIT_ASSERT(object.HasMember("by_field_name")); @@ -144,7 +125,7 @@
CPPUNIT_ASSERT(object.HasMember("field_name")); CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), std::string(object["field_name"].GetString())); - const rapidjson::Value &object2 = arrayDoc[rapidjson::SizeType(1)]; + const rapidjson::Value& object2 = arrayDoc[rapidjson::SizeType(1)]; CPPUNIT_ASSERT(object.IsObject()); CPPUNIT_ASSERT(object2.HasMember("by_field_name")); @@ -161,8 +142,7 @@ void CJsonOutputWriterTest::testSimpleWrite() CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), std::string(object2["field_name"].GetString())); } -void CJsonOutputWriterTest::testWriteNonAnomalousBucket() -{ +void CJsonOutputWriterTest::testWriteNonAnomalousBucket() { std::ostringstream sstream; std::string function("mean"); @@ -170,33 +150,33 @@ void CJsonOutputWriterTest::testWriteNonAnomalousBucket() std::string emptyString; ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences; { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); ml::api::CHierarchicalResultsWriter::SResults result(false, - false, - emptyString, - emptyString, - emptyString, - emptyString, - emptyString, - emptyString, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 42.0), - TDouble1Vec(1, 42.0), - 0.0, - 0.0, - 1.0, - 30, - emptyString, - influences, - false, - false, - 1, - 100); + false, + emptyString, + emptyString, + emptyString, + emptyString, + emptyString, + emptyString, + emptyString, + 1, + function, + functionDescription, + TDouble1Vec(1, 42.0), + TDouble1Vec(1, 42.0), + 0.0, + 0.0, + 1.0, + 30, + emptyString, + influences, + false, + false, + 1, + 100); CPPUNIT_ASSERT(writer.acceptResult(result)); writer.acceptBucketTimeInfluencer(1, 1.0, 0.0, 0.0); @@ -216,11 +196,11 @@ void CJsonOutputWriterTest::testWriteNonAnomalousBucket() CPPUNIT_ASSERT(arrayDoc.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), arrayDoc.Size()); - const rapidjson::Value &bucketWrapper = arrayDoc[rapidjson::SizeType(0)]; + const rapidjson::Value& bucketWrapper = arrayDoc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(bucketWrapper.IsObject()); CPPUNIT_ASSERT(bucketWrapper.HasMember("bucket")); - const rapidjson::Value &bucket = bucketWrapper["bucket"]; + const rapidjson::Value& bucket = bucketWrapper["bucket"]; CPPUNIT_ASSERT(bucket.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(bucket["job_id"].GetString())); CPPUNIT_ASSERT_EQUAL(1000, bucket["timestamp"].GetInt()); @@ -230,14 +210,13 @@ void CJsonOutputWriterTest::testWriteNonAnomalousBucket() CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, bucket["anomaly_score"].GetDouble(), 0.00001); } -void CJsonOutputWriterTest::testFlush() -{ +void CJsonOutputWriterTest::testFlush() { std::string testId("testflush"); ml::core_t::TTime lastFinalizedBucketEnd(123456789); std::ostringstream sstream; { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); writer.acknowledgeFlush(testId, lastFinalizedBucketEnd); @@ -255,22 +234,19 @@ void CJsonOutputWriterTest::testFlush() arrayDoc.Accept(writer); LOG_DEBUG("Flush:\n" << strbuf.GetString()); - - const rapidjson::Value &flushWrapper = arrayDoc[rapidjson::SizeType(0)]; + const rapidjson::Value& flushWrapper = arrayDoc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(flushWrapper.IsObject()); CPPUNIT_ASSERT(flushWrapper.HasMember("flush")); - const 
rapidjson::Value &flush = flushWrapper["flush"]; + const rapidjson::Value& flush = flushWrapper["flush"]; CPPUNIT_ASSERT(flush.IsObject()); CPPUNIT_ASSERT(flush.HasMember("id")); CPPUNIT_ASSERT_EQUAL(testId, std::string(flush["id"].GetString())); CPPUNIT_ASSERT(flush.HasMember("last_finalized_bucket_end")); - CPPUNIT_ASSERT_EQUAL(lastFinalizedBucketEnd * 1000, - static_cast<int64_t>(flush["last_finalized_bucket_end"].GetInt64())); + CPPUNIT_ASSERT_EQUAL(lastFinalizedBucketEnd * 1000, static_cast<int64_t>(flush["last_finalized_bucket_end"].GetInt64())); } -void CJsonOutputWriterTest::testWriteCategoryDefinition() -{ +void CJsonOutputWriterTest::testWriteCategoryDefinition() { int categoryId(42); std::string terms("foo bar"); std::string regex(".*?foo.+?bar.*"); @@ -283,7 +259,7 @@ std::ostringstream sstream; { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); writer.writeCategoryDefinition(categoryId, terms, regex, maxMatchingLength, examples); @@ -301,11 +277,11 @@ arrayDoc.Accept(writer); LOG_DEBUG("CategoryDefinition:\n" << strbuf.GetString()); - const rapidjson::Value &categoryWrapper = arrayDoc[rapidjson::SizeType(0)]; + const rapidjson::Value& categoryWrapper = arrayDoc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(categoryWrapper.IsObject()); CPPUNIT_ASSERT(categoryWrapper.HasMember("category_definition")); - const rapidjson::Value &category = categoryWrapper["category_definition"]; + const rapidjson::Value& category = categoryWrapper["category_definition"]; CPPUNIT_ASSERT(category.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(category["job_id"].GetString())); CPPUNIT_ASSERT(category.IsObject()); @@ -320,43 +296,37 @@ CPPUNIT_ASSERT(category.HasMember("examples")); TStrSet writtenExamplesSet; - const rapidjson::Value &writtenExamples = category["examples"]; - for (rapidjson::SizeType i = 0; i < writtenExamples.Size(); i++) - { + const rapidjson::Value& writtenExamples = category["examples"]; + for (rapidjson::SizeType i = 0; i < writtenExamples.Size(); i++) { writtenExamplesSet.insert(std::string(writtenExamples[i].GetString())); } CPPUNIT_ASSERT(writtenExamplesSet == examples); } -void CJsonOutputWriterTest::testBucketWrite() -{ +void CJsonOutputWriterTest::testBucketWrite() { this->testBucketWriteHelper(false); } -void CJsonOutputWriterTest::testBucketWriteInterim() -{ +void CJsonOutputWriterTest::testBucketWriteInterim() { this->testBucketWriteHelper(true); } -void CJsonOutputWriterTest::testLimitedRecordsWrite() -{ +void CJsonOutputWriterTest::testLimitedRecordsWrite() { this->testLimitedRecordsWriteHelper(false); } -void CJsonOutputWriterTest::testLimitedRecordsWriteInterim() -{ +void CJsonOutputWriterTest::testLimitedRecordsWriteInterim() { this->testLimitedRecordsWriteHelper(true); } -void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) -{ +void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { // groups output by bucket/detector std::ostringstream sstream; // The output writer won't close the JSON structures until it is destroyed { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); std::string partitionFieldName("tfn"); @@
-374,131 +344,126 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { ml::api::CHierarchicalResultsWriter::SResults result11(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.5, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 2.24, + 0.5, + 0.0, + 79, + fieldName, + influences, + false, + false, + 1, + 100); ml::api::CHierarchicalResultsWriter::SResults result112(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.5, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result12( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.8, - 0.0, - fieldName, - influences, - false, - true, - 2, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result13( - ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.5, - 0.0, - fieldName, - influences, - false, - false, - 3, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result14( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 4, - 100, - EMPTY_STRING_LIST); - - + true, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 2.24, + 0.5, + 0.0, + 79, + fieldName, + influences, + false, + false, + 1, + 100); + + ml::api::CHierarchicalResultsWriter::SResults result12(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.8, + 0.0, + fieldName, + influences, + false, + true, + 2, + 100, + EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result13(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.5, + 0.0, + 
fieldName, + influences, + false, + false, + 3, + 100, + EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result14(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.0, + 0.0, + fieldName, + influences, + false, + false, + 4, + 100, + EMPTY_STRING_LIST); // 1st bucket CPPUNIT_ASSERT(writer.acceptResult(result11)); @@ -515,128 +480,126 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { ml::api::CHierarchicalResultsWriter::SResults result21(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.6, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 2, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 2.24, + 0.6, + 0.0, + 79, + fieldName, + influences, + false, + false, + 1, + 100); ml::api::CHierarchicalResultsWriter::SResults result212(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.6, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result22( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.8, - 0.0, - fieldName, - influences, - false, - true, - 2, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result23( - ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 3, - 100, - EMPTY_STRING_LIST); + true, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 2, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 2.24, + 0.6, + 0.0, + 79, + fieldName, + influences, + false, + false, + 1, + 100); + + ml::api::CHierarchicalResultsWriter::SResults result22(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 2, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.8, + 0.0, + fieldName, + influences, + false, + true, + 2, + 100, + EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result23(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 
2, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.0, + 0.0, + fieldName, + influences, + false, + false, + 3, + 100, + EMPTY_STRING_LIST); ml::api::CHierarchicalResultsWriter::SResults result24(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 4, - 100, - EMPTY_STRING_LIST); + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 2, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.0, + 0.0, + fieldName, + influences, + false, + false, + 4, + 100, + EMPTY_STRING_LIST); // 2nd bucket CPPUNIT_ASSERT(writer.acceptResult(result21)); @@ -653,129 +616,126 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { ml::api::CHierarchicalResultsWriter::SResults result31(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.8, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 3, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 2.24, + 0.8, + 0.0, + 79, + fieldName, + influences, + false, + false, + 1, + 100); ml::api::CHierarchicalResultsWriter::SResults result312(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.8, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result32( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - true, - 2, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result33( - ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 3, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result34( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 4, - 100, - EMPTY_STRING_LIST); + true, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + 
byFieldValue, + correlatedByFieldValue, + 3, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 2.24, + 0.8, + 0.0, + 79, + fieldName, + influences, + false, + false, + 1, + 100); + + ml::api::CHierarchicalResultsWriter::SResults result32(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 3, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.0, + 0.0, + fieldName, + influences, + false, + true, + 2, + 100, + EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result33(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 3, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.0, + 0.0, + fieldName, + influences, + false, + false, + 3, + 100, + EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result34(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 3, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.0, + 0.0, + fieldName, + influences, + false, + false, + 4, + 100, + EMPTY_STRING_LIST); // 3rd bucket CPPUNIT_ASSERT(writer.acceptResult(result31)); @@ -811,13 +771,12 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) int bucketTimes[] = {1000, 1000, 2000, 2000, 3000, 3000}; // Assert buckets - for (rapidjson::SizeType i = 1; i < arrayDoc.Size(); i = i + 2) - { + for (rapidjson::SizeType i = 1; i < arrayDoc.Size(); i = i + 2) { int buckettime = bucketTimes[i]; - const rapidjson::Value &bucketWrapper = arrayDoc[i]; + const rapidjson::Value& bucketWrapper = arrayDoc[i]; CPPUNIT_ASSERT(bucketWrapper.HasMember("bucket")); - const rapidjson::Value &bucket = bucketWrapper["bucket"]; + const rapidjson::Value& bucket = bucketWrapper["bucket"]; CPPUNIT_ASSERT(bucket.IsObject()); CPPUNIT_ASSERT(bucket.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(bucket["job_id"].GetString())); @@ -826,10 +785,10 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) // except the population detector which has a single record and clauses CPPUNIT_ASSERT_EQUAL(buckettime, bucket["timestamp"].GetInt()); CPPUNIT_ASSERT(bucket.HasMember("bucket_influencers")); - const rapidjson::Value &bucketInfluencers = bucket["bucket_influencers"]; + const rapidjson::Value& bucketInfluencers = bucket["bucket_influencers"]; CPPUNIT_ASSERT(bucketInfluencers.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), bucketInfluencers.Size()); - const rapidjson::Value &bucketInfluencer = bucketInfluencers[rapidjson::SizeType(0)]; + const rapidjson::Value& bucketInfluencer = bucketInfluencers[rapidjson::SizeType(0)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(13.44, bucketInfluencer["raw_anomaly_score"].GetDouble(), 0.00001); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.01, bucketInfluencer["probability"].GetDouble(), 0.00001); CPPUNIT_ASSERT_DOUBLES_EQUAL(70.0, bucketInfluencer["initial_anomaly_score"].GetDouble(), 0.00001); @@ -842,33 +801,29 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) CPPUNIT_ASSERT_DOUBLES_EQUAL(70.0, bucket["anomaly_score"].GetDouble(), 0.00001); 
CPPUNIT_ASSERT(bucket.HasMember("initial_anomaly_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(70.0, bucket["initial_anomaly_score"].GetDouble(), 0.00001); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(bucket.HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, bucket["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!bucket.HasMember("is_interim")); } CPPUNIT_ASSERT_EQUAL(uint64_t(10ll), bucket["processing_time_ms"].GetUint64()); } - for (rapidjson::SizeType i = 0; i < arrayDoc.Size(); i = i + 2) - { + for (rapidjson::SizeType i = 0; i < arrayDoc.Size(); i = i + 2) { int buckettime = bucketTimes[i]; - const rapidjson::Value &recordsWrapper = arrayDoc[i]; + const rapidjson::Value& recordsWrapper = arrayDoc[i]; CPPUNIT_ASSERT(recordsWrapper.HasMember("records")); - const rapidjson::Value &records = recordsWrapper["records"]; + const rapidjson::Value& records = recordsWrapper["records"]; CPPUNIT_ASSERT(records.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(5), records.Size()); // 1st record is for population detector { - const rapidjson::Value &record = records[rapidjson::SizeType(0)]; + const rapidjson::Value& record = records[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(record.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(record["job_id"].GetString())); CPPUNIT_ASSERT(record.HasMember("detector_index")); @@ -895,23 +850,19 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) // presence CPPUNIT_ASSERT(record.HasMember("initial_record_score")); CPPUNIT_ASSERT(record.HasMember("record_score")); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(record.HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, record["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!record.HasMember("is_interim")); } CPPUNIT_ASSERT(record.HasMember("causes")); - const rapidjson::Value &causes = record["causes"]; + const rapidjson::Value& causes = record["causes"]; CPPUNIT_ASSERT(causes.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), causes.Size()); - for (rapidjson::SizeType k = 0; k < causes.Size(); k++) - { - const rapidjson::Value &cause = causes[k]; + for (rapidjson::SizeType k = 0; k < causes.Size(); k++) { + const rapidjson::Value& cause = causes[k]; CPPUNIT_ASSERT(cause.HasMember("probability")); CPPUNIT_ASSERT_EQUAL(0.0, cause["probability"].GetDouble()); CPPUNIT_ASSERT(cause.HasMember("field_name")); @@ -944,9 +895,8 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) // Next 2 records are for metric detector { - for (rapidjson::SizeType k = 1; k < 3; k++) - { - const rapidjson::Value &record = records[k]; + for (rapidjson::SizeType k = 1; k < 3; k++) { + const rapidjson::Value& record = records[k]; CPPUNIT_ASSERT(record.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(record["job_id"].GetString())); CPPUNIT_ASSERT(record.HasMember("detector_index")); @@ -985,13 +935,10 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) // presence CPPUNIT_ASSERT(record.HasMember("initial_record_score")); CPPUNIT_ASSERT(record.HasMember("record_score")); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(record.HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, record["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!record.HasMember("is_interim")); } } @@ -999,9 +946,8 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) // Last 2 records are for event rate detector { - for (rapidjson::SizeType k = 3; k < 5; k++) 
- { - const rapidjson::Value &record = records[k]; + for (rapidjson::SizeType k = 3; k < 5; k++) { + const rapidjson::Value& record = records[k]; CPPUNIT_ASSERT(record.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(record["job_id"].GetString())); CPPUNIT_ASSERT(record.HasMember("detector_index")); @@ -1039,13 +985,10 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) // presence CPPUNIT_ASSERT(record.HasMember("initial_record_score")); CPPUNIT_ASSERT(record.HasMember("record_score")); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(record.HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, record["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!record.HasMember("is_interim")); } } @@ -1053,8 +996,7 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) } } -void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) -{ +void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) { // Tests CJsonOutputWriter::limitNumberRecords(size_t) // set the record limit for each detector to 2 @@ -1062,7 +1004,7 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) // The output writer won't close the JSON structures until is is destroyed { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); writer.limitNumberRecords(2); @@ -1080,108 +1022,104 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) { // 1st bucket - ml::api::CHierarchicalResultsWriter::SResults result111( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result111(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 0.0, + 0.1, + 0.1, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result111)); - ml::api::CHierarchicalResultsWriter::SResults result112( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.2, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result112(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 0.0, + 0.1, + 0.2, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result112)); - ml::api::CHierarchicalResultsWriter::SResults result113( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - 
functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.0, - 0.0, - 0.4, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result113(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.0, + 0.0, + 0.4, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result113)); - ml::api::CHierarchicalResultsWriter::SResults result114( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 12.0, - 0.0, - 0.4, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result114(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 12.0, + 0.0, + 0.4, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result114)); CPPUNIT_ASSERT(writer.acceptResult(result114)); @@ -1189,159 +1127,159 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) overFieldValue = "ofv"; ml::api::CHierarchicalResultsWriter::SResults result121(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 12.0, - 0.0, - 0.01, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + emptyString, + emptyString, + emptyString, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 12.0, + 0.0, + 0.01, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result121)); ml::api::CHierarchicalResultsWriter::SResults result122(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 12.0, - 0.0, - 0.01, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + true, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + emptyString, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 12.0, + 0.0, + 0.01, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result122)); ml::api::CHierarchicalResultsWriter::SResults result123(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 0.5, - 0.0, - 0.5, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + false, + partitionFieldName, + 
partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + emptyString, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 0.5, + 0.0, + 0.5, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result123)); ml::api::CHierarchicalResultsWriter::SResults result124(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 0.5, - 0.0, - 0.5, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + true, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + emptyString, + emptyString, + emptyString, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 0.5, + 0.0, + 0.5, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result124)); ml::api::CHierarchicalResultsWriter::SResults result125(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 6.0, - 0.0, - 0.5, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + emptyString, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 6.0, + 0.0, + 0.5, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result125)); ml::api::CHierarchicalResultsWriter::SResults result126(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 6.0, - 0.0, - 0.05, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + true, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + emptyString, + emptyString, + emptyString, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 6.0, + 0.0, + 0.05, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result126)); } @@ -1350,82 +1288,79 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) overFieldName.clear(); overFieldValue.clear(); - ml::api::CHierarchicalResultsWriter::SResults result211( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 1.0, - 0.0, - 0.05, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result211(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 2, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 1.0, + 0.0, + 0.05, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result211)); - 
ml::api::CHierarchicalResultsWriter::SResults result212( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 7.0, - 0.0, - 0.001, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result212(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 2, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 7.0, + 0.0, + 0.001, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result212)); - ml::api::CHierarchicalResultsWriter::SResults result213( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.6, - 0.0, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result213(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 2, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 0.6, + 0.0, + 0.1, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result213)); CPPUNIT_ASSERT(writer.acceptResult(result213)); @@ -1433,108 +1368,108 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) overFieldValue = "ofv"; ml::api::CHierarchicalResultsWriter::SResults result221(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 0.6, - 0.0, - 0.1, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + emptyString, + 2, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 0.6, + 0.0, + 0.1, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result221)); CPPUNIT_ASSERT(writer.acceptResult(result221)); ml::api::CHierarchicalResultsWriter::SResults result222(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 0.6, - 0.0, - 0.1, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + emptyString, + emptyString, + emptyString, + 2, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 0.6, + 0.0, + 0.1, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result222)); ml::api::CHierarchicalResultsWriter::SResults result223(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - 
byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 3.0, - 0.0, - 0.02, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + emptyString, + 2, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 3.0, + 0.0, + 0.02, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result223)); ml::api::CHierarchicalResultsWriter::SResults result224(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 20.0, - 0.0, - 0.02, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + true, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + emptyString, + emptyString, + emptyString, + 2, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 20.0, + 0.0, + 0.02, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result224)); } @@ -1543,85 +1478,84 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) overFieldName.clear(); overFieldValue.clear(); - ml::api::CHierarchicalResultsWriter::SResults result311( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 3, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 30.0, - 0.0, - 0.02, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result311(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 3, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 30.0, + 0.0, + 0.02, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result311)); overFieldName = "ofn"; overFieldValue = "ofv"; ml::api::CHierarchicalResultsWriter::SResults result321(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 3, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 31.0, - 0.0, - 0.0002, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + emptyString, + 3, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 31.0, + 0.0, + 0.0002, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result321)); ml::api::CHierarchicalResultsWriter::SResults result322(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 3, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 31.0, - 0.0, - 0.0002, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + true, + partitionFieldName, + partitionFieldValue, + 
overFieldName, + overFieldValue, + emptyString, + emptyString, + emptyString, + 3, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 31.0, + 0.0, + 0.0002, + 79, + fieldName, + influences, + false, + true, + 2, + 100); CPPUNIT_ASSERT(writer.acceptResult(result322)); } @@ -1648,50 +1582,42 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) // records are sorted by probability. // bucket total anomaly score is the sum of all anomalies not just those printed. { - const rapidjson::Value &bucketWrapper = arrayDoc[rapidjson::SizeType(1)]; + const rapidjson::Value& bucketWrapper = arrayDoc[rapidjson::SizeType(1)]; CPPUNIT_ASSERT(bucketWrapper.IsObject()); CPPUNIT_ASSERT(bucketWrapper.HasMember("bucket")); - const rapidjson::Value &bucket = bucketWrapper["bucket"]; + const rapidjson::Value& bucket = bucketWrapper["bucket"]; CPPUNIT_ASSERT(bucket.IsObject()); // It's hard to predict what these will be, so just assert their presence CPPUNIT_ASSERT(bucket.HasMember("anomaly_score")); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(bucket.HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, bucket["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!bucket.HasMember("is_interim")); } - const rapidjson::Value &recordsWrapper = arrayDoc[rapidjson::SizeType(0)]; + const rapidjson::Value& recordsWrapper = arrayDoc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(recordsWrapper.IsObject()); CPPUNIT_ASSERT(recordsWrapper.HasMember("records")); - const rapidjson::Value &records = recordsWrapper["records"]; + const rapidjson::Value& records = recordsWrapper["records"]; CPPUNIT_ASSERT(records.IsArray()); - double EXPECTED_PROBABILITIES [] = {0.01, 0.05, 0.001, 0.02, 0.0002}; + double EXPECTED_PROBABILITIES[] = {0.01, 0.05, 0.001, 0.02, 0.0002}; int probIndex = 0; - for (rapidjson::SizeType i = 0; i < records.Size(); i++) - { + for (rapidjson::SizeType i = 0; i < records.Size(); i++) { CPPUNIT_ASSERT(records[i].HasMember("detector_index")); CPPUNIT_ASSERT(records[i].HasMember("initial_record_score")); CPPUNIT_ASSERT(records[i].HasMember("record_score")); CPPUNIT_ASSERT(records[i].HasMember("probability")); - CPPUNIT_ASSERT_EQUAL(EXPECTED_PROBABILITIES[probIndex], - records[i]["probability"].GetDouble()); + CPPUNIT_ASSERT_EQUAL(EXPECTED_PROBABILITIES[probIndex], records[i]["probability"].GetDouble()); ++probIndex; - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(records[i].HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, records[i]["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!records[i].HasMember("is_interim")); } } @@ -1699,43 +1625,36 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), records.Size()); } { - const rapidjson::Value &bucketWrapper = arrayDoc[rapidjson::SizeType(3)]; + const rapidjson::Value& bucketWrapper = arrayDoc[rapidjson::SizeType(3)]; CPPUNIT_ASSERT(bucketWrapper.IsObject()); CPPUNIT_ASSERT(bucketWrapper.HasMember("bucket")); - const rapidjson::Value &bucket = bucketWrapper["bucket"]; + const rapidjson::Value& bucket = bucketWrapper["bucket"]; CPPUNIT_ASSERT(bucket.IsObject()); // It's hard to predict what these will be, so just assert their presence CPPUNIT_ASSERT(bucket.HasMember("anomaly_score")); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(bucket.HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, bucket["is_interim"].GetBool()); - } - else - { + } else { 
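                // Note (a reading of the assertions in this helper, not new behaviour):
                // the writer only serialises "is_interim" while a bucket is still
                // provisional, so finalised results must omit the member entirely.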
CPPUNIT_ASSERT(!bucket.HasMember("is_interim")); } - const rapidjson::Value &recordsWrapper = arrayDoc[rapidjson::SizeType(2)]; + const rapidjson::Value& recordsWrapper = arrayDoc[rapidjson::SizeType(2)]; CPPUNIT_ASSERT(recordsWrapper.IsObject()); CPPUNIT_ASSERT(recordsWrapper.HasMember("records")); - const rapidjson::Value &records = recordsWrapper["records"]; + const rapidjson::Value& records = recordsWrapper["records"]; CPPUNIT_ASSERT(records.IsArray()); - for (rapidjson::SizeType i = 0; i < records.Size(); i++) - { + for (rapidjson::SizeType i = 0; i < records.Size(); i++) { //CPPUNIT_ASSERT_EQUAL(0.1, records1[rapidjson::SizeType(0)]["probability"].GetDouble()); CPPUNIT_ASSERT(records[i].HasMember("detector_index")); CPPUNIT_ASSERT(records[i].HasMember("initial_record_score")); CPPUNIT_ASSERT(records[i].HasMember("record_score")); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(records[i].HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, records[i]["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!records[i].HasMember("is_interim")); } } @@ -1743,43 +1662,36 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), records.Size()); } { - const rapidjson::Value &bucketWrapper = arrayDoc[rapidjson::SizeType(5)]; + const rapidjson::Value& bucketWrapper = arrayDoc[rapidjson::SizeType(5)]; CPPUNIT_ASSERT(bucketWrapper.IsObject()); CPPUNIT_ASSERT(bucketWrapper.HasMember("bucket")); - const rapidjson::Value &bucket = bucketWrapper["bucket"]; + const rapidjson::Value& bucket = bucketWrapper["bucket"]; CPPUNIT_ASSERT(bucket.IsObject()); // It's hard to predict what these will be, so just assert their presence CPPUNIT_ASSERT(bucket.HasMember("anomaly_score")); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(bucket.HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, bucket["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!bucket.HasMember("is_interim")); } - const rapidjson::Value &recordsWrapper = arrayDoc[rapidjson::SizeType(4)]; + const rapidjson::Value& recordsWrapper = arrayDoc[rapidjson::SizeType(4)]; CPPUNIT_ASSERT(recordsWrapper.IsObject()); CPPUNIT_ASSERT(recordsWrapper.HasMember("records")); - const rapidjson::Value &records = recordsWrapper["records"]; + const rapidjson::Value& records = recordsWrapper["records"]; CPPUNIT_ASSERT(records.IsArray()); - for (rapidjson::SizeType i = 0; i < records.Size(); i++) - { + for (rapidjson::SizeType i = 0; i < records.Size(); i++) { CPPUNIT_ASSERT(records[i].HasMember("detector_index")); //CPPUNIT_ASSERT_EQUAL(0.1, records1[rapidjson::SizeType(0)]["probability"].GetDouble()); CPPUNIT_ASSERT(records[i].HasMember("initial_record_score")); CPPUNIT_ASSERT(records[i].HasMember("record_score")); - if (isInterim) - { + if (isInterim) { CPPUNIT_ASSERT(records[i].HasMember("is_interim")); CPPUNIT_ASSERT_EQUAL(isInterim, records[i]["is_interim"].GetBool()); - } - else - { + } else { CPPUNIT_ASSERT(!records[i].HasMember("is_interim")); } } @@ -1788,11 +1700,8 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) } } -ml::model::CHierarchicalResults::TNode createInfluencerNode(const std::string &personName, - const std::string &personValue, - double probability, - double normalisedAnomalyScore) -{ +ml::model::CHierarchicalResults::TNode +createInfluencerNode(const std::string& personName, const std::string& personValue, double probability, double normalisedAnomalyScore) { 
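    // Usage sketch (illustrative only; it mirrors the calls made in
    // testWriteInfluencers below rather than adding new API): the helper packs a
    // person field name/value pair plus its scores into a results-tree node, e.g.
    //   ml::model::CHierarchicalResults::TNode node =
    //       createInfluencerNode("user", "daisy", 0.5 /*probability*/, 10.0 /*normalised score*/);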
ml::model::CHierarchicalResults::TResultSpec spec; spec.s_PersonFieldName = ml::model::CStringStore::names().get(personName); spec.s_PersonFieldValue = ml::model::CStringStore::names().get(personValue); @@ -1805,12 +1714,8 @@ ml::model::CHierarchicalResults::TNode createInfluencerNode(const std::string &p return node; } -ml::model::CHierarchicalResults::TNode createBucketInfluencerNode( - const std::string &personName, - double probability, - double normalisedAnomalyScore, - double rawAnomalyScore) -{ +ml::model::CHierarchicalResults::TNode +createBucketInfluencerNode(const std::string& personName, double probability, double normalisedAnomalyScore, double rawAnomalyScore) { ml::model::CHierarchicalResults::TResultSpec spec; spec.s_PersonFieldName = ml::model::CStringStore::names().get(personName); @@ -1823,8 +1728,7 @@ ml::model::CHierarchicalResults::TNode createBucketInfluencerNode( return node; } -void CJsonOutputWriterTest::testWriteInfluencers() -{ +void CJsonOutputWriterTest::testWriteInfluencers() { std::ostringstream sstream; { @@ -1832,12 +1736,10 @@ void CJsonOutputWriterTest::testWriteInfluencers() std::string daisy("daisy"); std::string jim("jim"); - ml::model::CHierarchicalResults::TNode node1 = - createInfluencerNode(user, daisy, 0.5, 10.0); - ml::model::CHierarchicalResults::TNode node2 = - createInfluencerNode(user, jim, 0.9, 100.0); + ml::model::CHierarchicalResults::TNode node1 = createInfluencerNode(user, daisy, 0.5, 10.0); + ml::model::CHierarchicalResults::TNode node2 = createInfluencerNode(user, jim, 0.9, 100.0); - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); CPPUNIT_ASSERT(writer.acceptInfluencer(ml::core_t::TTime(42), node1, false)); CPPUNIT_ASSERT(writer.acceptInfluencer(ml::core_t::TTime(42), node2, false)); @@ -1858,11 +1760,11 @@ void CJsonOutputWriterTest::testWriteInfluencers() CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), doc.Size()); - const rapidjson::Value &influencers = doc[rapidjson::SizeType(0)]["influencers"]; + const rapidjson::Value& influencers = doc[rapidjson::SizeType(0)]["influencers"]; CPPUNIT_ASSERT(influencers.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), influencers.Size()); - const rapidjson::Value &influencer = influencers[rapidjson::SizeType(0)]; + const rapidjson::Value& influencer = influencers[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(influencer.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(influencer["job_id"].GetString())); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.5, influencer["probability"].GetDouble(), 0.001); @@ -1875,7 +1777,7 @@ void CJsonOutputWriterTest::testWriteInfluencers() CPPUNIT_ASSERT(influencer["is_interim"].GetBool()); CPPUNIT_ASSERT(influencer.HasMember("bucket_span")); - const rapidjson::Value &influencer2 = influencers[rapidjson::SizeType(1)]; + const rapidjson::Value& influencer2 = influencers[rapidjson::SizeType(1)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.9, influencer2["probability"].GetDouble(), 0.001); CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, influencer2["initial_influencer_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(influencer2.HasMember("influencer_score")); @@ -1886,12 +1788,11 @@ void CJsonOutputWriterTest::testWriteInfluencers() CPPUNIT_ASSERT(influencer2["is_interim"].GetBool()); CPPUNIT_ASSERT(influencer2.HasMember("bucket_span")); - const rapidjson::Value &bucket = doc[rapidjson::SizeType(1)]["bucket"]; + const rapidjson::Value& bucket = 
doc[rapidjson::SizeType(1)]["bucket"]; CPPUNIT_ASSERT(bucket.HasMember("influencers") == false); } -void CJsonOutputWriterTest::testWriteInfluencersWithLimit() -{ +void CJsonOutputWriterTest::testWriteInfluencersWithLimit() { std::ostringstream sstream; { @@ -1903,23 +1804,16 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() std::string bob("bob"); std::string laptop("laptop"); - ml::model::CHierarchicalResults::TNode node1 = - createInfluencerNode(user, daisy, 0.5, 10.0); - ml::model::CHierarchicalResults::TNode node2 = - createInfluencerNode(user, jim, 0.9, 100.0); - ml::model::CHierarchicalResults::TNode node3 = - createInfluencerNode(user, bob, 0.3, 9.0); - ml::model::CHierarchicalResults::TNode node4 = - createInfluencerNode(computer, laptop, 0.3, 12.0); - - ml::model::CHierarchicalResults::TNode bnode1 = - createBucketInfluencerNode(user, 0.5, 10.0, 1.0); - ml::model::CHierarchicalResults::TNode bnode2 = - createBucketInfluencerNode(computer, 0.9, 100.0, 10.0); - ml::model::CHierarchicalResults::TNode bnode3 = - createBucketInfluencerNode(monitor, 0.3, 9.0, 0.9); - - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::model::CHierarchicalResults::TNode node1 = createInfluencerNode(user, daisy, 0.5, 10.0); + ml::model::CHierarchicalResults::TNode node2 = createInfluencerNode(user, jim, 0.9, 100.0); + ml::model::CHierarchicalResults::TNode node3 = createInfluencerNode(user, bob, 0.3, 9.0); + ml::model::CHierarchicalResults::TNode node4 = createInfluencerNode(computer, laptop, 0.3, 12.0); + + ml::model::CHierarchicalResults::TNode bnode1 = createBucketInfluencerNode(user, 0.5, 10.0, 1.0); + ml::model::CHierarchicalResults::TNode bnode2 = createBucketInfluencerNode(computer, 0.9, 100.0, 10.0); + ml::model::CHierarchicalResults::TNode bnode3 = createBucketInfluencerNode(monitor, 0.3, 9.0, 0.9); + + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); writer.limitNumberRecords(2); @@ -1932,7 +1826,6 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() CPPUNIT_ASSERT(writer.acceptInfluencer(ml::core_t::TTime(0), bnode2, true)); CPPUNIT_ASSERT(writer.acceptInfluencer(ml::core_t::TTime(0), bnode3, true)); - // can't add a bucket influencer unless a result has been added std::string pfn("partition_field_name"); std::string pfv("partition_field_value"); @@ -1943,33 +1836,32 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() std::string fn("field_name"); std::string emptyStr; ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - ml::api::CHierarchicalResultsWriter::SResults result( - ml::api::CHierarchicalResultsWriter::E_Result, - pfn, - pfv, - bfn, - bfv, - emptyStr, - 0, - fun, fund, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.1, - fn, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result(ml::api::CHierarchicalResultsWriter::E_Result, + pfn, + pfv, + bfn, + bfv, + emptyStr, + 0, + fun, + fund, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 0.0, + 0.1, + 0.1, + fn, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result)); - writer.acceptBucketTimeInfluencer(ml::core_t::TTime(0), 0.6, 1.0, 10.0); // Finished adding results @@ -1979,7 +1871,6 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() rapidjson::Document doc; doc.Parse(sstream.str().c_str()); - 
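        // The writer emits a single JSON array in which each element wraps one
        // result type, e.g. {"influencers":[...]} or {"bucket":{...}} (a shape
        // reconstructed from the assertions below, not additional output), which
        // is why the parsed document is indexed by position.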
rapidjson::StringBuffer strbuf; using TStringBufferPrettyWriter = rapidjson::PrettyWriter; TStringBufferPrettyWriter writer(strbuf); @@ -1987,11 +1878,11 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() LOG_DEBUG("limited write influencers:\n" << strbuf.GetString()); - const rapidjson::Value &influencers = doc[rapidjson::SizeType(1)]["influencers"]; + const rapidjson::Value& influencers = doc[rapidjson::SizeType(1)]["influencers"]; CPPUNIT_ASSERT(influencers.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), influencers.Size()); - const rapidjson::Value &influencer = influencers[rapidjson::SizeType(0)]; + const rapidjson::Value& influencer = influencers[rapidjson::SizeType(0)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.9, influencer["probability"].GetDouble(), 0.001); CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, influencer["initial_influencer_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(influencer.HasMember("influencer_score")); @@ -2000,7 +1891,7 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() CPPUNIT_ASSERT_EQUAL(std::string("jim"), std::string(influencer["influencer_field_value"].GetString())); CPPUNIT_ASSERT(influencer.HasMember("bucket_span")); - const rapidjson::Value &influencer2 = influencers[rapidjson::SizeType(1)]; + const rapidjson::Value& influencer2 = influencers[rapidjson::SizeType(1)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.3, influencer2["probability"].GetDouble(), 0.001); CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, influencer2["initial_influencer_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(influencer2.HasMember("influencer_score")); @@ -2010,13 +1901,13 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() CPPUNIT_ASSERT(influencer2.HasMember("bucket_span")); // bucket influencers - const rapidjson::Value &bucketResult = doc[rapidjson::SizeType(2)]["bucket"]; + const rapidjson::Value& bucketResult = doc[rapidjson::SizeType(2)]["bucket"]; CPPUNIT_ASSERT(bucketResult.HasMember("bucket_influencers")); - const rapidjson::Value &bucketInfluencers = bucketResult["bucket_influencers"]; + const rapidjson::Value& bucketInfluencers = bucketResult["bucket_influencers"]; CPPUNIT_ASSERT(bucketInfluencers.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(3), bucketInfluencers.Size()); - const rapidjson::Value &binf = bucketInfluencers[rapidjson::SizeType(0)]; + const rapidjson::Value& binf = bucketInfluencers[rapidjson::SizeType(0)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.9, binf["probability"].GetDouble(), 0.001); CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, binf["initial_anomaly_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(binf.HasMember("anomaly_score")); @@ -2024,7 +1915,7 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() CPPUNIT_ASSERT_EQUAL(std::string("computer"), std::string(binf["influencer_field_name"].GetString())); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, binf["raw_anomaly_score"].GetDouble(), 0.001); - const rapidjson::Value &binf2 = bucketInfluencers[rapidjson::SizeType(1)]; + const rapidjson::Value& binf2 = bucketInfluencers[rapidjson::SizeType(1)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.5, binf2["probability"].GetDouble(), 0.001); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, binf2["initial_anomaly_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(binf2.HasMember("anomaly_score")); @@ -2032,7 +1923,7 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() CPPUNIT_ASSERT_EQUAL(std::string("user"), std::string(binf2["influencer_field_name"].GetString())); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, binf2["raw_anomaly_score"].GetDouble(), 0.001); - const rapidjson::Value &binf3 = 
bucketInfluencers[rapidjson::SizeType(2)]; + const rapidjson::Value& binf3 = bucketInfluencers[rapidjson::SizeType(2)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.6, binf3["probability"].GetDouble(), 0.001); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, binf3["initial_anomaly_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(binf3.HasMember("anomaly_score")); @@ -2041,8 +1932,7 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, binf3["raw_anomaly_score"].GetDouble(), 0.001); } -void CJsonOutputWriterTest::testWriteWithInfluences() -{ +void CJsonOutputWriterTest::testWriteWithInfluences() { std::ostringstream sstream; { @@ -2066,30 +1956,22 @@ void CJsonOutputWriterTest::testWriteWithInfluences() std::string localhost("localhost"); std::string webserver("web-server"); - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr field1 = - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr( - ml::model::CStringStore::names().get(user), - ml::model::CStringStore::names().get(dave)); + ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(user), + ml::model::CStringStore::names().get(dave)); ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr field2 = - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr( - ml::model::CStringStore::names().get(user), - ml::model::CStringStore::names().get(cat)); + ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(user), + ml::model::CStringStore::names().get(cat)); ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr field3 = - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr( - ml::model::CStringStore::names().get(user), - ml::model::CStringStore::names().get(jo)); - + ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(user), + ml::model::CStringStore::names().get(jo)); ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr hostField1 = - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr( - ml::model::CStringStore::names().get(host), - ml::model::CStringStore::names().get(localhost)); + ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(host), + ml::model::CStringStore::names().get(localhost)); ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr hostField2 = - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr( - ml::model::CStringStore::names().get(host), - ml::model::CStringStore::names().get(webserver)); - + ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(host), + ml::model::CStringStore::names().get(webserver)); influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(field1, 0.4)); influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(field2, 1.0)); @@ -2100,30 +1982,30 @@ void CJsonOutputWriterTest::testWriteWithInfluences() // The output writer won't close the JSON structures until is is destroyed ml::api::CHierarchicalResultsWriter::SResults result(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 
10090.0), - 0.0, - 0.1, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); - - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 0.0, + 0.1, + 0.1, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); + + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); CPPUNIT_ASSERT(writer.acceptResult(result)); @@ -2145,63 +2027,54 @@ void CJsonOutputWriterTest::testWriteWithInfluences() } CPPUNIT_ASSERT(doc[rapidjson::SizeType(1)].HasMember("bucket")); - const rapidjson::Value &bucket = doc[rapidjson::SizeType(1)]["bucket"]; + const rapidjson::Value& bucket = doc[rapidjson::SizeType(1)]["bucket"]; CPPUNIT_ASSERT(bucket.HasMember("records") == false); CPPUNIT_ASSERT(doc[rapidjson::SizeType(0)].HasMember("records")); - const rapidjson::Value &records = doc[rapidjson::SizeType(0)]["records"]; + const rapidjson::Value& records = doc[rapidjson::SizeType(0)]["records"]; CPPUNIT_ASSERT(records[rapidjson::SizeType(0)].HasMember("influencers")); - const rapidjson::Value &influences = records[rapidjson::SizeType(0)]["influencers"]; + const rapidjson::Value& influences = records[rapidjson::SizeType(0)]["influencers"]; CPPUNIT_ASSERT(influences.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), influences.Size()); { - const rapidjson::Value &influence = influences[rapidjson::SizeType(0)]; + const rapidjson::Value& influence = influences[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(influence.HasMember("influencer_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("host"), - std::string(influence["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("host"), std::string(influence["influencer_field_name"].GetString())); CPPUNIT_ASSERT(influence.HasMember("influencer_field_values")); - const rapidjson::Value &influencerFieldValues = influence["influencer_field_values"]; + const rapidjson::Value& influencerFieldValues = influence["influencer_field_values"]; CPPUNIT_ASSERT(influencerFieldValues.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), influencerFieldValues.Size()); // Check influencers are ordered - CPPUNIT_ASSERT_EQUAL(std::string("web-server"), - std::string(influencerFieldValues[rapidjson::SizeType(0)].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("localhost"), - std::string(influencerFieldValues[rapidjson::SizeType(1)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("web-server"), std::string(influencerFieldValues[rapidjson::SizeType(0)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("localhost"), std::string(influencerFieldValues[rapidjson::SizeType(1)].GetString())); } { - const rapidjson::Value &influence = influences[rapidjson::SizeType(1)]; + const rapidjson::Value& influence = influences[rapidjson::SizeType(1)]; CPPUNIT_ASSERT(influence.HasMember("influencer_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("user"), - std::string(influence["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("user"), std::string(influence["influencer_field_name"].GetString())); CPPUNIT_ASSERT(influence.HasMember("influencer_field_values")); - const rapidjson::Value &influencerFieldValues = influence["influencer_field_values"]; + const rapidjson::Value& influencerFieldValues = influence["influencer_field_values"]; 
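            // Expected fragment at this point, assembled from the assertions that
            // follow (a sketch, not new output):
            //   {"influencer_field_name": "user",
            //    "influencer_field_values": ["cat", "dave", "jo"]}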
CPPUNIT_ASSERT(influencerFieldValues.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(3), influencerFieldValues.Size()); // Check influencers are ordered - CPPUNIT_ASSERT_EQUAL(std::string("cat"), - std::string(influencerFieldValues[rapidjson::SizeType(0)].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("dave"), - std::string(influencerFieldValues[rapidjson::SizeType(1)].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("jo"), - std::string(influencerFieldValues[rapidjson::SizeType(2)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("cat"), std::string(influencerFieldValues[rapidjson::SizeType(0)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("dave"), std::string(influencerFieldValues[rapidjson::SizeType(1)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("jo"), std::string(influencerFieldValues[rapidjson::SizeType(2)].GetString())); } } -void CJsonOutputWriterTest::testPersistNormalizer() -{ +void CJsonOutputWriterTest::testPersistNormalizer() { ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(); - std::ostringstream sstream; ml::core_t::TTime persistTime(1); { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); ml::model::CHierarchicalResultsNormalizer normalizer(modelConfig); @@ -2217,22 +2090,21 @@ void CJsonOutputWriterTest::testPersistNormalizer() CPPUNIT_ASSERT(doc.IsArray()); - const rapidjson::Value &quantileWrapper = doc[rapidjson::SizeType(0)]; + const rapidjson::Value& quantileWrapper = doc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(quantileWrapper.HasMember("quantiles")); - const rapidjson::Value &quantileState = quantileWrapper["quantiles"]; + const rapidjson::Value& quantileState = quantileWrapper["quantiles"]; CPPUNIT_ASSERT(quantileState.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(quantileState["job_id"].GetString())); CPPUNIT_ASSERT(quantileState.HasMember("quantile_state")); CPPUNIT_ASSERT(quantileState.HasMember("timestamp")); } -void CJsonOutputWriterTest::testPartitionScores() -{ +void CJsonOutputWriterTest::testPartitionScores() { ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(); std::ostringstream sstream; { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); std::string emptyString; @@ -2240,38 +2112,35 @@ void CJsonOutputWriterTest::testPartitionScores() std::string partitionFieldName("part1"); - for (int i = 0; i < 4; ++i) - { + for (int i = 0; i < 4; ++i) { // For the first iteration use an empty string for the value std::string partitionFieldValue; - if (i > 0) - { + if (i > 0) { partitionFieldValue = 'p' + ml::core::CStringUtils::typeToString(i); } - ml::api::CHierarchicalResultsWriter::SResults result( - ml::api::CHierarchicalResultsWriter::E_PartitionResult, - partitionFieldName, - partitionFieldValue, - emptyString, - emptyString, - emptyString, - 1, - emptyString, - emptyString, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - double(i), // normalised anomaly score - 0.1, - emptyString, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result(ml::api::CHierarchicalResultsWriter::E_PartitionResult, + partitionFieldName, + partitionFieldValue, + emptyString, + 
emptyString, + emptyString, + 1, + emptyString, + emptyString, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 0.0, + double(i), // normalised anomaly score + 0.1, + emptyString, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); writer.acceptResult(result); } @@ -2284,18 +2153,17 @@ void CJsonOutputWriterTest::testPartitionScores() LOG_DEBUG(sstream.str()); - const rapidjson::Value &bucketWrapper = doc[rapidjson::SizeType(0)]; + const rapidjson::Value& bucketWrapper = doc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(bucketWrapper.HasMember("bucket")); - const rapidjson::Value &bucket = bucketWrapper["bucket"]; + const rapidjson::Value& bucket = bucketWrapper["bucket"]; CPPUNIT_ASSERT(bucket.HasMember("partition_scores")); - const rapidjson::Value &partitionScores = bucket["partition_scores"]; + const rapidjson::Value& partitionScores = bucket["partition_scores"]; CPPUNIT_ASSERT(partitionScores.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(4), partitionScores.Size()); - for (rapidjson::SizeType i = 0; i < partitionScores.Size(); ++i) - { - const rapidjson::Value &pDoc = partitionScores[i]; + for (rapidjson::SizeType i = 0; i < partitionScores.Size(); ++i) { + const rapidjson::Value& pDoc = partitionScores[i]; CPPUNIT_ASSERT(pDoc.IsObject()); CPPUNIT_ASSERT(pDoc.HasMember("probability")); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.1, pDoc["probability"].GetDouble(), 0.01); @@ -2307,8 +2175,7 @@ void CJsonOutputWriterTest::testPartitionScores() CPPUNIT_ASSERT(pDoc.HasMember("partition_field_name")); CPPUNIT_ASSERT_EQUAL(std::string("part1"), std::string(pDoc["partition_field_name"].GetString())); std::string fieldValue; - if (i > 0) - { + if (i > 0) { fieldValue = 'p' + ml::core::CStringUtils::typeToString(i); } CPPUNIT_ASSERT(pDoc.HasMember("partition_field_value")); @@ -2316,11 +2183,10 @@ void CJsonOutputWriterTest::testPartitionScores() } } -void CJsonOutputWriterTest::testReportMemoryUsage() -{ +void CJsonOutputWriterTest::testReportMemoryUsage() { std::ostringstream sstream; { - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); ml::model::CResourceMonitor::SResults resourceUsage; @@ -2341,9 +2207,9 @@ void CJsonOutputWriterTest::testReportMemoryUsage() rapidjson::Document doc; doc.Parse(sstream.str().c_str()); - const rapidjson::Value &resourceWrapper = doc[rapidjson::SizeType(0)]; + const rapidjson::Value& resourceWrapper = doc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(resourceWrapper.HasMember("model_size_stats")); - const rapidjson::Value &sizeStats = resourceWrapper["model_size_stats"]; + const rapidjson::Value& sizeStats = resourceWrapper["model_size_stats"]; CPPUNIT_ASSERT(sizeStats.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(sizeStats["job_id"].GetString())); @@ -2367,8 +2233,7 @@ void CJsonOutputWriterTest::testReportMemoryUsage() CPPUNIT_ASSERT(nowMs + 1000ll >= sizeStats["log_time"].GetInt64()); } -void CJsonOutputWriterTest::testWriteScheduledEvent() -{ +void CJsonOutputWriterTest::testWriteScheduledEvent() { std::ostringstream sstream; { @@ -2382,61 +2247,60 @@ void CJsonOutputWriterTest::testWriteScheduledEvent() std::string emptyString; ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - ml::core::CJsonOutputStreamWrapper outputStream (sstream); + ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", 
outputStream); // This result has no scheduled events ml::api::CHierarchicalResultsWriter::SResults result(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 100, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 100, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 0.0, + 0.1, + 0.1, + fieldName, + influences, + false, + true, + 1, + 100, + EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result)); // This result has 2 scheduled events std::vector eventDescriptions{"event-foo", "event-bar"}; ml::api::CHierarchicalResultsWriter::SResults result2(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 200, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - eventDescriptions); - + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + emptyString, + 200, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 0.0, + 0.1, + 0.1, + fieldName, + influences, + false, + true, + 1, + 100, + eventDescriptions); CPPUNIT_ASSERT(writer.acceptResult(result2)); CPPUNIT_ASSERT(writer.endOutputBatch(false, 1U)); @@ -2458,37 +2322,32 @@ void CJsonOutputWriterTest::testWriteScheduledEvent() CPPUNIT_ASSERT(doc.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), doc.Size()); // the first bucket has no events - const rapidjson::Value &bucket = doc[rapidjson::SizeType(1)]["bucket"]; + const rapidjson::Value& bucket = doc[rapidjson::SizeType(1)]["bucket"]; CPPUNIT_ASSERT(bucket.HasMember("scheduled_event") == false); - const rapidjson::Value &bucketWithEvents = doc[rapidjson::SizeType(1)]["bucket"]; + const rapidjson::Value& bucketWithEvents = doc[rapidjson::SizeType(1)]["bucket"]; CPPUNIT_ASSERT(bucketWithEvents.HasMember("scheduled_events")); - const rapidjson::Value &events = bucketWithEvents["scheduled_events"]; + const rapidjson::Value& events = bucketWithEvents["scheduled_events"]; CPPUNIT_ASSERT(events.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), events.Size()); - CPPUNIT_ASSERT_EQUAL(std::string("event-foo"), - std::string(events[rapidjson::SizeType(0)].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("event-bar"), - std::string(events[rapidjson::SizeType(1)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("event-foo"), std::string(events[rapidjson::SizeType(0)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("event-bar"), std::string(events[rapidjson::SizeType(1)].GetString())); } -void CJsonOutputWriterTest::testThroughputWithScopedAllocator() -{ +void CJsonOutputWriterTest::testThroughputWithScopedAllocator() { this->testThroughputHelper(true); } -void CJsonOutputWriterTest::testThroughputWithoutScopedAllocator() -{ +void CJsonOutputWriterTest::testThroughputWithoutScopedAllocator() { this->testThroughputHelper(false); } -void CJsonOutputWriterTest::testThroughputHelper(bool useScopedAllocator) -{ +void CJsonOutputWriterTest::testThroughputHelper(bool useScopedAllocator) { // Write to /dev/null 
(Unix) or nul (Windows) std::ofstream ofs(ml::core::COsFileFuncs::NULL_FILENAME); CPPUNIT_ASSERT(ofs.is_open()); - ml::core::CJsonOutputStreamWrapper outputStream (ofs); + ml::core::CJsonOutputStreamWrapper outputStream(ofs); ml::api::CJsonOutputWriter writer("job", outputStream); std::string partitionFieldName("tfn"); @@ -2505,131 +2364,126 @@ void CJsonOutputWriterTest::testThroughputHelper(bool useScopedAllocator) ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences; ml::api::CHierarchicalResultsWriter::SResults result11(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.5, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); + false, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 2.24, + 0.5, + 0.0, + 79, + fieldName, + influences, + false, + false, + 1, + 100); ml::api::CHierarchicalResultsWriter::SResults result112(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.5, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result12( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.8, - 0.0, - fieldName, - influences, - false, - true, - 2, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result13( - ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.5, - 0.0, - fieldName, - influences, - false, - false, - 3, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result14( - ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 4, - 100, - EMPTY_STRING_LIST); - - + true, + partitionFieldName, + partitionFieldValue, + overFieldName, + overFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + TDouble1Vec(1, 10090.0), + TDouble1Vec(1, 6953.0), + 2.24, + 0.5, + 0.0, + 79, + fieldName, + influences, + false, + false, + 1, + 100); + + ml::api::CHierarchicalResultsWriter::SResults result12(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.8, + 0.0, + 
fieldName, + influences, + false, + true, + 2, + 100, + EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result13(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.5, + 0.0, + fieldName, + influences, + false, + false, + 3, + 100, + EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result14(ml::api::CHierarchicalResultsWriter::E_Result, + partitionFieldName, + partitionFieldValue, + byFieldName, + byFieldValue, + correlatedByFieldValue, + 1, + function, + functionDescription, + 42.0, + 79, + TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), + 2.24, + 0.0, + 0.0, + fieldName, + influences, + false, + false, + 4, + 100, + EMPTY_STRING_LIST); // 1st bucket writer.acceptBucketTimeInfluencer(1, 0.01, 13.44, 70.0); @@ -2638,13 +2492,10 @@ void CJsonOutputWriterTest::testThroughputHelper(bool useScopedAllocator) static const size_t TEST_SIZE(75000); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting throughput test at " << ml::core::CTimeUtils::toTimeString(start)); - for (size_t count = 0; count < TEST_SIZE; ++count) - { - if (useScopedAllocator) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { + if (useScopedAllocator) { using TScopedAllocator = ml::core::CScopedRapidJsonPoolAllocator; static const std::string ALLOCATOR_ID("CAnomalyJob::writeOutResults"); TScopedAllocator scopedAllocator(ALLOCATOR_ID, writer); @@ -2659,12 +2510,9 @@ void CJsonOutputWriterTest::testThroughputHelper(bool useScopedAllocator) CPPUNIT_ASSERT(writer.acceptResult(result14)); CPPUNIT_ASSERT(writer.acceptResult(result14)); - // Finished adding results CPPUNIT_ASSERT(writer.endOutputBatch(false, 1U)); - } - else - { + } else { CPPUNIT_ASSERT(writer.acceptResult(result11)); CPPUNIT_ASSERT(writer.acceptResult(result11)); CPPUNIT_ASSERT(writer.acceptResult(result112)); @@ -2675,16 +2523,13 @@ void CJsonOutputWriterTest::testThroughputHelper(bool useScopedAllocator) CPPUNIT_ASSERT(writer.acceptResult(result14)); CPPUNIT_ASSERT(writer.acceptResult(result14)); - // Finished adding results CPPUNIT_ASSERT(writer.endOutputBatch(false, 1U)); } } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished throughput test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Writing " << TEST_SIZE << - " records took " << (end - start) << " seconds"); + LOG_INFO("Writing " << TEST_SIZE << " records took " << (end - start) << " seconds"); } diff --git a/lib/api/unittest/CJsonOutputWriterTest.h b/lib/api/unittest/CJsonOutputWriterTest.h index 1479e8d0c6..2d61a299ad 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.h +++ b/lib/api/unittest/CJsonOutputWriterTest.h @@ -8,35 +8,32 @@ #include +class CJsonOutputWriterTest : public CppUnit::TestFixture { +public: + void testSimpleWrite(); + void testWriteNonAnomalousBucket(); + void testBucketWrite(); + void testBucketWriteInterim(); + void testLimitedRecordsWrite(); + void testLimitedRecordsWriteInterim(); + void testFlush(); + void testWriteCategoryDefinition(); + void testWriteWithInfluences(); + void testWriteInfluencers(); + void testWriteInfluencersWithLimit(); + void 
testPersistNormalizer(); + void testPartitionScores(); + void testReportMemoryUsage(); + void testWriteScheduledEvent(); + void testThroughputWithScopedAllocator(); + void testThroughputWithoutScopedAllocator(); -class CJsonOutputWriterTest : public CppUnit::TestFixture -{ - public: - void testSimpleWrite(); - void testWriteNonAnomalousBucket(); - void testBucketWrite(); - void testBucketWriteInterim(); - void testLimitedRecordsWrite(); - void testLimitedRecordsWriteInterim(); - void testFlush(); - void testWriteCategoryDefinition(); - void testWriteWithInfluences(); - void testWriteInfluencers(); - void testWriteInfluencersWithLimit(); - void testPersistNormalizer(); - void testPartitionScores(); - void testReportMemoryUsage(); - void testWriteScheduledEvent(); - void testThroughputWithScopedAllocator(); - void testThroughputWithoutScopedAllocator(); + static CppUnit::Test* suite(); - static CppUnit::Test *suite(); - - private: - void testBucketWriteHelper(bool isInterim); - void testLimitedRecordsWriteHelper(bool isInterim); - void testThroughputHelper(bool useScopedAllocator); +private: + void testBucketWriteHelper(bool isInterim); + void testLimitedRecordsWriteHelper(bool isInterim); + void testThroughputHelper(bool useScopedAllocator); }; #endif // INCLUDED_CJsonOutputWriterTest_h - diff --git a/lib/api/unittest/CLengthEncodedInputParserTest.cc b/lib/api/unittest/CLengthEncodedInputParserTest.cc index c55134a7f8..9f1666652c 100644 --- a/lib/api/unittest/CLengthEncodedInputParserTest.cc +++ b/lib/api/unittest/CLengthEncodedInputParserTest.cc @@ -12,8 +12,8 @@ #include #include -#include #include +#include #include #include @@ -24,174 +24,129 @@ #include #endif -CppUnit::Test *CLengthEncodedInputParserTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLengthEncodedInputParserTest"); +CppUnit::Test* CLengthEncodedInputParserTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLengthEncodedInputParserTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLengthEncodedInputParserTest::testCsvEquivalence", - &CLengthEncodedInputParserTest::testCsvEquivalence) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLengthEncodedInputParserTest::testThroughput", - &CLengthEncodedInputParserTest::testThroughput) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLengthEncodedInputParserTest::testCorruptStreamDetection", - &CLengthEncodedInputParserTest::testCorruptStreamDetection) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CLengthEncodedInputParserTest::testCsvEquivalence", + &CLengthEncodedInputParserTest::testCsvEquivalence)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLengthEncodedInputParserTest::testThroughput", + &CLengthEncodedInputParserTest::testThroughput)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLengthEncodedInputParserTest::testCorruptStreamDetection", &CLengthEncodedInputParserTest::testCorruptStreamDetection)); return suiteOfTests; } -namespace -{ - +namespace { -class CSetupVisitor -{ - public: - CSetupVisitor() - : m_RecordsPerBlock(0) - { - } - - //! 
Handle a record - bool operator()(const ml::api::CCsvInputParser::TStrStrUMap &dataRowFields) - { - if (m_EncodedFieldNames.empty()) - { - this->appendNumber(dataRowFields.size(), m_EncodedFieldNames); - for (const auto &entry : dataRowFields) - { - const std::string &fieldName = entry.first; - this->appendNumber(fieldName.length(), m_EncodedFieldNames); - m_EncodedFieldNames += fieldName; - } - } +class CSetupVisitor { +public: + CSetupVisitor() : m_RecordsPerBlock(0) {} - this->appendNumber(dataRowFields.size(), m_EncodedDataBlock); - for (const auto &entry : dataRowFields) - { - const std::string &fieldValue = entry.second; - this->appendNumber(fieldValue.length(), m_EncodedDataBlock); - m_EncodedDataBlock += fieldValue; + //! Handle a record + bool operator()(const ml::api::CCsvInputParser::TStrStrUMap& dataRowFields) { + if (m_EncodedFieldNames.empty()) { + this->appendNumber(dataRowFields.size(), m_EncodedFieldNames); + for (const auto& entry : dataRowFields) { + const std::string& fieldName = entry.first; + this->appendNumber(fieldName.length(), m_EncodedFieldNames); + m_EncodedFieldNames += fieldName; } - - ++m_RecordsPerBlock; - - return true; } - std::string input(size_t testSize) const - { - std::string str; - str.reserve(m_EncodedFieldNames.length() + - testSize * m_EncodedDataBlock.length()); - - // Assign like this to avoid GNU copy-on-write (which would defeat - // the preceding reserve) - str.assign(m_EncodedFieldNames, 0, m_EncodedFieldNames.length()); + this->appendNumber(dataRowFields.size(), m_EncodedDataBlock); + for (const auto& entry : dataRowFields) { + const std::string& fieldValue = entry.second; + this->appendNumber(fieldValue.length(), m_EncodedDataBlock); + m_EncodedDataBlock += fieldValue; + } - // Duplicate the binary data according to the test size - for (size_t count = 0; count < testSize; ++count) - { - str += m_EncodedDataBlock; - } + ++m_RecordsPerBlock; - LOG_DEBUG("Input size is " << str.length()); + return true; + } - return str; - } + std::string input(size_t testSize) const { + std::string str; + str.reserve(m_EncodedFieldNames.length() + testSize * m_EncodedDataBlock.length()); - size_t recordsPerBlock() const - { - return m_RecordsPerBlock; - } + // Assign like this to avoid GNU copy-on-write (which would defeat + // the preceding reserve) + str.assign(m_EncodedFieldNames, 0, m_EncodedFieldNames.length()); - private: - template - void appendNumber(NUM_TYPE num, std::string &str) - { - uint32_t netNum(htonl(static_cast(num))); - str.append(reinterpret_cast(&netNum), sizeof(netNum)); + // Duplicate the binary data according to the test size + for (size_t count = 0; count < testSize; ++count) { + str += m_EncodedDataBlock; } - private: - std::string m_EncodedFieldNames; - size_t m_RecordsPerBlock; - std::string m_EncodedDataBlock; -}; + LOG_DEBUG("Input size is " << str.length()); -class CVisitor -{ - public: - CVisitor() - : m_Fast(true), - m_RecordCount(0) - { - } + return str; + } - CVisitor(const ml::api::CCsvInputParser::TStrVec &expectedFieldNames) - : m_Fast(false), - m_RecordCount(0), - m_ExpectedFieldNames(expectedFieldNames) - { - } + size_t recordsPerBlock() const { return m_RecordsPerBlock; } - //! 
Handle a record - bool operator()(const ml::api::CLengthEncodedInputParser::TStrStrUMap &dataRowFields) - { - ++m_RecordCount; +private: + template + void appendNumber(NUM_TYPE num, std::string& str) { + uint32_t netNum(htonl(static_cast(num))); + str.append(reinterpret_cast(&netNum), sizeof(netNum)); + } - // For the throughput test, the assertions below will skew the - // results, so bypass them - if (m_Fast) - { - return true; - } +private: + std::string m_EncodedFieldNames; + size_t m_RecordsPerBlock; + std::string m_EncodedDataBlock; +}; - // Check the field names - CPPUNIT_ASSERT_EQUAL(m_ExpectedFieldNames.size(), dataRowFields.size()); - for (ml::api::CCsvInputParser::TStrStrUMapCItr iter = dataRowFields.begin(); - iter != dataRowFields.end(); - ++iter) - { - LOG_DEBUG("Field " << iter->first << " is " << iter->second); - CPPUNIT_ASSERT(std::find(m_ExpectedFieldNames.begin(), m_ExpectedFieldNames.end(), iter->first) - != m_ExpectedFieldNames.end()); - } +class CVisitor { +public: + CVisitor() : m_Fast(true), m_RecordCount(0) {} - // Check the line count is consistent with the _raw field - ml::api::CCsvInputParser::TStrStrUMapCItr rawIter = dataRowFields.find("_raw"); - CPPUNIT_ASSERT(rawIter != dataRowFields.end()); - ml::api::CCsvInputParser::TStrStrUMapCItr lineCountIter = dataRowFields.find("linecount"); - CPPUNIT_ASSERT(lineCountIter != dataRowFields.end()); + CVisitor(const ml::api::CCsvInputParser::TStrVec& expectedFieldNames) + : m_Fast(false), m_RecordCount(0), m_ExpectedFieldNames(expectedFieldNames) {} - size_t expectedLineCount(1 + std::count(rawIter->second.begin(), - rawIter->second.end(), - '\n')); - size_t lineCount(0); - CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(lineCountIter->second, lineCount)); - CPPUNIT_ASSERT_EQUAL(expectedLineCount, lineCount); + //! 
Handle a record + bool operator()(const ml::api::CLengthEncodedInputParser::TStrStrUMap& dataRowFields) { + ++m_RecordCount; + // For the throughput test, the assertions below will skew the + // results, so bypass them + if (m_Fast) { return true; } - size_t recordCount() const - { - return m_RecordCount; + // Check the field names + CPPUNIT_ASSERT_EQUAL(m_ExpectedFieldNames.size(), dataRowFields.size()); + for (ml::api::CCsvInputParser::TStrStrUMapCItr iter = dataRowFields.begin(); iter != dataRowFields.end(); ++iter) { + LOG_DEBUG("Field " << iter->first << " is " << iter->second); + CPPUNIT_ASSERT(std::find(m_ExpectedFieldNames.begin(), m_ExpectedFieldNames.end(), iter->first) != m_ExpectedFieldNames.end()); } - private: - bool m_Fast; - size_t m_RecordCount; - ml::api::CCsvInputParser::TStrVec m_ExpectedFieldNames; + // Check the line count is consistent with the _raw field + ml::api::CCsvInputParser::TStrStrUMapCItr rawIter = dataRowFields.find("_raw"); + CPPUNIT_ASSERT(rawIter != dataRowFields.end()); + ml::api::CCsvInputParser::TStrStrUMapCItr lineCountIter = dataRowFields.find("linecount"); + CPPUNIT_ASSERT(lineCountIter != dataRowFields.end()); -}; + size_t expectedLineCount(1 + std::count(rawIter->second.begin(), rawIter->second.end(), '\n')); + size_t lineCount(0); + CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(lineCountIter->second, lineCount)); + CPPUNIT_ASSERT_EQUAL(expectedLineCount, lineCount); + return true; + } + size_t recordCount() const { return m_RecordCount; } + +private: + bool m_Fast; + size_t m_RecordCount; + ml::api::CCsvInputParser::TStrVec m_ExpectedFieldNames; +}; } -void CLengthEncodedInputParserTest::testCsvEquivalence() -{ +void CLengthEncodedInputParserTest::testCsvEquivalence() { std::ifstream ifs("testfiles/simple.txt"); CPPUNIT_ASSERT(ifs.is_open()); @@ -202,8 +157,7 @@ void CLengthEncodedInputParserTest::testCsvEquivalence() CPPUNIT_ASSERT(setupParser.readStream(std::ref(setupVisitor))); // Input must be binary otherwise Windows will stop at CTRL+Z - std::istringstream input(setupVisitor.input(1), - std::ios::in | std::ios::binary); + std::istringstream input(setupVisitor.input(1), std::ios::in | std::ios::binary); ml::api::CLengthEncodedInputParser parser(input); @@ -242,8 +196,7 @@ void CLengthEncodedInputParserTest::testCsvEquivalence() CPPUNIT_ASSERT_EQUAL(size_t(15), visitor.recordCount()); } -void CLengthEncodedInputParserTest::testThroughput() -{ +void CLengthEncodedInputParserTest::testThroughput() { // NB: For fair comparison with the other input formats (CSV and Google // Protocol Buffers), the input data and test size must be identical @@ -261,41 +214,35 @@ void CLengthEncodedInputParserTest::testThroughput() // Construct a large test input static const size_t TEST_SIZE(10000); // Input must be binary otherwise Windows will stop at CTRL+Z - std::istringstream input(setupVisitor.input(TEST_SIZE), - std::ios::in | std::ios::binary); + std::istringstream input(setupVisitor.input(TEST_SIZE), std::ios::in | std::ios::binary); ml::api::CLengthEncodedInputParser parser(input); CVisitor visitor; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CPPUNIT_ASSERT(parser.readStream(std::ref(visitor))); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished 
diff --git a/lib/api/unittest/CLengthEncodedInputParserTest.h b/lib/api/unittest/CLengthEncodedInputParserTest.h
index bc69ab7467..43823c41db 100644
--- a/lib/api/unittest/CLengthEncodedInputParserTest.h
+++ b/lib/api/unittest/CLengthEncodedInputParserTest.h
@@ -8,16 +8,13 @@
 #include
 
+class CLengthEncodedInputParserTest : public CppUnit::TestFixture {
+public:
+    void testCsvEquivalence();
+    void testThroughput();
+    void testCorruptStreamDetection();
 
-class CLengthEncodedInputParserTest : public CppUnit::TestFixture
-{
-    public:
-        void testCsvEquivalence();
-        void testThroughput();
-        void testCorruptStreamDetection();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CLengthEncodedInputParserTest_h
-
diff --git a/lib/api/unittest/CLineifiedJsonInputParserTest.cc b/lib/api/unittest/CLineifiedJsonInputParserTest.cc
index 781a35e21d..6ab32d1d7a 100644
--- a/lib/api/unittest/CLineifiedJsonInputParserTest.cc
+++ b/lib/api/unittest/CLineifiedJsonInputParserTest.cc
@@ -12,116 +12,87 @@
 #include
 #include
 
-#include
 #include
+#include
 
 #include
 
+CppUnit::Test* CLineifiedJsonInputParserTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLineifiedJsonInputParserTest");
 
-CppUnit::Test *CLineifiedJsonInputParserTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLineifiedJsonInputParserTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLineifiedJsonInputParserTest>(
-                               "CLineifiedJsonInputParserTest::testThroughputArbitrary",
-                               &CLineifiedJsonInputParserTest::testThroughputArbitrary) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLineifiedJsonInputParserTest>(
-                               "CLineifiedJsonInputParserTest::testThroughputCommon",
-                               &CLineifiedJsonInputParserTest::testThroughputCommon) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonInputParserTest>("CLineifiedJsonInputParserTest::testThroughputArbitrary",
+                                                                                 &CLineifiedJsonInputParserTest::testThroughputArbitrary));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonInputParserTest>("CLineifiedJsonInputParserTest::testThroughputCommon",
+                                                                                 &CLineifiedJsonInputParserTest::testThroughputCommon));
 
     return suiteOfTests;
 }
 
-namespace
-{
+namespace {
+class CSetupVisitor {
+public:
+    CSetupVisitor() : m_RecordsPerBlock(0) {}
 
-class CSetupVisitor
-{
-    public:
-        CSetupVisitor()
-            : m_RecordsPerBlock(0)
-        {
-        }
+    //! Handle a record
+    bool operator()(const ml::api::CCsvInputParser::TStrStrUMap& dataRowFields) {
+        ++m_RecordsPerBlock;
 
-        //! Handle a record
-        bool operator()(const ml::api::CCsvInputParser::TStrStrUMap &dataRowFields)
-        {
-            ++m_RecordsPerBlock;
+        CPPUNIT_ASSERT(m_OutputWriter.writeRow(dataRowFields));
 
-            CPPUNIT_ASSERT(m_OutputWriter.writeRow(dataRowFields));
+        return true;
+    }
 
-            return true;
-        }
+    std::string input(size_t testSize) const {
+        const std::string& block = m_OutputWriter.internalString();
 
-        std::string input(size_t testSize) const
-        {
-            const std::string &block = m_OutputWriter.internalString();
+        std::string str;
+        str.reserve(testSize * block.length());
 
-            std::string str;
-            str.reserve(testSize * block.length());
+        // Duplicate the binary data according to the test size
+        for (size_t count = 0; count < testSize; ++count) {
+            str.append(block);
+        }
 
-            // Duplicate the binary data according to the test size
-            for (size_t count = 0; count < testSize; ++count)
-            {
-                str.append(block);
-            }
+        LOG_DEBUG("Input size is " << str.length());
 
-            LOG_DEBUG("Input size is " << str.length());
+        return str;
+    }
 
-            return str;
-        }
+    size_t recordsPerBlock() const { return m_RecordsPerBlock; }
 
-        size_t recordsPerBlock() const
-        {
-            return m_RecordsPerBlock;
-        }
-
-    private:
-        size_t m_RecordsPerBlock;
-        ml::api::CLineifiedJsonOutputWriter m_OutputWriter;
+private:
+    size_t m_RecordsPerBlock;
+    ml::api::CLineifiedJsonOutputWriter m_OutputWriter;
 };
 
-class CVisitor
-{
-    public:
-        CVisitor()
-            : m_RecordCount(0)
-        {
-        }
+class CVisitor {
+public:
+    CVisitor() : m_RecordCount(0) {}
 
-        //! Handle a record
-        bool operator()(const ml::api::CLineifiedJsonInputParser::TStrStrUMap &/*dataRowFields*/)
-        {
-            ++m_RecordCount;
-            return true;
-        }
+    //! Handle a record
+    bool operator()(const ml::api::CLineifiedJsonInputParser::TStrStrUMap& /*dataRowFields*/) {
+        ++m_RecordCount;
+        return true;
+    }
 
-        size_t recordCount() const
-        {
-            return m_RecordCount;
-        }
+    size_t recordCount() const { return m_RecordCount; }
 
-    private:
-        size_t m_RecordCount;
+private:
+    size_t m_RecordCount;
 };
-
-
 }
 
-void CLineifiedJsonInputParserTest::testThroughputArbitrary()
-{
+void CLineifiedJsonInputParserTest::testThroughputArbitrary() {
     LOG_INFO("Testing assuming arbitrary fields in JSON documents");
 
     this->runTest(false);
 }
 
-void CLineifiedJsonInputParserTest::testThroughputCommon()
-{
+void CLineifiedJsonInputParserTest::testThroughputCommon() {
     LOG_INFO("Testing assuming all JSON documents have the same fields");
 
     this->runTest(true);
 }
 
-void CLineifiedJsonInputParserTest::runTest(bool allDocsSameStructure)
-{
+void CLineifiedJsonInputParserTest::runTest(bool allDocsSameStructure) {
     // NB: For fair comparison with the other input formats (CSV and Google
     // Protocol Buffers), the input data and test size must be identical
 
@@ -140,24 +111,19 @@ void CLineifiedJsonInputParserTest::runTest(bool allDocsSameStructure)
     static const size_t TEST_SIZE(5000);
     std::istringstream input(setupVisitor.input(TEST_SIZE));
 
-    ml::api::CLineifiedJsonInputParser parser(input,
-                                              allDocsSameStructure);
+    ml::api::CLineifiedJsonInputParser parser(input, allDocsSameStructure);
 
     CVisitor visitor;
 
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting throughput test at " << ml::core::CTimeUtils::toTimeString(start));
 
     CPPUNIT_ASSERT(parser.readStream(std::ref(visitor)));
 
     ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished throughput test at " << ml::core::CTimeUtils::toTimeString(end));
 
     CPPUNIT_ASSERT_EQUAL(setupVisitor.recordsPerBlock() * TEST_SIZE, visitor.recordCount());
 
-    LOG_INFO("Parsing " << visitor.recordCount() <<
-             " records took " << (end - start) << " seconds");
+    LOG_INFO("Parsing " << visitor.recordCount() << " records took " << (end - start) << " seconds");
 }
-
diff --git a/lib/api/unittest/CLineifiedJsonInputParserTest.h b/lib/api/unittest/CLineifiedJsonInputParserTest.h
index a96b58bfca..e00efe1a3e 100644
--- a/lib/api/unittest/CLineifiedJsonInputParserTest.h
+++ b/lib/api/unittest/CLineifiedJsonInputParserTest.h
@@ -8,18 +8,15 @@
 #include
 
+class CLineifiedJsonInputParserTest : public CppUnit::TestFixture {
+public:
+    void testThroughputArbitrary();
+    void testThroughputCommon();
 
-class CLineifiedJsonInputParserTest : public CppUnit::TestFixture
-{
-    public:
-        void testThroughputArbitrary();
-        void testThroughputCommon();
+    static CppUnit::Test* suite();
 
-        static CppUnit::Test *suite();
-
-    private:
-        void runTest(bool allDocsSameStructure);
+private:
+    void runTest(bool allDocsSameStructure);
 };
 
 #endif // INCLUDED_CLineifiedJsonInputParserTest_h
-
diff --git a/lib/api/unittest/CLineifiedJsonOutputWriterTest.cc b/lib/api/unittest/CLineifiedJsonOutputWriterTest.cc
index 68e4f0b18b..dd4f63201f 100644
--- a/lib/api/unittest/CLineifiedJsonOutputWriterTest.cc
+++ b/lib/api/unittest/CLineifiedJsonOutputWriterTest.cc
@@ -11,23 +11,18 @@
 #include
 
+CppUnit::Test* CLineifiedJsonOutputWriterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLineifiedJsonOutputWriterTest");
 
-CppUnit::Test *CLineifiedJsonOutputWriterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLineifiedJsonOutputWriterTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLineifiedJsonOutputWriterTest>(
-                               "CLineifiedJsonOutputWriterTest::testStringOutput",
-                               &CLineifiedJsonOutputWriterTest::testStringOutput) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLineifiedJsonOutputWriterTest>(
-                               "CLineifiedJsonOutputWriterTest::testNumericOutput",
-                               &CLineifiedJsonOutputWriterTest::testNumericOutput) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonOutputWriterTest>("CLineifiedJsonOutputWriterTest::testStringOutput",
+                                                                                  &CLineifiedJsonOutputWriterTest::testStringOutput));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonOutputWriterTest>("CLineifiedJsonOutputWriterTest::testNumericOutput",
+                                                                                  &CLineifiedJsonOutputWriterTest::testNumericOutput));
 
     return suiteOfTests;
 }
 
-void CLineifiedJsonOutputWriterTest::testStringOutput()
-{
+void CLineifiedJsonOutputWriterTest::testStringOutput() {
     ml::api::CLineifiedJsonOutputWriter::TStrStrUMap dataRowFields;
     dataRowFields["probability"] = "0.01";
     dataRowFields["normalized_score"] = "2.2";
@@ -37,24 +32,22 @@ void CLineifiedJsonOutputWriterTest::testStringOutput()
     ml::api::CLineifiedJsonOutputWriter writer;
     CPPUNIT_ASSERT(writer.writeRow(dataRowFields, overrideDataRowFields));
 
-    const std::string &output = writer.internalString();
+    const std::string& output = writer.internalString();
 
     CPPUNIT_ASSERT_EQUAL(std::string("{\"probability\":\"0.01\",\"normalized_score\":\"3.3\"}\n"), output);
 }
 
-void CLineifiedJsonOutputWriterTest::testNumericOutput()
-{
+void CLineifiedJsonOutputWriterTest::testNumericOutput() {
     ml::api::CLineifiedJsonOutputWriter::TStrStrUMap dataRowFields;
     dataRowFields["probability"] = "0.01";
     dataRowFields["normalized_score"] = "2.2";
     ml::api::CLineifiedJsonOutputWriter::TStrStrUMap overrideDataRowFields;
     overrideDataRowFields["normalized_score"] = "3.3";
 
-    ml::api::CLineifiedJsonOutputWriter writer({ "probability", "normalized_score" });
+    ml::api::CLineifiedJsonOutputWriter writer({"probability", "normalized_score"});
     CPPUNIT_ASSERT(writer.writeRow(dataRowFields, overrideDataRowFields));
 
-    const std::string &output = writer.internalString();
+    const std::string& output = writer.internalString();
 
     CPPUNIT_ASSERT_EQUAL(std::string("{\"probability\":0.01,\"normalized_score\":3.3}\n"), output);
 }
-
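The two tests above pin down the writer's quoting rule: a CLineifiedJsonOutputWriter constructed with a list of field names emits those fields as bare JSON numbers, while the default-constructed writer quotes every value. A minimal usage sketch built only from calls that appear in the test (illustrative, not API documentation):

    ml::api::CLineifiedJsonOutputWriter::TStrStrUMap dataRowFields;
    dataRowFields["probability"] = "0.01";
    ml::api::CLineifiedJsonOutputWriter::TStrStrUMap overrideDataRowFields; // no overrides

    ml::api::CLineifiedJsonOutputWriter writer({"probability"}); // field marked numeric
    writer.writeRow(dataRowFields, overrideDataRowFields);
    // writer.internalString() should now hold: {"probability":0.01}\n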
CppUnit::TestSuite("CLineifiedXmlInputParserTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLineifiedXmlInputParserTest::testThroughputArbitraryConformant", - &CLineifiedXmlInputParserTest::testThroughputArbitraryConformant) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLineifiedXmlInputParserTest::testThroughputCommonConformant", - &CLineifiedXmlInputParserTest::testThroughputCommonConformant) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLineifiedXmlInputParserTest::testThroughputArbitraryRapid", - &CLineifiedXmlInputParserTest::testThroughputArbitraryRapid) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLineifiedXmlInputParserTest::testThroughputCommonRapid", - &CLineifiedXmlInputParserTest::testThroughputCommonRapid) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CLineifiedXmlInputParserTest::testThroughputArbitraryConformant", + &CLineifiedXmlInputParserTest::testThroughputArbitraryConformant)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLineifiedXmlInputParserTest::testThroughputCommonConformant", &CLineifiedXmlInputParserTest::testThroughputCommonConformant)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLineifiedXmlInputParserTest::testThroughputArbitraryRapid", &CLineifiedXmlInputParserTest::testThroughputArbitraryRapid)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLineifiedXmlInputParserTest::testThroughputCommonRapid", + &CLineifiedXmlInputParserTest::testThroughputCommonRapid)); return suiteOfTests; } -namespace -{ +namespace { +class CSetupVisitor { +public: + CSetupVisitor() : m_RecordsPerBlock(0), m_OutputWriter("root") {} -class CSetupVisitor -{ - public: - CSetupVisitor() - : m_RecordsPerBlock(0), - m_OutputWriter("root") - { - } + //! Handle a record + bool operator()(const ml::api::CCsvInputParser::TStrStrUMap& dataRowFields) { + ++m_RecordsPerBlock; + CPPUNIT_ASSERT(m_OutputWriter.writeRow(dataRowFields)); - //! Handle a record - bool operator()(const ml::api::CCsvInputParser::TStrStrUMap &dataRowFields) - { - ++m_RecordsPerBlock; - CPPUNIT_ASSERT(m_OutputWriter.writeRow(dataRowFields)); + return true; + } - return true; - } + std::string input(size_t testSize) const { + const std::string& block = m_OutputWriter.internalString(); - std::string input(size_t testSize) const - { - const std::string &block = m_OutputWriter.internalString(); + std::string str; + str.reserve(testSize * block.length()); - std::string str; - str.reserve(testSize * block.length()); + // Duplicate the binary data according to the test size + for (size_t count = 0; count < testSize; ++count) { + str.append(block); + } - // Duplicate the binary data according to the test size - for (size_t count = 0; count < testSize; ++count) - { - str.append(block); - } + LOG_DEBUG("Input size is " << str.length()); - LOG_DEBUG("Input size is " << str.length()); + return str; + } - return str; - } + size_t recordsPerBlock() const { return m_RecordsPerBlock; } - size_t recordsPerBlock() const - { - return m_RecordsPerBlock; - } - - private: - size_t m_RecordsPerBlock; - ml::api::CLineifiedXmlOutputWriter m_OutputWriter; +private: + size_t m_RecordsPerBlock; + ml::api::CLineifiedXmlOutputWriter m_OutputWriter; }; -class CVisitor -{ - public: - CVisitor() - : m_RecordCount(0) - { - } +class CVisitor { +public: + CVisitor() : m_RecordCount(0) {} - //! Handle a record - bool operator()(const ml::api::CLineifiedXmlInputParser::TStrStrUMap &/*dataRowFields*/) - { - ++m_RecordCount; - return true; - } + //! 
Handle a record + bool operator()(const ml::api::CLineifiedXmlInputParser::TStrStrUMap& /*dataRowFields*/) { + ++m_RecordCount; + return true; + } - size_t recordCount() const - { - return m_RecordCount; - } + size_t recordCount() const { return m_RecordCount; } - private: - size_t m_RecordCount; +private: + size_t m_RecordCount; }; - - } -void CLineifiedXmlInputParserTest::testThroughputArbitraryConformant() -{ +void CLineifiedXmlInputParserTest::testThroughputArbitraryConformant() { LOG_INFO("Testing using a standards-conformant XML parser assuming arbitrary fields in XML documents"); this->runTest(false); } -void CLineifiedXmlInputParserTest::testThroughputCommonConformant() -{ +void CLineifiedXmlInputParserTest::testThroughputCommonConformant() { LOG_INFO("Testing using a standards-conformant XML parser assuming all XML documents have the same fields"); this->runTest(true); } -void CLineifiedXmlInputParserTest::testThroughputArbitraryRapid() -{ +void CLineifiedXmlInputParserTest::testThroughputArbitraryRapid() { LOG_INFO("Testing using a rapid XML parser assuming arbitrary fields in XML documents"); this->runTest(false); } -void CLineifiedXmlInputParserTest::testThroughputCommonRapid() -{ +void CLineifiedXmlInputParserTest::testThroughputCommonRapid() { LOG_INFO("Testing using a rapid XML parser assuming all XML documents have the same fields"); this->runTest(true); } -template -void CLineifiedXmlInputParserTest::runTest(bool allDocsSameStructure) -{ +template +void CLineifiedXmlInputParserTest::runTest(bool allDocsSameStructure) { // NB: For fair comparison with the other input formats (CSV and Google // Protocol Buffers), the input data and test size must be identical @@ -162,25 +129,19 @@ void CLineifiedXmlInputParserTest::runTest(bool allDocsSameStructure) std::istringstream input(setupVisitor.input(TEST_SIZE)); PARSER underlyingParser; - ml::api::CLineifiedXmlInputParser parser(underlyingParser, - input, - allDocsSameStructure); + ml::api::CLineifiedXmlInputParser parser(underlyingParser, input, allDocsSameStructure); CVisitor visitor; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CPPUNIT_ASSERT(parser.readStream(std::ref(visitor))); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished throughput test at " << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(setupVisitor.recordsPerBlock() * TEST_SIZE, visitor.recordCount()); - LOG_INFO("Parsing " << visitor.recordCount() << - " records took " << (end - start) << " seconds"); + LOG_INFO("Parsing " << visitor.recordCount() << " records took " << (end - start) << " seconds"); } - diff --git a/lib/api/unittest/CLineifiedXmlInputParserTest.h b/lib/api/unittest/CLineifiedXmlInputParserTest.h index e6e4af882f..35e79f56f5 100644 --- a/lib/api/unittest/CLineifiedXmlInputParserTest.h +++ b/lib/api/unittest/CLineifiedXmlInputParserTest.h @@ -8,21 +8,18 @@ #include +class CLineifiedXmlInputParserTest : public CppUnit::TestFixture { +public: + void testThroughputArbitraryConformant(); + void testThroughputCommonConformant(); + void testThroughputArbitraryRapid(); + void testThroughputCommonRapid(); -class CLineifiedXmlInputParserTest : public CppUnit::TestFixture -{ - public: - void testThroughputArbitraryConformant(); - void 
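All four throughput tests above funnel into the templated runTest(), so one body serves both XML parser implementations; only the explicit template argument at the call site changes. The instantiation sketch below assumes the conformant and rapid parsers are ml::core::CXmlParser and ml::core::CRapidXmlParser — plausible names given the test names, but they are declared outside the visible hunks:

    // One test body, two parser implementations (sketch):
    template <typename PARSER>
    void CLineifiedXmlInputParserTest::runTest(bool allDocsSameStructure) {
        PARSER underlyingParser; // constructed fresh per run
        // ... feed setupVisitor.input(TEST_SIZE) through
        // ml::api::CLineifiedXmlInputParser(underlyingParser, input, allDocsSameStructure)
    }

    // Call sites would then read, under that naming assumption:
    //     this->runTest<ml::core::CXmlParser>(false);      // conformant
    //     this->runTest<ml::core::CRapidXmlParser>(false); // rapid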
diff --git a/lib/api/unittest/CLineifiedXmlInputParserTest.h b/lib/api/unittest/CLineifiedXmlInputParserTest.h
index e6e4af882f..35e79f56f5 100644
--- a/lib/api/unittest/CLineifiedXmlInputParserTest.h
+++ b/lib/api/unittest/CLineifiedXmlInputParserTest.h
@@ -8,21 +8,18 @@
 #include
 
+class CLineifiedXmlInputParserTest : public CppUnit::TestFixture {
+public:
+    void testThroughputArbitraryConformant();
+    void testThroughputCommonConformant();
+    void testThroughputArbitraryRapid();
+    void testThroughputCommonRapid();
 
-class CLineifiedXmlInputParserTest : public CppUnit::TestFixture
-{
-    public:
-        void testThroughputArbitraryConformant();
-        void testThroughputCommonConformant();
-        void testThroughputArbitraryRapid();
-        void testThroughputCommonRapid();
+    static CppUnit::Test* suite();
 
-        static CppUnit::Test *suite();
-
-    private:
-        template <typename PARSER>
-        void runTest(bool allDocsSameStructure);
+private:
+    template <typename PARSER>
+    void runTest(bool allDocsSameStructure);
 };
 
 #endif // INCLUDED_CLineifiedXmlInputParserTest_h
-
diff --git a/lib/api/unittest/CMockDataAdder.cc b/lib/api/unittest/CMockDataAdder.cc
index e4d9e63816..ea84ab6e5c 100644
--- a/lib/api/unittest/CMockDataAdder.cc
+++ b/lib/api/unittest/CMockDataAdder.cc
@@ -10,36 +10,26 @@
 #include "CMockSearcher.h"
 
-
-CMockDataAdder::CMockDataAdder()
-{
+CMockDataAdder::CMockDataAdder() {
 }
 
-CMockDataAdder::TOStreamP CMockDataAdder::addStreamed(const std::string &index,
-                                                      const std::string &/*id*/)
-{
+CMockDataAdder::TOStreamP CMockDataAdder::addStreamed(const std::string& index, const std::string& /*id*/) {
     LOG_TRACE("Add Streamed for index " << index);
-    if (m_Streams.find(index) == m_Streams.end())
-    {
+    if (m_Streams.find(index) == m_Streams.end()) {
         m_Streams[index] = TOStreamP(new std::ostringstream);
     }
     return m_Streams[index];
 }
 
-bool CMockDataAdder::streamComplete(TOStreamP &strm,
-                                    bool /*force*/)
-{
+bool CMockDataAdder::streamComplete(TOStreamP& strm, bool /*force*/) {
     LOG_TRACE("Stream complete");
     bool found = false;
-    for (TStrOStreamPMapItr i = m_Streams.begin(); i != m_Streams.end(); ++i)
-    {
-        if (i->second == strm)
-        {
+    for (TStrOStreamPMapItr i = m_Streams.begin(); i != m_Streams.end(); ++i) {
+        if (i->second == strm) {
             LOG_TRACE("Found stream for " << i->first);
-            std::ostringstream *ss = dynamic_cast<std::ostringstream *>(i->second.get());
-            if (ss != 0)
-            {
-                const std::string &result = ss->str();
+            std::ostringstream* ss = dynamic_cast<std::ostringstream*>(i->second.get());
+            if (ss != 0) {
+                const std::string& result = ss->str();
                 LOG_TRACE("Adding data: " << result);
                 m_Events[i->first].push_back('[' + result + ']');
                 found = true;
@@ -49,14 +39,11 @@ bool CMockDataAdder::streamComplete(TOStreamP &strm,
     return found;
 }
 
-const CMockDataAdder::TStrStrVecMap &CMockDataAdder::events() const
-{
+const CMockDataAdder::TStrStrVecMap& CMockDataAdder::events() const {
     return m_Events;
 }
 
-void CMockDataAdder::clear()
-{
+void CMockDataAdder::clear() {
     m_Events.clear();
     m_Streams.clear();
 }
-
diff --git a/lib/api/unittest/CMockDataAdder.h b/lib/api/unittest/CMockDataAdder.h
index 9bc7258088..79ea7a90a9 100644
--- a/lib/api/unittest/CMockDataAdder.h
+++ b/lib/api/unittest/CMockDataAdder.h
@@ -12,7 +12,6 @@
 #include
 #include
 
-
 //! \brief
 //! Mock data adder for unit testing.
 //!
@@ -22,47 +21,43 @@
 //!
 //! IMPLEMENTATION DECISIONS:\n
 //!
-class CMockDataAdder : public ml::core::CDataAdder
-{
-    public:
-        using TStrVec = std::vector<std::string>;
-        using TStrStrVecMap = std::map<std::string, TStrVec>;
-        using TStrStrVecMapCItr = TStrStrVecMap::const_iterator;
-        using TStrOStreamPMap = std::map<std::string, TOStreamP>;
-        using TStrOStreamPMapCItr = TStrOStreamPMap::const_iterator;
-        using TStrOStreamPMapItr = TStrOStreamPMap::iterator;
-
-    public:
-        CMockDataAdder();
-
-        //! Add streamed data - return of NULL stream indicates failure.
-        //! Since the data to be written isn't known at the time this function
-        //! returns it is not possible to detect all error conditions
-        //! immediately. If the stream goes bad whilst being written to then
-        //! this also indicates failure.
-        virtual TOStreamP addStreamed(const std::string &index,
                                      const std::string &id);
-
-        //! Clients that get a stream using addStreamed() must call this
-        //! method one they've finished sending data to the stream.
-        //! They should set force to true when the very last stream is
-        //! complete, in case the persister needs to close off some
-        //! sort of cached data structure.
-        virtual bool streamComplete(TOStreamP &strm,
-                                    bool force);
-
-        //! Access persisted events
-        const TStrStrVecMap &events() const;
-
-        //! Wipe the contents of the data store
-        void clear();
-
-    private:
-        //! Persisted events
-        TStrStrVecMap m_Events;
-
-        TStrOStreamPMap m_Streams;
+class CMockDataAdder : public ml::core::CDataAdder {
+public:
+    using TStrVec = std::vector<std::string>;
+    using TStrStrVecMap = std::map<std::string, TStrVec>;
+    using TStrStrVecMapCItr = TStrStrVecMap::const_iterator;
+    using TStrOStreamPMap = std::map<std::string, TOStreamP>;
+    using TStrOStreamPMapCItr = TStrOStreamPMap::const_iterator;
+    using TStrOStreamPMapItr = TStrOStreamPMap::iterator;
+
+public:
+    CMockDataAdder();
+
+    //! Add streamed data - return of NULL stream indicates failure.
+    //! Since the data to be written isn't known at the time this function
+    //! returns it is not possible to detect all error conditions
+    //! immediately. If the stream goes bad whilst being written to then
+    //! this also indicates failure.
+    virtual TOStreamP addStreamed(const std::string& index, const std::string& id);
+
+    //! Clients that get a stream using addStreamed() must call this
+    //! method one they've finished sending data to the stream.
+    //! They should set force to true when the very last stream is
+    //! complete, in case the persister needs to close off some
+    //! sort of cached data structure.
+    virtual bool streamComplete(TOStreamP& strm, bool force);
+
+    //! Access persisted events
+    const TStrStrVecMap& events() const;
+
+    //! Wipe the contents of the data store
+    void clear();
+
+private:
+    //! Persisted events
+    TStrStrVecMap m_Events;
+
+    TStrOStreamPMap m_Streams;
 };
 
 #endif // INCLUDED_CMockDataAdder_h
-
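For orientation, the mock's life cycle as the detector tests drive it, using only the API declared above (a sketch, not part of the patch):

    CMockDataAdder adder;
    CMockDataAdder::TOStreamP strm = adder.addStreamed("index-1", "doc-1");
    *strm << "{\"state\":\"...\"}";   // caller streams the document body
    adder.streamComplete(strm, true); // mock wraps the payload as [{"state":"..."}]
    const CMockDataAdder::TStrStrVecMap& events = adder.events();
    // events["index-1"] now holds one element per completed stream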
diff --git a/lib/api/unittest/CMockDataProcessor.cc b/lib/api/unittest/CMockDataProcessor.cc
index b4d293c2a3..16ccf002f8 100644
--- a/lib/api/unittest/CMockDataProcessor.cc
+++ b/lib/api/unittest/CMockDataProcessor.cc
@@ -9,42 +9,31 @@
 #include
 
-
-CMockDataProcessor::CMockDataProcessor(ml::api::COutputHandler &outputHandler)
-    : m_OutputHandler(outputHandler),
-      m_NumRecordsHandled(0),
-      m_WriteFieldNames(true)
-{
+CMockDataProcessor::CMockDataProcessor(ml::api::COutputHandler& outputHandler)
+    : m_OutputHandler(outputHandler), m_NumRecordsHandled(0), m_WriteFieldNames(true) {
 }
 
-void CMockDataProcessor::newOutputStream()
-{
+void CMockDataProcessor::newOutputStream() {
     m_OutputHandler.newOutputStream();
 }
 
-bool CMockDataProcessor::handleRecord(const TStrStrUMap &dataRowFields)
-{
+bool CMockDataProcessor::handleRecord(const TStrStrUMap& dataRowFields) {
     // First time through we output the field names
-    if (m_WriteFieldNames)
-    {
+    if (m_WriteFieldNames) {
         TStrVec fieldNames;
         fieldNames.reserve(dataRowFields.size());
-        for (const auto &entry : dataRowFields)
-        {
+        for (const auto& entry : dataRowFields) {
             fieldNames.push_back(entry.first);
         }
 
-        if (m_OutputHandler.fieldNames(fieldNames) == false)
-        {
-            LOG_ERROR("Unable to set field names for output:\n" <<
-                      this->debugPrintRecord(dataRowFields));
+        if (m_OutputHandler.fieldNames(fieldNames) == false) {
+            LOG_ERROR("Unable to set field names for output:\n" << this->debugPrintRecord(dataRowFields));
             return false;
         }
         m_WriteFieldNames = false;
     }
 
-    if (m_OutputHandler.writeRow(dataRowFields, m_FieldOverrides) == false)
-    {
+    if (m_OutputHandler.writeRow(dataRowFields, m_FieldOverrides) == false) {
         LOG_ERROR("Unable to write output");
         return false;
     }
@@ -54,41 +43,31 @@ bool CMockDataProcessor::handleRecord(const TStrStrUMap &dataRowFields)
     return true;
 }
 
-void CMockDataProcessor::finalise()
-{
+void CMockDataProcessor::finalise() {
 }
 
-bool CMockDataProcessor::restoreState(ml::core::CDataSearcher &restoreSearcher,
-                                      ml::core_t::TTime &completeToTime)
-{
+bool CMockDataProcessor::restoreState(ml::core::CDataSearcher& restoreSearcher, ml::core_t::TTime& completeToTime) {
     // Pass on the request in case we're chained
-    if (m_OutputHandler.restoreState(restoreSearcher,
-                                     completeToTime) == false)
-    {
+    if (m_OutputHandler.restoreState(restoreSearcher, completeToTime) == false) {
         return false;
     }
 
     return true;
 }
 
-bool CMockDataProcessor::persistState(ml::core::CDataAdder &persister)
-{
+bool CMockDataProcessor::persistState(ml::core::CDataAdder& persister) {
     // Pass on the request in case we're chained
-    if (m_OutputHandler.persistState(persister) == false)
-    {
+    if (m_OutputHandler.persistState(persister) == false) {
         return false;
     }
 
     return true;
 }
 
-uint64_t CMockDataProcessor::numRecordsHandled() const
-{
+uint64_t CMockDataProcessor::numRecordsHandled() const {
     return m_NumRecordsHandled;
 }
 
-ml::api::COutputHandler &CMockDataProcessor::outputHandler()
-{
+ml::api::COutputHandler& CMockDataProcessor::outputHandler() {
     return m_OutputHandler;
 }
-
diff --git a/lib/api/unittest/CMockDataProcessor.h b/lib/api/unittest/CMockDataProcessor.h
index d30d0907ec..76b6d69843 100644
--- a/lib/api/unittest/CMockDataProcessor.h
+++ b/lib/api/unittest/CMockDataProcessor.h
@@ -14,11 +14,8 @@
 #include
 
-
-namespace ml
-{
-namespace api
-{
+namespace ml {
+namespace api {
 class COutputHandler;
 }
 }
@@ -32,42 +29,38 @@ class COutputHandler;
 //! IMPLEMENTATION DECISIONS:\n
 //! Only the minimal set of required functions are implemented.
 //!
-class CMockDataProcessor : public ml::api::CDataProcessor
-{
-    public:
-        CMockDataProcessor(ml::api::COutputHandler &outputHandler);
+class CMockDataProcessor : public ml::api::CDataProcessor {
+public:
+    CMockDataProcessor(ml::api::COutputHandler& outputHandler);
 
-        //! We're going to be writing to a new output stream
-        virtual void newOutputStream();
+    //! We're going to be writing to a new output stream
+    virtual void newOutputStream();
 
-        virtual bool handleRecord(const TStrStrUMap &dataRowFields);
+    virtual bool handleRecord(const TStrStrUMap& dataRowFields);
 
-        virtual void finalise();
+    virtual void finalise();
 
-        //! Restore previously saved state
-        virtual bool restoreState(ml::core::CDataSearcher &restoreSearcher,
-                                  ml::core_t::TTime &completeToTime);
+    //! Restore previously saved state
+    virtual bool restoreState(ml::core::CDataSearcher& restoreSearcher, ml::core_t::TTime& completeToTime);
 
-        //! Persist current state
-        virtual bool persistState(ml::core::CDataAdder &persister);
+    //! Persist current state
+    virtual bool persistState(ml::core::CDataAdder& persister);
 
-        //! How many records did we handle?
-        virtual uint64_t numRecordsHandled() const;
+    //! How many records did we handle?
+    virtual uint64_t numRecordsHandled() const;
 
-        //! Access the output handler
-        virtual ml::api::COutputHandler &outputHandler();
+    //! Access the output handler
+    virtual ml::api::COutputHandler& outputHandler();
 
-    private:
-        ml::api::COutputHandler &m_OutputHandler;
+private:
+    ml::api::COutputHandler& m_OutputHandler;
 
-        //! Empty field overrides
-        TStrStrUMap m_FieldOverrides;
+    //! Empty field overrides
+    TStrStrUMap m_FieldOverrides;
 
-        uint64_t m_NumRecordsHandled;
+    uint64_t m_NumRecordsHandled;
 
-        bool m_WriteFieldNames;
+    bool m_WriteFieldNames;
 };
 
-
 #endif // INCLUDED_ml_api_CMockDataProcessor_h
-
diff --git a/lib/api/unittest/CMockSearcher.cc b/lib/api/unittest/CMockSearcher.cc
index cf1b3f4887..8166711dc5 100644
--- a/lib/api/unittest/CMockSearcher.cc
+++ b/lib/api/unittest/CMockSearcher.cc
@@ -9,15 +9,11 @@
 #include "CMockDataAdder.h"
 
-CMockSearcher::CMockSearcher(const CMockDataAdder &mockDataAdder) :
-    m_MockDataAdder(mockDataAdder)
-{
+CMockSearcher::CMockSearcher(const CMockDataAdder& mockDataAdder) : m_MockDataAdder(mockDataAdder) {
 }
 
-CMockSearcher::TIStreamP CMockSearcher::search(size_t currentDocNum, size_t /*limit*/)
-{
-    if (currentDocNum == 0)
-    {
+CMockSearcher::TIStreamP CMockSearcher::search(size_t currentDocNum, size_t /*limit*/) {
+    if (currentDocNum == 0) {
         LOG_ERROR("Current doc number cannot be 0 - data store requires 1-based numbers");
         return TIStreamP();
     }
@@ -26,23 +22,16 @@ CMockSearcher::TIStreamP CMockSearcher::search(size_t currentDocNum, size_t /*li
     const CMockDataAdder::TStrStrVecMap events = m_MockDataAdder.events();
     CMockDataAdder::TStrStrVecMapCItr iter = events.find(m_SearchTerms[0]);
-    if (iter == events.end())
-    {
+    if (iter == events.end()) {
         LOG_TRACE("Can't find search " << m_SearchTerms[0]);
         stream.reset(new std::stringstream("{}"));
-    }
-    else
-    {
+    } else {
         LOG_TRACE("Got search data for " << m_SearchTerms[0]);
-        if (currentDocNum > iter->second.size())
-        {
+        if (currentDocNum > iter->second.size()) {
             stream.reset(new std::stringstream("[ ]"));
-        }
-        else
-        {
+        } else {
             stream.reset(new std::stringstream(iter->second[currentDocNum - 1]));
         }
     }
 
     return stream;
 }
-
diff --git a/lib/api/unittest/CMockSearcher.h b/lib/api/unittest/CMockSearcher.h
index 238f4f5717..d955737a61 100644
--- a/lib/api/unittest/CMockSearcher.h
+++ b/lib/api/unittest/CMockSearcher.h
@@ -22,19 +22,18 @@ class CMockDataAdder;
 //! appear to be for the searched index. The actual search string is NOT
 //! properly applied. This is OK for the current scope of the unit testing.
 //!
-class CMockSearcher : public ml::core::CDataSearcher
-{
-    public:
-        CMockSearcher(const CMockDataAdder &mockDataAdder);
+class CMockSearcher : public ml::core::CDataSearcher {
+public:
+    CMockSearcher(const CMockDataAdder& mockDataAdder);
 
-        //! Do a search that results in an input stream.
-        //! A return value of NULL indicates a technical problem with the
-        //! creation of the stream. Other errors may be indicated by the
-        //! returned stream going into the "bad" state.
-        virtual TIStreamP search(size_t currentDocNum, size_t limit);
+    //! Do a search that results in an input stream.
+    //! A return value of NULL indicates a technical problem with the
+    //! creation of the stream. Other errors may be indicated by the
+    //! returned stream going into the "bad" state.
+    virtual TIStreamP search(size_t currentDocNum, size_t limit);
 
-    private:
-        const CMockDataAdder &m_MockDataAdder;
+private:
+    const CMockDataAdder& m_MockDataAdder;
 };
 
 #endif // INCLUDED_CMockSearcher_h
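CMockSearcher is the read side of CMockDataAdder: whatever streamComplete() filed under an index becomes retrievable one document per call. Its contract, as the tests rely on it, in a short sketch (this assumes the search terms were configured beforehand through the CDataSearcher base class, which happens outside the visible hunks):

    CMockSearcher searcher(adder);
    CMockSearcher::TIStreamP strm = searcher.search(1, 1); // doc numbers are 1-based
    // searcher.search(0, ...)  -> logs an error and returns a NULL stream
    // unknown index            -> stream containing "{}"
    // past the last document   -> stream containing "[ ]"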
diff --git a/lib/api/unittest/CModelPlotDataJsonWriterTest.cc b/lib/api/unittest/CModelPlotDataJsonWriterTest.cc
index 53f52885b9..1d9ba7505a 100644
--- a/lib/api/unittest/CModelPlotDataJsonWriterTest.cc
+++ b/lib/api/unittest/CModelPlotDataJsonWriterTest.cc
@@ -15,27 +15,24 @@
 #include
 #include
 
-CppUnit::Test* CModelPlotDataJsonWriterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CModelPlotDataJsonWriterTest");
+CppUnit::Test* CModelPlotDataJsonWriterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelPlotDataJsonWriterTest");
 
-    suiteOfTests->addTest( new CppUnit::TestCaller<CModelPlotDataJsonWriterTest>(
-                               "CModelPlotDataJsonWriterTest::testWriteFlat",
-                               &CModelPlotDataJsonWriterTest::testWriteFlat) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CModelPlotDataJsonWriterTest>("CModelPlotDataJsonWriterTest::testWriteFlat",
+                                                                                &CModelPlotDataJsonWriterTest::testWriteFlat));
 
     return suiteOfTests;
 }
 
-void CModelPlotDataJsonWriterTest::testWriteFlat()
-{
+void CModelPlotDataJsonWriterTest::testWriteFlat() {
     std::ostringstream sstream;
 
     {
-        ml::core::CJsonOutputStreamWrapper outputStream (sstream);
+        ml::core::CJsonOutputStreamWrapper outputStream(sstream);
         ml::api::CModelPlotDataJsonWriter writer(outputStream);
 
         ml::model::CModelPlotData plotData(1, "pName", "pValue", "", "bName", 300, 1);
-        plotData.get(ml::model_t::E_IndividualCountByBucketAndPerson, "bName") = ml::model::CModelPlotData::SByFieldData(1.0, 2.0, 3.0);
+        plotData.get(ml::model_t::E_IndividualCountByBucketAndPerson, "bName") = ml::model::CModelPlotData::SByFieldData(1.0, 2.0, 3.0);
 
         writer.writeFlat("job-id", plotData);
     }
 
@@ -43,14 +40,13 @@ void CModelPlotDataJsonWriterTest::testWriteFlat()
     rapidjson::Document doc;
     doc.Parse(sstream.str());
     CPPUNIT_ASSERT(!doc.HasParseError());
-    const rapidjson::Value &firstElement = doc[0];
+    const rapidjson::Value& firstElement = doc[0];
     CPPUNIT_ASSERT(firstElement.HasMember("model_plot"));
-    const rapidjson::Value &modelPlot = firstElement["model_plot"];
+    const rapidjson::Value& modelPlot = firstElement["model_plot"];
     CPPUNIT_ASSERT(modelPlot.HasMember("job_id"));
     CPPUNIT_ASSERT_EQUAL(std::string("job-id"), std::string(modelPlot["job_id"].GetString()));
     CPPUNIT_ASSERT(modelPlot.HasMember("model_feature"));
-    CPPUNIT_ASSERT_EQUAL(std::string("'count per bucket by person'"),
-                         std::string(modelPlot["model_feature"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("'count per bucket by person'"), std::string(modelPlot["model_feature"].GetString()));
     CPPUNIT_ASSERT(modelPlot.HasMember("timestamp"));
     CPPUNIT_ASSERT_EQUAL(int64_t(1000), modelPlot["timestamp"].GetInt64());
     CPPUNIT_ASSERT(modelPlot.HasMember("partition_field_name"));
diff --git a/lib/api/unittest/CModelPlotDataJsonWriterTest.h b/lib/api/unittest/CModelPlotDataJsonWriterTest.h
index 379a342056..8c74c475f4 100644
--- a/lib/api/unittest/CModelPlotDataJsonWriterTest.h
+++ b/lib/api/unittest/CModelPlotDataJsonWriterTest.h
@@ -8,13 +8,11 @@
 #include
 
-class CModelPlotDataJsonWriterTest : public CppUnit::TestFixture
-{
-    public:
-        void testWriteFlat();
+class CModelPlotDataJsonWriterTest : public CppUnit::TestFixture {
+public:
+    void testWriteFlat();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CModelPlotDataJsonWriterTest_h
-
diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc
index e1b9065930..1c9f277b98 100644
--- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc
+++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc
@@ -19,29 +19,26 @@
 using namespace ml;
 using namespace api;
 
-CppUnit::Test *CModelSnapshotJsonWriterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CModelSnapshotJsonWriterTest");
-    suiteOfTests->addTest( new CppUnit::TestCaller<CModelSnapshotJsonWriterTest>(
-                               "CModelSnapshotJsonWriterTest::testWrite",
-                               &CModelSnapshotJsonWriterTest::testWrite) );
+CppUnit::Test* CModelSnapshotJsonWriterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelSnapshotJsonWriterTest");
+    suiteOfTests->addTest(new CppUnit::TestCaller<CModelSnapshotJsonWriterTest>("CModelSnapshotJsonWriterTest::testWrite",
+                                                                                &CModelSnapshotJsonWriterTest::testWrite));
 
     return suiteOfTests;
 }
 
-void CModelSnapshotJsonWriterTest::testWrite()
-{
+void CModelSnapshotJsonWriterTest::testWrite() {
     std::ostringstream sstream;
 
     // The output writer won't close the JSON structures until is is destroyed
     {
         model::CResourceMonitor::SResults modelSizeStats{
-            10000, // bytes used
-            3, // # by fields
-            1, // # partition fields
-            150, // # over fields
-            4, // # allocation failures
+            10000,                     // bytes used
+            3,                         // # by fields
+            1,                         // # partition fields
+            150,                       // # over fields
+            4,                         // # allocation failures
             model_t::E_MemoryStatusOk, // memory status
-            core_t::TTime(1521046309) // bucket start time
+            core_t::TTime(1521046309)  // bucket start time
         };
 
         CModelSnapshotJsonWriter::SModelSnapshotReport report{
@@ -53,7 +50,7 @@ void CModelSnapshotJsonWriterTest::testWrite()
             modelSizeStats,
             "some normalizer state",
             core_t::TTime(1521046409), // last record time
-            core_t::TTime(1521040000) // last result time
+            core_t::TTime(1521040000)  // last result time
         };
 
         core::CJsonOutputStreamWrapper wrappedOutStream(sstream);
@@ -67,11 +64,11 @@ void CModelSnapshotJsonWriterTest::testWrite()
     CPPUNIT_ASSERT(arrayDoc.IsArray());
     CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), arrayDoc.Size());
 
-    const rapidjson::Value &object = arrayDoc[rapidjson::SizeType(0)];
+    const rapidjson::Value& object = arrayDoc[rapidjson::SizeType(0)];
     CPPUNIT_ASSERT(object.IsObject());
 
     CPPUNIT_ASSERT(object.HasMember("model_snapshot"));
-    const rapidjson::Value &snapshot = object["model_snapshot"];
+    const rapidjson::Value& snapshot = object["model_snapshot"];
     CPPUNIT_ASSERT(snapshot.HasMember("job_id"));
     CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(snapshot["job_id"].GetString()));
     CPPUNIT_ASSERT(snapshot.HasMember("min_version"));
@@ -90,7 +87,7 @@ void CModelSnapshotJsonWriterTest::testWrite()
     CPPUNIT_ASSERT_EQUAL(int64_t(1521040000000), snapshot["latest_result_time_stamp"].GetInt64());
 
     CPPUNIT_ASSERT(snapshot.HasMember("model_size_stats"));
-    const rapidjson::Value &modelSizeStats = snapshot["model_size_stats"];
+    const rapidjson::Value& modelSizeStats = snapshot["model_size_stats"];
     CPPUNIT_ASSERT(modelSizeStats.HasMember("job_id"));
     CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(modelSizeStats["job_id"].GetString()));
     CPPUNIT_ASSERT(modelSizeStats.HasMember("model_bytes"));
@@ -110,7 +107,7 @@ void CModelSnapshotJsonWriterTest::testWrite()
     CPPUNIT_ASSERT(modelSizeStats.HasMember("log_time"));
 
     CPPUNIT_ASSERT(snapshot.HasMember("quantiles"));
-    const rapidjson::Value &quantiles = snapshot["quantiles"];
+    const rapidjson::Value& quantiles = snapshot["quantiles"];
     CPPUNIT_ASSERT(quantiles.HasMember("job_id"));
     CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(quantiles["job_id"].GetString()));
     CPPUNIT_ASSERT(quantiles.HasMember("quantile_state"));
diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.h b/lib/api/unittest/CModelSnapshotJsonWriterTest.h
index 7395082573..0b34b5217e 100644
--- a/lib/api/unittest/CModelSnapshotJsonWriterTest.h
+++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.h
@@ -8,14 +8,11 @@
 #include
 
+class CModelSnapshotJsonWriterTest : public CppUnit::TestFixture {
+public:
+    void testWrite();
 
-class CModelSnapshotJsonWriterTest : public CppUnit::TestFixture
-{
-    public:
-        void testWrite();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CModelSnapshotJsonWriterTest_h
-
diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc
index 4a98a4f07b..2000358ac5 100644
--- a/lib/api/unittest/CMultiFileDataAdderTest.cc
+++ b/lib/api/unittest/CMultiFileDataAdderTest.cc
@@ -7,8 +7,8 @@
 #include
 #include
 
-#include
 #include
+#include
 
 #include
 
@@ -38,48 +38,37 @@
 #include
 #include
 
-namespace
-{
+namespace {
 using TStrVec = std::vector<std::string>;
 
 void reportPersistComplete(ml::api::CModelSnapshotJsonWriter::SModelSnapshotReport modelSnapshotReport,
-                           std::string &snapshotIdOut,
-                           size_t &numDocsOut)
-{
+                           std::string& snapshotIdOut,
+                           size_t& numDocsOut) {
     LOG_INFO("Persist complete with description: " << modelSnapshotReport.s_Description);
 
     snapshotIdOut = modelSnapshotReport.s_SnapshotId;
     numDocsOut = modelSnapshotReport.s_NumDocs;
 }
-
 }
 
-CppUnit::Test *CMultiFileDataAdderTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMultiFileDataAdderTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>(
-                              "CMultiFileDataAdderTest::testSimpleWrite",
-                              &CMultiFileDataAdderTest::testSimpleWrite) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>(
-                              "CMultiFileDataAdderTest::testDetectorPersistBy",
-                              &CMultiFileDataAdderTest::testDetectorPersistBy) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>(
-                              "CMultiFileDataAdderTest::testDetectorPersistOver",
-                              &CMultiFileDataAdderTest::testDetectorPersistOver) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>(
-                              "CMultiFileDataAdderTest::testDetectorPersistPartition",
-                              &CMultiFileDataAdderTest::testDetectorPersistPartition) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>(
-                              "CMultiFileDataAdderTest::testDetectorPersistDc",
-                              &CMultiFileDataAdderTest::testDetectorPersistDc) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>(
-                              "CMultiFileDataAdderTest::testDetectorPersistCount",
-                              &CMultiFileDataAdderTest::testDetectorPersistCount) );
+CppUnit::Test* CMultiFileDataAdderTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultiFileDataAdderTest");
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testSimpleWrite",
+                                                                           &CMultiFileDataAdderTest::testSimpleWrite));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistBy",
+                                                                           &CMultiFileDataAdderTest::testDetectorPersistBy));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistOver",
+                                                                           &CMultiFileDataAdderTest::testDetectorPersistOver));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistPartition",
+                                                                           &CMultiFileDataAdderTest::testDetectorPersistPartition));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistDc",
+                                                                           &CMultiFileDataAdderTest::testDetectorPersistDc));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistCount",
+                                                                           &CMultiFileDataAdderTest::testDetectorPersistCount));
 
     return suiteOfTests;
 }
 
-void CMultiFileDataAdderTest::testSimpleWrite()
-{
+void CMultiFileDataAdderTest::testSimpleWrite() {
     static const std::string EVENT("Hello Event");
     static const std::string SUMMARY_EVENT("Hello Summary Event");
 
@@ -139,49 +128,30 @@ void CMultiFileDataAdderTest::testSimpleWrite()
     CPPUNIT_ASSERT_NO_THROW(boost::filesystem::remove_all(workDir));
 }
 
-void CMultiFileDataAdderTest::testDetectorPersistBy()
-{
-    this->detectorPersistHelper("testfiles/new_mlfields.conf",
-                                "testfiles/big_ascending.txt",
-                                0,
-                                "%d/%b/%Y:%T %z");
+void CMultiFileDataAdderTest::testDetectorPersistBy() {
+    this->detectorPersistHelper("testfiles/new_mlfields.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z");
 }
 
-void CMultiFileDataAdderTest::testDetectorPersistOver()
-{
-    this->detectorPersistHelper("testfiles/new_mlfields_over.conf",
-                                "testfiles/big_ascending.txt",
-                                0,
-                                "%d/%b/%Y:%T %z");
+void CMultiFileDataAdderTest::testDetectorPersistOver() {
+    this->detectorPersistHelper("testfiles/new_mlfields_over.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z");
 }
 
-void CMultiFileDataAdderTest::testDetectorPersistPartition()
-{
-    this->detectorPersistHelper("testfiles/new_mlfields_partition.conf",
-                                "testfiles/big_ascending.txt",
-                                0,
-                                "%d/%b/%Y:%T %z");
+void CMultiFileDataAdderTest::testDetectorPersistPartition() {
+    this->detectorPersistHelper("testfiles/new_mlfields_partition.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z");
 }
 
-void CMultiFileDataAdderTest::testDetectorPersistDc()
-{
-    this->detectorPersistHelper("testfiles/new_persist_dc.conf",
-                                "testfiles/files_users_programs.csv",
-                                5);
+void CMultiFileDataAdderTest::testDetectorPersistDc() {
+    this->detectorPersistHelper("testfiles/new_persist_dc.conf", "testfiles/files_users_programs.csv", 5);
 }
 
-void CMultiFileDataAdderTest::testDetectorPersistCount()
-{
-    this->detectorPersistHelper("testfiles/new_persist_count.conf",
-                                "testfiles/files_users_programs.csv",
-                                5);
+void CMultiFileDataAdderTest::testDetectorPersistCount() {
+    this->detectorPersistHelper("testfiles/new_persist_count.conf", "testfiles/files_users_programs.csv", 5);
 }
 
-void CMultiFileDataAdderTest::detectorPersistHelper(const std::string &configFileName,
-                                                    const std::string &inputFilename,
+void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFileName,
+                                                    const std::string& inputFilename,
                                                     int latencyBuckets,
-                                                    const std::string &timeFormat)
-{
+                                                    const std::string& timeFormat) {
     // Start by creating a detector with non-trivial state
     static const ml::core_t::TTime BUCKET_SIZE(3600);
     static const std::string JOB_ID("job");
 
@@ -192,20 +162,14 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string &configFil
     std::ofstream outputStrm(ml::core::COsFileFuncs::NULL_FILENAME);
     CPPUNIT_ASSERT(outputStrm.is_open());
 
-    ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm);
+    ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
 
     ml::model::CLimits limits;
     ml::api::CFieldConfig fieldConfig;
     CPPUNIT_ASSERT(fieldConfig.initFromFile(configFileName));
 
-    ml::model::CAnomalyDetectorModelConfig modelConfig =
-            ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE,
-                                                                  ml::model_t::E_None,
-                                                                  "",
-                                                                  BUCKET_SIZE * latencyBuckets,
-                                                                  0,
-                                                                  false,
-                                                                  "");
+    ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(
+        BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false, "");
 
     std::string origSnapshotId;
     std::size_t numOrigDocs(0);
@@ -214,10 +178,7 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string &configFil
                                 fieldConfig,
                                 modelConfig,
                                 wrappedOutputStream,
-                                boost::bind(&reportPersistComplete,
-                                            _1,
-                                            boost::ref(origSnapshotId),
-                                            boost::ref(numOrigDocs)),
+                                boost::bind(&reportPersistComplete, _1, boost::ref(origSnapshotId), boost::ref(numOrigDocs)),
                                 nullptr,
                                 -1,
                                 "time",
 
     using TScopedInputParserP = boost::scoped_ptr<ml::api::CInputParser>;
     TScopedInputParserP parser;
-    if (inputFilename.rfind(".csv") == inputFilename.length() - 4)
-    {
+    if (inputFilename.rfind(".csv") == inputFilename.length() - 4) {
         parser.reset(new ml::api::CCsvInputParser(inputStrm));
-    }
-    else
-    {
+    } else {
         parser.reset(new ml::api::CLineifiedJsonInputParser(inputStrm));
     }
 
-    CPPUNIT_ASSERT(parser->readStream(boost::bind(&ml::api::CAnomalyJob::handleRecord,
-                                                  &origJob,
-                                                  _1)));
+    CPPUNIT_ASSERT(parser->readStream(boost::bind(&ml::api::CAnomalyJob::handleRecord, &origJob, _1)));
 
     // Persist the detector state to file(s)
 
@@ -254,8 +210,7 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string &configFil
     std::string temp;
 
     TStrVec origFileContents(numOrigDocs);
-    for (size_t index = 0; index < numOrigDocs; ++index)
-    {
+    for (size_t index = 0; index < numOrigDocs; ++index) {
         std::string expectedOrigFilename(baseOrigOutputFilename);
         expectedOrigFilename += "/_";
         expectedOrigFilename += ml::api::CAnomalyJob::ML_STATE_INDEX;
         expectedOrigFilename +=
         LOG_DEBUG("Trying to open file: " << expectedOrigFilename);
         std::ifstream origFile(expectedOrigFilename.c_str());
         CPPUNIT_ASSERT(origFile.is_open());
-        std::string json((std::istreambuf_iterator<char>(origFile)),
-                         std::istreambuf_iterator<char>());
+        std::string json((std::istreambuf_iterator<char>(origFile)), std::istreambuf_iterator<char>());
         origFileContents[index] = json;
 
         // Ensure that the JSON is valid, by parsing string using Rapidjson
@@ -284,10 +238,7 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string &configFil
                                     fieldConfig,
                                     modelConfig,
                                     wrappedOutputStream,
-                                    boost::bind(&reportPersistComplete,
-                                                _1,
-                                                boost::ref(restoredSnapshotId),
-                                                boost::ref(numRestoredDocs)));
+                                    boost::bind(&reportPersistComplete, _1, boost::ref(restoredSnapshotId), boost::ref(numRestoredDocs)));
 
     {
         ml::core_t::TTime completeToTime(0);
@@ -311,8 +262,7 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string &configFil
     std::string restoredBaseDocId(JOB_ID + '_' + ml::api::CAnomalyJob::STATE_TYPE + '_' + restoredSnapshotId);
 
-    for (size_t index = 0; index < numRestoredDocs; ++index)
-    {
+    for (size_t index = 0; index < numRestoredDocs; ++index) {
         std::string expectedRestoredFilename(baseRestoredOutputFilename);
         expectedRestoredFilename += "/_";
         expectedRestoredFilename += ml::api::CAnomalyJob::ML_STATE_INDEX;
         expectedRestoredFilename +=
         expectedRestoredFilename += ml::test::CMultiFileDataAdder::JSON_FILE_EXT;
         std::ifstream restoredFile(expectedRestoredFilename.c_str());
         CPPUNIT_ASSERT(restoredFile.is_open());
-        std::string json((std::istreambuf_iterator<char>(restoredFile)),
-                         std::istreambuf_iterator<char>());
+        std::string json((std::istreambuf_iterator<char>(restoredFile)), std::istreambuf_iterator<char>());
 
         CPPUNIT_ASSERT_EQUAL(origFileContents[index], json);
     }
 
@@ -333,4 +282,3 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string &configFil
     boost::filesystem::path restoredDir(baseRestoredOutputFilename);
     CPPUNIT_ASSERT_NO_THROW(boost::filesystem::remove_all(restoredDir));
 }
-
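detectorPersistHelper() is a symmetry check: state persisted by a freshly restored job must match, byte for byte, the files it was restored from. The flow above in outline (a restatement of the test's logic, not new behaviour):

    // 1. run the detector over the input and persist  -> origFileContents[0..numOrigDocs)
    // 2. restore a second job from exactly those state files
    // 3. persist the restored job                     -> restored state files
    // 4. for each index:
    //        CPPUNIT_ASSERT_EQUAL(origFileContents[index], json);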
diff --git a/lib/api/unittest/CMultiFileDataAdderTest.h b/lib/api/unittest/CMultiFileDataAdderTest.h
index a56b420436..f970dc5123 100644
--- a/lib/api/unittest/CMultiFileDataAdderTest.h
+++ b/lib/api/unittest/CMultiFileDataAdderTest.h
@@ -8,25 +8,22 @@
 #include
 
+class CMultiFileDataAdderTest : public CppUnit::TestFixture {
+public:
+    void testSimpleWrite();
+    void testDetectorPersistBy();
+    void testDetectorPersistOver();
+    void testDetectorPersistPartition();
+    void testDetectorPersistDc();
+    void testDetectorPersistCount();
 
-class CMultiFileDataAdderTest : public CppUnit::TestFixture
-{
-    public:
-        void testSimpleWrite();
-        void testDetectorPersistBy();
-        void testDetectorPersistOver();
-        void testDetectorPersistPartition();
-        void testDetectorPersistDc();
-        void testDetectorPersistCount();
+    static CppUnit::Test* suite();
 
-        static CppUnit::Test *suite();
-
-    private:
-        void detectorPersistHelper(const std::string &configFileName,
-                                   const std::string &inputFilename,
-                                   int latencyBuckets,
-                                   const std::string &timeFormat = std::string());
+private:
+    void detectorPersistHelper(const std::string& configFileName,
+                               const std::string& inputFilename,
+                               int latencyBuckets,
+                               const std::string& timeFormat = std::string());
 };
 
 #endif // INCLUDED_CMultiFileDataAdderTest_h
-
diff --git a/lib/api/unittest/COutputChainerTest.cc b/lib/api/unittest/COutputChainerTest.cc
index 11092882b5..3ec73adf56 100644
--- a/lib/api/unittest/COutputChainerTest.cc
+++ b/lib/api/unittest/COutputChainerTest.cc
@@ -21,20 +21,16 @@
 #include
 
+CppUnit::Test* COutputChainerTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COutputChainerTest");
 
-CppUnit::Test *COutputChainerTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("COutputChainerTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<COutputChainerTest>(
-                               "COutputChainerTest::testChaining",
-                               &COutputChainerTest::testChaining) );
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<COutputChainerTest>("COutputChainerTest::testChaining", &COutputChainerTest::testChaining));
 
     return suiteOfTests;
 }
 
-void COutputChainerTest::testChaining()
-{
+void COutputChainerTest::testChaining() {
     static const ml::core_t::TTime BUCKET_SIZE(3600);
 
     std::string inputFileName("testfiles/big_ascending.txt");
@@ -47,7 +43,7 @@ void COutputChainerTest::testChaining()
     std::ofstream outputStrm(outputFileName.c_str());
     CPPUNIT_ASSERT(outputStrm.is_open());
 
-    ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm);
+    ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
 
     // Set up the processing chain as:
     // big.txt -> typer -> chainer -> detector -> chainerOutput.txt
@@ -55,8 +51,7 @@ void COutputChainerTest::testChaining()
     ml::api::CFieldConfig fieldConfig;
     CPPUNIT_ASSERT(fieldConfig.initFromFile("testfiles/new_mlfields.conf"));
 
-    ml::model::CAnomalyDetectorModelConfig modelConfig =
-            ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE);
+    ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE);
 
     ml::api::CAnomalyJob job("job",
                              limits,
@@ -75,9 +70,7 @@ void COutputChainerTest::testChaining()
 
     ml::api::CLineifiedJsonInputParser parser(inputStrm);
 
-    CPPUNIT_ASSERT(parser.readStream(boost::bind(&CMockDataProcessor::handleRecord,
-                                                 &mockProcessor,
-                                                 _1)));
+    CPPUNIT_ASSERT(parser.readStream(boost::bind(&CMockDataProcessor::handleRecord, &mockProcessor, _1)));
     }
 
     // Check the results by re-reading the output file
@@ -87,8 +80,7 @@ void COutputChainerTest::testChaining()
 
     std::string expectedLineStart("{\"bucket\":{\"job_id\":\"job\",\"timestamp\":1431853200000,");
 
-    while (line.length() == 0 || line.find(modelSizeString) != std::string::npos)
-    {
+    while (line.length() == 0 || line.find(modelSizeString) != std::string::npos) {
         std::getline(reReadStrm, line);
         LOG_DEBUG("Read line: " << line);
     }
@@ -104,4 +96,3 @@ void COutputChainerTest::testChaining()
     reReadStrm.close();
     CPPUNIT_ASSERT_EQUAL(0, ::remove(outputFileName.c_str()));
 }
-
diff --git a/lib/api/unittest/COutputChainerTest.h b/lib/api/unittest/COutputChainerTest.h
index 3145ef92e8..e8cfaf8a22 100644
--- a/lib/api/unittest/COutputChainerTest.h
+++ b/lib/api/unittest/COutputChainerTest.h
@@ -8,14 +8,11 @@
 #include
 
+class COutputChainerTest : public CppUnit::TestFixture {
+public:
+    void testChaining();
 
-class COutputChainerTest : public CppUnit::TestFixture
-{
-    public:
-        void testChaining();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_COutputChainerTest_h
-
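The chain under test is typer -> chainer -> detector: an upstream CDataProcessor writes rows straight into a downstream one with no intermediate serialisation. The wiring sketch below is an assumption based on the test's own comment; the COutputChainer construction itself sits outside the visible hunks:

    ml::api::CAnomalyJob job(/* ... as constructed above ... */);
    ml::api::COutputChainer outputChainer(job);      // adapts 'job' as an output handler
    CMockDataProcessor mockProcessor(outputChainer); // upstream processor writes into it
    // every record accepted by mockProcessor.handleRecord() is forwarded to 'job'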
"CRestorePreviousStateTest::testRestoreNormalizer", - &CRestorePreviousStateTest::testRestoreNormalizer) ); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CRestorePreviousStateTest::testRestoreCategorizer", - &CRestorePreviousStateTest::testRestoreCategorizer) ); +CppUnit::Test* CRestorePreviousStateTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRestorePreviousStateTest"); + suiteOfTests->addTest(new CppUnit::TestCaller("CRestorePreviousStateTest::testRestoreDetectorPersistBy", + &CRestorePreviousStateTest::testRestoreDetectorBy)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRestorePreviousStateTest::testRestoreDetectorOver", + &CRestorePreviousStateTest::testRestoreDetectorOver)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRestorePreviousStateTest::testRestoreDetectorPartition", + &CRestorePreviousStateTest::testRestoreDetectorPartition)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRestorePreviousStateTest::testRestoreDetectorDc", + &CRestorePreviousStateTest::testRestoreDetectorDc)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRestorePreviousStateTest::testRestoreDetectorCount", + &CRestorePreviousStateTest::testRestoreDetectorCount)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRestorePreviousStateTest::testRestoreNormalizer", + &CRestorePreviousStateTest::testRestoreNormalizer)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRestorePreviousStateTest::testRestoreCategorizer", + &CRestorePreviousStateTest::testRestoreCategorizer)); return suiteOfTests; } -void CRestorePreviousStateTest::testRestoreDetectorBy() -{ - for (const auto &version : BWC_VERSIONS) - { +void CRestorePreviousStateTest::testRestoreDetectorBy() { + for (const auto& version : BWC_VERSIONS) { LOG_INFO("Test restoring state from version " << version.s_Version); this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/by_detector_state.json", - "testfiles/new_mlfields.conf", - version.s_DetectorRestoreIsSymmetric, 0); + "testfiles/new_mlfields.conf", + version.s_DetectorRestoreIsSymmetric, + 0); } } -void CRestorePreviousStateTest::testRestoreDetectorOver() -{ - for (const auto &version : BWC_VERSIONS) - { +void CRestorePreviousStateTest::testRestoreDetectorOver() { + for (const auto& version : BWC_VERSIONS) { LOG_INFO("Test restoring state from version " << version.s_Version); this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/over_detector_state.json", - "testfiles/new_mlfields_over.conf", - version.s_DetectorRestoreIsSymmetric, 0); + "testfiles/new_mlfields_over.conf", + version.s_DetectorRestoreIsSymmetric, + 0); } } -void CRestorePreviousStateTest::testRestoreDetectorPartition() -{ - for (const auto &version : BWC_VERSIONS) - { +void CRestorePreviousStateTest::testRestoreDetectorPartition() { + for (const auto& version : BWC_VERSIONS) { LOG_INFO("Test restoring state from version " << version.s_Version); this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/partition_detector_state.json", - "testfiles/new_mlfields_partition.conf", - version.s_DetectorRestoreIsSymmetric, 0); + "testfiles/new_mlfields_partition.conf", + version.s_DetectorRestoreIsSymmetric, + 0); } } -void CRestorePreviousStateTest::testRestoreDetectorDc() -{ - for (const auto &version : BWC_VERSIONS) - { +void CRestorePreviousStateTest::testRestoreDetectorDc() { + for (const auto& version : BWC_VERSIONS) { LOG_INFO("Test restoring state from version " << version.s_Version); 
this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/dc_detector_state.json", - "testfiles/new_persist_dc.conf", - version.s_DetectorRestoreIsSymmetric, 5); + "testfiles/new_persist_dc.conf", + version.s_DetectorRestoreIsSymmetric, + 5); } } -void CRestorePreviousStateTest::testRestoreDetectorCount() -{ - for (const auto &version : BWC_VERSIONS) - { +void CRestorePreviousStateTest::testRestoreDetectorCount() { + for (const auto& version : BWC_VERSIONS) { LOG_INFO("Test restoring state from version " << version.s_Version); this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/count_detector_state.json", - "testfiles/new_persist_count.conf", - version.s_DetectorRestoreIsSymmetric, 5); + "testfiles/new_persist_count.conf", + version.s_DetectorRestoreIsSymmetric, + 5); } } -void CRestorePreviousStateTest::testRestoreNormalizer() -{ - for (const auto &version : BWC_VERSIONS) - { +void CRestorePreviousStateTest::testRestoreNormalizer() { + for (const auto& version : BWC_VERSIONS) { ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); ml::api::CCsvOutputWriter outputWriter; ml::api::CResultNormalizer normalizer(modelConfig, outputWriter); @@ -149,23 +131,20 @@ void CRestorePreviousStateTest::testRestoreNormalizer() } } -void CRestorePreviousStateTest::testRestoreCategorizer() -{ - for (const auto &version : BWC_VERSIONS) - { +void CRestorePreviousStateTest::testRestoreCategorizer() { + for (const auto& version : BWC_VERSIONS) { LOG_INFO("Test restoring state from version " << version.s_Version); categorizerRestoreHelper("testfiles/state/" + version.s_Version + "/categorizer_state.json", - version.s_CategorizerRestoreIsSymmetric); + version.s_CategorizerRestoreIsSymmetric); } } -void CRestorePreviousStateTest::categorizerRestoreHelper(const std::string &stateFile, bool isSymmetric) -{ +void CRestorePreviousStateTest::categorizerRestoreHelper(const std::string& stateFile, bool isSymmetric) { ml::model::CLimits limits; ml::api::CFieldConfig config("count", "mlcategory"); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); ml::api::CJsonOutputWriter writer("job", wrappedOutputStream); ml::api::CFieldDataTyper restoredTyper("job", config, limits, writer, writer); @@ -186,13 +165,12 @@ void CRestorePreviousStateTest::categorizerRestoreHelper(const std::string &stat CPPUNIT_ASSERT(restoredTyper.restoreState(retriever, completeToTime)); } - if (isSymmetric) - { + if (isSymmetric) { // Test the persisted state of the restored detector is the // same as the riginial std::string newPersistedState; { - std::ostringstream *strm(0); + std::ostringstream* strm(0); ml::api::CSingleStreamDataAdder::TOStreamP ptr(strm = new std::ostringstream()); ml::api::CSingleStreamDataAdder persister(ptr); CPPUNIT_ASSERT(restoredTyper.persistState(persister)); @@ -202,11 +180,10 @@ void CRestorePreviousStateTest::categorizerRestoreHelper(const std::string &stat } } -void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string &stateFile, - const std::string &configFileName, +void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string& stateFile, + const std::string& configFileName, bool isSymmetric, - int latencyBuckets) -{ + int latencyBuckets) { // Open the input state file std::ifstream inputStrm(stateFile.c_str()); CPPUNIT_ASSERT(inputStrm.is_open()); @@ 
-220,37 +197,28 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string & ml::api::CFieldConfig fieldConfig; CPPUNIT_ASSERT(fieldConfig.initFromFile(configFileName)); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE, - ml::model_t::E_None, - "", - BUCKET_SIZE * latencyBuckets, - 0, - false, - ""); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( + BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false, ""); std::ofstream outputStrm(ml::core::COsFileFuncs::NULL_FILENAME); CPPUNIT_ASSERT(outputStrm.is_open()); - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); std::string restoredSnapshotId; std::size_t numRestoredDocs(0); ml::api::CAnomalyJob restoredJob(JOB_ID, - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - boost::bind(&reportPersistComplete, - _1, - boost::ref(restoredSnapshotId), - boost::ref(numRestoredDocs))); + limits, + fieldConfig, + modelConfig, + wrappedOutputStream, + boost::bind(&reportPersistComplete, _1, boost::ref(restoredSnapshotId), boost::ref(numRestoredDocs))); std::size_t numDocsInStateFile(0); { ml::core_t::TTime completeToTime(0); - std::stringstream *output = new std::stringstream(); + std::stringstream* output = new std::stringstream(); ml::api::CSingleStreamSearcher::TIStreamP strm(output); boost::iostreams::filtering_ostream in; in.push(ml::api::CStateRestoreStreamFilter()); @@ -260,18 +228,17 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string & numDocsInStateFile = in.component(0)->getDocCount(); - ml::api::CSingleStreamSearcher retriever(strm); + ml::api::CSingleStreamSearcher retriever(strm); CPPUNIT_ASSERT(restoredJob.restoreState(retriever, completeToTime)); CPPUNIT_ASSERT(completeToTime > 0); } - if (isSymmetric) - { + if (isSymmetric) { // Test the persisted state of the restored detector is the // same as the original std::string newPersistedState; { - std::ostringstream *strm(0); + std::ostringstream* strm(0); ml::api::CSingleStreamDataAdder::TOStreamP ptr(strm = new std::ostringstream()); ml::api::CSingleStreamDataAdder persister(ptr); CPPUNIT_ASSERT(restoredJob.persistState(persister)); @@ -283,8 +250,7 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string & } } -std::string CRestorePreviousStateTest::stripDocIds(const std::string &persistedState) -{ +std::string CRestorePreviousStateTest::stripDocIds(const std::string& persistedState) { // State is persisted in the Elasticsearch bulk format. // This is an index action followed by the document source: // { "index": { "id": "foo" ... 
}}\n @@ -295,11 +261,9 @@ std::string CRestorePreviousStateTest::stripDocIds(const std::string &persistedS std::ostringstream output; std::string line; - while (std::getline(input, line)) - { + while (std::getline(input, line)) { // Remove lines with the document IDs - if (line.compare(0, 16, "{\"index\":{\"_id\":") != 0) - { + if (line.compare(0, 16, "{\"index\":{\"_id\":") != 0) { output << line; } } @@ -308,4 +272,3 @@ std::string CRestorePreviousStateTest::stripDocIds(const std::string &persistedS LOG_TRACE("Stripped:" << strippedText << ml::core_t::LINE_ENDING); return strippedText; } - diff --git a/lib/api/unittest/CRestorePreviousStateTest.h b/lib/api/unittest/CRestorePreviousStateTest.h index ea85706e04..5dd11379d7 100644 --- a/lib/api/unittest/CRestorePreviousStateTest.h +++ b/lib/api/unittest/CRestorePreviousStateTest.h @@ -8,30 +8,25 @@ #include +class CRestorePreviousStateTest : public CppUnit::TestFixture { +public: + void testRestoreDetectorBy(); + void testRestoreDetectorOver(); + void testRestoreDetectorPartition(); + void testRestoreDetectorDc(); + void testRestoreDetectorCount(); + void testRestoreNormalizer(); + void testRestoreCategorizer(); -class CRestorePreviousStateTest : public CppUnit::TestFixture -{ - public: - void testRestoreDetectorBy(); - void testRestoreDetectorOver(); - void testRestoreDetectorPartition(); - void testRestoreDetectorDc(); - void testRestoreDetectorCount(); - void testRestoreNormalizer(); - void testRestoreCategorizer(); + static CppUnit::Test* suite(); - static CppUnit::Test *suite(); +private: + void + anomalyDetectorRestoreHelper(const std::string& stateFile, const std::string& configFileName, bool isSymmetric, int latencyBuckets); - private: - void anomalyDetectorRestoreHelper(const std::string &stateFile, - const std::string &configFileName, - bool isSymmetric, - int latencyBuckets); + void categorizerRestoreHelper(const std::string& stateFile, bool isSymmetric); - void categorizerRestoreHelper(const std::string &stateFile, - bool isSymmetric); - - std::string stripDocIds(const std::string &peristedState); + std::string stripDocIds(const std::string& peristedState); }; #endif // INCLUDED_CRestorePreviousStateTest_h diff --git a/lib/api/unittest/CResultNormalizerTest.cc b/lib/api/unittest/CResultNormalizerTest.cc index abc9ff4c23..1879696954 100644 --- a/lib/api/unittest/CResultNormalizerTest.cc +++ b/lib/api/unittest/CResultNormalizerTest.cc @@ -18,24 +18,20 @@ #include #include -#include #include +#include -CppUnit::Test *CResultNormalizerTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CResultNormalizerTest"); +CppUnit::Test* CResultNormalizerTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CResultNormalizerTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CResultNormalizerTest::testInitNormalizer", - &CResultNormalizerTest::testInitNormalizer) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CResultNormalizerTest::testInitNormalizer", + &CResultNormalizerTest::testInitNormalizer)); return suiteOfTests; } -void CResultNormalizerTest::testInitNormalizer() -{ - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); +void CResultNormalizerTest::testInitNormalizer() { + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); ml::api::CLineifiedJsonOutputWriter outputWriter; @@ -45,9 +41,7 @@ void CResultNormalizerTest::testInitNormalizer() std::ifstream 
inputStrm("testfiles/normalizerInput.csv"); ml::api::CCsvInputParser inputParser(inputStrm, ml::api::CCsvInputParser::COMMA); - CPPUNIT_ASSERT(inputParser.readStream(boost::bind(&ml::api::CResultNormalizer::handleRecord, - &normalizer, - _1))); + CPPUNIT_ASSERT(inputParser.readStream(boost::bind(&ml::api::CResultNormalizer::handleRecord, &normalizer, _1))); std::string results(outputWriter.internalString()); LOG_DEBUG("Results:\n" << results); @@ -56,8 +50,7 @@ void CResultNormalizerTest::testInitNormalizer() std::vector resultDocs; std::stringstream ss(results); std::string docString; - while (std::getline(ss, docString)) - { + while (std::getline(ss, docString)) { resultDocs.emplace_back(); resultDocs.back().Parse(docString.c_str()); } @@ -68,7 +61,7 @@ void CResultNormalizerTest::testInitNormalizer() // the normaliser is 2.56098e-205, so this should map to the highest normalised // score which is 98.28496 { - const rapidjson::Document &doc = resultDocs[0]; + const rapidjson::Document& doc = resultDocs[0]; CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("2.56098e-205"), std::string(doc["probability"].GetString())); @@ -78,7 +71,7 @@ void CResultNormalizerTest::testInitNormalizer() CPPUNIT_ASSERT_EQUAL(std::string("98.28496"), std::string(doc["normalized_score"].GetString())); } { - const rapidjson::Document &doc = resultDocs[1]; + const rapidjson::Document& doc = resultDocs[1]; CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("2.93761e-203"), std::string(doc["probability"].GetString())); @@ -88,7 +81,7 @@ void CResultNormalizerTest::testInitNormalizer() CPPUNIT_ASSERT_EQUAL(std::string("97.26764"), std::string(doc["normalized_score"].GetString())); } { - const rapidjson::Document &doc = resultDocs[2]; + const rapidjson::Document& doc = resultDocs[2]; CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("5.56572e-204"), std::string(doc["probability"].GetString())); @@ -98,7 +91,7 @@ void CResultNormalizerTest::testInitNormalizer() CPPUNIT_ASSERT_EQUAL(std::string("98.56057"), std::string(doc["normalized_score"].GetString())); } { - const rapidjson::Document &doc = resultDocs[4]; + const rapidjson::Document& doc = resultDocs[4]; CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("count"), std::string(doc["function_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("1e-300"), std::string(doc["probability"].GetString())); @@ -108,7 +101,7 @@ void CResultNormalizerTest::testInitNormalizer() CPPUNIT_ASSERT_EQUAL(std::string("99.19481"), std::string(doc["normalized_score"].GetString())); } { - const rapidjson::Document &doc = resultDocs[15]; + const rapidjson::Document& doc = resultDocs[15]; CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("1e-10"), std::string(doc["probability"].GetString())); @@ -118,7 +111,7 @@ void CResultNormalizerTest::testInitNormalizer() 
CPPUNIT_ASSERT_EQUAL(std::string("31.20283"), std::string(doc["normalized_score"].GetString())); } { - const rapidjson::Document &doc = resultDocs[35]; + const rapidjson::Document& doc = resultDocs[35]; CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("1"), std::string(doc["probability"].GetString())); @@ -128,7 +121,7 @@ void CResultNormalizerTest::testInitNormalizer() CPPUNIT_ASSERT_EQUAL(std::string("0"), std::string(doc["normalized_score"].GetString())); } { - const rapidjson::Document &doc = resultDocs[36]; + const rapidjson::Document& doc = resultDocs[36]; CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("1"), std::string(doc["probability"].GetString())); @@ -138,7 +131,7 @@ void CResultNormalizerTest::testInitNormalizer() CPPUNIT_ASSERT_EQUAL(std::string("0"), std::string(doc["normalized_score"].GetString())); } { - const rapidjson::Document &doc = resultDocs[37]; + const rapidjson::Document& doc = resultDocs[37]; CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("count"), std::string(doc["function_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("1"), std::string(doc["probability"].GetString())); diff --git a/lib/api/unittest/CResultNormalizerTest.h b/lib/api/unittest/CResultNormalizerTest.h index 95b1a06e8b..1716354a49 100644 --- a/lib/api/unittest/CResultNormalizerTest.h +++ b/lib/api/unittest/CResultNormalizerTest.h @@ -8,14 +8,11 @@ #include +class CResultNormalizerTest : public CppUnit::TestFixture { +public: + void testInitNormalizer(); -class CResultNormalizerTest : public CppUnit::TestFixture -{ - public: - void testInitNormalizer(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CResultNormalizerTest_h - diff --git a/lib/api/unittest/CSingleStreamDataAdderTest.cc b/lib/api/unittest/CSingleStreamDataAdderTest.cc index 7ce4298ca8..ca44603bf3 100644 --- a/lib/api/unittest/CSingleStreamDataAdderTest.cc +++ b/lib/api/unittest/CSingleStreamDataAdderTest.cc @@ -6,9 +6,9 @@ #include "CSingleStreamDataAdderTest.h" #include -#include #include #include +#include #include @@ -36,94 +36,62 @@ #include #include -namespace -{ +namespace { void reportPersistComplete(ml::api::CModelSnapshotJsonWriter::SModelSnapshotReport modelSnapshotReport, - std::string &snapshotIdOut, - size_t &numDocsOut) -{ + std::string& snapshotIdOut, + size_t& numDocsOut) { LOG_INFO("Persist complete with description: " << modelSnapshotReport.s_Description); snapshotIdOut = modelSnapshotReport.s_SnapshotId; numDocsOut = modelSnapshotReport.s_NumDocs; } - } -CppUnit::Test *CSingleStreamDataAdderTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSingleStreamDataAdderTest"); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CSingleStreamDataAdderTest::testDetectorPersistBy", - &CSingleStreamDataAdderTest::testDetectorPersistBy) ); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CSingleStreamDataAdderTest::testDetectorPersistOver", - &CSingleStreamDataAdderTest::testDetectorPersistOver) ); +CppUnit::Test* CSingleStreamDataAdderTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSingleStreamDataAdderTest"); + 
suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistBy", + &CSingleStreamDataAdderTest::testDetectorPersistBy)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistOver", + &CSingleStreamDataAdderTest::testDetectorPersistOver)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistPartition", + &CSingleStreamDataAdderTest::testDetectorPersistPartition)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistDc", + &CSingleStreamDataAdderTest::testDetectorPersistDc)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistCount", + &CSingleStreamDataAdderTest::testDetectorPersistCount)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CSingleStreamDataAdderTest::testDetectorPersistPartition", - &CSingleStreamDataAdderTest::testDetectorPersistPartition) ); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CSingleStreamDataAdderTest::testDetectorPersistDc", - &CSingleStreamDataAdderTest::testDetectorPersistDc) ); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CSingleStreamDataAdderTest::testDetectorPersistCount", - &CSingleStreamDataAdderTest::testDetectorPersistCount) ); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CSingleStreamDataAdderTest::testDetectorPersistCategorization", - &CSingleStreamDataAdderTest::testDetectorPersistCategorization) ); + "CSingleStreamDataAdderTest::testDetectorPersistCategorization", &CSingleStreamDataAdderTest::testDetectorPersistCategorization)); return suiteOfTests; } -void CSingleStreamDataAdderTest::testDetectorPersistBy() -{ - this->detectorPersistHelper("testfiles/new_mlfields.conf", - "testfiles/big_ascending.txt", - 0, - "%d/%b/%Y:%T %z"); +void CSingleStreamDataAdderTest::testDetectorPersistBy() { + this->detectorPersistHelper("testfiles/new_mlfields.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); } -void CSingleStreamDataAdderTest::testDetectorPersistOver() -{ - this->detectorPersistHelper("testfiles/new_mlfields_over.conf", - "testfiles/big_ascending.txt", - 0, - "%d/%b/%Y:%T %z"); +void CSingleStreamDataAdderTest::testDetectorPersistOver() { + this->detectorPersistHelper("testfiles/new_mlfields_over.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); } -void CSingleStreamDataAdderTest::testDetectorPersistPartition() -{ - this->detectorPersistHelper("testfiles/new_mlfields_partition.conf", - "testfiles/big_ascending.txt", - 0, - "%d/%b/%Y:%T %z"); +void CSingleStreamDataAdderTest::testDetectorPersistPartition() { + this->detectorPersistHelper("testfiles/new_mlfields_partition.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); } -void CSingleStreamDataAdderTest::testDetectorPersistDc() -{ - this->detectorPersistHelper("testfiles/new_persist_dc.conf", - "testfiles/files_users_programs.csv", - 5); +void CSingleStreamDataAdderTest::testDetectorPersistDc() { + this->detectorPersistHelper("testfiles/new_persist_dc.conf", "testfiles/files_users_programs.csv", 5); } -void CSingleStreamDataAdderTest::testDetectorPersistCount() -{ - this->detectorPersistHelper("testfiles/new_persist_count.conf", - "testfiles/files_users_programs.csv", - 5); +void CSingleStreamDataAdderTest::testDetectorPersistCount() { + this->detectorPersistHelper("testfiles/new_persist_count.conf", "testfiles/files_users_programs.csv", 5); } -void CSingleStreamDataAdderTest::testDetectorPersistCategorization() -{ - 
this->detectorPersistHelper("testfiles/new_persist_categorization.conf", - "testfiles/time_messages.csv", - 0); +void CSingleStreamDataAdderTest::testDetectorPersistCategorization() { + this->detectorPersistHelper("testfiles/new_persist_categorization.conf", "testfiles/time_messages.csv", 0); } -void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string &configFileName, - const std::string &inputFilename, +void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& configFileName, + const std::string& inputFilename, int latencyBuckets, - const std::string &timeFormat) -{ + const std::string& timeFormat) { // Start by creating a detector with non-trivial state static const ml::core_t::TTime BUCKET_SIZE(3600); static const std::string JOB_ID("job"); @@ -139,16 +107,10 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string &config ml::api::CFieldConfig fieldConfig; CPPUNIT_ASSERT(fieldConfig.initFromFile(configFileName)); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE, - ml::model_t::E_None, - "", - BUCKET_SIZE * latencyBuckets, - 0, - false, - ""); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( + BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false, ""); - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); ml::api::CJsonOutputWriter outputWriter(JOB_ID, wrappedOutputStream); std::string origSnapshotId; @@ -158,17 +120,13 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string &config fieldConfig, modelConfig, wrappedOutputStream, - boost::bind(&reportPersistComplete, - _1, - boost::ref(origSnapshotId), - boost::ref(numOrigDocs)), + boost::bind(&reportPersistComplete, _1, boost::ref(origSnapshotId), boost::ref(numOrigDocs)), nullptr, -1, "time", timeFormat); - - ml::api::CDataProcessor *firstProcessor(&origJob); + ml::api::CDataProcessor* firstProcessor(&origJob); // Chain the detector's input ml::api::COutputChainer outputChainer(origJob); @@ -176,32 +134,26 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string &config // The typer knows how to assign categories to records ml::api::CFieldDataTyper typer(JOB_ID, fieldConfig, limits, outputChainer, outputWriter); - if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0) - { + if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0) { LOG_DEBUG("Applying the categorization typer for anomaly detection"); firstProcessor = &typer; } using TScopedInputParserP = boost::scoped_ptr; TScopedInputParserP parser; - if (inputFilename.rfind(".csv") == inputFilename.length() - 4) - { + if (inputFilename.rfind(".csv") == inputFilename.length() - 4) { parser.reset(new ml::api::CCsvInputParser(inputStrm)); - } - else - { + } else { parser.reset(new ml::api::CLineifiedJsonInputParser(inputStrm)); } - CPPUNIT_ASSERT(parser->readStream(boost::bind(&ml::api::CDataProcessor::handleRecord, - firstProcessor, - _1))); + CPPUNIT_ASSERT(parser->readStream(boost::bind(&ml::api::CDataProcessor::handleRecord, firstProcessor, _1))); // Persist the detector state to a stringstream std::string origPersistedState; { - std::ostringstream *strm(0); + std::ostringstream* strm(0); ml::api::CSingleStreamDataAdder::TOStreamP ptr(strm = new std::ostringstream()); 
ml::api::CSingleStreamDataAdder persister(ptr); CPPUNIT_ASSERT(firstProcessor->persistState(persister)); @@ -217,14 +169,9 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string &config fieldConfig, modelConfig, wrappedOutputStream, - boost::bind(&reportPersistComplete, - _1, - boost::ref(restoredSnapshotId), - boost::ref(numRestoredDocs))); + boost::bind(&reportPersistComplete, _1, boost::ref(restoredSnapshotId), boost::ref(numRestoredDocs))); - - - ml::api::CDataProcessor *restoredFirstProcessor(&restoredJob); + ml::api::CDataProcessor* restoredFirstProcessor(&restoredJob); // Chain the detector's input ml::api::COutputChainer restoredOutputChainer(restoredJob); @@ -234,14 +181,12 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string &config size_t numCategorizerDocs(0); - if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0) - { + if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0) { LOG_DEBUG("Applying the categorization typer for anomaly detection"); numCategorizerDocs = 1; restoredFirstProcessor = &restoredTyper; } - { ml::core_t::TTime completeToTime(0); @@ -260,7 +205,7 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string &config // Finally, persist the new detector state and compare the result std::string newPersistedState; { - std::ostringstream *strm(0); + std::ostringstream* strm(0); ml::api::CSingleStreamDataAdder::TOStreamP ptr(strm = new std::ostringstream()); ml::api::CSingleStreamDataAdder persister(ptr); CPPUNIT_ASSERT(restoredFirstProcessor->persistState(persister)); @@ -271,12 +216,8 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string &config // The snapshot ID can be different between the two persists, so replace the // first occurrence of it (which is in the bulk metadata) - CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(origSnapshotId, - "snap", - origPersistedState)); - CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(restoredSnapshotId, - "snap", - newPersistedState)); + CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(origSnapshotId, "snap", origPersistedState)); + CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(restoredSnapshotId, "snap", newPersistedState)); CPPUNIT_ASSERT_EQUAL(origPersistedState, newPersistedState); } diff --git a/lib/api/unittest/CSingleStreamDataAdderTest.h b/lib/api/unittest/CSingleStreamDataAdderTest.h index 9ffe041186..ec31977ba9 100644 --- a/lib/api/unittest/CSingleStreamDataAdderTest.h +++ b/lib/api/unittest/CSingleStreamDataAdderTest.h @@ -8,25 +8,22 @@ #include +class CSingleStreamDataAdderTest : public CppUnit::TestFixture { +public: + void testDetectorPersistBy(); + void testDetectorPersistOver(); + void testDetectorPersistPartition(); + void testDetectorPersistDc(); + void testDetectorPersistCount(); + void testDetectorPersistCategorization(); -class CSingleStreamDataAdderTest : public CppUnit::TestFixture -{ - public: - void testDetectorPersistBy(); - void testDetectorPersistOver(); - void testDetectorPersistPartition(); - void testDetectorPersistDc(); - void testDetectorPersistCount(); - void testDetectorPersistCategorization(); + static CppUnit::Test* suite(); - static CppUnit::Test *suite(); - - private: - void detectorPersistHelper(const std::string &configFileName, - const std::string &inputFilename, - int latencyBuckets, - const std::string &timeFormat = std::string()); +private: + 
void detectorPersistHelper(const std::string& configFileName,
+                               const std::string& inputFilename,
+                               int latencyBuckets,
+                               const std::string& timeFormat = std::string());
 };
 
 #endif // INCLUDED_CSingleStreamDataAdderTest_h
-
diff --git a/lib/api/unittest/CStateRestoreStreamFilterTest.cc b/lib/api/unittest/CStateRestoreStreamFilterTest.cc
index fe4b873c53..6d4632cfb1 100644
--- a/lib/api/unittest/CStateRestoreStreamFilterTest.cc
+++ b/lib/api/unittest/CStateRestoreStreamFilterTest.cc
@@ -13,33 +13,29 @@
 #include
 #include
 
+CppUnit::Test* CStateRestoreStreamFilterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRestoreStreamFilterTest");
-CppUnit::Test *CStateRestoreStreamFilterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRestoreStreamFilterTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStateRestoreStreamFilterTest>(
-                               "CRestoreStreamFilterTest::testBulkIndexHeaderRemoval",
-                               &CStateRestoreStreamFilterTest::testBulkIndexHeaderRemoval) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStateRestoreStreamFilterTest>(
-                               "CRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte",
-                               &CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStateRestoreStreamFilterTest>(
+        "CRestoreStreamFilterTest::testBulkIndexHeaderRemoval", &CStateRestoreStreamFilterTest::testBulkIndexHeaderRemoval));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStateRestoreStreamFilterTest>("CRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte",
+                                                               &CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte));
 
     return suiteOfTests;
 }
 
-void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemoval()
-{
-    std::istringstream input ("{\"index\":{\"_id\":\"some_id\"}}\n"
-                              "{\"compressed\" : [ \"a\",\"b\"]}");
+void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemoval() {
+    std::istringstream input("{\"index\":{\"_id\":\"some_id\"}}\n"
+                             "{\"compressed\" : [ \"a\",\"b\"]}");
 
     boost::iostreams::filtering_istream in;
     in.push(ml::api::CStateRestoreStreamFilter());
     in.push(input);
     std::string output(std::istreambuf_iterator<char>{in}, std::istreambuf_iterator<char>{});
 
-    std::string expected ("{\"_id\":\"some_id\",\"_version\":1,\"found\":true,\"_source\":"
-                          "{\"compressed\" : [ \"a\",\"b\"]}}");
+    std::string expected("{\"_id\":\"some_id\",\"_version\":1,\"found\":true,\"_source\":"
+                         "{\"compressed\" : [ \"a\",\"b\"]}}");
     expected += '\0';
     expected += '\n';
 
@@ -50,14 +46,13 @@ void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemoval()
     CPPUNIT_ASSERT_EQUAL(expected, output);
 }
 
-void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte()
-{
+void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte() {
     std::stringstream input;
     input << "{\"index\":{\"_id\":\"some_id\"}}\n";
     input << "{\"compressed\" : [ \"a\",\"b\"]}\n";
     input << '\0';
-    input <<"{\"index\":{\"_id\":\"some_other_id\"}}\n";
+    input << "{\"index\":{\"_id\":\"some_other_id\"}}\n";
     input << "{\"compressed\" : [ \"c\",\"d\"]}\n";
 
     boost::iostreams::filtering_istream in;
@@ -65,8 +60,8 @@ void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte()
     in.push(input);
     std::string output(std::istreambuf_iterator<char>{in}, std::istreambuf_iterator<char>{});
 
-    std::string expected ("{\"_id\":\"some_id\",\"_version\":1,\"found\":true,\"_source\":"
-                          "{\"compressed\" : [ \"a\",\"b\"]}}");
+    std::string expected("{\"_id\":\"some_id\",\"_version\":1,\"found\":true,\"_source\":"
+                         "{\"compressed\" : [ \"a\",\"b\"]}}");
     expected += '\0';
     expected += '\n';
     expected += 
"{\"_id\":\"some_other_id\",\"_version\":1,\"found\":true,\"_source\":" @@ -80,6 +75,3 @@ void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte() CPPUNIT_ASSERT_EQUAL(expected, output); } - - - diff --git a/lib/api/unittest/CStateRestoreStreamFilterTest.h b/lib/api/unittest/CStateRestoreStreamFilterTest.h index c57f972d9a..ef566ab6ba 100644 --- a/lib/api/unittest/CStateRestoreStreamFilterTest.h +++ b/lib/api/unittest/CStateRestoreStreamFilterTest.h @@ -8,14 +8,12 @@ #include +class CStateRestoreStreamFilterTest : public CppUnit::TestFixture { +public: + void testBulkIndexHeaderRemoval(); + void testBulkIndexHeaderRemovalZerobyte(); -class CStateRestoreStreamFilterTest : public CppUnit::TestFixture -{ - public: - void testBulkIndexHeaderRemoval(); - void testBulkIndexHeaderRemovalZerobyte(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CRestoreStreamFilterTest_h diff --git a/lib/api/unittest/CStringStoreTest.cc b/lib/api/unittest/CStringStoreTest.cc index decc98de46..75708376c3 100644 --- a/lib/api/unittest/CStringStoreTest.cc +++ b/lib/api/unittest/CStringStoreTest.cc @@ -25,22 +25,18 @@ using namespace ml; -namespace -{ -size_t countBuckets(const std::string &key, const std::string &output) -{ +namespace { +size_t countBuckets(const std::string& key, const std::string& output) { size_t count = 0; rapidjson::Document doc; doc.Parse(output); CPPUNIT_ASSERT(!doc.HasParseError()); CPPUNIT_ASSERT(doc.IsArray()); - const rapidjson::Value &allRecords = doc.GetArray(); - for (auto &r : allRecords.GetArray()) - { + const rapidjson::Value& allRecords = doc.GetArray(); + for (auto& r : allRecords.GetArray()) { rapidjson::Value::ConstMemberIterator recordsIt = r.GetObject().FindMember(key); - if (recordsIt != r.GetObject().MemberEnd()) - { + if (recordsIt != r.GetObject().MemberEnd()) { ++count; } } @@ -48,19 +44,15 @@ size_t countBuckets(const std::string &key, const std::string &output) return count; } -core_t::TTime playData(core_t::TTime start, core_t::TTime span, int numBuckets, - int numPeople, int numPartitions, int anomaly, - api::CAnomalyJob &job) -{ - std::string people[] = { "Elgar", "Holst", "Delius", "Vaughan Williams", "Bliss", "Warlock", "Walton" }; - if (numPeople > 7) - { +core_t::TTime +playData(core_t::TTime start, core_t::TTime span, int numBuckets, int numPeople, int numPartitions, int anomaly, api::CAnomalyJob& job) { + std::string people[] = {"Elgar", "Holst", "Delius", "Vaughan Williams", "Bliss", "Warlock", "Walton"}; + if (numPeople > 7) { LOG_ERROR("Too many people: " << numPeople); return start; } - std::string partitions[] = { "tuba", "flute", "violin", "triangle", "jew's harp" }; - if (numPartitions > 5) - { + std::string partitions[] = {"tuba", "flute", "violin", "triangle", "jew's harp"}; + if (numPartitions > 5) { LOG_ERROR("Too many partitions: " << numPartitions); return start; } @@ -68,68 +60,48 @@ core_t::TTime playData(core_t::TTime start, core_t::TTime span, int numBuckets, ss << "time,notes,composer,instrument\n"; core_t::TTime t; int bucketNum = 0; - for (t = start; t < start + span * numBuckets; t += span, bucketNum++) - { - for (int i = 0; i < numPeople; i++) - { - for (int j = 0; j < numPartitions; j++) - { + for (t = start; t < start + span * numBuckets; t += span, bucketNum++) { + for (int i = 0; i < numPeople; i++) { + for (int j = 0; j < numPartitions; j++) { ss << t << "," << (people[i].size() * partitions[j].size()) << ","; ss << people[i] << "," << partitions[j] << "\n"; } } - if 
(bucketNum == anomaly)
-        {
+        if (bucketNum == anomaly) {
             ss << t << "," << 5564 << "," << people[numPeople - 1] << "," << partitions[numPartitions - 1] << "\n";
         }
     }
 
     api::CCsvInputParser parser(ss);
 
-    CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord,
-                                                 &job,
-                                                 _1)));
+    CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1)));
 
     return t;
 }
 
 //! Helper class to look up a string in core::CStoredStringPtr set
-struct SLookup
-{
-    std::size_t operator()(const std::string &key) const
-    {
+struct SLookup {
+    std::size_t operator()(const std::string& key) const {
         boost::hash<std::string> hasher;
         return hasher(key);
     }
 
-    bool operator()(const std::string &lhs,
-                    const core::CStoredStringPtr &rhs) const
-    {
-        return lhs == *rhs;
-    }
+    bool operator()(const std::string& lhs, const core::CStoredStringPtr& rhs) const { return lhs == *rhs; }
 };
-
 } // namespace
 
-bool CStringStoreTest::nameExists(const std::string &string)
-{
+bool CStringStoreTest::nameExists(const std::string& string) {
     model::CStringStore::TStoredStringPtrUSet names = model::CStringStore::names().m_Strings;
-    return names.find(string,
-                      ::SLookup(),
-                      ::SLookup()) != names.end();
+    return names.find(string, ::SLookup(), ::SLookup()) != names.end();
 }
 
-bool CStringStoreTest::influencerExists(const std::string &string)
-{
+bool CStringStoreTest::influencerExists(const std::string& string) {
     model::CStringStore::TStoredStringPtrUSet names = model::CStringStore::influencers().m_Strings;
-    return names.find(string,
-                      ::SLookup(),
-                      ::SLookup()) != names.end();
+    return names.find(string, ::SLookup(), ::SLookup()) != names.end();
 }
 
-void CStringStoreTest::testPersonStringPruning()
-{
+void CStringStoreTest::testPersonStringPruning() {
     core_t::TTime BUCKET_SPAN(10000);
     core_t::TTime time = 100000000;
 
@@ -142,8 +114,7 @@ void CStringStoreTest::testPersonStringPruning()
 
     CPPUNIT_ASSERT(fieldConfig.initFromClause(clause));
 
-    model::CAnomalyDetectorModelConfig modelConfig =
-            model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN);
+    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN);
     modelConfig.decayRate(0.001);
     modelConfig.bucketResultsDelay(2);
 
@@ -164,13 +135,9 @@ void CStringStoreTest::testPersonStringPruning()
         LOG_TRACE("Setting up job");
         std::ostringstream outputStrm;
-        ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm);
+        ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
 
-        api::CAnomalyJob job("job",
-                             limits,
-                             fieldConfig,
-                             modelConfig,
-                             wrappedOutputStream);
+        api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream);
 
         // There will be one anomaly in this batch, which will be stuck in the
         // results queue.
@@ -213,13 +180,8 @@ void CStringStoreTest::testPersonStringPruning() CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - api::CAnomalyJob::TPersistCompleteFunc()); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); @@ -256,13 +218,8 @@ void CStringStoreTest::testPersonStringPruning() CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - api::CAnomalyJob::TPersistCompleteFunc()); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); @@ -300,13 +257,8 @@ void CStringStoreTest::testPersonStringPruning() CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - api::CAnomalyJob::TPersistCompleteFunc()); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); @@ -329,9 +281,7 @@ void CStringStoreTest::testPersonStringPruning() } } - -void CStringStoreTest::testAttributeStringPruning() -{ +void CStringStoreTest::testAttributeStringPruning() { core_t::TTime BUCKET_SPAN(10000); core_t::TTime time = 100000000; @@ -344,8 +294,7 @@ void CStringStoreTest::testAttributeStringPruning() CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); modelConfig.decayRate(0.001); modelConfig.bucketResultsDelay(2); @@ -365,13 +314,9 @@ void CStringStoreTest::testAttributeStringPruning() LOG_TRACE("Setting up job"); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); // There will be one anomaly in this batch, which will be stuck in the // results queue. 
@@ -402,7 +347,6 @@ void CStringStoreTest::testAttributeStringPruning() CPPUNIT_ASSERT(job.persistState(adder)); wrappedOutputStream.syncFlush(); CPPUNIT_ASSERT_EQUAL(std::size_t(1), countBuckets("records", outputStrm.str() + "]")); - } LOG_DEBUG("Restoring job"); { @@ -413,14 +357,9 @@ void CStringStoreTest::testAttributeStringPruning() CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); @@ -457,14 +396,9 @@ void CStringStoreTest::testAttributeStringPruning() CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); @@ -502,14 +436,9 @@ void CStringStoreTest::testAttributeStringPruning() CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); @@ -529,13 +458,10 @@ void CStringStoreTest::testAttributeStringPruning() CPPUNIT_ASSERT(this->nameExists("flute")); CPPUNIT_ASSERT(this->nameExists("tuba")); CPPUNIT_ASSERT(!this->nameExists("Delius")); - } } - -void CStringStoreTest::testInfluencerStringPruning() -{ +void CStringStoreTest::testInfluencerStringPruning() { core_t::TTime BUCKET_SPAN(10000); core_t::TTime time = 100000000; @@ -547,8 +473,7 @@ void CStringStoreTest::testInfluencerStringPruning() CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); modelConfig.bucketResultsDelay(2); model::CLimits limits; @@ -567,13 +492,9 @@ void CStringStoreTest::testInfluencerStringPruning() LOG_TRACE("Setting up job"); std::ostringstream outputStrm; - ml::core::CJsonOutputStreamWrapper wrappedOutputStream (outputStrm); + ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream); + api::CAnomalyJob job("job", limits, 
fieldConfig, modelConfig, wrappedOutputStream); // Play in a few buckets with influencers, and see that they stick around for // 3 buckets @@ -662,19 +583,14 @@ void CStringStoreTest::testInfluencerStringPruning() } } +CppUnit::Test* CStringStoreTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStringStoreTest"); -CppUnit::Test* CStringStoreTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStringStoreTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStringStoreTest::testPersonStringPruning", - &CStringStoreTest::testPersonStringPruning) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStringStoreTest::testAttributeStringPruning", - &CStringStoreTest::testAttributeStringPruning) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStringStoreTest::testInfluencerStringPruning", - &CStringStoreTest::testInfluencerStringPruning) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CStringStoreTest::testPersonStringPruning", &CStringStoreTest::testPersonStringPruning)); + suiteOfTests->addTest(new CppUnit::TestCaller("CStringStoreTest::testAttributeStringPruning", + &CStringStoreTest::testAttributeStringPruning)); + suiteOfTests->addTest(new CppUnit::TestCaller("CStringStoreTest::testInfluencerStringPruning", + &CStringStoreTest::testInfluencerStringPruning)); return suiteOfTests; } diff --git a/lib/api/unittest/CStringStoreTest.h b/lib/api/unittest/CStringStoreTest.h index 882a30f272..5ce1e96396 100644 --- a/lib/api/unittest/CStringStoreTest.h +++ b/lib/api/unittest/CStringStoreTest.h @@ -10,19 +10,17 @@ #include -class CStringStoreTest : public CppUnit::TestFixture -{ - public: - void testPersonStringPruning(); - void testAttributeStringPruning(); - void testInfluencerStringPruning(); +class CStringStoreTest : public CppUnit::TestFixture { +public: + void testPersonStringPruning(); + void testAttributeStringPruning(); + void testInfluencerStringPruning(); - static CppUnit::Test *suite(); - - private: - bool nameExists(const std::string &string); - bool influencerExists(const std::string &string); + static CppUnit::Test* suite(); +private: + bool nameExists(const std::string& string); + bool influencerExists(const std::string& string); }; #endif // INCLUDED_CStringStoreTest_h diff --git a/lib/api/unittest/CTokenListDataTyperTest.cc b/lib/api/unittest/CTokenListDataTyperTest.cc index a1c4d15460..48c21a9b04 100644 --- a/lib/api/unittest/CTokenListDataTyperTest.cc +++ b/lib/api/unittest/CTokenListDataTyperTest.cc @@ -15,87 +15,66 @@ #include #include - -namespace -{ - -using TTokenListDataTyperKeepsFields = - ml::api::CTokenListDataTyper; +namespace { + +using TTokenListDataTyperKeepsFields = ml::api::CTokenListDataTyper; const TTokenListDataTyperKeepsFields::TTokenListReverseSearchCreatorIntfCPtr NO_REVERSE_SEARCH_CREATOR; - } -CppUnit::Test *CTokenListDataTyperTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTokenListDataTyperTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testHexData", - &CTokenListDataTyperTest::testHexData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testRmdsData", - &CTokenListDataTyperTest::testRmdsData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testProxyData", - &CTokenListDataTyperTest::testProxyData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testFxData", - &CTokenListDataTyperTest::testFxData) ); - 
suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testApacheData", - &CTokenListDataTyperTest::testApacheData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testBrokerageData", - &CTokenListDataTyperTest::testBrokerageData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testVmwareData", - &CTokenListDataTyperTest::testVmwareData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testBankData", - &CTokenListDataTyperTest::testBankData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testJavaGcData", - &CTokenListDataTyperTest::testJavaGcData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testPersist", - &CTokenListDataTyperTest::testPersist) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testLongReverseSearch", - &CTokenListDataTyperTest::testLongReverseSearch) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testPreTokenised", - &CTokenListDataTyperTest::testPreTokenised) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTokenListDataTyperTest::testPreTokenisedPerformance", - &CTokenListDataTyperTest::testPreTokenisedPerformance) ); +CppUnit::Test* CTokenListDataTyperTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTokenListDataTyperTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CTokenListDataTyperTest::testHexData", &CTokenListDataTyperTest::testHexData)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CTokenListDataTyperTest::testRmdsData", &CTokenListDataTyperTest::testRmdsData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testProxyData", + &CTokenListDataTyperTest::testProxyData)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CTokenListDataTyperTest::testFxData", &CTokenListDataTyperTest::testFxData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testApacheData", + &CTokenListDataTyperTest::testApacheData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testBrokerageData", + &CTokenListDataTyperTest::testBrokerageData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testVmwareData", + &CTokenListDataTyperTest::testVmwareData)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CTokenListDataTyperTest::testBankData", &CTokenListDataTyperTest::testBankData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testJavaGcData", + &CTokenListDataTyperTest::testJavaGcData)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CTokenListDataTyperTest::testPersist", &CTokenListDataTyperTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testLongReverseSearch", + &CTokenListDataTyperTest::testLongReverseSearch)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testPreTokenised", + &CTokenListDataTyperTest::testPreTokenised)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testPreTokenisedPerformance", + &CTokenListDataTyperTest::testPreTokenisedPerformance)); return suiteOfTests; } -void CTokenListDataTyperTest::setUp() -{ +void CTokenListDataTyperTest::setUp() { // Enable trace level logging for these unit tests ml::core::CLogger::instance().setLoggingLevel(ml::core::CLogger::E_Trace); } -void CTokenListDataTyperTest::tearDown() -{ +void 
CTokenListDataTyperTest::tearDown() { // Revert to debug level logging for any subsequent unit tests ml::core::CLogger::instance().setLoggingLevel(ml::core::CLogger::E_Debug); } -void CTokenListDataTyperTest::testHexData() -{ +void CTokenListDataTyperTest::testHexData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "[0x0000000800000000 ", 500)); @@ -105,8 +84,7 @@ void CTokenListDataTyperTest::testHexData() CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " 0x0000000800000000,", 500)); } -void CTokenListDataTyperTest::testRmdsData() -{ +void CTokenListDataTyperTest::testRmdsData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", 500)); @@ -121,29 +99,68 @@ void CTokenListDataTyperTest::testRmdsData() CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " Service CUBE_CHIX has shut down.", 500)); } -void CTokenListDataTyperTest::testProxyData() -{ +void CTokenListDataTyperTest::testProxyData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " [1094662464] INFO transaction <3c26701d3140-kn8n1c8f5d2o> - Transaction TID: z9hG4bKy6aEy6aEy6aEaUgi!UmU-Ma.9-6bf50ea0192.168.251.8SUBSCRIBE deleted", 500)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " [1091504448] INFO transaction <3c26701ad775-1cref2zy3w9e> - Transaction TID: z9hG4bK_UQA_UQA_UQAsO0i!OG!yYK.25-5bee09e0192.168.251.8SUBSCRIBE deleted", 500)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " [1094662464] INFO transactionuser <6508700927200972648@10.10.18.82> - ---------------- DESTROYING RegistrationServer ---------------", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " [1111529792] INFO proxy <45409105041220090733@192.168.251.123> - +++++++++++++++ CREATING ProxyCore ++++++++++++++++", 500)); - CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " [1091504448] INFO transactionuser <3c26709ab9f0-iih26eh8pxxa> - +++++++++++++++ CREATING PresenceAgent ++++++++++++++++", 500)); - CPPUNIT_ASSERT_EQUAL(5, typer.computeType(false, " [1111529792] INFO session <45409105041220090733@192.168.251.123> - ----------------- PROXY Session DESTROYED --------------------", 500)); - CPPUNIT_ASSERT_EQUAL(5, typer.computeType(false, " [1094662464] INFO session - ----------------- PROXY Session DESTROYED --------------------", 500)); + CPPUNIT_ASSERT_EQUAL(1, + typer.computeType(false, + " [1094662464] INFO transaction <3c26701d3140-kn8n1c8f5d2o> - Transaction TID: " + "z9hG4bKy6aEy6aEy6aEaUgi!UmU-Ma.9-6bf50ea0192.168.251.8SUBSCRIBE deleted", + 500)); + CPPUNIT_ASSERT_EQUAL(1, + typer.computeType(false, + " [1091504448] INFO transaction <3c26701ad775-1cref2zy3w9e> - Transaction TID: " + "z9hG4bK_UQA_UQA_UQAsO0i!OG!yYK.25-5bee09e0192.168.251.8SUBSCRIBE deleted", + 500)); + CPPUNIT_ASSERT_EQUAL(2, + typer.computeType(false, + " [1094662464] INFO transactionuser <6508700927200972648@10.10.18.82> - ---------------- " + "DESTROYING RegistrationServer ---------------", + 500)); + CPPUNIT_ASSERT_EQUAL( + 3, + typer.computeType( + false, + " [1111529792] INFO proxy <45409105041220090733@192.168.251.123> - +++++++++++++++ CREATING ProxyCore ++++++++++++++++", + 500)); + CPPUNIT_ASSERT_EQUAL( + 4, + typer.computeType( + false, + " [1091504448] INFO transactionuser <3c26709ab9f0-iih26eh8pxxa> - +++++++++++++++ CREATING PresenceAgent 
++++++++++++++++", + 500)); + CPPUNIT_ASSERT_EQUAL(5, + typer.computeType(false, + " [1111529792] INFO session <45409105041220090733@192.168.251.123> - ----------------- PROXY " + "Session DESTROYED --------------------", + 500)); + CPPUNIT_ASSERT_EQUAL(5, + typer.computeType(false, + " [1094662464] INFO session - ----------------- " + "PROXY Session DESTROYED --------------------", + 500)); } -void CTokenListDataTyperTest::testFxData() -{ +void CTokenListDataTyperTest::testFxData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "javax.ejb.FinderException - findFxCover([]): null", 500)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "javax.ejb.FinderException - findFxCover([]): null", 500)); + CPPUNIT_ASSERT_EQUAL( + 1, + typer.computeType(false, + "javax.ejb.FinderException - findFxCover([]): " + "null", + 500)); + CPPUNIT_ASSERT_EQUAL( + 1, + typer.computeType(false, + "javax.ejb.FinderException - findFxCover([]): " + "null", + 500)); } -void CTokenListDataTyperTest::testApacheData() -{ +void CTokenListDataTyperTest::testApacheData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol destroy", 500)); @@ -152,41 +169,86 @@ void CTokenListDataTyperTest::testApacheData() CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol stop", 500)); } -void CTokenListDataTyperTest::testBrokerageData() -{ +void CTokenListDataTyperTest::testBrokerageData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "AUDIT ; tomcat-http--16; ee96c0c4567c0c11d6b90f9bc8b54aaa77; REQ4e42023e0a0328d020003e460005aa33; applnx911.elastic.co; ; Request Complete: /mlgw/mlb/ofsummary/summary [T=283ms,CUSTPREF-WEB_ACCOUNT_PREFERENCES=95,MAUI-ETSPROF2=155,NBMSG-NB_MESSAGING_SERVICE=164,CustAcctProfile=BRK=2;NB=0;FILI=0;CESG=0;CC=0;AcctTotal=2,migrated=2]", 500)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "AUDIT ; tomcat-http--39; ee763e95747c0b11d6b90f9bc8b54aaa77; REQ4e42023e0a0429a020000c6f0002aa33; applnx811.elastic.co; ; Request Complete: /mlgw/mlb/ofaccounts/brokerageAccountHistory [T=414ms,CUSTPREF-INS_PERSON_WEB_ACCT_PREFERENCES=298,MAUI-PSL04XD=108]", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, "AUDIT ; tomcat-http--39; ee256201da7c0c11d6b90f9bc8b54aaa77; REQ4e42023b0a022925200027180002aa33; applnx711.elastic.co; ; Request Complete: /mlgw/mlb/ofpositions/brokerageAccountPositionsIframe [T=90ms,CacheStore-GetAttribute=5,MAUI-ECAPPOS=50,RR-QUOTE_TRANSACTION=11]", 500)); + CPPUNIT_ASSERT_EQUAL( + 1, + typer.computeType(false, + "AUDIT ; tomcat-http--16; ee96c0c4567c0c11d6b90f9bc8b54aaa77; REQ4e42023e0a0328d020003e460005aa33; " + "applnx911.elastic.co; ; Request Complete: /mlgw/mlb/ofsummary/summary " + "[T=283ms,CUSTPREF-WEB_ACCOUNT_PREFERENCES=95,MAUI-ETSPROF2=155,NBMSG-NB_MESSAGING_SERVICE=164,CustAcctProfile=" + "BRK=2;NB=0;FILI=0;CESG=0;CC=0;AcctTotal=2,migrated=2]", + 500)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType(false, + "AUDIT ; tomcat-http--39; ee763e95747c0b11d6b90f9bc8b54aaa77; REQ4e42023e0a0429a020000c6f0002aa33; " + "applnx811.elastic.co; ; Request Complete: /mlgw/mlb/ofaccounts/brokerageAccountHistory " + "[T=414ms,CUSTPREF-INS_PERSON_WEB_ACCT_PREFERENCES=298,MAUI-PSL04XD=108]", + 500)); + CPPUNIT_ASSERT_EQUAL( + 3, + 
typer.computeType(false, + "AUDIT ; tomcat-http--39; ee256201da7c0c11d6b90f9bc8b54aaa77; REQ4e42023b0a022925200027180002aa33; " + "applnx711.elastic.co; ; Request Complete: /mlgw/mlb/ofpositions/brokerageAccountPositionsIframe " + "[T=90ms,CacheStore-GetAttribute=5,MAUI-ECAPPOS=50,RR-QUOTE_TRANSACTION=11]", + 500)); } -void CTokenListDataTyperTest::testVmwareData() -{ +void CTokenListDataTyperTest::testVmwareData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", 103)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'Default' opID=WFU-ddeadb59] [VpxaHalVmHostagent] 11: GuestInfo changed 'guest.disk", 107)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Completed callback", 104)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-35689729] [WaitForUpdatesDone] Received callback", 103)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'Default' opID=WFU-35689729] [VpxaHalVmHostagent] 15: GuestInfo changed 'guest.disk", 107)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-35689729] [WaitForUpdatesDone] Completed callback", 104)); + CPPUNIT_ASSERT_EQUAL( + 1, + typer.computeType( + false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", 103)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "Vpxa: [49EC0B90 verbose 'Default' opID=WFU-ddeadb59] [VpxaHalVmHostagent] 11: GuestInfo changed 'guest.disk", 107)); + CPPUNIT_ASSERT_EQUAL( + 3, + typer.computeType( + false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Completed callback", 104)); + CPPUNIT_ASSERT_EQUAL( + 1, + typer.computeType( + false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-35689729] [WaitForUpdatesDone] Received callback", 103)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "Vpxa: [49EC0B90 verbose 'Default' opID=WFU-35689729] [VpxaHalVmHostagent] 15: GuestInfo changed 'guest.disk", 107)); + CPPUNIT_ASSERT_EQUAL( + 3, + typer.computeType( + false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-35689729] [WaitForUpdatesDone] Completed callback", 104)); } -void CTokenListDataTyperTest::testBankData() -{ +void CTokenListDataTyperTest::testBankData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Process payment flow for tradeId=80894728 and backOfficeId=9354474", 500)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Synchronization of payment flow is complete for tradeId=80013186 and backOfficeId=265573", 500)); + CPPUNIT_ASSERT_EQUAL(1, + typer.computeType(false, + "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Process payment flow " + "for tradeId=80894728 and backOfficeId=9354474", + 500)); + CPPUNIT_ASSERT_EQUAL(2, + typer.computeType(false, + "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Synchronization of " + "payment flow is complete for tradeId=80013186 
and backOfficeId=265573", + 500)); // This is not great, but it's tricky when only 1 word differs from the // first type - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Synchronize payment flow for tradeId=80894721 and backOfficeId=9354469", 500)); + CPPUNIT_ASSERT_EQUAL(1, + typer.computeType(false, + "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Synchronize payment " + "flow for tradeId=80894721 and backOfficeId=9354469", + 500)); } -void CTokenListDataTyperTest::testJavaGcData() -{ +void CTokenListDataTyperTest::testJavaGcData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-27T19:57:43.644-0700: 1922084.903: [GC", 46)); @@ -203,17 +265,37 @@ void CTokenListDataTyperTest::testJavaGcData() CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.646-0700: 1922087.906: [GC", 46)); CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.647-0700: 1922087.906: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572800K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572801K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572802K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759600000, 0x0000000800000000, 0x0000000800000000)", 106)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759700000, 0x0000000800000000, 0x0000000800000000)", 106)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759800000, 0x0000000800000000, 0x0000000800000000)", 106)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "PSYoungGen total 2572800K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "PSYoungGen total 2572801K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "PSYoungGen total 2572802K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759600000, 0x0000000800000000, 0x0000000800000000)", 106)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759700000, 0x0000000800000000, 0x0000000800000000)", 106)); + CPPUNIT_ASSERT_EQUAL( + 2, + typer.computeType( + false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759800000, 0x0000000800000000, 0x0000000800000000)", 106)); } -void CTokenListDataTyperTest::testPersist() -{ +void CTokenListDataTyperTest::testPersist() { 
TTokenListDataTyperKeepsFields origTyper(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); origTyper.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", 500); @@ -242,9 +324,8 @@ void CTokenListDataTyperTest::testPersist() ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&TTokenListDataTyperKeepsFields::acceptRestoreTraverser, - &restoredTyper, - _1))); + CPPUNIT_ASSERT( + traverser.traverseSubLevel(boost::bind(&TTokenListDataTyperKeepsFields::acceptRestoreTraverser, &restoredTyper, _1))); } // The XML representation of the new typer should be the same as the original @@ -257,19 +338,16 @@ void CTokenListDataTyperTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CTokenListDataTyperTest::testLongReverseSearch() -{ - TTokenListDataTyperKeepsFields::TTokenListReverseSearchCreatorIntfCPtr - reverseSearchCreator(new ml::api::CTokenListReverseSearchCreator("_raw")); +void CTokenListDataTyperTest::testLongReverseSearch() { + TTokenListDataTyperKeepsFields::TTokenListReverseSearchCreatorIntfCPtr reverseSearchCreator( + new ml::api::CTokenListReverseSearchCreator("_raw")); TTokenListDataTyperKeepsFields typer(reverseSearchCreator, 0.7, "_raw"); // Create a long message with lots of junk that will create a ridiculous // reverse search if not constrained std::string longMessage("a few dictionary words to start off"); - for (size_t i = 1; i < 26; ++i) - { - for (size_t j = 0; j <= i; ++j) - { + for (size_t i = 1; i < 26; ++i) { + for (size_t j = 0; j <= i; ++j) { longMessage += ' '; longMessage.append(20, char('a' + j)); } @@ -307,8 +385,7 @@ void CTokenListDataTyperTest::testLongReverseSearch() CPPUNIT_ASSERT(terms.find("off") != std::string::npos); } -void CTokenListDataTyperTest::testPreTokenised() -{ +void CTokenListDataTyperTest::testPreTokenised() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", 500)); @@ -345,8 +422,7 @@ void CTokenListDataTyperTest::testPreTokenised() CPPUNIT_ASSERT_EQUAL(5, typer.computeType(false, fields, " Service CUBE_CHIX has shut down.", 500)); } -void CTokenListDataTyperTest::testPreTokenisedPerformance() -{ +void CTokenListDataTyperTest::testPreTokenisedPerformance() { static const size_t TEST_SIZE(100000); ml::core::CStopWatch stopWatch; @@ -357,9 +433,11 @@ void CTokenListDataTyperTest::testPreTokenisedPerformance() LOG_DEBUG("Before test with inline tokenisation"); stopWatch.start(); - for (size_t count = 0; count < TEST_SIZE; ++count) - { - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", 103)); + for (size_t count = 0; count < TEST_SIZE; ++count) { + CPPUNIT_ASSERT_EQUAL( + 1, + typer.computeType( + false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", 103)); } inlineTokenisationTime = stopWatch.stop(); @@ -370,7 +448,8 @@ void CTokenListDataTyperTest::testPreTokenisedPerformance() stopWatch.reset(); TTokenListDataTyperKeepsFields::TStrStrUMap fields; - fields[TTokenListDataTyperKeepsFields::PRETOKENISED_TOKEN_FIELD] = "Vpxa,verbose,VpxaHalCnxHostagent,opID,WFU-ddeadb59,WaitForUpdatesDone,Received,callback"; + fields[TTokenListDataTyperKeepsFields::PRETOKENISED_TOKEN_FIELD] = + 
"Vpxa,verbose,VpxaHalCnxHostagent,opID,WFU-ddeadb59,WaitForUpdatesDone,Received,callback"; uint64_t preTokenisationTime(0); { @@ -379,9 +458,13 @@ void CTokenListDataTyperTest::testPreTokenisedPerformance() LOG_DEBUG("Before test with pre-tokenisation"); stopWatch.start(); - for (size_t count = 0; count < TEST_SIZE; ++count) - { - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, fields, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", 103)); + for (size_t count = 0; count < TEST_SIZE; ++count) { + CPPUNIT_ASSERT_EQUAL( + 1, + typer.computeType(false, + fields, + "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", + 103)); } preTokenisationTime = stopWatch.stop(); @@ -391,4 +474,3 @@ void CTokenListDataTyperTest::testPreTokenisedPerformance() CPPUNIT_ASSERT(preTokenisationTime <= inlineTokenisationTime); } - diff --git a/lib/api/unittest/CTokenListDataTyperTest.h b/lib/api/unittest/CTokenListDataTyperTest.h index f583888357..d7c6aacb57 100644 --- a/lib/api/unittest/CTokenListDataTyperTest.h +++ b/lib/api/unittest/CTokenListDataTyperTest.h @@ -8,29 +8,26 @@ #include +class CTokenListDataTyperTest : public CppUnit::TestFixture { +public: + void testHexData(); + void testRmdsData(); + void testProxyData(); + void testFxData(); + void testApacheData(); + void testBrokerageData(); + void testVmwareData(); + void testBankData(); + void testJavaGcData(); + void testPersist(); + void testLongReverseSearch(); + void testPreTokenised(); + void testPreTokenisedPerformance(); -class CTokenListDataTyperTest : public CppUnit::TestFixture -{ - public: - void testHexData(); - void testRmdsData(); - void testProxyData(); - void testFxData(); - void testApacheData(); - void testBrokerageData(); - void testVmwareData(); - void testBankData(); - void testJavaGcData(); - void testPersist(); - void testLongReverseSearch(); - void testPreTokenised(); - void testPreTokenisedPerformance(); + void setUp(); + void tearDown(); - void setUp(); - void tearDown(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTokenListDataTyperTest_h - diff --git a/lib/api/unittest/CTokenListReverseSearchCreatorTest.cc b/lib/api/unittest/CTokenListReverseSearchCreatorTest.cc index 0869d40518..0142a33081 100644 --- a/lib/api/unittest/CTokenListReverseSearchCreatorTest.cc +++ b/lib/api/unittest/CTokenListReverseSearchCreatorTest.cc @@ -10,43 +10,34 @@ using namespace ml; using namespace api; -CppUnit::Test *CTokenListReverseSearchCreatorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTokenListReverseSearchCreatorTest"); +CppUnit::Test* CTokenListReverseSearchCreatorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTokenListReverseSearchCreatorTest"); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testCostOfToken", - &CTokenListReverseSearchCreatorTest::testCostOfToken) ); + "CTokenListReverseSearchCreatorTest::testCostOfToken", &CTokenListReverseSearchCreatorTest::testCostOfToken)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testCreateNullSearch", - &CTokenListReverseSearchCreatorTest::testCreateNullSearch) ); + "CTokenListReverseSearchCreatorTest::testCreateNullSearch", &CTokenListReverseSearchCreatorTest::testCreateNullSearch)); + suiteOfTests->addTest( + new 
CppUnit::TestCaller("CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch", + &CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch", - &CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch) ); + "CTokenListReverseSearchCreatorTest::testInitStandardSearch", &CTokenListReverseSearchCreatorTest::testInitStandardSearch)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testInitStandardSearch", - &CTokenListReverseSearchCreatorTest::testInitStandardSearch) ); + "CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken", &CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken", - &CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken) ); + "CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken", &CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken", - &CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken) ); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testCloseStandardSearch", - &CTokenListReverseSearchCreatorTest::testCloseStandardSearch) ); + "CTokenListReverseSearchCreatorTest::testCloseStandardSearch", &CTokenListReverseSearchCreatorTest::testCloseStandardSearch)); return suiteOfTests; } -void CTokenListReverseSearchCreatorTest::testCostOfToken() -{ +void CTokenListReverseSearchCreatorTest::testCostOfToken() { CTokenListReverseSearchCreator reverseSearchCreator("foo"); CPPUNIT_ASSERT_EQUAL(std::size_t(110), reverseSearchCreator.costOfToken("someToken", 5)); } -void CTokenListReverseSearchCreatorTest::testCreateNullSearch() -{ +void CTokenListReverseSearchCreatorTest::testCreateNullSearch() { CTokenListReverseSearchCreator reverseSearchCreator("foo"); std::string reverseSearchPart1; @@ -58,42 +49,31 @@ void CTokenListReverseSearchCreatorTest::testCreateNullSearch() CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart2); } -void CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch() -{ +void CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch() { CTokenListReverseSearchCreator reverseSearchCreator("status"); std::string reverseSearchPart1; std::string reverseSearchPart2; - CPPUNIT_ASSERT(reverseSearchCreator.createNoUniqueTokenSearch(1, - "404", - 4, - reverseSearchPart1, - reverseSearchPart2)); + CPPUNIT_ASSERT(reverseSearchCreator.createNoUniqueTokenSearch(1, "404", 4, reverseSearchPart1, reverseSearchPart2)); CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart1); CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart2); } -void CTokenListReverseSearchCreatorTest::testInitStandardSearch() -{ +void CTokenListReverseSearchCreatorTest::testInitStandardSearch() { CTokenListReverseSearchCreator reverseSearchCreator("foo"); std::string reverseSearchPart1; std::string reverseSearchPart2; - reverseSearchCreator.initStandardSearch(1, - "User 'foo' logged in host '0.0.0.0'", - 1, - reverseSearchPart1, - reverseSearchPart2); + reverseSearchCreator.initStandardSearch(1, "User 'foo' logged in host '0.0.0.0'", 1, reverseSearchPart1, reverseSearchPart2); CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart1); CPPUNIT_ASSERT_EQUAL(std::string(""), 
reverseSearchPart2); } -void CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken() -{ +void CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken() { CTokenListReverseSearchCreator reverseSearchCreator("foo"); std::string reverseSearchPart1; @@ -106,8 +86,7 @@ void CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken() CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart2); } -void CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken() -{ +void CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken() { CTokenListReverseSearchCreator reverseSearchCreator("foo"); std::string reverseSearchPart1; @@ -122,8 +101,7 @@ void CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken() CPPUNIT_ASSERT_EQUAL(std::string(".*?user.+?logged.+?b=0\\.15\\+a.+?logged"), reverseSearchPart2); } -void CTokenListReverseSearchCreatorTest::testCloseStandardSearch() -{ +void CTokenListReverseSearchCreatorTest::testCloseStandardSearch() { CTokenListReverseSearchCreator reverseSearchCreator("foo"); std::string reverseSearchPart1; diff --git a/lib/api/unittest/CTokenListReverseSearchCreatorTest.h b/lib/api/unittest/CTokenListReverseSearchCreatorTest.h index e21b7e9b4f..fb48517680 100644 --- a/lib/api/unittest/CTokenListReverseSearchCreatorTest.h +++ b/lib/api/unittest/CTokenListReverseSearchCreatorTest.h @@ -8,20 +8,17 @@ #include +class CTokenListReverseSearchCreatorTest : public CppUnit::TestFixture { +public: + void testCostOfToken(); + void testCreateNullSearch(); + void testCreateNoUniqueTokenSearch(); + void testInitStandardSearch(); + void testAddCommonUniqueToken(); + void testAddInOrderCommonToken(); + void testCloseStandardSearch(); -class CTokenListReverseSearchCreatorTest : public CppUnit::TestFixture -{ - public: - void testCostOfToken(); - void testCreateNullSearch(); - void testCreateNoUniqueTokenSearch(); - void testInitStandardSearch(); - void testAddCommonUniqueToken(); - void testAddInOrderCommonToken(); - void testCloseStandardSearch(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTokenListReverseSearchCreatorTest_h - diff --git a/lib/api/unittest/Main.cc b/lib/api/unittest/Main.cc index 7380abfbb1..67cfa0ea65 100644 --- a/lib/api/unittest/Main.cc +++ b/lib/api/unittest/Main.cc @@ -35,40 +35,37 @@ #include "CTokenListDataTyperTest.h" #include "CTokenListReverseSearchCreatorTest.h" - -int main(int argc, const char **argv) -{ +int main(int argc, const char** argv) { ml::test::CTestRunner runner(argc, argv); - runner.addTest( CAnomalyJobLimitTest::suite() ); - runner.addTest( CAnomalyJobTest::suite() ); - runner.addTest( CBackgroundPersisterTest::suite() ); - runner.addTest( CBaseTokenListDataTyperTest::suite() ); - runner.addTest( CCategoryExamplesCollectorTest::suite() ); - runner.addTest( CConfigUpdaterTest::suite() ); - runner.addTest( CCsvInputParserTest::suite() ); - runner.addTest( CCsvOutputWriterTest::suite() ); - runner.addTest( CDetectionRulesJsonParserTest::suite() ); - runner.addTest( CFieldConfigTest::suite() ); - runner.addTest( CFieldDataTyperTest::suite() ); - runner.addTest( CForecastRunnerTest::suite() ); - runner.addTest( CIoManagerTest::suite() ); - runner.addTest( CJsonOutputWriterTest::suite() ); - runner.addTest( CLengthEncodedInputParserTest::suite() ); - runner.addTest( CLineifiedJsonInputParserTest::suite() ); - runner.addTest( CLineifiedJsonOutputWriterTest::suite() ); - runner.addTest( CLineifiedXmlInputParserTest::suite() ); - runner.addTest( CModelPlotDataJsonWriterTest::suite() 
);
-    runner.addTest( CModelSnapshotJsonWriterTest::suite() );
-    runner.addTest( CMultiFileDataAdderTest::suite() );
-    runner.addTest( COutputChainerTest::suite() );
-    runner.addTest( CRestorePreviousStateTest::suite() );
-    runner.addTest( CResultNormalizerTest::suite() );
-    runner.addTest( CSingleStreamDataAdderTest::suite() );
-    runner.addTest( CStringStoreTest::suite() );
-    runner.addTest( CTokenListDataTyperTest::suite() );
-    runner.addTest( CTokenListReverseSearchCreatorTest::suite() );
+    runner.addTest(CAnomalyJobLimitTest::suite());
+    runner.addTest(CAnomalyJobTest::suite());
+    runner.addTest(CBackgroundPersisterTest::suite());
+    runner.addTest(CBaseTokenListDataTyperTest::suite());
+    runner.addTest(CCategoryExamplesCollectorTest::suite());
+    runner.addTest(CConfigUpdaterTest::suite());
+    runner.addTest(CCsvInputParserTest::suite());
+    runner.addTest(CCsvOutputWriterTest::suite());
+    runner.addTest(CDetectionRulesJsonParserTest::suite());
+    runner.addTest(CFieldConfigTest::suite());
+    runner.addTest(CFieldDataTyperTest::suite());
+    runner.addTest(CForecastRunnerTest::suite());
+    runner.addTest(CIoManagerTest::suite());
+    runner.addTest(CJsonOutputWriterTest::suite());
+    runner.addTest(CLengthEncodedInputParserTest::suite());
+    runner.addTest(CLineifiedJsonInputParserTest::suite());
+    runner.addTest(CLineifiedJsonOutputWriterTest::suite());
+    runner.addTest(CLineifiedXmlInputParserTest::suite());
+    runner.addTest(CModelPlotDataJsonWriterTest::suite());
+    runner.addTest(CModelSnapshotJsonWriterTest::suite());
+    runner.addTest(CMultiFileDataAdderTest::suite());
+    runner.addTest(COutputChainerTest::suite());
+    runner.addTest(CRestorePreviousStateTest::suite());
+    runner.addTest(CResultNormalizerTest::suite());
+    runner.addTest(CSingleStreamDataAdderTest::suite());
+    runner.addTest(CStringStoreTest::suite());
+    runner.addTest(CTokenListDataTyperTest::suite());
+    runner.addTest(CTokenListReverseSearchCreatorTest::suite());
 
     return !runner.runTests();
 }
-
diff --git a/lib/config/CAutoconfigurer.cc b/lib/config/CAutoconfigurer.cc
index 53cd71dc48..a7c337a183 100644
--- a/lib/config/CAutoconfigurer.cc
+++ b/lib/config/CAutoconfigurer.cc
@@ -6,9 +6,9 @@
 
 #include
 
-#include
 #include
 #include
+#include
 
 #include
 
@@ -21,9 +21,9 @@
 #include
 #include
 #include
+#include
 #include
 #include
-#include
 #include
 #include
 
@@ -34,16 +34,12 @@
 #include
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 //! Check if we should report progress.
-bool reportProgress(uint64_t records)
-{
+bool reportProgress(uint64_t records) {
     static const double LOG_10 = maths::CTools::fastLog(10.0);
     double log10 = maths::CTools::fastLog(static_cast<double>(records) / 100.0) / LOG_10;
     uint64_t nextPow10 = static_cast<uint64_t>(std::pow(10, std::ceil(log10)));
@@ -51,184 +47,163 @@ bool reportProgress(uint64_t records)
 }
 
 const std::size_t UPDATE_SCORE_RECORD_COUNT_INTERVAL = 50000;
-const core_t::TTime UPDATE_SCORE_TIME_INTERVAL       = 172800;
-
+const core_t::TTime UPDATE_SCORE_TIME_INTERVAL = 172800;
 }
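The reportProgress helper just above throttles logging so that progress messages become rarer as the record count grows; it rounds the count to a power of ten using maths::CTools::fastLog. The hunk cuts off before the function's return statement, so the exact reporting rule is not visible here; the following standalone sketch shows the intent under the assumption that a message fires when the count reaches 100 times a power of ten:

#include <cstdint>
#include <iostream>

// Assumed rule: report at 100, 1'000, 10'000, ... records. The library
// computes a comparable power-of-ten threshold with fastLog and std::ceil.
bool shouldReport(std::uint64_t records) {
    if (records < 100 || records % 100 != 0) {
        return false;
    }
    std::uint64_t scaled = records / 100;
    while (scaled % 10 == 0) {
        scaled /= 10; // strip trailing powers of ten
    }
    return scaled == 1; // true only for 100 times an exact power of ten
}

int main() {
    for (std::uint64_t n : {99, 100, 500, 1000, 20000, 100000}) {
        std::cout << n << ": " << (shouldReport(n) ? "report" : "skip") << '\n';
    }
}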
 
 //! \brief The implementation of automatic configuration.
-class CONFIG_EXPORT CAutoconfigurerImpl : public core::CNonCopyable
-{
-    public:
-        using TStrVec = std::vector<std::string>;
-        using TStrStrUMap = boost::unordered_map<std::string, std::string>;
-        using TStrStrUMapCItr = TStrStrUMap::const_iterator;
+class CONFIG_EXPORT CAutoconfigurerImpl : public core::CNonCopyable {
+public:
+    using TStrVec = std::vector<std::string>;
+    using TStrStrUMap = boost::unordered_map<std::string, std::string>;
+    using TStrStrUMapCItr = TStrStrUMap::const_iterator;
 
-    public:
-        CAutoconfigurerImpl(const CAutoconfigurerParams &params, CReportWriter &reportWriter);
+public:
+    CAutoconfigurerImpl(const CAutoconfigurerParams& params, CReportWriter& reportWriter);
 
-        //! Receive a single record to be processed.
-        bool handleRecord(const TStrStrUMap &fieldValues);
+    //! Receive a single record to be processed.
+    bool handleRecord(const TStrStrUMap& fieldValues);
 
-        //! Generate the report.
-        void finalise();
+    //! Generate the report.
+    void finalise();
 
-        //! Get the report writer.
-        CReportWriter &reportWriter();
+    //! Get the report writer.
+    CReportWriter& reportWriter();
 
-        //! How many records did we handle?
-        uint64_t numRecordsHandled() const;
+    //! How many records did we handle?
+    uint64_t numRecordsHandled() const;
 
-    private:
-        using TTimeStrStrUMapPr = std::pair<core_t::TTime, TStrStrUMap>;
-        using TTimeStrStrUMapPrVec = std::vector<TTimeStrStrUMapPr>;
-        using TOptionalUserDataType = boost::optional<config_t::EUserDataType>;
-        using TDetectorSpecificationVec = std::vector<CDetectorSpecification>;
-        using TFieldStatisticsVec = std::vector<CFieldStatistics>;
+private:
+    using TTimeStrStrUMapPr = std::pair<core_t::TTime, TStrStrUMap>;
+    using TTimeStrStrUMapPrVec = std::vector<TTimeStrStrUMapPr>;
+    using TOptionalUserDataType = boost::optional<config_t::EUserDataType>;
+    using TDetectorSpecificationVec = std::vector<CDetectorSpecification>;
+    using TFieldStatisticsVec = std::vector<CFieldStatistics>;
 
-    private:
-        //! Extract the time from \p fieldValues.
-        bool extractTime(const TStrStrUMap &fieldValues,
-                         core_t::TTime &time) const;
+private:
+    //! Extract the time from \p fieldValues.
+    bool extractTime(const TStrStrUMap& fieldValues, core_t::TTime& time) const;
 
-        //! Initialize the field statistics.
-        void initializeFieldStatisticsOnce(const TStrStrUMap &fieldValues);
+    //! Initialize the field statistics.
+    void initializeFieldStatisticsOnce(const TStrStrUMap& fieldValues);
 
-        //! Actually process the content of the record.
-        void processRecord(core_t::TTime time, const TStrStrUMap &dataRowFields);
+    //! Actually process the content of the record.
+    void processRecord(core_t::TTime time, const TStrStrUMap& dataRowFields);
 
-        //! Update the statistics with \p time and \p fieldValues and maybe
-        //! recompute detector scores and prune.
-        void updateStatisticsAndMaybeComputeScores(core_t::TTime time,
-                                                   const TStrStrUMap &fieldValues);
+    //! Update the statistics with \p time and \p fieldValues and maybe
+    //! recompute detector scores and prune.
+    void updateStatisticsAndMaybeComputeScores(core_t::TTime time, const TStrStrUMap& fieldValues);
 
-        //! Compute the detector scores.
-        void computeScores(bool final);
+    //! Compute the detector scores.
+    void computeScores(bool final);
 
-        //! Generate the candidate detectors to evaluate.
-        void generateCandidateDetectorsOnce();
+    //! Generate the candidate detectors to evaluate.
+    void generateCandidateDetectorsOnce();
 
-        //! Run the records in the buffer through the detector scorers.
-        void replayBuffer();
+    //! Run the records in the buffer through the detector scorers.
+    void replayBuffer();
 
-    private:
-        //! The parameters.
-        CAutoconfigurerParams m_Params;
+private:
+    //! The parameters.
+    CAutoconfigurerParams m_Params;
 
-        //! 
Set to true the first time initializeOnce is called. + bool m_Initialized; - //! The number of records supplied to handleRecord. - uint64_t m_NumberRecords; + //! The number of records supplied to handleRecord. + uint64_t m_NumberRecords; - //! The number of records with no time field. - uint64_t m_NumberRecordsWithNoOrInvalidTime; + //! The number of records with no time field. + uint64_t m_NumberRecordsWithNoOrInvalidTime; - //! The last time the detector scores were refreshed. - core_t::TTime m_LastTimeScoresWereRefreshed; + //! The last time the detector scores were refreshed. + core_t::TTime m_LastTimeScoresWereRefreshed; - //! A buffer of the records before the configuration has begun. - TTimeStrStrUMapPrVec m_Buffer; + //! A buffer of the records before the configuration has begun. + TTimeStrStrUMapPrVec m_Buffer; - //! The field semantics and summary statistics. - TFieldStatisticsVec m_FieldStatistics; + //! The field semantics and summary statistics. + TFieldStatisticsVec m_FieldStatistics; - //! The detector count data statistics. - CDataCountStatisticsDirectAddressTable m_DetectorCountStatistics; + //! The detector count data statistics. + CDataCountStatisticsDirectAddressTable m_DetectorCountStatistics; - //! The field role penalties. - CAutoconfigurerFieldRolePenalties m_FieldRolePenalties; + //! The field role penalties. + CAutoconfigurerFieldRolePenalties m_FieldRolePenalties; - //! The detector penalties. - CAutoconfigurerDetectorPenalties m_DetectorPenalties; + //! The detector penalties. + CAutoconfigurerDetectorPenalties m_DetectorPenalties; - //! Set to true the first time generateCandidateDetectorsOnce is called. - bool m_GeneratedCandidateFieldNames; + //! Set to true the first time generateCandidateDetectorsOnce is called. + bool m_GeneratedCandidateFieldNames; - //! The candidate detectors. - TDetectorSpecificationVec m_CandidateDetectors; + //! The candidate detectors. + TDetectorSpecificationVec m_CandidateDetectors; - //! Efficiently extracts the detector's records. - CDetectorRecordDirectAddressTable m_DetectorRecordFactory; + //! Efficiently extracts the detector's records. + CDetectorRecordDirectAddressTable m_DetectorRecordFactory; - //! Writes out a report on the data and recommended configurations. - CReportWriter &m_ReportWriter; + //! Writes out a report on the data and recommended configurations. 
+    CReportWriter& m_ReportWriter;
 };
-
 //////// CAutoconfigurer ////////
 
-CAutoconfigurer::CAutoconfigurer(const CAutoconfigurerParams &params,
-                                 CReportWriter &reportWriter) :
-        m_Impl(new CAutoconfigurerImpl(params, reportWriter))
-{
+CAutoconfigurer::CAutoconfigurer(const CAutoconfigurerParams& params, CReportWriter& reportWriter)
+    : m_Impl(new CAutoconfigurerImpl(params, reportWriter)) {
 }
 
-void CAutoconfigurer::newOutputStream()
-{
+void CAutoconfigurer::newOutputStream() {
     m_Impl->reportWriter().newOutputStream();
 }
 
-bool CAutoconfigurer::handleRecord(const TStrStrUMap &fieldValues)
-{
+bool CAutoconfigurer::handleRecord(const TStrStrUMap& fieldValues) {
     return m_Impl->handleRecord(fieldValues);
 }
 
-void CAutoconfigurer::finalise()
-{
+void CAutoconfigurer::finalise() {
     m_Impl->finalise();
 }
 
-bool CAutoconfigurer::restoreState(core::CDataSearcher &/*restoreSearcher*/,
-                                   core_t::TTime &/*completeToTime*/)
-{
+bool CAutoconfigurer::restoreState(core::CDataSearcher& /*restoreSearcher*/, core_t::TTime& /*completeToTime*/) {
     return true;
 }
 
-bool CAutoconfigurer::persistState(core::CDataAdder &/*persister*/)
-{
+bool CAutoconfigurer::persistState(core::CDataAdder& /*persister*/) {
     return true;
 }
 
-uint64_t CAutoconfigurer::numRecordsHandled() const
-{
+uint64_t CAutoconfigurer::numRecordsHandled() const {
     return m_Impl->numRecordsHandled();
 }
 
-api::COutputHandler &CAutoconfigurer::outputHandler()
-{
+api::COutputHandler& CAutoconfigurer::outputHandler() {
     return m_Impl->reportWriter();
 }
-
 //////// CAutoconfigurerImpl ////////
 
-CAutoconfigurerImpl::CAutoconfigurerImpl(const CAutoconfigurerParams &params,
-                                         CReportWriter &reportWriter) :
-        m_Params(params),
-        m_Initialized(false),
-        m_NumberRecords(0),
-        m_NumberRecordsWithNoOrInvalidTime(0),
-        m_LastTimeScoresWereRefreshed(boost::numeric::bounds<core_t::TTime>::lowest()),
-        m_DetectorCountStatistics(m_Params),
-        m_FieldRolePenalties(m_Params),
-        m_DetectorPenalties(m_Params, m_FieldRolePenalties),
-        m_GeneratedCandidateFieldNames(false),
-        m_ReportWriter(reportWriter)
-{
+CAutoconfigurerImpl::CAutoconfigurerImpl(const CAutoconfigurerParams& params, CReportWriter& reportWriter)
+    : m_Params(params),
+      m_Initialized(false),
+      m_NumberRecords(0),
+      m_NumberRecordsWithNoOrInvalidTime(0),
+      m_LastTimeScoresWereRefreshed(boost::numeric::bounds<core_t::TTime>::lowest()),
+      m_DetectorCountStatistics(m_Params),
+      m_FieldRolePenalties(m_Params),
+      m_DetectorPenalties(m_Params, m_FieldRolePenalties),
+      m_GeneratedCandidateFieldNames(false),
+      m_ReportWriter(reportWriter) {
 }
 
-bool CAutoconfigurerImpl::handleRecord(const TStrStrUMap &fieldValues)
-{
+bool CAutoconfigurerImpl::handleRecord(const TStrStrUMap& fieldValues) {
     ++m_NumberRecords;
-    if (reportProgress(m_NumberRecords))
-    {
+    if (reportProgress(m_NumberRecords)) {
         LOG_DEBUG("Processed " << m_NumberRecords << " records");
     }
 
     core_t::TTime time = 0;
-    if (!this->extractTime(fieldValues, time))
-    {
+    if (!this->extractTime(fieldValues, time)) {
        ++m_NumberRecordsWithNoOrInvalidTime;
         return true;
     }
@@ -238,8 +213,7 @@ bool CAutoconfigurerImpl::handleRecord(const TStrStrUMap &fieldValues)
     return true;
 }
 
-void CAutoconfigurerImpl::finalise()
-{
+void CAutoconfigurerImpl::finalise() {
     LOG_TRACE("CAutoconfigurerImpl::finalise...");
 
     this->computeScores(true);
@@ -247,26 +221,21 @@ void CAutoconfigurerImpl::finalise()
     m_ReportWriter.addTotalRecords(m_NumberRecords);
     m_ReportWriter.addInvalidRecords(m_NumberRecordsWithNoOrInvalidTime);
-    for (std::size_t i = 0u; i < m_FieldStatistics.size(); ++i)
-    {
-        const std::string &name = 
m_FieldStatistics[i].name(); + for (std::size_t i = 0u; i < m_FieldStatistics.size(); ++i) { + const std::string& name = m_FieldStatistics[i].name(); config_t::EDataType type = m_FieldStatistics[i].type(); - if (const CDataSummaryStatistics *summary = m_FieldStatistics[i].summary()) - { + if (const CDataSummaryStatistics* summary = m_FieldStatistics[i].summary()) { m_ReportWriter.addFieldStatistics(name, type, *summary); } - if (const CCategoricalDataSummaryStatistics *summary = m_FieldStatistics[i].categoricalSummary()) - { + if (const CCategoricalDataSummaryStatistics* summary = m_FieldStatistics[i].categoricalSummary()) { m_ReportWriter.addFieldStatistics(name, type, *summary); } - if (const CNumericDataSummaryStatistics *summary = m_FieldStatistics[i].numericSummary()) - { + if (const CNumericDataSummaryStatistics* summary = m_FieldStatistics[i].numericSummary()) { m_ReportWriter.addFieldStatistics(name, type, *summary); } } - for (std::size_t i = 0u; i < m_CandidateDetectors.size(); ++i) - { + for (std::size_t i = 0u; i < m_CandidateDetectors.size(); ++i) { m_ReportWriter.addDetector(m_CandidateDetectors[i]); } @@ -275,64 +244,48 @@ void CAutoconfigurerImpl::finalise() LOG_TRACE("CAutoconfigurerImpl::finalise done"); } -CReportWriter &CAutoconfigurerImpl::reportWriter() -{ +CReportWriter& CAutoconfigurerImpl::reportWriter() { return m_ReportWriter; } -uint64_t CAutoconfigurerImpl::numRecordsHandled() const -{ +uint64_t CAutoconfigurerImpl::numRecordsHandled() const { return m_NumberRecords; } -bool CAutoconfigurerImpl::extractTime(const TStrStrUMap &fieldValues, - core_t::TTime &time) const -{ +bool CAutoconfigurerImpl::extractTime(const TStrStrUMap& fieldValues, core_t::TTime& time) const { TStrStrUMapCItr i = fieldValues.find(m_Params.timeFieldName()); - if (i == fieldValues.end()) - { - LOG_ERROR("No time field '" << m_Params.timeFieldName() - << "' in record:" << core_t::LINE_ENDING - << CAutoconfigurer::debugPrintRecord(fieldValues)); + if (i == fieldValues.end()) { + LOG_ERROR("No time field '" << m_Params.timeFieldName() << "' in record:" << core_t::LINE_ENDING + << CAutoconfigurer::debugPrintRecord(fieldValues)); return false; } - if (m_Params.timeFieldFormat().empty()) - { - if (!core::CStringUtils::stringToType(i->second, time)) - { - LOG_ERROR("Cannot interpret time field '" << m_Params.timeFieldName() - << "' in record:" << core_t::LINE_ENDING - << CAutoconfigurer::debugPrintRecord(fieldValues)); + if (m_Params.timeFieldFormat().empty()) { + if (!core::CStringUtils::stringToType(i->second, time)) { + LOG_ERROR("Cannot interpret time field '" << m_Params.timeFieldName() << "' in record:" << core_t::LINE_ENDING + << CAutoconfigurer::debugPrintRecord(fieldValues)); return false; } - } - else if (!core::CTimeUtils::strptime(m_Params.timeFieldFormat(), i->second, time)) - { - LOG_ERROR("Cannot interpret time field '" << m_Params.timeFieldName() - << "' using format '" << m_Params.timeFieldFormat() - << "' in record:" << core_t::LINE_ENDING - << CAutoconfigurer::debugPrintRecord(fieldValues)); + } else if (!core::CTimeUtils::strptime(m_Params.timeFieldFormat(), i->second, time)) { + LOG_ERROR("Cannot interpret time field '" << m_Params.timeFieldName() << "' using format '" << m_Params.timeFieldFormat() + << "' in record:" << core_t::LINE_ENDING + << CAutoconfigurer::debugPrintRecord(fieldValues)); return false; } return true; } -void CAutoconfigurerImpl::initializeFieldStatisticsOnce(const TStrStrUMap &fieldValues) -{ - if (m_Initialized) - { +void 
CAutoconfigurerImpl::initializeFieldStatisticsOnce(const TStrStrUMap& fieldValues) { + if (m_Initialized) { return; } m_FieldStatistics.reserve(fieldValues.size()); - for (const auto &entry : fieldValues) - { - const std::string &fieldName = entry.first; - if (fieldName != m_Params.timeFieldName() && m_Params.fieldOfInterest(fieldName)) - { + for (const auto& entry : fieldValues) { + const std::string& fieldName = entry.first; + if (fieldName != m_Params.timeFieldName() && m_Params.fieldOfInterest(fieldName)) { LOG_DEBUG("Adding field '" << fieldName << "'"); m_FieldStatistics.push_back(CFieldStatistics(fieldName, m_Params)); } @@ -341,66 +294,51 @@ void CAutoconfigurerImpl::initializeFieldStatisticsOnce(const TStrStrUMap &field m_Initialized = true; } -void CAutoconfigurerImpl::processRecord(core_t::TTime time, const TStrStrUMap &fieldValues) -{ - for (std::size_t i = 0u; i < m_FieldStatistics.size(); ++i) - { +void CAutoconfigurerImpl::processRecord(core_t::TTime time, const TStrStrUMap& fieldValues) { + for (std::size_t i = 0u; i < m_FieldStatistics.size(); ++i) { TStrStrUMapCItr j = fieldValues.find(m_FieldStatistics[i].name()); - if (j != fieldValues.end()) - { + if (j != fieldValues.end()) { m_FieldStatistics[i].add(time, j->second); } } - if (m_NumberRecords < m_Params.minimumRecordsToAttemptConfig()) - { + if (m_NumberRecords < m_Params.minimumRecordsToAttemptConfig()) { m_Buffer.push_back(std::make_pair(time, fieldValues)); - } - else - { + } else { this->generateCandidateDetectorsOnce(); this->replayBuffer(); this->updateStatisticsAndMaybeComputeScores(time, fieldValues); } } -void CAutoconfigurerImpl::updateStatisticsAndMaybeComputeScores(core_t::TTime time, - const TStrStrUMap &fieldValues) -{ +void CAutoconfigurerImpl::updateStatisticsAndMaybeComputeScores(core_t::TTime time, const TStrStrUMap& fieldValues) { CDetectorRecordDirectAddressTable::TDetectorRecordVec records; m_DetectorRecordFactory.detectorRecords(time, fieldValues, m_CandidateDetectors, records); m_DetectorCountStatistics.add(records); - if ( m_NumberRecords % UPDATE_SCORE_RECORD_COUNT_INTERVAL == 0 - && time >= m_LastTimeScoresWereRefreshed + UPDATE_SCORE_TIME_INTERVAL) - { + if (m_NumberRecords % UPDATE_SCORE_RECORD_COUNT_INTERVAL == 0 && time >= m_LastTimeScoresWereRefreshed + UPDATE_SCORE_TIME_INTERVAL) { this->computeScores(false); m_LastTimeScoresWereRefreshed = time; } } -void CAutoconfigurerImpl::computeScores(bool final) -{ +void CAutoconfigurerImpl::computeScores(bool final) { LOG_TRACE("CAutoconfigurerImpl::computeScores..."); std::size_t last = 0u; - for (std::size_t i = 0u; i < m_CandidateDetectors.size(); ++i) - { + for (std::size_t i = 0u; i < m_CandidateDetectors.size(); ++i) { LOG_TRACE("Refreshing scores for " << m_CandidateDetectors[i].description()); m_CandidateDetectors[i].refreshScores(); LOG_TRACE("score = " << m_CandidateDetectors[i].score()); - if (m_CandidateDetectors[i].score() > (final ? m_Params.minimumDetectorScore() : 0.0)) - { - if (i > last) - { + if (m_CandidateDetectors[i].score() > (final ? 
m_Params.minimumDetectorScore() : 0.0)) { + if (i > last) { m_CandidateDetectors[i].swap(m_CandidateDetectors[last]); } ++last; } } - if (last < m_CandidateDetectors.size()) - { + if (last < m_CandidateDetectors.size()) { LOG_DEBUG("Removing " << m_CandidateDetectors.size() - last << " detectors"); m_CandidateDetectors.erase(m_CandidateDetectors.begin() + last, m_CandidateDetectors.end()); m_DetectorRecordFactory.build(m_CandidateDetectors); @@ -410,67 +348,49 @@ void CAutoconfigurerImpl::computeScores(bool final) LOG_TRACE("CAutoconfigurerImpl::computeScores done"); } -void CAutoconfigurerImpl::generateCandidateDetectorsOnce() -{ - if (m_GeneratedCandidateFieldNames) - { +void CAutoconfigurerImpl::generateCandidateDetectorsOnce() { + if (m_GeneratedCandidateFieldNames) { return; } LOG_DEBUG("Generate Candidate Detectors:"); - using TAddField = void (CDetectorEnumerator::*)(const std::string &); - using TCanUse = bool (CAutoconfigurerParams::*)(const std::string &) const; + using TAddField = void (CDetectorEnumerator::*)(const std::string&); + using TCanUse = bool (CAutoconfigurerParams::*)(const std::string&) const; CDetectorEnumerator enumerator(m_Params); - for (std::size_t i = 0u; i < m_Params.functionsCategoriesToConfigure().size(); ++i) - { + for (std::size_t i = 0u; i < m_Params.functionsCategoriesToConfigure().size(); ++i) { enumerator.addFunction(m_Params.functionsCategoriesToConfigure()[i]); } - for (std::size_t i = 0u; i < m_FieldStatistics.size(); ++i) - { - static std::string FIELD_NAMES[] = - { - std::string("categorical argument"), - std::string("metric argument"), - std::string("by field"), - std::string("rare function by field"), - std::string("over field"), - std::string("partition field") - }; - static TAddField ADD_FIELD[] = - { - &CDetectorEnumerator::addCategoricalFunctionArgument, - &CDetectorEnumerator::addMetricFunctionArgument, - &CDetectorEnumerator::addByField, - &CDetectorEnumerator::addRareByField, - &CDetectorEnumerator::addOverField, - &CDetectorEnumerator::addPartitionField - }; - static TCanUse CAN_USE[] = - { - &CAutoconfigurerParams::canUseForFunctionArgument, - &CAutoconfigurerParams::canUseForFunctionArgument, - &CAutoconfigurerParams::canUseForByField, - &CAutoconfigurerParams::canUseForByField, - &CAutoconfigurerParams::canUseForOverField, - &CAutoconfigurerParams::canUseForPartitionField - }; - double scores[] = - { - m_FieldStatistics[i].score(m_FieldRolePenalties.categoricalFunctionArgumentPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.metricFunctionArgumentPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.byPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.rareByPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.overPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.partitionPenalty()) - }; - - const std::string &fieldName = m_FieldStatistics[i].name(); - for (std::size_t j = 0u; j < boost::size(FIELD_NAMES); ++j) - { - if ((m_Params.*CAN_USE[j])(fieldName) && scores[j] > 0.0) - { + for (std::size_t i = 0u; i < m_FieldStatistics.size(); ++i) { + static std::string FIELD_NAMES[] = {std::string("categorical argument"), + std::string("metric argument"), + std::string("by field"), + std::string("rare function by field"), + std::string("over field"), + std::string("partition field")}; + static TAddField ADD_FIELD[] = {&CDetectorEnumerator::addCategoricalFunctionArgument, + &CDetectorEnumerator::addMetricFunctionArgument, + &CDetectorEnumerator::addByField, + 
&CDetectorEnumerator::addRareByField, + &CDetectorEnumerator::addOverField, + &CDetectorEnumerator::addPartitionField}; + static TCanUse CAN_USE[] = {&CAutoconfigurerParams::canUseForFunctionArgument, + &CAutoconfigurerParams::canUseForFunctionArgument, + &CAutoconfigurerParams::canUseForByField, + &CAutoconfigurerParams::canUseForByField, + &CAutoconfigurerParams::canUseForOverField, + &CAutoconfigurerParams::canUseForPartitionField}; + double scores[] = {m_FieldStatistics[i].score(m_FieldRolePenalties.categoricalFunctionArgumentPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.metricFunctionArgumentPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.byPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.rareByPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.overPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.partitionPenalty())}; + + const std::string& fieldName = m_FieldStatistics[i].name(); + for (std::size_t j = 0u; j < boost::size(FIELD_NAMES); ++j) { + if ((m_Params.*CAN_USE[j])(fieldName) && scores[j] > 0.0) { LOG_DEBUG(FIELD_NAMES[j] << " '" << fieldName << "' with score " << scores[j]); (enumerator.*ADD_FIELD[j])(fieldName); } @@ -484,9 +404,8 @@ void CAutoconfigurerImpl::generateCandidateDetectorsOnce() m_DetectorCountStatistics.build(m_CandidateDetectors); m_DetectorRecordFactory.build(m_CandidateDetectors); - for (std::size_t i = 0u; i < m_CandidateDetectors.size(); ++i) - { - CDetectorSpecification &spec = m_CandidateDetectors[i]; + for (std::size_t i = 0u; i < m_CandidateDetectors.size(); ++i) { + CDetectorSpecification& spec = m_CandidateDetectors[i]; spec.addFieldStatistics(m_FieldStatistics); spec.setPenalty(m_DetectorPenalties.penaltyFor(spec)); spec.setCountStatistics(m_DetectorCountStatistics.statistics(spec)); @@ -495,12 +414,9 @@ void CAutoconfigurerImpl::generateCandidateDetectorsOnce() m_GeneratedCandidateFieldNames = true; } -void CAutoconfigurerImpl::replayBuffer() -{ - for (std::size_t i = 0u; i < m_Buffer.size(); ++i) - { - if (reportProgress(i)) - { +void CAutoconfigurerImpl::replayBuffer() { + for (std::size_t i = 0u; i < m_Buffer.size(); ++i) { + if (reportProgress(i)) { LOG_DEBUG("Replayed " << i << " records"); } this->updateStatisticsAndMaybeComputeScores(m_Buffer[i].first, m_Buffer[i].second); @@ -508,6 +424,5 @@ void CAutoconfigurerImpl::replayBuffer() TTimeStrStrUMapPrVec empty; m_Buffer.swap(empty); } - } } diff --git a/lib/config/CAutoconfigurerDetectorPenalties.cc b/lib/config/CAutoconfigurerDetectorPenalties.cc index 87ab93d3a0..1febb72fec 100644 --- a/lib/config/CAutoconfigurerDetectorPenalties.cc +++ b/lib/config/CAutoconfigurerDetectorPenalties.cc @@ -22,78 +22,54 @@ #include -namespace ml -{ -namespace config -{ -namespace -{ +namespace ml { +namespace config { +namespace { //! Get the index of the detector \p spec's field role penalty. -std::size_t fieldRolePenaltyIndex(const CDetectorSpecification &spec) -{ - static const std::size_t SKIPS[] = { 1, 2, 3, 6, 9, 18 }; - return (spec.argumentField() ? SKIPS[0 + config_t::isMetric(spec.function())] : 0) - + (spec.byField() ? SKIPS[2 + config_t::isRare(spec.function())] : 0) - + (spec.overField() ? SKIPS[4] : 0) - + (spec.partitionField() ? SKIPS[5] : 0); +std::size_t fieldRolePenaltyIndex(const CDetectorSpecification& spec) { + static const std::size_t SKIPS[] = {1, 2, 3, 6, 9, 18}; + return (spec.argumentField() ? SKIPS[0 + config_t::isMetric(spec.function())] : 0) + + (spec.byField() ? 
SKIPS[2 + config_t::isRare(spec.function())] : 0) + (spec.overField() ? SKIPS[4] : 0) +
+           (spec.partitionField() ? SKIPS[5] : 0);
 }
-
 }
 
-CAutoconfigurerDetectorPenalties::CAutoconfigurerDetectorPenalties(const CAutoconfigurerParams &params,
-                                                                   const CAutoconfigurerFieldRolePenalties &fieldRolePenalties) :
-        m_Params(params),
-        m_FieldRolePenalties(fieldRolePenalties)
-{}
+CAutoconfigurerDetectorPenalties::CAutoconfigurerDetectorPenalties(const CAutoconfigurerParams& params,
+                                                                   const CAutoconfigurerFieldRolePenalties& fieldRolePenalties)
+    : m_Params(params), m_FieldRolePenalties(fieldRolePenalties) {
+}
 
-CAutoconfigurerDetectorPenalties::TPenaltyPtr
-    CAutoconfigurerDetectorPenalties::penaltyFor(const CDetectorSpecification &spec)
-{
-    return TPenaltyPtr((  this->fieldRolePenalty(spec)
-                        * CSpanTooSmallForBucketLengthPenalty(m_Params)
-                        * CPolledDataPenalty(m_Params)
-                        * CLongTailPenalty(m_Params)
-                        * CLowInformationContentPenalty(m_Params)
-                        * CNotEnoughDataPenalty(m_Params)
-                        * CTooMuchDataPenalty(m_Params)
-                        * CLowVariationPenalty(m_Params)
-                        * CSparseCountPenalty(m_Params)).clone());
+CAutoconfigurerDetectorPenalties::TPenaltyPtr CAutoconfigurerDetectorPenalties::penaltyFor(const CDetectorSpecification& spec) {
+    return TPenaltyPtr((this->fieldRolePenalty(spec) * CSpanTooSmallForBucketLengthPenalty(m_Params) * CPolledDataPenalty(m_Params) *
+                        CLongTailPenalty(m_Params) * CLowInformationContentPenalty(m_Params) * CNotEnoughDataPenalty(m_Params) *
+                        CTooMuchDataPenalty(m_Params) * CLowVariationPenalty(m_Params) * CSparseCountPenalty(m_Params))
+                           .clone());
 }
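penaltyFor composes many penalty functors with operator*, so a detector's overall multiplier is the product of the individual penalties and a single near-zero penalty effectively vetoes the detector. The CPenalty hierarchy itself is not shown in this patch; the sketch below reproduces only the composition pattern, with an invented Penalty class:

#include <algorithm>
#include <iostream>

class Penalty {
public:
    explicit Penalty(double factor) : m_Factor(std::max(0.0, std::min(1.0, factor))) {}
    // Multiplying penalties multiplies their damping factors.
    Penalty operator*(const Penalty& other) const { return Penalty(m_Factor * other.m_Factor); }
    double apply(double score) const { return score * m_Factor; }
private:
    double m_Factor;
};

int main() {
    Penalty longTail(0.9), lowVariation(0.5), sparseCount(1.0);
    Penalty combined = longTail * lowVariation * sparseCount;
    std::cout << combined.apply(80.0) << '\n'; // 36: the product 0.45 scales the score
}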
 
-const CPenalty &CAutoconfigurerDetectorPenalties::fieldRolePenalty(const CDetectorSpecification &spec)
-{
+const CPenalty& CAutoconfigurerDetectorPenalties::fieldRolePenalty(const CDetectorSpecification& spec) {
     m_DetectorFieldRolePenalties.resize(36);
-    TPenaltyPtr &result = m_DetectorFieldRolePenalties[fieldRolePenaltyIndex(spec)];
-    if (!result)
-    {
+    TPenaltyPtr& result = m_DetectorFieldRolePenalties[fieldRolePenaltyIndex(spec)];
+    if (!result) {
         CDetectorFieldRolePenalty penalty(m_Params);
-        const CAutoconfigurerFieldRolePenalties &penalties = m_FieldRolePenalties;
-        if (spec.argumentField())
-        {
+        const CAutoconfigurerFieldRolePenalties& penalties = m_FieldRolePenalties;
+        if (spec.argumentField()) {
             penalty.addPenalty(constants::ARGUMENT_INDEX,
-                               config_t::isMetric(spec.function()) ?
-                               penalties.metricFunctionArgumentPenalty() :
-                               penalties.categoricalFunctionArgumentPenalty());
+                               config_t::isMetric(spec.function()) ? penalties.metricFunctionArgumentPenalty()
+                                                                   : penalties.categoricalFunctionArgumentPenalty());
         }
-        if (spec.byField())
-        {
-            penalty.addPenalty(constants::BY_INDEX,
-                               config_t::isRare(spec.function()) ? penalties.rareByPenalty() :
-                               penalties.byPenalty());
+        if (spec.byField()) {
+            penalty.addPenalty(constants::BY_INDEX, config_t::isRare(spec.function()) ? penalties.rareByPenalty() : penalties.byPenalty());
         }
-        if (spec.overField())
-        {
+        if (spec.overField()) {
             penalty.addPenalty(constants::OVER_INDEX, penalties.overPenalty());
         }
-        if (spec.partitionField())
-        {
+        if (spec.partitionField()) {
             penalty.addPenalty(constants::PARTITION_INDEX, penalties.partitionPenalty());
         }
         result.reset(penalty.clone());
     }
     return *result;
 }
-
 }
 }
diff --git a/lib/config/CAutoconfigurerFieldRolePenalties.cc b/lib/config/CAutoconfigurerFieldRolePenalties.cc
index bd94b9418c..262186b65a 100644
--- a/lib/config/CAutoconfigurerFieldRolePenalties.cc
+++ b/lib/config/CAutoconfigurerFieldRolePenalties.cc
@@ -14,89 +14,62 @@
 
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 const std::size_t CATEGORICAL_ARGUMENT_INDEX = 0u;
-const std::size_t METRIC_ARGUMENT_INDEX      = 1u;
-const std::size_t BY_INDEX                   = 2u;
-const std::size_t RARE_BY_INDEX              = 3u;
-const std::size_t OVER_INDEX                 = 4u;
-const std::size_t PARTITION_INDEX            = 5u;
+const std::size_t METRIC_ARGUMENT_INDEX = 1u;
+const std::size_t BY_INDEX = 2u;
+const std::size_t RARE_BY_INDEX = 3u;
+const std::size_t OVER_INDEX = 4u;
+const std::size_t PARTITION_INDEX = 5u;
 
 using TCountThreshold = std::size_t (CAutoconfigurerParams::*)() const;
 
-const std::size_t PENALTY_INDICES[] =
-    {
-        BY_INDEX,
-        RARE_BY_INDEX,
-        OVER_INDEX,
-        PARTITION_INDEX
-    };
-const TCountThreshold PENALTY_THRESHOLD[] =
-    {
-        &CAutoconfigurerParams::highNumberByFieldValues,
-        &CAutoconfigurerParams::highNumberRareByFieldValues,
-        &CAutoconfigurerParams::lowNumberOverFieldValues,
-        &CAutoconfigurerParams::highNumberPartitionFieldValues
-    };
-const TCountThreshold HARD_CUTOFF[] =
-    {
-        &CAutoconfigurerParams::maximumNumberByFieldValues,
-        &CAutoconfigurerParams::maximumNumberRareByFieldValues,
-        &CAutoconfigurerParams::minimumNumberOverFieldValues,
-        &CAutoconfigurerParams::maximumNumberPartitionFieldValues
-    };
+const std::size_t PENALTY_INDICES[] = {BY_INDEX, RARE_BY_INDEX, OVER_INDEX, PARTITION_INDEX};
+const TCountThreshold PENALTY_THRESHOLD[] = {&CAutoconfigurerParams::highNumberByFieldValues,
+                                             &CAutoconfigurerParams::highNumberRareByFieldValues,
+                                             &CAutoconfigurerParams::lowNumberOverFieldValues,
+                                             &CAutoconfigurerParams::highNumberPartitionFieldValues};
+const TCountThreshold HARD_CUTOFF[] = {&CAutoconfigurerParams::maximumNumberByFieldValues,
+                                       &CAutoconfigurerParams::maximumNumberRareByFieldValues,
+                                       &CAutoconfigurerParams::minimumNumberOverFieldValues,
+                                       &CAutoconfigurerParams::maximumNumberPartitionFieldValues};
 }
 
-CAutoconfigurerFieldRolePenalties::CAutoconfigurerFieldRolePenalties(const CAutoconfigurerParams &params)
-{
-    m_Penalties[CATEGORICAL_ARGUMENT_INDEX].reset(
-        (  CCantBeNumeric(params)
-         * CDontUseUnaryField(params)).clone());
+CAutoconfigurerFieldRolePenalties::CAutoconfigurerFieldRolePenalties(const CAutoconfigurerParams& params) {
+    m_Penalties[CATEGORICAL_ARGUMENT_INDEX].reset((CCantBeNumeric(params) * CDontUseUnaryField(params)).clone());
     m_Penalties[METRIC_ARGUMENT_INDEX].reset(new CCantBeCategorical(params));
-    for (std::size_t i = 0u; i < boost::size(PENALTY_INDICES); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(PENALTY_INDICES); ++i) {
         m_Penalties[PENALTY_INDICES[i]].reset(
-            (  CCantBeNumeric(params)
-             * CDistinctCountThresholdPenalty(params,
-                                              (params.*PENALTY_THRESHOLD[i])(),
-                                              (params.*HARD_CUTOFF[i])())
-             * CDontUseUnaryField(params)).clone());
+            (CCantBeNumeric(params) * CDistinctCountThresholdPenalty(params, (params.*PENALTY_THRESHOLD[i])(), (params.*HARD_CUTOFF[i])()) *
+             CDontUseUnaryField(params))
+                .clone());
     }
 }
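The constructor above drives four field roles from parallel tables of pointer-to-member functions (PENALTY_THRESHOLD and HARD_CUTOFF), the same pattern generateCandidateDetectorsOnce uses with its ADD_FIELD and CAN_USE arrays. A compact sketch of the mechanism, with an invented CParams class standing in for CAutoconfigurerParams:

#include <cstddef>
#include <iostream>

class CParams {
public:
    std::size_t highNumberByFieldValues() const { return 500; }
    std::size_t maximumNumberByFieldValues() const { return 1000; }
    std::size_t lowNumberOverFieldValues() const { return 10; }
    std::size_t minimumNumberOverFieldValues() const { return 2; }
};

using TCountThreshold = std::size_t (CParams::*)() const;

int main() {
    // Parallel arrays: entry i of each table configures role i.
    const TCountThreshold SOFT[] = {&CParams::highNumberByFieldValues, &CParams::lowNumberOverFieldValues};
    const TCountThreshold HARD[] = {&CParams::maximumNumberByFieldValues, &CParams::minimumNumberOverFieldValues};
    CParams params;
    for (std::size_t i = 0; i < 2; ++i) {
        // (params.*SOFT[i])() invokes the i'th accessor on params.
        std::cout << (params.*SOFT[i])() << " / " << (params.*HARD[i])() << '\n';
    }
}

One loop over the tables then configures every role, which keeps per-role logic out of the constructor body.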
(params.*HARD_CUTOFF[i])()) * + CDontUseUnaryField(params)) + .clone()); } } -const CPenalty &CAutoconfigurerFieldRolePenalties::categoricalFunctionArgumentPenalty() const -{ +const CPenalty& CAutoconfigurerFieldRolePenalties::categoricalFunctionArgumentPenalty() const { return *m_Penalties[CATEGORICAL_ARGUMENT_INDEX]; } -const CPenalty &CAutoconfigurerFieldRolePenalties::metricFunctionArgumentPenalty() const -{ +const CPenalty& CAutoconfigurerFieldRolePenalties::metricFunctionArgumentPenalty() const { return *m_Penalties[METRIC_ARGUMENT_INDEX]; } -const CPenalty &CAutoconfigurerFieldRolePenalties::byPenalty() const -{ +const CPenalty& CAutoconfigurerFieldRolePenalties::byPenalty() const { return *m_Penalties[BY_INDEX]; } -const CPenalty &CAutoconfigurerFieldRolePenalties::rareByPenalty() const -{ +const CPenalty& CAutoconfigurerFieldRolePenalties::rareByPenalty() const { return *m_Penalties[RARE_BY_INDEX]; } -const CPenalty &CAutoconfigurerFieldRolePenalties::overPenalty() const -{ +const CPenalty& CAutoconfigurerFieldRolePenalties::overPenalty() const { return *m_Penalties[OVER_INDEX]; } -const CPenalty &CAutoconfigurerFieldRolePenalties::partitionPenalty() const -{ +const CPenalty& CAutoconfigurerFieldRolePenalties::partitionPenalty() const { return *m_Penalties[PARTITION_INDEX]; } - } } diff --git a/lib/config/CAutoconfigurerParams.cc b/lib/config/CAutoconfigurerParams.cc index f2029735e3..60adf40b43 100644 --- a/lib/config/CAutoconfigurerParams.cc +++ b/lib/config/CAutoconfigurerParams.cc @@ -13,505 +13,383 @@ #include -#include #include +#include #include -#include #include +#include #include #include -namespace ml -{ -namespace config -{ -namespace -{ +namespace ml { +namespace config { +namespace { using TStrVec = std::vector<std::string>; //! \brief A constraint which applies to a value of type T. template<typename T> -class CConstraint -{ - public: - virtual ~CConstraint() {} - virtual bool operator()(const T &/*value*/) const { return true; } - virtual bool operator()(const std::vector<T> &/*value*/) const { return true; } - virtual std::string print() const = 0; +class CConstraint { +public: + virtual ~CConstraint() {} + virtual bool operator()(const T& /*value*/) const { return true; } + virtual bool operator()(const std::vector<T>& /*value*/) const { return true; } + virtual std::string print() const = 0; }; //! \brief Represents the fact that T is unconstrained. template<typename T> -class CUnconstrained : public CConstraint<T> -{ - public: - bool operator()(const T &/*value*/) const - { - return true; - } - std::string print() const - { - return "unconstrained"; - } +class CUnconstrained : public CConstraint<T> { +public: + bool operator()(const T& /*value*/) const { return true; } + std::string print() const { return "unconstrained"; } }; //! \brief A collection of constraints which apply in conjunction to a value //! of type T.
template<typename T> -class CConstraintConjunction : public CConstraint<T> -{ - public: - using TConstraintCPtr = boost::shared_ptr<const CConstraint<T>>; - - public: - CConstraintConjunction *addConstraint(const CConstraint<T> *constraint) - { - m_Constraints.push_back(TConstraintCPtr(constraint)); - return this; - } - bool operator()(const T &value) const - { - return this->evaluate(value); - } - bool operator()(const std::vector<T> &value) const - { - return this->evaluate(value); - } - std::string print() const - { - std::string result; - if (m_Constraints.size() > 0) - { - result += m_Constraints[0]->print(); - for (std::size_t i = 1u; i < m_Constraints.size(); ++i) - { - result += " && " + m_Constraints[i]->print(); - } +class CConstraintConjunction : public CConstraint<T> { +public: + using TConstraintCPtr = boost::shared_ptr<const CConstraint<T>>; + +public: + CConstraintConjunction* addConstraint(const CConstraint<T>* constraint) { + m_Constraints.push_back(TConstraintCPtr(constraint)); + return this; + } + bool operator()(const T& value) const { return this->evaluate(value); } + bool operator()(const std::vector<T>& value) const { return this->evaluate(value); } + std::string print() const { + std::string result; + if (m_Constraints.size() > 0) { + result += m_Constraints[0]->print(); + for (std::size_t i = 1u; i < m_Constraints.size(); ++i) { + result += " && " + m_Constraints[i]->print(); } - return result; } + return result; + } - private: - template<typename U> - bool evaluate(const U &value) const - { - for (std::size_t i = 0u; i < m_Constraints.size(); ++i) - { - if (!(*m_Constraints[i])(value)) - { - return false; - } +private: + template<typename U> + bool evaluate(const U& value) const { + for (std::size_t i = 0u; i < m_Constraints.size(); ++i) { + if (!(*m_Constraints[i])(value)) { + return false; } - return true; } + return true; + } - private: - std::vector<TConstraintCPtr> m_Constraints; +private: + std::vector<TConstraintCPtr> m_Constraints; }; //! \brief Less than. -template<typename T> class CLess : public std::less<T> -{ - public: - std::string print() const { return "<"; } +template<typename T> +class CLess : public std::less<T> { +public: + std::string print() const { return "<"; } }; //! \brief Less than or equal to. -template<typename T> class CLessEqual : public std::less_equal<T> -{ - public: - std::string print() const { return "<="; } +template<typename T> +class CLessEqual : public std::less_equal<T> { +public: + std::string print() const { return "<="; } }; //! \brief Greater than. -template<typename T> class CGreater : public std::greater<T> -{ - public: - std::string print() const { return ">"; } +template<typename T> +class CGreater : public std::greater<T> { +public: + std::string print() const { return ">"; } }; //! \brief Greater than or equal to. -template<typename T> class CGreaterEqual : public std::greater_equal<T> -{ - public: - std::string print() const { return ">="; } +template<typename T> +class CGreaterEqual : public std::greater_equal<T> { +public: + std::string print() const { return ">="; } };
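For context on the pattern being restyled here: addConstraint() returns this, which is what lets CAutoconfigurerParams::init() further down build conjunctions inline, as in (new CConstraintConjunction)->addConstraint(...)->addConstraint(...). Below is a minimal standalone sketch of that chaining pattern only, not the ml-cpp API: the names, the use of std::unique_ptr in place of boost::shared_ptr, and the double-only constraint are simplifications for illustration.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Simplified stand-ins for CConstraint<T> and its comparison subclasses.
struct Constraint {
    virtual ~Constraint() = default;
    virtual bool operator()(double value) const = 0;
    virtual std::string print() const = 0;
};

struct GreaterThan : Constraint {
    explicit GreaterThan(double rhs) : m_Rhs(rhs) {}
    bool operator()(double value) const override { return value > m_Rhs; }
    std::string print() const override { return "> " + std::to_string(m_Rhs); }
    double m_Rhs;
};

struct LessEqual : Constraint {
    explicit LessEqual(double rhs) : m_Rhs(rhs) {}
    bool operator()(double value) const override { return value <= m_Rhs; }
    std::string print() const override { return "<= " + std::to_string(m_Rhs); }
    double m_Rhs;
};

// Conjunction: every added constraint must hold. addConstraint() returns
// this so that calls chain, mirroring CConstraintConjunction<T> above.
struct Conjunction : Constraint {
    Conjunction* addConstraint(const Constraint* constraint) {
        m_Constraints.emplace_back(constraint); // Takes ownership, like the TConstraintCPtr push_back.
        return this;
    }
    bool operator()(double value) const override {
        for (const auto& constraint : m_Constraints) {
            if (!(*constraint)(value)) {
                return false;
            }
        }
        return true;
    }
    std::string print() const override {
        std::string result;
        for (const auto& constraint : m_Constraints) {
            result += (result.empty() ? "" : " && ") + constraint->print();
        }
        return result;
    }
    std::vector<std::unique_ptr<const Constraint>> m_Constraints;
};

int main() {
    // "Is a fraction": 0 < value <= 1, built by chaining as init() does below.
    std::unique_ptr<Conjunction> fraction{
        (new Conjunction)->addConstraint(new GreaterThan(0.0))->addConstraint(new LessEqual(1.0))};
    std::cout << fraction->print() << ": 0.5 -> " << (*fraction)(0.5) << ", 1.5 -> " << (*fraction)(1.5) << '\n';
}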
//! \brief The constraint that a value of type T compares to a reference value via PREDICATE. template<typename T, template<typename> class PREDICATE> -class CValueIs : public CConstraint<T> -{ - public: - CValueIs(const T &rhs) : m_Rhs(&rhs) {} - bool operator()(const T &lhs) const - { - return m_Pred(lhs, *m_Rhs); - } - std::string print() const - { - return m_Pred.print() + core::CStringUtils::typeToString(*m_Rhs); - } - - private: - const T *m_Rhs; - PREDICATE<T> m_Pred; +class CValueIs : public CConstraint<T> { +public: + CValueIs(const T& rhs) : m_Rhs(&rhs) {} + bool operator()(const T& lhs) const { return m_Pred(lhs, *m_Rhs); } + std::string print() const { return m_Pred.print() + core::CStringUtils::typeToString(*m_Rhs); } + +private: + const T* m_Rhs; + PREDICATE<T> m_Pred; }; //! \brief The constraint that the elements of a vector of type T compare elementwise to a reference vector via PREDICATE. template<typename T, template<typename> class PREDICATE> -class CVectorValueIs : public CConstraint<T> -{ - public: - CVectorValueIs(const std::vector<T> &rhs) : m_Rhs(&rhs) {} - bool operator()(const std::vector<T> &lhs) const - { - std::size_t n = std::min(lhs.size(), m_Rhs->size()); - for (std::size_t i = 0u; i < n; ++i) - { - if (!m_Pred(lhs[i], (*m_Rhs)[i])) - { - return false; - } +class CVectorValueIs : public CConstraint<T> { +public: + CVectorValueIs(const std::vector<T>& rhs) : m_Rhs(&rhs) {} + bool operator()(const std::vector<T>& lhs) const { + std::size_t n = std::min(lhs.size(), m_Rhs->size()); + for (std::size_t i = 0u; i < n; ++i) { + if (!m_Pred(lhs[i], (*m_Rhs)[i])) { + return false; } - return true; - } - std::string print() const - { - return m_Pred.print() + core::CContainerPrinter::print(*m_Rhs); } + return true; + } + std::string print() const { return m_Pred.print() + core::CContainerPrinter::print(*m_Rhs); } - private: - const std::vector<T> *m_Rhs; - PREDICATE<T> m_Pred; +private: + const std::vector<T>* m_Rhs; + PREDICATE<T> m_Pred; }; //! \brief The constraint that a vector isn't empty. template<typename T> -class CNotEmpty : public CConstraint<T> -{ - public: - bool operator()(const std::vector<T> &value) const - { - return !value.empty(); - } - std::string print() const - { - return "not empty"; - } +class CNotEmpty : public CConstraint<T> { +public: + bool operator()(const std::vector<T>& value) const { return !value.empty(); } + std::string print() const { return "not empty"; } }; //! \brief The constraint that a vector has a fixed size. template<typename T> -class CSizeIs : public CConstraint<T> -{ - public: - CSizeIs(std::size_t size) : m_Size(size) {} - bool operator()(const std::vector<T> &value) const - { - return value.size() == m_Size; - } - std::string print() const - { - return "size is " + core::CStringUtils::typeToString(m_Size); - } - - private: - std::size_t m_Size; +class CSizeIs : public CConstraint<T> { +public: + CSizeIs(std::size_t size) : m_Size(size) {} + bool operator()(const std::vector<T>& value) const { return value.size() == m_Size; } + std::string print() const { return "size is " + core::CStringUtils::typeToString(m_Size); } + +private: + std::size_t m_Size; }; - //! \brief Wrapper around parameters so we can process an array in init.
-class CParameter : private core::CNonCopyable -{ - public: - virtual ~CParameter() {} - bool fromString(std::string value) - { - core::CStringUtils::trimWhitespace(value); - value = core::CStringUtils::normaliseWhitespace(value); - return this->fromStringImpl(value); - } +class CParameter : private core::CNonCopyable { +public: + virtual ~CParameter() {} + bool fromString(std::string value) { + core::CStringUtils::trimWhitespace(value); + value = core::CStringUtils::normaliseWhitespace(value); + return this->fromStringImpl(value); + } - private: - virtual bool fromStringImpl(const std::string &value) = 0; +private: + virtual bool fromStringImpl(const std::string& value) = 0; }; //! \brief A parameter which is a built-in type. template<typename T> -class CBuiltinParameter : public CParameter -{ - public: - using TConstraintCPtr = boost::shared_ptr<const CConstraint<T>>; - - public: - CBuiltinParameter(T &value) : - m_Value(value), - m_Constraint(new CUnconstrained<T>) - {} - CBuiltinParameter(T &value, const CConstraint<T> *constraint) : - m_Value(value), - m_Constraint(constraint) - {} - CBuiltinParameter(T &value, TConstraintCPtr constraint) : - m_Value(value), - m_Constraint(constraint) - {} - - private: - virtual bool fromStringImpl(const std::string &value) - { - if (boost::is_unsigned<T>::value && this->hasSign(value)) - { - return false; - } - T value_; - if (!core::CStringUtils::stringToType(value, value_)) - { - return false; - } - if (!(*m_Constraint)(value_)) - { - LOG_ERROR("'" << value_ << "' doesn't satisfy '" << m_Constraint->print() << "'"); - return false; - } - m_Value = value_; - return true; +class CBuiltinParameter : public CParameter { +public: + using TConstraintCPtr = boost::shared_ptr<const CConstraint<T>>; + +public: + CBuiltinParameter(T& value) : m_Value(value), m_Constraint(new CUnconstrained<T>) {} + CBuiltinParameter(T& value, const CConstraint<T>* constraint) : m_Value(value), m_Constraint(constraint) {} + CBuiltinParameter(T& value, TConstraintCPtr constraint) : m_Value(value), m_Constraint(constraint) {} + +private: + virtual bool fromStringImpl(const std::string& value) { + if (boost::is_unsigned<T>::value && this->hasSign(value)) { + return false; } - - bool hasSign(const std::string &value) const - { - return value[0] == '-'; + T value_; + if (!core::CStringUtils::stringToType(value, value_)) { + return false; + } + if (!(*m_Constraint)(value_)) { + LOG_ERROR("'" << value_ << "' doesn't satisfy '" << m_Constraint->print() << "'"); + return false; } + m_Value = value_; + return true; + } + + bool hasSign(const std::string& value) const { return value[0] == '-'; } - private: - T &m_Value; - TConstraintCPtr m_Constraint; +private: + T& m_Value; + TConstraintCPtr m_Constraint; };
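One behavioural detail worth calling out in CBuiltinParameter's fromStringImpl() above: the explicit hasSign() check exists because stream-based conversion of a negative string into an unsigned type silently wraps rather than failing. A rough standalone illustration of the pitfall and the guard follows; stringToType here is a hypothetical stand-in for core::CStringUtils::stringToType, not the real implementation.

#include <iostream>
#include <sstream>
#include <string>
#include <type_traits>

// Convert a string to T, rejecting a leading '-' for unsigned targets the
// way CBuiltinParameter<T>::fromStringImpl() does before converting.
template<typename T>
bool stringToType(const std::string& value, T& result) {
    if (std::is_unsigned<T>::value && !value.empty() && value[0] == '-') {
        return false; // "-5" would otherwise wrap to a huge unsigned value.
    }
    std::istringstream strm(value);
    return static_cast<bool>(strm >> result) && strm.eof();
}

int main() {
    std::size_t n = 0;
    std::cout << stringToType("10000", n) << ' ' << n << '\n'; // 1 10000
    std::cout << stringToType("-5", n) << '\n';                // 0: rejected by the sign check
}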
//! \brief A parameter which is a vector of a built-in type. template<typename T> -class CBuiltinVectorParameter : public CParameter -{ - public: - CBuiltinVectorParameter(std::vector<T> &value) : - m_Value(value), - m_Constraint(new CUnconstrained<T>) - {} - CBuiltinVectorParameter(std::vector<T> &value, const CConstraint<T> *constraint) : - m_Value(value), - m_Constraint(constraint) - {} - - private: - virtual bool fromStringImpl(const std::string &value) - { - std::string remainder; - TStrVec tokens; - core::CStringUtils::tokenise(std::string(" "), value, tokens, remainder); - if (!remainder.empty()) - { - tokens.push_back(remainder); - } - std::vector<T> value_(tokens.size()); - for (std::size_t i = 0u; i < tokens.size(); ++i) - { - CBuiltinParameter<T> param(value_[i], m_Constraint); - if (!param.fromString(tokens[i])) - { - return false; - } - } - if (!(*m_Constraint)(value_)) - { - LOG_ERROR("'" << core::CContainerPrinter::print(value_) - << "' doesn't satisfy '" << m_Constraint->print() << "'"); +class CBuiltinVectorParameter : public CParameter { +public: + CBuiltinVectorParameter(std::vector<T>& value) : m_Value(value), m_Constraint(new CUnconstrained<T>) {} + CBuiltinVectorParameter(std::vector<T>& value, const CConstraint<T>* constraint) : m_Value(value), m_Constraint(constraint) {} + +private: + virtual bool fromStringImpl(const std::string& value) { + std::string remainder; + TStrVec tokens; + core::CStringUtils::tokenise(std::string(" "), value, tokens, remainder); + if (!remainder.empty()) { + tokens.push_back(remainder); + } + std::vector<T> value_(tokens.size()); + for (std::size_t i = 0u; i < tokens.size(); ++i) { + CBuiltinParameter<T> param(value_[i], m_Constraint); + if (!param.fromString(tokens[i])) { return false; } - m_Value.swap(value_); - return true; } + if (!(*m_Constraint)(value_)) { + LOG_ERROR("'" << core::CContainerPrinter::print(value_) << "' doesn't satisfy '" << m_Constraint->print() << "'"); + return false; + } + m_Value.swap(value_); + return true; + } - private: - std::vector<T> &m_Value; - boost::shared_ptr<const CConstraint<T>> m_Constraint; +private: + std::vector<T>& m_Value; + boost::shared_ptr<const CConstraint<T>> m_Constraint; };
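The class above leans on two properties of the constraint hierarchy: the same constraint object validates each element (through the per-token CBuiltinParameter it builds) and then the whole vector via the std::vector overload, and the parsed vector is only swapped into m_Value once every token converts. A compact sketch of that tokenise-convert-commit flow, under the same simplifications as the previous sketch (vectorFromString is hypothetical, not core::CStringUtils::tokenise):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split a whitespace-separated setting and convert every token to T,
// committing to the output only if the whole string parses.
template<typename T>
bool vectorFromString(const std::string& value, std::vector<T>& result) {
    std::istringstream strm(value);
    std::vector<T> parsed;
    T element;
    while (strm >> element) {
        parsed.push_back(element);
    }
    if (!strm.eof()) {
        return false; // Some token failed to convert to T.
    }
    result.swap(parsed); // Commit on full success, like m_Value.swap(value_) above.
    return true;
}

int main() {
    // For example, a candidate_bucket_lengths-style setting.
    std::vector<long> bucketLengths;
    if (vectorFromString("60 300 600 1800", bucketLengths)) {
        std::cout << bucketLengths.size() << " bucket lengths parsed\n"; // 4
    }
}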
//! \brief A parameter which is a vector of strings. -class COptionalStrVecParameter : public CParameter -{ - public: - COptionalStrVecParameter(CAutoconfigurerParams::TOptionalStrVec &value) : - m_Value(value), - m_Constraint(new CUnconstrained<std::string>) - {} - COptionalStrVecParameter(CAutoconfigurerParams::TOptionalStrVec &value, - const CConstraint<std::string> *constraint) : - m_Value(value), - m_Constraint(constraint) - {} - - virtual bool fromStringImpl(const std::string &value) - { - std::string remainder; - TStrVec value_; - core::CStringUtils::tokenise(std::string(" "), value, value_, remainder); - if (!remainder.empty()) - { - value_.push_back(remainder); - } - if (!(*m_Constraint)(value_)) - { - LOG_ERROR("'" << core::CContainerPrinter::print(value_) - << "' doesn't satisfy '" << m_Constraint->print() << "'"); - return false; - } - m_Value.reset(TStrVec()); - (*m_Value).swap(value_); - return true +class COptionalStrVecParameter : public CParameter { +public: + COptionalStrVecParameter(CAutoconfigurerParams::TOptionalStrVec& value) + : m_Value(value), m_Constraint(new CUnconstrained<std::string>) {} + COptionalStrVecParameter(CAutoconfigurerParams::TOptionalStrVec& value, const CConstraint<std::string>* constraint) + : m_Value(value), m_Constraint(constraint) {} + + virtual bool fromStringImpl(const std::string& value) { + std::string remainder; + TStrVec value_; + core::CStringUtils::tokenise(std::string(" "), value, value_, remainder); + if (!remainder.empty()) { + value_.push_back(remainder); + } + if (!(*m_Constraint)(value_)) { + LOG_ERROR("'" << core::CContainerPrinter::print(value_) << "' doesn't satisfy '" << m_Constraint->print() << "'"); + return false; + } + m_Value.reset(TStrVec()); + (*m_Value).swap(value_); + return true; + } - private: - CAutoconfigurerParams::TOptionalStrVec &m_Value; - boost::shared_ptr<const CConstraint<std::string>> m_Constraint; +private: + CAutoconfigurerParams::TOptionalStrVec& m_Value; + boost::shared_ptr<const CConstraint<std::string>> m_Constraint; }; //! \brief The field data type parameter.
-class CFieldDataTypeParameter : public CParameter -{ - public: - CFieldDataTypeParameter(CAutoconfigurerParams::TStrUserDataTypePrVec &value) : m_Value(value) {} - - private: - virtual bool fromStringImpl(const std::string &value) - { - std::string remainder; - TStrVec tokens; - core::CStringUtils::tokenise(std::string(" "), value, tokens, remainder); - if (!remainder.empty()) - { - tokens.push_back(remainder); - } - if (tokens.size() % 2 != 0) - { - LOG_ERROR("Unmatched field and type in '" << value << "'"); - return false; - } +class CFieldDataTypeParameter : public CParameter { +public: + CFieldDataTypeParameter(CAutoconfigurerParams::TStrUserDataTypePrVec& value) : m_Value(value) {} + +private: + virtual bool fromStringImpl(const std::string& value) { + std::string remainder; + TStrVec tokens; + core::CStringUtils::tokenise(std::string(" "), value, tokens, remainder); + if (!remainder.empty()) { + tokens.push_back(remainder); + } + if (tokens.size() % 2 != 0) { + LOG_ERROR("Unmatched field and type in '" << value << "'"); + return false; + } - CAutoconfigurerParams::TStrUserDataTypePrVec value_; - value_.reserve(tokens.size()); - for (std::size_t i = 0u; i < tokens.size(); i += 2) - { - config_t::EUserDataType type; - if (!this->fromString(core::CStringUtils::toLower(tokens[i+1]), type)) - { - LOG_ERROR("Couldn't interpret '" << tokens[i+1] << "' as a data type:" - << " ignoring field data type for '" << tokens[i] << "'"); - continue; - } - value_.push_back(std::make_pair(tokens[i], type)); + CAutoconfigurerParams::TStrUserDataTypePrVec value_; + value_.reserve(tokens.size()); + for (std::size_t i = 0u; i < tokens.size(); i += 2) { + config_t::EUserDataType type; + if (!this->fromString(core::CStringUtils::toLower(tokens[i + 1]), type)) { + LOG_ERROR("Couldn't interpret '" << tokens[i + 1] << "' as a data type:" + << " ignoring field data type for '" << tokens[i] << "'"); + continue; } - std::sort(value_.begin(), value_.end(), maths::COrderings::SFirstLess()); - m_Value.swap(value_); - - return true; + value_.push_back(std::make_pair(tokens[i], type)); } + std::sort(value_.begin(), value_.end(), maths::COrderings::SFirstLess()); + m_Value.swap(value_); + + return true; + } - bool fromString(const std::string &value, config_t::EUserDataType &type) const - { - for (int i = config_t::E_UserCategorical; i <= config_t::E_UserNumeric; ++i) - { - type = static_cast<config_t::EUserDataType>(i); - if (value == config_t::print(type)) - { - return true; - } + bool fromString(const std::string& value, config_t::EUserDataType& type) const { + for (int i = config_t::E_UserCategorical; i <= config_t::E_UserNumeric; ++i) { + type = static_cast<config_t::EUserDataType>(i); + if (value == config_t::print(type)) { + return true; } - return false; } + return false; + } - private: - CAutoconfigurerParams::TStrUserDataTypePrVec &m_Value; +private: + CAutoconfigurerParams::TStrUserDataTypePrVec& m_Value; }; //! \brief The function category parameter.
-class CFunctionCategoryParameter : public CParameter -{ - public: - CFunctionCategoryParameter(CAutoconfigurerParams::TFunctionCategoryVec &value) : - m_Value(value), - m_Constraint(new CUnconstrained<config_t::EFunctionCategory>) - {} - CFunctionCategoryParameter(CAutoconfigurerParams::TFunctionCategoryVec &value, - const CConstraint<config_t::EFunctionCategory> *constraint) : - m_Value(value), - m_Constraint(constraint) - {} - - private: - virtual bool fromStringImpl(const std::string &value) - { - std::string remainder; - TStrVec tokens; - core::CStringUtils::tokenise(std::string(" "), value, tokens, remainder); - if (!remainder.empty()) - { - tokens.push_back(remainder); - } - std::sort(tokens.begin(), tokens.end()); - tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end()); - - CAutoconfigurerParams::TFunctionCategoryVec value_; - value_.reserve(tokens.size()); - for (std::size_t i = 0u; i < tokens.size(); ++i) - { - config_t::EFunctionCategory function; - if (!this->fromString(core::CStringUtils::toLower(tokens[i]), function)) - { - LOG_ERROR("Couldn't interpret '" << tokens[i] << "' as a function"); - return false; - } - value_.push_back(function); - } - std::sort(value_.begin(), value_.end()); - if (!(*m_Constraint)(value_)) - { - LOG_ERROR("'" << core::CContainerPrinter::print(value_) - << "' doesn't satisfy '" << m_Constraint->print() << "'"); +class CFunctionCategoryParameter : public CParameter { +public: + CFunctionCategoryParameter(CAutoconfigurerParams::TFunctionCategoryVec& value) + : m_Value(value), m_Constraint(new CUnconstrained<config_t::EFunctionCategory>) {} + CFunctionCategoryParameter(CAutoconfigurerParams::TFunctionCategoryVec& value, + const CConstraint<config_t::EFunctionCategory>* constraint) + : m_Value(value), m_Constraint(constraint) {} + +private: + virtual bool fromStringImpl(const std::string& value) { + std::string remainder; + TStrVec tokens; + core::CStringUtils::tokenise(std::string(" "), value, tokens, remainder); + if (!remainder.empty()) { + tokens.push_back(remainder); + } + std::sort(tokens.begin(), tokens.end()); + tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end()); + + CAutoconfigurerParams::TFunctionCategoryVec value_; + value_.reserve(tokens.size()); + for (std::size_t i = 0u; i < tokens.size(); ++i) { + config_t::EFunctionCategory function; + if (!this->fromString(core::CStringUtils::toLower(tokens[i]), function)) { + LOG_ERROR("Couldn't interpret '" << tokens[i] << "' as a function"); return false; } - m_Value.swap(value_); - - return true; + value_.push_back(function); } + std::sort(value_.begin(), value_.end()); + if (!(*m_Constraint)(value_)) { + LOG_ERROR("'" << core::CContainerPrinter::print(value_) << "' doesn't satisfy '" << m_Constraint->print() << "'"); + return false; + } + m_Value.swap(value_); + + return true; + } - bool fromString(const std::string &value, config_t::EFunctionCategory &function) const - { - for (int i = config_t::E_Count; i <= config_t::E_Median; ++i) - { - function = static_cast<config_t::EFunctionCategory>(i); - if (value == config_t::print(function)) - { - return true; - } + bool fromString(const std::string& value, config_t::EFunctionCategory& function) const { + for (int i = config_t::E_Count; i <= config_t::E_Median; ++i) { + function = static_cast<config_t::EFunctionCategory>(i); + if (value == config_t::print(function)) { + return true; } - return false; } + return false; + } - private: - CAutoconfigurerParams::TFunctionCategoryVec &m_Value; - boost::shared_ptr<const CConstraint<config_t::EFunctionCategory>> m_Constraint; +private: + CAutoconfigurerParams::TFunctionCategoryVec& m_Value; + boost::shared_ptr<const CConstraint<config_t::EFunctionCategory>> m_Constraint; }; //!
boost::ini_parser doesn't like UTF-8 ini files that begin with //! byte order markers. This function advances the seek pointer of //! the stream over a UTF-8 BOM, but only if one exists. -void skipUtf8Bom(std::ifstream &strm) -{ - if (strm.tellg() != std::streampos(0)) - { +void skipUtf8Bom(std::ifstream& strm) { + if (strm.tellg() != std::streampos(0)) { return; } std::ios_base::iostate origState(strm.rdstate()); // The 3 bytes 0xEF, 0xBB, 0xBF form a UTF-8 byte order marker (BOM) - if (strm.get() == 0xEF && strm.get() == 0xBB && strm.get() == 0xBF) - { + if (strm.get() == 0xEF && strm.get() == 0xBB && strm.get() == 0xBF) { LOG_DEBUG("Skipping UTF-8 BOM"); return; } @@ -524,42 +402,30 @@ void skipUtf8Bom(std::ifstream &strm) //! Helper method for CAutoconfigurerParams::init() to extract parameter //! value from the property file. -static bool processSetting(const boost::property_tree::ptree &propTree, - const std::string &iniPath, - CParameter &parameter) -{ - try - { +static bool processSetting(const boost::property_tree::ptree& propTree, const std::string& iniPath, CParameter& parameter) { + try { // This get() will throw an exception if the path isn't found std::string value = propTree.get<std::string>(iniPath); // Use our own string-to-type conversion, because what's built // into the boost::property_tree is too lax. - if (!parameter.fromString(value)) - { + if (!parameter.fromString(value)) { LOG_ERROR("Invalid value for setting '" << iniPath << "' : " << value); return false; } - } - catch (boost::property_tree::ptree_error &) - { - LOG_INFO("Keeping default value for unspecified setting '" << iniPath << "'"); - } + } catch (boost::property_tree::ptree_error&) { LOG_INFO("Keeping default value for unspecified setting '" << iniPath << "'"); } return true; } //! Check if value can be used for one of the detector fields.
-bool canUse(const CAutoconfigurerParams::TOptionalStrVec &primary, - const CAutoconfigurerParams::TOptionalStrVec &secondary, - const std::string &value) -{ - if (primary) - { +bool canUse(const CAutoconfigurerParams::TOptionalStrVec& primary, + const CAutoconfigurerParams::TOptionalStrVec& secondary, + const std::string& value) { + if (primary) { return std::find(primary->begin(), primary->end(), value) != primary->end(); } - if (secondary) - { + if (secondary) { return std::find(secondary->begin(), secondary->end(), value) != secondary->end(); } return true; @@ -570,19 +436,16 @@ const std::size_t MINIMUM_RECORDS_TO_ATTEMPT_CONFIG(10000); const double MINIMUM_DETECTOR_SCORE(0.1); const std::size_t NUMBER_OF_MOST_FREQUENT_FIELDS_COUNTS(10); std::string DEFAULT_DETECTOR_CONFIG_LINE_ENDING("\n"); -const config_t::EFunctionCategory FUNCTION_CATEGORIES[] = - { - config_t::E_Count, - config_t::E_Rare, - config_t::E_DistinctCount, - config_t::E_InfoContent, - config_t::E_Mean, - config_t::E_Min, - config_t::E_Max, - config_t::E_Sum, - config_t::E_Varp, - config_t::E_Median - }; +const config_t::EFunctionCategory FUNCTION_CATEGORIES[] = {config_t::E_Count, + config_t::E_Rare, + config_t::E_DistinctCount, + config_t::E_InfoContent, + config_t::E_Mean, + config_t::E_Min, + config_t::E_Max, + config_t::E_Sum, + config_t::E_Varp, + config_t::E_Median}; const std::size_t HIGH_NUMBER_BY_FIELD_VALUES(500); const std::size_t MAXIMUM_NUMBER_BY_FIELD_VALUES(1000); const std::size_t HIGH_NUMBER_RARE_BY_FIELD_VALUES(50000); @@ -595,26 +458,11 @@ const double HIGH_CARDINALITY_IN_TAIL_FACTOR(1.1); const uint64_t HIGH_CARDINALITY_IN_TAIL_INCREMENT(10); const double HIGH_CARDINALITY_HIGH_TAIL_FRACTION(0.005); const double HIGH_CARDINALITY_MAXIMUM_TAIL_FRACTION(0.05); -const double LOW_POPULATED_BUCKET_FRACTIONS[] = - { - 1.0 / 3.0, 1.0 / 50.0 - }; -const double MINIMUM_POPULATED_BUCKET_FRACTIONS[] = - { - 1.0 / 50.0, 1.0 / 500.0 - }; -const double HIGH_POPULATED_BUCKET_FRACTIONS[] = - { - 1.1, 1.0 / 10.0 - }; -const double MAXIMUM_POPULATED_BUCKET_FRACTIONS[] = - { - 1.2, 5.0 / 10.0 - }; -const core_t::TTime CANDIDATE_BUCKET_LENGTHS[] = - { - 60, 300, 600, 1800, 3600, 7200, 14400, constants::LONGEST_BUCKET_LENGTH - }; +const double LOW_POPULATED_BUCKET_FRACTIONS[] = {1.0 / 3.0, 1.0 / 50.0}; +const double MINIMUM_POPULATED_BUCKET_FRACTIONS[] = {1.0 / 50.0, 1.0 / 500.0}; +const double HIGH_POPULATED_BUCKET_FRACTIONS[] = {1.1, 1.0 / 10.0}; +const double MAXIMUM_POPULATED_BUCKET_FRACTIONS[] = {1.2, 5.0 / 10.0}; +const core_t::TTime CANDIDATE_BUCKET_LENGTHS[] = {60, 300, 600, 1800, 3600, 7200, 14400, constants::LONGEST_BUCKET_LENGTH}; const double LOW_NUMBER_OF_BUCKETS_FOR_CONFIG(500.0); const double MINIMUM_NUMBER_OF_BUCKETS_FOR_CONFIG(50.0); const double POLLED_DATA_MINIMUM_MASS_AT_INTERVAL(0.99); @@ -629,554 +477,462 @@ const double LOW_ENTROPY_FOR_INFO_CONTENT(0.01); const double MINIMUM_ENTROPY_FOR_INFO_CONTENT(1e-6); const double LOW_DISTINCT_COUNT_FOR_INFO_CONTENT(500000.0); const double MINIMUM_DISTINCT_COUNT_FOR_INFO_CONTENT(5000.0); - } -CAutoconfigurerParams::CAutoconfigurerParams(const std::string &timeFieldName, - const std::string &timeFieldFormat, +CAutoconfigurerParams::CAutoconfigurerParams(const std::string& timeFieldName, + const std::string& timeFieldFormat, bool verbose, - bool writeDetectorConfigs) : - m_TimeFieldName(timeFieldName), - m_TimeFieldFormat(timeFieldFormat), - m_Verbose(verbose), - m_WriteDetectorConfigs(writeDetectorConfigs), - 
m_DetectorConfigLineEnding(DEFAULT_DETECTOR_CONFIG_LINE_ENDING), - m_FunctionCategoriesToConfigure(boost::begin(FUNCTION_CATEGORIES), boost::end(FUNCTION_CATEGORIES)), - m_MinimumExamplesToClassify(MINIMUM_EXAMPLES_TO_CLASSIFY), - m_NumberOfMostFrequentFieldsCounts(NUMBER_OF_MOST_FREQUENT_FIELDS_COUNTS), - m_MinimumRecordsToAttemptConfig(MINIMUM_RECORDS_TO_ATTEMPT_CONFIG), - m_MinimumDetectorScore(MINIMUM_DETECTOR_SCORE), - m_HighNumberByFieldValues(HIGH_NUMBER_BY_FIELD_VALUES), - m_MaximumNumberByFieldValues(MAXIMUM_NUMBER_BY_FIELD_VALUES), - m_HighNumberRareByFieldValues(HIGH_NUMBER_RARE_BY_FIELD_VALUES), - m_MaximumNumberRareByFieldValues(MAXIMUM_NUMBER_RARE_BY_FIELD_VALUES), - m_HighNumberPartitionFieldValues(HIGH_NUMBER_PARTITION_FIELD_VALUES), - m_MaximumNumberPartitionFieldValues(MAXIMUM_NUMBER_PARTITION_FIELD_VALUES), - m_LowNumberOverFieldValues(LOW_NUMBER_OVER_FIELD_VALUES), - m_MinimumNumberOverFieldValues(MINIMUM_NUMBER_OVER_FIELD_VALUES), - m_HighCardinalityInTailFactor(HIGH_CARDINALITY_IN_TAIL_FACTOR), - m_HighCardinalityInTailIncrement(HIGH_CARDINALITY_IN_TAIL_INCREMENT), - m_HighCardinalityHighTailFraction(HIGH_CARDINALITY_HIGH_TAIL_FRACTION), - m_HighCardinalityMaximumTailFraction(HIGH_CARDINALITY_MAXIMUM_TAIL_FRACTION), - m_LowPopulatedBucketFractions(boost::begin(LOW_POPULATED_BUCKET_FRACTIONS), boost::end(LOW_POPULATED_BUCKET_FRACTIONS)), - m_MinimumPopulatedBucketFractions(boost::begin(MINIMUM_POPULATED_BUCKET_FRACTIONS), boost::end(MINIMUM_POPULATED_BUCKET_FRACTIONS)), - m_HighPopulatedBucketFractions(boost::begin(HIGH_POPULATED_BUCKET_FRACTIONS), boost::end(HIGH_POPULATED_BUCKET_FRACTIONS)), - m_MaximumPopulatedBucketFractions(boost::begin(MAXIMUM_POPULATED_BUCKET_FRACTIONS), boost::end(MAXIMUM_POPULATED_BUCKET_FRACTIONS)), - m_CandidateBucketLengths(boost::begin(CANDIDATE_BUCKET_LENGTHS), boost::end(CANDIDATE_BUCKET_LENGTHS)), - m_LowNumberOfBucketsForConfig(LOW_NUMBER_OF_BUCKETS_FOR_CONFIG), - m_MinimumNumberOfBucketsForConfig(MINIMUM_NUMBER_OF_BUCKETS_FOR_CONFIG), - m_PolledDataMinimumMassAtInterval(POLLED_DATA_MINIMUM_MASS_AT_INTERVAL), - m_PolledDataJitter(POLLED_DATA_JITTER), - m_LowCoefficientOfVariation(LOW_COEFFICIENT_OF_VARIATION), - m_MinimumCoefficientOfVariation(MINIMUM_COEFFICIENT_OF_VARIATION), - m_LowLengthRangeForInfoContent(LOW_LENGTH_RANGE_FOR_INFO_CONTENT), - m_MinimumLengthRangeForInfoContent(MINIMUM_LENGTH_RANGE_FOR_INFO_CONTENT), - m_LowMaximumLengthForInfoContent(LOW_MAXIMUM_LENGTH_FOR_INFO_CONTENT), - m_MinimumMaximumLengthForInfoContent(MINIMUM_MAXIMUM_LENGTH_FOR_INFO_CONTENT), - m_LowEntropyForInfoContent(LOW_ENTROPY_FOR_INFO_CONTENT), - m_MinimumEntropyForInfoContent(MINIMUM_ENTROPY_FOR_INFO_CONTENT), - m_LowDistinctCountForInfoContent(LOW_DISTINCT_COUNT_FOR_INFO_CONTENT), - m_MinimumDistinctCountForInfoContent(MINIMUM_DISTINCT_COUNT_FOR_INFO_CONTENT) -{ + bool writeDetectorConfigs) + : m_TimeFieldName(timeFieldName), + m_TimeFieldFormat(timeFieldFormat), + m_Verbose(verbose), + m_WriteDetectorConfigs(writeDetectorConfigs), + m_DetectorConfigLineEnding(DEFAULT_DETECTOR_CONFIG_LINE_ENDING), + m_FunctionCategoriesToConfigure(boost::begin(FUNCTION_CATEGORIES), boost::end(FUNCTION_CATEGORIES)), + m_MinimumExamplesToClassify(MINIMUM_EXAMPLES_TO_CLASSIFY), + m_NumberOfMostFrequentFieldsCounts(NUMBER_OF_MOST_FREQUENT_FIELDS_COUNTS), + m_MinimumRecordsToAttemptConfig(MINIMUM_RECORDS_TO_ATTEMPT_CONFIG), + m_MinimumDetectorScore(MINIMUM_DETECTOR_SCORE), + m_HighNumberByFieldValues(HIGH_NUMBER_BY_FIELD_VALUES), + 
m_MaximumNumberByFieldValues(MAXIMUM_NUMBER_BY_FIELD_VALUES), + m_HighNumberRareByFieldValues(HIGH_NUMBER_RARE_BY_FIELD_VALUES), + m_MaximumNumberRareByFieldValues(MAXIMUM_NUMBER_RARE_BY_FIELD_VALUES), + m_HighNumberPartitionFieldValues(HIGH_NUMBER_PARTITION_FIELD_VALUES), + m_MaximumNumberPartitionFieldValues(MAXIMUM_NUMBER_PARTITION_FIELD_VALUES), + m_LowNumberOverFieldValues(LOW_NUMBER_OVER_FIELD_VALUES), + m_MinimumNumberOverFieldValues(MINIMUM_NUMBER_OVER_FIELD_VALUES), + m_HighCardinalityInTailFactor(HIGH_CARDINALITY_IN_TAIL_FACTOR), + m_HighCardinalityInTailIncrement(HIGH_CARDINALITY_IN_TAIL_INCREMENT), + m_HighCardinalityHighTailFraction(HIGH_CARDINALITY_HIGH_TAIL_FRACTION), + m_HighCardinalityMaximumTailFraction(HIGH_CARDINALITY_MAXIMUM_TAIL_FRACTION), + m_LowPopulatedBucketFractions(boost::begin(LOW_POPULATED_BUCKET_FRACTIONS), boost::end(LOW_POPULATED_BUCKET_FRACTIONS)), + m_MinimumPopulatedBucketFractions(boost::begin(MINIMUM_POPULATED_BUCKET_FRACTIONS), boost::end(MINIMUM_POPULATED_BUCKET_FRACTIONS)), + m_HighPopulatedBucketFractions(boost::begin(HIGH_POPULATED_BUCKET_FRACTIONS), boost::end(HIGH_POPULATED_BUCKET_FRACTIONS)), + m_MaximumPopulatedBucketFractions(boost::begin(MAXIMUM_POPULATED_BUCKET_FRACTIONS), boost::end(MAXIMUM_POPULATED_BUCKET_FRACTIONS)), + m_CandidateBucketLengths(boost::begin(CANDIDATE_BUCKET_LENGTHS), boost::end(CANDIDATE_BUCKET_LENGTHS)), + m_LowNumberOfBucketsForConfig(LOW_NUMBER_OF_BUCKETS_FOR_CONFIG), + m_MinimumNumberOfBucketsForConfig(MINIMUM_NUMBER_OF_BUCKETS_FOR_CONFIG), + m_PolledDataMinimumMassAtInterval(POLLED_DATA_MINIMUM_MASS_AT_INTERVAL), + m_PolledDataJitter(POLLED_DATA_JITTER), + m_LowCoefficientOfVariation(LOW_COEFFICIENT_OF_VARIATION), + m_MinimumCoefficientOfVariation(MINIMUM_COEFFICIENT_OF_VARIATION), + m_LowLengthRangeForInfoContent(LOW_LENGTH_RANGE_FOR_INFO_CONTENT), + m_MinimumLengthRangeForInfoContent(MINIMUM_LENGTH_RANGE_FOR_INFO_CONTENT), + m_LowMaximumLengthForInfoContent(LOW_MAXIMUM_LENGTH_FOR_INFO_CONTENT), + m_MinimumMaximumLengthForInfoContent(MINIMUM_MAXIMUM_LENGTH_FOR_INFO_CONTENT), + m_LowEntropyForInfoContent(LOW_ENTROPY_FOR_INFO_CONTENT), + m_MinimumEntropyForInfoContent(MINIMUM_ENTROPY_FOR_INFO_CONTENT), + m_LowDistinctCountForInfoContent(LOW_DISTINCT_COUNT_FOR_INFO_CONTENT), + m_MinimumDistinctCountForInfoContent(MINIMUM_DISTINCT_COUNT_FOR_INFO_CONTENT) { this->refreshPenaltyIndices(); } -bool CAutoconfigurerParams::init(const std::string &file) -{ - if (file.empty()) - { +bool CAutoconfigurerParams::init(const std::string& file) { + if (file.empty()) { return true; } using TParameterPtr = boost::shared_ptr; boost::property_tree::ptree propTree; - try - { + try { std::ifstream strm(file.c_str()); - if (!strm.is_open()) - { + if (!strm.is_open()) { LOG_ERROR("Error opening file " << file); return false; } skipUtf8Bom(strm); boost::property_tree::ini_parser::read_ini(strm, propTree); - } - catch (boost::property_tree::ptree_error &e) - { + } catch (boost::property_tree::ptree_error& e) { LOG_ERROR("Error reading file " << file << " : " << e.what()); return false; } static const core_t::TTime ZERO_TIME = 0; static const double ZERO_DOUBLE = 0.0; - static const double ONE_DOUBLE = 1.0; - static const std::string LABELS[] = - { - std::string("scope.fields_of_interest"), - std::string("scope.permitted_argument_fields"), - std::string("scope.permitted_by_fields"), - std::string("scope.permitted_over_fields"), - std::string("scope.permitted_partition_fields"), - std::string("scope.functions_of_interest"), - 
std::string("statistics.field_data_types"), - std::string("statistics.minimum_examples_to_classify"), - std::string("statistics.number_of_most_frequent_to_count"), - std::string("configuration.minimum_records_to_attempt_config"), - std::string("configuration.high_number_of_by_fields"), - std::string("configuration.maximum_number_of_by_fields"), - std::string("configuration.high_number_of_rare_by_fields"), - std::string("configuration.maximum_number_of_rare_by_fields"), - std::string("configuration.high_number_of_partition_fields"), - std::string("configuration.maximum_of_number_partition_fields"), - std::string("configuration.low_number_of_over_fields"), - std::string("configuration.minimum_number_of_over_fields"), - std::string("configuration.high_cardinality_in_tail_factor"), - std::string("configuration.high_cardinality_in_tail_increment"), - std::string("configuration.high_cardinality_high_tail_fraction"), - std::string("configuration.high_cardinality_maximum_tail_fraction"), - std::string("configuration.low_populated_bucket_ratio"), - std::string("configuration.minimum_populated_bucket_ratio"), - std::string("configuration.high_populated_bucket_ratio"), - std::string("configuration.maximum_populated_bucket_ratio"), - std::string("configuration.candidate_bucket_lengths"), - std::string("configuration.low_number_buckets_for_config"), - std::string("configuration.minimum_number_buckets_for_config"), - std::string("configuration.polled_data_minimum_mass_at_interval"), - std::string("configuration.polled_data_jitter"), - std::string("configuration.low_coefficient_of_variation"), - std::string("configuration.minimum_coefficient_of_variation"), - std::string("configuration.low_length_range_for_info_content"), - std::string("configuration.minimum_length_range_for_info_content"), - std::string("configuration.low_maximum_length_for_info_content"), - std::string("configuration.minimum_maximum_length_for_info_content"), - std::string("configuration.low_entropy_for_info_content"), - std::string("configuration.minimum_entropy_for_info_content"), - std::string("configuration.low_distinct_count_for_info_content"), - std::string("configuration.minimum_distinct_count_for_info_content") - - }; - TParameterPtr parameters[] = - { - TParameterPtr(new COptionalStrVecParameter(m_FieldsOfInterest, new CNotEmpty)), - TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX])), - TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::BY_INDEX])), - TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX])), - TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX])), - TParameterPtr(new CFunctionCategoryParameter(m_FunctionCategoriesToConfigure)), - TParameterPtr(new CFieldDataTypeParameter(m_FieldDataTypes)), - TParameterPtr(new CBuiltinParameter(m_MinimumExamplesToClassify)), - TParameterPtr(new CBuiltinParameter(m_NumberOfMostFrequentFieldsCounts)), - TParameterPtr(new CBuiltinParameter( - m_MinimumRecordsToAttemptConfig, - new CValueIs(m_MinimumExamplesToClassify))), - TParameterPtr(new CBuiltinParameter(m_HighNumberByFieldValues)), - TParameterPtr(new CBuiltinParameter( - m_MaximumNumberByFieldValues, - new CValueIs(m_HighNumberByFieldValues))), - TParameterPtr(new CBuiltinParameter(m_HighNumberRareByFieldValues)), - TParameterPtr(new CBuiltinParameter( - m_MaximumNumberRareByFieldValues, - new 
CValueIs(m_HighNumberRareByFieldValues))), - TParameterPtr(new CBuiltinParameter(m_HighNumberPartitionFieldValues)), - TParameterPtr(new CBuiltinParameter( - m_MaximumNumberPartitionFieldValues, - new CValueIs(m_HighNumberPartitionFieldValues))), - TParameterPtr(new CBuiltinParameter(m_LowNumberOverFieldValues)), - TParameterPtr(new CBuiltinParameter( - m_MinimumNumberOverFieldValues, - new CValueIs(m_LowNumberOverFieldValues))), - TParameterPtr(new CBuiltinParameter( - m_HighCardinalityInTailFactor, - new CValueIs(ONE_DOUBLE))), - TParameterPtr(new CBuiltinParameter(m_HighCardinalityInTailIncrement)), - TParameterPtr(new CBuiltinParameter( - m_HighCardinalityHighTailFraction, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_HighCardinalityMaximumTailFraction, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(m_HighCardinalityHighTailFraction)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinVectorParameter( - m_LowPopulatedBucketFractions, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)) - ->addConstraint(new CSizeIs(2)))), - TParameterPtr(new CBuiltinVectorParameter( - m_MinimumPopulatedBucketFractions, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CVectorValueIs(m_LowPopulatedBucketFractions)) - ->addConstraint(new CSizeIs(2)))), - TParameterPtr(new CBuiltinParameter( - m_HighPopulatedBucketFractions[1], - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_MaximumPopulatedBucketFractions[1], - (new CConstraintConjunction) - ->addConstraint(new CVectorValueIs(m_HighPopulatedBucketFractions)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinVectorParameter( - m_CandidateBucketLengths, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_TIME)) - ->addConstraint(new CNotEmpty))), - TParameterPtr(new CBuiltinParameter( - m_LowNumberOfBucketsForConfig, - new CValueIs(ZERO_DOUBLE))), - TParameterPtr(new CBuiltinParameter( - m_MinimumNumberOfBucketsForConfig, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowNumberOfBucketsForConfig)))), - TParameterPtr(new CBuiltinParameter( - m_PolledDataMinimumMassAtInterval, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_PolledDataJitter, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_LowCoefficientOfVariation, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_MinimumCoefficientOfVariation, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowCoefficientOfVariation)))), - TParameterPtr(new CBuiltinParameter( - m_LowLengthRangeForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_MinimumLengthRangeForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new 
CValueIs(m_LowLengthRangeForInfoContent)))), - TParameterPtr(new CBuiltinParameter( - m_LowMaximumLengthForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_MinimumMaximumLengthForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowMaximumLengthForInfoContent)))), - TParameterPtr(new CBuiltinParameter( - m_LowEntropyForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_MinimumEntropyForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowEntropyForInfoContent)))), - TParameterPtr(new CBuiltinParameter( - m_LowDistinctCountForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)))), - TParameterPtr(new CBuiltinParameter( - m_MinimumDistinctCountForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowDistinctCountForInfoContent)))) - }; + static const double ONE_DOUBLE = 1.0; + static const std::string LABELS[] = {std::string("scope.fields_of_interest"), + std::string("scope.permitted_argument_fields"), + std::string("scope.permitted_by_fields"), + std::string("scope.permitted_over_fields"), + std::string("scope.permitted_partition_fields"), + std::string("scope.functions_of_interest"), + std::string("statistics.field_data_types"), + std::string("statistics.minimum_examples_to_classify"), + std::string("statistics.number_of_most_frequent_to_count"), + std::string("configuration.minimum_records_to_attempt_config"), + std::string("configuration.high_number_of_by_fields"), + std::string("configuration.maximum_number_of_by_fields"), + std::string("configuration.high_number_of_rare_by_fields"), + std::string("configuration.maximum_number_of_rare_by_fields"), + std::string("configuration.high_number_of_partition_fields"), + std::string("configuration.maximum_of_number_partition_fields"), + std::string("configuration.low_number_of_over_fields"), + std::string("configuration.minimum_number_of_over_fields"), + std::string("configuration.high_cardinality_in_tail_factor"), + std::string("configuration.high_cardinality_in_tail_increment"), + std::string("configuration.high_cardinality_high_tail_fraction"), + std::string("configuration.high_cardinality_maximum_tail_fraction"), + std::string("configuration.low_populated_bucket_ratio"), + std::string("configuration.minimum_populated_bucket_ratio"), + std::string("configuration.high_populated_bucket_ratio"), + std::string("configuration.maximum_populated_bucket_ratio"), + std::string("configuration.candidate_bucket_lengths"), + std::string("configuration.low_number_buckets_for_config"), + std::string("configuration.minimum_number_buckets_for_config"), + std::string("configuration.polled_data_minimum_mass_at_interval"), + std::string("configuration.polled_data_jitter"), + std::string("configuration.low_coefficient_of_variation"), + std::string("configuration.minimum_coefficient_of_variation"), + std::string("configuration.low_length_range_for_info_content"), + std::string("configuration.minimum_length_range_for_info_content"), + std::string("configuration.low_maximum_length_for_info_content"), + std::string("configuration.minimum_maximum_length_for_info_content"), + 
std::string("configuration.low_entropy_for_info_content"), + std::string("configuration.minimum_entropy_for_info_content"), + std::string("configuration.low_distinct_count_for_info_content"), + std::string("configuration.minimum_distinct_count_for_info_content") + + }; + TParameterPtr parameters[] = { + TParameterPtr(new COptionalStrVecParameter(m_FieldsOfInterest, new CNotEmpty)), + TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX])), + TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::BY_INDEX])), + TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX])), + TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX])), + TParameterPtr(new CFunctionCategoryParameter(m_FunctionCategoriesToConfigure)), + TParameterPtr(new CFieldDataTypeParameter(m_FieldDataTypes)), + TParameterPtr(new CBuiltinParameter(m_MinimumExamplesToClassify)), + TParameterPtr(new CBuiltinParameter(m_NumberOfMostFrequentFieldsCounts)), + TParameterPtr(new CBuiltinParameter(m_MinimumRecordsToAttemptConfig, + new CValueIs(m_MinimumExamplesToClassify))), + TParameterPtr(new CBuiltinParameter(m_HighNumberByFieldValues)), + TParameterPtr(new CBuiltinParameter(m_MaximumNumberByFieldValues, + new CValueIs(m_HighNumberByFieldValues))), + TParameterPtr(new CBuiltinParameter(m_HighNumberRareByFieldValues)), + TParameterPtr(new CBuiltinParameter(m_MaximumNumberRareByFieldValues, + new CValueIs(m_HighNumberRareByFieldValues))), + TParameterPtr(new CBuiltinParameter(m_HighNumberPartitionFieldValues)), + TParameterPtr(new CBuiltinParameter(m_MaximumNumberPartitionFieldValues, + new CValueIs(m_HighNumberPartitionFieldValues))), + TParameterPtr(new CBuiltinParameter(m_LowNumberOverFieldValues)), + TParameterPtr(new CBuiltinParameter(m_MinimumNumberOverFieldValues, + new CValueIs(m_LowNumberOverFieldValues))), + TParameterPtr(new CBuiltinParameter(m_HighCardinalityInTailFactor, new CValueIs(ONE_DOUBLE))), + TParameterPtr(new CBuiltinParameter(m_HighCardinalityInTailIncrement)), + TParameterPtr(new CBuiltinParameter(m_HighCardinalityHighTailFraction, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr( + new CBuiltinParameter(m_HighCardinalityMaximumTailFraction, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(m_HighCardinalityHighTailFraction)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinVectorParameter(m_LowPopulatedBucketFractions, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)) + ->addConstraint(new CSizeIs(2)))), + TParameterPtr(new CBuiltinVectorParameter( + m_MinimumPopulatedBucketFractions, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CVectorValueIs(m_LowPopulatedBucketFractions)) + ->addConstraint(new CSizeIs(2)))), + TParameterPtr(new CBuiltinParameter(m_HighPopulatedBucketFractions[1], + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr( + new CBuiltinParameter(m_MaximumPopulatedBucketFractions[1], + (new CConstraintConjunction) + ->addConstraint(new CVectorValueIs(m_HighPopulatedBucketFractions)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new 
CBuiltinVectorParameter(m_CandidateBucketLengths, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_TIME)) + ->addConstraint(new CNotEmpty))), + TParameterPtr(new CBuiltinParameter(m_LowNumberOfBucketsForConfig, new CValueIs(ZERO_DOUBLE))), + TParameterPtr(new CBuiltinParameter(m_MinimumNumberOfBucketsForConfig, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowNumberOfBucketsForConfig)))), + TParameterPtr(new CBuiltinParameter(m_PolledDataMinimumMassAtInterval, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinParameter(m_PolledDataJitter, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_LowCoefficientOfVariation, + (new CConstraintConjunction)->addConstraint(new CValueIs(ZERO_DOUBLE)))), + TParameterPtr(new CBuiltinParameter(m_MinimumCoefficientOfVariation, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowCoefficientOfVariation)))), + TParameterPtr(new CBuiltinParameter( + m_LowLengthRangeForInfoContent, + (new CConstraintConjunction)->addConstraint(new CValueIs(ZERO_DOUBLE)))), + TParameterPtr(new CBuiltinParameter(m_MinimumLengthRangeForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowLengthRangeForInfoContent)))), + TParameterPtr(new CBuiltinParameter( + m_LowMaximumLengthForInfoContent, + (new CConstraintConjunction)->addConstraint(new CValueIs(ZERO_DOUBLE)))), + TParameterPtr( + new CBuiltinParameter(m_MinimumMaximumLengthForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowMaximumLengthForInfoContent)))), + TParameterPtr(new CBuiltinParameter(m_LowEntropyForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinParameter(m_MinimumEntropyForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowEntropyForInfoContent)))), + TParameterPtr(new CBuiltinParameter( + m_LowDistinctCountForInfoContent, + (new CConstraintConjunction)->addConstraint(new CValueIs(ZERO_DOUBLE)))), + TParameterPtr( + new CBuiltinParameter(m_MinimumDistinctCountForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowDistinctCountForInfoContent))))}; bool result = true; - for (std::size_t i = 0u; i < boost::size(LABELS); ++i) - { - if (processSetting(propTree, LABELS[i], *parameters[i]) == false) - { + for (std::size_t i = 0u; i < boost::size(LABELS); ++i) { + if (processSetting(propTree, LABELS[i], *parameters[i]) == false) { result = false; } } - if (!result) - { + if (!result) { LOG_ERROR("Error processing config file " << file); } this->refreshPenaltyIndices(); return result; } -const std::string &CAutoconfigurerParams::timeFieldName() const -{ +const std::string& CAutoconfigurerParams::timeFieldName() const { return m_TimeFieldName; } -const std::string &CAutoconfigurerParams::timeFieldFormat() const -{ +const std::string& CAutoconfigurerParams::timeFieldFormat() const { return m_TimeFieldFormat; } -bool CAutoconfigurerParams::verbose() const -{ +bool 
CAutoconfigurerParams::verbose() const { return m_Verbose; } -bool CAutoconfigurerParams::writeDetectorConfigs() const -{ +bool CAutoconfigurerParams::writeDetectorConfigs() const { return m_WriteDetectorConfigs; } -const std::string &CAutoconfigurerParams::detectorConfigLineEnding() const -{ +const std::string& CAutoconfigurerParams::detectorConfigLineEnding() const { return m_DetectorConfigLineEnding; } -bool CAutoconfigurerParams::fieldOfInterest(const std::string &field) const -{ - if (m_FieldsOfInterest) - { +bool CAutoconfigurerParams::fieldOfInterest(const std::string& field) const { + if (m_FieldsOfInterest) { return std::find(m_FieldsOfInterest->begin(), m_FieldsOfInterest->end(), field) != m_FieldsOfInterest->end(); } return true; } -bool CAutoconfigurerParams::canUseForFunctionArgument(const std::string &argument) const -{ +bool CAutoconfigurerParams::canUseForFunctionArgument(const std::string& argument) const { return canUse(m_FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX], m_FieldsOfInterest, argument); } -bool CAutoconfigurerParams::canUseForByField(const std::string &by) const -{ +bool CAutoconfigurerParams::canUseForByField(const std::string& by) const { return canUse(m_FieldsToUseInAutoconfigureByRole[constants::BY_INDEX], m_FieldsOfInterest, by); } -bool CAutoconfigurerParams::canUseForOverField(const std::string &over) const -{ +bool CAutoconfigurerParams::canUseForOverField(const std::string& over) const { return canUse(m_FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX], m_FieldsOfInterest, over); } -bool CAutoconfigurerParams::canUseForPartitionField(const std::string &partition) const -{ +bool CAutoconfigurerParams::canUseForPartitionField(const std::string& partition) const { return canUse(m_FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX], m_FieldsOfInterest, partition); } -const CAutoconfigurerParams::TFunctionCategoryVec &CAutoconfigurerParams::functionsCategoriesToConfigure() const -{ +const CAutoconfigurerParams::TFunctionCategoryVec& CAutoconfigurerParams::functionsCategoriesToConfigure() const { return m_FunctionCategoriesToConfigure; } -CAutoconfigurerParams::TOptionalUserDataType CAutoconfigurerParams::dataType(const std::string &field) const -{ +CAutoconfigurerParams::TOptionalUserDataType CAutoconfigurerParams::dataType(const std::string& field) const { TStrUserDataTypePrVec::const_iterator result = - std::lower_bound(m_FieldDataTypes.begin(), - m_FieldDataTypes.end(), - field, maths::COrderings::SFirstLess()); - return result != m_FieldDataTypes.end() && result->first == field ? - TOptionalUserDataType(result->second) : TOptionalUserDataType(); + std::lower_bound(m_FieldDataTypes.begin(), m_FieldDataTypes.end(), field, maths::COrderings::SFirstLess()); + return result != m_FieldDataTypes.end() && result->first == field ? 
TOptionalUserDataType(result->second) : TOptionalUserDataType(); } -uint64_t CAutoconfigurerParams::minimumExamplesToClassify() const -{ +uint64_t CAutoconfigurerParams::minimumExamplesToClassify() const { return m_MinimumExamplesToClassify; } -std::size_t CAutoconfigurerParams::numberOfMostFrequentFieldsCounts() const -{ +std::size_t CAutoconfigurerParams::numberOfMostFrequentFieldsCounts() const { return m_NumberOfMostFrequentFieldsCounts; } -uint64_t CAutoconfigurerParams::minimumRecordsToAttemptConfig() const -{ +uint64_t CAutoconfigurerParams::minimumRecordsToAttemptConfig() const { return m_MinimumRecordsToAttemptConfig; } -double CAutoconfigurerParams::minimumDetectorScore() const -{ +double CAutoconfigurerParams::minimumDetectorScore() const { return m_MinimumDetectorScore; } -std::size_t CAutoconfigurerParams::highNumberByFieldValues() const -{ +std::size_t CAutoconfigurerParams::highNumberByFieldValues() const { return m_HighNumberByFieldValues; } -std::size_t CAutoconfigurerParams::maximumNumberByFieldValues() const -{ +std::size_t CAutoconfigurerParams::maximumNumberByFieldValues() const { return m_MaximumNumberByFieldValues; } -std::size_t CAutoconfigurerParams::highNumberRareByFieldValues() const -{ +std::size_t CAutoconfigurerParams::highNumberRareByFieldValues() const { return m_HighNumberRareByFieldValues; } -std::size_t CAutoconfigurerParams::maximumNumberRareByFieldValues() const -{ +std::size_t CAutoconfigurerParams::maximumNumberRareByFieldValues() const { return m_MaximumNumberRareByFieldValues; } -std::size_t CAutoconfigurerParams::highNumberPartitionFieldValues() const -{ +std::size_t CAutoconfigurerParams::highNumberPartitionFieldValues() const { return m_HighNumberPartitionFieldValues; } -std::size_t CAutoconfigurerParams::maximumNumberPartitionFieldValues() const -{ +std::size_t CAutoconfigurerParams::maximumNumberPartitionFieldValues() const { return m_MaximumNumberPartitionFieldValues; } -std::size_t CAutoconfigurerParams::lowNumberOverFieldValues() const -{ +std::size_t CAutoconfigurerParams::lowNumberOverFieldValues() const { return m_LowNumberOverFieldValues; } -std::size_t CAutoconfigurerParams::minimumNumberOverFieldValues() const -{ +std::size_t CAutoconfigurerParams::minimumNumberOverFieldValues() const { return m_MinimumNumberOverFieldValues; } -double CAutoconfigurerParams::highCardinalityInTailFactor() const -{ +double CAutoconfigurerParams::highCardinalityInTailFactor() const { return m_HighCardinalityInTailFactor; } -uint64_t CAutoconfigurerParams::highCardinalityInTailIncrement() const -{ +uint64_t CAutoconfigurerParams::highCardinalityInTailIncrement() const { return m_HighCardinalityInTailIncrement; } -double CAutoconfigurerParams::highCardinalityHighTailFraction() const -{ +double CAutoconfigurerParams::highCardinalityHighTailFraction() const { return m_HighCardinalityHighTailFraction; } -double CAutoconfigurerParams::highCardinalityMaximumTailFraction() const -{ +double CAutoconfigurerParams::highCardinalityMaximumTailFraction() const { return m_HighCardinalityMaximumTailFraction; } -double CAutoconfigurerParams::lowPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const -{ +double CAutoconfigurerParams::lowPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const { return m_LowPopulatedBucketFractions[config_t::hasDoAndDontIgnoreEmptyVersions(function) && ignoreEmpty]; } -double CAutoconfigurerParams::minimumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) 
const -{ +double CAutoconfigurerParams::minimumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const { return m_MinimumPopulatedBucketFractions[config_t::hasDoAndDontIgnoreEmptyVersions(function) && ignoreEmpty]; } -double CAutoconfigurerParams::highPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const -{ +double CAutoconfigurerParams::highPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const { return m_HighPopulatedBucketFractions[config_t::hasDoAndDontIgnoreEmptyVersions(function) && ignoreEmpty]; } -double CAutoconfigurerParams::maximumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const -{ +double CAutoconfigurerParams::maximumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const { return m_MaximumPopulatedBucketFractions[config_t::hasDoAndDontIgnoreEmptyVersions(function) && ignoreEmpty]; } -const CAutoconfigurerParams::TTimeVec &CAutoconfigurerParams::candidateBucketLengths() const -{ +const CAutoconfigurerParams::TTimeVec& CAutoconfigurerParams::candidateBucketLengths() const { return m_CandidateBucketLengths; } -double CAutoconfigurerParams::lowNumberOfBucketsForConfig() const -{ +double CAutoconfigurerParams::lowNumberOfBucketsForConfig() const { return m_LowNumberOfBucketsForConfig; } -double CAutoconfigurerParams::minimumNumberOfBucketsForConfig() const -{ +double CAutoconfigurerParams::minimumNumberOfBucketsForConfig() const { return m_MinimumNumberOfBucketsForConfig; } -double CAutoconfigurerParams::polledDataMinimumMassAtInterval() const -{ +double CAutoconfigurerParams::polledDataMinimumMassAtInterval() const { return m_PolledDataMinimumMassAtInterval; } -double CAutoconfigurerParams::polledDataJitter() const -{ +double CAutoconfigurerParams::polledDataJitter() const { return m_PolledDataJitter; } -double CAutoconfigurerParams::lowCoefficientOfVariation() const -{ +double CAutoconfigurerParams::lowCoefficientOfVariation() const { return m_LowCoefficientOfVariation; } -double CAutoconfigurerParams::minimumCoefficientOfVariation() const -{ +double CAutoconfigurerParams::minimumCoefficientOfVariation() const { return m_MinimumCoefficientOfVariation; } -double CAutoconfigurerParams::lowLengthRangeForInfoContent() const -{ +double CAutoconfigurerParams::lowLengthRangeForInfoContent() const { return m_LowLengthRangeForInfoContent; } -double CAutoconfigurerParams::minimumLengthRangeForInfoContent() const -{ +double CAutoconfigurerParams::minimumLengthRangeForInfoContent() const { return m_MinimumLengthRangeForInfoContent; } -double CAutoconfigurerParams::lowMaximumLengthForInfoContent() const -{ +double CAutoconfigurerParams::lowMaximumLengthForInfoContent() const { return m_LowMaximumLengthForInfoContent; } -double CAutoconfigurerParams::minimumMaximumLengthForInfoContent() const -{ +double CAutoconfigurerParams::minimumMaximumLengthForInfoContent() const { return m_MinimumMaximumLengthForInfoContent; } -double CAutoconfigurerParams::lowEntropyForInfoContent() const -{ +double CAutoconfigurerParams::lowEntropyForInfoContent() const { return m_LowEntropyForInfoContent; } -double CAutoconfigurerParams::minimumEntropyForInfoContent() const -{ +double CAutoconfigurerParams::minimumEntropyForInfoContent() const { return m_MinimumEntropyForInfoContent; } -double CAutoconfigurerParams::lowDistinctCountForInfoContent() const -{ +double CAutoconfigurerParams::lowDistinctCountForInfoContent() const { return 
m_LowDistinctCountForInfoContent; } -double CAutoconfigurerParams::minimumDistinctCountForInfoContent() const -{ +double CAutoconfigurerParams::minimumDistinctCountForInfoContent() const { return m_MinimumDistinctCountForInfoContent; } -const CAutoconfigurerParams::TSizeVec &CAutoconfigurerParams::penaltyIndicesFor(std::size_t bid) const -{ +const CAutoconfigurerParams::TSizeVec& CAutoconfigurerParams::penaltyIndicesFor(std::size_t bid) const { return m_BucketLengthPenaltyIndices[bid]; } -const CAutoconfigurerParams::TSizeVec &CAutoconfigurerParams::penaltyIndicesFor(bool ignoreEmpty) const -{ +const CAutoconfigurerParams::TSizeVec& CAutoconfigurerParams::penaltyIndicesFor(bool ignoreEmpty) const { return m_IgnoreEmptyPenaltyIndices[ignoreEmpty]; } -std::size_t CAutoconfigurerParams::penaltyIndexFor(std::size_t bid, bool ignoreEmpty) const -{ +std::size_t CAutoconfigurerParams::penaltyIndexFor(std::size_t bid, bool ignoreEmpty) const { TSizeVec result; - std::set_intersection(this->penaltyIndicesFor(bid).begin(), this->penaltyIndicesFor(bid).end(), - this->penaltyIndicesFor(ignoreEmpty).begin(), this->penaltyIndicesFor(ignoreEmpty).end(), + std::set_intersection(this->penaltyIndicesFor(bid).begin(), + this->penaltyIndicesFor(bid).end(), + this->penaltyIndicesFor(ignoreEmpty).begin(), + this->penaltyIndicesFor(ignoreEmpty).end(), std::back_inserter(result)); return result[0]; } -std::string CAutoconfigurerParams::print() const -{ -#define PRINT_STRING(field) result += " "#field" = " + m_##field + "\n" -#define PRINT_VALUE(field) result += " "#field" = " + core::CStringUtils::typeToString(m_##field) + "\n" -#define PRINT_CONTAINER(field) result += " "#field" = " + core::CContainerPrinter::print(m_##field) + "\n" +std::string CAutoconfigurerParams::print() const { +#define PRINT_STRING(field) result += " " #field " = " + m_##field + "\n" +#define PRINT_VALUE(field) result += " " #field " = " + core::CStringUtils::typeToString(m_##field) + "\n" +#define PRINT_CONTAINER(field) result += " " #field " = " + core::CContainerPrinter::print(m_##field) + "\n" std::string result; PRINT_STRING(TimeFieldName); @@ -1187,21 +943,17 @@ std::string CAutoconfigurerParams::print() const PRINT_CONTAINER(FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX]); PRINT_CONTAINER(FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX]); result += " FunctionCategoriesToConfigure = ["; - if (m_FunctionCategoriesToConfigure.size() > 0) - { + if (m_FunctionCategoriesToConfigure.size() > 0) { result += config_t::print(m_FunctionCategoriesToConfigure[0]); - for (std::size_t i = 1u; i < m_FunctionCategoriesToConfigure.size(); ++i) - { + for (std::size_t i = 1u; i < m_FunctionCategoriesToConfigure.size(); ++i) { result += ", " + config_t::print(m_FunctionCategoriesToConfigure[i]); } } result += "]\n"; result += " FieldDataType = ["; - if (m_FieldDataTypes.size() > 0) - { + if (m_FieldDataTypes.size() > 0) { result += "(" + m_FieldDataTypes[0].first + "," + config_t::print(m_FieldDataTypes[0].second) + ")"; - for (std::size_t i = 1u; i < m_FieldDataTypes.size(); ++i) - { + for (std::size_t i = 1u; i < m_FieldDataTypes.size(); ++i) { result += ", (" + m_FieldDataTypes[i].first + "," + config_t::print(m_FieldDataTypes[i].second) + ")"; } } @@ -1244,18 +996,15 @@ std::string CAutoconfigurerParams::print() const return result; } -void CAutoconfigurerParams::refreshPenaltyIndices() -{ +void CAutoconfigurerParams::refreshPenaltyIndices() { m_BucketLengthPenaltyIndices.resize(m_CandidateBucketLengths.size(), TSizeVec(2)); 
 m_IgnoreEmptyPenaltyIndices.resize(2, TSizeVec(m_CandidateBucketLengths.size()));
-    for (std::size_t i = 0u, n = m_CandidateBucketLengths.size(); i < m_CandidateBucketLengths.size(); ++i)
-    {
+    for (std::size_t i = 0u, n = m_CandidateBucketLengths.size(); i < m_CandidateBucketLengths.size(); ++i) {
         m_BucketLengthPenaltyIndices[i][0] = 0 + i;
         m_BucketLengthPenaltyIndices[i][1] = n + i;
-        m_IgnoreEmptyPenaltyIndices[0][i]  = 0 + i;
-        m_IgnoreEmptyPenaltyIndices[1][i]  = n + i;
+        m_IgnoreEmptyPenaltyIndices[0][i] = 0 + i;
+        m_IgnoreEmptyPenaltyIndices[1][i] = n + i;
     }
 }
-
 }
 }
diff --git a/lib/config/CDataCountStatistics.cc b/lib/config/CDataCountStatistics.cc
index 7a6c7e387e..97311bc8eb 100644
--- a/lib/config/CDataCountStatistics.cc
+++ b/lib/config/CDataCountStatistics.cc
@@ -31,18 +31,14 @@
 #include
 #include

-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {

 using TBoolVec = std::vector<bool>;

 //! We sample a subset of the short-bucket-length buckets to bound runtime.
-TBoolVec bucketSampleMask(core_t::TTime bucketLength)
-{
+TBoolVec bucketSampleMask(core_t::TTime bucketLength) {
     double n = std::ceil(std::max(3600.0 / static_cast<double>(bucketLength), 1.0));
     TBoolVec result(static_cast<std::size_t>(std::sqrt(n)), true);
     result.resize(static_cast<std::size_t>(n), false);
@@ -51,17 +47,12 @@ TBoolVec bucketSampleMask(core_t::TTime bucketLength)

 //! Insert with the same semantics as boost::unordered_map/set::emplace.
 template<typename T>
-std::size_t emplace(const std::string *name, std::vector<std::pair<const std::string*, T>> &stats)
-{
-    std::size_t i = static_cast<std::size_t>(
-                        std::lower_bound(stats.begin(), stats.end(),
-                                         name, maths::COrderings::SFirstLess()) - stats.begin());
-    if (i == stats.size())
-    {
+std::size_t emplace(const std::string* name, std::vector<std::pair<const std::string*, T>>& stats) {
+    std::size_t i =
+        static_cast<std::size_t>(std::lower_bound(stats.begin(), stats.end(), name, maths::COrderings::SFirstLess()) - stats.begin());
+    if (i == stats.size()) {
         stats.push_back(std::make_pair(name, T()));
-    }
-    else if (*name != *stats[i].first)
-    {
+    } else if (*name != *stats[i].first) {
         stats.insert(stats.begin() + i, std::make_pair(name, T()));
     }
     return i;
 }
@@ -70,58 +61,47 @@ std::size_t emplace(const std::string *name, std::vector<std::pair<const std::string*, T>>& stats)
-    for (TDetectorRecordCItr record = beginRecords; record != endRecords; ++record)
-    {
-        if (record->function() == config_t::E_DistinctCount)
-        {
-            if (const std::string *name = record->argumentFieldName())
-            {
-                const std::string &value = *record->argumentFieldValue();
+    for (TDetectorRecordCItr record = beginRecords; record != endRecords; ++record) {
+        if (record->function() == config_t::E_DistinctCount) {
+            if (const std::string* name = record->argumentFieldName()) {
+                const std::string& value = *record->argumentFieldValue();
                 std::size_t i = emplace(name, m_CurrentBucketArgumentDataPerPartition);
-                SBucketArgumentData &data =
-                    m_CurrentBucketArgumentDataPerPartition[i].second.emplace(partition, BJKST).first->second;
+                SBucketArgumentData& data = m_CurrentBucketArgumentDataPerPartition[i].second.emplace(partition, BJKST).first->second;
                 data.s_DistinctValues.add(CTools::category32(value));
                 data.s_MeanStringLength.add(static_cast<double>(value.length()));
             }
@@ -159,16 +131,12 @@ void CBucketCountStatistics::add(const TSizeSizeSizeTr &partition,
     }
 }

-void CBucketCountStatistics::capture()
-{
+void CBucketCountStatistics::capture() {
     using TSizeSizeSizeTrUInt64UMapCItr = TSizeSizeSizeTrUInt64UMap::const_iterator;
     using TSizeSizeSizeTrArgumentDataUMapItr = TSizeSizeSizeTrArgumentDataUMap::iterator;

     m_BucketPartitionCount += m_CurrentBucketPartitionCounts.size();
-    for (TSizeSizeSizeTrUInt64UMapCItr i = m_CurrentBucketPartitionCounts.begin();
-         i
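// Two illustrative notes on the code above (an editor's sketch, not lines of
// this patch). First, given the index layout built by refreshPenaltyIndices(),
// the set intersection computed by penaltyIndexFor(bid, ignoreEmpty) always
// has exactly one element and reduces to a closed form, with n the number of
// candidate bucket lengths:
//
//   penaltyIndexFor(bid, ignoreEmpty) == (ignoreEmpty ? n : 0) + bid
//
// Second, bucketSampleMask() keeps roughly sqrt(n) of every n sub-hour
// buckets. For example, assuming a 60 second candidate bucket length:
//
//   TBoolVec mask = bucketSampleMask(60); // ceil(3600/60) = 60 entries,
//                                         // floor(sqrt(60)) = 7 of them true
//   // Shuffling the mask (as the CDataCountStatistics constructor does)
//   // spreads those ~7 sampled buckets uniformly at random over each hour.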
!= m_CurrentBucketPartitionCounts.end(); - ++i) - { + for (TSizeSizeSizeTrUInt64UMapCItr i = m_CurrentBucketPartitionCounts.begin(); i != m_CurrentBucketPartitionCounts.end(); ++i) { TSizeSizePr id(i->first.first, i->first.third); double count = static_cast(i->second); m_CountMomentsPerPartition[id].add(count); @@ -176,15 +144,13 @@ void CBucketCountStatistics::capture() } m_CurrentBucketPartitionCounts.clear(); - for (std::size_t i = 0u; i < m_CurrentBucketArgumentDataPerPartition.size(); ++i) - { - const std::string *name = m_CurrentBucketArgumentDataPerPartition[i].first; - TSizeSizeSizeTrArgumentDataUMap &values = m_CurrentBucketArgumentDataPerPartition[i].second; + for (std::size_t i = 0u; i < m_CurrentBucketArgumentDataPerPartition.size(); ++i) { + const std::string* name = m_CurrentBucketArgumentDataPerPartition[i].first; + TSizeSizeSizeTrArgumentDataUMap& values = m_CurrentBucketArgumentDataPerPartition[i].second; std::size_t j = emplace(name, m_ArgumentMomentsPerPartition); - for (TSizeSizeSizeTrArgumentDataUMapItr k = values.begin(); k != values.end(); ++k) - { + for (TSizeSizeSizeTrArgumentDataUMapItr k = values.begin(); k != values.end(); ++k) { TSizeSizePr id(k->first.first, k->first.third); - SArgumentMoments &moments = m_ArgumentMomentsPerPartition[j].second[id]; + SArgumentMoments& moments = m_ArgumentMomentsPerPartition[j].second[id]; double dc = static_cast(k->second.s_DistinctValues.number()); double info = dc * maths::CBasicStatistics::mean(k->second.s_MeanStringLength); moments.s_DistinctCount.add(dc); @@ -194,57 +160,48 @@ void CBucketCountStatistics::capture() } } -uint64_t CBucketCountStatistics::bucketPartitionCount() const -{ +uint64_t CBucketCountStatistics::bucketPartitionCount() const { return m_BucketPartitionCount; } -const CBucketCountStatistics::TSizeSizePrMomentsUMap &CBucketCountStatistics::countMomentsPerPartition() const -{ +const CBucketCountStatistics::TSizeSizePrMomentsUMap& CBucketCountStatistics::countMomentsPerPartition() const { return m_CountMomentsPerPartition; } -const CBucketCountStatistics::TSizeSizePrQuantileUMap &CBucketCountStatistics::countQuantilesPerPartition() const -{ +const CBucketCountStatistics::TSizeSizePrQuantileUMap& CBucketCountStatistics::countQuantilesPerPartition() const { return m_CountQuantiles; } -const CBucketCountStatistics::TSizeSizePrArgumentMomentsUMap & - CBucketCountStatistics::argumentMomentsPerPartition(const std::string &name) const -{ +const CBucketCountStatistics::TSizeSizePrArgumentMomentsUMap& +CBucketCountStatistics::argumentMomentsPerPartition(const std::string& name) const { using TStrCPtrPartitionArgumentMomentsUMapPrVecCItr = TStrCPtrSizeSizePrArgumentMomentsUMapPrVec::const_iterator; static const TSizeSizePrArgumentMomentsUMap EMPTY; - TStrCPtrPartitionArgumentMomentsUMapPrVecCItr result = - std::lower_bound(m_ArgumentMomentsPerPartition.begin(), - m_ArgumentMomentsPerPartition.end(), - &name, maths::COrderings::SFirstLess()); + TStrCPtrPartitionArgumentMomentsUMapPrVecCItr result = std::lower_bound( + m_ArgumentMomentsPerPartition.begin(), m_ArgumentMomentsPerPartition.end(), &name, maths::COrderings::SFirstLess()); return result != m_ArgumentMomentsPerPartition.end() && *result->first == name ? 
result->second : EMPTY; } - //////// CDataCountStatistics //////// -CDataCountStatistics::CDataCountStatistics(const CAutoconfigurerParams ¶ms) : - m_Params(params), - m_RecordCount(0), - m_ArrivalTimeDistribution(maths::CQuantileSketch::E_PiecewiseConstant, SKETCH_SIZE), - m_BucketIndices(params.candidateBucketLengths().size(), 0), - m_BucketCounts(params.candidateBucketLengths().size(), 0), - m_BucketStatistics(params.candidateBucketLengths().size()) -{ - const TTimeVec &candidates = params.candidateBucketLengths(); +CDataCountStatistics::CDataCountStatistics(const CAutoconfigurerParams& params) + : m_Params(params), + m_RecordCount(0), + m_ArrivalTimeDistribution(maths::CQuantileSketch::E_PiecewiseConstant, SKETCH_SIZE), + m_BucketIndices(params.candidateBucketLengths().size(), 0), + m_BucketCounts(params.candidateBucketLengths().size(), 0), + m_BucketStatistics(params.candidateBucketLengths().size()) { + const TTimeVec& candidates = params.candidateBucketLengths(); m_BucketMasks.reserve(candidates.size()); - for (std::size_t bid = 0u; bid < candidates.size(); ++bid) - { + for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { m_BucketMasks.push_back(bucketSampleMask(candidates[bid])); maths::CSampling::random_shuffle(m_Rng, m_BucketMasks[bid].begin(), m_BucketMasks[bid].end()); } } -CDataCountStatistics::~CDataCountStatistics() {} +CDataCountStatistics::~CDataCountStatistics() { +} -void CDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) -{ +void CDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) { ++m_RecordCount; core_t::TTime time = beginRecords->time(); @@ -252,28 +209,22 @@ void CDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecord m_Earliest.add(time); m_Latest.add(time); - if (m_LastRecordTime) - { + if (m_LastRecordTime) { m_ArrivalTimeDistribution.add(static_cast(time - *m_LastRecordTime)); } m_LastRecordTime = time; this->fillLastBucketEndTimes(time); - const TTimeVec &candidates = this->params().candidateBucketLengths(); - for (std::size_t bid = 0u; bid < m_LastBucketEndTimes.size(); ++bid) - { - if (time - m_LastBucketEndTimes[bid] >= candidates[bid]) - { - for (core_t::TTime i = 0; i < (time - m_LastBucketEndTimes[bid]) / candidates[bid]; ++i) - { - if (m_BucketMasks[bid][m_BucketIndices[bid]++]) - { + const TTimeVec& candidates = this->params().candidateBucketLengths(); + for (std::size_t bid = 0u; bid < m_LastBucketEndTimes.size(); ++bid) { + if (time - m_LastBucketEndTimes[bid] >= candidates[bid]) { + for (core_t::TTime i = 0; i < (time - m_LastBucketEndTimes[bid]) / candidates[bid]; ++i) { + if (m_BucketMasks[bid][m_BucketIndices[bid]++]) { ++m_BucketCounts[bid]; m_BucketStatistics[bid].capture(); } - if ((m_BucketIndices[bid] % m_BucketMasks.size()) == 0) - { + if ((m_BucketIndices[bid] % m_BucketMasks.size()) == 0) { m_BucketIndices[bid] = 0; maths::CSampling::random_shuffle(m_Rng, m_BucketMasks[bid].begin(), m_BucketMasks[bid].end()); } @@ -284,62 +235,50 @@ void CDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecord std::size_t partition = beginRecords->partitionFieldValueHash(); m_Partitions.insert(partition); - if (this->samplePartition(partition)) - { + if (this->samplePartition(partition)) { std::size_t by = beginRecords->byFieldValueHash(); std::size_t over = beginRecords->overFieldValueHash(); m_SampledPartitions.insert(partition); m_SampledTimeSeries.insert(std::make_pair(by, partition)); CBucketCountStatistics::TSizeSizeSizeTr 
id(by, over, partition); - for (std::size_t bid = 0u; bid < m_BucketStatistics.size(); ++bid) - { - if (m_BucketMasks[bid][m_BucketIndices[bid]]) - { + for (std::size_t bid = 0u; bid < m_BucketStatistics.size(); ++bid) { + if (m_BucketMasks[bid][m_BucketIndices[bid]]) { m_BucketStatistics[bid].add(id, beginRecords, endRecords); } } } } -uint64_t CDataCountStatistics::recordCount() const -{ +uint64_t CDataCountStatistics::recordCount() const { return m_RecordCount; } -const CDataCountStatistics::TUInt64Vec &CDataCountStatistics::bucketCounts() const -{ +const CDataCountStatistics::TUInt64Vec& CDataCountStatistics::bucketCounts() const { return m_BucketCounts; } -const maths::CQuantileSketch &CDataCountStatistics::arrivalTimeDistribution() const -{ +const maths::CQuantileSketch& CDataCountStatistics::arrivalTimeDistribution() const { return m_ArrivalTimeDistribution; } -core_t::TTime CDataCountStatistics::timeRange() const -{ +core_t::TTime CDataCountStatistics::timeRange() const { return m_Latest[0] - m_Earliest[0]; } -std::size_t CDataCountStatistics::numberSampledTimeSeries() const -{ +std::size_t CDataCountStatistics::numberSampledTimeSeries() const { return m_SampledTimeSeries.size(); } -const CDataCountStatistics::TBucketStatisticsVec &CDataCountStatistics::bucketStatistics() const -{ +const CDataCountStatistics::TBucketStatisticsVec& CDataCountStatistics::bucketStatistics() const { return m_BucketStatistics; } -const CAutoconfigurerParams &CDataCountStatistics::params() const -{ +const CAutoconfigurerParams& CDataCountStatistics::params() const { return m_Params; } -bool CDataCountStatistics::samplePartition(std::size_t partition) const -{ - if (m_SampledPartitions.count(partition) > 0) - { +bool CDataCountStatistics::samplePartition(std::size_t partition) const { + if (m_SampledPartitions.count(partition) > 0) { return true; } maths::CPRNG::CXorOShiro128Plus rng(partition); @@ -348,60 +287,46 @@ bool CDataCountStatistics::samplePartition(std::size_t partition) const return maths::CSampling::uniformSample(rng, 0.0, 1.0) < p; } -void CDataCountStatistics::fillLastBucketEndTimes(core_t::TTime time) -{ - if (m_LastBucketEndTimes.empty()) - { - const TTimeVec &candidates = this->params().candidateBucketLengths(); +void CDataCountStatistics::fillLastBucketEndTimes(core_t::TTime time) { + if (m_LastBucketEndTimes.empty()) { + const TTimeVec& candidates = this->params().candidateBucketLengths(); m_LastBucketEndTimes.reserve(candidates.size()); - for (std::size_t i = 0u; i < candidates.size(); ++i) - { + for (std::size_t i = 0u; i < candidates.size(); ++i) { m_LastBucketEndTimes.push_back(maths::CIntegerTools::ceil(time, candidates[i])); } } } - //////// CPartitionDataCountStatistics //////// -CPartitionDataCountStatistics::CPartitionDataCountStatistics(const CAutoconfigurerParams ¶ms) : - CDataCountStatistics(params) -{} +CPartitionDataCountStatistics::CPartitionDataCountStatistics(const CAutoconfigurerParams& params) : CDataCountStatistics(params) { +} -void CPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) -{ - if (beginRecords != endRecords) - { +void CPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) { + if (beginRecords != endRecords) { this->CDataCountStatistics::add(beginRecords, endRecords); } } - //////// CByAndPartitionDataCountStatistics //////// -CByAndPartitionDataCountStatistics::CByAndPartitionDataCountStatistics(const CAutoconfigurerParams ¶ms) : - CDataCountStatistics(params) 
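// A minimal sketch of the hash-keyed sampling used by samplePartition() above
// (the helper name is hypothetical and p stands for the acceptance probability
// computed in the elided lines):
//
//   bool sampled(std::size_t partitionHash, double p) {
//       // Seeding the generator with the partition's hash makes the decision
//       // a pure function of the partition: a given partition is either
//       // always or never sampled, with probability ~p across partitions,
//       // and no per-partition state needs to be stored.
//       maths::CPRNG::CXorOShiro128Plus rng(partitionHash);
//       return maths::CSampling::uniformSample(rng, 0.0, 1.0) < p;
//   }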
-{} +CByAndPartitionDataCountStatistics::CByAndPartitionDataCountStatistics(const CAutoconfigurerParams& params) : CDataCountStatistics(params) { +} -void CByAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) -{ - if (beginRecords != endRecords) - { +void CByAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) { + if (beginRecords != endRecords) { this->CDataCountStatistics::add(beginRecords, endRecords); } } - //////// CByOverAndPartitionDataCountStatistics //////// -CByOverAndPartitionDataCountStatistics::CByOverAndPartitionDataCountStatistics(const CAutoconfigurerParams ¶ms) : - CDataCountStatistics(params) -{} +CByOverAndPartitionDataCountStatistics::CByOverAndPartitionDataCountStatistics(const CAutoconfigurerParams& params) + : CDataCountStatistics(params) { +} -void CByOverAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) -{ - if (beginRecords == endRecords) - { +void CByOverAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) { + if (beginRecords == endRecords) { return; } @@ -410,8 +335,7 @@ void CByOverAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecord this->CDataCountStatistics::add(beginRecords, endRecords); std::size_t partition = beginRecords->partitionFieldValueHash(); - if (this->samplePartition(partition)) - { + if (this->samplePartition(partition)) { std::size_t by = beginRecords->byFieldValueHash(); std::size_t over = beginRecords->overFieldValueHash(); TSizeSizePrCBjkstUMapItr i = m_DistinctOverValues.emplace(std::make_pair(by, partition), BJKST).first; @@ -419,45 +343,35 @@ void CByOverAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecord } } -const CByOverAndPartitionDataCountStatistics::TSizeSizePrCBjkstUMap & - CByOverAndPartitionDataCountStatistics::sampledByAndPartitionDistinctOverCounts() const -{ +const CByOverAndPartitionDataCountStatistics::TSizeSizePrCBjkstUMap& +CByOverAndPartitionDataCountStatistics::sampledByAndPartitionDistinctOverCounts() const { return m_DistinctOverValues; } - //////// CDataCountStatisticsDirectAddressTable //////// -CDataCountStatisticsDirectAddressTable::CDataCountStatisticsDirectAddressTable(const CAutoconfigurerParams ¶ms) : - m_Params(params) -{} +CDataCountStatisticsDirectAddressTable::CDataCountStatisticsDirectAddressTable(const CAutoconfigurerParams& params) : m_Params(params) { +} -void CDataCountStatisticsDirectAddressTable::build(const TDetectorSpecificationVec &specs) -{ - using TCountStatisticsKeySizeUMap = - boost::unordered_map; +void CDataCountStatisticsDirectAddressTable::build(const TDetectorSpecificationVec& specs) { + using TCountStatisticsKeySizeUMap = boost::unordered_map; std::size_t size = 0u; - for (std::size_t i = 0u; i < specs.size(); ++i) - { + for (std::size_t i = 0u; i < specs.size(); ++i) { size = std::max(size, specs[i].id() + 1); } m_DetectorSchema.resize(size); TCountStatisticsKeySizeUMap uniques; - for (std::size_t i = 0u; i < specs.size(); ++i) - { - std::size_t id = specs[i].id(); - std::size_t next = uniques.size(); + for (std::size_t i = 0u; i < specs.size(); ++i) { + std::size_t id = specs[i].id(); + std::size_t next = uniques.size(); std::size_t index = uniques.emplace(specs[i], next).first->second; - if (index == next) - { + if (index == next) { m_RecordSchema.push_back(TPtrDiffVec(1, static_cast(id))); m_DataCountStatistics.push_back(this->stats(specs[i])); - } 
- else - { + } else { m_RecordSchema[index].push_back(static_cast(id)); } m_DetectorSchema[id] = index; @@ -466,30 +380,24 @@ void CDataCountStatisticsDirectAddressTable::build(const TDetectorSpecificationV LOG_DEBUG("There are " << m_DataCountStatistics.size() << " sets of count statistics"); } -void CDataCountStatisticsDirectAddressTable::pruneUnsed(const TDetectorSpecificationVec &specs) -{ +void CDataCountStatisticsDirectAddressTable::pruneUnsed(const TDetectorSpecificationVec& specs) { using TSizeUSet = boost::unordered_set; TSizeUSet used; - for (std::size_t i = 0u; i < specs.size(); ++i) - { + for (std::size_t i = 0u; i < specs.size(); ++i) { used.insert(m_DetectorSchema[specs[i].id()]); } std::size_t last = 0u; - for (std::size_t i = 0u; i < m_DataCountStatistics.size(); ++i) - { - if (last != i) - { + for (std::size_t i = 0u; i < m_DataCountStatistics.size(); ++i) { + if (last != i) { m_DataCountStatistics[last].swap(m_DataCountStatistics[i]); m_RecordSchema[last].swap(m_RecordSchema[i]); - for (std::size_t j = 0u; j < m_RecordSchema[last].size(); ++j) - { + for (std::size_t j = 0u; j < m_RecordSchema[last].size(); ++j) { m_DetectorSchema[m_RecordSchema[last][j]] = last; } } - if (used.count(i) > 0) - { + if (used.count(i) > 0) { ++last; } } @@ -497,33 +405,21 @@ void CDataCountStatisticsDirectAddressTable::pruneUnsed(const TDetectorSpecifica m_RecordSchema.erase(m_RecordSchema.begin() + last, m_RecordSchema.end()); } -void CDataCountStatisticsDirectAddressTable::add(const TDetectorRecordVec &records) -{ - for (std::size_t i = 0u; i < m_RecordSchema.size(); ++i) - { - m_DataCountStatistics[i]->add(core::begin_masked(records, m_RecordSchema[i]), - core::end_masked( records, m_RecordSchema[i])); +void CDataCountStatisticsDirectAddressTable::add(const TDetectorRecordVec& records) { + for (std::size_t i = 0u; i < m_RecordSchema.size(); ++i) { + m_DataCountStatistics[i]->add(core::begin_masked(records, m_RecordSchema[i]), core::end_masked(records, m_RecordSchema[i])); } } -const CDataCountStatistics & - CDataCountStatisticsDirectAddressTable::statistics(const CDetectorSpecification &spec) const -{ +const CDataCountStatistics& CDataCountStatisticsDirectAddressTable::statistics(const CDetectorSpecification& spec) const { return *m_DataCountStatistics[m_DetectorSchema[spec.id()]]; } CDataCountStatisticsDirectAddressTable::TDataCountStatisticsPtr - CDataCountStatisticsDirectAddressTable::stats(const CDetectorSpecification &spec) const -{ - using TStatistics = CDataCountStatistics *(*)(const CAutoconfigurerParams &); - static TStatistics STATISTICS[] = - { - &partitionCountStatistics, - &byAndPartitionStatistics, - &byOverAndPartitionStatistics - }; +CDataCountStatisticsDirectAddressTable::stats(const CDetectorSpecification& spec) const { + using TStatistics = CDataCountStatistics* (*)(const CAutoconfigurerParams&); + static TStatistics STATISTICS[] = {&partitionCountStatistics, &byAndPartitionStatistics, &byOverAndPartitionStatistics}; return TDataCountStatisticsPtr((STATISTICS[spec.overField() ? 2 : (spec.byField() ? 1 : 0)])(m_Params)); } - } } diff --git a/lib/config/CDataSemantics.cc b/lib/config/CDataSemantics.cc index 74398ae5d9..c72acb953f 100644 --- a/lib/config/CDataSemantics.cc +++ b/lib/config/CDataSemantics.cc @@ -16,12 +16,9 @@ #include -namespace ml -{ -namespace config -{ -namespace -{ +namespace ml { +namespace config { +namespace { using TDoubleVec = std::vector; using TSizeVec = std::vector; @@ -32,117 +29,98 @@ using TSizeVec = std::vector; //! 
a mixture model from the clustering. This is used to test for
//! numeric categorical fields by testing the BIC of a mixture model
//! versus a categorical model for the data.
-class CMixtureData
-{
-    public:
-        CMixtureData(double count, std::size_t N) : m_Count(count), m_Classifier(N) {}
-
-        //! Add the data point \p xi with count \p ni.
-        void add(double xi, double ni)
-        {
-            m_Classifier.add(xi, ni);
+class CMixtureData {
+public:
+    CMixtureData(double count, std::size_t N) : m_Count(count), m_Classifier(N) {}
+
+    //! Add the data point \p xi with count \p ni.
+    void add(double xi, double ni) { m_Classifier.add(xi, ni); }
+
+    //! Compute the scale for a mixture of \p m.
+    double scale(std::size_t m) {
+        using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
+
+        TSizeVec split;
+        m_Classifier.naturalBreaks(m, 2, split);
+        split.insert(split.begin(), 0);
+        m_Classifier.categories(m, 2, m_Categories);
+        TSizeVec counts;
+        counts.reserve(m);
+        for (std::size_t i = 1u; i < split.size(); ++i) {
+            counts.push_back(split[i] - split[i - 1]);
         }

-        //! Compute the scale for a mixture of \p m.
-        double scale(std::size_t m)
-        {
-            using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
-
-            TSizeVec split;
-            m_Classifier.naturalBreaks(m, 2, split);
-            split.insert(split.begin(), 0);
-            m_Classifier.categories(m, 2, m_Categories);
-            TSizeVec counts;
-            counts.reserve(m);
-            for (std::size_t i = 1u; i < split.size(); ++i)
-            {
-                counts.push_back(split[i] - split[i - 1]);
-            }
-
-            TMeanAccumulator scale;
-            for (std::size_t i = 0u; i < m_Categories.size(); ++i)
-            {
-                double ci = maths::CBasicStatistics::count(m_Categories[i]);
-                double vi = maths::CBasicStatistics::maximumLikelihoodVariance(m_Categories[i]);
-                double si = std::max(3.0 * std::sqrt(vi), 1.0 / boost::math::constants::root_two_pi<double>());
-                scale.add(static_cast<double>(counts[i]) / si, ci);
-            }
-            return maths::CBasicStatistics::mean(scale);
+        TMeanAccumulator scale;
+        for (std::size_t i = 0u; i < m_Categories.size(); ++i) {
+            double ci = maths::CBasicStatistics::count(m_Categories[i]);
+            double vi = maths::CBasicStatistics::maximumLikelihoodVariance(m_Categories[i]);
+            double si = std::max(3.0 * std::sqrt(vi), 1.0 / boost::math::constants::root_two_pi<double>());
+            scale.add(static_cast<double>(counts[i]) / si, ci);
         }
+        return maths::CBasicStatistics::mean(scale);
+    }

-        //! Populate a mixture of \p m.
-        void populate(std::size_t m)
-        {
-            this->clear();
-
-            TSizeVec split;
-            m_Classifier.naturalBreaks(m, 2, split);
-            split.insert(split.begin(), 0);
-            m_Classifier.categories(m, 2, m_Categories);
-            TSizeVec counts;
-            counts.reserve(m);
-            for (std::size_t i = 1u; i < split.size(); ++i)
-            {
-                counts.push_back(split[i] - split[i - 1]);
-            }
-            LOG_TRACE("m_Categories = " << core::CContainerPrinter::print(m_Categories));
-
-            for (std::size_t i = 0u; i < m_Categories.size(); ++i)
-            {
-                double ci = maths::CBasicStatistics::count(m_Categories[i]);
-                double mi = maths::CBasicStatistics::mean(m_Categories[i]);
-                double vi = maths::CBasicStatistics::maximumLikelihoodVariance(m_Categories[i]);
-                double si = std::max(std::sqrt(vi), 1.0 / boost::math::constants::root_two_pi<double>());
-                m_Gmm.weights().push_back(ci / m_Count);
-                m_Gmm.modes().push_back(boost::math::normal_distribution<>(mi, si));
-            }
-            LOG_TRACE("GMM = '" << m_Gmm.print() << "'");
+    //! Populate a mixture of \p m.
+    void populate(std::size_t m) {
+        this->clear();
+
+        TSizeVec split;
+        m_Classifier.naturalBreaks(m, 2, split);
+        split.insert(split.begin(), 0);
+        m_Classifier.categories(m, 2, m_Categories);
+        TSizeVec counts;
+        counts.reserve(m);
+        for (std::size_t i = 1u; i < split.size(); ++i) {
+            counts.push_back(split[i] - split[i - 1]);
         }
-
-        //! Get the number of parameters in the mixture.
-        double parameters() const
-        {
-            return 3.0 * static_cast<double>(m_Categories.size()) - 1.0;
+        LOG_TRACE("m_Categories = " << core::CContainerPrinter::print(m_Categories));
+
+        for (std::size_t i = 0u; i < m_Categories.size(); ++i) {
+            double ci = maths::CBasicStatistics::count(m_Categories[i]);
+            double mi = maths::CBasicStatistics::mean(m_Categories[i]);
+            double vi = maths::CBasicStatistics::maximumLikelihoodVariance(m_Categories[i]);
+            double si = std::max(std::sqrt(vi), 1.0 / boost::math::constants::root_two_pi<double>());
+            m_Gmm.weights().push_back(ci / m_Count);
+            m_Gmm.modes().push_back(boost::math::normal_distribution<>(mi, si));
         }
+        LOG_TRACE("GMM = '" << m_Gmm.print() << "'");
+    }

-        //! Compute the value of the density function at \p x.
-        double pdf(double x) const
-        {
-            return maths::pdf(m_Gmm, x);
-        }
+    //! Get the number of parameters in the mixture.
+    double parameters() const { return 3.0 * static_cast<double>(m_Categories.size()) - 1.0; }

-    private:
-        using TNormalVec = std::vector<boost::math::normal_distribution<>>;
-        using TGMM = maths::CMixtureDistribution<boost::math::normal_distribution<>>;
+    //! Compute the value of the density function at \p x.
+    double pdf(double x) const { return maths::pdf(m_Gmm, x); }

-    private:
-        void clear()
-        {
-            m_Categories.clear();
-            m_Gmm.weights().clear();
-            m_Gmm.modes().clear();
-        }
+private:
+    using TNormalVec = std::vector<boost::math::normal_distribution<>>;
+    using TGMM = maths::CMixtureDistribution<boost::math::normal_distribution<>>;
+
+private:
+    void clear() {
+        m_Categories.clear();
+        m_Gmm.weights().clear();
+        m_Gmm.modes().clear();
+    }

-    private:
-        double m_Count;
-        maths::CNaturalBreaksClassifier m_Classifier;
-        maths::CNaturalBreaksClassifier::TTupleVec m_Categories;
-        TGMM m_Gmm;
+private:
+    double m_Count;
+    maths::CNaturalBreaksClassifier m_Classifier;
+    maths::CNaturalBreaksClassifier::TTupleVec m_Categories;
+    TGMM m_Gmm;
 };
+}

+CDataSemantics::CDataSemantics(TOptionalUserDataType override)
+    : m_Type(config_t::E_UndeterminedType),
+      m_Override(override),
+      m_Count(0.0),
+      m_NumericProportion(0.0),
+      m_IntegerProportion(0.0),
+      m_EmpiricalDistributionOverflowed(false) {
+}

-CDataSemantics::CDataSemantics(TOptionalUserDataType override) :
-    m_Type(config_t::E_UndeterminedType),
-    m_Override(override),
-    m_Count(0.0),
-    m_NumericProportion(0.0),
-    m_IntegerProportion(0.0),
-    m_EmpiricalDistributionOverflowed(false)
-{}
-
-void CDataSemantics::add(const std::string &example)
-{
+void CDataSemantics::add(const std::string& example) {
     m_Count += 1.0;

     maths::COrdinal value;
@@ -152,43 +130,29 @@ void CDataSemantics::add(const std::string &example)
     int64_t asInt64;
     uint64_t asUInt64;
     double asDouble;
-    if (core::CStringUtils::stringToTypeSilent(trimmed, asInt64))
-    {
+    if (core::CStringUtils::stringToTypeSilent(trimmed, asInt64)) {
         value = this->addInteger(asInt64);
-    }
-    else if (core::CStringUtils::stringToTypeSilent(trimmed, asUInt64))
-    {
+    } else if (core::CStringUtils::stringToTypeSilent(trimmed, asUInt64)) {
         value = this->addPositiveInteger(asUInt64);
-    }
-    else if (core::CStringUtils::stringToTypeSilent(trimmed, asDouble))
-    {
+    } else if (core::CStringUtils::stringToTypeSilent(trimmed, asDouble)) {
         value = this->addReal(asDouble);
     }

-    if (!value.isNan())
-    {
m_Smallest.add(value); m_Largest.add(value); - } - else if ( m_NonNumericValues.size() < 2 - && std::find(m_NonNumericValues.begin(), - m_NonNumericValues.end(), trimmed) == m_NonNumericValues.end()) - { + } else if (m_NonNumericValues.size() < 2 && + std::find(m_NonNumericValues.begin(), m_NonNumericValues.end(), trimmed) == m_NonNumericValues.end()) { m_NonNumericValues.push_back(trimmed); } - if ( m_DistinctValues.size() < 3 - && std::find(m_DistinctValues.begin(), - m_DistinctValues.end(), example) == m_DistinctValues.end()) - { + if (m_DistinctValues.size() < 3 && std::find(m_DistinctValues.begin(), m_DistinctValues.end(), example) == m_DistinctValues.end()) { m_DistinctValues.push_back(example); } - if (!m_EmpiricalDistributionOverflowed && !value.isNan()) - { + if (!m_EmpiricalDistributionOverflowed && !value.isNan()) { ++m_EmpiricalDistribution[value]; - if (m_EmpiricalDistribution.size() > MAXIMUM_EMPIRICAL_DISTRIBUTION_SIZE) - { + if (m_EmpiricalDistribution.size() > MAXIMUM_EMPIRICAL_DISTRIBUTION_SIZE) { m_EmpiricalDistributionOverflowed = true; TOrdinalSizeUMap empty; m_EmpiricalDistribution.swap(empty); @@ -196,12 +160,9 @@ void CDataSemantics::add(const std::string &example) } } -void CDataSemantics::computeType() -{ - if (m_Override) - { - switch (*m_Override) - { +void CDataSemantics::computeType() { + if (m_Override) { + switch (*m_Override) { case config_t::E_UserCategorical: m_Type = this->categoricalType(); return; @@ -212,21 +173,18 @@ void CDataSemantics::computeType() } LOG_TRACE("count = " << m_Count); - if (m_Count == 0.0) - { + if (m_Count == 0.0) { m_Type = config_t::E_UndeterminedType; return; } - if (m_DistinctValues.size() == 2) - { + if (m_DistinctValues.size() == 2) { m_Type = config_t::E_Binary; return; } LOG_TRACE("numeric proportion = " << m_NumericProportion); - if (!this->isNumeric() || !this->GMMGoodFit()) - { + if (!this->isNumeric() || !this->GMMGoodFit()) { m_Type = config_t::E_Categorical; return; } @@ -235,41 +193,33 @@ void CDataSemantics::computeType() m_Type = this->isInteger() ? this->integerType() : this->realType(); } -config_t::EDataType CDataSemantics::type() const -{ +config_t::EDataType CDataSemantics::type() const { return m_Type; } -config_t::EDataType CDataSemantics::categoricalType() const -{ +config_t::EDataType CDataSemantics::categoricalType() const { return m_DistinctValues.size() == 2 ? config_t::E_Binary : config_t::E_Categorical; } -config_t::EDataType CDataSemantics::realType() const -{ +config_t::EDataType CDataSemantics::realType() const { return m_Smallest[0] < maths::COrdinal(0.0) ? config_t::E_Real : config_t::E_PositiveReal; } -config_t::EDataType CDataSemantics::integerType() const -{ +config_t::EDataType CDataSemantics::integerType() const { return m_Smallest[0] < maths::COrdinal(uint64_t(0)) ? 
config_t::E_Integer : config_t::E_PositiveInteger; } -bool CDataSemantics::isNumeric() const -{ - return m_NumericProportion >= NUMERIC_PROPORTION_FOR_METRIC_STRICT - || (m_NonNumericValues.size() < 2 && m_NumericProportion >= NUMERIC_PROPORTION_FOR_METRIC_WITH_SUSPECTED_MISSING_VALUES); +bool CDataSemantics::isNumeric() const { + return m_NumericProportion >= NUMERIC_PROPORTION_FOR_METRIC_STRICT || + (m_NonNumericValues.size() < 2 && m_NumericProportion >= NUMERIC_PROPORTION_FOR_METRIC_WITH_SUSPECTED_MISSING_VALUES); } -bool CDataSemantics::isInteger() const -{ +bool CDataSemantics::isInteger() const { return m_IntegerProportion / m_NumericProportion >= INTEGER_PRORORTION_FOR_INTEGER; } -bool CDataSemantics::GMMGoodFit() const -{ - if (m_EmpiricalDistributionOverflowed) - { +bool CDataSemantics::GMMGoodFit() const { + if (m_EmpiricalDistributionOverflowed) { return true; } @@ -281,16 +231,13 @@ bool CDataSemantics::GMMGoodFit() const std::size_t N = m_EmpiricalDistribution.size(); LOG_TRACE("N = " << N); - double logc = std::log(m_Count); + double logc = std::log(m_Count); double smallest = m_Smallest[0].asDouble(); - double offset = std::max(-smallest + 1.0, 0.0); + double offset = std::max(-smallest + 1.0, 0.0); LOG_TRACE("offset = " << offset); double categoricalBIC = static_cast(N - 1) * logc; - for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); - i != m_EmpiricalDistribution.end(); - ++i) - { + for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); i != m_EmpiricalDistribution.end(); ++i) { double ni = static_cast(i->second); categoricalBIC -= 2.0 * ni * std::log(ni / m_Count); } @@ -299,15 +246,11 @@ bool CDataSemantics::GMMGoodFit() const std::size_t M = std::min(m_EmpiricalDistribution.size() / 2, std::size_t(100)); LOG_TRACE("m = " << M); - for (std::size_t m = 1u; m <= M; ++m) - { + for (std::size_t m = 1u; m <= M; ++m) { double scale = 1.0; { CMixtureData scaling(m_Count, N); - for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); - i != m_EmpiricalDistribution.end(); - ++i) - { + for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); i != m_EmpiricalDistribution.end(); ++i) { double xi = i->first.asDouble(); double ni = static_cast(i->second); scaling.add(xi, ni); @@ -318,27 +261,20 @@ bool CDataSemantics::GMMGoodFit() const CMixtureData light(m_Count, N); CMixtureData heavy(m_Count, N); - for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); - i != m_EmpiricalDistribution.end(); - ++i) - { + for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); i != m_EmpiricalDistribution.end(); ++i) { double xi = smallest + scale * (i->first.asDouble() - smallest); double ni = static_cast(i->second); light.add(xi, ni); heavy.add(std::log(xi + offset), ni); } - try - { + try { light.populate(m); heavy.populate(m); double lightGmmBIC = light.parameters() * logc; double heavyGmmBIC = heavy.parameters() * logc; - for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); - i != m_EmpiricalDistribution.end(); - ++i) - { + for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); i != m_EmpiricalDistribution.end(); ++i) { double xi = smallest + scale * (i->first.asDouble() - smallest); double ni = static_cast(i->second); double fx = light.pdf(xi); @@ -348,39 +284,31 @@ bool CDataSemantics::GMMGoodFit() const } LOG_TRACE("light BIC = " << lightGmmBIC << ", heavy BIC = " << heavyGmmBIC); - if (std::min(lightGmmBIC, heavyGmmBIC) < categoricalBIC) - { + if (std::min(lightGmmBIC, heavyGmmBIC) < categoricalBIC) { return true; } - } - catch 
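// For reference (an editor's note, not lines of this patch): the quantities
// above are standard BIC values, BIC = k*ln(c) - 2*ln(L), with c = m_Count.
// The categorical model has k = N - 1 free category probabilities, so
//
//   categoricalBIC = (N - 1)*ln(c) - 2 * sum_i n_i * ln(n_i / c)
//
// while parameters() returns k = 3m - 1 for an m-mode mixture (m means,
// m standard deviations and m - 1 independent weights), giving
//
//   gmmBIC = (3m - 1)*ln(c) - 2 * sum_i n_i * ln(f(x_i))
//
// (up to the change of variables for the heavy-tailed, log-scale fit). The
// field is then treated as numeric as soon as some mixture achieves a lower
// BIC than the categorical model.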
(const std::exception &e) - { - LOG_ERROR("Failed to compute BIC for " << m << " modes: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to compute BIC for " << m << " modes: " << e.what()); } } return false; } template -maths::COrdinal CDataSemantics::addInteger(INT value) -{ +maths::COrdinal CDataSemantics::addInteger(INT value) { m_NumericProportion = (m_NumericProportion * (m_Count - 1.0) + 1.0) / m_Count; m_IntegerProportion = (m_IntegerProportion * (m_Count - 1.0) + 1.0) / m_Count; return maths::COrdinal(static_cast(value)); } template -maths::COrdinal CDataSemantics::addPositiveInteger(UINT value) -{ +maths::COrdinal CDataSemantics::addPositiveInteger(UINT value) { m_NumericProportion = (m_NumericProportion * (m_Count - 1.0) + 1.0) / m_Count; m_IntegerProportion = (m_IntegerProportion * (m_Count - 1.0) + 1.0) / m_Count; return maths::COrdinal(static_cast(value)); } template -maths::COrdinal CDataSemantics::addReal(REAL value) -{ +maths::COrdinal CDataSemantics::addReal(REAL value) { m_NumericProportion = (m_NumericProportion * (m_Count - 1.0) + 1.0) / m_Count; return maths::COrdinal(static_cast(value)); } @@ -389,7 +317,5 @@ const std::size_t CDataSemantics::MAXIMUM_EMPIRICAL_DISTRIBUTION_SIZE(10000); const double CDataSemantics::NUMERIC_PROPORTION_FOR_METRIC_STRICT(0.99); const double CDataSemantics::NUMERIC_PROPORTION_FOR_METRIC_WITH_SUSPECTED_MISSING_VALUES(0.5); const double CDataSemantics::INTEGER_PRORORTION_FOR_INTEGER(0.999); - } } - diff --git a/lib/config/CDataSummaryStatistics.cc b/lib/config/CDataSummaryStatistics.cc index 0e0fd508c8..16f8d39473 100644 --- a/lib/config/CDataSummaryStatistics.cc +++ b/lib/config/CDataSummaryStatistics.cc @@ -20,18 +20,14 @@ #include -namespace ml -{ -namespace config -{ -namespace -{ +namespace ml { +namespace config { +namespace { using TDerefSecondLess = core::CFunctional::SDereference; using TDerefSecondGreater = core::CFunctional::SDereference; -std::size_t topNSize(std::size_t n) -{ +std::size_t topNSize(std::size_t n) { return static_cast(std::ceil(1.5 * static_cast(n))); } @@ -46,99 +42,83 @@ const double CLUSTER_MINIMUM_COUNT = 10.0; core::CHashing::CMurmurHash2String HASHER; double PROBABILITY_TO_SAMPLE_ENTROPY = 0.5; double PROBABILITY_TO_SAMPLE_N_GRAMS = 0.02; - } -CDataSummaryStatistics::CDataSummaryStatistics() : m_Count(0) -{ +CDataSummaryStatistics::CDataSummaryStatistics() : m_Count(0) { } -uint64_t CDataSummaryStatistics::count() const -{ +uint64_t CDataSummaryStatistics::count() const { return m_Count; } -core_t::TTime CDataSummaryStatistics::earliest() const -{ +core_t::TTime CDataSummaryStatistics::earliest() const { return m_Earliest[0]; } -core_t::TTime CDataSummaryStatistics::latest() const -{ +core_t::TTime CDataSummaryStatistics::latest() const { return m_Latest[0]; } -double CDataSummaryStatistics::meanRate() const -{ +double CDataSummaryStatistics::meanRate() const { return static_cast(m_Count) / static_cast(m_Latest[0] - m_Earliest[0]); } -void CDataSummaryStatistics::add(core_t::TTime time) -{ +void CDataSummaryStatistics::add(core_t::TTime time) { m_Earliest.add(time); m_Latest.add(time); ++m_Count; } +CCategoricalDataSummaryStatistics::CCategoricalDataSummaryStatistics(std::size_t n, std::size_t toApproximate) + : m_ToApproximate(toApproximate), + m_Approximating(toApproximate == 0), + m_DistinctValues(DS_NUMBER_HASHES, DS_MAX_SIZE), + m_CountSketch(CS_ROWS, CS_COLUMNS), + m_N(std::max(n, std::size_t(1))), + m_TopN(topNSize(m_N)), // This is important to stop invalidation of + // 
the lowest top-n iterator by an insertion. + m_LowestTopN(m_TopN.end()), + m_EmpiricalEntropy(ES_K), + m_DistinctNGrams(NUMBER_N_GRAMS, maths::CBjkstUniqueValues(DS_NUMBER_HASHES, DS_MAX_SIZE)), + m_NGramEmpricalEntropy(NUMBER_N_GRAMS, maths::CEntropySketch(ES_K)) { +} -CCategoricalDataSummaryStatistics::CCategoricalDataSummaryStatistics(std::size_t n, - std::size_t toApproximate) : - m_ToApproximate(toApproximate), - m_Approximating(toApproximate == 0), - m_DistinctValues(DS_NUMBER_HASHES, DS_MAX_SIZE), - m_CountSketch(CS_ROWS, CS_COLUMNS), - m_N(std::max(n, std::size_t(1))), - m_TopN(topNSize(m_N)), // This is important to stop invalidation of - // the lowest top-n iterator by an insertion. - m_LowestTopN(m_TopN.end()), - m_EmpiricalEntropy(ES_K), - m_DistinctNGrams(NUMBER_N_GRAMS, maths::CBjkstUniqueValues(DS_NUMBER_HASHES, DS_MAX_SIZE)), - m_NGramEmpricalEntropy(NUMBER_N_GRAMS, maths::CEntropySketch(ES_K)) -{} - -CCategoricalDataSummaryStatistics::CCategoricalDataSummaryStatistics(const CDataSummaryStatistics &other, +CCategoricalDataSummaryStatistics::CCategoricalDataSummaryStatistics(const CDataSummaryStatistics& other, std::size_t n, - std::size_t toApproximate) : - CDataSummaryStatistics(other), - m_ToApproximate(toApproximate), - m_Approximating(toApproximate == 0), - m_DistinctValues(DS_NUMBER_HASHES, DS_MAX_SIZE), - m_CountSketch(CS_ROWS, CS_COLUMNS), - m_N(std::max(n, std::size_t(1))), - m_TopN(topNSize(m_N)), // This is important to stop invalidation of - // the lowest top-n iterator by an insertion. - m_LowestTopN(m_TopN.end()), - m_EmpiricalEntropy(ES_K), - m_DistinctNGrams(NUMBER_N_GRAMS, maths::CBjkstUniqueValues(DS_NUMBER_HASHES, DS_MAX_SIZE)), - m_NGramEmpricalEntropy(NUMBER_N_GRAMS, maths::CEntropySketch(ES_K)) -{} - -void CCategoricalDataSummaryStatistics::add(core_t::TTime time, const std::string &example) -{ + std::size_t toApproximate) + : CDataSummaryStatistics(other), + m_ToApproximate(toApproximate), + m_Approximating(toApproximate == 0), + m_DistinctValues(DS_NUMBER_HASHES, DS_MAX_SIZE), + m_CountSketch(CS_ROWS, CS_COLUMNS), + m_N(std::max(n, std::size_t(1))), + m_TopN(topNSize(m_N)), // This is important to stop invalidation of + // the lowest top-n iterator by an insertion. 
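// Editor's note on the sizing above: topNSize(n) = ceil(1.5 * n) (defined
// earlier in this file), so the top-n map is constructed with roughly 50%
// headroom and only trimmed back once it exceeds topNSize(m_N) entries.
// That spare capacity is what lets the class cache m_LowestTopN, an iterator
// into the map, across insertions of new candidate values. For example, with
// n = 10 the map can hold up to 15 entries before the lowest one is evicted.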
+ m_LowestTopN(m_TopN.end()), + m_EmpiricalEntropy(ES_K), + m_DistinctNGrams(NUMBER_N_GRAMS, maths::CBjkstUniqueValues(DS_NUMBER_HASHES, DS_MAX_SIZE)), + m_NGramEmpricalEntropy(NUMBER_N_GRAMS, maths::CEntropySketch(ES_K)) { +} + +void CCategoricalDataSummaryStatistics::add(core_t::TTime time, const std::string& example) { this->CDataSummaryStatistics::add(time); std::size_t category; - if (!m_Approximating) - { + if (!m_Approximating) { category = CTools::category64(example); ++m_ValueCounts[category]; - } - else - { + } else { category = CTools::category32(example); m_DistinctValues.add(static_cast(category)); m_CountSketch.add(static_cast(category), 1.0); } m_MinLength.add(example.length()); m_MaxLength.add(example.length()); - if (maths::CSampling::uniformSample(m_Rng, 0.0, 1.0) < PROBABILITY_TO_SAMPLE_ENTROPY) - { + if (maths::CSampling::uniformSample(m_Rng, 0.0, 1.0) < PROBABILITY_TO_SAMPLE_ENTROPY) { m_EmpiricalEntropy.add(HASHER(example)); } - if (maths::CSampling::uniformSample(m_Rng, 0.0, 1.0) < PROBABILITY_TO_SAMPLE_N_GRAMS) - { - for (std::size_t n = 1u; n <= 5; ++n) - { + if (maths::CSampling::uniformSample(m_Rng, 0.0, 1.0) < PROBABILITY_TO_SAMPLE_N_GRAMS) { + for (std::size_t n = 1u; n <= 5; ++n) { this->addNGrams(n, example); } } @@ -146,36 +126,26 @@ void CCategoricalDataSummaryStatistics::add(core_t::TTime time, const std::strin this->updateCalibrators(category); TStrUInt64UMapItr i = m_TopN.find(example); - if (i == m_TopN.end()) - { - if (m_TopN.size() > topNSize(m_N)) - { + if (i == m_TopN.end()) { + if (m_TopN.size() > topNSize(m_N)) { double estimate_ = this->calibratedCount(category); - if (estimate_ > 0.0) - { + if (estimate_ > 0.0) { std::size_t estimate = static_cast(estimate_ + 0.5); - if (m_LowestTopN == m_TopN.end()) - { + if (m_LowestTopN == m_TopN.end()) { this->findLowestTopN(); } - if (estimate > m_LowestTopN->second) - { + if (estimate > m_LowestTopN->second) { m_TopN.erase(m_LowestTopN); m_TopN.insert(std::make_pair(example, estimate)); this->findLowestTopN(); } } - } - else - { + } else { i = m_TopN.insert(std::make_pair(example, std::size_t(1))).first; } - } - else - { + } else { ++i->second; - if (i == m_LowestTopN) - { + if (i == m_LowestTopN) { this->findLowestTopN(); } } @@ -183,73 +153,60 @@ void CCategoricalDataSummaryStatistics::add(core_t::TTime time, const std::strin this->approximateIfCardinalityTooHigh(); } -std::size_t CCategoricalDataSummaryStatistics::distinctCount() const -{ +std::size_t CCategoricalDataSummaryStatistics::distinctCount() const { return !m_Approximating ? 
m_ValueCounts.size() : m_DistinctValues.number(); } -std::size_t CCategoricalDataSummaryStatistics::minimumLength() const -{ +std::size_t CCategoricalDataSummaryStatistics::minimumLength() const { return m_MinLength[0]; } -std::size_t CCategoricalDataSummaryStatistics::maximumLength() const -{ +std::size_t CCategoricalDataSummaryStatistics::maximumLength() const { return m_MaxLength[0]; } -double CCategoricalDataSummaryStatistics::entropy() const -{ +double CCategoricalDataSummaryStatistics::entropy() const { return m_EmpiricalEntropy.calculate(); } -void CCategoricalDataSummaryStatistics::topN(TStrSizePrVec &result) const -{ +void CCategoricalDataSummaryStatistics::topN(TStrSizePrVec& result) const { result.clear(); result.reserve(m_N); TStrUInt64UMapCItrVec topN; this->topN(topN); - for (std::size_t i = 0u; i < topN.size(); ++i) - { + for (std::size_t i = 0u; i < topN.size(); ++i) { result.push_back(*topN[i]); } } -double CCategoricalDataSummaryStatistics::meanCountInRemainders() const -{ +double CCategoricalDataSummaryStatistics::meanCountInRemainders() const { TStrUInt64UMapCItrVec topN; this->topN(topN); uint64_t total = 0; - for (std::size_t i = 0u; i < topN.size(); ++i) - { + for (std::size_t i = 0u; i < topN.size(); ++i) { total += topN[i]->second; } - return static_cast(this->count() - std::min(total, this->count())) - / static_cast(std::max(static_cast(m_DistinctValues.number()), m_TopN.size())); + return static_cast(this->count() - std::min(total, this->count())) / + static_cast(std::max(static_cast(m_DistinctValues.number()), m_TopN.size())); } -void CCategoricalDataSummaryStatistics::addNGrams(std::size_t n, const std::string &example) -{ - for (std::size_t i = n; i < example.length(); ++i) - { +void CCategoricalDataSummaryStatistics::addNGrams(std::size_t n, const std::string& example) { + for (std::size_t i = n; i < example.length(); ++i) { std::size_t hash = HASHER(example.substr(i - n, n)); m_DistinctNGrams[n - 1].add(CTools::category32(hash)); m_NGramEmpricalEntropy[n - 1].add(hash); } } -void CCategoricalDataSummaryStatistics::approximateIfCardinalityTooHigh() -{ +void CCategoricalDataSummaryStatistics::approximateIfCardinalityTooHigh() { using TSizeUInt64UMapCItr = TSizeUInt64UMap::const_iterator; - if (m_ValueCounts.size() >= m_ToApproximate) - { - for (TSizeUInt64UMapCItr i = m_ValueCounts.begin(); i != m_ValueCounts.end(); ++i) - { + if (m_ValueCounts.size() >= m_ToApproximate) { + for (TSizeUInt64UMapCItr i = m_ValueCounts.begin(); i != m_ValueCounts.end(); ++i) { uint32_t category = CTools::category32(i->first); double count = static_cast(i->second); m_DistinctValues.add(category); @@ -258,109 +215,91 @@ void CCategoricalDataSummaryStatistics::approximateIfCardinalityTooHigh() } } -void CCategoricalDataSummaryStatistics::updateCalibrators(std::size_t category_) -{ - uint32_t category = m_Approximating ? - static_cast(category_) : CTools::category32(category_); - std::size_t i = std::lower_bound(m_Calibrators.begin(), - m_Calibrators.end(), - category, - maths::COrderings::SFirstLess()) - m_Calibrators.begin(); - if (i == m_Calibrators.size() || m_Calibrators[i].first != category) - { - if (m_Calibrators.size() < 5) - { +void CCategoricalDataSummaryStatistics::updateCalibrators(std::size_t category_) { + uint32_t category = m_Approximating ? 
static_cast(category_) : CTools::category32(category_); + std::size_t i = + std::lower_bound(m_Calibrators.begin(), m_Calibrators.end(), category, maths::COrderings::SFirstLess()) - m_Calibrators.begin(); + if (i == m_Calibrators.size() || m_Calibrators[i].first != category) { + if (m_Calibrators.size() < 5) { m_Calibrators.insert(m_Calibrators.begin() + i, std::make_pair(category, 1)); } - } - else - { + } else { ++m_Calibrators[i].second; } } -double CCategoricalDataSummaryStatistics::calibratedCount(std::size_t category) const -{ - if (!m_Approximating) - { +double CCategoricalDataSummaryStatistics::calibratedCount(std::size_t category) const { + if (!m_Approximating) { return static_cast(m_ValueCounts.find(category)->second); } using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; TMeanAccumulator error; - if (m_CountSketch.sketched()) - { - for (std::size_t j = 0u; j < m_Calibrators.size(); ++j) - { - error.add( m_CountSketch.count(m_Calibrators[j].first) - - static_cast(m_Calibrators[j].second)); + if (m_CountSketch.sketched()) { + for (std::size_t j = 0u; j < m_Calibrators.size(); ++j) { + error.add(m_CountSketch.count(m_Calibrators[j].first) - static_cast(m_Calibrators[j].second)); } } return m_CountSketch.count(static_cast(category)) - maths::CBasicStatistics::mean(error); } -void CCategoricalDataSummaryStatistics::findLowestTopN() -{ +void CCategoricalDataSummaryStatistics::findLowestTopN() { using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; TMinAccumulator lowest; - for (TStrUInt64UMapItr i = m_TopN.begin(); i != m_TopN.end(); ++i) - { + for (TStrUInt64UMapItr i = m_TopN.begin(); i != m_TopN.end(); ++i) { lowest.add(i); } m_LowestTopN = lowest[0]; } -void CCategoricalDataSummaryStatistics::topN(TStrUInt64UMapCItrVec &result) const -{ +void CCategoricalDataSummaryStatistics::topN(TStrUInt64UMapCItrVec& result) const { using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; TMaxAccumulator topN(m_N); - for (TStrUInt64UMapCItr i = m_TopN.begin(); i != m_TopN.end(); ++i) - { + for (TStrUInt64UMapCItr i = m_TopN.begin(); i != m_TopN.end(); ++i) { topN.add(i); } topN.sort(); result.assign(topN.begin(), topN.end()); } -CNumericDataSummaryStatistics::CNumericDataSummaryStatistics(bool integer) : - m_NonNumericCount(0), - m_QuantileSketch(maths::CQuantileSketch::E_Linear, QS_SIZE), - m_Clusters(integer ? maths_t::E_IntegerData : maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::NORMAL, - maths_t::E_ClustersFractionWeight, - 0.0, // No decay - CLUSTER_MINIMUM_FRACTION, // We're only interested in clusters which - // comprise at least 0.5% of the data. - CLUSTER_MINIMUM_COUNT) // We need a few points to get a reasonable - // variance estimate. -{} - -CNumericDataSummaryStatistics::CNumericDataSummaryStatistics(const CDataSummaryStatistics &other, - bool integer) : - CDataSummaryStatistics(other), - m_NonNumericCount(0), - m_QuantileSketch(maths::CQuantileSketch::E_Linear, QS_SIZE), - m_Clusters(integer ? maths_t::E_IntegerData : maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::NORMAL, - maths_t::E_ClustersFractionWeight, - 0.0, // No decay - CLUSTER_MINIMUM_FRACTION, // We're only interested in clusters which - // comprise at least 0.5% of the data. - CLUSTER_MINIMUM_COUNT) // Need a few points to get a reasonable - // variance estimate. 
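// Editor's note: with exact counts kept for up to five "calibrator"
// categories (maintained by updateCalibrators() above), calibratedCount()
// debiases the count-sketch estimate s(.) by the mean sketch error observed
// on the calibrator set K whenever the sketch is actually approximating:
//
//   calibratedCount(x) = s(x) - (1/|K|) * sum over k in K of (s(k) - c_k)
//
// where c_k is the exact count of calibrator k. This matches the loop that
// accumulates m_CountSketch.count(k) - c_k into the mean `error` and
// subtracts it from the sketched count of the queried category.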
-{} - -void CNumericDataSummaryStatistics::add(core_t::TTime time, const std::string &example) +CNumericDataSummaryStatistics::CNumericDataSummaryStatistics(bool integer) + : m_NonNumericCount(0), + m_QuantileSketch(maths::CQuantileSketch::E_Linear, QS_SIZE), + m_Clusters(integer ? maths_t::E_IntegerData : maths_t::E_ContinuousData, + maths::CAvailableModeDistributions::NORMAL, + maths_t::E_ClustersFractionWeight, + 0.0, // No decay + CLUSTER_MINIMUM_FRACTION, // We're only interested in clusters which + // comprise at least 0.5% of the data. + CLUSTER_MINIMUM_COUNT) // We need a few points to get a reasonable + // variance estimate. { +} + +CNumericDataSummaryStatistics::CNumericDataSummaryStatistics(const CDataSummaryStatistics& other, bool integer) + : CDataSummaryStatistics(other), + m_NonNumericCount(0), + m_QuantileSketch(maths::CQuantileSketch::E_Linear, QS_SIZE), + m_Clusters(integer ? maths_t::E_IntegerData : maths_t::E_ContinuousData, + maths::CAvailableModeDistributions::NORMAL, + maths_t::E_ClustersFractionWeight, + 0.0, // No decay + CLUSTER_MINIMUM_FRACTION, // We're only interested in clusters which + // comprise at least 0.5% of the data. + CLUSTER_MINIMUM_COUNT) // Need a few points to get a reasonable + // variance estimate. +{ +} + +void CNumericDataSummaryStatistics::add(core_t::TTime time, const std::string& example) { std::string trimmed = example; core::CStringUtils::trimWhitespace(trimmed); this->CDataSummaryStatistics::add(time); double value; - if (!core::CStringUtils::stringToTypeSilent(trimmed, value)) - { + if (!core::CStringUtils::stringToTypeSilent(trimmed, value)) { ++m_NonNumericCount; return; } @@ -369,33 +308,28 @@ void CNumericDataSummaryStatistics::add(core_t::TTime time, const std::string &e m_Clusters.add(value); } -double CNumericDataSummaryStatistics::minimum() const -{ +double CNumericDataSummaryStatistics::minimum() const { double result; m_QuantileSketch.minimum(result); return result; } -double CNumericDataSummaryStatistics::median() const -{ +double CNumericDataSummaryStatistics::median() const { double result; m_QuantileSketch.quantile(50.0, result); return result; } -double CNumericDataSummaryStatistics::maximum() const -{ +double CNumericDataSummaryStatistics::maximum() const { double result; m_QuantileSketch.maximum(result); return result; } -bool CNumericDataSummaryStatistics::densityChart(TDoubleDoublePrVec &result) const -{ +bool CNumericDataSummaryStatistics::densityChart(TDoubleDoublePrVec& result) const { result.clear(); - if (m_Clusters.clusters().empty()) - { + if (m_Clusters.clusters().empty()) { return true; } @@ -403,57 +337,45 @@ bool CNumericDataSummaryStatistics::densityChart(TDoubleDoublePrVec &result) con using TNormalVec = std::vector>; using TGMM = maths::CMixtureDistribution>; - const maths::CXMeansOnline1d::TClusterVec &clusters = m_Clusters.clusters(); + const maths::CXMeansOnline1d::TClusterVec& clusters = m_Clusters.clusters(); std::size_t n = clusters.size(); - try - { + try { TDoubleVec weights; TNormalVec modes; weights.reserve(n); modes.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { - LOG_TRACE("weight = " << clusters[i].count() - << ", mean = " << clusters[i].centre() - << ", sd = " << clusters[i].spread()); + for (std::size_t i = 0u; i < n; ++i) { + LOG_TRACE("weight = " << clusters[i].count() << ", mean = " << clusters[i].centre() << ", sd = " << clusters[i].spread()); weights.push_back(clusters[i].count()); - modes.push_back(boost::math::normal_distribution<>(clusters[i].centre(), - 
clusters[i].spread()));
+            modes.push_back(boost::math::normal_distribution<>(clusters[i].centre(), clusters[i].spread()));
         }
         TGMM gmm(weights, modes);
-        static const double QUANTILES[] =
-            { 0.001, 0.005, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.995, 0.999 };
+        static const double QUANTILES[] = {0.001, 0.005, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.995, 0.999};
         TDoubleVec pillars;
         pillars.reserve(boost::size(QUANTILES));
-        for (std::size_t i = 0u; i < boost::size(QUANTILES); ++i)
-        {
+        for (std::size_t i = 0u; i < boost::size(QUANTILES); ++i) {
             pillars.push_back(maths::quantile(gmm, QUANTILES[i]));
         }
         LOG_TRACE("pillars = " << core::CContainerPrinter::print(pillars));
         result.reserve(10 * boost::size(QUANTILES));
-        for (std::size_t i = 1u; i < pillars.size(); ++i)
-        {
-            double x = pillars[i-1];
-            double b = pillars[i];
+        for (std::size_t i = 1u; i < pillars.size(); ++i) {
+            double x = pillars[i - 1];
+            double b = pillars[i];
             double dx = (b - x) / 10.0;
-            for (std::size_t j = 0u; j < 10; ++j, x += dx)
-            {
+            for (std::size_t j = 0u; j < 10; ++j, x += dx) {
                 result.push_back(std::make_pair(x, maths::pdf(gmm, x)));
             }
         }
-    }
-    catch (const std::exception &e)
-    {
+    } catch (const std::exception& e) {
         LOG_ERROR("Failed to compute density chart: " << e.what());
         return false;
     }
     return true;
 }
-
 }
 }
diff --git a/lib/config/CDetectorEnumerator.cc b/lib/config/CDetectorEnumerator.cc
index 15b2376d7b..d891f900c1 100644
--- a/lib/config/CDetectorEnumerator.cc
+++ b/lib/config/CDetectorEnumerator.cc
@@ -15,13 +15,10 @@
 #include
 #include
-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
-namespace
-{
+namespace {
 using TStrVec = std::vector<std::string>;
 using TStrVecCRef = boost::reference_wrapper<const TStrVec>;
@@ -29,29 +26,23 @@ using TStrVecCRefVec = std::vector<TStrVecCRef>;
 //! Add detectors for the partitioning fields \p candidates.
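// Illustrative sketch (not part of the patch) of the x-axis layout used by
// densityChart above: quantiles of the fitted mixture act as "pillars", and
// the density is evaluated at ten evenly spaced points between consecutive
// pillars, so the chart is dense where the data are and sparse in the tails.
// The equal-weight two-mode normal mixture here is a hypothetical stand-in
// for the clusterer's fitted modes.
#include <cmath>
#include <cstddef>
#include <utility>
#include <vector>

double normalPdf(double x, double mean, double sd) {
    const double root2pi = 2.50662827463100050242; // sqrt(2 * pi)
    double z = (x - mean) / sd;
    return std::exp(-0.5 * z * z) / (sd * root2pi);
}

std::vector<std::pair<double, double>>
densityChart(const std::vector<double>& pillars, double mean1, double sd1, double mean2, double sd2) {
    std::vector<std::pair<double, double>> result;
    for (std::size_t i = 1; i < pillars.size(); ++i) {
        double x = pillars[i - 1];
        double dx = (pillars[i] - x) / 10.0;
        for (int j = 0; j < 10; ++j, x += dx) {
            // Mixture density at x: average of the two component densities.
            result.push_back(std::make_pair(x, 0.5 * normalPdf(x, mean1, sd1) + 0.5 * normalPdf(x, mean2, sd2)));
        }
    }
    return result;
}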
void add(std::size_t p, - const std::size_t *indices, - const TStrVecCRef *candidates, + const std::size_t* indices, + const TStrVecCRef* candidates, std::size_t a, std::size_t b, - CDetectorEnumerator::TDetectorSpecificationVec &result) -{ + CDetectorEnumerator::TDetectorSpecificationVec& result) { LOG_TRACE("a = " << a << " b = " << b); - if (a == b) - { + if (a == b) { return; } - for (std::size_t i = 0u; i < p; ++i) - { - for (std::size_t j = a; j < b; ++j) - { - const TStrVec &ci = candidates[i]; + for (std::size_t i = 0u; i < p; ++i) { + for (std::size_t j = a; j < b; ++j) { + const TStrVec& ci = candidates[i]; LOG_TRACE("candidates = " << core::CContainerPrinter::print(ci)); - for (std::size_t k = 0u; k < ci.size(); ++k) - { - if (result[j].canAddPartitioning(indices[i], ci[k])) - { + for (std::size_t k = 0u; k < ci.size(); ++k) { + if (result[j].canAddPartitioning(indices[i], ci[k])) { std::size_t id = result.size(); result.push_back(CDetectorSpecification(result[j])); result.back().id(id); @@ -61,158 +52,107 @@ void add(std::size_t p, } } } - } -CDetectorEnumerator::CDetectorEnumerator(const CAutoconfigurerParams ¶ms) : - m_Params(params) -{} +CDetectorEnumerator::CDetectorEnumerator(const CAutoconfigurerParams& params) : m_Params(params) { +} -void CDetectorEnumerator::addFunction(config_t::EFunctionCategory category) -{ +void CDetectorEnumerator::addFunction(config_t::EFunctionCategory category) { m_Functions.push_back(category); } -void CDetectorEnumerator::addCategoricalFunctionArgument(const std::string &argument) -{ +void CDetectorEnumerator::addCategoricalFunctionArgument(const std::string& argument) { m_CandidateCategoricalFunctionArguments.push_back(argument); } -void CDetectorEnumerator::addMetricFunctionArgument(const std::string &argument) -{ +void CDetectorEnumerator::addMetricFunctionArgument(const std::string& argument) { m_CandidateMetricFunctionArguments.push_back(argument); } -void CDetectorEnumerator::addByField(const std::string &by) -{ +void CDetectorEnumerator::addByField(const std::string& by) { m_CandidateByFields.push_back(by); } -void CDetectorEnumerator::addRareByField(const std::string &by) -{ +void CDetectorEnumerator::addRareByField(const std::string& by) { m_CandidateRareByFields.push_back(by); } -void CDetectorEnumerator::addOverField(const std::string &over) -{ +void CDetectorEnumerator::addOverField(const std::string& over) { m_CandidateOverFields.push_back(over); } -void CDetectorEnumerator::addPartitionField(const std::string &partition) -{ +void CDetectorEnumerator::addPartitionField(const std::string& partition) { m_CandidatePartitionFields.push_back(partition); } -void CDetectorEnumerator::generate(TDetectorSpecificationVec &result) -{ +void CDetectorEnumerator::generate(TDetectorSpecificationVec& result) { result.clear(); this->generateNoPartitioning(result); std::size_t n0 = result.size(); - this->addOnePartitioning( 0, n0, result); + this->addOnePartitioning(0, n0, result); std::size_t n1 = result.size(); - this->addTwoPartitioning( n0, n1, result); + this->addTwoPartitioning(n0, n1, result); std::size_t n2 = result.size(); this->addThreePartitioning(n1, n2, result); } -void CDetectorEnumerator::generateNoPartitioning(TDetectorSpecificationVec &result) const -{ - for (std::size_t i = 0u; i < m_Functions.size(); ++i) - { +void CDetectorEnumerator::generateNoPartitioning(TDetectorSpecificationVec& result) const { + for (std::size_t i = 0u; i < m_Functions.size(); ++i) { config_t::EFunctionCategory function = m_Functions[i]; - if 
(config_t::isRare(function)) - { + if (config_t::isRare(function)) { continue; } - try - { + try { std::size_t id = result.size(); - if (config_t::hasArgument(function)) - { - const TStrVec &arguments = config_t::isMetric(function) ? - m_CandidateMetricFunctionArguments : - m_CandidateCategoricalFunctionArguments; - for (std::size_t j = 0u; j < arguments.size(); ++j) - { + if (config_t::hasArgument(function)) { + const TStrVec& arguments = + config_t::isMetric(function) ? m_CandidateMetricFunctionArguments : m_CandidateCategoricalFunctionArguments; + for (std::size_t j = 0u; j < arguments.size(); ++j) { result.push_back(CDetectorSpecification(m_Params, function, arguments[j], id)); } - } - else - { + } else { result.push_back(CDetectorSpecification(m_Params, function, id)); } - } - catch (std::exception &e) - { - LOG_ERROR("Bad detector: " << e.what()); - } + } catch (std::exception& e) { LOG_ERROR("Bad detector: " << e.what()); } } } -void CDetectorEnumerator::addOnePartitioning(std::size_t a, std::size_t b, - TDetectorSpecificationVec &result) const -{ - TStrVecCRef candidates[] = - { - boost::cref(m_CandidateByFields), - boost::cref(m_CandidateOverFields), - boost::cref(m_CandidatePartitionFields) - }; - add(boost::size(constants::CFieldIndices::PARTITIONING), - constants::CFieldIndices::PARTITIONING, candidates, a, b, result); - - for (std::size_t i = 0u; i < m_Functions.size(); ++i) - { +void CDetectorEnumerator::addOnePartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const { + TStrVecCRef candidates[] = { + boost::cref(m_CandidateByFields), boost::cref(m_CandidateOverFields), boost::cref(m_CandidatePartitionFields)}; + add(boost::size(constants::CFieldIndices::PARTITIONING), constants::CFieldIndices::PARTITIONING, candidates, a, b, result); + + for (std::size_t i = 0u; i < m_Functions.size(); ++i) { config_t::EFunctionCategory function = m_Functions[i]; - if (config_t::isRare(function)) - { - try - { - for (std::size_t j = 0u; j < m_CandidateRareByFields.size(); ++j) - { + if (config_t::isRare(function)) { + try { + for (std::size_t j = 0u; j < m_CandidateRareByFields.size(); ++j) { std::size_t id = result.size(); result.push_back(CDetectorSpecification(m_Params, function, id)); result.back().addPartitioning(constants::BY_INDEX, m_CandidateRareByFields[j]); } - } - catch (std::exception &e) - { - LOG_ERROR("Bad detector: " << e.what()); - } + } catch (std::exception& e) { LOG_ERROR("Bad detector: " << e.what()); } } } } -void CDetectorEnumerator::addTwoPartitioning(std::size_t a, std::size_t b, - TDetectorSpecificationVec &result) const -{ - static std::size_t OVER_AND_PARTITION[] = - { - constants::OVER_INDEX, - constants::PARTITION_INDEX - }; - TStrVecCRef candidates[] = - { - boost::cref(m_CandidateOverFields), - boost::cref(m_CandidatePartitionFields) - }; +void CDetectorEnumerator::addTwoPartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const { + static std::size_t OVER_AND_PARTITION[] = {constants::OVER_INDEX, constants::PARTITION_INDEX}; + TStrVecCRef candidates[] = {boost::cref(m_CandidateOverFields), boost::cref(m_CandidatePartitionFields)}; add(boost::size(OVER_AND_PARTITION), OVER_AND_PARTITION, candidates, a, b, result); } -void CDetectorEnumerator::addThreePartitioning(std::size_t a, std::size_t b, - TDetectorSpecificationVec &result) const -{ - static std::size_t PARTITION[] = { constants::PARTITION_INDEX }; - TStrVecCRef candidates[] = { boost::cref(m_CandidatePartitionFields) }; +void 
CDetectorEnumerator::addThreePartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const { + static std::size_t PARTITION[] = {constants::PARTITION_INDEX}; + TStrVecCRef candidates[] = {boost::cref(m_CandidatePartitionFields)}; add(boost::size(PARTITION), PARTITION, candidates, a, b, result); } - } } diff --git a/lib/config/CDetectorFieldRolePenalty.cc b/lib/config/CDetectorFieldRolePenalty.cc index d609f39159..a7e495f33e 100644 --- a/lib/config/CDetectorFieldRolePenalty.cc +++ b/lib/config/CDetectorFieldRolePenalty.cc @@ -14,72 +14,53 @@ #include #include -namespace ml -{ -namespace config -{ -namespace -{ - -using TGetStatistics = const CFieldStatistics *(CDetectorSpecification::*)() const; -const TGetStatistics STATISTIC[] = - { - &CDetectorSpecification::argumentFieldStatistics, - &CDetectorSpecification::byFieldStatistics, - &CDetectorSpecification::overFieldStatistics, - &CDetectorSpecification::partitionFieldStatistics, - }; +namespace ml { +namespace config { +namespace { +using TGetStatistics = const CFieldStatistics* (CDetectorSpecification::*)() const; +const TGetStatistics STATISTIC[] = { + &CDetectorSpecification::argumentFieldStatistics, + &CDetectorSpecification::byFieldStatistics, + &CDetectorSpecification::overFieldStatistics, + &CDetectorSpecification::partitionFieldStatistics, +}; } -CDetectorFieldRolePenalty::CDetectorFieldRolePenalty(const CAutoconfigurerParams ¶ms) : - CPenalty(params) -{ +CDetectorFieldRolePenalty::CDetectorFieldRolePenalty(const CAutoconfigurerParams& params) : CPenalty(params) { std::fill_n(m_FieldRolePenalties, constants::NUMBER_FIELD_INDICES, static_cast(0)); } -CDetectorFieldRolePenalty *CDetectorFieldRolePenalty::clone() const -{ +CDetectorFieldRolePenalty* CDetectorFieldRolePenalty::clone() const { return new CDetectorFieldRolePenalty(*this); } -std::string CDetectorFieldRolePenalty::name() const -{ +std::string CDetectorFieldRolePenalty::name() const { std::string arguments; - for (std::size_t i = 0u; i < constants::NUMBER_FIELD_INDICES; ++i) - { - if (m_FieldRolePenalties[i]) - { - arguments += (arguments.empty() ? "'" : ", '") - + constants::name(i) + ' ' + m_FieldRolePenalties[i]->name() + "'"; + for (std::size_t i = 0u; i < constants::NUMBER_FIELD_INDICES; ++i) { + if (m_FieldRolePenalties[i]) { + arguments += (arguments.empty() ? 
"'" : ", '") + constants::name(i) + ' ' + m_FieldRolePenalties[i]->name() + "'"; } } return "field role penalty(" + arguments + ")"; } -void CDetectorFieldRolePenalty::addPenalty(std::size_t index, const CPenalty &penalty) -{ +void CDetectorFieldRolePenalty::addPenalty(std::size_t index, const CPenalty& penalty) { m_FieldRolePenalties[index] = &penalty; } -void CDetectorFieldRolePenalty::penaltyFromMe(CDetectorSpecification &spec) const -{ +void CDetectorFieldRolePenalty::penaltyFromMe(CDetectorSpecification& spec) const { double penalty = 1.0; - for (std::size_t i = 0u; i < constants::NUMBER_FIELD_INDICES; ++i) - { - if (const CFieldStatistics *stats = (spec.*STATISTIC[i])()) - { + for (std::size_t i = 0u; i < constants::NUMBER_FIELD_INDICES; ++i) { + if (const CFieldStatistics* stats = (spec.*STATISTIC[i])()) { std::string description; m_FieldRolePenalties[i]->penalty(*stats, penalty, description); - if (!description.empty()) - { + if (!description.empty()) { description += " for the '" + constants::name(i) + "' field"; } spec.applyPenalty(penalty, description); } } } - } } - diff --git a/lib/config/CDetectorRecord.cc b/lib/config/CDetectorRecord.cc index e3bb02eff5..0d83ee5d79 100644 --- a/lib/config/CDetectorRecord.cc +++ b/lib/config/CDetectorRecord.cc @@ -16,129 +16,98 @@ #include -namespace ml -{ -namespace config -{ -namespace -{ - -using TField = const CDetectorSpecification::TOptionalStr &(CDetectorSpecification::*)() const; -const TField FIELDS[] = - { - &CDetectorSpecification::argumentField, - &CDetectorSpecification::byField, - &CDetectorSpecification::overField, - &CDetectorSpecification::partitionField, - }; +namespace ml { +namespace config { +namespace { + +using TField = const CDetectorSpecification::TOptionalStr& (CDetectorSpecification::*)() const; +const TField FIELDS[] = { + &CDetectorSpecification::argumentField, + &CDetectorSpecification::byField, + &CDetectorSpecification::overField, + &CDetectorSpecification::partitionField, +}; const std::string NULL_STRING("null"); //! Print the string \p ptr or null if it is zero. -const std::string &extract(const std::string *ptr) -{ +const std::string& extract(const std::string* ptr) { return ptr ? 
*ptr : NULL_STRING; } const core::CHashing::CMurmurHash2String HASHER; - } - CDetectorRecord::CDetectorRecord(core_t::TTime time, config_t::EFunctionCategory function, - const TStrCPtrAry &fieldNames, - const TStrCPtrAry &fieldValues, - const TSizeAry &hashedFieldValues) : - m_Time(time), - m_Function(function), - m_FieldNames(fieldNames), - m_FieldValues(fieldValues), - m_HashedFieldValues(hashedFieldValues) -{} - -core_t::TTime CDetectorRecord::time() const -{ + const TStrCPtrAry& fieldNames, + const TStrCPtrAry& fieldValues, + const TSizeAry& hashedFieldValues) + : m_Time(time), m_Function(function), m_FieldNames(fieldNames), m_FieldValues(fieldValues), m_HashedFieldValues(hashedFieldValues) { +} + +core_t::TTime CDetectorRecord::time() const { return m_Time; } -config_t::EFunctionCategory CDetectorRecord::function() const -{ +config_t::EFunctionCategory CDetectorRecord::function() const { return m_Function; } -const std::string *CDetectorRecord::argumentFieldName() const -{ +const std::string* CDetectorRecord::argumentFieldName() const { return m_FieldNames[constants::ARGUMENT_INDEX]; } -const std::string *CDetectorRecord::byFieldName() const -{ +const std::string* CDetectorRecord::byFieldName() const { return m_FieldNames[constants::BY_INDEX]; } -const std::string *CDetectorRecord::overFieldName() const -{ +const std::string* CDetectorRecord::overFieldName() const { return m_FieldNames[constants::OVER_INDEX]; } -const std::string *CDetectorRecord::partitionFieldName() const -{ +const std::string* CDetectorRecord::partitionFieldName() const { return m_FieldNames[constants::PARTITION_INDEX]; } -const std::string *CDetectorRecord::argumentFieldValue() const -{ +const std::string* CDetectorRecord::argumentFieldValue() const { return m_FieldValues[constants::ARGUMENT_INDEX]; } -const std::string *CDetectorRecord::byFieldValue() const -{ +const std::string* CDetectorRecord::byFieldValue() const { return m_FieldValues[constants::BY_INDEX]; } -const std::string *CDetectorRecord::overFieldValue() const -{ +const std::string* CDetectorRecord::overFieldValue() const { return m_FieldValues[constants::OVER_INDEX]; } -const std::string *CDetectorRecord::partitionFieldValue() const -{ +const std::string* CDetectorRecord::partitionFieldValue() const { return m_FieldValues[constants::PARTITION_INDEX]; } -std::size_t CDetectorRecord::argumentFieldValueHash() const -{ +std::size_t CDetectorRecord::argumentFieldValueHash() const { return m_HashedFieldValues[constants::ARGUMENT_INDEX]; } -std::size_t CDetectorRecord::byFieldValueHash() const -{ +std::size_t CDetectorRecord::byFieldValueHash() const { return m_HashedFieldValues[constants::BY_INDEX]; } -std::size_t CDetectorRecord::overFieldValueHash() const -{ +std::size_t CDetectorRecord::overFieldValueHash() const { return m_HashedFieldValues[constants::OVER_INDEX]; } -std::size_t CDetectorRecord::partitionFieldValueHash() const -{ +std::size_t CDetectorRecord::partitionFieldValueHash() const { return m_HashedFieldValues[constants::PARTITION_INDEX]; } -std::string CDetectorRecord::print() const -{ - return core::CStringUtils::typeToString(m_Time) - + ' ' + extract(this->argumentFieldValue()) - + ' ' + extract(this->byFieldValue()) - + ' ' + extract(this->overFieldValue()) - + ' ' + extract(this->partitionFieldValue()); +std::string CDetectorRecord::print() const { + return core::CStringUtils::typeToString(m_Time) + ' ' + extract(this->argumentFieldValue()) + ' ' + extract(this->byFieldValue()) + + ' ' + extract(this->overFieldValue()) + ' ' + 
extract(this->partitionFieldValue()); } - -void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec &specs) -{ +void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec& specs) { using TStrSizeUMap = boost::unordered_map; using TStrSizeUMapCItr = TStrSizeUMap::const_iterator; @@ -146,12 +115,9 @@ void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec &s TStrSizeUMap uniques; size_t size = 0u; - for (std::size_t i = 0u; i < specs.size(); ++i) - { - for (std::size_t j = 0u; j < boost::size(FIELDS); ++j) - { - if (const CDetectorSpecification::TOptionalStr &field = ((specs[i]).*FIELDS[j])()) - { + for (std::size_t i = 0u; i < specs.size(); ++i) { + for (std::size_t j = 0u; j < boost::size(FIELDS); ++j) { + if (const CDetectorSpecification::TOptionalStr& field = ((specs[i]).*FIELDS[j])()) { uniques.emplace(*field, uniques.size()); } } @@ -159,8 +125,7 @@ void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec &s } m_FieldSchema.resize(uniques.size()); - for (TStrSizeUMapCItr i = uniques.begin(); i != uniques.end(); ++i) - { + for (TStrSizeUMapCItr i = uniques.begin(); i != uniques.end(); ++i) { m_FieldSchema[i->second] = std::make_pair(i->first, i->second); } m_FieldValueTable.resize(m_FieldSchema.size() + 1, 0); @@ -168,12 +133,10 @@ void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec &s LOG_TRACE("field schema = " << core::CContainerPrinter::print(m_FieldSchema)); m_DetectorFieldSchema.resize(size + 1); - for (std::size_t i = 0u; i < specs.size(); ++i) - { + for (std::size_t i = 0u; i < specs.size(); ++i) { TSizeAry entry; - for (std::size_t j = 0u; j < boost::size(FIELDS); ++j) - { - const CDetectorSpecification::TOptionalStr &field = ((specs[i]).*FIELDS[j])(); + for (std::size_t j = 0u; j < boost::size(FIELDS); ++j) { + const CDetectorSpecification::TOptionalStr& field = ((specs[i]).*FIELDS[j])(); entry[constants::CFieldIndices::ALL[j]] = field ? uniques[*field] : m_FieldSchema.size(); } LOG_TRACE("Fields for " << specs[i].description() << " = " << core::CContainerPrinter::print(entry)); @@ -182,27 +145,23 @@ void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec &s } void CDetectorRecordDirectAddressTable::detectorRecords(core_t::TTime time, - const TStrStrUMap &fieldValues, - const TDetectorSpecificationVec &specs, - TDetectorRecordVec &result) -{ + const TStrStrUMap& fieldValues, + const TDetectorSpecificationVec& specs, + TDetectorRecordVec& result) { result.clear(); - if (specs.empty()) - { + if (specs.empty()) { return; } using TStrStrUMapCItr = TStrStrUMap::const_iterator; std::size_t size = 0u; - for (std::size_t i = 0u; i < specs.size(); ++i) - { + for (std::size_t i = 0u; i < specs.size(); ++i) { size = std::max(size, specs[i].id()); } - for (std::size_t i = 0u; i < m_FieldSchema.size(); ++i) - { + for (std::size_t i = 0u; i < m_FieldSchema.size(); ++i) { TStrStrUMapCItr j = fieldValues.find(m_FieldSchema[i].first); m_FieldValueTable[i] = j != fieldValues.end() ? &j->second : 0; m_HashedFieldValueTable[i] = HASHER(m_FieldValueTable[i] ? 
*m_FieldValueTable[i] : NULL_STRING); @@ -212,13 +171,11 @@ void CDetectorRecordDirectAddressTable::detectorRecords(core_t::TTime time, CDetectorRecord::TStrCPtrAry vi; CDetectorRecord::TSizeAry hi; result.resize(size + 1, CDetectorRecord(time, config_t::E_Count, ni, vi, hi)); - for (std::size_t i = 0u; i < specs.size(); ++i) - { + for (std::size_t i = 0u; i < specs.size(); ++i) { std::size_t id = specs[i].id(); - const TSizeAry &schema = m_DetectorFieldSchema[id]; + const TSizeAry& schema = m_DetectorFieldSchema[id]; config_t::EFunctionCategory function = specs[i].function(); - for (std::size_t j = 0u; j < TSizeAry::size(); ++j) - { + for (std::size_t j = 0u; j < TSizeAry::size(); ++j) { ni[j] = (specs[i].*FIELDS[j])().get_ptr(); vi[j] = m_FieldValueTable[schema[j]]; hi[j] = m_HashedFieldValueTable[schema[j]]; @@ -227,12 +184,10 @@ void CDetectorRecordDirectAddressTable::detectorRecords(core_t::TTime time, } } -void CDetectorRecordDirectAddressTable::clear() -{ +void CDetectorRecordDirectAddressTable::clear() { m_FieldSchema.clear(); m_DetectorFieldSchema.clear(); m_FieldValueTable.clear(); } - } } diff --git a/lib/config/CDetectorSpecification.cc b/lib/config/CDetectorSpecification.cc index ff680d8717..31a3dd51fb 100644 --- a/lib/config/CDetectorSpecification.cc +++ b/lib/config/CDetectorSpecification.cc @@ -12,59 +12,55 @@ #include #include -#include #include +#include #include #include #include -#include #include +#include -namespace ml -{ -namespace config -{ -namespace -{ +namespace ml { +namespace config { +namespace { using TSizeVec = std::vector; using TDoubleVec = std::vector; //! \brief Checks if the name of some statistics matches a specified value. -class CNameEquals -{ - public: - CNameEquals(const std::string &value) : m_Value(&value) {} - - bool operator()(const CFieldStatistics &stats) const - { - return stats.name() == *m_Value; - } +class CNameEquals { +public: + CNameEquals(const std::string& value) : m_Value(&value) {} + + bool operator()(const CFieldStatistics& stats) const { return stats.name() == *m_Value; } - private: - const std::string *m_Value; +private: + const std::string* m_Value; }; //! Extract the full function name from the configuration. std::string fullFunctionName(config_t::ESide side, CDetectorSpecification::EFuzzyBool ignoreEmpty, bool isPopulation, - config_t::EFunctionCategory function) -{ + config_t::EFunctionCategory function) { std::string result; - switch (side) - { - case config_t::E_LowSide: result += "low_"; break; - case config_t::E_HighSide: result += "high_"; break; - case config_t::E_TwoSide: break; - case config_t::E_UndeterminedSide: result += "[low_|high_]"; break; + switch (side) { + case config_t::E_LowSide: + result += "low_"; + break; + case config_t::E_HighSide: + result += "high_"; + break; + case config_t::E_TwoSide: + break; + case config_t::E_UndeterminedSide: + result += "[low_|high_]"; + break; } - if (!isPopulation && config_t::hasDoAndDontIgnoreEmptyVersions(function)) - { - switch (ignoreEmpty) - { + if (!isPopulation && config_t::hasDoAndDontIgnoreEmptyVersions(function)) { + switch (ignoreEmpty) { case CDetectorSpecification::E_True: result += function == config_t::E_Count ? "non_zero_" : "non_null_"; break; @@ -80,89 +76,68 @@ std::string fullFunctionName(config_t::ESide side, } //! Get the maximum penalty indexed by \p indices. 
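// Illustrative sketch (not part of the patch) of the direct addressing scheme
// implemented by CDetectorRecordDirectAddressTable above: every distinct
// field name used by any detector gets one slot in a value table, each record
// fills that table once, and each detector then reads its four field values
// through precomputed slot indices instead of repeating hash-map lookups per
// detector. Types and names below are deliberately simplified and hypothetical.
#include <array>
#include <cstddef>
#include <map>
#include <string>
#include <vector>

struct SDirectAddressTable {
    std::map<std::string, std::size_t> slots;              // field name -> slot
    std::vector<std::array<std::size_t, 4>> detectorSlots; // detector -> slot per field role
};

std::vector<std::array<const std::string*, 4>>
detectorRecords(const SDirectAddressTable& table, const std::map<std::string, std::string>& record) {
    // One extra trailing slot stands for "field not used by this detector";
    // it is never written, so it reads back as null.
    std::vector<const std::string*> values(table.slots.size() + 1, nullptr);
    for (const auto& slot : table.slots) {
        auto i = record.find(slot.first);
        values[slot.second] = i != record.end() ? &i->second : nullptr;
    }
    std::vector<std::array<const std::string*, 4>> result;
    result.reserve(table.detectorSlots.size());
    for (const auto& slots : table.detectorSlots) {
        std::array<const std::string*, 4> fields{};
        for (std::size_t j = 0; j < 4; ++j) {
            fields[j] = values[slots[j]];
        }
        result.push_back(fields);
    }
    return result;
}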
-double maxPenalty(const TSizeVec &indices, const TDoubleVec &penalties) -{ +double maxPenalty(const TSizeVec& indices, const TDoubleVec& penalties) { double result = 0.0; - for (std::size_t i = 0u; i < indices.size(); ++i) - { + for (std::size_t i = 0u; i < indices.size(); ++i) { result = std::max(result, penalties[indices[i]]); } return result; } //! Set the penalties indexed by \p indices to \p value. -void fill(const TSizeVec &indices, double value, TDoubleVec &penalties) -{ - for (std::size_t i = 0u; i < indices.size(); ++i) - { +void fill(const TSizeVec& indices, double value, TDoubleVec& penalties) { + for (std::size_t i = 0u; i < indices.size(); ++i) { penalties[indices[i]] = value; } } //! Mapping from ignore empty unique identifer to its value. -const bool IGNORE_EMPTY[] = { false, true }; +const bool IGNORE_EMPTY[] = {false, true}; //! Get the ignore empty unique identifier. -std::size_t ignoreEmptyId(bool ignoreEmpty) -{ - return std::find(boost::begin(IGNORE_EMPTY), - boost::end(IGNORE_EMPTY), - ignoreEmpty) - boost::begin(IGNORE_EMPTY); +std::size_t ignoreEmptyId(bool ignoreEmpty) { + return std::find(boost::begin(IGNORE_EMPTY), boost::end(IGNORE_EMPTY), ignoreEmpty) - boost::begin(IGNORE_EMPTY); } - } -CDetectorSpecification::CDetectorSpecification(const CAutoconfigurerParams ¶ms, - config_t::EFunctionCategory function, - std::size_t id) : - m_Params(params), - m_Function(function), - m_Side(config_t::hasSidedCalculation(function) ? config_t::E_UndeterminedSide : config_t::E_TwoSide), - m_IgnoreEmpty(config_t::hasDoAndDontIgnoreEmptyVersions(function) ? E_Maybe : E_True), - m_Penalties(2 * params.candidateBucketLengths().size()), - m_PenaltyDescriptions(2 * params.candidateBucketLengths().size()), - m_Id(id), - m_CountStatistics(0) -{ +CDetectorSpecification::CDetectorSpecification(const CAutoconfigurerParams& params, config_t::EFunctionCategory function, std::size_t id) + : m_Params(params), + m_Function(function), + m_Side(config_t::hasSidedCalculation(function) ? config_t::E_UndeterminedSide : config_t::E_TwoSide), + m_IgnoreEmpty(config_t::hasDoAndDontIgnoreEmptyVersions(function) ? E_Maybe : E_True), + m_Penalties(2 * params.candidateBucketLengths().size()), + m_PenaltyDescriptions(2 * params.candidateBucketLengths().size()), + m_Id(id), + m_CountStatistics(0) { this->initializePenalties(); - if (config_t::hasArgument(function)) - { + if (config_t::hasArgument(function)) { throw std::logic_error(std::string("No argument supplied for '") + config_t::print(function) + "'"); } - std::fill_n(m_FieldStatistics, - constants::NUMBER_FIELD_INDICES, - static_cast(0)); + std::fill_n(m_FieldStatistics, constants::NUMBER_FIELD_INDICES, static_cast(0)); } -CDetectorSpecification::CDetectorSpecification(const CAutoconfigurerParams ¶ms, +CDetectorSpecification::CDetectorSpecification(const CAutoconfigurerParams& params, config_t::EFunctionCategory function, - const std::string &argument, - std::size_t id) : - m_Params(params), - m_Function(function), - m_Side(config_t::hasSidedCalculation(function) ? config_t::E_UndeterminedSide : config_t::E_TwoSide), - m_IgnoreEmpty(config_t::hasDoAndDontIgnoreEmptyVersions(function) ? E_Maybe : E_True), - m_Penalties(2 * params.candidateBucketLengths().size()), - m_PenaltyDescriptions(2 * params.candidateBucketLengths().size()), - m_Id(id), - m_CountStatistics(0) -{ + const std::string& argument, + std::size_t id) + : m_Params(params), + m_Function(function), + m_Side(config_t::hasSidedCalculation(function) ? 
config_t::E_UndeterminedSide : config_t::E_TwoSide), + m_IgnoreEmpty(config_t::hasDoAndDontIgnoreEmptyVersions(function) ? E_Maybe : E_True), + m_Penalties(2 * params.candidateBucketLengths().size()), + m_PenaltyDescriptions(2 * params.candidateBucketLengths().size()), + m_Id(id), + m_CountStatistics(0) { this->initializePenalties(); - if (!config_t::hasArgument(function)) - { + if (!config_t::hasArgument(function)) { LOG_ERROR("Ignoring argument '" + argument + "' for '" + config_t::print(function) + "'"); - } - else - { + } else { m_FunctionFields[constants::ARGUMENT_INDEX] = argument; } - std::fill_n(m_FieldStatistics, - constants::NUMBER_FIELD_INDICES, - static_cast(0)); + std::fill_n(m_FieldStatistics, constants::NUMBER_FIELD_INDICES, static_cast(0)); } -void CDetectorSpecification::swap(CDetectorSpecification &other) -{ +void CDetectorSpecification::swap(CDetectorSpecification& other) { std::swap(m_Params, other.m_Params); std::swap(m_Function, other.m_Function); std::swap(m_Side, other.m_Side); @@ -173,340 +148,269 @@ void CDetectorSpecification::swap(CDetectorSpecification &other) m_PenaltyDescriptions.swap(other.m_PenaltyDescriptions); std::swap(m_Penalty, other.m_Penalty); std::swap(m_Id, other.m_Id); - for (std::size_t i = 0u; i < constants::NUMBER_FIELD_INDICES; ++i) - { + for (std::size_t i = 0u; i < constants::NUMBER_FIELD_INDICES; ++i) { m_FunctionFields[i].swap(other.m_FunctionFields[i]); std::swap(m_FieldStatistics[i], other.m_FieldStatistics[i]); } std::swap(m_CountStatistics, other.m_CountStatistics); } -void CDetectorSpecification::side(config_t::ESide side) -{ +void CDetectorSpecification::side(config_t::ESide side) { m_Side = side; } -void CDetectorSpecification::ignoreEmpty(bool ignoreEmpty) -{ +void CDetectorSpecification::ignoreEmpty(bool ignoreEmpty) { m_IgnoreEmpty = ignoreEmpty ? E_True : E_False; } -bool CDetectorSpecification::canAddPartitioning(std::size_t index, const std::string &value) const -{ +bool CDetectorSpecification::canAddPartitioning(std::size_t index, const std::string& value) const { // Rules: // 1) We can only add a field to a detector whose index is greater // than any field currently set. // 2) We can't have duplicate fields. 
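// Illustrative sketch (not part of the patch) of the two rules listed above,
// over a simplified detector whose partitioning fields sit in an array
// ordered argument < by < over < partition; boost::optional stands in for the
// patch's TOptionalStr, and the function name mirrors canAddPartitioning.
#include <algorithm>
#include <array>
#include <cstddef>
#include <string>
#include <boost/optional.hpp>

bool canAddPartitioning(const std::array<boost::optional<std::string>, 4>& fields,
                        std::size_t index,
                        const std::string& value) {
    // Rule 1: only a role with a higher index than any role already set may
    // be added, so the enumerator visits each combination exactly once.
    int highest = -1;
    for (std::size_t i = 0; i < fields.size(); ++i) {
        if (fields[i]) {
            highest = static_cast<int>(i);
        }
    }
    if (static_cast<int>(index) <= highest) {
        return false;
    }
    // Rule 2: one field name cannot play two roles in the same detector.
    return std::none_of(fields.begin(), fields.end(), [&value](const boost::optional<std::string>& field) {
        return field && *field == value;
    });
}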
- return static_cast(index) > this->highestFieldIndex() - && std::find(boost::begin(m_FunctionFields), - boost::end(m_FunctionFields), - value) == boost::end(m_FunctionFields); + return static_cast(index) > this->highestFieldIndex() && + std::find(boost::begin(m_FunctionFields), boost::end(m_FunctionFields), value) == boost::end(m_FunctionFields); } -void CDetectorSpecification::addPartitioning(std::size_t index, const std::string &value) -{ +void CDetectorSpecification::addPartitioning(std::size_t index, const std::string& value) { m_FunctionFields[index] = value; - if (index == constants::OVER_INDEX) - { + if (index == constants::OVER_INDEX) { m_IgnoreEmpty = E_True; } } -void CDetectorSpecification::addInfluencer(const std::string &influencer) -{ +void CDetectorSpecification::addInfluencer(const std::string& influencer) { std::size_t n = m_Influencers.size(); m_Influencers.push_back(influencer); - if (n > 0) - { + if (n > 0) { std::inplace_merge(m_Influencers.begin(), m_Influencers.begin() + n, m_Influencers.end()); } } -void CDetectorSpecification::bucketLength(core_t::TTime bucketLength) -{ +void CDetectorSpecification::bucketLength(core_t::TTime bucketLength) { m_BucketLength = bucketLength; } -void CDetectorSpecification::addFieldStatistics(const TFieldStatisticsVec &stats) -{ - for (std::size_t i = 0u; i < boost::size(constants::CFieldIndices::ALL); ++i) - { - if (const TOptionalStr &field = m_FunctionFields[constants::CFieldIndices::ALL[i]]) - { - m_FieldStatistics[constants::CFieldIndices::ALL[i]] = - &(*std::find_if(stats.begin(), stats.end(), CNameEquals(*field))); +void CDetectorSpecification::addFieldStatistics(const TFieldStatisticsVec& stats) { + for (std::size_t i = 0u; i < boost::size(constants::CFieldIndices::ALL); ++i) { + if (const TOptionalStr& field = m_FunctionFields[constants::CFieldIndices::ALL[i]]) { + m_FieldStatistics[constants::CFieldIndices::ALL[i]] = &(*std::find_if(stats.begin(), stats.end(), CNameEquals(*field))); } } } -void CDetectorSpecification::setCountStatistics(const CDataCountStatistics &stats) -{ +void CDetectorSpecification::setCountStatistics(const CDataCountStatistics& stats) { m_CountStatistics = &stats; } -void CDetectorSpecification::setPenalty(const TPenaltyPtr &penalty) -{ +void CDetectorSpecification::setPenalty(const TPenaltyPtr& penalty) { m_Penalty = penalty; } -double CDetectorSpecification::score() const -{ +double CDetectorSpecification::score() const { TSizeVecCPtrAry indicesInUse = this->penaltyIndicesInUse(); double penalty = 0.0; - for (std::size_t iid = 0u; iid < TSizeVecCPtrAry::size(); ++iid) - { + for (std::size_t iid = 0u; iid < TSizeVecCPtrAry::size(); ++iid) { penalty = std::max(penalty, maxPenalty(*indicesInUse[iid], m_Penalties)); } return CPenalty::score(penalty); } -void CDetectorSpecification::scores(TParamScoresVec &result) const -{ +void CDetectorSpecification::scores(TParamScoresVec& result) const { result.reserve(m_Penalties.size()); - const TTimeVec &candidates = this->params().candidateBucketLengths(); - for (std::size_t iid = 0u; iid < boost::size(IGNORE_EMPTY); ++iid) - { - for (std::size_t bid = 0u; bid < candidates.size(); ++bid) - { + const TTimeVec& candidates = this->params().candidateBucketLengths(); + for (std::size_t iid = 0u; iid < boost::size(IGNORE_EMPTY); ++iid) { + for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { std::size_t pid = this->params().penaltyIndexFor(bid, IGNORE_EMPTY[iid]); double score = CPenalty::score(m_Penalties[pid]); - const TStrVec &descriptions = 
m_PenaltyDescriptions[pid]; - if (score > this->params().minimumDetectorScore()) - { - const std::string &name = config_t::ignoreEmptyVersionName(m_Function, - IGNORE_EMPTY[iid], - this->isPopulation()); + const TStrVec& descriptions = m_PenaltyDescriptions[pid]; + if (score > this->params().minimumDetectorScore()) { + const std::string& name = config_t::ignoreEmptyVersionName(m_Function, IGNORE_EMPTY[iid], this->isPopulation()); result.push_back(SParamScores(candidates[bid], name, score, descriptions)); } } } } -void CDetectorSpecification::applyPenalty(double penalty, const std::string &description) -{ +void CDetectorSpecification::applyPenalty(double penalty, const std::string& description) { LOG_TRACE("penalty = " << penalty); - if (penalty == 1.0) - { + if (penalty == 1.0) { return; } - for (std::size_t i = 0u; i < m_Penalties.size(); ++i) - { + for (std::size_t i = 0u; i < m_Penalties.size(); ++i) { m_Penalties[i] *= penalty; - if (!description.empty()) - { + if (!description.empty()) { m_PenaltyDescriptions[i].push_back(description); } } } -void CDetectorSpecification::applyPenalties(const TSizeVec &indices, - const TDoubleVec &penalties, - const TStrVec &descriptions) -{ +void CDetectorSpecification::applyPenalties(const TSizeVec& indices, const TDoubleVec& penalties, const TStrVec& descriptions) { LOG_TRACE("penalties = " << core::CContainerPrinter::print(penalties)); - for (std::size_t i = 0u; i < indices.size(); ++i) - { - if (penalties[i] == 1.0) - { + for (std::size_t i = 0u; i < indices.size(); ++i) { + if (penalties[i] == 1.0) { continue; } m_Penalties[indices[i]] *= penalties[i]; - if (!descriptions[i].empty()) - { + if (!descriptions[i].empty()) { m_PenaltyDescriptions[indices[i]].push_back(descriptions[i]); } } LOG_TRACE("cumulative = " << core::CContainerPrinter::print(m_Penalties)); } -void CDetectorSpecification::refreshScores() -{ +void CDetectorSpecification::refreshScores() { LOG_TRACE("*** Refreshing scores ***"); this->initializePenalties(); m_Penalty->penalize(*this); this->refreshIgnoreEmpty(); } -config_t::EFunctionCategory CDetectorSpecification::function() const -{ +config_t::EFunctionCategory CDetectorSpecification::function() const { return m_Function; } -const CDetectorSpecification::TOptionalStr &CDetectorSpecification::argumentField() const -{ +const CDetectorSpecification::TOptionalStr& CDetectorSpecification::argumentField() const { return m_FunctionFields[constants::ARGUMENT_INDEX]; } -const CDetectorSpecification::TOptionalStr &CDetectorSpecification::byField() const -{ +const CDetectorSpecification::TOptionalStr& CDetectorSpecification::byField() const { return m_FunctionFields[constants::BY_INDEX]; } -const CDetectorSpecification::TOptionalStr &CDetectorSpecification::overField() const -{ +const CDetectorSpecification::TOptionalStr& CDetectorSpecification::overField() const { return m_FunctionFields[constants::OVER_INDEX]; } -const CDetectorSpecification::TOptionalStr &CDetectorSpecification::partitionField() const -{ +const CDetectorSpecification::TOptionalStr& CDetectorSpecification::partitionField() const { return m_FunctionFields[constants::PARTITION_INDEX]; } -const CDetectorSpecification::TStrVec &CDetectorSpecification::influences() const -{ +const CDetectorSpecification::TStrVec& CDetectorSpecification::influences() const { return m_Influencers; } -void CDetectorSpecification::candidateBucketLengths(TTimeVec &result) const -{ - const TTimeVec &candidates = this->params().candidateBucketLengths(); +void 
CDetectorSpecification::candidateBucketLengths(TTimeVec& result) const { + const TTimeVec& candidates = this->params().candidateBucketLengths(); result.reserve(candidates.size()); - for (std::size_t bid = 0u; bid < candidates.size(); ++bid) - { - if (CPenalty::score(maxPenalty(this->params().penaltyIndicesFor(bid), m_Penalties)) > 0.0) - { + for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { + if (CPenalty::score(maxPenalty(this->params().penaltyIndicesFor(bid), m_Penalties)) > 0.0) { result.push_back(candidates[bid]); } } } -bool CDetectorSpecification::isPopulation() const -{ +bool CDetectorSpecification::isPopulation() const { return static_cast(this->overField()); } -bool CDetectorSpecification::operator<(const CDetectorSpecification &rhs) const -{ -#define LESS(lhs, rhs) \ -if (lhs < rhs) return true; \ -if (rhs < lhs) return false +bool CDetectorSpecification::operator<(const CDetectorSpecification& rhs) const { +#define LESS(lhs, rhs) \ + if (lhs < rhs) \ + return true; \ + if (rhs < lhs) \ + return false LESS(m_Function, rhs.m_Function); LESS(m_Side, rhs.m_Side); LESS(m_IgnoreEmpty, rhs.m_IgnoreEmpty); maths::COrderings::SOptionalLess less; - if (less(m_BucketLength, rhs.m_BucketLength)) - { + if (less(m_BucketLength, rhs.m_BucketLength)) { return true; } - if (less(rhs.m_BucketLength, m_BucketLength)) - { + if (less(rhs.m_BucketLength, m_BucketLength)) { return false; } - if (std::lexicographical_compare(boost::begin( m_FunctionFields), boost::end( m_FunctionFields), - boost::begin(rhs.m_FunctionFields), boost::end(rhs.m_FunctionFields), - less)) - { + if (std::lexicographical_compare(boost::begin(m_FunctionFields), + boost::end(m_FunctionFields), + boost::begin(rhs.m_FunctionFields), + boost::end(rhs.m_FunctionFields), + less)) { return true; } - if (std::lexicographical_compare(boost::begin(rhs.m_FunctionFields), boost::end(rhs.m_FunctionFields), - boost::begin( m_FunctionFields), boost::end( m_FunctionFields), - less)) - { + if (std::lexicographical_compare(boost::begin(rhs.m_FunctionFields), + boost::end(rhs.m_FunctionFields), + boost::begin(m_FunctionFields), + boost::end(m_FunctionFields), + less)) { return false; } return m_Influencers < rhs.m_Influencers; } -bool CDetectorSpecification::operator==(const CDetectorSpecification &rhs) const -{ - return m_Function == rhs.m_Function - && m_Side == rhs.m_Side - && m_IgnoreEmpty == rhs.m_IgnoreEmpty - && m_BucketLength == rhs.m_BucketLength - && std::equal(boost::begin(m_FunctionFields), - boost::end(m_FunctionFields), - boost::begin(rhs.m_FunctionFields)) - && m_Influencers == rhs.m_Influencers; +bool CDetectorSpecification::operator==(const CDetectorSpecification& rhs) const { + return m_Function == rhs.m_Function && m_Side == rhs.m_Side && m_IgnoreEmpty == rhs.m_IgnoreEmpty && + m_BucketLength == rhs.m_BucketLength && + std::equal(boost::begin(m_FunctionFields), boost::end(m_FunctionFields), boost::begin(rhs.m_FunctionFields)) && + m_Influencers == rhs.m_Influencers; } -std::size_t CDetectorSpecification::id() const -{ +std::size_t CDetectorSpecification::id() const { return m_Id; } -void CDetectorSpecification::id(std::size_t id) -{ +void CDetectorSpecification::id(std::size_t id) { m_Id = id; } -const CFieldStatistics *CDetectorSpecification::argumentFieldStatistics() const -{ +const CFieldStatistics* CDetectorSpecification::argumentFieldStatistics() const { return m_FieldStatistics[constants::ARGUMENT_INDEX]; } -const CFieldStatistics *CDetectorSpecification::byFieldStatistics() const -{ +const CFieldStatistics* 
CDetectorSpecification::byFieldStatistics() const { return m_FieldStatistics[constants::BY_INDEX]; } -const CFieldStatistics *CDetectorSpecification::overFieldStatistics() const -{ +const CFieldStatistics* CDetectorSpecification::overFieldStatistics() const { return m_FieldStatistics[constants::OVER_INDEX]; } -const CFieldStatistics *CDetectorSpecification::partitionFieldStatistics() const -{ +const CFieldStatistics* CDetectorSpecification::partitionFieldStatistics() const { return m_FieldStatistics[constants::PARTITION_INDEX]; } -const CDataCountStatistics *CDetectorSpecification::countStatistics() const -{ +const CDataCountStatistics* CDetectorSpecification::countStatistics() const { return m_CountStatistics; } -std::string CDetectorSpecification::detectorConfig() const -{ - if (!this->params().writeDetectorConfigs()) - { +std::string CDetectorSpecification::detectorConfig() const { + if (!this->params().writeDetectorConfigs()) { return ""; } using TDoubleTimePr = std::pair; - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - const TTimeVec &candidates = this->params().candidateBucketLengths(); + const TTimeVec& candidates = this->params().candidateBucketLengths(); TMaxAccumulator best; - for (std::size_t bid = 0u; bid < candidates.size(); ++bid) - { - const TSizeVec &indices = this->params().penaltyIndicesFor(bid); - for (std::size_t i = 0u; i < indices.size(); ++i) - { + for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { + const TSizeVec& indices = this->params().penaltyIndicesFor(bid); + for (std::size_t i = 0u; i < indices.size(); ++i) { best.add(std::make_pair(m_Penalties[indices[i]], candidates[bid])); } } std::ostringstream result; - if (CPenalty::score(best[0].first) > this->params().minimumDetectorScore()) - { - const std::string &newLine = this->params().detectorConfigLineEnding(); - result << "{" << newLine - << " \"analysisConfig\": {" << newLine - << " \"bucketSpan\": " << best[0].second << newLine - << " }," << newLine - << " \"detectors\": [" << newLine - << " {" << newLine - << " \"function\":\"" << config_t::print(m_Function) << "\""; - if (const CDetectorSpecification::TOptionalStr &argument = this->argumentField()) - { + if (CPenalty::score(best[0].first) > this->params().minimumDetectorScore()) { + const std::string& newLine = this->params().detectorConfigLineEnding(); + result << "{" << newLine << " \"analysisConfig\": {" << newLine << " \"bucketSpan\": " << best[0].second << newLine << " }," + << newLine << " \"detectors\": [" << newLine << " {" << newLine << " \"function\":\"" << config_t::print(m_Function) + << "\""; + if (const CDetectorSpecification::TOptionalStr& argument = this->argumentField()) { result << "," << newLine << " \"fieldName\": \"" << *argument << "\""; } - if (const CDetectorSpecification::TOptionalStr &by = this->byField()) - { + if (const CDetectorSpecification::TOptionalStr& by = this->byField()) { result << "," << newLine << " \"byFieldName\": \"" << *by << "\""; } - if (const CDetectorSpecification::TOptionalStr &over = this->overField()) - { + if (const CDetectorSpecification::TOptionalStr& over = this->overField()) { result << "," << newLine << " \"overFieldName\": \"" << *over << "\""; } - if (const CDetectorSpecification::TOptionalStr &partition = this->partitionField()) - { + if (const CDetectorSpecification::TOptionalStr& partition = this->partitionField()) { result << "," << newLine << " \"partitionFieldName\": \"" << 
*partition << "\""; } result << newLine << " }" << newLine << " ]" << newLine << "}"; @@ -514,93 +418,76 @@ std::string CDetectorSpecification::detectorConfig() const return result.str(); } -std::string CDetectorSpecification::description() const -{ +std::string CDetectorSpecification::description() const { std::ostringstream result; result << fullFunctionName(m_Side, m_IgnoreEmpty, this->isPopulation(), m_Function) << (this->argumentField() ? std::string("(") + *this->argumentField() + ")" : std::string()) << (this->byField() ? std::string(" by '") + *this->byField() + "'" : std::string()) - << (this->overField() ? std::string(" over '") + *this->overField() + "'": std::string()) + << (this->overField() ? std::string(" over '") + *this->overField() + "'" : std::string()) << (this->partitionField() ? std::string(" partition '") + *this->partitionField() + "'" : std::string()); return result.str(); } -const CAutoconfigurerParams &CDetectorSpecification::params() const -{ +const CAutoconfigurerParams& CDetectorSpecification::params() const { return m_Params; } -int CDetectorSpecification::highestFieldIndex() const -{ +int CDetectorSpecification::highestFieldIndex() const { int result = -1; - for (std::size_t i = 0u; i < boost::size(m_FunctionFields); ++i) - { - if (m_FunctionFields[i]) - { + for (std::size_t i = 0u; i < boost::size(m_FunctionFields); ++i) { + if (m_FunctionFields[i]) { result = static_cast(i); } } return result; } -CDetectorSpecification::TSizeVecCPtrAry CDetectorSpecification::penaltyIndicesInUse() const -{ +CDetectorSpecification::TSizeVecCPtrAry CDetectorSpecification::penaltyIndicesInUse() const { static const TSizeVec EMPTY; TSizeVecCPtrAry result; - switch (m_IgnoreEmpty) - { + switch (m_IgnoreEmpty) { case E_True: - result[ignoreEmptyId(true)] = &this->params().penaltyIndicesFor(true); + result[ignoreEmptyId(true)] = &this->params().penaltyIndicesFor(true); result[ignoreEmptyId(false)] = &EMPTY; break; case E_False: - result[ignoreEmptyId(true)] = &EMPTY; + result[ignoreEmptyId(true)] = &EMPTY; result[ignoreEmptyId(false)] = &this->params().penaltyIndicesFor(false); break; case E_Maybe: - result[ignoreEmptyId(true)] = &this->params().penaltyIndicesFor(true); + result[ignoreEmptyId(true)] = &this->params().penaltyIndicesFor(true); result[ignoreEmptyId(false)] = &this->params().penaltyIndicesFor(false); break; } return result; } -void CDetectorSpecification::initializePenalties() -{ +void CDetectorSpecification::initializePenalties() { std::fill_n(m_Penalties.begin(), m_Penalties.size(), 0.0); TSizeVecCPtrAry indicesInUse = this->penaltyIndicesInUse(); - for (std::size_t iid = 0u; iid < TSizeVecCPtrAry::size(); ++iid) - { + for (std::size_t iid = 0u; iid < TSizeVecCPtrAry::size(); ++iid) { fill(*indicesInUse[iid], 1.0, m_Penalties); } std::fill_n(m_PenaltyDescriptions.begin(), m_PenaltyDescriptions.size(), TStrVec()); } -void CDetectorSpecification::refreshIgnoreEmpty() -{ - if (!config_t::hasDoAndDontIgnoreEmptyVersions(m_Function) || this->isPopulation()) - { +void CDetectorSpecification::refreshIgnoreEmpty() { + if (!config_t::hasDoAndDontIgnoreEmptyVersions(m_Function) || this->isPopulation()) { return; } - static const EFuzzyBool STATUS[] = { E_Maybe, E_False, E_True, E_Maybe }; + static const EFuzzyBool STATUS[] = {E_Maybe, E_False, E_True, E_Maybe}; - double ptrue = maxPenalty(this->params().penaltyIndicesFor(true), m_Penalties); + double ptrue = maxPenalty(this->params().penaltyIndicesFor(true), m_Penalties); double pfalse = 
maxPenalty(this->params().penaltyIndicesFor(false), m_Penalties); - m_IgnoreEmpty = STATUS[ (CPenalty::score(ptrue) > 0.0 ? 2 : 0) - + (CPenalty::score(pfalse) > 0.0 ? 1 : 0)]; + m_IgnoreEmpty = STATUS[(CPenalty::score(ptrue) > 0.0 ? 2 : 0) + (CPenalty::score(pfalse) > 0.0 ? 1 : 0)]; } - CDetectorSpecification::SParamScores::SParamScores(core_t::TTime bucketLength, - const std::string &ignoreEmpty, + const std::string& ignoreEmpty, double score, - const TStrVec &descriptions) : - s_BucketLength(bucketLength), - s_IgnoreEmpty(ignoreEmpty), - s_Score(score), - s_Descriptions(descriptions) -{} - + const TStrVec& descriptions) + : s_BucketLength(bucketLength), s_IgnoreEmpty(ignoreEmpty), s_Score(score), s_Descriptions(descriptions) { +} } } diff --git a/lib/config/CFieldRolePenalty.cc b/lib/config/CFieldRolePenalty.cc index b48f8c97f2..f8dfbfbf5f 100644 --- a/lib/config/CFieldRolePenalty.cc +++ b/lib/config/CFieldRolePenalty.cc @@ -10,157 +10,116 @@ #include #include -#include #include +#include #include -namespace ml -{ -namespace config -{ -namespace -{ +namespace ml { +namespace config { +namespace { const std::string EMPTY; const std::string SPACE(" "); //! Get the penalty description prefix. -std::string prefix(const std::string &description) -{ +std::string prefix(const std::string& description) { return description.empty() ? EMPTY : SPACE; } - } //////// CCantBeNumeric //////// -CCantBeNumeric::CCantBeNumeric(const CAutoconfigurerParams ¶ms) : - CPenalty(params) -{} +CCantBeNumeric::CCantBeNumeric(const CAutoconfigurerParams& params) : CPenalty(params) { +} -CCantBeNumeric *CCantBeNumeric::clone() const -{ +CCantBeNumeric* CCantBeNumeric::clone() const { return new CCantBeNumeric(*this); } -std::string CCantBeNumeric::name() const -{ +std::string CCantBeNumeric::name() const { return "can't be numeric"; } -void CCantBeNumeric::penaltyFromMe(const CFieldStatistics &stats, - double &penalty, - std::string &description) const -{ - if (config_t::isNumeric(stats.type())) - { +void CCantBeNumeric::penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const { + if (config_t::isNumeric(stats.type())) { penalty = 0.0; description += prefix(description) + "Can't use numeric"; } } - //////// CCantBeCategorical //////// -CCantBeCategorical::CCantBeCategorical(const CAutoconfigurerParams ¶ms) : - CPenalty(params) -{} +CCantBeCategorical::CCantBeCategorical(const CAutoconfigurerParams& params) : CPenalty(params) { +} -CCantBeCategorical *CCantBeCategorical::clone() const -{ +CCantBeCategorical* CCantBeCategorical::clone() const { return new CCantBeCategorical(*this); } -std::string CCantBeCategorical::name() const -{ +std::string CCantBeCategorical::name() const { return "Can't be categorical"; } -void CCantBeCategorical::penaltyFromMe(const CFieldStatistics &stats, - double &penalty, - std::string &description) const -{ - if (config_t::isCategorical(stats.type())) - { +void CCantBeCategorical::penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const { + if (config_t::isCategorical(stats.type())) { penalty = 0.0; description += prefix(description) + "Can't use categorical"; } } - //////// CDontUseUnaryField //////// -CDontUseUnaryField::CDontUseUnaryField(const CAutoconfigurerParams ¶ms) : - CPenalty(params) -{} +CDontUseUnaryField::CDontUseUnaryField(const CAutoconfigurerParams& params) : CPenalty(params) { +} -CDontUseUnaryField *CDontUseUnaryField::clone() const -{ +CDontUseUnaryField* CDontUseUnaryField::clone() const { return 
 new CDontUseUnaryField(*this);
 }
 
-std::string CDontUseUnaryField::name() const
-{
+std::string CDontUseUnaryField::name() const {
     return "don't use unary field";
 }
 
-void CDontUseUnaryField::penaltyFromMe(const CFieldStatistics &stats,
-                                       double &penalty,
-                                       std::string &description) const
-{
-    if (const CCategoricalDataSummaryStatistics *summary = stats.categoricalSummary())
-    {
-        if (summary->distinctCount() == 1)
-        {
+void CDontUseUnaryField::penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const {
+    if (const CCategoricalDataSummaryStatistics* summary = stats.categoricalSummary()) {
+        if (summary->distinctCount() == 1) {
             penalty = 0.0;
             description += prefix(description) + "There's no point using a unary field";
         }
     }
 }
-
 //////// CDistinctCountThreshold ////////
 
-CDistinctCountThresholdPenalty::CDistinctCountThresholdPenalty(const CAutoconfigurerParams &params,
                                                                std::size_t distinctCountForPenaltyOfOne,
-                                                               std::size_t distinctCountForPenaltyOfZero) :
-        CPenalty(params),
-        m_DistinctCountForPenaltyOfOne(static_cast<double>(distinctCountForPenaltyOfOne)),
-        m_DistinctCountForPenaltyOfZero(static_cast<double>(distinctCountForPenaltyOfZero))
-{}
-
-CDistinctCountThresholdPenalty *CDistinctCountThresholdPenalty::clone() const
-{
+                                                               std::size_t distinctCountForPenaltyOfZero)
+    : CPenalty(params),
+      m_DistinctCountForPenaltyOfOne(static_cast<double>(distinctCountForPenaltyOfOne)),
+      m_DistinctCountForPenaltyOfZero(static_cast<double>(distinctCountForPenaltyOfZero)) {
+}
+
+CDistinctCountThresholdPenalty* CDistinctCountThresholdPenalty::clone() const {
     return new CDistinctCountThresholdPenalty(*this);
 }
 
-std::string CDistinctCountThresholdPenalty::name() const
-{
-    return "distinct count thresholds "
-           + core::CStringUtils::typeToString(m_DistinctCountForPenaltyOfZero) + " and "
-           + core::CStringUtils::typeToString(m_DistinctCountForPenaltyOfOne);
-}
-
-void CDistinctCountThresholdPenalty::penaltyFromMe(const CFieldStatistics &stats,
-                                                   double &penalty,
-                                                   std::string &description) const
-{
-    if (const CCategoricalDataSummaryStatistics *summary = stats.categoricalSummary())
-    {
-        double penalty_ = CTools::interpolate(m_DistinctCountForPenaltyOfZero,
-                                              m_DistinctCountForPenaltyOfOne,
-                                              0.0, 1.0, static_cast<double>(summary->distinctCount()));
-        if (penalty_ < 1.0)
-        {
+std::string CDistinctCountThresholdPenalty::name() const {
+    return "distinct count thresholds " + core::CStringUtils::typeToString(m_DistinctCountForPenaltyOfZero) + " and " +
+           core::CStringUtils::typeToString(m_DistinctCountForPenaltyOfOne);
+}
+
+void CDistinctCountThresholdPenalty::penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const {
+    if (const CCategoricalDataSummaryStatistics* summary = stats.categoricalSummary()) {
+        double penalty_ = CTools::interpolate(
+            m_DistinctCountForPenaltyOfZero, m_DistinctCountForPenaltyOfOne, 0.0, 1.0, static_cast<double>(summary->distinctCount()));
+        if (penalty_ < 1.0) {
             penalty *= penalty_;
-            description += prefix(description)
-                           + "A distinct count of " + core::CStringUtils::typeToString(summary->distinctCount())
-                           + " is" + (penalty_ == 0.0 ? " too " : " ")
-                           + (m_DistinctCountForPenaltyOfZero > m_DistinctCountForPenaltyOfOne ? "high" : "low");
+            description += prefix(description) + "A distinct count of " + core::CStringUtils::typeToString(summary->distinctCount()) +
+                           " is" + (penalty_ == 0.0 ? " too " : " ") +
+                           (m_DistinctCountForPenaltyOfZero > m_DistinctCountForPenaltyOfOne ? "high" : "low");
         }
     }
 }
-
 }
 }
diff --git a/lib/config/CFieldStatistics.cc b/lib/config/CFieldStatistics.cc
index 3afb1a518e..873fe4bbbf 100644
--- a/lib/config/CFieldStatistics.cc
+++ b/lib/config/CFieldStatistics.cc
@@ -11,76 +11,51 @@
 #include
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 //! \brief Adds an example to the summary statistics.
-class CAddToStatistics : public boost::static_visitor<void>
-{
-    public:
-        CAddToStatistics(core_t::TTime time, const std::string &example) :
-                m_Time(time), m_Example(&example)
-        {}
-
-        void operator()(CDataSummaryStatistics &summary) const
-        {
-            summary.add(m_Time);
-        }
+class CAddToStatistics : public boost::static_visitor<void> {
+public:
+    CAddToStatistics(core_t::TTime time, const std::string& example) : m_Time(time), m_Example(&example) {}
 
-        void operator()(CCategoricalDataSummaryStatistics &summary) const
-        {
-            summary.add(m_Time, *m_Example);
-        }
+    void operator()(CDataSummaryStatistics& summary) const { summary.add(m_Time); }
 
-        void operator()(CNumericDataSummaryStatistics &summary) const
-        {
-            summary.add(m_Time, *m_Example);
-        }
+    void operator()(CCategoricalDataSummaryStatistics& summary) const { summary.add(m_Time, *m_Example); }
 
-    private:
-        core_t::TTime m_Time;
-        const std::string *m_Example;
-};
+    void operator()(CNumericDataSummaryStatistics& summary) const { summary.add(m_Time, *m_Example); }
 
+private:
+    core_t::TTime m_Time;
+    const std::string* m_Example;
+};
 }
 
-CFieldStatistics::CFieldStatistics(const std::string &fieldName, const CAutoconfigurerParams &params) :
-        m_Params(params),
-        m_FieldName(fieldName),
-        m_NumberExamples(0),
-        m_Semantics(params.dataType(fieldName)),
-        m_SummaryStatistics(CDataSummaryStatistics())
-{
+CFieldStatistics::CFieldStatistics(const std::string& fieldName, const CAutoconfigurerParams& params)
+    : m_Params(params),
+      m_FieldName(fieldName),
+      m_NumberExamples(0),
+      m_Semantics(params.dataType(fieldName)),
+      m_SummaryStatistics(CDataSummaryStatistics()) {
 }
 
-const std::string &CFieldStatistics::name() const
-{
+const std::string& CFieldStatistics::name() const {
     return m_FieldName;
 }
 
-void CFieldStatistics::maybeStartCapturingTypeStatistics()
-{
-    if (m_NumberExamples > this->params().minimumExamplesToClassify())
-    {
-        if (const CDataSummaryStatistics *summary = this->summary())
-        {
+void CFieldStatistics::maybeStartCapturingTypeStatistics() {
+    if (m_NumberExamples > this->params().minimumExamplesToClassify()) {
+        if (const CDataSummaryStatistics* summary = this->summary()) {
             m_Semantics.computeType();
             config_t::EDataType type = m_Semantics.type();
             LOG_DEBUG("Classified '" << m_FieldName << "' as " << config_t::print(type));
-            if (config_t::isCategorical(type))
-            {
-                m_SummaryStatistics = CCategoricalDataSummaryStatistics(
-                                          *summary, this->params().numberOfMostFrequentFieldsCounts());
+            if (config_t::isCategorical(type)) {
+                m_SummaryStatistics = CCategoricalDataSummaryStatistics(*summary, this->params().numberOfMostFrequentFieldsCounts());
                 this->replayBuffer();
-            }
-            else if (config_t::isNumeric(type))
-            {
+            } else if (config_t::isNumeric(type)) {
                 m_SummaryStatistics = CNumericDataSummaryStatistics(*summary, config_t::isInteger(type));
                 this->replayBuffer();
             }
@@ -88,11 +63,9 @@ void CFieldStatistics::maybeStartCapturingTypeStatistics()
     }
 }
 
-void CFieldStatistics::add(core_t::TTime time, const std::string &example)
-{
+void CFieldStatistics::add(core_t::TTime time, const std::string& example) {
     ++m_NumberExamples;
-    if (m_NumberExamples < this->params().minimumExamplesToClassify())
-    {
+    if (m_NumberExamples < this->params().minimumExamplesToClassify()) {
         m_Buffer.push_back(std::make_pair(time, example));
     }
     m_Semantics.add(example);
@@ -100,47 +73,38 @@ void CFieldStatistics::add(core_t::TTime time, const std::string &example)
     this->maybeStartCapturingTypeStatistics();
 }
 
-config_t::EDataType CFieldStatistics::type() const
-{
+config_t::EDataType CFieldStatistics::type() const {
     return m_Semantics.type();
 }
 
-const CDataSummaryStatistics *CFieldStatistics::summary() const
-{
+const CDataSummaryStatistics* CFieldStatistics::summary() const {
     return boost::get<CDataSummaryStatistics>(&m_SummaryStatistics);
 }
 
-const CCategoricalDataSummaryStatistics *CFieldStatistics::categoricalSummary() const
-{
+const CCategoricalDataSummaryStatistics* CFieldStatistics::categoricalSummary() const {
    return boost::get<CCategoricalDataSummaryStatistics>(&m_SummaryStatistics);
 }
 
-const CNumericDataSummaryStatistics *CFieldStatistics::numericSummary() const
-{
+const CNumericDataSummaryStatistics* CFieldStatistics::numericSummary() const {
    return boost::get<CNumericDataSummaryStatistics>(&m_SummaryStatistics);
 }
 
-double CFieldStatistics::score(const CPenalty &penalty) const
-{
+double CFieldStatistics::score(const CPenalty& penalty) const {
     double penalty_ = 1.0;
     penalty.penalty(*this, penalty_);
     return CPenalty::score(penalty_);
 }
 
-const CAutoconfigurerParams &CFieldStatistics::params() const
-{
+const CAutoconfigurerParams& CFieldStatistics::params() const {
     return m_Params;
 }
 
-void CFieldStatistics::replayBuffer()
-{
-    for (std::size_t i = 0u; i < m_Buffer.size(); ++i)
-    {
+void CFieldStatistics::replayBuffer() {
+    for (std::size_t i = 0u; i < m_Buffer.size(); ++i) {
         this->add(m_Buffer[i].first, m_Buffer[i].second);
     }
     TTimeStrPrVec empty;
     m_Buffer.swap(empty);
 }
-
 }
 }
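For readers skimming the CFieldStatistics changes: the class keeps its active summary in a boost::variant and dispatches to whichever alternative is live with a static_visitor, as CAddToStatistics does above, while boost::get<T>(&variant) returns nullptr when T is not the active alternative. A minimal self-contained sketch of that pattern; the summary types and add() signatures below are simplified stand-ins, not the real classes:

    #include <boost/variant.hpp>
    #include <iostream>
    #include <string>

    // Simplified stand-ins for the summary statistics classes.
    struct SPlainSummary       { void add(long t) { std::cout << "plain @" << t << '\n'; } };
    struct SCategoricalSummary { void add(long t, const std::string& e) { std::cout << "cat " << e << " @" << t << '\n'; } };

    using TSummary = boost::variant<SPlainSummary, SCategoricalSummary>;

    // Visitor mirroring CAddToStatistics: one operator() per alternative.
    class CAdd : public boost::static_visitor<void> {
    public:
        CAdd(long time, const std::string& example) : m_Time(time), m_Example(&example) {}
        void operator()(SPlainSummary& s) const { s.add(m_Time); }
        void operator()(SCategoricalSummary& s) const { s.add(m_Time, *m_Example); }
    private:
        long m_Time;
        const std::string* m_Example;
    };

    int main() {
        std::string example{"foo"};
        TSummary summary{SPlainSummary{}};   // starts as the plain summary
        boost::apply_visitor(CAdd(42, example), summary);
        summary = SCategoricalSummary{};     // reclassified, like maybeStartCapturingTypeStatistics
        boost::apply_visitor(CAdd(43, example), summary);
        // boost::get<SPlainSummary>(&summary) is now nullptr, as in the accessors above.
    }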
diff --git a/lib/config/CLongTailPenalty.cc b/lib/config/CLongTailPenalty.cc
index c00d958fcf..5da6cd324c 100644
--- a/lib/config/CLongTailPenalty.cc
+++ b/lib/config/CLongTailPenalty.cc
@@ -6,8 +6,8 @@
 #include
-#include
 #include
+#include
 #include
 #include
@@ -21,62 +21,45 @@
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 //! Return \p count.
-uint64_t count(const CBucketCountStatistics::TMoments &count)
-{
+uint64_t count(const CBucketCountStatistics::TMoments& count) {
     return static_cast<uint64_t>(maths::CBasicStatistics::count(count));
 }
 
 //! Extract the distinct count.
-uint64_t count(const maths::CBjkstUniqueValues &distinct)
-{
+uint64_t count(const maths::CBjkstUniqueValues& distinct) {
     return distinct.number();
 }
-
 }
 
-CLongTailPenalty::CLongTailPenalty(const CAutoconfigurerParams &params) :
-        CPenalty(params)
-{
+CLongTailPenalty::CLongTailPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
 }
 
-CLongTailPenalty *CLongTailPenalty::clone() const
-{
+CLongTailPenalty* CLongTailPenalty::clone() const {
     return new CLongTailPenalty(*this);
 }
 
-std::string CLongTailPenalty::name() const
-{
+std::string CLongTailPenalty::name() const {
     return "long tail";
 }
 
-void CLongTailPenalty::penaltyFromMe(CDetectorSpecification &spec) const
-{
-    if (config_t::isRare(spec.function()))
-    {
-        if (const CByAndPartitionDataCountStatistics *byAndPartitionStats =
-                dynamic_cast<const CByAndPartitionDataCountStatistics*>(spec.countStatistics()))
-        {
+void CLongTailPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
+    if (config_t::isRare(spec.function())) {
+        if (const CByAndPartitionDataCountStatistics* byAndPartitionStats =
+                dynamic_cast<const CByAndPartitionDataCountStatistics*>(spec.countStatistics())) {
             this->penaltyFor(*byAndPartitionStats, spec);
-        }
-        else if (const CByOverAndPartitionDataCountStatistics *byOverAndPartitionStats =
-                     dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(spec.countStatistics()))
-        {
+        } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats =
+                       dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(spec.countStatistics())) {
             this->penaltyFor(*byOverAndPartitionStats, spec);
         }
     }
-
 }
+}
 
-void CLongTailPenalty::penaltyFor(const CByAndPartitionDataCountStatistics &stats,
-                                  CDetectorSpecification &spec) const
-{
+void CLongTailPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     std::size_t n = stats.bucketStatistics().size();
 
     TSizeVec indices;
@@ -86,21 +69,18 @@ void CLongTailPenalty::penaltyFor(const CByAndPartitionDataCountStatistics &stat
     penalties.reserve(2 * n);
     descriptions.reserve(2 * n);
 
-    for (std::size_t bid = 0u; bid < n; ++bid)
-    {
+    for (std::size_t bid = 0u; bid < n; ++bid) {
         // Penalize the case that many by fields values appear in close
         // to the minimum number of buckets.
         TSizeUInt64UMap totals;
         TSizeUInt64UMap tail;
-        this->extractTailCounts(
-                stats.bucketStatistics()[bid].countMomentsPerPartition(), totals, tail);
-        const TSizeVec &indices_ = this->params().penaltyIndicesFor(bid);
+        this->extractTailCounts(stats.bucketStatistics()[bid].countMomentsPerPartition(), totals, tail);
+        const TSizeVec& indices_ = this->params().penaltyIndicesFor(bid);
         indices.insert(indices.end(), indices_.begin(), indices_.end());
         double penalty = this->penaltyFor(tail, totals);
-        std::string description = penalty < 1.0 ?
-                                  std::string("A significant proportion of categories have similar frequency at '")
-                                  + CTools::prettyPrint(this->params().candidateBucketLengths()[bid])
-                                  + "' resolution" : std::string();
+        std::string description = penalty < 1.0 ? std::string("A significant proportion of categories have similar frequency at '") +
+                                                      CTools::prettyPrint(this->params().candidateBucketLengths()[bid]) + "' resolution"
+                                                : std::string();
         std::fill_n(std::back_inserter(penalties), indices_.size(), penalty);
         std::fill_n(std::back_inserter(descriptions), indices_.size(), description);
     }
@@ -108,69 +88,57 @@ void CLongTailPenalty::penaltyFor(const CByAndPartitionDataCountStatistics &stat
     spec.applyPenalties(indices, penalties, descriptions);
 }
 
-void CLongTailPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics &stats,
-                                  CDetectorSpecification &spec) const
-{
+void CLongTailPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     // Penalize the case that many by fields values have close to the
     // minimum number of over field values.
     TSizeUInt64UMap totals;
     TSizeUInt64UMap tail;
-    this->extractTailCounts(
-            stats.sampledByAndPartitionDistinctOverCounts(), totals, tail);
+    this->extractTailCounts(stats.sampledByAndPartitionDistinctOverCounts(), totals, tail);
     double penalty = this->penaltyFor(tail, totals);
     spec.applyPenalty(penalty,
                       penalty < 1.0 ? "A significant proportion of categories have a similar frequency in the population" : "");
 }
 
 template<typename STATS, typename MAP>
-void CLongTailPenalty::extractTailCounts(const MAP &counts,
-                                         TSizeUInt64UMap &totals,
-                                         TSizeUInt64UMap &tail) const
-{
+void CLongTailPenalty::extractTailCounts(const MAP& counts, TSizeUInt64UMap& totals, TSizeUInt64UMap& tail) const {
     using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack<uint64_t, 1>;
     using TSizeMinAccumulatorUMap = boost::unordered_map<std::size_t, TMinAccumulator>;
     using TItr = typename MAP::const_iterator;
 
     TSizeMinAccumulatorUMap mins;
-    for (TItr i = counts.begin(); i != counts.end(); ++i)
-    {
+    for (TItr i = counts.begin(); i != counts.end(); ++i) {
         uint64_t n = count(i->second);
         std::size_t partition = STATS::partition(*i);
         mins[partition].add(n);
         totals[partition] += n;
     }
-    for (TItr i = counts.begin(); i != counts.end(); ++i)
-    {
+    for (TItr i = counts.begin(); i != counts.end(); ++i) {
         uint64_t n = count(i->second);
         std::size_t partition = STATS::partition(*i);
-        const TMinAccumulator &min = mins[partition];
-        if (   n <= static_cast<uint64_t>(  this->params().highCardinalityInTailFactor()
-                                          * static_cast<double>(min[0]) + 0.5)
-            || n <= this->params().highCardinalityInTailIncrement() + min[0])
-        {
+        const TMinAccumulator& min = mins[partition];
+        if (n <= static_cast<uint64_t>(this->params().highCardinalityInTailFactor() * static_cast<double>(min[0]) + 0.5) ||
+            n <= this->params().highCardinalityInTailIncrement() + min[0]) {
             tail[partition] += n;
         }
     }
 }
 
-double CLongTailPenalty::penaltyFor(TSizeUInt64UMap &tail, TSizeUInt64UMap &totals) const
-{
+double CLongTailPenalty::penaltyFor(TSizeUInt64UMap& tail, TSizeUInt64UMap& totals) const {
     using TSizeUInt64UMapCItr = TSizeUInt64UMap::const_iterator;
     using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 
     TMeanAccumulator result;
-    for (TSizeUInt64UMapCItr i = tail.begin(); i != tail.end(); ++i)
-    {
-        double rare  = static_cast<double>(i->second);
-        double total = static_cast<double>(totals[i->first]);
+    for (TSizeUInt64UMapCItr i = tail.begin(); i != tail.end(); ++i) {
+        double rare = static_cast<double>(i->second);
+        double total = static_cast<double>(totals[i->first]);
         double penalty = CTools::logInterpolate(this->params().highCardinalityHighTailFraction(),
-                                                this->params().highCardinalityMaximumTailFraction(),
-                                                1.0, std::min(10.0 / total, 1.0), rare / total);
+                                                this->params().highCardinalityMaximumTailFraction(),
+                                                1.0,
+                                                std::min(10.0 / total, 1.0),
+                                                rare / total);
         result.add(std::sqrt(-std::min(maths::CTools::fastLog(penalty), 0.0)), total);
     }
     return std::exp(-std::pow(maths::CBasicStatistics::mean(result), 2.0));
 }
-
 }
 }
-
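The tail/total ratio above is squashed through CTools::logInterpolate, whose implementation is not part of this patch. As a working assumption it behaves like linear interpolation of log(f) between two abscissae, clamped to the end values outside them; a sketch under that assumption, with made-up thresholds:

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    // Assumed behaviour of CTools::logInterpolate(a, b, fa, fb, x): interpolate
    // log(fa) -> log(fb) linearly for x in [a, b], clamp outside, exponentiate.
    double logInterpolate(double a, double b, double fa, double fb, double x) {
        double t = (x - a) / (b - a);
        t = std::min(std::max(t, 0.0), 1.0); // clamp to the interval
        return std::exp((1.0 - t) * std::log(fa) + t * std::log(fb));
    }

    int main() {
        // Mimics the long tail penalty: no penalty while the tail fraction is at
        // most 0.1, decaying geometrically towards 0.01 as it approaches 0.5.
        for (double frac : {0.05, 0.1, 0.3, 0.5, 0.9}) {
            std::cout << frac << " -> " << logInterpolate(0.1, 0.5, 1.0, 0.01, frac) << '\n';
        }
    }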
diff --git a/lib/config/CLowInformationContentPenalty.cc b/lib/config/CLowInformationContentPenalty.cc
index 62e301181c..6c82e9d719 100644
--- a/lib/config/CLowInformationContentPenalty.cc
+++ b/lib/config/CLowInformationContentPenalty.cc
@@ -14,66 +14,62 @@
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 const double LOG_MIN = 0.5 * std::log(0.9 * constants::DETECTOR_SCORE_EPSILON / constants::MAXIMUM_DETECTOR_SCORE);
 }
 
-CLowInformationContentPenalty::CLowInformationContentPenalty(const CAutoconfigurerParams &params) :
-        CPenalty(params)
-{}
+CLowInformationContentPenalty::CLowInformationContentPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
+}
 
-CLowInformationContentPenalty *CLowInformationContentPenalty::clone() const
-{
+CLowInformationContentPenalty* CLowInformationContentPenalty::clone() const {
     return new CLowInformationContentPenalty(*this);
 }
 
-std::string CLowInformationContentPenalty::name() const
-{
+std::string CLowInformationContentPenalty::name() const {
     return "low information content";
 }
 
-void CLowInformationContentPenalty::penaltyFromMe(CDetectorSpecification &spec) const
-{
-    if (config_t::isInfoContent(spec.function()))
-    {
-        if (const CFieldStatistics *stats = spec.argumentFieldStatistics())
-        {
-            if (const CCategoricalDataSummaryStatistics *summary = stats->categoricalSummary())
-            {
+void CLowInformationContentPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
+    if (config_t::isInfoContent(spec.function())) {
+        if (const CFieldStatistics* stats = spec.argumentFieldStatistics()) {
+            if (const CCategoricalDataSummaryStatistics* summary = stats->categoricalSummary()) {
                 // Note that the empirical entropy is maximized when
                 double minimumLength = static_cast<double>(summary->minimumLength());
                 double maximumLength = static_cast<double>(summary->maximumLength());
-                double cardinality = static_cast<double>(summary->distinctCount());
-                double entropy = summary->entropy();
-                double penalty = cardinality == 1.0 ?
-                                 0.0 : std::exp(CTools::interpolate(this->params().lowLengthRangeForInfoContent(),
-                                                                    this->params().minimumLengthRangeForInfoContent(),
-                                                                    0.0, LOG_MIN, maximumLength - minimumLength))
-                                       * std::exp(CTools::interpolate(this->params().lowMaximumLengthForInfoContent(),
-                                                                      this->params().minimumMaximumLengthForInfoContent(),
-                                                                      0.0, LOG_MIN, maximumLength))
-                                       * std::exp(CTools::logInterpolate(this->params().lowEntropyForInfoContent(),
-                                                                         this->params().minimumEntropyForInfoContent(),
-                                                                         0.0, LOG_MIN, entropy / std::log(cardinality)))
-                                       * std::exp(CTools::logInterpolate(this->params().lowDistinctCountForInfoContent(),
-                                                                         this->params().minimumDistinctCountForInfoContent(),
-                                                                         LOG_MIN, 0.0, cardinality));
+                double cardinality = static_cast<double>(summary->distinctCount());
+                double entropy = summary->entropy();
+                double penalty = cardinality == 1.0
+                                     ? 0.0
+                                     : std::exp(CTools::interpolate(this->params().lowLengthRangeForInfoContent(),
+                                                                    this->params().minimumLengthRangeForInfoContent(),
+                                                                    0.0,
+                                                                    LOG_MIN,
+                                                                    maximumLength - minimumLength)) *
+                                           std::exp(CTools::interpolate(this->params().lowMaximumLengthForInfoContent(),
+                                                                        this->params().minimumMaximumLengthForInfoContent(),
+                                                                        0.0,
+                                                                        LOG_MIN,
+                                                                        maximumLength)) *
+                                           std::exp(CTools::logInterpolate(this->params().lowEntropyForInfoContent(),
+                                                                           this->params().minimumEntropyForInfoContent(),
+                                                                           0.0,
+                                                                           LOG_MIN,
+                                                                           entropy / std::log(cardinality))) *
+                                           std::exp(CTools::logInterpolate(this->params().lowDistinctCountForInfoContent(),
+                                                                           this->params().minimumDistinctCountForInfoContent(),
+                                                                           LOG_MIN,
+                                                                           0.0,
+                                                                           cardinality));
                 std::string description;
-                if (penalty < 1.0)
-                {
-                    description = "There is weak evidence that '"
-                                  + *spec.argumentField() + "' carries information";
+                if (penalty < 1.0) {
+                    description = "There is weak evidence that '" + *spec.argumentField() + "' carries information";
                 }
                 spec.applyPenalty(penalty, description);
             }
         }
     }
 }
-
 }
 }
diff --git a/lib/config/CLowVariationPenalty.cc b/lib/config/CLowVariationPenalty.cc
index f2bb517dae..a078758a8c 100644
--- a/lib/config/CLowVariationPenalty.cc
+++ b/lib/config/CLowVariationPenalty.cc
@@ -18,12 +18,9 @@
 #include
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 using TDoubleVec = std::vector<double>;
 using TSizeVec = std::vector<std::size_t>;
@@ -37,30 +34,22 @@
 const double INF = boost::numeric::bounds<double>::highest();
 
 //! Compute the coefficient of variation from \p moments.
 template<typename MOMENTS>
-double cov(const MOMENTS &moments)
-{
-    double m  = std::fabs(maths::CBasicStatistics::mean(moments));
-    double sd = std::sqrt(maths::CBasicStatistics::maximumLikelihoodVariance(moments));
+double cov(const MOMENTS& moments) {
+    double m = std::fabs(maths::CBasicStatistics::mean(moments));
+    double sd = std::sqrt(maths::CBasicStatistics::maximumLikelihoodVariance(moments));
     return sd == 0.0 ? 0.0 : (m == 0.0 ? INF : sd / m);
 }
 
 //! Compute the penalty for the partition moments \p moments.
 template<typename MOMENTS>
-void penaltyImpl(const CAutoconfigurerParams &params,
-                 const MOMENTS &moments,
-                 double &penalty,
-                 double &proportionWithLowVariation)
-{
+void penaltyImpl(const CAutoconfigurerParams& params, const MOMENTS& moments, double& penalty, double& proportionWithLowVariation) {
     TMeanAccumulator penalty_;
     proportionWithLowVariation = 0.0;
-    for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i)
-    {
-        double pi = CTools::logInterpolate(params.lowCoefficientOfVariation(),
-                                           params.minimumCoefficientOfVariation(),
-                                           1.0, MIN, cov(i->second));
+    for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i) {
+        double pi =
+            CTools::logInterpolate(params.lowCoefficientOfVariation(), params.minimumCoefficientOfVariation(), 1.0, MIN, cov(i->second));
        penalty_.add(maths::CTools::fastLog(pi), maths::CBasicStatistics::count(i->second));
-        if (pi < 1.0)
-        {
+        if (pi < 1.0) {
             proportionWithLowVariation += 1.0;
         }
     }
@@ -69,24 +58,16 @@ void penaltyImpl(const CAutoconfigurerParams &params,
 }
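The cov() helper above is the coefficient of variation, sd / |mean|, with constant data defined as 0 and a zero mean with non-zero spread as infinity. A tiny worked example of those edge cases (self-contained, not the repository's code):

    #include <cmath>
    #include <iostream>
    #include <limits>

    // Coefficient of variation with the same edge cases as cov() above.
    double cov(double mean, double variance) {
        double m = std::fabs(mean);
        double sd = std::sqrt(variance);
        if (sd == 0.0) { return 0.0; }                                     // constant data: no variation
        if (m == 0.0) { return std::numeric_limits<double>::infinity(); } // zero mean: maximal variation
        return sd / m;
    }

    int main() {
        std::cout << cov(10.0, 4.0) << '\n'; // 0.2: sd of 2 relative to mean of 10
        std::cout << cov(0.0, 0.0) << '\n';  // 0: constant zeros
        std::cout << cov(0.0, 4.0) << '\n';  // inf: variation around a zero mean
    }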
 //! Compute the distinct count penalty for the partition moments \p moments.
-struct SDistinctCountPenalty
-{
+struct SDistinctCountPenalty {
     template<typename MOMENTS>
-    void operator()(const CAutoconfigurerParams &params,
-                    const MOMENTS &moments,
-                    double &penalty,
-                    double &proportionWithLowVariation) const
-    {
+    void
+    operator()(const CAutoconfigurerParams& params, const MOMENTS& moments, double& penalty, double& proportionWithLowVariation) const {
         TMeanAccumulator penalty_;
-        for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i)
-        {
-            double pi = CTools::logInterpolate(params.lowCoefficientOfVariation(),
-                                               params.minimumCoefficientOfVariation(),
-                                               1.0, MIN, cov(i->second.s_DistinctCount));
-            penalty_.add(maths::CTools::fastLog(pi),
-                         maths::CBasicStatistics::count(i->second.s_DistinctCount));
-            if (pi < 1.0)
-            {
+        for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i) {
+            double pi = CTools::logInterpolate(
+                params.lowCoefficientOfVariation(), params.minimumCoefficientOfVariation(), 1.0, MIN, cov(i->second.s_DistinctCount));
+            penalty_.add(maths::CTools::fastLog(pi), maths::CBasicStatistics::count(i->second.s_DistinctCount));
+            if (pi < 1.0) {
                 proportionWithLowVariation += 1.0;
             }
         }
@@ -96,24 +77,16 @@ struct SDistinctCountPenalty
 };
 
 //! Compute the info content penalty for the partition moments \p moments.
-struct SInfoContentPenalty
-{
+struct SInfoContentPenalty {
     template<typename MOMENTS>
-    void operator()(const CAutoconfigurerParams &params,
-                    const MOMENTS &moments,
-                    double &penalty,
-                    double &proportionWithLowVariation) const
-    {
+    void
+    operator()(const CAutoconfigurerParams& params, const MOMENTS& moments, double& penalty, double& proportionWithLowVariation) const {
         TMeanAccumulator penalty_;
-        for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i)
-        {
-            double pi = CTools::logInterpolate(params.lowCoefficientOfVariation(),
-                                               params.minimumCoefficientOfVariation(),
-                                               1.0, MIN, cov(i->second.s_InfoContent));
-            penalty_.add(maths::CTools::fastLog(pi),
-                         maths::CBasicStatistics::count(i->second.s_InfoContent));
-            if (pi < 1.0)
-            {
+        for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i) {
+            double pi = CTools::logInterpolate(
+                params.lowCoefficientOfVariation(), params.minimumCoefficientOfVariation(), 1.0, MIN, cov(i->second.s_InfoContent));
+            penalty_.add(maths::CTools::fastLog(pi), maths::CBasicStatistics::count(i->second.s_InfoContent));
+            if (pi < 1.0) {
                 proportionWithLowVariation += 1.0;
             }
         }
@@ -123,36 +96,23 @@ struct SInfoContentPenalty
 };
 
 //! Get the description prefix.
-std::string descriptionPrefix(const CDetectorSpecification &spec,
-                              double proportionWithLowVariation)
-{
-    if (spec.byField() && spec.partitionField())
-    {
-        return "A significant proportion, "
-               + CTools::prettyPrint(100.0 * proportionWithLowVariation)
-               + "%, of distinct partition and by fields combinations";
+std::string descriptionPrefix(const CDetectorSpecification& spec, double proportionWithLowVariation) {
+    if (spec.byField() && spec.partitionField()) {
+        return "A significant proportion, " + CTools::prettyPrint(100.0 * proportionWithLowVariation) +
+               "%, of distinct partition and by fields combinations";
     }
-    if (spec.byField())
-    {
-        return "A significant proportion, "
-               + CTools::prettyPrint(100.0 * proportionWithLowVariation)
-               + "%, of distinct by fields";
+    if (spec.byField()) {
+        return "A significant proportion, " + CTools::prettyPrint(100.0 * proportionWithLowVariation) + "%, of distinct by fields";
     }
-    if (spec.partitionField())
-    {
-        return "A significant proportion, "
-               + CTools::prettyPrint(100.0 * proportionWithLowVariation)
-               + "%, of distinct partition fields";
+    if (spec.partitionField()) {
+        return "A significant proportion, " + CTools::prettyPrint(100.0 * proportionWithLowVariation) + "%, of distinct partition fields";
     }
     return "";
 }
 
 //! Apply the penalties for count analysis from \p stats.
 template<typename STATS>
-void penaltyForCountImpl(const CAutoconfigurerParams &params,
-                         const STATS &stats,
-                         CDetectorSpecification &spec)
-{
+void penaltyForCountImpl(const CAutoconfigurerParams& params, const STATS& stats, CDetectorSpecification& spec) {
     std::size_t n = stats.bucketStatistics().size();
 
     TSizeVec indices;
@@ -162,27 +122,19 @@ void penaltyForCountImpl(const CAutoconfigurerParams &params,
     penalties.reserve(2 * n);
     descriptions.reserve(2 * n);
 
-    for (std::size_t bid = 0u; bid < n; ++bid)
-    {
-        const TSizeVec &indices_ = params.penaltyIndicesFor(bid);
+    for (std::size_t bid = 0u; bid < n; ++bid) {
+        const TSizeVec& indices_ = params.penaltyIndicesFor(bid);
         double penalty;
         double proportionWithLowVariation;
-        penaltyImpl(params, stats.bucketStatistics()[bid].countMomentsPerPartition(),
-                    penalty, proportionWithLowVariation);
+        penaltyImpl(params, stats.bucketStatistics()[bid].countMomentsPerPartition(), penalty, proportionWithLowVariation);
         indices.insert(indices.end(), indices_.begin(), indices_.end());
         std::string description;
-        if (penalty < 1.0)
-        {
-            if (spec.byField() || spec.partitionField())
-            {
-                description = descriptionPrefix(spec, proportionWithLowVariation)
-                              + " have "+ (penalty == MIN ? "too " : "") + "low"
-                              + " variation in their bucket counts";
-            }
-            else
-            {
-                description = std::string("The variation in the bucket counts is ")
-                              + (penalty == MIN ? "too " : "") + "low";
+        if (penalty < 1.0) {
+            if (spec.byField() || spec.partitionField()) {
+                description = descriptionPrefix(spec, proportionWithLowVariation) + " have " + (penalty == MIN ? "too " : "") + "low" +
+                              " variation in their bucket counts";
+            } else {
+                description = std::string("The variation in the bucket counts is ") + (penalty == MIN ? "too " : "") + "low";
             }
         }
         std::fill_n(std::back_inserter(penalties), indices_.size(), penalty);
@@ -194,12 +146,11 @@
 
 //! Apply the penalties for distinct count analysis from \p stats.
 template<typename STATS, typename PENALTY>
-void penaltyForImpl(const CAutoconfigurerParams &params,
-                    const STATS &stats,
+void penaltyForImpl(const CAutoconfigurerParams& params,
+                    const STATS& stats,
                     PENALTY computePenalty,
-                    const std::string &function,
-                    CDetectorSpecification &spec)
-{
+                    const std::string& function,
+                    CDetectorSpecification& spec) {
     std::size_t n = stats.bucketStatistics().size();
 
     TSizeVec indices;
@@ -209,28 +160,20 @@ void penaltyForImpl(const CAutoconfigurerParams &params,
     penalties.reserve(2 * n);
     descriptions.reserve(2 * n);
 
-    for (std::size_t bid = 0u; bid < n; ++bid)
-    {
-        const TSizeVec &indices_ = params.penaltyIndicesFor(bid);
+    for (std::size_t bid = 0u; bid < n; ++bid) {
+        const TSizeVec& indices_ = params.penaltyIndicesFor(bid);
         indices.insert(indices.end(), indices_.begin(), indices_.end());
-        const std::string &argument = *spec.argumentField();
+        const std::string& argument = *spec.argumentField();
         double penalty = 0.0;
         double proportionWithLowVariation = 0.0;
-        computePenalty(params, stats.bucketStatistics()[bid].argumentMomentsPerPartition(argument),
-                       penalty, proportionWithLowVariation);
+        computePenalty(params, stats.bucketStatistics()[bid].argumentMomentsPerPartition(argument), penalty, proportionWithLowVariation);
         std::string description;
-        if (penalty < 1.0)
-        {
-            if (spec.byField() || spec.partitionField())
-            {
-                description = descriptionPrefix(spec, proportionWithLowVariation)
-                              + " have " + (penalty == MIN ? "too " : "") + "low"
-                              + " variation in their bucket " + function;
-            }
-            else
-            {
-                description = std::string("The variation in the bucket ") + function + " is "
-                              + (penalty == MIN ? "too " : "") + "low";
+        if (penalty < 1.0) {
+            if (spec.byField() || spec.partitionField()) {
+                description = descriptionPrefix(spec, proportionWithLowVariation) + " have " + (penalty == MIN ? "too " : "") + "low" +
+                              " variation in their bucket " + function;
+            } else {
+                description = std::string("The variation in the bucket ") + function + " is " + (penalty == MIN ? "too " : "") + "low";
             }
         }
         std::fill_n(std::back_inserter(penalties), indices_.size(), penalty);
@@ -239,113 +182,87 @@
     spec.applyPenalties(indices, penalties, descriptions);
 }
-
 }
 
-CLowVariationPenalty::CLowVariationPenalty(const CAutoconfigurerParams &params) : CPenalty(params)
-{}
+CLowVariationPenalty::CLowVariationPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
+}
 
-CLowVariationPenalty *CLowVariationPenalty::clone() const
-{
+CLowVariationPenalty* CLowVariationPenalty::clone() const {
     return new CLowVariationPenalty(*this);
 }
 
-std::string CLowVariationPenalty::name() const
-{
+std::string CLowVariationPenalty::name() const {
     return "low variation";
 }
 
-void CLowVariationPenalty::penaltyFromMe(CDetectorSpecification &spec) const
-{
-#define APPLY_COUNTING_PENALTY(penalty)                                                    \
-    if (const CDataCountStatistics *stats_ = spec.countStatistics())                      \
-    {                                                                                      \
-        if (const CPartitionDataCountStatistics *partitionStats =                         \
-                dynamic_cast<const CPartitionDataCountStatistics*>(stats_))               \
-        {                                                                                  \
-            this->penalty(*partitionStats, spec);                                          \
-        }                                                                                  \
-        else if (const CByAndPartitionDataCountStatistics *byAndPartitionStats =          \
-                     dynamic_cast<const CByAndPartitionDataCountStatistics*>(stats_))     \
-        {                                                                                  \
-            this->penalty(*byAndPartitionStats, spec);                                     \
-        }                                                                                  \
-        else if (const CByOverAndPartitionDataCountStatistics *byOverAndPartitionStats =  \
-                     dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(stats_)) \
-        {                                                                                  \
-            this->penalty(*byOverAndPartitionStats, spec);                                 \
-        }                                                                                  \
-    }
+void CLowVariationPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
+#define APPLY_COUNTING_PENALTY(penalty)                                                                                       \
+    if (const CDataCountStatistics* stats_ = spec.countStatistics()) {                                                        \
+        if (const CPartitionDataCountStatistics* partitionStats = dynamic_cast<const CPartitionDataCountStatistics*>(stats_)) { \
+            this->penalty(*partitionStats, spec);                                                                             \
+        } else if (const CByAndPartitionDataCountStatistics* byAndPartitionStats =                                            \
+                       dynamic_cast<const CByAndPartitionDataCountStatistics*>(stats_)) {                                     \
+            this->penalty(*byAndPartitionStats, spec);                                                                        \
+        } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats =                                    \
+                       dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(stats_)) {                                 \
+            this->penalty(*byOverAndPartitionStats, spec);                                                                    \
+        }                                                                                                                     \
    }
 
-    switch (spec.function())
-    {
-    case config_t::E_Count:         APPLY_COUNTING_PENALTY(penaltiesForCount)       break;
-    case config_t::E_Rare:          break;
-    case config_t::E_DistinctCount: APPLY_COUNTING_PENALTY(penaltyForDistinctCount) break;
-    case config_t::E_InfoContent:   APPLY_COUNTING_PENALTY(penaltyForInfoContent)   break;
+    switch (spec.function()) {
+    case config_t::E_Count:
+        APPLY_COUNTING_PENALTY(penaltiesForCount) break;
+    case config_t::E_Rare:
+        break;
+    case config_t::E_DistinctCount:
+        APPLY_COUNTING_PENALTY(penaltyForDistinctCount) break;
+    case config_t::E_InfoContent:
+        APPLY_COUNTING_PENALTY(penaltyForInfoContent) break;
     case config_t::E_Mean:
     case config_t::E_Min:
    case config_t::E_Max:
     case config_t::E_Sum:
     case config_t::E_Varp:
-    case config_t::E_Median:        break;
+    case config_t::E_Median:
+        break;
     }
 }
 
-void CLowVariationPenalty::penaltiesForCount(const CPartitionDataCountStatistics &stats,
-                                             CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltiesForCount(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     penaltyForCountImpl(this->params(), stats, spec);
 }
 
-void CLowVariationPenalty::penaltiesForCount(const CByAndPartitionDataCountStatistics &stats,
-                                             CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltiesForCount(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     penaltyForCountImpl(this->params(), stats, spec);
 }
 
-void CLowVariationPenalty::penaltiesForCount(const CByOverAndPartitionDataCountStatistics &stats,
-                                             CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltiesForCount(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     penaltyForCountImpl(this->params(), stats, spec);
 }
 
-void CLowVariationPenalty::penaltyForDistinctCount(const CPartitionDataCountStatistics &stats,
-                                                   CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltyForDistinctCount(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     penaltyForImpl(this->params(), stats, SDistinctCountPenalty(), "distinct counts", spec);
 }
 
-void CLowVariationPenalty::penaltyForDistinctCount(const CByAndPartitionDataCountStatistics &stats,
-                                                   CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltyForDistinctCount(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     penaltyForImpl(this->params(), stats, SDistinctCountPenalty(), "distinct counts", spec);
 }
 
-void CLowVariationPenalty::penaltyForDistinctCount(const CByOverAndPartitionDataCountStatistics &stats,
-                                                   CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltyForDistinctCount(const CByOverAndPartitionDataCountStatistics& stats,
+                                                   CDetectorSpecification& spec) const {
     penaltyForImpl(this->params(), stats, SDistinctCountPenalty(), "distinct counts", spec);
 }
 
-void CLowVariationPenalty::penaltyForInfoContent(const CPartitionDataCountStatistics &stats,
-                                                 CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltyForInfoContent(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     penaltyForImpl(this->params(), stats, SInfoContentPenalty(), "info content", spec);
 }
 
-void CLowVariationPenalty::penaltyForInfoContent(const CByAndPartitionDataCountStatistics &stats,
-                                                 CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltyForInfoContent(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     penaltyForImpl(this->params(), stats, SInfoContentPenalty(), "info content", spec);
 }
 
-void CLowVariationPenalty::penaltyForInfoContent(const CByOverAndPartitionDataCountStatistics &stats,
-                                                 CDetectorSpecification &spec) const
-{
+void CLowVariationPenalty::penaltyForInfoContent(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     penaltyForImpl(this->params(), stats, SInfoContentPenalty(), "info content", spec);
 }
-
 }
 }
-
diff --git a/lib/config/CNotEnoughDataPenalty.cc b/lib/config/CNotEnoughDataPenalty.cc
index 4fd5abe17e..169d5cfa36 100644
--- a/lib/config/CNotEnoughDataPenalty.cc
+++ b/lib/config/CNotEnoughDataPenalty.cc
@@ -21,107 +21,76 @@
 #include
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
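Both CLowVariationPenalty above and CNotEnoughDataPenalty below fold per-series penalties into one number by accumulating log(penalty) in a TMeanAccumulator and exponentiating the mean, i.e. a (count-weighted) geometric mean. A minimal unweighted sketch of why that is forgiving of a single bad series without zeroing the aggregate:

    #include <cmath>
    #include <iostream>
    #include <vector>

    // Aggregate per-series penalties p_i as exp(mean(log p_i)): a geometric mean.
    // The real accumulators additionally weight each term by its sample count.
    double aggregate(const std::vector<double>& penalties) {
        double logSum = 0.0;
        for (double p : penalties) {
            logSum += std::log(p);
        }
        return std::exp(logSum / static_cast<double>(penalties.size()));
    }

    int main() {
        std::cout << aggregate({1.0, 1.0, 1.0, 1.0}) << '\n';  // 1: nothing penalized
        std::cout << aggregate({1.0, 1.0, 1.0, 0.01}) << '\n'; // ~0.32: one sparse series drags it down
    }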
 
 //! Get the description prefix.
-std::string descriptionPrefix(const CDetectorSpecification &spec,
-                              const TMeanAccumulator &meanOccupied,
-                              std::size_t partitions)
-{
-    if (spec.byField() && spec.partitionField())
-    {
-        return "A significant proportion, "
-               + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied)
-                                     / static_cast<double>(partitions))
-               + "%, of distinct partition and by fields combinations are sparse.";
-    }
-    else if (spec.byField())
-    {
-        return "A significant proportion, "
-               + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied)
-                                     / static_cast<double>(partitions))
-               + "%, of distinct by fields are sparse.";
-    }
-    else if (spec.partitionField())
-    {
-        return "A significant proportion, "
-               + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied)
-                                     / static_cast<double>(partitions))
-               + "%, of distinct partition fields are sparse.";
+std::string descriptionPrefix(const CDetectorSpecification& spec, const TMeanAccumulator& meanOccupied, std::size_t partitions) {
+    if (spec.byField() && spec.partitionField()) {
+        return "A significant proportion, " +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               "%, of distinct partition and by fields combinations are sparse.";
+    } else if (spec.byField()) {
+        return "A significant proportion, " +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               "%, of distinct by fields are sparse.";
+    } else if (spec.partitionField()) {
+        return "A significant proportion, " +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               "%, of distinct partition fields are sparse.";
     }
     return "";
 }
 
-const bool IGNORE_EMPTY[] = { false, true };
+const bool IGNORE_EMPTY[] = {false, true};
 }
 
-CNotEnoughDataPenalty::CNotEnoughDataPenalty(const CAutoconfigurerParams &params) :
-        CPenalty(params)
-{}
+CNotEnoughDataPenalty::CNotEnoughDataPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
+}
 
-CNotEnoughDataPenalty *CNotEnoughDataPenalty::clone() const
-{
+CNotEnoughDataPenalty* CNotEnoughDataPenalty::clone() const {
     return new CNotEnoughDataPenalty(*this);
 }
 
-std::string CNotEnoughDataPenalty::name() const
-{
+std::string CNotEnoughDataPenalty::name() const {
     return "not enough data";
 }
 
-void CNotEnoughDataPenalty::penaltyFromMe(CDetectorSpecification &spec) const
-{
-    if (!config_t::isRare(spec.function()))
-    {
-        if (const CPartitionDataCountStatistics *partitionStats =
-                dynamic_cast<const CPartitionDataCountStatistics*>(spec.countStatistics()))
-        {
+void CNotEnoughDataPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
+    if (!config_t::isRare(spec.function())) {
+        if (const CPartitionDataCountStatistics* partitionStats =
+                dynamic_cast<const CPartitionDataCountStatistics*>(spec.countStatistics())) {
             this->penaltyFor(*partitionStats, spec);
-        }
-        else if (const CByAndPartitionDataCountStatistics *byAndPartitionStats =
-                     dynamic_cast<const CByAndPartitionDataCountStatistics*>(spec.countStatistics()))
-        {
+        } else if (const CByAndPartitionDataCountStatistics* byAndPartitionStats =
+                       dynamic_cast<const CByAndPartitionDataCountStatistics*>(spec.countStatistics())) {
             this->penaltyFor(*byAndPartitionStats, spec);
-        }
-        else if (const CByOverAndPartitionDataCountStatistics *byOverAndPartitionStats =
-                     dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(spec.countStatistics()))
-        {
+        } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats =
+                       dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(spec.countStatistics())) {
             this->penaltyFor(*byOverAndPartitionStats, spec);
         }
     }
 }
 
-void CNotEnoughDataPenalty::penaltyFor(const CPartitionDataCountStatistics &stats,
-                                       CDetectorSpecification &spec) const
-{
+void CNotEnoughDataPenalty::penaltyFor(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec);
 }
 
-void CNotEnoughDataPenalty::penaltyFor(const CByAndPartitionDataCountStatistics &stats,
-                                       CDetectorSpecification &spec) const
-{
+void CNotEnoughDataPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec);
 }
 
-void CNotEnoughDataPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics &stats,
-                                       CDetectorSpecification &spec) const
-{
+void CNotEnoughDataPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
     this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec);
 }
 
-void CNotEnoughDataPenalty::penaltyFor(const TUInt64Vec &bucketCounts,
-                                       const TBucketCountStatisticsVec &statistics,
-                                       CDetectorSpecification &spec) const
-{
+void CNotEnoughDataPenalty::penaltyFor(const TUInt64Vec& bucketCounts,
+                                       const TBucketCountStatisticsVec& statistics,
+                                       CDetectorSpecification& spec) const {
     using TSizeSizePrMomentsUMapCItr = CBucketCountStatistics::TSizeSizePrMomentsUMap::const_iterator;
 
-    const CAutoconfigurerParams::TTimeVec &candidates = this->params().candidateBucketLengths();
+    const CAutoconfigurerParams::TTimeVec& candidates = this->params().candidateBucketLengths();
 
     LOG_TRACE("bucket counts = " << core::CContainerPrinter::print(bucketCounts));
 
@@ -135,29 +104,25 @@ void CNotEnoughDataPenalty::penaltyFor(const TUInt64Vec &bucketCounts,
     config_t::EFunctionCategory function = spec.function();
 
     // Per partition occupancy.
-    for (std::size_t i = 0u; i < boost::size(IGNORE_EMPTY); ++i)
-    {
-        for (std::size_t bid = 0u; bid < candidates.size(); ++bid)
-        {
+    for (std::size_t i = 0u; i < boost::size(IGNORE_EMPTY); ++i) {
+        for (std::size_t bid = 0u; bid < candidates.size(); ++bid) {
             uint64_t bc = bucketCounts[bid];
-            if (bc > 0)
-            {
-                const CBucketCountStatistics &si = statistics[bid];
-                const CBucketCountStatistics::TSizeSizePrMomentsUMap &mi = si.countMomentsPerPartition();
+            if (bc > 0) {
+                const CBucketCountStatistics& si = statistics[bid];
+                const CBucketCountStatistics::TSizeSizePrMomentsUMap& mi = si.countMomentsPerPartition();
 
                 TMeanAccumulator penalty_;
                 TMeanAccumulator meanOccupied;
 
-                for (TSizeSizePrMomentsUMapCItr j = mi.begin(); j != mi.end(); ++j)
-                {
+                for (TSizeSizePrMomentsUMapCItr j = mi.begin(); j != mi.end(); ++j) {
                     double occupied = maths::CBasicStatistics::count(j->second) / static_cast<double>(bc);
-                    double penalty = CTools::logInterpolate(
-                                         this->params().lowPopulatedBucketFraction(function, IGNORE_EMPTY[i]),
-                                         this->params().minimumPopulatedBucketFraction(function, IGNORE_EMPTY[i]),
-                                         1.0, 1.0 / static_cast<double>(bc), occupied);
+                    double penalty = CTools::logInterpolate(this->params().lowPopulatedBucketFraction(function, IGNORE_EMPTY[i]),
+                                                            this->params().minimumPopulatedBucketFraction(function, IGNORE_EMPTY[i]),
+                                                            1.0,
+                                                            1.0 / static_cast<double>(bc),
+                                                            occupied);
                     penalty_.add(maths::CTools::fastLog(penalty));
-                    if (penalty < 1.0)
-                    {
+                    if (penalty < 1.0) {
                         meanOccupied.add(occupied);
                     }
                 }
@@ -167,20 +132,15 @@
                 indices.push_back(index);
                 penalties.push_back(penalty);
                 descriptions.push_back("");
-                if (penalty < 1.0)
-                {
-                    if (spec.byField() || spec.partitionField())
-                    {
-                        descriptions.back() = descriptionPrefix(spec, meanOccupied, si.countMomentsPerPartition().size())
-                                              + " On average, only "
-                                              + CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(meanOccupied))
-                                              + "% of their buckets have a value";
-                    }
-                    else
-                    {
-                        descriptions.back() = std::string("On average only ")
-                                              + CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(meanOccupied))
-                                              + "% of partition buckets have a value";
+                if (penalty < 1.0) {
+                    if (spec.byField() || spec.partitionField()) {
+                        descriptions.back() =
+                            descriptionPrefix(spec, meanOccupied, si.countMomentsPerPartition().size()) + " On average, only " +
+                            CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(meanOccupied)) + "% of their buckets have a value";
+                    } else {
+                        descriptions.back() = std::string("On average only ") +
+                                              CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(meanOccupied)) +
+                                              "% of partition buckets have a value";
                     }
                 }
             }
@@ -188,6 +148,5 @@
         }
     }
     spec.applyPenalties(indices, penalties, descriptions);
 }
-
 }
 }
diff --git a/lib/config/CPenalty.cc b/lib/config/CPenalty.cc
index dca2082e03..900177db57 100644
--- a/lib/config/CPenalty.cc
+++ b/lib/config/CPenalty.cc
@@ -15,161 +15,122 @@
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 const std::string PENALTY_NAME("CPenalty");
 }
 
-CPenalty::CPenalty(const CAutoconfigurerParams &params) : m_Params(params)
-{}
+CPenalty::CPenalty(const CAutoconfigurerParams& params) : m_Params(params) {
+}
 
-CPenalty::CPenalty(const CPenalty &other) :
-        m_Params(other.m_Params)
-{
+CPenalty::CPenalty(const CPenalty& other) : m_Params(other.m_Params) {
     m_Penalties.reserve(other.m_Penalties.size());
-    for (std::size_t i = 0u; i < other.m_Penalties.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < other.m_Penalties.size(); ++i) {
         m_Penalties.push_back(TPenaltyCPtr(other.m_Penalties[i]->clone()));
     }
 }
 
-CPenalty::CPenalty(CClosure closure) :
-        m_Params(closure.penalties()[0]->params())
-{
+CPenalty::CPenalty(CClosure closure) : m_Params(closure.penalties()[0]->params()) {
     m_Penalties.swap(closure.penalties());
 }
 
-CPenalty::~CPenalty() {}
+CPenalty::~CPenalty() {
+}
 
-std::string CPenalty::name() const
-{
+std::string CPenalty::name() const {
     std::string result;
-    for (std::size_t i = 0u; i < m_Penalties.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Penalties.size(); ++i) {
         result += (result.empty() ? "'" : " x '") + m_Penalties[i]->name() + "'";
     }
     return result;
 }
 
-CPenalty *CPenalty::clone() const
-{
+CPenalty* CPenalty::clone() const {
     return new CPenalty(*this);
 }
 
-const CPenalty &CPenalty::operator*=(const CPenalty &rhs)
-{
+const CPenalty& CPenalty::operator*=(const CPenalty& rhs) {
     m_Penalties.push_back(TPenaltyCPtr(rhs.clone()));
     return *this;
 }
 
-const CPenalty &CPenalty::operator*=(CClosure rhs)
-{
+const CPenalty& CPenalty::operator*=(CClosure rhs) {
     m_Penalties.insert(m_Penalties.end(), rhs.penalties().begin(), rhs.penalties().end());
     return *this;
 }
 
-void CPenalty::penalty(const CFieldStatistics &stats,
-                       double &penalty,
-                       std::string &description) const
-{
+void CPenalty::penalty(const CFieldStatistics& stats, double& penalty, std::string& description) const {
     this->penaltyFromMe(stats, penalty, description);
-    if (scoreIsZeroFor(penalty))
-    {
+    if (scoreIsZeroFor(penalty)) {
         return;
     }
-    for (std::size_t i = 0u; i < m_Penalties.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Penalties.size(); ++i) {
         m_Penalties[i]->penalty(stats, penalty, description);
-        if (scoreIsZeroFor(penalty))
-        {
+        if (scoreIsZeroFor(penalty)) {
             break;
         }
     }
 }
 
-void CPenalty::penalize(CDetectorSpecification &spec) const
-{
+void CPenalty::penalize(CDetectorSpecification& spec) const {
     this->penaltyFromMe(spec);
-    if (spec.score() == 0.0)
-    {
+    if (spec.score() == 0.0) {
         return;
     }
-    for (std::size_t i = 0u; i < m_Penalties.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Penalties.size(); ++i) {
         LOG_TRACE("Applying '" << m_Penalties[i]->name() << "' to " << spec.description());
         m_Penalties[i]->penalize(spec);
-        if (spec.score() == 0.0)
-        {
+        if (spec.score() == 0.0) {
             break;
         }
     }
 }
 
-double CPenalty::score(double penalty)
-{
-    return constants::DETECTOR_SCORE_EPSILON * std::floor(  constants::MAXIMUM_DETECTOR_SCORE * penalty
-                                                          / constants::DETECTOR_SCORE_EPSILON);
+double CPenalty::score(double penalty) {
+    return constants::DETECTOR_SCORE_EPSILON * std::floor(constants::MAXIMUM_DETECTOR_SCORE * penalty / constants::DETECTOR_SCORE_EPSILON);
 }
 
-bool CPenalty::scoreIsZeroFor(double penalty)
-{
+bool CPenalty::scoreIsZeroFor(double penalty) {
     return penalty * constants::MAXIMUM_DETECTOR_SCORE < constants::DETECTOR_SCORE_EPSILON;
 }
 
-const CAutoconfigurerParams &CPenalty::params() const
-{
+const CAutoconfigurerParams& CPenalty::params() const {
     return m_Params;
 }
 
-void CPenalty::penaltyFromMe(const CFieldStatistics &/*stats*/,
-                             double &/*penalty*/,
-                             std::string &/*description*/) const
-{
+void CPenalty::penaltyFromMe(const CFieldStatistics& /*stats*/, double& /*penalty*/, std::string& /*description*/) const {
 }
 
-void CPenalty::penaltyFromMe(CDetectorSpecification &/*spec*/) const
-{
+void CPenalty::penaltyFromMe(CDetectorSpecification& /*spec*/) const {
 }
 
-CPenalty::CClosure::CClosure(const CPenalty &penalty)
-{
+CPenalty::CClosure::CClosure(const CPenalty& penalty) {
     this->add(penalty);
 }
 
-CPenalty *CPenalty::CClosure::clone() const
-{
+CPenalty* CPenalty::CClosure::clone() const {
     return new CPenalty(*this);
 }
 
-CPenalty::CClosure &CPenalty::CClosure::add(const CPenalty &penalty)
-{
+CPenalty::CClosure& CPenalty::CClosure::add(const CPenalty& penalty) {
     m_Penalties.push_back(TPenaltyCPtr(penalty.clone()));
     return *this;
 }
 
-CPenalty::TPenaltyCPtrVec &CPenalty::CClosure::penalties()
-{
+CPenalty::TPenaltyCPtrVec& CPenalty::CClosure::penalties() {
     return m_Penalties;
 }
 
-CPenalty::CClosure operator*(const CPenalty &lhs, const CPenalty &rhs)
-{
+CPenalty::CClosure operator*(const CPenalty& lhs, const CPenalty& rhs) {
     return CPenalty::CClosure(lhs).add(rhs);
 }
 
-CPenalty::CClosure operator*(CPenalty::CClosure lhs, const CPenalty &rhs)
-{
+CPenalty::CClosure operator*(CPenalty::CClosure lhs, const CPenalty& rhs) {
     return lhs.add(rhs);
 }
 
-CPenalty::CClosure operator*(const CPenalty &lhs, CPenalty::CClosure rhs)
-{
+CPenalty::CClosure operator*(const CPenalty& lhs, CPenalty::CClosure rhs) {
     return rhs.add(lhs);
 }
-
 }
 }
-
diff --git a/lib/config/CPolledDataPenalty.cc b/lib/config/CPolledDataPenalty.cc
index 3136caa3fb..df82598060 100644
--- a/lib/config/CPolledDataPenalty.cc
+++ b/lib/config/CPolledDataPenalty.cc
@@ -9,8 +9,8 @@
 #include
 #include
-#include
 #include
+#include
 #include
 #include
@@ -21,36 +21,27 @@
 #include
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 const double LOG_TENTH_NUMBER_POLLING_INTERVALS = 10.0;
 }
 
-CPolledDataPenalty::CPolledDataPenalty(const CAutoconfigurerParams &params) :
-        CPenalty(params)
-{}
+CPolledDataPenalty::CPolledDataPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
+}
 
-CPolledDataPenalty *CPolledDataPenalty::clone() const
-{
+CPolledDataPenalty* CPolledDataPenalty::clone() const {
     return new CPolledDataPenalty(*this);
 }
 
-std::string CPolledDataPenalty::name() const
-{
+std::string CPolledDataPenalty::name() const {
     return "polled data penalty";
 }
 
-void CPolledDataPenalty::penaltyFromMe(CDetectorSpecification &spec) const
-{
-    if (const CDataCountStatistics *stats = spec.countStatistics())
-    {
-        if (TOptionalTime interval = this->pollingInterval(*stats))
-        {
-            const TTimeVec &candidates = this->params().candidateBucketLengths();
+void CPolledDataPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
+    if (const CDataCountStatistics* stats = spec.countStatistics()) {
+        if (TOptionalTime interval = this->pollingInterval(*stats)) {
+            const TTimeVec& candidates = this->params().candidateBucketLengths();
 
             TSizeVec indices;
             TDoubleVec penalties;
@@ -59,22 +50,19 @@ void CPolledDataPenalty::penaltyFromMe(CDetectorSpecification &spec) const
             penalties.reserve(2 * candidates.size());
             descriptions.reserve(2 * candidates.size());
 
-            for (std::size_t bid = 0u; bid < candidates.size(); ++bid)
-            {
-                if (candidates[bid] < *interval)
-                {
-                    const TSizeVec &indices_ = this->params().penaltyIndicesFor(bid);
+            for (std::size_t bid = 0u; bid < candidates.size(); ++bid) {
+                if (candidates[bid] < *interval) {
+                    const TSizeVec& indices_ = this->params().penaltyIndicesFor(bid);
                     indices.insert(indices.end(), indices_.begin(), indices_.end());
                     std::fill_n(std::back_inserter(penalties),
                                 indices_.size(),
-                                std::pow(0.1,   static_cast<double>(stats->timeRange())
-                                              / static_cast<double>(*interval)
-                                              / LOG_TENTH_NUMBER_POLLING_INTERVALS));
+                                std::pow(0.1,
+                                         static_cast<double>(stats->timeRange()) / static_cast<double>(*interval) /
+                                             LOG_TENTH_NUMBER_POLLING_INTERVALS));
                     std::fill_n(std::back_inserter(descriptions),
                                 indices_.size(),
-                                CTools::prettyPrint(candidates[bid])
-                                + " is shorter than possible polling interval "
-                                + CTools::prettyPrint(*interval));
+                                CTools::prettyPrint(candidates[bid]) + " is shorter than possible polling interval " +
+                                    CTools::prettyPrint(*interval));
                 }
             }
 
@@ -83,25 +71,20 @@ void CPolledDataPenalty::penaltyFromMe(CDetectorSpecification &spec) const
         }
     }
 }
 
-CPolledDataPenalty::TOptionalTime
-    CPolledDataPenalty::pollingInterval(const CDataCountStatistics &stats) const
-{
+CPolledDataPenalty::TOptionalTime CPolledDataPenalty::pollingInterval(const CDataCountStatistics& stats) const {
     using TMaxAccumulator =
-        maths::CBasicStatistics::COrderStatisticsStack;
+        maths::CBasicStatistics::COrderStatisticsStack;
 
-    const maths::CQuantileSketch &F = stats.arrivalTimeDistribution();
-    const maths::CQuantileSketch::TFloatFloatPrVec &knots = F.knots();
-    if (knots.size() == 1)
-    {
+    const maths::CQuantileSketch& F = stats.arrivalTimeDistribution();
+    const maths::CQuantileSketch::TFloatFloatPrVec& knots = F.knots();
+    if (knots.size() == 1) {
         return static_cast<core_t::TTime>(knots[0].first);
     }
 
     // Find the two biggest steps in the c.d.f.
     TMaxAccumulator steps;
-    for (std::size_t i = 0u; i < knots.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < knots.size(); ++i) {
         steps.add(knots[i]);
     }
 
@@ -110,9 +93,8 @@
     // value of the larger abscissa.
     double lower = steps[0].first;
     double upper = steps[1].first;
-    double mass  = (steps[0].second + steps[1].second) / F.count();
-    if (lower > upper)
-    {
+    double mass = (steps[0].second + steps[1].second) / F.count();
+    if (lower > upper) {
         std::swap(lower, upper);
     }
 
@@ -123,18 +105,12 @@
     F.cdf(upper + 0.01 * upper, f[3]);
     mass = f[1] - f[0] + f[3] - f[2];
 
-    if (   mass > this->params().polledDataMinimumMassAtInterval()
-        && lower < this->params().polledDataJitter() * upper)
-    {
+    if (mass > this->params().polledDataMinimumMassAtInterval() && lower < this->params().polledDataJitter() * upper) {
         return static_cast<core_t::TTime>(upper);
-    }
-    else
-    {
+    } else {
     }
 
     return TOptionalTime();
 }
-
 }
 }
-
diff --git a/lib/config/CReportWriter.cc b/lib/config/CReportWriter.cc
index 82969f261d..d1e286545b 100644
--- a/lib/config/CReportWriter.cc
+++ b/lib/config/CReportWriter.cc
@@ -17,70 +17,55 @@
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 using TSizeVec = std::vector<std::size_t>;
 using TStrVec = std::vector<std::string>;
 using TStrVecVec = std::vector<TStrVec>;
 
 //! Pad \p value.
-inline std::string pad(std::size_t padTo, const std::string &value)
-{
-    return   std::string((padTo - value.length()) / 2, ' ')
-           + value
-           + std::string((padTo - value.length() + 1) / 2, ' ');
+inline std::string pad(std::size_t padTo, const std::string& value) {
+    return std::string((padTo - value.length()) / 2, ' ') + value + std::string((padTo - value.length() + 1) / 2, ' ');
 }
 
 //! Pass the string back.
-const std::string &print(const std::string &s)
-{
+const std::string& print(const std::string& s) {
     return s;
 }
 
 //! Convert to a string.
 template<typename T>
-inline std::string print(const T &t)
-{
+inline std::string print(const T& t) {
     return core::CStringUtils::typeToString(t);
 }
 
 //! Convert to a string.
-inline std::string print(double t)
-{
+inline std::string print(double t) {
     return core::CStringUtils::typeToStringPrecise(t, core::CIEEE754::E_SinglePrecision);
 }
 
 //! Write out a pair space delimited.
 template<typename U, typename V>
-inline std::string print(const std::pair<U, V> &p, std::size_t padTo = 0)
-{
-    std::string first  = print(p.first);
+inline std::string print(const std::pair<U, V>& p, std::size_t padTo = 0) {
+    std::string first = print(p.first);
     std::string second = print(p.second);
-    return (padTo > 0 ? pad(padTo, first) : first)
-           + " "
-           + (padTo > 0 ? pad(padTo, second) : second);
+    return (padTo > 0 ? pad(padTo, first) : first) + " " + (padTo > 0 ? pad(padTo, second) : second);
 }
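The pad() helper above centres a value in a fixed-width column, putting any odd leftover space on the right; it assumes the value fits in the column (padTo - value.length() underflows otherwise). A quick self-contained check of that behaviour:

    #include <iostream>
    #include <string>

    // Mirrors the pad() helper above: centre value in a field of width padTo.
    // Assumes value.length() <= padTo, as the report writer's callers guarantee.
    std::string pad(std::size_t padTo, const std::string& value) {
        return std::string((padTo - value.length()) / 2, ' ') + value +
               std::string((padTo - value.length() + 1) / 2, ' ');
    }

    int main() {
        std::cout << '[' << pad(10, "abc") << "]\n";  // [   abc    ]: odd slack goes right
        std::cout << '[' << pad(10, "abcd") << "]\n"; // [   abcd   ]
    }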
 //! Write out a vector of pairs new line delimited.
 template<typename U, typename V>
-inline std::string print(const std::vector<std::pair<U, V>> &v, std::size_t padTo = 0)
-{
+inline std::string print(const std::vector<std::pair<U, V>>& v, std::size_t padTo = 0) {
     std::string result;
-    for (std::size_t i = 0u; i < v.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < v.size(); ++i) {
         result += print(v[i], padTo) + "\n";
     }
     return result;
 }
 
 //!
-TStrVec splitMultifields(const std::string &field)
-{
+TStrVec splitMultifields(const std::string& field) {
     TStrVec fields;
     std::string remainder;
     core::CStringUtils::tokenise("\n", field, fields, remainder);
@@ -89,14 +74,11 @@
 }
 
 //! Compute the length of the longest field for \p statistic.
-std::size_t longest(const TStrVecVec &fields, std::size_t statistic)
-{
+std::size_t longest(const TStrVecVec& fields, std::size_t statistic) {
     std::size_t longest = 0u;
-    for (std::size_t i = 0u; i < fields.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < fields.size(); ++i) {
         TStrVec fi = splitMultifields(fields[i][statistic]);
-        for (std::size_t j = 0u; j < fi.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < fi.size(); ++j) {
             longest = std::max(longest, fi[j].length());
         }
     }
@@ -105,25 +87,17 @@
 
 //! Write a row of the summary statistic table.
 template<std::size_t N, typename ROW>
-void writeTableRow(std::ostream &o,
-                   const TSizeVec &padTo,
-                   const std::size_t (&stats)[N],
-                   const ROW &row)
-{
+void writeTableRow(std::ostream& o, const TSizeVec& padTo, const std::size_t (&stats)[N], const ROW& row) {
     TStrVecVec columnFields;
     columnFields.reserve(N);
     std::size_t height = 1u;
-    for (std::size_t i = 0u; i < N; ++i)
-    {
+    for (std::size_t i = 0u; i < N; ++i) {
         columnFields.push_back(splitMultifields(row[stats[i]]));
         height = std::max(height, columnFields[i].size());
     }
-    for (std::size_t i = 0u; i < height; ++i)
-    {
-        for (std::size_t j = 0u; j < N; ++j)
-        {
-            o << (i < columnFields[j].size() ? pad(padTo[j], columnFields[j][i]) :
-                                               std::string(padTo[j], ' '));
+    for (std::size_t i = 0u; i < height; ++i) {
+        for (std::size_t j = 0u; j < N; ++j) {
+            o << (i < columnFields[j].size() ? pad(padTo[j], columnFields[j][i]) : std::string(padTo[j], ' '));
         }
         o << "\n";
     }
@@ -131,17 +105,12 @@
 
 //! Write the summary statistic table.
 template<std::size_t M, std::size_t N>
-void writeTable(std::ostream &o,
-                const std::string (&labels)[M],
-                const std::size_t (&stats)[N],
-                const TStrVecVec &values)
-{
+void writeTable(std::ostream& o, const std::string (&labels)[M], const std::size_t (&stats)[N], const TStrVecVec& values) {
     // Compute the table pads.
     TSizeVec padTo(N, 0);
     std::size_t tableWidth = 0;
-    for (std::size_t i = 0u; i < N; ++i)
-    {
-        const std::string &label = labels[stats[i]];
+    for (std::size_t i = 0u; i < N; ++i) {
+        const std::string& label = labels[stats[i]];
         padTo[i] = std::max(longest(values, stats[i]), label.length()) + 4;
         tableWidth += padTo[i];
     }
@@ -149,98 +118,80 @@
 
     // Write the table.
     writeTableRow(o, padTo, stats, labels);
     o << std::string(tableWidth, '-') << "\n";
-    for (std::size_t i = 0u; i < values.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < values.size(); ++i) {
         writeTableRow(o, padTo, stats, values[i]);
     }
 }
 
 const TStrVec NO_STRINGS;
-
 }
 
-CReportWriter::CReportWriter(std::ostream &writeStream) : m_WriteStream(writeStream)
-{
+CReportWriter::CReportWriter(std::ostream& writeStream) : m_WriteStream(writeStream) {
 }
 
-bool CReportWriter::fieldNames(const TStrVec &/*fieldNames*/,
-                               const TStrVec &/*extraFieldNames*/)
-{
+bool CReportWriter::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) {
     return true;
 }
 
-const CReportWriter::TStrVec &CReportWriter::fieldNames() const
-{
+const CReportWriter::TStrVec& CReportWriter::fieldNames() const {
     return NO_STRINGS;
 }
 
-bool CReportWriter::writeRow(const TStrStrUMap &/*dataRowFields*/,
-                             const TStrStrUMap &/*overrideDataRowFields*/)
-{
+bool CReportWriter::writeRow(const TStrStrUMap& /*dataRowFields*/, const TStrStrUMap& /*overrideDataRowFields*/) {
     return true;
 }
 
-void CReportWriter::addTotalRecords(uint64_t n)
-{
+void CReportWriter::addTotalRecords(uint64_t n) {
     m_TotalRecords = print(n);
 }
 
-void CReportWriter::addInvalidRecords(uint64_t n)
-{
+void CReportWriter::addInvalidRecords(uint64_t n) {
     m_InvalidRecords = print(n);
 }
 
-void CReportWriter::addFieldStatistics(const std::string &field,
-                                       config_t::EDataType type,
-                                       const CDataSummaryStatistics &summary)
-{
+void CReportWriter::addFieldStatistics(const std::string& field, config_t::EDataType type, const CDataSummaryStatistics& summary) {
     std::size_t n = m_UnclassifiedFields.size();
     m_UnclassifiedFields.push_back(TStrVec(NUMBER_STATISTICS));
-    m_UnclassifiedFields[n][FIELD_NAME]    = field;
-    m_UnclassifiedFields[n][DATA_TYPE]     = config_t::print(type);
+    m_UnclassifiedFields[n][FIELD_NAME] = field;
+    m_UnclassifiedFields[n][DATA_TYPE] = config_t::print(type);
     m_UnclassifiedFields[n][EARLIEST_TIME] = core::CTimeUtils::toLocalString(summary.earliest());
-    m_UnclassifiedFields[n][LATEST_TIME]   = core::CTimeUtils::toLocalString(summary.latest());
-    m_UnclassifiedFields[n][MEAN_RATE]     = CTools::prettyPrint(summary.meanRate());
+    m_UnclassifiedFields[n][LATEST_TIME] = core::CTimeUtils::toLocalString(summary.latest());
+    m_UnclassifiedFields[n][MEAN_RATE] = CTools::prettyPrint(summary.meanRate());
 }
 
-void CReportWriter::addFieldStatistics(const std::string &field,
+void CReportWriter::addFieldStatistics(const std::string& field,
                                        config_t::EDataType type,
-                                       const CCategoricalDataSummaryStatistics &summary)
-{
+                                       const CCategoricalDataSummaryStatistics& summary) {
     std::size_t n = m_CategoricalFields.size();
     m_CategoricalFields.push_back(TStrVec(NUMBER_STATISTICS));
-    m_CategoricalFields[n][FIELD_NAME]    = field;
-    m_CategoricalFields[n][DATA_TYPE]     = config_t::print(type);
+    m_CategoricalFields[n][FIELD_NAME] = field;
+    m_CategoricalFields[n][DATA_TYPE] = config_t::print(type);
     m_CategoricalFields[n][EARLIEST_TIME] = core::CTimeUtils::toLocalString(summary.earliest());
-    m_CategoricalFields[n][LATEST_TIME]   = core::CTimeUtils::toLocalString(summary.latest());
-    m_CategoricalFields[n][MEAN_RATE]     = CTools::prettyPrint(summary.meanRate());
+    m_CategoricalFields[n][LATEST_TIME] = core::CTimeUtils::toLocalString(summary.latest());
+    m_CategoricalFields[n][MEAN_RATE] = CTools::prettyPrint(summary.meanRate());
     m_CategoricalFields[n][CATEGORICAL_DISTINCT_COUNT] = print(summary.distinctCount());
     CCategoricalDataSummaryStatistics::TStrSizePrVec topn;
     summary.topN(topn);
     m_CategoricalFields[n][CATEGORICAL_TOP_N_COUNTS] = print(topn);
 }
 
-void CReportWriter::addFieldStatistics(const std::string &field,
-                                       config_t::EDataType type,
-                                       const CNumericDataSummaryStatistics &summary)
-{
+void CReportWriter::addFieldStatistics(const std::string& field, config_t::EDataType type, const CNumericDataSummaryStatistics& summary) {
     std::size_t n = m_NumericFields.size();
     m_NumericFields.push_back(TStrVec(NUMBER_STATISTICS));
-    m_NumericFields[n][FIELD_NAME]    = field;
-    m_NumericFields[n][DATA_TYPE]     = config_t::print(type);
-    m_NumericFields[n][EARLIEST_TIME] = core::CTimeUtils::toLocalString(summary.earliest());
-    m_NumericFields[n][LATEST_TIME]   = core::CTimeUtils::toLocalString(summary.latest());
-    m_NumericFields[n][MEAN_RATE]     = CTools::prettyPrint(summary.meanRate());
+    m_NumericFields[n][FIELD_NAME] = field;
+    m_NumericFields[n][DATA_TYPE] = config_t::print(type);
+    m_NumericFields[n][EARLIEST_TIME] = core::CTimeUtils::toLocalString(summary.earliest());
+    m_NumericFields[n][LATEST_TIME] = core::CTimeUtils::toLocalString(summary.latest());
+    m_NumericFields[n][MEAN_RATE] = CTools::prettyPrint(summary.meanRate());
     m_NumericFields[n][NUMERIC_MINIMUM] = CTools::prettyPrint(summary.minimum());
-    m_NumericFields[n][NUMERIC_MEDIAN]  = CTools::prettyPrint(summary.median());
+    m_NumericFields[n][NUMERIC_MEDIAN] = CTools::prettyPrint(summary.median());
     m_NumericFields[n][NUMERIC_MAXIMUM] = CTools::prettyPrint(summary.maximum());
     CNumericDataSummaryStatistics::TDoubleDoublePrVec densitychart;
     summary.densityChart(densitychart);
     m_NumericFields[n][NUMERIC_DENSITY_CHART] = print(densitychart, 15);
 }
 
-void CReportWriter::addDetector(const CDetectorSpecification &spec)
-{
+void CReportWriter::addDetector(const CDetectorSpecification& spec) {
     std::size_t n = m_Detectors.size();
     m_Detectors.push_back(TStrVecVecVec(NUMBER_ATTRIBUTES));
     m_Detectors[n][DESCRIPTION].push_back(TStrVec(1, spec.description()));
@@ -248,143 +199,97 @@ void CReportWriter::addDetector(const CDetectorSpecification &spec)
     CDetectorSpecification::TParamScoresVec scores;
     spec.scores(scores);
     m_Detectors[n][PARAMETER_SCORES].resize(scores.size(), TStrVec(NUMBER_PARAMETERS));
-    for (std::size_t i = 0u; i < scores.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < scores.size(); ++i) {
         m_Detectors[n][PARAMETER_SCORES][i][BUCKET_LENGTH_PARAMETER] = CTools::prettyPrint(scores[i].s_BucketLength);
-        m_Detectors[n][PARAMETER_SCORES][i][IGNORE_EMPTY_PARAMETER]  = scores[i].s_IgnoreEmpty;
-        m_Detectors[n][PARAMETER_SCORES][i][SCORE_PARAMETER]         = CTools::prettyPrint(scores[i].s_Score);
-        m_Detectors[n][PARAMETER_SCORES][i][DESCRIPTION_PARAMETER]   = scores[i].s_Descriptions.empty() ?
-                                                                       std::string("-") : scores[i].s_Descriptions[0];
-        for (std::size_t j = 1u; j < scores[i].s_Descriptions.size(); ++j)
-        {
+        m_Detectors[n][PARAMETER_SCORES][i][IGNORE_EMPTY_PARAMETER] = scores[i].s_IgnoreEmpty;
+        m_Detectors[n][PARAMETER_SCORES][i][SCORE_PARAMETER] = CTools::prettyPrint(scores[i].s_Score);
+        m_Detectors[n][PARAMETER_SCORES][i][DESCRIPTION_PARAMETER] =
+            scores[i].s_Descriptions.empty() ? std::string("-") : scores[i].s_Descriptions[0];
+        for (std::size_t j = 1u; j < scores[i].s_Descriptions.size(); ++j) {
             m_Detectors[n][PARAMETER_SCORES][i][DESCRIPTION_PARAMETER] += "\n" + scores[i].s_Descriptions[j];
         }
     }
     m_Detectors[n][DETECTOR_CONFIG].push_back(TStrVec(1, spec.detectorConfig()));
 }
 
-void CReportWriter::write() const
-{
+void CReportWriter::write() const {
     m_WriteStream << "============\n";
     m_WriteStream << "DATA SUMMARY\n";
     m_WriteStream << "============\n\n";
 
-    m_WriteStream << "Found " << (  m_UnclassifiedFields.size()
-                                  + m_CategoricalFields.size()
-                                  + m_NumericFields.size()) << " fields\n";
+    m_WriteStream << "Found " << (m_UnclassifiedFields.size() + m_CategoricalFields.size() + m_NumericFields.size()) << " fields\n";
     m_WriteStream << "Processed " << m_TotalRecords << " records\n";
     m_WriteStream << "There were " << m_InvalidRecords << " invalid records\n";
 
-    if (m_UnclassifiedFields.size() > 0)
-    {
+    if (m_UnclassifiedFields.size() > 0) {
         m_WriteStream << "\nUnclassified Fields\n";
-        m_WriteStream <<   "===================\n\n";
+        m_WriteStream << "===================\n\n";
         writeTable(m_WriteStream, STATISTIC_LABELS, UNCLASSIFIED_STATISTICS, m_UnclassifiedFields);
     }
 
-    if (m_CategoricalFields.size() > 0)
-    {
+    if (m_CategoricalFields.size() > 0) {
         m_WriteStream << "\nCategorical Fields\n";
-        m_WriteStream <<   "==================\n\n";
+        m_WriteStream << "==================\n\n";
         writeTable(m_WriteStream, STATISTIC_LABELS, CATEGORICAL_STATISTICS, m_CategoricalFields);
-        for (std::size_t i = 0u; i < m_CategoricalFields.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < m_CategoricalFields.size(); ++i) {
             m_WriteStream << "\nMost frequent for '" << m_CategoricalFields[i][FIELD_NAME] << "':\n";
             m_WriteStream << m_CategoricalFields[i][CATEGORICAL_TOP_N_COUNTS];
         }
     }
 
-    if (m_NumericFields.size() > 0)
-    {
+    if (m_NumericFields.size() > 0) {
         m_WriteStream << "\nNumeric Fields\n";
-        m_WriteStream <<   "==============\n\n";
+        m_WriteStream << "==============\n\n";
         writeTable(m_WriteStream, STATISTIC_LABELS, NUMERIC_STATISTICS, m_NumericFields);
-        for (std::size_t i = 0u; i < m_NumericFields.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < m_NumericFields.size(); ++i) {
             m_WriteStream << "\nProbability density for '" << m_NumericFields[i][FIELD_NAME] << "':\n";
             m_WriteStream << pad(15, "x") << pad(15, "f(x)") << "\n";
             m_WriteStream << m_NumericFields[i][NUMERIC_DENSITY_CHART];
         }
     }
 
-    if (m_Detectors.size() > 0)
-    {
+    if (m_Detectors.size() > 0) {
         m_WriteStream << "\n\n\n===================\n";
-        m_WriteStream <<       "CANDIDATE DETECTORS\n";
-        m_WriteStream <<       "===================";
-        for (std::size_t i = 0u; i < m_Detectors.size(); ++i)
-        {
+        m_WriteStream << "CANDIDATE DETECTORS\n";
+        m_WriteStream << "===================";
+        for (std::size_t i = 0u; i < m_Detectors.size(); ++i) {
            m_WriteStream << "\n\n\n" << m_Detectors[i][DESCRIPTION][0][0] << "\n";
            m_WriteStream << std::string(m_Detectors[i][DESCRIPTION][0][0].length(), '=') << "\n";
            m_WriteStream << "\n  Best parameters score: " << m_Detectors[i][OVERALL_SCORE][0][0] << "\n\n";
            writeTable(m_WriteStream, PARAMETER_LABELS, DETECTOR_PARAMETERS, m_Detectors[i][PARAMETER_SCORES]);
-            if (!m_Detectors[i][DETECTOR_CONFIG][0][0].empty())
-            {
+            if (!m_Detectors[i][DETECTOR_CONFIG][0][0].empty()) {
                m_WriteStream << "\n" << m_Detectors[i][DETECTOR_CONFIG][0][0] << "\n";
            }
        }
    }
 }
 
-const std::string CReportWriter::STATISTIC_LABELS[NUMBER_STATISTICS] =
-    {
-        std::string("Field Name"),
-        std::string("Data Type"),
-        std::string("Earliest Time"),
-        std::string("Latest Time"),
-        std::string("Mean Rate"),
-        std::string("Distinct Categories"),
-        std::string("Most Frequent Categories"),
-        std::string("Minimum"),
-        std::string("Median"),
-        std::string("Maximum"),
-        std::string("Probability Density Chart")
-    };
-
-const std::string CReportWriter::PARAMETER_LABELS[NUMBER_PARAMETERS] =
-    {
-        std::string("Bucket Length"),
-        std::string("Ignore Empty"),
-        std::string("Score"),
-        std::string("Explanation")
-    };
-
-const std::size_t CReportWriter::UNCLASSIFIED_STATISTICS[] =
-    {
-        FIELD_NAME,
-        DATA_TYPE,
-        EARLIEST_TIME,
-        LATEST_TIME,
-        MEAN_RATE
-    };
+const std::string CReportWriter::STATISTIC_LABELS[NUMBER_STATISTICS] = {std::string("Field Name"),
+                                                                        std::string("Data Type"),
+                                                                        std::string("Earliest Time"),
+                                                                        std::string("Latest Time"),
+                                                                        std::string("Mean Rate"),
+                                                                        std::string("Distinct Categories"),
+                                                                        std::string("Most Frequent Categories"),
+                                                                        std::string("Minimum"),
+                                                                        std::string("Median"),
+                                                                        std::string("Maximum"),
+                                                                        std::string("Probability Density Chart")};
+
+const std::string CReportWriter::PARAMETER_LABELS[NUMBER_PARAMETERS] = {std::string("Bucket Length"),
+                                                                        std::string("Ignore Empty"),
+                                                                        std::string("Score"),
+                                                                        std::string("Explanation")};
+
+const std::size_t CReportWriter::UNCLASSIFIED_STATISTICS[] = {FIELD_NAME, DATA_TYPE, EARLIEST_TIME, LATEST_TIME, MEAN_RATE};
 
 const std::size_t CReportWriter::CATEGORICAL_STATISTICS[] =
-    {
-        FIELD_NAME,
-        DATA_TYPE,
-        EARLIEST_TIME,
-        LATEST_TIME,
-        MEAN_RATE,
-        CATEGORICAL_DISTINCT_COUNT
-    };
+    {FIELD_NAME, DATA_TYPE, EARLIEST_TIME, LATEST_TIME, MEAN_RATE, CATEGORICAL_DISTINCT_COUNT};
 
 const std::size_t CReportWriter::NUMERIC_STATISTICS[] =
-    {
-        FIELD_NAME,
-        DATA_TYPE,
-        EARLIEST_TIME,
-        LATEST_TIME,
-        MEAN_RATE,
-        NUMERIC_MINIMUM,
-        NUMERIC_MEDIAN,
-        NUMERIC_MAXIMUM
-    };
-
-const std::size_t CReportWriter::DETECTOR_PARAMETERS[] =
-    {
-        BUCKET_LENGTH_PARAMETER,
-        IGNORE_EMPTY_PARAMETER,
-        SCORE_PARAMETER,
-        DESCRIPTION_PARAMETER
-    };
+    {FIELD_NAME, DATA_TYPE, EARLIEST_TIME, LATEST_TIME, MEAN_RATE, NUMERIC_MINIMUM, NUMERIC_MEDIAN, NUMERIC_MAXIMUM};
+const std::size_t CReportWriter::DETECTOR_PARAMETERS[] = {BUCKET_LENGTH_PARAMETER,
+                                                          IGNORE_EMPTY_PARAMETER,
+                                                          SCORE_PARAMETER,
+                                                          DESCRIPTION_PARAMETER};
 }
 }
diff --git a/lib/config/CSpanTooSmallForBucketLengthPenalty.cc b/lib/config/CSpanTooSmallForBucketLengthPenalty.cc
index 7b08d5398f..41b4f10dcc 100644
--- a/lib/config/CSpanTooSmallForBucketLengthPenalty.cc
+++ b/lib/config/CSpanTooSmallForBucketLengthPenalty.cc
@@ -11,30 +11,23 @@
 #include
 #include
 
-namespace ml
-{
-namespace config
-{
+namespace ml {
+namespace config {
 
-CSpanTooSmallForBucketLengthPenalty::CSpanTooSmallForBucketLengthPenalty(const CAutoconfigurerParams &params) :
-        CPenalty(params)
-{}
+CSpanTooSmallForBucketLengthPenalty::CSpanTooSmallForBucketLengthPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
+}
 
-CSpanTooSmallForBucketLengthPenalty *CSpanTooSmallForBucketLengthPenalty::clone() const
-{
+CSpanTooSmallForBucketLengthPenalty* CSpanTooSmallForBucketLengthPenalty::clone() const {
     return new CSpanTooSmallForBucketLengthPenalty(*this);
 }
 
-std::string CSpanTooSmallForBucketLengthPenalty::name() const
-{
+std::string CSpanTooSmallForBucketLengthPenalty::name() const {
     return "span too small for bucket length";
 }
 
-void CSpanTooSmallForBucketLengthPenalty::penaltyFromMe(CDetectorSpecification &spec) const
-{
-    if (const CDataCountStatistics *stats = spec.countStatistics())
-    {
-        const TTimeVec &candidates = this->params().candidateBucketLengths();
+void
CSpanTooSmallForBucketLengthPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
+    if (const CDataCountStatistics* stats = spec.countStatistics()) {
+        const TTimeVec& candidates = this->params().candidateBucketLengths();
 
         TSizeVec indices;
         TDoubleVec penalties;
@@ -43,14 +36,14 @@ void CSpanTooSmallForBucketLengthPenalty::penaltyFromMe(CDetectorSpecification &
         penalties.reserve(2 * candidates.size());
         descriptions.reserve(2 * candidates.size());
 
-        for (std::size_t bid = 0u; bid < candidates.size(); ++bid)
-        {
-            const TSizeVec &indices_ = this->params().penaltyIndicesFor(bid);
+        for (std::size_t bid = 0u; bid < candidates.size(); ++bid) {
+            const TSizeVec& indices_ = this->params().penaltyIndicesFor(bid);
             indices.insert(indices.end(), indices_.begin(), indices_.end());
             double penalty = CTools::logInterpolate(this->params().minimumNumberOfBucketsForConfig(),
                                                     this->params().lowNumberOfBucketsForConfig(),
-                                                    0.0, 1.0, static_cast<double>(stats->timeRange()
-                                                                                  / candidates[bid]));
+                                                    0.0,
+                                                    1.0,
+                                                    static_cast<double>(stats->timeRange() / candidates[bid]));
             std::string description = penalty < 1.0 ? "The data span is too short to properly assess the bucket length" : "";
             std::fill_n(std::back_inserter(penalties), indices_.size(), penalty);
             std::fill_n(std::back_inserter(descriptions), indices_.size(), description);
@@ -59,6 +52,5 @@ void CSpanTooSmallForBucketLengthPenalty::penaltyFromMe(CDetectorSpecification &
         spec.applyPenalties(indices, penalties, descriptions);
     }
 }
-
 }
 }
diff --git a/lib/config/CSparseCountPenalty.cc b/lib/config/CSparseCountPenalty.cc
index ec62e16f2a..089e44c50e 100644
--- a/lib/config/CSparseCountPenalty.cc
+++ b/lib/config/CSparseCountPenalty.cc
@@ -12,28 +12,23 @@
 #include
 #include
 
-#include
 #include
+#include
 
 #include
 #include
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 using TDoubleVec = std::vector<double>;
 
 //! Extract the \p n quantiles from \p quantiles.
-void extract(const maths::CQuantileSketch &quantiles, std::size_t n, TDoubleVec &result)
-{
-    for (std::size_t i = 1u; i <= n; ++i)
-    {
+void extract(const maths::CQuantileSketch& quantiles, std::size_t n, TDoubleVec& result) {
+    for (std::size_t i = 1u; i <= n; ++i) {
         double x;
         quantiles.quantile(100.0 * static_cast<double>(i) / static_cast<double>(n + 1), x);
         result[i - 1] = x;
@@ -41,16 +36,11 @@ void extract(const maths::CQuantileSketch &quantiles, std::size_t n, TDoubleVec
     }
 }
 
 //! Get the quantiles adjusted for empty buckets.
-const maths::CQuantileSketch &correctForEmptyBuckets(bool ignoreEmpty,
-                                                     uint64_t buckets,
-                                                     maths::CQuantileSketch &placeholder,
-                                                     const maths::CQuantileSketch &quantiles)
-{
-    if (!ignoreEmpty)
-    {
+const maths::CQuantileSketch&
+correctForEmptyBuckets(bool ignoreEmpty, uint64_t buckets, maths::CQuantileSketch& placeholder, const maths::CQuantileSketch& quantiles) {
+    if (!ignoreEmpty) {
         double n = static_cast<double>(buckets) - quantiles.count();
-        if (n > 0.0)
-        {
+        if (n > 0.0) {
             placeholder = quantiles;
             placeholder.add(0.0, static_cast<double>(buckets) - quantiles.count());
             return placeholder;
@@ -60,50 +50,42 @@ const maths::CQuantileSketch &correctForEmptyBuckets(bool ignoreEmpty,
 }
 
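[Editorial aside, not part of the patch: a minimal sketch of how the quantile
correction above behaves. It uses only the CQuantileSketch operations already
exercised in this file (construction from an interpolation style and a size,
add(value, count) and copy assignment); the bucket numbers are invented for
illustration.]

    maths::CQuantileSketch counts(maths::CQuantileSketch::E_Linear, 50);
    // ... add the per-bucket counts for the 80 non-empty of 100 buckets ...
    maths::CQuantileSketch placeholder(maths::CQuantileSketch::E_Linear, 1);
    const maths::CQuantileSketch& corrected =
        correctForEmptyBuckets(/*ignoreEmpty =*/false, /*buckets =*/100, placeholder, counts);
    // 'corrected' now contains 20 extra zero counts, so its low quantiles
    // (for example the 10th percentile) are pulled down to roughly zero.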
 //! Get the mean adjusted for empty buckets.
-double correctForEmptyBuckets(bool ignoreEmpty,
-                              uint64_t buckets,
-                              const CBucketCountStatistics::TMoments &moments)
-{
+double correctForEmptyBuckets(bool ignoreEmpty, uint64_t buckets, const CBucketCountStatistics::TMoments& moments) {
     double n = maths::CBasicStatistics::count(moments);
     double m = maths::CBasicStatistics::mean(moments);
     return ignoreEmpty ? m : n / static_cast<double>(buckets) * m;
 }
 
 const uint64_t MINIMUM_BUCKETS_TO_TEST = 20;
-const bool IGNORE_EMPTY[] = { false, true };
-
+const bool IGNORE_EMPTY[] = {false, true};
 }
 
-CSparseCountPenalty::CSparseCountPenalty(const CAutoconfigurerParams &params) : CPenalty(params) {}
+CSparseCountPenalty::CSparseCountPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
+}
 
-CSparseCountPenalty *CSparseCountPenalty::clone() const
-{
+CSparseCountPenalty* CSparseCountPenalty::clone() const {
     return new CSparseCountPenalty(*this);
 }
 
-std::string CSparseCountPenalty::name() const
-{
+std::string CSparseCountPenalty::name() const {
     return "sparse count penalty";
 }
 
-void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification &spec) const
-{
-    if (spec.function() != config_t::E_Count || spec.function() == config_t::E_Sum)
-    {
+void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
+    // Only the count and sum functions are subject to this penalty.
+    if (spec.function() != config_t::E_Count && spec.function() != config_t::E_Sum) {
         return;
     }
 
     using TDoubleVecVec = std::vector<TDoubleVec>;
     using TSizeSizePrQuantileUMap = CBucketCountStatistics::TSizeSizePrQuantileUMap;
     using TSizeSizePrQuantileUMapCItr = TSizeSizePrQuantileUMap::const_iterator;
-    using TSizeSizePrQuantileUMapCPtrVec = std::vector<const TSizeSizePrQuantileUMap*>;
-    using TSizeSizePrMomentsUMapCPtrVec = std::vector<const CBucketCountStatistics::TSizeSizePrMomentsUMap*>;
+    using TSizeSizePrQuantileUMapCPtrVec = std::vector<const TSizeSizePrQuantileUMap*>;
+    using TSizeSizePrMomentsUMapCPtrVec = std::vector<const CBucketCountStatistics::TSizeSizePrMomentsUMap*>;
     using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
     using TMeanAccumulatorVec = std::vector<TMeanAccumulator>;
 
-    if (const CDataCountStatistics *stats = spec.countStatistics())
-    {
-        const CAutoconfigurerParams::TTimeVec &candidates = this->params().candidateBucketLengths();
+    if (const CDataCountStatistics* stats = spec.countStatistics()) {
+        const CAutoconfigurerParams::TTimeVec& candidates = this->params().candidateBucketLengths();
 
         TSizeSizePrQuantileUMapCPtrVec quantiles;
         quantiles.reserve(candidates.size());
@@ -111,20 +93,16 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification &spec) const
         moments.reserve(candidates.size());
         core_t::TTime longest = 0;
 
-        for (std::size_t bid = 0u; bid < candidates.size(); ++bid)
-        {
-            if (stats->bucketCounts()[bid] > MINIMUM_BUCKETS_TO_TEST)
-            {
+        for (std::size_t bid = 0u; bid < candidates.size(); ++bid) {
+            if (stats->bucketCounts()[bid] > MINIMUM_BUCKETS_TO_TEST) {
                 quantiles.push_back(&(stats->bucketStatistics()[bid].countQuantilesPerPartition()));
-                moments.push_back( &(stats->bucketStatistics()[bid].countMomentsPerPartition()));
+                moments.push_back(&(stats->bucketStatistics()[bid].countMomentsPerPartition()));
                 longest = std::max(longest, candidates[bid]);
             }
         }
 
-        if (quantiles.size() > 3)
-        {
-            for (std::size_t iid = 0u; iid < boost::size(IGNORE_EMPTY); ++iid)
-            {
+        if (quantiles.size() > 3) {
+            for (std::size_t iid = 0u; iid < boost::size(IGNORE_EMPTY); ++iid) {
                 std::size_t nb = quantiles.size();
                 std::size_t nq = 19;
                 TDoubleVecVec xq(nb, TDoubleVec(nq));
@@ -134,48 +112,41 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification &spec) const
                 TMeanAccumulatorVec penalties_(nb - 1);
                 maths::CQuantileSketch placeholder(maths::CQuantileSketch::E_Linear, 1);
 
-                for 
(TSizeSizePrQuantileUMapCItr q0 = quantiles[0]->begin(); q0 != quantiles[0]->end(); ++q0) - { - const CBucketCountStatistics::TSizeSizePr &partition = q0->first; + for (TSizeSizePrQuantileUMapCItr q0 = quantiles[0]->begin(); q0 != quantiles[0]->end(); ++q0) { + const CBucketCountStatistics::TSizeSizePr& partition = q0->first; uint64_t bc = stats->bucketCounts()[0]; - const maths::CQuantileSketch &qe0 = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, placeholder, q0->second); - const CBucketCountStatistics::TMoments &m0 = moments[0]->find(partition)->second; + const maths::CQuantileSketch& qe0 = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, placeholder, q0->second); + const CBucketCountStatistics::TMoments& m0 = moments[0]->find(partition)->second; double me0 = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, m0); extract(qe0, nq, xq[0]); means[0] = me0; counts[0] = maths::CBasicStatistics::count(m0); bool skip = false; - for (std::size_t bid = 1u; bid < nb; ++bid) - { + for (std::size_t bid = 1u; bid < nb; ++bid) { TSizeSizePrQuantileUMapCItr qi = quantiles[bid]->find(partition); - if (qi == quantiles[bid]->end()) - { + if (qi == quantiles[bid]->end()) { skip = true; break; } bc = stats->bucketCounts()[bid]; - const maths::CQuantileSketch &qei = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, placeholder, qi->second); - const CBucketCountStatistics::TMoments &mi = moments[bid]->find(partition)->second; + const maths::CQuantileSketch& qei = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, placeholder, qi->second); + const CBucketCountStatistics::TMoments& mi = moments[bid]->find(partition)->second; double mei = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, mi); extract(qei, nq, xq[bid]); means[bid] = mei; counts[bid] = maths::CBasicStatistics::count(mi); } - if (skip) - { + if (skip) { continue; } std::fill_n(significances.begin(), nb - 1, 0.0); - for (std::size_t i = 0u; i < 2; ++i) - { - for (std::size_t bid = 0u; bid + 1 < nb; ++bid) - { - significances[bid] = std::max(significances[bid], - maths::CStatisticalTests::twoSampleKS(xq[bid], xq[nb - 1])); + for (std::size_t i = 0u; i < 2; ++i) { + for (std::size_t bid = 0u; bid + 1 < nb; ++bid) { + significances[bid] = std::max(significances[bid], maths::CStatisticalTests::twoSampleKS(xq[bid], xq[nb - 1])); } // If the rate is high w.r.t. the bucket length we expect the mean and variance @@ -186,22 +157,18 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification &spec) const // of the distribution are scaled appropriately as can be verified from their // definition in terms of the integral of the derivative of the distribution. 
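                    // Editorial expansion (not in the original patch): if the
                    // count in a bucket of length l is a sum of roughly
                    // independent increments with mean m and variance v, then
                    // over a bucket of length s * l the mean becomes s * m and
                    // the variance s * v, i.e. the standard deviation grows by
                    // sqrt(s). A quantile x = m + z * sigma therefore maps to
                    // s * m + z * sqrt(s) * sigma = s * m + sqrt(s) * (x - m),
                    // which is exactly the transform applied to xq below.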
-                    for (std::size_t bid = 0u; bid < nb; ++bid)
-                    {
-                        if (longest == candidates[bid])
-                        {
+                    for (std::size_t bid = 0u; bid < nb; ++bid) {
+                        if (longest == candidates[bid]) {
                             continue;
                         }
                         double scale = static_cast<double>(longest) / static_cast<double>(candidates[bid]);
-                        for (std::size_t j = 0u; j < xq[bid].size(); ++j)
-                        {
+                        for (std::size_t j = 0u; j < xq[bid].size(); ++j) {
                             xq[bid][j] = scale * means[bid] + std::sqrt(scale) * (xq[bid][j] - means[bid]);
                         }
                     }
                 }
 
-                for (std::size_t bid = 0u; bid + 1 < nb; ++bid)
-                {
+                for (std::size_t bid = 0u; bid + 1 < nb; ++bid) {
                     double pi = std::min(10.0 * significances[bid], 1.0);
                     penalties_[bid].add(std::min(maths::CTools::fastLog(pi), 0.0), counts[bid]);
                 }
@@ -214,14 +181,12 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification &spec) const
             penalties.reserve(2 * (nb - 1));
             descriptions.reserve(2 * (nb - 1));
 
-            for (std::size_t bid = 0u; bid < penalties_.size(); ++bid)
-            {
+            for (std::size_t bid = 0u; bid < penalties_.size(); ++bid) {
                 std::size_t index = this->params().penaltyIndexFor(bid, IGNORE_EMPTY[iid]);
                 indices.push_back(index);
                 double penalty = std::exp(maths::CBasicStatistics::mean(penalties_[bid]));
                 std::string description;
-                if (penalty < 1.0)
-                {
+                if (penalty < 1.0) {
                     description = "The bucket length does not properly capture the variation in event rate";
                 }
                 penalties.push_back(penalty);
@@ -233,6 +198,5 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification &spec) const
         }
     }
 }
-
 }
 }
diff --git a/lib/config/CTooMuchDataPenalty.cc b/lib/config/CTooMuchDataPenalty.cc
index 26cf6f4658..3302f36a9f 100644
--- a/lib/config/CTooMuchDataPenalty.cc
+++ b/lib/config/CTooMuchDataPenalty.cc
@@ -19,106 +19,74 @@
 #include
 #include
 
-namespace ml
-{
-namespace config
-{
-namespace
-{
+namespace ml {
+namespace config {
+namespace {
 
 using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 
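// Editorial note (illustrative numbers, not taken from the source): the
// occupancy penalty computed later in this file maps the fraction of
// populated buckets through
//     CTools::logInterpolate(highPopulatedBucketFraction,
//                            maximumPopulatedBucketFraction,
//                            1.0, 1.0 / #buckets, occupied).
// With the defaults printed by CAutoconfigurerParamsTest (0.1 and 0.5) and
// 1000 buckets, an occupancy of 0.3 yields approximately
//     1.0 + (0.001 - 1.0) * log(0.3 / 0.1) / log(0.5 / 0.1) ~= 0.32,
// so the more consistently a candidate bucket length is populated, the
// smaller (more punitive) the multiplier applied to its score becomes.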
 //! Get the description prefix.
-std::string descriptionPrefix(const CDetectorSpecification &spec,
-                              const TMeanAccumulator &meanOccupied,
-                              std::size_t partitions)
-{
-    if (spec.byField() && spec.partitionField())
-    {
-        return "A significant proportion, "
-              + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied)
-                                          / static_cast<double>(partitions))
-              + "%, of distinct partition and by fields combinations have values in many buckets.";
-    }
-    else if (spec.byField())
-    {
-        return "A significant proportion, "
-              + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied)
-                                          / static_cast<double>(partitions))
-              + "%, of distinct by fields have values in many buckets.";
-    }
-    else if (spec.partitionField())
-    {
-        return "A significant proportion, "
-              + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied)
-                                          / static_cast<double>(partitions))
-              + "%, of distinct partition fields have values in many buckets.";
+std::string descriptionPrefix(const CDetectorSpecification& spec, const TMeanAccumulator& meanOccupied, std::size_t partitions) {
+    if (spec.byField() && spec.partitionField()) {
+        return "A significant proportion, " +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               "%, of distinct partition and by fields combinations have values in many buckets.";
+    } else if (spec.byField()) {
+        return "A significant proportion, " +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               "%, of distinct by fields have values in many buckets.";
+    } else if (spec.partitionField()) {
+        return "A significant proportion, " +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               "%, of distinct partition fields have values in many buckets.";
     }
     return "";
 }
-
 }
 
-CTooMuchDataPenalty::CTooMuchDataPenalty(const CAutoconfigurerParams &params) :
-        CPenalty(params)
-{}
+CTooMuchDataPenalty::CTooMuchDataPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
+}
 
-CTooMuchDataPenalty *CTooMuchDataPenalty::clone() const
-{
+CTooMuchDataPenalty* CTooMuchDataPenalty::clone() const {
     return new CTooMuchDataPenalty(*this);
 }
 
-std::string CTooMuchDataPenalty::name() const
-{
+std::string CTooMuchDataPenalty::name() const {
     return "too much data";
 }
 
-void CTooMuchDataPenalty::penaltyFromMe(CDetectorSpecification &spec) const
-{
-    if (config_t::hasDoAndDontIgnoreEmptyVersions(spec.function()) && !spec.isPopulation())
-    {
-        if (const CPartitionDataCountStatistics *partitionStats =
-                dynamic_cast<const CPartitionDataCountStatistics*>(spec.countStatistics()))
-        {
+void CTooMuchDataPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
+    if (config_t::hasDoAndDontIgnoreEmptyVersions(spec.function()) && !spec.isPopulation()) {
+        if (const CPartitionDataCountStatistics* partitionStats =
+                dynamic_cast<const CPartitionDataCountStatistics*>(spec.countStatistics())) {
             this->penaltyFor(*partitionStats, spec);
-        }
-        else if (const CByAndPartitionDataCountStatistics *byAndPartitionStats =
-                     dynamic_cast<const CByAndPartitionDataCountStatistics*>(spec.countStatistics()))
-        {
+        } else if (const CByAndPartitionDataCountStatistics* byAndPartitionStats =
+                       dynamic_cast<const CByAndPartitionDataCountStatistics*>(spec.countStatistics())) {
            this->penaltyFor(*byAndPartitionStats, spec);
-        }
-        else if (const CByOverAndPartitionDataCountStatistics *byOverAndPartitionStats =
-                     dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(spec.countStatistics()))
-        {
+        } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats =
+                       dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(spec.countStatistics())) {
             this->penaltyFor(*byOverAndPartitionStats, spec);
         }
     }
 }
 
-void CTooMuchDataPenalty::penaltyFor(const 
CPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const -{ +void CTooMuchDataPenalty::penaltyFor(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec); } -void CTooMuchDataPenalty::penaltyFor(const CByAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const -{ +void CTooMuchDataPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec); } -void CTooMuchDataPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics &stats, - CDetectorSpecification &spec) const -{ +void CTooMuchDataPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec); } -void CTooMuchDataPenalty::penaltyFor(const TUInt64Vec &bucketCounts, - const TBucketCountStatisticsVec &statistics, - CDetectorSpecification &spec) const -{ +void CTooMuchDataPenalty::penaltyFor(const TUInt64Vec& bucketCounts, + const TBucketCountStatisticsVec& statistics, + CDetectorSpecification& spec) const { using TSizeSizePrMomentsUMapCItr = CBucketCountStatistics::TSizeSizePrMomentsUMap::const_iterator; - const CAutoconfigurerParams::TTimeVec &candidates = this->params().candidateBucketLengths(); + const CAutoconfigurerParams::TTimeVec& candidates = this->params().candidateBucketLengths(); LOG_TRACE("bucket counts = " << core::CContainerPrinter::print(bucketCounts)); @@ -131,53 +99,43 @@ void CTooMuchDataPenalty::penaltyFor(const TUInt64Vec &bucketCounts, config_t::EFunctionCategory function = spec.function(); - for (std::size_t bid = 0u; bid < candidates.size(); ++bid) - { + for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { uint64_t bc = bucketCounts[bid]; - if (bc > 0) - { - const CBucketCountStatistics &si = statistics[bid]; - const CBucketCountStatistics::TSizeSizePrMomentsUMap &mi = si.countMomentsPerPartition(); + if (bc > 0) { + const CBucketCountStatistics& si = statistics[bid]; + const CBucketCountStatistics::TSizeSizePrMomentsUMap& mi = si.countMomentsPerPartition(); TMeanAccumulator penalty_; TMeanAccumulator penalizedOccupancy; - for (TSizeSizePrMomentsUMapCItr j = mi.begin(); j != mi.end(); ++j) - { + for (TSizeSizePrMomentsUMapCItr j = mi.begin(); j != mi.end(); ++j) { double occupied = maths::CBasicStatistics::count(j->second) / static_cast(bc); - double penalty = CTools::logInterpolate( - this->params().highPopulatedBucketFraction(function, true), - this->params().maximumPopulatedBucketFraction(function, true), - 1.0, 1.0 / static_cast(bucketCounts[bid]), occupied); + double penalty = CTools::logInterpolate(this->params().highPopulatedBucketFraction(function, true), + this->params().maximumPopulatedBucketFraction(function, true), + 1.0, + 1.0 / static_cast(bucketCounts[bid]), + occupied); penalty_.add(maths::CTools::fastLog(penalty)); - if (penalty < 1.0) - { + if (penalty < 1.0) { penalizedOccupancy.add(occupied); } } - if (maths::CBasicStatistics::count(penalizedOccupancy) > 0.95 * static_cast(mi.size())) - { + if (maths::CBasicStatistics::count(penalizedOccupancy) > 0.95 * static_cast(mi.size())) { double penalty = std::min(std::exp(maths::CBasicStatistics::mean(penalty_)), 1.0); std::size_t index = this->params().penaltyIndexFor(bid, true); indices.push_back(index); penalties.push_back(penalty); descriptions.push_back(""); - if 
(penalty < 1.0) - { - if (spec.byField() || spec.partitionField()) - { - descriptions.back() = descriptionPrefix(spec, penalizedOccupancy, mi.size()) - + " On average, " - + CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(penalizedOccupancy)) - + "% of their buckets have a value"; - } - else - { - descriptions.back() = "A significant proportion, " - + CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(penalizedOccupancy)) - + "%, of " + CTools::prettyPrint(candidates[bid]) - + " buckets have a value"; + if (penalty < 1.0) { + if (spec.byField() || spec.partitionField()) { + descriptions.back() = descriptionPrefix(spec, penalizedOccupancy, mi.size()) + " On average, " + + CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(penalizedOccupancy)) + + "% of their buckets have a value"; + } else { + descriptions.back() = "A significant proportion, " + + CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(penalizedOccupancy)) + "%, of " + + CTools::prettyPrint(candidates[bid]) + " buckets have a value"; } } } @@ -185,6 +143,5 @@ void CTooMuchDataPenalty::penaltyFor(const TUInt64Vec &bucketCounts, } spec.applyPenalties(indices, penalties, descriptions); } - } } diff --git a/lib/config/CTools.cc b/lib/config/CTools.cc index 671fe7b874..756d64ba39 100644 --- a/lib/config/CTools.cc +++ b/lib/config/CTools.cc @@ -16,122 +16,80 @@ #include #include -namespace ml -{ -namespace config -{ -namespace -{ +namespace ml { +namespace config { +namespace { const core::CHashing::CMurmurHash2String HASHER; const uint64_t LOWER_BITS = 0xffffffff; const uint64_t UPPER_BITS = LOWER_BITS << 32; } -uint32_t CTools::category32(const std::string &value) -{ +uint32_t CTools::category32(const std::string& value) { return category32(HASHER(value)); } -uint32_t CTools::category32(std::size_t category64) -{ +uint32_t CTools::category32(std::size_t category64) { return static_cast(((category64 & UPPER_BITS) >> 32) ^ (category64 & LOWER_BITS)); } -std::size_t CTools::category64(const std::string &value) -{ +std::size_t CTools::category64(const std::string& value) { return HASHER(value); } -double CTools::interpolate(double a, double b, double pa, double pb, double x) -{ - return maths::CTools::truncate(pa + (pb - pa) * (x - a) / (b - a), - std::min(pa, pb), std::max(pa, pb)); +double CTools::interpolate(double a, double b, double pa, double pb, double x) { + return maths::CTools::truncate(pa + (pb - pa) * (x - a) / (b - a), std::min(pa, pb), std::max(pa, pb)); } -double CTools::powInterpolate(double p, double a, double b, double pa, double pb, double x) -{ - return maths::CTools::truncate(pa + (pb - pa) * std::pow((x - a) / (b - a), p), - std::min(pa, pb), std::max(pa, pb)); +double CTools::powInterpolate(double p, double a, double b, double pa, double pb, double x) { + return maths::CTools::truncate(pa + (pb - pa) * std::pow((x - a) / (b - a), p), std::min(pa, pb), std::max(pa, pb)); } -double CTools::logInterpolate(double a, double b, double pa, double pb, double x) -{ +double CTools::logInterpolate(double a, double b, double pa, double pb, double x) { double la = maths::CTools::fastLog(a); double lb = maths::CTools::fastLog(b); double lx = maths::CTools::fastLog(x); - return maths::CTools::truncate(pa + (pb - pa) * (lx - la) / (lb - la), - std::min(pa, pb), std::max(pa, pb)); + return maths::CTools::truncate(pa + (pb - pa) * (lx - la) / (lb - la), std::min(pa, pb), std::max(pa, pb)); } -std::string CTools::prettyPrint(double d) -{ +std::string CTools::prettyPrint(double d) { char buf[20]; 
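    // Pick the display precision by magnitude: scientific notation at the
    // extremes, progressively fewer decimal places as the value grows, and,
    // for values in [1e5, 1e13), ',' thousands separators (the copy_backward
    // loop below shifts digits right to make room for each separator).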
::memset(buf, 0, sizeof(buf)); - if (std::fabs(d) <= 1e-3) - { + if (std::fabs(d) <= 1e-3) { std::sprintf(buf, "%.2e", d); - } - else if (std::fabs(d) < 0.1) - { + } else if (std::fabs(d) < 0.1) { std::sprintf(buf, "%.3f", d); - } - else if (std::fabs(d) < 1.0) - { + } else if (std::fabs(d) < 1.0) { std::sprintf(buf, "%.2f", d); - } - else if (std::fabs(d) < 1e2) - { + } else if (std::fabs(d) < 1e2) { std::sprintf(buf, "%.1f", d); - } - else if (std::fabs(d) < 1e5) - { + } else if (std::fabs(d) < 1e5) { std::sprintf(buf, "%.0f", d); - } - else if (std::fabs(d) < 1e13) - { + } else if (std::fabs(d) < 1e13) { std::sprintf(buf, "%.0f", d); - char *end = std::find(buf, buf + 20, '\0'); - for (char *pos = end; - pos - buf > 3 && std::isdigit(static_cast(pos[-4])); - pos -= 3, ++end) - { + char* end = std::find(buf, buf + 20, '\0'); + for (char *pos = end; pos - buf > 3 && std::isdigit(static_cast(pos[-4])); pos -= 3, ++end) { std::copy_backward(pos - 3, end, end + 1); pos[-3] = ','; } - } - else - { + } else { std::sprintf(buf, "%.2e", d); } return buf; } -std::string CTools::prettyPrint(core_t::TTime time) -{ - static const char *SUFFIXES[] = { " week", " day", " hr", " min", " sec" }; +std::string CTools::prettyPrint(core_t::TTime time) { + static const char* SUFFIXES[] = {" week", " day", " hr", " min", " sec"}; std::string result; - core_t::TTime intervals[] = - { - (time / 604800) , - (time / 86400) % 7 , - (time / 3600) % 24, - (time / 60) % 60, - time % 60 - }; - for (std::size_t i = 0u; i < boost::size(intervals); ++i) - { - if (intervals[i] != 0) - { - result += (result.empty() ? "" : " ") - + core::CStringUtils::typeToString(intervals[i]) - + SUFFIXES[i]; + core_t::TTime intervals[] = {(time / 604800), (time / 86400) % 7, (time / 3600) % 24, (time / 60) % 60, time % 60}; + for (std::size_t i = 0u; i < boost::size(intervals); ++i) { + if (intervals[i] != 0) { + result += (result.empty() ? 
"" : " ") + core::CStringUtils::typeToString(intervals[i]) + SUFFIXES[i]; } } return result; } - } } diff --git a/lib/config/ConfigTypes.cc b/lib/config/ConfigTypes.cc index 82996debf4..d038620a8c 100644 --- a/lib/config/ConfigTypes.cc +++ b/lib/config/ConfigTypes.cc @@ -8,75 +8,48 @@ #include -namespace ml -{ -namespace config_t -{ -namespace -{ +namespace ml { +namespace config_t { +namespace { -const std::string USER_DATA_TYPE_NAMES[] = - { - std::string("categorical"), - std::string("numeric") - }; +const std::string USER_DATA_TYPE_NAMES[] = {std::string("categorical"), std::string("numeric")}; -const std::string DATA_TYPE_NAMES[] = - { - std::string(""), - std::string("binary"), - std::string("categorical"), - std::string("positive integer"), - std::string("integer"), - std::string("positive real"), - std::string("real") - }; +const std::string DATA_TYPE_NAMES[] = {std::string(""), + std::string("binary"), + std::string("categorical"), + std::string("positive integer"), + std::string("integer"), + std::string("positive real"), + std::string("real")}; -const std::string FUNCTION_CATEGORY_NAMES[] = - { - std::string("count"), - std::string("rare"), - std::string("distinct_count"), - std::string("info_content"), - std::string("mean"), - std::string("min"), - std::string("max"), - std::string("sum"), - std::string("varp"), - std::string("median") - }; +const std::string FUNCTION_CATEGORY_NAMES[] = {std::string("count"), + std::string("rare"), + std::string("distinct_count"), + std::string("info_content"), + std::string("mean"), + std::string("min"), + std::string("max"), + std::string("sum"), + std::string("varp"), + std::string("median")}; -const std::string IGNORE_EMPTY_VERSION_NAMES[][2] = - { - { std::string("n/a"), std::string("n/a") }, - { std::string("count"), std::string("non_zero_count") }, - { std::string("sum"), std::string("non_null_sum") } - }; - -const std::string SIDE_NAME[] = - { - std::string("high"), - std::string("low"), - std::string("both"), - std::string("") - }; +const std::string IGNORE_EMPTY_VERSION_NAMES[][2] = {{std::string("n/a"), std::string("n/a")}, + {std::string("count"), std::string("non_zero_count")}, + {std::string("sum"), std::string("non_null_sum")}}; +const std::string SIDE_NAME[] = {std::string("high"), std::string("low"), std::string("both"), std::string("")}; } -const std::string &print(EUserDataType type) -{ +const std::string& print(EUserDataType type) { return USER_DATA_TYPE_NAMES[type]; } -std::ostream &operator<<(std::ostream &o, EUserDataType type) -{ +std::ostream& operator<<(std::ostream& o, EUserDataType type) { return o << USER_DATA_TYPE_NAMES[type]; } -bool isCategorical(EDataType type) -{ - switch (type) - { +bool isCategorical(EDataType type) { + switch (type) { case E_Binary: case E_Categorical: return true; @@ -90,10 +63,8 @@ bool isCategorical(EDataType type) return false; } -bool isNumeric(EDataType type) -{ - switch (type) - { +bool isNumeric(EDataType type) { + switch (type) { case E_PositiveInteger: case E_Integer: case E_PositiveReal: @@ -107,10 +78,8 @@ bool isNumeric(EDataType type) return false; } -bool isInteger(EDataType type) -{ - switch (type) - { +bool isInteger(EDataType type) { + switch (type) { case E_PositiveInteger: case E_Integer: return true; @@ -124,20 +93,16 @@ bool isInteger(EDataType type) return false; } -const std::string &print(EDataType type) -{ +const std::string& print(EDataType type) { return DATA_TYPE_NAMES[type]; } -std::ostream &operator<<(std::ostream &o, EDataType type) -{ +std::ostream& 
operator<<(std::ostream& o, EDataType type) { return o << DATA_TYPE_NAMES[type]; } -bool hasArgument(EFunctionCategory function) -{ - switch (function) - { +bool hasArgument(EFunctionCategory function) { + switch (function) { case E_Count: case E_Rare: return false; @@ -154,10 +119,8 @@ bool hasArgument(EFunctionCategory function) return true; } -bool isCount(EFunctionCategory function) -{ - switch (function) - { +bool isCount(EFunctionCategory function) { + switch (function) { case E_Rare: case E_DistinctCount: case E_InfoContent: @@ -174,10 +137,8 @@ bool isCount(EFunctionCategory function) return true; } -bool isRare(EFunctionCategory function) -{ - switch (function) - { +bool isRare(EFunctionCategory function) { + switch (function) { case E_Count: case E_DistinctCount: case E_InfoContent: @@ -194,10 +155,8 @@ bool isRare(EFunctionCategory function) return true; } -bool isInfoContent(EFunctionCategory function) -{ - switch (function) - { +bool isInfoContent(EFunctionCategory function) { + switch (function) { case E_Count: case E_Rare: case E_DistinctCount: @@ -214,10 +173,8 @@ bool isInfoContent(EFunctionCategory function) return true; } -bool isMetric(EFunctionCategory function) -{ - switch (function) - { +bool isMetric(EFunctionCategory function) { + switch (function) { case E_Count: case E_Rare: case E_DistinctCount: @@ -234,10 +191,8 @@ bool isMetric(EFunctionCategory function) return true; } -bool hasSidedCalculation(EFunctionCategory function) -{ - switch (function) - { +bool hasSidedCalculation(EFunctionCategory function) { + switch (function) { case E_Rare: return false; case E_Count: @@ -254,10 +209,8 @@ bool hasSidedCalculation(EFunctionCategory function) return true; } -bool hasDoAndDontIgnoreEmptyVersions(EFunctionCategory function) -{ - switch (function) - { +bool hasDoAndDontIgnoreEmptyVersions(EFunctionCategory function) { + switch (function) { case E_Rare: case E_DistinctCount: case E_InfoContent: @@ -274,17 +227,15 @@ bool hasDoAndDontIgnoreEmptyVersions(EFunctionCategory function) return true; } -const std::string &ignoreEmptyVersionName(EFunctionCategory function, - bool ignoreEmpty, - bool isPopulation) -{ +const std::string& ignoreEmptyVersionName(EFunctionCategory function, bool ignoreEmpty, bool isPopulation) { std::size_t index = 0u; - switch (function) - { + switch (function) { case E_Count: - index = 1u; break; + index = 1u; + break; case E_Sum: - index = 2u; break; + index = 2u; + break; case E_Rare: case E_DistinctCount: case E_InfoContent: @@ -298,25 +249,20 @@ const std::string &ignoreEmptyVersionName(EFunctionCategory function, return IGNORE_EMPTY_VERSION_NAMES[index][ignoreEmpty && !isPopulation]; } -const std::string &print(EFunctionCategory function) -{ +const std::string& print(EFunctionCategory function) { return FUNCTION_CATEGORY_NAMES[function]; } -std::ostream &operator<<(std::ostream &o, EFunctionCategory function) -{ +std::ostream& operator<<(std::ostream& o, EFunctionCategory function) { return o << FUNCTION_CATEGORY_NAMES[function]; } -const std::string &print(ESide side) -{ +const std::string& print(ESide side) { return SIDE_NAME[side]; } -std::ostream &operator<<(std::ostream &o, ESide side) -{ +std::ostream& operator<<(std::ostream& o, ESide side) { return o << SIDE_NAME[side]; } - } } diff --git a/lib/config/Constants.cc b/lib/config/Constants.cc index 3caa8f1843..75ef61946f 100644 --- a/lib/config/Constants.cc +++ b/lib/config/Constants.cc @@ -6,44 +6,20 @@ #include -namespace ml -{ -namespace config -{ -namespace constants -{ -namespace 
-{ - -const std::string FIELD_NAME[] = - { - std::string("argument"), - std::string("by"), - std::string("over"), - std::string("partition") - }; +namespace ml { +namespace config { +namespace constants { +namespace { +const std::string FIELD_NAME[] = {std::string("argument"), std::string("by"), std::string("over"), std::string("partition")}; } -const std::size_t CFieldIndices::PARTITIONING[] = - { - BY_INDEX, - OVER_INDEX, - PARTITION_INDEX - }; -const std::size_t CFieldIndices::ALL[] = - { - ARGUMENT_INDEX, - BY_INDEX, - OVER_INDEX, - PARTITION_INDEX - }; +const std::size_t CFieldIndices::PARTITIONING[] = {BY_INDEX, OVER_INDEX, PARTITION_INDEX}; +const std::size_t CFieldIndices::ALL[] = {ARGUMENT_INDEX, BY_INDEX, OVER_INDEX, PARTITION_INDEX}; -const std::string &name(std::size_t index) -{ +const std::string& name(std::size_t index) { return FIELD_NAME[index]; } - } } } diff --git a/lib/config/unittest/CAutoconfigurerParamsTest.cc b/lib/config/unittest/CAutoconfigurerParamsTest.cc index 4edc14791a..d7ebe1033c 100644 --- a/lib/config/unittest/CAutoconfigurerParamsTest.cc +++ b/lib/config/unittest/CAutoconfigurerParamsTest.cc @@ -12,8 +12,7 @@ using namespace ml; -void CAutoconfigurerParamsTest::testDefaults() -{ +void CAutoconfigurerParamsTest::testDefaults() { LOG_DEBUG(""); LOG_DEBUG("+-------------------------------------------+"); LOG_DEBUG("| CAutoconfigurerParamsTest::testDefaults |"); @@ -21,55 +20,55 @@ void CAutoconfigurerParamsTest::testDefaults() config::CAutoconfigurerParams params("time", "", false, false); std::string actual = params.print(); - std::string expected = " TimeFieldName = time\n" - " TimeFieldFormat = \n" - " FieldsOfInterest = \"null\"\n" - " FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX] = \"null\"\n" - " FieldsToUseInAutoconfigureByRole[constants::BY_INDEX] = \"null\"\n" - " FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX] = \"null\"\n" - " FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX] = \"null\"\n" - " FunctionCategoriesToConfigure = [count, rare, distinct_count, info_content, mean, min, max, sum, varp, median]\n" - " FieldDataType = []\n" - " MinimumExamplesToClassify = 1000\n" - " NumberOfMostFrequentFieldsCounts = 10\n" - " MinimumRecordsToAttemptConfig = 10000\n" - " HighNumberByFieldValues = 500\n" - " MaximumNumberByFieldValues = 1000\n" - " HighNumberRareByFieldValues = 50000\n" - " MaximumNumberRareByFieldValues = 500000\n" - " HighNumberPartitionFieldValues = 500000\n" - " MaximumNumberPartitionFieldValues = 5000000\n" - " LowNumberOverFieldValues = 50\n" - " MinimumNumberOverFieldValues = 5\n" - " HighCardinalityInTailFactor = 1.100000\n" - " HighCardinalityInTailIncrement = 10\n" - " HighCardinalityHighTailFraction = 0.005000\n" - " HighCardinalityMaximumTailFraction = 0.050000\n" - " LowPopulatedBucketFractions = [0.3333333, 0.02]\n" - " MinimumPopulatedBucketFractions = [0.02, 0.002]\n" - " HighPopulatedBucketFractions[1] = 0.100000\n" - " MaximumPopulatedBucketFractions[1] = 0.500000\n" - " CandidateBucketLengths = [60, 300, 600, 1800, 3600, 7200, 14400, 86400]\n" - " LowNumberOfBucketsForConfig = 500.000000\n" - " MinimumNumberOfBucketsForConfig = 50.000000\n" - " PolledDataMinimumMassAtInterval = 0.990000\n" - " PolledDataJitter = 0.010000\n" - " LowCoefficientOfVariation = 0.001000\n" - " MinimumCoefficientOfVariation = 0.000001\n" - " LowLengthRangeForInfoContent = 10.000000\n" - " MinimumLengthRangeForInfoContent = 1.000000\n" - " LowMaximumLengthForInfoContent = 25.000000\n" - " 
MinimumMaximumLengthForInfoContent = 5.000000\n" - " LowEntropyForInfoContent = 0.010000\n" - " MinimumEntropyForInfoContent = 0.000001\n" - " LowDistinctCountForInfoContent = 500000.000000\n" - " MinimumDistinctCountForInfoContent = 5000.000000\n"; + std::string expected = + " TimeFieldName = time\n" + " TimeFieldFormat = \n" + " FieldsOfInterest = \"null\"\n" + " FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX] = \"null\"\n" + " FieldsToUseInAutoconfigureByRole[constants::BY_INDEX] = \"null\"\n" + " FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX] = \"null\"\n" + " FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX] = \"null\"\n" + " FunctionCategoriesToConfigure = [count, rare, distinct_count, info_content, mean, min, max, sum, varp, median]\n" + " FieldDataType = []\n" + " MinimumExamplesToClassify = 1000\n" + " NumberOfMostFrequentFieldsCounts = 10\n" + " MinimumRecordsToAttemptConfig = 10000\n" + " HighNumberByFieldValues = 500\n" + " MaximumNumberByFieldValues = 1000\n" + " HighNumberRareByFieldValues = 50000\n" + " MaximumNumberRareByFieldValues = 500000\n" + " HighNumberPartitionFieldValues = 500000\n" + " MaximumNumberPartitionFieldValues = 5000000\n" + " LowNumberOverFieldValues = 50\n" + " MinimumNumberOverFieldValues = 5\n" + " HighCardinalityInTailFactor = 1.100000\n" + " HighCardinalityInTailIncrement = 10\n" + " HighCardinalityHighTailFraction = 0.005000\n" + " HighCardinalityMaximumTailFraction = 0.050000\n" + " LowPopulatedBucketFractions = [0.3333333, 0.02]\n" + " MinimumPopulatedBucketFractions = [0.02, 0.002]\n" + " HighPopulatedBucketFractions[1] = 0.100000\n" + " MaximumPopulatedBucketFractions[1] = 0.500000\n" + " CandidateBucketLengths = [60, 300, 600, 1800, 3600, 7200, 14400, 86400]\n" + " LowNumberOfBucketsForConfig = 500.000000\n" + " MinimumNumberOfBucketsForConfig = 50.000000\n" + " PolledDataMinimumMassAtInterval = 0.990000\n" + " PolledDataJitter = 0.010000\n" + " LowCoefficientOfVariation = 0.001000\n" + " MinimumCoefficientOfVariation = 0.000001\n" + " LowLengthRangeForInfoContent = 10.000000\n" + " MinimumLengthRangeForInfoContent = 1.000000\n" + " LowMaximumLengthForInfoContent = 25.000000\n" + " MinimumMaximumLengthForInfoContent = 5.000000\n" + " LowEntropyForInfoContent = 0.010000\n" + " MinimumEntropyForInfoContent = 0.000001\n" + " LowDistinctCountForInfoContent = 500000.000000\n" + " MinimumDistinctCountForInfoContent = 5000.000000\n"; LOG_DEBUG("parameters =\n" << actual); CPPUNIT_ASSERT_EQUAL(expected, actual); } -void CAutoconfigurerParamsTest::testInit() -{ +void CAutoconfigurerParamsTest::testInit() { LOG_DEBUG(""); LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CAutoconfigurerParamsTest::testInit |"); @@ -131,16 +130,13 @@ void CAutoconfigurerParamsTest::testInit() CPPUNIT_ASSERT_EQUAL(expected, actual); } -CppUnit::Test *CAutoconfigurerParamsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAutoconfigurerParamsTest"); +CppUnit::Test* CAutoconfigurerParamsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAutoconfigurerParamsTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAutoconfigurerParamsTest::testDefaults", - &CAutoconfigurerParamsTest::testDefaults) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAutoconfigurerParamsTest::testInit", - &CAutoconfigurerParamsTest::testInit) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CAutoconfigurerParamsTest::testDefaults", + &CAutoconfigurerParamsTest::testDefaults)); + 
suiteOfTests->addTest( + new CppUnit::TestCaller("CAutoconfigurerParamsTest::testInit", &CAutoconfigurerParamsTest::testInit)); return suiteOfTests; } diff --git a/lib/config/unittest/CAutoconfigurerParamsTest.h b/lib/config/unittest/CAutoconfigurerParamsTest.h index 7bdf278990..90ef90d247 100644 --- a/lib/config/unittest/CAutoconfigurerParamsTest.h +++ b/lib/config/unittest/CAutoconfigurerParamsTest.h @@ -9,13 +9,12 @@ #include -class CAutoconfigurerParamsTest : public CppUnit::TestFixture -{ - public: - void testDefaults(); - void testInit(); +class CAutoconfigurerParamsTest : public CppUnit::TestFixture { +public: + void testDefaults(); + void testInit(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CAutoconfigurerParamsTest_h diff --git a/lib/config/unittest/CDataSemanticsTest.cc b/lib/config/unittest/CDataSemanticsTest.cc index e3d05adb71..a198ca661c 100644 --- a/lib/config/unittest/CDataSemanticsTest.cc +++ b/lib/config/unittest/CDataSemanticsTest.cc @@ -22,8 +22,7 @@ using TDoubleVec = std::vector; using TSizeVec = std::vector; using TStrVec = std::vector; -void CDataSemanticsTest::testBinary() -{ +void CDataSemanticsTest::testBinary() { LOG_DEBUG(""); LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CDataSemanticsTest::testBinary |"); @@ -31,23 +30,17 @@ void CDataSemanticsTest::testBinary() // Try a numeric and non-numeric example of a binary variable. - std::string categories[][2] = - { - {"false", "true"}, - {"0", "1"} - }; + std::string categories[][2] = {{"false", "true"}, {"0", "1"}}; test::CRandomNumbers rng; TSizeVec v; rng.generateUniformSamples(0, 2, 100, v); - for (std::size_t i = 0u; i < boost::size(categories); ++i) - { + for (std::size_t i = 0u; i < boost::size(categories); ++i) { config::CDataSemantics semantics; CPPUNIT_ASSERT_EQUAL(config_t::E_UndeterminedType, semantics.type()); - for (std::size_t j = 0u; j < v.size(); ++j) - { + for (std::size_t j = 0u; j < v.size(); ++j) { semantics.add(categories[i][v[j]]); } semantics.computeType(); @@ -55,8 +48,7 @@ void CDataSemanticsTest::testBinary() } } -void CDataSemanticsTest::testNonNumericCategorical() -{ +void CDataSemanticsTest::testNonNumericCategorical() { LOG_DEBUG(""); LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CDataSemanticsTest::testNonNumericCategorical |"); @@ -75,19 +67,16 @@ void CDataSemanticsTest::testNonNumericCategorical() config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { semantics.add(categories[samples[i]]); - if (i > 10) - { + if (i > 10) { semantics.computeType(); CPPUNIT_ASSERT_EQUAL(config_t::E_Categorical, semantics.type()); } } } -void CDataSemanticsTest::testNumericCategorical() -{ +void CDataSemanticsTest::testNumericCategorical() { LOG_DEBUG(""); LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CDataSemanticsTest::testNumericCategorical |"); @@ -96,27 +85,18 @@ void CDataSemanticsTest::testNumericCategorical() // Test plausible http status code distribution is correctly // identified as categorical. 
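    // Editorial note: every value below parses as an integer, but only 13
    // distinct codes occur across the 5000 samples, so the classifier is
    // presumably driven to E_Categorical by the low distinct-value count
    // rather than by the textual form of the values.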
- double codes[] = - { - 200, 201, 202, 303, 400, 403, 404, 500, 501, 503, 506, 598, 599 - }; - double frequencies[] = - { - 0.7715, 0.03, 0.05, 0.001, 0.005, 0.041, 0.061, 0.002, 0.0005, 0.021, 0.001, 0.002, 0.014 - }; + double codes[] = {200, 201, 202, 303, 400, 403, 404, 500, 501, 503, 506, 598, 599}; + double frequencies[] = {0.7715, 0.03, 0.05, 0.001, 0.005, 0.041, 0.061, 0.002, 0.0005, 0.021, 0.001, 0.002, 0.014}; test::CRandomNumbers rng; TDoubleVec status; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(codes), boost::end(codes)), - TDoubleVec(boost::begin(frequencies), boost::end(frequencies)), - 5000, - status); + rng.generateMultinomialSamples( + TDoubleVec(boost::begin(codes), boost::end(codes)), TDoubleVec(boost::begin(frequencies), boost::end(frequencies)), 5000, status); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < status.size(); ++i) - { + for (std::size_t i = 0u; i < status.size(); ++i) { semantics.add(core::CStringUtils::typeToString(static_cast(status[i]))); } semantics.computeType(); @@ -124,8 +104,7 @@ void CDataSemanticsTest::testNumericCategorical() CPPUNIT_ASSERT_EQUAL(config_t::E_Categorical, semantics.type()); } -void CDataSemanticsTest::testInteger() -{ +void CDataSemanticsTest::testInteger() { LOG_DEBUG(""); LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CDataSemanticsTest::testInteger |"); @@ -142,10 +121,8 @@ void CDataSemanticsTest::testInteger() rng.generateUniformSamples(0.0, 25.0, 500, samples); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (i % 10 == 0) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (i % 10 == 0) { LOG_DEBUG(" adding " << static_cast(samples[i])); } semantics.add(core::CStringUtils::typeToString(static_cast(samples[i]))); @@ -166,15 +143,12 @@ void CDataSemanticsTest::testInteger() rng.generateNormalSamples(-10.0, 100.0, 500, samples); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (i % 10 == 0) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (i % 10 == 0) { LOG_DEBUG(" adding " << static_cast(samples[i])); } semantics.add(core::CStringUtils::typeToString(static_cast(samples[i]))); - if ((i + 1) % 100 == 0) - { + if ((i + 1) % 100 == 0) { semantics.computeType(); LOG_DEBUG(" type = " << semantics.type()); CPPUNIT_ASSERT_EQUAL(config_t::E_Integer, semantics.type()); @@ -187,15 +161,12 @@ void CDataSemanticsTest::testInteger() rng.generateLogNormalSamples(0.1, 2.0, 500, samples); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (i % 10 == 0) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (i % 10 == 0) { LOG_DEBUG(" adding " << static_cast(samples[i])); } semantics.add(core::CStringUtils::typeToString(static_cast(samples[i]))); - if ((i + 1) % 100 == 0) - { + if ((i + 1) % 100 == 0) { semantics.computeType(); LOG_DEBUG(" type = " << semantics.type()); CPPUNIT_ASSERT_EQUAL(config_t::E_PositiveInteger, semantics.type()); @@ -215,19 +186,15 @@ void CDataSemanticsTest::testInteger() rng.random_shuffle(samples.begin(), samples.end()); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (i % 2 == 0) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (i % 2 == 0) { LOG_DEBUG(" adding " << static_cast(samples[i])); } semantics.add(core::CStringUtils::typeToString(static_cast(samples[i]))); - if ((i + 1) % 10 == 0) - { + if ((i + 1) % 10 == 0) { 
semantics.computeType(); LOG_DEBUG(" type = " << semantics.type()); - if (i > 30) - { + if (i > 30) { CPPUNIT_ASSERT_EQUAL(config_t::E_Integer, semantics.type()); } } @@ -235,8 +202,7 @@ void CDataSemanticsTest::testInteger() } } -void CDataSemanticsTest::testReal() -{ +void CDataSemanticsTest::testReal() { LOG_DEBUG(""); LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| CDataSemanticsTest::testReal |"); @@ -253,15 +219,12 @@ void CDataSemanticsTest::testReal() rng.generateUniformSamples(0.0, 10.0, 500, samples); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (i % 10 == 0) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (i % 10 == 0) { LOG_DEBUG(" adding " << samples[i]); } semantics.add(core::CStringUtils::typeToString(samples[i])); - if ((i + 1) % 50 == 0) - { + if ((i + 1) % 50 == 0) { semantics.computeType(); LOG_DEBUG(" type = " << semantics.type()); CPPUNIT_ASSERT_EQUAL(config_t::E_PositiveReal, semantics.type()); @@ -274,15 +237,12 @@ void CDataSemanticsTest::testReal() rng.generateNormalSamples(-10.0, 100.0, 500, samples); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (i % 10 == 0) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (i % 10 == 0) { LOG_DEBUG(" adding " << samples[i]); } semantics.add(core::CStringUtils::typeToString(samples[i])); - if ((i + 1) % 50 == 0) - { + if ((i + 1) % 50 == 0) { semantics.computeType(); LOG_DEBUG(" type = " << semantics.type()); CPPUNIT_ASSERT_EQUAL(config_t::E_Real, semantics.type()); @@ -295,15 +255,12 @@ void CDataSemanticsTest::testReal() rng.generateLogNormalSamples(0.1, 1.5, 500, samples); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (i % 10 == 0) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (i % 10 == 0) { LOG_DEBUG(" adding " << samples[i]); } semantics.add(core::CStringUtils::typeToString(samples[i])); - if ((i + 1) % 50 == 0) - { + if ((i + 1) % 50 == 0) { semantics.computeType(); LOG_DEBUG(" type = " << semantics.type()); CPPUNIT_ASSERT_EQUAL(config_t::E_PositiveReal, semantics.type()); @@ -323,16 +280,13 @@ void CDataSemanticsTest::testReal() rng.random_shuffle(samples.begin(), samples.end()); config::CDataSemantics semantics; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { LOG_DEBUG(" adding " << samples[i]); semantics.add(core::CStringUtils::typeToString(samples[i])); - if ((i + 1) % 5 == 0) - { + if ((i + 1) % 5 == 0) { semantics.computeType(); LOG_DEBUG(" type = " << semantics.type()); - if (i > 25) - { + if (i > 25) { CPPUNIT_ASSERT_EQUAL(config_t::E_Real, semantics.type()); } } @@ -340,25 +294,16 @@ void CDataSemanticsTest::testReal() } } -CppUnit::Test *CDataSemanticsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDataSemanticsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CDataSemanticsTest::testBinary", - &CDataSemanticsTest::testBinary) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CDataSemanticsTest::testNonNumericCategorical", - &CDataSemanticsTest::testNonNumericCategorical) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CDataSemanticsTest::testNumericCategorical", - &CDataSemanticsTest::testNumericCategorical) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CDataSemanticsTest::testInteger", - &CDataSemanticsTest::testInteger) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - 
"CDataSemanticsTest::testReal", - &CDataSemanticsTest::testReal) ); +CppUnit::Test* CDataSemanticsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDataSemanticsTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CDataSemanticsTest::testBinary", &CDataSemanticsTest::testBinary)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDataSemanticsTest::testNonNumericCategorical", + &CDataSemanticsTest::testNonNumericCategorical)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDataSemanticsTest::testNumericCategorical", + &CDataSemanticsTest::testNumericCategorical)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDataSemanticsTest::testInteger", &CDataSemanticsTest::testInteger)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDataSemanticsTest::testReal", &CDataSemanticsTest::testReal)); return suiteOfTests; } diff --git a/lib/config/unittest/CDataSemanticsTest.h b/lib/config/unittest/CDataSemanticsTest.h index dde597af37..9ab0650ce8 100644 --- a/lib/config/unittest/CDataSemanticsTest.h +++ b/lib/config/unittest/CDataSemanticsTest.h @@ -9,16 +9,15 @@ #include -class CDataSemanticsTest : public CppUnit::TestFixture -{ - public: - void testBinary(); - void testNonNumericCategorical(); - void testNumericCategorical(); - void testInteger(); - void testReal(); +class CDataSemanticsTest : public CppUnit::TestFixture { +public: + void testBinary(); + void testNonNumericCategorical(); + void testNumericCategorical(); + void testInteger(); + void testReal(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CDataSemanticsTest_h diff --git a/lib/config/unittest/CDataSummaryStatisticsTest.cc b/lib/config/unittest/CDataSummaryStatisticsTest.cc index b394eb082f..93a146dd43 100644 --- a/lib/config/unittest/CDataSummaryStatisticsTest.cc +++ b/lib/config/unittest/CDataSummaryStatisticsTest.cc @@ -31,8 +31,7 @@ using TSizeVec = std::vector; using TStrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; -void CDataSummaryStatisticsTest::testRate() -{ +void CDataSummaryStatisticsTest::testRate() { LOG_DEBUG(""); LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CDataSummaryStatisticsTest::testRate |"); @@ -40,31 +39,27 @@ void CDataSummaryStatisticsTest::testRate() // Test we correctly estimate a range of rates. 
- double rate[] = { 10.0, 100.0, 500.0 }; + double rate[] = {10.0, 100.0, 500.0}; double n = 100000.0; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(rate); ++i) - { + for (std::size_t i = 0u; i < boost::size(rate); ++i) { TDoubleVec times; rng.generateUniformSamples(0.0, n / rate[i], static_cast(n), times); config::CDataSummaryStatistics summary; - for (std::size_t j = 0u; j < times.size(); ++j) - { + for (std::size_t j = 0u; j < times.size(); ++j) { summary.add(static_cast(times[j])); } - LOG_DEBUG("earliest = " << summary.earliest() - << ", latest = " << summary.latest()); + LOG_DEBUG("earliest = " << summary.earliest() << ", latest = " << summary.latest()); LOG_DEBUG("rate = " << summary.meanRate()); CPPUNIT_ASSERT_DOUBLES_EQUAL(rate[i], summary.meanRate(), 2.0 * rate[i] * rate[i] / n); } } -void CDataSummaryStatisticsTest::testCategoricalDistinctCount() -{ +void CDataSummaryStatisticsTest::testCategoricalDistinctCount() { LOG_DEBUG(""); LOG_DEBUG("+------------------------------------------------------------+"); LOG_DEBUG("| CDataSummaryStatisticsTest::testCategoricalDistinctCount |"); @@ -77,20 +72,17 @@ void CDataSummaryStatisticsTest::testCategoricalDistinctCount() LOG_DEBUG("*** Exact ***"); test::CRandomNumbers rng; - std::size_t n[] = { 10, 100, 1000 }; - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + std::size_t n[] = {10, 100, 1000}; + for (std::size_t i = 0u; i < boost::size(n); ++i) { TStrVec categories; rng.generateWords(5, n[i], categories); config::CCategoricalDataSummaryStatistics summary(100); - for (std::size_t j = 0u; j < categories.size(); ++j) - { + for (std::size_t j = 0u; j < categories.size(); ++j) { summary.add(static_cast(j), categories[j]); } - LOG_DEBUG("# categories = " << categories.size() - << ", distinct count = " << summary.distinctCount()); + LOG_DEBUG("# categories = " << categories.size() << ", distinct count = " << summary.distinctCount()); } } @@ -98,20 +90,16 @@ void CDataSummaryStatisticsTest::testCategoricalDistinctCount() LOG_DEBUG("*** Sketched ***"); config::CCategoricalDataSummaryStatistics summary(100); - for (std::size_t i = 0u; i < 1000000; ++i) - { + for (std::size_t i = 0u; i < 1000000; ++i) { summary.add(static_cast(i), core::CStringUtils::typeToString(i)); } LOG_DEBUG("# categories = 1000000, distinct count = " << summary.distinctCount()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1000000.0, - static_cast(summary.distinctCount()), - 0.005 * 1000000.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1000000.0, static_cast(summary.distinctCount()), 0.005 * 1000000.0); } } -void CDataSummaryStatisticsTest::testCategoricalTopN() -{ +void CDataSummaryStatisticsTest::testCategoricalTopN() { LOG_DEBUG(""); LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CDataSummaryStatisticsTest::testCategoricalTopN |"); @@ -125,35 +113,27 @@ void CDataSummaryStatisticsTest::testCategoricalTopN() TStrVec categories; rng.generateWords(5, 100000, categories); - std::size_t freq[] = { 0, 23, 59, 100, 110, 174, 230, 540, 672, 810 }; - std::size_t counts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + std::size_t freq[] = {0, 23, 59, 100, 110, 174, 230, 540, 672, 810}; + std::size_t counts[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; config::CCategoricalDataSummaryStatistics summary(20); TDoubleVec p; TSizeVec index; - for (std::size_t j = 0u; j < 2000000; ++j) - { + for (std::size_t j = 0u; j < 2000000; ++j) { rng.generateUniformSamples(0.0, 1.0, 1, p); - if (p[0] < 0.05) - { - std::size_t b = std::upper_bound(boost::begin(freq), - 
boost::end(freq), - j / 2000) - boost::begin(freq); + if (p[0] < 0.05) { + std::size_t b = std::upper_bound(boost::begin(freq), boost::end(freq), j / 2000) - boost::begin(freq); rng.generateUniformSamples(0, b, 1, index); index[0] = freq[index[0]]; - } - else - { + } else { rng.generateUniformSamples(0, categories.size(), 1, index); } - const std::size_t *f = std::lower_bound(boost::begin(freq), - boost::end(freq), index[0]); - if (f != boost::end(freq) && *f == index[0]) - { + const std::size_t* f = std::lower_bound(boost::begin(freq), boost::end(freq), index[0]); + if (f != boost::end(freq) && *f == index[0]) { ++counts[f - boost::begin(freq)]; } @@ -164,15 +144,14 @@ void CDataSummaryStatisticsTest::testCategoricalTopN() summary.topN(topn); TMeanAccumulator meanError; - for (std::size_t i = 0u; i < boost::size(freq); ++i) - { + for (std::size_t i = 0u; i < boost::size(freq); ++i) { LOG_DEBUG(""); LOG_DEBUG("actual: " << categories[freq[i]] << " appeared " << counts[i] << " times"); LOG_DEBUG("estimated: " << topn[i].first << " appeared " << topn[i].second << " times"); - double exact = static_cast(counts[i]); + double exact = static_cast(counts[i]); double approx = static_cast(topn[i].second); - double error = std::fabs(exact - approx) / exact; + double error = std::fabs(exact - approx) / exact; CPPUNIT_ASSERT_EQUAL(categories[freq[i]], topn[i].first); CPPUNIT_ASSERT(error < 0.05); @@ -183,8 +162,7 @@ void CDataSummaryStatisticsTest::testCategoricalTopN() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.005); } -void CDataSummaryStatisticsTest::testNumericBasicStatistics() -{ +void CDataSummaryStatisticsTest::testNumericBasicStatistics() { LOG_DEBUG(""); LOG_DEBUG("+----------------------------------------------------------+"); LOG_DEBUG("| CDataSummaryStatisticsTest::testNumericBasicStatistics |"); @@ -197,8 +175,7 @@ void CDataSummaryStatisticsTest::testNumericBasicStatistics() config::CNumericDataSummaryStatistics summary(true); - for (std::size_t i = 0u; i <= 500; ++i) - { + for (std::size_t i = 0u; i <= 500; ++i) { summary.add(static_cast(i), core::CStringUtils::typeToString(i)); } @@ -212,14 +189,12 @@ void CDataSummaryStatisticsTest::testNumericBasicStatistics() { LOG_DEBUG("*** Uniform ***"); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { TDoubleVec samples; rng.generateUniformSamples(-10.0, 50.0, 1000, samples); config::CNumericDataSummaryStatistics summary(false); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { summary.add(static_cast(j), core::CStringUtils::typeToString(samples[j])); } @@ -227,8 +202,8 @@ void CDataSummaryStatisticsTest::testNumericBasicStatistics() LOG_DEBUG("median = " << summary.median()); LOG_DEBUG("maximum = " << summary.maximum()); CPPUNIT_ASSERT_DOUBLES_EQUAL(-10.0, summary.minimum(), 0.15); - CPPUNIT_ASSERT_DOUBLES_EQUAL( 20.0, summary.median(), 1.0); - CPPUNIT_ASSERT_DOUBLES_EQUAL( 50.0, summary.maximum(), 0.15); + CPPUNIT_ASSERT_DOUBLES_EQUAL(20.0, summary.median(), 1.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL(50.0, summary.maximum(), 0.15); } } @@ -242,22 +217,19 @@ void CDataSummaryStatisticsTest::testNumericBasicStatistics() LOG_DEBUG("distribution median = " << boost::math::median(lognormal)); TMeanAccumulator meanError; - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { TDoubleVec samples; rng.generateLogNormalSamples(location, scale * scale, 1000, samples); config::CNumericDataSummaryStatistics 
summary(false); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { summary.add(static_cast(j), core::CStringUtils::typeToString(samples[j])); } LOG_DEBUG("median = " << summary.median()); CPPUNIT_ASSERT(std::fabs(summary.median() - boost::math::median(lognormal)) < 0.25); - meanError.add( std::fabs(summary.median() - boost::math::median(lognormal)) - / boost::math::median(lognormal)); + meanError.add(std::fabs(summary.median() - boost::math::median(lognormal)) / boost::math::median(lognormal)); } LOG_DEBUG("mean error = " << maths::CBasicStatistics::mean(meanError)); @@ -265,8 +237,7 @@ void CDataSummaryStatisticsTest::testNumericBasicStatistics() } } -void CDataSummaryStatisticsTest::testNumericDistribution() -{ +void CDataSummaryStatisticsTest::testNumericDistribution() { LOG_DEBUG(""); LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CDataSummaryStatisticsTest::testNumericDistribution |"); @@ -284,8 +255,7 @@ void CDataSummaryStatisticsTest::testNumericDistribution() boost::math::lognormal_distribution<> d(1.0, std::sqrt(3.0)); config::CNumericDataSummaryStatistics statistics(false); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { statistics.add(static_cast(i), core::CStringUtils::typeToString(samples[i])); } @@ -295,10 +265,8 @@ void CDataSummaryStatisticsTest::testNumericDistribution() TMeanAccumulator meanAbsError; TMeanAccumulator mean; - for (std::size_t i = 0u; i < chart.size(); ++i) - { - if (chart[i].first < 0.0) - { + for (std::size_t i = 0u; i < chart.size(); ++i) { + if (chart[i].first < 0.0) { continue; } double fexpected = boost::math::pdf(d, std::max(chart[i].first, 0.0)); @@ -310,8 +278,7 @@ void CDataSummaryStatisticsTest::testNumericDistribution() LOG_DEBUG("meanAbsError = " << maths::CBasicStatistics::mean(meanAbsError)); LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(mean)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanAbsError) - / maths::CBasicStatistics::mean(mean) < 0.3); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanAbsError) / maths::CBasicStatistics::mean(mean) < 0.3); } { @@ -328,17 +295,16 @@ void CDataSummaryStatisticsTest::testNumericDistribution() rng.generateGammaSamples(100.0, 5.0, 100, modeSamples); samples.insert(samples.end(), modeSamples.begin(), modeSamples.end()); - double weights[] = { 1.0 / 5.5, 2.0 / 5.5, 1.5 / 5.5, 1.0 / 5.5 }; + double weights[] = {1.0 / 5.5, 2.0 / 5.5, 1.5 / 5.5, 1.0 / 5.5}; boost::math::normal_distribution<> m0(10.0, std::sqrt(10.0)); - boost::math::gamma_distribution<> m1(100.0, 1.0); + boost::math::gamma_distribution<> m1(100.0, 1.0); boost::math::normal_distribution<> m2(200.0, 10.0); - boost::math::gamma_distribution<> m3(100.0, 5.0); + boost::math::gamma_distribution<> m3(100.0, 5.0); rng.random_shuffle(samples.begin(), samples.end()); config::CNumericDataSummaryStatistics statistics(false); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { statistics.add(static_cast(i), core::CStringUtils::typeToString(samples[i])); } @@ -348,12 +314,9 @@ void CDataSummaryStatisticsTest::testNumericDistribution() TMeanAccumulator meanAbsError; TMeanAccumulator meanRelError; - for (std::size_t i = 0u; i < chart.size(); ++i) - { - double fexpected = weights[0] * boost::math::pdf(m0, chart[i].first) - + weights[1] * boost::math::pdf(m1, chart[i].first) - + weights[2] * boost::math::pdf(m2, chart[i].first) - 
+ weights[3] * boost::math::pdf(m3, chart[i].first);
+    for (std::size_t i = 0u; i < chart.size(); ++i) {
+        double fexpected = weights[0] * boost::math::pdf(m0, chart[i].first) + weights[1] * boost::math::pdf(m1, chart[i].first) +
+                           weights[2] * boost::math::pdf(m2, chart[i].first) + weights[3] * boost::math::pdf(m3, chart[i].first);
         double f = chart[i].second;
         LOG_DEBUG("x = " << chart[i].first << ", fexpected(x) = " << fexpected << ", f(x) = " << f);
         meanAbsError.add(std::fabs(f - fexpected));
@@ -367,25 +330,19 @@
     }
 }
 
-CppUnit::Test *CDataSummaryStatisticsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDataSummaryStatisticsTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDataSummaryStatisticsTest>(
-                                   "CDataSummaryStatisticsTest::testRate",
-                                   &CDataSummaryStatisticsTest::testRate) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDataSummaryStatisticsTest>(
-                                   "CDataSummaryStatisticsTest::testCategoricalDistinctCount",
-                                   &CDataSummaryStatisticsTest::testCategoricalDistinctCount) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDataSummaryStatisticsTest>(
-                                   "CDataSummaryStatisticsTest::testCategoricalTopN",
-                                   &CDataSummaryStatisticsTest::testCategoricalTopN) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDataSummaryStatisticsTest>(
-                                   "CDataSummaryStatisticsTest::testNumericBasicStatistics",
-                                   &CDataSummaryStatisticsTest::testNumericBasicStatistics) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDataSummaryStatisticsTest>(
-                                   "CDataSummaryStatisticsTest::testNumericDistribution",
-                                   &CDataSummaryStatisticsTest::testNumericDistribution) );
+CppUnit::Test* CDataSummaryStatisticsTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDataSummaryStatisticsTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testRate", &CDataSummaryStatisticsTest::testRate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testCategoricalDistinctCount",
+                                                                              &CDataSummaryStatisticsTest::testCategoricalDistinctCount));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testCategoricalTopN",
+                                                                              &CDataSummaryStatisticsTest::testCategoricalTopN));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testNumericBasicStatistics",
+                                                                              &CDataSummaryStatisticsTest::testNumericBasicStatistics));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testNumericDistribution",
+                                                                              &CDataSummaryStatisticsTest::testNumericDistribution));
 
     return suiteOfTests;
 }
diff --git a/lib/config/unittest/CDataSummaryStatisticsTest.h b/lib/config/unittest/CDataSummaryStatisticsTest.h
index 61e80a62aa..be236dc051 100644
--- a/lib/config/unittest/CDataSummaryStatisticsTest.h
+++ b/lib/config/unittest/CDataSummaryStatisticsTest.h
@@ -9,16 +9,15 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CDataSummaryStatisticsTest : public CppUnit::TestFixture
-{
-    public:
-        void testRate();
-        void testCategoricalDistinctCount();
-        void testCategoricalTopN();
-        void testNumericBasicStatistics();
-        void testNumericDistribution();
+class CDataSummaryStatisticsTest : public CppUnit::TestFixture {
+public:
+    void testRate();
+    void testCategoricalDistinctCount();
+    void testCategoricalTopN();
+    void testNumericBasicStatistics();
+    void testNumericDistribution();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CDataSummaryStatisticsTest_h
diff --git a/lib/config/unittest/CDetectorEnumeratorTest.cc b/lib/config/unittest/CDetectorEnumeratorTest.cc
index 1d89104c6f..7b1ceffbfa 100644
--- 
a/lib/config/unittest/CDetectorEnumeratorTest.cc +++ b/lib/config/unittest/CDetectorEnumeratorTest.cc @@ -16,22 +16,17 @@ using namespace ml; -namespace -{ -std::string print(const config::CDetectorEnumerator::TDetectorSpecificationVec &spec, - const std::string &indent = std::string()) -{ +namespace { +std::string print(const config::CDetectorEnumerator::TDetectorSpecificationVec& spec, const std::string& indent = std::string()) { std::ostringstream result; - for (std::size_t i = 0u; i < spec.size(); ++i) - { + for (std::size_t i = 0u; i < spec.size(); ++i) { result << indent << spec[i].description() << "\n"; } return result.str(); } } -void CDetectorEnumeratorTest::testAll() -{ +void CDetectorEnumeratorTest::testAll() { LOG_DEBUG(""); LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CDetectorEnumeratorTest::testAll |"); @@ -217,13 +212,11 @@ void CDetectorEnumeratorTest::testAll() } } -CppUnit::Test *CDetectorEnumeratorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDetectorEnumeratorTest"); +CppUnit::Test* CDetectorEnumeratorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetectorEnumeratorTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CDetectorEnumeratorTest::testAll", - &CDetectorEnumeratorTest::testAll) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectorEnumeratorTest::testAll", &CDetectorEnumeratorTest::testAll)); return suiteOfTests; } diff --git a/lib/config/unittest/CDetectorEnumeratorTest.h b/lib/config/unittest/CDetectorEnumeratorTest.h index c6ccb9be39..b5b3d8764b 100644 --- a/lib/config/unittest/CDetectorEnumeratorTest.h +++ b/lib/config/unittest/CDetectorEnumeratorTest.h @@ -9,12 +9,11 @@ #include -class CDetectorEnumeratorTest : public CppUnit::TestFixture -{ - public: - void testAll(); +class CDetectorEnumeratorTest : public CppUnit::TestFixture { +public: + void testAll(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CDetectorEnumeratorTest_h diff --git a/lib/config/unittest/CReportWriterTest.cc b/lib/config/unittest/CReportWriterTest.cc index f45c1fa222..167fc15c82 100644 --- a/lib/config/unittest/CReportWriterTest.cc +++ b/lib/config/unittest/CReportWriterTest.cc @@ -7,8 +7,8 @@ #include "CReportWriterTest.h" #include -#include #include +#include #include #include @@ -23,8 +23,7 @@ using TDoubleVec = std::vector; using TSizeVec = std::vector; using TStrVec = std::vector; -void CReportWriterTest::testPretty() -{ +void CReportWriterTest::testPretty() { LOG_DEBUG(""); LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CReportWriterTest::testPretty |"); @@ -33,72 +32,48 @@ void CReportWriterTest::testPretty() test::CRandomNumbers rng; core_t::TTime startTime = 1459468810; - core_t::TTime endTime = startTime + 7 * core::constants::DAY; - - std::string fields[] = - { - std::string("name"), - std::string("phylum"), - std::string("species"), - std::string("code"), - std::string("weight") - }; - - std::string categories1[] = - { - std::string("Annelida"), - std::string("Nematoda"), - std::string("Arthropoda"), - std::string("Chordata") - }; - - std::size_t breaks[] = { 0, 6, 10, 13, 20 }; - std::string categories2[] = - { - // Annelida - std::string("Amage auricula"), - std::string("Eunice purpurea"), - std::string("Dorvillea kastjani"), - std::string("Dalhousiella carpenteri"), - std::string("Dysponetus gracilisi"), - std::string("Macellicephala incerta"), - // Nematoda - std::string("Microlaimus robustidens"), - 
std::string("Theristus longisetosus"), - std::string("Rhynchonema cemae"), - std::string("Contracaecum chubutensis"), - // Arthropoda - std::string("black widow"), - std::string("Daddy longleg"), - std::string("Lobster"), - // Chordata - std::string("hag fish"), - std::string("hen"), - std::string("elephant"), - std::string("dog"), - std::string("shrew"), - std::string("weasel"), - std::string("lemming") - }; + core_t::TTime endTime = startTime + 7 * core::constants::DAY; + + std::string fields[] = {std::string("name"), std::string("phylum"), std::string("species"), std::string("code"), std::string("weight")}; + + std::string categories1[] = {std::string("Annelida"), std::string("Nematoda"), std::string("Arthropoda"), std::string("Chordata")}; + + std::size_t breaks[] = {0, 6, 10, 13, 20}; + std::string categories2[] = {// Annelida + std::string("Amage auricula"), + std::string("Eunice purpurea"), + std::string("Dorvillea kastjani"), + std::string("Dalhousiella carpenteri"), + std::string("Dysponetus gracilisi"), + std::string("Macellicephala incerta"), + // Nematoda + std::string("Microlaimus robustidens"), + std::string("Theristus longisetosus"), + std::string("Rhynchonema cemae"), + std::string("Contracaecum chubutensis"), + // Arthropoda + std::string("black widow"), + std::string("Daddy longleg"), + std::string("Lobster"), + // Chordata + std::string("hag fish"), + std::string("hen"), + std::string("elephant"), + std::string("dog"), + std::string("shrew"), + std::string("weasel"), + std::string("lemming")}; TStrVec codes; rng.generateWords(6, 2000, codes); - double weights[] = - { - 0.01, 0.05, 0.1, 0.05, 0.01, - 0.5, 0.001, 0.0003, 0.01, 0.0004, - 1.3, 1.1, 520.0, 1200.0, 810.1, - 1000000.0, 5334.0, 70.0, 180.0, 100.3 - }; + double weights[] = {0.01, 0.05, 0.1, 0.05, 0.01, 0.5, 0.001, 0.0003, 0.01, 0.0004, + 1.3, 1.1, 520.0, 1200.0, 810.1, 1000000.0, 5334.0, 70.0, 180.0, 100.3}; config::CDataSummaryStatistics stats1; - config::CCategoricalDataSummaryStatistics stats2[] = - { - config::CCategoricalDataSummaryStatistics(10), - config::CCategoricalDataSummaryStatistics(10), - config::CCategoricalDataSummaryStatistics(10) - }; + config::CCategoricalDataSummaryStatistics stats2[] = {config::CCategoricalDataSummaryStatistics(10), + config::CCategoricalDataSummaryStatistics(10), + config::CCategoricalDataSummaryStatistics(10)}; config::CNumericDataSummaryStatistics stats3(false); uint64_t n = 0; @@ -107,12 +82,9 @@ void CReportWriterTest::testPretty() TDoubleVec dt; TDoubleVec weight; TSizeVec index; - for (core_t::TTime time = startTime; time < endTime; time += static_cast(dt[0])) - { - double progress = static_cast(time - startTime) - / static_cast((endTime - startTime)); - if (progress > lastProgress + 0.05) - { + for (core_t::TTime time = startTime; time < endTime; time += static_cast(dt[0])) { + double progress = static_cast(time - startTime) / static_cast((endTime - startTime)); + if (progress > lastProgress + 0.05) { LOG_DEBUG("Processed " << progress * 100.0 << "%"); lastProgress = progress; } @@ -121,17 +93,17 @@ void CReportWriterTest::testPretty() stats1.add(time); rng.generateUniformSamples(0, boost::size(categories1), 1, index); - const std::string &phylum = categories1[index[0]]; + const std::string& phylum = categories1[index[0]]; stats2[0].add(time, phylum); - rng.generateUniformSamples(breaks[index[0]], breaks[index[0]+1], 1, index); - const std::string &species = categories2[index[0]]; + rng.generateUniformSamples(breaks[index[0]], breaks[index[0] + 1], 1, index); + const 
std::string& species = categories2[index[0]]; stats2[1].add(time, species); double weight_ = weights[index[0]]; rng.generateUniformSamples(0, codes.size(), 1, index); - const std::string &code = codes[index[0]]; + const std::string& code = codes[index[0]]; stats2[2].add(time, code); double range = weight_ > 1.0 ? std::sqrt(weight_) : weight_ * weight_; @@ -158,21 +130,17 @@ void CReportWriterTest::testPretty() LOG_DEBUG(o.str()); } -void CReportWriterTest::testJSON() -{ +void CReportWriterTest::testJSON() { LOG_DEBUG(""); LOG_DEBUG("+-------------------------------+"); LOG_DEBUG("| CReportWriterTest::testJSON |"); LOG_DEBUG("+-------------------------------+"); } -CppUnit::Test *CReportWriterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CReportWriterTest"); +CppUnit::Test* CReportWriterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CReportWriterTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CReportWriterTest::testPretty", - &CReportWriterTest::testPretty) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CReportWriterTest::testPretty", &CReportWriterTest::testPretty)); return suiteOfTests; } diff --git a/lib/config/unittest/CReportWriterTest.h b/lib/config/unittest/CReportWriterTest.h index 0f2a4d4135..09ff918a27 100644 --- a/lib/config/unittest/CReportWriterTest.h +++ b/lib/config/unittest/CReportWriterTest.h @@ -9,13 +9,12 @@ #include -class CReportWriterTest : public CppUnit::TestFixture -{ - public: - void testPretty(); - void testJSON(); +class CReportWriterTest : public CppUnit::TestFixture { +public: + void testPretty(); + void testJSON(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CReportWriterTest_h diff --git a/lib/config/unittest/Main.cc b/lib/config/unittest/Main.cc index 0a2cf3d08f..f3412c60ac 100644 --- a/lib/config/unittest/Main.cc +++ b/lib/config/unittest/Main.cc @@ -12,15 +12,14 @@ #include "CDetectorEnumeratorTest.h" #include "CReportWriterTest.h" -int main(int argc, const char **argv) -{ +int main(int argc, const char** argv) { ml::test::CTestRunner runner(argc, argv); - runner.addTest( CAutoconfigurerParamsTest::suite() ); - runner.addTest( CDataSemanticsTest::suite() ); - runner.addTest( CDataSummaryStatisticsTest::suite() ); - runner.addTest( CDetectorEnumeratorTest::suite() ); - runner.addTest( CReportWriterTest::suite() ); + runner.addTest(CAutoconfigurerParamsTest::suite()); + runner.addTest(CDataSemanticsTest::suite()); + runner.addTest(CDataSummaryStatisticsTest::suite()); + runner.addTest(CDetectorEnumeratorTest::suite()); + runner.addTest(CReportWriterTest::suite()); return !runner.runTests(); } diff --git a/lib/core/CBase64Filter.cc b/lib/core/CBase64Filter.cc index cb846e787c..8b28eca3fe 100644 --- a/lib/core/CBase64Filter.cc +++ b/lib/core/CBase64Filter.cc @@ -6,25 +6,19 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -CBase64Encoder::CBase64Encoder() : m_Buffer(4096) -{ +CBase64Encoder::CBase64Encoder() : m_Buffer(4096) { } -CBase64Encoder::~CBase64Encoder() -{ +CBase64Encoder::~CBase64Encoder() { } -CBase64Decoder::CBase64Decoder() : m_BufferIn(4096), m_BufferOut(4096), m_Eos(false) -{ +CBase64Decoder::CBase64Decoder() : m_BufferIn(4096), m_BufferOut(4096), m_Eos(false) { } -CBase64Decoder::~CBase64Decoder() -{ +CBase64Decoder::~CBase64Decoder() { } } // core diff --git a/lib/core/CBufferFlushTimer.cc b/lib/core/CBufferFlushTimer.cc index 042b28fa7f..ae91ca8be4 100644 --- 
a/lib/core/CBufferFlushTimer.cc +++ b/lib/core/CBufferFlushTimer.cc @@ -9,26 +9,16 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CBufferFlushTimer::CBufferFlushTimer() - : m_LastMaxTime(0), - m_LastFlushTime(0) -{ +CBufferFlushTimer::CBufferFlushTimer() : m_LastMaxTime(0), m_LastFlushTime(0) { } -core_t::TTime CBufferFlushTimer::flushTime(core_t::TTime bufferDelay, - core_t::TTime bufferMaxTime) -{ +core_t::TTime CBufferFlushTimer::flushTime(core_t::TTime bufferDelay, core_t::TTime bufferMaxTime) { core_t::TTime now(CTimeUtils::now()); - if (bufferMaxTime == 0) - { + if (bufferMaxTime == 0) { // If we get here then there's no evidence to be flushed. // However, downstream components might still be relying on // us to return a sensible time, so pretend the buffer time @@ -36,18 +26,15 @@ core_t::TTime CBufferFlushTimer::flushTime(core_t::TTime bufferDelay, bufferMaxTime = m_LastMaxTime; } - if (m_LastMaxTime == bufferMaxTime) - { + if (m_LastMaxTime == bufferMaxTime) { // Same max time core_t::TTime ahead(now - m_LastFlushTime); // If max time has been the same for bufferDelay seconds // flush based on elapsed real time - if (ahead > bufferDelay) - { + if (ahead > bufferDelay) { // Defend against wrap - if (bufferMaxTime - bufferDelay >= std::numeric_limits::max() - ahead) - { + if (bufferMaxTime - bufferDelay >= std::numeric_limits::max() - ahead) { return std::numeric_limits::max(); } @@ -63,8 +50,5 @@ core_t::TTime CBufferFlushTimer::flushTime(core_t::TTime bufferDelay, return bufferMaxTime - bufferDelay; } - - } } - diff --git a/lib/core/CCTimeR.cc b/lib/core/CCTimeR.cc index 7eb5a1d4f7..f43ed65e6d 100644 --- a/lib/core/CCTimeR.cc +++ b/lib/core/CCTimeR.cc @@ -5,19 +5,11 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -char *CCTimeR::cTimeR(const time_t *clock, char *result) -{ +char* CCTimeR::cTimeR(const time_t* clock, char* result) { return ::ctime_r(clock, result); } - - } } - diff --git a/lib/core/CCTimeR_Windows.cc b/lib/core/CCTimeR_Windows.cc index cb19205af7..7b3fe9f48c 100644 --- a/lib/core/CCTimeR_Windows.cc +++ b/lib/core/CCTimeR_Windows.cc @@ -5,15 +5,10 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -char *CCTimeR::cTimeR(const time_t *clock, char *result) -{ +char* CCTimeR::cTimeR(const time_t* clock, char* result) { // This is effectively bypassing the security feature of the Windows // ctime_s() call, but the wrapper function has the arguments of the // vulnerable Unix ctime_r() function, so we don't know the real buffer @@ -24,8 +19,5 @@ char *CCTimeR::cTimeR(const time_t *clock, char *result) return result; } - - } } - diff --git a/lib/core/CCompressOStream.cc b/lib/core/CCompressOStream.cc index 3e0ebb4f85..59d19664cf 100644 --- a/lib/core/CCompressOStream.cc +++ b/lib/core/CCompressOStream.cc @@ -12,48 +12,37 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -CCompressOStream::CCompressOStream(CStateCompressor::CChunkFilter &filter) : - std::ostream(&m_StreamBuf), - m_UploadThread(*this, - m_StreamBuf, - filter) -{ +CCompressOStream::CCompressOStream(CStateCompressor::CChunkFilter& filter) + : std::ostream(&m_StreamBuf), m_UploadThread(*this, m_StreamBuf, filter) { - if (m_UploadThread.start() == false) - { + if (m_UploadThread.start() == false) { this->setstate(std::ios_base::failbit | std::ios_base::badbit); } } -CCompressOStream::~CCompressOStream() -{ +CCompressOStream::~CCompressOStream() { this->close(); } 
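
The class above wires a gzip_compressor and a sink into a boost::iostreams
filtering chain, with the writer and compressor on separate threads. The same
pattern, minus the threading, is shown standalone below; the std::string sink
is only illustrative and stands in for the CStateCompressor::CChunkFilter.

    #include <boost/iostreams/device/back_inserter.hpp>
    #include <boost/iostreams/filter/gzip.hpp>
    #include <boost/iostreams/filtering_stream.hpp>
    #include <iostream>
    #include <string>

    int main() {
        std::string compressed;
        {
            boost::iostreams::filtering_ostream out;
            out.push(boost::iostreams::gzip_compressor());
            out.push(boost::iostreams::back_inserter(compressed));
            out << "some state document to compress";
        } // destroying the chain flushes the compressor and writes the gzip footer
        std::cout << "compressed to " << compressed.size() << " bytes\n";
        return 0;
    }
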
-void CCompressOStream::close() -{ - if (m_UploadThread.isStarted()) - { +void CCompressOStream::close() { + if (m_UploadThread.isStarted()) { LOG_TRACE("Thread has been started, so stopping it"); - if (m_UploadThread.stop() == false) - { + if (m_UploadThread.stop() == false) { this->setstate(std::ios_base::failbit | std::ios_base::badbit); } } } -CCompressOStream::CCompressThread::CCompressThread(CCompressOStream &stream, - CDualThreadStreamBuf &streamBuf, - CStateCompressor::CChunkFilter &filter) : - m_Stream(stream), - m_StreamBuf(streamBuf), - m_FilterSink(filter), - m_OutFilter() +CCompressOStream::CCompressThread::CCompressThread(CCompressOStream& stream, + CDualThreadStreamBuf& streamBuf, + CStateCompressor::CChunkFilter& filter) + : m_Stream(stream), + m_StreamBuf(streamBuf), + m_FilterSink(filter), + m_OutFilter() { m_OutFilter.push(boost::iostreams::gzip_compressor()); @@ -61,26 +50,21 @@ CCompressOStream::CCompressThread::CCompressThread(CCompressOStream &stream, m_OutFilter.push(boost::ref(m_FilterSink)); } -void CCompressOStream::CCompressThread::run() -{ +void CCompressOStream::CCompressThread::run() { LOG_TRACE("CompressThread run"); char buf[4096]; std::size_t bytesDone = 0; bool closeMe = false; - while (closeMe == false) - { + while (closeMe == false) { std::streamsize n = m_StreamBuf.sgetn(buf, 4096); LOG_TRACE("Read from in stream: " << n); - if (n != -1) - { + if (n != -1) { bytesDone += n; m_OutFilter.write(buf, n); } - if (m_StreamBuf.endOfFile() && - (m_StreamBuf.in_avail() == 0)) - { + if (m_StreamBuf.endOfFile() && (m_StreamBuf.in_avail() == 0)) { closeMe = true; } } @@ -88,13 +72,10 @@ void CCompressOStream::CCompressThread::run() boost::iostreams::close(m_OutFilter); } -void CCompressOStream::CCompressThread::shutdown() -{ +void CCompressOStream::CCompressThread::shutdown() { m_StreamBuf.signalEndOfFile(); LOG_TRACE("CompressThread shutdown called"); - } } // core } // ml - diff --git a/lib/core/CCompressUtils.cc b/lib/core/CCompressUtils.cc index 1df6cca5e1..0f593a00c5 100644 --- a/lib/core/CCompressUtils.cc +++ b/lib/core/CCompressUtils.cc @@ -9,42 +9,30 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CCompressUtils::CCompressUtils(bool lengthOnly, int level) - : m_State(E_Unused), - m_LengthOnly(lengthOnly) -{ +CCompressUtils::CCompressUtils(bool lengthOnly, int level) : m_State(E_Unused), m_LengthOnly(lengthOnly) { ::memset(&m_ZlibStrm, 0, sizeof(z_stream)); m_ZlibStrm.zalloc = Z_NULL; m_ZlibStrm.zfree = Z_NULL; int ret(::deflateInit(&m_ZlibStrm, level)); - if (ret != Z_OK) - { + if (ret != Z_OK) { LOG_ABORT("Error initialising Z stream: " << ::zError(ret)); } } -CCompressUtils::~CCompressUtils() -{ +CCompressUtils::~CCompressUtils() { int ret(::deflateEnd(&m_ZlibStrm)); - if (ret != Z_OK) - { + if (ret != Z_OK) { LOG_ERROR("Error ending Z stream: " << ::zError(ret)); } } -bool CCompressUtils::addString(const std::string &str) -{ - if (m_State == E_Finished) - { +bool CCompressUtils::addString(const std::string& str) { + if (m_State == E_Finished) { // If the previous compression has finished and we're adding a new // string then we need to reset the stream so that a new compression // starts from scratch @@ -54,24 +42,19 @@ bool CCompressUtils::addString(const std::string &str) return this->doCompress(false, str); } -bool CCompressUtils::compressedData(bool finish, TByteVec &result) -{ - if (m_LengthOnly) - { +bool CCompressUtils::compressedData(bool finish, TByteVec& result) { + if (m_LengthOnly) { LOG_ERROR("Cannot 
get compressed data from length-only compressor"); return false; } - if (m_State == E_Unused) - { + if (m_State == E_Unused) { LOG_ERROR("Cannot get compressed data - no strings added"); return false; } - if (finish && m_State == E_Compressing) - { - if (this->doCompress(finish, std::string()) == false) - { + if (finish && m_State == E_Compressing) { + if (this->doCompress(finish, std::string()) == false) { LOG_ERROR("Cannot finish compression"); return false; } @@ -82,18 +65,14 @@ bool CCompressUtils::compressedData(bool finish, TByteVec &result) return true; } -bool CCompressUtils::compressedLength(bool finish, size_t &length) -{ - if (m_State == E_Unused) - { +bool CCompressUtils::compressedLength(bool finish, size_t& length) { + if (m_State == E_Unused) { LOG_ERROR("Cannot get compressed data - no strings added"); return false; } - if (finish && m_State == E_Compressing) - { - if (this->doCompress(finish, std::string()) == false) - { + if (finish && m_State == E_Compressing) { + if (this->doCompress(finish, std::string()) == false) { LOG_ERROR("Cannot finish compression"); return false; } @@ -104,11 +83,9 @@ bool CCompressUtils::compressedLength(bool finish, size_t &length) return true; } -void CCompressUtils::reset() -{ +void CCompressUtils::reset() { int ret(::deflateReset(&m_ZlibStrm)); - if (ret != Z_OK) - { + if (ret != Z_OK) { // deflateReset() will only fail if one or more of the critical members // of the current stream struct are NULL. If this happens then memory // corruption must have occurred, because there's nowhere where we set @@ -119,47 +96,38 @@ void CCompressUtils::reset() m_State = E_Unused; } -bool CCompressUtils::doCompress(bool finish, const std::string &str) -{ - if (str.empty() && m_State == E_Compressing && !finish) - { +bool CCompressUtils::doCompress(bool finish, const std::string& str) { + if (str.empty() && m_State == E_Compressing && !finish) { return true; } m_State = E_Compressing; - m_ZlibStrm.next_in = reinterpret_cast(const_cast(str.data())); + m_ZlibStrm.next_in = reinterpret_cast(const_cast(str.data())); m_ZlibStrm.avail_in = static_cast(str.size()); static const size_t CHUNK_SIZE = 4096; Bytef out[CHUNK_SIZE]; int flush(finish ? Z_FINISH : Z_NO_FLUSH); - do - { + do { m_ZlibStrm.next_out = out; m_ZlibStrm.avail_out = CHUNK_SIZE; int ret(::deflate(&m_ZlibStrm, flush)); - if (ret == Z_STREAM_ERROR) - { + if (ret == Z_STREAM_ERROR) { LOG_ERROR("Error deflating: " << ::zError(ret)); return false; } size_t have(CHUNK_SIZE - m_ZlibStrm.avail_out); - if (!m_LengthOnly) - { + if (!m_LengthOnly) { m_FullResult.insert(m_FullResult.end(), &out[0], &out[have]); } - } - while (m_ZlibStrm.avail_out == 0); + } while (m_ZlibStrm.avail_out == 0); m_State = finish ? 
E_Finished : E_Compressing; return true; } - - } } - diff --git a/lib/core/CCompressedDictionary.cc b/lib/core/CCompressedDictionary.cc index 1ae95d472c..f06863ac7a 100644 --- a/lib/core/CCompressedDictionary.cc +++ b/lib/core/CCompressedDictionary.cc @@ -6,10 +6,6 @@ #include -namespace ml -{ -namespace core -{ - -} +namespace ml { +namespace core {} } diff --git a/lib/core/CCondition.cc b/lib/core/CCondition.cc index 194d152fd4..840d225768 100644 --- a/lib/core/CCondition.cc +++ b/lib/core/CCondition.cc @@ -12,39 +12,28 @@ #include #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CCondition::CCondition(CMutex &mutex) - : m_Mutex(mutex) -{ +CCondition::CCondition(CMutex& mutex) : m_Mutex(mutex) { int ret(::pthread_cond_init(&m_Condition, 0)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -CCondition::~CCondition() -{ +CCondition::~CCondition() { int ret(::pthread_cond_destroy(&m_Condition)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -bool CCondition::wait() -{ +bool CCondition::wait() { // Note: pthread_cond_wait() returns 0 if interrupted by a signal, so the // caller must check a condition that will detect spurious wakeups int ret(::pthread_cond_wait(&m_Condition, &m_Mutex.m_Mutex)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(errno)); return false; } @@ -52,22 +41,18 @@ bool CCondition::wait() return true; } -bool CCondition::wait(uint32_t t) -{ +bool CCondition::wait(uint32_t t) { timespec tm; - if (CCondition::convert(t, tm) == false) - { + if (CCondition::convert(t, tm) == false) { return false; } // Note: pthread_cond_timedwait() returns 0 if interrupted by a signal, so // the caller must check a condition that will detect spurious wakeups int ret(::pthread_cond_timedwait(&m_Condition, &m_Mutex.m_Mutex, &tm)); - if (ret != 0) - { - if (ret != ETIMEDOUT) - { + if (ret != 0) { + if (ret != ETIMEDOUT) { LOG_WARN(t << ' ' << ::strerror(errno)); return false; } @@ -76,29 +61,23 @@ bool CCondition::wait(uint32_t t) return true; } -void CCondition::signal() -{ +void CCondition::signal() { int ret(::pthread_cond_signal(&m_Condition)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void CCondition::broadcast() -{ +void CCondition::broadcast() { int ret(::pthread_cond_broadcast(&m_Condition)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -bool CCondition::convert(uint32_t t, timespec &tm) -{ +bool CCondition::convert(uint32_t t, timespec& tm) { timeval now; - if (::gettimeofday(&now, 0) < 0) - { + if (::gettimeofday(&now, 0) < 0) { LOG_WARN(::strerror(errno)); return false; } @@ -108,12 +87,9 @@ bool CCondition::convert(uint32_t t, timespec &tm) tm.tv_sec = now.tv_sec + (t / 1000); uint32_t remainder(static_cast(t % 1000)); - if (remainder == 0) - { + if (remainder == 0) { tm.tv_nsec = now.tv_usec * 1000; - } - else - { + } else { // s is in microseconds uint32_t s((remainder * 1000U) + static_cast(now.tv_usec)); @@ -123,8 +99,5 @@ bool CCondition::convert(uint32_t t, timespec &tm) return true; } - - } } - diff --git a/lib/core/CCondition_Windows.cc b/lib/core/CCondition_Windows.cc index 09a5b11619..8a53dbbbe5 100644 --- a/lib/core/CCondition_Windows.cc +++ b/lib/core/CCondition_Windows.cc @@ -9,31 +9,20 @@ #include #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CCondition::CCondition(CMutex &mutex) - : m_Mutex(mutex) -{ +CCondition::CCondition(CMutex& mutex) : m_Mutex(mutex) { 
InitializeConditionVariable(&m_Condition); } -CCondition::~CCondition() -{ +CCondition::~CCondition() { // There's no need to explicitly destroy a Windows condition variable } -bool CCondition::wait() -{ - BOOL success(SleepConditionVariableCS(&m_Condition, - &m_Mutex.m_Mutex, - INFINITE)); - if (success == FALSE) - { +bool CCondition::wait() { + BOOL success(SleepConditionVariableCS(&m_Condition, &m_Mutex.m_Mutex, INFINITE)); + if (success == FALSE) { LOG_WARN("Condition wait failed : " << CWindowsError()); return false; } @@ -41,16 +30,11 @@ bool CCondition::wait() return true; } -bool CCondition::wait(uint32_t t) -{ - BOOL success(SleepConditionVariableCS(&m_Condition, - &m_Mutex.m_Mutex, - t)); - if (success == FALSE) - { +bool CCondition::wait(uint32_t t) { + BOOL success(SleepConditionVariableCS(&m_Condition, &m_Mutex.m_Mutex, t)); + if (success == FALSE) { DWORD errorCode(GetLastError()); - if (errorCode != WAIT_TIMEOUT && errorCode != ERROR_TIMEOUT) - { + if (errorCode != WAIT_TIMEOUT && errorCode != ERROR_TIMEOUT) { LOG_WARN("Condition wait failed : " << CWindowsError(errorCode)); return false; } @@ -59,17 +43,12 @@ bool CCondition::wait(uint32_t t) return true; } -void CCondition::signal() -{ +void CCondition::signal() { WakeConditionVariable(&m_Condition); } -void CCondition::broadcast() -{ +void CCondition::broadcast() { WakeAllConditionVariable(&m_Condition); } - - } } - diff --git a/lib/core/CContainerPrinter.cc b/lib/core/CContainerPrinter.cc index f8208a1d7d..a1381ca8a2 100644 --- a/lib/core/CContainerPrinter.cc +++ b/lib/core/CContainerPrinter.cc @@ -6,12 +6,9 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { // Template defined in the header file. - } } diff --git a/lib/core/CCrashHandler.cc b/lib/core/CCrashHandler.cc index 91d756dd5f..803ed290ba 100644 --- a/lib/core/CCrashHandler.cc +++ b/lib/core/CCrashHandler.cc @@ -5,16 +5,11 @@ */ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -void CCrashHandler::installCrashHandler() -{ +void CCrashHandler::installCrashHandler() { // do nothing, see platform specific actions } - } } - diff --git a/lib/core/CCrashHandler_Linux.cc b/lib/core/CCrashHandler_Linux.cc index 4409ac7212..da97107e22 100644 --- a/lib/core/CCrashHandler_Linux.cc +++ b/lib/core/CCrashHandler_Linux.cc @@ -13,14 +13,11 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { //! get useful information for debugging -void crashHandler(int sig, siginfo_t *info, void *context) -{ +void crashHandler(int sig, siginfo_t* info, void* context) { // reset all handlers signal(SIGILL, SIG_DFL); signal(SIGABRT, SIG_DFL); @@ -31,8 +28,8 @@ void crashHandler(int sig, siginfo_t *info, void *context) // note: Not using backtrace(...) as it does only contain information for the main thread, // but the segfault could have happened on a different thread. - ucontext_t *uContext = static_cast(context); - void *errorAddress = 0; + ucontext_t* uContext = static_cast(context); + void* errorAddress = 0; // various platform specifics, although we do not need all of them #ifdef REG_RIP // x86_64 @@ -46,28 +43,28 @@ void crashHandler(int sig, siginfo_t *info, void *context) #elif defined(__ppc__) || defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) errorAddress = reinterpret_cast(uContext->uc_mcontext.regs->nip); #else -# error ":/ sorry, ain't know no nothing none not of your architecture!" +#error ":/ sorry, ain't know no nothing none not of your architecture!" 
#endif Dl_info symbolInfo; dladdr(errorAddress, &symbolInfo); - fprintf(stderr, "si_signo %d, si_code: %d, si_errno: %d, address: %p, library: %s, base: %p, normalized address: %p\n", + fprintf(stderr, + "si_signo %d, si_code: %d, si_errno: %d, address: %p, library: %s, base: %p, normalized address: %p\n", info->si_signo, info->si_code, info->si_errno, - errorAddress, symbolInfo.dli_fname, symbolInfo.dli_fbase, - reinterpret_cast( - reinterpret_cast(errorAddress) - - reinterpret_cast(symbolInfo.dli_fbase))); + errorAddress, + symbolInfo.dli_fname, + symbolInfo.dli_fbase, + reinterpret_cast(reinterpret_cast(errorAddress) - reinterpret_cast(symbolInfo.dli_fbase))); // Still generate a core dump, // see http://www.alexonlinux.com/how-to-handle-sigsegv-but-also-generate-core-dump raise(sig); } -void CCrashHandler::installCrashHandler() -{ +void CCrashHandler::installCrashHandler() { struct sigaction actionOnCrash; std::memset(&actionOnCrash, 0, sizeof actionOnCrash); actionOnCrash.sa_flags = (SA_SIGINFO | SA_ONSTACK | SA_NODEFER); @@ -81,7 +78,5 @@ void CCrashHandler::installCrashHandler() sigaction(SIGSEGV, &actionOnCrash, nullptr); sigaction(SIGSTKFLT, &actionOnCrash, nullptr); } - } } - diff --git a/lib/core/CDataAdder.cc b/lib/core/CDataAdder.cc index a5c95fec9a..20fd2a7b1f 100644 --- a/lib/core/CDataAdder.cc +++ b/lib/core/CDataAdder.cc @@ -8,41 +8,30 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { - -CDataAdder::~CDataAdder() -{ +CDataAdder::~CDataAdder() { // Most compilers put the vtable in the object file containing the // definition of the first non-inlined virtual function, so DON'T move this // empty definition to the header file! } -std::size_t CDataAdder::maxDocumentsPerBatchSave() const -{ +std::size_t CDataAdder::maxDocumentsPerBatchSave() const { return std::numeric_limits::max(); } -std::size_t CDataAdder::maxDocumentSize() const -{ +std::size_t CDataAdder::maxDocumentSize() const { return std::numeric_limits::max(); } -std::string CDataAdder::makeCurrentDocId(const std::string &baseId, - size_t currentDocNum) -{ +std::string CDataAdder::makeCurrentDocId(const std::string& baseId, size_t currentDocNum) { std::ostringstream strm; - if (!baseId.empty()) - { + if (!baseId.empty()) { strm << baseId << '#'; } strm << currentDocNum; return strm.str(); } - - } } diff --git a/lib/core/CDataSearcher.cc b/lib/core/CDataSearcher.cc index d08f165ed6..e33dc26ec7 100644 --- a/lib/core/CDataSearcher.cc +++ b/lib/core/CDataSearcher.cc @@ -5,37 +5,25 @@ */ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { const std::string CDataSearcher::EMPTY_STRING; - -CDataSearcher::CDataSearcher() - : m_SearchTerms(2) -{ +CDataSearcher::CDataSearcher() : m_SearchTerms(2) { } -CDataSearcher::~CDataSearcher() -{ +CDataSearcher::~CDataSearcher() { } -void CDataSearcher::setStateRestoreSearch(const std::string &index) -{ +void CDataSearcher::setStateRestoreSearch(const std::string& index) { m_SearchTerms[0] = index; m_SearchTerms[1].clear(); } -void CDataSearcher::setStateRestoreSearch(const std::string &index, - const std::string &id) -{ +void CDataSearcher::setStateRestoreSearch(const std::string& index, const std::string& id) { m_SearchTerms[0] = index; m_SearchTerms[1] = id; } - - } } diff --git a/lib/core/CDelimiter.cc b/lib/core/CDelimiter.cc index a4bb3d35f4..c7fa799dc9 100644 --- a/lib/core/CDelimiter.cc +++ b/lib/core/CDelimiter.cc @@ -10,80 +10,60 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { 
+namespace core { const std::string CDelimiter::DEFAULT_DELIMITER(","); - CDelimiter::CDelimiter() : m_Valid(m_Delimiter.init(DEFAULT_DELIMITER)), m_HaveFollowingRegex(false), m_WaiveFollowingRegexAfterTime(false), m_Quote('\0'), - m_Escape('\0') -{ + m_Escape('\0') { } -CDelimiter::CDelimiter(const std::string &delimiter) +CDelimiter::CDelimiter(const std::string& delimiter) : m_Valid(m_Delimiter.init(delimiter)), m_HaveFollowingRegex(false), m_WaiveFollowingRegexAfterTime(false), m_Quote('\0'), - m_Escape('\0') -{ - if (!m_Valid) - { + m_Escape('\0') { + if (!m_Valid) { LOG_ERROR("Unable to set delimiter regex to " << delimiter); } } -CDelimiter::CDelimiter(const std::string &delimiter, const std::string &followingRegex, bool orTime) +CDelimiter::CDelimiter(const std::string& delimiter, const std::string& followingRegex, bool orTime) : m_Valid(m_Delimiter.init(delimiter)), m_HaveFollowingRegex(m_FollowingRegex.init(followingRegex)), m_WaiveFollowingRegexAfterTime(orTime), m_Quote('\0'), - m_Escape('\0') -{ - if (!m_Valid) - { + m_Escape('\0') { + if (!m_Valid) { LOG_ERROR("Unable to set delimiter regex to " << delimiter); } - if (!m_HaveFollowingRegex) - { + if (!m_HaveFollowingRegex) { LOG_ERROR("Unable to set following regex to " << followingRegex); } } -bool CDelimiter::operator==(const CDelimiter &rhs) const -{ - if (m_Valid != rhs.m_Valid || - m_HaveFollowingRegex != rhs.m_HaveFollowingRegex || - m_WaiveFollowingRegexAfterTime != rhs.m_WaiveFollowingRegexAfterTime || - m_Quote != rhs.m_Quote || - m_Escape != rhs.m_Escape) - { +bool CDelimiter::operator==(const CDelimiter& rhs) const { + if (m_Valid != rhs.m_Valid || m_HaveFollowingRegex != rhs.m_HaveFollowingRegex || + m_WaiveFollowingRegexAfterTime != rhs.m_WaiveFollowingRegexAfterTime || m_Quote != rhs.m_Quote || m_Escape != rhs.m_Escape) { return false; } // Only test more complex conditions if simple ones passed - if (m_Valid) - { - if (m_Delimiter.str() != rhs.m_Delimiter.str()) - { + if (m_Valid) { + if (m_Delimiter.str() != rhs.m_Delimiter.str()) { return false; } } - if (m_HaveFollowingRegex) - { - if (m_FollowingRegex.str() != rhs.m_FollowingRegex.str()) - { + if (m_HaveFollowingRegex) { + if (m_FollowingRegex.str() != rhs.m_FollowingRegex.str()) { return false; } } @@ -91,92 +71,65 @@ bool CDelimiter::operator==(const CDelimiter &rhs) const return true; } -bool CDelimiter::operator!=(const CDelimiter &rhs) const -{ +bool CDelimiter::operator!=(const CDelimiter& rhs) const { return !this->operator==(rhs); } // Check whether the text that followed the primary delimiter was acceptable -bool CDelimiter::isFollowingTextAcceptable(size_t searchPos, - const std::string &str, - bool timePassed) const -{ +bool CDelimiter::isFollowingTextAcceptable(size_t searchPos, const std::string& str, bool timePassed) const { bool answer(false); - if (m_HaveFollowingRegex) - { - if (m_WaiveFollowingRegexAfterTime && - timePassed && - searchPos == str.length()) - { + if (m_HaveFollowingRegex) { + if (m_WaiveFollowingRegexAfterTime && timePassed && searchPos == str.length()) { answer = true; - } - else - { + } else { size_t foundPos(0); - bool found = m_FollowingRegex.search(searchPos, - str, - foundPos); - if (found && foundPos == searchPos) - { + bool found = m_FollowingRegex.search(searchPos, str, foundPos); + if (found && foundPos == searchPos) { answer = true; } } - } - else - { + } else { answer = true; } return answer; } -bool CDelimiter::valid() const -{ +bool CDelimiter::valid() const { return m_Valid; } -std::string 
CDelimiter::delimiter() const -{ +std::string CDelimiter::delimiter() const { return m_Delimiter.str(); } -void CDelimiter::tokenise(const std::string &str, - CStringUtils::TStrVec &tokens, - std::string &remainder) const -{ +void CDelimiter::tokenise(const std::string& str, CStringUtils::TStrVec& tokens, std::string& remainder) const { std::string exampleDelimiter; this->tokenise(str, false, tokens, exampleDelimiter, remainder); } -void CDelimiter::tokenise(const std::string &str, - bool timePassed, - CStringUtils::TStrVec &tokens, - std::string &remainder) const -{ +void CDelimiter::tokenise(const std::string& str, bool timePassed, CStringUtils::TStrVec& tokens, std::string& remainder) const { std::string exampleDelimiter; this->tokenise(str, timePassed, tokens, exampleDelimiter, remainder); } -void CDelimiter::tokenise(const std::string &str, - CStringUtils::TStrVec &tokens, - std::string &exampleDelimiter, - std::string &remainder) const -{ +void CDelimiter::tokenise(const std::string& str, + CStringUtils::TStrVec& tokens, + std::string& exampleDelimiter, + std::string& remainder) const { this->tokenise(str, false, tokens, exampleDelimiter, remainder); } -void CDelimiter::tokenise(const std::string &str, +void CDelimiter::tokenise(const std::string& str, bool timePassed, - CStringUtils::TStrVec &tokens, - std::string &exampleDelimiter, - std::string &remainder) const -{ + CStringUtils::TStrVec& tokens, + std::string& exampleDelimiter, + std::string& remainder) const { tokens.clear(); exampleDelimiter.clear(); - if (!m_Valid) - { + if (!m_Valid) { LOG_ERROR("Cannot tokenise using invalid delimiter"); remainder.clear(); return; @@ -189,13 +142,11 @@ void CDelimiter::tokenise(const std::string &str, bool expectingQuote(false); - for (;;) - { + for (;;) { size_t quotePos(this->getNextQuote(str, searchPos)); // Check if the very first character is a quote - if (quotePos == 0) - { + if (quotePos == 0) { searchPos = 1; tokenStartPos = 1; expectingQuote = true; @@ -204,8 +155,7 @@ void CDelimiter::tokenise(const std::string &str, // If we're expecting a quote and don't find one, the rest of the string // is remainder - if (expectingQuote && quotePos == std::string::npos) - { + if (expectingQuote && quotePos == std::string::npos) { // Don't unescape the result, as this might be from a partial read // that needs to be prepended to the next read remainder.assign(str, tokenStartPos, std::string::npos); @@ -214,21 +164,14 @@ void CDelimiter::tokenise(const std::string &str, } // Search for the delimiter - bool found(m_Delimiter.search(expectingQuote ? (quotePos + 1) : searchPos, - str, - delimStartPos, - delimLength)); - if (!found) - { - if (expectingQuote && quotePos < str.length()) - { + bool found(m_Delimiter.search(expectingQuote ? 
(quotePos + 1) : searchPos, str, delimStartPos, delimLength)); + if (!found) { + if (expectingQuote && quotePos < str.length()) { // If we're expecting a quote and find one, treat this as // another token remainder.assign(str, tokenStartPos, quotePos - tokenStartPos); CStringUtils::unEscape(m_Escape, remainder); - } - else - { + } else { // If we're not expecting a quote, don't unescape the result, // as this might be from a partial read that needs to be // prepended to the next read @@ -239,28 +182,22 @@ void CDelimiter::tokenise(const std::string &str, } // Check for stray quotes - if (!expectingQuote && quotePos <= delimStartPos) - { + if (!expectingQuote && quotePos <= delimStartPos) { LOG_WARN("String to be delimited does not conform to config:" - " quote pos " << quotePos << - " delim pos " << delimStartPos); + " quote pos " + << quotePos << " delim pos " << delimStartPos); } // Move the search position beyond the last, regardless of // whether it's acceptable as the end of the token searchPos = delimStartPos + delimLength; - if (this->isFollowingTextAcceptable(searchPos, - str, - timePassed)) - { - if (exampleDelimiter.empty()) - { + if (this->isFollowingTextAcceptable(searchPos, str, timePassed)) { + if (exampleDelimiter.empty()) { exampleDelimiter.assign(str, delimStartPos, delimLength); } size_t tokenLength(delimStartPos - tokenStartPos); - if (expectingQuote) - { + if (expectingQuote) { tokenLength = quotePos - tokenStartPos; } @@ -270,82 +207,63 @@ void CDelimiter::tokenise(const std::string &str, tokens.push_back(str.substr(tokenStartPos, tokenLength)); CStringUtils::unEscape(m_Escape, tokens.back()); - if (this->getNextQuote(str, searchPos) == searchPos) - { + if (this->getNextQuote(str, searchPos) == searchPos) { // Quote comes immediately after delimiter, so skip and // expect a quote next ++searchPos; expectingQuote = true; - } - else - { + } else { expectingQuote = false; } tokenStartPos = searchPos; - } - else - { - if (this->getNextQuote(str, searchPos) == searchPos) - { + } else { + if (this->getNextQuote(str, searchPos) == searchPos) { // Quote comes immediately after delimiter, so skip and // expect a quote next ++searchPos; expectingQuote = true; - } - else - { + } else { expectingQuote = false; } } } } -void CDelimiter::quote(char quote, char escape) -{ +void CDelimiter::quote(char quote, char escape) { m_Quote = quote; m_Escape = escape; } -char CDelimiter::quote() const -{ +char CDelimiter::quote() const { return m_Quote; } -size_t CDelimiter::getNextQuote(const std::string &str, size_t startPos) const -{ +size_t CDelimiter::getNextQuote(const std::string& str, size_t startPos) const { size_t result(std::string::npos); - if (m_Quote != '\0') - { - while (startPos < str.length()) - { + if (m_Quote != '\0') { + while (startPos < str.length()) { size_t quotePos(str.find(m_Quote, startPos)); // If no quote found at all then give up - if (quotePos == std::string::npos) - { + if (quotePos == std::string::npos) { break; } // If quote is not escaped, set result and stop - different logic is // needed for the case where the escape character is the same as the // quote character - if (m_Quote == m_Escape) - { - if (quotePos == str.length() - 1 || str[quotePos + 1] != m_Escape) - { + if (m_Quote == m_Escape) { + if (quotePos == str.length() - 1 || str[quotePos + 1] != m_Escape) { result = quotePos; break; } // Continue searching beyond the escaped quote startPos = quotePos + 2; - } - else - { - if (quotePos == startPos || str[quotePos - 1] != m_Escape) - { + } else { + if 
(quotePos == startPos || str[quotePos - 1] != m_Escape) { result = quotePos; break; } @@ -359,36 +277,28 @@ size_t CDelimiter::getNextQuote(const std::string &str, size_t startPos) const return result; } -std::ostream &operator<<(std::ostream &strm, const CDelimiter &delimiter) -{ +std::ostream& operator<<(std::ostream& strm, const CDelimiter& delimiter) { strm << "Delimiter { "; - if (delimiter.m_Valid) - { + if (delimiter.m_Valid) { strm << "Regex " << delimiter.m_Delimiter.str(); - if (delimiter.m_Quote != '\0') - { + if (delimiter.m_Quote != '\0') { strm << ", Quote " << delimiter.m_Quote; - if (delimiter.m_Escape != '\0') - { + if (delimiter.m_Escape != '\0') { strm << ", Escape " << delimiter.m_Escape; } } - if (delimiter.m_HaveFollowingRegex) - { + if (delimiter.m_HaveFollowingRegex) { strm << ", Following Regex " << delimiter.m_FollowingRegex.str(); } - if (delimiter.m_WaiveFollowingRegexAfterTime) - { + if (delimiter.m_WaiveFollowingRegexAfterTime) { strm << ", Following Regex Waived After Time"; } - } - else - { + } else { strm << "Invalid!"; } @@ -396,8 +306,5 @@ std::ostream &operator<<(std::ostream &strm, const CDelimiter &delimiter) return strm; } - - } } - diff --git a/lib/core/CDetachedProcessSpawner.cc b/lib/core/CDetachedProcessSpawner.cc index d59f692dcc..3f758ecbe1 100644 --- a/lib/core/CDetachedProcessSpawner.cc +++ b/lib/core/CDetachedProcessSpawner.cc @@ -28,27 +28,22 @@ #include // environ is a global variable from the C runtime library -extern char **environ; +extern char** environ; - -namespace -{ +namespace { //! Attempt to close all file descriptors except the standard ones. The //! standard file descriptors will be reopened on /dev/null in the spawned //! process. Returns false and sets errno if the actions cannot be initialised //! at all, but other errors are ignored. -bool setupFileActions(posix_spawn_file_actions_t *fileActions) -{ - if (::posix_spawn_file_actions_init(fileActions) != 0) - { +bool setupFileActions(posix_spawn_file_actions_t* fileActions) { + if (::posix_spawn_file_actions_init(fileActions) != 0) { return false; } struct rlimit rlim; ::memset(&rlim, 0, sizeof(struct rlimit)); - if (::getrlimit(RLIMIT_NOFILE, &rlim) != 0) - { + if (::getrlimit(RLIMIT_NOFILE, &rlim) != 0) { rlim.rlim_cur = 36; // POSIX default } @@ -58,25 +53,18 @@ bool setupFileActions(posix_spawn_file_actions_t *fileActions) // in reality it's unlikely that any file descriptors above a million will // be open at the time this function is called. int maxFd(rlim.rlim_cur > 1000000 ? 1000000 : static_cast(rlim.rlim_cur)); - for (int fd = 0; fd <= maxFd; ++fd) - { - if (fd == STDIN_FILENO) - { + for (int fd = 0; fd <= maxFd; ++fd) { + if (fd == STDIN_FILENO) { ::posix_spawn_file_actions_addopen(fileActions, fd, "/dev/null", O_RDONLY, S_IRUSR); - } - else if (fd == STDOUT_FILENO || fd == STDERR_FILENO) - { + } else if (fd == STDOUT_FILENO || fd == STDERR_FILENO) { ::posix_spawn_file_actions_addopen(fileActions, fd, "/dev/null", O_WRONLY, S_IWUSR); - } - else - { + } else { // Close other files that are open. There is a race condition here, // in that files could be opened or closed between this code running // and the posix_spawn() function being called. However, this would // violate the restrictions stated in the contract detailed in the // Doxygen description of this class. 
- if (::fcntl(fd, F_GETFL) != -1) - { + if (::fcntl(fd, F_GETFL) != -1) { ::posix_spawn_file_actions_addclose(fileActions, fd); } } @@ -84,231 +72,169 @@ bool setupFileActions(posix_spawn_file_actions_t *fileActions) return true; } - } -namespace ml -{ -namespace core -{ -namespace detail -{ - -class CTrackerThread : public CThread -{ - public: - using TPidSet = std::set; - - public: - CTrackerThread() - : m_Shutdown(false), - m_Condition(m_Mutex) - { - } +namespace ml { +namespace core { +namespace detail { - //! Mutex is accessible so the code outside the class can avoid race - //! conditions. - CMutex &mutex() - { - return m_Mutex; - } +class CTrackerThread : public CThread { +public: + using TPidSet = std::set; - //! Add a PID to track. - void addPid(CProcess::TPid pid) - { - CScopedLock lock(m_Mutex); - m_Pids.insert(pid); - m_Condition.signal(); - } +public: + CTrackerThread() : m_Shutdown(false), m_Condition(m_Mutex) {} - bool terminatePid(CProcess::TPid pid) - { - if (!this->havePid(pid)) - { - LOG_ERROR("Will not attempt to kill process " << pid << ": not a child process"); - return false; - } + //! Mutex is accessible so the code outside the class can avoid race + //! conditions. + CMutex& mutex() { return m_Mutex; } - if (::kill(pid, SIGTERM) == -1) - { - // Don't log an error if the process exited normally in between - // checking whether it was our child process and killing it - if (errno != ESRCH) - { - LOG_ERROR("Failed to kill process " << pid << ": " << ::strerror(errno)); - } - else - { - // But log at debug in case there's a bug in this area - LOG_DEBUG("No such process while trying to kill PID " << pid); - } - return false; - } + //! Add a PID to track. + void addPid(CProcess::TPid pid) { + CScopedLock lock(m_Mutex); + m_Pids.insert(pid); + m_Condition.signal(); + } - return true; + bool terminatePid(CProcess::TPid pid) { + if (!this->havePid(pid)) { + LOG_ERROR("Will not attempt to kill process " << pid << ": not a child process"); + return false; } - bool havePid(CProcess::TPid pid) const - { - if (pid <= 0) - { - return false; + if (::kill(pid, SIGTERM) == -1) { + // Don't log an error if the process exited normally in between + // checking whether it was our child process and killing it + if (errno != ESRCH) { + LOG_ERROR("Failed to kill process " << pid << ": " << ::strerror(errno)); + } else { + // But log at debug in case there's a bug in this area + LOG_DEBUG("No such process while trying to kill PID " << pid); } + return false; + } + + return true; + } - CScopedLock lock(m_Mutex); - // Do an extra cycle of waiting for zombies, so we give the most - // up-to-date answer possible - const_cast(this)->checkForDeadChildren(); - return m_Pids.find(pid) != m_Pids.end(); + bool havePid(CProcess::TPid pid) const { + if (pid <= 0) { + return false; } - protected: - virtual void run() - { - CScopedLock lock(m_Mutex); - - while (!m_Shutdown) - { - // Reap zombies every 50ms if child processes are running, - // otherwise wait for a child process to start. 
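[Note: two pieces of standard behaviour the loop below leans on. First, the condition wait is assumed to release m_Mutex atomically while blocked and reacquire it before returning, so checkForDeadChildren() always runs with the lock held; wait(50) is the 50ms timed variant. Second, the reaping pass decodes waitpid() as sketched here.]

    // pid_t r = ::waitpid(-1, &status, WNOHANG);
    // r == 0  : children exist but none have exited yet -> stop scanning
    // r == -1 : error; anything other than EINTR ends the loop
    // r >  0  : child r was reaped; WIFSIGNALED(status) ? WTERMSIG(status)
    //           : WEXITSTATUS(status) tells us how it ended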
- if (m_Pids.empty()) - { - m_Condition.wait(); - } - else - { - m_Condition.wait(50); - } + CScopedLock lock(m_Mutex); + // Do an extra cycle of waiting for zombies, so we give the most + // up-to-date answer possible + const_cast(this)->checkForDeadChildren(); + return m_Pids.find(pid) != m_Pids.end(); + } - this->checkForDeadChildren(); +protected: + virtual void run() { + CScopedLock lock(m_Mutex); + + while (!m_Shutdown) { + // Reap zombies every 50ms if child processes are running, + // otherwise wait for a child process to start. + if (m_Pids.empty()) { + m_Condition.wait(); + } else { + m_Condition.wait(50); } - } - virtual void shutdown() - { - LOG_DEBUG("Shutting down spawned process tracker thread"); - CScopedLock lock(m_Mutex); - m_Shutdown = true; - m_Condition.signal(); + this->checkForDeadChildren(); } + } + + virtual void shutdown() { + LOG_DEBUG("Shutting down spawned process tracker thread"); + CScopedLock lock(m_Mutex); + m_Shutdown = true; + m_Condition.signal(); + } - private: - //! Reap zombie child processes and adjust the set of live child PIDs - //! accordingly. MUST be called with m_Mutex locked. - void checkForDeadChildren() - { - int status = 0; - for (;;) - { - CProcess::TPid pid = ::waitpid(-1, &status, WNOHANG); - // 0 means there are child processes but none have died - if (pid == 0) - { +private: + //! Reap zombie child processes and adjust the set of live child PIDs + //! accordingly. MUST be called with m_Mutex locked. + void checkForDeadChildren() { + int status = 0; + for (;;) { + CProcess::TPid pid = ::waitpid(-1, &status, WNOHANG); + // 0 means there are child processes but none have died + if (pid == 0) { + break; + } + // -1 means error + if (pid == -1) { + if (errno != EINTR) { break; } - // -1 means error - if (pid == -1) - { - if (errno != EINTR) - { - break; - } - } - else - { - if (WIFSIGNALED(status)) - { - int signal = WTERMSIG(status); - if (signal == SIGTERM) - { - // We expect this when a job is force-closed, so log - // at a lower level - LOG_INFO("Child process with PID " << pid << - " was terminated by signal " << signal); - } - else - { - // This should never happen if the system is working - // normally - possible reasons are the Linux OOM - // killer, manual intervention and bugs that cause - // access violations - LOG_ERROR("Child process with PID " << pid << - " was terminated by signal " << signal); - } + } else { + if (WIFSIGNALED(status)) { + int signal = WTERMSIG(status); + if (signal == SIGTERM) { + // We expect this when a job is force-closed, so log + // at a lower level + LOG_INFO("Child process with PID " << pid << " was terminated by signal " << signal); + } else { + // This should never happen if the system is working + // normally - possible reasons are the Linux OOM + // killer, manual intervention and bugs that cause + // access violations + LOG_ERROR("Child process with PID " << pid << " was terminated by signal " << signal); } - else - { - int exitCode = WEXITSTATUS(status); - if (exitCode == 0) - { - // This is the happy case - LOG_DEBUG("Child process with PID " << pid << " has exited"); - } - else - { - LOG_WARN("Child process with PID " << pid << - " has exited with exit code " << exitCode); - } + } else { + int exitCode = WEXITSTATUS(status); + if (exitCode == 0) { + // This is the happy case + LOG_DEBUG("Child process with PID " << pid << " has exited"); + } else { + LOG_WARN("Child process with PID " << pid << " has exited with exit code " << exitCode); } - m_Pids.erase(pid); } + m_Pids.erase(pid); } } + } - 
private: - bool m_Shutdown; - TPidSet m_Pids; - mutable CMutex m_Mutex; - CCondition m_Condition; +private: + bool m_Shutdown; + TPidSet m_Pids; + mutable CMutex m_Mutex; + CCondition m_Condition; }; - } - -CDetachedProcessSpawner::CDetachedProcessSpawner(const TStrVec &permittedProcessPaths) - : m_PermittedProcessPaths(permittedProcessPaths), - m_TrackerThread(boost::make_shared<CTrackerThread>()) -{ - if (m_TrackerThread->start() == false) - { +CDetachedProcessSpawner::CDetachedProcessSpawner(const TStrVec& permittedProcessPaths) + : m_PermittedProcessPaths(permittedProcessPaths), m_TrackerThread(boost::make_shared<CTrackerThread>()) { + if (m_TrackerThread->start() == false) { LOG_ERROR("Failed to start spawned process tracker thread"); } } -CDetachedProcessSpawner::~CDetachedProcessSpawner() -{ - if (m_TrackerThread->stop() == false) - { +CDetachedProcessSpawner::~CDetachedProcessSpawner() { + if (m_TrackerThread->stop() == false) { LOG_ERROR("Failed to stop spawned process tracker thread"); } } -bool CDetachedProcessSpawner::spawn(const std::string &processPath, const TStrVec &args) -{ +bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVec& args) { CProcess::TPid dummy(0); return this->spawn(processPath, args, dummy); } -bool CDetachedProcessSpawner::spawn(const std::string &processPath, - const TStrVec &args, - CProcess::TPid &childPid) -{ - if (std::find(m_PermittedProcessPaths.begin(), - m_PermittedProcessPaths.end(), - processPath) == m_PermittedProcessPaths.end()) - { +bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVec& args, CProcess::TPid& childPid) { + if (std::find(m_PermittedProcessPaths.begin(), m_PermittedProcessPaths.end(), processPath) == m_PermittedProcessPaths.end()) { LOG_ERROR("Spawning process '" << processPath << "' is not permitted"); return false; } - if (::access(processPath.c_str(), X_OK) != 0) - { - LOG_ERROR("Cannot execute '" << processPath << "': " << - ::strerror(errno)); + if (::access(processPath.c_str(), X_OK) != 0) { + LOG_ERROR("Cannot execute '" << processPath << "': " << ::strerror(errno)); return false; } - using TCharPVec = std::vector<char *>; + using TCharPVec = std::vector<char*>; // Size of argv is two bigger than the number of arguments because: // 1) We add the program name at the beginning // 2) The list of arguments must be terminated by a NULL pointer @@ -317,25 +243,20 @@ bool CDetachedProcessSpawner::spawn(const std::string &processPath, // These const_casts may cause const data to get modified BUT only in the // child post-fork, so this won't corrupt parent process data - argv.push_back(const_cast<char *>(processPath.c_str())); - for (size_t index = 0; index < args.size(); ++index) - { - argv.push_back(const_cast<char *>(args[index].c_str())); + argv.push_back(const_cast<char*>(processPath.c_str())); + for (size_t index = 0; index < args.size(); ++index) { + argv.push_back(const_cast<char*>(args[index].c_str())); } - argv.push_back(static_cast<char *>(0)); + argv.push_back(static_cast<char*>(0)); posix_spawn_file_actions_t fileActions; - if (setupFileActions(&fileActions) == false) - { - LOG_ERROR("Failed to set up file actions prior to spawn of '" << - processPath << "': " << ::strerror(errno)); + if (setupFileActions(&fileActions) == false) { + LOG_ERROR("Failed to set up file actions prior to spawn of '" << processPath << "': " << ::strerror(errno)); return false; } posix_spawnattr_t spawnAttributes; - if (::posix_spawnattr_init(&spawnAttributes) != 0) - { - LOG_ERROR("Failed to set up spawn attributes prior to spawn of '" << - processPath << "': " <<
::strerror(errno)); + if (::posix_spawnattr_init(&spawnAttributes) != 0) { + LOG_ERROR("Failed to set up spawn attributes prior to spawn of '" << processPath << "': " << ::strerror(errno)); return false; } ::posix_spawnattr_setflags(&spawnAttributes, POSIX_SPAWN_SETPGROUP); @@ -346,20 +267,13 @@ bool CDetachedProcessSpawner::spawn(const std::string &processPath, // quickly CScopedLock lock(m_TrackerThread->mutex()); - int err(::posix_spawn(&childPid, - processPath.c_str(), - &fileActions, - &spawnAttributes, - &argv[0], - environ)); + int err(::posix_spawn(&childPid, processPath.c_str(), &fileActions, &spawnAttributes, &argv[0], environ)); ::posix_spawn_file_actions_destroy(&fileActions); ::posix_spawnattr_destroy(&spawnAttributes); - if (err != 0) - { - LOG_ERROR("Failed to spawn '" << processPath << "': " << - ::strerror(err)); + if (err != 0) { + LOG_ERROR("Failed to spawn '" << processPath << "': " << ::strerror(err)); return false; } @@ -371,17 +285,12 @@ bool CDetachedProcessSpawner::spawn(const std::string &processPath, return true; } -bool CDetachedProcessSpawner::terminateChild(CProcess::TPid pid) -{ +bool CDetachedProcessSpawner::terminateChild(CProcess::TPid pid) { return m_TrackerThread->terminatePid(pid); } -bool CDetachedProcessSpawner::hasChild(CProcess::TPid pid) const -{ +bool CDetachedProcessSpawner::hasChild(CProcess::TPid pid) const { return m_TrackerThread->havePid(pid); } - - } } - diff --git a/lib/core/CDetachedProcessSpawner_Windows.cc b/lib/core/CDetachedProcessSpawner_Windows.cc index 082b3df088..97274d4d0a 100644 --- a/lib/core/CDetachedProcessSpawner_Windows.cc +++ b/lib/core/CDetachedProcessSpawner_Windows.cc @@ -18,200 +18,151 @@ #include +namespace ml { +namespace core { +namespace detail { -namespace ml -{ -namespace core -{ -namespace detail -{ - -class CTrackerThread : public CThread -{ - public: - using TPidHandleMap = std::map; - - public: - CTrackerThread() - : m_Shutdown(false), - m_Condition(m_Mutex) - { - } +class CTrackerThread : public CThread { +public: + using TPidHandleMap = std::map; - virtual ~CTrackerThread() - { - // Close the handles to any child processes that outlived us - CScopedLock lock(m_Mutex); +public: + CTrackerThread() : m_Shutdown(false), m_Condition(m_Mutex) {} - for (const auto &entry : m_Pids) - { - CloseHandle(entry.second); - } - } + virtual ~CTrackerThread() { + // Close the handles to any child processes that outlived us + CScopedLock lock(m_Mutex); - //! Mutex is accessible so the code outside the class can avoid race - //! conditions. - CMutex &mutex() - { - return m_Mutex; + for (const auto& entry : m_Pids) { + CloseHandle(entry.second); } + } - //! Add a PID to track, together with its corresponding process handle. - void addPid(CProcess::TPid pid, HANDLE processHandle) - { - CScopedLock lock(m_Mutex); - m_Pids.insert({ pid, processHandle }); - m_Condition.signal(); - } + //! Mutex is accessible so the code outside the class can avoid race + //! conditions. + CMutex& mutex() { return m_Mutex; } - bool terminatePid(CProcess::TPid pid) - { - HANDLE handle = this->handleForPid(pid); - if (handle == INVALID_HANDLE_VALUE) - { - LOG_ERROR("Will not attempt to kill process " << pid << - ": not a child process"); - return false; - } + //! Add a PID to track, together with its corresponding process handle. 
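[Note: the tracker stores the process HANDLE alongside the PID because Windows may recycle a PID once the process has exited and every handle to it has been closed; an open handle pins the process object, so later TerminateProcess()/WaitForSingleObject() calls cannot hit an unrelated process that reused the PID. This is general Win32 behaviour, summarised here as a sketch.]

    // HANDLE h = m_Pids[pid];        // open handle keeps the process object alive,
    // TerminateProcess(h, exitCode); // so h can never refer to a recycled PID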
+ void addPid(CProcess::TPid pid, HANDLE processHandle) { + CScopedLock lock(m_Mutex); + m_Pids.insert({pid, processHandle}); + m_Condition.signal(); + } - UINT exitCode = 0; - if (TerminateProcess(handle, exitCode) == FALSE) - { - LOG_ERROR("Failed to kill process " << pid << ": " << - CWindowsError()); - return false; - } + bool terminatePid(CProcess::TPid pid) { + HANDLE handle = this->handleForPid(pid); + if (handle == INVALID_HANDLE_VALUE) { + LOG_ERROR("Will not attempt to kill process " << pid << ": not a child process"); + return false; + } - return true; + UINT exitCode = 0; + if (TerminateProcess(handle, exitCode) == FALSE) { + LOG_ERROR("Failed to kill process " << pid << ": " << CWindowsError()); + return false; } - //! Given a process ID, return the corresponding process handle. - HANDLE handleForPid(CProcess::TPid pid) const - { - if (pid == 0) - { - return INVALID_HANDLE_VALUE; - } + return true; + } - CScopedLock lock(m_Mutex); - // Do an extra cycle of waiting for zombies, so we give the most - // up-to-date answer possible - const_cast(this)->checkForDeadChildren(); - auto iter = m_Pids.find(pid); - return iter == m_Pids.end() ? INVALID_HANDLE_VALUE : iter->second; + //! Given a process ID, return the corresponding process handle. + HANDLE handleForPid(CProcess::TPid pid) const { + if (pid == 0) { + return INVALID_HANDLE_VALUE; } - protected: - virtual void run() - { - CScopedLock lock(m_Mutex); - - while (!m_Shutdown) - { - // Reap zombies every 50ms if child processes are running, - // otherwise wait for a child process to start. - if (m_Pids.empty()) - { - m_Condition.wait(); - } - else - { - m_Condition.wait(50); - } - - this->checkForDeadChildren(); + CScopedLock lock(m_Mutex); + // Do an extra cycle of waiting for zombies, so we give the most + // up-to-date answer possible + const_cast(this)->checkForDeadChildren(); + auto iter = m_Pids.find(pid); + return iter == m_Pids.end() ? INVALID_HANDLE_VALUE : iter->second; + } + +protected: + virtual void run() { + CScopedLock lock(m_Mutex); + + while (!m_Shutdown) { + // Reap zombies every 50ms if child processes are running, + // otherwise wait for a child process to start. + if (m_Pids.empty()) { + m_Condition.wait(); + } else { + m_Condition.wait(50); } - } - virtual void shutdown() - { - LOG_DEBUG("Shutting down spawned process tracker thread"); - CScopedLock lock(m_Mutex); - m_Shutdown = true; - m_Condition.signal(); + this->checkForDeadChildren(); } + } + + virtual void shutdown() { + LOG_DEBUG("Shutting down spawned process tracker thread"); + CScopedLock lock(m_Mutex); + m_Shutdown = true; + m_Condition.signal(); + } - private: - //! Reap zombie child processes and adjust the set of live child PIDs - //! accordingly. MUST be called with m_Mutex locked. - void checkForDeadChildren() - { - auto iter = m_Pids.begin(); - while (iter != m_Pids.end()) - { - // The reason for using WaitForSingleObject() here instead of - // WaitForMultipleObjects() (which would avoid the need to wait - // on a condition variable above) is that the latter function - // can only wait for 64 objects simultaneously. We could easily - // have more child processes than this, so it would lead to code - // complexity and headaches getting test coverage to use - // WaitForMultipleObjects(). - HANDLE processHandle = iter->second; - if (WaitForSingleObject(processHandle, 0) == WAIT_OBJECT_0) - { - CloseHandle(processHandle); - iter = m_Pids.erase(iter); - } - else - { - ++iter; - } +private: + //! 
Reap zombie child processes and adjust the set of live child PIDs + //! accordingly. MUST be called with m_Mutex locked. + void checkForDeadChildren() { + auto iter = m_Pids.begin(); + while (iter != m_Pids.end()) { + // The reason for using WaitForSingleObject() here instead of + // WaitForMultipleObjects() (which would avoid the need to wait + // on a condition variable above) is that the latter function + // can only wait for 64 objects simultaneously. We could easily + // have more child processes than this, so it would lead to code + // complexity and headaches getting test coverage to use + // WaitForMultipleObjects(). + HANDLE processHandle = iter->second; + if (WaitForSingleObject(processHandle, 0) == WAIT_OBJECT_0) { + CloseHandle(processHandle); + iter = m_Pids.erase(iter); + } else { + ++iter; } } + } - private: - bool m_Shutdown; - TPidHandleMap m_Pids; - mutable CMutex m_Mutex; - CCondition m_Condition; +private: + bool m_Shutdown; + TPidHandleMap m_Pids; + mutable CMutex m_Mutex; + CCondition m_Condition; }; - } - -CDetachedProcessSpawner::CDetachedProcessSpawner(const TStrVec &permittedProcessPaths) - : m_PermittedProcessPaths(permittedProcessPaths), - m_TrackerThread(boost::make_shared()) -{ - if (m_TrackerThread->start() == false) - { +CDetachedProcessSpawner::CDetachedProcessSpawner(const TStrVec& permittedProcessPaths) + : m_PermittedProcessPaths(permittedProcessPaths), m_TrackerThread(boost::make_shared()) { + if (m_TrackerThread->start() == false) { LOG_ERROR("Failed to start spawned process tracker thread"); } } -CDetachedProcessSpawner::~CDetachedProcessSpawner() -{ - if (m_TrackerThread->stop() == false) - { +CDetachedProcessSpawner::~CDetachedProcessSpawner() { + if (m_TrackerThread->stop() == false) { LOG_ERROR("Failed to stop spawned process tracker thread"); } } -bool CDetachedProcessSpawner::spawn(const std::string &processPath, const TStrVec &args) -{ +bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVec& args) { CProcess::TPid dummy(0); return this->spawn(processPath, args, dummy); } -bool CDetachedProcessSpawner::spawn(const std::string &processPath, - const TStrVec &args, - CProcess::TPid &childPid) -{ - if (std::find(m_PermittedProcessPaths.begin(), - m_PermittedProcessPaths.end(), - processPath) == m_PermittedProcessPaths.end()) - { +bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVec& args, CProcess::TPid& childPid) { + if (std::find(m_PermittedProcessPaths.begin(), m_PermittedProcessPaths.end(), processPath) == m_PermittedProcessPaths.end()) { LOG_ERROR("Spawning process '" << processPath << "' is not permitted"); return false; } - bool processPathHasExeExt(processPath.length() > 4 && - processPath.compare(processPath.length() - 4, 4, ".exe") == 0); + bool processPathHasExeExt(processPath.length() > 4 && processPath.compare(processPath.length() - 4, 4, ".exe") == 0); // Windows takes command lines as a single string std::string cmdLine(CShellArgQuoter::quote(processPath)); - for (size_t index = 0; index < args.size(); ++index) - { + for (size_t index = 0; index < args.size(); ++index) { cmdLine += ' '; cmdLine += CShellArgQuoter::quote(args[index]); } @@ -230,7 +181,7 @@ bool CDetachedProcessSpawner::spawn(const std::string &processPath, CScopedLock lock(m_TrackerThread->mutex()); if (CreateProcess((processPathHasExeExt ? 
processPath : processPath + ".exe").c_str(), - const_cast(cmdLine.c_str()), + const_cast(cmdLine.c_str()), 0, 0, FALSE, @@ -251,8 +202,7 @@ bool CDetachedProcessSpawner::spawn(const std::string &processPath, 0, 0, &startupInfo, - &processInformation) == FALSE) - { + &processInformation) == FALSE) { LOG_ERROR("Failed to spawn '" << processPath << "': " << CWindowsError()); return false; } @@ -268,17 +218,12 @@ bool CDetachedProcessSpawner::spawn(const std::string &processPath, return true; } -bool CDetachedProcessSpawner::terminateChild(CProcess::TPid pid) -{ +bool CDetachedProcessSpawner::terminateChild(CProcess::TPid pid) { return m_TrackerThread->terminatePid(pid); } -bool CDetachedProcessSpawner::hasChild(CProcess::TPid pid) const -{ +bool CDetachedProcessSpawner::hasChild(CProcess::TPid pid) const { return m_TrackerThread->handleForPid(pid) != INVALID_HANDLE_VALUE; } - - } } - diff --git a/lib/core/CDualThreadStreamBuf.cc b/lib/core/CDualThreadStreamBuf.cc index 47f8c0f9da..68dd36ec21 100644 --- a/lib/core/CDualThreadStreamBuf.cc +++ b/lib/core/CDualThreadStreamBuf.cc @@ -12,17 +12,12 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { // Initialise statics const size_t CDualThreadStreamBuf::DEFAULT_BUFFER_CAPACITY(65536); - CDualThreadStreamBuf::CDualThreadStreamBuf(size_t bufferCapacity) : m_WriteBuffer(new char[bufferCapacity]), m_WriteBufferCapacity(bufferCapacity), @@ -35,11 +30,10 @@ CDualThreadStreamBuf::CDualThreadStreamBuf(size_t bufferCapacity) m_WriteBytesSwapped(0), m_IntermediateBufferCondition(m_IntermediateBufferMutex), m_Eof(false), - m_FatalError(false) -{ + m_FatalError(false) { // Initialise write buffer pointers to indicate an empty buffer - char *begin(m_WriteBuffer.get()); - char *end(begin + m_WriteBufferCapacity); + char* begin(m_WriteBuffer.get()); + char* end(begin + m_WriteBufferCapacity); this->setp(begin, end); // Initialise read buffer pointers to indicate a buffer that has underflowed @@ -48,17 +42,14 @@ CDualThreadStreamBuf::CDualThreadStreamBuf(size_t bufferCapacity) this->setg(begin, end, end); } -void CDualThreadStreamBuf::signalEndOfFile() -{ +void CDualThreadStreamBuf::signalEndOfFile() { CScopedLock lock(m_IntermediateBufferMutex); - if (m_Eof) - { + if (m_Eof) { return; } - if (m_FatalError) - { + if (m_FatalError) { // If there's been a fatal error we don't care about losing data, so // just set the end-of-file flag and return m_Eof = true; @@ -66,16 +57,12 @@ void CDualThreadStreamBuf::signalEndOfFile() return; } - if (this->pptr() > this->pbase()) - { + if (this->pptr() > this->pbase()) { // Swapping the write buffer should wake up the reader thread - if (this->swapWriteBuffer() == false) - { + if (this->swapWriteBuffer() == false) { LOG_ERROR("Failed to swap write buffer on setting end-of-file"); } - } - else - { + } else { // We don't need to swap the write buffer, but we do need to wake up // the reader thread m_IntermediateBufferCondition.signal(); @@ -88,17 +75,15 @@ void CDualThreadStreamBuf::signalEndOfFile() m_Eof = true; } -bool CDualThreadStreamBuf::endOfFile() const -{ +bool CDualThreadStreamBuf::endOfFile() const { return m_Eof; } -void CDualThreadStreamBuf::signalFatalError() -{ +void CDualThreadStreamBuf::signalFatalError() { CScopedLock lock(m_IntermediateBufferMutex); // Chuck away the current read buffer - char *begin(m_ReadBuffer.get()); + char* begin(m_ReadBuffer.get()); this->setg(begin, begin, begin); // Set a flag to indicate that future reads and writes should fail @@ -107,13 +92,11 @@ 
void CDualThreadStreamBuf::signalFatalError() m_IntermediateBufferCondition.signal(); } -bool CDualThreadStreamBuf::hasFatalError() const -{ +bool CDualThreadStreamBuf::hasFatalError() const { return m_FatalError; } -std::streamsize CDualThreadStreamBuf::showmanyc() -{ +std::streamsize CDualThreadStreamBuf::showmanyc() { // Note that, unlike a file, we have no way of finding out what the total // amount of unread data is @@ -122,8 +105,7 @@ std::streamsize CDualThreadStreamBuf::showmanyc() CScopedLock lock(m_IntermediateBufferMutex); - if (!m_FatalError) - { + if (!m_FatalError) { // Add on unread contents of intermediate buffer ret += (m_IntermediateBufferEnd - m_IntermediateBuffer.get()); } @@ -131,21 +113,17 @@ std::streamsize CDualThreadStreamBuf::showmanyc() return ret; } -int CDualThreadStreamBuf::sync() -{ +int CDualThreadStreamBuf::sync() { CScopedLock lock(m_IntermediateBufferMutex); - if (m_FatalError) - { + if (m_FatalError) { return -1; } // If there is no data in the write buffer then sync is a no-op - if (this->pptr() > this->pbase()) - { + if (this->pptr() > this->pbase()) { // Swapping the write buffer should wake up the reader thread - if (this->swapWriteBuffer() == false) - { + if (this->swapWriteBuffer() == false) { LOG_ERROR("Failed to swap write buffer on sync"); return -1; } @@ -154,35 +132,28 @@ int CDualThreadStreamBuf::sync() return 0; } -std::streamsize CDualThreadStreamBuf::xsgetn(char *s, std::streamsize n) -{ +std::streamsize CDualThreadStreamBuf::xsgetn(char* s, std::streamsize n) { // Not locked; expected to be called only in the reader thread (see Doxygen // comments) std::streamsize ret(0); - if (m_FatalError) - { + if (m_FatalError) { return ret; } - while (ret < n) - { + while (ret < n) { std::streamsize bufLen(this->egptr() - this->gptr()); - if (bufLen > 0) - { + if (bufLen > 0) { std::streamsize copyLen(std::min(bufLen, n - ret)); ::memcpy(s, this->gptr(), static_cast(copyLen)); s += copyLen; ret += copyLen; this->gbump(static_cast(copyLen)); - } - else - { + } else { // uflow() will call underflow(), so may block, but the buffers are // hopefully big enough that this should be rare int c(this->uflow()); - if (c == traits_type::eof()) - { + if (c == traits_type::eof()) { break; } *s = char(c); @@ -194,22 +165,18 @@ std::streamsize CDualThreadStreamBuf::xsgetn(char *s, std::streamsize n) return ret; } -int CDualThreadStreamBuf::underflow() -{ +int CDualThreadStreamBuf::underflow() { CScopedLock lock(m_IntermediateBufferMutex); - if (m_FatalError || this->swapReadBuffer() == false) - { + if (m_FatalError || this->swapReadBuffer() == false) { return traits_type::eof(); } return int(m_ReadBuffer[0]); } -int CDualThreadStreamBuf::pbackfail(int c) -{ - if (c == traits_type::eof()) - { +int CDualThreadStreamBuf::pbackfail(int c) { + if (c == traits_type::eof()) { // The standard says that pbackfail() may be called with an argument of // EOF to indicate that the current character at the ungotten position // should be retained. 
Because this class does not support seeking, we @@ -229,22 +196,16 @@ int CDualThreadStreamBuf::pbackfail(int c) std::streamsize countBeforeCurrent(this->gptr() - this->eback()); std::streamsize countAfterCurrent(this->egptr() - this->gptr()); - char *newBegin(newReadBuffer.get()); - char *newCurrent(newBegin + countBeforeCurrent); - char *newEnd(newCurrent + 1 + countAfterCurrent); + char* newBegin(newReadBuffer.get()); + char* newCurrent(newBegin + countBeforeCurrent); + char* newEnd(newCurrent + 1 + countAfterCurrent); - if (countBeforeCurrent > 0) - { - ::memcpy(newBegin, - this->eback(), - static_cast(countBeforeCurrent)); + if (countBeforeCurrent > 0) { + ::memcpy(newBegin, this->eback(), static_cast(countBeforeCurrent)); } *newCurrent = char(c); - if (countAfterCurrent > 0) - { - ::memcpy(newCurrent + 1, - this->gptr(), - static_cast(countAfterCurrent)); + if (countAfterCurrent > 0) { + ::memcpy(newCurrent + 1, this->gptr(), static_cast(countAfterCurrent)); } m_ReadBuffer.swap(newReadBuffer); @@ -253,42 +214,34 @@ int CDualThreadStreamBuf::pbackfail(int c) return c; } -std::streamsize CDualThreadStreamBuf::xsputn(const char *s, std::streamsize n) -{ +std::streamsize CDualThreadStreamBuf::xsputn(const char* s, std::streamsize n) { // Not locked; expected to be called only in the writer thread (see Doxygen // comments) std::streamsize ret(0); - if (m_Eof) - { + if (m_Eof) { LOG_ERROR("Inconsistency - trying to add data to stream buffer after end-of-file"); return ret; } - if (m_FatalError) - { + if (m_FatalError) { return ret; } - while (ret < n) - { + while (ret < n) { std::streamsize bufAvail(this->epptr() - this->pptr()); - if (bufAvail > 0) - { + if (bufAvail > 0) { std::streamsize copyLen(std::min(bufAvail, n - ret)); ::memcpy(this->pptr(), s, static_cast(copyLen)); s += copyLen; ret += copyLen; this->pbump(static_cast(copyLen)); - } - else - { + } else { // overflow() may block, but the buffers are hopefully big enough // that this should be rare int c(this->overflow(int(*s))); - if (c == traits_type::eof()) - { + if (c == traits_type::eof()) { break; } ++s; @@ -299,25 +252,20 @@ std::streamsize CDualThreadStreamBuf::xsputn(const char *s, std::streamsize n) return ret; } -int CDualThreadStreamBuf::overflow(int c) -{ +int CDualThreadStreamBuf::overflow(int c) { int ret(traits_type::eof()); CScopedLock lock(m_IntermediateBufferMutex); - if (m_Eof || m_FatalError || this->swapWriteBuffer() == false) - { + if (m_Eof || m_FatalError || this->swapWriteBuffer() == false) { return ret; } - if (c == ret) - { + if (c == ret) { m_Eof = true; // If the argument indicated EOF, we don't put it in the new buffer ret = traits_type::not_eof(c); - } - else - { + } else { m_WriteBuffer[0] = char(c); this->pbump(1); ret = c; @@ -326,38 +274,28 @@ int CDualThreadStreamBuf::overflow(int c) return ret; } -std::streampos CDualThreadStreamBuf::seekoff(std::streamoff off, - std::ios_base::seekdir way, - std::ios_base::openmode which) -{ +std::streampos CDualThreadStreamBuf::seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode which) { std::streampos pos(static_cast(-1)); - if (off != 0) - { + if (off != 0) { LOG_ERROR("Seeking not supported on stream buffer"); return pos; } - if (way != std::ios_base::cur) - { + if (way != std::ios_base::cur) { LOG_ERROR("Seeking from beginning or end not supported on stream buffer"); return pos; } - if (which == std::ios_base::in) - { + if (which == std::ios_base::in) { CScopedLock lock(m_IntermediateBufferMutex); pos = 
static_cast(m_ReadBytesSwapped); pos -= (this->egptr() - this->gptr()); - } - else if (which == std::ios_base::out) - { + } else if (which == std::ios_base::out) { CScopedLock lock(m_IntermediateBufferMutex); pos = static_cast(m_WriteBytesSwapped); pos += (this->pptr() - this->pbase()); - } - else - { + } else { LOG_ERROR("Unexpected mode for seek on stream buffer: " << which); } @@ -365,14 +303,11 @@ std::streampos CDualThreadStreamBuf::seekoff(std::streamoff off, } // NB: m_IntermediateBufferMutex MUST be locked when this method is called -bool CDualThreadStreamBuf::swapWriteBuffer() -{ +bool CDualThreadStreamBuf::swapWriteBuffer() { // Wait until the intermediate buffer is empty - while (m_IntermediateBufferEnd > m_IntermediateBuffer.get()) - { + while (m_IntermediateBufferEnd > m_IntermediateBuffer.get()) { m_IntermediateBufferCondition.wait(); - if (m_FatalError) - { + if (m_FatalError) { return false; } } @@ -383,8 +318,8 @@ bool CDualThreadStreamBuf::swapWriteBuffer() m_IntermediateBufferEnd = this->pptr(); m_WriteBuffer.swap(m_IntermediateBuffer); std::swap(m_WriteBufferCapacity, m_IntermediateBufferCapacity); - char *begin(m_WriteBuffer.get()); - char *end(begin + m_WriteBufferCapacity); + char* begin(m_WriteBuffer.get()); + char* end(begin + m_WriteBufferCapacity); this->setp(begin, end); // Signal any waiting reader @@ -394,29 +329,22 @@ bool CDualThreadStreamBuf::swapWriteBuffer() } // NB: m_IntermediateBufferMutex MUST be locked when this method is called -bool CDualThreadStreamBuf::swapReadBuffer() -{ +bool CDualThreadStreamBuf::swapReadBuffer() { // Wait until the intermediate buffer contains data - while (!m_Eof && - m_IntermediateBufferEnd == m_IntermediateBuffer.get()) - { + while (!m_Eof && m_IntermediateBufferEnd == m_IntermediateBuffer.get()) { m_IntermediateBufferCondition.wait(); - if (m_FatalError) - { + if (m_FatalError) { return false; } } - char *begin(m_IntermediateBuffer.get()); - char *end(m_IntermediateBufferEnd); - if (begin >= end) - { - if (!m_Eof) - { + char* begin(m_IntermediateBuffer.get()); + char* end(m_IntermediateBufferEnd); + if (begin >= end) { + if (!m_Eof) { LOG_ERROR("Inconsistency - intermediate buffer empty after wait " - "when not at end-of-file: begin = " << - static_cast(begin) << " end = " << - static_cast(end)); + "when not at end-of-file: begin = " + << static_cast(begin) << " end = " << static_cast(end)); } return false; } @@ -434,8 +362,5 @@ bool CDualThreadStreamBuf::swapReadBuffer() return true; } - - } } - diff --git a/lib/core/CFastMutex.cc b/lib/core/CFastMutex.cc index f434124823..312affb545 100644 --- a/lib/core/CFastMutex.cc +++ b/lib/core/CFastMutex.cc @@ -10,50 +10,35 @@ #include #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CFastMutex::CFastMutex() -{ +CFastMutex::CFastMutex() { int ret(pthread_mutex_init(&m_Mutex, 0)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -CFastMutex::~CFastMutex() -{ +CFastMutex::~CFastMutex() { int ret(pthread_mutex_destroy(&m_Mutex)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void CFastMutex::lock() -{ +void CFastMutex::lock() { int ret(pthread_mutex_lock(&m_Mutex)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void CFastMutex::unlock() -{ +void CFastMutex::unlock() { int ret(pthread_mutex_unlock(&m_Mutex)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } - - } } - diff --git a/lib/core/CFastMutex_MacOSX.cc b/lib/core/CFastMutex_MacOSX.cc index 
e4f713cad5..f96cd16fdf 100644 --- a/lib/core/CFastMutex_MacOSX.cc +++ b/lib/core/CFastMutex_MacOSX.cc @@ -5,35 +5,24 @@ */ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { CFastMutex::CFastMutex() // The OSSpinLock type is just an integer, and zero means unlocked. See // "man spinlock" for details. - : m_Mutex(0) -{ + : m_Mutex(0) { } -CFastMutex::~CFastMutex() -{ +CFastMutex::~CFastMutex() { } -void CFastMutex::lock() -{ +void CFastMutex::lock() { OSSpinLockLock(&m_Mutex); } -void CFastMutex::unlock() -{ +void CFastMutex::unlock() { OSSpinLockUnlock(&m_Mutex); } - - } } - diff --git a/lib/core/CFastMutex_Windows.cc b/lib/core/CFastMutex_Windows.cc index 8adb286c5c..526de0c476 100644 --- a/lib/core/CFastMutex_Windows.cc +++ b/lib/core/CFastMutex_Windows.cc @@ -5,34 +5,23 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CFastMutex::CFastMutex() -{ +CFastMutex::CFastMutex() { InitializeSRWLock(&m_Mutex); } -CFastMutex::~CFastMutex() -{ +CFastMutex::~CFastMutex() { // There is no function to destroy the read/write lock on Windows } -void CFastMutex::lock() -{ +void CFastMutex::lock() { AcquireSRWLockExclusive(&m_Mutex); } -void CFastMutex::unlock() -{ +void CFastMutex::unlock() { ReleaseSRWLockExclusive(&m_Mutex); } - - } } - diff --git a/lib/core/CFileDeleter.cc b/lib/core/CFileDeleter.cc index 48a043e0a1..854834e0e2 100644 --- a/lib/core/CFileDeleter.cc +++ b/lib/core/CFileDeleter.cc @@ -11,33 +11,20 @@ #include #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CFileDeleter::CFileDeleter(const std::string &fileName) - : m_FileName(fileName) -{ +CFileDeleter::CFileDeleter(const std::string& fileName) : m_FileName(fileName) { } -CFileDeleter::~CFileDeleter() -{ - if (m_FileName.empty()) - { +CFileDeleter::~CFileDeleter() { + if (m_FileName.empty()) { return; } - if (::remove(m_FileName.c_str()) == -1) - { - LOG_WARN("Failed to remove file " << m_FileName << - " : " << ::strerror(errno)); + if (::remove(m_FileName.c_str()) == -1) { + LOG_WARN("Failed to remove file " << m_FileName << " : " << ::strerror(errno)); } } - - } } - diff --git a/lib/core/CFlatPrefixTree.cc b/lib/core/CFlatPrefixTree.cc index 4d758050c1..c1747068c3 100644 --- a/lib/core/CFlatPrefixTree.cc +++ b/lib/core/CFlatPrefixTree.cc @@ -17,13 +17,10 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -namespace -{ +namespace { const uint32_t NO_CHILD = std::numeric_limits::max(); const char PADDING_NODE = '$'; const char LEAF_NODE = 'l'; @@ -31,79 +28,56 @@ const char BRANCH_NODE = 'b'; const char LEAF_AND_BRANCH_NODE = '*'; const std::string EMPTY_STRING = ""; -struct SCharNotEqualTo -{ - SCharNotEqualTo(char c, std::size_t pos) - : s_Char(c), s_Pos(pos) - { - } +struct SCharNotEqualTo { + SCharNotEqualTo(char c, std::size_t pos) : s_Char(c), s_Pos(pos) {} - bool operator() (const std::string &str) - { - return str[s_Pos] != s_Char; - } + bool operator()(const std::string& str) { return str[s_Pos] != s_Char; } char s_Char; std::size_t s_Pos; }; } -CFlatPrefixTree::SNode::SNode(char c, char type, uint32_t next) - : s_Char(c), s_Type(type), s_Next(next) -{ +CFlatPrefixTree::SNode::SNode(char c, char type, uint32_t next) : s_Char(c), s_Type(type), s_Next(next) { } -bool CFlatPrefixTree::SNode::operator<(char rhs) const -{ +bool CFlatPrefixTree::SNode::operator<(char rhs) const { return s_Char < rhs; } -CFlatPrefixTree::SDistinctChar::SDistinctChar(char c, - char type, - std::size_t 
start, - std::size_t end) - : s_Char(c), s_Type(type), s_Start(start), s_End(end) -{ +CFlatPrefixTree::SDistinctChar::SDistinctChar(char c, char type, std::size_t start, std::size_t end) + : s_Char(c), s_Type(type), s_Start(start), s_End(end) { } -CFlatPrefixTree::CFlatPrefixTree() - : m_FlatTree() -{ +CFlatPrefixTree::CFlatPrefixTree() : m_FlatTree() { } -bool CFlatPrefixTree::build(const TStrVec &prefixes) -{ +bool CFlatPrefixTree::build(const TStrVec& prefixes) { m_FlatTree.clear(); - if (boost::algorithm::is_sorted(prefixes) == false) - { + if (boost::algorithm::is_sorted(prefixes) == false) { LOG_ERROR("FlatPrefixTree cannot be built from an unsorted vector of prefixes"); return false; } - if (prefixes.size() > 1) - { - for (std::size_t i = 0; i < prefixes.size() - 1; ++i) - { - if (prefixes[i] == prefixes[i + 1]) - { + if (prefixes.size() > 1) { + for (std::size_t i = 0; i < prefixes.size() - 1; ++i) { + if (prefixes[i] == prefixes[i + 1]) { LOG_ERROR("FlatPrefixTree cannot be built from a vector containing duplicate prefixes: " << prefixes[i]); return false; } } } - if (prefixes.empty() == false) - { + if (prefixes.empty() == false) { // Ignore empty string if present std::size_t startIndex = prefixes[0] == EMPTY_STRING ? 1 : 0; this->buildRecursively(prefixes, startIndex, prefixes.size(), 0); } - if (m_FlatTree.size() >= NO_CHILD) - { - LOG_ERROR("Failed to build the tree: " << m_FlatTree.size() - << " nodes were required; no more than " << NO_CHILD << " are supported."); + if (m_FlatTree.size() >= NO_CHILD) { + LOG_ERROR("Failed to build the tree: " << m_FlatTree.size() << " nodes were required; no more than " << NO_CHILD + << " are supported."); m_FlatTree.clear(); return false; } @@ -112,11 +86,7 @@ bool CFlatPrefixTree::build(const TStrVec &prefixes) return true; } -void CFlatPrefixTree::buildRecursively(const TStrVec &prefixes, - std::size_t prefixesStart, - std::size_t prefixesEnd, - std::size_t charPos) -{ +void CFlatPrefixTree::buildRecursively(const TStrVec& prefixes, std::size_t prefixesStart, std::size_t prefixesEnd, std::size_t charPos) { // First, we extract the distinct characters for the current character position and we // record their start/end indices in the prefixes vector. TDistinctCharVec distinctCharsWithRange; @@ -127,42 +97,36 @@ void CFlatPrefixTree::buildRecursively(const TStrVec &prefixes, // the number of distinct characters, and a node for each distinct character. m_FlatTree.push_back(SNode(PADDING_NODE, PADDING_NODE, static_cast<uint32_t>(distinctCharsWithRange.size()))); std::size_t treeSizeBeforeNewChars = m_FlatTree.size(); - for (std::size_t i = 0; i < distinctCharsWithRange.size(); ++i) - { - SDistinctChar &distinctChar = distinctCharsWithRange[i]; + for (std::size_t i = 0; i < distinctCharsWithRange.size(); ++i) { + SDistinctChar& distinctChar = distinctCharsWithRange[i]; m_FlatTree.push_back(SNode(distinctChar.s_Char, distinctChar.s_Type, NO_CHILD)); } // Finally, for the nodes that have children, we set their next child index to the current // tree size and we recurse.
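[Note: a worked example of the flat layout buildRecursively() produces, for the hypothetical sorted input {"ab", "ac"}; '$' is PADDING_NODE, 'b' BRANCH_NODE and 'l' LEAF_NODE, and the padding node's s_Next holds the count of distinct characters at that level.]

    // index  (s_Char, s_Type, s_Next)   meaning
    // [0]    ('$', '$', 1)              level header: 1 distinct char at pos 0
    // [1]    ('a', 'b', 2)              branch 'a'; its children start at [2]
    // [2]    ('$', '$', 2)              level header: 2 distinct chars at pos 1
    // [3]    ('b', 'l', NO_CHILD)       leaf terminating "ab"
    // [4]    ('c', 'l', NO_CHILD)       leaf terminating "ac"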
- for (std::size_t i = 0; i < distinctCharsWithRange.size(); ++i) - { - SDistinctChar &distinctChar = distinctCharsWithRange[i]; - if (distinctChar.s_Type != LEAF_NODE) - { + for (std::size_t i = 0; i < distinctCharsWithRange.size(); ++i) { + SDistinctChar& distinctChar = distinctCharsWithRange[i]; + if (distinctChar.s_Type != LEAF_NODE) { m_FlatTree[treeSizeBeforeNewChars + i].s_Next = static_cast(m_FlatTree.size()); this->buildRecursively(prefixes, distinctChar.s_Start, distinctChar.s_End, charPos + 1); } } } -void CFlatPrefixTree::extractDistinctCharacters(const TStrVec &prefixes, +void CFlatPrefixTree::extractDistinctCharacters(const TStrVec& prefixes, std::size_t prefixesStart, std::size_t prefixesEnd, std::size_t charPos, - TDistinctCharVec &distinctChars) -{ + TDistinctCharVec& distinctChars) { TStrVecCItr pos = prefixes.begin() + prefixesStart; TStrVecCItr end = prefixes.begin() + prefixesEnd; - while (pos != end) - { + while (pos != end) { char leadingChar = (*pos)[charPos]; TStrVecCItr next = std::find_if(pos, end, SCharNotEqualTo(leadingChar, charPos)); std::size_t startIndex = pos - prefixes.begin(); std::size_t endIndex = next - prefixes.begin(); char type = charPos + 1 == prefixes[startIndex].length() ? LEAF_NODE : BRANCH_NODE; - if (type == LEAF_NODE && endIndex - startIndex > 1) - { + if (type == LEAF_NODE && endIndex - startIndex > 1) { type = LEAF_AND_BRANCH_NODE; ++startIndex; } @@ -172,41 +136,33 @@ void CFlatPrefixTree::extractDistinctCharacters(const TStrVec &prefixes, } } -bool CFlatPrefixTree::matchesStart(const std::string &key) const -{ +bool CFlatPrefixTree::matchesStart(const std::string& key) const { return this->matches(key.begin(), key.end(), false); } -bool CFlatPrefixTree::matchesFully(const std::string &key) const -{ +bool CFlatPrefixTree::matchesFully(const std::string& key) const { return this->matches(key.begin(), key.end(), true); } -bool CFlatPrefixTree::matchesStart(TStrCItr start, TStrCItr end) const -{ +bool CFlatPrefixTree::matchesStart(TStrCItr start, TStrCItr end) const { return this->matches(start, end, false); } -bool CFlatPrefixTree::matchesFully(TStrCItr start, TStrCItr end) const -{ +bool CFlatPrefixTree::matchesFully(TStrCItr start, TStrCItr end) const { return this->matches(start, end, true); } -bool CFlatPrefixTree::matchesStart(TStrCRItr start, TStrCRItr end) const -{ +bool CFlatPrefixTree::matchesStart(TStrCRItr start, TStrCRItr end) const { return this->matches(start, end, false); } -bool CFlatPrefixTree::matchesFully(TStrCRItr start, TStrCRItr end) const -{ +bool CFlatPrefixTree::matchesFully(TStrCRItr start, TStrCRItr end) const { return this->matches(start, end, true); } template -bool CFlatPrefixTree::matches(ITR start, ITR end, bool requireFullMatch) const -{ - if (m_FlatTree.empty() || start == end) - { +bool CFlatPrefixTree::matches(ITR start, ITR end, bool requireFullMatch) const { + if (m_FlatTree.empty() || start == end) { return false; } @@ -216,42 +172,35 @@ bool CFlatPrefixTree::matches(ITR start, ITR end, bool requireFullMatch) const TNodeVecCItr levelEnd; char currentChar; char lastMatchedType = BRANCH_NODE; - while (currentStringPos < end && currentTreeIndex != NO_CHILD) - { + while (currentStringPos < end && currentTreeIndex != NO_CHILD) { levelStart = m_FlatTree.begin() + currentTreeIndex + 1; levelEnd = levelStart + m_FlatTree[currentTreeIndex].s_Next; currentChar = *currentStringPos; TNodeVecCItr searchResult = std::lower_bound(levelStart, levelEnd, currentChar); - if (searchResult == levelEnd || 
searchResult->s_Char != currentChar) - { + if (searchResult == levelEnd || searchResult->s_Char != currentChar) { break; } ++currentStringPos; currentTreeIndex = searchResult->s_Next; lastMatchedType = searchResult->s_Type; - if (requireFullMatch == false && lastMatchedType != BRANCH_NODE) - { + if (requireFullMatch == false && lastMatchedType != BRANCH_NODE) { break; } } - if (lastMatchedType != BRANCH_NODE) - { + if (lastMatchedType != BRANCH_NODE) { return requireFullMatch ? currentStringPos == end : true; } return false; } -void CFlatPrefixTree::clear() -{ +void CFlatPrefixTree::clear() { m_FlatTree.clear(); } -std::string CFlatPrefixTree::print() const -{ +std::string CFlatPrefixTree::print() const { std::string result; result += "["; - for (std::size_t i = 0; i < m_FlatTree.size(); ++i) - { + for (std::size_t i = 0; i < m_FlatTree.size(); ++i) { result += "("; result += m_FlatTree[i].s_Char; result += ", "; @@ -263,6 +212,5 @@ std::string CFlatPrefixTree::print() const result += "]"; return result; } - } } diff --git a/lib/core/CGmTimeR.cc b/lib/core/CGmTimeR.cc index dd5cef5c4c..5cc04e0d28 100644 --- a/lib/core/CGmTimeR.cc +++ b/lib/core/CGmTimeR.cc @@ -5,20 +5,11 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -struct tm *CGmTimeR::gmTimeR(const time_t *clock, - struct tm *result) -{ +struct tm* CGmTimeR::gmTimeR(const time_t* clock, struct tm* result) { return ::gmtime_r(clock, result); } - - } } - diff --git a/lib/core/CGmTimeR_Windows.cc b/lib/core/CGmTimeR_Windows.cc index 87def19ed0..9e512a6f0f 100644 --- a/lib/core/CGmTimeR_Windows.cc +++ b/lib/core/CGmTimeR_Windows.cc @@ -5,22 +5,13 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -struct tm *CGmTimeR::gmTimeR(const time_t *clock, - struct tm *result) -{ +struct tm* CGmTimeR::gmTimeR(const time_t* clock, struct tm* result) { ::gmtime_s(result, clock); return result; } - - } } - diff --git a/lib/core/CHashing.cc b/lib/core/CHashing.cc index 82a494b4a7..c646e2bc57 100644 --- a/lib/core/CHashing.cc +++ b/lib/core/CHashing.cc @@ -10,165 +10,113 @@ #include #include -#include #include +#include #include #include #include #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - -namespace -{ +namespace { using TUniform32 = boost::random::uniform_int_distribution; - } const uint64_t CHashing::CUniversalHash::BIG_PRIME = 4294967291ull; boost::random::mt11213b CHashing::CUniversalHash::ms_Generator; -CFastMutex CHashing::CUniversalHash::ms_Mutex; +CFastMutex CHashing::CUniversalHash::ms_Mutex; -CHashing::CUniversalHash::CUInt32Hash::CUInt32Hash() : - m_M(1000), m_A(1), m_B(0) -{ +CHashing::CUniversalHash::CUInt32Hash::CUInt32Hash() : m_M(1000), m_A(1), m_B(0) { } -CHashing::CUniversalHash::CUInt32Hash::CUInt32Hash(uint32_t m, - uint32_t a, - uint32_t b) : - m_M(m), m_A(a), m_B(b) -{ +CHashing::CUniversalHash::CUInt32Hash::CUInt32Hash(uint32_t m, uint32_t a, uint32_t b) : m_M(m), m_A(a), m_B(b) { } -uint32_t CHashing::CUniversalHash::CUInt32Hash::m() const -{ +uint32_t CHashing::CUniversalHash::CUInt32Hash::m() const { return m_M; } -uint32_t CHashing::CUniversalHash::CUInt32Hash::a() const -{ +uint32_t CHashing::CUniversalHash::CUInt32Hash::a() const { return m_A; } -uint32_t CHashing::CUniversalHash::CUInt32Hash::b() const -{ +uint32_t CHashing::CUniversalHash::CUInt32Hash::b() const { return m_B; } -std::string CHashing::CUniversalHash::CUInt32Hash::print() const -{ +std::string CHashing::CUniversalHash::CUInt32Hash::print() 
const { std::ostringstream result; - result << "\"((" << m_A << " * x + " << m_B << ") mod " - << BIG_PRIME << ") mod " << m_M << "\""; + result << "\"((" << m_A << " * x + " << m_B << ") mod " << BIG_PRIME << ") mod " << m_M << "\""; return result.str(); } - -CHashing::CUniversalHash::CUInt32UnrestrictedHash::CUInt32UnrestrictedHash() : - m_A(1), m_B(0) -{ +CHashing::CUniversalHash::CUInt32UnrestrictedHash::CUInt32UnrestrictedHash() : m_A(1), m_B(0) { } -CHashing::CUniversalHash::CUInt32UnrestrictedHash::CUInt32UnrestrictedHash(uint32_t a, - uint32_t b) : - m_A(a), m_B(b) -{ +CHashing::CUniversalHash::CUInt32UnrestrictedHash::CUInt32UnrestrictedHash(uint32_t a, uint32_t b) : m_A(a), m_B(b) { } -uint32_t CHashing::CUniversalHash::CUInt32UnrestrictedHash::a() const -{ +uint32_t CHashing::CUniversalHash::CUInt32UnrestrictedHash::a() const { return m_A; } -uint32_t CHashing::CUniversalHash::CUInt32UnrestrictedHash::b() const -{ +uint32_t CHashing::CUniversalHash::CUInt32UnrestrictedHash::b() const { return m_B; } -std::string CHashing::CUniversalHash::CUInt32UnrestrictedHash::print() const -{ +std::string CHashing::CUniversalHash::CUInt32UnrestrictedHash::print() const { std::ostringstream result; result << "\"(" << m_A << " * x + " << m_B << ") mod " << BIG_PRIME << "\""; return result.str(); } - -CHashing::CUniversalHash::CUInt32VecHash::CUInt32VecHash(uint32_t m, - const TUInt32Vec &a, - uint32_t b) : - m_M(m), m_A(a), m_B(b) -{ +CHashing::CUniversalHash::CUInt32VecHash::CUInt32VecHash(uint32_t m, const TUInt32Vec& a, uint32_t b) : m_M(m), m_A(a), m_B(b) { } -uint32_t CHashing::CUniversalHash::CUInt32VecHash::m() const -{ +uint32_t CHashing::CUniversalHash::CUInt32VecHash::m() const { return m_M; } -const CHashing::CUniversalHash::TUInt32Vec & -CHashing::CUniversalHash::CUInt32VecHash::a() const -{ +const CHashing::CUniversalHash::TUInt32Vec& CHashing::CUniversalHash::CUInt32VecHash::a() const { return m_A; } -uint32_t CHashing::CUniversalHash::CUInt32VecHash::b() const -{ +uint32_t CHashing::CUniversalHash::CUInt32VecHash::b() const { return m_B; } -std::string CHashing::CUniversalHash::CUInt32VecHash::print() const -{ +std::string CHashing::CUniversalHash::CUInt32VecHash::print() const { std::ostringstream result; result << "\"((" << m_A[0] << "* x0"; - for (std::size_t i = 1u; i < m_A.size(); ++i) - { + for (std::size_t i = 1u; i < m_A.size(); ++i) { result << " + " << m_A[i] << "* x" << i; } result << ") mod " << BIG_PRIME << ") mod " << m_M << "\""; return result.str(); } - -CHashing::CUniversalHash::CToString::CToString(const char delimiter) : - m_Delimiter(delimiter) -{ +CHashing::CUniversalHash::CToString::CToString(const char delimiter) : m_Delimiter(delimiter) { } -std::string CHashing::CUniversalHash::CToString::operator()(const CUInt32UnrestrictedHash &hash) const -{ - return CStringUtils::typeToString(hash.a()) - + m_Delimiter - + CStringUtils::typeToString(hash.b()); +std::string CHashing::CUniversalHash::CToString::operator()(const CUInt32UnrestrictedHash& hash) const { + return CStringUtils::typeToString(hash.a()) + m_Delimiter + CStringUtils::typeToString(hash.b()); } -std::string CHashing::CUniversalHash::CToString::operator()(const CUInt32Hash &hash) const -{ - return CStringUtils::typeToString(hash.m()) - + m_Delimiter - + CStringUtils::typeToString(hash.a()) - + m_Delimiter - + CStringUtils::typeToString(hash.b()); +std::string CHashing::CUniversalHash::CToString::operator()(const CUInt32Hash& hash) const { + return CStringUtils::typeToString(hash.m()) + m_Delimiter + 
CStringUtils::typeToString(hash.a()) + m_Delimiter + + CStringUtils::typeToString(hash.b()); } - -CHashing::CUniversalHash::CFromString::CFromString(const char delimiter) : - m_Delimiter(delimiter) -{ +CHashing::CUniversalHash::CFromString::CFromString(const char delimiter) : m_Delimiter(delimiter) { } -bool CHashing::CUniversalHash::CFromString::operator()(const std::string &token, - CUInt32UnrestrictedHash &hash) const -{ +bool CHashing::CUniversalHash::CFromString::operator()(const std::string& token, CUInt32UnrestrictedHash& hash) const { std::size_t delimPos = token.find(m_Delimiter); - if (delimPos == std::string::npos) - { + if (delimPos == std::string::npos) { LOG_ERROR("Invalid hash state " << token); return false; } @@ -176,16 +124,12 @@ bool CHashing::CUniversalHash::CFromString::operator()(const std::string &token, uint32_t a; uint32_t b; m_Token.assign(token, 0, delimPos); - if (CStringUtils::stringToType(m_Token, a) == false) - { + if (CStringUtils::stringToType(m_Token, a) == false) { LOG_ERROR("Invalid multiplier in " << m_Token); return false; } - m_Token.assign(token, - delimPos + 1, - token.length() - delimPos); - if (CStringUtils::stringToType(m_Token, b) == false) - { + m_Token.assign(token, delimPos + 1, token.length() - delimPos); + if (CStringUtils::stringToType(m_Token, b) == false) { LOG_ERROR("Invalid offset in " << m_Token); return false; } @@ -193,18 +137,14 @@ bool CHashing::CUniversalHash::CFromString::operator()(const std::string &token, return true; } -bool CHashing::CUniversalHash::CFromString::operator()(const std::string &token, - CUInt32Hash &hash) const -{ +bool CHashing::CUniversalHash::CFromString::operator()(const std::string& token, CUInt32Hash& hash) const { std::size_t firstDelimPos = token.find(m_Delimiter); - if (firstDelimPos == std::string::npos) - { + if (firstDelimPos == std::string::npos) { LOG_ERROR("Invalid hash state " << token); return false; } std::size_t secondDelimPos = token.find(m_Delimiter, firstDelimPos + 1); - if (secondDelimPos == std::string::npos) - { + if (secondDelimPos == std::string::npos) { LOG_ERROR("Invalid hash state " << token); return false; } @@ -213,24 +153,17 @@ bool CHashing::CUniversalHash::CFromString::operator()(const std::string &token, uint32_t a; uint32_t b; m_Token.assign(token, 0, firstDelimPos); - if (CStringUtils::stringToType(m_Token, m) == false) - { + if (CStringUtils::stringToType(m_Token, m) == false) { LOG_ERROR("Invalid range in " << m_Token); return false; } - m_Token.assign(token, - firstDelimPos + 1, - secondDelimPos - firstDelimPos - 1); - if (CStringUtils::stringToType(m_Token, a) == false) - { + m_Token.assign(token, firstDelimPos + 1, secondDelimPos - firstDelimPos - 1); + if (CStringUtils::stringToType(m_Token, a) == false) { LOG_ERROR("Invalid offset in " << m_Token); return false; } - m_Token.assign(token, - secondDelimPos + 1, - token.length() - secondDelimPos); - if (CStringUtils::stringToType(m_Token, b) == false) - { + m_Token.assign(token, secondDelimPos + 1, token.length() - secondDelimPos); + if (CStringUtils::stringToType(m_Token, b) == false) { LOG_ERROR("Invalid multiplier in " << m_Token); return false; } @@ -239,10 +172,7 @@ bool CHashing::CUniversalHash::CFromString::operator()(const std::string &token, return true; } -void CHashing::CUniversalHash::generateHashes(std::size_t k, - uint32_t m, - TUInt32HashVec &result) -{ +void CHashing::CUniversalHash::generateHashes(std::size_t k, uint32_t m, TUInt32HashVec& result) { TUInt32Vec a, b; a.reserve(k); b.reserve(k); @@ 
-251,32 +181,25 @@ void CHashing::CUniversalHash::generateHashes(std::size_t k, CScopedFastLock scopedLock(ms_Mutex); TUniform32 uniform1(1u, static_cast(BIG_PRIME - 1)); - std::generate_n(std::back_inserter(a), k, - boost::bind(uniform1, boost::ref(ms_Generator))); - for (std::size_t i = 0u; i < a.size(); ++i) - { - if (a[i] == 0) - { + std::generate_n(std::back_inserter(a), k, boost::bind(uniform1, boost::ref(ms_Generator))); + for (std::size_t i = 0u; i < a.size(); ++i) { + if (a[i] == 0) { LOG_ERROR("Expected a in [1," << BIG_PRIME << ")"); a[i] = 1u; } } TUniform32 uniform0(0u, static_cast(BIG_PRIME - 1)); - std::generate_n(std::back_inserter(b), k, - boost::bind(uniform0, boost::ref(ms_Generator))); + std::generate_n(std::back_inserter(b), k, boost::bind(uniform0, boost::ref(ms_Generator))); } result.reserve(k); - for (std::size_t i = 0u; i < k; ++i) - { + for (std::size_t i = 0u; i < k; ++i) { result.push_back(CUInt32Hash(m, a[i], b[i])); } } -void CHashing::CUniversalHash::generateHashes(std::size_t k, - TUInt32UnrestrictedHashVec &result) -{ +void CHashing::CUniversalHash::generateHashes(std::size_t k, TUInt32UnrestrictedHashVec& result) { TUInt32Vec a, b; a.reserve(k); b.reserve(k); @@ -285,34 +208,25 @@ void CHashing::CUniversalHash::generateHashes(std::size_t k, CScopedFastLock scopedLock(ms_Mutex); TUniform32 uniform1(1u, static_cast(BIG_PRIME - 1)); - std::generate_n(std::back_inserter(a), k, - boost::bind(uniform1, boost::ref(ms_Generator))); - for (std::size_t i = 0u; i < a.size(); ++i) - { - if (a[i] == 0) - { + std::generate_n(std::back_inserter(a), k, boost::bind(uniform1, boost::ref(ms_Generator))); + for (std::size_t i = 0u; i < a.size(); ++i) { + if (a[i] == 0) { LOG_ERROR("Expected a in [1," << BIG_PRIME << ")"); a[i] = 1u; } } TUniform32 uniform0(0u, static_cast(BIG_PRIME - 1)); - std::generate_n(std::back_inserter(b), k, - boost::bind(uniform0, boost::ref(ms_Generator))); + std::generate_n(std::back_inserter(b), k, boost::bind(uniform0, boost::ref(ms_Generator))); } result.reserve(k); - for (std::size_t i = 0u; i < k; ++i) - { + for (std::size_t i = 0u; i < k; ++i) { result.push_back(CUInt32UnrestrictedHash(a[i], b[i])); } } -void CHashing::CUniversalHash::generateHashes(std::size_t k, - std::size_t n, - uint32_t m, - TUInt32VecHashVec &result) -{ +void CHashing::CUniversalHash::generateHashes(std::size_t k, std::size_t n, uint32_t m, TUInt32VecHashVec& result) { using TUInt32VecVec = std::vector; TUInt32VecVec a; @@ -323,17 +237,13 @@ void CHashing::CUniversalHash::generateHashes(std::size_t k, { CScopedFastLock scopedLock(ms_Mutex); - for (std::size_t i = 0u; i < k; ++i) - { + for (std::size_t i = 0u; i < k; ++i) { a.push_back(TUInt32Vec()); a.back().reserve(n); TUniform32 uniform1(1u, static_cast(BIG_PRIME - 1)); - std::generate_n(std::back_inserter(a.back()), n, - boost::bind(uniform1, boost::ref(ms_Generator))); - for (std::size_t j = 0u; j < a.back().size(); ++j) - { - if ((a.back())[j] == 0) - { + std::generate_n(std::back_inserter(a.back()), n, boost::bind(uniform1, boost::ref(ms_Generator))); + for (std::size_t j = 0u; j < a.back().size(); ++j) { + if ((a.back())[j] == 0) { LOG_ERROR("Expected a in [1," << BIG_PRIME << ")"); (a.back())[j] = 1u; } @@ -341,21 +251,16 @@ void CHashing::CUniversalHash::generateHashes(std::size_t k, } TUniform32 uniform0(0u, static_cast(BIG_PRIME - 1)); - std::generate_n(std::back_inserter(b), k, - boost::bind(uniform0, boost::ref(ms_Generator))); + std::generate_n(std::back_inserter(b), k, boost::bind(uniform0, 
boost::ref(ms_Generator))); } result.reserve(k); - for (std::size_t i = 0u; i < k; ++i) - { + for (std::size_t i = 0u; i < k; ++i) { result.push_back(CUInt32VecHash(m, a[i], b[i])); } } -uint32_t CHashing::murmurHash32(const void *key, - int length, - uint32_t seed) -{ +uint32_t CHashing::murmurHash32(const void* key, int length, uint32_t seed) { const uint32_t m = 0x5bd1e995; const int r = 24; @@ -363,12 +268,11 @@ uint32_t CHashing::murmurHash32(const void *key, // Note, remainder = length % 4 const int remainder = length & 0x3; - const uint32_t *data = static_cast(key); + const uint32_t* data = static_cast(key); // Note, shift = (length - remainder) / 4 - const uint32_t *end = data + ((length - remainder) >> 2); + const uint32_t* end = data + ((length - remainder) >> 2); - while (data != end) - { + while (data != end) { uint32_t k = *reinterpret_cast(data); k *= m; @@ -381,10 +285,9 @@ uint32_t CHashing::murmurHash32(const void *key, ++data; } - const unsigned char *remainingData = reinterpret_cast(end); + const unsigned char* remainingData = reinterpret_cast(end); - switch (remainder) - { + switch (remainder) { case 3: h ^= remainingData[2] << 16; BOOST_FALLTHROUGH; @@ -406,23 +309,19 @@ uint32_t CHashing::murmurHash32(const void *key, return h; } -uint32_t CHashing::safeMurmurHash32(const void *key, - int length, - uint32_t seed) -{ +uint32_t CHashing::safeMurmurHash32(const void* key, int length, uint32_t seed) { const uint32_t m = 0x5bd1e995; const int r = 24; uint32_t h = seed ^ length; - const unsigned char *data = static_cast(key); + const unsigned char* data = static_cast(key); // Endian and alignment neutral implementation of the main loop. - while (length >= 4) - { + while (length >= 4) { uint32_t k; - k = data[0]; + k = data[0]; k |= data[1] << 8; k |= data[2] << 16; k |= data[3] << 24; @@ -438,8 +337,7 @@ uint32_t CHashing::safeMurmurHash32(const void *key, length -= 4; } - switch (length) - { + switch (length) { case 3: h ^= data[2] << 16; BOOST_FALLTHROUGH; @@ -461,10 +359,7 @@ uint32_t CHashing::safeMurmurHash32(const void *key, return h; } -uint64_t CHashing::murmurHash64(const void *key, - int length, - uint64_t seed) -{ +uint64_t CHashing::murmurHash64(const void* key, int length, uint64_t seed) { const uint64_t m = 0xc6a4a7935bd1e995ull; const int r = 47; @@ -472,12 +367,11 @@ uint64_t CHashing::murmurHash64(const void *key, // Note, remainder = length % 8 const int remainder = length & 0x7; - const uint64_t *data = static_cast(key); + const uint64_t* data = static_cast(key); // Note, shift = (length - remainder) / 8 - const uint64_t *end = data + ((length - remainder) >> 3); + const uint64_t* end = data + ((length - remainder) >> 3); - while (data != end) - { + while (data != end) { uint64_t k = *data; k *= m; @@ -490,10 +384,9 @@ uint64_t CHashing::murmurHash64(const void *key, ++data; } - const unsigned char *remainingData = reinterpret_cast(end); + const unsigned char* remainingData = reinterpret_cast(end); - switch (remainder) - { + switch (remainder) { case 7: h ^= uint64_t(remainingData[6]) << 48; BOOST_FALLTHROUGH; @@ -527,23 +420,19 @@ uint64_t CHashing::murmurHash64(const void *key, return h; } -uint64_t CHashing::safeMurmurHash64(const void *key, - int length, - uint64_t seed) -{ +uint64_t CHashing::safeMurmurHash64(const void* key, int length, uint64_t seed) { const uint64_t m = 0xc6a4a7935bd1e995ull; const int r = 47; uint64_t h = seed ^ (length * m); - const unsigned char *data = static_cast(key); + const unsigned char* data = static_cast(key); // Endian 
and alignment neutral implementation. - while (length >= 8) - { + while (length >= 8) { uint64_t k; - k = uint64_t(data[0]); + k = uint64_t(data[0]); k |= uint64_t(data[1]) << 8; k |= uint64_t(data[2]) << 16; k |= uint64_t(data[3]) << 24; @@ -563,8 +452,7 @@ uint64_t CHashing::safeMurmurHash64(const void *key, length -= 8; } - switch (length) - { + switch (length) { case 7: h ^= uint64_t(data[6]) << 48; BOOST_FALLTHROUGH; @@ -598,15 +486,13 @@ uint64_t CHashing::safeMurmurHash64(const void *key, return h; } -uint32_t CHashing::hashCombine(uint32_t seed, uint32_t h) -{ +uint32_t CHashing::hashCombine(uint32_t seed, uint32_t h) { static const uint32_t C = 0x9e3779b9; seed ^= h + C + (seed << 6) + (seed >> 2); return seed; } -uint64_t CHashing::hashCombine(uint64_t seed, uint64_t h) -{ +uint64_t CHashing::hashCombine(uint64_t seed, uint64_t h) { // As with boost::hash_combine use the binary expansion of an irrational // number to generate 64 random independent bits, i.e. // C = 2^64 / "golden ratio" = 2^65 / (1 + 5^(1/2)) @@ -614,6 +500,5 @@ uint64_t CHashing::hashCombine(uint64_t seed, uint64_t h) seed ^= h + C + (seed << 6) + (seed >> 2); return seed; } - } } diff --git a/lib/core/CHexUtils.cc b/lib/core/CHexUtils.cc index 5e9b598f21..4ca104b20d 100644 --- a/lib/core/CHexUtils.cc +++ b/lib/core/CHexUtils.cc @@ -14,93 +14,61 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CHexUtils::CHexUtils(const uint8_t *pkt, - size_t pktLen, - bool printHeader, - bool printAscii) - : m_Pkt(pkt), - m_PktLen(pktLen), - m_PrintHeader(printHeader), - m_PrintAscii(printAscii) -{ +CHexUtils::CHexUtils(const uint8_t* pkt, size_t pktLen, bool printHeader, bool printAscii) + : m_Pkt(pkt), m_PktLen(pktLen), m_PrintHeader(printHeader), m_PrintAscii(printAscii) { } -CHexUtils::CHexUtils(const TDataVec &data, - bool printHeader, - bool printAscii) - : m_Pkt((data.size() > 0) ? &data[0] : 0), - m_PktLen(data.size()), - m_PrintHeader(printHeader), - m_PrintAscii(printAscii) -{ +CHexUtils::CHexUtils(const TDataVec& data, bool printHeader, bool printAscii) + : m_Pkt((data.size() > 0) ? 
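// A minimal sketch of how the hashCombine() overloads above are typically
// used: fold the hashes of several fields into one seed, in a fixed order.
// The combining step is exactly the 32-bit function from this patch; the
// record fields being hashed are hypothetical, for illustration only.
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

std::uint32_t hashCombine(std::uint32_t seed, std::uint32_t h) {
    static const std::uint32_t C = 0x9e3779b9; // 2^32 / golden ratio, as above
    seed ^= h + C + (seed << 6) + (seed >> 2);
    return seed;
}

int main() {
    // Combine the hashes of two fields of a hypothetical record.
    std::uint32_t h1 = static_cast<std::uint32_t>(std::hash<std::string>{}("host-1"));
    std::uint32_t h2 = static_cast<std::uint32_t>(std::hash<int>{}(8080));
    std::uint32_t seed = hashCombine(0u, h1);
    seed = hashCombine(seed, h2);
    std::cout << std::hex << seed << '\n'; // order-sensitive: (h1, h2) != (h2, h1)
}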
&data[0] : 0), m_PktLen(data.size()), m_PrintHeader(printHeader), m_PrintAscii(printAscii) { } -void CHexUtils::dump(const uint8_t *pkt, size_t pktLen) -{ +void CHexUtils::dump(const uint8_t* pkt, size_t pktLen) { CHexUtils hex(pkt, pktLen); std::cout << hex << std::endl; } -std::ostream &operator<<(std::ostream &strm, const CHexUtils &hex) -{ - if (hex.m_PrintHeader) - { +std::ostream& operator<<(std::ostream& strm, const CHexUtils& hex) { + if (hex.m_PrintHeader) { strm << "DataSize: " << hex.m_PktLen << " {" << core_t::LINE_ENDING; } - if (hex.m_Pkt != 0) - { + if (hex.m_Pkt != 0) { strm << std::hex; std::string text; - for (size_t i = 0; i < hex.m_PktLen; ++i) - { - strm << std::setfill('0') << std::setw(2) - << static_cast(hex.m_Pkt[i]) << ' '; + for (size_t i = 0; i < hex.m_PktLen; ++i) { + strm << std::setfill('0') << std::setw(2) << static_cast(hex.m_Pkt[i]) << ' '; - if (::isprint(hex.m_Pkt[i])) - { + if (::isprint(hex.m_Pkt[i])) { text += static_cast(hex.m_Pkt[i]); - } - else - { + } else { text += '.'; } - if (((i + 1) % 8) == 0) - { + if (((i + 1) % 8) == 0) { strm << ' '; } - if (hex.m_PrintAscii && ((i + 1) % 16) == 0) - { + if (hex.m_PrintAscii && ((i + 1) % 16) == 0) { strm << text << core_t::LINE_ENDING; text.clear(); } } - if (hex.m_PrintAscii && (hex.m_PktLen % 16) != 0) - { + if (hex.m_PrintAscii && (hex.m_PktLen % 16) != 0) { // pad space size_t max(((hex.m_PktLen / 16) + 1) * 16); - for (size_t i = hex.m_PktLen; i <= max; ++i) - { - if (i != max) - { + for (size_t i = hex.m_PktLen; i <= max; ++i) { + if (i != max) { strm << " "; } - if (((i + 1) % 8) == 0) - { + if (((i + 1) % 8) == 0) { strm << ' '; } } @@ -110,14 +78,11 @@ std::ostream &operator<<(std::ostream &strm, const CHexUtils &hex) strm << std::dec; } - if (hex.m_PrintHeader) - { + if (hex.m_PrintHeader) { strm << '}'; } return strm; } - - } } diff --git a/lib/core/CIEEE754.cc b/lib/core/CIEEE754.cc index da61c28df5..e5a1674d35 100644 --- a/lib/core/CIEEE754.cc +++ b/lib/core/CIEEE754.cc @@ -8,13 +8,10 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -double CIEEE754::round(double value, EPrecision precision) -{ +double CIEEE754::round(double value, EPrecision precision) { // This first decomposes the value into the mantissa // and exponent to avoid the problem with overflow if // the values are close to max double. @@ -22,22 +19,15 @@ double CIEEE754::round(double value, EPrecision precision) int exponent; double mantissa = std::frexp(value, &exponent); - switch (precision) - { - case E_HalfPrecision: - { + switch (precision) { + case E_HalfPrecision: { static const double PRECISION = 2048.0; - mantissa = mantissa < 0.0 ? - std::ceil(mantissa * PRECISION - 0.5) / PRECISION : - std::floor(mantissa * PRECISION + 0.5) / PRECISION; + mantissa = mantissa < 0.0 ? std::ceil(mantissa * PRECISION - 0.5) / PRECISION : std::floor(mantissa * PRECISION + 0.5) / PRECISION; break; } - case E_SinglePrecision: - { + case E_SinglePrecision: { static const double PRECISION = 16777216.0; - mantissa = mantissa < 0.0 ? - std::ceil(mantissa * PRECISION - 0.5) / PRECISION : - std::floor(mantissa * PRECISION + 0.5) / PRECISION; + mantissa = mantissa < 0.0 ? 
std::ceil(mantissa * PRECISION - 0.5) / PRECISION : std::floor(mantissa * PRECISION + 0.5) / PRECISION; break; } case E_DoublePrecision: @@ -47,7 +37,5 @@ double CIEEE754::round(double value, EPrecision precision) return std::ldexp(mantissa, exponent); } - } } - diff --git a/lib/core/CJsonLogLayout.cc b/lib/core/CJsonLogLayout.cc index 7911c17ddb..304931eec1 100644 --- a/lib/core/CJsonLogLayout.cc +++ b/lib/core/CJsonLogLayout.cc @@ -25,16 +25,14 @@ #include - -namespace -{ +namespace { const std::string LOGGER_NAME("logger"); const std::string TIMESTAMP_NAME("timestamp"); const std::string LEVEL_NAME("level"); const std::string PID_NAME("pid"); // Cast this to int64_t as the type varies between int32_t and uint32_t on // different platforms and int64_t covers both -const int64_t PID(static_cast(ml::core::CProcess::instance().id())); +const int64_t PID(static_cast(ml::core::CProcess::instance().id())); const std::string THREAD_NAME("thread"); const std::string MESSAGE_NAME("message"); const std::string NDC_NAME("ndc"); @@ -54,58 +52,39 @@ using namespace log4cxx::helpers; IMPLEMENT_LOG4CXX_OBJECT(CJsonLogLayout) -CJsonLogLayout::CJsonLogLayout() - : m_LocationInfo(true), - m_Properties(false) -{ +CJsonLogLayout::CJsonLogLayout() : m_LocationInfo(true), m_Properties(false) { } -void CJsonLogLayout::locationInfo(bool locationInfo) -{ +void CJsonLogLayout::locationInfo(bool locationInfo) { m_LocationInfo = locationInfo; } -bool CJsonLogLayout::locationInfo() const -{ +bool CJsonLogLayout::locationInfo() const { return m_LocationInfo; } -void CJsonLogLayout::properties(bool properties) -{ +void CJsonLogLayout::properties(bool properties) { m_Properties = properties; } -bool CJsonLogLayout::properties() const -{ +bool CJsonLogLayout::properties() const { return m_Properties; } -void CJsonLogLayout::activateOptions(Pool &/*p*/) -{ +void CJsonLogLayout::activateOptions(Pool& /*p*/) { // NO-OP } -void CJsonLogLayout::setOption(const LogString &option, - const LogString &value) -{ - if (StringHelper::equalsIgnoreCase(option, - LOG4CXX_STR("LOCATIONINFO"), - LOG4CXX_STR("locationinfo"))) - { +void CJsonLogLayout::setOption(const LogString& option, const LogString& value) { + if (StringHelper::equalsIgnoreCase(option, LOG4CXX_STR("LOCATIONINFO"), LOG4CXX_STR("locationinfo"))) { this->locationInfo(OptionConverter::toBoolean(value, false)); } - if (StringHelper::equalsIgnoreCase(option, - LOG4CXX_STR("PROPERTIES"), - LOG4CXX_STR("properties"))) - { + if (StringHelper::equalsIgnoreCase(option, LOG4CXX_STR("PROPERTIES"), LOG4CXX_STR("properties"))) { this->properties(OptionConverter::toBoolean(value, false)); } } -void CJsonLogLayout::format(LogString &output, - const spi::LoggingEventPtr &event, - Pool &/*p*/) const -{ +void CJsonLogLayout::format(LogString& output, const spi::LoggingEventPtr& event, Pool& /*p*/) const { using TStringBufferWriter = rapidjson::Writer; rapidjson::StringBuffer buffer; TStringBufferWriter writer(buffer); @@ -135,27 +114,23 @@ void CJsonLogLayout::format(LogString &output, writer.String(message); LogString logNdc; - if (event->getNDC(logNdc)) - { + if (event->getNDC(logNdc)) { writer.String(NDC_NAME); LOG4CXX_ENCODE_CHAR(ndc, logNdc); writer.String(ndc); } - if (m_LocationInfo) - { - const spi::LocationInfo &locInfo = event->getLocationInformation(); + if (m_LocationInfo) { + const spi::LocationInfo& locInfo = event->getLocationInformation(); - const std::string &className = locInfo.getClassName(); - if (!className.empty()) - { + const std::string& className = 
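// A standalone sketch of the rounding scheme CIEEE754::round() uses above:
// split the double into mantissa and exponent with std::frexp, so values near
// the maximum double cannot overflow, round the mantissa to a fixed number of
// bits, then reassemble with std::ldexp. The constants match the hunk (2^11
// for half precision, 2^24 for single); the free function itself is
// illustrative, not the library API.
#include <cmath>
#include <cstdio>

double roundToPrecision(double value, double precision) {
    int exponent;
    double mantissa = std::frexp(value, &exponent); // value = mantissa * 2^exponent
    mantissa = mantissa < 0.0 ? std::ceil(mantissa * precision - 0.5) / precision
                              : std::floor(mantissa * precision + 0.5) / precision;
    return std::ldexp(mantissa, exponent);
}

int main() {
    std::printf("%.10f\n", roundToPrecision(3.14159265358979, 2048.0));     // half precision
    std::printf("%.10f\n", roundToPrecision(3.14159265358979, 16777216.0)); // single precision
}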
locInfo.getClassName(); + if (!className.empty()) { writer.String(CLASS_NAME); writer.String(className); } - const std::string &methodName = locInfo.getMethodName(); - if (!methodName.empty()) - { + const std::string& methodName = locInfo.getMethodName(); + if (!methodName.empty()) { writer.String(METHOD_NAME); writer.String(methodName); } @@ -167,37 +142,27 @@ void CJsonLogLayout::format(LogString &output, writer.Int(locInfo.getLineNumber()); } - if (m_Properties) - { - const spi::LoggingEvent::KeySet &propertySet = event->getPropertyKeySet(); - const spi::LoggingEvent::KeySet &keySet = event->getMDCKeySet(); - if (!(keySet.empty() && propertySet.empty())) - { + if (m_Properties) { + const spi::LoggingEvent::KeySet& propertySet = event->getPropertyKeySet(); + const spi::LoggingEvent::KeySet& keySet = event->getMDCKeySet(); + if (!(keySet.empty() && propertySet.empty())) { writer.String(PROPERTIES_NAME); writer.StartObject(); - for (spi::LoggingEvent::KeySet::const_iterator i = keySet.begin(); - i != keySet.end(); - ++i) - { - const LogString &key = *i; + for (spi::LoggingEvent::KeySet::const_iterator i = keySet.begin(); i != keySet.end(); ++i) { + const LogString& key = *i; LogString value; - if (event->getMDC(key, value)) - { + if (event->getMDC(key, value)) { LOG4CXX_ENCODE_CHAR(name, key); writer.String(name); LOG4CXX_ENCODE_CHAR(val, value); writer.String(val); } } - for (spi::LoggingEvent::KeySet::const_iterator i = propertySet.begin(); - i != propertySet.end(); - ++i) - { - const LogString &key = *i; + for (spi::LoggingEvent::KeySet::const_iterator i = propertySet.begin(); i != propertySet.end(); ++i) { + const LogString& key = *i; LogString value; - if (event->getProperty(key, value)) - { + if (event->getProperty(key, value)) { LOG4CXX_ENCODE_CHAR(name, key); writer.String(name); LOG4CXX_ENCODE_CHAR(val, value); @@ -215,14 +180,11 @@ void CJsonLogLayout::format(LogString &output, output.append(LOG4CXX_EOL); } -bool CJsonLogLayout::ignoresThrowable() const -{ +bool CJsonLogLayout::ignoresThrowable() const { return false; } -std::string CJsonLogLayout::cropPath(const std::string &filename) -{ - boost::filesystem::path p(filename); - return p.filename().string(); +std::string CJsonLogLayout::cropPath(const std::string& filename) { + boost::filesystem::path p(filename); + return p.filename().string(); } - diff --git a/lib/core/CJsonOutputStreamWrapper.cc b/lib/core/CJsonOutputStreamWrapper.cc index 16bf683231..c5d6d929f9 100644 --- a/lib/core/CJsonOutputStreamWrapper.cc +++ b/lib/core/CJsonOutputStreamWrapper.cc @@ -8,102 +8,73 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { const char CJsonOutputStreamWrapper::JSON_ARRAY_START('['); const char CJsonOutputStreamWrapper::JSON_ARRAY_END(']'); const char CJsonOutputStreamWrapper::JSON_ARRAY_DELIMITER(','); -CJsonOutputStreamWrapper::CJsonOutputStreamWrapper(std::ostream &outStream) - : m_ConcurrentOutputStream(outStream), m_FirstObject(true) -{ +CJsonOutputStreamWrapper::CJsonOutputStreamWrapper(std::ostream& outStream) : m_ConcurrentOutputStream(outStream), m_FirstObject(true) { // initialize the bufferpool - for(size_t i =0; i < BUFFER_POOL_SIZE; ++i) - { + for (size_t i = 0; i < BUFFER_POOL_SIZE; ++i) { m_StringBuffers[i].Reserve(BUFFER_START_SIZE); m_StringBufferQueue.push(&m_StringBuffers[i]); } - m_ConcurrentOutputStream([](std::ostream &o) - { - o.put(JSON_ARRAY_START); - } ); + m_ConcurrentOutputStream([](std::ostream& o) { o.put(JSON_ARRAY_START); }); } 
-CJsonOutputStreamWrapper::~CJsonOutputStreamWrapper()
-{
-    m_ConcurrentOutputStream([](std::ostream &o)
-    {
-        o.put(JSON_ARRAY_END);
-    } );
+CJsonOutputStreamWrapper::~CJsonOutputStreamWrapper() {
+    m_ConcurrentOutputStream([](std::ostream& o) { o.put(JSON_ARRAY_END); });
 }

-void CJsonOutputStreamWrapper::acquireBuffer(TGenericLineWriter &writer, rapidjson::StringBuffer *&buffer)
-{
+void CJsonOutputStreamWrapper::acquireBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer*& buffer) {
     buffer = m_StringBufferQueue.pop();
     writer.Reset(*buffer);
 }

-void CJsonOutputStreamWrapper::releaseBuffer(TGenericLineWriter &writer, rapidjson::StringBuffer *buffer)
-{
+void CJsonOutputStreamWrapper::releaseBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer* buffer) {
     writer.Flush();

     // check for data that has to be written
-    if (buffer->GetLength() > 0)
-    {
-        m_ConcurrentOutputStream([this, buffer](std::ostream &o)
-        {
-            if (m_FirstObject)
-            {
+    if (buffer->GetLength() > 0) {
+        m_ConcurrentOutputStream([this, buffer](std::ostream& o) {
+            if (m_FirstObject) {
                 m_FirstObject = false;
-            }
-            else
-            {
+            } else {
                 o.put(JSON_ARRAY_DELIMITER);
             }

             o.write(buffer->GetString(), buffer->GetLength());
             o.flush();

             this->returnAndCheckBuffer(buffer);
-        } );
-    }
-    else
-    {
+        });
+    } else {
         m_StringBufferQueue.push(buffer);
     }
 }

-void CJsonOutputStreamWrapper::flushBuffer(TGenericLineWriter &writer,
-                                           rapidjson::StringBuffer *&buffer)
-{
+void CJsonOutputStreamWrapper::flushBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer*& buffer) {
     writer.Flush();

-    m_ConcurrentOutputStream([this, buffer](std::ostream &o)
-    {
-        if (m_FirstObject)
-        {
+    m_ConcurrentOutputStream([this, buffer](std::ostream& o) {
+        if (m_FirstObject) {
             m_FirstObject = false;
-        }
-        else
-        {
+        } else {
             o.put(JSON_ARRAY_DELIMITER);
         }

         o.write(buffer->GetString(), buffer->GetLength());
         this->returnAndCheckBuffer(buffer);
-    } );
+    });

     acquireBuffer(writer, buffer);
 }

-void CJsonOutputStreamWrapper::returnAndCheckBuffer(rapidjson::StringBuffer *buffer)
-{
+void CJsonOutputStreamWrapper::returnAndCheckBuffer(rapidjson::StringBuffer* buffer) {
     buffer->Clear();
-    if (buffer->stack_.GetCapacity() > BUFFER_REALLOC_TRIGGER_SIZE)
-    {
+    if (buffer->stack_.GetCapacity() > BUFFER_REALLOC_TRIGGER_SIZE) {
         // we have to free and realloc
         buffer->ShrinkToFit();
         buffer->Reserve(BUFFER_START_SIZE);
@@ -112,35 +83,27 @@ void CJsonOutputStreamWrapper::returnAndCheckBuffer(rapidjson::StringBuffer *buf
     m_StringBufferQueue.push(buffer);
 }

-void CJsonOutputStreamWrapper::flush()
-{
-    m_ConcurrentOutputStream([](std::ostream &o)
-    {
-        o.flush();
-    } );
+void CJsonOutputStreamWrapper::flush() {
+    m_ConcurrentOutputStream([](std::ostream& o) { o.flush(); });
 }

-void CJsonOutputStreamWrapper::syncFlush()
-{
+void CJsonOutputStreamWrapper::syncFlush() {
     std::mutex m;
     std::condition_variable c;
     std::unique_lock<std::mutex> lock(m);

-    m_ConcurrentOutputStream([&m, &c](std::ostream &o)
-    {
+    m_ConcurrentOutputStream([&m, &c](std::ostream& o) {
         o.flush();
         std::unique_lock<std::mutex> waitLock(m);
         c.notify_all();
-    } );
+    });

     c.wait(lock);
 }

-void CJsonOutputStreamWrapper::debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CJsonOutputStreamWrapper::debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const {
     std::size_t bufferSize = 0;
-    for (size_t i =0; i < BUFFER_POOL_SIZE; ++i)
-    {
+    for (size_t i = 0; i < BUFFER_POOL_SIZE; ++i) {
         // GetSize() returns the length of the string, not the used memory, need to inspect internals
         bufferSize += m_StringBuffers[i].stack_.GetCapacity();
     }
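// A simplified sketch of the streaming pattern CJsonOutputStreamWrapper
// implements above: the output is one long JSON array, '[' is written up
// front, ']' on destruction, and a comma is emitted before every document
// except the first. The real class recycles a fixed pool of rapidjson buffers
// across threads; this single-threaded stand-in with plain strings is
// illustrative only.
#include <iostream>
#include <string>

class JsonArrayStream {
public:
    explicit JsonArrayStream(std::ostream& out) : m_Out(out) { m_Out.put('['); }
    ~JsonArrayStream() { m_Out.put(']'); }
    void write(const std::string& doc) {
        if (m_First) {
            m_First = false; // no delimiter before the first document
        } else {
            m_Out.put(',');
        }
        m_Out << doc;
    }

private:
    std::ostream& m_Out;
    bool m_First = true;
};

int main() {
    JsonArrayStream stream(std::cout);
    stream.write("{\"a\":1}");
    stream.write("{\"b\":2}"); // prints [{"a":1},{"b":2}] once the stream is destroyed
}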
@@ -156,11 +119,9 @@ void CJsonOutputStreamWrapper::debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr me m_ConcurrentOutputStream.debugMemoryUsage(mem->addChild()); } -std::size_t CJsonOutputStreamWrapper::memoryUsage() const -{ +std::size_t CJsonOutputStreamWrapper::memoryUsage() const { std::size_t memoryUsage = 0; - for (size_t i =0; i < BUFFER_POOL_SIZE; ++i) - { + for (size_t i = 0; i < BUFFER_POOL_SIZE; ++i) { // GetSize() returns the length of the string, not the used memory, need to inspect internals memoryUsage += m_StringBuffers[i].stack_.GetCapacity(); } @@ -172,8 +133,5 @@ std::size_t CJsonOutputStreamWrapper::memoryUsage() const memoryUsage += m_ConcurrentOutputStream.memoryUsage(); return memoryUsage; } - - } } - diff --git a/lib/core/CJsonStatePersistInserter.cc b/lib/core/CJsonStatePersistInserter.cc index d3f70ad9e8..bfd8bb7689 100644 --- a/lib/core/CJsonStatePersistInserter.cc +++ b/lib/core/CJsonStatePersistInserter.cc @@ -7,56 +7,39 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CJsonStatePersistInserter::CJsonStatePersistInserter(std::ostream &outputStream) - : m_WriteStream(outputStream), - m_Writer(m_WriteStream) -{ +CJsonStatePersistInserter::CJsonStatePersistInserter(std::ostream& outputStream) : m_WriteStream(outputStream), m_Writer(m_WriteStream) { m_Writer.StartObject(); } -CJsonStatePersistInserter::~CJsonStatePersistInserter() -{ +CJsonStatePersistInserter::~CJsonStatePersistInserter() { m_Writer.EndObject(); m_WriteStream.Flush(); } -void CJsonStatePersistInserter::insertValue(const std::string &name, - const std::string &value) -{ +void CJsonStatePersistInserter::insertValue(const std::string& name, const std::string& value) { m_Writer.String(name); m_Writer.String(value); } -void CJsonStatePersistInserter::insertInteger(const std::string &name, size_t value) -{ +void CJsonStatePersistInserter::insertInteger(const std::string& name, size_t value) { m_Writer.String(name); m_Writer.Uint64(value); } -void CJsonStatePersistInserter::flush() -{ +void CJsonStatePersistInserter::flush() { m_WriteStream.Flush(); } -void CJsonStatePersistInserter::newLevel(const std::string &name) -{ +void CJsonStatePersistInserter::newLevel(const std::string& name) { m_Writer.String(name); m_Writer.StartObject(); } -void CJsonStatePersistInserter::endLevel() -{ +void CJsonStatePersistInserter::endLevel() { m_Writer.EndObject(); } - - } } - diff --git a/lib/core/CJsonStateRestoreTraverser.cc b/lib/core/CJsonStateRestoreTraverser.cc index 5c18c9c0f2..2236737b45 100644 --- a/lib/core/CJsonStateRestoreTraverser.cc +++ b/lib/core/CJsonStateRestoreTraverser.cc @@ -8,78 +8,57 @@ #include #include -#include #include +#include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -namespace -{ +namespace { const std::string EMPTY_STRING; } - -CJsonStateRestoreTraverser::CJsonStateRestoreTraverser(std::istream &inputStream) - : m_ReadStream(inputStream), - m_Handler(), - m_Started(false), - m_DesiredLevel(0), - m_IsArrayOfObjects(false) -{ +CJsonStateRestoreTraverser::CJsonStateRestoreTraverser(std::istream& inputStream) + : m_ReadStream(inputStream), m_Handler(), m_Started(false), m_DesiredLevel(0), m_IsArrayOfObjects(false) { } -bool CJsonStateRestoreTraverser::isEof() const -{ +bool CJsonStateRestoreTraverser::isEof() const { // Rapid JSON istreamwrapper returns \0 when it reaches EOF return m_ReadStream.Peek() == '\0'; } -bool CJsonStateRestoreTraverser::next() -{ - if (!m_Started) - { - if (this->start() == false) - { +bool 
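// CJsonStatePersistInserter above maps flat name/value calls onto a rapidjson
// SAX writer: insertValue() emits a key and a string, insertInteger() a key
// and a Uint64, newLevel() a key plus StartObject(), and endLevel() an
// EndObject(). This sketch reproduces that shape directly against rapidjson to
// show the resulting document; it is illustrative, not the library class.
#include <rapidjson/ostreamwrapper.h>
#include <rapidjson/writer.h>
#include <iostream>

int main() {
    rapidjson::OStreamWrapper os(std::cout);
    rapidjson::Writer<rapidjson::OStreamWrapper> writer(os);
    writer.StartObject();     // the inserter's constructor opens the root object
    writer.String("version"); // insertInteger("version", 1)
    writer.Uint64(1);
    writer.String("model");   // newLevel("model")
    writer.StartObject();
    writer.String("name");    // insertValue("name", "test")
    writer.String("test");
    writer.EndObject();       // endLevel()
    writer.EndObject();       // the destructor closes the root object
}
// Output: {"version":1,"model":{"name":"test"}}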
CJsonStateRestoreTraverser::next() { + if (!m_Started) { + if (this->start() == false) { return false; } } - if (this->nextIsEndOfLevel()) - { + if (this->nextIsEndOfLevel()) { return false; } - if (this->nextLevel() == m_DesiredLevel || - (this->currentLevel() == m_DesiredLevel && this->nextLevel() == m_DesiredLevel + 1)) - { + if (this->nextLevel() == m_DesiredLevel || (this->currentLevel() == m_DesiredLevel && this->nextLevel() == m_DesiredLevel + 1)) { return this->advance(); } // If we get here then we're skipping over a nested object that's not of // interest - while (this->nextLevel() > m_DesiredLevel) - { - if (this->advance() == false) - { + while (this->nextLevel() > m_DesiredLevel) { + if (this->advance() == false) { return false; } } - if (this->nextLevel() == m_DesiredLevel) - { + if (this->nextLevel() == m_DesiredLevel) { return this->advance() && !this->nextIsEndOfLevel(); } return false; } -bool CJsonStateRestoreTraverser::nextObject() -{ - if (!m_IsArrayOfObjects) - { +bool CJsonStateRestoreTraverser::nextObject() { + if (!m_IsArrayOfObjects) { return false; } @@ -90,12 +69,9 @@ bool CJsonStateRestoreTraverser::nextObject() return ok; } -bool CJsonStateRestoreTraverser::hasSubLevel() const -{ - if (!m_Started) - { - if (const_cast(this)->start() == false) - { +bool CJsonStateRestoreTraverser::hasSubLevel() const { + if (!m_Started) { + if (const_cast(this)->start() == false) { return false; } } @@ -103,12 +79,9 @@ bool CJsonStateRestoreTraverser::hasSubLevel() const return this->currentLevel() == 1 + m_DesiredLevel; } -const std::string &CJsonStateRestoreTraverser::name() const -{ - if (!m_Started) - { - if (const_cast(this)->start() == false) - { +const std::string& CJsonStateRestoreTraverser::name() const { + if (!m_Started) { + if (const_cast(this)->start() == false) { return EMPTY_STRING; } } @@ -116,12 +89,9 @@ const std::string &CJsonStateRestoreTraverser::name() const return this->currentName(); } -const std::string &CJsonStateRestoreTraverser::value() const -{ - if (!m_Started) - { - if (const_cast(this)->start() == false) - { +const std::string& CJsonStateRestoreTraverser::value() const { + if (!m_Started) { + if (const_cast(this)->start() == false) { return EMPTY_STRING; } } @@ -129,18 +99,14 @@ const std::string &CJsonStateRestoreTraverser::value() const return this->currentValue(); } -bool CJsonStateRestoreTraverser::descend() -{ - if (!m_Started) - { - if (this->start() == false) - { +bool CJsonStateRestoreTraverser::descend() { + if (!m_Started) { + if (this->start() == false) { return false; } } - if (this->currentLevel() != 1 + m_DesiredLevel) - { + if (this->currentLevel() != 1 + m_DesiredLevel) { return false; } @@ -149,8 +115,7 @@ bool CJsonStateRestoreTraverser::descend() // Don't advance if the next level has no elements. Instead set the current // element to be completely empty so that the sub-level traverser will find // nothing and then ascend. 
- if (this->nextIsEndOfLevel()) - { + if (this->nextIsEndOfLevel()) { m_Handler.s_Name[1 - m_Handler.s_NextIndex].clear(); m_Handler.s_Value[1 - m_Handler.s_NextIndex].clear(); return true; @@ -159,22 +124,18 @@ bool CJsonStateRestoreTraverser::descend() return this->advance(); } -bool CJsonStateRestoreTraverser::ascend() -{ +bool CJsonStateRestoreTraverser::ascend() { // If we're trying to ascend above the root level then something has gone // wrong - if (m_DesiredLevel == 0) - { + if (m_DesiredLevel == 0) { LOG_ERROR("Inconsistency - trying to ascend above JSON root"); return false; } --m_DesiredLevel; - while (this->nextLevel() > m_DesiredLevel) - { - if (this->advance() == false) - { + while (this->nextLevel() > m_DesiredLevel) { + if (this->advance() == false) { return false; } } @@ -186,61 +147,46 @@ bool CJsonStateRestoreTraverser::ascend() return this->advance(); } -void CJsonStateRestoreTraverser::debug() const -{ - LOG_DEBUG("Current: name = " << this->currentName() << - " value = " << this->currentValue() << - " level = " << this->currentLevel() << - ", Next: name = " << this->nextName() << - " value = " << this->nextValue() << - " level = " << this->nextLevel() << - " is array of objects = " << m_IsArrayOfObjects); +void CJsonStateRestoreTraverser::debug() const { + LOG_DEBUG("Current: name = " << this->currentName() << " value = " << this->currentValue() << " level = " << this->currentLevel() + << ", Next: name = " << this->nextName() << " value = " << this->nextValue() + << " level = " << this->nextLevel() << " is array of objects = " << m_IsArrayOfObjects); } -size_t CJsonStateRestoreTraverser::currentLevel() const -{ +size_t CJsonStateRestoreTraverser::currentLevel() const { return m_Handler.s_Level[1 - m_Handler.s_NextIndex]; } -bool CJsonStateRestoreTraverser::currentIsEndOfLevel() const -{ +bool CJsonStateRestoreTraverser::currentIsEndOfLevel() const { return m_Handler.s_IsEndOfLevel[1 - m_Handler.s_NextIndex]; } -const std::string &CJsonStateRestoreTraverser::currentName() const -{ +const std::string& CJsonStateRestoreTraverser::currentName() const { return m_Handler.s_Name[1 - m_Handler.s_NextIndex]; } -const std::string &CJsonStateRestoreTraverser::currentValue() const -{ +const std::string& CJsonStateRestoreTraverser::currentValue() const { return m_Handler.s_Value[1 - m_Handler.s_NextIndex]; } -size_t CJsonStateRestoreTraverser::nextLevel() const -{ +size_t CJsonStateRestoreTraverser::nextLevel() const { return m_Handler.s_Level[m_Handler.s_NextIndex]; } -bool CJsonStateRestoreTraverser::nextIsEndOfLevel() const -{ +bool CJsonStateRestoreTraverser::nextIsEndOfLevel() const { return m_Handler.s_IsEndOfLevel[m_Handler.s_NextIndex]; } -const std::string &CJsonStateRestoreTraverser::nextName() const -{ +const std::string& CJsonStateRestoreTraverser::nextName() const { return m_Handler.s_Name[m_Handler.s_NextIndex]; } -const std::string &CJsonStateRestoreTraverser::nextValue() const -{ +const std::string& CJsonStateRestoreTraverser::nextValue() const { return m_Handler.s_Value[m_Handler.s_NextIndex]; } -bool CJsonStateRestoreTraverser::parseNext(bool remember) -{ - if (m_Reader.HasParseError()) - { +bool CJsonStateRestoreTraverser::parseNext(bool remember) { + if (m_Reader.HasParseError()) { this->logError(); return false; } @@ -251,53 +197,40 @@ bool CJsonStateRestoreTraverser::parseNext(bool remember) return m_Reader.IterativeParseNext(m_ReadStream, m_Handler); } -bool CJsonStateRestoreTraverser::skipArray() -{ - int depth =0; +bool 
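// parseNext() above drives rapidjson in pull mode: IterativeParseInit() once,
// then one IterativeParseNext() per token, with a SAX handler recording the
// token just read. A minimal sketch of that pull loop on its own, assuming the
// same rapidjson iterative API the patch uses; the handler only prints token
// kinds and is illustrative, not the library's handler.
#include <rapidjson/reader.h>
#include <iostream>
#include <string>

struct PrintHandler : rapidjson::BaseReaderHandler<rapidjson::UTF8<>, PrintHandler> {
    bool Key(const char* s, rapidjson::SizeType n, bool) {
        std::cout << "key " << std::string(s, n) << '\n';
        return true;
    }
    bool Uint(unsigned u) { std::cout << "uint " << u << '\n'; return true; }
    bool StartObject() { std::cout << "{\n"; return true; }
    bool EndObject(rapidjson::SizeType) { std::cout << "}\n"; return true; }
};

int main() {
    rapidjson::StringStream is("{\"a\":1,\"b\":2}");
    rapidjson::Reader reader;
    PrintHandler handler;
    reader.IterativeParseInit();
    while (!reader.IterativeParseComplete()) {
        if (!reader.IterativeParseNext<rapidjson::kParseDefaultFlags>(is, handler)) {
            break; // parse error; the real code logs GetParseErrorCode()
        }
    }
}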
CJsonStateRestoreTraverser::skipArray() { + int depth = 0; // we must have received a key, revert the state change to ignore it m_Handler.s_NextIndex = 1 - m_Handler.s_NextIndex; - do - { - if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart || - m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectStart) - { + do { + if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart || m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectStart) { ++depth; - } - else if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd || - m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectEnd) - { + } else if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd || m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectEnd) { --depth; } - if (parseNext(depth == 0) == false) - { + if (parseNext(depth == 0) == false) { this->logError(); return false; } - } - while (depth > 0); + } while (depth > 0); return true; } -bool CJsonStateRestoreTraverser::start() -{ +bool CJsonStateRestoreTraverser::start() { m_Started = true; m_Reader.IterativeParseInit(); - if (this->parseNext(false) == false) - { + if (this->parseNext(false) == false) { this->logError(); return false; } // If the first token is start of array then this could be // an array of docs. Next should be start object - if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart) - { - if (this->parseNext(false) == false) - { + if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart) { + if (this->parseNext(false) == false) { this->logError(); return false; } @@ -307,12 +240,8 @@ bool CJsonStateRestoreTraverser::start() // For Ml state the first token should be the start of a JSON // object, but we don't store it - if (m_Handler.s_Type != SRapidJsonHandler::E_TokenObjectStart) - { - if (m_IsArrayOfObjects && - m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd && - this->isEof()) - { + if (m_Handler.s_Type != SRapidJsonHandler::E_TokenObjectStart) { + if (m_IsArrayOfObjects && m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd && this->isEof()) { LOG_DEBUG("JSON document is an empty array"); return false; } @@ -325,28 +254,21 @@ bool CJsonStateRestoreTraverser::start() return this->advance() && this->advance(); } -bool CJsonStateRestoreTraverser::advance() -{ +bool CJsonStateRestoreTraverser::advance() { bool keepGoing(true); - while (keepGoing) - { - if (this->parseNext(true) == false) - { - if (!this->isEof()) - { + while (keepGoing) { + if (this->parseNext(true) == false) { + if (!this->isEof()) { this->logError(); } return false; } - if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart) - { + if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart) { LOG_ERROR("JSON state should not contain arrays"); this->skipArray(); - } - else if (m_Handler.s_Type != SRapidJsonHandler::E_TokenKey) - { + } else if (m_Handler.s_Type != SRapidJsonHandler::E_TokenKey) { keepGoing = false; } } @@ -354,128 +276,103 @@ bool CJsonStateRestoreTraverser::advance() return true; } -void CJsonStateRestoreTraverser::logError() -{ - const char *error(rapidjson::GetParseError_En(m_Reader.GetParseErrorCode())); - LOG_ERROR("Error parsing JSON at offset " << m_Reader.GetErrorOffset() << - ": " << ((error != 0) ? error : "No message")); +void CJsonStateRestoreTraverser::logError() { + const char* error(rapidjson::GetParseError_En(m_Reader.GetParseErrorCode())); + LOG_ERROR("Error parsing JSON at offset " << m_Reader.GetErrorOffset() << ": " << ((error != 0) ? 
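// skipArray() above drops an unexpected container by counting nesting depth:
// array/object starts bump a counter, ends decrement it, and the loop consumes
// tokens until the counter returns to zero. The same idea over a plain token
// vector, with hypothetical token tags:
#include <cstddef>
#include <iostream>
#include <vector>

enum Token { ArrayStart, ArrayEnd, ObjectStart, ObjectEnd, Scalar };

// Returns the index one past the container that begins at `pos`.
std::size_t skipContainer(const std::vector<Token>& tokens, std::size_t pos) {
    int depth = 0;
    do {
        if (tokens[pos] == ArrayStart || tokens[pos] == ObjectStart) {
            ++depth;
        } else if (tokens[pos] == ArrayEnd || tokens[pos] == ObjectEnd) {
            --depth;
        }
        ++pos;
    } while (depth > 0 && pos < tokens.size());
    return pos;
}

int main() {
    // [ [ 1, { } ], 2 ] : skipping the inner array from index 1 lands on the 2.
    std::vector<Token> tokens{ArrayStart, ArrayStart, Scalar, ObjectStart,
                              ObjectEnd,  ArrayEnd,   Scalar, ArrayEnd};
    std::cout << skipContainer(tokens, 1) << '\n'; // prints 6
}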
error : "No message")); this->setBadState(); } CJsonStateRestoreTraverser::SRapidJsonHandler::SRapidJsonHandler() - : s_Type(SRapidJsonHandler::E_TokenNull), - s_NextIndex(0), - s_RememberValue(false) -{ + : s_Type(SRapidJsonHandler::E_TokenNull), s_NextIndex(0), s_RememberValue(false) { s_Level[0] = 0; s_Level[1] = 0; s_IsEndOfLevel[0] = false; s_IsEndOfLevel[1] = false; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::Null() -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::Null() { s_Type = E_TokenNull; return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::Bool(bool b) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::Bool(bool b) { s_Type = E_TokenBool; - if (s_RememberValue) - { + if (s_RememberValue) { s_Value[s_NextIndex].assign(CStringUtils::typeToString(b)); } return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::Int(int i) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::Int(int i) { s_Type = E_TokenInt; - if (s_RememberValue) - { + if (s_RememberValue) { s_Value[s_NextIndex].assign(CStringUtils::typeToString(i)); } return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::Uint(unsigned u) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::Uint(unsigned u) { s_Type = E_TokenUInt; - if (s_RememberValue) - { + if (s_RememberValue) { s_Value[s_NextIndex].assign(CStringUtils::typeToString(u)); } return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::Int64(int64_t i) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::Int64(int64_t i) { s_Type = E_TokenInt64; - if (s_RememberValue) - { + if (s_RememberValue) { s_Value[s_NextIndex].assign(CStringUtils::typeToString(i)); } return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::Uint64(uint64_t u) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::Uint64(uint64_t u) { s_Type = E_TokenUInt64; - if (s_RememberValue) - { + if (s_RememberValue) { s_Value[s_NextIndex].assign(CStringUtils::typeToString(u)); } return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::Double(double d) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::Double(double d) { s_Type = E_TokenDouble; - if (s_RememberValue) - { + if (s_RememberValue) { s_Value[s_NextIndex].assign(CStringUtils::typeToString(d)); } return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::RawNumber(const char*, rapidjson::SizeType, bool) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::RawNumber(const char*, rapidjson::SizeType, bool) { return false; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::String(const char* str, rapidjson::SizeType length, bool) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::String(const char* str, rapidjson::SizeType length, bool) { s_Type = E_TokenString; - if (s_RememberValue) - { + if (s_RememberValue) { s_Value[s_NextIndex].assign(str, length); } return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::StartObject() -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::StartObject() { s_Type = E_TokenObjectStart; - if (s_RememberValue) - { + if (s_RememberValue) { ++s_Level[s_NextIndex]; s_Value[s_NextIndex].clear(); } return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::Key(const char* str, rapidjson::SizeType length, bool) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::Key(const char* str, rapidjson::SizeType length, bool) { s_Type = E_TokenKey; - if (s_RememberValue) - { + if (s_RememberValue) { s_NextIndex = 1 - s_NextIndex; s_Level[s_NextIndex] = s_Level[1 - 
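// The handler above keeps two slots for name/value/level and flips between
// them with s_NextIndex = 1 - s_NextIndex, so the traverser can expose the
// token it has already consumed while the parser fills in the next one. The
// flip trick in isolation, with hypothetical names:
#include <iostream>
#include <string>

int main() {
    std::string slots[2];
    int next = 0;
    const char* tokens[] = {"alpha", "beta", "gamma"};
    for (const char* token : tokens) {
        next = 1 - next;     // what was "next" becomes "current"
        slots[next] = token; // overwrite the stale slot with the new token
        std::cout << "current=" << slots[1 - next] << " next=" << slots[next] << '\n';
    }
    // The current slot always lags the parser by exactly one token.
}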
s_NextIndex]; s_IsEndOfLevel[s_NextIndex] = false; @@ -485,12 +382,10 @@ bool CJsonStateRestoreTraverser::SRapidJsonHandler::Key(const char* str, rapidjs return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::EndObject(rapidjson::SizeType) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::EndObject(rapidjson::SizeType) { s_Type = E_TokenObjectEnd; - if (s_RememberValue) - { + if (s_RememberValue) { s_NextIndex = 1 - s_NextIndex; s_Level[s_NextIndex] = s_Level[1 - s_NextIndex] - 1; s_IsEndOfLevel[s_NextIndex] = true; @@ -501,18 +396,14 @@ bool CJsonStateRestoreTraverser::SRapidJsonHandler::EndObject(rapidjson::SizeTyp return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::StartArray() -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::StartArray() { s_Type = E_TokenArrayStart; return true; } -bool CJsonStateRestoreTraverser::SRapidJsonHandler::EndArray(rapidjson::SizeType) -{ +bool CJsonStateRestoreTraverser::SRapidJsonHandler::EndArray(rapidjson::SizeType) { s_Type = E_TokenArrayEnd; return true; } - } } - diff --git a/lib/core/CLogger.cc b/lib/core/CLogger.cc index 0571e1aeb2..226f61e1c6 100644 --- a/lib/core/CLogger.cc +++ b/lib/core/CLogger.cc @@ -6,13 +6,13 @@ #include #include -#include #include #include #include #include #include #include +#include #include #include @@ -24,8 +24,8 @@ #include #include -#include #include +#include #include #include @@ -35,59 +35,41 @@ #ifdef Windows __declspec(dllimport) #endif -extern char **environ; - + extern char** environ; -namespace -{ +namespace { // To ensure the singleton is constructed before multiple threads may require it // call instance() during the static initialisation phase of the program. Of // course, the instance may already be constructed before this if another static // object has used it. -const ml::core::CLogger &DO_NOT_USE_THIS_VARIABLE = - ml::core::CLogger::instance(); +const ml::core::CLogger& DO_NOT_USE_THIS_VARIABLE = ml::core::CLogger::instance(); } +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CLogger::CLogger() - : m_Logger(0), - m_Reconfigured(false), - m_ProgramName(CProgName::progName()), - m_OrigStderrFd(-1) -{ +CLogger::CLogger() : m_Logger(0), m_Reconfigured(false), m_ProgramName(CProgName::progName()), m_OrigStderrFd(-1) { CCrashHandler::installCrashHandler(); this->reset(); } -CLogger::~CLogger() -{ +CLogger::~CLogger() { log4cxx::LogManager::shutdown(); m_Logger = 0; - if (m_PipeFile != 0) - { + if (m_PipeFile != 0) { // Revert the stderr file descriptor. - if (m_OrigStderrFd != -1) - { + if (m_OrigStderrFd != -1) { COsFileFuncs::dup2(m_OrigStderrFd, ::fileno(stderr)); } m_PipeFile.reset(); } } -void CLogger::reset() -{ - if (m_PipeFile != 0) - { +void CLogger::reset() { + if (m_PipeFile != 0) { // Revert the stderr file descriptor. - if (m_OrigStderrFd != -1) - { + if (m_OrigStderrFd != -1) { COsFileFuncs::dup2(m_OrigStderrFd, ::fileno(stderr)); } m_PipeFile.reset(); @@ -96,8 +78,7 @@ void CLogger::reset() m_Reconfigured = false; // Configure the logger - try - { + try { // This info can come from an XML file or other source. // When the logger first starts up, configure it by setting properties // equivalent to this properties file: @@ -119,14 +100,10 @@ void CLogger::reset() // using a real properties file. 
log4cxx::helpers::Properties props; - props.put(LOG4CXX_STR("log4j.rootLogger"), - LOG4CXX_STR("DEBUG, A1")); - props.put(LOG4CXX_STR("log4j.appender.A1"), - LOG4CXX_STR("org.apache.log4j.ConsoleAppender")); - props.put(LOG4CXX_STR("log4j.appender.A1.Target"), - LOG4CXX_STR("System.err")); - props.put(LOG4CXX_STR("log4j.appender.A1.layout"), - LOG4CXX_STR("org.apache.log4j.PatternLayout")); + props.put(LOG4CXX_STR("log4j.rootLogger"), LOG4CXX_STR("DEBUG, A1")); + props.put(LOG4CXX_STR("log4j.appender.A1"), LOG4CXX_STR("org.apache.log4j.ConsoleAppender")); + props.put(LOG4CXX_STR("log4j.appender.A1.Target"), LOG4CXX_STR("System.err")); + props.put(LOG4CXX_STR("log4j.appender.A1.layout"), LOG4CXX_STR("org.apache.log4j.PatternLayout")); // The pattern includes the process ID to make it easier to see if a // process dies and restarts @@ -134,8 +111,7 @@ void CLogger::reset() strm << "%d %d{%Z} [" << CProcess::instance().id() << "] %-5p %F@%L %m%n"; log4cxx::LogString logPattern; log4cxx::helpers::Transcoder::decode(strm.str(), logPattern); - props.put(LOG4CXX_STR("log4j.appender.A1.layout.ConversionPattern"), - logPattern); + props.put(LOG4CXX_STR("log4j.appender.A1.layout.ConversionPattern"), logPattern); // Make sure the timezone names have been frigged on Windows before // configuring the properties @@ -144,48 +120,34 @@ void CLogger::reset() log4cxx::PropertyConfigurator::configure(props); m_Logger = log4cxx::Logger::getRootLogger(); - } - catch (log4cxx::helpers::Exception &e) - { - if (m_Logger != 0) - { + } catch (log4cxx::helpers::Exception& e) { + if (m_Logger != 0) { // (Can't use the Ml LOG_ERROR macro here, as the object // it references is only part constructed.) - LOG4CXX_ERROR(m_Logger, - "Could not initialise logger: " << e.what()); - } - else - { + LOG4CXX_ERROR(m_Logger, "Could not initialise logger: " << e.what()); + } else { // We can't use the log macros if the pointer to the logger is NULL - std::cerr << "Could not initialise logger: " << e.what() - << std::endl; + std::cerr << "Could not initialise logger: " << e.what() << std::endl; } } } -CLogger &CLogger::instance() -{ +CLogger& CLogger::instance() { static CLogger instance; return instance; } -bool CLogger::hasBeenReconfigured() const -{ +bool CLogger::hasBeenReconfigured() const { return m_Reconfigured; } -void CLogger::logEnvironment() const -{ +void CLogger::logEnvironment() const { std::string env("Environment variables:"); // environ is a global variable from the C runtime library - if (environ == 0) - { + if (environ == 0) { env += " (None found)"; - } - else - { - for (char **envPtr = environ; *envPtr != 0; ++envPtr) - { + } else { + for (char** envPtr = environ; *envPtr != 0; ++envPtr) { env += core_t::LINE_ENDING; env += *envPtr; } @@ -193,45 +155,40 @@ void CLogger::logEnvironment() const LOG_INFO(env); } -log4cxx::LoggerPtr CLogger::logger() -{ +log4cxx::LoggerPtr CLogger::logger() { return m_Logger; } -void CLogger::fatal() -{ +void CLogger::fatal() { throw std::runtime_error("Ml Fatal Exception"); } -bool CLogger::setLoggingLevel(ELevel level) -{ +bool CLogger::setLoggingLevel(ELevel level) { log4cxx::LevelPtr levelToSet(0); - switch (level) - { - case E_Fatal: - levelToSet = log4cxx::Level::getFatal(); - break; - case E_Error: - levelToSet = log4cxx::Level::getError(); - break; - case E_Warn: - levelToSet = log4cxx::Level::getWarn(); - break; - case E_Info: - levelToSet = log4cxx::Level::getInfo(); - break; - case E_Debug: - levelToSet = log4cxx::Level::getDebug(); - break; - case E_Trace: - levelToSet 
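// A pared-down version of the programmatic configuration reset() performs
// above: build a log4j-style property set in memory and hand it to the
// PropertyConfigurator, instead of shipping a properties file. The property
// keys and values are the ones shown in this hunk; the conversion pattern is
// shortened (the real one also embeds the process id), and the header paths
// assume a stock log4cxx installation.
#include <log4cxx/logger.h>
#include <log4cxx/propertyconfigurator.h>
#include <log4cxx/helpers/properties.h>

int main() {
    log4cxx::helpers::Properties props;
    props.put(LOG4CXX_STR("log4j.rootLogger"), LOG4CXX_STR("DEBUG, A1"));
    props.put(LOG4CXX_STR("log4j.appender.A1"), LOG4CXX_STR("org.apache.log4j.ConsoleAppender"));
    props.put(LOG4CXX_STR("log4j.appender.A1.Target"), LOG4CXX_STR("System.err"));
    props.put(LOG4CXX_STR("log4j.appender.A1.layout"), LOG4CXX_STR("org.apache.log4j.PatternLayout"));
    props.put(LOG4CXX_STR("log4j.appender.A1.layout.ConversionPattern"), LOG4CXX_STR("%d %-5p %F@%L %m%n"));
    log4cxx::PropertyConfigurator::configure(props);
    LOG4CXX_INFO(log4cxx::Logger::getRootLogger(), "configured from in-memory properties");
}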
= log4cxx::Level::getTrace(); - break; + switch (level) { + case E_Fatal: + levelToSet = log4cxx::Level::getFatal(); + break; + case E_Error: + levelToSet = log4cxx::Level::getError(); + break; + case E_Warn: + levelToSet = log4cxx::Level::getWarn(); + break; + case E_Info: + levelToSet = log4cxx::Level::getInfo(); + break; + case E_Debug: + levelToSet = log4cxx::Level::getDebug(); + break; + case E_Trace: + levelToSet = log4cxx::Level::getTrace(); + break; } // Defend against corrupt argument - if (levelToSet == 0) - { + if (levelToSet == 0) { return false; } @@ -239,8 +196,7 @@ bool CLogger::setLoggingLevel(ELevel level) // active logger when we call its setLevel() method, but because it's a // smart pointer, at least it will still exist log4cxx::LoggerPtr loggerToChange(m_Logger); - if (loggerToChange == 0) - { + if (loggerToChange == 0) { return false; } @@ -251,17 +207,13 @@ bool CLogger::setLoggingLevel(ELevel level) // change will have no effect. Therefore, we adjust all appender thresholds // here as well for appenders that write to a file or the console. log4cxx::AppenderList appendersToChange(loggerToChange->getAllAppenders()); - for (log4cxx::AppenderList::iterator iter = appendersToChange.begin(); - iter != appendersToChange.end(); - ++iter) - { - log4cxx::Appender *appenderToChange(*iter); + for (log4cxx::AppenderList::iterator iter = appendersToChange.begin(); iter != appendersToChange.end(); ++iter) { + log4cxx::Appender* appenderToChange(*iter); // Unfortunately, thresholds are a concept lower down the inheritance // hierarchy than the Appender base class, so we have to downcast. - log4cxx::WriterAppender *writerToChange(dynamic_cast(appenderToChange)); - if (writerToChange != 0) - { + log4cxx::WriterAppender* writerToChange(dynamic_cast(appenderToChange)); + if (writerToChange != 0) { writerToChange->setThreshold(levelToSet); } } @@ -269,13 +221,9 @@ bool CLogger::setLoggingLevel(ELevel level) return true; } -bool CLogger::reconfigure(const std::string &pipeName, - const std::string &propertiesFile) -{ - if (pipeName.empty()) - { - if (propertiesFile.empty()) - { +bool CLogger::reconfigure(const std::string& pipeName, const std::string& propertiesFile) { + if (pipeName.empty()) { + if (propertiesFile.empty()) { // Both empty is OK - it just means we keep logging to stderr return true; } @@ -284,19 +232,15 @@ bool CLogger::reconfigure(const std::string &pipeName, return this->reconfigureLogToNamedPipe(pipeName); } -bool CLogger::reconfigureLogToNamedPipe(const std::string &pipeName) -{ - if (m_Reconfigured) - { +bool CLogger::reconfigureLogToNamedPipe(const std::string& pipeName) { + if (m_Reconfigured) { LOG_ERROR("Cannot log to a named pipe after logger reconfiguration"); return false; } m_PipeFile = CNamedPipeFactory::openPipeFileWrite(pipeName); - if (m_PipeFile == 0) - { - LOG_ERROR("Cannot log to named pipe " << pipeName << - " as it could not be opened for writing"); + if (m_PipeFile == 0) { + LOG_ERROR("Cannot log to named pipe " << pipeName << " as it could not be opened for writing"); return false; } @@ -305,8 +249,7 @@ bool CLogger::reconfigureLogToNamedPipe(const std::string &pipeName) m_OrigStderrFd = COsFileFuncs::dup(::fileno(stderr)); COsFileFuncs::dup2(::fileno(m_PipeFile.get()), ::fileno(stderr)); - if (this->reconfigureLogJson() == false) - { + if (this->reconfigureLogJson() == false) { return false; } @@ -315,30 +258,22 @@ bool CLogger::reconfigureLogToNamedPipe(const std::string &pipeName) return true; } -bool CLogger::reconfigureLogJson() -{ +bool 
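// reconfigureLogToNamedPipe() above reroutes stderr at the file-descriptor
// level, so even writes that bypass the logging macros land in the pipe:
// dup() the original descriptor to remember it, dup2() the target descriptor
// over stderr, and dup2() the saved copy back to undo. The same sequence
// against a plain file (POSIX-only; the path is illustrative):
#include <cstdio>
#include <fcntl.h>
#include <unistd.h>

int main() {
    int savedStderr = ::dup(::fileno(stderr)); // remember where stderr went
    int fd = ::open("/tmp/redirected.log", O_WRONLY | O_CREAT | O_TRUNC, 0600);
    if (fd == -1 || savedStderr == -1) {
        return 1;
    }
    ::dup2(fd, ::fileno(stderr)); // stderr now writes to the file
    std::fprintf(stderr, "this line goes to /tmp/redirected.log\n");
    std::fflush(stderr);
    ::dup2(savedStderr, ::fileno(stderr)); // restore the original stderr
    ::close(fd);
    ::close(savedStderr);
    std::fprintf(stderr, "and this one is back on the console\n");
}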
CLogger::reconfigureLogJson() { log4cxx::helpers::Properties props; log4cxx::LogString logStr; log4cxx::helpers::Transcoder::decode(m_ProgramName, logStr); - props.put(LOG4CXX_STR("log4j.logger.") + logStr, - LOG4CXX_STR("DEBUG, A2")); - props.put(LOG4CXX_STR("log4j.appender.A2"), - LOG4CXX_STR("org.apache.log4j.ConsoleAppender")); - props.put(LOG4CXX_STR("log4j.appender.A2.Target"), - LOG4CXX_STR("System.err")); - props.put(LOG4CXX_STR("log4j.appender.A2.layout"), - LOG4CXX_STR("org.apache.log4j.CJsonLogLayout")); + props.put(LOG4CXX_STR("log4j.logger.") + logStr, LOG4CXX_STR("DEBUG, A2")); + props.put(LOG4CXX_STR("log4j.appender.A2"), LOG4CXX_STR("org.apache.log4j.ConsoleAppender")); + props.put(LOG4CXX_STR("log4j.appender.A2.Target"), LOG4CXX_STR("System.err")); + props.put(LOG4CXX_STR("log4j.appender.A2.layout"), LOG4CXX_STR("org.apache.log4j.CJsonLogLayout")); return this->reconfigureFromProps(props); } -bool CLogger::reconfigureFromFile(const std::string &propertiesFile) -{ +bool CLogger::reconfigureFromFile(const std::string& propertiesFile) { COsFileFuncs::TStat statBuf; - if (COsFileFuncs::stat(propertiesFile.c_str(), &statBuf) != 0) - { - LOG_ERROR("Unable to access properties file " << propertiesFile << - " for logger re-initialisation: " << ::strerror(errno)); + if (COsFileFuncs::stat(propertiesFile.c_str(), &statBuf) != 0) { + LOG_ERROR("Unable to access properties file " << propertiesFile << " for logger re-initialisation: " << ::strerror(errno)); return false; } @@ -346,24 +281,19 @@ bool CLogger::reconfigureFromFile(const std::string &propertiesFile) // we get the chance to massage the properties to include the name of the // current application, before log4cxx uses them. log4cxx::helpers::Properties props; - try - { + try { // InputStreamPtr is a smart pointer log4cxx::helpers::InputStreamPtr inputStream(new log4cxx::helpers::FileInputStream(propertiesFile)); props.load(inputStream); - } - catch (const log4cxx::helpers::Exception &e) - { - LOG_ERROR("Unable to read from properties file " << propertiesFile << - " for logger re-initialisation: " << e.what()); + } catch (const log4cxx::helpers::Exception& e) { + LOG_ERROR("Unable to read from properties file " << propertiesFile << " for logger re-initialisation: " << e.what()); return false; } // Massage the properties with our extensions this->massageProperties(props); - if (this->reconfigureFromProps(props) == false) - { + if (this->reconfigureFromProps(props) == false) { return false; } @@ -372,11 +302,9 @@ bool CLogger::reconfigureFromFile(const std::string &propertiesFile) return true; } -bool CLogger::reconfigureFromProps(log4cxx::helpers::Properties &props) -{ +bool CLogger::reconfigureFromProps(log4cxx::helpers::Properties& props) { // Now attempt the reconfiguration using the new properties - try - { + try { log4cxx::LogManager::resetConfiguration(); log4cxx::PropertyConfigurator::configure(props); @@ -387,25 +315,17 @@ bool CLogger::reconfigureFromProps(log4cxx::helpers::Properties &props) // TCP server can be identified as having come from this process. 
m_Logger = log4cxx::Logger::getLogger(m_ProgramName); - if (m_Logger == 0) - { + if (m_Logger == 0) { // We can't use the log macros if the pointer to the logger is NULL - std::cerr << "Failed to reinitialise logger for " - << m_ProgramName << std::endl; + std::cerr << "Failed to reinitialise logger for " << m_ProgramName << std::endl; return false; } - } - catch (log4cxx::helpers::Exception &e) - { - if (m_Logger != 0) - { + } catch (log4cxx::helpers::Exception& e) { + if (m_Logger != 0) { LOG_ERROR("Failed to reinitialise logger: " << e.what()); - } - else - { + } else { // We can't use the log macros if the pointer to the logger is NULL - std::cerr << "Failed to reinitialise logger: " << e.what() - << std::endl; + std::cerr << "Failed to reinitialise logger: " << e.what() << std::endl; } return false; @@ -420,8 +340,7 @@ bool CLogger::reconfigureFromProps(log4cxx::helpers::Properties &props) return true; } -void CLogger::massageProperties(log4cxx::helpers::Properties &props) const -{ +void CLogger::massageProperties(log4cxx::helpers::Properties& props) const { // Get the process ID as a string std::ostringstream pidStrm; pidStrm << CProcess::instance().id(); @@ -446,10 +365,7 @@ void CLogger::massageProperties(log4cxx::helpers::Properties &props) const using TLogStringVecCItr = TLogStringVec::const_iterator; TLogStringVec propNames(props.propertyNames()); - for (TLogStringVecCItr iter = propNames.begin(); - iter != propNames.end(); - ++iter) - { + for (TLogStringVecCItr iter = propNames.begin(); iter != propNames.end(); ++iter) { log4cxx::LogString oldKey(*iter); log4cxx::LogString newKey; newKey.reserve(oldKey.length()); @@ -460,53 +376,36 @@ void CLogger::massageProperties(log4cxx::helpers::Properties &props) const newValue.reserve(oldValue.length()); this->massageString(mappings, oldValue, newValue); - if (newValue != oldValue || newKey != oldKey) - { + if (newValue != oldValue || newKey != oldKey) { props.put(newKey, newValue); } } } -void CLogger::massageString(const TLogCharLogStrMap &mappings, - const log4cxx::LogString &oldStr, - log4cxx::LogString &newStr) const -{ +void CLogger::massageString(const TLogCharLogStrMap& mappings, const log4cxx::LogString& oldStr, log4cxx::LogString& newStr) const { newStr.clear(); - for (log4cxx::LogString::const_iterator iter = oldStr.begin(); - iter != oldStr.end(); - ++iter) - { + for (log4cxx::LogString::const_iterator iter = oldStr.begin(); iter != oldStr.end(); ++iter) { // We ONLY want to replace the patterns in our map - other patterns are // left for log4cxx itself - if (*iter == static_cast('%')) - { + if (*iter == static_cast('%')) { ++iter; - if (iter == oldStr.end()) - { + if (iter == oldStr.end()) { newStr += static_cast('%'); break; } TLogCharLogStrMapCItr mapping = mappings.find(*iter); - if (mapping == mappings.end()) - { + if (mapping == mappings.end()) { newStr += static_cast('%'); newStr += *iter; - } - else - { + } else { newStr += mapping->second; } - } - else - { + } else { newStr += *iter; } } } - - } } - diff --git a/lib/core/CMemory.cc b/lib/core/CMemory.cc index fe081c2776..a4acea26c6 100644 --- a/lib/core/CMemory.cc +++ b/lib/core/CMemory.cc @@ -5,19 +5,13 @@ */ #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { const std::string CMemory::EMPTY_STRING; CMemory::CAnyVisitor CMemory::ms_AnyVisitor = CMemory::CAnyVisitor(); - const std::string CMemoryDebug::EMPTY_STRING; CMemoryDebug::CAnyVisitor CMemoryDebug::ms_AnyVisitor = CMemoryDebug::CAnyVisitor(); - } } - diff --git 
a/lib/core/CMemoryUsage.cc b/lib/core/CMemoryUsage.cc index 580d2e0652..6e20a30587 100644 --- a/lib/core/CMemoryUsage.cc +++ b/lib/core/CMemoryUsage.cc @@ -4,33 +4,25 @@ * you may not use this file except in compliance with the Elastic License. */ +#include #include #include -#include #include -namespace ml -{ +namespace ml { -namespace core -{ +namespace core { -namespace memory_detail -{ +namespace memory_detail { //! Comparison function class to compare CMemoryUsage objects by //! their description -class CMemoryUsageComparison : public std::unary_function -{ +class CMemoryUsageComparison : public std::unary_function { public: - explicit CMemoryUsageComparison(const std::string &baseline) : m_Baseline(baseline) - { } + explicit CMemoryUsageComparison(const std::string& baseline) : m_Baseline(baseline) {} - bool operator() (const CMemoryUsage *rhs) - { - return m_Baseline == rhs->m_Description.s_Name; - } + bool operator()(const CMemoryUsage* rhs) { return m_Baseline == rhs->m_Description.s_Name; } private: std::string m_Baseline; @@ -38,125 +30,97 @@ class CMemoryUsageComparison : public std::unary_function //! Comparison function class to compare CMemoryUsage objects by //! their description, but ignoring the first in the collection -class CMemoryUsageComparisonTwo : public std::binary_function -{ +class CMemoryUsageComparisonTwo : public std::binary_function { public: - explicit CMemoryUsageComparisonTwo(const std::string &baseline, - const CMemoryUsage * firstItem) : m_Baseline(baseline), - m_FirstItem(firstItem) - { } - - bool operator() (const CMemoryUsage *rhs) - { - return (rhs != m_FirstItem) && (m_Baseline == rhs->m_Description.s_Name); - } + explicit CMemoryUsageComparisonTwo(const std::string& baseline, const CMemoryUsage* firstItem) + : m_Baseline(baseline), m_FirstItem(firstItem) {} + + bool operator()(const CMemoryUsage* rhs) { return (rhs != m_FirstItem) && (m_Baseline == rhs->m_Description.s_Name); } private: std::string m_Baseline; - const CMemoryUsage * m_FirstItem; + const CMemoryUsage* m_FirstItem; }; - } -CMemoryUsage::CMemoryUsage() : m_Description("", 0ull) -{ +CMemoryUsage::CMemoryUsage() : m_Description("", 0ull) { } -CMemoryUsage::~CMemoryUsage() -{ - for (TMemoryUsagePtrListItr i = m_Children.begin(); i != m_Children.end(); ++i) - { +CMemoryUsage::~CMemoryUsage() { + for (TMemoryUsagePtrListItr i = m_Children.begin(); i != m_Children.end(); ++i) { delete *i; } } -CMemoryUsage::TMemoryUsagePtr CMemoryUsage::addChild() -{ +CMemoryUsage::TMemoryUsagePtr CMemoryUsage::addChild() { TMemoryUsagePtr child(new CMemoryUsage); m_Children.push_back(child); return child; } -CMemoryUsage::TMemoryUsagePtr CMemoryUsage::addChild(std::size_t initialAmount) -{ +CMemoryUsage::TMemoryUsagePtr CMemoryUsage::addChild(std::size_t initialAmount) { TMemoryUsagePtr child(new CMemoryUsage); child->m_Description.s_Memory = initialAmount; m_Children.push_back(child); return child; } -void CMemoryUsage::addItem(const SMemoryUsage &item) -{ +void CMemoryUsage::addItem(const SMemoryUsage& item) { m_Items.push_back(item); } -void CMemoryUsage::addItem(const std::string &name, std::size_t memory) -{ +void CMemoryUsage::addItem(const std::string& name, std::size_t memory) { SMemoryUsage item(name, memory); this->addItem(item); } -void CMemoryUsage::setName(const SMemoryUsage &item) -{ +void CMemoryUsage::setName(const SMemoryUsage& item) { std::size_t initialAmount = m_Description.s_Memory; m_Description = item; m_Description.s_Memory += initialAmount; } -void CMemoryUsage::setName(const 
std::string &name, std::size_t memory) -{ +void CMemoryUsage::setName(const std::string& name, std::size_t memory) { SMemoryUsage item(name, memory); this->setName(item); } -void CMemoryUsage::setName(const std::string &name) -{ +void CMemoryUsage::setName(const std::string& name) { SMemoryUsage item(name, 0); this->setName(item); } -std::size_t CMemoryUsage::usage() const -{ +std::size_t CMemoryUsage::usage() const { std::size_t mem = m_Description.s_Memory; - for (TMemoryUsageVecCitr i = m_Items.begin(); i != m_Items.end(); ++i) - { + for (TMemoryUsageVecCitr i = m_Items.begin(); i != m_Items.end(); ++i) { mem += i->s_Memory; } - for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) - { + for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) { mem += (*i)->usage(); } return mem; } -std::size_t CMemoryUsage::unusage() const -{ +std::size_t CMemoryUsage::unusage() const { std::size_t mem = m_Description.s_Unused; - for (TMemoryUsageVecCitr i = m_Items.begin(); i != m_Items.end(); ++i) - { + for (TMemoryUsageVecCitr i = m_Items.begin(); i != m_Items.end(); ++i) { mem += i->s_Unused; } - for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) - { + for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) { mem += (*i)->unusage(); } return mem; } -void CMemoryUsage::summary(CMemoryUsageJsonWriter &writer) const -{ +void CMemoryUsage::summary(CMemoryUsageJsonWriter& writer) const { writer.startObject(); writer.addItem(m_Description); - if (m_Items.size() > 0) - { + if (m_Items.size() > 0) { writer.startArray("items"); - for (TMemoryUsageVecCitr i = m_Items.begin(); i != m_Items.end(); ++i) - { + for (TMemoryUsageVecCitr i = m_Items.begin(); i != m_Items.end(); ++i) { writer.startObject(); writer.addItem(*i); writer.endObject(); @@ -164,11 +128,9 @@ void CMemoryUsage::summary(CMemoryUsageJsonWriter &writer) const writer.endArray(); } - if (!m_Children.empty()) - { + if (!m_Children.empty()) { writer.startArray("subItems"); - for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) - { + for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) { (*i)->summary(writer); } writer.endArray(); @@ -177,39 +139,30 @@ void CMemoryUsage::summary(CMemoryUsageJsonWriter &writer) const writer.endObject(); } -void CMemoryUsage::compress() -{ +void CMemoryUsage::compress() { using TStrSizeMap = std::map; using TStrSizeMapCItr = TStrSizeMap::const_iterator; - if (!m_Children.empty()) - { + if (!m_Children.empty()) { // Find out which of the children occur the most TStrSizeMap itemsByName; - for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) - { + for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) { itemsByName[(*i)->m_Description.s_Name]++; - LOG_TRACE("Item " << (*i)->m_Description.s_Name << " : " << itemsByName[(*i)->m_Description.s_Name]); + LOG_TRACE("Item " << (*i)->m_Description.s_Name << " : " << itemsByName[(*i)->m_Description.s_Name]); } - for (TStrSizeMapCItr i = itemsByName.begin(); - i != itemsByName.end(); ++i) - { + for (TStrSizeMapCItr i = itemsByName.begin(); i != itemsByName.end(); ++i) { // For commonly-occuring children, add up their usage // then delete them - if (i->second > 1) - { + if (i->second > 1) { std::size_t counter = 0; memory_detail::CMemoryUsageComparison compareName(i->first); - TMemoryUsagePtrListItr firstChild = std::find_if(m_Children.begin(), - m_Children.end(), 
compareName); + TMemoryUsagePtrListItr firstChild = std::find_if(m_Children.begin(), m_Children.end(), compareName); memory_detail::CMemoryUsageComparisonTwo comparison(i->first, *firstChild); TMemoryUsagePtrListItr j = m_Children.begin(); - while ((j = std::find_if(j, m_Children.end(), comparison)) != - m_Children.end()) - { + while ((j = std::find_if(j, m_Children.end(), comparison)) != m_Children.end()) { LOG_TRACE("Trying to remove " << *j); (*firstChild)->m_Description.s_Memory += (*j)->usage(); (*firstChild)->m_Description.s_Unused += (*j)->unusage(); @@ -223,14 +176,12 @@ void CMemoryUsage::compress() } } } - for (TMemoryUsagePtrListItr i = m_Children.begin(); i != m_Children.end(); ++i) - { + for (TMemoryUsagePtrListItr i = m_Children.begin(); i != m_Children.end(); ++i) { (*i)->compress(); } } -void CMemoryUsage::print(std::ostream &outStream) const -{ +void CMemoryUsage::print(std::ostream& outStream) const { CMemoryUsageJsonWriter writer(outStream); this->summary(writer); writer.finalise(); diff --git a/lib/core/CMemoryUsageJsonWriter.cc b/lib/core/CMemoryUsageJsonWriter.cc index 9d93150fe2..e40449a8a6 100644 --- a/lib/core/CMemoryUsageJsonWriter.cc +++ b/lib/core/CMemoryUsageJsonWriter.cc @@ -6,73 +6,59 @@ #include -namespace -{ +namespace { const std::string MEMORY("memory"); const std::string UNUSED("unused"); } -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -CMemoryUsageJsonWriter::CMemoryUsageJsonWriter(std::ostream &outStream) : - m_WriteStream(outStream), m_Writer(m_WriteStream), m_Finalised(false) -{ +CMemoryUsageJsonWriter::CMemoryUsageJsonWriter(std::ostream& outStream) + : m_WriteStream(outStream), m_Writer(m_WriteStream), m_Finalised(false) { } -CMemoryUsageJsonWriter::~CMemoryUsageJsonWriter() -{ +CMemoryUsageJsonWriter::~CMemoryUsageJsonWriter() { this->finalise(); } -void CMemoryUsageJsonWriter::startObject() -{ +void CMemoryUsageJsonWriter::startObject() { m_Writer.StartObject(); } -void CMemoryUsageJsonWriter::endObject() -{ +void CMemoryUsageJsonWriter::endObject() { m_Writer.EndObject(); } -void CMemoryUsageJsonWriter::startArray(const std::string &description) -{ +void CMemoryUsageJsonWriter::startArray(const std::string& description) { m_Writer.String(description); m_Writer.StartArray(); } -void CMemoryUsageJsonWriter::endArray() -{ +void CMemoryUsageJsonWriter::endArray() { m_Writer.EndArray(); } -void CMemoryUsageJsonWriter::addItem(const CMemoryUsage::SMemoryUsage &item) -{ +void CMemoryUsageJsonWriter::addItem(const CMemoryUsage::SMemoryUsage& item) { m_Writer.String(item.s_Name); m_Writer.StartObject(); m_Writer.String(MEMORY); m_Writer.Int64(item.s_Memory); - if (item.s_Unused) - { + if (item.s_Unused) { m_Writer.String(UNUSED); m_Writer.Uint64(item.s_Unused); } m_Writer.EndObject(); } -void CMemoryUsageJsonWriter::finalise() -{ - if (m_Finalised) - { +void CMemoryUsageJsonWriter::finalise() { + if (m_Finalised) { return; } m_WriteStream.Flush(); m_Finalised = true; } - } // core } // ml diff --git a/lib/core/CMonotonicTime.cc b/lib/core/CMonotonicTime.cc index c22ebfe0e0..f6802ba71c 100644 --- a/lib/core/CMonotonicTime.cc +++ b/lib/core/CMonotonicTime.cc @@ -9,29 +9,21 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { CMonotonicTime::CMonotonicTime() // Scaling factors never vary for clock_gettime() - : m_ScalingFactor1(0), - m_ScalingFactor2(0), - m_ScalingFactor3(0) -{ + : m_ScalingFactor1(0), m_ScalingFactor2(0), m_ScalingFactor3(0) { } -uint64_t CMonotonicTime::milliseconds() const -{ 
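Stepping back to the CMemoryUsage changes above: usage() aggregates a node's own byte count, its items, and its children recursively, and compress() folds siblings that share a description name into a single entry. A deliberately simplified sketch of that accounting (unlike the patch, it collapses each duplicate's whole subtree into one count rather than keeping the first occurrence's structure; the type name is illustrative):

    #include <cstddef>
    #include <map>
    #include <string>
    #include <vector>

    struct UsageNode {
        std::string name;
        std::size_t memory = 0;
        std::vector<UsageNode> children;

        // Recursive total, as CMemoryUsage::usage() computes it.
        std::size_t usage() const {
            std::size_t mem = memory;
            for (const UsageNode& child : children) {
                mem += child.usage();
            }
            return mem;
        }

        // Merge same-named siblings, summing their totals.
        void compress() {
            std::map<std::string, UsageNode> merged;
            for (UsageNode& child : children) {
                child.compress();
                UsageNode& slot = merged[child.name];
                slot.name = child.name;
                slot.memory += child.usage();
            }
            children.clear();
            for (auto& entry : merged) {
                children.push_back(std::move(entry.second));
            }
        }
    };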
+uint64_t CMonotonicTime::milliseconds() const { struct timespec ts; int rc(-1); - // For milliseconds, use the coarse timers if available, as millisecond - // granularity is good enough +// For milliseconds, use the coarse timers if available, as millisecond +// granularity is good enough #if defined(CLOCK_MONOTONIC_COARSE) rc = ::clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); #elif defined(CLOCK_MONOTONIC) @@ -44,8 +36,7 @@ uint64_t CMonotonicTime::milliseconds() const rc = ::clock_gettime(CLOCK_REALTIME, &ts); #endif - if (rc < 0) - { + if (rc < 0) { LOG_ERROR("Failed to get reading from hi-res clock"); // Return a very approximate time @@ -58,14 +49,13 @@ uint64_t CMonotonicTime::milliseconds() const return result; } -uint64_t CMonotonicTime::nanoseconds() const -{ +uint64_t CMonotonicTime::nanoseconds() const { struct timespec ts; int rc(-1); - // Don't use the coarse timers here, as they only provide around millisecond - // granularity +// Don't use the coarse timers here, as they only provide around millisecond +// granularity #if defined(CLOCK_MONOTONIC) rc = ::clock_gettime(CLOCK_MONOTONIC, &ts); #else @@ -73,8 +63,7 @@ uint64_t CMonotonicTime::nanoseconds() const rc = ::clock_gettime(CLOCK_REALTIME, &ts); #endif - if (rc < 0) - { + if (rc < 0) { LOG_ERROR("Failed to get reading from hi-res clock"); // Return a very approximate time @@ -86,8 +75,5 @@ uint64_t CMonotonicTime::nanoseconds() const return result; } - - } } - diff --git a/lib/core/CMonotonicTime_MacOSX.cc b/lib/core/CMonotonicTime_MacOSX.cc index 6bb07b41ff..c66ad4d5c6 100644 --- a/lib/core/CMonotonicTime_MacOSX.cc +++ b/lib/core/CMonotonicTime_MacOSX.cc @@ -9,44 +9,28 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CMonotonicTime::CMonotonicTime() - : m_ScalingFactor1(1), - m_ScalingFactor2(1000000), - m_ScalingFactor3(1) -{ +CMonotonicTime::CMonotonicTime() : m_ScalingFactor1(1), m_ScalingFactor2(1000000), m_ScalingFactor3(1) { mach_timebase_info_data_t info; - if (::mach_timebase_info(&info) != 0) - { + if (::mach_timebase_info(&info) != 0) { // Assume numerator and denominator for nanoseconds are both 1 (which is // true on a 2010 MacBook Pro) LOG_ERROR("Failed to get time base info"); - } - else - { + } else { m_ScalingFactor1 = info.numer; m_ScalingFactor2 *= info.denom; m_ScalingFactor3 *= info.denom; } } -uint64_t CMonotonicTime::milliseconds() const -{ +uint64_t CMonotonicTime::milliseconds() const { return ::mach_absolute_time() * m_ScalingFactor1 / m_ScalingFactor2; } -uint64_t CMonotonicTime::nanoseconds() const -{ +uint64_t CMonotonicTime::nanoseconds() const { return ::mach_absolute_time() * m_ScalingFactor1 / m_ScalingFactor3; } - - } } - diff --git a/lib/core/CMonotonicTime_Windows.cc b/lib/core/CMonotonicTime_Windows.cc index 8752c2070b..e8c608008f 100644 --- a/lib/core/CMonotonicTime_Windows.cc +++ b/lib/core/CMonotonicTime_Windows.cc @@ -8,41 +8,30 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { CMonotonicTime::CMonotonicTime() : m_ScalingFactor1(0), // Only one variable scaling factor is needed on Windows m_ScalingFactor2(0), - m_ScalingFactor3(0) -{ + m_ScalingFactor3(0) { LARGE_INTEGER largeInt; - if (QueryPerformanceFrequency(&largeInt) == FALSE) - { + if (QueryPerformanceFrequency(&largeInt) == FALSE) { LOG_WARN("High frequency performance counters not available"); - } - else - { + } else { // The high frequency counter ticks this many times per second m_ScalingFactor1 = static_cast(largeInt.QuadPart); } } 
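On the mach_absolute_time() arithmetic in the Mac OS X file above: raw ticks are converted to real time by the timebase fraction numer/denom, which the constructor caches into its scaling factors. The same conversion as a free-standing sketch (macOS-only; the function names are illustrative):

    #include <mach/mach_time.h>

    #include <cstdint>

    uint64_t monotonicNanos() {
        // Query the timebase once, with the same numer = denom = 1 fallback
        // assumption the patch makes when mach_timebase_info() fails.
        static mach_timebase_info_data_t info = [] {
            mach_timebase_info_data_t data;
            if (::mach_timebase_info(&data) != 0) {
                data.numer = 1;
                data.denom = 1;
            }
            return data;
        }();
        return ::mach_absolute_time() * info.numer / info.denom;
    }

    uint64_t monotonicMillis() {
        // A further factor of 1,000,000 in the divisor, matching
        // m_ScalingFactor2 = 1000000 * denom above.
        return monotonicNanos() / 1000000ULL;
    }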
-uint64_t CMonotonicTime::milliseconds() const -{ +uint64_t CMonotonicTime::milliseconds() const { // This is only accurate to about 15 milliseconds return GetTickCount64(); } -uint64_t CMonotonicTime::nanoseconds() const -{ - if (m_ScalingFactor1 == 0) - { +uint64_t CMonotonicTime::nanoseconds() const { + if (m_ScalingFactor1 == 0) { // High frequency performance counters are not available, so return an // approximation return GetTickCount64() * 1000000ULL; @@ -51,8 +40,7 @@ uint64_t CMonotonicTime::nanoseconds() const LARGE_INTEGER largeInt; // This function call is slow - if (QueryPerformanceCounter(&largeInt) == FALSE) - { + if (QueryPerformanceCounter(&largeInt) == FALSE) { // Failed to obtain high frequency performance counter reading, so // return an approximation return GetTickCount64() * 1000000ULL; @@ -80,8 +68,5 @@ uint64_t CMonotonicTime::nanoseconds() const return fullSecondNanoseconds + extraNanoseconds; } - - } } - diff --git a/lib/core/CMutex.cc b/lib/core/CMutex.cc index 8ea1455a4d..597c5c016b 100644 --- a/lib/core/CMutex.cc +++ b/lib/core/CMutex.cc @@ -10,70 +10,52 @@ #include #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CMutex::CMutex() -{ +CMutex::CMutex() { pthread_mutexattr_t attr; int ret(pthread_mutexattr_init(&attr)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } ret = pthread_mutex_init(&m_Mutex, &attr); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } ret = pthread_mutexattr_destroy(&attr); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -CMutex::~CMutex() -{ +CMutex::~CMutex() { int ret(pthread_mutex_destroy(&m_Mutex)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void CMutex::lock() -{ +void CMutex::lock() { int ret(pthread_mutex_lock(&m_Mutex)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void CMutex::unlock() -{ +void CMutex::unlock() { int ret(pthread_mutex_unlock(&m_Mutex)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } - - } } - diff --git a/lib/core/CMutex_Windows.cc b/lib/core/CMutex_Windows.cc index 074a4001ef..a748523c2b 100644 --- a/lib/core/CMutex_Windows.cc +++ b/lib/core/CMutex_Windows.cc @@ -5,43 +5,31 @@ */ #include - -namespace -{ +namespace { // 4000 is a value that Microsoft uses in some of their code, so it's // hopefully a reasonably sensible setting static const DWORD SPIN_COUNT(4000); } -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { -CMutex::CMutex() -{ +CMutex::CMutex() { // On Windows Vista and above this function cannot fail, hence no need to // check the return code InitializeCriticalSectionAndSpinCount(&m_Mutex, SPIN_COUNT); } -CMutex::~CMutex() -{ +CMutex::~CMutex() { DeleteCriticalSection(&m_Mutex); } -void CMutex::lock() -{ +void CMutex::lock() { EnterCriticalSection(&m_Mutex); } -void CMutex::unlock() -{ +void CMutex::unlock() { LeaveCriticalSection(&m_Mutex); } - - } } - diff --git a/lib/core/CNamedPipeFactory.cc b/lib/core/CNamedPipeFactory.cc index 5db9b832ca..21c3b88de6 100644 --- a/lib/core/CNamedPipeFactory.cc +++ b/lib/core/CNamedPipeFactory.cc @@ -20,16 +20,12 @@ #include #include - -namespace -{ +namespace { //! fclose() doesn't check for NULL pointers, so wrap it for use as a shared_ptr //! 
deleter -void safeFClose(FILE *file) -{ - if (file != 0) - { +void safeFClose(FILE* file) { + if (file != 0) { ::fclose(file); } } @@ -38,8 +34,7 @@ void safeFClose(FILE *file) //! in the same process) at the other end of one of our named pipes to abruptly //! terminate our processes. Instead we should handle remote reader death by //! gracefully reacting to write failures. -bool ignoreSigPipe() -{ +bool ignoreSigPipe() { struct sigaction sa; sigemptyset(&sa.sa_mask); sa.sa_handler = SIG_IGN; @@ -63,149 +58,119 @@ const bool SIGPIPE_IGNORED(ignoreSigPipe()); //! http://www.boost.org/doc/libs/1_65_1/libs/iostreams/doc/concepts/sink.html) //! that will retry writes that get interrupted. //! -class CRetryingFileDescriptorSink : private boost::iostreams::file_descriptor -{ - public: - //! These don't conform to the coding standards because they are - //! dictated by the Boost.Iostreams library - using char_type = char; - using category = boost::iostreams::sink_tag; - using boost::iostreams::file_descriptor::handle_type; - - public: - CRetryingFileDescriptorSink(handle_type fd, - boost::iostreams::file_descriptor_flags flags) - { - // This is confusing naming in the (Boost) base class. It doesn't - // open the file; the file must already be open. Effectively this - // means "take ownership of the file". - this->open(fd, flags); - } +class CRetryingFileDescriptorSink : private boost::iostreams::file_descriptor { +public: + //! These don't conform to the coding standards because they are + //! dictated by the Boost.Iostreams library + using char_type = char; + using category = boost::iostreams::sink_tag; + using boost::iostreams::file_descriptor::handle_type; + +public: + CRetryingFileDescriptorSink(handle_type fd, boost::iostreams::file_descriptor_flags flags) { + // This is confusing naming in the (Boost) base class. It doesn't + // open the file; the file must already be open. Effectively this + // means "take ownership of the file". + this->open(fd, flags); + } - //! Write to the file descriptor provided to the constructor, retrying - //! in the event of an interrupted system call. The method signature is - //! defined by Boost's Sink concept. - std::streamsize write(const char *s, std::streamsize n) - { - std::streamsize totalBytesWritten = 0; - while (n > 0) - { - ssize_t ret = ::write(this->handle(), s, static_cast(n)); - if (ret == -1) - { - if (errno != EINTR) - { - std::string reason("Failed writing to named pipe: "); - reason += ::strerror(errno); - LOG_ERROR(reason); - // We don't usually throw exceptions, but Boost.Iostreams - // requires it here - boost::throw_exception(std::ios_base::failure(reason)); - } - } - else - { - totalBytesWritten += ret; - s += ret; - n -= ret; + //! Write to the file descriptor provided to the constructor, retrying + //! in the event of an interrupted system call. The method signature is + //! defined by Boost's Sink concept. 
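The write() member implemented immediately below has one essential behaviour: it retries ::write() whenever the call is interrupted by a signal (errno == EINTR) and treats only other errors as fatal. That retry loop in isolation, without the Boost.Iostreams plumbing or the exception the Sink concept requires (the function name is illustrative):

    #include <unistd.h>

    #include <cerrno>

    // Write all of buf, retrying interrupted system calls; returns the byte
    // count on success or -1 on a genuine error (errno left set).
    ssize_t writeFully(int fd, const char* buf, size_t n) {
        size_t total = 0;
        while (total < n) {
            ssize_t ret = ::write(fd, buf + total, n - total);
            if (ret == -1) {
                if (errno == EINTR) {
                    continue; // interrupted - retry the same write
                }
                return -1;
            }
            total += static_cast<size_t>(ret);
        }
        return static_cast<ssize_t>(total);
    }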
+ std::streamsize write(const char* s, std::streamsize n) { + std::streamsize totalBytesWritten = 0; + while (n > 0) { + ssize_t ret = ::write(this->handle(), s, static_cast(n)); + if (ret == -1) { + if (errno != EINTR) { + std::string reason("Failed writing to named pipe: "); + reason += ::strerror(errno); + LOG_ERROR(reason); + // We don't usually throw exceptions, but Boost.Iostreams + // requires it here + boost::throw_exception(std::ios_base::failure(reason)); } + } else { + totalBytesWritten += ret; + s += ret; + n -= ret; } - return totalBytesWritten; } + return totalBytesWritten; + } }; - } -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { // Initialise static const char CNamedPipeFactory::TEST_CHAR('\n'); - -CNamedPipeFactory::TIStreamP CNamedPipeFactory::openPipeStreamRead(const std::string &fileName) -{ +CNamedPipeFactory::TIStreamP CNamedPipeFactory::openPipeStreamRead(const std::string& fileName) { TPipeHandle fd = CNamedPipeFactory::initPipeHandle(fileName, false); - if (fd == -1) - { + if (fd == -1) { return TIStreamP(); } using TFileDescriptorSourceStream = boost::iostreams::stream; - return TIStreamP(new TFileDescriptorSourceStream( - boost::iostreams::file_descriptor_source(fd, boost::iostreams::close_handle))); + return TIStreamP(new TFileDescriptorSourceStream(boost::iostreams::file_descriptor_source(fd, boost::iostreams::close_handle))); } -CNamedPipeFactory::TOStreamP CNamedPipeFactory::openPipeStreamWrite(const std::string &fileName) -{ +CNamedPipeFactory::TOStreamP CNamedPipeFactory::openPipeStreamWrite(const std::string& fileName) { TPipeHandle fd = CNamedPipeFactory::initPipeHandle(fileName, true); - if (fd == -1) - { + if (fd == -1) { return TOStreamP(); } using TRetryingFileDescriptorSinkStream = boost::iostreams::stream; - return TOStreamP(new TRetryingFileDescriptorSinkStream( - CRetryingFileDescriptorSink(fd, boost::iostreams::close_handle))); + return TOStreamP(new TRetryingFileDescriptorSinkStream(CRetryingFileDescriptorSink(fd, boost::iostreams::close_handle))); } -CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileRead(const std::string &fileName) -{ +CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileRead(const std::string& fileName) { TPipeHandle fd = CNamedPipeFactory::initPipeHandle(fileName, false); - if (fd == -1) - { + if (fd == -1) { return TFileP(); } return TFileP(::fdopen(fd, "r"), safeFClose); } -CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileWrite(const std::string &fileName) -{ +CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileWrite(const std::string& fileName) { TPipeHandle fd = CNamedPipeFactory::initPipeHandle(fileName, true); - if (fd == -1) - { + if (fd == -1) { return TFileP(); } return TFileP(::fdopen(fd, "w"), safeFClose); } -bool CNamedPipeFactory::isNamedPipe(const std::string &fileName) -{ +bool CNamedPipeFactory::isNamedPipe(const std::string& fileName) { COsFileFuncs::TStat statbuf; - if (COsFileFuncs::stat(fileName.c_str(), &statbuf) < 0) - { + if (COsFileFuncs::stat(fileName.c_str(), &statbuf) < 0) { return false; } return (statbuf.st_mode & S_IFMT) == S_IFIFO; } -std::string CNamedPipeFactory::defaultPath() -{ +std::string CNamedPipeFactory::defaultPath() { // In production this needs to match the setting of java.io.tmpdir. We rely // on the JVM that spawns our controller daemon setting TMPDIR in the // environment of the spawned process. 
For unit testing and adhoc testing // $TMPDIR is generally set on Mac OS X (to something like // /var/folders/k5/5sqcdlps5sg3cvlp783gcz740000h0/T/) and not set on other // platforms. - const char *tmpDir(::getenv("TMPDIR")); + const char* tmpDir(::getenv("TMPDIR")); // Make sure path ends with a slash so it's ready to have a file name // appended. (_PATH_VARTMP already has this on all platforms I've seen, // but a user-defined $TMPDIR might not.) std::string path((tmpDir == 0) ? _PATH_VARTMP : tmpDir); - if (path[path.length() - 1] != '/') - { + if (path[path.length() - 1] != '/') { path += '/'; } return path; } -CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::string &fileName, bool forWrite) -{ - if (!SIGPIPE_IGNORED) - { +CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::string& fileName, bool forWrite) { + if (!SIGPIPE_IGNORED) { LOG_WARN("Failed to ignore SIGPIPE - this process will not terminate " "gracefully if a process it is writing to via a named pipe dies"); } @@ -215,29 +180,22 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri // If the name already exists, ensure it refers directly (i.e. not via a // symlink) to a named pipe COsFileFuncs::TStat statbuf; - if (COsFileFuncs::lstat(fileName.c_str(), &statbuf) == 0) - { - if ((statbuf.st_mode & S_IFMT) != S_IFIFO) - { - LOG_ERROR("Unable to create named pipe " << fileName << " - a file " - "of this name already exists, but it is not a FIFO"); + if (COsFileFuncs::lstat(fileName.c_str(), &statbuf) == 0) { + if ((statbuf.st_mode & S_IFMT) != S_IFIFO) { + LOG_ERROR("Unable to create named pipe " << fileName + << " - a file " + "of this name already exists, but it is not a FIFO"); return -1; } - if ((statbuf.st_mode & (S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH)) != 0) - { - LOG_ERROR("Will not use pre-existing named pipe " << fileName << - " - it has permissions that are too open"); + if ((statbuf.st_mode & (S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH)) != 0) { + LOG_ERROR("Will not use pre-existing named pipe " << fileName << " - it has permissions that are too open"); return -1; } - } - else - { + } else { // The file didn't exist, so create a new FIFO for it, with permissions // for the current user only - if (::mkfifo(fileName.c_str(), S_IRUSR | S_IWUSR) == -1) - { - LOG_ERROR("Unable to create named pipe " << fileName << - ": " << ::strerror(errno)); + if (::mkfifo(fileName.c_str(), S_IRUSR | S_IWUSR) == -1) { + LOG_ERROR("Unable to create named pipe " << fileName << ": " << ::strerror(errno)); return -1; } madeFifo = true; @@ -245,23 +203,15 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri // The open call here will block if there is no other connection to the // named pipe - int fd = COsFileFuncs::open(fileName.c_str(), - forWrite ? COsFileFuncs::WRONLY : COsFileFuncs::RDONLY); - if (fd == -1) - { - LOG_ERROR("Unable to open named pipe " << fileName << - (forWrite ? " for writing: " : " for reading: ") << ::strerror(errno)); - } - else - { + int fd = COsFileFuncs::open(fileName.c_str(), forWrite ? COsFileFuncs::WRONLY : COsFileFuncs::RDONLY); + if (fd == -1) { + LOG_ERROR("Unable to open named pipe " << fileName << (forWrite ? 
" for writing: " : " for reading: ") << ::strerror(errno)); + } else { // Write a test character to the pipe - this is really only necessary on // Windows, but doing it on *nix too will mean the inability of the Java // code to tolerate the test character will be discovered sooner. - if (forWrite && - COsFileFuncs::write(fd, &TEST_CHAR, sizeof(TEST_CHAR)) <= 0) - { - LOG_ERROR("Unable to test named pipe " << fileName << ": " << - ::strerror(errno)); + if (forWrite && COsFileFuncs::write(fd, &TEST_CHAR, sizeof(TEST_CHAR)) <= 0) { + LOG_ERROR("Unable to test named pipe " << fileName << ": " << ::strerror(errno)); COsFileFuncs::close(fd); fd = -1; } @@ -272,15 +222,11 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri // structure at this point. This avoids the need to unlink it later. A // deleted file should still be accessible on *nix to the file handles that // already had it open when it was deleted. - if (madeFifo) - { + if (madeFifo) { ::unlink(fileName.c_str()); } return fd; } - - } } - diff --git a/lib/core/CNamedPipeFactory_Windows.cc b/lib/core/CNamedPipeFactory_Windows.cc index b0b521e142..608a01e300 100644 --- a/lib/core/CNamedPipeFactory_Windows.cc +++ b/lib/core/CNamedPipeFactory_Windows.cc @@ -14,96 +14,69 @@ #include #include - -namespace -{ +namespace { //! fclose() doesn't check for NULL pointers, so wrap it for use as a shared_ptr //! deleter -void safeFClose(FILE *file) -{ - if (file != 0) - { +void safeFClose(FILE* file) { + if (file != 0) { ::fclose(file); } } //! On Windows ALL named pipes are under this path const std::string PIPE_PREFIX("\\\\.\\pipe\\"); - } -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { // Initialise static const char CNamedPipeFactory::TEST_CHAR('\n'); - -CNamedPipeFactory::TIStreamP CNamedPipeFactory::openPipeStreamRead(const std::string &fileName) -{ +CNamedPipeFactory::TIStreamP CNamedPipeFactory::openPipeStreamRead(const std::string& fileName) { TPipeHandle handle = CNamedPipeFactory::initPipeHandle(fileName, false); - if (handle == INVALID_HANDLE_VALUE) - { + if (handle == INVALID_HANDLE_VALUE) { return TIStreamP(); } using TFileDescriptorSourceStream = boost::iostreams::stream; - return TIStreamP(new TFileDescriptorSourceStream( - boost::iostreams::file_descriptor_source(handle, boost::iostreams::close_handle))); + return TIStreamP(new TFileDescriptorSourceStream(boost::iostreams::file_descriptor_source(handle, boost::iostreams::close_handle))); } -CNamedPipeFactory::TOStreamP CNamedPipeFactory::openPipeStreamWrite(const std::string &fileName) -{ +CNamedPipeFactory::TOStreamP CNamedPipeFactory::openPipeStreamWrite(const std::string& fileName) { TPipeHandle handle = CNamedPipeFactory::initPipeHandle(fileName, true); - if (handle == INVALID_HANDLE_VALUE) - { + if (handle == INVALID_HANDLE_VALUE) { return TOStreamP(); } using TFileDescriptorSinkStream = boost::iostreams::stream; - return TOStreamP(new TFileDescriptorSinkStream( - boost::iostreams::file_descriptor_sink(handle, boost::iostreams::close_handle))); + return TOStreamP(new TFileDescriptorSinkStream(boost::iostreams::file_descriptor_sink(handle, boost::iostreams::close_handle))); } -CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileRead(const std::string &fileName) -{ +CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileRead(const std::string& fileName) { TPipeHandle handle = CNamedPipeFactory::initPipeHandle(fileName, false); - if (handle == INVALID_HANDLE_VALUE) - { + if (handle == INVALID_HANDLE_VALUE) { return 
TFileP(); } - return TFileP(::fdopen(::_open_osfhandle(reinterpret_cast(handle), _O_RDONLY), - "rb"), - safeFClose); + return TFileP(::fdopen(::_open_osfhandle(reinterpret_cast(handle), _O_RDONLY), "rb"), safeFClose); } -CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileWrite(const std::string &fileName) -{ +CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileWrite(const std::string& fileName) { TPipeHandle handle = CNamedPipeFactory::initPipeHandle(fileName, true); - if (handle == INVALID_HANDLE_VALUE) - { + if (handle == INVALID_HANDLE_VALUE) { return TFileP(); } - return TFileP(::fdopen(::_open_osfhandle(reinterpret_cast(handle), 0), - "wb"), - safeFClose); + return TFileP(::fdopen(::_open_osfhandle(reinterpret_cast(handle), 0), "wb"), safeFClose); } -bool CNamedPipeFactory::isNamedPipe(const std::string &fileName) -{ - return fileName.length() > PIPE_PREFIX.length() && - fileName.compare(0, PIPE_PREFIX.length(), PIPE_PREFIX) == 0; +bool CNamedPipeFactory::isNamedPipe(const std::string& fileName) { + return fileName.length() > PIPE_PREFIX.length() && fileName.compare(0, PIPE_PREFIX.length(), PIPE_PREFIX) == 0; } -std::string CNamedPipeFactory::defaultPath() -{ +std::string CNamedPipeFactory::defaultPath() { return PIPE_PREFIX; } -CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::string &fileName, bool forWrite) -{ +CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::string& fileName, bool forWrite) { // Size of named pipe buffer static const DWORD BUFFER_SIZE(4096); @@ -119,10 +92,8 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri forWrite ? 1 : BUFFER_SIZE, NMPWAIT_USE_DEFAULT_WAIT, 0)); - if (handle == INVALID_HANDLE_VALUE) - { - LOG_ERROR("Unable to create named pipe " << fileName << - ": " << CWindowsError()); + if (handle == INVALID_HANDLE_VALUE) { + LOG_ERROR("Unable to create named pipe " << fileName << ": " << CWindowsError()); return INVALID_HANDLE_VALUE; } @@ -150,20 +121,16 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri // JSON and it's easy to make them tolerate blank lines. 
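Back on the POSIX side of CNamedPipeFactory (further above), initPipeHandle() only reuses an existing name if lstat() shows a FIFO with owner-only permissions, and otherwise creates one with mkfifo(). That create-or-verify step, stripped of the logging and the test-character handshake (the function name is illustrative):

    #include <sys/stat.h>
    #include <sys/types.h>

    // True if `path` can safely be opened as a private named pipe: either a
    // suitably locked-down FIFO already exists, or we just created one.
    bool ensurePrivateFifo(const char* path) {
        struct stat statbuf;
        if (::lstat(path, &statbuf) == 0) {
            if ((statbuf.st_mode & S_IFMT) != S_IFIFO) {
                return false; // name taken by something that isn't a FIFO
            }
            // Reject pre-existing pipes with any group/other access
            return (statbuf.st_mode &
                    (S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH)) == 0;
        }
        // Name free: create a FIFO for the current user only
        return ::mkfifo(path, S_IRUSR | S_IWUSR) == 0;
    }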
bool sufferedShortLivedConnection(false); DWORD attempt(0); - do - { + do { ++attempt; // This call will block if there is no other connection to the named // pipe - if (ConnectNamedPipe(handle, 0) == FALSE) - { + if (ConnectNamedPipe(handle, 0) == FALSE) { // ERROR_PIPE_CONNECTED means the pipe was already connected so // there was no need to connect it again - not a problem DWORD errCode(GetLastError()); - if (errCode != ERROR_PIPE_CONNECTED) - { - LOG_ERROR("Unable to connect named pipe " << fileName << - ": " << CWindowsError(errCode)); + if (errCode != ERROR_PIPE_CONNECTED) { + LOG_ERROR("Unable to connect named pipe " << fileName << ": " << CWindowsError(errCode)); // Close the pipe (even though it was successfully opened) so // that the net effect of this failed call is nothing CloseHandle(handle); @@ -178,26 +145,15 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri // relies on the Java side of all connections tolerating an initial // blank line) DWORD bytesWritten(0); - if (WriteFile(handle, - &TEST_CHAR, - sizeof(TEST_CHAR), - &bytesWritten, - 0) == FALSE || bytesWritten == 0) - { + if (WriteFile(handle, &TEST_CHAR, sizeof(TEST_CHAR), &bytesWritten, 0) == FALSE || bytesWritten == 0) { DisconnectNamedPipe(handle); sufferedShortLivedConnection = true; - } - else - { + } else { sufferedShortLivedConnection = false; } - } - while (sufferedShortLivedConnection); + } while (sufferedShortLivedConnection); return handle; } - - } } - diff --git a/lib/core/COsFileFuncs.cc b/lib/core/COsFileFuncs.cc index f241f1d0d3..fb5d0fc156 100644 --- a/lib/core/COsFileFuncs.cc +++ b/lib/core/COsFileFuncs.cc @@ -7,12 +7,8 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { const int COsFileFuncs::APPEND(O_APPEND); const int COsFileFuncs::BINARY(0); @@ -31,88 +27,69 @@ const int COsFileFuncs::READABLE(R_OK); const int COsFileFuncs::WRITABLE(W_OK); const int COsFileFuncs::EXECUTABLE(X_OK); -const char *COsFileFuncs::NULL_FILENAME("/dev/null"); - +const char* COsFileFuncs::NULL_FILENAME("/dev/null"); -int COsFileFuncs::open(const char *path, int oflag) -{ +int COsFileFuncs::open(const char* path, int oflag) { return ::open(path, oflag); } -int COsFileFuncs::open(const char *path, int oflag, TMode pmode) -{ +int COsFileFuncs::open(const char* path, int oflag, TMode pmode) { return ::open(path, oflag, pmode); } -int COsFileFuncs::dup(int fildes) -{ +int COsFileFuncs::dup(int fildes) { return ::dup(fildes); } -int COsFileFuncs::dup2(int fildes, int fildes2) -{ +int COsFileFuncs::dup2(int fildes, int fildes2) { return ::dup2(fildes, fildes2); } -COsFileFuncs::TOffset COsFileFuncs::lseek(int fildes, TOffset offset, int whence) -{ +COsFileFuncs::TOffset COsFileFuncs::lseek(int fildes, TOffset offset, int whence) { return ::lseek(fildes, offset, whence); } -COsFileFuncs::TSignedSize COsFileFuncs::read(int fildes, void *buf, size_t nbyte) -{ +COsFileFuncs::TSignedSize COsFileFuncs::read(int fildes, void* buf, size_t nbyte) { return ::read(fildes, buf, nbyte); } -COsFileFuncs::TSignedSize COsFileFuncs::write(int fildes, const void *buf, size_t nbyte) -{ +COsFileFuncs::TSignedSize COsFileFuncs::write(int fildes, const void* buf, size_t nbyte) { return ::write(fildes, buf, nbyte); } -int COsFileFuncs::close(int fildes) -{ +int COsFileFuncs::close(int fildes) { return ::close(fildes); } -int COsFileFuncs::fstat(int fildes, TStat *buf) -{ +int COsFileFuncs::fstat(int fildes, TStat* buf) { return ::fstat(fildes, buf); } -int COsFileFuncs::stat(const 
char *path, TStat *buf) -{ +int COsFileFuncs::stat(const char* path, TStat* buf) { return ::stat(path, buf); } -int COsFileFuncs::lstat(const char *path, TStat *buf) -{ +int COsFileFuncs::lstat(const char* path, TStat* buf) { return ::lstat(path, buf); } -int COsFileFuncs::access(const char *path, int amode) -{ +int COsFileFuncs::access(const char* path, int amode) { return ::access(path, amode); } -char *COsFileFuncs::getcwd(char *buf, size_t size) -{ +char* COsFileFuncs::getcwd(char* buf, size_t size) { return ::getcwd(buf, size); } -int COsFileFuncs::chdir(const char *path) -{ +int COsFileFuncs::chdir(const char* path) { return ::chdir(path); } -int COsFileFuncs::mkdir(const char *path) -{ +int COsFileFuncs::mkdir(const char* path) { // Windows doesn't support a mode, hence this method doesn't either. // Instead, always create the directory with permissions for the current // user only. return ::mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR); } - - } } - diff --git a/lib/core/COsFileFuncs_Windows.cc b/lib/core/COsFileFuncs_Windows.cc index 2db147e9c5..6e297ab459 100644 --- a/lib/core/COsFileFuncs_Windows.cc +++ b/lib/core/COsFileFuncs_Windows.cc @@ -8,20 +8,17 @@ #include #include -#include #include +#include #include - -namespace -{ +namespace { //! Convert from the time structure Windows uses for timestamping files to epoch //! seconds. A FILETIME structure stores the number of 100ns ticks since //! midnight on 1/1/1601 UTC (Gregorian Calendar even though many countries were //! still using the Julian Calendar then). -__time64_t fileTimeToTimeT(const FILETIME &fileTime) -{ +__time64_t fileTimeToTimeT(const FILETIME& fileTime) { static const ULONGLONG TICKS_PER_SECOND = 10000000ull; static const __time64_t SECONDS_1601_TO_1970 = 11644473600ll; ULARGE_INTEGER largeInt; @@ -29,14 +26,10 @@ __time64_t fileTimeToTimeT(const FILETIME &fileTime) largeInt.HighPart = fileTime.dwHighDateTime; return static_cast<__time64_t>(largeInt.QuadPart / TICKS_PER_SECOND) - SECONDS_1601_TO_1970; } - } -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { const int COsFileFuncs::APPEND(O_APPEND); const int COsFileFuncs::BINARY(O_BINARY); @@ -58,21 +51,17 @@ const int COsFileFuncs::WRITABLE(2); // For Windows, consider "executable" the same as "readable" for the time being const int COsFileFuncs::EXECUTABLE(4); -const char *COsFileFuncs::NULL_FILENAME("nul"); - +const char* COsFileFuncs::NULL_FILENAME("nul"); -int COsFileFuncs::open(const char *path, int oflag) -{ +int COsFileFuncs::open(const char* path, int oflag) { return COsFileFuncs::open(path, oflag, 0); } -int COsFileFuncs::open(const char *path, int oflag, TMode pmode) -{ +int COsFileFuncs::open(const char* path, int oflag, TMode pmode) { // To allow the underlying file to be renamed, we have to resort to using // the Windows API file functions. Otherwise we can use the POSIX // compatibility layer. - if ((oflag & RENAMABLE) == 0) - { + if ((oflag & RENAMABLE) == 0) { // This is the simple case. Windows won't allow the file to be renamed // whilst it's open. 
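A note on the fileTimeToTimeT() helper at the top of this file: a FILETIME counts 100 ns ticks since 1601-01-01 UTC, so dividing by 10^7 yields seconds since 1601, and subtracting 11,644,473,600 (the seconds separating 1601 from 1970) rebases that onto the Unix epoch. The same arithmetic as a portable sketch, with a stand-in struct so it also compiles and can be checked off Windows:

    #include <cstdint>

    // Stand-in for the two 32-bit halves of a Windows FILETIME.
    struct FakeFileTime {
        uint32_t dwLowDateTime;
        uint32_t dwHighDateTime;
    };

    int64_t fileTimeToTimeT(const FakeFileTime& fileTime) {
        const uint64_t TICKS_PER_SECOND = 10000000ULL; // 100ns ticks
        const int64_t SECONDS_1601_TO_1970 = 11644473600LL;
        uint64_t ticks = (static_cast<uint64_t>(fileTime.dwHighDateTime) << 32) |
                         fileTime.dwLowDateTime;
        return static_cast<int64_t>(ticks / TICKS_PER_SECOND) - SECONDS_1601_TO_1970;
    }

    // Check: 116444736000000000 ticks is exactly 1970-01-01T00:00:00Z, giving 0.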
return ::_open(path, oflag, pmode); @@ -80,84 +69,72 @@ int COsFileFuncs::open(const char *path, int oflag, TMode pmode) // Determine the correct access flags DWORD desiredAccess(GENERIC_READ); - if ((oflag & RDWR) != 0) - { + if ((oflag & RDWR) != 0) { desiredAccess = GENERIC_READ | GENERIC_WRITE; - } - else if ((oflag & WRONLY) != 0) - { + } else if ((oflag & WRONLY) != 0) { desiredAccess = GENERIC_WRITE; } DWORD creationDisposition(0); - switch (oflag & (CREAT | EXCL | TRUNC)) - { - case CREAT: - creationDisposition = OPEN_ALWAYS; - break; - case CREAT | EXCL: - case CREAT | TRUNC | EXCL: - creationDisposition = CREATE_NEW; - break; - case TRUNC: - creationDisposition = TRUNCATE_EXISTING; - break; - case TRUNC | EXCL: - // This doesn't make sense - errno = EINVAL; - return -1; - case CREAT | TRUNC: - creationDisposition = CREATE_ALWAYS; - break; - default: - creationDisposition = OPEN_EXISTING; - break; + switch (oflag & (CREAT | EXCL | TRUNC)) { + case CREAT: + creationDisposition = OPEN_ALWAYS; + break; + case CREAT | EXCL: + case CREAT | TRUNC | EXCL: + creationDisposition = CREATE_NEW; + break; + case TRUNC: + creationDisposition = TRUNCATE_EXISTING; + break; + case TRUNC | EXCL: + // This doesn't make sense + errno = EINVAL; + return -1; + case CREAT | TRUNC: + creationDisposition = CREATE_ALWAYS; + break; + default: + creationDisposition = OPEN_EXISTING; + break; } DWORD attributes(FILE_ATTRIBUTE_NORMAL); - if ((oflag & CREAT) != 0 && (pmode & S_IWRITE) == 0) - { + if ((oflag & CREAT) != 0 && (pmode & S_IWRITE) == 0) { attributes = FILE_ATTRIBUTE_READONLY; } - HANDLE handle = CreateFile(path, - desiredAccess, - FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, - 0, - creationDisposition, - attributes, - 0); - if (handle == INVALID_HANDLE_VALUE) - { - switch (GetLastError()) - { - case ERROR_FILE_NOT_FOUND: - case ERROR_PATH_NOT_FOUND: - case ERROR_INVALID_DRIVE: - case ERROR_BAD_PATHNAME: - errno = ENOENT; - break; - case ERROR_TOO_MANY_OPEN_FILES: - errno = EMFILE; - break; - case ERROR_ACCESS_DENIED: - case ERROR_NETWORK_ACCESS_DENIED: - case ERROR_LOCK_VIOLATION: - case ERROR_DRIVE_LOCKED: - errno = EACCES; - break; - case ERROR_INVALID_HANDLE: - errno = EBADF; - break; - case ERROR_NOT_ENOUGH_MEMORY: - errno = ENOMEM; - break; - case ERROR_DISK_FULL: - errno = ENOSPC; - break; - default: - errno = EINVAL; - break; + HANDLE handle = + CreateFile(path, desiredAccess, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, 0, creationDisposition, attributes, 0); + if (handle == INVALID_HANDLE_VALUE) { + switch (GetLastError()) { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + case ERROR_INVALID_DRIVE: + case ERROR_BAD_PATHNAME: + errno = ENOENT; + break; + case ERROR_TOO_MANY_OPEN_FILES: + errno = EMFILE; + break; + case ERROR_ACCESS_DENIED: + case ERROR_NETWORK_ACCESS_DENIED: + case ERROR_LOCK_VIOLATION: + case ERROR_DRIVE_LOCKED: + errno = EACCES; + break; + case ERROR_INVALID_HANDLE: + errno = EBADF; + break; + case ERROR_NOT_ENOUGH_MEMORY: + errno = ENOMEM; + break; + case ERROR_DISK_FULL: + errno = ENOSPC; + break; + default: + errno = EINVAL; + break; } return -1; } @@ -167,42 +144,34 @@ int COsFileFuncs::open(const char *path, int oflag, TMode pmode) return ::_open_osfhandle(reinterpret_cast(handle), filteredFlags); } -int COsFileFuncs::dup(int fildes) -{ +int COsFileFuncs::dup(int fildes) { return ::_dup(fildes); } -int COsFileFuncs::dup2(int fildes, int fildes2) -{ +int COsFileFuncs::dup2(int fildes, int fildes2) { return ::_dup2(fildes, fildes2); } 
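The switch reindented in COsFileFuncs::open() above is the heart of the RENAMABLE path: each legal combination of the POSIX-style CREAT/EXCL/TRUNC flags maps to exactly one CreateFile() creation disposition, and TRUNC|EXCL alone is rejected as meaningless. The mapping on its own, expressed with an enum so the table can be unit-tested on any platform (the enum and function names are illustrative):

    #include <fcntl.h>

    enum class Disposition {
        OpenExisting, OpenAlways, CreateNew, CreateAlways, TruncateExisting, Invalid
    };

    Disposition dispositionFor(int oflag) {
        switch (oflag & (O_CREAT | O_EXCL | O_TRUNC)) {
        case O_CREAT:
            return Disposition::OpenAlways; // create if absent, else open
        case O_CREAT | O_EXCL:
        case O_CREAT | O_TRUNC | O_EXCL:
            return Disposition::CreateNew; // must not already exist
        case O_TRUNC:
            return Disposition::TruncateExisting;
        case O_TRUNC | O_EXCL:
            return Disposition::Invalid; // nonsensical - open() sets EINVAL
        case O_CREAT | O_TRUNC:
            return Disposition::CreateAlways;
        default:
            return Disposition::OpenExisting;
        }
    }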
-COsFileFuncs::TOffset COsFileFuncs::lseek(int fildes, TOffset offset, int whence) -{ +COsFileFuncs::TOffset COsFileFuncs::lseek(int fildes, TOffset offset, int whence) { return ::_lseeki64(fildes, offset, whence); } -COsFileFuncs::TSignedSize COsFileFuncs::read(int fildes, void *buf, size_t nbyte) -{ +COsFileFuncs::TSignedSize COsFileFuncs::read(int fildes, void* buf, size_t nbyte) { return ::_read(fildes, buf, static_cast(nbyte)); } -COsFileFuncs::TSignedSize COsFileFuncs::write(int fildes, const void *buf, size_t nbyte) -{ +COsFileFuncs::TSignedSize COsFileFuncs::write(int fildes, const void* buf, size_t nbyte) { return ::_write(fildes, buf, static_cast(nbyte)); } -int COsFileFuncs::close(int fildes) -{ +int COsFileFuncs::close(int fildes) { return ::_close(fildes); } -int COsFileFuncs::fstat(int fildes, TStat *buf) -{ +int COsFileFuncs::fstat(int fildes, TStat* buf) { struct _stati64 tmpBuf; int res(::_fstati64(fildes, &tmpBuf)); - if (res != 0) - { + if (res != 0) { return res; } @@ -223,14 +192,12 @@ int COsFileFuncs::fstat(int fildes, TStat *buf) // By default, Windows always sets the st_ino member to 0 - try to do // something better HANDLE handle(reinterpret_cast(::_get_osfhandle(fildes))); - if (handle == INVALID_HANDLE_VALUE) - { + if (handle == INVALID_HANDLE_VALUE) { return -1; } BY_HANDLE_FILE_INFORMATION info; - if (GetFileInformationByHandle(handle, &info) == FALSE) - { + if (GetFileInformationByHandle(handle, &info) == FALSE) { errno = EACCES; return -1; } @@ -240,12 +207,10 @@ int COsFileFuncs::fstat(int fildes, TStat *buf) return 0; } -int COsFileFuncs::stat(const char *path, TStat *buf) -{ +int COsFileFuncs::stat(const char* path, TStat* buf) { struct _stati64 tmpBuf; int res(::_stati64(path, &tmpBuf)); - if (res != 0) - { + if (res != 0) { return res; } @@ -264,8 +229,7 @@ int COsFileFuncs::stat(const char *path, TStat *buf) buf->st_ctime = tmpBuf.st_ctime; // If we're dealing with something other than a normal file, we're done - if ((buf->st_mode & _S_IFMT) != _S_IFREG) - { + if ((buf->st_mode & _S_IFMT) != _S_IFREG) { return res; } @@ -277,15 +241,13 @@ int COsFileFuncs::stat(const char *path, TStat *buf) OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - if (handle == INVALID_HANDLE_VALUE) - { + if (handle == INVALID_HANDLE_VALUE) { errno = EACCES; return -1; } BY_HANDLE_FILE_INFORMATION info; - if (GetFileInformationByHandle(handle, &info) == FALSE) - { + if (GetFileInformationByHandle(handle, &info) == FALSE) { CloseHandle(handle); errno = EACCES; @@ -299,17 +261,13 @@ int COsFileFuncs::stat(const char *path, TStat *buf) return 0; } -int COsFileFuncs::lstat(const char *path, TStat *buf) -{ +int COsFileFuncs::lstat(const char* path, TStat* buf) { // Windows has no lstat() function, but it's only different to stat() in the // case where the path points at a symlink, so often we can simply call // stat() - WIN32_FILE_ATTRIBUTE_DATA attributes = { 0 }; - if (path == nullptr || - buf == nullptr || - GetFileAttributesEx(path, GetFileExInfoStandard, &attributes) == FALSE || - (attributes.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0) - { + WIN32_FILE_ATTRIBUTE_DATA attributes = {0}; + if (path == nullptr || buf == nullptr || GetFileAttributesEx(path, GetFileExInfoStandard, &attributes) == FALSE || + (attributes.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0) { return COsFileFuncs::stat(path, buf); } @@ -337,27 +295,20 @@ int COsFileFuncs::lstat(const char *path, TStat *buf) return 0; } -int COsFileFuncs::access(const char *path, int amode) -{ +int 
COsFileFuncs::access(const char* path, int amode) { return ::_access(path, amode); } -char *COsFileFuncs::getcwd(char *buf, size_t size) -{ +char* COsFileFuncs::getcwd(char* buf, size_t size) { return ::_getcwd(buf, static_cast(size)); } -int COsFileFuncs::chdir(const char *path) -{ +int COsFileFuncs::chdir(const char* path) { return ::_chdir(path); } -int COsFileFuncs::mkdir(const char *path) -{ +int COsFileFuncs::mkdir(const char* path) { return ::_mkdir(path); } - - } } - diff --git a/lib/core/CPOpen.cc b/lib/core/CPOpen.cc index 0540770d85..20c697ac73 100644 --- a/lib/core/CPOpen.cc +++ b/lib/core/CPOpen.cc @@ -5,25 +5,15 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -FILE *CPOpen::pOpen(const char *command, - const char *mode) -{ +FILE* CPOpen::pOpen(const char* command, const char* mode) { return ::popen(command, mode); } -int CPOpen::pClose(FILE *stream) -{ +int CPOpen::pClose(FILE* stream) { return ::pclose(stream); } - - } } - diff --git a/lib/core/CPOpen_Windows.cc b/lib/core/CPOpen_Windows.cc index 9c4c89932a..8071aef33c 100644 --- a/lib/core/CPOpen_Windows.cc +++ b/lib/core/CPOpen_Windows.cc @@ -7,18 +7,11 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -FILE *CPOpen::pOpen(const char *command, - const char *mode) -{ - if (command == 0) - { +FILE* CPOpen::pOpen(const char* command, const char* mode) { + if (command == 0) { return 0; } @@ -46,12 +39,8 @@ FILE *CPOpen::pOpen(const char *command, return ::_popen(quoted.c_str(), mode); } -int CPOpen::pClose(FILE *stream) -{ +int CPOpen::pClose(FILE* stream) { return ::_pclose(stream); } - - } } - diff --git a/lib/core/CPatternSet.cc b/lib/core/CPatternSet.cc index 11246c1385..b5399f7e61 100644 --- a/lib/core/CPatternSet.cc +++ b/lib/core/CPatternSet.cc @@ -14,79 +14,56 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -namespace -{ +namespace { const char WILDCARD = '*'; } -CPatternSet::CPatternSet() - : m_FullMatchPatterns(), - m_PrefixPatterns(), - m_SuffixPatterns(), - m_ContainsPatterns() -{ +CPatternSet::CPatternSet() : m_FullMatchPatterns(), m_PrefixPatterns(), m_SuffixPatterns(), m_ContainsPatterns() { } -bool CPatternSet::initFromJson(const std::string &json) -{ +bool CPatternSet::initFromJson(const std::string& json) { TStrVec fullPatterns; TStrVec prefixPatterns; TStrVec suffixPatterns; TStrVec containsPatterns; rapidjson::Document doc; - if (doc.Parse<0>(json.c_str()).HasParseError()) - { - LOG_ERROR("An error occurred while parsing pattern set from JSON: " - + std::string(rapidjson::GetParseError_En(doc.GetParseError()))); + if (doc.Parse<0>(json.c_str()).HasParseError()) { + LOG_ERROR("An error occurred while parsing pattern set from JSON: " + + std::string(rapidjson::GetParseError_En(doc.GetParseError()))); return false; } - if (!doc.IsArray()) - { + if (!doc.IsArray()) { LOG_ERROR("Could not parse pattern set from non-array JSON object: " << json); return false; } - - for (unsigned int i = 0; i < doc.Size(); ++i) - { - if (!doc[i].IsString()) - { + for (unsigned int i = 0; i < doc.Size(); ++i) { + if (!doc[i].IsString()) { LOG_ERROR("Could not parse pattern set: unexpected non-string item in JSON: " << json); this->clear(); return false; } std::string pattern = doc[i].GetString(); std::size_t length = pattern.length(); - if (length == 0) - { + if (length == 0) { continue; } - if (pattern[0] == WILDCARD) - { - if (length > 2 && pattern[length - 1] == WILDCARD) - { + if (pattern[0] == WILDCARD) { + if 
(length > 2 && pattern[length - 1] == WILDCARD) { std::string middle = pattern.substr(1, length - 2); containsPatterns.push_back(middle); - } - else if (length > 1) - { + } else if (length > 1) { std::string suffix = pattern.substr(1); suffixPatterns.push_back(std::string(suffix.rbegin(), suffix.rend())); } - } - else if (length > 1 && pattern[length - 1] == WILDCARD) - { + } else if (length > 1 && pattern[length - 1] == WILDCARD) { prefixPatterns.push_back(pattern.substr(0, length - 1)); - } - else - { + } else { fullPatterns.push_back(pattern); } } @@ -95,49 +72,38 @@ bool CPatternSet::initFromJson(const std::string &json) this->sortAndPruneDuplicates(prefixPatterns); this->sortAndPruneDuplicates(suffixPatterns); this->sortAndPruneDuplicates(containsPatterns); - return m_FullMatchPatterns.build(fullPatterns) - && m_PrefixPatterns.build(prefixPatterns) - && m_SuffixPatterns.build(suffixPatterns) - && m_ContainsPatterns.build(containsPatterns); + return m_FullMatchPatterns.build(fullPatterns) && m_PrefixPatterns.build(prefixPatterns) && m_SuffixPatterns.build(suffixPatterns) && + m_ContainsPatterns.build(containsPatterns); } -void CPatternSet::sortAndPruneDuplicates(TStrVec &keys) -{ +void CPatternSet::sortAndPruneDuplicates(TStrVec& keys) { std::sort(keys.begin(), keys.end()); keys.erase(std::unique(keys.begin(), keys.end()), keys.end()); } -bool CPatternSet::contains(const std::string &key) const -{ - if (m_PrefixPatterns.matchesStart(key)) - { +bool CPatternSet::contains(const std::string& key) const { + if (m_PrefixPatterns.matchesStart(key)) { return true; } - if (m_SuffixPatterns.matchesStart(key.rbegin(), key.rend())) - { + if (m_SuffixPatterns.matchesStart(key.rbegin(), key.rend())) { return true; } - if (m_FullMatchPatterns.matchesFully(key)) - { + if (m_FullMatchPatterns.matchesFully(key)) { return true; } - for (TStrCItr keyItr = key.begin(); keyItr != key.end(); ++keyItr) - { - if (m_ContainsPatterns.matchesStart(keyItr, key.end())) - { + for (TStrCItr keyItr = key.begin(); keyItr != key.end(); ++keyItr) { + if (m_ContainsPatterns.matchesStart(keyItr, key.end())) { return true; } } return false; } -void CPatternSet::clear() -{ +void CPatternSet::clear() { m_FullMatchPatterns.clear(); m_PrefixPatterns.clear(); m_SuffixPatterns.clear(); m_ContainsPatterns.clear(); } - } } diff --git a/lib/core/CPersistUtils.cc b/lib/core/CPersistUtils.cc index a9fa5482dc..665828d955 100644 --- a/lib/core/CPersistUtils.cc +++ b/lib/core/CPersistUtils.cc @@ -6,13 +6,10 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { const char CPersistUtils::DELIMITER(':'); const char CPersistUtils::PAIR_DELIMITER(';'); - } } diff --git a/lib/core/CProcess.cc b/lib/core/CProcess.cc index ff20f32347..2572384c79 100644 --- a/lib/core/CProcess.cc +++ b/lib/core/CProcess.cc @@ -10,60 +10,42 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ +const char* CProcess::STARTING_MSG("Process Starting."); +const char* CProcess::STARTED_MSG("Process Started."); +const char* CProcess::STOPPING_MSG("Process Shutting Down."); +const char* CProcess::STOPPED_MSG("Process Exiting."); -const char *CProcess::STARTING_MSG("Process Starting."); -const char *CProcess::STARTED_MSG("Process Started."); -const char *CProcess::STOPPING_MSG("Process Shutting Down."); -const char *CProcess::STOPPED_MSG("Process Exiting."); - - -CProcess::CProcess() - : m_IsService(false), - m_Initialised(false), - m_Running(false), - m_MlMainFunc(0) -{ +CProcess::CProcess() : m_IsService(false), 
m_Initialised(false), m_Running(false), m_MlMainFunc(0) { } -CProcess &CProcess::instance() -{ +CProcess& CProcess::instance() { static CProcess instance; return instance; } -bool CProcess::isService() const -{ +bool CProcess::isService() const { return m_IsService; } -CProcess::TPid CProcess::id() const -{ +CProcess::TPid CProcess::id() const { return ::getpid(); } -CProcess::TPid CProcess::parentId() const -{ +CProcess::TPid CProcess::parentId() const { return ::getppid(); } -bool CProcess::startDispatcher(TMlMainFunc mlMain, - int argc, - char *argv[]) -{ - if (mlMain == 0) - { +bool CProcess::startDispatcher(TMlMainFunc mlMain, int argc, char* argv[]) { + if (mlMain == 0) { LOG_ABORT("NULL mlMain() function passed"); } m_MlMainFunc = mlMain; m_Args.reserve(argc); - for (int count = 0; count < argc; ++count) - { + for (int count = 0; count < argc; ++count) { m_Args.push_back(argv[count]); } @@ -75,27 +57,22 @@ bool CProcess::startDispatcher(TMlMainFunc mlMain, // Only log process status messages if the logger has been reconfigured to // log somewhere more sensible that STDERR. (This prevents us spoiling the // output from --version and --help.) - if (CLogger::instance().hasBeenReconfigured()) - { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STOPPED_MSG); } return success; } -bool CProcess::isInitialised() const -{ +bool CProcess::isInitialised() const { return m_Initialised; } -void CProcess::initialisationComplete(const TShutdownFunc &shutdownFunc) -{ +void CProcess::initialisationComplete(const TShutdownFunc& shutdownFunc) { CScopedFastLock lock(m_ShutdownFuncMutex); - if (!m_Initialised) - { - if (CLogger::instance().hasBeenReconfigured()) - { + if (!m_Initialised) { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STARTED_MSG); } m_Initialised = true; @@ -104,14 +81,11 @@ void CProcess::initialisationComplete(const TShutdownFunc &shutdownFunc) m_ShutdownFunc = shutdownFunc; } -void CProcess::initialisationComplete() -{ +void CProcess::initialisationComplete() { CScopedFastLock lock(m_ShutdownFuncMutex); - if (!m_Initialised) - { - if (CLogger::instance().hasBeenReconfigured()) - { + if (!m_Initialised) { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STARTED_MSG); } m_Initialised = true; @@ -122,22 +96,18 @@ void CProcess::initialisationComplete() m_ShutdownFunc.swap(emptyFunc); } -bool CProcess::isRunning() const -{ +bool CProcess::isRunning() const { return m_Running; } -bool CProcess::shutdown() -{ - if (CLogger::instance().hasBeenReconfigured()) - { +bool CProcess::shutdown() { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STOPPING_MSG); } CScopedFastLock lock(m_ShutdownFuncMutex); - if (!m_ShutdownFunc) - { + if (!m_ShutdownFunc) { return false; } @@ -145,8 +115,5 @@ bool CProcess::shutdown() return true; } - - } } - diff --git a/lib/core/CProcessPriority.cc b/lib/core/CProcessPriority.cc index 1c551ab3b6..78eb5aeb70 100644 --- a/lib/core/CProcessPriority.cc +++ b/lib/core/CProcessPriority.cc @@ -5,20 +5,12 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -void CProcessPriority::reducePriority() -{ +void CProcessPriority::reducePriority() { // Default is to do nothing - see platform-specific implementation files for // platforms where we do more } - - } } - diff --git a/lib/core/CProcessPriority_Linux.cc b/lib/core/CProcessPriority_Linux.cc index df2f4ef05a..ec5cd42862 100644 --- a/lib/core/CProcessPriority_Linux.cc +++ b/lib/core/CProcessPriority_Linux.cc @@ -12,27 +12,20 @@ #include 
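On the CProcess locking reformatted above: the shutdown callback is installed by initialisationComplete() and invoked by shutdown() under the same fast lock, so shutdown() safely reports failure until a handler exists. The pattern in isolation, with std::mutex and std::function standing in for CFastMutex and TShutdownFunc (the class name is mine, not the patch's):

    #include <functional>
    #include <mutex>

    class ShutdownGate {
    public:
        void initialisationComplete(std::function<void()> shutdownFunc) {
            std::lock_guard<std::mutex> lock(m_Mutex);
            m_ShutdownFunc = std::move(shutdownFunc);
        }

        // False until a handler has been registered, just as
        // CProcess::shutdown() above returns false when m_ShutdownFunc is empty.
        bool shutdown() {
            std::lock_guard<std::mutex> lock(m_Mutex);
            if (!m_ShutdownFunc) {
                return false;
            }
            m_ShutdownFunc();
            return true;
        }

    private:
        std::mutex m_Mutex;
        std::function<void()> m_ShutdownFunc;
    };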
#include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ +namespace { -namespace -{ - -bool writeToSystemFile(const std::string &fileName, const std::string &value) -{ +bool writeToSystemFile(const std::string& fileName, const std::string& value) { // Use low level functions to write rather than C++ wrappers, as these are // system files. int fd = ::open(fileName.c_str(), O_WRONLY); - if (fd == -1) - { + if (fd == -1) { return false; } - if (::write(fd, value.c_str(), value.length()) < static_cast(value.length())) - { + if (::write(fd, value.c_str(), value.length()) < static_cast(value.length())) { ::close(fd); return false; } @@ -44,32 +37,24 @@ bool writeToSystemFile(const std::string &fileName, const std::string &value) return true; } -void increaseOomKillerAdj() -{ +void increaseOomKillerAdj() { // oom_score_adj is supported by newer kernels and oom_adj by older kernels. // oom_score_adj is on a scale of -1000 to 1000. // oom_adj is on a scale of -16 to 15. // In both cases higher numbers mean the process is more likely to be killed // in low memory situations. - if (writeToSystemFile("/proc/self/oom_score_adj", "667\n") == false && - writeToSystemFile("/proc/self/oom_adj", "10\n") == false) - { + if (writeToSystemFile("/proc/self/oom_score_adj", "667\n") == false && writeToSystemFile("/proc/self/oom_adj", "10\n") == false) { LOG_WARN("Could not increase OOM killer adjustment using " - "/proc/self/oom_score_adj or /proc/self/oom_adj: " << - ::strerror(errno)); + "/proc/self/oom_score_adj or /proc/self/oom_adj: " + << ::strerror(errno)); } } - } -void CProcessPriority::reducePriority() -{ +void CProcessPriority::reducePriority() { // Currently the only action is to increase the OOM killer adjustment, but // there could be others in the future. increaseOomKillerAdj(); } - - } } - diff --git a/lib/core/CProcess_Windows.cc b/lib/core/CProcess_Windows.cc index 1749ca13fe..11278520b7 100644 --- a/lib/core/CProcess_Windows.cc +++ b/lib/core/CProcess_Windows.cc @@ -16,9 +16,7 @@ #include #include - -namespace -{ +namespace { // The wait hint of 10 seconds is about three times the observed time Ml // services generally take to shut down. However, it is better to give too high @@ -28,11 +26,9 @@ const DWORD STOP_WAIT_HINT_MSECS(10000); //! This needs to be called quickly after startup, because it will only work //! while the parent process is still running. 
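On the Linux priority code just above: increaseOomKillerAdj() tries the newer /proc/self/oom_score_adj interface first and falls back to the legacy oom_adj file, since older kernels only offer the latter and the two use different scales (-1000..1000 versus -16..15). The probe-then-fallback shape as a compact sketch (Linux-only; the function names are illustrative):

    #include <fcntl.h>
    #include <unistd.h>

    #include <cstring>

    // Write `value` to a /proc file with raw descriptors, as the patch does;
    // returning false on any failure lets the caller try an older interface.
    bool writeProcFile(const char* fileName, const char* value) {
        int fd = ::open(fileName, O_WRONLY);
        if (fd == -1) {
            return false;
        }
        ssize_t len = static_cast<ssize_t>(::strlen(value));
        bool ok = ::write(fd, value, len) == len;
        return (::close(fd) == 0) && ok;
    }

    void raiseOomScore() {
        // Same values the patch writes: 667 on the -1000..1000 scale,
        // 10 on the legacy -16..15 scale.
        if (!writeProcFile("/proc/self/oom_score_adj", "667\n") &&
            !writeProcFile("/proc/self/oom_adj", "10\n")) {
            // Neither interface was writable; a warning would go here.
        }
    }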
-DWORD findParentProcessId() -{ +DWORD findParentProcessId() { HANDLE snapshotHandle(CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0)); - if (snapshotHandle == INVALID_HANDLE_VALUE) - { + if (snapshotHandle == INVALID_HANDLE_VALUE) { // Log at the point of retrieval, as this is too early in the program // lifecycle return 0; @@ -45,17 +41,13 @@ DWORD findParentProcessId() DWORD pid(GetCurrentProcessId()); DWORD ppid(0); - if (Process32First(snapshotHandle, &processEntry) != FALSE) - { - do - { - if (processEntry.th32ProcessID == pid) - { + if (Process32First(snapshotHandle, &processEntry) != FALSE) { + do { + if (processEntry.th32ProcessID == pid) { ppid = processEntry.th32ParentProcessID; break; } - } - while (Process32Next(snapshotHandle, &processEntry) != FALSE); + } while (Process32Next(snapshotHandle, &processEntry) != FALSE); } CloseHandle(snapshotHandle); @@ -65,66 +57,45 @@ DWORD findParentProcessId() // If this is zero it indicates that an error occurred when getting it const DWORD PPID(findParentProcessId()); - } +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - -const char *CProcess::STARTING_MSG("Process Starting."); -const char *CProcess::STARTED_MSG("Process Started."); -const char *CProcess::STOPPING_MSG("Process Shutting Down."); -const char *CProcess::STOPPED_MSG("Process Exiting."); +const char* CProcess::STARTING_MSG("Process Starting."); +const char* CProcess::STARTED_MSG("Process Started."); +const char* CProcess::STOPPING_MSG("Process Shutting Down."); +const char* CProcess::STOPPED_MSG("Process Exiting."); - -CProcess::CProcess() - : m_IsService(false), - m_Initialised(false), - m_Running(false), - m_MlMainFunc(0), - m_ServiceHandle(0) -{ +CProcess::CProcess() : m_IsService(false), m_Initialised(false), m_Running(false), m_MlMainFunc(0), m_ServiceHandle(0) { } -CProcess &CProcess::instance() -{ +CProcess& CProcess::instance() { static CProcess instance; return instance; } -bool CProcess::isService() const -{ +bool CProcess::isService() const { return m_IsService; } -CProcess::TPid CProcess::id() const -{ +CProcess::TPid CProcess::id() const { return GetCurrentProcessId(); } -CProcess::TPid CProcess::parentId() const -{ - if (PPID == 0) - { +CProcess::TPid CProcess::parentId() const { + if (PPID == 0) { LOG_ERROR("Failed to find parent process ID"); } return PPID; } -bool CProcess::startDispatcher(TMlMainFunc mlMain, - int argc, - char *argv[]) -{ - if (mlMain == 0) - { +bool CProcess::startDispatcher(TMlMainFunc mlMain, int argc, char* argv[]) { + if (mlMain == 0) { LOG_ABORT("NULL mlMain() function passed"); } - if (argc <= 0) - { + if (argc <= 0) { // Arguments are invalid, but at this point it's debatable whether // logging will work if we're running as a service, as ServiceMain() // hasn't yet run. @@ -134,20 +105,15 @@ bool CProcess::startDispatcher(TMlMainFunc mlMain, m_MlMainFunc = mlMain; m_Args.reserve(argc); - for (int count = 0; count < argc; ++count) - { + for (int count = 0; count < argc; ++count) { m_Args.push_back(argv[count]); } - SERVICE_TABLE_ENTRY serviceTable[] = { - { argv[0], &CProcess::serviceMain }, - { 0, 0 } - }; + SERVICE_TABLE_ENTRY serviceTable[] = {{argv[0], &CProcess::serviceMain}, {0, 0}}; // Start on the assumption we ARE running as a Windows service m_IsService = true; - if (StartServiceCtrlDispatcher(serviceTable) != FALSE) - { + if (StartServiceCtrlDispatcher(serviceTable) != FALSE) { // We're running as a Windows service, so Windows will // call serviceMain. 
The service dispatcher won't return // until Windows wants the process to exit. @@ -155,13 +121,11 @@ bool CProcess::startDispatcher(TMlMainFunc mlMain, } DWORD errCode(GetLastError()); - if (errCode != ERROR_FAILED_SERVICE_CONTROLLER_CONNECT) - { + if (errCode != ERROR_FAILED_SERVICE_CONTROLLER_CONNECT) { // We're supposed to be running as a service, but something's gone // wrong. At this point it's debatable whether logging will work, as // ServiceMain() hasn't yet run. - LOG_ERROR("Windows service dispatcher failed " << - CWindowsError(errCode)); + LOG_ERROR("Windows service dispatcher failed " << CWindowsError(errCode)); return false; } @@ -174,27 +138,22 @@ bool CProcess::startDispatcher(TMlMainFunc mlMain, // Only log process status messages if the logger has been reconfigured to // log somewhere more sensible that STDERR. (This prevents us spoiling the // output from --version and --help.) - if (CLogger::instance().hasBeenReconfigured()) - { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STOPPED_MSG); } return success; } -bool CProcess::isInitialised() const -{ +bool CProcess::isInitialised() const { return m_Initialised; } -void CProcess::initialisationComplete(const TShutdownFunc &shutdownFunc) -{ +void CProcess::initialisationComplete(const TShutdownFunc& shutdownFunc) { CScopedFastLock lock(m_ShutdownFuncMutex); - if (!m_Initialised) - { - if (CLogger::instance().hasBeenReconfigured()) - { + if (!m_Initialised) { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STARTED_MSG); } m_Initialised = true; @@ -207,14 +166,11 @@ void CProcess::initialisationComplete(const TShutdownFunc &shutdownFunc) this->serviceCtrlHandler(SERVICE_CONTROL_INTERROGATE); } -void CProcess::initialisationComplete() -{ +void CProcess::initialisationComplete() { CScopedFastLock lock(m_ShutdownFuncMutex); - if (!m_Initialised) - { - if (CLogger::instance().hasBeenReconfigured()) - { + if (!m_Initialised) { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STARTED_MSG); } m_Initialised = true; @@ -229,49 +185,38 @@ void CProcess::initialisationComplete() this->serviceCtrlHandler(SERVICE_CONTROL_INTERROGATE); } -bool CProcess::isRunning() const -{ +bool CProcess::isRunning() const { return m_Running; } -void WINAPI CProcess::serviceMain(DWORD argc, char *argv[]) -{ +void WINAPI CProcess::serviceMain(DWORD argc, char* argv[]) { // This is a static method, so get the singleton instance - CProcess &process = CProcess::instance(); + CProcess& process = CProcess::instance(); // Since we're an "own process" service the name is not required - process.m_ServiceHandle = RegisterServiceCtrlHandler("", - &serviceCtrlHandler); - if (process.m_ServiceHandle == 0) - { + process.m_ServiceHandle = RegisterServiceCtrlHandler("", &serviceCtrlHandler); + if (process.m_ServiceHandle == 0) { return; } - if (process.m_MlMainFunc != 0) - { - using TScopedCharPArray = boost::scoped_array; + if (process.m_MlMainFunc != 0) { + using TScopedCharPArray = boost::scoped_array; // Merge the arguments from the service itself with the arguments // passed to the original main() call int mergedArgC(static_cast(process.m_Args.size())); - if (argv != 0 && argc > 1) - { + if (argv != 0 && argc > 1) { mergedArgC += static_cast(argc - 1); } size_t index(0); - TScopedCharPArray mergedArgV(new char *[mergedArgC]); - for (TStrVecCItr iter = process.m_Args.begin(); - iter != process.m_Args.end(); - ++iter) - { - mergedArgV[index++] = const_cast(iter->c_str()); + TScopedCharPArray mergedArgV(new char*[mergedArgC]); + for 
(TStrVecCItr iter = process.m_Args.begin(); iter != process.m_Args.end(); ++iter) { + mergedArgV[index++] = const_cast(iter->c_str()); } - if (argv != 0 && argc > 1) - { - for (size_t arg = 1; arg < argc; ++arg) - { + if (argv != 0 && argc > 1) { + for (size_t arg = 1; arg < argc; ++arg) { mergedArgV[index++] = argv[arg]; } } @@ -282,8 +227,7 @@ void WINAPI CProcess::serviceMain(DWORD argc, char *argv[]) // Only log process status messages if the logger has been reconfigured // to log somewhere more sensible that STDERR. (This prevents us // spoiling the output from --version and --help.) - if (CLogger::instance().hasBeenReconfigured()) - { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STOPPED_MSG); } @@ -302,14 +246,12 @@ void WINAPI CProcess::serviceMain(DWORD argc, char *argv[]) } } -void WINAPI CProcess::serviceCtrlHandler(DWORD ctrlType) -{ +void WINAPI CProcess::serviceCtrlHandler(DWORD ctrlType) { // This is a static method, so get the singleton instance - CProcess &process = CProcess::instance(); + CProcess& process = CProcess::instance(); // If we're not running as a service, do nothing - if (process.m_ServiceHandle == 0) - { + if (process.m_ServiceHandle == 0) { return; } @@ -323,67 +265,53 @@ void WINAPI CProcess::serviceCtrlHandler(DWORD ctrlType) // documentation, we don't accept stop until we've finished initialising, as // apparently doing this can cause a crash. serviceStatus.dwControlsAccepted = SERVICE_ACCEPT_SHUTDOWN; - if (process.isInitialised()) - { + if (process.isInitialised()) { serviceStatus.dwControlsAccepted |= SERVICE_ACCEPT_STOP; } serviceStatus.dwWin32ExitCode = NO_ERROR; serviceStatus.dwServiceSpecificExitCode = 0; serviceStatus.dwCheckPoint = 0; - switch (ctrlType) - { - case SERVICE_CONTROL_INTERROGATE: - { - serviceStatus.dwWaitHint = 0; - if (process.isRunning()) - { - if (process.isInitialised()) - { - serviceStatus.dwCurrentState = SERVICE_RUNNING; - } - else - { - serviceStatus.dwCurrentState = SERVICE_START_PENDING; - } - } - else - { - serviceStatus.dwCurrentState = SERVICE_STOPPED; + switch (ctrlType) { + case SERVICE_CONTROL_INTERROGATE: { + serviceStatus.dwWaitHint = 0; + if (process.isRunning()) { + if (process.isInitialised()) { + serviceStatus.dwCurrentState = SERVICE_RUNNING; + } else { + serviceStatus.dwCurrentState = SERVICE_START_PENDING; } - SetServiceStatus(process.m_ServiceHandle, &serviceStatus); - break; + } else { + serviceStatus.dwCurrentState = SERVICE_STOPPED; } - case SERVICE_CONTROL_SHUTDOWN: - case SERVICE_CONTROL_STOP: - { - serviceStatus.dwWaitHint = STOP_WAIT_HINT_MSECS; - serviceStatus.dwCurrentState = SERVICE_STOP_PENDING; - SetServiceStatus(process.m_ServiceHandle, &serviceStatus); - if (process.shutdown() == false) - { - // This won't stop the process gracefully, and will trigger an - // error message from Windows, but that's probably better than - // having a rogue process hanging around after it's been told to - // stop - ::exit(EXIT_SUCCESS); - } - break; + SetServiceStatus(process.m_ServiceHandle, &serviceStatus); + break; + } + case SERVICE_CONTROL_SHUTDOWN: + case SERVICE_CONTROL_STOP: { + serviceStatus.dwWaitHint = STOP_WAIT_HINT_MSECS; + serviceStatus.dwCurrentState = SERVICE_STOP_PENDING; + SetServiceStatus(process.m_ServiceHandle, &serviceStatus); + if (process.shutdown() == false) { + // This won't stop the process gracefully, and will trigger an + // error message from Windows, but that's probably better than + // having a rogue process hanging around after it's been told to + // stop + 
::exit(EXIT_SUCCESS); } + break; + } } } -bool CProcess::shutdown() -{ - if (CLogger::instance().hasBeenReconfigured()) - { +bool CProcess::shutdown() { + if (CLogger::instance().hasBeenReconfigured()) { LOG_INFO(STOPPING_MSG); } CScopedFastLock lock(m_ShutdownFuncMutex); - if (!m_ShutdownFunc) - { + if (!m_ShutdownFunc) { return false; } @@ -391,8 +319,5 @@ bool CProcess::shutdown() return true; } - - } } - diff --git a/lib/core/CProgName_Linux.cc b/lib/core/CProgName_Linux.cc index b41b1463d9..bb15f49de1 100644 --- a/lib/core/CProgName_Linux.cc +++ b/lib/core/CProgName_Linux.cc @@ -8,44 +8,32 @@ #include // Secret global variable in the Linux glibc... -extern char *__progname; +extern char* __progname; +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -std::string CProgName::progName() -{ - if (__progname == 0) - { +std::string CProgName::progName() { + if (__progname == 0) { return std::string(); } return __progname; } -std::string CProgName::progDir() -{ +std::string CProgName::progDir() { static const size_t BUFFER_SIZE(2048); std::string path(BUFFER_SIZE, '\0'); ssize_t len(::readlink("/proc/self/exe", &path[0], BUFFER_SIZE)); - if (len == -1) - { + if (len == -1) { return std::string(); } size_t lastSlash(path.rfind('/', static_cast(len))); - if (lastSlash == std::string::npos) - { + if (lastSlash == std::string::npos) { return std::string(); } path.resize(lastSlash); return path; } - - } } - diff --git a/lib/core/CProgName_MacOSX.cc b/lib/core/CProgName_MacOSX.cc index 70b2ee7ec2..09b65f4f8f 100644 --- a/lib/core/CProgName_MacOSX.cc +++ b/lib/core/CProgName_MacOSX.cc @@ -11,35 +11,26 @@ #include #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -std::string CProgName::progName() -{ - const char *progName(::getprogname()); - if (progName == 0) - { +std::string CProgName::progName() { + const char* progName(::getprogname()); + if (progName == 0) { return std::string(); } return progName; } -std::string CProgName::progDir() -{ +std::string CProgName::progDir() { uint32_t bufferSize(2048); std::string path(bufferSize, '\0'); - if (_NSGetExecutablePath(&path[0], &bufferSize) != 0) - { + if (_NSGetExecutablePath(&path[0], &bufferSize) != 0) { return std::string(); } size_t lastSlash(path.rfind('/')); - if (lastSlash == std::string::npos) - { + if (lastSlash == std::string::npos) { return std::string(); } path.resize(lastSlash); @@ -48,8 +39,5 @@ std::string CProgName::progDir() // canonical, e.g. 
containing /./ return boost::filesystem::canonical(boost::filesystem::path(path)).string(); } - - } } - diff --git a/lib/core/CProgName_Windows.cc b/lib/core/CProgName_Windows.cc index 827c496f9a..f8d9ed4787 100644 --- a/lib/core/CProgName_Windows.cc +++ b/lib/core/CProgName_Windows.cc @@ -11,60 +11,48 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -std::string CProgName::progName() -{ +std::string CProgName::progName() { static const size_t BUFFER_SIZE(2048); - char buffer[BUFFER_SIZE] = { '\0' }; - if (GetModuleFileName(0, buffer, BUFFER_SIZE - 1) == FALSE) - { + char buffer[BUFFER_SIZE] = {'\0'}; + if (GetModuleFileName(0, buffer, BUFFER_SIZE - 1) == FALSE) { return std::string(); } // Always return the long file name of the program, even if it was invoked // using the 8.3 name - char longPathBuffer[BUFFER_SIZE] = { '\0' }; + char longPathBuffer[BUFFER_SIZE] = {'\0'}; if (GetLongPathName(buffer, longPathBuffer, BUFFER_SIZE - 1) == FALSE) { return std::string(); } - char *progName(longPathBuffer); + char* progName(longPathBuffer); // Strip the path - char *lastSlash(std::max(::strrchr(progName, '/'), - ::strrchr(progName, '\\'))); - if (lastSlash != 0) - { + char* lastSlash(std::max(::strrchr(progName, '/'), ::strrchr(progName, '\\'))); + if (lastSlash != 0) { progName = lastSlash + 1; } // Strip the extension - char *lastDot(::strrchr(progName, '.')); - if (lastDot != 0) - { + char* lastDot(::strrchr(progName, '.')); + if (lastDot != 0) { *lastDot = '\0'; } return progName; } -std::string CProgName::progDir() -{ +std::string CProgName::progDir() { static const size_t BUFFER_SIZE(2048); std::string path(BUFFER_SIZE, '\0'); - if (GetModuleFileName(0, &path[0], BUFFER_SIZE) == FALSE) - { + if (GetModuleFileName(0, &path[0], BUFFER_SIZE) == FALSE) { return std::string(); } size_t lastSlash(path.find_last_of("\\/")); - if (lastSlash == std::string::npos) - { + if (lastSlash == std::string::npos) { return std::string(); } path.resize(lastSlash); @@ -73,15 +61,11 @@ std::string CProgName::progDir() // support extended paths, so strip any leading extended length indicator. // (We have to accept that if the path is more than 260 characters long // after doing this then the program won't work.) 
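The stripping step that follows is worth a tiny illustration: Windows can hand back an extended-length path such as \\?\C:\progs\app.exe, and the four-character \\?\ prefix must be removed before the path is usable elsewhere. A self-contained sketch:

#include <iostream>
#include <string>

int main() {
    std::string path("\\\\?\\C:\\progs\\app.exe"); // hypothetical example path
    if (path.compare(0, 4, "\\\\?\\") == 0) {
        path.erase(0, 4); // drop the extended-length indicator
    }
    std::cout << path << '\n'; // prints C:\progs\app.exe
    return 0;
}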
- if (path.compare(0, 4, "\\\\?\\") == 0) - { + if (path.compare(0, 4, "\\\\?\\") == 0) { path.erase(0, 4); } return path; } - - } } - diff --git a/lib/core/CRapidJsonConcurrentLineWriter.cc b/lib/core/CRapidJsonConcurrentLineWriter.cc index 568bcc4549..0eef07b285 100644 --- a/lib/core/CRapidJsonConcurrentLineWriter.cc +++ b/lib/core/CRapidJsonConcurrentLineWriter.cc @@ -6,51 +6,40 @@ #include -namespace ml -{ -namespace core -{ - -CRapidJsonConcurrentLineWriter::CRapidJsonConcurrentLineWriter(CJsonOutputStreamWrapper &outStream) - : m_OutputStreamWrapper(outStream) -{ +namespace ml { +namespace core { + +CRapidJsonConcurrentLineWriter::CRapidJsonConcurrentLineWriter(CJsonOutputStreamWrapper& outStream) : m_OutputStreamWrapper(outStream) { m_OutputStreamWrapper.acquireBuffer(*this, m_StringBuffer); } -CRapidJsonConcurrentLineWriter::~CRapidJsonConcurrentLineWriter() -{ +CRapidJsonConcurrentLineWriter::~CRapidJsonConcurrentLineWriter() { m_OutputStreamWrapper.releaseBuffer(*this, m_StringBuffer); } -void CRapidJsonConcurrentLineWriter::flush() -{ +void CRapidJsonConcurrentLineWriter::flush() { TRapidJsonLineWriterBase::Flush(); m_OutputStreamWrapper.flush(); } -bool CRapidJsonConcurrentLineWriter::EndObject(rapidjson::SizeType memberCount ) -{ +bool CRapidJsonConcurrentLineWriter::EndObject(rapidjson::SizeType memberCount) { bool baseReturnCode = TRapidJsonLineWriterBase::EndObject(memberCount); - if (TRapidJsonLineWriterBase::IsComplete()) - { + if (TRapidJsonLineWriterBase::IsComplete()) { m_OutputStreamWrapper.flushBuffer(*this, m_StringBuffer); } return baseReturnCode; } -void CRapidJsonConcurrentLineWriter::debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CRapidJsonConcurrentLineWriter::debugMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CRapidJsonConcurrentLineWriter", sizeof(*this)); m_OutputStreamWrapper.debugMemoryUsage(mem->addChild()); } -std::size_t CRapidJsonConcurrentLineWriter::memoryUsage() const -{ +std::size_t CRapidJsonConcurrentLineWriter::memoryUsage() const { return m_OutputStreamWrapper.memoryUsage(); } - } } diff --git a/lib/core/CRapidXmlParser.cc b/lib/core/CRapidXmlParser.cc index fe03ec42ca..ea05db50a2 100644 --- a/lib/core/CRapidXmlParser.cc +++ b/lib/core/CRapidXmlParser.cc @@ -16,53 +16,35 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CRapidXmlParser::CRapidXmlParser() - : m_XmlBufSize(0), - m_NavigatedNode(0) -{ +CRapidXmlParser::CRapidXmlParser() : m_XmlBufSize(0), m_NavigatedNode(0) { } -CRapidXmlParser::~CRapidXmlParser() -{ +CRapidXmlParser::~CRapidXmlParser() { } -bool CRapidXmlParser::parseString(const std::string &xml) -{ - return this->parseBufferNonDestructive(xml.c_str(), - xml.length()); +bool CRapidXmlParser::parseString(const std::string& xml) { + return this->parseBufferNonDestructive(xml.c_str(), xml.length()); } -bool CRapidXmlParser::parseBuffer(const char *begin, size_t length) -{ - return this->parseBufferNonDestructive(begin, - length); +bool CRapidXmlParser::parseBuffer(const char* begin, size_t length) { + return this->parseBufferNonDestructive(begin, length); } -bool CRapidXmlParser::parseBufferInSitu(char *begin, size_t length) -{ - return this->parseBufferDestructive(begin, - length); +bool CRapidXmlParser::parseBufferInSitu(char* begin, size_t length) { + return this->parseBufferDestructive(begin, length); } -bool CRapidXmlParser::parseStringIgnoreCdata(const std::string &xml) -{ - return this->parseBufferNonDestructive(xml.c_str(), - xml.length()); 
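Stepping back to the RapidJSON line writer above: the flush-on-EndObject behaviour rests on the writer knowing when a complete JSON value has been produced. A minimal sketch of the underlying RapidJSON API it builds on (standard rapidjson headers assumed):

#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <iostream>

int main() {
    rapidjson::StringBuffer buffer;
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
    writer.StartObject();
    writer.Key("status");
    writer.String("ok");
    writer.EndObject();
    if (writer.IsComplete()) { // the hook CRapidJsonConcurrentLineWriter flushes on
        std::cout << buffer.GetString() << '\n'; // {"status":"ok"}
    }
    return 0;
}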
+bool CRapidXmlParser::parseStringIgnoreCdata(const std::string& xml) { + return this->parseBufferNonDestructive(xml.c_str(), + xml.length()); } -std::string CRapidXmlParser::rootElementName() const -{ - const TCharRapidXmlNode *root(m_Doc.first_node()); - if (root == 0) - { +std::string CRapidXmlParser::rootElementName() const { + const TCharRapidXmlNode* root(m_Doc.first_node()); + if (root == 0) { LOG_ERROR("Error getting root element"); return std::string(); } @@ -70,43 +52,33 @@ std::string CRapidXmlParser::rootElementName() const return std::string(root->name(), root->name_size()); } -bool CRapidXmlParser::rootElementAttributes(TStrStrMap &rootAttributes) const -{ +bool CRapidXmlParser::rootElementAttributes(TStrStrMap& rootAttributes) const { rootAttributes.clear(); - const TCharRapidXmlNode *root(m_Doc.first_node()); - if (root == 0) - { + const TCharRapidXmlNode* root(m_Doc.first_node()); + if (root == 0) { LOG_ERROR("Error getting root element"); return false; } - for (const TCharRapidXmlAttribute *attr = root->first_attribute(); - attr != 0; - attr = attr->next_attribute()) - { + for (const TCharRapidXmlAttribute* attr = root->first_attribute(); attr != 0; attr = attr->next_attribute()) { // NB: where there are multiple attributes with the same name this keeps // the last one (only keeping one attribute with a given name is a // limitation throughout our XML encapsulation classes, but it // hasn't been a problem in practice to date) - rootAttributes[std::string(attr->name(), - attr->name_size())].assign(attr->value(), - attr->value_size()); + rootAttributes[std::string(attr->name(), attr->name_size())].assign(attr->value(), attr->value_size()); } return true; } -std::string CRapidXmlParser::dumpToString() const -{ +std::string CRapidXmlParser::dumpToString() const { std::string result; - rapidxml::print(std::back_inserter(result), - m_Doc); + rapidxml::print(std::back_inserter(result), m_Doc); return result; } -bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const -{ +bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { // Because both the pool and the nodes use shared pointers, it doesn't // matter if the pool that originally allocates the nodes is destroyed // before the nodes themselves. Hence we can get away with implementing @@ -116,9 +88,7 @@ bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildren return this->toNodeHierarchy(pool, rootNodePtr); } -bool CRapidXmlParser::toNodeHierarchy(CStringCache &cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const -{ +bool CRapidXmlParser::toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { // Because both the pool and the nodes use shared pointers, it doesn't // matter if the pool that originally allocates the nodes is destroyed // before the nodes themselves. 
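As background to these wrappers, a self-contained sketch of the raw RapidXML calls involved (include path may differ per project layout). RapidXML parses in place, which is why the parseBufferNonDestructive() path seen above copies the input into an internal NUL-terminated buffer first:

#include <rapidxml.hpp>
#include <iostream>
#include <string>
#include <vector>

int main() {
    std::string xml("<root attr=\"42\"><child/></root>");
    std::vector<char> buf(xml.begin(), xml.end());
    buf.push_back('\0'); // RapidXML needs a writable, NUL-terminated buffer
    rapidxml::xml_document<char> doc;
    doc.parse<0>(&buf[0]); // throws rapidxml::parse_error on bad input
    const rapidxml::xml_node<char>* root = doc.first_node();
    std::cout << std::string(root->name(), root->name_size()) << '\n'; // root
    return 0;
}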
Hence we can get away with implementing @@ -128,20 +98,16 @@ bool CRapidXmlParser::toNodeHierarchy(CStringCache &cache, return this->toNodeHierarchy(pool, cache, rootNodePtr); } -bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool &pool, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const -{ +bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { rootNodePtr.reset(); - const TCharRapidXmlNode *root(m_Doc.first_node()); - if (root == 0) - { + const TCharRapidXmlNode* root(m_Doc.first_node()); + if (root == 0) { LOG_ERROR("Error getting root element"); return false; } - if (root->type() != rapidxml::node_element) - { + if (root->type() != rapidxml::node_element) { LOG_ERROR("Node type " << root->type() << " not supported"); return false; } @@ -149,51 +115,43 @@ bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool &pool, return this->toNodeHierarchy(*root, pool, 0, rootNodePtr); } -bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool &pool, - CStringCache &cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const -{ +bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { rootNodePtr.reset(); - const TCharRapidXmlNode *root(m_Doc.first_node()); - if (root == 0) - { + const TCharRapidXmlNode* root(m_Doc.first_node()); + if (root == 0) { LOG_ERROR("Error getting root element"); return false; } - if (root->type() != rapidxml::node_element) - { + if (root->type() != rapidxml::node_element) { LOG_ERROR("Node type " << root->type() << " not supported"); return false; } // Only use the cache if the current platform employs copy-on-write strings. // If all strings are distinct then the cache is pointless. - CStringCache *cachePtr(cache.haveCopyOnWriteStrings() ? &cache : 0); + CStringCache* cachePtr(cache.haveCopyOnWriteStrings() ? &cache : 0); return this->toNodeHierarchy(*root, pool, cachePtr, rootNodePtr); } -bool CRapidXmlParser::toNodeHierarchy(const TCharRapidXmlNode &parentNode, - CXmlNodeWithChildrenPool &pool, - CStringCache *cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &nodePtr) const -{ +bool CRapidXmlParser::toNodeHierarchy(const TCharRapidXmlNode& parentNode, + CXmlNodeWithChildrenPool& pool, + CStringCache* cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& nodePtr) const { // Create the parent node nodePtr = pool.newNode(); // Here we take advantage of friendship to directly modify the CXmlNode's // name and value. 
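Before the cache lookups just below, a word on what CStringCache is buying: with copy-on-write strings, returning a reference to one shared std::string lets many identically-named nodes share a single allocation instead of each holding its own copy. A rough sketch of that interning idea (simplified, not the project's implementation):

#include <string>
#include <unordered_set>

class StringInterner {
public:
    // Return a shared copy of the given characters; repeated names
    // hit the set and reuse the existing allocation.
    const std::string& stringFor(const char* data, size_t length) {
        return *m_Strings.emplace(data, length).first;
    }

private:
    std::unordered_set<std::string> m_Strings;
};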
- if (cache != 0) - { + if (cache != 0) { // Get the name from the cache if there is one, as we expect relatively // few distinct names repeated many times - nodePtr->m_Name = cache->stringFor(parentNode.name(), - parentNode.name_size()); - } - else - { + nodePtr->m_Name = cache->stringFor(parentNode.name(), parentNode.name_size()); + } else { nodePtr->m_Name.assign(parentNode.name(), parentNode.name_size()); } @@ -206,9 +164,8 @@ bool CRapidXmlParser::toNodeHierarchy(const TCharRapidXmlNode &parentNode, nodePtr->m_Value.assign(parentNode.value(), parentNode.value_size()); size_t numAttributes(0); - const TCharRapidXmlAttribute *attr(parentNode.first_attribute()); - while (attr != 0) - { + const TCharRapidXmlAttribute* attr(parentNode.first_attribute()); + while (attr != 0) { ++numAttributes; attr = attr->next_attribute(); } @@ -218,24 +175,16 @@ bool CRapidXmlParser::toNodeHierarchy(const TCharRapidXmlNode &parentNode, // Take advantage of friendship to add attributes directly to the parent // node attr = parentNode.first_attribute(); - for (CXmlNode::TStrStrPrVecItr iter = nodePtr->m_Attributes.begin(); - iter != nodePtr->m_Attributes.end(); - ++iter) - { + for (CXmlNode::TStrStrPrVecItr iter = nodePtr->m_Attributes.begin(); iter != nodePtr->m_Attributes.end(); ++iter) { // Here we take advantage of friendship to directly modify the // CXmlNode's attributes map, thus avoiding the need to build a // separate map and then copy it - if (cache != 0) - { + if (cache != 0) { // Get attribute names and values from the cache if there is one, as // we expect relatively few distinct attributes repeated many times - iter->first = cache->stringFor(attr->name(), - attr->name_size()); - iter->second = cache->stringFor(attr->value(), - attr->value_size()); - } - else - { + iter->first = cache->stringFor(attr->name(), attr->name_size()); + iter->second = cache->stringFor(attr->value(), attr->value_size()); + } else { iter->first.assign(attr->name(), attr->name_size()); iter->second.assign(attr->value(), attr->value_size()); } @@ -244,26 +193,20 @@ bool CRapidXmlParser::toNodeHierarchy(const TCharRapidXmlNode &parentNode, } // Recursively add the children to the parent - const TCharRapidXmlNode *childNode(parentNode.first_node()); - while (childNode != 0) - { - if (childNode->type() == rapidxml::node_element) - { + const TCharRapidXmlNode* childNode(parentNode.first_node()); + while (childNode != 0) { + if (childNode->type() == rapidxml::node_element) { CXmlNodeWithChildren::TXmlNodeWithChildrenP childPtr; - if (this->toNodeHierarchy(*childNode, pool, cache, childPtr) == false) - { + if (this->toNodeHierarchy(*childNode, pool, cache, childPtr) == false) { return false; } nodePtr->addChildP(childPtr); - } - else if (childNode->type() == rapidxml::node_cdata) - { + } else if (childNode->type() == rapidxml::node_cdata) { // Append CDATA text to the value - see comment above regarding // garbling in complex documents - nodePtr->m_Value.append(childNode->value(), - childNode->value_size()); + nodePtr->m_Value.append(childNode->value(), childNode->value_size()); } childNode = childNode->next_sibling(); @@ -272,24 +215,19 @@ bool CRapidXmlParser::toNodeHierarchy(const TCharRapidXmlNode &parentNode, return true; } -bool CRapidXmlParser::navigateRoot() -{ +bool CRapidXmlParser::navigateRoot() { m_NavigatedNode = m_Doc.first_node(); return m_NavigatedNode != 0; } -bool CRapidXmlParser::navigateFirstChild() -{ - if (m_NavigatedNode == 0) - { +bool CRapidXmlParser::navigateFirstChild() { + if (m_NavigatedNode == 0) 
{ return false; } - TCharRapidXmlNode *childNode(m_NavigatedNode->first_node()); - while (childNode != 0) - { - if (childNode->type() == rapidxml::node_element) - { + TCharRapidXmlNode* childNode(m_NavigatedNode->first_node()); + while (childNode != 0) { + if (childNode->type() == rapidxml::node_element) { m_NavigatedNode = childNode; return true; } @@ -300,18 +238,14 @@ bool CRapidXmlParser::navigateFirstChild() return false; } -bool CRapidXmlParser::navigateNext() -{ - if (m_NavigatedNode == 0) - { +bool CRapidXmlParser::navigateNext() { + if (m_NavigatedNode == 0) { return false; } - TCharRapidXmlNode *nextNode(m_NavigatedNode->next_sibling()); - while (nextNode != 0) - { - if (nextNode->type() == rapidxml::node_element) - { + TCharRapidXmlNode* nextNode(m_NavigatedNode->next_sibling()); + while (nextNode != 0) { + if (nextNode->type() == rapidxml::node_element) { m_NavigatedNode = nextNode; return true; } @@ -322,18 +256,14 @@ bool CRapidXmlParser::navigateNext() return false; } -bool CRapidXmlParser::navigateParent() -{ - if (m_NavigatedNode == 0) - { +bool CRapidXmlParser::navigateParent() { + if (m_NavigatedNode == 0) { return false; } - TCharRapidXmlNode *parentNode(m_NavigatedNode->parent()); - while (parentNode != 0) - { - if (parentNode->type() == rapidxml::node_element) - { + TCharRapidXmlNode* parentNode(m_NavigatedNode->parent()); + while (parentNode != 0) { + if (parentNode->type() == rapidxml::node_element) { m_NavigatedNode = parentNode; return true; } @@ -344,10 +274,8 @@ bool CRapidXmlParser::navigateParent() return false; } -bool CRapidXmlParser::currentNodeName(std::string &name) -{ - if (m_NavigatedNode == 0) - { +bool CRapidXmlParser::currentNodeName(std::string& name) { + if (m_NavigatedNode == 0) { return false; } @@ -356,10 +284,8 @@ bool CRapidXmlParser::currentNodeName(std::string &name) return true; } -bool CRapidXmlParser::currentNodeValue(std::string &value) -{ - if (m_NavigatedNode == 0) - { +bool CRapidXmlParser::currentNodeValue(std::string& value) { + if (m_NavigatedNode == 0) { return false; } @@ -372,11 +298,9 @@ bool CRapidXmlParser::currentNodeValue(std::string &value) value.assign(m_NavigatedNode->value(), m_NavigatedNode->value_size()); // Add any CDATA children to the value - const TCharRapidXmlNode *childNode(m_NavigatedNode->first_node()); - while (childNode != 0) - { - if (childNode->type() == rapidxml::node_cdata) - { + const TCharRapidXmlNode* childNode(m_NavigatedNode->first_node()); + while (childNode != 0) { + if (childNode->type() == rapidxml::node_cdata) { // Append CDATA text to the value - see comment above regarding // garbling in complex documents value.append(childNode->value(), childNode->value_size()); @@ -388,16 +312,11 @@ bool CRapidXmlParser::currentNodeValue(std::string &value) return true; } -void CRapidXmlParser::convert(const CXmlNodeWithChildren &root, - std::string &result) -{ +void CRapidXmlParser::convert(const CXmlNodeWithChildren& root, std::string& result) { CRapidXmlParser::convert(true, root, result); } -void CRapidXmlParser::convert(bool indent, - const CXmlNodeWithChildren &root, - std::string &result) -{ +void CRapidXmlParser::convert(bool indent, const CXmlNodeWithChildren& root, std::string& result) { // Create a temporary document TCharRapidXmlDocument doc; @@ -406,27 +325,19 @@ void CRapidXmlParser::convert(bool indent, size_t approxLen(12 + nameLen * 2 + valueLen); // Root node - TCharRapidXmlNode *rootNode(doc.allocate_node(rapidxml::node_element, - root.name().c_str(), - root.value().empty() ? 
0 : root.value().c_str(), - nameLen, - valueLen)); + TCharRapidXmlNode* rootNode( + doc.allocate_node(rapidxml::node_element, root.name().c_str(), root.value().empty() ? 0 : root.value().c_str(), nameLen, valueLen)); doc.append_node(rootNode); - const CXmlNode::TStrStrPrVec &attrs = root.attributes(); + const CXmlNode::TStrStrPrVec& attrs = root.attributes(); - for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); - attrIter != attrs.end(); - ++attrIter) - { + for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) { nameLen = attrIter->first.length(); valueLen = attrIter->second.length(); approxLen += 5 + nameLen + valueLen; - TCharRapidXmlAttribute *attr(doc.allocate_attribute(attrIter->first.c_str(), - attrIter->second.empty() ? 0 : attrIter->second.c_str(), - nameLen, - valueLen)); + TCharRapidXmlAttribute* attr( + doc.allocate_attribute(attrIter->first.c_str(), attrIter->second.empty() ? 0 : attrIter->second.c_str(), nameLen, valueLen)); rootNode->append_attribute(attr); } @@ -436,70 +347,46 @@ void CRapidXmlParser::convert(bool indent, // Print to the string result.clear(); result.reserve(approxLen); - if (indent) - { - rapidxml::print(std::back_inserter(result), - doc); - } - else - { - rapidxml::print(std::back_inserter(result), - doc, - rapidxml::print_no_indenting); + if (indent) { + rapidxml::print(std::back_inserter(result), doc); + } else { + rapidxml::print(std::back_inserter(result), doc, rapidxml::print_no_indenting); } } -void CRapidXmlParser::convertChildren(const CXmlNodeWithChildren ¤t, - TCharRapidXmlDocument &doc, - TCharRapidXmlNode &xmlNode, - size_t &approxLen) -{ - const CXmlNodeWithChildren::TChildNodePVec &childVec = current.children(); +void CRapidXmlParser::convertChildren(const CXmlNodeWithChildren& current, + TCharRapidXmlDocument& doc, + TCharRapidXmlNode& xmlNode, + size_t& approxLen) { + const CXmlNodeWithChildren::TChildNodePVec& childVec = current.children(); // If a node has both children and a value, RapidXML requires that we add a // data node containing the value - if (xmlNode.value_size() > 0 && !childVec.empty()) - { - TCharRapidXmlNode *dataNode(doc.allocate_node(rapidxml::node_data, - 0, - xmlNode.value(), - 0, - xmlNode.value_size())); + if (xmlNode.value_size() > 0 && !childVec.empty()) { + TCharRapidXmlNode* dataNode(doc.allocate_node(rapidxml::node_data, 0, xmlNode.value(), 0, xmlNode.value_size())); xmlNode.append_node(dataNode); } - for (CXmlNodeWithChildren::TChildNodePVecCItr childIter = childVec.begin(); - childIter != childVec.end(); - ++childIter) - { - const CXmlNodeWithChildren *child = childIter->get(); - if (child != 0) - { + for (CXmlNodeWithChildren::TChildNodePVecCItr childIter = childVec.begin(); childIter != childVec.end(); ++childIter) { + const CXmlNodeWithChildren* child = childIter->get(); + if (child != 0) { size_t nameLen(child->name().length()); size_t valueLen(child->value().length()); approxLen += 10 + nameLen * 2 + valueLen; - TCharRapidXmlNode *childNode(doc.allocate_node(rapidxml::node_element, - child->name().c_str(), - child->value().empty() ? 0 : child->value().c_str(), - nameLen, - valueLen)); + TCharRapidXmlNode* childNode(doc.allocate_node( + rapidxml::node_element, child->name().c_str(), child->value().empty() ? 
0 : child->value().c_str(), nameLen, valueLen)); xmlNode.append_node(childNode); - const CXmlNode::TStrStrPrVec &attrs = child->attributes(); + const CXmlNode::TStrStrPrVec& attrs = child->attributes(); - for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); - attrIter != attrs.end(); - ++attrIter) - { + for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) { nameLen = attrIter->first.length(); valueLen = attrIter->second.length(); approxLen += 5 + nameLen + valueLen; - TCharRapidXmlAttribute *attr(doc.allocate_attribute(attrIter->first.c_str(), - attrIter->second.empty() ? 0 : attrIter->second.c_str(), - nameLen, - valueLen)); + TCharRapidXmlAttribute* attr(doc.allocate_attribute( + attrIter->first.c_str(), attrIter->second.empty() ? 0 : attrIter->second.c_str(), nameLen, valueLen)); childNode->append_attribute(attr); } @@ -509,47 +396,35 @@ void CRapidXmlParser::convertChildren(const CXmlNodeWithChildren ¤t, } template -bool CRapidXmlParser::parseBufferNonDestructive(const char *begin, size_t length) -{ - if (m_XmlBufSize <= length) - { +bool CRapidXmlParser::parseBufferNonDestructive(const char* begin, size_t length) { + if (m_XmlBufSize <= length) { m_XmlBufSize = length + 1; m_XmlBuf.reset(new char[m_XmlBufSize]); } ::memcpy(m_XmlBuf.get(), begin, length); m_XmlBuf[length] = '\0'; - if (this->parseBufferDestructive(m_XmlBuf.get(), length) == false) - { + if (this->parseBufferDestructive(m_XmlBuf.get(), length) == false) { // Only log the full XML string at the debug level, so that it doesn't // get sent to the socket logger - LOG_DEBUG("XML that cannot be parsed is " << - std::string(begin, length)); + LOG_DEBUG("XML that cannot be parsed is " << std::string(begin, length)); return false; } return true; } template -bool CRapidXmlParser::parseBufferDestructive(char *begin, size_t length) -{ +bool CRapidXmlParser::parseBufferDestructive(char* begin, size_t length) { m_Doc.clear(); m_NavigatedNode = 0; - try - { + try { m_Doc.parse(begin); - } - catch (rapidxml::parse_error &e) - { - LOG_ERROR("Unable to parse XML of length " << length << ": " << - e.what()); + } catch (rapidxml::parse_error& e) { + LOG_ERROR("Unable to parse XML of length " << length << ": " << e.what()); return false; } return true; } - - } } - diff --git a/lib/core/CRapidXmlStatePersistInserter.cc b/lib/core/CRapidXmlStatePersistInserter.cc index 08e97ed2d3..419ae85e68 100644 --- a/lib/core/CRapidXmlStatePersistInserter.cc +++ b/lib/core/CRapidXmlStatePersistInserter.cc @@ -11,43 +11,25 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CRapidXmlStatePersistInserter::CRapidXmlStatePersistInserter(const std::string &rootName) - : m_LevelParent(m_Doc.allocate_node(rapidxml::node_element, - this->nameFromCache(rootName), - 0, - rootName.length())), - m_ApproxLen(12 + rootName.length() * 2) -{ +CRapidXmlStatePersistInserter::CRapidXmlStatePersistInserter(const std::string& rootName) + : m_LevelParent(m_Doc.allocate_node(rapidxml::node_element, this->nameFromCache(rootName), 0, rootName.length())), + m_ApproxLen(12 + rootName.length() * 2) { m_Doc.append_node(m_LevelParent); } -CRapidXmlStatePersistInserter::CRapidXmlStatePersistInserter(const std::string &rootName, - const TStrStrMap &rootAttributes) - : m_LevelParent(m_Doc.allocate_node(rapidxml::node_element, - this->nameFromCache(rootName), - 0, - rootName.length())), - m_ApproxLen(12 + rootName.length() * 2) -{ +CRapidXmlStatePersistInserter::CRapidXmlStatePersistInserter(const 
std::string& rootName, const TStrStrMap& rootAttributes) + : m_LevelParent(m_Doc.allocate_node(rapidxml::node_element, this->nameFromCache(rootName), 0, rootName.length())), + m_ApproxLen(12 + rootName.length() * 2) { m_Doc.append_node(m_LevelParent); - for (TStrStrMapCItr iter = rootAttributes.begin(); - iter != rootAttributes.end(); - ++iter) - { - const std::string &name = iter->first; - const std::string &value = iter->second; - m_LevelParent->append_attribute(m_Doc.allocate_attribute(m_Doc.allocate_string(name.c_str(), - name.length()), - value.empty() ? 0 : m_Doc.allocate_string(value.c_str(), - value.length()), + for (TStrStrMapCItr iter = rootAttributes.begin(); iter != rootAttributes.end(); ++iter) { + const std::string& name = iter->first; + const std::string& value = iter->second; + m_LevelParent->append_attribute(m_Doc.allocate_attribute(m_Doc.allocate_string(name.c_str(), name.length()), + value.empty() ? 0 : m_Doc.allocate_string(value.c_str(), value.length()), name.length(), value.length())); @@ -55,13 +37,10 @@ CRapidXmlStatePersistInserter::CRapidXmlStatePersistInserter(const std::string & } } -void CRapidXmlStatePersistInserter::insertValue(const std::string &name, - const std::string &value) -{ +void CRapidXmlStatePersistInserter::insertValue(const std::string& name, const std::string& value) { m_LevelParent->append_node(m_Doc.allocate_node(rapidxml::node_element, this->nameFromCache(name), - value.empty() ? 0 : m_Doc.allocate_string(value.c_str(), - value.length()), + value.empty() ? 0 : m_Doc.allocate_string(value.c_str(), value.length()), name.length(), value.length())); @@ -69,36 +48,24 @@ void CRapidXmlStatePersistInserter::insertValue(const std::string &name, m_ApproxLen += 5 + name.length() * 2 + value.length(); } -void CRapidXmlStatePersistInserter::toXml(std::string &xml) const -{ +void CRapidXmlStatePersistInserter::toXml(std::string& xml) const { this->toXml(true, xml); } -void CRapidXmlStatePersistInserter::toXml(bool indent, std::string &xml) const -{ +void CRapidXmlStatePersistInserter::toXml(bool indent, std::string& xml) const { xml.clear(); // Hopefully the 4096 will be enough to cover any escaping required xml.reserve(m_ApproxLen + 4096); - if (indent) - { - rapidxml::print(std::back_inserter(xml), - m_Doc); - } - else - { - rapidxml::print(std::back_inserter(xml), - m_Doc, - rapidxml::print_no_indenting); + if (indent) { + rapidxml::print(std::back_inserter(xml), m_Doc); + } else { + rapidxml::print(std::back_inserter(xml), m_Doc, rapidxml::print_no_indenting); } } -void CRapidXmlStatePersistInserter::newLevel(const std::string &name) -{ - TCharRapidXmlNode *child(m_Doc.allocate_node(rapidxml::node_element, - this->nameFromCache(name), - 0, - name.length())); +void CRapidXmlStatePersistInserter::newLevel(const std::string& name) { + TCharRapidXmlNode* child(m_Doc.allocate_node(rapidxml::node_element, this->nameFromCache(name), 0, name.length())); m_LevelParent->append_node(child); m_ApproxLen += 5 + name.length() * 2; @@ -107,11 +74,9 @@ void CRapidXmlStatePersistInserter::newLevel(const std::string &name) m_LevelParent = child; } -void CRapidXmlStatePersistInserter::endLevel() -{ - TCharRapidXmlNode *levelGrandParent(m_LevelParent->parent()); - if (levelGrandParent == 0) - { +void CRapidXmlStatePersistInserter::endLevel() { + TCharRapidXmlNode* levelGrandParent(m_LevelParent->parent()); + if (levelGrandParent == 0) { LOG_ERROR("Logic error - ending more levels than have been started"); return; } @@ -120,12 +85,8 @@ void 
CRapidXmlStatePersistInserter::endLevel() m_LevelParent = levelGrandParent; } -const char *CRapidXmlStatePersistInserter::nameFromCache(const std::string &name) -{ +const char* CRapidXmlStatePersistInserter::nameFromCache(const std::string& name) { return m_NameCache.stringFor(name.c_str(), name.length()).c_str(); } - - } } - diff --git a/lib/core/CRapidXmlStateRestoreTraverser.cc b/lib/core/CRapidXmlStateRestoreTraverser.cc index 32cac91d69..72e5a61a03 100644 --- a/lib/core/CRapidXmlStateRestoreTraverser.cc +++ b/lib/core/CRapidXmlStateRestoreTraverser.cc @@ -7,33 +7,21 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CRapidXmlStateRestoreTraverser::CRapidXmlStateRestoreTraverser(const CRapidXmlParser &parser) - : m_Parser(parser), - m_CurrentNode(m_Parser.m_Doc.first_node()), - m_IsNameCacheValid(false), - m_IsValueCacheValid(false) -{ - if (m_CurrentNode != 0 && - m_CurrentNode->type() != rapidxml::node_element) - { +CRapidXmlStateRestoreTraverser::CRapidXmlStateRestoreTraverser(const CRapidXmlParser& parser) + : m_Parser(parser), m_CurrentNode(m_Parser.m_Doc.first_node()), m_IsNameCacheValid(false), m_IsValueCacheValid(false) { + if (m_CurrentNode != 0 && m_CurrentNode->type() != rapidxml::node_element) { LOG_ERROR("Node type " << m_CurrentNode->type() << " not supported"); m_CurrentNode = 0; this->setBadState(); } } -bool CRapidXmlStateRestoreTraverser::next() -{ - CRapidXmlParser::TCharRapidXmlNode *next(this->nextNodeElement()); - if (next == 0) - { +bool CRapidXmlStateRestoreTraverser::next() { + CRapidXmlParser::TCharRapidXmlNode* next(this->nextNodeElement()); + if (next == 0) { return false; } @@ -45,22 +33,15 @@ bool CRapidXmlStateRestoreTraverser::next() return true; } -bool CRapidXmlStateRestoreTraverser::hasSubLevel() const -{ +bool CRapidXmlStateRestoreTraverser::hasSubLevel() const { return this->firstChildNodeElement() != 0; } -const std::string &CRapidXmlStateRestoreTraverser::name() const -{ - if (!m_IsNameCacheValid) - { - if (m_CurrentNode != 0) - { - m_CachedName.assign(m_CurrentNode->name(), - m_CurrentNode->name_size()); - } - else - { +const std::string& CRapidXmlStateRestoreTraverser::name() const { + if (!m_IsNameCacheValid) { + if (m_CurrentNode != 0) { + m_CachedName.assign(m_CurrentNode->name(), m_CurrentNode->name_size()); + } else { m_CachedName.clear(); } m_IsNameCacheValid = true; @@ -69,19 +50,13 @@ const std::string &CRapidXmlStateRestoreTraverser::name() const return m_CachedName; } -const std::string &CRapidXmlStateRestoreTraverser::value() const -{ - if (!m_IsValueCacheValid) - { - if (m_CurrentNode != 0) - { +const std::string& CRapidXmlStateRestoreTraverser::value() const { + if (!m_IsValueCacheValid) { + if (m_CurrentNode != 0) { // NB: this doesn't work for CDATA - see implementation decisions in // the header - m_CachedValue.assign(m_CurrentNode->value(), - m_CurrentNode->value_size()); - } - else - { + m_CachedValue.assign(m_CurrentNode->value(), m_CurrentNode->value_size()); + } else { m_CachedValue.clear(); } m_IsValueCacheValid = true; @@ -89,11 +64,9 @@ const std::string &CRapidXmlStateRestoreTraverser::value() const return m_CachedValue; } -bool CRapidXmlStateRestoreTraverser::descend() -{ - CRapidXmlParser::TCharRapidXmlNode *child(this->firstChildNodeElement()); - if (child == 0) - { +bool CRapidXmlStateRestoreTraverser::descend() { + CRapidXmlParser::TCharRapidXmlNode* child(this->firstChildNodeElement()); + if (child == 0) { return false; } @@ -105,16 +78,13 @@ bool 
CRapidXmlStateRestoreTraverser::descend() return true; } -bool CRapidXmlStateRestoreTraverser::ascend() -{ - if (m_CurrentNode == 0) - { +bool CRapidXmlStateRestoreTraverser::ascend() { + if (m_CurrentNode == 0) { return false; } - CRapidXmlParser::TCharRapidXmlNode *parent(m_CurrentNode->parent()); - if (parent == 0) - { + CRapidXmlParser::TCharRapidXmlNode* parent(m_CurrentNode->parent()); + if (parent == 0) { return false; } @@ -126,21 +96,15 @@ bool CRapidXmlStateRestoreTraverser::ascend() return true; } -CRapidXmlParser::TCharRapidXmlNode *CRapidXmlStateRestoreTraverser::nextNodeElement() const -{ - if (m_CurrentNode == 0) - { +CRapidXmlParser::TCharRapidXmlNode* CRapidXmlStateRestoreTraverser::nextNodeElement() const { + if (m_CurrentNode == 0) { return 0; } - for (CRapidXmlParser::TCharRapidXmlNode *nextNode = m_CurrentNode->next_sibling(); - nextNode != 0; - nextNode = nextNode->next_sibling()) - { + for (CRapidXmlParser::TCharRapidXmlNode* nextNode = m_CurrentNode->next_sibling(); nextNode != 0; nextNode = nextNode->next_sibling()) { // We ignore comments, CDATA and any other type of node that's not an // element - if (nextNode->type() == rapidxml::node_element) - { + if (nextNode->type() == rapidxml::node_element) { return nextNode; } } @@ -148,21 +112,15 @@ CRapidXmlParser::TCharRapidXmlNode *CRapidXmlStateRestoreTraverser::nextNodeElem return 0; } -CRapidXmlParser::TCharRapidXmlNode *CRapidXmlStateRestoreTraverser::firstChildNodeElement() const -{ - if (m_CurrentNode == 0) - { +CRapidXmlParser::TCharRapidXmlNode* CRapidXmlStateRestoreTraverser::firstChildNodeElement() const { + if (m_CurrentNode == 0) { return 0; } - for (CRapidXmlParser::TCharRapidXmlNode *child = m_CurrentNode->first_node(); - child != 0; - child = child->next_sibling()) - { + for (CRapidXmlParser::TCharRapidXmlNode* child = m_CurrentNode->first_node(); child != 0; child = child->next_sibling()) { // We ignore comments, CDATA and any other type of node that's not an // element - if (child->type() == rapidxml::node_element) - { + if (child->type() == rapidxml::node_element) { return child; } } @@ -170,12 +128,8 @@ CRapidXmlParser::TCharRapidXmlNode *CRapidXmlStateRestoreTraverser::firstChildNo return 0; } -bool CRapidXmlStateRestoreTraverser::isEof() const -{ +bool CRapidXmlStateRestoreTraverser::isEof() const { return false; } - - } } - diff --git a/lib/core/CReadWriteLock.cc b/lib/core/CReadWriteLock.cc index 13d7d4080b..fdd6a6fc27 100644 --- a/lib/core/CReadWriteLock.cc +++ b/lib/core/CReadWriteLock.cc @@ -10,70 +10,51 @@ #include #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CReadWriteLock::CReadWriteLock() -{ +CReadWriteLock::CReadWriteLock() { // Valgrind can complain if this is not initialised memset(&m_ReadWriteLock, 0x00, sizeof(m_ReadWriteLock)); int ret(pthread_rwlock_init(&m_ReadWriteLock, 0)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -CReadWriteLock::~CReadWriteLock() -{ +CReadWriteLock::~CReadWriteLock() { int ret(pthread_rwlock_destroy(&m_ReadWriteLock)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void CReadWriteLock::readLock() -{ +void CReadWriteLock::readLock() { int ret(pthread_rwlock_rdlock(&m_ReadWriteLock)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void CReadWriteLock::readUnlock() -{ +void CReadWriteLock::readUnlock() { int ret(pthread_rwlock_unlock(&m_ReadWriteLock)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void 
CReadWriteLock::writeLock() -{ +void CReadWriteLock::writeLock() { int ret(pthread_rwlock_wrlock(&m_ReadWriteLock)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } -void CReadWriteLock::writeUnlock() -{ +void CReadWriteLock::writeUnlock() { int ret(pthread_rwlock_unlock(&m_ReadWriteLock)); - if (ret != 0) - { + if (ret != 0) { LOG_WARN(::strerror(ret)); } } - - } } - diff --git a/lib/core/CReadWriteLock_Windows.cc b/lib/core/CReadWriteLock_Windows.cc index d5916d7186..e6c341d60f 100644 --- a/lib/core/CReadWriteLock_Windows.cc +++ b/lib/core/CReadWriteLock_Windows.cc @@ -5,44 +5,31 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CReadWriteLock::CReadWriteLock() -{ +CReadWriteLock::CReadWriteLock() { InitializeSRWLock(&m_ReadWriteLock); } -CReadWriteLock::~CReadWriteLock() -{ +CReadWriteLock::~CReadWriteLock() { // There is no function to destroy the read/write lock on Windows } -void CReadWriteLock::readLock() -{ +void CReadWriteLock::readLock() { AcquireSRWLockShared(&m_ReadWriteLock); } -void CReadWriteLock::readUnlock() -{ +void CReadWriteLock::readUnlock() { ReleaseSRWLockShared(&m_ReadWriteLock); } -void CReadWriteLock::writeLock() -{ +void CReadWriteLock::writeLock() { AcquireSRWLockExclusive(&m_ReadWriteLock); } -void CReadWriteLock::writeUnlock() -{ +void CReadWriteLock::writeUnlock() { ReleaseSRWLockExclusive(&m_ReadWriteLock); } - - } } - diff --git a/lib/core/CRegex.cc b/lib/core/CRegex.cc index 701c6af30e..13b4bc5c67 100644 --- a/lib/core/CRegex.cc +++ b/lib/core/CRegex.cc @@ -9,12 +9,9 @@ #include +namespace { -namespace -{ - -const char *translateErrorCode(boost::regex_constants::error_type code) -{ +const char* translateErrorCode(boost::regex_constants::error_type code) { // From boost_1_47_0/libs/regex/doc/html/boost_regex/ref/error_type.html // and /usr/local/include/boost-1_47/boost/regex/v4/error_type.hpp. The // switch cases are in the same order as the enum definition in @@ -22,52 +19,51 @@ const char *translateErrorCode(boost::regex_constants::error_type code) // easier to add new cases in future versions of Boost. The -Wswitch-enum // option to g++ should warn if future versions of Boost introduce new enum // values. - switch (code) - { - case boost::regex_constants::error_ok: - return "No error."; // Not used in Boost 1.47 - case boost::regex_constants::error_no_match: - return "No match."; // Not used in Boost 1.47 - case boost::regex_constants::error_bad_pattern: - return "Other unspecified errors."; - case boost::regex_constants::error_collate: - return "An invalid collating element was specified in a [[.name.]] block."; - case boost::regex_constants::error_ctype: - return "An invalid character class name was specified in a [[:name:]] block."; - case boost::regex_constants::error_escape: - return "An invalid or trailing escape was encountered."; - case boost::regex_constants::error_backref: - return "A back-reference to a non-existant marked sub-expression was encountered."; - case boost::regex_constants::error_brack: - return "An invalid character set [...] 
was encountered."; - case boost::regex_constants::error_paren: - return "Mismatched '(' and ')'."; - case boost::regex_constants::error_brace: - return "Mismatched '{' and '}'."; - case boost::regex_constants::error_badbrace: - return "Invalid contents of a {...} block."; - case boost::regex_constants::error_range: - return "A character range was invalid, for example [d-a]."; - case boost::regex_constants::error_space: - return "Out of memory."; - case boost::regex_constants::error_badrepeat: - return "An attempt to repeat something that can not be repeated - for example a*+"; - case boost::regex_constants::error_end: - return "Unexpected end of regular expression."; // Not used in Boost 1.47 - case boost::regex_constants::error_size: - return "Regular expression too big."; - case boost::regex_constants::error_right_paren: - return "Unmatched ')'."; // Not used in Boost 1.47 - case boost::regex_constants::error_empty: - return "Regular expression starts or ends with the alternation operator |."; - case boost::regex_constants::error_complexity: - return "The expression became too complex to handle."; - case boost::regex_constants::error_stack: - return "Out of program stack space."; - case boost::regex_constants::error_perl_extension: - return "An invalid Perl extension was encountered."; - case boost::regex_constants::error_unknown: - return "Unknown error."; + switch (code) { + case boost::regex_constants::error_ok: + return "No error."; // Not used in Boost 1.47 + case boost::regex_constants::error_no_match: + return "No match."; // Not used in Boost 1.47 + case boost::regex_constants::error_bad_pattern: + return "Other unspecified errors."; + case boost::regex_constants::error_collate: + return "An invalid collating element was specified in a [[.name.]] block."; + case boost::regex_constants::error_ctype: + return "An invalid character class name was specified in a [[:name:]] block."; + case boost::regex_constants::error_escape: + return "An invalid or trailing escape was encountered."; + case boost::regex_constants::error_backref: + return "A back-reference to a non-existant marked sub-expression was encountered."; + case boost::regex_constants::error_brack: + return "An invalid character set [...] 
was encountered."; + case boost::regex_constants::error_paren: + return "Mismatched '(' and ')'."; + case boost::regex_constants::error_brace: + return "Mismatched '{' and '}'."; + case boost::regex_constants::error_badbrace: + return "Invalid contents of a {...} block."; + case boost::regex_constants::error_range: + return "A character range was invalid, for example [d-a]."; + case boost::regex_constants::error_space: + return "Out of memory."; + case boost::regex_constants::error_badrepeat: + return "An attempt to repeat something that can not be repeated - for example a*+"; + case boost::regex_constants::error_end: + return "Unexpected end of regular expression."; // Not used in Boost 1.47 + case boost::regex_constants::error_size: + return "Regular expression too big."; + case boost::regex_constants::error_right_paren: + return "Unmatched ')'."; // Not used in Boost 1.47 + case boost::regex_constants::error_empty: + return "Regular expression starts or ends with the alternation operator |."; + case boost::regex_constants::error_complexity: + return "The expression became too complex to handle."; + case boost::regex_constants::error_stack: + return "Out of program stack space."; + case boost::regex_constants::error_perl_extension: + return "An invalid Perl extension was encountered."; + case boost::regex_constants::error_unknown: + return "Unknown error."; } LOG_ERROR("Unexpected error code " << code); @@ -76,46 +72,27 @@ const char *translateErrorCode(boost::regex_constants::error_type code) } // anonymous namespace +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CRegex::CRegex() - : m_Initialised(false) -{ +CRegex::CRegex() : m_Initialised(false) { } -bool CRegex::init(const std::string ®ex) -{ +bool CRegex::init(const std::string& regex) { // Allow expression to be initialised twice m_Initialised = false; - try - { + try { m_Regex = boost::regex(regex.c_str()); - } - catch (boost::regex_error &e) - { - if (static_cast(e.position()) <= regex.size()) - { - LOG_ERROR("Unable to compile regex: '" << - regex << "' '" << - regex.substr(0, e.position()) << "' '" << - regex.substr(e.position()) << "': " << - ::translateErrorCode(e.code())); - } - else - { - LOG_ERROR("Unable to compile regex: '" << regex << "': " << - ::translateErrorCode(e.code())); + } catch (boost::regex_error& e) { + if (static_cast(e.position()) <= regex.size()) { + LOG_ERROR("Unable to compile regex: '" << regex << "' '" << regex.substr(0, e.position()) << "' '" << regex.substr(e.position()) + << "': " << ::translateErrorCode(e.code())); + } else { + LOG_ERROR("Unable to compile regex: '" << regex << "': " << ::translateErrorCode(e.code())); } return false; - } - catch (std::exception &e) - { + } catch (std::exception& e) { LOG_ERROR("Unable to compile regex: " << e.what()); return false; } @@ -125,38 +102,27 @@ bool CRegex::init(const std::string ®ex) return true; } -bool CRegex::tokenise(const std::string &str, - CRegex::TStrVec &tokens) const -{ +bool CRegex::tokenise(const std::string& str, CRegex::TStrVec& tokens) const { tokens.clear(); - if (!m_Initialised) - { + if (!m_Initialised) { LOG_ERROR("Regex not initialised"); return false; } - try - { + try { boost::smatch matches; - if (boost::regex_match(str, matches, m_Regex) == false) - { + if (boost::regex_match(str, matches, m_Regex) == false) { return false; } - for (int i = 1; i < static_cast(matches.size()); ++i) - { + for (int i = 1; i < static_cast(matches.size()); ++i) { tokens.push_back(std::string(matches[i].first, 
matches[i].second)); } - } - catch (boost::regex_error &e) - { - LOG_ERROR("Unable to tokenise using regex: '" << str << "': " << - ::translateErrorCode(e.code())); + } catch (boost::regex_error& e) { + LOG_ERROR("Unable to tokenise using regex: '" << str << "': " << ::translateErrorCode(e.code())); return false; - } - catch (std::exception &e) - { + } catch (std::exception& e) { LOG_ERROR("Unable to tokenise using regex: " << e.what()); return false; } @@ -164,35 +130,25 @@ bool CRegex::tokenise(const std::string &str, return true; } -bool CRegex::split(const std::string &str, - CRegex::TStrVec &tokens) const -{ +bool CRegex::split(const std::string& str, CRegex::TStrVec& tokens) const { tokens.clear(); - if (!m_Initialised) - { + if (!m_Initialised) { LOG_ERROR("Regex not initialised"); return false; } - try - { + try { boost::sregex_token_iterator i(str.begin(), str.end(), m_Regex, -1); boost::sregex_token_iterator j; - while(i != j) - { + while (i != j) { tokens.push_back(*i++); } - } - catch (boost::regex_error &e) - { - LOG_ERROR("Unable to tokenise using regex: '" << str << "': " << - ::translateErrorCode(e.code())); + } catch (boost::regex_error& e) { + LOG_ERROR("Unable to tokenise using regex: '" << str << "': " << ::translateErrorCode(e.code())); return false; - } - catch (std::exception &e) - { + } catch (std::exception& e) { LOG_ERROR("Unable to tokenise using regex: " << e.what()); return false; } @@ -200,30 +156,21 @@ bool CRegex::split(const std::string &str, return true; } -bool CRegex::matches(const std::string &str) const -{ - if (!m_Initialised) - { +bool CRegex::matches(const std::string& str) const { + if (!m_Initialised) { LOG_ERROR("Regex not initialised"); return false; } - try - { + try { boost::smatch matches; - if (boost::regex_match(str, matches, m_Regex) == false) - { + if (boost::regex_match(str, matches, m_Regex) == false) { return false; } - } - catch (boost::regex_error &e) - { - LOG_ERROR("Unable to match using regex: '" << str << "': " << - ::translateErrorCode(e.code())); + } catch (boost::regex_error& e) { + LOG_ERROR("Unable to match using regex: '" << str << "': " << ::translateErrorCode(e.code())); return false; - } - catch (std::exception &e) - { + } catch (std::exception& e) { LOG_ERROR("Unable to match using regex: " << e.what()); return false; } @@ -231,44 +178,28 @@ bool CRegex::matches(const std::string &str) const return true; } -bool CRegex::search(size_t startPos, - const std::string &str, - size_t &position, - size_t &length) const -{ - if (!m_Initialised) - { +bool CRegex::search(size_t startPos, const std::string& str, size_t& position, size_t& length) const { + if (!m_Initialised) { LOG_ERROR("Regex not initialised"); return false; } - if (startPos >= str.length()) - { + if (startPos >= str.length()) { return false; } - try - { + try { boost::smatch matches; - if (boost::regex_search(str.begin() + startPos, - str.begin() + str.length(), - matches, - m_Regex) == false) - { + if (boost::regex_search(str.begin() + startPos, str.begin() + str.length(), matches, m_Regex) == false) { return false; } position = matches[0].first - str.begin(); length = matches[0].second - matches[0].first; - } - catch (boost::regex_error &e) - { - LOG_ERROR("Unable to search using regex: '" << str << "': " << - ::translateErrorCode(e.code())); + } catch (boost::regex_error& e) { + LOG_ERROR("Unable to search using regex: '" << str << "': " << ::translateErrorCode(e.code())); return false; - } - catch (std::exception &e) - { + } catch (std::exception& e) { 
LOG_ERROR("Unable to match using regex: " << e.what()); return false; } @@ -276,34 +207,24 @@ bool CRegex::search(size_t startPos, return true; } -bool CRegex::search(size_t startPos, - const std::string &str, - size_t &position) const -{ +bool CRegex::search(size_t startPos, const std::string& str, size_t& position) const { size_t length(0); return this->search(startPos, str, position, length); } -bool CRegex::search(const std::string &str, - size_t &position, - size_t &length) const -{ +bool CRegex::search(const std::string& str, size_t& position, size_t& length) const { return this->search(0, str, position, length); } -bool CRegex::search(const std::string &str, - size_t &position) const -{ +bool CRegex::search(const std::string& str, size_t& position) const { size_t length(0); return this->search(0, str, position, length); } -std::string CRegex::str() const -{ - if (!m_Initialised) - { +std::string CRegex::str() const { + if (!m_Initialised) { LOG_ERROR("Regex not initialised"); return std::string(); } @@ -311,10 +232,8 @@ std::string CRegex::str() const return m_Regex.str(); } -size_t CRegex::literalCount() const -{ - if (!m_Initialised) - { +size_t CRegex::literalCount() const { + if (!m_Initialised) { LOG_ERROR("Regex not initialised"); return 0; } @@ -333,179 +252,141 @@ size_t CRegex::literalCount() const size_t subCount(0); size_t minSubCount(std::numeric_limits::max()); - for (std::string::iterator iter = regexStr.begin(); - iter != regexStr.end(); - ++iter) - { + for (std::string::iterator iter = regexStr.begin(); iter != regexStr.end(); ++iter) { char thisChar(*iter); - switch (thisChar) - { - case '$': - // Perl can expand variables, so should really skip over - // variable names at this point - break; - case '.': - case '^': - case '*': - case '+': - case '?': - break; - case '\\': - ++iter; - if (iter == regexStr.end()) - { - LOG_ERROR("Inconsistency - backslash at the end of regex"); - return count; - } - thisChar = *iter; - if (thisChar != 'd' && thisChar != 's' && thisChar != 'w' && - thisChar != 'D' && thisChar != 'S' && thisChar != 'W' && - (thisChar < '0' || thisChar > '9')) - { - if (squareBracketCount == 0 && braceCount == 0) - { - std::string::iterator nextIter(iter + 1); - if (nextIter == regexStr.end() || - (*nextIter != '*' && *nextIter != '+' && *nextIter != '?')) - { - if (inSubMatch) - { - ++subCount; - } - else - { - ++count; - } + switch (thisChar) { + case '$': + // Perl can expand variables, so should really skip over + // variable names at this point + break; + case '.': + case '^': + case '*': + case '+': + case '?': + break; + case '\\': + ++iter; + if (iter == regexStr.end()) { + LOG_ERROR("Inconsistency - backslash at the end of regex"); + return count; + } + thisChar = *iter; + if (thisChar != 'd' && thisChar != 's' && thisChar != 'w' && thisChar != 'D' && thisChar != 'S' && thisChar != 'W' && + (thisChar < '0' || thisChar > '9')) { + if (squareBracketCount == 0 && braceCount == 0) { + std::string::iterator nextIter(iter + 1); + if (nextIter == regexStr.end() || (*nextIter != '*' && *nextIter != '+' && *nextIter != '?')) { + if (inSubMatch) { + ++subCount; + } else { + ++count; } } } - break; - case '[': - ++squareBracketCount; - break; - case ']': - if (squareBracketCount == 0) - { - LOG_ERROR("Inconsistency - more ] than ["); - } - else - { - --squareBracketCount; - } - break; - case '{': - ++braceCount; - break; - case '}': - if (braceCount == 0) - { - LOG_ERROR("Inconsistency - more } than {"); - } - else - { - --braceCount; - } - break; - case 
'|': - if (inSubMatch) - { - if (subCount < minSubCount) - { - minSubCount = subCount; - } - subCount = 0; - } - else - { - } - break; - case '(': - inSubMatch = true; - break; - case ')': - inSubMatch = false; - if (subCount < minSubCount) - { + } + break; + case '[': + ++squareBracketCount; + break; + case ']': + if (squareBracketCount == 0) { + LOG_ERROR("Inconsistency - more ] than ["); + } else { + --squareBracketCount; + } + break; + case '{': + ++braceCount; + break; + case '}': + if (braceCount == 0) { + LOG_ERROR("Inconsistency - more } than {"); + } else { + --braceCount; + } + break; + case '|': + if (inSubMatch) { + if (subCount < minSubCount) { minSubCount = subCount; } - count += minSubCount; subCount = 0; - minSubCount = std::numeric_limits::max(); - break; - default: - if (squareBracketCount == 0 && braceCount == 0) - { - std::string::iterator nextIter(iter + 1); - if (nextIter == regexStr.end() || - (*nextIter != '*' && *nextIter != '+' && *nextIter != '?')) - { - if (inSubMatch) - { - ++subCount; - } - else - { - ++count; - } + } else { + } + break; + case '(': + inSubMatch = true; + break; + case ')': + inSubMatch = false; + if (subCount < minSubCount) { + minSubCount = subCount; + } + count += minSubCount; + subCount = 0; + minSubCount = std::numeric_limits::max(); + break; + default: + if (squareBracketCount == 0 && braceCount == 0) { + std::string::iterator nextIter(iter + 1); + if (nextIter == regexStr.end() || (*nextIter != '*' && *nextIter != '+' && *nextIter != '?')) { + if (inSubMatch) { + ++subCount; + } else { + ++count; } } - break; + } + break; } } return count; } -std::string CRegex::escapeRegexSpecial(const std::string &literal) -{ +std::string CRegex::escapeRegexSpecial(const std::string& literal) { std::string result; result.reserve(literal.size()); - for (std::string::const_iterator iter = literal.begin(); - iter != literal.end(); - ++iter) - { + for (std::string::const_iterator iter = literal.begin(); iter != literal.end(); ++iter) { char thisChar = *iter; - switch (thisChar) - { - case '.': - case '*': - case '+': - case '?': - case '|': - case '^': - case '$': - case '(': - case ')': - case '[': - case ']': - case '{': - case '}': - case '\\': - result += '\\'; - result += thisChar; - break; - case '\n': - result += "\\n"; - break; - case '\r': - // Carriage returns are made optional to prevent the regex - // having a silly incompatibility between Windows text and Unix - // text files - result += "\\r?"; - break; - default: - result += thisChar; - break; + switch (thisChar) { + case '.': + case '*': + case '+': + case '?': + case '|': + case '^': + case '$': + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '\\': + result += '\\'; + result += thisChar; + break; + case '\n': + result += "\\n"; + break; + case '\r': + // Carriage returns are made optional to prevent the regex + // having a silly incompatibility between Windows text and Unix + // text files + result += "\\r?"; + break; + default: + result += thisChar; + break; } } return result; } - - } } - diff --git a/lib/core/CRegexFilter.cc b/lib/core/CRegexFilter.cc index 70f124fefa..c710500f3c 100644 --- a/lib/core/CRegexFilter.cc +++ b/lib/core/CRegexFilter.cc @@ -7,24 +7,17 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -CRegexFilter::CRegexFilter() - : m_Regex() -{ +CRegexFilter::CRegexFilter() : m_Regex() { } -bool CRegexFilter::configure(const TStrVec ®ularExpressions) -{ +bool CRegexFilter::configure(const TStrVec& 
regularExpressions) { m_Regex.clear(); m_Regex.resize(regularExpressions.size()); - for (std::size_t i = 0; i < regularExpressions.size(); ++i) - { - if (m_Regex[i].init(regularExpressions[i]) == false) - { + for (std::size_t i = 0; i < regularExpressions.size(); ++i) { + if (m_Regex[i].init(regularExpressions[i]) == false) { m_Regex.clear(); LOG_ERROR("Configuration failed; no filtering will apply"); return false; @@ -34,31 +27,25 @@ bool CRegexFilter::configure(const TStrVec ®ularExpressions) return true; } -std::string CRegexFilter::apply(const std::string &target) const -{ - if (m_Regex.empty()) - { +std::string CRegexFilter::apply(const std::string& target) const { + if (m_Regex.empty()) { return target; } std::string result(target); std::size_t position = 0; std::size_t length = 0; - for (std::size_t i = 0; i < m_Regex.size(); ++i) - { - const CRegex ¤tRegex = m_Regex[i]; - while (currentRegex.search(result, position, length)) - { + for (std::size_t i = 0; i < m_Regex.size(); ++i) { + const CRegex& currentRegex = m_Regex[i]; + while (currentRegex.search(result, position, length)) { result.erase(position, length); } } return result; } -bool CRegexFilter::empty() const -{ +bool CRegexFilter::empty() const { return m_Regex.empty(); } - } } diff --git a/lib/core/CResourceLocator.cc b/lib/core/CResourceLocator.cc index 0e8f36d7b1..d812e11015 100644 --- a/lib/core/CResourceLocator.cc +++ b/lib/core/CResourceLocator.cc @@ -10,20 +10,14 @@ #include - -namespace -{ -const char *CPP_SRC_HOME("CPP_SRC_HOME"); +namespace { +const char* CPP_SRC_HOME("CPP_SRC_HOME"); } -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { -std::string CResourceLocator::resourceDir() -{ +std::string CResourceLocator::resourceDir() { // Look relative to the program that's running, assuming this directory layout: // $ES_HOME/plugin//resources // $ES_HOME/plugin//platform//bin @@ -34,15 +28,12 @@ std::string CResourceLocator::resourceDir() // that does, but if neither exist return the production directory so the // error message is nicer for the end user. 
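// Illustration (not part of the original patch): the lookup order sketched
// by the comment above is
//   1. the production resources directory relative to the running binary,
//   2. $CPP_SRC_HOME/lib/core when running from a source checkout,
//   3. the production directory again, so the eventual stat() failure
//      yields the friendlier error message for end users.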
COsFileFuncs::TStat buf; - if (COsFileFuncs::stat(productionDir.c_str(), &buf) != 0) - { - const char *cppSrcHome(::getenv(CPP_SRC_HOME)); - if (cppSrcHome != 0) - { + if (COsFileFuncs::stat(productionDir.c_str(), &buf) != 0) { + const char* cppSrcHome(::getenv(CPP_SRC_HOME)); + if (cppSrcHome != 0) { std::string devDir(cppSrcHome); devDir += "/lib/core"; - if (COsFileFuncs::stat(devDir.c_str(), &buf) == 0) - { + if (COsFileFuncs::stat(devDir.c_str(), &buf) == 0) { return devDir; } } @@ -51,8 +42,7 @@ std::string CResourceLocator::resourceDir() return productionDir; } -std::string CResourceLocator::logDir() -{ +std::string CResourceLocator::logDir() { // Look relative to the program that's running, assuming this directory layout: // $ES_HOME/logs // $ES_HOME/plugin//platform//bin @@ -60,8 +50,7 @@ std::string CResourceLocator::logDir() std::string productionDir(CProgName::progDir() + "/../../../../../logs"); COsFileFuncs::TStat buf; - if (COsFileFuncs::stat(productionDir.c_str(), &buf) != 0) - { + if (COsFileFuncs::stat(productionDir.c_str(), &buf) != 0) { // Assume we're running as a unit test return "."; } @@ -69,18 +58,13 @@ std::string CResourceLocator::logDir() return productionDir; } -std::string CResourceLocator::cppRootDir() -{ - const char *cppSrcHome(::getenv(CPP_SRC_HOME)); - if (cppSrcHome == 0) - { +std::string CResourceLocator::cppRootDir() { + const char* cppSrcHome(::getenv(CPP_SRC_HOME)); + if (cppSrcHome == 0) { // Assume we're in a unittest directory return "../../.."; } return cppSrcHome; } - - } } - diff --git a/lib/core/CScopedFastLock.cc b/lib/core/CScopedFastLock.cc index c63f62cd63..00b7a9c17a 100644 --- a/lib/core/CScopedFastLock.cc +++ b/lib/core/CScopedFastLock.cc @@ -7,25 +7,15 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CScopedFastLock::CScopedFastLock(CFastMutex &mutex) - : m_Mutex(mutex) -{ +CScopedFastLock::CScopedFastLock(CFastMutex& mutex) : m_Mutex(mutex) { m_Mutex.lock(); } -CScopedFastLock::~CScopedFastLock() -{ +CScopedFastLock::~CScopedFastLock() { m_Mutex.unlock(); } - - } } - diff --git a/lib/core/CScopedLock.cc b/lib/core/CScopedLock.cc index 67db373bc4..6ad703782d 100644 --- a/lib/core/CScopedLock.cc +++ b/lib/core/CScopedLock.cc @@ -7,25 +7,15 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CScopedLock::CScopedLock(CMutex &mutex) - : m_Mutex(mutex) -{ +CScopedLock::CScopedLock(CMutex& mutex) : m_Mutex(mutex) { m_Mutex.lock(); } -CScopedLock::~CScopedLock() -{ +CScopedLock::~CScopedLock() { m_Mutex.unlock(); } - - } } - diff --git a/lib/core/CScopedReadLock.cc b/lib/core/CScopedReadLock.cc index b176d046ed..2f154cf501 100644 --- a/lib/core/CScopedReadLock.cc +++ b/lib/core/CScopedReadLock.cc @@ -7,25 +7,15 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CScopedReadLock::CScopedReadLock(CReadWriteLock &readWriteLock) - : m_ReadWriteLock(readWriteLock) -{ +CScopedReadLock::CScopedReadLock(CReadWriteLock& readWriteLock) : m_ReadWriteLock(readWriteLock) { m_ReadWriteLock.readLock(); } -CScopedReadLock::~CScopedReadLock() -{ +CScopedReadLock::~CScopedReadLock() { m_ReadWriteLock.readUnlock(); } - - } } - diff --git a/lib/core/CScopedWriteLock.cc b/lib/core/CScopedWriteLock.cc index 79ab7925cf..94ae857aef 100644 --- a/lib/core/CScopedWriteLock.cc +++ b/lib/core/CScopedWriteLock.cc @@ -7,25 +7,15 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CScopedWriteLock::CScopedWriteLock(CReadWriteLock 
&readWriteLock) - : m_ReadWriteLock(readWriteLock) -{ +CScopedWriteLock::CScopedWriteLock(CReadWriteLock& readWriteLock) : m_ReadWriteLock(readWriteLock) { m_ReadWriteLock.writeLock(); } -CScopedWriteLock::~CScopedWriteLock() -{ +CScopedWriteLock::~CScopedWriteLock() { m_ReadWriteLock.writeUnlock(); } - - } } - diff --git a/lib/core/CSetEnv.cc b/lib/core/CSetEnv.cc index 79a2e8d1f6..caf30de877 100644 --- a/lib/core/CSetEnv.cc +++ b/lib/core/CSetEnv.cc @@ -7,21 +7,11 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -int CSetEnv::setEnv(const char *name, - const char *value, - int overwrite) -{ +int CSetEnv::setEnv(const char* name, const char* value, int overwrite) { return ::setenv(name, value, overwrite); } - - } } - diff --git a/lib/core/CSetEnv_Windows.cc b/lib/core/CSetEnv_Windows.cc index cdf88eb4d3..11fc4f6556 100644 --- a/lib/core/CSetEnv_Windows.cc +++ b/lib/core/CSetEnv_Windows.cc @@ -7,26 +7,15 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -int CSetEnv::setEnv(const char *name, - const char *value, - int overwrite) -{ - if (overwrite == 0 && ::getenv(name) != 0) - { +int CSetEnv::setEnv(const char* name, const char* value, int overwrite) { + if (overwrite == 0 && ::getenv(name) != 0) { return 0; } return ::_putenv_s(name, value); } - - } } - diff --git a/lib/core/CSetMode.cc b/lib/core/CSetMode.cc index 7603dcff19..49a0351ba8 100644 --- a/lib/core/CSetMode.cc +++ b/lib/core/CSetMode.cc @@ -5,23 +5,15 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -int CSetMode::setMode(int /* fd */, int /* mode */) -{ +int CSetMode::setMode(int /* fd */, int /* mode */) { return 0; } -int CSetMode::setBinaryMode(int /* fd */) -{ +int CSetMode::setBinaryMode(int /* fd */) { return 0; } - } } - diff --git a/lib/core/CSetMode_Windows.cc b/lib/core/CSetMode_Windows.cc index 30fb5e619d..eb58e46ad0 100644 --- a/lib/core/CSetMode_Windows.cc +++ b/lib/core/CSetMode_Windows.cc @@ -5,26 +5,18 @@ */ #include -#include #include +#include -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { -int CSetMode::setMode(int fd, int mode) -{ +int CSetMode::setMode(int fd, int mode) { return _setmode(fd, mode); } -int CSetMode::setBinaryMode(int fd) -{ +int CSetMode::setBinaryMode(int fd) { return _setmode(fd, _O_BINARY); } - - } } - diff --git a/lib/core/CShellArgQuoter.cc b/lib/core/CShellArgQuoter.cc index 63c8864f59..ffa1850967 100644 --- a/lib/core/CShellArgQuoter.cc +++ b/lib/core/CShellArgQuoter.cc @@ -5,17 +5,11 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -std::string CShellArgQuoter::quote(const std::string &arg) -{ - if (arg.empty()) - { +std::string CShellArgQuoter::quote(const std::string& arg) { + if (arg.empty()) { return "''"; } @@ -29,45 +23,35 @@ std::string CShellArgQuoter::quote(const std::string &arg) // variables bool insideSingleQuote(false); - for (std::string::const_iterator iter = arg.begin(); - iter != arg.end(); - ++iter) - { - switch (*iter) - { - case '\'': - case '!': - // Take single quotes and exclamation marks outside of the main - // single quoted string and escape them individually using - // backslashes - if (insideSingleQuote) - { - result += '\''; - insideSingleQuote = false; - } - result += '\\'; - result += *iter; - break; - default: - if (!insideSingleQuote) - { - result += '\''; - insideSingleQuote = true; - } - result += *iter; - break; + for (std::string::const_iterator iter = arg.begin(); iter 
!= arg.end(); ++iter) { + switch (*iter) { + case '\'': + case '!': + // Take single quotes and exclamation marks outside of the main + // single quoted string and escape them individually using + // backslashes + if (insideSingleQuote) { + result += '\''; + insideSingleQuote = false; + } + result += '\\'; + result += *iter; + break; + default: + if (!insideSingleQuote) { + result += '\''; + insideSingleQuote = true; + } + result += *iter; + break; } } - if (insideSingleQuote) - { + if (insideSingleQuote) { result += '\''; } return result; } - - } } - diff --git a/lib/core/CShellArgQuoter_Windows.cc b/lib/core/CShellArgQuoter_Windows.cc index 405f370adf..62fa79caf3 100644 --- a/lib/core/CShellArgQuoter_Windows.cc +++ b/lib/core/CShellArgQuoter_Windows.cc @@ -9,25 +9,16 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -std::string CShellArgQuoter::quote(const std::string &arg) -{ - if (arg.empty()) - { +std::string CShellArgQuoter::quote(const std::string& arg) { + if (arg.empty()) { return "\"\""; } // Simple command line options should not be quoted - if (arg.length() == 2 && - (arg[0] == '/' || arg[0] == '-') && - ::isalnum(static_cast(arg[1]))) - { + if (arg.length() == 2 && (arg[0] == '/' || arg[0] == '-') && ::isalnum(static_cast(arg[1]))) { return arg; } @@ -42,8 +33,7 @@ std::string CShellArgQuoter::quote(const std::string &arg) // argument to cmd /c (i.e. the name of the program/script to be run) MUST // be escaped like this, because the more complex method below will stop the // program/script being found if its path contains spaces. - if (arg.find('"') == std::string::npos && arg[arg.length() - 1] != '\\') - { + if (arg.find('"') == std::string::npos && arg[arg.length() - 1] != '\\') { result += '"'; result += arg; result += '"'; @@ -58,24 +48,20 @@ std::string CShellArgQuoter::quote(const std::string &arg) // is impossible for the Windows command prompt. result += "^\""; - for (std::string::const_iterator iter = arg.begin(); - iter != arg.end(); - ++iter) - { - switch (*iter) - { - case '(': - case ')': - case '%': - case '!': - case '^': - case '"': - case '<': - case '>': - case '&': - case '|': - result += '^'; - break; + for (std::string::const_iterator iter = arg.begin(); iter != arg.end(); ++iter) { + switch (*iter) { + case '(': + case ')': + case '%': + case '!': + case '^': + case '"': + case '<': + case '>': + case '&': + case '|': + result += '^'; + break; } result += *iter; } @@ -84,8 +70,5 @@ std::string CShellArgQuoter::quote(const std::string &arg) return result; } - - } } - diff --git a/lib/core/CSleep.cc b/lib/core/CSleep.cc index d8bc82a1e9..a69eec8adb 100644 --- a/lib/core/CSleep.cc +++ b/lib/core/CSleep.cc @@ -9,37 +9,25 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { // Default processing delay is 100 milliseconds const uint32_t CSleep::DEFAULT_PROCESSING_DELAY(100); +void CSleep::sleep(uint32_t milliseconds) { + if (milliseconds > 0) { + struct timespec delay = {milliseconds / 1000, (milliseconds % 1000) * 1000000}; -void CSleep::sleep(uint32_t milliseconds) -{ - if (milliseconds > 0) - { - struct timespec delay = { milliseconds / 1000, (milliseconds % 1000) * 1000000 }; - - if (::nanosleep(&delay, 0) < 0) - { + if (::nanosleep(&delay, 0) < 0) { LOG_WARN("nanosleep interrupted"); } } } -void CSleep::delayProcessing() -{ +void CSleep::delayProcessing() { // 0.1 seconds is a good length of time to delay processing. 
CSleep::sleep(DEFAULT_PROCESSING_DELAY); } - - } } - diff --git a/lib/core/CSleep_Windows.cc b/lib/core/CSleep_Windows.cc index 2e5c9af8e9..77270b160f 100644 --- a/lib/core/CSleep_Windows.cc +++ b/lib/core/CSleep_Windows.cc @@ -7,32 +7,21 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { // Default processing delay is 100 milliseconds const uint32_t CSleep::DEFAULT_PROCESSING_DELAY(100); - -void CSleep::sleep(uint32_t milliseconds) -{ - if (milliseconds > 0) - { +void CSleep::sleep(uint32_t milliseconds) { + if (milliseconds > 0) { Sleep(milliseconds); } } -void CSleep::delayProcessing() -{ +void CSleep::delayProcessing() { // 0.1 seconds is a good length of time to delay processing. CSleep::sleep(DEFAULT_PROCESSING_DELAY); } - - } } - diff --git a/lib/core/CStat.cc b/lib/core/CStat.cc index 87259f634f..13a82199e4 100644 --- a/lib/core/CStat.cc +++ b/lib/core/CStat.cc @@ -5,41 +5,32 @@ */ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -CStat::CStat() : m_Value(uint64_t(0)) -{ +CStat::CStat() : m_Value(uint64_t(0)) { } -void CStat::increment() -{ +void CStat::increment() { m_Value.fetch_add(1); } -void CStat::increment(uint64_t value) -{ +void CStat::increment(uint64_t value) { m_Value.fetch_add(value); } -void CStat::decrement() -{ +void CStat::decrement() { m_Value.fetch_sub(1); } -void CStat::set(uint64_t value) -{ +void CStat::set(uint64_t value) { m_Value.store(value); } -uint64_t CStat::value() const -{ +uint64_t CStat::value() const { return m_Value; } } // core } // ml - diff --git a/lib/core/CStateCompressor.cc b/lib/core/CStateCompressor.cc index a58f91829a..977d0dbbd0 100644 --- a/lib/core/CStateCompressor.cc +++ b/lib/core/CStateCompressor.cc @@ -11,76 +11,54 @@ #include #include - -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { const std::string CStateCompressor::COMPRESSED_ATTRIBUTE("compressed"); const std::string CStateCompressor::END_OF_STREAM_ATTRIBUTE("eos"); - -CStateCompressor::CStateCompressor(CDataAdder &compressedAdder) - : m_FilterSink(compressedAdder), - m_OutStream(boost::make_shared(boost::ref(m_FilterSink))) -{ +CStateCompressor::CStateCompressor(CDataAdder& compressedAdder) + : m_FilterSink(compressedAdder), m_OutStream(boost::make_shared(boost::ref(m_FilterSink))) { LOG_TRACE("New compressor"); } -CDataAdder::TOStreamP CStateCompressor::addStreamed(const std::string &index, - const std::string &baseId) -{ +CDataAdder::TOStreamP CStateCompressor::addStreamed(const std::string& index, const std::string& baseId) { LOG_TRACE("StateCompressor asking for index " << index); m_FilterSink.index(index, baseId); return m_OutStream; } -bool CStateCompressor::streamComplete(CDataAdder::TOStreamP &/*strm*/, - bool /*force*/) -{ +bool CStateCompressor::streamComplete(CDataAdder::TOStreamP& /*strm*/, bool /*force*/) { LOG_TRACE("Stream Complete"); m_OutStream->close(); return m_FilterSink.allWritesSuccessful(); } -size_t CStateCompressor::numCompressedDocs() const -{ +size_t CStateCompressor::numCompressedDocs() const { return m_FilterSink.numCompressedDocs(); } -CStateCompressor::CChunkFilter::CChunkFilter(CDataAdder &adder) - : m_Adder(adder), - m_CurrentDocNum(1), - m_BytesDone(0), - m_MaxDocSize(adder.maxDocumentSize()), - m_WritesSuccessful(true) -{ +CStateCompressor::CChunkFilter::CChunkFilter(CDataAdder& adder) + : m_Adder(adder), m_CurrentDocNum(1), m_BytesDone(0), m_MaxDocSize(adder.maxDocumentSize()), m_WritesSuccessful(true) { } -std::streamsize 
CStateCompressor::CChunkFilter::write(const char *s, std::streamsize n) -{ +std::streamsize CStateCompressor::CChunkFilter::write(const char* s, std::streamsize n) { // Write up to n characters from the buffer // s to the output sequence, returning the // number of characters written std::streamsize written = 0; - while (n > 0) - { - if (!m_OStream) - { - const std::string ¤tDocId = m_Adder.makeCurrentDocId(m_BaseId, - m_CurrentDocNum); + while (n > 0) { + if (!m_OStream) { + const std::string& currentDocId = m_Adder.makeCurrentDocId(m_BaseId, m_CurrentDocNum); LOG_TRACE("Add streamed: " << m_Index << ", " << currentDocId); m_OStream = m_Adder.addStreamed(m_Index, currentDocId); - if (!m_OStream) - { + if (!m_OStream) { LOG_ERROR("Failed to connect to store"); return 0; } - if (m_OStream->bad()) - { + if (m_OStream->bad()) { LOG_ERROR("Error connecting to store"); return 0; } @@ -94,16 +72,13 @@ std::streamsize CStateCompressor::CChunkFilter::write(const char *s, std::stream LOG_TRACE("Write: " << header); m_OStream->write(header.c_str(), header.size()); m_BytesDone += header.size(); - } - else - { + } else { LOG_TRACE("Write: ,"); m_OStream->write(",", 1); m_BytesDone += 1; } this->writeInternal(s, written, n); - if (m_BytesDone >= (m_MaxDocSize - 1)) - { + if (m_BytesDone >= (m_MaxDocSize - 1)) { LOG_TRACE("Terminated stream " << m_CurrentDocNum); this->closeStream(false); m_OStream.reset(); @@ -114,18 +89,14 @@ std::streamsize CStateCompressor::CChunkFilter::write(const char *s, std::stream return written; } -void CStateCompressor::CChunkFilter::close() -{ +void CStateCompressor::CChunkFilter::close() { this->closeStream(true); } -void CStateCompressor::CChunkFilter::closeStream(bool isFinal) -{ - if (m_OStream) - { +void CStateCompressor::CChunkFilter::closeStream(bool isFinal) { + if (m_OStream) { std::string footer(1, ']'); - if (isFinal) - { + if (isFinal) { footer += ",\""; footer += END_OF_STREAM_ATTRIBUTE; footer += "\":true"; @@ -140,17 +111,12 @@ void CStateCompressor::CChunkFilter::closeStream(bool isFinal) } } -void CStateCompressor::CChunkFilter::index(const std::string &index, - const std::string &baseId) -{ +void CStateCompressor::CChunkFilter::index(const std::string& index, const std::string& baseId) { m_Index = index; m_BaseId = baseId; } -void CStateCompressor::CChunkFilter::writeInternal(const char *s, - std::streamsize &written, - std::streamsize &n) -{ +void CStateCompressor::CChunkFilter::writeInternal(const char* s, std::streamsize& written, std::streamsize& n) { std::size_t bytesToWrite = std::min(std::size_t(n), m_MaxDocSize - m_BytesDone); LOG_TRACE("Writing string: " << std::string(&s[written], bytesToWrite)); m_OStream->write("\"", 1); @@ -161,13 +127,11 @@ void CStateCompressor::CChunkFilter::writeInternal(const char *s, m_BytesDone += bytesToWrite + 2; } -bool CStateCompressor::CChunkFilter::allWritesSuccessful() -{ +bool CStateCompressor::CChunkFilter::allWritesSuccessful() { return m_WritesSuccessful; } -size_t CStateCompressor::CChunkFilter::numCompressedDocs() const -{ +size_t CStateCompressor::CChunkFilter::numCompressedDocs() const { return m_CurrentDocNum - 1; } diff --git a/lib/core/CStateDecompressor.cc b/lib/core/CStateDecompressor.cc index a69511a81a..f65628d8dd 100644 --- a/lib/core/CStateDecompressor.cc +++ b/lib/core/CStateDecompressor.cc @@ -16,81 +16,64 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { const std::string CStateDecompressor::EMPTY_DATA("H4sIAAAAAAAA/4uOBQApu0wNAgAAAA=="); 
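// Illustration (not part of the original patch): each document written by
// CStateCompressor::CChunkFilter has the shape
//     {"compressed":["<chunk>","<chunk>", ...]}
// and the final document carries an explicit end-of-stream marker:
//     {"compressed":[...],"eos":true}
// where each <chunk> is gzipped, base64-encoded state (hence the
// gzip_decompressor and CBase64Decoder in the filter chain below).
// EMPTY_DATA above is presumably the encoding of an empty payload, so a
// reader always receives a well-formed stream even when nothing was stored.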
-CStateDecompressor::CStateDecompressor(CDataSearcher &compressedSearcher) : - m_Searcher(compressedSearcher), - m_FilterSource(compressedSearcher) -{ +CStateDecompressor::CStateDecompressor(CDataSearcher& compressedSearcher) + : m_Searcher(compressedSearcher), m_FilterSource(compressedSearcher) { m_InFilter.reset(new TFilteredInput); m_InFilter->push(boost::iostreams::gzip_decompressor()); m_InFilter->push(CBase64Decoder()); m_InFilter->push(boost::ref(m_FilterSource)); } -CDataSearcher::TIStreamP CStateDecompressor::search(size_t /*currentDocNum*/, - size_t /*limit*/) -{ +CDataSearcher::TIStreamP CStateDecompressor::search(size_t /*currentDocNum*/, size_t /*limit*/) { return m_InFilter; } -void CStateDecompressor::setStateRestoreSearch(const std::string &index) -{ +void CStateDecompressor::setStateRestoreSearch(const std::string& index) { m_Searcher.setStateRestoreSearch(index); } -void CStateDecompressor::setStateRestoreSearch(const std::string &index, - const std::string &id) -{ +void CStateDecompressor::setStateRestoreSearch(const std::string& index, const std::string& id) { m_Searcher.setStateRestoreSearch(index, id); } -CStateDecompressor::CDechunkFilter::CDechunkFilter(CDataSearcher &searcher) : - m_Initialised(false), - m_SentData(false), - m_Searcher(searcher), - m_CurrentDocNum(1), - m_EndOfStream(false), - m_BufferOffset(0), - m_NestedLevel(1) -{ +CStateDecompressor::CDechunkFilter::CDechunkFilter(CDataSearcher& searcher) + : m_Initialised(false), + m_SentData(false), + m_Searcher(searcher), + m_CurrentDocNum(1), + m_EndOfStream(false), + m_BufferOffset(0), + m_NestedLevel(1) { } -std::streamsize CStateDecompressor::CDechunkFilter::read(char *s, std::streamsize n) -{ - if (m_EndOfStream) - { +std::streamsize CStateDecompressor::CDechunkFilter::read(char* s, std::streamsize n) { + if (m_EndOfStream) { LOG_TRACE("EOS -1"); return -1; } // return number of bytes read, -1 for EOF std::streamsize bytesDone = 0; - while (bytesDone < n) - { - if (!m_IStream) - { + while (bytesDone < n) { + if (!m_IStream) { // Get a new input stream LOG_TRACE("Getting new stream, for document number " << m_CurrentDocNum); m_IStream = m_Searcher.search(m_CurrentDocNum, 1); - if (!m_IStream) - { + if (!m_IStream) { LOG_ERROR("Unable to connect to data store"); return this->endOfStream(s, n, bytesDone); } - if (m_IStream->bad()) - { + if (m_IStream->bad()) { LOG_ERROR("Error connecting to data store"); return this->endOfStream(s, n, bytesDone); } - if (m_IStream->fail()) - { + if (m_IStream->fail()) { m_EndOfStream = true; // This is not fatal - we just didn't find the given document number // Presume that we have finished @@ -101,20 +84,17 @@ std::streamsize CStateDecompressor::CDechunkFilter::read(char *s, std::streamsiz m_InputStreamWrapper.reset(new rapidjson::IStreamWrapper(*m_IStream)); m_Reader.reset(new rapidjson::Reader); - if (!this->readHeader()) - { + if (!this->readHeader()) { return this->endOfStream(s, n, bytesDone); } } this->handleRead(s, n, bytesDone); - if (m_EndOfStream) - { + if (m_EndOfStream) { return this->endOfStream(s, n, bytesDone); } - if ((m_IStream) && (m_IStream->eof())) - { + if ((m_IStream) && (m_IStream->eof())) { LOG_TRACE("Stream EOF"); m_IStream.reset(); ++m_CurrentDocNum; @@ -124,13 +104,10 @@ std::streamsize CStateDecompressor::CDechunkFilter::read(char *s, std::streamsiz return bytesDone; } -bool CStateDecompressor::CDechunkFilter::parseNext() -{ - if (m_Reader->HasParseError()) - { - const char *error(rapidjson::GetParseError_En(m_Reader->GetParseErrorCode())); - 
LOG_ERROR("Error parsing JSON at offset " << m_Reader->GetErrorOffset() << - ": " << ((error != 0) ? error : "No message")); +bool CStateDecompressor::CDechunkFilter::parseNext() { + if (m_Reader->HasParseError()) { + const char* error(rapidjson::GetParseError_En(m_Reader->GetParseErrorCode())); + LOG_ERROR("Error parsing JSON at offset " << m_Reader->GetErrorOffset() << ": " << ((error != 0) ? error : "No message")); return false; } @@ -139,12 +116,10 @@ bool CStateDecompressor::CDechunkFilter::parseNext() return m_Reader->IterativeParseNext(*m_InputStreamWrapper, m_Handler); } -bool CStateDecompressor::CDechunkFilter::readHeader() -{ +bool CStateDecompressor::CDechunkFilter::readHeader() { m_Reader->IterativeParseInit(); - if (this->parseNext() == false) - { + if (this->parseNext() == false) { LOG_ERROR("Failed to find valid JSON"); m_Initialised = false; m_IStream.reset(); @@ -152,22 +127,16 @@ bool CStateDecompressor::CDechunkFilter::readHeader() return false; } - while (this->parseNext()) - { + while (this->parseNext()) { if (m_Handler.s_Type == SRapidJsonHandler::E_TokenKey && - CStateCompressor::COMPRESSED_ATTRIBUTE.compare(0, CStateCompressor::COMPRESSED_ATTRIBUTE.length(), - m_Handler.s_CompressedChunk, m_Handler.s_CompressedChunkLength) == 0) - { - if (this->parseNext() && - m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart) - { + CStateCompressor::COMPRESSED_ATTRIBUTE.compare( + 0, CStateCompressor::COMPRESSED_ATTRIBUTE.length(), m_Handler.s_CompressedChunk, m_Handler.s_CompressedChunkLength) == 0) { + if (this->parseNext() && m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart) { m_Initialised = true; m_BufferOffset = 0; return true; } - } - else if (m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectStart) - { + } else if (m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectStart) { ++m_NestedLevel; } } @@ -180,17 +149,14 @@ bool CStateDecompressor::CDechunkFilter::readHeader() return false; } -void CStateDecompressor::CDechunkFilter::handleRead(char *s, std::streamsize n, std::streamsize &bytesDone) -{ +void CStateDecompressor::CDechunkFilter::handleRead(char* s, std::streamsize n, std::streamsize& bytesDone) { // Extract data from the JSON array "compressed" - if (!m_Initialised) - { + if (!m_Initialised) { return; } // Copy any outstanding data - if (m_BufferOffset > 0) - { + if (m_BufferOffset > 0) { std::streamsize toCopy = std::min((n - bytesDone), (m_Handler.s_CompressedChunkLength - m_BufferOffset)); std::memcpy(s + bytesDone, m_Handler.s_CompressedChunk + m_BufferOffset, toCopy); bytesDone += toCopy; @@ -198,36 +164,27 @@ void CStateDecompressor::CDechunkFilter::handleRead(char *s, std::streamsize n, } // Expect to have data in an array - while (bytesDone < n && - this->parseNext()) - { + while (bytesDone < n && this->parseNext()) { m_BufferOffset = 0; - if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd) - { + if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd) { LOG_TRACE("Come to end of array"); - if (this->parseNext() && - m_Handler.s_Type == SRapidJsonHandler::E_TokenKey && - CStateCompressor::END_OF_STREAM_ATTRIBUTE.compare(0, CStateCompressor::END_OF_STREAM_ATTRIBUTE.length(), - m_Handler.s_CompressedChunk, m_Handler.s_CompressedChunkLength) == 0) - { - LOG_DEBUG("Explicit end-of-stream marker found in document with index " << - m_CurrentDocNum); + if (this->parseNext() && m_Handler.s_Type == SRapidJsonHandler::E_TokenKey && + CStateCompressor::END_OF_STREAM_ATTRIBUTE.compare(0, + CStateCompressor::END_OF_STREAM_ATTRIBUTE.length(), 
+ m_Handler.s_CompressedChunk, + m_Handler.s_CompressedChunkLength) == 0) { + LOG_DEBUG("Explicit end-of-stream marker found in document with index " << m_CurrentDocNum); // Read the value of the CStateCompressor::END_OF_STREAM_ATTRIBUTE field and the closing brace - if (this->parseNext() && - m_Handler.s_Type != SRapidJsonHandler::E_TokenBool) - { - LOG_ERROR("Expecting bool value to follow " << - CStateCompressor::END_OF_STREAM_ATTRIBUTE << ", got " << m_Handler.s_Type); + if (this->parseNext() && m_Handler.s_Type != SRapidJsonHandler::E_TokenBool) { + LOG_ERROR("Expecting bool value to follow " << CStateCompressor::END_OF_STREAM_ATTRIBUTE << ", got " + << m_Handler.s_Type); } - while (m_NestedLevel > 0) - { - if (this->parseNext() && - m_Handler.s_Type != SRapidJsonHandler::E_TokenObjectEnd) - { - LOG_ERROR("Expecting end object to follow " << - CStateCompressor::END_OF_STREAM_ATTRIBUTE << ", got " << m_Handler.s_Type); + while (m_NestedLevel > 0) { + if (this->parseNext() && m_Handler.s_Type != SRapidJsonHandler::E_TokenObjectEnd) { + LOG_ERROR("Expecting end object to follow " << CStateCompressor::END_OF_STREAM_ATTRIBUTE << ", got " + << m_Handler.s_Type); } --m_NestedLevel; @@ -242,13 +199,10 @@ void CStateDecompressor::CDechunkFilter::handleRead(char *s, std::streamsize n, break; } m_SentData = true; - if (m_Handler.s_CompressedChunkLength <= (n - bytesDone)) - { + if (m_Handler.s_CompressedChunkLength <= (n - bytesDone)) { std::memcpy(s + bytesDone, m_Handler.s_CompressedChunk, m_Handler.s_CompressedChunkLength); bytesDone += m_Handler.s_CompressedChunkLength; - } - else - { + } else { std::streamsize toCopy = n - bytesDone; std::memcpy(s + bytesDone, m_Handler.s_CompressedChunk, toCopy); bytesDone += toCopy; @@ -258,12 +212,10 @@ void CStateDecompressor::CDechunkFilter::handleRead(char *s, std::streamsize n, } } -std::streamsize CStateDecompressor::CDechunkFilter::endOfStream(char *s, std::streamsize n, std::streamsize bytesDone) -{ +std::streamsize CStateDecompressor::CDechunkFilter::endOfStream(char* s, std::streamsize n, std::streamsize bytesDone) { // return [ ] if not m_Initialised m_EndOfStream = true; - if (!m_SentData && bytesDone == 0) - { + if (!m_SentData && bytesDone == 0) { std::streamsize toCopy = std::min(std::streamsize(EMPTY_DATA.size()), n); ::memcpy(s, EMPTY_DATA.c_str(), toCopy); return toCopy; @@ -274,52 +226,44 @@ std::streamsize CStateDecompressor::CDechunkFilter::endOfStream(char *s, std::st return (bytesDone == 0) ? 
-1 : bytesDone; } -void CStateDecompressor::CDechunkFilter::close() -{ +void CStateDecompressor::CDechunkFilter::close() { } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::Bool(bool) -{ +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::Bool(bool) { s_Type = E_TokenBool; return true; } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::String(const char *str, rapidjson::SizeType length, bool) -{ +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::String(const char* str, rapidjson::SizeType length, bool) { s_Type = E_TokenString; s_CompressedChunk = str; s_CompressedChunkLength = length; return true; } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::Key(const char *str, rapidjson::SizeType length, bool) -{ +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::Key(const char* str, rapidjson::SizeType length, bool) { s_Type = E_TokenKey; s_CompressedChunk = str; s_CompressedChunkLength = length; return true; } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::StartObject() -{ +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::StartObject() { s_Type = E_TokenObjectStart; return true; } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::EndObject(rapidjson::SizeType) -{ +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::EndObject(rapidjson::SizeType) { s_Type = E_TokenObjectEnd; return true; } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::StartArray() -{ +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::StartArray() { s_Type = E_TokenArrayStart; return true; } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::EndArray(rapidjson::SizeType) -{ +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::EndArray(rapidjson::SizeType) { s_Type = E_TokenArrayEnd; return true; } diff --git a/lib/core/CStateMachine.cc b/lib/core/CStateMachine.cc index 6fa9252d1d..711de846cd 100644 --- a/lib/core/CStateMachine.cc +++ b/lib/core/CStateMachine.cc @@ -6,26 +6,23 @@ #include -#include #include #include #include -#include +#include #include #include #include +#include #include #include #include -namespace ml -{ -namespace core -{ -namespace -{ +namespace ml { +namespace core { +namespace { // CStateMachine const std::string MACHINE_TAG("a"); @@ -38,41 +35,31 @@ const std::string TRANSITION_FUNCTION_TAG("c"); std::size_t BAD_MACHINE = boost::numeric::bounds::highest(); CFastMutex mutex; - } -void CStateMachine::expectedNumberMachines(std::size_t number) -{ +void CStateMachine::expectedNumberMachines(std::size_t number) { CScopedFastLock lock(mutex); ms_Machines.capacity(number); } -CStateMachine CStateMachine::create(const TStrVec &alphabet, - const TStrVec &states, - const TSizeVecVec &transitionFunction, - std::size_t state) -{ +CStateMachine +CStateMachine::create(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction, std::size_t state) { // Validate that the alphabet, states, transition function, // and initial state are consistent. 
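// Illustration (not part of the original patch): the transition function is
// indexed as transitionFunction[symbol][state] -> next state, so with
// alphabet {"timeout", "reset"} and states {"idle", "active"} it would be a
// 2x2 table; apply(symbol) reads row `symbol`, column m_State.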
CStateMachine result; - if (state >= states.size()) - { + if (state >= states.size()) { LOG_ERROR("Invalid initial state: " << state); return result; } - if (alphabet.empty() || alphabet.size() != transitionFunction.size()) - { + if (alphabet.empty() || alphabet.size() != transitionFunction.size()) { LOG_ERROR("Bad alphabet: " << core::CContainerPrinter::print(alphabet)); return result; } - for (const auto &function : transitionFunction) - { - if (states.size() != function.size()) - { - LOG_ERROR("Bad transition function row: " - << core::CContainerPrinter::print(function)); + for (const auto& function : transitionFunction) { + if (states.size() != function.size()) { + LOG_ERROR("Bad transition function row: " << core::CContainerPrinter::print(function)); return result; } } @@ -84,13 +71,11 @@ CStateMachine CStateMachine::create(const TStrVec &alphabet, SLookupMachine machine(alphabet, states, transitionFunction); std::size_t size = ms_Machines.size(); - std::size_t m = find(0, size, machine); - if (m == size || machine != ms_Machines[m]) - { + std::size_t m = find(0, size, machine); + if (m == size || machine != ms_Machines[m]) { CScopedFastLock lock(mutex); m = find(0, ms_Machines.size(), machine); - if (m == ms_Machines.size()) - { + if (m == ms_Machines.size()) { ms_Machines.push_back(SMachine(alphabet, states, transitionFunction)); } } @@ -100,40 +85,32 @@ CStateMachine CStateMachine::create(const TStrVec &alphabet, return result; } -bool CStateMachine::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CStateMachine::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(MACHINE_TAG, m_Machine) RESTORE_BUILT_IN(STATE_TAG, m_State) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CStateMachine::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CStateMachine::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(MACHINE_TAG, m_Machine); inserter.insertValue(STATE_TAG, m_State); } -bool CStateMachine::bad() const -{ +bool CStateMachine::bad() const { return m_Machine == BAD_MACHINE; } -bool CStateMachine::apply(std::size_t symbol) -{ - const TSizeVecVec &table = ms_Machines[m_Machine].s_TransitionFunction; +bool CStateMachine::apply(std::size_t symbol) { + const TSizeVecVec& table = ms_Machines[m_Machine].s_TransitionFunction; - if (symbol >= table.size()) - { + if (symbol >= table.size()) { LOG_ERROR("Bad symbol " << symbol << " not in alphabet [" << table.size() << "]"); return false; } - if (m_State >= table[symbol].size()) - { + if (m_State >= table[symbol].size()) { LOG_ERROR("Bad state " << m_State << " not in states [" << table[symbol].size() << "]"); return false; } @@ -142,114 +119,79 @@ bool CStateMachine::apply(std::size_t symbol) return true; } -std::size_t CStateMachine::state() const -{ +std::size_t CStateMachine::state() const { return m_State; } -std::string CStateMachine::printState(std::size_t state) const -{ - if (state >= ms_Machines[m_Machine].s_States.size()) - { +std::string CStateMachine::printState(std::size_t state) const { + if (state >= ms_Machines[m_Machine].s_States.size()) { return "State Not Found"; } return ms_Machines[m_Machine].s_States[state]; } -std::string CStateMachine::printSymbol(std::size_t symbol) const -{ - if (symbol >= ms_Machines[m_Machine].s_Alphabet.size()) - { +std::string 
CStateMachine::printSymbol(std::size_t symbol) const { + if (symbol >= ms_Machines[m_Machine].s_Alphabet.size()) { return "Symbol Not Found"; } return ms_Machines[m_Machine].s_Alphabet[symbol]; } -uint64_t CStateMachine::checksum() const -{ - return CHashing::hashCombine(static_cast(m_Machine), - static_cast(m_State)); +uint64_t CStateMachine::checksum() const { + return CHashing::hashCombine(static_cast(m_Machine), static_cast(m_State)); } -std::size_t CStateMachine::numberMachines() -{ +std::size_t CStateMachine::numberMachines() { CScopedFastLock lock(mutex); return ms_Machines.size(); } -void CStateMachine::clear() -{ +void CStateMachine::clear() { CScopedFastLock lock(mutex); ms_Machines.clear(); } -std::size_t CStateMachine::find(std::size_t begin, - std::size_t end, - const SLookupMachine &machine) -{ - for (std::size_t i = begin; i < end; ++i) - { - if (machine == ms_Machines[i]) - { +std::size_t CStateMachine::find(std::size_t begin, std::size_t end, const SLookupMachine& machine) { + for (std::size_t i = begin; i < end; ++i) { + if (machine == ms_Machines[i]) { return i; } } return end; } -CStateMachine::CStateMachine() : - m_Machine(BAD_MACHINE), - m_State(0) -{} - -CStateMachine::SMachine::SMachine(const TStrVec &alphabet, - const TStrVec &states, - const TSizeVecVec &transitionFunction) : - s_Alphabet(alphabet), - s_States(states), - s_TransitionFunction(transitionFunction) -{} - -CStateMachine::SMachine::SMachine(const SMachine &other) : - s_Alphabet(other.s_Alphabet), - s_States(other.s_States), - s_TransitionFunction(other.s_TransitionFunction) -{} - -CStateMachine::SLookupMachine::SLookupMachine(const TStrVec &alphabet, - const TStrVec &states, - const TSizeVecVec &transitionFunction) : - s_Alphabet(alphabet), - s_States(states), - s_TransitionFunction(transitionFunction) -{} - -bool CStateMachine::SLookupMachine::operator==(const SMachine &rhs) const -{ - return boost::unwrap_ref(s_TransitionFunction) == rhs.s_TransitionFunction - && boost::unwrap_ref(s_Alphabet) == rhs.s_Alphabet - && boost::unwrap_ref(s_States) == rhs.s_States; +CStateMachine::CStateMachine() : m_Machine(BAD_MACHINE), m_State(0) { +} + +CStateMachine::SMachine::SMachine(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction) + : s_Alphabet(alphabet), s_States(states), s_TransitionFunction(transitionFunction) { } -CStateMachine::CMachineDeque::CMachineDeque() : - m_Capacity(DEFAULT_CAPACITY), - m_NumberMachines(0) -{ +CStateMachine::SMachine::SMachine(const SMachine& other) + : s_Alphabet(other.s_Alphabet), s_States(other.s_States), s_TransitionFunction(other.s_TransitionFunction) { +} + +CStateMachine::SLookupMachine::SLookupMachine(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction) + : s_Alphabet(alphabet), s_States(states), s_TransitionFunction(transitionFunction) { +} + +bool CStateMachine::SLookupMachine::operator==(const SMachine& rhs) const { + return boost::unwrap_ref(s_TransitionFunction) == rhs.s_TransitionFunction && boost::unwrap_ref(s_Alphabet) == rhs.s_Alphabet && + boost::unwrap_ref(s_States) == rhs.s_States; +} + +CStateMachine::CMachineDeque::CMachineDeque() : m_Capacity(DEFAULT_CAPACITY), m_NumberMachines(0) { m_Machines.push_back(TMachineVec()); m_Machines.back().reserve(m_Capacity); } -void CStateMachine::CMachineDeque::capacity(std::size_t capacity) -{ +void CStateMachine::CMachineDeque::capacity(std::size_t capacity) { m_Capacity = capacity; } -const CStateMachine::SMachine 
&CStateMachine::CMachineDeque::operator[](std::size_t pos) const -{ - for (const auto &machines : m_Machines) - { - if (pos < machines.size()) - { +const CStateMachine::SMachine& CStateMachine::CMachineDeque::operator[](std::size_t pos) const { + for (const auto& machines : m_Machines) { + if (pos < machines.size()) { return machines[pos]; } pos -= machines.size(); @@ -257,15 +199,12 @@ const CStateMachine::SMachine &CStateMachine::CMachineDeque::operator[](std::siz LOG_ABORT("Invalid index '" << pos << "'"); } -std::size_t CStateMachine::CMachineDeque::size() const -{ +std::size_t CStateMachine::CMachineDeque::size() const { return m_NumberMachines.load(std::memory_order_acquire); } -void CStateMachine::CMachineDeque::push_back(const SMachine &machine) -{ - if (m_Machines.back().size() == m_Capacity) - { +void CStateMachine::CMachineDeque::push_back(const SMachine& machine) { + if (m_Machines.back().size() == m_Capacity) { m_Machines.push_back(TMachineVec()); m_Machines.back().reserve(m_Capacity); } @@ -273,8 +212,7 @@ void CStateMachine::CMachineDeque::push_back(const SMachine &machine) m_NumberMachines.store(this->size() + 1, std::memory_order_release); } -void CStateMachine::CMachineDeque::clear() -{ +void CStateMachine::CMachineDeque::clear() { m_NumberMachines.store(0); m_Machines.clear(); m_Machines.push_back(TMachineVec()); @@ -282,6 +220,5 @@ void CStateMachine::CMachineDeque::clear() } CStateMachine::CMachineDeque CStateMachine::ms_Machines; - } } diff --git a/lib/core/CStatePersistInserter.cc b/lib/core/CStatePersistInserter.cc index 3d771a6427..60e3d14a24 100644 --- a/lib/core/CStatePersistInserter.cc +++ b/lib/core/CStatePersistInserter.cc @@ -5,35 +5,22 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - -CStatePersistInserter::~CStatePersistInserter() -{ +CStatePersistInserter::~CStatePersistInserter() { } -void CStatePersistInserter::insertValue(const std::string &name, - double value, - CIEEE754::EPrecision precision) -{ +void CStatePersistInserter::insertValue(const std::string& name, double value, CIEEE754::EPrecision precision) { this->insertValue(name, CStringUtils::typeToStringPrecise(value, precision)); } -CStatePersistInserter::CAutoLevel::CAutoLevel(const std::string &name, - CStatePersistInserter &inserter) - : m_Inserter(inserter) -{ +CStatePersistInserter::CAutoLevel::CAutoLevel(const std::string& name, CStatePersistInserter& inserter) : m_Inserter(inserter) { m_Inserter.newLevel(name); } -CStatePersistInserter::CAutoLevel::~CAutoLevel() -{ +CStatePersistInserter::CAutoLevel::~CAutoLevel() { m_Inserter.endLevel(); } - } } - diff --git a/lib/core/CStateRestoreTraverser.cc b/lib/core/CStateRestoreTraverser.cc index 19af9a5202..4c1d36afbc 100644 --- a/lib/core/CStateRestoreTraverser.cc +++ b/lib/core/CStateRestoreTraverser.cc @@ -7,57 +7,38 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CStateRestoreTraverser::CStateRestoreTraverser() - : m_BadState(false) -{ +CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) { } -CStateRestoreTraverser::~CStateRestoreTraverser() -{ +CStateRestoreTraverser::~CStateRestoreTraverser() { } -bool CStateRestoreTraverser::haveBadState() const -{ +bool CStateRestoreTraverser::haveBadState() const { return m_BadState; } -void CStateRestoreTraverser::setBadState() -{ +void CStateRestoreTraverser::setBadState() { m_BadState = true; } -CStateRestoreTraverser::CAutoLevel::CAutoLevel(CStateRestoreTraverser &traverser) - : m_Traverser(traverser), - 
m_Descended(traverser.descend()), - m_BadState(false) -{ +CStateRestoreTraverser::CAutoLevel::CAutoLevel(CStateRestoreTraverser& traverser) + : m_Traverser(traverser), m_Descended(traverser.descend()), m_BadState(false) { } -void CStateRestoreTraverser::CAutoLevel::setBadState() -{ +void CStateRestoreTraverser::CAutoLevel::setBadState() { m_BadState = true; } -CStateRestoreTraverser::CAutoLevel::~CAutoLevel() -{ - if (m_Descended && !m_BadState) - { - if (m_Traverser.ascend() == false) - { +CStateRestoreTraverser::CAutoLevel::~CAutoLevel() { + if (m_Descended && !m_BadState) { + if (m_Traverser.ascend() == false) { LOG_ERROR("Inconsistency - could not ascend following previous descend"); m_Traverser.setBadState(); } } } - - } } - diff --git a/lib/core/CStatistics.cc b/lib/core/CStatistics.cc index a47d1d6ac6..c843c7a465 100644 --- a/lib/core/CStatistics.cc +++ b/lib/core/CStatistics.cc @@ -18,13 +18,10 @@ #include #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -namespace -{ +namespace { using TGenericLineWriter = core::CRapidJsonLineWriter; @@ -37,11 +34,7 @@ static const std::string KEY_TAG("a"); static const std::string VALUE_TAG("b"); //! Helper function to add a string/int pair to JSON writer -void addStringInt(TGenericLineWriter &writer, - const std::string &name, - const std::string &description, - uint64_t stat) -{ +void addStringInt(TGenericLineWriter& writer, const std::string& name, const std::string& description, uint64_t stat) { writer.StartObject(); writer.String(NAME_TYPE); @@ -55,29 +48,23 @@ void addStringInt(TGenericLineWriter &writer, writer.EndObject(); } - } -CStatistics::CStatistics() -{ +CStatistics::CStatistics() { } -CStatistics &CStatistics::instance() -{ +CStatistics& CStatistics::instance() { return ms_Instance; } -CStat &CStatistics::stat(int index) -{ - if (static_cast(index) >= ms_Instance.m_Stats.size()) - { +CStat& CStatistics::stat(int index) { + if (static_cast(index) >= ms_Instance.m_Stats.size()) { LOG_ABORT("Bad index " << index); } return ms_Instance.m_Stats[index]; } -void CStatistics::staticsAcceptPersistInserter(CStatePersistInserter &inserter) -{ +void CStatistics::staticsAcceptPersistInserter(CStatePersistInserter& inserter) { // This does not guarantee that consistent statistics get persisted for a // background persistence. The analytics thread could be updating // statistics while this method is running. There is no danger of memory @@ -91,35 +78,25 @@ void CStatistics::staticsAcceptPersistInserter(CStatePersistInserter &inserter) // statistics, so that it would know it was not updating statistics during // the copy operation.) 
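// Illustration (not part of the original patch): the persisted form is a
// flat sequence of (KEY_TAG, VALUE_TAG) pairs, i.e. a=<stat index> followed
// by b=<stat value> for each statistic, which is exactly what the restore
// loop in staticsAcceptRestoreTraverser() expects.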
- for (int i = 0; i < stat_t::E_LastEnumStat; ++i) - { + for (int i = 0; i < stat_t::E_LastEnumStat; ++i) { inserter.insertValue(KEY_TAG, i); inserter.insertValue(VALUE_TAG, stat(i).value()); } } -bool CStatistics::staticsAcceptRestoreTraverser(CStateRestoreTraverser &traverser) -{ +bool CStatistics::staticsAcceptRestoreTraverser(CStateRestoreTraverser& traverser) { uint64_t value = 0; int key = 0; - do - { - const std::string &name = traverser.name(); - if (name == KEY_TAG) - { + do { + const std::string& name = traverser.name(); + if (name == KEY_TAG) { value = 0; - if (CStringUtils::stringToType(traverser.value(), - key) == false) - { + if (CStringUtils::stringToType(traverser.value(), key) == false) { LOG_ERROR("Invalid key value in " << traverser.value()); return false; } - } - else if (name == VALUE_TAG) - { - if (CStringUtils::stringToType(traverser.value(), - value) == false) - { + } else if (name == VALUE_TAG) { + if (CStringUtils::stringToType(traverser.value(), value) == false) { LOG_ERROR("Invalid stat value in " << traverser.value()); return false; } @@ -127,17 +104,14 @@ bool CStatistics::staticsAcceptRestoreTraverser(CStateRestoreTraverser &traverse key = 0; value = 0; } - } - while (traverser.next()); + } while (traverser.next()); return true; } CStatistics CStatistics::ms_Instance; - -std::ostream &operator<<(std::ostream &o, const CStatistics &/*stats*/) -{ +std::ostream& operator<<(std::ostream& o, const CStatistics& /*stats*/) { rapidjson::OStreamWrapper writeStream(o); TGenericLineWriter writer(writeStream); @@ -148,10 +122,7 @@ std::ostream &operator<<(std::ostream &o, const CStatistics &/*stats*/) "Number of new people not allowed", CStatistics::stat(stat_t::E_NumberNewPeopleNotAllowed).value()); - addStringInt(writer, - "E_NumberNewPeople", - "Number of new people created", - CStatistics::stat(stat_t::E_NumberNewPeople).value()); + addStringInt(writer, "E_NumberNewPeople", "Number of new people created", CStatistics::stat(stat_t::E_NumberNewPeople).value()); addStringInt(writer, "E_NumberNewPeopleRecycled", @@ -198,25 +169,18 @@ std::ostream &operator<<(std::ostream &o, const CStatistics &/*stats*/) "Number of new attributes not allowed", CStatistics::stat(stat_t::E_NumberNewAttributesNotAllowed).value()); - addStringInt(writer, - "E_NumberNewAttributes", - "Number of new attributes created", - CStatistics::stat(stat_t::E_NumberNewAttributes).value()); + addStringInt( + writer, "E_NumberNewAttributes", "Number of new attributes created", CStatistics::stat(stat_t::E_NumberNewAttributes).value()); addStringInt(writer, "E_NumberNewAttributesRecycled", "Number of new attributes recycled into existing space", CStatistics::stat(stat_t::E_NumberNewAttributesRecycled).value()); - addStringInt(writer, - "E_NumberByFields", - "Number of 'by' fields within the model", - CStatistics::stat(stat_t::E_NumberByFields).value()); + addStringInt(writer, "E_NumberByFields", "Number of 'by' fields within the model", CStatistics::stat(stat_t::E_NumberByFields).value()); - addStringInt(writer, - "E_NumberOverFields", - "Number of 'over' fields within the model", - CStatistics::stat(stat_t::E_NumberOverFields).value()); + addStringInt( + writer, "E_NumberOverFields", "Number of 'over' fields within the model", CStatistics::stat(stat_t::E_NumberOverFields).value()); addStringInt(writer, "E_NumberExcludedFrequentInvocations", @@ -246,4 +210,3 @@ std::ostream &operator<<(std::ostream &o, const CStatistics &/*stats*/) } // core } // ml - diff --git a/lib/core/CStopWatch.cc 
b/lib/core/CStopWatch.cc index 0c0224da8b..50c56183a1 100644 --- a/lib/core/CStopWatch.cc +++ b/lib/core/CStopWatch.cc @@ -9,28 +9,17 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CStopWatch::CStopWatch(bool startRunning) - : m_IsRunning(false), - m_Start(0), - m_AccumulatedTime(0) -{ - if (startRunning) - { +CStopWatch::CStopWatch(bool startRunning) : m_IsRunning(false), m_Start(0), m_AccumulatedTime(0) { + if (startRunning) { this->start(); } } -void CStopWatch::start() -{ - if (m_IsRunning) - { +void CStopWatch::start() { + if (m_IsRunning) { LOG_ERROR("Stop watch already running"); return; } @@ -39,10 +28,8 @@ void CStopWatch::start() m_Start = m_MonotonicTime.milliseconds(); } -uint64_t CStopWatch::stop() -{ - if (!m_IsRunning) - { +uint64_t CStopWatch::stop() { + if (!m_IsRunning) { LOG_ERROR("Stop watch not running"); return m_AccumulatedTime; } @@ -54,10 +41,8 @@ uint64_t CStopWatch::stop() return m_AccumulatedTime; } -uint64_t CStopWatch::lap() -{ - if (!m_IsRunning) - { +uint64_t CStopWatch::lap() { + if (!m_IsRunning) { LOG_ERROR("Stop watch not running"); return m_AccumulatedTime; } @@ -65,27 +50,22 @@ uint64_t CStopWatch::lap() return m_AccumulatedTime + this->calcDuration(); } -bool CStopWatch::isRunning() const -{ +bool CStopWatch::isRunning() const { return m_IsRunning; } -void CStopWatch::reset(bool startRunning) -{ +void CStopWatch::reset(bool startRunning) { m_AccumulatedTime = 0; m_IsRunning = false; - if (startRunning) - { + if (startRunning) { this->start(); } } -uint64_t CStopWatch::calcDuration() -{ +uint64_t CStopWatch::calcDuration() { uint64_t current(m_MonotonicTime.milliseconds()); - if (current < m_Start) - { + if (current < m_Start) { LOG_WARN("Monotonic timer has gone backwards - " "stop watch timings will be inaccurate"); m_Start = current; @@ -94,8 +74,5 @@ uint64_t CStopWatch::calcDuration() return current - m_Start; } - - } } - diff --git a/lib/core/CStoredStringPtr.cc b/lib/core/CStoredStringPtr.cc index 2ac628d873..0d150af511 100644 --- a/lib/core/CStoredStringPtr.cc +++ b/lib/core/CStoredStringPtr.cc @@ -12,116 +12,89 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - -CStoredStringPtr::CStoredStringPtr() noexcept - : m_String{} -{ +CStoredStringPtr::CStoredStringPtr() noexcept : m_String{} { } -CStoredStringPtr::CStoredStringPtr(const std::string &str) - : m_String{boost::make_shared(str)} -{ +CStoredStringPtr::CStoredStringPtr(const std::string& str) : m_String{boost::make_shared(str)} { } -CStoredStringPtr::CStoredStringPtr(std::string &&str) - : m_String{boost::make_shared(std::move(str))} -{ +CStoredStringPtr::CStoredStringPtr(std::string&& str) : m_String{boost::make_shared(std::move(str))} { } -void CStoredStringPtr::swap(CStoredStringPtr &other) noexcept -{ +void CStoredStringPtr::swap(CStoredStringPtr& other) noexcept { m_String.swap(other.m_String); } -const std::string &CStoredStringPtr::operator*() const noexcept -{ +const std::string& CStoredStringPtr::operator*() const noexcept { return *m_String; } -const std::string *CStoredStringPtr::operator->() const noexcept -{ +const std::string* CStoredStringPtr::operator->() const noexcept { return m_String.get(); } -const std::string *CStoredStringPtr::get() const noexcept -{ +const std::string* CStoredStringPtr::get() const noexcept { return m_String.get(); } -CStoredStringPtr::operator bool() const noexcept -{ +CStoredStringPtr::operator bool() const noexcept { return m_String.get() != nullptr; } -bool 
CStoredStringPtr::isUnique() const noexcept -{ +bool CStoredStringPtr::isUnique() const noexcept { return m_String.unique(); } -bool CStoredStringPtr::operator==(std::nullptr_t rhs) const noexcept -{ +bool CStoredStringPtr::operator==(std::nullptr_t rhs) const noexcept { return m_String == rhs; } -bool CStoredStringPtr::operator!=(std::nullptr_t rhs) const noexcept -{ +bool CStoredStringPtr::operator!=(std::nullptr_t rhs) const noexcept { return m_String != rhs; } -bool CStoredStringPtr::operator==(const CStoredStringPtr &rhs) const noexcept -{ +bool CStoredStringPtr::operator==(const CStoredStringPtr& rhs) const noexcept { return m_String == rhs.m_String; } -bool CStoredStringPtr::operator!=(const CStoredStringPtr &rhs) const noexcept -{ +bool CStoredStringPtr::operator!=(const CStoredStringPtr& rhs) const noexcept { return m_String != rhs.m_String; } -bool CStoredStringPtr::operator<(const CStoredStringPtr &rhs) const noexcept -{ +bool CStoredStringPtr::operator<(const CStoredStringPtr& rhs) const noexcept { return m_String < rhs.m_String; } -std::size_t CStoredStringPtr::actualMemoryUsage() const -{ +std::size_t CStoredStringPtr::actualMemoryUsage() const { // We convert to a raw pointer here to avoid the "divide by use count" // feature of CMemory's shared_ptr handling return CMemory::dynamicSize(m_String.get()); } -void CStoredStringPtr::debugActualMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CStoredStringPtr::debugActualMemoryUsage(CMemoryUsage::TMemoryUsagePtr mem) const { // This is NOT the standard way to account for the memory of a // shared_ptr - do NOT copy this to other classes with shared_ptr members mem->addItem("m_String", this->actualMemoryUsage()); } -CStoredStringPtr CStoredStringPtr::makeStoredString(const std::string &str) -{ +CStoredStringPtr CStoredStringPtr::makeStoredString(const std::string& str) { return CStoredStringPtr(str); } -CStoredStringPtr CStoredStringPtr::makeStoredString(std::string &&str) -{ +CStoredStringPtr CStoredStringPtr::makeStoredString(std::string&& str) { return CStoredStringPtr(std::move(str)); } -std::size_t hash_value(const CStoredStringPtr &ptr) -{ +std::size_t hash_value(const CStoredStringPtr& ptr) { return boost::hash_value(ptr.m_String); } -void swap(CStoredStringPtr &lhs, CStoredStringPtr &rhs) -{ +void swap(CStoredStringPtr& lhs, CStoredStringPtr& rhs) { lhs.swap(rhs); } - } // core } // ml diff --git a/lib/core/CStrCaseCmp.cc b/lib/core/CStrCaseCmp.cc index db261b3752..975f9fde93 100644 --- a/lib/core/CStrCaseCmp.cc +++ b/lib/core/CStrCaseCmp.cc @@ -7,24 +7,15 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -int CStrCaseCmp::strCaseCmp(const char *s1, const char *s2) -{ +int CStrCaseCmp::strCaseCmp(const char* s1, const char* s2) { return ::strcasecmp(s1, s2); } -int CStrCaseCmp::strNCaseCmp(const char *s1, const char *s2, size_t n) -{ +int CStrCaseCmp::strNCaseCmp(const char* s1, const char* s2, size_t n) { return ::strncasecmp(s1, s2, n); } - - } } - diff --git a/lib/core/CStrCaseCmp_Windows.cc b/lib/core/CStrCaseCmp_Windows.cc index 5c208f919b..de00672f75 100644 --- a/lib/core/CStrCaseCmp_Windows.cc +++ b/lib/core/CStrCaseCmp_Windows.cc @@ -7,24 +7,15 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -int CStrCaseCmp::strCaseCmp(const char *s1, const char *s2) -{ +int CStrCaseCmp::strCaseCmp(const char* s1, const char* s2) { return ::_stricmp(s1, s2); } -int CStrCaseCmp::strNCaseCmp(const char *s1, const char *s2, size_t n) -{ +int 
CStrCaseCmp::strNCaseCmp(const char* s1, const char* s2, size_t n) { return ::_strnicmp(s1, s2, n); } - - } } - diff --git a/lib/core/CStrFTime.cc b/lib/core/CStrFTime.cc index 64076e5203..cefce87662 100644 --- a/lib/core/CStrFTime.cc +++ b/lib/core/CStrFTime.cc @@ -5,22 +5,11 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -size_t CStrFTime::strFTime(char *buf, - size_t maxSize, - const char *format, - struct tm *tm) -{ +size_t CStrFTime::strFTime(char* buf, size_t maxSize, const char* format, struct tm* tm) { return ::strftime(buf, maxSize, format, tm); } - - } } - diff --git a/lib/core/CStrFTime_Windows.cc b/lib/core/CStrFTime_Windows.cc index c09012a7aa..41f0116865 100644 --- a/lib/core/CStrFTime_Windows.cc +++ b/lib/core/CStrFTime_Windows.cc @@ -5,8 +5,8 @@ */ #include -#include #include +#include #include #include @@ -15,21 +15,12 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { // Work around the fact that Windows strftime() treats %z differently to Unix -size_t CStrFTime::strFTime(char *buf, - size_t maxSize, - const char *format, - struct tm *tm) -{ - if (buf == 0 || format == 0 || tm == 0) - { +size_t CStrFTime::strFTime(char* buf, size_t maxSize, const char* format, struct tm* tm) { + if (buf == 0 || format == 0 || tm == 0) { errno = EINVAL; return 0; } @@ -37,8 +28,7 @@ size_t CStrFTime::strFTime(char *buf, std::string adjFormat(format); size_t zPos(adjFormat.find("%z")); - if (zPos != std::string::npos) - { + if (zPos != std::string::npos) { // The approach is to replace the %z with a literal core_t::TTime localTm(CTimezone::instance().localToUtc(*tm)); core_t::TTime gmTm(::_mkgmtime(tm)); @@ -47,33 +37,25 @@ size_t CStrFTime::strFTime(char *buf, core_t::TTime diffHours(diffMinutes / 60); std::ostringstream strm; - strm << ((diffSeconds < 0) ? '-' : '+') - << std::setfill('0') << std::setw(2) << ::_abs64(diffHours) - << std::setfill('0') << std::setw(2) << (::_abs64(diffMinutes) % 60); + strm << ((diffSeconds < 0) ? '-' : '+') << std::setfill('0') << std::setw(2) << ::_abs64(diffHours) << std::setfill('0') + << std::setw(2) << (::_abs64(diffMinutes) % 60); adjFormat.replace(zPos, 2, strm.str()); } zPos = adjFormat.find("%Z"); - if (zPos != std::string::npos) - { - CTimezone &tz = CTimezone::instance(); + if (zPos != std::string::npos) { + CTimezone& tz = CTimezone::instance(); // +ve means in DST; -ve means unknown - if (tm->tm_isdst > 0) - { + if (tm->tm_isdst > 0) { adjFormat.replace(zPos, 2, tz.dstAbbrev()); - } - else - { + } else { adjFormat.replace(zPos, 2, tz.stdAbbrev()); } } return ::strftime(buf, maxSize, adjFormat.c_str(), tm); } - - } } - diff --git a/lib/core/CStrPTime.cc b/lib/core/CStrPTime.cc index e46df5bf61..52be67da9e 100644 --- a/lib/core/CStrPTime.cc +++ b/lib/core/CStrPTime.cc @@ -5,22 +5,13 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -char *CStrPTime::strPTime(const char *buf, - const char *format, - struct tm *tm) +char* CStrPTime::strPTime(const char* buf, const char* format, struct tm* tm) { return ::strptime(buf, format, tm); } - - } } - diff --git a/lib/core/CStrPTime_Linux.cc b/lib/core/CStrPTime_Linux.cc index d4bcc69be9..30283adba3 100644 --- a/lib/core/CStrPTime_Linux.cc +++ b/lib/core/CStrPTime_Linux.cc @@ -12,12 +12,8 @@ #include #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { // On Linux strptime() accepts %z, but doesn't make any changes // to the output based on it! 
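The CStrFTime_Windows workaround above boils down to computing the current UTC offset and splicing a "+HHMM"/"-HHMM" literal into the format string in place of %z before calling strftime(). A minimal standalone sketch of that idea, assuming a POSIX-like system with the non-standard but widely available timegm(); formatWithOffset is an illustrative name, not part of this patch:

#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <string>

// Build a "+HHMM"/"-HHMM" literal for the current UTC offset and substitute it
// for %z before calling strftime(), mirroring the Windows workaround above.
std::string formatWithOffset(struct tm when, std::string format) {
    std::string::size_type zPos = format.find("%z");
    if (zPos != std::string::npos) {
        struct tm copy = when;
        time_t asUtc = ::timegm(&copy);   // fields interpreted as UTC
        copy = when;
        time_t asLocal = ::mktime(&copy); // fields interpreted as local time
        long offset = static_cast<long>(asUtc - asLocal); // UTC offset in seconds
        char literal[8];
        std::snprintf(literal, sizeof(literal), "%c%02ld%02ld",
                      (offset < 0) ? '-' : '+',
                      std::labs(offset) / 3600, (std::labs(offset) % 3600) / 60);
        format.replace(zPos, 2, literal);
    }
    char buf[256];
    return std::string(buf, ::strftime(buf, sizeof(buf), format.c_str(), &when));
}

int main() {
    time_t now = ::time(nullptr);
    struct tm local;
    ::localtime_r(&now, &local);
    std::printf("%s\n", formatWithOffset(local, "%Y-%m-%d %H:%M:%S %z").c_str());
    return 0;
}

Interpreting the same broken-down time once as UTC and once as local time yields the offset without consulting the timezone database directly, which is why the real code pairs localToUtc() with ::_mkgmtime().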
@@ -35,13 +31,9 @@ namespace core
 // at the appropriate point in the buffer.
 //
 // (Interestingly, strptime() works fine on Mac OS X.)
-char *CStrPTime::strPTime(const char *buf,
-                          const char *format,
-                          struct tm *tm)
-{
+char* CStrPTime::strPTime(const char* buf, const char* format, struct tm* tm) {
     // If any of the inputs are NULL then do nothing
-    if (buf == 0 || format == 0 || tm == 0)
-    {
+    if (buf == 0 || format == 0 || tm == 0) {
         return 0;
     }
@@ -49,129 +41,96 @@ char *CStrPTime::strPTime(const char *buf,
     // Replace %Z first if present
     size_t tznamePos(adjFormat.find("%Z"));
-    if (tznamePos != std::string::npos)
-    {
+    if (tznamePos != std::string::npos) {
         // Find the corresponding place in the buffer
-        char *excess(CStrPTime::strPTime(buf,
-                                         adjFormat.substr(0, tznamePos).c_str(),
-                                         tm));
-        if (excess == 0)
-        {
+        char* excess(CStrPTime::strPTime(buf, adjFormat.substr(0, tznamePos).c_str(), tm));
+        if (excess == 0) {
             return 0;
         }
 
         // Skip leading whitespace
-        while (::isspace(static_cast<unsigned char>(*excess)))
-        {
+        while (::isspace(static_cast<unsigned char>(*excess))) {
             ++excess;
         }
 
         // Only GMT and the standard and daylight saving timezone names for the
         // current timezone are supported, as per the strptime() man page
         std::string possTzName(excess);
-        if (possTzName.find("GMT") == 0)
-        {
+        if (possTzName.find("GMT") == 0) {
             adjFormat.replace(tznamePos, 2, "GMT");
-        }
-        else if (possTzName.find(::tzname[0]) == 0)
-        {
+        } else if (possTzName.find(::tzname[0]) == 0) {
             adjFormat.replace(tznamePos, 2, ::tzname[0]);
-        }
-        else if (possTzName.find(::tzname[1]) == 0)
-        {
+        } else if (possTzName.find(::tzname[1]) == 0) {
             adjFormat.replace(tznamePos, 2, ::tzname[1]);
-        }
-        else
-        {
+        } else {
             return 0;
         }
     }
 
     // Check if the format specifier includes a %z
     size_t zPos(adjFormat.find("%z"));
-    if (zPos != std::string::npos)
-    {
+    if (zPos != std::string::npos) {
         // If there's anything except whitespace after the
         // %z it's too complicated
-        if (adjFormat.find_first_not_of(CStringUtils::WHITESPACE_CHARS, zPos + 2) != std::string::npos)
-        {
+        if (adjFormat.find_first_not_of(CStringUtils::WHITESPACE_CHARS, zPos + 2) != std::string::npos) {
             return 0;
         }
 
         adjFormat.erase(zPos);
     }
 
-    char *excess(::strptime(buf, adjFormat.c_str(), tm));
+    char* excess(::strptime(buf, adjFormat.c_str(), tm));
 
     // We only have more work to do if %z was in the string, and
     // the basic strptime() call worked
-    if (excess != 0 && zPos != std::string::npos)
-    {
+    if (excess != 0 && zPos != std::string::npos) {
         // Skip leading whitespace
-        while (::isspace(static_cast<unsigned char>(*excess)))
-        {
+        while (::isspace(static_cast<unsigned char>(*excess))) {
             ++excess;
         }
 
         // We expect something along the lines of +0000 or
         // -0500, i.e. a plus or minus sign followed by 4 digits
         int sign(0);
-        if (*excess == '+')
-        {
+        if (*excess == '+') {
             sign = 1;
-        }
-        else if (*excess == '-')
-        {
+        } else if (*excess == '-') {
             sign = -1;
-        }
-        else
-        {
+        } else {
             return 0;
         }
         ++excess;
 
         int hour(0);
-        if (*excess >= '0' && *excess <= '2')
-        {
+        if (*excess >= '0' && *excess <= '2') {
            hour += (*excess - '0') * 10;
-        }
-        else
-        {
+        } else {
            return 0;
        }
        ++excess;
-        if (*excess >= '0' && *excess <= '9')
-        {
+        if (*excess >= '0' && *excess <= '9') {
            hour += (*excess - '0');
-        }
-        else
-        {
+        } else {
            return 0;
        }
        ++excess;
 
        int minute(0);
-        if (*excess >= '0' && *excess <= '5')
-        {
+        if (*excess >= '0' && *excess <= '5') {
            minute += (*excess - '0') * 10;
-        }
-        else
-        {
+        } else {
            return 0;
        }
        ++excess;
-        if (*excess >= '0' && *excess <= '9')
-        {
+        if (*excess >= '0' && *excess <= '9') {
            minute += (*excess - '0');
-        }
-        else
-        {
+        } else {
            return 0;
        }
@@ -205,8 +164,5 @@ char *CStrPTime::strPTime(const char *buf,
 
     return excess;
 }
-
-
 }
 }
-
diff --git a/lib/core/CStrPTime_Windows.cc b/lib/core/CStrPTime_Windows.cc
index 3a00b9b843..bb0ce167d4 100644
--- a/lib/core/CStrPTime_Windows.cc
+++ b/lib/core/CStrPTime_Windows.cc
@@ -14,15 +14,10 @@
 #include
 
 // We don't have a header for this on Windows, so declare it here
-extern "C"
-char *strptime(const char *buf, const char *fmt, struct tm *tm);
-
-
-namespace ml
-{
-namespace core
-{
+extern "C" char* strptime(const char* buf, const char* fmt, struct tm* tm);
 
+namespace ml {
+namespace core {
 
 // Our Windows strptime() implementation supports the %z and %Z time formats.
 // However, on Windows struct tm doesn't have any members for GMT offset that
@@ -37,13 +32,9 @@ namespace core
 // Also, since strptime() uses the C runtime globals _tzname[0] and _tzname[1],
 // whereas we might want to use a different timezone, we replace %Z in the
 // format string with a string obtained from the CTimezone singleton.
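Read together, the %z strategy shared by the Linux and Windows implementations is: strip %z from the format, let strptime() parse the rest, then consume the "+HHMM"/"-HHMM" suffix by hand and fold it into the epoch time. A compressed standalone sketch of that strategy, assuming POSIX strptime() and the non-standard timegm(); parseWithOffset is a hypothetical name and it skips the stricter digit-range checks the real code performs:

#include <cctype>
#include <cstdio>
#include <ctime>
#include <string>

// Parse buf against format, handling a trailing %z manually and writing the
// resulting UTC epoch time to utc. Returns false on any malformed input.
bool parseWithOffset(const char* buf, std::string format, time_t& utc) {
    std::string::size_type zPos = format.find("%z");
    if (zPos != std::string::npos) {
        format.erase(zPos); // as in the patch, %z must effectively end the format
    }
    struct tm parsed = {};
    const char* excess = ::strptime(buf, format.c_str(), &parsed);
    if (excess == nullptr) {
        return false;
    }
    utc = ::timegm(&parsed); // wall-clock fields, offset not yet applied
    if (zPos == std::string::npos) {
        return true;
    }
    while (std::isspace(static_cast<unsigned char>(*excess))) {
        ++excess;
    }
    int sign = (*excess == '+') ? 1 : (*excess == '-') ? -1 : 0;
    if (sign == 0) {
        return false;
    }
    int digit[4];
    for (int i = 0; i < 4; ++i) {
        ++excess;
        if (*excess < '0' || *excess > '9') {
            return false;
        }
        digit[i] = *excess - '0';
    }
    // "+0500" means five hours ahead of UTC, so subtract to normalise to UTC.
    utc -= sign * (((digit[0] * 10 + digit[1]) * 60 + digit[2] * 10 + digit[3]) * 60);
    return true;
}

int main() {
    time_t utc = 0;
    if (parseWithOffset("2018-03-09 12:42:20 +0000", "%Y-%m-%d %H:%M:%S %z", utc)) {
        std::printf("epoch: %lld\n", static_cast<long long>(utc));
    }
    return 0;
}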
-char *CStrPTime::strPTime(const char *buf, - const char *format, - struct tm *tm) -{ +char* CStrPTime::strPTime(const char* buf, const char* format, struct tm* tm) { // If any of the inputs are NULL then do nothing - if (buf == 0 || format == 0 || tm == 0) - { + if (buf == 0 || format == 0 || tm == 0) { return 0; } @@ -51,47 +42,33 @@ char *CStrPTime::strPTime(const char *buf, // Replace %Z first if present size_t tznamePos(adjFormat.find("%Z")); - if (tznamePos != std::string::npos) - { + if (tznamePos != std::string::npos) { // Find the corresponding place in the buffer - char *excess(CStrPTime::strPTime(buf, - adjFormat.substr(0, tznamePos).c_str(), - tm)); - if (excess == 0) - { + char* excess(CStrPTime::strPTime(buf, adjFormat.substr(0, tznamePos).c_str(), tm)); + if (excess == 0) { return 0; } // Skip leading whitespace - while (::isspace(static_cast(*excess))) - { + while (::isspace(static_cast(*excess))) { ++excess; } // Only GMT and the standard and daylight saving timezone names for the // current timezone are supported, as per the strptime() man page std::string possTzName(excess); - if (possTzName.find("GMT") == 0) - { + if (possTzName.find("GMT") == 0) { adjFormat.replace(tznamePos, 2, "GMT"); - } - else - { - CTimezone &tz = CTimezone::instance(); + } else { + CTimezone& tz = CTimezone::instance(); std::string stdAbbrev(tz.stdAbbrev()); - if (possTzName.find(stdAbbrev) == 0) - { + if (possTzName.find(stdAbbrev) == 0) { adjFormat.replace(tznamePos, 2, stdAbbrev); - } - else - { + } else { std::string dstAbbrev(tz.dstAbbrev()); - if (possTzName.find(dstAbbrev) == 0) - { + if (possTzName.find(dstAbbrev) == 0) { adjFormat.replace(tznamePos, 2, dstAbbrev); - } - else - { + } else { return 0; } } @@ -100,89 +77,68 @@ char *CStrPTime::strPTime(const char *buf, // Check if the format specifier includes a %z size_t zPos(adjFormat.find("%z")); - if (zPos != std::string::npos) - { + if (zPos != std::string::npos) { // If there's anything except whitespace after the // %z it's too complicated - if (adjFormat.find_first_not_of(CStringUtils::WHITESPACE_CHARS, zPos + 2) != std::string::npos) - { + if (adjFormat.find_first_not_of(CStringUtils::WHITESPACE_CHARS, zPos + 2) != std::string::npos) { return 0; } adjFormat.erase(zPos); } - char *excess(::strptime(buf, adjFormat.c_str(), tm)); + char* excess(::strptime(buf, adjFormat.c_str(), tm)); // We only have more work to do if %z was in the string, and // the basic strptime() call worked - if (excess != 0 && zPos != std::string::npos) - { + if (excess != 0 && zPos != std::string::npos) { // Skip leading whitespace - while (::isspace(static_cast(*excess))) - { + while (::isspace(static_cast(*excess))) { ++excess; } // We expect something along the lines of +0000 or // -0500, i.e. 
a plus or minus sign followed by 4 digits core_t::TTime sign(0); - if (*excess == '+') - { + if (*excess == '+') { sign = 1; - } - else if (*excess == '-') - { + } else if (*excess == '-') { sign = -1; - } - else - { + } else { return 0; } ++excess; core_t::TTime hour(0); - if (*excess >= '0' && *excess <= '2') - { + if (*excess >= '0' && *excess <= '2') { hour += (*excess - '0') * 10; - } - else - { + } else { return 0; } ++excess; - if (*excess >= '0' && *excess <= '9') - { + if (*excess >= '0' && *excess <= '9') { hour += (*excess - '0'); - } - else - { + } else { return 0; } ++excess; core_t::TTime minute(0); - if (*excess >= '0' && *excess <= '5') - { + if (*excess >= '0' && *excess <= '5') { minute += (*excess - '0') * 10; - } - else - { + } else { return 0; } ++excess; - if (*excess >= '0' && *excess <= '9') - { + if (*excess >= '0' && *excess <= '9') { minute += (*excess - '0'); - } - else - { + } else { return 0; } @@ -196,17 +152,13 @@ char *CStrPTime::strPTime(const char *buf, utcTime -= sign * minute * 60; utcTime -= sign * hour * 60 * 60; - CTimezone &tz = CTimezone::instance(); - if (tz.utcToLocal(utcTime, *tm) == false) - { + CTimezone& tz = CTimezone::instance(); + if (tz.utcToLocal(utcTime, *tm) == false) { return 0; } } return excess; } - - } } - diff --git a/lib/core/CStrTokR.cc b/lib/core/CStrTokR.cc index ac9973adb6..09c28b797f 100644 --- a/lib/core/CStrTokR.cc +++ b/lib/core/CStrTokR.cc @@ -7,19 +7,11 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -char *CStrTokR::strTokR(char *str, const char *sep, char **lasts) -{ +char* CStrTokR::strTokR(char* str, const char* sep, char** lasts) { return ::strtok_r(str, sep, lasts); } - - } } - diff --git a/lib/core/CStrTokR_Windows.cc b/lib/core/CStrTokR_Windows.cc index e0d85dec4a..27b78488c4 100644 --- a/lib/core/CStrTokR_Windows.cc +++ b/lib/core/CStrTokR_Windows.cc @@ -7,19 +7,11 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -char *CStrTokR::strTokR(char *str, const char *sep, char **lasts) -{ +char* CStrTokR::strTokR(char* str, const char* sep, char** lasts) { return ::strtok_s(str, sep, lasts); } - - } } - diff --git a/lib/core/CStringCache.cc b/lib/core/CStringCache.cc index a245de6a72..5db81f5fed 100644 --- a/lib/core/CStringCache.cc +++ b/lib/core/CStringCache.cc @@ -9,61 +9,47 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { const std::string CStringCache::EMPTY_STRING; - -CStringCache::CStringCache() - : m_HaveCopyOnWriteStrings(false) -{ +CStringCache::CStringCache() : m_HaveCopyOnWriteStrings(false) { // Detect whether strings have copy-on-write semantics - if they don't then // this class may be of little value std::string test1("test copy-on-write"); std::string test2(test1); - if (test2.data() == test1.data()) - { + if (test2.data() == test1.data()) { m_HaveCopyOnWriteStrings = true; } } -bool CStringCache::haveCopyOnWriteStrings() const -{ +bool CStringCache::haveCopyOnWriteStrings() const { return m_HaveCopyOnWriteStrings; } -const std::string &CStringCache::stringFor(const char *str) -{ +const std::string& CStringCache::stringFor(const char* str) { // Stop processing NULL input immediately so that subsequent code doesn't // have to worry about NULL pointers - if (str == 0) - { + if (str == 0) { return EMPTY_STRING; } return this->stringFor(str, ::strlen(str)); } -const std::string &CStringCache::stringFor(const char *str, size_t length) -{ +const std::string& CStringCache::stringFor(const char* 
str, size_t length) { // Stop processing NULL input immediately so that subsequent code doesn't // have to worry about NULL pointers - if (length == 0 || str == 0) - { + if (length == 0 || str == 0) { return EMPTY_STRING; } - CCharPHash hash(str, str + length); + CCharPHash hash(str, str + length); CCharPStrEqual equal(length); TStrUSetCItr iter = m_Cache.find(str, hash, equal); - if (iter == m_Cache.end()) - { + if (iter == m_Cache.end()) { // This involves a temporary string creation, rehash, and other // processing. This is why using this class is only worthwhile if a // small number of cached strings are seen repeatedly. @@ -73,15 +59,11 @@ const std::string &CStringCache::stringFor(const char *str, size_t length) return *iter; } -size_t CStringCache::CStrHash::operator()(const std::string &str) const -{ +size_t CStringCache::CStrHash::operator()(const std::string& str) const { // It is essential that the result of this hash matches that of the method // below size_t hash(0); - for (std::string::const_iterator iter = str.begin(); - iter != str.end(); - ++iter) - { + for (std::string::const_iterator iter = str.begin(); iter != str.end(); ++iter) { hash *= 17; hash += *iter; } @@ -89,42 +71,27 @@ size_t CStringCache::CStrHash::operator()(const std::string &str) const } // Caller is responsible for ensuring that str is not NULL and end > str -inline -CStringCache::CCharPHash::CCharPHash(const char *str, const char *end) - : m_Hash(0) -{ +inline CStringCache::CCharPHash::CCharPHash(const char* str, const char* end) : m_Hash(0) { // It is essential that the result of this hash matches that of the method // above size_t hash(*str); - while (++str != end) - { + while (++str != end) { hash *= 17; hash += *str; } m_Hash = hash; } -inline -size_t CStringCache::CCharPHash::operator()(const char *) const -{ +inline size_t CStringCache::CCharPHash::operator()(const char*) const { return m_Hash; } -inline -CStringCache::CCharPStrEqual::CCharPStrEqual(size_t length) - : m_Length(length) -{ +inline CStringCache::CCharPStrEqual::CCharPStrEqual(size_t length) : m_Length(length) { } // Caller is responsible for ensuring that lhs is not NULL -inline -bool CStringCache::CCharPStrEqual::operator()(const char *lhs, const std::string &rhs) const -{ - return m_Length == rhs.length() && - ::memcmp(lhs, rhs.data(), m_Length) == 0; +inline bool CStringCache::CCharPStrEqual::operator()(const char* lhs, const std::string& rhs) const { + return m_Length == rhs.length() && ::memcmp(lhs, rhs.data(), m_Length) == 0; } - - } } - diff --git a/lib/core/CStringSimilarityTester.cc b/lib/core/CStringSimilarityTester.cc index f5726a3882..f21977684d 100644 --- a/lib/core/CStringSimilarityTester.cc +++ b/lib/core/CStringSimilarityTester.cc @@ -7,53 +7,34 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { const int CStringSimilarityTester::MINUS_INFINITE_INT(std::numeric_limits::min()); - -CStringSimilarityTester::CStringSimilarityTester() - : m_Compressor(true) -{ +CStringSimilarityTester::CStringSimilarityTester() : m_Compressor(true) { } -bool CStringSimilarityTester::similarity(const std::string &first, - const std::string &second, - double &result) const -{ +bool CStringSimilarityTester::similarity(const std::string& first, const std::string& second, double& result) const { size_t firstCompLength(0); size_t secondCompLength(0); - if (m_Compressor.addString(first) == false || - m_Compressor.compressedLength(true, firstCompLength) == false || - m_Compressor.addString(second) == false || - 
m_Compressor.compressedLength(true, secondCompLength) == false) - { + if (m_Compressor.addString(first) == false || m_Compressor.compressedLength(true, firstCompLength) == false || + m_Compressor.addString(second) == false || m_Compressor.compressedLength(true, secondCompLength) == false) { // The compressor will have logged the detailed reason LOG_ERROR("Compression problem"); return false; } - return this->similarity(first, - firstCompLength, - second, - secondCompLength, - result); + return this->similarity(first, firstCompLength, second, secondCompLength, result); } -bool CStringSimilarityTester::similarity(const std::string &first, +bool CStringSimilarityTester::similarity(const std::string& first, size_t firstCompLength, - const std::string &second, + const std::string& second, size_t secondCompLength, - double &result) const -{ - if (first.empty() && second.empty()) - { + double& result) const { + if (first.empty() && second.empty()) { // Special case that will cause a divide by zero error if // we're not careful result = 1.0; @@ -63,13 +44,9 @@ bool CStringSimilarityTester::similarity(const std::string &first, size_t firstPlusSecondCompLength(0); size_t secondPlusFirstCompLength(0); - if (m_Compressor.addString(first) == false || - m_Compressor.addString(second) == false || - m_Compressor.compressedLength(true, firstPlusSecondCompLength) == false || - m_Compressor.addString(second) == false || - m_Compressor.addString(first) == false || - m_Compressor.compressedLength(true, secondPlusFirstCompLength) == false) - { + if (m_Compressor.addString(first) == false || m_Compressor.addString(second) == false || + m_Compressor.compressedLength(true, firstPlusSecondCompLength) == false || m_Compressor.addString(second) == false || + m_Compressor.addString(first) == false || m_Compressor.compressedLength(true, secondPlusFirstCompLength) == false) { // The compressor will have logged the detailed reason LOG_ERROR("Compression problem"); return false; @@ -88,25 +65,16 @@ bool CStringSimilarityTester::similarity(const std::string &first, return true; } -bool CStringSimilarityTester::compressedLengthOf(const std::string &str, - size_t &length) const -{ - return m_Compressor.addString(str) && - m_Compressor.compressedLength(true, length); +bool CStringSimilarityTester::compressedLengthOf(const std::string& str, size_t& length) const { + return m_Compressor.addString(str) && m_Compressor.compressedLength(true, length); } -int **CStringSimilarityTester::setupBerghelRoachMatrix(int maxDist, - TScopedIntArray &dataArray, - TScopedIntPArray &matrixArray) -{ +int** CStringSimilarityTester::setupBerghelRoachMatrix(int maxDist, TScopedIntArray& dataArray, TScopedIntPArray& matrixArray) { // Ensure that we don't suffer memory corruption due to an incorrect input - if (maxDist <= 0) - { + if (maxDist <= 0) { LOG_ERROR("Programmatic error - maxDist too small " << maxDist); return 0; - } - else if (maxDist >= std::numeric_limits::max() / 2) - { + } else if (maxDist >= std::numeric_limits::max() / 2) { LOG_ERROR("Programmatic error - maxDist too big " << maxDist); return 0; } @@ -123,9 +91,8 @@ int **CStringSimilarityTester::setupBerghelRoachMatrix(int maxDist, // The column indexes go from -1 to maxDist inclusive, so add 1 to the // pointer such that row[-1] points to the beginning of the row memory - int *rowZero(dataArray.get() + 1); - for (int row = 0; row < rows; ++row) - { + int* rowZero(dataArray.get() + 1); + for (int row = 0; row < rows; ++row) { matrixArray[row] = rowZero; rowZero += columns; } @@ 
-134,14 +101,13 @@ int **CStringSimilarityTester::setupBerghelRoachMatrix(int maxDist, // the pointer such that matrix[-maxDist] points to the first row memory. // (Then matrix[-maxDist][-1] will point to the very beginning of the // memory.) - int **matrix; + int** matrix; matrix = matrixArray.get() + maxDist; // Initialise the matrix. This is an optimised version of the pseudo-code // near the end of the sub-section titled "The Algorithm Kernel" in // http://berghel.net/publications/asm/asm.pdf - for (int k = -maxDist; k < 0; ++k) - { + for (int k = -maxDist; k < 0; ++k) { // Here note that (::abs(k) - 1) == -1 - k; int absKMinusOne(-1 - k); matrix[k][absKMinusOne - 1] = MINUS_INFINITE_INT; @@ -149,8 +115,7 @@ int **CStringSimilarityTester::setupBerghelRoachMatrix(int maxDist, } // k = 0, so (::abs(k) - 1) == -1 matrix[0][-1] = -1; - for (int k = 1; k <= maxDist; ++k) - { + for (int k = 1; k <= maxDist; ++k) { // Here note that (::abs(k) - 1) == k - 1; int absKMinusOne(k - 1); matrix[k][absKMinusOne - 1] = MINUS_INFINITE_INT; @@ -159,8 +124,5 @@ int **CStringSimilarityTester::setupBerghelRoachMatrix(int maxDist, return matrix; } - - } } - diff --git a/lib/core/CStringUtils.cc b/lib/core/CStringUtils.cc index b3f75f9315..48cb67a054 100644 --- a/lib/core/CStringUtils.cc +++ b/lib/core/CStringUtils.cc @@ -21,16 +21,13 @@ #include #include - -namespace -{ +namespace { //! In order to avoid a failure on read we need to account for the rounding //! performed by strtod which can result in the rounded value being outside //! the representable range for values near minimum and maximum double. -double clampToReadable(double x) -{ +double clampToReadable(double x) { static const double SMALLEST = -(1.0 - 5e-16) * std::numeric_limits::max(); - static const double LARGEST = (1.0 - 5e-16) * std::numeric_limits::max(); + static const double LARGEST = (1.0 - 5e-16) * std::numeric_limits::max(); return (x < SMALLEST ? SMALLEST : x > LARGEST ? LARGEST : x); } @@ -38,51 +35,39 @@ double clampToReadable(double x) // require it, call locale() during the static initialisation phase of the // program. Of course, the locale may already be constructed before this if // another static object has used it. 
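The clampToReadable() helper above exists because the largest doubles do not survive a print/parse round trip: printing DBL_MAX at 15 significant figures rounds the text up past DBL_MAX, and strtod() then fails with ERANGE on the way back in. A short standalone demonstration; the helper body is copied from this file, the rest is scaffolding:

#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <limits>

double clampToReadable(double x) {
    static const double SMALLEST = -(1.0 - 5e-16) * std::numeric_limits<double>::max();
    static const double LARGEST = (1.0 - 5e-16) * std::numeric_limits<double>::max();
    return (x < SMALLEST ? SMALLEST : x > LARGEST ? LARGEST : x);
}

int main() {
    char buf[64];
    // Unclamped: "%.15g" rounds DBL_MAX up, so strtod() overflows with ERANGE.
    std::snprintf(buf, sizeof(buf), "%.15g", std::numeric_limits<double>::max());
    errno = 0;
    double back = std::strtod(buf, nullptr);
    std::printf("unclamped: errno=%d (ERANGE=%d), value=%g\n", errno, ERANGE, back);

    // Clamped slightly inside the range, the text parses back cleanly.
    std::snprintf(buf, sizeof(buf), "%.15g", clampToReadable(std::numeric_limits<double>::max()));
    errno = 0;
    back = std::strtod(buf, nullptr);
    std::printf("clamped:   errno=%d, value=%g\n", errno, back);
    return 0;
}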
-const std::locale &DO_NOT_USE_THIS_VARIABLE = ml::core::CStringUtils::locale(); +const std::locale& DO_NOT_USE_THIS_VARIABLE = ml::core::CStringUtils::locale(); } - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { // Initialise static class members const std::string CStringUtils::WHITESPACE_CHARS(" \t\r\n\v\f"); - -int CStringUtils::utf8ByteType(char c) -{ +int CStringUtils::utf8ByteType(char c) { unsigned char u = static_cast(c); - if ((u & 0x80) == 0) - { + if ((u & 0x80) == 0) { // Single byte character return 1; } - if ((u & 0xC0) == 0x80) - { + if ((u & 0xC0) == 0x80) { // Continuation character return -1; } - if ((u & 0xE0) == 0xC0) - { + if ((u & 0xE0) == 0xC0) { // Start of two byte character return 2; } - if ((u & 0xF0) == 0xE0) - { + if ((u & 0xF0) == 0xE0) { // Start of three byte character return 3; } - if ((u & 0xF8) == 0xF0) - { + if ((u & 0xF8) == 0xF0) { // Start of four byte character return 4; } - if ((u & 0xFC) == 0xF8) - { + if ((u & 0xFC) == 0xF8) { // Start of five byte character return 5; } @@ -90,35 +75,23 @@ int CStringUtils::utf8ByteType(char c) return 6; } -std::string CStringUtils::toLower(std::string str) -{ - std::transform(str.begin(), - str.end(), - str.begin(), - &::tolower); +std::string CStringUtils::toLower(std::string str) { + std::transform(str.begin(), str.end(), str.begin(), &::tolower); return str; } -std::string CStringUtils::toUpper(std::string str) -{ - std::transform(str.begin(), - str.end(), - str.begin(), - &::toupper); +std::string CStringUtils::toUpper(std::string str) { + std::transform(str.begin(), str.end(), str.begin(), &::toupper); return str; } -size_t CStringUtils::numMatches(const std::string &str, - const std::string &word) -{ - size_t count(0); +size_t CStringUtils::numMatches(const std::string& str, const std::string& word) { + size_t count(0); std::string::size_type pos(0); - while (pos != std::string::npos) - { + while (pos != std::string::npos) { pos = str.find(word, pos); - if (pos != std::string::npos) - { + if (pos != std::string::npos) { ++count; // start next search after this word @@ -129,22 +102,17 @@ size_t CStringUtils::numMatches(const std::string &str, return count; } -void CStringUtils::trimWhitespace(std::string &str) -{ +void CStringUtils::trimWhitespace(std::string& str) { CStringUtils::trim(WHITESPACE_CHARS, str); } -void CStringUtils::trim(const std::string &toTrim, - std::string &str) -{ - if (toTrim.empty() || str.empty()) - { +void CStringUtils::trim(const std::string& toTrim, std::string& str) { + if (toTrim.empty() || str.empty()) { return; } std::string::size_type pos = str.find_last_not_of(toTrim); - if (pos == std::string::npos) - { + if (pos == std::string::npos) { // Special case - entire string is being trimmed str.clear(); return; @@ -153,33 +121,24 @@ void CStringUtils::trim(const std::string &toTrim, str.erase(pos + 1); pos = str.find_first_not_of(toTrim); - if (pos != std::string::npos && pos > 0) - { + if (pos != std::string::npos && pos > 0) { str.erase(0, pos); } } -std::string CStringUtils::normaliseWhitespace(const std::string &str) -{ +std::string CStringUtils::normaliseWhitespace(const std::string& str) { std::string result; result.reserve(str.length()); bool outputSpace(true); - for (std::string::const_iterator iter = str.begin(); - iter != str.end(); - ++iter) - { + for (std::string::const_iterator iter = str.begin(); iter != str.end(); ++iter) { char current(*iter); - if (::isspace(static_cast(current))) - { - if (outputSpace) - { + if 
(::isspace(static_cast(current))) { + if (outputSpace) { outputSpace = false; result += ' '; } - } - else - { + } else { outputSpace = true; result += current; } @@ -188,23 +147,17 @@ std::string CStringUtils::normaliseWhitespace(const std::string &str) return result; } -size_t CStringUtils::replace(const std::string &from, - const std::string &to, - std::string &str) -{ - if (from == to) - { +size_t CStringUtils::replace(const std::string& from, const std::string& to, std::string& str) { + if (from == to) { return 0; } size_t count(0); std::string::size_type pos(0); - while (pos != std::string::npos) - { + while (pos != std::string::npos) { pos = str.find(from, pos); - if (pos == std::string::npos) - { + if (pos == std::string::npos) { return count; } @@ -217,18 +170,13 @@ size_t CStringUtils::replace(const std::string &from, return count; } -size_t CStringUtils::replaceFirst(const std::string &from, - const std::string &to, - std::string &str) -{ - if (from == to) - { +size_t CStringUtils::replaceFirst(const std::string& from, const std::string& to, std::string& str) { + if (from == to) { return 0; } std::string::size_type pos = str.find(from); - if (pos == std::string::npos) - { + if (pos == std::string::npos) { return 0; } @@ -237,21 +185,15 @@ size_t CStringUtils::replaceFirst(const std::string &from, return 1; } -void CStringUtils::escape(char escape, - const std::string &toEscape, - std::string &str) -{ - if (escape == '\0' || toEscape.empty()) - { +void CStringUtils::escape(char escape, const std::string& toEscape, std::string& str) { + if (escape == '\0' || toEscape.empty()) { return; } std::string::size_type pos(0); - while (pos < str.length()) - { + while (pos < str.length()) { pos = str.find_first_of(toEscape, pos); - if (pos == std::string::npos) - { + if (pos == std::string::npos) { break; } @@ -263,25 +205,18 @@ void CStringUtils::escape(char escape, } } -void CStringUtils::unEscape(char escape, std::string &str) -{ - if (escape == '\0') - { +void CStringUtils::unEscape(char escape, std::string& str) { + if (escape == '\0') { return; } std::string::size_type pos(0); - while (pos < str.length()) - { + while (pos < str.length()) { pos = str.find(escape, pos); - if (pos == std::string::npos) - { + if (pos == std::string::npos) { break; - } - else if (pos + 1 == str.length()) - { - LOG_WARN("String to be unescaped ends with escape character: " << - str); + } else if (pos + 1 == str.length()) { + LOG_WARN("String to be unescaped ends with escape character: " << str); } str.erase(pos, 1); @@ -292,8 +227,7 @@ void CStringUtils::unEscape(char escape, std::string &str) } } -std::string CStringUtils::_typeToString(const unsigned long long &i) -{ +std::string CStringUtils::_typeToString(const unsigned long long& i) { char buf[4 * sizeof(unsigned long long)]; ::memset(buf, 0, sizeof(buf)); @@ -302,8 +236,7 @@ std::string CStringUtils::_typeToString(const unsigned long long &i) return buf; } -std::string CStringUtils::_typeToString(const unsigned long &i) -{ +std::string CStringUtils::_typeToString(const unsigned long& i) { char buf[4 * sizeof(unsigned long)]; ::memset(buf, 0, sizeof(buf)); @@ -312,8 +245,7 @@ std::string CStringUtils::_typeToString(const unsigned long &i) return buf; } -std::string CStringUtils::_typeToString(const unsigned int &i) -{ +std::string CStringUtils::_typeToString(const unsigned int& i) { char buf[4 * sizeof(unsigned int)]; ::memset(buf, 0, sizeof(buf)); @@ -322,8 +254,7 @@ std::string CStringUtils::_typeToString(const unsigned int &i) return buf; } 
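The char buf[4 * sizeof(T)] pattern used by these conversions rests on a digit-count bound: each byte of T contributes at most log10(256), roughly 2.41, decimal digits, so 4 * sizeof(T) comfortably covers the digits, a sign, and the terminating NUL. A compile-time sketch of that bound, illustrative and not part of the patch:

#include <limits>

template<typename T>
constexpr bool bufferBigEnough() {
    // Worst-case text length: digits10 + 1 digits, plus '-' and '\0'.
    return 4 * sizeof(T) >= std::numeric_limits<T>::digits10 + 3;
}

static_assert(bufferBigEnough<short>(), "4*sizeof(short) holds any short as text");
static_assert(bufferBigEnough<int>(), "4*sizeof(int) holds any int as text");
static_assert(bufferBigEnough<long>(), "4*sizeof(long) holds any long as text");
static_assert(bufferBigEnough<long long>(), "4*sizeof(long long) holds any long long as text");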
-std::string CStringUtils::_typeToString(const unsigned short &i) -{ +std::string CStringUtils::_typeToString(const unsigned short& i) { char buf[4 * sizeof(unsigned short)]; ::memset(buf, 0, sizeof(buf)); @@ -332,8 +263,7 @@ std::string CStringUtils::_typeToString(const unsigned short &i) return buf; } -std::string CStringUtils::_typeToString(const long long &i) -{ +std::string CStringUtils::_typeToString(const long long& i) { char buf[4 * sizeof(long long)]; ::memset(buf, 0, sizeof(buf)); @@ -342,8 +272,7 @@ std::string CStringUtils::_typeToString(const long long &i) return buf; } -std::string CStringUtils::_typeToString(const long &i) -{ +std::string CStringUtils::_typeToString(const long& i) { char buf[4 * sizeof(long)]; ::memset(buf, 0, sizeof(buf)); @@ -352,8 +281,7 @@ std::string CStringUtils::_typeToString(const long &i) return buf; } -std::string CStringUtils::_typeToString(const int &i) -{ +std::string CStringUtils::_typeToString(const int& i) { char buf[4 * sizeof(int)]; ::memset(buf, 0, sizeof(buf)); @@ -362,8 +290,7 @@ std::string CStringUtils::_typeToString(const int &i) return buf; } -std::string CStringUtils::_typeToString(const short &i) -{ +std::string CStringUtils::_typeToString(const short& i) { char buf[4 * sizeof(short)]; ::memset(buf, 0, sizeof(buf)); @@ -372,13 +299,11 @@ std::string CStringUtils::_typeToString(const short &i) return buf; } -std::string CStringUtils::_typeToString(const bool &b) -{ +std::string CStringUtils::_typeToString(const bool& b) { return (b ? "true" : "false"); } -std::string CStringUtils::_typeToString(const double &i) -{ +std::string CStringUtils::_typeToString(const double& i) { // Note the extra large buffer here, which is because the format string is // "%f" rather than "%g", which means we could be printing a 308 digit // number without resorting to scientific notation @@ -390,24 +315,20 @@ std::string CStringUtils::_typeToString(const double &i) return buf; } -std::string CStringUtils::_typeToString(const char *str) -{ +std::string CStringUtils::_typeToString(const char* str) { return str; } -std::string CStringUtils::_typeToString(const char &c) -{ +std::string CStringUtils::_typeToString(const char& c) { return std::string(1, c); } // This may seem silly, but it allows generic code to be written -const std::string &CStringUtils::_typeToString(const std::string &str) -{ +const std::string& CStringUtils::_typeToString(const std::string& str) { return str; } -std::string CStringUtils::typeToStringPretty(double d) -{ +std::string CStringUtils::typeToStringPretty(double d) { // Maximum size = 1 (for sign) // + 7 (for # s.f.) // + 1 (for decimal point) @@ -421,9 +342,7 @@ std::string CStringUtils::typeToStringPretty(double d) return buf; } -std::string CStringUtils::typeToStringPrecise(double d, - CIEEE754::EPrecision precision) -{ +std::string CStringUtils::typeToStringPrecise(double d, CIEEE754::EPrecision precision) { // Just use a large enough buffer to hold maximum precision. char buf[4 * sizeof(double)]; ::memset(buf, 0, sizeof(buf)); @@ -439,24 +358,19 @@ std::string CStringUtils::typeToStringPrecise(double d, // gives 4.999999e-1 rather than the correctly rounded value 0.5. int ret = 0; - switch (precision) - { + switch (precision) { case CIEEE754::E_HalfPrecision: - ret = std::fabs(d) < 1.0 && d != 0.0 ? - ::sprintf(buf, "%.2e", clampToReadable(CIEEE754::round(d, CIEEE754::E_HalfPrecision))) : - ::sprintf(buf, "%.3g", clampToReadable(CIEEE754::round(d, CIEEE754::E_HalfPrecision))); + ret = std::fabs(d) < 1.0 && d != 0.0 ? 
::sprintf(buf, "%.2e", clampToReadable(CIEEE754::round(d, CIEEE754::E_HalfPrecision))) + : ::sprintf(buf, "%.3g", clampToReadable(CIEEE754::round(d, CIEEE754::E_HalfPrecision))); break; case CIEEE754::E_SinglePrecision: - ret = std::fabs(d) < 1.0 && d != 0.0 ? - ::sprintf(buf, "%.6e", clampToReadable(CIEEE754::round(d, CIEEE754::E_SinglePrecision))) : - ::sprintf(buf, "%.7g", clampToReadable(CIEEE754::round(d, CIEEE754::E_SinglePrecision))); + ret = std::fabs(d) < 1.0 && d != 0.0 ? ::sprintf(buf, "%.6e", clampToReadable(CIEEE754::round(d, CIEEE754::E_SinglePrecision))) + : ::sprintf(buf, "%.7g", clampToReadable(CIEEE754::round(d, CIEEE754::E_SinglePrecision))); break; case CIEEE754::E_DoublePrecision: - ret = std::fabs(d) < 1.0 && d != 0.0 ? - ::sprintf(buf, "%.14e", clampToReadable(d)) : - ::sprintf(buf, "%.15g", clampToReadable(d)); + ret = std::fabs(d) < 1.0 && d != 0.0 ? ::sprintf(buf, "%.14e", clampToReadable(d)) : ::sprintf(buf, "%.15g", clampToReadable(d)); break; } @@ -464,57 +378,43 @@ std::string CStringUtils::typeToStringPrecise(double d, // 123.45e010 with 123.45e10 and 123.45e-010 with 123.45e-10. // Also it is inefficient to output trailing zeros, i.e. // 1.23456000000000e-11 so we strip these off in the following. - if (ret > 2) - { + if (ret > 2) { // Look for an 'e' - char *ptr(static_cast(::memchr(buf, 'e', ret - 1))); - if (ptr != 0) - { + char* ptr(static_cast(::memchr(buf, 'e', ret - 1))); + if (ptr != 0) { bool edit = false; bool minus = false; // Strip off any trailing zeros and a trailing point. - char *bwd = ptr; - for (;;) - { + char* bwd = ptr; + for (;;) { --bwd; - if (*bwd == '0' || *bwd == '.') - { + if (*bwd == '0' || *bwd == '.') { edit = true; - } - else - { + } else { break; } } // Strip off any leading zeros in the exponent. - char *fwd = ptr; - for (;;) - { + char* fwd = ptr; + for (;;) { ++fwd; - if (*fwd == '-') - { + if (*fwd == '-') { minus = true; - } - else if (*fwd == '+' || *fwd == '0') - { + } else if (*fwd == '+' || *fwd == '0') { edit = true; - } - else - { + } else { break; } } - if (edit) - { + if (edit) { std::string adjResult; adjResult.reserve(ret - 1); // mantissa adjResult.assign(buf, bwd + 1); - if (::isdigit(static_cast(*fwd))) - { + if (::isdigit(static_cast(*fwd))) { adjResult.append(minus ? 
"e-" : "e"); // exponent adjResult.append(fwd); @@ -527,49 +427,45 @@ std::string CStringUtils::typeToStringPrecise(double d, return buf; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - unsigned long long &i) -{ - if (str.empty()) - { - if (!silent) - { +bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned long long& i) { + if (str.empty()) { + if (!silent) { LOG_ERROR("Unable to convert empty string to unsigned long long"); } return false; } - char *endPtr(0); + char* endPtr(0); errno = 0; unsigned long long ret(::strtoull(str.c_str(), &endPtr, 0)); - if (ret == 0 && errno == EINVAL) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to unsigned long long: " << ::strerror(errno)); + if (ret == 0 && errno == EINVAL) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to unsigned long long: " + << ::strerror(errno)); } return false; } if (ret == ULLONG_MAX && errno == ERANGE) // note ULLONG_MAX used for compatability with strtoull { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to unsigned long long: " << ::strerror(errno)); + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to unsigned long long: " + << ::strerror(errno)); } return false; } - if (endPtr != 0 && *endPtr != '\0') - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to unsigned long long: first invalid character " << endPtr); + if (endPtr != 0 && *endPtr != '\0') { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to unsigned long long: first invalid character " + << endPtr); } return false; } @@ -579,49 +475,45 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - unsigned long &i) -{ - if (str.empty()) - { - if (!silent) - { +bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned long& i) { + if (str.empty()) { + if (!silent) { LOG_ERROR("Unable to convert empty string to unsigned long"); } return false; } - char *endPtr(0); + char* endPtr(0); errno = 0; unsigned long ret(::strtoul(str.c_str(), &endPtr, 0)); - if (ret == 0 && errno == EINVAL) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to unsigned long: " << ::strerror(errno)); + if (ret == 0 && errno == EINVAL) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to unsigned long: " + << ::strerror(errno)); } return false; } if (ret == ULONG_MAX && errno == ERANGE) // note ULONG_MAX used for compatability with strtoul { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to unsigned long: " << ::strerror(errno)); + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to unsigned long: " + << ::strerror(errno)); } return false; } - if (endPtr != 0 && *endPtr != '\0') - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to unsigned long: first invalid character " << endPtr); + if (endPtr != 0 && *endPtr != '\0') { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to unsigned long: first invalid character " + << endPtr); } return false; } @@ -631,25 +523,20 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - unsigned int &i) -{ +bool CStringUtils::_stringToType(bool silent, const 
std::string& str, unsigned int& i) { // First try to convert to unsigned long. // If that works check the range for unsigned int. unsigned long ret(0); - if (CStringUtils::_stringToType(silent, str, ret) == false) - { + if (CStringUtils::_stringToType(silent, str, ret) == false) { return false; } // Now check if the result is in range for unsigned int - if (ret > std::numeric_limits::max()) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to unsigned int - out of range"); + if (ret > std::numeric_limits::max()) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to unsigned int - out of range"); } return false; } @@ -659,25 +546,20 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - unsigned short &i) -{ +bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned short& i) { // First try to convert to unsigned long. // If that works check the range for unsigned short. unsigned long ret(0); - if (CStringUtils::_stringToType(silent, str, ret) == false) - { + if (CStringUtils::_stringToType(silent, str, ret) == false) { return false; } // Now check if the result is in range for unsigned short - if (ret > std::numeric_limits::max()) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to unsigned short - out of range"); + if (ret > std::numeric_limits::max()) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to unsigned short - out of range"); } return false; } @@ -687,49 +569,45 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - long long &i) -{ - if (str.empty()) - { - if (!silent) - { +bool CStringUtils::_stringToType(bool silent, const std::string& str, long long& i) { + if (str.empty()) { + if (!silent) { LOG_ERROR("Unable to convert empty string to long long"); } return false; } - char *endPtr(0); + char* endPtr(0); errno = 0; long long ret(::strtoll(str.c_str(), &endPtr, 0)); - if (ret == 0 && errno == EINVAL) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to long long: " << ::strerror(errno)); + if (ret == 0 && errno == EINVAL) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to long long: " + << ::strerror(errno)); } return false; } if ((ret == LLONG_MIN || ret == LLONG_MAX) && errno == ERANGE) // note LLONG_MAX used for compatability with strtoll { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to long long: " << ::strerror(errno)); + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to long long: " + << ::strerror(errno)); } return false; } - if (endPtr != 0 && *endPtr != '\0') - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to long long: first invalid character " << endPtr); + if (endPtr != 0 && *endPtr != '\0') { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to long long: first invalid character " + << endPtr); } return false; } @@ -739,49 +617,45 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - long &i) -{ - if (str.empty()) - { - if (!silent) - { +bool CStringUtils::_stringToType(bool silent, const std::string& str, long& i) { + if (str.empty()) { + if (!silent) { LOG_ERROR("Unable to convert 
empty string to long"); } return false; } - char *endPtr(0); + char* endPtr(0); errno = 0; long ret(::strtol(str.c_str(), &endPtr, 0)); - if (ret == 0 && errno == EINVAL) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to long: " << ::strerror(errno)); + if (ret == 0 && errno == EINVAL) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to long: " + << ::strerror(errno)); } return false; } if ((ret == LONG_MIN || ret == LONG_MAX) && errno == ERANGE) // note LONG_MAX used for compatability with strtol { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to long: " << ::strerror(errno)); + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to long: " + << ::strerror(errno)); } return false; } - if (endPtr != 0 && *endPtr != '\0') - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to long: first invalid character " << endPtr); + if (endPtr != 0 && *endPtr != '\0') { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to long: first invalid character " + << endPtr); } return false; } @@ -791,25 +665,19 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - int &i) -{ +bool CStringUtils::_stringToType(bool silent, const std::string& str, int& i) { // First try to convert to long. If that works check the range for int. long ret(0); - if (CStringUtils::_stringToType(silent, str, ret) == false) - { + if (CStringUtils::_stringToType(silent, str, ret) == false) { return false; } // Now check if the result is in range for int - if (ret < std::numeric_limits::min() || - ret > std::numeric_limits::max()) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to int - out of range"); + if (ret < std::numeric_limits::min() || ret > std::numeric_limits::max()) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to int - out of range"); } return false; } @@ -819,25 +687,19 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - short &i) -{ +bool CStringUtils::_stringToType(bool silent, const std::string& str, short& i) { // First try to convert to long. If that works check the range for short. 
long ret(0); - if (CStringUtils::_stringToType(silent, str, ret) == false) - { + if (CStringUtils::_stringToType(silent, str, ret) == false) { return false; } // Now check if the result is in range for short - if (ret < std::numeric_limits::min() || - ret > std::numeric_limits::max()) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to short - out of range"); + if (ret < std::numeric_limits::min() || ret > std::numeric_limits::max()) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to short - out of range"); } return false; } @@ -847,80 +709,66 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - bool &ret) -{ - switch (str.length()) - { - case 0: - if (!silent) - { - LOG_ERROR("Cannot convert empty string to bool"); - } - return false; - case 1: - switch (str[0]) - { - case 'T': - case 'Y': - case 't': - case 'y': - ret = true; - return true; - case 'F': - case 'N': - case 'f': - case 'n': - ret = false; - return true; - } - break; - case 2: - if (CStrCaseCmp::strCaseCmp(str.c_str(), "no") == 0) - { - ret = false; - return true; - } - if (CStrCaseCmp::strCaseCmp(str.c_str(), "on") == 0) - { - ret = true; - return true; - } - break; - case 3: - if (CStrCaseCmp::strCaseCmp(str.c_str(), "yes") == 0) - { - ret = true; - return true; - } - if (CStrCaseCmp::strCaseCmp(str.c_str(), "off") == 0) - { - ret = false; - return true; - } - break; - case 4: - if (CStrCaseCmp::strCaseCmp(str.c_str(), "true") == 0) - { - ret = true; - return true; - } - break; - case 5: - if (CStrCaseCmp::strCaseCmp(str.c_str(), "false") == 0) - { - ret = false; - return true; - } - break; +bool CStringUtils::_stringToType(bool silent, const std::string& str, bool& ret) { + switch (str.length()) { + case 0: + if (!silent) { + LOG_ERROR("Cannot convert empty string to bool"); + } + return false; + case 1: + switch (str[0]) { + case 'T': + case 'Y': + case 't': + case 'y': + ret = true; + return true; + case 'F': + case 'N': + case 'f': + case 'n': + ret = false; + return true; + } + break; + case 2: + if (CStrCaseCmp::strCaseCmp(str.c_str(), "no") == 0) { + ret = false; + return true; + } + if (CStrCaseCmp::strCaseCmp(str.c_str(), "on") == 0) { + ret = true; + return true; + } + break; + case 3: + if (CStrCaseCmp::strCaseCmp(str.c_str(), "yes") == 0) { + ret = true; + return true; + } + if (CStrCaseCmp::strCaseCmp(str.c_str(), "off") == 0) { + ret = false; + return true; + } + break; + case 4: + if (CStrCaseCmp::strCaseCmp(str.c_str(), "true") == 0) { + ret = true; + return true; + } + break; + case 5: + if (CStrCaseCmp::strCaseCmp(str.c_str(), "false") == 0) { + ret = false; + return true; + } + break; } long l(0); - if (CStringUtils::_stringToType(silent, str, l) == false) - { - if (!silent) - { + if (CStringUtils::_stringToType(silent, str, l) == false) { + if (!silent) { LOG_ERROR("Cannot convert " << str << " to bool"); } return false; @@ -931,49 +779,44 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - double &d) -{ - if (str.empty()) - { - if (!silent) - { +bool CStringUtils::_stringToType(bool silent, const std::string& str, double& d) { + if (str.empty()) { + if (!silent) { LOG_ERROR("Unable to convert empty string to double"); } return false; } - char *endPtr(0); + char* endPtr(0); errno = 0; double ret(::strtod(str.c_str(), &endPtr)); - if (ret == 0 && errno == EINVAL) - { - 
if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to double: " << ::strerror(errno)); + if (ret == 0 && errno == EINVAL) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to double: " + << ::strerror(errno)); } return false; } - if ((ret == HUGE_VAL || ret == -HUGE_VAL) && errno == ERANGE) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to double: " << ::strerror(errno)); + if ((ret == HUGE_VAL || ret == -HUGE_VAL) && errno == ERANGE) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to double: " + << ::strerror(errno)); } return false; } - if (endPtr != 0 && *endPtr != '\0') - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "'" - " to double: first invalid character " << endPtr); + if (endPtr != 0 && *endPtr != '\0') { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str + << "'" + " to double: first invalid character " + << endPtr); } return false; } @@ -983,16 +826,10 @@ bool CStringUtils::_stringToType(bool silent, return true; } -bool CStringUtils::_stringToType(bool silent, - const std::string &str, - char &c) -{ - if (str.length() != 1) - { - if (!silent) - { - LOG_ERROR("Unable to convert string '" << str << "' to char: " << - (str.empty() ? "too short" : "too long")); +bool CStringUtils::_stringToType(bool silent, const std::string& str, char& c) { + if (str.length() != 1) { + if (!silent) { + LOG_ERROR("Unable to convert string '" << str << "' to char: " << (str.empty() ? "too short" : "too long")); } return false; } @@ -1003,44 +840,30 @@ bool CStringUtils::_stringToType(bool silent, } // This may seem silly, but it allows generic code to be written -bool CStringUtils::_stringToType(bool /* silent */, - const std::string &str, - std::string &outStr) -{ +bool CStringUtils::_stringToType(bool /* silent */, const std::string& str, std::string& outStr) { outStr = str; return true; } -void CStringUtils::tokenise(const std::string &delim, - const std::string &str, - TStrVec &tokens, - std::string &remainder) -{ +void CStringUtils::tokenise(const std::string& delim, const std::string& str, TStrVec& tokens, std::string& remainder) { std::string::size_type pos(0); - for (;;) - { + for (;;) { std::string::size_type pos2(str.find(delim, pos)); - if (pos2 == std::string::npos) - { + if (pos2 == std::string::npos) { remainder.assign(str, pos, str.size() - pos); break; - } - else - { + } else { tokens.push_back(str.substr(pos, pos2 - pos)); pos = pos2 + delim.size(); } } } -std::string CStringUtils::longestCommonSubstr(const std::string &str1, - const std::string &str2) -{ +std::string CStringUtils::longestCommonSubstr(const std::string& str1, const std::string& str2) { std::string common; - if (str1.empty() || str2.empty()) - { + if (str1.empty() || str2.empty()) { return common; } @@ -1054,37 +877,25 @@ std::string CStringUtils::longestCommonSubstr(const std::string &str1, size_t maxLen(0); size_t lastSubstrBegin(0); - for (size_t i = 0; i < firstLen; ++i) - { - for (size_t j = 0; j < secondLen; ++j) - { - if (str1[i] != str2[j]) - { + for (size_t i = 0; i < firstLen; ++i) { + for (size_t j = 0; j < secondLen; ++j) { + if (str1[i] != str2[j]) { matrix[i][j] = 0; - } - else - { - if (i == 0 || j == 0) - { + } else { + if (i == 0 || j == 0) { matrix[i][j] = 1; - } - else - { + } else { matrix[i][j] = 1 + matrix[i - 1][j - 1]; } - if (matrix[i][j] > maxLen) - { + if (matrix[i][j] > maxLen) { maxLen = matrix[i][j]; size_t thisSubstrBegin(i - 
maxLen + 1); - if (lastSubstrBegin == thisSubstrBegin) - { + if (lastSubstrBegin == thisSubstrBegin) { // We're continuing the current longest common substring common += str1[i]; - } - else - { + } else { // We're starting a new longest common substring common.assign(str1, thisSubstrBegin, maxLen); lastSubstrBegin = thisSubstrBegin; @@ -1097,12 +908,9 @@ std::string CStringUtils::longestCommonSubstr(const std::string &str1, return common; } -std::string CStringUtils::longestCommonSubsequence(const std::string &str1, - const std::string &str2) -{ +std::string CStringUtils::longestCommonSubsequence(const std::string& str1, const std::string& str2) { std::string common; - if (str1.empty() || str2.empty()) - { + if (str1.empty() || str2.empty()) { return common; } @@ -1114,27 +922,20 @@ std::string CStringUtils::longestCommonSubsequence(const std::string &str1, T2DSizeArray matrix(boost::extents[firstLen + 1][secondLen + 1]); // Initialise the top row and left column of the matrix to zero - for (size_t i = 0; i <= firstLen; ++i) - { + for (size_t i = 0; i <= firstLen; ++i) { matrix[i][0] = 0; } - for (size_t j = 0; j <= secondLen; ++j) - { + for (size_t j = 0; j <= secondLen; ++j) { matrix[0][j] = 0; } // Fill in the rest of the matrix - for (size_t i = 1; i <= firstLen; ++i) - { - for (size_t j = 1; j <= secondLen; ++j) - { - if (str1[i - 1] == str2[j - 1]) - { + for (size_t i = 1; i <= firstLen; ++i) { + for (size_t j = 1; j <= secondLen; ++j) { + if (str1[i - 1] == str2[j - 1]) { matrix[i][j] = matrix[i - 1][j - 1] + 1; - } - else - { + } else { matrix[i][j] = std::max(matrix[i][j - 1], matrix[i - 1][j]); } } @@ -1144,8 +945,7 @@ std::string CStringUtils::longestCommonSubsequence(const std::string &str1, // corner of the matrix - if this length is zero, we don't need to backtrack // to find the actual characters size_t seqLen(matrix[firstLen][secondLen]); - if (seqLen > 0) - { + if (seqLen > 0) { // Create a string of NULLs to be overwritten (in reverse order) by the // actual characters common.resize(seqLen); @@ -1154,30 +954,22 @@ std::string CStringUtils::longestCommonSubsequence(const std::string &str1, // Now backtrack through the matrix to find the common sequence size_t i(firstLen); size_t j(secondLen); - while (i > 0 && j > 0) - { - if (str1[i - 1] == str2[j - 1]) - { + while (i > 0 && j > 0) { + if (str1[i - 1] == str2[j - 1]) { common[resPos] = str1[i - 1]; // If we've got all the characters we need we can stop early - if (resPos == 0) - { + if (resPos == 0) { break; } --i; --j; --resPos; - } - else - { - if (matrix[i][j - 1] >= matrix[i - 1][j]) - { + } else { + if (matrix[i][j - 1] >= matrix[i - 1][j]) { --j; - } - else - { + } else { --i; } } @@ -1187,8 +979,7 @@ std::string CStringUtils::longestCommonSubsequence(const std::string &str1, return common; } -std::string CStringUtils::wideToNarrow(const std::wstring &wideStr) -{ +std::string CStringUtils::wideToNarrow(const std::wstring& wideStr) { // Annoyingly, the STL character transformations only work on // character arrays, and not std::string objects std::string narrowStr(wideStr.length(), '\0'); @@ -1197,15 +988,11 @@ std::string CStringUtils::wideToNarrow(const std::wstring &wideStr) // cope with UTF8 either, so we should replace it with a proper // string conversion library, e.g. 
ICU
     using TWCharTCType = std::ctype<wchar_t>;
-    std::use_facet<TWCharTCType>(CStringUtils::locale()).narrow(wideStr.data(),
-                                                                wideStr.data() + wideStr.length(),
-                                                                '?',
-                                                                &narrowStr[0]);
+    std::use_facet<TWCharTCType>(CStringUtils::locale()).narrow(wideStr.data(), wideStr.data() + wideStr.length(), '?', &narrowStr[0]);
     return narrowStr;
 }

-std::wstring CStringUtils::narrowToWide(const std::string &narrowStr)
-{
+std::wstring CStringUtils::narrowToWide(const std::string& narrowStr) {
     // Annoyingly, the STL character transformations only work on
     // character arrays, and not std::string objects
     std::wstring wideStr(narrowStr.length(), L'\0');
@@ -1214,18 +1001,13 @@ std::wstring CStringUtils::narrowToWide(const std::string &narrowStr)
     // cope with UTF8 either, so we should replace it with a proper
     // string conversion library, e.g. ICU
     using TWCharTCType = std::ctype<wchar_t>;
-    std::use_facet<TWCharTCType>(CStringUtils::locale()).widen(narrowStr.data(),
-                                                               narrowStr.data() + narrowStr.length(),
-                                                               &wideStr[0]);
+    std::use_facet<TWCharTCType>(CStringUtils::locale()).widen(narrowStr.data(), narrowStr.data() + narrowStr.length(), &wideStr[0]);
     return wideStr;
 }

-const std::locale &CStringUtils::locale()
-{
+const std::locale& CStringUtils::locale() {
     static std::locale loc;
     return loc;
 }
-
-
 }
 }
diff --git a/lib/core/CThread.cc b/lib/core/CThread.cc
index b1ffc67710..41eef8759e 100644
--- a/lib/core/CThread.cc
+++ b/lib/core/CThread.cc
@@ -12,9 +12,7 @@
 #include
 #include

-
-namespace
-{
+namespace {

 //! Handler for signals that does nothing but will, unlike ignoring a signal,
 //! cause calls to be interrupted with EINTR. The idea is that an open() or
@@ -23,8 +21,7 @@ namespace
 //! return a failure code and set errno to EINTR. This gives the code
 //! surrounding the blocking call a chance to not call it again but instead
 //! do something different.
-void noOpHandler(int /*sig*/)
-{
+void noOpHandler(int /*sig*/) {
 }

 //! Use SIGIO for waking up blocking calls. The same handler will be used in
@@ -34,8 +31,7 @@ void noOpHandler(int /*sig*/)
 //! handling then we could change the signal we use in this class to another
 //! (maybe SIGURG). However, it's bad practice for reusable libraries to
 //! unconditionally install signal handlers, so unlikely to be a problem.
-bool installNoOpSigIoHandler()
-{
+bool installNoOpSigIoHandler() {
     struct sigaction sa;
     sigemptyset(&sa.sa_mask);
     sa.sa_handler = &noOpHandler;
@@ -44,51 +40,39 @@ bool installNoOpSigIoHandler()
 }

 const bool SIGIO_HANDLER_INSTALLED(installNoOpSigIoHandler());
-
 }

-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {

-CThread::CThread()
-    : m_ThreadId(0)
-{
+CThread::CThread() : m_ThreadId(0) {
 }

-CThread::~CThread()
-{
+CThread::~CThread() {
     CScopedLock lock(m_IdMutex);

-    if (m_ThreadId != 0)
-    {
+    if (m_ThreadId != 0) {
         LOG_ERROR("Trying to destroy a running thread. 
Call 'stop' before destroying"); } } -bool CThread::start() -{ +bool CThread::start() { TThreadId dummy(0); return this->start(dummy); } -bool CThread::start(TThreadId &threadId) -{ +bool CThread::start(TThreadId& threadId) { CScopedLock lock(m_IdMutex); - if (m_ThreadId != 0) - { + if (m_ThreadId != 0) { LOG_ERROR("Thread already running"); threadId = m_ThreadId; return false; } int ret = pthread_create(&m_ThreadId, 0, &CThread::threadFunc, this); - if (ret != 0) - { + if (ret != 0) { LOG_ERROR("Cannot create thread: " << ::strerror(ret)); threadId = 0; return false; @@ -99,18 +83,15 @@ bool CThread::start(TThreadId &threadId) return true; } -bool CThread::stop() -{ +bool CThread::stop() { CScopedLock lock(m_IdMutex); - if (m_ThreadId == 0) - { + if (m_ThreadId == 0) { LOG_ERROR("Thread not running"); return false; } - if (pthread_self() == m_ThreadId) - { + if (pthread_self() == m_ThreadId) { LOG_ERROR("Can't stop own thread"); return false; } @@ -119,8 +100,7 @@ bool CThread::stop() this->shutdown(); int ret = pthread_join(m_ThreadId, 0); - if (ret != 0) - { + if (ret != 0) { LOG_ERROR("Error joining thread: " << ::strerror(ret)); } @@ -129,25 +109,21 @@ bool CThread::stop() return true; } -bool CThread::waitForFinish() -{ +bool CThread::waitForFinish() { CScopedLock lock(m_IdMutex); - if (m_ThreadId == 0) - { + if (m_ThreadId == 0) { LOG_ERROR("Thread not running"); return false; } - if (pthread_self() == m_ThreadId) - { + if (pthread_self() == m_ThreadId) { LOG_ERROR("Can't stop own thread"); return false; } int ret = pthread_join(m_ThreadId, 0); - if (ret != 0) - { + if (ret != 0) { LOG_ERROR("Error joining thread: " << ::strerror(ret)); } @@ -156,25 +132,21 @@ bool CThread::waitForFinish() return true; } -bool CThread::isStarted() const -{ +bool CThread::isStarted() const { CScopedLock lock(m_IdMutex); return (m_ThreadId != 0); } -bool CThread::cancelBlockedIo() -{ +bool CThread::cancelBlockedIo() { CScopedLock lock(m_IdMutex); - if (m_ThreadId == 0) - { + if (m_ThreadId == 0) { LOG_ERROR("Thread not running"); return false; } - if (pthread_self() == m_ThreadId) - { + if (pthread_self() == m_ThreadId) { LOG_ERROR("Can't cancel blocked IO in own thread"); return false; } @@ -182,13 +154,10 @@ bool CThread::cancelBlockedIo() // Deliver the signal using pthread_kill() rather than raise() to ensure it // is delivered to the correct thread. int ret = pthread_kill(m_ThreadId, SIGIO); - if (ret != 0) - { + if (ret != 0) { // Don't report an error if the thread has already exited - if (ret != ESRCH) - { - LOG_ERROR("Error cancelling blocked IO in thread: " << - ::strerror(ret)); + if (ret != ESRCH) { + LOG_ERROR("Error cancelling blocked IO in thread: " << ::strerror(ret)); return false; } } @@ -196,10 +165,8 @@ bool CThread::cancelBlockedIo() return true; } -bool CThread::cancelBlockedIo(TThreadId threadId) -{ - if (pthread_self() == threadId) - { +bool CThread::cancelBlockedIo(TThreadId threadId) { + if (pthread_self() == threadId) { LOG_ERROR("Can't cancel blocked IO in own thread"); return false; } @@ -207,13 +174,10 @@ bool CThread::cancelBlockedIo(TThreadId threadId) // Deliver the signal using pthread_kill() rather than raise() to ensure it // is delivered to the correct thread. 
int ret = pthread_kill(threadId, SIGIO);
-    if (ret != 0)
-    {
+    if (ret != 0) {
         // Don't report an error if the thread has already exited
-        if (ret != ESRCH)
-        {
-            LOG_ERROR("Error cancelling blocked IO in thread " <<
-                      threadId << ": " << ::strerror(ret));
+        if (ret != ESRCH) {
+            LOG_ERROR("Error cancelling blocked IO in thread " << threadId << ": " << ::strerror(ret));
             return false;
         }
     }
@@ -221,21 +185,16 @@ bool CThread::cancelBlockedIo(TThreadId threadId)
     return true;
 }

-CThread::TThreadId CThread::currentThreadId()
-{
+CThread::TThreadId CThread::currentThreadId() {
     return pthread_self();
 }

-CThread::TThreadRet STDCALL CThread::threadFunc(void *obj)
-{
-    CThread *instance = static_cast<CThread *>(obj);
+CThread::TThreadRet STDCALL CThread::threadFunc(void* obj) {
+    CThread* instance = static_cast<CThread*>(obj);

     instance->run();

     return 0;
 }
-
-
 }
 }
-
diff --git a/lib/core/CThread_Windows.cc b/lib/core/CThread_Windows.cc
index b0e9bc30cb..25f286a84f 100644
--- a/lib/core/CThread_Windows.cc
+++ b/lib/core/CThread_Windows.cc
@@ -13,42 +13,30 @@
 #include
 #include

+namespace ml {
+namespace core {

-namespace ml
-{
-namespace core
-{
-
-
-CThread::CThread()
-    : m_ThreadId(0),
-      m_ThreadHandle(INVALID_HANDLE_VALUE)
-{
+CThread::CThread() : m_ThreadId(0), m_ThreadHandle(INVALID_HANDLE_VALUE) {
 }

-CThread::~CThread()
-{
+CThread::~CThread() {
     CScopedLock lock(m_IdMutex);

-    if (m_ThreadHandle != INVALID_HANDLE_VALUE)
-    {
+    if (m_ThreadHandle != INVALID_HANDLE_VALUE) {
         LOG_ERROR("Trying to destroy a running thread. Call 'stop' before destroying");
     }
 }

-bool CThread::start()
-{
+bool CThread::start() {
     TThreadId dummy(0);
     return this->start(dummy);
 }

-bool CThread::start(TThreadId &threadId)
-{
+bool CThread::start(TThreadId& threadId) {
     CScopedLock lock(m_IdMutex);

-    if (m_ThreadHandle != INVALID_HANDLE_VALUE)
-    {
+    if (m_ThreadHandle != INVALID_HANDLE_VALUE) {
         LOG_ERROR("Thread already running");
         threadId = m_ThreadId;
         return false;
@@ -64,14 +52,8 @@ bool CThread::start(TThreadId &threadId)
     // thread. However, the handle returned by _beginthreadex() has to be
     // closed by the caller of _beginthreadex(), so it is guaranteed to be a
    // valid handle if _beginthreadex() did not return an error.
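
// -------------------------------------------------------------------------
// Illustrative aside (not part of this patch): a hypothetical sketch of how
// CThread is used, assuming run() and shutdown() are the virtual methods a
// subclass must override:
//
//     class CWorker : public ml::core::CThread {
//     protected:
//         virtual void run() { /* blocking work loop */ }
//         virtual void shutdown() { /* ask run() to return */ }
//     };
//
//     CWorker worker;
//     worker.start();  // fails if the thread is already running
//     worker.stop();   // calls shutdown() and then joins the thread
// -------------------------------------------------------------------------
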
- uintptr_t handle(_beginthreadex(0, - 0, - &CThread::threadFunc, - this, - 0, - 0)); - if (handle == 0) - { + uintptr_t handle(_beginthreadex(0, 0, &CThread::threadFunc, this, 0, 0)); + if (handle == 0) { LOG_ERROR("Cannot create thread: " << ::strerror(errno)); threadId = 0; return false; @@ -84,18 +66,15 @@ bool CThread::start(TThreadId &threadId) return true; } -bool CThread::stop() -{ +bool CThread::stop() { CScopedLock lock(m_IdMutex); - if (m_ThreadHandle == INVALID_HANDLE_VALUE) - { + if (m_ThreadHandle == INVALID_HANDLE_VALUE) { LOG_ERROR("Thread not running"); return false; } - if (GetCurrentThreadId() == m_ThreadId) - { + if (GetCurrentThreadId() == m_ThreadId) { LOG_ERROR("Can't stop own thread"); return false; } @@ -103,14 +82,12 @@ bool CThread::stop() // Signal to running thread to shutdown this->shutdown(); - if (WaitForSingleObject(m_ThreadHandle, INFINITE) != 0) - { + if (WaitForSingleObject(m_ThreadHandle, INFINITE) != 0) { DWORD errCode(GetLastError()); // To match pthread behaviour, we won't report an error for joining a // thread that's already stopped - if (errCode != ERROR_INVALID_HANDLE) - { + if (errCode != ERROR_INVALID_HANDLE) { LOG_ERROR("Error joining thread: " << CWindowsError(errCode)); } } @@ -122,30 +99,25 @@ bool CThread::stop() return true; } -bool CThread::waitForFinish() -{ +bool CThread::waitForFinish() { CScopedLock lock(m_IdMutex); - if (m_ThreadHandle == INVALID_HANDLE_VALUE) - { + if (m_ThreadHandle == INVALID_HANDLE_VALUE) { LOG_ERROR("Thread not running"); return false; } - if (GetCurrentThreadId() == m_ThreadId) - { + if (GetCurrentThreadId() == m_ThreadId) { LOG_ERROR("Can't stop own thread"); return false; } - if (WaitForSingleObject(m_ThreadHandle, INFINITE) != 0) - { + if (WaitForSingleObject(m_ThreadHandle, INFINITE) != 0) { DWORD errCode(GetLastError()); // To match pthread behaviour, we won't report an error for joining a // thread that's already stopped - if (errCode != ERROR_INVALID_HANDLE) - { + if (errCode != ERROR_INVALID_HANDLE) { LOG_ERROR("Error joining thread: " << CWindowsError(errCode)); } } @@ -157,38 +129,31 @@ bool CThread::waitForFinish() return true; } -bool CThread::isStarted() const -{ +bool CThread::isStarted() const { CScopedLock lock(m_IdMutex); return (m_ThreadHandle != INVALID_HANDLE_VALUE); } -bool CThread::cancelBlockedIo() -{ +bool CThread::cancelBlockedIo() { CScopedLock lock(m_IdMutex); - if (m_ThreadHandle == INVALID_HANDLE_VALUE) - { + if (m_ThreadHandle == INVALID_HANDLE_VALUE) { LOG_ERROR("Thread not running"); return false; } - if (GetCurrentThreadId() == m_ThreadId) - { + if (GetCurrentThreadId() == m_ThreadId) { LOG_ERROR("Can't cancel blocked IO in own thread"); return false; } - if (CancelSynchronousIo(m_ThreadHandle) == FALSE) - { + if (CancelSynchronousIo(m_ThreadHandle) == FALSE) { DWORD errCode(GetLastError()); // Don't report an error if there is no blocking call to cancel - if (errCode != ERROR_NOT_FOUND) - { - LOG_ERROR("Error cancelling blocked IO in thread: " << - CWindowsError(errCode)); + if (errCode != ERROR_NOT_FOUND) { + LOG_ERROR("Error cancelling blocked IO in thread: " << CWindowsError(errCode)); return false; } } @@ -196,35 +161,26 @@ bool CThread::cancelBlockedIo() return true; } -bool CThread::cancelBlockedIo(TThreadId threadId) -{ - if (GetCurrentThreadId() == threadId) - { +bool CThread::cancelBlockedIo(TThreadId threadId) { + if (GetCurrentThreadId() == threadId) { LOG_ERROR("Can't cancel blocked IO in own thread"); return false; } - HANDLE threadHandle = 
OpenThread(THREAD_TERMINATE,
-                                     FALSE,
-                                     threadId);
+    HANDLE threadHandle = OpenThread(THREAD_TERMINATE, FALSE, threadId);

     // Note inconsistency in Win32 thread function return codes here - the error
     // return is NULL rather than INVALID_HANDLE_VALUE!
-    if (threadHandle == 0)
-    {
-        LOG_ERROR("Error cancelling blocked IO in thread " << threadId <<
-                  ": " << CWindowsError());
+    if (threadHandle == 0) {
+        LOG_ERROR("Error cancelling blocked IO in thread " << threadId << ": " << CWindowsError());
         return false;
     }

-    if (CancelSynchronousIo(threadHandle) == FALSE)
-    {
+    if (CancelSynchronousIo(threadHandle) == FALSE) {
         DWORD errCode(GetLastError());

         // Don't report an error if there is no blocking call to cancel
-        if (errCode != ERROR_NOT_FOUND)
-        {
-            LOG_ERROR("Error cancelling blocked IO in thread " << threadId <<
-                      ": " << CWindowsError(errCode));
+        if (errCode != ERROR_NOT_FOUND) {
+            LOG_ERROR("Error cancelling blocked IO in thread " << threadId << ": " << CWindowsError(errCode));
             CloseHandle(threadHandle);
             return false;
         }
@@ -235,14 +191,12 @@ bool CThread::cancelBlockedIo(TThreadId threadId)
     return true;
 }

-CThread::TThreadId CThread::currentThreadId()
-{
+CThread::TThreadId CThread::currentThreadId() {
     return GetCurrentThreadId();
 }

-CThread::TThreadRet STDCALL CThread::threadFunc(void *obj)
-{
-    CThread *instance = static_cast<CThread *>(obj);
+CThread::TThreadRet STDCALL CThread::threadFunc(void* obj) {
+    CThread* instance = static_cast<CThread*>(obj);

     instance->run();

@@ -250,8 +204,5 @@ CThread::TThreadRet STDCALL CThread::threadFunc(void *obj)
     // the C runtime library data structures associated with it
     return 0;
 }
-
-
 }
 }
-
diff --git a/lib/core/CTimeGm.cc b/lib/core/CTimeGm.cc
index b76e28f974..c18fb9db25 100644
--- a/lib/core/CTimeGm.cc
+++ b/lib/core/CTimeGm.cc
@@ -5,18 +5,11 @@
 */
 #include

+namespace ml {
+namespace core {

-namespace ml
-{
-namespace core
-{
-
-
-time_t CTimeGm::timeGm(struct tm *ts)
-{
+time_t CTimeGm::timeGm(struct tm* ts) {
     return ::timegm(ts);
 }
-
-
 }
 }
diff --git a/lib/core/CTimeGm_Windows.cc b/lib/core/CTimeGm_Windows.cc
index 6407b626b7..acbb65a46f 100644
--- a/lib/core/CTimeGm_Windows.cc
+++ b/lib/core/CTimeGm_Windows.cc
@@ -5,19 +5,11 @@
 */
 #include

+namespace ml {
+namespace core {

-namespace ml
-{
-namespace core
-{
-
-
-time_t CTimeGm::timeGm(struct tm *ts)
-{
+time_t CTimeGm::timeGm(struct tm* ts) {
     return ::_mkgmtime(ts);
 }
-
-
 }
 }
-
diff --git a/lib/core/CTimeUtils.cc b/lib/core/CTimeUtils.cc
index e5954f28a5..1421e15891 100644
--- a/lib/core/CTimeUtils.cc
+++ b/lib/core/CTimeUtils.cc
@@ -5,64 +5,50 @@
 */
 #include

-#include
 #include
 #include
 #include
 #include
 #include
+#include

 #include
 #include

-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {

 // Initialise class static data
 const core_t::TTime CTimeUtils::MAX_CLOCK_DISCREPANCY(300);

-
-core_t::TTime CTimeUtils::now()
-{
+core_t::TTime CTimeUtils::now() {
     return ::time(0);
 }

-std::string CTimeUtils::toIso8601(core_t::TTime t)
-{
+std::string CTimeUtils::toIso8601(core_t::TTime t) {
     std::string result;
     CTimeUtils::toStringCommon(t, "%Y-%m-%dT%H:%M:%S%z", result);
     return result;
 }

-std::string CTimeUtils::toLocalString(core_t::TTime t)
-{
+std::string CTimeUtils::toLocalString(core_t::TTime t) {
     std::string result;
     CTimeUtils::toStringCommon(t, "%c", result);
     return result;
 }

-std::string CTimeUtils::toTimeString(core_t::TTime t)
-{
+std::string CTimeUtils::toTimeString(core_t::TTime t) {
     std::string result;
     CTimeUtils::toStringCommon(t, "%H:%M:%S", result);
     return result;
 }
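
// -------------------------------------------------------------------------
// Illustrative aside (not part of this patch): the three helpers above
// differ only in the strftime-style format string they hand to
// toStringCommon(). For epoch time 0 with the timezone set to UTC one would
// expect roughly:
//
//     CTimeUtils::toIso8601(0);     // "1970-01-01T00:00:00+0000"
//     CTimeUtils::toTimeString(0);  // "00:00:00"
//
// Exact output depends on the configured CTimezone and locale, so these
// values are indicative only.
// -------------------------------------------------------------------------
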
-int64_t CTimeUtils::toEpochMs(core_t::TTime t)
-{
+int64_t CTimeUtils::toEpochMs(core_t::TTime t) {
     return static_cast<int64_t>(t) * 1000;
 }

-bool CTimeUtils::strptime(const std::string &format,
-                          const std::string &dateTime,
-                          core_t::TTime &preTime)
-{
-    if (CTimeUtils::strptimeSilent(format, dateTime, preTime) == false)
-    {
+bool CTimeUtils::strptime(const std::string& format, const std::string& dateTime, core_t::TTime& preTime) {
+    if (CTimeUtils::strptimeSilent(format, dateTime, preTime) == false) {
         LOG_ERROR("Unable to convert " << dateTime << " to " << format);
         return false;
     }
@@ -70,22 +56,18 @@ bool CTimeUtils::strptime(const std::string &format,
     return true;
 }

-bool CTimeUtils::strptimeSilent(const std::string &format,
-                                const std::string &dateTime,
-                                core_t::TTime &preTime)
-{
+bool CTimeUtils::strptimeSilent(const std::string& format, const std::string& dateTime, core_t::TTime& preTime) {
     struct tm t;
     ::memset(&t, 0, sizeof(struct tm));

-    const char *ret(CStrPTime::strPTime(dateTime.c_str(), format.c_str(), &t));
-    if (ret == 0)
-    {
+    const char* ret(CStrPTime::strPTime(dateTime.c_str(), format.c_str(), &t));
+    if (ret == 0) {
         return false;
     }

     t.tm_isdst = -1;

-    CTimezone &tz = CTimezone::instance();
+    CTimezone& tz = CTimezone::instance();

     // Some date formats don't give the year, so we might need to guess it
     // We'll assume that the year is the current year, unless that results
@@ -94,13 +76,11 @@ bool CTimeUtils::strptimeSilent(const std::string &format,
     struct tm copy;
     ::memset(&copy, 0, sizeof(struct tm));
     bool guessedYear(false);
-    if (t.tm_year == 0)
-    {
+    if (t.tm_year == 0) {
         struct tm now;
         ::memset(&now, 0, sizeof(struct tm));

-        if (tz.utcToLocal(CTimeUtils::now(), now) == false)
-        {
+        if (tz.utcToLocal(CTimeUtils::now(), now) == false) {
             return false;
         }

@@ -117,8 +97,7 @@ bool CTimeUtils::strptimeSilent(const std::string &format,
     // year and recalculate
     // Use a tolerance of 5 minutes in case of slight clock discrepancies
     // between different machines at the customer location
-    if (guessedYear && preTime > CTimeUtils::now() + MAX_CLOCK_DISCREPANCY)
-    {
+    if (guessedYear && preTime > CTimeUtils::now() + MAX_CLOCK_DISCREPANCY) {
         // Recalculate using a copy since mktime changes the contents of the
         // struct
         copy.tm_year -= 1;
@@ -129,27 +108,22 @@ bool CTimeUtils::strptimeSilent(const std::string &format,
     return true;
 }

-void CTimeUtils::toStringCommon(core_t::TTime t,
-                                const std::string &format,
-                                std::string &result)
-{
+void CTimeUtils::toStringCommon(core_t::TTime t, const std::string& format, std::string& result) {
     // core_t::TTime holds an epoch time (UTC)
     struct tm out;

-    CTimezone &tz = CTimezone::instance();
-    if (tz.utcToLocal(t, out) == false)
-    {
+    CTimezone& tz = CTimezone::instance();
+    if (tz.utcToLocal(t, out) == false) {
         LOG_ERROR("Cannot convert time " << t << " : " << ::strerror(errno));
         result.clear();
         return;
     }

     static const size_t SIZE(256);
-    char buf[SIZE] = { '\0' };
+    char buf[SIZE] = {'\0'};

     size_t ret(CStrFTime::strFTime(buf, SIZE, format.c_str(), &out));
-    if (ret == 0)
-    {
+    if (ret == 0) {
         LOG_ERROR("Cannot convert time " << t << " : " << ::strerror(errno));
         result.clear();
         return;
@@ -158,20 +132,16 @@ void CTimeUtils::toStringCommon(core_t::TTime t,
     result = buf;
 }

-bool CTimeUtils::isDateWord(const std::string &word)
-{
+bool CTimeUtils::isDateWord(const std::string& word) {
     return CDateWordCache::instance().isDateWord(word);
 }
-
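
// -------------------------------------------------------------------------
// Illustrative aside (not part of this patch): the cache that backs
// isDateWord() is populated below with day and month names/abbreviations
// plus a few timezone abbreviations, so for an English locale one would
// expect:
//
//     CTimeUtils::isDateWord("Tuesday");  // true
//     CTimeUtils::isDateWord("Mar");      // true
//     CTimeUtils::isDateWord("banana");   // false
//
// Results are locale-dependent because the words come from ::strftime().
// -------------------------------------------------------------------------
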
// Initialise statics for the inner class CDateWordCache
-CFastMutex CTimeUtils::CDateWordCache::ms_InitMutex;
-volatile CTimeUtils::CDateWordCache *CTimeUtils::CDateWordCache::ms_Instance(0);
+CFastMutex CTimeUtils::CDateWordCache::ms_InitMutex;
+volatile CTimeUtils::CDateWordCache* CTimeUtils::CDateWordCache::ms_Instance(0);

-const CTimeUtils::CDateWordCache &CTimeUtils::CDateWordCache::instance()
-{
-    if (ms_Instance == 0)
-    {
+const CTimeUtils::CDateWordCache& CTimeUtils::CDateWordCache::instance() {
+    if (ms_Instance == 0) {
         CScopedFastLock lock(ms_InitMutex);

         // Even if we get into this code block in more than one thread, whatever
@@ -184,18 +154,16 @@ const CTimeUtils::CDateWordCache &CTimeUtils::CDateWordCache::instance()
     }

     // Need to explicitly cast away volatility
-    return *const_cast<CDateWordCache *>(ms_Instance);
+    return *const_cast<CDateWordCache*>(ms_Instance);
 }

-bool CTimeUtils::CDateWordCache::isDateWord(const std::string &word) const
-{
+bool CTimeUtils::CDateWordCache::isDateWord(const std::string& word) const {
     return m_DateWords.find(word) != m_DateWords.end();
 }

-CTimeUtils::CDateWordCache::CDateWordCache()
-{
+CTimeUtils::CDateWordCache::CDateWordCache() {
     static const size_t SIZE(256);
-    char buf[SIZE] = { '\0' };
+    char buf[SIZE] = {'\0'};

     struct tm workTime;
     ::memset(&workTime, 0, sizeof(struct tm));
@@ -212,51 +180,42 @@ CTimeUtils::CDateWordCache::CDateWordCache()
     workTime.tm_isdst = -1;

     // Populate day-of-week names and abbreviations
-    for (int dayOfWeek = 0; dayOfWeek < 7; ++dayOfWeek)
-    {
+    for (int dayOfWeek = 0; dayOfWeek < 7; ++dayOfWeek) {
         ++workTime.tm_mday;
         workTime.tm_wday = dayOfWeek;
         ++workTime.tm_yday;
-        if (::strftime(buf, SIZE, "%a", &workTime) > 0)
-        {
+        if (::strftime(buf, SIZE, "%a", &workTime) > 0) {
             m_DateWords.insert(buf);
         }
-        if (::strftime(buf, SIZE, "%A", &workTime) > 0)
-        {
+        if (::strftime(buf, SIZE, "%A", &workTime) > 0) {
             m_DateWords.insert(buf);
         }
     }

     // Populate month names and abbreviations - first January
-    if (::strftime(buf, SIZE, "%b", &workTime) > 0)
-    {
+    if (::strftime(buf, SIZE, "%b", &workTime) > 0) {
         m_DateWords.insert(buf);
     }
-    if (::strftime(buf, SIZE, "%B", &workTime) > 0)
-    {
+    if (::strftime(buf, SIZE, "%B", &workTime) > 0) {
         m_DateWords.insert(buf);
     }

-    static const int DAYS_PER_MONTH[] = { 31, 28, 31, 30, 31, 30,
-                                          31, 31, 30, 31, 30, 31 };
+    static const int DAYS_PER_MONTH[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

     // Populate other month names and abbreviations
-    for (int month = 1; month < 12; ++month)
-    {
+    for (int month = 1; month < 12; ++month) {
         int prevMonthDays(DAYS_PER_MONTH[month - 1]);
         workTime.tm_mon = month;
         workTime.tm_wday += prevMonthDays;
         workTime.tm_wday %= 7;
         workTime.tm_yday += prevMonthDays;

-        if (::strftime(buf, SIZE, "%b", &workTime) > 0)
-        {
+        if (::strftime(buf, SIZE, "%b", &workTime) > 0) {
             m_DateWords.insert(buf);
         }
-        if (::strftime(buf, SIZE, "%B", &workTime) > 0)
-        {
+        if (::strftime(buf, SIZE, "%B", &workTime) > 0) {
             m_DateWords.insert(buf);
         }
     }
@@ -266,25 +225,19 @@ CTimeUtils::CDateWordCache::CDateWordCache()
     m_DateWords.insert("UTC");

     // Finally, add the current timezone (if available)
-    CTimezone &tz = CTimezone::instance();
-    const std::string &stdAbbrev = tz.stdAbbrev();
-    if (!stdAbbrev.empty())
-    {
+    CTimezone& tz = CTimezone::instance();
+    const std::string& stdAbbrev = tz.stdAbbrev();
+    if (!stdAbbrev.empty()) {
         m_DateWords.insert(stdAbbrev);
     }
-    const std::string &dstAbbrev = tz.dstAbbrev();
-    if (!dstAbbrev.empty())
-    {
+    const std::string& dstAbbrev = tz.dstAbbrev();
+    if (!dstAbbrev.empty()) {
         m_DateWords.insert(dstAbbrev);
     }
 }

-CTimeUtils::CDateWordCache::~CDateWordCache()
-{
+CTimeUtils::CDateWordCache::~CDateWordCache() { ms_Instance = 0; } - - } } - diff --git a/lib/core/CTimezone.cc b/lib/core/CTimezone.cc index 7577592819..db3a3cf5ee 100644 --- a/lib/core/CTimezone.cc +++ b/lib/core/CTimezone.cc @@ -12,52 +12,39 @@ #include #include - -namespace -{ +namespace { // To ensure the singleton is constructed before multiple threads may require it // call instance() during the static initialisation phase of the program. Of // course, the instance may already be constructed before this if another static // object has used it. -const ml::core::CTimezone &DO_NOT_USE_THIS_VARIABLE = - ml::core::CTimezone::instance(); +const ml::core::CTimezone& DO_NOT_USE_THIS_VARIABLE = ml::core::CTimezone::instance(); } -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { -CTimezone::CTimezone() -{ +CTimezone::CTimezone() { } -CTimezone::~CTimezone() -{ +CTimezone::~CTimezone() { } -CTimezone &CTimezone::instance() -{ +CTimezone& CTimezone::instance() { static CTimezone instance; return instance; } -const std::string &CTimezone::timezoneName() const -{ +const std::string& CTimezone::timezoneName() const { CScopedFastLock lock(m_Mutex); return m_Name; } -bool CTimezone::timezoneName(const std::string &name) -{ +bool CTimezone::timezoneName(const std::string& name) { CScopedFastLock lock(m_Mutex); - if (CSetEnv::setEnv("TZ", name.c_str(), 1) != 0) - { - LOG_ERROR("Unable to set TZ environment variable to " << name << - " : " << ::strerror(errno)); + if (CSetEnv::setEnv("TZ", name.c_str(), 1) != 0) { + LOG_ERROR("Unable to set TZ environment variable to " << name << " : " << ::strerror(errno)); return false; } @@ -69,47 +56,40 @@ bool CTimezone::timezoneName(const std::string &name) return true; } -bool CTimezone::setTimezone(const std::string &timezone) -{ +bool CTimezone::setTimezone(const std::string& timezone) { return CTimezone::instance().timezoneName(timezone); } -std::string CTimezone::stdAbbrev() const -{ +std::string CTimezone::stdAbbrev() const { CScopedFastLock lock(m_Mutex); return ::tzname[0]; } -std::string CTimezone::dstAbbrev() const -{ +std::string CTimezone::dstAbbrev() const { CScopedFastLock lock(m_Mutex); return ::tzname[1]; } -core_t::TTime CTimezone::localToUtc(struct tm &localTime) const -{ +core_t::TTime CTimezone::localToUtc(struct tm& localTime) const { return ::mktime(&localTime); } -bool CTimezone::utcToLocal(core_t::TTime utcTime, struct tm &localTime) const -{ - if (::localtime_r(&utcTime, &localTime) == 0) - { +bool CTimezone::utcToLocal(core_t::TTime utcTime, struct tm& localTime) const { + if (::localtime_r(&utcTime, &localTime) == 0) { return false; } return true; } bool CTimezone::dateFields(core_t::TTime utcTime, - int &daysSinceSunday, - int &dayOfMonth, - int &daysSinceJanuary1st, - int &monthsSinceJanuary, - int &yearsSince1900, - int &secondsSinceMidnight) const -{ + int& daysSinceSunday, + int& dayOfMonth, + int& daysSinceJanuary1st, + int& monthsSinceJanuary, + int& yearsSince1900, + int& secondsSinceMidnight) const { daysSinceSunday = -1; dayOfMonth = -1; daysSinceJanuary1st = -1; @@ -120,8 +100,7 @@ bool CTimezone::dateFields(core_t::TTime utcTime, struct tm result; // core_t::TTime holds an epoch time (UTC) - if (this->utcToLocal(utcTime, result)) - { + if (this->utcToLocal(utcTime, result)) { daysSinceSunday = result.tm_wday; dayOfMonth = result.tm_mday; monthsSinceJanuary = result.tm_mon; @@ -133,7 +112,5 @@ bool CTimezone::dateFields(core_t::TTime utcTime, return false; } - } } - diff --git 
a/lib/core/CTimezone_Windows.cc b/lib/core/CTimezone_Windows.cc index 33eebd92cd..e9861d0ddb 100644 --- a/lib/core/CTimezone_Windows.cc +++ b/lib/core/CTimezone_Windows.cc @@ -20,24 +20,18 @@ #include #include - -namespace -{ +namespace { // To ensure the singleton is constructed before multiple threads may require it // call instance() during the static initialisation phase of the program. Of // course, the instance may already be constructed before this if another static // object has used it. -const ml::core::CTimezone &DO_NOT_USE_THIS_VARIABLE = - ml::core::CTimezone::instance(); +const ml::core::CTimezone& DO_NOT_USE_THIS_VARIABLE = ml::core::CTimezone::instance(); } -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { -CTimezone::CTimezone() -{ +CTimezone::CTimezone() { CScopedFastLock lock(m_Mutex); // We never want to use the Visual C++ runtime library's timezone switching @@ -45,8 +39,7 @@ CTimezone::CTimezone() // environment variable is unset, so that the operating system settings are // obtained and used by the C runtime. Timezones other than the current // operating system timezone will be dealt with using Boost. - if (::getenv("TZ") != 0) - { + if (::getenv("TZ") != 0) { ::_putenv_s("TZ", ""); } @@ -55,50 +48,37 @@ CTimezone::CTimezone() // Try to load the Boost timezone database std::string path(CResourceLocator::resourceDir()); path += "/date_time_zonespec.csv"; - try - { + try { m_TimezoneDb.load_from_file(path); - } - catch (std::exception &ex) - { - LOG_ERROR("Failed to load Boost timezone database from " << path << - " : " << ex.what()); - } + } catch (std::exception& ex) { LOG_ERROR("Failed to load Boost timezone database from " << path << " : " << ex.what()); } } -CTimezone::~CTimezone() -{ +CTimezone::~CTimezone() { } -CTimezone &CTimezone::instance() -{ +CTimezone& CTimezone::instance() { static CTimezone instance; return instance; } -const std::string &CTimezone::timezoneName() const -{ +const std::string& CTimezone::timezoneName() const { CScopedFastLock lock(m_Mutex); return m_Name; } -bool CTimezone::timezoneName(const std::string &name) -{ +bool CTimezone::timezoneName(const std::string& name) { CScopedFastLock lock(m_Mutex); - if (name.empty()) - { + if (name.empty()) { m_Timezone.reset(); m_Name.clear(); return true; } m_Timezone = m_TimezoneDb.time_zone_from_region(name); - if (m_Timezone == 0) - { - LOG_ERROR("Unable to set timezone to " << name << - " - operating system timezone settings will be used instead"); + if (m_Timezone == 0) { + LOG_ERROR("Unable to set timezone to " << name << " - operating system timezone settings will be used instead"); m_Name.clear(); return false; @@ -109,41 +89,34 @@ bool CTimezone::timezoneName(const std::string &name) return true; } -bool CTimezone::setTimezone(const std::string &timezone) -{ +bool CTimezone::setTimezone(const std::string& timezone) { return CTimezone::instance().timezoneName(timezone); } -std::string CTimezone::stdAbbrev() const -{ +std::string CTimezone::stdAbbrev() const { CScopedFastLock lock(m_Mutex); - if (m_Timezone == 0) - { + if (m_Timezone == 0) { return _tzname[0]; } return m_Timezone->std_zone_abbrev(); } -std::string CTimezone::dstAbbrev() const -{ +std::string CTimezone::dstAbbrev() const { CScopedFastLock lock(m_Mutex); - if (m_Timezone == 0) - { + if (m_Timezone == 0) { return _tzname[1]; } return m_Timezone->has_dst() ? 
m_Timezone->dst_zone_abbrev() : m_Timezone->std_zone_abbrev(); } -core_t::TTime CTimezone::localToUtc(struct tm &localTime) const -{ +core_t::TTime CTimezone::localToUtc(struct tm& localTime) const { CScopedFastLock lock(m_Mutex); - if (m_Timezone == 0) - { + if (m_Timezone == 0) { // We're using operating system timezone settings, so use the C // runtime's result return ::mktime(&localTime); @@ -152,9 +125,7 @@ core_t::TTime CTimezone::localToUtc(struct tm &localTime) const // The timezone for this program has been explicitly set, and might not // be the same as the operating system timezone, so use Boost - static const boost::posix_time::ptime EPOCH(boost::gregorian::date(1970, - 1, - 1)); + static const boost::posix_time::ptime EPOCH(boost::gregorian::date(1970, 1, 1)); boost::gregorian::date dateIn(boost::gregorian::date_from_tm(localTime)); boost::posix_time::time_duration timeIn(static_cast(localTime.tm_hour), @@ -162,28 +133,17 @@ core_t::TTime CTimezone::localToUtc(struct tm &localTime) const static_cast(localTime.tm_sec)); boost::posix_time::time_duration diff; - try - { - boost::local_time::local_date_time boostLocal(dateIn, - timeIn, - m_Timezone, - boost::local_time::local_date_time::EXCEPTION_ON_ERROR); + try { + boost::local_time::local_date_time boostLocal(dateIn, timeIn, m_Timezone, boost::local_time::local_date_time::EXCEPTION_ON_ERROR); diff = boostLocal.utc_time() - EPOCH; localTime.tm_isdst = (boostLocal.is_dst() ? 1 : 0); - } - catch (boost::local_time::ambiguous_result &) - { + } catch (boost::local_time::ambiguous_result&) { // If we get an ambiguous time, assume it's standard, not daylight // savings - boost::local_time::local_date_time boostLocal(dateIn, - timeIn, - m_Timezone, - false); + boost::local_time::local_date_time boostLocal(dateIn, timeIn, m_Timezone, false); diff = boostLocal.utc_time() - EPOCH; localTime.tm_isdst = 0; - } - catch (std::exception &ex) - { + } catch (std::exception& ex) { // Any other exception represents an error in the input LOG_ERROR("Error converting local time to UTC : " << ex.what()); errno = EINVAL; @@ -193,15 +153,12 @@ core_t::TTime CTimezone::localToUtc(struct tm &localTime) const return diff.total_seconds(); } -bool CTimezone::utcToLocal(core_t::TTime utcTime, struct tm &localTime) const -{ +bool CTimezone::utcToLocal(core_t::TTime utcTime, struct tm& localTime) const { CScopedFastLock lock(m_Mutex); - if (m_Timezone == 0) - { + if (m_Timezone == 0) { // We're using operating system timezone settings, so use the C runtime - if (::localtime_s(&localTime, &utcTime) != 0) - { + if (::localtime_s(&localTime, &utcTime) != 0) { return false; } return true; @@ -217,13 +174,12 @@ bool CTimezone::utcToLocal(core_t::TTime utcTime, struct tm &localTime) const } bool CTimezone::dateFields(core_t::TTime utcTime, - int &daysSinceSunday, - int &dayOfMonth, - int &daysSinceJanuary1st, - int &monthsSinceJanuary, - int &yearsSince1900, - int &secondsSinceMidnight) const -{ + int& daysSinceSunday, + int& dayOfMonth, + int& daysSinceJanuary1st, + int& monthsSinceJanuary, + int& yearsSince1900, + int& secondsSinceMidnight) const { daysSinceSunday = -1; dayOfMonth = -1; daysSinceJanuary1st = -1; @@ -234,8 +190,7 @@ bool CTimezone::dateFields(core_t::TTime utcTime, struct tm result; // core_t::TTime holds an epoch time (UTC) - if (this->utcToLocal(utcTime, result)) - { + if (this->utcToLocal(utcTime, result)) { daysSinceSunday = result.tm_wday; dayOfMonth = result.tm_mday; monthsSinceJanuary = result.tm_mon; @@ -247,7 +202,5 @@ bool 
CTimezone::dateFields(core_t::TTime utcTime, return false; } - } } - diff --git a/lib/core/CUnSetEnv.cc b/lib/core/CUnSetEnv.cc index e5fdb84bfa..7d4e2bc6a7 100644 --- a/lib/core/CUnSetEnv.cc +++ b/lib/core/CUnSetEnv.cc @@ -7,19 +7,11 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -int CUnSetEnv::unSetEnv(const char *name) -{ +int CUnSetEnv::unSetEnv(const char* name) { return ::unsetenv(name); } - - } } - diff --git a/lib/core/CUnSetEnv_Windows.cc b/lib/core/CUnSetEnv_Windows.cc index dab2b1f1d1..436f8b51e4 100644 --- a/lib/core/CUnSetEnv_Windows.cc +++ b/lib/core/CUnSetEnv_Windows.cc @@ -7,21 +7,13 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -int CUnSetEnv::unSetEnv(const char *name) -{ +int CUnSetEnv::unSetEnv(const char* name) { // The Microsoft C runtime library treats a request to set an environment // variable to an empty string as a request to delete it return ::_putenv_s(name, ""); } - - } } - diff --git a/lib/core/CUname.cc b/lib/core/CUname.cc index 946e509ba8..3ee4579aaf 100644 --- a/lib/core/CUname.cc +++ b/lib/core/CUname.cc @@ -12,55 +12,45 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -std::string CUname::sysName() -{ +std::string CUname::sysName() { struct utsname name; ::uname(&name); return name.sysname; } -std::string CUname::nodeName() -{ +std::string CUname::nodeName() { struct utsname name; ::uname(&name); return name.nodename; } -std::string CUname::release() -{ +std::string CUname::release() { struct utsname name; ::uname(&name); return name.release; } -std::string CUname::version() -{ +std::string CUname::version() { struct utsname name; ::uname(&name); return name.version; } -std::string CUname::machine() -{ +std::string CUname::machine() { struct utsname name; ::uname(&name); return name.machine; } -std::string CUname::all() -{ +std::string CUname::all() { struct utsname name; ::uname(&name); @@ -78,8 +68,7 @@ std::string CUname::all() return all; } -std::string CUname::mlPlatform() -{ +std::string CUname::mlPlatform() { struct utsname name; ::uname(&name); @@ -98,40 +87,31 @@ std::string CUname::mlPlatform() std::string os(CStringUtils::toLower(name.sysname)); #ifdef _CS_GNU_LIBC_VERSION - if (os == "linux") - { - char buffer[128] = { '\0' }; + if (os == "linux") { + char buffer[128] = {'\0'}; // This isn't great because it's assuming that any C runtime library // that doesn't identify itself as glibc is musl, but it's hard to do // better as musl goes out of its way to be hard to detect - if (::confstr(_CS_GNU_LIBC_VERSION, buffer, sizeof(buffer)) == 0 || - ::strstr(buffer, "glibc") == 0) - { + if (::confstr(_CS_GNU_LIBC_VERSION, buffer, sizeof(buffer)) == 0 || ::strstr(buffer, "glibc") == 0) { os += "-musl"; } } #endif - const std::string &machine = CStringUtils::toLower(name.machine); - if (machine.length() == 4 && machine[0] == 'i' && machine[2] == '8' && machine[3] == '6') - { + const std::string& machine = CStringUtils::toLower(name.machine); + if (machine.length() == 4 && machine[0] == 'i' && machine[2] == '8' && machine[3] == '6') { return os + "-x86"; } - if (machine == "amd64" || machine == "i86pc") - { + if (machine == "amd64" || machine == "i86pc") { return os + "-x86_64"; } return os + '-' + machine; } -std::string CUname::mlOsVer() -{ +std::string CUname::mlOsVer() { return CUname::release(); } - - } } - diff --git a/lib/core/CUname_Windows.cc b/lib/core/CUname_Windows.cc index ca8ca305fc..8d299eaf0e 100644 --- 
a/lib/core/CUname_Windows.cc
+++ b/lib/core/CUname_Windows.cc
@@ -14,60 +14,43 @@
 #include
 #include

+namespace ml {
+namespace core {
+namespace detail {

-namespace ml
-{
-namespace core
-{
-namespace detail
-{
-
-
-bool queryKernelVersion(uint16_t &major, uint16_t &minor, uint16_t &build)
-{
+bool queryKernelVersion(uint16_t& major, uint16_t& minor, uint16_t& build) {
     // This used to be done with GetVersionEx(), but that no longer works
     // starting with Windows 8.1/Windows Server 2012r2. Instead we get the
     // true OS version by looking at the product version for kernel32.dll, and
     // then distinguish client/server versions of Windows using
     // VerifyVersionInfo().
-    static const char *KERNEL32_DLL("kernel32.dll");
+    static const char* KERNEL32_DLL("kernel32.dll");

     DWORD handle(0);
     DWORD size(GetFileVersionInfoSize(KERNEL32_DLL, &handle));
-    if (size == 0)
-    {
-        LOG_ERROR("Error getting file version info size for " << KERNEL32_DLL <<
-                  " - error code : " << CWindowsError());
+    if (size == 0) {
+        LOG_ERROR("Error getting file version info size for " << KERNEL32_DLL << " - error code : " << CWindowsError());
         return false;
     }

     using TScopedCharArray = boost::scoped_array<char>;
     TScopedCharArray buffer(new char[size]);
-    if (GetFileVersionInfo(KERNEL32_DLL, handle, size, buffer.get()) == FALSE)
-    {
-        LOG_ERROR("Error getting file version info for " << KERNEL32_DLL <<
-                  " - error code : " << CWindowsError());
+    if (GetFileVersionInfo(KERNEL32_DLL, handle, size, buffer.get()) == FALSE) {
+        LOG_ERROR("Error getting file version info for " << KERNEL32_DLL << " - error code : " << CWindowsError());
         return false;
     }

     UINT len(0);
-    VS_FIXEDFILEINFO *fixedFileInfo(0);
-    if (VerQueryValue(buffer.get(),
-                      "\\",
-                      reinterpret_cast<LPVOID *>(&fixedFileInfo),
-                      &len) == FALSE)
-    {
-        LOG_ERROR("Error querying fixed file info for " << KERNEL32_DLL <<
-                  " - error code : " << CWindowsError());
+    VS_FIXEDFILEINFO* fixedFileInfo(0);
+    if (VerQueryValue(buffer.get(), "\\", reinterpret_cast<LPVOID*>(&fixedFileInfo), &len) == FALSE) {
+        LOG_ERROR("Error querying fixed file info for " << KERNEL32_DLL << " - error code : " << CWindowsError());
         return false;
     }

-    if (len < sizeof(VS_FIXEDFILEINFO))
-    {
-        LOG_ERROR("Too little data returned for VS_FIXEDFILEINFO - " <<
-                  "expected " << sizeof(VS_FIXEDFILEINFO) << " bytes, got " <<
-                  len);
+    if (len < sizeof(VS_FIXEDFILEINFO)) {
+        LOG_ERROR("Too little data returned for VS_FIXEDFILEINFO - "
+                  << "expected " << sizeof(VS_FIXEDFILEINFO) << " bytes, got " << len);
         return false;
     }

@@ -77,51 +60,38 @@ bool queryKernelVersion(uint16_t &major, uint16_t &minor, uint16_t &build)
     return true;
 }
-
-
 }
-
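
// -------------------------------------------------------------------------
// Illustrative aside (not part of this patch): release(), version() and
// mlOsVer() below all build on this helper. A sketch of the calling pattern:
//
//     uint16_t major(0), minor(0), build(0);
//     if (ml::core::detail::queryKernelVersion(major, minor, build)) {
//         // e.g. major 6, minor 3 on Windows 8.1 / Server 2012r2,
//         // major 10, minor 0 on Windows 10 / Server 2016
//     }
//
// The sample version numbers are well-known Windows kernel versions, quoted
// for orientation rather than taken from this code.
// -------------------------------------------------------------------------
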
-std::string CUname::sysName()
-{
+std::string CUname::sysName() {
     return "Windows";
 }

-std::string CUname::nodeName()
-{
+std::string CUname::nodeName() {
     // First ask with a size of zero to find the required size
     DWORD size(0);
     BOOL res(GetComputerNameEx(ComputerNameDnsHostname, 0, &size));
-    if (res != FALSE || GetLastError() != ERROR_MORE_DATA)
-    {
-        LOG_ERROR("Error getting computer name length - error code : " <<
-                  CWindowsError());
+    if (res != FALSE || GetLastError() != ERROR_MORE_DATA) {
+        LOG_ERROR("Error getting computer name length - error code : " << CWindowsError());
        return std::string();
     }

     using TCharVec = std::vector<char>;
     TCharVec buffer(size);

-    res = GetComputerNameEx(ComputerNameDnsHostname,
-                            &buffer[0],
-                            &size);
-    if (res == FALSE)
-    {
-        LOG_ERROR("Error getting computer name - error code : " <<
-                  CWindowsError());
+    res = GetComputerNameEx(ComputerNameDnsHostname, &buffer[0], &size);
+    if (res == FALSE) {
+        LOG_ERROR("Error getting computer name - error code : " << CWindowsError());
         return std::string();
     }

     return std::string(buffer.begin(), buffer.begin() + size);
 }

-std::string CUname::release()
-{
+std::string CUname::release() {
     uint16_t major(0);
     uint16_t minor(0);
     uint16_t build(0);
-    if (detail::queryKernelVersion(major, minor, build) == false)
-    {
+    if (detail::queryKernelVersion(major, minor, build) == false) {
         // Error logging done in the helper function
         return std::string();
     }
@@ -132,13 +102,11 @@ std::string CUname::release()
     return strm.str();
 }

-std::string CUname::version()
-{
+std::string CUname::version() {
     uint16_t major(0);
     uint16_t minor(0);
     uint16_t build(0);
-    if (detail::queryKernelVersion(major, minor, build) == false)
-    {
+    if (detail::queryKernelVersion(major, minor, build) == false) {
         // Error logging done in the helper function
         return std::string();
     }
@@ -148,41 +116,23 @@ std::string CUname::version()

     // Client and server builds of Windows share the same version numbers, so
     // determine whether this is a client or server
-    OSVERSIONINFOEX versionInfoEx = { 0 };
+    OSVERSIONINFOEX versionInfoEx = {0};
     versionInfoEx.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);

     DWORDLONG conditionMask(0);
     versionInfoEx.wProductType = VER_NT_DOMAIN_CONTROLLER;
-    if (VerifyVersionInfo(&versionInfoEx,
-                          VER_PRODUCT_TYPE,
-                          VerSetConditionMask(conditionMask,
-                                              VER_PRODUCT_TYPE,
-                                              VER_EQUAL)) != FALSE)
-    {
+    if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, VER_EQUAL)) != FALSE) {
         strm << " (Domain Controller)";
-    }
-    else
-    {
+    } else {
         conditionMask = 0;
         versionInfoEx.wProductType = VER_NT_SERVER;
-        if (VerifyVersionInfo(&versionInfoEx,
-                              VER_PRODUCT_TYPE,
-                              VerSetConditionMask(conditionMask,
-                                                  VER_PRODUCT_TYPE,
-                                                  VER_EQUAL)) != FALSE)
-        {
+        if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, VER_EQUAL)) != FALSE) {
             strm << " (Server)";
-        }
-        else
-        {
+        } else {
             conditionMask = 0;
             versionInfoEx.wProductType = VER_NT_WORKSTATION;
-            if (VerifyVersionInfo(&versionInfoEx,
-                                  VER_PRODUCT_TYPE,
-                                  VerSetConditionMask(conditionMask,
-                                                      VER_PRODUCT_TYPE,
-                                                      VER_EQUAL)) != FALSE)
-            {
+            if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, VER_EQUAL)) !=
+                FALSE) {
                 strm << " (Workstation)";
             }
         }
@@ -193,39 +143,36 @@ std::string CUname::version()
     return strm.str();
 }

-std::string CUname::machine()
-{
+std::string CUname::machine() {
     SYSTEM_INFO systemInfo;
     GetNativeSystemInfo(&systemInfo);

     std::string result;

-    switch (systemInfo.wProcessorArchitecture)
-    {
-        case PROCESSOR_ARCHITECTURE_AMD64:
-            result = "x64";
-            break;
-        case PROCESSOR_ARCHITECTURE_IA64:
-            result = "itanium";
-            break;
-        case PROCESSOR_ARCHITECTURE_INTEL:
-            result = "x86";
-            break;
-        case PROCESSOR_ARCHITECTURE_UNKNOWN:
-            result = "unknown";
-            break;
-        default:
-            LOG_ERROR("Unexpected result from GetNativeSystemInfo() : "
-                      "wProcessorArchitecture = " <<
-                      systemInfo.wProcessorArchitecture);
-            break;
+    switch (systemInfo.wProcessorArchitecture) {
+    case PROCESSOR_ARCHITECTURE_AMD64:
+        result = "x64";
+        break;
+    case PROCESSOR_ARCHITECTURE_IA64:
+        result = "itanium";
+        break;
+    case PROCESSOR_ARCHITECTURE_INTEL:
+        result = "x86";
+        break;
+    case PROCESSOR_ARCHITECTURE_UNKNOWN:
+        result = "unknown";
+        break;
+    default:
+        LOG_ERROR("Unexpected result from GetNativeSystemInfo() : "
+                  "wProcessorArchitecture = "
+                  << systemInfo.wProcessorArchitecture);
+
break; } return result; } -std::string CUname::all() -{ +std::string CUname::all() { // This is in the format of "uname -a" std::string all(CUname::sysName()); all += ' '; @@ -240,25 +187,21 @@ std::string CUname::all() return all; } -std::string CUname::mlPlatform() -{ +std::string CUname::mlPlatform() { // Determine the current platform name, in the format used by Kibana // downloads. For Windows this is either "windows-x86" or "windows-x86_64". - if (sizeof(void *) == 8) - { + if (sizeof(void*) == 8) { return "windows-x86_64"; } return "windows-x86"; } -std::string CUname::mlOsVer() -{ +std::string CUname::mlOsVer() { uint16_t major(0); uint16_t minor(0); uint16_t build(0); - if (detail::queryKernelVersion(major, minor, build) == false) - { + if (detail::queryKernelVersion(major, minor, build) == false) { // Error logging done in the helper function return std::string(); } @@ -268,8 +211,5 @@ std::string CUname::mlOsVer() return strm.str(); } - - } } - diff --git a/lib/core/CWindowsError.cc b/lib/core/CWindowsError.cc index 837f4095cd..b77817aa40 100644 --- a/lib/core/CWindowsError.cc +++ b/lib/core/CWindowsError.cc @@ -7,41 +7,26 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CWindowsError::CWindowsError() - : m_ErrorCode(0) -{ +CWindowsError::CWindowsError() : m_ErrorCode(0) { } -CWindowsError::CWindowsError(uint32_t /* errorCode */) - : m_ErrorCode(0) -{ +CWindowsError::CWindowsError(uint32_t /* errorCode */) : m_ErrorCode(0) { } -uint32_t CWindowsError::errorCode() const -{ +uint32_t CWindowsError::errorCode() const { return m_ErrorCode; } -std::string CWindowsError::errorString() const -{ +std::string CWindowsError::errorString() const { return "Asking for Windows error message on Unix!"; } -std::ostream &operator<<(std::ostream &os, - const CWindowsError & /* windowsError */) -{ +std::ostream& operator<<(std::ostream& os, const CWindowsError& /* windowsError */) { os << "Asking for Windows error message on Unix!"; return os; } - - } } - diff --git a/lib/core/CWindowsError_Windows.cc b/lib/core/CWindowsError_Windows.cc index 981c2a5ad9..6783bdfc27 100644 --- a/lib/core/CWindowsError_Windows.cc +++ b/lib/core/CWindowsError_Windows.cc @@ -10,36 +10,25 @@ #include -namespace -{ +namespace { static const size_t BUFFER_SIZE(1024); } +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CWindowsError::CWindowsError() - : m_ErrorCode(GetLastError()) -{ +CWindowsError::CWindowsError() : m_ErrorCode(GetLastError()) { } -CWindowsError::CWindowsError(uint32_t errorCode) - : m_ErrorCode(errorCode) -{ +CWindowsError::CWindowsError(uint32_t errorCode) : m_ErrorCode(errorCode) { } -uint32_t CWindowsError::errorCode() const -{ +uint32_t CWindowsError::errorCode() const { return m_ErrorCode; } -std::string CWindowsError::errorString() const -{ - char message[BUFFER_SIZE] = { '\0' }; +std::string CWindowsError::errorString() const { + char message[BUFFER_SIZE] = {'\0'}; DWORD msgLen(FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK, 0, @@ -48,18 +37,15 @@ std::string CWindowsError::errorString() const message, BUFFER_SIZE, 0)); - if (msgLen == 0) - { + if (msgLen == 0) { return "unknown error code (" + CStringUtils::typeToString(m_ErrorCode) + ')'; } return message; } -std::ostream &operator<<(std::ostream &os, - const CWindowsError &windowsError) -{ - char message[BUFFER_SIZE] = { '\0' }; +std::ostream& operator<<(std::ostream& os, const CWindowsError& windowsError) { + char 
message[BUFFER_SIZE] = {'\0'}; DWORD msgLen(FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK, 0, @@ -68,19 +54,13 @@ std::ostream &operator<<(std::ostream &os, message, BUFFER_SIZE, 0)); - if (msgLen == 0) - { + if (msgLen == 0) { os << "unknown error code (" << windowsError.m_ErrorCode << ')'; - } - else - { + } else { os << message; } return os; } - - } } - diff --git a/lib/core/CWordDictionary.cc b/lib/core/CWordDictionary.cc index dc3a23da97..2a55eaae09 100644 --- a/lib/core/CWordDictionary.cc +++ b/lib/core/CWordDictionary.cc @@ -15,78 +15,63 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -namespace -{ - +namespace { const char PART_OF_SPEECH_SEPARATOR('@'); - -CWordDictionary::EPartOfSpeech partOfSpeechFromCode(char partOfSpeechCode) -{ +CWordDictionary::EPartOfSpeech partOfSpeechFromCode(char partOfSpeechCode) { // These codes are taken from the readme file that comes with Moby // Part-of-Speech - see http://icon.shef.ac.uk/Moby/mpos.html - switch (partOfSpeechCode) - { - case '?': - // This means the word existed in SCOWL but not Moby and none of the - // heuristics in crossref.py worked - return CWordDictionary::E_UnknownPart; - case 'N': - case 'h': // Currently don't distinguish noun phrases - case 'o': // Currently don't distinguish nominative nouns - return CWordDictionary::E_Noun; - case 'p': - return CWordDictionary::E_Plural; - case 'V': - case 't': // Currently don't distinguish transitive verbs - case 'i': // Currently don't distinguish intransitive verbs - return CWordDictionary::E_Verb; - case 'A': - return CWordDictionary::E_Adjective; - case 'v': - return CWordDictionary::E_Adverb; - case 'C': - return CWordDictionary::E_Conjunction; - case 'P': - return CWordDictionary::E_Preposition; - case '!': - return CWordDictionary::E_Interjection; - case 'r': - return CWordDictionary::E_Pronoun; - case 'D': - return CWordDictionary::E_DefiniteArticle; - case 'I': - return CWordDictionary::E_IndefiniteArticle; - default: - break; + switch (partOfSpeechCode) { + case '?': + // This means the word existed in SCOWL but not Moby and none of the + // heuristics in crossref.py worked + return CWordDictionary::E_UnknownPart; + case 'N': + case 'h': // Currently don't distinguish noun phrases + case 'o': // Currently don't distinguish nominative nouns + return CWordDictionary::E_Noun; + case 'p': + return CWordDictionary::E_Plural; + case 'V': + case 't': // Currently don't distinguish transitive verbs + case 'i': // Currently don't distinguish intransitive verbs + return CWordDictionary::E_Verb; + case 'A': + return CWordDictionary::E_Adjective; + case 'v': + return CWordDictionary::E_Adverb; + case 'C': + return CWordDictionary::E_Conjunction; + case 'P': + return CWordDictionary::E_Preposition; + case '!': + return CWordDictionary::E_Interjection; + case 'r': + return CWordDictionary::E_Pronoun; + case 'D': + return CWordDictionary::E_DefiniteArticle; + case 'I': + return CWordDictionary::E_IndefiniteArticle; + default: + break; } // This should be treated as an error when returned by this function return CWordDictionary::E_NotInDictionary; } - - } +const char* CWordDictionary::DICTIONARY_FILE("ml-en.dict"); -const char *CWordDictionary::DICTIONARY_FILE("ml-en.dict"); +CFastMutex CWordDictionary::ms_LoadMutex; +volatile CWordDictionary* CWordDictionary::ms_Instance(0); -CFastMutex CWordDictionary::ms_LoadMutex; -volatile CWordDictionary *CWordDictionary::ms_Instance(0); - - -const 
CWordDictionary &CWordDictionary::instance()
-{
-    if (ms_Instance == 0)
-    {
+const CWordDictionary& CWordDictionary::instance() {
+    if (ms_Instance == 0) {
         CScopedFastLock lock(ms_LoadMutex);

         // Even if we get into this code block in more than one thread, whatever
@@ -99,97 +84,73 @@ const CWordDictionary &CWordDictionary::instance()
     }

     // Need to explicitly cast away volatility
-    return *const_cast<CWordDictionary *>(ms_Instance);
+    return *const_cast<CWordDictionary*>(ms_Instance);
 }

-bool CWordDictionary::isInDictionary(const std::string &str) const
-{
+bool CWordDictionary::isInDictionary(const std::string& str) const {
     return m_DictionaryWords.find(str) != m_DictionaryWords.end();
 }

-CWordDictionary::EPartOfSpeech CWordDictionary::partOfSpeech(const std::string &str) const
-{
+CWordDictionary::EPartOfSpeech CWordDictionary::partOfSpeech(const std::string& str) const {
     TStrUMapCItr iter = m_DictionaryWords.find(str);
-    if (iter == m_DictionaryWords.end())
-    {
+    if (iter == m_DictionaryWords.end()) {
         return E_NotInDictionary;
     }
     return iter->second;
 }

-CWordDictionary::CWordDictionary()
-{
+CWordDictionary::CWordDictionary() {
     std::string fileToLoad(CResourceLocator::resourceDir() + '/' + DICTIONARY_FILE);

     // If the file can't be read for some reason, we just end up with an empty
     // dictionary
     std::ifstream ifs(fileToLoad.c_str());
-    if (ifs.is_open())
-    {
-        LOG_DEBUG("Populating word dictionary from file " <<
-                  fileToLoad);
+    if (ifs.is_open()) {
+        LOG_DEBUG("Populating word dictionary from file " << fileToLoad);

         std::string word;
-        while (std::getline(ifs, word))
-        {
+        while (std::getline(ifs, word)) {
             CStringUtils::trimWhitespace(word);
-            if (word.empty())
-            {
+            if (word.empty()) {
                 continue;
             }

             size_t sepPos(word.find(PART_OF_SPEECH_SEPARATOR));
-            if (sepPos == std::string::npos)
-            {
-                LOG_ERROR("Found word with no part-of-speech separator: " <<
-                          word);
+            if (sepPos == std::string::npos) {
+                LOG_ERROR("Found word with no part-of-speech separator: " << word);
                 continue;
             }
-            if (sepPos == 0)
-            {
-                LOG_ERROR("Found part-of-speech separator with no preceding word: " <<
-                          word);
+            if (sepPos == 0) {
+                LOG_ERROR("Found part-of-speech separator with no preceding word: " << word);
                 continue;
             }
-            if (sepPos + 1 >= word.length())
-            {
-                LOG_ERROR("Found word with no part-of-speech code: " <<
-                          word);
+            if (sepPos + 1 >= word.length()) {
+                LOG_ERROR("Found word with no part-of-speech code: " << word);
                 continue;
             }

             char partOfSpeechCode(word[sepPos + 1]);
             EPartOfSpeech partOfSpeech(partOfSpeechFromCode(partOfSpeechCode));
-            if (partOfSpeech == E_NotInDictionary)
-            {
-                LOG_ERROR("Unknown part-of-speech code (" << partOfSpeechCode <<
-                          ") for word: " << word);
+            if (partOfSpeech == E_NotInDictionary) {
+                LOG_ERROR("Unknown part-of-speech code (" << partOfSpeechCode << ") for word: " << word);
                 continue;
             }

             word.erase(sepPos);

             m_DictionaryWords[word] = partOfSpeech;
         }

-        LOG_DEBUG("Populated word dictionary with " <<
-                  m_DictionaryWords.size() << " words");
-    }
-    else
-    {
+        LOG_DEBUG("Populated word dictionary with " << m_DictionaryWords.size() << " words");
+    } else {
         LOG_ERROR("Failed to open dictionary file " << fileToLoad);
     }
 }

-CWordDictionary::~CWordDictionary()
-{
+CWordDictionary::~CWordDictionary() {
     ms_Instance = 0;
 }
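
// -------------------------------------------------------------------------
// Illustrative aside (not part of this patch): typical lookups, assuming the
// standard ml-en.dict resource has been loaded. Lookups are case-insensitive
// thanks to the hash and equality functors below:
//
//     const CWordDictionary& dict = CWordDictionary::instance();
//     dict.isInDictionary("Error");  // true for any word in the dictionary
//     dict.partOfSpeech("error");    // e.g. E_Noun
//     dict.partOfSpeech("xyzzy");    // E_NotInDictionary
//
// The exact part of speech returned depends on the dictionary file contents.
// -------------------------------------------------------------------------
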
-size_t CWordDictionary::CStrHashIgnoreCase::operator()(const std::string &str) const
-{
+size_t CWordDictionary::CStrHashIgnoreCase::operator()(const std::string& str) const {
     size_t hash(0);

-    for (std::string::const_iterator iter = str.begin();
-         iter != str.end();
-         ++iter)
-    {
+    for (std::string::const_iterator iter = str.begin(); iter != str.end(); ++iter) {
         hash *= 17;
         hash += ::tolower(*iter);
     }
@@ -197,14 +158,8 @@ size_t CWordDictionary::CStrHashIgnoreCase::operator()(const std::string &str) c
     return hash;
 }

-bool CWordDictionary::CStrEqualIgnoreCase::operator()(const std::string &lhs,
-                                                      const std::string &rhs) const
-{
-    return lhs.length() == rhs.length() &&
-           CStrCaseCmp::strCaseCmp(lhs.c_str(), rhs.c_str()) == 0;
+bool CWordDictionary::CStrEqualIgnoreCase::operator()(const std::string& lhs, const std::string& rhs) const {
+    return lhs.length() == rhs.length() && CStrCaseCmp::strCaseCmp(lhs.c_str(), rhs.c_str()) == 0;
 }
-
-
 }
 }
-
diff --git a/lib/core/CWordExtractor.cc b/lib/core/CWordExtractor.cc
index 7eed74858f..6aa75c5342 100644
--- a/lib/core/CWordExtractor.cc
+++ b/lib/core/CWordExtractor.cc
@@ -9,26 +9,16 @@
 #include

-
-namespace ml
-{
-namespace core
-{
-
+namespace ml {
+namespace core {

 const std::string CWordExtractor::PUNCT_CHARS("!\"'(),-./:;?[]`");

-
-void CWordExtractor::extractWordsFromMessage(const std::string &message,
-                                             std::string &messageWords)
-{
+void CWordExtractor::extractWordsFromMessage(const std::string& message, std::string& messageWords) {
     CWordExtractor::extractWordsFromMessage(1, message, messageWords);
 }

-void CWordExtractor::extractWordsFromMessage(size_t minConsecutive,
-                                             const std::string &message,
-                                             std::string &messageWords)
-{
+void CWordExtractor::extractWordsFromMessage(size_t minConsecutive, const std::string& message, std::string& messageWords) {
     // Words are taken to be sub-strings of 1 or more letters, all lower case
     // except possibly the first, preceded by a space, and followed by 0 or 1
     // punctuation characters and then a space (or the end of the string).
@@ -49,31 +39,22 @@ void CWordExtractor::extractWordsFromMessage(size_t minConsecutive,
     size_t punctCount(0);
     bool inWord(false);
     std::string curWord;
-    const CWordDictionary &dict = CWordDictionary::instance();
-    for (size_t messagePos = 0; messagePos < messageLen; ++messagePos)
-    {
+    const CWordDictionary& dict = CWordDictionary::instance();
+    for (size_t messagePos = 0; messagePos < messageLen; ++messagePos) {
         char thisChar(message[messagePos]);
         bool rollback(false);
-        if (::isspace(static_cast<unsigned char>(thisChar)))
-        {
-            if (inWord && punctCount <= 1)
-            {
-                if (dict.isInDictionary(curWord))
-                {
-                    messageWords.append(message,
-                                        wordStartPos,
-                                        messagePos - spaceCount - punctCount - wordStartPos);
+        if (::isspace(static_cast<unsigned char>(thisChar))) {
+            if (inWord && punctCount <= 1) {
+                if (dict.isInDictionary(curWord)) {
+                    messageWords.append(message, wordStartPos, messagePos - spaceCount - punctCount - wordStartPos);
                     messageWords += ' ';

                     ++consecutive;
-                    if (consecutive >= minConsecutive)
-                    {
+                    if (consecutive >= minConsecutive) {
                         rollbackPos = messageWords.length();
                     }
-                }
-                else
-                {
+                } else {
                     rollback = true;
                 }
             }
@@ -84,91 +65,64 @@ void CWordExtractor::extractWordsFromMessage(size_t minConsecutive,
         }
         // Not using ::ispunct() here, as its definition of punctuation is too
         // permissive (basically anything that's not a letter, number or space)
-        else if (PUNCT_CHARS.find(thisChar) != std::string::npos)
-        {
+        else if (PUNCT_CHARS.find(thisChar) != std::string::npos) {
             ++punctCount;
-            if (punctCount > 1)
-            {
+            if (punctCount > 1) {
                 rollback = true;
             }
-        }
-        else if (::isalpha(static_cast<unsigned char>(thisChar)))
-        {
-            if (punctCount == 0)
-            {
-                if (inWord)
-                {
-                    if (::isupper(static_cast<unsigned char>(thisChar)))
-                    {
+        } else if (::isalpha(static_cast<unsigned char>(thisChar))) {
+            if (punctCount == 0) {
+                if (inWord) {
+                    if (::isupper(static_cast<unsigned char>(thisChar))) {
                         inWord = false;
                         rollback = true;
-                    }
-                    else
-                    {
+                    } else {
                         curWord += thisChar;
                     }
-                }
-                else
-                {
-                    if (spaceCount > 0)
-                    {
+                } else {
+                    if (spaceCount > 0) {
                         inWord = true;
                         wordStartPos = messagePos;
                         curWord = thisChar;
                     }
                 }
-            }
-            else
-            {
+            } else {
                 inWord = false;
                 rollback = true;
             }

             spaceCount = 0;
             punctCount = 0;
-        }
-        else
-        {
+        } else {
             spaceCount = 0;
             punctCount = 0;
             inWord = false;
             rollback = true;
         }

-        if (rollback)
-        {
+        if (rollback) {
            messageWords.erase(rollbackPos);
            consecutive = 0;
         }
     }

-    if (inWord && punctCount <= 1 && dict.isInDictionary(curWord))
-    {
+    if (inWord && punctCount <= 1 && dict.isInDictionary(curWord)) {
         ++consecutive;
-        if (consecutive >= minConsecutive)
-        {
-            messageWords.append(message,
-                                wordStartPos,
-                                message.length() - wordStartPos - punctCount);
+        if (consecutive >= minConsecutive) {
+            messageWords.append(message, wordStartPos, message.length() - wordStartPos - punctCount);
             messageWords += ' ';
             rollbackPos = messageWords.length();
         }
     }

-    if (rollbackPos == 0)
-    {
+    if (rollbackPos == 0) {
         messageWords.clear();
-    }
-    else
-    {
+    } else {
         // Subtract 1 to strip the last space (since the above code always
         // appends a trailing space after each word)
         messageWords.erase(rollbackPos - 1);
     }
 }
-
-
 }
 }
-
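
// -------------------------------------------------------------------------
// Illustrative aside (not part of this patch): a hypothetical sketch of the
// two-argument overload above, which keeps only runs of at least
// minConsecutive dictionary words:
//
//     std::string words;
//     ml::core::CWordExtractor::extractWordsFromMessage(
//         2, "Failed to open file /var/log/foo.log", words);
//     // words might now be "Failed to open file"; the path is dropped
//     // because it is not made up of dictionary words.
//
// The exact output depends on the dictionary contents, so this is
// indicative only.
// -------------------------------------------------------------------------
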
(::isupper(static_cast(thisChar))) { inWord = false; rollback = true; - } - else - { + } else { curWord += thisChar; } - } - else - { - if (spaceCount > 0) - { + } else { + if (spaceCount > 0) { inWord = true; wordStartPos = messagePos; curWord = thisChar; } } - } - else - { + } else { inWord = false; rollback = true; } spaceCount = 0; punctCount = 0; - } - else - { + } else { spaceCount = 0; punctCount = 0; inWord = false; rollback = true; } - if (rollback) - { + if (rollback) { messageWords.erase(rollbackPos); consecutive = 0; } } - if (inWord && punctCount <= 1 && dict.isInDictionary(curWord)) - { + if (inWord && punctCount <= 1 && dict.isInDictionary(curWord)) { ++consecutive; - if (consecutive >= minConsecutive) - { - messageWords.append(message, - wordStartPos, - message.length() - wordStartPos - punctCount); + if (consecutive >= minConsecutive) { + messageWords.append(message, wordStartPos, message.length() - wordStartPos - punctCount); messageWords += ' '; rollbackPos = messageWords.length(); } } - if (rollbackPos == 0) - { + if (rollbackPos == 0) { messageWords.clear(); - } - else - { + } else { // Subtract 1 to strip the last space (since the above code always // appends a trailing space after each word) messageWords.erase(rollbackPos - 1); } } - - } } - diff --git a/lib/core/CXmlNode.cc b/lib/core/CXmlNode.cc index cec24dcf4b..909175dbdd 100644 --- a/lib/core/CXmlNode.cc +++ b/lib/core/CXmlNode.cc @@ -5,77 +5,53 @@ */ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CXmlNode::CXmlNode() -{ +CXmlNode::CXmlNode() { } -CXmlNode::CXmlNode(const std::string &name) - : m_Name(name) -{ +CXmlNode::CXmlNode(const std::string& name) : m_Name(name) { } -CXmlNode::CXmlNode(const std::string &name, - const std::string &value) - : m_Name(name), - m_Value(value) -{ +CXmlNode::CXmlNode(const std::string& name, const std::string& value) : m_Name(name), m_Value(value) { } -CXmlNode::CXmlNode(const std::string &name, - const std::string &value, - const TStrStrMap &attributes) - : m_Name(name), - m_Value(value), - m_Attributes(attributes.begin(), attributes.end()) -{ +CXmlNode::CXmlNode(const std::string& name, const std::string& value, const TStrStrMap& attributes) + : m_Name(name), m_Value(value), m_Attributes(attributes.begin(), attributes.end()) { } -CXmlNode::~CXmlNode() -{ +CXmlNode::~CXmlNode() { } -const std::string &CXmlNode::name() const -{ +const std::string& CXmlNode::name() const { return m_Name; } -const std::string &CXmlNode::value() const -{ +const std::string& CXmlNode::value() const { return m_Value; } -const CXmlNode::TStrStrPrVec &CXmlNode::attributes() const -{ +const CXmlNode::TStrStrPrVec& CXmlNode::attributes() const { return m_Attributes; } -void CXmlNode::name(const std::string &name) -{ +void CXmlNode::name(const std::string& name) { m_Name = name; } -void CXmlNode::value(const std::string &value) -{ +void CXmlNode::value(const std::string& value) { m_Value = value; } -std::string CXmlNode::dump() const -{ +std::string CXmlNode::dump() const { std::string strRep("name="); strRep += m_Name; strRep += ";value="; strRep += m_Value; strRep += ';'; - for (TStrStrPrVecCItr itr = m_Attributes.begin(); itr != m_Attributes.end(); ++itr) - { + for (TStrStrPrVecCItr itr = m_Attributes.begin(); itr != m_Attributes.end(); ++itr) { strRep += itr->first; strRep += '='; strRep += itr->second; @@ -84,8 +60,5 @@ std::string CXmlNode::dump() const return strRep; } - - } } - diff --git a/lib/core/CXmlNodeWithChildren.cc b/lib/core/CXmlNodeWithChildren.cc 
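// Illustrative sketch of the extraction rules described in the comments
// above (not part of this patch). The message is made up and the exact
// output depends on which dictionary file is loaded at runtime.
#include <core/CWordExtractor.h>

#include <iostream>
#include <string>

int main() {
    std::string words;

    // Keep words only once at least 2 consecutive dictionary words are seen
    ml::core::CWordExtractor::extractWordsFromMessage(2, "Failed to load resource file x7TfQ9.dat", words);

    // Plausibly prints "Failed to load resource file": the hex-like token is
    // not a dictionary word, so the rollback logic discards it
    std::cout << words << '\n';

    return 0;
}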
index 446394f99f..6ab92c1298 100644 --- a/lib/core/CXmlNodeWithChildren.cc +++ b/lib/core/CXmlNodeWithChildren.cc @@ -9,50 +9,30 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CXmlNodeWithChildren::CXmlNodeWithChildren() - : CXmlNode() -{ +CXmlNodeWithChildren::CXmlNodeWithChildren() : CXmlNode() { } -CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string &name) - : CXmlNode(name) -{ +CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name) : CXmlNode(name) { } -CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string &name, - const std::string &value) - : CXmlNode(name, value) -{ +CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name, const std::string& value) : CXmlNode(name, value) { } -CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string &name, - const std::string &value, - const CXmlNode::TStrStrMap &attributes) - : CXmlNode(name, value, attributes) -{ +CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name, const std::string& value, const CXmlNode::TStrStrMap& attributes) + : CXmlNode(name, value, attributes) { } -CXmlNodeWithChildren::CXmlNodeWithChildren(const CXmlNodeWithChildren &arg) - : CXmlNode(arg), - m_Children(arg.m_Children) -{ +CXmlNodeWithChildren::CXmlNodeWithChildren(const CXmlNodeWithChildren& arg) : CXmlNode(arg), m_Children(arg.m_Children) { } -CXmlNodeWithChildren::~CXmlNodeWithChildren() -{ +CXmlNodeWithChildren::~CXmlNodeWithChildren() { } -CXmlNodeWithChildren &CXmlNodeWithChildren::operator=(const CXmlNodeWithChildren &rhs) -{ - if (this != &rhs) - { +CXmlNodeWithChildren& CXmlNodeWithChildren::operator=(const CXmlNodeWithChildren& rhs) { + if (this != &rhs) { this->CXmlNode::operator=(rhs); m_Children = rhs.m_Children; } @@ -60,34 +40,28 @@ CXmlNodeWithChildren &CXmlNodeWithChildren::operator=(const CXmlNodeWithChildren return *this; } -void CXmlNodeWithChildren::addChild(const CXmlNode &child) -{ +void CXmlNodeWithChildren::addChild(const CXmlNode& child) { m_Children.push_back(boost::make_shared<CXmlNodeWithChildren>()); m_Children.back()->CXmlNode::operator=(child); } -void CXmlNodeWithChildren::addChild(const CXmlNodeWithChildren &child) -{ +void CXmlNodeWithChildren::addChild(const CXmlNodeWithChildren& child) { m_Children.push_back(boost::make_shared<CXmlNodeWithChildren>(child)); } -void CXmlNodeWithChildren::addChildP(const TXmlNodeWithChildrenP &childP) -{ +void CXmlNodeWithChildren::addChildP(const TXmlNodeWithChildrenP& childP) { m_Children.push_back(childP); } -const CXmlNodeWithChildren::TChildNodePVec &CXmlNodeWithChildren::children() const -{ +const CXmlNodeWithChildren::TChildNodePVec& CXmlNodeWithChildren::children() const { return m_Children; } -std::string CXmlNodeWithChildren::dump() const -{ +std::string CXmlNodeWithChildren::dump() const { return this->dump(0); } -std::string CXmlNodeWithChildren::dump(size_t indent) const -{ +std::string CXmlNodeWithChildren::dump(size_t indent) const { std::string strRep(indent, '\t'); // Call base class dump for name/value/attributes @@ -96,21 +70,14 @@ std::string CXmlNodeWithChildren::dump(size_t indent) const strRep += core_t::LINE_ENDING; // Now add children at next level of indenting - for (TChildNodePVecCItr childIter = m_Children.begin(); - childIter != m_Children.end(); - ++childIter) - { - const CXmlNodeWithChildren *child = childIter->get(); - if (child != 0) - { + for (TChildNodePVecCItr childIter = m_Children.begin(); childIter != m_Children.end(); ++childIter) { + const CXmlNodeWithChildren* child = childIter->get(); + if (child != 0) {
strRep += child->dump(indent + 1); } } return strRep; } - - } } - diff --git a/lib/core/CXmlNodeWithChildrenPool.cc b/lib/core/CXmlNodeWithChildrenPool.cc index 0d9f28c996..99318d6a03 100644 --- a/lib/core/CXmlNodeWithChildrenPool.cc +++ b/lib/core/CXmlNodeWithChildrenPool.cc @@ -12,27 +12,17 @@ #include +namespace ml { +namespace core { -namespace ml -{ -namespace core -{ - - -CXmlNodeWithChildrenPool::CXmlNodeWithChildrenPool() - : m_MaxRecycled(m_Recycled.max_size()) -{ +CXmlNodeWithChildrenPool::CXmlNodeWithChildrenPool() : m_MaxRecycled(m_Recycled.max_size()) { } -CXmlNodeWithChildrenPool::CXmlNodeWithChildrenPool(size_t maxRecycled) - : m_MaxRecycled(std::min(maxRecycled, m_Recycled.max_size())) -{ +CXmlNodeWithChildrenPool::CXmlNodeWithChildrenPool(size_t maxRecycled) : m_MaxRecycled(std::min(maxRecycled, m_Recycled.max_size())) { } -CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode() -{ - if (m_Recycled.empty()) - { +CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode() { + if (m_Recycled.empty()) { return boost::make_shared(); } @@ -41,9 +31,7 @@ CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode() return nodePtr; } -CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode(std::string name, - std::string value) -{ +CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode(std::string name, std::string value) { CXmlNodeWithChildren::TXmlNodeWithChildrenP nodePtr(this->newNode()); // We take advantage of friendship here to set the node's name and value @@ -53,30 +41,21 @@ CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode(st return nodePtr; } -CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode(const std::string &name, - double value, - CIEEE754::EPrecision precision) -{ +CXmlNodeWithChildren::TXmlNodeWithChildrenP +CXmlNodeWithChildrenPool::newNode(const std::string& name, double value, CIEEE754::EPrecision precision) { return this->newNode(name, CStringUtils::typeToStringPrecise(value, precision)); } -void CXmlNodeWithChildrenPool::recycle(CXmlNodeWithChildren::TXmlNodeWithChildrenP &nodePtr) -{ - if (nodePtr == 0) - { +void CXmlNodeWithChildrenPool::recycle(CXmlNodeWithChildren::TXmlNodeWithChildrenP& nodePtr) { + if (nodePtr == 0) { LOG_ERROR("Unexpected NULL pointer"); return; } - if (m_Recycled.size() < m_MaxRecycled) - { + if (m_Recycled.size() < m_MaxRecycled) { // We take advantage of friendship here to clear the node's attribute vector nodePtr->m_Attributes.clear(); - std::for_each(nodePtr->m_Children.rbegin(), - nodePtr->m_Children.rend(), - boost::bind(&CXmlNodeWithChildrenPool::recycle, - this, - _1)); + std::for_each(nodePtr->m_Children.rbegin(), nodePtr->m_Children.rend(), boost::bind(&CXmlNodeWithChildrenPool::recycle, this, _1)); nodePtr->m_Children.clear(); m_Recycled.push_back(nodePtr); @@ -90,8 +69,5 @@ void CXmlNodeWithChildrenPool::recycle(CXmlNodeWithChildren::TXmlNodeWithChildre // will not be preventing lots of memory being freed. 
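// Illustrative pool round trip (not part of this patch), using only the
// methods shown in these hunks: nodes come from newNode() and go back via
// recycle(), which clears attributes and recycles children depth-first so
// subsequent newNode() calls can reuse them.
#include <core/CXmlNodeWithChildrenPool.h>

void poolRoundTrip() {
    ml::core::CXmlNodeWithChildrenPool pool;

    ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root = pool.newNode("root", "");
    root->addChildP(pool.newNode("child", "value"));

    // ... build up and use the node hierarchy ...

    pool.recycle(root); // root and its child are now available for reuse
}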
nodePtr.reset(); } - - } } - diff --git a/lib/core/CXmlParser.cc b/lib/core/CXmlParser.cc index a2882dd23b..5bafb839d3 100644 --- a/lib/core/CXmlParser.cc +++ b/lib/core/CXmlParser.cc @@ -6,9 +6,9 @@ #include #include -#include #include #include +#include #include #include @@ -21,54 +21,40 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { const std::string CXmlParser::ATTRIBUTE_SEPARATOR("@"); const std::string CXmlParser::ATTRIBUTE_EQUALS("="); // 4 spaces to match the Ml coding standards -const size_t CXmlParser::DEFAULT_INDENT_SPACES(4); -const size_t CXmlParser::MAX_INDENT_SPACES(10); +const size_t CXmlParser::DEFAULT_INDENT_SPACES(4); +const size_t CXmlParser::MAX_INDENT_SPACES(10); // The number of spaces in this constant MUST match the maximum above -const char *CXmlParser::INDENT_SPACE_STR("          "); - +const char* CXmlParser::INDENT_SPACE_STR("          "); -CXmlParser::CXmlParser() - : m_Doc(0), - m_XPathContext(0), - m_NavigatedNode(0) -{ +CXmlParser::CXmlParser() : m_Doc(0), m_XPathContext(0), m_NavigatedNode(0) { // Note that xmlLoadExtDtdDefaultValue needs to be set before parsing, // but is a per-thread setting // xmlLoadExtDtdDefaultValue = 1; } -CXmlParser::~CXmlParser() -{ +CXmlParser::~CXmlParser() { this->destroy(); } -void CXmlParser::destroy() -{ - if (m_XPathContext != 0) - { +void CXmlParser::destroy() { + if (m_XPathContext != 0) { xmlXPathFreeContext(m_XPathContext); m_XPathContext = 0; } - if (m_Doc != 0) - { + if (m_Doc != 0) { xmlFreeDoc(m_Doc); m_Doc = 0; } m_NavigatedNode = 0; } -bool CXmlParser::parseFile(const std::string &fileName) -{ +bool CXmlParser::parseFile(const std::string& fileName) { this->destroy(); // Initialise globals - NOTE this currently prints a line for EVERY call to @@ -79,22 +65,19 @@ bool CXmlParser::parseFile(const std::string &fileName) xmlLoadExtDtdDefaultValue = 1; m_Doc = xmlParseFile(fileName.c_str()); - if (m_Doc == 0) - { + if (m_Doc == 0) { LOG_ERROR("Unable to parse XML file " << fileName); return false; } // Resolve xincludes - if (xmlXIncludeProcess(m_Doc) < 0) - { + if (xmlXIncludeProcess(m_Doc) < 0) { LOG_ERROR("Unable to parse XML file " << fileName); return false; } m_XPathContext = xmlXPathNewContext(m_Doc); - if (m_XPathContext == 0) - { + if (m_XPathContext == 0) { this->destroy(); LOG_ERROR("Unable to parse XML file " << fileName); return false; @@ -106,13 +89,11 @@ bool CXmlParser::parseFile(const std::string &fileName) return true; } -bool CXmlParser::parseString(const std::string &xml) -{ +bool CXmlParser::parseString(const std::string& xml) { return this->parseBuffer(xml.c_str(), xml.length()); } -bool CXmlParser::parseBuffer(const char *begin, size_t length) -{ +bool CXmlParser::parseBuffer(const char* begin, size_t length) { this->destroy(); // Initialise globals - NOTE this currently prints a line for EVERY call to @@ -123,27 +104,23 @@ bool CXmlParser::parseBuffer(const char *begin, size_t length) xmlLoadExtDtdDefaultValue = 1; m_Doc = xmlParseMemory(begin, static_cast<int>(length)); - if (m_Doc == 0) - { + if (m_Doc == 0) { LOG_ERROR("Unable to parse XML of length " << length); // Only log the full XML string at the debug level, so that it doesn't // get sent to the socket logger - LOG_DEBUG("XML that cannot be parsed is " << - std::string(begin, length)); + LOG_DEBUG("XML that cannot be parsed is " << std::string(begin, length)); return false; } // Don't resolve xincludes for string parsing m_XPathContext = xmlXPathNewContext(m_Doc); - if (m_XPathContext == 0) - { + if (m_XPathContext == 0) {
this->destroy(); LOG_ERROR("Unable to parse XML of length " << length); // Only log the full XML string at the debug level, so that it doesn't // get sent to the socket logger - LOG_DEBUG("XML that cannot be parsed is " << - std::string(begin, length)); + LOG_DEBUG("XML that cannot be parsed is " << std::string(begin, length)); return false; } @@ -153,30 +130,25 @@ bool CXmlParser::parseBuffer(const char *begin, size_t length) return true; } -bool CXmlParser::parseBufferInSitu(char *begin, size_t length) -{ +bool CXmlParser::parseBufferInSitu(char* begin, size_t length) { // With libxml2 there's no benefit to parsing in-situ return this->parseBuffer(begin, length); } -std::string CXmlParser::rootElementName() const -{ - if (m_Doc == 0) - { +std::string CXmlParser::rootElementName() const { + if (m_Doc == 0) { LOG_ERROR("Cannot get root element for unparsed document"); return std::string(); } - xmlNode *root(xmlDocGetRootElement(m_Doc)); - if (root == 0) - { + xmlNode* root(xmlDocGetRootElement(m_Doc)); + if (root == 0) { LOG_ERROR("Error getting root element"); return std::string(); } - const char *name(reinterpret_cast(root->name)); - if (name == 0) - { + const char* name(reinterpret_cast(root->name)); + if (name == 0) { LOG_ERROR("Error getting root element name"); return std::string(); } @@ -184,12 +156,9 @@ std::string CXmlParser::rootElementName() const return name; } -bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, - std::string &ret) const -{ +bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, std::string& ret) const { CXmlNode value; - if (this->evalXPathExpression(xpathExpr, value) == false) - { + if (this->evalXPathExpression(xpathExpr, value) == false) { LOG_ERROR("Unable to eval " << xpathExpr); return false; } @@ -199,18 +168,14 @@ bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, return true; } -bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, - CXmlNode &ret) const -{ +bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, CXmlNode& ret) const { TXmlNodeVec vec; - if (this->evalXPathExpression(xpathExpr, vec) == false) - { + if (this->evalXPathExpression(xpathExpr, vec) == false) { return false; } - if (vec.size() != 1) - { + if (vec.size() != 1) { LOG_ERROR("Return for " << xpathExpr << " must be single value, not " << vec.size()); return false; } @@ -220,55 +185,44 @@ bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, return true; } -bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, - TStrVec &ret) const -{ +bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, TStrVec& ret) const { ret.clear(); TXmlNodeVec vec; - if (this->evalXPathExpression(xpathExpr, vec) == false) - { + if (this->evalXPathExpression(xpathExpr, vec) == false) { return false; } - if (vec.empty()) - { + if (vec.empty()) { // This is ok return true; } ret.reserve(vec.size()); - for (TXmlNodeVecItr itr = vec.begin(); itr != vec.end(); ++itr) - { + for (TXmlNodeVecItr itr = vec.begin(); itr != vec.end(); ++itr) { ret.push_back(itr->value()); } return true; } -bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, - TStrSet &ret) const -{ +bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, TStrSet& ret) const { ret.clear(); TXmlNodeVec vec; - if (this->evalXPathExpression(xpathExpr, vec) == false) - { + if (this->evalXPathExpression(xpathExpr, vec) == false) { return false; } - if (vec.empty()) - { + if (vec.empty()) { // This is ok return true; } - for 
(TXmlNodeVecItr itr = vec.begin(); itr != vec.end(); ++itr) - { - if (ret.insert(itr->value()).second == false) - { + for (TXmlNodeVecItr itr = vec.begin(); itr != vec.end(); ++itr) { + if (ret.insert(itr->value()).second == false) { LOG_ERROR("Duplicate value " << itr->value()); return false; } @@ -277,25 +231,19 @@ bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, return true; } -bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, - TStrStrMap &ret) const -{ +bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, TStrStrMap& ret) const { ret.clear(); TXmlNodeVec values; - if (this->evalXPathExpression(xpathExpr, values) == false) - { + if (this->evalXPathExpression(xpathExpr, values) == false) { LOG_ERROR("Unable to evaluate xpath expression " << xpathExpr); return false; } - for (TXmlNodeVecCItr itr = values.begin(); itr != values.end(); ++itr) - { - if (ret.insert(TStrStrMap::value_type(itr->name(), itr->value())).second == false) - { - LOG_ERROR("Inappropriate method call. Tags for " << xpathExpr << - " must be unique"); + for (TXmlNodeVecCItr itr = values.begin(); itr != values.end(); ++itr) { + if (ret.insert(TStrStrMap::value_type(itr->name(), itr->value())).second == false) { + LOG_ERROR("Inappropriate method call. Tags for " << xpathExpr << " must be unique"); return false; } } @@ -303,35 +251,28 @@ bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, return true; } -bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, - CXmlParser::TXmlNodeVec &ret) const -{ +bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, CXmlParser::TXmlNodeVec& ret) const { ret.clear(); - if (m_Doc == 0 || m_XPathContext == 0) - { + if (m_Doc == 0 || m_XPathContext == 0) { LOG_ERROR("Attempt to evaluate Xpath expression before ::parseFile is called"); return false; } - xmlXPathObject *xpathObj(xmlXPathEvalExpression(reinterpret_cast(xpathExpr.c_str()), - m_XPathContext)); - if (xpathObj == 0) - { + xmlXPathObject* xpathObj(xmlXPathEvalExpression(reinterpret_cast(xpathExpr.c_str()), m_XPathContext)); + if (xpathObj == 0) { LOG_ERROR("Unable to evaluate xpath expression " << xpathExpr); return false; } - if (xpathObj->type != XPATH_NODESET) - { + if (xpathObj->type != XPATH_NODESET) { xmlXPathFreeObject(xpathObj); LOG_ERROR("Unable to evaluate xpath expression " << xpathExpr << " " << xpathObj->type); return false; } - xmlNodeSet *nodes = xpathObj->nodesetval; - if (nodes == 0) - { + xmlNodeSet* nodes = xpathObj->nodesetval; + if (nodes == 0) { xmlXPathFreeObject(xpathObj); // Returning 0 results is not an error at this stage return true; @@ -341,39 +282,32 @@ bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, xmlXPathNodeSetSort(nodes); int numEntries(nodes->nodeNr); - for (int i = 0; i < numEntries; ++i) - { + for (int i = 0; i < numEntries; ++i) { xmlElementType type(nodes->nodeTab[i]->type); - if (type == XML_ELEMENT_NODE || type == XML_ATTRIBUTE_NODE) - { - const xmlChar *name(nodes->nodeTab[i]->name); - xmlChar *value(xmlNodeGetContent(nodes->nodeTab[i])); + if (type == XML_ELEMENT_NODE || type == XML_ATTRIBUTE_NODE) { + const xmlChar* name(nodes->nodeTab[i]->name); + xmlChar* value(xmlNodeGetContent(nodes->nodeTab[i])); - CXmlNode node(reinterpret_cast(name), - reinterpret_cast(value)); + CXmlNode node(reinterpret_cast(name), reinterpret_cast(value)); ret.push_back(node); xmlFree(value); - CXmlNode::TStrStrPrVec &attrs = ret.back().m_Attributes; + CXmlNode::TStrStrPrVec& attrs = 
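// Illustrative parse-and-query sketch (not part of this patch), combining
// parseString() with the node-set overload of evalXPathExpression() shown
// above; TStrVec is assumed to be the parser's string-vector typedef used in
// these signatures.
#include <core/CXmlParser.h>

bool xpathSketch() {
    ml::core::CXmlParser parser;
    if (parser.parseString("<root><item>a</item><item>b</item></root>") == false) {
        return false;
    }

    ml::core::CXmlParser::TStrVec values;
    // An empty node-set is not an error; a malformed expression returns false
    if (parser.evalXPathExpression("/root/item", values) == false) {
        return false;
    }

    return values.size() == 2; // values holds {"a", "b"}
}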
ret.back().m_Attributes; - xmlAttr *prop(nodes->nodeTab[i]->properties); - while (prop != 0) - { - const xmlChar *propName(prop->name); - xmlChar *propValue(xmlGetProp(nodes->nodeTab[i], propName)); + xmlAttr* prop(nodes->nodeTab[i]->properties); + while (prop != 0) { + const xmlChar* propName(prop->name); + xmlChar* propValue(xmlGetProp(nodes->nodeTab[i], propName)); - attrs.push_back(CXmlNode::TStrStrPr(reinterpret_cast(propName), - reinterpret_cast(propValue))); + attrs.push_back(CXmlNode::TStrStrPr(reinterpret_cast(propName), reinterpret_cast(propValue))); xmlFree(propValue); prop = prop->next; } - } - else - { + } else { LOG_ERROR("Node type " << type << " not supported"); } } @@ -383,24 +317,22 @@ bool CXmlParser::evalXPathExpression(const std::string &xpathExpr, return true; } -std::string CXmlParser::dumpToString() const -{ +std::string CXmlParser::dumpToString() const { // The xmlTreeIndentString "global" is really a per-thread variable. // 4 spaces per indent to match Ml standard. xmlTreeIndentString = " "; std::string result; - if (m_Doc != 0) - { + if (m_Doc != 0) { // Dump the root node to a buffer and print it - xmlBuffer *buf(xmlBufferCreate()); - xmlNode *rootNode(xmlDocGetRootElement(m_Doc)); + xmlBuffer* buf(xmlBufferCreate()); + xmlNode* rootNode(xmlDocGetRootElement(m_Doc)); xmlNodeDump(buf, m_Doc, rootNode, 0, 0); // Set return - result = reinterpret_cast(buf->content); + result = reinterpret_cast(buf->content); // Free buffer xmlBufferFree(buf); @@ -409,52 +341,38 @@ std::string CXmlParser::dumpToString() const return result; } -void CXmlParser::dumpToStdout() const -{ +void CXmlParser::dumpToStdout() const { // The xmlTreeIndentString "global" is really a per-thread variable. // 4 spaces per indent to match Ml standard. xmlTreeIndentString = " "; - if (m_Doc != 0) - { + if (m_Doc != 0) { //! NB: This won't go to the standard log file, and will be completely //! discarded if the program is running as a Windows service! xmlDocDump(stdout, m_Doc); } } -void CXmlParser::convert(const CXmlNodeWithChildren &root, - std::string &result) -{ +void CXmlParser::convert(const CXmlNodeWithChildren& root, std::string& result) { CXmlParser::convert(DEFAULT_INDENT_SPACES, root, result); } -void CXmlParser::convert(size_t indentSpaces, - const CXmlNodeWithChildren &root, - std::string &result) -{ +void CXmlParser::convert(size_t indentSpaces, const CXmlNodeWithChildren& root, std::string& result) { // The xmlTreeIndentString "global" is really a per-thread variable. 
- xmlTreeIndentString = INDENT_SPACE_STR + - MAX_INDENT_SPACES - - std::min(indentSpaces, - MAX_INDENT_SPACES); + xmlTreeIndentString = INDENT_SPACE_STR + MAX_INDENT_SPACES - std::min(indentSpaces, MAX_INDENT_SPACES); // Create a temporary document - xmlDoc *doc(xmlNewDoc(reinterpret_cast("1.0"))); + xmlDoc* doc(xmlNewDoc(reinterpret_cast("1.0"))); // Root node - xmlNode *rootNode(xmlNewNode(0, - reinterpret_cast(root.name().c_str()))); + xmlNode* rootNode(xmlNewNode(0, reinterpret_cast(root.name().c_str()))); - const CXmlNode::TStrStrPrVec &attrs = root.attributes(); + const CXmlNode::TStrStrPrVec& attrs = root.attributes(); - for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); - attrIter != attrs.end(); - ++attrIter) - { + for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) { xmlSetProp(rootNode, - reinterpret_cast(attrIter->first.c_str()), - reinterpret_cast(attrIter->second.c_str())); + reinterpret_cast(attrIter->first.c_str()), + reinterpret_cast(attrIter->second.c_str())); } // Create child nodes @@ -463,7 +381,7 @@ void CXmlParser::convert(size_t indentSpaces, xmlDocSetRootElement(doc, rootNode); // Dump the root node to a buffer - xmlBuffer *buf(xmlBufferCreate()); + xmlBuffer* buf(xmlBufferCreate()); xmlNodeDump(buf, doc, rootNode, 0, 1); // Free associated memory. @@ -471,55 +389,39 @@ void CXmlParser::convert(size_t indentSpaces, doc = 0; // Set return - result = reinterpret_cast(buf->content); + result = reinterpret_cast(buf->content); // Free buffer xmlBufferFree(buf); buf = 0; } -void CXmlParser::convertChildren(const CXmlNodeWithChildren ¤t, - xmlNode &xmlRep) -{ - const CXmlNodeWithChildren::TChildNodePVec &childVec = current.children(); - - for (CXmlNodeWithChildren::TChildNodePVecCItr childIter = childVec.begin(); - childIter != childVec.end(); - ++childIter) - { - const CXmlNodeWithChildren *child = childIter->get(); - if (child != 0) - { - xmlNode *childRep(0); - - if (child->value().empty() && - !child->children().empty()) - { +void CXmlParser::convertChildren(const CXmlNodeWithChildren& current, xmlNode& xmlRep) { + const CXmlNodeWithChildren::TChildNodePVec& childVec = current.children(); + + for (CXmlNodeWithChildren::TChildNodePVecCItr childIter = childVec.begin(); childIter != childVec.end(); ++childIter) { + const CXmlNodeWithChildren* child = childIter->get(); + if (child != 0) { + xmlNode* childRep(0); + + if (child->value().empty() && !child->children().empty()) { // It's crucial to specify the value as NULL rather than // an empty string, otherwise the formatting will be messed // up - childRep = xmlNewChild(&xmlRep, - 0, - reinterpret_cast(child->name().c_str()), - 0); - } - else - { + childRep = xmlNewChild(&xmlRep, 0, reinterpret_cast(child->name().c_str()), 0); + } else { childRep = xmlNewTextChild(&xmlRep, 0, - reinterpret_cast(child->name().c_str()), - reinterpret_cast(child->value().c_str())); + reinterpret_cast(child->name().c_str()), + reinterpret_cast(child->value().c_str())); } - const CXmlNode::TStrStrPrVec &attrs = child->attributes(); + const CXmlNode::TStrStrPrVec& attrs = child->attributes(); - for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); - attrIter != attrs.end(); - ++attrIter) - { + for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) { xmlSetProp(childRep, - reinterpret_cast(attrIter->first.c_str()), - reinterpret_cast(attrIter->second.c_str())); + reinterpret_cast(attrIter->first.c_str()), + reinterpret_cast(attrIter->second.c_str())); } 
CXmlParser::convertChildren(*child, *childRep); @@ -527,73 +429,49 @@ void CXmlParser::convertChildren(const CXmlNodeWithChildren ¤t, } } -void CXmlParser::convert(const std::string &root, - const TStrStrMap &values, - std::string &result) -{ - CXmlParser::convert(DEFAULT_INDENT_SPACES, - root, - values, - result); +void CXmlParser::convert(const std::string& root, const TStrStrMap& values, std::string& result) { + CXmlParser::convert(DEFAULT_INDENT_SPACES, root, values, result); } -void CXmlParser::convert(size_t indentSpaces, - const std::string &root, - const TStrStrMap &values, - std::string &result) -{ +void CXmlParser::convert(size_t indentSpaces, const std::string& root, const TStrStrMap& values, std::string& result) { // The xmlTreeIndentString "global" is really a per-thread variable. - xmlTreeIndentString = INDENT_SPACE_STR + - MAX_INDENT_SPACES - - std::min(indentSpaces, - MAX_INDENT_SPACES); + xmlTreeIndentString = INDENT_SPACE_STR + MAX_INDENT_SPACES - std::min(indentSpaces, MAX_INDENT_SPACES); // Create a temporary document - xmlDoc *doc(xmlNewDoc(reinterpret_cast("1.0"))); + xmlDoc* doc(xmlNewDoc(reinterpret_cast("1.0"))); // Root node - xmlNode *rootNode(xmlNewNode(0, reinterpret_cast(root.c_str()))); + xmlNode* rootNode(xmlNewNode(0, reinterpret_cast(root.c_str()))); // Create child nodes - for (TStrStrMapCItr itr = values.begin(); itr != values.end(); ++itr) - { + for (TStrStrMapCItr itr = values.begin(); itr != values.end(); ++itr) { // Handle an optional attribute in the form tag@name=value std::string tag(itr->first); std::string attribute; size_t attrPos(tag.find(ATTRIBUTE_SEPARATOR)); - if (attrPos == 0) - { - LOG_ERROR("Attribute separator found at position zero in tag " << - tag); + if (attrPos == 0) { + LOG_ERROR("Attribute separator found at position zero in tag " << tag); continue; } - if (attrPos != std::string::npos) - { + if (attrPos != std::string::npos) { attribute.assign(tag, attrPos + 1, tag.length() - attrPos - 1); tag.erase(attrPos); } - xmlNode *childRep(xmlNewTextChild(rootNode, - 0, - reinterpret_cast(tag.c_str()), - reinterpret_cast(itr->second.c_str()))); + xmlNode* childRep(xmlNewTextChild( + rootNode, 0, reinterpret_cast(tag.c_str()), reinterpret_cast(itr->second.c_str()))); - if (!attribute.empty()) - { + if (!attribute.empty()) { size_t eqPos(attribute.find(ATTRIBUTE_EQUALS)); - if (eqPos == std::string::npos || eqPos == 0) - { - LOG_ERROR("Attribute format does not contain '" << ATTRIBUTE_EQUALS << - "' surrounded by name and value : " << attribute << - core_t::LINE_ENDING << "Map key : " << itr->first << - core_t::LINE_ENDING << "Map value : " << itr->second); - } - else - { + if (eqPos == std::string::npos || eqPos == 0) { + LOG_ERROR("Attribute format does not contain '" << ATTRIBUTE_EQUALS << "' surrounded by name and value : " << attribute + << core_t::LINE_ENDING << "Map key : " << itr->first << core_t::LINE_ENDING + << "Map value : " << itr->second); + } else { xmlSetProp(childRep, - reinterpret_cast(attribute.substr(0, eqPos).c_str()), - reinterpret_cast(attribute.substr(eqPos + 1).c_str())); + reinterpret_cast(attribute.substr(0, eqPos).c_str()), + reinterpret_cast(attribute.substr(eqPos + 1).c_str())); } } } @@ -601,7 +479,7 @@ void CXmlParser::convert(size_t indentSpaces, xmlDocSetRootElement(doc, rootNode); // Dump the root node to a buffer and print it - xmlBuffer *buf(xmlBufferCreate()); + xmlBuffer* buf(xmlBufferCreate()); xmlNodeDump(buf, doc, rootNode, 0, 0); // Free associated memory. 
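// Illustrative use of the tag@name=value convention handled above (not part
// of this patch); TStrStrMap is assumed to be the string-pair map typedef
// that convert() takes, and the tag/attribute names are made up.
#include <core/CXmlParser.h>

#include <string>

void convertSketch() {
    ml::core::CXmlParser::TStrStrMap values;
    values["level"] = "info";
    values["msg@lang=en"] = "hello"; // '@' adds a lang="en" attribute to <msg>

    std::string xml;
    ml::core::CXmlParser::convert("event", values, xml);

    // xml is now roughly:
    // <event><level>info</level><msg lang="en">hello</msg></event>
}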
@@ -609,42 +487,34 @@ void CXmlParser::convert(size_t indentSpaces, doc = 0; // Set return - result = reinterpret_cast(buf->content); + result = reinterpret_cast(buf->content); // Free buffer xmlBufferFree(buf); } -bool CXmlParser::convert(const std::string &root, - const TStrStrMap &values) -{ - if (m_Doc != 0) - { +bool CXmlParser::convert(const std::string& root, const TStrStrMap& values) { + if (m_Doc != 0) { LOG_ERROR("convert requires an empty document"); return false; } // Create a temporary document - m_Doc = xmlNewDoc(reinterpret_cast("1.0")); + m_Doc = xmlNewDoc(reinterpret_cast("1.0")); // Root node - xmlNode *rootNode(xmlNewNode(0, - reinterpret_cast(root.c_str()))); + xmlNode* rootNode(xmlNewNode(0, reinterpret_cast(root.c_str()))); // Create child nodes - for (TStrStrMapCItr itr = values.begin(); itr != values.end(); ++itr) - { - xmlNewTextChild(rootNode, - 0, - reinterpret_cast(itr->first.c_str()), - reinterpret_cast(itr->second.c_str())); + for (TStrStrMapCItr itr = values.begin(); itr != values.end(); ++itr) { + xmlNewTextChild( + rootNode, 0, reinterpret_cast(itr->first.c_str()), reinterpret_cast(itr->second.c_str())); } xmlDocSetRootElement(m_Doc, rootNode); m_XPathContext = xmlXPathNewContext(m_Doc); - if (m_XPathContext == 0) - { + if (m_XPathContext == 0) { this->destroy(); LOG_ERROR("Unable to convert to XML"); return false; @@ -656,8 +526,7 @@ bool CXmlParser::convert(const std::string &root, return true; } -bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const -{ +bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { // Because both the pool and the nodes use shared pointers, it doesn't // matter if the pool that originally allocates the nodes is destroyed // before the nodes themselves. Hence we can get away with implementing @@ -667,26 +536,21 @@ bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP &ro return this->toNodeHierarchy(pool, rootNodePtr); } -bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool &pool, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const -{ +bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { rootNodePtr.reset(); - if (m_Doc == 0) - { + if (m_Doc == 0) { LOG_ERROR("Attempt to convert to node hierarchy before ::parseFile is called"); return false; } - const xmlNode *root(xmlDocGetRootElement(const_cast(m_Doc))); - if (root == 0) - { + const xmlNode* root(xmlDocGetRootElement(const_cast(m_Doc))); + if (root == 0) { LOG_ERROR("Error getting root element"); return false; } - if (root->type != XML_ELEMENT_NODE) - { + if (root->type != XML_ELEMENT_NODE) { LOG_ERROR("Node type " << root->type << " not supported"); return false; } @@ -694,9 +558,7 @@ bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool &pool, return this->toNodeHierarchy(*root, pool, 0, rootNodePtr); } -bool CXmlParser::toNodeHierarchy(CStringCache &cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const -{ +bool CXmlParser::toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { // Because both the pool and the nodes use shared pointers, it doesn't // matter if the pool that originally allocates the nodes is destroyed // before the nodes themselves. 
Hence we can get away with implementing @@ -706,59 +568,49 @@ bool CXmlParser::toNodeHierarchy(CStringCache &cache, return this->toNodeHierarchy(pool, cache, rootNodePtr); } -bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool &pool, - CStringCache &cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &rootNodePtr) const -{ +bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { rootNodePtr.reset(); - if (m_Doc == 0) - { + if (m_Doc == 0) { LOG_ERROR("Attempt to convert to node hierarchy before ::parseFile is called"); return false; } - const xmlNode *root(xmlDocGetRootElement(const_cast(m_Doc))); - if (root == 0) - { + const xmlNode* root(xmlDocGetRootElement(const_cast(m_Doc))); + if (root == 0) { LOG_ERROR("Error getting root element"); return false; } - if (root->type != XML_ELEMENT_NODE) - { + if (root->type != XML_ELEMENT_NODE) { LOG_ERROR("Node type " << root->type << " not supported"); return false; } // Only use the cache if the current platform employs copy-on-write strings. // If all strings are distinct then the cache is pointless. - CStringCache *cachePtr(cache.haveCopyOnWriteStrings() ? &cache : 0); + CStringCache* cachePtr(cache.haveCopyOnWriteStrings() ? &cache : 0); return this->toNodeHierarchy(*root, pool, cachePtr, rootNodePtr); } -bool CXmlParser::navigateRoot() -{ - if (m_Doc != 0) - { +bool CXmlParser::navigateRoot() { + if (m_Doc != 0) { m_NavigatedNode = xmlDocGetRootElement(m_Doc); } return m_NavigatedNode != 0; } -bool CXmlParser::navigateFirstChild() -{ - if (m_NavigatedNode == 0) - { +bool CXmlParser::navigateFirstChild() { + if (m_NavigatedNode == 0) { return false; } - xmlNode *childNode(m_NavigatedNode->children); - while (childNode != 0) - { - if (childNode->type == XML_ELEMENT_NODE) - { + xmlNode* childNode(m_NavigatedNode->children); + while (childNode != 0) { + if (childNode->type == XML_ELEMENT_NODE) { m_NavigatedNode = childNode; return true; } @@ -769,18 +621,14 @@ bool CXmlParser::navigateFirstChild() return false; } -bool CXmlParser::navigateNext() -{ - if (m_NavigatedNode == 0) - { +bool CXmlParser::navigateNext() { + if (m_NavigatedNode == 0) { return false; } - xmlNode *nextNode(m_NavigatedNode->next); - while (nextNode != 0) - { - if (nextNode->type == XML_ELEMENT_NODE) - { + xmlNode* nextNode(m_NavigatedNode->next); + while (nextNode != 0) { + if (nextNode->type == XML_ELEMENT_NODE) { m_NavigatedNode = nextNode; return true; } @@ -791,18 +639,14 @@ bool CXmlParser::navigateNext() return false; } -bool CXmlParser::navigateParent() -{ - if (m_NavigatedNode == 0) - { +bool CXmlParser::navigateParent() { + if (m_NavigatedNode == 0) { return false; } - xmlNode *parentNode(m_NavigatedNode->parent); - while (parentNode != 0) - { - if (parentNode->type == XML_ELEMENT_NODE) - { + xmlNode* parentNode(m_NavigatedNode->parent); + while (parentNode != 0) { + if (parentNode->type == XML_ELEMENT_NODE) { m_NavigatedNode = parentNode; return true; } @@ -813,22 +657,18 @@ bool CXmlParser::navigateParent() return false; } -bool CXmlParser::currentNodeName(std::string &name) -{ - if (m_NavigatedNode == 0) - { +bool CXmlParser::currentNodeName(std::string& name) { + if (m_NavigatedNode == 0) { return false; } - name = reinterpret_cast(m_NavigatedNode->name); + name = reinterpret_cast(m_NavigatedNode->name); return true; } -bool CXmlParser::currentNodeValue(std::string &value) -{ - if (m_NavigatedNode == 0) - { +bool CXmlParser::currentNodeValue(std::string& 
value) { + if (m_NavigatedNode == 0) { return false; } @@ -838,22 +678,15 @@ bool CXmlParser::currentNodeValue(std::string &value) // (If we used xmlNodeGetContent() we'd get the text of child nodes too, // which we don't want, as we'll be dealing with the text in the child // nodes recursively.) - const xmlNode *child(m_NavigatedNode->children); - while (child != 0) - { - if (child->type == XML_TEXT_NODE || - child->type == XML_CDATA_SECTION_NODE) - { - const xmlChar *textVal(child->content); - if (textVal != 0) - { - if (isValueSet) - { - value += reinterpret_cast(textVal); - } - else - { - value = reinterpret_cast(textVal); + const xmlNode* child(m_NavigatedNode->children); + while (child != 0) { + if (child->type == XML_TEXT_NODE || child->type == XML_CDATA_SECTION_NODE) { + const xmlChar* textVal(child->content); + if (textVal != 0) { + if (isValueSet) { + value += reinterpret_cast(textVal); + } else { + value = reinterpret_cast(textVal); isValueSet = true; } } @@ -862,34 +695,29 @@ bool CXmlParser::currentNodeValue(std::string &value) child = child->next; } - if (!isValueSet) - { + if (!isValueSet) { value.clear(); } return true; } -bool CXmlParser::setRootNode(const std::string &root) -{ - if (m_Doc != 0) - { +bool CXmlParser::setRootNode(const std::string& root) { + if (m_Doc != 0) { LOG_ERROR("setRootNode requires an empty document"); return false; } // Create a temporary document - m_Doc = xmlNewDoc(reinterpret_cast("1.0")); + m_Doc = xmlNewDoc(reinterpret_cast("1.0")); // Root node - xmlNode *rootNode(xmlNewNode(0, - reinterpret_cast(root.c_str()))); + xmlNode* rootNode(xmlNewNode(0, reinterpret_cast(root.c_str()))); xmlDocSetRootElement(m_Doc, rootNode); m_XPathContext = xmlXPathNewContext(m_Doc); - if (m_XPathContext == 0) - { + if (m_XPathContext == 0) { this->destroy(); LOG_ERROR("Unable to set root node"); return false; @@ -901,28 +729,20 @@ bool CXmlParser::setRootNode(const std::string &root) return true; } -bool CXmlParser::addNewChildNode(const std::string &name, - const std::string &value) -{ - if (m_Doc == 0) - { +bool CXmlParser::addNewChildNode(const std::string& name, const std::string& value) { + if (m_Doc == 0) { LOG_ERROR("Cannot add to uninitialised document"); return false; } - xmlNode *root(xmlDocGetRootElement(m_Doc)); - if (root == 0) - { + xmlNode* root(xmlDocGetRootElement(m_Doc)); + if (root == 0) { LOG_ERROR("Error getting root element"); return false; } // Note the namespace is NULL here - if (xmlNewTextChild(root, - 0, - reinterpret_cast(name.c_str()), - reinterpret_cast(value.c_str())) == 0) - { + if (xmlNewTextChild(root, 0, reinterpret_cast(name.c_str()), reinterpret_cast(value.c_str())) == 0) { LOG_ERROR("Unable to add new child to " << root); return false; } @@ -933,41 +753,29 @@ bool CXmlParser::addNewChildNode(const std::string &name, return true; } -bool CXmlParser::addNewChildNode(const std::string &name, - const std::string &value, - const TStrStrMap &attrs) -{ - if (m_Doc == 0) - { +bool CXmlParser::addNewChildNode(const std::string& name, const std::string& value, const TStrStrMap& attrs) { + if (m_Doc == 0) { LOG_ERROR("Cannot add to uninitialised document"); return false; } - xmlNode *root(xmlDocGetRootElement(m_Doc)); - if (root == 0) - { + xmlNode* root(xmlDocGetRootElement(m_Doc)); + if (root == 0) { LOG_ERROR("Error getting root element"); return false; } // Note the namespace is NULL here - xmlNode *child(xmlNewTextChild(root, - 0, - reinterpret_cast(name.c_str()), - reinterpret_cast(value.c_str()))); - if (child == 0) - { + 
xmlNode* child( + xmlNewTextChild(root, 0, reinterpret_cast(name.c_str()), reinterpret_cast(value.c_str()))); + if (child == 0) { LOG_ERROR("Unable to add new child to " << root); return false; } - for (TStrStrMapCItr attrIter = attrs.begin(); - attrIter != attrs.end(); - ++attrIter) - { - xmlSetProp(child, - reinterpret_cast(attrIter->first.c_str()), - reinterpret_cast(attrIter->second.c_str())); + for (TStrStrMapCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) { + xmlSetProp( + child, reinterpret_cast(attrIter->first.c_str()), reinterpret_cast(attrIter->second.c_str())); } // This makes XPath operations on large documents much faster @@ -976,33 +784,25 @@ bool CXmlParser::addNewChildNode(const std::string &name, return true; } -bool CXmlParser::changeChildNodeValue(const std::string &name, - const std::string &newValue) -{ - if (m_Doc == 0) - { +bool CXmlParser::changeChildNodeValue(const std::string& name, const std::string& newValue) { + if (m_Doc == 0) { LOG_ERROR("Cannot add to uninitialised document"); return false; } - xmlNode *root(xmlDocGetRootElement(m_Doc)); - if (root == 0) - { + xmlNode* root(xmlDocGetRootElement(m_Doc)); + if (root == 0) { LOG_ERROR("Error getting root element"); return false; } - xmlNode *child(root->children); - while (child != 0) - { - if (child->type == XML_ELEMENT_NODE && - name == reinterpret_cast(child->name)) - { + xmlNode* child(root->children); + while (child != 0) { + if (child->type == XML_ELEMENT_NODE && name == reinterpret_cast(child->name)) { // Unlike xmlNewTextChild, xmlNodeSetContent doesn't escape special // characters, so we have to call xmlEncodeSpecialChars ourselves to // do this - xmlChar *encoded(xmlEncodeSpecialChars(m_Doc, - reinterpret_cast(newValue.c_str()))); + xmlChar* encoded(xmlEncodeSpecialChars(m_Doc, reinterpret_cast(newValue.c_str()))); xmlNodeSetContent(child, encoded); xmlFree(encoded); @@ -1019,8 +819,7 @@ bool CXmlParser::changeChildNodeValue(const std::string &name, // TODO this whole function should really be replaced with a proper character // set conversion library -bool CXmlParser::stringLatin1ToUtf8(std::string &str) -{ +bool CXmlParser::stringLatin1ToUtf8(std::string& str) { // The UTF-8 character corresponding to each Latin1 character will require // either 1 or 2 bytes of storage (but note that some UTF-8 characters can // require 3 bytes) @@ -1033,17 +832,12 @@ bool CXmlParser::stringLatin1ToUtf8(std::string &str) int outLen(static_cast(bufferSize)); // This function is provided by libxml2 - int ret = ::isolat1ToUTF8(reinterpret_cast(&buffer[0]), - &outLen, - reinterpret_cast(str.c_str()), - &inLen); - if (ret == -1 || inLen < static_cast(str.length())) - { - LOG_ERROR("Failure converting Latin1 string to UTF-8" << - core_t::LINE_ENDING << "Return code: " << ret << - core_t::LINE_ENDING << "Remaining length: " << inLen << - core_t::LINE_ENDING << "Original string: " << str << - core_t::LINE_ENDING << "Result so far: " << &buffer[0]); + int ret = + ::isolat1ToUTF8(reinterpret_cast(&buffer[0]), &outLen, reinterpret_cast(str.c_str()), &inLen); + if (ret == -1 || inLen < static_cast(str.length())) { + LOG_ERROR("Failure converting Latin1 string to UTF-8" + << core_t::LINE_ENDING << "Return code: " << ret << core_t::LINE_ENDING << "Remaining length: " << inLen + << core_t::LINE_ENDING << "Original string: " << str << core_t::LINE_ENDING << "Result so far: " << &buffer[0]); return false; } @@ -1053,25 +847,21 @@ bool CXmlParser::stringLatin1ToUtf8(std::string &str) return true; } -bool 
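// Illustrative document-building sketch (not part of this patch) for the
// mutation methods above; note changeChildNodeValue() escapes special
// characters itself via xmlEncodeSpecialChars, so callers pass raw text.
#include <core/CXmlParser.h>

void buildSketch() {
    ml::core::CXmlParser parser;
    parser.setRootNode("config");
    parser.addNewChildNode("host", "localhost");
    parser.changeChildNodeValue("host", "a < b"); // stored as "a &lt; b" in the XML
}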
CXmlParser::toNodeHierarchy(const xmlNode &parentNode, - CXmlNodeWithChildrenPool &pool, - CStringCache *cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP &nodePtr) const -{ +bool CXmlParser::toNodeHierarchy(const xmlNode& parentNode, + CXmlNodeWithChildrenPool& pool, + CStringCache* cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& nodePtr) const { // Create the parent node nodePtr = pool.newNode(); // Here we take advantage of friendship to directly modify the CXmlNode's // name and value. - if (cache != 0) - { + if (cache != 0) { // Get the name from the cache if there is one, as we expect relatively // few distinct names repeated many times - nodePtr->m_Name = cache->stringFor(reinterpret_cast(parentNode.name)); - } - else - { - nodePtr->m_Name = reinterpret_cast(parentNode.name); + nodePtr->m_Name = cache->stringFor(reinterpret_cast(parentNode.name)); + } else { + nodePtr->m_Name = reinterpret_cast(parentNode.name); } // Nodes from the pool may contain old values @@ -1081,22 +871,15 @@ bool CXmlParser::toNodeHierarchy(const xmlNode &parentNode, // (If we used xmlNodeGetContent() we'd get the text of child nodes too, // which we don't want, as we'll be dealing with the text in the child // nodes recursively.) - const xmlNode *child(parentNode.children); - while (child != 0) - { - if (child->type == XML_TEXT_NODE || - child->type == XML_CDATA_SECTION_NODE) - { - const xmlChar *textVal(child->content); - if (textVal != 0) - { - if (isValueSet) - { - nodePtr->m_Value += reinterpret_cast(textVal); - } - else - { - nodePtr->m_Value = reinterpret_cast(textVal); + const xmlNode* child(parentNode.children); + while (child != 0) { + if (child->type == XML_TEXT_NODE || child->type == XML_CDATA_SECTION_NODE) { + const xmlChar* textVal(child->content); + if (textVal != 0) { + if (isValueSet) { + nodePtr->m_Value += reinterpret_cast(textVal); + } else { + nodePtr->m_Value = reinterpret_cast(textVal); isValueSet = true; } } @@ -1105,43 +888,34 @@ bool CXmlParser::toNodeHierarchy(const xmlNode &parentNode, child = child->next; } - if (!isValueSet) - { + if (!isValueSet) { nodePtr->m_Value.clear(); } // Take advantage of friendship to add attributes directly to the parent // node - const xmlAttr *prop(parentNode.properties); - while (prop != 0) - { + const xmlAttr* prop(parentNode.properties); + while (prop != 0) { // Only cover the likely case. // (If we ever need to cover unlikely cases then use: // xmlChar *propValue(xmlGetProp(const_cast(&parentNode), propName)); // followed by: // xmlFree(propValue); // but obviously this involves a temporary memory allocation.) 
- const xmlNode *propChildren(prop->children); - if (propChildren != 0 && - propChildren->next == 0 && - propChildren->type == XML_TEXT_NODE) - { - const char *propName(reinterpret_cast(prop->name)); - const char *propValue(reinterpret_cast(propChildren->content)); + const xmlNode* propChildren(prop->children); + if (propChildren != 0 && propChildren->next == 0 && propChildren->type == XML_TEXT_NODE) { + const char* propName(reinterpret_cast(prop->name)); + const char* propValue(reinterpret_cast(propChildren->content)); // Here we take advantage of friendship to directly modify the // CXmlNode's attributes map, thus avoiding the need to build a // separate map and then copy it - if (cache != 0) - { + if (cache != 0) { // Get attribute names and values from the cache if there is // one, as we expect relatively few distinct attributes repeated // many times - nodePtr->m_Attributes.push_back(CXmlNode::TStrStrPr(cache->stringFor(propName), - cache->stringFor(propValue))); - } - else - { + nodePtr->m_Attributes.push_back(CXmlNode::TStrStrPr(cache->stringFor(propName), cache->stringFor(propValue))); + } else { nodePtr->m_Attributes.push_back(CXmlNode::TStrStrPr(propName, propValue)); } } @@ -1150,15 +924,12 @@ bool CXmlParser::toNodeHierarchy(const xmlNode &parentNode, } // Recursively add the children to the parent - const xmlNode *childNode(parentNode.children); - while (childNode != 0) - { - if (childNode->type == XML_ELEMENT_NODE) - { + const xmlNode* childNode(parentNode.children); + while (childNode != 0) { + if (childNode->type == XML_ELEMENT_NODE) { CXmlNodeWithChildren::TXmlNodeWithChildrenP childPtr; - if (this->toNodeHierarchy(*childNode, pool, cache, childPtr) == false) - { + if (this->toNodeHierarchy(*childNode, pool, cache, childPtr) == false) { return false; } @@ -1174,10 +945,9 @@ bool CXmlParser::toNodeHierarchy(const xmlNode &parentNode, // 'Ml' error handler // Note, this is called on every error // TODO print a consolidated error message -void CXmlParser::errorHandler(void * /* ctxt */, const char *msg, ...) -{ +void CXmlParser::errorHandler(void* /* ctxt */, const char* msg, ...) { static const size_t ERRBUF_SIZE(1024); - char errbuf[ERRBUF_SIZE] = { '\0' }; + char errbuf[ERRBUF_SIZE] = {'\0'}; va_list args; va_start(args, msg); @@ -1186,8 +956,5 @@ void CXmlParser::errorHandler(void * /* ctxt */, const char *msg, ...) 
LOG_ERROR("XML error: " << errbuf); } - - } } - diff --git a/lib/core/CXmlParserIntf.cc b/lib/core/CXmlParserIntf.cc index a7640a303e..9d49c55c71 100644 --- a/lib/core/CXmlParserIntf.cc +++ b/lib/core/CXmlParserIntf.cc @@ -9,44 +9,30 @@ #include - -namespace ml -{ -namespace core -{ - +namespace ml { +namespace core { const std::string CXmlParserIntf::XML_HEADER(""); - -CXmlParserIntf::CXmlParserIntf() -{ +CXmlParserIntf::CXmlParserIntf() { } -CXmlParserIntf::~CXmlParserIntf() -{ +CXmlParserIntf::~CXmlParserIntf() { } -std::string CXmlParserIntf::makeValidName(const std::string &str) -{ +std::string CXmlParserIntf::makeValidName(const std::string& str) { std::string result(str); - if (!result.empty()) - { + if (!result.empty()) { // First character can't be a number - if (!::isalpha(static_cast(result[0]))) - { + if (!::isalpha(static_cast(result[0]))) { result[0] = '_'; } // Other characters can be numbers, but change all other punctuation to // underscores - for (std::string::iterator iter = result.begin() + 1; - iter != result.end(); - ++iter) - { - if (!::isalnum(static_cast(*iter))) - { + for (std::string::iterator iter = result.begin() + 1; iter != result.end(); ++iter) { + if (!::isalnum(static_cast(*iter))) { *iter = '_'; } } @@ -55,8 +41,7 @@ std::string CXmlParserIntf::makeValidName(const std::string &str) return result; } -std::string CXmlParserIntf::toOneLine(const std::string &xml) -{ +std::string CXmlParserIntf::toOneLine(const std::string& xml) { std::string oneLine(xml); CStringUtils::replace(XML_HEADER, "", oneLine); @@ -66,8 +51,5 @@ std::string CXmlParserIntf::toOneLine(const std::string &xml) return oneLine; } - - } } - diff --git a/lib/core/unittest/CAllocationStrategyTest.cc b/lib/core/unittest/CAllocationStrategyTest.cc index a3a3112541..3761a2459a 100644 --- a/lib/core/unittest/CAllocationStrategyTest.cc +++ b/lib/core/unittest/CAllocationStrategyTest.cc @@ -13,24 +13,21 @@ using namespace ml; -namespace -{ +namespace { double TOLERANCE = 1.0; -// This is 10% plus a small allowance for rounding error +// This is 10% plus a small allowance for rounding error double RATIO = 1.1 + 0.05; } template -void assertSize(const T &t) -{ +void assertSize(const T& t) { std::size_t s = t.size(); std::size_t c = t.capacity(); LOG_DEBUG("Size " << s << ", capacity " << c); CPPUNIT_ASSERT(double(c) <= std::max(double(s) * RATIO, double(s) + TOLERANCE)); } -void CAllocationStrategyTest::test() -{ +void CAllocationStrategyTest::test() { using TIntVec = std::vector; { @@ -60,30 +57,23 @@ void CAllocationStrategyTest::test() core::CAllocationStrategy::resize(v, 128); assertSize(v); - } { TIntVec v; core::CAllocationStrategy::push_back(v, 55); assertSize(v); - for (std::size_t i = 0; i < 10000; i++) - { + for (std::size_t i = 0; i < 10000; i++) { core::CAllocationStrategy::push_back(v, int(55 + i)); assertSize(v); } } - } +CppUnit::Test* CAllocationStrategyTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAllocationStrategyTest"); - -CppUnit::Test *CAllocationStrategyTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAllocationStrategyTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAllocationStrategyTest::test", - &CAllocationStrategyTest::test) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CAllocationStrategyTest::test", &CAllocationStrategyTest::test)); return suiteOfTests; } diff --git a/lib/core/unittest/CAllocationStrategyTest.h b/lib/core/unittest/CAllocationStrategyTest.h index 0bd50d8159..5d468bfa1a 
100644 --- a/lib/core/unittest/CAllocationStrategyTest.h +++ b/lib/core/unittest/CAllocationStrategyTest.h @@ -8,13 +8,11 @@ #include -class CAllocationStrategyTest : public CppUnit::TestFixture -{ - public: - void test(); +class CAllocationStrategyTest : public CppUnit::TestFixture { +public: + void test(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CAllocationStrategyTest_h - diff --git a/lib/core/unittest/CBase64FilterTest.cc b/lib/core/unittest/CBase64FilterTest.cc index 41dcb3c723..19b350e9d3 100644 --- a/lib/core/unittest/CBase64FilterTest.cc +++ b/lib/core/unittest/CBase64FilterTest.cc @@ -9,9 +9,9 @@ #include +#include #include #include -#include using TRandom = boost::mt19937; using TDistribution = boost::uniform_int<>; @@ -21,87 +21,66 @@ using TGeneratorItr = boost::generator_iterator; using namespace ml; using namespace core; -namespace -{ +namespace { using TFilteredOutput = boost::iostreams::filtering_stream; using TFilteredInput = boost::iostreams::filtering_stream; // Implements the boost::iostreams Source template interface -class CMockSource -{ - public: - using char_type = char; +class CMockSource { +public: + using char_type = char; - struct category : - public boost::iostreams::source_tag - {}; + struct category : public boost::iostreams::source_tag {}; - public: - CMockSource(const std::string &s) : m_Data(s), m_Read(0) - { } +public: + CMockSource(const std::string& s) : m_Data(s), m_Read(0) {} - std::streamsize read(char* s, std::streamsize n) - { - if (m_Read >= std::streamsize(m_Data.size())) - { - return -1; - } - std::streamsize toCopy = std::min(std::streamsize(m_Data.size() - m_Read), n); - LOG_TRACE("Read " << toCopy << ": " << std::string(m_Data.c_str() + m_Read, toCopy)); - memcpy(s, m_Data.c_str() + m_Read, toCopy); - m_Read += toCopy; - return toCopy; + std::streamsize read(char* s, std::streamsize n) { + if (m_Read >= std::streamsize(m_Data.size())) { + return -1; } + std::streamsize toCopy = std::min(std::streamsize(m_Data.size() - m_Read), n); + LOG_TRACE("Read " << toCopy << ": " << std::string(m_Data.c_str() + m_Read, toCopy)); + memcpy(s, m_Data.c_str() + m_Read, toCopy); + m_Read += toCopy; + return toCopy; + } - void close() - { - } + void close() {} - private: - std::string m_Data; - std::streamsize m_Read; +private: + std::string m_Data; + std::streamsize m_Read; }; - // Implements the boost::iostreams Sink template interface -class CMockSink -{ - public: - using char_type = char; +class CMockSink { +public: + using char_type = char; - struct category : - public boost::iostreams::sink_tag, - public boost::iostreams::closable_tag - {}; + struct category : public boost::iostreams::sink_tag, public boost::iostreams::closable_tag {}; - public: - CMockSink() - {} +public: + CMockSink() {} - std::streamsize write(const char* s, std::streamsize n) - { - m_Data.append(s, n); - return n; - } + std::streamsize write(const char* s, std::streamsize n) { + m_Data.append(s, n); + return n; + } - void close() - {} + void close() {} - const std::string getData() const - { - return m_Data; - } + const std::string getData() const { return m_Data; } - private: - void writeInternal(const char *s, std::streamsize &written, std::streamsize &n); +private: + void writeInternal(const char* s, std::streamsize& written, std::streamsize& n); - private: - std::string m_Data; +private: + std::string m_Data; }; -void testEncodeDecode(const std::string &input) -{ +void testEncodeDecode(const std::string& input) { CMockSink sink; { 
TFilteredOutput filter; @@ -120,12 +99,9 @@ void testEncodeDecode(const std::string &input) CPPUNIT_ASSERT_EQUAL(input, s); } } - } - -void CBase64FilterTest::testEncode() -{ +void CBase64FilterTest::testEncode() { { // Test encode ability, with known test data @@ -154,30 +130,26 @@ void CBase64FilterTest::testEncode() TFilteredOutput filter; filter.push(CBase64Encoder()); filter.push(boost::ref(sink)); - for (std::size_t i = 0; i < 50000; i++) - { + for (std::size_t i = 0; i < 50000; i++) { filter << "OneTwoThreeFourFiveSixSevenEightNineTen"; } } std::ostringstream result; - for (std::size_t i = 0; i < 50000; i++) - { + for (std::size_t i = 0; i < 50000; i++) { result << "T25lVHdvVGhyZWVGb3VyRml2ZVNpeFNldmVuRWlnaHROaW5lVGVu"; } CPPUNIT_ASSERT_EQUAL(result.str(), sink.getData()); } } - -void CBase64FilterTest::testDecode() -{ +void CBase64FilterTest::testDecode() { { // Test decoding std::string encoded = "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz" - "IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg" - "dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu" - "dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo" - "ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="; + "IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg" + "dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu" + "dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo" + "ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="; std::string expected = "Man is distinguished, not only by his reason, but by this singular passion from " "other animals, which is a lust of the mind, that by a perseverance of delight " "in the continued and indefatigable generation of knowledge, exceeds the short " @@ -204,9 +176,7 @@ void CBase64FilterTest::testDecode() } } - -void CBase64FilterTest::testBoth() -{ +void CBase64FilterTest::testBoth() { { ::testEncodeDecode("a"); ::testEncodeDecode("aa"); @@ -224,30 +194,19 @@ void CBase64FilterTest::testBoth() TGeneratorItr randItr(&generator); std::ostringstream ss; - for (std::size_t i = 0; i < 5000000; i++) - { + for (std::size_t i = 0; i < 5000000; i++) { ss << char(*randItr++); } ::testEncodeDecode(ss.str()); } } +CppUnit::Test* CBase64FilterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBase64FilterTest"); -CppUnit::Test* CBase64FilterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBase64FilterTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBase64FilterTest::testDecode", - &CBase64FilterTest::testDecode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBase64FilterTest::testEncode", - &CBase64FilterTest::testEncode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBase64FilterTest::testBoth", - &CBase64FilterTest::testBoth) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CBase64FilterTest::testDecode", &CBase64FilterTest::testDecode)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBase64FilterTest::testEncode", &CBase64FilterTest::testEncode)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBase64FilterTest::testBoth", &CBase64FilterTest::testBoth)); return suiteOfTests; } - - diff --git a/lib/core/unittest/CBase64FilterTest.h b/lib/core/unittest/CBase64FilterTest.h index 629e383302..5dd4a85986 100644 --- a/lib/core/unittest/CBase64FilterTest.h +++ b/lib/core/unittest/CBase64FilterTest.h @@ -8,15 +8,13 
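// Illustrative encode sketch matching the tests above (not part of this
// patch). It assumes the CBase64Encoder filter finalises its padding when
// the filter chain is closed on destruction, as the scoping in these tests
// suggests, and that the filter is declared in a CBase64Filter header;
// adjust the include if the real header name differs.
#include <core/CBase64Filter.h>

#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/filtering_stream.hpp>

#include <string>

std::string encodeSketch() {
    std::string encoded;
    {
        boost::iostreams::filtering_stream<boost::iostreams::output> out;
        out.push(ml::core::CBase64Encoder());          // filters first
        out.push(boost::iostreams::back_inserter(encoded)); // device last
        out << "OneTwoThree";
    } // chain closed here, flushing the final padded quantum
    return encoded; // "T25lVHdvVGhyZWU=" for this input
}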
 #include <cppunit/extensions/HelperMacros.h>
+class CBase64FilterTest : public CppUnit::TestFixture {
+public:
+    void testDecode();
+    void testEncode();
+    void testBoth();
-class CBase64FilterTest : public CppUnit::TestFixture
-{
-    public:
-        void testDecode();
-        void testEncode();
-        void testBoth();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CBase64FilterTest_h
diff --git a/lib/core/unittest/CBlockingMessageQueueTest.cc b/lib/core/unittest/CBlockingMessageQueueTest.cc
index 16493a0f38..134f86a130 100644
--- a/lib/core/unittest/CBlockingMessageQueueTest.cc
+++ b/lib/core/unittest/CBlockingMessageQueueTest.cc
@@ -5,51 +5,40 @@
  */
 #include "CBlockingMessageQueueTest.h"
-#include <core/CLogger.h>
 #include <core/CBlockingMessageQueue.h>
+#include <core/CLogger.h>
 #include <vector>
+CppUnit::Test* CBlockingMessageQueueTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBlockingMessageQueueTest");
-CppUnit::Test *CBlockingMessageQueueTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBlockingMessageQueueTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBlockingMessageQueueTest>(
-                               "CBlockingMessageQueueTest::testSendReceive",
-                               &CBlockingMessageQueueTest::testSendReceive) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBlockingMessageQueueTest>("CBlockingMessageQueueTest::testSendReceive",
+                                                                             &CBlockingMessageQueueTest::testSendReceive));
     return suiteOfTests;
 }
-namespace
-{
-    class CReceiver
-    {
-        public:
-            void processMsg(const std::string &str, size_t /* backlog */)
-            {
-                m_Strings.push_back(str);
-                if ((m_Strings.size() % 1000) == 0)
-                {
-                    LOG_DEBUG("Received " << m_Strings.size() << " strings");
-                }
-            }
-
-            size_t size() const
-            {
-                return m_Strings.size();
-            }
-
-        private:
-            using TStrVec = std::vector<std::string>;
-
-            TStrVec m_Strings;
-    };
+namespace {
+class CReceiver {
+public:
+    void processMsg(const std::string& str, size_t /* backlog */) {
+        m_Strings.push_back(str);
+        if ((m_Strings.size() % 1000) == 0) {
+            LOG_DEBUG("Received " << m_Strings.size() << " strings");
+        }
+    }
+
+    size_t size() const { return m_Strings.size(); }
+
+private:
+    using TStrVec = std::vector<std::string>;
+
+    TStrVec m_Strings;
+};
 }
-void CBlockingMessageQueueTest::testSendReceive()
-{
+void CBlockingMessageQueueTest::testSendReceive() {
     CReceiver receiver;
     static const size_t QUEUE_SIZE(100);
@@ -64,8 +53,7 @@ void CBlockingMessageQueueTest::testSendReceive()
     LOG_DEBUG("Sending " << TEST_SIZE << " strings");
-    for (size_t i = 0; i < TEST_SIZE; ++i)
-    {
+    for (size_t i = 0; i < TEST_SIZE; ++i) {
         queue.dispatchMsg("Test string");
     }
@@ -75,4 +63,3 @@ void CBlockingMessageQueueTest::testSendReceive()
     CPPUNIT_ASSERT_EQUAL(TEST_SIZE, receiver.size());
 }
-
diff --git a/lib/core/unittest/CBlockingMessageQueueTest.h b/lib/core/unittest/CBlockingMessageQueueTest.h
index fe56be3cf0..269ec59c66 100644
--- a/lib/core/unittest/CBlockingMessageQueueTest.h
+++ b/lib/core/unittest/CBlockingMessageQueueTest.h
@@ -8,14 +8,11 @@
 #include <cppunit/extensions/HelperMacros.h>
+class CBlockingMessageQueueTest : public CppUnit::TestFixture {
+public:
+    void testSendReceive();
-class CBlockingMessageQueueTest : public CppUnit::TestFixture
-{
-    public:
-        void testSendReceive();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CBlockingMessageQueueTest_h
-
diff --git a/lib/core/unittest/CByteSwapperTest.cc b/lib/core/unittest/CByteSwapperTest.cc
index d3343a09fe..ba6063bed9 100644
--- a/lib/core/unittest/CByteSwapperTest.cc
+++ b/lib/core/unittest/CByteSwapperTest.cc
@@ -10,20 +10,15 @@
 #include
+CppUnit::Test* CByteSwapperTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CByteSwapperTest");
-CppUnit::Test *CByteSwapperTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CByteSwapperTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CByteSwapperTest>(
-                               "CByteSwapperTest::testByteSwaps",
-                               &CByteSwapperTest::testByteSwaps) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CByteSwapperTest>("CByteSwapperTest::testByteSwaps", &CByteSwapperTest::testByteSwaps));
     return suiteOfTests;
 }
-void CByteSwapperTest::testByteSwaps()
-{
+void CByteSwapperTest::testByteSwaps() {
     uint8_t type1(0x12);
     CPPUNIT_ASSERT(ml::core::CByteSwapper::swapBytes(type1) == 0x12);
@@ -50,4 +45,3 @@ void CByteSwapperTest::testByteSwaps()
     int64_t type8(0x0FEDCBA987654321LL);
     CPPUNIT_ASSERT(ml::core::CByteSwapper::swapBytes(type8) == 0x21436587A9CBED0FLL);
 }
-
diff --git a/lib/core/unittest/CByteSwapperTest.h b/lib/core/unittest/CByteSwapperTest.h
index 40c20ace9e..62d50908e2 100644
--- a/lib/core/unittest/CByteSwapperTest.h
+++ b/lib/core/unittest/CByteSwapperTest.h
@@ -8,13 +8,11 @@
 #include <cppunit/extensions/HelperMacros.h>
-class CByteSwapperTest : public CppUnit::TestFixture
-{
-    public:
-        void testByteSwaps();
+class CByteSwapperTest : public CppUnit::TestFixture {
+public:
+    void testByteSwaps();
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CByteSwapperTest_h
-
diff --git a/lib/core/unittest/CCompressUtilsTest.cc b/lib/core/unittest/CCompressUtilsTest.cc
index 0ecf47bed4..a4c19deaf3 100644
--- a/lib/core/unittest/CCompressUtilsTest.cc
+++ b/lib/core/unittest/CCompressUtilsTest.cc
@@ -10,28 +10,20 @@
 #include
-
-CppUnit::Test *CCompressUtilsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCompressUtilsTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCompressUtilsTest>(
-                               "CCompressUtilsTest::testEmptyAdd",
-                               &CCompressUtilsTest::testEmptyAdd) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCompressUtilsTest>(
-                               "CCompressUtilsTest::testOneAdd",
-                               &CCompressUtilsTest::testOneAdd) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCompressUtilsTest>(
-                               "CCompressUtilsTest::testManyAdds",
-                               &CCompressUtilsTest::testManyAdds) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCompressUtilsTest>(
-                               "CCompressUtilsTest::testLengthOnly",
-                               &CCompressUtilsTest::testLengthOnly) );
+CppUnit::Test* CCompressUtilsTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCompressUtilsTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCompressUtilsTest>("CCompressUtilsTest::testEmptyAdd", &CCompressUtilsTest::testEmptyAdd));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCompressUtilsTest>("CCompressUtilsTest::testOneAdd", &CCompressUtilsTest::testOneAdd));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCompressUtilsTest>("CCompressUtilsTest::testManyAdds", &CCompressUtilsTest::testManyAdds));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCompressUtilsTest>("CCompressUtilsTest::testLengthOnly", &CCompressUtilsTest::testLengthOnly));
     return suiteOfTests;
 }
-void CCompressUtilsTest::testEmptyAdd()
-{
+void CCompressUtilsTest::testEmptyAdd() {
     ml::core::CCompressUtils compressor(false);
     std::string str;
@@ -50,8 +42,7 @@ void CCompressUtilsTest::testEmptyAdd()
     CPPUNIT_ASSERT_EQUAL(length, output.size());
 }
-void CCompressUtilsTest::testOneAdd()
-{
+void CCompressUtilsTest::testOneAdd() {
     ml::core::CCompressUtils compressor(false);
     std::string str("1234567890");
@@ -70,8 +61,7 @@ void CCompressUtilsTest::testOneAdd()
     CPPUNIT_ASSERT_EQUAL(length, output.size());
 }
-void CCompressUtilsTest::testManyAdds()
-{
+void CCompressUtilsTest::testManyAdds() {
     ml::core::CCompressUtils compressorMulti(false);
     std::string str1("1234567890");
@@ -88,8 +78,7 @@ void CCompressUtilsTest::testManyAdds()
     CPPUNIT_ASSERT(compressorMulti.compressedData(true, outputMulti));
     CPPUNIT_ASSERT(compressorMulti.compressedLength(true, lengthMulti));
-    LOG_INFO("Length of " << str1 << str2 << str3 <<
-             " compressed is " << lengthMulti);
+    LOG_INFO("Length of " << str1 << str2 << str3 << " compressed is " << lengthMulti);
     CPPUNIT_ASSERT(lengthMulti > 0);
     CPPUNIT_ASSERT_EQUAL(lengthMulti, outputMulti.size());
@@ -109,8 +98,7 @@ void CCompressUtilsTest::testManyAdds()
     CPPUNIT_ASSERT(outputMulti == outputSingle);
 }
-void CCompressUtilsTest::testLengthOnly()
-{
+void CCompressUtilsTest::testLengthOnly() {
     ml::core::CCompressUtils compressorFull(false);
     std::string str("qwertyuiopa1234sdfghjklzxcvbnm");
@@ -125,8 +113,7 @@ void CCompressUtilsTest::testLengthOnly()
     CPPUNIT_ASSERT(compressorFull.compressedData(true, outputFull));
     CPPUNIT_ASSERT(compressorFull.compressedLength(true, lengthFull));
-    LOG_INFO("Length of " << str << str << str <<
-             " compressed is " << lengthFull);
+    LOG_INFO("Length of " << str << str << str << " compressed is " << lengthFull);
     CPPUNIT_ASSERT(lengthFull > 0);
     CPPUNIT_ASSERT_EQUAL(lengthFull, outputFull.size());
@@ -147,4 +134,3 @@ void CCompressUtilsTest::testLengthOnly()
     CPPUNIT_ASSERT_EQUAL(lengthFull, lengthLengthOnly);
     CPPUNIT_ASSERT_EQUAL(size_t(0), outputLengthOnly.size());
 }
-
diff --git a/lib/core/unittest/CCompressUtilsTest.h b/lib/core/unittest/CCompressUtilsTest.h
index 72ac61b6fa..85263ef7f3 100644
--- a/lib/core/unittest/CCompressUtilsTest.h
+++ b/lib/core/unittest/CCompressUtilsTest.h
@@ -8,17 +8,14 @@
 #include <cppunit/extensions/HelperMacros.h>
+class CCompressUtilsTest : public CppUnit::TestFixture {
+public:
+    void testEmptyAdd();
+    void testOneAdd();
+    void testManyAdds();
+    void testLengthOnly();
-class CCompressUtilsTest : public CppUnit::TestFixture
-{
-    public:
-        void testEmptyAdd();
-        void testOneAdd();
-        void testManyAdds();
-        void testLengthOnly();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CCompressUtilsTest_h
-
diff --git a/lib/core/unittest/CCompressedDictionaryTest.cc b/lib/core/unittest/CCompressedDictionaryTest.cc
index 39ecfa4c23..0b61ead540 100644
--- a/lib/core/unittest/CCompressedDictionaryTest.cc
+++ b/lib/core/unittest/CCompressedDictionaryTest.cc
@@ -18,8 +18,7 @@ using namespace ml;
 using namespace core;
 using namespace test;
-void CCompressedDictionaryTest::testAll()
-{
+void CCompressedDictionaryTest::testAll() {
     using TStrVec = std::vector<std::string>;
     using TDictionary = CCompressedDictionary<2>;
     using TWordUSet = TDictionary::TWordUSet;
@@ -37,8 +36,7 @@ void CCompressedDictionaryTest::testAll()
     std::string word2("word2");
     std::string word3("word3");
-    for (std::size_t i = 0u; i < numberTests; ++i)
-    {
+    for (std::size_t i = 0u; i < numberTests; ++i) {
         LOG_DEBUG("Collision test = " << i);
         rng.generateWords(wordLength, numberWords, words);
@@ -46,8 +44,7 @@ void CCompressedDictionaryTest::testAll()
         TDictionary dictionary;
         TWordUSet uniqueWords;
-        for (std::size_t j = 0u; j < words.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < words.size(); ++j) {
             CPPUNIT_ASSERT(uniqueWords.insert(dictionary.word(words[j])).second);
             CPPUNIT_ASSERT(uniqueWords.insert(dictionary.word(words[j], word2)).second);
             CPPUNIT_ASSERT(uniqueWords.insert(dictionary.word(words[j], word2, word3)).second);
@@ -55,8 +52,7 @@ void CCompressedDictionaryTest::testAll()
     }
 }
-void CCompressedDictionaryTest::testPersist()
-{
+void CCompressedDictionaryTest::testPersist() {
     using TDictionary1 = CCompressedDictionary<1>;
     using TDictionary2 = CCompressedDictionary<2>;
     using TDictionary3 = CCompressedDictionary<3>;
@@ -100,15 +96,12 @@ void CCompressedDictionaryTest::testPersist()
     }
 }
-CppUnit::Test *CCompressedDictionaryTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCompressedDictionaryTest");
+CppUnit::Test* CCompressedDictionaryTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCompressedDictionaryTest");
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCompressedDictionaryTest>(
-                               "CCompressedDictionaryTest::testAll",
-                               &CCompressedDictionaryTest::testAll) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCompressedDictionaryTest>(
-                               "CCompressedDictionaryTest::testPersist",
-                               &CCompressedDictionaryTest::testPersist) );
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCompressedDictionaryTest>("CCompressedDictionaryTest::testAll", &CCompressedDictionaryTest::testAll));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCompressedDictionaryTest>("CCompressedDictionaryTest::testPersist",
+                                                                             &CCompressedDictionaryTest::testPersist));
     return suiteOfTests;
 }
diff --git a/lib/core/unittest/CCompressedDictionaryTest.h b/lib/core/unittest/CCompressedDictionaryTest.h
index 24fe2d0609..6dd3bfc7cb 100644
--- a/lib/core/unittest/CCompressedDictionaryTest.h
+++ b/lib/core/unittest/CCompressedDictionaryTest.h
@@ -9,14 +9,12 @@
 #include <cppunit/extensions/HelperMacros.h>
+class CCompressedDictionaryTest : public CppUnit::TestFixture {
+public:
+    void testAll();
+    void testPersist();
-class CCompressedDictionaryTest : public CppUnit::TestFixture
-{
-    public:
-        void testAll();
-        void testPersist();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CCompressedDictionaryTest_h
diff --git a/lib/core/unittest/CConcurrentWrapperTest.cc b/lib/core/unittest/CConcurrentWrapperTest.cc
index 1106d3a07e..32a88a2dcc 100644
--- a/lib/core/unittest/CConcurrentWrapperTest.cc
+++ b/lib/core/unittest/CConcurrentWrapperTest.cc
@@ -6,8 +6,8 @@
 #include "CConcurrentWrapperTest.h"
-#include <core/CLogger.h>
 #include <core/CConcurrentWrapper.h>
+#include <core/CLogger.h>
 #include
 #include
@@ -28,61 +28,57 @@
 using TOStringStreamConcurrentWrapper = CConcurrentWrapper<std::ostream>;
 // a low capacity wrapper with only 5 buckets for the queue, the 3 controls the wakeup of threads
 using TOStringStreamLowCapacityConcurrentWrapper = CConcurrentWrapper<std::ostream, 5, 3>;
-void CConcurrentWrapperTest::testBasic()
-{
+void CConcurrentWrapperTest::testBasic() {
     std::ostringstream stringStream;
     {
         TOStringStreamConcurrentWrapper wrappedStringStream(stringStream);
-        wrappedStringStream( [](std::ostream& o){ o << "Hello 1"; o << " world 1\n"; } );
-        wrappedStringStream( [](std::ostream& o){ o << "Hello 2"; o << " world 2\n"; } );
+        wrappedStringStream([](std::ostream& o) {
+            o << "Hello 1";
+            o << " world 1\n";
+        });
+        wrappedStringStream([](std::ostream& o) {
+            o << "Hello 2";
+            o << " world 2\n";
+        });
     }
     CPPUNIT_ASSERT_EQUAL(std::string("Hello 1 world 1\nHello 2 world 2\n"), stringStream.str());
 }
-namespace
-{
-
-void task(CConcurrentWrapper<std::ostream> &sink, int i, std::chrono::microseconds pause)
-{
-    sink ( [i, pause](std::ostream& o)
-           {
-               o << "ta";
-               std::this_thread::sleep_for(pause);
-               o << "sk ";
-               o << std::setw (5);
-               o << i;
-               o << "\n";
-           } );
+namespace {
+
+void task(CConcurrentWrapper<std::ostream>& sink, int i, std::chrono::microseconds pause) {
+    sink([i, pause](std::ostream& o) {
+        o << "ta";
+        std::this_thread::sleep_for(pause);
+        o << "sk ";
+        o << std::setw(5);
+        o << i;
+        o << "\n";
+    });
 }
-void taskLowCapacityQueue(TOStringStreamLowCapacityConcurrentWrapper &sink, int i,
-                          std::chrono::microseconds pause)
-{
-    sink ( [i, pause](std::ostream& o)
-           {
-               o << "ta";
-               std::this_thread::sleep_for(pause);
-               o << "sk ";
-               o << std::setw (5);
-               o << i;
-               o << "\n";
-           } );
+void taskLowCapacityQueue(TOStringStreamLowCapacityConcurrentWrapper& sink, int i, std::chrono::microseconds pause) {
+    sink([i, pause](std::ostream& o) {
+        o << "ta";
+        std::this_thread::sleep_for(pause);
+        o << "sk ";
+        o << std::setw(5);
+        o << i;
+        o << "\n";
+    });
 }
-
 }
-void CConcurrentWrapperTest::testThreads()
-{
+void CConcurrentWrapperTest::testThreads() {
     std::ostringstream stringStream;
     static const size_t MESSAGES(1500);
     {
         TOStringStreamConcurrentWrapper wrappedStringStream(stringStream);
         boost::threadpool::pool tp(10);
-        for (size_t i = 0; i < MESSAGES; ++i)
-        {
+        for (size_t i = 0; i < MESSAGES; ++i) {
             tp.schedule(boost::bind(task, boost::ref(wrappedStringStream), i, std::chrono::microseconds(0)));
         }
@@ -95,14 +91,12 @@ void CConcurrentWrapperTest::testThreads()
     CPPUNIT_ASSERT_EQUAL(MESSAGES, numberOfLines);
     CPPUNIT_ASSERT_EQUAL(11 * MESSAGES, output.size());
-    for (size_t i = 0; i < MESSAGES; ++i)
-    {
+    for (size_t i = 0; i < MESSAGES; ++i) {
         CPPUNIT_ASSERT_EQUAL(std::string("task"), output.substr(11 * i, 4));
     }
 }
-void CConcurrentWrapperTest::testThreadsSlow()
-{
+void CConcurrentWrapperTest::testThreadsSlow() {
     std::ostringstream stringStream;
     static const size_t MESSAGES(50);
@@ -110,8 +104,7 @@ void CConcurrentWrapperTest::testThreadsSlow()
         TOStringStreamConcurrentWrapper wrappedStringStream(stringStream);
         boost::threadpool::pool tp(2);
-        for (size_t i = 0; i < MESSAGES; ++i)
-        {
+        for (size_t i = 0; i < MESSAGES; ++i) {
             tp.schedule(boost::bind(task, boost::ref(wrappedStringStream), i, std::chrono::microseconds(50)));
         }
@@ -124,14 +117,12 @@ void CConcurrentWrapperTest::testThreadsSlow()
     CPPUNIT_ASSERT_EQUAL(MESSAGES, numberOfLines);
     CPPUNIT_ASSERT_EQUAL(11 * MESSAGES, output.size());
-    for (size_t i = 0; i < MESSAGES; ++i)
-    {
+    for (size_t i = 0; i < MESSAGES; ++i) {
         CPPUNIT_ASSERT_EQUAL(std::string("task"), output.substr(11 * i, 4));
     }
 }
-void CConcurrentWrapperTest::testThreadsSlowLowCapacity()
-{
+void CConcurrentWrapperTest::testThreadsSlowLowCapacity() {
     std::ostringstream stringStream;
     static const size_t MESSAGES(50);
@@ -140,10 +131,8 @@ void CConcurrentWrapperTest::testThreadsSlowLowCapacity()
         boost::threadpool::pool tp(2);
-        for (size_t i = 0; i < MESSAGES; ++i)
-        {
-            tp.schedule(boost::bind(taskLowCapacityQueue, boost::ref(wrappedStringStream), i,
-                                    std::chrono::microseconds(50)));
+        for (size_t i = 0; i < MESSAGES; ++i) {
+            tp.schedule(boost::bind(taskLowCapacityQueue, boost::ref(wrappedStringStream), i, std::chrono::microseconds(50)));
         }
         tp.wait();
@@ -155,14 +144,12 @@ void CConcurrentWrapperTest::testThreadsSlowLowCapacity()
     CPPUNIT_ASSERT_EQUAL(MESSAGES, numberOfLines);
     CPPUNIT_ASSERT_EQUAL(11 * MESSAGES, output.size());
-    for (size_t i = 0; i < MESSAGES; ++i)
-    {
+    for (size_t i = 0; i < MESSAGES; ++i) {
         CPPUNIT_ASSERT_EQUAL(std::string("task"), output.substr(11 * i, 4));
     }
 }
-void CConcurrentWrapperTest::testThreadsLowCapacity()
-{
+void CConcurrentWrapperTest::testThreadsLowCapacity() {
     std::ostringstream stringStream;
     static const size_t MESSAGES(2500);
@@ -171,10 +158,8 @@ void CConcurrentWrapperTest::testThreadsLowCapacity()
         boost::threadpool::pool tp(8);
-        for (size_t i = 0; i < MESSAGES; ++i)
-        {
-            tp.schedule(boost::bind(taskLowCapacityQueue, boost::ref(wrappedStringStream), i,
-                                    std::chrono::microseconds(0)));
+        for (size_t i = 0; i < MESSAGES; ++i) {
+            tp.schedule(boost::bind(taskLowCapacityQueue, boost::ref(wrappedStringStream), i, std::chrono::microseconds(0)));
         }
         tp.wait();
@@ -186,14 +171,12 @@ void CConcurrentWrapperTest::testThreadsLowCapacity()
     CPPUNIT_ASSERT_EQUAL(MESSAGES, numberOfLines);
     CPPUNIT_ASSERT_EQUAL(11 * MESSAGES, output.size());
-    for (size_t i = 0; i < MESSAGES; ++i)
-    {
+    for (size_t i = 0; i < MESSAGES; ++i) {
         CPPUNIT_ASSERT_EQUAL(std::string("task"), output.substr(11 * i, 4));
     }
 }
-void CConcurrentWrapperTest::testMemoryDebug()
-{
+void CConcurrentWrapperTest::testMemoryDebug() {
     CMemoryUsage mem;
     std::ostringstream stringStream;
@@ -203,28 +186,21 @@ void CConcurrentWrapperTest::testMemoryDebug()
     CPPUNIT_ASSERT_EQUAL(wrappedStringStream.memoryUsage(), mem.usage());
 }
-CppUnit::Test *CConcurrentWrapperTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CConcurrentWrapperTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CConcurrentWrapperTest>(
-                               "CConcurrentWrapperTest::testBasic",
-                               &CConcurrentWrapperTest::testBasic) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CConcurrentWrapperTest>(
-                               "CConcurrentWrapperTest::testThreads",
-                               &CConcurrentWrapperTest::testThreads) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CConcurrentWrapperTest>(
-                               "CConcurrentWrapperTest::testThreadsSlow",
-                               &CConcurrentWrapperTest::testThreadsSlow) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CConcurrentWrapperTest>(
-                               "CConcurrentWrapperTest::testThreadsSlowLowCapacity",
-                               &CConcurrentWrapperTest::testThreadsSlowLowCapacity) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CConcurrentWrapperTest>(
-                               "CConcurrentWrapperTest::testThreadsLowCapacity",
-                               &CConcurrentWrapperTest::testThreadsLowCapacity) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CConcurrentWrapperTest>(
-                               "CConcurrentWrapperTest::testMemoryDebug",
-                               &CConcurrentWrapperTest::testMemoryDebug) );
+CppUnit::Test* CConcurrentWrapperTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CConcurrentWrapperTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CConcurrentWrapperTest>("CConcurrentWrapperTest::testBasic", &CConcurrentWrapperTest::testBasic));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CConcurrentWrapperTest>("CConcurrentWrapperTest::testThreads", &CConcurrentWrapperTest::testThreads));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConcurrentWrapperTest>("CConcurrentWrapperTest::testThreadsSlow",
+                                                                          &CConcurrentWrapperTest::testThreadsSlow));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConcurrentWrapperTest>("CConcurrentWrapperTest::testThreadsSlowLowCapacity",
+                                                                          &CConcurrentWrapperTest::testThreadsSlowLowCapacity));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConcurrentWrapperTest>("CConcurrentWrapperTest::testThreadsLowCapacity",
+                                                                          &CConcurrentWrapperTest::testThreadsLowCapacity));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConcurrentWrapperTest>("CConcurrentWrapperTest::testMemoryDebug",
+                                                                          &CConcurrentWrapperTest::testMemoryDebug));
     return suiteOfTests;
 }
diff --git a/lib/core/unittest/CConcurrentWrapperTest.h b/lib/core/unittest/CConcurrentWrapperTest.h
index d6cd53df2a..5b94245b72 100644
--- a/lib/core/unittest/CConcurrentWrapperTest.h
+++ b/lib/core/unittest/CConcurrentWrapperTest.h
@@ -9,18 +9,16 @@
 #include <cppunit/extensions/HelperMacros.h>
-class CConcurrentWrapperTest : public CppUnit::TestFixture
-{
-    public:
-        void testBasic();
-        void testThreads();
-        void testThreadsSlow();
-        void testThreadsSlowLowCapacity();
-        void testThreadsLowCapacity();
-        void testMemoryDebug();
+class CConcurrentWrapperTest : public CppUnit::TestFixture {
+public:
+    void testBasic();
+    void testThreads();
+    void testThreadsSlow();
+    void testThreadsSlowLowCapacity();
+    void testThreadsLowCapacity();
+    void testMemoryDebug();
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
-
 #endif /* INCLUDED_CConcurrentWrapperTest_h */
diff --git a/lib/core/unittest/CContainerPrinterTest.cc b/lib/core/unittest/CContainerPrinterTest.cc
index d3b0926756..aa6bb7ee5b 100644
--- a/lib/core/unittest/CContainerPrinterTest.cc
+++ b/lib/core/unittest/CContainerPrinterTest.cc
@@ -6,8 +6,8 @@
 #include "CContainerPrinterTest.h"
-#include <core/CLogger.h>
 #include <core/CContainerPrinter.h>
+#include <core/CLogger.h>
 #include
 #include
@@ -15,38 +15,34 @@
 #include
 #include
-#include
 #include
+#include
 using namespace ml;
 using namespace core;
-void CContainerPrinterTest::testAll()
-{
+void CContainerPrinterTest::testAll() {
     std::vector<double> vec;
     LOG_DEBUG("vec = " << CContainerPrinter::print(vec));
     CPPUNIT_ASSERT(CContainerPrinter::print(vec) == "[]");
     vec.push_back(1.1);
     vec.push_back(3.2);
     LOG_DEBUG("vec = " << CContainerPrinter::print(vec));
-    CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 3.2]"),
-                         CContainerPrinter::print(vec));
+    CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 3.2]"), CContainerPrinter::print(vec));
     std::list<std::pair<int, int>> list;
     list.push_back(std::make_pair(1, 2));
     list.push_back(std::make_pair(2, 2));
     list.push_back(std::make_pair(3, 2));
     LOG_DEBUG("list = " << CContainerPrinter::print(list));
-    CPPUNIT_ASSERT_EQUAL(std::string("[(1, 2), (2, 2), (3, 2)]"),
-                         CContainerPrinter::print(list));
+    CPPUNIT_ASSERT_EQUAL(std::string("[(1, 2), (2, 2), (3, 2)]"), CContainerPrinter::print(list));
     std::list<boost::shared_ptr<double>> plist;
     plist.push_back(boost::shared_ptr<double>());
     plist.push_back(boost::shared_ptr<double>(new double(3.0)));
     plist.push_back(boost::shared_ptr<double>(new double(1.1)));
     LOG_DEBUG("plist = " << CContainerPrinter::print(plist));
-    CPPUNIT_ASSERT_EQUAL(std::string("[\"null\", 3, 1.1]"),
-                         CContainerPrinter::print(plist));
+    CPPUNIT_ASSERT_EQUAL(std::string("[\"null\", 3, 1.1]"), CContainerPrinter::print(plist));
     double three = 3.0;
     double fivePointOne = 5.1;
@@ -55,23 +51,15 @@ void CContainerPrinterTest::testAll()
     map.insert(std::make_pair(3.3, &fivePointOne));
     map.insert(std::make_pair(1.0, static_cast<double*>(0)));
     LOG_DEBUG("map = " << CContainerPrinter::print(map));
-    CPPUNIT_ASSERT_EQUAL(std::string("[(1, \"null\"), (1.1, 3), (3.3, 5.1)]"),
-                         CContainerPrinter::print(map));
+    CPPUNIT_ASSERT_EQUAL(std::string("[(1, \"null\"), (1.1, 3), (3.3, 5.1)]"), CContainerPrinter::print(map));
-    std::auto_ptr<int> pints[] =
-        {
-            std::auto_ptr<int>(new int(2)),
-            std::auto_ptr<int>(new int(3)),
-            std::auto_ptr<int>(new int(2))
-        };
+    std::auto_ptr<int> pints[] = {std::auto_ptr<int>(new int(2)), std::auto_ptr<int>(new int(3)), std::auto_ptr<int>(new int(2))};
     LOG_DEBUG("pints = " << CContainerPrinter::print(boost::begin(pints), boost::end(pints)));
-    CPPUNIT_ASSERT_EQUAL(std::string("[2, 3, 2]"),
-                         CContainerPrinter::print(boost::begin(pints), boost::end(pints)));
+    CPPUNIT_ASSERT_EQUAL(std::string("[2, 3, 2]"), CContainerPrinter::print(boost::begin(pints), boost::end(pints)));
     std::vector<boost::optional<double>> ovec(2, boost::optional<double>());
     LOG_DEBUG("ovec = " << CContainerPrinter::print(ovec));
-    CPPUNIT_ASSERT_EQUAL(std::string("[\"null\", \"null\"]"),
-                         CContainerPrinter::print(ovec));
+    CPPUNIT_ASSERT_EQUAL(std::string("[\"null\", \"null\"]"), CContainerPrinter::print(ovec));
     std::vector<std::pair<std::list<std::pair<int, int>>, double>> aggregate;
     aggregate.push_back(std::make_pair(list, 1.3));
@@ -82,13 +70,11 @@ void CContainerPrinterTest::testAll()
                          CContainerPrinter::print(aggregate));
 }
-CppUnit::Test *CContainerPrinterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CContainerPrinterTest");
+CppUnit::Test* CContainerPrinterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CContainerPrinterTest");
-    suiteOfTests->addTest( new CppUnit::TestCaller<CContainerPrinterTest>(
-                               "CContainerPrinterTest::testAll",
-                               &CContainerPrinterTest::testAll) );
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CContainerPrinterTest>("CContainerPrinterTest::testAll", &CContainerPrinterTest::testAll));
     return suiteOfTests;
 }
diff --git a/lib/core/unittest/CContainerPrinterTest.h b/lib/core/unittest/CContainerPrinterTest.h
index b29d783613..6c05ec53bc 100644
--- a/lib/core/unittest/CContainerPrinterTest.h
+++ b/lib/core/unittest/CContainerPrinterTest.h
@@ -9,12 +9,11 @@
 #include <cppunit/extensions/HelperMacros.h>
-class CContainerPrinterTest : public CppUnit::TestFixture
-{
-    public:
-        void testAll();
+class CContainerPrinterTest : public CppUnit::TestFixture {
+public:
+    void testAll();
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CContainerPrinterTest_h
diff --git a/lib/core/unittest/CContainerThroughputTest.cc b/lib/core/unittest/CContainerThroughputTest.cc
index dbd3efbb72..36db77dc11 100644
--- a/lib/core/unittest/CContainerThroughputTest.cc
+++ b/lib/core/unittest/CContainerThroughputTest.cc
@@ -16,259 +16,203 @@
 #include
 #include
-
 const size_t CContainerThroughputTest::FILL_SIZE(2);
 const size_t CContainerThroughputTest::TEST_SIZE(10000000);
-
-CppUnit::Test *CContainerThroughputTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CContainerThroughputTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CContainerThroughputTest>(
-                               "CContainerThroughputTest::testVector",
-                               &CContainerThroughputTest::testVector) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CContainerThroughputTest>(
-                               "CContainerThroughputTest::testList",
-                               &CContainerThroughputTest::testList) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CContainerThroughputTest>(
-                               "CContainerThroughputTest::testDeque",
-                               &CContainerThroughputTest::testDeque) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CContainerThroughputTest>(
-                               "CContainerThroughputTest::testMap",
-                               &CContainerThroughputTest::testMap) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CContainerThroughputTest>(
-                               "CContainerThroughputTest::testCircBuf",
-                               &CContainerThroughputTest::testCircBuf) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CContainerThroughputTest>(
-                               "CContainerThroughputTest::testMultiIndex",
-                               &CContainerThroughputTest::testMultiIndex) );
+CppUnit::Test* CContainerThroughputTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CContainerThroughputTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CContainerThroughputTest>("CContainerThroughputTest::testVector", &CContainerThroughputTest::testVector));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CContainerThroughputTest>("CContainerThroughputTest::testList", &CContainerThroughputTest::testList));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CContainerThroughputTest>("CContainerThroughputTest::testDeque", &CContainerThroughputTest::testDeque));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CContainerThroughputTest>("CContainerThroughputTest::testMap", &CContainerThroughputTest::testMap));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CContainerThroughputTest>("CContainerThroughputTest::testCircBuf", &CContainerThroughputTest::testCircBuf));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CContainerThroughputTest>("CContainerThroughputTest::testMultiIndex",
+                                                                            &CContainerThroughputTest::testMultiIndex));
     return suiteOfTests;
 }
-void CContainerThroughputTest::setUp()
-{
+void CContainerThroughputTest::setUp() {
     CPPUNIT_ASSERT(FILL_SIZE > 0);
     CPPUNIT_ASSERT(TEST_SIZE > FILL_SIZE);
 }
-void CContainerThroughputTest::testVector()
-{
+void CContainerThroughputTest::testVector() {
     using TContentVec = std::vector<SContent>;
     TContentVec testVec;
     testVec.reserve(FILL_SIZE);
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting vector throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting vector throughput test at " << ml::core::CTimeUtils::toTimeString(start));
     size_t count(0);
-    while (count < FILL_SIZE)
-    {
+    while (count < FILL_SIZE) {
         ++count;
         testVec.push_back(SContent(count));
     }
-    while (count < TEST_SIZE)
-    {
+    while (count < TEST_SIZE) {
         testVec.erase(testVec.begin());
         ++count;
         testVec.push_back(SContent(count));
     }
     ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished vector throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished vector throughput test at " << ml::core::CTimeUtils::toTimeString(end));
     CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testVec.size());
-    LOG_INFO("Vector throughput test with fill size " << FILL_SIZE <<
-             " and test size " << TEST_SIZE << " took " << (end - start) <<
-             " seconds");
+    LOG_INFO("Vector throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start)
+                                                      << " seconds");
 }
-void CContainerThroughputTest::testList()
-{
+void CContainerThroughputTest::testList() {
     using TContentList = std::list<SContent>;
     TContentList testList;
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting list throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting list throughput test at " << ml::core::CTimeUtils::toTimeString(start));
     size_t count(0);
-    while (count < FILL_SIZE)
-    {
+    while (count < FILL_SIZE) {
         ++count;
         testList.push_back(SContent(count));
     }
-    while (count < TEST_SIZE)
-    {
+    while (count < TEST_SIZE) {
         testList.pop_front();
         ++count;
         testList.push_back(SContent(count));
     }
     ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished list throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished list throughput test at " << ml::core::CTimeUtils::toTimeString(end));
     CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testList.size());
-    LOG_INFO("List throughput test with fill size " << FILL_SIZE <<
-             " and test size " << TEST_SIZE << " took " << (end - start) <<
-             " seconds");
+    LOG_INFO("List throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start)
+                                                    << " seconds");
 }
-void CContainerThroughputTest::testDeque()
-{
+void CContainerThroughputTest::testDeque() {
     using TContentDeque = std::deque<SContent>;
     TContentDeque testDeque;
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting deque throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting deque throughput test at " << ml::core::CTimeUtils::toTimeString(start));
     size_t count(0);
-    while (count < FILL_SIZE)
-    {
+    while (count < FILL_SIZE) {
         ++count;
         testDeque.push_back(SContent(count));
     }
-    while (count < TEST_SIZE)
-    {
+    while (count < TEST_SIZE) {
         testDeque.pop_front();
         ++count;
         testDeque.push_back(SContent(count));
     }
     ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished deque throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished deque throughput test at " << ml::core::CTimeUtils::toTimeString(end));
     CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testDeque.size());
-    LOG_INFO("Deque throughput test with fill size " << FILL_SIZE <<
-             " and test size " << TEST_SIZE << " took " << (end - start) <<
-             " seconds");
+    LOG_INFO("Deque throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start)
+                                                     << " seconds");
 }
-void CContainerThroughputTest::testMap()
-{
+void CContainerThroughputTest::testMap() {
     using TSizeContentMap = std::map<size_t, SContent>;
     TSizeContentMap testMap;
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting map throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting map throughput test at " << ml::core::CTimeUtils::toTimeString(start));
     size_t count(0);
-    while (count < FILL_SIZE)
-    {
+    while (count < FILL_SIZE) {
         ++count;
         testMap.insert(TSizeContentMap::value_type(count, SContent(count)));
     }
-    while (count < TEST_SIZE)
-    {
+    while (count < TEST_SIZE) {
         testMap.erase(testMap.begin());
         ++count;
         testMap.insert(TSizeContentMap::value_type(count, SContent(count)));
     }
     ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished map throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished map throughput test at " << ml::core::CTimeUtils::toTimeString(end));
     CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testMap.size());
-    LOG_INFO("Map throughput test with fill size " << FILL_SIZE <<
-             " and test size " << TEST_SIZE << " took " << (end - start) <<
-             " seconds");
+    LOG_INFO("Map throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start)
+                                                   << " seconds");
 }
-void CContainerThroughputTest::testCircBuf()
-{
+void CContainerThroughputTest::testCircBuf() {
     using TContentCircBuf = boost::circular_buffer<SContent>;
     TContentCircBuf testCircBuf(FILL_SIZE);
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting circular buffer throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting circular buffer throughput test at " << ml::core::CTimeUtils::toTimeString(start));
     size_t count(0);
-    while (count < FILL_SIZE)
-    {
+    while (count < FILL_SIZE) {
         ++count;
         testCircBuf.push_back(SContent(count));
     }
-    while (count < TEST_SIZE)
-    {
+    while (count < TEST_SIZE) {
         testCircBuf.pop_front();
         ++count;
         testCircBuf.push_back(SContent(count));
     }
     ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished circular buffer throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished circular buffer throughput test at " << ml::core::CTimeUtils::toTimeString(end));
     CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testCircBuf.size());
-    LOG_INFO("Circular buffer throughput test with fill size " << FILL_SIZE <<
-             " and test size " << TEST_SIZE << " took " << (end - start) <<
-             " seconds");
+    LOG_INFO("Circular buffer throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start)
+                                                               << " seconds");
 }
-void CContainerThroughputTest::testMultiIndex()
-{
+void CContainerThroughputTest::testMultiIndex() {
     using TContentMIndex = boost::multi_index::multi_index_container<
         SContent,
-        boost::multi_index::indexed_by<
-            boost::multi_index::hashed_unique<
-                BOOST_MULTI_INDEX_MEMBER(SContent, size_t, s_Size)
-            >
-        >
-    >;
+        boost::multi_index::indexed_by<boost::multi_index::hashed_unique<BOOST_MULTI_INDEX_MEMBER(SContent, size_t, s_Size)>>>;
     TContentMIndex testMultiIndex;
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting multi-index throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting multi-index throughput test at " << ml::core::CTimeUtils::toTimeString(start));
     size_t count(0);
-    while (count < FILL_SIZE)
-    {
+    while (count < FILL_SIZE) {
         ++count;
         testMultiIndex.insert(SContent(count));
     }
-    while (count < TEST_SIZE)
-    {
+    while (count < TEST_SIZE) {
         testMultiIndex.erase(testMultiIndex.begin());
         ++count;
         testMultiIndex.insert(SContent(count));
     }
     ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished multi-index throughput test at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished multi-index throughput test at " << ml::core::CTimeUtils::toTimeString(end));
     CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testMultiIndex.size());
-    LOG_INFO("Multi-index throughput test with fill size " << FILL_SIZE <<
-             " and test size " << TEST_SIZE << " took " << (end - start) <<
-             " seconds");
+    LOG_INFO("Multi-index throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start)
+                                                           << " seconds");
 }
-CContainerThroughputTest::SContent::SContent(size_t count)
-    : s_Size(count),
-      s_Ptr(this),
-      s_Double(double(count))
-{
+CContainerThroughputTest::SContent::SContent(size_t count) : s_Size(count), s_Ptr(this), s_Double(double(count)) {
 }
-
diff --git a/lib/core/unittest/CContainerThroughputTest.h b/lib/core/unittest/CContainerThroughputTest.h
index 3b2d512419..eb28d64f02 100644
--- a/lib/core/unittest/CContainerThroughputTest.h
+++ b/lib/core/unittest/CContainerThroughputTest.h
@@ -8,35 +8,31 @@
 #include <cppunit/extensions/HelperMacros.h>
-
-class CContainerThroughputTest : public CppUnit::TestFixture
-{
-    public:
-        struct SContent
-        {
-            SContent(size_t count);
-
-            size_t s_Size;
-            void *s_Ptr;
-            double s_Double;
-        };
-
-    public:
-        void testVector();
-        void testList();
-        void testDeque();
-        void testMap();
-        void testCircBuf();
-        void testMultiIndex();
-
-        void setUp();
-
-        static CppUnit::Test *suite();
-
-    private:
-        static const size_t FILL_SIZE;
-        static const size_t TEST_SIZE;
+class CContainerThroughputTest : public CppUnit::TestFixture {
+public:
+    struct SContent {
+        SContent(size_t count);
+
+        size_t s_Size;
+        void* s_Ptr;
+        double s_Double;
+    };
+
+public:
+    void testVector();
+    void testList();
+    void testDeque();
+    void testMap();
+    void testCircBuf();
+    void testMultiIndex();
+
+    void setUp();
+
+    static CppUnit::Test* suite();
+
+private:
+    static const size_t FILL_SIZE;
+    static const size_t TEST_SIZE;
 };
 #endif // INCLUDED_CContainerThroughputTest_h
-
diff --git a/lib/core/unittest/CDelimiterTest.cc b/lib/core/unittest/CDelimiterTest.cc
index 3bfdec3bb0..122b725b73 100644
--- a/lib/core/unittest/CDelimiterTest.cc
+++ b/lib/core/unittest/CDelimiterTest.cc
@@ -11,46 +11,34 @@
 #include
 #include
-
-CppUnit::Test *CDelimiterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDelimiterTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDelimiterTest>(
-                               "CDelimiterTest::testSimpleTokenise",
-                               &CDelimiterTest::testSimpleTokenise) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDelimiterTest>(
-                               "CDelimiterTest::testRegexTokenise",
-                               &CDelimiterTest::testRegexTokenise) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDelimiterTest>(
-                               "CDelimiterTest::testQuotedTokenise",
-                               &CDelimiterTest::testQuotedTokenise) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDelimiterTest>(
-                               "CDelimiterTest::testQuotedEscapedTokenise",
-                               &CDelimiterTest::testQuotedEscapedTokenise) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDelimiterTest>(
-                               "CDelimiterTest::testInvalidQuotedTokenise",
-                               &CDelimiterTest::testInvalidQuotedTokenise) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDelimiterTest>(
-                               "CDelimiterTest::testQuoteEqualsEscapeTokenise",
-                               &CDelimiterTest::testQuoteEqualsEscapeTokenise) );
+CppUnit::Test* CDelimiterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDelimiterTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDelimiterTest>("CDelimiterTest::testSimpleTokenise", &CDelimiterTest::testSimpleTokenise));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDelimiterTest>("CDelimiterTest::testRegexTokenise", &CDelimiterTest::testRegexTokenise));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDelimiterTest>("CDelimiterTest::testQuotedTokenise", &CDelimiterTest::testQuotedTokenise));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDelimiterTest>("CDelimiterTest::testQuotedEscapedTokenise", &CDelimiterTest::testQuotedEscapedTokenise));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDelimiterTest>("CDelimiterTest::testInvalidQuotedTokenise", &CDelimiterTest::testInvalidQuotedTokenise));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDelimiterTest>("CDelimiterTest::testQuoteEqualsEscapeTokenise",
+                                                                  &CDelimiterTest::testQuoteEqualsEscapeTokenise));
     return suiteOfTests;
 }
-void CDelimiterTest::testSimpleTokenise()
-{
-    std::string testData(
-        "Oct 12, 2008 8:38:51 AM org.apache.tomcat.util.http.Parameters processParameters\n"
-        "WARNING: Parameters: Invalid chunk ignored.\n"
-        "Oct 12, 2008 8:38:52 AM org.apache.tomcat.util.http.Parameters processParameters\n"
-        "WARNING: Parameters: Invalid chunk ignored.\n"
-        "Oct 12, 2008 8:38:53 AM org.apache.tomcat.util.http.Parameters processParameters\n"
-        "WARNING: Parameters: Invalid chunk ignored.\n"
-        "Oct 12, 2008 8:39:03 AM org.apache.tomcat.util.http.Parameters processParameters\n"
-        "WARNING: Parameters: Invalid chunk ignored.\n"
-        "Oct 12, 2008 8:39:04 AM org.apache.tomcat.util.http.Parameters processParameters\n"
-        "WARNING: Parameters: Invalid chunk ignored.\n"
-    );
+void CDelimiterTest::testSimpleTokenise() {
+    std::string testData("Oct 12, 2008 8:38:51 AM org.apache.tomcat.util.http.Parameters processParameters\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\n"
+                         "Oct 12, 2008 8:38:52 AM org.apache.tomcat.util.http.Parameters processParameters\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\n"
+                         "Oct 12, 2008 8:38:53 AM org.apache.tomcat.util.http.Parameters processParameters\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\n"
+                         "Oct 12, 2008 8:39:03 AM org.apache.tomcat.util.http.Parameters processParameters\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\n"
+                         "Oct 12, 2008 8:39:04 AM org.apache.tomcat.util.http.Parameters processParameters\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\n");
     LOG_DEBUG("Input data:\n" << testData << '\n');
@@ -82,21 +70,18 @@ void CDelimiterTest::testSimpleTokenise()
     CPPUNIT_ASSERT_EQUAL(size_t(0), remainder.size());
 }
-void CDelimiterTest::testRegexTokenise()
-{
+void CDelimiterTest::testRegexTokenise() {
     // Some of the lines here are Windows text format, and others Unix text
-    std::string testData(
-        "Oct 12, 2008 8:38:51 AM org.apache.tomcat.util.http.Parameters processParameters\r\n"
-        "WARNING: Parameters: Invalid chunk ignored.\r\n"
-        "Oct 12, 2008 8:38:52 AM org.apache.tomcat.util.http.Parameters processParameters\r\n"
-        "WARNING: Parameters: Invalid chunk ignored.\n"
-        "Oct 12, 2008 8:38:53 AM org.apache.tomcat.util.http.Parameters processParameters\n"
-        "WARNING: Parameters: Invalid chunk ignored.\r\n"
-        "Oct 12, 2008 8:39:03 AM org.apache.tomcat.util.http.Parameters processParameters\r\n"
-        "WARNING: Parameters: Invalid chunk ignored.\n"
-        "Oct 12, 2008 8:39:04 AM org.apache.tomcat.util.http.Parameters processParameters\n"
-        "WARNING: Parameters: Invalid chunk ignored.\n"
-    );
+    std::string testData("Oct 12, 2008 8:38:51 AM org.apache.tomcat.util.http.Parameters processParameters\r\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\r\n"
+                         "Oct 12, 2008 8:38:52 AM org.apache.tomcat.util.http.Parameters processParameters\r\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\n"
+                         "Oct 12, 2008 8:38:53 AM org.apache.tomcat.util.http.Parameters processParameters\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\r\n"
+                         "Oct 12, 2008 8:39:03 AM org.apache.tomcat.util.http.Parameters processParameters\r\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\n"
+                         "Oct 12, 2008 8:39:04 AM org.apache.tomcat.util.http.Parameters processParameters\n"
+                         "WARNING: Parameters: Invalid chunk ignored.\n");
     LOG_DEBUG("Input data:\n" << testData << '\n');
@@ -129,12 +114,11 @@ void CDelimiterTest::testRegexTokenise()
     CPPUNIT_ASSERT_EQUAL(size_t(0), remainder.size());
 }
-void CDelimiterTest::testQuotedTokenise()
-{
+void CDelimiterTest::testQuotedTokenise() {
     // NB: The backslashes here escape the quotes for the benefit of the C++ compiler
-    std::string testData(
-        "3,1,5415.1132,56135135,0x00000001,0x00000002,\"SOME_STRING\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",0x0000000000000000,0x0000000000000000,\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\""
-    );
+    std::string testData("3,1,5415.1132,56135135,0x00000001,0x00000002,\"SOME_STRING\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\","
+                         "\"\",\"\",0x0000000000000000,0x0000000000000000,\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\","
+                         "\"\",\"\",\"\",\"\",\"\",\"\"");
     LOG_DEBUG("Input data:\n" << testData << '\n');
@@ -156,13 +140,13 @@ void CDelimiterTest::testQuotedTokenise()
     CPPUNIT_ASSERT_EQUAL(size_t(40), delimited.size());
 }
-void CDelimiterTest::testQuotedEscapedTokenise()
-{
+void CDelimiterTest::testQuotedEscapedTokenise() {
     // Similar to previous test, but there are four values with escaped quotes in AFTER
     // pre-processing by the C++ compiler
-    std::string testData(
-        "3,1,5415.1132,56135135,0x00000001,0x00000002,\"SOME_STRING\",\"\",\"\\\"\",\"\",\"\",\"\",\"\",\"\",\"A \\\"middling\\\" one\",\"\",\"\",\"\",\"\",0x0000000000000000,0x0000000000000000,\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\\\"start\",\"\",\"\",\"end\\\"\",\"\",\"\",\"\",\"\",\"\",\"\\\"both\\\"\",\"\",\"\""
-    );
+    std::string testData("3,1,5415.1132,56135135,0x00000001,0x00000002,\"SOME_STRING\",\"\",\"\\\"\",\"\",\"\",\"\",\"\",\"\",\"A "
+                         "\\\"middling\\\" "
+                         "one\",\"\",\"\",\"\",\"\",0x0000000000000000,0x0000000000000000,\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\\\"start\","
+                         "\"\",\"\",\"end\\\"\",\"\",\"\",\"\",\"\",\"\",\"\\\"both\\\"\",\"\",\"\"");
     LOG_DEBUG("Input data:\n" << testData << '\n');
@@ -184,13 +168,10 @@ void CDelimiterTest::testQuotedEscapedTokenise()
     CPPUNIT_ASSERT_EQUAL(size_t(40), delimited.size());
 }
-void CDelimiterTest::testInvalidQuotedTokenise()
-{
+void CDelimiterTest::testInvalidQuotedTokenise() {
     // Invalid quoting (e.g. mismatched) mustn't cause the tokeniser to go into
     // an infinite loop
-    std::string testData(
-        "4/26/2011 4:19,aaa.bbbbbb@cc.ddddd.com,\"64222\",\"/some_action.do?param1=foo&param2=Sljahfej+kfejhafef/3931nfV"
-    );
+    std::string testData("4/26/2011 4:19,aaa.bbbbbb@cc.ddddd.com,\"64222\",\"/some_action.do?param1=foo&param2=Sljahfej+kfejhafef/3931nfV");
     LOG_DEBUG("Input data:\n" << testData << '\n');
@@ -206,14 +187,16 @@ void CDelimiterTest::testInvalidQuotedTokenise()
     CPPUNIT_ASSERT_EQUAL(std::string("/some_action.do?param1=foo&param2=Sljahfej+kfejhafef/3931nfV"), remainder);
 }
-void CDelimiterTest::testQuoteEqualsEscapeTokenise()
-{
+void CDelimiterTest::testQuoteEqualsEscapeTokenise() {
     // In this example, double quotes are used for quoting, but they are escaped
     // by doubling them up, so the escape character is the same as the quote
     // character
     std::string testData(
-        "May 24 22:02:13 1,2012/05/24 22:02:13,724747467,SOME_STRING,url,1,2012/04/10 02:53:17,192.168.0.3,192.168.0.1,0.0.0.0,0.0.0.0,aaa,bbbbbb,,ccccc,dddd1,eeee,ffffff,gggggggg1/2,ggggggg1/1,aA,2012/04/10 02:53:19,27555,1,8450,80,0,0,0x200000,hhh,jjjjjjj,\"www.somesite.com/ajax/home.php/Pane?__a=1&data={\"\"pid\"\":34,\"\"data\"\":[\"\"a.163624624.35636.13135\"\",true,false]}&__user=6625141\",(9999),yetuth-atrat,info,client-to-server,0,0x0,192.168.0.0-192.168.255.255,Some Country,0,application/x-javascript"
-    );
+        "May 24 22:02:13 1,2012/05/24 22:02:13,724747467,SOME_STRING,url,1,2012/04/10 "
+        "02:53:17,192.168.0.3,192.168.0.1,0.0.0.0,0.0.0.0,aaa,bbbbbb,,ccccc,dddd1,eeee,ffffff,gggggggg1/2,ggggggg1/1,aA,2012/04/10 "
+        "02:53:19,27555,1,8450,80,0,0,0x200000,hhh,jjjjjjj,\"www.somesite.com/ajax/home.php/"
+        "Pane?__a=1&data={\"\"pid\"\":34,\"\"data\"\":[\"\"a.163624624.35636.13135\"\",true,false]}&__user=6625141\",(9999),yetuth-atrat,"
+        "info,client-to-server,0,0x0,192.168.0.0-192.168.255.255,Some Country,0,application/x-javascript");
     LOG_DEBUG("Input data:\n" << testData << '\n');
diff --git a/lib/core/unittest/CDelimiterTest.h b/lib/core/unittest/CDelimiterTest.h
index 71804d6f57..709e46e733 100644
--- a/lib/core/unittest/CDelimiterTest.h
+++ b/lib/core/unittest/CDelimiterTest.h
@@ -11,22 +11,19 @@
 #include <cppunit/extensions/HelperMacros.h>
 #include
-
-class CDelimiterTest : public CppUnit::TestFixture
-{
-    public:
-        void testSimpleTokenise();
-        void testRegexTokenise();
-        void testQuotedTokenise();
-        void testQuotedEscapedTokenise();
-        void testInvalidQuotedTokenise();
-        void testQuoteEqualsEscapeTokenise();
-
-        static CppUnit::Test *suite();
-
-    private:
-        typedef std::ostream_iterator<std::string> TStrOStreamItr;
+class CDelimiterTest : public CppUnit::TestFixture {
+public:
+    void testSimpleTokenise();
+    void testRegexTokenise();
+    void testQuotedTokenise();
+    void testQuotedEscapedTokenise();
+    void testInvalidQuotedTokenise();
+    void testQuoteEqualsEscapeTokenise();
+
+    static CppUnit::Test* suite();
+
+private:
+    typedef std::ostream_iterator<std::string> TStrOStreamItr;
 };
 #endif // INCLUDED_CDelimiterTest_h
-
diff --git a/lib/core/unittest/CDetachedProcessSpawnerTest.cc b/lib/core/unittest/CDetachedProcessSpawnerTest.cc
index d02d64992f..2430f94494 100644
--- a/lib/core/unittest/CDetachedProcessSpawnerTest.cc
+++ b/lib/core/unittest/CDetachedProcessSpawnerTest.cc
@@ -15,9 +15,7 @@
 #include
 #include
-
-namespace
-{
+namespace {
 const std::string OUTPUT_FILE("withNs.xml");
 #ifdef Windows
 // Unlike Windows NT system calls, copy's command line cannot cope with
 const std::string INPUT_FILE("testfiles\\withNs.xml");
 // File size is different on Windows due to CRLF line endings
 const size_t EXPECTED_FILE_SIZE(585);
-const char *winDir(::getenv("windir"));
-const std::string PROCESS_PATH1(winDir != 0 ? std::string(winDir) + "\\System32\\cmd"
-                                            : std::string("C:\\Windows\\System32\\cmd"));
-const std::string PROCESS_ARGS1[] = { "/C",
-                                      "copy " + INPUT_FILE + " ." };
-const std::string &PROCESS_PATH2 = PROCESS_PATH1;
-const std::string PROCESS_ARGS2[] = { "/C",
-                                      "ping 127.0.0.1 -n 11" };
+const char* winDir(::getenv("windir"));
+const std::string PROCESS_PATH1(winDir != 0 ? std::string(winDir) + "\\System32\\cmd" : std::string("C:\\Windows\\System32\\cmd"));
+const std::string PROCESS_ARGS1[] = {"/C", "copy " + INPUT_FILE + " ."};
+const std::string& PROCESS_PATH2 = PROCESS_PATH1;
+const std::string PROCESS_ARGS2[] = {"/C", "ping 127.0.0.1 -n 11"};
 #else
 const std::string INPUT_FILE("testfiles/withNs.xml");
 const size_t EXPECTED_FILE_SIZE(563);
 const std::string PROCESS_PATH1("/bin/dd");
-const std::string PROCESS_ARGS1[] = { "if=" + INPUT_FILE,
-                                      "of=" + OUTPUT_FILE,
-                                      "bs=1",
-                                      "count=" + ml::core::CStringUtils::typeToString(EXPECTED_FILE_SIZE) };
+const std::string PROCESS_ARGS1[] = {"if=" + INPUT_FILE,
+                                     "of=" + OUTPUT_FILE,
+                                     "bs=1",
+                                     "count=" + ml::core::CStringUtils::typeToString(EXPECTED_FILE_SIZE)};
 const std::string PROCESS_PATH2("/bin/sleep");
-const std::string PROCESS_ARGS2[] = { "10" };
+const std::string PROCESS_ARGS2[] = {"10"};
 #endif
 }
-CppUnit::Test *CDetachedProcessSpawnerTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDetachedProcessSpawnerTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDetachedProcessSpawnerTest>(
-                               "CDetachedProcessSpawnerTest::testSpawn",
-                               &CDetachedProcessSpawnerTest::testSpawn) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDetachedProcessSpawnerTest>(
-                               "CDetachedProcessSpawnerTest::testKill",
-                               &CDetachedProcessSpawnerTest::testKill) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDetachedProcessSpawnerTest>(
-                               "CDetachedProcessSpawnerTest::testPermitted",
-                               &CDetachedProcessSpawnerTest::testPermitted) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDetachedProcessSpawnerTest>(
-                               "CDetachedProcessSpawnerTest::testNonExistent",
-                               &CDetachedProcessSpawnerTest::testNonExistent) );
+CppUnit::Test* CDetachedProcessSpawnerTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetachedProcessSpawnerTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetachedProcessSpawnerTest>("CDetachedProcessSpawnerTest::testSpawn",
+                                                                               &CDetachedProcessSpawnerTest::testSpawn));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetachedProcessSpawnerTest>("CDetachedProcessSpawnerTest::testKill",
+                                                                               &CDetachedProcessSpawnerTest::testKill));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetachedProcessSpawnerTest>("CDetachedProcessSpawnerTest::testPermitted",
+                                                                               &CDetachedProcessSpawnerTest::testPermitted));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetachedProcessSpawnerTest>("CDetachedProcessSpawnerTest::testNonExistent",
+                                                                               &CDetachedProcessSpawnerTest::testNonExistent));
     return suiteOfTests;
 }
-void CDetachedProcessSpawnerTest::testSpawn()
-{
+void CDetachedProcessSpawnerTest::testSpawn() {
     // The intention of this test is to copy a file by spawning an external
     // program and then make sure the file has been copied
@@ -78,8 +67,7 @@ void CDetachedProcessSpawnerTest::testSpawn()
     ml::core::CDetachedProcessSpawner::TStrVec permittedPaths(1, PROCESS_PATH1);
     ml::core::CDetachedProcessSpawner spawner(permittedPaths);
-    ml::core::CDetachedProcessSpawner::TStrVec args(PROCESS_ARGS1,
-                                                    PROCESS_ARGS1 + boost::size(PROCESS_ARGS1));
+    ml::core::CDetachedProcessSpawner::TStrVec args(PROCESS_ARGS1, PROCESS_ARGS1 + boost::size(PROCESS_ARGS1));
     CPPUNIT_ASSERT(spawner.spawn(PROCESS_PATH1, args));
@@ -93,8 +81,7 @@ void CDetachedProcessSpawnerTest::testSpawn()
     CPPUNIT_ASSERT_EQUAL(0, ::remove(OUTPUT_FILE.c_str()));
 }
-void CDetachedProcessSpawnerTest::testKill()
-{
+void CDetachedProcessSpawnerTest::testKill() {
     // The intention of this test is to spawn a process that sleeps for 10
     // seconds, but kill it before it exits by itself and prove that its death
     // has been detected
@@ -102,8 +89,7 @@ void CDetachedProcessSpawnerTest::testKill()
     ml::core::CDetachedProcessSpawner::TStrVec permittedPaths(1, PROCESS_PATH2);
     ml::core::CDetachedProcessSpawner spawner(permittedPaths);
-    ml::core::CDetachedProcessSpawner::TStrVec args(PROCESS_ARGS2,
-                                                    PROCESS_ARGS2 + boost::size(PROCESS_ARGS2));
+    ml::core::CDetachedProcessSpawner::TStrVec args(PROCESS_ARGS2, PROCESS_ARGS2 + boost::size(PROCESS_ARGS2));
     ml::core::CProcess::TPid childPid = 0;
     CPPUNIT_ASSERT(spawner.spawn(PROCESS_PATH2, args, childPid));
@@ -125,8 +111,7 @@ void CDetachedProcessSpawnerTest::testKill()
     CPPUNIT_ASSERT(!spawner.terminateChild(static_cast<ml::core::CProcess::TPid>(-1)));
 }
-void CDetachedProcessSpawnerTest::testPermitted()
-{
+void CDetachedProcessSpawnerTest::testPermitted() {
     ml::core::CDetachedProcessSpawner::TStrVec permittedPaths(1, PROCESS_PATH1);
     ml::core::CDetachedProcessSpawner spawner(permittedPaths);
@@ -134,12 +119,10 @@ void CDetachedProcessSpawnerTest::testPermitted()
     CPPUNIT_ASSERT(!spawner.spawn("./ml_test", ml::core::CDetachedProcessSpawner::TStrVec()));
 }
-void CDetachedProcessSpawnerTest::testNonExistent()
-{
+void CDetachedProcessSpawnerTest::testNonExistent() {
     ml::core::CDetachedProcessSpawner::TStrVec permittedPaths(1, "./does_not_exist");
     ml::core::CDetachedProcessSpawner spawner(permittedPaths);
     // Should fail as even though it's a permitted process as the file doesn't exist
     CPPUNIT_ASSERT(!spawner.spawn("./does_not_exist", ml::core::CDetachedProcessSpawner::TStrVec()));
 }
-
diff --git a/lib/core/unittest/CDetachedProcessSpawnerTest.h b/lib/core/unittest/CDetachedProcessSpawnerTest.h
index b5fb899d33..9665bcd929 100644
--- a/lib/core/unittest/CDetachedProcessSpawnerTest.h
+++ b/lib/core/unittest/CDetachedProcessSpawnerTest.h
@@ -8,17 +8,14 @@
 #include <cppunit/extensions/HelperMacros.h>
+class CDetachedProcessSpawnerTest : public CppUnit::TestFixture {
+public:
+    void testSpawn();
+    void testKill();
+    void testPermitted();
+    void testNonExistent();
-class CDetachedProcessSpawnerTest : public CppUnit::TestFixture
-{
-    public:
-        void testSpawn();
-        void testKill();
-        void testPermitted();
-        void testNonExistent();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CDetachedProcessSpawnerTest_h
-
diff --git a/lib/core/unittest/CDualThreadStreamBufTest.cc b/lib/core/unittest/CDualThreadStreamBufTest.cc
index 3c90ecec0d..f60e9c5cfb 100644
--- a/lib/core/unittest/CDualThreadStreamBufTest.cc
+++ b/lib/core/unittest/CDualThreadStreamBufTest.cc
@@ -7,10 +7,10 @@
 #include
 #include
-#include
 #include
 #include
 #include
+#include
 #include
 #include
+CppUnit::Test* CDualThreadStreamBufTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDualThreadStreamBufTest");
-CppUnit::Test *CDualThreadStreamBufTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDualThreadStreamBufTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDualThreadStreamBufTest>(
-                               "CDualThreadStreamBufTest::testThroughput",
-                               &CDualThreadStreamBufTest::testThroughput) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDualThreadStreamBufTest>(
-                               "CDualThreadStreamBufTest::testSlowConsumer",
-                               &CDualThreadStreamBufTest::testSlowConsumer) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDualThreadStreamBufTest>(
-                               "CDualThreadStreamBufTest::testPutback",
-                               &CDualThreadStreamBufTest::testPutback) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDualThreadStreamBufTest>(
-                               "CDualThreadStreamBufTest::testFatal",
-                               &CDualThreadStreamBufTest::testFatal) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDualThreadStreamBufTest>("CDualThreadStreamBufTest::testThroughput",
+                                                                            &CDualThreadStreamBufTest::testThroughput));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDualThreadStreamBufTest>("CDualThreadStreamBufTest::testSlowConsumer",
+                                                                            &CDualThreadStreamBufTest::testSlowConsumer));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDualThreadStreamBufTest>("CDualThreadStreamBufTest::testPutback", &CDualThreadStreamBufTest::testPutback));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDualThreadStreamBufTest>("CDualThreadStreamBufTest::testFatal", &CDualThreadStreamBufTest::testFatal));
     return suiteOfTests;
 }
-namespace
-{
-
-class CInputThread : public ml::core::CThread
-{
-    public:
-        CInputThread(ml::core::CDualThreadStreamBuf &buffer,
-                     uint32_t delay = 0,
-                     size_t fatalAfter = 0)
-            : m_Buffer(buffer),
-              m_Delay(delay),
-              m_FatalAfter(fatalAfter),
-              m_TotalData(0)
-        {
-        }
-
-        size_t totalData() const
-        {
-            return m_TotalData;
-        }
-
-    protected:
-        virtual void run()
-        {
-            std::istream strm(&m_Buffer);
-            size_t count(0);
-            std::string line;
-            while (std::getline(strm, line))
-            {
-                ++count;
-                m_TotalData += line.length();
-                ++m_TotalData; // For the delimiter
-                CPPUNIT_ASSERT_EQUAL(static_cast<std::streampos>(m_TotalData), strm.tellg());
-                ml::core::CSleep::sleep(m_Delay);
-                if (count == m_FatalAfter)
-                {
-                    m_Buffer.signalFatalError();
-                }
+namespace {
+
+class CInputThread : public ml::core::CThread {
+public:
+    CInputThread(ml::core::CDualThreadStreamBuf& buffer, uint32_t delay = 0, size_t fatalAfter = 0)
+        : m_Buffer(buffer), m_Delay(delay), m_FatalAfter(fatalAfter), m_TotalData(0) {}
+
+    size_t totalData() const { return m_TotalData; }
+
+protected:
+    virtual void run() {
+        std::istream strm(&m_Buffer);
+        size_t count(0);
+        std::string line;
+        while (std::getline(strm, line)) {
+            ++count;
+            m_TotalData += line.length();
+            ++m_TotalData; // For the delimiter
+            CPPUNIT_ASSERT_EQUAL(static_cast<std::streampos>(m_TotalData), strm.tellg());
+            ml::core::CSleep::sleep(m_Delay);
+            if (count == m_FatalAfter) {
+                m_Buffer.signalFatalError();
             }
         }
+    }
-        virtual void shutdown()
-        {
-            m_Buffer.signalFatalError();
-        }
+    virtual void shutdown() { m_Buffer.signalFatalError(); }
-    private:
-        ml::core::CDualThreadStreamBuf &m_Buffer;
-        uint32_t m_Delay;
-        size_t m_FatalAfter;
-        size_t m_TotalData;
+private:
+    ml::core::CDualThreadStreamBuf& m_Buffer;
+    uint32_t m_Delay;
+    size_t m_FatalAfter;
+    size_t m_TotalData;
 };
-const char *DATA(
-    "According to the most recent Wikipedia definition \"Predictive "
-    "analytics encompasses a variety of statistical techniques from "
-    "modeling, machine learning, data mining and game theory that ... "
-    "exploit patterns found in historical and transactional data to "
-    "identify risks and opportunities.\"\n"
-    "In applications such as credit scoring, predictive analytics "
-    "identifies patterns and relationships in huge volumes of data, hidden "
-    "to human analysis, that presages an undesirable outcome. Many "
-    "vendors refer to their ability to project a ramp in a single metric, "
-    "say CPU utilization, as predictive analytics. As most users know, "
As most users know, " - "these capabilities are of limited value in that single metrics are " - "rarely the cause of cataclysmic failures. Rather it is the impact of " - "change between components that causes failure in complex IT systems.\n" -); - +const char* DATA("According to the most recent Wikipedia definition \"Predictive " + "analytics encompasses a variety of statistical techniques from " + "modeling, machine learning, data mining and game theory that ... " + "exploit patterns found in historical and transactional data to " + "identify risks and opportunities.\"\n" + "In applications such as credit scoring, predictive analytics " + "identifies patterns and relationships in huge volumes of data, hidden " + "to human analysis, that presages an undesirable outcome. Many " + "vendors refer to their ability to project a ramp in a single metric, " + "say CPU utilization, as predictive analytics. As most users know, " + "these capabilities are of limited value in that single metrics are " + "rarely the cause of cataclysmic failures. Rather it is the impact of " + "change between components that causes failure in complex IT systems.\n"); } -void CDualThreadStreamBufTest::testThroughput() -{ +void CDualThreadStreamBufTest::testThroughput() { static const size_t TEST_SIZE(1000000); size_t dataSize(::strlen(DATA)); size_t totalDataSize(TEST_SIZE * dataSize); @@ -122,15 +94,12 @@ void CDualThreadStreamBufTest::testThroughput() inputThread.start(); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting REST buffer throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting REST buffer throughput test at " << ml::core::CTimeUtils::toTimeString(start)); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { std::streamsize toWrite(static_cast(dataSize)); - const char *ptr(DATA); - while (toWrite > 0) - { + const char* ptr(DATA); + while (toWrite > 0) { std::streamsize written(buf.sputn(ptr, toWrite)); CPPUNIT_ASSERT(written > 0); toWrite -= written; @@ -138,28 +107,22 @@ void CDualThreadStreamBufTest::testThroughput() } } - CPPUNIT_ASSERT_EQUAL(static_cast(totalDataSize), - buf.pubseekoff(0, - std::ios_base::cur, - std::ios_base::out)); + CPPUNIT_ASSERT_EQUAL(static_cast(totalDataSize), buf.pubseekoff(0, std::ios_base::cur, std::ios_base::out)); buf.signalEndOfFile(); inputThread.waitForFinish(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished REST buffer throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished REST buffer throughput test at " << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(totalDataSize, inputThread.totalData()); - LOG_INFO("REST buffer throughput test with test size " << TEST_SIZE << - " (total data transferred " << totalDataSize << " bytes) took " << - (end - start) << " seconds"); + LOG_INFO("REST buffer throughput test with test size " << TEST_SIZE << " (total data transferred " << totalDataSize << " bytes) took " + << (end - start) << " seconds"); } -void CDualThreadStreamBufTest::testSlowConsumer() -{ +void CDualThreadStreamBufTest::testSlowConsumer() { static const size_t TEST_SIZE(25); static const uint32_t DELAY(200); size_t dataSize(::strlen(DATA)); @@ -171,15 +134,12 @@ void CDualThreadStreamBufTest::testSlowConsumer() inputThread.start(); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting REST buffer slow consumer test at " << - 
ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting REST buffer slow consumer test at " << ml::core::CTimeUtils::toTimeString(start)); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { std::streamsize toWrite(static_cast(dataSize)); - const char *ptr(DATA); - while (toWrite > 0) - { + const char* ptr(DATA); + while (toWrite > 0) { std::streamsize written(buf.sputn(ptr, toWrite)); CPPUNIT_ASSERT(written > 0); toWrite -= written; @@ -192,15 +152,13 @@ void CDualThreadStreamBufTest::testSlowConsumer() inputThread.waitForFinish(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished REST buffer slow consumer test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished REST buffer slow consumer test at " << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(totalDataSize, inputThread.totalData()); ml::core_t::TTime duration(end - start); - LOG_INFO("REST buffer slow consumer test with test size " << TEST_SIZE << - ", " << numNewLines << " newlines per message and delay " << - DELAY << "ms took " << duration << " seconds"); + LOG_INFO("REST buffer slow consumer test with test size " << TEST_SIZE << ", " << numNewLines << " newlines per message and delay " + << DELAY << "ms took " << duration << " seconds"); ml::core_t::TTime delaySecs(static_cast((DELAY * numNewLines * TEST_SIZE) / 1000)); CPPUNIT_ASSERT(duration >= delaySecs); @@ -208,16 +166,14 @@ void CDualThreadStreamBufTest::testSlowConsumer() CPPUNIT_ASSERT(duration <= delaySecs + TOLERANCE); } -void CDualThreadStreamBufTest::testPutback() -{ +void CDualThreadStreamBufTest::testPutback() { size_t dataSize(::strlen(DATA)); ml::core::CDualThreadStreamBuf buf; std::streamsize toWrite(static_cast(dataSize)); - const char *ptr(DATA); - while (toWrite > 0) - { + const char* ptr(DATA); + while (toWrite > 0) { std::streamsize written(buf.sputn(ptr, toWrite)); CPPUNIT_ASSERT(written > 0); toWrite -= written; @@ -226,23 +182,17 @@ void CDualThreadStreamBufTest::testPutback() buf.signalEndOfFile(); - static const char *PUTBACK_CHARS("put this back"); + static const char* PUTBACK_CHARS("put this back"); std::istream strm(&buf); char c('\0'); CPPUNIT_ASSERT(strm.get(c).good()); CPPUNIT_ASSERT_EQUAL(*DATA, c); CPPUNIT_ASSERT(strm.putback(c).good()); - for (const char *putbackChar = PUTBACK_CHARS; - *putbackChar != '\0'; - ++putbackChar) - { + for (const char* putbackChar = PUTBACK_CHARS; *putbackChar != '\0'; ++putbackChar) { CPPUNIT_ASSERT(strm.putback(*putbackChar).good()); } std::string actual; - for (const char *putbackChar = PUTBACK_CHARS; - *putbackChar != '\0'; - ++putbackChar) - { + for (const char* putbackChar = PUTBACK_CHARS; *putbackChar != '\0'; ++putbackChar) { CPPUNIT_ASSERT(strm.get(c).good()); actual.insert(actual.begin(), c); } @@ -250,16 +200,14 @@ void CDualThreadStreamBufTest::testPutback() std::string remainder; std::string line; - while (std::getline(strm, line)) - { + while (std::getline(strm, line)) { remainder += line; remainder += '\n'; } CPPUNIT_ASSERT_EQUAL(std::string(DATA), remainder); } -void CDualThreadStreamBufTest::testFatal() -{ +void CDualThreadStreamBufTest::testFatal() { static const size_t TEST_SIZE(10000); static const size_t BUFFER_CAPACITY(16384); size_t dataSize(::strlen(DATA)); @@ -273,15 +221,12 @@ void CDualThreadStreamBufTest::testFatal() inputThread.start(); size_t totalDataWritten(0); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; 
++count) { std::streamsize toWrite(static_cast(dataSize)); - const char *ptr(DATA); - while (toWrite > 0) - { + const char* ptr(DATA); + while (toWrite > 0) { std::streamsize written(buf.sputn(ptr, toWrite)); - if (written == 0) - { + if (written == 0) { break; } toWrite -= written; @@ -294,11 +239,9 @@ void CDualThreadStreamBufTest::testFatal() inputThread.waitForFinish(); - LOG_DEBUG("Total data written in fatal error test of size " << TEST_SIZE << - " is " << totalDataWritten << " bytes"); + LOG_DEBUG("Total data written in fatal error test of size " << TEST_SIZE << " is " << totalDataWritten << " bytes"); // The fatal error should have stopped the writer thread from writing all the data CPPUNIT_ASSERT(totalDataWritten >= BUFFER_CAPACITY); CPPUNIT_ASSERT(totalDataWritten <= 3 * BUFFER_CAPACITY); } - diff --git a/lib/core/unittest/CDualThreadStreamBufTest.h b/lib/core/unittest/CDualThreadStreamBufTest.h index d18377e4fd..27716c6440 100644 --- a/lib/core/unittest/CDualThreadStreamBufTest.h +++ b/lib/core/unittest/CDualThreadStreamBufTest.h @@ -8,17 +8,14 @@ #include +class CDualThreadStreamBufTest : public CppUnit::TestFixture { +public: + void testThroughput(); + void testSlowConsumer(); + void testPutback(); + void testFatal(); -class CDualThreadStreamBufTest : public CppUnit::TestFixture -{ - public: - void testThroughput(); - void testSlowConsumer(); - void testPutback(); - void testFatal(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CDualThreadStreamBufTest_h - diff --git a/lib/core/unittest/CFileDeleterTest.cc b/lib/core/unittest/CFileDeleterTest.cc index 17abe6ba38..8f317cd1fa 100644 --- a/lib/core/unittest/CFileDeleterTest.cc +++ b/lib/core/unittest/CFileDeleterTest.cc @@ -13,20 +13,15 @@ #include +CppUnit::Test* CFileDeleterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFileDeleterTest"); -CppUnit::Test *CFileDeleterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CFileDeleterTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFileDeleterTest::testDelete", - &CFileDeleterTest::testDelete) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CFileDeleterTest::testDelete", &CFileDeleterTest::testDelete)); return suiteOfTests; } -void CFileDeleterTest::testDelete() -{ +void CFileDeleterTest::testDelete() { std::string fileName("CFileDeleterTest.txt"); { @@ -37,13 +32,10 @@ void CFileDeleterTest::testDelete() testFile << "to be deleted" << std::endl; } // The file should exist by the time the stream is closed here - CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::access(fileName.c_str(), - ml::core::COsFileFuncs::EXISTS)); + CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::access(fileName.c_str(), ml::core::COsFileFuncs::EXISTS)); } // The file should be deleted here - CPPUNIT_ASSERT_EQUAL(-1, ml::core::COsFileFuncs::access(fileName.c_str(), - ml::core::COsFileFuncs::EXISTS)); + CPPUNIT_ASSERT_EQUAL(-1, ml::core::COsFileFuncs::access(fileName.c_str(), ml::core::COsFileFuncs::EXISTS)); CPPUNIT_ASSERT_EQUAL(ENOENT, errno); } - diff --git a/lib/core/unittest/CFileDeleterTest.h b/lib/core/unittest/CFileDeleterTest.h index 07a86b043a..c5a6892668 100644 --- a/lib/core/unittest/CFileDeleterTest.h +++ b/lib/core/unittest/CFileDeleterTest.h @@ -8,14 +8,11 @@ #include +class CFileDeleterTest : public CppUnit::TestFixture { +public: + void testDelete(); -class CFileDeleterTest : public CppUnit::TestFixture -{ - public: - void testDelete(); - - static CppUnit::Test *suite(); + 
static CppUnit::Test* suite(); }; #endif // INCLUDED_CFileDeleterTest_h - diff --git a/lib/core/unittest/CFlatPrefixTreeTest.cc b/lib/core/unittest/CFlatPrefixTreeTest.cc index 03ddbe5416..53fc9866f2 100644 --- a/lib/core/unittest/CFlatPrefixTreeTest.cc +++ b/lib/core/unittest/CFlatPrefixTreeTest.cc @@ -21,40 +21,32 @@ using namespace ml; using namespace core; -CppUnit::Test *CFlatPrefixTreeTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CFlatPrefixTreeTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFlatPrefixTreeTest::testBuildGivenUnsortedInput", - &CFlatPrefixTreeTest::testBuildGivenUnsortedInput) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates", - &CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFlatPrefixTreeTest::testEmptyString", - &CFlatPrefixTreeTest::testEmptyString) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFlatPrefixTreeTest::testSimple", - &CFlatPrefixTreeTest::testSimple) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFlatPrefixTreeTest::testLeafAndBranch", - &CFlatPrefixTreeTest::testLeafAndBranch) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix", - &CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix", - &CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFlatPrefixTreeTest::testRandom", - &CFlatPrefixTreeTest::testRandom) ); +CppUnit::Test* CFlatPrefixTreeTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFlatPrefixTreeTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CFlatPrefixTreeTest::testBuildGivenUnsortedInput", + &CFlatPrefixTreeTest::testBuildGivenUnsortedInput)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates", + &CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFlatPrefixTreeTest::testEmptyString", &CFlatPrefixTreeTest::testEmptyString)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFlatPrefixTreeTest::testSimple", &CFlatPrefixTreeTest::testSimple)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFlatPrefixTreeTest::testLeafAndBranch", &CFlatPrefixTreeTest::testLeafAndBranch)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix", + &CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix", + &CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CFlatPrefixTreeTest::testRandom", &CFlatPrefixTreeTest::testRandom)); return suiteOfTests; } -void CFlatPrefixTreeTest::testBuildGivenUnsortedInput() -{ +void CFlatPrefixTreeTest::testBuildGivenUnsortedInput() { CFlatPrefixTree::TStrVec prefixes; prefixes.push_back("b"); prefixes.push_back("a"); @@ -63,8 +55,7 @@ void CFlatPrefixTreeTest::testBuildGivenUnsortedInput() CPPUNIT_ASSERT(prefixTree.build(prefixes) == false); 
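    // build() requires its input to be lexicographically sorted and free of
    // duplicates (the next test covers the duplicate case), which is why the
    // unsorted vector above is rejected.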
} -void CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates() -{ +void CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates() { CFlatPrefixTree::TStrVec prefixes; prefixes.push_back("a"); prefixes.push_back("bb"); @@ -74,8 +65,7 @@ void CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates() CPPUNIT_ASSERT(prefixTree.build(prefixes) == false); } -void CFlatPrefixTreeTest::testEmptyString() -{ +void CFlatPrefixTreeTest::testEmptyString() { CFlatPrefixTree::TStrVec prefixes; prefixes.push_back(""); @@ -86,8 +76,7 @@ void CFlatPrefixTreeTest::testEmptyString() CPPUNIT_ASSERT(prefixTree.matchesStart("") == false); } -void CFlatPrefixTreeTest::testSimple() -{ +void CFlatPrefixTreeTest::testSimple() { CFlatPrefixTree::TStrVec prefixes; prefixes.push_back("abc"); prefixes.push_back("acd"); @@ -100,8 +89,7 @@ void CFlatPrefixTreeTest::testSimple() LOG_TRACE("Tree: " << prefixTree.print()); - for (std::size_t i = 0; i < prefixes.size(); ++i) - { + for (std::size_t i = 0; i < prefixes.size(); ++i) { CPPUNIT_ASSERT(prefixTree.matchesStart(prefixes[i])); CPPUNIT_ASSERT(prefixTree.matchesFully(prefixes[i])); } @@ -118,8 +106,7 @@ void CFlatPrefixTreeTest::testSimple() CPPUNIT_ASSERT(prefixTree.matchesFully("") == false); } -void CFlatPrefixTreeTest::testLeafAndBranch() -{ +void CFlatPrefixTreeTest::testLeafAndBranch() { CFlatPrefixTree::TStrVec prefixes; prefixes.push_back(" oqt4g"); prefixes.push_back(" oqt4glz-"); @@ -129,15 +116,13 @@ void CFlatPrefixTreeTest::testLeafAndBranch() LOG_TRACE("Tree: " << prefixTree.print()); - for (std::size_t i = 0; i < prefixes.size(); ++i) - { + for (std::size_t i = 0; i < prefixes.size(); ++i) { CPPUNIT_ASSERT(prefixTree.matchesStart(prefixes[i])); CPPUNIT_ASSERT(prefixTree.matchesFully(prefixes[i])); } } -void CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix() -{ +void CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix() { CFlatPrefixTree::TStrVec prefixes; prefixes.push_back("h2 vh5"); prefixes.push_back("h2 vh55dm"); @@ -151,8 +136,7 @@ void CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPr CPPUNIT_ASSERT(prefixTree.matchesStart("h2 vh55daetrqt4")); } -void CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix() -{ +void CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix() { CFlatPrefixTree::TStrVec prefixes; prefixes.push_back("foo"); @@ -164,21 +148,17 @@ void CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix() CPPUNIT_ASSERT(prefixTree.matchesFully("fo") == false); } -void CFlatPrefixTreeTest::testRandom() -{ +void CFlatPrefixTreeTest::testRandom() { test::CRandomNumbers rng; test::CRandomNumbers::CUniform0nGenerator uniformGen = rng.uniformGenerator(); CFlatPrefixTree::TStrVec grams; rng.generateWords(3, 200, grams); CFlatPrefixTree::TStrVec prefixes; - for (std::size_t i = 0; i < grams.size(); ++i) - { - for (std::size_t j = 0; j < grams.size(); ++j) - { + for (std::size_t i = 0; i < grams.size(); ++i) { + for (std::size_t j = 0; j < grams.size(); ++j) { prefixes.push_back(grams[i] + grams[j]); std::size_t n = uniformGen(5); - for (std::size_t k = 0; k < n; ++k) - { + for (std::size_t k = 0; k < n; ++k) { prefixes.back() += grams[uniformGen(grams.size())]; } } @@ -198,8 +178,7 @@ void CFlatPrefixTreeTest::testRandom() rng.generateWords(10, 200000, lookups); lookups.insert(lookups.end(), prefixes.begin(), prefixes.end()); - for (std::size_t i = 0; i < lookups.size(); ++i) - { + for 
(std::size_t i = 0; i < lookups.size(); ++i) { CPPUNIT_ASSERT(prefixTree.matchesFully(lookups[i]) == set.count(lookups[i]) > 0); } } @@ -208,8 +187,7 @@ void CFlatPrefixTreeTest::testRandom() { CFlatPrefixTree::TStrVec suffixes; rng.generateWords(10, 1000, suffixes); - for (std::size_t i = 0; i < 100000; i++) - { + for (std::size_t i = 0; i < 100000; i++) { std::string key = prefixes[uniformGen(prefixes.size())] + suffixes[uniformGen(suffixes.size())]; CPPUNIT_ASSERT(prefixTree.matchesStart(key)); } diff --git a/lib/core/unittest/CFlatPrefixTreeTest.h b/lib/core/unittest/CFlatPrefixTreeTest.h index 64c8719dc1..ee256b9a7e 100644 --- a/lib/core/unittest/CFlatPrefixTreeTest.h +++ b/lib/core/unittest/CFlatPrefixTreeTest.h @@ -8,20 +8,18 @@ #include +class CFlatPrefixTreeTest : public CppUnit::TestFixture { +public: + void testBuildGivenUnsortedInput(); + void testBuildGivenSortedInputWithDuplicates(); + void testEmptyString(); + void testSimple(); + void testLeafAndBranch(); + void testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix(); + void testMatchesFullyGivenStringThatIsSubstringOfPrefix(); + void testRandom(); -class CFlatPrefixTreeTest : public CppUnit::TestFixture -{ - public: - void testBuildGivenUnsortedInput(); - void testBuildGivenSortedInputWithDuplicates(); - void testEmptyString(); - void testSimple(); - void testLeafAndBranch(); - void testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix(); - void testMatchesFullyGivenStringThatIsSubstringOfPrefix(); - void testRandom(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CFlatPrefixTreeTest_h diff --git a/lib/core/unittest/CFunctionalTest.cc b/lib/core/unittest/CFunctionalTest.cc index 148de2838d..d61f26c422 100644 --- a/lib/core/unittest/CFunctionalTest.cc +++ b/lib/core/unittest/CFunctionalTest.cc @@ -14,66 +14,57 @@ using namespace ml; -void CFunctionalTest::testIsNull() -{ +void CFunctionalTest::testIsNull() { core::CFunctional::SIsNull isNull; { double five = 5.0; - double *null = 0; - const double *notNull = &five; - CPPUNIT_ASSERT( isNull(null)); + double* null = 0; + const double* notNull = &five; + CPPUNIT_ASSERT(isNull(null)); CPPUNIT_ASSERT(!isNull(notNull)); } { boost::optional null; boost::optional notNull(5.0); - CPPUNIT_ASSERT( isNull(null)); + CPPUNIT_ASSERT(isNull(null)); CPPUNIT_ASSERT(!isNull(notNull)); } { boost::shared_ptr null; boost::shared_ptr notNull(new double(5.0)); - CPPUNIT_ASSERT( isNull(null)); + CPPUNIT_ASSERT(isNull(null)); CPPUNIT_ASSERT(!isNull(notNull)); } } -void CFunctionalTest::testDereference() -{ +void CFunctionalTest::testDereference() { double one(1.0); double two(2.0); double three(3.0); - const double *null_ = 0; + const double* null_ = 0; core::CFunctional::SDereference derefIsNull; boost::optional null(null_); boost::optional notNull(&one); - CPPUNIT_ASSERT( derefIsNull(null)); + CPPUNIT_ASSERT(derefIsNull(null)); CPPUNIT_ASSERT(!derefIsNull(notNull)); std::less less; core::CFunctional::SDereference> derefLess; - const double *values[] = { &one, &two, &three }; - for (std::size_t i = 0u; i < boost::size(values); ++i) - { - for (std::size_t j = 0u; j < boost::size(values); ++j) - { + const double* values[] = {&one, &two, &three}; + for (std::size_t i = 0u; i < boost::size(values); ++i) { + for (std::size_t j = 0u; j < boost::size(values); ++j) { CPPUNIT_ASSERT_EQUAL(less(*values[i], *values[j]), derefLess(values[i], values[j])); } } } -CppUnit::Test *CFunctionalTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new 
CppUnit::TestSuite("CFunctionalTest"); +CppUnit::Test* CFunctionalTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFunctionalTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFunctionalTest::testIsNull", - &CFunctionalTest::testIsNull) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CFunctionalTest::testDereference", - &CFunctionalTest::testDereference) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CFunctionalTest::testIsNull", &CFunctionalTest::testIsNull)); + suiteOfTests->addTest(new CppUnit::TestCaller("CFunctionalTest::testDereference", &CFunctionalTest::testDereference)); return suiteOfTests; } diff --git a/lib/core/unittest/CFunctionalTest.h b/lib/core/unittest/CFunctionalTest.h index c77f3cdc20..c3e766d683 100644 --- a/lib/core/unittest/CFunctionalTest.h +++ b/lib/core/unittest/CFunctionalTest.h @@ -9,13 +9,12 @@ #include -class CFunctionalTest : public CppUnit::TestFixture -{ - public: - void testIsNull(); - void testDereference(); +class CFunctionalTest : public CppUnit::TestFixture { +public: + void testIsNull(); + void testDereference(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CFunctionalTest_h diff --git a/lib/core/unittest/CHashingTest.cc b/lib/core/unittest/CHashingTest.cc index e717aa6d5d..2676fd91e3 100644 --- a/lib/core/unittest/CHashingTest.cc +++ b/lib/core/unittest/CHashingTest.cc @@ -22,8 +22,7 @@ using namespace ml; using namespace core; -void CHashingTest::testUniversalHash() -{ +void CHashingTest::testUniversalHash() { // We test the definition: // "for all (x,y) in U and x != y P(h(x) = h(y)) <= 1/m" // @@ -38,40 +37,34 @@ void CHashingTest::testUniversalHash() // // must hold for a randomly selected member of the family. 
- double tolerances[] = { 1.0, 1.6 }; - uint32_t m[] = { 30, 300 }; - uint32_t u[] = { 1000, 10000 }; + double tolerances[] = {1.0, 1.6}; + uint32_t m[] = {30, 300}; + uint32_t u[] = {1000, 10000}; - for (size_t i = 0; i < boost::size(m); ++i) - { + for (size_t i = 0; i < boost::size(m); ++i) { CHashing::CUniversalHash::TUInt32HashVec hashes; CHashing::CUniversalHash::generateHashes(10u, m[i], hashes); double collisionsRandom = 0.0; double hashedRandom = 0.0; - for (std::size_t h = 0u; h < hashes.size(); ++h) - { + for (std::size_t h = 0u; h < hashes.size(); ++h) { LOG_DEBUG("**** Testing hash = " << hashes[h].print() << " ****"); CHashing::CUniversalHash::CUInt32Hash hash = hashes[h]; - for (size_t j = 0; j < boost::size(u); ++j) - { + for (size_t j = 0; j < boost::size(u); ++j) { uint32_t n = u[j]; LOG_DEBUG("m = " << m[i] << ", U = [" << n << "]"); uint32_t collisions = 0u; - for (uint32_t x = 0; x < n; ++x) - { - for (uint32_t y = x + 1u; y < n; ++y) - { + for (uint32_t x = 0; x < n; ++x) { + for (uint32_t y = x + 1u; y < n; ++y) { uint32_t hx = hash(x); uint32_t hy = hash(y); - if (hx == hy) - { + if (hx == hy) { ++collisions; } } @@ -80,12 +73,9 @@ void CHashingTest::testUniversalHash() collisionsRandom += static_cast(collisions); hashedRandom += static_cast(n * (n - 1)) / 2.0; - double pc = 2.0 * static_cast(collisions) - / static_cast(n * (n - 1)); + double pc = 2.0 * static_cast(collisions) / static_cast(n * (n - 1)); - LOG_DEBUG("collisions = " << collisions - << ", P(collision) = " << pc - << ", 1/m = " << (1.0 / static_cast(m[i]))); + LOG_DEBUG("collisions = " << collisions << ", P(collision) = " << pc << ", 1/m = " << (1.0 / static_cast(m[i]))); // Note that the definition of universality doesn't require // the P(collision) <= 1/m for every hash function. 
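The construction these assertions exercise is compact enough to write out. Below is a minimal, self-contained sketch of one member of a Carter-Wegman multiply-add family, assuming the classic h(x) = ((a*x + b) mod p) mod m form; the class name, the choice of prime and the RNG are illustrative and are not taken from ml's CHashing internals:

#include <cstdint>
#include <random>

// One member of the family h(x) = ((a*x + b) mod p) mod m, with p prime and
// a in [1, p-1], b in [0, p-1] drawn at random. For fixed distinct keys
// x, y in [0, p) the probability that h(x) == h(y), taken over the random
// draw of (a, b), is at most 1/m - the property the loops above measure.
class CIllustrativeUniversalHash {
public:
    CIllustrativeUniversalHash(std::uint32_t m, std::mt19937_64& rng)
        : m_A(std::uniform_int_distribution<std::uint64_t>(1, P - 1)(rng)),
          m_B(std::uniform_int_distribution<std::uint64_t>(0, P - 1)(rng)),
          m_M(m) {}

    std::uint32_t operator()(std::uint32_t x) const {
        // a, b < 2^31 and x < 2^32, so a * x + b fits comfortably in 64 bits.
        return static_cast<std::uint32_t>(((m_A * x + m_B) % P) % m_M);
    }

private:
    static constexpr std::uint64_t P = 2147483647; // Mersenne prime 2^31 - 1
    std::uint64_t m_A;
    std::uint64_t m_B;
    std::uint32_t m_M;
};

Drawing ten such objects with fresh (a, b) pairs and dividing total collisions by total pairs, as the loops above do for CUniversalHash, checks the family average rather than any individual member, which is all the definition of universality promises.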
@@ -114,8 +104,7 @@ void CHashingTest::testUniversalHash() boost::random::uniform_int_distribution uniform(0u, 10000000u); TUInt32Vec samples; - std::generate_n(std::back_inserter(samples), 1000u, - boost::bind(uniform, boost::ref(generator))); + std::generate_n(std::back_inserter(samples), 1000u, boost::bind(uniform, boost::ref(generator))); CHashing::CUniversalHash::TUInt32HashVec hashes; CHashing::CUniversalHash::generateHashes(100u, 10000u, hashes); @@ -123,8 +112,7 @@ void CHashingTest::testUniversalHash() double collisionsRandom = 0.0; double hashedRandom = 0.0; - for (std::size_t h = 0u; h < hashes.size(); ++h) - { + for (std::size_t h = 0u; h < hashes.size(); ++h) { LOG_DEBUG("Testing hash = " << hashes[h].print()); CHashing::CUniversalHash::CUInt32Hash hash = hashes[h]; @@ -132,17 +120,12 @@ void CHashingTest::testUniversalHash() uint32_t collisions = 0u; TUInt32PrSet uniquePairs; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - for (std::size_t j = i + 1u; j < samples.size(); ++j) - { - if (samples[i] != samples[j] - && uniquePairs.insert(TUInt32Pr(samples[i], samples[j])).second) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + for (std::size_t j = i + 1u; j < samples.size(); ++j) { + if (samples[i] != samples[j] && uniquePairs.insert(TUInt32Pr(samples[i], samples[j])).second) { uint32_t hx = hash(samples[i]); uint32_t hy = hash(samples[j]); - if (hx == hy) - { + if (hx == hy) { ++collisions; } } @@ -152,12 +135,9 @@ void CHashingTest::testUniversalHash() collisionsRandom += static_cast(collisions); hashedRandom += static_cast(uniquePairs.size()); - double pc = static_cast(collisions) - / static_cast(uniquePairs.size()); + double pc = static_cast(collisions) / static_cast(uniquePairs.size()); - LOG_DEBUG("collisions = " << collisions - << ", P(collision) = " << pc - << ", 1/m = " << (1.0 / 10000.0)); + LOG_DEBUG("collisions = " << collisions << ", P(collision) = " << pc << ", 1/m = " << (1.0 / 10000.0)); // Note that the definition of universality doesn't require // the P(collision) <= 1/m for every hash function. 
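Both blocks above estimate the same quantity, once over the whole universe and once over random samples. Factored out, the exhaustive variant looks like the following sketch; empiricalCollisionRate is not an ml function, just an illustrative reduction of the surrounding loops (the random-sample variant differs only in how the key pairs are enumerated):

#include <cstdint>
#include <vector>

// Fraction of distinct key pairs in [0, n) that collide, averaged over a
// family of hash functions. Universality bounds the family average by 1/m,
// even though an individual member may exceed that bound.
template<typename HASH>
double empiricalCollisionRate(const std::vector<HASH>& family, std::uint32_t n) {
    double collisions = 0.0;
    double pairs = 0.0;
    for (const HASH& hash : family) {
        for (std::uint32_t x = 0; x < n; ++x) {
            for (std::uint32_t y = x + 1; y < n; ++y) {
                if (hash(x) == hash(y)) {
                    collisions += 1.0;
                }
            }
        }
        pairs += static_cast<double>(n) * static_cast<double>(n - 1) / 2.0;
    }
    return pairs > 0.0 ? collisions / pairs : 0.0;
}

// e.g. CPPUNIT_ASSERT(empiricalCollisionRate(family, 1000) <= tolerance / m);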
@@ -184,15 +164,12 @@ void CHashingTest::testUniversalHash() TUint32PrUIntMap uniqueHashedPairs; - for (std::size_t h = 0u; h < hashes.size(); ++h) - { + for (std::size_t h = 0u; h < hashes.size(); ++h) { LOG_DEBUG("Testing hash = " << hashes[h].print()); CHashing::CUniversalHash::CUInt32Hash hash = hashes[h]; - for (uint32_t x = 0u; x < 2000; ++x) - { - for (uint32_t y = x + 1u; y < 2000; ++y) - { + for (uint32_t x = 0u; x < 2000; ++x) { + for (uint32_t y = x + 1u; y < 2000; ++y) { uint32_t hx = hash(x); uint32_t hy = hash(y); ++uniqueHashedPairs[TUInt32Pr(hx, hy)]; @@ -202,16 +179,10 @@ void CHashingTest::testUniversalHash() double error = 0.0; - for (TUint32PrUIntMapCItr i = uniqueHashedPairs.begin(); - i != uniqueHashedPairs.end(); - ++i) - { - double p = 2.0 * static_cast(i->second) - / 2000.0 / 1999.0 - / static_cast(hashes.size()); + for (TUint32PrUIntMapCItr i = uniqueHashedPairs.begin(); i != uniqueHashedPairs.end(); ++i) { + double p = 2.0 * static_cast(i->second) / 2000.0 / 1999.0 / static_cast(hashes.size()); - if (p > 1.0 / 10000.0) - { + if (p > 1.0 / 10000.0) { LOG_DEBUG(core::CContainerPrinter::print(*i) << ", p = " << p); error += p - 1 / 10000.0; } @@ -223,8 +194,7 @@ void CHashingTest::testUniversalHash() } } -void CHashingTest::testMurmurHash() -{ +void CHashingTest::testMurmurHash() { { std::string key("This is the voice of the Mysterons!"); uint32_t seed = 0xdead4321; @@ -238,13 +208,15 @@ void CHashingTest::testMurmurHash() CPPUNIT_ASSERT_EQUAL(uint32_t(0x54837c96), result); } { - std::string key("Your message has been analysed and it has been decided to allow one member of Spectrum to meet our representative."); + std::string key( + "Your message has been analysed and it has been decided to allow one member of Spectrum to meet our representative."); uint64_t seed = 0xaabbccddffeeeeffULL; uint64_t result = CHashing::murmurHash64(key.c_str(), static_cast(key.size()), seed); CPPUNIT_ASSERT_EQUAL(uint64_t(14826751455157300659ull), result); } { - std::string key("Earthmen, we are peaceful beings and you have tried to destroy us, but you cannot succeed. You and your people will pay for this act of aggression."); + std::string key("Earthmen, we are peaceful beings and you have tried to destroy us, but you cannot succeed. 
You and your people " + "will pay for this act of aggression."); uint64_t seed = 0x1324fedc9876abdeULL; uint64_t result = CHashing::safeMurmurHash64(key.c_str(), static_cast(key.size()), seed); CPPUNIT_ASSERT_EQUAL(uint64_t(7291323361835448266ull), result); @@ -266,25 +238,21 @@ void CHashingTest::testMurmurHash() uint64_t defaultLookupTime = 0; uint64_t murmurInsertTime = 0; uint64_t murmurLookupTime = 0; - for (int run = 0; run < 6; ++run) - { + for (int run = 0; run < 6; ++run) { LOG_DEBUG("run = " << run); LOG_DEBUG("Starting throughput of boost::unordered_set with default hash"); { boost::unordered_set s; stopWatch.reset(true); - for (std::size_t i = 0u; i < testStrings.size(); ++i) - { + for (std::size_t i = 0u; i < testStrings.size(); ++i) { s.insert(testStrings[i]); } defaultInsertTime += stopWatch.stop(); size_t total(0); stopWatch.reset(true); - for (int i = 0; i < 5; ++i) - { - for (std::size_t j = 0u; j < testStrings.size(); ++j) - { + for (int i = 0; i < 5; ++i) { + for (std::size_t j = 0u; j < testStrings.size(); ++j) { total += s.count(testStrings[j]); } } @@ -297,17 +265,14 @@ void CHashingTest::testMurmurHash() { boost::unordered_set s; stopWatch.reset(true); - for (std::size_t i = 0u; i < testStrings.size(); ++i) - { + for (std::size_t i = 0u; i < testStrings.size(); ++i) { s.insert(testStrings[i]); } murmurInsertTime += stopWatch.stop(); size_t total(0); stopWatch.reset(true); - for (int i = 0; i < 5; ++i) - { - for (std::size_t j = 0u; j < testStrings.size(); ++j) - { + for (int i = 0; i < 5; ++i) { + for (std::size_t j = 0u; j < testStrings.size(); ++j) { total += s.count(testStrings[j]); } } @@ -317,18 +282,14 @@ void CHashingTest::testMurmurHash() LOG_DEBUG("Finished throughput of boost::unordered_set with murmur hash"); } - LOG_DEBUG("default insert runtime = " << defaultInsertTime - << "ms, murmur insert runtime = " << murmurInsertTime << "ms"); - LOG_DEBUG("default lookup runtime = " << defaultLookupTime - << "ms, murmur lookup runtime = " << murmurLookupTime << "ms"); + LOG_DEBUG("default insert runtime = " << defaultInsertTime << "ms, murmur insert runtime = " << murmurInsertTime << "ms"); + LOG_DEBUG("default lookup runtime = " << defaultLookupTime << "ms, murmur lookup runtime = " << murmurLookupTime << "ms"); // The benefits of the murmur hash are mainly at lookup time, so just assert // on that, but still log a warning for slower insert time - if (murmurInsertTime > defaultInsertTime) - { - LOG_WARN("murmur insert runtime (" << murmurInsertTime << - "ms) was longer than default insert runtime (" << - defaultInsertTime << "ms)"); + if (murmurInsertTime > defaultInsertTime) { + LOG_WARN("murmur insert runtime (" << murmurInsertTime << "ms) was longer than default insert runtime (" << defaultInsertTime + << "ms)"); } // Most of the times the murmur lookup time will be faster. But it is not @@ -339,14 +300,12 @@ void CHashingTest::testMurmurHash() // Check the number of collisions. 
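    // The hashes are reduced modulo a table size of roughly three million
    // below; with a well-mixing hash the fullest bucket should hold only a
    // handful of the test strings, which is what the bound on maxCollisions
    // asserts.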
TSizeSizeMap uniqueHashes; CHashing::CMurmurHash2String h; - for (std::size_t i = 0u; i < testStrings.size(); ++i) - { + for (std::size_t i = 0u; i < testStrings.size(); ++i) { ++uniqueHashes[h(testStrings[i]) % 3000017]; } std::size_t maxCollisions = 0u; - for (TSizeSizeMapCItr i = uniqueHashes.begin(); i != uniqueHashes.end(); ++i) - { + for (TSizeSizeMapCItr i = uniqueHashes.begin(); i != uniqueHashes.end(); ++i) { maxCollisions = std::max(maxCollisions, i->second); } @@ -359,8 +318,7 @@ void CHashingTest::testMurmurHash() CPPUNIT_ASSERT(maxCollisions < 7); } -void CHashingTest::testHashCombine() -{ +void CHashingTest::testHashCombine() { // Check we get about the same number of collisions using hashCombine // verses full hash of string. @@ -377,8 +335,7 @@ void CHashingTest::testHashCombine() TStrVec testStrings; TSizeSet uniqueHashes; TSizeSet uniqueHashCombines; - for (std::size_t i = 0u; i < 5u; ++i) - { + for (std::size_t i = 0u; i < 5u; ++i) { LOG_DEBUG("test " << i); // This will overwrite the previous contents of testStrings @@ -387,8 +344,7 @@ void CHashingTest::testHashCombine() uniqueHashes.clear(); uniqueHashCombines.clear(); - for (std::size_t j = 0u; j < numberStrings; j += 2) - { + for (std::size_t j = 0u; j < numberStrings; j += 2) { uniqueHashes.insert(hasher(testStrings[j] + testStrings[j + 1])); uniqueHashCombines.insert(core::CHashing::hashCombine(static_cast(hasher(testStrings[j])), static_cast(hasher(testStrings[j + 1])))); @@ -397,13 +353,11 @@ void CHashingTest::testHashCombine() LOG_DEBUG("# unique hashes = " << uniqueHashes.size()); LOG_DEBUG("# unique combined hashes = " << uniqueHashCombines.size()); - CPPUNIT_ASSERT(uniqueHashCombines.size() - > static_cast(0.999 * static_cast(uniqueHashes.size()))); + CPPUNIT_ASSERT(uniqueHashCombines.size() > static_cast(0.999 * static_cast(uniqueHashes.size()))); } } -void CHashingTest::testConstructors() -{ +void CHashingTest::testConstructors() { { CHashing::CUniversalHash::CUInt32Hash hash(1, 2, 3); CPPUNIT_ASSERT_EQUAL(uint32_t(1), hash.m()); @@ -427,8 +381,7 @@ void CHashingTest::testConstructors() a.push_back(20); a.push_back(30); CHashing::CUniversalHash::CUInt32VecHash hash(5, a, 6); - CPPUNIT_ASSERT_EQUAL(CContainerPrinter::print(a), - CContainerPrinter::print(hash.a())); + CPPUNIT_ASSERT_EQUAL(CContainerPrinter::print(a), CContainerPrinter::print(hash.a())); CPPUNIT_ASSERT_EQUAL(uint32_t(5), hash.m()); CPPUNIT_ASSERT_EQUAL(uint32_t(6), hash.b()); LOG_DEBUG(hash.print()); @@ -472,22 +425,12 @@ void CHashingTest::testConstructors() } } +CppUnit::Test* CHashingTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CHashingTest"); -CppUnit::Test *CHashingTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CHashingTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CHashingTest::testUniversalHash", - &CHashingTest::testUniversalHash) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CHashingTest::testMurmurHash", - &CHashingTest::testMurmurHash) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CHashingTest::testHashCombine", - &CHashingTest::testHashCombine) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CHashingTest::testConstructors", - &CHashingTest::testConstructors) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CHashingTest::testUniversalHash", &CHashingTest::testUniversalHash)); + suiteOfTests->addTest(new CppUnit::TestCaller("CHashingTest::testMurmurHash", &CHashingTest::testMurmurHash)); + suiteOfTests->addTest(new 
CppUnit::TestCaller("CHashingTest::testHashCombine", &CHashingTest::testHashCombine)); + suiteOfTests->addTest(new CppUnit::TestCaller("CHashingTest::testConstructors", &CHashingTest::testConstructors)); return suiteOfTests; } diff --git a/lib/core/unittest/CHashingTest.h b/lib/core/unittest/CHashingTest.h index ea093875eb..68870decec 100644 --- a/lib/core/unittest/CHashingTest.h +++ b/lib/core/unittest/CHashingTest.h @@ -9,15 +9,14 @@ #include -class CHashingTest : public CppUnit::TestFixture -{ - public: - void testUniversalHash(); - void testMurmurHash(); - void testHashCombine(); - void testConstructors(); +class CHashingTest : public CppUnit::TestFixture { +public: + void testUniversalHash(); + void testMurmurHash(); + void testHashCombine(); + void testConstructors(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CHashingTest_h diff --git a/lib/core/unittest/CHexUtilsTest.cc b/lib/core/unittest/CHexUtilsTest.cc index eed824a676..8f4dbd03c8 100644 --- a/lib/core/unittest/CHexUtilsTest.cc +++ b/lib/core/unittest/CHexUtilsTest.cc @@ -13,20 +13,15 @@ #include #include +CppUnit::Test* CHexUtilsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CHexUtilsTest"); -CppUnit::Test *CHexUtilsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CHexUtilsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CHexUtilsTest::testHexOutput", - &CHexUtilsTest::testHexOutput) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CHexUtilsTest::testHexOutput", &CHexUtilsTest::testHexOutput)); return suiteOfTests; } -void CHexUtilsTest::testHexOutput() -{ +void CHexUtilsTest::testHexOutput() { // Seed the random number generator ::srand(static_cast(::clock())); @@ -72,8 +67,7 @@ void CHexUtilsTest::testHexOutput() ml::core::CHexUtils::TDataVec randomData; randomData.reserve(100); - for (size_t count = 0; count < 100; ++count) - { + for (size_t count = 0; count < 100; ++count) { randomData.push_back(static_cast(::rand())); } @@ -106,8 +100,7 @@ void CHexUtilsTest::testHexOutput() // selected CPPUNIT_ASSERT(strm2.str() != strm3.str()); CPPUNIT_ASSERT(strm5.str() != strm6.str()); - + LOG_DEBUG("Random test output is:\n" << strm1.str()); } } - diff --git a/lib/core/unittest/CHexUtilsTest.h b/lib/core/unittest/CHexUtilsTest.h index c73dd7698f..d14773866f 100644 --- a/lib/core/unittest/CHexUtilsTest.h +++ b/lib/core/unittest/CHexUtilsTest.h @@ -8,13 +8,11 @@ #include -class CHexUtilsTest : public CppUnit::TestFixture -{ - public: - void testHexOutput(); +class CHexUtilsTest : public CppUnit::TestFixture { +public: + void testHexOutput(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CHexUtilsTest_h - diff --git a/lib/core/unittest/CIEEE754Test.cc b/lib/core/unittest/CIEEE754Test.cc index 3b77617d2f..148395a0ad 100644 --- a/lib/core/unittest/CIEEE754Test.cc +++ b/lib/core/unittest/CIEEE754Test.cc @@ -6,8 +6,8 @@ #include "CIEEE754Test.h" -#include #include +#include #include #include @@ -16,8 +16,7 @@ using namespace ml; using namespace core; -void CIEEE754Test::testRound() -{ +void CIEEE754Test::testRound() { { // Check it matches float precision. 
double test1 = 0.049999998; @@ -80,15 +79,10 @@ void CIEEE754Test::testRound() } } -CppUnit::Test *CIEEE754Test::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CIEEE754Test"); +CppUnit::Test* CIEEE754Test::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CIEEE754Test"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CIEEE754Test::testRound", - &CIEEE754Test::testRound) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CIEEE754Test::testRound", &CIEEE754Test::testRound)); return suiteOfTests; - } - diff --git a/lib/core/unittest/CIEEE754Test.h b/lib/core/unittest/CIEEE754Test.h index b86b0d8fa9..7fffe7406b 100644 --- a/lib/core/unittest/CIEEE754Test.h +++ b/lib/core/unittest/CIEEE754Test.h @@ -9,12 +9,11 @@ #include -class CIEEE754Test : public CppUnit::TestFixture -{ - public: - void testRound(); +class CIEEE754Test : public CppUnit::TestFixture { +public: + void testRound(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CIEEE754Test_h diff --git a/lib/core/unittest/CJsonLogLayoutTest.cc b/lib/core/unittest/CJsonLogLayoutTest.cc index 48ee2224f3..1e8a417efc 100644 --- a/lib/core/unittest/CJsonLogLayoutTest.cc +++ b/lib/core/unittest/CJsonLogLayoutTest.cc @@ -8,30 +8,23 @@ #include #include -CppUnit::Test *CJsonLogLayoutTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CJsonLogLayoutTest"); +CppUnit::Test* CJsonLogLayoutTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonLogLayoutTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonLogLayoutTest::testPathCropping", - &CJsonLogLayoutTest::testPathCropping) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CJsonLogLayoutTest::testPathCropping", &CJsonLogLayoutTest::testPathCropping)); return suiteOfTests; } -void CJsonLogLayoutTest::testPathCropping() -{ - LOG_DEBUG("CJsonLogLayoutTest::testPathCropping"); +void CJsonLogLayoutTest::testPathCropping() { + LOG_DEBUG("CJsonLogLayoutTest::testPathCropping"); #ifdef Windows - CPPUNIT_ASSERT_EQUAL(std::string("source.h"), - log4cxx::helpers::CJsonLogLayout::cropPath("c:\\\\home\\hendrik\\src\\include/source.h")); - CPPUNIT_ASSERT_EQUAL(std::string("source.h"), - log4cxx::helpers::CJsonLogLayout::cropPath("c:\\\\home\\hendrik\\src\\include\\source.h")); + CPPUNIT_ASSERT_EQUAL(std::string("source.h"), log4cxx::helpers::CJsonLogLayout::cropPath("c:\\\\home\\hendrik\\src\\include/source.h")); + CPPUNIT_ASSERT_EQUAL(std::string("source.h"), + log4cxx::helpers::CJsonLogLayout::cropPath("c:\\\\home\\hendrik\\src\\include\\source.h")); #else - CPPUNIT_ASSERT_EQUAL(std::string("source.h"), - log4cxx::helpers::CJsonLogLayout::cropPath("/home/hendrik/src/include/source.h")); - CPPUNIT_ASSERT_EQUAL(std::string("source.h"), - log4cxx::helpers::CJsonLogLayout::cropPath("/home/hendrik/work/../src/include/source.h")); + CPPUNIT_ASSERT_EQUAL(std::string("source.h"), log4cxx::helpers::CJsonLogLayout::cropPath("/home/hendrik/src/include/source.h")); + CPPUNIT_ASSERT_EQUAL(std::string("source.h"), log4cxx::helpers::CJsonLogLayout::cropPath("/home/hendrik/work/../src/include/source.h")); #endif } - diff --git a/lib/core/unittest/CJsonLogLayoutTest.h b/lib/core/unittest/CJsonLogLayoutTest.h index c12672ff1e..a2ba16253f 100644 --- a/lib/core/unittest/CJsonLogLayoutTest.h +++ b/lib/core/unittest/CJsonLogLayoutTest.h @@ -8,14 +8,11 @@ #include +class CJsonLogLayoutTest : public CppUnit::TestFixture { +public: + void testPathCropping(); -class 
CJsonLogLayoutTest : public CppUnit::TestFixture -{ - public: - void testPathCropping(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CJsonLogLayoutTest_h - diff --git a/lib/core/unittest/CJsonOutputStreamWrapperTest.cc b/lib/core/unittest/CJsonOutputStreamWrapperTest.cc index 06918756c8..1cd0c726f2 100644 --- a/lib/core/unittest/CJsonOutputStreamWrapperTest.cc +++ b/lib/core/unittest/CJsonOutputStreamWrapperTest.cc @@ -19,28 +19,22 @@ #include #include -CppUnit::Test *CJsonOutputStreamWrapperTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CJsonOutputStreamWrapperTest"); +CppUnit::Test* CJsonOutputStreamWrapperTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonOutputStreamWrapperTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonOutputStreamWrapperTest::testConcurrentWrites", - &CJsonOutputStreamWrapperTest::testConcurrentWrites) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonOutputStreamWrapperTest::testShrink", - &CJsonOutputStreamWrapperTest::testShrink) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputStreamWrapperTest::testConcurrentWrites", + &CJsonOutputStreamWrapperTest::testConcurrentWrites)); + suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputStreamWrapperTest::testShrink", + &CJsonOutputStreamWrapperTest::testShrink)); return suiteOfTests; } -namespace -{ +namespace { -void task(ml::core::CJsonOutputStreamWrapper &wrapper, int id, int documents) -{ +void task(ml::core::CJsonOutputStreamWrapper& wrapper, int id, int documents) { ml::core::CRapidJsonConcurrentLineWriter writer(wrapper); - for (int i = 0; i < documents; ++i) - { + for (int i = 0; i < documents; ++i) { writer.StartObject(); writer.Key("id"); writer.Int(id); @@ -53,11 +47,9 @@ void task(ml::core::CJsonOutputStreamWrapper &wrapper, int id, int documents) writer.EndObject(); } } - } -void CJsonOutputStreamWrapperTest::testConcurrentWrites() -{ +void CJsonOutputStreamWrapperTest::testConcurrentWrites() { std::ostringstream stringStream; static const size_t WRITERS(1500); @@ -66,8 +58,7 @@ void CJsonOutputStreamWrapperTest::testConcurrentWrites() ml::core::CJsonOutputStreamWrapper wrapper(stringStream); boost::threadpool::pool tp(100); - for (size_t i = 0; i < WRITERS; ++i) - { + for (size_t i = 0; i < WRITERS; ++i) { tp.schedule(boost::bind(task, boost::ref(wrapper), i, DOCUMENTS_PER_WRITER)); } tp.wait(); @@ -78,20 +69,19 @@ void CJsonOutputStreamWrapperTest::testConcurrentWrites() // check that the document isn't malformed (like wrongly interleaved buffers) CPPUNIT_ASSERT(!doc.HasParseError()); - const rapidjson::Value &allRecords = doc.GetArray(); + const rapidjson::Value& allRecords = doc.GetArray(); // check number of documents CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(WRITERS * DOCUMENTS_PER_WRITER), allRecords.Size()); } -void CJsonOutputStreamWrapperTest::testShrink() -{ +void CJsonOutputStreamWrapperTest::testShrink() { std::ostringstream stringStream; ml::core::CJsonOutputStreamWrapper wrapper(stringStream); size_t memoryUsageBase = wrapper.memoryUsage(); ml::core::CJsonOutputStreamWrapper::TGenericLineWriter writer; - rapidjson::StringBuffer *stringBuffer; + rapidjson::StringBuffer* stringBuffer; wrapper.acquireBuffer(writer, stringBuffer); @@ -102,8 +92,7 @@ void CJsonOutputStreamWrapperTest::testShrink() CPPUNIT_ASSERT(memoryUsageBase > stringBufferSizeBase); // fill the buffer, expand it - for (size_t i=0; i < 100000; ++i) - { + for (size_t i = 0; i < 
100000; ++i) { stringBuffer->Put('{'); stringBuffer->Put('}'); stringBuffer->Put(','); @@ -114,7 +103,7 @@ void CJsonOutputStreamWrapperTest::testShrink() CPPUNIT_ASSERT(wrapper.memoryUsage() > stringBuffer->stack_.GetCapacity()); // save the original pointer as flushBuffer returns a new buffer - rapidjson::StringBuffer *stringBufferOriginal = stringBuffer; + rapidjson::StringBuffer* stringBufferOriginal = stringBuffer; wrapper.flushBuffer(writer, stringBuffer); wrapper.syncFlush(); @@ -124,4 +113,3 @@ void CJsonOutputStreamWrapperTest::testShrink() CPPUNIT_ASSERT_EQUAL(memoryUsageBase, wrapper.memoryUsage()); } - diff --git a/lib/core/unittest/CJsonOutputStreamWrapperTest.h b/lib/core/unittest/CJsonOutputStreamWrapperTest.h index 5413fdba65..debb37636c 100644 --- a/lib/core/unittest/CJsonOutputStreamWrapperTest.h +++ b/lib/core/unittest/CJsonOutputStreamWrapperTest.h @@ -8,14 +8,12 @@ #include -class CJsonOutputStreamWrapperTest : public CppUnit::TestFixture -{ - public: - void testConcurrentWrites(); - void testShrink(); +class CJsonOutputStreamWrapperTest : public CppUnit::TestFixture { +public: + void testConcurrentWrites(); + void testShrink(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CJsonOutputStreamWrapperTest_h - diff --git a/lib/core/unittest/CJsonStatePersistInserterTest.cc b/lib/core/unittest/CJsonStatePersistInserterTest.cc index 3080c3fb8c..5c53c7e4b6 100644 --- a/lib/core/unittest/CJsonStatePersistInserterTest.cc +++ b/lib/core/unittest/CJsonStatePersistInserterTest.cc @@ -5,37 +5,30 @@ */ #include "CJsonStatePersistInserterTest.h" -#include #include +#include #include #include +CppUnit::Test* CJsonStatePersistInserterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonStatePersistInserterTest"); -CppUnit::Test *CJsonStatePersistInserterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CJsonStatePersistInserterTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonStatePersistInserterTest::testPersist", - &CJsonStatePersistInserterTest::testPersist) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStatePersistInserterTest::testPersist", + &CJsonStatePersistInserterTest::testPersist)); return suiteOfTests; } -namespace -{ +namespace { -void insert2ndLevel(ml::core::CStatePersistInserter &inserter) -{ +void insert2ndLevel(ml::core::CStatePersistInserter& inserter) { inserter.insertValue("level2A", 3.14, ml::core::CIEEE754::E_SinglePrecision); inserter.insertValue("level2B", 'z'); } - } -void CJsonStatePersistInserterTest::testPersist() -{ +void CJsonStatePersistInserterTest::testPersist() { std::ostringstream strm; { @@ -51,7 +44,5 @@ void CJsonStatePersistInserterTest::testPersist() LOG_DEBUG("JSON is: " << json); - CPPUNIT_ASSERT_EQUAL(std::string("{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}"), - json); + CPPUNIT_ASSERT_EQUAL(std::string("{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}"), json); } - diff --git a/lib/core/unittest/CJsonStatePersistInserterTest.h b/lib/core/unittest/CJsonStatePersistInserterTest.h index 294d0174dc..a704448eeb 100644 --- a/lib/core/unittest/CJsonStatePersistInserterTest.h +++ b/lib/core/unittest/CJsonStatePersistInserterTest.h @@ -8,14 +8,11 @@ #include +class CJsonStatePersistInserterTest : public CppUnit::TestFixture { +public: + void testPersist(); -class CJsonStatePersistInserterTest : public CppUnit::TestFixture -{ - public: 
- void testPersist(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CJsonStatePersistInserterTest_h - diff --git a/lib/core/unittest/CJsonStateRestoreTraverserTest.cc b/lib/core/unittest/CJsonStateRestoreTraverserTest.cc index 8446f05bba..515b8d85d1 100644 --- a/lib/core/unittest/CJsonStateRestoreTraverserTest.cc +++ b/lib/core/unittest/CJsonStateRestoreTraverserTest.cc @@ -9,41 +9,30 @@ #include - -CppUnit::Test *CJsonStateRestoreTraverserTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CJsonStateRestoreTraverserTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testRestore1", - &CJsonStateRestoreTraverserTest::testRestore1) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testRestore2", - &CJsonStateRestoreTraverserTest::testRestore2) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testRestore3", - &CJsonStateRestoreTraverserTest::testRestore3) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testRestore4", - &CJsonStateRestoreTraverserTest::testRestore4) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testParsingBooleanFields", - &CJsonStateRestoreTraverserTest::testParsingBooleanFields) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays", - &CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested", - &CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested) ); +CppUnit::Test* CJsonStateRestoreTraverserTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonStateRestoreTraverserTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStateRestoreTraverserTest::testRestore1", + &CJsonStateRestoreTraverserTest::testRestore1)); + suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStateRestoreTraverserTest::testRestore2", + &CJsonStateRestoreTraverserTest::testRestore2)); + suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStateRestoreTraverserTest::testRestore3", + &CJsonStateRestoreTraverserTest::testRestore3)); + suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStateRestoreTraverserTest::testRestore4", + &CJsonStateRestoreTraverserTest::testRestore4)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonStateRestoreTraverserTest::testParsingBooleanFields", &CJsonStateRestoreTraverserTest::testParsingBooleanFields)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays", &CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested", &CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested)); return suiteOfTests; } -namespace -{ +namespace { -bool traverse2ndLevel(ml::core::CStateRestoreTraverser &traverser) -{ +bool traverse2ndLevel(ml::core::CStateRestoreTraverser& traverser) { CPPUNIT_ASSERT_EQUAL(std::string("level2A"), traverser.name()); CPPUNIT_ASSERT_EQUAL(std::string("3.14"), traverser.value()); CPPUNIT_ASSERT(!traverser.hasSubLevel()); @@ -56,8 +45,7 @@ bool traverse2ndLevel(ml::core::CStateRestoreTraverser &traverser) return true; } -bool traverse1stLevel1(ml::core::CStateRestoreTraverser &traverser) -{ 
+bool traverse1stLevel1(ml::core::CStateRestoreTraverser& traverser) { CPPUNIT_ASSERT_EQUAL(std::string("level1A"), traverser.name()); CPPUNIT_ASSERT_EQUAL(std::string("a"), traverser.value()); CPPUNIT_ASSERT(!traverser.hasSubLevel()); @@ -74,8 +62,7 @@ bool traverse1stLevel1(ml::core::CStateRestoreTraverser &traverser) return true; } -bool traverse1stLevel2(ml::core::CStateRestoreTraverser &traverser) -{ +bool traverse1stLevel2(ml::core::CStateRestoreTraverser& traverser) { CPPUNIT_ASSERT_EQUAL(std::string("level1A"), traverser.name()); CPPUNIT_ASSERT_EQUAL(std::string("a"), traverser.value()); CPPUNIT_ASSERT(!traverser.hasSubLevel()); @@ -96,8 +83,7 @@ bool traverse1stLevel2(ml::core::CStateRestoreTraverser &traverser) return true; } -bool traverse2ndLevelEmpty(ml::core::CStateRestoreTraverser &traverser) -{ +bool traverse2ndLevelEmpty(ml::core::CStateRestoreTraverser& traverser) { CPPUNIT_ASSERT(traverser.name().empty()); CPPUNIT_ASSERT(traverser.value().empty()); CPPUNIT_ASSERT(!traverser.hasSubLevel()); @@ -106,8 +92,7 @@ bool traverse2ndLevelEmpty(ml::core::CStateRestoreTraverser &traverser) return true; } -bool traverse1stLevel3(ml::core::CStateRestoreTraverser &traverser) -{ +bool traverse1stLevel3(ml::core::CStateRestoreTraverser& traverser) { CPPUNIT_ASSERT_EQUAL(std::string("level1A"), traverser.name()); CPPUNIT_ASSERT_EQUAL(std::string("a"), traverser.value()); CPPUNIT_ASSERT(!traverser.hasSubLevel()); @@ -128,8 +113,7 @@ bool traverse1stLevel3(ml::core::CStateRestoreTraverser &traverser) return true; } -bool traverse1stLevel4(ml::core::CStateRestoreTraverser &traverser) -{ +bool traverse1stLevel4(ml::core::CStateRestoreTraverser& traverser) { CPPUNIT_ASSERT_EQUAL(std::string("level1A"), traverser.name()); CPPUNIT_ASSERT_EQUAL(std::string("a"), traverser.value()); CPPUNIT_ASSERT(!traverser.hasSubLevel()); @@ -145,11 +129,9 @@ bool traverse1stLevel4(ml::core::CStateRestoreTraverser &traverser) return true; } - } -void CJsonStateRestoreTraverserTest::testRestore1() -{ +void CJsonStateRestoreTraverserTest::testRestore1() { std::string json("{\"_source\":{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}}"); std::istringstream strm(json); @@ -161,9 +143,9 @@ void CJsonStateRestoreTraverserTest::testRestore1() CPPUNIT_ASSERT(!traverser.next()); } -void CJsonStateRestoreTraverserTest::testRestore2() -{ - std::string json("{\"_source\":{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"},\"level1D\":\"afterAscending\"}}"); +void CJsonStateRestoreTraverserTest::testRestore2() { + std::string json("{\"_source\":{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"},\"level1D\":" + "\"afterAscending\"}}"); std::istringstream strm(json); ml::core::CJsonStateRestoreTraverser traverser(strm); @@ -174,8 +156,7 @@ void CJsonStateRestoreTraverserTest::testRestore2() CPPUNIT_ASSERT(!traverser.next()); } -void CJsonStateRestoreTraverserTest::testRestore3() -{ +void CJsonStateRestoreTraverserTest::testRestore3() { std::string json("{\"_source\":{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{},\"level1D\":\"afterAscending\"}}"); std::istringstream strm(json); @@ -187,8 +168,7 @@ void CJsonStateRestoreTraverserTest::testRestore3() CPPUNIT_ASSERT(!traverser.next()); } -void CJsonStateRestoreTraverserTest::testRestore4() -{ +void CJsonStateRestoreTraverserTest::testRestore4() { std::string 
json("{\"_source\":{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}}"); std::istringstream strm(json); @@ -200,12 +180,10 @@ void CJsonStateRestoreTraverserTest::testRestore4() CPPUNIT_ASSERT(!traverser.next()); } -void CJsonStateRestoreTraverserTest::testParsingBooleanFields() -{ +void CJsonStateRestoreTraverserTest::testParsingBooleanFields() { // Even though the parser doesn't handle boolean fields it should not hiccup over them std::string json = std::string("{\"_index\" : \"categorization-test\", \"_type\" : \"categorizerState\",") + - std::string("\"_id\" : \"1\", \"_version\" : 2, \"found\" : true, ") + - std::string("\"_source\":{\"a\" :\"1\"}"); + std::string("\"_id\" : \"1\", \"_version\" : 2, \"found\" : true, ") + std::string("\"_source\":{\"a\" :\"1\"}"); std::istringstream strm(json); @@ -230,9 +208,9 @@ void CJsonStateRestoreTraverserTest::testParsingBooleanFields() CPPUNIT_ASSERT(traverser.hasSubLevel()); } -void CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays() -{ - std::string json("{\"_source\":{\"level1A\":\"a\",\"someArray\":[42],\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}}"); +void CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays() { + std::string json( + "{\"_source\":{\"level1A\":\"a\",\"someArray\":[42],\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}}"); std::istringstream strm(json); ml::core::CJsonStateRestoreTraverser traverser(strm); @@ -243,9 +221,9 @@ void CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays() CPPUNIT_ASSERT(!traverser.next()); } -void CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested() -{ - std::string json("{\"_source\":{\"level1A\":\"a\",\"someArray\":[{\"nestedArray\":[42]}],\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}}"); +void CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested() { + std::string json("{\"_source\":{\"level1A\":\"a\",\"someArray\":[{\"nestedArray\":[42]}],\"level1B\":\"25\",\"level1C\":{\"level2A\":" + "\"3.14\",\"level2B\":\"z\"}}}"); std::istringstream strm(json); ml::core::CJsonStateRestoreTraverser traverser(strm); diff --git a/lib/core/unittest/CJsonStateRestoreTraverserTest.h b/lib/core/unittest/CJsonStateRestoreTraverserTest.h index 589f9c59c9..ad6283f363 100644 --- a/lib/core/unittest/CJsonStateRestoreTraverserTest.h +++ b/lib/core/unittest/CJsonStateRestoreTraverserTest.h @@ -8,20 +8,17 @@ #include +class CJsonStateRestoreTraverserTest : public CppUnit::TestFixture { +public: + void testRestore1(); + void testRestore2(); + void testRestore3(); + void testRestore4(); + void testParsingBooleanFields(); + void testRestore1IgnoreArrays(); + void testRestore1IgnoreArraysNested(); -class CJsonStateRestoreTraverserTest : public CppUnit::TestFixture -{ - public: - void testRestore1(); - void testRestore2(); - void testRestore3(); - void testRestore4(); - void testParsingBooleanFields(); - void testRestore1IgnoreArrays(); - void testRestore1IgnoreArraysNested(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CJsonStateRestoreTraverserTest_h - diff --git a/lib/core/unittest/CLoggerTest.cc b/lib/core/unittest/CLoggerTest.cc index d8d47e8fab..efe807696f 100644 --- a/lib/core/unittest/CLoggerTest.cc +++ b/lib/core/unittest/CLoggerTest.cc @@ -22,37 +22,26 @@ namespace { #ifdef Windows -const char *TEST_PIPE_NAME = "\\\\.\\pipe\\testpipe"; +const char* TEST_PIPE_NAME = "\\\\.\\pipe\\testpipe"; #else -const char 
*TEST_PIPE_NAME = "testfiles/testpipe"; +const char* TEST_PIPE_NAME = "testfiles/testpipe"; #endif } -CppUnit::Test *CLoggerTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLoggerTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLoggerTest::testLogging", - &CLoggerTest::testLogging) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLoggerTest::testReconfiguration", - &CLoggerTest::testReconfiguration) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLoggerTest::testSetLevel", - &CLoggerTest::testSetLevel) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLoggerTest::testLogEnvironment", - &CLoggerTest::testLogEnvironment) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLoggerTest::testNonAsciiJsonLogging", - &CLoggerTest::testNonAsciiJsonLogging) ); +CppUnit::Test* CLoggerTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLoggerTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CLoggerTest::testLogging", &CLoggerTest::testLogging)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLoggerTest::testReconfiguration", &CLoggerTest::testReconfiguration)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLoggerTest::testSetLevel", &CLoggerTest::testSetLevel)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLoggerTest::testLogEnvironment", &CLoggerTest::testLogEnvironment)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CLoggerTest::testNonAsciiJsonLogging", &CLoggerTest::testNonAsciiJsonLogging)); return suiteOfTests; } -void CLoggerTest::testLogging() -{ +void CLoggerTest::testLogging() { std::string t("Test message"); LOG_TRACE("Trace"); @@ -62,26 +51,22 @@ void CLoggerTest::testLogging() LOG_INFO("Info " << std::boolalpha << true); LOG_AT_LEVEL("INFO", "Dynamic INFO " << false); LOG_WARN("Warn " << t); - LOG_AT_LEVEL("WARN", "Dynamic WARN " << "abc"); + LOG_AT_LEVEL("WARN", + "Dynamic WARN " + << "abc"); LOG_ERROR("Error " << 1000 << ' ' << 0.23124F); LOG_AT_LEVEL("ERROR", "Dynamic ERROR"); LOG_FATAL("Fatal - application to handle exit"); LOG_AT_LEVEL("FATAL", "Dynamic FATAL " << t); - try - { + try { LOG_ABORT("Throwing exception " << 1221U << ' ' << 0.23124); CPPUNIT_ASSERT(false); - } - catch (std::runtime_error &) - { - CPPUNIT_ASSERT(true); - } + } catch (std::runtime_error&) { CPPUNIT_ASSERT(true); } } -void CLoggerTest::testReconfiguration() -{ - ml::core::CLogger &logger = ml::core::CLogger::instance(); +void CLoggerTest::testReconfiguration() { + ml::core::CLogger& logger = ml::core::CLogger::instance(); LOG_DEBUG("Starting logger reconfiguration test"); @@ -101,9 +86,8 @@ void CLoggerTest::testReconfiguration() CPPUNIT_ASSERT(logger.hasBeenReconfigured()); } -void CLoggerTest::testSetLevel() -{ - ml::core::CLogger &logger = ml::core::CLogger::instance(); +void CLoggerTest::testSetLevel() { + ml::core::CLogger& logger = ml::core::CLogger::instance(); LOG_DEBUG("Starting logger level test"); @@ -157,28 +141,23 @@ void CLoggerTest::testSetLevel() LOG_DEBUG("Finished logger level test"); } -void CLoggerTest::testNonAsciiJsonLogging() -{ - std::vector messages {"Non-iso8859-15: 编码", "Non-ascii: üaöä", "Non-iso8859-15: 编码 test", "surrogate pair: 𐐷 test"}; +void CLoggerTest::testNonAsciiJsonLogging() { + std::vector messages{"Non-iso8859-15: 编码", "Non-ascii: üaöä", "Non-iso8859-15: 编码 test", "surrogate pair: 𐐷 test"}; std::ostringstream loggedData; - std::thread reader([&loggedData] - { + std::thread reader([&loggedData] { // wait a bit so that pipe has been created 
ml::core::CSleep::sleep(200); std::ifstream strm(TEST_PIPE_NAME); - std::copy(std::istreambuf_iterator(strm), - std::istreambuf_iterator(), - std::ostreambuf_iterator(loggedData)); + std::copy(std::istreambuf_iterator(strm), std::istreambuf_iterator(), std::ostreambuf_iterator(loggedData)); }); - ml::core::CLogger &logger = ml::core::CLogger::instance(); + ml::core::CLogger& logger = ml::core::CLogger::instance(); // logger might got reconfigured in previous tests, so reset and reconfigure it logger.reset(); logger.reconfigure(TEST_PIPE_NAME, ""); - for (const auto &m : messages) - { + for (const auto& m : messages) { LOG_INFO(m); } @@ -186,39 +165,32 @@ void CLoggerTest::testNonAsciiJsonLogging() logger.reset(); reader.join(); - std::istringstream inputStream (loggedData.str()); + std::istringstream inputStream(loggedData.str()); std::string line; size_t foundMessages = 0; // test that we found the messages we put in, - while (std::getline(inputStream, line)) - { - if (line.empty()) - { + while (std::getline(inputStream, line)) { + if (line.empty()) { continue; } rapidjson::Document doc; doc.Parse(line); CPPUNIT_ASSERT(!doc.HasParseError()); CPPUNIT_ASSERT(doc.HasMember("message")); - const rapidjson::Value &messageValue = doc["message"]; + const rapidjson::Value& messageValue = doc["message"]; std::string messageString(messageValue.GetString(), messageValue.GetStringLength()); // we expect messages to be in order, so we only need to test the current one - if (messageString.find(messages[foundMessages]) != std::string::npos) - { + if (messageString.find(messages[foundMessages]) != std::string::npos) { ++foundMessages; - } - else if (foundMessages > 0) - { + } else if (foundMessages > 0) { CPPUNIT_FAIL(messageString + " did not contain " + messages[foundMessages]); } } CPPUNIT_ASSERT_EQUAL(messages.size(), foundMessages); } -void CLoggerTest::testLogEnvironment() -{ +void CLoggerTest::testLogEnvironment() { ml::core::CLogger::instance().logEnvironment(); } - diff --git a/lib/core/unittest/CLoggerTest.h b/lib/core/unittest/CLoggerTest.h index 005fc79cac..3ea3d46b8e 100644 --- a/lib/core/unittest/CLoggerTest.h +++ b/lib/core/unittest/CLoggerTest.h @@ -8,18 +8,15 @@ #include +class CLoggerTest : public CppUnit::TestFixture { +public: + void testLogging(); + void testReconfiguration(); + void testSetLevel(); + void testLogEnvironment(); + void testNonAsciiJsonLogging(); -class CLoggerTest : public CppUnit::TestFixture -{ - public: - void testLogging(); - void testReconfiguration(); - void testSetLevel(); - void testLogEnvironment(); - void testNonAsciiJsonLogging(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CLoggerTest_h - diff --git a/lib/core/unittest/CMapPopulationTest.cc b/lib/core/unittest/CMapPopulationTest.cc index 73eeba6c85..96b0963d56 100644 --- a/lib/core/unittest/CMapPopulationTest.cc +++ b/lib/core/unittest/CMapPopulationTest.cc @@ -17,29 +17,22 @@ #include - const size_t CMapPopulationTest::FILL_SIZE(20); const size_t CMapPopulationTest::TEST_SIZE(200000); - -CMapPopulationTest::CMapPopulationTest() - : m_TestData(0) -{ +CMapPopulationTest::CMapPopulationTest() : m_TestData(0) { } -CppUnit::Test *CMapPopulationTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMapPopulationTest"); +CppUnit::Test* CMapPopulationTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMapPopulationTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMapPopulationTest::testMapInsertSpeed", - 
&CMapPopulationTest::testMapInsertSpeed) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CMapPopulationTest::testMapInsertSpeed", &CMapPopulationTest::testMapInsertSpeed)); return suiteOfTests; } -void CMapPopulationTest::setUp() -{ +void CMapPopulationTest::setUp() { // This class gets constructed once for every test, but by making the test // data static every test will use the same test data, which is important // for a fair comparison @@ -51,59 +44,45 @@ CMapPopulationTest::CTestData::CTestData(size_t fillSize) // It's essential these vectors don't resize as the char pointers in the // last two vectors point into the contents of the strings in the first two, // so set the correct size when they're constructed - : m_StringKeys(fillSize), - m_StringVals(fillSize), - m_CharPtrKeys(fillSize), - m_CharPtrVals(fillSize) -{ + : m_StringKeys(fillSize), m_StringVals(fillSize), m_CharPtrKeys(fillSize), m_CharPtrVals(fillSize) { // Set up test data such that each test uses identical data - for (size_t index = 0; index < fillSize; ++index) - { + for (size_t index = 0; index < fillSize; ++index) { // Keys are 4 to 12 letters followed by a unique number - for (int count = 4 + (::rand() % 9); count > 0; --count) - { + for (int count = 4 + (::rand() % 9); count > 0; --count) { m_StringKeys[index] += char('a' + ::rand() % 26); } m_StringKeys[index] += ml::core::CStringUtils::typeToString(index); m_CharPtrKeys[index] = m_StringKeys[index].c_str(); // Values are 16 to 32 printable ASCII characters in length - for (int count = 16 + (::rand() % 17); count > 0; --count) - { + for (int count = 16 + (::rand() % 17); count > 0; --count) { m_StringVals[index] += char(' ' + ::rand() % 95); } m_CharPtrVals[index] = m_StringVals[index].c_str(); } - for (size_t index = 0; index < fillSize; ++index) - { - LOG_DEBUG("Test entry " << index << ": " << - m_CharPtrKeys[index] << " -> " << m_CharPtrVals[index]); + for (size_t index = 0; index < fillSize; ++index) { + LOG_DEBUG("Test entry " << index << ": " << m_CharPtrKeys[index] << " -> " << m_CharPtrVals[index]); } } -const CMapPopulationTest::CTestData::TStrVec &CMapPopulationTest::CTestData::stringKeys() const -{ +const CMapPopulationTest::CTestData::TStrVec& CMapPopulationTest::CTestData::stringKeys() const { return m_StringKeys; } -const CMapPopulationTest::CTestData::TStrVec &CMapPopulationTest::CTestData::stringVals() const -{ +const CMapPopulationTest::CTestData::TStrVec& CMapPopulationTest::CTestData::stringVals() const { return m_StringVals; } -const CMapPopulationTest::CTestData::TCharPVec &CMapPopulationTest::CTestData::charPtrKeys() const -{ +const CMapPopulationTest::CTestData::TCharPVec& CMapPopulationTest::CTestData::charPtrKeys() const { return m_CharPtrKeys; } -const CMapPopulationTest::CTestData::TCharPVec &CMapPopulationTest::CTestData::charPtrVals() const -{ +const CMapPopulationTest::CTestData::TCharPVec& CMapPopulationTest::CTestData::charPtrVals() const { return m_CharPtrVals; } -void CMapPopulationTest::testMapInsertSpeed() -{ +void CMapPopulationTest::testMapInsertSpeed() { // Schedule all the other tests to be run in a thread pool - the number of // threads is chosen to be less than the number of cores so that the results // aren't skewed too much if other processes are running on the machine @@ -124,172 +103,133 @@ void CMapPopulationTest::testMapInsertSpeed() tp.wait(); } -void CMapPopulationTest::testMapInsertStr() -{ +void CMapPopulationTest::testMapInsertStr() { TStrStrMapVec testVec(TEST_SIZE); ml::core_t::TTime 
start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting map insert string test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting map insert string test at " << ml::core::CTimeUtils::toTimeString(start)); this->addInsert(m_TestData->stringKeys(), m_TestData->stringVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished map insert string test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished map insert string test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Map insert string test with fill size " << FILL_SIZE << - " and test size " << TEST_SIZE << " took " << (end - start) << - " seconds"); + LOG_INFO("Map insert string test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) + << " seconds"); } -void CMapPopulationTest::testMapInsertCharP() -{ +void CMapPopulationTest::testMapInsertCharP() { TStrStrMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting map insert char pointer test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting map insert char pointer test at " << ml::core::CTimeUtils::toTimeString(start)); this->addInsert(m_TestData->charPtrKeys(), m_TestData->charPtrVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished map insert char pointer test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished map insert char pointer test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Map insert char pointer test with fill size " << FILL_SIZE << - " and test size " << TEST_SIZE << " took " << (end - start) << - " seconds"); + LOG_INFO("Map insert char pointer test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) + << " seconds"); } -void CMapPopulationTest::testMapOpSqBracStr() -{ +void CMapPopulationTest::testMapOpSqBracStr() { TStrStrMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting map operator[] string test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting map operator[] string test at " << ml::core::CTimeUtils::toTimeString(start)); this->addOpSqBrac(m_TestData->stringKeys(), m_TestData->stringVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished map operator[] string test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished map operator[] string test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Map operator[] string test with fill size " << FILL_SIZE << - " and test size " << TEST_SIZE << " took " << (end - start) << - " seconds"); + LOG_INFO("Map operator[] string test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) + << " seconds"); } -void CMapPopulationTest::testMapOpSqBracCharP() -{ +void CMapPopulationTest::testMapOpSqBracCharP() { TStrStrMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting map operator[] char pointer test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting map operator[] char pointer test at " << ml::core::CTimeUtils::toTimeString(start)); this->addOpSqBrac(m_TestData->charPtrKeys(), m_TestData->charPtrVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished map operator[] char pointer test at " << - ml::core::CTimeUtils::toTimeString(end)); + 
LOG_INFO("Finished map operator[] char pointer test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Map operator[] char pointer test with fill size " << FILL_SIZE << - " and test size " << TEST_SIZE << " took " << (end - start) << - " seconds"); + LOG_INFO("Map operator[] char pointer test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) + << " seconds"); } -void CMapPopulationTest::testUMapInsertStr() -{ +void CMapPopulationTest::testUMapInsertStr() { TStrStrUMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting unordered map insert string test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting unordered map insert string test at " << ml::core::CTimeUtils::toTimeString(start)); this->addInsert(m_TestData->stringKeys(), m_TestData->stringVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished unordered map insert string test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished unordered map insert string test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Unordered map insert string test with fill size " << FILL_SIZE << - " and test size " << TEST_SIZE << " took " << (end - start) << - " seconds"); + LOG_INFO("Unordered map insert string test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) + << " seconds"); } -void CMapPopulationTest::testUMapInsertCharP() -{ +void CMapPopulationTest::testUMapInsertCharP() { TStrStrUMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting unordered map insert char pointer test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting unordered map insert char pointer test at " << ml::core::CTimeUtils::toTimeString(start)); this->addInsert(m_TestData->charPtrKeys(), m_TestData->charPtrVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished unordered map insert char pointer test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished unordered map insert char pointer test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Unordered map insert char pointer test with fill size " << FILL_SIZE << - " and test size " << TEST_SIZE << " took " << (end - start) << - " seconds"); + LOG_INFO("Unordered map insert char pointer test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " + << (end - start) << " seconds"); } -void CMapPopulationTest::testUMapOpSqBracStr() -{ +void CMapPopulationTest::testUMapOpSqBracStr() { TStrStrUMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting unordered map operator[] string test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting unordered map operator[] string test at " << ml::core::CTimeUtils::toTimeString(start)); this->addOpSqBrac(m_TestData->stringKeys(), m_TestData->stringVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished unordered map operator[] string test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished unordered map operator[] string test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Unordered map operator[] string test with fill size " << FILL_SIZE << - " and test size " << TEST_SIZE << " took " << (end - start) << - " seconds"); + LOG_INFO("Unordered map operator[] string test with fill 
size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " + << (end - start) << " seconds"); } -void CMapPopulationTest::testUMapOpSqBracCharP() -{ +void CMapPopulationTest::testUMapOpSqBracCharP() { TStrStrUMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting unordered map operator[] char pointer test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting unordered map operator[] char pointer test at " << ml::core::CTimeUtils::toTimeString(start)); this->addOpSqBrac(m_TestData->charPtrKeys(), m_TestData->charPtrVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished unordered map operator[] char pointer test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished unordered map operator[] char pointer test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Unordered map operator[] char pointer test with fill size " << FILL_SIZE << - " and test size " << TEST_SIZE << " took " << (end - start) << - " seconds"); + LOG_INFO("Unordered map operator[] char pointer test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " + << (end - start) << " seconds"); } -template -void CMapPopulationTest::addInsert(const INPUT_CONTAINER &keys, - const INPUT_CONTAINER &values, - MAP_CONTAINER &maps) const -{ - for (typename MAP_CONTAINER::iterator iter = maps.begin(); - iter != maps.end(); - ++iter) - { - typename MAP_CONTAINER::value_type &map = *iter; +template +void CMapPopulationTest::addInsert(const INPUT_CONTAINER& keys, const INPUT_CONTAINER& values, MAP_CONTAINER& maps) const { + for (typename MAP_CONTAINER::iterator iter = maps.begin(); iter != maps.end(); ++iter) { + typename MAP_CONTAINER::value_type& map = *iter; size_t limit(std::min(keys.size(), values.size())); - for (size_t index = 0; index < limit; ++index) - { + for (size_t index = 0; index < limit; ++index) { map.insert(typename MAP_CONTAINER::value_type::value_type(keys[index], values[index])); } @@ -297,24 +237,16 @@ void CMapPopulationTest::addInsert(const INPUT_CONTAINER &keys, } } -template -void CMapPopulationTest::addOpSqBrac(const INPUT_CONTAINER &keys, - const INPUT_CONTAINER &values, - MAP_CONTAINER &maps) const -{ - for (typename MAP_CONTAINER::iterator iter = maps.begin(); - iter != maps.end(); - ++iter) - { - typename MAP_CONTAINER::value_type &map = *iter; +template +void CMapPopulationTest::addOpSqBrac(const INPUT_CONTAINER& keys, const INPUT_CONTAINER& values, MAP_CONTAINER& maps) const { + for (typename MAP_CONTAINER::iterator iter = maps.begin(); iter != maps.end(); ++iter) { + typename MAP_CONTAINER::value_type& map = *iter; size_t limit(std::min(keys.size(), values.size())); - for (size_t index = 0; index < limit; ++index) - { + for (size_t index = 0; index < limit; ++index) { map[keys[index]] = values[index]; } CPPUNIT_ASSERT_EQUAL(limit, map.size()); } } - diff --git a/lib/core/unittest/CMapPopulationTest.h b/lib/core/unittest/CMapPopulationTest.h index 2fb871fcf4..20f0e2c5ce 100644 --- a/lib/core/unittest/CMapPopulationTest.h +++ b/lib/core/unittest/CMapPopulationTest.h @@ -14,74 +14,66 @@ #include #include +class CMapPopulationTest : public CppUnit::TestFixture { +public: + CMapPopulationTest(); -class CMapPopulationTest : public CppUnit::TestFixture -{ - public: - CMapPopulationTest(); - - void testMapInsertSpeed(); + void testMapInsertSpeed(); - //! For performance on multi-core hardware, these tests are all run from - //! 
the thread pool - void testMapInsertStr(); - void testMapInsertCharP(); - void testMapOpSqBracStr(); - void testMapOpSqBracCharP(); - void testUMapInsertStr(); - void testUMapInsertCharP(); - void testUMapOpSqBracStr(); - void testUMapOpSqBracCharP(); + //! For performance on multi-core hardware, these tests are all run from + //! the thread pool + void testMapInsertStr(); + void testMapInsertCharP(); + void testMapOpSqBracStr(); + void testMapOpSqBracCharP(); + void testUMapInsertStr(); + void testUMapInsertCharP(); + void testUMapOpSqBracStr(); + void testUMapOpSqBracCharP(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); - void setUp(); + void setUp(); - private: - class CTestData - { - public: - using TStrVec = std::vector ; - using TCharPVec = std::vector; +private: + class CTestData { + public: + using TStrVec = std::vector; + using TCharPVec = std::vector; - public: - CTestData(size_t fillSize); + public: + CTestData(size_t fillSize); - const TStrVec &stringKeys() const; - const TStrVec &stringVals() const; - const TCharPVec &charPtrKeys() const; - const TCharPVec &charPtrVals() const; + const TStrVec& stringKeys() const; + const TStrVec& stringVals() const; + const TCharPVec& charPtrKeys() const; + const TCharPVec& charPtrVals() const; - private: - TStrVec m_StringKeys; - TStrVec m_StringVals; + private: + TStrVec m_StringKeys; + TStrVec m_StringVals; - TCharPVec m_CharPtrKeys; - TCharPVec m_CharPtrVals; - }; + TCharPVec m_CharPtrKeys; + TCharPVec m_CharPtrVals; + }; - private: - using TStrStrMap = std::map; - using TStrStrMapVec = std::vector; - using TStrStrUMap = boost::unordered_map; - using TStrStrUMapVec = std::vector; +private: + using TStrStrMap = std::map; + using TStrStrMapVec = std::vector; + using TStrStrUMap = boost::unordered_map; + using TStrStrUMapVec = std::vector; - template - void addInsert(const INPUT_CONTAINER &keys, - const INPUT_CONTAINER &values, - MAP_CONTAINER &maps) const; + template + void addInsert(const INPUT_CONTAINER& keys, const INPUT_CONTAINER& values, MAP_CONTAINER& maps) const; - template - void addOpSqBrac(const INPUT_CONTAINER &keys, - const INPUT_CONTAINER &values, - MAP_CONTAINER &maps) const; + template + void addOpSqBrac(const INPUT_CONTAINER& keys, const INPUT_CONTAINER& values, MAP_CONTAINER& maps) const; - private: - static const size_t FILL_SIZE; - static const size_t TEST_SIZE; +private: + static const size_t FILL_SIZE; + static const size_t TEST_SIZE; - const CTestData *m_TestData; + const CTestData* m_TestData; }; #endif // INCLUDED_CMapPopulationTest_h - diff --git a/lib/core/unittest/CMemoryUsageJsonWriterTest.cc b/lib/core/unittest/CMemoryUsageJsonWriterTest.cc index 6d5907cdfd..863539bf04 100644 --- a/lib/core/unittest/CMemoryUsageJsonWriterTest.cc +++ b/lib/core/unittest/CMemoryUsageJsonWriterTest.cc @@ -13,20 +13,16 @@ using namespace ml; -CppUnit::Test *CMemoryUsageJsonWriterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMemoryUsageJsonWriterTest"); +CppUnit::Test* CMemoryUsageJsonWriterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageJsonWriterTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMemoryUsageJsonWriterTest::test", - &CMemoryUsageJsonWriterTest::test) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CMemoryUsageJsonWriterTest::test", &CMemoryUsageJsonWriterTest::test)); return suiteOfTests; } - -void CMemoryUsageJsonWriterTest::test() -{ +void CMemoryUsageJsonWriterTest::test() { { // Check that adding 
nothing produces nothing std::ostringstream ss; @@ -94,7 +90,9 @@ void CMemoryUsageJsonWriterTest::test() writer.endArray(); writer.endObject(); writer.finalise(); - CPPUNIT_ASSERT_EQUAL(std::string("{\"Hello\":{\"memory\":223},\"Sheeple\":[{\"Womple\":{\"memory\":44}},{\"Whimple\":{\"memory\":66},\"magic\":{\"memory\":7777}}]}\n"), ss.str()); + CPPUNIT_ASSERT_EQUAL(std::string("{\"Hello\":{\"memory\":223},\"Sheeple\":[{\"Womple\":{\"memory\":44}},{\"Whimple\":{\"memory\":" + "66},\"magic\":{\"memory\":7777}}]}\n"), + ss.str()); } { // Check sub-object @@ -113,6 +111,8 @@ void CMemoryUsageJsonWriterTest::test() writer.endArray(); writer.endObject(); writer.finalise(); - CPPUNIT_ASSERT_EQUAL(std::string("{\"Hello\":{\"memory\":223},\"Sheeple\":[{\"Dumplings\":{\"memory\":345},\"Gravy\":{\"memory\":12341234}}]}\n"), ss.str()); + CPPUNIT_ASSERT_EQUAL( + std::string("{\"Hello\":{\"memory\":223},\"Sheeple\":[{\"Dumplings\":{\"memory\":345},\"Gravy\":{\"memory\":12341234}}]}\n"), + ss.str()); } } diff --git a/lib/core/unittest/CMemoryUsageJsonWriterTest.h b/lib/core/unittest/CMemoryUsageJsonWriterTest.h index 47bf06283e..8ae76acfcb 100644 --- a/lib/core/unittest/CMemoryUsageJsonWriterTest.h +++ b/lib/core/unittest/CMemoryUsageJsonWriterTest.h @@ -8,13 +8,11 @@ #include +class CMemoryUsageJsonWriterTest : public CppUnit::TestFixture { +public: + void test(); -class CMemoryUsageJsonWriterTest : public CppUnit::TestFixture -{ - public: - void test(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CMemoryUsageJsonWriterTest_h diff --git a/lib/core/unittest/CMemoryUsageTest.cc b/lib/core/unittest/CMemoryUsageTest.cc index f5595053fa..0ad5d3987e 100644 --- a/lib/core/unittest/CMemoryUsageTest.cc +++ b/lib/core/unittest/CMemoryUsageTest.cc @@ -28,99 +28,73 @@ using namespace ml; -namespace -{ +namespace { // Subset of model_t equivalent duplicated here to avoid a dependency // with the model library -enum EFeature -{ - E_IndividualHighMeanByPerson, - E_IndividualCountByBucketAndPerson, - E_IndividualHighCountsByBucketAndPerson -}; +enum EFeature { E_IndividualHighMeanByPerson, E_IndividualCountByBucketAndPerson, E_IndividualHighCountsByBucketAndPerson }; using TIntVec = std::vector; using TStrVec = std::vector; -struct SPod -{ +struct SPod { double s_V1; double s_V2; int s_V3; }; -struct SFoo -{ +struct SFoo { static bool dynamicSizeAlwaysZero() { return true; } explicit SFoo(std::size_t key = 0) : s_Key(key) {} - bool operator<(const SFoo &rhs) const { return s_Key < rhs.s_Key; } - bool operator==(const SFoo &rhs) const { return s_Key == rhs.s_Key; } + bool operator<(const SFoo& rhs) const { return s_Key < rhs.s_Key; } + bool operator==(const SFoo& rhs) const { return s_Key == rhs.s_Key; } std::size_t s_Key; double s_State[100]; }; -struct SFooWithMemoryUsage -{ +struct SFooWithMemoryUsage { explicit SFooWithMemoryUsage(std::size_t key = 0) : s_Key(key) {} - bool operator<(const SFooWithMemoryUsage &rhs) const { return s_Key < rhs.s_Key; } - bool operator==(const SFooWithMemoryUsage &rhs) const { return s_Key == rhs.s_Key; } - std::size_t memoryUsage() const - { - return 0; - } + bool operator<(const SFooWithMemoryUsage& rhs) const { return s_Key < rhs.s_Key; } + bool operator==(const SFooWithMemoryUsage& rhs) const { return s_Key == rhs.s_Key; } + std::size_t memoryUsage() const { return 0; } - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("SFooWithMemoryUsage", 0); - } + void 
debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SFooWithMemoryUsage", 0); } std::size_t s_Key; double s_State[100]; }; -struct SFooWrapper -{ - std::size_t memoryUsage() const - { +struct SFooWrapper { + std::size_t memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(s_Foo); return mem; } SFooWithMemoryUsage s_Foo; }; -struct SBar -{ +struct SBar { using TFooVec = std::vector; explicit SBar(std::size_t key = 0) : s_Key(key), s_State() {} - bool operator<(const SBar &rhs) const { return s_Key < rhs.s_Key; } - bool operator==(const SBar &rhs) const { return s_Key == rhs.s_Key; } - std::size_t memoryUsage() const - { - return sizeof(SFoo) * s_State.capacity(); - } + bool operator<(const SBar& rhs) const { return s_Key < rhs.s_Key; } + bool operator==(const SBar& rhs) const { return s_Key == rhs.s_Key; } + std::size_t memoryUsage() const { return sizeof(SFoo) * s_State.capacity(); } std::size_t s_Key; TFooVec s_State; }; -struct SBarDebug -{ +struct SBarDebug { using TFooVec = std::vector; explicit SBarDebug(std::size_t key = 0) : s_Key(key), s_State() {} - bool operator<(const SBarDebug &rhs) const { return s_Key < rhs.s_Key; } - bool operator==(const SBarDebug &rhs) const { return s_Key == rhs.s_Key; } - std::size_t memoryUsage() const - { - return sizeof(SFoo) * s_State.capacity(); - } + bool operator<(const SBarDebug& rhs) const { return s_Key < rhs.s_Key; } + bool operator==(const SBarDebug& rhs) const { return s_Key == rhs.s_Key; } + std::size_t memoryUsage() const { return sizeof(SFoo) * s_State.capacity(); } - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SBarDebug", 0); core::CMemoryDebug::dynamicSize("s_State", s_State, mem); } @@ -129,20 +103,15 @@ struct SBarDebug TFooVec s_State; }; -struct SBarVectorDebug -{ +struct SBarVectorDebug { using TFooVec = std::vector; explicit SBarVectorDebug(std::size_t key = 0) : s_Key(key), s_State() {} - bool operator<(const SBarVectorDebug &rhs) const { return s_Key < rhs.s_Key; } - bool operator==(const SBarVectorDebug &rhs) const { return s_Key == rhs.s_Key; } - std::size_t memoryUsage() const - { - return core::CMemory::dynamicSize(s_State); - } + bool operator<(const SBarVectorDebug& rhs) const { return s_Key < rhs.s_Key; } + bool operator==(const SBarVectorDebug& rhs) const { return s_Key == rhs.s_Key; } + std::size_t memoryUsage() const { return core::CMemory::dynamicSize(s_State); } - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SBarVectorDebug", 0); core::CMemoryDebug::dynamicSize("s_State", s_State, mem); } @@ -151,169 +120,122 @@ struct SBarVectorDebug TFooVec s_State; }; -struct SHash -{ - std::size_t operator()(const SFoo &foo) const { return foo.s_Key; } - std::size_t operator()(const SFooWithMemoryUsage &foo) const { return foo.s_Key; } - std::size_t operator()(const SBar &bar) const { return bar.s_Key; } +struct SHash { + std::size_t operator()(const SFoo& foo) const { return foo.s_Key; } + std::size_t operator()(const SFooWithMemoryUsage& foo) const { return foo.s_Key; } + std::size_t operator()(const SBar& bar) const { return bar.s_Key; } }; -class CBase -{ - public: - CBase(std::size_t i) : m_Vec(i, 0) {} +class CBase { +public: + CBase(std::size_t i) : m_Vec(i, 0) {} - virtual ~CBase() = default; + virtual ~CBase() = default; - virtual std::size_t 
memoryUsage() const - { - return core::CMemory::dynamicSize(m_Vec); - } + virtual std::size_t memoryUsage() const { return core::CMemory::dynamicSize(m_Vec); } - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CBase", 0); - core::CMemoryDebug::dynamicSize("m_Vec", m_Vec, mem); - } + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CBase", 0); + core::CMemoryDebug::dynamicSize("m_Vec", m_Vec, mem); + } - virtual std::size_t staticSize() const - { - return sizeof(*this); - } + virtual std::size_t staticSize() const { return sizeof(*this); } - private: - uint64_t m_Fixed[5]; - TIntVec m_Vec; +private: + uint64_t m_Fixed[5]; + TIntVec m_Vec; }; -class CDerived : public CBase -{ - public: - CDerived(std::size_t i) : CBase(i), m_Strings(i, "This is a secret string") {} +class CDerived : public CBase { +public: + CDerived(std::size_t i) : CBase(i), m_Strings(i, "This is a secret string") {} - virtual ~CDerived() = default; + virtual ~CDerived() = default; - virtual std::size_t memoryUsage() const - { - std::size_t mem = core::CMemory::dynamicSize(m_Strings); - mem += this->CBase::memoryUsage(); - return mem; - } + virtual std::size_t memoryUsage() const { + std::size_t mem = core::CMemory::dynamicSize(m_Strings); + mem += this->CBase::memoryUsage(); + return mem; + } - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const - { - mem->setName("CDerived", 0); - core::CMemoryDebug::dynamicSize("m_Strings", m_Strings, mem); - this->CBase::debugMemoryUsage(mem->addChild()); - } + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("CDerived", 0); + core::CMemoryDebug::dynamicSize("m_Strings", m_Strings, mem); + this->CBase::debugMemoryUsage(mem->addChild()); + } - virtual std::size_t staticSize() const - { - return sizeof(*this); - } + virtual std::size_t staticSize() const { return sizeof(*this); } - private: - uint64_t m_Fixed[50]; - TStrVec m_Strings; +private: + uint64_t m_Fixed[50]; + TStrVec m_Strings; }; //! 
A basic allocator that tracks memory usage template -class CTrackingAllocator -{ - public: - using value_type = T; - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - public: - // convert an allocator to allocator - template - struct rebind - { - using other = CTrackingAllocator; - }; - - public: - CTrackingAllocator() = default; - CTrackingAllocator(const CTrackingAllocator &) = default; - - template - inline CTrackingAllocator(const CTrackingAllocator &) - { - } - - // address - inline pointer address(reference r) - { - return &r; - } - - inline const_pointer address(const_reference r) - { - return &r; - } - - // memory allocation - inline pointer allocate(size_type cnt, typename std::allocator::const_pointer = 0) - { - ms_Allocated += cnt; - return reinterpret_cast(::operator new(cnt * sizeof (T))); - } +class CTrackingAllocator { +public: + using value_type = T; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + +public: + // convert an allocator to allocator + template + struct rebind { + using other = CTrackingAllocator; + }; + +public: + CTrackingAllocator() = default; + CTrackingAllocator(const CTrackingAllocator&) = default; + + template + inline CTrackingAllocator(const CTrackingAllocator&) {} + + // address + inline pointer address(reference r) { return &r; } + + inline const_pointer address(const_reference r) { return &r; } + + // memory allocation + inline pointer allocate(size_type cnt, typename std::allocator::const_pointer = 0) { + ms_Allocated += cnt; + return reinterpret_cast(::operator new(cnt * sizeof(T))); + } - inline void deallocate(pointer p, size_type cnt) - { - ms_Allocated -= cnt; - ::operator delete(p); - } + inline void deallocate(pointer p, size_type cnt) { + ms_Allocated -= cnt; + ::operator delete(p); + } - // size - inline size_type max_size() const - { - return std::numeric_limits::max() / sizeof(T); - } + // size + inline size_type max_size() const { return std::numeric_limits::max() / sizeof(T); } - static std::size_t usage() - { - return ms_Allocated; - } + static std::size_t usage() { return ms_Allocated; } - // construction/destruction - inline void construct(pointer p, const T &t) - { - new(p) T(t); - } + // construction/destruction + inline void construct(pointer p, const T& t) { new (p) T(t); } - inline void destroy(pointer p) - { - p->~T(); - } + inline void destroy(pointer p) { p->~T(); } - inline bool operator==(const CTrackingAllocator &) const - { - return true; - } + inline bool operator==(const CTrackingAllocator&) const { return true; } - inline bool operator!=(const CTrackingAllocator &a) const - { - return !operator==(a); - } + inline bool operator!=(const CTrackingAllocator& a) const { return !operator==(a); } - private: - static std::size_t ms_Allocated; +private: + static std::size_t ms_Allocated; }; template std::size_t CTrackingAllocator::ms_Allocated = 0; - } -void CMemoryUsageTest::testUsage() -{ +void CMemoryUsageTest::testUsage() { using TFooVec = std::vector; using TFooWithMemoryVec = std::vector; using TFooList = std::list; @@ -351,8 +273,7 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TFooVec ***"); LOG_DEBUG("dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); 
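// --- Editor's sketch (illustration only, not part of this patch) ---
// The convention the surrounding assertions exercise, stated once:
// core::CMemory::dynamicSize() uses a type's memoryUsage() member when it
// has one, and skips the per-object lookup altogether when the type
// declares dynamicSizeAlwaysZero(). The two structs below are hypothetical
// names, but they mirror SFoo and SFooWithMemoryUsage above.
#include <cstddef>
#include <vector>

struct SNoHeap {
    // No heap-allocated state, so the dynamic size is provably zero and
    // the framework never needs to ask each instance.
    static bool dynamicSizeAlwaysZero() { return true; }
    double s_State[100];
};

struct SWithHeap {
    // Report only the heap bytes this object owns; callers such as
    // core::CMemory::dynamicSize(T*) add sizeof(SWithHeap) themselves.
    std::size_t memoryUsage() const { return s_State.capacity() * sizeof(double); }
    std::vector<double> s_State;
};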
LOG_DEBUG("dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), - core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); } { TFooList foos(10); @@ -361,8 +282,7 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TFooList ***"); LOG_DEBUG("dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); LOG_DEBUG("dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), - core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); } { TFooDeque foos(10); @@ -371,8 +291,7 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TFooDeque ***"); LOG_DEBUG("dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); LOG_DEBUG("dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), - core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); } { TFooCircBuf foos(10); @@ -383,16 +302,14 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TFooCircBuf ***"); LOG_DEBUG("dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); LOG_DEBUG("dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), - core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); } { TFooFooMap foos; TFooWithMemoryFooWithMemoryMap foosWithMemory; - std::size_t keys[] = { 0, 1, 2, 3, 4, 5 }; - for (std::size_t i = 0u; i < boost::size(keys); ++i) - { + std::size_t keys[] = {0, 1, 2, 3, 4, 5}; + for (std::size_t i = 0u; i < boost::size(keys); ++i) { foos[SFoo(keys[i])] = SFoo(keys[i]); foosWithMemory[SFooWithMemoryUsage(keys[i])] = SFooWithMemoryUsage(keys[i]); } @@ -400,16 +317,14 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TFooFooMap ***"); LOG_DEBUG("dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); LOG_DEBUG("dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), - core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); } { TFooFooUMap foos; TFooWithMemoryFooWithMemoryUMap foosWithMemory; - std::size_t keys[] = { 0, 1, 2, 3, 4, 5 }; - for (std::size_t i = 0u; i < boost::size(keys); ++i) - { + std::size_t keys[] = {0, 1, 2, 3, 4, 5}; + for (std::size_t i = 0u; i < boost::size(keys); ++i) { foos[SFoo(keys[i])] = SFoo(keys[i]); foosWithMemory[SFooWithMemoryUsage(keys[i])] = SFooWithMemoryUsage(keys[i]); } @@ -417,22 +332,19 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TFooFooUMap ***"); LOG_DEBUG("dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); LOG_DEBUG("dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), - core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); } { TFooFSet foos; - std::size_t keys[] = { 0, 1, 2, 3, 4, 5 }; - for (std::size_t i = 0u; i < boost::size(keys); ++i) - 
{ + std::size_t keys[] = {0, 1, 2, 3, 4, 5}; + for (std::size_t i = 0u; i < boost::size(keys); ++i) { foos.insert(SFoo(keys[i])); } LOG_DEBUG("*** TFooFSet ***"); LOG_DEBUG("dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), - foos.capacity() * sizeof(SFoo)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), foos.capacity() * sizeof(SFoo)); } { @@ -463,13 +375,10 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TBarVec ***"); LOG_DEBUG("dynamic size = " << core::CMemory::dynamicSize(bars1)); - LOG_DEBUG("expected dynamic size = " << core::CMemory::dynamicSize(bars2) - + core::CMemory::dynamicSize(state21) - + core::CMemory::dynamicSize(state22)); - CPPUNIT_ASSERT_EQUAL( core::CMemory::dynamicSize(bars1), - core::CMemory::dynamicSize(bars2) - + core::CMemory::dynamicSize(state21) - + core::CMemory::dynamicSize(state22)); + LOG_DEBUG("expected dynamic size = " << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(state21) + + core::CMemory::dynamicSize(state22)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(bars1), + core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(state21) + core::CMemory::dynamicSize(state22)); } { SBar key; @@ -485,13 +394,10 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TBarBarMap ***"); LOG_DEBUG("dynamic size = " << core::CMemory::dynamicSize(bars1)); - LOG_DEBUG("expected dynamic size = " << core::CMemory::dynamicSize(bars2) - + core::CMemory::dynamicSize(key) - + core::CMemory::dynamicSize(value)); - CPPUNIT_ASSERT_EQUAL( core::CMemory::dynamicSize(bars1), - core::CMemory::dynamicSize(bars2) - + core::CMemory::dynamicSize(key) - + core::CMemory::dynamicSize(value)); + LOG_DEBUG("expected dynamic size = " << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(bars1), + core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); } { SBar key; @@ -507,13 +413,10 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TBarBarUMap ***"); LOG_DEBUG("dynamic size = " << core::CMemory::dynamicSize(bars1)); - LOG_DEBUG("expected dynamic size = " << core::CMemory::dynamicSize(bars2) - + core::CMemory::dynamicSize(key) - + core::CMemory::dynamicSize(value)); - CPPUNIT_ASSERT_EQUAL( core::CMemory::dynamicSize(bars1), - core::CMemory::dynamicSize(bars2) - + core::CMemory::dynamicSize(key) - + core::CMemory::dynamicSize(value)); + LOG_DEBUG("expected dynamic size = " << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(bars1), + core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); } { SBar key; @@ -533,13 +436,10 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TBarBarFMap ***"); LOG_DEBUG("dynamic size = " << core::CMemory::dynamicSize(bars1)); - LOG_DEBUG("expected dynamic size = " << core::CMemory::dynamicSize(bars2) - + core::CMemory::dynamicSize(key) - + core::CMemory::dynamicSize(value)); - CPPUNIT_ASSERT_EQUAL( core::CMemory::dynamicSize(bars1), - core::CMemory::dynamicSize(bars2) - + core::CMemory::dynamicSize(key) - + core::CMemory::dynamicSize(value)); + LOG_DEBUG("expected dynamic size = " << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); + 
CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(bars1), + core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); } { SBar value; @@ -549,11 +449,8 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG("*** TBarPtr ***"); LOG_DEBUG("dynamic size = " << core::CMemory::dynamicSize(pointer)); - LOG_DEBUG("expected dynamic size = " << sizeof(SBar) - + sizeof(SFoo) * value.s_State.capacity()); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(pointer), - sizeof(SBar) - + sizeof(SFoo) * value.s_State.capacity()); + LOG_DEBUG("expected dynamic size = " << sizeof(SBar) + sizeof(SFoo) * value.s_State.capacity()); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(pointer), sizeof(SBar) + sizeof(SFoo) * value.s_State.capacity()); } { @@ -570,27 +467,20 @@ void CMemoryUsageTest::testUsage() variables.push_back(b); LOG_DEBUG("wrong dynamic size = " << core::CMemory::dynamicSize(variables)); - CPPUNIT_ASSERT_EQUAL(variables.capacity() * sizeof(std::size_t), - core::CMemory::dynamicSize(variables)); + CPPUNIT_ASSERT_EQUAL(variables.capacity() * sizeof(std::size_t), core::CMemory::dynamicSize(variables)); - core::CMemory::CAnyVisitor &visitor = core::CMemory::anyVisitor(); + core::CMemory::CAnyVisitor& visitor = core::CMemory::anyVisitor(); visitor.registerCallback(); visitor.registerCallback(); LOG_DEBUG("dynamic size = " << core::CMemory::dynamicSize(variables)); - LOG_DEBUG("expected dynamic size = " << variables.capacity() * sizeof(std::size_t) - + sizeof(a) - + core::CMemory::dynamicSize(a) - + sizeof(b) - + core::CMemory::dynamicSize(b)); - CPPUNIT_ASSERT_EQUAL( variables.capacity() * sizeof(std::size_t) - + sizeof(a) - + core::CMemory::dynamicSize(a) - + sizeof(b) - + core::CMemory::dynamicSize(b), + LOG_DEBUG("expected dynamic size = " << variables.capacity() * sizeof(std::size_t) + sizeof(a) + core::CMemory::dynamicSize(a) + + sizeof(b) + core::CMemory::dynamicSize(b)); + CPPUNIT_ASSERT_EQUAL(variables.capacity() * sizeof(std::size_t) + sizeof(a) + core::CMemory::dynamicSize(a) + sizeof(b) + + core::CMemory::dynamicSize(b), core::CMemory::dynamicSize(variables)); - core::CMemoryDebug::CAnyVisitor &debugVisitor = core::CMemoryDebug::anyVisitor(); + core::CMemoryDebug::CAnyVisitor& debugVisitor = core::CMemoryDebug::anyVisitor(); debugVisitor.registerCallback(); debugVisitor.registerCallback(); @@ -602,8 +492,8 @@ void CMemoryUsageTest::testUsage() LOG_DEBUG(ss.str()); } { - CBase * base = new CBase(10); - CBase * derived = new CDerived(10); + CBase* base = new CBase(10); + CBase* derived = new CDerived(10); { core::CMemoryUsage mem; core::CMemoryDebug::dynamicSize("", *base, &mem); @@ -656,24 +546,20 @@ void CMemoryUsageTest::testUsage() CBase base(5); CPPUNIT_ASSERT_EQUAL(base.memoryUsage(), core::CMemory::dynamicSize(base)); - CBase * basePtr = new CBase(5); - CPPUNIT_ASSERT_EQUAL(basePtr->memoryUsage() + sizeof(*basePtr), - core::CMemory::dynamicSize(basePtr)); + CBase* basePtr = new CBase(5); + CPPUNIT_ASSERT_EQUAL(basePtr->memoryUsage() + sizeof(*basePtr), core::CMemory::dynamicSize(basePtr)); CDerived derived(6); CPPUNIT_ASSERT_EQUAL(derived.memoryUsage(), core::CMemory::dynamicSize(derived)); - CDerived * derivedPtr = new CDerived(5); - CPPUNIT_ASSERT_EQUAL(derivedPtr->memoryUsage() + sizeof(*derivedPtr), - core::CMemory::dynamicSize(derivedPtr)); + CDerived* derivedPtr = new CDerived(5); + CPPUNIT_ASSERT_EQUAL(derivedPtr->memoryUsage() + sizeof(*derivedPtr), core::CMemory::dynamicSize(derivedPtr)); - CBase * basederivedPtr = new CDerived(5); - 
CPPUNIT_ASSERT_EQUAL(basederivedPtr->memoryUsage() + sizeof(CDerived), - core::CMemory::dynamicSize(basederivedPtr)); + CBase* basederivedPtr = new CDerived(5); + CPPUNIT_ASSERT_EQUAL(basederivedPtr->memoryUsage() + sizeof(CDerived), core::CMemory::dynamicSize(basederivedPtr)); TBasePtr sPtr(new CDerived(6)); - CPPUNIT_ASSERT_EQUAL(sPtr->memoryUsage() + sizeof(CDerived), - core::CMemory::dynamicSize(sPtr)); + CPPUNIT_ASSERT_EQUAL(sPtr->memoryUsage() + sizeof(CDerived), core::CMemory::dynamicSize(sPtr)); } { TDerivedVec vec; @@ -686,8 +572,7 @@ void CMemoryUsageTest::testUsage() vec.push_back(CDerived(12)); std::size_t total = core::CMemory::dynamicSize(vec); std::size_t calc = vec.capacity() * sizeof(CDerived); - for (std::size_t i = 0; i < vec.size(); ++i) - { + for (std::size_t i = 0; i < vec.size(); ++i) { calc += vec[i].memoryUsage(); } CPPUNIT_ASSERT_EQUAL(calc, total); @@ -704,19 +589,17 @@ void CMemoryUsageTest::testUsage() std::size_t total = core::CMemory::dynamicSize(vec); std::size_t calc = vec.capacity() * sizeof(TBasePtr); - for (std::size_t i = 0; i < 6; ++i) - { - calc += static_cast(vec[i].get())->memoryUsage(); + for (std::size_t i = 0; i < 6; ++i) { + calc += static_cast(vec[i].get())->memoryUsage(); calc += sizeof(CBase); } - calc += static_cast(vec[6].get())->memoryUsage(); + calc += static_cast(vec[6].get())->memoryUsage(); calc += sizeof(CDerived); CPPUNIT_ASSERT_EQUAL(calc, total); } } -void CMemoryUsageTest::testDebug() -{ +void CMemoryUsageTest::testDebug() { using TBarVec = std::vector; using TBarVecPtr = boost::shared_ptr; @@ -725,8 +608,7 @@ void CMemoryUsageTest::testDebug() SBar sbar; SBarDebug sbarDebug; SBarVectorDebug sbarVectorDebug; - for (unsigned i = 0; i < 9; ++i) - { + for (unsigned i = 0; i < 9; ++i) { sbar.s_State.push_back(SFoo(i)); sbarDebug.s_State.push_back(SFoo(i)); sbarVectorDebug.s_State.push_back(SFooWithMemoryUsage(i)); @@ -749,8 +631,7 @@ void CMemoryUsageTest::testDebug() std::ostringstream ss; memoryUsage.print(ss); LOG_TRACE("SBarVectorDebug: " + ss.str()); - LOG_TRACE("memoryUsage: " << sbarVectorDebug.memoryUsage() << - ", debugUsage: " << memoryUsage.usage()); + LOG_TRACE("memoryUsage: " << sbarVectorDebug.memoryUsage() << ", debugUsage: " << memoryUsage.usage()); CPPUNIT_ASSERT_EQUAL(sbarVectorDebug.memoryUsage(), memoryUsage.usage()); } } @@ -767,8 +648,7 @@ void CMemoryUsageTest::testDebug() core::CMemoryDebug::dynamicSize("TBarVecPtr", t, memoryUsage.addChild()); std::ostringstream ss; memoryUsage.print(ss); - LOG_TRACE("TBarVecPtr usage: " << core::CMemory::dynamicSize(t) << ", debug: " << - memoryUsage.usage()); + LOG_TRACE("TBarVecPtr usage: " << core::CMemory::dynamicSize(t) << ", debug: " << memoryUsage.usage()); LOG_TRACE(ss.str()); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(t), memoryUsage.usage()); } @@ -784,13 +664,11 @@ void CMemoryUsageTest::testDebug() vec->push_back(SBar(3)); vec->push_back(SBar(4)); - t.push_back(TFeatureBarVecPtrPr(E_IndividualHighMeanByPerson, - vec)); + t.push_back(TFeatureBarVecPtrPr(E_IndividualHighMeanByPerson, vec)); TBarVecPtr vec2(new TBarVec()); vec2->push_back(SBar(22)); vec2->push_back(SBar(33)); - t.push_back(TFeatureBarVecPtrPr(E_IndividualCountByBucketAndPerson, - vec)); + t.push_back(TFeatureBarVecPtrPr(E_IndividualCountByBucketAndPerson, vec)); t.push_back(TFeatureBarVecPtrPr(E_IndividualHighCountsByBucketAndPerson, TBarVecPtr())); core::CMemoryUsage memoryUsage; @@ -798,24 +676,22 @@ void CMemoryUsageTest::testDebug() core::CMemoryDebug::dynamicSize("TFeatureBarVecPtrPrVec", 
t, memoryUsage.addChild()); std::ostringstream ss; memoryUsage.print(ss); - LOG_TRACE("TFeatureBarVecPtrPrVec usage: " << core::CMemory::dynamicSize(t) << - ", debug: " << memoryUsage.usage()); + LOG_TRACE("TFeatureBarVecPtrPrVec usage: " << core::CMemory::dynamicSize(t) << ", debug: " << memoryUsage.usage()); LOG_TRACE(ss.str()); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(t), memoryUsage.usage()); } } -void CMemoryUsageTest::testDynamicSizeAlwaysZero() -{ - // Without some (as yet unspecified) help from the compiler, is_pod will - // never report that a class or struct is a POD; this is always safe, if - // possibly sub-optimal. Currently (May 2011) compilers more recent than - // Visual C++ 8, GCC-4.3, Greenhills 6.0, Intel-11.0, and Codegear have the - // necessary compiler intrinsics to ensure that this trait "just works". - // You may also test to see if the necessary intrinsics are available by - // checking to see if the macro BOOST_IS_POD is defined. (Taken from - // http://www.boost.org/doc/libs/1_65_1/libs/type_traits/doc/html/boost_typetraits/reference/is_pod.html - // .) +void CMemoryUsageTest::testDynamicSizeAlwaysZero() { +// Without some (as yet unspecified) help from the compiler, is_pod will +// never report that a class or struct is a POD; this is always safe, if +// possibly sub-optimal. Currently (May 2011) compilers more recent than +// Visual C++ 8, GCC-4.3, Greenhills 6.0, Intel-11.0, and Codegear have the +// necessary compiler intrinsics to ensure that this trait "just works". +// You may also test to see if the necessary intrinsics are available by +// checking to see if the macro BOOST_IS_POD is defined. (Taken from +// http://www.boost.org/doc/libs/1_65_1/libs/type_traits/doc/html/boost_typetraits/reference/is_pod.html +// .) #ifdef BOOST_IS_POD bool haveStructPodCompilerSupport = true; #else @@ -852,8 +728,7 @@ void CMemoryUsageTest::testDynamicSizeAlwaysZero() CPPUNIT_ASSERT_EQUAL(false, test); } -void CMemoryUsageTest::testCompress() -{ +void CMemoryUsageTest::testCompress() { { // Check that non-repeated entries are not removed core::CMemoryUsage mem; @@ -891,7 +766,7 @@ void CMemoryUsageTest::testCompress() mem.setName("root", 1); mem.addChild()->setName("muffin", 4); mem.addChild()->setName("child", 3); - core::CMemoryUsage * child = mem.addChild(); + core::CMemoryUsage* child = mem.addChild(); child->setName("child", 5); child->addChild()->setName("grandchild", 100); mem.addChild()->setName("child", 7); @@ -914,15 +789,14 @@ void CMemoryUsageTest::testCompress() after = ss.str(); } std::string expected("{\"root\":{\"memory\":1},\"subItems\":[{\"muffin\":" - "{\"memory\":4}},{\"child [*10]\":{\"memory\":220}},{\"puffin\":" - "{\"memory\":2}}]}\n"); + "{\"memory\":4}},{\"child [*10]\":{\"memory\":220}},{\"puffin\":" + "{\"memory\":2}}]}\n"); LOG_DEBUG(after); CPPUNIT_ASSERT_EQUAL(expected, after); } } -void CMemoryUsageTest::testStringBehaviour() -{ +void CMemoryUsageTest::testStringBehaviour() { // This "test" highlights the way the std::string class behaves on each // platform we support. Experience shows that methods like reserve(), // clear() and operator=() don't always work the way the books suggest... 
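// --- Editor's sketch (illustration only, not part of this patch) ---
// The kind of probe this test performs: compare data() pointers to see
// whether the platform's std::string shares representations between
// copies (copy-on-write) or copies eagerly. Purely observational, like
// the test itself; nothing is asserted.
#include <iostream>
#include <string>

int main() {
    std::string original("something");
    std::string copy(original);
    // On a copy-on-write implementation the two pointers compare equal
    // until one string is mutated; on eager-copy implementations (as
    // C++11 requires) they differ immediately after construction.
    std::cout << (copy.data() == original.data() ? "shared" : "copied") << '\n';
    return 0;
}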
@@ -937,12 +811,10 @@ void CMemoryUsageTest::testStringBehaviour()
     std::string empty1;
     std::string empty2;
 
-    LOG_INFO("Two independently constructed empty strings have data at " <<
-             static_cast<const void*>(empty1.data()) << " and " <<
-             static_cast<const void*>(empty2.data()) << " and capacity " <<
-             empty1.capacity());
-    if (empty1.data() == empty2.data())
-    {
+    LOG_INFO("Two independently constructed empty strings have data at " << static_cast<const void*>(empty1.data()) << " and "
+                                                                         << static_cast<const void*>(empty2.data()) << " and capacity "
+                                                                         << empty1.capacity());
+    if (empty1.data() == empty2.data()) {
         LOG_INFO("All strings constructed empty probably share the same "
                  "representation on this platform");
     }
@@ -952,35 +824,29 @@ void CMemoryUsageTest::testStringBehaviour()
     std::string something3;
     something3 = something2;
 
-    LOG_INFO("Non-empty string has data at " <<
-             static_cast<const void*>(something1.data()) << " length " <<
-             something1.length() << " and capacity " << something1.capacity());
+    LOG_INFO("Non-empty string has data at " << static_cast<const void*>(something1.data()) << " length " << something1.length()
+                                             << " and capacity " << something1.capacity());
 
-    LOG_INFO("Copy constructed string has data at " <<
-             static_cast<const void*>(something2.data()) << " length " <<
-             something2.length() << " and capacity " << something2.capacity());
-    if (something2.data() == something1.data())
-    {
+    LOG_INFO("Copy constructed string has data at " << static_cast<const void*>(something2.data()) << " length " << something2.length()
+                                                    << " and capacity " << something2.capacity());
+    if (something2.data() == something1.data()) {
         LOG_INFO("Copy constructor probably has a copy-on-write "
                  "implementation on this platform");
     }
 
-    LOG_INFO("Assigned string has data at " <<
-             static_cast<const void*>(something3.data()) << " length " <<
-             something3.length() << " and capacity " << something3.capacity());
-    if (something3.data() == something2.data())
-    {
+    LOG_INFO("Assigned string has data at " << static_cast<const void*>(something3.data()) << " length " << something3.length()
+                                            << " and capacity " << something3.capacity());
+    if (something3.data() == something2.data()) {
         LOG_INFO("Assignment operator probably has a copy-on-write "
                  "implementation on this platform");
     }
 
     something1.clear();
 
-    LOG_INFO("Cleared string that was copied to two others has data at " <<
-             static_cast<const void*>(something1.data()) << " length " <<
-             something1.length() << " and capacity " << something1.capacity());
-    if (something1.data() == empty1.data())
-    {
+    LOG_INFO("Cleared string that was copied to two others has data at " << static_cast<const void*>(something1.data()) << " length "
+                                                                          << something1.length() << " and capacity "
+                                                                          << something1.capacity());
+    if (something1.data() == empty1.data()) {
         LOG_INFO("Cleared strings revert to shared empty representation on "
                  "this platform");
     }
@@ -988,11 +854,10 @@ void CMemoryUsageTest::testStringBehaviour()
     something2 = empty2;
 
     LOG_INFO("String that was copied to another then assigned an empty string "
-             "has data at " << static_cast<const void*>(something2.data()) <<
-             " length " << something2.length() << " and capacity " <<
-             something2.capacity());
-    if (something2.data() == empty1.data())
-    {
+             "has data at "
+             << static_cast<const void*>(something2.data()) << " length " << something2.length() << " and capacity "
+             << something2.capacity());
+    if (something2.data() == empty1.data()) {
         LOG_INFO("Strings that have an empty constructed string assigned to "
                  "them share the same representation as other empty "
                  "constructed strings on this platform");
@@ -1000,36 +865,31 @@
 
     std::string uncopied("uncopied");
-    LOG_INFO("Non-empty uncopied string has data at " <<
-             static_cast<const void*>(uncopied.data()) << " length " <<
-             uncopied.length() << " and capacity " << uncopied.capacity());
+    LOG_INFO("Non-empty uncopied string has data at " << static_cast<const void*>(uncopied.data()) << " length " << uncopied.length()
+                                                      << " and capacity " << uncopied.capacity());
 
     uncopied.clear();
 
-    LOG_INFO("Cleared uncopied string has data at " <<
-             static_cast<const void*>(uncopied.data()) << " length " <<
-             uncopied.length() << " and capacity " << uncopied.capacity());
+    LOG_INFO("Cleared uncopied string has data at " << static_cast<const void*>(uncopied.data()) << " length " << uncopied.length()
+                                                    << " and capacity " << uncopied.capacity());
 
     std::string startSmall("small");
-    LOG_INFO("Non-empty small string unchanged since construction has data at " <<
-             static_cast<const void*>(startSmall.data()) << " length " <<
-             startSmall.length() << " and capacity " << startSmall.capacity());
+    LOG_INFO("Non-empty small string unchanged since construction has data at " << static_cast<const void*>(startSmall.data()) << " length "
+                                                                                << startSmall.length() << " and capacity "
+                                                                                << startSmall.capacity());
 
     startSmall.reserve(100);
     size_t capacity100(startSmall.capacity());
-    LOG_INFO("Small string after reserving 100 bytes has data at " <<
-             static_cast<const void*>(startSmall.data()) << " length " <<
-             startSmall.length() << " and capacity " << startSmall.capacity());
+    LOG_INFO("Small string after reserving 100 bytes has data at " << static_cast<const void*>(startSmall.data()) << " length "
+                                                                   << startSmall.length() << " and capacity " << startSmall.capacity());
 
     startSmall.reserve(10);
-    LOG_INFO("Small string after reserving 10 bytes has data at " <<
-             static_cast<const void*>(startSmall.data()) << " length " <<
-             startSmall.length() << " and capacity " << startSmall.capacity());
-    if (startSmall.capacity() < capacity100)
-    {
+    LOG_INFO("Small string after reserving 10 bytes has data at " << static_cast<const void*>(startSmall.data()) << " length "
+                                                                  << startSmall.length() << " and capacity " << startSmall.capacity());
+    if (startSmall.capacity() < capacity100) {
         LOG_INFO("On this platform reservations can reduce string capacity");
     }
 
@@ -1037,82 +897,67 @@ void CMemoryUsageTest::testStringBehaviour()
     // the short string optimisation (if it's being used)
     std::string startLong("this_string_is_longer_than_one_that_will_take_advantage_of_the_small_string_optimisation");
 
-    LOG_INFO("Long string after initial construction has data at " <<
-             static_cast<const void*>(startLong.data()) << " length " <<
-             startLong.length() << " and capacity " << startLong.capacity());
+    LOG_INFO("Long string after initial construction has data at " << static_cast<const void*>(startLong.data()) << " length "
+                                                                   << startLong.length() << " and capacity " << startLong.capacity());
 
     startLong.reserve(10000);
     size_t capacity10000(startLong.capacity());
-    LOG_INFO("Long string after reserving 10000 bytes has data at " <<
-             static_cast<const void*>(startLong.data()) << " length " <<
-             startLong.length() << " and capacity " << startLong.capacity());
+    LOG_INFO("Long string after reserving 10000 bytes has data at " << static_cast<const void*>(startLong.data()) << " length "
+                                                                    << startLong.length() << " and capacity " << startLong.capacity());
 
     startLong.clear();
-    LOG_INFO("Long string after clearing has data at " <<
-             static_cast<const void*>(startLong.data()) << " length " <<
-             startLong.length() << " and capacity " << startLong.capacity());
-    if (startLong.capacity() < capacity10000)
-    {
+    LOG_INFO("Long string after clearing has data at " << static_cast<const void*>(startLong.data()) << " length " << startLong.length()
+                                                       << " and capacity " << startLong.capacity());
+    if (startLong.capacity() < capacity10000) {
         LOG_INFO("On this platform clearing can reduce string capacity");
     }
 
     using TSizeVec = std::vector<size_t>;
 
     std::string grower;
     TSizeVec capacities(1, grower.capacity());
-    for (size_t count = 0; count < 50000; ++count)
-    {
+    for (size_t count = 0; count < 50000; ++count) {
         grower += 'x';
-        if (grower.capacity() != capacities.back())
-        {
+        if (grower.capacity() != capacities.back()) {
             capacities.push_back(grower.capacity());
         }
     }
-    LOG_INFO("Capacities during growth from 0 to 50000 characters are: " <<
-             core::CContainerPrinter::print(capacities));
+    LOG_INFO("Capacities during growth from 0 to 50000 characters are: " << core::CContainerPrinter::print(capacities));
 
     std::string toBeShrunk(100, 'a');
     toBeShrunk = "a lot smaller than it was";
 
     size_t preShrinkCapacity(toBeShrunk.capacity());
-    LOG_INFO("String to be shrunk has starting size " << toBeShrunk.size() <<
-             " and capacity " << preShrinkCapacity);
+    LOG_INFO("String to be shrunk has starting size " << toBeShrunk.size() << " and capacity " << preShrinkCapacity);
 
     std::string(toBeShrunk).swap(toBeShrunk);
 
     size_t postShrinkCapacity(toBeShrunk.capacity());
-    LOG_INFO("String to be shrunk has post-shrink size " << toBeShrunk.size() <<
-             " and capacity " << postShrinkCapacity);
+    LOG_INFO("String to be shrunk has post-shrink size " << toBeShrunk.size() << " and capacity " << postShrinkCapacity);
 
-    LOG_INFO("The swap() trick to reduce capacity " <<
-             ((postShrinkCapacity < preShrinkCapacity) ? "works" : "DOESN'T WORK!"));
+    LOG_INFO("The swap() trick to reduce capacity " << ((postShrinkCapacity < preShrinkCapacity) ? "works" : "DOESN'T WORK!"));
 }
 
-void CMemoryUsageTest::testStringMemory()
-{
+void CMemoryUsageTest::testStringMemory() {
     using TAllocator = ::CTrackingAllocator<char>;
     using TString = std::basic_string<char, std::char_traits<char>, TAllocator>;
 
-    for (std::size_t i = 0; i < 1500; ++i)
-    {
+    for (std::size_t i = 0; i < 1500; ++i) {
         CPPUNIT_ASSERT_EQUAL(std::size_t(0), TAllocator::usage());
         TString trackingString;
         std::string normalString;
-        for (std::size_t j = 0; j < i; ++j)
-        {
+        for (std::size_t j = 0; j < i; ++j) {
             trackingString.push_back(static_cast<char>('a' + j));
             normalString.push_back(static_cast<char>('a' + j));
         }
-        LOG_DEBUG("String size " << core::CMemory::dynamicSize(normalString) <<
-                  ", allocated " << TAllocator::usage());
+        LOG_DEBUG("String size " << core::CMemory::dynamicSize(normalString) << ", allocated " << TAllocator::usage());
         CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(normalString), TAllocator::usage());
     }
 }
 
-void CMemoryUsageTest::testStringClear()
-{
+void CMemoryUsageTest::testStringClear() {
     using TAllocator = ::CTrackingAllocator<char>;
     using TString = std::basic_string<char, std::char_traits<char>, TAllocator>;
 
@@ -1131,8 +976,7 @@ void CMemoryUsageTest::testStringClear()
     CPPUNIT_ASSERT_EQUAL(usage3Copies, TAllocator::usage());
 }
 
-void CMemoryUsageTest::testSharedPointer()
-{
+void CMemoryUsageTest::testSharedPointer() {
     LOG_DEBUG("*** testSharedPointer ***");
     using TIntVec = std::vector<int>;
     using TIntVecPtr = boost::shared_ptr<TIntVec>;
@@ -1176,17 +1020,12 @@ void CMemoryUsageTest::testSharedPointer()
     // vec2: 8 (capacity) * 16 (shared_ptr element size)
     //       = 688
 
-    std::size_t expectedSize = vec1.capacity() * sizeof(TIntVecPtr) +
-                               vec2.capacity() * sizeof(TIntVecPtr) +
-                               3 * sizeof(TIntVec) +
-                               (vec1[0]->capacity() + vec1[1]->capacity() +
-                                vec1[3]->capacity()) * sizeof(int);
+    std::size_t expectedSize = vec1.capacity() * sizeof(TIntVecPtr) + vec2.capacity() * sizeof(TIntVecPtr) + 3 * sizeof(TIntVec) +
+                               (vec1[0]->capacity() + vec1[1]->capacity() + vec1[3]->capacity()) * sizeof(int);
 
-    LOG_DEBUG("Expected: " << expectedSize << ", actual: " << (core::CMemory::dynamicSize(vec1) +
-                                                               core::CMemory::dynamicSize(vec2)));
+    LOG_DEBUG("Expected: " << expectedSize << ", actual: " << (core::CMemory::dynamicSize(vec1) + core::CMemory::dynamicSize(vec2)));
 
-    CPPUNIT_ASSERT_EQUAL(expectedSize, core::CMemory::dynamicSize(vec1) +
-                                       core::CMemory::dynamicSize(vec2));
+    CPPUNIT_ASSERT_EQUAL(expectedSize, core::CMemory::dynamicSize(vec1) + core::CMemory::dynamicSize(vec2));
 
     TStrPtrVec svec1;
     svec1.push_back(TStrPtr(new std::string("This is a string")));
@@ -1211,10 +1050,9 @@ void CMemoryUsageTest::testSharedPointer()
     CPPUNIT_ASSERT(std::abs(stringSizeBefore - stringSizeAfter) < 4);
 }
 
-void CMemoryUsageTest::testRawPointer()
-{
+void CMemoryUsageTest::testRawPointer() {
     LOG_DEBUG("*** testRawPointer ***");
-    std::string *strPtr = 0;
+    std::string* strPtr = 0;
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), core::CMemory::dynamicSize(strPtr));
 
     std::string foo = "abcdefghijklmnopqrstuvwxyz";
@@ -1226,8 +1064,7 @@ void CMemoryUsageTest::testRawPointer()
     CPPUNIT_ASSERT_EQUAL(fooMem + sizeof(std::string), core::CMemory::dynamicSize(strPtr));
 }
 
-void CMemoryUsageTest::testSmallVector()
-{
+void CMemoryUsageTest::testSmallVector() {
     LOG_DEBUG("*** testSmallVector ***");
 
     using TSizeVec = std::vector<size_t>;
@@ -1239,14 +1076,11 @@ void CMemoryUsageTest::testSmallVector()
     TSizeVec sizes;
     test.generateUniformSamples(0, 12, 100, sizes);
 
-    for (auto size : sizes)
-    {
+    for (auto size : sizes) {
         TDouble1Vec vec1(size);
         TDouble6Vec vec2(size);
         TDouble9Vec vec3(size);
-        TSizeVec memory{core::CMemory::dynamicSize(vec1),
-                        core::CMemory::dynamicSize(vec2),
-                        core::CMemory::dynamicSize(vec3)};
+        TSizeVec memory{core::CMemory::dynamicSize(vec1), core::CMemory::dynamicSize(vec2), core::CMemory::dynamicSize(vec3)};
         // These assertions hold because the vectors never shrink
         CPPUNIT_ASSERT(size > 2 || memory[0] == 0);
         CPPUNIT_ASSERT(memory[0] == 0 || memory[0] == vec1.capacity() * sizeof(double));
@@ -1282,41 +1116,25 @@ void CMemoryUsageTest::testSmallVector()
     CPPUNIT_ASSERT(extraMem > 0);
 }
 
-CppUnit::Test *CMemoryUsageTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMemoryUsageTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testUsage",
-                               &CMemoryUsageTest::testUsage) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testDebug",
-                               &CMemoryUsageTest::testDebug) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testDynamicSizeAlwaysZero",
-                               &CMemoryUsageTest::testDynamicSizeAlwaysZero) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testCompress",
-                               &CMemoryUsageTest::testCompress) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testStringBehaviour",
-                               &CMemoryUsageTest::testStringBehaviour ) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testStringMemory",
-                               &CMemoryUsageTest::testStringMemory) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testStringClear",
-                               &CMemoryUsageTest::testStringClear) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testSharedPointer",
-                               &CMemoryUsageTest::testSharedPointer) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testRawPointer",
-                               &CMemoryUsageTest::testRawPointer) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMemoryUsageTest>(
-                               "CMemoryUsageTest::testSmallVector",
-                               &CMemoryUsageTest::testSmallVector) );
+CppUnit::Test* CMemoryUsageTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testUsage", &CMemoryUsageTest::testUsage));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testDebug", &CMemoryUsageTest::testDebug));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testDynamicSizeAlwaysZero",
+                                                                    &CMemoryUsageTest::testDynamicSizeAlwaysZero));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testCompress", &CMemoryUsageTest::testCompress));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testStringBehaviour", &CMemoryUsageTest::testStringBehaviour));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testStringMemory", &CMemoryUsageTest::testStringMemory));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testStringClear", &CMemoryUsageTest::testStringClear));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testSharedPointer", &CMemoryUsageTest::testSharedPointer));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testRawPointer", &CMemoryUsageTest::testRawPointer));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMemoryUsageTest>("CMemoryUsageTest::testSmallVector", &CMemoryUsageTest::testSmallVector));
 
     return suiteOfTests;
 }
-
diff --git a/lib/core/unittest/CMemoryUsageTest.h b/lib/core/unittest/CMemoryUsageTest.h
index 445dd35fa6..67c782476e 100644
--- a/lib/core/unittest/CMemoryUsageTest.h
+++ b/lib/core/unittest/CMemoryUsageTest.h
@@ -9,21 +9,20 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CMemoryUsageTest : public CppUnit::TestFixture
-{
-    public:
-        void testUsage();
-        void testDebug();
-        void testDynamicSizeAlwaysZero();
-        void testCompress();
-        void testStringBehaviour();
-        void testStringMemory();
-        void testStringClear();
-        void testSharedPointer();
-        void testRawPointer();
-        void testSmallVector();
+class CMemoryUsageTest : public CppUnit::TestFixture {
+public:
+    void testUsage();
+    void testDebug();
+    void testDynamicSizeAlwaysZero();
+    void testCompress();
+    void testStringBehaviour();
+    void testStringMemory();
+    void testStringClear();
+    void testSharedPointer();
+    void testRawPointer();
+    void testSmallVector();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CMemoryUsageTest_h
diff --git a/lib/core/unittest/CMessageBufferTest.cc b/lib/core/unittest/CMessageBufferTest.cc
index 92d683158c..e6ba84e0cc 100644
--- a/lib/core/unittest/CMessageBufferTest.cc
+++ b/lib/core/unittest/CMessageBufferTest.cc
@@ -10,96 +10,73 @@
 
 #include
 
+CppUnit::Test* CMessageBufferTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMessageBufferTest");
 
-CppUnit::Test *CMessageBufferTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMessageBufferTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMessageBufferTest>(
-                               "CMessageBufferTest::testAll",
-                               &CMessageBufferTest::testAll) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMessageBufferTest>("CMessageBufferTest::testAll", &CMessageBufferTest::testAll));
 
     return suiteOfTests;
 }
 
-namespace
-{
-class CBuffer
-{
-    public:
-        using TStrVec = std::vector<std::string>;
-
-    public:
-        CBuffer(uint32_t flushInterval) : m_FlushInterval(flushInterval)
-        {
-        }
-
-        void addMessage(const std::string &str)
-        {
-            if((m_Buffer.size() % 1000) == 0)
-            {
-                LOG_DEBUG("Received " << m_Buffer.size() << " strings");
-            }
-            m_Buffer.push_back(str);
-        }
-
-        uint32_t flushInterval() const
-        {
-            return m_FlushInterval;
-        }
-
-        ml::core_t::TTime flushMessages(TStrVec &messages)
-        {
-            LOG_DEBUG("Flush messages " << m_Buffer.size());
-
-            messages = m_Buffer;
-
-            m_Buffer.clear();
-
-            // For time sensitive buffers, this value can provide the
-            // current time for example, but for this simple test it's not
-            // used
-            return 0;
-        }
-
-        void flushAllMessages(TStrVec &messages)
-        {
-            this->flushMessages(messages);
-        }
-
-        void processMessages(const TStrVec &messages, ml::core_t::TTime)
-        {
-            m_Results.insert(m_Results.end(), messages.begin(), messages.end());
-
-            LOG_DEBUG("Processed " << messages.size() << " " << m_Results.size() << " messages");
-        }
-
-        size_t size() const
-        {
-            return m_Results.size();
-        }
-
-    private:
-        uint32_t m_FlushInterval;
-        TStrVec m_Buffer;
-        TStrVec m_Results;
-};
+namespace {
+class CBuffer {
+public:
+    using TStrVec = std::vector<std::string>;
+
+public:
+    CBuffer(uint32_t flushInterval) : m_FlushInterval(flushInterval) {}
+
+    void addMessage(const std::string& str) {
+        if ((m_Buffer.size() % 1000) == 0) {
+            LOG_DEBUG("Received " << m_Buffer.size() << " strings");
+        }
+        m_Buffer.push_back(str);
+    }
+
+    uint32_t flushInterval() const { return m_FlushInterval; }
+
+    ml::core_t::TTime flushMessages(TStrVec& messages) {
+        LOG_DEBUG("Flush messages " << m_Buffer.size());
+
+        messages = m_Buffer;
+
+        m_Buffer.clear();
+
+        // For time sensitive buffers, this value can provide the
+        // current time for example, but for this simple test it's not
+        // used
+        return 0;
+    }
+
+    void flushAllMessages(TStrVec& messages) { this->flushMessages(messages); }
+
+    void processMessages(const TStrVec& messages, ml::core_t::TTime) {
+        m_Results.insert(m_Results.end(), messages.begin(), messages.end());
+
+        LOG_DEBUG("Processed " << messages.size() << " " << m_Results.size() << " messages");
+    }
+
+    size_t size() const { return m_Results.size(); }
+
+private:
+    uint32_t m_FlushInterval;
+    TStrVec m_Buffer;
+    TStrVec m_Results;
+};
 }
 
-void CMessageBufferTest::testAll()
-{
+void CMessageBufferTest::testAll() {
     CBuffer buffer(10);
-    ml::core::CMessageBuffer<std::string, CBuffer> queue(buffer);
+    ml::core::CMessageBuffer<std::string, CBuffer> queue(buffer);
 
     CPPUNIT_ASSERT(queue.start());
 
-    size_t max(100000);
+    size_t max(100000);
 
     LOG_DEBUG("Sending " << max << " strings");
 
-    for(size_t i = 0; i < max; ++i)
-    {
+    for (size_t i = 0; i < max; ++i) {
         queue.addMessage("Test string");
     }
 
@@ -107,5 +84,5 @@ void CMessageBufferTest::testAll()
 
     queue.stop();
 
-    CPPUNIT_ASSERT_EQUAL(max, buffer.size());
+    CPPUNIT_ASSERT_EQUAL(max, buffer.size());
 }
diff --git a/lib/core/unittest/CMessageBufferTest.h b/lib/core/unittest/CMessageBufferTest.h
index 41a8636801..48f56bf2e9 100644
--- a/lib/core/unittest/CMessageBufferTest.h
+++ b/lib/core/unittest/CMessageBufferTest.h
@@ -8,12 +8,11 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CMessageBufferTest : public CppUnit::TestFixture
-{
-    public:
-        void testAll();
+class CMessageBufferTest : public CppUnit::TestFixture {
+public:
+    void testAll();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CMessageBufferTest_h
diff --git a/lib/core/unittest/CMessageQueueTest.cc b/lib/core/unittest/CMessageQueueTest.cc
index 1a4b3cde65..6a09a99603 100644
--- a/lib/core/unittest/CMessageQueueTest.cc
+++ b/lib/core/unittest/CMessageQueueTest.cc
@@ -13,63 +13,46 @@
 
 #include
 
+CppUnit::Test* CMessageQueueTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMessageQueueTest");
 
-CppUnit::Test *CMessageQueueTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMessageQueueTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMessageQueueTest>(
-                               "CMessageQueueTest::testSendReceive",
-                               &CMessageQueueTest::testSendReceive) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMessageQueueTest>(
-                               "CMessageQueueTest::testTiming",
-                               &CMessageQueueTest::testTiming) );
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMessageQueueTest>("CMessageQueueTest::testSendReceive", &CMessageQueueTest::testSendReceive));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMessageQueueTest>("CMessageQueueTest::testTiming", &CMessageQueueTest::testTiming));
 
     return suiteOfTests;
 }
 
-namespace
-{
-class CReceiver
-{
-    public:
-        CReceiver(uint32_t sleepTime = 0)
-            : m_SleepTime(sleepTime)
-        {
-        }
-
-        void processMsg(const std::string &str, size_t /* backlog */)
-        {
-            m_Strings.push_back(str);
-            if ((m_Strings.size() % 1000) == 0)
-            {
-                LOG_DEBUG("Received " << m_Strings.size() << " strings");
-            }
-
-            // Delay the processing if requested - this enables us to test
-            // the timing functionality
-            if (m_SleepTime > 0)
-            {
-                ml::core::CSleep::sleep(m_SleepTime);
-            }
-        }
-
-        size_t size() const
-        {
-            return m_Strings.size();
-        }
-
-    private:
-        using TStrVec = std::vector<std::string>;
-
-        TStrVec m_Strings;
-
-        uint32_t m_SleepTime;
-};
+namespace {
+class CReceiver {
+public:
+    CReceiver(uint32_t sleepTime = 0) : m_SleepTime(sleepTime) {}
+
+    void processMsg(const std::string& str, size_t /* backlog */) {
+        m_Strings.push_back(str);
+        if ((m_Strings.size() % 1000) == 0) {
+            LOG_DEBUG("Received " << m_Strings.size() << " strings");
+        }
+
+        // Delay the processing if requested - this enables us to test
+        // the timing functionality
+        if (m_SleepTime > 0) {
+            ml::core::CSleep::sleep(m_SleepTime);
+        }
+    }
+
+    size_t size() const { return m_Strings.size(); }
+
+private:
+    using TStrVec = std::vector<std::string>;
+
+    TStrVec m_Strings;
+
+    uint32_t m_SleepTime;
+};
 }
 
-void CMessageQueueTest::testSendReceive()
-{
+void CMessageQueueTest::testSendReceive() {
     CReceiver receiver;
     ml::core::CMessageQueue<std::string, CReceiver> queue(receiver);
 
@@ -80,8 +63,7 @@ void CMessageQueueTest::testSendReceive()
 
     LOG_DEBUG("Sending " << TEST_SIZE << " strings");
 
-    for (size_t i = 0; i < TEST_SIZE; ++i)
-    {
+    for (size_t i = 0; i < TEST_SIZE; ++i) {
         queue.dispatchMsg("Test string");
     }
 
@@ -92,16 +74,13 @@ void CMessageQueueTest::testSendReceive()
 
     CPPUNIT_ASSERT_EQUAL(TEST_SIZE, receiver.size());
 }
 
-void CMessageQueueTest::testTiming()
-{
+void CMessageQueueTest::testTiming() {
     // Tell the receiver to delay processing by 29ms for each item (otherwise
     // it will be too fast to time on a modern computer).
     CReceiver receiver(29);
 
     static const size_t NUM_TO_TIME(100);
 
-    ml::core::CMessageQueue<std::string, CReceiver, NUM_TO_TIME> queue(receiver);
+    ml::core::CMessageQueue<std::string, CReceiver, NUM_TO_TIME> queue(receiver);
 
     CPPUNIT_ASSERT(queue.start());
 
@@ -109,8 +88,7 @@ void CMessageQueueTest::testTiming()
 
     LOG_DEBUG("Sending " << TEST_SIZE << " strings");
 
-    for (size_t i = 0; i < TEST_SIZE; ++i)
-    {
+    for (size_t i = 0; i < TEST_SIZE; ++i) {
         queue.dispatchMsg("Test string");
     }
 
@@ -121,8 +99,7 @@ void CMessageQueueTest::testTiming()
 
     CPPUNIT_ASSERT_EQUAL(TEST_SIZE, receiver.size());
 
     double avgProcTimeSec(queue.rollingAverageProcessingTime());
-    LOG_DEBUG("Average processing time per item for the last " << NUM_TO_TIME <<
-              " items was " << avgProcTimeSec << " seconds");
+    LOG_DEBUG("Average processing time per item for the last " << NUM_TO_TIME << " items was " << avgProcTimeSec << " seconds");
 
     // The high side tolerance is greater here, because although the sleep will
     // make up the bulk of the processing time, there is some other processing
@@ -134,4 +111,3 @@ void CMessageQueueTest::testTiming()
     // experience with the OS X Yosemite build VM - TODO: investigate in detail
     CPPUNIT_ASSERT(0.04 > avgProcTimeSec);
 }
-
diff --git a/lib/core/unittest/CMessageQueueTest.h b/lib/core/unittest/CMessageQueueTest.h
index f11bbdac25..c4c5d0f7fd 100644
--- a/lib/core/unittest/CMessageQueueTest.h
+++ b/lib/core/unittest/CMessageQueueTest.h
@@ -8,14 +8,12 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CMessageQueueTest : public CppUnit::TestFixture
-{
-    public:
-        void testSendReceive();
-        void testTiming();
+class CMessageQueueTest : public CppUnit::TestFixture {
+public:
+    void testSendReceive();
+    void testTiming();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CMessageQueueTest_h
-
diff --git a/lib/core/unittest/CMonotonicTimeTest.cc b/lib/core/unittest/CMonotonicTimeTest.cc
index 345f9aeada..6b4bca0d33 100644
--- a/lib/core/unittest/CMonotonicTimeTest.cc
+++ b/lib/core/unittest/CMonotonicTimeTest.cc
@@ -9,23 +9,18 @@
 #include
 #include
 
+CppUnit::Test* CMonotonicTimeTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMonotonicTimeTest");
 
-CppUnit::Test *CMonotonicTimeTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMonotonicTimeTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMonotonicTimeTest>(
-                               "CMonotonicTimeTest::testMilliseconds",
-                               &CMonotonicTimeTest::testMilliseconds) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMonotonicTimeTest>(
-                               "CMonotonicTimeTest::testNanoseconds",
-                               &CMonotonicTimeTest::testNanoseconds) );
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMonotonicTimeTest>("CMonotonicTimeTest::testMilliseconds", &CMonotonicTimeTest::testMilliseconds));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMonotonicTimeTest>("CMonotonicTimeTest::testNanoseconds", &CMonotonicTimeTest::testNanoseconds));
 
     return suiteOfTests;
 }
 
-void CMonotonicTimeTest::testMilliseconds()
-{
+void CMonotonicTimeTest::testMilliseconds() {
     ml::core::CMonotonicTime monoTime;
 
     uint64_t start(monoTime.milliseconds());
@@ -35,16 +30,14 @@ void CMonotonicTimeTest::testMilliseconds()
     uint64_t end(monoTime.milliseconds());
 
     uint64_t diff(end - start);
-    LOG_DEBUG("During 1 second the monotonic millisecond timer advanced by " <<
-              diff << " milliseconds");
+    LOG_DEBUG("During 1 second the monotonic millisecond timer advanced by " << diff << " milliseconds");
 
     // Allow 10% margin of error - this is as much for the sleep as the timer
     CPPUNIT_ASSERT(diff > 900);
     CPPUNIT_ASSERT(diff < 1100);
 }
 
-void CMonotonicTimeTest::testNanoseconds()
-{
+void CMonotonicTimeTest::testNanoseconds() {
     ml::core::CMonotonicTime monoTime;
 
     uint64_t start(monoTime.nanoseconds());
@@ -54,11 +47,9 @@ void CMonotonicTimeTest::testNanoseconds()
     uint64_t end(monoTime.nanoseconds());
 
     uint64_t diff(end - start);
-    LOG_DEBUG("During 1 second the monotonic nanosecond timer advanced by " <<
-              diff << " nanoseconds");
+    LOG_DEBUG("During 1 second the monotonic nanosecond timer advanced by " << diff << " nanoseconds");
 
     // Allow 10% margin of error - this is as much for the sleep as the timer
     CPPUNIT_ASSERT(diff > 900000000);
     CPPUNIT_ASSERT(diff < 1100000000);
 }
-
diff --git a/lib/core/unittest/CMonotonicTimeTest.h b/lib/core/unittest/CMonotonicTimeTest.h
index c1ee19b210..b22c35a6c0 100644
--- a/lib/core/unittest/CMonotonicTimeTest.h
+++ b/lib/core/unittest/CMonotonicTimeTest.h
@@ -8,15 +8,12 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
+class CMonotonicTimeTest : public CppUnit::TestFixture {
+public:
+    void testMilliseconds();
+    void testNanoseconds();
 
-class CMonotonicTimeTest : public CppUnit::TestFixture
-{
-    public:
-        void testMilliseconds();
-        void testNanoseconds();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CMonotonicTimeTest_h
-
diff --git a/lib/core/unittest/CMutexTest.cc b/lib/core/unittest/CMutexTest.cc
index f26a952834..25505af60e 100644
--- a/lib/core/unittest/CMutexTest.cc
+++ b/lib/core/unittest/CMutexTest.cc
@@ -7,20 +7,15 @@
 
 #include
 
+CppUnit::Test* CMutexTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMutexTest");
 
-CppUnit::Test *CMutexTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMutexTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMutexTest>(
-                               "CMutexTest::testRecursive",
-                               &CMutexTest::testRecursive) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMutexTest>("CMutexTest::testRecursive", &CMutexTest::testRecursive));
 
     return suiteOfTests;
 }
 
-void CMutexTest::testRecursive()
-{
+void CMutexTest::testRecursive() {
     ml::core::CMutex mutex;
 
     mutex.lock();
@@ -31,4 +26,3 @@ void CMutexTest::testRecursive()
     mutex.unlock();
     mutex.unlock();
 }
-
diff --git a/lib/core/unittest/CMutexTest.h b/lib/core/unittest/CMutexTest.h
index 050f1fd754..42fbb9ec52 100644
--- a/lib/core/unittest/CMutexTest.h
+++ b/lib/core/unittest/CMutexTest.h
@@ -8,12 +8,11 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CMutexTest : public CppUnit::TestFixture
-{
-    public:
-        void testRecursive();
+class CMutexTest : public CppUnit::TestFixture {
+public:
+    void testRecursive();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CMutexTest_h
diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc
index 5550765c96..034cc7a714 100644
--- a/lib/core/unittest/CNamedPipeFactoryTest.cc
+++ b/lib/core/unittest/CNamedPipeFactoryTest.cc
@@ -18,177 +18,132 @@
 #include
 #endif
 
-
-namespace
-{
+namespace {
 const uint32_t SLEEP_TIME_MS = 100;
 const uint32_t PAUSE_TIME_MS = 10;
-const size_t MAX_ATTEMPTS = 100;
-const size_t TEST_SIZE = 10000;
-const char TEST_CHAR = 'a';
+const size_t MAX_ATTEMPTS = 100;
+const size_t TEST_SIZE = 10000;
+const char TEST_CHAR = 'a';
 
 #ifdef Windows
-const char *TEST_PIPE_NAME = "\\\\.\\pipe\\testpipe";
+const char* TEST_PIPE_NAME = "\\\\.\\pipe\\testpipe";
 #else
-const char *TEST_PIPE_NAME = "testfiles/testpipe";
+const char* TEST_PIPE_NAME = "testfiles/testpipe";
 #endif
 
-class CThreadDataWriter : public ml::core::CThread
-{
-    public:
-        CThreadDataWriter(const std::string &fileName, size_t size)
-            : m_FileName(fileName),
-              m_Size(size)
-        {
-        }
+class CThreadDataWriter : public ml::core::CThread {
+public:
+    CThreadDataWriter(const std::string& fileName, size_t size) : m_FileName(fileName), m_Size(size) {}
 
-    protected:
-        virtual void run()
-        {
-            // Wait for the file to exist
-            ml::core::CSleep::sleep(SLEEP_TIME_MS);
+protected:
+    virtual void run() {
+        // Wait for the file to exist
+        ml::core::CSleep::sleep(SLEEP_TIME_MS);
 
-            std::ofstream strm(m_FileName.c_str());
-            for (size_t i = 0; i < m_Size && strm.good(); ++i)
-            {
-                strm << TEST_CHAR;
-            }
+        std::ofstream strm(m_FileName.c_str());
+        for (size_t i = 0; i < m_Size && strm.good(); ++i) {
+            strm << TEST_CHAR;
         }
+    }
 
-        virtual void shutdown()
-        {
-        }
+    virtual void shutdown() {}
 
-    private:
-        std::string m_FileName;
-        size_t m_Size;
+private:
+    std::string m_FileName;
+    size_t m_Size;
 };
 
-class CThreadDataReader : public ml::core::CThread
-{
-    public:
-        CThreadDataReader(const std::string &fileName)
-            : m_FileName(fileName)
-        {
-        }
-
-        const std::string &data() const
-        {
-            return m_Data;
-        }
-
-    protected:
-        virtual void run()
-        {
-            m_Data.clear();
-
-            std::ifstream strm;
-
-            // Try to open the file repeatedly to allow time for the other
-            // thread to create it
-            size_t attempt(1);
-            do
-            {
-                CPPUNIT_ASSERT(attempt++ <= MAX_ATTEMPTS);
-                ml::core::CSleep::sleep(PAUSE_TIME_MS);
-                strm.open(m_FileName.c_str());
-            }
-            while (!strm.is_open());
-
-            static const std::streamsize BUF_SIZE = 512;
-            char buffer[BUF_SIZE];
-            while (strm.good())
-            {
-                strm.read(buffer, BUF_SIZE);
-                CPPUNIT_ASSERT(!strm.bad());
-                if (strm.gcount() > 0)
-                {
-                    // This code deals with the test character we write to
-                    // detect the short-lived connection problem on Windows
-                    const char *copyFrom = buffer;
-                    size_t copyLen = static_cast<size_t>(strm.gcount());
-                    if (m_Data.empty() &&
-                        *buffer == ml::core::CNamedPipeFactory::TEST_CHAR)
-                    {
-                        ++copyFrom;
-                        --copyLen;
-                    }
-                    if (copyLen > 0)
-                    {
-                        m_Data.append(copyFrom, copyLen);
-                    }
+class CThreadDataReader : public ml::core::CThread {
+public:
+    CThreadDataReader(const std::string& fileName) : m_FileName(fileName) {}
+
+    const std::string& data() const { return m_Data; }
+
+protected:
+    virtual void run() {
+        m_Data.clear();
+
+        std::ifstream strm;
+
+        // Try to open the file repeatedly to allow time for the other
+        // thread to create it
+        size_t attempt(1);
+        do {
+            CPPUNIT_ASSERT(attempt++ <= MAX_ATTEMPTS);
+            ml::core::CSleep::sleep(PAUSE_TIME_MS);
+            strm.open(m_FileName.c_str());
+        } while (!strm.is_open());
+
+        static const std::streamsize BUF_SIZE = 512;
+        char buffer[BUF_SIZE];
+        while (strm.good()) {
+            strm.read(buffer, BUF_SIZE);
+            CPPUNIT_ASSERT(!strm.bad());
+            if (strm.gcount() > 0) {
+                // This code deals with the test character we write to
+                // detect the short-lived connection problem on Windows
+                const char* copyFrom = buffer;
+                size_t copyLen = static_cast<size_t>(strm.gcount());
+                if (m_Data.empty() && *buffer == ml::core::CNamedPipeFactory::TEST_CHAR) {
+                    ++copyFrom;
+                    --copyLen;
+                }
+                if (copyLen > 0) {
+                    m_Data.append(copyFrom, copyLen);
                 }
             }
         }
+    }
 
-        virtual void shutdown()
-        {
-        }
+    virtual void shutdown() {}
 
-    private:
-        std::string m_FileName;
-        std::string m_Data;
+private:
+    std::string m_FileName;
+    std::string m_Data;
 };
 
-class CThreadBlockCanceller : public ml::core::CThread
-{
-    public:
-        CThreadBlockCanceller(ml::core::CThread::TThreadId threadId)
-            : m_ThreadId(threadId)
-        {
-        }
+class CThreadBlockCanceller : public ml::core::CThread {
+public:
+    CThreadBlockCanceller(ml::core::CThread::TThreadId threadId) : m_ThreadId(threadId) {}
 
-    protected:
-        virtual void run()
-        {
-            // Wait for the file to exist
-            ml::core::CSleep::sleep(SLEEP_TIME_MS);
+protected:
+    virtual void run() {
+        // Wait for the file to exist
+        ml::core::CSleep::sleep(SLEEP_TIME_MS);
 
-            // Cancel the open() or read() operation on the file
-            CPPUNIT_ASSERT(ml::core::CThread::cancelBlockedIo(m_ThreadId));
-        }
+        // Cancel the open() or read() operation on the file
+        CPPUNIT_ASSERT(ml::core::CThread::cancelBlockedIo(m_ThreadId));
+    }
 
-        virtual void shutdown()
-        {
-        }
+    virtual void shutdown() {}
 
-    private:
-        ml::core::CThread::TThreadId m_ThreadId;
+private:
+    ml::core::CThread::TThreadId m_ThreadId;
 };
-
 }
 
-CppUnit::Test *CNamedPipeFactoryTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CNamedPipeFactoryTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNamedPipeFactoryTest>(
-                               "CNamedPipeFactoryTest::testServerIsCppReader",
-                               &CNamedPipeFactoryTest::testServerIsCppReader) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNamedPipeFactoryTest>(
-                               "CNamedPipeFactoryTest::testServerIsCReader",
-                               &CNamedPipeFactoryTest::testServerIsCReader) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNamedPipeFactoryTest>(
-                               "CNamedPipeFactoryTest::testServerIsCppWriter",
-                               &CNamedPipeFactoryTest::testServerIsCppWriter) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNamedPipeFactoryTest>(
-                               "CNamedPipeFactoryTest::testServerIsCWriter",
-                               &CNamedPipeFactoryTest::testServerIsCWriter) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNamedPipeFactoryTest>(
-                               "CNamedPipeFactoryTest::testCancelBlock",
-                               &CNamedPipeFactoryTest::testCancelBlock) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNamedPipeFactoryTest>(
-                               "CNamedPipeFactoryTest::testErrorIfRegularFile",
-                               &CNamedPipeFactoryTest::testErrorIfRegularFile) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNamedPipeFactoryTest>(
-                               "CNamedPipeFactoryTest::testErrorIfSymlink",
-                               &CNamedPipeFactoryTest::testErrorIfSymlink) );
+CppUnit::Test* CNamedPipeFactoryTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CNamedPipeFactoryTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNamedPipeFactoryTest>("CNamedPipeFactoryTest::testServerIsCppReader",
+                                                                         &CNamedPipeFactoryTest::testServerIsCppReader));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNamedPipeFactoryTest>("CNamedPipeFactoryTest::testServerIsCReader",
+                                                                         &CNamedPipeFactoryTest::testServerIsCReader));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNamedPipeFactoryTest>("CNamedPipeFactoryTest::testServerIsCppWriter",
+                                                                         &CNamedPipeFactoryTest::testServerIsCppWriter));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNamedPipeFactoryTest>("CNamedPipeFactoryTest::testServerIsCWriter",
+                                                                         &CNamedPipeFactoryTest::testServerIsCWriter));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CNamedPipeFactoryTest>("CNamedPipeFactoryTest::testCancelBlock", &CNamedPipeFactoryTest::testCancelBlock));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNamedPipeFactoryTest>("CNamedPipeFactoryTest::testErrorIfRegularFile",
+                                                                         &CNamedPipeFactoryTest::testErrorIfRegularFile));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNamedPipeFactoryTest>("CNamedPipeFactoryTest::testErrorIfSymlink",
+                                                                         &CNamedPipeFactoryTest::testErrorIfSymlink));
 
     return suiteOfTests;
 }
 
-void CNamedPipeFactoryTest::testServerIsCppReader()
-{
+void CNamedPipeFactoryTest::testServerIsCppReader() {
     CThreadDataWriter threadWriter(TEST_PIPE_NAME, TEST_SIZE);
     CPPUNIT_ASSERT(threadWriter.start());
 
@@ -198,16 +153,13 @@ void CNamedPipeFactoryTest::testServerIsCppReader()
     static const std::streamsize BUF_SIZE = 512;
     std::string readData;
     char buffer[BUF_SIZE];
-    do
-    {
+    do {
         strm->read(buffer, BUF_SIZE);
         CPPUNIT_ASSERT(!strm->bad());
-        if (strm->gcount() > 0)
-        {
+        if (strm->gcount() > 0) {
             readData.append(buffer, static_cast<size_t>(strm->gcount()));
         }
-    }
-    while (!strm->eof());
+    } while (!strm->eof());
 
     CPPUNIT_ASSERT_EQUAL(TEST_SIZE, readData.length());
     CPPUNIT_ASSERT_EQUAL(std::string(TEST_SIZE, TEST_CHAR), readData);
@@ -217,8 +169,7 @@ void CNamedPipeFactoryTest::testServerIsCppReader()
     strm.reset();
 }
 
-void CNamedPipeFactoryTest::testServerIsCReader()
-{
+void CNamedPipeFactoryTest::testServerIsCReader() {
     CThreadDataWriter threadWriter(TEST_PIPE_NAME, TEST_SIZE);
     CPPUNIT_ASSERT(threadWriter.start());
 
@@ -228,16 +179,13 @@ void CNamedPipeFactoryTest::testServerIsCReader()
     static const size_t BUF_SIZE = 512;
     std::string readData;
     char buffer[BUF_SIZE];
-    do
-    {
+    do {
         size_t charsRead = ::fread(buffer, sizeof(char), BUF_SIZE, file.get());
         CPPUNIT_ASSERT(!::ferror(file.get()));
-        if (charsRead > 0)
-        {
+        if (charsRead > 0) {
             readData.append(buffer, charsRead);
         }
-    }
-    while (!::feof(file.get()));
+    } while (!::feof(file.get()));
 
     CPPUNIT_ASSERT_EQUAL(TEST_SIZE, readData.length());
     CPPUNIT_ASSERT_EQUAL(std::string(TEST_SIZE, TEST_CHAR), readData);
@@ -247,8 +195,7 @@ void CNamedPipeFactoryTest::testServerIsCReader()
     file.reset();
 }
 
-void CNamedPipeFactoryTest::testServerIsCppWriter()
-{
+void CNamedPipeFactoryTest::testServerIsCppWriter() {
     CThreadDataReader threadReader(TEST_PIPE_NAME);
     CPPUNIT_ASSERT(threadReader.start());
 
@@ -257,10 +204,8 @@ void CNamedPipeFactoryTest::testServerIsCppWriter()
     size_t charsLeft(TEST_SIZE);
     size_t blockSize(7);
-    while (charsLeft > 0)
-    {
-        if (blockSize > charsLeft)
-        {
+    while (charsLeft > 0) {
+        if (blockSize > charsLeft) {
             blockSize = charsLeft;
         }
         (*strm) << std::string(blockSize, TEST_CHAR);
@@ -276,8 +221,7 @@ void CNamedPipeFactoryTest::testServerIsCppWriter()
     CPPUNIT_ASSERT_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data());
 }
 
-void CNamedPipeFactoryTest::testServerIsCWriter()
-{
+void CNamedPipeFactoryTest::testServerIsCWriter() {
     CThreadDataReader threadReader(TEST_PIPE_NAME);
     CPPUNIT_ASSERT(threadReader.start());
 
@@ -286,10 +230,8 @@ void CNamedPipeFactoryTest::testServerIsCWriter()
     size_t charsLeft(TEST_SIZE);
     size_t blockSize(7);
-    while (charsLeft > 0)
-    {
-        if (blockSize > charsLeft)
-        {
+    while (charsLeft > 0) {
+        if (blockSize > charsLeft) {
             blockSize = charsLeft;
         }
         CPPUNIT_ASSERT(::fputs(std::string(blockSize, TEST_CHAR).c_str(), file.get()) >= 0);
@@ -304,8 +246,7 @@ void CNamedPipeFactoryTest::testServerIsCWriter()
     CPPUNIT_ASSERT_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data());
 }
 
-void CNamedPipeFactoryTest::testCancelBlock()
-{
+void CNamedPipeFactoryTest::testCancelBlock() {
     CThreadBlockCanceller cancellerThread(ml::core::CThread::currentThreadId());
     CPPUNIT_ASSERT(cancellerThread.start());
 
@@ -315,21 +256,19 @@ void CNamedPipeFactoryTest::testCancelBlock()
     CPPUNIT_ASSERT(cancellerThread.stop());
 }
 
-void CNamedPipeFactoryTest::testErrorIfRegularFile()
-{
+void CNamedPipeFactoryTest::testErrorIfRegularFile() {
     ml::core::CNamedPipeFactory::TIStreamP strm = ml::core::CNamedPipeFactory::openPipeStreamRead("Main.cc");
     CPPUNIT_ASSERT(strm == 0);
 }
 
-void CNamedPipeFactoryTest::testErrorIfSymlink()
-{
+void CNamedPipeFactoryTest::testErrorIfSymlink() {
 #ifdef Windows
     // It's impossible to create a symlink to a named pipe on Windows - they
     // live under \\.\pipe\ and it's not possible to symlink to this part of
     // the file system
     LOG_DEBUG("symlink test not relevant to Windows");
 #else
-    static const char *TEST_SYMLINK_NAME = "test_symlink";
+    static const char* TEST_SYMLINK_NAME = "test_symlink";
 
     // Remove any files left behind by a previous failed test, but don't check
     // the return codes as these calls will usually fail
@@ -346,4 +285,3 @@ void CNamedPipeFactoryTest::testErrorIfSymlink()
     CPPUNIT_ASSERT_EQUAL(0, ::unlink(TEST_PIPE_NAME));
 #endif
 }
-
diff --git a/lib/core/unittest/CNamedPipeFactoryTest.h b/lib/core/unittest/CNamedPipeFactoryTest.h
index c777e03f05..94bd1bea37 100644
--- a/lib/core/unittest/CNamedPipeFactoryTest.h
+++ b/lib/core/unittest/CNamedPipeFactoryTest.h
@@ -8,19 +8,17 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CNamedPipeFactoryTest : public CppUnit::TestFixture
-{
-    public:
-        void testServerIsCppReader();
-        void testServerIsCReader();
-        void testServerIsCppWriter();
-        void testServerIsCWriter();
-        void testCancelBlock();
-        void testErrorIfRegularFile();
-        void testErrorIfSymlink();
+class CNamedPipeFactoryTest : public CppUnit::TestFixture {
+public:
+    void testServerIsCppReader();
+    void testServerIsCReader();
+    void testServerIsCppWriter();
+    void testServerIsCWriter();
+    void testCancelBlock();
+    void testErrorIfRegularFile();
+    void testErrorIfSymlink();
 
-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CNamedPipeFactoryTest_h
-
diff --git a/lib/core/unittest/COsFileFuncsTest.cc b/lib/core/unittest/COsFileFuncsTest.cc
index 3433dc1683..90c89bd8ab 100644
--- a/lib/core/unittest/COsFileFuncsTest.cc
+++ b/lib/core/unittest/COsFileFuncsTest.cc
@@ -16,23 +16,16 @@
 #include
 #endif
 
+CppUnit::Test* COsFileFuncsTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COsFileFuncsTest");
 
-CppUnit::Test *COsFileFuncsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("COsFileFuncsTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<COsFileFuncsTest>(
-                               "COsFileFuncsTest::testInode",
-                               &COsFileFuncsTest::testInode) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<COsFileFuncsTest>(
-                               "COsFileFuncsTest::testLStat",
-                               &COsFileFuncsTest::testLStat) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<COsFileFuncsTest>("COsFileFuncsTest::testInode", &COsFileFuncsTest::testInode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COsFileFuncsTest>("COsFileFuncsTest::testLStat", &COsFileFuncsTest::testLStat));
 
     return suiteOfTests;
 }
 
-void COsFileFuncsTest::testInode()
-{
+void COsFileFuncsTest::testInode() {
     // Windows doesn't have inodes as such, but on NTFS we can simulate a number
     // that fulfils the purpose of determining when a file has been renamed and
    // another one with the original name has been created.
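The inode test reformatted in the next hunks checks that the number reported for a path via stat() agrees with the number reported for an open descriptor on the same file via fstat(); that equivalence is what lets a reader notice that a file has been renamed and a new file created under the old name. A rough POSIX-only illustration of the same idea follows; it is a sketch, not the COsFileFuncs portability wrapper the test exercises, and "example.log" is a hypothetical file name:

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>
    #include <cstdio>

    int main() {
        struct stat byPath;
        struct stat byFd;
        // Stat the path first, then the open descriptor.
        if (::stat("example.log", &byPath) != 0) { // hypothetical file
            return 1;
        }
        int fd = ::open("example.log", O_RDONLY);
        if (fd == -1) {
            return 1;
        }
        if (::fstat(fd, &byFd) != 0) {
            ::close(fd);
            return 1;
        }
        // Matching st_ino means the descriptor still refers to the file
        // currently at that path; a mismatch implies rename-and-recreate.
        std::printf("same file: %s\n", byPath.st_ino == byFd.st_ino ? "yes" : "no");
        ::close(fd);
        return 0;
    }

On Windows there is no inode, which is why the test's comment describes simulating an equivalent number on NTFS.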
@@ -44,51 +37,40 @@ void COsFileFuncsTest::testInode()
     ::memset(&statBuf, 0, sizeof(statBuf));
     ml::core::COsFileFuncs::TIno headerDirect(0);
-    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::stat(headerFile.c_str(),
-                                                         &statBuf));
+    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::stat(headerFile.c_str(), &statBuf));
     headerDirect = statBuf.st_ino;
-    LOG_DEBUG("Inode for " << headerFile << " from directory is " <<
-              headerDirect);
+    LOG_DEBUG("Inode for " << headerFile << " from directory is " << headerDirect);
 
     ::memset(&statBuf, 0, sizeof(statBuf));
     ml::core::COsFileFuncs::TIno headerOpen(0);
-    int headerFd(ml::core::COsFileFuncs::open(headerFile.c_str(),
-                                              ml::core::COsFileFuncs::RDONLY));
+    int headerFd(ml::core::COsFileFuncs::open(headerFile.c_str(), ml::core::COsFileFuncs::RDONLY));
     CPPUNIT_ASSERT(headerFd != -1);
-    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::fstat(headerFd,
-                                                          &statBuf));
+    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::fstat(headerFd, &statBuf));
     CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::close(headerFd));
     headerOpen = statBuf.st_ino;
-    LOG_DEBUG("Inode for " << headerFile << " from open file is " <<
-              headerOpen);
+    LOG_DEBUG("Inode for " << headerFile << " from open file is " << headerOpen);
 
     ::memset(&statBuf, 0, sizeof(statBuf));
     ml::core::COsFileFuncs::TIno implDirect(0);
-    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::stat(implFile.c_str(),
-                                                         &statBuf));
+    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::stat(implFile.c_str(), &statBuf));
     implDirect = statBuf.st_ino;
-    LOG_DEBUG("Inode for " << implFile << " from directory is " <<
-              implDirect);
+    LOG_DEBUG("Inode for " << implFile << " from directory is " << implDirect);
 
     ::memset(&statBuf, 0, sizeof(statBuf));
     ml::core::COsFileFuncs::TIno implOpen(0);
-    int implFd(ml::core::COsFileFuncs::open(implFile.c_str(),
-                                            ml::core::COsFileFuncs::RDONLY));
+    int implFd(ml::core::COsFileFuncs::open(implFile.c_str(), ml::core::COsFileFuncs::RDONLY));
     CPPUNIT_ASSERT(implFd != -1);
-    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::fstat(implFd,
-                                                          &statBuf));
+    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::fstat(implFd, &statBuf));
     CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::close(implFd));
     implOpen = statBuf.st_ino;
-    LOG_DEBUG("Inode for " << implFile << " from open file is " <<
-              implOpen);
+    LOG_DEBUG("Inode for " << implFile << " from open file is " << implOpen);
 
     CPPUNIT_ASSERT_EQUAL(headerDirect, headerOpen);
     CPPUNIT_ASSERT_EQUAL(implDirect, implOpen);
     CPPUNIT_ASSERT(implDirect != headerDirect);
 }
 
-void COsFileFuncsTest::testLStat()
-{
+void COsFileFuncsTest::testLStat() {
     std::string file("Main.cc");
     std::string symLink("Main.symlink.cc");
 
@@ -103,17 +85,14 @@ void COsFileFuncsTest::testLStat()
     LOG_WARN("Skipping lstat() test as it would need to run as administrator");
 #else
 #ifdef Windows
-    CPPUNIT_ASSERT(CreateSymbolicLink(symLink.c_str(),
-                                      file.c_str(),
-                                      SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE) != FALSE);
+    CPPUNIT_ASSERT(CreateSymbolicLink(symLink.c_str(), file.c_str(), SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE) != FALSE);
 #else
     CPPUNIT_ASSERT_EQUAL(0, ::symlink(file.c_str(), symLink.c_str()));
 #endif
 
     ml::core::COsFileFuncs::TStat statBuf;
     ::memset(&statBuf, 0, sizeof(statBuf));
-    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::lstat(symLink.c_str(),
-                                                          &statBuf));
+    CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::lstat(symLink.c_str(), &statBuf));
 
     // Windows doesn't have a flag for symlinks, so just assert that lstat()
     // doesn't think the link is one of the other types of file system object
     CPPUNIT_ASSERT((statBuf.st_mode & S_IFMT) != S_IFREG);
@@ -123,10 +102,8 @@ void COsFileFuncsTest::testLStat()
     // Due to the way this test is structured, the link should have been created
     // in the last few seconds (but the linked file, Main.cc, could be older)
     ml::core_t::TTime now = ml::core::CTimeUtils::now();
-    LOG_INFO("now: " << now <<
-             ", symlink create time: " << statBuf.st_ctime <<
-             ", symlink modification time: " << statBuf.st_mtime <<
-             ", symlink access time: " << statBuf.st_atime);
+    LOG_INFO("now: " << now << ", symlink create time: " << statBuf.st_ctime << ", symlink modification time: " << statBuf.st_mtime
+                     << ", symlink access time: " << statBuf.st_atime);
     CPPUNIT_ASSERT(statBuf.st_ctime > now - 3);
     CPPUNIT_ASSERT(statBuf.st_mtime > now - 3);
     CPPUNIT_ASSERT(statBuf.st_atime > now - 3);
@@ -134,4 +111,3 @@ void COsFileFuncsTest::testLStat()
     CPPUNIT_ASSERT_EQUAL(0, ::remove(symLink.c_str()));
 #endif
 }
-
diff --git a/lib/core/unittest/COsFileFuncsTest.h b/lib/core/unittest/COsFileFuncsTest.h
index 5015b0bb14..52ea614f9f 100644
--- a/lib/core/unittest/COsFileFuncsTest.h
+++ b/lib/core/unittest/COsFileFuncsTest.h
@@ -8,15 +8,12 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
+class COsFileFuncsTest : public CppUnit::TestFixture {
+public:
+    void testInode();
+    void testLStat();
 
-class COsFileFuncsTest : public CppUnit::TestFixture
-{
-    public:
-        void testInode();
-        void testLStat();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_COsFileFuncsTest_h
-
diff --git a/lib/core/unittest/CPatternSetTest.cc b/lib/core/unittest/CPatternSetTest.cc
index 1a735ce4fe..db45f21f85 100644
--- a/lib/core/unittest/CPatternSetTest.cc
+++ b/lib/core/unittest/CPatternSetTest.cc
@@ -8,71 +8,54 @@
 #include
 #include
 
-
 using namespace ml;
 using namespace core;
 
-CppUnit::Test *CPatternSetTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CPatternSetTest");
-
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testInitFromJson_GivenInvalidJson",
-                              &CPatternSetTest::testInitFromJson_GivenInvalidJson) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testInitFromJson_GivenNonArray",
-                              &CPatternSetTest::testInitFromJson_GivenNonArray) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem",
-                              &CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates",
-                              &CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testContains_GivenFullMatchKeys",
-                              &CPatternSetTest::testContains_GivenFullMatchKeys) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testContains_GivenPrefixKeys",
-                              &CPatternSetTest::testContains_GivenPrefixKeys) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testContains_GivenSuffixKeys",
-                              &CPatternSetTest::testContains_GivenSuffixKeys) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testContains_GivenContainsKeys",
-                              &CPatternSetTest::testContains_GivenContainsKeys) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testContains_GivenMixedKeys",
-                              &CPatternSetTest::testContains_GivenMixedKeys) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>(
-                              "CPatternSetTest::testClear",
-                              &CPatternSetTest::testClear) );
+CppUnit::Test* CPatternSetTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPatternSetTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testInitFromJson_GivenInvalidJson",
+                                                                   &CPatternSetTest::testInitFromJson_GivenInvalidJson));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testInitFromJson_GivenNonArray",
+                                                                   &CPatternSetTest::testInitFromJson_GivenNonArray));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem",
+                                                                   &CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates",
+                                                                   &CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testContains_GivenFullMatchKeys",
+                                                                   &CPatternSetTest::testContains_GivenFullMatchKeys));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testContains_GivenPrefixKeys",
+                                                                   &CPatternSetTest::testContains_GivenPrefixKeys));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testContains_GivenSuffixKeys",
+                                                                   &CPatternSetTest::testContains_GivenSuffixKeys));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testContains_GivenContainsKeys",
+                                                                   &CPatternSetTest::testContains_GivenContainsKeys));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testContains_GivenMixedKeys",
+                                                                   &CPatternSetTest::testContains_GivenMixedKeys));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPatternSetTest>("CPatternSetTest::testClear", &CPatternSetTest::testClear));
 
     return suiteOfTests;
 }
 
-void CPatternSetTest::testInitFromJson_GivenInvalidJson()
-{
+void CPatternSetTest::testInitFromJson_GivenInvalidJson() {
     std::string json("[\"foo\"");
     CPatternSet set;
     CPPUNIT_ASSERT(set.initFromJson(json) == false);
 }
 
-void CPatternSetTest::testInitFromJson_GivenNonArray()
-{
+void CPatternSetTest::testInitFromJson_GivenNonArray() {
     std::string json("{}");
     CPatternSet set;
     CPPUNIT_ASSERT(set.initFromJson(json) == false);
 }
 
-void CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem()
-{
+void CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem() {
     std::string json("[\"foo\", 3]");
     CPatternSet set;
     CPPUNIT_ASSERT(set.initFromJson(json) == false);
 }
 
-void CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates()
-{
+void CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates() {
     std::string json("[\"foo\",\"foo\", \"bar\", \"bar\"]");
     CPatternSet set;
 
@@ -82,8 +65,7 @@ void CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates()
     CPPUNIT_ASSERT(set.contains("bar"));
 }
 
-void CPatternSetTest::testContains_GivenFullMatchKeys()
-{
+void CPatternSetTest::testContains_GivenFullMatchKeys() {
     std::string json("[\"foo\",\"bar\"]");
     CPatternSet set;
 
@@ -94,8 +76,7 @@ void CPatternSetTest::testContains_GivenFullMatchKeys()
     CPPUNIT_ASSERT(set.contains("nonItem") == false);
 }
 
-void CPatternSetTest::testContains_GivenPrefixKeys()
-{
+void CPatternSetTest::testContains_GivenPrefixKeys() {
     std::string json("[\"abc*\", \"foo*\"]");
     CPatternSet set;
 
@@ -109,8 +90,7 @@ void CPatternSetTest::testContains_GivenPrefixKeys()
     CPPUNIT_ASSERT(set.contains("_foo") == false);
 }
 
-void CPatternSetTest::testContains_GivenSuffixKeys()
-{
+void CPatternSetTest::testContains_GivenSuffixKeys() {
     std::string json("[\"*xyz\", \"*foo\"]");
     CPatternSet set;
 
@@ -124,8 +104,7 @@ void CPatternSetTest::testContains_GivenSuffixKeys()
     CPPUNIT_ASSERT(set.contains("foo_") == false);
 }
 
-void CPatternSetTest::testContains_GivenContainsKeys()
-{
+void CPatternSetTest::testContains_GivenContainsKeys() {
     std::string json("[\"*foo*\", \"*456*\"]");
     CPatternSet set;
 
@@ -143,8 +122,7 @@ void CPatternSetTest::testContains_GivenContainsKeys()
     CPPUNIT_ASSERT(set.contains("12346789") == false);
 }
 
-void CPatternSetTest::testContains_GivenMixedKeys()
-{
+void CPatternSetTest::testContains_GivenMixedKeys() {
     std::string json("[\"foo\", \"foo*\", \"*foo\", \"*foo*\"]");
     CPatternSet set;
 
@@ -157,8 +135,7 @@ void CPatternSetTest::testContains_GivenMixedKeys()
     CPPUNIT_ASSERT(set.contains("fo") == false);
 }
 
-void CPatternSetTest::testClear()
-{
+void CPatternSetTest::testClear() {
     std::string json("[\"foo\"]");
     CPatternSet set;
 
diff --git a/lib/core/unittest/CPatternSetTest.h b/lib/core/unittest/CPatternSetTest.h
index c8715b9cfd..9af6431160 100644
--- a/lib/core/unittest/CPatternSetTest.h
+++ b/lib/core/unittest/CPatternSetTest.h
@@ -8,21 +8,20 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CPatternSetTest : public CppUnit::TestFixture
-{
-    public:
-        void testInitFromJson_GivenInvalidJson();
-        void testInitFromJson_GivenNonArray();
-        void testInitFromJson_GivenArrayWithNonStringItem();
-        void testInitFromJson_GivenArrayWithDuplicates();
-        void testContains_GivenFullMatchKeys();
-        void testContains_GivenPrefixKeys();
-        void testContains_GivenSuffixKeys();
-        void testContains_GivenContainsKeys();
-        void testContains_GivenMixedKeys();
-        void testClear();
+class CPatternSetTest : public CppUnit::TestFixture {
+public:
+    void testInitFromJson_GivenInvalidJson();
+    void testInitFromJson_GivenNonArray();
+    void testInitFromJson_GivenArrayWithNonStringItem();
+    void testInitFromJson_GivenArrayWithDuplicates();
+    void testContains_GivenFullMatchKeys();
+    void testContains_GivenPrefixKeys();
+    void testContains_GivenSuffixKeys();
+    void testContains_GivenContainsKeys();
+    void testContains_GivenMixedKeys();
+    void testClear();
 
-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CPatternSetTest_h
diff --git a/lib/core/unittest/CPersistUtilsTest.cc b/lib/core/unittest/CPersistUtilsTest.cc
index 9b799be65b..b2b35c40c7 100644
--- a/lib/core/unittest/CPersistUtilsTest.cc
+++ b/lib/core/unittest/CPersistUtilsTest.cc
@@ -7,14 +7,14 @@
 #include "CPersistUtilsTest.h"
 
 #include
-#include
 #include
 #include
+#include
 
 #include
 #include
-#include
 #include
+#include
 
 #include
 #include
 #include
@@ -36,145 +36,114 @@
 using TSizeUSet = boost::unordered_set<std::size_t>;
 using TSizeUSetVec = std::vector<TSizeUSet>;
 using TSizeDoublePrBuf = boost::circular_buffer<TSizeDoublePr>;
 
-namespace
-{
+namespace {
 
 class BasicCompare {};
 class ContainerCompare {};
 
 template<typename T, typename R = void>
-struct enable_if_type
-{
+struct enable_if_type {
     using type = R;
 };
 
 template<typename T, typename ENABLE = void>
-struct compare_container_selector
-{
+struct compare_container_selector {
     using value = BasicCompare;
 };
 template<typename T>
-struct compare_container_selector<T, typename enable_if_type<typename T::const_iterator>::type>
-{
+struct compare_container_selector<T, typename enable_if_type<typename T::const_iterator>::type> {
     using value = ContainerCompare;
 };
 
-template<typename SELECTOR> class CCompareImpl {};
+template<typename SELECTOR>
+class CCompareImpl {};
 
 //! Convenience function to select implementation.
 template<typename T>
-bool compare(const T &lhs, const T &rhs)
-{
+bool compare(const T& lhs, const T& rhs) {
     return CCompareImpl<typename compare_container_selector<T>::value>::dispatch(lhs, rhs);
 }
 
-struct SFirstLess
-{
+struct SFirstLess {
     template<typename U, typename V>
-    inline bool operator()(const std::pair<U, V> &lhs,
-                           const std::pair<U, V> &rhs) const
-    {
+    inline bool operator()(const std::pair<U, V>& lhs, const std::pair<U, V>& rhs) const {
         return lhs.first < rhs.first;
     }
 };
 
-struct SEqual
-{
-    bool operator()(double lhs, double rhs) const
-    {
-        return std::fabs(lhs - rhs) <= 1e-5 * std::max(std::fabs(lhs), std::fabs(rhs));
-    }
+struct SEqual {
+    bool operator()(double lhs, double rhs) const { return std::fabs(lhs - rhs) <= 1e-5 * std::max(std::fabs(lhs), std::fabs(rhs)); }
 
     template<typename T>
-    bool operator()(T lhs, T rhs) const
-    {
+    bool operator()(T lhs, T rhs) const {
         return this->operator()(static_cast<double>(lhs), static_cast<double>(rhs));
     }
 
-    bool operator()(const TSizeDoublePr &lhs,
-                    const TSizeDoublePr &rhs)
-    {
-        return lhs.first == rhs.first
-               && this->operator()(lhs.second, rhs.second);
+    bool operator()(const TSizeDoublePr& lhs, const TSizeDoublePr& rhs) {
+        return lhs.first == rhs.first && this->operator()(lhs.second, rhs.second);
     }
 
     template<typename U, typename V>
-    bool operator()(const std::pair<U, V> &lhs, const std::pair<U, V> &rhs)
-    {
+    bool operator()(const std::pair<U, V>& lhs, const std::pair<U, V>& rhs) {
        return compare(lhs.first, rhs.first) && compare(lhs.second, rhs.second);
     }
 };
 
-
 template<>
-class CCompareImpl<BasicCompare>
-{
-    public:
-        template<typename T>
-        static bool dispatch(const T &lhs, const T &rhs)
-        {
-            SEqual eq;
-            return eq(lhs, rhs);
-        }
+class CCompareImpl<BasicCompare> {
+public:
+    template<typename T>
+    static bool dispatch(const T& lhs, const T& rhs) {
+        SEqual eq;
+        return eq(lhs, rhs);
+    }
 };
 
 template<>
-class CCompareImpl<ContainerCompare>
-{
-    public:
-        template<typename T>
-        static bool dispatch(const T &lhs, const T &rhs)
-        {
-            using TCItr = typename T::const_iterator;
-            if (lhs.size() != rhs.size())
-            {
+class CCompareImpl<ContainerCompare> {
+public:
+    template<typename T>
+    static bool dispatch(const T& lhs, const T& rhs) {
+        using TCItr = typename T::const_iterator;
+        if (lhs.size() != rhs.size()) {
+            return false;
+        }
+        for (TCItr i = lhs.begin(), j = rhs.begin(); i != lhs.end(); ++i, ++j) {
+            if (!compare(*i, *j)) {
                 return false;
             }
-            for (TCItr i = lhs.begin(), j = rhs.begin(); i != lhs.end(); ++i, ++j)
-            {
-                if (!compare(*i, *j))
-                {
-                    return false;
-                }
-            }
-            return true;
         }
+        return true;
+    }
 
-        template<typename K, typename V>
-        static bool dispatch(const boost::unordered_map<K, V> &lhs,
-                             const boost::unordered_map<K, V> &rhs)
-        {
-            using TVec = std::vector<std::pair<K, V>>;
-            TVec lKeys(lhs.begin(), lhs.end());
-            TVec rKeys(rhs.begin(), rhs.end());
-            std::sort(lKeys.begin(), lKeys.end(), SFirstLess());
-            std::sort(rKeys.begin(), rKeys.end(), SFirstLess());
-            return compare(lKeys, rKeys);
-        }
+    template<typename K, typename V>
+    static bool dispatch(const boost::unordered_map<K, V>& lhs, const boost::unordered_map<K, V>& rhs) {
+        using TVec = std::vector<std::pair<K, V>>;
+        TVec lKeys(lhs.begin(), lhs.end());
+        TVec rKeys(rhs.begin(), rhs.end());
+        std::sort(lKeys.begin(), lKeys.end(), SFirstLess());
+        std::sort(rKeys.begin(), rKeys.end(), SFirstLess());
+        return compare(lKeys, rKeys);
+    }
 
-        template<typename T>
-        static bool dispatch(const boost::unordered_set<T> &lhs,
-                             const boost::unordered_set<T> &rhs)
-        {
-            using TVec = std::vector<T>;
-            TVec lKeys(lhs.begin(), lhs.end());
-            TVec rKeys(rhs.begin(), rhs.end());
-            std::sort(lKeys.begin(), lKeys.end());
-            std::sort(rKeys.begin(), rKeys.end());
-            return compare(lKeys, rKeys);
-        }
+    template<typename T>
+    static bool dispatch(const boost::unordered_set<T>& lhs, const boost::unordered_set<T>& rhs) {
+        using TVec = std::vector<T>;
+        TVec lKeys(lhs.begin(), lhs.end());
+        TVec rKeys(rhs.begin(), rhs.end());
+        std::sort(lKeys.begin(), lKeys.end());
+        std::sort(rKeys.begin(), rKeys.end());
+        return compare(lKeys, rKeys);
+    }
 };
 
 template<typename T>
-bool equal(const T &lhs, const T &rhs)
-{
-    return lhs.size() == rhs.size()
-           && std::equal(lhs.begin(), lhs.end(), rhs.begin(), SEqual());
+bool equal(const T& lhs, const T& rhs) {
    return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin(), SEqual());
 }
 
 template<typename T>
-void testPersistRestore(const T &collection, const T &initial = T())
-{
+void testPersistRestore(const T& collection, const T& initial = T()) {
     const std::string tag("baseTag");
     std::stringstream origSs;
     {
@@ -191,7 +160,7 @@ void testPersistRestore(const T &collection, const T &initial = T())
     }
     LOG_TRACE(" - doing persist again " << typeid(T).name());
     {
-        const T &restoredRef = restored;
+        const T& restoredRef = restored;
         core::CJsonStatePersistInserter inserter(restoredSs);
         core::CPersistUtils::persist(tag, restoredRef, inserter);
     }
@@ -200,11 +169,9 @@
     CPPUNIT_ASSERT_EQUAL(origSs.str(), restoredSs.str());
     CPPUNIT_ASSERT(compare(collection, restored));
 }
-
 }
 
-void CPersistUtilsTest::testPersistContainers()
-{
+void CPersistUtilsTest::testPersistContainers() {
     // 1) Check that persistence and restoration is idempotent.
     // 2) Check some edge cases.
     // 3) Test failures.
@@ -295,8 +262,7 @@ void CPersistUtilsTest::testPersistContainers()
         set.insert(4);
         testPersistRestore(set);
     }
-    LOG_DEBUG("*** nested ***")
-    {
+    LOG_DEBUG("*** nested ***") {
         TDoubleVecVec vec(3);
         vec[0].push_back(22.22);
         vec[0].push_back(3456245);
@@ -338,7 +304,6 @@ void CPersistUtilsTest::testPersistContainers()
 
         testPersistRestore(moreStrs);
 
-
         TSizeUSetVec collection(5);
         collection[0].insert(1);
         collection[0].insert(3);
@@ -457,10 +422,7 @@ void CPersistUtilsTest::testPersistContainers()
         CPPUNIT_ASSERT(!core::CPersistUtils::fromString(bad, collection));
         CPPUNIT_ASSERT(collection.empty());
 
-        bad = std::string("etjdjk")
-              + core::CPersistUtils::PAIR_DELIMITER
-              + "2.3"
-              + core::CPersistUtils::DELIMITER + bad;
+        bad = std::string("etjdjk") + core::CPersistUtils::PAIR_DELIMITER + "2.3" + core::CPersistUtils::DELIMITER + bad;
         CPPUNIT_ASSERT(!core::CPersistUtils::fromString(bad, collection));
         CPPUNIT_ASSERT(collection.empty());
     }
@@ -474,15 +436,13 @@ void CPersistUtilsTest::testPersistContainers()
     }
 }
 
-void CPersistUtilsTest::testPersistIterators()
-{
+void CPersistUtilsTest::testPersistIterators() {
     // Persist only a sub set of a collection
     {
         LOG_DEBUG("*** vector range ***");
 
         TDoubleVec collection;
-        for (int i=0; i<20; i++)
-        {
+        for (int i = 0; i < 20; i++) {
             collection.push_back(i);
         }
 
@@ -496,13 +456,11 @@ void CPersistUtilsTest::testPersistIterators()
         core::CPersistUtils::fromString(state, restored);
 
         TDoubleVec firstTen;
-        for (int i=0; i<10; i++)
-        {
+        for (int i = 0; i < 10; i++) {
             firstTen.push_back(i);
         }
         CPPUNIT_ASSERT(equal(firstTen, restored));
 
-
         TDoubleVec::iterator fifth = collection.begin() + 5;
         TDoubleVec::iterator tenth = collection.begin() + 10;
 
@@ -513,23 +471,20 @@ void CPersistUtilsTest::testPersistIterators()
         core::CPersistUtils::fromString(state, restored);
 
         TDoubleVec fithToTenth;
-        for (int i=5; i<10; i++)
-        {
+        for (int i = 5; i < 10; i++) {
             fithToTenth.push_back(i);
         }
         CPPUNIT_ASSERT(equal(fithToTenth, restored));
     }
 }
 
-void CPersistUtilsTest::testAppend()
-{
+void CPersistUtilsTest::testAppend() {
     // Persist only a sub set of a collection
     {
         LOG_DEBUG("*** vector append ***");
 
         TDoubleVec source;
-        for (int i=0; i<9; i++)
-        {
+        for (int i = 0; i < 9; i++) {
             source.push_back(i);
         }
 
@@ -539,12 +494,10 @@ void CPersistUtilsTest::testAppend()
         core::CPersistUtils::fromString(state, restored);
         CPPUNIT_ASSERT(equal(source, restored));
 
-        for (int i=9; i<15; i++)
-        {
+        for (int i = 9; i < 15; i++) {
             source.push_back(i);
         }
 
-
         TDoubleVec::iterator begin = source.begin() + 9;
         TDoubleVec::iterator end = source.begin() + 15;
 
@@ -552,12 +505,10 @@ void CPersistUtilsTest::testAppend()
         CPPUNIT_ASSERT(begin == end);
         LOG_DEBUG("state = " << state);
 
-        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER,
-                                        core::CPersistUtils::PAIR_DELIMITER, true);
+        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true);
         CPPUNIT_ASSERT(equal(source, restored));
 
-        for (int i=15; i<19; i++)
-        {
+        for (int i = 15; i < 19; i++) {
             source.push_back(i);
         }
 
@@ -568,12 +519,10 @@ void CPersistUtilsTest::testAppend()
         CPPUNIT_ASSERT(begin == end);
         LOG_DEBUG("state = " << state);
 
-        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER,
-                                        core::CPersistUtils::PAIR_DELIMITER, true);
+        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true);
         CPPUNIT_ASSERT(equal(source, restored));
     }
 
-
     {
         LOG_DEBUG("*** map append ***");
 
@@ -582,30 +531,25 @@ void CPersistUtilsTest::testAppend()
         LOG_DEBUG("state = " << state);
 
         TSizeDoubleMap restored;
-        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER,
-                                        core::CPersistUtils::PAIR_DELIMITER, true);
+        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true);
         CPPUNIT_ASSERT(equal(collection, restored));
 
-
-        for (int i=0; i<10; i++)
-        {
+        for (int i = 0; i < 10; i++) {
             collection.insert(TSizeDoublePr(i, 3.2));
         }
 
         state = core::CPersistUtils::toString(collection);
         LOG_DEBUG("state = " << state);
-        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER,
-                                        core::CPersistUtils::PAIR_DELIMITER, true);
+        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true);
         CPPUNIT_ASSERT(equal(collection, restored));
 
         // add another element
-        std::pair<TSizeDoubleMap::iterator, bool> pr = collection.insert(TSizeDoublePr(14, 1.0));
+        std::pair<TSizeDoubleMap::iterator, bool> pr = collection.insert(TSizeDoublePr(14, 1.0));
         TSizeDoubleMap::iterator end = collection.end();
         state = core::CPersistUtils::toString(pr.first, end);
         LOG_DEBUG("state = " << state);
-        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER,
-                                        core::CPersistUtils::PAIR_DELIMITER, true);
+        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true);
         CPPUNIT_ASSERT(equal(collection, restored));
 
         pr = collection.insert(TSizeDoublePr(20, 158.0));
@@ -614,26 +558,19 @@ void CPersistUtilsTest::testAppend()
         end = collection.end();
         state = core::CPersistUtils::toString(pr.first, end);
         LOG_DEBUG("state = " << state);
-        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER,
-                                        core::CPersistUtils::PAIR_DELIMITER, true);
+        core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true);
         CPPUNIT_ASSERT(equal(collection, restored));
     }
 }
 
-CppUnit::Test *CPersistUtilsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CPersistUtilsTest");
+CppUnit::Test*
CPersistUtilsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPersistUtilsTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPersistUtilsTest::testPersistContainers", - &CPersistUtilsTest::testPersistContainers) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPersistUtilsTest::testPersistIterators", - &CPersistUtilsTest::testPersistIterators) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPersistUtilsTest::testAppend", - &CPersistUtilsTest::testAppend) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CPersistUtilsTest::testPersistContainers", &CPersistUtilsTest::testPersistContainers)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CPersistUtilsTest::testPersistIterators", &CPersistUtilsTest::testPersistIterators)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPersistUtilsTest::testAppend", &CPersistUtilsTest::testAppend)); return suiteOfTests; } - diff --git a/lib/core/unittest/CPersistUtilsTest.h b/lib/core/unittest/CPersistUtilsTest.h index c085551b91..dee0bac219 100644 --- a/lib/core/unittest/CPersistUtilsTest.h +++ b/lib/core/unittest/CPersistUtilsTest.h @@ -9,14 +9,13 @@ #include -class CPersistUtilsTest : public CppUnit::TestFixture -{ - public: - void testPersistContainers(); - void testPersistIterators(); - void testAppend(); +class CPersistUtilsTest : public CppUnit::TestFixture { +public: + void testPersistContainers(); + void testPersistIterators(); + void testAppend(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CPersistUtilsTest_h diff --git a/lib/core/unittest/CPolymorphicStackObjectCPtrTest.cc b/lib/core/unittest/CPolymorphicStackObjectCPtrTest.cc index 44d52c3329..d021a3ca4b 100644 --- a/lib/core/unittest/CPolymorphicStackObjectCPtrTest.cc +++ b/lib/core/unittest/CPolymorphicStackObjectCPtrTest.cc @@ -12,44 +12,36 @@ using namespace ml; -namespace -{ - -class CBase -{ - public: - virtual ~CBase() {} - virtual std::string iam() const = 0; -}; +namespace { -class CDerived1 : public CBase -{ - public: - virtual std::string iam() const { return "d1"; } +class CBase { +public: + virtual ~CBase() {} + virtual std::string iam() const = 0; }; -class CDerived2 : public CBase -{ - public: - virtual std::string iam() const { return "d2"; } +class CDerived1 : public CBase { +public: + virtual std::string iam() const { return "d1"; } }; -class CDerived3 : public CBase -{ - public: - virtual std::string iam() const { return "d3"; } +class CDerived2 : public CBase { +public: + virtual std::string iam() const { return "d2"; } }; -class CDerived4 : public CBase -{ - public: - virtual std::string iam() const { return "d4"; } +class CDerived3 : public CBase { +public: + virtual std::string iam() const { return "d3"; } }; +class CDerived4 : public CBase { +public: + virtual std::string iam() const { return "d4"; } +}; } -void CPolymorphicStackObjectCPtrTest::testAll() -{ +void CPolymorphicStackObjectCPtrTest::testAll() { using TStackPtr12 = core::CPolymorphicStackObjectCPtr; using TStackPtr1234 = core::CPolymorphicStackObjectCPtr; @@ -82,13 +74,11 @@ void CPolymorphicStackObjectCPtrTest::testAll() CPPUNIT_ASSERT(!null); } -CppUnit::Test *CPolymorphicStackObjectCPtrTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CPolymorphicStackObjectCPtrTest"); +CppUnit::Test* CPolymorphicStackObjectCPtrTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPolymorphicStackObjectCPtrTest"); - suiteOfTests->addTest( new 
CppUnit::TestCaller( - "CPolymorphicStackObjectCPtrTest::testAll", - &CPolymorphicStackObjectCPtrTest::testAll) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CPolymorphicStackObjectCPtrTest::testAll", + &CPolymorphicStackObjectCPtrTest::testAll)); return suiteOfTests; } diff --git a/lib/core/unittest/CPolymorphicStackObjectCPtrTest.h b/lib/core/unittest/CPolymorphicStackObjectCPtrTest.h index 7b84e2a859..4e1c75bd35 100644 --- a/lib/core/unittest/CPolymorphicStackObjectCPtrTest.h +++ b/lib/core/unittest/CPolymorphicStackObjectCPtrTest.h @@ -9,12 +9,11 @@ #include -class CPolymorphicStackObjectCPtrTest : public CppUnit::TestFixture -{ - public: - void testAll(); +class CPolymorphicStackObjectCPtrTest : public CppUnit::TestFixture { +public: + void testAll(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CPolymorphicStackObjectCPtrTest_h diff --git a/lib/core/unittest/CProcessPriorityTest.cc b/lib/core/unittest/CProcessPriorityTest.cc index 0e2f6846a3..afc2de5765 100644 --- a/lib/core/unittest/CProcessPriorityTest.cc +++ b/lib/core/unittest/CProcessPriorityTest.cc @@ -7,20 +7,15 @@ #include +CppUnit::Test* CProcessPriorityTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProcessPriorityTest"); -CppUnit::Test *CProcessPriorityTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CProcessPriorityTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CProcessPriorityTest::testReducePriority", - &CProcessPriorityTest::testReducePriority) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CProcessPriorityTest::testReducePriority", + &CProcessPriorityTest::testReducePriority)); return suiteOfTests; } -void CProcessPriorityTest::testReducePriority() -{ +void CProcessPriorityTest::testReducePriority() { ml::core::CProcessPriority::reducePriority(); } - diff --git a/lib/core/unittest/CProcessPriorityTest.h b/lib/core/unittest/CProcessPriorityTest.h index 669104b2c2..0acb73c710 100644 --- a/lib/core/unittest/CProcessPriorityTest.h +++ b/lib/core/unittest/CProcessPriorityTest.h @@ -8,14 +8,11 @@ #include +class CProcessPriorityTest : public CppUnit::TestFixture { +public: + void testReducePriority(); -class CProcessPriorityTest : public CppUnit::TestFixture -{ - public: - void testReducePriority(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CProcessPriorityTest_h - diff --git a/lib/core/unittest/CProcessPriorityTest_Linux.cc b/lib/core/unittest/CProcessPriorityTest_Linux.cc index b811dd3b78..78887bf521 100644 --- a/lib/core/unittest/CProcessPriorityTest_Linux.cc +++ b/lib/core/unittest/CProcessPriorityTest_Linux.cc @@ -15,19 +15,15 @@ #include #include +namespace { -namespace -{ - -bool readFromSystemFile(const std::string &fileName, std::string &content) -{ - char buffer[16] = { '\0' }; +bool readFromSystemFile(const std::string& fileName, std::string& content) { + char buffer[16] = {'\0'}; // Use low level functions to read rather than C++ wrappers, as these are // system files. 
int fd = ::open(fileName.c_str(), O_RDONLY); - if (fd == -1) - { + if (fd == -1) { LOG_INFO("Could not open " << fileName << ": " << ::strerror(errno)); return false; } @@ -35,14 +31,12 @@ bool readFromSystemFile(const std::string &fileName, std::string &content) ssize_t bytesRead = ::read(fd, buffer, sizeof(buffer)); ::close(fd); - if (bytesRead < 0) - { + if (bytesRead < 0) { LOG_ERROR("Error reading from " << fileName << ": " << ::strerror(errno)); return false; } - if (bytesRead == 0) - { + if (bytesRead == 0) { LOG_WARN("Read nothing from " << fileName); return false; } @@ -52,48 +46,37 @@ bool readFromSystemFile(const std::string &fileName, std::string &content) return true; } - } +CppUnit::Test* CProcessPriorityTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProcessPriorityTest"); -CppUnit::Test *CProcessPriorityTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CProcessPriorityTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CProcessPriorityTest::testReducePriority", - &CProcessPriorityTest::testReducePriority) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CProcessPriorityTest::testReducePriority", + &CProcessPriorityTest::testReducePriority)); return suiteOfTests; } -void CProcessPriorityTest::testReducePriority() -{ +void CProcessPriorityTest::testReducePriority() { ml::core::CProcessPriority::reducePriority(); bool readFromOneOrOther(false); std::string content; - if (readFromSystemFile("/proc/self/oom_score_adj", content) == true) - { + if (readFromSystemFile("/proc/self/oom_score_adj", content) == true) { CPPUNIT_ASSERT_EQUAL(std::string("667"), content); readFromOneOrOther = true; } - if (readFromSystemFile("/proc/self/oom_adj", content) == true) - { - if (readFromOneOrOther) - { - LOG_DEBUG("oom_score_adj 667 corresponds to oom_adj " << content << - " on kernel " << ml::core::CUname::release()); + if (readFromSystemFile("/proc/self/oom_adj", content) == true) { + if (readFromOneOrOther) { + LOG_DEBUG("oom_score_adj 667 corresponds to oom_adj " << content << " on kernel " << ml::core::CUname::release()); int oomAdj = 0; CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(content, oomAdj)); // For the kernel versions that support both, there's variation in // what an oom_score_adj of 667 maps to - the range seems to be 8-11 CPPUNIT_ASSERT(oomAdj >= 8); CPPUNIT_ASSERT(oomAdj <= 11); - } - else - { + } else { CPPUNIT_ASSERT_EQUAL(std::string("10"), content); } readFromOneOrOther = true; @@ -101,4 +84,3 @@ void CProcessPriorityTest::testReducePriority() CPPUNIT_ASSERT(readFromOneOrOther); } - diff --git a/lib/core/unittest/CProcessTest.cc b/lib/core/unittest/CProcessTest.cc index 8fe0ebce0d..c2a6c96230 100644 --- a/lib/core/unittest/CProcessTest.cc +++ b/lib/core/unittest/CProcessTest.cc @@ -8,21 +8,16 @@ #include #include +CppUnit::Test* CProcessTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProcessTest"); -CppUnit::Test *CProcessTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CProcessTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CProcessTest::testPids", - &CProcessTest::testPids) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CProcessTest::testPids", &CProcessTest::testPids)); return suiteOfTests; } -void CProcessTest::testPids() -{ - ml::core::CProcess &process = ml::core::CProcess::instance(); +void CProcessTest::testPids() { + ml::core::CProcess& process = ml::core::CProcess::instance(); ml::core::CProcess::TPid pid = 
process.id();
     ml::core::CProcess::TPid ppid = process.parentId();

@@ -32,4 +27,3 @@ void CProcessTest::testPids()
     CPPUNIT_ASSERT(ppid != 0);
     CPPUNIT_ASSERT(pid != ppid);
 }
-
diff --git a/lib/core/unittest/CProcessTest.h b/lib/core/unittest/CProcessTest.h
index 065c78f1d1..7571e0f0b3 100644
--- a/lib/core/unittest/CProcessTest.h
+++ b/lib/core/unittest/CProcessTest.h
@@ -8,14 +8,11 @@
 #include
+class CProcessTest : public CppUnit::TestFixture {
+public:
+    void testPids();
-class CProcessTest : public CppUnit::TestFixture
-{
-    public:
-        void testPids();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CProcessTest_h
-
diff --git a/lib/core/unittest/CProgNameTest.cc b/lib/core/unittest/CProgNameTest.cc
index b13473ad67..da7d8bbc41 100644
--- a/lib/core/unittest/CProgNameTest.cc
+++ b/lib/core/unittest/CProgNameTest.cc
@@ -9,23 +9,16 @@
 #include
 #include
+CppUnit::Test* CProgNameTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProgNameTest");
-CppUnit::Test *CProgNameTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CProgNameTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CProgNameTest>(
-                                   "CProgNameTest::testProgName",
-                                   &CProgNameTest::testProgName) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CProgNameTest>(
-                                   "CProgNameTest::testProgDir",
-                                   &CProgNameTest::testProgDir) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProgNameTest>("CProgNameTest::testProgName", &CProgNameTest::testProgName));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProgNameTest>("CProgNameTest::testProgDir", &CProgNameTest::testProgDir));

     return suiteOfTests;
 }
-void CProgNameTest::testProgName()
-{
+void CProgNameTest::testProgName() {
     std::string progName(ml::core::CProgName::progName());

     LOG_DEBUG("Current program name is " << progName);
@@ -33,8 +26,7 @@
     CPPUNIT_ASSERT_EQUAL(std::string("ml_test"), progName);
 }
-void CProgNameTest::testProgDir()
-{
+void CProgNameTest::testProgDir() {
     std::string progDir(ml::core::CProgName::progDir());

     LOG_DEBUG("Current program directory is " << progDir);
@@ -46,4 +38,3 @@
     // Confirm we've stripped any extended length indicator on Windows
     CPPUNIT_ASSERT(progDir.compare(0, 4, "\\\\?\\") != 0);
 }
-
diff --git a/lib/core/unittest/CProgNameTest.h b/lib/core/unittest/CProgNameTest.h
index 699468dd40..9b730f2a45 100644
--- a/lib/core/unittest/CProgNameTest.h
+++ b/lib/core/unittest/CProgNameTest.h
@@ -8,15 +8,12 @@
 #include
+class CProgNameTest : public CppUnit::TestFixture {
+public:
+    void testProgName();
+    void testProgDir();
-class CProgNameTest : public CppUnit::TestFixture
-{
-    public:
-        void testProgName();
-        void testProgDir();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CProgNameTest_h
-
diff --git a/lib/core/unittest/CRapidJsonLineWriterTest.cc b/lib/core/unittest/CRapidJsonLineWriterTest.cc
index d26e20708a..1f0786d9ad 100644
--- a/lib/core/unittest/CRapidJsonLineWriterTest.cc
+++ b/lib/core/unittest/CRapidJsonLineWriterTest.cc
@@ -20,8 +20,7 @@
 #include
-namespace
-{
+namespace {
 const std::string STR_NAME("str");
 const std::string EMPTY1_NAME("empty1");
 const std::string EMPTY2_NAME("empty2");
@@ -37,13 +36,11 @@ const std::string NAN_ARRAY_NAME("nan[]");
 const std::string TTIME_ARRAY_NAME("TTime[]");
 }
-
-void CRapidJsonLineWriterTest::testDoublePrecission()
-{
+void CRapidJsonLineWriterTest::testDoublePrecission() {
     std::ostringstream strm;
     {
-        using TGenericLineWriter =
ml::core::CRapidJsonLineWriter, rapidjson::UTF8<>, - rapidjson::CrtAllocator>; + using TGenericLineWriter = + ml::core::CRapidJsonLineWriter, rapidjson::UTF8<>, rapidjson::CrtAllocator>; rapidjson::OStreamWrapper writeStream(strm); TGenericLineWriter writer(writeStream); @@ -60,11 +57,10 @@ void CRapidJsonLineWriterTest::testDoublePrecission() CPPUNIT_ASSERT_EQUAL(std::string("{\"a\":0.00003,\"b\":5e-300,\"c\":0.0}\n"), strm.str()); } -void CRapidJsonLineWriterTest::testDoublePrecissionDtoa() -{ +void CRapidJsonLineWriterTest::testDoublePrecissionDtoa() { char buffer[100]; - char *end = rapidjson::internal::dtoa(3e-5, buffer); + char* end = rapidjson::internal::dtoa(3e-5, buffer); CPPUNIT_ASSERT_EQUAL(std::string("0.00003"), std::string(buffer, static_cast(end - buffer))); end = rapidjson::internal::dtoa(2e-20, buffer, 20); @@ -85,16 +81,14 @@ void CRapidJsonLineWriterTest::testDoublePrecissionDtoa() CPPUNIT_ASSERT_EQUAL(std::string("1e-300"), std::string(buffer, ret)); } -void CRapidJsonLineWriterTest::microBenchmark() -{ +void CRapidJsonLineWriterTest::microBenchmark() { char buffer[100]; ml::core::CStopWatch stopWatch; stopWatch.start(); size_t runs = 100000000; - for (size_t i = 0; i < runs; ++i) - { + for (size_t i = 0; i < runs; ++i) { rapidjson::internal::dtoa(3e-5, buffer); rapidjson::internal::dtoa(0.0, buffer); rapidjson::internal::dtoa(0.12345, buffer); @@ -105,8 +99,7 @@ void CRapidJsonLineWriterTest::microBenchmark() LOG_INFO("Rapidjson dtoa " << runs << " runs took " << elapsed); stopWatch.reset(); stopWatch.start(); - for (size_t i = 0; i < runs; ++i) - { + for (size_t i = 0; i < runs; ++i) { ::snprintf(buffer, sizeof(buffer), "%g", 3e-5); ::snprintf(buffer, sizeof(buffer), "%g", 0.0); ::snprintf(buffer, sizeof(buffer), "%g", 0.12345); @@ -115,19 +108,16 @@ void CRapidJsonLineWriterTest::microBenchmark() } elapsed = stopWatch.stop(); - LOG_INFO("snprintf " << runs <<" runs took " << elapsed); + LOG_INFO("snprintf " << runs << " runs took " << elapsed); } -CppUnit::Test* CRapidJsonLineWriterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRapidJsonLineWriterTest"); +CppUnit::Test* CRapidJsonLineWriterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidJsonLineWriterTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRapidJsonLineWriterTest::testDoublePrecissionDtoa", - &CRapidJsonLineWriterTest::testDoublePrecissionDtoa) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRapidJsonLineWriterTest::testDoublePrecission", - &CRapidJsonLineWriterTest::testDoublePrecission) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CRapidJsonLineWriterTest::testDoublePrecissionDtoa", + &CRapidJsonLineWriterTest::testDoublePrecissionDtoa)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRapidJsonLineWriterTest::testDoublePrecission", + &CRapidJsonLineWriterTest::testDoublePrecission)); // microbenchmark, enable if you are interested /*suiteOfTests->addTest( new CppUnit::TestCaller( @@ -136,4 +126,3 @@ CppUnit::Test* CRapidJsonLineWriterTest::suite() return suiteOfTests; } - diff --git a/lib/core/unittest/CRapidJsonLineWriterTest.h b/lib/core/unittest/CRapidJsonLineWriterTest.h index 93a4a44ff5..f84f7aa415 100644 --- a/lib/core/unittest/CRapidJsonLineWriterTest.h +++ b/lib/core/unittest/CRapidJsonLineWriterTest.h @@ -8,16 +8,13 @@ #include +class CRapidJsonLineWriterTest : public CppUnit::TestFixture { +public: + void testDoublePrecission(); + void testDoublePrecissionDtoa(); + void microBenchmark(); -class 
CRapidJsonLineWriterTest : public CppUnit::TestFixture
-{
-    public:
-        void testDoublePrecission();
-        void testDoublePrecissionDtoa();
-        void microBenchmark();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
-
 #endif /* INCLUDED_CRapidJsonLineWriterTest_h */
diff --git a/lib/core/unittest/CRapidJsonWriterBaseTest.cc b/lib/core/unittest/CRapidJsonWriterBaseTest.cc
index db57a7547b..33dae44005 100644
--- a/lib/core/unittest/CRapidJsonWriterBaseTest.cc
+++ b/lib/core/unittest/CRapidJsonWriterBaseTest.cc
@@ -5,7 +5,6 @@
  */
 #include "CRapidJsonWriterBaseTest.h"
-
 #include
 #include
 #include
@@ -16,23 +15,18 @@
 #include
 #include
+CppUnit::Test* CRapidJsonWriterBaseTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidJsonWriterBaseTest");
-CppUnit::Test *CRapidJsonWriterBaseTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRapidJsonWriterBaseTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRapidJsonWriterBaseTest>(
-                                   "CRapidJsonWriterBaseTest::testAddFields",
-                                   &CRapidJsonWriterBaseTest::testAddFields) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRapidJsonWriterBaseTest>(
-                                   "CRapidJsonWriterBaseTest::testRemoveMemberIfPresent",
-                                   &CRapidJsonWriterBaseTest::testRemoveMemberIfPresent) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRapidJsonWriterBaseTest>("CRapidJsonWriterBaseTest::testAddFields",
+                                                                            &CRapidJsonWriterBaseTest::testAddFields));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRapidJsonWriterBaseTest>("CRapidJsonWriterBaseTest::testRemoveMemberIfPresent",
+                                                                            &CRapidJsonWriterBaseTest::testRemoveMemberIfPresent));

     return suiteOfTests;
 }
-namespace
-{
+namespace {
 const std::string STR_NAME("str");
 const std::string EMPTY1_NAME("empty1");
 const std::string EMPTY2_NAME("empty2");
@@ -49,31 +43,30 @@ const std::string NAN_ARRAY_NAME("nan[]");
 const std::string TTIME_ARRAY_NAME("TTime[]");
 }
-void CRapidJsonWriterBaseTest::testAddFields()
-{
+void CRapidJsonWriterBaseTest::testAddFields() {
     std::ostringstream strm;
     rapidjson::OStreamWrapper writeStream(strm);

     using TGenericLineWriter =
-        ml::core::CRapidJsonWriterBase<rapidjson::OStreamWrapper, rapidjson::UTF8<>, rapidjson::UTF8<>,
-                                       rapidjson::CrtAllocator>;
+        ml::core::CRapidJsonWriterBase<rapidjson::OStreamWrapper, rapidjson::UTF8<>, rapidjson::UTF8<>, rapidjson::CrtAllocator>;
     TGenericLineWriter writer(writeStream);

-    rapidjson::Document doc = writer.makeDoc();;
+    rapidjson::Document doc = writer.makeDoc();

     writer.addStringFieldCopyToObj(STR_NAME, "hello", doc);
     writer.addStringFieldCopyToObj(EMPTY1_NAME, "", doc);
     writer.addStringFieldCopyToObj(EMPTY2_NAME, "", doc, true);
-    writer.addDoubleFieldToObj(DOUBLE_NAME,  1.77e-156, doc);
+    writer.addDoubleFieldToObj(DOUBLE_NAME, 1.77e-156, doc);
     writer.addDoubleFieldToObj(NAN_NAME, std::numeric_limits<double>::quiet_NaN(), doc);
     writer.addDoubleFieldToObj(INFINITY_NAME, std::numeric_limits<double>::infinity(), doc);
     writer.addBoolFieldToObj(BOOL_NAME, false, doc);
     writer.addIntFieldToObj(INT_NAME, -9, doc);
     writer.addTimeFieldToObj(TIME_NAME, ml::core_t::TTime(1521035866), doc);
     writer.addUIntFieldToObj(UINT_NAME, 999999999999999ull, doc);
-    writer.addStringArrayFieldToObj(STR_ARRAY_NAME,  TGenericLineWriter::TStrVec(3, "blah"), doc);
+    writer.addStringArrayFieldToObj(STR_ARRAY_NAME, TGenericLineWriter::TStrVec(3, "blah"), doc);
     writer.addDoubleArrayFieldToObj(DOUBLE_ARRAY_NAME, TGenericLineWriter::TDoubleVec(10, 1.5), doc);
-    writer.addDoubleArrayFieldToObj(NAN_ARRAY_NAME, TGenericLineWriter::TDoubleVec(2, std::numeric_limits<double>::quiet_NaN()), doc);
-    writer.addTimeArrayFieldToObj(TTIME_ARRAY_NAME,  TGenericLineWriter::TTimeVec(2, 1421421421), doc);
+    writer.addDoubleArrayFieldToObj(NAN_ARRAY_NAME, TGenericLineWriter::TDoubleVec(2, std::numeric_limits<double>::quiet_NaN()), doc);
+    writer.addTimeArrayFieldToObj(TTIME_ARRAY_NAME, TGenericLineWriter::TTimeVec(2, 1421421421), doc);

     writer.write(doc);
     writer.Flush();
@@ -84,35 +77,33 @@
     LOG_DEBUG("Printed doc is: " << printedDoc);

     std::string expectedDoc("{"
-                                "\"str\":\"hello\","
-                                "\"empty2\":\"\","
-                                "\"double\":1.77e-156,"
-                                "\"nan\":0,"
-                                "\"infinity\":0,"
-                                "\"bool\":false,"
-                                "\"int\":-9,"
-                                "\"time\":1521035866000,"
-                                "\"uint\":999999999999999,"
-                                "\"str[]\":[\"blah\",\"blah\",\"blah\"],"
-                                "\"double[]\":[1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5],"
-                                "\"nan[]\":[0,0],"
-                                "\"TTime[]\":[1421421421000,1421421421000]"
+                            "\"str\":\"hello\","
+                            "\"empty2\":\"\","
+                            "\"double\":1.77e-156,"
+                            "\"nan\":0,"
+                            "\"infinity\":0,"
+                            "\"bool\":false,"
+                            "\"int\":-9,"
+                            "\"time\":1521035866000,"
+                            "\"uint\":999999999999999,"
+                            "\"str[]\":[\"blah\",\"blah\",\"blah\"],"
+                            "\"double[]\":[1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5],"
+                            "\"nan[]\":[0,0],"
+                            "\"TTime[]\":[1421421421000,1421421421000]"
                             "}");

     CPPUNIT_ASSERT_EQUAL(expectedDoc, printedDoc);
 }
-
-void CRapidJsonWriterBaseTest::testRemoveMemberIfPresent()
-{
+void CRapidJsonWriterBaseTest::testRemoveMemberIfPresent() {
     std::ostringstream strm;
     rapidjson::OStreamWrapper writeStream(strm);

     using TGenericLineWriter =
-        ml::core::CRapidJsonWriterBase<rapidjson::OStreamWrapper, rapidjson::UTF8<>, rapidjson::UTF8<>,
-                                       rapidjson::CrtAllocator>;
+        ml::core::CRapidJsonWriterBase<rapidjson::OStreamWrapper, rapidjson::UTF8<>, rapidjson::UTF8<>, rapidjson::CrtAllocator>;
     TGenericLineWriter writer(writeStream);

-    rapidjson::Document doc = writer.makeDoc();;
+    rapidjson::Document doc = writer.makeDoc();

     std::string foo("foo");
diff --git a/lib/core/unittest/CRapidJsonWriterBaseTest.h b/lib/core/unittest/CRapidJsonWriterBaseTest.h
index db06a1dbc7..55dde7d989 100644
--- a/lib/core/unittest/CRapidJsonWriterBaseTest.h
+++ b/lib/core/unittest/CRapidJsonWriterBaseTest.h
@@ -8,14 +8,12 @@
 #include
-class CRapidJsonWriterBaseTest : public CppUnit::TestFixture
-{
-    public:
-        void testAddFields();
-        void testRemoveMemberIfPresent();
+class CRapidJsonWriterBaseTest : public CppUnit::TestFixture {
+public:
+    void testAddFields();
+    void testRemoveMemberIfPresent();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 #endif // INCLUDED_CRapidJsonWriterBaseTest_h
-
diff --git a/lib/core/unittest/CRapidXmlParserTest.cc b/lib/core/unittest/CRapidXmlParserTest.cc
index c6ba89f6ea..717c323f0d 100644
--- a/lib/core/unittest/CRapidXmlParserTest.cc
+++ b/lib/core/unittest/CRapidXmlParserTest.cc
@@ -12,38 +12,27 @@
 #include
-
-CppUnit::Test *CRapidXmlParserTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRapidXmlParserTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRapidXmlParserTest>(
-                                   "CRapidXmlParserTest::testParse1",
-                                   &CRapidXmlParserTest::testParse1) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRapidXmlParserTest>(
-                                   "CRapidXmlParserTest::testParse2",
-                                   &CRapidXmlParserTest::testParse2) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRapidXmlParserTest>(
-                                   "CRapidXmlParserTest::testNavigate",
-                                   &CRapidXmlParserTest::testNavigate) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRapidXmlParserTest>(
-                                   "CRapidXmlParserTest::testConvert",
-                                   &CRapidXmlParserTest::testConvert) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRapidXmlParserTest>(
-                                   "CRapidXmlParserTest::testDump",
-                                   &CRapidXmlParserTest::testDump) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRapidXmlParserTest>(
-                                   "CRapidXmlParserTest::testParseSpeed",
-                                   &CRapidXmlParserTest::testParseSpeed) );
-
suiteOfTests->addTest( new CppUnit::TestCaller( - "CRapidXmlParserTest::testConvertSpeed", - &CRapidXmlParserTest::testConvertSpeed) ); +CppUnit::Test* CRapidXmlParserTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidXmlParserTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CRapidXmlParserTest::testParse1", &CRapidXmlParserTest::testParse1)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CRapidXmlParserTest::testParse2", &CRapidXmlParserTest::testParse2)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CRapidXmlParserTest::testNavigate", &CRapidXmlParserTest::testNavigate)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CRapidXmlParserTest::testConvert", &CRapidXmlParserTest::testConvert)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRapidXmlParserTest::testDump", &CRapidXmlParserTest::testDump)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CRapidXmlParserTest::testParseSpeed", &CRapidXmlParserTest::testParseSpeed)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CRapidXmlParserTest::testConvertSpeed", &CRapidXmlParserTest::testConvertSpeed)); return suiteOfTests; } -void CRapidXmlParserTest::testParse1() -{ +void CRapidXmlParserTest::testParse1() { std::string goodString = CRapidXmlParserTest::fileToString("./testfiles/CXmlParser1.xml"); ml::core::CRapidXmlParser parser; @@ -53,8 +42,7 @@ void CRapidXmlParserTest::testParse1() this->testParse1(parser); } -void CRapidXmlParserTest::testParse2() -{ +void CRapidXmlParserTest::testParse2() { std::string goodString = CRapidXmlParserTest::fileToString("./testfiles/CXmlParser2.xml"); ml::core::CRapidXmlParser parser; @@ -68,17 +56,17 @@ void CRapidXmlParserTest::testParse2() CPPUNIT_ASSERT_EQUAL(std::string("syslog_parser"), rootNodePtr->name()); CPPUNIT_ASSERT_EQUAL(rootNodePtr->name(), parser.rootElementName()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &parseTree = rootNodePtr->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& parseTree = rootNodePtr->children(); CPPUNIT_ASSERT_EQUAL(size_t(1), parseTree.size()); CPPUNIT_ASSERT(parseTree[0] != 0); CPPUNIT_ASSERT_EQUAL(std::string("parsetree"), parseTree[0]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &expression = parseTree[0]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& expression = parseTree[0]->children(); CPPUNIT_ASSERT_EQUAL(size_t(2), expression.size()); CPPUNIT_ASSERT(expression[0] != 0); CPPUNIT_ASSERT_EQUAL(std::string("expression"), expression[0]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &descriptionAndRegexes = expression[0]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& descriptionAndRegexes = expression[0]->children(); CPPUNIT_ASSERT_EQUAL(size_t(2), descriptionAndRegexes.size()); CPPUNIT_ASSERT(descriptionAndRegexes[0] != 0); CPPUNIT_ASSERT_EQUAL(std::string("description"), descriptionAndRegexes[0]->name()); @@ -86,7 +74,7 @@ void CRapidXmlParserTest::testParse2() CPPUNIT_ASSERT(descriptionAndRegexes[1] != 0); CPPUNIT_ASSERT_EQUAL(std::string("regexes"), descriptionAndRegexes[1]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &varbind = descriptionAndRegexes[1]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& varbind = descriptionAndRegexes[1]->children(); CPPUNIT_ASSERT_EQUAL(size_t(2), varbind.size()); CPPUNIT_ASSERT(varbind[0] != 0); CPPUNIT_ASSERT_EQUAL(std::string("varbind"), varbind[0]->name()); @@ -94,7 +82,7 @@ void 
CRapidXmlParserTest::testParse2() CPPUNIT_ASSERT_EQUAL(std::string("varbind"), varbind[1]->name()); // Test attributes - const ml::core::CXmlNodeWithChildren::TChildNodePVec &tokenAndRegex0 = varbind[0]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& tokenAndRegex0 = varbind[0]->children(); CPPUNIT_ASSERT_EQUAL(std::string("token"), tokenAndRegex0[0]->name()); CPPUNIT_ASSERT_EQUAL(std::string(""), tokenAndRegex0[0]->value()); CPPUNIT_ASSERT_EQUAL(std::string("regex"), tokenAndRegex0[1]->name()); @@ -103,15 +91,14 @@ void CRapidXmlParserTest::testParse2() CPPUNIT_ASSERT(this->testAttribute(*(tokenAndRegex0[1]), "local", "BZ")); // Test CDATA - const ml::core::CXmlNodeWithChildren::TChildNodePVec &tokenAndRegex1 = varbind[1]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& tokenAndRegex1 = varbind[1]->children(); CPPUNIT_ASSERT_EQUAL(std::string("token"), tokenAndRegex1[0]->name()); CPPUNIT_ASSERT_EQUAL(std::string("source"), tokenAndRegex1[0]->value()); CPPUNIT_ASSERT_EQUAL(std::string("regex"), tokenAndRegex1[1]->name()); CPPUNIT_ASSERT_EQUAL(std::string("(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"), tokenAndRegex1[1]->value()); } -void CRapidXmlParserTest::testNavigate() -{ +void CRapidXmlParserTest::testNavigate() { std::string goodString = CRapidXmlParserTest::fileToString("./testfiles/CXmlParser2.xml"); ml::core::CRapidXmlParser parser; @@ -145,8 +132,7 @@ void CRapidXmlParserTest::testNavigate() CPPUNIT_ASSERT(!parser.navigateNext()); } -ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP CRapidXmlParserTest::makeTestNodeHierarchy() -{ +ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP CRapidXmlParserTest::makeTestNodeHierarchy() { ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(new ml::core::CXmlNodeWithChildren("root")); ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP id(new ml::core::CXmlNodeWithChildren("id", "123")); @@ -169,14 +155,14 @@ ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP CRapidXmlParserTest::makeT attrMap["attr1"] = "first 'attribute'"; attrMap["attr2"] = "second \"attribute\""; - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP dualAttr(new ml::core::CXmlNodeWithChildren("dual", "I've got <2> attributes", attrMap)); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP dualAttr( + new ml::core::CXmlNodeWithChildren("dual", "I've got <2> attributes", attrMap)); root->addChildP(dualAttr); return root; } -void CRapidXmlParserTest::testConvert() -{ +void CRapidXmlParserTest::testConvert() { // Use a standard node hierarchy to allow for comparison with the // standards-compliant XML parser ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(CRapidXmlParserTest::makeTestNodeHierarchy()); @@ -208,8 +194,7 @@ void CRapidXmlParserTest::testConvert() CPPUNIT_ASSERT(converted.find("") != std::string::npos); } -void CRapidXmlParserTest::testDump() -{ +void CRapidXmlParserTest::testDump() { std::string goodString = CRapidXmlParserTest::fileToString("./testfiles/CXmlParser1.xml"); ml::core::CRapidXmlParser parser1; @@ -225,8 +210,7 @@ void CRapidXmlParserTest::testDump() this->testParse1(parser2); } -void CRapidXmlParserTest::testParse1(const ml::core::CRapidXmlParser &parser) -{ +void CRapidXmlParserTest::testParse1(const ml::core::CRapidXmlParser& parser) { ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP rootNodePtr; CPPUNIT_ASSERT(parser.toNodeHierarchy(rootNodePtr)); @@ -234,14 +218,14 @@ void CRapidXmlParserTest::testParse1(const ml::core::CRapidXmlParser &parser) 
CPPUNIT_ASSERT_EQUAL(std::string("ItemSearchResponse"), rootNodePtr->name()); CPPUNIT_ASSERT_EQUAL(rootNodePtr->name(), parser.rootElementName()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &firstLevelChildren = rootNodePtr->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& firstLevelChildren = rootNodePtr->children(); CPPUNIT_ASSERT_EQUAL(size_t(2), firstLevelChildren.size()); CPPUNIT_ASSERT(firstLevelChildren[0] != 0); CPPUNIT_ASSERT_EQUAL(std::string("OperationRequest"), firstLevelChildren[0]->name()); CPPUNIT_ASSERT(firstLevelChildren[1] != 0); CPPUNIT_ASSERT_EQUAL(std::string("Items"), firstLevelChildren[1]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &opReqChildren = firstLevelChildren[0]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& opReqChildren = firstLevelChildren[0]->children(); CPPUNIT_ASSERT_EQUAL(size_t(4), opReqChildren.size()); CPPUNIT_ASSERT(opReqChildren[0] != 0); CPPUNIT_ASSERT_EQUAL(std::string("HTTPHeaders"), opReqChildren[0]->name()); @@ -255,40 +239,39 @@ void CRapidXmlParserTest::testParse1(const ml::core::CRapidXmlParser &parser) CPPUNIT_ASSERT_EQUAL(std::string("1.05041599273682"), opReqChildren[3]->value()); // Test CDATA - const ml::core::CXmlNodeWithChildren::TChildNodePVec &itemsChildren = firstLevelChildren[1]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& itemsChildren = firstLevelChildren[1]->children(); CPPUNIT_ASSERT_EQUAL(size_t(13), itemsChildren.size()); CPPUNIT_ASSERT(itemsChildren[3] != 0); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &item3Children = itemsChildren[3]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& item3Children = itemsChildren[3]->children(); CPPUNIT_ASSERT_EQUAL(size_t(4), item3Children.size()); CPPUNIT_ASSERT(item3Children[0] != 0); CPPUNIT_ASSERT_EQUAL(std::string("msg"), item3Children[0]->name()); CPPUNIT_ASSERT_EQUAL(std::string("\n\ Invalid Date of Birth.
This is a test validation message from the server \n\ - "), item3Children[0]->value()); + "), + item3Children[0]->value()); // Test escaped ampersand CPPUNIT_ASSERT(itemsChildren[10] != 0); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &item10Children = itemsChildren[10]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& item10Children = itemsChildren[10]->children(); CPPUNIT_ASSERT_EQUAL(size_t(3), item10Children.size()); CPPUNIT_ASSERT(item10Children[2] != 0); CPPUNIT_ASSERT_EQUAL(std::string("ItemAttributes"), item10Children[2]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec &itemAttributesChildren = item10Children[2]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& itemAttributesChildren = item10Children[2]->children(); CPPUNIT_ASSERT_EQUAL(size_t(4), itemAttributesChildren.size()); CPPUNIT_ASSERT(itemAttributesChildren[1] != 0); CPPUNIT_ASSERT_EQUAL(std::string("Manufacturer"), itemAttributesChildren[1]->name()); CPPUNIT_ASSERT_EQUAL(std::string("William Morrow & Company"), itemAttributesChildren[1]->value()); } -std::string CRapidXmlParserTest::fileToString(const std::string &fileName) -{ +std::string CRapidXmlParserTest::fileToString(const std::string& fileName) { std::string ret; std::ifstream ifs(fileName.c_str()); CPPUNIT_ASSERT_MESSAGE(fileName, ifs.is_open()); std::string line; - while (std::getline(ifs, line)) - { + while (std::getline(ifs, line)) { ret += line; ret += '\n'; } @@ -296,18 +279,13 @@ std::string CRapidXmlParserTest::fileToString(const std::string &fileName) return ret; } -bool CRapidXmlParserTest::testAttribute(const ml::core::CXmlNode &node, - const std::string &key, - const std::string &expected) -{ +bool CRapidXmlParserTest::testAttribute(const ml::core::CXmlNode& node, const std::string& key, const std::string& expected) { std::string actual; - if (node.attribute(key, actual) == false) - { + if (node.attribute(key, actual) == false) { return false; } - if (actual != expected) - { + if (actual != expected) { LOG_ERROR(actual << ' ' << expected); return false; } @@ -315,20 +293,17 @@ bool CRapidXmlParserTest::testAttribute(const ml::core::CXmlNode &node, return true; } -void CRapidXmlParserTest::testParseSpeed() -{ +void CRapidXmlParserTest::testParseSpeed() { static const size_t TEST_SIZE(25000); std::string testString(CRapidXmlParserTest::fileToString("./testfiles/CXmlParser2.xml")); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting parse speed test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting parse speed test at " << ml::core::CTimeUtils::toTimeString(start)); ml::core::CXmlNodeWithChildrenPool nodePool; - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseString(testString)); @@ -341,15 +316,12 @@ void CRapidXmlParserTest::testParseSpeed() } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished parse speed test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished parse speed test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Parsing " << TEST_SIZE << " documents took " << - (end - start) << " seconds"); + LOG_INFO("Parsing " << TEST_SIZE << " documents took " << (end - start) << " seconds"); } -void CRapidXmlParserTest::testConvertSpeed() -{ +void CRapidXmlParserTest::testConvertSpeed() { static const size_t TEST_SIZE(100000); // Use a standard node hierarchy to allow 
for comparison with the @@ -357,20 +329,15 @@ void CRapidXmlParserTest::testConvertSpeed() ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(CRapidXmlParserTest::makeTestNodeHierarchy()); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting convert speed test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting convert speed test at " << ml::core::CTimeUtils::toTimeString(start)); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { std::string converted; ml::core::CRapidXmlParser::convert(*root, converted); } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished convert speed test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished convert speed test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Converting " << TEST_SIZE << " documents took " << - (end - start) << " seconds"); + LOG_INFO("Converting " << TEST_SIZE << " documents took " << (end - start) << " seconds"); } - diff --git a/lib/core/unittest/CRapidXmlParserTest.h b/lib/core/unittest/CRapidXmlParserTest.h index b625b5c33d..7035103706 100644 --- a/lib/core/unittest/CRapidXmlParserTest.h +++ b/lib/core/unittest/CRapidXmlParserTest.h @@ -12,39 +12,33 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CRapidXmlParser; class CXmlNode; } } -class CRapidXmlParserTest : public CppUnit::TestFixture -{ - public: - void testParse1(); - void testParse2(); - void testNavigate(); - void testConvert(); - void testDump(); - void testParseSpeed(); - void testConvertSpeed(); +class CRapidXmlParserTest : public CppUnit::TestFixture { +public: + void testParse1(); + void testParse2(); + void testNavigate(); + void testConvert(); + void testDump(); + void testParseSpeed(); + void testConvertSpeed(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); - static ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP makeTestNodeHierarchy(); + static ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP makeTestNodeHierarchy(); - private: - static void testParse1(const ml::core::CRapidXmlParser &parser); +private: + static void testParse1(const ml::core::CRapidXmlParser& parser); - static std::string fileToString(const std::string &fileName); + static std::string fileToString(const std::string& fileName); - static bool testAttribute(const ml::core::CXmlNode &node, - const std::string &key, - const std::string &expected); + static bool testAttribute(const ml::core::CXmlNode& node, const std::string& key, const std::string& expected); }; #endif // INCLUDED_CRapidXmlParserTest_h - diff --git a/lib/core/unittest/CRapidXmlStatePersistInserterTest.cc b/lib/core/unittest/CRapidXmlStatePersistInserterTest.cc index dc61db48d7..6a4c62d291 100644 --- a/lib/core/unittest/CRapidXmlStatePersistInserterTest.cc +++ b/lib/core/unittest/CRapidXmlStatePersistInserterTest.cc @@ -8,31 +8,24 @@ #include #include +CppUnit::Test* CRapidXmlStatePersistInserterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidXmlStatePersistInserterTest"); -CppUnit::Test *CRapidXmlStatePersistInserterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRapidXmlStatePersistInserterTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRapidXmlStatePersistInserterTest::testPersist", - &CRapidXmlStatePersistInserterTest::testPersist) ); + suiteOfTests->addTest(new 
CppUnit::TestCaller("CRapidXmlStatePersistInserterTest::testPersist", + &CRapidXmlStatePersistInserterTest::testPersist)); return suiteOfTests; } -namespace -{ +namespace { -void insert2ndLevel(ml::core::CStatePersistInserter &inserter) -{ +void insert2ndLevel(ml::core::CStatePersistInserter& inserter) { inserter.insertValue("level2A", 3.14, ml::core::CIEEE754::E_SinglePrecision); inserter.insertValue("level2B", 'z'); } - } -void CRapidXmlStatePersistInserterTest::testPersist() -{ +void CRapidXmlStatePersistInserterTest::testPersist() { ml::core::CRapidXmlStatePersistInserter::TStrStrMap rootAttributes; rootAttributes["attr1"] = "attrVal1"; rootAttributes["attr2"] = "attrVal2"; @@ -49,7 +42,8 @@ void CRapidXmlStatePersistInserterTest::testPersist() LOG_DEBUG("XML is: " << xml); inserter.toXml(false, xml); - CPPUNIT_ASSERT_EQUAL(std::string("a253.14z"), + CPPUNIT_ASSERT_EQUAL(std::string("a253.14z"), xml); } - diff --git a/lib/core/unittest/CRapidXmlStatePersistInserterTest.h b/lib/core/unittest/CRapidXmlStatePersistInserterTest.h index 063a82478d..b22caa050c 100644 --- a/lib/core/unittest/CRapidXmlStatePersistInserterTest.h +++ b/lib/core/unittest/CRapidXmlStatePersistInserterTest.h @@ -8,14 +8,11 @@ #include +class CRapidXmlStatePersistInserterTest : public CppUnit::TestFixture { +public: + void testPersist(); -class CRapidXmlStatePersistInserterTest : public CppUnit::TestFixture -{ - public: - void testPersist(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CRapidXmlStatePersistInserterTest_h - diff --git a/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.cc b/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.cc index 147184d7b9..33d1cbcb68 100644 --- a/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.cc +++ b/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.cc @@ -8,23 +8,18 @@ #include #include +CppUnit::Test* CRapidXmlStateRestoreTraverserTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidXmlStateRestoreTraverserTest"); -CppUnit::Test *CRapidXmlStateRestoreTraverserTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRapidXmlStateRestoreTraverserTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRapidXmlStateRestoreTraverserTest::testRestore", - &CRapidXmlStateRestoreTraverserTest::testRestore) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CRapidXmlStateRestoreTraverserTest::testRestore", + &CRapidXmlStateRestoreTraverserTest::testRestore)); return suiteOfTests; } -namespace -{ +namespace { -bool traverse2ndLevel(ml::core::CStateRestoreTraverser &traverser) -{ +bool traverse2ndLevel(ml::core::CStateRestoreTraverser& traverser) { CPPUNIT_ASSERT_EQUAL(std::string("level2A"), traverser.name()); CPPUNIT_ASSERT_EQUAL(std::string("3.14"), traverser.value()); CPPUNIT_ASSERT(!traverser.hasSubLevel()); @@ -37,8 +32,7 @@ bool traverse2ndLevel(ml::core::CStateRestoreTraverser &traverser) return true; } -bool traverse1stLevel(ml::core::CStateRestoreTraverser &traverser) -{ +bool traverse1stLevel(ml::core::CStateRestoreTraverser& traverser) { CPPUNIT_ASSERT_EQUAL(std::string("level1A"), traverser.name()); CPPUNIT_ASSERT_EQUAL(std::string("a"), traverser.value()); CPPUNIT_ASSERT(!traverser.hasSubLevel()); @@ -54,12 +48,12 @@ bool traverse1stLevel(ml::core::CStateRestoreTraverser &traverser) return true; } - } -void CRapidXmlStateRestoreTraverserTest::testRestore() -{ - std::string xml("a253.14z"); +void CRapidXmlStateRestoreTraverserTest::testRestore() { + 
std::string xml("a253.14z"); ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); @@ -71,4 +65,3 @@ void CRapidXmlStateRestoreTraverserTest::testRestore() CPPUNIT_ASSERT(traverser.traverseSubLevel(&traverse1stLevel)); CPPUNIT_ASSERT(!traverser.next()); } - diff --git a/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.h b/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.h index 37868a9f75..d58131fe56 100644 --- a/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.h +++ b/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.h @@ -8,14 +8,11 @@ #include +class CRapidXmlStateRestoreTraverserTest : public CppUnit::TestFixture { +public: + void testRestore(); -class CRapidXmlStateRestoreTraverserTest : public CppUnit::TestFixture -{ - public: - void testRestore(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CRapidXmlStateRestoreTraverserTest_h - diff --git a/lib/core/unittest/CReadWriteLockTest.cc b/lib/core/unittest/CReadWriteLockTest.cc index 1853f2a815..db7b81d65f 100644 --- a/lib/core/unittest/CReadWriteLockTest.cc +++ b/lib/core/unittest/CReadWriteLockTest.cc @@ -21,275 +21,189 @@ #include +CppUnit::Test* CReadWriteLockTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CReadWriteLockTest"); -CppUnit::Test *CReadWriteLockTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CReadWriteLockTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CReadWriteLockTest::testReadLock", - &CReadWriteLockTest::testReadLock) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CReadWriteLockTest::testWriteLock", - &CReadWriteLockTest::testWriteLock) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CReadWriteLockTest::testPerformanceVersusMutex", - &CReadWriteLockTest::testPerformanceVersusMutex) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CReadWriteLockTest::testReadLock", &CReadWriteLockTest::testReadLock)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CReadWriteLockTest::testWriteLock", &CReadWriteLockTest::testWriteLock)); + suiteOfTests->addTest(new CppUnit::TestCaller("CReadWriteLockTest::testPerformanceVersusMutex", + &CReadWriteLockTest::testPerformanceVersusMutex)); return suiteOfTests; } +namespace { -namespace -{ - - -class CUnprotectedAdder : public ml::core::CThread -{ - public: - CUnprotectedAdder(uint32_t sleepTime, - uint32_t iterations, - uint32_t increment, - volatile uint32_t &variable) - : m_SleepTime(sleepTime), - m_Iterations(iterations), - m_Increment(increment), - m_Variable(variable) - { - } +class CUnprotectedAdder : public ml::core::CThread { +public: + CUnprotectedAdder(uint32_t sleepTime, uint32_t iterations, uint32_t increment, volatile uint32_t& variable) + : m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} - protected: - void run() - { - for (uint32_t count = 0; count < m_Iterations; ++count) - { - m_Variable += m_Increment; - ml::core::CSleep::sleep(m_SleepTime); - } +protected: + void run() { + for (uint32_t count = 0; count < m_Iterations; ++count) { + m_Variable += m_Increment; + ml::core::CSleep::sleep(m_SleepTime); } + } - void shutdown() - { - // Always just wait for run() to complete - } + void shutdown() { + // Always just wait for run() to complete + } - private: - uint32_t m_SleepTime; - uint32_t m_Iterations; - uint32_t m_Increment; - volatile uint32_t &m_Variable; +private: + uint32_t m_SleepTime; + uint32_t m_Iterations; + uint32_t 
m_Increment; + volatile uint32_t& m_Variable; }; -class CAtomicAdder : public ml::core::CThread -{ - public: - CAtomicAdder(uint32_t sleepTime, - uint32_t iterations, - uint32_t increment, - std::atomic_uint_fast32_t &variable) - : m_SleepTime(sleepTime), - m_Iterations(iterations), - m_Increment(increment), - m_Variable(variable) - { - } +class CAtomicAdder : public ml::core::CThread { +public: + CAtomicAdder(uint32_t sleepTime, uint32_t iterations, uint32_t increment, std::atomic_uint_fast32_t& variable) + : m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} - protected: - void run() - { - for (uint32_t count = 0; count < m_Iterations; ++count) - { - m_Variable.fetch_add(m_Increment); - ml::core::CSleep::sleep(m_SleepTime); - } +protected: + void run() { + for (uint32_t count = 0; count < m_Iterations; ++count) { + m_Variable.fetch_add(m_Increment); + ml::core::CSleep::sleep(m_SleepTime); } + } - void shutdown() - { - // Always just wait for run() to complete - } + void shutdown() { + // Always just wait for run() to complete + } - private: - uint32_t m_SleepTime; - uint32_t m_Iterations; - uint32_t m_Increment; - std::atomic_uint_fast32_t &m_Variable; +private: + uint32_t m_SleepTime; + uint32_t m_Iterations; + uint32_t m_Increment; + std::atomic_uint_fast32_t& m_Variable; }; -class CFastMutexProtectedAdder : public ml::core::CThread -{ - public: - CFastMutexProtectedAdder(ml::core::CFastMutex &mutex, - uint32_t sleepTime, - uint32_t iterations, - uint32_t increment, - volatile uint32_t &variable) - : m_Mutex(mutex), - m_SleepTime(sleepTime), - m_Iterations(iterations), - m_Increment(increment), - m_Variable(variable) - { - } +class CFastMutexProtectedAdder : public ml::core::CThread { +public: + CFastMutexProtectedAdder(ml::core::CFastMutex& mutex, + uint32_t sleepTime, + uint32_t iterations, + uint32_t increment, + volatile uint32_t& variable) + : m_Mutex(mutex), m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} - protected: - void run() - { - for (uint32_t count = 0; count < m_Iterations; ++count) - { - ml::core::CScopedFastLock lock(m_Mutex); +protected: + void run() { + for (uint32_t count = 0; count < m_Iterations; ++count) { + ml::core::CScopedFastLock lock(m_Mutex); - m_Variable += m_Increment; - ml::core::CSleep::sleep(m_SleepTime); - } + m_Variable += m_Increment; + ml::core::CSleep::sleep(m_SleepTime); } + } - void shutdown() - { - // Always just wait for run() to complete - } + void shutdown() { + // Always just wait for run() to complete + } - private: - ml::core::CFastMutex &m_Mutex; - uint32_t m_SleepTime; - uint32_t m_Iterations; - uint32_t m_Increment; - volatile uint32_t &m_Variable; +private: + ml::core::CFastMutex& m_Mutex; + uint32_t m_SleepTime; + uint32_t m_Iterations; + uint32_t m_Increment; + volatile uint32_t& m_Variable; }; -class CMutexProtectedAdder : public ml::core::CThread -{ - public: - CMutexProtectedAdder(ml::core::CMutex &mutex, - uint32_t sleepTime, - uint32_t iterations, - uint32_t increment, - volatile uint32_t &variable) - : m_Mutex(mutex), - m_SleepTime(sleepTime), - m_Iterations(iterations), - m_Increment(increment), - m_Variable(variable) - { - } +class CMutexProtectedAdder : public ml::core::CThread { +public: + CMutexProtectedAdder(ml::core::CMutex& mutex, uint32_t sleepTime, uint32_t iterations, uint32_t increment, volatile uint32_t& variable) + : m_Mutex(mutex), m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), 
m_Variable(variable) {} - protected: - void run() - { - for (uint32_t count = 0; count < m_Iterations; ++count) - { - ml::core::CScopedLock lock(m_Mutex); +protected: + void run() { + for (uint32_t count = 0; count < m_Iterations; ++count) { + ml::core::CScopedLock lock(m_Mutex); - m_Variable += m_Increment; - ml::core::CSleep::sleep(m_SleepTime); - } + m_Variable += m_Increment; + ml::core::CSleep::sleep(m_SleepTime); } + } - void shutdown() - { - // Always just wait for run() to complete - } + void shutdown() { + // Always just wait for run() to complete + } - private: - ml::core::CMutex &m_Mutex; - uint32_t m_SleepTime; - uint32_t m_Iterations; - uint32_t m_Increment; - volatile uint32_t &m_Variable; +private: + ml::core::CMutex& m_Mutex; + uint32_t m_SleepTime; + uint32_t m_Iterations; + uint32_t m_Increment; + volatile uint32_t& m_Variable; }; -class CWriteLockProtectedAdder : public ml::core::CThread -{ - public: - CWriteLockProtectedAdder(ml::core::CReadWriteLock &readWriteLock, - uint32_t sleepTime, - uint32_t iterations, - uint32_t increment, - volatile uint32_t &variable) - : m_ReadWriteLock(readWriteLock), - m_SleepTime(sleepTime), - m_Iterations(iterations), - m_Increment(increment), - m_Variable(variable) - { - } +class CWriteLockProtectedAdder : public ml::core::CThread { +public: + CWriteLockProtectedAdder(ml::core::CReadWriteLock& readWriteLock, + uint32_t sleepTime, + uint32_t iterations, + uint32_t increment, + volatile uint32_t& variable) + : m_ReadWriteLock(readWriteLock), m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} - protected: - void run() - { - for (uint32_t count = 0; count < m_Iterations; ++count) - { - ml::core::CScopedWriteLock lock(m_ReadWriteLock); +protected: + void run() { + for (uint32_t count = 0; count < m_Iterations; ++count) { + ml::core::CScopedWriteLock lock(m_ReadWriteLock); - m_Variable += m_Increment; - ml::core::CSleep::sleep(m_SleepTime); - } + m_Variable += m_Increment; + ml::core::CSleep::sleep(m_SleepTime); } + } - void shutdown() - { - // Always just wait for run() to complete - } + void shutdown() { + // Always just wait for run() to complete + } - private: - ml::core::CReadWriteLock &m_ReadWriteLock; - uint32_t m_SleepTime; - uint32_t m_Iterations; - uint32_t m_Increment; - volatile uint32_t &m_Variable; +private: + ml::core::CReadWriteLock& m_ReadWriteLock; + uint32_t m_SleepTime; + uint32_t m_Iterations; + uint32_t m_Increment; + volatile uint32_t& m_Variable; }; -class CReadLockProtectedReader : public ml::core::CThread -{ - public: - CReadLockProtectedReader(ml::core::CReadWriteLock &readWriteLock, - uint32_t sleepTime, - uint32_t iterations, - volatile uint32_t &variable) - : m_ReadWriteLock(readWriteLock), - m_SleepTime(sleepTime), - m_Iterations(iterations), - m_Variable(variable), - m_LastRead(variable) - { - } +class CReadLockProtectedReader : public ml::core::CThread { +public: + CReadLockProtectedReader(ml::core::CReadWriteLock& readWriteLock, uint32_t sleepTime, uint32_t iterations, volatile uint32_t& variable) + : m_ReadWriteLock(readWriteLock), m_SleepTime(sleepTime), m_Iterations(iterations), m_Variable(variable), m_LastRead(variable) {} - uint32_t lastRead() const - { - return m_LastRead; - } + uint32_t lastRead() const { return m_LastRead; } - protected: - void run() - { - for (uint32_t count = 0; count < m_Iterations; ++count) - { - ml::core::CScopedReadLock lock(m_ReadWriteLock); +protected: + void run() { + for (uint32_t count = 0; count < m_Iterations; ++count) { + 
ml::core::CScopedReadLock lock(m_ReadWriteLock); - m_LastRead = m_Variable; - ml::core::CSleep::sleep(m_SleepTime); - } + m_LastRead = m_Variable; + ml::core::CSleep::sleep(m_SleepTime); } + } - void shutdown() - { - // Always just wait for run() to complete - } + void shutdown() { + // Always just wait for run() to complete + } - private: - ml::core::CReadWriteLock &m_ReadWriteLock; - uint32_t m_SleepTime; - uint32_t m_Iterations; - volatile uint32_t &m_Variable; - uint32_t m_LastRead; +private: + ml::core::CReadWriteLock& m_ReadWriteLock; + uint32_t m_SleepTime; + uint32_t m_Iterations; + volatile uint32_t& m_Variable; + uint32_t m_LastRead; }; - - } - -void CReadWriteLockTest::testReadLock() -{ +void CReadWriteLockTest::testReadLock() { uint32_t testVariable(0); ml::core::CReadWriteLock readWriteLock; @@ -328,8 +242,7 @@ void CReadWriteLockTest::testReadLock() CPPUNIT_ASSERT_EQUAL(testVariable, reader3.lastRead()); } -void CReadWriteLockTest::testWriteLock() -{ +void CReadWriteLockTest::testWriteLock() { static const uint32_t TEST_SIZE(50000); uint32_t testVariable(0); @@ -347,22 +260,19 @@ void CReadWriteLockTest::testWriteLock() writer2.stop(); writer3.stop(); - LOG_INFO("Write lock protected variable incremented to " << - testVariable); + LOG_INFO("Write lock protected variable incremented to " << testVariable); CPPUNIT_ASSERT_EQUAL(TEST_SIZE * (1 + 5 + 9), testVariable); } -void CReadWriteLockTest::testPerformanceVersusMutex() -{ +void CReadWriteLockTest::testPerformanceVersusMutex() { static const uint32_t TEST_SIZE(1000000); { uint32_t testVariable(0); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting unlocked throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting unlocked throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CUnprotectedAdder writer1(0, TEST_SIZE, 1, testVariable); CUnprotectedAdder writer2(0, TEST_SIZE, 5, testVariable); @@ -377,16 +287,13 @@ void CReadWriteLockTest::testPerformanceVersusMutex() writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished unlocked throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished unlocked throughput test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Unlocked throughput test with test size " << - TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO("Unlocked throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); LOG_INFO("Unlocked variable incremented to " << testVariable); - if (testVariable != TEST_SIZE * (1 + 5 + 9)) - { + if (testVariable != TEST_SIZE * (1 + 5 + 9)) { // Obviously this would be unacceptable in production code, but this // unit test is showing the cost of different types of lock compared // to the unlocked case @@ -397,8 +304,7 @@ void CReadWriteLockTest::testPerformanceVersusMutex() std::atomic_uint_fast32_t testVariable(0); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting atomic throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting atomic throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CAtomicAdder writer1(0, TEST_SIZE, 1, testVariable); CAtomicAdder writer2(0, TEST_SIZE, 5, testVariable); @@ -413,11 +319,9 @@ void CReadWriteLockTest::testPerformanceVersusMutex() writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished atomic throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); 
+ LOG_INFO("Finished atomic throughput test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Atomic throughput test with test size " << - TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO("Atomic throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); LOG_INFO("Atomic variable incremented to " << testVariable.load()); @@ -428,8 +332,7 @@ void CReadWriteLockTest::testPerformanceVersusMutex() ml::core::CFastMutex mutex; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting fast mutex lock throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting fast mutex lock throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CFastMutexProtectedAdder writer1(mutex, 0, TEST_SIZE, 1, testVariable); CFastMutexProtectedAdder writer2(mutex, 0, TEST_SIZE, 5, testVariable); @@ -444,14 +347,11 @@ void CReadWriteLockTest::testPerformanceVersusMutex() writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished fast mutex lock throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished fast mutex lock throughput test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Fast mutex lock throughput test with test size " << - TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO("Fast mutex lock throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); - LOG_INFO("Fast mutex lock protected variable incremented to " << - testVariable); + LOG_INFO("Fast mutex lock protected variable incremented to " << testVariable); CPPUNIT_ASSERT_EQUAL(TEST_SIZE * (1 + 5 + 9), testVariable); } @@ -460,8 +360,7 @@ void CReadWriteLockTest::testPerformanceVersusMutex() ml::core::CMutex mutex; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting mutex lock throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting mutex lock throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CMutexProtectedAdder writer1(mutex, 0, TEST_SIZE, 1, testVariable); CMutexProtectedAdder writer2(mutex, 0, TEST_SIZE, 5, testVariable); @@ -476,14 +375,11 @@ void CReadWriteLockTest::testPerformanceVersusMutex() writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished mutex lock throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished mutex lock throughput test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Mutex lock throughput test with test size " << - TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO("Mutex lock throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); - LOG_INFO("Mutex lock protected variable incremented to " << - testVariable); + LOG_INFO("Mutex lock protected variable incremented to " << testVariable); CPPUNIT_ASSERT_EQUAL(TEST_SIZE * (1 + 5 + 9), testVariable); } @@ -492,8 +388,7 @@ void CReadWriteLockTest::testPerformanceVersusMutex() ml::core::CReadWriteLock readWriteLock; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting read-write lock throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting read-write lock throughput test at " << ml::core::CTimeUtils::toTimeString(start)); CWriteLockProtectedAdder writer1(readWriteLock, 0, TEST_SIZE, 1, testVariable); CWriteLockProtectedAdder writer2(readWriteLock, 0, TEST_SIZE, 5, testVariable); @@ -508,16 
+403,12 @@ void CReadWriteLockTest::testPerformanceVersusMutex()
         writer3.stop();

         ml::core_t::TTime end(ml::core::CTimeUtils::now());
-        LOG_INFO("Finished read-write lock throughput test at " <<
-                 ml::core::CTimeUtils::toTimeString(end));
+        LOG_INFO("Finished read-write lock throughput test at " << ml::core::CTimeUtils::toTimeString(end));

-        LOG_INFO("Read-write lock throughput test with test size " <<
-                 TEST_SIZE << " took " << (end - start) << " seconds");
+        LOG_INFO("Read-write lock throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds");

-        LOG_INFO("Write lock protected variable incremented to " <<
-                 testVariable);
+        LOG_INFO("Write lock protected variable incremented to " << testVariable);

         CPPUNIT_ASSERT_EQUAL(TEST_SIZE * (1 + 5 + 9), testVariable);
     }
 }
-
diff --git a/lib/core/unittest/CReadWriteLockTest.h b/lib/core/unittest/CReadWriteLockTest.h
index d8c772a8e7..1d281d2a4d 100644
--- a/lib/core/unittest/CReadWriteLockTest.h
+++ b/lib/core/unittest/CReadWriteLockTest.h
@@ -8,16 +8,13 @@
 #include <cppunit/extensions/HelperMacros.h>

+class CReadWriteLockTest : public CppUnit::TestFixture {
+public:
+    void testReadLock();
+    void testWriteLock();
+    void testPerformanceVersusMutex();

-class CReadWriteLockTest : public CppUnit::TestFixture
-{
-    public:
-        void testReadLock();
-        void testWriteLock();
-        void testPerformanceVersusMutex();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CReadWriteLockTest_h
-
diff --git a/lib/core/unittest/CRegexFilterTest.cc b/lib/core/unittest/CRegexFilterTest.cc
index 75f2c92b0c..222cd3acff 100644
--- a/lib/core/unittest/CRegexFilterTest.cc
+++ b/lib/core/unittest/CRegexFilterTest.cc
@@ -8,32 +8,24 @@
 #include
 #include

-
-CppUnit::Test *CRegexFilterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRegexFilterTest");
-
-    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>(
-            "CRegexFilterTest::testConfigure_GivenInvalidRegex",
-            &CRegexFilterTest::testConfigure_GivenInvalidRegex) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>(
-            "CRegexFilterTest::testApply_GivenEmptyFilter",
-            &CRegexFilterTest::testApply_GivenEmptyFilter) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>(
-            "CRegexFilterTest::testApply_GivenSingleMatchAllRegex",
-            &CRegexFilterTest::testApply_GivenSingleMatchAllRegex) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>(
-            "CRegexFilterTest::testApply_GivenSingleRegex",
-            &CRegexFilterTest::testApply_GivenSingleRegex) );
-    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>(
-            "CRegexFilterTest::testApply_GivenMultipleRegex",
-            &CRegexFilterTest::testApply_GivenMultipleRegex) );
+CppUnit::Test* CRegexFilterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRegexFilterTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>("CRegexFilterTest::testConfigure_GivenInvalidRegex",
+                                                                    &CRegexFilterTest::testConfigure_GivenInvalidRegex));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>("CRegexFilterTest::testApply_GivenEmptyFilter",
+                                                                    &CRegexFilterTest::testApply_GivenEmptyFilter));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>("CRegexFilterTest::testApply_GivenSingleMatchAllRegex",
+                                                                    &CRegexFilterTest::testApply_GivenSingleMatchAllRegex));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>("CRegexFilterTest::testApply_GivenSingleRegex",
+                                                                    &CRegexFilterTest::testApply_GivenSingleRegex));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexFilterTest>("CRegexFilterTest::testApply_GivenMultipleRegex",
+                                                                    &CRegexFilterTest::testApply_GivenMultipleRegex));

     return suiteOfTests;
 }

-void CRegexFilterTest::testConfigure_GivenInvalidRegex()
-{
+void CRegexFilterTest::testConfigure_GivenInvalidRegex() {
     std::vector<std::string> regexVector;
     regexVector.push_back(std::string(".*"));
     regexVector.push_back(std::string("("));
@@ -43,16 +35,14 @@ void CRegexFilterTest::testConfigure_GivenInvalidRegex()
     CPPUNIT_ASSERT(filter.empty());
 }

-void CRegexFilterTest::testApply_GivenEmptyFilter()
-{
+void CRegexFilterTest::testApply_GivenEmptyFilter() {
     ml::core::CRegexFilter filter;
     CPPUNIT_ASSERT(filter.empty());

     CPPUNIT_ASSERT_EQUAL(std::string("foo"), filter.apply(std::string("foo")));
 }

-void CRegexFilterTest::testApply_GivenSingleMatchAllRegex()
-{
+void CRegexFilterTest::testApply_GivenSingleMatchAllRegex() {
     std::vector<std::string> regexVector;
     regexVector.push_back(std::string(".*"));

@@ -62,8 +52,7 @@ void CRegexFilterTest::testApply_GivenSingleMatchAllRegex()
     CPPUNIT_ASSERT_EQUAL(std::string(), filter.apply(std::string("foo")));
 }

-void CRegexFilterTest::testApply_GivenSingleRegex()
-{
+void CRegexFilterTest::testApply_GivenSingleRegex() {
     std::vector<std::string> regexVector;
     regexVector.push_back(std::string("f"));

@@ -73,8 +62,7 @@ void CRegexFilterTest::testApply_GivenSingleRegex()
     CPPUNIT_ASSERT_EQUAL(std::string("a"), filter.apply(std::string("fffa")));
 }

-void CRegexFilterTest::testApply_GivenMultipleRegex()
-{
+void CRegexFilterTest::testApply_GivenMultipleRegex() {
     std::vector<std::string> regexVector;
     regexVector.push_back(std::string("f[o]+"));
     regexVector.push_back(std::string("bar"));
diff --git a/lib/core/unittest/CRegexFilterTest.h b/lib/core/unittest/CRegexFilterTest.h
index f72ab98b6c..f3ff8e1553 100644
--- a/lib/core/unittest/CRegexFilterTest.h
+++ b/lib/core/unittest/CRegexFilterTest.h
@@ -8,16 +8,15 @@
 #include <cppunit/extensions/HelperMacros.h>

-class CRegexFilterTest : public CppUnit::TestFixture
-{
-    public:
-        void testConfigure_GivenInvalidRegex();
-        void testApply_GivenEmptyFilter();
-        void testApply_GivenSingleMatchAllRegex();
-        void testApply_GivenSingleRegex();
-        void testApply_GivenMultipleRegex();
+class CRegexFilterTest : public CppUnit::TestFixture {
+public:
+    void testConfigure_GivenInvalidRegex();
+    void testApply_GivenEmptyFilter();
+    void testApply_GivenSingleMatchAllRegex();
+    void testApply_GivenSingleRegex();
+    void testApply_GivenMultipleRegex();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CRegexFilterTest_h
diff --git a/lib/core/unittest/CRegexTest.cc b/lib/core/unittest/CRegexTest.cc
index 22836d1f78..2226ca726a 100644
--- a/lib/core/unittest/CRegexTest.cc
+++ b/lib/core/unittest/CRegexTest.cc
@@ -8,38 +8,21 @@
 #include
 #include

+CppUnit::Test* CRegexTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRegexTest");

-CppUnit::Test *CRegexTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRegexTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegexTest>(
-                                   "CRegexTest::testInit",
-                                   &CRegexTest::testInit) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegexTest>(
-                                   "CRegexTest::testSearch",
-                                   &CRegexTest::testSearch) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegexTest>(
-                                   "CRegexTest::testSplit",
-                                   &CRegexTest::testSplit) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegexTest>(
-                                   "CRegexTest::testTokenise1",
-                                   &CRegexTest::testTokenise1) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegexTest>(
-                                   "CRegexTest::testTokenise2",
-                                   &CRegexTest::testTokenise2) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegexTest>(
-                                   "CRegexTest::testEscape",
-                                   &CRegexTest::testEscape) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegexTest>(
-                                   "CRegexTest::testLiteralCount",
-                                   &CRegexTest::testLiteralCount) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexTest>("CRegexTest::testInit", &CRegexTest::testInit));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexTest>("CRegexTest::testSearch", &CRegexTest::testSearch));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexTest>("CRegexTest::testSplit", &CRegexTest::testSplit));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexTest>("CRegexTest::testTokenise1", &CRegexTest::testTokenise1));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexTest>("CRegexTest::testTokenise2", &CRegexTest::testTokenise2));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexTest>("CRegexTest::testEscape", &CRegexTest::testEscape));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegexTest>("CRegexTest::testLiteralCount", &CRegexTest::testLiteralCount));

     return suiteOfTests;
 }

-void CRegexTest::testInit()
-{
+void CRegexTest::testInit() {
     {
         std::string regexStr = "[[:digit: ] )";

@@ -87,18 +70,19 @@
         CPPUNIT_ASSERT(regex.init(regexStr));
         CPPUNIT_ASSERT_EQUAL(regexStr, regex.str());

-        CPPUNIT_ASSERT(regex.matches(" "));
+        CPPUNIT_ASSERT(
+            regex.matches(" "));
     }
     {
         // Uninitialised
         std::string regexStr = "<.*";

         ml::core::CRegex regex;

-        CPPUNIT_ASSERT(!regex.matches(" "));
+        CPPUNIT_ASSERT(
+            !regex.matches(" "));
     }
 }

-void CRegexTest::testSearch()
-{
+void CRegexTest::testSearch() {
     {
         // Uninitialised
         ml::core::CRegex regex;
@@ -154,8 +138,7 @@
     }
 }

-void CRegexTest::testTokenise1()
-{
+void CRegexTest::testTokenise1() {
     std::string str1(" Transport node error on node 0x1234");
     std::string str2(" Transport read error (8) on node 0x1235");

@@ -201,8 +184,7 @@
         CPPUNIT_ASSERT(regex.matches(str1));
         CPPUNIT_ASSERT(regex.tokenise(str1, tokens));

-        for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr)
-        {
+        for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) {
             LOG_DEBUG("'" << *itr << "'");
         }

@@ -228,8 +210,7 @@
         CPPUNIT_ASSERT(regex.matches(str2));
         CPPUNIT_ASSERT(regex.tokenise(str2, tokens));

-        for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr)
-        {
+        for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) {
             LOG_DEBUG("'" << *itr << "'");
         }

@@ -237,10 +218,12 @@
         CPPUNIT_ASSERT(!regex.tokenise(str1, tokens));
     }

-    std::string str3("Sep 10, 2009 3:54:12 AM org.apache.tomcat.util.http.Parameters processParameters\r\nWARNING: Parameters: Invalid chunk ignored.");
+    std::string str3(
+        "Sep 10, 2009 3:54:12 AM org.apache.tomcat.util.http.Parameters processParameters\r\nWARNING: Parameters: Invalid chunk ignored.");

     {
-        std::string regexStr("(\\w+\\s+\\d+,\\s+\\d+\\s+\\d+:\\d+:\\d+\\s+\\w+)\\s*([[:alnum:].]+)\\s*(\\w+)\\r?\\n(INFO|WARNING|SEVERE|DEBUG|FATAL): Parameters: Invalid chunk ignored\\.\\s*");
+        std::string regexStr("(\\w+\\s+\\d+,\\s+\\d+\\s+\\d+:\\d+:\\d+\\s+\\w+)\\s*([[:alnum:].]+)\\s*(\\w+)\\r?\\n(INFO|WARNING|SEVERE|"
+                             "DEBUG|FATAL): Parameters: Invalid chunk ignored\\.\\s*");

         ml::core::CRegex regex;

@@ -251,8 +234,7 @@
         CPPUNIT_ASSERT(regex.matches(str3));
         CPPUNIT_ASSERT(regex.tokenise(str3, tokens));

-        for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr)
-        {
+        for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) {
             LOG_DEBUG("'" << *itr << "'");
         }
     }
@@ -271,15 +253,13 @@ void
CRegexTest::testTokenise1() CPPUNIT_ASSERT(regex.matches(str4)); CPPUNIT_ASSERT(regex.tokenise(str4, tokens)); - for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) - { + for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) { LOG_DEBUG("'" << *itr << "'"); } } } -void CRegexTest::testTokenise2() -{ +void CRegexTest::testTokenise2() { std::string regexStr("(.+?)(?:\\((.*?)\\))?"); ml::core::CRegex regex; @@ -315,8 +295,7 @@ void CRegexTest::testTokenise2() CPPUNIT_ASSERT(tokens[1].empty()); } -void CRegexTest::testSplit() -{ +void CRegexTest::testSplit() { std::string str1(" Transport node error on node 0x1234"); std::string str2(" Transport read error (8) on node 0x1235"); @@ -341,23 +320,20 @@ void CRegexTest::testSplit() CPPUNIT_ASSERT(regex.split(str1, tokens)); - for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) - { + for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) { LOG_DEBUG("'" << *itr << "'"); } } } -void CRegexTest::testEscape() -{ +void CRegexTest::testEscape() { CPPUNIT_ASSERT_EQUAL(std::string("\\.\\.\\."), ml::core::CRegex::escapeRegexSpecial("...")); CPPUNIT_ASSERT_EQUAL(std::string("hello"), ml::core::CRegex::escapeRegexSpecial("hello")); CPPUNIT_ASSERT_EQUAL(std::string("\\)hello\\(\\n\\^"), ml::core::CRegex::escapeRegexSpecial(")hello(\n^")); CPPUNIT_ASSERT_EQUAL(std::string("\\)hello\\(\\r?\\n\\^"), ml::core::CRegex::escapeRegexSpecial(")hello(\r\n^")); } -void CRegexTest::testLiteralCount() -{ +void CRegexTest::testLiteralCount() { { // Uninitialised ml::core::CRegex regex; @@ -453,4 +429,3 @@ void CRegexTest::testLiteralCount() CPPUNIT_ASSERT_EQUAL(size_t(0), regex.literalCount()); } } - diff --git a/lib/core/unittest/CRegexTest.h b/lib/core/unittest/CRegexTest.h index f75f4af02c..dfe3484e15 100644 --- a/lib/core/unittest/CRegexTest.h +++ b/lib/core/unittest/CRegexTest.h @@ -8,18 +8,17 @@ #include -class CRegexTest : public CppUnit::TestFixture -{ - public: - void testInit(); - void testSearch(); - void testSplit(); - void testTokenise1(); - void testTokenise2(); - void testEscape(); - void testLiteralCount(); +class CRegexTest : public CppUnit::TestFixture { +public: + void testInit(); + void testSearch(); + void testSplit(); + void testTokenise1(); + void testTokenise2(); + void testEscape(); + void testLiteralCount(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CRegexTest_h diff --git a/lib/core/unittest/CResourceLocatorTest.cc b/lib/core/unittest/CResourceLocatorTest.cc index 2c6847f692..8e4fdde3d3 100644 --- a/lib/core/unittest/CResourceLocatorTest.cc +++ b/lib/core/unittest/CResourceLocatorTest.cc @@ -9,26 +9,20 @@ #include #include +CppUnit::Test* CResourceLocatorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CResourceLocatorTest"); -CppUnit::Test *CResourceLocatorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CResourceLocatorTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CResourceLocatorTest::testResourceDir", - &CResourceLocatorTest::testResourceDir) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CResourceLocatorTest::testLogDir", - &CResourceLocatorTest::testLogDir) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CResourceLocatorTest::testSrcRootDir", - &CResourceLocatorTest::testSrcRootDir) ); + suiteOfTests->addTest( + new 
CppUnit::TestCaller("CResourceLocatorTest::testResourceDir", &CResourceLocatorTest::testResourceDir)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CResourceLocatorTest::testLogDir", &CResourceLocatorTest::testLogDir)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CResourceLocatorTest::testSrcRootDir", &CResourceLocatorTest::testSrcRootDir)); return suiteOfTests; } -void CResourceLocatorTest::testResourceDir() -{ +void CResourceLocatorTest::testResourceDir() { std::string resourceDir(ml::core::CResourceLocator::resourceDir()); LOG_DEBUG("Resource directory is " << resourceDir); @@ -37,8 +31,7 @@ void CResourceLocatorTest::testResourceDir() CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::stat((resourceDir + "/ml-en.dict").c_str(), &buf)); } -void CResourceLocatorTest::testLogDir() -{ +void CResourceLocatorTest::testLogDir() { std::string logDir(ml::core::CResourceLocator::logDir()); LOG_DEBUG("Log directory is " << logDir); @@ -46,8 +39,7 @@ void CResourceLocatorTest::testLogDir() // we're an Elasticsearch plugin } -void CResourceLocatorTest::testSrcRootDir() -{ +void CResourceLocatorTest::testSrcRootDir() { std::string cppRootDir(ml::core::CResourceLocator::cppRootDir()); LOG_DEBUG("C++ root directory is " << cppRootDir); @@ -55,4 +47,3 @@ void CResourceLocatorTest::testSrcRootDir() ml::core::COsFileFuncs::TStat buf; CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::stat((cppRootDir + "/set_env.sh").c_str(), &buf)); } - diff --git a/lib/core/unittest/CResourceLocatorTest.h b/lib/core/unittest/CResourceLocatorTest.h index dd2cf344bf..38a48ff594 100644 --- a/lib/core/unittest/CResourceLocatorTest.h +++ b/lib/core/unittest/CResourceLocatorTest.h @@ -8,16 +8,13 @@ #include +class CResourceLocatorTest : public CppUnit::TestFixture { +public: + void testResourceDir(); + void testLogDir(); + void testSrcRootDir(); -class CResourceLocatorTest : public CppUnit::TestFixture -{ - public: - void testResourceDir(); - void testLogDir(); - void testSrcRootDir(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CResourceLocatorTest_h - diff --git a/lib/core/unittest/CShellArgQuoterTest.cc b/lib/core/unittest/CShellArgQuoterTest.cc index 21bea7c1b4..d2221d9918 100644 --- a/lib/core/unittest/CShellArgQuoterTest.cc +++ b/lib/core/unittest/CShellArgQuoterTest.cc @@ -8,36 +8,55 @@ #include #include +CppUnit::Test* CShellArgQuoterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CShellArgQuoterTest"); -CppUnit::Test *CShellArgQuoterTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CShellArgQuoterTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CShellArgQuoterTest::testQuote", - &CShellArgQuoterTest::testQuote) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CShellArgQuoterTest::testQuote", &CShellArgQuoterTest::testQuote)); return suiteOfTests; } -void CShellArgQuoterTest::testQuote() -{ +void CShellArgQuoterTest::testQuote() { LOG_DEBUG("\n" - "echo " << ml::core::CShellArgQuoter::quote("") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("hello") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("\"hello\" there") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("'hello' there") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("hello! 
there") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("don't want this to fail!") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("don't want this expanded: $HOME") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("don't want this expanded: %windir%") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("don't want this expanded: \"$HOME\"") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("don't want this expanded: \"%windir%\"") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("don't want this expanded: '$HOME'") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("don't want this expanded: '%windir%'") << "\n" - "echo " << ml::core::CShellArgQuoter::quote("top ^ hat!")); + "echo " + << ml::core::CShellArgQuoter::quote("") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("hello") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("\"hello\" there") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("'hello' there") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("hello! there") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("don't want this to fail!") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("don't want this expanded: $HOME") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("don't want this expanded: %windir%") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("don't want this expanded: \"$HOME\"") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("don't want this expanded: \"%windir%\"") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("don't want this expanded: '$HOME'") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("don't want this expanded: '%windir%'") + << "\n" + "echo " + << ml::core::CShellArgQuoter::quote("top ^ hat!")); // Paste the output of the above into a command prompt and check what // happens... 
 }
-
diff --git a/lib/core/unittest/CShellArgQuoterTest.h b/lib/core/unittest/CShellArgQuoterTest.h
index c050b9380e..145923280b 100644
--- a/lib/core/unittest/CShellArgQuoterTest.h
+++ b/lib/core/unittest/CShellArgQuoterTest.h
@@ -8,14 +8,11 @@
 #include <cppunit/extensions/HelperMacros.h>

+class CShellArgQuoterTest : public CppUnit::TestFixture {
+public:
+    void testQuote();

-class CShellArgQuoterTest : public CppUnit::TestFixture
-{
-    public:
-        void testQuote();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CShellArgQuoterTest_h
-
diff --git a/lib/core/unittest/CSleepTest.cc b/lib/core/unittest/CSleepTest.cc
index 5e6550fc6e..e72b0bcc71 100644
--- a/lib/core/unittest/CSleepTest.cc
+++ b/lib/core/unittest/CSleepTest.cc
@@ -6,24 +6,19 @@
 #include "CSleepTest.h"

 #include
-#include
 #include
 #include
+#include

+CppUnit::Test* CSleepTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSleepTest");

-CppUnit::Test *CSleepTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSleepTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CSleepTest>(
-                                   "CSleepTest::testSleep",
-                                   &CSleepTest::testSleep) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSleepTest>("CSleepTest::testSleep", &CSleepTest::testSleep));

     return suiteOfTests;
 }

-void CSleepTest::testSleep()
-{
+void CSleepTest::testSleep() {
     ml::core_t::TTime start(ml::core::CTimeUtils::now());

     ml::core::CSleep::sleep(7500);
@@ -31,11 +26,9 @@ void CSleepTest::testSleep()
     ml::core_t::TTime end(ml::core::CTimeUtils::now());

     ml::core_t::TTime diff(end - start);
-    LOG_DEBUG("During 7.5 second wait, the clock advanced by " <<
-              diff << " seconds");
+    LOG_DEBUG("During 7.5 second wait, the clock advanced by " << diff << " seconds");

     // Clock time should be 7 or 8 seconds further ahead
     CPPUNIT_ASSERT(diff >= 7);
     CPPUNIT_ASSERT(diff <= 8);
 }
-
diff --git a/lib/core/unittest/CSleepTest.h b/lib/core/unittest/CSleepTest.h
index ba5bcfef0a..fc7ce37c21 100644
--- a/lib/core/unittest/CSleepTest.h
+++ b/lib/core/unittest/CSleepTest.h
@@ -8,14 +8,11 @@
 #include <cppunit/extensions/HelperMacros.h>

+class CSleepTest : public CppUnit::TestFixture {
+public:
+    void testSleep();

-class CSleepTest : public CppUnit::TestFixture
-{
-    public:
-        void testSleep();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CSleepTest_h
-
diff --git a/lib/core/unittest/CSmallVectorTest.cc b/lib/core/unittest/CSmallVectorTest.cc
index e8593a327a..203a3a1482 100644
--- a/lib/core/unittest/CSmallVectorTest.cc
+++ b/lib/core/unittest/CSmallVectorTest.cc
@@ -13,8 +13,7 @@
 using namespace ml;

-void CSmallVectorTest::testNonStandard()
-{
+void CSmallVectorTest::testNonStandard() {
     using TDoubleVec = std::vector<double>;
     using TDouble5Vec = core::CSmallVector<double, 5>;

     {
         TDoubleVec vec{0.1, 1.4, 7.4};
         TDouble5Vec svec(vec);
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(vec),
-                             core::CContainerPrinter::print(svec));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(vec), core::CContainerPrinter::print(svec));

         TDoubleVec cvec(svec);
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(vec),
-                             core::CContainerPrinter::print(cvec));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(vec), core::CContainerPrinter::print(cvec));
     }

     // Test addition and subtraction.
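// ---------------------------------------------------------------------------
// For reference: a minimal sketch (not part of the patch) of the non-standard
// element-wise arithmetic that testNonStandard() exercises in the hunks around
// this point. The template arguments <double, 5> are assumed from the
// TDouble5Vec alias above; vec1's initial values are inferred from the
// asserted results, and the commented outputs mirror the test's assertions.

#include <core/CSmallVector.h>

int main() {
    using TDouble5Vec = ml::core::CSmallVector<double, 5>;
    TDouble5Vec vec1{1.0, 3.2, 1.4, 7.3};
    TDouble5Vec vec2{1.3, 1.6, 2.2, 1.6};
    vec1 -= vec2; // element-wise subtraction: [-0.3, 1.6, -0.8, 5.7]
    vec1 += vec2;
    vec1 += vec2; // element-wise addition: [2.3, 4.8, 3.6, 8.9]
    return 0;
}
// ---------------------------------------------------------------------------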
@@ -36,23 +33,19 @@ void CSmallVectorTest::testNonStandard() TDouble5Vec vec2{1.3, 1.6, 2.2, 1.6}; vec1 -= vec2; - CPPUNIT_ASSERT_EQUAL(std::string("[-0.3, 1.6, -0.8, 5.7]"), - core::CContainerPrinter::print(vec1)); + CPPUNIT_ASSERT_EQUAL(std::string("[-0.3, 1.6, -0.8, 5.7]"), core::CContainerPrinter::print(vec1)); vec1 += vec2; vec1 += vec2; - CPPUNIT_ASSERT_EQUAL(std::string("[2.3, 4.8, 3.6, 8.9]"), - core::CContainerPrinter::print(vec1)); + CPPUNIT_ASSERT_EQUAL(std::string("[2.3, 4.8, 3.6, 8.9]"), core::CContainerPrinter::print(vec1)); } } -CppUnit::Test *CSmallVectorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSmallVectorTest"); +CppUnit::Test* CSmallVectorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSmallVectorTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSmallVectorTest::testNonStandard", - &CSmallVectorTest::testNonStandard) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSmallVectorTest::testNonStandard", &CSmallVectorTest::testNonStandard)); return suiteOfTests; } diff --git a/lib/core/unittest/CSmallVectorTest.h b/lib/core/unittest/CSmallVectorTest.h index 74cc030436..3181952c0b 100644 --- a/lib/core/unittest/CSmallVectorTest.h +++ b/lib/core/unittest/CSmallVectorTest.h @@ -9,12 +9,11 @@ #include -class CSmallVectorTest : public CppUnit::TestFixture -{ - public: - void testNonStandard(); +class CSmallVectorTest : public CppUnit::TestFixture { +public: + void testNonStandard(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CSmallVectorTest_h diff --git a/lib/core/unittest/CStateCompressorTest.cc b/lib/core/unittest/CStateCompressorTest.cc index 6bbec6359f..770d3697fb 100644 --- a/lib/core/unittest/CStateCompressorTest.cc +++ b/lib/core/unittest/CStateCompressorTest.cc @@ -12,9 +12,9 @@ #include #include +#include #include #include -#include #include @@ -26,134 +26,96 @@ using TDistribution = boost::uniform_int<>; using TGenerator = boost::random::variate_generator; using TGeneratorItr = boost::generator_iterator; -namespace -{ +namespace { using TSizeStrMap = std::map; using TSizeStrMapCItr = TSizeStrMap::const_iterator; using TOStreamP = core::CDataAdder::TOStreamP; using TIStreamP = core::CDataSearcher::TIStreamP; -void insert3rdLevel(ml::core::CStatePersistInserter &inserter) -{ +void insert3rdLevel(ml::core::CStatePersistInserter& inserter) { inserter.insertValue("ssdrgad", 99999, ml::core::CIEEE754::E_SinglePrecision); inserter.insertValue("bbvczcvbdfb", "rrtw"); } -void insert2ndLevel(ml::core::CStatePersistInserter &inserter) -{ +void insert2ndLevel(ml::core::CStatePersistInserter& inserter) { inserter.insertValue("eerwq_dsf_dfsgh_h5dafg", 3.14, ml::core::CIEEE754::E_SinglePrecision); inserter.insertValue("level2B", 'z'); - for (std::size_t i = 0; i < 50; i++) - { + for (std::size_t i = 0; i < 50; i++) { inserter.insertLevel("hiawat" + core::CStringUtils::typeToString(i), &insert3rdLevel); } } -void insert1stLevel(ml::core::CStatePersistInserter &inserter, std::size_t n) -{ +void insert1stLevel(ml::core::CStatePersistInserter& inserter, std::size_t n) { inserter.insertValue("theFirstThing", "a"); inserter.insertValue("anItemThatComesNext", 25); - for (std::size_t i = 0; i < n; i++) - { + for (std::size_t i = 0; i < n; i++) { inserter.insertLevel("levelC" + core::CStringUtils::typeToString(i), &insert2ndLevel); } } -class CMockDataAdder : public ml::core::CDataAdder -{ - public: - CMockDataAdder(std::size_t maxDocSize) - : m_CurrentDocNum(0), - 
m_MaxDocumentSize(maxDocSize) - { - } +class CMockDataAdder : public ml::core::CDataAdder { +public: + CMockDataAdder(std::size_t maxDocSize) : m_CurrentDocNum(0), m_MaxDocumentSize(maxDocSize) {} - virtual TOStreamP addStreamed(const std::string &/*index*/, - const std::string &/*id*/) - { - ++m_CurrentDocNum; - m_CurrentStream = TOStreamP(new std::ostringstream); - return m_CurrentStream; - } + virtual TOStreamP addStreamed(const std::string& /*index*/, const std::string& /*id*/) { + ++m_CurrentDocNum; + m_CurrentStream = TOStreamP(new std::ostringstream); + return m_CurrentStream; + } - virtual bool streamComplete(TOStreamP &strm, - bool /*force*/) - { - CPPUNIT_ASSERT_EQUAL(m_CurrentStream, strm); - std::ostringstream *ss = dynamic_cast(m_CurrentStream.get()); - CPPUNIT_ASSERT(ss); - LOG_TRACE(ss->str()); - m_Data[m_CurrentDocNum] = ss->str(); - LOG_TRACE(m_Data[m_CurrentDocNum]); - return true; - } + virtual bool streamComplete(TOStreamP& strm, bool /*force*/) { + CPPUNIT_ASSERT_EQUAL(m_CurrentStream, strm); + std::ostringstream* ss = dynamic_cast(m_CurrentStream.get()); + CPPUNIT_ASSERT(ss); + LOG_TRACE(ss->str()); + m_Data[m_CurrentDocNum] = ss->str(); + LOG_TRACE(m_Data[m_CurrentDocNum]); + return true; + } - virtual std::size_t maxDocumentSize() const - { - return m_MaxDocumentSize; - } + virtual std::size_t maxDocumentSize() const { return m_MaxDocumentSize; } - const TSizeStrMap &data() const - { - return m_Data; - } + const TSizeStrMap& data() const { return m_Data; } - private: - TSizeStrMap m_Data; - std::size_t m_CurrentDocNum; - TOStreamP m_CurrentStream; - std::size_t m_MaxDocumentSize; +private: + TSizeStrMap m_Data; + std::size_t m_CurrentDocNum; + TOStreamP m_CurrentStream; + std::size_t m_MaxDocumentSize; }; -class CMockDataSearcher : public ml::core::CDataSearcher -{ - public: - CMockDataSearcher(CMockDataAdder &adder) : m_Adder(adder), m_AskedFor(0) - { - } - - virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) - { - TIStreamP stream; - const TSizeStrMap &events = m_Adder.data(); - - TSizeStrMapCItr iter = events.find(m_AskedFor + 1); - if (iter == events.end()) - { - // return a stream here that is in the fail state - stream.reset(new std::stringstream); - stream->setstate(std::ios_base::failbit); - } - else - { - stream.reset(new std::stringstream(iter->second)); - ++m_AskedFor; - } - return stream; - } - - std::size_t totalDocs() const - { - return m_Adder.data().size(); +class CMockDataSearcher : public ml::core::CDataSearcher { +public: + CMockDataSearcher(CMockDataAdder& adder) : m_Adder(adder), m_AskedFor(0) {} + + virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) { + TIStreamP stream; + const TSizeStrMap& events = m_Adder.data(); + + TSizeStrMapCItr iter = events.find(m_AskedFor + 1); + if (iter == events.end()) { + // return a stream here that is in the fail state + stream.reset(new std::stringstream); + stream->setstate(std::ios_base::failbit); + } else { + stream.reset(new std::stringstream(iter->second)); + ++m_AskedFor; } + return stream; + } - std::size_t askedFor() const - { - return m_AskedFor; - } + std::size_t totalDocs() const { return m_Adder.data().size(); } - private: + std::size_t askedFor() const { return m_AskedFor; } - CMockDataAdder &m_Adder; - std::size_t m_AskedFor; +private: + CMockDataAdder& m_Adder; + std::size_t m_AskedFor; }; - } - -void CStateCompressorTest::testForApiNoKey() -{ +void CStateCompressorTest::testForApiNoKey() { // This test verifies the basic operation of compressing and 
decompressing // some JSON data, using two simultaneous streams: one regular stringstream, // and one compress/decompress stream @@ -193,8 +155,7 @@ void CStateCompressorTest::testForApiNoKey() CPPUNIT_ASSERT_EQUAL(ref.size(), restored.size()); } -void CStateCompressorTest::testStreaming() -{ +void CStateCompressorTest::testStreaming() { // The purpose of this test is to add a reasonable block of data to the // compressed store, then read it back out and show that the data is // read in stream chunks, not all at once. CMockDataSearcher has a @@ -231,43 +192,37 @@ void CStateCompressorTest::testStreaming() CPPUNIT_ASSERT(mockKvSearcher.askedFor() > lastAskedFor); lastAskedFor = mockKvSearcher.askedFor(); - for (std::size_t i = 0; i < 5000; i++) - { + for (std::size_t i = 0; i < 5000; i++) { traverser.next(); } CPPUNIT_ASSERT(mockKvSearcher.askedFor() > lastAskedFor); lastAskedFor = mockKvSearcher.askedFor(); - for (std::size_t i = 0; i < 5000; i++) - { + for (std::size_t i = 0; i < 5000; i++) { traverser.next(); } CPPUNIT_ASSERT(mockKvSearcher.askedFor() > lastAskedFor); lastAskedFor = mockKvSearcher.askedFor(); - for (std::size_t i = 0; i < 5000; i++) - { + for (std::size_t i = 0; i < 5000; i++) { traverser.next(); } CPPUNIT_ASSERT(mockKvSearcher.askedFor() > lastAskedFor); lastAskedFor = mockKvSearcher.askedFor(); - for (std::size_t i = 0; i < 5000; i++) - { + for (std::size_t i = 0; i < 5000; i++) { traverser.next(); } CPPUNIT_ASSERT(mockKvSearcher.askedFor() > lastAskedFor); lastAskedFor = mockKvSearcher.askedFor(); - while (traverser.next()) - {}; + while (traverser.next()) { + }; LOG_TRACE("Asked for: " << mockKvSearcher.askedFor()); CPPUNIT_ASSERT_EQUAL(mockKvSearcher.askedFor(), mockKvAdder.data().size()); } } - -void CStateCompressorTest::testChunking() -{ +void CStateCompressorTest::testChunking() { // Put arbitrary string data into the stream, and stress different sizes // check CMockDataAdder with max doc sizes from 500 to 500000 @@ -275,21 +230,17 @@ void CStateCompressorTest::testChunking() TGenerator generator(rng, TDistribution(0, 254)); TGeneratorItr randItr(&generator); - for (std::size_t i = 500; i < 5000001; i *= 10) - { + for (std::size_t i = 500; i < 5000001; i *= 10) { // check string data from sizes 1 to 200000 - for (std::size_t j = 1; j < 2570000; j *= 4) - { + for (std::size_t j = 1; j < 2570000; j *= 4) { CMockDataAdder adder(i); std::ostringstream ss; std::string decompressed; - try - { + try { { ml::core::CStateCompressor compressor(adder); ml::core::CDataAdder::TOStreamP strm = compressor.addStreamed("1", ""); - for (std::size_t k = 0; k < j; k++) - { + for (std::size_t k = 0; k < j; k++) { char c = char(*randItr++); ss << c; (*strm) << c; @@ -303,9 +254,7 @@ void CStateCompressorTest::testChunking() std::istreambuf_iterator eos; decompressed.assign(std::istreambuf_iterator(*strm), eos); } - } - catch (std::exception &e) - { + } catch (std::exception& e) { LOG_DEBUG("Error in test case " << i << " / " << j << ": " << e.what()); LOG_DEBUG("String is: " << ss.str()); CPPUNIT_ASSERT(false); @@ -320,13 +269,11 @@ void CStateCompressorTest::testChunking() CMockDataAdder adder(0xffffffff); std::ostringstream ss; std::string decompressed; - try - { + try { { ml::core::CStateCompressor compressor(adder); ml::core::CDataAdder::TOStreamP strm = compressor.addStreamed("1", ""); - for (std::size_t k = 0; k < 100000000; k++) - { + for (std::size_t k = 0; k < 100000000; k++) { char c = char(*randItr++); ss << c; (*strm) << c; @@ -340,9 +287,7 @@ void 
CStateCompressorTest::testChunking() std::istreambuf_iterator eos; decompressed.assign(std::istreambuf_iterator(*strm), eos); } - } - catch (std::exception &e) - { + } catch (std::exception& e) { LOG_DEBUG("Error in test case " << e.what()); LOG_DEBUG("String is: " << ss.str()); CPPUNIT_ASSERT(false); @@ -351,19 +296,15 @@ void CStateCompressorTest::testChunking() } } -CppUnit::Test* CStateCompressorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStateCompressorTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStateCompressorTest::testForApiNoKey", - &CStateCompressorTest::testForApiNoKey) ); - suiteOfTests->addTest( new CppUnit::TestCaller ( - "CStateCompressorTest::testStreaming", - &CStateCompressorTest::testStreaming) ); - suiteOfTests->addTest( new CppUnit::TestCaller ( - "CStateCompressorTest::testChunking", - &CStateCompressorTest::testChunking) ); +CppUnit::Test* CStateCompressorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStateCompressorTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CStateCompressorTest::testForApiNoKey", &CStateCompressorTest::testForApiNoKey)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CStateCompressorTest::testStreaming", &CStateCompressorTest::testStreaming)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CStateCompressorTest::testChunking", &CStateCompressorTest::testChunking)); return suiteOfTests; } diff --git a/lib/core/unittest/CStateCompressorTest.h b/lib/core/unittest/CStateCompressorTest.h index 72abcbc221..8983bfb65a 100644 --- a/lib/core/unittest/CStateCompressorTest.h +++ b/lib/core/unittest/CStateCompressorTest.h @@ -8,16 +8,14 @@ #include +class CStateCompressorTest : public CppUnit::TestFixture { +public: + void testForApiNoKey(); + void testStreaming(); + void testChunking(); + void testFile(); -class CStateCompressorTest : public CppUnit::TestFixture -{ - public: - void testForApiNoKey(); - void testStreaming(); - void testChunking(); - void testFile(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CStateCompressorTest_h diff --git a/lib/core/unittest/CStateMachineTest.cc b/lib/core/unittest/CStateMachineTest.cc index 32ac085d02..73a1296fc4 100644 --- a/lib/core/unittest/CStateMachineTest.cc +++ b/lib/core/unittest/CStateMachineTest.cc @@ -22,108 +22,75 @@ using namespace ml; -namespace -{ +namespace { using TSizeVec = std::vector; using TSizeVecVec = std::vector; using TStrVec = std::vector; -class CStateMachineClearer : core::CStateMachine -{ - public: - static void clear() - { - core::CStateMachine::clear(); - } +class CStateMachineClearer : core::CStateMachine { +public: + static void clear() { core::CStateMachine::clear(); } }; -struct SMachine -{ +struct SMachine { TStrVec s_Alphabet; TStrVec s_States; TSizeVecVec s_TransitionFunction; - bool operator==(const SMachine &rhs) const - { - return s_Alphabet == rhs.s_Alphabet - && s_States == rhs.s_States - && s_TransitionFunction == rhs.s_TransitionFunction; + bool operator==(const SMachine& rhs) const { + return s_Alphabet == rhs.s_Alphabet && s_States == rhs.s_States && s_TransitionFunction == rhs.s_TransitionFunction; } - bool operator<(const SMachine &rhs) const - { - return s_Alphabet < rhs.s_Alphabet - || (s_Alphabet == rhs.s_Alphabet && s_States < rhs.s_States) - || ( s_Alphabet == rhs.s_Alphabet - && s_States == rhs.s_States - && s_TransitionFunction < rhs.s_TransitionFunction); + bool operator<(const SMachine& rhs) const { + return 
s_Alphabet < rhs.s_Alphabet || (s_Alphabet == rhs.s_Alphabet && s_States < rhs.s_States) || + (s_Alphabet == rhs.s_Alphabet && s_States == rhs.s_States && s_TransitionFunction < rhs.s_TransitionFunction); } }; using TMachineVec = std::vector; -class CTestThread : public core::CThread -{ - public: - using TCppUnitExceptionP = boost::shared_ptr; - - public: - CTestThread(const TMachineVec &machines) : - m_Machines(machines), - m_Failures(0) - {} - - std::size_t failures() const - { - return m_Failures; - } - - const TSizeVec &states() const - { - return m_States; - } - - private: - virtual void run() - { - std::size_t n = 10000; - m_States.reserve(n); - TSizeVec machine; - for (std::size_t i = 0u; i < n; ++i) - { - m_Rng.generateUniformSamples(0, m_Machines.size(), 1, machine); - core::CStateMachine sm = core::CStateMachine::create(m_Machines[machine[0]].s_Alphabet, - m_Machines[machine[0]].s_States, - m_Machines[machine[0]].s_TransitionFunction, - 0); // initial state - if (!sm.apply(0)) - { - ++m_Failures; - } - m_States.push_back(sm.state()); +class CTestThread : public core::CThread { +public: + using TCppUnitExceptionP = boost::shared_ptr; + +public: + CTestThread(const TMachineVec& machines) : m_Machines(machines), m_Failures(0) {} + + std::size_t failures() const { return m_Failures; } + + const TSizeVec& states() const { return m_States; } + +private: + virtual void run() { + std::size_t n = 10000; + m_States.reserve(n); + TSizeVec machine; + for (std::size_t i = 0u; i < n; ++i) { + m_Rng.generateUniformSamples(0, m_Machines.size(), 1, machine); + core::CStateMachine sm = core::CStateMachine::create(m_Machines[machine[0]].s_Alphabet, + m_Machines[machine[0]].s_States, + m_Machines[machine[0]].s_TransitionFunction, + 0); // initial state + if (!sm.apply(0)) { + ++m_Failures; } + m_States.push_back(sm.state()); } + } - virtual void shutdown() {} + virtual void shutdown() {} - private: - test::CRandomNumbers m_Rng; - TMachineVec m_Machines; - std::size_t m_Failures; - TSizeVec m_States; +private: + test::CRandomNumbers m_Rng; + TMachineVec m_Machines; + std::size_t m_Failures; + TSizeVec m_States; }; -void randomMachines(std::size_t n, TMachineVec &result) -{ - std::string states[] = - { - "A", "B", "C", "D", "E", "F", "G", "H", "I", "J" - }; - std::string alphabet[] = - { - "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" - }; +void randomMachines(std::size_t n, TMachineVec& result) { + std::string states[] = {"A", "B", "C", "D", "E", "F", "G", "H", "I", "J"}; + std::string alphabet[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}; test::CRandomNumbers rng; @@ -134,13 +101,11 @@ void randomMachines(std::size_t n, TMachineVec &result) rng.generateUniformSamples(1, boost::size(alphabet), n, na); result.resize(n); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { result[i].s_States.assign(states, states + ns[i]); result[i].s_Alphabet.assign(alphabet, alphabet + na[i]); result[i].s_TransitionFunction.resize(na[i]); - for (std::size_t j = 0u; j < na[i]; ++j) - { + for (std::size_t j = 0u; j < na[i]; ++j) { rng.generateUniformSamples(0, ns[i], ns[i], result[i].s_TransitionFunction[j]); } @@ -148,11 +113,9 @@ void randomMachines(std::size_t n, TMachineVec &result) std::next_permutation(boost::begin(alphabet), boost::end(alphabet)); } } - } -void CStateMachineTest::testBasics() -{ +void CStateMachineTest::testBasics() { // Test errors on create. // Test transitions. 
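// ---------------------------------------------------------------------------
// For reference: a minimal sketch (not part of the patch) of the
// core::CStateMachine API that the tests below rely on. The two-state "toggle"
// machine here is hypothetical; transitionFunction[symbol][state] yields the
// next state, matching the indexing used in testBasics().

#include <core/CStateMachine.h>

#include <cstddef>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> alphabet{"toggle"};
    std::vector<std::string> states{"off", "on"};
    std::vector<std::vector<std::size_t>> transitionFunction{{1, 0}};
    ml::core::CStateMachine sm =
        ml::core::CStateMachine::create(alphabet, states, transitionFunction,
                                        0); // initial state "off"
    sm.apply(0); // "off" -> "on"
    return sm.printState(sm.state()) == "on" ? 0 : 1;
}
// ---------------------------------------------------------------------------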
@@ -160,34 +123,29 @@ void CStateMachineTest::testBasics() TMachineVec machines; randomMachines(5, machines); - for (std::size_t m = 0u; m < machines.size(); ++m) - { + for (std::size_t m = 0u; m < machines.size(); ++m) { LOG_DEBUG("machine " << m); - for (std::size_t i = 0u; i < machines[m].s_Alphabet.size(); ++i) - { - for (std::size_t j = 0u; j < machines[m].s_States.size(); ++j) - { + for (std::size_t i = 0u; i < machines[m].s_Alphabet.size(); ++i) { + for (std::size_t j = 0u; j < machines[m].s_States.size(); ++j) { core::CStateMachine sm = core::CStateMachine::create(machines[m].s_Alphabet, machines[m].s_States, machines[m].s_TransitionFunction, j); // initial state - const std::string &oldState = machines[m].s_States[j]; + const std::string& oldState = machines[m].s_States[j]; sm.apply(i); - const std::string &newState = machines[m].s_States[sm.state()]; + const std::string& newState = machines[m].s_States[sm.state()]; LOG_DEBUG(" " << oldState << " -> " << newState); - CPPUNIT_ASSERT_EQUAL(machines[m].s_States[machines[m].s_TransitionFunction[i][j]], - sm.printState(sm.state())); + CPPUNIT_ASSERT_EQUAL(machines[m].s_States[machines[m].s_TransitionFunction[i][j]], sm.printState(sm.state())); } } } } -void CStateMachineTest::testPersist() -{ +void CStateMachineTest::testPersist() { // Check persist maintains the checksum and is idempotent. TMachineVec machine; @@ -226,8 +184,7 @@ void CStateMachineTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CStateMachineTest::testMultithreaded() -{ +void CStateMachineTest::testMultithreaded() { // Check that we create each machine once and we don't get any // errors updating due to stale reads. @@ -244,45 +201,34 @@ void CStateMachineTest::testMultithreaded() using TThreadPtr = boost::shared_ptr; using TThreadVec = std::vector; TThreadVec threads; - for (std::size_t i = 0u; i < 20; ++i) - { + for (std::size_t i = 0u; i < 20; ++i) { threads.push_back(TThreadPtr(new CTestThread(machines))); } - for (std::size_t i = 0u; i < threads.size(); ++i) - { + for (std::size_t i = 0u; i < threads.size(); ++i) { CPPUNIT_ASSERT(threads[i]->start()); } - for (std::size_t i = 0u; i < threads.size(); ++i) - { + for (std::size_t i = 0u; i < threads.size(); ++i) { CPPUNIT_ASSERT(threads[i]->waitForFinish()); } - for (std::size_t i = 0u; i < threads.size(); ++i) - { + for (std::size_t i = 0u; i < threads.size(); ++i) { // No failed reads. CPPUNIT_ASSERT_EQUAL(std::size_t(0), threads[i]->failures()); } - for (std::size_t i = 1u; i < threads.size(); ++i) - { + for (std::size_t i = 1u; i < threads.size(); ++i) { // No wrong reads. - CPPUNIT_ASSERT(threads[i]->states() == threads[i-1]->states()); + CPPUNIT_ASSERT(threads[i]->states() == threads[i - 1]->states()); } // No duplicates. 
CPPUNIT_ASSERT_EQUAL(machines.size(), core::CStateMachine::numberMachines()); } -CppUnit::Test *CStateMachineTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStateMachineTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStateMachineTest::testBasics", - &CStateMachineTest::testBasics) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStateMachineTest::testPersist", - &CStateMachineTest::testPersist) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStateMachineTest::testMultithreaded", - &CStateMachineTest::testMultithreaded) ); +CppUnit::Test* CStateMachineTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStateMachineTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CStateMachineTest::testBasics", &CStateMachineTest::testBasics)); + suiteOfTests->addTest(new CppUnit::TestCaller("CStateMachineTest::testPersist", &CStateMachineTest::testPersist)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CStateMachineTest::testMultithreaded", &CStateMachineTest::testMultithreaded)); return suiteOfTests; } diff --git a/lib/core/unittest/CStateMachineTest.h b/lib/core/unittest/CStateMachineTest.h index a4977f83e4..f317043099 100644 --- a/lib/core/unittest/CStateMachineTest.h +++ b/lib/core/unittest/CStateMachineTest.h @@ -9,14 +9,13 @@ #include -class CStateMachineTest : public CppUnit::TestFixture -{ - public: - void testBasics(); - void testPersist(); - void testMultithreaded(); +class CStateMachineTest : public CppUnit::TestFixture { +public: + void testBasics(); + void testPersist(); + void testMultithreaded(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CStateMachineTest_h diff --git a/lib/core/unittest/CStatisticsTest.cc b/lib/core/unittest/CStatisticsTest.cc index fae7f24e3f..7f22e47213 100644 --- a/lib/core/unittest/CStatisticsTest.cc +++ b/lib/core/unittest/CStatisticsTest.cc @@ -16,41 +16,29 @@ #include - -namespace -{ +namespace { const int TEST_STAT = 0u; -class CStatisticsTestRunner : public ml::core::CThread -{ +class CStatisticsTestRunner : public ml::core::CThread { public: - CStatisticsTestRunner() : m_I(0), m_N(0) - { - } + CStatisticsTestRunner() : m_I(0), m_N(0) {} - void initialise(int i, int n) - { + void initialise(int i, int n) { m_N = n; m_I = i; } private: - virtual void run() - { - if (m_I < 6) - { + virtual void run() { + if (m_I < 6) { ml::core::CStatistics::stat(TEST_STAT + m_I).increment(); - } - else - { + } else { ml::core::CStatistics::stat(TEST_STAT + m_I - m_N).decrement(); } } - virtual void shutdown() - { - } + virtual void shutdown() {} int m_I; int m_N; @@ -58,28 +46,21 @@ class CStatisticsTestRunner : public ml::core::CThread } // namespace -CppUnit::Test *CStatisticsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStatisticsTest"); +CppUnit::Test* CStatisticsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStatisticsTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStatisticsTest::testStatistics", - &CStatisticsTest::testStatistics) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStatisticsTest::testPersist", - &CStatisticsTest::testPersist) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CStatisticsTest::testStatistics", &CStatisticsTest::testStatistics)); + suiteOfTests->addTest(new CppUnit::TestCaller("CStatisticsTest::testPersist", &CStatisticsTest::testPersist)); return suiteOfTests; } -void CStatisticsTest::testStatistics() -{ +void 
CStatisticsTest::testStatistics() { LOG_TRACE("Starting Statistics test"); - ml::core::CStatistics &stats = ml::core::CStatistics::instance(); + ml::core::CStatistics& stats = ml::core::CStatistics::instance(); static const int N = 6; - for (int i = 0; i < N; i++) - { + for (int i = 0; i < N; i++) { CPPUNIT_ASSERT_EQUAL(uint64_t(0), stats.stat(TEST_STAT + i).value()); } @@ -93,28 +74,23 @@ void CStatisticsTest::testStatistics() CPPUNIT_ASSERT_EQUAL(uint64_t(0), stats.stat(TEST_STAT).value()); CStatisticsTestRunner runners[N * 2]; - for (int i = 0; i < N * 2; i++) - { + for (int i = 0; i < N * 2; i++) { runners[i].initialise(i, N); } - for (int i = 0; i < N * 2; i++) - { + for (int i = 0; i < N * 2; i++) { runners[i].start(); } - for (int i = 0; i < N * 2; i++) - { + for (int i = 0; i < N * 2; i++) { runners[i].waitForFinish(); } - for (int i = 0; i < N; i++) - { + for (int i = 0; i < N; i++) { CPPUNIT_ASSERT_EQUAL(uint64_t(0), stats.stat(TEST_STAT + i).value()); } - for (int i = 0; i < 0x1000000; i++) - { + for (int i = 0; i < 0x1000000; i++) { stats.stat(TEST_STAT).increment(); } CPPUNIT_ASSERT_EQUAL(uint64_t(0x1000000), stats.stat(TEST_STAT).value()); @@ -122,14 +98,12 @@ void CStatisticsTest::testStatistics() LOG_TRACE("Finished Statistics test"); } -void CStatisticsTest::testPersist() -{ +void CStatisticsTest::testPersist() { LOG_DEBUG("Starting persist test"); - ml::core::CStatistics &stats = ml::core::CStatistics::instance(); + ml::core::CStatistics& stats = ml::core::CStatistics::instance(); // Check that a save/restore with all zeros is Ok - for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) - { + for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) { stats.stat(i).set(0); } @@ -144,22 +118,18 @@ void CStatisticsTest::testPersist() ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origStaticsXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel( - &ml::core::CStatistics::staticsAcceptRestoreTraverser)); + CPPUNIT_ASSERT(traverser.traverseSubLevel(&ml::core::CStatistics::staticsAcceptRestoreTraverser)); } - for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) - { + for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) { CPPUNIT_ASSERT_EQUAL(uint64_t(0), stats.stat(i).value()); } // Set some other values and check that restore puts all to zero - for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) - { + for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) { stats.stat(i).set(567 + (i * 3)); } - for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) - { + for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) { CPPUNIT_ASSERT_EQUAL(uint64_t(567 + (i * 3)), stats.stat(i).value()); } @@ -176,12 +146,10 @@ void CStatisticsTest::testPersist() ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origStaticsXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel( - &ml::core::CStatistics::staticsAcceptRestoreTraverser)); + CPPUNIT_ASSERT(traverser.traverseSubLevel(&ml::core::CStatistics::staticsAcceptRestoreTraverser)); } - for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) - { + for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) { CPPUNIT_ASSERT_EQUAL(uint64_t(0), stats.stat(i).value()); } @@ -190,12 +158,10 @@ void CStatisticsTest::testPersist() ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(newStaticsXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - 
CPPUNIT_ASSERT(traverser.traverseSubLevel(
-                        &ml::core::CStatistics::staticsAcceptRestoreTraverser));
+        CPPUNIT_ASSERT(traverser.traverseSubLevel(&ml::core::CStatistics::staticsAcceptRestoreTraverser));
     }
 
-    for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++)
-    {
+    for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) {
         CPPUNIT_ASSERT_EQUAL(uint64_t(567 + (i * 3)), stats.stat(i).value());
     }
 
@@ -204,12 +170,10 @@ void CStatisticsTest::testPersist()
         ml::core::CRapidXmlParser parser;
         CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origStaticsXml));
         ml::core::CRapidXmlStateRestoreTraverser traverser(parser);
-        CPPUNIT_ASSERT(traverser.traverseSubLevel(
-                        &ml::core::CStatistics::staticsAcceptRestoreTraverser));
+        CPPUNIT_ASSERT(traverser.traverseSubLevel(&ml::core::CStatistics::staticsAcceptRestoreTraverser));
     }
 
-    for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++)
-    {
+    for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) {
         CPPUNIT_ASSERT_EQUAL(uint64_t(0), stats.stat(i).value());
     }
 
@@ -222,8 +186,7 @@ void CStatisticsTest::testPersist()
         regex.init("\n");
         regex.split(output, tokens);
     }
-    for (ml::core::CRegex::TStrVecCItr i = tokens.begin(); i != (tokens.end() - 1); ++i)
-    {
+    for (ml::core::CRegex::TStrVecCItr i = tokens.begin(); i != (tokens.end() - 1); ++i) {
         ml::core::CRegex regex;
         // Look for "name":"E.*"value": 0}
         regex.init(".*\"name\":\"E.*\"value\":0.*");
diff --git a/lib/core/unittest/CStatisticsTest.h b/lib/core/unittest/CStatisticsTest.h
index 9b7e4296c5..bbe24c57c7 100644
--- a/lib/core/unittest/CStatisticsTest.h
+++ b/lib/core/unittest/CStatisticsTest.h
@@ -8,16 +8,14 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CStatisticsTest : public CppUnit::TestFixture
-{
+class CStatisticsTest : public CppUnit::TestFixture {
     public:
         void testStatistics();
         void testPersist();
         void threadRunner(int i);
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CStatisticsTest_h
-
diff --git a/lib/core/unittest/CStopWatchTest.cc b/lib/core/unittest/CStopWatchTest.cc
index 5fabfadd8e..72724f9ff2 100644
--- a/lib/core/unittest/CStopWatchTest.cc
+++ b/lib/core/unittest/CStopWatchTest.cc
@@ -11,20 +11,15 @@
 
 #include
 
+CppUnit::Test* CStopWatchTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStopWatchTest");
 
-CppUnit::Test *CStopWatchTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStopWatchTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStopWatchTest>(
-                                   "CStopWatchTest::testStopWatch",
-                                   &CStopWatchTest::testStopWatch) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStopWatchTest>("CStopWatchTest::testStopWatch", &CStopWatchTest::testStopWatch));
 
     return suiteOfTests;
 }
 
-void CStopWatchTest::testStopWatch()
-{
+void CStopWatchTest::testStopWatch() {
     ml::core::CStopWatch stopWatch;
 
     LOG_DEBUG("About to start stop watch test");
@@ -35,8 +30,7 @@ void CStopWatchTest::testStopWatch()
 
     uint64_t elapsed(stopWatch.lap());
 
-    LOG_DEBUG("After a 5.5 second wait, the stop watch reads " <<
-              elapsed << " milliseconds");
+    LOG_DEBUG("After a 5.5 second wait, the stop watch reads " << elapsed << " milliseconds");
 
     // Elapsed time should be between 5.4 and 5.6 seconds
     CPPUNIT_ASSERT(elapsed >= 5400);
@@ -46,8 +40,7 @@ void CStopWatchTest::testStopWatch()
 
     elapsed = stopWatch.stop();
 
-    LOG_DEBUG("After a further 3.5 second wait, the stop watch reads " <<
-              elapsed << " milliseconds");
+    LOG_DEBUG("After a further 3.5 second wait, the stop watch reads " << elapsed << " milliseconds");
 
     // Elapsed time should be between 8.9 and 9.1 seconds
     CPPUNIT_ASSERT(elapsed >= 8900);
@@ -64,10 +57,10 @@ void CStopWatchTest::testStopWatch()
 
     LOG_DEBUG("After a further 2 second wait with the stop watch stopped, "
               "followed by a 0.5 second wait with the stop watch running, it "
-              "reads " << elapsed << " milliseconds");
+              "reads "
+              << elapsed << " milliseconds");
 
     // Elapsed time should be between 9.4 and 9.6 seconds
     CPPUNIT_ASSERT(elapsed >= 9400);
     CPPUNIT_ASSERT(elapsed <= 9600);
 }
-
diff --git a/lib/core/unittest/CStopWatchTest.h b/lib/core/unittest/CStopWatchTest.h
index 54a8775f0a..211cf3efff 100644
--- a/lib/core/unittest/CStopWatchTest.h
+++ b/lib/core/unittest/CStopWatchTest.h
@@ -8,14 +8,11 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
+class CStopWatchTest : public CppUnit::TestFixture {
+public:
+    void testStopWatch();
 
-class CStopWatchTest : public CppUnit::TestFixture
-{
-    public:
-        void testStopWatch();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CStopWatchTest_h
-
diff --git a/lib/core/unittest/CStoredStringPtrTest.cc b/lib/core/unittest/CStoredStringPtrTest.cc
index 6c977e58f6..cc4425f2f8 100644
--- a/lib/core/unittest/CStoredStringPtrTest.cc
+++ b/lib/core/unittest/CStoredStringPtrTest.cc
@@ -13,42 +13,31 @@
 
 #include
 #include
 
+CppUnit::Test* CStoredStringPtrTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStoredStringPtrTest");
 
-CppUnit::Test *CStoredStringPtrTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStoredStringPtrTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStoredStringPtrTest>(
-                                   "CStoredStringPtrTest::testPointerSemantics",
-                                   &CStoredStringPtrTest::testPointerSemantics) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStoredStringPtrTest>(
-                                   "CStoredStringPtrTest::testMemoryUsage",
-                                   &CStoredStringPtrTest::testMemoryUsage) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStoredStringPtrTest>(
-                                   "CStoredStringPtrTest::testHash",
-                                   &CStoredStringPtrTest::testHash) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStoredStringPtrTest>("CStoredStringPtrTest::testPointerSemantics",
+                                                                        &CStoredStringPtrTest::testPointerSemantics));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStoredStringPtrTest>("CStoredStringPtrTest::testMemoryUsage", &CStoredStringPtrTest::testMemoryUsage));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStoredStringPtrTest>("CStoredStringPtrTest::testHash", &CStoredStringPtrTest::testHash));
 
     return suiteOfTests;
 }
 
-void CStoredStringPtrTest::testPointerSemantics()
-{
+void CStoredStringPtrTest::testPointerSemantics() {
     {
         ml::core::CStoredStringPtr null;
 
-        if (null)
-        {
+        if (null) {
             CPPUNIT_FAIL("Should not return true in boolean context");
         }
 
-        if (!null)
-        {
+        if (!null) {
             CPPUNIT_ASSERT(null != ml::core::CStoredStringPtr::makeStoredString("not null"));
             CPPUNIT_ASSERT(null == nullptr);
             CPPUNIT_ASSERT(null.get() == nullptr);
-        }
-        else
-        {
+        } else {
             CPPUNIT_FAIL("Should not return false in negated boolean context");
         }
     }
@@ -57,23 +46,17 @@ void CStoredStringPtrTest::testPointerSemantics()
 
         ml::core::CStoredStringPtr ptr1 = ml::core::CStoredStringPtr::makeStoredString(str1);
 
-        if (ptr1)
-        {
+        if (ptr1) {
             CPPUNIT_ASSERT(ptr1 == ptr1);
             CPPUNIT_ASSERT(ptr1 != nullptr);
             CPPUNIT_ASSERT(ptr1.get() != nullptr);
-        }
-        else
-        {
+        } else {
             CPPUNIT_FAIL("Should not return false in boolean context");
         }
 
-        if (!ptr1)
-        {
+        if (!ptr1) {
             CPPUNIT_FAIL("Should not return true in negated boolean context");
-        }
-        else
-        {
+        } else {
             CPPUNIT_ASSERT_EQUAL(0, ptr1->compare(str1));
         }
     }
@@ -84,23 +67,17 @@ void CStoredStringPtrTest::testPointerSemantics()
 
         ml::core::CStoredStringPtr ptr2 = ml::core::CStoredStringPtr::makeStoredString(std::move(str2));
 
-        if (ptr2)
-        {
+        if (ptr2) {
            CPPUNIT_ASSERT(ptr2 == ptr2);
            CPPUNIT_ASSERT(ptr2 != nullptr);
            CPPUNIT_ASSERT(ptr2.get() != nullptr);
-        }
-        else
-        {
+        } else {
             CPPUNIT_FAIL("Should not return false in boolean context");
         }
 
-        if (!ptr2)
-        {
+        if (!ptr2) {
             CPPUNIT_FAIL("Should not return true in negated boolean context");
-        }
-        else
-        {
+        } else {
             // str2 should no longer contain its original value, as it should
             // have been moved to the stored string
             CPPUNIT_ASSERT(ptr2->compare(str2) != 0);
@@ -108,8 +85,7 @@ void CStoredStringPtrTest::testPointerSemantics()
     }
 }
 
-void CStoredStringPtrTest::testMemoryUsage()
-{
+void CStoredStringPtrTest::testMemoryUsage() {
     {
         ml::core::CStoredStringPtr null;
 
@@ -134,8 +110,7 @@ void CStoredStringPtrTest::testMemoryUsage()
     }
 }
 
-void CStoredStringPtrTest::testHash()
-{
+void CStoredStringPtrTest::testHash() {
     using TStoredStringPtrUSet = boost::unordered_set<ml::core::CStoredStringPtr>;
 
     ml::core::CStoredStringPtr key = ml::core::CStoredStringPtr::makeStoredString("key");
@@ -145,4 +120,3 @@ void CStoredStringPtrTest::testHash()
 
     CPPUNIT_ASSERT_EQUAL(std::size_t(1), s.count(key));
 }
-
diff --git a/lib/core/unittest/CStoredStringPtrTest.h b/lib/core/unittest/CStoredStringPtrTest.h
index 95e5b0947c..3d6aa30b7a 100644
--- a/lib/core/unittest/CStoredStringPtrTest.h
+++ b/lib/core/unittest/CStoredStringPtrTest.h
@@ -8,15 +8,13 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CStoredStringPtrTest : public CppUnit::TestFixture
-{
-    public:
-        void testPointerSemantics();
-        void testMemoryUsage();
-        void testHash();
+class CStoredStringPtrTest : public CppUnit::TestFixture {
+public:
+    void testPointerSemantics();
+    void testMemoryUsage();
+    void testHash();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CStoredStringPtrTest_h
-
diff --git a/lib/core/unittest/CStringSimilarityTesterTest.cc b/lib/core/unittest/CStringSimilarityTesterTest.cc
index f53827e0cc..d52b9850df 100644
--- a/lib/core/unittest/CStringSimilarityTesterTest.cc
+++ b/lib/core/unittest/CStringSimilarityTesterTest.cc
@@ -16,38 +16,31 @@
 
 #include
 #include
 
-
-CppUnit::Test *CStringSimilarityTesterTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStringSimilarityTesterTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringSimilarityTesterTest>(
-                                   "CStringSimilarityTesterTest::testStringSimilarity",
-                                   &CStringSimilarityTesterTest::testStringSimilarity) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringSimilarityTesterTest>(
-                                   "CStringSimilarityTesterTest::testLevensteinDistance",
-                                   &CStringSimilarityTesterTest::testLevensteinDistance) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringSimilarityTesterTest>(
-                                   "CStringSimilarityTesterTest::testLevensteinDistance2",
-                                   &CStringSimilarityTesterTest::testLevensteinDistance2) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringSimilarityTesterTest>(
-                                   "CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent",
-                                   &CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringSimilarityTesterTest>(
-                                   "CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar",
-                                   &CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringSimilarityTesterTest>(
-                                   "CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence",
-                                   &CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringSimilarityTesterTest>(
-                                   "CStringSimilarityTesterTest::testWeightedEditDistance",
-                                   &CStringSimilarityTesterTest::testWeightedEditDistance) );
+CppUnit::Test* CStringSimilarityTesterTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStringSimilarityTesterTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringSimilarityTesterTest>("CStringSimilarityTesterTest::testStringSimilarity",
+                                                                               &CStringSimilarityTesterTest::testStringSimilarity));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringSimilarityTesterTest>("CStringSimilarityTesterTest::testLevensteinDistance",
+                                                                               &CStringSimilarityTesterTest::testLevensteinDistance));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringSimilarityTesterTest>("CStringSimilarityTesterTest::testLevensteinDistance2",
+                                                                               &CStringSimilarityTesterTest::testLevensteinDistance2));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringSimilarityTesterTest>("CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent",
+                                                             &CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringSimilarityTesterTest>("CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar",
+                                                             &CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringSimilarityTesterTest>("CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence",
+                                                             &CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringSimilarityTesterTest>("CStringSimilarityTesterTest::testWeightedEditDistance",
+                                                                               &CStringSimilarityTesterTest::testWeightedEditDistance));
 
     return suiteOfTests;
 }
 
-void CStringSimilarityTesterTest::testStringSimilarity()
-{
+void CStringSimilarityTesterTest::testStringSimilarity() {
     std::string str1("This is identical");
     std::string str2("This is identical");
 
@@ -135,8 +128,7 @@ void CStringSimilarityTesterTest::testStringSimilarity()
     CPPUNIT_ASSERT_EQUAL(similarity3, similarity9);
 }
 
-void CStringSimilarityTesterTest::testLevensteinDistance()
-{
+void CStringSimilarityTesterTest::testLevensteinDistance() {
     ml::core::CStringSimilarityTester sst;
 
     std::string cat("cat");
@@ -180,8 +172,7 @@ void CStringSimilarityTesterTest::testLevensteinDistance()
     CPPUNIT_ASSERT_EQUAL(size_t(7), sst.levenshteinDistanceEx(str1, str4, &::isdigit));
 }
 
-void CStringSimilarityTesterTest::testLevensteinDistance2()
-{
+void CStringSimilarityTesterTest::testLevensteinDistance2() {
     ml::core::CStringSimilarityTester sst;
 
     using TStrVec = std::vector<std::string>;
@@ -257,107 +248,86 @@ void CStringSimilarityTesterTest::testLevensteinDistance2()
     CPPUNIT_ASSERT_EQUAL(size_t(9), sst.levenshteinDistance(empty, serviceStart));
 }
 
-void CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent()
-{
+void CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent() {
     ml::core::CStringSimilarityTester sst;
 
     using TStrVec = std::vector<std::string>;
 
     static const size_t TEST_SIZE(700);
-    static const int MAX_LEN(40);
+    static const int    MAX_LEN(40);
 
     TStrVec input(TEST_SIZE);
-    for (size_t index = 0; index < TEST_SIZE; ++index)
-    {
+    for (size_t index = 0; index < TEST_SIZE; ++index) {
         // Construct the strings from a random number of random lower case
         // letters - empty strings are possible
-        for (int len = (::rand() % MAX_LEN); len > 0; --len)
-        {
+        for (int len = (::rand() % MAX_LEN); len > 0; --len) {
             input[index] += char('a' + (::rand() % 26));
         }
     }
 
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting Levenstein distance throughput test for low commonality strings at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting Levenstein distance throughput test for low commonality strings at " << ml::core::CTimeUtils::toTimeString(start));
 
-    for (size_t i = 0; i < TEST_SIZE; ++i)
-    {
-        for (size_t j = 0; j < TEST_SIZE; ++j)
-        {
+    for (size_t i = 0; i < TEST_SIZE; ++i) {
+        for (size_t j = 0; j < TEST_SIZE; ++j) {
             size_t result(sst.levenshteinDistance(input[i], input[j]));
-            if (i == j)
-            {
+            if (i == j) {
                 CPPUNIT_ASSERT_EQUAL(size_t(0), result);
             }
         }
     }
 
     ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished Levenstein distance throughput test for low commonality strings at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished Levenstein distance throughput test for low commonality strings at " << ml::core::CTimeUtils::toTimeString(end));
 
-    LOG_INFO("Levenstein distance throughput test for low commonality strings with size " <<
-             TEST_SIZE << " and maximum string length " << MAX_LEN <<
-             " took " << (end - start) << " seconds");
+    LOG_INFO("Levenstein distance throughput test for low commonality strings with size "
+             << TEST_SIZE << " and maximum string length " << MAX_LEN << " took " << (end - start) << " seconds");
 }
 
-void CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar()
-{
+void CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar() {
     ml::core::CStringSimilarityTester sst;
 
     using TStrVec = std::vector<std::string>;
 
     static const size_t TEST_SIZE(700);
-    static const int EXTRA_CHARS(4);
+    static const int    EXTRA_CHARS(4);
 
     TStrVec input(TEST_SIZE);
-    for (size_t index = 0; index < TEST_SIZE; ++index)
-    {
+    for (size_t index = 0; index < TEST_SIZE; ++index) {
         // Construct the strings with a large amount of commonality
-        for (int count = 0; count < EXTRA_CHARS; ++count)
-        {
-            if (index % 2 == 0)
-            {
+        for (int count = 0; count < EXTRA_CHARS; ++count) {
+            if (index % 2 == 0) {
                 input[index] += "common";
             }
 
             input[index] += char('a' + (::rand() % 26));
 
-            if (index % 2 != 0)
-            {
+            if (index % 2 != 0) {
                 input[index] += "common";
             }
         }
     }
 
     ml::core_t::TTime start(ml::core::CTimeUtils::now());
-    LOG_INFO("Starting Levenstein distance throughput test for similar strings at " <<
-             ml::core::CTimeUtils::toTimeString(start));
+    LOG_INFO("Starting Levenstein distance throughput test for similar strings at " << ml::core::CTimeUtils::toTimeString(start));
 
-    for (size_t i = 0; i < TEST_SIZE; ++i)
-    {
-        for (size_t j = 0; j < TEST_SIZE; ++j)
-        {
+    for (size_t i = 0; i < TEST_SIZE; ++i) {
+        for (size_t j = 0; j < TEST_SIZE; ++j) {
             size_t result(sst.levenshteinDistance(input[i], input[j]));
-            if (i == j)
-            {
+            if (i == j) {
                 CPPUNIT_ASSERT_EQUAL(size_t(0), result);
             }
         }
    }
 
    ml::core_t::TTime end(ml::core::CTimeUtils::now());
-    LOG_INFO("Finished Levenstein distance throughput test for similar strings at " <<
-             ml::core::CTimeUtils::toTimeString(end));
+    LOG_INFO("Finished Levenstein distance throughput test for similar strings at " << ml::core::CTimeUtils::toTimeString(end));
 
-    LOG_INFO("Levenstein distance throughput test for similar strings with size " <<
-             TEST_SIZE << " and " << EXTRA_CHARS << " extra characters took " <<
-             (end - start) << " seconds");
+    LOG_INFO("Levenstein distance throughput test for similar strings with size "
+             << TEST_SIZE << " and " << EXTRA_CHARS << " extra characters took " << (end - start) << " seconds");
 }
 
-void CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence()
-{
+void CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence() {
     // The intention here is to ensure that the Berghel-Roach algorithm delivers
    // the same results as the simple algorithm. We take advantage of
    // friendship to call the private implementation methods directly.
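// [Editor's note] For readers unfamiliar with the reference algorithm that the
// equivalence test above compares against: the "simple" Levenshtein distance is
// presumably the classic O(m*n) dynamic-programming formulation. The sketch
// below is illustrative only, not the library's code; the function name is
// hypothetical and the single-row memoisation is one common way to write it.
//
//     #include <algorithm>
//     #include <string>
//     #include <vector>
//
//     std::size_t simpleLevenshtein(const std::string& a, const std::string& b) {
//         // row[j] holds the edit distance between a prefix of a and b[0..j).
//         std::vector<std::size_t> row(b.size() + 1);
//         for (std::size_t j = 0; j <= b.size(); ++j) { row[j] = j; }
//         for (std::size_t i = 1; i <= a.size(); ++i) {
//             std::size_t diag = row[0]; // distance(a[0..i-1), "")
//             row[0] = i;
//             for (std::size_t j = 1; j <= b.size(); ++j) {
//                 std::size_t up = row[j]; // value from the previous row
//                 // Delete, insert, or substitute (cost 0 on a match).
//                 row[j] = std::min({row[j] + 1, row[j - 1] + 1,
//                                    diag + (a[i - 1] == b[j - 1] ? 0 : 1)});
//                 diag = up;
//             }
//         }
//         return row[b.size()];
//     }
//
// Berghel-Roach computes the same value but restricts work to a band of
// diagonals, which is why the test checks agreement across word pairs of
// varying relative lengths.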
@@ -372,20 +342,14 @@ void CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence()
     // Remember we're calling private implementation methods here that require:
     // 1) Neither input sequence is empty
     // 2) The first input sequence is no longer than the second input sequence
-    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, cat),
-                         sst.berghelRoachEditDistance(cat, cat));
-    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, dog),
-                         sst.berghelRoachEditDistance(cat, dog));
-    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, mouse),
-                         sst.berghelRoachEditDistance(cat, mouse));
-    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, elephant),
-                         sst.berghelRoachEditDistance(cat, elephant));
-    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(mouse, elephant),
-                         sst.berghelRoachEditDistance(mouse, elephant));
+    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, cat), sst.berghelRoachEditDistance(cat, cat));
+    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, dog), sst.berghelRoachEditDistance(cat, dog));
+    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, mouse), sst.berghelRoachEditDistance(cat, mouse));
+    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, elephant), sst.berghelRoachEditDistance(cat, elephant));
+    CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(mouse, elephant), sst.berghelRoachEditDistance(mouse, elephant));
 }
 
-void CStringSimilarityTesterTest::testWeightedEditDistance()
-{
+void CStringSimilarityTesterTest::testWeightedEditDistance() {
     ml::core::CStringSimilarityTester sst;
 
     using TStrSizePr = std::pair<std::string, size_t>;
@@ -463,4 +427,3 @@ void CStringSimilarityTesterTest::testWeightedEditDistance()
     CPPUNIT_ASSERT_EQUAL(size_t(21), sst.weightedEditDistance(serviceStart, empty));
     CPPUNIT_ASSERT_EQUAL(size_t(21), sst.weightedEditDistance(empty, serviceStart));
 }
-
diff --git a/lib/core/unittest/CStringSimilarityTesterTest.h b/lib/core/unittest/CStringSimilarityTesterTest.h
index ab1d002d78..00d43e2153 100644
--- a/lib/core/unittest/CStringSimilarityTesterTest.h
+++ b/lib/core/unittest/CStringSimilarityTesterTest.h
@@ -8,19 +8,17 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CStringSimilarityTesterTest : public CppUnit::TestFixture
-{
-    public:
-        void testStringSimilarity();
-        void testLevensteinDistance();
-        void testLevensteinDistance2();
-        void testLevensteinDistanceThroughputDifferent();
-        void testLevensteinDistanceThroughputSimilar();
-        void testLevensteinDistanceAlgorithmEquivalence();
-        void testWeightedEditDistance();
+class CStringSimilarityTesterTest : public CppUnit::TestFixture {
+public:
+    void testStringSimilarity();
+    void testLevensteinDistance();
+    void testLevensteinDistance2();
+    void testLevensteinDistanceThroughputDifferent();
+    void testLevensteinDistanceThroughputSimilar();
+    void testLevensteinDistanceAlgorithmEquivalence();
+    void testWeightedEditDistance();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CStringSimilarityTesterTest_h
-
diff --git a/lib/core/unittest/CStringUtilsTest.cc b/lib/core/unittest/CStringUtilsTest.cc
index 5b83445ab6..efe7fa62ad 100644
--- a/lib/core/unittest/CStringUtilsTest.cc
+++ b/lib/core/unittest/CStringUtilsTest.cc
@@ -7,8 +7,8 @@
 #include
 #include
-#include
 #include
+#include
 #include
 #include
@@ -19,80 +19,47 @@
 #include
 #include
 
-
-CppUnit::Test *CStringUtilsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStringUtilsTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testNumMatches",
-                                   &CStringUtilsTest::testNumMatches) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testReplace",
-                                   &CStringUtilsTest::testReplace) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testReplaceFirst",
-                                   &CStringUtilsTest::testReplaceFirst) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testTypeToString",
-                                   &CStringUtilsTest::testTypeToString) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testTypeToStringPrecise",
-                                   &CStringUtilsTest::testTypeToStringPrecise) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testTypeToStringPretty",
-                                   &CStringUtilsTest::testTypeToStringPretty) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testStringToType",
-                                   &CStringUtilsTest::testStringToType) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testTokeniser",
-                                   &CStringUtilsTest::testTokeniser) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testTrim",
-                                   &CStringUtilsTest::testTrim) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testJoin",
-                                   &CStringUtilsTest::testJoin) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testLower",
-                                   &CStringUtilsTest::testLower) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testUpper",
-                                   &CStringUtilsTest::testUpper) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testNarrowWiden",
-                                   &CStringUtilsTest::testNarrowWiden) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testEscape",
-                                   &CStringUtilsTest::testEscape) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testUnEscape",
-                                   &CStringUtilsTest::testUnEscape) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testLongestSubstr",
-                                   &CStringUtilsTest::testLongestSubstr) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testLongestSubseq",
-                                   &CStringUtilsTest::testLongestSubseq) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testNormaliseWhitespace",
-                                   &CStringUtilsTest::testNormaliseWhitespace) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testPerformance",
-                                   &CStringUtilsTest::testPerformance) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testUtf8ByteType",
-                                   &CStringUtilsTest::testUtf8ByteType) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CStringUtilsTest>(
-                                   "CStringUtilsTest::testRoundtripMaxDouble",
-                                   &CStringUtilsTest::testRoundtripMaxDouble) );
+CppUnit::Test* CStringUtilsTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStringUtilsTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testNumMatches", &CStringUtilsTest::testNumMatches));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testReplace", &CStringUtilsTest::testReplace));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testReplaceFirst", &CStringUtilsTest::testReplaceFirst));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testTypeToString", &CStringUtilsTest::testTypeToString));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testTypeToStringPrecise", &CStringUtilsTest::testTypeToStringPrecise));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testTypeToStringPretty", &CStringUtilsTest::testTypeToStringPretty));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testStringToType", &CStringUtilsTest::testStringToType));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testTokeniser", &CStringUtilsTest::testTokeniser));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testTrim", &CStringUtilsTest::testTrim));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testJoin", &CStringUtilsTest::testJoin));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testLower", &CStringUtilsTest::testLower));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testUpper", &CStringUtilsTest::testUpper));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testNarrowWiden", &CStringUtilsTest::testNarrowWiden));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testEscape", &CStringUtilsTest::testEscape));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testUnEscape", &CStringUtilsTest::testUnEscape));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testLongestSubstr", &CStringUtilsTest::testLongestSubstr));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testLongestSubseq", &CStringUtilsTest::testLongestSubseq));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testNormaliseWhitespace", &CStringUtilsTest::testNormaliseWhitespace));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testPerformance", &CStringUtilsTest::testPerformance));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testUtf8ByteType", &CStringUtilsTest::testUtf8ByteType));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CStringUtilsTest>("CStringUtilsTest::testRoundtripMaxDouble", &CStringUtilsTest::testRoundtripMaxDouble));
 
     return suiteOfTests;
 }
 
-void CStringUtilsTest::testNumMatches()
-{
+void CStringUtilsTest::testNumMatches() {
     {
         std::string str("%d %M %Y %f %D %t");
 
@@ -101,8 +68,7 @@ void CStringUtilsTest::testNumMatches()
     }
 }
 
-void CStringUtilsTest::testReplace()
-{
+void CStringUtilsTest::testReplace() {
     {
         std::string in("%d%M%Y%f%D%t");
         const std::string out(" %d %M %Y %f %D %t");
@@ -121,8 +87,7 @@ void CStringUtilsTest::testReplace()
     }
 }
 
-void CStringUtilsTest::testReplaceFirst()
-{
+void CStringUtilsTest::testReplaceFirst() {
     {
         std::string in("%d%M%Y%f%D%t");
         const std::string out(" %d%M%Y%f%D%t");
@@ -141,61 +106,60 @@ void CStringUtilsTest::testReplaceFirst()
     }
 }
 
-void CStringUtilsTest::testTypeToString()
-{
+void CStringUtilsTest::testTypeToString() {
     {
-        uint64_t i(18446744073709551615ULL);
+        uint64_t    i(18446744073709551615ULL);
         std::string expected("18446744073709551615");
 
         std::string actual = ml::core::CStringUtils::typeToString(i);
         CPPUNIT_ASSERT_EQUAL(expected, actual);
 
-        uint64_t j(0);
+        uint64_t j(0);
         CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(actual, j));
         CPPUNIT_ASSERT_EQUAL(i, j);
     }
     {
-        uint32_t i(123456U);
+        uint32_t    i(123456U);
         std::string expected("123456");
 
         std::string actual = ml::core::CStringUtils::typeToString(i);
         CPPUNIT_ASSERT_EQUAL(expected, actual);
 
-        uint32_t j(0);
+        uint32_t j(0);
         CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(actual, j));
         CPPUNIT_ASSERT_EQUAL(i, j);
     }
     {
-        uint16_t i(12345U);
+        uint16_t    i(12345U);
         std::string expected("12345");
 
         std::string actual = ml::core::CStringUtils::typeToString(i);
         CPPUNIT_ASSERT_EQUAL(expected, actual);
 
-        uint16_t j(0);
+        uint16_t j(0);
         CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(actual, j));
CPPUNIT_ASSERT_EQUAL(i, j); } { - int32_t i(123456); + int32_t i(123456); std::string expected("123456"); std::string actual = ml::core::CStringUtils::typeToString(i); CPPUNIT_ASSERT_EQUAL(expected, actual); - int32_t j(0); + int32_t j(0); CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(actual, j)); CPPUNIT_ASSERT_EQUAL(i, j); } { - double i(0.123456); + double i(0.123456); std::string expected("0.123456"); std::string actual = ml::core::CStringUtils::typeToString(i); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456e10); + double i(0.123456e10); std::string expected("1234560000.000000"); std::string actual = ml::core::CStringUtils::typeToString(i); @@ -203,101 +167,100 @@ void CStringUtilsTest::testTypeToString() } } -void CStringUtilsTest::testTypeToStringPrecise() -{ +void CStringUtilsTest::testTypeToStringPrecise() { { - double i(1.0); + double i(1.0); std::string expected("1"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(1.0); + double i(1.0); std::string expected("1"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456); + double i(0.123456); std::string expected("1.23456e-1"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456); + double i(0.123456); std::string expected("1.23456e-1"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456e10); + double i(0.123456e10); std::string expected("1.23456e9"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456e10); + double i(0.123456e10); std::string expected("1234560000"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456e-10); + double i(0.123456e-10); std::string expected("1.23456e-11"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456e-10); + double i(0.123456e-10); std::string expected("1.23456e-11"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456787654321e-10); + double i(0.123456787654321e-10); std::string expected("1.234568e-11"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.123456787654321e-10); + double i(0.123456787654321e-10); std::string expected("1.23456787654321e-11"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.00000000012345678765432123456); + double i(0.00000000012345678765432123456); std::string expected("1.234568e-10"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(0.00000000012345678765432123456); + 
double i(0.00000000012345678765432123456); std::string expected("1.23456787654321e-10"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(123456787654321.23456); + double i(123456787654321.23456); std::string expected("1.234568e14"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { - double i(123456787654321.23456); + double i(123456787654321.23456); std::string expected("123456787654321"); std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); @@ -305,8 +268,7 @@ void CStringUtilsTest::testTypeToStringPrecise() } } -void CStringUtilsTest::testTypeToStringPretty() -{ +void CStringUtilsTest::testTypeToStringPretty() { // This doesn't assert because the format differs between operating systems LOG_DEBUG("1.0 -> " << ml::core::CStringUtils::typeToStringPretty(1.0)); LOG_DEBUG("0.123456 -> " << ml::core::CStringUtils::typeToStringPretty(0.123456)); @@ -317,8 +279,7 @@ void CStringUtilsTest::testTypeToStringPretty() LOG_DEBUG("123456787654321.23456 -> " << ml::core::CStringUtils::typeToStringPretty(123456787654321.23456)); } -void CStringUtilsTest::testStringToType() -{ +void CStringUtilsTest::testStringToType() { { // All good conversions bool ret; @@ -423,7 +384,7 @@ void CStringUtilsTest::testStringToType() } { // All good conversions - double ret; + double ret; CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType("50.256", ret)); CPPUNIT_ASSERT_EQUAL(50.256, ret); CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType("-50.256", ret)); @@ -433,7 +394,7 @@ void CStringUtilsTest::testStringToType() } { // All bad conversions - bool ret; + bool ret; CPPUNIT_ASSERT(!ml::core::CStringUtils::stringToType("tr", ret)); CPPUNIT_ASSERT(!ml::core::CStringUtils::stringToType("fa", ret)); CPPUNIT_ASSERT(!ml::core::CStringUtils::stringToType("s1235sd", ret)); @@ -481,8 +442,7 @@ void CStringUtilsTest::testStringToType() } } -void CStringUtilsTest::testTokeniser() -{ +void CStringUtilsTest::testTokeniser() { std::string str = "sadcasd csac asdcasdc asdc asdc sadc sadc asd csdc ewwef f sdf sd f sdf sdfsadfasdf\n" "adscasdcadsc\n" "asdfcasdcadsdsasdcsadcadsc\n" @@ -498,34 +458,30 @@ void CStringUtilsTest::testTokeniser() this->testTokeniser("f", str); } -void CStringUtilsTest::testTokeniser(const std::string &delim, const std::string &str) -{ +void CStringUtilsTest::testTokeniser(const std::string& delim, const std::string& str) { // Tokenise using ml ml::core::CStringUtils::TStrVec tokens; - std::string remainder; + std::string remainder; ml::core::CStringUtils::tokenise(delim, str, tokens, remainder); LOG_DEBUG(str << " DELIM = '" << delim << "' REMAINDER = '" << remainder << "'"); - for (ml::core::CStringUtils::TStrVecItr itr = tokens.begin(); itr != tokens.end(); ++itr) - { + for (ml::core::CStringUtils::TStrVecItr itr = tokens.begin(); itr != tokens.end(); ++itr) { LOG_DEBUG("'" << *itr << "'"); } // Tokenise using strtok - char *test = ::strdup(str.c_str()); + char* test = ::strdup(str.c_str()); CPPUNIT_ASSERT(test); ml::core::CStringUtils::TStrVec strtokVec; // Note: strtok, uses ANY ONE character in the delimiter string to split on, // so the delimiters for this test have to be one character - char *brk = 0; - for (char *line = ml::core::CStrTokR::strTokR(test, delim.c_str(), &brk); - line != 0; - line = ml::core::CStrTokR::strTokR(0, 
delim.c_str(), &brk)) - { + char* brk = 0; + for (char* line = ml::core::CStrTokR::strTokR(test, delim.c_str(), &brk); line != 0; + line = ml::core::CStrTokR::strTokR(0, delim.c_str(), &brk)) { strtokVec.push_back(line); LOG_DEBUG("'" << line << "'"); } @@ -533,15 +489,13 @@ void CStringUtilsTest::testTokeniser(const std::string &delim, const std::string free(test); test = 0; - if (remainder.empty() == false) - { + if (remainder.empty() == false) { tokens.push_back(remainder); } std::string::size_type pos = str.rfind(delim); - if (pos != std::string::npos) - { - std::string remainderExpected = str.substr(pos+delim.size()); + if (pos != std::string::npos) { + std::string remainderExpected = str.substr(pos + delim.size()); CPPUNIT_ASSERT_EQUAL(remainderExpected, remainder); } @@ -551,8 +505,7 @@ void CStringUtilsTest::testTokeniser(const std::string &delim, const std::string CPPUNIT_ASSERT(strtokVec == tokens); } -void CStringUtilsTest::testTrim() -{ +void CStringUtilsTest::testTrim() { std::string testStr; testStr = " hello\r\n"; @@ -584,8 +537,7 @@ void CStringUtilsTest::testTrim() CPPUNIT_ASSERT_EQUAL(std::string(""), testStr); } -void CStringUtilsTest::testJoin() -{ +void CStringUtilsTest::testJoin() { LOG_DEBUG("*** testJoin ***") using namespace ml; using namespace core; @@ -628,8 +580,7 @@ void CStringUtilsTest::testJoin() CPPUNIT_ASSERT_EQUAL(std::string("aaa,bbb,ccc"), CStringUtils::join(strSet, std::string(","))); } -void CStringUtilsTest::testLower() -{ +void CStringUtilsTest::testLower() { CPPUNIT_ASSERT_EQUAL(std::string("hello"), ml::core::CStringUtils::toLower("hello")); CPPUNIT_ASSERT_EQUAL(std::string("hello"), ml::core::CStringUtils::toLower("Hello")); CPPUNIT_ASSERT_EQUAL(std::string("hello"), ml::core::CStringUtils::toLower("HELLO")); @@ -639,8 +590,7 @@ void CStringUtilsTest::testLower() CPPUNIT_ASSERT_EQUAL(std::string("_-+hello"), ml::core::CStringUtils::toLower("_-+HELLO")); } -void CStringUtilsTest::testUpper() -{ +void CStringUtilsTest::testUpper() { CPPUNIT_ASSERT_EQUAL(std::string("HELLO"), ml::core::CStringUtils::toUpper("hello")); CPPUNIT_ASSERT_EQUAL(std::string("HELLO"), ml::core::CStringUtils::toUpper("Hello")); CPPUNIT_ASSERT_EQUAL(std::string("HELLO"), ml::core::CStringUtils::toUpper("HELLO")); @@ -650,8 +600,7 @@ void CStringUtilsTest::testUpper() CPPUNIT_ASSERT_EQUAL(std::string("_-+HELLO"), ml::core::CStringUtils::toUpper("_-+HELLO")); } -void CStringUtilsTest::testNarrowWiden() -{ +void CStringUtilsTest::testNarrowWiden() { std::string hello1("Hello"); std::wstring hello2(L"Hello"); @@ -662,8 +611,7 @@ void CStringUtilsTest::testNarrowWiden() CPPUNIT_ASSERT(ml::core::CStringUtils::wideToNarrow(hello2) == hello1); } -void CStringUtilsTest::testEscape() -{ +void CStringUtilsTest::testEscape() { const std::string toEscape("\"'\\"); const std::string escaped1("\\\"quoted\\\""); @@ -679,8 +627,7 @@ void CStringUtilsTest::testEscape() CPPUNIT_ASSERT_EQUAL(escaped2, unEscaped2); } -void CStringUtilsTest::testUnEscape() -{ +void CStringUtilsTest::testUnEscape() { std::string escaped1("\\\"quoted\\\""); const std::string unEscaped1("\"quoted\""); @@ -698,8 +645,7 @@ void CStringUtilsTest::testUnEscape() ml::core::CStringUtils::unEscape('\\', dodgy); } -void CStringUtilsTest::testLongestSubstr() -{ +void CStringUtilsTest::testLongestSubstr() { { std::string str1; std::string str2; @@ -708,8 +654,7 @@ void CStringUtilsTest::testLongestSubstr() CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG("Longest common substring of '" << - str1 << "' and '" << str2 
<< "' is '" << common << "'"); + LOG_DEBUG("Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -719,8 +664,7 @@ void CStringUtilsTest::testLongestSubstr() CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG("Longest common substring of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -730,8 +674,7 @@ void CStringUtilsTest::testLongestSubstr() CPPUNIT_ASSERT_EQUAL(std::string("Hello "), common); - LOG_DEBUG("Longest common substring of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -741,8 +684,7 @@ void CStringUtilsTest::testLongestSubstr() CPPUNIT_ASSERT_EQUAL(std::string("ello"), common); - LOG_DEBUG("Longest common substring of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("abc"); @@ -752,8 +694,7 @@ void CStringUtilsTest::testLongestSubstr() CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG("Longest common substring of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("abc xyz defgv hij"); @@ -763,8 +704,7 @@ void CStringUtilsTest::testLongestSubstr() CPPUNIT_ASSERT_EQUAL(std::string(" defg"), common); - LOG_DEBUG("Longest common substring of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("Source LOTS on 13080:742 has shut down."); @@ -774,8 +714,7 @@ void CStringUtilsTest::testLongestSubstr() CPPUNIT_ASSERT_EQUAL(std::string(" has shut down."), common); - LOG_DEBUG("Longest common substring of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("No filter list defined for ."); @@ -785,13 +724,11 @@ void CStringUtilsTest::testLongestSubstr() CPPUNIT_ASSERT_EQUAL(std::string("No filter list defined for "), common); - LOG_DEBUG("Longest common substring of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } } -void CStringUtilsTest::testLongestSubseq() -{ +void CStringUtilsTest::testLongestSubseq() { { std::string str1; std::string str2; @@ -800,8 +737,7 @@ void CStringUtilsTest::testLongestSubseq() CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG("Longest common subsequence of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -811,8 +747,7 @@ void CStringUtilsTest::testLongestSubseq() CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG("Longest common subsequence of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << 
"'"); } { std::string str1("Hello world"); @@ -822,8 +757,7 @@ void CStringUtilsTest::testLongestSubseq() CPPUNIT_ASSERT_EQUAL(std::string("Hello "), common); - LOG_DEBUG("Longest common subsequence of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -833,8 +767,7 @@ void CStringUtilsTest::testLongestSubseq() CPPUNIT_ASSERT_EQUAL(std::string("ello"), common); - LOG_DEBUG("Longest common subsequence of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("abc"); @@ -844,8 +777,7 @@ void CStringUtilsTest::testLongestSubseq() CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG("Longest common subsequence of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("abc xyz defgv hij"); @@ -855,8 +787,7 @@ void CStringUtilsTest::testLongestSubseq() CPPUNIT_ASSERT_EQUAL(std::string("abc defg hij"), common); - LOG_DEBUG("Longest common subsequence of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("Source LOTS on 13080:742 has shut down."); @@ -866,8 +797,7 @@ void CStringUtilsTest::testLongestSubseq() CPPUNIT_ASSERT_EQUAL(std::string("Source T on 13080:2 has shut down."), common); - LOG_DEBUG("Longest common subsequence of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } { std::string str1("No filter list defined for ."); @@ -877,13 +807,11 @@ void CStringUtilsTest::testLongestSubseq() CPPUNIT_ASSERT_EQUAL(std::string("No filter list defined for ."), common); - LOG_DEBUG("Longest common subsequence of '" << - str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG("Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); } } -void CStringUtilsTest::testNormaliseWhitespace() -{ +void CStringUtilsTest::testNormaliseWhitespace() { std::string spacey(" what\ta lot \tof\n" "spaces"); std::string normalised(" what a lot of spaces"); @@ -891,8 +819,7 @@ void CStringUtilsTest::testNormaliseWhitespace() CPPUNIT_ASSERT_EQUAL(normalised, ml::core::CStringUtils::normaliseWhitespace(spacey)); } -void CStringUtilsTest::testPerformance() -{ +void CStringUtilsTest::testPerformance() { static const size_t TEST_SIZE(1000000); static const double TEST_SIZE_D(static_cast(TEST_SIZE)); @@ -902,8 +829,7 @@ void CStringUtilsTest::testPerformance() LOG_DEBUG("Before CStringUtils::typeToString integer test"); stopWatch.start(); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { std::string result(ml::core::CStringUtils::typeToString(count)); ml::core::CStringUtils::stringToType(result, count); } @@ -917,8 +843,7 @@ void CStringUtilsTest::testPerformance() { LOG_DEBUG("Before boost::lexical_cast integer test"); stopWatch.start(); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { std::string result(boost::lexical_cast(count)); count = 
boost::lexical_cast(result); } @@ -933,8 +858,7 @@ void CStringUtilsTest::testPerformance() LOG_DEBUG("Before CStringUtils::typeToString floating point test"); stopWatch.start(); - for (double count = 0.0; count < TEST_SIZE_D; count += 1.41) - { + for (double count = 0.0; count < TEST_SIZE_D; count += 1.41) { std::string result(ml::core::CStringUtils::typeToString(count)); ml::core::CStringUtils::stringToType(result, count); } @@ -948,8 +872,7 @@ void CStringUtilsTest::testPerformance() { LOG_DEBUG("Before boost::lexical_cast floating point test"); stopWatch.start(); - for (double count = 0.0; count < TEST_SIZE_D; count += 1.41) - { + for (double count = 0.0; count < TEST_SIZE_D; count += 1.41) { std::string result(boost::lexical_cast(count)); count = boost::lexical_cast(result); } @@ -959,8 +882,7 @@ void CStringUtilsTest::testPerformance() } } -void CStringUtilsTest::testUtf8ByteType() -{ +void CStringUtilsTest::testUtf8ByteType() { std::string testStr; // single byte UTF-8 character testStr += "a"; @@ -983,24 +905,17 @@ void CStringUtilsTest::testUtf8ByteType() CPPUNIT_ASSERT_EQUAL(-1, ml::core::CStringUtils::utf8ByteType(testStr[9])); } -void CStringUtilsTest::testRoundtripMaxDouble() -{ - ml::core::CIEEE754::EPrecision precisions[] = - { - ml::core::CIEEE754::E_SinglePrecision, - ml::core::CIEEE754::E_DoublePrecision - }; - double tolerances[] = { 5e-7, 5e-15 }; - for (std::size_t i = 0u; i < boost::size(precisions); ++i) - { +void CStringUtilsTest::testRoundtripMaxDouble() { + ml::core::CIEEE754::EPrecision precisions[] = {ml::core::CIEEE754::E_SinglePrecision, ml::core::CIEEE754::E_DoublePrecision}; + double tolerances[] = {5e-7, 5e-15}; + for (std::size_t i = 0u; i < boost::size(precisions); ++i) { double max = std::numeric_limits::max(); std::string str = ml::core::CStringUtils::typeToStringPrecise(max, precisions[i]); double d = 0.0; CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(str, d)); CPPUNIT_ASSERT_DOUBLES_EQUAL(max, d, tolerances[i] * max); } - for (std::size_t i = 0u; i < boost::size(precisions); ++i) - { + for (std::size_t i = 0u; i < boost::size(precisions); ++i) { double min = -std::numeric_limits::max(); std::string str = ml::core::CStringUtils::typeToStringPrecise(min, precisions[i]); double d = 0.0; diff --git a/lib/core/unittest/CStringUtilsTest.h b/lib/core/unittest/CStringUtilsTest.h index 61d77a9036..4e8871318b 100644 --- a/lib/core/unittest/CStringUtilsTest.h +++ b/lib/core/unittest/CStringUtilsTest.h @@ -8,38 +8,34 @@ #include +class CStringUtilsTest : public CppUnit::TestFixture { +public: + void testNumMatches(); + void testReplace(); + void testReplaceFirst(); + void testTypeToString(); + void testTypeToStringPrecise(); + void testTypeToStringPretty(); + void testStringToType(); + void testTokeniser(); + void testTrim(); + void testJoin(); + void testLower(); + void testUpper(); + void testNarrowWiden(); + void testEscape(); + void testUnEscape(); + void testLongestSubstr(); + void testLongestSubseq(); + void testNormaliseWhitespace(); + void testPerformance(); + void testUtf8ByteType(); + void testRoundtripMaxDouble(); -class CStringUtilsTest : public CppUnit::TestFixture -{ - public: - void testNumMatches(); - void testReplace(); - void testReplaceFirst(); - void testTypeToString(); - void testTypeToStringPrecise(); - void testTypeToStringPretty(); - void testStringToType(); - void testTokeniser(); - void testTrim(); - void testJoin(); - void testLower(); - void testUpper(); - void testNarrowWiden(); - void testEscape(); - void testUnEscape(); - void 
testLongestSubstr(); - void testLongestSubseq(); - void testNormaliseWhitespace(); - void testPerformance(); - void testUtf8ByteType(); - void testRoundtripMaxDouble(); + static CppUnit::Test* suite(); - static CppUnit::Test *suite(); - - private: - void testTokeniser(const std::string &delim, - const std::string &str); +private: + void testTokeniser(const std::string& delim, const std::string& str); }; #endif // INCLUDED_CStringUtilsTest_h - diff --git a/lib/core/unittest/CThreadFarmTest.cc b/lib/core/unittest/CThreadFarmTest.cc index 9ebae22eb8..c6d3324f89 100644 --- a/lib/core/unittest/CThreadFarmTest.cc +++ b/lib/core/unittest/CThreadFarmTest.cc @@ -16,127 +16,96 @@ #include #include +CppUnit::Test* CThreadFarmTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CThreadFarmTest"); -CppUnit::Test *CThreadFarmTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CThreadFarmTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CThreadFarmTest::testNumCpus", - &CThreadFarmTest::testNumCpus) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CThreadFarmTest::testSendReceive", - &CThreadFarmTest::testSendReceive) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CThreadFarmTest::testNumCpus", &CThreadFarmTest::testNumCpus)); + suiteOfTests->addTest(new CppUnit::TestCaller("CThreadFarmTest::testSendReceive", &CThreadFarmTest::testSendReceive)); return suiteOfTests; } -void CThreadFarmTest::testNumCpus() -{ +void CThreadFarmTest::testNumCpus() { unsigned int numCpus(boost::thread::hardware_concurrency()); LOG_INFO("Number of CPUs on this machine is " << numCpus); } -namespace -{ - class CString - { - public: - CString() - { - } - - CString(const std::string &str) : m_Str(str) - { - } - - CString(const CString &arg) : m_Str(arg.m_Str) - { - } - - CString &operator=(const CString &arg) - { - m_Str = arg.m_Str; - return *this; - } - - CString &operator=(const std::string &str) - { - m_Str = str; - return *this; - } - - const std::string &str() const - { - return m_Str; - } - - private: - std::string m_Str; - }; - - class CHandler - { - public: - void processResult(const CString &result) - { - LOG_DEBUG("Process result " << result.str() << - " in thread " << ml::core::CThread::currentThreadId()); - - ml::core::CScopedLock lock(m_Mutex); - m_OutstandingOutput.erase(result.str()); - } - - void addExpectedOutput(const std::string &expected) - { - ml::core::CScopedLock lock(m_Mutex); - m_OutstandingOutput.insert(expected); - } - - bool haveAllExpected() - { - ml::core::CScopedLock lock(m_Mutex); - - TStrSet::iterator iter = m_OutstandingOutput.begin(); - if (iter != m_OutstandingOutput.end()) - { - LOG_WARN("Result: " << *iter << " is still outstanding"); - } - - return m_OutstandingOutput.empty(); - } - - private: - using TStrSet = std::set; - - TStrSet m_OutstandingOutput; - ml::core::CMutex m_Mutex; - }; - - class CProcessor - { - public: - CProcessor(const std::string &id) : m_Id(id) - { - } - - void msgToResult(const std::string &str, CString &result) - { - LOG_DEBUG("messageToResult " << str); - - result = (str + ' ' + m_Id); - - LOG_DEBUG("messageToResult " << result.str()); - } - - private: - std::string m_Id; - }; +namespace { +class CString { +public: + CString() {} + + CString(const std::string& str) : m_Str(str) {} + + CString(const CString& arg) : m_Str(arg.m_Str) {} + + CString& operator=(const CString& arg) { + m_Str = arg.m_Str; + return *this; + } + + CString& operator=(const std::string& str) { + m_Str = str; + return 
*this; + } + + const std::string& str() const { return m_Str; } + +private: + std::string m_Str; +}; + +class CHandler { +public: + void processResult(const CString& result) { + LOG_DEBUG("Process result " << result.str() << " in thread " << ml::core::CThread::currentThreadId()); + + ml::core::CScopedLock lock(m_Mutex); + m_OutstandingOutput.erase(result.str()); + } + + void addExpectedOutput(const std::string& expected) { + ml::core::CScopedLock lock(m_Mutex); + m_OutstandingOutput.insert(expected); + } + + bool haveAllExpected() { + ml::core::CScopedLock lock(m_Mutex); + + TStrSet::iterator iter = m_OutstandingOutput.begin(); + if (iter != m_OutstandingOutput.end()) { + LOG_WARN("Result: " << *iter << " is still outstanding"); + } + + return m_OutstandingOutput.empty(); + } + +private: + using TStrSet = std::set; + + TStrSet m_OutstandingOutput; + ml::core::CMutex m_Mutex; +}; + +class CProcessor { +public: + CProcessor(const std::string& id) : m_Id(id) {} + + void msgToResult(const std::string& str, CString& result) { + LOG_DEBUG("messageToResult " << str); + + result = (str + ' ' + m_Id); + + LOG_DEBUG("messageToResult " << result.str()); + } + +private: + std::string m_Id; +}; } -void CThreadFarmTest::testSendReceive() -{ +void CThreadFarmTest::testSendReceive() { CHandler handler; ml::core::CThreadFarm farm(handler, "test"); @@ -152,13 +121,12 @@ void CThreadFarmTest::testSendReceive() CPPUNIT_ASSERT(farm.start()); - size_t max(10); + size_t max(10); LOG_DEBUG("Sending " << max << " strings"); char id = 'A'; - for (size_t i = 0; i < max; ++i) - { + for (size_t i = 0; i < max; ++i) { std::string message("Test string "); message += id; @@ -184,4 +152,3 @@ void CThreadFarmTest::testSendReceive() CPPUNIT_ASSERT(handler.haveAllExpected()); } - diff --git a/lib/core/unittest/CThreadFarmTest.h b/lib/core/unittest/CThreadFarmTest.h index d1400b96f5..c8e574adde 100644 --- a/lib/core/unittest/CThreadFarmTest.h +++ b/lib/core/unittest/CThreadFarmTest.h @@ -8,15 +8,12 @@ #include +class CThreadFarmTest : public CppUnit::TestFixture { +public: + void testNumCpus(); + void testSendReceive(); -class CThreadFarmTest : public CppUnit::TestFixture -{ - public: - void testNumCpus(); - void testSendReceive(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CThreadFarmTest_h - diff --git a/lib/core/unittest/CThreadMutexConditionTest.cc b/lib/core/unittest/CThreadMutexConditionTest.cc index 0e9b4204d6..3c66e035b6 100644 --- a/lib/core/unittest/CThreadMutexConditionTest.cc +++ b/lib/core/unittest/CThreadMutexConditionTest.cc @@ -11,71 +11,58 @@ #include #include +CppUnit::Test* CThreadMutexConditionTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CThreadMutexConditionTest"); -CppUnit::Test *CThreadMutexConditionTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CThreadMutexConditionTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CThreadMutexConditionTest::testThread", - &CThreadMutexConditionTest::testThread) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CThreadMutexConditionTest::testThreadCondition", - &CThreadMutexConditionTest::testThreadCondition) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CThreadMutexConditionTest::testThread", + &CThreadMutexConditionTest::testThread)); + suiteOfTests->addTest(new CppUnit::TestCaller("CThreadMutexConditionTest::testThreadCondition", + &CThreadMutexConditionTest::testThreadCondition)); return suiteOfTests; } -void 
CThreadMutexConditionTest::testThread() -{ - class CThread : public ml::core::CThread - { - public: - CThread() : m_Running(false) - { - } - - bool isRunning() - { - m_Mutex.lock(); - bool ret = m_Running; - m_Mutex.unlock(); - return ret; - } - - private: - void run() - { - LOG_DEBUG("Thread running"); +void CThreadMutexConditionTest::testThread() { + class CThread : public ml::core::CThread { + public: + CThread() : m_Running(false) {} + + bool isRunning() { + m_Mutex.lock(); + bool ret = m_Running; + m_Mutex.unlock(); + return ret; + } + + private: + void run() { + LOG_DEBUG("Thread running"); + m_Mutex.lock(); + m_Running = true; + m_Mutex.unlock(); + + for (;;) { m_Mutex.lock(); - m_Running = true; - m_Mutex.unlock(); - - for(;;) - { - m_Mutex.lock(); - if(m_Running == false) - { - m_Mutex.unlock(); - break; - } + if (m_Running == false) { m_Mutex.unlock(); + break; } - - LOG_DEBUG("Thread exiting"); - } - - void shutdown() - { - LOG_DEBUG("Thread shutdown"); - m_Mutex.lock(); - m_Running = false; m_Mutex.unlock(); } - private: - ml::core::CMutex m_Mutex; - bool m_Running; + LOG_DEBUG("Thread exiting"); + } + + void shutdown() { + LOG_DEBUG("Thread shutdown"); + m_Mutex.lock(); + m_Running = false; + m_Mutex.unlock(); + } + + private: + ml::core::CMutex m_Mutex; + bool m_Running; }; CThread thread; @@ -95,64 +82,54 @@ void CThreadMutexConditionTest::testThread() CPPUNIT_ASSERT(thread.isRunning() == false); } -void CThreadMutexConditionTest::testThreadCondition() -{ - class CThread : public ml::core::CThread - { - public: - CThread() : m_Condition(m_Mutex) - { - } - - void lock() - { - LOG_DEBUG("lock start " << this->currentThreadId()); - m_Mutex.lock(); - LOG_DEBUG("lock end " << this->currentThreadId()); - } - - void unlock() - { - LOG_DEBUG("unlock " << this->currentThreadId()); - m_Mutex.unlock(); - } - - void wait() - { - LOG_DEBUG("wait start " << this->currentThreadId()); - m_Condition.wait(); - LOG_DEBUG("wait end " << this->currentThreadId()); - } - - void signal() - { - LOG_DEBUG("signal " << this->currentThreadId()); - m_Condition.signal(); - } - - private: - void run() - { - LOG_DEBUG("Thread running"); - this->lock(); - this->signal(); - this->wait(); - this->unlock(); - LOG_DEBUG("Thread exiting"); - } - - void shutdown() - { - LOG_DEBUG("Thread shutting down"); - this->lock(); - this->signal(); - this->unlock(); - LOG_DEBUG("Thread shutdown"); - } - - private: - ml::core::CMutex m_Mutex; - ml::core::CCondition m_Condition; +void CThreadMutexConditionTest::testThreadCondition() { + class CThread : public ml::core::CThread { + public: + CThread() : m_Condition(m_Mutex) {} + + void lock() { + LOG_DEBUG("lock start " << this->currentThreadId()); + m_Mutex.lock(); + LOG_DEBUG("lock end " << this->currentThreadId()); + } + + void unlock() { + LOG_DEBUG("unlock " << this->currentThreadId()); + m_Mutex.unlock(); + } + + void wait() { + LOG_DEBUG("wait start " << this->currentThreadId()); + m_Condition.wait(); + LOG_DEBUG("wait end " << this->currentThreadId()); + } + + void signal() { + LOG_DEBUG("signal " << this->currentThreadId()); + m_Condition.signal(); + } + + private: + void run() { + LOG_DEBUG("Thread running"); + this->lock(); + this->signal(); + this->wait(); + this->unlock(); + LOG_DEBUG("Thread exiting"); + } + + void shutdown() { + LOG_DEBUG("Thread shutting down"); + this->lock(); + this->signal(); + this->unlock(); + LOG_DEBUG("Thread shutdown"); + } + + private: + ml::core::CMutex m_Mutex; + ml::core::CCondition m_Condition; }; CThread thread; diff --git 
a/lib/core/unittest/CThreadMutexConditionTest.h b/lib/core/unittest/CThreadMutexConditionTest.h
index 77bcc1d4de..060bde5328 100644
--- a/lib/core/unittest/CThreadMutexConditionTest.h
+++ b/lib/core/unittest/CThreadMutexConditionTest.h
@@ -8,13 +8,12 @@
 #include

-class CThreadMutexConditionTest : public CppUnit::TestFixture
-{
-    public:
-        void testThread();
-        void testThreadCondition();
+class CThreadMutexConditionTest : public CppUnit::TestFixture {
+public:
+    void testThread();
+    void testThreadCondition();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CThreadMutexConditionTest_h
diff --git a/lib/core/unittest/CThreadPoolTest.cc b/lib/core/unittest/CThreadPoolTest.cc
index 0d68ab6871..c14fdba03f 100644
--- a/lib/core/unittest/CThreadPoolTest.cc
+++ b/lib/core/unittest/CThreadPoolTest.cc
@@ -9,35 +9,26 @@
 #include

+CppUnit::Test* CThreadPoolTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CThreadPoolTest");
-CppUnit::Test *CThreadPoolTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CThreadPoolTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CThreadPoolTest>(
-                               "CThreadPoolTest::testPool",
-                               &CThreadPoolTest::testPool) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CThreadPoolTest>("CThreadPoolTest::testPool", &CThreadPoolTest::testPool));

     return suiteOfTests;
 }

-namespace
-{
+namespace {

-void first_task()
-{
-    LOG_DEBUG("first task is running");
+void first_task() {
+    LOG_DEBUG("first task is running");
 }

-void second_task()
-{
-    LOG_DEBUG("second task is running");
+void second_task() {
+    LOG_DEBUG("second task is running");
 }
-
 }

-void CThreadPoolTest::testPool()
-{
+void CThreadPoolTest::testPool() {
     // Create fifo thread pool container with two threads.
     boost::threadpool::pool tp(2);

@@ -48,4 +39,3 @@ void CThreadPoolTest::testPool()

     // Wait until all tasks are finished.
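    // (A minimal sketch of how tasks get onto this pool, assuming the
    // boost::threadpool API: schedule() enqueues a nullary callable on the
    // FIFO queue and wait() below blocks until the queue drains and all
    // running tasks finish. These lines are hypothetical, not the exact
    // lines omitted from this hunk:
    //
    //     tp.schedule(&first_task);  // may run on either pool thread
    //     tp.schedule(&second_task); // may run concurrently with first_task
    //
    // With a pool of two threads, both tasks can be in flight at once.)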
 tp.wait();
 }
-
diff --git a/lib/core/unittest/CThreadPoolTest.h b/lib/core/unittest/CThreadPoolTest.h
index f5b0e4f9ec..fe53d8d3f2 100644
--- a/lib/core/unittest/CThreadPoolTest.h
+++ b/lib/core/unittest/CThreadPoolTest.h
@@ -8,14 +8,11 @@
 #include

+class CThreadPoolTest : public CppUnit::TestFixture {
+public:
+    void testPool();
-class CThreadPoolTest : public CppUnit::TestFixture
-{
-    public:
-        void testPool();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CThreadPoolTest_h
-
diff --git a/lib/core/unittest/CTickerTest.cc b/lib/core/unittest/CTickerTest.cc
index 75821bbb97..d415b66480 100644
--- a/lib/core/unittest/CTickerTest.cc
+++ b/lib/core/unittest/CTickerTest.cc
@@ -9,45 +9,29 @@
 #include
 #include

+CppUnit::Test* CTickerTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTickerTest");
-CppUnit::Test *CTickerTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTickerTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTickerTest>(
-                               "CTickerTest::testTicker",
-                               &CTickerTest::testTicker) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTickerTest>("CTickerTest::testTicker", &CTickerTest::testTicker));

     return suiteOfTests;
 }

-namespace
-{
-    class CReceiver
-    {
-        public:
-            CReceiver()
-                : m_Ticks(0)
-            {
-            }
+namespace {
+class CReceiver {
+public:
+    CReceiver() : m_Ticks(0) {}

-            void tick()
-            {
-                ++m_Ticks;
-            }
+    void tick() { ++m_Ticks; }

-            size_t ticks() const
-            {
-                return m_Ticks;
-            }
+    size_t ticks() const { return m_Ticks; }

-        private:
-            size_t m_Ticks;
-    };
+private:
+    size_t m_Ticks;
+};
 }

-void CTickerTest::testTicker()
-{
+void CTickerTest::testTicker() {
     CReceiver receiver;

     ml::core::CTicker<CReceiver> ticker(100, receiver);

@@ -70,4 +54,3 @@ void CTickerTest::testTicker()
     // itself) then it's probably not too much of a cause for concern.
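    // (For reference: the ticker period is 100ms, so if the elided body
    // sleeps for roughly one second -- an assumption, but one the >= 9 bound
    // below suggests -- about ten tick() callbacks should arrive, and the
    // assertion tolerates one period of start-up latency. A minimal sketch
    // of the receiver/ticker contract this relies on; the template argument
    // is an assumption consistent with the CReceiver helper above:
    //
    //     CReceiver receiver;
    //     ml::core::CTicker<CReceiver> ticker(100, receiver); // calls receiver.tick() every 100ms once started
    //     size_t tickCount(receiver.ticks());                 // read back how many periods fired
    // )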
 CPPUNIT_ASSERT(tickCount >= 9);
 }
-
diff --git a/lib/core/unittest/CTickerTest.h b/lib/core/unittest/CTickerTest.h
index 0fe67da3d3..75456b2c36 100644
--- a/lib/core/unittest/CTickerTest.h
+++ b/lib/core/unittest/CTickerTest.h
@@ -8,12 +8,11 @@
 #include

-class CTickerTest : public CppUnit::TestFixture
-{
-    public:
-        void testTicker();
+class CTickerTest : public CppUnit::TestFixture {
+public:
+    void testTicker();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CTickerTest_h
diff --git a/lib/core/unittest/CTimeUtilsTest.cc b/lib/core/unittest/CTimeUtilsTest.cc
index bed970a9ea..b50f546f57 100644
--- a/lib/core/unittest/CTimeUtilsTest.cc
+++ b/lib/core/unittest/CTimeUtilsTest.cc
@@ -13,38 +13,21 @@
 #include

+CppUnit::Test* CTimeUtilsTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeUtilsTest");
-CppUnit::Test *CTimeUtilsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTimeUtilsTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeUtilsTest>(
-                               "CTimeUtilsTest::testNow",
-                               &CTimeUtilsTest::testNow) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeUtilsTest>(
-                               "CTimeUtilsTest::testToIso8601",
-                               &CTimeUtilsTest::testToIso8601) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeUtilsTest>(
-                               "CTimeUtilsTest::testToLocal",
-                               &CTimeUtilsTest::testToLocal) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeUtilsTest>(
-                               "CTimeUtilsTest::testToEpochMs",
-                               &CTimeUtilsTest::testToEpochMs) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeUtilsTest>(
-                               "CTimeUtilsTest::testStrptime",
-                               &CTimeUtilsTest::testStrptime) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeUtilsTest>(
-                               "CTimeUtilsTest::testTimezone",
-                               &CTimeUtilsTest::testTimezone) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeUtilsTest>(
-                               "CTimeUtilsTest::testDateWords",
-                               &CTimeUtilsTest::testDateWords) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testNow", &CTimeUtilsTest::testNow));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testToIso8601", &CTimeUtilsTest::testToIso8601));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testToLocal", &CTimeUtilsTest::testToLocal));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testToEpochMs", &CTimeUtilsTest::testToEpochMs));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testStrptime", &CTimeUtilsTest::testStrptime));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testTimezone", &CTimeUtilsTest::testTimezone));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testDateWords", &CTimeUtilsTest::testDateWords));

     return suiteOfTests;
 }

-void CTimeUtilsTest::testNow()
-{
+void CTimeUtilsTest::testNow() {
     ml::core_t::TTime t1(ml::core::CTimeUtils::now());
     ml::core::CSleep::sleep(1001);
     ml::core_t::TTime t2(ml::core::CTimeUtils::now());
@@ -52,8 +35,7 @@ void CTimeUtilsTest::testNow()
     CPPUNIT_ASSERT(t2 > t1);
 }

-void CTimeUtilsTest::testToIso8601()
-{
+void CTimeUtilsTest::testToIso8601() {
     // These tests assume UK time. In case they're ever run outside the UK,
     // we'll explicitly set the timezone for the purpose of these tests.
     CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("Europe/London"));
@@ -76,8 +58,7 @@ void CTimeUtilsTest::testToIso8601()
     }
 }

-void CTimeUtilsTest::testToLocal()
-{
+void CTimeUtilsTest::testToLocal() {
     // These tests assume UK time. In case they're ever run outside the UK,
     // we'll explicitly set the timezone for the purpose of these tests.
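    // (A worked example of why the zone must be pinned, using a value that
    // appears later in this file rather than the expected strings of this
    // test: epoch 1227710437 is 2008-11-26 14:40:37 UTC. Rendered in
    // Europe/London it is 14:40:37 local time, since GMT applies in
    // November, while a zone at UTC+1 would render the same instant as
    // 15:40:37. Without setTimezone() the expected local strings would
    // depend on the machine running the test.)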
CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("Europe/London")); @@ -108,183 +89,175 @@ void CTimeUtilsTest::testToLocal() } } -void CTimeUtilsTest::testToEpochMs() -{ +void CTimeUtilsTest::testToEpochMs() { CPPUNIT_ASSERT_EQUAL(int64_t(1000), ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(1))); CPPUNIT_ASSERT_EQUAL(int64_t(-1000), ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(-1))); CPPUNIT_ASSERT_EQUAL(int64_t(1521035866000), ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(1521035866))); CPPUNIT_ASSERT_EQUAL(int64_t(-1521035866000), ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(-1521035866))); } -void CTimeUtilsTest::testStrptime() -{ +void CTimeUtilsTest::testStrptime() { // These tests assume UK time. In case they're ever run outside the UK, // we'll explicitly set the timezone for the purpose of these tests. CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("Europe/London")); -{ - // This time is deliberately chosen to be during daylight saving time - std::string dateTime("1122334455"); - - std::string format("%s"); - - ml::core_t::TTime expected(1122334455); - ml::core_t::TTime actual(0); + { + // This time is deliberately chosen to be during daylight saving time + std::string dateTime("1122334455"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - CPPUNIT_ASSERT_EQUAL(expected, actual); -} -{ - std::string dateTime("2008-11-26 14:40:37"); + std::string format("%s"); - std::string format("%Y-%m-%d %H:%M:%S"); + ml::core_t::TTime expected(1122334455); + ml::core_t::TTime actual(0); - ml::core_t::TTime expected(1227710437); - ml::core_t::TTime actual(0); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + CPPUNIT_ASSERT_EQUAL(expected, actual); + } + { + std::string dateTime("2008-11-26 14:40:37"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - CPPUNIT_ASSERT_EQUAL(expected, actual); + std::string format("%Y-%m-%d %H:%M:%S"); - std::string badDateTime("2008-11-26 25:40:37"); - CPPUNIT_ASSERT(!ml::core::CTimeUtils::strptime(format, badDateTime, actual)); -} -{ - std::string dateTime("10/31/2008 3:15:00 AM"); + ml::core_t::TTime expected(1227710437); + ml::core_t::TTime actual(0); - std::string format("%m/%d/%Y %I:%M:%S %p"); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + CPPUNIT_ASSERT_EQUAL(expected, actual); - ml::core_t::TTime expected(1225422900); - ml::core_t::TTime actual(0); + std::string badDateTime("2008-11-26 25:40:37"); + CPPUNIT_ASSERT(!ml::core::CTimeUtils::strptime(format, badDateTime, actual)); + } + { + std::string dateTime("10/31/2008 3:15:00 AM"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - CPPUNIT_ASSERT_EQUAL(expected, actual); - LOG_DEBUG(actual); -} -{ - std::string dateTime("Fri Oct 31 3:15:00 AM GMT 08"); + std::string format("%m/%d/%Y %I:%M:%S %p"); - std::string format("%a %b %d %I:%M:%S %p %Z %y"); + ml::core_t::TTime expected(1225422900); + ml::core_t::TTime actual(0); - ml::core_t::TTime expected(1225422900); - ml::core_t::TTime actual(0); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + CPPUNIT_ASSERT_EQUAL(expected, actual); + LOG_DEBUG(actual); + } + { + std::string dateTime("Fri Oct 31 3:15:00 AM GMT 08"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - CPPUNIT_ASSERT_EQUAL(expected, actual); - LOG_DEBUG(actual); -} -{ - std::string dateTime("Tue Jun 23 17:24:55 2009"); + std::string format("%a %b %d %I:%M:%S %p %Z %y"); - std::string 
format("%a %b %d %T %Y"); + ml::core_t::TTime expected(1225422900); + ml::core_t::TTime actual(0); - ml::core_t::TTime expected(1245774295); - ml::core_t::TTime actual(0); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + CPPUNIT_ASSERT_EQUAL(expected, actual); + LOG_DEBUG(actual); + } + { + std::string dateTime("Tue Jun 23 17:24:55 2009"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - CPPUNIT_ASSERT_EQUAL(expected, actual); - LOG_DEBUG(actual); -} -{ - std::string dateTime("Tue Jun 23 17:24:55 BST 2009"); + std::string format("%a %b %d %T %Y"); - std::string format("%a %b %d %T %Z %Y"); + ml::core_t::TTime expected(1245774295); + ml::core_t::TTime actual(0); - ml::core_t::TTime expected(1245774295); - ml::core_t::TTime actual(0); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + CPPUNIT_ASSERT_EQUAL(expected, actual); + LOG_DEBUG(actual); + } + { + std::string dateTime("Tue Jun 23 17:24:55 BST 2009"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - CPPUNIT_ASSERT_EQUAL(expected, actual); - LOG_DEBUG(actual); -} -{ - // This time is in summer, but explicitly specifies a GMT offset of 0, - // so we should get 1245777895 instead of 1245774295 - std::string dateTime("Tue Jun 23 17:24:55 2009 +0000"); + std::string format("%a %b %d %T %Z %Y"); - std::string format("%a %b %d %T %Y %z"); + ml::core_t::TTime expected(1245774295); + ml::core_t::TTime actual(0); - ml::core_t::TTime expected(1245777895); - ml::core_t::TTime actual(0); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + CPPUNIT_ASSERT_EQUAL(expected, actual); + LOG_DEBUG(actual); + } + { + // This time is in summer, but explicitly specifies a GMT offset of 0, + // so we should get 1245777895 instead of 1245774295 + std::string dateTime("Tue Jun 23 17:24:55 2009 +0000"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - CPPUNIT_ASSERT_EQUAL(expected, actual); - LOG_DEBUG(actual); + std::string format("%a %b %d %T %Y %z"); - std::string badDateTime1("Tue Jun 23 17:24:55 2009"); - CPPUNIT_ASSERT(!ml::core::CTimeUtils::strptime(format, badDateTime1, actual)); + ml::core_t::TTime expected(1245777895); + ml::core_t::TTime actual(0); - std::string badDateTime2("Tue Jun 23 17:24:55 2009 0000"); - CPPUNIT_ASSERT(!ml::core::CTimeUtils::strptime(format, badDateTime2, actual)); -} -{ - // Test what happens when no year is given - std::string dateTime("Jun 23 17:24:55"); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + CPPUNIT_ASSERT_EQUAL(expected, actual); + LOG_DEBUG(actual); - std::string format("%b %d %T"); + std::string badDateTime1("Tue Jun 23 17:24:55 2009"); + CPPUNIT_ASSERT(!ml::core::CTimeUtils::strptime(format, badDateTime1, actual)); - ml::core_t::TTime actual(0); + std::string badDateTime2("Tue Jun 23 17:24:55 2009 0000"); + CPPUNIT_ASSERT(!ml::core::CTimeUtils::strptime(format, badDateTime2, actual)); + } + { + // Test what happens when no year is given + std::string dateTime("Jun 23 17:24:55"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - LOG_DEBUG(actual); + std::string format("%b %d %T"); - // This test is only approximate (assuming leap year with leap second), so - // print a warning too - CPPUNIT_ASSERT(actual >= ml::core::CTimeUtils::now() - 366 * 24 * 60 * 60 - 1); - char buf[128] = { '\0' }; - LOG_WARN("If the following date is not within the last year then something is wrong: " << - 
ml::core::CCTimeR::cTimeR(&actual, buf)); + ml::core_t::TTime actual(0); - // Allow small tolerance in case of clock discrepancies between machines - CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); -} -{ - // Test what happens when no year is given - std::string dateTime("Jan 01 01:24:55"); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + LOG_DEBUG(actual); - std::string format("%b %d %T"); + // This test is only approximate (assuming leap year with leap second), so + // print a warning too + CPPUNIT_ASSERT(actual >= ml::core::CTimeUtils::now() - 366 * 24 * 60 * 60 - 1); + char buf[128] = {'\0'}; + LOG_WARN("If the following date is not within the last year then something is wrong: " << ml::core::CCTimeR::cTimeR(&actual, buf)); - ml::core_t::TTime actual(0); + // Allow small tolerance in case of clock discrepancies between machines + CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); + } + { + // Test what happens when no year is given + std::string dateTime("Jan 01 01:24:55"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - LOG_DEBUG(actual); + std::string format("%b %d %T"); - // This test is only approximate (assuming leap year with leap second), so - // print a warning too - CPPUNIT_ASSERT(actual >= ml::core::CTimeUtils::now() - 366 * 24 * 60 * 60 - 1); - char buf[128] = { '\0' }; - LOG_WARN("If the following date is not within the last year then something is wrong: " << - ml::core::CCTimeR::cTimeR(&actual, buf)); + ml::core_t::TTime actual(0); - // Allow small tolerance in case of clock discrepancies between machines - CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); -} -{ - // Test what happens when no year is given - std::string dateTime("Dec 31 23:24:55"); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + LOG_DEBUG(actual); - std::string format("%b %d %T"); + // This test is only approximate (assuming leap year with leap second), so + // print a warning too + CPPUNIT_ASSERT(actual >= ml::core::CTimeUtils::now() - 366 * 24 * 60 * 60 - 1); + char buf[128] = {'\0'}; + LOG_WARN("If the following date is not within the last year then something is wrong: " << ml::core::CCTimeR::cTimeR(&actual, buf)); - ml::core_t::TTime actual(0); + // Allow small tolerance in case of clock discrepancies between machines + CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); + } + { + // Test what happens when no year is given + std::string dateTime("Dec 31 23:24:55"); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); - LOG_DEBUG(actual); + std::string format("%b %d %T"); - // This test is only approximate (assuming leap year with leap second), so - // print a warning too - CPPUNIT_ASSERT(actual >= ml::core::CTimeUtils::now() - 366 * 24 * 60 * 60 - 1); - char buf[128] = { '\0' }; - LOG_WARN("If the following date is not within the last year then something is wrong: " << - ml::core::CCTimeR::cTimeR(&actual, buf)); + ml::core_t::TTime actual(0); - // Allow small tolerance in case of clock discrepancies between machines - CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); -} + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); + LOG_DEBUG(actual); + // This test is only approximate (assuming leap year with leap second), so + 
// print a warning too + CPPUNIT_ASSERT(actual >= ml::core::CTimeUtils::now() - 366 * 24 * 60 * 60 - 1); + char buf[128] = {'\0'}; + LOG_WARN("If the following date is not within the last year then something is wrong: " << ml::core::CCTimeR::cTimeR(&actual, buf)); + // Allow small tolerance in case of clock discrepancies between machines + CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); + } } -void CTimeUtilsTest::testTimezone() -{ +void CTimeUtilsTest::testTimezone() { static const ml::core_t::TTime SECONDS_PER_HOUR = 3600; // These convert the same date/time to a Unix time, but in a variety of @@ -403,8 +376,7 @@ void CTimeUtilsTest::testTimezone() CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("")); } -void CTimeUtilsTest::testDateWords() -{ +void CTimeUtilsTest::testDateWords() { // These tests assume they're being run in an English speaking country LOG_DEBUG("Checking day of week abbreviations"); @@ -492,4 +464,3 @@ void CTimeUtilsTest::testDateWords() CPPUNIT_ASSERT(!ml::core::CTimeUtils::isDateWord("\t")); CPPUNIT_ASSERT(!ml::core::CTimeUtils::isDateWord(" \t")); } - diff --git a/lib/core/unittest/CTimeUtilsTest.h b/lib/core/unittest/CTimeUtilsTest.h index f5d79ea4f0..6a95e735ee 100644 --- a/lib/core/unittest/CTimeUtilsTest.h +++ b/lib/core/unittest/CTimeUtilsTest.h @@ -8,20 +8,17 @@ #include +class CTimeUtilsTest : public CppUnit::TestFixture { +public: + void testNow(); + void testToIso8601(); + void testToLocal(); + void testToEpochMs(); + void testStrptime(); + void testTimezone(); + void testDateWords(); -class CTimeUtilsTest : public CppUnit::TestFixture -{ - public: - void testNow(); - void testToIso8601(); - void testToLocal(); - void testToEpochMs(); - void testStrptime(); - void testTimezone(); - void testDateWords(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTimeUtilsTest_h - diff --git a/lib/core/unittest/CTripleTest.cc b/lib/core/unittest/CTripleTest.cc index 1df7651844..535e850342 100644 --- a/lib/core/unittest/CTripleTest.cc +++ b/lib/core/unittest/CTripleTest.cc @@ -12,22 +12,15 @@ #include +CppUnit::Test* CTripleTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTripleTest"); -CppUnit::Test *CTripleTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTripleTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTripleTest::testOperators", - &CTripleTest::testOperators) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTripleTest::testBoostHashReady", - &CTripleTest::testBoostHashReady) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CTripleTest::testOperators", &CTripleTest::testOperators)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTripleTest::testBoostHashReady", &CTripleTest::testBoostHashReady)); return suiteOfTests; } -void CTripleTest::testOperators() -{ +void CTripleTest::testOperators() { { // Assignment ml::core::CTriple triple1("foo", 10, 8); @@ -64,8 +57,7 @@ void CTripleTest::testOperators() } } -void CTripleTest::testBoostHashReady() -{ +void CTripleTest::testBoostHashReady() { using TStringSizeShortTriple = ml::core::CTriple; using TStringSizeShortTripleSizeMap = boost::unordered_map; @@ -77,4 +69,3 @@ void CTripleTest::testBoostHashReady() CPPUNIT_ASSERT_EQUAL(std::size_t(2), map[ml::core::make_triple(std::string("bar"), std::size_t(20), short(4))]); CPPUNIT_ASSERT(map.find(ml::core::make_triple(std::string("bar"), std::size_t(20), short(8))) == map.end()); } - diff --git 
a/lib/core/unittest/CTripleTest.h b/lib/core/unittest/CTripleTest.h index b697e0ae0b..b3883257b5 100644 --- a/lib/core/unittest/CTripleTest.h +++ b/lib/core/unittest/CTripleTest.h @@ -8,15 +8,12 @@ #include +class CTripleTest : public CppUnit::TestFixture { +public: + void testOperators(); + void testBoostHashReady(); -class CTripleTest : public CppUnit::TestFixture -{ - public: - void testOperators(); - void testBoostHashReady(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTripleTest_h - diff --git a/lib/core/unittest/CUnameTest.cc b/lib/core/unittest/CUnameTest.cc index 130a41da22..489e0e491b 100644 --- a/lib/core/unittest/CUnameTest.cc +++ b/lib/core/unittest/CUnameTest.cc @@ -8,20 +8,15 @@ #include #include +CppUnit::Test* CUnameTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CUnameTest"); -CppUnit::Test *CUnameTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CUnameTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CUnameTest::testUname", - &CUnameTest::testUname) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CUnameTest::testUname", &CUnameTest::testUname)); return suiteOfTests; } -void CUnameTest::testUname() -{ +void CUnameTest::testUname() { LOG_DEBUG(ml::core::CUname::sysName()); LOG_DEBUG(ml::core::CUname::nodeName()); LOG_DEBUG(ml::core::CUname::release()); @@ -31,4 +26,3 @@ void CUnameTest::testUname() LOG_DEBUG(ml::core::CUname::mlPlatform()); LOG_DEBUG(ml::core::CUname::mlOsVer()); } - diff --git a/lib/core/unittest/CUnameTest.h b/lib/core/unittest/CUnameTest.h index 6f9c7cd5f6..ce8e99abcb 100644 --- a/lib/core/unittest/CUnameTest.h +++ b/lib/core/unittest/CUnameTest.h @@ -8,14 +8,11 @@ #include +class CUnameTest : public CppUnit::TestFixture { +public: + void testUname(); -class CUnameTest : public CppUnit::TestFixture -{ - public: - void testUname(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CUnameTest_h - diff --git a/lib/core/unittest/CVectorRangeTest.cc b/lib/core/unittest/CVectorRangeTest.cc index 4b8a2a5191..734ed50b63 100644 --- a/lib/core/unittest/CVectorRangeTest.cc +++ b/lib/core/unittest/CVectorRangeTest.cc @@ -18,8 +18,7 @@ using TDoubleVec = std::vector; using TDoubleRng = core::CVectorRange; using TDoubleCRng = core::CVectorRange; -void CVectorRangeTest::testCreation() -{ +void CVectorRangeTest::testCreation() { LOG_DEBUG("*** CVectorRangeTest::testCreation ***"); { @@ -28,20 +27,16 @@ void CVectorRangeTest::testCreation() TDoubleRng range13{values1, 1, 3}; range13 = core::make_range(values1, 0, 3); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 1, 0.1, 0.7, 9.8]"), - core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 1, 0.1, 0.7, 9.8]"), core::CContainerPrinter::print(values1)); range13 = core::make_range(values2, 1, 4); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 1.4, 5.7, 1.2, 9.8]"), - core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 1.4, 5.7, 1.2, 9.8]"), core::CContainerPrinter::print(values1)); range13.assign(2, 2.0); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 2, 2, 9.8]"), - core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 2, 2, 9.8]"), core::CContainerPrinter::print(values1)); range13.assign(values2.begin(), values2.end()); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 3.1, 1.4, 5.7, 1.2, 9.8]"), - core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 3.1, 1.4, 5.7, 1.2, 9.8]"), 
core::CContainerPrinter::print(values1)); } { TDoubleVec values1{1.0, 0.1, 0.7, 9.8}; @@ -49,15 +44,12 @@ void CVectorRangeTest::testCreation() TDoubleCRng range1{values1, 1, 3}; range1 = TDoubleCRng(values2, 0, 3); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 0.7, 9.8]"), - core::CContainerPrinter::print(values1)); - CPPUNIT_ASSERT_EQUAL(std::string("[3.1, 1.4, 5.7]"), - core::CContainerPrinter::print(range1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 0.7, 9.8]"), core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[3.1, 1.4, 5.7]"), core::CContainerPrinter::print(range1)); } } -void CVectorRangeTest::testAccessors() -{ +void CVectorRangeTest::testAccessors() { LOG_DEBUG("*** CVectorRangeTest::testAccessors ***"); TDoubleVec values{1.0, 0.1, 0.7, 9.8, 8.0}; @@ -88,8 +80,7 @@ void CVectorRangeTest::testAccessors() CPPUNIT_ASSERT_EQUAL(9.8, crange14.back()); } -void CVectorRangeTest::testIterators() -{ +void CVectorRangeTest::testIterators() { LOG_DEBUG("*** CVectorRangeTest::testIterators ***"); TDoubleVec values{1.0, 0.1, 0.7, 9.8, 8.0}; @@ -97,27 +88,23 @@ void CVectorRangeTest::testIterators() TDoubleRng range14{values, 1, 4}; const TDoubleRng crange14{values, 1, 4}; - for (auto i = range14.begin(), j = values.begin() + 1; i != range14.end(); ++i, ++j) - { + for (auto i = range14.begin(), j = values.begin() + 1; i != range14.end(); ++i, ++j) { CPPUNIT_ASSERT_EQUAL(*j, *i); } CPPUNIT_ASSERT_EQUAL(std::ptrdiff_t(3), range14.end() - range14.begin()); - for (auto i = range14.cbegin(), j = values.cbegin() + 1; i != range14.cend(); ++i, ++j) - { + for (auto i = range14.cbegin(), j = values.cbegin() + 1; i != range14.cend(); ++i, ++j) { CPPUNIT_ASSERT_EQUAL(*j, *i); } CPPUNIT_ASSERT_EQUAL(std::ptrdiff_t(3), range14.end() - range14.begin()); - for (auto i = crange14.begin(), j = values.cbegin() + 1; i != crange14.end(); ++i, ++j) - { + for (auto i = crange14.begin(), j = values.cbegin() + 1; i != crange14.end(); ++i, ++j) { CPPUNIT_ASSERT_EQUAL(*j, *i); } CPPUNIT_ASSERT_EQUAL(std::ptrdiff_t(3), crange14.end() - crange14.begin()); } -void CVectorRangeTest::testSizing() -{ +void CVectorRangeTest::testSizing() { LOG_DEBUG("*** CVectorRangeTest::testSizing ***"); TDoubleVec values{1.0, 0.1, 0.7, 9.8, 8.0}; @@ -142,8 +129,7 @@ void CVectorRangeTest::testSizing() CPPUNIT_ASSERT(values.capacity() >= 15); } -void CVectorRangeTest::testModifiers() -{ +void CVectorRangeTest::testModifiers() { LOG_DEBUG("*** CVectorRangeTest::testModifiers ***"); TDoubleVec values1{1.0, 0.1, 0.7, 9.8, 8.0}; @@ -196,21 +182,20 @@ void CVectorRangeTest::testModifiers() TDoubleRng range113{values1, 1, 3}; TDoubleRng range223{values2, 2, 3}; - std::string s1 = core::CContainerPrinter::print(values1); - std::string s2 = core::CContainerPrinter::print(values2); + std::string s1 = core::CContainerPrinter::print(values1); + std::string s2 = core::CContainerPrinter::print(values2); std::string s113 = core::CContainerPrinter::print(range113); std::string s223 = core::CContainerPrinter::print(range223); range113.swap(range223); - CPPUNIT_ASSERT_EQUAL(s1, core::CContainerPrinter::print(values1)); - CPPUNIT_ASSERT_EQUAL(s2, core::CContainerPrinter::print(values2)); + CPPUNIT_ASSERT_EQUAL(s1, core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(s2, core::CContainerPrinter::print(values2)); CPPUNIT_ASSERT_EQUAL(s113, core::CContainerPrinter::print(range223)); CPPUNIT_ASSERT_EQUAL(s223, core::CContainerPrinter::print(range113)); } -void CVectorRangeTest::testComparisons() -{ +void 
CVectorRangeTest::testComparisons() { LOG_DEBUG("*** CVectorRangeTest::testComparisons ***"); TDoubleVec values1{1.0, 0.1, 0.7, 9.8, 8.0}; @@ -244,28 +229,16 @@ void CVectorRangeTest::testComparisons() CPPUNIT_ASSERT(!(range103 >= range202)); } -CppUnit::Test *CVectorRangeTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CVectorRangeTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CVectorRangeTest::testCreation", - &CVectorRangeTest::testCreation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CVectorRangeTest::testAccessors", - &CVectorRangeTest::testAccessors) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CVectorRangeTest::testIterators", - &CVectorRangeTest::testIterators) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CVectorRangeTest::testSizing", - &CVectorRangeTest::testSizing) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CVectorRangeTest::testModifiers", - &CVectorRangeTest::testModifiers) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CVectorRangeTest::testComparisons", - &CVectorRangeTest::testComparisons) ); +CppUnit::Test* CVectorRangeTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CVectorRangeTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CVectorRangeTest::testCreation", &CVectorRangeTest::testCreation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CVectorRangeTest::testAccessors", &CVectorRangeTest::testAccessors)); + suiteOfTests->addTest(new CppUnit::TestCaller("CVectorRangeTest::testIterators", &CVectorRangeTest::testIterators)); + suiteOfTests->addTest(new CppUnit::TestCaller("CVectorRangeTest::testSizing", &CVectorRangeTest::testSizing)); + suiteOfTests->addTest(new CppUnit::TestCaller("CVectorRangeTest::testModifiers", &CVectorRangeTest::testModifiers)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CVectorRangeTest::testComparisons", &CVectorRangeTest::testComparisons)); return suiteOfTests; } diff --git a/lib/core/unittest/CVectorRangeTest.h b/lib/core/unittest/CVectorRangeTest.h index dcf7f2be5c..84ecb6b0e0 100644 --- a/lib/core/unittest/CVectorRangeTest.h +++ b/lib/core/unittest/CVectorRangeTest.h @@ -9,17 +9,16 @@ #include -class CVectorRangeTest : public CppUnit::TestFixture -{ - public: - void testCreation(); - void testAccessors(); - void testIterators(); - void testSizing(); - void testModifiers(); - void testComparisons(); +class CVectorRangeTest : public CppUnit::TestFixture { +public: + void testCreation(); + void testAccessors(); + void testIterators(); + void testSizing(); + void testModifiers(); + void testComparisons(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTrendTestsTest_h diff --git a/lib/core/unittest/CWindowsErrorTest.cc b/lib/core/unittest/CWindowsErrorTest.cc index 2a489f523a..e6e824a7a6 100644 --- a/lib/core/unittest/CWindowsErrorTest.cc +++ b/lib/core/unittest/CWindowsErrorTest.cc @@ -5,23 +5,18 @@ */ #include "CWindowsErrorTest.h" -#include #include +#include +CppUnit::Test* CWindowsErrorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CWindowsErrorTest"); -CppUnit::Test *CWindowsErrorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CWindowsErrorTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CWindowsErrorTest::testErrors", - &CWindowsErrorTest::testErrors) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CWindowsErrorTest::testErrors", &CWindowsErrorTest::testErrors)); return 
suiteOfTests; } -void CWindowsErrorTest::testErrors() -{ +void CWindowsErrorTest::testErrors() { LOG_INFO("Windows error 1 is : " << ml::core::CWindowsError(1)); LOG_INFO("Windows error 2 is : " << ml::core::CWindowsError(2)); LOG_INFO("Windows error 3 is : " << ml::core::CWindowsError(3)); @@ -29,4 +24,3 @@ void CWindowsErrorTest::testErrors() LOG_INFO("Windows error 5 is : " << ml::core::CWindowsError(5)); LOG_INFO("Windows error 6 is : " << ml::core::CWindowsError(6)); } - diff --git a/lib/core/unittest/CWindowsErrorTest.h b/lib/core/unittest/CWindowsErrorTest.h index 36bce91a6f..cae7c43d8c 100644 --- a/lib/core/unittest/CWindowsErrorTest.h +++ b/lib/core/unittest/CWindowsErrorTest.h @@ -8,14 +8,11 @@ #include +class CWindowsErrorTest : public CppUnit::TestFixture { +public: + void testErrors(); -class CWindowsErrorTest : public CppUnit::TestFixture -{ - public: - void testErrors(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CWindowsErrorTest_h - diff --git a/lib/core/unittest/CWordDictionaryTest.cc b/lib/core/unittest/CWordDictionaryTest.cc index 7cbd4887c0..dedf32d01f 100644 --- a/lib/core/unittest/CWordDictionaryTest.cc +++ b/lib/core/unittest/CWordDictionaryTest.cc @@ -9,30 +9,23 @@ #include #include - -CppUnit::Test *CWordDictionaryTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CWordDictionaryTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CWordDictionaryTest::testLookups", - &CWordDictionaryTest::testLookups) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CWordDictionaryTest::testPartOfSpeech", - &CWordDictionaryTest::testPartOfSpeech) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CWordDictionaryTest::testWeightingFunctors", - &CWordDictionaryTest::testWeightingFunctors) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CWordDictionaryTest::testPerformance", - &CWordDictionaryTest::testPerformance) ); +CppUnit::Test* CWordDictionaryTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CWordDictionaryTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CWordDictionaryTest::testLookups", &CWordDictionaryTest::testLookups)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CWordDictionaryTest::testPartOfSpeech", &CWordDictionaryTest::testPartOfSpeech)); + suiteOfTests->addTest(new CppUnit::TestCaller("CWordDictionaryTest::testWeightingFunctors", + &CWordDictionaryTest::testWeightingFunctors)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CWordDictionaryTest::testPerformance", &CWordDictionaryTest::testPerformance)); return suiteOfTests; } -void CWordDictionaryTest::testLookups() -{ - const ml::core::CWordDictionary &dict = ml::core::CWordDictionary::instance(); +void CWordDictionaryTest::testLookups() { + const ml::core::CWordDictionary& dict = ml::core::CWordDictionary::instance(); CPPUNIT_ASSERT(dict.isInDictionary("hello")); CPPUNIT_ASSERT(dict.isInDictionary("Hello")); @@ -48,113 +41,69 @@ void CWordDictionaryTest::testLookups() CPPUNIT_ASSERT(!dict.isInDictionary("HELLO2")); } -void CWordDictionaryTest::testPartOfSpeech() -{ - const ml::core::CWordDictionary &dict = ml::core::CWordDictionary::instance(); - - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_NotInDictionary, - dict.partOfSpeech("ajksdf")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_UnknownPart, - dict.partOfSpeech("callback")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Noun, - dict.partOfSpeech("House")); - 
CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Plural, - dict.partOfSpeech("Houses")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Verb, - dict.partOfSpeech("COMPLETED")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Adjective, - dict.partOfSpeech("heavy")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Adverb, - dict.partOfSpeech("slowly")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Conjunction, - dict.partOfSpeech("AND")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Preposition, - dict.partOfSpeech("without")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Interjection, - dict.partOfSpeech("gosh")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Pronoun, - dict.partOfSpeech("hers")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_DefiniteArticle, - dict.partOfSpeech("the")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_IndefiniteArticle, - dict.partOfSpeech("a")); +void CWordDictionaryTest::testPartOfSpeech() { + const ml::core::CWordDictionary& dict = ml::core::CWordDictionary::instance(); + + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_NotInDictionary, dict.partOfSpeech("ajksdf")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_UnknownPart, dict.partOfSpeech("callback")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Noun, dict.partOfSpeech("House")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Plural, dict.partOfSpeech("Houses")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Verb, dict.partOfSpeech("COMPLETED")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Adjective, dict.partOfSpeech("heavy")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Adverb, dict.partOfSpeech("slowly")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Conjunction, dict.partOfSpeech("AND")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Preposition, dict.partOfSpeech("without")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Interjection, dict.partOfSpeech("gosh")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Pronoun, dict.partOfSpeech("hers")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_DefiniteArticle, dict.partOfSpeech("the")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_IndefiniteArticle, dict.partOfSpeech("a")); } -void CWordDictionaryTest::testWeightingFunctors() -{ +void CWordDictionaryTest::testWeightingFunctors() { { ml::core::CWordDictionary::TWeightAll2 weighter; - CPPUNIT_ASSERT_EQUAL(size_t(0), - weighter(ml::core::CWordDictionary::E_NotInDictionary)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_UnknownPart)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Noun)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Plural)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Verb)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Adjective)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Adverb)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Conjunction)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Preposition)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Interjection)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Pronoun)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_DefiniteArticle)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - 
weighter(ml::core::CWordDictionary::E_IndefiniteArticle)); + CPPUNIT_ASSERT_EQUAL(size_t(0), weighter(ml::core::CWordDictionary::E_NotInDictionary)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_UnknownPart)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Noun)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Plural)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Verb)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Adjective)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Adverb)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Conjunction)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Preposition)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Interjection)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Pronoun)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_DefiniteArticle)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_IndefiniteArticle)); } { ml::core::CWordDictionary::TWeightVerbs5Other2 weighter; - CPPUNIT_ASSERT_EQUAL(size_t(0), - weighter(ml::core::CWordDictionary::E_NotInDictionary)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_UnknownPart)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Noun)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Plural)); - CPPUNIT_ASSERT_EQUAL(size_t(5), - weighter(ml::core::CWordDictionary::E_Verb)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Adjective)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Adverb)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Conjunction)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Preposition)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Interjection)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_Pronoun)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_DefiniteArticle)); - CPPUNIT_ASSERT_EQUAL(size_t(2), - weighter(ml::core::CWordDictionary::E_IndefiniteArticle)); + CPPUNIT_ASSERT_EQUAL(size_t(0), weighter(ml::core::CWordDictionary::E_NotInDictionary)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_UnknownPart)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Noun)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Plural)); + CPPUNIT_ASSERT_EQUAL(size_t(5), weighter(ml::core::CWordDictionary::E_Verb)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Adjective)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Adverb)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Conjunction)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Preposition)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Interjection)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_Pronoun)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_DefiniteArticle)); + CPPUNIT_ASSERT_EQUAL(size_t(2), weighter(ml::core::CWordDictionary::E_IndefiniteArticle)); } } -void CWordDictionaryTest::testPerformance() -{ - const 
ml::core::CWordDictionary &dict = ml::core::CWordDictionary::instance(); +void CWordDictionaryTest::testPerformance() { + const ml::core::CWordDictionary& dict = ml::core::CWordDictionary::instance(); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting word dictionary throughput test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting word dictionary throughput test at " << ml::core::CTimeUtils::toTimeString(start)); static const size_t TEST_SIZE(100000); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { dict.isInDictionary("hello"); dict.isInDictionary("Hello"); dict.isInDictionary("HELLO"); @@ -170,10 +119,7 @@ void CWordDictionaryTest::testPerformance() } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished word dictionary throughput test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished word dictionary throughput test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Word dictionary throughput test took " << (end - start) << - " seconds"); + LOG_INFO("Word dictionary throughput test took " << (end - start) << " seconds"); } - diff --git a/lib/core/unittest/CWordDictionaryTest.h b/lib/core/unittest/CWordDictionaryTest.h index 7b72e910e8..816b81412e 100644 --- a/lib/core/unittest/CWordDictionaryTest.h +++ b/lib/core/unittest/CWordDictionaryTest.h @@ -8,17 +8,14 @@ #include +class CWordDictionaryTest : public CppUnit::TestFixture { +public: + void testLookups(); + void testPartOfSpeech(); + void testWeightingFunctors(); + void testPerformance(); -class CWordDictionaryTest : public CppUnit::TestFixture -{ - public: - void testLookups(); - void testPartOfSpeech(); - void testWeightingFunctors(); - void testPerformance(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CWordDictionaryTest_h - diff --git a/lib/core/unittest/CWordExtractorTest.cc b/lib/core/unittest/CWordExtractorTest.cc index 2a864cf1fd..10eb1e671a 100644 --- a/lib/core/unittest/CWordExtractorTest.cc +++ b/lib/core/unittest/CWordExtractorTest.cc @@ -8,23 +8,18 @@ #include #include +CppUnit::Test* CWordExtractorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CWordExtractorTest"); -CppUnit::Test *CWordExtractorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CWordExtractorTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CWordExtractorTest::testWordExtract", - &CWordExtractorTest::testWordExtract) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CWordExtractorTest::testMinConsecutive", - &CWordExtractorTest::testMinConsecutive) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CWordExtractorTest::testWordExtract", &CWordExtractorTest::testWordExtract)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CWordExtractorTest::testMinConsecutive", &CWordExtractorTest::testMinConsecutive)); return suiteOfTests; } -void CWordExtractorTest::testWordExtract() -{ +void CWordExtractorTest::testWordExtract() { { std::string message("2017-01-25 02:10:03,551 ERROR [co.elastic.tradefeedtracker.MessageLoggerService] Failed to Rollback"); std::string words; @@ -37,7 +32,8 @@ void CWordExtractorTest::testWordExtract() CPPUNIT_ASSERT_EQUAL(std::string("Failed to Rollback"), words); } { - std::string message("2017-01-25 14:20:49,646 INFO [co.elastic.settlement.synchronization.errors.NonFXInstructionSyncImpl] Found corresponding outgoingPaymentFlow :: 
OutGoingPaymentFlow.id = 7480"); + std::string message("2017-01-25 14:20:49,646 INFO [co.elastic.settlement.synchronization.errors.NonFXInstructionSyncImpl] Found " + "corresponding outgoingPaymentFlow :: OutGoingPaymentFlow.id = 7480"); std::string words; ml::core::CWordExtractor::extractWordsFromMessage(message, words); @@ -60,8 +56,7 @@ void CWordExtractorTest::testWordExtract() } } -void CWordExtractorTest::testMinConsecutive() -{ +void CWordExtractorTest::testMinConsecutive() { { std::string message("2017-01-25 02:10:03,551 ERROR [co.elastic.tradefeedtracker.MessageLoggerService] Failed to Rollback"); std::string words; @@ -119,7 +114,8 @@ void CWordExtractorTest::testMinConsecutive() CPPUNIT_ASSERT_EQUAL(std::string("which is more than the configured time"), words); } { - std::string message(" Output threshold breached for: dave at position 192.168.156.136/net using application 163 on channel 12."); + std::string message(" Output threshold breached for: dave at position 192.168.156.136/net using " + "application 163 on channel 12."); std::string words; ml::core::CWordExtractor::extractWordsFromMessage(2, message, words); @@ -147,4 +143,3 @@ void CWordExtractorTest::testMinConsecutive() CPPUNIT_ASSERT_EQUAL(std::string("Output threshold breached for"), words); } } - diff --git a/lib/core/unittest/CWordExtractorTest.h b/lib/core/unittest/CWordExtractorTest.h index 88e1b86a98..8be0f1376a 100644 --- a/lib/core/unittest/CWordExtractorTest.h +++ b/lib/core/unittest/CWordExtractorTest.h @@ -8,15 +8,12 @@ #include +class CWordExtractorTest : public CppUnit::TestFixture { +public: + void testWordExtract(); + void testMinConsecutive(); -class CWordExtractorTest : public CppUnit::TestFixture -{ - public: - void testWordExtract(); - void testMinConsecutive(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CWordExtractorTest_h - diff --git a/lib/core/unittest/CXmlNodeWithChildrenTest.cc b/lib/core/unittest/CXmlNodeWithChildrenTest.cc index 3ef08d240e..39fe8c1e78 100644 --- a/lib/core/unittest/CXmlNodeWithChildrenTest.cc +++ b/lib/core/unittest/CXmlNodeWithChildrenTest.cc @@ -11,29 +11,22 @@ #include #include - -CppUnit::Test *CXmlNodeWithChildrenTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CXmlNodeWithChildrenTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlNodeWithChildrenTest::testNodeHierarchyToXml", - &CXmlNodeWithChildrenTest::testNodeHierarchyToXml) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlNodeWithChildrenTest::testParserToNodeHierarchy", - &CXmlNodeWithChildrenTest::testParserToNodeHierarchy) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlNodeWithChildrenTest::testPerformanceNoPool", - &CXmlNodeWithChildrenTest::testPerformanceNoPool) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlNodeWithChildrenTest::testPerformanceWithPool", - &CXmlNodeWithChildrenTest::testPerformanceWithPool) ); +CppUnit::Test* CXmlNodeWithChildrenTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXmlNodeWithChildrenTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CXmlNodeWithChildrenTest::testNodeHierarchyToXml", + &CXmlNodeWithChildrenTest::testNodeHierarchyToXml)); + suiteOfTests->addTest(new CppUnit::TestCaller("CXmlNodeWithChildrenTest::testParserToNodeHierarchy", + &CXmlNodeWithChildrenTest::testParserToNodeHierarchy)); + suiteOfTests->addTest(new CppUnit::TestCaller("CXmlNodeWithChildrenTest::testPerformanceNoPool", + 
&CXmlNodeWithChildrenTest::testPerformanceNoPool)); + suiteOfTests->addTest(new CppUnit::TestCaller("CXmlNodeWithChildrenTest::testPerformanceWithPool", + &CXmlNodeWithChildrenTest::testPerformanceWithPool)); return suiteOfTests; } -void CXmlNodeWithChildrenTest::testNodeHierarchyToXml() -{ +void CXmlNodeWithChildrenTest::testNodeHierarchyToXml() { ml::core::CXmlParser parser; ml::core::CXmlNodeWithChildren twoDeepA("twoDeepA", "Element A"); @@ -108,8 +101,7 @@ void CXmlNodeWithChildrenTest::testNodeHierarchyToXml() CPPUNIT_ASSERT(xml.find("twoDeepC") < xml.find("twoDeepB")); } -void CXmlNodeWithChildrenTest::testParserToNodeHierarchy() -{ +void CXmlNodeWithChildrenTest::testParserToNodeHierarchy() { ml::core::CXmlParser parser; std::string xml = "\ @@ -152,47 +144,39 @@ void CXmlNodeWithChildrenTest::testParserToNodeHierarchy() CPPUNIT_ASSERT(xml.find("value5") != std::string::npos); } -void CXmlNodeWithChildrenTest::testPerformanceNoPool() -{ +void CXmlNodeWithChildrenTest::testPerformanceNoPool() { ml::core::CXmlParser parser; CPPUNIT_ASSERT(parser.parseFile("testfiles/p2psmon.xml")); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting node hierarchy performance test with no pool at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting node hierarchy performance test with no pool at " << ml::core::CTimeUtils::toTimeString(start)); static const size_t TEST_SIZE(20000); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP rootNodePtr; CPPUNIT_ASSERT(parser.toNodeHierarchy(rootNodePtr)); CPPUNIT_ASSERT(rootNodePtr != 0); } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished node hierarchy performance test with no pool at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished node hierarchy performance test with no pool at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Node hierarchy performance test of size " << TEST_SIZE << - " with no pool took " << (end - start) << " seconds"); + LOG_INFO("Node hierarchy performance test of size " << TEST_SIZE << " with no pool took " << (end - start) << " seconds"); } -void CXmlNodeWithChildrenTest::testPerformanceWithPool() -{ +void CXmlNodeWithChildrenTest::testPerformanceWithPool() { ml::core::CXmlParser parser; CPPUNIT_ASSERT(parser.parseFile("testfiles/p2psmon.xml")); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting node hierarchy performance test with pool at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting node hierarchy performance test with pool at " << ml::core::CTimeUtils::toTimeString(start)); ml::core::CXmlNodeWithChildrenPool pool; static const size_t TEST_SIZE(20000); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP rootNodePtr; CPPUNIT_ASSERT(parser.toNodeHierarchy(pool, rootNodePtr)); CPPUNIT_ASSERT(rootNodePtr != 0); @@ -200,10 +184,7 @@ void CXmlNodeWithChildrenTest::testPerformanceWithPool() } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished node hierarchy performance test with pool at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished node hierarchy performance test with pool at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Node hierarchy performance test of size " << TEST_SIZE << - " with pool took " << (end - 
start) << " seconds"); + LOG_INFO("Node hierarchy performance test of size " << TEST_SIZE << " with pool took " << (end - start) << " seconds"); } - diff --git a/lib/core/unittest/CXmlNodeWithChildrenTest.h b/lib/core/unittest/CXmlNodeWithChildrenTest.h index 064d2de222..5a6c4d0f14 100644 --- a/lib/core/unittest/CXmlNodeWithChildrenTest.h +++ b/lib/core/unittest/CXmlNodeWithChildrenTest.h @@ -8,16 +8,14 @@ #include -class CXmlNodeWithChildrenTest : public CppUnit::TestFixture -{ - public: - void testNodeHierarchyToXml(); - void testParserToNodeHierarchy(); - void testPerformanceNoPool(); - void testPerformanceWithPool(); +class CXmlNodeWithChildrenTest : public CppUnit::TestFixture { +public: + void testNodeHierarchyToXml(); + void testParserToNodeHierarchy(); + void testPerformanceNoPool(); + void testPerformanceWithPool(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CXmlNodeWithChildrenTest_h - diff --git a/lib/core/unittest/CXmlParserTest.cc b/lib/core/unittest/CXmlParserTest.cc index 5fae290ff7..c2cbb63347 100644 --- a/lib/core/unittest/CXmlParserTest.cc +++ b/lib/core/unittest/CXmlParserTest.cc @@ -7,9 +7,9 @@ #include #include -#include #include #include +#include #include @@ -19,83 +19,37 @@ #include - -CppUnit::Test *CXmlParserTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CXmlParserTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testParse1File", - &CXmlParserTest::testParse1File) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testParse1String", - &CXmlParserTest::testParse1String) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testParse2", - &CXmlParserTest::testParse2) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testNavigate", - &CXmlParserTest::testNavigate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testParseXInclude", - &CXmlParserTest::testParseXInclude) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testParse3", - &CXmlParserTest::testParse3) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testParse4", - &CXmlParserTest::testParse4) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testParse5", - &CXmlParserTest::testParse5) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testParse6", - &CXmlParserTest::testParse6) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testConvert1", - &CXmlParserTest::testConvert1) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testConvert2", - &CXmlParserTest::testConvert2) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testConvert3", - &CXmlParserTest::testConvert3) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testConvert4", - &CXmlParserTest::testConvert4) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testAddNewChildNode", - &CXmlParserTest::testAddNewChildNode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testSetRootNode", - &CXmlParserTest::testSetRootNode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testDump", - &CXmlParserTest::testDump) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testMakeValidName", - &CXmlParserTest::testMakeValidName) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXmlParserTest::testChangeChild", - 
&CXmlParserTest::testChangeChild) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXmlParserTest>(
-                               "CXmlParserTest::testHugeDoc",
-                               &CXmlParserTest::testHugeDoc) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXmlParserTest>(
-                               "CXmlParserTest::testParseSpeed",
-                               &CXmlParserTest::testParseSpeed) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXmlParserTest>(
-                               "CXmlParserTest::testConvertSpeed",
-                               &CXmlParserTest::testConvertSpeed) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXmlParserTest>(
-                               "CXmlParserTest::testComplexXPath",
-                               &CXmlParserTest::testComplexXPath) );
+CppUnit::Test* CXmlParserTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXmlParserTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParse1File", &CXmlParserTest::testParse1File));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParse1String", &CXmlParserTest::testParse1String));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParse2", &CXmlParserTest::testParse2));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testNavigate", &CXmlParserTest::testNavigate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParseXInclude", &CXmlParserTest::testParseXInclude));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParse3", &CXmlParserTest::testParse3));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParse4", &CXmlParserTest::testParse4));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParse5", &CXmlParserTest::testParse5));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParse6", &CXmlParserTest::testParse6));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvert1", &CXmlParserTest::testConvert1));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvert2", &CXmlParserTest::testConvert2));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvert3", &CXmlParserTest::testConvert3));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvert4", &CXmlParserTest::testConvert4));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testAddNewChildNode", &CXmlParserTest::testAddNewChildNode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testSetRootNode", &CXmlParserTest::testSetRootNode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testDump", &CXmlParserTest::testDump));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testMakeValidName", &CXmlParserTest::testMakeValidName));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testChangeChild", &CXmlParserTest::testChangeChild));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testHugeDoc", &CXmlParserTest::testHugeDoc));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParseSpeed", &CXmlParserTest::testParseSpeed));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvertSpeed", &CXmlParserTest::testConvertSpeed));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testComplexXPath", &CXmlParserTest::testComplexXPath));

     return suiteOfTests;
 }

-void CXmlParserTest::testParse1File()
-{
+void CXmlParserTest::testParse1File() {
     std::string badFileName = "./testfiles/CXmlParser_bad.xml";
     std::string goodFileName = "./testfiles/CXmlParser1.xml";

@@ -107,8 +61,7 @@ void CXmlParserTest::testParse1File()
     this->testParse1(parser);
 }

-void CXmlParserTest::testParse1String()
-{
+void 
CXmlParserTest::testParse1String() { std::string goodString = CXmlParserTest::fileToString("./testfiles/CXmlParser1.xml"); ml::core::CXmlParser parser; @@ -118,8 +71,7 @@ void CXmlParserTest::testParse1String() this->testParse1(parser); } -void CXmlParserTest::testParse2() -{ +void CXmlParserTest::testParse2() { std::string goodFileName = "./testfiles/CXmlParser2.xml"; ml::core::CXmlParser parser; @@ -167,8 +119,7 @@ void CXmlParserTest::testParse2() CPPUNIT_ASSERT(nodes[1].attributes().empty()); } -void CXmlParserTest::testNavigate() -{ +void CXmlParserTest::testNavigate() { std::string goodFileName = "./testfiles/CXmlParser2.xml"; ml::core::CXmlParser parser; @@ -202,8 +153,7 @@ void CXmlParserTest::testNavigate() CPPUNIT_ASSERT(!parser.navigateNext()); } -void CXmlParserTest::testParseXInclude() -{ +void CXmlParserTest::testParseXInclude() { std::string goodFileName = "./testfiles/CXmlParser3.xml"; std::string badFileName = "./testfiles/CXmlParser4.xml"; @@ -253,8 +203,7 @@ void CXmlParserTest::testParseXInclude() CPPUNIT_ASSERT(nodes[1].attributes().empty()); } -void CXmlParserTest::testParse3() -{ +void CXmlParserTest::testParse3() { std::string fileName = "./testfiles/CXmlParser5.xml"; ml::core::CXmlParser parser; @@ -266,47 +215,28 @@ void CXmlParserTest::testParse3() CPPUNIT_ASSERT(parser.evalXPathExpression("/ItemSearchResponse/OperationRequest/Arguments/Argument", arguments)); CPPUNIT_ASSERT_EQUAL(size_t(7), arguments.size()); - for (ml::core::CXmlParser::TXmlNodeVecItr itr = arguments.begin(); - itr != arguments.end(); - ++itr) - { - if (itr->value() == "Service") - { + for (ml::core::CXmlParser::TXmlNodeVecItr itr = arguments.begin(); itr != arguments.end(); ++itr) { + if (itr->value() == "Service") { CPPUNIT_ASSERT(this->testAttribute(*itr, "Value", "AWSECommerceService")); - } - else if (itr->value() == "AssociateTag") - { + } else if (itr->value() == "AssociateTag") { CPPUNIT_ASSERT(!this->testAttribute(*itr, "Value", "")); - } - else if (itr->value() == "SearchIndex") - { + } else if (itr->value() == "SearchIndex") { CPPUNIT_ASSERT(this->testAttribute(*itr, "Value", "Books")); - } - else if (itr->value() == "Author") - { + } else if (itr->value() == "Author") { CPPUNIT_ASSERT(!this->testAttribute(*itr, "Value", "")); - } - else if (itr->value() == "Hacasdasdcv") - { + } else if (itr->value() == "Hacasdasdcv") { CPPUNIT_ASSERT(this->testAttribute(*itr, "Value", "1A7XKHR5BYD0WPJVQEG2")); - } - else if (itr->value() == "Version") - { + } else if (itr->value() == "Version") { CPPUNIT_ASSERT(this->testAttribute(*itr, "Value", "2006-06-28")); - } - else if (itr->value() == "Operation") - { + } else if (itr->value() == "Operation") { CPPUNIT_ASSERT(!this->testAttribute(*itr, "Value", "")); - } - else - { + } else { CPPUNIT_ASSERT_MESSAGE(itr->dump(), false); } } } -void CXmlParserTest::testParse4() -{ +void CXmlParserTest::testParse4() { std::string fileName = "./testfiles/CXmlParser1.xml"; ml::core::CXmlParser parser; @@ -336,8 +266,7 @@ void CXmlParserTest::testParse4() CPPUNIT_ASSERT(!parser.evalXPathExpression("/ItemSearchResponse/Items/Request/ItemSearchRequest/Author", i)); } -void CXmlParserTest::testParse5() -{ +void CXmlParserTest::testParse5() { ml::core::CXmlParser parser; std::string xml = "\ @@ -366,87 +295,84 @@ void CXmlParserTest::testParse5() CPPUNIT_ASSERT_EQUAL(itr->second, std::string("value3")); } -void CXmlParserTest::testParse6() -{ +void CXmlParserTest::testParse6() { -{ - ml::core::CXmlParser parser; + { + ml::core::CXmlParser parser; - std::string xml = 
"\ + std::string xml = "\ \ value1 \ value2 \ value3 \ "; - CPPUNIT_ASSERT(parser.parseString(xml)); + CPPUNIT_ASSERT(parser.parseString(xml)); - ml::core::CXmlParser::TStrVec values; + ml::core::CXmlParser::TStrVec values; - CPPUNIT_ASSERT(parser.evalXPathExpression("/root/name", values)); + CPPUNIT_ASSERT(parser.evalXPathExpression("/root/name", values)); - CPPUNIT_ASSERT_EQUAL(values.size(), size_t(3)); + CPPUNIT_ASSERT_EQUAL(values.size(), size_t(3)); - CPPUNIT_ASSERT_EQUAL(values[0], std::string("value1")); - CPPUNIT_ASSERT_EQUAL(values[1], std::string("value2")); - CPPUNIT_ASSERT_EQUAL(values[2], std::string("value3")); -} -{ - ml::core::CXmlParser parser; + CPPUNIT_ASSERT_EQUAL(values[0], std::string("value1")); + CPPUNIT_ASSERT_EQUAL(values[1], std::string("value2")); + CPPUNIT_ASSERT_EQUAL(values[2], std::string("value3")); + } + { + ml::core::CXmlParser parser; - std::string xml = "\ + std::string xml = "\ \ \ \ "; - CPPUNIT_ASSERT(parser.parseString(xml)); + CPPUNIT_ASSERT(parser.parseString(xml)); - ml::core::CXmlParser::TStrVec values; + ml::core::CXmlParser::TStrVec values; - CPPUNIT_ASSERT(parser.evalXPathExpression("/root/names/*", values)); + CPPUNIT_ASSERT(parser.evalXPathExpression("/root/names/*", values)); - CPPUNIT_ASSERT(values.empty()); -} -{ - ml::core::CXmlParser parser; + CPPUNIT_ASSERT(values.empty()); + } + { + ml::core::CXmlParser parser; - std::string xml = "\ + std::string xml = "\ \ value1 \ value2 \ value3 \ "; - CPPUNIT_ASSERT(parser.parseString(xml)); + CPPUNIT_ASSERT(parser.parseString(xml)); - ml::core::CXmlParser::TStrSet values; + ml::core::CXmlParser::TStrSet values; - CPPUNIT_ASSERT(parser.evalXPathExpression("/root/name", values)); + CPPUNIT_ASSERT(parser.evalXPathExpression("/root/name", values)); - CPPUNIT_ASSERT_EQUAL(values.size(), size_t(3)); -} -{ - ml::core::CXmlParser parser; + CPPUNIT_ASSERT_EQUAL(values.size(), size_t(3)); + } + { + ml::core::CXmlParser parser; - std::string xml = "\ + std::string xml = "\ \ value1 \ value2 \ value2 \ "; - CPPUNIT_ASSERT(parser.parseString(xml)); + CPPUNIT_ASSERT(parser.parseString(xml)); - ml::core::CXmlParser::TStrSet values; - - CPPUNIT_ASSERT(!parser.evalXPathExpression("/root/name", values)); -} + ml::core::CXmlParser::TStrSet values; + CPPUNIT_ASSERT(!parser.evalXPathExpression("/root/name", values)); + } } -void CXmlParserTest::testConvert1() -{ +void CXmlParserTest::testConvert1() { ml::core::CXmlParser::TStrStrMap values; values.insert(ml::core::CXmlParser::TStrStrMap::value_type("key", "<&sdacasdc")); @@ -472,8 +398,7 @@ void CXmlParserTest::testConvert1() CPPUNIT_ASSERT_EQUAL(std::string("sdac asdc asdc\nadsc\nasdc\n"), node.value()); } -void CXmlParserTest::testConvert2() -{ +void CXmlParserTest::testConvert2() { ml::core::CXmlParser::TStrStrMap values; values.insert(ml::core::CXmlParser::TStrStrMap::value_type("key", "<&sdacasdc")); @@ -494,8 +419,7 @@ void CXmlParserTest::testConvert2() CPPUNIT_ASSERT_EQUAL(std::string("sdac asdc asdc\nadsc\nasdc\n"), node.value()); } -void CXmlParserTest::testConvert3() -{ +void CXmlParserTest::testConvert3() { ml::core::CXmlParser::TStrStrMap values; values.insert(ml::core::CXmlParser::TStrStrMap::value_type("key", "<&sdacasdc")); @@ -527,8 +451,7 @@ void CXmlParserTest::testConvert3() CPPUNIT_ASSERT_EQUAL(std::string("1"), node.value()); } -void CXmlParserTest::testConvert4() -{ +void CXmlParserTest::testConvert4() { // Use a standard node hierarchy to allow for comparison with the // standards-compliant XML parser 
ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(CRapidXmlParserTest::makeTestNodeHierarchy()); @@ -560,8 +483,7 @@ void CXmlParserTest::testConvert4() CPPUNIT_ASSERT(converted.find("") != std::string::npos); } -void CXmlParserTest::testAddNewChildNode() -{ +void CXmlParserTest::testAddNewChildNode() { ml::core::CXmlParser parser; std::string xml = "\ @@ -588,39 +510,36 @@ void CXmlParserTest::testAddNewChildNode() CPPUNIT_ASSERT_EQUAL(std::string("value4"), value); } -void CXmlParserTest::testSetRootNode() -{ +void CXmlParserTest::testSetRootNode() { -{ - ml::core::CXmlParser parser; + { + ml::core::CXmlParser parser; - CPPUNIT_ASSERT(parser.setRootNode("root")); + CPPUNIT_ASSERT(parser.setRootNode("root")); - CPPUNIT_ASSERT(parser.addNewChildNode("name1", "value1")); - CPPUNIT_ASSERT(parser.addNewChildNode("name2", "value2")); + CPPUNIT_ASSERT(parser.addNewChildNode("name1", "value1")); + CPPUNIT_ASSERT(parser.addNewChildNode("name2", "value2")); - std::string value; + std::string value; - CPPUNIT_ASSERT(parser.evalXPathExpression("/root/name1", value)); - CPPUNIT_ASSERT_EQUAL(std::string("value1"), value); - CPPUNIT_ASSERT(parser.evalXPathExpression("/root/name2", value)); - CPPUNIT_ASSERT_EQUAL(std::string("value2"), value); -} -{ - ml::core::CXmlParser parser; - - CPPUNIT_ASSERT(parser.setRootNode("root")); + CPPUNIT_ASSERT(parser.evalXPathExpression("/root/name1", value)); + CPPUNIT_ASSERT_EQUAL(std::string("value1"), value); + CPPUNIT_ASSERT(parser.evalXPathExpression("/root/name2", value)); + CPPUNIT_ASSERT_EQUAL(std::string("value2"), value); + } + { + ml::core::CXmlParser parser; - CPPUNIT_ASSERT(parser.addNewChildNode("name", "value1")); - CPPUNIT_ASSERT(parser.addNewChildNode("name", "value2")); + CPPUNIT_ASSERT(parser.setRootNode("root")); - parser.dumpToStdout(); -} + CPPUNIT_ASSERT(parser.addNewChildNode("name", "value1")); + CPPUNIT_ASSERT(parser.addNewChildNode("name", "value2")); + parser.dumpToStdout(); + } } -void CXmlParserTest::testDump() -{ +void CXmlParserTest::testDump() { std::string fileName = "./testfiles/CXmlParser1.xml"; ml::core::CXmlParser parser1; @@ -634,16 +553,14 @@ void CXmlParserTest::testDump() this->testParse1(parser2); } -std::string CXmlParserTest::fileToString(const std::string &fileName) -{ +std::string CXmlParserTest::fileToString(const std::string& fileName) { std::string ret; std::ifstream ifs(fileName.c_str()); CPPUNIT_ASSERT_MESSAGE(fileName, ifs.is_open()); std::string line; - while (std::getline(ifs, line)) - { + while (std::getline(ifs, line)) { ret += line; ret += '\n'; } @@ -651,16 +568,17 @@ std::string CXmlParserTest::fileToString(const std::string &fileName) return ret; } -void CXmlParserTest::testParse1(const ml::core::CXmlParser &parser) -{ +void CXmlParserTest::testParse1(const ml::core::CXmlParser& parser) { ml::core::CXmlNode node; - std::string value; + std::string value; CPPUNIT_ASSERT(!parser.evalXPathExpression("//badpath", node)); CPPUNIT_ASSERT(parser.evalXPathExpression("/ItemSearchResponse/OperationRequest/HTTPHeaders/Header/@Value", node)); CPPUNIT_ASSERT_EQUAL(std::string("Value"), node.name()); - CPPUNIT_ASSERT_EQUAL(std::string("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser; Avant Browser; .NET CLR 1.0.3705; .NET CLR 2.0.50727; .NET CLR 1.1.4322; Media Center PC 4.0; InfoPath.2)"), node.value()); + CPPUNIT_ASSERT_EQUAL(std::string("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser; Avant Browser; .NET CLR 1.0.3705; " + ".NET CLR 2.0.50727; .NET CLR 1.1.4322; Media Center PC 
4.0; InfoPath.2)"), + node.value()); CPPUNIT_ASSERT(node.attributes().empty()); CPPUNIT_ASSERT(parser.evalXPathExpression("/ItemSearchResponse/OperationRequest/RequestId", node)); @@ -679,24 +597,20 @@ void CXmlParserTest::testParse1(const ml::core::CXmlParser &parser) CPPUNIT_ASSERT_EQUAL(std::string("msg"), node.name()); CPPUNIT_ASSERT_EQUAL(std::string("\n\ Invalid Date of Birth.
This is a test validation message from the server \n\ - "), node.value()); + "), + node.value()); CPPUNIT_ASSERT(node.attributes().empty()); CPPUNIT_ASSERT_EQUAL(std::string("ItemSearchResponse"), parser.rootElementName()); } -bool CXmlParserTest::testAttribute(const ml::core::CXmlNode &node, - const std::string &key, - const std::string &expected) -{ +bool CXmlParserTest::testAttribute(const ml::core::CXmlNode& node, const std::string& key, const std::string& expected) { std::string actual; - if (node.attribute(key, actual) == false) - { + if (node.attribute(key, actual) == false) { return false; } - if (actual != expected) - { + if (actual != expected) { LOG_ERROR(actual << ' ' << expected); return false; } @@ -704,18 +618,17 @@ bool CXmlParserTest::testAttribute(const ml::core::CXmlNode &node, return true; } -void CXmlParserTest::testMakeValidName() -{ +void CXmlParserTest::testMakeValidName() { CPPUNIT_ASSERT_EQUAL(std::string("name"), ml::core::CXmlParser::makeValidName("name")); CPPUNIT_ASSERT_EQUAL(std::string("name1"), ml::core::CXmlParser::makeValidName("name1")); CPPUNIT_ASSERT_EQUAL(std::string("_name"), ml::core::CXmlParser::makeValidName("1name")); CPPUNIT_ASSERT_EQUAL(std::string("name_2"), ml::core::CXmlParser::makeValidName("name/2")); CPPUNIT_ASSERT_EQUAL(std::string("_name_"), ml::core::CXmlParser::makeValidName("_name_")); - CPPUNIT_ASSERT_EQUAL(std::string("__cencl01b_System_System_Calls_sec"), ml::core::CXmlParser::makeValidName("\\\\cencl01b\\System\\System Calls/sec")); + CPPUNIT_ASSERT_EQUAL(std::string("__cencl01b_System_System_Calls_sec"), + ml::core::CXmlParser::makeValidName("\\\\cencl01b\\System\\System Calls/sec")); } -void CXmlParserTest::testChangeChild() -{ +void CXmlParserTest::testChangeChild() { ml::core::CXmlParser parser; CPPUNIT_ASSERT(parser.setRootNode("root")); @@ -769,8 +682,7 @@ void CXmlParserTest::testChangeChild() CPPUNIT_ASSERT_EQUAL(std::string("changed3"), value); } -void CXmlParserTest::testHugeDoc() -{ +void CXmlParserTest::testHugeDoc() { // libxml2 can exhibit O(n^2.42) behaviour if the xmlXPathOrderDocElems() // function hasn't been called on the document. Obviously this only shows // up as a problem in huge XML documents. 
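The workaround the comment above names is a one-off ordering pass over the parsed document. A minimal sketch against the raw libxml2 API (illustrative only, not part of this patch; the helper name and file name are hypothetical):

    #include <libxml/parser.h>
    #include <libxml/xpath.h>

    // Parse a huge document and number its element nodes up front so that
    // subsequent XPath evaluation avoids the super-linear ordering cost.
    xmlDocPtr parseForXPath(const char* fileName) {
        xmlDocPtr doc = xmlParseFile(fileName);
        if (doc != nullptr) {
            xmlXPathOrderDocElems(doc);
        }
        return doc;
    }
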
@@ -783,16 +695,14 @@ void CXmlParserTest::testHugeDoc() ofs << "" << std::endl; static const size_t NUM_NODES(300000); - for (size_t count = 1; count <= NUM_NODES; ++count) - { + for (size_t count = 1; count <= NUM_NODES; ++count) { ofs << " " << count << "" << std::endl; } ofs << "" << std::endl; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting huge XPath test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting huge XPath test at " << ml::core::CTimeUtils::toTimeString(start)); ml::core::CXmlParser parser; ml::core::CXmlParser::TStrSet valueSet; @@ -804,31 +714,26 @@ void CXmlParserTest::testHugeDoc() CPPUNIT_ASSERT(parser.evalXPathExpression("/nodes/node", valueSet)); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished huge XPath test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished huge XPath test at " << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(NUM_NODES, valueSet.size()); - LOG_INFO("Applying an XPath to a node set with " << NUM_NODES << - " nodes took " << (end - start) << " seconds"); + LOG_INFO("Applying an XPath to a node set with " << NUM_NODES << " nodes took " << (end - start) << " seconds"); ::remove(fileName.c_str()); } -void CXmlParserTest::testParseSpeed() -{ +void CXmlParserTest::testParseSpeed() { static const size_t TEST_SIZE(25000); std::string testString(CXmlParserTest::fileToString("./testfiles/CXmlParser2.xml")); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting parse speed test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting parse speed test at " << ml::core::CTimeUtils::toTimeString(start)); ml::core::CXmlNodeWithChildrenPool nodePool; - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { ml::core::CXmlParser parser; CPPUNIT_ASSERT(parser.parseString(testString)); @@ -841,15 +746,12 @@ void CXmlParserTest::testParseSpeed() } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished parse speed test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished parse speed test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Parsing " << TEST_SIZE << " documents took " << - (end - start) << " seconds"); + LOG_INFO("Parsing " << TEST_SIZE << " documents took " << (end - start) << " seconds"); } -void CXmlParserTest::testConvertSpeed() -{ +void CXmlParserTest::testConvertSpeed() { static const size_t TEST_SIZE(100000); // Use a standard node hierarchy to allow for comparison with the @@ -857,25 +759,20 @@ void CXmlParserTest::testConvertSpeed() ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(CRapidXmlParserTest::makeTestNodeHierarchy()); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO("Starting convert speed test at " << - ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO("Starting convert speed test at " << ml::core::CTimeUtils::toTimeString(start)); - for (size_t count = 0; count < TEST_SIZE; ++count) - { + for (size_t count = 0; count < TEST_SIZE; ++count) { std::string converted; ml::core::CXmlParser::convert(*root, converted); } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO("Finished convert speed test at " << - ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO("Finished convert speed test at " << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO("Converting " << TEST_SIZE << " documents took " << - (end - start) << " seconds"); + 
LOG_INFO("Converting " << TEST_SIZE << " documents took " << (end - start) << " seconds"); } -void CXmlParserTest::testComplexXPath() -{ +void CXmlParserTest::testComplexXPath() { ml::core::CXmlParser parser; CPPUNIT_ASSERT(parser.parseFile("testfiles/withNs.xml")); @@ -883,8 +780,7 @@ void CXmlParserTest::testComplexXPath() // This convoluted query is for XML schemas that // have a default namespace but don't give it a name! - CPPUNIT_ASSERT(parser.evalXPathExpression("//*[local-name()='title' and .='ml']/..//*[local-name()='key' and @name='disabled']", - disabled)); + CPPUNIT_ASSERT( + parser.evalXPathExpression("//*[local-name()='title' and .='ml']/..//*[local-name()='key' and @name='disabled']", disabled)); CPPUNIT_ASSERT_EQUAL(true, disabled); } - diff --git a/lib/core/unittest/CXmlParserTest.h b/lib/core/unittest/CXmlParserTest.h index 93ce181d0f..b8160ebf3c 100644 --- a/lib/core/unittest/CXmlParserTest.h +++ b/lib/core/unittest/CXmlParserTest.h @@ -10,52 +10,46 @@ #include -namespace ml -{ -namespace core -{ +namespace ml { +namespace core { class CXmlNode; class CXmlParser; } } -class CXmlParserTest : public CppUnit::TestFixture -{ - public: - void testParse1File(); - void testParse1String(); - void testParse2(); - void testNavigate(); - void testParseXInclude(); - void testParse3(); - void testParse4(); - void testParse5(); - void testParse6(); - void testConvert1(); - void testConvert2(); - void testConvert3(); - void testConvert4(); - void testAddNewChildNode(); - void testSetRootNode(); - void testDump(); - void testMakeValidName(); - void testChangeChild(); - void testHugeDoc(); - void testParseSpeed(); - void testConvertSpeed(); - void testComplexXPath(); - - static CppUnit::Test *suite(); - - private: - static void testParse1(const ml::core::CXmlParser &parser); - - static std::string fileToString(const std::string &fileName); - - static bool testAttribute(const ml::core::CXmlNode &node, - const std::string &key, - const std::string &expected); +class CXmlParserTest : public CppUnit::TestFixture { +public: + void testParse1File(); + void testParse1String(); + void testParse2(); + void testNavigate(); + void testParseXInclude(); + void testParse3(); + void testParse4(); + void testParse5(); + void testParse6(); + void testConvert1(); + void testConvert2(); + void testConvert3(); + void testConvert4(); + void testAddNewChildNode(); + void testSetRootNode(); + void testDump(); + void testMakeValidName(); + void testChangeChild(); + void testHugeDoc(); + void testParseSpeed(); + void testConvertSpeed(); + void testComplexXPath(); + + static CppUnit::Test* suite(); + +private: + static void testParse1(const ml::core::CXmlParser& parser); + + static std::string fileToString(const std::string& fileName); + + static bool testAttribute(const ml::core::CXmlNode& node, const std::string& key, const std::string& expected); }; #endif // INCLUDED_CXmlParserTest_h - diff --git a/lib/core/unittest/Main.cc b/lib/core/unittest/Main.cc index d459f09c5c..49e78ee156 100644 --- a/lib/core/unittest/Main.cc +++ b/lib/core/unittest/Main.cc @@ -9,8 +9,8 @@ #include "CBase64FilterTest.h" #include "CBlockingMessageQueueTest.h" #include "CByteSwapperTest.h" -#include "CCompressedDictionaryTest.h" #include "CCompressUtilsTest.h" +#include "CCompressedDictionaryTest.h" #include "CConcurrentWrapperTest.h" #include "CContainerPrinterTest.h" #include "CContainerThroughputTest.h" @@ -40,8 +40,8 @@ #include "CPatternSetTest.h" #include "CPersistUtilsTest.h" #include "CPolymorphicStackObjectCPtrTest.h" 
-#include "CProcessTest.h" #include "CProcessPriorityTest.h" +#include "CProcessTest.h" #include "CProgNameTest.h" #include "CRapidJsonLineWriterTest.h" #include "CRapidJsonWriterBaseTest.h" @@ -68,90 +68,87 @@ #include "CTickerTest.h" #include "CTimeUtilsTest.h" #include "CTripleTest.h" +#include "CUnameTest.h" #include "CVectorRangeTest.h" #include "CWindowsErrorTest.h" #include "CWordDictionaryTest.h" #include "CWordExtractorTest.h" -#include "CUnameTest.h" #include "CXmlNodeWithChildrenTest.h" #include "CXmlParserTest.h" - -int main(int argc, const char **argv) -{ +int main(int argc, const char** argv) { ml::test::CTestRunner runner(argc, argv); - runner.addTest( CAllocationStrategyTest::suite() ); - runner.addTest( CBase64FilterTest::suite() ); - runner.addTest( CBlockingMessageQueueTest::suite() ); - runner.addTest( CByteSwapperTest::suite() ); - runner.addTest( CCompressedDictionaryTest::suite() ); - runner.addTest( CCompressUtilsTest::suite() ); - runner.addTest( CConcurrentWrapperTest::suite() ); - runner.addTest( CContainerPrinterTest::suite() ); - runner.addTest( CContainerThroughputTest::suite() ); - runner.addTest( CDelimiterTest::suite() ); - runner.addTest( CDetachedProcessSpawnerTest::suite() ); - runner.addTest( CDualThreadStreamBufTest::suite() ); - runner.addTest( CFileDeleterTest::suite() ); - runner.addTest( CFlatPrefixTreeTest::suite() ); - runner.addTest( CFunctionalTest::suite() ); - runner.addTest( CHashingTest::suite() ); - runner.addTest( CHexUtilsTest::suite() ); - runner.addTest( CIEEE754Test::suite() ); - runner.addTest( CJsonLogLayoutTest::suite() ); - runner.addTest( CJsonOutputStreamWrapperTest::suite() ); - runner.addTest( CJsonStatePersistInserterTest::suite() ); - runner.addTest( CJsonStateRestoreTraverserTest::suite() ); - runner.addTest( CLoggerTest::suite() ); - runner.addTest( CMapPopulationTest::suite() ); - runner.addTest( CMemoryUsageJsonWriterTest::suite() ); - runner.addTest( CMemoryUsageTest::suite() ); - runner.addTest( CMessageBufferTest::suite() ); - runner.addTest( CMessageQueueTest::suite() ); - runner.addTest( CMonotonicTimeTest::suite() ); - runner.addTest( CMutexTest::suite() ); - runner.addTest( CNamedPipeFactoryTest::suite() ); - runner.addTest( COsFileFuncsTest::suite() ); - runner.addTest( CPatternSetTest::suite() ); - runner.addTest( CPersistUtilsTest::suite() ); - runner.addTest( CPolymorphicStackObjectCPtrTest::suite() ); - runner.addTest( CProcessTest::suite() ); - runner.addTest( CProcessPriorityTest::suite() ); - runner.addTest( CProgNameTest::suite() ); - runner.addTest( CRapidJsonLineWriterTest::suite() ); - runner.addTest( CRapidJsonWriterBaseTest::suite() ); - runner.addTest( CRapidXmlParserTest::suite() ); - runner.addTest( CRapidXmlStatePersistInserterTest::suite() ); - runner.addTest( CRapidXmlStateRestoreTraverserTest::suite() ); - runner.addTest( CReadWriteLockTest::suite() ); - runner.addTest( CRegexFilterTest::suite() ); - runner.addTest( CRegexTest::suite() ); - runner.addTest( CResourceLocatorTest::suite() ); - runner.addTest( CShellArgQuoterTest::suite() ); - runner.addTest( CSleepTest::suite() ); - runner.addTest( CSmallVectorTest::suite() ); - runner.addTest( CStateCompressorTest::suite() ); - runner.addTest( CStateMachineTest::suite() ); - runner.addTest( CStatisticsTest::suite() ); - runner.addTest( CStopWatchTest::suite() ); - runner.addTest( CStoredStringPtrTest::suite() ); - runner.addTest( CStringSimilarityTesterTest::suite() ); - runner.addTest( CStringUtilsTest::suite() ); - runner.addTest( 
CThreadFarmTest::suite() ); - runner.addTest( CThreadMutexConditionTest::suite() ); - runner.addTest( CThreadPoolTest::suite() ); - runner.addTest( CTickerTest::suite() ); - runner.addTest( CTimeUtilsTest::suite() ); - runner.addTest( CTripleTest::suite() ); - runner.addTest( CUnameTest::suite() ); - runner.addTest( CVectorRangeTest::suite() ); - runner.addTest( CWindowsErrorTest::suite() ); - runner.addTest( CWordDictionaryTest::suite() ); - runner.addTest( CWordExtractorTest::suite() ); - runner.addTest( CXmlNodeWithChildrenTest::suite() ); - runner.addTest( CXmlParserTest::suite() ); + runner.addTest(CAllocationStrategyTest::suite()); + runner.addTest(CBase64FilterTest::suite()); + runner.addTest(CBlockingMessageQueueTest::suite()); + runner.addTest(CByteSwapperTest::suite()); + runner.addTest(CCompressedDictionaryTest::suite()); + runner.addTest(CCompressUtilsTest::suite()); + runner.addTest(CConcurrentWrapperTest::suite()); + runner.addTest(CContainerPrinterTest::suite()); + runner.addTest(CContainerThroughputTest::suite()); + runner.addTest(CDelimiterTest::suite()); + runner.addTest(CDetachedProcessSpawnerTest::suite()); + runner.addTest(CDualThreadStreamBufTest::suite()); + runner.addTest(CFileDeleterTest::suite()); + runner.addTest(CFlatPrefixTreeTest::suite()); + runner.addTest(CFunctionalTest::suite()); + runner.addTest(CHashingTest::suite()); + runner.addTest(CHexUtilsTest::suite()); + runner.addTest(CIEEE754Test::suite()); + runner.addTest(CJsonLogLayoutTest::suite()); + runner.addTest(CJsonOutputStreamWrapperTest::suite()); + runner.addTest(CJsonStatePersistInserterTest::suite()); + runner.addTest(CJsonStateRestoreTraverserTest::suite()); + runner.addTest(CLoggerTest::suite()); + runner.addTest(CMapPopulationTest::suite()); + runner.addTest(CMemoryUsageJsonWriterTest::suite()); + runner.addTest(CMemoryUsageTest::suite()); + runner.addTest(CMessageBufferTest::suite()); + runner.addTest(CMessageQueueTest::suite()); + runner.addTest(CMonotonicTimeTest::suite()); + runner.addTest(CMutexTest::suite()); + runner.addTest(CNamedPipeFactoryTest::suite()); + runner.addTest(COsFileFuncsTest::suite()); + runner.addTest(CPatternSetTest::suite()); + runner.addTest(CPersistUtilsTest::suite()); + runner.addTest(CPolymorphicStackObjectCPtrTest::suite()); + runner.addTest(CProcessTest::suite()); + runner.addTest(CProcessPriorityTest::suite()); + runner.addTest(CProgNameTest::suite()); + runner.addTest(CRapidJsonLineWriterTest::suite()); + runner.addTest(CRapidJsonWriterBaseTest::suite()); + runner.addTest(CRapidXmlParserTest::suite()); + runner.addTest(CRapidXmlStatePersistInserterTest::suite()); + runner.addTest(CRapidXmlStateRestoreTraverserTest::suite()); + runner.addTest(CReadWriteLockTest::suite()); + runner.addTest(CRegexFilterTest::suite()); + runner.addTest(CRegexTest::suite()); + runner.addTest(CResourceLocatorTest::suite()); + runner.addTest(CShellArgQuoterTest::suite()); + runner.addTest(CSleepTest::suite()); + runner.addTest(CSmallVectorTest::suite()); + runner.addTest(CStateCompressorTest::suite()); + runner.addTest(CStateMachineTest::suite()); + runner.addTest(CStatisticsTest::suite()); + runner.addTest(CStopWatchTest::suite()); + runner.addTest(CStoredStringPtrTest::suite()); + runner.addTest(CStringSimilarityTesterTest::suite()); + runner.addTest(CStringUtilsTest::suite()); + runner.addTest(CThreadFarmTest::suite()); + runner.addTest(CThreadMutexConditionTest::suite()); + runner.addTest(CThreadPoolTest::suite()); + runner.addTest(CTickerTest::suite()); + 
runner.addTest(CTimeUtilsTest::suite()); + runner.addTest(CTripleTest::suite()); + runner.addTest(CUnameTest::suite()); + runner.addTest(CVectorRangeTest::suite()); + runner.addTest(CWindowsErrorTest::suite()); + runner.addTest(CWordDictionaryTest::suite()); + runner.addTest(CWordExtractorTest::suite()); + runner.addTest(CXmlNodeWithChildrenTest::suite()); + runner.addTest(CXmlParserTest::suite()); return !runner.runTests(); } - diff --git a/lib/maths/CAdaptiveBucketing.cc b/lib/maths/CAdaptiveBucketing.cc index e3a6f9eea2..add136956c 100644 --- a/lib/maths/CAdaptiveBucketing.cc +++ b/lib/maths/CAdaptiveBucketing.cc @@ -27,19 +27,15 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; //! Clear a vector and recover its memory. template -void clearAndShrink(std::vector &vector) -{ +void clearAndShrink(std::vector& vector) { std::vector empty; empty.swap(vector); } @@ -57,26 +53,21 @@ const double ALPHA{0.25}; const double EPS{std::numeric_limits::epsilon()}; const double WEIGHTS[]{1.0, 1.0, 1.0, 0.75, 0.5}; const double MINIMUM_DECAY_RATE{0.001}; - } -bool CAdaptiveBucketing::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; +bool CAdaptiveBucketing::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(DECAY_RATE_TAG, m_DecayRate) RESTORE(ENDPOINT_TAG, core::CPersistUtils::fromString(traverser.value(), m_Endpoints)) RESTORE(CENTRES_TAG, core::CPersistUtils::fromString(traverser.value(), m_Centres)) RESTORE(LP_FORCE_TAG, m_LpForce.fromDelimited(traverser.value())) RESTORE(FORCE_TAG, m_Force.fromDelimited(traverser.value())) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DECAY_RATE_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); inserter.insertValue(ENDPOINT_TAG, core::CPersistUtils::toString(m_Endpoints)); inserter.insertValue(CENTRES_TAG, core::CPersistUtils::toString(m_Centres)); @@ -84,22 +75,16 @@ void CAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter &inse inserter.insertValue(FORCE_TAG, m_Force.toDelimited()); } -CAdaptiveBucketing::CAdaptiveBucketing(double decayRate, double minimumBucketLength) : - m_DecayRate{std::max(decayRate, MINIMUM_DECAY_RATE)}, - m_MinimumBucketLength{minimumBucketLength} -{} - -CAdaptiveBucketing::CAdaptiveBucketing(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser) : - m_DecayRate{std::max(decayRate, MINIMUM_DECAY_RATE)}, - m_MinimumBucketLength{minimumBucketLength} -{ +CAdaptiveBucketing::CAdaptiveBucketing(double decayRate, double minimumBucketLength) + : m_DecayRate{std::max(decayRate, MINIMUM_DECAY_RATE)}, m_MinimumBucketLength{minimumBucketLength} { +} + +CAdaptiveBucketing::CAdaptiveBucketing(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser) + : m_DecayRate{std::max(decayRate, MINIMUM_DECAY_RATE)}, m_MinimumBucketLength{minimumBucketLength} { traverser.traverseSubLevel(boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, this, _1)); } -void CAdaptiveBucketing::swap(CAdaptiveBucketing &other) -{ +void 
CAdaptiveBucketing::swap(CAdaptiveBucketing& other) { std::swap(m_DecayRate, other.m_DecayRate); std::swap(m_MinimumBucketLength, other.m_MinimumBucketLength); m_Endpoints.swap(other.m_Endpoints); @@ -108,21 +93,17 @@ void CAdaptiveBucketing::swap(CAdaptiveBucketing &other) std::swap(m_Force, other.m_Force); } -bool CAdaptiveBucketing::initialized() const -{ +bool CAdaptiveBucketing::initialized() const { return m_Endpoints.size() > 0; } -bool CAdaptiveBucketing::initialize(double a, double b, std::size_t n) -{ - if (n == 0) - { +bool CAdaptiveBucketing::initialize(double a, double b, std::size_t n) { + if (n == 0) { LOG_ERROR("Must have at least one bucket"); return false; } - if (m_MinimumBucketLength > 0.0) - { + if (m_MinimumBucketLength > 0.0) { // Handle the case that the minimum bucket length is // longer than the period. m_MinimumBucketLength = std::min(m_MinimumBucketLength, b - a); @@ -130,10 +111,9 @@ bool CAdaptiveBucketing::initialize(double a, double b, std::size_t n) } m_Endpoints.clear(); - m_Endpoints.reserve(n+1); + m_Endpoints.reserve(n + 1); double width{(b - a) / static_cast(n)}; - for (std::size_t i = 0u; i < n+1; ++i) - { + for (std::size_t i = 0u; i < n + 1; ++i) { m_Endpoints.push_back(a + static_cast(i) * width); } m_Centres.clear(); @@ -142,34 +122,25 @@ bool CAdaptiveBucketing::initialize(double a, double b, std::size_t n) return true; } -void CAdaptiveBucketing::initialValues(core_t::TTime start, - core_t::TTime end, - const TFloatMeanAccumulatorVec &values) -{ - if (!this->initialized()) - { +void CAdaptiveBucketing::initialValues(core_t::TTime start, core_t::TTime end, const TFloatMeanAccumulatorVec& values) { + if (!this->initialized()) { return; } core_t::TTime size{static_cast(values.size())}; core_t::TTime dT{(end - start) / size}; - core_t::TTime dt{static_cast( - CTools::truncate(m_MinimumBucketLength, 1.0, static_cast(dT)))}; + core_t::TTime dt{static_cast(CTools::truncate(m_MinimumBucketLength, 1.0, static_cast(dT)))}; double scale{std::pow(static_cast(dt) / static_cast(dT), 2.0)}; - for (core_t::TTime time = start + dt/2; time < end; time += dt) - { - if (this->inWindow(time)) - { + for (core_t::TTime time = start + dt / 2; time < end; time += dt) { + if (this->inWindow(time)) { core_t::TTime i{(time - start) / dT}; double value{CBasicStatistics::mean(values[i])}; double weight{scale * CBasicStatistics::count(values[i])}; - if (weight > 0.0) - { + if (weight > 0.0) { std::size_t bucket; - if (this->bucket(time, bucket)) - { + if (this->bucket(time, bucket)) { this->add(bucket, time, weight); this->add(bucket, time, value, weight); } @@ -178,50 +149,40 @@ void CAdaptiveBucketing::initialValues(core_t::TTime start, } } -std::size_t CAdaptiveBucketing::size() const -{ +std::size_t CAdaptiveBucketing::size() const { return m_Centres.size(); } -void CAdaptiveBucketing::clear() -{ +void CAdaptiveBucketing::clear() { clearAndShrink(m_Endpoints); clearAndShrink(m_Centres); } -void CAdaptiveBucketing::add(std::size_t bucket, core_t::TTime time, double weight) -{ - TDoubleMeanAccumulator centre{ - CBasicStatistics::accumulator(this->count(bucket), - static_cast(m_Centres[bucket]))}; +void CAdaptiveBucketing::add(std::size_t bucket, core_t::TTime time, double weight) { + TDoubleMeanAccumulator centre{CBasicStatistics::accumulator(this->count(bucket), static_cast(m_Centres[bucket]))}; centre.add(this->offset(time), weight); m_Centres[bucket] = CBasicStatistics::mean(centre); } -void CAdaptiveBucketing::decayRate(double value) -{ +void 
CAdaptiveBucketing::decayRate(double value) { m_DecayRate = std::max(value, MINIMUM_DECAY_RATE); } -double CAdaptiveBucketing::decayRate() const -{ +double CAdaptiveBucketing::decayRate() const { return m_DecayRate; } -void CAdaptiveBucketing::age(double factor) -{ +void CAdaptiveBucketing::age(double factor) { factor = factor * factor; m_LpForce.age(factor); m_Force.age(factor); } -double CAdaptiveBucketing::minimumBucketLength() const -{ +double CAdaptiveBucketing::minimumBucketLength() const { return m_MinimumBucketLength; } -void CAdaptiveBucketing::refine(core_t::TTime time) -{ +void CAdaptiveBucketing::refine(core_t::TTime time) { using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; using TDoubleSizePr = std::pair; @@ -231,8 +192,7 @@ void CAdaptiveBucketing::refine(core_t::TTime time) LOG_TRACE("refining at " << time); std::size_t n{m_Endpoints.size()}; - if (n < 2) - { + if (n < 2) { return; } --n; @@ -243,8 +203,7 @@ void CAdaptiveBucketing::refine(core_t::TTime time) // Extract the bucket means. TDoubleDoublePrVec values; values.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { values.emplace_back(this->count(i), this->predict(i, time, m_Centres[i])); } LOG_TRACE("values = " << core::CContainerPrinter::print(values)); @@ -253,34 +212,23 @@ void CAdaptiveBucketing::refine(core_t::TTime time) // boundary conditions at the start and end of the interval. TDoubleVec ranges; ranges.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { - TDoubleDoublePr v[]{values[(n + i - 2) % n], - values[(n + i - 1) % n], - values[(n + i + 0) % n], - values[(n + i + 1) % n], - values[(n + i + 2) % n]}; + for (std::size_t i = 0u; i < n; ++i) { + TDoubleDoublePr v[]{ + values[(n + i - 2) % n], values[(n + i - 1) % n], values[(n + i + 0) % n], values[(n + i + 1) % n], values[(n + i + 2) % n]}; TMinAccumulator min; TMaxAccumulator max; - for (std::size_t j = 0u; j < sizeof(v)/sizeof(v[0]); ++j) - { - if (v[j].first > 0.0) - { + for (std::size_t j = 0u; j < sizeof(v) / sizeof(v[0]); ++j) { + if (v[j].first > 0.0) { min.add({v[j].second, j}); max.add({v[j].second, j}); } } - if (min.count() > 0) - { - ranges.push_back( WEIGHTS[max[0].second > min[0].second ? - max[0].second - min[0].second : - min[0].second - max[0].second] - * std::pow(max[0].first - min[0].first, 0.75)); - } - else - { + if (min.count() > 0) { + ranges.push_back(WEIGHTS[max[0].second > min[0].second ? max[0].second - min[0].second : min[0].second - max[0].second] * + std::pow(max[0].first - min[0].first, 0.75)); + } else { ranges.push_back(0.0); } } @@ -292,14 +240,12 @@ void CAdaptiveBucketing::refine(core_t::TTime time) double totalAveragingError{0.0}; TDoubleVec averagingErrors; averagingErrors.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { double ai{m_Endpoints[i]}; - double bi{m_Endpoints[i+1]}; + double bi{m_Endpoints[i + 1]}; double error{0.0}; - for (std::size_t j = 0u; j < boost::size(SMOOTHING_FUNCTION); ++j) - { + for (std::size_t j = 0u; j < boost::size(SMOOTHING_FUNCTION); ++j) { error += SMOOTHING_FUNCTION[j] * ranges[(n + i + j - WIDTH) % n]; } @@ -319,17 +265,13 @@ void CAdaptiveBucketing::refine(core_t::TTime time) // If all the function values are identical then the end points // should be equidistant. We check step in case of underflow. 
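    // In the general case (step > 0) each interior endpoint is instead
    // nudged, damped by alpha below, towards the position at which every
    // bucket would carry an equal share, step = E / n, of the total
    // averaging error E. Illustrative numbers, not taken from the code:
    // two buckets [0,2) and [2,4) with averaging errors {3, 1} give
    // step = 2, so the interior endpoint's target is 0 + 2 * (2 / 3) = 4/3,
    // shrinking the high-error bucket until each side carries an error of 2.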
- if (step == 0.0) - { + if (step == 0.0) { m_Endpoints[0] = a; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { m_Endpoints[i] = (b - a) * static_cast(i) / n_; } m_Endpoints[n] = b; - } - else - { + } else { // Noise in the bucket mean values creates a "high" // frequency mean zero driving force on the buckets' // end points desired positions. Once they have stabilized @@ -341,9 +283,9 @@ void CAdaptiveBucketing::refine(core_t::TTime time) // the buckets values loses a small amount of information, // see the comments at the start of refresh for more // details. - double alpha{ALPHA * (CBasicStatistics::mean(m_Force) == 0.0 ? - 1.0 : std::fabs( CBasicStatistics::mean(m_LpForce)) - / CBasicStatistics::mean(m_Force))}; + double alpha{ALPHA * (CBasicStatistics::mean(m_Force) == 0.0 + ? 1.0 + : std::fabs(CBasicStatistics::mean(m_LpForce)) / CBasicStatistics::mean(m_Force))}; double force{0.0}; // Linearly interpolate between the current end points @@ -353,30 +295,22 @@ void CAdaptiveBucketing::refine(core_t::TTime time) // dynamics and damps any oscillatory behavior which // might otherwise occur. double error{0.0}; - for (std::size_t i = 0u, j = 1u; i < n && j < n+1; ++i) - { + for (std::size_t i = 0u, j = 1u; i < n && j < n + 1; ++i) { double ai{endpoints[i]}; - double bi{endpoints[i+1]}; + double bi{endpoints[i + 1]}; double h{bi - ai}; double e{averagingErrors[i]}; error += e; - for (double e_ = step - (error - e); - error >= step; - e_ += step, error -= step) - { + for (double e_ = step - (error - e); error >= step; e_ += step, error -= step) { double x{h * e_ / averagingErrors[i]}; m_Endpoints[j] = endpoints[j] + alpha * (ai + x - endpoints[j]); force += (ai + x) - endpoints[j]; - LOG_TRACE("interval averaging error = " << e - << ", a(i) = " << ai - << ", x = " << x - << ", endpoint " << endpoints[j] - << " -> " << ai + x); + LOG_TRACE("interval averaging error = " << e << ", a(i) = " << ai << ", x = " << x << ", endpoint " << endpoints[j] + << " -> " << ai + x); ++j; } } - if (m_MinimumBucketLength > 0.0) - { + if (m_MinimumBucketLength > 0.0) { CTools::spread(a, b, m_MinimumBucketLength, m_Endpoints); } @@ -396,23 +330,18 @@ void CAdaptiveBucketing::refine(core_t::TTime time) bool CAdaptiveBucketing::knots(core_t::TTime time, CSplineTypes::EBoundaryCondition boundary, - TDoubleVec &knots, - TDoubleVec &values, - TDoubleVec &variances) const -{ + TDoubleVec& knots, + TDoubleVec& values, + TDoubleVec& variances) const { knots.clear(); values.clear(); variances.clear(); std::size_t n{m_Centres.size()}; - for (std::size_t i = 0u; i < n; ++i) - { - if (this->count(i) > 0.0) - { + for (std::size_t i = 0u; i < n; ++i) { + if (this->count(i) > 0.0) { double wide{3.0 * (m_Endpoints[n] - m_Endpoints[0]) / static_cast(n)}; - LOG_TRACE("period " << m_Endpoints[n] - m_Endpoints[0] - << ", # buckets = " << n - << ", wide = " << wide); + LOG_TRACE("period " << m_Endpoints[n] - m_Endpoints[0] << ", # buckets = " << n << ", wide = " << wide); // We get two points for each wide bucket but at most // one third of the buckets can be wide. 
In this case @@ -422,31 +351,26 @@ bool CAdaptiveBucketing::knots(core_t::TTime time, variances.reserve(4 * n / 3); double a{m_Endpoints[i]}; - double b{m_Endpoints[i+1]}; + double b{m_Endpoints[i + 1]}; double c{m_Centres[i]}; knots.push_back(m_Endpoints[0]); values.push_back(this->predict(i, time, c)); variances.push_back(this->variance(i)); - for (/**/; i < n; ++i) - { - if (this->count(i) > 0.0) - { + for (/**/; i < n; ++i) { + if (this->count(i) > 0.0) { a = m_Endpoints[i]; - b = m_Endpoints[i+1]; + b = m_Endpoints[i + 1]; c = m_Centres[i]; double m{this->predict(i, time, c)}; double v{this->variance(i)}; - if (b - a > wide) - { + if (b - a > wide) { knots.push_back(std::max(c - (b - a) / 4.0, a)); values.push_back(m); variances.push_back(v); knots.push_back(std::min(c + (b - a) / 4.0, b)); values.push_back(m); variances.push_back(v); - } - else - { + } else { knots.push_back(c); values.push_back(m); variances.push_back(v); @@ -454,8 +378,7 @@ bool CAdaptiveBucketing::knots(core_t::TTime time, } } - switch (boundary) - { + switch (boundary) { case CSplineTypes::E_Natural: case CSplineTypes::E_ParabolicRunout: knots.push_back(m_Endpoints[n]); @@ -477,70 +400,55 @@ bool CAdaptiveBucketing::knots(core_t::TTime time, return knots.size() >= 2; } -const CAdaptiveBucketing::TFloatVec &CAdaptiveBucketing::endpoints() const -{ +const CAdaptiveBucketing::TFloatVec& CAdaptiveBucketing::endpoints() const { return m_Endpoints; } -CAdaptiveBucketing::TFloatVec &CAdaptiveBucketing::endpoints() -{ +CAdaptiveBucketing::TFloatVec& CAdaptiveBucketing::endpoints() { return m_Endpoints; } -const CAdaptiveBucketing::TFloatVec &CAdaptiveBucketing::centres() const -{ +const CAdaptiveBucketing::TFloatVec& CAdaptiveBucketing::centres() const { return m_Centres; } -CAdaptiveBucketing::TFloatVec &CAdaptiveBucketing::centres() -{ +CAdaptiveBucketing::TFloatVec& CAdaptiveBucketing::centres() { return m_Centres; } -double CAdaptiveBucketing::count() const -{ +double CAdaptiveBucketing::count() const { double result = 0.0; - for (std::size_t i = 0u; i < m_Centres.size(); ++i) - { + for (std::size_t i = 0u; i < m_Centres.size(); ++i) { result += this->count(i); } return result; } -CAdaptiveBucketing::TDoubleVec CAdaptiveBucketing::values(core_t::TTime time) const -{ +CAdaptiveBucketing::TDoubleVec CAdaptiveBucketing::values(core_t::TTime time) const { TDoubleVec result; result.reserve(m_Centres.size()); - for (std::size_t i = 0u; i < m_Centres.size(); ++i) - { + for (std::size_t i = 0u; i < m_Centres.size(); ++i) { result.push_back(this->predict(i, time, m_Centres[i])); } return result; } -CAdaptiveBucketing::TDoubleVec CAdaptiveBucketing::variances() const -{ +CAdaptiveBucketing::TDoubleVec CAdaptiveBucketing::variances() const { TDoubleVec result; result.reserve(m_Centres.size()); - for (std::size_t i = 0u; i < m_Centres.size(); ++i) - { + for (std::size_t i = 0u; i < m_Centres.size(); ++i) { result.push_back(this->variance(i)); } return result; } -bool CAdaptiveBucketing::bucket(core_t::TTime time, std::size_t &result) const -{ +bool CAdaptiveBucketing::bucket(core_t::TTime time, std::size_t& result) const { double t{this->offset(time)}; - std::size_t i(std::upper_bound(m_Endpoints.begin(), - m_Endpoints.end(), t) - m_Endpoints.begin()); + std::size_t i(std::upper_bound(m_Endpoints.begin(), m_Endpoints.end(), t) - m_Endpoints.begin()); std::size_t n{m_Endpoints.size()}; - if (t < m_Endpoints[0] || i == n) - { - LOG_ERROR("t = " << t - << " out of range [" << m_Endpoints[0] - << "," << m_Endpoints[n-1] << ")"); + 
if (t < m_Endpoints[0] || i == n) { + LOG_ERROR("t = " << t << " out of range [" << m_Endpoints[0] << "," << m_Endpoints[n - 1] << ")"); return false; } @@ -548,20 +456,17 @@ bool CAdaptiveBucketing::bucket(core_t::TTime time, std::size_t &result) const return true; } -uint64_t CAdaptiveBucketing::checksum(uint64_t seed) const -{ +uint64_t CAdaptiveBucketing::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_DecayRate); seed = CChecksum::calculate(seed, m_MinimumBucketLength); seed = CChecksum::calculate(seed, m_Endpoints); return CChecksum::calculate(seed, m_Centres); } -std::size_t CAdaptiveBucketing::memoryUsage() const -{ +std::size_t CAdaptiveBucketing::memoryUsage() const { std::size_t mem{core::CMemory::dynamicSize(m_Endpoints)}; mem += core::CMemory::dynamicSize(m_Centres); return mem; } - } } diff --git a/lib/maths/CAgglomerativeClusterer.cc b/lib/maths/CAgglomerativeClusterer.cc index 433c3e2f9d..0808284551 100644 --- a/lib/maths/CAgglomerativeClusterer.cc +++ b/lib/maths/CAgglomerativeClusterer.cc @@ -8,8 +8,8 @@ #include #include -#include #include +#include #include #include @@ -22,13 +22,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TSizeSizePr = std::pair; using TDoubleSizeSizePrPr = std::pair; @@ -43,24 +40,16 @@ using TNodePtrVec = std::vector; const double INF = boost::numeric::bounds::highest(); //! Get the distance between node \p i and \p j. -inline double &distance(TDoubleVecVec &distanceMatrix, - std::size_t i, - std::size_t j) -{ - if (j > i) - { +inline double& distance(TDoubleVecVec& distanceMatrix, std::size_t i, std::size_t j) { + if (j > i) { std::swap(i, j); } return distanceMatrix[i][j]; } //! Get the distance between node \p i and \p j. -inline double distance(const TDoubleVecVec &distanceMatrix, - std::size_t i, - std::size_t j) -{ - if (j > i) - { +inline double distance(const TDoubleVecVec& distanceMatrix, std::size_t i, std::size_t j) { + if (j > i) { std::swap(i, j); } return distanceMatrix[i][j]; @@ -72,17 +61,9 @@ inline double distance(const TDoubleVecVec &distanceMatrix, //!
 //!   \f$\displaystyle \max_{a \in A, b \in B}{d[a,b]}\f$
 //! 
-struct SComplete -{ - void operator()(const TDoubleVec &/*sizes*/, - std::size_t x, - std::size_t a, - std::size_t b, - TDoubleVecVec &distanceMatrix) const - { - distance(distanceMatrix, b, x) = - std::max(distance(distanceMatrix, a, x), - distance(distanceMatrix, b, x)); +struct SComplete { + void operator()(const TDoubleVec& /*sizes*/, std::size_t x, std::size_t a, std::size_t b, TDoubleVecVec& distanceMatrix) const { + distance(distanceMatrix, b, x) = std::max(distance(distanceMatrix, a, x), distance(distanceMatrix, b, x)); } }; @@ -92,56 +73,32 @@ struct SComplete //!
 //!   \f$\displaystyle \frac{1}{|A||B|}\sum_{a \in A, b \in B}{d[a,b]}\f$
 //! 
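//! For example (illustrative numbers, not from the original source):
//! merging \f$A\f$ with \f$|A| = 2\f$ and \f$d(A,X) = 1\f$ into \f$B\f$
//! with \f$|B| = 3\f$ and \f$d(B,X) = 2\f$ gives
//! \f$d(A \cup B, X) = (2 \times 1 + 3 \times 2) / 5 = 1.6\f$, which is
//! the size-weighted in-place update the function object below computes.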
-struct SAverage -{ - void operator()(const TDoubleVec &sizes, - std::size_t x, - std::size_t a, - std::size_t b, - TDoubleVecVec &distanceMatrix) const - { +struct SAverage { + void operator()(const TDoubleVec& sizes, std::size_t x, std::size_t a, std::size_t b, TDoubleVecVec& distanceMatrix) const { double sa = sizes[a]; double sb = sizes[b]; - distance(distanceMatrix, b, x) = - ( sa * distance(distanceMatrix, a, x) - + sb * distance(distanceMatrix, b, x)) / (sa + sb); + distance(distanceMatrix, b, x) = (sa * distance(distanceMatrix, a, x) + sb * distance(distanceMatrix, b, x)) / (sa + sb); } }; //! \brief Weighted objective distance update function. -struct SWeighted -{ - void operator()(const TDoubleVec /*sizes*/, - std::size_t x, - std::size_t a, - std::size_t b, - TDoubleVecVec &distanceMatrix) const - { - distance(distanceMatrix, b, x) = - ( distance(distanceMatrix, a, x) - + distance(distanceMatrix, b, x)) / 2.0; +struct SWeighted { + void operator()(const TDoubleVec /*sizes*/, std::size_t x, std::size_t a, std::size_t b, TDoubleVecVec& distanceMatrix) const { + distance(distanceMatrix, b, x) = (distance(distanceMatrix, a, x) + distance(distanceMatrix, b, x)) / 2.0; } }; //! \brief Ward objective distance update function. //! //! See https://en.wikipedia.org/wiki/Ward%27s_method. -struct SWard -{ - void operator()(const TDoubleVec sizes, - std::size_t x, - std::size_t a, - std::size_t b, - TDoubleVecVec &distanceMatrix) const - { +struct SWard { + void operator()(const TDoubleVec sizes, std::size_t x, std::size_t a, std::size_t b, TDoubleVecVec& distanceMatrix) const { double sa = sizes[a]; double sb = sizes[b]; double sx = sizes[x]; - distance(distanceMatrix, b, x) = - std::sqrt( (sa + sx) * distance(distanceMatrix, a, x) - + (sb + sx) * distance(distanceMatrix, b, x) - - sx * distance(distanceMatrix, a, b)) - / (sa + sb + sx); + distance(distanceMatrix, b, x) = std::sqrt((sa + sx) * distance(distanceMatrix, a, x) + (sb + sx) * distance(distanceMatrix, b, x) - + sx * distance(distanceMatrix, a, b)) / + (sa + sb + sx); } }; @@ -155,14 +112,11 @@ struct SWard //! \param[in] distanceMatrix the matrices of distances //! between the points to cluster. //! \param[in] L Filled in with the unsorted dendrogram. -void mstCluster(const TDoubleVecVec &distanceMatrix, - TDoubleSizeSizePrPrVec &L) -{ +void mstCluster(const TDoubleVecVec& distanceMatrix, TDoubleSizeSizePrPrVec& L) { L.clear(); std::size_t N = distanceMatrix.size(); - if (N <= 1) - { + if (N <= 1) { return; } @@ -170,26 +124,22 @@ void mstCluster(const TDoubleVecVec &distanceMatrix, TSizeVec S; S.reserve(N); - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { S.push_back(i); } TDoubleVec D(N, INF); std::size_t c = S[N - 1]; - while (S.size() > 1) - { + while (S.size() > 1) { S.erase(std::find(S.begin(), S.end(), c)); std::size_t n = 0; double d = INF; - for (std::size_t i = 0u; i < S.size(); ++i) - { + for (std::size_t i = 0u; i < S.size(); ++i) { std::size_t x = S[i]; D[x] = std::min(D[x], distance(distanceMatrix, x, c)); - if (D[x] < d) - { + if (D[x] < d) { n = x; d = D[x]; } @@ -219,10 +169,7 @@ void mstCluster(const TDoubleVecVec &distanceMatrix, //! \note For maximum efficiency modifications are made in //! place to \p distanceMatrix. 
template -void nnCluster(TDoubleVecVec &distanceMatrix, - UPDATE update, - TDoubleSizeSizePrPrVec &L) -{ +void nnCluster(TDoubleVecVec& distanceMatrix, UPDATE update, TDoubleSizeSizePrPrVec& L) { // In departure from the scheme given by Mullner we make all // our updates in-place by using a direct address table from // n -> max(a, b), where n is the new node index and a and b @@ -235,8 +182,7 @@ void nnCluster(TDoubleVecVec &distanceMatrix, L.clear(); std::size_t N = distanceMatrix.size(); - if (N <= 1) - { + if (N <= 1) { return; } @@ -244,8 +190,7 @@ void nnCluster(TDoubleVecVec &distanceMatrix, TSizeVec S; S.reserve(N); - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { S.push_back(i); } TSizeVec chain; @@ -253,8 +198,7 @@ void nnCluster(TDoubleVecVec &distanceMatrix, TDoubleVec size(N, 1.0); TSizeVec rightmost; rightmost.reserve(2 * N - 1); - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { rightmost.push_back(i); } @@ -262,19 +206,15 @@ void nnCluster(TDoubleVecVec &distanceMatrix, std::size_t b = 1; std::size_t p = N - 1; - while (S.size() > 1) - { + while (S.size() > 1) { std::size_t m = chain.size(); - if (m <= 3) - { + if (m <= 3) { a = S[0]; b = S[1]; chain.clear(); chain.push_back(a); m = 1; - } - else - { + } else { a = chain[m - 4]; b = chain[m - 3]; // Cut the tail. @@ -288,55 +228,44 @@ void nnCluster(TDoubleVecVec &distanceMatrix, LOG_TRACE("a = " << a << ", b = " << b << ", m = " << m); double d; - do - { - std::size_t c = 0u; + do { + std::size_t c = 0u; std::size_t ra = rightmost[a]; d = INF; - for (std::size_t i = 0u; i < S.size(); ++i) - { - std::size_t x = S[i]; + for (std::size_t i = 0u; i < S.size(); ++i) { + std::size_t x = S[i]; std::size_t rx = rightmost[x]; - if (a != x) - { + if (a != x) { double dx = distance(distanceMatrix, ra, rx); - if (dx < d || (dx == d && x == b)) - { + if (dx < d || (dx == d && x == b)) { c = x; d = dx; } } } - b = a; a = c; + b = a; + a = c; chain.push_back(a); ++m; - } - while (m <= 3 || a != chain[m - 3]); + } while (m <= 3 || a != chain[m - 3]); - if (a > b) - { + if (a > b) { std::swap(a, b); } std::size_t ra = rightmost[a]; std::size_t rb = rightmost[b]; - LOG_TRACE("chain = " <m_Parent) - { +TNode& CAgglomerativeClusterer::CNode::root() { + CNode* result = this; + for (CNode* parent = m_Parent; parent; parent = parent->m_Parent) { result = parent; } return *result; } -void CAgglomerativeClusterer::CNode::points(TSizeVec &result) const -{ - if (!m_LeftChild && !m_RightChild) - { +void CAgglomerativeClusterer::CNode::points(TSizeVec& result) const { + if (!m_LeftChild && !m_RightChild) { result.push_back(m_Index); } - if (m_LeftChild) - { + if (m_LeftChild) { m_LeftChild->points(result); } - if (m_RightChild) - { + if (m_RightChild) { m_RightChild->points(result); } } -void CAgglomerativeClusterer::CNode::clusters(TDoubleSizeVecPrVec &result) const -{ - if (m_LeftChild && m_RightChild) - { +void CAgglomerativeClusterer::CNode::clusters(TDoubleSizeVecPrVec& result) const { + if (m_LeftChild && m_RightChild) { TSizeVec points; this->points(points); result.emplace_back(m_Height, points); } - if (m_LeftChild) - { + if (m_LeftChild) { m_LeftChild->clusters(result); } - if (m_RightChild) - { + if (m_RightChild) { m_RightChild->clusters(result); } } -void CAgglomerativeClusterer::CNode::clusteringAt(double height, - TSizeVecVec &result) const -{ - if (height >= m_Height) - { +void CAgglomerativeClusterer::CNode::clusteringAt(double height, TSizeVecVec& result) const { + if 
(height >= m_Height) { result.push_back(TSizeVec()); this->points(result.back()); - } - else - { - if (m_LeftChild && height < m_LeftChild->height()) - { + } else { + if (m_LeftChild && height < m_LeftChild->height()) { m_LeftChild->clusteringAt(height, result); - } - else if (m_LeftChild) - { + } else if (m_LeftChild) { result.push_back(TSizeVec()); m_LeftChild->points(result.back()); } - if (m_RightChild && height < m_RightChild->height()) - { + if (m_RightChild && height < m_RightChild->height()) { m_RightChild->clusteringAt(height, result); - } - else if (m_RightChild) - { + } else if (m_RightChild) { result.push_back(TSizeVec()); m_RightChild->points(result.back()); } } } -std::string CAgglomerativeClusterer::CNode::print(const std::string &indent) const -{ +std::string CAgglomerativeClusterer::CNode::print(const std::string& indent) const { std::string result; result += "height = " + core::CStringUtils::typeToStringPretty(m_Height); - if (m_LeftChild) - { + if (m_LeftChild) { result += core_t::LINE_ENDING + indent + m_LeftChild->print(indent + " "); } - if (m_RightChild) - { + if (m_RightChild) { result += core_t::LINE_ENDING + indent + m_RightChild->print(indent + " "); } - if (!m_LeftChild && !m_RightChild) - { + if (!m_LeftChild && !m_RightChild) { result += ", point = " + core::CStringUtils::typeToStringPretty(m_Index); } return result; } - } } diff --git a/lib/maths/CAssignment.cc b/lib/maths/CAssignment.cc index 93f78fcbf2..c53522e359 100644 --- a/lib/maths/CAssignment.cc +++ b/lib/maths/CAssignment.cc @@ -13,13 +13,10 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TBoolVec = std::vector; using TDoubleVec = std::vector; @@ -38,10 +35,7 @@ const double MAXIMUM_COST = boost::numeric::bounds::highest(); //! \param[in] costs The matrix of costs. //! \param[in] i The row index. //! \param[in] j The column index. -inline double cost(const TDoubleVecVec &costs, - std::size_t i, - std::size_t j) -{ +inline double cost(const TDoubleVecVec& costs, std::size_t i, std::size_t j) { return (i < costs.size() ? (j < costs[i].size() ? costs[i][j] : 0.0) : 0.0); } @@ -58,12 +52,8 @@ inline double cost(const TDoubleVecVec &costs, //! \param[in] columnPotential The column potential function. //! \param[in] i The row index. //! \param[in] j The column index. -inline double adjustedCost(const TDoubleVecVec &costs, - const TDoubleVec &rowPotential, - const TDoubleVec &columnPotential, - std::size_t i, - std::size_t j) -{ +inline double +adjustedCost(const TDoubleVecVec& costs, const TDoubleVec& rowPotential, const TDoubleVec& columnPotential, std::size_t i, std::size_t j) { // The bracketing is important in this expression since // it ensures we find the correct initial feasible solution. return (cost(costs, i, j) - columnPotential[j]) - rowPotential[i]; @@ -75,11 +65,7 @@ inline double adjustedCost(const TDoubleVecVec &costs, //! \param[in] j The column index to match. //! \param[out] matchColumnByRow The columns matching each row. //! \param[out] matchRowByColumn The rows matching each column. -inline void match(std::size_t i, - std::size_t j, - TSizeVec &matchColumnByRow, - TSizeVec &matchRowByColumn) -{ +inline void match(std::size_t i, std::size_t j, TSizeVec& matchColumnByRow, TSizeVec& matchRowByColumn) { matchColumnByRow[i] = j; matchRowByColumn[j] = i; } @@ -102,34 +88,29 @@ inline void match(std::size_t i, //! \param[out] minSlackColumn The column of the minimum slack //! edge. //! 
\param[out] minSlackValue The minimum slack. -void grow(const TDoubleVecVec &costs, - const TDoubleVec &rowPotential, - const TDoubleVec &columnPotential, - const TSizeVec &parentRowByCommittedColumn, +void grow(const TDoubleVecVec& costs, + const TDoubleVec& rowPotential, + const TDoubleVec& columnPotential, + const TSizeVec& parentRowByCommittedColumn, std::size_t pivot, - TBoolVec &committedRows, - TSizeVec &minSlackRowByColumn, - TDoubleVec &minSlackValueByColumn, - std::size_t &minSlackRow, - std::size_t &minSlackColumn, - double &minSlackValue) -{ + TBoolVec& committedRows, + TSizeVec& minSlackRowByColumn, + TDoubleVec& minSlackValueByColumn, + std::size_t& minSlackRow, + std::size_t& minSlackColumn, + double& minSlackValue) { minSlackRow = UNMATCHED; minSlackColumn = UNMATCHED; minSlackValue = MAXIMUM_COST; committedRows[pivot] = true; - for (std::size_t j = 0u; j < parentRowByCommittedColumn.size(); ++j) - { - if (parentRowByCommittedColumn[j] == UNMATCHED) - { + for (std::size_t j = 0u; j < parentRowByCommittedColumn.size(); ++j) { + if (parentRowByCommittedColumn[j] == UNMATCHED) { double slack = adjustedCost(costs, rowPotential, columnPotential, pivot, j); - if (minSlackValueByColumn[j] > slack) - { + if (minSlackValueByColumn[j] > slack) { minSlackValueByColumn[j] = slack; minSlackRowByColumn[j] = pivot; } - if (minSlackValueByColumn[j] < minSlackValue) - { + if (minSlackValueByColumn[j] < minSlackValue) { minSlackValue = minSlackValueByColumn[j]; minSlackRow = minSlackRowByColumn[j]; minSlackColumn = j; @@ -137,26 +118,20 @@ void grow(const TDoubleVecVec &costs, } } } - } -bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, - TSizeSizePrVec &matching) -{ +bool CAssignment::kuhnMunkres(const TDoubleVecVec& costs, TSizeSizePrVec& matching) { matching.clear(); - if (costs.empty()) - { + if (costs.empty()) { return true; } // Sanity check cost matrix. std::size_t n = costs.size(); std::size_t m = costs[0].size(); - for (std::size_t i = 1u; i < costs.size(); ++i) - { - if (costs[i].size() != m) - { + for (std::size_t i = 1u; i < costs.size(); ++i) { + if (costs[i].size() != m) { LOG_ERROR("Irregular cost matrix"); return false; } @@ -168,20 +143,16 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, // edge incident on each column and row. 
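To make the initial feasible solution concrete, here is a small standalone sketch with an illustrative 3 x 3 cost matrix, not values from the patch: the column potential of a column is its cheapest incident edge, the row potential is the cheapest column-reduced edge in the row, and every edge whose slack is zero under these potentials is a candidate for the greedy initial matching.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<std::vector<double>> costs{{4.0, 1.0, 3.0},
                                               {2.0, 0.0, 5.0},
                                               {3.0, 2.0, 2.0}};
        std::size_t n = costs.size();
        std::vector<double> columnPotential(n), rowPotential(n);
        // Column potential: the cheapest edge incident on each column.
        for (std::size_t j = 0; j < n; ++j) {
            double min = costs[0][j];
            for (std::size_t i = 1; i < n; ++i) { min = std::min(min, costs[i][j]); }
            columnPotential[j] = min;
        }
        // Row potential: the cheapest column-reduced edge in each row.
        for (std::size_t i = 0; i < n; ++i) {
            double min = costs[i][0] - columnPotential[0];
            for (std::size_t j = 1; j < n; ++j) { min = std::min(min, costs[i][j] - columnPotential[j]); }
            rowPotential[i] = min;
        }
        // Slack of (i, j) is (cost - columnPotential[j]) - rowPotential[i]; it is
        // non-negative by construction and zero slack edges are feasible matches.
        for (std::size_t i = 0; i < n; ++i) {
            for (std::size_t j = 0; j < n; ++j) {
                double slack = (costs[i][j] - columnPotential[j]) - rowPotential[i];
                if (slack == 0.0) { std::cout << "zero slack edge (" << i << "," << j << ")\n"; }
            }
        }
        return 0;
    }

For this matrix the zero slack edges are (0,1), (0,2), (1,0), (1,1) and (2,2), so the greedy pass already finds the complete matching {(0,1), (1,0), (2,2)} with total cost 5.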
TDoubleVec columnPotential(N, 0.0); TDoubleVec rowPotential(N, 0.0); - for (std::size_t j = 0u; j < m; ++j) - { + for (std::size_t j = 0u; j < m; ++j) { double min = costs[0][j]; - for (std::size_t i = 1u; i < N; ++i) - { + for (std::size_t i = 1u; i < N; ++i) { min = std::min(min, cost(costs, i, j)); } columnPotential[j] = min; } - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { double min = costs[i][0] - columnPotential[0]; - for (std::size_t j = 1u; j < N; ++j) - { + for (std::size_t j = 1u; j < N; ++j) { min = std::min(min, cost(costs, i, j) - columnPotential[j]); } rowPotential[i] = min; @@ -194,14 +165,10 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, TSizeVec matchColumnByRow(N, UNMATCHED); TSizeVec matchRowByColumn(N, UNMATCHED); std::size_t unmatched = N; - for (std::size_t i = 0u; i < N; ++i) - { - for (std::size_t j = 0u; j < N; ++j) - { - if (matchColumnByRow[i] == UNMATCHED - && matchRowByColumn[j] == UNMATCHED - && adjustedCost(costs, rowPotential, columnPotential, i, j) == 0.0) - { + for (std::size_t i = 0u; i < N; ++i) { + for (std::size_t j = 0u; j < N; ++j) { + if (matchColumnByRow[i] == UNMATCHED && matchRowByColumn[j] == UNMATCHED && + adjustedCost(costs, rowPotential, columnPotential, i, j) == 0.0) { match(i, j, matchColumnByRow, matchRowByColumn); --unmatched; } @@ -213,8 +180,7 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, TSizeVec minSlackRowByColumn(N, UNMATCHED); TDoubleVec minSlackValueByColumn(N, MAXIMUM_COST); - while (unmatched > 0) - { + while (unmatched > 0) { LOG_TRACE("matchColumnByRow = " << core::CContainerPrinter::print(matchColumnByRow)); LOG_TRACE("matchRowByColumn = " << core::CContainerPrinter::print(matchRowByColumn)); LOG_TRACE("unmatched = " << unmatched); @@ -224,17 +190,14 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, // Find an unmatched row. We look for the augmenting // path which matches this row in the loop below. std::size_t pivot = N; - for (std::size_t i = 0u; i < N; ++i) - { - if (matchColumnByRow[i] == UNMATCHED) - { + for (std::size_t i = 0u; i < N; ++i) { + if (matchColumnByRow[i] == UNMATCHED) { pivot = i; break; } } LOG_TRACE("pivot = " << pivot); - if (pivot == N) - { + if (pivot == N) { LOG_ERROR("Bad pivot: costs = " << core::CContainerPrinter::print(costs)); return false; } @@ -243,7 +206,9 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, std::size_t minSlackRow; std::size_t minSlackColumn; double minSlackValue; - grow(costs, rowPotential, columnPotential, + grow(costs, + rowPotential, + columnPotential, parentRowByCommittedColumn, pivot, committedRows, @@ -253,10 +218,8 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, minSlackColumn, minSlackValue); LOG_TRACE("committedRows = " << core::CContainerPrinter::print(committedRows)); - LOG_TRACE("minSlackRowByColumn = " - << core::CContainerPrinter::print(minSlackRowByColumn)); - LOG_TRACE("minSlackValueByColumn = " - << core::CContainerPrinter::print(minSlackValueByColumn)); + LOG_TRACE("minSlackRowByColumn = " << core::CContainerPrinter::print(minSlackRowByColumn)); + LOG_TRACE("minSlackValueByColumn = " << core::CContainerPrinter::print(minSlackValueByColumn)); // Search for an augmenting path following zero slack // edges. 
In each iteration the minimum potential is @@ -270,34 +233,24 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, LOG_TRACE("*** Search for augmenting path ***"); std::size_t check = 0u; - for (/**/; check < N; ++check) - { - LOG_TRACE(" minSlackValue = " << minSlackValue - << ", minSlackRow = " << minSlackRow - << ", minSlackColumn = " << minSlackColumn); + for (/**/; check < N; ++check) { + LOG_TRACE(" minSlackValue = " << minSlackValue << ", minSlackRow = " << minSlackRow << ", minSlackColumn = " << minSlackColumn); // Checking greater than zero here is important since // due to non-associativity of floating point arithmetic // it may be that after adjusting potentials some slacks // are slightly negative. - if (minSlackValue > 0.0) - { + if (minSlackValue > 0.0) { double adjustment = minSlackValue; - for (std::size_t i = 0; i < N; ++i) - { - if (committedRows[i]) - { + for (std::size_t i = 0; i < N; ++i) { + if (committedRows[i]) { rowPotential[i] += adjustment; } } - for (std::size_t j = 0u; j < N; ++j) - { - if (parentRowByCommittedColumn[j] == UNMATCHED) - { + for (std::size_t j = 0u; j < N; ++j) { + if (parentRowByCommittedColumn[j] == UNMATCHED) { minSlackValueByColumn[j] -= adjustment; - } - else - { + } else { columnPotential[j] -= adjustment; } } @@ -307,43 +260,34 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, parentRowByCommittedColumn[minSlackColumn] = minSlackRow; pivot = matchRowByColumn[minSlackColumn]; - if (pivot == UNMATCHED) - { + if (pivot == UNMATCHED) { // Update the matching by backtracking. std::size_t committedColumn = minSlackColumn; check = 0u; - for (/**/; check < N; ++check) - { + for (/**/; check < N; ++check) { std::size_t parentRow = parentRowByCommittedColumn[committedColumn]; std::size_t tmp = matchColumnByRow[parentRow]; - match(parentRow, - committedColumn, - matchColumnByRow, - matchRowByColumn); + match(parentRow, committedColumn, matchColumnByRow, matchRowByColumn); committedColumn = tmp; - if (committedColumn == UNMATCHED) - { + if (committedColumn == UNMATCHED) { break; } } - if (check == N) - { - LOG_ERROR("Bad augmenting path: costs = " - << core::CContainerPrinter::print(costs)); + if (check == N) { + LOG_ERROR("Bad augmenting path: costs = " << core::CContainerPrinter::print(costs)); return false; } --unmatched; break; - } - else - { + } else { LOG_TRACE(" pivot = " << pivot); - LOG_TRACE(" parentRowByCommittedColumn = " - << core::CContainerPrinter::print(parentRowByCommittedColumn)); + LOG_TRACE(" parentRowByCommittedColumn = " << core::CContainerPrinter::print(parentRowByCommittedColumn)); // Grow the path to include the pivot row. 
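The dual adjustment performed in this loop can be stated in isolation. The sketch below restates it outside the class under stated assumptions: the patch's variable names are kept for reference and unmatched stands in for the UNMATCHED sentinel. Adding the minimum frontier slack to every committed row and subtracting it from every committed column leaves tree edges tight while making the minimum slack edge newly tight, which is what lets the search make progress.

    #include <cstddef>
    #include <vector>

    void adjustPotentials(double adjustment, // the minimum slack on the frontier
                          std::size_t unmatched, // sentinel for an uncommitted column
                          const std::vector<bool>& committedRows,
                          const std::vector<std::size_t>& parentRowByCommittedColumn,
                          std::vector<double>& rowPotential,
                          std::vector<double>& columnPotential,
                          std::vector<double>& minSlackValueByColumn) {
        for (std::size_t i = 0; i < rowPotential.size(); ++i) {
            if (committedRows[i]) {
                // Slack is (cost - columnPotential) - rowPotential, so raising
                // the row potential lowers every slack out of a committed row.
                rowPotential[i] += adjustment;
            }
        }
        for (std::size_t j = 0; j < columnPotential.size(); ++j) {
            if (parentRowByCommittedColumn[j] == unmatched) {
                // Column not yet in the alternating tree: just refresh the cache.
                minSlackValueByColumn[j] -= adjustment;
            } else {
                // Column in the tree: lowering its potential cancels the row
                // increase, so edges inside the tree stay tight.
                columnPotential[j] -= adjustment;
            }
        }
    }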
- grow(costs, rowPotential, columnPotential, + grow(costs, + rowPotential, + columnPotential, parentRowByCommittedColumn, pivot, committedRows, @@ -353,16 +297,12 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, minSlackColumn, minSlackValue); LOG_TRACE(" committedRows = " << core::CContainerPrinter::print(committedRows)); - LOG_TRACE(" minSlackRowByColumn = " - << core::CContainerPrinter::print(minSlackRowByColumn)); - LOG_TRACE(" minSlackValueByColumn = " - << core::CContainerPrinter::print(minSlackValueByColumn)); + LOG_TRACE(" minSlackRowByColumn = " << core::CContainerPrinter::print(minSlackRowByColumn)); + LOG_TRACE(" minSlackValueByColumn = " << core::CContainerPrinter::print(minSlackValueByColumn)); } } - if (check == N) - { - LOG_ERROR("Failed to find path: costs " - << core::CContainerPrinter::print(costs)); + if (check == N) { + LOG_ERROR("Failed to find path: costs " << core::CContainerPrinter::print(costs)); return false; } @@ -374,16 +314,13 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec &costs, // Extract the matching. matching.reserve(std::min(m, n)); - for (std::size_t i = 0u; i < n; ++i) - { - if (matchColumnByRow[i] < m) - { + for (std::size_t i = 0u; i < n; ++i) { + if (matchColumnByRow[i] < m) { matching.emplace_back(i, matchColumnByRow[i]); } } return true; } - } } diff --git a/lib/maths/CBasicStatistics.cc b/lib/maths/CBasicStatistics.cc index ee23f3b8a0..848d3aed20 100644 --- a/lib/maths/CBasicStatistics.cc +++ b/lib/maths/CBasicStatistics.cc @@ -11,32 +11,24 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -double CBasicStatistics::mean(const TDoubleDoublePr &samples) -{ +double CBasicStatistics::mean(const TDoubleDoublePr& samples) { return 0.5 * (samples.first + samples.second); } -double CBasicStatistics::mean(const TDoubleVec &sample) -{ - return std::accumulate(sample.begin(), sample.end(), 0.0) - / static_cast(sample.size()); +double CBasicStatistics::mean(const TDoubleVec& sample) { + return std::accumulate(sample.begin(), sample.end(), 0.0) / static_cast(sample.size()); } -double CBasicStatistics::median(const TDoubleVec &dataIn) -{ - if (dataIn.empty()) - { +double CBasicStatistics::median(const TDoubleVec& dataIn) { + if (dataIn.empty()) { return 0.0; } std::size_t size{dataIn.size()}; - if (size == 1) - { + if (size == 1) { return dataIn[0]; } @@ -53,17 +45,14 @@ double CBasicStatistics::median(const TDoubleVec &dataIn) size_t index{size / 2}; std::nth_element(data.begin(), data.begin() + index, data.end()); - if (useMean) - { + if (useMean) { // Since the nth element is the second of the two we need to average, // the first element to be averaged will be the largest of all those // before the nth one in the vector. 
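The even length branch just described can be exercised on its own: std::nth_element places the upper middle element at index n / 2 and partitions everything smaller before it, so the lower middle is simply the maximum of that left partition and no full sort is needed. A minimal sketch:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<double> data{9.0, 1.0, 7.0, 3.0};     // even length
        std::size_t index = data.size() / 2;              // upper middle
        std::nth_element(data.begin(), data.begin() + index, data.end());
        // Everything before data[index] is <= data[index], so the lower middle
        // is the largest element of the left partition.
        auto left = std::max_element(data.begin(), data.begin() + index);
        double median = (*left + data[index]) / 2.0;
        std::cout << median << '\n';                      // prints 5 for {1,3,7,9}
        return 0;
    }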
auto left = std::max_element(data.begin(), data.begin() + index); median = (*left + data[index]) / 2.0; - } - else - { + } else { median = data[index]; } @@ -72,7 +61,5 @@ double CBasicStatistics::median(const TDoubleVec &dataIn) const char CBasicStatistics::INTERNAL_DELIMITER(':'); const char CBasicStatistics::EXTERNAL_DELIMITER(';'); - } } - diff --git a/lib/maths/CBjkstUniqueValues.cc b/lib/maths/CBjkstUniqueValues.cc index 355df8b8a5..12570b1f6f 100644 --- a/lib/maths/CBjkstUniqueValues.cc +++ b/lib/maths/CBjkstUniqueValues.cc @@ -19,15 +19,11 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ -namespace detail -{ +namespace { +namespace detail { using TUInt8Vec = std::vector; using TUInt8VecItr = TUInt8Vec::iterator; @@ -35,13 +31,10 @@ using TUInt8VecCItr = TUInt8Vec::const_iterator; //! Convert the decomposition of the hash into two 8 bit integers //! bask into the original hash value. -inline uint16_t from8Bit(uint8_t leading, uint8_t trailing) -{ +inline uint16_t from8Bit(uint8_t leading, uint8_t trailing) { // The C++ standard says that arithmetic on types smaller than int may be // done by converting to int, so cast this way to avoid compiler warnings - return static_cast( - (static_cast(leading) << 8) + trailing - ); + return static_cast((static_cast(leading) << 8) + trailing); } using TUInt8UInt8Pr = std::pair; @@ -57,101 +50,68 @@ using TUInt8UInt8Pr = std::pair; //! |<-----8 bits---->|<-----8 bits---->|<-----8 bits---->| //! |(g(x) >> 8) % 256| g(x) % 256 | zeros(x) | //! \endcode -class CHashIterator : public std::iterator, - private boost::less_than_comparable> > -{ - public: - //! The STL that comes with g++ requires a default constructor - this - //! will create an object that's suitable only to be assigned to, which - //! is hopefully all g++'s STL does with it! - CHashIterator() : m_Itr() - { - } - - CHashIterator(TUInt8VecItr itr) : m_Itr(itr) - { - } - - TUInt8VecItr base() const - { - return m_Itr; - } - - bool operator==(CHashIterator other) const - { - return m_Itr == other.m_Itr; - } - bool operator!=(CHashIterator other) const - { - return m_Itr != other.m_Itr; - } - bool operator<(CHashIterator other) const - { - return m_Itr < other.m_Itr; - } - - uint16_t operator*() const - { - return from8Bit(*m_Itr, *(m_Itr+1)); - } - const CHashIterator &operator++() - { - m_Itr += 3; - return *this; - } - const CHashIterator operator++(int) - { - CHashIterator result(m_Itr); - m_Itr += 3; - return result; - } - const CHashIterator &operator--() - { - m_Itr -= 3; - return *this; - } - CHashIterator operator--(int) - { - CHashIterator result(m_Itr); - m_Itr -= 3; - return result; - } - uint16_t operator[](ptrdiff_t n) const - { - TUInt8VecCItr itr = m_Itr + 3*n; - return from8Bit(*itr, *(itr + 1)); - } - const CHashIterator &operator+=(ptrdiff_t n) - { - m_Itr += 3*n; - return *this; - } - const CHashIterator &operator-=(ptrdiff_t n) - { - m_Itr -= 3*n; - return *this; - } - ptrdiff_t operator-(const CHashIterator &other) const - { - return (m_Itr - other.m_Itr) / 3; - } +class CHashIterator + : public std::iterator, + private boost::less_than_comparable>> { +public: + //! The STL that comes with g++ requires a default constructor - this + //! will create an object that's suitable only to be assigned to, which + //! is hopefully all g++'s STL does with it! 
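The three byte layout this iterator walks is easy to check with a standalone round trip (the values are arbitrary): a 16 bit hash g and an 8 bit zero count are packed into three consecutive bytes, and the hash is rebuilt from the two leading bytes going via a wider type to sidestep integer promotion, as from8Bit does.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        uint16_t g = 0x1234; // 16 bit hash value
        uint8_t zeros = 5;   // trailing zero count stored alongside it
        // Encode: |(g >> 8) % 256 | g % 256 | zeros |
        std::vector<uint8_t> b;
        b.push_back(static_cast<uint8_t>(g >> 8));
        b.push_back(static_cast<uint8_t>(g));
        b.push_back(zeros);
        // Decode: arithmetic on the bytes is done in a wider type first.
        uint16_t decoded = static_cast<uint16_t>((static_cast<uint16_t>(b[0]) << 8) + b[1]);
        std::cout << std::hex << decoded << " zeros=" << std::dec
                  << static_cast<int>(b[2]) << '\n'; // 1234 zeros=5
        return 0;
    }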
+ CHashIterator() : m_Itr() {} + + CHashIterator(TUInt8VecItr itr) : m_Itr(itr) {} + + TUInt8VecItr base() const { return m_Itr; } + + bool operator==(CHashIterator other) const { return m_Itr == other.m_Itr; } + bool operator!=(CHashIterator other) const { return m_Itr != other.m_Itr; } + bool operator<(CHashIterator other) const { return m_Itr < other.m_Itr; } + + uint16_t operator*() const { return from8Bit(*m_Itr, *(m_Itr + 1)); } + const CHashIterator& operator++() { + m_Itr += 3; + return *this; + } + const CHashIterator operator++(int) { + CHashIterator result(m_Itr); + m_Itr += 3; + return result; + } + const CHashIterator& operator--() { + m_Itr -= 3; + return *this; + } + CHashIterator operator--(int) { + CHashIterator result(m_Itr); + m_Itr -= 3; + return result; + } + uint16_t operator[](ptrdiff_t n) const { + TUInt8VecCItr itr = m_Itr + 3 * n; + return from8Bit(*itr, *(itr + 1)); + } + const CHashIterator& operator+=(ptrdiff_t n) { + m_Itr += 3 * n; + return *this; + } + const CHashIterator& operator-=(ptrdiff_t n) { + m_Itr -= 3 * n; + return *this; + } + ptrdiff_t operator-(const CHashIterator& other) const { return (m_Itr - other.m_Itr) / 3; } - private: - TUInt8VecItr m_Itr; +private: + TUInt8VecItr m_Itr; }; -bool insert(TUInt8Vec &b, uint16_t g, uint8_t zeros) -{ +bool insert(TUInt8Vec& b, uint16_t g, uint8_t zeros) { // This uses the fact that the set "b" is laid out as follows: // |<---8 bits--->|<---8 bits--->|<---8 bits--->| // |(g >> 8) % 256| g % 256 | zeros | - CHashIterator lb = std::lower_bound(CHashIterator(b.begin()), - CHashIterator(b.end()), g); - if (lb.base() != b.end() && *lb == g) - { + CHashIterator lb = std::lower_bound(CHashIterator(b.begin()), CHashIterator(b.end()), g); + if (lb.base() != b.end() && *lb == g) { // We've got this value in the set. Update the zeros, // which may have changed if the h hash has changed. *(lb.base() + 2) = zeros; @@ -168,53 +128,42 @@ bool insert(TUInt8Vec &b, uint16_t g, uint8_t zeros) ptrdiff_t i = lb.base() - b.begin(); uint8_t g1 = static_cast(g >> 8); uint8_t g2 = static_cast(g); - LOG_TRACE("Adding g = " << g << " at " << i - << " (g1 = " << static_cast(g1) - << ", g2 = " << static_cast(g2) << ")"); + LOG_TRACE("Adding g = " << g << " at " << i << " (g1 = " << static_cast(g1) << ", g2 = " << static_cast(g2) << ")"); b.insert(lb.base(), 3u, uint8_t()); b[i] = g1; - b[i+1] = g2; - b[i+2] = zeros; + b[i + 1] = g2; + b[i + 2] = zeros; return true; } -void remove(TUInt8Vec &b, uint16_t g) -{ +void remove(TUInt8Vec& b, uint16_t g) { // This uses the fact that the set "b" is laid out as follows: // |<---8 bits--->|<---8 bits--->|<---8 bits--->| // |(g >> 8) % 256| g % 256 | zeros | - CHashIterator lb = std::lower_bound(CHashIterator(b.begin()), - CHashIterator(b.end()), g); - if (lb.base() != b.end() && *lb == g) - { + CHashIterator lb = std::lower_bound(CHashIterator(b.begin()), CHashIterator(b.end()), g); + if (lb.base() != b.end() && *lb == g) { // We've got this value in the set. 
b.erase(lb.base(), lb.base() + 3); } } -void prune(TUInt8Vec &b, uint8_t z) -{ +void prune(TUInt8Vec& b, uint8_t z) { // This uses the fact that the set "b" is laid out as follows: // |<---8 bits--->|<---8 bits--->|<---8 bits--->| // |(g >> 8) % 256| g % 256 | zeros | std::size_t j = 0u; - for (std::size_t i = 0u; i < b.size(); i += 3) - { - if (b[i + 2] >= z) - { + for (std::size_t i = 0u; i < b.size(); i += 3) { + if (b[i + 2] >= z) { b[j] = b[i]; b[j + 1] = b[i + 1]; b[j + 2] = b[i + 2]; j += 3; - } - else - { - LOG_TRACE("Removing " << from8Bit(b[i], b[i + 1]) - << ", zeros = " << static_cast(b[i + 2]) - << ", z = " << static_cast(z)); + } else { + LOG_TRACE("Removing " << from8Bit(b[i], b[i + 1]) << ", zeros = " << static_cast(b[i + 2]) + << ", z = " << static_cast(z)); } } b.erase(b.begin() + j, b.end()); @@ -239,54 +188,44 @@ const std::string B_TAG("d"); //! Casting conversion to a string. template -class CToString -{ - public: - template - std::string operator()(V value) const - { - return core::CStringUtils::typeToString(static_cast(value)); - } +class CToString { +public: + template + std::string operator()(V value) const { + return core::CStringUtils::typeToString(static_cast(value)); + } }; //! Casting initialization from string. template -class CFromString -{ - public: - template - bool operator()(const std::string &token, V &value) const - { - U value_; - if (core::CStringUtils::stringToType(token, value_)) - { - value = static_cast(value_); - return true; - } - return false; +class CFromString { +public: + template + bool operator()(const std::string& token, V& value) const { + U value_; + if (core::CStringUtils::stringToType(token, value_)) { + value = static_cast(value_); + return true; } + return false; + } }; - } -uint8_t CBjkstUniqueValues::trailingZeros(uint32_t value) -{ - if (value == 0) - { +uint8_t CBjkstUniqueValues::trailingZeros(uint32_t value) { + if (value == 0) { return 32; } // This is just doing a binary search for the first // non-zero bit. 
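That binary search is compact enough to try standalone. In this sketch of the same scheme, each mask covers the low half of the remaining width, so a zero result means the lowest set bit lies strictly above the mask: the whole half can be shifted away and the shift added to the count.

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Count trailing zeros by binary search over the bit width
    // (returns 32 for a zero input, matching the function below).
    uint8_t trailingZeros(uint32_t value) {
        if (value == 0) {
            return 32;
        }
        static const uint32_t MASKS[] = {0xffff, 0xff, 0xf, 0x3, 0x1};
        static const uint8_t SHIFTS[] = {16, 8, 4, 2, 1};
        uint8_t result = 0;
        for (std::size_t i = 0; i < 5; ++i) {
            if ((value & MASKS[i]) == 0) {
                // Low half is all zero: discard it and keep searching above.
                value >>= SHIFTS[i];
                result = static_cast<uint8_t>(result + SHIFTS[i]);
            }
        }
        return result;
    }

    int main() {
        std::cout << static_cast<int>(trailingZeros(0x50)) << '\n'; // 0b1010000 -> 4
        return 0;
    }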
- static const uint32_t MASKS[] = { 0xffff, 0xff, 0xf, 0x3, 0x1 }; - static const uint8_t SHIFTS[] = { 16, 8, 4, 2, 1 }; + static const uint32_t MASKS[] = {0xffff, 0xff, 0xf, 0x3, 0x1}; + static const uint8_t SHIFTS[] = {16, 8, 4, 2, 1}; uint8_t result = 0u; - for (std::size_t i = 0u; i < 5; ++i) - { - switch (value & MASKS[i]) - { + for (std::size_t i = 0u; i < 5; ++i) { + switch (value & MASKS[i]) { case 0: value >>= SHIFTS[i]; result = static_cast(result + SHIFTS[i]); @@ -299,43 +238,30 @@ uint8_t CBjkstUniqueValues::trailingZeros(uint32_t value) return result; } -CBjkstUniqueValues::CBjkstUniqueValues(std::size_t numberHashes, std::size_t maxSize) : - m_MaxSize(maxSize), - m_NumberHashes(numberHashes), - m_Sketch(TUInt32Vec()) -{ +CBjkstUniqueValues::CBjkstUniqueValues(std::size_t numberHashes, std::size_t maxSize) + : m_MaxSize(maxSize), m_NumberHashes(numberHashes), m_Sketch(TUInt32Vec()) { } -CBjkstUniqueValues::CBjkstUniqueValues(core::CStateRestoreTraverser &traverser) : - m_MaxSize(0), - m_NumberHashes(0) -{ +CBjkstUniqueValues::CBjkstUniqueValues(core::CStateRestoreTraverser& traverser) : m_MaxSize(0), m_NumberHashes(0) { traverser.traverseSubLevel(boost::bind(&CBjkstUniqueValues::acceptRestoreTraverser, this, _1)); } -void CBjkstUniqueValues::swap(CBjkstUniqueValues &other) -{ - if (this == &other) - { +void CBjkstUniqueValues::swap(CBjkstUniqueValues& other) { + if (this == &other) { return; } std::swap(m_MaxSize, other.m_MaxSize); std::swap(m_NumberHashes, other.m_NumberHashes); - try - { - TUInt32Vec *values = boost::get(&m_Sketch); - if (values) - { - TUInt32Vec *otherValues = boost::get(&other.m_Sketch); - if (otherValues) - { + try { + TUInt32Vec* values = boost::get(&m_Sketch); + if (values) { + TUInt32Vec* otherValues = boost::get(&other.m_Sketch); + if (otherValues) { values->swap(*otherValues); - } - else - { - SSketch &otherSketch = boost::get(other.m_Sketch); + } else { + SSketch& otherSketch = boost::get(other.m_Sketch); TUInt32Vec tmp; tmp.swap(*values); m_Sketch = SSketch(); @@ -343,18 +269,13 @@ void CBjkstUniqueValues::swap(CBjkstUniqueValues &other) other.m_Sketch = TUInt32Vec(); boost::get(other.m_Sketch).swap(tmp); } - } - else - { - SSketch &sketch = boost::get(m_Sketch); - SSketch *otherSketch = boost::get(&other.m_Sketch); - if (otherSketch) - { + } else { + SSketch& sketch = boost::get(m_Sketch); + SSketch* otherSketch = boost::get(&other.m_Sketch); + if (otherSketch) { sketch.swap(*otherSketch); - } - else - { - TUInt32Vec &otherValues = boost::get(other.m_Sketch); + } else { + TUInt32Vec& otherValues = boost::get(other.m_Sketch); TUInt32Vec tmp; tmp.swap(otherValues); other.m_Sketch = SSketch(); @@ -363,243 +284,162 @@ void CBjkstUniqueValues::swap(CBjkstUniqueValues &other) boost::get(m_Sketch).swap(tmp); } } - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception: " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception: " << e.what()); } } -bool CBjkstUniqueValues::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CBjkstUniqueValues::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(MAX_SIZE_TAG, m_MaxSize) RESTORE_BUILT_IN(NUMBER_HASHES_TAG, m_NumberHashes) - if (name == VALUES_TAG) - { + if (name == VALUES_TAG) { m_Sketch = TUInt32Vec(); - TUInt32Vec &values = boost::get(m_Sketch); - if 
(core::CPersistUtils::fromString(traverser.value(), values, DELIMITER) == false) - { + TUInt32Vec& values = boost::get(m_Sketch); + if (core::CPersistUtils::fromString(traverser.value(), values, DELIMITER) == false) { return false; } continue; } - if (name == SKETCH_TAG) - { + if (name == SKETCH_TAG) { m_Sketch = SSketch(); - SSketch &sketch = boost::get(m_Sketch); + SSketch& sketch = boost::get(m_Sketch); sketch.s_G.reserve(m_NumberHashes); sketch.s_H.reserve(m_NumberHashes); sketch.s_Z.reserve(m_NumberHashes); sketch.s_B.reserve(m_NumberHashes); - if (traverser.traverseSubLevel(boost::bind(&SSketch::acceptRestoreTraverser, - &sketch, _1, m_NumberHashes)) == false) - { + if (traverser.traverseSubLevel(boost::bind(&SSketch::acceptRestoreTraverser, &sketch, _1, m_NumberHashes)) == false) { return false; } continue; } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CBjkstUniqueValues::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CBjkstUniqueValues::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(MAX_SIZE_TAG, m_MaxSize); inserter.insertValue(NUMBER_HASHES_TAG, m_NumberHashes); - const TUInt32Vec *values = boost::get(&m_Sketch); - if (values) - { + const TUInt32Vec* values = boost::get(&m_Sketch); + if (values) { inserter.insertValue(VALUES_TAG, core::CPersistUtils::toString(*values, DELIMITER)); - } - else - { - try - { - const SSketch &sketch = boost::get(m_Sketch); + } else { + try { + const SSketch& sketch = boost::get(m_Sketch); inserter.insertLevel(SKETCH_TAG, boost::bind(&SSketch::acceptPersistInserter, &sketch, _1)); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception: " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception: " << e.what()); } } } -void CBjkstUniqueValues::add(uint32_t value) -{ - TUInt32Vec *values = boost::get(&m_Sketch); - if (values) - { +void CBjkstUniqueValues::add(uint32_t value) { + TUInt32Vec* values = boost::get(&m_Sketch); + if (values) { TUInt32VecItr i = std::lower_bound(values->begin(), values->end(), value); - if (i == values->end() || *i != value) - { + if (i == values->end() || *i != value) { values->insert(i, value); } this->sketch(); - } - else - { - try - { - SSketch &sketch = boost::get(m_Sketch); + } else { + try { + SSketch& sketch = boost::get(m_Sketch); sketch.add(m_MaxSize, value); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception: " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception: " << e.what()); } } } -void CBjkstUniqueValues::remove(uint32_t value) -{ - TUInt32Vec *values = boost::get(&m_Sketch); - if (values) - { +void CBjkstUniqueValues::remove(uint32_t value) { + TUInt32Vec* values = boost::get(&m_Sketch); + if (values) { TUInt32VecItr i = std::lower_bound(values->begin(), values->end(), value); - if (i != values->end() && *i == value) - { + if (i != values->end() && *i == value) { values->erase(i); } - } - else - { - try - { - SSketch &sketch = boost::get(m_Sketch); + } else { + try { + SSketch& sketch = boost::get(m_Sketch); sketch.remove(value); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception: " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception: " << e.what()); } } } -uint32_t CBjkstUniqueValues::number() const -{ - const TUInt32Vec *values = boost::get(&m_Sketch); - if (values == 0) - { - try - { - const SSketch &sketch = 
boost::get(m_Sketch); +uint32_t CBjkstUniqueValues::number() const { + const TUInt32Vec* values = boost::get(&m_Sketch); + if (values == 0) { + try { + const SSketch& sketch = boost::get(m_Sketch); return sketch.number(); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception: " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception: " << e.what()); } } return static_cast(values->size()); } -uint64_t CBjkstUniqueValues::checksum(uint64_t seed) const -{ +uint64_t CBjkstUniqueValues::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_MaxSize); seed = CChecksum::calculate(seed, m_NumberHashes); - const TUInt32Vec *values = boost::get(&m_Sketch); - if (values == 0) - { - try - { - const SSketch &sketch = boost::get(m_Sketch); + const TUInt32Vec* values = boost::get(&m_Sketch); + if (values == 0) { + try { + const SSketch& sketch = boost::get(m_Sketch); seed = CChecksum::calculate(seed, sketch.s_G); seed = CChecksum::calculate(seed, sketch.s_H); seed = CChecksum::calculate(seed, sketch.s_Z); return CChecksum::calculate(seed, sketch.s_B); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception: " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception: " << e.what()); } } return CChecksum::calculate(seed, *values); } -void CBjkstUniqueValues::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CBjkstUniqueValues::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CBjkstUniqueValues"); - const TUInt32Vec *values = boost::get(&m_Sketch); - if (values) - { + const TUInt32Vec* values = boost::get(&m_Sketch); + if (values) { core::CMemoryDebug::dynamicSize("values", *values, mem); - } - else - { - try - { - const SSketch &sketch = boost::get(m_Sketch); + } else { + try { + const SSketch& sketch = boost::get(m_Sketch); mem->addItem("SSketch", sizeof(SSketch)); - core::CMemoryDebug::dynamicSize("sketch.s_G", sketch.s_G, mem); + core::CMemoryDebug::dynamicSize("sketch.s_G", sketch.s_G, mem); core::CMemoryDebug::dynamicSize("sketch.s_H", sketch.s_H, mem); core::CMemoryDebug::dynamicSize("sketch.s_Z", sketch.s_Z, mem); core::CMemoryDebug::dynamicSize("sketch.s_B", sketch.s_B, mem); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception: " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception: " << e.what()); } } } -std::size_t CBjkstUniqueValues::memoryUsage() const -{ +std::size_t CBjkstUniqueValues::memoryUsage() const { std::size_t mem = 0; - const TUInt32Vec *values = boost::get(&m_Sketch); - if (values) - { + const TUInt32Vec* values = boost::get(&m_Sketch); + if (values) { mem += core::CMemory::dynamicSize(*values); - } - else - { - try - { - const SSketch &sketch = boost::get(m_Sketch); + } else { + try { + const SSketch& sketch = boost::get(m_Sketch); mem += sizeof(SSketch); mem += core::CMemory::dynamicSize(sketch.s_G); mem += core::CMemory::dynamicSize(sketch.s_H); mem += core::CMemory::dynamicSize(sketch.s_Z); mem += core::CMemory::dynamicSize(sketch.s_B); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception: " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception: " << e.what()); } } return mem; } -void CBjkstUniqueValues::sketch() -{ - static const std::size_t UINT8_SIZE = sizeof(uint8_t); +void CBjkstUniqueValues::sketch() { + static const std::size_t UINT8_SIZE = sizeof(uint8_t); static const std::size_t 
UINT32_SIZE = sizeof(uint32_t); - static const std::size_t HASH_SIZE = sizeof(core::CHashing::CUniversalHash::CUInt32UnrestrictedHash); - static const std::size_t VEC8_SIZE = sizeof(TUInt8Vec); - static const std::size_t VEC32_SIZE = sizeof(TUInt32Vec); + static const std::size_t HASH_SIZE = sizeof(core::CHashing::CUniversalHash::CUInt32UnrestrictedHash); + static const std::size_t VEC8_SIZE = sizeof(TUInt8Vec); + static const std::size_t VEC32_SIZE = sizeof(TUInt32Vec); static const std::size_t SKETCH_SIZE = sizeof(SSketch); - TUInt32Vec *values = boost::get(&m_Sketch); - if (values) - { + TUInt32Vec* values = boost::get(&m_Sketch); + if (values) { std::size_t valuesSize = VEC32_SIZE + UINT32_SIZE * values->capacity(); - std::size_t sketchSize = SKETCH_SIZE - + m_NumberHashes * ( 2 * HASH_SIZE - + 1 * UINT8_SIZE - + 1 * VEC8_SIZE - + 3 * m_MaxSize * UINT8_SIZE); - if (valuesSize > sketchSize) - { - if ( values->capacity() > values->size() - && values->size() < (sketchSize - VEC32_SIZE) / UINT32_SIZE) - { + std::size_t sketchSize = + SKETCH_SIZE + m_NumberHashes * (2 * HASH_SIZE + 1 * UINT8_SIZE + 1 * VEC8_SIZE + 3 * m_MaxSize * UINT8_SIZE); + if (valuesSize > sketchSize) { + if (values->capacity() > values->size() && values->size() < (sketchSize - VEC32_SIZE) / UINT32_SIZE) { TUInt32Vec shrunk; shrunk.reserve((sketchSize - VEC32_SIZE) / UINT32_SIZE); shrunk.assign(values->begin(), values->end()); @@ -612,200 +452,136 @@ void CBjkstUniqueValues::sketch() TUInt32Vec values_; values_.swap(*values); m_Sketch = SSketch(m_NumberHashes); - for (std::size_t i = 0u; i < values_.size(); ++i) - { + for (std::size_t i = 0u; i < values_.size(); ++i) { this->add(values_[i]); } } } } -CBjkstUniqueValues::SSketch::SSketch() -{ +CBjkstUniqueValues::SSketch::SSketch() { } -CBjkstUniqueValues::SSketch::SSketch(std::size_t numberHashes) -{ +CBjkstUniqueValues::SSketch::SSketch(std::size_t numberHashes) { core::CHashing::CUniversalHash::generateHashes(numberHashes, s_G); core::CHashing::CUniversalHash::generateHashes(numberHashes, s_H); s_Z.resize(numberHashes, 0); s_B.resize(numberHashes, TUInt8Vec()); } -void CBjkstUniqueValues::SSketch::swap(SSketch &other) -{ +void CBjkstUniqueValues::SSketch::swap(SSketch& other) { s_G.swap(other.s_G); s_H.swap(other.s_H); s_Z.swap(other.s_Z); s_B.swap(other.s_B); } -bool CBjkstUniqueValues::SSketch::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser, - std::size_t numberHashes) -{ +bool CBjkstUniqueValues::SSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::size_t numberHashes) { core::CHashing::CUniversalHash::CFromString hashFromString(PAIR_DELIMITER); - do - { - const std::string &name = traverser.name(); - if (name == HASH_G_TAG) - { - if ( core::CPersistUtils::fromString(traverser.value(), - hashFromString, - s_G, - DELIMITER) == false - || s_G.size() != numberHashes) - { + do { + const std::string& name = traverser.name(); + if (name == HASH_G_TAG) { + if (core::CPersistUtils::fromString(traverser.value(), hashFromString, s_G, DELIMITER) == false || s_G.size() != numberHashes) { LOG_ERROR("Invalid hashes in " << traverser.value()); return false; } - } - else if (name == HASH_H_TAG) - { - if ( core::CPersistUtils::fromString(traverser.value(), - hashFromString, - s_H, - DELIMITER) == false - || s_H.size() != numberHashes) - { + } else if (name == HASH_H_TAG) { + if (core::CPersistUtils::fromString(traverser.value(), hashFromString, s_H, DELIMITER) == false || s_H.size() != numberHashes) { LOG_ERROR("Invalid hashes in " << 
traverser.value()); return false; } - } - else if (name == Z_TAG) - { - if ( core::CPersistUtils::fromString(traverser.value(), - CFromString(), - s_Z, - DELIMITER) == false - || s_Z.size() != numberHashes) - { + } else if (name == Z_TAG) { + if (core::CPersistUtils::fromString(traverser.value(), CFromString(), s_Z, DELIMITER) == false || + s_Z.size() != numberHashes) { LOG_ERROR("Invalid zeros in " << traverser.value()); return false; } - } - else if (name == B_TAG) - { + } else if (name == B_TAG) { s_B.push_back(TUInt8Vec()); - if (core::CPersistUtils::fromString(traverser.value(), - CFromString(), - s_B.back(), - DELIMITER) == false) - { + if (core::CPersistUtils::fromString(traverser.value(), CFromString(), s_B.back(), DELIMITER) == false) { LOG_ERROR("Invalid values in " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); - if (s_B.size() != numberHashes) - { - LOG_ERROR("Invalid number of rows " << s_B.size() - << " expected " << numberHashes); + if (s_B.size() != numberHashes) { + LOG_ERROR("Invalid number of rows " << s_B.size() << " expected " << numberHashes); return false; - } return true; } -void CBjkstUniqueValues::SSketch::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CBjkstUniqueValues::SSketch::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CHashing::CUniversalHash::CToString hashToString(PAIR_DELIMITER); - inserter.insertValue(HASH_G_TAG, - core::CPersistUtils::toString(s_G, hashToString, DELIMITER)); - inserter.insertValue(HASH_H_TAG, - core::CPersistUtils::toString(s_H, hashToString, DELIMITER)); + inserter.insertValue(HASH_G_TAG, core::CPersistUtils::toString(s_G, hashToString, DELIMITER)); + inserter.insertValue(HASH_H_TAG, core::CPersistUtils::toString(s_H, hashToString, DELIMITER)); inserter.insertValue(Z_TAG, core::CPersistUtils::toString(s_Z, CToString(), DELIMITER)); - for (std::size_t i = 0u; i < s_B.size(); ++i) - { + for (std::size_t i = 0u; i < s_B.size(); ++i) { inserter.insertValue(B_TAG, core::CPersistUtils::toString(s_B[i], CToString(), DELIMITER)); } } -void CBjkstUniqueValues::SSketch::add(std::size_t maxSize, - uint32_t value) -{ +void CBjkstUniqueValues::SSketch::add(std::size_t maxSize, uint32_t value) { LOG_TRACE("Adding " << value); - for (std::size_t i = 0u; i < s_Z.size(); ++i) - { + for (std::size_t i = 0u; i < s_Z.size(); ++i) { uint8_t zeros = trailingZeros((s_H[i])(value)); - if (zeros >= s_Z[i]) - { - TUInt8Vec &b = s_B[i]; + if (zeros >= s_Z[i]) { + TUInt8Vec& b = s_B[i]; uint16_t g = static_cast((s_G[i])(value)); - LOG_TRACE("g = " << g - << ", zeros = " << static_cast(zeros)); - if (detail::insert(b, g, zeros)) - { - while (b.size() >= 3 * maxSize) - { + LOG_TRACE("g = " << g << ", zeros = " << static_cast(zeros)); + if (detail::insert(b, g, zeros)) { + while (b.size() >= 3 * maxSize) { ++s_Z[i]; detail::prune(b, s_Z[i]); } - if (b.capacity() >= 3 * maxSize) - { + if (b.capacity() >= 3 * maxSize) { TUInt8Vec shrunk; shrunk.reserve(3 * maxSize); shrunk.assign(b.begin(), b.end()); b.swap(shrunk); } - LOG_TRACE("|B| = " << b.size() - << ", z = " << static_cast(s_Z[i])); + LOG_TRACE("|B| = " << b.size() << ", z = " << static_cast(s_Z[i])); } } } } -void CBjkstUniqueValues::SSketch::remove(uint32_t value) -{ - for (std::size_t i = 0u; i < s_Z.size(); ++i) - { +void CBjkstUniqueValues::SSketch::remove(uint32_t value) { + for (std::size_t i = 0u; i < s_Z.size(); ++i) { uint8_t zeros = trailingZeros((s_H[i])(value)); - if (zeros >= s_Z[i]) 
- { - TUInt8Vec &b = s_B[i]; + if (zeros >= s_Z[i]) { + TUInt8Vec& b = s_B[i]; uint16_t g = static_cast((s_G[i])(value)); - LOG_TRACE("g = " << g - << ", zeros = " << static_cast(zeros)); + LOG_TRACE("g = " << g << ", zeros = " << static_cast(zeros)); detail::remove(b, g); } } } -uint32_t CBjkstUniqueValues::SSketch::number() const -{ +uint32_t CBjkstUniqueValues::SSketch::number() const { using TUInt32Vec = std::vector; // This uses the median trick to reduce the error. TUInt32Vec estimates; estimates.reserve(s_Z.size()); - for (std::size_t i = 0u; i < s_Z.size(); ++i) - { - LOG_TRACE("|B| = " << s_B[i].size() - << ", z = " << static_cast(s_Z[i])); - estimates.push_back(static_cast(s_B[i].size() / 3) - * (1 << s_Z[i])); + for (std::size_t i = 0u; i < s_Z.size(); ++i) { + LOG_TRACE("|B| = " << s_B[i].size() << ", z = " << static_cast(s_Z[i])); + estimates.push_back(static_cast(s_B[i].size() / 3) * (1 << s_Z[i])); } - LOG_TRACE("estimates = " - << core::CContainerPrinter::print(estimates)); + LOG_TRACE("estimates = " << core::CContainerPrinter::print(estimates)); std::size_t n = estimates.size(); - if (n % 2 == 0) - { - std::partial_sort(estimates.begin(), - estimates.begin() + n/2 + 1, - estimates.end()); - return (estimates[n/2] + estimates[n/2-1]) / 2; + if (n % 2 == 0) { + std::partial_sort(estimates.begin(), estimates.begin() + n / 2 + 1, estimates.end()); + return (estimates[n / 2] + estimates[n / 2 - 1]) / 2; } - std::nth_element(estimates.begin(), - estimates.begin() + n/2, - estimates.end()); - return estimates[n/2]; + std::nth_element(estimates.begin(), estimates.begin() + n / 2, estimates.end()); + return estimates[n / 2]; } - } } diff --git a/lib/maths/CCalendarComponent.cc b/lib/maths/CCalendarComponent.cc index ce27e66366..93dc1f9941 100644 --- a/lib/maths/CCalendarComponent.cc +++ b/lib/maths/CCalendarComponent.cc @@ -7,10 +7,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -25,193 +25,157 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleDoublePr = maths_t::TDoubleDoublePr; const std::string DECOMPOSITION_COMPONENT_TAG{"a"}; const std::string BUCKETING_TAG{"b"}; const std::string EMPTY_STRING; } -CCalendarComponent::CCalendarComponent(const CCalendarFeature &feature, +CCalendarComponent::CCalendarComponent(const CCalendarFeature& feature, std::size_t maxSize, double decayRate, double minimumBucketLength, CSplineTypes::EBoundaryCondition boundaryCondition, CSplineTypes::EType valueInterpolationType, - CSplineTypes::EType varianceInterpolationType) : - CDecompositionComponent{maxSize, boundaryCondition, valueInterpolationType, varianceInterpolationType}, - m_Bucketing{feature, decayRate, minimumBucketLength} -{} + CSplineTypes::EType varianceInterpolationType) + : CDecompositionComponent{maxSize, boundaryCondition, valueInterpolationType, varianceInterpolationType}, + m_Bucketing{feature, decayRate, minimumBucketLength} { +} CCalendarComponent::CCalendarComponent(double decayRate, double minimumBucketLength, - core::CStateRestoreTraverser &traverser, + core::CStateRestoreTraverser& traverser, CSplineTypes::EType valueInterpolationType, - CSplineTypes::EType varianceInterpolationType) : - CDecompositionComponent{0, CSplineTypes::E_Periodic, valueInterpolationType, varianceInterpolationType} -{ - traverser.traverseSubLevel(boost::bind(&CCalendarComponent::acceptRestoreTraverser, - this, decayRate, minimumBucketLength, _1)); + CSplineTypes::EType 
varianceInterpolationType) + : CDecompositionComponent{0, CSplineTypes::E_Periodic, valueInterpolationType, varianceInterpolationType} { + traverser.traverseSubLevel(boost::bind(&CCalendarComponent::acceptRestoreTraverser, this, decayRate, minimumBucketLength, _1)); } -void CCalendarComponent::swap(CCalendarComponent &other) -{ +void CCalendarComponent::swap(CCalendarComponent& other) { this->CDecompositionComponent::swap(other); m_Bucketing.swap(other.m_Bucketing); } -bool CCalendarComponent::acceptRestoreTraverser(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; +bool CCalendarComponent::acceptRestoreTraverser(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE(DECOMPOSITION_COMPONENT_TAG, - traverser.traverseSubLevel(boost::bind(&CDecompositionComponent::acceptRestoreTraverser, - static_cast(this), _1))) + traverser.traverseSubLevel( + boost::bind(&CDecompositionComponent::acceptRestoreTraverser, static_cast(this), _1))) RESTORE_SETUP_TEARDOWN(BUCKETING_TAG, CCalendarComponentAdaptiveBucketing bucketing(decayRate, minimumBucketLength, traverser), true, m_Bucketing.swap(bucketing)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CCalendarComponent::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(DECOMPOSITION_COMPONENT_TAG, - boost::bind(&CDecompositionComponent::acceptPersistInserter, - static_cast(this), _1)); - inserter.insertLevel(BUCKETING_TAG, boost::bind( - &CCalendarComponentAdaptiveBucketing::acceptPersistInserter, &m_Bucketing, _1)); +void CCalendarComponent::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel( + DECOMPOSITION_COMPONENT_TAG, + boost::bind(&CDecompositionComponent::acceptPersistInserter, static_cast(this), _1)); + inserter.insertLevel(BUCKETING_TAG, boost::bind(&CCalendarComponentAdaptiveBucketing::acceptPersistInserter, &m_Bucketing, _1)); } -bool CCalendarComponent::initialized() const -{ +bool CCalendarComponent::initialized() const { return this->CDecompositionComponent::initialized(); } -void CCalendarComponent::initialize() -{ +void CCalendarComponent::initialize() { this->clear(); m_Bucketing.initialize(this->maxSize()); } -std::size_t CCalendarComponent::size() const -{ +std::size_t CCalendarComponent::size() const { return m_Bucketing.size(); } -void CCalendarComponent::clear() -{ +void CCalendarComponent::clear() { this->CDecompositionComponent::clear(); - if (m_Bucketing.initialized()) - { + if (m_Bucketing.initialized()) { m_Bucketing.clear(); } } -void CCalendarComponent::linearScale(core_t::TTime time, double scale) -{ +void CCalendarComponent::linearScale(core_t::TTime time, double scale) { m_Bucketing.linearScale(scale); this->interpolate(time, false); } -void CCalendarComponent::add(core_t::TTime time, double value, double weight) -{ +void CCalendarComponent::add(core_t::TTime time, double value, double weight) { m_Bucketing.add(time, value, weight); } -void CCalendarComponent::interpolate(core_t::TTime time, bool refine) -{ - if (refine) - { +void CCalendarComponent::interpolate(core_t::TTime time, bool refine) { + if (refine) { m_Bucketing.refine(time); } TDoubleVec knots; TDoubleVec values; TDoubleVec variances; - if (m_Bucketing.knots(time, this->boundaryCondition(), knots, values, variances)) - { + if (m_Bucketing.knots(time, 
this->boundaryCondition(), knots, values, variances)) { this->CDecompositionComponent::interpolate(knots, values, variances); } } -double CCalendarComponent::decayRate() const -{ +double CCalendarComponent::decayRate() const { return m_Bucketing.decayRate(); } -void CCalendarComponent::decayRate(double decayRate) -{ +void CCalendarComponent::decayRate(double decayRate) { return m_Bucketing.decayRate(decayRate); } -void CCalendarComponent::propagateForwardsByTime(double time) -{ +void CCalendarComponent::propagateForwardsByTime(double time) { m_Bucketing.propagateForwardsByTime(time); } -CCalendarFeature CCalendarComponent::feature() const -{ +CCalendarFeature CCalendarComponent::feature() const { return m_Bucketing.feature(); } -TDoubleDoublePr CCalendarComponent::value(core_t::TTime time, double confidence) const -{ +TDoubleDoublePr CCalendarComponent::value(core_t::TTime time, double confidence) const { double offset{static_cast(this->feature().offset(time))}; double n{m_Bucketing.count(time)}; return this->CDecompositionComponent::value(offset, n, confidence); } -double CCalendarComponent::meanValue() const -{ +double CCalendarComponent::meanValue() const { return this->CDecompositionComponent::meanValue(); } -TDoubleDoublePr CCalendarComponent::variance(core_t::TTime time, double confidence) const -{ +TDoubleDoublePr CCalendarComponent::variance(core_t::TTime time, double confidence) const { double offset{static_cast(this->feature().offset(time))}; double n{m_Bucketing.count(time)}; return this->CDecompositionComponent::variance(offset, n, confidence); } -double CCalendarComponent::meanVariance() const -{ +double CCalendarComponent::meanVariance() const { return this->CDecompositionComponent::meanVariance(); } -double CCalendarComponent::heteroscedasticity() const -{ +double CCalendarComponent::heteroscedasticity() const { return this->CDecompositionComponent::heteroscedasticity(); } -uint64_t CCalendarComponent::checksum(uint64_t seed) const -{ +uint64_t CCalendarComponent::checksum(uint64_t seed) const { seed = this->CDecompositionComponent::checksum(seed); return CChecksum::calculate(seed, m_Bucketing); } -void CCalendarComponent::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CCalendarComponent::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCalendarComponent"); core::CMemoryDebug::dynamicSize("m_Bucketing", m_Bucketing, mem); core::CMemoryDebug::dynamicSize("m_Splines", this->splines(), mem); } -std::size_t CCalendarComponent::memoryUsage() const -{ +std::size_t CCalendarComponent::memoryUsage() const { return core::CMemory::dynamicSize(m_Bucketing) + core::CMemory::dynamicSize(this->splines()); } - } } diff --git a/lib/maths/CCalendarComponentAdaptiveBucketing.cc b/lib/maths/CCalendarComponentAdaptiveBucketing.cc index afccd07bfb..6c7293dfac 100644 --- a/lib/maths/CCalendarComponentAdaptiveBucketing.cc +++ b/lib/maths/CCalendarComponentAdaptiveBucketing.cc @@ -25,19 +25,15 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TFloatMeanVarAccumulator = CCalendarComponentAdaptiveBucketing::TFloatMeanVarAccumulator; //! Clear a vector and recover its memory. 
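The idiom behind this helper: vector::clear keeps the allocation, whereas swapping with a default constructed temporary actually releases it (C++11's shrink_to_fit is the non-binding alternative). A minimal standalone illustration:

    #include <iostream>
    #include <vector>

    int main() {
        std::vector<double> v(1000000, 1.0);
        v.clear();                         // size 0, but capacity is unchanged
        std::cout << v.capacity() << '\n';
        std::vector<double>().swap(v);     // swapping with a temporary frees it
        std::cout << v.capacity() << '\n'; // 0
        return 0;
    }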
template -void clearAndShrink(std::vector &vector) -{ +void clearAndShrink(std::vector& vector) { std::vector empty; empty.swap(vector); } @@ -46,56 +42,46 @@ const std::string ADAPTIVE_BUCKETING_TAG{"a"}; const std::string FEATURE_TAG{"b"}; const std::string VALUES_TAG{"c"}; const std::string EMPTY_STRING; - } -CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing() : - CAdaptiveBucketing{0.0, 0.0} -{} +CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing() : CAdaptiveBucketing{0.0, 0.0} { +} CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing(CCalendarFeature feature, double decayRate, - double minimumBucketLength) : - CAdaptiveBucketing{decayRate, minimumBucketLength}, - m_Feature{feature} -{} + double minimumBucketLength) + : CAdaptiveBucketing{decayRate, minimumBucketLength}, m_Feature{feature} { +} CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing(double decayRate, double minimumBucketLength, - core::CStateRestoreTraverser &traverser) : - CAdaptiveBucketing{decayRate, minimumBucketLength} -{ + core::CStateRestoreTraverser& traverser) + : CAdaptiveBucketing{decayRate, minimumBucketLength} { traverser.traverseSubLevel(boost::bind(&CCalendarComponentAdaptiveBucketing::acceptRestoreTraverser, this, _1)); } -void CCalendarComponentAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CCalendarComponentAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel(ADAPTIVE_BUCKETING_TAG, - boost::bind(&CAdaptiveBucketing::acceptPersistInserter, - static_cast(this), _1)); + boost::bind(&CAdaptiveBucketing::acceptPersistInserter, static_cast(this), _1)); inserter.insertValue(FEATURE_TAG, m_Feature.toDelimited()); core::CPersistUtils::persist(VALUES_TAG, m_Values, inserter); } -void CCalendarComponentAdaptiveBucketing::swap(CCalendarComponentAdaptiveBucketing &other) -{ +void CCalendarComponentAdaptiveBucketing::swap(CCalendarComponentAdaptiveBucketing& other) { this->CAdaptiveBucketing::swap(other); std::swap(m_Feature, other.m_Feature); m_Values.swap(other.m_Values); } -bool CCalendarComponentAdaptiveBucketing::initialized() const -{ +bool CCalendarComponentAdaptiveBucketing::initialized() const { return this->CAdaptiveBucketing::initialized(); } -bool CCalendarComponentAdaptiveBucketing::initialize(std::size_t n) -{ +bool CCalendarComponentAdaptiveBucketing::initialize(std::size_t n) { double a{0.0}; double b{static_cast(m_Feature.window())}; - if (this->CAdaptiveBucketing::initialize(a, b, n)) - { + if (this->CAdaptiveBucketing::initialize(a, b, n)) { m_Values.clear(); m_Values.resize(this->size()); return true; @@ -103,92 +89,72 @@ bool CCalendarComponentAdaptiveBucketing::initialize(std::size_t n) return false; } -std::size_t CCalendarComponentAdaptiveBucketing::size() const -{ +std::size_t CCalendarComponentAdaptiveBucketing::size() const { return this->CAdaptiveBucketing::size(); } -void CCalendarComponentAdaptiveBucketing::clear() -{ +void CCalendarComponentAdaptiveBucketing::clear() { this->CAdaptiveBucketing::clear(); clearAndShrink(m_Values); } -void CCalendarComponentAdaptiveBucketing::linearScale(double scale) -{ - for (auto &value : m_Values) - { +void CCalendarComponentAdaptiveBucketing::linearScale(double scale) { + for (auto& value : m_Values) { CBasicStatistics::moment<0>(value) *= scale; } } -void CCalendarComponentAdaptiveBucketing::add(core_t::TTime time, double value, double weight) -{ +void 
CCalendarComponentAdaptiveBucketing::add(core_t::TTime time, double value, double weight) { std::size_t bucket{0}; - if (this->initialized() && this->bucket(time, bucket)) - { + if (this->initialized() && this->bucket(time, bucket)) { this->CAdaptiveBucketing::add(bucket, time, weight); TFloatMeanVarAccumulator variance{m_Values[bucket]}; variance.add(value, weight * weight); m_Values[bucket].add(value, weight); - CBasicStatistics::moment<1>(m_Values[bucket]) = - CBasicStatistics::maximumLikelihoodVariance(variance); + CBasicStatistics::moment<1>(m_Values[bucket]) = CBasicStatistics::maximumLikelihoodVariance(variance); } } -CCalendarFeature CCalendarComponentAdaptiveBucketing::feature() const -{ +CCalendarFeature CCalendarComponentAdaptiveBucketing::feature() const { return m_Feature; } -void CCalendarComponentAdaptiveBucketing::decayRate(double value) -{ +void CCalendarComponentAdaptiveBucketing::decayRate(double value) { this->CAdaptiveBucketing::decayRate(value); } -double CCalendarComponentAdaptiveBucketing::decayRate() const -{ +double CCalendarComponentAdaptiveBucketing::decayRate() const { return this->CAdaptiveBucketing::decayRate(); } -void CCalendarComponentAdaptiveBucketing::propagateForwardsByTime(double time) -{ - if (time < 0.0) - { +void CCalendarComponentAdaptiveBucketing::propagateForwardsByTime(double time) { + if (time < 0.0) { LOG_ERROR("Can't propagate bucketing backwards in time"); - } - else if (this->initialized()) - { + } else if (this->initialized()) { double factor{::exp(-this->CAdaptiveBucketing::decayRate() * time)}; this->CAdaptiveBucketing::age(factor); - for (auto &value : m_Values) - { + for (auto& value : m_Values) { value.age(factor); } } } -double CCalendarComponentAdaptiveBucketing::minimumBucketLength() const -{ +double CCalendarComponentAdaptiveBucketing::minimumBucketLength() const { return this->CAdaptiveBucketing::minimumBucketLength(); } -void CCalendarComponentAdaptiveBucketing::refine(core_t::TTime time) -{ +void CCalendarComponentAdaptiveBucketing::refine(core_t::TTime time) { this->CAdaptiveBucketing::refine(time); } -double CCalendarComponentAdaptiveBucketing::count(core_t::TTime time) const -{ - const TFloatMeanVarAccumulator *value = this->value(time); +double CCalendarComponentAdaptiveBucketing::count(core_t::TTime time) const { + const TFloatMeanVarAccumulator* value = this->value(time); return value ? 
static_cast(CBasicStatistics::count(*value)) : 0.0; } -const TFloatMeanVarAccumulator *CCalendarComponentAdaptiveBucketing::value(core_t::TTime time) const -{ - const TFloatMeanVarAccumulator *result{0}; - if (this->initialized()) - { +const TFloatMeanVarAccumulator* CCalendarComponentAdaptiveBucketing::value(core_t::TTime time) const { + const TFloatMeanVarAccumulator* result{0}; + if (this->initialized()) { std::size_t bucket{0}; this->bucket(time, bucket); bucket = CTools::truncate(bucket, std::size_t(0), m_Values.size() - 1); @@ -199,71 +165,59 @@ const TFloatMeanVarAccumulator *CCalendarComponentAdaptiveBucketing::value(core_ bool CCalendarComponentAdaptiveBucketing::knots(core_t::TTime time, CSplineTypes::EBoundaryCondition boundary, - TDoubleVec &knots, - TDoubleVec &values, - TDoubleVec &variances) const -{ + TDoubleVec& knots, + TDoubleVec& values, + TDoubleVec& variances) const { return this->CAdaptiveBucketing::knots(time, boundary, knots, values, variances); } -uint64_t CCalendarComponentAdaptiveBucketing::checksum(uint64_t seed) const -{ +uint64_t CCalendarComponentAdaptiveBucketing::checksum(uint64_t seed) const { seed = this->CAdaptiveBucketing::checksum(seed); seed = CChecksum::calculate(seed, m_Feature); return CChecksum::calculate(seed, m_Values); } -void CCalendarComponentAdaptiveBucketing::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CCalendarComponentAdaptiveBucketing::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCalendarComponentAdaptiveBucketing"); core::CMemoryDebug::dynamicSize("m_Endpoints", this->CAdaptiveBucketing::endpoints(), mem); core::CMemoryDebug::dynamicSize("m_Centres", this->CAdaptiveBucketing::centres(), mem); core::CMemoryDebug::dynamicSize("m_Values", m_Values, mem); } -std::size_t CCalendarComponentAdaptiveBucketing::memoryUsage() const -{ +std::size_t CCalendarComponentAdaptiveBucketing::memoryUsage() const { return this->CAdaptiveBucketing::memoryUsage() + core::CMemory::dynamicSize(m_Values); } -const CCalendarComponentAdaptiveBucketing::TFloatVec &CCalendarComponentAdaptiveBucketing::endpoints() const -{ +const CCalendarComponentAdaptiveBucketing::TFloatVec& CCalendarComponentAdaptiveBucketing::endpoints() const { return this->CAdaptiveBucketing::endpoints(); } -double CCalendarComponentAdaptiveBucketing::count() const -{ +double CCalendarComponentAdaptiveBucketing::count() const { return this->CAdaptiveBucketing::count(); } -CCalendarComponentAdaptiveBucketing::TDoubleVec CCalendarComponentAdaptiveBucketing::values(core_t::TTime time) const -{ +CCalendarComponentAdaptiveBucketing::TDoubleVec CCalendarComponentAdaptiveBucketing::values(core_t::TTime time) const { return this->CAdaptiveBucketing::values(time); } -CCalendarComponentAdaptiveBucketing::TDoubleVec CCalendarComponentAdaptiveBucketing::variances() const -{ +CCalendarComponentAdaptiveBucketing::TDoubleVec CCalendarComponentAdaptiveBucketing::variances() const { return this->CAdaptiveBucketing::variances(); } -bool CCalendarComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; - RESTORE(ADAPTIVE_BUCKETING_TAG, traverser.traverseSubLevel( - boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, - static_cast(this), _1))); +bool CCalendarComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE(ADAPTIVE_BUCKETING_TAG, + 
traverser.traverseSubLevel( + boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, static_cast(this), _1))); RESTORE(FEATURE_TAG, m_Feature.fromDelimited(traverser.value())); RESTORE(VALUES_TAG, core::CPersistUtils::restore(VALUES_TAG, m_Values, traverser)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec &endpoints) -{ +void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { // Values are assigned based on their intersection with each // bucket in the previous configuration. The regression and // variance are computed using the appropriate combination @@ -292,69 +246,58 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec &endpoints) std::size_t m{m_Values.size()}; std::size_t n{endpoints.size()}; - if (m+1 != n) - { + if (m + 1 != n) { LOG_ERROR("Inconsistent end points and regressions"); return; } - TFloatVec &m_Endpoints{this->CAdaptiveBucketing::endpoints()}; - TFloatVec &m_Centres{this->CAdaptiveBucketing::centres()}; + TFloatVec& m_Endpoints{this->CAdaptiveBucketing::endpoints()}; + TFloatVec& m_Centres{this->CAdaptiveBucketing::centres()}; TFloatMeanVarVec values; TFloatVec centres; values.reserve(m); centres.reserve(m); - for (std::size_t i = 1u; i < n; ++i) - { - double yl{m_Endpoints[i-1]}; + for (std::size_t i = 1u; i < n; ++i) { + double yl{m_Endpoints[i - 1]}; double yr{m_Endpoints[i]}; - std::size_t r = std::lower_bound(endpoints.begin(), - endpoints.end(), yr) - endpoints.begin(); + std::size_t r = std::lower_bound(endpoints.begin(), endpoints.end(), yr) - endpoints.begin(); r = CTools::truncate(r, std::size_t(1), n - 1); - std::size_t l = std::upper_bound(endpoints.begin(), - endpoints.end(), yl) - endpoints.begin(); + std::size_t l = std::upper_bound(endpoints.begin(), endpoints.end(), yl) - endpoints.begin(); l = CTools::truncate(l, std::size_t(1), r); LOG_TRACE("interval = [" << yl << "," << yr << "]"); LOG_TRACE("l = " << l << ", r = " << r); - LOG_TRACE("[x(l), x(r)] = [" << endpoints[l-1] << "," << endpoints[r] << "]"); + LOG_TRACE("[x(l), x(r)] = [" << endpoints[l - 1] << "," << endpoints[r] << "]"); - double xl{endpoints[l-1]}; + double xl{endpoints[l - 1]}; double xr{endpoints[l]}; - if (l == r) - { - double interval{m_Endpoints[i] - m_Endpoints[i-1]}; + if (l == r) { + double interval{m_Endpoints[i] - m_Endpoints[i - 1]}; double w{CTools::truncate(interval / (xr - xl), 0.0, 1.0)}; - values.push_back(CBasicStatistics::scaled(m_Values[l-1], w * w)); - centres.push_back(CTools::truncate(static_cast(m_Centres[l-1]), yl, yr)); - } - else - { - double interval{xr - m_Endpoints[i-1]}; + values.push_back(CBasicStatistics::scaled(m_Values[l - 1], w * w)); + centres.push_back(CTools::truncate(static_cast(m_Centres[l - 1]), yl, yr)); + } else { + double interval{xr - m_Endpoints[i - 1]}; double w{CTools::truncate(interval / (xr - xl), 0.0, 1.0)}; - TDoubleMeanVarAccumulator value{CBasicStatistics::scaled(m_Values[l-1], w)}; + TDoubleMeanVarAccumulator value{CBasicStatistics::scaled(m_Values[l - 1], w)}; TDoubleMeanAccumulator centre{ - CBasicStatistics::accumulator(w * CBasicStatistics::count(m_Values[l-1]), - static_cast(m_Centres[l-1]))}; - double count{w * w * CBasicStatistics::count(m_Values[l-1])}; - while (++l < r) - { - value += m_Values[l-1]; - centre += CBasicStatistics::accumulator(CBasicStatistics::count(m_Values[l-1]), - static_cast(m_Centres[l-1])); - count += CBasicStatistics::count(m_Values[l-1]); + 
CBasicStatistics::accumulator(w * CBasicStatistics::count(m_Values[l - 1]), static_cast(m_Centres[l - 1]))}; + double count{w * w * CBasicStatistics::count(m_Values[l - 1])}; + while (++l < r) { + value += m_Values[l - 1]; + centre += CBasicStatistics::accumulator(CBasicStatistics::count(m_Values[l - 1]), static_cast(m_Centres[l - 1])); + count += CBasicStatistics::count(m_Values[l - 1]); } - xl = endpoints[l-1]; + xl = endpoints[l - 1]; xr = endpoints[l]; interval = m_Endpoints[i] - xl; w = CTools::truncate(interval / (xr - xl), 0.0, 1.0); - value += CBasicStatistics::scaled(m_Values[l-1], w); - centre += CBasicStatistics::accumulator(w * CBasicStatistics::count(m_Values[l-1]), - static_cast(m_Centres[l-1])); - count += w * w * CBasicStatistics::count(m_Values[l-1]); + value += CBasicStatistics::scaled(m_Values[l - 1], w); + centre += CBasicStatistics::accumulator(w * CBasicStatistics::count(m_Values[l - 1]), static_cast(m_Centres[l - 1])); + count += w * w * CBasicStatistics::count(m_Values[l - 1]); double scale{count / CBasicStatistics::count(value)}; values.push_back(CBasicStatistics::scaled(value, scale)); centres.push_back(CTools::truncate(CBasicStatistics::mean(centre), yl, yr)); @@ -366,17 +309,14 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec &endpoints) // that is equal to the number of points they will receive in one // period. double count{0.0}; - for (const auto &value : values) - { + for (const auto& value : values) { count += CBasicStatistics::count(value); } count /= (endpoints[m] - endpoints[0]); - for (std::size_t i = 0u; i < m; ++i) - { + for (std::size_t i = 0u; i < m; ++i) { double ci{CBasicStatistics::count(values[i])}; - if (ci > 0.0) - { - CBasicStatistics::scale(count * (endpoints[i+1] - endpoints[i]) / ci, values[i]); + if (ci > 0.0) { + CBasicStatistics::scale(count * (endpoints[i + 1] - endpoints[i]) / ci, values[i]); } } @@ -390,35 +330,28 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec &endpoints) m_Centres.swap(centres); } -bool CCalendarComponentAdaptiveBucketing::inWindow(core_t::TTime time) const -{ +bool CCalendarComponentAdaptiveBucketing::inWindow(core_t::TTime time) const { return m_Feature.inWindow(time); } -void CCalendarComponentAdaptiveBucketing::add(std::size_t bucket, core_t::TTime /*time*/, double value, double weight) -{ +void CCalendarComponentAdaptiveBucketing::add(std::size_t bucket, core_t::TTime /*time*/, double value, double weight) { m_Values[bucket].add(value, weight); } -double CCalendarComponentAdaptiveBucketing::offset(core_t::TTime time) const -{ +double CCalendarComponentAdaptiveBucketing::offset(core_t::TTime time) const { return static_cast(m_Feature.offset(time)); } -double CCalendarComponentAdaptiveBucketing::count(std::size_t bucket) const -{ +double CCalendarComponentAdaptiveBucketing::count(std::size_t bucket) const { return CBasicStatistics::count(m_Values[bucket]); } -double CCalendarComponentAdaptiveBucketing::predict(std::size_t bucket, core_t::TTime /*time*/, double /*offset*/) const -{ +double CCalendarComponentAdaptiveBucketing::predict(std::size_t bucket, core_t::TTime /*time*/, double /*offset*/) const { return CBasicStatistics::mean(m_Values[bucket]); } -double CCalendarComponentAdaptiveBucketing::variance(std::size_t bucket) const -{ +double CCalendarComponentAdaptiveBucketing::variance(std::size_t bucket) const { return CBasicStatistics::maximumLikelihoodVariance(m_Values[bucket]); } - } } diff --git a/lib/maths/CCalendarFeature.cc b/lib/maths/CCalendarFeature.cc index 
ea9f053090..b519b2453b 100644
--- a/lib/maths/CCalendarFeature.cc
+++ b/lib/maths/CCalendarFeature.cc
@@ -7,90 +7,67 @@
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
-const int LAST_DAY_IN_MONTH[] =
-    {
-        30, 27, 30, 29, 30, 29, 30, 30, 29, 30, 29, 30
-    };
-const std::string DAYS[] =
-    {
-        "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
-    };
+namespace {
+const int LAST_DAY_IN_MONTH[] = {30, 27, 30, 29, 30, 29, 30, 30, 29, 30, 29, 30};
+const std::string DAYS[] = {"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"};

-const int DAY = core::constants::DAY;
+const int DAY = core::constants::DAY;

 //! Check if \p year (years since 1900) is a leap year.
-bool isLeapYear(int year)
-{
+bool isLeapYear(int year) {
     year += 1900;
     return (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
 }

 //! Get the number of days in \p month of \p year.
-int lastDayInMonth(int year, int month)
-{
+int lastDayInMonth(int year, int month) {
     return LAST_DAY_IN_MONTH[month] + (month == 1 && isLeapYear(year) ? 1 : 0);
 }

 //! Compute the day of week of the first of the month if the
 //! \p dayOfMonth is a \p dayOfWeek.
-int dayOfFirst(int dayOfMonth, int dayOfWeek)
-{
+int dayOfFirst(int dayOfMonth, int dayOfWeek) {
     return (CIntegerTools::ceil(dayOfMonth, 7) - dayOfMonth + dayOfWeek) % 7;
 }

 //! Print the day or week count.
-std::string print_(int count, bool suffix)
-{
-    static const std::string suffix_[] = { "th", "st", "nd", "rd", "th" };
-    return core::CStringUtils::typeToString(count)
-           + (suffix ? suffix_[count < 20 ? std::min(count, 4) :
-                               std::min(count % 10, 4)] : "");
+std::string print_(int count, bool suffix) {
+    static const std::string suffix_[] = {"th", "st", "nd", "rd", "th"};
+    return core::CStringUtils::typeToString(count) + (suffix ? suffix_[count < 20 ?
std::min(count, 4) : std::min(count % 10, 4)] : ""); } - } -CCalendarFeature::CCalendarFeature() : m_Feature(INVALID), m_Value(INVALID) {} +CCalendarFeature::CCalendarFeature() : m_Feature(INVALID), m_Value(INVALID) { +} -CCalendarFeature::CCalendarFeature(uint16_t feature, core_t::TTime time) : - m_Feature(INVALID), m_Value(INVALID) -{ +CCalendarFeature::CCalendarFeature(uint16_t feature, core_t::TTime time) : m_Feature(INVALID), m_Value(INVALID) { int dayOfWeek{}; int dayOfMonth{}; int dayOfYear{}; int month{}; int year{}; int secondsSinceMidnight{}; - if (core::CTimezone::instance().dateFields(time, - dayOfWeek, dayOfMonth, dayOfYear, - month, year, secondsSinceMidnight)) - { + if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, month, year, secondsSinceMidnight)) { dayOfMonth -= 1; this->initialize(feature, dayOfWeek, dayOfMonth, month, year); - } - else - { + } else { LOG_ERROR("Invalid time: " << time); } } -CCalendarFeature::TCalendarFeature4Ary CCalendarFeature::features(core_t::TTime time) -{ +CCalendarFeature::TCalendarFeature4Ary CCalendarFeature::features(core_t::TTime time) { TCalendarFeature4Ary result; int dayOfWeek{}; int dayOfMonth{}; @@ -98,32 +75,20 @@ CCalendarFeature::TCalendarFeature4Ary CCalendarFeature::features(core_t::TTime int month{}; int year{}; int secondsSinceMidnight{}; - if (core::CTimezone::instance().dateFields(time, - dayOfWeek, dayOfMonth, dayOfYear, - month, year, secondsSinceMidnight)) - { + if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, month, year, secondsSinceMidnight)) { dayOfMonth -= 1; auto i = result.begin(); - for (uint16_t feature = BEGIN_FEATURES; feature < END_FEATURES; ++feature, ++i) - { + for (uint16_t feature = BEGIN_FEATURES; feature < END_FEATURES; ++feature, ++i) { i->initialize(feature, dayOfWeek, dayOfMonth, month, year); } - } - else - { + } else { LOG_ERROR("Invalid time: " << time); } return result; } -void CCalendarFeature::initialize(uint16_t feature, - int dayOfWeek, - int dayOfMonth, - int month, - int year) -{ - switch (feature) - { +void CCalendarFeature::initialize(uint16_t feature, int dayOfWeek, int dayOfMonth, int month, int year) { + switch (feature) { case DAYS_SINCE_START_OF_MONTH: m_Feature = feature; m_Value = static_cast(dayOfMonth); @@ -138,8 +103,7 @@ void CCalendarFeature::initialize(uint16_t feature, break; case DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH: m_Feature = feature; - m_Value = static_cast( - 8 * ((lastDayInMonth(year, month) - dayOfMonth) / 7) + dayOfWeek); + m_Value = static_cast(8 * ((lastDayInMonth(year, month) - dayOfMonth) / 7) + dayOfWeek); break; default: LOG_ERROR("Invalid feature: " << feature); @@ -147,123 +111,97 @@ void CCalendarFeature::initialize(uint16_t feature, } } -bool CCalendarFeature::fromDelimited(const std::string &value) -{ +bool CCalendarFeature::fromDelimited(const std::string& value) { int state[2]; - if (core::CPersistUtils::fromString(value, boost::begin(state), boost::end(state))) - { + if (core::CPersistUtils::fromString(value, boost::begin(state), boost::end(state))) { m_Feature = static_cast(state[0]); - m_Value = static_cast(state[1]); + m_Value = static_cast(state[1]); return true; } return false; } -std::string CCalendarFeature::toDelimited() const -{ - int state[2] = - { - static_cast(m_Feature), - static_cast(m_Value) - }; - const int *begin = boost::begin(state); - const int *end = boost::end(state); +std::string CCalendarFeature::toDelimited() const { + int state[2] = {static_cast(m_Feature), 
static_cast(m_Value)}; + const int* begin = boost::begin(state); + const int* end = boost::end(state); return core::CPersistUtils::toString(begin, end); } -bool CCalendarFeature::operator==(CCalendarFeature rhs) const -{ +bool CCalendarFeature::operator==(CCalendarFeature rhs) const { return m_Feature == rhs.m_Feature && m_Value == rhs.m_Value; } -bool CCalendarFeature::operator<(CCalendarFeature rhs) const -{ +bool CCalendarFeature::operator<(CCalendarFeature rhs) const { return COrderings::lexicographical_compare(m_Feature, m_Value, rhs.m_Feature, rhs.m_Value); } -core_t::TTime CCalendarFeature::offset(core_t::TTime time) const -{ +core_t::TTime CCalendarFeature::offset(core_t::TTime time) const { int dayOfWeek{}; int dayOfMonth{}; int dayOfYear{}; int month{}; int year{}; int secondsSinceMidnight{}; - if (core::CTimezone::instance().dateFields(time, - dayOfWeek, dayOfMonth, dayOfYear, - month, year, secondsSinceMidnight)) - { + if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, month, year, secondsSinceMidnight)) { dayOfMonth -= 1; - switch (m_Feature) - { + switch (m_Feature) { case DAYS_SINCE_START_OF_MONTH: - return DAY * (dayOfMonth - static_cast(m_Value)) + secondsSinceMidnight; + return DAY * (dayOfMonth - static_cast(m_Value)) + secondsSinceMidnight; case DAYS_BEFORE_END_OF_MONTH: - return DAY * (dayOfMonth - (lastDayInMonth(year, month) - static_cast(m_Value))) - + secondsSinceMidnight; - case DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH: - { - int dayOfFirst_ = dayOfFirst(dayOfMonth, dayOfWeek); - int dayOfWeek_ = static_cast(m_Value) % 8; + return DAY * (dayOfMonth - (lastDayInMonth(year, month) - static_cast(m_Value))) + secondsSinceMidnight; + case DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH: { + int dayOfFirst_ = dayOfFirst(dayOfMonth, dayOfWeek); + int dayOfWeek_ = static_cast(m_Value) % 8; int weekOfMonth_ = static_cast(m_Value) / 8; - int dayOfMonth_ = 7 * weekOfMonth_ + (7 + dayOfWeek_ - dayOfFirst_) % 7; + int dayOfMonth_ = 7 * weekOfMonth_ + (7 + dayOfWeek_ - dayOfFirst_) % 7; return DAY * (dayOfMonth - dayOfMonth_) + secondsSinceMidnight; } - case DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH: - { - int lastDayInMonth_ = lastDayInMonth(year, month); - int dayOfLast_ = (lastDayInMonth_ + dayOfFirst(dayOfMonth, dayOfWeek)) % 7; - int dayOfWeek_ = static_cast(m_Value) % 8; + case DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH: { + int lastDayInMonth_ = lastDayInMonth(year, month); + int dayOfLast_ = (lastDayInMonth_ + dayOfFirst(dayOfMonth, dayOfWeek)) % 7; + int dayOfWeek_ = static_cast(m_Value) % 8; int weeksToEndOfMonth_ = static_cast(m_Value) / 8; - int dayOfMonth_ = lastDayInMonth_ - (7 * weeksToEndOfMonth_ + (7 + dayOfLast_ - dayOfWeek_) % 7); + int dayOfMonth_ = lastDayInMonth_ - (7 * weeksToEndOfMonth_ + (7 + dayOfLast_ - dayOfWeek_) % 7); return DAY * (dayOfMonth - dayOfMonth_) + secondsSinceMidnight; } default: LOG_ERROR("Invalid feature: '" << m_Feature << "'"); break; } - } - else - { + } else { LOG_ERROR("Invalid time: '" << time << "'"); } return 0; } -bool CCalendarFeature::inWindow(core_t::TTime time) const -{ +bool CCalendarFeature::inWindow(core_t::TTime time) const { core_t::TTime offset = this->offset(time); return offset >= 0 && offset < this->window(); } -core_t::TTime CCalendarFeature::window() const -{ +core_t::TTime CCalendarFeature::window() const { return core::constants::DAY; } -uint64_t CCalendarFeature::checksum(uint64_t seed) const -{ +uint64_t CCalendarFeature::checksum(uint64_t seed) const { seed = 
CChecksum::calculate(seed, m_Feature);
     return CChecksum::calculate(seed, m_Value);
 }

-std::string CCalendarFeature::print() const
-{
-    switch (m_Feature)
-    {
+std::string CCalendarFeature::print() const {
+    switch (m_Feature) {
     case DAYS_SINCE_START_OF_MONTH:
         return print_(static_cast<int>(m_Value) + 1, true) + " day of month";
     case DAYS_BEFORE_END_OF_MONTH:
         return print_(static_cast<int>(m_Value), false) + " days before end of month";
-    case DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH:
-    {
-        int dayOfWeek_ = static_cast<int>(m_Value) % 8;
+    case DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH: {
+        int dayOfWeek_ = static_cast<int>(m_Value) % 8;
         int weekOfMonth_ = static_cast<int>(m_Value) / 8;
         return print_(weekOfMonth_ + 1, true) + " " + DAYS[dayOfWeek_] + " of month";
     }
-    case DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH:
-    {
-        int dayOfWeek_ = static_cast<int>(m_Value) % 8;
+    case DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH: {
+        int dayOfWeek_ = static_cast<int>(m_Value) % 8;
         int weeksToEndOfMonth_ = static_cast<int>(m_Value) / 8;
         return print_(weeksToEndOfMonth_, false) + " " + DAYS[dayOfWeek_] + "s before end of month";
     }
@@ -272,6 +210,5 @@ std::string CCalendarFeature::print() const
 }

 const uint16_t CCalendarFeature::INVALID(boost::numeric::bounds<uint16_t>::highest());
-
 }
 }
diff --git a/lib/maths/CCategoricalTools.cc b/lib/maths/CCategoricalTools.cc
index 91aa119f4c..a753ab81fe 100644
--- a/lib/maths/CCategoricalTools.cc
+++ b/lib/maths/CCategoricalTools.cc
@@ -9,8 +9,8 @@
 #include
 #include
-#include
 #include
+#include
 #include
 #include
@@ -21,59 +21,44 @@
 #include
 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
+namespace {
 const double LOG_TWO = std::log(2.0);

 //! A fast lower bound for the binomial probability of \p m
 //! successes for \p n trials and probability of success \p p.
-inline maths_t::EFloatingPointErrorStatus
-logBinomialProbabilityFastLowerBound(std::size_t n,
-                                     double p,
-                                     std::size_t m,
-                                     double &result)
-{
+inline maths_t::EFloatingPointErrorStatus logBinomialProbabilityFastLowerBound(std::size_t n, double p, std::size_t m, double& result) {
     double n_ = static_cast<double>(n);
     double m_ = static_cast<double>(m);

     result = 0.0;

-    if (!(p >= 0.0 && p <= 1.0))
-    {
+    if (!(p >= 0.0 && p <= 1.0)) {
         LOG_ERROR("Bad probability: " << p);
         return maths_t::E_FpFailed;
     }
-    if (p == 0.0)
-    {
-        if (m > 0)
-        {
+    if (p == 0.0) {
+        if (m > 0) {
             result = boost::numeric::bounds<double>::lowest();
             return maths_t::E_FpOverflowed;
         }
         return maths_t::E_FpNoErrors;
     }
-    if (p == 1.0)
-    {
-        if (m < n)
-        {
+    if (p == 1.0) {
+        if (m < n) {
             result = boost::numeric::bounds<double>::lowest();
             return maths_t::E_FpOverflowed;
         }
         return maths_t::E_FpNoErrors;
     }

-    if (m == 0)
-    {
+    if (m == 0) {
         result = n_ * std::log(1.0 - p);
         return maths_t::E_FpNoErrors;
     }
-    if (m == n)
-    {
+    if (m == n) {
         result = n_ * std::log(p);
         return maths_t::E_FpNoErrors;
     }
@@ -86,10 +71,7 @@ logBinomialProbabilityFastLowerBound(std::size_t n,
     static const double CONSTANT = std::log(boost::math::double_constants::root_two_pi) - 2.0;

     double p_ = m_ / n_;
-    result = - 0.5 * std::log(n_ * (1.0 - p_) * p_)
-             + m_ * std::log(p / p_)
-             + (n_ - m_) * std::log((1.0 - p) / (1.0 - p_))
-             + CONSTANT;
+    result = -0.5 * std::log(n_ * (1.0 - p_) * p_) + m_ * std::log(p / p_) + (n_ - m_) * std::log((1.0 - p) / (1.0 - p_)) + CONSTANT;

     return maths_t::E_FpNoErrors;
 }

 //! An upper bound for the right tail probability of a
 //! binomial, i.e. the probability of seeing m or a larger value
 //! from a binomial with \p n trials and probability of success
 //! \p p.
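A minimal standalone sketch (not part of the patch; assumes only the standard library, and the names logBinomialPmf and fastLowerBound are illustrative) checking the Stirling-style expression above, transcribed with the same CONSTANT, against the exact log p.m.f. from std::lgamma. The upper-bound implementation follows below.

#include <cmath>
#include <cstdio>

// Exact log P(X = m) for X ~ Binomial(n, p), computed with lgamma.
double logBinomialPmf(int n, double p, int m) {
    return std::lgamma(n + 1.0) - std::lgamma(m + 1.0) - std::lgamma(n - m + 1.0) +
           m * std::log(p) + (n - m) * std::log(1.0 - p);
}

// The fast lower bound above, transcribed for interior 0 < m < n.
double fastLowerBound(int n, double p, int m) {
    static const double ROOT_TWO_PI = std::sqrt(2.0 * std::acos(-1.0));
    static const double CONSTANT = std::log(ROOT_TWO_PI) - 2.0;
    double p_ = static_cast<double>(m) / n;
    return -0.5 * std::log(n * p_ * (1.0 - p_)) + m * std::log(p / p_) +
           (n - m) * std::log((1.0 - p) / (1.0 - p_)) + CONSTANT;
}

int main() {
    // Expect bound <= exact, e.g. exact ~= -1.455, bound ~= -1.573 here.
    std::printf("exact = %.4f, bound = %.4f\n",
                logBinomialPmf(10, 0.3, 2), fastLowerBound(10, 0.3, 2));
    return 0;
}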
-maths_t::EFloatingPointErrorStatus logRightTailProbabilityUpperBound(std::size_t n, - double p, - std::size_t m, - double &result) -{ - if (m > n) - { +maths_t::EFloatingPointErrorStatus logRightTailProbabilityUpperBound(std::size_t n, double p, std::size_t m, double& result) { + if (m > n) { LOG_ERROR("Invalid sample: " << m << " > " << n); result = boost::numeric::bounds::lowest(); return maths_t::E_FpOverflowed; @@ -111,26 +88,21 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityUpperBound(std::size_t result = 0.0; - if (n == 0) - { + if (n == 0) { return maths_t::E_FpNoErrors; } - if (!(p >= 0.0 && p <= 1.0)) - { + if (!(p >= 0.0 && p <= 1.0)) { LOG_ERROR("Bad probability: " << p); return maths_t::E_FpFailed; } - if (p == 0.0) - { - if (m > 0) - { + if (p == 0.0) { + if (m > 0) { result = boost::numeric::bounds::lowest(); return maths_t::E_FpOverflowed; } return maths_t::E_FpNoErrors; } - if (p == 1.0) - { + if (p == 1.0) { return maths_t::E_FpNoErrors; } @@ -141,27 +113,20 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityUpperBound(std::size_t double m_ = static_cast(m); double n_ = static_cast(n); - try - { + try { boost::math::binomial_distribution<> binomial(n_, p); - if (m_ <= boost::math::median(binomial)) - { + if (m_ <= boost::math::median(binomial)) { return maths_t::E_FpNoErrors; } double eps = (m_ - n_ * p) / n_; double q = p + eps; - double chernoff = m_ * ( q * std::log(p / q) - + (1.0 - q) * std::log((1.0 - p) / (1.0 - q))); + double chernoff = m_ * (q * std::log(p / q) + (1.0 - q) * std::log((1.0 - p) / (1.0 - q))); result = std::min(chernoff + LOG_TWO, 0.0); return maths_t::E_FpNoErrors; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate c.d.f. complement: " << e.what() - << ", n = " << n - << ", p = " << p); + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate c.d.f. complement: " << e.what() << ", n = " << n << ", p = " << p); } return maths_t::E_FpOverflowed; @@ -171,13 +136,8 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityUpperBound(std::size_t //! binomial, i.e. the probability of seeing m or a larger value //! from a binomial with \p trials and probability of success //! \p p. 
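The bound just implemented reads as the textbook Chernoff inequality log P(X >= m) <= -n * KL(q || p) with q = m / n, except that the code multiplies by m_ rather than n_ (which gives a weaker but still valid upper bound, since the bracketed term is negative) and then adds LOG_TWO for the two-sided convention used throughout this file. A minimal standalone check of the textbook form against the exact tail (illustrative names, standard library only); the lower-bound counterpart follows below.

#include <cmath>
#include <cstdio>

// Exact log P(X >= m) by direct summation (fine for small n).
double logRightTailExact(int n, double p, int m) {
    double tail = 0.0;
    for (int k = m; k <= n; ++k) {
        tail += std::exp(std::lgamma(n + 1.0) - std::lgamma(k + 1.0) - std::lgamma(n - k + 1.0) +
                         k * std::log(p) + (n - k) * std::log(1.0 - p));
    }
    return std::log(tail);
}

// Textbook Chernoff bound for q = m / n > p: log P(X >= m) <= -n * KL(q || p).
double logChernoffBound(int n, double p, int m) {
    double q = static_cast<double>(m) / n;
    return n * (q * std::log(p / q) + (1.0 - q) * std::log((1.0 - p) / (1.0 - q)));
}

int main() {
    // n = 20, p = 0.2, m = 10: exact ~= -5.95, bound ~= -4.46, so exact <= bound.
    std::printf("exact = %.4f, bound = %.4f\n",
                logRightTailExact(20, 0.2, 10), logChernoffBound(20, 0.2, 10));
    return 0;
}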
-maths_t::EFloatingPointErrorStatus logRightTailProbabilityLowerBound(std::size_t n, - double p, - std::size_t m, - double &result) -{ - if (m > n) - { +maths_t::EFloatingPointErrorStatus logRightTailProbabilityLowerBound(std::size_t n, double p, std::size_t m, double& result) { + if (m > n) { LOG_ERROR("Invalid sample: " << m << " > " << n); result = boost::numeric::bounds::lowest(); return maths_t::E_FpOverflowed; @@ -185,26 +145,21 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityLowerBound(std::size_t result = 0.0; - if (n == 0) - { + if (n == 0) { return maths_t::E_FpNoErrors; } - if (!(p >= 0.0 && p <= 1.0)) - { + if (!(p >= 0.0 && p <= 1.0)) { LOG_ERROR("Bad probability: " << p); return maths_t::E_FpFailed; } - if (p == 0.0) - { - if (m > 0) - { + if (p == 0.0) { + if (m > 0) { result = boost::numeric::bounds::lowest(); return maths_t::E_FpOverflowed; } return maths_t::E_FpNoErrors; } - if (p == 1.0) - { + if (p == 1.0) { return maths_t::E_FpNoErrors; } @@ -244,33 +199,24 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityLowerBound(std::size_t double m_ = static_cast(m); double n_ = static_cast(n); - try - { + try { boost::math::binomial_distribution<> binomial(n_, p); - if (m_ <= boost::math::median(binomial)) - { + if (m_ <= boost::math::median(binomial)) { return maths_t::E_FpNoErrors; } double logf; - maths_t::EFloatingPointErrorStatus status = - logBinomialProbabilityFastLowerBound(n, p, m, logf); - if (status & maths_t::E_FpAllErrors) - { + maths_t::EFloatingPointErrorStatus status = logBinomialProbabilityFastLowerBound(n, p, m, logf); + if (status & maths_t::E_FpAllErrors) { result = logf; return status; } - double bound = logf + std::log(1.0 + n_ / (m_ + 1.0) - * (std::exp(p / (1.0 - p) * (n_ - m_) / n_) - 1.0)); + double bound = logf + std::log(1.0 + n_ / (m_ + 1.0) * (std::exp(p / (1.0 - p) * (n_ - m_) / n_) - 1.0)); result = std::min(bound + LOG_TWO, 0.0); return maths_t::E_FpNoErrors; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate c.d.f. complement: " << e.what() - << ", n = " << n - << ", p = " << p); + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate c.d.f. complement: " << e.what() << ", n = " << n << ", p = " << p); } return maths_t::E_FpFailed; @@ -279,13 +225,8 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityLowerBound(std::size_t //! Get the log of right tail probability, i.e. the probability //! of seeing m or a larger value from a binomial with \p trials //! and probability of success \p p. 
-maths_t::EFloatingPointErrorStatus logRightTailProbability(std::size_t n, - double p, - std::size_t m, - double &result) -{ - if (m > n) - { +maths_t::EFloatingPointErrorStatus logRightTailProbability(std::size_t n, double p, std::size_t m, double& result) { + if (m > n) { LOG_ERROR("Invalid sample: " << m << " > " << n); result = boost::numeric::bounds::lowest(); return maths_t::E_FpOverflowed; @@ -293,38 +234,31 @@ maths_t::EFloatingPointErrorStatus logRightTailProbability(std::size_t n, result = 0.0; - if (n == 0) - { + if (n == 0) { return maths_t::E_FpNoErrors; } - if (!(p >= 0.0 && p <= 1.0)) - { + if (!(p >= 0.0 && p <= 1.0)) { LOG_ERROR("Bad probability: " << p); return maths_t::E_FpFailed; } - if (p == 0.0) - { - if (m > 0) - { + if (p == 0.0) { + if (m > 0) { result = boost::numeric::bounds::lowest(); return maths_t::E_FpOverflowed; } return maths_t::E_FpNoErrors; } - if (p == 1.0) - { + if (p == 1.0) { return maths_t::E_FpNoErrors; } double n_ = static_cast(n); double m_ = static_cast(m); - try - { + try { boost::math::binomial_distribution<> binomial(n_, p); - if (m_ <= boost::math::median(binomial)) - { + if (m_ <= boost::math::median(binomial)) { return maths_t::E_FpNoErrors; } @@ -332,71 +266,54 @@ maths_t::EFloatingPointErrorStatus logRightTailProbability(std::size_t n, // upper bound. double lb, ub; - maths_t::EFloatingPointErrorStatus status = - logRightTailProbabilityLowerBound(n, p, m, lb); - if (status & maths_t::E_FpAllErrors) - { - result = status == maths_t::E_FpOverflowed ? - boost::numeric::bounds::lowest() : 0.0; + maths_t::EFloatingPointErrorStatus status = logRightTailProbabilityLowerBound(n, p, m, lb); + if (status & maths_t::E_FpAllErrors) { + result = status == maths_t::E_FpOverflowed ? boost::numeric::bounds::lowest() : 0.0; return status; } status = logRightTailProbabilityUpperBound(n, p, m, ub); - if (status & maths_t::E_FpAllErrors) - { - result = status == maths_t::E_FpOverflowed ? - boost::numeric::bounds::lowest() : 0.0; + if (status & maths_t::E_FpAllErrors) { + result = status == maths_t::E_FpOverflowed ? boost::numeric::bounds::lowest() : 0.0; return status; } - if (ub <= core::constants::LOG_MIN_DOUBLE) - { + if (ub <= core::constants::LOG_MIN_DOUBLE) { result = lb; return maths_t::E_FpNoErrors; } double oneMinusF = CTools::safeCdfComplement(binomial, m_); - if (oneMinusF == 0.0) - { + if (oneMinusF == 0.0) { result = lb; return maths_t::E_FpNoErrors; } double logf; status = CCategoricalTools::logBinomialProbability(n, p, m, logf); - if (status == maths_t::E_FpFailed) - { + if (status == maths_t::E_FpFailed) { return maths_t::E_FpFailed; } double f = status == maths_t::E_FpOverflowed ? 0.0 : std::exp(logf); result = std::min(std::log(oneMinusF + f) + LOG_TWO, 0.0); return maths_t::E_FpNoErrors; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate c.d.f. complement: " << e.what() - << ", n = " << n - << ", p = " << p); + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate c.d.f. 
complement: " << e.what() << ", n = " << n << ", p = " << p); } return maths_t::E_FpFailed; } - } -bool CCategoricalTools::probabilityOfLessLikelyMultinomialSample(const TDoubleVec &/*probabilities*/, - const TSizeVec &i, - const TSizeVec &ni, - double &result) -{ +bool CCategoricalTools::probabilityOfLessLikelyMultinomialSample(const TDoubleVec& /*probabilities*/, + const TSizeVec& i, + const TSizeVec& ni, + double& result) { result = 1.0; - if (i.size() != ni.size()) - { - LOG_ERROR("Inconsistent categories and counts: " - << core::CContainerPrinter::print(i) - << " " << core::CContainerPrinter::print(ni)); + if (i.size() != ni.size()) { + LOG_ERROR("Inconsistent categories and counts: " << core::CContainerPrinter::print(i) << " " << core::CContainerPrinter::print(ni)); return false; } @@ -407,28 +324,20 @@ bool CCategoricalTools::probabilityOfLessLikelyMultinomialSample(const TDoubleVe return 0.0; } -double CCategoricalTools::probabilityOfCategory(std::size_t n, - const double probability) -{ - if (n == 0) - { +double CCategoricalTools::probabilityOfCategory(std::size_t n, const double probability) { + if (n == 0) { return 0.0; } - if (probability > 0.0 && probability < 1.0) - { - boost::math::binomial_distribution<> binomial(static_cast(n), - probability); + if (probability > 0.0 && probability < 1.0) { + boost::math::binomial_distribution<> binomial(static_cast(n), probability); return boost::math::cdf(boost::math::complement(binomial, 0.0)); } return probability; } -bool CCategoricalTools::expectedDistinctCategories(const TDoubleVec &probabilities, - const double n, - double &result) -{ +bool CCategoricalTools::expectedDistinctCategories(const TDoubleVec& probabilities, const double n, double& result) { // We imagine drawing n samples from a multinomial random variable // with m categories. We'd like to calculate how many distinct // categories we'd expect in this sample of n. 
This quantity is @@ -456,20 +365,15 @@ bool CCategoricalTools::expectedDistinctCategories(const TDoubleVec &probabiliti result = 0.0; - if (probabilities.size() == 0) - { + if (probabilities.size() == 0) { return false; } - for (std::size_t i = 0u; i < probabilities.size(); ++i) - { - if (probabilities[i] > 0.0 && probabilities[i] < 1.0) - { + for (std::size_t i = 0u; i < probabilities.size(); ++i) { + if (probabilities[i] > 0.0 && probabilities[i] < 1.0) { boost::math::binomial_distribution<> binomial(n, probabilities[i]); result += boost::math::cdf(boost::math::complement(binomial, 0.0)); - } - else if (probabilities[i] == 1.0) - { + } else if (probabilities[i] == 1.0) { result += 1.0; } } @@ -477,37 +381,28 @@ bool CCategoricalTools::expectedDistinctCategories(const TDoubleVec &probabiliti return true; } -double CCategoricalTools::logBinomialCoefficient(std::size_t n, std::size_t m) -{ - if (m == n || m == 0) - { +double CCategoricalTools::logBinomialCoefficient(std::size_t n, std::size_t m) { + if (m == n || m == 0) { return 0.0; } double n_ = static_cast(n); double m_ = static_cast(m); - return boost::math::lgamma(n_ + 1.0) - - boost::math::lgamma(m_ + 1.0) - - boost::math::lgamma(n_ - m_ + 1.0); + return boost::math::lgamma(n_ + 1.0) - boost::math::lgamma(m_ + 1.0) - boost::math::lgamma(n_ - m_ + 1.0); } -double CCategoricalTools::binomialCoefficient(std::size_t n, std::size_t m) -{ +double CCategoricalTools::binomialCoefficient(std::size_t n, std::size_t m) { return std::exp(logBinomialCoefficient(n, m)); } -bool CCategoricalTools::probabilityOfLessLikelyCategoryCount(TDoubleVec &probabilities, - const TSizeVec &i, - const TSizeVec &ni, - TDoubleVec &result, - std::size_t trials) -{ +bool CCategoricalTools::probabilityOfLessLikelyCategoryCount(TDoubleVec& probabilities, + const TSizeVec& i, + const TSizeVec& ni, + TDoubleVec& result, + std::size_t trials) { result.clear(); - if (i.size() != ni.size()) - { - LOG_ERROR("Inconsistent categories and counts: " - << core::CContainerPrinter::print(i) - << " " << core::CContainerPrinter::print(ni)); + if (i.size() != ni.size()) { + LOG_ERROR("Inconsistent categories and counts: " << core::CContainerPrinter::print(i) << " " << core::CContainerPrinter::print(ni)); return false; } @@ -540,19 +435,15 @@ bool CCategoricalTools::probabilityOfLessLikelyCategoryCount(TDoubleVec &probabi TDoubleVec probabilities_; probabilities_.reserve(i.size()); - for (std::size_t i_ = 0u; i_ < i.size(); ++i_) - { - if (i[i_] >= probabilities.size()) - { + for (std::size_t i_ = 0u; i_ < i.size(); ++i_) { + if (i[i_] >= probabilities.size()) { LOG_ERROR("Bad category: " << i[i_] << " out of range"); return false; } probabilities_.push_back(probabilities[i[i_]]); } - std::sort(probabilities.begin(), - probabilities.end(), - std::greater()); + std::sort(probabilities.begin(), probabilities.end(), std::greater()); // Declared outside the loop to minimize the number of times // it is created. @@ -563,32 +454,26 @@ bool CCategoricalTools::probabilityOfLessLikelyCategoryCount(TDoubleVec &probabi TDoubleVec g; g.reserve(trials); - for (std::size_t i_ = 0u; i_ < trials; ++i_) - { + for (std::size_t i_ = 0u; i_ < trials; ++i_) { sample.clear(); CSampling::multinomialSampleFast(probabilities, n, sample, true); double logPMin = 0.0; - for (std::size_t j = 0u; j < sample.size(); ++j) - { + for (std::size_t j = 0u; j < sample.size(); ++j) { // We check the sample is in the right tail because // we are interested in unusually large values. 
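            // Aside: floor((n + 1) * p) is the mode of Binomial(n, p), so a
            // count at or below this threshold can never be unusually large;
            // the cheap lower bound is consulted first so that the more
            // expensive exact tail probability only needs to be computed for
            // categories which could still move logPMin.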
            double pj = probabilities[j];
-            if (sample[j] > static_cast<std::size_t>((n_ + 1.0) * pj))
-            {
+            if (sample[j] > static_cast<std::size_t>((n_ + 1.0) * pj)) {
                 std::size_t nj = sample[j];
                 double lowerBound;
-                if (logRightTailProbabilityLowerBound(n, pj, nj, lowerBound) & maths_t::E_FpAllErrors)
-                {
+                if (logRightTailProbabilityLowerBound(n, pj, nj, lowerBound) & maths_t::E_FpAllErrors) {
                     continue;
                 }
-                if (logPMin > lowerBound)
-                {
+                if (logPMin > lowerBound) {
                     continue;
                 }
                 double logP;
-                if (logRightTailProbability(n, pj, nj, logP) & maths_t::E_FpAllErrors)
-                {
+                if (logRightTailProbability(n, pj, nj, logP) & maths_t::E_FpAllErrors) {
                     continue;
                 }
                 logPMin = std::min(logPMin, logP);
@@ -604,38 +489,27 @@
     return 0.0;
 }

-maths_t::EFloatingPointErrorStatus
-CCategoricalTools::logBinomialProbability(std::size_t n,
-                                          double p,
-                                          std::size_t m,
-                                          double &result)
-{
-    if (m > n)
-    {
+maths_t::EFloatingPointErrorStatus CCategoricalTools::logBinomialProbability(std::size_t n, double p, std::size_t m, double& result) {
+    if (m > n) {
         result = boost::numeric::bounds<double>::lowest();
         return maths_t::E_FpOverflowed;
     }

     result = 0.0;

-    if (!(p >= 0.0 && p <= 1.0))
-    {
+    if (!(p >= 0.0 && p <= 1.0)) {
         LOG_ERROR("Bad probability: " << p);
         return maths_t::E_FpFailed;
     }
-    if (p == 0.0)
-    {
-        if (m > 0)
-        {
+    if (p == 0.0) {
+        if (m > 0) {
             result = boost::numeric::bounds<double>::lowest();
             return maths_t::E_FpOverflowed;
         }
         return maths_t::E_FpNoErrors;
     }
-    if (p == 1.0)
-    {
-        if (m < n)
-        {
+    if (p == 1.0) {
+        if (m < n) {
             result = boost::numeric::bounds<double>::lowest();
             return maths_t::E_FpFailed;
         }
@@ -644,50 +518,39 @@ CCategoricalTools::logBinomialProbability(std::size_t n,
     double n_ = static_cast<double>(n);
     double m_ = static_cast<double>(m);
-    result = std::min(  boost::math::lgamma(n_ + 1.0)
-                      - boost::math::lgamma(m_ + 1.0)
-                      - boost::math::lgamma(n_ - m_ + 1.0)
-                      + m_ * std::log(p) + (n_ - m_) * std::log(1.0 - p), 0.0);
+    result = std::min(boost::math::lgamma(n_ + 1.0) - boost::math::lgamma(m_ + 1.0) - boost::math::lgamma(n_ - m_ + 1.0) +
+                          m_ * std::log(p) + (n_ - m_) * std::log(1.0 - p),
+                      0.0);
     return maths_t::E_FpNoErrors;
 }

 maths_t::EFloatingPointErrorStatus
-CCategoricalTools::logMultinomialProbability(const TDoubleVec &probabilities,
-                                             const TSizeVec &ni,
-                                             double &result)
-{
+CCategoricalTools::logMultinomialProbability(const TDoubleVec& probabilities, const TSizeVec& ni, double& result) {
     result = 0.0;

-    if (probabilities.size() != ni.size())
-    {
-        LOG_ERROR("Inconsistent categories and counts: "
-                  << core::CContainerPrinter::print(probabilities)
-                  << " " << core::CContainerPrinter::print(ni));
+    if (probabilities.size() != ni.size()) {
+        LOG_ERROR("Inconsistent categories and counts: " << core::CContainerPrinter::print(probabilities) << " "
+                                                         << core::CContainerPrinter::print(ni));
         return maths_t::E_FpFailed;
     }

     std::size_t n = std::accumulate(ni.begin(), ni.end(), std::size_t(0));
-    if (n == 0)
-    {
+    if (n == 0) {
         return maths_t::E_FpNoErrors;
     }

     double n_ = static_cast<double>(n);

     double logP = boost::math::lgamma(n_ + 1.0);
-    for (std::size_t i = 0u; i < ni.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < ni.size(); ++i) {
         double ni_ = static_cast<double>(ni[i]);
-        if (ni_ > 0.0)
-        {
+        if (ni_ > 0.0) {
             double pi_ = probabilities[i];
-            if (!(pi_ >= 0.0 && pi_ <= 1.0))
-            {
+            if (!(pi_ >= 0.0 && pi_ <= 1.0)) {
                 LOG_ERROR("Bad probability: " << pi_);
                 return maths_t::E_FpFailed;
             }
-            if (pi_ == 0.0)
-            {
+            if (pi_ == 0.0) {
                 result = boost::numeric::bounds<double>::lowest();
                 return maths_t::E_FpOverflowed;
             }
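logMultinomialProbability accumulates lgamma(n + 1) plus, per category, n_i * log(p_i) - lgamma(n_i + 1). A minimal standalone sketch (not part of the patch; illustrative names, standard library only), checked against a case that is easy to do by hand: P(1, 1, 1) for n = 3 and p = (0.5, 0.3, 0.2) is 3! * 0.5 * 0.3 * 0.2 = 0.18.

#include <cmath>
#include <cstdio>
#include <vector>

// log P(N_1 = n_1, ..., N_k = n_k) for a multinomial with probabilities p.
double logMultinomialPmf(const std::vector<double>& p, const std::vector<int>& n) {
    int total = 0;
    for (int ni : n) {
        total += ni;
    }
    double logP = std::lgamma(total + 1.0);
    for (std::size_t i = 0; i < n.size(); ++i) {
        logP += n[i] * std::log(p[i]) - std::lgamma(n[i] + 1.0);
    }
    return logP;
}

int main() {
    // Expect log(0.18) ~= -1.7148.
    std::printf("%.4f\n", logMultinomialPmf({0.5, 0.3, 0.2}, {1, 1, 1}));
    return 0;
}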
@@ -698,6 +561,5 @@ CCategoricalTools::logMultinomialProbability(const TDoubleVec &probabilities, result = std::min(logP, 0.0); return maths_t::E_FpNoErrors; } - } } diff --git a/lib/maths/CClusterer.cc b/lib/maths/CClusterer.cc index 14ca95c316..14bb4f10af 100644 --- a/lib/maths/CClusterer.cc +++ b/lib/maths/CClusterer.cc @@ -6,90 +6,69 @@ #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { const std::string INDEX_TAG("a"); } -CClustererTypes::CIndexGenerator::CIndexGenerator() : - m_IndexHeap(new TSizeVec(1u, 0u)) -{ +CClustererTypes::CIndexGenerator::CIndexGenerator() : m_IndexHeap(new TSizeVec(1u, 0u)) { } -bool CClustererTypes::CIndexGenerator::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CClustererTypes::CIndexGenerator::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { m_IndexHeap->clear(); - do - { - if (core::CPersistUtils::restore(INDEX_TAG, *m_IndexHeap, traverser) == false) - { + do { + if (core::CPersistUtils::restore(INDEX_TAG, *m_IndexHeap, traverser) == false) { LOG_ERROR("Invalid indices in " << traverser.value()); return false; } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CClustererTypes::CIndexGenerator::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CClustererTypes::CIndexGenerator::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CPersistUtils::persist(INDEX_TAG, *m_IndexHeap, inserter); } -CClustererTypes::CIndexGenerator CClustererTypes::CIndexGenerator::deepCopy() const -{ +CClustererTypes::CIndexGenerator CClustererTypes::CIndexGenerator::deepCopy() const { CIndexGenerator result; result.m_IndexHeap.reset(new TSizeVec(*m_IndexHeap)); return result; } -std::size_t CClustererTypes::CIndexGenerator::next() const -{ +std::size_t CClustererTypes::CIndexGenerator::next() const { std::size_t result = m_IndexHeap->front(); std::pop_heap(m_IndexHeap->begin(), m_IndexHeap->end(), std::greater()); m_IndexHeap->pop_back(); - if (m_IndexHeap->empty()) - { + if (m_IndexHeap->empty()) { m_IndexHeap->push_back(result + 1u); } return result; } -void CClustererTypes::CIndexGenerator::recycle(std::size_t index) -{ +void CClustererTypes::CIndexGenerator::recycle(std::size_t index) { m_IndexHeap->push_back(index); std::push_heap(m_IndexHeap->begin(), m_IndexHeap->end(), std::greater()); } -void CClustererTypes::CIndexGenerator::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CClustererTypes::CIndexGenerator::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CClusterer::CIndexGenerator"); core::CMemoryDebug::dynamicSize("m_IndexHeap", m_IndexHeap, mem); } -std::size_t CClustererTypes::CIndexGenerator::memoryUsage() const -{ +std::size_t CClustererTypes::CIndexGenerator::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_IndexHeap); return mem; } -std::string CClustererTypes::CIndexGenerator::print() const -{ +std::string CClustererTypes::CIndexGenerator::print() const { return core::CContainerPrinter::print(*m_IndexHeap); } const std::string CClustererTypes::X_MEANS_ONLINE_1D_TAG("a"); const std::string CClustererTypes::K_MEANS_ONLINE_1D_TAG("b"); const std::string CClustererTypes::X_MEANS_ONLINE_TAG("c"); - } } - - - diff --git a/lib/maths/CClustererStateSerialiser.cc b/lib/maths/CClustererStateSerialiser.cc index fdd014c666..717fb21c19 100644 --- a/lib/maths/CClustererStateSerialiser.cc +++ 
b/lib/maths/CClustererStateSerialiser.cc @@ -11,51 +11,35 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -bool CClustererStateSerialiser::operator()(const SDistributionRestoreParams ¶ms, - TClusterer1dPtr &ptr, - core::CStateRestoreTraverser &traverser) -{ - return this->operator()(params, - CClusterer1d::CDoNothing(), - CClusterer1d::CDoNothing(), - ptr, traverser); +bool CClustererStateSerialiser:: +operator()(const SDistributionRestoreParams& params, TClusterer1dPtr& ptr, core::CStateRestoreTraverser& traverser) { + return this->operator()(params, CClusterer1d::CDoNothing(), CClusterer1d::CDoNothing(), ptr, traverser); } -bool CClustererStateSerialiser::operator()(const SDistributionRestoreParams ¶ms, - const CClusterer1d::TSplitFunc &splitFunc, - const CClusterer1d::TMergeFunc &mergeFunc, - TClusterer1dPtr &ptr, - core::CStateRestoreTraverser &traverser) -{ +bool CClustererStateSerialiser::operator()(const SDistributionRestoreParams& params, + const CClusterer1d::TSplitFunc& splitFunc, + const CClusterer1d::TMergeFunc& mergeFunc, + TClusterer1dPtr& ptr, + core::CStateRestoreTraverser& traverser) { std::size_t numResults(0); - do - { - const std::string &name = traverser.name(); - if (name == CClustererTypes::X_MEANS_ONLINE_1D_TAG) - { + do { + const std::string& name = traverser.name(); + if (name == CClustererTypes::X_MEANS_ONLINE_1D_TAG) { ptr.reset(new CXMeansOnline1d(params, splitFunc, mergeFunc, traverser)); ++numResults; - } - else if (name == CClustererTypes::K_MEANS_ONLINE_1D_TAG) - { + } else if (name == CClustererTypes::K_MEANS_ONLINE_1D_TAG) { ptr.reset(new CKMeansOnline1d(params, traverser)); ++numResults; - } - else - { + } else { LOG_ERROR("No clusterer corresponds to node name " << traverser.name()); } - } - while (traverser.next()); + } while (traverser.next()); - if (numResults != 1) - { + if (numResults != 1) { LOG_ERROR("Expected 1 (got " << numResults << ") clusterer tags"); ptr.reset(); return false; @@ -64,12 +48,8 @@ bool CClustererStateSerialiser::operator()(const SDistributionRestoreParams &par return true; } -void CClustererStateSerialiser::operator()(const CClusterer1d &clusterer, - core::CStatePersistInserter &inserter) -{ - inserter.insertLevel(clusterer.persistenceTag(), - boost::bind(&CClusterer1d::acceptPersistInserter, &clusterer, _1)); +void CClustererStateSerialiser::operator()(const CClusterer1d& clusterer, core::CStatePersistInserter& inserter) { + inserter.insertLevel(clusterer.persistenceTag(), boost::bind(&CClusterer1d::acceptPersistInserter, &clusterer, _1)); } - } } diff --git a/lib/maths/CConstantPrior.cc b/lib/maths/CConstantPrior.cc index 2e587c6fdb..814c263198 100644 --- a/lib/maths/CConstantPrior.cc +++ b/lib/maths/CConstantPrior.cc @@ -7,9 +7,9 @@ #include #include -#include #include #include +#include #include #include @@ -23,24 +23,17 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TOptionalDouble = boost::optional; //! Set the constant, validating the input. 
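//! "Validating" here means rejecting NaN: since NaN never compares equal to
//! anything, a NaN constant would make every later equality test against the
//! prior's point mass fail, so the update is dropped and an error is logged.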
-void setConstant(double value, TOptionalDouble &result) -{ - if (CMathsFuncs::isNan(value)) - { +void setConstant(double value, TOptionalDouble& result) { + if (CMathsFuncs::isNan(value)) { LOG_ERROR("NaN constant"); - } - else - { + } else { result.reset(value); } } @@ -51,154 +44,111 @@ const std::string CONSTANT_TAG("a"); const std::string EMPTY_STRING; const double LOG_TWO = std::log(2.0); - } -CConstantPrior::CConstantPrior(const TOptionalDouble &constant) : - CPrior(maths_t::E_DiscreteData, 0.0) -{ - if (constant) - { +CConstantPrior::CConstantPrior(const TOptionalDouble& constant) : CPrior(maths_t::E_DiscreteData, 0.0) { + if (constant) { setConstant(*constant, m_Constant); } } -CConstantPrior::CConstantPrior(core::CStateRestoreTraverser &traverser) : - CPrior(maths_t::E_DiscreteData, 0.0) -{ +CConstantPrior::CConstantPrior(core::CStateRestoreTraverser& traverser) : CPrior(maths_t::E_DiscreteData, 0.0) { traverser.traverseSubLevel(boost::bind(&CConstantPrior::acceptRestoreTraverser, this, _1)); } -bool CConstantPrior::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(CONSTANT_TAG, - double constant, - core::CStringUtils::stringToType(traverser.value(), constant), - m_Constant.reset(constant)) - } - while (traverser.next()); +bool CConstantPrior::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN( + CONSTANT_TAG, double constant, core::CStringUtils::stringToType(traverser.value(), constant), m_Constant.reset(constant)) + } while (traverser.next()); return true; } -CConstantPrior::EPrior CConstantPrior::type() const -{ +CConstantPrior::EPrior CConstantPrior::type() const { return E_Constant; } -CConstantPrior *CConstantPrior::clone() const -{ +CConstantPrior* CConstantPrior::clone() const { return new CConstantPrior(*this); } -void CConstantPrior::setToNonInformative(double /*offset*/, - double /*decayRate*/) -{ +void CConstantPrior::setToNonInformative(double /*offset*/, double /*decayRate*/) { m_Constant.reset(); } -bool CConstantPrior::needsOffset() const -{ +bool CConstantPrior::needsOffset() const { return false; } -double CConstantPrior::adjustOffset(const TWeightStyleVec &/*weightStyle*/, - const TDouble1Vec &/*samples*/, - const TDouble4Vec1Vec &/*weights*/) -{ +double +CConstantPrior::adjustOffset(const TWeightStyleVec& /*weightStyle*/, const TDouble1Vec& /*samples*/, const TDouble4Vec1Vec& /*weights*/) { return 0.0; } -double CConstantPrior::offset() const -{ +double CConstantPrior::offset() const { return 0.0; } -void CConstantPrior::addSamples(const TWeightStyleVec &/*weightStyle*/, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &/*weights*/) -{ - if (m_Constant || samples.empty()) - { +void CConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle*/, const TDouble1Vec& samples, const TDouble4Vec1Vec& /*weights*/) { + if (m_Constant || samples.empty()) { return; } setConstant(samples[0], m_Constant); } -void CConstantPrior::propagateForwardsByTime(double /*time*/) -{ +void CConstantPrior::propagateForwardsByTime(double /*time*/) { } -CConstantPrior::TDoubleDoublePr -CConstantPrior::marginalLikelihoodSupport() const -{ - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); +CConstantPrior::TDoubleDoublePr CConstantPrior::marginalLikelihoodSupport() const { + return std::make_pair(boost::numeric::bounds::lowest(), 
boost::numeric::bounds::highest()); } -double CConstantPrior::marginalLikelihoodMean() const -{ - if (this->isNonInformative()) - { +double CConstantPrior::marginalLikelihoodMean() const { + if (this->isNonInformative()) { return 0.0; } return *m_Constant; } -double CConstantPrior::marginalLikelihoodMode(const TWeightStyleVec &/*weightStyles*/, - const TDouble4Vec &/*weights*/) const -{ +double CConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const { return this->marginalLikelihoodMean(); } -CConstantPrior::TDoubleDoublePr -CConstantPrior::marginalLikelihoodConfidenceInterval(double /*percentage*/, - const TWeightStyleVec &/*weightStyles*/, - const TDouble4Vec &/*weights*/) const -{ - if (this->isNonInformative()) - { +CConstantPrior::TDoubleDoublePr CConstantPrior::marginalLikelihoodConfidenceInterval(double /*percentage*/, + const TWeightStyleVec& /*weightStyles*/, + const TDouble4Vec& /*weights*/) const { + if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } return std::make_pair(*m_Constant, *m_Constant); } -double CConstantPrior::marginalLikelihoodVariance(const TWeightStyleVec &/*weightStyles*/, - const TDouble4Vec &/*weights*/) const -{ +double CConstantPrior::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const { return this->isNonInformative() ? boost::numeric::bounds::highest() : 0.0; } -maths_t::EFloatingPointErrorStatus -CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const -{ +maths_t::EFloatingPointErrorStatus CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" - << core::CContainerPrinter::print(samples) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // The non-informative likelihood is improper and effectively // zero everywhere. We use minus max double because // log(0) = HUGE_VALUE, which causes problems for Windows. @@ -213,10 +163,8 @@ CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, double numberSamples = 0.0; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (samples[i] != *m_Constant) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (samples[i] != *m_Constant) { // Technically infinite, but just use minus max double. 
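            // Aside: std::log(0.0) is -HUGE_VAL; substituting the most
            // negative finite double keeps later arithmetic on the result
            // (for example adding weighted terms) free of infinities and the
            // NaNs they can produce.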
result = boost::numeric::bounds::lowest(); return maths_t::E_FpOverflowed; @@ -229,58 +177,46 @@ CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, return maths_t::E_FpNoErrors; } -void CConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const -{ +void CConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { samples.clear(); - if (this->isNonInformative()) - { + if (this->isNonInformative()) { return; } samples.resize(numberSamples, *m_Constant); } -bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ +bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { lowerBound = upperBound = 0.0; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute c.d.f. for empty sample set"); return false; } double numberSamples = 0.0; - try - { - for (std::size_t i = 0u; i < samples.size(); ++i) - { + try { + for (std::size_t i = 0u; i < samples.size(); ++i) { numberSamples += maths_t::count(weightStyles, weights[i]); } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to compute c.d.f. " << e.what()); return false; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // Note that -log(0.5) = log(2). lowerBound = upperBound = numberSamples * LOG_TWO; return true; } - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (samples[i] < *m_Constant) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (samples[i] < *m_Constant) { lowerBound = upperBound = core::constants::LOG_MAX_DOUBLE; return true; } @@ -292,45 +228,36 @@ bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec &weightStyles, return true; } -bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ +bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { lowerBound = upperBound = 0.0; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute c.d.f. for empty sample set"); return false; } double numberSamples = 0.0; - try - { - for (std::size_t i = 0u; i < samples.size(); ++i) - { + try { + for (std::size_t i = 0u; i < samples.size(); ++i) { numberSamples += maths_t::count(weightStyles, weights[i]); } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to compute c.d.f. " << e.what()); return false; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // Note that -log(0.5) = log(2). 
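        // With no information, each sample is assigned a c.d.f. of one half,
        // so the joint minus log c.d.f. below is just numberSamples * log(2).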
lowerBound = upperBound = numberSamples * LOG_TWO; return true; } - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (samples[i] > *m_Constant) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (samples[i] > *m_Constant) { lowerBound = upperBound = core::constants::LOG_MAX_DOUBLE; return true; } @@ -343,121 +270,93 @@ bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec &weightSty } bool CConstantPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation /*calculation*/, - const TWeightStyleVec &/*weightStyles*/, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &/*weights*/, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const -{ + const TWeightStyleVec& /*weightStyles*/, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& /*weights*/, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const { lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute probability for empty sample set"); return false; } lowerBound = upperBound = 1.0; - if (this->isNonInformative()) - { + if (this->isNonInformative()) { return true; } int tail_ = 0; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (samples[i] != *m_Constant) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (samples[i] != *m_Constant) { lowerBound = upperBound = 0.0; } - if (samples[i] < *m_Constant) - { + if (samples[i] < *m_Constant) { tail_ = tail_ | maths_t::E_LeftTail; - } - else if (samples[i] > *m_Constant) - { + } else if (samples[i] > *m_Constant) { tail_ = tail_ | maths_t::E_RightTail; } } - LOG_TRACE("samples = " << core::CContainerPrinter::print(samples) - << ", constant = " << *m_Constant - << ", lowerBound = " << lowerBound - << ", upperBound = " << upperBound - << ", tail = " << tail); + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples) << ", constant = " << *m_Constant << ", lowerBound = " << lowerBound + << ", upperBound = " << upperBound << ", tail = " << tail); tail = static_cast(tail_); return true; } -bool CConstantPrior::isNonInformative() const -{ +bool CConstantPrior::isNonInformative() const { return !m_Constant; } -void CConstantPrior::print(const std::string &indent, - std::string &result) const -{ - result += core_t::LINE_ENDING + indent + "constant " - + (this->isNonInformative() ? - std::string("non-informative") : - core::CStringUtils::typeToString(*m_Constant)); +void CConstantPrior::print(const std::string& indent, std::string& result) const { + result += core_t::LINE_ENDING + indent + "constant " + + (this->isNonInformative() ? std::string("non-informative") : core::CStringUtils::typeToString(*m_Constant)); } -std::string CConstantPrior::printMarginalLikelihoodFunction(double /*weight*/) const -{ +std::string CConstantPrior::printMarginalLikelihoodFunction(double /*weight*/) const { // The marginal likelihood is zero everywhere and infinity // at the constant so not particularly interesting and we don't // bother to define this function. return EMPTY_STRING; } -std::string CConstantPrior::printJointDensityFunction() const -{ +std::string CConstantPrior::printJointDensityFunction() const { // The prior is (arguably) Dirichlet with infinite concentration // at the constant so not particularly interesting and we don't // bother to define this function. 
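    // Aside: all of the probability mass sits at the single point *m_Constant,
    // so there is no finite density curve to print.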
return EMPTY_STRING; } -uint64_t CConstantPrior::checksum(uint64_t seed) const -{ +uint64_t CConstantPrior::checksum(uint64_t seed) const { seed = this->CPrior::checksum(seed); return CChecksum::calculate(seed, m_Constant); } -void CConstantPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CConstantPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CConstantPrior"); } -std::size_t CConstantPrior::memoryUsage() const -{ +std::size_t CConstantPrior::memoryUsage() const { return 0; } -std::size_t CConstantPrior::staticSize() const -{ +std::size_t CConstantPrior::staticSize() const { return sizeof(*this); } -void CConstantPrior::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - if (m_Constant) - { - inserter.insertValue(CONSTANT_TAG, - *m_Constant, - core::CIEEE754::E_DoublePrecision); +void CConstantPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + if (m_Constant) { + inserter.insertValue(CONSTANT_TAG, *m_Constant, core::CIEEE754::E_DoublePrecision); } } -CConstantPrior::TOptionalDouble CConstantPrior::constant() const -{ +CConstantPrior::TOptionalDouble CConstantPrior::constant() const { return m_Constant; } - } } diff --git a/lib/maths/CCooccurrences.cc b/lib/maths/CCooccurrences.cc index ee10eccce5..48332f287b 100644 --- a/lib/maths/CCooccurrences.cc +++ b/lib/maths/CCooccurrences.cc @@ -21,13 +21,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; @@ -40,19 +37,12 @@ using TPointVec = std::vector; using TPackedBitVectorVec = std::vector; //! \brief Counts the (co-)occurrences of two variables. -struct SCooccurrence -{ - SCooccurrence() : - s_Nxy(0.0), s_Nx(0.0), s_Ny(0.0), s_X(0), s_Y(0) - {} - SCooccurrence(double nxy, double nx, double ny, std::size_t x, std::size_t y) : - s_Nxy(nxy), s_Nx(nx), s_Ny(ny), s_X(x), s_Y(y) - {} - - bool operator<(const SCooccurrence &rhs) const - { - return s_Nxy * static_cast(rhs.s_X) * static_cast(rhs.s_Y) - < rhs.s_Nxy * s_Nx * s_Ny; +struct SCooccurrence { + SCooccurrence() : s_Nxy(0.0), s_Nx(0.0), s_Ny(0.0), s_X(0), s_Y(0) {} + SCooccurrence(double nxy, double nx, double ny, std::size_t x, std::size_t y) : s_Nxy(nxy), s_Nx(nx), s_Ny(ny), s_X(x), s_Y(y) {} + + bool operator<(const SCooccurrence& rhs) const { + return s_Nxy * static_cast(rhs.s_X) * static_cast(rhs.s_Y) < rhs.s_Nxy * s_Nx * s_Ny; } double s_Nxy, s_Nx, s_Ny; @@ -62,8 +52,7 @@ struct SCooccurrence using TMostSignificant = CBasicStatistics::COrderStatisticsHeap; //! Compute \p x * \p x. -double pow2(double x) -{ +double pow2(double x) { return x * x; } @@ -71,10 +60,8 @@ double pow2(double x) //! //! \param[in] dimension The dimension. //! \param[out] result Filled in with the projection. 
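//! The generator below draws each coordinate from a two-state Markov chain:
//! the first bit is uniform and every later bit keeps the previous value,
//! flipping only with a small transition probability. The long runs this
//! produces keep the packed (run-length) representation compact. A minimal
//! sketch of the same scheme, assuming a flip probability p and using
//! std::mt19937 in place of the library's own RNG:
//!
//!     #include <cstddef>
//!     #include <random>
//!     #include <vector>
//!
//!     std::vector<bool> markovBits(std::size_t dimension, double p, std::mt19937& rng) {
//!         std::vector<bool> bits;
//!         if (dimension == 0) { return bits; }
//!         std::uniform_real_distribution<double> u01(0.0, 1.0);
//!         bits.reserve(dimension);
//!         bool last = u01(rng) < 0.5; // Uniform initial state.
//!         bits.push_back(last);
//!         for (std::size_t i = 1; i < dimension; ++i) {
//!             if (u01(rng) < p) { last = !last; } // Flip with probability p.
//!             bits.push_back(last);
//!         }
//!         return bits;
//!     }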
-void generateProjection(std::size_t dimension, CPackedBitVector &result) -{ - if (dimension == 0) - { +void generateProjection(std::size_t dimension, CPackedBitVector& result) { + if (dimension == 0) { return; } @@ -95,10 +82,8 @@ void generateProjection(std::size_t dimension, CPackedBitVector &result) bool last = (uniform01[0] < 0.5); result.extend(last); - for (std::size_t i = 1; i < uniform01.size(); ++i) - { - if (uniform01[i] < TRANSITION_PROBABILITY) - { + for (std::size_t i = 1; i < uniform01.size(); ++i) { + if (uniform01[i] < TRANSITION_PROBABILITY) { last = !last; } result.extend(last); @@ -111,19 +96,13 @@ void generateProjection(std::size_t dimension, CPackedBitVector &result) //! \param[in] lengths The Euclidean lengths of the indicator vectors. //! \param[in] mask A mask of events to consider. //! \param[in] result Filled in with the p projections of indicator vectors. -void generateProjections(const TPackedBitVectorVec &indicators, - const TDoubleVec &lengths, - const TSizeVec &mask, - TDoubleVecVec &result) -{ +void generateProjections(const TPackedBitVectorVec& indicators, const TDoubleVec& lengths, const TSizeVec& mask, TDoubleVecVec& result) { std::size_t dimension = indicators[0].dimension(); - for (std::size_t i = 0u; i < result.size(); ++i) - { + for (std::size_t i = 0u; i < result.size(); ++i) { CPackedBitVector projection; generateProjection(dimension, projection); double length = projection.euclidean(); - for (std::size_t j = 0u; j < mask.size(); ++j) - { + for (std::size_t j = 0u; j < mask.size(); ++j) { std::size_t k = mask[j]; result[i][j] = indicators[k].inner(projection) / lengths[k] / length; } @@ -140,27 +119,22 @@ void generateProjections(const TPackedBitVectorVec &indicators, //! to this set. //! \param[out] mostSignificant Maybe updated to include the co-occurrence //! of \p x and \p y. -void testCooccurrence(const TPackedBitVectorVec &indicators, - const TDoubleVec &lengths, +void testCooccurrence(const TPackedBitVectorVec& indicators, + const TDoubleVec& lengths, std::size_t x, std::size_t y, - TSizeSizePrUSet &added, - TMostSignificant &mostSignificant) -{ - if (x > y) - { + TSizeSizePrUSet& added, + TMostSignificant& mostSignificant) { + if (x > y) { std::swap(x, y); } - if (added.count(std::make_pair(x, y)) == 0) - { + if (added.count(std::make_pair(x, y)) == 0) { double nxy = indicators[x].inner(indicators[y]); std::size_t count = mostSignificant.count(); std::size_t u = mostSignificant.biggest().s_X; std::size_t v = mostSignificant.biggest().s_Y; - if (mostSignificant.add(SCooccurrence(nxy, lengths[x], lengths[y], x, y))) - { - if (mostSignificant.count() == count) - { + if (mostSignificant.add(SCooccurrence(nxy, lengths[x], lengths[y], x, y))) { + if (mostSignificant.count() == count) { added.erase(std::make_pair(u, v)); } added.insert(std::make_pair(x, y)); @@ -179,27 +153,23 @@ void testCooccurrence(const TPackedBitVectorVec &indicators, //! significantly co-occurring event indices. //! \param[out] mostSignificant Filled in with the most seed significantly //! co-occurring event pairs. 
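//! The angle used below embeds each event at theta = acos(sqrt(sum_j c_j^2)),
//! where c_j is the cosine of the event's indicator vector with the j'th
//! random projection; up to the projections not being exactly orthonormal,
//! this is the angle between the indicator vector and the projections' span.
//! Co-occurring events have similar projections and land at nearby angles,
//! which is why adjacent events in the sorted order make good seed pairs.
//! A small sketch of the embedding for one event (hypothetical helper):
//!
//!     #include <cmath>
//!     #include <vector>
//!
//!     double angleToProjectionSpan(const std::vector<double>& cosines) {
//!         double sum = 0.0;
//!         for (double c : cosines) { sum += c * c; }
//!         return std::acos(std::sqrt(sum));
//!     }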
-void seed(const TPackedBitVectorVec &indicators, - const TDoubleVec &lengths, +void seed(const TPackedBitVectorVec& indicators, + const TDoubleVec& lengths, TSizeVec mask, - const TDoubleVecVec &projected, - TSizeSizePrUSet &added, - TMostSignificant &mostSignificant) -{ + const TDoubleVecVec& projected, + TSizeSizePrUSet& added, + TMostSignificant& mostSignificant) { std::size_t n = mask.size(); TDoubleVec theta(n, 0.0); - for (std::size_t i = 0u; i < n; ++i) - { - for (std::size_t j = 0u; j < projected.size(); ++j) - { + for (std::size_t i = 0u; i < n; ++i) { + for (std::size_t j = 0u; j < projected.size(); ++j) { theta[i] += pow2(projected[j][i]); } theta[i] = std::acos(std::sqrt(theta[i])); } COrderings::simultaneousSort(theta, mask); - for (std::size_t i = 1u; i < n; ++i) - { - testCooccurrence(indicators, lengths, mask[i-1], mask[i], added, mostSignificant); + for (std::size_t i = 1u; i < n; ++i) { + testCooccurrence(indicators, lengths, mask[i - 1], mask[i], added, mostSignificant); } } @@ -210,17 +180,10 @@ void seed(const TPackedBitVectorVec &indicators, //! \param[in] i The index into \p theta for which to compute the filter. //! \param[in] bound The largest angularly separated event to include. //! \param[out] result The indices of the events in the filter. -void computeFilter(const TSizeVec &mask, - const TDoubleVec &theta, - std::size_t i, - double bound, - TSizeVec &result) -{ +void computeFilter(const TSizeVec& mask, const TDoubleVec& theta, std::size_t i, double bound, TSizeVec& result) { result.clear(); - ptrdiff_t start = std::lower_bound(theta.begin(), theta.end(), - theta[i] - bound) - theta.begin(); - ptrdiff_t end = std::upper_bound(theta.begin(), theta.end(), - theta[i] + bound) - theta.begin(); + ptrdiff_t start = std::lower_bound(theta.begin(), theta.end(), theta[i] - bound) - theta.begin(); + ptrdiff_t end = std::upper_bound(theta.begin(), theta.end(), theta[i] + bound) - theta.begin(); result.reserve(end - start); result.insert(result.end(), mask.begin() + start, mask.begin() + i); result.insert(result.end(), mask.begin() + i + 1, mask.begin() + end); @@ -228,13 +191,9 @@ void computeFilter(const TSizeVec &mask, } //! Apply \p filter to \p result (set intersection). -void applyFilter(const TSizeVec &filter, - TSizeVec &placeholder, - TSizeVec &result) -{ +void applyFilter(const TSizeVec& filter, TSizeVec& placeholder, TSizeVec& result) { placeholder.clear(); - std::set_intersection(result.begin(), result.end(), - filter.begin(), filter.end(), std::back_inserter(placeholder)); + std::set_intersection(result.begin(), result.end(), filter.begin(), filter.end(), std::back_inserter(placeholder)); result.swap(placeholder); } @@ -246,12 +205,11 @@ void applyFilter(const TSizeVec &filter, //! \param[in] p The number of projections to use. //! \param[in] mostSignificant Filled in with the most significant co-occurring //! events. 
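// computeFilter above exploits the sorted angles: the events within +/- bound
// of theta[i] form a contiguous range, so two binary searches replace a linear
// scan. A standalone sketch of that window selection (ascending input assumed):
//
//     #include <algorithm>
//     #include <cstddef>
//     #include <vector>
//
//     std::vector<std::size_t> angularWindow(const std::vector<double>& sortedTheta,
//                                            std::size_t i, double bound) {
//         auto lo = std::lower_bound(sortedTheta.begin(), sortedTheta.end(), sortedTheta[i] - bound);
//         auto hi = std::upper_bound(sortedTheta.begin(), sortedTheta.end(), sortedTheta[i] + bound);
//         std::vector<std::size_t> result;
//         for (auto itr = lo; itr != hi; ++itr) {
//             std::size_t j = static_cast<std::size_t>(itr - sortedTheta.begin());
//             if (j != i) { result.push_back(j); } // Exclude the event itself.
//         }
//         return result;
//     }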
-void searchForMostSignificantCooccurrences(const TPackedBitVectorVec &indicators, - const TDoubleVec &lengths, - const TSizeVec &mask, +void searchForMostSignificantCooccurrences(const TPackedBitVectorVec& indicators, + const TDoubleVec& lengths, + const TSizeVec& mask, std::size_t p, - TMostSignificant &mostSignificant) -{ + TMostSignificant& mostSignificant) { // This uses the fact that after projecting the values using // f : x -> (||p^t x|| / ||x||, ||(1 - p p^t) x|| / || x ||) the // Euclidean separation ||f(x) - f(y)||^2 = 2 ( 1 - x^t y / ( ||x|| ||y|| ) ) @@ -269,10 +227,8 @@ void searchForMostSignificantCooccurrences(const TPackedBitVectorVec &indicators seed(indicators, lengths, mask, thetas, added, mostSignificant); TSizeVecVec masks(p, mask); - for (std::size_t i = 0u; i < p; ++i) - { - for (std::size_t j = 0u; j < n; ++j) - { + for (std::size_t i = 0u; i < p; ++i) { + for (std::size_t j = 0u; j < n; ++j) { thetas[i][j] = std::acos(thetas[i][j]); } COrderings::simultaneousSort(thetas[i], masks[i]); @@ -282,22 +238,18 @@ void searchForMostSignificantCooccurrences(const TPackedBitVectorVec &indicators TSizeVec filter; TSizeVec placeholder; - for (std::size_t i = 0u; i < n; ++i) - { - double lambda = mostSignificant.biggest().s_Nxy - / (mostSignificant.biggest().s_Nx * mostSignificant.biggest().s_Ny); + for (std::size_t i = 0u; i < n; ++i) { + double lambda = mostSignificant.biggest().s_Nxy / (mostSignificant.biggest().s_Nx * mostSignificant.biggest().s_Ny); double bound = 2.0 * std::asin(1.0 - lambda); computeFilter(masks[0], thetas[0], i, bound, candidates); - for (std::size_t j = 1u; !candidates.empty() && j < p; ++j) - { + for (std::size_t j = 1u; !candidates.empty() && j < p; ++j) { computeFilter(masks[j], thetas[j], i, bound, filter); applyFilter(filter, placeholder, candidates); } - for (std::size_t j = 0u; j < candidates.size(); ++j) - { + for (std::size_t j = 0u; j < candidates.size(); ++j) { testCooccurrence(indicators, lengths, mask[i], candidates[j], added, mostSignificant); } } @@ -310,8 +262,7 @@ void searchForMostSignificantCooccurrences(const TPackedBitVectorVec &indicators //! \param[in] nx The count of occurrences of x. //! \param[in] ny The count of occurrences of y. //! \param[in] n The total sample size. -double significance(double nxy, double nx, double ny, double n) -{ +double significance(double nxy, double nx, double ny, double n) { // Here we test a nested composite hypothesis. // // Our null hypothesis H0 is that the probability of seeing x is independent @@ -365,22 +316,19 @@ double significance(double nxy, double nx, double ny, double n) // // which gives us the size of the test. 
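    // In symbols, writing px = nx / n, py = ny / n and g = (nxy * n) / (nx * ny),
    // the statistic computed below is
    //
    //   lambda = n * [ -g px py log(g)
    //                  + px (1 - g py) log((1 - py) / (1 - g py))
    //                  + py (1 - g px) log((1 - px) / (1 - g px))
    //                  + (1 - px - py + g px py) log((1 - px) (1 - py) / (1 - px - py + g px py)) ]
    //
    // i.e. the log likelihood ratio of the null to the alternative, so that
    // -2 * lambda is, by Wilks' theorem, asymptotically chi-squared with one
    // degree of freedom under H0; hence the chi_squared_distribution<> with
    // one degree of freedom in the code that follows.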
- if (nx == 0.0 || ny == 0.0) - { + if (nx == 0.0 || ny == 0.0) { return 1.0; } double g = (nxy * n) / (nx * ny); - if (g > 1.0) - { + if (g > 1.0) { double px = nx / n; double py = ny / n; - double lambda = n * ( -g * px * py * std::log(g) - + px * (1.0 - g * py) * std::log((1.0 - py) / (1.0 - g * py)) - + py * (1.0 - g * px) * std::log((1.0 - px) / (1.0 - g * px)) - + (1.0 - px - py + g*px*py) * std::log((1.0 - px) * (1.0 - py) / (1.0 - px - py + g*px*py))); + double lambda = n * (-g * px * py * std::log(g) + px * (1.0 - g * py) * std::log((1.0 - py) / (1.0 - g * py)) + + py * (1.0 - g * px) * std::log((1.0 - px) / (1.0 - g * px)) + + (1.0 - px - py + g * px * py) * std::log((1.0 - px) * (1.0 - py) / (1.0 - px - py + g * px * py))); boost::math::chi_squared_distribution<> chi(1.0); @@ -394,64 +342,44 @@ std::string LENGTH_TAG("a"); std::string OFFSET_TAG("b"); std::string CURRENT_INDICATOR_TAG("c"); std::string INDICATOR_TAG("d"); - } -CCooccurrences::CCooccurrences(std::size_t maximumLength, std::size_t indicatorWidth) : - m_MaximumLength(maximumLength), - m_Length(0), - m_IndicatorWidth(indicatorWidth), - m_Offset(0) -{ +CCooccurrences::CCooccurrences(std::size_t maximumLength, std::size_t indicatorWidth) + : m_MaximumLength(maximumLength), m_Length(0), m_IndicatorWidth(indicatorWidth), m_Offset(0) { } -bool CCooccurrences::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if ( name == LENGTH_TAG - && core::CStringUtils::stringToType(traverser.value(), m_Length) == false) - { +bool CCooccurrences::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == LENGTH_TAG && core::CStringUtils::stringToType(traverser.value(), m_Length) == false) { LOG_ERROR("Invalid length in " << traverser.value()); return false; } - if ( name == OFFSET_TAG - && core::CStringUtils::stringToType(traverser.value(), m_Offset) == false) - { + if (name == OFFSET_TAG && core::CStringUtils::stringToType(traverser.value(), m_Offset) == false) { LOG_ERROR("Invalid offset in " << traverser.value()); return false; } - if (core::CPersistUtils::restore(CURRENT_INDICATOR_TAG, m_CurrentIndicators, traverser) == false) - { + if (core::CPersistUtils::restore(CURRENT_INDICATOR_TAG, m_CurrentIndicators, traverser) == false) { LOG_ERROR("Invalid indicators in " << traverser.value()); return false; } - if (core::CPersistUtils::restore(INDICATOR_TAG, m_Indicators, traverser) == false) - { + if (core::CPersistUtils::restore(INDICATOR_TAG, m_Indicators, traverser) == false) { LOG_ERROR("Invalid indicators in " << traverser.value()); return false; } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CCooccurrences::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CCooccurrences::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(LENGTH_TAG, m_Length); inserter.insertValue(OFFSET_TAG, m_Offset); core::CPersistUtils::persist(CURRENT_INDICATOR_TAG, m_CurrentIndicators, inserter); core::CPersistUtils::persist(INDICATOR_TAG, m_Indicators, inserter); } -void CCooccurrences::topNBySignificance(std::size_t X, - std::size_t /*n*/, - TSizeSizePrVec &/*top*/, - TDoubleVec &/*significances*/) const -{ - if (X >= m_Indicators.size()) - { +void CCooccurrences::topNBySignificance(std::size_t X, std::size_t /*n*/, TSizeSizePrVec& /*top*/, TDoubleVec& /*significances*/) const { + if (X >= 
m_Indicators.size()) { LOG_ERROR("Unexpected event " << X); return; } @@ -459,17 +387,13 @@ void CCooccurrences::topNBySignificance(std::size_t X, // TODO } -void CCooccurrences::topNBySignificance(std::size_t n, - TSizeSizePrVec &top, - TDoubleVec &significances) const -{ +void CCooccurrences::topNBySignificance(std::size_t n, TSizeSizePrVec& top, TDoubleVec& significances) const { top.clear(); significances.clear(); std::size_t N = m_Indicators.size(); - if (N == 0) - { + if (N == 0) { return; } @@ -479,11 +403,9 @@ void CCooccurrences::topNBySignificance(std::size_t n, TSizeVec mask; mask.reserve(N); - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { lengths[i] = m_Indicators[i].euclidean(); - if (lengths[i] > 0.0) - { + if (lengths[i] > 0.0) { mask.push_back(i); } } @@ -497,83 +419,67 @@ void CCooccurrences::topNBySignificance(std::size_t n, top.reserve(mostSignificant.count()); significances.reserve(mostSignificant.count()); - for (std::size_t i = 0u; i < mostSignificant.count(); ++i) - { - const SCooccurrence &co = mostSignificant[i]; + for (std::size_t i = 0u; i < mostSignificant.count(); ++i) { + const SCooccurrence& co = mostSignificant[i]; double nxy = static_cast(co.s_Nxy); - double nx = static_cast(co.s_Nx); - double ny = static_cast(co.s_Ny); + double nx = static_cast(co.s_Nx); + double ny = static_cast(co.s_Ny); top.emplace_back(co.s_X, co.s_Y); significances.push_back(significance(nxy, nx, ny, static_cast(dimension))); } } -void CCooccurrences::addEventStreams(std::size_t n) -{ - if (n > m_Indicators.size()) - { +void CCooccurrences::addEventStreams(std::size_t n) { + if (n > m_Indicators.size()) { core::CAllocationStrategy::resize(m_Indicators, n, CPackedBitVector(m_Length, false)); } } -void CCooccurrences::removeEventStreams(const TSizeVec &remove) -{ - for (std::size_t i = 0u; i < remove.size(); ++i) - { +void CCooccurrences::removeEventStreams(const TSizeVec& remove) { + for (std::size_t i = 0u; i < remove.size(); ++i) { std::size_t X = remove[i]; - if (X < m_Indicators.size()) - { + if (X < m_Indicators.size()) { m_Indicators[X] = CPackedBitVector(); } } } -void CCooccurrences::recycleEventStreams(const TSizeVec &recycle) -{ - for (std::size_t i = 0u; i < recycle.size(); ++i) - { +void CCooccurrences::recycleEventStreams(const TSizeVec& recycle) { + for (std::size_t i = 0u; i < recycle.size(); ++i) { std::size_t X = recycle[i]; - if (X < m_Indicators.size()) - { + if (X < m_Indicators.size()) { m_Indicators[X] = CPackedBitVector(m_Length, false); } } } -void CCooccurrences::add(std::size_t X) -{ - if (X >= m_Indicators.size()) - { +void CCooccurrences::add(std::size_t X) { + if (X >= m_Indicators.size()) { LOG_ERROR("Unexpected event " << X); return; } m_CurrentIndicators.insert(X); } -void CCooccurrences::capture() -{ - if (++m_Offset < m_IndicatorWidth) - { +void CCooccurrences::capture() { + if (++m_Offset < m_IndicatorWidth) { return; } m_Offset = 0; m_Length = std::min(m_Length + 1, m_MaximumLength); - for (std::size_t X = 0u; X < m_Indicators.size(); ++X) - { - CPackedBitVector &indicator = m_Indicators[X]; + for (std::size_t X = 0u; X < m_Indicators.size(); ++X) { + CPackedBitVector& indicator = m_Indicators[X]; indicator.extend(m_CurrentIndicators.count(X) > 0); - while (indicator.dimension() > m_MaximumLength) - { + while (indicator.dimension() > m_MaximumLength) { indicator.contract(); } } m_CurrentIndicators.clear(); } -uint64_t CCooccurrences::checksum(uint64_t seed) const -{ +uint64_t CCooccurrences::checksum(uint64_t seed) 
const { seed = CChecksum::calculate(seed, m_MaximumLength); seed = CChecksum::calculate(seed, m_Length); seed = CChecksum::calculate(seed, m_IndicatorWidth); @@ -582,19 +488,16 @@ uint64_t CCooccurrences::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_Indicators); } -void CCooccurrences::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CCooccurrences::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCooccurrences"); core::CMemoryDebug::dynamicSize("m_CurrentIndicators", m_CurrentIndicators, mem); core::CMemoryDebug::dynamicSize("m_Indicators", m_Indicators, mem); } -std::size_t CCooccurrences::memoryUsage() const -{ +std::size_t CCooccurrences::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_CurrentIndicators); mem += core::CMemory::dynamicSize(m_Indicators); return mem; } - } } diff --git a/lib/maths/CCountMinSketch.cc b/lib/maths/CCountMinSketch.cc index 2177dad751..bcded2680c 100644 --- a/lib/maths/CCountMinSketch.cc +++ b/lib/maths/CCountMinSketch.cc @@ -19,12 +19,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { const std::string TOTAL_COUNT_TAG("a"); const std::string ROWS_TAG("b"); const std::string COLUMNS_TAG("c"); @@ -39,27 +36,16 @@ const char DELIMITER(':'); const char PAIR_DELIMITER(';'); } -CCountMinSketch::CCountMinSketch(std::size_t rows, - std::size_t columns) : - m_Rows(rows), - m_Columns(columns), - m_TotalCount(0.0), - m_Sketch(TUInt32FloatPrVec()) -{} - -CCountMinSketch::CCountMinSketch(core::CStateRestoreTraverser &traverser) : - m_Rows(0), - m_Columns(0), - m_TotalCount(0.0), - m_Sketch() -{ +CCountMinSketch::CCountMinSketch(std::size_t rows, std::size_t columns) + : m_Rows(rows), m_Columns(columns), m_TotalCount(0.0), m_Sketch(TUInt32FloatPrVec()) { +} + +CCountMinSketch::CCountMinSketch(core::CStateRestoreTraverser& traverser) : m_Rows(0), m_Columns(0), m_TotalCount(0.0), m_Sketch() { traverser.traverseSubLevel(boost::bind(&CCountMinSketch::acceptRestoreTraverser, this, _1)); } -void CCountMinSketch::swap(CCountMinSketch &other) -{ - if (this == &other) - { +void CCountMinSketch::swap(CCountMinSketch& other) { + if (this == &other) { return; } @@ -67,19 +53,14 @@ void CCountMinSketch::swap(CCountMinSketch &other) std::swap(m_Columns, other.m_Columns); std::swap(m_TotalCount, other.m_TotalCount); - try - { - TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { - TUInt32FloatPrVec *otherCounts = boost::get(&other.m_Sketch); - if (otherCounts) - { + try { + TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { + TUInt32FloatPrVec* otherCounts = boost::get(&other.m_Sketch); + if (otherCounts) { counts->swap(*otherCounts); - } - else - { - SSketch &otherSketch = boost::get(other.m_Sketch); + } else { + SSketch& otherSketch = boost::get(other.m_Sketch); TUInt32FloatPrVec tmp; tmp.swap(*counts); m_Sketch = SSketch(); @@ -88,19 +69,14 @@ void CCountMinSketch::swap(CCountMinSketch &other) other.m_Sketch = TUInt32FloatPrVec(); boost::get(other.m_Sketch).swap(tmp); } - } - else - { - SSketch &sketch = boost::get(m_Sketch); - SSketch *otherSketch = boost::get(&other.m_Sketch); - if (otherSketch) - { + } else { + SSketch& sketch = boost::get(m_Sketch); + SSketch* otherSketch = boost::get(&other.m_Sketch); + if (otherSketch) { sketch.s_Hashes.swap(otherSketch->s_Hashes); sketch.s_Counts.swap(otherSketch->s_Counts); - } - else - { - TUInt32FloatPrVec &otherCounts = 
boost::get(other.m_Sketch); + } else { + TUInt32FloatPrVec& otherCounts = boost::get(other.m_Sketch); TUInt32FloatPrVec tmp; tmp.swap(otherCounts); other.m_Sketch = SSketch(); @@ -110,368 +86,246 @@ void CCountMinSketch::swap(CCountMinSketch &other) boost::get(m_Sketch).swap(tmp); } } - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception " << e.what()); } } -bool CCountMinSketch::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == ROWS_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), - m_Rows) == false) - { +bool CCountMinSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == ROWS_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), m_Rows) == false) { LOG_ERROR("Invalid number rows in " << traverser.value()); return false; } - } - else if (name == COLUMNS_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), - m_Columns) == false) - { + } else if (name == COLUMNS_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), m_Columns) == false) { LOG_ERROR("Invalid number columns in " << traverser.value()); return false; } - } - else if (name == TOTAL_COUNT_TAG) - { - if (m_TotalCount.fromString(traverser.value()) == false) - { + } else if (name == TOTAL_COUNT_TAG) { + if (m_TotalCount.fromString(traverser.value()) == false) { LOG_ERROR("Invalid total count in " << traverser.value()); return false; } - } - else if (name == CATEGORY_COUNTS_TAG) - { + } else if (name == CATEGORY_COUNTS_TAG) { m_Sketch = TUInt32FloatPrVec(); - TUInt32FloatPrVec &counts = boost::get(m_Sketch); - if (core::CPersistUtils::fromString(traverser.value(), - counts, DELIMITER, PAIR_DELIMITER) == false) - { + TUInt32FloatPrVec& counts = boost::get(m_Sketch); + if (core::CPersistUtils::fromString(traverser.value(), counts, DELIMITER, PAIR_DELIMITER) == false) { LOG_ERROR("Invalid category counts in " << traverser.value()); return false; } - } - else if (name == SKETCH_TAG) - { + } else if (name == SKETCH_TAG) { m_Sketch = SSketch(); - SSketch &sketch = boost::get(m_Sketch); + SSketch& sketch = boost::get(m_Sketch); sketch.s_Hashes.reserve(m_Rows); sketch.s_Counts.reserve(m_Rows); - if (traverser.traverseSubLevel(boost::bind(&SSketch::acceptRestoreTraverser, - &sketch, _1, m_Rows, m_Columns)) == false) - { + if (traverser.traverseSubLevel(boost::bind(&SSketch::acceptRestoreTraverser, &sketch, _1, m_Rows, m_Columns)) == false) { return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CCountMinSketch::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CCountMinSketch::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(ROWS_TAG, m_Rows); inserter.insertValue(COLUMNS_TAG, m_Columns); inserter.insertValue(TOTAL_COUNT_TAG, m_TotalCount, core::CIEEE754::E_SinglePrecision); - const TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { - inserter.insertValue(CATEGORY_COUNTS_TAG, - core::CPersistUtils::toString(*counts, DELIMITER, PAIR_DELIMITER)); - } - else - { - try - { - const SSketch &sketch = boost::get(m_Sketch); - inserter.insertLevel(SKETCH_TAG, - boost::bind(&SSketch::acceptPersistInserter, &sketch, _1)); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected 
exception " << e.what()); - } + const TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { + inserter.insertValue(CATEGORY_COUNTS_TAG, core::CPersistUtils::toString(*counts, DELIMITER, PAIR_DELIMITER)); + } else { + try { + const SSketch& sketch = boost::get(m_Sketch); + inserter.insertLevel(SKETCH_TAG, boost::bind(&SSketch::acceptPersistInserter, &sketch, _1)); + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception " << e.what()); } } } -std::size_t CCountMinSketch::rows() const -{ +std::size_t CCountMinSketch::rows() const { return m_Rows; } -std::size_t CCountMinSketch::columns() const -{ +std::size_t CCountMinSketch::columns() const { return m_Columns; } -double CCountMinSketch::delta() const -{ - const SSketch *sketch = boost::get(&m_Sketch); - if (!sketch) - { +double CCountMinSketch::delta() const { + const SSketch* sketch = boost::get(&m_Sketch); + if (!sketch) { return 0.0; } return std::exp(-static_cast(m_Rows)); } -double CCountMinSketch::oneMinusDeltaError() const -{ - const SSketch *sketch = boost::get(&m_Sketch); - if (!sketch) - { +double CCountMinSketch::oneMinusDeltaError() const { + const SSketch* sketch = boost::get(&m_Sketch); + if (!sketch) { return 0.0; } - return std::min(boost::math::double_constants::e - / static_cast(m_Columns), 1.0) - * m_TotalCount; + return std::min(boost::math::double_constants::e / static_cast(m_Columns), 1.0) * m_TotalCount; } -void CCountMinSketch::add(uint32_t category, double count) -{ +void CCountMinSketch::add(uint32_t category, double count) { LOG_TRACE("Adding category = " << category << ", count = " << count); m_TotalCount += count; - TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { - auto itr = std::lower_bound(counts->begin(), counts->end(), - category, - COrderings::SFirstLess()); + TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { + auto itr = std::lower_bound(counts->begin(), counts->end(), category, COrderings::SFirstLess()); - if (itr == counts->end() || itr->first != category) - { + if (itr == counts->end() || itr->first != category) { itr = counts->insert(itr, TUInt32FloatPr(category, 0.0)); } itr->second += count; - if (itr->second <= 0.0) - { + if (itr->second <= 0.0) { counts->erase(itr); - } - else - { + } else { this->sketch(); } - } - else - { - try - { - SSketch &sketch = boost::get(m_Sketch); - for (std::size_t i = 0u; i < sketch.s_Hashes.size(); ++i) - { + } else { + try { + SSketch& sketch = boost::get(m_Sketch); + for (std::size_t i = 0u; i < sketch.s_Hashes.size(); ++i) { uint32_t hash = (sketch.s_Hashes[i])(category); std::size_t j = static_cast(hash) % m_Columns; sketch.s_Counts[i][j] += count; LOG_TRACE("count (i,j) = (" << i << "," << j << ")" - << " -> " << sketch.s_Counts[i][j]); + << " -> " << sketch.s_Counts[i][j]); } - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception " << e.what()); } } } -void CCountMinSketch::removeFromMap(uint32_t category) -{ - TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { - auto itr = std::lower_bound(counts->begin(), counts->end(), - category, - COrderings::SFirstLess()); - if (itr != counts->end() && itr->first == category) - { +void CCountMinSketch::removeFromMap(uint32_t category) { + TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { + auto itr = std::lower_bound(counts->begin(), counts->end(), category, COrderings::SFirstLess()); + if (itr != counts->end() && 
itr->first == category) { counts->erase(itr); } } } -void CCountMinSketch::age(double alpha) -{ - TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { - for (std::size_t i = 0u; i < counts->size(); ++i) - { +void CCountMinSketch::age(double alpha) { + TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { + for (std::size_t i = 0u; i < counts->size(); ++i) { (*counts)[i].second *= alpha; } - } - else - { - try - { - SSketch &sketch = boost::get(m_Sketch); - for (std::size_t i = 0u; i < sketch.s_Counts.size(); ++i) - { - for (std::size_t j = 0u; j < sketch.s_Counts[i].size(); ++j) - { + } else { + try { + SSketch& sketch = boost::get(m_Sketch); + for (std::size_t i = 0u; i < sketch.s_Counts.size(); ++i) { + for (std::size_t j = 0u; j < sketch.s_Counts[i].size(); ++j) { sketch.s_Counts[i][j] *= alpha; } } - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception " << e.what()); } } } -double CCountMinSketch::totalCount() const -{ +double CCountMinSketch::totalCount() const { return m_TotalCount; } -double CCountMinSketch::count(uint32_t category) const -{ +double CCountMinSketch::count(uint32_t category) const { using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; - const TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { - auto itr = std::lower_bound(counts->begin(), counts->end(), - category, - COrderings::SFirstLess()); + const TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { + auto itr = std::lower_bound(counts->begin(), counts->end(), category, COrderings::SFirstLess()); - return itr == counts->end() || itr->first != category ? - 0.0 : - static_cast(itr->second); + return itr == counts->end() || itr->first != category ? 0.0 : static_cast(itr->second); } TMinAccumulator result; - try - { - const SSketch &sketch = boost::get(m_Sketch); - for (std::size_t i = 0u; i < sketch.s_Hashes.size(); ++i) - { + try { + const SSketch& sketch = boost::get(m_Sketch); + for (std::size_t i = 0u; i < sketch.s_Hashes.size(); ++i) { uint32_t hash = (sketch.s_Hashes[i])(category); std::size_t j = static_cast(hash) % m_Columns; LOG_TRACE("count (i,j) = (" << i << "," << j << ")" - << " <- " << sketch.s_Counts[i][j]); + << " <- " << sketch.s_Counts[i][j]); result.add(sketch.s_Counts[i][j]); } - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception " << e.what()); } return result.count() > 0 ? 
result[0] : 0.0; } -double CCountMinSketch::fraction(uint32_t category) const -{ +double CCountMinSketch::fraction(uint32_t category) const { return this->count(category) / m_TotalCount; } -bool CCountMinSketch::sketched() const -{ +bool CCountMinSketch::sketched() const { return boost::get(&m_Sketch) != 0; } -uint64_t CCountMinSketch::checksum(uint64_t seed) const -{ +uint64_t CCountMinSketch::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_Rows); seed = CChecksum::calculate(seed, m_Columns); seed = CChecksum::calculate(seed, m_TotalCount); - const TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts == 0) - { - try - { - const SSketch &sketch = boost::get(m_Sketch); + const TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts == 0) { + try { + const SSketch& sketch = boost::get(m_Sketch); seed = CChecksum::calculate(seed, sketch.s_Hashes); return CChecksum::calculate(seed, sketch.s_Counts); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception " << e.what()); } } return CChecksum::calculate(seed, *counts); } -void CCountMinSketch::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CCountMinSketch::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCountMinSketch"); - const TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { + const TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { core::CMemoryDebug::dynamicSize("m_Counts", *counts, mem); - } - else - { - try - { - const SSketch &sketch = boost::get(m_Sketch); + } else { + try { + const SSketch& sketch = boost::get(m_Sketch); mem->addItem("SSketch", sizeof(SSketch)); core::CMemoryDebug::dynamicSize("sketch", sketch, mem); core::CMemoryDebug::dynamicSize("s_Hashes", sketch.s_Hashes, mem); core::CMemoryDebug::dynamicSize("s_Counts", sketch.s_Counts, mem); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception " << e.what()); } } } -std::size_t CCountMinSketch::memoryUsage() const -{ +std::size_t CCountMinSketch::memoryUsage() const { std::size_t mem = 0; - const TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { + const TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { mem += core::CMemory::dynamicSize(*counts); - } - else - { - try - { - const SSketch &sketch = boost::get(m_Sketch); + } else { + try { + const SSketch& sketch = boost::get(m_Sketch); mem += sizeof(SSketch); mem += core::CMemory::dynamicSize(sketch.s_Hashes); mem += core::CMemory::dynamicSize(sketch.s_Counts); - } - catch (const std::exception &e) - { - LOG_ABORT("Unexpected exception " << e.what()); - } + } catch (const std::exception& e) { LOG_ABORT("Unexpected exception " << e.what()); } } return mem; } -void CCountMinSketch::sketch() -{ - static const std::size_t FLOAT_SIZE = sizeof(CFloatStorage); - static const std::size_t HASH_SIZE = sizeof(core::CHashing::CUniversalHash::CUInt32UnrestrictedHash); - static const std::size_t PAIR_SIZE = sizeof(TUInt32FloatPr); - static const std::size_t VEC_SIZE = sizeof(TUInt32FloatPrVec); +void CCountMinSketch::sketch() { + static const std::size_t FLOAT_SIZE = sizeof(CFloatStorage); + static const std::size_t HASH_SIZE = sizeof(core::CHashing::CUniversalHash::CUInt32UnrestrictedHash); + static const std::size_t PAIR_SIZE = sizeof(TUInt32FloatPr); + 
static const std::size_t VEC_SIZE = sizeof(TUInt32FloatPrVec); static const std::size_t SKETCH_SIZE = sizeof(SSketch); - TUInt32FloatPrVec *counts = boost::get(&m_Sketch); - if (counts) - { + TUInt32FloatPrVec* counts = boost::get(&m_Sketch); + if (counts) { std::size_t countsSize = VEC_SIZE + PAIR_SIZE * counts->capacity(); std::size_t sketchSize = SKETCH_SIZE + m_Rows * (m_Columns * FLOAT_SIZE + HASH_SIZE); - if (countsSize > sketchSize) - { - if ( counts->capacity() > counts->size() - && counts->size() < (sketchSize - VEC_SIZE) / PAIR_SIZE) - { + if (countsSize > sketchSize) { + if (counts->capacity() > counts->size() && counts->size() < (sketchSize - VEC_SIZE) / PAIR_SIZE) { TUInt32FloatPrVec shrunk; shrunk.reserve((sketchSize - VEC_SIZE) / PAIR_SIZE); shrunk.assign(counts->begin(), counts->end()); @@ -485,74 +339,50 @@ void CCountMinSketch::sketch() counts_.swap(*counts); m_TotalCount = 0.0; m_Sketch = SSketch(m_Rows, m_Columns); - for (std::size_t i = 0u; i < counts_.size(); ++i) - { + for (std::size_t i = 0u; i < counts_.size(); ++i) { this->add(counts_[i].first, counts_[i].second); } } } } -CCountMinSketch::SSketch::SSketch(std::size_t rows, - std::size_t columns) : - s_Counts(rows, TFloatVec(columns, 0.0)) -{ +CCountMinSketch::SSketch::SSketch(std::size_t rows, std::size_t columns) : s_Counts(rows, TFloatVec(columns, 0.0)) { core::CHashing::CUniversalHash::generateHashes(rows, s_Hashes); } -bool CCountMinSketch::SSketch::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser, - std::size_t rows, - std::size_t columns) -{ - do - { - const std::string &name = traverser.name(); - if (name == HASHES_TAG) - { +bool CCountMinSketch::SSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::size_t rows, std::size_t columns) { + do { + const std::string& name = traverser.name(); + if (name == HASHES_TAG) { core::CHashing::CUniversalHash::CFromString hashFromString(PAIR_DELIMITER); - if ( core::CPersistUtils::fromString(traverser.value(), - hashFromString, - s_Hashes, - DELIMITER) == false - || s_Hashes.size() != rows) - { + if (core::CPersistUtils::fromString(traverser.value(), hashFromString, s_Hashes, DELIMITER) == false || + s_Hashes.size() != rows) { LOG_ERROR("Invalid hashes in " << traverser.value()); return false; } - } - else if (name == COUNTS_TAG) - { + } else if (name == COUNTS_TAG) { s_Counts.push_back(TFloatVec()); - if ( core::CPersistUtils::fromString(traverser.value(), - s_Counts.back(), - DELIMITER) == false - || s_Counts.back().size() != columns) - { + if (core::CPersistUtils::fromString(traverser.value(), s_Counts.back(), DELIMITER) == false || + s_Counts.back().size() != columns) { LOG_ERROR("Invalid counts in " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); - if (s_Counts.size() != rows) - { - LOG_ERROR("Unexpected number of counts " << s_Counts.size() - << ", number of rows " << rows); + if (s_Counts.size() != rows) { + LOG_ERROR("Unexpected number of counts " << s_Counts.size() << ", number of rows " << rows); return false; } return true; } -void CCountMinSketch::SSketch::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CCountMinSketch::SSketch::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CHashing::CUniversalHash::CToString hashToString(PAIR_DELIMITER); inserter.insertValue(HASHES_TAG, core::CPersistUtils::toString(s_Hashes, hashToString, DELIMITER)); - for (const auto &count : s_Counts) - { + for (const auto& count : 
s_Counts) { inserter.insertValue(COUNTS_TAG, core::CPersistUtils::toString(count, DELIMITER)); } } - } } diff --git a/lib/maths/CDecayRateController.cc b/lib/maths/CDecayRateController.cc index f5a867fd81..8c096da8c7 100644 --- a/lib/maths/CDecayRateController.cc +++ b/lib/maths/CDecayRateController.cc @@ -7,8 +7,8 @@ #include #include -#include #include +#include #include #include @@ -22,12 +22,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { const std::string TARGET_TAG{"a"}; const std::string PREDICTION_MEAN_TAG{"b"}; @@ -79,80 +76,66 @@ const double MAXIMUM_MULTIPLIER{40.0}; //! Compute the \p learnRate and \p decayRate adjusted minimum //! count to control. -double minimumCountToControl(double learnRate, double decayRate) -{ +double minimumCountToControl(double learnRate, double decayRate) { return 0.0005 * MINIMUM_COUNT_TO_CONTROL * learnRate / decayRate; } //! Adjust the decay rate multiplier for long bucket lengths. -double adjustMultiplier(double multiplier, core_t::TTime bucketLength_) -{ +double adjustMultiplier(double multiplier, core_t::TTime bucketLength_) { double bucketLength{static_cast(bucketLength_)}; return std::pow(multiplier, std::min(bucketLength / 1800.0, 1.0)); } //! Adjust the maximum decay rate multiplier for long bucket lengths. -double adjustedMaximumMultiplier(core_t::TTime bucketLength_) -{ +double adjustedMaximumMultiplier(core_t::TTime bucketLength_) { double bucketLength{static_cast(bucketLength_)}; return MAXIMUM_MULTIPLIER / (1.0 + CTools::truncate((bucketLength - 1800.0) / 86400.0, 0.0, 1.0)); } - } -CDecayRateController::CDecayRateController() : m_Checks(0), m_Target(1.0) -{ +CDecayRateController::CDecayRateController() : m_Checks(0), m_Target(1.0) { m_Multiplier.add(m_Target); } -CDecayRateController::CDecayRateController(int checks, std::size_t dimension) : - m_Checks(checks), - m_Target(1.0), - m_PredictionMean(dimension), - m_Bias(dimension), - m_RecentAbsError(dimension), - m_HistoricalAbsError(dimension) -{ +CDecayRateController::CDecayRateController(int checks, std::size_t dimension) + : m_Checks(checks), + m_Target(1.0), + m_PredictionMean(dimension), + m_Bias(dimension), + m_RecentAbsError(dimension), + m_HistoricalAbsError(dimension) { m_Multiplier.add(m_Target); } -void CDecayRateController::reset() -{ - m_Target = 1.0; - m_Multiplier = TMeanAccumulator(); +void CDecayRateController::reset() { + m_Target = 1.0; + m_Multiplier = TMeanAccumulator(); m_PredictionMean = TMeanAccumulator1Vec(m_PredictionMean.size()); - m_Bias = TMeanAccumulator1Vec(m_Bias.size()); + m_Bias = TMeanAccumulator1Vec(m_Bias.size()); m_RecentAbsError = TMeanAccumulator1Vec(m_RecentAbsError.size()); m_HistoricalAbsError = TMeanAccumulator1Vec(m_HistoricalAbsError.size()); m_Multiplier.add(m_Target); } -bool CDecayRateController::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CDecayRateController::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { m_Multiplier = TMeanAccumulator(); - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(TARGET_TAG, m_Target) RESTORE(MULTIPLIER_TAG, m_Multiplier.fromDelimited(traverser.value())) RESTORE(RNG_TAG, m_Rng.fromString(traverser.value())) RESTORE(PREDICTION_MEAN_TAG, core::CPersistUtils::restore(PREDICTION_MEAN_TAG, m_PredictionMean, traverser)); RESTORE(BIAS_TAG, core::CPersistUtils::restore(BIAS_TAG, m_Bias, traverser)) - 
RESTORE(RECENT_ABS_ERROR_TAG, - core::CPersistUtils::restore(RECENT_ABS_ERROR_TAG, m_RecentAbsError, traverser)) - RESTORE(HISTORICAL_ABS_ERROR_TAG, - core::CPersistUtils::restore(HISTORICAL_ABS_ERROR_TAG, m_HistoricalAbsError, traverser)) - } - while (traverser.next()); - if (CBasicStatistics::count(m_Multiplier) == 0.0) - { + RESTORE(RECENT_ABS_ERROR_TAG, core::CPersistUtils::restore(RECENT_ABS_ERROR_TAG, m_RecentAbsError, traverser)) + RESTORE(HISTORICAL_ABS_ERROR_TAG, core::CPersistUtils::restore(HISTORICAL_ABS_ERROR_TAG, m_HistoricalAbsError, traverser)) + } while (traverser.next()); + if (CBasicStatistics::count(m_Multiplier) == 0.0) { m_Multiplier.add(m_Target); } return true; } -void CDecayRateController::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CDecayRateController::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(TARGET_TAG, m_Target); inserter.insertValue(MULTIPLIER_TAG, m_Multiplier.toDelimited()); inserter.insertValue(RNG_TAG, m_Rng.toString()); @@ -162,12 +145,11 @@ void CDecayRateController::acceptPersistInserter(core::CStatePersistInserter &in core::CPersistUtils::persist(HISTORICAL_ABS_ERROR_TAG, m_HistoricalAbsError, inserter); } -double CDecayRateController::multiplier(const TDouble1Vec &prediction, - const TDouble1VecVec &predictionErrors, +double CDecayRateController::multiplier(const TDouble1Vec& prediction, + const TDouble1VecVec& predictionErrors, core_t::TTime bucketLength, double learnRate, - double decayRate) -{ + double decayRate) { // We could estimate the, presumably non-linear, function describing // the dynamics of the various error quantities and minimize the bias // and short term absolute prediction error using the decay rate as a @@ -180,21 +162,17 @@ double CDecayRateController::multiplier(const TDouble1Vec &prediction, std::size_t dimension{m_PredictionMean.size()}; double count{this->count()}; - TMeanAccumulator1Vec *stats_[]{&m_Bias, &m_RecentAbsError, &m_HistoricalAbsError}; + TMeanAccumulator1Vec* stats_[]{&m_Bias, &m_RecentAbsError, &m_HistoricalAbsError}; double numberPredictionErrors{static_cast(predictionErrors.size())}; - for (auto predictionError : predictionErrors) - { - if (predictionError.empty()) - { + for (auto predictionError : predictionErrors) { + if (predictionError.empty()) { continue; } - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { // Truncate the prediction error to deal with large outliers. - if (count > 0.0) - { + if (count > 0.0) { double bias{CBasicStatistics::mean(m_Bias[d])}; double width{10.0 * CBasicStatistics::mean(m_HistoricalAbsError[d])}; predictionError[d] = CTools::truncate(predictionError[d], bias - width, bias + width); @@ -210,8 +188,7 @@ double CDecayRateController::multiplier(const TDouble1Vec &prediction, // so the controller will actively decrease the decay rate. double weight{learnRate / numberPredictionErrors}; - double sd{ MINIMUM_COV_TO_CONTROL - * std::fabs(CBasicStatistics::mean(m_PredictionMean[d]))}; + double sd{MINIMUM_COV_TO_CONTROL * std::fabs(CBasicStatistics::mean(m_PredictionMean[d]))}; double tolerance{sd > 0.0 ? 
CSampling::normalSample(m_Rng, 0.0, sd * sd) : 0.0}; m_PredictionMean[d].add(prediction[d], weight); (*stats_[0])[d].add(predictionError[d] + tolerance, weight); @@ -222,19 +199,14 @@ double CDecayRateController::multiplier(const TDouble1Vec &prediction, } } - if (count > 0.0) - { - double factors[]{std::exp(-FAST_DECAY_RATE * decayRate), - std::exp(-FAST_DECAY_RATE * decayRate), - std::exp(-SLOW_DECAY_RATE * decayRate)}; - for (auto &component : m_PredictionMean) - { + if (count > 0.0) { + double factors[]{ + std::exp(-FAST_DECAY_RATE * decayRate), std::exp(-FAST_DECAY_RATE * decayRate), std::exp(-SLOW_DECAY_RATE * decayRate)}; + for (auto& component : m_PredictionMean) { component.age(factors[2]); } - for (std::size_t i = 0u; i < 3; ++i) - { - for (auto &component : *stats_[i]) - { + for (std::size_t i = 0u; i < 3; ++i) { + for (auto& component : *stats_[i]) { component.age(factors[i]); } } @@ -242,25 +214,20 @@ double CDecayRateController::multiplier(const TDouble1Vec &prediction, double result{1.0}; - if (count > minimumCountToControl(learnRate, decayRate)) - { + if (count > minimumCountToControl(learnRate, decayRate)) { using TMaxAccumulator = CBasicStatistics::SMax::TAccumulator; // Compute the change to apply to the target decay rate. TMaxAccumulator change; - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { double stats[3]; - for (std::size_t i = 0u; i < 3; ++i) - { + for (std::size_t i = 0u; i < 3; ++i) { stats[i] = std::fabs(CBasicStatistics::mean((*stats_[i])[d])); } change.add(this->change(stats, bucketLength)); } - m_Target *= CTools::truncate(m_Target * change[0], - MINIMUM_MULTIPLIER, - adjustedMaximumMultiplier(bucketLength)) / m_Target; + m_Target *= CTools::truncate(m_Target * change[0], MINIMUM_MULTIPLIER, adjustedMaximumMultiplier(bucketLength)) / m_Target; // We smooth the target decay rate. 
Over time this should // converge to the single decay rate which would minimize @@ -277,18 +244,15 @@ double CDecayRateController::multiplier(const TDouble1Vec &prediction, return result; } -double CDecayRateController::multiplier() const -{ +double CDecayRateController::multiplier() const { return CBasicStatistics::mean(m_Multiplier); } -std::size_t CDecayRateController::dimension() const -{ +std::size_t CDecayRateController::dimension() const { return m_PredictionMean.size(); } -void CDecayRateController::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CDecayRateController::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CDecayRateController"); core::CMemoryDebug::dynamicSize("m_PredictionMean", m_PredictionMean, mem); core::CMemoryDebug::dynamicSize("m_Bias", m_Bias, mem); @@ -296,8 +260,7 @@ void CDecayRateController::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr core::CMemoryDebug::dynamicSize("m_HistoricalAbsError", m_HistoricalAbsError, mem); } -std::size_t CDecayRateController::memoryUsage() const -{ +std::size_t CDecayRateController::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_PredictionMean); mem += core::CMemory::dynamicSize(m_Bias); mem += core::CMemory::dynamicSize(m_RecentAbsError); @@ -305,35 +268,29 @@ std::size_t CDecayRateController::memoryUsage() const return mem; } -uint64_t CDecayRateController::checksum(uint64_t seed) const -{ +uint64_t CDecayRateController::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_PredictionMean); seed = CChecksum::calculate(seed, m_Bias); seed = CChecksum::calculate(seed, m_RecentAbsError); return CChecksum::calculate(seed, m_HistoricalAbsError); } -double CDecayRateController::count() const -{ +double CDecayRateController::count() const { return CBasicStatistics::count(m_HistoricalAbsError[0]); } -double CDecayRateController::change(const double (&stats)[3], core_t::TTime bucketLength) const -{ - if ( ((m_Checks & E_PredictionErrorIncrease) && stats[1] > ERROR_INCREASING * stats[2]) - || ((m_Checks & E_PredictionErrorDecrease) && stats[2] > ERROR_DECREASING * stats[1]) - || ((m_Checks & E_PredictionBias) && stats[0] > BIASED * stats[1])) - { +double CDecayRateController::change(const double (&stats)[3], core_t::TTime bucketLength) const { + if (((m_Checks & E_PredictionErrorIncrease) && stats[1] > ERROR_INCREASING * stats[2]) || + ((m_Checks & E_PredictionErrorDecrease) && stats[2] > ERROR_DECREASING * stats[1]) || + ((m_Checks & E_PredictionBias) && stats[0] > BIASED * stats[1])) { return adjustMultiplier(INCREASE_RATE, bucketLength); } - if ( (!(m_Checks & E_PredictionErrorIncrease) || stats[1] < ERROR_NOT_INCREASING * stats[2]) - && (!(m_Checks & E_PredictionErrorDecrease) || stats[2] < ERROR_NOT_DECREASING * stats[1]) - && (!(m_Checks & E_PredictionBias) || stats[0] < NOT_BIASED * stats[1])) - { + if ((!(m_Checks & E_PredictionErrorIncrease) || stats[1] < ERROR_NOT_INCREASING * stats[2]) && + (!(m_Checks & E_PredictionErrorDecrease) || stats[2] < ERROR_NOT_DECREASING * stats[1]) && + (!(m_Checks & E_PredictionBias) || stats[0] < NOT_BIASED * stats[1])) { return adjustMultiplier(DECREASE_RATE, bucketLength); } return 1.0; } - } } diff --git a/lib/maths/CDecompositionComponent.cc b/lib/maths/CDecompositionComponent.cc index 096370cb2d..09de801d38 100644 --- a/lib/maths/CDecompositionComponent.cc +++ b/lib/maths/CDecompositionComponent.cc @@ -7,10 +7,10 @@ #include #include -#include #include #include #include +#include #include 
#include @@ -25,12 +25,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleDoublePr = maths_t::TDoubleDoublePr; @@ -47,37 +44,32 @@ const std::string VALUES_TAG{"c"}; const std::string VARIANCES_TAG{"d"}; const std::string EMPTY_STRING; - } CDecompositionComponent::CDecompositionComponent(std::size_t maxSize, CSplineTypes::EBoundaryCondition boundaryCondition, CSplineTypes::EType valueInterpolationType, - CSplineTypes::EType varianceInterpolationType) : - m_MaxSize{maxSize}, - m_BoundaryCondition{boundaryCondition}, - m_Splines{valueInterpolationType, varianceInterpolationType}, - m_MeanValue{0.0}, - m_MeanVariance{0.0} -{} - -bool CDecompositionComponent::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; + CSplineTypes::EType varianceInterpolationType) + : m_MaxSize{maxSize}, + m_BoundaryCondition{boundaryCondition}, + m_Splines{valueInterpolationType, varianceInterpolationType}, + m_MeanValue{0.0}, + m_MeanVariance{0.0} { +} + +bool CDecompositionComponent::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(MAX_SIZE_TAG, m_MaxSize) RESTORE_SETUP_TEARDOWN(BOUNDARY_CONDITION_TAG, int boundaryCondition, core::CStringUtils::stringToType(traverser.value(), boundaryCondition), m_BoundaryCondition = static_cast(boundaryCondition)) - RESTORE(SPLINES_TAG, traverser.traverseSubLevel(boost::bind(&CPackedSplines::acceptRestoreTraverser, - &m_Splines, m_BoundaryCondition, _1))) - } - while (traverser.next()); + RESTORE(SPLINES_TAG, + traverser.traverseSubLevel(boost::bind(&CPackedSplines::acceptRestoreTraverser, &m_Splines, m_BoundaryCondition, _1))) + } while (traverser.next()); - if (this->initialized()) - { + if (this->initialized()) { m_MeanValue = this->valueSpline().mean(); m_MeanVariance = this->varianceSpline().mean(); } @@ -85,15 +77,13 @@ bool CDecompositionComponent::acceptRestoreTraverser(core::CStateRestoreTraverse return true; } -void CDecompositionComponent::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CDecompositionComponent::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(MAX_SIZE_TAG, m_MaxSize); inserter.insertValue(BOUNDARY_CONDITION_TAG, static_cast(m_BoundaryCondition)); inserter.insertLevel(SPLINES_TAG, boost::bind(&CPackedSplines::acceptPersistInserter, &m_Splines, _1)); } -void CDecompositionComponent::swap(CDecompositionComponent &other) -{ +void CDecompositionComponent::swap(CDecompositionComponent& other) { std::swap(m_MaxSize, other.m_MaxSize); std::swap(m_BoundaryCondition, other.m_BoundaryCondition); std::swap(m_MeanValue, other.m_MeanValue); @@ -101,38 +91,30 @@ void CDecompositionComponent::swap(CDecompositionComponent &other) m_Splines.swap(other.m_Splines); } -bool CDecompositionComponent::initialized() const -{ +bool CDecompositionComponent::initialized() const { return m_Splines.initialized(); } -void CDecompositionComponent::clear() -{ - if (m_Splines.initialized()) - { +void CDecompositionComponent::clear() { + if (m_Splines.initialized()) { m_Splines.clear(); } m_MeanValue = 0.0; m_MeanVariance = 0.0; } -void CDecompositionComponent::interpolate(const TDoubleVec &knots, - const TDoubleVec &values, - const TDoubleVec &variances) -{ +void CDecompositionComponent::interpolate(const TDoubleVec& knots, const TDoubleVec& values, const TDoubleVec& variances) { 
m_Splines.interpolate(knots, values, variances, m_BoundaryCondition); m_MeanValue = this->valueSpline().mean(); m_MeanVariance = this->varianceSpline().mean(); } -void CDecompositionComponent::shiftLevel(double shift) -{ +void CDecompositionComponent::shiftLevel(double shift) { m_Splines.shift(CPackedSplines::E_Value, shift); m_MeanValue += shift; } -TDoubleDoublePr CDecompositionComponent::value(double offset, double n, double confidence) const -{ +TDoubleDoublePr CDecompositionComponent::value(double offset, double n, double confidence) const { // In order to compute a confidence interval we need to know // the distribution of the samples. In practice, as long as // they are independent, then the sample mean will be @@ -140,35 +122,26 @@ TDoubleDoublePr CDecompositionComponent::value(double offset, double n, double c // and variance equal to the sample variance divided by root // of the number of samples. - if (this->initialized()) - { + if (this->initialized()) { double m{this->valueSpline().value(offset)}; - if (confidence == 0.0) - { + if (confidence == 0.0) { return {m, m}; } n = std::max(n, 1.0); double sd{::sqrt(std::max(this->varianceSpline().value(offset), 0.0) / n)}; - if (sd == 0.0) - { + if (sd == 0.0) { return {m, m}; } - try - { + try { boost::math::normal_distribution<> normal{m, sd}; double ql{boost::math::quantile(normal, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; return {ql, qu}; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed calculating confidence interval: " << e.what() - << ", n = " << n - << ", m = " << m - << ", sd = " << sd - << ", confidence = " << confidence); + } catch (const std::exception& e) { + LOG_ERROR("Failed calculating confidence interval: " << e.what() << ", n = " << n << ", m = " << m << ", sd = " << sd + << ", confidence = " << confidence); } return {m, m}; } @@ -176,54 +149,42 @@ TDoubleDoublePr CDecompositionComponent::value(double offset, double n, double c return {m_MeanValue, m_MeanValue}; } -double CDecompositionComponent::meanValue() const -{ +double CDecompositionComponent::meanValue() const { return m_MeanValue; } -TDoubleDoublePr CDecompositionComponent::variance(double offset, double n, double confidence) const -{ +TDoubleDoublePr CDecompositionComponent::variance(double offset, double n, double confidence) const { // In order to compute a confidence interval we need to know // the distribution of the samples. In practice, as long as // they are independent, then the sample variance will be // asymptotically chi-squared with number of samples minus // one degrees of freedom. 
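    // Concretely: for n independent Gaussian samples the sample variance s^2
    // satisfies (n - 1) s^2 / v ~ chi^2(n - 1) when the true variance is v, so
    // s^2 is distributed as v * chi^2(n - 1) / (n - 1) and a chi-squared
    // quantile q maps to the sample variance quantile q * v / (n - 1), which
    // is exactly the scaling applied in the code below.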
- if (this->initialized()) - { + if (this->initialized()) { n = std::max(n, 2.0); double v{this->varianceSpline().value(offset)}; - if (confidence == 0.0) - { + if (confidence == 0.0) { return {v, v}; } - try - { + try { boost::math::chi_squared_distribution<> chi{n - 1.0}; double ql{boost::math::quantile(chi, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(chi, (100.0 + confidence) / 200.0)}; return std::make_pair(ql * v / (n - 1.0), qu * v / (n - 1.0)); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed calculating confidence interval: " << e.what() - << ", n = " << n - << ", confidence = " << confidence); + } catch (const std::exception& e) { + LOG_ERROR("Failed calculating confidence interval: " << e.what() << ", n = " << n << ", confidence = " << confidence); } return {v, v}; } return {m_MeanVariance, m_MeanVariance}; } -double CDecompositionComponent::meanVariance() const -{ +double CDecompositionComponent::meanVariance() const { return m_MeanVariance; } -double CDecompositionComponent::heteroscedasticity() const -{ - if (m_MeanVariance == 0.0) - { +double CDecompositionComponent::heteroscedasticity() const { + if (m_MeanVariance == 0.0) { return 0.0; } @@ -232,36 +193,30 @@ double CDecompositionComponent::heteroscedasticity() const TMaxAccumulator result; TSplineCRef spline = this->varianceSpline(); - for (const auto &value : spline.values()) - { + for (const auto& value : spline.values()) { result.add(value / m_MeanVariance); } return result.count() > 0 ? result[0] : 0.0; } -std::size_t CDecompositionComponent::maxSize() const -{ +std::size_t CDecompositionComponent::maxSize() const { return std::max(m_MaxSize, MIN_MAX_SIZE); } -CSplineTypes::EBoundaryCondition CDecompositionComponent::boundaryCondition() const -{ +CSplineTypes::EBoundaryCondition CDecompositionComponent::boundaryCondition() const { return m_BoundaryCondition; } -CDecompositionComponent::TSplineCRef CDecompositionComponent::valueSpline() const -{ +CDecompositionComponent::TSplineCRef CDecompositionComponent::valueSpline() const { return m_Splines.spline(CPackedSplines::E_Value); } -CDecompositionComponent::TSplineCRef CDecompositionComponent::varianceSpline() const -{ +CDecompositionComponent::TSplineCRef CDecompositionComponent::varianceSpline() const { return m_Splines.spline(CPackedSplines::E_Variance); } -uint64_t CDecompositionComponent::checksum(uint64_t seed) const -{ +uint64_t CDecompositionComponent::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_MaxSize); seed = CChecksum::calculate(seed, m_BoundaryCondition); seed = CChecksum::calculate(seed, m_Splines); @@ -269,8 +224,7 @@ uint64_t CDecompositionComponent::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_MeanVariance); } -const CDecompositionComponent::CPackedSplines &CDecompositionComponent::splines() const -{ +const CDecompositionComponent::CPackedSplines& CDecompositionComponent::splines() const { return m_Splines; } @@ -279,51 +233,43 @@ const std::size_t CDecompositionComponent::MIN_MAX_SIZE{1u}; ////// CDecompositionComponent::CPackedSplines ////// CDecompositionComponent::CPackedSplines::CPackedSplines(CSplineTypes::EType valueInterpolationType, - CSplineTypes::EType varianceInterpolationType) -{ + CSplineTypes::EType varianceInterpolationType) { m_Types[static_cast(E_Value)] = valueInterpolationType; m_Types[static_cast(E_Variance)] = varianceInterpolationType; } bool CDecompositionComponent::CPackedSplines::acceptRestoreTraverser(CSplineTypes::EBoundaryCondition boundary, - 
core::CStateRestoreTraverser &traverser) -{ + core::CStateRestoreTraverser& traverser) { int estimated{0}; TDoubleVec knots; TDoubleVec values; TDoubleVec variances; - do - { - const std::string &name{traverser.name()}; + do { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(ESTIMATED_TAG, estimated) RESTORE(KNOTS_TAG, core::CPersistUtils::fromString(traverser.value(), knots)) RESTORE(VALUES_TAG, core::CPersistUtils::fromString(traverser.value(), values)) RESTORE(VARIANCES_TAG, core::CPersistUtils::fromString(traverser.value(), variances)) - } - while (traverser.next()); + } while (traverser.next()); - if (estimated == 1) - { + if (estimated == 1) { this->interpolate(knots, values, variances, boundary); } return true; } -void CDecompositionComponent::CPackedSplines::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CDecompositionComponent::CPackedSplines::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(ESTIMATED_TAG, static_cast(this->initialized())); - if (this->initialized()) - { + if (this->initialized()) { inserter.insertValue(KNOTS_TAG, core::CPersistUtils::toString(m_Knots)); inserter.insertValue(VALUES_TAG, core::CPersistUtils::toString(m_Values[0])); inserter.insertValue(VARIANCES_TAG, core::CPersistUtils::toString(m_Values[1])); } } -void CDecompositionComponent::CPackedSplines::swap(CPackedSplines &other) -{ +void CDecompositionComponent::CPackedSplines::swap(CPackedSplines& other) { std::swap(m_Types, other.m_Types); m_Knots.swap(other.m_Knots); m_Values[0].swap(other.m_Values[0]); @@ -332,61 +278,50 @@ void CDecompositionComponent::CPackedSplines::swap(CPackedSplines &other) m_Curvatures[1].swap(other.m_Curvatures[1]); } -bool CDecompositionComponent::CPackedSplines::initialized() const -{ +bool CDecompositionComponent::CPackedSplines::initialized() const { return m_Knots.size() > 0; } -void CDecompositionComponent::CPackedSplines::clear() -{ +void CDecompositionComponent::CPackedSplines::clear() { this->spline(E_Value).clear(); this->spline(E_Variance).clear(); } -void CDecompositionComponent::CPackedSplines::shift(ESpline spline, double shift) -{ - for (auto &value : m_Values[static_cast(spline)]) - { +void CDecompositionComponent::CPackedSplines::shift(ESpline spline, double shift) { + for (auto& value : m_Values[static_cast(spline)]) { value += shift; } } -CDecompositionComponent::TSplineCRef CDecompositionComponent::CPackedSplines::spline(ESpline spline) const -{ +CDecompositionComponent::TSplineCRef CDecompositionComponent::CPackedSplines::spline(ESpline spline) const { return TSplineCRef(m_Types[static_cast(spline)], boost::cref(m_Knots), boost::cref(m_Values[static_cast(spline)]), boost::cref(m_Curvatures[static_cast(spline)])); } -CDecompositionComponent::TSplineRef CDecompositionComponent::CPackedSplines::spline(ESpline spline) -{ +CDecompositionComponent::TSplineRef CDecompositionComponent::CPackedSplines::spline(ESpline spline) { return TSplineRef(m_Types[static_cast(spline)], boost::ref(m_Knots), boost::ref(m_Values[static_cast(spline)]), boost::ref(m_Curvatures[static_cast(spline)])); } -const CDecompositionComponent::TFloatVec &CDecompositionComponent::CPackedSplines::knots() const -{ +const CDecompositionComponent::TFloatVec& CDecompositionComponent::CPackedSplines::knots() const { return m_Knots; } -void CDecompositionComponent::CPackedSplines::interpolate(const TDoubleVec &knots, - const TDoubleVec &values, - const TDoubleVec &variances, - CSplineTypes::EBoundaryCondition 
boundary) -{ +void CDecompositionComponent::CPackedSplines::interpolate(const TDoubleVec& knots, + const TDoubleVec& values, + const TDoubleVec& variances, + CSplineTypes::EBoundaryCondition boundary) { CPackedSplines oldSpline{m_Types[0], m_Types[1]}; this->swap(oldSpline); TSplineRef valueSpline{this->spline(E_Value)}; TSplineRef varianceSpline{this->spline(E_Variance)}; - if (!valueSpline.interpolate(knots, values, boundary)) - { + if (!valueSpline.interpolate(knots, values, boundary)) { this->swap(oldSpline); - } - else if (!varianceSpline.interpolate(knots, variances, boundary)) - { + } else if (!varianceSpline.interpolate(knots, variances, boundary)) { this->swap(oldSpline); } LOG_TRACE("types = " << core::CContainerPrinter::print(m_Types)); @@ -395,16 +330,14 @@ void CDecompositionComponent::CPackedSplines::interpolate(const TDoubleVec &knot LOG_TRACE("curvatures = " << core::CContainerPrinter::print(m_Curvatures)); } -uint64_t CDecompositionComponent::CPackedSplines::checksum(uint64_t seed) const -{ +uint64_t CDecompositionComponent::CPackedSplines::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_Types); seed = CChecksum::calculate(seed, m_Knots); seed = CChecksum::calculate(seed, m_Values); return CChecksum::calculate(seed, m_Curvatures); } -void CDecompositionComponent::CPackedSplines::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CDecompositionComponent::CPackedSplines::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CPackedSplines"); core::CMemoryDebug::dynamicSize("m_Knots", m_Knots, mem); core::CMemoryDebug::dynamicSize("m_Values[0]", m_Values[0], mem); @@ -413,8 +346,7 @@ void CDecompositionComponent::CPackedSplines::debugMemoryUsage(core::CMemoryUsag core::CMemoryDebug::dynamicSize("m_Curvatures[1]", m_Curvatures[1], mem); } -std::size_t CDecompositionComponent::CPackedSplines::memoryUsage() const -{ +std::size_t CDecompositionComponent::CPackedSplines::memoryUsage() const { std::size_t mem{core::CMemory::dynamicSize(m_Knots)}; mem += core::CMemory::dynamicSize(m_Values[0]); mem += core::CMemory::dynamicSize(m_Values[1]); @@ -422,6 +354,5 @@ std::size_t CDecompositionComponent::CPackedSplines::memoryUsage() const mem += core::CMemory::dynamicSize(m_Curvatures[1]); return mem; } - } } diff --git a/lib/maths/CEntropySketch.cc b/lib/maths/CEntropySketch.cc index 3a1251d041..93b27d367d 100644 --- a/lib/maths/CEntropySketch.cc +++ b/lib/maths/CEntropySketch.cc @@ -16,50 +16,40 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -CEntropySketch::CEntropySketch(std::size_t k) : m_Y(0), m_Yi(k, 0.0) -{ +CEntropySketch::CEntropySketch(std::size_t k) : m_Y(0), m_Yi(k, 0.0) { } -void CEntropySketch::add(std::size_t category, uint64_t count) -{ +void CEntropySketch::add(std::size_t category, uint64_t count) { m_Y += count; TDoubleVec projection; this->generateProjection(category, projection); - for (std::size_t i = 0u; i < projection.size(); ++i) - { + for (std::size_t i = 0u; i < projection.size(); ++i) { m_Yi[i] += projection[i] * static_cast(count); } } -double CEntropySketch::calculate() const -{ +double CEntropySketch::calculate() const { double h = 0.0; - for (std::size_t i = 0u; i < m_Yi.size(); ++i) - { + for (std::size_t i = 0u; i < m_Yi.size(); ++i) { h += std::exp(m_Yi[i] / static_cast(m_Y)); } return -std::log(h / static_cast(m_Yi.size())); } -void CEntropySketch::generateProjection(std::size_t category, TDoubleVec &projection) -{ +void 
CEntropySketch::generateProjection(std::size_t category, TDoubleVec& projection) { CPRNG::CXorOShiro128Plus rng(category); CSampling::uniformSample(rng, 0.0, 1.0, 2 * m_Yi.size(), projection); - for (std::size_t i = 0u; i < projection.size(); i += 2) - { + for (std::size_t i = 0u; i < projection.size(); i += 2) { double w1 = boost::math::double_constants::pi * (projection[i] - 0.5); - double w2 = -std::log(projection[i+1]); - projection[i / 2] = std::tan(w1) * (boost::math::double_constants::half_pi - w1) - + std::log(w2 * std::cos(w1) / (boost::math::double_constants::half_pi - w1)); + double w2 = -std::log(projection[i + 1]); + projection[i / 2] = std::tan(w1) * (boost::math::double_constants::half_pi - w1) + + std::log(w2 * std::cos(w1) / (boost::math::double_constants::half_pi - w1)); } projection.resize(m_Yi.size()); LOG_TRACE("projection = " << core::CContainerPrinter::print(projection)); } - } } diff --git a/lib/maths/CExpandingWindow.cc b/lib/maths/CExpandingWindow.cc index b565c06a5d..154b306fbc 100644 --- a/lib/maths/CExpandingWindow.cc +++ b/lib/maths/CExpandingWindow.cc @@ -18,93 +18,73 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { const std::string BUCKET_LENGTH_INDEX_TAG("a"); const std::string BUCKET_VALUES_TAG("b"); const std::string START_TIME_TAG("c"); } -CExpandingWindow::CExpandingWindow(core_t::TTime bucketLength, - TTimeCRng bucketLengths, - std::size_t size, - double decayRate) : - m_DecayRate(decayRate), - m_BucketLength(bucketLength), - m_BucketLengths(bucketLengths), - m_BucketLengthIndex(0), - m_StartTime(boost::numeric::bounds::lowest()), - m_BucketValues(size % 2 == 0 ? size : size + 1) -{} - -bool CExpandingWindow::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +CExpandingWindow::CExpandingWindow(core_t::TTime bucketLength, TTimeCRng bucketLengths, std::size_t size, double decayRate) + : m_DecayRate(decayRate), + m_BucketLength(bucketLength), + m_BucketLengths(bucketLengths), + m_BucketLengthIndex(0), + m_StartTime(boost::numeric::bounds::lowest()), + m_BucketValues(size % 2 == 0 ? 
size : size + 1) { +} + +bool CExpandingWindow::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { m_BucketValues.clear(); - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(BUCKET_LENGTH_INDEX_TAG, m_BucketLengthIndex) RESTORE_BUILT_IN(START_TIME_TAG, m_StartTime) RESTORE(BUCKET_VALUES_TAG, core::CPersistUtils::restore(BUCKET_VALUES_TAG, m_BucketValues, traverser)); - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CExpandingWindow::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CExpandingWindow::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(BUCKET_LENGTH_INDEX_TAG, m_BucketLengthIndex); inserter.insertValue(START_TIME_TAG, m_StartTime); core::CPersistUtils::persist(BUCKET_VALUES_TAG, m_BucketValues, inserter); } -core_t::TTime CExpandingWindow::startTime() const -{ +core_t::TTime CExpandingWindow::startTime() const { return m_StartTime; } -core_t::TTime CExpandingWindow::endTime() const -{ - return m_StartTime + ( static_cast(m_BucketValues.size()) - * m_BucketLengths[m_BucketLengthIndex]); +core_t::TTime CExpandingWindow::endTime() const { + return m_StartTime + (static_cast(m_BucketValues.size()) * m_BucketLengths[m_BucketLengthIndex]); } -core_t::TTime CExpandingWindow::bucketLength() const -{ +core_t::TTime CExpandingWindow::bucketLength() const { return m_BucketLengths[m_BucketLengthIndex]; } -const CExpandingWindow::TFloatMeanAccumulatorVec &CExpandingWindow::values() const -{ +const CExpandingWindow::TFloatMeanAccumulatorVec& CExpandingWindow::values() const { return m_BucketValues; } -CExpandingWindow::TFloatMeanAccumulatorVec CExpandingWindow::valuesMinusPrediction(const TPredictor &predictor) const -{ +CExpandingWindow::TFloatMeanAccumulatorVec CExpandingWindow::valuesMinusPrediction(const TPredictor& predictor) const { core_t::TTime start{CIntegerTools::floor(this->startTime(), m_BucketLength)}; core_t::TTime end{CIntegerTools::ceil(this->endTime(), m_BucketLength)}; core_t::TTime size{static_cast(m_BucketValues.size())}; core_t::TTime offset{static_cast(CBasicStatistics::mean(m_MeanOffset) + 0.5)}; TFloatMeanAccumulatorVec predictions(size); - for (core_t::TTime time = start + offset; time < end; time += m_BucketLength) - { + for (core_t::TTime time = start + offset; time < end; time += m_BucketLength) { core_t::TTime bucket{(time - start) / m_BucketLengths[m_BucketLengthIndex]}; - if (bucket >= 0 && bucket < size) - { + if (bucket >= 0 && bucket < size) { predictions[bucket].add(predictor(time)); } } TFloatMeanAccumulatorVec result(m_BucketValues); - for (core_t::TTime i = 0; i < size; ++i) - { - if (CBasicStatistics::count(result[i]) > 0.0) - { + for (core_t::TTime i = 0; i < size; ++i) { + if (CBasicStatistics::count(result[i]) > 0.0) { CBasicStatistics::moment<0>(result[i]) -= CBasicStatistics::mean(predictions[i]); } } @@ -112,46 +92,33 @@ CExpandingWindow::TFloatMeanAccumulatorVec CExpandingWindow::valuesMinusPredicti return result; } -void CExpandingWindow::initialize(core_t::TTime time) -{ +void CExpandingWindow::initialize(core_t::TTime time) { m_StartTime = CIntegerTools::floor(time, m_BucketLengths[0]); } -void CExpandingWindow::propagateForwardsByTime(double time) -{ - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { +void CExpandingWindow::propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { LOG_ERROR("Bad propagation time " << 
time);
     }

     double factor = std::exp(-m_DecayRate * time);
-    for (auto &value : m_BucketValues)
-    {
+    for (auto& value : m_BucketValues) {
         value.age(factor);
     }
 }

-void CExpandingWindow::add(core_t::TTime time, double value, double weight)
-{
-    if (time >= m_StartTime)
-    {
-        while (this->needToCompress(time))
-        {
+void CExpandingWindow::add(core_t::TTime time, double value, double weight) {
+    if (time >= m_StartTime) {
+        while (this->needToCompress(time)) {
             m_BucketLengthIndex = (m_BucketLengthIndex + 1) % m_BucketLengths.size();
             auto end = m_BucketValues.begin();
-            if (m_BucketLengthIndex == 0)
-            {
+            if (m_BucketLengthIndex == 0) {
                 m_StartTime = CIntegerTools::floor(time, m_BucketLengths[0]);
-            }
-            else
-            {
-                std::size_t compression = m_BucketLengths[m_BucketLengthIndex]
-                                        / m_BucketLengths[m_BucketLengthIndex - 1];
-                for (std::size_t i = 0u; i < m_BucketValues.size(); i += compression, ++end)
-                {
+            } else {
+                std::size_t compression = m_BucketLengths[m_BucketLengthIndex] / m_BucketLengths[m_BucketLengthIndex - 1];
+                for (std::size_t i = 0u; i < m_BucketValues.size(); i += compression, ++end) {
                     std::swap(*end, m_BucketValues[i]);
-                    for (std::size_t j = 1u; j < compression && i + j < m_BucketValues.size(); ++j)
-                    {
+                    for (std::size_t j = 1u; j < compression && i + j < m_BucketValues.size(); ++j) {
                         *end += m_BucketValues[i + j];
                     }
                 }
@@ -164,28 +131,23 @@ void CExpandingWindow::add(core_t::TTime time, double value, double weight)
     }
 }

-bool CExpandingWindow::needToCompress(core_t::TTime time) const
-{
+bool CExpandingWindow::needToCompress(core_t::TTime time) const {
     return time >= this->endTime();
 }

-uint64_t CExpandingWindow::checksum(uint64_t seed) const
-{
+uint64_t CExpandingWindow::checksum(uint64_t seed) const {
     seed = CChecksum::calculate(seed, m_BucketLengthIndex);
     seed = CChecksum::calculate(seed, m_StartTime);
     return CChecksum::calculate(seed, m_BucketValues);
 }

-void CExpandingWindow::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CExpandingWindow::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CExpandingWindow");
     core::CMemoryDebug::dynamicSize("m_BucketValues", m_BucketValues, mem);
 }

-std::size_t CExpandingWindow::memoryUsage() const
-{
+std::size_t CExpandingWindow::memoryUsage() const {
     return core::CMemory::dynamicSize(m_BucketValues);
 }
-
 }
 }
diff --git a/lib/maths/CGammaRateConjugate.cc b/lib/maths/CGammaRateConjugate.cc
index 94868c5b27..2ebb55aa5d 100644
--- a/lib/maths/CGammaRateConjugate.cc
+++ b/lib/maths/CGammaRateConjugate.cc
@@ -6,8 +6,8 @@
 #include

-#include
 #include
+#include
 #include
 #include
 #include
@@ -16,10 +16,10 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -39,15 +39,11 @@
 #include
 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
-namespace detail
-{
+namespace {
+namespace detail {

 using TDoubleDoublePr = std::pair<double, double>;
 using TWeightStyleVec = maths_t::TWeightStyleVec;
@@ -60,10 +56,8 @@
 using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;

-void truncateVariance(bool isInteger,
-                      TMeanAccumulator &logMean,
-                      TMeanVarAccumulator &moments)
-{
-    if (CBasicStatistics::count(moments) > 1.5)
-    {
+void truncateVariance(bool isInteger, TMeanAccumulator& logMean, TMeanVarAccumulator& moments) {
+    if (CBasicStatistics::count(moments) > 1.5) {
         // The idea is to model the impact of a small coefficient of variation
         // on the variance and the log of samples.
Note that: // mean(log(x'(i)) = 1/n * Sum_i{ log(m + (x'(i) - m)) } @@ -114,8 +104,7 @@ void truncateVariance(bool isInteger, double sampleMean = std::max(std::fabs(CBasicStatistics::mean(moments)), 1e-8); double cov = sampleDeviation / sampleMean; double covMin = minimumCoefficientOfVariation(isInteger, sampleMean); - if (cov < covMin) - { + if (cov < covMin) { double extraDeviation = sampleMean * (covMin - cov); moments.s_Moments[1] += extraDeviation * extraDeviation; } @@ -127,25 +116,15 @@ void truncateVariance(bool isInteger, //! Computes the derivative w.r.t. the shape of the marginal likelihood //! function for gamma distributed data with known prior for the rate. -class CLikelihoodDerivativeFunction : public std::unary_function -{ - public: - CLikelihoodDerivativeFunction(double numberSamples, - double target) : - m_NumberSamples(numberSamples), - m_Target(target) - {} - - double operator()(double x) const - { - return boost::math::digamma(m_NumberSamples * x) - - boost::math::digamma(x) - - m_Target; - } +class CLikelihoodDerivativeFunction : public std::unary_function { +public: + CLikelihoodDerivativeFunction(double numberSamples, double target) : m_NumberSamples(numberSamples), m_Target(target) {} + + double operator()(double x) const { return boost::math::digamma(m_NumberSamples * x) - boost::math::digamma(x) - m_Target; } - private: - double m_NumberSamples; - double m_Target; +private: + double m_NumberSamples; + double m_Target; }; //! Compute the maximum likelihood posterior shape if possible otherwise @@ -161,13 +140,11 @@ class CLikelihoodDerivativeFunction : public std::unary_function //! \param[in] newMoments The mean and variance of the all previous samples //! plus new samples to be incorporated into the estimate. double maximumLikelihoodShape(double oldShape, - const TMeanAccumulator &oldLogMean, - const TMeanAccumulator &newLogMean, - const TMeanVarAccumulator &oldMoments, - const TMeanVarAccumulator &newMoments) -{ - if (CBasicStatistics::count(newMoments) < NON_INFORMATIVE_COUNT) - { + const TMeanAccumulator& oldLogMean, + const TMeanAccumulator& newLogMean, + const TMeanVarAccumulator& oldMoments, + const TMeanVarAccumulator& newMoments) { + if (CBasicStatistics::count(newMoments) < NON_INFORMATIVE_COUNT) { return oldShape; } @@ -187,32 +164,28 @@ double maximumLikelihoodShape(double oldShape, double oldMean = CBasicStatistics::mean(oldMoments); double oldTarget = 0.0; - if (oldNumber * oldMean > 0.0) - { + if (oldNumber * oldMean > 0.0) { oldTarget = std::log(oldNumber * oldMean) - CBasicStatistics::mean(oldLogMean); } double newNumber = CBasicStatistics::count(newMoments); double newMean = CBasicStatistics::mean(newMoments); - if (newNumber * newMean == 0.0) - { + if (newNumber * newMean == 0.0) { return 0.0; } double target = std::log(newNumber * newMean) - CBasicStatistics::mean(newLogMean); // Fall back to method of moments if maximum-likelihood fails. double bestGuess = 1.0; - if (CBasicStatistics::variance(newMoments) > 0.0) - { + if (CBasicStatistics::variance(newMoments) > 0.0) { bestGuess = newMean * newMean / CBasicStatistics::variance(newMoments); } // If we've estimated the shape before the old shape will typically // be a very good initial estimate. Otherwise, use the best guess. 
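    // (Added for exposition; not part of the original change.) "bestGuess"
    // above is the method of moments estimate: a gamma process with shape a
    // and rate b has mean a / b and variance a / b^2, so a = mean^2 / variance.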
double x0 = bestGuess; - if (oldNumber > NON_INFORMATIVE_COUNT) - { + if (oldNumber > NON_INFORMATIVE_COUNT) { x0 = oldShape; } @@ -221,15 +194,13 @@ double maximumLikelihoodShape(double oldShape, double downFactor = 0.8; double upFactor = 1.4; - if (oldNumber > NON_INFORMATIVE_COUNT) - { + if (oldNumber > NON_INFORMATIVE_COUNT) { // Compute, very approximately, minus the gradient of the function // at the old shape. We just use the chord from the origin to the // target value and truncate its value so the bracketing loop is // well behaved. double gradient = 1.0; - if (oldShape > 0.0) - { + if (oldShape > 0.0) { gradient = CTools::truncate(oldTarget / oldShape, EPS, 1.0); } @@ -237,24 +208,18 @@ double maximumLikelihoodShape(double oldShape, // in one iteration and not overshoot too much. Again we truncate // the values so that bracketing loop is well behaved. double dTarget = std::fabs(target - oldTarget); - downFactor = CTools::truncate(1.0 - 2.0 * dTarget / gradient, - MIN_DOWN_FACTOR, - 1.0 - EPS); - upFactor = CTools::truncate(1.0 + 2.0 * dTarget / gradient, - 1.0 + EPS, - MAX_UP_FACTOR); + downFactor = CTools::truncate(1.0 - 2.0 * dTarget / gradient, MIN_DOWN_FACTOR, 1.0 - EPS); + upFactor = CTools::truncate(1.0 + 2.0 * dTarget / gradient, 1.0 + EPS, MAX_UP_FACTOR); } CLikelihoodDerivativeFunction derivative(newNumber, target); double f0 = 0.0; TDoubleDoublePr fBracket(f0, f0); - try - { + try { fBracket.first = fBracket.second = f0 = derivative(x0); - if (f0 == 0.0) - { + if (f0 == 0.0) { // We're done. return x0; } @@ -265,10 +230,8 @@ double maximumLikelihoodShape(double oldShape, // of the likelihood derivative function across a range of different // process gamma shapes and rates. In particular, the mean total // number of evaluations used by this function is around five. 
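    // (Added for exposition; not part of the original change.) The bracketing
    // below relies on x -> digamma(n * x) - digamma(x) being strictly
    // decreasing, from +infinity as x -> 0+ down to log(n) as x -> infinity,
    // so the derivative function crosses zero at most once: a negative value
    // means the root lies at smaller x (scale by downFactor) and a positive
    // value means it lies at larger x (scale by upFactor).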
- for (/**/; maxIterations > 0; --maxIterations) - { - if (fBracket.first < 0.0) - { + for (/**/; maxIterations > 0; --maxIterations) { + if (fBracket.first < 0.0) { bracket.second = bracket.first; fBracket.second = fBracket.first; @@ -276,9 +239,7 @@ double maximumLikelihoodShape(double oldShape, fBracket.first = derivative(bracket.first); downFactor = std::max(0.8 * downFactor, MIN_DOWN_FACTOR); - } - else if (fBracket.second > 0.0) - { + } else if (fBracket.second > 0.0) { bracket.first = bracket.second; fBracket.first = fBracket.second; @@ -286,74 +247,38 @@ double maximumLikelihoodShape(double oldShape, fBracket.second = derivative(bracket.second); upFactor = std::min(1.4 * upFactor, MAX_UP_FACTOR); - } - else - { + } else { break; } } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to bracket root: " << e.what() - << ", newNumber = " << newNumber - << ", newMean = " << newMean - << ", newLogMean = " << newLogMean - << ", x0 = " << x0 - << ", f(x0) = " << f0 - << ", bracket = " << core::CContainerPrinter::print(bracket) - << ", f(bracket) = " << core::CContainerPrinter::print(fBracket) - << ", bestGuess = " << bestGuess); + } catch (const std::exception& e) { + LOG_ERROR("Failed to bracket root: " << e.what() << ", newNumber = " << newNumber << ", newMean = " << newMean + << ", newLogMean = " << newLogMean << ", x0 = " << x0 << ", f(x0) = " << f0 + << ", bracket = " << core::CContainerPrinter::print(bracket) << ", f(bracket) = " + << core::CContainerPrinter::print(fBracket) << ", bestGuess = " << bestGuess); return bestGuess; } - if (maxIterations == 0) - { + if (maxIterations == 0) { LOG_TRACE("Failed to bracket root:" - << " newNumber = " << newNumber - << ", newMean = " << newMean - << ", newLogMean = " << newLogMean - << ", x0 = " << x0 - << ", f(x0) = " << f0 - << ", bracket = " << core::CContainerPrinter::print(bracket) - << ", f(bracket) = " << core::CContainerPrinter::print(fBracket) - << ", bestGuess = " << bestGuess); + << " newNumber = " << newNumber << ", newMean = " << newMean << ", newLogMean = " << newLogMean << ", x0 = " << x0 + << ", f(x0) = " << f0 << ", bracket = " << core::CContainerPrinter::print(bracket) + << ", f(bracket) = " << core::CContainerPrinter::print(fBracket) << ", bestGuess = " << bestGuess); return bestGuess; } - LOG_TRACE("newNumber = " << newNumber - << ", newMean = " << newMean - << ", newLogMean = " << newLogMean - << ", oldTarget = " << oldTarget - << ", target = " << target - << ", upFactor = " << upFactor - << ", downFactor = " << downFactor - << ", x0 = " << x0 - << ", f(x0) = " << f0 - << ", bracket = " << core::CContainerPrinter::print(bracket) - << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)); - - try - { + LOG_TRACE("newNumber = " << newNumber << ", newMean = " << newMean << ", newLogMean = " << newLogMean << ", oldTarget = " << oldTarget + << ", target = " << target << ", upFactor = " << upFactor << ", downFactor = " << downFactor << ", x0 = " << x0 + << ", f(x0) = " << f0 << ", bracket = " << core::CContainerPrinter::print(bracket) + << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)); + + try { CEqualWithTolerance tolerance(CToleranceTypes::E_AbsoluteTolerance, EPS * x0); - CSolvers::solve(bracket.first, - bracket.second, - fBracket.first, - fBracket.second, - derivative, - maxIterations, - tolerance, - bestGuess); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to solve: " << e.what() - << ", newNumber = " << newNumber - << ", x0 = " << x0 - << ", f(x0) = " << f0 - << ", 
bracket = " << core::CContainerPrinter::print(bracket) - << ", f(bracket) = " << core::CContainerPrinter::print(fBracket) - << ", bestGuess = " << bestGuess); + CSolvers::solve(bracket.first, bracket.second, fBracket.first, fBracket.second, derivative, maxIterations, tolerance, bestGuess); + } catch (const std::exception& e) { + LOG_ERROR("Failed to solve: " << e.what() << ", newNumber = " << newNumber << ", x0 = " << x0 << ", f(x0) = " << f0 + << ", bracket = " << core::CContainerPrinter::print(bracket) + << ", f(bracket) = " << core::CContainerPrinter::print(fBracket) << ", bestGuess = " << bestGuess); return bestGuess; } @@ -362,16 +287,9 @@ double maximumLikelihoodShape(double oldShape, return (bracket.first + bracket.second) / 2.0; } - //! Adds "weight" x "right operand" to the "left operand". -struct SPlusWeight -{ - double operator()(double lhs, - double rhs, - double weight = 1.0) const - { - return lhs + weight * rhs; - } +struct SPlusWeight { + double operator()(double lhs, double rhs, double weight = 1.0) const { return lhs + weight * rhs; } }; //! Evaluate \p func on the joint predictive distribution for \p samples @@ -395,9 +313,9 @@ struct SPlusWeight //! of the likelihood for \p samples. //! \param[out] result Filled in with the aggregation of results of \p func. template -bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, +bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, FUNC func, AGGREGATOR aggregate, bool isNonInformative, @@ -405,12 +323,10 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, double likelihoodShape, double priorShape, double priorRate, - RESULT &result) -{ + RESULT& result) { result = RESULT(); - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute distribution for empty sample set"); return false; } @@ -430,27 +346,19 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, static const double MINIMUM_GAMMA_SHAPE = 100.0; - LOG_TRACE("likelihoodShape = " << likelihoodShape - << ", priorShape = " << priorShape - << ", priorRate = " << priorRate); + LOG_TRACE("likelihoodShape = " << likelihoodShape << ", priorShape = " << priorShape << ", priorRate = " << priorRate); - try - { - if (isNonInformative) - { + try { + if (isNonInformative) { // The non-informative prior is improper and effectively zero // everywhere. (It is acceptable to approximate all finite samples // as at the median of this distribution.) - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); double x = samples[i] + offset; result = aggregate(result, func(CTools::SImproperDistribution(), x), n); } - } - else if (priorShape > 2 - && priorShape > likelihoodShape * MINIMUM_GAMMA_SHAPE) - { + } else if (priorShape > 2 && priorShape > likelihoodShape * MINIMUM_GAMMA_SHAPE) { // The marginal likelihood is well approximated by a moment matched // gamma distribution. 
By considering: // E[ E[X | a, b] ] = E[ a' / B ] @@ -475,15 +383,14 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, double rate = (priorShape - 2.0) / priorRate; LOG_TRACE("shape = " << shape << ", rate = " << rate); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { // We assume the data are described by X = Y - u where, Y is // gamma distributed and u is a constant offset. This means // that {x(i) + u} are gamma distributed. double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights[i]) - * maths_t::countVarianceScale(weightStyles, weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); double x = samples[i] + offset; LOG_TRACE("x = " << x); @@ -494,9 +401,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, result = aggregate(result, func(gamma, x), n); } - } - else - { + } else { // We use the fact that the random variable is Z = X / (b + X) is // beta distributed with parameters alpha equal to likelihoodShape // and beta equal to priorShape. Therefore, we can compute the @@ -505,15 +410,14 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, // // and then using the beta distribution. - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { // We assume the data are described by X = Y - u where, Y is // gamma distributed and u is a constant offset. This means // that {x(i) + u} are gamma distributed. double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights[i]) - * maths_t::countVarianceScale(weightStyles, weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); double x = samples[i] + offset; double scaledLikelihoodShape = likelihoodShape / varianceScale; double scaledPriorRate = varianceScale * priorRate; @@ -524,15 +428,10 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, result = aggregate(result, func(beta, z), n); } } - } - catch (const std::exception &e) - { - LOG_ERROR("Error calculating joint distribution: " << e.what() - << ", offset = " << offset - << ", likelihoodShape = " << likelihoodShape - << ", priorShape = " << priorShape - << ", priorRate = " << priorRate - << ", samples = " << core::CContainerPrinter::print(samples)); + } catch (const std::exception& e) { + LOG_ERROR("Error calculating joint distribution: " + << e.what() << ", offset = " << offset << ", likelihoodShape = " << likelihoodShape << ", priorShape = " << priorShape + << ", priorRate = " << priorRate << ", samples = " << core::CContainerPrinter::print(samples)); return false; } @@ -548,53 +447,48 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, //! so that it can be integrated over the hidden variable representing the //! actual value of a discrete datum which we assume is in the interval [n, n+1]. 
template -class CEvaluateOnSamples : core::CNonCopyable -{ - public: - CEvaluateOnSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - bool isNonInformative, - double offset, - double likelihoodShape, - double priorShape, - double priorRate) : - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Offset(offset), - m_LikelihoodShape(likelihoodShape), - m_PriorShape(priorShape), - m_PriorRate(priorRate) - { - } - - bool operator()(double x, double &result) const - { - return evaluateFunctionOnJointDistribution(m_WeightStyles, - m_Samples, - m_Weights, - F(), - SPlusWeight(), - m_IsNonInformative, - m_Offset + x, - m_LikelihoodShape, - m_PriorShape, - m_PriorRate, - result); - - } - - private: - const TWeightStyleVec &m_WeightStyles; - const TDouble1Vec &m_Samples; - const TDouble4Vec1Vec &m_Weights; - bool m_IsNonInformative; - double m_Offset; - double m_LikelihoodShape; - double m_PriorShape; - double m_PriorRate; +class CEvaluateOnSamples : core::CNonCopyable { +public: + CEvaluateOnSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + bool isNonInformative, + double offset, + double likelihoodShape, + double priorShape, + double priorRate) + : m_WeightStyles(weightStyles), + m_Samples(samples), + m_Weights(weights), + m_IsNonInformative(isNonInformative), + m_Offset(offset), + m_LikelihoodShape(likelihoodShape), + m_PriorShape(priorShape), + m_PriorRate(priorRate) {} + + bool operator()(double x, double& result) const { + return evaluateFunctionOnJointDistribution(m_WeightStyles, + m_Samples, + m_Weights, + F(), + SPlusWeight(), + m_IsNonInformative, + m_Offset + x, + m_LikelihoodShape, + m_PriorShape, + m_PriorRate, + result); + } + +private: + const TWeightStyleVec& m_WeightStyles; + const TDouble1Vec& m_Samples; + const TDouble4Vec1Vec& m_Weights; + bool m_IsNonInformative; + double m_Offset; + double m_LikelihoodShape; + double m_PriorShape; + double m_PriorRate; }; //! Computes the probability of seeing less likely samples at a specified offset. @@ -602,75 +496,67 @@ class CEvaluateOnSamples : core::CNonCopyable //! This thin wrapper around the evaluateFunctionOnJointDistribution function //! so that it can be integrated over the hidden variable representing the //! actual value of a discrete datum which we assume is in the interval [n, n+1]. 
-class CProbabilityOfLessLikelySamples : core::CNonCopyable -{ - public: - CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - bool isNonInformative, - double offset, - double likelihoodShape, - double priorShape, - double priorRate) : - m_Calculation(calculation), - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Offset(offset), - m_LikelihoodShape(likelihoodShape), - m_PriorShape(priorShape), - m_PriorRate(priorRate), - m_Tail(0) - { +class CProbabilityOfLessLikelySamples : core::CNonCopyable { +public: + CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + bool isNonInformative, + double offset, + double likelihoodShape, + double priorShape, + double priorRate) + : m_Calculation(calculation), + m_WeightStyles(weightStyles), + m_Samples(samples), + m_Weights(weights), + m_IsNonInformative(isNonInformative), + m_Offset(offset), + m_LikelihoodShape(likelihoodShape), + m_PriorShape(priorShape), + m_PriorRate(priorRate), + m_Tail(0) {} + + bool operator()(double x, double& result) const { + CJointProbabilityOfLessLikelySamples probability; + maths_t::ETail tail = maths_t::E_UndeterminedTail; + + if (!evaluateFunctionOnJointDistribution( + m_WeightStyles, + m_Samples, + m_Weights, + boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), + CJointProbabilityOfLessLikelySamples::SAddProbability(), + m_IsNonInformative, + m_Offset + x, + m_LikelihoodShape, + m_PriorShape, + m_PriorRate, + probability) || + !probability.calculate(result)) { + LOG_ERROR("Failed to compute probability of less likely samples"); + return false; } - bool operator()(double x, double &result) const - { - CJointProbabilityOfLessLikelySamples probability; - maths_t::ETail tail = maths_t::E_UndeterminedTail; - - if ( !evaluateFunctionOnJointDistribution(m_WeightStyles, - m_Samples, - m_Weights, - boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), - _1, _2, boost::ref(tail)), - CJointProbabilityOfLessLikelySamples::SAddProbability(), - m_IsNonInformative, - m_Offset + x, - m_LikelihoodShape, - m_PriorShape, - m_PriorRate, - probability) - || !probability.calculate(result)) - { - LOG_ERROR("Failed to compute probability of less likely samples"); - return false; - } - - m_Tail = m_Tail | tail; + m_Tail = m_Tail | tail; - return true; - } + return true; + } - maths_t::ETail tail() const - { - return static_cast(m_Tail); - } + maths_t::ETail tail() const { return static_cast(m_Tail); } - private: - maths_t::EProbabilityCalculation m_Calculation; - const TWeightStyleVec &m_WeightStyles; - const TDouble1Vec &m_Samples; - const TDouble4Vec1Vec &m_Weights; - bool m_IsNonInformative; - double m_Offset; - double m_LikelihoodShape; - double m_PriorShape; - double m_PriorRate; - mutable int m_Tail; +private: + maths_t::EProbabilityCalculation m_Calculation; + const TWeightStyleVec& m_WeightStyles; + const TDouble1Vec& m_Samples; + const TDouble4Vec1Vec& m_Weights; + bool m_IsNonInformative; + double m_Offset; + double m_LikelihoodShape; + double m_PriorShape; + double m_PriorRate; + mutable int m_Tail; }; //! 
Compute the joint marginal log likelihood function of a collection @@ -690,164 +576,132 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable //! n = |x| the number of elements in the sample vector. //! a' is the (maximum) likelihood shape of the gamma process. //! a and b are the prior gamma shape and rate, respectively. -class CLogMarginalLikelihood : core::CNonCopyable -{ - public: - CLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double offset, - double likelihoodShape, - double priorShape, - double priorRate) : - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_Offset(offset), - m_LikelihoodShape(likelihoodShape), - m_PriorShape(priorShape), - m_PriorRate(priorRate), - m_NumberSamples(0.0), - m_ImpliedShape(0.0), - m_Constant(0.0), - m_ErrorStatus(maths_t::E_FpNoErrors) - { - this->precompute(); +class CLogMarginalLikelihood : core::CNonCopyable { +public: + CLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double offset, + double likelihoodShape, + double priorShape, + double priorRate) + : m_WeightStyles(weightStyles), + m_Samples(samples), + m_Weights(weights), + m_Offset(offset), + m_LikelihoodShape(likelihoodShape), + m_PriorShape(priorShape), + m_PriorRate(priorRate), + m_NumberSamples(0.0), + m_ImpliedShape(0.0), + m_Constant(0.0), + m_ErrorStatus(maths_t::E_FpNoErrors) { + this->precompute(); + } + + //! Evaluate the log marginal likelihood at the offset \p x. + bool operator()(double x, double& result) const { + if (m_ErrorStatus & maths_t::E_FpFailed) { + return false; } - //! Evaluate the log marginal likelihood at the offset \p x. - bool operator()(double x, double &result) const - { - if (m_ErrorStatus & maths_t::E_FpFailed) - { - return false; - } - - double logSamplesSum = 0.0; - double sampleSum = 0.0; - double logSeasonalScaleSum = 0.0; - - try - { - for (std::size_t i = 0u; i < m_Samples.size(); ++i) - { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) - * maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); - - double sample = m_Samples[i] + x + m_Offset; - - if (sample <= 0.0) - { - // Technically, the marginal likelihood is zero here - // so the log would be infinite. We use minus max - // double because log(0) = HUGE_VALUE, which causes - // problems for Windows. Calling code is notified - // when the calculation overflows and should avoid - // taking the exponential since this will underflow - // and pollute the floating point environment. This - // may cause issues for some library function - // implementations (see fe*exceptflag for more details). 
- result = boost::numeric::bounds::lowest(); - this->addErrorStatus(maths_t::E_FpOverflowed); - return false; - } - logSamplesSum += n * (m_LikelihoodShape / varianceScale - 1.0) * std::log(sample); - sampleSum += n / varianceScale * sample; + double logSamplesSum = 0.0; + double sampleSum = 0.0; + double logSeasonalScaleSum = 0.0; + + try { + for (std::size_t i = 0u; i < m_Samples.size(); ++i) { + double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * + maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + + double sample = m_Samples[i] + x + m_Offset; + + if (sample <= 0.0) { + // Technically, the marginal likelihood is zero here + // so the log would be infinite. We use minus max + // double because log(0) = HUGE_VALUE, which causes + // problems for Windows. Calling code is notified + // when the calculation overflows and should avoid + // taking the exponential since this will underflow + // and pollute the floating point environment. This + // may cause issues for some library function + // implementations (see fe*exceptflag for more details). + result = boost::numeric::bounds::lowest(); + this->addErrorStatus(maths_t::E_FpOverflowed); + return false; } + logSamplesSum += n * (m_LikelihoodShape / varianceScale - 1.0) * std::log(sample); + sampleSum += n / varianceScale * sample; } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate likelihood: " << e.what()); - this->addErrorStatus(maths_t::E_FpFailed); - return false; - } - - result = m_Constant - + logSamplesSum - - m_ImpliedShape * std::log(m_PriorRate + sampleSum) - - logSeasonalScaleSum; - - return true; + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate likelihood: " << e.what()); + this->addErrorStatus(maths_t::E_FpFailed); + return false; } - //! Retrieve the error status for the integration. - maths_t::EFloatingPointErrorStatus errorStatus() const - { - return m_ErrorStatus; - } + result = m_Constant + logSamplesSum - m_ImpliedShape * std::log(m_PriorRate + sampleSum) - logSeasonalScaleSum; + + return true; + } - private: - //! Compute all the constants in the integrand. - void precompute() - { - m_NumberSamples = 0.0; - double logVarianceScaleSum = 0.0; - double nResidual = 0.0; - double logGammaScaledLikelihoodShape = 0.0; - double scaledImpliedShape = 0.0; - - try - { - for (std::size_t i = 0u; i < m_Weights.size(); ++i) - { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) - * maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); - m_NumberSamples += n; - if (varianceScale != 1.0) - { - logVarianceScaleSum -= m_LikelihoodShape - / varianceScale - * std::log(varianceScale); - logGammaScaledLikelihoodShape += - n * boost::math::lgamma(m_LikelihoodShape / varianceScale); - scaledImpliedShape += n * m_LikelihoodShape / varianceScale; - } - else - { - nResidual += n; - } + //! Retrieve the error status for the integration. + maths_t::EFloatingPointErrorStatus errorStatus() const { return m_ErrorStatus; } + +private: + //! Compute all the constants in the integrand. 
+ void precompute() { + m_NumberSamples = 0.0; + double logVarianceScaleSum = 0.0; + double nResidual = 0.0; + double logGammaScaledLikelihoodShape = 0.0; + double scaledImpliedShape = 0.0; + + try { + for (std::size_t i = 0u; i < m_Weights.size(); ++i) { + double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * + maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + m_NumberSamples += n; + if (varianceScale != 1.0) { + logVarianceScaleSum -= m_LikelihoodShape / varianceScale * std::log(varianceScale); + logGammaScaledLikelihoodShape += n * boost::math::lgamma(m_LikelihoodShape / varianceScale); + scaledImpliedShape += n * m_LikelihoodShape / varianceScale; + } else { + nResidual += n; } + } - m_ImpliedShape = scaledImpliedShape - + nResidual * m_LikelihoodShape - + m_PriorShape; + m_ImpliedShape = scaledImpliedShape + nResidual * m_LikelihoodShape + m_PriorShape; - LOG_TRACE("numberSamples = " << m_NumberSamples); + LOG_TRACE("numberSamples = " << m_NumberSamples); - m_Constant = m_PriorShape * std::log(m_PriorRate) - - boost::math::lgamma(m_PriorShape) - + logVarianceScaleSum - - logGammaScaledLikelihoodShape - - nResidual * boost::math::lgamma(m_LikelihoodShape) - + boost::math::lgamma(m_ImpliedShape); - } - catch (const std::exception &e) - { - LOG_ERROR("Error calculating marginal likelihood: " << e.what()); - this->addErrorStatus(maths_t::E_FpFailed); - } + m_Constant = m_PriorShape * std::log(m_PriorRate) - boost::math::lgamma(m_PriorShape) + logVarianceScaleSum - + logGammaScaledLikelihoodShape - nResidual * boost::math::lgamma(m_LikelihoodShape) + + boost::math::lgamma(m_ImpliedShape); + } catch (const std::exception& e) { + LOG_ERROR("Error calculating marginal likelihood: " << e.what()); + this->addErrorStatus(maths_t::E_FpFailed); } + } - //! Update the error status. - void addErrorStatus(maths_t::EFloatingPointErrorStatus status) const - { - m_ErrorStatus = static_cast(m_ErrorStatus | status); - } + //! Update the error status. 
+ void addErrorStatus(maths_t::EFloatingPointErrorStatus status) const { + m_ErrorStatus = static_cast(m_ErrorStatus | status); + } - private: - const TWeightStyleVec &m_WeightStyles; - const TDouble1Vec &m_Samples; - const TDouble4Vec1Vec &m_Weights; - double m_Offset; - double m_LikelihoodShape; - double m_PriorShape; - double m_PriorRate; - double m_NumberSamples; - double m_ImpliedShape; - double m_Constant; - mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus; +private: + const TWeightStyleVec& m_WeightStyles; + const TDouble1Vec& m_Samples; + const TDouble4Vec1Vec& m_Weights; + double m_Offset; + double m_LikelihoodShape; + double m_PriorShape; + double m_PriorRate; + double m_NumberSamples; + double m_ImpliedShape; + double m_Constant; + mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus; }; } // detail:: @@ -864,7 +718,6 @@ const std::string NUMBER_SAMPLES_TAG("g"); //const std::string MAXIMUM_TAG("i"); No longer used const std::string DECAY_RATE_TAG("j"); const std::string EMPTY_STRING; - } CGammaRateConjugate::CGammaRateConjugate(maths_t::EDataType dataType, @@ -872,37 +725,32 @@ CGammaRateConjugate::CGammaRateConjugate(maths_t::EDataType dataType, double shape, double rate, double decayRate, - double offsetMargin) : - CPrior(dataType, decayRate), - m_Offset(offset), - m_OffsetMargin(offsetMargin), - m_LikelihoodShape(1.0), - m_PriorShape(shape), - m_PriorRate(rate) -{} - -CGammaRateConjugate::CGammaRateConjugate(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser, - double offsetMargin) : - CPrior(params.s_DataType, 0.0), - m_Offset(0.0), - m_OffsetMargin(offsetMargin), - m_LikelihoodShape(1.0), - m_PriorShape(0.0), - m_PriorRate(0.0) -{ + double offsetMargin) + : CPrior(dataType, decayRate), + m_Offset(offset), + m_OffsetMargin(offsetMargin), + m_LikelihoodShape(1.0), + m_PriorShape(shape), + m_PriorRate(rate) { +} + +CGammaRateConjugate::CGammaRateConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser, + double offsetMargin) + : CPrior(params.s_DataType, 0.0), + m_Offset(0.0), + m_OffsetMargin(offsetMargin), + m_LikelihoodShape(1.0), + m_PriorShape(0.0), + m_PriorRate(0.0) { traverser.traverseSubLevel(boost::bind(&CGammaRateConjugate::acceptRestoreTraverser, this, _1)); } -bool CGammaRateConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, - double decayRate, - core::CStringUtils::stringToType(traverser.value(), decayRate), - this->decayRate(decayRate)) +bool CGammaRateConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN( + DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) RESTORE_BUILT_IN(OFFSET_TAG, m_Offset) RESTORE_BUILT_IN(LIKELIHOOD_SHAPE_TAG, m_LikelihoodShape) RESTORE(LOG_SAMPLES_MEAN_TAG, m_LogSamplesMean.fromDelimited(traverser.value())) @@ -913,79 +761,54 @@ bool CGammaRateConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser &t double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -CGammaRateConjugate CGammaRateConjugate::nonInformativePrior(maths_t::EDataType dataType, - double offset, - double decayRate, - double offsetMargin) -{ - 
return CGammaRateConjugate(dataType, offset + offsetMargin, - NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, - decayRate, offsetMargin); +CGammaRateConjugate +CGammaRateConjugate::nonInformativePrior(maths_t::EDataType dataType, double offset, double decayRate, double offsetMargin) { + return CGammaRateConjugate(dataType, offset + offsetMargin, NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, decayRate, offsetMargin); } -CGammaRateConjugate::EPrior CGammaRateConjugate::type() const -{ +CGammaRateConjugate::EPrior CGammaRateConjugate::type() const { return E_Gamma; } -CGammaRateConjugate *CGammaRateConjugate::clone() const -{ +CGammaRateConjugate* CGammaRateConjugate::clone() const { return new CGammaRateConjugate(*this); } -void CGammaRateConjugate::setToNonInformative(double offset, - double decayRate) -{ - *this = nonInformativePrior(this->dataType(), - offset + this->offsetMargin(), - decayRate, this->offsetMargin()); +void CGammaRateConjugate::setToNonInformative(double offset, double decayRate) { + *this = nonInformativePrior(this->dataType(), offset + this->offsetMargin(), decayRate, this->offsetMargin()); } -double CGammaRateConjugate::offsetMargin() const -{ +double CGammaRateConjugate::offsetMargin() const { return m_OffsetMargin; } -bool CGammaRateConjugate::needsOffset() const -{ +bool CGammaRateConjugate::needsOffset() const { return true; } -double CGammaRateConjugate::adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) -{ +double CGammaRateConjugate::adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { COffsetCost cost(*this); CApplyOffset apply(*this); return this->adjustOffsetWithCost(weightStyles, samples, weights, cost, apply); } -double CGammaRateConjugate::offset() const -{ +double CGammaRateConjugate::offset() const { return m_Offset; } -void CGammaRateConjugate::addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) -{ - if (samples.empty()) - { +void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { + if (samples.empty()) { return; } - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" - << core::CContainerPrinter::print(samples) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return; } @@ -1061,42 +884,32 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec &weightStyles, TMeanAccumulator logSamplesMean = m_LogSamplesMean; TMeanVarAccumulator sampleMoments = m_SampleMoments; - try - { + try { double shift = boost::math::digamma(m_LikelihoodShape); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights[i]) - * maths_t::countVarianceScale(weightStyles, weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); double x = samples[i] + m_Offset; - if (!CMathsFuncs::isFinite(x) || x <= 0.0) - { + if (!CMathsFuncs::isFinite(x) || x <= 0.0) { LOG_ERROR("Discarding " << x << " it's not 
gamma"); continue; } - double shift_ = - shift - + boost::math::digamma(m_LikelihoodShape / varianceScale) - + std::log(varianceScale); + double shift_ = -shift + boost::math::digamma(m_LikelihoodShape / varianceScale) + std::log(varianceScale); - if (this->isInteger()) - { + if (this->isInteger()) { double logxInvPlus1 = std::log(1.0 / x + 1.0); double logxPlus1 = std::log(x + 1.0); m_LogSamplesMean.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift_, n / varianceScale); m_SampleMoments.add(x + 0.5, n / varianceScale); - } - else - { + } else { m_LogSamplesMean.add(std::log(x) - shift_, n / varianceScale); m_SampleMoments.add(x, n / varianceScale); } } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to update likelihood: " << e.what()); return; } @@ -1112,28 +925,16 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec &weightStyles, // This is equivalent to adding on the variance of the latent // variable. - detail::truncateVariance(this->isInteger(), - logSamplesMean, - sampleMoments); - detail::truncateVariance(this->isInteger(), - m_LogSamplesMean, - m_SampleMoments); - - m_LikelihoodShape = detail::maximumLikelihoodShape(m_LikelihoodShape, - logSamplesMean, - m_LogSamplesMean, - sampleMoments, - m_SampleMoments); - - LOG_TRACE("m_Offset = " << m_Offset - << ", m_LikelihoodShape = " << m_LikelihoodShape - << ", m_LogSamplesMean = " << m_LogSamplesMean - << ", m_SampleMoments = " << m_SampleMoments - << ", m_PriorShape = " << m_PriorShape - << ", m_PriorRate = " << m_PriorRate); - - if (this->isBad()) - { + detail::truncateVariance(this->isInteger(), logSamplesMean, sampleMoments); + detail::truncateVariance(this->isInteger(), m_LogSamplesMean, m_SampleMoments); + + m_LikelihoodShape = detail::maximumLikelihoodShape(m_LikelihoodShape, logSamplesMean, m_LogSamplesMean, sampleMoments, m_SampleMoments); + + LOG_TRACE("m_Offset = " << m_Offset << ", m_LikelihoodShape = " << m_LikelihoodShape << ", m_LogSamplesMean = " << m_LogSamplesMean + << ", m_SampleMoments = " << m_SampleMoments << ", m_PriorShape = " << m_PriorShape + << ", m_PriorRate = " << m_PriorRate); + + if (this->isBad()) { LOG_ERROR("Update failed (" << this->debug() << ")"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); @@ -1141,16 +942,13 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec &weightStyles, } } -void CGammaRateConjugate::propagateForwardsByTime(double time) -{ - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { +void CGammaRateConjugate::propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { LOG_ERROR("Bad propagation time " << time); return; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // Nothing to be done. return; } @@ -1176,53 +974,35 @@ void CGammaRateConjugate::propagateForwardsByTime(double time) double count = CBasicStatistics::count(m_LogSamplesMean); double alpha = std::exp(-this->decayRate() * time); - alpha = count > detail::NON_INFORMATIVE_COUNT ? - (alpha * count + (1.0 - alpha) * detail::NON_INFORMATIVE_COUNT) / count : 1.0; - if (alpha < 1.0) - { + alpha = count > detail::NON_INFORMATIVE_COUNT ? 
(alpha * count + (1.0 - alpha) * detail::NON_INFORMATIVE_COUNT) / count : 1.0; + if (alpha < 1.0) { m_LogSamplesMean.age(alpha); m_SampleMoments.age(alpha); - m_LikelihoodShape = detail::maximumLikelihoodShape(m_LikelihoodShape, - logSamplesMean, - m_LogSamplesMean, - sampleMoments, - m_SampleMoments); + m_LikelihoodShape = + detail::maximumLikelihoodShape(m_LikelihoodShape, logSamplesMean, m_LogSamplesMean, sampleMoments, m_SampleMoments); } this->numberSamples(this->numberSamples() * alpha); - LOG_TRACE("m_LikelihoodShape = " << m_LikelihoodShape - << ", m_LogSamplesMean = " << m_LogSamplesMean - << ", m_SampleMoments = " << m_SampleMoments - << ", numberSamples = " << this->numberSamples()); + LOG_TRACE("m_LikelihoodShape = " << m_LikelihoodShape << ", m_LogSamplesMean = " << m_LogSamplesMean + << ", m_SampleMoments = " << m_SampleMoments << ", numberSamples = " << this->numberSamples()); } -CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodSupport() const -{ +CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodSupport() const { return std::make_pair(-m_Offset, boost::numeric::bounds::highest()); } -double CGammaRateConjugate::marginalLikelihoodMean() const -{ +double CGammaRateConjugate::marginalLikelihoodMean() const { return this->isInteger() ? this->mean() - 0.5 : this->mean(); } -double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ +double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { double varianceScale = 1.0; - try - { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) - * maths_t::countVarianceScale(weightStyles, weights); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get variance scale: " << e.what()); - } + try { + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale: " << e.what()); } - if (!this->isNonInformative()) - { + if (!this->isNonInformative()) { // We use the fact that the marginal likelihood is the distribution // of the R.V. defined as: // X = b * Z / (1 - Z) - u (1) @@ -1236,20 +1016,15 @@ double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec &weight // So the mode occurs at the r.h.s. of (1) evaluated at the mode of Z. double scaledLikelihoodShape = m_LikelihoodShape / varianceScale; - if (scaledLikelihoodShape > 1.0 && this->priorShape() > 1.0) - { - try - { + if (scaledLikelihoodShape > 1.0 && this->priorShape() > 1.0) { + try { double scaledPriorRate = varianceScale * this->priorRate(); boost::math::beta_distribution<> beta(scaledLikelihoodShape, this->priorShape()); double mode = boost::math::mode(beta); return scaledPriorRate * mode / (1.0 - mode) - m_Offset; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute marginal likelihood mode: " << e.what() - << ", likelihood shape = " << m_LikelihoodShape - << ", prior shape = " << this->priorShape()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute marginal likelihood mode: " << e.what() << ", likelihood shape = " << m_LikelihoodShape + << ", prior shape = " << this->priorShape()); } } } @@ -1267,11 +1042,8 @@ double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec &weight return std::max(mean == 0.0 ? 
0.0 : mean - variance / mean, 0.0) - m_Offset; } -double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ - if (this->isNonInformative()) - { +double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { + if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } @@ -1289,33 +1061,21 @@ double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec &we // whence... double varianceScale = 1.0; - try - { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) - * maths_t::countVarianceScale(weightStyles, weights); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get variance scale: " << e.what()); - } + try { + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale: " << e.what()); } double a = this->priorShape(); - if (a <= 2.0) - { + if (a <= 2.0) { return varianceScale * CBasicStatistics::variance(m_SampleMoments); } double b = this->priorRate(); - return varianceScale - * (1.0 + m_LikelihoodShape / (a - 1.0)) - * m_LikelihoodShape * b * b / (a - 1.0) / (a - 2.0); + return varianceScale * (1.0 + m_LikelihoodShape / (a - 1.0)) * m_LikelihoodShape * b * b / (a - 1.0) / (a - 2.0); } -CGammaRateConjugate::TDoubleDoublePr -CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ - if (this->isNonInformative()) - { +CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { + if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -1332,57 +1092,43 @@ CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage, // Z is beta distributed with alpha equal to m_LikelihoodShape // and beta equal to m_PriorShape. - try - { - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) - * maths_t::countVarianceScale(weightStyles, weights); + try { + double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); double scaledLikelihoodShape = m_LikelihoodShape / varianceScale; double scaledPriorRate = varianceScale * this->priorRate(); boost::math::beta_distribution<> beta(scaledLikelihoodShape, this->priorShape()); double x1 = boost::math::quantile(beta, (1.0 - percentage) / 2.0); x1 = scaledPriorRate * x1 / (1.0 - x1) - m_Offset - (this->isInteger() ? 0.5 : 0.0); double x2 = x1; - if (percentage > 0.0) - { + if (percentage > 0.0) { x2 = boost::math::quantile(beta, (1.0 + percentage) / 2.0); x2 = scaledPriorRate * x2 / (1.0 - x2) - m_Offset - (this->isInteger() ? 
0.5 : 0.0); } LOG_TRACE("x1 = " << x1 << ", x2 = " << x2); return std::make_pair(x1, x2); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute confidence interval: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to compute confidence interval: " << e.what()); } return this->marginalLikelihoodSupport(); } -maths_t::EFloatingPointErrorStatus -CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const -{ +maths_t::EFloatingPointErrorStatus CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" - << core::CContainerPrinter::print(samples) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // The non-informative likelihood is improper and effectively // zero everywhere. We use minus max double because // log(0) = HUGE_VALUE, which causes problems for Windows. @@ -1396,63 +1142,40 @@ CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightSty } maths_t::EFloatingPointErrorStatus status = maths_t::E_FpFailed; - try - { - detail::CLogMarginalLikelihood logMarginalLikelihood(weightStyles, - samples, - weights, - m_Offset, - m_LikelihoodShape, - this->priorShape(), - this->priorRate()); - if (this->isInteger()) - { + try { + detail::CLogMarginalLikelihood logMarginalLikelihood( + weightStyles, samples, weights, m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate()); + if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. 
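As an aside on the integration step below: the expectation over the latent uniform offset is taken in log space to avoid underflow of tiny likelihoods. A minimal, self-contained sketch of that idea, assuming nothing about the patched CIntegration API (the order-five nodes and weights are the same constants tabulated in CIntegration.cc later in this patch):

    #include <algorithm>
    #include <array>
    #include <cmath>
    #include <cstddef>
    #include <functional>
    #include <limits>

    // Log of E[exp(logL(Z))] for Z ~ U[0,1], via 5-point Gauss-Legendre
    // quadrature evaluated stably with log-sum-exp.
    double logExpectedLikelihood(const std::function<double(double)>& logL) {
        const std::array<double, 5> abscissas{0.0, -0.5384693101056831, 0.5384693101056831,
                                              -0.9061798459386640, 0.9061798459386640};
        const std::array<double, 5> weights{0.5688888888888889, 0.4786286704993665,
                                            0.4786286704993665, 0.2369268850561891,
                                            0.2369268850561891};
        std::array<double, 5> logTerms;
        double maxLogTerm = -std::numeric_limits<double>::infinity();
        for (std::size_t i = 0; i < 5; ++i) {
            double z = 0.5 * (abscissas[i] + 1.0);           // map [-1,1] onto [0,1]
            logTerms[i] = std::log(0.5 * weights[i]) + logL(z);
            maxLogTerm = std::max(maxLogTerm, logTerms[i]);
        }
        double sum = 0.0;
        for (double t : logTerms) {
            sum += std::exp(t - maxLogTerm);                 // never overflows
        }
        return maxLogTerm + std::log(sum);
    }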
- CIntegration::logGaussLegendre(logMarginalLikelihood, - 0.0, 1.0, - result); - } - else - { + CIntegration::logGaussLegendre(logMarginalLikelihood, 0.0, 1.0, result); + } else { logMarginalLikelihood(0.0, result); } - status = static_cast( - logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result)); - if (status & maths_t::E_FpFailed) - { + status = static_cast(logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result)); + if (status & maths_t::E_FpFailed) { LOG_ERROR("Failed to compute log likelihood (" << this->debug() << ")"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); - } - else if (status & maths_t::E_FpOverflowed) - { + } else if (status & maths_t::E_FpOverflowed) { LOG_TRACE("Log likelihood overflowed for (" << this->debug() << ")"); LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); LOG_TRACE("weights = " << core::CContainerPrinter::print(weights)); } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute likelihood: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to compute likelihood: " << e.what()); } return status; } -void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const -{ +void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { samples.clear(); - if (numberSamples == 0 || this->numberSamples() == 0.0) - { + if (numberSamples == 0 || this->numberSamples() == 0.0) { return; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // We can't sample the marginal likelihood directly so match sample // moments and sampled moments. @@ -1461,8 +1184,7 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, double deviation = std::sqrt(CBasicStatistics::variance(m_SampleMoments)); double root_two = boost::math::double_constants::root_two; - switch (numberSamples) - { + switch (numberSamples) { case 1u: samples.push_back(mean); break; @@ -1518,12 +1240,9 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, double mean = m_LikelihoodShape * this->priorRate() / (this->priorShape() - 1.0); - try - { - boost::math::beta_distribution<> beta1(m_LikelihoodShape, - this->priorShape()); - boost::math::beta_distribution<> beta2(m_LikelihoodShape + 1.0, - this->priorShape() - 1.0); + try { + boost::math::beta_distribution<> beta1(m_LikelihoodShape, this->priorShape()); + boost::math::beta_distribution<> beta2(m_LikelihoodShape + 1.0, this->priorShape() - 1.0); LOG_TRACE("mean = " << mean << ", numberSamples = " << numberSamples); @@ -1531,96 +1250,64 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, double lastPartialExpectation = 0.0; - for (std::size_t i = 1u; i < numberSamples; ++i) - { - double q = static_cast(i) - / static_cast(numberSamples); + for (std::size_t i = 1u; i < numberSamples; ++i) { + double q = static_cast(i) / static_cast(numberSamples); double xq = boost::math::quantile(beta1, q); double partialExpectation = mean * CTools::safeCdf(beta2, xq); - double sample = static_cast(numberSamples) - * (partialExpectation - lastPartialExpectation) - - m_Offset; + double sample = static_cast(numberSamples) * (partialExpectation - lastPartialExpectation) - m_Offset; LOG_TRACE("sample = " << sample); // Sanity check the sample: should be in the distribution support. 
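For readers following the sampling scheme above: the i-th sample is n times the difference of consecutive partial expectations, i.e. the conditional mean of the distribution between the (i-1)/n and i/n quantiles, so the sample set reproduces the distribution mean exactly. A sketch of the same scheme for a plain gamma distribution (a hypothetical stand-in; the patch applies it to the compound posterior via the beta1/beta2 pair above):

    #include <boost/math/distributions/gamma.hpp>
    #include <cstddef>
    #include <vector>

    // Mean-preserving samples: partition [0,1] into n equal quantile buckets
    // and emit each bucket's conditional mean. Uses the gamma identity
    // E[X; X <= x] = mean * F_{shape+1}(x).
    std::vector<double> meanPreservingSamples(double shape, double scale, std::size_t n) {
        boost::math::gamma_distribution<> g(shape, scale);
        boost::math::gamma_distribution<> gShiftShape(shape + 1.0, scale);
        double mean = boost::math::mean(g);
        std::vector<double> samples;
        double lastPartialExpectation = 0.0;
        for (std::size_t i = 1; i <= n; ++i) {
            double partialExpectation =
                i == n ? mean
                       : mean * boost::math::cdf(gShiftShape,
                                                 boost::math::quantile(g, static_cast<double>(i) / n));
            samples.push_back(static_cast<double>(n) * (partialExpectation - lastPartialExpectation));
            lastPartialExpectation = partialExpectation;
        }
        return samples;
    }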
- if (sample >= support.first && sample <= support.second) - { + if (sample >= support.first && sample <= support.second) { samples.push_back(sample); - } - else - { - LOG_ERROR("Sample out of bounds: sample = " << sample - << ", likelihoodShape = " << m_LikelihoodShape - << ", priorShape = " << this->priorShape() - << ", q = " << q - << ", x(q) = " << xq - << ", mean = " << mean); + } else { + LOG_ERROR("Sample out of bounds: sample = " << sample << ", likelihoodShape = " << m_LikelihoodShape + << ", priorShape = " << this->priorShape() << ", q = " << q << ", x(q) = " << xq + << ", mean = " << mean); } lastPartialExpectation = partialExpectation; } - double sample = static_cast(numberSamples) - * (mean - lastPartialExpectation) - m_Offset; + double sample = static_cast(numberSamples) * (mean - lastPartialExpectation) - m_Offset; LOG_TRACE("sample = " << sample); // Sanity check the sample: should be in the distribution support. - if (sample >= support.first && sample <= support.second) - { + if (sample >= support.first && sample <= support.second) { samples.push_back(sample); + } else { + LOG_ERROR("Sample out of bounds: sample = " << sample << ", likelihoodShape = " << m_LikelihoodShape + << ", priorShape = " << this->priorShape() << ", mean = " << mean); } - else - { - LOG_ERROR("Sample out of bounds: sample = " << sample - << ", likelihoodShape = " << m_LikelihoodShape - << ", priorShape = " << this->priorShape() - << ", mean = " << mean); - } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to sample: " << e.what() - << ", likelihoodShape = " << m_LikelihoodShape - << ", priorShape = " << this->priorShape() - << ", mean = " << mean); + } catch (const std::exception& e) { + LOG_ERROR("Failed to sample: " << e.what() << ", likelihoodShape = " << m_LikelihoodShape << ", priorShape = " << this->priorShape() + << ", mean = " << mean); } } -bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ +bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { using TMinusLogCdf = detail::CEvaluateOnSamples; lowerBound = upperBound = 0.0; - TMinusLogCdf minusLogCdf(weightStyles, - samples, - weights, - this->isNonInformative(), - m_Offset, - m_LikelihoodShape, - this->priorShape(), - this->priorRate()); - - if (this->isInteger()) - { + TMinusLogCdf minusLogCdf( + weightStyles, samples, weights, this->isNonInformative(), m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate()); + + if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. double value; - if (!CIntegration::logGaussLegendre(minusLogCdf, - 0.0, 1.0, - value)) - { - LOG_ERROR("Failed computing c.d.f. for " - << core::CContainerPrinter::print(samples)); + if (!CIntegration::logGaussLegendre(minusLogCdf, 0.0, 1.0, value)) { + LOG_ERROR("Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); return false; } @@ -1629,10 +1316,8 @@ bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles, } double value; - if (!minusLogCdf(0.0, value)) - { - LOG_ERROR("Failed computing c.d.f. 
for " - << core::CContainerPrinter::print(samples)); + if (!minusLogCdf(0.0, value)) { + LOG_ERROR("Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); return false; } @@ -1640,37 +1325,25 @@ bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles, return true; } -bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ +bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { using TMinusLogCdfComplement = detail::CEvaluateOnSamples; lowerBound = upperBound = 0.0; - TMinusLogCdfComplement minusLogCdfComplement(weightStyles, - samples, - weights, - this->isNonInformative(), - m_Offset, - m_LikelihoodShape, - this->priorShape(), - this->priorRate()); - - if (this->isInteger()) - { + TMinusLogCdfComplement minusLogCdfComplement( + weightStyles, samples, weights, this->isNonInformative(), m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate()); + + if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. double value; - if (!CIntegration::logGaussLegendre(minusLogCdfComplement, - 0.0, 1.0, - value)) - { - LOG_ERROR("Failed computing c.d.f. complement for " - << core::CContainerPrinter::print(samples)); + if (!CIntegration::logGaussLegendre(minusLogCdfComplement, 0.0, 1.0, value)) { + LOG_ERROR("Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); return false; } @@ -1679,10 +1352,8 @@ bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec &weig } double value; - if (!minusLogCdfComplement(0.0, value)) - { - LOG_ERROR("Failed computing c.d.f. complement for " - << core::CContainerPrinter::print(samples)); + if (!minusLogCdfComplement(0.0, value)) { + LOG_ERROR("Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); return false; } @@ -1691,13 +1362,12 @@ bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec &weig } bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const -{ + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const { lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; @@ -1711,16 +1381,13 @@ bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCa this->priorShape(), this->priorRate()); - if (this->isInteger()) - { + if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. 
double value; - if (!CIntegration::gaussLegendre(probability, 0.0, 1.0, value)) - { - LOG_ERROR("Failed computing probability for " - << core::CContainerPrinter::print(samples)); + if (!CIntegration::gaussLegendre(probability, 0.0, 1.0, value)) { + LOG_ERROR("Failed computing probability for " << core::CContainerPrinter::print(samples)); return false; } @@ -1731,10 +1398,8 @@ bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCa } double value; - if (!probability(0.0, value)) - { - LOG_ERROR("Failed computing probability for " - << core::CContainerPrinter::print(samples)); + if (!probability(0.0, value)) { + LOG_ERROR("Failed computing probability for " << core::CContainerPrinter::print(samples)); return false; } @@ -1744,51 +1409,37 @@ bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCa return true; } -bool CGammaRateConjugate::isNonInformative() const -{ - return CBasicStatistics::count(m_SampleMoments) < detail::NON_INFORMATIVE_COUNT - || this->priorRate() == NON_INFORMATIVE_RATE; +bool CGammaRateConjugate::isNonInformative() const { + return CBasicStatistics::count(m_SampleMoments) < detail::NON_INFORMATIVE_COUNT || this->priorRate() == NON_INFORMATIVE_RATE; } -void CGammaRateConjugate::print(const std::string &indent, std::string &result) const -{ +void CGammaRateConjugate::print(const std::string& indent, std::string& result) const { result += core_t::LINE_ENDING + indent + "gamma "; - if (this->isNonInformative()) - { + if (this->isNonInformative()) { result += "non-informative"; return; } - try - { - if (this->priorShape() > 2.0) - { - double shape = (this->priorShape() - 2.0) - / (this->priorShape() - 1.0) - * m_LikelihoodShape; - double rate = this->priorRate() - / (this->priorShape() - 2.0); + try { + if (this->priorShape() > 2.0) { + double shape = (this->priorShape() - 2.0) / (this->priorShape() - 1.0) * m_LikelihoodShape; + double rate = this->priorRate() / (this->priorShape() - 2.0); boost::math::gamma_distribution<> gamma(shape, rate); double mean = boost::math::mean(gamma); double deviation = boost::math::standard_deviation(gamma); - result += "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) - + " sd = " + core::CStringUtils::typeToStringPretty(deviation); + result += "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) + + " sd = " + core::CStringUtils::typeToStringPretty(deviation); return; } - } - catch (const std::exception &) - { - } + } catch (const std::exception&) {} double mean = CBasicStatistics::mean(m_SampleMoments); double deviation = std::sqrt(CBasicStatistics::variance(m_SampleMoments)); - result += "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) - + " sd = " + core::CStringUtils::typeToStringPretty(deviation); + result += + "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) + " sd = " + core::CStringUtils::typeToStringPretty(deviation); } -std::string CGammaRateConjugate::printJointDensityFunction() const -{ - if (this->isNonInformative()) - { +std::string CGammaRateConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative likelihood is improper 0 everywhere. 
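         // (An improper prior does not integrate to one, so there is no
         // normalised joint density to tabulate here; the empty string tells
         // callers there is nothing to plot.)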
return EMPTY_STRING; } @@ -1808,8 +1459,7 @@ std::string CGammaRateConjugate::printJointDensityFunction() const std::ostringstream xCoordinates; std::ostringstream yCoordinates; xCoordinates << "x = ["; - for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement) - { + for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement) { xCoordinates << x << " "; } xCoordinates << "];" << core_t::LINE_ENDING; @@ -1817,8 +1467,7 @@ std::string CGammaRateConjugate::printJointDensityFunction() const std::ostringstream pdf; pdf << "pdf = ["; x = xStart; - for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement) - { + for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement) { pdf << CTools::safePdf(gamma, x) << " "; } pdf << "];" << core_t::LINE_ENDING << "plot(x, pdf);"; @@ -1826,8 +1475,7 @@ std::string CGammaRateConjugate::printJointDensityFunction() const return xCoordinates.str() + yCoordinates.str() + pdf.str(); } -uint64_t CGammaRateConjugate::checksum(uint64_t seed) const -{ +uint64_t CGammaRateConjugate::checksum(uint64_t seed) const { seed = this->CPrior::checksum(seed); seed = CChecksum::calculate(seed, m_Offset); seed = CChecksum::calculate(seed, m_LikelihoodShape); @@ -1837,23 +1485,19 @@ uint64_t CGammaRateConjugate::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_PriorRate); } -void CGammaRateConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CGammaRateConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CGammaRateConjugate"); } -std::size_t CGammaRateConjugate::memoryUsage() const -{ +std::size_t CGammaRateConjugate::memoryUsage() const { return 0; } -std::size_t CGammaRateConjugate::staticSize() const -{ +std::size_t CGammaRateConjugate::staticSize() const { return sizeof(*this); } -void CGammaRateConjugate::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CGammaRateConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); inserter.insertValue(OFFSET_TAG, m_Offset, core::CIEEE754::E_SinglePrecision); inserter.insertValue(LIKELIHOOD_SHAPE_TAG, m_LikelihoodShape, core::CIEEE754::E_SinglePrecision); @@ -1864,81 +1508,56 @@ void CGammaRateConjugate::acceptPersistInserter(core::CStatePersistInserter &ins inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); } -double CGammaRateConjugate::likelihoodShape() const -{ +double CGammaRateConjugate::likelihoodShape() const { return m_LikelihoodShape; } -double CGammaRateConjugate::likelihoodRate() const -{ - if (this->isNonInformative()) - { +double CGammaRateConjugate::likelihoodRate() const { + if (this->isNonInformative()) { return 0.0; } - try - { - boost::math::gamma_distribution<> gamma(this->priorShape(), - 1.0 / this->priorRate()); + try { + boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate()); return boost::math::mean(gamma); - } - catch (std::exception &e) - { - LOG_ERROR("Failed to compute likelihood rate: " << e.what() - << ", prior shape = " << this->priorShape() - << ", prior rate = " << this->priorRate()); + } catch (std::exception& e) { + LOG_ERROR("Failed to compute likelihood rate: " << e.what() << ", prior shape = " << this->priorShape() + << ", prior rate = " << this->priorRate()); } return 0.0; } -CGammaRateConjugate::TDoubleDoublePr -CGammaRateConjugate::confidenceIntervalRate(double percentage) const -{ - if 
(this->isNonInformative()) - { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); +CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::confidenceIntervalRate(double percentage) const { + if (this->isNonInformative()) { + return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); } percentage /= 100.0; double lowerPercentile = 0.5 * (1.0 - percentage); double upperPercentile = 0.5 * (1.0 + percentage); - try - { + try { // The prior distribution for the rate is gamma. - boost::math::gamma_distribution<> gamma(this->priorShape(), - 1.0 / this->priorRate()); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), - boost::math::quantile(gamma, upperPercentile)); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute confidence interval: " << e.what() - << ", prior shape = " << this->priorShape() - << ", prior rate = " << this->priorRate()); + boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate()); + return std::make_pair(boost::math::quantile(gamma, lowerPercentile), boost::math::quantile(gamma, upperPercentile)); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute confidence interval: " << e.what() << ", prior shape = " << this->priorShape() + << ", prior rate = " << this->priorRate()); } - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); } -bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate &rhs, - const TEqualWithTolerance &equal) const -{ - LOG_DEBUG(m_LikelihoodShape << " " << rhs.m_LikelihoodShape << ", " - << this->priorShape() << " " << rhs.priorShape() << ", " - << this->priorRate() << " " << rhs.priorRate()); - return equal(m_LikelihoodShape, rhs.m_LikelihoodShape) - && equal(this->priorShape(), rhs.priorShape()) - && equal(this->priorRate(), rhs.priorRate()); +bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate& rhs, const TEqualWithTolerance& equal) const { + LOG_DEBUG(m_LikelihoodShape << " " << rhs.m_LikelihoodShape << ", " << this->priorShape() << " " << rhs.priorShape() << ", " + << this->priorRate() << " " << rhs.priorRate()); + return equal(m_LikelihoodShape, rhs.m_LikelihoodShape) && equal(this->priorShape(), rhs.priorShape()) && + equal(this->priorRate(), rhs.priorRate()); } -double CGammaRateConjugate::mean() const -{ - if (this->isNonInformative()) - { +double CGammaRateConjugate::mean() const { + if (this->isNonInformative()) { return CBasicStatistics::mean(m_SampleMoments); } @@ -1949,59 +1568,40 @@ double CGammaRateConjugate::mean() const // expectation of 1 / B w.r.t. the prior is b / (a-1). 
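The identity used below can be checked numerically. A quick standalone sketch (the names s, a and b are local to the sketch): if X | B ~ Gamma(s, rate B) and the rate prior is B ~ Gamma(a, rate b), then E[X] = s * b / (a - 1) for a > 1, since E[1/B] = b / (a - 1):

    #include <iostream>
    #include <random>

    int main() {
        const double s = 3.0, a = 5.0, b = 8.0;
        std::mt19937 rng(42);
        std::gamma_distribution<double> prior(a, 1.0 / b); // shape a, scale 1/b
        double sum = 0.0;
        const int n = 1000000;
        for (int i = 0; i < n; ++i) {
            double rate = prior(rng);                       // draw a rate B
            std::gamma_distribution<double> likelihood(s, 1.0 / rate);
            sum += likelihood(rng);                         // draw X | B
        }
        std::cout << "monte carlo: " << sum / n
                  << " closed form: " << s * b / (a - 1.0) << '\n'; // both ~6.0
        return 0;
    }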
double a = this->priorShape(); - if (a <= 1.0) - { + if (a <= 1.0) { return CBasicStatistics::mean(m_SampleMoments) - m_Offset; } double b = this->priorRate(); return m_LikelihoodShape * b / (a - 1.0) - m_Offset; } -double CGammaRateConjugate::priorShape() const -{ - return m_PriorShape - + RATE_VARIANCE_SCALE - * CBasicStatistics::count(m_SampleMoments) - * m_LikelihoodShape; +double CGammaRateConjugate::priorShape() const { + return m_PriorShape + RATE_VARIANCE_SCALE * CBasicStatistics::count(m_SampleMoments) * m_LikelihoodShape; } -double CGammaRateConjugate::priorRate() const -{ - return m_PriorRate - + RATE_VARIANCE_SCALE - * CBasicStatistics::count(m_SampleMoments) - * CBasicStatistics::mean(m_SampleMoments); +double CGammaRateConjugate::priorRate() const { + return m_PriorRate + RATE_VARIANCE_SCALE * CBasicStatistics::count(m_SampleMoments) * CBasicStatistics::mean(m_SampleMoments); } -bool CGammaRateConjugate::isBad() const -{ - return !CMathsFuncs::isFinite(m_Offset) - || !CMathsFuncs::isFinite(m_LikelihoodShape) - || !CMathsFuncs::isFinite(CBasicStatistics::count(m_LogSamplesMean)) - || !CMathsFuncs::isFinite(CBasicStatistics::moment<0>(m_LogSamplesMean)) - || !CMathsFuncs::isFinite(CBasicStatistics::count(m_SampleMoments)) - || !CMathsFuncs::isFinite(CBasicStatistics::moment<0>(m_SampleMoments)) - || !CMathsFuncs::isFinite(CBasicStatistics::moment<1>(m_SampleMoments)) - || !CMathsFuncs::isFinite(m_PriorShape) - || !CMathsFuncs::isFinite(m_PriorRate); +bool CGammaRateConjugate::isBad() const { + return !CMathsFuncs::isFinite(m_Offset) || !CMathsFuncs::isFinite(m_LikelihoodShape) || + !CMathsFuncs::isFinite(CBasicStatistics::count(m_LogSamplesMean)) || + !CMathsFuncs::isFinite(CBasicStatistics::moment<0>(m_LogSamplesMean)) || + !CMathsFuncs::isFinite(CBasicStatistics::count(m_SampleMoments)) || + !CMathsFuncs::isFinite(CBasicStatistics::moment<0>(m_SampleMoments)) || + !CMathsFuncs::isFinite(CBasicStatistics::moment<1>(m_SampleMoments)) || !CMathsFuncs::isFinite(m_PriorShape) || + !CMathsFuncs::isFinite(m_PriorRate); } -std::string CGammaRateConjugate::debug() const -{ +std::string CGammaRateConjugate::debug() const { std::ostringstream result; - result << std::scientific << std::setprecision(15) - << m_Offset << " " - << m_LikelihoodShape << " " - << m_LogSamplesMean << " " - << m_SampleMoments << " " - << m_PriorShape << " " - << m_PriorRate; + result << std::scientific << std::setprecision(15) << m_Offset << " " << m_LikelihoodShape << " " << m_LogSamplesMean << " " + << m_SampleMoments << " " << m_PriorShape << " " << m_PriorRate; return result.str(); } const double CGammaRateConjugate::NON_INFORMATIVE_SHAPE = 1.0; const double CGammaRateConjugate::NON_INFORMATIVE_RATE = 0.0; const double CGammaRateConjugate::RATE_VARIANCE_SCALE = 0.23; - } } diff --git a/lib/maths/CGradientDescent.cc b/lib/maths/CGradientDescent.cc index db9e4cb7f6..ddf3d33c3b 100644 --- a/lib/maths/CGradientDescent.cc +++ b/lib/maths/CGradientDescent.cc @@ -11,34 +11,21 @@ #include -namespace ml -{ -namespace maths -{ - -CGradientDescent::CGradientDescent(double learnRate, double momentum) : - m_LearnRate(learnRate), - m_Momentum(momentum) -{ +namespace ml { +namespace maths { + +CGradientDescent::CGradientDescent(double learnRate, double momentum) : m_LearnRate(learnRate), m_Momentum(momentum) { } -void CGradientDescent::learnRate(double learnRate) -{ +void CGradientDescent::learnRate(double learnRate) { m_LearnRate = learnRate; } -void CGradientDescent::momentum(double momentum) -{ +void 
CGradientDescent::momentum(double momentum) { m_Momentum = momentum; } -bool CGradientDescent::run(std::size_t n, - const TVector &x0, - const CFunction &f, - const CGradient &gf, - TVector &xBest, - TDoubleVec &fi) -{ +bool CGradientDescent::run(std::size_t n, const TVector& x0, const CFunction& f, const CGradient& gf, TVector& xBest, TDoubleVec& fi) { fi.clear(); fi.reserve(n); @@ -49,23 +36,19 @@ bool CGradientDescent::run(std::size_t n, CBasicStatistics::COrderStatisticsStack min; CBasicStatistics::SSampleMean::TAccumulator scale; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { double fx; - if (!f(x, fx)) - { + if (!f(x, fx)) { LOG_ERROR("Bailing on iteration " << i); return false; } - if (min.add(fx)) - { + if (min.add(fx)) { xBest = x; } fi.push_back(fx); - if (!gf(x, gfx)) - { + if (!gf(x, gfx)) { LOG_ERROR("Bailing on iteration " << i); return false; } @@ -84,41 +67,32 @@ bool CGradientDescent::run(std::size_t n, return true; } -CGradientDescent::CFunction::~CFunction() -{ +CGradientDescent::CFunction::~CFunction() { } -CGradientDescent::CGradient::~CGradient() -{ +CGradientDescent::CGradient::~CGradient() { } -CGradientDescent::CEmpiricalCentralGradient::CEmpiricalCentralGradient(const CFunction &f, double eps) : - m_Eps(eps), - m_F(f) -{} +CGradientDescent::CEmpiricalCentralGradient::CEmpiricalCentralGradient(const CFunction& f, double eps) : m_Eps(eps), m_F(f) { +} -bool CGradientDescent::CEmpiricalCentralGradient::operator()(const TVector &x, TVector &result) const -{ - if (x.dimension() != result.dimension()) - { +bool CGradientDescent::CEmpiricalCentralGradient::operator()(const TVector& x, TVector& result) const { + if (x.dimension() != result.dimension()) { LOG_ERROR("Dimension mismatch"); return false; } xShiftEps = x; - for (std::size_t i = 0u; i < x.dimension(); ++i) - { + for (std::size_t i = 0u; i < x.dimension(); ++i) { xShiftEps(i) -= m_Eps; double fMinusEps; - if (!m_F(xShiftEps, fMinusEps)) - { + if (!m_F(xShiftEps, fMinusEps)) { LOG_ERROR("Failed to evaluate function at x - eps"); return false; } xShiftEps(i) += 2.0 * m_Eps; double fPlusEps; - if (!m_F(xShiftEps, fPlusEps)) - { + if (!m_F(xShiftEps, fPlusEps)) { LOG_ERROR("Failed to evaluate function at x + eps"); return false; } @@ -128,6 +102,5 @@ bool CGradientDescent::CEmpiricalCentralGradient::operator()(const TVector &x, T return true; } - } } diff --git a/lib/maths/CGramSchmidt.cc b/lib/maths/CGramSchmidt.cc index b0ccaad02f..1e9a6d38a7 100644 --- a/lib/maths/CGramSchmidt.cc +++ b/lib/maths/CGramSchmidt.cc @@ -8,139 +8,104 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -bool CGramSchmidt::basis(TDoubleVecVec &x) -{ +bool CGramSchmidt::basis(TDoubleVecVec& x) { return basisImpl(x); } -bool CGramSchmidt::basis(TVectorVec &x) -{ +bool CGramSchmidt::basis(TVectorVec& x) { return basisImpl(x); } -void CGramSchmidt::swap(TDoubleVec &x, TDoubleVec &y) -{ +void CGramSchmidt::swap(TDoubleVec& x, TDoubleVec& y) { x.swap(y); } -void CGramSchmidt::swap(TVector &x, TVector &y) -{ +void CGramSchmidt::swap(TVector& x, TVector& y) { x.swap(y); } -const CGramSchmidt::TDoubleVec &CGramSchmidt::minusProjection(TDoubleVec &x, - const TDoubleVec &e) -{ +const CGramSchmidt::TDoubleVec& CGramSchmidt::minusProjection(TDoubleVec& x, const TDoubleVec& e) { sameDimension(x, e); double n = inner(x, e); - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { x[i] -= n * e[i]; } return x; } -const CGramSchmidt::TVector 
&CGramSchmidt::minusProjection(TVector &x, - const TVector &e) -{ +const CGramSchmidt::TVector& CGramSchmidt::minusProjection(TVector& x, const TVector& e) { double n = e.inner(x); return x -= n * e; } -const CGramSchmidt::TDoubleVec &CGramSchmidt::divide(TDoubleVec &x, double s) -{ - for (std::size_t i = 0u; i < x.size(); ++i) - { +const CGramSchmidt::TDoubleVec& CGramSchmidt::divide(TDoubleVec& x, double s) { + for (std::size_t i = 0u; i < x.size(); ++i) { x[i] /= s; } return x; } -const CGramSchmidt::TVector &CGramSchmidt::divide(TVector &x, double s) -{ +const CGramSchmidt::TVector& CGramSchmidt::divide(TVector& x, double s) { return x /= s; } -double CGramSchmidt::norm(const TDoubleVec &x) -{ +double CGramSchmidt::norm(const TDoubleVec& x) { return std::sqrt(inner(x, x)); } -double CGramSchmidt::norm(const TVector &x) -{ +double CGramSchmidt::norm(const TVector& x) { return x.euclidean(); } -double CGramSchmidt::inner(const TDoubleVec &x, const TDoubleVec &y) -{ +double CGramSchmidt::inner(const TDoubleVec& x, const TDoubleVec& y) { sameDimension(x, y); double result = 0.0; - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { result += x[i] * y[i]; } return result; } -double CGramSchmidt::inner(const TVector &x, const TVector &y) -{ +double CGramSchmidt::inner(const TVector& x, const TVector& y) { sameDimension(x, y); return x.inner(y); } -void CGramSchmidt::sameDimension(const TDoubleVec &x, - const TDoubleVec &y) -{ - if (x.size() != y.size()) - { - throw std::runtime_error("Mismatching dimensions: " - + core::CStringUtils::typeToString(x.size()) - + " != " - + core::CStringUtils::typeToString(y.size())); +void CGramSchmidt::sameDimension(const TDoubleVec& x, const TDoubleVec& y) { + if (x.size() != y.size()) { + throw std::runtime_error("Mismatching dimensions: " + core::CStringUtils::typeToString(x.size()) + + " != " + core::CStringUtils::typeToString(y.size())); } } -void CGramSchmidt::sameDimension(const TVector &x, - const TVector &y) -{ - if (x.dimension() != y.dimension()) - { - throw std::runtime_error("Mismatching dimensions: " - + core::CStringUtils::typeToString(x.dimension()) - + " != " - + core::CStringUtils::typeToString(y.dimension())); +void CGramSchmidt::sameDimension(const TVector& x, const TVector& y) { + if (x.dimension() != y.dimension()) { + throw std::runtime_error("Mismatching dimensions: " + core::CStringUtils::typeToString(x.dimension()) + + " != " + core::CStringUtils::typeToString(y.dimension())); } } -void CGramSchmidt::zero(TDoubleVec &x) -{ +void CGramSchmidt::zero(TDoubleVec& x) { std::fill(x.begin(), x.end(), 0.0); } -void CGramSchmidt::zero(TVector &x) -{ - for (std::size_t i = 0u; i < x.dimension(); ++i) - { +void CGramSchmidt::zero(TVector& x) { + for (std::size_t i = 0u; i < x.dimension(); ++i) { x(i) = 0.0; } } -std::string CGramSchmidt::print(const TDoubleVec &x) -{ +std::string CGramSchmidt::print(const TDoubleVec& x) { return core::CContainerPrinter::print(x); } -std::string CGramSchmidt::print(const TVector &x) -{ +std::string CGramSchmidt::print(const TVector& x) { std::ostringstream result; result << x; return result.str(); } - } } diff --git a/lib/maths/CInformationCriteria.cc b/lib/maths/CInformationCriteria.cc index 6826b312cf..8e853d138e 100644 --- a/lib/maths/CInformationCriteria.cc +++ b/lib/maths/CInformationCriteria.cc @@ -10,61 +10,47 @@ #include -namespace ml -{ -namespace maths -{ -namespace information_criteria_detail -{ -namespace -{ +namespace ml { +namespace maths { +namespace 
information_criteria_detail {
+namespace {

 //! The implementation of log determinant used for the Gaussian
 //! information criterion.
 template<typename MATRIX>
-double logDeterminant_(const MATRIX &covariance, double upper)
-{
+double logDeterminant_(const MATRIX& covariance, double upper) {
     Eigen::JacobiSVD<MATRIX> svd(covariance);

     double result = 0.0;
     double epsilon = svd.threshold() * svd.singularValues()(0);
-    for (int i = 0u; i < svd.singularValues().size(); ++i)
-    {
+    for (int i = 0u; i < svd.singularValues().size(); ++i) {
         result += std::log(std::max(upper * svd.singularValues()(i), epsilon));
     }
     return result;
 }

 const double VARIANCE_CONFIDENCE = 0.99;
-
 }

-double confidence(double df)
-{
+double confidence(double df) {
     boost::math::chi_squared_distribution<> chi(df);
     return boost::math::quantile(chi, VARIANCE_CONFIDENCE) / df;
 }

-#define LOG_DETERMINANT(N) \
-double logDeterminant(const CSymmetricMatrixNxN<double, N> &c, double upper) \
-{ \
-    return logDeterminant_(toDenseMatrix(c), upper); \
-}
+#define LOG_DETERMINANT(N)                                                                                                                 \
+    double logDeterminant(const CSymmetricMatrixNxN<double, N>& c, double upper) { return logDeterminant_(toDenseMatrix(c), upper); }
 LOG_DETERMINANT(2)
 LOG_DETERMINANT(3)
 LOG_DETERMINANT(4)
 LOG_DETERMINANT(5)
 #undef LOG_DETERMINANT

-double logDeterminant(const CSymmetricMatrix<double> &c, double upper)
-{
+double logDeterminant(const CSymmetricMatrix<double>& c, double upper) {
     return logDeterminant_(toDenseMatrix(c), upper);
 }

-double logDeterminant(const CDenseMatrix<double> &c, double upper)
-{
+double logDeterminant(const CDenseMatrix<double>& c, double upper) {
     return logDeterminant_(c, upper);
 }
-
 }
 }
 }
diff --git a/lib/maths/CIntegerTools.cc b/lib/maths/CIntegerTools.cc
index 4b44491adb..6299d064dd 100644
--- a/lib/maths/CIntegerTools.cc
+++ b/lib/maths/CIntegerTools.cc
@@ -11,31 +11,25 @@
 #include 
 #include 

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-bool CIntegerTools::isInteger(double value, double tolerance)
-{
+bool CIntegerTools::isInteger(double value, double tolerance) {
     double integerPart;
     double remainder = std::modf(value, &integerPart);
     return remainder <= tolerance * integerPart;
 }

-std::size_t CIntegerTools::nextPow2(uint64_t x)
-{
+std::size_t CIntegerTools::nextPow2(uint64_t x) {
     // This is just a binary search for the highest non-zero bit.
-    static const std::size_t SHIFTS[] = { 32u, 16u, 8u, 4u, 2u, 1u };
-    static const uint64_t MASKS[] = { 0xffffffff, 0xffff, 0xff, 0xf, 0x3, 0x1 };
+    static const std::size_t SHIFTS[] = {32u, 16u, 8u, 4u, 2u, 1u};
+    static const uint64_t MASKS[] = {0xffffffff, 0xffff, 0xff, 0xf, 0x3, 0x1};

     std::size_t result = 0u;
-    for (std::size_t i = 0; i < 6; ++i)
-    {
+    for (std::size_t i = 0; i < 6; ++i) {
         uint64_t y = (x >> SHIFTS[i]);
-        if (y & MASKS[i])
-        {
+        if (y & MASKS[i]) {
             result += SHIFTS[i];
             x = y;
         }
@@ -43,35 +37,30 @@ std::size_t CIntegerTools::nextPow2(uint64_t x)
     return result + static_cast<std::size_t>(x);
 }

-uint64_t CIntegerTools::reverseBits(uint64_t x)
-{
+uint64_t CIntegerTools::reverseBits(uint64_t x) {
     // Uses the standard "parallel" approach of swapping adjacent bits, then
     // adjacent pairs, quadruples, etc.
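The swap network below is easiest to see at a smaller width. An 8-bit version of the same idea (illustrative only; the patched function applies the identical steps, plus 16- and 32-bit swaps, to 64 bits):

    #include <cstdint>
    #include <iostream>

    std::uint8_t reverse8(std::uint8_t x) {
        x = ((x >> 1) & 0x55) | ((x << 1) & 0xaa); // swap adjacent bits
        x = ((x >> 2) & 0x33) | ((x << 2) & 0xcc); // swap adjacent pairs
        x = ((x >> 4) & 0x0f) | ((x << 4) & 0xf0); // swap nibbles
        return x;
    }

    int main() {
        std::cout << std::hex << int(reverse8(0x01)) << '\n'; // prints 80
        return 0;
    }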
- x = ((x >> 1) & 0x5555555555555555) | ((x << 1) & 0xaaaaaaaaaaaaaaaa); - x = ((x >> 2) & 0x3333333333333333) | ((x << 2) & 0xcccccccccccccccc); - x = ((x >> 4) & 0x0f0f0f0f0f0f0f0f) | ((x << 4) & 0xf0f0f0f0f0f0f0f0); - x = ((x >> 8) & 0x00ff00ff00ff00ff) | ((x << 8) & 0xff00ff00ff00ff00); + x = ((x >> 1) & 0x5555555555555555) | ((x << 1) & 0xaaaaaaaaaaaaaaaa); + x = ((x >> 2) & 0x3333333333333333) | ((x << 2) & 0xcccccccccccccccc); + x = ((x >> 4) & 0x0f0f0f0f0f0f0f0f) | ((x << 4) & 0xf0f0f0f0f0f0f0f0); + x = ((x >> 8) & 0x00ff00ff00ff00ff) | ((x << 8) & 0xff00ff00ff00ff00); x = ((x >> 16) & 0x0000ffff0000ffff) | ((x << 16) & 0xffff0000ffff0000); x = ((x >> 32) & 0x00000000ffffffff) | ((x << 32) & 0xffffffff00000000); return x; } -double CIntegerTools::binomial(unsigned int n, unsigned int k) -{ - if (n < k) - { +double CIntegerTools::binomial(unsigned int n, unsigned int k) { + if (n < k) { LOG_ERROR("Bad coefficient : (n k) = (" << n << " " << k << ")"); return 0.0; } double result = 1.0; - k = std::min(k, n-k); - for (unsigned int k_ = k; k_ > 0; --k_, --n) - { + k = std::min(k, n - k); + for (unsigned int k_ = k; k_ > 0; --k_, --n) { result *= static_cast(n) / static_cast(k_); } return result; } - } } diff --git a/lib/maths/CIntegration.cc b/lib/maths/CIntegration.cc index bd94e3bba1..5e92b16191 100644 --- a/lib/maths/CIntegration.cc +++ b/lib/maths/CIntegration.cc @@ -8,222 +8,161 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -const double *CIntegration::CGaussLegendreQuadrature::weights(EOrder order) -{ - switch (order) - { - case OrderOne: return WEIGHTS1; - case OrderTwo: return WEIGHTS2; - case OrderThree: return WEIGHTS3; - case OrderFour: return WEIGHTS4; - case OrderFive: return WEIGHTS5; - case OrderSix: return WEIGHTS6; - case OrderSeven: return WEIGHTS7; - case OrderEight: return WEIGHTS8; - case OrderNine: return WEIGHTS9; - case OrderTen: return WEIGHTS10; +const double* CIntegration::CGaussLegendreQuadrature::weights(EOrder order) { + switch (order) { + case OrderOne: + return WEIGHTS1; + case OrderTwo: + return WEIGHTS2; + case OrderThree: + return WEIGHTS3; + case OrderFour: + return WEIGHTS4; + case OrderFive: + return WEIGHTS5; + case OrderSix: + return WEIGHTS6; + case OrderSeven: + return WEIGHTS7; + case OrderEight: + return WEIGHTS8; + case OrderNine: + return WEIGHTS9; + case OrderTen: + return WEIGHTS10; } LOG_ABORT("Unexpected enumeration value " << order); } -const double *CIntegration::CGaussLegendreQuadrature::abscissas(EOrder order) -{ - switch (order) - { - case OrderOne: return ABSCISSAS1; - case OrderTwo: return ABSCISSAS2; - case OrderThree: return ABSCISSAS3; - case OrderFour: return ABSCISSAS4; - case OrderFive: return ABSCISSAS5; - case OrderSix: return ABSCISSAS6; - case OrderSeven: return ABSCISSAS7; - case OrderEight: return ABSCISSAS8; - case OrderNine: return ABSCISSAS9; - case OrderTen: return ABSCISSAS10; +const double* CIntegration::CGaussLegendreQuadrature::abscissas(EOrder order) { + switch (order) { + case OrderOne: + return ABSCISSAS1; + case OrderTwo: + return ABSCISSAS2; + case OrderThree: + return ABSCISSAS3; + case OrderFour: + return ABSCISSAS4; + case OrderFive: + return ABSCISSAS5; + case OrderSix: + return ABSCISSAS6; + case OrderSeven: + return ABSCISSAS7; + case OrderEight: + return ABSCISSAS8; + case OrderNine: + return ABSCISSAS9; + case OrderTen: + return ABSCISSAS10; } LOG_ABORT("Unexpected enumeration value " << order); } -const double 
CIntegration::CGaussLegendreQuadrature::WEIGHTS1[] = - { - 2.0 - }; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS2[] = - { - 1.0, - 1.0 - }; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS3[] = - { - 0.8888888888888888, - 0.5555555555555556, - 0.5555555555555556 - }; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS4[] = - { - 0.6521451548625461, - 0.6521451548625461, - 0.3478548451374538, - 0.3478548451374538 - }; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS5[] = - { - 0.5688888888888889, - 0.4786286704993665, - 0.4786286704993665, - 0.2369268850561891, - 0.2369268850561891 - }; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS1[] = {2.0}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS2[] = {1.0, 1.0}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS3[] = {0.8888888888888888, 0.5555555555555556, 0.5555555555555556}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS4[] = {0.6521451548625461, + 0.6521451548625461, + 0.3478548451374538, + 0.3478548451374538}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS5[] = {0.5688888888888889, + 0.4786286704993665, + 0.4786286704993665, + 0.2369268850561891, + 0.2369268850561891}; const double CIntegration::CGaussLegendreQuadrature::WEIGHTS6[] = - { - 0.3607615730481386, - 0.3607615730481386, - 0.4679139345726910, - 0.4679139345726910, - 0.1713244923791704, - 0.1713244923791704 - }; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS7[] = - { - 0.4179591836734694, - 0.3818300505051189, - 0.3818300505051189, - 0.2797053914892766, - 0.2797053914892766, - 0.1294849661688697, - 0.1294849661688697 - }; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS8[] = - { - 0.3626837833783620, - 0.3626837833783620, - 0.3137066458778873, - 0.3137066458778873, - 0.2223810344533745, - 0.2223810344533745, - 0.1012285362903763, - 0.1012285362903763 - }; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS9[] = - { - 0.3302393550012598, - 0.1806481606948574, - 0.1806481606948574, - 0.0812743883615744, - 0.0812743883615744, - 0.3123470770400029, - 0.3123470770400029, - 0.2606106964029354, - 0.2606106964029354 - }; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS10[] = - { - 0.2955242247147529, - 0.2955242247147529, - 0.2692667193099963, - 0.2692667193099963, - 0.2190863625159820, - 0.2190863625159820, - 0.1494513491505806, - 0.1494513491505806, - 0.0666713443086881, - 0.0666713443086881 - }; + {0.3607615730481386, 0.3607615730481386, 0.4679139345726910, 0.4679139345726910, 0.1713244923791704, 0.1713244923791704}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS7[] = {0.4179591836734694, + 0.3818300505051189, + 0.3818300505051189, + 0.2797053914892766, + 0.2797053914892766, + 0.1294849661688697, + 0.1294849661688697}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS8[] = {0.3626837833783620, + 0.3626837833783620, + 0.3137066458778873, + 0.3137066458778873, + 0.2223810344533745, + 0.2223810344533745, + 0.1012285362903763, + 0.1012285362903763}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS9[] = {0.3302393550012598, + 0.1806481606948574, + 0.1806481606948574, + 0.0812743883615744, + 0.0812743883615744, + 0.3123470770400029, + 0.3123470770400029, + 0.2606106964029354, + 0.2606106964029354}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS10[] = {0.2955242247147529, + 0.2955242247147529, + 0.2692667193099963, + 0.2692667193099963, + 0.2190863625159820, + 
0.2190863625159820, + 0.1494513491505806, + 0.1494513491505806, + 0.0666713443086881, + 0.0666713443086881}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS1[] = - { - 0.0 - }; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS2[] = - { - -0.5773502691896257, - 0.5773502691896257 - }; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS3[] = - { - 0.0000000000000000, - -0.7745966692414834, - 0.7745966692414834 - }; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS4[] = - { - -0.3399810435848563, - 0.3399810435848563, - -0.8611363115940526, - 0.8611363115940526 - }; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS5[] = - { - 0.0000000000000000, - -0.5384693101056831, - 0.5384693101056831, - -0.9061798459386640, - 0.9061798459386640 - }; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS1[] = {0.0}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS2[] = {-0.5773502691896257, 0.5773502691896257}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS3[] = {0.0000000000000000, -0.7745966692414834, 0.7745966692414834}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS4[] = {-0.3399810435848563, + 0.3399810435848563, + -0.8611363115940526, + 0.8611363115940526}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS5[] = {0.0000000000000000, + -0.5384693101056831, + 0.5384693101056831, + -0.9061798459386640, + 0.9061798459386640}; const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS6[] = - { - 0.6612093864662645, - -0.6612093864662645, - -0.2386191860831969, - 0.2386191860831969, - -0.9324695142031521, - 0.9324695142031521 - }; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS7[] = - { - 0.0000000000000000, - 0.4058451513773972, - -0.4058451513773972, - -0.7415311855993945, - 0.7415311855993945, - -0.9491079123427585, - 0.9491079123427585 - }; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS8[] = - { - -0.1834346424956498, - 0.1834346424956498, - -0.5255324099163290, - 0.5255324099163290, - -0.7966664774136267, - 0.7966664774136267, - -0.9602898564975363, - 0.9602898564975363 - }; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS9[] = - { - 0.0000000000000000, - -0.8360311073266358, - 0.8360311073266358, - -0.9681602395076261, - 0.9681602395076261, - -0.3242534234038089, - 0.3242534234038089, - -0.6133714327005904, - 0.6133714327005904 - }; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS10[] = - { - -0.1488743389816312, - 0.1488743389816312, - -0.4333953941292472, - 0.4333953941292472, - -0.6794095682990244, - 0.6794095682990244, - -0.8650633666889845, - 0.8650633666889845, - -0.9739065285171717, - 0.9739065285171717 - }; + {0.6612093864662645, -0.6612093864662645, -0.2386191860831969, 0.2386191860831969, -0.9324695142031521, 0.9324695142031521}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS7[] = {0.0000000000000000, + 0.4058451513773972, + -0.4058451513773972, + -0.7415311855993945, + 0.7415311855993945, + -0.9491079123427585, + 0.9491079123427585}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS8[] = {-0.1834346424956498, + 0.1834346424956498, + -0.5255324099163290, + 0.5255324099163290, + -0.7966664774136267, + 0.7966664774136267, + -0.9602898564975363, + 0.9602898564975363}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS9[] = {0.0000000000000000, + -0.8360311073266358, + 0.8360311073266358, + -0.9681602395076261, + 0.9681602395076261, + 
-0.3242534234038089, + 0.3242534234038089, + -0.6133714327005904, + 0.6133714327005904}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS10[] = {-0.1488743389816312, + 0.1488743389816312, + -0.4333953941292472, + 0.4333953941292472, + -0.6794095682990244, + 0.6794095682990244, + -0.8650633666889845, + 0.8650633666889845, + -0.9739065285171717, + 0.9739065285171717}; core::CFastMutex CIntegration::ms_Mutex; - } } diff --git a/lib/maths/CKMeansOnline1d.cc b/lib/maths/CKMeansOnline1d.cc index fbb9fbc5b0..1ec898d37d 100644 --- a/lib/maths/CKMeansOnline1d.cc +++ b/lib/maths/CKMeansOnline1d.cc @@ -7,14 +7,14 @@ #include #include -#include #include #include #include +#include #include -#include #include +#include #include #include @@ -25,13 +25,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TDouble1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; @@ -39,47 +36,28 @@ using TDouble4Vec1Vec = core::CSmallVector; using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; -namespace detail -{ +namespace detail { //! \brief Orders two normals by their means. -struct SNormalMeanLess -{ - public: - bool operator()(const CNormalMeanPrecConjugate &lhs, - const CNormalMeanPrecConjugate &rhs) const - { - return lhs.marginalLikelihoodMean() < rhs.marginalLikelihoodMean(); - } - bool operator()(double lhs, - const CNormalMeanPrecConjugate &rhs) const - { - return lhs < rhs.marginalLikelihoodMean(); - } - bool operator()(const CNormalMeanPrecConjugate &lhs, - double rhs) const - { - return lhs.marginalLikelihoodMean() < rhs; - } +struct SNormalMeanLess { +public: + bool operator()(const CNormalMeanPrecConjugate& lhs, const CNormalMeanPrecConjugate& rhs) const { + return lhs.marginalLikelihoodMean() < rhs.marginalLikelihoodMean(); + } + bool operator()(double lhs, const CNormalMeanPrecConjugate& rhs) const { return lhs < rhs.marginalLikelihoodMean(); } + bool operator()(const CNormalMeanPrecConjugate& lhs, double rhs) const { return lhs.marginalLikelihoodMean() < rhs; } }; //! Get the log of the likelihood that \p point is from \p normal. 
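The doc comment above refers to the count-weighted log-likelihood that drives cluster assignment. A standalone sketch of how two such log-likelihoods become (possibly hard) fractional assignments, mirroring the cluster() logic further down (0.01 stands in for HARD_ASSIGNMENT_THRESHOLD):

    #include <algorithm>
    #include <cmath>
    #include <utility>

    // Normalised responsibilities of the left and right clusters for one
    // point, given their log-likelihoods; very one-sided splits collapse to
    // a hard assignment.
    std::pair<double, double> responsibilities(double logLeft, double logRight) {
        double renormalizer = std::max(logLeft, logRight); // guard against underflow
        double pLeft = std::exp(logLeft - renormalizer);
        double pRight = std::exp(logRight - renormalizer);
        double z = pLeft + pRight;
        pLeft /= z;
        pRight /= z;
        if (pLeft < 0.01 * pRight) { return {0.0, 1.0}; }  // hard assign right
        if (pRight < 0.01 * pLeft) { return {1.0, 0.0}; }  // hard assign left
        return {pLeft, pRight};                            // split the count
    }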
-double logLikelihoodFromCluster(const TDouble1Vec &sample, - const CNormalMeanPrecConjugate &normal) -{ +double logLikelihoodFromCluster(const TDouble1Vec& sample, const CNormalMeanPrecConjugate& normal) { double likelihood; maths_t::EFloatingPointErrorStatus status = - normal.jointLogMarginalLikelihood(CConstantWeights::COUNT, - sample, - CConstantWeights::SINGLE_UNIT, - likelihood); - if (status & maths_t::E_FpFailed) - { + normal.jointLogMarginalLikelihood(CConstantWeights::COUNT, sample, CConstantWeights::SINGLE_UNIT, likelihood); + if (status & maths_t::E_FpFailed) { LOG_ERROR("Unable to compute probability for: " << sample[0]); return core::constants::LOG_MIN_DOUBLE - 1.0; } - if (status & maths_t::E_FpOverflowed) - { + if (status & maths_t::E_FpOverflowed) { return likelihood; } return likelihood + std::log(normal.numberSamples()); @@ -91,93 +69,69 @@ double logLikelihoodFromCluster(const TDouble1Vec &sample, const double HARD_ASSIGNMENT_THRESHOLD = 0.01; const std::string CLUSTER_TAG("a"); - } -CKMeansOnline1d::CKMeansOnline1d(TNormalVec &clusters) -{ +CKMeansOnline1d::CKMeansOnline1d(TNormalVec& clusters) { std::sort(clusters.begin(), clusters.end(), detail::SNormalMeanLess()); m_Clusters.assign(clusters.begin(), clusters.end()); } -CKMeansOnline1d::CKMeansOnline1d(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - traverser.traverseSubLevel(boost::bind(&CKMeansOnline1d::acceptRestoreTraverser, - this, boost::cref(params), _1)); +CKMeansOnline1d::CKMeansOnline1d(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + traverser.traverseSubLevel(boost::bind(&CKMeansOnline1d::acceptRestoreTraverser, this, boost::cref(params), _1)); } -bool CKMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == CLUSTER_TAG) - { +bool CKMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == CLUSTER_TAG) { CNormalMeanPrecConjugate cluster(params, traverser); m_Clusters.push_back(cluster); } - } - while (traverser.next()); + } while (traverser.next()); return true; } -std::string CKMeansOnline1d::persistenceTag() const -{ +std::string CKMeansOnline1d::persistenceTag() const { return K_MEANS_ONLINE_1D_TAG; } -void CKMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - inserter.insertLevel(CLUSTER_TAG, boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, - &m_Clusters[i], _1)); +void CKMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + inserter.insertLevel(CLUSTER_TAG, boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, &m_Clusters[i], _1)); } } -CKMeansOnline1d *CKMeansOnline1d::clone() const -{ +CKMeansOnline1d* CKMeansOnline1d::clone() const { return new CKMeansOnline1d(*this); } -void CKMeansOnline1d::clear() -{ +void CKMeansOnline1d::clear() { m_Clusters.clear(); } -std::size_t CKMeansOnline1d::numberClusters() const -{ +std::size_t CKMeansOnline1d::numberClusters() const { return m_Clusters.size(); } -void CKMeansOnline1d::dataType(maths_t::EDataType dataType) -{ - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { +void 
CKMeansOnline1d::dataType(maths_t::EDataType dataType) { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { m_Clusters[i].dataType(dataType); } } -void CKMeansOnline1d::decayRate(double decayRate) -{ - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { +void CKMeansOnline1d::decayRate(double decayRate) { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { m_Clusters[i].decayRate(decayRate); } } -bool CKMeansOnline1d::hasCluster(std::size_t index) const -{ +bool CKMeansOnline1d::hasCluster(std::size_t index) const { return index < m_Clusters.size(); } -bool CKMeansOnline1d::clusterCentre(std::size_t index, double &result) const -{ - if (!this->hasCluster(index)) - { +bool CKMeansOnline1d::clusterCentre(std::size_t index, double& result) const { + if (!this->hasCluster(index)) { LOG_ERROR("Cluster " << index << " doesn't exist"); return false; } @@ -185,10 +139,8 @@ bool CKMeansOnline1d::clusterCentre(std::size_t index, double &result) const return true; } -bool CKMeansOnline1d::clusterSpread(std::size_t index, double &result) const -{ - if (!this->hasCluster(index)) - { +bool CKMeansOnline1d::clusterSpread(std::size_t index, double& result) const { + if (!this->hasCluster(index)) { LOG_ERROR("Cluster " << index << " doesn't exist"); return false; } @@ -196,70 +148,51 @@ bool CKMeansOnline1d::clusterSpread(std::size_t index, double &result) const return true; } -void CKMeansOnline1d::cluster(const double &point, - TSizeDoublePr2Vec &result, - double count) const -{ +void CKMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, double count) const { result.clear(); - if (m_Clusters.empty()) - { + if (m_Clusters.empty()) { LOG_ERROR("No clusters"); return; } - auto rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), - point, detail::SNormalMeanLess()); + auto rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), point, detail::SNormalMeanLess()); - if (rightCluster == m_Clusters.end()) - { + if (rightCluster == m_Clusters.end()) { --rightCluster; result.emplace_back(rightCluster - m_Clusters.begin(), count); - } - else if (rightCluster == m_Clusters.begin()) - { + } else if (rightCluster == m_Clusters.begin()) { result.emplace_back(size_t(0), count); - } - else - { + } else { auto leftCluster = rightCluster; --leftCluster; TDouble1Vec sample(1, point); - double likelihoodLeft = detail::logLikelihoodFromCluster(sample, *leftCluster); + double likelihoodLeft = detail::logLikelihoodFromCluster(sample, *leftCluster); double likelihoodRight = detail::logLikelihoodFromCluster(sample, *rightCluster); double renormalizer = std::max(likelihoodLeft, likelihoodRight); - double pLeft = std::exp(likelihoodLeft - renormalizer); + double pLeft = std::exp(likelihoodLeft - renormalizer); double pRight = std::exp(likelihoodRight - renormalizer); double normalizer = pLeft + pRight; - pLeft /= normalizer; + pLeft /= normalizer; pRight /= normalizer; - if (pLeft < HARD_ASSIGNMENT_THRESHOLD * pRight) - { + if (pLeft < HARD_ASSIGNMENT_THRESHOLD * pRight) { result.emplace_back(rightCluster - m_Clusters.begin(), count); - } - else if (pRight < HARD_ASSIGNMENT_THRESHOLD * pLeft) - { + } else if (pRight < HARD_ASSIGNMENT_THRESHOLD * pLeft) { result.emplace_back(leftCluster - m_Clusters.begin(), count); - } - else - { + } else { result.emplace_back(leftCluster - m_Clusters.begin(), count * pLeft); result.emplace_back(rightCluster - m_Clusters.begin(), count * pRight); } } } -void CKMeansOnline1d::add(const double &point, - TSizeDoublePr2Vec &clusters, - 
double count)
-{
+void CKMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, double count) {
    clusters.clear();

-    if (m_Clusters.empty())
-    {
+    if (m_Clusters.empty()) {
        return;
    }
@@ -268,36 +201,27 @@ void CKMeansOnline1d::add(const double &point,
    TDouble1Vec sample{point};
    TDouble4Vec1Vec weight{TDouble4Vec(1)};
-    for (std::size_t i = 0u; i < clusters.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < clusters.size(); ++i) {
        weight[0][0] = clusters[i].second;
        m_Clusters[clusters[i].first].addSamples(CConstantWeights::COUNT, sample, weight);
    }
}

-void CKMeansOnline1d::add(const TDoubleDoublePrVec &points)
-{
+void CKMeansOnline1d::add(const TDoubleDoublePrVec& points) {
    TSizeDoublePr2Vec dummy;
-    for (std::size_t i = 0u; i < points.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < points.size(); ++i) {
        this->add(points[i].first, dummy, points[i].second);
    }
}

-void CKMeansOnline1d::propagateForwardsByTime(double time)
-{
-    for (std::size_t i = 0u; i < m_Clusters.size(); ++i)
-    {
+void CKMeansOnline1d::propagateForwardsByTime(double time) {
+    for (std::size_t i = 0u; i < m_Clusters.size(); ++i) {
        m_Clusters[i].propagateForwardsByTime(time);
    }
}

-bool CKMeansOnline1d::sample(std::size_t index,
-                             std::size_t numberSamples,
-                             TDoubleVec &samples) const
-{
-    if (!this->hasCluster(index))
-    {
+bool CKMeansOnline1d::sample(std::size_t index, std::size_t numberSamples, TDoubleVec& samples) const {
+    if (!this->hasCluster(index)) {
        LOG_ERROR("Cluster " << index << " doesn't exist");
        return false;
    }
@@ -307,41 +231,33 @@ bool CKMeansOnline1d::sample(std::size_t index,
    return true;
}

-double CKMeansOnline1d::probability(std::size_t index) const
-{
-    if (!this->hasCluster(index))
-    {
+double CKMeansOnline1d::probability(std::size_t index) const {
+    if (!this->hasCluster(index)) {
        return 0.0;
    }
    double weight = m_Clusters[index].numberSamples();
    double weightSum = 0.0;
-    for (std::size_t i = 0u; i < m_Clusters.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Clusters.size(); ++i) {
        weightSum += m_Clusters[i].numberSamples();
    }
    return weightSum == 0.0 ? 0.0 : weight / weightSum;
}

-void CKMeansOnline1d::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CKMeansOnline1d::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
    mem->setName("CKMeansOnline1d");
    core::CMemoryDebug::dynamicSize("m_Clusters", m_Clusters, mem);
}

-std::size_t CKMeansOnline1d::memoryUsage() const
-{
+std::size_t CKMeansOnline1d::memoryUsage() const {
    return core::CMemory::dynamicSize(m_Clusters);
}

-std::size_t CKMeansOnline1d::staticSize() const
-{
+std::size_t CKMeansOnline1d::staticSize() const {
    return sizeof(*this);
}

-uint64_t CKMeansOnline1d::checksum(uint64_t seed) const
-{
+uint64_t CKMeansOnline1d::checksum(uint64_t seed) const {
    return CChecksum::calculate(seed, m_Clusters);
}
-
}
}
diff --git a/lib/maths/CKMostCorrelated.cc b/lib/maths/CKMostCorrelated.cc
index 623de98e5a..caf7df60b3 100644
--- a/lib/maths/CKMostCorrelated.cc
+++ b/lib/maths/CKMostCorrelated.cc
@@ -19,27 +19,24 @@
#include
#include

-#include
#include
#include
#include
#include
+#include

#include
#include
#include

-namespace bg  = boost::geometry;
+namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
namespace bgm = boost::geometry::model;

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
+namespace {

using TSizeSizePr = std::pair<std::size_t, std::size_t>;
using TSizeSizePrUSet = boost::unordered_set<TSizeSizePr>;
@@ -49,68 +46,51 @@ using TPointSizePrVec = std::vector<TPointSizePr>;

//! \brief Unary predicate to check that variables, corresponding
//! to labeled points, are not equal to a specified variable.
-class CNotEqual : public std::unary_function<TPointSizePr, bool>
-{
-    public:
-        CNotEqual(std::size_t X) : m_X(X) {}
-
-        bool operator()(const TPointSizePr &y) const
-        {
-            std::size_t Y = y.second;
-            return m_X != Y;
-        }
+class CNotEqual : public std::unary_function<TPointSizePr, bool> {
+public:
+    CNotEqual(std::size_t X) : m_X(X) {}
+
+    bool operator()(const TPointSizePr& y) const {
+        std::size_t Y = y.second;
+        return m_X != Y;
+    }

-    private:
-        std::size_t m_X;
+private:
+    std::size_t m_X;
};

//! \brief Unary predicate to check if one specified variable
//! and others, corresponding to labeled points, are in a
//! specified collection of pairs of variables.
-class CPairNotIn : public std::unary_function<TPointSizePr, bool>
-{
-    public:
-        CPairNotIn(const TSizeSizePrUSet &lookup, std::size_t X) :
-            m_Lookup(&lookup),
-            m_X(X)
-        {}
-
-        bool operator()(const TPointSizePr &y) const
-        {
-            std::size_t Y = y.second;
-            return m_Lookup->count(std::make_pair(std::min(m_X, Y), std::max(m_X, Y))) == 0;
-        }
+class CPairNotIn : public std::unary_function<TPointSizePr, bool> {
+public:
+    CPairNotIn(const TSizeSizePrUSet& lookup, std::size_t X) : m_Lookup(&lookup), m_X(X) {}
+
+    bool operator()(const TPointSizePr& y) const {
+        std::size_t Y = y.second;
+        return m_Lookup->count(std::make_pair(std::min(m_X, Y), std::max(m_X, Y))) == 0;
+    }

-    private:
-        const TSizeSizePrUSet *m_Lookup;
-        std::size_t m_X;
+private:
+    const TSizeSizePrUSet* m_Lookup;
+    std::size_t m_X;
};

//! \brief Unary predicate to check if a point is closer,
//! in square Euclidean distance, to a specified point than
//! a specified threshold.
-class CCloserThan : public std::unary_function<TPointSizePr, bool>
-{
-    public:
-        CCloserThan(double threshold, const TPoint &x) :
-            m_Threshold(threshold),
-            m_X(x)
-        {}
-
-        bool operator()(const TPointSizePr &y) const
-        {
-            return pow2(bg::distance(m_X, y.first)) < m_Threshold;
-        }
+class CCloserThan : public std::unary_function<TPointSizePr, bool> {
+public:
+    CCloserThan(double threshold, const TPoint& x) : m_Threshold(threshold), m_X(x) {}

-    private:
-        static double pow2(double x)
-        {
-            return x * x;
-        }
+    bool operator()(const TPointSizePr& y) const { return pow2(bg::distance(m_X, y.first)) < m_Threshold; }

-    private:
-        double m_Threshold;
-        TPoint m_X;
+private:
+    static double pow2(double x) { return x * x; }
+
+private:
+    double m_Threshold;
+    TPoint m_X;
};

const std::string PROJECTIONS_TAG("a");
@@ -129,28 +109,21 @@ const double MINIMUM_FREQUENCY = 0.25;

} // unnamed::

-CKMostCorrelated::CKMostCorrelated(std::size_t k, double decayRate, bool initialize) :
-    m_K(k),
-    m_DecayRate(decayRate),
-    m_MaximumCount(0.0)
-{
-    if (initialize)
-    {
+CKMostCorrelated::CKMostCorrelated(std::size_t k, double decayRate, bool initialize) : m_K(k), m_DecayRate(decayRate), m_MaximumCount(0.0) {
+    if (initialize) {
        this->nextProjection();
    }
}

-bool CKMostCorrelated::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
+bool CKMostCorrelated::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
    m_Projections.clear();
    m_CurrentProjected.clear();
    m_Projected.clear();
    m_Moments.clear();
    m_MostCorrelated.clear();

-    do
-    {
-        const std::string &name = traverser.name();
+    do {
+        const std::string& name = traverser.name();
        RESTORE(RNG_TAG, m_Rng.fromString(traverser.value()))
        RESTORE(PROJECTIONS_TAG, core::CPersistUtils::restore(PROJECTIONS_TAG, m_Projections, traverser))
        RESTORE(CURRENT_PROJECTED_TAG, core::CPersistUtils::restore(CURRENT_PROJECTED_TAG, m_CurrentProjected, traverser))
@@
-158,14 +131,12 @@ bool CKMostCorrelated::acceptRestoreTraverser(core::CStateRestoreTraverser &trav RESTORE_BUILT_IN(MAXIMUM_COUNT_TAG, m_MaximumCount) RESTORE(MOMENTS_TAG, core::CPersistUtils::restore(MOMENTS_TAG, m_Moments, traverser)) RESTORE(MOST_CORRELATED_TAG, core::CPersistUtils::restore(MOST_CORRELATED_TAG, m_MostCorrelated, traverser)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CKMostCorrelated::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CKMostCorrelated::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(RNG_TAG, m_Rng.toString()); core::CPersistUtils::persist(PROJECTIONS_TAG, m_Projections, inserter); core::CPersistUtils::persist(CURRENT_PROJECTED_TAG, m_CurrentProjected, inserter); @@ -175,179 +146,135 @@ void CKMostCorrelated::acceptPersistInserter(core::CStatePersistInserter &insert core::CPersistUtils::persist(MOST_CORRELATED_TAG, m_MostCorrelated, inserter); } -void CKMostCorrelated::mostCorrelated(TSizeSizePrVec &result) const -{ +void CKMostCorrelated::mostCorrelated(TSizeSizePrVec& result) const { result.clear(); std::size_t N = std::min(m_K, m_MostCorrelated.size()); - if (N > 0) - { + if (N > 0) { result.reserve(N); - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { result.emplace_back(m_MostCorrelated[i].s_X, m_MostCorrelated[i].s_Y); } } } -void CKMostCorrelated::mostCorrelated(std::size_t n, - TSizeSizePrVec &correlates, - TDoubleVec *pearson) const -{ +void CKMostCorrelated::mostCorrelated(std::size_t n, TSizeSizePrVec& correlates, TDoubleVec* pearson) const { correlates.clear(); - if (pearson) - { + if (pearson) { pearson->clear(); } n = std::min(n, m_MostCorrelated.size()); - if (n > 0) - { + if (n > 0) { correlates.reserve(n); - if (pearson) - { + if (pearson) { pearson->reserve(n); } - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { correlates.emplace_back(m_MostCorrelated[i].s_X, m_MostCorrelated[i].s_Y); - if (pearson) - { + if (pearson) { pearson->push_back(CBasicStatistics::mean(m_MostCorrelated[i].s_Correlation)); } } } } -void CKMostCorrelated::correlations(TDoubleVec &result) const -{ +void CKMostCorrelated::correlations(TDoubleVec& result) const { result.clear(); std::size_t N = std::min(m_K, m_MostCorrelated.size()); - if (N > 0) - { + if (N > 0) { result.reserve(N); - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { result.push_back(CBasicStatistics::mean(m_MostCorrelated[i].s_Correlation)); } } } -void CKMostCorrelated::correlations(std::size_t n, TDoubleVec &result) const -{ +void CKMostCorrelated::correlations(std::size_t n, TDoubleVec& result) const { result.clear(); n = std::min(n, m_MostCorrelated.size()); - if (n > 0) - { + if (n > 0) { result.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { result.push_back(CBasicStatistics::mean(m_MostCorrelated[i].s_Correlation)); } } } -void CKMostCorrelated::addVariables(std::size_t n) -{ +void CKMostCorrelated::addVariables(std::size_t n) { core::CAllocationStrategy::resize(m_Moments, std::max(n, m_Moments.size())); } -void CKMostCorrelated::removeVariables(const TSizeVec &remove) -{ +void CKMostCorrelated::removeVariables(const TSizeVec& remove) { LOG_TRACE("removing = " << core::CContainerPrinter::print(remove)); - for (std::size_t i = 0u; i < remove.size(); ++i) - { - if (remove[i] < m_Moments.size()) - { + for (std::size_t i = 0u; i < 
remove.size(); ++i) { + if (remove[i] < m_Moments.size()) { m_Moments[remove[i]] = TMeanVarAccumulator(); m_Projected.erase(remove[i]); - m_MostCorrelated.erase(std::remove_if(m_MostCorrelated.begin(), - m_MostCorrelated.end(), - CMatches(remove[i])), m_MostCorrelated.end()); + m_MostCorrelated.erase(std::remove_if(m_MostCorrelated.begin(), m_MostCorrelated.end(), CMatches(remove[i])), + m_MostCorrelated.end()); } } } -bool CKMostCorrelated::changed() const -{ +bool CKMostCorrelated::changed() const { return m_Projections.size() == PROJECTION_DIMENSION; } -void CKMostCorrelated::add(std::size_t X, double x) -{ - if (X >= m_Moments.size()) - { +void CKMostCorrelated::add(std::size_t X, double x) { + if (X >= m_Moments.size()) { LOG_ERROR("Invalid variable " << X); return; } - TMeanVarAccumulator &moments = m_Moments[X]; + TMeanVarAccumulator& moments = m_Moments[X]; moments.add(x); TVector projected(0.0); - if (CBasicStatistics::count(moments) > 2.0) - { - double m = CBasicStatistics::mean(moments); + if (CBasicStatistics::count(moments) > 2.0) { + double m = CBasicStatistics::mean(moments); double sd = std::sqrt(CBasicStatistics::variance(moments)); - if (sd > 10.0 * std::numeric_limits::epsilon() * std::fabs(m)) - { + if (sd > 10.0 * std::numeric_limits::epsilon() * std::fabs(m)) { projected = m_Projections.back() * (x - m) / sd; m_CurrentProjected[X] += projected; } } } -void CKMostCorrelated::capture() -{ +void CKMostCorrelated::capture() { m_MaximumCount += 1.0; - for (TSizeVectorUMapCItr i = m_CurrentProjected.begin(); - i != m_CurrentProjected.end(); - ++i) - { + for (TSizeVectorUMapCItr i = m_CurrentProjected.begin(); i != m_CurrentProjected.end(); ++i) { std::size_t X = i->first; TSizeVectorPackedBitVectorPrUMapItr j = m_Projected.find(X); - if (j == m_Projected.end()) - { + if (j == m_Projected.end()) { TVector zero(0.0); CPackedBitVector indicator(PROJECTION_DIMENSION - m_Projections.size(), false); - j = m_Projected.emplace(boost::unordered::piecewise_construct, - boost::make_tuple(X), - boost::make_tuple(zero, indicator)).first; + j = m_Projected.emplace(boost::unordered::piecewise_construct, boost::make_tuple(X), boost::make_tuple(zero, indicator)).first; } j->second.first += i->second; } - for (TSizeVectorPackedBitVectorPrUMapItr i = m_Projected.begin(); - i != m_Projected.end(); - ++i) - { + for (TSizeVectorPackedBitVectorPrUMapItr i = m_Projected.begin(); i != m_Projected.end(); ++i) { i->second.second.extend(m_CurrentProjected.count(i->first) > 0); } m_Projections.pop_back(); m_CurrentProjected.clear(); - if (m_Projections.empty()) - { + if (m_Projections.empty()) { LOG_TRACE("# projections = " << m_Projected.size()); // For existing indices in the "most correlated" collection // compute the updated statistics. - for (std::size_t i = 0u; i < m_MostCorrelated.size(); ++i) - { + for (std::size_t i = 0u; i < m_MostCorrelated.size(); ++i) { m_MostCorrelated[i].update(m_Projected); } std::stable_sort(m_MostCorrelated.begin(), m_MostCorrelated.end()); // Remove any variables for which the correlation will necessarily be zero. 
- for (TSizeVectorPackedBitVectorPrUMapItr i = m_Projected.begin(); - i != m_Projected.end(); - /**/) - { - const CPackedBitVector &indicator = i->second.second; - if (indicator.manhattan() <= MINIMUM_FREQUENCY * static_cast(indicator.dimension())) - { + for (TSizeVectorPackedBitVectorPrUMapItr i = m_Projected.begin(); i != m_Projected.end(); + /**/) { + const CPackedBitVector& indicator = i->second.second; + if (indicator.manhattan() <= MINIMUM_FREQUENCY * static_cast(indicator.dimension())) { i = m_Projected.erase(i); - } - else - { + } else { ++i; } } @@ -364,12 +291,10 @@ void CKMostCorrelated::capture() std::size_t added = N < desired ? std::min(desired - N, n) : 0; LOG_TRACE("N = " << N << ", n = " << n << ", desired = " << desired << ", added = " << added); - if (added > 0) - { + if (added > 0) { m_MostCorrelated.insert(m_MostCorrelated.end(), add.end() - added, add.end()); } - if (n > added) - { + if (n > added) { // When deciding which values to replace from the set [m_K, N) we // do so at random with probability proportional to 1 - absolute // correlation. @@ -381,16 +306,13 @@ void CKMostCorrelated::capture() TDoubleVec p; p.reserve(std::min(N - m_K, 3 * n)); double Z = 0.0; - for (std::size_t i = vunerable; i < N; ++i) - { + for (std::size_t i = vunerable; i < N; ++i) { double oneMinusCorrelation = 1.0 - m_MostCorrelated[i].absCorrelation(); p.push_back(oneMinusCorrelation); Z += oneMinusCorrelation; } - if (Z > 0.0) - { - for (std::size_t i = 0u; i < p.size(); ++i) - { + if (Z > 0.0) { + for (std::size_t i = 0u; i < p.size(); ++i) { p[i] /= Z; } LOG_TRACE("p = " << core::CContainerPrinter::print(p)); @@ -398,8 +320,7 @@ void CKMostCorrelated::capture() TSizeVec replace; CSampling::categoricalSampleWithoutReplacement(m_Rng, p, n - added, replace); - for (std::size_t i = 1u; i <= n - added; ++i) - { + for (std::size_t i = 1u; i <= n - added; ++i) { m_MostCorrelated[vunerable + replace[i - 1]] = add[n - added - i]; } } @@ -409,8 +330,7 @@ void CKMostCorrelated::capture() } } -uint64_t CKMostCorrelated::checksum(uint64_t seed) const -{ +uint64_t CKMostCorrelated::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_K); seed = CChecksum::calculate(seed, m_DecayRate); seed = CChecksum::calculate(seed, m_Projections); @@ -421,8 +341,7 @@ uint64_t CKMostCorrelated::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_MostCorrelated); } -void CKMostCorrelated::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CKMostCorrelated::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CKMostCorrelated"); core::CMemoryDebug::dynamicSize("m_Projections", m_Projections, mem); core::CMemoryDebug::dynamicSize("m_CurrentProjected", m_CurrentProjected, mem); @@ -431,8 +350,7 @@ void CKMostCorrelated::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) core::CMemoryDebug::dynamicSize("m_MostCorrelated", m_MostCorrelated, mem); } -std::size_t CKMostCorrelated::memoryUsage() const -{ +std::size_t CKMostCorrelated::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_Projections); mem += core::CMemory::dynamicSize(m_CurrentProjected); mem += core::CMemory::dynamicSize(m_Projected); @@ -441,8 +359,7 @@ std::size_t CKMostCorrelated::memoryUsage() const return mem; } -void CKMostCorrelated::mostCorrelated(TCorrelationVec &result) const -{ +void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { using TMaxDoubleAccumulator = CBasicStatistics::COrderStatisticsStack>; using 
TMaxCorrelationAccumulator = CBasicStatistics::COrderStatisticsHeap<SCorrelation>;
    using TPointRTree = bgi::rtree<TPointSizePr, bgi::quadratic<16>>;
@@ -453,50 +370,38 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec &result) const
    std::size_t V = m_Projected.size();
    std::size_t desired = 2 * m_K;
    LOG_TRACE("N = " << N << ", V = " << V << ", desired = " << desired);
-    if (V == 1)
-    {
+    if (V == 1) {
        return;
    }

    TSizeSizePrUSet lookup;
-    for (std::size_t i = 0u; i < m_MostCorrelated.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_MostCorrelated.size(); ++i) {
        std::size_t X = m_MostCorrelated[i].s_X;
        std::size_t Y = m_MostCorrelated[i].s_Y;
        lookup.insert(std::make_pair(std::min(X, Y), std::max(X, Y)));
    }

-    std::size_t replace = std::max(static_cast<std::size_t>(
-                                       REPLACE_FRACTION * static_cast<double>(desired) + 0.5),
-                                   std::max(desired - N, std::size_t(1)));
+    std::size_t replace =
+        std::max(static_cast<std::size_t>(REPLACE_FRACTION * static_cast<double>(desired) + 0.5), std::max(desired - N, std::size_t(1)));
    LOG_TRACE("replace = " << replace);

    TMaxCorrelationAccumulator mostCorrelated(replace);

-    if (10 * replace > V * (V - 1))
-    {
+    if (10 * replace > V * (V - 1)) {
        LOG_TRACE("Exhaustive search");

-        for (TSizeVectorPackedBitVectorPrUMapCItr x = m_Projected.begin();
-             x != m_Projected.end();
-             ++x)
-        {
+        for (TSizeVectorPackedBitVectorPrUMapCItr x = m_Projected.begin(); x != m_Projected.end(); ++x) {
            std::size_t X = x->first;
            TSizeVectorPackedBitVectorPrUMapCItr y = x;
-            while (++y != m_Projected.end())
-            {
+            while (++y != m_Projected.end()) {
                std::size_t Y = y->first;
-                if (lookup.count(std::make_pair(std::min(X, Y), std::max(X, Y))) == 0)
-                {
-                    SCorrelation cxy(X, x->second.first, x->second.second,
-                                     Y, y->second.first, y->second.second);
+                if (lookup.count(std::make_pair(std::min(X, Y), std::max(X, Y))) == 0) {
+                    SCorrelation cxy(X, x->second.first, x->second.second, Y, y->second.first, y->second.second);
                    mostCorrelated.add(cxy);
                }
            }
        }
-    }
-    else
-    {
+    } else {
        LOG_TRACE("Nearest neighbour search");

        // 1) Build an r-tree,
@@ -511,27 +416,20 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec &result) const
        // Bound the correlation based on the sparsity of the metric.
        TMaxDoubleAccumulator fmax;
        double dimension = 0.0;
-        for (TSizeVectorPackedBitVectorPrUMapCItr i = m_Projected.begin();
-             i != m_Projected.end();
-             ++i)
-        {
-            const CPackedBitVector &ix = i->second.second;
+        for (TSizeVectorPackedBitVectorPrUMapCItr i = m_Projected.begin(); i != m_Projected.end(); ++i) {
+            const CPackedBitVector& ix = i->second.second;
            dimension = static_cast<double>(ix.dimension());
            fmax.add(ix.manhattan() / dimension);
        }
        fmax.sort();
-        if (fmax[1] <= MINIMUM_FREQUENCY)
-        {
+        if (fmax[1] <= MINIMUM_FREQUENCY) {
            return;
        }
        double amax = fmax[1] * dimension;

        TPointSizePrVec points;
        points.reserve(m_Projected.size());
-        for (TSizeVectorPackedBitVectorPrUMapCItr i = m_Projected.begin();
-             i != m_Projected.end();
-             ++i)
-        {
+        for (TSizeVectorPackedBitVectorPrUMapCItr i = m_Projected.begin(); i != m_Projected.end(); ++i) {
            points.emplace_back(i->second.first.to().toBoostArray(), i->first);
        }
        LOG_TRACE("# points = " << points.size());
@@ -544,56 +442,44 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec &result) const
        // so we use a small number of seed variables if V is large
        // compared to the number to replace.
TSizeVec seeds; - if (2 * replace < V) - { + if (2 * replace < V) { CSampling::uniformSample(m_Rng, 0, V, 2 * replace, seeds); std::sort(seeds.begin(), seeds.end()); seeds.erase(std::unique(seeds.begin(), seeds.end()), seeds.end()); - } - else - { + } else { seeds.reserve(V); - seeds.assign(boost::counting_iterator(0), - boost::counting_iterator(V)); + seeds.assign(boost::counting_iterator(0), boost::counting_iterator(V)); } - try - { + try { TPointRTree rtree(points); TPointSizePrVec nearest; - for (std::size_t i = 0u; i < seeds.size(); ++i) - { + for (std::size_t i = 0u; i < seeds.size(); ++i) { std::size_t X = points[seeds[i]].second; - const TVectorPackedBitVectorPr &px = m_Projected.at(X); + const TVectorPackedBitVectorPr& px = m_Projected.at(X); nearest.clear(); bgi::query(rtree, - bgi::satisfies(CNotEqual(X)) - && bgi::satisfies(CPairNotIn(lookup, X)) - && bgi::nearest(( px.first.to()).toBoostArray(), k), + bgi::satisfies(CNotEqual(X)) && bgi::satisfies(CPairNotIn(lookup, X)) && + bgi::nearest((px.first.to()).toBoostArray(), k), std::back_inserter(nearest)); bgi::query(rtree, - bgi::satisfies(CNotEqual(X)) - && bgi::satisfies(CPairNotIn(lookup, X)) - && bgi::nearest((-px.first.to()).toBoostArray(), k), + bgi::satisfies(CNotEqual(X)) && bgi::satisfies(CPairNotIn(lookup, X)) && + bgi::nearest((-px.first.to()).toBoostArray(), k), std::back_inserter(nearest)); - for (std::size_t j = 0u; j < nearest.size(); ++j) - { + for (std::size_t j = 0u; j < nearest.size(); ++j) { std::size_t n = mostCorrelated.count(); std::size_t S = n == desired ? mostCorrelated.biggest().s_X : 0; std::size_t T = n == desired ? mostCorrelated.biggest().s_Y : 0; std::size_t Y = nearest[j].second; - const TVectorPackedBitVectorPr &py = m_Projected.at(Y); + const TVectorPackedBitVectorPr& py = m_Projected.at(Y); SCorrelation cxy(X, px.first, px.second, Y, py.first, py.second); - if (lookup.count(std::make_pair(cxy.s_X, cxy.s_Y)) > 0) - { + if (lookup.count(std::make_pair(cxy.s_X, cxy.s_Y)) > 0) { continue; } - if (mostCorrelated.add(cxy)) - { - if (n == desired) - { + if (mostCorrelated.add(cxy)) { + if (n == desired) { lookup.erase(std::make_pair(S, T)); } lookup.insert(std::make_pair(cxy.s_X, cxy.s_Y)); @@ -603,66 +489,53 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec &result) const LOG_TRACE("# seeds = " << mostCorrelated.count()); LOG_TRACE("seed most correlated = " << mostCorrelated); - for (std::size_t i = 0u; i < points.size(); ++i) - { - const SCorrelation &biggest = mostCorrelated.biggest(); + for (std::size_t i = 0u; i < points.size(); ++i) { + const SCorrelation& biggest = mostCorrelated.biggest(); double threshold = biggest.distance(amax); LOG_TRACE("threshold = " << threshold); std::size_t X = points[i].second; - const TVectorPackedBitVectorPr &px = m_Projected.at(X); + const TVectorPackedBitVectorPr& px = m_Projected.at(X); TVector width(std::sqrt(threshold)); nearest.clear(); { - bgm::box box((px.first - width).to().toBoostArray(), - (px.first + width).to().toBoostArray()); + bgm::box box((px.first - width).to().toBoostArray(), (px.first + width).to().toBoostArray()); bgi::query(rtree, - bgi::within(box) - && bgi::satisfies(CNotEqual(X)) - && bgi::satisfies(CCloserThan(threshold, - px.first.to().toBoostArray())) - && bgi::satisfies(CPairNotIn(lookup, X)), + bgi::within(box) && bgi::satisfies(CNotEqual(X)) && + bgi::satisfies(CCloserThan(threshold, px.first.to().toBoostArray())) && + bgi::satisfies(CPairNotIn(lookup, X)), std::back_inserter(nearest)); } { - bgm::box box((-px.first - 
width).to().toBoostArray(), - (-px.first + width).to().toBoostArray()); + bgm::box box((-px.first - width).to().toBoostArray(), (-px.first + width).to().toBoostArray()); bgi::query(rtree, - bgi::within(box) - && bgi::satisfies(CNotEqual(X)) - && bgi::satisfies(CCloserThan(threshold, - (-px.first).to().toBoostArray())) - && bgi::satisfies(CPairNotIn(lookup, X)), + bgi::within(box) && bgi::satisfies(CNotEqual(X)) && + bgi::satisfies(CCloserThan(threshold, (-px.first).to().toBoostArray())) && + bgi::satisfies(CPairNotIn(lookup, X)), std::back_inserter(nearest)); } LOG_TRACE("# candidates = " << nearest.size()); - for (std::size_t j = 0u; j < nearest.size(); ++j) - { + for (std::size_t j = 0u; j < nearest.size(); ++j) { std::size_t n = mostCorrelated.count(); std::size_t S = n == desired ? mostCorrelated.biggest().s_X : 0; std::size_t T = n == desired ? mostCorrelated.biggest().s_Y : 0; std::size_t Y = nearest[j].second; - const TVectorPackedBitVectorPr &py = m_Projected.at(Y); + const TVectorPackedBitVectorPr& py = m_Projected.at(Y); SCorrelation cxy(X, px.first, px.second, Y, py.first, py.second); - if (lookup.count(std::make_pair(cxy.s_X, cxy.s_Y)) > 0) - { + if (lookup.count(std::make_pair(cxy.s_X, cxy.s_Y)) > 0) { continue; } - if (mostCorrelated.add(cxy)) - { - if (n == desired) - { + if (mostCorrelated.add(cxy)) { + if (n == desired) { lookup.erase(std::make_pair(S, T)); } lookup.insert(std::make_pair(cxy.s_X, cxy.s_Y)); } } } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to compute most correlated " << e.what()); return; } @@ -673,16 +546,13 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec &result) const LOG_TRACE("most correlated " << core::CContainerPrinter::print(result)); } -void CKMostCorrelated::nextProjection() -{ +void CKMostCorrelated::nextProjection() { TDoubleVec uniform01; CSampling::uniformSample(m_Rng, 0.0, 1.0, NUMBER_PROJECTIONS * PROJECTION_DIMENSION, uniform01); m_Projections.reserve(PROJECTION_DIMENSION); m_Projections.resize(PROJECTION_DIMENSION); - for (std::size_t i = 0u, j = 0u; i < PROJECTION_DIMENSION; ++i) - { - for (std::size_t k = 0u; k < NUMBER_PROJECTIONS; ++j, ++k) - { + for (std::size_t i = 0u, j = 0u; i < PROJECTION_DIMENSION; ++i) { + for (std::size_t k = 0u; k < NUMBER_PROJECTIONS; ++j, ++k) { m_Projections[i](k) = uniform01[j] < 0.5 ? 
-1.0 : 1.0; } } @@ -691,33 +561,27 @@ void CKMostCorrelated::nextProjection() double factor = std::exp(-m_DecayRate); m_MaximumCount *= factor; - for (std::size_t i = 0u; i < m_Moments.size(); ++i) - { + for (std::size_t i = 0u; i < m_Moments.size(); ++i) { m_Moments[i].age(factor); } - for (std::size_t i = 0u; i < m_MostCorrelated.size(); ++i) - { + for (std::size_t i = 0u; i < m_MostCorrelated.size(); ++i) { m_MostCorrelated[i].s_Correlation.age(factor); } } -const CKMostCorrelated::TVectorVec &CKMostCorrelated::projections() const -{ +const CKMostCorrelated::TVectorVec& CKMostCorrelated::projections() const { return m_Projections; } -const CKMostCorrelated::TSizeVectorPackedBitVectorPrUMap &CKMostCorrelated::projected() const -{ +const CKMostCorrelated::TSizeVectorPackedBitVectorPrUMap& CKMostCorrelated::projected() const { return m_Projected; } -const CKMostCorrelated::TCorrelationVec &CKMostCorrelated::correlations() const -{ +const CKMostCorrelated::TCorrelationVec& CKMostCorrelated::correlations() const { return m_MostCorrelated; } -const CKMostCorrelated::TMeanVarAccumulatorVec &CKMostCorrelated::moments() const -{ +const CKMostCorrelated::TMeanVarAccumulatorVec& CKMostCorrelated::moments() const { return m_Moments; } @@ -725,110 +589,81 @@ const std::size_t CKMostCorrelated::PROJECTION_DIMENSION = 20u; const double CKMostCorrelated::MINIMUM_SPARSENESS = 0.5; const double CKMostCorrelated::REPLACE_FRACTION = 0.1; -CKMostCorrelated::SCorrelation::SCorrelation() : - s_X(std::numeric_limits::max()), - s_Y(std::numeric_limits::max()) -{ +CKMostCorrelated::SCorrelation::SCorrelation() + : s_X(std::numeric_limits::max()), s_Y(std::numeric_limits::max()) { } CKMostCorrelated::SCorrelation::SCorrelation(std::size_t X, - const TVector &px, - const CPackedBitVector &ix, + const TVector& px, + const CPackedBitVector& ix, std::size_t Y, - const TVector &py, - const CPackedBitVector &iy) : - s_X(std::min(X, Y)), - s_Y(std::max(X, Y)) -{ + const TVector& py, + const CPackedBitVector& iy) + : s_X(std::min(X, Y)), s_Y(std::max(X, Y)) { s_Correlation.add(correlation(px, ix, py, iy)); } -bool CKMostCorrelated::SCorrelation::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == CORRELATION_TAG) - { - if (s_Correlation.fromDelimited(traverser.value()) == false) - { +bool CKMostCorrelated::SCorrelation::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == CORRELATION_TAG) { + if (s_Correlation.fromDelimited(traverser.value()) == false) { LOG_ERROR("Invalid correlation in " << traverser.value()); return false; } - } - else if (name == X_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), s_X) == false) - { + } else if (name == X_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), s_X) == false) { LOG_ERROR("Invalid variable in " << traverser.value()); return false; } - } - else if (name == Y_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), s_Y) == false) - { + } else if (name == Y_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), s_Y) == false) { LOG_ERROR("Invalid variable in " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CKMostCorrelated::SCorrelation::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void 
CKMostCorrelated::SCorrelation::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
    inserter.insertValue(CORRELATION_TAG, s_Correlation.toDelimited());
    inserter.insertValue(X_TAG, s_X);
    inserter.insertValue(Y_TAG, s_Y);
}

-bool CKMostCorrelated::SCorrelation::operator<(const SCorrelation &rhs) const
-{
-    return COrderings::lexicographical_compare(-this->absCorrelation(), s_X, s_Y,
-                                               -rhs.absCorrelation(), rhs.s_X, rhs.s_Y);
+bool CKMostCorrelated::SCorrelation::operator<(const SCorrelation& rhs) const {
+    return COrderings::lexicographical_compare(-this->absCorrelation(), s_X, s_Y, -rhs.absCorrelation(), rhs.s_X, rhs.s_Y);
}

-void CKMostCorrelated::SCorrelation::update(const TSizeVectorPackedBitVectorPrUMap &projected)
-{
+void CKMostCorrelated::SCorrelation::update(const TSizeVectorPackedBitVectorPrUMap& projected) {
    TSizeVectorPackedBitVectorPrUMapCItr x = projected.find(s_X);
    TSizeVectorPackedBitVectorPrUMapCItr y = projected.find(s_Y);
-    if (x != projected.end() && y != projected.end())
-    {
-        const TVector &px = x->second.first;
-        const TVector &py = y->second.first;
-        const CPackedBitVector &ix = x->second.second;
-        const CPackedBitVector &iy = y->second.second;
+    if (x != projected.end() && y != projected.end()) {
+        const TVector& px = x->second.first;
+        const TVector& py = y->second.first;
+        const CPackedBitVector& ix = x->second.second;
+        const CPackedBitVector& iy = y->second.second;
        s_Correlation.add(correlation(px, ix, py, iy));
    }
}

-double CKMostCorrelated::SCorrelation::distance(double amax) const
-{
-    return static_cast<double>(NUMBER_PROJECTIONS)
-           * amax
-           * 2.0 * (1.0 - std::fabs(CBasicStatistics::mean(s_Correlation)));
+double CKMostCorrelated::SCorrelation::distance(double amax) const {
+    return static_cast<double>(NUMBER_PROJECTIONS) * amax * 2.0 * (1.0 - std::fabs(CBasicStatistics::mean(s_Correlation)));
}

-double CKMostCorrelated::SCorrelation::absCorrelation() const
-{
-    return std::fabs(CBasicStatistics::mean(s_Correlation))
-           - (  1.0 / std::max(CBasicStatistics::count(s_Correlation), 2.0)
-              + std::sqrt(CBasicStatistics::variance(s_Correlation)));
+double CKMostCorrelated::SCorrelation::absCorrelation() const {
+    return std::fabs(CBasicStatistics::mean(s_Correlation)) -
+           (1.0 / std::max(CBasicStatistics::count(s_Correlation), 2.0) + std::sqrt(CBasicStatistics::variance(s_Correlation)));
}

-double CKMostCorrelated::SCorrelation::correlation(const TVector &px,
-                                                   const CPackedBitVector &ix,
-                                                   const TVector &py,
-                                                   const CPackedBitVector &iy)
-{
+double
+CKMostCorrelated::SCorrelation::correlation(const TVector& px, const CPackedBitVector& ix, const TVector& py, const CPackedBitVector& iy) {
    double result = 0.0;

-    double nx = ix.manhattan() / static_cast<double>(ix.dimension());
-    double ny = iy.manhattan() / static_cast<double>(iy.dimension());
-    if (nx <= MINIMUM_FREQUENCY && ny <= MINIMUM_FREQUENCY)
-    {
+    double nx = ix.manhattan() / static_cast<double>(ix.dimension());
+    double ny = iy.manhattan() / static_cast<double>(iy.dimension());
+    if (nx <= MINIMUM_FREQUENCY && ny <= MINIMUM_FREQUENCY) {
        return result;
    }
@@ -836,8 +671,7 @@ double CKMostCorrelated::SCorrelation::correlation(const TVector &px,
    double oxy = ix.inner(iy, CPackedBitVector::E_OR);
    double cxy = axy / oxy;

-    if (cxy > MINIMUM_FREQUENCY)
-    {
+    if (cxy > MINIMUM_FREQUENCY) {
        // The following uses the method of moments noting that
        //   E[S] = 2 (1 + cov(X,Y))
        //   E[D] = 2 (1 - cov(X,Y))
@@ -852,8 +686,7 @@ double CKMostCorrelated::SCorrelation::correlation(const TVector &px,
        TMeanVarAccumulator dmv;
        TMeanVarAccumulator smv;
-        for (std::size_t i = 0u; i < px.dimension(); ++i)
-        {
+        for (std::size_t i = 0u; i < px.dimension(); ++i) {
            dmv.add((px(i) - py(i)) * (px(i) - py(i)));
            smv.add((px(i) + py(i)) * (px(i) + py(i)));
        }
@@ -867,47 +700,37 @@ double CKMostCorrelated::SCorrelation::correlation(const TVector &px,

        result = (cxy - MINIMUM_FREQUENCY) / (1.0 - MINIMUM_FREQUENCY);

-        if (3.0 * dv < sv)
-        {
+        if (3.0 * dv < sv) {
            result *= std::max(cdm, 0.0);
-        }
-        else if (3.0 * sv < dv)
-        {
-            result *= std::min(csm, 0.0);
-        }
-        else
-        {
+        } else if (3.0 * sv < dv) {
+            result *= std::min(csm, 0.0);
+        } else {
            double lambda = dv == 0 ? 1.0 : sv / dv;
            double a = (2.0 + lambda - 1.0) / 4.0;
            double b = (2.0 + 1.0 - lambda) / 4.0;
-            result *= a * std::max(cdm, 0.0) + b * std::min(csm, 0.0);
+            result *= a * std::max(cdm, 0.0) + b * std::min(csm, 0.0);
        }
    }

    return result;
}

-uint64_t CKMostCorrelated::SCorrelation::checksum(uint64_t seed) const
-{
+uint64_t CKMostCorrelated::SCorrelation::checksum(uint64_t seed) const {
    seed = CChecksum::calculate(seed, s_Correlation);
    seed = CChecksum::calculate(seed, s_X);
    return CChecksum::calculate(seed, s_Y);
}

-std::string CKMostCorrelated::SCorrelation::print() const
-{
-    return CBasicStatistics::print(s_Correlation)
-           + ' ' + core::CStringUtils::typeToString(s_X)
-           + ' ' + core::CStringUtils::typeToString(s_Y);
+std::string CKMostCorrelated::SCorrelation::print() const {
+    return CBasicStatistics::print(s_Correlation) + ' ' + core::CStringUtils::typeToString(s_X) + ' ' +
+           core::CStringUtils::typeToString(s_Y);
}

-CKMostCorrelated::CMatches::CMatches(std::size_t x) : m_X(x) {}
+CKMostCorrelated::CMatches::CMatches(std::size_t x) : m_X(x) {
+}

-bool CKMostCorrelated::CMatches::operator()(const SCorrelation &correlation) const
-{
+bool CKMostCorrelated::CMatches::operator()(const SCorrelation& correlation) const {
    return correlation.s_X == m_X || correlation.s_Y == m_X;
}
-
-
}
}
diff --git a/lib/maths/CLassoLogisticRegression.cc b/lib/maths/CLassoLogisticRegression.cc
index e1b4bd41f3..ab66960a20 100644
--- a/lib/maths/CLassoLogisticRegression.cc
+++ b/lib/maths/CLassoLogisticRegression.cc
@@ -18,13 +18,10 @@
#include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
+namespace {

using TDoubleVec = std::vector<double>;

//! An upper bound on the second derivative of minus the log of the
@@ -35,11 +32,9 @@ using TDoubleVec = std::vector<double>;
//! where \f$f(r)=log(1 + e^-r)\f$. This is used to implement the trust
//! region concept for guaranteeing convergence of the Gauss-Seidel
//! iterative solver.
-double F(double r, double delta)
-{
+double F(double r, double delta) {
    r = std::fabs(r);
-    if (r <= delta)
-    {
+    if (r <= delta) {
        return 0.25;
    }
    double s = std::exp(r - delta);
@@ -48,29 +43,22 @@ double F(double r, double delta)

//! Computes the step for the CLG solver for the LASSO logistic
//! regression objective.
-double lassoStep(double beta, double lambda, double n, double d)
-{
+double lassoStep(double beta, double lambda, double n, double d) {
    double dv = 0.0;
-    if (beta == 0.0)
-    {
+    if (beta == 0.0) {
        // Second derivative is undefined; check that moving away
        // from zero one gets consistent sign for the desired step.
dv = (n - lambda) / d; - if (dv <= 0.0) - { + if (dv <= 0.0) { dv = (n + lambda) / d; - if (dv >= 0) - { + if (dv >= 0) { return 0.0; } } - } - else - { + } else { double sign = CTools::sign(beta); dv = (n - sign * lambda) / d; - if (sign * (beta + dv) < 0.0) - { + if (sign * (beta + dv) < 0.0) { // Don't allow the sign of the beta to change in one // step, since the bound on second derivative doesn't // hold on a sign change. @@ -90,31 +78,23 @@ double lassoStep(double beta, double lambda, double n, double d) //! \note That this should decrease monotonically in each iteration //! of the inner solver loop. template -double logLikelihood(const MATRIX &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - const TDoubleVec &beta) -{ +double logLikelihood(const MATRIX& x, const TDoubleVec& y, const TDoubleVec& lambda, const TDoubleVec& beta) { using iterator = typename MATRIX::iterator; double result = 0.0; TDoubleVec f(y.size(), 0.0); - for (std::size_t j = 0u; j < beta.size(); ++j) - { - for (iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) - { + for (std::size_t j = 0u; j < beta.size(); ++j) { + for (iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) { std::size_t i = x.row(itr, j); double xij = x.element(itr); f[i] += beta[j] * xij; } } - for (std::size_t i = 0u; i < f.size(); ++i) - { + for (std::size_t i = 0u; i < f.size(); ++i) { result -= std::log(1.0 + std::exp(-f[i] * y[i])); } - for (std::size_t j = 0u; j < beta.size(); ++j) - { + for (std::size_t j = 0u; j < beta.size(); ++j) { result -= lambda[j] * std::fabs(beta[j]); } @@ -126,13 +106,12 @@ double logLikelihood(const MATRIX &x, template void CLG(std::size_t maxIterations, double eps, - const MATRIX &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta, - TDoubleVec &r, - std::size_t &numberIterations) -{ + const MATRIX& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + TDoubleVec& r, + std::size_t& numberIterations) { using iterator = typename MATRIX::iterator; std::size_t d = x.columns(); @@ -150,29 +129,25 @@ void CLG(std::size_t maxIterations, // Auxiliary variables used to compute solver step. 
TDoubleVec num(d, 0.0); TDoubleVec den(d, 0.0); - for (std::size_t j = 0u; j < d; ++j) - { + for (std::size_t j = 0u; j < d; ++j) { double Dj = delta[j]; - for (iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) - { + for (iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) { std::size_t i = x.row(itr, j); double xij = x.element(itr); - double xy = xij * y[i]; - double xx = xij * xij; - double ri = r[i]; + double xy = xij * y[i]; + double xx = xij * xij; + double ri = r[i]; num[j] += xy / (1.0 + std::exp(ri)); den[j] += xx * F(ri, Dj * std::fabs(xij)); } } - for (std::size_t k = 0u; k < maxIterations; ++k) - { + for (std::size_t k = 0u; k < maxIterations; ++k) { rlast = r; LOG_TRACE("numerator = " << core::CContainerPrinter::print(num)); LOG_TRACE("denominator = " << core::CContainerPrinter::print(den)); - for (std::size_t j = 0u; j < d; ++j) - { + for (std::size_t j = 0u; j < d; ++j) { double bj = beta[j]; double Dj = delta[j]; double dvj = lassoStep(bj, lambda[j], num[j], den[j]); @@ -180,26 +155,23 @@ void CLG(std::size_t maxIterations, beta[j] += dbj; delta[j] = std::max(2.0 * std::fabs(dbj), Dj / 2.0); - if (dbj != 0.0 || j+1 == d) - { - for (iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) - { + if (dbj != 0.0 || j + 1 == d) { + for (iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) { std::size_t i = x.row(itr, j); r[i] += dbj * x.element(itr) * y[i]; } std::size_t jPlus1 = (j + 1) % d; - double &numjPlus1 = num[jPlus1]; - double &denjPlus1 = den[jPlus1]; - double DjPlus1 = delta[jPlus1]; + double& numjPlus1 = num[jPlus1]; + double& denjPlus1 = den[jPlus1]; + double DjPlus1 = delta[jPlus1]; numjPlus1 = denjPlus1 = 0.0; - for (iterator itr = x.beginRows(jPlus1); itr != x.endRows(jPlus1); ++itr) - { + for (iterator itr = x.beginRows(jPlus1); itr != x.endRows(jPlus1); ++itr) { std::size_t i = x.row(itr, jPlus1); double xij = x.element(itr); - double xy = xij * y[i]; - double xx = xij * xij; - double ri = r[i]; + double xy = xij * y[i]; + double xx = xij * xij; + double ri = r[i]; numjPlus1 += xy / (1.0 + std::exp(ri)); denjPlus1 += xx * F(ri, DjPlus1 * std::fabs(xij)); } @@ -214,15 +186,13 @@ void CLG(std::size_t maxIterations, // Check for convergence. 
double dsum = 0.0; - double sum = 0.0; - for (std::size_t i = 0u; i < r.size(); ++i) - { + double sum = 0.0; + for (std::size_t i = 0u; i < r.size(); ++i) { dsum += std::fabs(r[i] - rlast[i]); - sum += std::fabs(r[i]); + sum += std::fabs(r[i]); } LOG_TRACE("sum |dr| = " << dsum << ", sum |r| = " << sum); - if (dsum < eps * (1.0 + sum)) - { + if (dsum < eps * (1.0 + sum)) { break; } } @@ -230,96 +200,67 @@ void CLG(std::size_t maxIterations, } // unnamed:: -namespace lasso_logistic_regression_detail -{ +namespace lasso_logistic_regression_detail { ////// CDenseMatrix ////// -CDenseMatrix::CDenseMatrix() -{ +CDenseMatrix::CDenseMatrix() { } -CDenseMatrix::CDenseMatrix(TDoubleVecVec &elements) -{ +CDenseMatrix::CDenseMatrix(TDoubleVecVec& elements) { m_Elements.swap(elements); } -void CDenseMatrix::swap(CDenseMatrix &other) -{ +void CDenseMatrix::swap(CDenseMatrix& other) { m_Elements.swap(other.m_Elements); } - ////// CSparseMatrix ////// -CSparseMatrix::CSparseMatrix() : - m_Rows(0), - m_Columns(0) -{ +CSparseMatrix::CSparseMatrix() : m_Rows(0), m_Columns(0) { } -CSparseMatrix::CSparseMatrix(std::size_t rows, - std::size_t columns, - TSizeSizePrDoublePrVec &elements) : - m_Rows(rows), - m_Columns(columns) -{ +CSparseMatrix::CSparseMatrix(std::size_t rows, std::size_t columns, TSizeSizePrDoublePrVec& elements) : m_Rows(rows), m_Columns(columns) { m_Elements.swap(elements); std::sort(m_Elements.begin(), m_Elements.end(), COrderings::SFirstLess()); } -void CSparseMatrix::swap(CSparseMatrix &other) -{ +void CSparseMatrix::swap(CSparseMatrix& other) { std::swap(m_Rows, other.m_Rows); std::swap(m_Columns, other.m_Columns); m_Elements.swap(other.m_Elements); } - ////// CCyclicCoordinateDescent ////// -CCyclicCoordinateDescent::CCyclicCoordinateDescent(std::size_t maxIterations, - double eps) : - m_MaxIterations(maxIterations), - m_Eps(eps) -{ +CCyclicCoordinateDescent::CCyclicCoordinateDescent(std::size_t maxIterations, double eps) : m_MaxIterations(maxIterations), m_Eps(eps) { } template -bool CCyclicCoordinateDescent::checkInputs(const MATRIX &x, - const TDoubleVec &y, - const TDoubleVec &lambda) -{ - if (x.rows() == 0) - { +bool CCyclicCoordinateDescent::checkInputs(const MATRIX& x, const TDoubleVec& y, const TDoubleVec& lambda) { + if (x.rows() == 0) { LOG_ERROR("No training data"); return false; } - if (x.rows() != y.size()) - { - LOG_ERROR("Inconsistent training data |x| = " - << x.rows() << ", |y| = " << y.size()); + if (x.rows() != y.size()) { + LOG_ERROR("Inconsistent training data |x| = " << x.rows() << ", |y| = " << y.size()); return false; } - if (lambda.size() != x.columns()) - { - LOG_ERROR("Inconsistent prior |lambda| = " - << lambda.size() << ", D = " << x.columns()); + if (lambda.size() != x.columns()) { + LOG_ERROR("Inconsistent prior |lambda| = " << lambda.size() << ", D = " << x.columns()); return false; } return true; } -bool CCyclicCoordinateDescent::run(const CDenseMatrix &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta, - std::size_t &numberIterations) -{ +bool CCyclicCoordinateDescent::run(const CDenseMatrix& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + std::size_t& numberIterations) { beta.clear(); numberIterations = 0; - if (!checkInputs(x, y, lambda)) - { + if (!checkInputs(x, y, lambda)) { return false; } TDoubleVec r(x.rows(), 0.0); @@ -327,16 +268,14 @@ bool CCyclicCoordinateDescent::run(const CDenseMatrix &x, return true; } -bool CCyclicCoordinateDescent::run(const CSparseMatrix &x, - const TDoubleVec &y, - 
const TDoubleVec &lambda, - TDoubleVec &beta, - std::size_t &numberIterations) -{ +bool CCyclicCoordinateDescent::run(const CSparseMatrix& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + std::size_t& numberIterations) { beta.clear(); numberIterations = 0; - if (!checkInputs(x, y, lambda)) - { + if (!checkInputs(x, y, lambda)) { return false; } TDoubleVec r(x.rows(), 0.0); @@ -344,31 +283,25 @@ bool CCyclicCoordinateDescent::run(const CSparseMatrix &x, return true; } -bool CCyclicCoordinateDescent::runIncremental(const CDenseMatrix &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta, - std::size_t &numberIterations) -{ +bool CCyclicCoordinateDescent::runIncremental(const CDenseMatrix& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + std::size_t& numberIterations) { numberIterations = 0; - if (!checkInputs(x, y, lambda)) - { + if (!checkInputs(x, y, lambda)) { return false; } - if (beta.size() != lambda.size()) - { - LOG_ERROR("Inconsistent seed parameter vector |beta| = " - << beta.size() << ", D = " << lambda.size()); + if (beta.size() != lambda.size()) { + LOG_ERROR("Inconsistent seed parameter vector |beta| = " << beta.size() << ", D = " << lambda.size()); return false; } // Initialize the reachable values. TDoubleVec r(x.rows(), 0.0); - for (std::size_t j = 0u; j < x.columns(); ++j) - { + for (std::size_t j = 0u; j < x.columns(); ++j) { double bj = beta[j]; - for (CDenseMatrix::iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) - { + for (CDenseMatrix::iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) { std::size_t i = x.row(itr, j); r[i] = bj * x.element(itr) * y[i]; } @@ -378,31 +311,25 @@ bool CCyclicCoordinateDescent::runIncremental(const CDenseMatrix &x, return true; } -bool CCyclicCoordinateDescent::runIncremental(const CSparseMatrix &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta, - std::size_t &numberIterations) -{ +bool CCyclicCoordinateDescent::runIncremental(const CSparseMatrix& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + std::size_t& numberIterations) { numberIterations = 0; - if (!checkInputs(x, y, lambda)) - { + if (!checkInputs(x, y, lambda)) { return false; } - if (beta.size() != lambda.size()) - { - LOG_ERROR("Inconsistent seed parameter vector |beta| = " - << beta.size() << ", D = " << lambda.size()); + if (beta.size() != lambda.size()) { + LOG_ERROR("Inconsistent seed parameter vector |beta| = " << beta.size() << ", D = " << lambda.size()); return false; } // Initialize the reachable values. 
TDoubleVec r(x.rows(), 0.0); - for (std::size_t j = 0u; j < x.columns(); ++j) - { + for (std::size_t j = 0u; j < x.columns(); ++j) { double bj = beta[j]; - for (CSparseMatrix::iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) - { + for (CSparseMatrix::iterator itr = x.beginRows(j); itr != x.endRows(j); ++itr) { std::size_t i = x.row(itr, j); r[i] = bj * x.element(itr) * y[i]; } @@ -416,70 +343,48 @@ bool CCyclicCoordinateDescent::runIncremental(const CSparseMatrix &x, ////// CLogisticRegressionModel ////// -CLogisticRegressionModel::CLogisticRegressionModel() : - m_Beta0(0.0), - m_Beta() -{ +CLogisticRegressionModel::CLogisticRegressionModel() : m_Beta0(0.0), m_Beta() { } -CLogisticRegressionModel::CLogisticRegressionModel(double beta0, - TSizeDoublePrVec &beta) : - m_Beta0(beta0), - m_Beta() -{ +CLogisticRegressionModel::CLogisticRegressionModel(double beta0, TSizeDoublePrVec& beta) : m_Beta0(beta0), m_Beta() { m_Beta.swap(beta); } -void CLogisticRegressionModel::swap(CLogisticRegressionModel &other) -{ +void CLogisticRegressionModel::swap(CLogisticRegressionModel& other) { std::swap(m_Beta0, other.m_Beta0); m_Beta.swap(other.m_Beta); } -bool CLogisticRegressionModel::operator()(const TDoubleVec &x, - double &probability) const -{ +bool CLogisticRegressionModel::operator()(const TDoubleVec& x, double& probability) const { probability = 0.5; - if (m_Beta.empty()) - { + if (m_Beta.empty()) { return true; } std::size_t n = m_Beta.size(); - if (x.size() <= m_Beta[n - 1].first) - { - LOG_ERROR("Invalid feature vector |x| = " << x.size() - << ", D = " << m_Beta[n - 1].first + 1) + if (x.size() <= m_Beta[n - 1].first) { + LOG_ERROR("Invalid feature vector |x| = " << x.size() << ", D = " << m_Beta[n - 1].first + 1) } double r = -m_Beta0; - for (std::size_t i = 0u; i < m_Beta.size(); ++i) - { + for (std::size_t i = 0u; i < m_Beta.size(); ++i) { r -= m_Beta[i].second * x[m_Beta[i].first]; } probability = 1.0 / (1.0 + std::exp(-r)); return true; } -double CLogisticRegressionModel::operator()(const TSizeDoublePrVec &x) const -{ - if (m_Beta.empty()) - { +double CLogisticRegressionModel::operator()(const TSizeDoublePrVec& x) const { + if (m_Beta.empty()) { return 0.5; } double r = -m_Beta0; - for (std::size_t i = 0u, j = 0u; i < m_Beta.size() && j < x.size(); /**/) - { - if (m_Beta[i].first < x[j].first) - { + for (std::size_t i = 0u, j = 0u; i < m_Beta.size() && j < x.size(); /**/) { + if (m_Beta[i].first < x[j].first) { ++i; - } - else if (x[j].first < m_Beta[i].first) - { + } else if (x[j].first < m_Beta[i].first) { ++j; - } - else - { + } else { r -= m_Beta[i].second * x[j].second; ++i; ++j; @@ -488,8 +393,7 @@ double CLogisticRegressionModel::operator()(const TSizeDoublePrVec &x) const return 1.0 / (1.0 + std::exp(-r)); } -namespace -{ +namespace { using namespace lasso_logistic_regression_detail; using TDoubleVecVec = std::vector; @@ -505,17 +409,11 @@ using TSizeUSet = boost::unordered_set; //! \param[in] mask The indices of the feature vectors to remove. //! \param[out] xMasked The training matrix corresponding to \p x. //! \param[out] yMasked The training labels corresponding to \p y. 
-void setupTrainingData(const TDoubleVecVec &x, - const TDoubleVec &y, - const TSizeUSet &mask, - CDenseMatrix &xMasked, - TDoubleVec &yMasked) -{ +void setupTrainingData(const TDoubleVecVec& x, const TDoubleVec& y, const TSizeUSet& mask, CDenseMatrix& xMasked, TDoubleVec& yMasked) { xMasked = CDenseMatrix(); yMasked.clear(); - if (x.empty()) - { + if (x.empty()) { return; } @@ -526,12 +424,9 @@ void setupTrainingData(const TDoubleVecVec &x, TDoubleVecVec xTranspose(d + 1, TDoubleVec(m, 1.0)); yMasked.reserve(m); - for (std::size_t i = 0u, i_ = 0u; i < n; ++i) - { - if (mask.count(i) == 0) - { - for (std::size_t j = 0u; j < d; ++j) - { + for (std::size_t i = 0u, i_ = 0u; i < n; ++i) { + if (mask.count(i) == 0) { + for (std::size_t j = 0u; j < d; ++j) { xTranspose[j][i_] = x[i][j]; } yMasked.push_back(y[i]); @@ -543,17 +438,15 @@ void setupTrainingData(const TDoubleVecVec &x, } //! Overload for sparse feature vectors. -void setupTrainingData(const TSizeDoublePrVecVec &x, - const TDoubleVec &y, - const TSizeUSet &mask, - CSparseMatrix &xMasked, - TDoubleVec &yMasked) -{ +void setupTrainingData(const TSizeDoublePrVecVec& x, + const TDoubleVec& y, + const TSizeUSet& mask, + CSparseMatrix& xMasked, + TDoubleVec& yMasked) { xMasked = CSparseMatrix(); yMasked.clear(); - if (x.empty()) - { + if (x.empty()) { return; } @@ -565,23 +458,18 @@ void setupTrainingData(const TSizeDoublePrVecVec &x, yMasked.reserve(m); std::size_t rows = m; std::size_t columns = 0u; - for (std::size_t i = 0u, i_ = 0u; i < n; ++i) - { - if (mask.count(i) == 0) - { - for (std::size_t j = 0u, d = x[i].size(); j < d; ++j) - { + for (std::size_t i = 0u, i_ = 0u; i < n; ++i) { + if (mask.count(i) == 0) { + for (std::size_t j = 0u, d = x[i].size(); j < d; ++j) { std::size_t j_ = x[i][j].first; - xTranspose.push_back(TSizeSizePrDoublePr(TSizeSizePr(j_, i_), - x[i][j].second)); + xTranspose.push_back(TSizeSizePrDoublePr(TSizeSizePr(j_, i_), x[i][j].second)); columns = std::max(columns, j_ + 1); } yMasked.push_back(y[i]); ++i_; } } - for (std::size_t i = 0u; i < rows; ++i) - { + for (std::size_t i = 0u; i < rows; ++i) { xTranspose.emplace_back(TSizeSizePr(columns, i), 1.0); } CSparseMatrix tmp(rows, columns + 1, xTranspose); @@ -598,11 +486,7 @@ void setupTrainingData(const TSizeDoublePrVecVec &x, //! \param[in] lambda The precision of the Laplace prior. //! \param[out] beta Filled in with the learned regression parameters. template -bool learn(const MATRIX &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - TDoubleVec &beta) -{ +bool learn(const MATRIX& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta) { using namespace lasso_logistic_regression_detail; // TODO parameters. @@ -610,37 +494,29 @@ bool learn(const MATRIX &x, // Compute the regression parameters. std::size_t numberIterations; - if (beta.empty()) - { - if (!clg.run(x, y, lambda, beta, numberIterations)) - { + if (beta.empty()) { + if (!clg.run(x, y, lambda, beta, numberIterations)) { LOG_ERROR("Failed to solve for parameters"); return false; } - } - else - { - if (!clg.runIncremental(x, y, lambda, beta, numberIterations)) - { + } else { + if (!clg.runIncremental(x, y, lambda, beta, numberIterations)) { LOG_ERROR("Failed to solve for parameters"); return false; } } - LOG_TRACE("Solved for parameters using " - << numberIterations << " iterations"); + LOG_TRACE("Solved for parameters using " << numberIterations << " iterations"); return true; } //! Extract a matrix element from dense storage. 
-double element(double xij) -{ +double element(double xij) { return xij; } //! Extract a matrix element from sparse storage. -double element(const TSizeDoublePr &xij) -{ +double element(const TSizeDoublePr& xij) { return xij.second; } @@ -653,14 +529,11 @@ double element(const TSizeDoublePr &xij) //! Here, \f$n\f$ is the number of rows and \f$d\f$ is the number //! of columns. template -double l22Norm(const STORAGE &x) -{ +double l22Norm(const STORAGE& x) { using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; TMeanAccumulator result; - for (std::size_t i = 0u; i < x.size(); ++i) - { - for (std::size_t j = 0u; j < x[i].size(); ++j) - { + for (std::size_t i = 0u; i < x.size(); ++i) { + for (std::size_t j = 0u; j < x[i].size(); ++j) { double xij = element(x[i][j]); result.add(xij * xij); } @@ -685,80 +558,63 @@ double l22Norm(const STORAGE &x) //! and negative examples. This can be achieved by randomly splitting //! these sets independently. template -class C2FoldCrossValidatedLogLikelihood -{ - public: - using result_type = double; - - public: - C2FoldCrossValidatedLogLikelihood(std::size_t d) : - m_D(d + 1), - m_Splits(0) - {} - - //! Add a 2-split of the training data. - void addSplit(MATRIX &xTrain, - TDoubleVec &yTrain, - MATRIX &xTest, - TDoubleVec &yTest) - { - if (xTrain.rows() != m_D || xTest.rows() != m_D) - { - LOG_ERROR("Bad training data: |train| = " << xTrain.rows() - << ", |test| = " << xTest.rows() - << ", D = " << m_D); - return; - } - ++m_Splits; - m_X[0].push_back(MATRIX()); - m_X[0].back().swap(xTrain); - m_Y[0].push_back(TDoubleVec()); - m_Y[0].back().swap(yTrain); - m_X[1].push_back(MATRIX()); - m_X[1].back().swap(xTest); - m_Y[1].push_back(TDoubleVec()); - m_Y[1].back().swap(yTest); +class C2FoldCrossValidatedLogLikelihood { +public: + using result_type = double; + +public: + C2FoldCrossValidatedLogLikelihood(std::size_t d) : m_D(d + 1), m_Splits(0) {} + + //! Add a 2-split of the training data. + void addSplit(MATRIX& xTrain, TDoubleVec& yTrain, MATRIX& xTest, TDoubleVec& yTest) { + if (xTrain.rows() != m_D || xTest.rows() != m_D) { + LOG_ERROR("Bad training data: |train| = " << xTrain.rows() << ", |test| = " << xTest.rows() << ", D = " << m_D); + return; } - - //! Calculate the 2-fold cross-validation objective for the - //! prior precision \p lambda. - double operator()(double lambda) const - { - m_Beta.clear(); - m_Lambda.assign(m_D, lambda); - double result = 0.0; - for (std::size_t j = 0u; j < m_Splits; ++j) - { - for (std::size_t i = 0u; i < 2; ++i) - { - learn(m_X[i][j], m_Y[i][j], m_Lambda, m_Beta); - result += logLikelihood(m_X[(i+1) % 2][j], - m_Y[(i+1) % 2][j], - m_Lambda, - m_Beta); - } + ++m_Splits; + m_X[0].push_back(MATRIX()); + m_X[0].back().swap(xTrain); + m_Y[0].push_back(TDoubleVec()); + m_Y[0].back().swap(yTrain); + m_X[1].push_back(MATRIX()); + m_X[1].back().swap(xTest); + m_Y[1].push_back(TDoubleVec()); + m_Y[1].back().swap(yTest); + } + + //! Calculate the 2-fold cross-validation objective for the + //! prior precision \p lambda. + double operator()(double lambda) const { + m_Beta.clear(); + m_Lambda.assign(m_D, lambda); + double result = 0.0; + for (std::size_t j = 0u; j < m_Splits; ++j) { + for (std::size_t i = 0u; i < 2; ++i) { + learn(m_X[i][j], m_Y[i][j], m_Lambda, m_Beta); + result += logLikelihood(m_X[(i + 1) % 2][j], m_Y[(i + 1) % 2][j], m_Lambda, m_Beta); } - return result; } - - private: - using TMatrixVec = std::vector; - - private: - //! The feature vector dimension. - std::size_t m_D; - //! The number of 2-splits. 
- std::size_t m_Splits; - //! The feature vectors of the 2-splits. - TMatrixVec m_X[2]; - //! The feature vector labels of the 2-splits. - TDoubleVecVec m_Y[2]; - //! A placeholder for lambda so that it doesn't need to be - //! re-initialized on each call to operator(). - mutable TDoubleVec m_Lambda; - //! A placeholder for beta so that it doesn't need to be - //! re-initialized on each call to operator(). - mutable TDoubleVec m_Beta; + return result; + } + +private: + using TMatrixVec = std::vector; + +private: + //! The feature vector dimension. + std::size_t m_D; + //! The number of 2-splits. + std::size_t m_Splits; + //! The feature vectors of the 2-splits. + TMatrixVec m_X[2]; + //! The feature vector labels of the 2-splits. + TDoubleVecVec m_Y[2]; + //! A placeholder for lambda so that it doesn't need to be + //! re-initialized on each call to operator(). + mutable TDoubleVec m_Lambda; + //! A placeholder for beta so that it doesn't need to be + //! re-initialized on each call to operator(). + mutable TDoubleVec m_Beta; }; } // unnamed:: @@ -766,21 +622,13 @@ class C2FoldCrossValidatedLogLikelihood ////// CLassoLogisticRegression ////// template -CLassoLogisticRegression::CLassoLogisticRegression() : - m_X(), - m_D(0), - m_Y(), - m_Lambda(1.0), - m_Beta() -{ +CLassoLogisticRegression::CLassoLogisticRegression() : m_X(), m_D(0), m_Y(), m_Lambda(1.0), m_Beta() { } template template -void CLassoLogisticRegression::doLearnHyperparameter(EHyperparametersStyle style) -{ - if (m_X.empty()) - { +void CLassoLogisticRegression::doLearnHyperparameter(EHyperparametersStyle style) { + if (m_X.empty()) { return; } @@ -791,14 +639,14 @@ void CLassoLogisticRegression::doLearnHyperparameter(EHyperparametersSt double lambda = std::sqrt(l22Norm(m_X) / 2.0); m_Lambda = lambda; - if (n <= 1) - { + if (n <= 1) { return; } - switch (style) - { - case E_LambdaNormBased: return; - case E_LambdaCrossValidated: break; + switch (style) { + case E_LambdaNormBased: + return; + case E_LambdaCrossValidated: + break; } // Set up the cross-validation optimization objective. @@ -810,19 +658,14 @@ void CLassoLogisticRegression::doLearnHyperparameter(EHyperparametersSt TSizeVec negative; positive.reserve(n); negative.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { (m_Y[i] > 0.0 ? positive : negative).push_back(i); } - if (positive.size() <= 1 || negative.size() <= 1) - { - LOG_WARN("Can't cross-validate: insufficient " - << (positive.size() <= 1 ? "" : "un") - << "interesting examples provided"); + if (positive.size() <= 1 || negative.size() <= 1) { + LOG_WARN("Can't cross-validate: insufficient " << (positive.size() <= 1 ? 
"" : "un") << "interesting examples provided"); return; } - for (std::size_t i = 0u, np = positive.size(), nn = negative.size(); i < 2; ++i) - { + for (std::size_t i = 0u, np = positive.size(), nn = negative.size(); i < 2; ++i) { CSampling::random_shuffle(positive.begin(), positive.end()); CSampling::random_shuffle(negative.begin(), negative.end()); @@ -843,42 +686,35 @@ void CLassoLogisticRegression::doLearnHyperparameter(EHyperparametersSt objective.addSplit(xTrain, yTrain, xTest, yTest); } - double scales[] = { 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 100.0 }; - double logLikelihoods[] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + double scales[] = {1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 100.0}; + double logLikelihoods[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; double min = lambda / 10.0; - for (std::size_t i = 0u; i < boost::size(scales); ++i) - { + for (std::size_t i = 0u; i < boost::size(scales); ++i) { logLikelihoods[i] = objective(scales[i] * min); } LOG_TRACE("log(L) = " << core::CContainerPrinter::print(logLikelihoods)); - double *max = std::max_element(boost::begin(logLikelihoods), - boost::end(logLikelihoods)); + double* max = std::max_element(boost::begin(logLikelihoods), boost::end(logLikelihoods)); ptrdiff_t a = std::max(max - logLikelihoods - 1, ptrdiff_t(0)); ptrdiff_t b = std::min(max - logLikelihoods + 1, ptrdiff_t(7)); LOG_TRACE("a = " << a << ", b = " << b); std::size_t maxIterations = boost::size(scales) / 2; double logLikelihood; - CSolvers::maximize(scales[a] * lambda, scales[b] * lambda, - logLikelihoods[a], logLikelihoods[b], - objective, 0.0, maxIterations, - lambda, logLikelihood); + CSolvers::maximize( + scales[a] * lambda, scales[b] * lambda, logLikelihoods[a], logLikelihoods[b], objective, 0.0, maxIterations, lambda, logLikelihood); LOG_TRACE("lambda = " << lambda << " log(L(lambda)) = " << logLikelihood); - m_Lambda = logLikelihood > *max ? - lambda : scales[max - logLikelihoods] * min; + m_Lambda = logLikelihood > *max ? lambda : scales[max - logLikelihoods] * min; } template template -bool CLassoLogisticRegression::doLearn(CLogisticRegressionModel &result) -{ +bool CLassoLogisticRegression::doLearn(CLogisticRegressionModel& result) { result = CLogisticRegressionModel(); - if (!this->sanityChecks()) - { + if (!this->sanityChecks()) { return false; } @@ -888,20 +724,16 @@ bool CLassoLogisticRegression::doLearn(CLogisticRegressionModel &result setupTrainingData(m_X, m_Y, excludeNone, x, y); TDoubleVec lambda(m_D, m_Lambda); TDoubleVec beta(m_Beta); - if (!learn(x, y, lambda, beta)) - { + if (!learn(x, y, lambda, beta)) { return false; } m_Beta.swap(beta); // Create the model. 
TSizeDoublePrVec sparse; - sparse.reserve(std::count_if(m_Beta.begin(), m_Beta.end(), - boost::bind(std::greater(), _1, 0.0))); - for (std::size_t j = 0u; j < m_D; ++j) - { - if (m_Beta[j] > 0.0) - { + sparse.reserve(std::count_if(m_Beta.begin(), m_Beta.end(), boost::bind(std::greater(), _1, 0.0))); + for (std::size_t j = 0u; j < m_D; ++j) { + if (m_Beta[j] > 0.0) { sparse.emplace_back(j, m_Beta[j]); } } @@ -912,84 +744,62 @@ bool CLassoLogisticRegression::doLearn(CLogisticRegressionModel &result } template -bool CLassoLogisticRegression::sanityChecks() const -{ - if (m_Y.empty()) - { +bool CLassoLogisticRegression::sanityChecks() const { + if (m_Y.empty()) { LOG_WARN("No training data"); return false; } bool positive = false; bool negative = false; - for (std::size_t i = 0u; - (!positive || !negative) && i < m_Y.size(); - ++i) - { + for (std::size_t i = 0u; (!positive || !negative) && i < m_Y.size(); ++i) { (m_Y[i] < 0.0 ? negative : positive) = true; } - if (!negative || !positive) - { - LOG_WARN("Only " << (negative ? "un" : "") - << "interesting examples provided: problem is ill posed"); + if (!negative || !positive) { + LOG_WARN("Only " << (negative ? "un" : "") << "interesting examples provided: problem is ill posed"); return false; } return true; } - ////// CLassoLogisticRegressionDense ////// -void CLassoLogisticRegressionDense::addTrainingData(const TDoubleVec &x, - bool interesting) -{ - if (this->x().empty()) - { +void CLassoLogisticRegressionDense::addTrainingData(const TDoubleVec& x, bool interesting) { + if (this->x().empty()) { this->d() = x.size(); } - if (x.size() != this->d()) - { - LOG_ERROR("Ignoring inconsistent training data |x| = " - << x.size() << ", D = " << this->x()[0].size()); + if (x.size() != this->d()) { + LOG_ERROR("Ignoring inconsistent training data |x| = " << x.size() << ", D = " << this->x()[0].size()); return; } this->x().push_back(x); this->y().push_back(interesting ? +1.0 : -1.0); } -void CLassoLogisticRegressionDense::learnHyperparameter(EHyperparametersStyle style) -{ +void CLassoLogisticRegressionDense::learnHyperparameter(EHyperparametersStyle style) { this->doLearnHyperparameter(style); } -bool CLassoLogisticRegressionDense::learn(CLogisticRegressionModel &result) -{ +bool CLassoLogisticRegressionDense::learn(CLogisticRegressionModel& result) { return this->doLearn(result); } - ////// CLassoLogisticRegressionSparse ////// -void CLassoLogisticRegressionSparse::addTrainingData(const TSizeDoublePrVec &x, - bool interesting) -{ - for (std::size_t i = 0u; i < x.size(); ++i) - { +void CLassoLogisticRegressionSparse::addTrainingData(const TSizeDoublePrVec& x, bool interesting) { + for (std::size_t i = 0u; i < x.size(); ++i) { this->d() = std::max(this->d(), x[i].first); } this->x().push_back(x); this->y().push_back(interesting ? 
+1.0 : -1.0); } -void CLassoLogisticRegressionSparse::learnHyperparameter(EHyperparametersStyle style) -{ +void CLassoLogisticRegressionSparse::learnHyperparameter(EHyperparametersStyle style) { this->doLearnHyperparameter(style); } -bool CLassoLogisticRegressionSparse::learn(CLogisticRegressionModel &result) -{ +bool CLassoLogisticRegressionSparse::learn(CLogisticRegressionModel& result) { return this->doLearn(result); } - } } diff --git a/lib/maths/CLinearAlgebraTools.cc b/lib/maths/CLinearAlgebraTools.cc index e24e67e2a9..1987aa57c4 100644 --- a/lib/maths/CLinearAlgebraTools.cc +++ b/lib/maths/CLinearAlgebraTools.cc @@ -10,278 +10,210 @@ #include -namespace ml -{ -namespace maths -{ -namespace linear_algebra_tools_detail -{ +namespace ml { +namespace maths { +namespace linear_algebra_tools_detail { -namespace -{ +namespace { //! \brief Shared implementation of the inverse quadratic product. template -class CInverseQuadraticProduct -{ - public: - template - static maths_t::EFloatingPointErrorStatus compute(std::size_t d, - const MATRIX &covariance_, - const VECTOR &residual, - double &result, - bool ignoreSingularSubspace) - { - if (residual.isZero()) - { - result = 0.0; - return maths_t::E_FpNoErrors; - } +class CInverseQuadraticProduct { +public: + template + static maths_t::EFloatingPointErrorStatus + compute(std::size_t d, const MATRIX& covariance_, const VECTOR& residual, double& result, bool ignoreSingularSubspace) { + if (residual.isZero()) { + result = 0.0; + return maths_t::E_FpNoErrors; + } - result = core::constants::LOG_MAX_DOUBLE + 1.0; + result = core::constants::LOG_MAX_DOUBLE + 1.0; - switch (d) - { - case 1: - if (covariance_(0, 0) == 0.0) - { - return maths_t::E_FpOverflowed; - } - result = residual(0) * residual(0) / covariance_(0, 0); - return maths_t::E_FpNoErrors; + switch (d) { + case 1: + if (covariance_(0, 0) == 0.0) { + return maths_t::E_FpOverflowed; + } + result = residual(0) * residual(0) / covariance_(0, 0); + return maths_t::E_FpNoErrors; - default: - { - // Note we use Jacobi SVD here so that we handle the case - // that m is singular to working precision. - Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), - Eigen::ComputeFullU | Eigen::ComputeFullV); - EIGENVECTOR y(toDenseVector(residual)); + default: { + // Note we use Jacobi SVD here so that we handle the case + // that m is singular to working precision. + Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); + EIGENVECTOR y(toDenseVector(residual)); - // Check the residual is zero on the singular subspace. - std::size_t rank = static_cast(covariance.rank()); - if (!ignoreSingularSubspace && rank < d) - { - double normC = (y.transpose() * covariance.matrixU().leftCols(rank)).norm(); - double normS = (y.transpose() * covariance.matrixU().rightCols(d - rank)).norm(); - if (normS > std::numeric_limits::epsilon() * normC) - { - return maths_t::E_FpOverflowed; - } + // Check the residual is zero on the singular subspace. 
+ std::size_t rank = static_cast(covariance.rank()); + if (!ignoreSingularSubspace && rank < d) { + double normC = (y.transpose() * covariance.matrixU().leftCols(rank)).norm(); + double normS = (y.transpose() * covariance.matrixU().rightCols(d - rank)).norm(); + if (normS > std::numeric_limits::epsilon() * normC) { + return maths_t::E_FpOverflowed; } - y = covariance.solve(y); - result = residual.inner(y); - return maths_t::E_FpNoErrors; - } } + y = covariance.solve(y); + result = residual.inner(y); + return maths_t::E_FpNoErrors; + } } + } }; //! \brief Shared implementation of the log-likelihood function. template -class CGaussianLogLikelihood -{ - public: - template - static maths_t::EFloatingPointErrorStatus compute(std::size_t d, - const MATRIX &covariance_, - const VECTOR &residual, - double &result, - bool ignoreSingularSubspace) - { - result = core::constants::LOG_MIN_DOUBLE - 1.0; +class CGaussianLogLikelihood { +public: + template + static maths_t::EFloatingPointErrorStatus + compute(std::size_t d, const MATRIX& covariance_, const VECTOR& residual, double& result, bool ignoreSingularSubspace) { + result = core::constants::LOG_MIN_DOUBLE - 1.0; - switch (d) - { - case 1: - if (covariance_(0, 0) == 0.0) - { - return maths_t::E_FpOverflowed; - } - result = -0.5 * ( residual(0) * residual(0) / covariance_(0, 0) - + core::constants::LOG_TWO_PI - + std::log(covariance_(0, 0))); - return maths_t::E_FpNoErrors; + switch (d) { + case 1: + if (covariance_(0, 0) == 0.0) { + return maths_t::E_FpOverflowed; + } + result = -0.5 * (residual(0) * residual(0) / covariance_(0, 0) + core::constants::LOG_TWO_PI + std::log(covariance_(0, 0))); + return maths_t::E_FpNoErrors; - default: - { - // Note we use Jacobi SVD here so that we handle the case - // that m is singular to working precision. - Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), - Eigen::ComputeFullU | Eigen::ComputeFullV); - EIGENVECTOR y(toDenseVector(residual)); + default: { + // Note we use Jacobi SVD here so that we handle the case + // that m is singular to working precision. + Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); + EIGENVECTOR y(toDenseVector(residual)); - // Check the residual is zero on the singular subspace. - std::size_t rank = static_cast(covariance.rank()); - if (!ignoreSingularSubspace && rank < d) - { - double normC = (y.transpose() * covariance.matrixU().leftCols(rank)).norm(); - double normS = (y.transpose() * covariance.matrixU().rightCols(d - rank)).norm(); - result = normS > std::numeric_limits::epsilon() * normC ? - core::constants::LOG_MIN_DOUBLE - 1.0 : - core::constants::LOG_MAX_DOUBLE + 1.0; - return maths_t::E_FpOverflowed; - } - y = covariance.solve(y); - double logDeterminant = 0.0; - for (std::size_t i = 0u; i < rank; ++i) - { - logDeterminant += std::log(covariance.singularValues()(i)); - } - result = -0.5 * ( residual.inner(y) - + static_cast(rank) * core::constants::LOG_TWO_PI - + logDeterminant); - return maths_t::E_FpNoErrors; + // Check the residual is zero on the singular subspace. + std::size_t rank = static_cast(covariance.rank()); + if (!ignoreSingularSubspace && rank < d) { + double normC = (y.transpose() * covariance.matrixU().leftCols(rank)).norm(); + double normS = (y.transpose() * covariance.matrixU().rightCols(d - rank)).norm(); + result = normS > std::numeric_limits::epsilon() * normC ? 
core::constants::LOG_MIN_DOUBLE - 1.0 + : core::constants::LOG_MAX_DOUBLE + 1.0; + return maths_t::E_FpOverflowed; } + y = covariance.solve(y); + double logDeterminant = 0.0; + for (std::size_t i = 0u; i < rank; ++i) { + logDeterminant += std::log(covariance.singularValues()(i)); } + result = -0.5 * (residual.inner(y) + static_cast(rank) * core::constants::LOG_TWO_PI + logDeterminant); + return maths_t::E_FpNoErrors; } + } + } }; - //! \brief Shared implementation of Gaussian sampling. template -class CSampleGaussian -{ - public: - template - static void generate(std::size_t n, - const VECTOR &mean_, - const MATRIX &covariance_, - std::vector &result) - { - result.clear(); - if (n == 0) - { - return; - } +class CSampleGaussian { +public: + template + static void generate(std::size_t n, const VECTOR& mean_, const MATRIX& covariance_, std::vector& result) { + result.clear(); + if (n == 0) { + return; + } - // We sample at the points: - // { m + (E_{X_i}[ x I{[x_q, x_{q+1}]} ] * u_i }, - // - // where m is the mean, X_i is the normal associated with the i'th - // eigenvector of the covariance matrix, x_q denotes the x value - // corresponding to the quantile q, q ranges over { k*rank/n } for - // k in {0, 1, ..., n/rank-1} and u_i are the eigenvectors of the - // covariance matrix. See the discussion in CNormalMeanPrecConjugate - // for more discussion on this sampling strategy. + // We sample at the points: + // { m + (E_{X_i}[ x I{[x_q, x_{q+1}]} ] * u_i }, + // + // where m is the mean, X_i is the normal associated with the i'th + // eigenvector of the covariance matrix, x_q denotes the x value + // corresponding to the quantile q, q ranges over { k*rank/n } for + // k in {0, 1, ..., n/rank-1} and u_i are the eigenvectors of the + // covariance matrix. See the discussion in CNormalMeanPrecConjugate + // for more discussion on this sampling strategy. 
- VECTOR_PRECISE mean(mean_); - Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), - Eigen::ComputeFullU | Eigen::ComputeFullV); - std::size_t rank = static_cast(covariance.rank()); + VECTOR_PRECISE mean(mean_); + Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); + std::size_t rank = static_cast(covariance.rank()); - std::size_t numberIntervals = n / rank; - if (numberIntervals == 0) - { - result.push_back(mean); - } - else - { - LOG_TRACE("# intervals = " << numberIntervals); - result.reserve(rank * numberIntervals); - double scale = std::sqrt(static_cast(rank)); - LOG_TRACE("scale = " << scale) + std::size_t numberIntervals = n / rank; + if (numberIntervals == 0) { + result.push_back(mean); + } else { + LOG_TRACE("# intervals = " << numberIntervals); + result.reserve(rank * numberIntervals); + double scale = std::sqrt(static_cast(rank)); + LOG_TRACE("scale = " << scale) - for (std::size_t i = 0u; i < rank; ++i) - { - VECTOR_PRECISE u(fromDenseVector(covariance.matrixU().col(i))); - try - { - double variance = covariance.singularValues()(i); - boost::math::normal_distribution<> normal(0.0, std::sqrt(variance)); - LOG_TRACE("[U]_{.i} = " << covariance.matrixU().col(i).transpose()) - LOG_TRACE("variance = " << variance); - LOG_TRACE("u = " << u); + for (std::size_t i = 0u; i < rank; ++i) { + VECTOR_PRECISE u(fromDenseVector(covariance.matrixU().col(i))); + try { + double variance = covariance.singularValues()(i); + boost::math::normal_distribution<> normal(0.0, std::sqrt(variance)); + LOG_TRACE("[U]_{.i} = " << covariance.matrixU().col(i).transpose()) + LOG_TRACE("variance = " << variance); + LOG_TRACE("u = " << u); - double lastPartialExpectation = 0.0; - for (std::size_t j = 1u; j < numberIntervals; ++j) - { - double q = static_cast(j) - / static_cast(numberIntervals); - double xq = boost::math::quantile(normal, q); - double partialExpectation = -variance * CTools::safePdf(normal, xq); - double dx = scale - * static_cast(numberIntervals) - * (partialExpectation - lastPartialExpectation); - lastPartialExpectation = partialExpectation; - LOG_TRACE("dx = " << dx); - result.push_back(mean + dx * u); - } - double dx = -scale - * static_cast(numberIntervals) - * lastPartialExpectation; + double lastPartialExpectation = 0.0; + for (std::size_t j = 1u; j < numberIntervals; ++j) { + double q = static_cast(j) / static_cast(numberIntervals); + double xq = boost::math::quantile(normal, q); + double partialExpectation = -variance * CTools::safePdf(normal, xq); + double dx = scale * static_cast(numberIntervals) * (partialExpectation - lastPartialExpectation); + lastPartialExpectation = partialExpectation; LOG_TRACE("dx = " << dx); result.push_back(mean + dx * u); } - catch (const std::exception &e) - { - LOG_ERROR("Failed to sample eigenvector " << u << ": " << e.what()); - } - } + double dx = -scale * static_cast(numberIntervals) * lastPartialExpectation; + LOG_TRACE("dx = " << dx); + result.push_back(mean + dx * u); + } catch (const std::exception& e) { LOG_ERROR("Failed to sample eigenvector " << u << ": " << e.what()); } } } + } }; //! \brief Shared implementation of the log-determinant function. 
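The generate() implementation above gives each eigendirection the conditional means of its marginal on equiprobable intervals, so the sample set preserves the distribution's mean exactly. A self-contained one-dimensional sketch of the same strategy, assuming only Boost.Math (which this file already uses) and variance > 0; meanPreservingSamples is a hypothetical name:

#include <boost/math/distributions/normal.hpp>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<double> meanPreservingSamples(double mean, double variance, std::size_t n) {
    boost::math::normal_distribution<> normal(0.0, std::sqrt(variance));
    std::vector<double> result;
    result.reserve(n);
    double lastPartialExpectation = 0.0;
    for (std::size_t k = 1; k < n; ++k) {
        double q = static_cast<double>(k) / static_cast<double>(n);
        double xq = boost::math::quantile(normal, q);
        // E[X 1{X <= xq}] = -variance * f(xq) for X ~ N(0, variance),
        // where f is the density.
        double partialExpectation = -variance * boost::math::pdf(normal, xq);
        // n times the difference of partial expectations is the conditional
        // mean of X on the k'th interquantile interval.
        result.push_back(mean + static_cast<double>(n) * (partialExpectation - lastPartialExpectation));
        lastPartialExpectation = partialExpectation;
    }
    // The last interval carries the remaining mass, which makes the sample mean exact.
    result.push_back(mean - static_cast<double>(n) * lastPartialExpectation);
    return result;
}

The shared log-determinant implementation follows.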
template -class CLogDeterminant -{ - public: - template - static maths_t::EFloatingPointErrorStatus compute(std::size_t d, - const MATRIX &m_, - double &result, - bool ignoreSingularSubspace) - { - result = core::constants::LOG_MIN_DOUBLE - 1.0; +class CLogDeterminant { +public: + template + static maths_t::EFloatingPointErrorStatus compute(std::size_t d, const MATRIX& m_, double& result, bool ignoreSingularSubspace) { + result = core::constants::LOG_MIN_DOUBLE - 1.0; - switch (d) - { - case 1: - if (m_(0, 0) == 0.0) - { - return maths_t::E_FpOverflowed; - } - result = std::log(m_(0, 0)); - return maths_t::E_FpNoErrors; + switch (d) { + case 1: + if (m_(0, 0) == 0.0) { + return maths_t::E_FpOverflowed; + } + result = std::log(m_(0, 0)); + return maths_t::E_FpNoErrors; - default: - { - // Note we use Jacobi SVD here so that we handle the case - // that m is singular to working precision. - Eigen::JacobiSVD svd(toDenseMatrix(m_)); + default: { + // Note we use Jacobi SVD here so that we handle the case + // that m is singular to working precision. + Eigen::JacobiSVD svd(toDenseMatrix(m_)); - // Check the residual is zero on the singular subspace. - std::size_t rank = static_cast(svd.rank()); - if (!ignoreSingularSubspace && rank < d) - { - result = static_cast(d - rank) * std::log(svd.threshold() * svd.singularValues()(0)); - return maths_t::E_FpOverflowed; - } - result = 0.0; - for (std::size_t i = 0u; i < rank; ++i) - { - result += std::log(svd.singularValues()(i)); - } - return maths_t::E_FpNoErrors; + // Check the residual is zero on the singular subspace. + std::size_t rank = static_cast(svd.rank()); + if (!ignoreSingularSubspace && rank < d) { + result = static_cast(d - rank) * std::log(svd.threshold() * svd.singularValues()(0)); + return maths_t::E_FpOverflowed; } + result = 0.0; + for (std::size_t i = 0u; i < rank; ++i) { + result += std::log(svd.singularValues()(i)); } + return maths_t::E_FpNoErrors; + } } + } }; - } -#define INVERSE_QUADRATIC_PRODUCT(T, N) \ -maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, \ - const CSymmetricMatrixNxN &covariance, \ - const CVectorNx1 &residual, \ - double &result, \ - bool ignoreSingularSubspace) \ -{ \ - return CInverseQuadraticProduct>::Type, \ - SDenseVector>::Type>::compute( \ - d, covariance, residual, result, ignoreSingularSubspace); \ -} +#define INVERSE_QUADRATIC_PRODUCT(T, N) \ + maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, \ + const CSymmetricMatrixNxN& covariance, \ + const CVectorNx1& residual, \ + double& result, \ + bool ignoreSingularSubspace) { \ + return CInverseQuadraticProduct>::Type, SDenseVector>::Type>::compute( \ + d, covariance, residual, result, ignoreSingularSubspace); \ + } INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 2) INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 3) INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 4) @@ -292,37 +224,35 @@ INVERSE_QUADRATIC_PRODUCT(double, 4) INVERSE_QUADRATIC_PRODUCT(double, 5) #undef INVERSE_QUADRATIC_PRODUCT maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, - const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, - bool ignoreSingularSubspace) -{ + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace) { return CInverseQuadraticProduct>::Type, - SDenseVector>::Type>::compute( - d, covariance, residual, result, ignoreSingularSubspace); + SDenseVector>::Type>::compute(d, + covariance, + residual, + result, + ignoreSingularSubspace); } 
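Both the macro-generated fixed-size overloads and the dynamic-size overloads of inverseQuadraticProduct reduce to the same computation: evaluate r' C^{-1} r with a Jacobi SVD so that a covariance matrix which is singular to working precision is still handled, rejecting any residual with a component outside the covariance's span. A sketch in plain Eigen (inverseQuadraticProductSketch is a hypothetical name, not the library's API):

#include <Eigen/Dense>
#include <limits>

bool inverseQuadraticProductSketch(const Eigen::MatrixXd& c, const Eigen::VectorXd& r, double& result) {
    Eigen::JacobiSVD<Eigen::MatrixXd> svd(c, Eigen::ComputeFullU | Eigen::ComputeFullV);
    Eigen::Index rank = svd.rank();
    Eigen::Index d = c.rows();
    if (rank < d) {
        // A residual component outside the span of c makes the product
        // infinite, so report that case as an overflow.
        double normC = (r.transpose() * svd.matrixU().leftCols(rank)).norm();
        double normS = (r.transpose() * svd.matrixU().rightCols(d - rank)).norm();
        if (normS > std::numeric_limits<double>::epsilon() * normC) {
            return false;
        }
    }
    // solve() applies the pseudo-inverse on the rank-deficient part.
    result = r.dot(svd.solve(r));
    return true;
}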
maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, - const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, - bool ignoreSingularSubspace) -{ - return CInverseQuadraticProduct>::Type, - SDenseVector>::Type>::compute( - d, covariance, residual, result, ignoreSingularSubspace); + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace) { + return CInverseQuadraticProduct>::Type, SDenseVector>::Type>::compute( + d, covariance, residual, result, ignoreSingularSubspace); } -#define GAUSSIAN_LOG_LIKELIHOOD(T, N) \ -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, \ - const CSymmetricMatrixNxN &covariance, \ - const CVectorNx1 &residual, \ - double &result, \ - bool ignoreSingularSubspace) \ -{ \ - return CGaussianLogLikelihood>::Type, \ - SDenseVector>::Type>::compute( \ - d, covariance, residual, result, ignoreSingularSubspace); \ -} +#define GAUSSIAN_LOG_LIKELIHOOD(T, N) \ + maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, \ + const CSymmetricMatrixNxN& covariance, \ + const CVectorNx1& residual, \ + double& result, \ + bool ignoreSingularSubspace) { \ + return CGaussianLogLikelihood>::Type, SDenseVector>::Type>::compute( \ + d, covariance, residual, result, ignoreSingularSubspace); \ + } GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 2) GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 3) GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 4) @@ -333,34 +263,29 @@ GAUSSIAN_LOG_LIKELIHOOD(double, 4) GAUSSIAN_LOG_LIKELIHOOD(double, 5) #undef GAUSSIAN_LOG_LIKELIHOOD maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, - const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, - bool ignoreSingularSubspace) -{ - return CGaussianLogLikelihood>::Type, - SDenseVector>::Type>::compute( - d, covariance, residual, result, ignoreSingularSubspace); + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace) { + return CGaussianLogLikelihood>::Type, SDenseVector>::Type>::compute( + d, covariance, residual, result, ignoreSingularSubspace); } maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, - const CSymmetricMatrix &covariance, - const CVector &residual, - double &result, - bool ignoreSingularSubspace) -{ - return CGaussianLogLikelihood>::Type, - SDenseVector>::Type>::compute( - d, covariance, residual, result, ignoreSingularSubspace); + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace) { + return CGaussianLogLikelihood>::Type, SDenseVector>::Type>::compute( + d, covariance, residual, result, ignoreSingularSubspace); } -#define SAMPLE_GAUSSIAN(T, N) \ -void sampleGaussian(std::size_t d, \ - const CVectorNx1 &mean, \ - const CSymmetricMatrixNxN &covariance, \ - std::vector > &result) \ -{ \ - CSampleGaussian>::Type>::generate(d, mean, covariance, result); \ -} +#define SAMPLE_GAUSSIAN(T, N) \ + void sampleGaussian(std::size_t d, \ + const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, \ + std::vector>& result) { \ + CSampleGaussian>::Type>::generate(d, mean, covariance, result); \ + } SAMPLE_GAUSSIAN(CFloatStorage, 2) SAMPLE_GAUSSIAN(CFloatStorage, 3) SAMPLE_GAUSSIAN(CFloatStorage, 4) @@ -371,28 +296,23 @@ SAMPLE_GAUSSIAN(double, 4) SAMPLE_GAUSSIAN(double, 5) #undef SAMPLE_GAUSSIAN void sampleGaussian(std::size_t d, - const CVector &mean, - const CSymmetricMatrix &covariance, - std::vector> &result) 
-{ + const CVector& mean, + const CSymmetricMatrix& covariance, + std::vector>& result) { return CSampleGaussian>::Type>::generate(d, mean, covariance, result); } void sampleGaussian(std::size_t d, - const CVector &mean, - const CSymmetricMatrix &covariance, - std::vector> &result) -{ + const CVector& mean, + const CSymmetricMatrix& covariance, + std::vector>& result) { return CSampleGaussian>::Type>::generate(d, mean, covariance, result); } -#define LOG_DETERMINANT(T, N) \ -maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, \ - const CSymmetricMatrixNxN &matrix, \ - double &result, \ - bool ignoreSingularSubspace) \ -{ \ - return CLogDeterminant>::Type>::compute(d, matrix, result, ignoreSingularSubspace); \ -} +#define LOG_DETERMINANT(T, N) \ + maths_t::EFloatingPointErrorStatus logDeterminant( \ + std::size_t d, const CSymmetricMatrixNxN& matrix, double& result, bool ignoreSingularSubspace) { \ + return CLogDeterminant>::Type>::compute(d, matrix, result, ignoreSingularSubspace); \ + } LOG_DETERMINANT(CFloatStorage, 2) LOG_DETERMINANT(CFloatStorage, 3) LOG_DETERMINANT(CFloatStorage, 4) @@ -402,21 +322,14 @@ LOG_DETERMINANT(double, 3) LOG_DETERMINANT(double, 4) LOG_DETERMINANT(double, 5) #undef LOG_DETERMINANT -maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, - const CSymmetricMatrix &matrix, - double &result, - bool ignoreSingularSubspace) -{ +maths_t::EFloatingPointErrorStatus +logDeterminant(std::size_t d, const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace) { return CLogDeterminant>::Type>::compute(d, matrix, result, ignoreSingularSubspace); } -maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, - const CSymmetricMatrix &matrix, - double &result, - bool ignoreSingularSubspace) -{ +maths_t::EFloatingPointErrorStatus +logDeterminant(std::size_t d, const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace) { return CLogDeterminant>::Type>::compute(d, matrix, result, ignoreSingularSubspace); } - } } } diff --git a/lib/maths/CLogNormalMeanPrecConjugate.cc b/lib/maths/CLogNormalMeanPrecConjugate.cc index 1ba49c1d1b..d55b37c717 100644 --- a/lib/maths/CLogNormalMeanPrecConjugate.cc +++ b/lib/maths/CLogNormalMeanPrecConjugate.cc @@ -8,20 +8,20 @@ #include #include +#include #include #include -#include #include #include #include #include -#include #include #include #include #include #include +#include #include #include @@ -40,13 +40,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TDouble1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; @@ -57,28 +54,20 @@ using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulat using TWeightStyleVec = maths_t::TWeightStyleVec; //! Compute x * x. -inline double pow2(double x) -{ +inline double pow2(double x) { return x * x; } const double MINIMUM_LOGNORMAL_SHAPE = 100.0; -namespace detail -{ +namespace detail { using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; //! \brief Adds "weight" x "right operand" to the "left operand". -struct SPlusWeight -{ - double operator()(double lhs, - double rhs, - double weight = 1.0) const - { - return lhs + weight * rhs; - } +struct SPlusWeight { + double operator()(double lhs, double rhs, double weight = 1.0) const { return lhs + weight * rhs; } }; //! Get the effective location and scale of the sample. @@ -90,11 +79,8 @@ struct SPlusWeight //! \param[in] shape The gamma prior shape. //! 
\param[out] location The effective location of sample distribution. //! \param[out] scale The effective scale of sample distribution. -inline void locationAndScale(double vs, double r, double s, - double mean, double precision, - double rate, double shape, - double &location, double &scale) -{ +inline void +locationAndScale(double vs, double r, double s, double mean, double precision, double rate, double shape, double& location, double& scale) { double t = vs == 1.0 ? r : r + std::log(s + vs * (1.0 - s)); double scaledPrecision = t == r ? precision : t / r * precision; double scaledRate = t == r ? rate : t / r * rate; @@ -123,9 +109,9 @@ inline void locationAndScale(double vs, double r, double s, //! \param precision The precision of the conditional mean prior. //! \param result Filled in with the aggregation of results of \p func. template -bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, +bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, FUNC func, AGGREGATOR aggregate, bool isNonInformative, @@ -134,12 +120,10 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, double rate, double mean, double precision, - RESULT &result) -{ + RESULT& result) { result = RESULT(); - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute distribution for empty sample set"); return false; } @@ -157,21 +141,16 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, // // This becomes increasingly accurate as the prior distribution narrows. - try - { - if (isNonInformative) - { + try { + if (isNonInformative) { // The non-informative prior is improper and effectively 0 everywhere. // (It is acceptable to approximate all finite samples as at the median // of this distribution.) - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); result = aggregate(result, func(CTools::SImproperDistribution(), samples[i] + offset), n); } - } - else if (shape > MINIMUM_LOGNORMAL_SHAPE) - { + } else if (shape > MINIMUM_LOGNORMAL_SHAPE) { // For large shape the marginal likelihood is very well approximated // by a log-normal distribution. In particular, the true distribution // is log t with 2 * a degrees of freedom, location m and scale @@ -192,31 +171,27 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, double r = rate / shape; double s = std::exp(-r); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights[i]) - * maths_t::countVarianceScale(weightStyles, weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); double location; double scale; locationAndScale(varianceScale, r, s, mean, precision, rate, shape, location, scale); boost::math::lognormal_distribution<> lognormal(location, scale); result = aggregate(result, func(lognormal, samples[i] + offset), n); } - } - else - { + } else { // The marginal likelihood is log t with 2 * a degrees of freedom, // location m and scale s = (a * p / (p + 1) / b) ^ (1/2). 
double r = rate / shape; double s = std::exp(-r); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights[i]) - * maths_t::countVarianceScale(weightStyles, weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); double location; double scale; locationAndScale(varianceScale, r, s, mean, precision, rate, shape, location, scale); @@ -224,9 +199,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, result = aggregate(result, func(logt, samples[i] + offset), n); } } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Error calculating joint c.d.f.: " << e.what()); return false; } @@ -243,55 +216,52 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, //! so that it can be integrated over the hidden variable representing the //! actual value of a discrete datum which we assume is in the interval [n, n+1]. template -class CEvaluateOnSamples : core::CNonCopyable -{ - public: - CEvaluateOnSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - bool isNonInformative, - double offset, - double mean, - double precision, - double shape, - double rate) : - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Offset(offset), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate) - {} - - bool operator()(double x, double &result) const - { - return evaluateFunctionOnJointDistribution(m_WeightStyles, - m_Samples, - m_Weights, - F(), - SPlusWeight(), - m_IsNonInformative, - m_Offset + x, - m_Shape, - m_Rate, - m_Mean, - m_Precision, - result); - } +class CEvaluateOnSamples : core::CNonCopyable { +public: + CEvaluateOnSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + bool isNonInformative, + double offset, + double mean, + double precision, + double shape, + double rate) + : m_WeightStyles(weightStyles), + m_Samples(samples), + m_Weights(weights), + m_IsNonInformative(isNonInformative), + m_Offset(offset), + m_Mean(mean), + m_Precision(precision), + m_Shape(shape), + m_Rate(rate) {} + + bool operator()(double x, double& result) const { + return evaluateFunctionOnJointDistribution(m_WeightStyles, + m_Samples, + m_Weights, + F(), + SPlusWeight(), + m_IsNonInformative, + m_Offset + x, + m_Shape, + m_Rate, + m_Mean, + m_Precision, + result); + } - private: - const TWeightStyleVec &m_WeightStyles; - const TDouble1Vec &m_Samples; - const TDouble4Vec1Vec &m_Weights; - bool m_IsNonInformative; - double m_Offset; - double m_Mean; - double m_Precision; - double m_Shape; - double m_Rate; +private: + const TWeightStyleVec& m_WeightStyles; + const TDouble1Vec& m_Samples; + const TDouble4Vec1Vec& m_Weights; + bool m_IsNonInformative; + double m_Offset; + double m_Mean; + double m_Precision; + double m_Shape; + double m_Rate; }; //! \brief Kernel for computing the marginal likelihood's mean. @@ -304,38 +274,29 @@ class CEvaluateOnSamples : core::CNonCopyable //! //! Here, \(m\) is the expected mean, and the prior on the precision\(p\) is //! gamma distributed. 
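The displayed formula for this expectation did not survive in the comment above; judging from the integrand CMeanKernel computes, the quantity being evaluated is (a reconstruction, not quoted from the source):

\f[
  \mathbb{E}[X] \;=\; \frac{\displaystyle \int_0^\infty \exp\!\Big(m + \frac{1}{2u}\Big(\frac{1}{p} + 1\Big)\Big)\, f(u;a,b)\, \mathrm{d}u}
                           {\displaystyle \int_0^\infty f(u;a,b)\, \mathrm{d}u},
  \qquad
  f(u;a,b) \;=\; \frac{b^a}{\Gamma(a)}\, u^{a-1} e^{-b u},
\f]

where u is the likelihood precision. The denominator is analytically one, but the kernel returns the pair (numerator integrand, density) so the caller can normalize the numerical quadrature and cancel its error. CMeanKernel, which evaluates exactly this integrand, is defined next.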
-class CMeanKernel -{ - public: - using TValue = CVectorNx1; - - public: - CMeanKernel(double m, double p, double a, double b) : - m_M(m), m_P(p), m_A(a), m_B(b) - {} - - bool operator()(double x, TValue &result) const - { - try - { - boost::math::gamma_distribution<> gamma(m_A, 1.0 / m_B); - double fx = boost::math::pdf(gamma, x); - result(0) = std::exp(m_M + 0.5 / x * (1.0 / m_P + 1.0)) * fx; - result(1) = fx; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate mean kernel: " << e.what() - << ", m = " << m_M << ", p = " << m_P - << ", a = " << m_A << ", b = " << m_B - << ", x = " << x); - return false; - } - return true; +class CMeanKernel { +public: + using TValue = CVectorNx1; + +public: + CMeanKernel(double m, double p, double a, double b) : m_M(m), m_P(p), m_A(a), m_B(b) {} + + bool operator()(double x, TValue& result) const { + try { + boost::math::gamma_distribution<> gamma(m_A, 1.0 / m_B); + double fx = boost::math::pdf(gamma, x); + result(0) = std::exp(m_M + 0.5 / x * (1.0 / m_P + 1.0)) * fx; + result(1) = fx; + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate mean kernel: " << e.what() << ", m = " << m_M << ", p = " << m_P << ", a = " << m_A + << ", b = " << m_B << ", x = " << x); + return false; } + return true; + } - private: - double m_M, m_P, m_A, m_B; +private: + double m_M, m_P, m_A, m_B; }; //! \brief Kernel for computing the marginal likelihood's variance. @@ -343,40 +304,31 @@ class CMeanKernel //! This is used to evaluate the integral of the likelihood variance w.r.t. //! the prior on the likelihood precision. Note that the integral over the //! prior on the mean can be performed analytically. -class CVarianceKernel -{ - public: - using TValue = CVectorNx1; - - public: - CVarianceKernel(double mean, double m, double p, double a, double b) : - m_Mean(mean), m_M(m), m_P(p), m_A(a), m_B(b) - {} - - bool operator()(const TValue &x, TValue &result) const - { - try - { - boost::math::gamma_distribution<> gamma(m_A, 1.0 / m_B); - boost::math::normal_distribution<> normal(m_M, std::sqrt(1.0 / x(0) / m_P)); - double fx = boost::math::pdf(normal, x(1)) * boost::math::pdf(gamma, x(0)); - double m = std::exp(x(1) + 0.5 / x(0)); - result(0) = (m * m * (std::exp(1.0 / x(0)) - 1.0) + pow2(m - m_Mean)) * fx; - result(1) = fx; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate mean kernel: " << e.what() - << ", m = " << m_M << ", p = " << m_P - << ", a = " << m_A << ", b = " << m_B - << ", x = " << x); - return false; - } - return true; +class CVarianceKernel { +public: + using TValue = CVectorNx1; + +public: + CVarianceKernel(double mean, double m, double p, double a, double b) : m_Mean(mean), m_M(m), m_P(p), m_A(a), m_B(b) {} + + bool operator()(const TValue& x, TValue& result) const { + try { + boost::math::gamma_distribution<> gamma(m_A, 1.0 / m_B); + boost::math::normal_distribution<> normal(m_M, std::sqrt(1.0 / x(0) / m_P)); + double fx = boost::math::pdf(normal, x(1)) * boost::math::pdf(gamma, x(0)); + double m = std::exp(x(1) + 0.5 / x(0)); + result(0) = (m * m * (std::exp(1.0 / x(0)) - 1.0) + pow2(m - m_Mean)) * fx; + result(1) = fx; + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate mean kernel: " << e.what() << ", m = " << m_M << ", p = " << m_P << ", a = " << m_A + << ", b = " << m_B << ", x = " << x); + return false; } + return true; + } - private: - double m_Mean, m_M, m_P, m_A, m_B; +private: + double m_Mean, m_M, m_P, m_A, m_B; }; //! 
\brief Computes the probability of seeing less likely samples at a specified @@ -385,80 +337,72 @@ class CVarianceKernel //! This thin wrapper around the evaluateFunctionOnJointDistribution function //! so that it can be integrated over the hidden variable representing the //! actual value of a discrete datum which we assume is in the interval [n, n+1]. -class CProbabilityOfLessLikelySamples : core::CNonCopyable -{ - public: - CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - bool isNonInformative, - double offset, - double mean, - double precision, - double shape, - double rate) : - m_Calculation(calculation), - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Offset(offset), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_Tail(0) - {} - - bool operator()(double x, double &result) const - { - CJointProbabilityOfLessLikelySamples probability; - maths_t::ETail tail = maths_t::E_UndeterminedTail; - - if ( !evaluateFunctionOnJointDistribution(m_WeightStyles, - m_Samples, - m_Weights, - boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), - _1, _2, boost::ref(tail)), - CJointProbabilityOfLessLikelySamples::SAddProbability(), - m_IsNonInformative, - m_Offset + x, - m_Shape, - m_Rate, - m_Mean, - m_Precision, - probability) - || !probability.calculate(result)) - { - LOG_ERROR("Failed to compute probability of less likely samples" - << ", samples = " << core::CContainerPrinter::print(m_Samples) - << ", offset = " << m_Offset + x); - return false; - } - - m_Tail = m_Tail | tail; - - return true; +class CProbabilityOfLessLikelySamples : core::CNonCopyable { +public: + CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + bool isNonInformative, + double offset, + double mean, + double precision, + double shape, + double rate) + : m_Calculation(calculation), + m_WeightStyles(weightStyles), + m_Samples(samples), + m_Weights(weights), + m_IsNonInformative(isNonInformative), + m_Offset(offset), + m_Mean(mean), + m_Precision(precision), + m_Shape(shape), + m_Rate(rate), + m_Tail(0) {} + + bool operator()(double x, double& result) const { + CJointProbabilityOfLessLikelySamples probability; + maths_t::ETail tail = maths_t::E_UndeterminedTail; + + if (!evaluateFunctionOnJointDistribution( + m_WeightStyles, + m_Samples, + m_Weights, + boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), + CJointProbabilityOfLessLikelySamples::SAddProbability(), + m_IsNonInformative, + m_Offset + x, + m_Shape, + m_Rate, + m_Mean, + m_Precision, + probability) || + !probability.calculate(result)) { + LOG_ERROR("Failed to compute probability of less likely samples" + << ", samples = " << core::CContainerPrinter::print(m_Samples) << ", offset = " << m_Offset + x); + return false; } - maths_t::ETail tail() const - { - return static_cast(m_Tail); - } + m_Tail = m_Tail | tail; - private: - maths_t::EProbabilityCalculation m_Calculation; - const TWeightStyleVec &m_WeightStyles; - const TDouble1Vec &m_Samples; - const TDouble4Vec1Vec &m_Weights; - bool m_IsNonInformative; - double m_Offset; - double m_Mean; - double m_Precision; - double m_Shape; - double m_Rate; - mutable int m_Tail; + return true; + } + + maths_t::ETail 
tail() const { return static_cast(m_Tail); } + +private: + maths_t::EProbabilityCalculation m_Calculation; + const TWeightStyleVec& m_WeightStyles; + const TDouble1Vec& m_Samples; + const TDouble4Vec1Vec& m_Weights; + bool m_IsNonInformative; + double m_Offset; + double m_Mean; + double m_Precision; + double m_Shape; + double m_Rate; + mutable int m_Tail; }; //! \brief Wraps up log marginal likelihood function so that it can be integrated @@ -486,193 +430,160 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable //! var(.) is the sample variance function. //! m and p are the prior Gaussian mean and precision, respectively. //! a and b are the prior Gamma shape and rate, respectively. -class CLogMarginalLikelihood : core::CNonCopyable -{ - public: - CLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double offset, - double mean, - double precision, - double shape, - double rate) : - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_Offset(offset), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_NumberSamples(0.0), - m_Scales(), - m_Constant(0.0), - m_ErrorStatus(maths_t::E_FpNoErrors) - { - this->precompute(); +class CLogMarginalLikelihood : core::CNonCopyable { +public: + CLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double offset, + double mean, + double precision, + double shape, + double rate) + : m_WeightStyles(weightStyles), + m_Samples(samples), + m_Weights(weights), + m_Offset(offset), + m_Mean(mean), + m_Precision(precision), + m_Shape(shape), + m_Rate(rate), + m_NumberSamples(0.0), + m_Scales(), + m_Constant(0.0), + m_ErrorStatus(maths_t::E_FpNoErrors) { + this->precompute(); + } + + //! Evaluate the log marginal likelihood at the offset \p x. + bool operator()(double x, double& result) const { + if (m_ErrorStatus & maths_t::E_FpFailed) { + return false; } - //! Evaluate the log marginal likelihood at the offset \p x. - bool operator()(double x, double &result) const - { - if (m_ErrorStatus & maths_t::E_FpFailed) - { - return false; - } + double logSamplesSum = 0.0; + TMeanVarAccumulator logSampleMoments; - double logSamplesSum = 0.0; - TMeanVarAccumulator logSampleMoments; - - try - { - for (std::size_t i = 0u; i < m_Samples.size(); ++i) - { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); - double sample = m_Samples[i] + m_Offset + x; - if (sample <= 0.0) - { - // Technically, the marginal likelihood is zero here - // so the log would be infinite. We use minus max - // double because log(0) = HUGE_VALUE, which causes - // problems for Windows. Calling code is notified - // when the calculation overflows and should avoid - // taking the exponential since this will underflow - // and pollute the floating point environment. This - // may cause issues for some library function - // implementations (see fe*exceptflag for more details). - result = boost::numeric::bounds::lowest(); - this->addErrorStatus(maths_t::E_FpOverflowed); - return false; - } + try { + for (std::size_t i = 0u; i < m_Samples.size(); ++i) { + double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double sample = m_Samples[i] + m_Offset + x; + if (sample <= 0.0) { + // Technically, the marginal likelihood is zero here + // so the log would be infinite. We use minus max + // double because log(0) = HUGE_VALUE, which causes + // problems for Windows. 
Calling code is notified
+                    // when the calculation overflows and should avoid
+                    // taking the exponential since this will underflow
+                    // and pollute the floating point environment. This
+                    // may cause issues for some library function
+                    // implementations (see fe*exceptflag for more details).
+                    result = boost::numeric::bounds<double>::lowest();
+                    this->addErrorStatus(maths_t::E_FpOverflowed);
+                    return false;
+                }
 
-                    double logSample = std::log(sample);
-                    double w = m_Scales.empty() ? 1.0 : 1.0 / m_Scales[i].first;
-                    double shift = m_Scales.empty() ? 0.0 : m_Scales[i].second;
+                double logSample = std::log(sample);
+                double w = m_Scales.empty() ? 1.0 : 1.0 / m_Scales[i].first;
+                double shift = m_Scales.empty() ? 0.0 : m_Scales[i].second;
 
-                    logSamplesSum += n * logSample;
-                    logSampleMoments.add(logSample - shift, n * w);
-                }
+                logSamplesSum += n * logSample;
+                logSampleMoments.add(logSample - shift, n * w);
             }
-            catch (const std::exception &e)
-            {
-                LOG_ERROR("Failed to calculate likelihood: " << e.what());
-                this->addErrorStatus(maths_t::E_FpFailed);
-                return false;
-            }
-
-            double weightedNumberSamples = CBasicStatistics::count(logSampleMoments);
-            double logSamplesMean = CBasicStatistics::mean(logSampleMoments);
-            double logSamplesSquareDeviation = (weightedNumberSamples - 1.0)
-                                               * CBasicStatistics::variance(logSampleMoments);
+        } catch (const std::exception& e) {
+            LOG_ERROR("Failed to calculate likelihood: " << e.what());
+            this->addErrorStatus(maths_t::E_FpFailed);
+            return false;
+        }
 
-            double impliedShape = m_Shape + 0.5 * m_NumberSamples;
-            double impliedRate = m_Rate + 0.5 * (logSamplesSquareDeviation
-                                                 + m_Precision
-                                                   * weightedNumberSamples
-                                                   * pow2(logSamplesMean - m_Mean)
-                                                   / (m_Precision + weightedNumberSamples));
+        double weightedNumberSamples = CBasicStatistics::count(logSampleMoments);
+        double logSamplesMean = CBasicStatistics::mean(logSampleMoments);
+        double logSamplesSquareDeviation = (weightedNumberSamples - 1.0) * CBasicStatistics::variance(logSampleMoments);
 
-            result = m_Constant
-                     - impliedShape * std::log(impliedRate)
-                     - logSamplesSum;
+        double impliedShape = m_Shape + 0.5 * m_NumberSamples;
+        double impliedRate =
+            m_Rate + 0.5 * (logSamplesSquareDeviation +
+                            m_Precision * weightedNumberSamples * pow2(logSamplesMean - m_Mean) / (m_Precision + weightedNumberSamples));
 
-            return true;
-        }
+        result = m_Constant - impliedShape * std::log(impliedRate) - logSamplesSum;
 
-        //! Retrieve the error status for the integration.
-        maths_t::EFloatingPointErrorStatus errorStatus() const
-        {
-            return m_ErrorStatus;
-        }
+        return true;
+    }
 
-    private:
-        static const double LOG_2_PI;
-
-    private:
-        //! Compute all the constants in the integrand.
-        void precompute()
-        {
-            try
-            {
-                double logVarianceScaleSum = 0.0;
-
-                if (   maths_t::hasSeasonalVarianceScale(m_WeightStyles, m_Weights)
-                    || maths_t::hasCountVarianceScale(m_WeightStyles, m_Weights))
-                {
-                    m_Scales.reserve(m_Weights.size());
-                    double r = m_Rate / m_Shape;
-                    double s = std::exp(-r);
-                    for (std::size_t i = 0u; i < m_Weights.size(); ++i)
-                    {
-                        double varianceScale =  maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i])
-                                              * maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]);
-
-                        // Get the scale and shift of the exponentiated Gaussian.
-                        if (varianceScale == 1.0)
-                        {
-                            m_Scales.emplace_back(1.0, 0.0);
-                        }
-                        else
-                        {
-                            double t = r + std::log(s + varianceScale * (1.0 - s));
-                            m_Scales.emplace_back(t / r, 0.5 * (r - t));
-                            logVarianceScaleSum += std::log(t / r);
-                        }
+    //! Retrieve the error status for the integration.
+    maths_t::EFloatingPointErrorStatus errorStatus() const { return m_ErrorStatus; }
+
+private:
+    static const double LOG_2_PI;
+
+private:
+    //! Compute all the constants in the integrand.
+    void precompute() {
+        try {
+            double logVarianceScaleSum = 0.0;
+
+            if (maths_t::hasSeasonalVarianceScale(m_WeightStyles, m_Weights) || maths_t::hasCountVarianceScale(m_WeightStyles, m_Weights)) {
+                m_Scales.reserve(m_Weights.size());
+                double r = m_Rate / m_Shape;
+                double s = std::exp(-r);
+                for (std::size_t i = 0u; i < m_Weights.size(); ++i) {
+                    double varianceScale = maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) *
+                                           maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]);
+
+                    // Get the scale and shift of the exponentiated Gaussian.
+                    if (varianceScale == 1.0) {
+                        m_Scales.emplace_back(1.0, 0.0);
+                    } else {
+                        double t = r + std::log(s + varianceScale * (1.0 - s));
+                        m_Scales.emplace_back(t / r, 0.5 * (r - t));
+                        logVarianceScaleSum += std::log(t / r);
+                    }
                     }
                 }
+            }
 
-                m_NumberSamples = 0.0;
-                double weightedNumberSamples = 0.0;
-
-                for (std::size_t i = 0u; i < m_Weights.size(); ++i)
-                {
-                    double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]);
-                    m_NumberSamples += n;
-                    weightedNumberSamples += n / (m_Scales.empty() ? 1.0 : m_Scales[i].first);
-                }
-
-                double impliedShape = m_Shape + 0.5 * m_NumberSamples;
-                double impliedPrecision = m_Precision + weightedNumberSamples;
+            m_NumberSamples = 0.0;
+            double weightedNumberSamples = 0.0;
 
-                m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision))
-                             - 0.5 * m_NumberSamples * LOG_2_PI
-                             - 0.5 * logVarianceScaleSum
-                             + boost::math::lgamma(impliedShape)
-                             - boost::math::lgamma(m_Shape)
-                             + m_Shape * std::log(m_Rate);
-            }
-            catch (const std::exception &e)
-            {
-                LOG_ERROR("Error calculating marginal likelihood: " << e.what());
-                this->addErrorStatus(maths_t::E_FpFailed);
+            for (std::size_t i = 0u; i < m_Weights.size(); ++i) {
+                double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]);
+                m_NumberSamples += n;
+                weightedNumberSamples += n / (m_Scales.empty() ? 1.0 : m_Scales[i].first);
             }
-        }
 
+            double impliedShape = m_Shape + 0.5 * m_NumberSamples;
+            double impliedPrecision = m_Precision + weightedNumberSamples;
+
+            m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision)) - 0.5 * m_NumberSamples * LOG_2_PI -
+                         0.5 * logVarianceScaleSum + boost::math::lgamma(impliedShape) - boost::math::lgamma(m_Shape) +
+                         m_Shape * std::log(m_Rate);
+        } catch (const std::exception& e) {
+            LOG_ERROR("Error calculating marginal likelihood: " << e.what());
+            this->addErrorStatus(maths_t::E_FpFailed);
         }
+    }
+
+    //! Update the error status.
+    void addErrorStatus(maths_t::EFloatingPointErrorStatus status) const {
+        m_ErrorStatus = static_cast<maths_t::EFloatingPointErrorStatus>(m_ErrorStatus | status);
+    }
 
-    private:
-        const TWeightStyleVec &m_WeightStyles;
-        const TDouble1Vec &m_Samples;
-        const TDouble4Vec1Vec &m_Weights;
-        double m_Offset;
-        double m_Mean;
-        double m_Precision;
-        double m_Shape;
-        double m_Rate;
-        double m_NumberSamples;
-        TDoubleDoublePrVec m_Scales;
-        double m_Constant;
-        mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus;
+private:
+    const TWeightStyleVec& m_WeightStyles;
+    const TDouble1Vec& m_Samples;
+    const TDouble4Vec1Vec& m_Weights;
+    double m_Offset;
+    double m_Mean;
+    double m_Precision;
+    double m_Shape;
+    double m_Rate;
+    double m_NumberSamples;
+    TDoubleDoublePrVec m_Scales;
+    double m_Constant;
+    mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus;
 };
 
 const double CLogMarginalLikelihood::LOG_2_PI = std::log(boost::math::double_constants::two_pi);
-
 //! \brief Wraps up the sample total square deviation of the logs of a
 //! collection of samples, i.e.
 //!
@@ -682,42 +593,30 @@ const double CLogMarginalLikelihood::LOG_2_PI = std::log(boost::math::double_con
 //! so that it can be integrated over the hidden variable representing the
 //! actual value of a discrete datum which we assume is in the interval
 //! [n, n+1].
-class CLogSampleSquareDeviation : core::CNonCopyable
-{
-    public:
-        CLogSampleSquareDeviation(const TWeightStyleVec &weightStyles,
-                                  const TDouble1Vec &samples,
-                                  const TDouble4Vec1Vec &weights,
-                                  double mean) :
-                m_WeightStyles(weightStyles),
-                m_Samples(samples),
-                m_Weights(weights),
-                m_Mean(mean)
-        {
-        }
-
-        bool operator()(double x, double &result) const
-        {
-            result = 0.0;
-            for (std::size_t i = 0u; i < m_Samples.size(); ++i)
-            {
-                double residual = m_Samples[i];
-                if (residual <= 0.0)
-                {
-                    continue;
-                }
-                double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]);
-                residual = std::log(residual + x) - m_Mean;
-                result += n * pow2(residual);
+class CLogSampleSquareDeviation : core::CNonCopyable {
+public:
+    CLogSampleSquareDeviation(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights, double mean)
+        : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), m_Mean(mean) {}
+
+    bool operator()(double x, double& result) const {
+        result = 0.0;
+        for (std::size_t i = 0u; i < m_Samples.size(); ++i) {
+            double residual = m_Samples[i];
+            if (residual <= 0.0) {
+                continue;
             }
-            return true;
+            double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]);
+            residual = std::log(residual + x) - m_Mean;
+            result += n * pow2(residual);
         }
+        return true;
+    }
 
-    private:
-        const TWeightStyleVec &m_WeightStyles;
-        const TDouble1Vec &m_Samples;
-        const TDouble4Vec1Vec &m_Weights;
-        double m_Mean;
+private:
+    const TWeightStyleVec& m_WeightStyles;
+    const TDouble1Vec& m_Samples;
+    const TDouble4Vec1Vec& m_Weights;
+    double m_Mean;
 };
 
 } // detail::
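
To make the functor's role concrete: for integer data the priors in this file model each datum as a continuous value plus a hidden offset Z that is uniform on [0, 1], and integrate quantities such as this square deviation over Z with Gauss-Legendre quadrature. Below is a minimal self-contained sketch of the same idea; all names are hypothetical stand-ins, not the library's API.

    #include <cmath>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Count-weighted sum of squared deviations of log(sample + x) from a
    // fixed mean, i.e. the integrand wrapped by the functor above.
    double logSampleSquareDeviation(const std::vector<double>& samples,
                                    const std::vector<double>& counts,
                                    double mean,
                                    double x) {
        double result = 0.0;
        for (std::size_t i = 0; i < samples.size(); ++i) {
            if (samples[i] <= 0.0) {
                continue; // Non-positive values cannot be log-normal.
            }
            double residual = std::log(samples[i] + x) - mean;
            result += counts[i] * residual * residual;
        }
        return result;
    }

    int main() {
        std::vector<double> samples{2.0, 3.0, 5.0};
        std::vector<double> counts{1.0, 1.0, 2.0};
        double mean = std::log(3.0);

        // Three-point Gauss-Legendre rule mapped to [0, 1]: approximates the
        // expectation over the hidden offset Z ~ U[0, 1].
        const double nodes[]{0.5 - 0.5 * std::sqrt(3.0 / 5.0), 0.5, 0.5 + 0.5 * std::sqrt(3.0 / 5.0)};
        const double weights[]{5.0 / 18.0, 8.0 / 18.0, 5.0 / 18.0};
        double expectation = 0.0;
        for (int i = 0; i < 3; ++i) {
            expectation += weights[i] * logSampleSquareDeviation(samples, counts, mean, nodes[i]);
        }
        std::cout << "E[square deviation] ~= " << expectation << '\n';
    }

A three-point rule is exact for polynomials up to degree five, which is why a low-order rule suffices for integrands this smooth.
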
@@ -733,7 +632,6 @@ const std::string NUMBER_SAMPLES_TAG("f");
 //const std::string MAXIMUM_TAG("h"); No longer used
 const std::string DECAY_RATE_TAG("i");
 const std::string EMPTY_STRING;
-
 }
 
 CLogNormalMeanPrecConjugate::CLogNormalMeanPrecConjugate(maths_t::EDataType dataType,
@@ -743,39 +641,34 @@ CLogNormalMeanPrecConjugate::CLogNormalMeanPrecConjugate(maths_t::EDataType data
                                                          double gammaShape,
                                                          double gammaRate,
                                                          double decayRate,
-                                                         double offsetMargin) :
-        CPrior(dataType, decayRate),
-        m_Offset(offset),
-        m_OffsetMargin(offsetMargin),
-        m_GaussianMean(gaussianMean),
-        m_GaussianPrecision(gaussianPrecision),
-        m_GammaShape(gammaShape),
-        m_GammaRate(gammaRate)
-{}
-
-CLogNormalMeanPrecConjugate::CLogNormalMeanPrecConjugate(const SDistributionRestoreParams &params,
-                                                         core::CStateRestoreTraverser &traverser,
-                                                         double offsetMargin) :
-        CPrior(params.s_DataType, params.s_DecayRate),
-        m_Offset(0.0),
-        m_OffsetMargin(offsetMargin),
-        m_GaussianMean(0.0),
-        m_GaussianPrecision(0.0),
-        m_GammaShape(0.0),
-        m_GammaRate(0.0)
-{
+                                                         double offsetMargin)
+    : CPrior(dataType, decayRate),
+      m_Offset(offset),
+      m_OffsetMargin(offsetMargin),
+      m_GaussianMean(gaussianMean),
+      m_GaussianPrecision(gaussianPrecision),
+      m_GammaShape(gammaShape),
+      m_GammaRate(gammaRate) {
+}
+
+CLogNormalMeanPrecConjugate::CLogNormalMeanPrecConjugate(const SDistributionRestoreParams& params,
+                                                         core::CStateRestoreTraverser& traverser,
+                                                         double offsetMargin)
+    : CPrior(params.s_DataType, params.s_DecayRate),
+      m_Offset(0.0),
+      m_OffsetMargin(offsetMargin),
+      m_GaussianMean(0.0),
+      m_GaussianPrecision(0.0),
+      m_GammaShape(0.0),
+      m_GammaRate(0.0) {
     traverser.traverseSubLevel(boost::bind(&CLogNormalMeanPrecConjugate::acceptRestoreTraverser, this, _1));
 }
 
-bool CLogNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
-        RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG,
-                               double decayRate,
-                               core::CStringUtils::stringToType(traverser.value(), decayRate),
-                               this->decayRate(decayRate))
+bool CLogNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
+        RESTORE_SETUP_TEARDOWN(
+            DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate))
         RESTORE_BUILT_IN(OFFSET_TAG, m_Offset)
         RESTORE_BUILT_IN(GAUSSIAN_MEAN_TAG, m_GaussianMean)
         RESTORE_BUILT_IN(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision)
@@ -785,80 +678,64 @@ bool CLogNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTrav
                                double numberSamples,
                                core::CStringUtils::stringToType(traverser.value(), numberSamples),
                                this->numberSamples(numberSamples))
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
 
-CLogNormalMeanPrecConjugate CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType,
-                                                                             double offset,
-                                                                             double decayRate,
-                                                                             double offsetMargin)
-{
-    return CLogNormalMeanPrecConjugate(dataType, offset + offsetMargin,
-                                       NON_INFORMATIVE_MEAN, NON_INFORMATIVE_PRECISION,
-                                       NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE,
-                                       decayRate, offsetMargin);
+CLogNormalMeanPrecConjugate
+CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType, double offset, double decayRate, double offsetMargin) {
+    return CLogNormalMeanPrecConjugate(dataType,
+                                       offset + offsetMargin,
+                                       NON_INFORMATIVE_MEAN,
+                                       NON_INFORMATIVE_PRECISION,
+                                       NON_INFORMATIVE_SHAPE,
+                                       NON_INFORMATIVE_RATE,
+                                       decayRate,
+                                       offsetMargin);
 }
 
-CLogNormalMeanPrecConjugate::EPrior CLogNormalMeanPrecConjugate::type() const
-{
+CLogNormalMeanPrecConjugate::EPrior CLogNormalMeanPrecConjugate::type() const {
     return E_LogNormal;
 }
 
-CLogNormalMeanPrecConjugate *CLogNormalMeanPrecConjugate::clone() const
-{
+CLogNormalMeanPrecConjugate* CLogNormalMeanPrecConjugate::clone() const {
     return new CLogNormalMeanPrecConjugate(*this);
 }
 
-void CLogNormalMeanPrecConjugate::setToNonInformative(double offset,
-                                                      double decayRate)
-{
-    *this = nonInformativePrior(this->dataType(),
-                                offset + this->offsetMargin(),
-                                decayRate, this->offsetMargin());
+void CLogNormalMeanPrecConjugate::setToNonInformative(double offset, double decayRate) {
+    *this = nonInformativePrior(this->dataType(), offset + this->offsetMargin(), decayRate, this->offsetMargin());
 }
 
-double CLogNormalMeanPrecConjugate::offsetMargin() const
-{
+double CLogNormalMeanPrecConjugate::offsetMargin() const {
     return m_OffsetMargin;
 }
 
-bool CLogNormalMeanPrecConjugate::needsOffset() const
-{
+bool CLogNormalMeanPrecConjugate::needsOffset() const {
     return true;
 }
 
-double CLogNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec &weightStyles,
-                                                 const TDouble1Vec &samples,
-                                                 const TDouble4Vec1Vec &weights)
-{
+double
+CLogNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) {
     COffsetCost cost(*this);
     CApplyOffset apply(*this);
     return this->adjustOffsetWithCost(weightStyles, samples, weights, cost, apply);
 }
 
-double CLogNormalMeanPrecConjugate::offset() const
-{
+double CLogNormalMeanPrecConjugate::offset() const {
     return m_Offset;
 }
 
-void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles,
-                                             const TDouble1Vec &samples,
-                                             const TDouble4Vec1Vec &weights)
-{
-    if (samples.empty())
-    {
+void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles,
+                                             const TDouble1Vec& samples,
+                                             const TDouble4Vec1Vec& weights) {
+    if (samples.empty()) {
         return;
     }
 
-    if (samples.size() != weights.size())
-    {
-        LOG_ERROR("Mismatch in samples '"
-                  << core::CContainerPrinter::print(samples)
-                  << "' and weights '"
-                  << core::CContainerPrinter::print(weights) << "'");
+    if (samples.size() != weights.size()) {
+        LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+                                          << core::CContainerPrinter::print(weights) << "'");
         return;
     }
 
@@ -928,21 +805,18 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles
 
     double r = m_GammaRate / m_GammaShape;
     double s = std::exp(-r);
-    try
-    {
-        if (this->isInteger())
-        {
+    try {
+        if (this->isInteger()) {
             // Filled in with samples rescaled to have approximately unit
             // variance scale.
             TDouble1Vec scaledSamples;
             scaledSamples.resize(samples.size(), 1.0);
 
             TMeanAccumulator logSamplesMean_;
-            for (std::size_t i = 0u; i < samples.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < samples.size(); ++i) {
                 double n = maths_t::countForUpdate(weightStyles, weights[i]);
-                double varianceScale =  maths_t::seasonalVarianceScale(weightStyles, weights[i])
-                                      * maths_t::countVarianceScale(weightStyles, weights[i]);
+                double varianceScale =
+                    maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]);
                 double x = samples[i] + m_Offset;
                 numberSamples += n;
                 double t = varianceScale == 1.0 ? r : r + std::log(s + varianceScale * (1.0 - s));
@@ -956,37 +830,24 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles
             scaledNumberSamples = CBasicStatistics::count(logSamplesMean_);
             logSamplesMean = CBasicStatistics::mean(logSamplesMean_);
 
-            double mean = (m_GaussianPrecision * m_GaussianMean
-                           + scaledNumberSamples * logSamplesMean)
-                          / (m_GaussianPrecision + scaledNumberSamples);
-            for (std::size_t i = 0u; i < scaledSamples.size(); ++i)
-            {
+            double mean =
+                (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) / (m_GaussianPrecision + scaledNumberSamples);
+            for (std::size_t i = 0u; i < scaledSamples.size(); ++i) {
                 double scale = scaledSamples[i];
-                scaledSamples[i] = scale == 1.0 ?
-                                   samples[i] + m_Offset :
-                                   std::exp(mean + (std::log(samples[i] + m_Offset) - mean)
-                                                  / std::sqrt(scale));
+                scaledSamples[i] =
+                    scale == 1.0 ? samples[i] + m_Offset : std::exp(mean + (std::log(samples[i] + m_Offset) - mean) / std::sqrt(scale));
             }
 
-            detail::CLogSampleSquareDeviation deviationFunction(weightStyles,
-                                                                scaledSamples,
-                                                                weights,
-                                                                logSamplesMean);
-            CIntegration::gaussLegendre<CIntegration::OrderThree>(deviationFunction,
-                                                                 0.0, 1.0,
-                                                                 logSamplesSquareDeviation);
-        }
-        else
-        {
+            detail::CLogSampleSquareDeviation deviationFunction(weightStyles, scaledSamples, weights, logSamplesMean);
+            CIntegration::gaussLegendre(deviationFunction, 0.0, 1.0, logSamplesSquareDeviation);
+        } else {
             TMeanVarAccumulator logSamplesMoments;
-            for (std::size_t i = 0u; i < samples.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < samples.size(); ++i) {
                 double n = maths_t::countForUpdate(weightStyles, weights[i]);
-                double varianceScale =  maths_t::seasonalVarianceScale(weightStyles, weights[i])
-                                      * maths_t::countVarianceScale(weightStyles, weights[i]);
+                double varianceScale =
+                    maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]);
                 double x = samples[i] + m_Offset;
-                if (x <= 0.0)
-                {
+                if (x <= 0.0) {
                     LOG_ERROR("Discarding " << x << " it's not log-normal");
                     continue;
                 }
@@ -998,26 +859,19 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles
             }
             scaledNumberSamples = CBasicStatistics::count(logSamplesMoments);
             logSamplesMean = CBasicStatistics::mean(logSamplesMoments);
-            logSamplesSquareDeviation = (scaledNumberSamples - 1.0)
-                                       * CBasicStatistics::variance(logSamplesMoments);
+            logSamplesSquareDeviation = (scaledNumberSamples - 1.0) * CBasicStatistics::variance(logSamplesMoments);
         }
-    }
-    catch (const std::exception &e)
-    {
+    } catch (const std::exception& e) {
         LOG_ERROR("Failed to update likelihood: " << e.what());
         return;
     }
 
     m_GammaShape += 0.5 * numberSamples;
-    m_GammaRate += 0.5 * (logSamplesSquareDeviation
-                          + m_GaussianPrecision
-                            * scaledNumberSamples
-                            * pow2(logSamplesMean - m_GaussianMean)
-                            / (m_GaussianPrecision + scaledNumberSamples));
-
-    m_GaussianMean = (m_GaussianPrecision * m_GaussianMean
-                      + scaledNumberSamples * logSamplesMean)
-                     / (m_GaussianPrecision + scaledNumberSamples);
+    m_GammaRate += 0.5 * (logSamplesSquareDeviation + m_GaussianPrecision * scaledNumberSamples * pow2(logSamplesMean - m_GaussianMean) /
+                                                          (m_GaussianPrecision + scaledNumberSamples));
+
+    m_GaussianMean =
+        (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) / (m_GaussianPrecision + scaledNumberSamples);
     m_GaussianPrecision += scaledNumberSamples;
 
     // If the coefficient of variation of the data is too small we run
@@ -1026,8 +880,7 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles
     // mean) in the data of size MINIMUM_COEFFICIENT_OF_VARIATION on the
     // prior parameters.
 
-    if (m_GaussianPrecision > 1.5)
-    {
+    if (m_GaussianPrecision > 1.5) {
         // The idea is to model the impact of a coefficient of variation
         // equal to MINIMUM_COEFFICIENT_OF_VARIATION on the parameters
         // of the prior this will affect. In particular, this enters in
@@ -1046,30 +899,21 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles
         //
         // From which we derive the results below.
 
-        double minimumRate = (2.0 * m_GammaShape - 1.0)
-                             * pow2(MINIMUM_COEFFICIENT_OF_VARIATION);
+        double minimumRate = (2.0 * m_GammaShape - 1.0) * pow2(MINIMUM_COEFFICIENT_OF_VARIATION);
 
-        if (m_GammaRate < minimumRate)
-        {
-            double extraVariation = (minimumRate - m_GammaRate)
-                                    / (m_GaussianPrecision - 1.0);
+        if (m_GammaRate < minimumRate) {
+            double extraVariation = (minimumRate - m_GammaRate) / (m_GaussianPrecision - 1.0);
             m_GammaRate = minimumRate;
             m_GaussianMean -= 0.5 * extraVariation;
         }
     }
 
-    LOG_TRACE("logSamplesMean = " << logSamplesMean
-              << ", logSamplesSquareDeviation = " << logSamplesSquareDeviation
-              << ", numberSamples = " << numberSamples
-              << ", scaledNumberSamples = " << scaledNumberSamples);
-    LOG_TRACE("m_GammaShape = " << m_GammaShape
-              << ", m_GammaRate = " << m_GammaRate
-              << ", m_GaussianMean = " << m_GaussianMean
-              << ", m_GaussianPrecision = " << m_GaussianPrecision
-              << ", m_Offset = " << m_Offset);
-
-    if (this->isBad())
-    {
+    LOG_TRACE("logSamplesMean = " << logSamplesMean << ", logSamplesSquareDeviation = " << logSamplesSquareDeviation
+                                  << ", numberSamples = " << numberSamples << ", scaledNumberSamples = " << scaledNumberSamples);
+    LOG_TRACE("m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate << ", m_GaussianMean = " << m_GaussianMean
+                                << ", m_GaussianPrecision = " << m_GaussianPrecision << ", m_Offset = " << m_Offset);
+
+    if (this->isBad()) {
         LOG_ERROR("Update failed (" << this->debug() << ")");
         LOG_ERROR("samples = " << core::CContainerPrinter::print(samples));
         LOG_ERROR("weights = " << core::CContainerPrinter::print(weights));
@@ -1077,25 +921,21 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles
     }
 }
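
For reference, the update above is the standard normal-gamma conjugate update applied to the logs of the samples: with Gaussian mean m and precision p, gamma shape a and rate b, and n log samples with mean x and total square deviation S, it is a += n/2, b += (S + p*n*(x - m)^2/(p + n))/2, m <- (p*m + n*x)/(p + n) and p += n. A minimal sketch assuming unit weights and no variance scaling; the variable names are illustrative, not the class's.

    #include <cmath>
    #include <iostream>
    #include <vector>

    int main() {
        // Prior parameters: Gaussian mean m and precision p, gamma shape a and rate b.
        double m = 0.0, p = 0.0, a = 0.0, b = 0.0;

        std::vector<double> samples{1.2, 2.3, 3.1, 1.9};

        // Mean and total square deviation of the log samples.
        double n = 0.0, logMean = 0.0;
        for (double x : samples) {
            n += 1.0;
            logMean += (std::log(x) - logMean) / n;
        }
        double squareDeviation = 0.0;
        for (double x : samples) {
            double r = std::log(x) - logMean;
            squareDeviation += r * r;
        }

        // Conjugate update; note b and m must be computed with the old m and p.
        a += 0.5 * n;
        b += 0.5 * (squareDeviation + p * n * (logMean - m) * (logMean - m) / (p + n));
        m = (p * m + n * logMean) / (p + n);
        p += n;

        std::cout << "m = " << m << ", p = " << p << ", a = " << a << ", b = " << b << '\n';
    }
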
 
-void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time)
-{
-    if (!CMathsFuncs::isFinite(time) || time < 0.0)
-    {
+void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time) {
+    if (!CMathsFuncs::isFinite(time) || time < 0.0) {
         LOG_ERROR("Bad propagation time " << time);
         return;
     }
 
-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // Nothing to be done.
         return;
     }
 
     double alpha = std::exp(-this->decayRate() * time);
-    double beta  = 1.0 - alpha;
+    double beta = 1.0 - alpha;
 
-    m_GaussianPrecision =  alpha * m_GaussianPrecision
-                         + beta  * NON_INFORMATIVE_PRECISION;
+    m_GaussianPrecision = alpha * m_GaussianPrecision + beta * NON_INFORMATIVE_PRECISION;
 
     // We want to increase the variance of the gamma distribution while
     // holding its mean constant s.t. in the limit t -> inf var -> inf.
@@ -1107,38 +947,27 @@ void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time)
     //
     // Thus the mean is unchanged and variance is increased by 1 / f.
 
-    double factor = std::min((  alpha * m_GammaShape
-                              + beta  * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0);
+    double factor = std::min((alpha * m_GammaShape + beta * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0);
 
     m_GammaShape *= factor;
-    m_GammaRate  *= factor;
+    m_GammaRate *= factor;
 
     this->numberSamples(this->numberSamples() * alpha);
 
-    LOG_TRACE("time = " << time
-              << ", alpha = " << alpha
-              << ", m_GaussianPrecision = " << m_GaussianPrecision
-              << ", m_GammaShape = " << m_GammaShape
-              << ", m_GammaRate = " << m_GammaRate
-              << ", numberSamples = " << this->numberSamples());
+    LOG_TRACE("time = " << time << ", alpha = " << alpha << ", m_GaussianPrecision = " << m_GaussianPrecision << ", m_GammaShape = "
+                        << m_GammaShape << ", m_GammaRate = " << m_GammaRate << ", numberSamples = " << this->numberSamples());
 }
 
-CLogNormalMeanPrecConjugate::TDoubleDoublePr
-CLogNormalMeanPrecConjugate::marginalLikelihoodSupport() const
-{
+CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::marginalLikelihoodSupport() const {
     return std::make_pair(-m_Offset, boost::numeric::bounds<double>::highest());
 }
 
-double CLogNormalMeanPrecConjugate::marginalLikelihoodMean() const
-{
+double CLogNormalMeanPrecConjugate::marginalLikelihoodMean() const {
     return this->isInteger() ? this->mean() - 0.5 : this->mean();
 }
 
-double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec &weightStyles,
-                                                           const TDouble4Vec &weights) const
-{
-    if (this->isNonInformative())
-    {
+double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const {
+    if (this->isNonInformative()) {
         return std::exp(m_GaussianMean) - m_Offset;
     }
 
@@ -1147,57 +976,37 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec
     // See evaluateFunctionOnJointDistribution for more discussion.
 
     double varianceScale = 1.0;
-    try
-    {
-        varianceScale =  maths_t::seasonalVarianceScale(weightStyles, weights)
-                       * maths_t::countVarianceScale(weightStyles, weights);
-    }
-    catch (const std::exception &e)
-    {
-        LOG_ERROR("Failed to get variance scale: " << e.what());
-    }
-    try
-    {
+    try {
+        varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights);
+    } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale: " << e.what()); }
+    try {
         double r = m_GammaRate / m_GammaShape;
         double s = std::exp(-r);
         double location;
         double scale;
-        detail::locationAndScale(varianceScale, r, s,
-                                 m_GaussianMean, m_GaussianPrecision,
-                                 m_GammaRate, m_GammaShape,
-                                 location, scale);
+        detail::locationAndScale(varianceScale, r, s, m_GaussianMean, m_GaussianPrecision, m_GammaRate, m_GammaShape, location, scale);
         LOG_TRACE("location = " << location << ", scale = " << scale);
-        if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE)
-        {
+        if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) {
             boost::math::lognormal_distribution<> logNormal(location, scale);
             return boost::math::mode(logNormal) - m_Offset;
         }
         CLogTDistribution logt(2.0 * m_GammaShape, location, scale);
         double result = mode(logt) - m_Offset - (this->isInteger() ? 0.5 : 0.0);
         return result;
-    }
-    catch (const std::exception &e)
-    {
-        LOG_ERROR("Failed to compute marginal likelihood mode: " << e.what()
-                  << ", gaussian mean = " << m_GaussianMean
-                  << ", gaussian precision = " << m_GaussianPrecision
-                  << ", gamma rate = " << m_GammaRate
-                  << ", gamma shape = " << m_GammaShape);
+    } catch (const std::exception& e) {
+        LOG_ERROR("Failed to compute marginal likelihood mode: " << e.what() << ", gaussian mean = " << m_GaussianMean
+                                                                 << ", gaussian precision = " << m_GaussianPrecision
+                                                                 << ", gamma rate = " << m_GammaRate << ", gamma shape = " << m_GammaShape);
     }
 
     // Fall back to using the exponentiated Gaussian's mean and precision.
     double normalMean = this->normalMean();
     double normalPrecision = this->normalPrecision() / varianceScale;
-    return (normalPrecision == 0.0 ?
-            0.0 : std::exp(normalMean - 1.0 / normalPrecision))
-           - m_Offset;
+    return (normalPrecision == 0.0 ? 0.0 : std::exp(normalMean - 1.0 / normalPrecision)) - m_Offset;
 }
 
-double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec &weightStyles,
-                                                               const TDouble4Vec &weights) const
-{
-    if (this->isNonInformative())
-    {
+double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const {
+    if (this->isNonInformative()) {
         return boost::numeric::bounds<double>::highest();
     }
 
@@ -1222,28 +1031,15 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl
     // Note that b / a > 0 so this is necessarily non-negative.
 
     double varianceScale = 1.0;
-    try
-    {
-        varianceScale =  maths_t::seasonalVarianceScale(weightStyles, weights)
-                       * maths_t::countVarianceScale(weightStyles, weights);
-    }
-    catch (const std::exception &e)
-    {
-        LOG_ERROR("Failed to get variance scale: " << e.what());
-    }
-    double vh =   std::exp(2.0 * m_GaussianMean +  m_GammaRate / m_GammaShape
-                                                 * (2.0 / m_GaussianPrecision + 1.0))
-               * (std::exp(m_GammaRate / m_GammaShape) - 1.0);
-
-    if (m_GammaShape < MINIMUM_LOGNORMAL_SHAPE)
-    {
-        try
-        {
-            detail::CVarianceKernel f(this->marginalLikelihoodMean(),
-                                      m_GaussianMean,
-                                      m_GaussianPrecision,
-                                      m_GammaShape,
-                                      m_GammaRate);
+    try {
+        varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights);
+    } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale: " << e.what()); }
+    double vh = std::exp(2.0 * m_GaussianMean + m_GammaRate / m_GammaShape * (2.0 / m_GaussianPrecision + 1.0)) *
+                (std::exp(m_GammaRate / m_GammaShape) - 1.0);
+
+    if (m_GammaShape < MINIMUM_LOGNORMAL_SHAPE) {
+        try {
+            detail::CVarianceKernel f(this->marginalLikelihoodMean(), m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate);
             boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
             TDoubleVec a(2);
             TDoubleVec b(2);
@@ -1255,29 +1051,21 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl
             b[1] = boost::math::quantile(normal, 0.97);
 
             detail::CVarianceKernel::TValue variance;
-            if (CIntegration::sparseGaussLegendre(f, a, b, variance))
-            {
+            if (CIntegration::sparseGaussLegendre(f, a, b, variance)) {
                 double vl = variance(0) / variance(1);
                 double alpha = std::min(2.0 * (1.0 - m_GammaShape / MINIMUM_LOGNORMAL_SHAPE), 1.0);
                 return varianceScale * alpha * vl + (1.0 - alpha) * vh;
             }
-        }
-        catch (const std::exception &e)
-        {
-            LOG_ERROR("Failed to calculate variance: " << e.what());
-        }
+        } catch (const std::exception& e) { LOG_ERROR("Failed to calculate variance: " << e.what()); }
     }
     return varianceScale * vh;
 }
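
The closed form vh above is the lognormal variance identity: if X ~ lognormal(mu, sigma^2) then Var[X] = exp(2*mu + sigma^2) * (exp(sigma^2) - 1). Here sigma^2 is approximated by the expected reciprocal precision r = b / a, and the extra 2*r/p in the exponent accounts for the posterior uncertainty in the Gaussian mean. A sketch of the computation in isolation, with illustrative parameter values:

    #include <cmath>
    #include <iostream>

    int main() {
        double mu = 0.5;           // posterior Gaussian mean
        double p = 20.0;           // posterior Gaussian precision
        double a = 50.0, b = 10.0; // posterior gamma shape and rate
        double r = b / a;          // expected reciprocal precision

        // exp(2*mu + r*(2/p + 1)) * (exp(r) - 1), the large-shape approximation.
        double vh = std::exp(2.0 * mu + r * (2.0 / p + 1.0)) * (std::exp(r) - 1.0);
        std::cout << "approximate predictive variance = " << vh << '\n';
    }
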
 
 CLogNormalMeanPrecConjugate::TDoubleDoublePr
 CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage,
-                                                                  const TWeightStyleVec &weightStyles,
-                                                                  const TDouble4Vec &weights) const
-{
-    if (this->isNonInformative())
-    {
+                                                                  const TWeightStyleVec& weightStyles,
+                                                                  const TDouble4Vec& weights) const {
+    if (this->isNonInformative()) {
         return this->marginalLikelihoodSupport();
     }
 
@@ -1286,74 +1074,53 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent
 
     // We use the fact that the marginal likelihood is a log-t distribution.
 
-    try
-    {
-        double varianceScale =  maths_t::seasonalVarianceScale(weightStyles, weights)
-                              * maths_t::countVarianceScale(weightStyles, weights);
+    try {
+        double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights);
 
         double r = m_GammaRate / m_GammaShape;
         double s = std::exp(-r);
         double location;
         double scale;
-        detail::locationAndScale(varianceScale, r, s,
-                                 m_GaussianMean, m_GaussianPrecision,
-                                 m_GammaRate, m_GammaShape,
-                                 location, scale);
+        detail::locationAndScale(varianceScale, r, s, m_GaussianMean, m_GaussianPrecision, m_GammaRate, m_GammaShape, location, scale);
         LOG_TRACE("location = " << location << ", scale = " << scale);
 
-        if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE)
-        {
+        if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) {
             boost::math::lognormal_distribution<> logNormal(location, scale);
-            double x1 =  boost::math::quantile(logNormal, (1.0 - percentage) / 2.0)
-                       - m_Offset - (this->isInteger() ? 0.5 : 0.0);
-            double x2 =  percentage > 0.0 ?
-                         boost::math::quantile(logNormal, (1.0 + percentage) / 2.0)
-                       - m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1;
+            double x1 = boost::math::quantile(logNormal, (1.0 - percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0);
+            double x2 = percentage > 0.0
+                            ? boost::math::quantile(logNormal, (1.0 + percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0)
+                            : x1;
             LOG_TRACE("x1 = " << x1 << ", x2 = " << x2);
             return std::make_pair(x1, x2);
         }
         CLogTDistribution logt(2.0 * m_GammaShape, location, scale);
-        double x1 =  quantile(logt, (1.0 - percentage) / 2.0)
-                   - m_Offset - (this->isInteger() ? 0.5 : 0.0);
-        double x2 =  percentage > 0.0 ?
-                     quantile(logt, (1.0 + percentage) / 2.0)
-                   - m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1;
+        double x1 = quantile(logt, (1.0 - percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0);
+        double x2 = percentage > 0.0 ? quantile(logt, (1.0 + percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1;
         LOG_TRACE("x1 = " << x1 << ", x2 = " << x2);
         return std::make_pair(x1, x2);
-    }
-    catch (const std::exception &e)
-    {
-        LOG_ERROR("Failed to compute confidence interval: " << e.what());
-    }
+    } catch (const std::exception& e) { LOG_ERROR("Failed to compute confidence interval: " << e.what()); }
 
     return this->marginalLikelihoodSupport();
 }
 
-maths_t::EFloatingPointErrorStatus
-CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                                        const TDouble1Vec &samples,
-                                                        const TDouble4Vec1Vec &weights,
-                                                        double &result) const
-{
+maths_t::EFloatingPointErrorStatus CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                                           const TDouble1Vec& samples,
+                                                                                           const TDouble4Vec1Vec& weights,
+                                                                                           double& result) const {
     result = 0.0;
 
-    if (samples.empty())
-    {
+    if (samples.empty()) {
         LOG_ERROR("Can't compute likelihood for empty sample set");
         return maths_t::E_FpFailed;
     }
 
-    if (samples.size() != weights.size())
-    {
-        LOG_ERROR("Mismatch in samples '"
-                  << core::CContainerPrinter::print(samples)
-                  << "' and weights '"
-                  << core::CContainerPrinter::print(weights) << "'");
+    if (samples.size() != weights.size()) {
+        LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+                                          << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }
 
-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // The non-informative likelihood is improper and effectively
         // zero everywhere. We use minus max double because
         // log(0) = HUGE_VALUE, which causes problems for Windows.
@@ -1366,36 +1133,21 @@ CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &w
         return maths_t::E_FpOverflowed;
     }
 
-    detail::CLogMarginalLikelihood logMarginalLikelihood(weightStyles,
-                                                         samples,
-                                                         weights,
-                                                         m_Offset,
-                                                         m_GaussianMean,
-                                                         m_GaussianPrecision,
-                                                         m_GammaShape,
-                                                         m_GammaRate);
-    if (this->isInteger())
-    {
-        CIntegration::logGaussLegendre<CIntegration::OrderThree>(logMarginalLikelihood,
-                                                                 0.0, 1.0,
-                                                                 result);
-    }
-    else
-    {
+    detail::CLogMarginalLikelihood logMarginalLikelihood(
+        weightStyles, samples, weights, m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate);
+    if (this->isInteger()) {
+        CIntegration::logGaussLegendre<CIntegration::OrderThree>(logMarginalLikelihood, 0.0, 1.0, result);
+    } else {
         logMarginalLikelihood(0.0, result);
     }
 
     maths_t::EFloatingPointErrorStatus status =
-            static_cast<maths_t::EFloatingPointErrorStatus>(
-                    logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
-    if (status & maths_t::E_FpFailed)
-    {
+        static_cast<maths_t::EFloatingPointErrorStatus>(logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
+    if (status & maths_t::E_FpFailed) {
         LOG_ERROR("Failed to compute log likelihood (" << this->debug() << ")");
         LOG_ERROR("samples = " << core::CContainerPrinter::print(samples));
         LOG_ERROR("weights = " << core::CContainerPrinter::print(weights));
-    }
-    else if (status & maths_t::E_FpOverflowed)
-    {
+    } else if (status & maths_t::E_FpOverflowed) {
         LOG_TRACE("Log likelihood overflowed for (" << this->debug() << ")");
         LOG_TRACE("samples = " << core::CContainerPrinter::print(samples));
         LOG_TRACE("weights = " << core::CContainerPrinter::print(weights));
@@ -1403,18 +1155,14 @@ CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &w
     return status;
 }
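
The integer branch above computes log E[f(Z)] for Z uniform on [0, 1] entirely in log space. A sketch of how a logGaussLegendre-style helper can be written with log-sum-exp over the quadrature nodes; this is an illustration of the technique, not the library's implementation.

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    // Given a function returning log f(x), approximate the log of the integral
    // of f over [0, 1] using a three-point Gauss-Legendre rule and log-sum-exp.
    template<typename F>
    double logGaussLegendre01(const F& logf) {
        const double nodes[]{0.5 - 0.5 * std::sqrt(3.0 / 5.0), 0.5, 0.5 + 0.5 * std::sqrt(3.0 / 5.0)};
        const double weights[]{5.0 / 18.0, 8.0 / 18.0, 5.0 / 18.0};
        double terms[3];
        for (int i = 0; i < 3; ++i) {
            terms[i] = std::log(weights[i]) + logf(nodes[i]);
        }
        // Subtract the maximum before exponentiating to avoid underflow.
        double max = *std::max_element(terms, terms + 3);
        double sum = 0.0;
        for (double t : terms) {
            sum += std::exp(t - max);
        }
        return max + std::log(sum);
    }

    int main() {
        // log of the integral of exp(-x) over [0, 1] is log(1 - 1/e) ~= -0.4587.
        std::cout << logGaussLegendre01([](double x) { return -x; }) << '\n';
    }
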
 
-void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
-                                                           TDouble1Vec &samples) const
-{
+void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const {
     samples.clear();
 
-    if (numberSamples == 0 || this->numberSamples() == 0.0)
-    {
+    if (numberSamples == 0 || this->numberSamples() == 0.0) {
         return;
     }
 
-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // We can't sample the marginal likelihood directly. This should
         // only happen if we've had one sample so just return that sample.
         samples.push_back(std::exp(m_GaussianMean) - m_Offset);
@@ -1458,116 +1206,75 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam
 
     samples.reserve(numberSamples);
 
-    double scale = std::sqrt((m_GaussianPrecision + 1.0)
-                             / m_GaussianPrecision
-                             * m_GammaRate / m_GammaShape);
-    try
-    {
+    double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape);
+    try {
         boost::math::lognormal_distribution<> lognormal(m_GaussianMean, scale);
 
         double mean = boost::math::mean(lognormal);
 
-        LOG_TRACE("mean = " << mean
-                  << ", scale = " << scale
-                  << ", numberSamples = " << numberSamples);
+        LOG_TRACE("mean = " << mean << ", scale = " << scale << ", numberSamples = " << numberSamples);
 
         TDoubleDoublePr support = this->marginalLikelihoodSupport();
 
         double lastPartialExpectation = 0.0;
 
-        for (std::size_t i = 1u; i < numberSamples; ++i)
-        {
-            double q  =  static_cast<double>(i)
-                       / static_cast<double>(numberSamples);
+        for (std::size_t i = 1u; i < numberSamples; ++i) {
+            double q = static_cast<double>(i) / static_cast<double>(numberSamples);
             double xq = std::log(boost::math::quantile(lognormal, q));
 
-            double z =  (xq - m_GaussianMean - scale * scale)
-                       / scale
-                       / boost::math::double_constants::root_two;
+            double z = (xq - m_GaussianMean - scale * scale) / scale / boost::math::double_constants::root_two;
 
             double partialExpectation = mean * (1.0 + boost::math::erf(z)) / 2.0;
 
-            double sample = static_cast<double>(numberSamples)
-                            * (partialExpectation - lastPartialExpectation)
-                            - m_Offset;
+            double sample = static_cast<double>(numberSamples) * (partialExpectation - lastPartialExpectation) - m_Offset;
 
             LOG_TRACE("sample = " << sample);
 
             // Sanity check the sample: should be in the distribution support.
-            if (sample >= support.first && sample <= support.second)
-            {
+            if (sample >= support.first && sample <= support.second) {
                 samples.push_back(sample);
-            }
-            else
-            {
-                LOG_ERROR("Sample out of bounds: sample = " << sample - m_Offset
-                          << ", gaussianMean = " << m_GaussianMean
-                          << ", scale = " << scale
-                          << ", q = " << q
-                          << ", x(q) = " << xq
-                          << ", mean = " << mean);
+            } else {
+                LOG_ERROR("Sample out of bounds: sample = " << sample - m_Offset << ", gaussianMean = " << m_GaussianMean << ", scale = "
+                                                            << scale << ", q = " << q << ", x(q) = " << xq << ", mean = " << mean);
             }
 
             lastPartialExpectation = partialExpectation;
         }
 
-        double sample = static_cast<double>(numberSamples)
-                        * (mean - lastPartialExpectation) - m_Offset;
+        double sample = static_cast<double>(numberSamples) * (mean - lastPartialExpectation) - m_Offset;
 
         LOG_TRACE("sample = " << sample);
 
-        if (sample >= support.first && sample <= support.second)
-        {
+        if (sample >= support.first && sample <= support.second) {
             samples.push_back(sample);
+        } else {
+            LOG_ERROR("Sample out of bounds: sample = " << sample << ", gaussianMean = " << m_GaussianMean << ", scale = " << scale
+                                                        << ", mean = " << mean);
         }
-        else
-        {
-            LOG_ERROR("Sample out of bounds: sample = " << sample
-                      << ", gaussianMean = " << m_GaussianMean
-                      << ", scale = " << scale
-                      << ", mean = " << mean);
-        }
-    }
-    catch (const std::exception &e)
-    {
-        LOG_ERROR("Failed to sample: " << e.what()
-                  << ", gaussianMean " << m_GaussianMean
-                  << ", scale = " << scale);
+    } catch (const std::exception& e) {
+        LOG_ERROR("Failed to sample: " << e.what() << ", gaussianMean " << m_GaussianMean << ", scale = " << scale);
     }
 }
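
The loop above is expectation-preserving sampling: it splits the marginal into numberSamples equal-probability buckets and emits each bucket's conditional mean, using the lognormal partial expectation E[X 1{X <= x}] = E[X] * (1 + erf((ln x - mu - sigma^2) / (sigma * sqrt(2)))) / 2. A self-contained sketch; the internal quantiles are precomputed because <cmath> has no inverse normal c.d.f.

    #include <cmath>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        const double mu = 0.0, sigma = 0.5;
        const std::size_t n = 4;

        const double mean = std::exp(mu + 0.5 * sigma * sigma);

        // Standard normal quantiles at 1/4, 2/4 and 3/4.
        const double probit[]{-0.67448975, 0.0, 0.67448975};

        std::vector<double> samples;
        double lastPartial = 0.0;
        for (std::size_t i = 1; i < n; ++i) {
            double logXq = mu + sigma * probit[i - 1]; // log of the i-th n-quantile
            double z = (logXq - mu - sigma * sigma) / (sigma * std::sqrt(2.0));
            double partial = mean * (1.0 + std::erf(z)) / 2.0; // E[X 1{X <= xq}]
            samples.push_back(static_cast<double>(n) * (partial - lastPartial));
            lastPartial = partial;
        }
        samples.push_back(static_cast<double>(n) * (mean - lastPartial));

        double sum = 0.0;
        for (double s : samples) {
            std::cout << s << ' ';
            sum += s;
        }
        // By construction the samples average exactly to the distribution mean.
        std::cout << "\nsample mean = " << sum / static_cast<double>(n)
                  << " vs distribution mean = " << mean << '\n';
    }
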
 
-bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles,
-                                                   const TDouble1Vec &samples,
-                                                   const TDouble4Vec1Vec &weights,
-                                                   double &lowerBound,
-                                                   double &upperBound) const
-{
+bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                                   const TDouble1Vec& samples,
+                                                   const TDouble4Vec1Vec& weights,
+                                                   double& lowerBound,
+                                                   double& upperBound) const {
     using TMinusLogCdf = detail::CEvaluateOnSamples<CTools::SMinusLogCdf>;
 
     lowerBound = upperBound = 0.0;
 
-    TMinusLogCdf minusLogCdf(weightStyles,
-                             samples,
-                             weights,
-                             this->isNonInformative(),
-                             m_Offset,
-                             m_GaussianMean,
-                             m_GaussianPrecision,
-                             m_GammaShape,
-                             m_GammaRate);
-
-    if (this->isInteger())
-    {
+    TMinusLogCdf minusLogCdf(
+        weightStyles, samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate);
+
+    if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. to the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
         double value;
-        if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(minusLogCdf,
-                                                                      0.0, 1.0,
-                                                                      value))
-        {
-            LOG_ERROR("Failed computing c.d.f. for "
-                      << core::CContainerPrinter::print(samples));
+        if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(minusLogCdf, 0.0, 1.0, value)) {
+            LOG_ERROR("Failed computing c.d.f. for " << core::CContainerPrinter::print(samples));
             return false;
         }
 
@@ -1576,10 +1283,8 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec &weight
     }
 
     double value;
-    if (!minusLogCdf(0.0, value))
-    {
-        LOG_ERROR("Failed computing c.d.f for "
-                  << core::CContainerPrinter::print(samples));
+    if (!minusLogCdf(0.0, value)) {
+        LOG_ERROR("Failed computing c.d.f for " << core::CContainerPrinter::print(samples));
         return false;
     }
 
@@ -1587,38 +1292,25 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec &weight
     return true;
 }
 
-bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec &weightStyles,
-                                                             const TDouble1Vec &samples,
-                                                             const TDouble4Vec1Vec &weights,
-                                                             double &lowerBound,
-                                                             double &upperBound) const
-{
+bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                                             const TDouble1Vec& samples,
+                                                             const TDouble4Vec1Vec& weights,
+                                                             double& lowerBound,
+                                                             double& upperBound) const {
     using TMinusLogCdfComplement = detail::CEvaluateOnSamples<CTools::SMinusLogCdfComplement>;
 
     lowerBound = upperBound = 0.0;
 
-    TMinusLogCdfComplement minusLogCdfComplement(weightStyles,
-                                                 samples,
-                                                 weights,
-                                                 this->isNonInformative(),
-                                                 m_Offset,
-                                                 m_GaussianMean,
-                                                 m_GaussianPrecision,
-                                                 m_GammaShape,
-                                                 m_GammaRate);
-
-    if (this->isInteger())
-    {
+    TMinusLogCdfComplement minusLogCdfComplement(
+        weightStyles, samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate);
+
+    if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. to the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
         double value;
-        if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(minusLogCdfComplement,
-                                                                      0.0, 1.0,
-                                                                      value))
-        {
-            LOG_ERROR("Failed computing c.d.f. complement for "
-                      << core::CContainerPrinter::print(samples));
+        if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(minusLogCdfComplement, 0.0, 1.0, value)) {
+            LOG_ERROR("Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples));
             return false;
         }
 
@@ -1627,10 +1319,8 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleV
     }
 
     double value;
-    if (!minusLogCdfComplement(0.0, value))
-    {
-        LOG_ERROR("Failed computing c.d.f complement for "
-                  << core::CContainerPrinter::print(samples));
+    if (!minusLogCdfComplement(0.0, value)) {
+        LOG_ERROR("Failed computing c.d.f complement for " << core::CContainerPrinter::print(samples));
         return false;
     }
 
@@ -1639,13 +1329,12 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleV
 }
 
 bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                                                 const TWeightStyleVec &weightStyles,
-                                                                 const TDouble1Vec &samples,
-                                                                 const TDouble4Vec1Vec &weights,
-                                                                 double &lowerBound,
-                                                                 double &upperBound,
-                                                                 maths_t::ETail &tail) const
-{
+                                                                 const TWeightStyleVec& weightStyles,
+                                                                 const TDouble1Vec& samples,
+                                                                 const TDouble4Vec1Vec& weights,
+                                                                 double& lowerBound,
+                                                                 double& upperBound,
+                                                                 maths_t::ETail& tail) const {
     lowerBound = upperBound = 0.0;
     tail = maths_t::E_UndeterminedTail;
 
@@ -1660,16 +1349,13 @@ bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProba
                                                         m_GammaShape,
                                                         m_GammaRate);
 
-    if (this->isInteger())
-    {
+    if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. to the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
         double value;
-        if (!CIntegration::gaussLegendre<CIntegration::OrderThree>(probability, 0.0, 1.0, value))
-        {
-            LOG_ERROR("Failed computing probability for "
-                      << core::CContainerPrinter::print(samples));
+        if (!CIntegration::gaussLegendre<CIntegration::OrderThree>(probability, 0.0, 1.0, value)) {
+            LOG_ERROR("Failed computing probability for " << core::CContainerPrinter::print(samples));
             return false;
         }
 
@@ -1680,10 +1366,8 @@ bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProba
     }
 
     double value;
-    if (!probability(0.0, value))
-    {
-        LOG_ERROR("Failed computing probability for "
-                  << core::CContainerPrinter::print(samples));
+    if (!probability(0.0, value)) {
+        LOG_ERROR("Failed computing probability for " << core::CContainerPrinter::print(samples));
         return false;
     }
 
@@ -1693,44 +1377,31 @@ bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProba
     return true;
 }
 
-bool CLogNormalMeanPrecConjugate::isNonInformative() const
-{
-    return m_GammaRate == NON_INFORMATIVE_RATE
-           || m_GaussianPrecision == NON_INFORMATIVE_PRECISION;
+bool CLogNormalMeanPrecConjugate::isNonInformative() const {
+    return m_GammaRate == NON_INFORMATIVE_RATE || m_GaussianPrecision == NON_INFORMATIVE_PRECISION;
 }
 
-void CLogNormalMeanPrecConjugate::print(const std::string &indent,
-                                        std::string &result) const
-{
+void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& result) const {
     result += core_t::LINE_ENDING + indent + "log-normal ";
-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         result += "non-informative";
         return;
     }
 
-    double scale = std::sqrt((m_GaussianPrecision + 1.0)
-                             / m_GaussianPrecision
-                             * m_GammaRate / m_GammaShape);
-    try
-    {
+    double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape);
+    try {
         boost::math::lognormal_distribution<> lognormal(m_GaussianMean, scale);
         double mean = boost::math::mean(lognormal);
         double deviation = boost::math::standard_deviation(lognormal);
-        result += "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset)
-                 + " sd = " + core::CStringUtils::typeToStringPretty(deviation);
+        result += "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) +
+                  " sd = " + core::CStringUtils::typeToStringPretty(deviation);
         return;
-    }
-    catch (const std::exception &)
-    {
-    }
+    } catch (const std::exception&) {}
     result += "mean =  variance = ";
 }
 
-std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const
-{
-    if (this->isNonInformative())
-    {
+std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const {
+    if (this->isNonInformative()) {
         // The non-informative prior is improper and effectively 0 everywhere.
         return std::string();
     }
@@ -1759,8 +1430,7 @@ std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const
     std::ostringstream yCoordinates;
     xCoordinates << "x = [";
     yCoordinates << "y = [";
-    for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement, y += yIncrement)
-    {
+    for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement, y += yIncrement) {
         xCoordinates << x << " ";
         yCoordinates << y << " ";
     }
@@ -1770,17 +1440,13 @@ std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const
     std::ostringstream pdf;
     pdf << "pdf = [";
     x = xStart;
-    for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement)
-    {
+    for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement) {
         y = yStart;
-        for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement)
-        {
+        for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) {
             double conditionalPrecision = m_GaussianPrecision * x;
-            boost::math::normal_distribution<> conditionalGaussian(m_GaussianMean,
-                                                                   1.0 / std::sqrt(conditionalPrecision));
+            boost::math::normal_distribution<> conditionalGaussian(m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision));
 
-            pdf << (CTools::safePdf(gamma, x) *
-                    CTools::safePdf(conditionalGaussian, y)) << " ";
+            pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) << " ";
         }
         pdf << core_t::LINE_ENDING;
     }
@@ -1789,8 +1455,7 @@ std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const
     return xCoordinates.str() + yCoordinates.str() + pdf.str();
 }
 
-uint64_t CLogNormalMeanPrecConjugate::checksum(uint64_t seed) const
-{
+uint64_t CLogNormalMeanPrecConjugate::checksum(uint64_t seed) const {
     seed = this->CPrior::checksum(seed);
     seed = CChecksum::calculate(seed, m_Offset);
     seed = CChecksum::calculate(seed, m_GaussianMean);
@@ -1799,23 +1464,19 @@ uint64_t CLogNormalMeanPrecConjugate::checksum(uint64_t seed) const
     return CChecksum::calculate(seed, m_GammaRate);
 }
 
-void CLogNormalMeanPrecConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CLogNormalMeanPrecConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CLogNormalMeanPrecConjugate");
 }
 
-std::size_t CLogNormalMeanPrecConjugate::memoryUsage() const
-{
+std::size_t CLogNormalMeanPrecConjugate::memoryUsage() const {
     return 0;
 }
 
-std::size_t CLogNormalMeanPrecConjugate::staticSize() const
-{
+std::size_t CLogNormalMeanPrecConjugate::staticSize() const {
     return sizeof(*this);
 }
 
-void CLogNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void CLogNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision);
     inserter.insertValue(OFFSET_TAG, m_Offset, core::CIEEE754::E_SinglePrecision);
     inserter.insertValue(GAUSSIAN_MEAN_TAG, m_GaussianMean, core::CIEEE754::E_SinglePrecision);
@@ -1825,39 +1486,28 @@ void CLogNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInser
     inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision);
 }
 
-double CLogNormalMeanPrecConjugate::normalMean() const
-{
+double CLogNormalMeanPrecConjugate::normalMean() const {
     return m_GaussianMean;
 }
 
-double CLogNormalMeanPrecConjugate::normalPrecision() const
-{
-    if (this->isNonInformative())
-    {
+double CLogNormalMeanPrecConjugate::normalPrecision() const {
+    if (this->isNonInformative()) {
         return 0.0;
     }
 
-    try
-    {
+    try {
         boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
         return boost::math::mean(gamma);
-    }
-    catch (std::exception &e)
-    {
-        LOG_ERROR("Failed to create prior: " << e.what()
-                  << " shape = " << m_GammaShape << ", rate = " << m_GammaRate);
+    } catch (std::exception& e) {
+        LOG_ERROR("Failed to create prior: " << e.what() << " shape = " << m_GammaShape << ", rate = " << m_GammaRate);
     }
 
     return 0.0;
 }
 
-CLogNormalMeanPrecConjugate::TDoubleDoublePr
-CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) const
-{
-    if (this->isNonInformative())
-    {
-        return std::make_pair(boost::numeric::bounds::lowest(),
-                              boost::numeric::bounds::highest());
+CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) const {
+    if (this->isNonInformative()) {
+        return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest());
     }
 
     // Compute the symmetric confidence interval around the median of the
@@ -1895,13 +1545,9 @@ CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) con
     return std::make_pair(xLower, xUpper);
 }
 
-CLogNormalMeanPrecConjugate::TDoubleDoublePr
-CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage) const
-{
-    if (this->isNonInformative())
-    {
-        return std::make_pair(boost::numeric::bounds::lowest(),
-                              boost::numeric::bounds::highest());
+CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage) const {
+    if (this->isNonInformative()) {
+        return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest());
     }
 
     percentage /= 100.0;
@@ -1911,27 +1557,18 @@ CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage
     // The marginal prior distribution for the precision is gamma.
     boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
 
-    return std::make_pair(boost::math::quantile(gamma, lowerPercentile),
-                           boost::math::quantile(gamma, upperPercentile));
+    return std::make_pair(boost::math::quantile(gamma, lowerPercentile), boost::math::quantile(gamma, upperPercentile));
 }
 
-bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjugate &rhs,
-                                                 const TEqualWithTolerance &equal) const
-{
-    LOG_DEBUG(m_GaussianMean << " " << rhs.m_GaussianMean << ", "
-              << m_GaussianPrecision << " " << rhs.m_GaussianPrecision << ", "
-              << m_GammaShape << " " << rhs.m_GammaShape << ", "
-              << m_GammaRate << " " << rhs.m_GammaRate);
-    return    equal(m_GaussianMean, rhs.m_GaussianMean)
-           && equal(m_GaussianPrecision, rhs.m_GaussianPrecision)
-           && equal(m_GammaShape, rhs.m_GammaShape)
-           && equal(m_GammaRate, rhs.m_GammaRate);
+bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjugate& rhs, const TEqualWithTolerance& equal) const {
+    LOG_DEBUG(m_GaussianMean << " " << rhs.m_GaussianMean << ", " << m_GaussianPrecision << " " << rhs.m_GaussianPrecision << ", "
+                             << m_GammaShape << " " << rhs.m_GammaShape << ", " << m_GammaRate << " " << rhs.m_GammaRate);
+    return equal(m_GaussianMean, rhs.m_GaussianMean) && equal(m_GaussianPrecision, rhs.m_GaussianPrecision) &&
+           equal(m_GammaShape, rhs.m_GammaShape) && equal(m_GammaRate, rhs.m_GammaRate);
 }
 
-double CLogNormalMeanPrecConjugate::mean() const
-{
-    if (this->isNonInformative())
-    {
+double CLogNormalMeanPrecConjugate::mean() const {
+    if (this->isNonInformative()) {
         return std::exp(m_GaussianMean) - m_Offset;
     }
 
@@ -1954,50 +1591,30 @@ double CLogNormalMeanPrecConjugate::mean() const
     //
     // when it is narrow.
 
-    if (m_GammaShape < MINIMUM_LOGNORMAL_SHAPE)
-    {
-        try
-        {
-            detail::CMeanKernel f(m_GaussianMean,
-                                  m_GaussianPrecision,
-                                  m_GammaShape,
-                                  m_GammaRate);
+    if (m_GammaShape < MINIMUM_LOGNORMAL_SHAPE) {
+        try {
+            detail::CMeanKernel f(m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate);
             boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
             double a = boost::math::quantile(gamma, 0.1);
             double b = boost::math::quantile(gamma, 0.9);
             detail::CMeanKernel::TValue result;
-            if (CIntegration::gaussLegendre(f, a, b, result))
-            {
+            if (CIntegration::gaussLegendre(f, a, b, result)) {
                 return result(0) / result(1) - m_Offset;
             }
-        }
-        catch (const std::exception &e)
-        {
-            LOG_ERROR("Failed to calculate mean: " << e.what());
-        }
+        } catch (const std::exception& e) { LOG_ERROR("Failed to calculate mean: " << e.what()); }
     }
-    return std::exp(m_GaussianMean + 0.5 * m_GammaRate / m_GammaShape
-                                         * (1.0 / m_GaussianPrecision + 1.0)) - m_Offset;
+    return std::exp(m_GaussianMean + 0.5 * m_GammaRate / m_GammaShape * (1.0 / m_GaussianPrecision + 1.0)) - m_Offset;
 }
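
For intuition, the closed form in the return statement above is the standard lognormal moment E[X] = exp(m + sigma^2 / 2) with the plug-in log-space variance sigma^2 = r / a * (1 / p + 1), where m, p, a and r are the Gaussian mean and precision and the gamma shape and rate. A minimal standalone sketch with hypothetical parameter values (illustration only, not part of the patch):

#include <cmath>
#include <iostream>

int main() {
    // Hypothetical posterior parameters mirroring the members above:
    // Gaussian mean m and precision p, gamma shape a and rate r.
    double m = 1.0, p = 50.0, a = 100.0, r = 120.0, offset = 0.0;
    double sigma2 = r / a * (1.0 / p + 1.0); // plug-in log-space variance
    std::cout << std::exp(m + 0.5 * sigma2) - offset << '\n'; // ~5.01
    return 0;
}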
 
-bool CLogNormalMeanPrecConjugate::isBad() const
-{
-    return    !CMathsFuncs::isFinite(m_Offset)
-           || !CMathsFuncs::isFinite(m_GaussianMean)
-           || !CMathsFuncs::isFinite(m_GaussianPrecision)
-           || !CMathsFuncs::isFinite(m_GammaShape)
-           || !CMathsFuncs::isFinite(m_GammaRate);
+bool CLogNormalMeanPrecConjugate::isBad() const {
+    return !CMathsFuncs::isFinite(m_Offset) || !CMathsFuncs::isFinite(m_GaussianMean) || !CMathsFuncs::isFinite(m_GaussianPrecision) ||
+           !CMathsFuncs::isFinite(m_GammaShape) || !CMathsFuncs::isFinite(m_GammaRate);
 }
 
-std::string CLogNormalMeanPrecConjugate::debug() const
-{
+std::string CLogNormalMeanPrecConjugate::debug() const {
     std::ostringstream result;
-    result << std::scientific << std::setprecision(15)
-           << m_Offset << " "
-           << m_GaussianMean << " "
-           << m_GaussianMean << " "
-           << m_GammaShape << " "
-           << m_GammaRate;
+    result << std::scientific << std::setprecision(15) << m_Offset << " " << m_GaussianMean << " " << m_GaussianPrecision << " "
+           << m_GammaShape << " " << m_GammaRate;
     return result.str();
 }
 
@@ -2005,7 +1622,5 @@ const double CLogNormalMeanPrecConjugate::NON_INFORMATIVE_MEAN = 0.0;
 const double CLogNormalMeanPrecConjugate::NON_INFORMATIVE_PRECISION = 0.0;
 const double CLogNormalMeanPrecConjugate::NON_INFORMATIVE_SHAPE = 1.0;
 const double CLogNormalMeanPrecConjugate::NON_INFORMATIVE_RATE = 0.0;
-
 }
 }
-
diff --git a/lib/maths/CLogTDistribution.cc b/lib/maths/CLogTDistribution.cc
index aecb452e74..fab6db2751 100644
--- a/lib/maths/CLogTDistribution.cc
+++ b/lib/maths/CLogTDistribution.cc
@@ -14,51 +14,37 @@
 
 #include 
 
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {
 
-namespace
-{
+namespace {
 
-inline double square(double x)
-{
+inline double square(double x) {
     return x * x;
 }
-
 }
 
-CLogTDistribution::CLogTDistribution(double degreesFreedom,
-                                     double location,
-                                     double scale) :
-        m_DegreesFreedom(degreesFreedom),
-        m_Location(location),
-        m_Scale(scale)
-{}
+CLogTDistribution::CLogTDistribution(double degreesFreedom, double location, double scale)
+    : m_DegreesFreedom(degreesFreedom), m_Location(location), m_Scale(scale) {
+}
 
-double CLogTDistribution::degreesFreedom() const
-{
+double CLogTDistribution::degreesFreedom() const {
     return m_DegreesFreedom;
 }
 
-double CLogTDistribution::location() const
-{
+double CLogTDistribution::location() const {
     return m_Location;
 }
 
-double CLogTDistribution::scale() const
-{
+double CLogTDistribution::scale() const {
     return m_Scale;
 }
 
-CLogTDistribution::TDoubleDoublePr support(const CLogTDistribution &/*distribution*/)
-{
+CLogTDistribution::TDoubleDoublePr support(const CLogTDistribution& /*distribution*/) {
     return CLogTDistribution::TDoubleDoublePr(0.0, boost::numeric::bounds::highest());
 }
 
-double mode(const CLogTDistribution &distribution)
-{
+double mode(const CLogTDistribution& distribution) {
     // The mode of a log t distribution is found by taking the derivative
     // of the p.d.f. In particular,
     //   f(x) ~ 1 / x * (1 + 1 / (n * s^2) * (log(x) - m)^2) ^ -((n+1)/2)
@@ -90,20 +76,16 @@ double mode(const CLogTDistribution &distribution)
     double degreesFreedom = distribution.degreesFreedom();
     double squareScale = square(distribution.scale());
 
-    if (square(degreesFreedom + 1.0) < 4.0 * degreesFreedom * squareScale)
-    {
+    if (square(degreesFreedom + 1.0) < 4.0 * degreesFreedom * squareScale) {
         return 0.0;
     }
 
     double location = distribution.location();
 
-    return std::exp(location - (degreesFreedom + 1.0) / 2.0
-                          + std::sqrt(square(degreesFreedom + 1.0) / 4.0
-                                   - degreesFreedom * squareScale));
+    return std::exp(location - (degreesFreedom + 1.0) / 2.0 + std::sqrt(square(degreesFreedom + 1.0) / 4.0 - degreesFreedom * squareScale));
 }
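
The stationary points of the density come from the quadratic w^2 + (n + 1) * w + n * s^2 = 0 in w = log(x) - m; the larger root is the local maximum returned above and the smaller root is the local minimum handled next, while a negative discriminant means the density only decreases away from its singularity at zero, hence the early return of 0.0. A quick numerical check of the formula with hypothetical parameters (standalone sketch, not part of the patch):

#include <boost/math/distributions/students_t.hpp>
#include <cmath>
#include <iostream>

// Log-t density via the change of variables f(x) = f_T((log(x) - m) / s) / (s * x).
double logTPdf(double x, double n, double m, double s) {
    boost::math::students_t_distribution<> t(n);
    return boost::math::pdf(t, (std::log(x) - m) / s) / (s * x);
}

int main() {
    double n = 10.0, m = 0.5, s = 0.2; // hypothetical degrees of freedom, location, scale
    double mode = std::exp(m - (n + 1.0) / 2.0 +
                           std::sqrt((n + 1.0) * (n + 1.0) / 4.0 - n * s * s));
    // The density at the computed mode should dominate its neighbours.
    std::cout << logTPdf(0.99 * mode, n, m, s) << " < " << logTPdf(mode, n, m, s)
              << " > " << logTPdf(1.01 * mode, n, m, s) << '\n';
    return 0;
}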
 
-CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistribution &distribution)
-{
+CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistribution& distribution) {
     // The distribution has a local minimum at:
     //   x = exp(m - (n+1) / 2 - ((n+1)^2 / 4 - n*s^2) ^ (1/2))
     //
@@ -115,20 +97,16 @@ CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistribution &distrib
     double degreesFreedom = distribution.degreesFreedom();
     double squareScale = square(distribution.scale());
 
-    if (square(degreesFreedom + 1.0) < 4.0 * degreesFreedom * squareScale)
-    {
+    if (square(degreesFreedom + 1.0) < 4.0 * degreesFreedom * squareScale) {
         return CLogTDistribution::TOptionalDouble();
     }
 
     double location = distribution.location();
 
-    return std::exp(location - (degreesFreedom + 1.0) / 2.0
-                          - std::sqrt(square(degreesFreedom + 1.0) / 4.0
-                                   - degreesFreedom * squareScale));
+    return std::exp(location - (degreesFreedom + 1.0) / 2.0 - std::sqrt(square(degreesFreedom + 1.0) / 4.0 - degreesFreedom * squareScale));
 }
 
-double pdf(const CLogTDistribution &distribution, double x)
-{
+double pdf(const CLogTDistribution& distribution, double x) {
     // It can be shown that the p.d.f. is related to the student's t
     // p.d.f. by:
     //   f(x) = 1 / (s * x) * f((log(x) - m) / s | n)
@@ -138,12 +116,9 @@ double pdf(const CLogTDistribution &distribution, double x)
     //   s is the scale and
     //   m is the location.
 
-    if (x < 0.0)
-    {
+    if (x < 0.0) {
         return 0.0;
-    }
-    else if (x == 0.0)
-    {
+    } else if (x == 0.0) {
         // In limit x tends down to 0 it can be shown that the density
         // function tends to:
         //   f(x) = f(e^l) * (v^(1/2) * s)^(v+1) / (y * log(y)^(v+1))
@@ -167,8 +142,7 @@ double pdf(const CLogTDistribution &distribution, double x)
     return CTools::safePdf(students, value) / scale / x;
 }
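
The relation in the comment is the usual change of variables for densities; spelling out the one step it leaves implicit, with X = exp(s * Y + m) and Y a Student's t variable with n degrees of freedom:

F_X(x) = P(e^{sY + m} \le x) = F_T\left(\frac{\log x - m}{s}\right),
\qquad
f_X(x) = \frac{d}{dx} F_X(x) = f_T\left(\frac{\log x - m}{s}\right)\frac{1}{s\,x},

which is exactly the CTools::safePdf(students, value) / scale / x computed above.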
 
-double cdf(const CLogTDistribution &distribution, double x)
-{
+double cdf(const CLogTDistribution& distribution, double x) {
     // It can be shown that the c.d.f. is related to the student's t
     // c.d.f. by:
     //   F(x) = F((log(x) - m) / s | n)
@@ -178,13 +152,10 @@ double cdf(const CLogTDistribution &distribution, double x)
     //   s is the scale and
     //   m is the location.
 
-    if (CMathsFuncs::isNan(x))
-    {
+    if (CMathsFuncs::isNan(x)) {
         LOG_ERROR("Bad argument x = " << x);
         return 0.0;
-    }
-    else if (x <= 0.0)
-    {
+    } else if (x <= 0.0) {
         return 0.0;
     }
 
@@ -198,18 +169,14 @@ double cdf(const CLogTDistribution &distribution, double x)
     return CTools::safeCdf(students, value);
 }
 
-double cdfComplement(const CLogTDistribution &distribution, double x)
-{
+double cdfComplement(const CLogTDistribution& distribution, double x) {
     // This is just 1 - F(x) but uses boost::math::complement to
     // avoid cancellation errors.
 
-    if (CMathsFuncs::isNan(x))
-    {
+    if (CMathsFuncs::isNan(x)) {
         LOG_ERROR("Bad argument x = " << x);
         return 0.0;
-    }
-    else if (x <= 0.0)
-    {
+    } else if (x <= 0.0) {
         return 1.0;
     }
 
@@ -223,8 +190,7 @@ double cdfComplement(const CLogTDistribution &distribution, double x)
     return CTools::safeCdfComplement(students, value);
 }
 
-double quantile(const CLogTDistribution &distribution, double q)
-{
+double quantile(const CLogTDistribution& distribution, double q) {
     // The distribution describes X = exp(s * Y + m) where Y is student's
     // t. This implies that the quantile's are obtained from the student's t
     // distribution by the transformation x_q = exp(s * y_q + m).
@@ -237,6 +203,5 @@ double quantile(const CLogTDistribution &distribution, double q)
     double location = distribution.location();
     return std::exp(scale * y_q + location);
 }
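
A round-trip sanity check of the transformation x_q = exp(s * y_q + m), using boost's Student's t quantile directly (hypothetical parameters; standalone sketch, not part of the patch):

#include <boost/math/distributions/students_t.hpp>
#include <cmath>
#include <iostream>

int main() {
    double n = 10.0, m = 0.5, s = 0.2, q = 0.9; // hypothetical parameters
    boost::math::students_t_distribution<> t(n);
    double xq = std::exp(s * boost::math::quantile(t, q) + m);
    // Applying the log-t c.d.f. relation F((log(x) - m) / s) recovers q = 0.9.
    std::cout << xq << " " << boost::math::cdf(t, (std::log(xq) - m) / s) << '\n';
    return 0;
}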
-
 }
 }
diff --git a/lib/maths/CMathsFuncs.cc b/lib/maths/CMathsFuncs.cc
index 5c3649e83d..9df66f3d48 100644
--- a/lib/maths/CMathsFuncs.cc
+++ b/lib/maths/CMathsFuncs.cc
@@ -18,96 +18,71 @@
 #undef isinf
 #endif
 
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {
 
-bool CMathsFuncs::isNan(double val)
-{
+bool CMathsFuncs::isNan(double val) {
     return boost::math::isnan(val);
 }
-bool CMathsFuncs::isNan(const CSymmetricMatrix<double> &val)
-{
+bool CMathsFuncs::isNan(const CSymmetricMatrix<double>& val) {
     return anElement(static_cast<bool (*)(double)>(&isNan), val);
 }
-bool CMathsFuncs::isNan(const CVector<double> &val)
-{
+bool CMathsFuncs::isNan(const CVector<double>& val) {
     return aComponent(static_cast<bool (*)(double)>(&isNan), val);
 }
-bool CMathsFuncs::isNan(const core::CSmallVectorBase<double> &val)
-{
-    for (std::size_t i = 0u; i < val.size(); ++i)
-    {
-        if (isNan(val[i]))
-        {
+bool CMathsFuncs::isNan(const core::CSmallVectorBase<double>& val) {
+    for (std::size_t i = 0u; i < val.size(); ++i) {
+        if (isNan(val[i])) {
             return true;
         }
     }
     return false;
 }
 
-bool CMathsFuncs::isInf(double val)
-{
+bool CMathsFuncs::isInf(double val) {
     return boost::math::isinf(val);
 }
-bool CMathsFuncs::isInf(const CVector<double> &val)
-{
+bool CMathsFuncs::isInf(const CVector<double>& val) {
     return aComponent(static_cast<bool (*)(double)>(&isInf), val);
 }
-bool CMathsFuncs::isInf(const CSymmetricMatrix<double> &val)
-{
+bool CMathsFuncs::isInf(const CSymmetricMatrix<double>& val) {
     return anElement(static_cast<bool (*)(double)>(&isInf), val);
 }
-bool CMathsFuncs::isInf(const core::CSmallVectorBase<double> &val)
-{
-    for (std::size_t i = 0u; i < val.size(); ++i)
-    {
-        if (isInf(val[i]))
-        {
+bool CMathsFuncs::isInf(const core::CSmallVectorBase<double>& val) {
+    for (std::size_t i = 0u; i < val.size(); ++i) {
+        if (isInf(val[i])) {
             return true;
         }
     }
     return false;
 }
 
-bool CMathsFuncs::isFinite(double val)
-{
+bool CMathsFuncs::isFinite(double val) {
     return boost::math::isfinite(val);
 }
-bool CMathsFuncs::isFinite(const CVector<double> &val)
-{
+bool CMathsFuncs::isFinite(const CVector<double>& val) {
     return everyComponent(static_cast<bool (*)(double)>(&isFinite), val);
 }
-bool CMathsFuncs::isFinite(const CSymmetricMatrix<double> &val)
-{
+bool CMathsFuncs::isFinite(const CSymmetricMatrix<double>& val) {
     return everyElement(static_cast<bool (*)(double)>(&isFinite), val);
 }
-bool CMathsFuncs::isFinite(const core::CSmallVectorBase<double> &val)
-{
-    for (std::size_t i = 0u; i < val.size(); ++i)
-    {
-        if (!isFinite(val[i]))
-        {
+bool CMathsFuncs::isFinite(const core::CSmallVectorBase<double>& val) {
+    for (std::size_t i = 0u; i < val.size(); ++i) {
+        if (!isFinite(val[i])) {
             return false;
         }
     }
     return true;
 }
 
-maths_t::EFloatingPointErrorStatus CMathsFuncs::fpStatus(double val)
-{
-    if (isNan(val))
-    {
+maths_t::EFloatingPointErrorStatus CMathsFuncs::fpStatus(double val) {
+    if (isNan(val)) {
         return maths_t::E_FpFailed;
     }
-    if (isInf(val))
-    {
+    if (isInf(val)) {
         return maths_t::E_FpOverflowed;
     }
     return maths_t::E_FpNoErrors;
 }
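
The ordering of the checks matters: NaN is classified before infinity, so a NaN never reports as an overflow. A simplified standalone analogue using the standard library classifiers in place of the boost wrappers:

#include <cmath>
#include <iostream>
#include <limits>

enum EFloatingPointErrorStatus { E_FpNoErrors, E_FpOverflowed, E_FpFailed };

EFloatingPointErrorStatus fpStatus(double val) {
    if (std::isnan(val)) { return E_FpFailed; }      // checked first, so NaN wins
    if (std::isinf(val)) { return E_FpOverflowed; }
    return E_FpNoErrors;
}

int main() {
    std::cout << fpStatus(1.0) << " "                                        // 0
              << fpStatus(std::numeric_limits<double>::infinity()) << " "    // 1
              << fpStatus(std::numeric_limits<double>::quiet_NaN()) << '\n'; // 2
    return 0;
}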
-
 }
 }
-
diff --git a/lib/maths/CMixtureDistribution.cc b/lib/maths/CMixtureDistribution.cc
index 54bc73529f..8d419c306e 100644
--- a/lib/maths/CMixtureDistribution.cc
+++ b/lib/maths/CMixtureDistribution.cc
@@ -8,203 +8,155 @@
 
 #include 
 
-namespace ml
-{
-namespace maths
-{
-namespace
-{
+namespace ml {
+namespace maths {
+namespace {
 
 using TDoubleDoublePr = std::pair<double, double>;
 
 //! \brief Invokes the support function on a distribution.
-struct SSupport
-{
+struct SSupport {
     template<typename DISTRIBUTION>
-    TDoubleDoublePr operator()(const DISTRIBUTION &distribution) const
-    {
+    TDoubleDoublePr operator()(const DISTRIBUTION& distribution) const {
         return support(distribution);
     }
 };
 
 //! \brief Invokes the mode function on a distribution.
-struct SMode
-{
+struct SMode {
     template<typename DISTRIBUTION>
-    double operator()(const DISTRIBUTION &distribution) const
-    {
+    double operator()(const DISTRIBUTION& distribution) const {
         return mode(distribution);
     }
 };
 
 //! \brief Invokes the mean function on a distribution.
-struct SMean
-{
+struct SMean {
     template<typename DISTRIBUTION>
-    double operator()(const DISTRIBUTION &distribution) const
-    {
+    double operator()(const DISTRIBUTION& distribution) const {
         return mean(distribution);
     }
 };
 
 //! \brief Invokes CTools::safePdf on a distribution.
-struct SPdf
-{
+struct SPdf {
     template<typename DISTRIBUTION>
-    double operator()(const DISTRIBUTION &distribution, double x) const
-    {
+    double operator()(const DISTRIBUTION& distribution, double x) const {
         return CTools::safePdf(distribution, x);
     }
 };
 
 //! \brief Invokes CTools::safeCdf on a distribution.
-struct SCdf
-{
+struct SCdf {
     template<typename DISTRIBUTION>
-    double operator()(const DISTRIBUTION &distribution, double x) const
-    {
+    double operator()(const DISTRIBUTION& distribution, double x) const {
         return CTools::safeCdf(distribution, x);
     }
 };
 
 //! \brief Invokes CTools::safeCdfComplement on a distribution.
-struct SCdfComplement
-{
+struct SCdfComplement {
     template<typename DISTRIBUTION>
-    double operator()(const DISTRIBUTION &distribution, double x) const
-    {
+    double operator()(const DISTRIBUTION& distribution, double x) const {
         return CTools::safeCdfComplement(distribution, x);
     }
 };
 
 //! \brief Invokes the quantile function on a distribution.
-struct SQuantile
-{
+struct SQuantile {
     template<typename DISTRIBUTION>
-    double operator()(const DISTRIBUTION &distribution, double x) const
-    {
+    double operator()(const DISTRIBUTION& distribution, double x) const {
         return quantile(distribution, x);
     }
 };
 
 //! \brief Invokes a specified unary action on a distribution.
 template<typename VISITOR_ACTION, typename RESULT>
-class CUnaryVisitor
-{
-    public:
-        using result_type = RESULT;
-
-    public:
-        template<typename DISTRIBUTION>
-        RESULT operator()(const DISTRIBUTION &distribution) const
-        {
-            return action(distribution);
-        }
-
-    private:
-        VISITOR_ACTION action;
+class CUnaryVisitor {
+public:
+    using result_type = RESULT;
+
+public:
+    template<typename DISTRIBUTION>
+    RESULT operator()(const DISTRIBUTION& distribution) const {
+        return action(distribution);
+    }
+
+private:
+    VISITOR_ACTION action;
 };
 
 //! \brief Invokes a specified binary action on a distribution.
 template<typename VISITOR_ACTION, typename RESULT>
-class CBinaryVisitor
-{
-    public:
-        using result_type = RESULT;
-
-    public:
-        template<typename DISTRIBUTION>
-        RESULT operator()(const DISTRIBUTION &distribution, double x) const
-        {
-            return action(distribution, x);
-        }
-
-    private:
-        VISITOR_ACTION action;
-};
+class CBinaryVisitor {
+public:
+    using result_type = RESULT;
 
-}
+public:
+    template<typename DISTRIBUTION>
+    RESULT operator()(const DISTRIBUTION& distribution, double x) const {
+        return action(distribution, x);
+    }
 
-namespace mixture_detail
-{
+private:
+    VISITOR_ACTION action;
+};
+}
 
-CMixtureModeImpl::CMixtureModeImpl(const boost::math::normal_distribution<> &normal) :
-        m_Distribution(normal)
-{
+namespace mixture_detail {
 
+CMixtureModeImpl::CMixtureModeImpl(const boost::math::normal_distribution<>& normal) : m_Distribution(normal) {
 }
 
-CMixtureModeImpl::CMixtureModeImpl(const boost::math::gamma_distribution<> &gamma) :
-        m_Distribution(gamma)
-{
+CMixtureModeImpl::CMixtureModeImpl(const boost::math::gamma_distribution<>& gamma) : m_Distribution(gamma) {
 }
 
-CMixtureModeImpl::CMixtureModeImpl(const boost::math::lognormal_distribution<> &lognormal) :
-        m_Distribution(lognormal)
-{
+CMixtureModeImpl::CMixtureModeImpl(const boost::math::lognormal_distribution<>& lognormal) : m_Distribution(lognormal) {
 }
-
 }
 
-CMixtureMode<false>::CMixtureMode(const boost::math::normal_distribution<> &normal) :
-        mixture_detail::CMixtureModeImpl(normal)
-{
+CMixtureMode<false>::CMixtureMode(const boost::math::normal_distribution<>& normal) : mixture_detail::CMixtureModeImpl(normal) {
 }
 
-CMixtureMode<false>::CMixtureMode(const boost::math::gamma_distribution<> &gamma) :
-        mixture_detail::CMixtureModeImpl(gamma)
-{
+CMixtureMode<false>::CMixtureMode(const boost::math::gamma_distribution<>& gamma) : mixture_detail::CMixtureModeImpl(gamma) {
 }
 
-CMixtureMode<false>::CMixtureMode(const boost::math::lognormal_distribution<> &lognormal) :
-        mixture_detail::CMixtureModeImpl(lognormal)
-{
+CMixtureMode<false>::CMixtureMode(const boost::math::lognormal_distribution<>& lognormal) : mixture_detail::CMixtureModeImpl(lognormal) {
 }
 
-CMixtureMode<true>::CMixtureMode(const CMixtureMode<false> &other) :
-        mixture_detail::CMixtureModeImpl(other)
-{
+CMixtureMode<true>::CMixtureMode(const CMixtureMode<false>& other) : mixture_detail::CMixtureModeImpl(other) {
 }
 
-mixture_detail::TDoubleDoublePr support(const CMixtureMode<false> &mode)
-{
+mixture_detail::TDoubleDoublePr support(const CMixtureMode<false>& mode) {
     return mode.visit(CUnaryVisitor<SSupport, TDoubleDoublePr>());
 }
 
-double mode(const CMixtureMode<false> &mode)
-{
+double mode(const CMixtureMode<false>& mode) {
     return mode.visit(CUnaryVisitor<SMode, double>());
 }
 
-double mean(const CMixtureMode<false> &mode)
-{
+double mean(const CMixtureMode<false>& mode) {
     return mode.visit(CUnaryVisitor<SMean, double>());
 }
 
-double pdf(const CMixtureMode<false> &mode, double x)
-{
+double pdf(const CMixtureMode<false>& mode, double x) {
     return mode.visit(CBinaryVisitor<SPdf, double>(), x);
 }
 
-double cdf(const CMixtureMode<false> &mode, double x)
-{
+double cdf(const CMixtureMode<false>& mode, double x) {
     return mode.visit(CBinaryVisitor<SCdf, double>(), x);
 }
 
-double cdf(const CMixtureMode<true> &mode, double x)
-{
+double cdf(const CMixtureMode<true>& mode, double x) {
     return mode.visit(CBinaryVisitor<SCdfComplement, double>(), x);
 }
 
-double quantile(const CMixtureMode<false> &mode, double x)
-{
+double quantile(const CMixtureMode<false>& mode, double x) {
     return mode.visit(CBinaryVisitor<SQuantile, double>(), x);
 }
 
-CMixtureMode<true> complement(const CMixtureMode<false> &mode)
-{
+CMixtureMode<true> complement(const CMixtureMode<false>& mode) {
     return CMixtureMode<true>(mode);
 }
-
 }
 }
diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc
index e5bcba723b..88bae64e8d 100644
--- a/lib/maths/CModel.cc
+++ b/lib/maths/CModel.cc
@@ -18,27 +18,20 @@
 #include 
 #include 
 
-namespace ml
-{
-namespace maths
-{
-namespace
-{
+namespace ml {
+namespace maths {
+namespace {
 
 using TDouble2Vec = core::CSmallVector<double, 2>;
 
 //! Check if all the elements of \p lhs are less than or equal to the \p rhs.
-bool lessThanEqual(const TDouble2Vec &lhs, double rhs)
-{
-    return std::find_if(lhs.begin(), lhs.end(),
-                        [rhs](double lhs_) { return lhs_ > rhs; }) == lhs.end();
+bool lessThanEqual(const TDouble2Vec& lhs, double rhs) {
+    return std::find_if(lhs.begin(), lhs.end(), [rhs](double lhs_) { return lhs_ > rhs; }) == lhs.end();
 }
 
 //! Check if all the elements of \p lhs are greater than or equal to the \p rhs.
-bool greaterThanEqual(const TDouble2Vec &lhs, double rhs)
-{
-    return std::find_if(lhs.begin(), lhs.end(),
-                        [rhs](double lhs_) { return lhs_ < rhs; }) == lhs.end();
+bool greaterThanEqual(const TDouble2Vec& lhs, double rhs) {
+    return std::find_if(lhs.begin(), lhs.end(), [rhs](double lhs_) { return lhs_ < rhs; }) == lhs.end();
 }
 
 //! Get the correction to apply to the one-sided probability calculations.
@@ -47,30 +40,25 @@ bool greaterThanEqual(const TDouble2Vec &lhs, double rhs)
 //! if the calculation is one sided below (above) we need to add on twice
 //! the probability of zero if the actual feature value is greater (less)
 //! than zero.
-double oneSidedEmptyBucketCorrection(maths_t::EProbabilityCalculation calculation,
-                                     const TDouble2Vec &value,
-                                     double probabilityEmptyBucket)
-{
-    switch (calculation)
-    {
+double
+oneSidedEmptyBucketCorrection(maths_t::EProbabilityCalculation calculation, const TDouble2Vec& value, double probabilityEmptyBucket) {
+    switch (calculation) {
     case maths_t::E_OneSidedBelow:
         return greaterThanEqual(value, 0.0) ? 2.0 * probabilityEmptyBucket : 0.0;
     case maths_t::E_OneSidedAbove:
-        return lessThanEqual(value, 0.0)    ? 2.0 * probabilityEmptyBucket : 0.0;
+        return lessThanEqual(value, 0.0) ? 2.0 * probabilityEmptyBucket : 0.0;
     case maths_t::E_TwoSided:
         break;
     }
     return 0.0;
 }
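
Concretely, with P(bucket empty) = 0.1 a one-sided-below calculation on a positive value picks up a 2 * 0.1 = 0.2 correction, while one-sided-above and two-sided calculations on that value get none. A standalone sketch of just this switch, with a scalar value and a hypothetical local enum standing in for maths_t:

#include <iostream>

enum EProbabilityCalculation { E_OneSidedBelow, E_TwoSided, E_OneSidedAbove };

// Mirrors the scalar case of oneSidedEmptyBucketCorrection above.
double correction(EProbabilityCalculation calculation, double value, double pEmpty) {
    switch (calculation) {
    case E_OneSidedBelow:
        return value >= 0.0 ? 2.0 * pEmpty : 0.0;
    case E_OneSidedAbove:
        return value <= 0.0 ? 2.0 * pEmpty : 0.0;
    case E_TwoSided:
        break;
    }
    return 0.0;
}

int main() {
    std::cout << correction(E_OneSidedBelow, 5.0, 0.1) << " " // 0.2
              << correction(E_OneSidedAbove, 5.0, 0.1) << " " // 0
              << correction(E_TwoSided, 5.0, 0.1) << '\n';    // 0
    return 0;
}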
 
-const double EFFECTIVE_COUNT[]{ 1.0, 0.8, 0.7, 0.65, 0.6, 0.57, 0.54, 0.52, 0.51 };
+const double EFFECTIVE_COUNT[]{1.0, 0.8, 0.7, 0.65, 0.6, 0.57, 0.54, 0.52, 0.51};
 
 //! Get the parameters for the stub model.
-CModelParams stubParameters()
-{
+CModelParams stubParameters() {
     return CModelParams{0, 1.0, 0.0, 0.0, 6 * core::constants::HOUR, core::constants::DAY};
 }
-
 }
 
 CModelParams::CModelParams(core_t::TTime bucketLength,
@@ -78,304 +66,252 @@ CModelParams::CModelParams(core_t::TTime bucketLength,
                            double decayRate,
                            double minimumSeasonalVarianceScale,
                            core_t::TTime minimumTimeToDetectChange,
-                           core_t::TTime maximumTimeToTestForChange) :
-        m_BucketLength(bucketLength),
-        m_LearnRate(learnRate),
-        m_DecayRate(decayRate),
-        m_MinimumSeasonalVarianceScale(minimumSeasonalVarianceScale),
-        m_MinimumTimeToDetectChange(std::max(minimumTimeToDetectChange, 12 * bucketLength)),
-        m_MaximumTimeToTestForChange(std::max(maximumTimeToTestForChange, 48 * bucketLength)),
-        m_ProbabilityBucketEmpty(0.0)
-{}
-
-core_t::TTime CModelParams::bucketLength() const
-{
+                           core_t::TTime maximumTimeToTestForChange)
+    : m_BucketLength(bucketLength),
+      m_LearnRate(learnRate),
+      m_DecayRate(decayRate),
+      m_MinimumSeasonalVarianceScale(minimumSeasonalVarianceScale),
+      m_MinimumTimeToDetectChange(std::max(minimumTimeToDetectChange, 12 * bucketLength)),
+      m_MaximumTimeToTestForChange(std::max(maximumTimeToTestForChange, 48 * bucketLength)),
+      m_ProbabilityBucketEmpty(0.0) {
+}
+
+core_t::TTime CModelParams::bucketLength() const {
     return m_BucketLength;
 }
 
-double CModelParams::learnRate() const
-{
+double CModelParams::learnRate() const {
     return m_LearnRate;
 }
 
-double CModelParams::decayRate() const
-{
+double CModelParams::decayRate() const {
     return m_DecayRate;
 }
 
-double CModelParams::averagingDecayRate() const
-{
+double CModelParams::averagingDecayRate() const {
     return 5.0 * m_DecayRate;
 }
 
-double CModelParams::minimumSeasonalVarianceScale() const
-{
+double CModelParams::minimumSeasonalVarianceScale() const {
     return m_MinimumSeasonalVarianceScale;
 }
 
-bool CModelParams::testForChange(core_t::TTime changeInterval) const
-{
+bool CModelParams::testForChange(core_t::TTime changeInterval) const {
     return changeInterval >= std::max(3 * m_BucketLength, 10 * core::constants::MINUTE);
 }
 
-core_t::TTime CModelParams::minimumTimeToDetectChange(void) const
-{
+core_t::TTime CModelParams::minimumTimeToDetectChange(void) const {
     return m_MinimumTimeToDetectChange;
 }
 
-core_t::TTime CModelParams::maximumTimeToTestForChange(void) const
-{
+core_t::TTime CModelParams::maximumTimeToTestForChange(void) const {
     return m_MaximumTimeToTestForChange;
 }
 
-void CModelParams::probabilityBucketEmpty(double probability)
-{
+void CModelParams::probabilityBucketEmpty(double probability) {
     m_ProbabilityBucketEmpty = probability;
 }
 
-double CModelParams::probabilityBucketEmpty() const
-{
+double CModelParams::probabilityBucketEmpty() const {
     return m_ProbabilityBucketEmpty;
 }
 
+CModelAddSamplesParams::CModelAddSamplesParams()
+    : m_Type(maths_t::E_MixedData),
+      m_IsNonNegative(false),
+      m_PropagationInterval(1.0),
+      m_WeightStyles(0),
+      m_TrendWeights(0),
+      m_PriorWeights(0) {
+}
 
-CModelAddSamplesParams::CModelAddSamplesParams() :
-        m_Type(maths_t::E_MixedData),
-        m_IsNonNegative(false),
-        m_PropagationInterval(1.0),
-        m_WeightStyles(0),
-        m_TrendWeights(0),
-        m_PriorWeights(0)
-{}
-
-CModelAddSamplesParams &CModelAddSamplesParams::integer(bool integer)
-{
+CModelAddSamplesParams& CModelAddSamplesParams::integer(bool integer) {
     m_Type = integer ? maths_t::E_IntegerData : maths_t::E_ContinuousData;
     return *this;
 }
 
-maths_t::EDataType CModelAddSamplesParams::type() const
-{
+maths_t::EDataType CModelAddSamplesParams::type() const {
     return m_Type;
 }
 
-CModelAddSamplesParams &CModelAddSamplesParams::nonNegative(bool nonNegative)
-{
+CModelAddSamplesParams& CModelAddSamplesParams::nonNegative(bool nonNegative) {
     m_IsNonNegative = nonNegative;
     return *this;
 }
 
-bool CModelAddSamplesParams::isNonNegative() const
-{
+bool CModelAddSamplesParams::isNonNegative() const {
     return m_IsNonNegative;
 }
 
-CModelAddSamplesParams &CModelAddSamplesParams::propagationInterval(double interval)
-{
+CModelAddSamplesParams& CModelAddSamplesParams::propagationInterval(double interval) {
     m_PropagationInterval = interval;
     return *this;
 }
 
-double CModelAddSamplesParams::propagationInterval() const
-{
+double CModelAddSamplesParams::propagationInterval() const {
     return m_PropagationInterval;
 }
 
-CModelAddSamplesParams &CModelAddSamplesParams::weightStyles(const maths_t::TWeightStyleVec &styles)
-{
+CModelAddSamplesParams& CModelAddSamplesParams::weightStyles(const maths_t::TWeightStyleVec& styles) {
     m_WeightStyles = &styles;
     return *this;
 }
 
-const maths_t::TWeightStyleVec &CModelAddSamplesParams::weightStyles() const
-{
+const maths_t::TWeightStyleVec& CModelAddSamplesParams::weightStyles() const {
     return *m_WeightStyles;
 }
 
-CModelAddSamplesParams &CModelAddSamplesParams::trendWeights(const TDouble2Vec4VecVec &weights)
-{
+CModelAddSamplesParams& CModelAddSamplesParams::trendWeights(const TDouble2Vec4VecVec& weights) {
     m_TrendWeights = &weights;
     return *this;
 }
 
-const CModelAddSamplesParams::TDouble2Vec4VecVec &CModelAddSamplesParams::trendWeights() const
-{
+const CModelAddSamplesParams::TDouble2Vec4VecVec& CModelAddSamplesParams::trendWeights() const {
     return *m_TrendWeights;
 }
 
-CModelAddSamplesParams &CModelAddSamplesParams::priorWeights(const TDouble2Vec4VecVec &weights)
-{
+CModelAddSamplesParams& CModelAddSamplesParams::priorWeights(const TDouble2Vec4VecVec& weights) {
     m_PriorWeights = &weights;
     return *this;
 }
 
-const CModelAddSamplesParams::TDouble2Vec4VecVec &CModelAddSamplesParams::priorWeights() const
-{
+const CModelAddSamplesParams::TDouble2Vec4VecVec& CModelAddSamplesParams::priorWeights() const {
     return *m_PriorWeights;
 }
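
All of these setters return *this, which is what lets callers configure a parameters object in a single fluent chain. A stripped-down standalone analogue of the idiom (hypothetical two-field class, not the real one):

#include <iostream>

class CParams {
public:
    CParams& integer(bool isInteger) {
        m_Integer = isInteger;
        return *this; // returning *this is what makes the calls chain
    }
    CParams& propagationInterval(double interval) {
        m_PropagationInterval = interval;
        return *this;
    }
    bool integer() const { return m_Integer; }
    double propagationInterval() const { return m_PropagationInterval; }

private:
    bool m_Integer = false;
    double m_PropagationInterval = 1.0;
};

int main() {
    CParams params;
    params.integer(true).propagationInterval(0.5);
    std::cout << params.integer() << " " << params.propagationInterval() << '\n';
    return 0;
}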
 
+CModelProbabilityParams::CModelProbabilityParams()
+    : m_Tag(0), m_SeasonalConfidenceInterval(DEFAULT_SEASONAL_CONFIDENCE_INTERVAL), m_WeightStyles(0), m_UpdateAnomalyModel(true) {
+}
 
-CModelProbabilityParams::CModelProbabilityParams() :
-        m_Tag(0),
-        m_SeasonalConfidenceInterval(DEFAULT_SEASONAL_CONFIDENCE_INTERVAL),
-        m_WeightStyles(0),
-        m_UpdateAnomalyModel(true)
-{}
-
-CModelProbabilityParams &CModelProbabilityParams::tag(std::size_t tag)
-{
+CModelProbabilityParams& CModelProbabilityParams::tag(std::size_t tag) {
     m_Tag = tag;
     return *this;
 }
 
-std::size_t CModelProbabilityParams::tag() const
-{
+std::size_t CModelProbabilityParams::tag() const {
     return m_Tag;
 }
 
-CModelProbabilityParams &CModelProbabilityParams::addCalculation(maths_t::EProbabilityCalculation calculation)
-{
+CModelProbabilityParams& CModelProbabilityParams::addCalculation(maths_t::EProbabilityCalculation calculation) {
     m_Calculations.push_back(calculation);
     return *this;
 }
 
-std::size_t CModelProbabilityParams::calculations() const
-{
+std::size_t CModelProbabilityParams::calculations() const {
     return m_Calculations.size();
 }
 
-maths_t::EProbabilityCalculation CModelProbabilityParams::calculation(std::size_t i) const
-{
+maths_t::EProbabilityCalculation CModelProbabilityParams::calculation(std::size_t i) const {
     return m_Calculations.size() == 1 ? m_Calculations[0] : m_Calculations[i];
 }
 
-CModelProbabilityParams &CModelProbabilityParams::seasonalConfidenceInterval(double confidence)
-{
+CModelProbabilityParams& CModelProbabilityParams::seasonalConfidenceInterval(double confidence) {
     m_SeasonalConfidenceInterval = confidence;
     return *this;
 }
 
-double CModelProbabilityParams::seasonalConfidenceInterval() const
-{
+double CModelProbabilityParams::seasonalConfidenceInterval() const {
     return m_SeasonalConfidenceInterval;
 }
 
-CModelProbabilityParams &CModelProbabilityParams::addBucketEmpty(const TBool2Vec &empty)
-{
+CModelProbabilityParams& CModelProbabilityParams::addBucketEmpty(const TBool2Vec& empty) {
     m_BucketEmpty.push_back(empty);
     return *this;
 }
 
-const CModelProbabilityParams::TBool2Vec1Vec &CModelProbabilityParams::bucketEmpty() const
-{
+const CModelProbabilityParams::TBool2Vec1Vec& CModelProbabilityParams::bucketEmpty() const {
     return m_BucketEmpty;
 }
 
-CModelProbabilityParams &CModelProbabilityParams::weightStyles(const maths_t::TWeightStyleVec &styles)
-{
+CModelProbabilityParams& CModelProbabilityParams::weightStyles(const maths_t::TWeightStyleVec& styles) {
     m_WeightStyles = &styles;
     return *this;
 }
 
-const maths_t::TWeightStyleVec &CModelProbabilityParams::weightStyles() const
-{
+const maths_t::TWeightStyleVec& CModelProbabilityParams::weightStyles() const {
     return *m_WeightStyles;
 }
 
-CModelProbabilityParams &CModelProbabilityParams::addWeights(const TDouble2Vec4Vec &weights)
-{
+CModelProbabilityParams& CModelProbabilityParams::addWeights(const TDouble2Vec4Vec& weights) {
     m_Weights.push_back(weights);
     return *this;
 }
 
-CModelProbabilityParams &CModelProbabilityParams::weights(const TDouble2Vec4Vec1Vec &weights)
-{
+CModelProbabilityParams& CModelProbabilityParams::weights(const TDouble2Vec4Vec1Vec& weights) {
     m_Weights = weights;
     return *this;
 }
 
-const CModelProbabilityParams::TDouble2Vec4Vec1Vec &CModelProbabilityParams::weights() const
-{
+const CModelProbabilityParams::TDouble2Vec4Vec1Vec& CModelProbabilityParams::weights() const {
     return m_Weights;
 }
 
-CModelProbabilityParams::TDouble2Vec4Vec1Vec &CModelProbabilityParams::weights()
-{
+CModelProbabilityParams::TDouble2Vec4Vec1Vec& CModelProbabilityParams::weights() {
     return m_Weights;
 }
 
-CModelProbabilityParams &CModelProbabilityParams::addCoordinate(std::size_t coordinate)
-{
+CModelProbabilityParams& CModelProbabilityParams::addCoordinate(std::size_t coordinate) {
     m_Coordinates.push_back(coordinate);
     return *this;
 }
 
-const CModelProbabilityParams::TSize2Vec &CModelProbabilityParams::coordinates() const
-{
+const CModelProbabilityParams::TSize2Vec& CModelProbabilityParams::coordinates() const {
     return m_Coordinates;
 }
 
-CModelProbabilityParams &CModelProbabilityParams::mostAnomalousCorrelate(std::size_t correlate)
-{
+CModelProbabilityParams& CModelProbabilityParams::mostAnomalousCorrelate(std::size_t correlate) {
     m_MostAnomalousCorrelate.reset(correlate);
     return *this;
 }
 
-CModelProbabilityParams::TOptionalSize CModelProbabilityParams::mostAnomalousCorrelate() const
-{
+CModelProbabilityParams::TOptionalSize CModelProbabilityParams::mostAnomalousCorrelate() const {
     return m_MostAnomalousCorrelate;
 }
 
-CModelProbabilityParams &CModelProbabilityParams::updateAnomalyModel(bool update)
-{
+CModelProbabilityParams& CModelProbabilityParams::updateAnomalyModel(bool update) {
     m_UpdateAnomalyModel = update;
     return *this;
 }
 
-bool CModelProbabilityParams::updateAnomalyModel() const
-{
+bool CModelProbabilityParams::updateAnomalyModel() const {
     return m_UpdateAnomalyModel;
 }
 
-
-CModel::EUpdateResult CModel::combine(EUpdateResult lhs, EUpdateResult rhs)
-{
-    switch (lhs)
-    {
-    case E_Success: return rhs;
-    case E_Reset:   return rhs == E_Failure ? E_Failure : E_Reset;
-    case E_Failure: return E_Failure;
+CModel::EUpdateResult CModel::combine(EUpdateResult lhs, EUpdateResult rhs) {
+    switch (lhs) {
+    case E_Success:
+        return rhs;
+    case E_Reset:
+        return rhs == E_Failure ? E_Failure : E_Reset;
+    case E_Failure:
+        return E_Failure;
     }
     return E_Failure;
 }
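
combine() is just the maximum over the severity ordering E_Success < E_Reset < E_Failure, written out as a switch. A tiny standalone check of that reading (sketch, not part of the patch):

#include <algorithm>
#include <iostream>

enum EUpdateResult { E_Success, E_Reset, E_Failure }; // severity increases left to right

EUpdateResult combine(EUpdateResult lhs, EUpdateResult rhs) {
    // Equivalent to the switch above: take the more severe of the two results.
    return std::max(lhs, rhs);
}

int main() {
    std::cout << (combine(E_Reset, E_Success) == E_Reset) << " "
              << (combine(E_Reset, E_Failure) == E_Failure) << '\n'; // 1 1
    return 0;
}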
 
-CModel::CModel(const CModelParams ¶ms) : m_Params(params) {}
+CModel::CModel(const CModelParams& params) : m_Params(params) {
+}
 
-double CModel::effectiveCount(std::size_t n)
-{
-    return n <= boost::size(EFFECTIVE_COUNT) ? EFFECTIVE_COUNT[n-1] : 0.5;
+double CModel::effectiveCount(std::size_t n) {
+    return n <= boost::size(EFFECTIVE_COUNT) ? EFFECTIVE_COUNT[n - 1] : 0.5;
 }
 
-const CModelParams &CModel::params() const
-{
+const CModelParams& CModel::params() const {
     return m_Params;
 }
 
-CModelParams &CModel::params()
-{
+CModelParams& CModel::params() {
     return m_Params;
 }
 
 double CModel::correctForEmptyBucket(maths_t::EProbabilityCalculation calculation,
-                                     const TDouble2Vec &value,
+                                     const TDouble2Vec& value,
                                      bool bucketEmpty,
                                      double probabilityBucketEmpty,
-                                     double probability)
-{
+                                     double probability) {
     double pCorrected{(1.0 - probabilityBucketEmpty) * probability};
 
-    if (!bucketEmpty)
-    {
+    if (!bucketEmpty) {
         double pOneSided{oneSidedEmptyBucketCorrection(calculation, value, probabilityBucketEmpty)};
         return std::min(pOneSided + pCorrected, 1.0);
     }
@@ -385,147 +321,118 @@ double CModel::correctForEmptyBucket(maths_t::EProbabilityCalculation calculatio
 
 double CModel::correctForEmptyBucket(maths_t::EProbabilityCalculation calculation,
                                      double value,
-                                     const TBool2Vec &bucketEmpty,
-                                     const TDouble2Vec &probabilityEmptyBucket,
-                                     double probability)
-{
-    if (!bucketEmpty[0] && !bucketEmpty[1])
-    {
+                                     const TBool2Vec& bucketEmpty,
+                                     const TDouble2Vec& probabilityEmptyBucket,
+                                     double probability) {
+    if (!bucketEmpty[0] && !bucketEmpty[1]) {
         double pState{(1.0 - probabilityEmptyBucket[0]) * (1.0 - probabilityEmptyBucket[1])};
         double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, 1.0 - pState)};
         return std::min(pOneSided + pState * probability, 1.0);
     }
 
-    if (!bucketEmpty[0])
-    {
+    if (!bucketEmpty[0]) {
         double pState{(1.0 - probabilityEmptyBucket[0]) * probabilityEmptyBucket[1]};
         double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, probabilityEmptyBucket[0])};
         return std::min(pOneSided + pState + (1.0 - pState) * probability, 1.0);
     }
 
-    if (!bucketEmpty[1])
-    {
+    if (!bucketEmpty[1]) {
         double pState{probabilityEmptyBucket[0] * (1.0 - probabilityEmptyBucket[1])};
         double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, probabilityEmptyBucket[1])};
         return std::min(pOneSided + pState + (1.0 - pState) * probability, 1.0);
-
     }
 
     double pState = probabilityEmptyBucket[0] * probabilityEmptyBucket[1];
     return pState + (1.0 - pState) * probability;
 }
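
Worked numbers for the first branch above, both buckets non-empty and a two-sided calculation (so the one-sided term is zero): with hypothetical empty-bucket probabilities {0.1, 0.2}, pState = 0.9 * 0.8 = 0.72 and a raw probability of 0.01 becomes 0.0072.

#include <algorithm>
#include <iostream>

int main() {
    double pEmpty0 = 0.1, pEmpty1 = 0.2; // hypothetical P(bucket empty)
    double probability = 0.01, pOneSided = 0.0;
    double pState = (1.0 - pEmpty0) * (1.0 - pEmpty1); // 0.72
    std::cout << std::min(pOneSided + pState * probability, 1.0) << '\n'; // 0.0072
    return 0;
}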
 
+CModelStub::CModelStub() : CModel(stubParameters()) {
+}
 
-CModelStub::CModelStub() : CModel(stubParameters()) {}
-
-std::size_t CModelStub::identifier() const
-{
+std::size_t CModelStub::identifier() const {
     return 0;
 }
 
-CModelStub *CModelStub::clone(std::size_t /*id*/) const
-{
+CModelStub* CModelStub::clone(std::size_t /*id*/) const {
     return new CModelStub(*this);
 }
 
-CModelStub *CModelStub::cloneForPersistence() const
-{
+CModelStub* CModelStub::cloneForPersistence() const {
     return new CModelStub(*this);
 }
 
-CModelStub *CModelStub::cloneForForecast() const
-{
+CModelStub* CModelStub::cloneForForecast() const {
     return new CModelStub(*this);
 }
 
-bool CModelStub::isForecastPossible() const
-{
+bool CModelStub::isForecastPossible() const {
     return false;
 }
 
-void CModelStub::modelCorrelations(CTimeSeriesCorrelations &/*model*/)
-{
+void CModelStub::modelCorrelations(CTimeSeriesCorrelations& /*model*/) {
 }
 
-CModelStub::TSize2Vec1Vec CModelStub::correlates() const
-{
+CModelStub::TSize2Vec1Vec CModelStub::correlates() const {
     return {};
 }
 
-CModelStub::TDouble2Vec CModelStub::mode(core_t::TTime /*time*/,
-                                         const maths_t::TWeightStyleVec &/*weightStyles*/,
-                                         const TDouble2Vec4Vec &/*weights*/) const
-{
+CModelStub::TDouble2Vec
+CModelStub::mode(core_t::TTime /*time*/, const maths_t::TWeightStyleVec& /*weightStyles*/, const TDouble2Vec4Vec& /*weights*/) const {
     return {};
 }
 
 CModelStub::TDouble2Vec1Vec CModelStub::correlateModes(core_t::TTime /*time*/,
-                                                       const maths_t::TWeightStyleVec &/*weightStyles*/,
-                                                       const TDouble2Vec4Vec1Vec &/*weights*/) const
-{
+                                                       const maths_t::TWeightStyleVec& /*weightStyles*/,
+                                                       const TDouble2Vec4Vec1Vec& /*weights*/) const {
     return {};
 }
 
-CModelStub::TDouble2Vec1Vec CModelStub::residualModes(const maths_t::TWeightStyleVec &/*weightStyles*/,
-                                                      const TDouble2Vec4Vec &/*weights*/) const
-{
+CModelStub::TDouble2Vec1Vec CModelStub::residualModes(const maths_t::TWeightStyleVec& /*weightStyles*/,
+                                                      const TDouble2Vec4Vec& /*weights*/) const {
     return {};
 }
 
-void CModelStub::addBucketValue(const TTimeDouble2VecSizeTrVec &/*value*/)
-{
+void CModelStub::addBucketValue(const TTimeDouble2VecSizeTrVec& /*value*/) {
 }
 
-CModelStub::EUpdateResult CModelStub::addSamples(const CModelAddSamplesParams &/*params*/,
-                                                 TTimeDouble2VecSizeTrVec /*samples*/)
-{
+CModelStub::EUpdateResult CModelStub::addSamples(const CModelAddSamplesParams& /*params*/, TTimeDouble2VecSizeTrVec /*samples*/) {
     return E_Success;
 }
 
-void CModelStub::skipTime(core_t::TTime /*gap*/)
-{
+void CModelStub::skipTime(core_t::TTime /*gap*/) {
 }
 
-void CModelStub::detrend(const TTime2Vec1Vec &/*time*/,
-                         double /*confidenceInterval*/,
-                         TDouble2Vec1Vec &/*value*/) const
-{
+void CModelStub::detrend(const TTime2Vec1Vec& /*time*/, double /*confidenceInterval*/, TDouble2Vec1Vec& /*value*/) const {
 }
 
-CModelStub::TDouble2Vec CModelStub::predict(core_t::TTime /*time*/,
-                                            const TSizeDoublePr1Vec &/*correlated*/,
-                                            TDouble2Vec /*hint*/) const
-{
+CModelStub::TDouble2Vec CModelStub::predict(core_t::TTime /*time*/, const TSizeDoublePr1Vec& /*correlated*/, TDouble2Vec /*hint*/) const {
     return {};
 }
 
 CModelStub::TDouble2Vec3Vec CModelStub::confidenceInterval(core_t::TTime /*time*/,
                                                            double /*confidenceInterval*/,
-                                                           const maths_t::TWeightStyleVec &/*weightStyles*/,
-                                                           const TDouble2Vec4Vec &/*weights*/) const
-{
+                                                           const maths_t::TWeightStyleVec& /*weightStyles*/,
+                                                           const TDouble2Vec4Vec& /*weights*/) const {
     return {};
 }
 
 bool CModelStub::forecast(core_t::TTime /*startTime*/,
                           core_t::TTime /*endTime*/,
                           double /*confidenceInterval*/,
-                          const TDouble2Vec &/*minimum*/,
-                          const TDouble2Vec &/*maximum*/,
-                          const TForecastPushDatapointFunc &/*forecastPushDataPointFunc*/,
-                          std::string &/*messageOut*/)
-{
+                          const TDouble2Vec& /*minimum*/,
+                          const TDouble2Vec& /*maximum*/,
+                          const TForecastPushDatapointFunc& /*forecastPushDataPointFunc*/,
+                          std::string& /*messageOut*/) {
     return true;
 }
 
-bool CModelStub::probability(const CModelProbabilityParams &/*params*/,
-                             const TTime2Vec1Vec &/*time*/,
-                             const TDouble2Vec1Vec &/*value*/,
-                             double &probability,
-                             TTail2Vec &tail,
-                             bool &conditional,
-                             TSize1Vec &mostAnomalousCorrelate) const
-{
+bool CModelStub::probability(const CModelProbabilityParams& /*params*/,
+                             const TTime2Vec1Vec& /*time*/,
+                             const TDouble2Vec1Vec& /*value*/,
+                             double& probability,
+                             TTail2Vec& tail,
+                             bool& conditional,
+                             TSize1Vec& mostAnomalousCorrelate) const {
     probability = 1.0;
     tail.clear();
     conditional = false;
@@ -533,41 +440,30 @@ bool CModelStub::probability(const CModelProbabilityParams &/*params*/,
     return true;
 }
 
-CModelStub::TDouble2Vec CModelStub::winsorisationWeight(double /*derate*/,
-                                                        core_t::TTime /*time*/,
-                                                        const TDouble2Vec &/*value*/) const
-{
+CModelStub::TDouble2Vec CModelStub::winsorisationWeight(double /*derate*/, core_t::TTime /*time*/, const TDouble2Vec& /*value*/) const {
     return {};
 }
 
-CModelStub::TDouble2Vec CModelStub::seasonalWeight(double /*confidence*/,
-                                                   core_t::TTime /*time*/) const
-{
+CModelStub::TDouble2Vec CModelStub::seasonalWeight(double /*confidence*/, core_t::TTime /*time*/) const {
     return {};
 }
 
-std::uint64_t CModelStub::checksum(std::uint64_t seed) const
-{
+std::uint64_t CModelStub::checksum(std::uint64_t seed) const {
     return seed;
 }
 
-void CModelStub::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr /*mem*/) const
-{
+void CModelStub::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr /*mem*/) const {
 }
 
-std::size_t CModelStub::memoryUsage() const
-{
+std::size_t CModelStub::memoryUsage() const {
     return 0;
 }
 
-void CModelStub::acceptPersistInserter(core::CStatePersistInserter &/*inserter*/) const
-{
+void CModelStub::acceptPersistInserter(core::CStatePersistInserter& /*inserter*/) const {
 }
 
-maths_t::EDataType CModelStub::dataType() const
-{
+maths_t::EDataType CModelStub::dataType() const {
     return maths_t::E_MixedData;
 }
-
 }
 }
diff --git a/lib/maths/CModelStateSerialiser.cc b/lib/maths/CModelStateSerialiser.cc
index 97da9fc56b..3b4c287ae6 100644
--- a/lib/maths/CModelStateSerialiser.cc
+++ b/lib/maths/CModelStateSerialiser.cc
@@ -10,82 +10,54 @@
 
 #include 
 
-namespace ml
-{
-namespace maths
-{
-namespace
-{
+namespace ml {
+namespace maths {
+namespace {
 const std::string UNIVARIATE_TIME_SERIES_TAG{"a"};
 const std::string MULTIVARIATE_TIME_SERIES_TAG{"b"};
 const std::string MODEL_STUB_TAG{"c"};
 }
 
-bool CModelStateSerialiser::operator()(const SModelRestoreParams ¶ms,
-                                       TModelPtr &result,
-                                       core::CStateRestoreTraverser &traverser) const
-{
+bool CModelStateSerialiser::
+operator()(const SModelRestoreParams& params, TModelPtr& result, core::CStateRestoreTraverser& traverser) const {
     std::size_t numResults = 0;
 
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == UNIVARIATE_TIME_SERIES_TAG)
-        {
+    do {
+        const std::string& name = traverser.name();
+        if (name == UNIVARIATE_TIME_SERIES_TAG) {
             result.reset(new CUnivariateTimeSeriesModel(params, traverser));
             ++numResults;
-        }
-        else if (name == MULTIVARIATE_TIME_SERIES_TAG)
-        {
+        } else if (name == MULTIVARIATE_TIME_SERIES_TAG) {
             result.reset(new CMultivariateTimeSeriesModel(params, traverser));
             ++numResults;
-        }
-        else if (name == MODEL_STUB_TAG)
-        {
+        } else if (name == MODEL_STUB_TAG) {
             result.reset(new CModelStub());
             ++numResults;
-        }
-        else
-        {
+        } else {
             LOG_ERROR("No model corresponds to name " << traverser.name());
             return false;
         }
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
-    if (numResults != 1)
-    {
+    if (numResults != 1) {
         LOG_ERROR("Expected 1 (got " << numResults << ") model tags");
         result.reset();
         return false;
     }
 
     return true;
-
 }
 
-void CModelStateSerialiser::operator()(const CModel &model,
-                                       core::CStatePersistInserter &inserter) const
-{
-    if (dynamic_cast<const CUnivariateTimeSeriesModel*>(&model) != 0)
-    {
-        inserter.insertLevel(UNIVARIATE_TIME_SERIES_TAG,
-                             boost::bind(&CModel::acceptPersistInserter, &model, _1));
-    }
-    else if (dynamic_cast<const CMultivariateTimeSeriesModel*>(&model) != 0)
-    {
-        inserter.insertLevel(MULTIVARIATE_TIME_SERIES_TAG,
-                             boost::bind(&CModel::acceptPersistInserter, &model, _1));
-    }
-    else if (dynamic_cast<const CModelStub*>(&model) != 0)
-    {
+void CModelStateSerialiser::operator()(const CModel& model, core::CStatePersistInserter& inserter) const {
+    if (dynamic_cast<const CUnivariateTimeSeriesModel*>(&model) != 0) {
+        inserter.insertLevel(UNIVARIATE_TIME_SERIES_TAG, boost::bind(&CModel::acceptPersistInserter, &model, _1));
+    } else if (dynamic_cast<const CMultivariateTimeSeriesModel*>(&model) != 0) {
+        inserter.insertLevel(MULTIVARIATE_TIME_SERIES_TAG, boost::bind(&CModel::acceptPersistInserter, &model, _1));
+    } else if (dynamic_cast<const CModelStub*>(&model) != 0) {
         inserter.insertValue(MODEL_STUB_TAG, "");
-    }
-    else
-    {
+    } else {
         LOG_ERROR("Model with type '" << typeid(model).name() << "' has no defined name");
     }
 }
-
 }
 }
diff --git a/lib/maths/CModelWeight.cc b/lib/maths/CModelWeight.cc
index 25747f6901..0939178e7a 100644
--- a/lib/maths/CModelWeight.cc
+++ b/lib/maths/CModelWeight.cc
@@ -15,78 +15,57 @@
 
 #include 
 
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {
 
-namespace
-{
+namespace {
 
 // We use short field names to reduce the state size
 const std::string LOG_WEIGHT_TAG("a");
 const std::string LONG_TERM_LOG_WEIGHT_TAG("c");
 const double LOG_SMALLEST_WEIGHT = std::log(CTools::smallestProbability());
-
 }
 
-CModelWeight::CModelWeight(double weight) :
-    m_LogWeight(std::log(weight)),
-    m_LongTermLogWeight(m_LogWeight)
-{}
+CModelWeight::CModelWeight(double weight) : m_LogWeight(std::log(weight)), m_LongTermLogWeight(m_LogWeight) {
+}
 
-CModelWeight::operator double() const
-{
+CModelWeight::operator double() const {
     return m_LogWeight < LOG_SMALLEST_WEIGHT ? 0.0 : std::exp(m_LogWeight);
 }
 
-double CModelWeight::logWeight() const
-{
+double CModelWeight::logWeight() const {
     return m_LogWeight;
 }
 
-void CModelWeight::logWeight(double logWeight)
-{
+void CModelWeight::logWeight(double logWeight) {
     m_LogWeight = logWeight;
 }
 
-void CModelWeight::addLogFactor(double logFactor)
-{
+void CModelWeight::addLogFactor(double logFactor) {
     m_LogWeight += logFactor;
 }
 
-void CModelWeight::age(double alpha)
-{
+void CModelWeight::age(double alpha) {
     m_LogWeight = alpha * m_LogWeight + (1 - alpha) * m_LongTermLogWeight;
 }
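
age() relaxes the instantaneous log-weight geometrically towards its long-term value: the gap shrinks by a factor alpha per call. A standalone sketch with hypothetical numbers:

#include <iostream>

int main() {
    double alpha = 0.9;              // hypothetical aging factor
    double logWeight = -3.0;         // instantaneous log-weight
    double longTermLogWeight = -1.0; // long-term log-weight
    for (int i = 0; i < 5; ++i) {
        logWeight = alpha * logWeight + (1.0 - alpha) * longTermLogWeight;
        std::cout << logWeight << '\n'; // gap to -1.0 shrinks by factor 0.9 each step
    }
    return 0;
}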
 
-uint64_t CModelWeight::checksum(uint64_t seed) const
-{
+uint64_t CModelWeight::checksum(uint64_t seed) const {
     seed = CChecksum::calculate(seed, m_LogWeight);
     return CChecksum::calculate(seed, m_LongTermLogWeight);
 }
 
-bool CModelWeight::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
+bool CModelWeight::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
         RESTORE_BUILT_IN(LOG_WEIGHT_TAG, m_LogWeight)
         RESTORE_BUILT_IN(LONG_TERM_LOG_WEIGHT_TAG, m_LongTermLogWeight)
-    }
-    while (traverser.next());
+    } while (traverser.next());
     return true;
 }
 
-void CModelWeight::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
-    inserter.insertValue(LOG_WEIGHT_TAG,
-                         m_LogWeight,
-                         core::CIEEE754::E_DoublePrecision);
-    inserter.insertValue(LONG_TERM_LOG_WEIGHT_TAG,
-                         m_LongTermLogWeight,
-                         core::CIEEE754::E_SinglePrecision);
+void CModelWeight::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    inserter.insertValue(LOG_WEIGHT_TAG, m_LogWeight, core::CIEEE754::E_DoublePrecision);
+    inserter.insertValue(LONG_TERM_LOG_WEIGHT_TAG, m_LongTermLogWeight, core::CIEEE754::E_SinglePrecision);
 }
-
 }
 }
diff --git a/lib/maths/CMultimodalPrior.cc b/lib/maths/CMultimodalPrior.cc
index 6c51e81789..4ee3faf5ab 100644
--- a/lib/maths/CMultimodalPrior.cc
+++ b/lib/maths/CMultimodalPrior.cc
@@ -8,9 +8,9 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -18,7 +18,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -26,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,20 +34,17 @@
 #include 
 
 #include 
-#include 
 #include 
+#include 
 #include 
 
 #include 
 #include 
 
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {
 
-namespace
-{
+namespace {
 
 using TSizeDoublePr = std::pair<std::size_t, double>;
 using TSizeDoublePr2Vec = core::CSmallVector<TSizeDoublePr, 2>;
@@ -66,98 +63,71 @@ const std::string NUMBER_SAMPLES_TAG("d");
 //const std::string MAXIMUM_TAG("f"); No longer used
 const std::string DECAY_RATE_TAG("g");
 const std::string EMPTY_STRING;
-
 }
 
 //////// CMultimodalPrior Implementation ////////
 
 CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType,
-                                   const CClusterer1d &clusterer,
-                                   const CPrior &seedPrior,
-                                   double decayRate/*= 0.0*/) :
-        CPrior(dataType, decayRate),
-        m_Clusterer(clusterer.clone()),
-        m_SeedPrior(seedPrior.clone())
-{
+                                   const CClusterer1d& clusterer,
+                                   const CPrior& seedPrior,
+                                   double decayRate /*= 0.0*/)
+    : CPrior(dataType, decayRate), m_Clusterer(clusterer.clone()), m_SeedPrior(seedPrior.clone()) {
     // Register the split and merge callbacks.
     m_Clusterer->splitFunc(CModeSplitCallback(*this));
     m_Clusterer->mergeFunc(CModeMergeCallback(*this));
 }
 
-CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType,
-                                   const TMeanVarAccumulatorVec &moments,
-                                   double decayRate/*= 0.0*/) :
-        CPrior(dataType, decayRate),
-        m_SeedPrior(CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate).clone())
-{
+CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType, const TMeanVarAccumulatorVec& moments, double decayRate /*= 0.0*/)
+    : CPrior(dataType, decayRate), m_SeedPrior(CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate).clone()) {
+    using TNormalVec = std::vector<CNormalMeanPrecConjugate>;
 
     TNormalVec normals;
     normals.reserve(moments.size());
-    for (const auto &moments_ : moments)
-    {
+    for (const auto& moments_ : moments) {
         normals.emplace_back(dataType, moments_, decayRate);
     }
 
     m_Clusterer.reset(new CKMeansOnline1d(normals));
 
     m_Modes.reserve(normals.size());
-    for (std::size_t i = 0u; i < normals.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < normals.size(); ++i) {
         m_Modes.emplace_back(i, TPriorPtr(normals.back().clone()));
     }
 }
 
-CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType,
-                                   double decayRate,
-                                   TPriorPtrVec &priors):
-        CPrior(dataType, decayRate)
-{
+CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType, double decayRate, TPriorPtrVec& priors) : CPrior(dataType, decayRate) {
     m_Modes.reserve(priors.size());
-    for (std::size_t i = 0u; i < priors.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < priors.size(); ++i) {
         m_Modes.emplace_back(i, priors[i]);
     }
 }
 
-CMultimodalPrior::CMultimodalPrior(const SDistributionRestoreParams ¶ms,
-                                   core::CStateRestoreTraverser &traverser) :
-        CPrior(params.s_DataType, params.s_DecayRate)
-{
-    traverser.traverseSubLevel(boost::bind(&CMultimodalPrior::acceptRestoreTraverser,
-                                           this, boost::cref(params), _1));
+CMultimodalPrior::CMultimodalPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser)
+    : CPrior(params.s_DataType, params.s_DecayRate) {
+    traverser.traverseSubLevel(boost::bind(&CMultimodalPrior::acceptRestoreTraverser, this, boost::cref(params), _1));
 }
 
-bool CMultimodalPrior::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms,
-                                              core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
-        RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG,
-                               double decayRate,
-                               core::CStringUtils::stringToType(traverser.value(), decayRate),
-                               this->decayRate(decayRate))
-        RESTORE(CLUSTERER_TAG, traverser.traverseSubLevel(boost::bind(CClustererStateSerialiser(),
-                                                                            boost::cref(params),
-                                                                            boost::ref(m_Clusterer), _1)))
-        RESTORE(SEED_PRIOR_TAG, traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(),
-                                                                             boost::cref(params),
-                                                                             boost::ref(m_SeedPrior), _1)))
+bool CMultimodalPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
+        RESTORE_SETUP_TEARDOWN(
+            DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate))
+        RESTORE(
+            CLUSTERER_TAG,
+            traverser.traverseSubLevel(boost::bind(CClustererStateSerialiser(), boost::cref(params), boost::ref(m_Clusterer), _1)))
+        RESTORE(SEED_PRIOR_TAG,
+                traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(m_SeedPrior), _1)))
         RESTORE_SETUP_TEARDOWN(MODE_TAG,
                                TMode mode,
-                               traverser.traverseSubLevel(boost::bind(&TMode::acceptRestoreTraverser,
-                                                                      &mode, boost::cref(params), _1)),
+                               traverser.traverseSubLevel(boost::bind(&TMode::acceptRestoreTraverser, &mode, boost::cref(params), _1)),
                                m_Modes.push_back(mode))
         RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG,
                                double numberSamples,
                                core::CStringUtils::stringToType(traverser.value(), numberSamples),
                                this->numberSamples(numberSamples))
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
-    if (m_Clusterer)
-    {
+    if (m_Clusterer) {
         // Register the split and merge callbacks.
         m_Clusterer->splitFunc(CModeSplitCallback(*this));
         m_Clusterer->mergeFunc(CModeMergeCallback(*this));
@@ -166,11 +136,8 @@ bool CMultimodalPrior::acceptRestoreTraverser(const SDistributionRestoreParams &
     return true;
 }
 
-CMultimodalPrior::CMultimodalPrior(const CMultimodalPrior &other) :
-        CPrior(other.dataType(), other.decayRate()),
-        m_Clusterer(other.m_Clusterer->clone()),
-        m_SeedPrior(other.m_SeedPrior->clone())
-{
+CMultimodalPrior::CMultimodalPrior(const CMultimodalPrior& other)
+    : CPrior(other.dataType(), other.decayRate()), m_Clusterer(other.m_Clusterer->clone()), m_SeedPrior(other.m_SeedPrior->clone()) {
     // Register the split and merge callbacks.
     m_Clusterer->splitFunc(CModeSplitCallback(*this));
     m_Clusterer->mergeFunc(CModeMergeCallback(*this));
@@ -178,8 +145,7 @@ CMultimodalPrior::CMultimodalPrior(const CMultimodalPrior &other) :
     // Clone all the modes up front so we can implement strong exception safety.
     TModeVec modes;
     modes.reserve(other.m_Modes.size());
-    for (const auto &mode : other.m_Modes)
-    {
+    for (const auto& mode : other.m_Modes) {
         modes.emplace_back(mode.s_Index, TPriorPtr(mode.s_Prior->clone()));
     }
     m_Modes.swap(modes);
@@ -187,18 +153,15 @@ CMultimodalPrior::CMultimodalPrior(const CMultimodalPrior &other) :
     this->addSamples(other.numberSamples());
 }
 
-CMultimodalPrior &CMultimodalPrior::operator=(const CMultimodalPrior &rhs)
-{
-    if (this != &rhs)
-    {
+CMultimodalPrior& CMultimodalPrior::operator=(const CMultimodalPrior& rhs) {
+    if (this != &rhs) {
         CMultimodalPrior copy(rhs);
         this->swap(copy);
     }
     return *this;
 }
 
-void CMultimodalPrior::swap(CMultimodalPrior &other)
-{
+void CMultimodalPrior::swap(CMultimodalPrior& other) {
     this->CPrior::swap(other);
 
     std::swap(m_Clusterer, other.m_Clusterer);
@@ -214,78 +177,58 @@ void CMultimodalPrior::swap(CMultimodalPrior &other)
     m_Modes.swap(other.m_Modes);
 }
 
-CMultimodalPrior::EPrior CMultimodalPrior::type() const
-{
+CMultimodalPrior::EPrior CMultimodalPrior::type() const {
     return E_Multimodal;
 }
 
-CMultimodalPrior *CMultimodalPrior::clone() const
-{
+CMultimodalPrior* CMultimodalPrior::clone() const {
     return new CMultimodalPrior(*this);
 }
 
-void CMultimodalPrior::dataType(maths_t::EDataType value)
-{
+void CMultimodalPrior::dataType(maths_t::EDataType value) {
     this->CPrior::dataType(value);
     m_Clusterer->dataType(value);
-    for (const auto &mode : m_Modes)
-    {
+    for (const auto& mode : m_Modes) {
         mode.s_Prior->dataType(value);
     }
 }
 
-void CMultimodalPrior::decayRate(double value)
-{
+void CMultimodalPrior::decayRate(double value) {
     this->CPrior::decayRate(value);
     m_Clusterer->decayRate(value);
-    for (const auto &mode : m_Modes)
-    {
+    for (const auto& mode : m_Modes) {
         mode.s_Prior->decayRate(value);
     }
     m_SeedPrior->decayRate(value);
 }
 
-void CMultimodalPrior::setToNonInformative(double /*offset*/, double decayRate)
-{
+void CMultimodalPrior::setToNonInformative(double /*offset*/, double decayRate) {
     m_Clusterer->clear();
     m_Modes.clear();
     this->decayRate(decayRate);
     this->numberSamples(0.0);
 }
 
-bool CMultimodalPrior::needsOffset() const
-{
-    for (const auto &mode : m_Modes)
-    {
-        if (mode.s_Prior->needsOffset())
-        {
+bool CMultimodalPrior::needsOffset() const {
+    for (const auto& mode : m_Modes) {
+        if (mode.s_Prior->needsOffset()) {
             return true;
         }
     }
     return false;
 }
 
-double CMultimodalPrior::adjustOffset(const TWeightStyleVec &weightStyles,
-                                      const TDouble1Vec &samples,
-                                      const TDouble4Vec1Vec &weights)
-{
+double CMultimodalPrior::adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) {
     double result = 0.0;
 
-    if (this->needsOffset())
-    {
+    if (this->needsOffset()) {
         TSizeDoublePr2Vec clusters;
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             m_Clusterer->cluster(samples[i], clusters);
-            for (const auto &cluster : clusters)
-            {
-                auto j = std::find_if(m_Modes.begin(), m_Modes.end(),
-                                      CSetTools::CIndexInSet(cluster.first));
-                if (j != m_Modes.end())
-                {
-                    result += j->s_Prior->adjustOffset(weightStyles,
-                                                       TDouble1Vec(1, samples[i]),
-                                                       TDouble4Vec1Vec(1, weights[i]));
+            for (const auto& cluster : clusters) {
+                auto j = std::find_if(m_Modes.begin(), m_Modes.end(), CSetTools::CIndexInSet(cluster.first));
+                if (j != m_Modes.end()) {
+                    result += j->s_Prior->adjustOffset(weightStyles, TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, weights[i]));
                 }
             }
         }
@@ -294,31 +237,22 @@ double CMultimodalPrior::adjustOffset(const TWeightStyleVec &weightStyles,
     return result;
 }
 
-double CMultimodalPrior::offset() const
-{
+double CMultimodalPrior::offset() const {
     double offset = 0.0;
-    for (const auto &mode : m_Modes)
-    {
+    for (const auto& mode : m_Modes) {
         offset = std::max(offset, mode.s_Prior->offset());
     }
     return offset;
 }
 
-void CMultimodalPrior::addSamples(const TWeightStyleVec &weightStyles_,
-                                  const TDouble1Vec &samples,
-                                  const TDouble4Vec1Vec &weights)
-{
-    if (samples.empty())
-    {
+void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) {
+    if (samples.empty()) {
         return;
     }
 
-    if (samples.size() != weights.size())
-    {
-        LOG_ERROR("Mismatch in samples '"
-                  << core::CContainerPrinter::print(samples)
-                  << "' and weights '"
-                  << core::CContainerPrinter::print(weights) << "'");
+    if (samples.size() != weights.size()) {
+        LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+                                          << core::CContainerPrinter::print(weights) << "'");
         return;
     }
 
@@ -354,70 +288,57 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec &weightStyles_,
     std::size_t indices[maths_t::NUMBER_WEIGHT_STYLES];
     std::size_t missing = weightStyles.size() + 1;
     std::fill_n(indices, maths_t::NUMBER_WEIGHT_STYLES, missing);
-    for (std::size_t i = 0u; i < weightStyles.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < weightStyles.size(); ++i) {
         indices[weightStyles[i]] = i;
     }
     std::size_t seasonal = indices[maths_t::E_SampleSeasonalVarianceScaleWeight];
     std::size_t count = indices[maths_t::E_SampleCountWeight];
     std::size_t winsorisation = indices[maths_t::E_SampleWinsorisationWeight];
-    if (count == missing)
-    {
+    if (count == missing) {
         count = weightStyles.size();
         weightStyles.push_back(maths_t::E_SampleCountWeight);
     }
 
-    try
-    {
+    try {
         bool hasSeasonalScale = !this->isNonInformative() && seasonal != missing;
-        double mean = (  !this->isNonInformative()
-                       && maths_t::hasSeasonalVarianceScale(weightStyles_, weights)) ?
-                      this->marginalLikelihoodMean() : 0.0;
+        double mean =
+            (!this->isNonInformative() && maths_t::hasSeasonalVarianceScale(weightStyles_, weights)) ? this->marginalLikelihoodMean() : 0.0;
 
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             double x = samples[i];
-            if (!CMathsFuncs::isFinite(x))
-            {
+            if (!CMathsFuncs::isFinite(x)) {
                 LOG_ERROR("Discarding " << x);
                 continue;
             }
-            if (hasSeasonalScale)
-            {
+            if (hasSeasonalScale) {
                 x = mean + (x - mean) / std::sqrt(weights[i][seasonal]);
             }
 
             sample[0] = x;
             weight[0] = weights[i];
             weight[0].resize(weightStyles.size(), 1.0);
-            if (seasonal != missing)
-            {
+            if (seasonal != missing) {
                 weight[0][seasonal] = 1.0;
             }
 
             clusters.clear();
             m_Clusterer->add(x, clusters, weight[0][count]);
 
-            double Z = std::accumulate(m_Modes.begin(), m_Modes.end(),
-                                       weight[0][count],
-                                       [](double sum, const TMode &mode) { return sum + mode.weight(); });
+            double Z = std::accumulate(
+                m_Modes.begin(), m_Modes.end(), weight[0][count], [](double sum, const TMode& mode) { return sum + mode.weight(); });
 
             double n = 0.0;
-            for (const auto &cluster : clusters)
-            {
-                auto k = std::find_if(m_Modes.begin(), m_Modes.end(),
-                                      CSetTools::CIndexInSet(cluster.first));
-                if (k == m_Modes.end())
-                {
+            for (const auto& cluster : clusters) {
+                auto k = std::find_if(m_Modes.begin(), m_Modes.end(), CSetTools::CIndexInSet(cluster.first));
+                if (k == m_Modes.end()) {
                     LOG_TRACE("Creating mode with index " << cluster.first);
                     m_Modes.emplace_back(cluster.first, m_SeedPrior);
                     k = m_Modes.end() - 1;
                 }
                 weight[0][count] = cluster.second;
-                if (winsorisation != missing)
-                {
-                    double &ww = weight[0][winsorisation];
-                    double f   = (k->weight() + cluster.second) / Z;
+                if (winsorisation != missing) {
+                    double& ww = weight[0][winsorisation];
+                    double f = (k->weight() + cluster.second) / Z;
                     ww = std::max(1.0 - (1.0 - ww) / f, ww * f);
                 }
                 k->s_Prior->addSamples(weightStyles, sample, weight);
@@ -425,23 +346,16 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec &weightStyles_,
             }
             this->addSamples(n);
         }
-    }
-    catch (const std::exception &e)
-    {
-        LOG_ERROR("Failed to update likelihood: " << e.what());
-    }
+    } catch (const std::exception& e) { LOG_ERROR("Failed to update likelihood: " << e.what()); }
 }
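
One line in addSamples above that rewards a worked example is the winsorisation adjustment, where f = (k->weight() + cluster.second) / Z is the mode's share of the total mass. A sketch of just that arithmetic with made-up numbers:

#include <algorithm>
#include <cstdio>

int main() {
    // Illustrative: a sample carrying winsorisation weight 0.5 updating a
    // mode that holds a quarter of the total mass.
    double ww = 0.5;
    double f = 0.25;

    // The same adjustment as in CMultimodalPrior::addSamples: the sample's
    // winsorisation weight is rescaled by the mode's share of the mass.
    ww = std::max(1.0 - (1.0 - ww) / f, ww * f);
    std::printf("adjusted winsorisation weight = %.3f\n", ww);
    return 0;
}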
 
-void CMultimodalPrior::propagateForwardsByTime(double time)
-{
-    if (!CMathsFuncs::isFinite(time) || time < 0.0)
-    {
+void CMultimodalPrior::propagateForwardsByTime(double time) {
+    if (!CMathsFuncs::isFinite(time) || time < 0.0) {
         LOG_ERROR("Bad propagation time " << time);
         return;
     }
 
-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // Nothing to be done.
         return;
     }
@@ -454,8 +368,7 @@ void CMultimodalPrior::propagateForwardsByTime(double time)
     // all weights by some factor f in the range [0, 1].
 
     m_Clusterer->propagateForwardsByTime(time);
-    for (const auto &mode : m_Modes)
-    {
+    for (const auto& mode : m_Modes) {
         mode.s_Prior->propagateForwardsByTime(time);
     }
 
@@ -463,31 +376,25 @@ void CMultimodalPrior::propagateForwardsByTime(double time)
     LOG_TRACE("numberSamples = " << this->numberSamples());
 }
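
On the comment in propagateForwardsByTime above about multiplying all weights by some factor f in [0, 1]: a sketch of what such decay looks like numerically, assuming a factor of the form exp(-decayRate * time); the exact expression lives outside this hunk, so treat the formula as illustrative:

#include <cmath>
#include <cstdio>

int main() {
    // Illustrative decay rate and effective sample count.
    const double decayRate = 0.001;
    const double numberSamples = 500.0;

    // Longer propagation intervals shrink the factor, and with it the
    // influence of historical data.
    for (double time : {100.0, 1000.0, 10000.0}) {
        double factor = std::exp(-decayRate * time);
        std::printf("time = %7.0f: factor = %.4f, samples -> %.1f\n", time, factor, numberSamples * factor);
    }
    return 0;
}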
 
-TDoubleDoublePr CMultimodalPrior::marginalLikelihoodSupport() const
-{
+TDoubleDoublePr CMultimodalPrior::marginalLikelihoodSupport() const {
     return CMultimodalPriorUtils::marginalLikelihoodSupport(m_Modes);
 }
 
-double CMultimodalPrior::marginalLikelihoodMean() const
-{
+double CMultimodalPrior::marginalLikelihoodMean() const {
     return CMultimodalPriorUtils::marginalLikelihoodMean(m_Modes);
 }
 
-double CMultimodalPrior::nearestMarginalLikelihoodMean(double value) const
-{
-    if (m_Modes.empty())
-    {
+double CMultimodalPrior::nearestMarginalLikelihoodMean(double value) const {
+    if (m_Modes.empty()) {
         return 0.0;
     }
 
     double mean = m_Modes[0].s_Prior->marginalLikelihoodMean();
     double distance = std::fabs(value - mean);
     double result = mean;
-    for (std::size_t i = 1u; i < m_Modes.size(); ++i)
-    {
+    for (std::size_t i = 1u; i < m_Modes.size(); ++i) {
         mean = m_Modes[i].s_Prior->marginalLikelihoodMean();
-        if (std::fabs(value - mean) < distance)
-        {
+        if (std::fabs(value - mean) < distance) {
             distance = std::fabs(value - mean);
             result = mean;
         }
@@ -495,62 +402,47 @@ double CMultimodalPrior::nearestMarginalLikelihoodMean(double value) const
     return result;
 }
 
-double CMultimodalPrior::marginalLikelihoodMode(const TWeightStyleVec &weightStyles,
-                                                const TDouble4Vec &weights) const
-{
+double CMultimodalPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const {
     return CMultimodalPriorUtils::marginalLikelihoodMode(m_Modes, weightStyles, weights);
 }
 
-CMultimodalPrior::TDouble1Vec
-CMultimodalPrior::marginalLikelihoodModes(const TWeightStyleVec &weightStyles,
-                                          const TDouble4Vec &weights) const
-{
+CMultimodalPrior::TDouble1Vec CMultimodalPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles,
+                                                                        const TDouble4Vec& weights) const {
     TDouble1Vec result(m_Modes.size());
-    for (std::size_t i = 0u; i < m_Modes.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Modes.size(); ++i) {
         result[i] = m_Modes[i].s_Prior->marginalLikelihoodMode(weightStyles, weights);
     }
     return result;
 }
 
-double CMultimodalPrior::marginalLikelihoodVariance(const TWeightStyleVec &weightStyles,
-                                                    const TDouble4Vec &weights) const
-{
+double CMultimodalPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const {
     return CMultimodalPriorUtils::marginalLikelihoodVariance(m_Modes, weightStyles, weights);
 }
 
 TDoubleDoublePr CMultimodalPrior::marginalLikelihoodConfidenceInterval(double percentage,
-                                                                       const TWeightStyleVec &weightStyles,
-                                                                       const TDouble4Vec &weights) const
-{
+                                                                       const TWeightStyleVec& weightStyles,
+                                                                       const TDouble4Vec& weights) const {
     return CMultimodalPriorUtils::marginalLikelihoodConfidenceInterval(*this, m_Modes, percentage, weightStyles, weights);
 }
 
-maths_t::EFloatingPointErrorStatus
-CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                             const TDouble1Vec &samples,
-                                             const TDouble4Vec1Vec &weights,
-                                             double &result) const
-{
+maths_t::EFloatingPointErrorStatus CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                                const TDouble1Vec& samples,
+                                                                                const TDouble4Vec1Vec& weights,
+                                                                                double& result) const {
     result = 0.0;
 
-    if (samples.empty())
-    {
+    if (samples.empty()) {
         LOG_ERROR("Can't compute likelihood for empty sample set");
         return maths_t::E_FpFailed;
     }
 
-    if (samples.size() != weights.size())
-    {
-        LOG_ERROR("Mismatch in samples '"
-                  << core::CContainerPrinter::print(samples)
-                  << "' and weights '"
-                  << core::CContainerPrinter::print(weights) << "'");
+    if (samples.size() != weights.size()) {
+        LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+                                          << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }
 
-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // The non-informative likelihood is improper and effectively
         // zero everywhere. We use minus max double because
         // log(0) = HUGE_VALUE, which causes problems for Windows.
@@ -563,215 +455,161 @@ CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles
         return maths_t::E_FpOverflowed;
     }
 
-    return m_Modes.size() == 1 ?
-           m_Modes[0].s_Prior->jointLogMarginalLikelihood(weightStyles, samples, weights, result) :
-           CMultimodalPriorUtils::jointLogMarginalLikelihood(m_Modes, weightStyles, samples, weights, result);
+    return m_Modes.size() == 1 ? m_Modes[0].s_Prior->jointLogMarginalLikelihood(weightStyles, samples, weights, result)
+                               : CMultimodalPriorUtils::jointLogMarginalLikelihood(m_Modes, weightStyles, samples, weights, result);
 }
 
-void CMultimodalPrior::sampleMarginalLikelihood(std::size_t numberSamples,
-                                                TDouble1Vec &samples) const
-{
+void CMultimodalPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const {
     samples.clear();
 
-    if (numberSamples == 0 || this->numberSamples() == 0.0)
-    {
+    if (numberSamples == 0 || this->numberSamples() == 0.0) {
         return;
     }
 
     CMultimodalPriorUtils::sampleMarginalLikelihood(m_Modes, numberSamples, samples);
 }
 
-bool CMultimodalPrior::minusLogJointCdf(const TWeightStyleVec &weightStyles,
-                                        const TDouble1Vec &samples,
-                                        const TDouble4Vec1Vec &weights,
-                                        double &lowerBound,
-                                        double &upperBound) const
-{
-    return CMultimodalPriorUtils::minusLogJointCdf(m_Modes,
-                                                   weightStyles,
-                                                   samples,
-                                                   weights,
-                                                   lowerBound, upperBound);
-}
-
-bool CMultimodalPrior::minusLogJointCdfComplement(const TWeightStyleVec &weightStyles,
-                                                  const TDouble1Vec &samples,
-                                                  const TDouble4Vec1Vec &weights,
-                                                  double &lowerBound,
-                                                  double &upperBound) const
-{
-    return CMultimodalPriorUtils::minusLogJointCdfComplement(m_Modes,
-                                                             weightStyles,
-                                                             samples,
-                                                             weights,
-                                                             lowerBound, upperBound);
+bool CMultimodalPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                        const TDouble1Vec& samples,
+                                        const TDouble4Vec1Vec& weights,
+                                        double& lowerBound,
+                                        double& upperBound) const {
+    return CMultimodalPriorUtils::minusLogJointCdf(m_Modes, weightStyles, samples, weights, lowerBound, upperBound);
+}
+
+bool CMultimodalPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                                  const TDouble1Vec& samples,
+                                                  const TDouble4Vec1Vec& weights,
+                                                  double& lowerBound,
+                                                  double& upperBound) const {
+    return CMultimodalPriorUtils::minusLogJointCdfComplement(m_Modes, weightStyles, samples, weights, lowerBound, upperBound);
 }
 
 bool CMultimodalPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                                      const TWeightStyleVec &weightStyles,
-                                                      const TDouble1Vec &samples,
-                                                      const TDouble4Vec1Vec &weights,
-                                                      double &lowerBound,
-                                                      double &upperBound,
-                                                      maths_t::ETail &tail) const
-{
-    return CMultimodalPriorUtils::probabilityOfLessLikelySamples(*this, m_Modes,
-                                                                 calculation,
-                                                                 weightStyles,
-                                                                 samples,
-                                                                 weights,
-                                                                 lowerBound, upperBound, tail);
-}
-
-bool CMultimodalPrior::isNonInformative() const
-{
+                                                      const TWeightStyleVec& weightStyles,
+                                                      const TDouble1Vec& samples,
+                                                      const TDouble4Vec1Vec& weights,
+                                                      double& lowerBound,
+                                                      double& upperBound,
+                                                      maths_t::ETail& tail) const {
+    return CMultimodalPriorUtils::probabilityOfLessLikelySamples(
+        *this, m_Modes, calculation, weightStyles, samples, weights, lowerBound, upperBound, tail);
+}
+
+bool CMultimodalPrior::isNonInformative() const {
     return CMultimodalPriorUtils::isNonInformative(m_Modes);
 }
 
-void CMultimodalPrior::print(const std::string &indent, std::string &result) const
-{
+void CMultimodalPrior::print(const std::string& indent, std::string& result) const {
     CMultimodalPriorUtils::print(m_Modes, indent, result);
 }
 
-std::string CMultimodalPrior::printJointDensityFunction() const
-{
+std::string CMultimodalPrior::printJointDensityFunction() const {
     return "Not supported";
 }
 
-uint64_t CMultimodalPrior::checksum(uint64_t seed) const
-{
+uint64_t CMultimodalPrior::checksum(uint64_t seed) const {
     seed = this->CPrior::checksum(seed);
     seed = CChecksum::calculate(seed, m_Clusterer);
     seed = CChecksum::calculate(seed, m_SeedPrior);
     return CChecksum::calculate(seed, m_Modes);
 }
 
-void CMultimodalPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CMultimodalPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CMultimodalPrior");
     core::CMemoryDebug::dynamicSize("m_Clusterer", m_Clusterer, mem);
     core::CMemoryDebug::dynamicSize("m_SeedPrior", m_SeedPrior, mem);
     core::CMemoryDebug::dynamicSize("m_Modes", m_Modes, mem);
 }
 
-std::size_t CMultimodalPrior::memoryUsage() const
-{
+std::size_t CMultimodalPrior::memoryUsage() const {
     std::size_t mem = core::CMemory::dynamicSize(m_Clusterer);
     mem += core::CMemory::dynamicSize(m_SeedPrior);
     mem += core::CMemory::dynamicSize(m_Modes);
     return mem;
 }
 
-std::size_t CMultimodalPrior::staticSize() const
-{
+std::size_t CMultimodalPrior::staticSize() const {
     return sizeof(*this);
 }
 
-void CMultimodalPrior::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
-    inserter.insertLevel(CLUSTERER_TAG,
-                         boost::bind(CClustererStateSerialiser(),
-                                           boost::cref(*m_Clusterer),
-                                           _1));
-    inserter.insertLevel(SEED_PRIOR_TAG,
-                         boost::bind(CPriorStateSerialiser(), boost::cref(*m_SeedPrior), _1));
-    for (std::size_t i = 0u; i < m_Modes.size(); ++i)
-    {
-        inserter.insertLevel(MODE_TAG,
-                             boost::bind(&TMode::acceptPersistInserter, &m_Modes[i], _1));
+void CMultimodalPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    inserter.insertLevel(CLUSTERER_TAG, boost::bind(CClustererStateSerialiser(), boost::cref(*m_Clusterer), _1));
+    inserter.insertLevel(SEED_PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_SeedPrior), _1));
+    for (std::size_t i = 0u; i < m_Modes.size(); ++i) {
+        inserter.insertLevel(MODE_TAG, boost::bind(&TMode::acceptPersistInserter, &m_Modes[i], _1));
     }
     inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision);
     inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision);
 }
 
-std::size_t CMultimodalPrior::numberModes() const
-{
+std::size_t CMultimodalPrior::numberModes() const {
     return m_Modes.size();
 }
 
-bool CMultimodalPrior::checkInvariants(const std::string &tag) const
-{
+bool CMultimodalPrior::checkInvariants(const std::string& tag) const {
     bool result = true;
 
-    if (m_Modes.size() != m_Clusterer->numberClusters())
-    {
-        LOG_ERROR(tag << "# modes = " << m_Modes.size()
-                      << ", # clusters = " << m_Clusterer->numberClusters());
+    if (m_Modes.size() != m_Clusterer->numberClusters()) {
+        LOG_ERROR(tag << "# modes = " << m_Modes.size() << ", # clusters = " << m_Clusterer->numberClusters());
         result = false;
     }
 
     double numberSamples = this->numberSamples();
     double modeSamples = 0.0;
-    for (const auto &mode : m_Modes)
-    {
-        if (!m_Clusterer->hasCluster(mode.s_Index))
-        {
+    for (const auto& mode : m_Modes) {
+        if (!m_Clusterer->hasCluster(mode.s_Index)) {
             LOG_ERROR(tag << "Expected cluster for = " << mode.s_Index);
             result = false;
         }
         modeSamples += mode.s_Prior->numberSamples();
     }
 
-    CEqualWithTolerance<double> equal(  CToleranceTypes::E_AbsoluteTolerance
-                                      | CToleranceTypes::E_RelativeTolerance,
-                                      1e-3);
-    if (!equal(modeSamples, numberSamples))
-    {
-        LOG_ERROR(tag << "Sum mode samples = " << modeSamples
-                      << ", total samples = " << numberSamples);
+    CEqualWithTolerance<double> equal(CToleranceTypes::E_AbsoluteTolerance | CToleranceTypes::E_RelativeTolerance, 1e-3);
+    if (!equal(modeSamples, numberSamples)) {
+        LOG_ERROR(tag << "Sum mode samples = " << modeSamples << ", total samples = " << numberSamples);
         result = false;
     }
 
     return result;
 }
 
-bool CMultimodalPrior::participatesInModelSelection() const
-{
+bool CMultimodalPrior::participatesInModelSelection() const {
     return m_Modes.size() > 1;
 }
 
-double CMultimodalPrior::unmarginalizedParameters() const
-{
+double CMultimodalPrior::unmarginalizedParameters() const {
+    return std::max(static_cast<double>(m_Modes.size()), 1.0) - 1.0;
 }
 
-std::string CMultimodalPrior::debugWeights() const
-{
+std::string CMultimodalPrior::debugWeights() const {
     return TMode::debugWeights(m_Modes);
 }
 
-
 ////////// CMultimodalPrior::CModeSplitCallback Implementation //////////
 
-CMultimodalPrior::CModeSplitCallback::CModeSplitCallback(CMultimodalPrior &prior) :
-        m_Prior(&prior)
-{}
+CMultimodalPrior::CModeSplitCallback::CModeSplitCallback(CMultimodalPrior& prior) : m_Prior(&prior) {
+}
 
-void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
-                                                      std::size_t leftSplitIndex,
-                                                      std::size_t rightSplitIndex) const
-{
+void CMultimodalPrior::CModeSplitCallback::
+operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const {
     LOG_TRACE("Splitting mode with index " << sourceIndex);
 
-    TModeVec &modes = m_Prior->m_Modes;
+    TModeVec& modes = m_Prior->m_Modes;
 
     // Remove the split mode.
-    auto mode = std::find_if(modes.begin(), modes.end(),
-                             CSetTools::CIndexInSet(sourceIndex));
+    auto mode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(sourceIndex));
     double numberSamples = mode != modes.end() ? mode->weight() : 0.0;
     modes.erase(mode);
 
-    double pLeft  = m_Prior->m_Clusterer->probability(leftSplitIndex);
+    double pLeft = m_Prior->m_Clusterer->probability(leftSplitIndex);
     double pRight = m_Prior->m_Clusterer->probability(rightSplitIndex);
     double Z = (pLeft + pRight);
-    if (Z > 0.0)
-    {
-        pLeft  /= Z;
+    if (Z > 0.0) {
+        pLeft /= Z;
         pRight /= Z;
     }
-    LOG_TRACE("# samples = " << numberSamples
-              << ", pLeft = " << pLeft
-              << ", pRight = " << pRight);
+    LOG_TRACE("# samples = " << numberSamples << ", pLeft = " << pLeft << ", pRight = " << pRight);
 
     // Create the child modes.
 
@@ -779,15 +617,14 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
     modes.emplace_back(leftSplitIndex, m_Prior->m_SeedPrior);
     {
         TDoubleVec samples;
-        if (!m_Prior->m_Clusterer->sample(leftSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples))
-        {
+        if (!m_Prior->m_Clusterer->sample(leftSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) {
             LOG_ERROR("Couldn't find cluster for " << leftSplitIndex);
         }
         LOG_TRACE("samples = " << core::CContainerPrinter::print(samples));
 
         double nl = pLeft * numberSamples;
         double ns = std::min(nl, 4.0);
-        double n  = static_cast<double>(samples.size());
+        double n = static_cast<double>(samples.size());
         LOG_TRACE("# left = " << nl);
 
         double seedWeight = ns / n;
@@ -795,8 +632,7 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
         modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights);
 
         double weight = (nl - ns) / n;
-        if (weight > 0.0)
-        {
+        if (weight > 0.0) {
             weights.assign(weights.size(), TDouble4Vec{weight});
             modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights);
             LOG_TRACE(modes.back().s_Prior->print());
@@ -807,15 +643,14 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
     modes.emplace_back(rightSplitIndex, m_Prior->m_SeedPrior);
     {
         TDoubleVec samples;
-        if (!m_Prior->m_Clusterer->sample(rightSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples))
-        {
+        if (!m_Prior->m_Clusterer->sample(rightSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) {
             LOG_ERROR("Couldn't find cluster for " << rightSplitIndex)
         }
         LOG_TRACE("samples = " << core::CContainerPrinter::print(samples));
 
         double nr = pRight * numberSamples;
         double ns = std::min(nr, 4.0);
-        double n  = static_cast<double>(samples.size());
+        double n = static_cast<double>(samples.size());
         LOG_TRACE("# right = " << nr);
 
         double seedWeight = ns / n;
@@ -823,85 +658,66 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
         modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights);
 
         double weight = (nr - ns) / n;
-        if (weight > 0.0)
-        {
+        if (weight > 0.0) {
             weights.assign(weights.size(), TDouble4Vec{weight});
             modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights);
             LOG_TRACE(modes.back().s_Prior->print());
         }
     }
 
-    if (!m_Prior->checkInvariants("SPLIT: "))
-    {
-        LOG_ERROR("# samples = " << numberSamples
-                  << ", # modes = " << modes.size()
-                  << ", pLeft = " << pLeft
-                  << ", pRight = " << pRight);
+    if (!m_Prior->checkInvariants("SPLIT: ")) {
+        LOG_ERROR("# samples = " << numberSamples << ", # modes = " << modes.size() << ", pLeft = " << pLeft << ", pRight = " << pRight);
     }
 
     LOG_TRACE("Split mode");
 }
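
The two addSamples passes in the split callback above also merit a worked example: each child prior is first seeded with at most four samples' worth of mass, then topped up with the remainder, so the total mass it receives equals its share of the parent's samples. With illustrative numbers:

#include <algorithm>
#include <cstdio>

int main() {
    // Illustrative: the left child inherits nl = 20 samples' worth of mass
    // and the clusterer returned n = 10 representative samples.
    double nl = 20.0;
    double n = 10.0;

    // First pass seeds the child prior with at most 4 samples' worth ...
    double ns = std::min(nl, 4.0);
    double seedWeight = ns / n;
    // ... and the second pass adds the remaining mass.
    double remainingWeight = (nl - ns) / n;

    // Each representative sample carries seedWeight + remainingWeight of
    // count, so the total mass added is n * (seedWeight + remainingWeight) == nl.
    std::printf("seed = %.2f, remainder = %.2f, total mass = %.1f\n",
                seedWeight, remainingWeight, n * (seedWeight + remainingWeight));
    return 0;
}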
 
-
 ////////// CMultimodalPrior::CModeMergeCallback Implementation //////////
 
-CMultimodalPrior::CModeMergeCallback::CModeMergeCallback(CMultimodalPrior &prior) :
-        m_Prior(&prior)
-{}
+CMultimodalPrior::CModeMergeCallback::CModeMergeCallback(CMultimodalPrior& prior) : m_Prior(&prior) {
+}
 
-void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex,
-                                                      std::size_t rightMergeIndex,
-                                                      std::size_t targetIndex) const
-{
-    LOG_TRACE("Merging modes with indices "
-              << leftMergeIndex << " " << rightMergeIndex);
+void CMultimodalPrior::CModeMergeCallback::
+operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t targetIndex) const {
+    LOG_TRACE("Merging modes with indices " << leftMergeIndex << " " << rightMergeIndex);
 
-    TModeVec &modes = m_Prior->m_Modes;
+    TModeVec& modes = m_Prior->m_Modes;
 
     // Create the new mode.
     TMode newMode(targetIndex, m_Prior->m_SeedPrior);
 
     double wl = 0.0;
     double wr = 0.0;
-    double n  = 0.0;
+    double n = 0.0;
     std::size_t nl = 0;
     std::size_t nr = 0;
     TDouble1Vec samples;
 
-    auto leftMode = std::find_if(modes.begin(), modes.end(),
-                                 CSetTools::CIndexInSet(leftMergeIndex));
-    if (leftMode != modes.end())
-    {
+    auto leftMode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(leftMergeIndex));
+    if (leftMode != modes.end()) {
         wl = leftMode->s_Prior->numberSamples();
         n += wl;
         TDouble1Vec leftSamples;
         leftMode->s_Prior->sampleMarginalLikelihood(MODE_MERGE_NUMBER_SAMPLES, leftSamples);
         nl = leftSamples.size();
         samples.insert(samples.end(), leftSamples.begin(), leftSamples.end());
-    }
-    else
-    {
+    } else {
         LOG_ERROR("Couldn't find mode for " << leftMergeIndex);
     }
 
-    auto rightMode = std::find_if(modes.begin(), modes.end(),
-                                  CSetTools::CIndexInSet(rightMergeIndex));
-    if (rightMode != modes.end())
-    {
+    auto rightMode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(rightMergeIndex));
+    if (rightMode != modes.end()) {
         wr = rightMode->s_Prior->numberSamples();
         n += wr;
         TDouble1Vec rightSamples;
         rightMode->s_Prior->sampleMarginalLikelihood(MODE_MERGE_NUMBER_SAMPLES, rightSamples);
         nr = rightSamples.size();
         samples.insert(samples.end(), rightSamples.begin(), rightSamples.end());
-    }
-    else
-    {
+    } else {
         LOG_ERROR("Couldn't find mode for " << rightMergeIndex);
     }
 
-    if (n > 0.0)
-    {
+    if (n > 0.0) {
         double nl_ = static_cast<double>(nl);
         double nr_ = static_cast<double>(nr);
         double Z = (nl_ * wl + nr_ * wr) / (nl_ + nr_);
@@ -913,20 +729,18 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex
     LOG_TRACE("n = " << n << ", wl = " << wl << ", wr = " << wr);
 
     double ns = std::min(n, 4.0);
-    double s  = static_cast<double>(samples.size());
+    double s = static_cast<double>(samples.size());
 
     double seedWeight = ns / s;
     TDouble4Vec1Vec weights;
     weights.reserve(samples.size());
-    weights.resize(nl,      TDouble1Vec{wl * seedWeight});
+    weights.resize(nl, TDouble1Vec{wl * seedWeight});
     weights.resize(nl + nr, TDouble1Vec{wr * seedWeight});
     newMode.s_Prior->addSamples(TWeights::COUNT, samples, weights);
 
     double weight = (n - ns) / s;
-    if (weight > 0.0)
-    {
-        for (std::size_t i = 0u; i < weights.size(); ++i)
-        {
+    if (weight > 0.0) {
+        for (std::size_t i = 0u; i < weights.size(); ++i) {
             weights[i][0] *= weight / seedWeight;
         }
         newMode.s_Prior->addSamples(TWeights::COUNT, samples, weights);
@@ -936,8 +750,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex
     TSizeSet mergedIndices;
     mergedIndices.insert(leftMergeIndex);
     mergedIndices.insert(rightMergeIndex);
-    modes.erase(std::remove_if(modes.begin(), modes.end(),
-                               CSetTools::CIndexInSet(mergedIndices)), modes.end());
+    modes.erase(std::remove_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(mergedIndices)), modes.end());
 
     // Add the new mode.
     LOG_TRACE("Creating mode with index " << targetIndex);
@@ -947,6 +760,5 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex
 
     LOG_TRACE("Merged modes");
 }
-
 }
 }
diff --git a/lib/maths/CMultinomialConjugate.cc b/lib/maths/CMultinomialConjugate.cc
index 527c35279b..73807e73dd 100644
--- a/lib/maths/CMultinomialConjugate.cc
+++ b/lib/maths/CMultinomialConjugate.cc
@@ -17,9 +17,9 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -38,27 +38,20 @@
 #include 
 #include 
 
-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {
 
-namespace
-{
+namespace {
 using TDoubleVec = std::vector<double>;
 using TDoubleVecCItr = TDoubleVec::const_iterator;
 using TDouble7Vec = core::CSmallVector<double, 7>;
 
-namespace detail
-{
+namespace detail {
 using TDoubleDoublePr = std::pair;
 
 //! Truncate \p x to fit into a signed integer.
-int truncate(std::size_t x)
-{
-    return x > static_cast<std::size_t>(std::numeric_limits<int>::max()) ?
-           std::numeric_limits<int>::max() :
-           static_cast<int>(x);
+int truncate(std::size_t x) {
+    return x > static_cast<std::size_t>(std::numeric_limits<int>::max()) ? std::numeric_limits<int>::max() : static_cast<int>(x);
 }
 
 //! This computes the cumulative density function of the predictive
@@ -89,57 +82,43 @@ int truncate(std::size_t x)
 //! and lower bounds: the upper bound assumes that all the additional mass
 //! is to the left of every sample, the lower bound assumes that all the
 //! additional mass is to the right of every sample.
-class CCdf : core::CNonCopyable
-{
-    public:
-        CCdf(const TDoubleVec &categories,
-             const TDoubleVec &concentrations,
-             double totalConcentration) :
-                m_Categories(categories),
-                m_Cdf(),
-                m_Pu(0.0)
-        {
-            m_Cdf.reserve(m_Categories.size() + 2u);
-
-            // Construct the c.d.f.
-            m_Cdf.push_back(0.0);
-            double r = 1.0 / static_cast<double>(concentrations.size());
-            for (std::size_t i = 0u; i < concentrations.size(); ++i)
-            {
-                double p = concentrations[i] / totalConcentration;
-                m_Cdf.push_back(m_Cdf.back() + p);
-                m_Pu += r - p;
-            }
-            m_Cdf.push_back(m_Cdf.back());
+class CCdf : core::CNonCopyable {
+public:
+    CCdf(const TDoubleVec& categories, const TDoubleVec& concentrations, double totalConcentration)
+        : m_Categories(categories), m_Cdf(), m_Pu(0.0) {
+        m_Cdf.reserve(m_Categories.size() + 2u);
+
+        // Construct the c.d.f.
+        m_Cdf.push_back(0.0);
+        double r = 1.0 / static_cast<double>(concentrations.size());
+        for (std::size_t i = 0u; i < concentrations.size(); ++i) {
+            double p = concentrations[i] / totalConcentration;
+            m_Cdf.push_back(m_Cdf.back() + p);
+            m_Pu += r - p;
         }
+        m_Cdf.push_back(m_Cdf.back());
+    }
 
-        void operator()(double x,
-                        double &lowerBound,
-                        double &upperBound) const
-        {
-            std::size_t category = std::upper_bound(m_Categories.begin(),
-                                                    m_Categories.end(),
-                                                    x) - m_Categories.begin();
+    void operator()(double x, double& lowerBound, double& upperBound) const {
+        std::size_t category = std::upper_bound(m_Categories.begin(), m_Categories.end(), x) - m_Categories.begin();
 
-            lowerBound = m_Cdf[category];
-            upperBound = m_Cdf[category] + m_Pu;
-        }
+        lowerBound = m_Cdf[category];
+        upperBound = m_Cdf[category] + m_Pu;
+    }
 
-        void dump(TDoubleVec &lowerBounds, TDoubleVec &upperBounds) const
-        {
-            lowerBounds.reserve(m_Cdf.size() - 2u);
-            upperBounds.reserve(m_Cdf.size() - 2u);
-            for (std::size_t i = 1u; i < m_Cdf.size() - 1u; ++i)
-            {
-                lowerBounds.push_back(m_Cdf[i]);
-                upperBounds.push_back(m_Cdf[i] + m_Pu);
-            }
+    void dump(TDoubleVec& lowerBounds, TDoubleVec& upperBounds) const {
+        lowerBounds.reserve(m_Cdf.size() - 2u);
+        upperBounds.reserve(m_Cdf.size() - 2u);
+        for (std::size_t i = 1u; i < m_Cdf.size() - 1u; ++i) {
+            lowerBounds.push_back(m_Cdf[i]);
+            upperBounds.push_back(m_Cdf[i] + m_Pu);
         }
+    }
 
-    private:
-        const TDoubleVec &m_Categories;
-        TDoubleVec m_Cdf;
-        double m_Pu;
+private:
+    const TDoubleVec& m_Categories;
+    TDoubleVec m_Cdf;
+    double m_Pu;
 };
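
The bound bookkeeping in CCdf deserves a tiny worked example: the mass that may belong to categories not yet observed (m_Pu) is simply added to the running c.d.f. to turn the point estimate into an interval. A self-contained sketch with illustrative concentrations; the variable names mirror, but are not, the class's members:

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    // Illustrative: three observed categories whose concentrations sum to
    // 10 while the total concentration is 12, so some mass is not yet
    // attributed to any observed category.
    std::vector<double> concentrations{6.0, 3.0, 1.0};
    double totalConcentration = 12.0;

    // Build the running c.d.f. and accumulate the unattributed mass, which
    // plays the role of m_Pu above.
    std::vector<double> cdf{0.0};
    double pu = 0.0;
    double r = 1.0 / static_cast<double>(concentrations.size());
    for (double concentration : concentrations) {
        double p = concentration / totalConcentration;
        cdf.push_back(cdf.back() + p);
        pu += r - p;
    }

    // The c.d.f. at each category is then only known to lie in
    // [lower, lower + pu].
    for (std::size_t i = 1; i < cdf.size(); ++i) {
        std::printf("category %zu: F in [%.3f, %.3f]\n", i - 1, cdf[i], cdf[i] + pu);
    }
    return 0;
}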
 
 //! This computes the complement of the cumulative density function
@@ -158,57 +137,43 @@ class CCdf : core::CNonCopyable
 //! and lower bounds: the upper bound assumes that all the additional mass
 //! is to the right of every sample, the lower bound assumes that all the
 //! additional mass is to the left of every sample.
-class CCdfComplement : core::CNonCopyable
-{
-    public:
-        CCdfComplement(const TDoubleVec &categories,
-                       const TDoubleVec &concentrations,
-                       double totalConcentration) :
-                m_Categories(categories),
-                m_CdfComplement(),
-                m_Pu(0.0)
-        {
-            m_CdfComplement.reserve(m_Categories.size() + 2u);
-
-            // Construct the c.d.f.
-            m_CdfComplement.push_back(0.0);
-            double r = 1.0 / static_cast<double>(concentrations.size());
-            for (std::size_t i = concentrations.size(); i > 0; --i)
-            {
-                double p = concentrations[i-1] / totalConcentration;
-                m_CdfComplement.push_back(m_CdfComplement.back() + p);
-                m_Pu += r - p;
-            }
-            m_CdfComplement.push_back(m_CdfComplement.back());
+class CCdfComplement : core::CNonCopyable {
+public:
+    CCdfComplement(const TDoubleVec& categories, const TDoubleVec& concentrations, double totalConcentration)
+        : m_Categories(categories), m_CdfComplement(), m_Pu(0.0) {
+        m_CdfComplement.reserve(m_Categories.size() + 2u);
+
+        // Construct the c.d.f.
+        m_CdfComplement.push_back(0.0);
+        double r = 1.0 / static_cast<double>(concentrations.size());
+        for (std::size_t i = concentrations.size(); i > 0; --i) {
+            double p = concentrations[i - 1] / totalConcentration;
+            m_CdfComplement.push_back(m_CdfComplement.back() + p);
+            m_Pu += r - p;
         }
+        m_CdfComplement.push_back(m_CdfComplement.back());
+    }
 
-        void operator()(double x,
-                        double &lowerBound,
-                        double &upperBound) const
-        {
-            std::size_t category = std::lower_bound(m_Categories.begin(),
-                                                    m_Categories.end(),
-                                                    x) - m_Categories.begin();
+    void operator()(double x, double& lowerBound, double& upperBound) const {
+        std::size_t category = std::lower_bound(m_Categories.begin(), m_Categories.end(), x) - m_Categories.begin();
 
-            lowerBound = m_CdfComplement[category + 1];
-            upperBound = m_CdfComplement[category + 1] + m_Pu;
-        }
+        lowerBound = m_CdfComplement[category + 1];
+        upperBound = m_CdfComplement[category + 1] + m_Pu;
+    }
 
-        void dump(TDoubleVec &lowerBounds, TDoubleVec &upperBounds) const
-        {
-            lowerBounds.reserve(m_CdfComplement.size() - 2u);
-            upperBounds.reserve(m_CdfComplement.size() - 2u);
-            for (std::size_t i = 1u; i < m_CdfComplement.size() - 1u; ++i)
-            {
-                lowerBounds.push_back(m_CdfComplement[i]);
-                upperBounds.push_back(m_CdfComplement[i] + m_Pu);
-            }
+    void dump(TDoubleVec& lowerBounds, TDoubleVec& upperBounds) const {
+        lowerBounds.reserve(m_CdfComplement.size() - 2u);
+        upperBounds.reserve(m_CdfComplement.size() - 2u);
+        for (std::size_t i = 1u; i < m_CdfComplement.size() - 1u; ++i) {
+            lowerBounds.push_back(m_CdfComplement[i]);
+            upperBounds.push_back(m_CdfComplement[i] + m_Pu);
         }
+    }
 
-    private:
-        const TDoubleVec &m_Categories;
-        TDoubleVec m_CdfComplement;
-        double m_Pu;
+private:
+    const TDoubleVec& m_Categories;
+    TDoubleVec m_CdfComplement;
+    double m_Pu;
 };
 
 //! Get the number of samples of the marginal priors to use as a
@@ -216,16 +181,10 @@ class CCdfComplement : core::CNonCopyable
 //!
 //! This was determined, empirically, to give reasonable errors
 //! in the calculation of the less likely probabilities.
-std::size_t numberPriorSamples(double x)
-{
-    static const double THRESHOLDS[] =
-        {
-            100.0, 1000.0, 10000.0, boost::numeric::bounds<double>::highest()
-        };
-    static const std::size_t NUMBERS[] = { 7u, 5u, 3u, 1u };
-    return NUMBERS[std::lower_bound(boost::begin(THRESHOLDS),
-                                    boost::end(THRESHOLDS), x)
-                   - boost::begin(THRESHOLDS)];
+std::size_t numberPriorSamples(double x) {
+    static const double THRESHOLDS[] = {100.0, 1000.0, 10000.0, boost::numeric::bounds<double>::highest()};
+    static const std::size_t NUMBERS[] = {7u, 5u, 3u, 1u};
+    return NUMBERS[std::lower_bound(boost::begin(THRESHOLDS), boost::end(THRESHOLDS), x) - boost::begin(THRESHOLDS)];
 }
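
A quick usage sketch of the lookup above. This standalone version substitutes std::numeric_limits<double>::max() for boost::numeric::bounds<double>::highest(), which is equivalent for this purpose:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <iterator>
#include <limits>

// Same shape as numberPriorSamples above: the more evidence x, the fewer
// marginal prior samples are needed for a given accuracy.
std::size_t numberPriorSamples(double x) {
    static const double THRESHOLDS[] = {100.0, 1000.0, 10000.0, std::numeric_limits<double>::max()};
    static const std::size_t NUMBERS[] = {7u, 5u, 3u, 1u};
    return NUMBERS[std::lower_bound(std::begin(THRESHOLDS), std::end(THRESHOLDS), x) - std::begin(THRESHOLDS)];
}

int main() {
    std::printf("x = 50      -> %zu samples\n", numberPriorSamples(50.0));
    std::printf("x = 5000    -> %zu samples\n", numberPriorSamples(5000.0));
    std::printf("x = 1000000 -> %zu samples\n", numberPriorSamples(1000000.0));
    return 0;
}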
 
 //! Generate \p numberSamples samples of a beta R.V. with alpha \p a
@@ -239,40 +198,31 @@ std::size_t numberPriorSamples(double x)
 //! 
 //!   \f$\displaystyle \frac{1}{B(\alpha,\beta)}x^{\alpha-1}(1-x)^{\beta-1}\f$
 //! 
-void generateBetaSamples(double a,
-                         double b,
-                         std::size_t numberSamples,
-                         TDouble7Vec &samples)
-{
+void generateBetaSamples(double a, double b, std::size_t numberSamples, TDouble7Vec& samples) {
     samples.clear();
     samples.reserve(numberSamples);
     double mean = a / (a + b);
-    if (numberSamples == 1 || a == 0.0 || b == 0.0)
-    {
+    if (numberSamples == 1 || a == 0.0 || b == 0.0) {
         samples.push_back(mean);
         return;
     }
-    try
-    {
+    try {
         boost::math::beta_distribution<> beta(a, b);
         boost::math::beta_distribution<> betaAlphaPlus1(a + 1, b);
         double dq = 1.0 / static_cast<double>(numberSamples);
         double q = dq;
         double f = 0.0;
         mean /= dq;
-        for (std::size_t i = 1u; i < numberSamples; ++i, q += dq)
-        {
+        for (std::size_t i = 1u; i < numberSamples; ++i, q += dq) {
             double xq = boost::math::quantile(beta, q);
             double fq = boost::math::cdf(betaAlphaPlus1, xq);
             samples.push_back(mean * (fq - f));
             f = fq;
         }
         samples.push_back(mean * (1.0 - f));
-    }
-    catch (const std::exception &)
-    {
+    } catch (const std::exception&) {
         samples.clear();
         samples.push_back(mean);
     }
 }
@@ -296,69 +246,50 @@ const std::string NUMBER_SAMPLES_TAG("e");
 //const std::string MAXIMUM_TAG("g"); No longer used
 const std::string DECAY_RATE_TAG("h");
 const std::string EMPTY_STRING;
-
 }
 
-CMultinomialConjugate::CMultinomialConjugate() :
-    m_NumberAvailableCategories(0),
-    m_TotalConcentration(0.0)
-{}
+CMultinomialConjugate::CMultinomialConjugate() : m_NumberAvailableCategories(0), m_TotalConcentration(0.0) {
+}
 
 CMultinomialConjugate::CMultinomialConjugate(std::size_t maximumNumberOfCategories,
-                                             const TDoubleVec &categories,
-                                             const TDoubleVec &concentrations,
-                                             double decayRate) :
-    CPrior(maths_t::E_DiscreteData, decayRate),
-    m_NumberAvailableCategories(  detail::truncate(maximumNumberOfCategories)
-                                - detail::truncate(categories.size())),
-    m_Categories(categories),
-    m_Concentrations(concentrations),
-    m_TotalConcentration(0.0)
-{
+                                             const TDoubleVec& categories,
+                                             const TDoubleVec& concentrations,
+                                             double decayRate)
+    : CPrior(maths_t::E_DiscreteData, decayRate),
+      m_NumberAvailableCategories(detail::truncate(maximumNumberOfCategories) - detail::truncate(categories.size())),
+      m_Categories(categories),
+      m_Concentrations(concentrations),
+      m_TotalConcentration(0.0) {
     m_Concentrations.resize(m_Categories.size(), NON_INFORMATIVE_CONCENTRATION);
-    m_TotalConcentration = std::accumulate(m_Concentrations.begin(),
-                                           m_Concentrations.end(), 0.0);
+    m_TotalConcentration = std::accumulate(m_Concentrations.begin(), m_Concentrations.end(), 0.0);
     this->numberSamples(m_TotalConcentration);
 }
 
-CMultinomialConjugate::CMultinomialConjugate(const SDistributionRestoreParams &params,
-                                             core::CStateRestoreTraverser &traverser) :
-    CPrior(maths_t::E_DiscreteData, params.s_DecayRate),
-    m_NumberAvailableCategories(0),
-    m_TotalConcentration(0.0)
-{
+CMultinomialConjugate::CMultinomialConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser)
+    : CPrior(maths_t::E_DiscreteData, params.s_DecayRate), m_NumberAvailableCategories(0), m_TotalConcentration(0.0) {
     traverser.traverseSubLevel(boost::bind(&CMultinomialConjugate::acceptRestoreTraverser, this, _1));
 }
 
-bool CMultinomialConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
-        RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG,
-                               double decayRate,
-                               core::CStringUtils::stringToType(traverser.value(), decayRate),
-                               this->decayRate(decayRate))
+bool CMultinomialConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
+        RESTORE_SETUP_TEARDOWN(
+            DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate))
         RESTORE_BUILT_IN(NUMBER_AVAILABLE_CATEGORIES_TAG, m_NumberAvailableCategories)
-        if (!name.empty() && name[0] == CATEGORY_TAG[0])
-        {
+        if (!name.empty() && name[0] == CATEGORY_TAG[0]) {
             // Categories have been split across multiple fields b0, b1, etc
-            if (core::CPersistUtils::fromString(traverser.value(), m_Categories,
-                                                core::CPersistUtils::DELIMITER,
-                                                core::CPersistUtils::PAIR_DELIMITER, true) == false)
-            {
+            if (core::CPersistUtils::fromString(
+                    traverser.value(), m_Categories, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true) == false) {
                 LOG_ERROR("Invalid categories in split " << traverser.value());
                 return false;
             }
             continue;
         }
-        if (!name.empty() && name[0] == CONCENTRATION_TAG[0])
-        {
+        if (!name.empty() && name[0] == CONCENTRATION_TAG[0]) {
             // Concentrations have been split across multiple fields c0, c1, c2, etc
-            if (core::CPersistUtils::fromString(traverser.value(), m_Concentrations,
-                                                core::CPersistUtils::DELIMITER,
-                                                core::CPersistUtils::PAIR_DELIMITER, true) == false)
-            {
+            if (core::CPersistUtils::fromString(
+                    traverser.value(), m_Concentrations, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true) ==
+                false) {
                 LOG_ERROR("Invalid concentrations in split " << traverser.value());
                 return false;
            }
            continue;
        }
@@ -369,16 +300,14 @@ bool CMultinomialConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser
                                double numberSamples,
                                core::CStringUtils::stringToType(traverser.value(), numberSamples),
                                this->numberSamples(numberSamples))
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     this->shrink();
 
     return true;
 }
 
-void CMultinomialConjugate::swap(CMultinomialConjugate &other)
-{
+void CMultinomialConjugate::swap(CMultinomialConjugate& other) {
     this->CPrior::swap(other);
     std::swap(m_NumberAvailableCategories, other.m_NumberAvailableCategories);
     m_Categories.swap(other.m_Categories);
@@ -386,66 +315,44 @@ void CMultinomialConjugate::swap(CMultinomialConjugate &other)
     std::swap(m_TotalConcentration, other.m_TotalConcentration);
 }
 
-CMultinomialConjugate
-CMultinomialConjugate::nonInformativePrior(std::size_t maximumNumberOfCategories,
-                                           double decayRate)
-{
-    return CMultinomialConjugate(maximumNumberOfCategories,
-                                 TDoubleVec(),
-                                 TDoubleVec(),
-                                 decayRate);
+CMultinomialConjugate CMultinomialConjugate::nonInformativePrior(std::size_t maximumNumberOfCategories, double decayRate) {
+    return CMultinomialConjugate(maximumNumberOfCategories, TDoubleVec(), TDoubleVec(), decayRate);
 }
 
-CMultinomialConjugate::EPrior CMultinomialConjugate::type() const
-{
+CMultinomialConjugate::EPrior CMultinomialConjugate::type() const {
     return E_Multinomial;
 }
 
-CMultinomialConjugate *CMultinomialConjugate::clone() const
-{
+CMultinomialConjugate* CMultinomialConjugate::clone() const {
     return new CMultinomialConjugate(*this);
 }
 
-void CMultinomialConjugate::setToNonInformative(double /*offset*/,
-                                                double decayRate)
-{
-    *this = nonInformativePrior(  m_NumberAvailableCategories
-                                + detail::truncate(m_Categories.size()),
-                                decayRate);
+void CMultinomialConjugate::setToNonInformative(double /*offset*/, double decayRate) {
+    *this = nonInformativePrior(m_NumberAvailableCategories + detail::truncate(m_Categories.size()), decayRate);
 }
 
-bool CMultinomialConjugate::needsOffset() const
-{
+bool CMultinomialConjugate::needsOffset() const {
     return false;
 }
 
-double CMultinomialConjugate::adjustOffset(const TWeightStyleVec &/*weightStyles*/,
-                                           const TDouble1Vec &/*samples*/,
-                                           const TDouble4Vec1Vec &/*weights*/)
-{
+double CMultinomialConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/,
+                                           const TDouble1Vec& /*samples*/,
+                                           const TDouble4Vec1Vec& /*weights*/) {
    return 1.0;
 }
 
-double CMultinomialConjugate::offset() const
-{
+double CMultinomialConjugate::offset() const {
     return 0.0;
 }
 
-void CMultinomialConjugate::addSamples(const TWeightStyleVec &weightStyles,
-                                       const TDouble1Vec &samples,
-                                       const TDouble4Vec1Vec &weights)
-{
-    if (samples.empty())
-    {
+void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) {
+    if (samples.empty()) {
         return;
     }
 
-    if (samples.size() != weights.size())
-    {
-        LOG_ERROR("Mismatch in samples '"
-                  << core::CContainerPrinter::print(samples)
-                  << "' and weights '"
-                  << core::CContainerPrinter::print(weights) << "'");
+    if (samples.size() != weights.size()) {
+        LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+                                          << core::CContainerPrinter::print(weights) << "'");
         return;
     }
 
@@ -475,63 +382,49 @@ void CMultinomialConjugate::addSamples(const TWeightStyleVec &weightStyles,
     // i.e. n(i), so for example updating with {(x, 2)} is equivalent to
     // updating with {x, x}.
 
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
         double x = samples[i];
-        if (CMathsFuncs::isNan(x))
-        {
+        if (CMathsFuncs::isNan(x)) {
             LOG_ERROR("Discarding " << x);
             continue;
         }
 
         double n = maths_t::countForUpdate(weightStyles, weights[i]);
-        if (!CMathsFuncs::isFinite(n))
-        {
+        if (!CMathsFuncs::isFinite(n)) {
             LOG_ERROR("Bad count weight " << n);
             continue;
         }
 
         m_TotalConcentration += n;
 
-        std::size_t category = std::lower_bound(m_Categories.begin(),
-                                                m_Categories.end(),
-                                                x) - m_Categories.begin();
-        if (category == m_Categories.size() || m_Categories[category] != x)
-        {
-            m_NumberAvailableCategories =
-                std::max(m_NumberAvailableCategories - 1, -1);
-            if (m_NumberAvailableCategories < 0)
-            {
+        std::size_t category = std::lower_bound(m_Categories.begin(), m_Categories.end(), x) - m_Categories.begin();
+        if (category == m_Categories.size() || m_Categories[category] != x) {
+            m_NumberAvailableCategories = std::max(m_NumberAvailableCategories - 1, -1);
+            if (m_NumberAvailableCategories < 0) {
                 continue;
             }
 
             // This is infrequent so the amortized cost is low.
m_Categories.insert(m_Categories.begin() + category, x); - m_Concentrations.insert(m_Concentrations.begin() + category, - NON_INFORMATIVE_CONCENTRATION); + m_Concentrations.insert(m_Concentrations.begin() + category, NON_INFORMATIVE_CONCENTRATION); this->shrink(); } m_Concentrations[category] += n; } - LOG_TRACE("samples = " << core::CContainerPrinter::print(samples) - << ", m_NumberAvailableCategories = " << m_NumberAvailableCategories - << ", m_Categories = " << core::CContainerPrinter::print(m_Categories) - << ", m_Concentrations = " << core::CContainerPrinter::print(m_Concentrations) - << ", m_TotalConcentration = " << m_TotalConcentration); + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples) << ", m_NumberAvailableCategories = " << m_NumberAvailableCategories + << ", m_Categories = " << core::CContainerPrinter::print(m_Categories) << ", m_Concentrations = " + << core::CContainerPrinter::print(m_Concentrations) << ", m_TotalConcentration = " << m_TotalConcentration); } -void CMultinomialConjugate::propagateForwardsByTime(double time) -{ - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { +void CMultinomialConjugate::propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { LOG_ERROR("Can't propagate model backwards in time"); return; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // Nothing to be done. return; } @@ -551,12 +444,9 @@ void CMultinomialConjugate::propagateForwardsByTime(double time) // Thus the mean is unchanged and for large a0 the variance is // increased by very nearly 1 / f. - double factor = std::min((alpha * m_TotalConcentration - + (1.0 - alpha) * NON_INFORMATIVE_CONCENTRATION) - / m_TotalConcentration, 1.0); + double factor = std::min((alpha * m_TotalConcentration + (1.0 - alpha) * NON_INFORMATIVE_CONCENTRATION) / m_TotalConcentration, 1.0); - for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) - { + for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) { m_Concentrations[i] *= factor; } @@ -564,28 +454,21 @@ void CMultinomialConjugate::propagateForwardsByTime(double time) this->numberSamples(this->numberSamples() * factor); - LOG_TRACE("factor = " << factor - << ", m_Concentrations = " << core::CContainerPrinter::print(m_Concentrations) - << ", m_TotalConcentration = " << m_TotalConcentration - << ", numberSamples = " << this->numberSamples()); + LOG_TRACE("factor = " << factor << ", m_Concentrations = " << core::CContainerPrinter::print(m_Concentrations) + << ", m_TotalConcentration = " << m_TotalConcentration << ", numberSamples = " << this->numberSamples()); } -CMultinomialConjugate::TDoubleDoublePr -CMultinomialConjugate::marginalLikelihoodSupport() const -{ +CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihoodSupport() const { // Strictly speaking for a particular likelihood this is the // set of discrete values or categories, but we are interested // in the support for the possible discrete values which can // be any real numbers. 
-    return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                          boost::numeric::bounds<double>::highest());
+    return std::make_pair(boost::numeric::bounds<double>::lowest(), boost::numeric::bounds<double>::highest());
 }
 
-double CMultinomialConjugate::marginalLikelihoodMean() const
-{
-    if (this->isNonInformative())
-    {
+double CMultinomialConjugate::marginalLikelihoodMean() const {
+    if (this->isNonInformative()) {
         return 0.0;
     }
 
@@ -597,18 +480,14 @@ double CMultinomialConjugate::marginalLikelihoodMean() const
 
     TMeanAccumulator result;
     TDoubleVec probabilities = this->probabilities();
-    for (std::size_t i = 0u; i < m_Categories.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Categories.size(); ++i) {
         result.add(m_Categories[i], probabilities[i]);
     }
     return CBasicStatistics::mean(result);
 }
 
-double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec &/*weightStyles*/,
-                                                     const TDouble4Vec &/*weights*/) const
-{
-    if (this->isNonInformative())
-    {
+double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const {
+    if (this->isNonInformative()) {
         return 0.0;
     }
 
@@ -616,10 +495,8 @@ double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec &/*we
 
     double modeConcentration = m_Concentrations[0];
     std::size_t mode = 0u;
-    for (std::size_t i = 1u; i < m_Concentrations.size(); ++i)
-    {
-        if (m_Concentrations[i] > modeConcentration)
-        {
+    for (std::size_t i = 1u; i < m_Concentrations.size(); ++i) {
+        if (m_Concentrations[i] > modeConcentration) {
             modeConcentration = m_Concentrations[i];
             mode = i;
         }
@@ -628,13 +505,10 @@ double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec &/*we
     return m_Categories[mode];
 }
 
-double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec &/*weightStyles*/,
-                                                         const TDouble4Vec &/*weights*/) const
-{
+double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const {
     using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
 
-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         return boost::numeric::bounds<double>::highest();
     }
 
@@ -646,20 +520,16 @@ double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec &
 
     TMeanVarAccumulator result;
     TDoubleVec probabilities = this->probabilities();
-    for (std::size_t i = 0u; i < m_Categories.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Categories.size(); ++i) {
         result.add(m_Categories[i], probabilities[i]);
     }
     return CBasicStatistics::variance(result);
 }
 
-CMultinomialConjugate::TDoubleDoublePr
-CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage,
-                                                            const TWeightStyleVec &/*weightStyles*/,
-                                                            const TDouble4Vec &/*weights*/) const
-{
-    if (this->isNonInformative())
-    {
+CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage,
                                                                                                    const TWeightStyleVec& /*weightStyles*/,
                                                                                                    const TDouble4Vec& /*weights*/) const {
+    if (this->isNonInformative()) {
         return this->marginalLikelihoodSupport();
     }
 
@@ -672,25 +542,19 @@ CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage,
     quantiles.reserve(m_Concentrations.size());
     double pU = 0.0;
     double pCumulative = 0.0;
-    for (std::size_t i = 0u; i < m_Concentrations.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) {
         double p = m_Concentrations[i] / m_TotalConcentration;
         pCumulative += p;
         quantiles.push_back(pCumulative);
         pU += 1.0 / 
static_cast<double>(m_Concentrations.size()) - p;
     }
 
     double q1 = (1.0 - percentage) / 2.0;
-    ptrdiff_t i1 = std::lower_bound(quantiles.begin(),
-                                    quantiles.end(),
-                                    q1 - pU) - quantiles.begin();
+    ptrdiff_t i1 = std::lower_bound(quantiles.begin(), quantiles.end(), q1 - pU) - quantiles.begin();
     double x1 = m_Categories[i1];
     double x2 = x1;
-    if (percentage > 0.0)
-    {
+    if (percentage > 0.0) {
         double q2 = (1.0 + percentage) / 2.0;
-        ptrdiff_t i2 = std::min(std::lower_bound(quantiles.begin(),
-                                                 quantiles.end(),
-                                                 q2 + pU) - quantiles.begin(),
+        ptrdiff_t i2 = std::min(std::lower_bound(quantiles.begin(), quantiles.end(), q2 + pU) - quantiles.begin(),
                                 static_cast<ptrdiff_t>(quantiles.size()) - 1);
         x2 = m_Categories[i2];
     }
@@ -701,31 +565,24 @@ CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage,
     return std::make_pair(x1, x2);
 }
 
-maths_t::EFloatingPointErrorStatus
-CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                                  const TDouble1Vec &samples,
-                                                  const TDouble4Vec1Vec &weights,
-                                                  double &result) const
-{
+maths_t::EFloatingPointErrorStatus CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                                     const TDouble1Vec& samples,
+                                                                                     const TDouble4Vec1Vec& weights,
+                                                                                     double& result) const {
     result = 0.0;
 
-    if (samples.empty())
-    {
+    if (samples.empty()) {
         LOG_ERROR("Can't compute likelihood for empty sample set");
         return maths_t::E_FpFailed;
     }
 
-    if (samples.size() != weights.size())
-    {
-        LOG_ERROR("Mismatch in samples '"
-                  << core::CContainerPrinter::print(samples)
-                  << "' and weights '"
-                  << core::CContainerPrinter::print(weights) << "'");
+    if (samples.size() != weights.size()) {
+        LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
                                           << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }
 
-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // The non-informative likelihood is improper and effectively
         // zero everywhere. We use minus max double because
         // log(0) = HUGE_VALUE, which causes problems for Windows. 
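// Editor's note: the next hunk reformats the evaluation of the Dirichlet-multinomial
// marginal likelihood. For reference, the quantity computed there is
//   log P({n_i} | {a_i}) = log G(n + 1) + log G(A) - log G(A + n)
//                          + Sum_i [ log G(a_i + n_i) - log G(a_i) - log G(n_i + 1) ]
// where G is the gamma function, n = Sum_i n_i and A = Sum_i a_i. A minimal
// standalone sketch under those definitions; "logDirichletMultinomial", "a" and
// "counts" are hypothetical names for illustration, not part of this file:
#include <cmath>
#include <cstddef>
#include <vector>

double logDirichletMultinomial(const std::vector<double>& a, const std::vector<double>& counts) {
    double A = 0.0;
    double n = 0.0;
    for (double ai : a) { A += ai; }
    for (double ni : counts) { n += ni; }
    // Terms shared by all categories.
    double result = std::lgamma(n + 1.0) + std::lgamma(A) - std::lgamma(A + n);
    // Per-category terms, matching the per-category loop in the hunk below.
    for (std::size_t i = 0; i < a.size(); ++i) {
        result += std::lgamma(a[i] + counts[i]) - std::lgamma(a[i]) - std::lgamma(counts[i] + 1.0);
    }
    return result;
}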
@@ -762,70 +619,52 @@ CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightS TDoubleDoubleMap categoryCounts; double numberSamples = 0.0; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); numberSamples += n; categoryCounts[samples[i]] += n; } - try - { - LOG_TRACE("# samples = " << numberSamples - << ", total concentration = " << m_TotalConcentration); + try { + LOG_TRACE("# samples = " << numberSamples << ", total concentration = " << m_TotalConcentration); - result = boost::math::lgamma(numberSamples + 1.0) - + boost::math::lgamma(m_TotalConcentration) - - boost::math::lgamma(m_TotalConcentration + numberSamples); + result = boost::math::lgamma(numberSamples + 1.0) + boost::math::lgamma(m_TotalConcentration) - + boost::math::lgamma(m_TotalConcentration + numberSamples); - for (TDoubleDoubleMapCItr countItr = categoryCounts.begin(); - countItr != categoryCounts.end(); - ++countItr) - { + for (TDoubleDoubleMapCItr countItr = categoryCounts.begin(); countItr != categoryCounts.end(); ++countItr) { double category = countItr->first; double count = countItr->second; LOG_TRACE("category = " << category << ", count = " << count); result -= boost::math::lgamma(countItr->second + 1.0); - std::size_t index = std::lower_bound(m_Categories.begin(), - m_Categories.end(), - category) - m_Categories.begin(); - if (index < m_Categories.size() && m_Categories[index] == category) - { + std::size_t index = std::lower_bound(m_Categories.begin(), m_Categories.end(), category) - m_Categories.begin(); + if (index < m_Categories.size() && m_Categories[index] == category) { LOG_TRACE("concentration = " << m_Concentrations[index]); - result += boost::math::lgamma(m_Concentrations[index] + count) - - boost::math::lgamma(m_Concentrations[index]); + result += boost::math::lgamma(m_Concentrations[index] + count) - boost::math::lgamma(m_Concentrations[index]); } } - } - catch (const std::exception &e) - { - LOG_ERROR("Unable to compute joint log likelihood: " << e.what() - << ", samples = " << core::CContainerPrinter::print(samples) - << ", categories = " << core::CContainerPrinter::print(m_Categories) - << ", concentrations = " << core::CContainerPrinter::print(m_Concentrations)); + } catch (const std::exception& e) { + LOG_ERROR("Unable to compute joint log likelihood: " << e.what() << ", samples = " << core::CContainerPrinter::print(samples) + << ", categories = " << core::CContainerPrinter::print(m_Categories) + << ", concentrations = " << core::CContainerPrinter::print(m_Concentrations)); return maths_t::E_FpFailed; } LOG_TRACE("result = " << result); maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result); - if (status & maths_t::E_FpFailed) - { + if (status & maths_t::E_FpFailed) { LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); } return status; } -void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const -{ +void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { samples.clear(); - if (numberSamples == 0 || this->isNonInformative()) - { + if (numberSamples == 0 || this->isNonInformative()) { return; } @@ -846,8 +685,7 @@ void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDoubleVec probabilities; 
probabilities.reserve(m_Categories.size());
-    for (std::size_t i = 0u; i < m_Concentrations.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) {
         double p = m_Concentrations[i] / m_TotalConcentration;
         probabilities.push_back(p);
     }
@@ -855,32 +693,26 @@ void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
     CSampling::TSizeVec sampling;
     CSampling::weightedSample(numberSamples, probabilities, sampling);
 
-    if (sampling.size() != m_Categories.size())
-    {
+    if (sampling.size() != m_Categories.size()) {
         LOG_ERROR("Failed to sample marginal likelihood");
         return;
     }
 
     samples.reserve(numberSamples);
-    for (std::size_t i = 0u; i < m_Categories.size(); ++i)
-    {
-        std::fill_n(std::back_inserter(samples),
-                    sampling[i],
-                    m_Categories[i]);
+    for (std::size_t i = 0u; i < m_Categories.size(); ++i) {
+        std::fill_n(std::back_inserter(samples), sampling[i], m_Categories[i]);
     }
 
     LOG_TRACE("samples = " << core::CContainerPrinter::print(samples));
 }
 
-bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles,
-                                             const TDouble1Vec &samples,
-                                             const TDouble4Vec1Vec &weights,
-                                             double &lowerBound,
-                                             double &upperBound) const
-{
+bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                             const TDouble1Vec& samples,
+                                             const TDouble4Vec1Vec& weights,
+                                             double& lowerBound,
+                                             double& upperBound) const {
     lowerBound = upperBound = 0.0;
 
-    if (samples.empty())
-    {
+    if (samples.empty()) {
         LOG_ERROR("Can't compute distribution for empty sample set");
         return false;
     }
@@ -915,8 +747,7 @@ bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles
 
     static const double MAX_DOUBLE = boost::numeric::bounds<double>::highest();
 
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
         double x = samples[i];
         double n = maths_t::count(weightStyles, weights[i]);
 
@@ -926,31 +757,25 @@ bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles
 
         // We need to handle the case that the c.d.f. is zero and hence
         // the log blows up.
-        lowerBound = sampleLowerBound == 0.0 || lowerBound == MAX_DOUBLE ?
-                     MAX_DOUBLE :
-                     lowerBound - n * std::log(sampleLowerBound);
-        upperBound = sampleUpperBound == 0.0 || upperBound == MAX_DOUBLE ?
-                     MAX_DOUBLE :
-                     upperBound - n * std::log(sampleUpperBound);
+        lowerBound = sampleLowerBound == 0.0 || lowerBound == MAX_DOUBLE ? MAX_DOUBLE : lowerBound - n * std::log(sampleLowerBound);
+        upperBound = sampleUpperBound == 0.0 || upperBound == MAX_DOUBLE ? MAX_DOUBLE : upperBound - n * std::log(sampleUpperBound);
     }
 
     return true;
 }
 
-bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec &weightStyles,
-                                                       const TDouble1Vec &samples,
-                                                       const TDouble4Vec1Vec &weights,
-                                                       double &lowerBound,
-                                                       double &upperBound) const
-{
+bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                                       const TDouble1Vec& samples,
+                                                       const TDouble4Vec1Vec& weights,
+                                                       double& lowerBound,
+                                                       double& upperBound) const {
     // See minusLogJointCdf for the rationale behind this approximation. 
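// Editor's note: a minimal sketch of the fold both c.d.f. routines perform,
// assuming per-sample bounds F in [0, 1] on the marginal c.d.f. (or its
// complement) and a count weight n. The joint bound is
//   -log prod_i F(x_i)^(n_i) = -Sum_i n_i * log F(x_i),
// saturated to the largest double once any factor is zero; the helper name
// "accumulateMinusLogBound" is hypothetical, not part of this file:
#include <cmath>
#include <limits>

double accumulateMinusLogBound(double bound, double F, double n) {
    static const double MAX_DOUBLE = std::numeric_limits<double>::max();
    // Once any c.d.f. factor is zero the bound is effectively infinite and stays there.
    return (F == 0.0 || bound == MAX_DOUBLE) ? MAX_DOUBLE : bound - n * std::log(F);
}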
detail::CCdfComplement cdfComplement(m_Categories, m_Concentrations, m_TotalConcentration);
 
     static const double MAX_DOUBLE = boost::numeric::bounds<double>::highest();
 
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
         double x = samples[i];
         double n = maths_t::count(weightStyles, weights[i]);
 
@@ -960,30 +785,24 @@ bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec &we
 
         // We need to handle the case that the c.d.f. is zero and hence
         // the log blows up.
-        lowerBound = sampleLowerBound == 0.0 || lowerBound == MAX_DOUBLE ?
-                     MAX_DOUBLE :
-                     lowerBound - n * std::log(sampleLowerBound);
-        upperBound = sampleUpperBound == 0.0 || upperBound == MAX_DOUBLE ?
-                     MAX_DOUBLE :
-                     upperBound - n * std::log(sampleUpperBound);
+        lowerBound = sampleLowerBound == 0.0 || lowerBound == MAX_DOUBLE ? MAX_DOUBLE : lowerBound - n * std::log(sampleLowerBound);
+        upperBound = sampleUpperBound == 0.0 || upperBound == MAX_DOUBLE ? MAX_DOUBLE : upperBound - n * std::log(sampleUpperBound);
     }
 
     return true;
 }
 
 bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                                           const TWeightStyleVec &weightStyles,
-                                                           const TDouble1Vec &samples,
-                                                           const TDouble4Vec1Vec &weights,
-                                                           double &lowerBound,
-                                                           double &upperBound,
-                                                           maths_t::ETail &tail) const
-{
+                                                           const TWeightStyleVec& weightStyles,
+                                                           const TDouble1Vec& samples,
+                                                           const TDouble4Vec1Vec& weights,
+                                                           double& lowerBound,
+                                                           double& upperBound,
+                                                           maths_t::ETail& tail) const {
     lowerBound = upperBound = 0.0;
     tail = maths_t::E_UndeterminedTail;
 
-    if (samples.empty())
-    {
+    if (samples.empty()) {
         LOG_ERROR("Can't compute distribution for empty sample set");
         return false;
     }
@@ -1006,400 +825,318 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
     // of constant probability, which might be computable by the method
     // of Lagrange multipliers.)
 
-    switch (calculation)
-    {
-    case maths_t::E_OneSidedBelow:
-    {
-        // See minusLogJointCdf for a discussion of the calculation
-        // of the marginal c.d.f. for a single sample.
-
-        CJointProbabilityOfLessLikelySamples jointLowerBound;
-        CJointProbabilityOfLessLikelySamples jointUpperBound;
-
-        detail::CCdf cdf(m_Categories, m_Concentrations, m_TotalConcentration);
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
-            double x = samples[i];
-            double n = maths_t::count(weightStyles, weights[i]);
-            double sampleLowerBound, sampleUpperBound;
-            cdf(x, sampleLowerBound, sampleUpperBound);
-            jointLowerBound.add(sampleLowerBound, n);
-            jointUpperBound.add(sampleUpperBound, n);
-        }
+    switch (calculation) {
+    case maths_t::E_OneSidedBelow: {
+        // See minusLogJointCdf for a discussion of the calculation
+        // of the marginal c.d.f. for a single sample. 
+ + CJointProbabilityOfLessLikelySamples jointLowerBound; + CJointProbabilityOfLessLikelySamples jointUpperBound; + + detail::CCdf cdf(m_Categories, m_Concentrations, m_TotalConcentration); + for (std::size_t i = 0u; i < samples.size(); ++i) { + double x = samples[i]; + double n = maths_t::count(weightStyles, weights[i]); + double sampleLowerBound, sampleUpperBound; + cdf(x, sampleLowerBound, sampleUpperBound); + jointLowerBound.add(sampleLowerBound, n); + jointUpperBound.add(sampleUpperBound, n); + } - if ( !jointLowerBound.calculate(lowerBound) - || !jointUpperBound.calculate(upperBound)) - { - LOG_ERROR("Unable to compute probability for " - << core::CContainerPrinter::print(samples) - << ": " << jointLowerBound - << ", " << jointUpperBound); - return false; + if (!jointLowerBound.calculate(lowerBound) || !jointUpperBound.calculate(upperBound)) { + LOG_ERROR("Unable to compute probability for " << core::CContainerPrinter::print(samples) << ": " << jointLowerBound << ", " + << jointUpperBound); + return false; + } + tail = maths_t::E_LeftTail; + } break; + + case maths_t::E_TwoSided: { + // The probability of a less likely category is given by: + // E[ Sum_{p(j) <= p(i)}{ p(j) } ] + // + // where the expectation is taken over the prior for the distribution + // probabilities. This is hard to compute because the terms in the sum + // vary as the probabilities vary. We approximate this by taking the + // expectation over the marginal for each p(i). In particular, we compute: + // P(i) = Sum_{E[p(j)] <= E[p(i)]}{ E[p(j)] } + // + // Here, P(i) is the probability of less likely category and the + // expected probability of the i'th category is: + // E[p(i)] = a(i) / Sum_j{ a(j) } (1) + // + // where, a(i) are the concentration parameters of the prior. So, (1) + // reduces to: + // Sum_{j:a(j)<=a(i)}{ E[p(j)] } + // + // We can think of P(.) as a function of probability, i.e. if the + // probability of a category were p then its corresponding P(.) would + // be: + // P(argmin_{i : E[p(i)] >= p}{ E[p(i)] }) + // + // Given this definition we can compute: + // E[ P(p) ] (2) + // + // where the expectation is taken over the marginal for each probability. + // This can be computed exactly by noting that marginal for p(i) is + // beta distributed with alpha = a(i) and beta = Sum_j{ a(j) } - a(i). + // However, this requires us to compute quantiles at every E[p(i)] which + // would be expensive for a large number of categories. Instead we + // approximate the probability by using a fixed number of samples from + // the marginal and computing the probability by taking the mean of these. + // Finally, note that if E[p(i)] and E[p(j)] are very close we want P(i) + // and P(j) to be close, but they can in fact be very different if there + // are many probabilities E[p(k)] which satisfy: + // E[p(i)] <= E[p(k)] <= E[p(j)], + // + // To avoid this problem we scale all probabilities by (1 + eps), for + // small eps > 0, when computing (2). + // + // In the case that the number of categories has overflowed we derive a + // sharp lower bound by considering the case that all the additional + // mass is in one category which is not in the sample set. In this case + // we have three sets of categories: + // U = "Uncounted categories" + // L = {i : i is counted and P(i) < P(U)} + // M = {i : i is counted and P(i) >= P(U)} + // + // where, clearly P(U) is the extra mass. 
If X denotes the sample set
+        // and N the total concentration then for this case:
+        //   P(i in XU)  >= 1 / N
+        //   P(i in XL) >= P(i)
+        //   P(i in XM) >= P(i) + P(U)
+        //
+        // If |X| = 1 then a sharp upper bound is easy to compute:
+        //   P(i in XU)  <= P(U) + P(L)
+        //   P(i in X\U) <= P(i) + P(U)                        (3)
+        //
+        // For the case that X contains a mixture of values that are and aren't
+        // in U then a sharp upper bound is difficult to compute: it depends on
+        // the number of different categories in XU, P(U) and the probabilities
+        // P(i in L). In this case we just fall back to using (3) which isn't
+        // sharp.
+
+        using TSizeVec = std::vector<std::size_t>;
+
+        tail = maths_t::E_MixedOrNeitherTail;
+
+        TDoubleDoubleSizeTrVec pCategories;
+        pCategories.reserve(m_Categories.size());
+        double pU = 0.0;
+        double pmin = 1.0 / m_TotalConcentration;
+
+        {
+            double r = 1.0 / static_cast<double>(m_Concentrations.size());
+            for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) {
+                double p = m_Concentrations[i] / m_TotalConcentration;
+                pCategories.emplace_back(p, p, i);
+                pU += r - p;
            }
        }
+        std::sort(pCategories.begin(), pCategories.end());
        LOG_TRACE("p = " << core::CContainerPrinter::print(pCategories));

-        double pl = 0.0;
        {
In this case
-            // we have three sets of categories:
-            //   U = "Uncounted categories"
-            //   L = {i : i is counted and P(i) < P(U)}
-            //   M = {i : i is counted and P(i) >= P(U)}
-            //
-            // where, clearly P(U) is the extra mass. If X denotes the sample set
-            // and N the total concentration then for this case:
-            //   P(i in XU)  >= 1 / N
-            //   P(i in XL) >= P(i)
-            //   P(i in XM) >= P(i) + P(U)
-            //
-            // If |X| = 1 then a sharp upper bound is easy to compute:
-            //   P(i in XU)  <= P(U) + P(L)
-            //   P(i in X\U) <= P(i) + P(U)                        (3)
-            //
-            // For the case that X contains a mixture of values that are and aren't
-            // in U then a sharp upper bound is difficult to compute: it depends on
-            // the number of different categories in XU, P(U) and the probabilities
-            // P(i in L). In this case we just fall back to using (3) which isn't
-            // sharp.
-
-            using TSizeVec = std::vector<std::size_t>;
-
-            tail = maths_t::E_MixedOrNeitherTail;
-
-            TDoubleDoubleSizeTrVec pCategories;
-            pCategories.reserve(m_Categories.size());
-            double pU = 0.0;
-            double pmin = 1.0 / m_TotalConcentration;
-
-            {
-                double r = 1.0 / static_cast<double>(m_Concentrations.size());
-                for (std::size_t i = 0u; i < m_Concentrations.size(); ++i)
-                {
-                    double p = m_Concentrations[i] / m_TotalConcentration;
-                    pCategories.emplace_back(p, p, i);
-                    pU += r - p;
                }
            }
-            std::sort(pCategories.begin(), pCategories.end());
-            LOG_TRACE("p = " << core::CContainerPrinter::print(pCategories));
-
-            double pl = 0.0;
            {
-                // Get the index of largest probability less than or equal to P(U).
-                std::size_t l = pCategories.size();
-                if (pU > 0.0)
-                {
-                    l = std::lower_bound(pCategories.begin(),
-                                         pCategories.end(),
-                                         TDoubleDoubleSizeTr(pU, pU, 0))
-                        - pCategories.begin();
+            // Get the index of largest probability less than or equal to P(U).
+            std::size_t l = pCategories.size();
+            if (pU > 0.0) {
+                l = std::lower_bound(pCategories.begin(), pCategories.end(), TDoubleDoubleSizeTr(pU, pU, 0)) - pCategories.begin();
             }

-                // Compute probabilities of less likely categories.
-                double pCumulative = 0.0;
-                for (std::size_t i = 0u, j = 0u; i < pCategories.size(); /**/)
-                {
-                    // Find the probability equal range [i, j).
-                    double p = pCategories[i].get<1>();
+            // Compute probabilities of less likely categories.
+            double pCumulative = 0.0;
+            for (std::size_t i = 0u, j = 0u; i < pCategories.size(); /**/) {
+                // Find the probability equal range [i, j).
+                double p = pCategories[i].get<1>();
                 pCumulative += p;
-                while ( ++j < pCategories.size()
-                       && pCategories[j].get<1>() == p)
-                {
+                while (++j < pCategories.size() && pCategories[j].get<1>() == p) {
                     pCumulative += p;
                 }

-                // Update the equal range probabilities [i, j).
-                for (/**/; i < j; ++i)
-                {
+                // Update the equal range probabilities [i, j).
+                for (/**/; i < j; ++i) {
                     pCategories[i].get<1>() = pCumulative;
                 }
             }
-            if (l < pCategories.size())
-            {
-                pl = pCategories[l].get<1>();
+            if (l < pCategories.size()) {
+                pl = pCategories[l].get<1>();
             }
         }
-        std::size_t nSamples = detail::numberPriorSamples(m_TotalConcentration);
-        LOG_TRACE("n = " << nSamples);
-        if (nSamples > 1)
-        {
-            // Extract the indices of the categories we want. 
-            TSizeVec categoryIndices;
-            categoryIndices.reserve(samples.size());
-            for (std::size_t i = 0u; i < samples.size(); ++i)
-            {
-                std::size_t index = std::lower_bound(m_Categories.begin(),
-                                                     m_Categories.end(),
-                                                     samples[i])
-                                    - m_Categories.begin();
-                if (index < m_Categories.size() && m_Categories[index] == samples[i])
-                {
-                    categoryIndices.push_back(index);
-                }
-            }
-            std::sort(categoryIndices.begin(), categoryIndices.end());
-
-            for (std::size_t i = 0u; i < pCategories.size(); ++i)
-            {
-                // For all categories that we actually want compute the
-                // average probability over a set of independent samples
-                // from the marginal prior for this category, which by the
-                // law of large numbers converges to E[ P(p) ] w.r.t. to
-                // marginal for p. The constants a and b are a(i) and
-                // Sum_j( a(j) ) - a(i), respectively.
-
-                std::size_t j = pCategories[i].get<2>();
-                if (std::binary_search(categoryIndices.begin(),
-                                       categoryIndices.end(), j))
-                {
-                    TDouble7Vec marginalSamples;
-                    double a = m_Concentrations[j];
-                    double b = m_TotalConcentration - m_Concentrations[j];
-                    detail::generateBetaSamples(a, b, nSamples, marginalSamples);
-                    LOG_TRACE("E[p] = " << pCategories[i].get<0>()
-                              << ", mean = " << CBasicStatistics::mean(marginalSamples)
-                              << ", samples = " << marginalSamples);
-
-                    TMeanAccumulator pAcc;
-                    for (std::size_t k = 0u; k < marginalSamples.size(); ++k)
-                    {
-                        TDoubleDoubleSizeTr x(1.05 * marginalSamples[k], 0.0, 0);
-                        ptrdiff_t r =
-                            std::min(std::upper_bound(pCategories.begin(),
-                                                      pCategories.end(), x)
-                                     - pCategories.begin(),
-                                     static_cast<ptrdiff_t>(pCategories.size()) - 1);
-
-                        double fl = r > 0 ? pCategories[r-1].get<0>() : 0.0;
-                        double fr = pCategories[r].get<0>();
-                        double pl_ = r > 0 ? pCategories[r-1].get<1>() : 0.0;
-                        double pr_ = pCategories[r].get<1>();
-                        double alpha = std::min((fr - fl == 0.0) ?
-                                                0.0 : (x.get<0>() - fl) / (fr - fl), 1.0);
-                        double px = (1.0 - alpha) * pl_ + alpha * pr_;
-                        LOG_TRACE("E[p(l)] = " << fl
-                                  << ", P(l) = " << pl_
-                                  << ", E[p(r)] = " << fr
-                                  << ", P(r) = " << pr_
-                                  << ", alpha = " << alpha
-                                  << ", p = " << px);
-
-                        pAcc.add(px);
-                    }
-                    pCategories[i].get<1>() = CBasicStatistics::mean(pAcc);
-                }
-            }
         }
 
-        LOG_TRACE("pCategories = " << core::CContainerPrinter::print(pCategories));
-        LOG_TRACE("P(U) = " << pU << ", P(l) = " << pl);
-
-        // We can use radix sort to reorder the probabilities in O(n).
-        // To understand the following loop note that on each iteration
-        // at least one extra probability is in its correct position
-        // so it will necessarily terminate in at most n iterations.
-        for (std::size_t i = 0; i < pCategories.size(); /**/)
-        {
-            if (i == pCategories[i].get<2>())
-            {
-                ++i;
-            }
-            else
-            {
-                std::swap(pCategories[i], pCategories[pCategories[i].get<2>()]);
+        std::size_t nSamples = detail::numberPriorSamples(m_TotalConcentration);
+        LOG_TRACE("n = " << nSamples);
+        if (nSamples > 1) {
+            // Extract the indices of the categories we want. 
+            TSizeVec categoryIndices;
+            categoryIndices.reserve(samples.size());
+            for (std::size_t i = 0u; i < samples.size(); ++i) {
+                std::size_t index = std::lower_bound(m_Categories.begin(), m_Categories.end(), samples[i]) - m_Categories.begin();
+                if (index < m_Categories.size() && m_Categories[index] == samples[i]) {
                     categoryIndices.push_back(index);
                 }
             }
             std::sort(categoryIndices.begin(), categoryIndices.end());
 
+            for (std::size_t i = 0u; i < pCategories.size(); ++i) {
+                // For all categories that we actually want compute the
+                // average probability over a set of independent samples
+                // from the marginal prior for this category, which by the
+                // law of large numbers converges to E[ P(p) ] w.r.t. to
+                // marginal for p. The constants a and b are a(i) and
+                // Sum_j( a(j) ) - a(i), respectively.
+
+                std::size_t j = pCategories[i].get<2>();
+                if (std::binary_search(categoryIndices.begin(), categoryIndices.end(), j)) {
+                    TDouble7Vec marginalSamples;
+                    double a = m_Concentrations[j];
+                    double b = m_TotalConcentration - m_Concentrations[j];
+                    detail::generateBetaSamples(a, b, nSamples, marginalSamples);
+                    LOG_TRACE("E[p] = " << pCategories[i].get<0>() << ", mean = " << CBasicStatistics::mean(marginalSamples)
+                                        << ", samples = " << marginalSamples);
+
+                    TMeanAccumulator pAcc;
+                    for (std::size_t k = 0u; k < marginalSamples.size(); ++k) {
+                        TDoubleDoubleSizeTr x(1.05 * marginalSamples[k], 0.0, 0);
+                        ptrdiff_t r = std::min(std::upper_bound(pCategories.begin(), pCategories.end(), x) - pCategories.begin(),
                                               static_cast<ptrdiff_t>(pCategories.size()) - 1);
+
+                        double fl = r > 0 ? pCategories[r - 1].get<0>() : 0.0;
+                        double fr = pCategories[r].get<0>();
+                        double pl_ = r > 0 ? pCategories[r - 1].get<1>() : 0.0;
+                        double pr_ = pCategories[r].get<1>();
+                        double alpha = std::min((fr - fl == 0.0) ? 0.0 : (x.get<0>() - fl) / (fr - fl), 1.0);
+                        double px = (1.0 - alpha) * pl_ + alpha * pr_;
+                        LOG_TRACE("E[p(l)] = " << fl << ", P(l) = " << pl_ << ", E[p(r)] = " << fr << ", P(r) = " << pr_
                                               << ", alpha = " << alpha << ", p = " << px);
+
+                        pAcc.add(px);
+                    }
+                    pCategories[i].get<1>() = CBasicStatistics::mean(pAcc);
                 }
             }
         }
 
        LOG_TRACE("pCategories = " << core::CContainerPrinter::print(pCategories));
        LOG_TRACE("P(U) = " << pU << ", P(l) = " << pl);

+        // We can use radix sort to reorder the probabilities in O(n).
+        // To understand the following loop note that on each iteration
+        // at least one extra probability is in its correct position
+        // so it will necessarily terminate in at most n iterations.
+        for (std::size_t i = 0; i < pCategories.size(); /**/) {
+            if (i == pCategories[i].get<2>()) {
+                ++i;
+            } else {
+                std::swap(pCategories[i], pCategories[pCategories[i].get<2>()]);
             }
         }
 
        LOG_TRACE("pCategories = " << core::CContainerPrinter::print(pCategories));

+        if (samples.size() == 1) {
+            // No special aggregation is required if there is a single sample. 
+ std::size_t index = std::lower_bound(m_Categories.begin(), m_Categories.end(), samples[0]) - m_Categories.begin(); - // Count the occurrences of each category in the sample set. - for (std::size_t i = 0u; i < samples.size(); ++i) - { - double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); - categoryCounts[x] += n; + if (index < m_Categories.size() && m_Categories[index] == samples[0]) { + double p = pCategories[index].get<1>(); + lowerBound = p + (p >= pU ? pU : 0.0); + upperBound = p + pU; + } else { + lowerBound = pmin; + upperBound = std::max(pU + pl, pmin); } - LOG_TRACE("categoryCounts = " << core::CContainerPrinter::print(categoryCounts)); + return true; + } - CJointProbabilityOfLessLikelySamples jointLowerBound; - CJointProbabilityOfLessLikelySamples jointUpperBound; + TDoubleDoubleMap categoryCounts; - for (TDoubleDoubleMapCItr countItr = categoryCounts.begin(); - countItr != categoryCounts.end(); - ++countItr) - { - double category = countItr->first; - double count = countItr->second; - LOG_TRACE("category = " << category << ", count = " << count); + // Count the occurrences of each category in the sample set. + for (std::size_t i = 0u; i < samples.size(); ++i) { + double x = samples[i]; + double n = maths_t::count(weightStyles, weights[i]); + categoryCounts[x] += n; + } - std::size_t index = std::lower_bound(m_Categories.begin(), - m_Categories.end(), - category) - - m_Categories.begin(); + LOG_TRACE("categoryCounts = " << core::CContainerPrinter::print(categoryCounts)); - double p = pCategories[index].get<1>(); - if ( index < m_Categories.size() - && m_Categories[index] == category) - { - jointLowerBound.add(p + (p >= pU ? pU : 0.0), count); - jointUpperBound.add(p + pU, count); - } - else - { - jointLowerBound.add(pmin, count); - jointUpperBound.add(std::max(pU + pl, pmin), count); - } - } + CJointProbabilityOfLessLikelySamples jointLowerBound; + CJointProbabilityOfLessLikelySamples jointUpperBound; - if ( !jointLowerBound.calculate(lowerBound) - || !jointUpperBound.calculate(upperBound)) - { - LOG_ERROR("Unable to compute probability for " - << core::CContainerPrinter::print(samples) - << ": " << jointLowerBound - << ", " << jointUpperBound); - return false; + for (TDoubleDoubleMapCItr countItr = categoryCounts.begin(); countItr != categoryCounts.end(); ++countItr) { + double category = countItr->first; + double count = countItr->second; + LOG_TRACE("category = " << category << ", count = " << count); + + std::size_t index = std::lower_bound(m_Categories.begin(), m_Categories.end(), category) - m_Categories.begin(); + + double p = pCategories[index].get<1>(); + if (index < m_Categories.size() && m_Categories[index] == category) { + jointLowerBound.add(p + (p >= pU ? pU : 0.0), count); + jointUpperBound.add(p + pU, count); + } else { + jointLowerBound.add(pmin, count); + jointUpperBound.add(std::max(pU + pl, pmin), count); } + } - LOG_TRACE("probability = [" << lowerBound << ", " << upperBound << "]"); + if (!jointLowerBound.calculate(lowerBound) || !jointUpperBound.calculate(upperBound)) { + LOG_ERROR("Unable to compute probability for " << core::CContainerPrinter::print(samples) << ": " << jointLowerBound << ", " + << jointUpperBound); + return false; } - break; - case maths_t::E_OneSidedAbove: - { - // See minusLogJointCdf for a discussion of the calculation - // of the marginal c.d.f. for a single sample. 
- - CJointProbabilityOfLessLikelySamples jointLowerBound; - CJointProbabilityOfLessLikelySamples jointUpperBound; - - detail::CCdfComplement cdfComplement(m_Categories, - m_Concentrations, - m_TotalConcentration); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); - double sampleLowerBound, sampleUpperBound; - cdfComplement(x, sampleLowerBound, sampleUpperBound); - jointLowerBound.add(sampleLowerBound, n); - jointUpperBound.add(sampleUpperBound, n); - } + LOG_TRACE("probability = [" << lowerBound << ", " << upperBound << "]"); + } break; - if ( !jointLowerBound.calculate(lowerBound) - || !jointUpperBound.calculate(upperBound)) - { - LOG_ERROR("Unable to compute probability for " - << core::CContainerPrinter::print(samples) - << ": " << jointLowerBound - << ", " << jointUpperBound); - return false; - } - tail = maths_t::E_RightTail; + case maths_t::E_OneSidedAbove: { + // See minusLogJointCdf for a discussion of the calculation + // of the marginal c.d.f. for a single sample. + + CJointProbabilityOfLessLikelySamples jointLowerBound; + CJointProbabilityOfLessLikelySamples jointUpperBound; + + detail::CCdfComplement cdfComplement(m_Categories, m_Concentrations, m_TotalConcentration); + for (std::size_t i = 0u; i < samples.size(); ++i) { + double x = samples[i]; + double n = maths_t::count(weightStyles, weights[i]); + double sampleLowerBound, sampleUpperBound; + cdfComplement(x, sampleLowerBound, sampleUpperBound); + jointLowerBound.add(sampleLowerBound, n); + jointUpperBound.add(sampleUpperBound, n); + } + + if (!jointLowerBound.calculate(lowerBound) || !jointUpperBound.calculate(upperBound)) { + LOG_ERROR("Unable to compute probability for " << core::CContainerPrinter::print(samples) << ": " << jointLowerBound << ", " + << jointUpperBound); + return false; } - break; + tail = maths_t::E_RightTail; + } break; } return true; } -bool CMultinomialConjugate::isNonInformative() const -{ +bool CMultinomialConjugate::isNonInformative() const { return m_TotalConcentration <= NON_INFORMATIVE_CONCENTRATION; } -void CMultinomialConjugate::print(const std::string &indent, - std::string &result) const -{ - result += core_t::LINE_ENDING + indent + "multinomial " - + (this->isNonInformative() ? - std::string("non-informative") : - std::string("categories ") - + core::CContainerPrinter::print(m_Categories) - + " concentrations " - + core::CContainerPrinter::print(m_Concentrations)); +void CMultinomialConjugate::print(const std::string& indent, std::string& result) const { + result += core_t::LINE_ENDING + indent + "multinomial " + + (this->isNonInformative() ? std::string("non-informative") + : std::string("categories ") + core::CContainerPrinter::print(m_Categories) + " concentrations " + + core::CContainerPrinter::print(m_Concentrations)); } -std::string CMultinomialConjugate::printMarginalLikelihoodFunction(double /*weight*/) const -{ +std::string CMultinomialConjugate::printMarginalLikelihoodFunction(double /*weight*/) const { // This is infinite at the categories and zero elsewhere. 
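// Editor's note: the density is "infinite at the categories" because the marginal
// likelihood is purely atomic: an atom of weight E[p(i)] = a(i) / A, with
// A = Sum_j a(j), sits at each category, so there is nothing smooth to print.
// A minimal sketch of the atom weights; "atomWeights" is a hypothetical helper
// for illustration, not part of this file:
#include <vector>

std::vector<double> atomWeights(const std::vector<double>& concentrations) {
    double total = 0.0;
    for (double a : concentrations) { total += a; }
    std::vector<double> weights(concentrations);
    for (double& w : weights) { w /= total; }  // E[p(i)] = a(i) / A
    return weights;
}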
return "Not supported"; } -std::string CMultinomialConjugate::printJointDensityFunction() const -{ +std::string CMultinomialConjugate::printJointDensityFunction() const { static const double RANGE = 0.999; static const unsigned int POINTS = 51; @@ -1408,8 +1145,7 @@ std::string CMultinomialConjugate::printJointDensityFunction() const result << "hold off" << core_t::LINE_ENDING; // We show the marginals for each category plotted on the same axes. - for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) - { + for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) { double a = m_Concentrations[i]; double b = m_TotalConcentration - m_Concentrations[i]; boost::math::beta_distribution<> beta(a, b); @@ -1420,16 +1156,14 @@ std::string CMultinomialConjugate::printJointDensityFunction() const double x = xStart; result << "x = ["; - for (unsigned int j = 0u; j < POINTS; ++j, x += xIncrement) - { + for (unsigned int j = 0u; j < POINTS; ++j, x += xIncrement) { result << x << " "; } result << "];" << core_t::LINE_ENDING; result << "pdf = ["; x = xStart; - for (unsigned int j = 0u; j < POINTS; ++j, x += xIncrement) - { + for (unsigned int j = 0u; j < POINTS; ++j, x += xIncrement) { result << CTools::safePdf(beta, x) << " "; } result << "];" << core_t::LINE_ENDING; @@ -1442,8 +1176,7 @@ std::string CMultinomialConjugate::printJointDensityFunction() const return result.str(); } -uint64_t CMultinomialConjugate::checksum(uint64_t seed) const -{ +uint64_t CMultinomialConjugate::checksum(uint64_t seed) const { seed = this->CPrior::checksum(seed); seed = CChecksum::calculate(seed, m_NumberAvailableCategories); seed = CChecksum::calculate(seed, m_Categories); @@ -1451,43 +1184,33 @@ uint64_t CMultinomialConjugate::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_TotalConcentration); } -void CMultinomialConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CMultinomialConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMultinomialConjugate"); core::CMemoryDebug::dynamicSize("m_Categories", m_Categories, mem); core::CMemoryDebug::dynamicSize("m_Concentrations", m_Concentrations, mem); } -std::size_t CMultinomialConjugate::memoryUsage() const -{ +std::size_t CMultinomialConjugate::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_Categories); mem += core::CMemory::dynamicSize(m_Concentrations); return mem; } -std::size_t CMultinomialConjugate::staticSize() const -{ +std::size_t CMultinomialConjugate::staticSize() const { return sizeof(*this); } -void CMultinomialConjugate::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CMultinomialConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); inserter.insertValue(NUMBER_AVAILABLE_CATEGORIES_TAG, m_NumberAvailableCategories); inserter.insertValue(CATEGORY_TAG, core::CPersistUtils::toString(m_Categories)); inserter.insertValue(CONCENTRATION_TAG, core::CPersistUtils::toString(m_Concentrations)); - inserter.insertValue(TOTAL_CONCENTRATION_TAG, - m_TotalConcentration, - core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, - this->numberSamples(), - core::CIEEE754::E_SinglePrecision); + inserter.insertValue(TOTAL_CONCENTRATION_TAG, m_TotalConcentration, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); } -void 
CMultinomialConjugate::removeCategories(TDoubleVec categoriesToRemove)
-{
-    if (categoriesToRemove.empty())
-    {
+void CMultinomialConjugate::removeCategories(TDoubleVec categoriesToRemove) {
+    if (categoriesToRemove.empty()) {
         return;
     }
 
@@ -1495,17 +1218,13 @@ void CMultinomialConjugate::removeCategories(TDoubleVec categoriesToRemove)
 
     std::sort(categoriesToRemove.begin(), categoriesToRemove.end());
     categoriesToRemove.push_back(boost::numeric::bounds<double>::highest());
 
-    for (std::size_t i = 0u, j = 0u; i < m_Categories.size(); /**/)
-    {
-        if (m_Categories[i] < categoriesToRemove[j])
-        {
+    for (std::size_t i = 0u, j = 0u; i < m_Categories.size(); /**/) {
+        if (m_Categories[i] < categoriesToRemove[j]) {
             std::swap(m_Categories[end], m_Categories[i]);
             std::swap(m_Concentrations[end], m_Concentrations[i]);
             ++i;
             ++end;
-        }
-        else
-        {
+        } else {
             m_Categories[i] > categoriesToRemove[j] ? ++j : ++i;
         }
     }
@@ -1513,26 +1232,18 @@ void CMultinomialConjugate::removeCategories(TDoubleVec categoriesToRemove)
     m_Categories.erase(m_Categories.begin() + end, m_Categories.end());
     m_Concentrations.erase(m_Concentrations.begin() + end, m_Concentrations.end());
 
-    m_TotalConcentration = std::accumulate(m_Concentrations.begin(),
-                                           m_Concentrations.end(),
-                                           0.0);
-    LOG_TRACE("categories = "
-              << core::CContainerPrinter::print(m_Categories));
-    LOG_TRACE("concentrations = "
              << core::CContainerPrinter::print(m_Concentrations));
+    m_TotalConcentration = std::accumulate(m_Concentrations.begin(), m_Concentrations.end(), 0.0);
+    LOG_TRACE("categories = " << core::CContainerPrinter::print(m_Categories));
+    LOG_TRACE("concentrations = " << core::CContainerPrinter::print(m_Concentrations));
 
     this->numberSamples(m_TotalConcentration);
 }
 
-bool CMultinomialConjugate::index(double category, std::size_t &result) const
-{
+bool CMultinomialConjugate::index(double category, std::size_t& result) const {
     result = std::numeric_limits<std::size_t>::max();
 
-    TDoubleVecCItr categoryItr = std::lower_bound(m_Categories.begin(),
-                                                  m_Categories.end(),
-                                                  category);
-    if (categoryItr == m_Categories.end() || *categoryItr != category)
-    {
+    TDoubleVecCItr categoryItr = std::lower_bound(m_Categories.begin(), m_Categories.end(), category);
+    if (categoryItr == m_Categories.end() || *categoryItr != category) {
         return false;
     }
 
@@ -1540,23 +1251,19 @@ bool CMultinomialConjugate::index(double category, std::size_t &result) const
     return true;
 }
 
-const CMultinomialConjugate::TDoubleVec &CMultinomialConjugate::categories() const
-{
+const CMultinomialConjugate::TDoubleVec& CMultinomialConjugate::categories() const {
     return m_Categories;
 }
 
-const CMultinomialConjugate::TDoubleVec &CMultinomialConjugate::concentrations() const
-{
+const CMultinomialConjugate::TDoubleVec& CMultinomialConjugate::concentrations() const {
     return m_Concentrations;
 }
 
-bool CMultinomialConjugate::concentration(double category, double &result) const
-{
+bool CMultinomialConjugate::concentration(double category, double& result) const {
     result = 0.0;
 
     std::size_t i;
-    if (!this->index(category, i))
-    {
+    if (!this->index(category, i)) {
         return false;
     }
 
@@ -1564,19 +1271,15 @@ bool CMultinomialConjugate::concentration(double category, double &result) const
     return true;
 }
 
-double CMultinomialConjugate::totalConcentration() const
-{
+double CMultinomialConjugate::totalConcentration() const {
     return m_TotalConcentration;
 }
 
-bool CMultinomialConjugate::probability(double category,
-                                        double &result) const
-{
+bool CMultinomialConjugate::probability(double category, double& result) const {
     result = 0.0; 
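// Editor's note: the lookup below evaluates the posterior mean probability of a
// category, E[p(i)] = a(i) / Sum_j a(j). As a worked example (hypothetical
// numbers, not from this file): concentrations {2.0, 6.0} give probabilities
// 0.25 and 0.75, since
//   double probability = concentration / totalConcentration;  // 2.0 / 8.0 == 0.25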
double concentration;
-    if (!this->concentration(category, concentration))
-    {
+    if (!this->concentration(category, concentration)) {
         return false;
     }
 
@@ -1584,179 +1287,139 @@ bool CMultinomialConjugate::probability(double category,
     return true;
 }
 
-CMultinomialConjugate::TDoubleVec CMultinomialConjugate::probabilities() const
-{
+CMultinomialConjugate::TDoubleVec CMultinomialConjugate::probabilities() const {
     TDoubleVec result(m_Concentrations);
-    for (std::size_t i = 0u; i < result.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < result.size(); ++i) {
         result[i] /= m_TotalConcentration;
     }
     return result;
 }
 
-void CMultinomialConjugate::probabilitiesOfLessLikelyCategories(
-        maths_t::EProbabilityCalculation calculation,
-        TDoubleVec &lowerBounds,
-        TDoubleVec &upperBounds) const
-{
+void CMultinomialConjugate::probabilitiesOfLessLikelyCategories(maths_t::EProbabilityCalculation calculation,
                                                                TDoubleVec& lowerBounds,
                                                                TDoubleVec& upperBounds) const {
     // See probabilityOfLessLikelySamples for an explanation of these
     // calculations.
 
     lowerBounds.clear();
     upperBounds.clear();
 
-    switch (calculation)
-    {
-    case maths_t::E_OneSidedBelow:
+    switch (calculation) {
+    case maths_t::E_OneSidedBelow: {
+        detail::CCdf cdf(m_Categories, m_Concentrations, m_TotalConcentration);
+        cdf.dump(lowerBounds, upperBounds);
+    } break;
+
+    case maths_t::E_TwoSided: {
+        TDoubleDoubleSizeTrVec pCategories;
+        pCategories.reserve(m_Categories.size());
+        double pU = 0.0;
+
         {
-            detail::CCdf cdf(m_Categories, m_Concentrations, m_TotalConcentration);
-            cdf.dump(lowerBounds, upperBounds);
+            double r = 1.0 / static_cast<double>(m_Concentrations.size());
+            for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) {
+                double p = m_Concentrations[i] / m_TotalConcentration;
+                pCategories.emplace_back(p, p, i);
+                pU += r - p;
+            }
         }
-        break;
+        std::sort(pCategories.begin(), pCategories.end());
+        LOG_TRACE("pCategories = " << core::CContainerPrinter::print(pCategories));
 
-    case maths_t::E_TwoSided:
+        // Get the index of largest probability less than or equal to P(U).
+        double pl = 0.0;
         {
-            TDoubleDoubleSizeTrVec pCategories;
-            pCategories.reserve(m_Categories.size());
-            double pU = 0.0;
-
-            {
-                double r = 1.0 / static_cast<double>(m_Concentrations.size());
-                for (std::size_t i = 0u; i < m_Concentrations.size(); ++i)
-                {
-                    double p = m_Concentrations[i] / m_TotalConcentration;
-                    pCategories.emplace_back(p, p, i);
-                    pU += r - p;
-                }
+            std::size_t l = pCategories.size();
+            if (pU > 0.0) {
+                l = std::lower_bound(pCategories.begin(), pCategories.end(), TDoubleDoubleSizeTr(pU, pU, 0)) - pCategories.begin();
             }
-            std::sort(pCategories.begin(), pCategories.end());
-            LOG_TRACE("pCategories = " << core::CContainerPrinter::print(pCategories));
 
-            // Get the index of largest probability less than or equal to P(U).
-            double pl = 0.0;
-            {
-                std::size_t l = pCategories.size();
-                if (pU > 0.0)
-                {
-                    l = std::lower_bound(pCategories.begin(),
-                                         pCategories.end(),
-                                         TDoubleDoubleSizeTr(pU, pU, 0))
-                        - pCategories.begin();
-                }
-
-                // Compute probabilities of less likely categories.
-                double pCumulative = 0.0;
-                for (std::size_t i = 0u, j = 0u; i < pCategories.size(); /**/)
-                {
-                    // Find the probability equal range [i, j). 
+                double p = pCategories[i].get<1>();
+                pCumulative += p;
+                while (++j < pCategories.size() && pCategories[j].get<1>() == p) {
                     pCumulative += p;
-                    while (++j < pCategories.size()
-                           && pCategories[j].get<1>() == p)
-                    {
-                        pCumulative += p;
-                    }
-
-                    // Update the equal range probabilities [i, j).
-                    for (/**/; i < j; ++i)
-                    {
-                        pCategories[i].get<1>() = pCumulative;
-                    }
                 }
-                if (l < pCategories.size())
-                {
-                    pl = pCategories[l].get<1>();
+                // Update the equal range probabilities [i, j).
+                for (/**/; i < j; ++i) {
+                    pCategories[i].get<1>() = pCumulative;
                 }
             }
-            LOG_TRACE("pCategories = " << core::CContainerPrinter::print(pCategories));
-            LOG_TRACE("P(U) = " << pU << ", P(l) = " << pl);
-
-            lowerBounds.resize(pCategories.size(), 0.0);
-            upperBounds.resize(pCategories.size(), 0.0);
-
-            double p = 0.0;
-            double pLast = -1.0;
-            std::size_t n = detail::numberPriorSamples(m_TotalConcentration);
-            LOG_TRACE("n = " << n);
-            for (std::size_t i = 0u; i < pCategories.size(); ++i)
-            {
-                std::size_t j = pCategories[i].get<2>();
-
-                // We compute the average probability over a set of
-                // independent samples from the marginal prior for this
-                // category, which by the law of large numbers converges
-                // to E[ P(p) ] w.r.t. to marginal for p. The constants
-                // a and b are a(i) and Sum_j( a(j) ) - a(i), respectively.
-                // See confidenceIntervalProbabilities for a discussion.
-
-                if (pCategories[i].get<0>() != pLast)
-                {
-                    TDouble7Vec samples;
-                    double a = m_Concentrations[j];
-                    double b = m_TotalConcentration - m_Concentrations[j];
-                    detail::generateBetaSamples(a, b, n, samples);
-                    LOG_TRACE("E[p] = " << pCategories[i].get<0>()
-                              << ", mean = " << CBasicStatistics::mean(samples)
-                              << ", samples = " << core::CContainerPrinter::print(samples));
-
-                    TMeanAccumulator pAcc;
-                    for (std::size_t k = 0u; k < samples.size(); ++k)
-                    {
-                        TDoubleDoubleSizeTr x(1.05 * samples[k], 0.0, 0);
-                        ptrdiff_t r =
-                                std::min(std::upper_bound(pCategories.begin(),
-                                                          pCategories.end(), x)
-                                         - pCategories.begin(),
-                                         static_cast<ptrdiff_t>(pCategories.size()) - 1);
-
-                        double fl = r > 0 ? pCategories[r-1].get<0>() : 0.0;
-                        double fr = pCategories[r].get<0>();
-                        double pl_ = r > 0 ? pCategories[r-1].get<1>() : 0.0;
-                        double pr_ = pCategories[r].get<1>();
-                        double alpha = std::min((fr - fl == 0.0) ?
-                                                0.0 : (x.get<0>() - fl) / (fr - fl), 1.0);
-                        double px = (1.0 - alpha) * pl_ + alpha * pr_;
-                        LOG_TRACE("E[p(l)] = " << fl
-                                  << ", P(l) = " << pl_
-                                  << ", E[p(r)] = " << fr
-                                  << ", P(r) = " << pr_
-                                  << ", alpha = " << alpha
-                                  << ", p = " << px);
+            if (l < pCategories.size()) {
+                pl = pCategories[l].get<1>();
+            }
+        }

-                        pAcc.add(px);
-                    }
-                    p = CBasicStatistics::mean(pAcc);
-                    pLast = pCategories[i].get<0>();
+        LOG_TRACE("pCategories = " << core::CContainerPrinter::print(pCategories));
+        LOG_TRACE("P(U) = " << pU << ", P(l) = " << pl);
+
+        lowerBounds.resize(pCategories.size(), 0.0);
+        upperBounds.resize(pCategories.size(), 0.0);
+
+        double p = 0.0;
+        double pLast = -1.0;
+        std::size_t n = detail::numberPriorSamples(m_TotalConcentration);
+        LOG_TRACE("n = " << n);
+        for (std::size_t i = 0u; i < pCategories.size(); ++i) {
+            std::size_t j = pCategories[i].get<2>();
+
+            // We compute the average probability over a set of
+            // independent samples from the marginal prior for this
+            // category, which by the law of large numbers converges
+            // to E[ P(p) ] w.r.t. to marginal for p. The constants
+            // a and b are a(i) and Sum_j( a(j) ) - a(i), respectively.
+            // See confidenceIntervalProbabilities for a discussion.
+
+            if (pCategories[i].get<0>() != pLast) {
+                TDouble7Vec samples;
+                double a = m_Concentrations[j];
+                double b = m_TotalConcentration - m_Concentrations[j];
+                detail::generateBetaSamples(a, b, n, samples);
+                LOG_TRACE("E[p] = " << pCategories[i].get<0>() << ", mean = " << CBasicStatistics::mean(samples)
+                                    << ", samples = " << core::CContainerPrinter::print(samples));
+
+                TMeanAccumulator pAcc;
+                for (std::size_t k = 0u; k < samples.size(); ++k) {
+                    TDoubleDoubleSizeTr x(1.05 * samples[k], 0.0, 0);
+                    ptrdiff_t r = std::min(std::upper_bound(pCategories.begin(), pCategories.end(), x) - pCategories.begin(),
+                                           static_cast<ptrdiff_t>(pCategories.size()) - 1);
+
+                    double fl = r > 0 ? pCategories[r - 1].get<0>() : 0.0;
+                    double fr = pCategories[r].get<0>();
+                    double pl_ = r > 0 ? pCategories[r - 1].get<1>() : 0.0;
+                    double pr_ = pCategories[r].get<1>();
+                    double alpha = std::min((fr - fl == 0.0) ? 0.0 : (x.get<0>() - fl) / (fr - fl), 1.0);
+                    double px = (1.0 - alpha) * pl_ + alpha * pr_;
+                    LOG_TRACE("E[p(l)] = " << fl << ", P(l) = " << pl_ << ", E[p(r)] = " << fr << ", P(r) = " << pr_
+                              << ", alpha = " << alpha << ", p = " << px);
+
+                    pAcc.add(px);
                 }
+                p = CBasicStatistics::mean(pAcc);
+                pLast = pCategories[i].get<0>();
             }

-            LOG_TRACE("p = " << p);
-            lowerBounds[j] = p + (p >= pU ? pU : 0.0);
-            upperBounds[j] = p + pU;
-        }
-    }
-    break;

-    case maths_t::E_OneSidedAbove:
-    {
-        detail::CCdfComplement cdfComplement(m_Categories,
-                                             m_Concentrations,
-                                             m_TotalConcentration);
-        cdfComplement.dump(lowerBounds, upperBounds);
+            LOG_TRACE("p = " << p);
+            lowerBounds[j] = p + (p >= pU ? pU : 0.0);
+            upperBounds[j] = p + pU;
         }
-        break;
+    } break;
+
+    case maths_t::E_OneSidedAbove: {
+        detail::CCdfComplement cdfComplement(m_Categories, m_Concentrations, m_TotalConcentration);
+        cdfComplement.dump(lowerBounds, upperBounds);
+    } break;
     }
 }
-
-CMultinomialConjugate::TDoubleDoublePrVec
-CMultinomialConjugate::confidenceIntervalProbabilities(double percentage) const
-{
-    if (this->isNonInformative())
-    {
-        return TDoubleDoublePrVec(m_Concentrations.size(),
-                                  std::make_pair(0.0, 1.0));
+CMultinomialConjugate::TDoubleDoublePrVec CMultinomialConjugate::confidenceIntervalProbabilities(double percentage) const {
+    if (this->isNonInformative()) {
+        return TDoubleDoublePrVec(m_Concentrations.size(), std::make_pair(0.0, 1.0));
    }

    // The marginal distribution over each probability is beta.
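The per-category intervals computed in the next hunk follow from the fact that the marginal of a Dirichlet prior with concentrations a(1), ..., a(m) along category i is Beta(a(i), A - a(i)), where A is the total concentration. As a minimal self-contained sketch of that calculation (hypothetical concentration values, using only boost::math, not the classes in this patch):

#include <boost/math/distributions/beta.hpp>

#include <iostream>
#include <numeric>
#include <vector>

int main() {
    // Hypothetical concentration parameters for a three category multinomial.
    std::vector<double> concentrations{4.0, 10.0, 6.0};
    double totalConcentration = std::accumulate(concentrations.begin(), concentrations.end(), 0.0);

    // Central 90% interval for each category's probability.
    double lowerPercentile = 0.5 * (1.0 - 0.9);
    double upperPercentile = 0.5 * (1.0 + 0.9);

    for (double a : concentrations) {
        // The Dirichlet marginal for one category's probability is Beta(a, A - a).
        boost::math::beta_distribution<> beta(a, totalConcentration - a);
        std::cout << "[" << boost::math::quantile(beta, lowerPercentile) << ", "
                  << boost::math::quantile(beta, upperPercentile) << "]" << std::endl;
    }
    return 0;
}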
@@ -1788,53 +1451,40 @@ CMultinomialConjugate::confidenceIntervalProbabilities(double percentage) const double lowerPercentile = 0.5 * (1.0 - percentage); double upperPercentile = 0.5 * (1.0 + percentage); - for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) - { + for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) { double a = m_Concentrations[i]; double b = m_TotalConcentration - m_Concentrations[i]; boost::math::beta_distribution<> beta(a, b); - TDoubleDoublePr percentiles(boost::math::quantile(beta, lowerPercentile), - boost::math::quantile(beta, upperPercentile)); + TDoubleDoublePr percentiles(boost::math::quantile(beta, lowerPercentile), boost::math::quantile(beta, upperPercentile)); result.push_back(percentiles); } return result; } -bool CMultinomialConjugate::equalTolerance(const CMultinomialConjugate &rhs, - const TEqualWithTolerance &equal) const -{ +bool CMultinomialConjugate::equalTolerance(const CMultinomialConjugate& rhs, const TEqualWithTolerance& equal) const { LOG_DEBUG(m_NumberAvailableCategories << " " << rhs.m_NumberAvailableCategories); - LOG_DEBUG(core::CContainerPrinter::print(m_Categories) << " " - << core::CContainerPrinter::print(rhs.m_Categories)); - LOG_DEBUG(core::CContainerPrinter::print(m_Concentrations) << " " - << core::CContainerPrinter::print(rhs.m_Concentrations)); + LOG_DEBUG(core::CContainerPrinter::print(m_Categories) << " " << core::CContainerPrinter::print(rhs.m_Categories)); + LOG_DEBUG(core::CContainerPrinter::print(m_Concentrations) << " " << core::CContainerPrinter::print(rhs.m_Concentrations)); LOG_DEBUG(m_TotalConcentration << " " << rhs.m_TotalConcentration); - return m_NumberAvailableCategories == rhs.m_NumberAvailableCategories - && m_Categories == rhs.m_Categories - && std::equal(m_Concentrations.begin(), - m_Concentrations.end(), - rhs.m_Concentrations.begin(), equal) - && equal(m_TotalConcentration, rhs.m_TotalConcentration); + return m_NumberAvailableCategories == rhs.m_NumberAvailableCategories && m_Categories == rhs.m_Categories && + std::equal(m_Concentrations.begin(), m_Concentrations.end(), rhs.m_Concentrations.begin(), equal) && + equal(m_TotalConcentration, rhs.m_TotalConcentration); } -void CMultinomialConjugate::shrink() -{ +void CMultinomialConjugate::shrink() { // Note that the vectors are only ever shrunk once. 
     using std::swap;

-    if (m_Categories.capacity() > m_Categories.size() + m_NumberAvailableCategories)
-    {
+    if (m_Categories.capacity() > m_Categories.size() + m_NumberAvailableCategories) {
         TDoubleVec categories(m_Categories);
         swap(categories, m_Categories);
         m_Categories.reserve(m_Categories.size() + m_NumberAvailableCategories);
     }
-    if (m_Concentrations.capacity()
-            > m_Concentrations.size() + m_NumberAvailableCategories)
-    {
+    if (m_Concentrations.capacity() > m_Concentrations.size() + m_NumberAvailableCategories) {
         TDoubleVec concentrationParameters(m_Concentrations);
         swap(concentrationParameters, m_Concentrations);
         m_Concentrations.reserve(m_Concentrations.size() + m_NumberAvailableCategories);
@@ -1842,6 +1492,5 @@ void CMultinomialConjugate::shrink()
 }

 const double CMultinomialConjugate::NON_INFORMATIVE_CONCENTRATION = 0.0;
-
 }
 }
diff --git a/lib/maths/CMultivariateConstantPrior.cc b/lib/maths/CMultivariateConstantPrior.cc
index fd1d606790..9014465c94 100644
--- a/lib/maths/CMultivariateConstantPrior.cc
+++ b/lib/maths/CMultivariateConstantPrior.cc
@@ -7,9 +7,9 @@
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
@@ -26,42 +26,27 @@
 #include
 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
+namespace {
 using TDouble10Vec = core::CSmallVector<double, 10>;
 using TOptionalDouble10Vec = boost::optional<TDouble10Vec>;

 //! \brief Converts a constant value to a string.
-class CConstantToString
-{
-    public:
-        std::string operator()(double value) const
-        {
-            return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision);
-        }
+class CConstantToString {
+public:
+    std::string operator()(double value) const { return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision); }
 };

 //! Set the constant, validating the input.
-void setConstant(std::size_t dimension, - const TDouble10Vec &value, - TOptionalDouble10Vec &result) -{ - if (value.size() != dimension) - { - LOG_ERROR("Unexpected dimension: " << value.size() << " != " << dimension); - } - else if (CMathsFuncs::isNan(value)) - { +void setConstant(std::size_t dimension, const TDouble10Vec& value, TOptionalDouble10Vec& result) { + if (value.size() != dimension) { + LOG_ERROR("Unexpected dimension: " << value.size() << " != " << dimension); + } else if (CMathsFuncs::isNan(value)) { LOG_ERROR("NaN constant"); - } - else - { + } else { result.reset(value); } } @@ -70,130 +55,98 @@ void setConstant(std::size_t dimension, const std::string CONSTANT_TAG("a"); const std::string EMPTY_STRING; - } -CMultivariateConstantPrior::CMultivariateConstantPrior(std::size_t dimension, - const TOptionalDouble10Vec &constant) : - CMultivariatePrior(maths_t::E_DiscreteData, 0.0), - m_Dimension(dimension) -{ - if (constant) - { +CMultivariateConstantPrior::CMultivariateConstantPrior(std::size_t dimension, const TOptionalDouble10Vec& constant) + : CMultivariatePrior(maths_t::E_DiscreteData, 0.0), m_Dimension(dimension) { + if (constant) { setConstant(m_Dimension, *constant, m_Constant); } } -CMultivariateConstantPrior::CMultivariateConstantPrior(std::size_t dimension, - core::CStateRestoreTraverser &traverser) : - CMultivariatePrior(maths_t::E_DiscreteData, 0.0), - m_Dimension(dimension) -{ +CMultivariateConstantPrior::CMultivariateConstantPrior(std::size_t dimension, core::CStateRestoreTraverser& traverser) + : CMultivariatePrior(maths_t::E_DiscreteData, 0.0), m_Dimension(dimension) { traverser.traverseSubLevel(boost::bind(&CMultivariateConstantPrior::acceptRestoreTraverser, this, _1)); } -bool CMultivariateConstantPrior::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(CONSTANT_TAG, - TDouble10Vec constant, - core::CPersistUtils::fromString(traverser.value(), constant), - m_Constant.reset(constant)) - } - while (traverser.next()); +bool CMultivariateConstantPrior::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN( + CONSTANT_TAG, TDouble10Vec constant, core::CPersistUtils::fromString(traverser.value(), constant), m_Constant.reset(constant)) + } while (traverser.next()); return true; } -CMultivariateConstantPrior *CMultivariateConstantPrior::clone() const -{ +CMultivariateConstantPrior* CMultivariateConstantPrior::clone() const { return new CMultivariateConstantPrior(*this); } -std::size_t CMultivariateConstantPrior::dimension() const -{ +std::size_t CMultivariateConstantPrior::dimension() const { return m_Dimension; } -void CMultivariateConstantPrior::setToNonInformative(double /*offset*/, - double /*decayRate*/) -{ +void CMultivariateConstantPrior::setToNonInformative(double /*offset*/, double /*decayRate*/) { m_Constant.reset(); } -void CMultivariateConstantPrior::adjustOffset(const TWeightStyleVec &/*weightStyle*/, - const TDouble10Vec1Vec &/*samples*/, - const TDouble10Vec4Vec1Vec &/*weights*/) -{ +void CMultivariateConstantPrior::adjustOffset(const TWeightStyleVec& /*weightStyle*/, + const TDouble10Vec1Vec& /*samples*/, + const TDouble10Vec4Vec1Vec& /*weights*/) { } -void CMultivariateConstantPrior::addSamples(const TWeightStyleVec &/*weightStyle*/, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &/*weights*/) -{ - if (m_Constant || samples.empty()) - { +void 
CMultivariateConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle*/,
+                                            const TDouble10Vec1Vec& samples,
+                                            const TDouble10Vec4Vec1Vec& /*weights*/) {
+    if (m_Constant || samples.empty()) {
         return;
     }

     setConstant(m_Dimension, samples[0], m_Constant);
 }

-void CMultivariateConstantPrior::propagateForwardsByTime(double /*time*/)
-{
+void CMultivariateConstantPrior::propagateForwardsByTime(double /*time*/) {
 }

-CMultivariateConstantPrior::TUnivariatePriorPtrDoublePr
-CMultivariateConstantPrior::univariate(const TSize10Vec &marginalize,
-                                       const TSizeDoublePr10Vec &condition) const
-{
-    if (!this->check(marginalize, condition))
-    {
+CMultivariateConstantPrior::TUnivariatePriorPtrDoublePr CMultivariateConstantPrior::univariate(const TSize10Vec& marginalize,
+                                                                                               const TSizeDoublePr10Vec& condition) const {
+    if (!this->check(marginalize, condition)) {
         return TUnivariatePriorPtrDoublePr();
     }

     TSize10Vec i1;
     this->remainingVariables(marginalize, condition, i1);
-    if (i1.size() != 1)
-    {
+    if (i1.size() != 1) {
         LOG_ERROR("Invalid variables for computing univariate distribution: "
                   << "marginalize '" << core::CContainerPrinter::print(marginalize) << "'"
                   << ", condition '" << core::CContainerPrinter::print(condition) << "'");
         return TUnivariatePriorPtrDoublePr();
     }

-    return this->isNonInformative() ?
-           TUnivariatePriorPtrDoublePr(TUnivariatePriorPtr(new CConstantPrior), 0.0) :
-           TUnivariatePriorPtrDoublePr(TUnivariatePriorPtr(new CConstantPrior((*m_Constant)[i1[0]])), 0.0);
+    return this->isNonInformative() ? TUnivariatePriorPtrDoublePr(TUnivariatePriorPtr(new CConstantPrior), 0.0)
+                                    : TUnivariatePriorPtrDoublePr(TUnivariatePriorPtr(new CConstantPrior((*m_Constant)[i1[0]])), 0.0);
 }

-CMultivariateConstantPrior::TPriorPtrDoublePr
-CMultivariateConstantPrior::bivariate(const TSize10Vec &marginalize,
-                                      const TSizeDoublePr10Vec &condition) const
-{
-    if (m_Dimension == 2)
-    {
+CMultivariateConstantPrior::TPriorPtrDoublePr CMultivariateConstantPrior::bivariate(const TSize10Vec& marginalize,
+                                                                                    const TSizeDoublePr10Vec& condition) const {
+    if (m_Dimension == 2) {
         return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0);
     }
-    if (!this->check(marginalize, condition))
-    {
+    if (!this->check(marginalize, condition)) {
         return TPriorPtrDoublePr();
     }

     TSize10Vec i1;
     this->remainingVariables(marginalize, condition, i1);
-    if (i1.size() != 2)
-    {
+    if (i1.size() != 2) {
         LOG_ERROR("Invalid variables for computing univariate distribution: "
                   << "marginalize '" << core::CContainerPrinter::print(marginalize) << "'"
                   << ", condition '" << core::CContainerPrinter::print(condition) << "'");
         return TPriorPtrDoublePr();
     }

-    if (!this->isNonInformative())
-    {
+    if (!this->isNonInformative()) {
         TDouble10Vec constant;
         constant[0] = (*m_Constant)[i1[0]];
         constant[1] = (*m_Constant)[i1[1]];
@@ -202,82 +155,61 @@ CMultivariateConstantPrior::bivariate(const TSize10Vec &marginalize,
     return TPriorPtrDoublePr(TPriorPtr(new CMultivariateConstantPrior(2)), 0.0);
 }

-CMultivariateConstantPrior::TDouble10VecDouble10VecPr
-CMultivariateConstantPrior::marginalLikelihoodSupport() const
-{
+CMultivariateConstantPrior::TDouble10VecDouble10VecPr CMultivariateConstantPrior::marginalLikelihoodSupport() const {
     TDouble10Vec lowest(m_Dimension);
     TDouble10Vec highest(m_Dimension);
-    for (std::size_t i = 0u; i < m_Dimension; ++i)
-    {
-        lowest[i] = boost::numeric::bounds<double>::lowest();
+    for (std::size_t i = 0u; i < m_Dimension; ++i) {
+        lowest[i] = boost::numeric::bounds<double>::lowest();
         highest[i] = boost::numeric::bounds<double>::highest();
     }

     return std::make_pair(lowest, highest);
 }

-CMultivariateConstantPrior::TDouble10Vec CMultivariateConstantPrior::marginalLikelihoodMean() const
-{
-    if (this->isNonInformative())
-    {
+CMultivariateConstantPrior::TDouble10Vec CMultivariateConstantPrior::marginalLikelihoodMean() const {
+    if (this->isNonInformative()) {
         return TDouble10Vec(m_Dimension, 0.0);
     }

     return *m_Constant;
 }

-CMultivariateConstantPrior::TDouble10Vec
-CMultivariateConstantPrior::marginalLikelihoodMode(const TWeightStyleVec &/*weightStyles*/,
-                                                   const TDouble10Vec4Vec &/*weights*/) const
-{
+CMultivariateConstantPrior::TDouble10Vec CMultivariateConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
+                                                                                            const TDouble10Vec4Vec& /*weights*/) const {
     return this->marginalLikelihoodMean();
 }

-CMultivariateConstantPrior::TDouble10Vec10Vec
-CMultivariateConstantPrior::marginalLikelihoodCovariance() const
-{
+CMultivariateConstantPrior::TDouble10Vec10Vec CMultivariateConstantPrior::marginalLikelihoodCovariance() const {
     TDouble10Vec10Vec result(m_Dimension, TDouble10Vec(m_Dimension, 0.0));
-    if (this->isNonInformative())
-    {
-        for (std::size_t i = 0u; i < m_Dimension; ++i)
-        {
+    if (this->isNonInformative()) {
+        for (std::size_t i = 0u; i < m_Dimension; ++i) {
             result[i][i] = boost::numeric::bounds<double>::highest();
         }
     }
     return result;
 }

-CMultivariateConstantPrior::TDouble10Vec
-CMultivariateConstantPrior::marginalLikelihoodVariances() const
-{
-    return TDouble10Vec(m_Dimension, this->isNonInformative() ?
-                                     boost::numeric::bounds<double>::highest() : 0.0);
+CMultivariateConstantPrior::TDouble10Vec CMultivariateConstantPrior::marginalLikelihoodVariances() const {
+    return TDouble10Vec(m_Dimension, this->isNonInformative() ? boost::numeric::bounds<double>::highest() : 0.0);
 }

-maths_t::EFloatingPointErrorStatus
-CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                                       const TDouble10Vec1Vec &samples,
-                                                       const TDouble10Vec4Vec1Vec &weights,
-                                                       double &result) const
-{
+maths_t::EFloatingPointErrorStatus CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                                          const TDouble10Vec1Vec& samples,
+                                                                                          const TDouble10Vec4Vec1Vec& weights,
+                                                                                          double& result) const {
     result = 0.0;

-    if (samples.empty())
-    {
+    if (samples.empty()) {
         LOG_ERROR("Can't compute likelihood for empty sample set");
         return maths_t::E_FpFailed;
     }

-    if (samples.size() != weights.size())
-    {
-        LOG_ERROR("Mismatch in samples '"
-                  << core::CContainerPrinter::print(samples)
-                  << "' and weights '"
-                  << core::CContainerPrinter::print(weights) << "'");
+    if (samples.size() != weights.size()) {
+        LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
                                          << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }

-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // The non-informative likelihood is improper and effectively
         // zero everywhere. We use minus max double because
         // log(0) = HUGE_VALUE, which causes problems for Windows.
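The convention described in the comment above can be sketched independently of the class: a constant (point mass) prior scores a matching sample with the log of the largest finite double and anything else with the lowest finite double, so that downstream arithmetic never sees genuine infinities. A minimal sketch under those assumptions, with hypothetical names:

#include <cmath>
#include <limits>
#include <vector>

// Finite stand-ins for log(infinity) and log(0) in a point-mass likelihood.
double constantLogLikelihood(const std::vector<double>& constant, const std::vector<double>& sample) {
    if (sample == constant) {
        // Technically infinite; use the log of the largest finite double.
        return std::log(std::numeric_limits<double>::max());
    }
    // Technically log(0) = -infinity; use the lowest finite double instead.
    return std::numeric_limits<double>::lowest();
}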
@@ -292,95 +224,73 @@ CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec &we
     double numberSamples = 0.0;

-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
-        if (samples[i].size() != m_Dimension)
-        {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
+        if (samples[i].size() != m_Dimension) {
             LOG_ERROR("Unexpected dimension: " << samples[i].size() << " != " << m_Dimension);
             continue;
         }
-        if (!std::equal(samples[i].begin(), samples[i].end(), m_Constant->begin()))
-        {
+        if (!std::equal(samples[i].begin(), samples[i].end(), m_Constant->begin())) {
             // Technically infinite, but just use minus max double.
             result = boost::numeric::bounds<double>::lowest();
             return maths_t::E_FpOverflowed;
         }
-        numberSamples += this->smallest(maths_t::countForUpdate(m_Dimension,
-                                                                weightStyles,
-                                                                weights[i]));
+        numberSamples += this->smallest(maths_t::countForUpdate(m_Dimension, weightStyles, weights[i]));
     }

     result = numberSamples * core::constants::LOG_MAX_DOUBLE;

     return maths_t::E_FpNoErrors;
 }

-void CMultivariateConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples,
-                                                          TDouble10Vec1Vec &samples) const
-{
+void CMultivariateConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const {
     samples.clear();

-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         return;
     }

     samples.resize(numberSamples, *m_Constant);
 }

-bool CMultivariateConstantPrior::isNonInformative() const
-{
+bool CMultivariateConstantPrior::isNonInformative() const {
     return !m_Constant;
 }

-void CMultivariateConstantPrior::print(const std::string &separator,
-                                       std::string &result) const
-{
-    result += core_t::LINE_ENDING + separator + "constant "
-              + (this->isNonInformative() ?
-                 std::string("non-informative") :
-                 core::CContainerPrinter::print(*m_Constant));
+void CMultivariateConstantPrior::print(const std::string& separator, std::string& result) const {
+    result += core_t::LINE_ENDING + separator + "constant " +
+              (this->isNonInformative() ? std::string("non-informative") : core::CContainerPrinter::print(*m_Constant));
 }

-uint64_t CMultivariateConstantPrior::checksum(uint64_t seed) const
-{
+uint64_t CMultivariateConstantPrior::checksum(uint64_t seed) const {
     seed = this->CMultivariatePrior::checksum(seed);
     return CChecksum::calculate(seed, m_Constant);
 }

-void CMultivariateConstantPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CMultivariateConstantPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CMultivariateConstantPrior");
     core::CMemoryDebug::dynamicSize("m_Constant", m_Constant, mem);
 }

-std::size_t CMultivariateConstantPrior::memoryUsage() const
-{
+std::size_t CMultivariateConstantPrior::memoryUsage() const {
     return core::CMemory::dynamicSize(m_Constant);
 }

-std::size_t CMultivariateConstantPrior::staticSize() const
-{
+std::size_t CMultivariateConstantPrior::staticSize() const {
     return sizeof(*this);
 }

-void CMultivariateConstantPrior::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
-    if (m_Constant)
-    {
+void CMultivariateConstantPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    if (m_Constant) {
         inserter.insertValue(CONSTANT_TAG, core::CPersistUtils::toString(*m_Constant, CConstantToString()));
     }
 }

-std::string CMultivariateConstantPrior::persistenceTag() const
-{
+std::string CMultivariateConstantPrior::persistenceTag() const {
     return CONSTANT_TAG + core::CStringUtils::typeToString(m_Dimension);
 }

-const CMultivariateConstantPrior::TOptionalDouble10Vec &CMultivariateConstantPrior::constant() const
-{
+const CMultivariateConstantPrior::TOptionalDouble10Vec& CMultivariateConstantPrior::constant() const {
     return m_Constant;
 }
-
 }
 }
diff --git a/lib/maths/CMultivariateMultimodalPrior.cc b/lib/maths/CMultivariateMultimodalPrior.cc
index 8248364d1d..01395c19c3 100644
--- a/lib/maths/CMultivariateMultimodalPrior.cc
+++ b/lib/maths/CMultivariateMultimodalPrior.cc
@@ -6,78 +6,63 @@

 #include

-#include
 #include
+#include

 #include

-namespace ml
-{
-namespace maths
-{
-namespace multivariate_multimodal_prior_detail
-{
+namespace ml {
+namespace maths {
+namespace multivariate_multimodal_prior_detail {

 using TDoubleVec = std::vector<double>;
 using TDouble10Vec = CMultivariatePrior::TDouble10Vec;
 using TDouble10Vec4Vec = CMultivariatePrior::TDouble10Vec4Vec;

-namespace
-{
+namespace {

 //! Print the set of mode indices.
-std::string printIndices(const TModeVec &modes)
-{
+std::string printIndices(const TModeVec& modes) {
     std::ostringstream result;
     result << "{";
-    if (!modes.empty())
-    {
+    if (!modes.empty()) {
         result << modes[0].s_Index;
-        for (std::size_t i = 1u; i < modes.size(); ++i)
-        {
+        for (std::size_t i = 1u; i < modes.size(); ++i) {
             result << ", " << modes[i].s_Index;
         }
     }
     result << "}";
     return result.str();
 }
-
 }

-maths_t::EFloatingPointErrorStatus
-    jointLogMarginalLikelihood(const TModeVec &modes,
-                               const maths_t::TWeightStyleVec &weightStyles,
-                               const TDouble10Vec1Vec &sample,
-                               const TDouble10Vec4Vec1Vec &weights,
-                               TSizeDoublePr3Vec &modeLogLikelihoods,
-                               double &result)
-{
-    try
-    {
+maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& modes,
+                                                              const maths_t::TWeightStyleVec& weightStyles,
+                                                              const TDouble10Vec1Vec& sample,
+                                                              const TDouble10Vec4Vec1Vec& weights,
+                                                              TSizeDoublePr3Vec& modeLogLikelihoods,
+                                                              double& result) {
+    try {
         // We re-normalize so that the maximum log likelihood is one
         // to avoid underflow.
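// (Aside: the re-normalization below is the standard log-sum-exp trick. A
// self-contained sketch of the same computation, with hypothetical inputs:
//
//     #include <algorithm>
//     #include <cmath>
//     #include <cstddef>
//     #include <vector>
//
//     double logMixtureLikelihood(const std::vector<double>& weights,
//                                 const std::vector<double>& logLikelihoods) {
//         double maxLogLikelihood =
//             *std::max_element(logLikelihoods.begin(), logLikelihoods.end());
//         double likelihood = 0.0;
//         double Z = 0.0;
//         for (std::size_t i = 0; i < logLikelihoods.size(); ++i) {
//             // Dividing through by exp(maxLogLikelihood) keeps every
//             // exponent <= 0, so the sum cannot overflow.
//             likelihood += weights[i] * std::exp(logLikelihoods[i] - maxLogLikelihood);
//             Z += weights[i];
//         }
//         return std::log(likelihood / Z) + maxLogLikelihood;
//     }
//
// The loop below applies exactly this re-normalization to the per-mode
// likelihoods.)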
         modeLogLikelihoods.clear();
         double maxLogLikelihood = boost::numeric::bounds<double>::lowest();

-        for (std::size_t i = 0u; i < modes.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < modes.size(); ++i) {
             double modeLogLikelihood;
             maths_t::EFloatingPointErrorStatus status =
-                    modes[i].s_Prior->jointLogMarginalLikelihood(weightStyles, sample, weights, modeLogLikelihood);
-            if (status & maths_t::E_FpFailed)
-            {
+                modes[i].s_Prior->jointLogMarginalLikelihood(weightStyles, sample, weights, modeLogLikelihood);
+            if (status & maths_t::E_FpFailed) {
                 // Logging handled at a lower level.
                 return status;
             }
-            if (!(status & maths_t::E_FpOverflowed))
-            {
+            if (!(status & maths_t::E_FpOverflowed)) {
                 modeLogLikelihoods.push_back({i, modeLogLikelihood});
                 maxLogLikelihood = std::max(maxLogLikelihood, modeLogLikelihood);
             }
         }

-        if (modeLogLikelihoods.empty())
-        {
+        if (modeLogLikelihoods.empty()) {
             // Technically, the marginal likelihood is zero here
             // so the log would be infinite. We use minus max
             // double because log(0) = HUGE_VALUE, which causes
@@ -91,14 +76,12 @@ maths_t::EFloatingPointErrorStatus
             return maths_t::E_FpOverflowed;
         }

-        LOG_TRACE("modeLogLikelihoods = "
-                  << core::CContainerPrinter::print(modeLogLikelihoods));
+        LOG_TRACE("modeLogLikelihoods = " << core::CContainerPrinter::print(modeLogLikelihoods));

         double sampleLikelihood = 0.0;
         double Z = 0.0;

-        for (const auto &likelihood : modeLogLikelihoods)
-        {
+        for (const auto& likelihood : modeLogLikelihoods) {
             double w = modes[likelihood.first].weight();
             // Divide through by the largest value to avoid underflow.
             sampleLikelihood += w * std::exp(likelihood.second - maxLogLikelihood);
@@ -108,12 +91,9 @@ maths_t::EFloatingPointErrorStatus
         sampleLikelihood /= Z;
         result = (std::log(sampleLikelihood) + maxLogLikelihood);

-        LOG_TRACE("sample = " << core::CContainerPrinter::print(sample)
-                  << ", maxLogLikelihood = " << maxLogLikelihood
-                  << ", sampleLogLikelihood = " << result);
-    }
-    catch (const std::exception &e)
-    {
+        LOG_TRACE("sample = " << core::CContainerPrinter::print(sample) << ", maxLogLikelihood = " << maxLogLikelihood
+                              << ", sampleLogLikelihood = " << result);
+    } catch (const std::exception& e) {
         LOG_ERROR("Failed to compute likelihood: " << e.what());
         return maths_t::E_FpFailed;
     }
@@ -121,14 +101,10 @@ maths_t::EFloatingPointErrorStatus
     return maths_t::E_FpNoErrors;
 }

-void sampleMarginalLikelihood(const TModeVec &modes,
-                              std::size_t numberSamples,
-                              TDouble10Vec1Vec &samples)
-{
+void sampleMarginalLikelihood(const TModeVec& modes, std::size_t numberSamples, TDouble10Vec1Vec& samples) {
     samples.clear();

-    if (modes.size() == 1)
-    {
+    if (modes.size() == 1) {
         modes[0].s_Prior->sampleMarginalLikelihood(numberSamples, samples);
         return;
     }
@@ -139,32 +115,28 @@ void sampleMarginalLikelihood(const TModeVec &modes,
     normalizedWeights.reserve(modes.size());
     double Z = 0.0;

-    for (const auto &mode : modes)
-    {
+    for (const auto& mode : modes) {
         double weight = mode.weight();
         normalizedWeights.push_back(weight);
         Z += weight;
     }
-    for (auto &weight : normalizedWeights)
-    {
+    for (auto& weight : normalizedWeights) {
         weight /= Z;
     }

     CSampling::TSizeVec sampling;
     CSampling::weightedSample(numberSamples, normalizedWeights, sampling);
     LOG_TRACE("normalizedWeights = " << core::CContainerPrinter::print(normalizedWeights)
-              << ", sampling = " << core::CContainerPrinter::print(sampling));
+                                     << ", sampling = " << core::CContainerPrinter::print(sampling));

-    if (sampling.size() != modes.size())
-    {
+    if (sampling.size() != modes.size()) {
         LOG_ERROR("Failed to sample marginal likelihood");
marginal likelihood"); return; } samples.reserve(numberSamples); TDouble10Vec1Vec modeSamples; - for (std::size_t i = 0u; i < modes.size(); ++i) - { + for (std::size_t i = 0u; i < modes.size(); ++i) { modes[i].s_Prior->sampleMarginalLikelihood(sampling[i], modeSamples); LOG_TRACE("# modeSamples = " << modeSamples.size()); LOG_TRACE("modeSamples = " << core::CContainerPrinter::print(modeSamples)); @@ -173,16 +145,13 @@ void sampleMarginalLikelihood(const TModeVec &modes, LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); } -void print(const TModeVec &modes, const std::string &separator, std::string &result) -{ - double Z = std::accumulate(modes.begin(), modes.end(), 0.0, - [](double sum, const TMode &mode) { return sum + mode.weight(); }); +void print(const TModeVec& modes, const std::string& separator, std::string& result) { + double Z = std::accumulate(modes.begin(), modes.end(), 0.0, [](double sum, const TMode& mode) { return sum + mode.weight(); }); std::string separator_ = separator + separator; result += ":"; - for (const auto &mode : modes) - { + for (const auto& mode : modes) { double weight = mode.weight() / Z; result += core_t::LINE_ENDING + separator_ + " weight " + core::CStringUtils::typeToStringPretty(weight); mode.s_Prior->print(separator_, result); @@ -190,15 +159,13 @@ void print(const TModeVec &modes, const std::string &separator, std::string &res } void modeMergeCallback(std::size_t dimension, - TModeVec &modes, - const TPriorPtr &seedPrior, + TModeVec& modes, + const TPriorPtr& seedPrior, std::size_t numberSamples, std::size_t leftMergeIndex, std::size_t rightMergeIndex, - std::size_t targetIndex) -{ - LOG_TRACE("Merging modes with indices " - << leftMergeIndex << " " << rightMergeIndex); + std::size_t targetIndex) { + LOG_TRACE("Merging modes with indices " << leftMergeIndex << " " << rightMergeIndex); using TSizeSet = std::set; @@ -207,51 +174,38 @@ void modeMergeCallback(std::size_t dimension, double wl = 0.0; double wr = 0.0; - double n = 0.0; + double n = 0.0; std::size_t nl = 0; std::size_t nr = 0; TDouble10Vec1Vec samples; - auto leftMode = std::find_if(modes.begin(), modes.end(), - CSetTools::CIndexInSet(leftMergeIndex)); - if (leftMode != modes.end()) - { + auto leftMode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(leftMergeIndex)); + if (leftMode != modes.end()) { wl = leftMode->s_Prior->numberSamples(); n += wl; TDouble10Vec1Vec leftSamples; leftMode->s_Prior->sampleMarginalLikelihood(numberSamples, leftSamples); nl = leftSamples.size(); samples.insert(samples.end(), leftSamples.begin(), leftSamples.end()); - } - else - { - LOG_ERROR("Couldn't find mode for " << leftMergeIndex - << " in " << printIndices(modes) - << ", other index = " << rightMergeIndex - << ", merged index = " << targetIndex); + } else { + LOG_ERROR("Couldn't find mode for " << leftMergeIndex << " in " << printIndices(modes) << ", other index = " << rightMergeIndex + << ", merged index = " << targetIndex); } - auto rightMode = std::find_if(modes.begin(), modes.end(), - CSetTools::CIndexInSet(rightMergeIndex)); - if (rightMode != modes.end()) - { + auto rightMode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(rightMergeIndex)); + if (rightMode != modes.end()) { wr = rightMode->s_Prior->numberSamples(); n += wr; TDouble10Vec1Vec rightSamples; rightMode->s_Prior->sampleMarginalLikelihood(numberSamples, rightSamples); nr = rightSamples.size(); samples.insert(samples.end(), rightSamples.begin(), rightSamples.end()); - } - else - { - 
LOG_ERROR("Couldn't find mode for " << rightMergeIndex - << " in " << printIndices(modes) - << ", other index = " << leftMergeIndex - << ", merged index = " << targetIndex); + } else { + LOG_ERROR("Couldn't find mode for " << rightMergeIndex << " in " << printIndices(modes) << ", other index = " << leftMergeIndex + << ", merged index = " << targetIndex); } - if (n > 0.0) - { + if (n > 0.0) { double nl_ = static_cast(nl); double nr_ = static_cast(nr); double Z = (nl_ * wl + nr_ * wr) / (nl_ + nr_); @@ -263,26 +217,24 @@ void modeMergeCallback(std::size_t dimension, LOG_TRACE("n = " << n << ", wl = " << wl << ", wr = " << wr); double ns = std::min(n, 4.0); - double s = static_cast(samples.size()); + double s = static_cast(samples.size()); - TDouble10Vec leftSeedWeight(dimension, wl * ns / s); + TDouble10Vec leftSeedWeight(dimension, wl * ns / s); TDouble10Vec rightSeedWeight(dimension, wl * ns / s); TDouble10Vec4Vec1Vec weights; weights.reserve(samples.size()); - weights.resize(nl, TDouble10Vec1Vec(1, leftSeedWeight)); + weights.resize(nl, TDouble10Vec1Vec(1, leftSeedWeight)); weights.resize(nl + nr, TDouble10Vec1Vec(1, rightSeedWeight)); newMode.s_Prior->addSamples(CConstantWeights::COUNT, samples, weights); double weight = (n - ns) / s; - if (weight > 0.0) - { - for (std::size_t i = 0u; i < dimension; ++i) - { - leftSeedWeight[i] = wl * weight; + if (weight > 0.0) { + for (std::size_t i = 0u; i < dimension; ++i) { + leftSeedWeight[i] = wl * weight; rightSeedWeight[i] = wr * weight; } weights.clear(); - weights.resize(nl, TDouble10Vec1Vec(1, leftSeedWeight)); + weights.resize(nl, TDouble10Vec1Vec(1, leftSeedWeight)); weights.resize(nl + nr, TDouble10Vec1Vec(1, rightSeedWeight)); newMode.s_Prior->addSamples(CConstantWeights::COUNT, samples, weights); } @@ -291,8 +243,7 @@ void modeMergeCallback(std::size_t dimension, TSizeSet mergedIndices; mergedIndices.insert(leftMergeIndex); mergedIndices.insert(rightMergeIndex); - modes.erase(std::remove_if(modes.begin(), modes.end(), - CSetTools::CIndexInSet(mergedIndices)), modes.end()); + modes.erase(std::remove_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(mergedIndices)), modes.end()); // Add the new mode. 
LOG_TRACE("Creating mode with index " << targetIndex); @@ -301,12 +252,9 @@ void modeMergeCallback(std::size_t dimension, LOG_TRACE("Merged modes"); } -std::string debugWeights(const TModeVec &modes) -{ +std::string debugWeights(const TModeVec& modes) { return TMode::debugWeights(modes); } - } } } - diff --git a/lib/maths/CMultivariateMultimodalPriorFactory.cc b/lib/maths/CMultivariateMultimodalPriorFactory.cc index 0fd34ac79e..1afa55ecb7 100644 --- a/lib/maths/CMultivariateMultimodalPriorFactory.cc +++ b/lib/maths/CMultivariateMultimodalPriorFactory.cc @@ -11,81 +11,70 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { template -class CFactory -{ - public: - static CMultivariateMultimodalPrior *make(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) - { - return new CMultivariateMultimodalPrior(params, traverser); - } +class CFactory { +public: + static CMultivariateMultimodalPrior* make(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + return new CMultivariateMultimodalPrior(params, traverser); + } - static CMultivariateMultimodalPrior *make(maths_t::EDataType dataType, - double decayRate, - maths_t::EClusterWeightCalc weightCalc, - double minimumClusterFraction, - double minimumClusterCount, - double minimumCategoryCount, - const CMultivariatePrior &seedPrior) - { - boost::scoped_ptr> > clusterer( - CXMeansOnlineFactory::make(dataType, - weightCalc, - decayRate, - minimumClusterFraction, - minimumClusterCount, - minimumCategoryCount)); - return new CMultivariateMultimodalPrior(dataType, *clusterer, seedPrior, decayRate); - } + static CMultivariateMultimodalPrior* make(maths_t::EDataType dataType, + double decayRate, + maths_t::EClusterWeightCalc weightCalc, + double minimumClusterFraction, + double minimumClusterCount, + double minimumCategoryCount, + const CMultivariatePrior& seedPrior) { + boost::scoped_ptr>> clusterer(CXMeansOnlineFactory::make( + dataType, weightCalc, decayRate, minimumClusterFraction, minimumClusterCount, minimumCategoryCount)); + return new CMultivariateMultimodalPrior(dataType, *clusterer, seedPrior, decayRate); + } }; - } -#define CREATE_PRIOR(N) \ -switch (N) \ -{ \ -case 2: ptr.reset(CFactory<2>::make(FACTORY_ARGS)); break; \ -case 3: ptr.reset(CFactory<3>::make(FACTORY_ARGS)); break; \ -case 4: ptr.reset(CFactory<4>::make(FACTORY_ARGS)); break; \ -case 5: ptr.reset(CFactory<5>::make(FACTORY_ARGS)); break; \ -default: LOG_ERROR("Unsupported dimension " << N); break; \ -} +#define CREATE_PRIOR(N) \ + switch (N) { \ + case 2: \ + ptr.reset(CFactory<2>::make(FACTORY_ARGS)); \ + break; \ + case 3: \ + ptr.reset(CFactory<3>::make(FACTORY_ARGS)); \ + break; \ + case 4: \ + ptr.reset(CFactory<4>::make(FACTORY_ARGS)); \ + break; \ + case 5: \ + ptr.reset(CFactory<5>::make(FACTORY_ARGS)); \ + break; \ + default: \ + LOG_ERROR("Unsupported dimension " << N); \ + break; \ + } -CMultivariateMultimodalPriorFactory::TPriorPtr -CMultivariateMultimodalPriorFactory::nonInformative(std::size_t dimension, - maths_t::EDataType dataType, - double decayRate, - maths_t::EClusterWeightCalc weightCalc, - double minimumClusterFraction, - double minimumClusterCount, - double minimumCategoryCount, - const CMultivariatePrior &seedPrior) -{ +CMultivariateMultimodalPriorFactory::TPriorPtr CMultivariateMultimodalPriorFactory::nonInformative(std::size_t dimension, + maths_t::EDataType dataType, + double decayRate, + maths_t::EClusterWeightCalc weightCalc, + double 
+                                                                                                   double minimumClusterCount,
+                                                                                                   double minimumCategoryCount,
+                                                                                                   const CMultivariatePrior& seedPrior) {
     TPriorPtr ptr;
-#define FACTORY_ARGS dataType, decayRate, weightCalc, \
-                     minimumClusterFraction, \
-                     minimumClusterCount, \
-                     minimumCategoryCount, \
-                     seedPrior
+#define FACTORY_ARGS dataType, decayRate, weightCalc, minimumClusterFraction, minimumClusterCount, minimumCategoryCount, seedPrior
     CREATE_PRIOR(dimension)
 #undef FACTORY_ARGS
     return ptr;
 }

 bool CMultivariateMultimodalPriorFactory::restore(std::size_t dimension,
-                                                  const SDistributionRestoreParams &params,
-                                                  TPriorPtr &ptr,
-                                                  core::CStateRestoreTraverser &traverser)
-{
+                                                  const SDistributionRestoreParams& params,
+                                                  TPriorPtr& ptr,
+                                                  core::CStateRestoreTraverser& traverser) {
     ptr.reset();
 #define FACTORY_ARGS params, traverser
     CREATE_PRIOR(dimension)
@@ -94,6 +83,5 @@ bool CMultivariateMultimodalPriorFactory::restore(std::size_t dimension,
 }

 #undef CREATE_PRIOR
-
 }
 }
diff --git a/lib/maths/CMultivariateNormalConjugateFactory.cc b/lib/maths/CMultivariateNormalConjugateFactory.cc
index 53bd48d8b6..5270d1fd8e 100644
--- a/lib/maths/CMultivariateNormalConjugateFactory.cc
+++ b/lib/maths/CMultivariateNormalConjugateFactory.cc
@@ -8,48 +8,45 @@

 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
+namespace {

 template<std::size_t N>
-class CFactory
-{
-    public:
-        static CMultivariateNormalConjugate<N> *make(const SDistributionRestoreParams &params,
-                                                     core::CStateRestoreTraverser &traverser)
-        {
-            return new CMultivariateNormalConjugate<N>(params, traverser);
-        }
+class CFactory {
+public:
+    static CMultivariateNormalConjugate<N>* make(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+        return new CMultivariateNormalConjugate<N>(params, traverser);
+    }

-        static CMultivariateNormalConjugate<N> *make(maths_t::EDataType dataType,
-                                                     double decayRate)
-        {
-            return CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate).clone();
-        }
+    static CMultivariateNormalConjugate<N>* make(maths_t::EDataType dataType, double decayRate) {
+        return CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate).clone();
+    }
 };
-
 }

-#define CREATE_PRIOR(N) \
-switch (N) \
-{ \
-case 2: ptr.reset(CFactory<2>::make(FACTORY_ARGS)); break; \
-case 3: ptr.reset(CFactory<3>::make(FACTORY_ARGS)); break; \
-case 4: ptr.reset(CFactory<4>::make(FACTORY_ARGS)); break; \
-case 5: ptr.reset(CFactory<5>::make(FACTORY_ARGS)); break; \
-default: LOG_ERROR("Unsupported dimension " << N); break; \
-}
+#define CREATE_PRIOR(N)                                     \
+    switch (N) {                                            \
+    case 2:                                                 \
+        ptr.reset(CFactory<2>::make(FACTORY_ARGS));         \
+        break;                                              \
+    case 3:                                                 \
+        ptr.reset(CFactory<3>::make(FACTORY_ARGS));         \
+        break;                                              \
+    case 4:                                                 \
+        ptr.reset(CFactory<4>::make(FACTORY_ARGS));         \
+        break;                                              \
+    case 5:                                                 \
+        ptr.reset(CFactory<5>::make(FACTORY_ARGS));         \
+        break;                                              \
+    default:                                                \
+        LOG_ERROR("Unsupported dimension " << N);           \
+        break;                                              \
+    }

 CMultivariateNormalConjugateFactory::TPriorPtr
-CMultivariateNormalConjugateFactory::nonInformative(std::size_t dimension,
-                                                    maths_t::EDataType dataType,
-                                                    double decayRate)
-{
+CMultivariateNormalConjugateFactory::nonInformative(std::size_t dimension, maths_t::EDataType dataType, double decayRate) {
     TPriorPtr ptr;
 #define FACTORY_ARGS dataType, decayRate
     CREATE_PRIOR(dimension);
@@ -58,10 +55,9 @@ CMultivariateNormalConjugateFactory::nonInformative(std::size_t dimension,
 }

 bool CMultivariateNormalConjugateFactory::restore(std::size_t dimension,
-                                                  const SDistributionRestoreParams &params,
-                                                  TPriorPtr &ptr,
-                                                  core::CStateRestoreTraverser &traverser)
-{
+                                                  const SDistributionRestoreParams& params,
+                                                  TPriorPtr& ptr,
+                                                  core::CStateRestoreTraverser& traverser) {
     ptr.reset();
 #define FACTORY_ARGS params, traverser
     CREATE_PRIOR(dimension);
@@ -70,6 +66,5 @@ bool CMultivariateNormalConjugateFactory::restore(std::size_t dimension,
 }

 #undef CREATE_PRIOR
-
 }
 }
diff --git a/lib/maths/CMultivariateOneOfNPrior.cc b/lib/maths/CMultivariateOneOfNPrior.cc
index 07f0ff6509..d301b4409b 100644
--- a/lib/maths/CMultivariateOneOfNPrior.cc
+++ b/lib/maths/CMultivariateOneOfNPrior.cc
@@ -8,19 +8,19 @@
 #include
 #include

-#include
 #include
 #include
+#include
 #include
 #include
-#include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
+#include
 #include
 #include
@@ -31,12 +31,9 @@
 #include
 #include

-namespace ml
-{
-namespace maths
-{
-namespace
-{
+namespace ml {
+namespace maths {
+namespace {

 using TBool3Vec = core::CSmallVector<bool, 3>;
 using TDouble3Vec = CMultivariateOneOfNPrior::TDouble3Vec;
@@ -57,61 +54,48 @@
 const std::string PRIOR_TAG("d");
 const std::string DECAY_RATE_TAG("e");

 //! Add elements of \p x to \p y.
-void add(const TDouble10Vec &x, TDouble10Vec &y)
-{
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+void add(const TDouble10Vec& x, TDouble10Vec& y) {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
         y[i] += x[i];
     }
 }

 //! Get the min of \p x and \p y.
-TDouble10Vec min(const TDouble10Vec &x, const TDouble10Vec &y)
-{
+TDouble10Vec min(const TDouble10Vec& x, const TDouble10Vec& y) {
     TDouble10Vec result(x);
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
         result[i] = std::min(result[i], y[i]);
     }
     return result;
 }

 //! Get the max of \p x and \p y.
-TDouble10Vec max(const TDouble10Vec &x, const TDouble10Vec &y)
-{
+TDouble10Vec max(const TDouble10Vec& x, const TDouble10Vec& y) {
     TDouble10Vec result(x);
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
         result[i] = std::max(result[i], y[i]);
     }
     return result;
 }

 //! Update the arithmetic mean \p mean with \p x and weight \p nx.
-void updateMean(const TDouble10Vec &x, double nx, TDouble10Vec &mean, double &n)
-{
-    if (nx <= 0.0)
-    {
+void updateMean(const TDouble10Vec& x, double nx, TDouble10Vec& mean, double& n) {
+    if (nx <= 0.0) {
         return;
     }
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
         mean[i] = (n * mean[i] + nx * x[i]) / (n + nx);
     }
     n += nx;
 }

 //! Update the arithmetic mean \p mean with \p x and weight \p nx.
-void updateMean(const TDouble10Vec10Vec &x, double nx, TDouble10Vec10Vec &mean, double &n)
-{
-    if (nx <= 0.0)
-    {
+void updateMean(const TDouble10Vec10Vec& x, double nx, TDouble10Vec10Vec& mean, double& n) {
+    if (nx <= 0.0) {
         return;
     }
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
-        for (std::size_t j = 0u; j < x[i].size(); ++j)
-        {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
+        for (std::size_t j = 0u; j < x[i].size(); ++j) {
             mean[i][j] = (n * mean[i][j] + nx * x[i][j]) / (n + nx);
         }
     }
@@ -119,42 +103,33 @@ void updateMean(const TDouble10Vec10Vec &x, double nx, TDouble10Vec10Vec &mean,
     n += nx;
 }

 //! Get the largest element of \p x.
-double largest(const TDouble10Vec &x)
-{
+double largest(const TDouble10Vec& x) {
     return *std::max_element(x.begin(), x.end());
 }

 //! Add a model vector entry reading parameters from \p traverser.
-bool modelAcceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - TWeightPriorPtrPrVec &models, - core::CStateRestoreTraverser &traverser) -{ +bool modelAcceptRestoreTraverser(const SDistributionRestoreParams& params, + TWeightPriorPtrPrVec& models, + core::CStateRestoreTraverser& traverser) { CModelWeight weight(1.0); bool gotWeight = false; TPriorPtr model; - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_SETUP_TEARDOWN(WEIGHT_TAG, /**/, - traverser.traverseSubLevel(boost::bind(&CModelWeight::acceptRestoreTraverser, - &weight, _1)), + traverser.traverseSubLevel(boost::bind(&CModelWeight::acceptRestoreTraverser, &weight, _1)), gotWeight = true) RESTORE(PRIOR_TAG, - traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), - boost::cref(params), - boost::ref(model), _1))) - } - while (traverser.next()); + traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(model), _1))) + } while (traverser.next()); - if (!gotWeight) - { + if (!gotWeight) { LOG_ERROR("No weight found"); return false; } - if (model == 0) - { + if (model == 0) { LOG_ERROR("No model found"); return false; } @@ -165,33 +140,25 @@ bool modelAcceptRestoreTraverser(const SDistributionRestoreParams ¶ms, } //! Read the models, decay rate and number of samples from the supplied traverser. -bool acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - TWeightPriorPtrPrVec &models, - double &decayRate, - double &numberSamples, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + TWeightPriorPtrPrVec& models, + double& decayRate, + double& numberSamples, + core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(DECAY_RATE_TAG, decayRate) RESTORE(MODEL_TAG, - traverser.traverseSubLevel(boost::bind(&modelAcceptRestoreTraverser, - boost::cref(params), - boost::ref(models), _1))) + traverser.traverseSubLevel(boost::bind(&modelAcceptRestoreTraverser, boost::cref(params), boost::ref(models), _1))) RESTORE_BUILT_IN(NUMBER_SAMPLES_TAG, numberSamples) - } - while (traverser.next()); + } while (traverser.next()); return true; } //! Persist state for one of the models by passing information //! to the supplied inserter. 
-void modelAcceptPersistInserter(const CModelWeight &weight,
-                                const CMultivariatePrior &prior,
-                                core::CStatePersistInserter &inserter)
-{
+void modelAcceptPersistInserter(const CModelWeight& weight, const CMultivariatePrior& prior, core::CStatePersistInserter& inserter) {
     inserter.insertLevel(WEIGHT_TAG, boost::bind(&CModelWeight::acceptPersistInserter, &weight, _1));
     inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(prior), _1));
 }
@@ -201,18 +168,14 @@
 const double MINUS_INF = DERATE * boost::numeric::bounds<double>::lowest();
 const double INF = DERATE * boost::numeric::bounds<double>::highest();
 const double LOG_INITIAL_WEIGHT = std::log(1e-6);
 const double MINIMUM_SIGNIFICANT_WEIGHT = 0.01;
-
 }

 CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(std::size_t dimension,
-                                                   const TPriorPtrVec &models,
+                                                   const TPriorPtrVec& models,
                                                    maths_t::EDataType dataType,
-                                                   double decayRate) :
-        CMultivariatePrior(dataType, decayRate),
-        m_Dimension(dimension)
-{
-    if (models.empty())
-    {
+                                                   double decayRate)
+    : CMultivariatePrior(dataType, decayRate), m_Dimension(dimension) {
+    if (models.empty()) {
         LOG_ERROR("Can't initialize one-of-n with no models!");
         return;
     }
@@ -220,21 +183,17 @@ CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(std::size_t dimension,
     // Create a new model vector using uniform weights.
     m_Models.reserve(models.size());
     CModelWeight weight(1.0);
-    for (const auto &model : models)
-    {
+    for (const auto& model : models) {
         m_Models.emplace_back(weight, model);
     }
 }

 CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(std::size_t dimension,
-                                                   const TDoublePriorPtrPrVec &models,
+                                                   const TDoublePriorPtrPrVec& models,
                                                    maths_t::EDataType dataType,
-                                                   double decayRate) :
-        CMultivariatePrior(dataType, decayRate),
-        m_Dimension(dimension)
-{
-    if (models.empty())
-    {
+                                                   double decayRate)
+    : CMultivariatePrior(dataType, decayRate), m_Dimension(dimension) {
+    if (models.empty()) {
         LOG_ERROR("Can't initialize mixed model with no models!");
         return;
     }
@@ -243,94 +202,74 @@ CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(std::size_t dimension,
     // Create a new model vector using the specified models and their associated weights.
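// (Aside: the weights attached here are maintained in log space and updated,
// Bayes factor style, by adding each model's log marginal likelihood for new
// data, as addSamples below does with CModelWeight::addLogFactor. A minimal
// sketch of that update, with hypothetical names:
//
//     #include <algorithm>
//     #include <cmath>
//     #include <cstddef>
//     #include <vector>
//
//     void updateLogWeights(std::vector<double>& logWeights,
//                           const std::vector<double>& logLikelihoods) {
//         for (std::size_t i = 0; i < logWeights.size(); ++i) {
//             logWeights[i] += logLikelihoods[i];
//         }
//         // Normalize so the implied weights sum to one, subtracting the
//         // maximum first for numerical stability.
//         double maxLogWeight = *std::max_element(logWeights.begin(), logWeights.end());
//         double Z = 0.0;
//         for (double logWeight : logWeights) {
//             Z += std::exp(logWeight - maxLogWeight);
//         }
//         for (double& logWeight : logWeights) {
//             logWeight -= maxLogWeight + std::log(Z);
//         }
//     }
// )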
m_Models.reserve(models.size()); - for (const auto &model : models) - { + for (const auto& model : models) { m_Models.emplace_back(CModelWeight(model.first), model.second); } } CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(std::size_t dimension, - const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) : - CMultivariatePrior(params.s_DataType, params.s_DecayRate), - m_Dimension(dimension) -{ + const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) + : CMultivariatePrior(params.s_DataType, params.s_DecayRate), m_Dimension(dimension) { double decayRate; double numberSamples; - if (traverser.traverseSubLevel(boost::bind(&acceptRestoreTraverser, - boost::cref(params), - boost::ref(m_Models), - boost::ref(decayRate), - boost::ref(numberSamples), _1)) == false) - { + if (traverser.traverseSubLevel(boost::bind( + &acceptRestoreTraverser, boost::cref(params), boost::ref(m_Models), boost::ref(decayRate), boost::ref(numberSamples), _1)) == + false) { return; } this->decayRate(decayRate); this->numberSamples(numberSamples); } -CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(const CMultivariateOneOfNPrior &other) : - CMultivariatePrior(other.dataType(), other.decayRate()), - m_Dimension(other.m_Dimension) -{ +CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(const CMultivariateOneOfNPrior& other) + : CMultivariatePrior(other.dataType(), other.decayRate()), m_Dimension(other.m_Dimension) { // Clone all the models up front so we can implement strong exception safety. m_Models.reserve(other.m_Models.size()); - for (const auto &model : other.m_Models) - { + for (const auto& model : other.m_Models) { m_Models.emplace_back(model.first, TPriorPtr(model.second->clone())); } this->CMultivariatePrior::addSamples(other.numberSamples()); } -CMultivariateOneOfNPrior &CMultivariateOneOfNPrior::operator=(const CMultivariateOneOfNPrior &rhs) -{ - if (this != &rhs) - { +CMultivariateOneOfNPrior& CMultivariateOneOfNPrior::operator=(const CMultivariateOneOfNPrior& rhs) { + if (this != &rhs) { CMultivariateOneOfNPrior tmp(rhs); this->swap(tmp); } return *this; } -void CMultivariateOneOfNPrior::swap(CMultivariateOneOfNPrior &other) -{ +void CMultivariateOneOfNPrior::swap(CMultivariateOneOfNPrior& other) { this->CMultivariatePrior::swap(other); m_Models.swap(other.m_Models); } -CMultivariateOneOfNPrior *CMultivariateOneOfNPrior::clone() const -{ +CMultivariateOneOfNPrior* CMultivariateOneOfNPrior::clone() const { return new CMultivariateOneOfNPrior(*this); } -std::size_t CMultivariateOneOfNPrior::dimension() const -{ +std::size_t CMultivariateOneOfNPrior::dimension() const { return m_Dimension; } -void CMultivariateOneOfNPrior::dataType(maths_t::EDataType value) -{ +void CMultivariateOneOfNPrior::dataType(maths_t::EDataType value) { this->CMultivariatePrior::dataType(value); - for (auto &model : m_Models) - { + for (auto& model : m_Models) { model.second->dataType(value); } } -void CMultivariateOneOfNPrior::decayRate(double value) -{ +void CMultivariateOneOfNPrior::decayRate(double value) { this->CMultivariatePrior::decayRate(value); - for (auto &model : m_Models) - { + for (auto& model : m_Models) { model.second->decayRate(this->decayRate()); } } -void CMultivariateOneOfNPrior::setToNonInformative(double offset, double decayRate) -{ - for (auto &model : m_Models) - { +void CMultivariateOneOfNPrior::setToNonInformative(double offset, double decayRate) { + for (auto& model : m_Models) { model.first.age(0.0); model.second->setToNonInformative(offset, decayRate); 
} @@ -338,26 +277,21 @@ void CMultivariateOneOfNPrior::setToNonInformative(double offset, double decayRa this->numberSamples(0.0); } -void CMultivariateOneOfNPrior::adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) -{ - for (auto &model : m_Models) - { +void CMultivariateOneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights) { + for (auto& model : m_Models) { model.second->adjustOffset(weightStyles, samples, weights); } } -void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) -{ - if (samples.empty()) - { +void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights) { + if (samples.empty()) { return; } - if (!this->check(samples, weights)) - { + if (!this->check(samples, weights)) { return; } @@ -380,30 +314,22 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, TDouble3Vec logLikelihoods; TMaxAccumulator maxLogLikelihood; TBool3Vec used, uses; - for (auto &model : m_Models) - { + for (auto& model : m_Models) { bool use = model.second->participatesInModelSelection(); // Update the weights with the marginal likelihoods. double logLikelihood = 0.0; - maths_t::EFloatingPointErrorStatus status = use ? - model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) : - maths_t::E_FpOverflowed; - if (status & maths_t::E_FpFailed) - { + maths_t::EFloatingPointErrorStatus status = + use ? model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) : maths_t::E_FpOverflowed; + if (status & maths_t::E_FpFailed) { LOG_ERROR("Failed to compute log-likelihood"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); - } - else - { - if (!(status & maths_t::E_FpOverflowed)) - { + } else { + if (!(status & maths_t::E_FpOverflowed)) { logLikelihood += model.second->unmarginalizedParameters() * penalty; logLikelihoods.push_back(logLikelihood); maxLogLikelihood.add(logLikelihood); - } - else - { + } else { logLikelihoods.push_back(MINUS_INF); } @@ -416,55 +342,42 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, } TDouble10Vec n(m_Dimension, 0.0); - try - { - for (const auto &weight : weights) - { + try { + for (const auto& weight : weights) { add(maths_t::count(m_Dimension, weightStyles, weight), n); } - } - catch (std::exception &e) - { + } catch (std::exception& e) { LOG_ERROR("Failed to add samples: " << e.what()); return; } - if (!isNonInformative && maxLogLikelihood.count() > 0) - { + if (!isNonInformative && maxLogLikelihood.count() > 0) { LOG_TRACE("logLikelihoods = " << core::CContainerPrinter::print(logLikelihoods)); // The idea here is to limit the amount which extreme samples // affect model selection, particularly early on in the model // life-cycle. 
double l = largest(n); - double minLogLikelihood = maxLogLikelihood[0] - - l * std::min(maxModelPenalty(this->numberSamples()), 100.0); + double minLogLikelihood = maxLogLikelihood[0] - l * std::min(maxModelPenalty(this->numberSamples()), 100.0); TMaxAccumulator maxLogWeight; - for (std::size_t i = 0; i < logLikelihoods.size(); ++i) - { - CModelWeight &weight = m_Models[i].first; - if (!uses[i]) - { + for (std::size_t i = 0; i < logLikelihoods.size(); ++i) { + CModelWeight& weight = m_Models[i].first; + if (!uses[i]) { weight.logWeight(MINUS_INF); - } - else if (used[i]) - { + } else if (used[i]) { weight.addLogFactor(std::max(logLikelihoods[i], minLogLikelihood)); maxLogWeight.add(weight.logWeight()); } } - for (std::size_t i = 0u; i < m_Models.size(); ++i) - { - if (!used[i] && uses[i]) - { + for (std::size_t i = 0u; i < m_Models.size(); ++i) { + if (!used[i] && uses[i]) { m_Models[i].first.logWeight(maxLogWeight[0] + LOG_INITIAL_WEIGHT); } } } - if (this->badWeights()) - { + if (this->badWeights()) { LOG_ERROR("Update failed (" << this->debugWeights() << ")"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); @@ -472,10 +385,8 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, } } -void CMultivariateOneOfNPrior::propagateForwardsByTime(double time) -{ - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { +void CMultivariateOneOfNPrior::propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { LOG_ERROR("Bad propagation time " << time); return; } @@ -484,10 +395,8 @@ void CMultivariateOneOfNPrior::propagateForwardsByTime(double time) double alpha = std::exp(-this->scaledDecayRate() * time); - for (auto &model : m_Models) - { - if (!this->isForForecasting()) - { + for (auto& model : m_Models) { + if (!this->isForForecasting()) { model.first.age(alpha); } model.second->propagateForwardsByTime(time); @@ -498,22 +407,17 @@ void CMultivariateOneOfNPrior::propagateForwardsByTime(double time) LOG_TRACE("numberSamples = " << this->numberSamples()); } -CMultivariateOneOfNPrior::TUnivariatePriorPtrDoublePr -CMultivariateOneOfNPrior::univariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const -{ +CMultivariateOneOfNPrior::TUnivariatePriorPtrDoublePr CMultivariateOneOfNPrior::univariate(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition) const { COneOfNPrior::TDoublePriorPtrPrVec models; TDouble3Vec weights; TMaxAccumulator maxWeight; double Z = 0.0; - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { TUnivariatePriorPtrDoublePr prior(model.second->univariate(marginalize, condition)); - if (prior.first == 0) - { + if (prior.first == 0) { return TUnivariatePriorPtrDoublePr(); } models.emplace_back(1.0, prior.first); @@ -523,8 +427,7 @@ CMultivariateOneOfNPrior::univariate(const TSize10Vec &marginalize, } } - for (std::size_t i = 0u; i < weights.size(); ++i) - { + for (std::size_t i = 0u; i < weights.size(); ++i) { models[i].first *= std::exp(weights[i] - maxWeight[0]) / Z; } @@ -532,12 +435,9 @@ CMultivariateOneOfNPrior::univariate(const TSize10Vec &marginalize, maxWeight.count() > 0 ? 
maxWeight[0] : 0.0); } -CMultivariateOneOfNPrior::TPriorPtrDoublePr -CMultivariateOneOfNPrior::bivariate(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const -{ - if (m_Dimension == 2) - { +CMultivariateOneOfNPrior::TPriorPtrDoublePr CMultivariateOneOfNPrior::bivariate(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition) const { + if (m_Dimension == 2) { return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0); } @@ -546,13 +446,10 @@ CMultivariateOneOfNPrior::bivariate(const TSize10Vec &marginalize, TMaxAccumulator maxWeight; double Z = 0.0; - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { TPriorPtrDoublePr prior(model.second->bivariate(marginalize, condition)); - if (prior.first == 0) - { + if (prior.first == 0) { return TPriorPtrDoublePr(); } models.emplace_back(1.0, prior.first); @@ -562,8 +459,7 @@ CMultivariateOneOfNPrior::bivariate(const TSize10Vec &marginalize, } } - for (std::size_t i = 0u; i < weights.size(); ++i) - { + for (std::size_t i = 0u; i < weights.size(); ++i) { models[i].first *= std::exp(weights[i] - maxWeight[0]) / Z; } @@ -571,21 +467,16 @@ CMultivariateOneOfNPrior::bivariate(const TSize10Vec &marginalize, maxWeight.count() > 0 ? maxWeight[0] : 0.0); } -TDouble10VecDouble10VecPr -CMultivariateOneOfNPrior::marginalLikelihoodSupport() const -{ +TDouble10VecDouble10VecPr CMultivariateOneOfNPrior::marginalLikelihoodSupport() const { // We define this is as the intersection of the component model // supports. - TDouble10VecDouble10VecPr result(TDouble10Vec(m_Dimension, MINUS_INF), - TDouble10Vec(m_Dimension, INF)); + TDouble10VecDouble10VecPr result(TDouble10Vec(m_Dimension, MINUS_INF), TDouble10Vec(m_Dimension, INF)); TDouble10VecDouble10VecPr modelSupport; - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { - modelSupport = model.second->marginalLikelihoodSupport(); - result.first = max(result.first, modelSupport.first); + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { + modelSupport = model.second->marginalLikelihoodSupport(); + result.first = max(result.first, modelSupport.first); result.second = min(result.second, modelSupport.second); } } @@ -593,8 +484,7 @@ CMultivariateOneOfNPrior::marginalLikelihoodSupport() const return result; } -TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMean() const -{ +TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMean() const { // This is E_{P(i)}[ E[X | P(i)] ] and the conditional expectation // is just the individual model expectation. Note we exclude models // with low weight because typically the means are similar between @@ -603,41 +493,33 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMean() const TDouble10Vec result(m_Dimension, 0.0); double w = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double wi = model.first; - if (wi > MINIMUM_SIGNIFICANT_WEIGHT) - { + if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { updateMean(model.second->marginalLikelihoodMean(), wi, result, w); } } return result; } -TDouble10Vec CMultivariateOneOfNPrior::nearestMarginalLikelihoodMean(const TDouble10Vec &value) const -{ +TDouble10Vec CMultivariateOneOfNPrior::nearestMarginalLikelihoodMean(const TDouble10Vec& value) const { // See marginalLikelihoodMean for discussion. 
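// updateMean is defined outside this hunk; presumably it maintains a
// running weighted average along these lines (a sketch, not the verbatim
// helper), so that after folding in every significant model
// result = sum_i(w_i * mean_i) / sum_i(w_i). The covariance overload used
// later is analogous.
#include <cstddef>
#include <vector>

void updateMean(const std::vector<double>& mean, // component model's mean
                double wi,                       // its selection weight
                std::vector<double>& result,     // running weighted mean
                double& w) {                     // running weight total
    if (wi <= 0.0) {
        return;
    }
    for (std::size_t j = 0; j < result.size(); ++j) {
        result[j] = (w * result[j] + wi * mean[j]) / (w + wi);
    }
    w += wi;
}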
TDouble10Vec result(m_Dimension, 0.0); double w = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double wi = model.first; - if (wi > MINIMUM_SIGNIFICANT_WEIGHT) - { + if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { updateMean(model.second->nearestMarginalLikelihoodMean(value), wi, result, w); } } return result; } -TDouble10Vec10Vec CMultivariateOneOfNPrior::marginalLikelihoodCovariance() const -{ +TDouble10Vec10Vec CMultivariateOneOfNPrior::marginalLikelihoodCovariance() const { TDouble10Vec10Vec result(m_Dimension, TDouble10Vec(m_Dimension, 0.0)); - if (this->isNonInformative()) - { - for (std::size_t i = 0u; i < m_Dimension; ++i) - { + if (this->isNonInformative()) { + for (std::size_t i = 0u; i < m_Dimension; ++i) { result[i][i] = INF; } return result; @@ -650,40 +532,32 @@ TDouble10Vec10Vec CMultivariateOneOfNPrior::marginalLikelihoodCovariance() const // the model if there is strong evidence against it. double w = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double wi = model.first; - if (wi > MINIMUM_SIGNIFICANT_WEIGHT) - { + if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { updateMean(model.second->marginalLikelihoodCovariance(), wi, result, w); } } return result; } -TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodVariances() const -{ - if (this->isNonInformative()) - { +TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodVariances() const { + if (this->isNonInformative()) { return TDouble10Vec(m_Dimension, INF); } TDouble10Vec result(m_Dimension, 0.0); double w = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double wi = model.first; - if (wi > MINIMUM_SIGNIFICANT_WEIGHT) - { + if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { updateMean(model.second->marginalLikelihoodVariances(), wi, result, w); } } return result; } -TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) const -{ +TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const { // We approximate this as the weighted average of the component // model modes. 
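// The exact weighting is elided from this hunk; a plausible reading is
// that each mode is folded into a running weighted average with weight
// w_i * exp(logLikelihood_i) - the model's selection weight times the
// likelihood it assigns its own mode - and the result is truncated to the
// common support. A hypothetical condensation, not the member function:
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

struct SModeEstimate {
    std::vector<double> s_Mode; // component model's mode
    double s_Weight;            // model's selection weight
    double s_LogLikelihood;     // log-likelihood of that mode
};

std::vector<double> approximateMixtureMode(const std::vector<SModeEstimate>& models,
                                           const std::vector<double>& lower,
                                           const std::vector<double>& upper) {
    std::vector<double> result(lower.size(), 0.0);
    double w = 0.0;
    for (const auto& model : models) {
        double wi = model.s_Weight * std::exp(model.s_LogLikelihood);
        if (wi <= 0.0) {
            continue;
        }
        for (std::size_t j = 0; j < result.size(); ++j) {
            result[j] = (w * result[j] + wi * model.s_Mode[j]) / (w + wi);
        }
        w += wi;
    }
    for (std::size_t j = 0; j < result.size(); ++j) {
        result[j] = std::min(std::max(result[j], lower[j]), upper[j]);
    }
    return result;
}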
@@ -694,10 +568,8 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyle TDouble10Vec result(m_Dimension, 0.0); double w = 0.0; - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { sample[0] = model.second->marginalLikelihoodMode(weightStyles, weights); double logLikelihood; model.second->jointLogMarginalLikelihood(weightStyles, sample, sampleWeights, logLikelihood); @@ -709,21 +581,17 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyle return CTools::truncate(result, support.first, support.second); } -maths_t::EFloatingPointErrorStatus -CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - double &result) const -{ +maths_t::EFloatingPointErrorStatus CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& result) const { result = 0.0; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (!this->check(samples, weights)) - { + if (!this->check(samples, weights)) { return maths_t::E_FpFailed; } @@ -735,19 +603,15 @@ CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weig TMaxAccumulator maxLogLikelihood; double Z = 0.0; - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { double logLikelihood; maths_t::EFloatingPointErrorStatus status = - model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood); - if (status & maths_t::E_FpFailed) - { + model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood); + if (status & maths_t::E_FpFailed) { return status; } - if (!(status & maths_t::E_FpOverflowed)) - { + if (!(status & maths_t::E_FpOverflowed)) { logLikelihood += model.first.logWeight(); logLikelihoods.push_back(logLikelihood); maxLogLikelihood.add(logLikelihood); @@ -756,30 +620,25 @@ CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weig } } - if (maxLogLikelihood.count() == 0) - { + if (maxLogLikelihood.count() == 0) { result = MINUS_INF; return maths_t::E_FpOverflowed; } - for (auto logLikelihood : logLikelihoods) - { + for (auto logLikelihood : logLikelihoods) { result += std::exp(logLikelihood - maxLogLikelihood[0]); } result = maxLogLikelihood[0] + CTools::fastLog(result / Z); maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result); - if (status & maths_t::E_FpFailed) - { + if (status & maths_t::E_FpFailed) { LOG_ERROR("Failed to compute log likelihood (" << this->debugWeights() << ")"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); LOG_ERROR("logLikelihoods = " << core::CContainerPrinter::print(logLikelihoods)); LOG_ERROR("maxLogLikelihood = " << maxLogLikelihood[0]); - } - else if (status & maths_t::E_FpOverflowed) - { + } else if (status & maths_t::E_FpOverflowed) { LOG_ERROR("Log likelihood overflowed for (" << this->debugWeights() << ")"); LOG_TRACE("likelihoods = " << core::CContainerPrinter::print(logLikelihoods)); 
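// For reference, the overflow-safe reduction this function builds up to:
// a standard log-sum-exp over the weight-adjusted log-likelihoods, with Z
// the total model weight accumulated earlier in the function. Factoring
// out the maximum keeps every exponential in [0, 1]; assumes at least one
// finite log-likelihood (the caller handles the all-overflowed case).
#include <algorithm>
#include <cmath>
#include <vector>

double logMixtureLikelihood(const std::vector<double>& logLikelihoods, double Z) {
    double maxLogLikelihood =
        *std::max_element(logLikelihoods.begin(), logLikelihoods.end());
    double sum = 0.0;
    for (double logLikelihood : logLikelihoods) {
        sum += std::exp(logLikelihood - maxLogLikelihood); // each term <= 1
    }
    // Equals log(sum_i exp(logLikelihoods[i]) / Z) without overflow.
    return maxLogLikelihood + std::log(sum / Z);
}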
LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); @@ -788,200 +647,157 @@ CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weig return status; } -void CMultivariateOneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, - TDouble10Vec1Vec &samples) const -{ +void CMultivariateOneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const { samples.clear(); - if (numberSamples == 0 || this->isNonInformative()) - { + if (numberSamples == 0 || this->isNonInformative()) { return; } TDouble3Vec weights; double Z = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { weights.push_back(model.first); Z += model.first; } - for (auto &weight : weights) - { + for (auto& weight : weights) { weight /= Z; } CSampling::TSizeVec sampling; CSampling::weightedSample(numberSamples, weights, sampling); - LOG_TRACE("weights = " << core::CContainerPrinter::print(weights) - << ", sampling = " << core::CContainerPrinter::print(sampling)); + LOG_TRACE("weights = " << core::CContainerPrinter::print(weights) << ", sampling = " << core::CContainerPrinter::print(sampling)); - if (sampling.size() != m_Models.size()) - { + if (sampling.size() != m_Models.size()) { LOG_ERROR("Failed to sample marginal likelihood"); return; } TDouble10VecDouble10VecPr support = this->marginalLikelihoodSupport(); - for (std::size_t i = 0u; i < m_Dimension; ++i) - { - support.first[i] = CTools::shiftRight(support.first[i]); + for (std::size_t i = 0u; i < m_Dimension; ++i) { + support.first[i] = CTools::shiftRight(support.first[i]); support.second[i] = CTools::shiftLeft(support.second[i]); } samples.reserve(numberSamples); TDouble10Vec1Vec modelSamples; - for (std::size_t i = 0u; i < m_Models.size(); ++i) - { + for (std::size_t i = 0u; i < m_Models.size(); ++i) { modelSamples.clear(); m_Models[i].second->sampleMarginalLikelihood(sampling[i], modelSamples); - for (auto sample : modelSamples) - { + for (auto sample : modelSamples) { samples.push_back(CTools::truncate(sample, support.first, support.second)); } } - LOG_TRACE("samples = "<< core::CContainerPrinter::print(samples)); + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); } -bool CMultivariateOneOfNPrior::isNonInformative() const -{ - for (const auto &model : m_Models) - { - if (model.second->isNonInformative()) - { +bool CMultivariateOneOfNPrior::isNonInformative() const { + for (const auto& model : m_Models) { + if (model.second->isNonInformative()) { return true; } } return false; } -void CMultivariateOneOfNPrior::print(const std::string &separator, - std::string &result) const -{ +void CMultivariateOneOfNPrior::print(const std::string& separator, std::string& result) const { result += core_t::LINE_ENDING + separator + " one-of-n"; - if (this->isNonInformative()) - { + if (this->isNonInformative()) { result += " non-informative"; } result += ':'; - result += core_t::LINE_ENDING + separator - + " # samples " + core::CStringUtils::typeToStringPretty(this->numberSamples()); + result += core_t::LINE_ENDING + separator + " # samples " + core::CStringUtils::typeToStringPretty(this->numberSamples()); std::string separator_ = separator + separator; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double weight = model.first; - if (weight >= MINIMUM_SIGNIFICANT_WEIGHT) - { - result += core_t::LINE_ENDING - + separator_ - + " weight " + core::CStringUtils::typeToStringPretty(weight); + if (weight >= 
MINIMUM_SIGNIFICANT_WEIGHT) { + result += core_t::LINE_ENDING + separator_ + " weight " + core::CStringUtils::typeToStringPretty(weight); model.second->print(separator_, result); } } } -uint64_t CMultivariateOneOfNPrior::checksum(uint64_t seed) const -{ +uint64_t CMultivariateOneOfNPrior::checksum(uint64_t seed) const { seed = this->CMultivariatePrior::checksum(seed); return CChecksum::calculate(seed, m_Models); } -void CMultivariateOneOfNPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CMultivariateOneOfNPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMultivariateOneOfNPrior"); core::CMemoryDebug::dynamicSize("m_Models", m_Models, mem); } -std::size_t CMultivariateOneOfNPrior::memoryUsage() const -{ +std::size_t CMultivariateOneOfNPrior::memoryUsage() const { return core::CMemory::dynamicSize(m_Models); } -std::size_t CMultivariateOneOfNPrior::staticSize() const -{ +std::size_t CMultivariateOneOfNPrior::staticSize() const { return sizeof(*this); } -std::string CMultivariateOneOfNPrior::persistenceTag() const -{ +std::string CMultivariateOneOfNPrior::persistenceTag() const { return ONE_OF_N_TAG + core::CStringUtils::typeToString(m_Dimension); } -void CMultivariateOneOfNPrior::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - for (const auto &model : m_Models) - { - inserter.insertLevel(MODEL_TAG, boost::bind(&modelAcceptPersistInserter, - boost::cref(model.first), - boost::cref(*model.second), _1)); +void CMultivariateOneOfNPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + for (const auto& model : m_Models) { + inserter.insertLevel(MODEL_TAG, boost::bind(&modelAcceptPersistInserter, boost::cref(model.first), boost::cref(*model.second), _1)); } inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); } -CMultivariateOneOfNPrior::TDouble3Vec CMultivariateOneOfNPrior::weights() const -{ +CMultivariateOneOfNPrior::TDouble3Vec CMultivariateOneOfNPrior::weights() const { TDouble3Vec result = this->logWeights(); - for (auto &weight : result) - { + for (auto& weight : result) { weight = std::exp(weight); } return result; } -CMultivariateOneOfNPrior::TDouble3Vec CMultivariateOneOfNPrior::logWeights() const -{ +CMultivariateOneOfNPrior::TDouble3Vec CMultivariateOneOfNPrior::logWeights() const { TDouble3Vec result; double Z = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { result.push_back(model.first.logWeight()); Z += std::exp(result.back()); } Z = std::log(Z); - for (auto &weight : result) - { + for (auto& weight : result) { weight -= Z; } return result; } -CMultivariateOneOfNPrior::TPriorCPtr3Vec CMultivariateOneOfNPrior::models() const -{ +CMultivariateOneOfNPrior::TPriorCPtr3Vec CMultivariateOneOfNPrior::models() const { TPriorCPtr3Vec result; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { result.push_back(model.second.get()); } return result; } -bool CMultivariateOneOfNPrior::badWeights() const -{ - for (const auto &model : m_Models) - { - if (!CMathsFuncs::isFinite(model.first.logWeight())) - { +bool CMultivariateOneOfNPrior::badWeights() const { + for (const auto& model : m_Models) { + if (!CMathsFuncs::isFinite(model.first.logWeight())) { return true; } } return false; } -std::string CMultivariateOneOfNPrior::debugWeights() const -{ - if (m_Models.empty()) - { 
+std::string CMultivariateOneOfNPrior::debugWeights() const { + if (m_Models.empty()) { return std::string(); } std::ostringstream result; result << std::scientific << std::setprecision(15); - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { result << " " << model.first.logWeight(); } result << " "; @@ -990,6 +806,5 @@ std::string CMultivariateOneOfNPrior::debugWeights() const const double CMultivariateOneOfNPrior::MAXIMUM_RELATIVE_ERROR = 1e-3; const double CMultivariateOneOfNPrior::LOG_MAXIMUM_RELATIVE_ERROR = std::log(MAXIMUM_RELATIVE_ERROR); - } } diff --git a/lib/maths/CMultivariateOneOfNPriorFactory.cc b/lib/maths/CMultivariateOneOfNPriorFactory.cc index 615a4285c1..f2b6f475ec 100644 --- a/lib/maths/CMultivariateOneOfNPriorFactory.cc +++ b/lib/maths/CMultivariateOneOfNPriorFactory.cc @@ -8,28 +8,22 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -CMultivariateOneOfNPriorFactory::TPriorPtr -CMultivariateOneOfNPriorFactory::nonInformative(std::size_t dimension, - maths_t::EDataType dataType, - double decayRate, - const TPriorPtrVec &models) -{ +CMultivariateOneOfNPriorFactory::TPriorPtr CMultivariateOneOfNPriorFactory::nonInformative(std::size_t dimension, + maths_t::EDataType dataType, + double decayRate, + const TPriorPtrVec& models) { return TPriorPtr(new CMultivariateOneOfNPrior(dimension, models, dataType, decayRate)); } bool CMultivariateOneOfNPriorFactory::restore(std::size_t dimension, - const SDistributionRestoreParams ¶ms, - TPriorPtr &ptr, - core::CStateRestoreTraverser &traverser) -{ + const SDistributionRestoreParams& params, + TPriorPtr& ptr, + core::CStateRestoreTraverser& traverser) { ptr.reset(new CMultivariateOneOfNPrior(dimension, params, traverser)); return true; } - } } diff --git a/lib/maths/CMultivariatePrior.cc b/lib/maths/CMultivariatePrior.cc index d619959be7..ab7665a1fa 100644 --- a/lib/maths/CMultivariatePrior.cc +++ b/lib/maths/CMultivariatePrior.cc @@ -9,9 +9,9 @@ #include #include -#include #include #include +#include #include #include @@ -19,142 +19,102 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { -void setDecayRate(double value, double fallback, double &result) -{ - if (CMathsFuncs::isFinite(value)) - { +void setDecayRate(double value, double fallback, double& result) { + if (CMathsFuncs::isFinite(value)) { result = value; - } - else - { + } else { LOG_ERROR("Invalid decay rate " << value); result = fallback; } } - } -CMultivariatePrior::CMultivariatePrior() : - m_Forecasting(false), - m_DataType(maths_t::E_DiscreteData), - m_DecayRate(0.0), - m_NumberSamples(0) -{} +CMultivariatePrior::CMultivariatePrior() : m_Forecasting(false), m_DataType(maths_t::E_DiscreteData), m_DecayRate(0.0), m_NumberSamples(0) { +} -CMultivariatePrior::CMultivariatePrior(maths_t::EDataType dataType, - double decayRate) : - m_Forecasting(false), - m_DataType(dataType), - m_NumberSamples(0) -{ +CMultivariatePrior::CMultivariatePrior(maths_t::EDataType dataType, double decayRate) + : m_Forecasting(false), m_DataType(dataType), m_NumberSamples(0) { setDecayRate(decayRate, FALLBACK_DECAY_RATE, m_DecayRate); } -void CMultivariatePrior::swap(CMultivariatePrior &other) -{ +void CMultivariatePrior::swap(CMultivariatePrior& other) { std::swap(m_Forecasting, other.m_Forecasting); std::swap(m_DataType, other.m_DataType); std::swap(m_DecayRate, other.m_DecayRate); std::swap(m_NumberSamples, other.m_NumberSamples); } -void CMultivariatePrior::forForecasting() -{ 
+void CMultivariatePrior::forForecasting() { m_Forecasting = true; } -bool CMultivariatePrior::isForForecasting() const -{ +bool CMultivariatePrior::isForForecasting() const { return m_Forecasting; } -bool CMultivariatePrior::isDiscrete() const -{ - return m_DataType == maths_t::E_DiscreteData - || m_DataType == maths_t::E_IntegerData; +bool CMultivariatePrior::isDiscrete() const { + return m_DataType == maths_t::E_DiscreteData || m_DataType == maths_t::E_IntegerData; } -bool CMultivariatePrior::isInteger() const -{ +bool CMultivariatePrior::isInteger() const { return m_DataType == maths_t::E_IntegerData; } -maths_t::EDataType CMultivariatePrior::dataType() const -{ +maths_t::EDataType CMultivariatePrior::dataType() const { return m_DataType; } -double CMultivariatePrior::decayRate() const -{ +double CMultivariatePrior::decayRate() const { return m_DecayRate; } -void CMultivariatePrior::dataType(maths_t::EDataType value) -{ +void CMultivariatePrior::dataType(maths_t::EDataType value) { m_DataType = value; } -void CMultivariatePrior::decayRate(double value) -{ +void CMultivariatePrior::decayRate(double value) { setDecayRate(value, FALLBACK_DECAY_RATE, m_DecayRate); } -void CMultivariatePrior::addSamples(const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &/*samples*/, - const TDouble10Vec4Vec1Vec &weights) -{ +void CMultivariatePrior::addSamples(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& /*samples*/, + const TDouble10Vec4Vec1Vec& weights) { std::size_t d = this->dimension(); TDouble10Vec n(d, 0.0); - try - { - for (std::size_t i = 0u; i < weights.size(); ++i) - { + try { + for (std::size_t i = 0u; i < weights.size(); ++i) { TDouble10Vec wi = maths_t::countForUpdate(d, weightStyles, weights[i]); - for (std::size_t j = 0u; j < d; ++j) - { + for (std::size_t j = 0u; j < d; ++j) { n[j] += wi[j]; } } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract sample counts: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to extract sample counts: " << e.what()); } this->addSamples(smallest(n)); } -CMultivariatePrior::TDouble10Vec -CMultivariatePrior::nearestMarginalLikelihoodMean(const TDouble10Vec &/*value*/) const -{ +CMultivariatePrior::TDouble10Vec CMultivariatePrior::nearestMarginalLikelihoodMean(const TDouble10Vec& /*value*/) const { return this->marginalLikelihoodMean(); } -CMultivariatePrior::TDouble10Vec1Vec -CMultivariatePrior::marginalLikelihoodModes(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) const -{ +CMultivariatePrior::TDouble10Vec1Vec CMultivariatePrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) const { return TDouble10Vec1Vec{this->marginalLikelihoodMode(weightStyles, weights)}; } bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - const TSize10Vec &coordinates, - TDouble10Vec2Vec &lowerBounds, - TDouble10Vec2Vec &upperBounds, - TTail10Vec &tail) const -{ - if (coordinates.empty()) - { + const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + const TSize10Vec& coordinates, + TDouble10Vec2Vec& lowerBounds, + TDouble10Vec2Vec& upperBounds, + TTail10Vec& tail) const { + if (coordinates.empty()) { lowerBounds.clear(); upperBounds.clear(); tail.clear(); @@ -165,13 +125,11 @@ bool 
CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal upperBounds.assign(2, TDouble10Vec(coordinates.size(), 1.0)); tail.assign(coordinates.size(), maths_t::E_UndeterminedTail); - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute distribution for empty sample set"); return false; } - if (!this->check(samples, weights)) - { + if (!this->check(samples, weights)) { return false; } @@ -183,16 +141,10 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal static const TSize10Vec NO_MARGINS; static const TSizeDoublePr10Vec NO_CONDITIONS; - TJointProbabilityOfLessLikelySamplesVec lowerBounds_[2] = - { - TJointProbabilityOfLessLikelySamplesVec(coordinates.size()), - TJointProbabilityOfLessLikelySamplesVec(coordinates.size()) - }; - TJointProbabilityOfLessLikelySamplesVec upperBounds_[2] = - { - TJointProbabilityOfLessLikelySamplesVec(coordinates.size()), - TJointProbabilityOfLessLikelySamplesVec(coordinates.size()) - }; + TJointProbabilityOfLessLikelySamplesVec lowerBounds_[2] = {TJointProbabilityOfLessLikelySamplesVec(coordinates.size()), + TJointProbabilityOfLessLikelySamplesVec(coordinates.size())}; + TJointProbabilityOfLessLikelySamplesVec upperBounds_[2] = {TJointProbabilityOfLessLikelySamplesVec(coordinates.size()), + TJointProbabilityOfLessLikelySamplesVec(coordinates.size())}; std::size_t d = this->dimension(); TSize10Vec marginalize(d - 1); @@ -200,8 +152,7 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal TDouble1Vec sc(1); TDouble4Vec1Vec wc{TDouble4Vec(weightStyles.size())}; - for (std::size_t i = 0; i < coordinates.size(); ++i) - { + for (std::size_t i = 0; i < coordinates.size(); ++i) { std::size_t coordinate = coordinates[i]; std::copy_if(boost::make_counting_iterator(std::size_t(0)), @@ -209,44 +160,33 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal marginalize.begin(), [coordinate](std::size_t j) { return j != coordinate; }); TUnivariatePriorPtr margin(this->univariate(marginalize, NO_CONDITIONS).first); - if (!margin) - { + if (!margin) { return false; } - for (std::size_t j = 0u; j < samples.size(); ++j) - { - for (std::size_t k = 0u, l = 0u; k < d; ++k) - { - if (k != coordinate) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { + for (std::size_t k = 0u, l = 0u; k < d; ++k) { + if (k != coordinate) { condition[l++] = std::make_pair(k, samples[j][k]); } } sc[0] = samples[j][coordinate]; - for (std::size_t k = 0u; k < weights[j].size(); ++k) - { + for (std::size_t k = 0u; k < weights[j].size(); ++k) { wc[0][k] = weights[j][k][coordinate]; } double lb[2], ub[2]; maths_t::ETail tc[2]; - if (!margin->probabilityOfLessLikelySamples(calculation, - weightStyles, sc, wc, - lb[0], ub[0], tc[0])) - { + if (!margin->probabilityOfLessLikelySamples(calculation, weightStyles, sc, wc, lb[0], ub[0], tc[0])) { LOG_ERROR("Failed to compute probability for coordinate " << coordinate); return false; } LOG_TRACE("lb(" << coordinate << ") = " << lb[0] << ", ub(" << coordinate << ") = " << ub[0]); TUnivariatePriorPtr conditional(this->univariate(NO_MARGINS, condition).first); - if (!conditional->probabilityOfLessLikelySamples(calculation, - weightStyles, sc, wc, - lb[1], ub[1], tc[1])) - { + if (!conditional->probabilityOfLessLikelySamples(calculation, weightStyles, sc, wc, lb[1], ub[1], tc[1])) { LOG_ERROR("Failed to compute probability for coordinate " << coordinate); return false; } @@ -260,13 +200,9 @@ bool 
CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal } } - for (std::size_t i = 0; i < coordinates.size(); ++i) - { - if ( !lowerBounds_[0][i].calculate(lowerBounds[0][i]) - || !upperBounds_[0][i].calculate(upperBounds[0][i]) - || !lowerBounds_[1][i].calculate(lowerBounds[1][i]) - || !upperBounds_[1][i].calculate(upperBounds[1][i])) - { + for (std::size_t i = 0; i < coordinates.size(); ++i) { + if (!lowerBounds_[0][i].calculate(lowerBounds[0][i]) || !upperBounds_[0][i].calculate(upperBounds[0][i]) || + !lowerBounds_[1][i].calculate(lowerBounds[1][i]) || !upperBounds_[1][i].calculate(upperBounds[1][i])) { LOG_ERROR("Failed to compute probability for coordinate " << coordinates[i]); return false; } @@ -276,18 +212,16 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal } bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - TTail10Vec &tail) const -{ + const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + TTail10Vec& tail) const { lowerBound = upperBound = 1.0; tail.assign(this->dimension(), maths_t::E_UndeterminedTail); - if (this->isNonInformative()) - { + if (this->isNonInformative()) { return true; } @@ -301,19 +235,14 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal TDouble10Vec4Vec1Vec weight(1); TDouble10Vec2Vec lbs; TDouble10Vec2Vec ubs; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { sample[0] = samples[i]; weight[0] = weights[i]; - if (!this->probabilityOfLessLikelySamples(calculation, weightStyles, - sample, weight, coordinates, - lbs, ubs, tail)) - { + if (!this->probabilityOfLessLikelySamples(calculation, weightStyles, sample, weight, coordinates, lbs, ubs, tail)) { return false; } - for (std::size_t j = 0u; j < this->dimension(); ++j) - { + for (std::size_t j = 0u; j < this->dimension(); ++j) { lowerBound_[0].add(lbs[0][j]); upperBound_[0].add(ubs[0][j]); lowerBound_[1].add(lbs[1][j]); @@ -322,23 +251,18 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal } double lb[2], ub[2]; - if ( !lowerBound_[0].calculate(lb[0]) - || !upperBound_[0].calculate(ub[0]) - || !lowerBound_[1].calculate(lb[1]) - || !upperBound_[1].calculate(ub[1])) - { + if (!lowerBound_[0].calculate(lb[0]) || !upperBound_[0].calculate(ub[0]) || !lowerBound_[1].calculate(lb[1]) || + !upperBound_[1].calculate(ub[1])) { return false; } - LOG_TRACE("lb = " << core::CContainerPrinter::print(lb) - << ", ub = " << core::CContainerPrinter::print(ub)); + LOG_TRACE("lb = " << core::CContainerPrinter::print(lb) << ", ub = " << core::CContainerPrinter::print(ub)); lowerBound = std::sqrt(lb[0] * lb[1]); upperBound = std::sqrt(ub[0] * ub[1]); return true; } -std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, std::size_t y) const -{ +std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, std::size_t y) const { // We'll plot the marginal likelihood function over a range where // most of the mass is, i.e. the 99% confidence interval. 
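// Reduced to its essentials, the routine below samples the density on a
// POINTS x POINTS grid spanning that interval and emits Octave-style
// matrices for contour plotting. 'density' and the grid bounds stand in
// for the member calls in the diff; this is a sketch, not the member
// function itself.
#include <cstddef>
#include <functional>
#include <sstream>
#include <string>

std::string plotGrid(double x0, double x1, double y0, double y1,
                     const std::function<double(double, double)>& density) {
    const std::size_t POINTS = 64;
    double dx = (x1 - x0) / static_cast<double>(POINTS - 1);
    double dy = (y1 - y0) / static_cast<double>(POINTS - 1);
    std::ostringstream xabscissa, yabscissa, likelihood;
    xabscissa << "x = [";
    yabscissa << "y = [";
    likelihood << "likelihood = [";
    for (std::size_t i = 0; i < POINTS; ++i) {
        xabscissa << x0 + dx * static_cast<double>(i) << " ";
        yabscissa << y0 + dy * static_cast<double>(i) << " ";
    }
    xabscissa << "];\n";
    yabscissa << "];\n";
    for (std::size_t i = 0; i < POINTS; ++i) {
        for (std::size_t j = 0; j < POINTS; ++j) {
            likelihood << density(x0 + dx * static_cast<double>(i),
                                  y0 + dy * static_cast<double>(j)) << " ";
        }
        likelihood << "\n";
    }
    likelihood << "];\ncontour(x, y, likelihood', 20);";
    return xabscissa.str() + yabscissa.str() + likelihood.str();
}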
@@ -355,28 +279,21 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, s xm.reserve(d - 1); ym.reserve(d - 1); xym.reserve(d - 2); - for (std::size_t i = 0u; i < d; ++i) - { - if (i != x && i != y) - { + for (std::size_t i = 0u; i < d; ++i) { + if (i != x && i != y) { xm.push_back(i); ym.push_back(i); xym.push_back(i); - } - else if (i != x) - { + } else if (i != x) { xm.push_back(i); - } - else if (i != y) - { + } else if (i != y) { ym.push_back(i); } } boost::shared_ptr xMargin(this->univariate(xm, TSizeDoublePr10Vec()).first); - if (x == y) - { + if (x == y) { return xMargin != 0 ? xMargin->printMarginalLikelihoodFunction() : std::string(); } @@ -397,8 +314,7 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, s yabscissa << "y = ["; double x_ = xRange.first; double y_ = yRange.first; - for (std::size_t i = 0u; i < POINTS; ++i, x_ += dx, y_ += dy) - { + for (std::size_t i = 0u; i < POINTS; ++i, x_ += dx, y_ += dy) { xabscissa << x_ << " "; yabscissa << y_ << " "; } @@ -409,11 +325,9 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, s TDouble10Vec1Vec sample(1, TDouble10Vec(2)); TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); x_ = xRange.first; - for (std::size_t i = 0u; i < POINTS; ++i, x_ += dx) - { + for (std::size_t i = 0u; i < POINTS; ++i, x_ += dx) { y_ = yRange.first; - for (std::size_t j = 0u; j < POINTS; ++j, y_ += dy) - { + for (std::size_t j = 0u; j < POINTS; ++j, y_ += dy) { sample[0][0] = x_; sample[0][1] = y_; double l; @@ -425,79 +339,61 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, s likelihood << "];" << core_t::LINE_ENDING << "contour(x, y, likelihood', 20);"; return xabscissa.str() + yabscissa.str() + likelihood.str(); - } -uint64_t CMultivariatePrior::checksum(uint64_t seed) const -{ +uint64_t CMultivariatePrior::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_Forecasting); seed = CChecksum::calculate(seed, m_DataType); seed = CChecksum::calculate(seed, m_DecayRate); return CChecksum::calculate(seed, m_NumberSamples); } -std::string CMultivariatePrior::print() const -{ +std::string CMultivariatePrior::print() const { std::string result; this->print("--", result); return result; } -double CMultivariatePrior::offsetMargin() const -{ +double CMultivariatePrior::offsetMargin() const { return 0.2; } -double CMultivariatePrior::numberSamples() const -{ +double CMultivariatePrior::numberSamples() const { return m_NumberSamples; } -void CMultivariatePrior::numberSamples(double numberSamples) -{ +void CMultivariatePrior::numberSamples(double numberSamples) { m_NumberSamples = numberSamples; } -bool CMultivariatePrior::participatesInModelSelection() const -{ +bool CMultivariatePrior::participatesInModelSelection() const { return true; } -double CMultivariatePrior::unmarginalizedParameters() const -{ +double CMultivariatePrior::unmarginalizedParameters() const { return 0.0; } -double CMultivariatePrior::scaledDecayRate() const -{ +double CMultivariatePrior::scaledDecayRate() const { return std::pow(0.5, static_cast(this->dimension())) * this->decayRate(); } -void CMultivariatePrior::addSamples(double n) -{ +void CMultivariatePrior::addSamples(double n) { m_NumberSamples += n; } -bool CMultivariatePrior::check(const TDouble10Vec1Vec &samples, - const TDouble10Vec4Vec1Vec &weights) const -{ - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" << samples - << "' and weights '" << 
weights << "'"); +bool CMultivariatePrior::check(const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) const { + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << samples << "' and weights '" << weights << "'"); return false; } - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (samples[i].size() != this->dimension()) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (samples[i].size() != this->dimension()) { LOG_ERROR("Invalid sample '" << samples[i] << "'"); return false; } - for (const auto &weight : weights[i]) - { - if (weight.size() != this->dimension()) - { + for (const auto& weight : weights[i]) { + if (weight.size() != this->dimension()) { LOG_ERROR("Invalid weight '" << weight << "'"); return false; } @@ -506,17 +402,14 @@ bool CMultivariatePrior::check(const TDouble10Vec1Vec &samples, return true; } -bool CMultivariatePrior::check(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition) const -{ - static const auto FIRST = [](const TSizeDoublePr &pair) { return pair.first; }; +bool CMultivariatePrior::check(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + static const auto FIRST = [](const TSizeDoublePr& pair) { return pair.first; }; std::size_t d = this->dimension(); - if ( (marginalize.size() > 0 && marginalize.back() >= d) - || (condition.size() > 0 && condition.back().first >= d) - || CSetTools::setIntersectSize(marginalize.begin(), marginalize.end(), - boost::make_transform_iterator(condition.begin(), FIRST), - boost::make_transform_iterator(condition.end(), FIRST)) != 0) - { + if ((marginalize.size() > 0 && marginalize.back() >= d) || (condition.size() > 0 && condition.back().first >= d) || + CSetTools::setIntersectSize(marginalize.begin(), + marginalize.end(), + boost::make_transform_iterator(condition.begin(), FIRST), + boost::make_transform_iterator(condition.end(), FIRST)) != 0) { LOG_ERROR("Invalid variables for computing univariate distribution: " << "marginalize '" << marginalize << "'" << ", condition '" << condition << "'"); @@ -525,21 +418,15 @@ bool CMultivariatePrior::check(const TSize10Vec &marginalize, return true; } -void CMultivariatePrior::remainingVariables(const TSize10Vec &marginalize, - const TSizeDoublePr10Vec &condition, - TSize10Vec &result) const -{ +void CMultivariatePrior::remainingVariables(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition, TSize10Vec& result) const { std::size_t d = this->dimension(); result.reserve(d - marginalize.size() - condition.size()); - for (std::size_t i = 0u, j = 0u, k = 0u; k < d; ++k) - { - if (i < marginalize.size() && k == marginalize[i]) - { + for (std::size_t i = 0u, j = 0u, k = 0u; k < d; ++k) { + if (i < marginalize.size() && k == marginalize[i]) { ++i; continue; } - if (j < condition.size() && k == condition[j].first) - { + if (j < condition.size() && k == condition[j].first) { ++j; continue; } @@ -547,8 +434,7 @@ void CMultivariatePrior::remainingVariables(const TSize10Vec &marginalize, } } -double CMultivariatePrior::smallest(const TDouble10Vec &x) const -{ +double CMultivariatePrior::smallest(const TDouble10Vec& x) const { return *std::min_element(x.begin(), x.end()); } @@ -557,6 +443,5 @@ const std::string CMultivariatePrior::MULTIMODAL_TAG("a"); const std::string CMultivariatePrior::NORMAL_TAG("b"); const std::string CMultivariatePrior::ONE_OF_N_TAG("c"); const std::string CMultivariatePrior::CONSTANT_TAG("d"); - } } diff --git a/lib/maths/CNaiveBayes.cc 
b/lib/maths/CNaiveBayes.cc index 2ae52c8442..cd71fbb846 100644 --- a/lib/maths/CNaiveBayes.cc +++ b/lib/maths/CNaiveBayes.cc @@ -7,8 +7,8 @@ #include #include -#include #include +#include #include #include #include @@ -25,12 +25,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { const std::string PRIOR_TAG{"a"}; const std::string CLASS_LABEL_TAG{"b"}; const std::string CLASS_MODEL_TAG{"c"}; @@ -39,278 +36,210 @@ const std::string COUNT_TAG{"e"}; const std::string CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f"}; } -CNaiveBayesFeatureDensityFromPrior::CNaiveBayesFeatureDensityFromPrior(const CPrior &prior) : - m_Prior(prior.clone()) -{} +CNaiveBayesFeatureDensityFromPrior::CNaiveBayesFeatureDensityFromPrior(const CPrior& prior) : m_Prior(prior.clone()) { +} -void CNaiveBayesFeatureDensityFromPrior::add(const TDouble1Vec &x) -{ +void CNaiveBayesFeatureDensityFromPrior::add(const TDouble1Vec& x) { m_Prior->addSamples(CConstantWeights::COUNT, x, CConstantWeights::SINGLE_UNIT); } -CNaiveBayesFeatureDensityFromPrior *CNaiveBayesFeatureDensityFromPrior::clone() const -{ +CNaiveBayesFeatureDensityFromPrior* CNaiveBayesFeatureDensityFromPrior::clone() const { return new CNaiveBayesFeatureDensityFromPrior(*m_Prior); } -bool CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; - RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind( - CPriorStateSerialiser(), - boost::cref(params), boost::ref(m_Prior), _1))); - } - while (traverser.next()); +bool CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE(PRIOR_TAG, + traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(m_Prior), _1))); + } while (traverser.next()); return true; } -void CNaiveBayesFeatureDensityFromPrior::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(*m_Prior), _1)); +void CNaiveBayesFeatureDensityFromPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_Prior), _1)); } -double CNaiveBayesFeatureDensityFromPrior::logValue(const TDouble1Vec &x) const -{ +double CNaiveBayesFeatureDensityFromPrior::logValue(const TDouble1Vec& x) const { double result; - if (m_Prior->jointLogMarginalLikelihood(CConstantWeights::COUNT, x, - CConstantWeights::SINGLE_UNIT, - result) != maths_t::E_FpNoErrors) - { + if (m_Prior->jointLogMarginalLikelihood(CConstantWeights::COUNT, x, CConstantWeights::SINGLE_UNIT, result) != maths_t::E_FpNoErrors) { LOG_ERROR("Bad density value at " << x << " for " << m_Prior->print()); return boost::numeric::bounds::lowest(); } return result; } -double CNaiveBayesFeatureDensityFromPrior::logMaximumValue() const -{ +double CNaiveBayesFeatureDensityFromPrior::logMaximumValue() const { double result; - if (m_Prior->jointLogMarginalLikelihood(CConstantWeights::COUNT, - {m_Prior->marginalLikelihoodMode()}, - CConstantWeights::SINGLE_UNIT, - result) != maths_t::E_FpNoErrors) - { + if (m_Prior->jointLogMarginalLikelihood( + CConstantWeights::COUNT, {m_Prior->marginalLikelihoodMode()}, CConstantWeights::SINGLE_UNIT, result) != 
maths_t::E_FpNoErrors) { LOG_ERROR("Bad density value for " << m_Prior->print()); return boost::numeric::bounds::lowest(); } return result; } -void CNaiveBayesFeatureDensityFromPrior::dataType(maths_t::EDataType dataType) -{ +void CNaiveBayesFeatureDensityFromPrior::dataType(maths_t::EDataType dataType) { m_Prior->dataType(dataType); } -void CNaiveBayesFeatureDensityFromPrior::propagateForwardsByTime(double time) -{ - m_Prior->propagateForwardsByTime(time); +void CNaiveBayesFeatureDensityFromPrior::propagateForwardsByTime(double time) { + m_Prior->propagateForwardsByTime(time); } -void CNaiveBayesFeatureDensityFromPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CNaiveBayesFeatureDensityFromPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { return core::CMemoryDebug::dynamicSize("m_Prior", m_Prior, mem); } -std::size_t CNaiveBayesFeatureDensityFromPrior::staticSize() const -{ +std::size_t CNaiveBayesFeatureDensityFromPrior::staticSize() const { return sizeof(*this); } -std::size_t CNaiveBayesFeatureDensityFromPrior::memoryUsage() const -{ +std::size_t CNaiveBayesFeatureDensityFromPrior::memoryUsage() const { return core::CMemory::dynamicSize(m_Prior); } -uint64_t CNaiveBayesFeatureDensityFromPrior::checksum(uint64_t seed) const -{ +uint64_t CNaiveBayesFeatureDensityFromPrior::checksum(uint64_t seed) const { return CChecksum::calculate(seed, m_Prior); } -std::string CNaiveBayesFeatureDensityFromPrior::print() const -{ +std::string CNaiveBayesFeatureDensityFromPrior::print() const { std::string result; m_Prior->print(" ", result); return result; } -CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity &exemplar, - double decayRate, - TOptionalDouble minMaxLogLikelihoodToUseFeature) : - m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature}, - m_DecayRate{decayRate}, - m_Exemplar{exemplar.clone()}, - m_ClassConditionalDensities{2} -{} +CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate, TOptionalDouble minMaxLogLikelihoodToUseFeature) + : m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature}, + m_DecayRate{decayRate}, + m_Exemplar{exemplar.clone()}, + m_ClassConditionalDensities{2} { +} -CNaiveBayes::CNaiveBayes(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) : - m_DecayRate{params.s_DecayRate}, - m_ClassConditionalDensities{2} -{ - traverser.traverseSubLevel(boost::bind(&CNaiveBayes::acceptRestoreTraverser, - this, boost::cref(params), _1)); +CNaiveBayes::CNaiveBayes(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) + : m_DecayRate{params.s_DecayRate}, m_ClassConditionalDensities{2} { + traverser.traverseSubLevel(boost::bind(&CNaiveBayes::acceptRestoreTraverser, this, boost::cref(params), _1)); } -bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ +bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { std::size_t label; - do - { - const std::string &name{traverser.name()}; + do { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(CLASS_LABEL_TAG, label) - RESTORE_SETUP_TEARDOWN(CLASS_MODEL_TAG, - SClass class_, - traverser.traverseSubLevel(boost::bind( - &SClass::acceptRestoreTraverser, - boost::ref(class_), boost::cref(params), _1)), - m_ClassConditionalDensities.emplace(label, class_)) + RESTORE_SETUP_TEARDOWN( + CLASS_MODEL_TAG, + SClass class_, + 
traverser.traverseSubLevel(boost::bind(&SClass::acceptRestoreTraverser, boost::ref(class_), boost::cref(params), _1)), + m_ClassConditionalDensities.emplace(label, class_)) RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, double value, core::CStringUtils::stringToType(traverser.value(), value), m_MinMaxLogLikelihoodToUseFeature.reset(value)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter& inserter) const { using TSizeClassUMapCItr = TSizeClassUMap::const_iterator; using TSizeClassUMapCItrVec = std::vector; TSizeClassUMapCItrVec classes; classes.reserve(m_ClassConditionalDensities.size()); - for (auto i = m_ClassConditionalDensities.begin(); i != m_ClassConditionalDensities.end(); ++i) - { + for (auto i = m_ClassConditionalDensities.begin(); i != m_ClassConditionalDensities.end(); ++i) { classes.push_back(i); } - std::sort(classes.begin(), classes.end(), - core::CFunctional::SDereference()); - for (const auto &class_ : classes) - { + std::sort(classes.begin(), classes.end(), core::CFunctional::SDereference()); + for (const auto& class_ : classes) { inserter.insertValue(CLASS_LABEL_TAG, class_->first); - inserter.insertLevel(CLASS_MODEL_TAG, boost::bind(&SClass::acceptPersistInserter, - boost::ref(class_->second), _1)); + inserter.insertLevel(CLASS_MODEL_TAG, boost::bind(&SClass::acceptPersistInserter, boost::ref(class_->second), _1)); } - if (m_MinMaxLogLikelihoodToUseFeature) - { - inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, - *m_MinMaxLogLikelihoodToUseFeature, - core::CIEEE754::E_SinglePrecision); + if (m_MinMaxLogLikelihoodToUseFeature) { + inserter.insertValue( + MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, *m_MinMaxLogLikelihoodToUseFeature, core::CIEEE754::E_SinglePrecision); } } -void CNaiveBayes::swap(CNaiveBayes &other) -{ +void CNaiveBayes::swap(CNaiveBayes& other) { std::swap(m_DecayRate, other.m_DecayRate); m_Exemplar.swap(other.m_Exemplar); m_ClassConditionalDensities.swap(other.m_ClassConditionalDensities); std::swap(m_MinMaxLogLikelihoodToUseFeature, other.m_MinMaxLogLikelihoodToUseFeature); } -bool CNaiveBayes::initialized() const -{ +bool CNaiveBayes::initialized() const { return m_ClassConditionalDensities.size() > 0; } -void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec &counts) -{ - for (const auto &count : counts) - { +void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) { + for (const auto& count : counts) { m_ClassConditionalDensities[count.second] = SClass{count.first, {}}; } } -void CNaiveBayes::addTrainingDataPoint(std::size_t label, - const TDouble1VecVec &x) -{ - if (!this->validate(x)) - { +void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& x) { + if (!this->validate(x)) { return; } - auto &class_ = m_ClassConditionalDensities[label]; + auto& class_ = m_ClassConditionalDensities[label]; - if (class_.s_ConditionalDensities.empty()) - { + if (class_.s_ConditionalDensities.empty()) { class_.s_ConditionalDensities.reserve(x.size()); - std::generate_n(std::back_inserter(class_.s_ConditionalDensities), x.size(), - [this]() { return TFeatureDensityPtr{m_Exemplar->clone()}; }); + std::generate_n( + std::back_inserter(class_.s_ConditionalDensities), x.size(), [this]() { return TFeatureDensityPtr{m_Exemplar->clone()}; }); } bool updateCount{false}; - for (std::size_t i = 0u; i < x.size(); ++i) - { - 
if (x[i].size() > 0) - { + for (std::size_t i = 0u; i < x.size(); ++i) { + if (x[i].size() > 0) { class_.s_ConditionalDensities[i]->add(x[i]); updateCount = true; } } - if (updateCount) - { + if (updateCount) { class_.s_Count += 1.0; - } - else - { + } else { LOG_TRACE("Ignoring empty feature vector"); } } -void CNaiveBayes::dataType(maths_t::EDataType dataType) -{ - for (auto &class_ : m_ClassConditionalDensities) - { - for (auto &density : class_.second.s_ConditionalDensities) - { +void CNaiveBayes::dataType(maths_t::EDataType dataType) { + for (auto& class_ : m_ClassConditionalDensities) { + for (auto& density : class_.second.s_ConditionalDensities) { density->dataType(dataType); } } } -void CNaiveBayes::propagateForwardsByTime(double time) -{ +void CNaiveBayes::propagateForwardsByTime(double time) { double factor{std::exp(-m_DecayRate * time)}; - for (auto &class_ : m_ClassConditionalDensities) - { + for (auto& class_ : m_ClassConditionalDensities) { class_.second.s_Count *= factor; - for (auto &density : class_.second.s_ConditionalDensities) - { + for (auto& density : class_.second.s_ConditionalDensities) { density->propagateForwardsByTime(time); } } } -CNaiveBayes::TDoubleSizePrVec -CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec &x) const -{ +CNaiveBayes::TDoubleSizePrVec CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const { TDoubleSizePrVec p(this->classProbabilities(x)); n = std::min(n, p.size()); std::sort(p.begin(), p.begin() + n, std::greater()); return TDoubleSizePrVec{p.begin(), p.begin() + n}; } -double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec &x) const -{ +double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x) const { TDoubleSizePrVec p(this->classProbabilities(x)); - auto i = std::find_if(p.begin(), p.end(), - [label](const TDoubleSizePr &p_) { return p_.second == label; }); + auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) { return p_.second == label; }); return i == p.end() ? 
0.0 : i->first; } -CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec &x) const -{ - if (!this->validate(x)) - { +CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec& x) const { + if (!this->validate(x)) { return {}; } - if (m_ClassConditionalDensities.empty()) - { + if (m_ClassConditionalDensities.empty()) { LOG_ERROR("Trying to compute class probabilities without supplying training data"); return {}; } @@ -320,35 +249,28 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV TDoubleSizePrVec p; p.reserve(m_ClassConditionalDensities.size()); - for (const auto &class_ : m_ClassConditionalDensities) - { + for (const auto& class_ : m_ClassConditionalDensities) { p.emplace_back(CTools::fastLog(class_.second.s_Count), class_.first); } TDoubleVec logLikelihoods; - for (std::size_t i = 0u; i < x.size(); ++i) - { - if (x[i].size() > 0) - { + for (std::size_t i = 0u; i < x.size(); ++i) { + if (x[i].size() > 0) { TMaxAccumulator maxLogLikelihood; logLikelihoods.clear(); - for (const auto &class_ : m_ClassConditionalDensities) - { - const auto &density = class_.second.s_ConditionalDensities[i]; + for (const auto& class_ : m_ClassConditionalDensities) { + const auto& density = class_.second.s_ConditionalDensities[i]; double logLikelihood{density->logValue(x[i])}; double logMaximumLikelihood{density->logMaximumValue()}; maxLogLikelihood.add(logLikelihood - logMaximumLikelihood); logLikelihoods.push_back(logLikelihood); } double weight{1.0}; - if (m_MinMaxLogLikelihoodToUseFeature) - { + if (m_MinMaxLogLikelihoodToUseFeature) { weight = CTools::logisticFunction( - (maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) - / std::fabs(*m_MinMaxLogLikelihoodToUseFeature), 0.1); + (maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) / std::fabs(*m_MinMaxLogLikelihoodToUseFeature), 0.1); } - for (std::size_t j = 0u; j < logLikelihoods.size(); ++j) - { + for (std::size_t j = 0u; j < logLikelihoods.size(); ++j) { p[j].first += weight * logLikelihoods[j]; } } @@ -356,46 +278,36 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV double scale{std::max_element(p.begin(), p.end())->first}; double Z{0.0}; - for (auto &pc : p) - { + for (auto& pc : p) { pc.first = std::exp(pc.first - scale); Z += pc.first; } - for (auto &pc : p) - { + for (auto& pc : p) { pc.first /= Z; } return p; } -void CNaiveBayes::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CNaiveBayes::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { core::CMemoryDebug::dynamicSize("m_Exemplar", m_Exemplar, mem); - core::CMemoryDebug::dynamicSize("m_ClassConditionalDensities", - m_ClassConditionalDensities, mem); + core::CMemoryDebug::dynamicSize("m_ClassConditionalDensities", m_ClassConditionalDensities, mem); } -std::size_t CNaiveBayes::memoryUsage() const -{ - return core::CMemory::dynamicSize(m_Exemplar) - + core::CMemory::dynamicSize(m_ClassConditionalDensities); +std::size_t CNaiveBayes::memoryUsage() const { + return core::CMemory::dynamicSize(m_Exemplar) + core::CMemory::dynamicSize(m_ClassConditionalDensities); } -uint64_t CNaiveBayes::checksum(uint64_t seed) const -{ +uint64_t CNaiveBayes::checksum(uint64_t seed) const { return CChecksum::calculate(seed, m_ClassConditionalDensities); } -std::string CNaiveBayes::print() const -{ +std::string CNaiveBayes::print() const { std::ostringstream result; result << "\n"; - for (const auto &class_ : m_ClassConditionalDensities) - { + 
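// Condensing the classProbabilities arithmetic above: start from the log
// class counts (the log prior), add each feature's log-likelihood, then
// renormalise with a max-shifted softmax so exponentiation cannot
// overflow. The per-feature logistic down-weighting driven by
// m_MinMaxLogLikelihoodToUseFeature is omitted here for brevity.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// logLikelihoods[i][j]: log-likelihood of feature j under class i.
std::vector<double> classPosteriors(const std::vector<double>& logPriors,
                                    const std::vector<std::vector<double>>& logLikelihoods) {
    std::vector<double> p{logPriors};
    for (std::size_t i = 0; i < p.size(); ++i) {
        for (double logLikelihood : logLikelihoods[i]) {
            p[i] += logLikelihood;
        }
    }
    double scale = *std::max_element(p.begin(), p.end());
    double Z = 0.0;
    for (double& pc : p) {
        pc = std::exp(pc - scale);
        Z += pc;
    }
    for (double& pc : p) {
        pc /= Z;
    }
    return p;
}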
for (const auto& class_ : m_ClassConditionalDensities) { result << "CLASS(" << class_.first << ")\n"; - for (const auto &density : class_.second.s_ConditionalDensities) - { + for (const auto& density : class_.second.s_ConditionalDensities) { result << "---"; result << density->print() << "\n"; } @@ -403,69 +315,53 @@ std::string CNaiveBayes::print() const return result.str(); } -bool CNaiveBayes::validate(const TDouble1VecVec &x) const -{ +bool CNaiveBayes::validate(const TDouble1VecVec& x) const { auto class_ = m_ClassConditionalDensities.begin(); - if ( class_ != m_ClassConditionalDensities.end() - && class_->second.s_ConditionalDensities.size() > 0 - && class_->second.s_ConditionalDensities.size() != x.size()) - { + if (class_ != m_ClassConditionalDensities.end() && class_->second.s_ConditionalDensities.size() > 0 && + class_->second.s_ConditionalDensities.size() != x.size()) { LOG_ERROR("Unexpected feature vector: " << core::CContainerPrinter::print(x)); return false; } return true; } -bool CNaiveBayes::SClass::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; +bool CNaiveBayes::SClass::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(COUNT_TAG, s_Count) RESTORE_SETUP_TEARDOWN(CONDITIONAL_DENSITY_FROM_PRIOR_TAG, CNaiveBayesFeatureDensityFromPrior tmp, traverser.traverseSubLevel(boost::bind( - &CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser, - boost::ref(tmp), boost::cref(params), _1)), + &CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser, boost::ref(tmp), boost::cref(params), _1)), s_ConditionalDensities.emplace_back(tmp.clone())) // Add other implementations' restore code here. - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CNaiveBayes::SClass::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CNaiveBayes::SClass::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(COUNT_TAG, s_Count, core::CIEEE754::E_SinglePrecision); - for (const auto &density : s_ConditionalDensities) - { - if (dynamic_cast(density.get())) - { + for (const auto& density : s_ConditionalDensities) { + if (dynamic_cast(density.get())) { inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG, - boost::bind(&CNaiveBayesFeatureDensity::acceptPersistInserter, - density.get(), _1)); + boost::bind(&CNaiveBayesFeatureDensity::acceptPersistInserter, density.get(), _1)); continue; } // Add other implementations' persist code here. 
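// The persistence idiom above in miniature, with toy stand-in types:
// persist selects a tag per concrete subtype (via dynamic_cast) and
// restore branches on that tag to rebuild the matching subtype; a new
// density implementation plugs in by adding a tag and a branch on each
// side. An illustrative sketch only, not the library's API:
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct CDensity {
    virtual ~CDensity() = default;
    virtual std::string state() const = 0;
};
struct CDensityFromPrior : CDensity {
    std::string state() const override { return "..."; }
};

void persistAll(const std::vector<std::unique_ptr<CDensity>>& densities) {
    for (const auto& density : densities) {
        if (dynamic_cast<const CDensityFromPrior*>(density.get())) {
            std::cout << "f:" << density->state() << "\n"; // tag "f"
            continue;
        }
        // Add other implementations' tags here.
    }
}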
} } -void CNaiveBayes::SClass::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CNaiveBayes::SClass::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { core::CMemoryDebug::dynamicSize("s_ConditionalDensities", s_ConditionalDensities, mem); } -std::size_t CNaiveBayes::SClass::memoryUsage() const -{ +std::size_t CNaiveBayes::SClass::memoryUsage() const { return core::CMemory::dynamicSize(s_ConditionalDensities); } -uint64_t CNaiveBayes::SClass::checksum(uint64_t seed) const -{ +uint64_t CNaiveBayes::SClass::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, s_Count); return CChecksum::calculate(seed, s_ConditionalDensities); } - } } diff --git a/lib/maths/CNaturalBreaksClassifier.cc b/lib/maths/CNaturalBreaksClassifier.cc index 54f1bfede7..8fcae9ef50 100644 --- a/lib/maths/CNaturalBreaksClassifier.cc +++ b/lib/maths/CNaturalBreaksClassifier.cc @@ -16,9 +16,9 @@ #include #include #include -#include #include #include +#include #include #include @@ -29,37 +29,27 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { //! Orders two tuples by their mean. -struct SMeanLess -{ - bool operator()(const CNaturalBreaksClassifier::TTuple &lhs, - const CNaturalBreaksClassifier::TTuple &rhs) const - { +struct SMeanLess { + bool operator()(const CNaturalBreaksClassifier::TTuple& lhs, const CNaturalBreaksClassifier::TTuple& rhs) const { return CBasicStatistics::mean(lhs) < CBasicStatistics::mean(rhs); } }; //! Checks if a tuple count is less than a specified value. -class CCountLessThan -{ - public: - CCountLessThan(double count) : m_Count(count) {} - - bool operator()(const CNaturalBreaksClassifier::TTuple &tuple) const - { - return CBasicStatistics::count(tuple) < m_Count; - } +class CCountLessThan { +public: + CCountLessThan(double count) : m_Count(count) {} - private: - double m_Count; + bool operator()(const CNaturalBreaksClassifier::TTuple& tuple) const { return CBasicStatistics::count(tuple) < m_Count; } + +private: + double m_Count; }; const std::string SPACE_TAG("a"); @@ -67,88 +57,69 @@ const std::string CATEGORY_TAG("b"); const std::string POINTS_TAG("c"); const std::string DECAY_RATE_TAG("d"); const std::string EMPTY_STRING; - } -CNaturalBreaksClassifier::CNaturalBreaksClassifier(std::size_t space, - double decayRate, - double minimumCategoryCount) : - m_Space(std::max(space, MINIMUM_SPACE)), - m_DecayRate(decayRate), - m_MinimumCategoryCount(minimumCategoryCount) -{ +CNaturalBreaksClassifier::CNaturalBreaksClassifier(std::size_t space, double decayRate, double minimumCategoryCount) + : m_Space(std::max(space, MINIMUM_SPACE)), m_DecayRate(decayRate), m_MinimumCategoryCount(minimumCategoryCount) { m_Categories.reserve(m_Space + MAXIMUM_BUFFER_SIZE + 1u); m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); } -bool CNaturalBreaksClassifier::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ +bool CNaturalBreaksClassifier::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { m_DecayRate = params.s_DecayRate; m_MinimumCategoryCount = params.s_MinimumCategoryCount; - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(DECAY_RATE_TAG, m_DecayRate) RESTORE_BUILT_IN(SPACE_TAG, m_Space) RESTORE(CATEGORY_TAG, core::CPersistUtils::restore(CATEGORY_TAG, m_Categories, traverser)) RESTORE(POINTS_TAG, 
core::CPersistUtils::fromString(traverser.value(), m_PointsBuffer)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CNaturalBreaksClassifier::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CNaturalBreaksClassifier::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DECAY_RATE_TAG, m_DecayRate); inserter.insertValue(SPACE_TAG, m_Space); core::CPersistUtils::persist(CATEGORY_TAG, m_Categories, inserter); inserter.insertValue(POINTS_TAG, core::CPersistUtils::toString(m_PointsBuffer)); } -double CNaturalBreaksClassifier::percentile(double p) const -{ +double CNaturalBreaksClassifier::percentile(double p) const { LOG_TRACE("percentile = " << p); p /= 100.0; double percentileCount = 0.0; - for (std::size_t i = 0u; i < m_Categories.size(); ++i) - { + for (std::size_t i = 0u; i < m_Categories.size(); ++i) { percentileCount += CBasicStatistics::count(m_Categories[i]); } percentileCount *= p; LOG_TRACE("percentileCount = " << percentileCount); - for (std::size_t i = 0u; i < m_Categories.size(); ++i) - { + for (std::size_t i = 0u; i < m_Categories.size(); ++i) { double count = CBasicStatistics::count(m_Categories[i]); - if (percentileCount < count) - { + if (percentileCount < count) { double mean = CBasicStatistics::mean(m_Categories[i]); double deviation = std::sqrt(CBasicStatistics::maximumLikelihoodVariance(m_Categories[i])); - if (deviation == 0.0) - { + if (deviation == 0.0) { return mean; } boost::math::normal_distribution<> normal(mean, deviation); double q = (count - percentileCount) / count; - double x = q > 0.0 && q < 1.0 ? - boost::math::quantile(normal, q) : - (2.0 * q - 1.0) * boost::numeric::bounds::highest(); + double x = q > 0.0 && q < 1.0 ? boost::math::quantile(normal, q) : (2.0 * q - 1.0) * boost::numeric::bounds::highest(); LOG_TRACE("N(" << mean << "," << deviation << ")" - << ", q = " << q << ", x = " << x) + << ", q = " << q << ", x = " << x) - if (i > 0) - { + if (i > 0) { // Left truncate by the assignment boundary between // this and the left category. See deviation for // details. - double n1 = std::sqrt(CBasicStatistics::count(m_Categories[i-1])); - double m1 = CBasicStatistics::mean(m_Categories[i-1]); - double d1 = std::sqrt(CBasicStatistics::maximumLikelihoodVariance(m_Categories[i-1])); + double n1 = std::sqrt(CBasicStatistics::count(m_Categories[i - 1])); + double m1 = CBasicStatistics::mean(m_Categories[i - 1]); + double d1 = std::sqrt(CBasicStatistics::maximumLikelihoodVariance(m_Categories[i - 1])); double n2 = count; double m2 = mean; double d2 = deviation; @@ -158,17 +129,16 @@ double CNaturalBreaksClassifier::percentile(double p) const LOG_TRACE("Left truncate to " << xl); x = std::max(x, xl); } - if (i+1 < m_Categories.size()) - { + if (i + 1 < m_Categories.size()) { // Right truncate by the assignment boundary between // this and the right category. See deviation for // details. 
double n1 = count; double m1 = mean; double d1 = deviation; - double n2 = std::sqrt(CBasicStatistics::count(m_Categories[i+1])); - double m2 = CBasicStatistics::mean(m_Categories[i+1]); - double d2 = std::sqrt(CBasicStatistics::maximumLikelihoodVariance(m_Categories[i+1])); + double n2 = std::sqrt(CBasicStatistics::count(m_Categories[i + 1])); + double m2 = CBasicStatistics::mean(m_Categories[i + 1]); + double d2 = std::sqrt(CBasicStatistics::maximumLikelihoodVariance(m_Categories[i + 1])); double w1 = std::sqrt(n2 * d2); double w2 = std::sqrt(n1 * d1); double xr = (w1 * m1 + w2 * m2) / (w1 + w2); @@ -184,21 +154,16 @@ double CNaturalBreaksClassifier::percentile(double p) const return (p <= 0.5 ? -1.0 : 1.0) * boost::numeric::bounds::highest(); } -std::size_t CNaturalBreaksClassifier::size() const -{ +std::size_t CNaturalBreaksClassifier::size() const { return std::min(m_Categories.size() + m_PointsBuffer.size(), m_Space); } -bool CNaturalBreaksClassifier::split(std::size_t n, - std::size_t p, - TClassifierVec &result) -{ +bool CNaturalBreaksClassifier::split(std::size_t n, std::size_t p, TClassifierVec& result) { LOG_TRACE("split"); result.clear(); - if (n == 0) - { + if (n == 0) { LOG_ERROR("Bad request for zero categories"); return false; } @@ -206,37 +171,27 @@ bool CNaturalBreaksClassifier::split(std::size_t n, this->reduce(); LOG_TRACE("raw categories = " << this->print()); - if (n >= m_Categories.size()) - { + if (n >= m_Categories.size()) { double p_ = static_cast(p); - for (std::size_t i = 0u; p_ > 0.0 && i < m_Categories.size(); ++i) - { - if (CBasicStatistics::count(m_Categories[i]) < p_) - { + for (std::size_t i = 0u; p_ > 0.0 && i < m_Categories.size(); ++i) { + if (CBasicStatistics::count(m_Categories[i]) < p_) { return false; } } result.reserve(m_Categories.size()); TTupleVec category(1); - for (std::size_t i = 0u; i < m_Categories.size(); ++i) - { + for (std::size_t i = 0u; i < m_Categories.size(); ++i) { category[0] = m_Categories[i]; - result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, - m_MinimumCategoryCount, - category)); + result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, m_MinimumCategoryCount, category)); } return true; - } - else if (n == 1) - { + } else if (n == 1) { double p_ = static_cast(p); double count = 0.0; - for (std::size_t i = 0u; p_ > 0.0 && i < m_Categories.size(); ++i) - { + for (std::size_t i = 0u; p_ > 0.0 && i < m_Categories.size(); ++i) { count += CBasicStatistics::count(m_Categories[i]); } - if (count < p_) - { + if (count < p_) { return false; } result.push_back(*this); @@ -244,81 +199,59 @@ bool CNaturalBreaksClassifier::split(std::size_t n, } TSizeVec split; - if (!this->naturalBreaks(n, p, split)) - { + if (!this->naturalBreaks(n, p, split)) { return false; } result.reserve(n); - for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) - { + for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) { TTupleVec categories; - for (/**/; j < split[i]; ++j) - { + for (/**/; j < split[i]; ++j) { categories.push_back(m_Categories[j]); } - result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, - m_MinimumCategoryCount, - categories)); + result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, m_MinimumCategoryCount, categories)); } return true; } -bool CNaturalBreaksClassifier::split(const TSizeVec &split, TClassifierVec &result) -{ +bool CNaturalBreaksClassifier::split(const TSizeVec& split, TClassifierVec& result) { result.clear(); this->reduce(); // Sanity checks. 
- if ( split.empty() - || split[split.size() - 1] != m_Categories.size() - || !boost::algorithm::is_sorted(split.begin(), split.end())) - { + if (split.empty() || split[split.size() - 1] != m_Categories.size() || !boost::algorithm::is_sorted(split.begin(), split.end())) { LOG_ERROR("Bad split = " << core::CContainerPrinter::print(split)); return false; } result.reserve(split.size()); TTupleVec categories; - for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) - { + for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) { categories.clear(); categories.reserve(split[i] - j); - for (/**/; j < split[i]; ++j) - { + for (/**/; j < split[i]; ++j) { categories.push_back(m_Categories[j]); } - result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, - m_MinimumCategoryCount, - categories)); + result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, m_MinimumCategoryCount, categories)); } return true; } -bool CNaturalBreaksClassifier::naturalBreaks(std::size_t n, - std::size_t p, - TSizeVec &result) -{ +bool CNaturalBreaksClassifier::naturalBreaks(std::size_t n, std::size_t p, TSizeVec& result) { return naturalBreaksImpl(m_Categories, n, p, E_TargetDeviation, result); } -bool CNaturalBreaksClassifier::categories(std::size_t n, - std::size_t p, - TTupleVec &result, - bool append) -{ +bool CNaturalBreaksClassifier::categories(std::size_t n, std::size_t p, TTupleVec& result, bool append) { LOG_TRACE("categories"); - if (!append) - { + if (!append) { result.clear(); } - if (n == 0) - { + if (n == 0) { LOG_ERROR("Bad request for zero categories"); return false; } @@ -326,32 +259,23 @@ bool CNaturalBreaksClassifier::categories(std::size_t n, this->reduce(); LOG_TRACE("raw categories = " << this->print()); - if (n >= m_Categories.size()) - { + if (n >= m_Categories.size()) { double p_ = static_cast(p); - for (std::size_t i = 0u; p_ > 0.0 && i < m_Categories.size(); ++i) - { - if (CBasicStatistics::count(m_Categories[i]) < p_) - { + for (std::size_t i = 0u; p_ > 0.0 && i < m_Categories.size(); ++i) { + if (CBasicStatistics::count(m_Categories[i]) < p_) { return false; } } - if (!append) - { + if (!append) { result = m_Categories; - } - else - { + } else { result.insert(result.end(), m_Categories.begin(), m_Categories.end()); } return true; - } - else if (n == 1) - { + } else if (n == 1) { double p_ = static_cast(p); TTuple category = std::accumulate(m_Categories.begin(), m_Categories.end(), TTuple()); - if (CBasicStatistics::count(category) < p_) - { + if (CBasicStatistics::count(category) < p_) { return false; } result.push_back(category); @@ -359,17 +283,14 @@ bool CNaturalBreaksClassifier::categories(std::size_t n, } TSizeVec split; - if (!this->naturalBreaks(n, p, split)) - { + if (!this->naturalBreaks(n, p, split)) { return false; } result.reserve(result.size() + n); - for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) - { + for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) { TTuple category; - for (/**/; j < split[i]; ++j) - { + for (/**/; j < split[i]; ++j) { category += m_Categories[j]; } result.push_back(category); @@ -378,25 +299,19 @@ bool CNaturalBreaksClassifier::categories(std::size_t n, return true; } -bool CNaturalBreaksClassifier::categories(const TSizeVec &split, TTupleVec &result) -{ +bool CNaturalBreaksClassifier::categories(const TSizeVec& split, TTupleVec& result) { result.clear(); // Sanity checks. 
- if ( split.empty() - || split[split.size() - 1] != m_Categories.size() - || !boost::algorithm::is_sorted(split.begin(), split.end())) - { + if (split.empty() || split[split.size() - 1] != m_Categories.size() || !boost::algorithm::is_sorted(split.begin(), split.end())) { LOG_ERROR("Bad split = " << core::CContainerPrinter::print(split)); return false; } result.reserve(split.size()); - for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) - { + for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) { TTuple category; - for (/**/; j < split[i]; ++j) - { + for (/**/; j < split[i]; ++j) { category += m_Categories[j]; } result.push_back(category); @@ -405,35 +320,26 @@ bool CNaturalBreaksClassifier::categories(const TSizeVec &split, TTupleVec &resu return true; } -void CNaturalBreaksClassifier::add(double x, double count) -{ +void CNaturalBreaksClassifier::add(double x, double count) { LOG_TRACE("Adding " << x); - if (m_PointsBuffer.size() < MAXIMUM_BUFFER_SIZE) - { + if (m_PointsBuffer.size() < MAXIMUM_BUFFER_SIZE) { m_PointsBuffer.emplace_back(x, count); - } - else - { + } else { m_Categories.push_back(TTuple()); m_Categories.back().add(x, count); this->reduce(); } } -void CNaturalBreaksClassifier::merge(const CNaturalBreaksClassifier &other) -{ +void CNaturalBreaksClassifier::merge(const CNaturalBreaksClassifier& other) { LOG_TRACE("Merge"); - for (std::size_t i = 0u; i < other.m_PointsBuffer.size(); ++i) - { + for (std::size_t i = 0u; i < other.m_PointsBuffer.size(); ++i) { m_Categories.push_back(TTuple()); - m_Categories.back().add(other.m_PointsBuffer[i].first, - other.m_PointsBuffer[i].second); + m_Categories.back().add(other.m_PointsBuffer[i].first, other.m_PointsBuffer[i].second); } - m_Categories.insert(m_Categories.end(), - other.m_Categories.begin(), - other.m_Categories.end()); + m_Categories.insert(m_Categories.end(), other.m_Categories.begin(), other.m_Categories.end()); this->reduce(); @@ -442,15 +348,12 @@ void CNaturalBreaksClassifier::merge(const CNaturalBreaksClassifier &other) m_Categories.swap(categories); } -void CNaturalBreaksClassifier::decayRate(double decayRate) -{ +void CNaturalBreaksClassifier::decayRate(double decayRate) { m_DecayRate = decayRate; } -void CNaturalBreaksClassifier::propagateForwardsByTime(double time) -{ - if (time < 0.0) - { +void CNaturalBreaksClassifier::propagateForwardsByTime(double time) { + if (time < 0.0) { LOG_ERROR("Can't propagate backwards in time"); return; } @@ -459,34 +362,25 @@ void CNaturalBreaksClassifier::propagateForwardsByTime(double time) LOG_TRACE("alpha = " << alpha); LOG_TRACE("categories = " << core::CContainerPrinter::print(m_Categories)); - for (std::size_t i = 0u; i < m_Categories.size(); ++i) - { + for (std::size_t i = 0u; i < m_Categories.size(); ++i) { m_Categories[i].age(alpha); } // Prune any dead categories: we're not interested in maintaining // categories with low counts. 
- m_Categories.erase(std::remove_if(m_Categories.begin(), - m_Categories.end(), - CCountLessThan(m_MinimumCategoryCount)), + m_Categories.erase(std::remove_if(m_Categories.begin(), m_Categories.end(), CCountLessThan(m_MinimumCategoryCount)), m_Categories.end()); LOG_TRACE("categories = " << core::CContainerPrinter::print(m_Categories)); } -bool CNaturalBreaksClassifier::buffering() const -{ +bool CNaturalBreaksClassifier::buffering() const { return m_PointsBuffer.size() > 0; } -void CNaturalBreaksClassifier::sample(std::size_t numberSamples, - double smallest, - double /*largest*/, - TDoubleVec &result) const -{ +void CNaturalBreaksClassifier::sample(std::size_t numberSamples, double smallest, double /*largest*/, TDoubleVec& result) const { result.clear(); - if (numberSamples == 0) - { + if (numberSamples == 0) { return; } @@ -502,28 +396,23 @@ void CNaturalBreaksClassifier::sample(std::size_t numberSamples, weights.reserve(m_Categories.size()); double weightSum = 0.0; - for (std::size_t i = 0u; i < m_Categories.size(); ++i) - { + for (std::size_t i = 0u; i < m_Categories.size(); ++i) { double nCategory = CBasicStatistics::count(m_Categories[i]); weights.push_back(nCategory); weightSum += nCategory; } - for (std::size_t i = 0u; i < weights.size(); ++i) - { + for (std::size_t i = 0u; i < weights.size(); ++i) { weights[i] /= weightSum; } numberSamples = std::min(numberSamples, static_cast(weightSum)); - LOG_TRACE("weights = " << core::CContainerPrinter::print(weights) - << ", weightSum = " << weightSum - << ", n = " << numberSamples); + LOG_TRACE("weights = " << core::CContainerPrinter::print(weights) << ", weightSum = " << weightSum << ", n = " << numberSamples); result.reserve(numberSamples); TMeanAccumulator sample; TDoubleVec categorySamples; - for (std::size_t i = 0u; i < m_Categories.size(); ++i) - { + for (std::size_t i = 0u; i < m_Categories.size(); ++i) { double ni = static_cast(numberSamples) * weights[i]; std::size_t ni_ = static_cast(std::ceil(ni)); @@ -531,22 +420,16 @@ void CNaturalBreaksClassifier::sample(std::size_t numberSamples, double v = CBasicStatistics::maximumLikelihoodVariance(m_Categories[i]); CSampling::normalSampleQuantiles(m, v, ni_, categorySamples); - for (std::size_t j = 0u; j < categorySamples.size(); ++j) - { - if (categorySamples[j] < smallest) - { - if (v == 0.0) - { + for (std::size_t j = 0u; j < categorySamples.size(); ++j) { + if (categorySamples[j] < smallest) { + if (v == 0.0) { categorySamples.assign(ni_, smallest); - } - else - { + } else { m -= std::min(smallest, 0.0); double shape = m * m / v; - double rate = m / v; + double rate = m / v; CSampling::gammaSampleQuantiles(shape, rate, ni_, categorySamples); - for (std::size_t k = 0u; k < categorySamples.size(); ++k) - { + for (std::size_t k = 0u; k < categorySamples.size(); ++k) { categorySamples[k] += std::min(smallest, 0.0); } } @@ -554,19 +437,14 @@ void CNaturalBreaksClassifier::sample(std::size_t numberSamples, } } - if (!categorySamples.empty()) - { + if (!categorySamples.empty()) { ni /= static_cast(categorySamples.size()); - for (std::size_t j = 0u; j < categorySamples.size(); ++j) - { + for (std::size_t j = 0u; j < categorySamples.size(); ++j) { double nij = std::min(1.0 - CBasicStatistics::count(sample), ni); sample.add(categorySamples[j], nij); - if (CBasicStatistics::count(sample) > ALMOST_ONE) - { + if (CBasicStatistics::count(sample) > ALMOST_ONE) { result.push_back(CBasicStatistics::mean(sample)); - sample = nij < ni ? 
-                                    CBasicStatistics::accumulator(ni - nij, categorySamples[j]) :
-                                    TMeanAccumulator();
+                    sample = nij < ni ? CBasicStatistics::accumulator(ni - nij, categorySamples[j]) : TMeanAccumulator();
                 }
             }
         }
@@ -575,83 +453,70 @@ void CNaturalBreaksClassifier::sample(std::size_t numberSamples,
     LOG_TRACE("samples = " << core::CContainerPrinter::print(result));
 }
 
-std::string CNaturalBreaksClassifier::print() const
-{
+std::string CNaturalBreaksClassifier::print() const {
     return core::CContainerPrinter::print(m_Categories);
 }
 
-uint64_t CNaturalBreaksClassifier::checksum(uint64_t seed) const
-{
+uint64_t CNaturalBreaksClassifier::checksum(uint64_t seed) const {
     seed = CChecksum::calculate(seed, m_Space);
     seed = CChecksum::calculate(seed, m_DecayRate);
     seed = CChecksum::calculate(seed, m_Categories);
     return CChecksum::calculate(seed, m_PointsBuffer);
 }
 
-void CNaturalBreaksClassifier::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CNaturalBreaksClassifier::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CNaturalBreaksClassifier");
     core::CMemoryDebug::dynamicSize("m_Categories", m_Categories, mem);
     core::CMemoryDebug::dynamicSize("m_PointsBuffer", m_PointsBuffer, mem);
 }
 
-std::size_t CNaturalBreaksClassifier::memoryUsage() const
-{
+std::size_t CNaturalBreaksClassifier::memoryUsage() const {
     std::size_t mem = core::CMemory::dynamicSize(m_Categories);
     mem += core::CMemory::dynamicSize(m_PointsBuffer);
     return mem;
 }
 
-bool CNaturalBreaksClassifier::naturalBreaks(const TTupleVec &categories,
+bool CNaturalBreaksClassifier::naturalBreaks(const TTupleVec& categories,
                                              std::size_t n,
                                              std::size_t p,
                                              EObjective target,
-                                             TSizeVec &result)
-{
+                                             TSizeVec& result) {
     return naturalBreaksImpl(categories, n, p, target, result);
 }
 
-bool CNaturalBreaksClassifier::naturalBreaks(const TDoubleTupleVec &categories,
+bool CNaturalBreaksClassifier::naturalBreaks(const TDoubleTupleVec& categories,
                                              std::size_t n,
                                              std::size_t p,
                                              EObjective target,
-                                             TSizeVec &result)
-{
+                                             TSizeVec& result) {
     return naturalBreaksImpl(categories, n, p, target, result);
 }
 
 template<typename TUPLE>
-bool CNaturalBreaksClassifier::naturalBreaksImpl(const std::vector<TUPLE> &categories,
+bool CNaturalBreaksClassifier::naturalBreaksImpl(const std::vector<TUPLE>& categories,
                                                  std::size_t n,
                                                  std::size_t p,
                                                  EObjective target,
-                                                 TSizeVec &result)
-{
+                                                 TSizeVec& result) {
     result.clear();
 
-    if (categories.empty())
-    {
+    if (categories.empty()) {
         return true;
     }
-    if (n == 0)
-    {
+    if (n == 0) {
         LOG_ERROR("Bad request for zero categories");
         return false;
     }
-    if (n >= categories.size())
-    {
+    if (n >= categories.size()) {
         result.reserve(categories.size());
-        for (std::size_t i = 1u; i < categories.size(); ++i)
-        {
+        for (std::size_t i = 1u; i < categories.size(); ++i) {
             result.push_back(i);
         }
         result.push_back(categories.size());
         return true;
-    }
-    else if (n == 1)
-    {
+    } else if (n == 1) {
         result.push_back(categories.size());
         return true;
     }
@@ -689,33 +554,24 @@ bool CNaturalBreaksClassifier::naturalBreaksImpl(const std::vector<TUPLE> &categ
     TDoubleVecVec D(N, TDoubleVec(n, 0.0));
     {
         TTuple t;
-        for (std::size_t i = 0u; i < N; ++i)
-        {
+        for (std::size_t i = 0u; i < N; ++i) {
             t += categories[i];
-            D[i][0] = CBasicStatistics::count(t) < pp ?
-                      INF : objective(target, t);
+            D[i][0] = CBasicStatistics::count(t) < pp ?
INF : objective(target, t); } } - LOG_TRACE("categories = " - << core::CContainerPrinter::print(categories)); + LOG_TRACE("categories = " << core::CContainerPrinter::print(categories)); - for (std::size_t i = 1u; i < N; ++i) - { - for (std::size_t m = 1u; m <= std::min(i, n - 1); ++m) - { + for (std::size_t i = 1u; i < N; ++i) { + for (std::size_t m = 1u; m <= std::min(i, n - 1); ++m) { std::size_t b = m + 1; double d = INF; TTuple t; - for (std::size_t j = i; j >= m; --j) - { + for (std::size_t j = i; j >= m; --j) { t += categories[j]; - double c = (D[j - 1][m - 1] == INF - || CBasicStatistics::count(t) < pp) ? - INF : D[j - 1][m - 1] + objective(target, t); - if (c <= d) - { + double c = (D[j - 1][m - 1] == INF || CBasicStatistics::count(t) < pp) ? INF : D[j - 1][m - 1] + objective(target, t); + if (c <= d) { b = j; d = c; } @@ -726,8 +582,7 @@ bool CNaturalBreaksClassifier::naturalBreaksImpl(const std::vector &categ } } - if (D[N - 1][n - 1] == INF) - { + if (D[N - 1][n - 1] == INF) { return false; } @@ -743,8 +598,7 @@ bool CNaturalBreaksClassifier::naturalBreaksImpl(const std::vector &categ result.resize(n, 0); result[n - 1] = N; result[n - 2] = B[N - 1][n - 1]; - for (std::size_t i = 3u; i <= n; ++i) - { + for (std::size_t i = 3u; i <= n; ++i) { result[n - i] = B[result[n - i + 1] - 1][n - i + 1]; } @@ -753,21 +607,14 @@ bool CNaturalBreaksClassifier::naturalBreaksImpl(const std::vector &categ return true; } -CNaturalBreaksClassifier::CNaturalBreaksClassifier(std::size_t space, - double decayRate, - double minimumCategoryCount, - TTupleVec &categories) : - m_Space(space), - m_DecayRate(decayRate), - m_MinimumCategoryCount(minimumCategoryCount) -{ +CNaturalBreaksClassifier::CNaturalBreaksClassifier(std::size_t space, double decayRate, double minimumCategoryCount, TTupleVec& categories) + : m_Space(space), m_DecayRate(decayRate), m_MinimumCategoryCount(minimumCategoryCount) { m_Categories.swap(categories); m_Categories.reserve(m_Space + MAXIMUM_BUFFER_SIZE + 1u); m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); } -void CNaturalBreaksClassifier::reduce() -{ +void CNaturalBreaksClassifier::reduce() { LOG_TRACE("Reduce"); // Experimenting with using the optimal reduction gives no @@ -776,8 +623,7 @@ void CNaturalBreaksClassifier::reduce() // an order of magnitude slower. // Add all the points as new categories tuples and reduce. - for (std::size_t i = 0u; i < m_PointsBuffer.size(); ++i) - { + for (std::size_t i = 0u; i < m_PointsBuffer.size(); ++i) { m_Categories.push_back(TTuple()); m_Categories.back().add(m_PointsBuffer[i].first, m_PointsBuffer[i].second); } @@ -786,8 +632,7 @@ void CNaturalBreaksClassifier::reduce() std::sort(m_Categories.begin(), m_Categories.end(), SMeanLess()); LOG_TRACE("categories = " << core::CContainerPrinter::print(m_Categories)); - while (m_Categories.size() > m_Space) - { + while (m_Categories.size() > m_Space) { // Find the tuples to merge. 
TSizeSizePr toMerge = this->closestPair(); @@ -799,26 +644,19 @@ void CNaturalBreaksClassifier::reduce() LOG_TRACE("reduced categories = " << core::CContainerPrinter::print(m_Categories)); } -CNaturalBreaksClassifier::TSizeSizePr -CNaturalBreaksClassifier::closestPair() const -{ +CNaturalBreaksClassifier::TSizeSizePr CNaturalBreaksClassifier::closestPair() const { LOG_TRACE("Closest pair"); TSizeSizePr result; double dDeviationMin = boost::numeric::bounds::highest(); - for (std::size_t i = 1u; i < m_Categories.size(); ++i) - { - double dDeviation = deviation(m_Categories[i] + m_Categories[i - 1]) - - deviation(m_Categories[i]) - - deviation(m_Categories[i - 1]); + for (std::size_t i = 1u; i < m_Categories.size(); ++i) { + double dDeviation = deviation(m_Categories[i] + m_Categories[i - 1]) - deviation(m_Categories[i]) - deviation(m_Categories[i - 1]); - LOG_TRACE("mean[" << i - 1 << "] = " << CBasicStatistics::mean(m_Categories[i - 1]) - << ", mean[" << i << "] = " << CBasicStatistics::mean(m_Categories[i]) - << ", dDeviation = " << dDeviation); + LOG_TRACE("mean[" << i - 1 << "] = " << CBasicStatistics::mean(m_Categories[i - 1]) << ", mean[" << i + << "] = " << CBasicStatistics::mean(m_Categories[i]) << ", dDeviation = " << dDeviation); - if (dDeviation < dDeviationMin) - { + if (dDeviation < dDeviationMin) { result = TSizeSizePr(i - 1, i); dDeviationMin = dDeviation; } @@ -829,8 +667,7 @@ CNaturalBreaksClassifier::closestPair() const return result; } -double CNaturalBreaksClassifier::deviation(const TTuple &category) -{ +double CNaturalBreaksClassifier::deviation(const TTuple& category) { // The deviation objective is in some senses more natural // than the variation in one dimension. In particular, the // distances of the class boundaries from the class centers @@ -892,8 +729,7 @@ double CNaturalBreaksClassifier::deviation(const TTuple &category) return std::sqrt(count * variance); } -double CNaturalBreaksClassifier::variation(const TTuple &category) -{ +double CNaturalBreaksClassifier::variation(const TTuple& category) { double count = CBasicStatistics::count(category); double variance = CBasicStatistics::maximumLikelihoodVariance(category); return count * variance; @@ -901,7 +737,5 @@ double CNaturalBreaksClassifier::variation(const TTuple &category) const std::size_t CNaturalBreaksClassifier::MINIMUM_SPACE(2u); const std::size_t CNaturalBreaksClassifier::MAXIMUM_BUFFER_SIZE(2u); - } } - diff --git a/lib/maths/CNormalMeanPrecConjugate.cc b/lib/maths/CNormalMeanPrecConjugate.cc index 4957e98e40..40ad13a01c 100644 --- a/lib/maths/CNormalMeanPrecConjugate.cc +++ b/lib/maths/CNormalMeanPrecConjugate.cc @@ -16,9 +16,9 @@ #include #include #include -#include #include #include +#include #include #include @@ -37,20 +37,16 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; const double MINIMUM_GAUSSIAN_SHAPE = 100.0; -namespace detail -{ +namespace detail { using TWeightStyleVec = maths_t::TWeightStyleVec; using TDouble1Vec = core::CSmallVector; @@ -60,14 +56,8 @@ using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; //! Adds "weight" x "right operand" to the "left operand". 
-struct SPlusWeight -{ - double operator()(double lhs, - double rhs, - double weight = 1.0) const - { - return lhs + weight * rhs; - } +struct SPlusWeight { + double operator()(double lhs, double rhs, double weight = 1.0) const { return lhs + weight * rhs; } }; //! Evaluate \p func on the joint predictive distribution for \p samples @@ -91,9 +81,9 @@ struct SPlusWeight //! \param precision The precision of the conditional mean prior. //! \param result Filled in with the aggregation of results of \p func. template -bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, +bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, FUNC func, AGGREGATOR aggregate, bool isNonInformative, @@ -103,12 +93,10 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, double mean, double precision, double predictionMean, - RESULT &result) -{ + RESULT& result) { result = RESULT(); - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute distribution for empty sample set"); return false; } @@ -126,41 +114,33 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, // // This becomes increasingly accurate as the prior distribution narrows. - try - { - if (isNonInformative) - { + try { + if (isNonInformative) { // The non-informative prior is improper and effectively 0 everywhere. // (It is acceptable to approximate all finite samples as at the median // of this distribution.) - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; double n = maths_t::count(weightStyles, weights[i]); - if (!CMathsFuncs::isFinite(n)) - { + if (!CMathsFuncs::isFinite(n)) { LOG_ERROR("Bad count weight " << n); return false; } result = aggregate(result, func(CTools::SImproperDistribution(), x), n); } - } - else if (shape > MINIMUM_GAUSSIAN_SHAPE) - { + } else if (shape > MINIMUM_GAUSSIAN_SHAPE) { // For large shape the marginal likelihood is very well approximated // by a moment matched Gaussian, i.e. N(m, (p+1)/p * b/a) where "m" // is the mean and "p" is the precision of the prior Gaussian and "a" // is the shape and "b" is the rate of the prior gamma distribution, // and the error function is significantly cheaper to compute. - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); - double x = seasonalScale != 1.0 ? - predictionMean + (samples[i] - predictionMean) / seasonalScale : samples[i]; + double x = seasonalScale != 1.0 ? predictionMean + (samples[i] - predictionMean) / seasonalScale : samples[i]; // Get the effective precision and rate of the sample. double scaledPrecision = countVarianceScale * precision; @@ -170,9 +150,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, boost::math::normal_distribution<> normal(mean, deviation); result = aggregate(result, func(normal, x + offset), n); } - } - else - { + } else { // The marginal likelihood is a t distribution with 2*a degrees of // freedom, location m and scale ((p+1)/p * b/a) ^ (1/2). 
We can // compute the distribution by transforming the data as follows: @@ -182,14 +160,12 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, boost::math::students_t_distribution<> students(2.0 * shape); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); - double x = seasonalScale != 1.0 ? - predictionMean + (samples[i] - predictionMean) / seasonalScale : samples[i]; + double x = seasonalScale != 1.0 ? predictionMean + (samples[i] - predictionMean) / seasonalScale : samples[i]; // Get the effective precision and rate of the sample. double scaledPrecision = countVarianceScale * precision; @@ -200,9 +176,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, result = aggregate(result, func(students, sample), n); } } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Error calculating joint distribution: " << e.what()); return false; } @@ -219,56 +193,53 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles, //! so that it can be integrated over the hidden variable representing the //! actual value of a discrete datum which we assume is in the interval [n, n+1]. template -class CEvaluateOnSamples : core::CNonCopyable -{ - public: - CEvaluateOnSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - bool isNonInformative, - double mean, - double precision, - double shape, - double rate, - double predictionMean) : - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_PredictionMean(predictionMean) - {} - - bool operator()(double x, double &result) const - { - return evaluateFunctionOnJointDistribution(m_WeightStyles, - m_Samples, - m_Weights, - F(), - SPlusWeight(), - m_IsNonInformative, - x, - m_Shape, - m_Rate, - m_Mean, - m_Precision, - m_PredictionMean, - result); - } +class CEvaluateOnSamples : core::CNonCopyable { +public: + CEvaluateOnSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + bool isNonInformative, + double mean, + double precision, + double shape, + double rate, + double predictionMean) + : m_WeightStyles(weightStyles), + m_Samples(samples), + m_Weights(weights), + m_IsNonInformative(isNonInformative), + m_Mean(mean), + m_Precision(precision), + m_Shape(shape), + m_Rate(rate), + m_PredictionMean(predictionMean) {} + + bool operator()(double x, double& result) const { + return evaluateFunctionOnJointDistribution(m_WeightStyles, + m_Samples, + m_Weights, + F(), + SPlusWeight(), + m_IsNonInformative, + x, + m_Shape, + m_Rate, + m_Mean, + m_Precision, + m_PredictionMean, + result); + } - private: - const TWeightStyleVec &m_WeightStyles; - const TDouble1Vec &m_Samples; - const TDouble4Vec1Vec &m_Weights; - bool m_IsNonInformative; - double m_Mean; - double m_Precision; - double m_Shape; - double m_Rate; - double m_PredictionMean; +private: + const TWeightStyleVec& m_WeightStyles; + const TDouble1Vec& m_Samples; + const TDouble4Vec1Vec& m_Weights; + bool m_IsNonInformative; + double m_Mean; + double m_Precision; + double m_Shape; 
+    double m_Rate;
+    double m_PredictionMean;
 };
 
 //! Computes the probability of seeing less likely samples at a specified offset.
@@ -276,79 +247,72 @@ class CEvaluateOnSamples : core::CNonCopyable
 //! This is a thin wrapper around the evaluateFunctionOnJointDistribution function
 //! so that it can be integrated over the hidden variable representing the
 //! actual value of a discrete datum which we assume is in the interval [n, n+1].
-class CProbabilityOfLessLikelySamples : core::CNonCopyable
-{
-    public:
-        CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                        const TWeightStyleVec &weightStyles,
-                                        const TDouble1Vec &samples,
-                                        const TDouble4Vec1Vec &weights,
-                                        bool isNonInformative,
-                                        double mean,
-                                        double precision,
-                                        double shape,
-                                        double rate,
-                                        double predictionMean) :
-            m_Calculation(calculation),
-            m_WeightStyles(weightStyles),
-            m_Samples(samples),
-            m_Weights(weights),
-            m_IsNonInformative(isNonInformative),
-            m_Mean(mean),
-            m_Precision(precision),
-            m_Shape(shape),
-            m_Rate(rate),
-            m_PredictionMean(predictionMean),
-            m_Tail(0)
-        {}
-
-        bool operator()(double x, double &result) const
-        {
-            CJointProbabilityOfLessLikelySamples probability;
-            maths_t::ETail tail = maths_t::E_UndeterminedTail;
-
-            if (   !evaluateFunctionOnJointDistribution(m_WeightStyles,
-                                                        m_Samples,
-                                                        m_Weights,
-                                                        boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation),
-                                                                    _1, _2, boost::ref(tail)),
-                                                        CJointProbabilityOfLessLikelySamples::SAddProbability(),
-                                                        m_IsNonInformative,
-                                                        x,
-                                                        m_Shape,
-                                                        m_Rate,
-                                                        m_Mean,
-                                                        m_Precision,
-                                                        m_PredictionMean,
-                                                        probability)
-                || !probability.calculate(result))
-            {
-                LOG_ERROR("Failed to compute probability of less likely samples");
-                return false;
-            }
-
-            m_Tail = m_Tail | tail;
-
-            return true;
+class CProbabilityOfLessLikelySamples : core::CNonCopyable {
+public:
+    CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
+                                    const TWeightStyleVec& weightStyles,
+                                    const TDouble1Vec& samples,
+                                    const TDouble4Vec1Vec& weights,
+                                    bool isNonInformative,
+                                    double mean,
+                                    double precision,
+                                    double shape,
+                                    double rate,
+                                    double predictionMean)
+        : m_Calculation(calculation),
+          m_WeightStyles(weightStyles),
+          m_Samples(samples),
+          m_Weights(weights),
+          m_IsNonInformative(isNonInformative),
+          m_Mean(mean),
+          m_Precision(precision),
+          m_Shape(shape),
+          m_Rate(rate),
+          m_PredictionMean(predictionMean),
+          m_Tail(0) {}
+
+    bool operator()(double x, double& result) const {
+        CJointProbabilityOfLessLikelySamples probability;
+        maths_t::ETail tail = maths_t::E_UndeterminedTail;
+
+        if (!evaluateFunctionOnJointDistribution(
+                m_WeightStyles,
+                m_Samples,
+                m_Weights,
+                boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)),
+                CJointProbabilityOfLessLikelySamples::SAddProbability(),
+                m_IsNonInformative,
+                x,
+                m_Shape,
+                m_Rate,
+                m_Mean,
+                m_Precision,
+                m_PredictionMean,
+                probability) ||
+            !probability.calculate(result)) {
+            LOG_ERROR("Failed to compute probability of less likely samples");
+            return false;
         }
 
-        maths_t::ETail tail() const
-        {
-            return static_cast<maths_t::ETail>(m_Tail);
-        }
+        m_Tail = m_Tail | tail;
 
-    private:
-        maths_t::EProbabilityCalculation m_Calculation;
-        const TWeightStyleVec &m_WeightStyles;
-        const TDouble1Vec &m_Samples;
-        const TDouble4Vec1Vec &m_Weights;
-        bool m_IsNonInformative;
-        double m_Mean;
-        double m_Precision;
-        double m_Shape;
-        double m_Rate;
-        double m_PredictionMean;
-        mutable int m_Tail;
+        return true;
+    }
+
+    maths_t::ETail tail() const { return static_cast<maths_t::ETail>(m_Tail); }
+
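    // Usage sketch: for integer data this functor is integrated over the
    // hidden offset x in [0, 1], and for continuous data it is evaluated at
    // an offset of zero, along the lines of the following (assuming the
    // Gauss-Legendre helper in CIntegration; the constructor arguments are
    // abbreviated):
    //
    //     CProbabilityOfLessLikelySamples calculator(calculation, weightStyles, samples, weights,
    //                                                isNonInformative, mean, precision, shape, rate,
    //                                                predictionMean);
    //     double probability;
    //     bool ok = isInteger ? CIntegration::gaussLegendre<CIntegration::OrderThree>(
    //                               calculator, 0.0, 1.0, probability)
    //                         : calculator(0.0, probability);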
+private: + maths_t::EProbabilityCalculation m_Calculation; + const TWeightStyleVec& m_WeightStyles; + const TDouble1Vec& m_Samples; + const TDouble4Vec1Vec& m_Weights; + bool m_IsNonInformative; + double m_Mean; + double m_Precision; + double m_Shape; + double m_Rate; + double m_PredictionMean; + mutable int m_Tail; }; //! The log marginal likelihood function of the samples is the log of the @@ -369,134 +333,107 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable //! var(x) is the sample variance. //! m and p are the prior Gaussian mean and precision, respectively. //! a and b are the prior Gamma shape and rate, respectively. -class CLogMarginalLikelihood : core::CNonCopyable -{ - public: - CLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double mean, - double precision, - double shape, - double rate, - double predictionMean) : - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_NumberSamples(0.0), - m_WeightedNumberSamples(0.0), - m_SampleMean(0.0), - m_SampleSquareDeviation(0.0), - m_Constant(0.0), - m_ErrorStatus(maths_t::E_FpNoErrors) - { - this->precompute(weightStyles, samples, weights, predictionMean); +class CLogMarginalLikelihood : core::CNonCopyable { +public: + CLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double mean, + double precision, + double shape, + double rate, + double predictionMean) + : m_Mean(mean), + m_Precision(precision), + m_Shape(shape), + m_Rate(rate), + m_NumberSamples(0.0), + m_WeightedNumberSamples(0.0), + m_SampleMean(0.0), + m_SampleSquareDeviation(0.0), + m_Constant(0.0), + m_ErrorStatus(maths_t::E_FpNoErrors) { + this->precompute(weightStyles, samples, weights, predictionMean); + } + + //! Evaluate the log marginal likelihood at the offset \p x. + bool operator()(double x, double& result) const { + if (m_ErrorStatus & maths_t::E_FpFailed) { + return false; } - //! Evaluate the log marginal likelihood at the offset \p x. - bool operator()(double x, double &result) const - { - if (m_ErrorStatus & maths_t::E_FpFailed) - { - return false; - } + double sampleMean = m_SampleMean + x; + double impliedShape = m_Shape + 0.5 * m_NumberSamples; + double impliedRate = m_Rate + 0.5 * (m_SampleSquareDeviation + m_Precision * m_WeightedNumberSamples * (sampleMean - m_Mean) * + (sampleMean - m_Mean) / (m_Precision + m_WeightedNumberSamples)); + result = m_Constant - impliedShape * std::log(impliedRate); - double sampleMean = m_SampleMean + x; - double impliedShape = m_Shape + 0.5 * m_NumberSamples; - double impliedRate = m_Rate + 0.5 * (m_SampleSquareDeviation - + m_Precision - * m_WeightedNumberSamples - * (sampleMean - m_Mean) - * (sampleMean - m_Mean) - / (m_Precision + m_WeightedNumberSamples)); - result = m_Constant - impliedShape * std::log(impliedRate); - - return true; - } + return true; + } - //! Retrieve the error status for the integration. - maths_t::EFloatingPointErrorStatus errorStatus() const - { - return m_ErrorStatus; - } + //! Retrieve the error status for the integration. + maths_t::EFloatingPointErrorStatus errorStatus() const { return m_ErrorStatus; } + +private: + static const double LOG_2_PI; - private: - static const double LOG_2_PI; - - private: - //! Compute all the constants in the integrand. 
- void precompute(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double predictionMean) - { - m_NumberSamples = 0.0; - TMeanVarAccumulator sampleMoments; - double logVarianceScaleSum = 0.0; - - try - { - for (std::size_t i = 0u; i < samples.size(); ++i) - { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); - double w = 1.0 / countVarianceScale; - m_NumberSamples += n; - if (seasonalScale != 1.0) - { - sampleMoments.add(predictionMean + (samples[i] - predictionMean) / seasonalScale, n * w); - logVarianceScaleSum += 2.0 * std::log(seasonalScale); - } - else - { - sampleMoments.add(samples[i], n * w); - } - if (countVarianceScale != 1.0) - { - logVarianceScaleSum += std::log(countVarianceScale); - } +private: + //! Compute all the constants in the integrand. + void + precompute(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights, double predictionMean) { + m_NumberSamples = 0.0; + TMeanVarAccumulator sampleMoments; + double logVarianceScaleSum = 0.0; + + try { + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weightStyles, weights[i]); + double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); + double w = 1.0 / countVarianceScale; + m_NumberSamples += n; + if (seasonalScale != 1.0) { + sampleMoments.add(predictionMean + (samples[i] - predictionMean) / seasonalScale, n * w); + logVarianceScaleSum += 2.0 * std::log(seasonalScale); + } else { + sampleMoments.add(samples[i], n * w); + } + if (countVarianceScale != 1.0) { + logVarianceScaleSum += std::log(countVarianceScale); } - m_WeightedNumberSamples = CBasicStatistics::count(sampleMoments); - m_SampleMean = CBasicStatistics::mean(sampleMoments); - m_SampleSquareDeviation = (m_WeightedNumberSamples - 1.0) - * CBasicStatistics::variance(sampleMoments); - - double impliedShape = m_Shape + 0.5 * m_NumberSamples; - double impliedPrecision = m_Precision + m_WeightedNumberSamples; - - m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision)) - - 0.5 * m_NumberSamples * LOG_2_PI - - 0.5 * logVarianceScaleSum - + boost::math::lgamma(impliedShape) - - boost::math::lgamma(m_Shape) - + m_Shape * std::log(m_Rate); - } - catch (const std::exception &e) - { - LOG_ERROR("Error calculating marginal likelihood: " << e.what()); - this->addErrorStatus(maths_t::E_FpFailed); } - } + m_WeightedNumberSamples = CBasicStatistics::count(sampleMoments); + m_SampleMean = CBasicStatistics::mean(sampleMoments); + m_SampleSquareDeviation = (m_WeightedNumberSamples - 1.0) * CBasicStatistics::variance(sampleMoments); - //! Update the error status. 
-        void addErrorStatus(maths_t::EFloatingPointErrorStatus status) const
-        {
-            m_ErrorStatus = static_cast<maths_t::EFloatingPointErrorStatus>(m_ErrorStatus | status);
+            double impliedShape = m_Shape + 0.5 * m_NumberSamples;
+            double impliedPrecision = m_Precision + m_WeightedNumberSamples;
+
+            m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision)) - 0.5 * m_NumberSamples * LOG_2_PI -
+                         0.5 * logVarianceScaleSum + boost::math::lgamma(impliedShape) - boost::math::lgamma(m_Shape) +
+                         m_Shape * std::log(m_Rate);
+        } catch (const std::exception& e) {
+            LOG_ERROR("Error calculating marginal likelihood: " << e.what());
+            this->addErrorStatus(maths_t::E_FpFailed);
         }
+    }
 
-    private:
-        double m_Mean;
-        double m_Precision;
-        double m_Shape;
-        double m_Rate;
-        double m_NumberSamples;
-        double m_WeightedNumberSamples;
-        double m_SampleMean;
-        double m_SampleSquareDeviation;
-        double m_Constant;
-        mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus;
+    //! Update the error status.
+    void addErrorStatus(maths_t::EFloatingPointErrorStatus status) const {
+        m_ErrorStatus = static_cast<maths_t::EFloatingPointErrorStatus>(m_ErrorStatus | status);
+    }
+
+private:
+    double m_Mean;
+    double m_Precision;
+    double m_Shape;
+    double m_Rate;
+    double m_NumberSamples;
+    double m_WeightedNumberSamples;
+    double m_SampleMean;
+    double m_SampleSquareDeviation;
+    double m_Constant;
+    mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus;
 };
 
 const double CLogMarginalLikelihood::LOG_2_PI = std::log(boost::math::double_constants::two_pi);
@@ -513,7 +450,6 @@ const std::string NUMBER_SAMPLES_TAG("e");
 //const std::string MAXIMUM_TAG("g"); No longer used
 const std::string DECAY_RATE_TAG("h");
 const std::string EMPTY_STRING;
-
 }
 
 CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(maths_t::EDataType dataType,
@@ -521,48 +457,30 @@ CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(maths_t::EDataType dataType,
                                                    double gaussianPrecision,
                                                    double gammaShape,
                                                    double gammaRate,
-                                                   double decayRate/*= 0.0*/) :
-        CPrior(dataType, decayRate),
-        m_GaussianMean(gaussianMean),
-        m_GaussianPrecision(gaussianPrecision),
-        m_GammaShape(gammaShape),
-        m_GammaRate(gammaRate)
-{
+                                                   double decayRate /*= 0.0*/)
+    : CPrior(dataType, decayRate),
+      m_GaussianMean(gaussianMean),
+      m_GaussianPrecision(gaussianPrecision),
+      m_GammaShape(gammaShape),
+      m_GammaRate(gammaRate) {
     this->numberSamples(gaussianPrecision);
 }
 
-CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(maths_t::EDataType dataType,
-                                                   const TMeanVarAccumulator &moments,
-                                                   double decayRate) :
-        CPrior(dataType, decayRate),
-        m_GaussianMean(0.0),
-        m_GaussianPrecision(0.0),
-        m_GammaShape(0.0),
-        m_GammaRate(0.0)
-{
+CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(maths_t::EDataType dataType, const TMeanVarAccumulator& moments, double decayRate)
+    : CPrior(dataType, decayRate), m_GaussianMean(0.0), m_GaussianPrecision(0.0), m_GammaShape(0.0), m_GammaRate(0.0) {
     this->reset(dataType, moments, decayRate);
 }
 
-CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(const SDistributionRestoreParams &params,
-                                                   core::CStateRestoreTraverser &traverser) :
-        CPrior(params.s_DataType, params.s_DecayRate),
-        m_GaussianMean(0.0),
-        m_GaussianPrecision(0.0),
-        m_GammaShape(0.0),
-        m_GammaRate(0.0)
-{
+CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser)
+    : CPrior(params.s_DataType, params.s_DecayRate), m_GaussianMean(0.0), m_GaussianPrecision(0.0), m_GammaShape(0.0), m_GammaRate(0.0) {
     traverser.traverseSubLevel(boost::bind(&CNormalMeanPrecConjugate::acceptRestoreTraverser, this,
_1)); } -bool CNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, - double decayRate, - core::CStringUtils::stringToType(traverser.value(), decayRate), - this->decayRate(decayRate)) +bool CNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN( + DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) RESTORE_BUILT_IN(GAUSSIAN_MEAN_TAG, m_GaussianMean) RESTORE_BUILT_IN(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision) RESTORE_BUILT_IN(GAMMA_SHAPE_TAG, m_GammaShape) @@ -571,16 +489,12 @@ bool CNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTravers double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CNormalMeanPrecConjugate::reset(maths_t::EDataType dataType, - const TMeanVarAccumulator &moments, - double decayRate) -{ +void CNormalMeanPrecConjugate::reset(maths_t::EDataType dataType, const TMeanVarAccumulator& moments, double decayRate) { this->dataType(dataType); this->decayRate(decayRate); @@ -599,8 +513,7 @@ void CNormalMeanPrecConjugate::reset(maths_t::EDataType dataType, // mean) in the data of size MINIMUM_COEFFICIENT_OF_VARATION on the // prior parameters. - if (m_GaussianPrecision > 1.5) - { + if (m_GaussianPrecision > 1.5) { double truncatedMean = std::max(std::fabs(m_GaussianMean), 1e-8); double minimumDeviation = truncatedMean * MINIMUM_COEFFICIENT_OF_VARIATION; double minimumRate = (m_GaussianPrecision - 1.0) * minimumDeviation * minimumDeviation; @@ -610,64 +523,45 @@ void CNormalMeanPrecConjugate::reset(maths_t::EDataType dataType, this->CPrior::addSamples(n); } -bool CNormalMeanPrecConjugate::needsOffset() const -{ +bool CNormalMeanPrecConjugate::needsOffset() const { return false; } -CNormalMeanPrecConjugate CNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType, - double decayRate/*= 0.0*/) -{ - return CNormalMeanPrecConjugate(dataType, - NON_INFORMATIVE_MEAN, - NON_INFORMATIVE_PRECISION, - NON_INFORMATIVE_SHAPE, - NON_INFORMATIVE_RATE, - decayRate); +CNormalMeanPrecConjugate CNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType, double decayRate /*= 0.0*/) { + return CNormalMeanPrecConjugate( + dataType, NON_INFORMATIVE_MEAN, NON_INFORMATIVE_PRECISION, NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, decayRate); } -CNormalMeanPrecConjugate::EPrior CNormalMeanPrecConjugate::type() const -{ +CNormalMeanPrecConjugate::EPrior CNormalMeanPrecConjugate::type() const { return E_Normal; } -CNormalMeanPrecConjugate *CNormalMeanPrecConjugate::clone() const -{ +CNormalMeanPrecConjugate* CNormalMeanPrecConjugate::clone() const { return new CNormalMeanPrecConjugate(*this); } -void CNormalMeanPrecConjugate::setToNonInformative(double /*offset*/, double decayRate) -{ +void CNormalMeanPrecConjugate::setToNonInformative(double /*offset*/, double decayRate) { *this = nonInformativePrior(this->dataType(), decayRate); } -double CNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec &/*weightStyles*/, - const TDouble1Vec &/*samples*/, - const TDouble4Vec1Vec &/*weights*/) -{ +double CNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/, + const 
TDouble1Vec& /*samples*/, + const TDouble4Vec1Vec& /*weights*/) { return 0.0; } -double CNormalMeanPrecConjugate::offset() const -{ +double CNormalMeanPrecConjugate::offset() const { return 0.0; } -void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) -{ - if (samples.empty()) - { +void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { + if (samples.empty()) { return; } - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" - << core::CContainerPrinter::print(samples) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return; } @@ -728,44 +622,33 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles, double numberSamples = 0.0; TMeanVarAccumulator sampleMoments; - try - { - for (std::size_t i = 0u; i < samples.size(); ++i) - { + try { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights[i]) - * maths_t::countVarianceScale(weightStyles, weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); numberSamples += n; sampleMoments.add(samples[i], n / varianceScale); } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to update likelihood: " << e.what()); return; } double scaledNumberSamples = CBasicStatistics::count(sampleMoments); double sampleMean = CBasicStatistics::mean(sampleMoments); - double sampleSquareDeviation = (scaledNumberSamples - 1.0) - * CBasicStatistics::variance(sampleMoments); + double sampleSquareDeviation = (scaledNumberSamples - 1.0) * CBasicStatistics::variance(sampleMoments); - if (this->isInteger()) - { + if (this->isInteger()) { sampleMean += 0.5; sampleSquareDeviation += numberSamples / 12.0; } m_GammaShape += 0.5 * numberSamples; - m_GammaRate += 0.5 * (sampleSquareDeviation - + m_GaussianPrecision - * scaledNumberSamples - * (sampleMean - m_GaussianMean) - * (sampleMean - m_GaussianMean) - / (m_GaussianPrecision + scaledNumberSamples)); - - m_GaussianMean = (m_GaussianPrecision * m_GaussianMean - + scaledNumberSamples * sampleMean) - / (m_GaussianPrecision + scaledNumberSamples); + m_GammaRate += 0.5 * (sampleSquareDeviation + m_GaussianPrecision * scaledNumberSamples * (sampleMean - m_GaussianMean) * + (sampleMean - m_GaussianMean) / (m_GaussianPrecision + scaledNumberSamples)); + + m_GaussianMean = + (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * sampleMean) / (m_GaussianPrecision + scaledNumberSamples); m_GaussianPrecision += scaledNumberSamples; // If the coefficient of variation of the data is too small we run @@ -774,25 +657,19 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles, // mean) in the data of size MINIMUM_COEFFICIENT_OF_VARATION on the // prior parameters. 
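    // For reference, the four assignments above are the standard normal-gamma
    // conjugate update: with prior parameters (m, p, a, b), raw sample count n,
    // weighted sample count s, sample mean u and sample square deviation S,
    // they compute
    //
    //     a' = a + n / 2
    //     b' = b + (S + p * s * (u - m)^2 / (p + s)) / 2
    //     m' = (p * m + s * u) / (p + s)
    //     p' = p + s
    //
    // As a worked check: starting from m = 0, p = 1, a = 2, b = 2 and adding
    // n = s = 4 unit-weight samples with u = 1 and S = 3 gives a' = 4,
    // b' = 2 + (3 + 4 / 5) / 2 = 3.9, m' = 4 / 5 = 0.8 and p' = 5.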
- if (m_GaussianPrecision > 1.5) - { + if (m_GaussianPrecision > 1.5) { double truncatedMean = std::max(std::fabs(m_GaussianMean), 1e-8); double minimumDeviation = truncatedMean * MINIMUM_COEFFICIENT_OF_VARIATION; double minimumRate = (2.0 * m_GammaShape - 1.0) * minimumDeviation * minimumDeviation; m_GammaRate = std::max(m_GammaRate, minimumRate); } - LOG_TRACE("sampleMean = " << sampleMean - << ", sampleSquareDeviation = " << sampleSquareDeviation - << ", numberSamples = " << numberSamples - << ", scaledNumberSamples = " << scaledNumberSamples - << ", m_GammaShape = " << m_GammaShape - << ", m_GammaRate = " << m_GammaRate - << ", m_GaussianMean = " << m_GaussianMean - << ", m_GaussianPrecision = " << m_GaussianPrecision); - - if (this->isBad()) - { + LOG_TRACE("sampleMean = " << sampleMean << ", sampleSquareDeviation = " << sampleSquareDeviation + << ", numberSamples = " << numberSamples << ", scaledNumberSamples = " << scaledNumberSamples + << ", m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate + << ", m_GaussianMean = " << m_GaussianMean << ", m_GaussianPrecision = " << m_GaussianPrecision); + + if (this->isBad()) { LOG_ERROR("Update failed (" << this->debug() << ")"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); @@ -800,25 +677,21 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec &weightStyles, } } -void CNormalMeanPrecConjugate::propagateForwardsByTime(double time) -{ - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { +void CNormalMeanPrecConjugate::propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { LOG_ERROR("Bad propagation time " << time); return; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // Nothing to be done. return; } double alpha = std::exp(-this->decayRate() * time); - double beta = 1.0 - alpha; + double beta = 1.0 - alpha; - m_GaussianPrecision = alpha * m_GaussianPrecision - + beta * NON_INFORMATIVE_PRECISION; + m_GaussianPrecision = alpha * m_GaussianPrecision + beta * NON_INFORMATIVE_PRECISION; // We want to increase the variance of the gamma distribution while // holding its mean constant s.t. in the limit t -> inf var -> inf. @@ -830,45 +703,31 @@ void CNormalMeanPrecConjugate::propagateForwardsByTime(double time) // // Thus the mean is unchanged and variance is increased by 1 / f. 
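    // To see this, recall that if X ~ Gamma(a, b) then E[X] = a / b and
    // var[X] = a / b^2. Scaling both parameters by f therefore leaves the
    // mean unchanged, E[X'] = (f * a) / (f * b) = a / b, while the variance
    // becomes var[X'] = (f * a) / (f * b)^2 = (1 / f) * (a / b^2).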
-    double factor = std::min((  alpha * m_GammaShape
-                              + beta * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0);
+    double factor = std::min((alpha * m_GammaShape + beta * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0);
 
     m_GammaShape *= factor;
-    m_GammaRate  *= factor;
+    m_GammaRate *= factor;
 
     this->numberSamples(this->numberSamples() * alpha);
 
-    LOG_TRACE("time = " << time
-              << ", alpha = " << alpha
-              << ", m_GaussianPrecision = " << m_GaussianPrecision
-              << ", m_GammaShape = " << m_GammaShape
-              << ", m_GammaRate = " << m_GammaRate
-              << ", numberSamples = " << this->numberSamples());
+    LOG_TRACE("time = " << time << ", alpha = " << alpha << ", m_GaussianPrecision = " << m_GaussianPrecision << ", m_GammaShape = "
+              << m_GammaShape << ", m_GammaRate = " << m_GammaRate << ", numberSamples = " << this->numberSamples());
 }
 
-CNormalMeanPrecConjugate::TDoubleDoublePr
-CNormalMeanPrecConjugate::marginalLikelihoodSupport() const
-{
-    return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                          boost::numeric::bounds<double>::highest());
+CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::marginalLikelihoodSupport() const {
+    return std::make_pair(boost::numeric::bounds<double>::lowest(), boost::numeric::bounds<double>::highest());
 }
 
-double CNormalMeanPrecConjugate::marginalLikelihoodMean() const
-{
+double CNormalMeanPrecConjugate::marginalLikelihoodMean() const {
     return this->isInteger() ? this->mean() - 0.5 : this->mean();
 }
 
-double CNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec &/*weightStyles*/,
-                                                        const TDouble4Vec &/*weights*/) const
-{
+double CNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const {
     return this->marginalLikelihoodMean();
 }
 
-double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec &weightStyles,
-                                                            const TDouble4Vec &weights) const
-{
-    if (this->isNonInformative() || m_GammaShape <= 1.0)
-    {
+double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const {
+    if (this->isNonInformative() || m_GammaShape <= 1.0) {
         return boost::numeric::bounds<double>::highest();
     }
 
@@ -882,15 +741,9 @@ double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVe
     // first term evaluates to 1 / P and the second term 1 / p / t whence...
double varianceScale = 1.0; - try - { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) - * maths_t::countVarianceScale(weightStyles, weights); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get variance scale: " << e.what()); - } + try { + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale: " << e.what()); } double a = m_GammaShape; double b = m_GammaRate; double t = m_GaussianPrecision; @@ -899,11 +752,9 @@ double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVe CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ - if (this->isNonInformative()) - { + const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { + if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -912,76 +763,56 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage // We use the fact that the marginal likelihood is a t-distribution. - try - { + try { double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights); double scaledPrecision = countVarianceScale * m_GaussianPrecision; double scaledRate = countVarianceScale * m_GammaRate; - double scale = std::sqrt((scaledPrecision + 1.0) / scaledPrecision - * scaledRate / m_GammaShape); + double scale = std::sqrt((scaledPrecision + 1.0) / scaledPrecision * scaledRate / m_GammaShape); double m = this->marginalLikelihoodMean(); - if (m_GammaShape > MINIMUM_GAUSSIAN_SHAPE) - { + if (m_GammaShape > MINIMUM_GAUSSIAN_SHAPE) { boost::math::normal_distribution<> normal(m_GaussianMean, scale); - double x1 = boost::math::quantile(normal, (1.0 - percentage) / 2.0) - - (this->isInteger() ? 0.5 : 0.0); + double x1 = boost::math::quantile(normal, (1.0 - percentage) / 2.0) - (this->isInteger() ? 0.5 : 0.0); x1 = seasonalScale != 1.0 ? m + seasonalScale * (x1 - m) : x1; - double x2 = percentage > 0.0 ? - boost::math::quantile(normal, (1.0 + percentage) / 2.0) - - (this->isInteger() ? 0.5 : 0.0) : x1; + double x2 = percentage > 0.0 ? boost::math::quantile(normal, (1.0 + percentage) / 2.0) - (this->isInteger() ? 0.5 : 0.0) : x1; x2 = seasonalScale != 1.0 ? m + seasonalScale * (x2 - m) : x2; LOG_TRACE("x1 = " << x1 << ", x2 = " << x2 << ", scale = " << scale); return std::make_pair(x1, x2); } boost::math::students_t_distribution<> students(2.0 * m_GammaShape); - double x1 = m_GaussianMean - + scale * boost::math::quantile(students, (1.0 - percentage) / 2.0) - - (this->isInteger() ? 0.5 : 0.0); + double x1 = m_GaussianMean + scale * boost::math::quantile(students, (1.0 - percentage) / 2.0) - (this->isInteger() ? 0.5 : 0.0); x1 = seasonalScale != 1.0 ? m + seasonalScale * (x1 - m) : x1; - double x2 = percentage > 0.0 ? - m_GaussianMean - + scale * boost::math::quantile(students, (1.0 + percentage) / 2.0) - - (this->isInteger() ? 0.5 : 0.0) : x1; + double x2 = percentage > 0.0 ? m_GaussianMean + scale * boost::math::quantile(students, (1.0 + percentage) / 2.0) - + (this->isInteger() ? 0.5 : 0.0) + : x1; x2 = seasonalScale != 1.0 ? 
                  m + seasonalScale * (x2 - m) : x2;

         LOG_TRACE("x1 = " << x1 << ", x2 = " << x2 << ", scale = " << scale);

         return std::make_pair(x1, x2);
-    }
-    catch (const std::exception &e)
-    {
-        LOG_ERROR("Failed to compute confidence interval: " << e.what());
-    }
+    } catch (const std::exception& e) { LOG_ERROR("Failed to compute confidence interval: " << e.what()); }

     return this->marginalLikelihoodSupport();
 }

-maths_t::EFloatingPointErrorStatus
-CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles,
-                                                     const TDouble1Vec &samples,
-                                                     const TDouble4Vec1Vec &weights,
-                                                     double &result) const
-{
+maths_t::EFloatingPointErrorStatus CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                                                        const TDouble1Vec& samples,
+                                                                                        const TDouble4Vec1Vec& weights,
+                                                                                        double& result) const {
     result = 0.0;

-    if (samples.empty())
-    {
+    if (samples.empty()) {
         LOG_ERROR("Can't compute likelihood for empty sample set");
         return maths_t::E_FpFailed;
     }

-    if (samples.size() != weights.size())
-    {
-        LOG_ERROR("Mismatch in samples '"
-                  << core::CContainerPrinter::print(samples)
-                  << "' and weights '"
-                  << core::CContainerPrinter::print(weights) << "'");
+    if (samples.size() != weights.size()) {
+        LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+                                          << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }

-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // The non-informative likelihood is improper and effectively
         // zero everywhere. We use minus max double because
         // log(0) = HUGE_VALUE, which causes problems for Windows.
@@ -994,36 +825,21 @@ CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weig
         return maths_t::E_FpOverflowed;
     }

-    detail::CLogMarginalLikelihood logMarginalLikelihood(weightStyles,
-                                                         samples,
-                                                         weights,
-                                                         m_GaussianMean,
-                                                         m_GaussianPrecision,
-                                                         m_GammaShape,
-                                                         m_GammaRate,
-                                                         this->marginalLikelihoodMean());
-    if (this->isInteger())
-    {
-        CIntegration::logGaussLegendre(logMarginalLikelihood,
-                                       0.0, 1.0,
-                                       result);
-    }
-    else
-    {
+    detail::CLogMarginalLikelihood logMarginalLikelihood(
+        weightStyles, samples, weights, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate, this->marginalLikelihoodMean());
+    if (this->isInteger()) {
+        CIntegration::logGaussLegendre(logMarginalLikelihood, 0.0, 1.0, result);
+    } else {
         logMarginalLikelihood(0.0, result);
     }

     maths_t::EFloatingPointErrorStatus status =
-        static_cast<maths_t::EFloatingPointErrorStatus>(
-            logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
-    if (status & maths_t::E_FpFailed)
-    {
+        static_cast<maths_t::EFloatingPointErrorStatus>(logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
+    if (status & maths_t::E_FpFailed) {
         LOG_ERROR("Failed to compute log likelihood (" << this->debug() << ")");
         LOG_ERROR("samples = " << core::CContainerPrinter::print(samples));
         LOG_ERROR("weights = " << core::CContainerPrinter::print(weights));
-    }
-    else if (status & maths_t::E_FpOverflowed)
-    {
+    } else if (status & maths_t::E_FpOverflowed) {
         LOG_TRACE("Log likelihood overflowed for (" << this->debug() << ")");
         LOG_TRACE("samples = " << core::CContainerPrinter::print(samples));
         LOG_TRACE("weights = " << core::CContainerPrinter::print(weights));
@@ -1031,18 +847,14 @@ CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weig
     return status;
 }
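A note on the status handling above: the floating-point error states are bit flags, so statuses from separate computations can be merged with bitwise OR and tested with bitwise AND, with failure taking precedence over overflow. A minimal standalone sketch of that convention (the enum name and flag values here are illustrative, not the library's actual definitions in maths_t):

#include <cstdio>

// Illustrative flag values; each state is a distinct power of two.
enum EFpStatus { E_NoErrors = 0x0, E_Overflowed = 0x1, E_Failed = 0x2 };

int main() {
    // Merge the statuses of two computations with bitwise OR...
    int status = E_Overflowed | E_NoErrors;
    // ...and test individual conditions with bitwise AND, failure first.
    if (status & E_Failed) {
        std::printf("failed\n");
    } else if (status & E_Overflowed) {
        std::printf("overflowed\n"); // this branch is taken
    }
    return 0;
}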

-void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
-                                                        TDouble1Vec &samples) const
-{
+void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const {
     samples.clear();

-    if (numberSamples == 0 || this->numberSamples() == 0.0)
-    {
+    if (numberSamples == 0 || this->numberSamples() == 0.0) {
         return;
     }

-    if (this->isNonInformative())
-    {
+    if (this->isNonInformative()) {
         // We can't sample the marginal likelihood directly. This should
         // only happen if we've had one sample so just return that sample.
         samples.push_back(m_GaussianMean);
@@ -1099,144 +911,94 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample

     double lastPartialExpectation = 0.0;

-    if (m_GammaShape > MINIMUM_GAUSSIAN_SHAPE)
-    {
-        double variance = (m_GaussianPrecision + 1.0)
-                          / m_GaussianPrecision
-                          * m_GammaRate / m_GammaShape;
+    if (m_GammaShape > MINIMUM_GAUSSIAN_SHAPE) {
+        double variance = (m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape;

-        LOG_TRACE("mean = " << m_GaussianMean
-                  << ", variance = " << variance
-                  << ", numberSamples = " << numberSamples);
+        LOG_TRACE("mean = " << m_GaussianMean << ", variance = " << variance << ", numberSamples = " << numberSamples);

-        try
-        {
+        try {
             boost::math::normal_distribution<> normal(m_GaussianMean, std::sqrt(variance));

-            for (std::size_t i = 1u; i < numberSamples; ++i)
-            {
-                double q = static_cast<double>(i)
-                           / static_cast<double>(numberSamples);
+            for (std::size_t i = 1u; i < numberSamples; ++i) {
+                double q = static_cast<double>(i) / static_cast<double>(numberSamples);
                 double xq = boost::math::quantile(normal, q);

-                double partialExpectation = m_GaussianMean * q
-                                            - variance * CTools::safePdf(normal, xq);
+                double partialExpectation = m_GaussianMean * q - variance * CTools::safePdf(normal, xq);

-                double sample = static_cast<double>(numberSamples)
-                                * (partialExpectation - lastPartialExpectation);
+                double sample = static_cast<double>(numberSamples) * (partialExpectation - lastPartialExpectation);

                 LOG_TRACE("sample = " << sample);

                 // Sanity check the sample: should be in the distribution support.
-                if (sample >= support.first && sample <= support.second)
-                {
+                if (sample >= support.first && sample <= support.second) {
                     samples.push_back(sample);
-                }
-                else
-                {
-                    LOG_ERROR("Sample out of bounds: sample = " << sample
-                              << ", gaussianMean = " << m_GaussianMean
-                              << ", variance = " << variance
-                              << ", q = " << q
-                              << ", x(q) = " << xq);
+                } else {
+                    LOG_ERROR("Sample out of bounds: sample = " << sample << ", gaussianMean = " << m_GaussianMean
+                                                                << ", variance = " << variance << ", q = " << q << ", x(q) = " << xq);
                 }

                 lastPartialExpectation = partialExpectation;
             }
+        } catch (const std::exception& e) {
+            LOG_ERROR("Failed to sample: " << e.what() << ", gaussianMean = " << m_GaussianMean << ", variance = " << variance);
         }
-        catch (const std::exception &e)
-        {
-            LOG_ERROR("Failed to sample: " << e.what()
-                      << ", gaussianMean = " << m_GaussianMean
-                      << ", variance = " << variance);
-        }
-    }
-    else
-    {
+    } else {
         double degreesFreedom = 2.0 * m_GammaShape;

-        try
-        {
+        try {
             boost::math::students_t_distribution<> students(degreesFreedom);

-            double scale = std::sqrt((m_GaussianPrecision + 1.0)
-                                     / m_GaussianPrecision
-                                     * m_GammaRate / m_GammaShape);
+            double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape);

-            LOG_TRACE("degreesFreedom = " << degreesFreedom
-                      << ", mean = " << m_GaussianMean
-                      << ", scale = " << scale
-                      << ", numberSamples = " << numberSamples);
+            LOG_TRACE("degreesFreedom = " << degreesFreedom << ", mean = " << m_GaussianMean << ", scale = " << scale
+                                          << ", numberSamples = " << numberSamples);

-            double constant = CTools::safePdf(students, 0.0)
-                              * scale
-                              * degreesFreedom / (degreesFreedom - 1.0);
+            double constant = CTools::safePdf(students, 0.0) * scale * degreesFreedom / (degreesFreedom - 1.0);

-            for (std::size_t i = 1u; i < numberSamples; ++i)
-            {
-                double q = static_cast<double>(i)
-                           / static_cast<double>(numberSamples);
+            for (std::size_t i = 1u; i < numberSamples; ++i) {
+                double q = static_cast<double>(i) / static_cast<double>(numberSamples);
                 double xq = boost::math::quantile(students, q);

                 double residual = xq * xq / degreesFreedom;

-                double partialExpectation = m_GaussianMean * q
-                                            - constant * std::exp(-(degreesFreedom - 1.0) / 2.0
-                                                                  * std::log(1.0 + residual));
+                double partialExpectation =
+                    m_GaussianMean * q - constant * std::exp(-(degreesFreedom - 1.0) / 2.0 * std::log(1.0 + residual));

-                double sample = static_cast<double>(numberSamples)
-                                * (partialExpectation - lastPartialExpectation);
+                double sample = static_cast<double>(numberSamples) * (partialExpectation - lastPartialExpectation);

                 LOG_TRACE("sample = " << sample);

                 // Sanity check the sample: should be in the distribution support.
-                if (sample >= support.first && sample <= support.second)
-                {
+                if (sample >= support.first && sample <= support.second) {
                     samples.push_back(sample);
-                }
-                else
-                {
-                    LOG_ERROR("Sample out of bounds: sample = " << sample
-                              << ", gaussianMean = " << m_GaussianMean
-                              << ", constant = " << constant
-                              << ", residual = " << residual
-                              << ", q = " << q
-                              << ", x(q) = " << xq);
+                } else {
+                    LOG_ERROR("Sample out of bounds: sample = " << sample << ", gaussianMean = " << m_GaussianMean
+                                                                << ", constant = " << constant << ", residual = " << residual
+                                                                << ", q = " << q << ", x(q) = " << xq);
                 }

                 lastPartialExpectation = partialExpectation;
             }
-        }
-        catch (const std::exception &e)
-        {
-            LOG_ERROR("Failed to sample: " << e.what()
-                      << ", degreesFreedom = " << degreesFreedom);
-        }
+        } catch (const std::exception& e) { LOG_ERROR("Failed to sample: " << e.what() << ", degreesFreedom = " << degreesFreedom); }
     }

-    double sample = static_cast<double>(numberSamples)
-                    * (m_GaussianMean - lastPartialExpectation);
+    double sample = static_cast<double>(numberSamples) * (m_GaussianMean - lastPartialExpectation);

     LOG_TRACE("sample = " << sample);

     // Sanity check the sample: should be in the distribution support.
-    if (sample >= support.first && sample <= support.second)
-    {
+    if (sample >= support.first && sample <= support.second) {
         samples.push_back(sample);
-    }
-    else
-    {
-        LOG_ERROR("Sample out of bounds: sample = " << sample
-                  << ", gaussianMean = " << m_GaussianMean);
+    } else {
+        LOG_ERROR("Sample out of bounds: sample = " << sample << ", gaussianMean = " << m_GaussianMean);
     }
 }
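For reference, the sampling scheme above can be demonstrated standalone. The sketch below (illustrative, not part of the patch; the function name is hypothetical) applies the same partial-expectation identity for a plain normal N(m, v), namely E[X 1{X <= x(q)}] = m * q - v * pdf(x(q)); the Student's t branch uses the analogous closed form with (1 + x^2 / nu)^(-(nu - 1) / 2). Each sample is n times the increment between consecutive n-quantile partial expectations, so the returned set preserves the distribution mean by construction.

#include <boost/math/distributions/normal.hpp>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<double> sampleByPartialExpectations(double m, double v, std::size_t n) {
    boost::math::normal_distribution<> normal(m, std::sqrt(v));
    std::vector<double> samples;
    samples.reserve(n);
    double lastPartialExpectation = 0.0;
    for (std::size_t i = 1; i <= n; ++i) {
        double q = static_cast<double>(i) / static_cast<double>(n);
        // The final bucket's partial expectation is the full mean m.
        double partialExpectation =
            i < n ? m * q - v * boost::math::pdf(normal, boost::math::quantile(normal, q)) : m;
        samples.push_back(static_cast<double>(n) * (partialExpectation - lastPartialExpectation));
        lastPartialExpectation = partialExpectation;
    }
    return samples;
}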

-bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles,
-                                                const TDouble1Vec &samples,
-                                                const TDouble4Vec1Vec &weights,
-                                                double &lowerBound,
-                                                double &upperBound) const
-{
+bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles,
+                                                const TDouble1Vec& samples,
+                                                const TDouble4Vec1Vec& weights,
+                                                double& lowerBound,
+                                                double& upperBound) const {
     using TMinusLogCdf = detail::CEvaluateOnSamples<CTools::SMinusLogCdf>;

     lowerBound = upperBound = 0.0;
@@ -1251,18 +1013,13 @@ bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec &weightSty
                           m_GammaRate,
                           this->marginalLikelihoodMean());

-    if (this->isInteger())
-    {
+    if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
         double value;
-        if (!CIntegration::logGaussLegendre(minusLogCdf,
-                                            0.0, 1.0,
-                                            value))
-        {
-            LOG_ERROR("Failed computing c.d.f. for "
-                      << core::CContainerPrinter::print(samples));
+        if (!CIntegration::logGaussLegendre(minusLogCdf, 0.0, 1.0, value)) {
+            LOG_ERROR("Failed computing c.d.f. for " << core::CContainerPrinter::print(samples));
             return false;
         }

@@ -1271,10 +1028,8 @@ bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec &weightSty
     }

     double value;
-    if (!minusLogCdf(0.0, value))
-    {
-        LOG_ERROR("Failed computing c.d.f. for "
-                  << core::CContainerPrinter::print(samples));
+    if (!minusLogCdf(0.0, value)) {
+        LOG_ERROR("Failed computing c.d.f. for " << core::CContainerPrinter::print(samples));
         return false;
     }
@@ -1282,12 +1037,11 @@ bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec &weightSty
     return true;
 }

-bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec &weightStyles,
-                                                          const TDouble1Vec &samples,
-                                                          const TDouble4Vec1Vec &weights,
-                                                          double &lowerBound,
-                                                          double &upperBound) const
-{
+bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
+                                                          const TDouble1Vec& samples,
+                                                          const TDouble4Vec1Vec& weights,
+                                                          double& lowerBound,
+                                                          double& upperBound) const {
     using TMinusLogCdfComplement = detail::CEvaluateOnSamples<CTools::SMinusLogCdfComplement>;

     lowerBound = upperBound = 0.0;
@@ -1302,18 +1056,13 @@ bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec
                                     m_GammaRate,
                                     this->marginalLikelihoodMean());

-    if (this->isInteger())
-    {
+    if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
         double value;
-        if (!CIntegration::logGaussLegendre(minusLogCdfComplement,
-                                            0.0, 1.0,
-                                            value))
-        {
-            LOG_ERROR("Failed computing c.d.f. complement for "
-                      << core::CContainerPrinter::print(samples));
+        if (!CIntegration::logGaussLegendre(minusLogCdfComplement, 0.0, 1.0, value)) {
+            LOG_ERROR("Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples));
             return false;
         }

@@ -1322,10 +1071,8 @@ bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec
     }

     double value;
-    if (!minusLogCdfComplement(0.0, value))
-    {
-        LOG_ERROR("Failed computing c.d.f. complement for "
-                  << core::CContainerPrinter::print(samples));
+    if (!minusLogCdfComplement(0.0, value)) {
+        LOG_ERROR("Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples));
         return false;
     }

@@ -1334,13 +1081,12 @@ bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec
 }

 bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                                              const TWeightStyleVec &weightStyles,
-                                                              const TDouble1Vec &samples,
-                                                              const TDouble4Vec1Vec &weights,
-                                                              double &lowerBound,
-                                                              double &upperBound,
-                                                              maths_t::ETail &tail) const
-{
+                                                              const TWeightStyleVec& weightStyles,
+                                                              const TDouble1Vec& samples,
+                                                              const TDouble4Vec1Vec& weights,
+                                                              double& lowerBound,
+                                                              double& upperBound,
+                                                              maths_t::ETail& tail) const {
     lowerBound = upperBound = 0.0;
     tail = maths_t::E_UndeterminedTail;

@@ -1355,16 +1101,13 @@ bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProbabil
                                               m_GammaRate,
                                               this->marginalLikelihoodMean());

-    if (this->isInteger())
-    {
+    if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
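        // (For a smooth integrand a low-order Gauss-Legendre rule suffices:
        // with nodes r_j and weights w_j on [-1, 1], the expectation over
        // the uniform offset is approximately sum_j (w_j / 2) f((r_j + 1) / 2),
        // the factor of one half being the Jacobian of the change of
        // variables from [-1, 1] to [0, 1].)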
double value; - if (!CIntegration::gaussLegendre(probability, 0.0, 1.0, value)) - { - LOG_ERROR("Failed computing probability for " - << core::CContainerPrinter::print(samples)); + if (!CIntegration::gaussLegendre(probability, 0.0, 1.0, value)) { + LOG_ERROR("Failed computing probability for " << core::CContainerPrinter::print(samples)); return false; } @@ -1375,10 +1118,8 @@ bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProbabil } double value; - if (!probability(0.0, value)) - { - LOG_ERROR("Failed computing probability for " - << core::CContainerPrinter::print(samples)); + if (!probability(0.0, value)) { + LOG_ERROR("Failed computing probability for " << core::CContainerPrinter::print(samples)); return false; } @@ -1388,29 +1129,22 @@ bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProbabil return true; } -bool CNormalMeanPrecConjugate::isNonInformative() const -{ - return m_GammaRate == NON_INFORMATIVE_RATE - || m_GaussianPrecision == NON_INFORMATIVE_PRECISION; +bool CNormalMeanPrecConjugate::isNonInformative() const { + return m_GammaRate == NON_INFORMATIVE_RATE || m_GaussianPrecision == NON_INFORMATIVE_PRECISION; } -void CNormalMeanPrecConjugate::print(const std::string &indent, - std::string &result) const -{ +void CNormalMeanPrecConjugate::print(const std::string& indent, std::string& result) const { result += core_t::LINE_ENDING + indent + "normal "; - if (this->isNonInformative()) - { + if (this->isNonInformative()) { result += "non-informative"; return; } - result += "mean = " + core::CStringUtils::typeToStringPretty(this->marginalLikelihoodMean()) - + " sd = " + core::CStringUtils::typeToStringPretty(std::sqrt(this->marginalLikelihoodVariance())); + result += "mean = " + core::CStringUtils::typeToStringPretty(this->marginalLikelihoodMean()) + + " sd = " + core::CStringUtils::typeToStringPretty(std::sqrt(this->marginalLikelihoodVariance())); } -std::string CNormalMeanPrecConjugate::printJointDensityFunction() const -{ - if (this->isNonInformative()) - { +std::string CNormalMeanPrecConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative prior is improper and effectively 0 everywhere. 
return std::string(); } @@ -1439,8 +1173,7 @@ std::string CNormalMeanPrecConjugate::printJointDensityFunction() const std::ostringstream yCoordinates; xCoordinates << "x = ["; yCoordinates << "y = ["; - for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement, y += yIncrement) - { + for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement, y += yIncrement) { xCoordinates << x << " "; yCoordinates << y << " "; } @@ -1450,17 +1183,13 @@ std::string CNormalMeanPrecConjugate::printJointDensityFunction() const std::ostringstream pdf; pdf << "pdf = ["; x = xStart; - for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement) - { + for (unsigned int i = 0u; i < POINTS; ++i, x += xIncrement) { y = yStart; - for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) - { + for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) { double conditionalPrecision = m_GaussianPrecision * x; - boost::math::normal_distribution<> conditionalGaussian(m_GaussianMean, - 1.0 / std::sqrt(conditionalPrecision)); + boost::math::normal_distribution<> conditionalGaussian(m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision)); - pdf << (CTools::safePdf(gamma, x) * - CTools::safePdf(conditionalGaussian, y)) << " "; + pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) << " "; } pdf << core_t::LINE_ENDING; } @@ -1469,8 +1198,7 @@ std::string CNormalMeanPrecConjugate::printJointDensityFunction() const return xCoordinates.str() + yCoordinates.str() + pdf.str(); } -uint64_t CNormalMeanPrecConjugate::checksum(uint64_t seed) const -{ +uint64_t CNormalMeanPrecConjugate::checksum(uint64_t seed) const { seed = this->CPrior::checksum(seed); seed = CChecksum::calculate(seed, m_GaussianMean); seed = CChecksum::calculate(seed, m_GaussianPrecision); @@ -1478,23 +1206,19 @@ uint64_t CNormalMeanPrecConjugate::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_GammaRate); } -void CNormalMeanPrecConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CNormalMeanPrecConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CNormalMeanPrecConjugate"); } -std::size_t CNormalMeanPrecConjugate::memoryUsage() const -{ +std::size_t CNormalMeanPrecConjugate::memoryUsage() const { return 0; } -std::size_t CNormalMeanPrecConjugate::staticSize() const -{ +std::size_t CNormalMeanPrecConjugate::staticSize() const { return sizeof(*this); } -void CNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); inserter.insertValue(GAUSSIAN_MEAN_TAG, m_GaussianMean, core::CIEEE754::E_SinglePrecision); inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision, core::CIEEE754::E_SinglePrecision); @@ -1503,28 +1227,21 @@ void CNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInserter inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); } -double CNormalMeanPrecConjugate::mean() const -{ +double CNormalMeanPrecConjugate::mean() const { return m_GaussianMean; } -double CNormalMeanPrecConjugate::precision() const -{ - if (this->isNonInformative()) - { +double CNormalMeanPrecConjugate::precision() const { + if (this->isNonInformative()) { return 0.0; } return m_GammaShape / m_GammaRate; } -CNormalMeanPrecConjugate::TDoubleDoublePr 
-CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const
-{
-    if (this->isNonInformative())
-    {
-        return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                              boost::numeric::bounds<double>::highest());
+CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const {
+    if (this->isNonInformative()) {
+        return std::make_pair(boost::numeric::bounds<double>::lowest(), boost::numeric::bounds<double>::highest());
     }

     // Compute the symmetric confidence interval around the median of the
@@ -1558,13 +1275,9 @@ CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const
     return std::make_pair(xLower, xUpper);
 }

-CNormalMeanPrecConjugate::TDoubleDoublePr
-CNormalMeanPrecConjugate::confidenceIntervalPrecision(double percentage) const
-{
-    if (this->isNonInformative())
-    {
-        return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                              boost::numeric::bounds<double>::highest());
+CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::confidenceIntervalPrecision(double percentage) const {
+    if (this->isNonInformative()) {
+        return std::make_pair(boost::numeric::bounds<double>::lowest(), boost::numeric::bounds<double>::highest());
     }

     percentage /= 100.0;
@@ -1574,39 +1287,25 @@ CNormalMeanPrecConjugate::confidenceIntervalPrecision(double percentage) const
     // The marginal prior distribution for the precision is gamma.
     boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);

-    return std::make_pair(boost::math::quantile(gamma, lowerPercentile),
-                          boost::math::quantile(gamma, upperPercentile));
+    return std::make_pair(boost::math::quantile(gamma, lowerPercentile), boost::math::quantile(gamma, upperPercentile));
 }

-bool CNormalMeanPrecConjugate::equalTolerance(const CNormalMeanPrecConjugate &rhs,
-                                              const TEqualWithTolerance &equal) const
-{
-    LOG_DEBUG(m_GaussianMean << " " << rhs.m_GaussianMean << ", "
-              << m_GaussianPrecision << " " << rhs.m_GaussianPrecision << ", "
-              << m_GammaShape << " " << rhs.m_GammaShape << ", "
-              << m_GammaRate << " " << rhs.m_GammaRate);
-
-    return equal(m_GaussianMean, rhs.m_GaussianMean)
-           && equal(m_GaussianPrecision, rhs.m_GaussianPrecision)
-           && equal(m_GammaShape, rhs.m_GammaShape)
-           && equal(m_GammaRate, rhs.m_GammaRate);
+bool CNormalMeanPrecConjugate::equalTolerance(const CNormalMeanPrecConjugate& rhs, const TEqualWithTolerance& equal) const {
+    LOG_DEBUG(m_GaussianMean << " " << rhs.m_GaussianMean << ", " << m_GaussianPrecision << " " << rhs.m_GaussianPrecision << ", "
+              << m_GammaShape << " " << rhs.m_GammaShape << ", " << m_GammaRate << " " << rhs.m_GammaRate);
+
+    return equal(m_GaussianMean, rhs.m_GaussianMean) && equal(m_GaussianPrecision, rhs.m_GaussianPrecision) &&
+           equal(m_GammaShape, rhs.m_GammaShape) && equal(m_GammaRate, rhs.m_GammaRate);
 }

-bool CNormalMeanPrecConjugate::isBad() const
-{
-    return !CMathsFuncs::isFinite(m_GaussianMean)
-           || !CMathsFuncs::isFinite(m_GaussianPrecision)
-           || !CMathsFuncs::isFinite(m_GammaShape)
-           || !CMathsFuncs::isFinite(m_GammaRate);
+bool CNormalMeanPrecConjugate::isBad() const {
+    return !CMathsFuncs::isFinite(m_GaussianMean) || !CMathsFuncs::isFinite(m_GaussianPrecision) || !CMathsFuncs::isFinite(m_GammaShape) ||
+           !CMathsFuncs::isFinite(m_GammaRate);
 }

-std::string CNormalMeanPrecConjugate::debug() const
-{
+std::string CNormalMeanPrecConjugate::debug() const {
     std::ostringstream result;
-    result << std::scientific << std::setprecision(15)
-           << m_GaussianMean << " "
-           << m_GaussianPrecision << " "
-           << m_GammaShape << " "
+    result << std::scientific << std::setprecision(15) << m_GaussianMean << " " << m_GaussianPrecision << " " << m_GammaShape << " "
           << m_GammaRate;
     return result.str();
 }
@@ -1615,6 +1314,5 @@ const double CNormalMeanPrecConjugate::NON_INFORMATIVE_MEAN = 0.0;
 const double CNormalMeanPrecConjugate::NON_INFORMATIVE_PRECISION = 0.0;
 const double CNormalMeanPrecConjugate::NON_INFORMATIVE_SHAPE = 1.0;
 const double CNormalMeanPrecConjugate::NON_INFORMATIVE_RATE = 0.0;
-
 }
 }
diff --git a/lib/maths/COneOfNPrior.cc b/lib/maths/COneOfNPrior.cc
index b2dc3b72a4..b2a14c25ca 100644
--- a/lib/maths/COneOfNPrior.cc
+++ b/lib/maths/COneOfNPrior.cc
@@ -16,10 +16,10 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
+#include
 #include
 #include
@@ -33,22 +33,18 @@
 #include
 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
+namespace {

 using TBool5Vec = core::CSmallVector<bool, 5>;
 using TDouble5Vec = core::CSmallVector<double, 5>;
 using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;

 //! Compute the log of \p n.
-double logn(std::size_t n)
-{
-    static const double LOG_N[] = { 0.0, std::log(2.0), std::log(3.0), std::log(4.0), std::log(5.0) };
+double logn(std::size_t n) {
+    static const double LOG_N[] = {0.0, std::log(2.0), std::log(3.0), std::log(4.0), std::log(5.0)};
     return n < boost::size(LOG_N) ? LOG_N[n - 1] : std::log(static_cast<double>(n));
 }

@@ -74,46 +70,31 @@ const std::string PRIOR_TAG("b");
 const std::string EMPTY_STRING;

 //! Persist state for the models by passing information to \p inserter.
-void modelAcceptPersistInserter(const CModelWeight &weight,
-                                const CPrior &prior,
-                                core::CStatePersistInserter &inserter)
-{
+void modelAcceptPersistInserter(const CModelWeight& weight, const CPrior& prior, core::CStatePersistInserter& inserter) {
     inserter.insertLevel(WEIGHT_TAG, boost::bind(&CModelWeight::acceptPersistInserter, &weight, _1));
     inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(prior), _1));
 }
-
 }

 //////// COneOfNPrior Implementation ////////

-COneOfNPrior::COneOfNPrior(const TPriorPtrVec &models,
-                           maths_t::EDataType dataType,
-                           double decayRate) :
-        CPrior(dataType, decayRate)
-{
-    if (models.empty())
-    {
+COneOfNPrior::COneOfNPrior(const TPriorPtrVec& models, maths_t::EDataType dataType, double decayRate) : CPrior(dataType, decayRate) {
+    if (models.empty()) {
         LOG_ERROR("Can't initialize one-of-n with no models!");
         return;
     }

-    // Create a new model vector using uniform weights.
     m_Models.reserve(models.size());
     CModelWeight weight(1.0);
-    for (const auto &model : models)
-    {
+    for (const auto& model : models) {
         m_Models.emplace_back(weight, model);
     }
 }

-COneOfNPrior::COneOfNPrior(const TDoublePriorPtrPrVec &models,
-                           maths_t::EDataType dataType,
-                           double decayRate/*= 0.0*/) :
-        CPrior(dataType, decayRate)
-{
-    if (models.empty())
-    {
+COneOfNPrior::COneOfNPrior(const TDoublePriorPtrPrVec& models, maths_t::EDataType dataType, double decayRate /*= 0.0*/)
+    : CPrior(dataType, decayRate) {
+    if (models.empty()) {
         LOG_ERROR("Can't initialize mixed model with no models!");
         return;
     }
@@ -122,103 +103,79 @@ COneOfNPrior::COneOfNPrior(const TDoublePriorPtrPrVec &models,

     // Create a new model vector using the specified models and their associated weights.
m_Models.reserve(models.size()); - for (const auto &model : models) - { + for (const auto& model : models) { m_Models.emplace_back(CModelWeight(model.first), model.second); } } -COneOfNPrior::COneOfNPrior(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) : - CPrior(params.s_DataType, params.s_DecayRate) -{ - traverser.traverseSubLevel(boost::bind(&COneOfNPrior::acceptRestoreTraverser, - this, boost::cref(params), _1)); +COneOfNPrior::COneOfNPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) + : CPrior(params.s_DataType, params.s_DecayRate) { + traverser.traverseSubLevel(boost::bind(&COneOfNPrior::acceptRestoreTraverser, this, boost::cref(params), _1)); } -bool COneOfNPrior::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, - double decayRate, - core::CStringUtils::stringToType(traverser.value(), decayRate), - this->decayRate(decayRate)) - RESTORE(MODEL_TAG, traverser.traverseSubLevel(boost::bind(&COneOfNPrior::modelAcceptRestoreTraverser, - this, boost::cref(params), _1))) +bool COneOfNPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN( + DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) + RESTORE(MODEL_TAG, + traverser.traverseSubLevel(boost::bind(&COneOfNPrior::modelAcceptRestoreTraverser, this, boost::cref(params), _1))) RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -COneOfNPrior::COneOfNPrior(const COneOfNPrior &other) : - CPrior(other.dataType(), other.decayRate()) -{ +COneOfNPrior::COneOfNPrior(const COneOfNPrior& other) : CPrior(other.dataType(), other.decayRate()) { // Clone all the models up front so we can implement strong exception safety. 
m_Models.reserve(other.m_Models.size()); - for (const auto &model : other.m_Models) - { + for (const auto& model : other.m_Models) { m_Models.emplace_back(model.first, TPriorPtr(model.second->clone())); } this->CPrior::addSamples(other.numberSamples()); } -COneOfNPrior &COneOfNPrior::operator=(const COneOfNPrior &rhs) -{ - if (this != &rhs) - { +COneOfNPrior& COneOfNPrior::operator=(const COneOfNPrior& rhs) { + if (this != &rhs) { COneOfNPrior tmp(rhs); this->swap(tmp); } return *this; } -void COneOfNPrior::swap(COneOfNPrior &other) -{ +void COneOfNPrior::swap(COneOfNPrior& other) { this->CPrior::swap(other); m_Models.swap(other.m_Models); } -COneOfNPrior::EPrior COneOfNPrior::type() const -{ +COneOfNPrior::EPrior COneOfNPrior::type() const { return E_OneOfN; } -COneOfNPrior *COneOfNPrior::clone() const -{ +COneOfNPrior* COneOfNPrior::clone() const { return new COneOfNPrior(*this); } -void COneOfNPrior::dataType(maths_t::EDataType value) -{ +void COneOfNPrior::dataType(maths_t::EDataType value) { this->CPrior::dataType(value); - for (auto &model : m_Models) - { + for (auto& model : m_Models) { model.second->dataType(value); } } -void COneOfNPrior::decayRate(double value) -{ +void COneOfNPrior::decayRate(double value) { this->CPrior::decayRate(value); - for (auto &model : m_Models) - { + for (auto& model : m_Models) { model.second->decayRate(this->decayRate()); } } -void COneOfNPrior::setToNonInformative(double offset, double decayRate) -{ - for (auto &model : m_Models) - { +void COneOfNPrior::setToNonInformative(double offset, double decayRate) { + for (auto& model : m_Models) { model.first.age(0.0); model.second->setToNonInformative(offset, decayRate); } @@ -226,60 +183,45 @@ void COneOfNPrior::setToNonInformative(double offset, double decayRate) this->numberSamples(0.0); } -void COneOfNPrior::removeModels(CModelFilter &filter) -{ +void COneOfNPrior::removeModels(CModelFilter& filter) { CScopeCanonicalizeWeights canonicalize(m_Models); std::size_t last = 0u; - for (std::size_t i = 0u; i < m_Models.size(); ++i) - { - if (last != i) - { + for (std::size_t i = 0u; i < m_Models.size(); ++i) { + if (last != i) { std::swap(m_Models[last], m_Models[i]); } - if (!filter(m_Models[last].second->type())) - { + if (!filter(m_Models[last].second->type())) { ++last; } } m_Models.erase(m_Models.begin() + last, m_Models.end()); } -bool COneOfNPrior::needsOffset() const -{ - for (const auto &model : m_Models) - { - if (model.second->needsOffset()) - { +bool COneOfNPrior::needsOffset() const { + for (const auto& model : m_Models) { + if (model.second->needsOffset()) { return true; } } return false; } -double COneOfNPrior::adjustOffset(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) -{ +double COneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { TMeanAccumulator result; TDouble5Vec penalties; - for (auto &model : m_Models) - { + for (auto& model : m_Models) { double penalty = model.second->adjustOffset(weightStyles, samples, weights); penalties.push_back(penalty); result.add(penalty, model.first); } - if (CBasicStatistics::mean(result) != 0.0) - { + if (CBasicStatistics::mean(result) != 0.0) { CScopeCanonicalizeWeights canonicalize(m_Models); - for (std::size_t i = 0u; i < penalties.size(); ++i) - { - if ( m_Models[i].second->participatesInModelSelection() - && CMathsFuncs::isFinite(penalties)) - { - CModelWeight &weight = m_Models[i].first; + for (std::size_t i = 0u; i < 
penalties.size(); ++i) { + if (m_Models[i].second->participatesInModelSelection() && CMathsFuncs::isFinite(penalties)) { + CModelWeight& weight = m_Models[i].first; weight.logWeight(weight.logWeight() + penalties[i]); } } @@ -288,31 +230,22 @@ double COneOfNPrior::adjustOffset(const TWeightStyleVec &weightStyles, return CBasicStatistics::mean(result); } -double COneOfNPrior::offset() const -{ +double COneOfNPrior::offset() const { double offset = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { offset = std::max(offset, model.second->offset()); } return offset; } -void COneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) -{ - if (samples.empty()) - { +void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { + if (samples.empty()) { return; } - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" - << core::CContainerPrinter::print(samples) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return; } @@ -380,31 +313,25 @@ void COneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, TDouble5Vec logLikelihoods; TMaxAccumulator maxLogLikelihood; TBool5Vec used, uses; - for (auto &model : m_Models) - { + for (auto& model : m_Models) { bool use = model.second->participatesInModelSelection(); // Update the weights with the marginal likelihoods. double logLikelihood = 0.0; - maths_t::EFloatingPointErrorStatus status = use ? - model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) : - maths_t::E_FpOverflowed; + maths_t::EFloatingPointErrorStatus status = + use ? 
model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) : maths_t::E_FpOverflowed; - if (status & maths_t::E_FpFailed) - { + if (status & maths_t::E_FpFailed) { LOG_ERROR("Failed to compute log-likelihood"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); return; } - if (!(status & maths_t::E_FpOverflowed)) - { + if (!(status & maths_t::E_FpOverflowed)) { logLikelihood += model.second->unmarginalizedParameters() * penalty; logLikelihoods.push_back(logLikelihood); maxLogLikelihood.add(logLikelihood); - } - else - { + } else { logLikelihoods.push_back(MINUS_INF); } @@ -415,29 +342,22 @@ void COneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, uses.push_back(model.second->participatesInModelSelection()); } - for (std::size_t i = 0; i < m_Models.size(); ++i) - { - if (!uses[i]) - { - CModelWeight &weight = m_Models[i].first; + for (std::size_t i = 0; i < m_Models.size(); ++i) { + if (!uses[i]) { + CModelWeight& weight = m_Models[i].first; weight.logWeight(MINUS_INF); } } - if (!isNonInformative && maxLogLikelihood.count() > 0) - { + if (!isNonInformative && maxLogLikelihood.count() > 0) { LOG_TRACE("logLikelihoods = " << core::CContainerPrinter::print(logLikelihoods)); double n = 0.0; - try - { - for (const auto &weight : weights) - { + try { + for (const auto& weight : weights) { n += maths_t::count(weightStyles, weight); } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to add samples: " << e.what()); return; } @@ -445,30 +365,24 @@ void COneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, // The idea here is to limit the amount which extreme samples // affect model selection, particularly early on in the model // life-cycle. - double minLogLikelihood = maxLogLikelihood[0] - - n * std::min(maxModelPenalty(this->numberSamples()), 100.0); + double minLogLikelihood = maxLogLikelihood[0] - n * std::min(maxModelPenalty(this->numberSamples()), 100.0); TMaxAccumulator maxLogWeight; - for (std::size_t i = 0; i < m_Models.size(); ++i) - { - if (used[i]) - { - CModelWeight &weight = m_Models[i].first; + for (std::size_t i = 0; i < m_Models.size(); ++i) { + if (used[i]) { + CModelWeight& weight = m_Models[i].first; weight.addLogFactor(std::max(logLikelihoods[i], minLogLikelihood)); maxLogWeight.add(weight.logWeight()); } } - for (std::size_t i = 0u; i < m_Models.size(); ++i) - { - if (!used[i] && uses[i]) - { + for (std::size_t i = 0u; i < m_Models.size(); ++i) { + if (!used[i] && uses[i]) { m_Models[i].first.logWeight(maxLogWeight[0] + LOG_INITIAL_WEIGHT); } } } - if (this->badWeights()) - { + if (this->badWeights()) { LOG_ERROR("Update failed (" << this->debugWeights() << ")"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); @@ -476,10 +390,8 @@ void COneOfNPrior::addSamples(const TWeightStyleVec &weightStyles, } } -void COneOfNPrior::propagateForwardsByTime(double time) -{ - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { +void COneOfNPrior::propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { LOG_ERROR("Bad propagation time " << time); return; } @@ -488,8 +400,7 @@ void COneOfNPrior::propagateForwardsByTime(double time) double alpha = std::exp(-this->decayRate() * time); - for (auto &model : m_Models) - { + for (auto& model : m_Models) { model.first.age(alpha); model.second->propagateForwardsByTime(time); } @@ -499,17 +410,14 @@ void 
COneOfNPrior::propagateForwardsByTime(double time) LOG_TRACE("numberSamples = " << this->numberSamples()); } -COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodSupport() const -{ +COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodSupport() const { TDoubleDoublePr result(MINUS_INF, INF); // We define this is as the intersection of the component model supports. - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { TDoubleDoublePr modelSupport = model.second->marginalLikelihoodSupport(); - result.first = std::max(result.first, modelSupport.first); + result.first = std::max(result.first, modelSupport.first); result.second = std::min(result.second, modelSupport.second); } } @@ -517,10 +425,8 @@ COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodSupport() const return result; } -double COneOfNPrior::marginalLikelihoodMean() const -{ - if (this->isNonInformative()) - { +double COneOfNPrior::marginalLikelihoodMean() const { + if (this->isNonInformative()) { return this->medianModelMean(); } @@ -532,11 +438,9 @@ double COneOfNPrior::marginalLikelihoodMean() const double result = 0.0; double Z = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double wi = model.first; - if (wi > MINIMUM_SIGNIFICANT_WEIGHT) - { + if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { result += wi * model.second->marginalLikelihoodMean(); Z += wi; } @@ -544,10 +448,8 @@ double COneOfNPrior::marginalLikelihoodMean() const return result / Z; } -double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const -{ - if (this->isNonInformative()) - { +double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const { + if (this->isNonInformative()) { return this->medianModelMean(); } @@ -555,11 +457,9 @@ double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const double result = 0.0; double Z = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double wi = model.first; - if (wi > MINIMUM_SIGNIFICANT_WEIGHT) - { + if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { result += wi * model.second->nearestMarginalLikelihoodMean(value); Z += wi; } @@ -567,9 +467,7 @@ double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const return result / Z; } -double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ +double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { // We approximate this as the weighted average of the component // model modes. 
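For reference, the weighted average used here is exact for the mixture mean but only an approximation for the mode and, below, the variance: the exact mixture variance adds the spread of the component means about the mixture mean. A standalone sketch with hypothetical types, assuming the weights have already been normalized to sum to one:

#include <vector>

struct SComponent {
    double s_Weight, s_Mean, s_Variance;
};

// Exact mixture mean: the weighted average of the component means.
double mixtureMean(const std::vector<SComponent>& components) {
    double mean = 0.0;
    for (const auto& component : components) {
        mean += component.s_Weight * component.s_Mean;
    }
    return mean;
}

// Exact mixture variance: a weighted average of component variances plus
// the spread of the component means, the term the approximation drops.
double mixtureVariance(const std::vector<SComponent>& components) {
    double mean = mixtureMean(components);
    double variance = 0.0;
    for (const auto& component : components) {
        double shift = component.s_Mean - mean;
        variance += component.s_Weight * (component.s_Variance + shift * shift);
    }
    return variance;
}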
@@ -579,10 +477,8 @@ double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec &weightStyles, TDouble4Vec1Vec weight(1, weights); TMeanAccumulator mode; - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { double wi = model.first; double mi = model.second->marginalLikelihoodMode(weightStyles, weights); double logLikelihood; @@ -597,11 +493,8 @@ double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec &weightStyles, return CTools::truncate(result, support.first, support.second); } -double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ - if (this->isNonInformative()) - { +double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { + if (this->isNonInformative()) { return INF; } @@ -613,11 +506,9 @@ double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec &weightSty double result = 0.0; double Z = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double wi = model.first; - if (wi > MINIMUM_SIGNIFICANT_WEIGHT) - { + if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { result += wi * model.second->marginalLikelihoodVariance(weightStyles, weights); Z += wi; } @@ -625,11 +516,9 @@ double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec &weightSty return result / Z; } -COneOfNPrior::TDoubleDoublePr -COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ +COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { // We approximate this as the weighted sum of the component model // intervals. 
To compute the weights we expand all component model // marginal likelihoods about a reasonable estimate for the true @@ -652,14 +541,11 @@ COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, TMeanAccumulator x1, x2; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { double weight = model.first; - if (weight >= MAXIMUM_RELATIVE_ERROR) - { - TDoubleDoublePr interval = - model.second->marginalLikelihoodConfidenceInterval(percentage, weightStyles, weights); - x1.add(interval.first, weight); + if (weight >= MAXIMUM_RELATIVE_ERROR) { + TDoubleDoublePr interval = model.second->marginalLikelihoodConfidenceInterval(percentage, weightStyles, weights); + x1.add(interval.first, weight); x2.add(interval.second, weight); } } @@ -668,26 +554,20 @@ COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, return std::make_pair(CBasicStatistics::mean(x1), CBasicStatistics::mean(x2)); } -maths_t::EFloatingPointErrorStatus -COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const -{ +maths_t::EFloatingPointErrorStatus COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" - << core::CContainerPrinter::print(samples) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } @@ -706,19 +586,15 @@ COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, double Z = 0.0; TMaxAccumulator maxLogLikelihood; - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { double logLikelihood; maths_t::EFloatingPointErrorStatus status = - model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood); - if (status & maths_t::E_FpFailed) - { + model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood); + if (status & maths_t::E_FpFailed) { return status; } - if (!(status & maths_t::E_FpOverflowed)) - { + if (!(status & maths_t::E_FpOverflowed)) { logLikelihood += model.first.logWeight(); logLikelihoods.push_back(logLikelihood); maxLogLikelihood.add(logLikelihood); @@ -727,8 +603,7 @@ COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, } } - if (maxLogLikelihood.count() == 0) - { + if (maxLogLikelihood.count() == 0) { // Technically, the marginal likelihood is zero here so the // log would be infinite. We use minus max double because // log(0) = HUGE_VALUE, which causes problems for Windows. 
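The aggregation in the next hunk is the standard log-sum-exp trick: factoring out the largest log-likelihood keeps every exponentiated term in [0, 1], so the sum cannot overflow. A minimal standalone equivalent (the function name is illustrative):

#include <algorithm>
#include <cmath>
#include <vector>

// Numerically stable log(sum_i exp(logValues[i])): subtracting the maximum
// before exponentiating bounds every term by one.
double logSumExp(const std::vector<double>& logValues) {
    double maxLogValue = *std::max_element(logValues.begin(), logValues.end());
    double sum = 0.0;
    for (double logValue : logValues) {
        sum += std::exp(logValue - maxLogValue);
    }
    return maxLogValue + std::log(sum);
}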
@@ -741,24 +616,20 @@ COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, return maths_t::E_FpOverflowed; } - for (auto logLikelihood : logLikelihoods) - { + for (auto logLikelihood : logLikelihoods) { result += std::exp(logLikelihood - maxLogLikelihood[0]); } result = maxLogLikelihood[0] + CTools::fastLog(result / Z); maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result); - if (status & maths_t::E_FpFailed) - { + if (status & maths_t::E_FpFailed) { LOG_ERROR("Failed to compute log likelihood (" << this->debugWeights() << ")"); LOG_ERROR("samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR("weights = " << core::CContainerPrinter::print(weights)); LOG_ERROR("logLikelihoods = " << core::CContainerPrinter::print(logLikelihoods)); LOG_ERROR("maxLogLikelihood = " << maxLogLikelihood[0]); - } - else if (status & maths_t::E_FpOverflowed) - { + } else if (status & maths_t::E_FpOverflowed) { LOG_ERROR("Log likelihood overflowed for (" << this->debugWeights() << ")"); LOG_TRACE("likelihoods = " << core::CContainerPrinter::print(logLikelihoods)); LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); @@ -767,77 +638,63 @@ COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, return status; } -void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, - TDouble1Vec &samples) const -{ +void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { samples.clear(); - if (numberSamples == 0 || this->isNonInformative()) - { + if (numberSamples == 0 || this->isNonInformative()) { return; } TDouble5Vec weights; double Z = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { weights.push_back(model.first); Z += model.first; } - for (auto &weight : weights) - { + for (auto& weight : weights) { weight /= Z; } CSampling::TSizeVec sampling; CSampling::weightedSample(numberSamples, weights, sampling); - LOG_TRACE("weights = " << core::CContainerPrinter::print(weights) - << ", sampling = " << core::CContainerPrinter::print(sampling)); + LOG_TRACE("weights = " << core::CContainerPrinter::print(weights) << ", sampling = " << core::CContainerPrinter::print(sampling)); - if (sampling.size() != m_Models.size()) - { + if (sampling.size() != m_Models.size()) { LOG_ERROR("Failed to sample marginal likelihood"); return; } TDoubleDoublePr support = this->marginalLikelihoodSupport(); - support.first = CTools::shiftRight(support.first); + support.first = CTools::shiftRight(support.first); support.second = CTools::shiftLeft(support.second); samples.reserve(numberSamples); TDouble1Vec modelSamples; - for (std::size_t i = 0u; i < m_Models.size(); ++i) - { + for (std::size_t i = 0u; i < m_Models.size(); ++i) { modelSamples.clear(); m_Models[i].second->sampleMarginalLikelihood(sampling[i], modelSamples); - for (auto sample : modelSamples) - { + for (auto sample : modelSamples) { samples.push_back(CTools::truncate(sample, support.first, support.second)); } } - LOG_TRACE("samples = "<< core::CContainerPrinter::print(samples)); + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples)); } bool COneOfNPrior::minusLogJointCdfImpl(bool complement, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const 
{ lowerBound = upperBound = 0.0; - if (samples.empty()) - { - LOG_ERROR("Can't compute c.d.f. " - << (complement ? "complement " : "") << "for empty sample set"); + if (samples.empty()) { + LOG_ERROR("Can't compute c.d.f. " << (complement ? "complement " : "") << "for empty sample set"); return false; } - if (this->isNonInformative()) - { - lowerBound = upperBound = -std::log(complement ? 1.0 - CTools::IMPROPER_CDF : - CTools::IMPROPER_CDF); + if (this->isNonInformative()) { + lowerBound = upperBound = -std::log(complement ? 1.0 - CTools::IMPROPER_CDF : CTools::IMPROPER_CDF); return true; } @@ -860,20 +717,16 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, TMaxAccumulator maxLogLowerBound; TMaxAccumulator maxLogUpperBound; double logMaximumRemainder = MINUS_INF; - for (std::size_t i = 0u, n = logWeights.size(); i < n; ++i) - { + for (std::size_t i = 0u, n = logWeights.size(); i < n; ++i) { double wi = logWeights[i].first; - const CPrior &model = *m_Models[logWeights[i].second].second; + const CPrior& model = *m_Models[logWeights[i].second].second; double li = 0.0; double ui = 0.0; - if (complement && !model.minusLogJointCdfComplement(weightStyles, samples, weights, li, ui)) - { + if (complement && !model.minusLogJointCdfComplement(weightStyles, samples, weights, li, ui)) { LOG_ERROR("Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); return false; - } - else if (!complement && !model.minusLogJointCdf(weightStyles, samples, weights, li, ui)) - { + } else if (!complement && !model.minusLogJointCdf(weightStyles, samples, weights, li, ui)) { LOG_ERROR("Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); return false; } @@ -886,86 +739,80 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, maxLogUpperBound.add(ui); // Check if we can exit early with reasonable precision. - if (i+1 < n) - { - logMaximumRemainder = logn(n-i-1) + logWeights[i+1].first; - if ( logMaximumRemainder < maxLogLowerBound[0] + LOG_MAXIMUM_RELATIVE_ERROR - && logMaximumRemainder < maxLogUpperBound[0] + LOG_MAXIMUM_RELATIVE_ERROR) - { + if (i + 1 < n) { + logMaximumRemainder = logn(n - i - 1) + logWeights[i + 1].first; + if (logMaximumRemainder < maxLogLowerBound[0] + LOG_MAXIMUM_RELATIVE_ERROR && + logMaximumRemainder < maxLogUpperBound[0] + LOG_MAXIMUM_RELATIVE_ERROR) { break; } } } - if (!CTools::logWillUnderflow(maxLogLowerBound[0])) - { + if (!CTools::logWillUnderflow(maxLogLowerBound[0])) { maxLogLowerBound[0] = 0.0; } - if (!CTools::logWillUnderflow(maxLogUpperBound[0])) - { + if (!CTools::logWillUnderflow(maxLogUpperBound[0])) { maxLogUpperBound[0] = 0.0; } - for (std::size_t i = 0u; i < logLowerBounds.size(); ++i) - { + for (std::size_t i = 0u; i < logLowerBounds.size(); ++i) { lowerBound += std::exp(logLowerBounds[i] - maxLogLowerBound[0]); upperBound += std::exp(logUpperBounds[i] - maxLogUpperBound[0]); } lowerBound = -std::log(lowerBound) - maxLogLowerBound[0]; upperBound = -std::log(upperBound) - maxLogUpperBound[0]; - if (logLowerBounds.size() < logWeights.size()) - { + if (logLowerBounds.size() < logWeights.size()) { upperBound += -std::log(1.0 + std::exp(logMaximumRemainder + upperBound)); } lowerBound = std::max(lowerBound, 0.0); upperBound = std::max(upperBound, 0.0); - LOG_TRACE("Joint -log(c.d.f." << (complement ? " complement" : "") << ") = [" - << lowerBound << "," << upperBound << "]"); + LOG_TRACE("Joint -log(c.d.f." << (complement ? 
" complement" : "") << ") = [" << lowerBound << "," << upperBound << "]"); return true; } -bool COneOfNPrior::minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ +bool COneOfNPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { return this->minusLogJointCdfImpl(false, // complement - weightStyles, samples, weights, - lowerBound, upperBound); + weightStyles, + samples, + weights, + lowerBound, + upperBound); } -bool COneOfNPrior::minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ +bool COneOfNPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { return this->minusLogJointCdfImpl(true, // complement - weightStyles, samples, weights, - lowerBound, upperBound); + weightStyles, + samples, + weights, + lowerBound, + upperBound); } bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const -{ + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const { lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute distribution for empty sample set"); return false; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { lowerBound = upperBound = 1.0; return true; } @@ -986,14 +833,11 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati TDoubleSizePr5Vec logWeights = this->normalizedLogWeights(); TMaxAccumulator tail_; - for (std::size_t i = 0u; i < logWeights.size(); ++i) - { + for (std::size_t i = 0u; i < logWeights.size(); ++i) { double weight = std::exp(logWeights[i].first); - const CPrior &model = *m_Models[logWeights[i].second].second; + const CPrior& model = *m_Models[logWeights[i].second].second; - if (lowerBound > static_cast(m_Models.size() - i) * weight - / MAXIMUM_RELATIVE_ERROR) - { + if (lowerBound > static_cast(m_Models.size() - i) * weight / MAXIMUM_RELATIVE_ERROR) { // The probability calculation is relatively expensive so don't // evaluate the probabilities that aren't needed to get good // accuracy. @@ -1002,37 +846,28 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati double modelLowerBound, modelUpperBound; maths_t::ETail modelTail; - if (!model.probabilityOfLessLikelySamples(calculation, - weightStyles, samples, weights, - modelLowerBound, modelUpperBound, modelTail)) - { + if (!model.probabilityOfLessLikelySamples( + calculation, weightStyles, samples, weights, modelLowerBound, modelUpperBound, modelTail)) { // Logging handled at a lower level. 
return false; } - LOG_TRACE("weight = " << weight - << ", modelLowerBound = " << modelLowerBound - << ", modelUpperBound = " << modelUpperBound); + LOG_TRACE("weight = " << weight << ", modelLowerBound = " << modelLowerBound << ", modelUpperBound = " << modelUpperBound); lowerBound += weight * modelLowerBound; upperBound += weight * modelUpperBound; tail_.add(TDoubleTailPr(weight * (modelLowerBound + modelUpperBound), modelTail)); } - if ( !(lowerBound >= 0.0 && lowerBound <= 1.001) - || !(upperBound >= 0.0 && upperBound <= 1.001)) - { - LOG_ERROR("Bad probability bounds = [" - << lowerBound << ", " << upperBound << "]" - << ", " << core::CContainerPrinter::print(logWeights)); + if (!(lowerBound >= 0.0 && lowerBound <= 1.001) || !(upperBound >= 0.0 && upperBound <= 1.001)) { + LOG_ERROR("Bad probability bounds = [" << lowerBound << ", " << upperBound << "]" + << ", " << core::CContainerPrinter::print(logWeights)); } - if (CMathsFuncs::isNan(lowerBound)) - { + if (CMathsFuncs::isNan(lowerBound)) { lowerBound = 0.0; } - if (CMathsFuncs::isNan(upperBound)) - { + if (CMathsFuncs::isNan(upperBound)) { upperBound = 1.0; } lowerBound = CTools::truncate(lowerBound, 0.0, 1.0); @@ -1044,154 +879,118 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati return true; } -bool COneOfNPrior::isNonInformative() const -{ - for (const auto &model : m_Models) - { - if ( model.second->participatesInModelSelection() - && model.second->isNonInformative()) - { +bool COneOfNPrior::isNonInformative() const { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection() && model.second->isNonInformative()) { return true; } } return false; } -void COneOfNPrior::print(const std::string &indent, std::string &result) const -{ +void COneOfNPrior::print(const std::string& indent, std::string& result) const { result += core_t::LINE_ENDING + indent + "one-of-n"; - if (this->isNonInformative()) - { + if (this->isNonInformative()) { result += " non-informative"; } static const double MINIMUM_SIGNIFICANT_WEIGHT = 0.05; result += ':'; - result += core_t::LINE_ENDING + indent - + " # samples " - + core::CStringUtils::typeToStringPretty(this->numberSamples()); - for (const auto &model : m_Models) - { + result += core_t::LINE_ENDING + indent + " # samples " + core::CStringUtils::typeToStringPretty(this->numberSamples()); + for (const auto& model : m_Models) { double weight = model.first; - if (weight >= MINIMUM_SIGNIFICANT_WEIGHT) - { - std::string indent_ = indent - + " weight " - + core::CStringUtils::typeToStringPretty(weight) + " "; + if (weight >= MINIMUM_SIGNIFICANT_WEIGHT) { + std::string indent_ = indent + " weight " + core::CStringUtils::typeToStringPretty(weight) + " "; model.second->print(indent_, result); } } } -std::string COneOfNPrior::printJointDensityFunction() const -{ +std::string COneOfNPrior::printJointDensityFunction() const { return "Not supported"; } -uint64_t COneOfNPrior::checksum(uint64_t seed) const -{ +uint64_t COneOfNPrior::checksum(uint64_t seed) const { seed = this->CPrior::checksum(seed); return CChecksum::calculate(seed, m_Models); } -void COneOfNPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void COneOfNPrior::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("COneOfNPrior"); core::CMemoryDebug::dynamicSize("m_Models", m_Models, mem); } -std::size_t COneOfNPrior::memoryUsage() const -{ +std::size_t COneOfNPrior::memoryUsage() const { return core::CMemory::dynamicSize(m_Models); }
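For readers skimming the diff: the computation minusLogJointCdfImpl performs above is a weighted mixture c.d.f. evaluated in log space. Given per-model weights w_i, normalized and sorted in decreasing order, and per-model values -log F_i, it returns -log(sum_i w_i F_i), factoring out the largest log term so the exponentials cannot underflow, and exiting the loop early once the bound log(n - i - 1) + log w_{i+1} shows the remaining terms are negligible at the configured relative error. The following is a minimal, self-contained sketch of that pattern; the names ModelTerm, minusLogMixtureCdf and logRelativeTolerance are illustrative only and are not part of the ml::maths API, and the real implementation additionally tracks separate lower and upper bounds, corrects for models skipped by the early exit, and only factors out the maximum when exponentiating it would actually underflow (the CTools::logWillUnderflow checks).

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

// One (log weight, -log c.d.f.) pair per model; the vector is assumed
// non-empty and sorted by descending logWeight.
struct ModelTerm {
    double logWeight;   // normalized so the weights sum to one
    double minusLogCdf; // -log F_i(x) for this model
};

// Returns -log(sum_i w_i * F_i(x)), skipping provably negligible tail terms.
double minusLogMixtureCdf(const std::vector<ModelTerm>& terms, double logRelativeTolerance) {
    std::vector<double> logTerms;
    double maxLogTerm = -std::numeric_limits<double>::infinity();
    for (std::size_t i = 0; i < terms.size(); ++i) {
        // log(w_i * F_i) = log w_i - (-log F_i).
        double logTerm = terms[i].logWeight - terms[i].minusLogCdf;
        logTerms.push_back(logTerm);
        maxLogTerm = std::max(maxLogTerm, logTerm);
        if (i + 1 < terms.size()) {
            // Since F_j <= 1, each remaining term is at most the next model's
            // weight, so the n - i - 1 leftover terms are bounded as follows.
            double logRemainder = std::log(static_cast<double>(terms.size() - i - 1)) + terms[i + 1].logWeight;
            if (logRemainder < maxLogTerm + logRelativeTolerance) {
                break; // the tail cannot change the result materially
            }
        }
    }
    // Log-sum-exp: factor out the largest term before exponentiating.
    double sum = 0.0;
    for (double logTerm : logTerms) {
        sum += std::exp(logTerm - maxLogTerm);
    }
    return -(std::log(sum) + maxLogTerm);
}

Sorting by descending weight is what makes the early exit safe: because each F_i is at most one, every remaining log term is bounded above by its log weight.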
-std::size_t COneOfNPrior::staticSize() const -{ +std::size_t COneOfNPrior::staticSize() const { return sizeof(*this); } -void COneOfNPrior::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - for (const auto &model : m_Models) - { - inserter.insertLevel(MODEL_TAG, boost::bind(&modelAcceptPersistInserter, - boost::cref(model.first), - boost::cref(*model.second), _1)); +void COneOfNPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + for (const auto& model : m_Models) { + inserter.insertLevel(MODEL_TAG, boost::bind(&modelAcceptPersistInserter, boost::cref(model.first), boost::cref(*model.second), _1)); } inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); } -COneOfNPrior::TDoubleVec COneOfNPrior::weights() const -{ +COneOfNPrior::TDoubleVec COneOfNPrior::weights() const { TDoubleVec result = this->logWeights(); - for (auto &weight : result) - { + for (auto& weight : result) { weight = std::exp(weight); } return result; } -COneOfNPrior::TDoubleVec COneOfNPrior::logWeights() const -{ +COneOfNPrior::TDoubleVec COneOfNPrior::logWeights() const { TDoubleVec result; result.reserve(m_Models.size()); double Z = 0.0; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { result.push_back(model.first.logWeight()); Z += std::exp(result.back()); } Z = std::log(Z); - for (auto &weight : result) - { + for (auto& weight : result) { weight -= Z; } return result; } -COneOfNPrior::TPriorCPtrVec COneOfNPrior::models() const -{ +COneOfNPrior::TPriorCPtrVec COneOfNPrior::models() const { TPriorCPtrVec result; result.reserve(m_Models.size()); - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { result.push_back(model.second.get()); } return result; } -bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ +bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { CModelWeight weight(1.0); bool gotWeight = false; TPriorPtr model; - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_SETUP_TEARDOWN(WEIGHT_TAG, /*no-op*/, - traverser.traverseSubLevel(boost::bind(&CModelWeight::acceptRestoreTraverser, - &weight, _1)), + traverser.traverseSubLevel(boost::bind(&CModelWeight::acceptRestoreTraverser, &weight, _1)), gotWeight = true) - RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), - boost::cref(params), - boost::ref(model), _1))) - } - while (traverser.next()); + RESTORE(PRIOR_TAG, + traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(model), _1))) + } while (traverser.next()); - if (!gotWeight) - { + if (!gotWeight) { LOG_ERROR("No weight found"); return false; } - if (model == 0) - { + if (model == 0) { LOG_ERROR("No model found"); return false; } @@ -1201,69 +1000,55 @@ bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams return true; } -COneOfNPrior::TDoubleSizePr5Vec COneOfNPrior::normalizedLogWeights() const -{ +COneOfNPrior::TDoubleSizePr5Vec COneOfNPrior::normalizedLogWeights() const { TDoubleSizePr5Vec result; double Z = 0.0; - for (std::size_t i = 0u; i < m_Models.size(); ++i) - { - if (m_Models[i].second->participatesInModelSelection()) - { + for (std::size_t i = 
0u; i < m_Models.size(); ++i) { + if (m_Models[i].second->participatesInModelSelection()) { double logWeight = m_Models[i].first.logWeight(); result.emplace_back(logWeight, i); Z += std::exp(logWeight); } } Z = std::log(Z); - for (auto &logWeight : result) - { + for (auto& logWeight : result) { logWeight.first -= Z; } std::sort(result.begin(), result.end(), std::greater()); return result; } -double COneOfNPrior::medianModelMean() const -{ +double COneOfNPrior::medianModelMean() const { TDoubleVec means; means.reserve(m_Models.size()); - for (const auto &model : m_Models) - { - if (model.second->participatesInModelSelection()) - { + for (const auto& model : m_Models) { + if (model.second->participatesInModelSelection()) { means.push_back(model.second->marginalLikelihoodMean()); } } return CBasicStatistics::median(means); } -bool COneOfNPrior::badWeights() const -{ - for (const auto &model : m_Models) - { - if (!CMathsFuncs::isFinite(model.first.logWeight())) - { +bool COneOfNPrior::badWeights() const { + for (const auto& model : m_Models) { + if (!CMathsFuncs::isFinite(model.first.logWeight())) { return true; } } return false; } -std::string COneOfNPrior::debugWeights() const -{ - if (m_Models.empty()) - { +std::string COneOfNPrior::debugWeights() const { + if (m_Models.empty()) { return std::string(); } std::ostringstream result; result << std::scientific << std::setprecision(15); - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { result << " " << model.first.logWeight(); } result << " "; return result.str(); } - } } diff --git a/lib/maths/COrdinal.cc b/lib/maths/COrdinal.cc index 11fcfb3670..ccfe5fe944 100644 --- a/lib/maths/COrdinal.cc +++ b/lib/maths/COrdinal.cc @@ -13,60 +13,61 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -COrdinal::COrdinal() : m_Type(E_Nan) -{ +COrdinal::COrdinal() : m_Type(E_Nan) { m_Value.integer = 0; } -COrdinal::COrdinal(int64_t value) : m_Type(E_Integer) -{ +COrdinal::COrdinal(int64_t value) : m_Type(E_Integer) { m_Value.integer = value; } -COrdinal::COrdinal(uint64_t value) : m_Type(E_PositiveInteger) -{ +COrdinal::COrdinal(uint64_t value) : m_Type(E_PositiveInteger) { m_Value.positiveInteger = value; } -COrdinal::COrdinal(double value) : m_Type(E_Real) -{ +COrdinal::COrdinal(double value) : m_Type(E_Real) { m_Value.real = value; } -bool COrdinal::operator==(COrdinal rhs) const -{ - switch (m_Type) - { +bool COrdinal::operator==(COrdinal rhs) const { + switch (m_Type) { case E_Integer: - switch (rhs.m_Type) - { - case E_Integer: return m_Value.integer == rhs.m_Value.integer; - case E_PositiveInteger: return this->equal(m_Value.integer, rhs.m_Value.positiveInteger); - case E_Real: return this->equal(m_Value.integer, rhs.m_Value.real); - case E_Nan: break; + switch (rhs.m_Type) { + case E_Integer: + return m_Value.integer == rhs.m_Value.integer; + case E_PositiveInteger: + return this->equal(m_Value.integer, rhs.m_Value.positiveInteger); + case E_Real: + return this->equal(m_Value.integer, rhs.m_Value.real); + case E_Nan: + break; } break; case E_PositiveInteger: - switch (rhs.m_Type) - { - case E_Integer: return this->equal(rhs.m_Value.integer, m_Value.positiveInteger); - case E_PositiveInteger: return m_Value.positiveInteger == rhs.m_Value.positiveInteger; - case E_Real: return this->equal(m_Value.positiveInteger, rhs.m_Value.real); - case E_Nan: break; + switch (rhs.m_Type) { + case E_Integer: + return this->equal(rhs.m_Value.integer, m_Value.positiveInteger); + case 
E_PositiveInteger: + return m_Value.positiveInteger == rhs.m_Value.positiveInteger; + case E_Real: + return this->equal(m_Value.positiveInteger, rhs.m_Value.real); + case E_Nan: + break; } break; case E_Real: - switch (rhs.m_Type) - { - case E_Integer: return this->equal(rhs.m_Value.integer, m_Value.real); - case E_PositiveInteger: return this->equal(rhs.m_Value.positiveInteger, m_Value.real); - case E_Real: return m_Value.real == rhs.m_Value.real; - case E_Nan: break; + switch (rhs.m_Type) { + case E_Integer: + return this->equal(rhs.m_Value.integer, m_Value.real); + case E_PositiveInteger: + return this->equal(rhs.m_Value.positiveInteger, m_Value.real); + case E_Real: + return m_Value.real == rhs.m_Value.real; + case E_Nan: + break; } break; case E_Nan: @@ -75,38 +76,42 @@ bool COrdinal::operator==(COrdinal rhs) const return false; } -bool COrdinal::operator<(COrdinal rhs) const -{ - switch (m_Type) - { +bool COrdinal::operator<(COrdinal rhs) const { + switch (m_Type) { case E_Integer: - switch (rhs.m_Type) - { - case E_Integer: return m_Value.integer < rhs.m_Value.integer; - case E_PositiveInteger: return this->less(m_Value.integer, rhs.m_Value.positiveInteger); - case E_Real: return this->less(m_Value.integer, rhs.m_Value.real); - case E_Nan: break; + switch (rhs.m_Type) { + case E_Integer: + return m_Value.integer < rhs.m_Value.integer; + case E_PositiveInteger: + return this->less(m_Value.integer, rhs.m_Value.positiveInteger); + case E_Real: + return this->less(m_Value.integer, rhs.m_Value.real); + case E_Nan: + break; } break; case E_PositiveInteger: - switch (rhs.m_Type) - { - case E_Integer: return !this->equal(rhs.m_Value.integer, m_Value.positiveInteger) - && !this->less(rhs.m_Value.integer, m_Value.positiveInteger); - case E_PositiveInteger: return m_Value.positiveInteger < rhs.m_Value.positiveInteger; - case E_Real: return this->less(m_Value.positiveInteger, rhs.m_Value.real); - case E_Nan: break; + switch (rhs.m_Type) { + case E_Integer: + return !this->equal(rhs.m_Value.integer, m_Value.positiveInteger) && !this->less(rhs.m_Value.integer, m_Value.positiveInteger); + case E_PositiveInteger: + return m_Value.positiveInteger < rhs.m_Value.positiveInteger; + case E_Real: + return this->less(m_Value.positiveInteger, rhs.m_Value.real); + case E_Nan: + break; } break; case E_Real: - switch (rhs.m_Type) - { - case E_Integer: return !this->equal(rhs.m_Value.integer, m_Value.real) - && !this->less(rhs.m_Value.integer, m_Value.real); - case E_PositiveInteger: return !this->equal(rhs.m_Value.positiveInteger, m_Value.real) - && !this->less(rhs.m_Value.positiveInteger, m_Value.real); - case E_Real: return m_Value.real < rhs.m_Value.real; - case E_Nan: break; + switch (rhs.m_Type) { + case E_Integer: + return !this->equal(rhs.m_Value.integer, m_Value.real) && !this->less(rhs.m_Value.integer, m_Value.real); + case E_PositiveInteger: + return !this->equal(rhs.m_Value.positiveInteger, m_Value.real) && !this->less(rhs.m_Value.positiveInteger, m_Value.real); + case E_Real: + return m_Value.real < rhs.m_Value.real; + case E_Nan: + break; } break; case E_Nan: @@ -115,38 +120,35 @@ bool COrdinal::operator<(COrdinal rhs) const return false; } -bool COrdinal::isNan() const -{ +bool COrdinal::isNan() const { return m_Type == E_Nan; } -double COrdinal::asDouble() const -{ - switch (m_Type) - { - case E_Integer: return static_cast(m_Value.integer); - case E_PositiveInteger: return static_cast(m_Value.positiveInteger); - case E_Real: return m_Value.real; - case E_Nan: break; +double COrdinal::asDouble() 
const { switch (m_Type) { case E_Integer: return static_cast<double>(m_Value.integer); case E_PositiveInteger: return static_cast<double>(m_Value.positiveInteger); case E_Real: return m_Value.real; case E_Nan: break; } return std::numeric_limits<double>::quiet_NaN(); } -uint64_t COrdinal::hash() -{ +uint64_t COrdinal::hash() { return m_Value.positiveInteger; } -bool COrdinal::equal(int64_t lhs, uint64_t rhs) const -{ +bool COrdinal::equal(int64_t lhs, uint64_t rhs) const { return lhs < 0 ? false : static_cast<uint64_t>(lhs) == rhs; } -bool COrdinal::equal(int64_t lhs, double rhs) const -{ - if ( rhs < static_cast<double>(boost::numeric::bounds<int64_t>::lowest()) - || rhs > static_cast<double>(boost::numeric::bounds<int64_t>::highest())) - { +bool COrdinal::equal(int64_t lhs, double rhs) const { + if (rhs < static_cast<double>(boost::numeric::bounds<int64_t>::lowest()) || + rhs > static_cast<double>(boost::numeric::bounds<int64_t>::highest())) { return false; } double integerPart; @@ -154,11 +156,8 @@ bool COrdinal::equal(int64_t lhs, double rhs) const return remainder > 0.0 ? false : lhs == static_cast<int64_t>(integerPart); } -bool COrdinal::equal(uint64_t lhs, double rhs) const -{ - if ( rhs < 0.0 - || rhs > static_cast<double>(boost::numeric::bounds<uint64_t>::highest())) - { +bool COrdinal::equal(uint64_t lhs, double rhs) const { + if (rhs < 0.0 || rhs > static_cast<double>(boost::numeric::bounds<uint64_t>::highest())) { return false; } double integerPart; @@ -166,54 +165,50 @@ bool COrdinal::equal(uint64_t lhs, double rhs) const return remainder > 0.0 ? false : lhs == static_cast<uint64_t>(integerPart); } -bool COrdinal::less(int64_t lhs, uint64_t rhs) const -{ +bool COrdinal::less(int64_t lhs, uint64_t rhs) const { return lhs < 0 ? true : static_cast<uint64_t>(lhs) < rhs; } -bool COrdinal::less(int64_t lhs, double rhs) const -{ - if (rhs < static_cast<double>(boost::numeric::bounds<int64_t>::lowest())) - { +bool COrdinal::less(int64_t lhs, double rhs) const { + if (rhs < static_cast<double>(boost::numeric::bounds<int64_t>::lowest())) { return false; } - if (rhs > static_cast<double>(boost::numeric::bounds<int64_t>::highest())) - { + if (rhs > static_cast<double>(boost::numeric::bounds<int64_t>::highest())) { return true; } double integerPart; double remainder = ::modf(rhs, &integerPart); - return lhs < static_cast<int64_t>(integerPart) - || (lhs == static_cast<int64_t>(integerPart) && remainder > 0.0); + return lhs < static_cast<int64_t>(integerPart) || (lhs == static_cast<int64_t>(integerPart) && remainder > 0.0); } -bool COrdinal::less(uint64_t lhs, double rhs) const -{ - if (rhs < 0.0) - { +bool COrdinal::less(uint64_t lhs, double rhs) const { + if (rhs < 0.0) { return false; } - if (rhs > static_cast<double>(boost::numeric::bounds<uint64_t>::highest())) - { + if (rhs > static_cast<double>(boost::numeric::bounds<uint64_t>::highest())) { return true; } double integerPart; double remainder = ::modf(rhs, &integerPart); - return lhs < static_cast<uint64_t>(integerPart) - || (lhs == static_cast<uint64_t>(integerPart) && remainder > 0.0); -} - -std::ostream &operator<<(std::ostream &o, COrdinal ord) -{ - switch (ord.m_Type) - { - case COrdinal::E_Integer: o << ord.m_Value.integer; break; - case COrdinal::E_PositiveInteger: o << ord.m_Value.positiveInteger; break; - case COrdinal::E_Real: o << ord.m_Value.real; break; - case COrdinal::E_Nan: o << "nan"; break; + return lhs < static_cast<uint64_t>(integerPart) || (lhs == static_cast<uint64_t>(integerPart) && remainder > 0.0); +} + +std::ostream& operator<<(std::ostream& o, COrdinal ord) { + switch (ord.m_Type) { + case COrdinal::E_Integer: + o << ord.m_Value.integer; + break; + case COrdinal::E_PositiveInteger: + o << ord.m_Value.positiveInteger; + break; + case COrdinal::E_Real: + o << ord.m_Value.real; + break; + case COrdinal::E_Nan: + o << "nan"; + break;
} return o; } - } } diff --git a/lib/maths/CPRNG.cc b/lib/maths/CPRNG.cc index d4f7274926..b2d2bc1f10 100644 --- a/lib/maths/CPRNG.cc +++ b/lib/maths/CPRNG.cc @@ -13,90 +13,71 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ -namespace detail -{ +namespace { +namespace detail { //! Discard a sequence of \p n random numbers. template -inline void discard(uint64_t n, PRNG &rng) -{ - for (/**/; n > 0; --n) - { +inline void discard(uint64_t n, PRNG& rng) { + for (/**/; n > 0; --n) { rng(); } } //! Rotate about the \p k'th bit. -uint64_t rotl(const uint64_t x, int k) -{ +uint64_t rotl(const uint64_t x, int k) { return (x << k) | (x >> (64 - k)); } - } } -CPRNG::CSplitMix64::CSplitMix64() : m_X(0) -{ +CPRNG::CSplitMix64::CSplitMix64() : m_X(0) { this->seed(); } -CPRNG::CSplitMix64::CSplitMix64(uint64_t seed) : m_X(0) -{ +CPRNG::CSplitMix64::CSplitMix64(uint64_t seed) : m_X(0) { this->seed(seed); } -bool CPRNG::CSplitMix64::operator==(CSplitMix64 other) const -{ +bool CPRNG::CSplitMix64::operator==(CSplitMix64 other) const { return m_X == other.m_X; } -void CPRNG::CSplitMix64::seed() -{ +void CPRNG::CSplitMix64::seed() { m_X = 0; } -void CPRNG::CSplitMix64::seed(uint64_t seed) -{ +void CPRNG::CSplitMix64::seed(uint64_t seed) { m_X = seed; } -uint64_t CPRNG::CSplitMix64::min() -{ +uint64_t CPRNG::CSplitMix64::min() { return 0; } -uint64_t CPRNG::CSplitMix64::max() -{ +uint64_t CPRNG::CSplitMix64::max() { return boost::numeric::bounds::highest(); } -uint64_t CPRNG::CSplitMix64::operator()() -{ +uint64_t CPRNG::CSplitMix64::operator()() { uint64_t x = (m_X += A); x = (x ^ (x >> 30)) * B; x = (x ^ (x >> 27)) * C; return x ^ (x >> 31); } -void CPRNG::CSplitMix64::discard(uint64_t n) -{ +void CPRNG::CSplitMix64::discard(uint64_t n) { detail::discard(n, *this); } -std::string CPRNG::CSplitMix64::toString() const -{ +std::string CPRNG::CSplitMix64::toString() const { return core::CStringUtils::typeToString(m_X); } -bool CPRNG::CSplitMix64::fromString(const std::string &state) -{ +bool CPRNG::CSplitMix64::fromString(const std::string& state) { return core::CStringUtils::stringToType(state, m_X); } @@ -104,45 +85,36 @@ const uint64_t CPRNG::CSplitMix64::A(0x9E3779B97F4A7C15); const uint64_t CPRNG::CSplitMix64::B(0xBF58476D1CE4E5B9); const uint64_t CPRNG::CSplitMix64::C(0x94D049BB133111EB); - -CPRNG::CXorOShiro128Plus::CXorOShiro128Plus() -{ +CPRNG::CXorOShiro128Plus::CXorOShiro128Plus() { this->seed(); } -CPRNG::CXorOShiro128Plus::CXorOShiro128Plus(uint64_t seed) -{ +CPRNG::CXorOShiro128Plus::CXorOShiro128Plus(uint64_t seed) { this->seed(seed); } -bool CPRNG::CXorOShiro128Plus::operator==(const CXorOShiro128Plus &other) const -{ +bool CPRNG::CXorOShiro128Plus::operator==(const CXorOShiro128Plus& other) const { return std::equal(&m_X[0], &m_X[2], &other.m_X[0]); } -void CPRNG::CXorOShiro128Plus::seed() -{ +void CPRNG::CXorOShiro128Plus::seed() { this->seed(0); } -void CPRNG::CXorOShiro128Plus::seed(uint64_t seed) -{ +void CPRNG::CXorOShiro128Plus::seed(uint64_t seed) { CSplitMix64 seeds(seed); seeds.generate(&m_X[0], &m_X[2]); } -uint64_t CPRNG::CXorOShiro128Plus::min() -{ +uint64_t CPRNG::CXorOShiro128Plus::min() { return 0; } -uint64_t CPRNG::CXorOShiro128Plus::max() -{ +uint64_t CPRNG::CXorOShiro128Plus::max() { return boost::numeric::bounds::highest(); } -uint64_t CPRNG::CXorOShiro128Plus::operator()() -{ +uint64_t CPRNG::CXorOShiro128Plus::operator()() { uint64_t x0 = m_X[0]; uint64_t x1 = m_X[1]; uint64_t result = x0 + x1; @@ -152,20 +124,15 @@ uint64_t 
CPRNG::CXorOShiro128Plus::operator()() return result; } -void CPRNG::CXorOShiro128Plus::discard(uint64_t n) -{ +void CPRNG::CXorOShiro128Plus::discard(uint64_t n) { detail::discard(n, *this); } -void CPRNG::CXorOShiro128Plus::jump() -{ - uint64_t x[2] = { 0 }; - for(std::size_t i = 0; i < 2; ++i) - { - for(unsigned int b = 0; b < 64; ++b) - { - if (JUMP[i] & 1ULL << b) - { +void CPRNG::CXorOShiro128Plus::jump() { + uint64_t x[2] = {0}; + for (std::size_t i = 0; i < 2; ++i) { + for (unsigned int b = 0; b < 64; ++b) { + if (JUMP[i] & 1ULL << b) { x[0] ^= m_X[0]; x[1] ^= m_X[1]; } @@ -177,59 +144,48 @@ void CPRNG::CXorOShiro128Plus::jump() m_X[1] = x[1]; } -std::string CPRNG::CXorOShiro128Plus::toString() const -{ - const uint64_t *begin = &m_X[0]; - const uint64_t *end = &m_X[2]; +std::string CPRNG::CXorOShiro128Plus::toString() const { + const uint64_t* begin = &m_X[0]; + const uint64_t* end = &m_X[2]; return core::CPersistUtils::toString(begin, end); } -bool CPRNG::CXorOShiro128Plus::fromString(const std::string &state) -{ +bool CPRNG::CXorOShiro128Plus::fromString(const std::string& state) { return core::CPersistUtils::fromString(state, &m_X[0], &m_X[2]); } -const uint64_t CPRNG::CXorOShiro128Plus::JUMP[] = { 0xbeac0467eba5facb, 0xd86b048b86aa9922 }; +const uint64_t CPRNG::CXorOShiro128Plus::JUMP[] = {0xbeac0467eba5facb, 0xd86b048b86aa9922}; - -CPRNG::CXorShift1024Mult::CXorShift1024Mult() : m_P(0) -{ +CPRNG::CXorShift1024Mult::CXorShift1024Mult() : m_P(0) { this->seed(); } -CPRNG::CXorShift1024Mult::CXorShift1024Mult(uint64_t seed) : m_P(0) -{ +CPRNG::CXorShift1024Mult::CXorShift1024Mult(uint64_t seed) : m_P(0) { this->seed(seed); } -bool CPRNG::CXorShift1024Mult::operator==(const CXorShift1024Mult &other) const -{ +bool CPRNG::CXorShift1024Mult::operator==(const CXorShift1024Mult& other) const { return m_P == other.m_P && std::equal(&m_X[0], &m_X[16], &other.m_X[0]); } -void CPRNG::CXorShift1024Mult::seed() -{ +void CPRNG::CXorShift1024Mult::seed() { this->seed(0); } -void CPRNG::CXorShift1024Mult::seed(uint64_t seed) -{ +void CPRNG::CXorShift1024Mult::seed(uint64_t seed) { CSplitMix64 seeds(seed); seeds.generate(&m_X[0], &m_X[16]); } -uint64_t CPRNG::CXorShift1024Mult::min() -{ +uint64_t CPRNG::CXorShift1024Mult::min() { return 0; } -uint64_t CPRNG::CXorShift1024Mult::max() -{ +uint64_t CPRNG::CXorShift1024Mult::max() { return boost::numeric::bounds::highest(); } -uint64_t CPRNG::CXorShift1024Mult::operator()() -{ +uint64_t CPRNG::CXorShift1024Mult::operator()() { uint64_t x0 = m_X[m_P]; m_P = (m_P + 1) & 15; uint64_t x1 = m_X[m_P]; @@ -238,23 +194,17 @@ uint64_t CPRNG::CXorShift1024Mult::operator()() return m_X[m_P] * A; } -void CPRNG::CXorShift1024Mult::discard(uint64_t n) -{ +void CPRNG::CXorShift1024Mult::discard(uint64_t n) { detail::discard(n, *this); } -void CPRNG::CXorShift1024Mult::jump() -{ - uint64_t t[16] = { 0 }; +void CPRNG::CXorShift1024Mult::jump() { + uint64_t t[16] = {0}; - for (std::size_t i = 0; i < 16; ++i) - { - for (unsigned int b = 0; b < 64; ++b) - { - if (JUMP[i] & 1ULL << b) - { - for (int j = 0; j < 16; ++j) - { + for (std::size_t i = 0; i < 16; ++i) { + for (unsigned int b = 0; b < 64; ++b) { + if (JUMP[i] & 1ULL << b) { + for (int j = 0; j < 16; ++j) { t[j] ^= m_X[(j + m_P) & 15]; } } @@ -262,32 +212,25 @@ void CPRNG::CXorShift1024Mult::jump() } } - for (int j = 0; j < 16; j++) - { + for (int j = 0; j < 16; j++) { m_X[(j + m_P) & 15] = t[j]; } } -std::string CPRNG::CXorShift1024Mult::toString() const -{ - const uint64_t *begin = &m_X[0]; - const uint64_t 
*end = &m_X[16]; - return core::CPersistUtils::toString(begin, end) - + core::CPersistUtils::PAIR_DELIMITER - + core::CStringUtils::typeToString(m_P); +std::string CPRNG::CXorShift1024Mult::toString() const { + const uint64_t* begin = &m_X[0]; + const uint64_t* end = &m_X[16]; + return core::CPersistUtils::toString(begin, end) + core::CPersistUtils::PAIR_DELIMITER + core::CStringUtils::typeToString(m_P); } -bool CPRNG::CXorShift1024Mult::fromString(std::string state) -{ +bool CPRNG::CXorShift1024Mult::fromString(std::string state) { std::size_t delimPos = state.find(core::CPersistUtils::PAIR_DELIMITER); - if (delimPos == std::string::npos) - { + if (delimPos == std::string::npos) { return false; } std::string p; p.assign(state, delimPos + 1, state.length() - delimPos); - if (!core::CStringUtils::stringToType(p, m_P)) - { + if (!core::CStringUtils::stringToType(p, m_P)) { return false; } state.resize(delimPos); @@ -295,13 +238,21 @@ bool CPRNG::CXorShift1024Mult::fromString(std::string state) } const uint64_t CPRNG::CXorShift1024Mult::A(1181783497276652981); -const uint64_t CPRNG::CXorShift1024Mult::JUMP[16] = - { - 0x84242f96eca9c41d, 0xa3c65b8776f96855, 0x5b34a39f070b5837, 0x4489affce4f31a1e, - 0x2ffeeb0a48316f40, 0xdc2d9891fe68c022, 0x3659132bb12fea70, 0xaac17d8efa43cab8, - 0xc4cb815590989b13, 0x5ee975283d71c93b, 0x691548c86c1bd540, 0x7910c41d10a1e6a5, - 0x0b5fc64563b3e2a8, 0x047f7684e9fc949d, 0xb99181f2d8f685ca, 0x284600e3f30e38c3 - }; - +const uint64_t CPRNG::CXorShift1024Mult::JUMP[16] = {0x84242f96eca9c41d, + 0xa3c65b8776f96855, + 0x5b34a39f070b5837, + 0x4489affce4f31a1e, + 0x2ffeeb0a48316f40, + 0xdc2d9891fe68c022, + 0x3659132bb12fea70, + 0xaac17d8efa43cab8, + 0xc4cb815590989b13, + 0x5ee975283d71c93b, + 0x691548c86c1bd540, + 0x7910c41d10a1e6a5, + 0x0b5fc64563b3e2a8, + 0x047f7684e9fc949d, + 0xb99181f2d8f685ca, + 0x284600e3f30e38c3}; } } diff --git a/lib/maths/CPackedBitVector.cc b/lib/maths/CPackedBitVector.cc index 77e8f5c5af..2203645484 100644 --- a/lib/maths/CPackedBitVector.cc +++ b/lib/maths/CPackedBitVector.cc @@ -13,30 +13,20 @@ #include -namespace ml -{ -namespace maths -{ - -CPackedBitVector::CPackedBitVector() : - m_Dimension(0), m_First(false), m_Parity(true) -{} - -CPackedBitVector::CPackedBitVector(bool bit) : - m_Dimension(1), m_First(bit), m_Parity(true), m_RunLengths(1, 1) -{} - -CPackedBitVector::CPackedBitVector(std::size_t dimension, bool bit) : - m_Dimension(static_cast(dimension)), - m_First(bit), m_Parity(true) -{ - if (dimension > 0) - { +namespace ml { +namespace maths { + +CPackedBitVector::CPackedBitVector() : m_Dimension(0), m_First(false), m_Parity(true) { +} + +CPackedBitVector::CPackedBitVector(bool bit) : m_Dimension(1), m_First(bit), m_Parity(true), m_RunLengths(1, 1) { +} + +CPackedBitVector::CPackedBitVector(std::size_t dimension, bool bit) + : m_Dimension(static_cast(dimension)), m_First(bit), m_Parity(true) { + if (dimension > 0) { std::size_t remainder = static_cast(MAX_RUN_LENGTH); - for (/**/; - remainder <= dimension; - remainder += static_cast(MAX_RUN_LENGTH)) - { + for (/**/; remainder <= dimension; remainder += static_cast(MAX_RUN_LENGTH)) { m_RunLengths.push_back(MAX_RUN_LENGTH); } remainder -= static_cast(MAX_RUN_LENGTH); @@ -44,23 +34,16 @@ CPackedBitVector::CPackedBitVector(std::size_t dimension, bool bit) : } } -CPackedBitVector::CPackedBitVector(const TBoolVec &bits) : - m_Dimension(static_cast(bits.size())), - m_First(bits.empty() ? 
false : bits[0]), m_Parity(true) -{ +CPackedBitVector::CPackedBitVector(const TBoolVec& bits) + : m_Dimension(static_cast(bits.size())), m_First(bits.empty() ? false : bits[0]), m_Parity(true) { std::size_t length = 1u; - for (std::size_t i = 1u; i < bits.size(); ++i) - { - if (bits[i] == bits[i-1]) - { - if (++length == static_cast(MAX_RUN_LENGTH)) - { + for (std::size_t i = 1u; i < bits.size(); ++i) { + if (bits[i] == bits[i - 1]) { + if (++length == static_cast(MAX_RUN_LENGTH)) { m_RunLengths.push_back(MAX_RUN_LENGTH); length -= static_cast(MAX_RUN_LENGTH); } - } - else - { + } else { m_Parity = !m_Parity; m_RunLengths.push_back(static_cast(length)); length = 1; @@ -69,110 +52,80 @@ CPackedBitVector::CPackedBitVector(const TBoolVec &bits) : m_RunLengths.push_back(static_cast(length)); } -void CPackedBitVector::contract() -{ - if (m_Dimension == 0) - { +void CPackedBitVector::contract() { + if (m_Dimension == 0) { return; } - if (--m_Dimension == 0) - { + if (--m_Dimension == 0) { m_First = false; m_Parity = true; m_RunLengths.clear(); return; } - if (m_RunLengths.front() == MAX_RUN_LENGTH) - { + if (m_RunLengths.front() == MAX_RUN_LENGTH) { std::size_t i = 1u; - for (/**/; - m_RunLengths[i] == MAX_RUN_LENGTH && i < m_RunLengths.size(); - ++i) - {} - if (m_RunLengths[i] == 0) - { + for (/**/; m_RunLengths[i] == MAX_RUN_LENGTH && i < m_RunLengths.size(); ++i) { + } + if (m_RunLengths[i] == 0) { m_RunLengths.erase(m_RunLengths.begin() + i); --m_RunLengths[i - 1]; - } - else - { + } else { --m_RunLengths[i]; } - } - else if (--m_RunLengths.front() == 0) - { + } else if (--m_RunLengths.front() == 0) { m_First = !m_First; m_Parity = !m_Parity; m_RunLengths.erase(m_RunLengths.begin()); } } -void CPackedBitVector::extend(bool bit) -{ +void CPackedBitVector::extend(bool bit) { ++m_Dimension; - if (m_Dimension == 1) - { + if (m_Dimension == 1) { m_First = bit; m_Parity = true; m_RunLengths.push_back(1); - } - else if (m_Parity ? (bit != m_First) : (bit == m_First)) - { + } else if (m_Parity ? 
(bit != m_First) : (bit == m_First)) { m_Parity = !m_Parity; m_RunLengths.push_back(1); - } - else if (m_RunLengths.back() + 1 == MAX_RUN_LENGTH) - { + } else if (m_RunLengths.back() + 1 == MAX_RUN_LENGTH) { ++m_RunLengths.back(); m_RunLengths.push_back(0); - } - else - { + } else { ++m_RunLengths.back(); } } -bool CPackedBitVector::fromDelimited(const std::string &str) -{ +bool CPackedBitVector::fromDelimited(const std::string& str) { std::size_t last = 0u; - std::size_t pos = str.find_first_of(core::CPersistUtils::DELIMITER, last); - if ( pos == std::string::npos - || core::CStringUtils::stringToType(str.substr(last, pos - last), - m_Dimension) == false) - { + std::size_t pos = str.find_first_of(core::CPersistUtils::DELIMITER, last); + if (pos == std::string::npos || core::CStringUtils::stringToType(str.substr(last, pos - last), m_Dimension) == false) { LOG_ERROR("Invalid packed vector in " << str); return false; } last = pos; - pos = str.find_first_of(core::CPersistUtils::DELIMITER, last + 1); + pos = str.find_first_of(core::CPersistUtils::DELIMITER, last + 1); int first = 0; - if ( pos == std::string::npos - || core::CStringUtils::stringToType(str.substr(last + 1, pos - last - 1), - first) == false) - { + if (pos == std::string::npos || core::CStringUtils::stringToType(str.substr(last + 1, pos - last - 1), first) == false) { LOG_ERROR("Invalid packed vector in " << str); return false; } m_First = (first != 0); last = pos; - pos = str.find_first_of(core::CPersistUtils::DELIMITER, last + 1); + pos = str.find_first_of(core::CPersistUtils::DELIMITER, last + 1); int parity = 0; - if ( pos == std::string::npos - || core::CStringUtils::stringToType(str.substr(last + 1, pos - last - 1), - parity) == false) - { + if (pos == std::string::npos || core::CStringUtils::stringToType(str.substr(last + 1, pos - last - 1), parity) == false) { LOG_ERROR("Invalid packed vector in " << str); return false; } m_Parity = (parity != 0); - if (core::CPersistUtils::fromString(str.substr(pos + 1), m_RunLengths) == false) - { + if (core::CPersistUtils::fromString(str.substr(pos + 1), m_RunLengths) == false) { LOG_ERROR("Invalid packed vector in " << str); return false; } @@ -180,147 +133,119 @@ bool CPackedBitVector::fromDelimited(const std::string &str) return true; } -std::string CPackedBitVector::toDelimited() const -{ +std::string CPackedBitVector::toDelimited() const { std::string result; - result += core::CStringUtils::typeToString(m_Dimension) - + core::CPersistUtils::DELIMITER; - result += core::CStringUtils::typeToString(static_cast(m_First)) - + core::CPersistUtils::DELIMITER; - result += core::CStringUtils::typeToString(static_cast(m_Parity)) - + core::CPersistUtils::DELIMITER; + result += core::CStringUtils::typeToString(m_Dimension) + core::CPersistUtils::DELIMITER; + result += core::CStringUtils::typeToString(static_cast(m_First)) + core::CPersistUtils::DELIMITER; + result += core::CStringUtils::typeToString(static_cast(m_Parity)) + core::CPersistUtils::DELIMITER; result += core::CPersistUtils::toString(m_RunLengths); return result; } -std::size_t CPackedBitVector::dimension() const -{ +std::size_t CPackedBitVector::dimension() const { return m_Dimension; } -bool CPackedBitVector::operator()(std::size_t i) const -{ +bool CPackedBitVector::operator()(std::size_t i) const { bool parity = true; - for (std::size_t j = 0u, k = static_cast(m_RunLengths[j]); - k <= i; - k += static_cast(m_RunLengths[++j])) - { - if (m_RunLengths[j] != MAX_RUN_LENGTH) - { + for (std::size_t j = 0u, k = 
static_cast(m_RunLengths[j]); k <= i; k += static_cast(m_RunLengths[++j])) { + if (m_RunLengths[j] != MAX_RUN_LENGTH) { parity = !parity; } } return parity ? m_First : !m_First; } -bool CPackedBitVector::operator==(const CPackedBitVector &other) const -{ - return m_Dimension == other.m_Dimension - && m_First == other.m_First - && m_Parity == other.m_Parity - && m_RunLengths == other.m_RunLengths; +bool CPackedBitVector::operator==(const CPackedBitVector& other) const { + return m_Dimension == other.m_Dimension && m_First == other.m_First && m_Parity == other.m_Parity && m_RunLengths == other.m_RunLengths; } -bool CPackedBitVector::operator<(const CPackedBitVector &rhs) const -{ - return COrderings::lexicographical_compare(m_Dimension, - m_First, - m_Parity, - m_RunLengths, - rhs.m_Dimension, - rhs.m_First, - rhs.m_Parity, - rhs.m_RunLengths); +bool CPackedBitVector::operator<(const CPackedBitVector& rhs) const { + return COrderings::lexicographical_compare( + m_Dimension, m_First, m_Parity, m_RunLengths, rhs.m_Dimension, rhs.m_First, rhs.m_Parity, rhs.m_RunLengths); } -CPackedBitVector CPackedBitVector::complement() const -{ +CPackedBitVector CPackedBitVector::complement() const { CPackedBitVector result(*this); result.m_First = !result.m_First; return result; } -double CPackedBitVector::inner(const CPackedBitVector &covector, EOperation op) const -{ +double CPackedBitVector::inner(const CPackedBitVector& covector, EOperation op) const { // This is just a line scan over the run lengths keeping // track of the parities of both vectors. double result = 0.0; - if (m_Dimension != covector.dimension()) - { - LOG_ERROR("Dimension mismatch " << m_Dimension - << " vs " << covector.dimension()); + if (m_Dimension != covector.dimension()) { + LOG_ERROR("Dimension mismatch " << m_Dimension << " vs " << covector.dimension()); return result; } - int value = static_cast(m_First); + int value = static_cast(m_First); int covalue = static_cast(covector.m_First); - std::size_t length = static_cast(m_RunLengths[0]); + std::size_t length = static_cast(m_RunLengths[0]); std::size_t colength = static_cast(covector.m_RunLengths[0]); - std::size_t pos = length; + std::size_t pos = length; std::size_t copos = colength; - for (std::size_t i = 0u, j = 0u; - pos < m_Dimension || copos < m_Dimension; - /**/) - { - std::size_t run = std::min(pos, copos) - std::max(pos - length, - copos - colength); - switch (op) - { - case E_AND: result += static_cast((value & covalue) * run); break; - case E_OR: result += static_cast((value | covalue) * run); break; - case E_XOR: result += static_cast((value ^ covalue) * run); break; + for (std::size_t i = 0u, j = 0u; pos < m_Dimension || copos < m_Dimension; + /**/) { + std::size_t run = std::min(pos, copos) - std::max(pos - length, copos - colength); + switch (op) { + case E_AND: + result += static_cast((value & covalue) * run); + break; + case E_OR: + result += static_cast((value | covalue) * run); + break; + case E_XOR: + result += static_cast((value ^ covalue) * run); + break; } - if (pos < copos) - { - if (length != MAX_RUN_LENGTH) - { + if (pos < copos) { + if (length != MAX_RUN_LENGTH) { value = 1 - value; } length = static_cast(m_RunLengths[++i]); - pos += length; - } - else if (copos < pos) - { - if (colength != MAX_RUN_LENGTH) - { + pos += length; + } else if (copos < pos) { + if (colength != MAX_RUN_LENGTH) { covalue = 1 - covalue; } colength = static_cast(covector.m_RunLengths[++j]); - copos += colength; - } - else - { - if (length != MAX_RUN_LENGTH) - { - value = 1 - 
value; + copos += colength; + } else { + if (length != MAX_RUN_LENGTH) { + value = 1 - value; covalue = 1 - covalue; } - length = static_cast(m_RunLengths[++i]); + length = static_cast(m_RunLengths[++i]); colength = static_cast(covector.m_RunLengths[++j]); - pos += length; - copos += colength; + pos += length; + copos += colength; } } std::size_t run = std::min(length, colength); - switch (op) - { - case E_AND: result += static_cast((value & covalue) * run); break; - case E_OR: result += static_cast((value | covalue) * run); break; - case E_XOR: result += static_cast((value ^ covalue) * run); break; + switch (op) { + case E_AND: + result += static_cast((value & covalue) * run); + break; + case E_OR: + result += static_cast((value | covalue) * run); + break; + case E_XOR: + result += static_cast((value ^ covalue) * run); + break; } return result; } -CPackedBitVector::TBoolVec CPackedBitVector::toBitVector() const -{ - if (m_Dimension == 0) - { +CPackedBitVector::TBoolVec CPackedBitVector::toBitVector() const { + if (m_Dimension == 0) { return TBoolVec(); } @@ -328,13 +253,9 @@ CPackedBitVector::TBoolVec CPackedBitVector::toBitVector() const result.reserve(m_Dimension); bool parity = true; - for (std::size_t i = 0u; i < m_RunLengths.size(); ++i) - { - std::fill_n(std::back_inserter(result), - static_cast(m_RunLengths[i]), - parity ? m_First : !m_First); - if (m_RunLengths[i] != MAX_RUN_LENGTH) - { + for (std::size_t i = 0u; i < m_RunLengths.size(); ++i) { + std::fill_n(std::back_inserter(result), static_cast(m_RunLengths[i]), parity ? m_First : !m_First); + if (m_RunLengths[i] != MAX_RUN_LENGTH) { parity = !parity; } } @@ -342,43 +263,36 @@ CPackedBitVector::TBoolVec CPackedBitVector::toBitVector() const return result; } -uint64_t CPackedBitVector::checksum() const -{ +uint64_t CPackedBitVector::checksum() const { uint64_t seed = m_Dimension; seed = CChecksum::calculate(seed, m_First); seed = CChecksum::calculate(seed, m_Parity); return CChecksum::calculate(seed, m_RunLengths); } -void CPackedBitVector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CPackedBitVector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CPackedBitVector"); core::CMemoryDebug::dynamicSize("m_RunLengths", m_RunLengths, mem); } -std::size_t CPackedBitVector::memoryUsage() const -{ +std::size_t CPackedBitVector::memoryUsage() const { return core::CMemory::dynamicSize(m_RunLengths); } const uint8_t CPackedBitVector::MAX_RUN_LENGTH = std::numeric_limits::max(); -std::ostream &operator<<(std::ostream &o, const CPackedBitVector &v) -{ - if (v.dimension() == 0) - { +std::ostream& operator<<(std::ostream& o, const CPackedBitVector& v) { + if (v.dimension() == 0) { return o << "[]"; } o << '[' << core::CStringUtils::typeToString(static_cast(v(0))); - for (std::size_t i = 1u; i < v.dimension(); ++i) - { + for (std::size_t i = 1u; i < v.dimension(); ++i) { o << ' ' << core::CStringUtils::typeToString(static_cast(v(i))); } o << ']'; return o; } - } } diff --git a/lib/maths/CPeriodicityHypothesisTests.cc b/lib/maths/CPeriodicityHypothesisTests.cc index 93eb4d8416..92df77c602 100644 --- a/lib/maths/CPeriodicityHypothesisTests.cc +++ b/lib/maths/CPeriodicityHypothesisTests.cc @@ -8,21 +8,21 @@ #include #include -#include #include #include #include +#include #include #include #include -#include #include #include #include #include #include #include +#include #include #include @@ -39,12 +39,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml 
{ +namespace maths { +namespace { using TDoubleVec = std::vector; using TTimeVec = std::vector; @@ -59,74 +56,61 @@ using TTimeTimePr2Vec = core::CSmallVector; using TTimeTimePrMeanVarAccumulatorPr = std::pair; //! \brief Accumulates the minimum amplitude. -class CMinAmplitude -{ - public: - CMinAmplitude(std::size_t n, double level) : - m_Level(level), - m_Count(0), - m_Min(std::max(n, MINIMUM_COUNT_TO_TEST)), - m_Max(std::max(n, MINIMUM_COUNT_TO_TEST)) - {} - - void add(double x, double n) - { - if (n > 0.0) - { - ++m_Count; - m_Min.add(x - m_Level); - m_Max.add(x - m_Level); - } +class CMinAmplitude { +public: + CMinAmplitude(std::size_t n, double level) + : m_Level(level), m_Count(0), m_Min(std::max(n, MINIMUM_COUNT_TO_TEST)), m_Max(std::max(n, MINIMUM_COUNT_TO_TEST)) {} + + void add(double x, double n) { + if (n > 0.0) { + ++m_Count; + m_Min.add(x - m_Level); + m_Max.add(x - m_Level); } + } - double amplitude() const - { - if (this->count() >= MINIMUM_COUNT_TO_TEST) - { - return std::max(std::max(-m_Min.biggest(), 0.0), - std::max( m_Max.biggest(), 0.0)); - } + double amplitude() const { + if (this->count() >= MINIMUM_COUNT_TO_TEST) { + return std::max(std::max(-m_Min.biggest(), 0.0), std::max(m_Max.biggest(), 0.0)); + } + return 0.0; + } + + double significance(const boost::math::normal& normal) const { + if (this->count() < MINIMUM_COUNT_TO_TEST) { + return 1.0; + } + + double F{2.0 * CTools::safeCdf(normal, -this->amplitude())}; + if (F == 0.0) { return 0.0; } - double significance(const boost::math::normal &normal) const - { - if (this->count() < MINIMUM_COUNT_TO_TEST) - { - return 1.0; - } + double n{static_cast(this->count())}; + boost::math::binomial binomial(static_cast(m_Count), F); + return CTools::safeCdfComplement(binomial, n - 1.0); + } - double F{2.0 * CTools::safeCdf(normal, -this->amplitude())}; - if (F == 0.0) - { - return 0.0; - } +private: + using TMinAccumulator = CBasicStatistics::COrderStatisticsHeap; + using TMaxAccumulator = CBasicStatistics::COrderStatisticsHeap>; - double n{static_cast(this->count())}; - boost::math::binomial binomial(static_cast(m_Count), F); - return CTools::safeCdfComplement(binomial, n - 1.0); - } +private: + std::size_t count() const { return m_Min.count(); } + +private: + //! The minimum number of repeats for which we'll test. + static const std::size_t MINIMUM_COUNT_TO_TEST; - private: - using TMinAccumulator = CBasicStatistics::COrderStatisticsHeap; - using TMaxAccumulator = CBasicStatistics::COrderStatisticsHeap>; - - private: - std::size_t count() const { return m_Min.count(); } - - private: - //! The minimum number of repeats for which we'll test. - static const std::size_t MINIMUM_COUNT_TO_TEST; - - private: - //! The mean of the trend. - double m_Level; - //! The total count of values added. - std::size_t m_Count; - //! The smallest values. - TMinAccumulator m_Min; - //! The largest values. - TMaxAccumulator m_Max; +private: + //! The mean of the trend. + double m_Level; + //! The total count of values added. + std::size_t m_Count; + //! The smallest values. + TMinAccumulator m_Min; + //! The largest values. + TMaxAccumulator m_Max; }; const std::size_t CMinAmplitude::MINIMUM_COUNT_TO_TEST{4}; @@ -135,11 +119,8 @@ using TMinAmplitudeVec = std::vector; //! \brief Holds the relevant summary for choosing between alternative //! (non-nested) hypotheses. 
-struct SHypothesisSummary -{ - SHypothesisSummary(double v, double DF, const CPeriodicityHypothesisTestsResult &H) : - s_V(v), s_DF(DF), s_H(H) - {} +struct SHypothesisSummary { + SHypothesisSummary(double v, double DF, const CPeriodicityHypothesisTestsResult& H) : s_V(v), s_DF(DF), s_H(H) {} double s_V; double s_DF; @@ -148,8 +129,7 @@ struct SHypothesisSummary using THypothesisSummaryVec = std::vector<SHypothesisSummary>; -enum EDiurnalComponents -{ +enum EDiurnalComponents { E_WeekendDay, E_WeekendWeek, E_WeekdayDay, @@ -160,11 +140,7 @@ enum EDiurnalComponents using TComponent4Vec = core::CSmallVector<EDiurnalComponents, 4>; -enum EThreshold -{ - E_LowThreshold, - E_HighThreshold -}; +enum EThreshold { E_LowThreshold, E_HighThreshold }; // Copy constants into scope. const core_t::TTime DAY{core::constants::DAY}; @@ -173,17 +149,9 @@ const core_t::TTime WEEK{core::constants::WEEK}; //! The periods of the diurnal components. const core_t::TTime DIURNAL_PERIODS[]{DAY, WEEK}; //! The weekend/day windows. -const TTimeTimePr DIURNAL_WINDOWS[]{{0, WEEKEND}, {WEEKEND, WEEK}, {0, WEEK}}; +const TTimeTimePr DIURNAL_WINDOWS[]{{0, WEEKEND}, {WEEKEND, WEEK}, {0, WEEK}}; //! The names of the diurnal periodic components. -const std::string DIURNAL_COMPONENT_NAMES[] = - { - "weekend daily", - "weekend weekly", - "weekday daily", - "weekday weekly", - "daily", - "weekly" - }; +const std::string DIURNAL_COMPONENT_NAMES[] = {"weekend daily", "weekend weekly", "weekday daily", "weekday weekly", "daily", "weekly"}; //! The confidence interval used for test statistic values. const double CONFIDENCE_INTERVAL{80.0}; @@ -191,21 +159,18 @@ const double CONFIDENCE_INTERVAL{80.0}; double HIGH_PRIORITY{2.0}; //! Fit and remove a linear trend from \p values. -void removeLinearTrend(TFloatMeanAccumulatorVec &values) -{ +void removeLinearTrend(TFloatMeanAccumulatorVec& values) { using TRegression = CRegression::CLeastSquaresOnline<1, double>; TRegression trend; double time{0.0}; double dt{10.0 / static_cast<double>(values.size())}; - for (const auto &value : values) - { + for (const auto& value : values) { trend.add(time, CBasicStatistics::mean(value), CBasicStatistics::count(value)); time += dt; } time = dt / 2.0; - for (auto &value : values) - { + for (auto& value : values) { CBasicStatistics::moment<0>(value) -= trend.predict(time); time += dt; } @@ -214,105 +179,73 @@ void removeLinearTrend(TFloatMeanAccumulatorVec &values) //! Get the correction to apply to the partition variance test //! statistic if there are \p bucketsPerWeek buckets in //! one repeat of the partitioning pattern. -double weekendPartitionVarianceCorrection(std::size_t bucketsPerWeek) -{ +double weekendPartitionVarianceCorrection(std::size_t bucketsPerWeek) { static const std::size_t BUCKETS_PER_WEEK[]{7, 14, 21, 28, 42, 56, 84, 168}; static const double CORRECTIONS[]{1.0, 1.0, 1.0, 1.12, 1.31, 1.31, 1.31, 1.31}; - std::ptrdiff_t index{std::min( std::lower_bound(boost::begin(BUCKETS_PER_WEEK), - boost::end(BUCKETS_PER_WEEK), - bucketsPerWeek) - - boost::begin(BUCKETS_PER_WEEK), - std::ptrdiff_t(boost::size(BUCKETS_PER_WEEK) - 1))}; + std::ptrdiff_t index{std::min( + std::lower_bound(boost::begin(BUCKETS_PER_WEEK), boost::end(BUCKETS_PER_WEEK), bucketsPerWeek) - boost::begin(BUCKETS_PER_WEEK), + std::ptrdiff_t(boost::size(BUCKETS_PER_WEEK) - 1))}; return CORRECTIONS[index]; } //! Compute the \p percentage % variance for a chi-squared random //! variance with \p df degrees of freedom.
-double varianceAtPercentile(double variance, double df, double percentage) -{ - try - { +double varianceAtPercentile(double variance, double df, double percentage) { + try { boost::math::chi_squared chi(df); return boost::math::quantile(chi, percentage / 100.0) / df * variance; - } - catch (const std::exception &e) - { - LOG_ERROR("Bad input: " << e.what() - << ", df = " << df - << ", percentage = " << percentage); - } + } catch (const std::exception& e) { LOG_ERROR("Bad input: " << e.what() << ", df = " << df << ", percentage = " << percentage); } return variance; } //! Compute the \p percentage % autocorrelation for a F distributed //! random autocorrelation with parameters \p n - 1 and \p n - 1. -double autocorrelationAtPercentile(double autocorrelation, double n, double percentage) -{ - try - { +double autocorrelationAtPercentile(double autocorrelation, double n, double percentage) { + try { boost::math::fisher_f f(n - 1.0, n - 1.0); return boost::math::quantile(f, percentage / 100.0) * autocorrelation; - } - catch (const std::exception &e) - { - LOG_ERROR("Bad input: " << e.what() - << ", n = " << n - << ", percentage = " << percentage); - } + } catch (const std::exception& e) { LOG_ERROR("Bad input: " << e.what() << ", n = " << n << ", percentage = " << percentage); } return autocorrelation; } //! Get the length of the \p window. template -T length(const std::pair &window) -{ +T length(const std::pair& window) { return window.second - window.first; } //! Get the total length of the \p windows. template -T length(const core::CSmallVector, 2> &windows) -{ - return std::accumulate(windows.begin(), windows.end(), 0, - [](core_t::TTime length_, const TTimeTimePr &window) - { return length_ + length(window); }); +T length(const core::CSmallVector, 2>& windows) { + return std::accumulate( + windows.begin(), windows.end(), 0, [](core_t::TTime length_, const TTimeTimePr& window) { return length_ + length(window); }); } //! Get the length of \p buckets. template -core_t::TTime length(const T &buckets, core_t::TTime bucketLength) -{ +core_t::TTime length(const T& buckets, core_t::TTime bucketLength) { return static_cast(buckets.size()) * bucketLength; } //! Compute the windows at repeat \p repeat with length \p length. -TTimeTimePr2Vec calculateWindows(core_t::TTime startOfWeek, - core_t::TTime window, - core_t::TTime repeat, - const TTimeTimePr &interval) -{ +TTimeTimePr2Vec calculateWindows(core_t::TTime startOfWeek, core_t::TTime window, core_t::TTime repeat, const TTimeTimePr& interval) { core_t::TTime a{startOfWeek + interval.first}; core_t::TTime b{startOfWeek + window}; core_t::TTime l{length(interval)}; TTimeTimePr2Vec result; result.reserve((b - a) / repeat); - for (core_t::TTime time = a; time < b; time += repeat) - { + for (core_t::TTime time = a; time < b; time += repeat) { result.emplace_back(time, time + l); } return result; } //! Get the index ranges corresponding to \p windows. 
-std::size_t calculateIndexWindows(const TTimeTimePr2Vec &windows, - core_t::TTime bucketLength, - TSizeSizePr2Vec &result) -{ +std::size_t calculateIndexWindows(const TTimeTimePr2Vec& windows, core_t::TTime bucketLength, TSizeSizePr2Vec& result) { std::size_t l(0); result.reserve(windows.size()); - for (const auto &window : windows) - { - core_t::TTime a{window.first / bucketLength}; + for (const auto& window : windows) { + core_t::TTime a{window.first / bucketLength}; core_t::TTime b{window.second / bucketLength}; result.emplace_back(a, b); l += b - a; @@ -321,25 +254,21 @@ std::size_t calculateIndexWindows(const TTimeTimePr2Vec &windows, } //! Compute the projection of \p values to \p windows. -void project(const TFloatMeanAccumulatorVec &values, - const TTimeTimePr2Vec &windows_, +void project(const TFloatMeanAccumulatorVec& values, + const TTimeTimePr2Vec& windows_, core_t::TTime bucketLength, - TFloatMeanAccumulatorVec &result) -{ + TFloatMeanAccumulatorVec& result) { result.clear(); - if (!values.empty()) - { + if (!values.empty()) { TSizeSizePr2Vec windows; calculateIndexWindows(windows_, bucketLength, windows); result.reserve(length(windows)); std::size_t n{values.size()}; - for (std::size_t i = 0u; i < windows.size(); ++i) - { + for (std::size_t i = 0u; i < windows.size(); ++i) { std::size_t a{windows[i].first}; std::size_t b{windows[i].second}; - for (std::size_t j = a; j < b; ++j) - { - const TFloatMeanAccumulator &value{values[j % n]}; + for (std::size_t j = a; j < b; ++j) { + const TFloatMeanAccumulator& value{values[j % n]}; result.push_back(value); } } @@ -348,63 +277,45 @@ void project(const TFloatMeanAccumulatorVec &values, //! Compute the periodic trend from \p values falling in \p windows. template -void periodicTrend(const U &values, - const TSizeSizePr2Vec &windows_, - core_t::TTime bucketLength, - V &trend) -{ - if (!trend.empty()) - { +void periodicTrend(const U& values, const TSizeSizePr2Vec& windows_, core_t::TTime bucketLength, V& trend) { + if (!trend.empty()) { TSizeSizePr2Vec windows; calculateIndexWindows(windows_, bucketLength, windows); std::size_t period{trend.size()}; std::size_t n{values.size()}; - for (std::size_t i = 0u; i < windows.size(); ++i) - { + for (std::size_t i = 0u; i < windows.size(); ++i) { std::size_t a{windows[i].first}; std::size_t b{windows[i].second}; - for (std::size_t j = a; j < b; ++j) - { - const TFloatMeanAccumulator &value{values[j % n]}; - trend[(j - a) % period].add(CBasicStatistics::mean(value), - CBasicStatistics::count(value)); + for (std::size_t j = a; j < b; ++j) { + const TFloatMeanAccumulator& value{values[j % n]}; + trend[(j - a) % period].add(CBasicStatistics::mean(value), CBasicStatistics::count(value)); } } } } //! Compute the average of the values at \p times. -void averageValue(const TFloatMeanAccumulatorVec &values, - const TTimeVec ×, - core_t::TTime bucketLength, - TMeanVarAccumulator &value) -{ - for (const auto time : times) - { +void averageValue(const TFloatMeanAccumulatorVec& values, const TTimeVec& times, core_t::TTime bucketLength, TMeanVarAccumulator& value) { + for (const auto time : times) { std::size_t index(time / bucketLength); - value.add(CBasicStatistics::mean(values[index]), - CBasicStatistics::count(values[index])); + value.add(CBasicStatistics::mean(values[index]), CBasicStatistics::count(values[index])); } } //! Get the maximum residual of \p trend. 
template -double trendAmplitude(const T &trend) -{ +double trendAmplitude(const T& trend) { using TMaxAccumulator = CBasicStatistics::SMax::TAccumulator; TMeanAccumulator level; - for (const auto &bucket : trend) - { + for (const auto& bucket : trend) { level.add(mean(bucket), count(bucket)); } TMaxAccumulator result; result.add(0.0); - for (const auto &bucket : trend) - { - if (count(bucket) > 0.0) - { + for (const auto& bucket : trend) { + if (count(bucket) > 0.0) { result.add(std::fabs(mean(bucket) - CBasicStatistics::mean(level))); } } @@ -414,256 +325,190 @@ double trendAmplitude(const T &trend) //! Extract the residual variance from the mean of a collection //! of residual variances. -double residualVariance(const TMeanAccumulator &mean) -{ +double residualVariance(const TMeanAccumulator& mean) { double n{CBasicStatistics::count(mean)}; return n <= 1.0 ? 0.0 : n / (n - 1.0) * std::max(CBasicStatistics::mean(mean), 0.0); } //! Extract the residual variance of \p bucket of a trend. -TMeanAccumulator residualVariance(const TMeanVarAccumulator &bucket, - double scale) -{ - return CBasicStatistics::accumulator(scale * CBasicStatistics::count(bucket), - CBasicStatistics::maximumLikelihoodVariance(bucket)); +TMeanAccumulator residualVariance(const TMeanVarAccumulator& bucket, double scale) { + return CBasicStatistics::accumulator(scale * CBasicStatistics::count(bucket), CBasicStatistics::maximumLikelihoodVariance(bucket)); } //! \brief Partially specialized helper class to get the trend //! residual variance as a specified type. -template struct SResidualVarianceImpl {}; +template +struct SResidualVarianceImpl {}; //! \brief Get the residual variance as a double. template<> -struct SResidualVarianceImpl -{ - static double get(const TMeanAccumulator &mean) - { - return residualVariance(mean); - } +struct SResidualVarianceImpl { + static double get(const TMeanAccumulator& mean) { return residualVariance(mean); } }; //! \brief Get the residual variance as a mean accumulator. template<> -struct SResidualVarianceImpl -{ - static TMeanAccumulator get(const TMeanAccumulator &mean) - { - return mean; - } +struct SResidualVarianceImpl { + static TMeanAccumulator get(const TMeanAccumulator& mean) { return mean; } }; //! Compute the residual variance of the trend \p trend. 
template -R residualVariance(const T &trend, double scale) -{ +R residualVariance(const T& trend, double scale) { TMeanAccumulator result; - for (const auto &bucket : trend) - { - result.add(CBasicStatistics::maximumLikelihoodVariance(bucket), - CBasicStatistics::count(bucket)); + for (const auto& bucket : trend) { + result.add(CBasicStatistics::maximumLikelihoodVariance(bucket), CBasicStatistics::count(bucket)); } result.s_Count *= scale; return SResidualVarianceImpl::get(result); } - } -bool CPeriodicityHypothesisTestsResult::operator==(const CPeriodicityHypothesisTestsResult &other) const -{ +bool CPeriodicityHypothesisTestsResult::operator==(const CPeriodicityHypothesisTestsResult& other) const { return m_Components == other.m_Components; } -const CPeriodicityHypothesisTestsResult & -CPeriodicityHypothesisTestsResult::operator+=(const CPeriodicityHypothesisTestsResult &other) -{ - m_Components.insert(m_Components.end(), - other.m_Components.begin(), - other.m_Components.end()); +const CPeriodicityHypothesisTestsResult& CPeriodicityHypothesisTestsResult::operator+=(const CPeriodicityHypothesisTestsResult& other) { + m_Components.insert(m_Components.end(), other.m_Components.begin(), other.m_Components.end()); return *this; } -void CPeriodicityHypothesisTestsResult::add(const std::string &description, +void CPeriodicityHypothesisTestsResult::add(const std::string& description, bool diurnal, core_t::TTime startOfPartition, core_t::TTime period, - const TTimeTimePr &window, - double precedence) -{ + const TTimeTimePr& window, + double precedence) { m_Components.emplace_back(description, diurnal, startOfPartition, period, window, precedence); } -void CPeriodicityHypothesisTestsResult::remove(const std::string &description) -{ - auto i = std::find_if(m_Components.begin(), m_Components.end(), - [&description](const SComponent &component) - { - return component.s_Description == description; - }); - if (i != m_Components.end()) - { +void CPeriodicityHypothesisTestsResult::remove(const std::string& description) { + auto i = std::find_if(m_Components.begin(), m_Components.end(), [&description](const SComponent& component) { + return component.s_Description == description; + }); + if (i != m_Components.end()) { m_Components.erase(i); } } -bool CPeriodicityHypothesisTestsResult::periodic() const -{ +bool CPeriodicityHypothesisTestsResult::periodic() const { return m_Components.size() > 0; } -const CPeriodicityHypothesisTestsResult::TComponent5Vec & -CPeriodicityHypothesisTestsResult::components() const -{ +const CPeriodicityHypothesisTestsResult::TComponent5Vec& CPeriodicityHypothesisTestsResult::components() const { return m_Components; } -std::string CPeriodicityHypothesisTestsResult::print() const -{ +std::string CPeriodicityHypothesisTestsResult::print() const { std::string result("{"); - for (const auto &component : m_Components) - { - result += " '" + component.s_Description + "'"; + for (const auto& component : m_Components) { + result += " '" + component.s_Description + "'"; } result += " }"; return result; } -CPeriodicityHypothesisTestsResult::SComponent::SComponent() : - s_Description(""), - s_Diurnal(false), - s_StartOfPartition(0), - s_Period(0), - s_Precedence(0.0) -{} +CPeriodicityHypothesisTestsResult::SComponent::SComponent() + : s_Description(""), s_Diurnal(false), s_StartOfPartition(0), s_Period(0), s_Precedence(0.0) { +} -CPeriodicityHypothesisTestsResult::SComponent::SComponent(const std::string &description, +CPeriodicityHypothesisTestsResult::SComponent::SComponent(const 
std::string& description, bool diurnal, core_t::TTime startOfPartition, core_t::TTime period, - const TTimeTimePr &window, - double precedence) : - s_Description(description), - s_Diurnal(diurnal), - s_StartOfPartition(startOfPartition), - s_Period(period), - s_Window(window), - s_Precedence(precedence) -{} - -bool CPeriodicityHypothesisTestsResult::SComponent::operator==(const SComponent &other) const -{ - return s_Description == other.s_Description - && s_StartOfPartition == other.s_StartOfPartition; -} - -CSeasonalTime *CPeriodicityHypothesisTestsResult::SComponent::seasonalTime() const -{ - if (s_Diurnal) - { - return new CDiurnalTime(s_StartOfPartition, - s_Window.first, - s_Window.second, - s_Period, s_Precedence); + const TTimeTimePr& window, + double precedence) + : s_Description(description), + s_Diurnal(diurnal), + s_StartOfPartition(startOfPartition), + s_Period(period), + s_Window(window), + s_Precedence(precedence) { +} + +bool CPeriodicityHypothesisTestsResult::SComponent::operator==(const SComponent& other) const { + return s_Description == other.s_Description && s_StartOfPartition == other.s_StartOfPartition; +} + +CSeasonalTime* CPeriodicityHypothesisTestsResult::SComponent::seasonalTime() const { + if (s_Diurnal) { + return new CDiurnalTime(s_StartOfPartition, s_Window.first, s_Window.second, s_Period, s_Precedence); } return new CGeneralPeriodTime(s_Period, s_Precedence); } +CPeriodicityHypothesisTestsConfig::CPeriodicityHypothesisTestsConfig() + : m_TestForDiurnal(true), m_HasDaily(false), m_HasWeekend(false), m_HasWeekly(false), m_StartOfWeek(0) { +} -CPeriodicityHypothesisTestsConfig::CPeriodicityHypothesisTestsConfig() : - m_TestForDiurnal(true), - m_HasDaily(false), - m_HasWeekend(false), - m_HasWeekly(false), - m_StartOfWeek(0) -{} - -void CPeriodicityHypothesisTestsConfig::disableDiurnal() -{ +void CPeriodicityHypothesisTestsConfig::disableDiurnal() { m_TestForDiurnal = false; } -void CPeriodicityHypothesisTestsConfig::hasDaily(bool value) -{ +void CPeriodicityHypothesisTestsConfig::hasDaily(bool value) { m_HasDaily = value; } -void CPeriodicityHypothesisTestsConfig::hasWeekend(bool value) -{ +void CPeriodicityHypothesisTestsConfig::hasWeekend(bool value) { m_HasWeekend = value; } -void CPeriodicityHypothesisTestsConfig::hasWeekly(bool value) -{ +void CPeriodicityHypothesisTestsConfig::hasWeekly(bool value) { m_HasWeekly = value; } -void CPeriodicityHypothesisTestsConfig::startOfWeek(core_t::TTime value) -{ +void CPeriodicityHypothesisTestsConfig::startOfWeek(core_t::TTime value) { m_StartOfWeek = value; } -bool CPeriodicityHypothesisTestsConfig::testForDiurnal() const -{ +bool CPeriodicityHypothesisTestsConfig::testForDiurnal() const { return m_TestForDiurnal; } -bool CPeriodicityHypothesisTestsConfig::hasDaily() const -{ +bool CPeriodicityHypothesisTestsConfig::hasDaily() const { return m_HasDaily; } -bool CPeriodicityHypothesisTestsConfig::hasWeekend() const -{ +bool CPeriodicityHypothesisTestsConfig::hasWeekend() const { return m_HasWeekend; } -bool CPeriodicityHypothesisTestsConfig::hasWeekly() const -{ +bool CPeriodicityHypothesisTestsConfig::hasWeekly() const { return m_HasWeekly; } -core_t::TTime CPeriodicityHypothesisTestsConfig::startOfWeek() const -{ +core_t::TTime CPeriodicityHypothesisTestsConfig::startOfWeek() const { return m_StartOfWeek; } +CPeriodicityHypothesisTests::CPeriodicityHypothesisTests() : m_BucketLength(0), m_WindowLength(0), m_Period(0) { +} +CPeriodicityHypothesisTests::CPeriodicityHypothesisTests(const 
CPeriodicityHypothesisTestsConfig& config) + : m_Config(config), m_BucketLength(0), m_WindowLength(0), m_Period(0) { +} -CPeriodicityHypothesisTests::CPeriodicityHypothesisTests() : - m_BucketLength(0), m_WindowLength(0), m_Period(0) -{} -CPeriodicityHypothesisTests::CPeriodicityHypothesisTests(const CPeriodicityHypothesisTestsConfig &config) : - m_Config(config), m_BucketLength(0), m_WindowLength(0), m_Period(0) -{} - -bool CPeriodicityHypothesisTests::initialized() const -{ +bool CPeriodicityHypothesisTests::initialized() const { return m_BucketValues.size() > 0; } -void CPeriodicityHypothesisTests::initialize(core_t::TTime bucketLength, - core_t::TTime windowLength, - core_t::TTime period) -{ +void CPeriodicityHypothesisTests::initialize(core_t::TTime bucketLength, core_t::TTime windowLength, core_t::TTime period) { m_BucketLength = bucketLength; m_WindowLength = windowLength; m_BucketValues.resize(static_cast(windowLength / m_BucketLength)); m_Period = period; } -void CPeriodicityHypothesisTests::add(core_t::TTime time, double value, double weight) -{ - if (!m_BucketValues.empty()) - { +void CPeriodicityHypothesisTests::add(core_t::TTime time, double value, double weight) { + if (!m_BucketValues.empty()) { std::size_t i((time % m_WindowLength) / m_BucketLength); m_BucketValues[i].add(value, weight); - if (weight > 0.0) - { + if (weight > 0.0) { m_TimeRange.add(time); } } } -CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const -{ +CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const { // We perform a series of tests of nested hypotheses about // the periodic components and weekday/end patterns. To test // for periodic components we compare the residual variance @@ -680,26 +525,21 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const // removing any periodic component we've already found from the // data. - if (!this->initialized()) - { + if (!this->initialized()) { return CPeriodicityHypothesisTestsResult(); } - auto window = [this](core_t::TTime period) - { - std::size_t bucketsPerPeriod(period / m_BucketLength); - std::size_t repeats{bucketsPerPeriod == 0 ? - 0 : m_BucketValues.size() / bucketsPerPeriod}; - core_t::TTime windowLength{static_cast(repeats) * period}; - return TTimeTimePr2Vec{{0, windowLength}}; - }; - auto buckets = [this](core_t::TTime period) - { - std::size_t bucketsPerPeriod(period / m_BucketLength); - std::size_t repeats{bucketsPerPeriod == 0 ? - 0 : m_BucketValues.size() / bucketsPerPeriod}; - return bucketsPerPeriod * repeats; - }; + auto window = [this](core_t::TTime period) { + std::size_t bucketsPerPeriod(period / m_BucketLength); + std::size_t repeats{bucketsPerPeriod == 0 ? 0 : m_BucketValues.size() / bucketsPerPeriod}; + core_t::TTime windowLength{static_cast(repeats) * period}; + return TTimeTimePr2Vec{{0, windowLength}}; + }; + auto buckets = [this](core_t::TTime period) { + std::size_t bucketsPerPeriod(period / m_BucketLength); + std::size_t repeats{bucketsPerPeriod == 0 ? 
0 : m_BucketValues.size() / bucketsPerPeriod}; + return bucketsPerPeriod * repeats; + }; TFloatMeanAccumulatorVec detrendedBucketValues(m_BucketValues); removeLinearTrend(detrendedBucketValues); @@ -707,10 +547,8 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const TTimeTimePr2Vec windowForTestingDaily(window(DAY)); TTimeTimePr2Vec windowForTestingWeekly(window(WEEK)); TTimeTimePr2Vec windowForTestingPeriod(window(m_Period)); - TFloatMeanAccumulatorCRng bucketsForTestingDaily[]{{m_BucketValues, 0, buckets(DAY)}, - {detrendedBucketValues, 0, buckets(DAY)}}; - TFloatMeanAccumulatorCRng bucketsForTestingWeekly[]{{m_BucketValues, 0, buckets(WEEK)}, - {detrendedBucketValues, 0, buckets(WEEK)}}; + TFloatMeanAccumulatorCRng bucketsForTestingDaily[]{{m_BucketValues, 0, buckets(DAY)}, {detrendedBucketValues, 0, buckets(DAY)}}; + TFloatMeanAccumulatorCRng bucketsForTestingWeekly[]{{m_BucketValues, 0, buckets(WEEK)}, {detrendedBucketValues, 0, buckets(WEEK)}}; TFloatMeanAccumulatorCRng bucketsForTestingPeriod[]{{m_BucketValues, 0, buckets(m_Period)}, {detrendedBucketValues, 0, buckets(m_Period)}}; @@ -721,31 +559,17 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const TNestedHypothesesVec hypotheses; - for (std::size_t i : {0, 1}) - { + for (std::size_t i : {0, 1}) { TNestedHypothesesVec hypotheses_; - if (this->seenSufficientDataToTest(WEEK, bucketsForTestingWeekly[i])) - { - this->hypothesesForWeekly(windowForTestingWeekly, - bucketsForTestingWeekly[i], - windowForTestingPeriod, - bucketsForTestingPeriod[i], - hypotheses_); - } - else if (this->seenSufficientDataToTest(DAY, bucketsForTestingDaily[i])) - { - this->hypothesesForDaily(windowForTestingDaily, - bucketsForTestingDaily[i], - windowForTestingPeriod, - bucketsForTestingPeriod[i], - hypotheses_); - } - else if (this->seenSufficientDataToTest(m_Period, bucketsForTestingPeriod[i])) - { - this->hypothesesForPeriod(windowForTestingPeriod, - bucketsForTestingPeriod[i], - hypotheses_); + if (this->seenSufficientDataToTest(WEEK, bucketsForTestingWeekly[i])) { + this->hypothesesForWeekly( + windowForTestingWeekly, bucketsForTestingWeekly[i], windowForTestingPeriod, bucketsForTestingPeriod[i], hypotheses_); + } else if (this->seenSufficientDataToTest(DAY, bucketsForTestingDaily[i])) { + this->hypothesesForDaily( + windowForTestingDaily, bucketsForTestingDaily[i], windowForTestingPeriod, bucketsForTestingPeriod[i], hypotheses_); + } else if (this->seenSufficientDataToTest(m_Period, bucketsForTestingPeriod[i])) { + this->hypothesesForPeriod(windowForTestingPeriod, bucketsForTestingPeriod[i], hypotheses_); } hypotheses.insert(hypotheses.end(), hypotheses_.begin(), hypotheses_.end()); @@ -754,265 +578,272 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const return this->best(hypotheses); } -void CPeriodicityHypothesisTests::hypothesesForWeekly(const TTimeTimePr2Vec &windowForTestingWeekly, - const TFloatMeanAccumulatorCRng &bucketsForTestingWeekly, - const TTimeTimePr2Vec &windowForTestingPeriod, - const TFloatMeanAccumulatorCRng &bucketsForTestingPeriod, - TNestedHypothesesVec &hypotheses) const -{ - if (WEEK % m_Period == 0) - { - auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); +void CPeriodicityHypothesisTests::hypothesesForWeekly(const TTimeTimePr2Vec& windowForTestingWeekly, + const TFloatMeanAccumulatorCRng& bucketsForTestingWeekly, + const TTimeTimePr2Vec& 
windowForTestingPeriod, + const TFloatMeanAccumulatorCRng& bucketsForTestingPeriod, + TNestedHypothesesVec& hypotheses) const { + if (WEEK % m_Period == 0) { + auto testForNull = boost::bind( + &CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingWeekly), boost::cref(bucketsForTestingWeekly), _1); auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); + this, + boost::cref(windowForTestingWeekly), + boost::cref(bucketsForTestingWeekly), + _1); auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); + this, + boost::cref(windowForTestingWeekly), + boost::cref(bucketsForTestingWeekly), + _1); auto testForWeekly = boost::bind(&CPeriodicityHypothesisTests::testForWeekly, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); - auto testForDailyWithWeekend = boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, - this, boost::cref(bucketsForTestingWeekly), _1); + this, + boost::cref(windowForTestingWeekly), + boost::cref(bucketsForTestingWeekly), + _1); + auto testForDailyWithWeekend = + boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, this, boost::cref(bucketsForTestingWeekly), _1); auto testForWeeklyGivenWeekend = boost::bind(&CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); + this, + boost::cref(windowForTestingWeekly), + boost::cref(bucketsForTestingWeekly), + _1); hypotheses.resize(1); - if (DAY % m_Period == 0) - { - hypotheses[0].null(testForNull) - .addNested(testForPeriod) - .addNested(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly) - .finishedNested() - .finishedNested() - .addAlternative(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly) - .finishedNested() - .addAlternative(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly); - } - else - { - hypotheses[0].null(testForNull) - .addNested(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly) - .finishedNested() - .addAlternative(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForPeriod) - .addNested(testForWeekly) - .finishedNested() - .addAlternative(testForWeekly); + if (DAY % m_Period == 0) { + hypotheses[0] + .null(testForNull) + .addNested(testForPeriod) + .addNested(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly) + .finishedNested() + .finishedNested() + .addAlternative(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly) + .finishedNested() + .addAlternative(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly); + } else { + hypotheses[0] + .null(testForNull) + .addNested(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + 
.finishedNested() + .addAlternative(testForWeekly) + .finishedNested() + .addAlternative(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForPeriod) + .addNested(testForWeekly) + .finishedNested() + .addAlternative(testForWeekly); } - } - else if (m_Period % WEEK == 0) - { - auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + } else if (m_Period % WEEK == 0) { + auto testForNull = boost::bind( + &CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingPeriod), boost::cref(bucketsForTestingPeriod), _1); auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); auto testForWeekly = boost::bind(&CPeriodicityHypothesisTests::testForWeekly, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); - auto testForDailyWithWeekend = boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, - this, boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); + auto testForDailyWithWeekend = + boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, this, boost::cref(bucketsForTestingPeriod), _1); auto testForWeeklyGivenWeekend = boost::bind(&CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); hypotheses.resize(1); - hypotheses[0].null(testForNull) - .addNested(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .addNested(testForPeriod) - .finishedNested() - .finishedNested() - .addAlternative(testForWeekly) - .addNested(testForPeriod) - .finishedNested() - .finishedNested() - .addAlternative(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .addNested(testForPeriod) - .finishedNested() - .finishedNested() - .addAlternative(testForWeekly) - .addNested(testForPeriod) - .finishedNested() - .addAlternative(testForPeriod); - } - else - { + hypotheses[0] + .null(testForNull) + .addNested(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .addNested(testForPeriod) + .finishedNested() + .finishedNested() + .addAlternative(testForWeekly) + .addNested(testForPeriod) + .finishedNested() + .finishedNested() + .addAlternative(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .addNested(testForPeriod) + .finishedNested() + .finishedNested() + .addAlternative(testForWeekly) + .addNested(testForPeriod) + .finishedNested() + .addAlternative(testForPeriod); + } else { { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); + this, + boost::cref(windowForTestingWeekly), + boost::cref(bucketsForTestingWeekly), + _1); auto 
testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); + this, + boost::cref(windowForTestingWeekly), + boost::cref(bucketsForTestingWeekly), + _1); auto testForWeekly = boost::bind(&CPeriodicityHypothesisTests::testForWeekly, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); - auto testForDailyWithWeekend = boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, - this, boost::cref(bucketsForTestingWeekly), _1); + this, + boost::cref(windowForTestingWeekly), + boost::cref(bucketsForTestingWeekly), + _1); + auto testForDailyWithWeekend = + boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, this, boost::cref(bucketsForTestingWeekly), _1); auto testForWeeklyGivenWeekend = boost::bind(&CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend, - this, boost::cref(windowForTestingWeekly), - boost::cref(bucketsForTestingWeekly), _1); + this, + boost::cref(windowForTestingWeekly), + boost::cref(bucketsForTestingWeekly), + _1); hypotheses.resize(2); - hypotheses[0].null(testForNull) - .addNested(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly) - .finishedNested() - .addAlternative(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly); + hypotheses[0] + .null(testForNull) + .addNested(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly) + .finishedNested() + .addAlternative(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly); } - if (m_Period % DAY == 0) - { + if (m_Period % DAY == 0) { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); - - hypotheses[1].null(testForNull) - .addNested(testForDaily) - .addNested(testForPeriod) - .finishedNested() - .addAlternative(testForPeriod); - } - else - { + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); + + hypotheses[1].null(testForNull).addNested(testForDaily).addNested(testForPeriod).finishedNested().addAlternative(testForPeriod); + } else { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); - hypotheses[1].null(testForNull) - 
.addNested(testForPeriod); + hypotheses[1].null(testForNull).addNested(testForPeriod); } } } -void CPeriodicityHypothesisTests::hypothesesForDaily(const TTimeTimePr2Vec &windowForTestingDaily, - const TFloatMeanAccumulatorCRng &bucketsForTestingDaily, - const TTimeTimePr2Vec &windowForTestingPeriod, - const TFloatMeanAccumulatorCRng &bucketsForTestingPeriod, - TNestedHypothesesVec &hypotheses) const -{ - if (DAY % m_Period == 0) - { - auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingDaily), - boost::cref(bucketsForTestingDaily), _1); - auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, - this, boost::cref(windowForTestingDaily), - boost::cref(bucketsForTestingDaily), _1); - auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily, - this, boost::cref(windowForTestingDaily), - boost::cref(bucketsForTestingDaily), _1); +void CPeriodicityHypothesisTests::hypothesesForDaily(const TTimeTimePr2Vec& windowForTestingDaily, + const TFloatMeanAccumulatorCRng& bucketsForTestingDaily, + const TTimeTimePr2Vec& windowForTestingPeriod, + const TFloatMeanAccumulatorCRng& bucketsForTestingPeriod, + TNestedHypothesesVec& hypotheses) const { + if (DAY % m_Period == 0) { + auto testForNull = boost::bind( + &CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingDaily), boost::cref(bucketsForTestingDaily), _1); + auto testForPeriod = boost::bind( + &CPeriodicityHypothesisTests::testForPeriod, this, boost::cref(windowForTestingDaily), boost::cref(bucketsForTestingDaily), _1); + auto testForDaily = boost::bind( + &CPeriodicityHypothesisTests::testForDaily, this, boost::cref(windowForTestingDaily), boost::cref(bucketsForTestingDaily), _1); hypotheses.resize(1); - hypotheses[0].null(testForNull) - .addNested(testForPeriod) - .addNested(testForDaily) - .finishedNested() - .addAlternative(testForDaily); - } - else if (m_Period % DAY == 0) - { - auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + hypotheses[0].null(testForNull).addNested(testForPeriod).addNested(testForDaily).finishedNested().addAlternative(testForDaily); + } else if (m_Period % DAY == 0) { + auto testForNull = boost::bind( + &CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingPeriod), boost::cref(bucketsForTestingPeriod), _1); auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); hypotheses.resize(1); - hypotheses[0].null(testForNull) - .addNested(testForDaily) - .addNested(testForPeriod); - } - else - { + hypotheses[0].null(testForNull).addNested(testForDaily).addNested(testForPeriod); + } else { { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingDaily), - boost::cref(bucketsForTestingDaily), _1); + this, + boost::cref(windowForTestingDaily), + boost::cref(bucketsForTestingDaily), + _1); auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily, - 
this, boost::cref(windowForTestingDaily), - boost::cref(bucketsForTestingDaily), _1); + this, + boost::cref(windowForTestingDaily), + boost::cref(bucketsForTestingDaily), + _1); hypotheses.resize(2); - hypotheses[0].null(testForNull) - .addNested(testForDaily); + hypotheses[0].null(testForNull).addNested(testForDaily); } { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, - this, boost::cref(windowForTestingPeriod), - boost::cref(bucketsForTestingPeriod), _1); - hypotheses[1].null(testForNull) - .addNested(testForPeriod); + this, + boost::cref(windowForTestingPeriod), + boost::cref(bucketsForTestingPeriod), + _1); + hypotheses[1].null(testForNull).addNested(testForPeriod); } } } -void CPeriodicityHypothesisTests::hypothesesForPeriod(const TTimeTimePr2Vec &windows, - const TFloatMeanAccumulatorCRng &buckets, - TNestedHypothesesVec &hypotheses) const -{ - auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, - this, boost::cref(windows), boost::cref(buckets), _1); - auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, - this, boost::cref(windows), boost::cref(buckets), _1); +void CPeriodicityHypothesisTests::hypothesesForPeriod(const TTimeTimePr2Vec& windows, + const TFloatMeanAccumulatorCRng& buckets, + TNestedHypothesesVec& hypotheses) const { + auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, this, boost::cref(windows), boost::cref(buckets), _1); + auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, this, boost::cref(windows), boost::cref(buckets), _1); hypotheses.resize(1); - hypotheses[0].null(testForNull) - .addNested(testForPeriod); + hypotheses[0].null(testForNull).addNested(testForPeriod); } -CPeriodicityHypothesisTestsResult -CPeriodicityHypothesisTests::best(const TNestedHypothesesVec &hypotheses) const -{ +CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::best(const TNestedHypothesesVec& hypotheses) const { // Note if there isn't a clear cut best hypothesis for variance // reduction we choose the simplest hypothesis, i.e. with maximum // degrees-of-freedom. 
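The selection rule sketched in the comment above is worth a concrete illustration. The following sketch is not part of the patch: Summary, simplestNearBest, upperBound and lowerBound are hypothetical stand-ins for THypothesisSummaryVec's elements and the confidence bounds which the real code computes with varianceAtPercentile; the fixed +/-10% band below merely plays that role. The rule keeps every hypothesis whose variance is statistically indistinguishable from the smallest one and, among those, prefers the hypothesis with the most degrees of freedom, i.e. the fewest fitted parameters.

#include <algorithm>
#include <limits>
#include <vector>

struct Summary {
    double s_V;  // residual variance under the hypothesis
    double s_DF; // degrees of freedom: populated buckets minus fitted parameters
};

// Illustrative stand-ins for varianceAtPercentile(V, DF, 50 +/- CONFIDENCE_INTERVAL / 2):
// a fixed +/-10% band rather than the real quantiles of the variance estimate.
double upperBound(const Summary& s) { return 1.1 * s.s_V; }
double lowerBound(const Summary& s) { return 0.9 * s.s_V; }

const Summary* simplestNearBest(const std::vector<Summary>& summaries) {
    // The cutoff is the smallest upper confidence bound on the variance.
    double cutoff{std::numeric_limits<double>::max()};
    for (const auto& summary : summaries) {
        cutoff = std::min(cutoff, upperBound(summary));
    }
    // Among hypotheses whose lower bound is below the cutoff, i.e. which
    // plausibly reduce the variance as much as the best one, prefer the
    // maximum degrees of freedom.
    const Summary* result{nullptr};
    for (const auto& summary : summaries) {
        if (lowerBound(summary) <= cutoff &&
            (result == nullptr || summary.s_DF > result->s_DF)) {
            result = &summary;
        }
    }
    return result;
}

The bias towards maximum degrees of freedom is what makes the test prefer, say, a plain daily component over a daily component with a weekend split when both explain the variance equally well.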
@@ -1026,34 +857,25 @@ CPeriodicityHypothesisTests::best(const TNestedHypothesesVec &hypotheses) const THypothesisSummaryVec summaries; summaries.reserve(hypotheses.size()); - for (const auto &hypothesis : hypotheses) - { + for (const auto& hypothesis : hypotheses) { STestStats stats; CPeriodicityHypothesisTestsResult resultForHypothesis{hypothesis.test(stats)}; - if (stats.s_B > stats.s_DF0) - { - summaries.emplace_back(stats.s_V0, stats.s_B - stats.s_DF0, - std::move(resultForHypothesis)); + if (stats.s_B > stats.s_DF0) { + summaries.emplace_back(stats.s_V0, stats.s_B - stats.s_DF0, std::move(resultForHypothesis)); } } TMinAccumulator vCutoff; - for (const auto &summary : summaries) - { - vCutoff.add(varianceAtPercentile(summary.s_V, summary.s_DF, - 50.0 + CONFIDENCE_INTERVAL / 2.0)); + for (const auto& summary : summaries) { + vCutoff.add(varianceAtPercentile(summary.s_V, summary.s_DF, 50.0 + CONFIDENCE_INTERVAL / 2.0)); } - if (vCutoff.count() > 0) - { + if (vCutoff.count() > 0) { LOG_TRACE("variance cutoff = " << vCutoff[0]); TMinAccumulator df; - for (const auto &summary : summaries) - { - double v{varianceAtPercentile(summary.s_V, summary.s_DF, - 50.0 - CONFIDENCE_INTERVAL / 2.0)}; - if (v <= vCutoff[0] && df.add(-summary.s_DF)) - { + for (const auto& summary : summaries) { + double v{varianceAtPercentile(summary.s_V, summary.s_DF, 50.0 - CONFIDENCE_INTERVAL / 2.0)}; + if (v <= vCutoff[0] && df.add(-summary.s_DF)) { result = summary.s_H; } } @@ -1062,36 +884,29 @@ CPeriodicityHypothesisTests::best(const TNestedHypothesesVec &hypotheses) const } CPeriodicityHypothesisTestsResult -CPeriodicityHypothesisTests::testForNull(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ +CPeriodicityHypothesisTests::testForNull(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const { LOG_TRACE("Testing null on " << core::CContainerPrinter::print(window)); this->nullHypothesis(window, buckets, stats); return CPeriodicityHypothesisTestsResult(); } -CPeriodicityHypothesisTestsResult -CPeriodicityHypothesisTests::testForDaily(const TTimeTimePr2Vec &windows, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ +CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForDaily(const TTimeTimePr2Vec& windows, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const { LOG_TRACE("Testing daily on " << core::CContainerPrinter::print(windows)); CPeriodicityHypothesisTestsResult result{stats.s_H0}; stats.s_HasPeriod = m_Config.hasDaily(); - stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[E_LowThreshold], - SIGNIFICANT_AMPLITUDE[E_LowThreshold], - SIGNIFICANT_AUTOCORRELATION[E_LowThreshold]); - - if ( m_Config.testForDiurnal() - && m_BucketLength <= DAY / 4 - && this->seenSufficientDataToTest(DAY, buckets) - && this->testPeriod(windows, buckets, DAY, stats)) - { + stats.setThresholds( + SIGNIFICANT_VARIANCE_REDUCTION[E_LowThreshold], SIGNIFICANT_AMPLITUDE[E_LowThreshold], SIGNIFICANT_AUTOCORRELATION[E_LowThreshold]); + + if (m_Config.testForDiurnal() && m_BucketLength <= DAY / 4 && this->seenSufficientDataToTest(DAY, buckets) && + this->testPeriod(windows, buckets, DAY, stats)) { this->hypothesis({DAY}, buckets, stats); - result.add(DIURNAL_COMPONENT_NAMES[E_Day], true, 0, + result.add(DIURNAL_COMPONENT_NAMES[E_Day], + true, + 0, DIURNAL_PERIODS[static_cast(E_Day) % 2], DIURNAL_WINDOWS[static_cast(E_Day) / 2]); } @@ -1099,29 +914,25 @@ 
CPeriodicityHypothesisTests::testForDaily(const TTimeTimePr2Vec &windows, return result; } -CPeriodicityHypothesisTestsResult -CPeriodicityHypothesisTests::testForWeekly(const TTimeTimePr2Vec &windows, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ +CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForWeekly(const TTimeTimePr2Vec& windows, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const { LOG_TRACE("Testing weekly on " << core::CContainerPrinter::print(windows)); CPeriodicityHypothesisTestsResult result{stats.s_H0}; stats.s_HasPeriod = m_Config.hasWeekly(); - stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[E_LowThreshold], - SIGNIFICANT_AMPLITUDE[E_LowThreshold], - SIGNIFICANT_AUTOCORRELATION[E_LowThreshold]); - - if ( m_Config.testForDiurnal() - && m_BucketLength <= WEEK / 4 - && this->seenSufficientDataToTest(WEEK, buckets) - && this->testPeriod(windows, buckets, WEEK, stats)) - { + stats.setThresholds( + SIGNIFICANT_VARIANCE_REDUCTION[E_LowThreshold], SIGNIFICANT_AMPLITUDE[E_LowThreshold], SIGNIFICANT_AUTOCORRELATION[E_LowThreshold]); + + if (m_Config.testForDiurnal() && m_BucketLength <= WEEK / 4 && this->seenSufficientDataToTest(WEEK, buckets) && + this->testPeriod(windows, buckets, WEEK, stats)) { stats.s_StartOfPartition = 0; stats.s_Partition.assign(1, {0, length(buckets, m_BucketLength)}); this->hypothesis({WEEK}, buckets, stats); - result.add(DIURNAL_COMPONENT_NAMES[E_Week], true, 0, + result.add(DIURNAL_COMPONENT_NAMES[E_Week], + true, + 0, DIURNAL_PERIODS[static_cast(E_Week) % 2], DIURNAL_WINDOWS[static_cast(E_Week) / 2]); } @@ -1129,15 +940,13 @@ CPeriodicityHypothesisTests::testForWeekly(const TTimeTimePr2Vec &windows, return result; } -CPeriodicityHypothesisTestsResult -CPeriodicityHypothesisTests::testForDailyWithWeekend(const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ +CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForDailyWithWeekend(const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const { LOG_TRACE("Testing for weekend"); CPeriodicityHypothesisTestsResult result{stats.s_H0}; - stats.s_HasPartition = m_Config.hasWeekend(); + stats.s_HasPartition = m_Config.hasWeekend(); stats.s_StartOfPartition = m_Config.hasWeekend() ? 
m_Config.startOfWeek() : 0; stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[E_HighThreshold], SIGNIFICANT_AMPLITUDE[E_HighThreshold], @@ -1146,22 +955,21 @@ CPeriodicityHypothesisTests::testForDailyWithWeekend(const TFloatMeanAccumulator TTimeTimePr2Vec partition{{0, WEEKEND}, {WEEKEND, WEEK}}; std::size_t bucketsPerWeek(WEEK / m_BucketLength); - if ( m_Config.testForDiurnal() - && m_BucketLength <= DAY / 4 - && this->seenSufficientDataToTest(WEEK, buckets) - && this->testPartition(partition, buckets, DAY, - weekendPartitionVarianceCorrection(bucketsPerWeek), - stats)) - { + if (m_Config.testForDiurnal() && m_BucketLength <= DAY / 4 && this->seenSufficientDataToTest(WEEK, buckets) && + this->testPartition(partition, buckets, DAY, weekendPartitionVarianceCorrection(bucketsPerWeek), stats)) { stats.s_Partition = partition; this->hypothesis({DAY, DAY}, buckets, stats); core_t::TTime startOfWeek{stats.s_StartOfPartition}; result.remove(DIURNAL_COMPONENT_NAMES[E_Day]); - result.add(DIURNAL_COMPONENT_NAMES[E_WeekendDay], true, startOfWeek, + result.add(DIURNAL_COMPONENT_NAMES[E_WeekendDay], + true, + startOfWeek, DIURNAL_PERIODS[static_cast(E_WeekendDay) % 2], DIURNAL_WINDOWS[static_cast(E_WeekendDay) / 2], HIGH_PRIORITY); - result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayDay], true, startOfWeek, + result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayDay], + true, + startOfWeek, DIURNAL_PERIODS[static_cast(E_WeekdayDay) % 2], DIURNAL_WINDOWS[static_cast(E_WeekdayDay) / 2], HIGH_PRIORITY); @@ -1170,31 +978,31 @@ CPeriodicityHypothesisTests::testForDailyWithWeekend(const TFloatMeanAccumulator return result; } -CPeriodicityHypothesisTestsResult -CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend(const TTimeTimePr2Vec &windows, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ +CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend(const TTimeTimePr2Vec& windows, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const { LOG_TRACE("Testing for weekly given weekend on " << core::CContainerPrinter::print(windows)); CPeriodicityHypothesisTestsResult result(stats.s_H0); - if (!m_Config.testForDiurnal()) - { + if (!m_Config.testForDiurnal()) { return result; } core_t::TTime startOfWeek{stats.s_StartOfPartition}; CPeriodicityHypothesisTestsResult resultForWeekly{this->testForWeekly(windows, buckets, stats)}; - if (resultForWeekly != result) - { + if (resultForWeekly != result) { // Note that testForWeekly sets up the hypothesis for us. 
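            // (Illustrative note, not in the original source: assuming WEEKEND
            // spans two days and WEEK seven, with startOfWeek = 0 the weekday
            // windows computed below by calculateWindows(startOfWeek,
            // windowLength, WEEK, {WEEKEND, WEEK}) are [2 days, 7 days),
            // [9 days, 14 days), ... and the weekend windows for {0, WEEKEND}
            // are [0, 2 days), [7 days, 9 days), ...; testForWeekly is then
            // rerun on each projection separately.)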
- result.add(DIURNAL_COMPONENT_NAMES[E_WeekendWeek], true, startOfWeek, + result.add(DIURNAL_COMPONENT_NAMES[E_WeekendWeek], + true, + startOfWeek, DIURNAL_PERIODS[static_cast(E_WeekendWeek) % 2], DIURNAL_WINDOWS[static_cast(E_WeekendWeek) / 2], HIGH_PRIORITY); - result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayWeek], true, startOfWeek, + result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayWeek], + true, + startOfWeek, DIURNAL_PERIODS[static_cast(E_WeekdayWeek) % 2], DIURNAL_WINDOWS[static_cast(E_WeekdayWeek) / 2], HIGH_PRIORITY); @@ -1206,12 +1014,13 @@ CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend(const TTimeTimeP TTimeTimePr2Vec weekday(calculateWindows(startOfWeek, windowLength, WEEK, {WEEKEND, WEEK})); CPeriodicityHypothesisTestsResult resultForWeekday{this->testForWeekly(weekday, buckets, stats)}; - if (resultForWeekday != result) - { + if (resultForWeekday != result) { stats.s_StartOfPartition = startOfWeek; stats.s_Partition = partition; this->hypothesis({DAY, WEEK}, buckets, stats); - result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayWeek], true, startOfWeek, + result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayWeek], + true, + startOfWeek, DIURNAL_PERIODS[static_cast(E_WeekdayWeek) % 2], DIURNAL_WINDOWS[static_cast(E_WeekdayWeek) / 2], HIGH_PRIORITY); @@ -1220,12 +1029,13 @@ CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend(const TTimeTimeP TTimeTimePr2Vec weekend(calculateWindows(startOfWeek, windowLength, WEEK, {0, WEEKEND})); CPeriodicityHypothesisTestsResult resultForWeekend{this->testForWeekly(weekend, buckets, stats)}; - if (resultForWeekend != result) - { + if (resultForWeekend != result) { stats.s_StartOfPartition = startOfWeek; stats.s_Partition = partition; this->hypothesis({WEEK, DAY}, buckets, stats); - result.add(DIURNAL_COMPONENT_NAMES[E_WeekendWeek], true, startOfWeek, + result.add(DIURNAL_COMPONENT_NAMES[E_WeekendWeek], + true, + startOfWeek, DIURNAL_PERIODS[static_cast(E_WeekendWeek) % 2], DIURNAL_WINDOWS[static_cast(E_WeekendWeek) / 2], HIGH_PRIORITY); @@ -1234,58 +1044,39 @@ CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend(const TTimeTimeP return result; } -CPeriodicityHypothesisTestsResult -CPeriodicityHypothesisTests::testForPeriod(const TTimeTimePr2Vec &windows, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ +CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForPeriod(const TTimeTimePr2Vec& windows, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const { LOG_TRACE("Testing for " << m_Period << " on " << core::CContainerPrinter::print(windows)); CPeriodicityHypothesisTestsResult result{stats.s_H0}; - if ( m_Period != DAY - && m_Period != WEEK - && m_BucketLength <= m_Period / 4 - && this->seenSufficientDataToTest(m_Period, buckets)) - { + if (m_Period != DAY && m_Period != WEEK && m_BucketLength <= m_Period / 4 && this->seenSufficientDataToTest(m_Period, buckets)) { stats.s_HasPeriod = false; EThreshold index{m_Period % DAY == 0 ? 
E_LowThreshold : E_HighThreshold}; - stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[index], - SIGNIFICANT_AMPLITUDE[index], - SIGNIFICANT_AUTOCORRELATION[index]); - if (this->testPeriod(windows, buckets, m_Period, stats)) - { + stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[index], SIGNIFICANT_AMPLITUDE[index], SIGNIFICANT_AUTOCORRELATION[index]); + if (this->testPeriod(windows, buckets, m_Period, stats)) { stats.s_StartOfPartition = 0; stats.s_Partition.assign(1, {0, length(buckets, m_BucketLength)}); this->hypothesis({m_Period}, buckets, stats); - result.add(core::CStringUtils::typeToString(m_Period), - false, 0, m_Period, {0, m_Period}); + result.add(core::CStringUtils::typeToString(m_Period), false, 0, m_Period, {0, m_Period}); } } return result; } -bool CPeriodicityHypothesisTests::seenSufficientDataToTest(core_t::TTime period, - const TFloatMeanAccumulatorCRng &buckets) const -{ - return (buckets.size() * m_BucketLength) / period >= 2 - && m_TimeRange.initialized() - && static_cast(m_TimeRange.range()) - >= 2.0 * ACCURATE_TEST_POPULATED_FRACTION * static_cast(period); +bool CPeriodicityHypothesisTests::seenSufficientDataToTest(core_t::TTime period, const TFloatMeanAccumulatorCRng& buckets) const { + return (buckets.size() * m_BucketLength) / period >= 2 && m_TimeRange.initialized() && + static_cast(m_TimeRange.range()) >= 2.0 * ACCURATE_TEST_POPULATED_FRACTION * static_cast(period); } -bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTest(const TFloatMeanAccumulatorCRng &buckets, - std::size_t period) const -{ +bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTest(const TFloatMeanAccumulatorCRng& buckets, + std::size_t period) const { double repeats{0.0}; - for (std::size_t i = 0u; i < period; ++i) - { - for (std::size_t j = i + period; j < buckets.size(); j += period) - { - if ( CBasicStatistics::count(buckets[j]) - * CBasicStatistics::count(buckets[j - period]) > 0.0) - { + for (std::size_t i = 0u; i < period; ++i) { + for (std::size_t j = i + period; j < buckets.size(); j += period) { + if (CBasicStatistics::count(buckets[j]) * CBasicStatistics::count(buckets[j - period]) > 0.0) { repeats += 1.0; break; } @@ -1295,25 +1086,20 @@ bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTe return repeats >= static_cast(period) * ACCURATE_TEST_POPULATED_FRACTION / 3.0; } -bool CPeriodicityHypothesisTests::testStatisticsFor(const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ +bool CPeriodicityHypothesisTests::testStatisticsFor(const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const { CBasicStatistics::CMinMax range; double populated{0.0}; double count{0.0}; - for (std::size_t i = 0u; i < buckets.size(); ++i) - { + for (std::size_t i = 0u; i < buckets.size(); ++i) { double ni{CBasicStatistics::count(buckets[i])}; count += ni; - if (ni > 0.0) - { + if (ni > 0.0) { populated += 1.0; range.add(static_cast(i)); } } - if (populated == 0.0) - { + if (populated == 0.0) { return false; } @@ -1322,19 +1108,15 @@ bool CPeriodicityHypothesisTests::testStatisticsFor(const TFloatMeanAccumulatorC stats.s_Range = range.max() - range.min(); stats.s_B = populated; stats.s_M = count / stats.s_B; - LOG_TRACE("range = " << stats.s_Range - << ", populatedBuckets = " << stats.s_B - << ", valuesPerBucket = " << stats.s_M); + LOG_TRACE("range = " << stats.s_Range << ", populatedBuckets = " << stats.s_B << ", valuesPerBucket = " << stats.s_M); return true; } -void 
CPeriodicityHypothesisTests::nullHypothesis(const TTimeTimePr2Vec &window, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ - if (this->testStatisticsFor(buckets, stats)) - { +void CPeriodicityHypothesisTests::nullHypothesis(const TTimeTimePr2Vec& window, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const { + if (this->testStatisticsFor(buckets, stats)) { TMeanVarAccumulatorVec trend(1); periodicTrend(buckets, window, m_BucketLength, trend); double mean{CBasicStatistics::mean(trend[0])}; @@ -1342,76 +1124,57 @@ void CPeriodicityHypothesisTests::nullHypothesis(const TTimeTimePr2Vec &window, LOG_TRACE("mean = " << mean); LOG_TRACE("variance = " << v0); stats.s_DF0 = 1.0; - stats.s_V0 = v0; + stats.s_V0 = v0; stats.s_T0.assign(1, TDoubleVec{mean}); stats.s_Partition = window; } } -void CPeriodicityHypothesisTests::hypothesis(const TTime2Vec &periods, - const TFloatMeanAccumulatorCRng &buckets, - STestStats &stats) const -{ - if (this->testStatisticsFor(buckets, stats)) - { - stats.s_V0 = 0.0; +void CPeriodicityHypothesisTests::hypothesis(const TTime2Vec& periods, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const { + if (this->testStatisticsFor(buckets, stats)) { + stats.s_V0 = 0.0; stats.s_DF0 = 0.0; - stats.s_T0 = TDoubleVec2Vec(stats.s_Partition.size()); - for (std::size_t i = 0u; i < stats.s_Partition.size(); ++i) - { - core_t::TTime period_{ std::min(periods[i], length(stats.s_Partition[i])) - / m_BucketLength}; - TTimeTimePr2Vec windows(calculateWindows(stats.s_StartOfPartition, - length(buckets, m_BucketLength), - length(stats.s_Partition), - stats.s_Partition[i])); + stats.s_T0 = TDoubleVec2Vec(stats.s_Partition.size()); + for (std::size_t i = 0u; i < stats.s_Partition.size(); ++i) { + core_t::TTime period_{std::min(periods[i], length(stats.s_Partition[i])) / m_BucketLength}; + TTimeTimePr2Vec windows(calculateWindows( + stats.s_StartOfPartition, length(buckets, m_BucketLength), length(stats.s_Partition), stats.s_Partition[i])); TMeanVarAccumulatorVec trend(periods[i] / m_BucketLength); periodicTrend(buckets, windows, m_BucketLength, trend); - stats.s_V0 += residualVariance(trend, 1.0 / stats.s_M); - stats.s_DF0 += static_cast( - std::count_if(trend.begin(), trend.end(), - [](const TMeanVarAccumulator &value) - { return CBasicStatistics::count(value) > 0.0; })); + stats.s_V0 += residualVariance(trend, 1.0 / stats.s_M); + stats.s_DF0 += static_cast(std::count_if( + trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) { return CBasicStatistics::count(value) > 0.0; })); stats.s_T0[i].reserve(period_); - std::for_each(trend.begin(), trend.end(), - [&stats, i](const TMeanVarAccumulator &value) - { stats.s_T0[i].push_back(CBasicStatistics::mean(value)); }); + std::for_each(trend.begin(), trend.end(), [&stats, i](const TMeanVarAccumulator& value) { + stats.s_T0[i].push_back(CBasicStatistics::mean(value)); + }); } stats.s_V0 /= static_cast(periods.size()); } } -void CPeriodicityHypothesisTests::conditionOnHypothesis(const TTimeTimePr2Vec &windows, - const STestStats &stats, - TFloatMeanAccumulatorVec &buckets) const -{ +void CPeriodicityHypothesisTests::conditionOnHypothesis(const TTimeTimePr2Vec& windows, + const STestStats& stats, + TFloatMeanAccumulatorVec& buckets) const { std::size_t n{buckets.size()}; core_t::TTime windowLength{static_cast(n) * m_BucketLength}; - for (std::size_t i = 0u; i < stats.s_Partition.size(); ++i) - { - TTimeTimePr2Vec windows_(calculateWindows(stats.s_StartOfPartition, - windowLength, - 
length(stats.s_Partition), - stats.s_Partition[i])); + for (std::size_t i = 0u; i < stats.s_Partition.size(); ++i) { + TTimeTimePr2Vec windows_(calculateWindows(stats.s_StartOfPartition, windowLength, length(stats.s_Partition), stats.s_Partition[i])); TSizeSizePr2Vec indexWindows; calculateIndexWindows(windows_, m_BucketLength, indexWindows); std::size_t period{stats.s_T0[i].size()}; - LOG_TRACE("Conditioning on period = " << period - << " in windows = " << core::CContainerPrinter::print(windows_)); - for (const auto &window : indexWindows) - { + LOG_TRACE("Conditioning on period = " << period << " in windows = " << core::CContainerPrinter::print(windows_)); + for (const auto& window : indexWindows) { std::size_t a{window.first}; std::size_t b{window.second}; - for (std::size_t j = a; j < b; ++j) - { + for (std::size_t j = a; j < b; ++j) { CBasicStatistics::moment<0>(buckets[j % n]) -= stats.s_T0[i][(j - a) % period]; } } } - if (length(windows) < windowLength) - { + if (length(windows) < windowLength) { LOG_TRACE("Projecting onto " << core::CContainerPrinter::print(windows)); TFloatMeanAccumulatorVec projection; project(buckets, windows, m_BucketLength, projection); @@ -1420,11 +1183,10 @@ void CPeriodicityHypothesisTests::conditionOnHypothesis(const TTimeTimePr2Vec &w } } -bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows, - const TFloatMeanAccumulatorCRng &buckets, +bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows, + const TFloatMeanAccumulatorCRng& buckets, core_t::TTime period_, - STestStats &stats) const -{ + STestStats& stats) const { // We use two tests to check for the period: // 1) That it explains both a non-negligible absolute and statistically // significant amount of variance and the cyclic autocorrelation at @@ -1434,8 +1196,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows, LOG_TRACE("Testing period " << period_); - if (!this->testStatisticsFor(buckets, stats) || stats.nullHypothesisGoodEnough()) - { + if (!this->testStatisticsFor(buckets, stats) || stats.nullHypothesisGoodEnough()) { return false; } @@ -1444,12 +1205,10 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows, // We need to observe a minimum number of repeated values to test with // an acceptable false positive rate. 
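    // (Illustrative note: with ACCURATE_TEST_POPULATED_FRACTION = 0.9 the
    // check below requires repeats >= period * 0.9 / 3. For example, when
    // testing a daily period on five minute buckets, period = 288, so at
    // least 87 of the 288 bucket offsets must contain a value which recurs
    // in a later repeat of the period.)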
- if (!this->seenSufficientPeriodicallyPopulatedBucketsToTest(buckets, period)) - { + if (!this->seenSufficientPeriodicallyPopulatedBucketsToTest(buckets, period)) { return false; } - if (stats.s_HasPeriod) - { + if (stats.s_HasPeriod) { return true; } @@ -1458,8 +1217,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows, double B{stats.s_B}; double scale{1.0 / M}; double df0{B - stats.s_DF0}; - if (df0 <= 0.0) - { + if (df0 <= 0.0) { return false; } double v0{varianceAtPercentile(stats.s_V0, df0, 50.0 + CONFIDENCE_INTERVAL / 2.0)}; @@ -1475,30 +1233,23 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows, TMeanVarAccumulatorVec trend(period); periodicTrend(values, window, m_BucketLength, trend); double b{static_cast( - std::count_if(trend.begin(), trend.end(), - [](const TMeanVarAccumulator &value) - { return CBasicStatistics::count(value) > 0.0; }))}; + std::count_if(trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) { return CBasicStatistics::count(value) > 0.0; }))}; LOG_TRACE(" populated = " << b); double df1{B - b}; - if (df1 > 0.0) - { - double v1{varianceAtPercentile(residualVariance(trend, scale), df1, - 50.0 + CONFIDENCE_INTERVAL / 2.0)}; + if (df1 > 0.0) { + double v1{varianceAtPercentile(residualVariance(trend, scale), df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)}; LOG_TRACE(" variance = " << v1); LOG_TRACE(" varianceThreshold = " << vt); LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0)); double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)}; - if ( v1 < vt && B > 1.0 - && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE) - { + if (v1 < vt && B > 1.0 && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE) { double R{CSignal::autocorrelation(period, values)}; R = autocorrelationAtPercentile(R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0); LOG_TRACE(" autocorrelation = " << R); LOG_TRACE(" autocorrelationThreshold = " << Rt); - if (R > Rt) - { + if (R > Rt) { return true; } } @@ -1506,63 +1257,47 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows, // The amplitude test. 
double F1{1.0}; - if (v1 > 0.0) - { - try - { - std::size_t n{static_cast<std::size_t>( - std::ceil(Rt * static_cast<double>(length(window) / period_)))}; + if (v1 > 0.0) { + try { + std::size_t n{static_cast<std::size_t>(std::ceil(Rt * static_cast<double>(length(window) / period_)))}; TMeanAccumulator level; - for (const auto &value : values) - { - if (CBasicStatistics::count(value) > 0.0) - { + for (const auto& value : values) { + if (CBasicStatistics::count(value) > 0.0) { level.add(CBasicStatistics::mean(value)); } } TMinAmplitudeVec amplitudes(period, {n, CBasicStatistics::mean(level)}); periodicTrend(values, window, m_BucketLength, amplitudes); boost::math::normal normal(0.0, std::sqrt(v1)); - std::for_each(amplitudes.begin(), amplitudes.end(), - [&F1, &normal, at](CMinAmplitude &x) - { - if (x.amplitude() >= at) - { - F1 = std::min(F1, x.significance(normal)); - } - }); - } - catch (const std::exception &e) - { - LOG_ERROR("Unable to compute significance of amplitude: " << e.what()); - } + std::for_each(amplitudes.begin(), amplitudes.end(), [&F1, &normal, at](CMinAmplitude& x) { + if (x.amplitude() >= at) { + F1 = std::min(F1, x.significance(normal)); + } + }); + } catch (const std::exception& e) { LOG_ERROR("Unable to compute significance of amplitude: " << e.what()); } } LOG_TRACE(" F(amplitude) = " << F1); - if (1.0 - std::pow(1.0 - F1, b) <= MAXIMUM_SIGNIFICANCE) - { + if (1.0 - std::pow(1.0 - F1, b) <= MAXIMUM_SIGNIFICANCE) { return true; } } return false; } -bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition, - const TFloatMeanAccumulatorCRng &buckets, +bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition, + const TFloatMeanAccumulatorCRng& buckets, core_t::TTime period_, double correction, - STestStats &stats) const -{ + STestStats& stats) const { using TDoubleTimePr = std::pair<double, core_t::TTime>; using TDoubleTimePrVec = std::vector<TDoubleTimePr>; using TMinAccumulator = CBasicStatistics::COrderStatisticsStack<TDoubleTimePr, 1>; using TMeanVarAccumulatorBuffer = boost::circular_buffer<TMeanVarAccumulator>; - LOG_TRACE("Testing partition " << core::CContainerPrinter::print(partition) - << " with period " << period_); + LOG_TRACE("Testing partition " << core::CContainerPrinter::print(partition) << " with period " << period_); - if (!this->testStatisticsFor(buckets, stats) || stats.nullHypothesisGoodEnough()) - { + if (!this->testStatisticsFor(buckets, stats) || stats.nullHypothesisGoodEnough()) { return false; } @@ -1570,12 +1305,10 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition // We need to observe a minimum number of repeated values to test with // an acceptable false positive rate.
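    // (Illustrative note: the search below over candidate start-of-partition
    // offsets is incremental. Each of the two periodic trends is held in a
    // circular buffer which is rotated by one bucket per candidate offset,
    // and the residual variance is updated by subtracting the evicted
    // bucket's contribution and adding the appended bucket's, rather than
    // being recomputed from scratch for every candidate.)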
- if (!this->seenSufficientPeriodicallyPopulatedBucketsToTest(buckets, period)) - { + if (!this->seenSufficientPeriodicallyPopulatedBucketsToTest(buckets, period)) { return false; } - if (stats.s_HasPartition) - { + if (stats.s_HasPartition) { return true; } @@ -1589,8 +1322,7 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition double B{stats.s_B}; double scale{1.0 / stats.s_M}; double df0{B - stats.s_DF0}; - if (df0 <= 0.0) - { + if (df0 <= 0.0) { return false; } double v0{varianceAtPercentile(stats.s_V0, df0, 50.0 + CONFIDENCE_INTERVAL / 2.0)}; @@ -1608,49 +1340,34 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition TTimeVec deltas[2]; deltas[0].reserve((length(partition[0]) * windowLength) / (period_ * repeat)); deltas[1].reserve((length(partition[1]) * windowLength) / (period_ * repeat)); - for (std::size_t i = 0u; i < 2; ++i) - { - for (const auto &window : windows[i]) - { + for (std::size_t i = 0u; i < 2; ++i) { + for (const auto& window : windows[i]) { core_t::TTime a_{window.first}; core_t::TTime b_{window.second}; - for (core_t::TTime t = a_ + period_; t <= b_; t += period_) - { + for (core_t::TTime t = a_ + period_; t <= b_; t += period_) { deltas[i].push_back(t - m_BucketLength); } } } LOG_TRACE("deltas = " << core::CContainerPrinter::print(deltas)); - TMeanVarAccumulatorBuffer trends[] - { - TMeanVarAccumulatorBuffer(period, TMeanVarAccumulator()), - TMeanVarAccumulatorBuffer(period, TMeanVarAccumulator()) - }; + TMeanVarAccumulatorBuffer trends[]{TMeanVarAccumulatorBuffer(period, TMeanVarAccumulator()), + TMeanVarAccumulatorBuffer(period, TMeanVarAccumulator())}; periodicTrend(values, windows[0], m_BucketLength, trends[0]); periodicTrend(values, windows[1], m_BucketLength, trends[1]); - TMeanAccumulator variances[] - { - residualVariance(trends[0], scale), - residualVariance(trends[1], scale) - }; + TMeanAccumulator variances[]{residualVariance(trends[0], scale), + residualVariance(trends[1], scale)}; LOG_TRACE("variances = " << core::CContainerPrinter::print(variances)); TMinAccumulator minimum; - minimum.add({( residualVariance(variances[0]) - + residualVariance(variances[1])) / 2.0, 0}); + minimum.add({(residualVariance(variances[0]) + residualVariance(variances[1])) / 2.0, 0}); TDoubleTimePrVec candidates; candidates.reserve(period); - for (core_t::TTime time = m_BucketLength; - time < repeat; - time += m_BucketLength) - { - for (std::size_t i = 0u; i < 2; ++i) - { - for (auto &delta : deltas[i]) - { + for (core_t::TTime time = m_BucketLength; time < repeat; time += m_BucketLength) { + for (std::size_t i = 0u; i < 2; ++i) { + for (auto& delta : deltas[i]) { delta = (delta + m_BucketLength) % windowLength; } TMeanVarAccumulator oldBucket{trends[i].front()}; @@ -1662,11 +1379,9 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition variances[i] -= residualVariance(oldBucket, scale); variances[i] += residualVariance(newBucket, scale); } - double variance{( residualVariance(variances[0]) - + residualVariance(variances[1])) / 2.0}; + double variance{(residualVariance(variances[0]) + residualVariance(variances[1])) / 2.0}; minimum.add({variance, time}); - if (variance <= 1.05 * minimum[0].first) - { + if (variance <= 1.05 * minimum[0].first) { candidates.emplace_back(variance, time); } } @@ -1675,71 +1390,57 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition TMinAccumulator best; TTimeTimePr2Vec candidateWindows; - for (const auto &candidate : candidates) 
- { - if (candidate.first <= 1.05 * minimum[0].first) - { + for (const auto& candidate : candidates) { + if (candidate.first <= 1.05 * minimum[0].first) { core_t::TTime candidateStartOfPartition{candidate.second}; - candidateWindows = calculateWindows(candidateStartOfPartition, - windowLength, - repeat, partition[0]); + candidateWindows = calculateWindows(candidateStartOfPartition, windowLength, repeat, partition[0]); TMeanAccumulator cost; - for (const auto &window : candidateWindows) - { - core_t::TTime a_{window.first / m_BucketLength}; + for (const auto& window : candidateWindows) { + core_t::TTime a_{window.first / m_BucketLength}; core_t::TTime b_{window.second / m_BucketLength - 1}; double va{CBasicStatistics::mean(values[a_ % values.size()])}; double vb{CBasicStatistics::mean(values[b_ % values.size()])}; cost.add(std::fabs(va) + std::fabs(vb) + std::fabs(vb - va)); } - if (best.add({CBasicStatistics::mean(cost), candidateStartOfPartition})) - { + if (best.add({CBasicStatistics::mean(cost), candidateStartOfPartition})) { b = 0.0; - for (std::size_t i = 0u; i < 2; ++i) - { - candidateWindows = calculateWindows(candidateStartOfPartition, - windowLength, - repeat, partition[i]); + for (std::size_t i = 0u; i < 2; ++i) { + candidateWindows = calculateWindows(candidateStartOfPartition, windowLength, repeat, partition[i]); TMeanVarAccumulatorVec trend(period); periodicTrend(values, candidateWindows, m_BucketLength, trend); - b += static_cast( - std::count_if(trend.begin(), trend.end(), - [](const TMeanVarAccumulator &value) - { return CBasicStatistics::count(value) > 0.0; })); + b += static_cast(std::count_if( + trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) { return CBasicStatistics::count(value) > 0.0; })); } } } } double df1{B - b}; - if (df1 > 0.0) - { + if (df1 > 0.0) { double variance{correction * minimum[0].first}; double v1{varianceAtPercentile(variance, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)}; LOG_TRACE(" variance = " << v1); LOG_TRACE(" varianceThreshold = " << vt); LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0)); - if (v1 <= vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE) - { + if (v1 <= vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE) { double R{-1.0}; double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)}; startOfPartition = best[0].second; windows[0] = calculateWindows(startOfPartition, windowLength, repeat, partition[0]); windows[1] = calculateWindows(startOfPartition, windowLength, repeat, partition[1]); - for (const auto &windows_ : windows) - { + for (const auto& windows_ : windows) { TFloatMeanAccumulatorVec partitionValues; project(values, windows_, m_BucketLength, partitionValues); std::size_t windowLength_(length(windows_[0]) / m_BucketLength); - double BW{std::accumulate(partitionValues.begin(), partitionValues.end(), 0.0, - [](double n, const TFloatMeanAccumulator &value) - { return n + (CBasicStatistics::count(value) > 0.0 ? 1.0 : 0.0); })}; - if (BW > 1.0) - { + double BW{ + std::accumulate(partitionValues.begin(), partitionValues.end(), 0.0, [](double n, const TFloatMeanAccumulator& value) { + return n + (CBasicStatistics::count(value) > 0.0 ? 
1.0 : 0.0); + })}; + if (BW > 1.0) { double RW{CSignal::autocorrelation(windowLength_ + period, partitionValues)}; R = std::max(R, autocorrelationAtPercentile(RW, BW, 50.0 - CONFIDENCE_INTERVAL / 2.0)); LOG_TRACE(" autocorrelation = " << R); @@ -1747,8 +1448,7 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition } } - if (R > Rt) - { + if (R > Rt) { stats.s_StartOfPartition = startOfPartition; return true; } @@ -1760,66 +1460,58 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition const double CPeriodicityHypothesisTests::ACCURATE_TEST_POPULATED_FRACTION{0.9}; const double CPeriodicityHypothesisTests::MINIMUM_COEFFICIENT_OF_VARIATION{1e-4}; -CPeriodicityHypothesisTests::STestStats::STestStats() : - s_HasPeriod(false), s_HasPartition(false), - s_Vt(0.0), s_At(0.0), s_Rt(0.0), - s_Range(0.0), s_B(0.0), s_M(0.0), s_V0(0.0), s_DF0(0.0), - s_StartOfPartition(0) -{} - -void CPeriodicityHypothesisTests::STestStats::setThresholds(double vt, double at, double Rt) -{ +CPeriodicityHypothesisTests::STestStats::STestStats() + : s_HasPeriod(false), + s_HasPartition(false), + s_Vt(0.0), + s_At(0.0), + s_Rt(0.0), + s_Range(0.0), + s_B(0.0), + s_M(0.0), + s_V0(0.0), + s_DF0(0.0), + s_StartOfPartition(0) { +} + +void CPeriodicityHypothesisTests::STestStats::setThresholds(double vt, double at, double Rt) { s_Vt = vt; s_At = at; s_Rt = Rt; } -bool CPeriodicityHypothesisTests::STestStats::nullHypothesisGoodEnough() const -{ +bool CPeriodicityHypothesisTests::STestStats::nullHypothesisGoodEnough() const { TMeanAccumulator mean; - for (const auto &t : s_T0) - { - mean += std::accumulate(t.begin(), t.end(), TMeanAccumulator(), - [](TMeanAccumulator m, double x) - { - m.add(std::fabs(x)); - return m; - }); + for (const auto& t : s_T0) { + mean += std::accumulate(t.begin(), t.end(), TMeanAccumulator(), [](TMeanAccumulator m, double x) { + m.add(std::fabs(x)); + return m; + }); } return std::sqrt(s_V0) <= MINIMUM_COEFFICIENT_OF_VARIATION * CBasicStatistics::mean(mean); } -CPeriodicityHypothesisTests::CNestedHypotheses::CNestedHypotheses(TTestFunc test) : - m_Test(test), m_AlwaysTestNested(false) -{} +CPeriodicityHypothesisTests::CNestedHypotheses::CNestedHypotheses(TTestFunc test) : m_Test(test), m_AlwaysTestNested(false) { +} -CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder -CPeriodicityHypothesisTests::CNestedHypotheses::null(TTestFunc test) -{ +CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder CPeriodicityHypothesisTests::CNestedHypotheses::null(TTestFunc test) { m_Test = test; m_AlwaysTestNested = true; return CBuilder(*this); } -CPeriodicityHypothesisTests::CNestedHypotheses & -CPeriodicityHypothesisTests::CNestedHypotheses::addNested(TTestFunc test) -{ +CPeriodicityHypothesisTests::CNestedHypotheses& CPeriodicityHypothesisTests::CNestedHypotheses::addNested(TTestFunc test) { m_Nested.emplace_back(test); return m_Nested.back(); } -CPeriodicityHypothesisTestsResult -CPeriodicityHypothesisTests::CNestedHypotheses::test(STestStats &stats) const -{ +CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::CNestedHypotheses::test(STestStats& stats) const { CPeriodicityHypothesisTestsResult result{m_Test(stats)}; - if (m_AlwaysTestNested || result != stats.s_H0) - { + if (m_AlwaysTestNested || result != stats.s_H0) { stats.s_H0 = result; - for (const auto &child : m_Nested) - { + for (const auto& child : m_Nested) { CPeriodicityHypothesisTestsResult childResult{child.test(stats)}; - if (result != childResult) - { + if (result != 
childResult) {
                 return childResult;
             }
         }
     }
@@ -1828,49 +1520,37 @@ CPeriodicityHypothesisTests::CNestedHypotheses::test(STestStats &stats) const
     return result;
 }

-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::CBuilder(CNestedHypotheses &hypothesis)
-{
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::CBuilder(CNestedHypotheses& hypothesis) {
     m_Levels.push_back(&hypothesis);
 }

-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder &
-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::addNested(TTestFunc test)
-{
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder&
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::addNested(TTestFunc test) {
     m_Levels.push_back(&m_Levels.back()->addNested(test));
     return *this;
 }

-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder &
-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::addAlternative(TTestFunc test)
-{
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder&
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::addAlternative(TTestFunc test) {
     m_Levels.pop_back();
     return this->addNested(test);
 }

-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder &
-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::finishedNested()
-{
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder& CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::finishedNested() {
     m_Levels.pop_back();
     return *this;
 }

-namespace
-{
+namespace {

 //! Compute the mean of the autocorrelation for \f${P, 2P, ...}\f$
 //! where \f$P\f$ is \p period.
-double meanAutocorrelationForPeriodicOffsets(const TDoubleVec &correlations,
-                                             std::size_t window,
-                                             std::size_t period)
-{
-    auto correctForPad = [window](double correlation, std::size_t offset)
-    {
-        return correlation * static_cast<double>(window)
-                           / static_cast<double>(window - offset);
-    };
+double meanAutocorrelationForPeriodicOffsets(const TDoubleVec& correlations, std::size_t window, std::size_t period) {
+    auto correctForPad = [window](double correlation, std::size_t offset) {
+        return correlation * static_cast<double>(window) / static_cast<double>(window - offset);
+    };
     TMeanAccumulator result;
-    for (std::size_t offset = period; offset < correlations.size(); offset += period)
-    {
+    for (std::size_t offset = period; offset < correlations.size(); offset += period) {
         result.add(correctForPad(correlations[offset - 1], offset));
     }
     return CBasicStatistics::mean(result);
@@ -1878,8 +1558,7 @@ double meanAutocorrelationForPeriodicOffsets(const TDoubleVec &correlations,

 //! Find the single periodic component which explains the most
 //! cyclic autocorrelation.
-std::size_t mostSignificantPeriodicComponent(TFloatMeanAccumulatorVec values)
-{
+std::size_t mostSignificantPeriodicComponent(TFloatMeanAccumulatorVec values) {
     using TSizeVec = std::vector<std::size_t>;
     using TDoubleSizePr = std::pair<double, std::size_t>;
     using TMaxAccumulator = CBasicStatistics::COrderStatisticsHeap<TDoubleSizePr, std::greater<TDoubleSizePr>>;
@@ -1902,8 +1581,7 @@ std::size_t mostSignificantPeriodicComponent(TFloatMeanAccumulatorVec values)
     // periodic.
     TMaxAccumulator candidates(15);
     correlations.resize(pad);
-    for (std::size_t p = 4u; p < correlations.size(); ++p)
-    {
+    for (std::size_t p = 4u; p < correlations.size(); ++p) {
         double correlation{meanAutocorrelationForPeriodicOffsets(correlations, n, p)};
         LOG_TRACE("correlation(" << p << ") = " << correlation);
         candidates.add({correlation, p});
@@ -1911,12 +1589,10 @@ std::size_t mostSignificantPeriodicComponent(TFloatMeanAccumulatorVec values)

     // Sort by decreasing cyclic autocorrelation.
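    // For reference, the re-ranking below assumes CSignal::autocorrelation
    // computes the standard lag-p estimator over a whole number of periods,
    //   R(p) = sum_t (x(t) - m) * (x(t + p) - m) / sum_t (x(t) - m)^2
    // with m the sample mean; the pad-corrected estimates above only
    // shortlist candidates, which are then re-scored with this full estimate.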
TSizeVec candidatePeriods(15);
-    std::transform(candidates.begin(), candidates.end(),
-                   candidatePeriods.begin(),
-                   [](const TDoubleSizePr &candidate_) { return candidate_.second; });
+    std::transform(
+        candidates.begin(), candidates.end(), candidatePeriods.begin(), [](const TDoubleSizePr& candidate_) { return candidate_.second; });
     candidates.clear();
-    for (const auto period : candidatePeriods)
-    {
+    for (const auto period : candidatePeriods) {
         TFloatMeanAccumulatorCRng window(values, 0, period * (values.size() / period));
         candidates.add({CSignal::autocorrelation(period, window), period});
     }
@@ -1929,46 +1605,38 @@ std::size_t mostSignificantPeriodicComponent(TFloatMeanAccumulatorVec values)
     // ends up with higher autocorrelation due to additive noise.
     std::size_t result{candidates[0].second};
     double cutoff{0.9 * candidates[0].first};
-    for (auto i = candidates.begin() + 1; i != candidates.end() && i->first > cutoff; ++i)
-    {
-        if (i->second < result && candidates[0].second % i->second == 0)
-        {
+    for (auto i = candidates.begin() + 1; i != candidates.end() && i->first > cutoff; ++i) {
+        if (i->second < result && candidates[0].second % i->second == 0) {
             result = i->second;
         }
     }
     return result;
 }
-
 }

-CPeriodicityHypothesisTestsResult testForPeriods(const CPeriodicityHypothesisTestsConfig &config,
+CPeriodicityHypothesisTestsResult testForPeriods(const CPeriodicityHypothesisTestsConfig& config,
                                                  core_t::TTime startTime,
                                                  core_t::TTime bucketLength,
-                                                 const TFloatMeanAccumulatorVec &values)
-{
+                                                 const TFloatMeanAccumulatorVec& values) {
     // Find the single periodic component which explains the
     // most cyclic autocorrelation.
     std::size_t period_{mostSignificantPeriodicComponent(values)};
     core_t::TTime window{static_cast<core_t::TTime>(values.size()) * bucketLength};
     core_t::TTime period{static_cast<core_t::TTime>(period_) * bucketLength};
-    LOG_TRACE("bucket length = " << bucketLength
-              << ", window = " << window
-              << ", periods to test = " << period
-              << ", # values = " << values.size());
+    LOG_TRACE("bucket length = " << bucketLength << ", window = " << window << ", periods to test = " << period
                                  << ", # values = " << values.size());

     // Set up the hypothesis tests.
     CPeriodicityHypothesisTests test{config};
     test.initialize(bucketLength, window, period);
     core_t::TTime time{startTime + bucketLength / 2};
-    for (const auto &value : values)
-    {
+    for (const auto& value : values) {
         test.add(time, CBasicStatistics::mean(value), CBasicStatistics::count(value));
         time += bucketLength;
     }
     return test.test();
 }
-
 }
 }
diff --git a/lib/maths/CPoissonMeanConjugate.cc b/lib/maths/CPoissonMeanConjugate.cc
index 24dd782223..ba4501d7cd 100644
--- a/lib/maths/CPoissonMeanConjugate.cc
+++ b/lib/maths/CPoissonMeanConjugate.cc
@@ -15,8 +15,8 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
@@ -36,29 +36,23 @@
 #include
 #include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
+namespace {

 const double MINIMUM_GAUSSIAN_MEAN = 100.0;

 // Wrapper for static cast which can be used with STL algorithms.
 template<typename TARGET_TYPE>
-struct SStaticCast
-{
+struct SStaticCast {
     template<typename SOURCE_TYPE>
-    inline TARGET_TYPE operator()(const SOURCE_TYPE &source) const
-    {
+    inline TARGET_TYPE operator()(const SOURCE_TYPE& source) const {
         return static_cast<TARGET_TYPE>(source);
     }
 };

-namespace detail
-{
+namespace detail {

 using TDouble1Vec = core::CSmallVector<double, 1>;
 using TDouble4Vec = core::CSmallVector<double, 4>;
@@ -66,14 +60,8 @@ using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
 using TWeightStyleVec = maths_t::TWeightStyleVec;

 //!
Adds "weight" x "right operand" to the "left operand".
-struct SPlusWeight
-{
-    double operator()(double lhs,
-                      double rhs,
-                      double weight = 1.0) const
-    {
-        return lhs + weight * rhs;
-    }
+struct SPlusWeight {
+    double operator()(double lhs, double rhs, double weight = 1.0) const { return lhs + weight * rhs; }
 };

 //! Evaluate \p func on the joint predictive distribution for \p samples
@@ -92,21 +80,19 @@ struct SPlusWeight
 //! \param[in] rate The rate of the rate prior.
 //! \param[out] result Filled in with the aggregation of results of \p func.
 template<typename FUNC, typename AGGREGATOR, typename RESULT>
-bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles,
-                                         const TDouble1Vec &samples,
-                                         const TDouble4Vec1Vec &weights,
+bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
+                                         const TDouble1Vec& samples,
+                                         const TDouble4Vec1Vec& weights,
                                          FUNC func,
                                          AGGREGATOR aggregate,
                                          double offset,
                                          bool isNonInformative,
                                          double shape,
                                          double rate,
-                                         RESULT &result)
-{
+                                         RESULT& result) {
     result = RESULT();

-    if (samples.empty())
-    {
+    if (samples.empty()) {
         LOG_ERROR("Can't compute distribution for empty sample set");
         return false;
     }
@@ -126,22 +112,17 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles,
     //
     // This becomes increasingly accurate as the prior distribution narrows.

-    try
-    {
-        if (isNonInformative)
-        {
+    try {
+        if (isNonInformative) {
             // The non-informative prior is improper and effectively 0 everywhere.
             // (It is acceptable to approximate all finite samples as at the median
             // of this distribution.)
-            for (std::size_t i = 0u; i < samples.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < samples.size(); ++i) {
                 double x = samples[i] + offset;
                 double n = maths_t::count(weightStyles, weights[i]);
                 result = aggregate(result, func(CTools::SImproperDistribution(), x), n);
             }
-        }
-        else
-        {
+        } else {
             // The marginal likelihood for a single sample is the negative
             // binomial distribution:
             //   f(x | p, r) = Gamma(r + x) * p^r * (1 - p)^x / x! / Gamma(r)
@@ -155,20 +136,16 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec &weightStyles,
             // "a" is the shape and "b" is the rate of the gamma distribution,
             // and the error function is significantly cheaper to compute.

-            for (std::size_t i = 0u; i < samples.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < samples.size(); ++i) {
                 double n = maths_t::count(weightStyles, weights[i]);
                 double x = samples[i] + offset;

                 double mean = shape / rate;
-                if (mean > MINIMUM_GAUSSIAN_MEAN)
-                {
+                if (mean > MINIMUM_GAUSSIAN_MEAN) {
                     double deviation = std::sqrt((rate + 1.0) / rate * mean);
                     boost::math::normal_distribution<> normal(mean, deviation);
                     result = aggregate(result, func(normal, x), n);
-                }
-                else
-                {
+                } else {
                     double r = shape;
                     double p = rate / (rate + 1.0);
                     boost::math::negative_binomial_distribution<> negativeBinomial(r, p);
                     result = aggregate(result, func(negativeBinomial, x), n);
                 }
             }
         }
-    }
-    catch (const std::exception &e)
-    {
+    } catch (const std::exception& e) {
         LOG_ERROR("Error calculating joint c.d.f."
- << " offset = " << offset - << ", shape = " << shape - << ", rate = " << rate - << ": " << e.what()); + << " offset = " << offset << ", shape = " << shape << ", rate = " << rate << ": " << e.what()); return false; } @@ -203,38 +175,22 @@ const std::string OFFSET_TAG("d"); //const std::string MAXIMUM_TAG("f"); No longer used const std::string DECAY_RATE_TAG("g"); const std::string EMPTY_STRING; +} +CPoissonMeanConjugate::CPoissonMeanConjugate(double offset, double shape, double rate, double decayRate /*= 0.0*/) + : CPrior(maths_t::E_IntegerData, decayRate), m_Offset(offset), m_Shape(shape), m_Rate(rate) { } -CPoissonMeanConjugate::CPoissonMeanConjugate(double offset, - double shape, - double rate, - double decayRate/*= 0.0*/) : - CPrior(maths_t::E_IntegerData, decayRate), - m_Offset(offset), - m_Shape(shape), - m_Rate(rate) -{} - -CPoissonMeanConjugate::CPoissonMeanConjugate(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) : - CPrior(maths_t::E_IntegerData, params.s_DecayRate), - m_Offset(0.0), - m_Shape(0.0), - m_Rate(0.0) -{ +CPoissonMeanConjugate::CPoissonMeanConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) + : CPrior(maths_t::E_IntegerData, params.s_DecayRate), m_Offset(0.0), m_Shape(0.0), m_Rate(0.0) { traverser.traverseSubLevel(boost::bind(&CPoissonMeanConjugate::acceptRestoreTraverser, this, _1)); } -bool CPoissonMeanConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, - double decayRate, - core::CStringUtils::stringToType(traverser.value(), decayRate), - this->decayRate(decayRate)) +bool CPoissonMeanConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN( + DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) RESTORE_BUILT_IN(OFFSET_TAG, m_Offset) RESTORE_BUILT_IN(SHAPE_TAG, m_Shape) RESTORE_BUILT_IN(RATE_TAG, m_Rate) @@ -242,14 +198,12 @@ bool CPoissonMeanConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -CPoissonMeanConjugate CPoissonMeanConjugate::nonInformativePrior(double offset, double decayRate) -{ +CPoissonMeanConjugate CPoissonMeanConjugate::nonInformativePrior(double offset, double decayRate) { // We'll use the improper distribution: // lim "a -> 1+, k -> inf" { Gamma(a, k) } // @@ -259,33 +213,26 @@ CPoissonMeanConjugate CPoissonMeanConjugate::nonInformativePrior(double offset, return CPoissonMeanConjugate(offset, NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, decayRate); } -CPoissonMeanConjugate::EPrior CPoissonMeanConjugate::type() const -{ +CPoissonMeanConjugate::EPrior CPoissonMeanConjugate::type() const { return E_Poisson; } -CPoissonMeanConjugate *CPoissonMeanConjugate::clone() const -{ +CPoissonMeanConjugate* CPoissonMeanConjugate::clone() const { return new CPoissonMeanConjugate(*this); } -void CPoissonMeanConjugate::setToNonInformative(double offset, - double decayRate) -{ +void CPoissonMeanConjugate::setToNonInformative(double offset, double decayRate) { *this = nonInformativePrior(offset, decayRate); } -bool CPoissonMeanConjugate::needsOffset() const -{ +bool 
CPoissonMeanConjugate::needsOffset() const { return true; } -double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec &/*weightStyles*/, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &/*weights*/) -{ - if (samples.empty() || CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) - { +double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& /*weights*/) { + if (samples.empty() || CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) { return 0.0; } @@ -302,10 +249,8 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec &/*weightStyles static const double EPS = 0.01; static const double OFFSET_MARGIN = 0.0; - double minimumSample = *std::min_element(CMathsFuncs::beginFinite(samples), - CMathsFuncs::endFinite(samples)); - if (minimumSample + m_Offset >= OFFSET_MARGIN) - { + double minimumSample = *std::min_element(CMathsFuncs::beginFinite(samples), CMathsFuncs::endFinite(samples)); + if (minimumSample + m_Offset >= OFFSET_MARGIN) { return 0.0; } @@ -317,8 +262,7 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec &/*weightStyles TDouble4Vec1Vec weights(resamples.size(), TDouble4Vec(1, weight)); double before = 0.0; - if (!resamples.empty()) - { + if (!resamples.empty()) { this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, weights, before); } @@ -328,19 +272,15 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec &/*weightStyles m_Rate = NON_INFORMATIVE_RATE; this->numberSamples(0.0); - if (resamples.empty()) - { + if (resamples.empty()) { return 0.0; } - for (auto &sample : resamples) - { + for (auto& sample : resamples) { sample = std::max(sample, OFFSET_MARGIN - offset); } - LOG_TRACE("resamples = " << core::CContainerPrinter::print(resamples) - << ", weight = " << weight - << ", offset = " << m_Offset); + LOG_TRACE("resamples = " << core::CContainerPrinter::print(resamples) << ", weight = " << weight << ", offset = " << m_Offset); this->addSamples(weightStyle, resamples, weights); @@ -350,26 +290,18 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec &/*weightStyles return std::min(after - before, 0.0); } -double CPoissonMeanConjugate::offset() const -{ +double CPoissonMeanConjugate::offset() const { return m_Offset; } -void CPoissonMeanConjugate::addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) -{ - if (samples.empty()) - { +void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { + if (samples.empty()) { return; } - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" - << core::CContainerPrinter::print(samples) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return; } @@ -396,23 +328,18 @@ void CPoissonMeanConjugate::addSamples(const TWeightStyleVec &weightStyles, double numberSamples = 0.0; double sampleSum = 0.0; - try - { - for (std::size_t i = 0u; i < samples.size(); ++i) - { + try { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); double x = samples[i] + m_Offset; - if (!CMathsFuncs::isFinite(x) || x < 0.0) - { + if 
(!CMathsFuncs::isFinite(x) || x < 0.0) { LOG_ERROR("Discarding " << x << " it's not Poisson"); continue; } numberSamples += n; sampleSum += n * x; } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Failed to update likelihood: " << e.what()); return; } @@ -420,23 +347,17 @@ void CPoissonMeanConjugate::addSamples(const TWeightStyleVec &weightStyles, m_Shape += sampleSum; m_Rate += numberSamples; - LOG_TRACE("# samples = " << numberSamples - << ", sampleSum = " << sampleSum - << ", m_Shape = " << m_Shape - << ", m_Rate = " << m_Rate - << ", m_Offset = " << m_Offset); + LOG_TRACE("# samples = " << numberSamples << ", sampleSum = " << sampleSum << ", m_Shape = " << m_Shape << ", m_Rate = " << m_Rate + << ", m_Offset = " << m_Offset); } -void CPoissonMeanConjugate::propagateForwardsByTime(double time) -{ - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { +void CPoissonMeanConjugate::propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { LOG_ERROR("Bad propagation time " << time); return; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // There is nothing to be done. return; } @@ -453,32 +374,23 @@ void CPoissonMeanConjugate::propagateForwardsByTime(double time) // // Thus the mean is unchanged and variance is increased by 1 / f. - double factor = std::min((alpha * m_Shape - + (1.0 - alpha) * NON_INFORMATIVE_SHAPE) - / m_Shape, 1.0); + double factor = std::min((alpha * m_Shape + (1.0 - alpha) * NON_INFORMATIVE_SHAPE) / m_Shape, 1.0); m_Shape *= factor; m_Rate *= factor; this->numberSamples(this->numberSamples() * alpha); - LOG_TRACE("time = " << time - << ", alpha = " << alpha - << ", m_Shape = " << m_Shape - << ", m_Rate = " << m_Rate - << ", numberSamples = " << this->numberSamples()); + LOG_TRACE("time = " << time << ", alpha = " << alpha << ", m_Shape = " << m_Shape << ", m_Rate = " << m_Rate + << ", numberSamples = " << this->numberSamples()); } -CPoissonMeanConjugate::TDoubleDoublePr -CPoissonMeanConjugate::marginalLikelihoodSupport() const -{ +CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::marginalLikelihoodSupport() const { return std::make_pair(-m_Offset, boost::numeric::bounds::highest()); } -double CPoissonMeanConjugate::marginalLikelihoodMean() const -{ - if (this->isNonInformative()) - { +double CPoissonMeanConjugate::marginalLikelihoodMean() const { + if (this->isNonInformative()) { return -m_Offset; } @@ -489,47 +401,36 @@ double CPoissonMeanConjugate::marginalLikelihoodMean() const return this->priorMean() - m_Offset; } -double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec &/*weightStyles*/, - const TDouble4Vec &/*weights*/) const -{ - if (this->isNonInformative()) - { +double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const { + if (this->isNonInformative()) { return -m_Offset; } // boost::math::negative_binomial_distribution is broken for // successes <= 1.0. - if (m_Shape <= 1.0) - { + if (m_Shape <= 1.0) { return -m_Offset; } // We use the fact that the marginal likelihood is negative // binomial. 
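    // Concretely, this is the standard Poisson-gamma conjugacy result:
    // integrating the Poisson likelihood over a Gamma(a, b) prior on the
    // rate u gives the predictive
    //   f(x) = Gamma(a + x) / (Gamma(a) * x!) * (b / (b + 1))^a * (1 / (b + 1))^x
    // i.e. negative binomial with r = a successes and success probability
    // p = b / (b + 1), which is exactly the r and p constructed below.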
- try - { + try { double r = m_Shape; double p = m_Rate / (m_Rate + 1.0); boost::math::negative_binomial_distribution<> negativeBinomial(r, p); return boost::math::mode(negativeBinomial) - m_Offset; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute marginal likelihood mode: " << e.what() - << ", prior shape = " << m_Shape - << ", prior rate = " << m_Rate); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute marginal likelihood mode: " << e.what() << ", prior shape = " << m_Shape + << ", prior rate = " << m_Rate); } return -m_Offset; } -double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ - if (this->isNonInformative()) - { +double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { + if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } @@ -538,25 +439,16 @@ double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec & // = "prior mean" + "prior variance" double varianceScale = 1.0; - try - { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) - * maths_t::countVarianceScale(weightStyles, weights); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get variance scale: " << e.what()); - } + try { + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { LOG_ERROR("Failed to get variance scale: " << e.what()); } return varianceScale * (this->priorMean() + this->priorVariance()); } -CPoissonMeanConjugate::TDoubleDoublePr -CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec &/*weightStyles*/, - const TDouble4Vec &/*weights*/) const -{ - if (this->isNonInformative()) - { +CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& /*weightStyles*/, + const TDouble4Vec& /*weights*/) const { + if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -566,49 +458,36 @@ CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, // We use the fact that the marginal likelihood function is // negative binomial. - try - { + try { double r = m_Shape; double p = m_Rate / (m_Rate + 1.0); boost::math::negative_binomial_distribution<> negativeBinomial(r, p); double x1 = boost::math::quantile(negativeBinomial, (1.0 - percentage) / 2.0) - m_Offset; - double x2 = percentage > 0.0 ? boost::math::quantile(negativeBinomial, - (1.0 + percentage) / 2.0) - m_Offset : x1; + double x2 = percentage > 0.0 ? 
boost::math::quantile(negativeBinomial, (1.0 + percentage) / 2.0) - m_Offset : x1; return std::make_pair(x1, x2); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute confidence interval: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to compute confidence interval: " << e.what()); } return this->marginalLikelihoodSupport(); } -maths_t::EFloatingPointErrorStatus -CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &result) const -{ +maths_t::EFloatingPointErrorStatus CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; - if (samples.empty()) - { + if (samples.empty()) { LOG_ERROR("Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) - { - LOG_ERROR("Mismatch in samples '" - << core::CContainerPrinter::print(samples) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + if (samples.size() != weights.size()) { + LOG_ERROR("Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } - if (this->isNonInformative()) - { + if (this->isNonInformative()) { // The non-informative likelihood is improper and effectively // zero everywhere. We use minus max double because // log(0) = HUGE_VALUE, which causes problems for Windows. @@ -637,19 +516,16 @@ CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightS // a is the prior gamma shape // b is the prior gamma rate - try - { + try { // Calculate the statistics we need for the calculation. double numberSamples = 0.0; double sampleSum = 0.0; double sampleLogFactorialSum = 0.0; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); double x = samples[i] + m_Offset; - if (x < 0.0) - { + if (x < 0.0) { // Technically, the marginal likelihood is zero here // so the log would be infinite. 
We use minus max
                 // double because log(0) = HUGE_VALUE, which causes
@@ -674,21 +550,15 @@ CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightS
         double impliedShape = m_Shape + sampleSum;
         double impliedRate = m_Rate + numberSamples;

-        result = boost::math::lgamma(impliedShape)
-                 + m_Shape * std::log(m_Rate)
-                 - impliedShape * std::log(impliedRate)
-                 - sampleLogFactorialSum
-                 - boost::math::lgamma(m_Shape);
-    }
-    catch (const std::exception &e)
-    {
+        result = boost::math::lgamma(impliedShape) + m_Shape * std::log(m_Rate) - impliedShape * std::log(impliedRate) -
                 sampleLogFactorialSum - boost::math::lgamma(m_Shape);
+    } catch (const std::exception& e) {
         LOG_ERROR("Error calculating marginal likelihood: " << e.what());
         return maths_t::E_FpFailed;
     }

     maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result);
-    if (status & maths_t::E_FpFailed)
-    {
+    if (status & maths_t::E_FpFailed) {
         LOG_ERROR("Failed to compute log likelihood");
         LOG_ERROR("samples = " << core::CContainerPrinter::print(samples));
         LOG_ERROR("weights = " << core::CContainerPrinter::print(weights));
@@ -696,13 +566,10 @@ CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec &weightS
     return status;
 }

-void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
-                                                     TDouble1Vec &samples) const
-{
+void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const {
     samples.clear();

-    if (numberSamples == 0 || this->isNonInformative())
-    {
+    if (numberSamples == 0 || this->isNonInformative()) {
         return;
     }
@@ -753,139 +620,96 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
     double mean = boost::math::mean(gamma);
     double lastPartialExpectation = 0.0;

-    if (mean > MINIMUM_GAUSSIAN_MEAN)
-    {
+    if (mean > MINIMUM_GAUSSIAN_MEAN) {
         double variance = mean + this->priorVariance();

         LOG_TRACE("mean = " << mean << ", variance = " << variance);

-        try
-        {
+        try {
             boost::math::normal_distribution<> normal(mean, std::sqrt(variance));

-            for (std::size_t i = 1u; i < numberSamples; ++i)
-            {
-                double q = static_cast<double>(i) / static_cast<double>(numberSamples);
+            for (std::size_t i = 1u; i < numberSamples; ++i) {
+                double q = static_cast<double>(i) / static_cast<double>(numberSamples);
                 double xq = boost::math::quantile(normal, q);

                 double partialExpectation = mean * q - variance * CTools::safePdf(normal, xq);

-                double sample = static_cast<double>(numberSamples)
-                                * (partialExpectation - lastPartialExpectation)
-                                - m_Offset;
+                double sample = static_cast<double>(numberSamples) * (partialExpectation - lastPartialExpectation) - m_Offset;

                 LOG_TRACE("sample = " << sample);

                 // Sanity check the sample: should be in the distribution support.
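                // For reference: the sample just computed uses the Gaussian
                // partial-expectation identity
                //   E[X 1{X <= x}] = mean * F(x) - variance * f(x)
                // for X ~ N(mean, variance) with c.d.f. F and density f, so
                // each sample is (up to the offset) the conditional mean of
                // one 1/numberSamples probability slice of the distribution;
                // the support check below guards against numerical failure
                // of the quantile.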
-                if (sample >= support.first && sample <= support.second)
-                {
+                if (sample >= support.first && sample <= support.second) {
                     samples.push_back(sample);
-                }
-                else
-                {
-                    LOG_ERROR("Sample out of bounds: sample = " << sample
-                              << ", support = [" << support.first << "," << support.second << "]"
-                              << ", mean = " << mean
-                              << ", variance = " << variance
-                              << ", q = " << q
-                              << ", x(q) = " << xq);
+                } else {
+                    LOG_ERROR("Sample out of bounds: sample = "
+                              << sample << ", support = [" << support.first << "," << support.second << "]"
+                              << ", mean = " << mean << ", variance = " << variance << ", q = " << q << ", x(q) = " << xq);
                 }
                 lastPartialExpectation = partialExpectation;
             }
+        } catch (const std::exception& e) {
+            LOG_ERROR("Failed to sample: " << e.what() << ", mean = " << mean << ", variance = " << variance);
         }
-        catch (const std::exception &e)
-        {
-            LOG_ERROR("Failed to sample: " << e.what()
-                      << ", mean = " << mean
-                      << ", variance = " << variance);
-        }
-    }
-    else
-    {
+    } else {
         double r = m_Shape;
         double p = m_Rate / (m_Rate + 1.0);

         LOG_TRACE("mean = " << mean << ", r = " << r << ", p = " << p);

-        using boost::math::policies::policy;
         using boost::math::policies::discrete_quantile;
+        using boost::math::policies::policy;
         using boost::math::policies::real;

         using TRealQuantilePolicy = policy<discrete_quantile<real>>;
-        using TNegativeBinomialRealQuantile =
-                  boost::math::negative_binomial_distribution<double, TRealQuantilePolicy>;
+        using TNegativeBinomialRealQuantile = boost::math::negative_binomial_distribution<double, TRealQuantilePolicy>;

-        try
-        {
+        try {
             TNegativeBinomialRealQuantile negativeBinomial1(r, p);
             TNegativeBinomialRealQuantile negativeBinomial2(r + 1.0, p);

-            for (std::size_t i = 1u; i < numberSamples; ++i)
-            {
-                double q = static_cast<double>(i)
-                           / static_cast<double>(numberSamples);
+            for (std::size_t i = 1u; i < numberSamples; ++i) {
+                double q = static_cast<double>(i) / static_cast<double>(numberSamples);
                 double xq = boost::math::quantile(negativeBinomial1, q);

-                double partialExpectation = mean * boost::math::cdf(negativeBinomial2,
-                                                                    std::max(xq - 1.0, 0.0));
+                double partialExpectation = mean * boost::math::cdf(negativeBinomial2, std::max(xq - 1.0, 0.0));

-                double sample = static_cast<double>(numberSamples)
-                                * (partialExpectation - lastPartialExpectation)
-                                - m_Offset;
+                double sample = static_cast<double>(numberSamples) * (partialExpectation - lastPartialExpectation) - m_Offset;

                 LOG_TRACE("sample = " << sample);

                 // Sanity check the sample: should be in the distribution support.
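                // Same construction as the Gaussian branch, here via the
                // negative binomial identity the code relies on,
                //   E[X 1{X <= k}] = mean * F(k - 1; r + 1, p),
                // where F(.; r + 1, p) is the c.d.f. of a negative binomial
                // with one extra success: hence negativeBinomial2(r + 1.0, p)
                // evaluated at std::max(xq - 1.0, 0.0) above; the support
                // check below again guards the quantile computation.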
- if (sample >= support.first && sample <= support.second) - { + if (sample >= support.first && sample <= support.second) { samples.push_back(sample); - } - else - { - LOG_ERROR("Sample out of bounds: sample = " << sample - << ", support = [" << support.first << "," << support.second << "]" - << ", mean = " << mean - << ", r = " << r - << ", p = " << p - << ", q = " << q - << ", x(q) = " << xq); + } else { + LOG_ERROR("Sample out of bounds: sample = " + << sample << ", support = [" << support.first << "," << support.second << "]" + << ", mean = " << mean << ", r = " << r << ", p = " << p << ", q = " << q << ", x(q) = " << xq); } lastPartialExpectation = partialExpectation; } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to sample: " << e.what() - << ", mean = " << mean - << ", r = " << r - << ", p = " << p); + } catch (const std::exception& e) { + LOG_ERROR("Failed to sample: " << e.what() << ", mean = " << mean << ", r = " << r << ", p = " << p); } } - double sample = static_cast(numberSamples) - * (mean - lastPartialExpectation) - - m_Offset; + double sample = static_cast(numberSamples) * (mean - lastPartialExpectation) - m_Offset; LOG_TRACE("sample = " << sample); // Sanity check the sample: should be in the distribution support. - if (sample >= support.first && sample <= support.second) - { + if (sample >= support.first && sample <= support.second) { samples.push_back(sample); - } - else - { - LOG_ERROR("Sample out of bounds: sample = " << sample - << ", mean = " << mean); + } else { + LOG_ERROR("Sample out of bounds: sample = " << sample << ", mean = " << mean); } } -bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ +bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { lowerBound = upperBound = 0.0; double value; @@ -898,10 +722,8 @@ bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles this->isNonInformative(), m_Shape, m_Rate, - value)) - { - LOG_ERROR("Failed computing c.d.f. for " - << core::CContainerPrinter::print(samples)); + value)) { + LOG_ERROR("Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); return false; } @@ -909,12 +731,11 @@ bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec &weightStyles return true; } -bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound) const -{ +bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) const { lowerBound = upperBound = 0.0; double value; @@ -927,10 +748,8 @@ bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec &we this->isNonInformative(), m_Shape, m_Rate, - value)) - { - LOG_ERROR("Failed computing c.d.f. complement for " - << core::CContainerPrinter::print(samples)); + value)) { + LOG_ERROR("Failed computing c.d.f. 
complement for " << core::CContainerPrinter::print(samples)); return false; } @@ -939,13 +758,12 @@ bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec &we } bool CPoissonMeanConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - double &lowerBound, - double &upperBound, - maths_t::ETail &tail) const -{ + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const { lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; @@ -953,21 +771,19 @@ bool CPoissonMeanConjugate::probabilityOfLessLikelySamples(maths_t::EProbability maths_t::ETail tail_ = maths_t::E_UndeterminedTail; CJointProbabilityOfLessLikelySamples probability; - if (!detail::evaluateFunctionOnJointDistribution(weightStyles, - samples, - weights, - boost::bind(CTools::CProbabilityOfLessLikelySample(calculation), - _1, _2, boost::ref(tail_)), - CJointProbabilityOfLessLikelySamples::SAddProbability(), - m_Offset, - this->isNonInformative(), - m_Shape, - m_Rate, - probability) - || !probability.calculate(value)) - { - LOG_ERROR("Failed computing probability for " - << core::CContainerPrinter::print(samples)); + if (!detail::evaluateFunctionOnJointDistribution( + weightStyles, + samples, + weights, + boost::bind(CTools::CProbabilityOfLessLikelySample(calculation), _1, _2, boost::ref(tail_)), + CJointProbabilityOfLessLikelySamples::SAddProbability(), + m_Offset, + this->isNonInformative(), + m_Shape, + m_Rate, + probability) || + !probability.calculate(value)) { + LOG_ERROR("Failed computing probability for " << core::CContainerPrinter::print(samples)); return false; } @@ -977,28 +793,22 @@ bool CPoissonMeanConjugate::probabilityOfLessLikelySamples(maths_t::EProbability return true; } -bool CPoissonMeanConjugate::isNonInformative() const -{ +bool CPoissonMeanConjugate::isNonInformative() const { return m_Rate == NON_INFORMATIVE_RATE; } -void CPoissonMeanConjugate::print(const std::string &indent, - std::string &result) const -{ +void CPoissonMeanConjugate::print(const std::string& indent, std::string& result) const { result += core_t::LINE_ENDING + indent + "poisson "; - if (this->isNonInformative()) - { + if (this->isNonInformative()) { result += "non-informative"; return; } - result += "mean = " + core::CStringUtils::typeToStringPretty(this->marginalLikelihoodMean()) - + " sd = " + core::CStringUtils::typeToStringPretty(std::sqrt(this->marginalLikelihoodVariance())); + result += "mean = " + core::CStringUtils::typeToStringPretty(this->marginalLikelihoodMean()) + + " sd = " + core::CStringUtils::typeToStringPretty(std::sqrt(this->marginalLikelihoodVariance())); } -std::string CPoissonMeanConjugate::printJointDensityFunction() const -{ - if (this->isNonInformative()) - { +std::string CPoissonMeanConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative prior is improper and effectively 0 everywhere. 
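        // (With rate = NON_INFORMATIVE_RATE = 0.0 the gamma density is not
        // normalisable, so there is no meaningful density to plot and an
        // empty string is returned below.)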
return std::string(); } @@ -1026,8 +836,7 @@ std::string CPoissonMeanConjugate::printJointDensityFunction() const std::ostringstream pdf; coordinates << "x = ["; pdf << "pdf = ["; - for (unsigned int i = 0u; i < POINTS; ++i, x += increment) - { + for (unsigned int i = 0u; i < POINTS; ++i, x += increment) { coordinates << x << " "; pdf << CTools::safePdf(gamma, x) << " "; } @@ -1037,31 +846,26 @@ std::string CPoissonMeanConjugate::printJointDensityFunction() const return coordinates.str() + pdf.str(); } -uint64_t CPoissonMeanConjugate::checksum(uint64_t seed) const -{ +uint64_t CPoissonMeanConjugate::checksum(uint64_t seed) const { seed = this->CPrior::checksum(seed); seed = CChecksum::calculate(seed, m_Offset); seed = CChecksum::calculate(seed, m_Shape); return CChecksum::calculate(seed, m_Rate); } -void CPoissonMeanConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CPoissonMeanConjugate::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CPoissonMeanConjugate"); } -std::size_t CPoissonMeanConjugate::memoryUsage() const -{ +std::size_t CPoissonMeanConjugate::memoryUsage() const { return 0; } -std::size_t CPoissonMeanConjugate::staticSize() const -{ +std::size_t CPoissonMeanConjugate::staticSize() const { return sizeof(*this); } -void CPoissonMeanConjugate::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CPoissonMeanConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); inserter.insertValue(OFFSET_TAG, m_Offset, core::CIEEE754::E_SinglePrecision); inserter.insertValue(SHAPE_TAG, m_Shape, core::CIEEE754::E_SinglePrecision); @@ -1069,55 +873,38 @@ void CPoissonMeanConjugate::acceptPersistInserter(core::CStatePersistInserter &i inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); } -double CPoissonMeanConjugate::priorMean() const -{ - if (this->isNonInformative()) - { +double CPoissonMeanConjugate::priorMean() const { + if (this->isNonInformative()) { return 0.0; } - try - { + try { boost::math::gamma_distribution<> gamma(m_Shape, 1.0 / m_Rate); return boost::math::mean(gamma); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate mean: " << e.what() - << ", prior shape = " << m_Shape - << ", prior rate = " << m_Rate); + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate mean: " << e.what() << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); } return 0.0; } -double CPoissonMeanConjugate::priorVariance() const -{ - if (this->isNonInformative()) - { +double CPoissonMeanConjugate::priorVariance() const { + if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } - try - { + try { boost::math::gamma_distribution<> gamma(m_Shape, 1.0 / m_Rate); return boost::math::variance(gamma); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate variance: " << e.what() - << ", prior shape = " << m_Shape - << ", prior rate = " << m_Rate); + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate variance: " << e.what() << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); } return boost::numeric::bounds::highest(); } -CPoissonMeanConjugate::TDoubleDoublePr -CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const -{ - if (this->isNonInformative()) - { +CPoissonMeanConjugate::TDoubleDoublePr 
CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const { + if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -1131,32 +918,24 @@ CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const double lowerPercentile = 0.5 * (1.0 - percentage); double upperPercentile = 0.5 * (1.0 + percentage); - try - { + try { boost::math::gamma_distribution<> gamma(m_Shape, 1.0 / m_Rate); return std::make_pair(boost::math::quantile(gamma, lowerPercentile) - m_Offset, boost::math::quantile(gamma, upperPercentile) - m_Offset); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute mean confidence interval: " << e.what() - << ", prior shape = " << m_Shape - << ", prior rate = " << m_Rate); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute mean confidence interval: " << e.what() << ", prior shape = " << m_Shape + << ", prior rate = " << m_Rate); } return this->marginalLikelihoodSupport(); } -bool CPoissonMeanConjugate::equalTolerance(const CPoissonMeanConjugate &rhs, - const TEqualWithTolerance &equal) const -{ - LOG_DEBUG(m_Shape << " " << rhs.m_Shape << ", " - << m_Rate << " " << rhs.m_Rate); +bool CPoissonMeanConjugate::equalTolerance(const CPoissonMeanConjugate& rhs, const TEqualWithTolerance& equal) const { + LOG_DEBUG(m_Shape << " " << rhs.m_Shape << ", " << m_Rate << " " << rhs.m_Rate); return equal(m_Shape, rhs.m_Shape) && equal(m_Rate, rhs.m_Rate); } const double CPoissonMeanConjugate::NON_INFORMATIVE_SHAPE = 0.1; const double CPoissonMeanConjugate::NON_INFORMATIVE_RATE = 0.0; - } } diff --git a/lib/maths/CPrior.cc b/lib/maths/CPrior.cc index 6c981583a5..068d0ea93a 100644 --- a/lib/maths/CPrior.cc +++ b/lib/maths/CPrior.cc @@ -26,135 +26,96 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { -namespace detail -{ +namespace detail { //! Set the decay rate, validating the input. 
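//! Non-finite values are rejected with an error and \p fallback is used
//! in their place.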
-void setDecayRate(double value, double fallback, CFloatStorage &result) -{ - if (CMathsFuncs::isFinite(value)) - { +void setDecayRate(double value, double fallback, CFloatStorage& result) { + if (CMathsFuncs::isFinite(value)) { result = value; - } - else - { + } else { LOG_ERROR("Invalid decay rate " << value); result = fallback; } } - } const std::size_t ADJUST_OFFSET_TRIALS = 20; - } -CPrior::CPrior() : - m_DataType(maths_t::E_DiscreteData), - m_DecayRate(0.0), - m_NumberSamples(0) -{} +CPrior::CPrior() : m_DataType(maths_t::E_DiscreteData), m_DecayRate(0.0), m_NumberSamples(0) { +} -CPrior::CPrior(maths_t::EDataType dataType, double decayRate) : - m_DataType(dataType), - m_NumberSamples(0) -{ +CPrior::CPrior(maths_t::EDataType dataType, double decayRate) : m_DataType(dataType), m_NumberSamples(0) { detail::setDecayRate(decayRate, FALLBACK_DECAY_RATE, m_DecayRate); } -void CPrior::swap(CPrior &other) -{ +void CPrior::swap(CPrior& other) { std::swap(m_DataType, other.m_DataType); std::swap(m_DecayRate, other.m_DecayRate); std::swap(m_NumberSamples, other.m_NumberSamples); } -bool CPrior::isDiscrete() const -{ +bool CPrior::isDiscrete() const { return m_DataType == maths_t::E_DiscreteData || m_DataType == maths_t::E_IntegerData; } -bool CPrior::isInteger() const -{ +bool CPrior::isInteger() const { return m_DataType == maths_t::E_IntegerData; } -maths_t::EDataType CPrior::dataType() const -{ +maths_t::EDataType CPrior::dataType() const { return m_DataType; } -void CPrior::dataType(maths_t::EDataType value) -{ +void CPrior::dataType(maths_t::EDataType value) { m_DataType = value; } -double CPrior::decayRate() const -{ +double CPrior::decayRate() const { return m_DecayRate; } -void CPrior::decayRate(double value) -{ +void CPrior::decayRate(double value) { detail::setDecayRate(value, FALLBACK_DECAY_RATE, m_DecayRate); } -void CPrior::removeModels(CModelFilter &/*filter*/) -{ +void CPrior::removeModels(CModelFilter& /*filter*/) { } -double CPrior::offsetMargin() const -{ +double CPrior::offsetMargin() const { return 0.0; } -void CPrior::addSamples(const TWeightStyleVec &weightStyles, - const TDouble1Vec &/*samples*/, - const TDouble4Vec1Vec &weights) -{ +void CPrior::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& /*samples*/, const TDouble4Vec1Vec& weights) { double n = 0.0; - try - { - for (const auto &weight : weights) - { + try { + for (const auto& weight : weights) { n += maths_t::countForUpdate(weightStyles, weight); } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract sample counts: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to extract sample counts: " << e.what()); } this->addSamples(n); } -double CPrior::nearestMarginalLikelihoodMean(double /*value*/) const -{ +double CPrior::nearestMarginalLikelihoodMean(double /*value*/) const { return this->marginalLikelihoodMean(); } -CPrior::TDouble1Vec CPrior::marginalLikelihoodModes(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) const -{ +CPrior::TDouble1Vec CPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { return TDouble1Vec{this->marginalLikelihoodMode(weightStyles, weights)}; } -std::string CPrior::print() const -{ +std::string CPrior::print() const { std::string result; this->print("", result); return result; } -std::string CPrior::printMarginalLikelihoodFunction(double weight) const -{ +std::string CPrior::printMarginalLikelihoodFunction(double weight) const { // We'll plot the marginal 
likelihood function over the range // where most of the mass is. @@ -167,8 +128,7 @@ std::string CPrior::printMarginalLikelihoodFunction(double weight) const abscissa << "x = ["; likelihood << "likelihood = ["; - for (std::size_t i = 0u; i < plot.s_Abscissa.size(); ++i) - { + for (std::size_t i = 0u; i < plot.s_Abscissa.size(); ++i) { abscissa << plot.s_Abscissa[i] << " "; likelihood << plot.s_Ordinates[i] << " "; } @@ -178,17 +138,14 @@ std::string CPrior::printMarginalLikelihoodFunction(double weight) const return abscissa.str() + likelihood.str(); } -CPrior::SPlot CPrior::marginalLikelihoodPlot(unsigned int numberPoints, double weight) const -{ - if (this->isNonInformative()) - { +CPrior::SPlot CPrior::marginalLikelihoodPlot(unsigned int numberPoints, double weight) const { + if (this->isNonInformative()) { // The non-informative likelihood is improper 0 everywhere. return CPrior::SPlot(); } CPrior::SPlot plot; - if (numberPoints == 0) - { + if (numberPoints == 0) { return plot; } @@ -197,23 +154,15 @@ CPrior::SPlot CPrior::marginalLikelihoodPlot(unsigned int numberPoints, double w this->sampleMarginalLikelihood(numberPoints, plot.s_Abscissa); std::sort(plot.s_Abscissa.begin(), plot.s_Abscissa.end()); - for (auto x : plot.s_Abscissa) - { + for (auto x : plot.s_Abscissa) { double likelihood; maths_t::EFloatingPointErrorStatus status = - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, - {x}, CConstantWeights::SINGLE_UNIT, - likelihood); - if (status & maths_t::E_FpFailed) - { + this->jointLogMarginalLikelihood(CConstantWeights::COUNT, {x}, CConstantWeights::SINGLE_UNIT, likelihood); + if (status & maths_t::E_FpFailed) { // Ignore point. - } - else if (status & maths_t::E_FpOverflowed) - { + } else if (status & maths_t::E_FpOverflowed) { plot.s_Ordinates.push_back(0.0); - } - else - { + } else { plot.s_Ordinates.push_back(weight * std::exp(likelihood)); } } @@ -221,67 +170,53 @@ CPrior::SPlot CPrior::marginalLikelihoodPlot(unsigned int numberPoints, double w return plot; } -uint64_t CPrior::checksum(uint64_t seed) const -{ +uint64_t CPrior::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_DataType); seed = CChecksum::calculate(seed, m_DecayRate); return CChecksum::calculate(seed, m_NumberSamples); } -double CPrior::numberSamples() const -{ +double CPrior::numberSamples() const { return m_NumberSamples; } -void CPrior::numberSamples(double numberSamples) -{ +void CPrior::numberSamples(double numberSamples) { m_NumberSamples = numberSamples; } -bool CPrior::participatesInModelSelection() const -{ +bool CPrior::participatesInModelSelection() const { return true; } -double CPrior::unmarginalizedParameters() const -{ +double CPrior::unmarginalizedParameters() const { return 0.0; } -void CPrior::adjustOffsetResamples(double minimumSample, - TDouble1Vec &resamples, - TDouble4Vec1Vec &resamplesWeights) const -{ +void CPrior::adjustOffsetResamples(double minimumSample, TDouble1Vec& resamples, TDouble4Vec1Vec& resamplesWeights) const { this->sampleMarginalLikelihood(ADJUST_OFFSET_SAMPLE_SIZE, resamples); std::size_t n = resamples.size(); - resamples.erase(std::remove_if(resamples.begin(), resamples.end(), - std::not1(CMathsFuncs::SIsFinite())), resamples.end()); - if (resamples.size() != n) - { + resamples.erase(std::remove_if(resamples.begin(), resamples.end(), std::not1(CMathsFuncs::SIsFinite())), resamples.end()); + if (resamples.size() != n) { LOG_ERROR("Bad samples (" << this->debug() << ")"); n = resamples.size(); } - for (std::size_t i = 0u; i < n; ++i) - { + for 
(std::size_t i = 0u; i < n; ++i) { resamples[i] = std::max(resamples[i], minimumSample); } double resamplesWeight = 1.0; - if (n > 0) - { + if (n > 0) { resamplesWeight = this->numberSamples() / static_cast(n); resamplesWeights.resize(n, TDouble4Vec(1, resamplesWeight)); } } -double CPrior::adjustOffsetWithCost(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights, - COffsetCost &cost, - CApplyOffset &apply) -{ - if (samples.empty() || CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) - { +double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + COffsetCost& cost, + CApplyOffset& apply) { + if (samples.empty() || CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) { return 0.0; } @@ -297,10 +232,8 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec &weightStyles, // likelihood of these samples w.r.t. the offset. double margin = this->offsetMargin(); - double minimumSample = *std::min_element(CMathsFuncs::beginFinite(samples), - CMathsFuncs::endFinite(samples)); - if (minimumSample + this->offset() >= margin) - { + double minimumSample = *std::min_element(CMathsFuncs::beginFinite(samples), CMathsFuncs::endFinite(samples)); + if (minimumSample + this->offset() >= margin) { return 0.0; } @@ -313,8 +246,7 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec &weightStyles, cost.resample(minimumSample); apply.resample(minimumSample); - if (this->isNonInformative()) - { + if (this->isNonInformative()) { apply(offset); return 0.0; } @@ -327,25 +259,20 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec &weightStyles, this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, resamplesWeights, before); double maximumSample = *std::max_element(samples.begin(), samples.end()); - double range = resamples.empty() ? maximumSample - minimumSample : - std::max(maximumSample - minimumSample, - resamples[resamples.size() - 1] - resamples[0]); + double range = resamples.empty() ? 
maximumSample - minimumSample + : std::max(maximumSample - minimumSample, resamples[resamples.size() - 1] - resamples[0]); double increment = std::max((range - margin) / static_cast(ADJUST_OFFSET_TRIALS - 1), 0.0); - if (increment > 0.0) - { + if (increment > 0.0) { TDouble1Vec trialOffsets; trialOffsets.reserve(ADJUST_OFFSET_TRIALS); - for (std::size_t i = 0u; i < ADJUST_OFFSET_TRIALS; ++i) - { + for (std::size_t i = 0u; i < ADJUST_OFFSET_TRIALS; ++i) { offset += increment; trialOffsets.push_back(offset); } double likelihood; CSolvers::globalMinimize(trialOffsets, cost, offset, likelihood); - LOG_TRACE("samples = " << core::CContainerPrinter::print(samples) - << ", offset = " << offset - << ", likelihood = " << likelihood); + LOG_TRACE("samples = " << core::CContainerPrinter::print(samples) << ", offset = " << offset << ", likelihood = " << likelihood); } apply(offset); @@ -355,185 +282,141 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec &weightStyles, return std::min(after - before, 0.0); } -void CPrior::addSamples(double n) -{ +void CPrior::addSamples(double n) { m_NumberSamples += n; } -std::string CPrior::debug() const -{ +std::string CPrior::debug() const { return std::string(); } const double CPrior::FALLBACK_DECAY_RATE = 0.001; const std::size_t CPrior::ADJUST_OFFSET_SAMPLE_SIZE = 50u; - ////////// CPrior::CModelFilter Implementation ////////// -CPrior::CModelFilter::CModelFilter() : m_Filter(0) {} +CPrior::CModelFilter::CModelFilter() : m_Filter(0) { +} -CPrior::CModelFilter &CPrior::CModelFilter::remove(EPrior model) -{ +CPrior::CModelFilter& CPrior::CModelFilter::remove(EPrior model) { m_Filter = m_Filter | model; return *this; } -bool CPrior::CModelFilter::operator()(EPrior model) const -{ +bool CPrior::CModelFilter::operator()(EPrior model) const { return (m_Filter & model) != 0; } - ////////// CPrior::CLogMarginalLikelihood Implementation ////////// -CPrior::CLogMarginalLikelihood::CLogMarginalLikelihood(const CPrior &prior, - const TWeightStyleVec &weightStyles, - const TDouble4Vec1Vec &weights) : - m_Prior(&prior), - m_WeightStyles(&weightStyles), - m_Weights(&weights), - m_X(1) -{} - -double CPrior::CLogMarginalLikelihood::operator()(double x) const -{ +CPrior::CLogMarginalLikelihood::CLogMarginalLikelihood(const CPrior& prior, + const TWeightStyleVec& weightStyles, + const TDouble4Vec1Vec& weights) + : m_Prior(&prior), m_WeightStyles(&weightStyles), m_Weights(&weights), m_X(1) { +} + +double CPrior::CLogMarginalLikelihood::operator()(double x) const { double result; - if (!this->operator()(x, result)) - { - throw std::runtime_error("Unable to compute likelihood at " - + core::CStringUtils::typeToString(x)); + if (!this->operator()(x, result)) { + throw std::runtime_error("Unable to compute likelihood at " + core::CStringUtils::typeToString(x)); } return result; } -bool CPrior::CLogMarginalLikelihood::operator()(double x, double &result) const -{ +bool CPrior::CLogMarginalLikelihood::operator()(double x, double& result) const { m_X[0] = x; - return !(m_Prior->jointLogMarginalLikelihood(*m_WeightStyles, - m_X, *m_Weights, - result) & maths_t::E_FpFailed); + return !(m_Prior->jointLogMarginalLikelihood(*m_WeightStyles, m_X, *m_Weights, result) & maths_t::E_FpFailed); } - ////////// CPrior::COffsetParameters Implementation ////////// -CPrior::COffsetParameters::COffsetParameters(CPrior &prior) : - m_Prior(&prior), - m_WeightStyles(0), - m_Samples(0), - m_Weights(0), - m_Resamples(0), - m_ResamplesWeights(0) -{} - -void CPrior::COffsetParameters::samples(const 
maths_t::TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) -{ +CPrior::COffsetParameters::COffsetParameters(CPrior& prior) + : m_Prior(&prior), m_WeightStyles(0), m_Samples(0), m_Weights(0), m_Resamples(0), m_ResamplesWeights(0) { +} + +void CPrior::COffsetParameters::samples(const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { m_WeightStyles = &weightStyles; m_Samples = &samples; m_Weights = &weights; } -void CPrior::COffsetParameters::resample(double minimumSample) -{ +void CPrior::COffsetParameters::resample(double minimumSample) { m_Prior->adjustOffsetResamples(minimumSample, m_Resamples, m_ResamplesWeights); } -CPrior &CPrior::COffsetParameters::prior() const -{ +CPrior& CPrior::COffsetParameters::prior() const { return *m_Prior; } -const maths_t::TWeightStyleVec &CPrior::COffsetParameters::weightStyles() const -{ +const maths_t::TWeightStyleVec& CPrior::COffsetParameters::weightStyles() const { return *m_WeightStyles; } -const CPrior::TDouble1Vec &CPrior::COffsetParameters::samples() const -{ +const CPrior::TDouble1Vec& CPrior::COffsetParameters::samples() const { return *m_Samples; } -const CPrior::TDouble4Vec1Vec &CPrior::COffsetParameters::weights() const -{ +const CPrior::TDouble4Vec1Vec& CPrior::COffsetParameters::weights() const { return *m_Weights; } -const CPrior::TDouble1Vec &CPrior::COffsetParameters::resamples() const -{ +const CPrior::TDouble1Vec& CPrior::COffsetParameters::resamples() const { return m_Resamples; } -const CPrior::TDouble4Vec1Vec &CPrior::COffsetParameters::resamplesWeights() const -{ +const CPrior::TDouble4Vec1Vec& CPrior::COffsetParameters::resamplesWeights() const { return m_ResamplesWeights; } - ////////// CPrior::COffsetCost Implementation ////////// -CPrior::COffsetCost::COffsetCost(CPrior &prior) : COffsetParameters(prior) {} +CPrior::COffsetCost::COffsetCost(CPrior& prior) : COffsetParameters(prior) { +} -double CPrior::COffsetCost::operator()(double offset) const -{ +double CPrior::COffsetCost::operator()(double offset) const { this->resetPriors(offset); return this->computeCost(offset); } -void CPrior::COffsetCost::resetPriors(double offset) const -{ +void CPrior::COffsetCost::resetPriors(double offset) const { this->prior().setToNonInformative(offset, this->prior().decayRate()); this->prior().addSamples(TWeights::COUNT, this->resamples(), this->resamplesWeights()); this->prior().addSamples(this->weightStyles(), this->samples(), this->weights()); } -double CPrior::COffsetCost::computeCost(double offset) const -{ +double CPrior::COffsetCost::computeCost(double offset) const { double resamplesLogLikelihood = 0.0; maths_t::EFloatingPointErrorStatus status; - if (this->resamples().size() > 0) - { - status = this->prior().jointLogMarginalLikelihood(TWeights::COUNT, - this->resamples(), - this->resamplesWeights(), - resamplesLogLikelihood); - if (status != maths_t::E_FpNoErrors) - { - LOG_ERROR("Failed evaluating log-likelihood at " << offset - << " for samples " << core::CContainerPrinter::print(this->resamples()) - << " and weights " << core::CContainerPrinter::print(this->resamplesWeights()) - << ", the prior is " << this->prior().print() + if (this->resamples().size() > 0) { + status = + this->prior().jointLogMarginalLikelihood(TWeights::COUNT, this->resamples(), this->resamplesWeights(), resamplesLogLikelihood); + if (status != maths_t::E_FpNoErrors) { + LOG_ERROR("Failed evaluating log-likelihood at " + << offset << " for samples " << 
core::CContainerPrinter::print(this->resamples()) << " and weights " + << core::CContainerPrinter::print(this->resamplesWeights()) << ", the prior is " << this->prior().print() << ": status " << status); } } double samplesLogLikelihood; - status = this->prior().jointLogMarginalLikelihood(this->weightStyles(), - this->samples(), - this->weights(), - samplesLogLikelihood); - if (status != maths_t::E_FpNoErrors) - { - LOG_ERROR("Failed evaluating log-likelihood at " << offset - << " for " << core::CContainerPrinter::print(this->samples()) - << " and weights " << core::CContainerPrinter::print(this->weights()) - << ", the prior is " << this->prior().print() - << ": status " << status); + status = this->prior().jointLogMarginalLikelihood(this->weightStyles(), this->samples(), this->weights(), samplesLogLikelihood); + if (status != maths_t::E_FpNoErrors) { + LOG_ERROR("Failed evaluating log-likelihood at " << offset << " for " << core::CContainerPrinter::print(this->samples()) + << " and weights " << core::CContainerPrinter::print(this->weights()) + << ", the prior is " << this->prior().print() << ": status " << status); } return -(resamplesLogLikelihood + samplesLogLikelihood); } - ////////// CPrior::CApplyOffset Implementation ////////// -CPrior::CApplyOffset::CApplyOffset(CPrior &prior) : COffsetParameters(prior) {} +CPrior::CApplyOffset::CApplyOffset(CPrior& prior) : COffsetParameters(prior) { +} -void CPrior::CApplyOffset::operator()(double offset) const -{ +void CPrior::CApplyOffset::operator()(double offset) const { this->prior().setToNonInformative(offset, this->prior().decayRate()); this->prior().addSamples(TWeights::COUNT, this->resamples(), this->resamplesWeights()); } - } } diff --git a/lib/maths/CPriorStateSerialiser.cc b/lib/maths/CPriorStateSerialiser.cc index 81ae519aa3..29358ad3a1 100644 --- a/lib/maths/CPriorStateSerialiser.cc +++ b/lib/maths/CPriorStateSerialiser.cc @@ -29,13 +29,9 @@ #include #include - -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { // We use short field names to reduce the state size // There needs to be one constant here per sub-class of CPrior. // DO NOT change the existing tags if new sub-classes are added. 
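The tag constants above are the crux of this serialiser: restore dispatches purely on the persisted tag string, so renaming a tag silently orphans previously persisted state. A minimal, self-contained sketch of that dispatch pattern (the struct names and the second tag are illustrative stand-ins, not the library's API):

#include <functional>
#include <map>
#include <memory>
#include <string>

struct Prior { virtual ~Prior() = default; }; // stand-in for CPrior
struct ConstantPrior : Prior {};              // hypothetical concrete type
struct OtherPrior : Prior {};                 // hypothetical concrete type

// Restore-side dispatch: unknown tags must be tolerated (state is chunked),
// but each known tag maps to exactly one concrete type, forever.
std::unique_ptr<Prior> restoreByTag(const std::string& tag) {
    static const std::map<std::string, std::function<std::unique_ptr<Prior>()>> FACTORIES{
        {"h", [] { return std::unique_ptr<Prior>(new ConstantPrior); }},
        {"x", [] { return std::unique_ptr<Prior>(new OtherPrior); }}};
    auto hit = FACTORIES.find(tag);
    return hit == FACTORIES.end() ? nullptr : hit->second();
}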
@@ -51,67 +47,45 @@ const std::string CONSTANT_TAG("h"); const std::string EMPTY_STRING; } -bool CPriorStateSerialiser::operator()(const SDistributionRestoreParams ¶ms, - TPriorPtr &ptr, - core::CStateRestoreTraverser &traverser) const -{ +bool CPriorStateSerialiser:: +operator()(const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser) const { size_t numResults(0); - do - { - const std::string &name = traverser.name(); - if (name == CONSTANT_TAG) - { + do { + const std::string& name = traverser.name(); + if (name == CONSTANT_TAG) { ptr.reset(new CConstantPrior(traverser)); ++numResults; - } - else if (name == GAMMA_TAG) - { + } else if (name == GAMMA_TAG) { ptr.reset(new CGammaRateConjugate(params, traverser)); ++numResults; - } - else if (name == LOG_NORMAL_TAG) - { + } else if (name == LOG_NORMAL_TAG) { ptr.reset(new CLogNormalMeanPrecConjugate(params, traverser)); ++numResults; - } - else if (name == MULTIMODAL_TAG) - { + } else if (name == MULTIMODAL_TAG) { ptr.reset(new CMultimodalPrior(params, traverser)); ++numResults; - } - else if (name == MULTINOMIAL_TAG) - { + } else if (name == MULTINOMIAL_TAG) { ptr.reset(new CMultinomialConjugate(params, traverser)); ++numResults; - } - else if (name == NORMAL_TAG) - { + } else if (name == NORMAL_TAG) { ptr.reset(new CNormalMeanPrecConjugate(params, traverser)); ++numResults; - } - else if (name == ONE_OF_N_TAG) - { + } else if (name == ONE_OF_N_TAG) { ptr.reset(new COneOfNPrior(params, traverser)); ++numResults; - } - else if (name == POISSON_TAG) - { + } else if (name == POISSON_TAG) { ptr.reset(new CPoissonMeanConjugate(params, traverser)); ++numResults; - } - else - { + } else { // Due to the way we divide large state into multiple chunks // this is not necessarily a problem - the unexpected element may be // marking the start of a new chunk LOG_WARN("No prior distribution corresponds to node name " << traverser.name()); } - } - while (traverser.next()); + } while (traverser.next()); - if (numResults != 1) - { + if (numResults != 1) { LOG_ERROR("Expected 1 (got " << numResults << ") prior model tags"); ptr.reset(); return false; @@ -120,118 +94,80 @@ bool CPriorStateSerialiser::operator()(const SDistributionRestoreParams ¶ms, return true; } -void CPriorStateSerialiser::operator()(const CPrior &prior, - core::CStatePersistInserter &inserter) const -{ +void CPriorStateSerialiser::operator()(const CPrior& prior, core::CStatePersistInserter& inserter) const { std::string tagName; - if (dynamic_cast(&prior) != 0) - { + if (dynamic_cast(&prior) != 0) { tagName = CONSTANT_TAG; - } - else if (dynamic_cast(&prior) != 0) - { + } else if (dynamic_cast(&prior) != 0) { tagName = GAMMA_TAG; - } - else if (dynamic_cast(&prior) != 0) - { + } else if (dynamic_cast(&prior) != 0) { tagName = LOG_NORMAL_TAG; - } - else if (dynamic_cast(&prior) != 0) - { + } else if (dynamic_cast(&prior) != 0) { tagName = MULTIMODAL_TAG; - } - else if (dynamic_cast(&prior) != 0) - { + } else if (dynamic_cast(&prior) != 0) { tagName = MULTINOMIAL_TAG; - } - else if (dynamic_cast(&prior) != 0) - { + } else if (dynamic_cast(&prior) != 0) { tagName = NORMAL_TAG; - } - else if (dynamic_cast(&prior) != 0) - { + } else if (dynamic_cast(&prior) != 0) { tagName = ONE_OF_N_TAG; - } - else if (dynamic_cast(&prior) != 0) - { + } else if (dynamic_cast(&prior) != 0) { tagName = POISSON_TAG; - } - else - { - LOG_ERROR("Prior distribution with type '" << typeid(prior).name() - << "' has no defined field name"); + } else { + LOG_ERROR("Prior 
distribution with type '" << typeid(prior).name() << "' has no defined field name"); return; } inserter.insertLevel(tagName, boost::bind(&CPrior::acceptPersistInserter, &prior, _1)); } -bool CPriorStateSerialiser::operator()(const SDistributionRestoreParams &params, - TMultivariatePriorPtr &ptr, - core::CStateRestoreTraverser &traverser) const -{ +bool CPriorStateSerialiser:: +operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, core::CStateRestoreTraverser& traverser) const { std::size_t numResults = 0u; - do - { - const std::string &name = traverser.name(); - if (name == CMultivariatePrior::CONSTANT_TAG) - { + do { + const std::string& name = traverser.name(); + if (name == CMultivariatePrior::CONSTANT_TAG) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::CONSTANT_TAG.length()), dimension) == false) - { + if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::CONSTANT_TAG.length()), dimension) == false) { LOG_ERROR("Bad dimension encoded in " << name); return false; } ptr.reset(new CMultivariateConstantPrior(dimension, traverser)); ++numResults; - } - else if (name.find(CMultivariatePrior::MULTIMODAL_TAG) != std::string::npos) - { + } else if (name.find(CMultivariatePrior::MULTIMODAL_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::MULTIMODAL_TAG.length()), dimension) == false) - { + if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::MULTIMODAL_TAG.length()), dimension) == false) { LOG_ERROR("Bad dimension encoded in " << name); return false; } CMultivariateMultimodalPriorFactory::restore(dimension, params, ptr, traverser); ++numResults; - } - else if (name.find(CMultivariatePrior::NORMAL_TAG) != std::string::npos) - { + } else if (name.find(CMultivariatePrior::NORMAL_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::NORMAL_TAG.length()), dimension) == false) - { + if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::NORMAL_TAG.length()), dimension) == false) { LOG_ERROR("Bad dimension encoded in " << name); return false; } CMultivariateNormalConjugateFactory::restore(dimension, params, ptr, traverser); ++numResults; - } - else if (name.find(CMultivariatePrior::ONE_OF_N_TAG) != std::string::npos) - { + } else if (name.find(CMultivariatePrior::ONE_OF_N_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::ONE_OF_N_TAG.length()), dimension) == false) - { + if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::ONE_OF_N_TAG.length()), dimension) == false) { LOG_ERROR("Bad dimension encoded in " << name); return false; } CMultivariateOneOfNPriorFactory::restore(dimension, params, ptr, traverser); ++numResults; - } - else - { + } else { // Due to the way we divide large state into multiple chunks // this is not necessarily a problem - the unexpected element may be // marking the start of a new chunk LOG_WARN("No prior distribution corresponds to node name " << traverser.name()); } - } - while (traverser.next()); + } while (traverser.next()); - if (numResults != 1) - { + if (numResults != 1) { LOG_ERROR("Expected 1 (got " << numResults << ") prior model tags"); ptr.reset(); return false; @@ -240,13 +176,8 @@ bool CPriorStateSerialiser::operator()(const SDistributionRestoreParams &params, return true; } -void CPriorStateSerialiser::operator()(const
CMultivariatePrior &prior, - core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(prior.persistenceTag(), - boost::bind(&CMultivariatePrior::acceptPersistInserter, &prior, _1)); +void CPriorStateSerialiser::operator()(const CMultivariatePrior& prior, core::CStatePersistInserter& inserter) const { + inserter.insertLevel(prior.persistenceTag(), boost::bind(&CMultivariatePrior::acceptPersistInserter, &prior, _1)); } - } } diff --git a/lib/maths/CProbabilityCalibrator.cc b/lib/maths/CProbabilityCalibrator.cc index b46846554c..8aa2d68c28 100644 --- a/lib/maths/CProbabilityCalibrator.cc +++ b/lib/maths/CProbabilityCalibrator.cc @@ -19,110 +19,79 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { const uint64_t QUANTILE_SIZE = 200u; const double DISCRETIZATION_FACTOR = 100.0; //! Convert a probability to a positive integer. -uint32_t discreteProbability(const double probability) -{ - return static_cast<uint32_t>(DISCRETIZATION_FACTOR - * -std::log(probability) + 0.5); +uint32_t discreteProbability(const double probability) { + return static_cast<uint32_t>(DISCRETIZATION_FACTOR * -std::log(probability) + 0.5); } //! Convert a discrete probability integer into the //! approximate probability which generated it. -double rawProbability(const uint32_t &discreteProbability) -{ - return std::exp(-static_cast<double>(discreteProbability) - / DISCRETIZATION_FACTOR); +double rawProbability(const uint32_t& discreteProbability) { + return std::exp(-static_cast<double>(discreteProbability) / DISCRETIZATION_FACTOR); } const std::string STYLE_TAG("a"); const std::string CUTOFF_PROBABILITY_TAG("b"); const std::string DISCRETE_PROBABILITY_QUANTILE_TAG("c"); const std::string EMPTY_STRING; - } -CProbabilityCalibrator::CProbabilityCalibrator(EStyle style, - double cutoffProbability) : - m_Style(style), - m_CutoffProbability(cutoffProbability), - m_DiscreteProbabilityQuantiles(new CQDigest(QUANTILE_SIZE)) -{ - if (!(m_CutoffProbability >= 0.0 && m_CutoffProbability <= 1.0)) - { +CProbabilityCalibrator::CProbabilityCalibrator(EStyle style, double cutoffProbability) + : m_Style(style), m_CutoffProbability(cutoffProbability), m_DiscreteProbabilityQuantiles(new CQDigest(QUANTILE_SIZE)) { + if (!(m_CutoffProbability >= 0.0 && m_CutoffProbability <= 1.0)) { LOG_ERROR("Invalid cutoff probability " << m_CutoffProbability); CTools::truncate(m_CutoffProbability, 0.0, 1.0); } } -void CProbabilityCalibrator::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CProbabilityCalibrator::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(STYLE_TAG, static_cast<int>(m_Style)); inserter.insertValue(CUTOFF_PROBABILITY_TAG, m_CutoffProbability); inserter.insertLevel(DISCRETE_PROBABILITY_QUANTILE_TAG, - boost::bind(&CQDigest::acceptPersistInserter, - m_DiscreteProbabilityQuantiles.get(), - _1)); + boost::bind(&CQDigest::acceptPersistInserter, m_DiscreteProbabilityQuantiles.get(), _1)); } -bool CProbabilityCalibrator::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == STYLE_TAG) - { +bool CProbabilityCalibrator::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == STYLE_TAG) { int style; - if (core::CStringUtils::stringToType(traverser.value(), style) == false) - { + if (core::CStringUtils::stringToType(traverser.value(), style) == false) { LOG_ERROR("Invalid style in
" << traverser.value()); return false; } m_Style = static_cast(style); - } - else if (name == CUTOFF_PROBABILITY_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), - m_CutoffProbability) == false) - { + } else if (name == CUTOFF_PROBABILITY_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), m_CutoffProbability) == false) { LOG_ERROR("Invalid cutoff in " << traverser.value()); return false; } - } - else if (name == DISCRETE_PROBABILITY_QUANTILE_TAG) - { - if (traverser.traverseSubLevel(boost::bind(&CQDigest::acceptRestoreTraverser, - m_DiscreteProbabilityQuantiles.get(), - _1)) == false) - { + } else if (name == DISCRETE_PROBABILITY_QUANTILE_TAG) { + if (traverser.traverseSubLevel(boost::bind(&CQDigest::acceptRestoreTraverser, m_DiscreteProbabilityQuantiles.get(), _1)) == + false) { LOG_ERROR("Invalid quantiles in " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CProbabilityCalibrator::add(double probability) -{ +void CProbabilityCalibrator::add(double probability) { uint32_t pDiscrete = discreteProbability(probability); m_DiscreteProbabilityQuantiles->add(pDiscrete); } -double CProbabilityCalibrator::calibrate(double probability) const -{ +double CProbabilityCalibrator::calibrate(double probability) const { LOG_TRACE("Calibrating " << probability); // The basic idea is to calibrate the probability to the historical @@ -175,23 +144,18 @@ double CProbabilityCalibrator::calibrate(double probability) const // probabilities will be very far from what we'd expect, i.e. // we won't see any probabilities less than 1. - switch (m_Style) - { + switch (m_Style) { case E_PartialCalibration: - if (Fu > m_CutoffProbability) - { + if (Fu > m_CutoffProbability) { uint32_t pThreshold; - m_DiscreteProbabilityQuantiles->quantileSublevelSetSupremum(m_CutoffProbability, - pThreshold); + m_DiscreteProbabilityQuantiles->quantileSublevelSetSupremum(m_CutoffProbability, pThreshold); m_DiscreteProbabilityQuantiles->cdf(pThreshold, 0.0, Fl, Fu); a = n * Fu + 1.0; b = n * (1.0 - Fu) + 1.0; beta = boost::math::beta_distribution<>(a, b); Fu = boost::math::quantile(beta, 0.75); double scale = std::max((1.0 - Fu) / rawProbability(pThreshold), 1.0); - LOG_TRACE("scale = " << scale - << ", 1 - F = " << 1.0 - Fu - << ", p = " << rawProbability(pThreshold)); + LOG_TRACE("scale = " << scale << ", 1 - F = " << 1.0 - Fu << ", p = " << rawProbability(pThreshold)); return probability * scale; } return std::max(probability, 1.0 - Fu); @@ -202,6 +166,5 @@ double CProbabilityCalibrator::calibrate(double probability) const LOG_ABORT("Unexpected style " << m_Style); } - } } diff --git a/lib/maths/CQDigest.cc b/lib/maths/CQDigest.cc index a38adf07a7..3ca0015393 100644 --- a/lib/maths/CQDigest.cc +++ b/lib/maths/CQDigest.cc @@ -27,13 +27,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { std::string EMPTY_STRING; } @@ -41,19 +38,12 @@ const std::string CQDigest::K_TAG("a"); const std::string CQDigest::N_TAG("b"); const std::string CQDigest::NODE_TAG("c"); -CQDigest::CQDigest(uint64_t k, - double decayRate) : - m_K(k), - m_N(0u), - m_Root(0), - m_NodeAllocator(static_cast(3 * m_K + 2)), - m_DecayRate(decayRate) -{ +CQDigest::CQDigest(uint64_t k, double decayRate) + : m_K(k), m_N(0u), m_Root(0), m_NodeAllocator(static_cast(3 * m_K + 2)), m_DecayRate(decayRate) { m_Root = &m_NodeAllocator.create(CNode(0, 1, 0, 0)); } -void 
CQDigest::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CQDigest::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(K_TAG, m_K); inserter.insertValue(N_TAG, m_N); @@ -61,48 +51,37 @@ void CQDigest::acceptPersistInserter(core::CStatePersistInserter &inserter) cons m_Root->persistRecursive(NODE_TAG, inserter); } -bool CQDigest::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CQDigest::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { std::size_t nodeCount = 0u; - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(K_TAG, m_K) RESTORE_BUILT_IN(N_TAG, m_N) - if (name == NODE_TAG) - { + if (name == NODE_TAG) { CNode node; - if (traverser.traverseSubLevel(boost::bind(&CNode::acceptRestoreTraverser, - &node, _1)) == false) - { + if (traverser.traverseSubLevel(boost::bind(&CNode::acceptRestoreTraverser, &node, _1)) == false) { LOG_ERROR("Failed to restore NODE_TAG, got " << traverser.value()); } - if (nodeCount++ == 0) - { + if (nodeCount++ == 0) { m_Root = &m_NodeAllocator.create(node); - } - else - { + } else { m_Root->insert(m_NodeAllocator, node); } continue; } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CQDigest::add(uint32_t value, uint64_t n) -{ +void CQDigest::add(uint32_t value, uint64_t n) { LOG_TRACE("Adding = " << value); m_N += n; - CNode *expanded = m_Root->expand(m_NodeAllocator, value); - if (expanded) - { + CNode* expanded = m_Root->expand(m_NodeAllocator, value); + if (expanded) { m_Root = expanded; } @@ -113,14 +92,11 @@ void CQDigest::add(uint32_t value, uint64_t n) // tree. Otherwise, we can get away with just compressing // the path from the leaf to the root. - CNode &leaf = m_Root->insert(m_NodeAllocator, CNode(value, value, n, n)); - if (expanded || (m_N / m_K) != ((m_N - n) / m_K)) - { + CNode& leaf = m_Root->insert(m_NodeAllocator, CNode(value, value, n, n)); + if (expanded || (m_N / m_K) != ((m_N - n) / m_K)) { // Compress the whole tree. this->compress(); - } - else if (leaf.count() == n) - { + } else if (leaf.count() == n) { // Compress the path to the new leaf. 
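// For example, with k = 100 the whole-tree compress above triggers when n crosses a multiple of k (floor(999/100) = 9 versus floor(1000/100) = 10); between those crossings only this root-to-leaf path is compressed, which keeps add() cheap.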
TNodePtrVec compress(1u, &leaf); this->compress(compress); @@ -129,19 +105,16 @@ void CQDigest::add(uint32_t value, uint64_t n) //this->checkInvariants(); } -void CQDigest::merge(const CQDigest &digest) -{ +void CQDigest::merge(const CQDigest& digest) { TNodePtrVec nodes; digest.m_Root->postOrder(nodes); - CNode *expanded = m_Root->expand(m_NodeAllocator, digest.m_Root->max()); - if (expanded) - { + CNode* expanded = m_Root->expand(m_NodeAllocator, digest.m_Root->max()); + if (expanded) { m_Root = expanded; } - for (const auto &node : nodes) - { + for (const auto& node : nodes) { m_N += node->count(); m_Root->insert(m_NodeAllocator, *node); } @@ -152,10 +125,8 @@ void CQDigest::merge(const CQDigest &digest) //this->checkInvariants(); } -void CQDigest::propagateForwardsByTime(double time) -{ - if (time < 0.0) - { +void CQDigest::propagateForwardsByTime(double time) { + if (time < 0.0) { LOG_ERROR("Can't propagate quantiles backwards in time"); return; } @@ -168,25 +139,21 @@ void CQDigest::propagateForwardsByTime(double time) this->compress(); } -bool CQDigest::scale(double factor) -{ +bool CQDigest::scale(double factor) { using TUInt32UInt32UInt64Tr = boost::tuple<uint32_t, uint32_t, uint64_t>; using TUInt32UInt32UInt64TrVec = std::vector<TUInt32UInt32UInt64Tr>; - if (factor <= 0.0) - { + if (factor <= 0.0) { LOG_ERROR("Scaling factor must be positive"); return false; } - if (factor == 1.0) - { + if (factor == 1.0) { // Nothing to do. return true; } - if (m_N == 0) - { + if (m_N == 0) { // Nothing to do. return true; } @@ -197,8 +164,7 @@ bool CQDigest::scale(double factor) std::sort(nodes.begin(), nodes.end(), SLevelLess()); TUInt32UInt32UInt64TrVec sketch; sketch.reserve(nodes.size()); - for (const auto &node : nodes) - { + for (const auto& node : nodes) { sketch.emplace_back(node->min(), node->max(), node->count()); } @@ -207,35 +173,25 @@ bool CQDigest::scale(double factor) // Reinsert the scaled summary values. boost::random::mt11213b generator; - for (std::size_t i = 0u; i < sketch.size(); ++i) - { - const TUInt32UInt32UInt64Tr &node = sketch[i]; + for (std::size_t i = 0u; i < sketch.size(); ++i) { + const TUInt32UInt32UInt64Tr& node = sketch[i]; uint32_t min = node.get<0>(); uint32_t max = node.get<1>(); uint32_t span = max - min + 1; uint64_t count = node.get<2>() / span; uint64_t remainder = node.get<2>() - count * span; - LOG_TRACE("min = " << min - << ", max = " << max - << ", count = " << count - << ", remainder = " << remainder); - - if (count > 0) - { - for (uint32_t j = 0u; j < span; ++j) - { - this->add(static_cast<uint32_t>( - factor * static_cast<double>(min + j) + 0.5), count); + LOG_TRACE("min = " << min << ", max = " << max << ", count = " << count << ", remainder = " << remainder); + + if (count > 0) { + for (uint32_t j = 0u; j < span; ++j) { + this->add(static_cast<uint32_t>(factor * static_cast<double>(min + j) + 0.5), count); } } - if (remainder > 0) - { + if (remainder > 0) { boost::random::uniform_int_distribution<uint32_t> uniform(0u, span - 1); - for (uint64_t j = 0u; j < remainder; ++j) - { - this->add(static_cast<uint32_t>( - factor * static_cast<double>(min + uniform(generator)) + 0.5)); + for (uint64_t j = 0u; j < remainder; ++j) { + this->add(static_cast<uint32_t>(factor * static_cast<double>(min + uniform(generator)) + 0.5)); } } } @@ -243,31 +199,26 @@ return true; } -void CQDigest::clear() -{ +void CQDigest::clear() { // Release all current nodes. TNodePtrVec nodes; m_Root->postOrder(nodes); - for (const auto &node : nodes) - { + for (const auto& node : nodes) { m_N -= node->count(); } // Reset root to its initial state and sanity check total count.
m_Root = &m_NodeAllocator.create(CNode(0, 1, 0, 0)); - if (m_N != 0) - { + if (m_N != 0) { LOG_ERROR("Inconsistency - sum of node counts did not equal N"); m_N = 0; } } -bool CQDigest::quantile(double q, uint32_t &result) const -{ +bool CQDigest::quantile(double q, uint32_t& result) const { result = 0u; - if (m_N == 0) - { + if (m_N == 0) { LOG_ERROR("Can't compute quantiles on empty set"); return false; } @@ -280,22 +231,17 @@ bool CQDigest::quantile(double q, uint32_t &result) const return true; } -bool CQDigest::quantileSublevelSetSupremum(double f, - uint32_t &result) const -{ +bool CQDigest::quantileSublevelSetSupremum(double f, uint32_t& result) const { result = 0; - if (m_N == 0) - { + if (m_N == 0) { LOG_ERROR("Can't compute level set for empty set"); return false; } - if (f <= 0.0) - { + if (f <= 0.0) { m_Root->sublevelSetSupremum(-1, result); return true; } - if (f > 1.0) - { + if (f > 1.0) { m_Root->superlevelSetInfimum(m_Root->max() + 1, result); return true; } @@ -305,10 +251,8 @@ bool CQDigest::quantileSublevelSetSupremum(double f, return true; } -double CQDigest::cdfQuantile(double n, double p, double q) -{ - if (q == 0.5) - { +double CQDigest::cdfQuantile(double n, double p, double q) { + if (q == 0.5) { return p; } @@ -325,33 +269,22 @@ double CQDigest::cdfQuantile(double n, double p, double q) static const double ONE_THIRD = 1.0 / 3.0; - try - { + try { double a = n * p + ONE_THIRD; double b = n * (1.0 - p) + ONE_THIRD; boost::math::beta_distribution<> beta(a, b); return boost::math::quantile(beta, q); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate c.d.f. quantile: " << e.what() - << ", n = " << n - << ", p = " << p - << ", q = " << q); + } catch (const std::exception& e) { + LOG_ERROR("Failed to calculate c.d.f. quantile: " << e.what() << ", n = " << n << ", p = " << p << ", q = " << q); } return p; } -bool CQDigest::cdf(uint32_t x, - double confidence, - double &lowerBound, - double &upperBound) const -{ +bool CQDigest::cdf(uint32_t x, double confidence, double& lowerBound, double& upperBound) const { lowerBound = 0.0; upperBound = 0.0; - if (m_N == 0) - { + if (m_N == 0) { LOG_ERROR("Can't compute c.d.f. 
for empty set"); return false; } @@ -359,36 +292,25 @@ bool CQDigest::cdf(uint32_t x, uint64_t l = 0ull; m_Root->cdfLowerBound(x, l); lowerBound = static_cast(l) / static_cast(m_N); - if (confidence > 0.0) - { - lowerBound = cdfQuantile(static_cast(m_N), - lowerBound, - (100.0 - confidence) / 200.0); + if (confidence > 0.0) { + lowerBound = cdfQuantile(static_cast(m_N), lowerBound, (100.0 - confidence) / 200.0); } uint64_t u = 0ull; m_Root->cdfUpperBound(x, u); upperBound = static_cast(u) / static_cast(m_N); - if (confidence > 0.0) - { - upperBound = cdfQuantile(static_cast(m_N), - upperBound, - (100.0 + confidence) / 200.0); + if (confidence > 0.0) { + upperBound = cdfQuantile(static_cast(m_N), upperBound, (100.0 + confidence) / 200.0); } return true; } -void CQDigest::pdf(uint32_t x, - double confidence, - double &lowerBound, - double &upperBound) const -{ +void CQDigest::pdf(uint32_t x, double confidence, double& lowerBound, double& upperBound) const { lowerBound = 0.0; upperBound = 0.0; - if (m_N == 0) - { + if (m_N == 0) { return; } @@ -406,37 +328,27 @@ void CQDigest::pdf(uint32_t x, double supremumUpperBound; this->cdf(supremum, confidence, supremumLowerBound, supremumUpperBound); - lowerBound = std::max(supremumLowerBound - infimumUpperBound, 0.0) - / std::max(static_cast(supremum - infimum), 1.0); - upperBound = std::max(supremumUpperBound - infimumLowerBound, 0.0) - / std::max(static_cast(supremum - infimum), 1.0); + lowerBound = std::max(supremumLowerBound - infimumUpperBound, 0.0) / std::max(static_cast(supremum - infimum), 1.0); + upperBound = std::max(supremumUpperBound - infimumLowerBound, 0.0) / std::max(static_cast(supremum - infimum), 1.0); - LOG_TRACE("x = " << x - << ", supremum = " << supremum - << ", infimum = " << infimum - << ", cdf(supremum) = [" << supremumLowerBound - << "," << supremumUpperBound << "]" - << ", cdf(infimum) = [" << infimumLowerBound - << "," << infimumUpperBound << "]" - << ", pdf = [" << lowerBound << "," << upperBound << "]"); + LOG_TRACE("x = " << x << ", supremum = " << supremum << ", infimum = " << infimum << ", cdf(supremum) = [" << supremumLowerBound << "," + << supremumUpperBound << "]" + << ", cdf(infimum) = [" << infimumLowerBound << "," << infimumUpperBound << "]" + << ", pdf = [" << lowerBound << "," << upperBound << "]"); } -void CQDigest::sublevelSetSupremum(uint32_t x, uint32_t &result) const -{ +void CQDigest::sublevelSetSupremum(uint32_t x, uint32_t& result) const { m_Root->sublevelSetSupremum(static_cast(x), result); } -void CQDigest::superlevelSetInfimum(uint32_t x, uint32_t &result) const -{ +void CQDigest::superlevelSetInfimum(uint32_t x, uint32_t& result) const { m_Root->superlevelSetInfimum(x, result); } -void CQDigest::summary(TUInt32UInt64PrVec &result) const -{ +void CQDigest::summary(TUInt32UInt64PrVec& result) const { result.clear(); - if (m_N == 0) - { + if (m_N == 0) { return; } @@ -447,10 +359,8 @@ void CQDigest::summary(TUInt32UInt64PrVec &result) const uint32_t last = nodes[0]->max(); uint64_t count = nodes[0]->count(); - for (std::size_t i = 1u; i < nodes.size(); ++i) - { - if (nodes[i]->max() != last) - { + for (std::size_t i = 1u; i < nodes.size(); ++i) { + if (nodes[i]->max() != last) { result.emplace_back(last, count); last = nodes[i]->max(); } @@ -459,29 +369,24 @@ void CQDigest::summary(TUInt32UInt64PrVec &result) const } // Check if any count is aligned with the root max. 
- if (result.empty() || result.back().second < count) - { + if (result.empty() || result.back().second < count) { result.emplace_back(m_Root->max(), count); } - if (result.back().second != m_N) - { + if (result.back().second != m_N) { LOG_ERROR("Got " << result.back().second << " expected " << m_N); } } -uint64_t CQDigest::n() const -{ +uint64_t CQDigest::n() const { return m_N; } -uint64_t CQDigest::k() const -{ +uint64_t CQDigest::k() const { return m_K; } -uint64_t CQDigest::checksum(uint64_t seed) const -{ +uint64_t CQDigest::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_K); seed = CChecksum::calculate(seed, m_N); seed = CChecksum::calculate(seed, m_DecayRate); @@ -490,21 +395,18 @@ uint64_t CQDigest::checksum(uint64_t seed) const return CChecksum::calculate(seed, summary); } -bool CQDigest::checkInvariants() const -{ +bool CQDigest::checkInvariants() const { // These are: // 1) |Q| <= 3 * k. // 2) Subtree count at the root = n // 2) The node invariants are satisfied. - if (m_Root->size() > 3 * m_K) - { + if (m_Root->size() > 3 * m_K) { LOG_ERROR("|Q| = " << m_Root->size() << " 3k = " << 3 * m_K); return false; } - if (m_Root->subtreeCount() != m_N) - { + if (m_Root->subtreeCount() != m_N) { LOG_ERROR("Bad count: " << m_Root->subtreeCount() << ", n = " << m_N); return false; } @@ -512,55 +414,44 @@ bool CQDigest::checkInvariants() const return m_Root->checkInvariants(m_N / m_K); } -std::string CQDigest::print() const -{ +std::string CQDigest::print() const { std::ostringstream result; TNodePtrVec nodes; m_Root->postOrder(nodes); result << m_N << " | " << m_K << " | {"; - for (const auto &node : nodes) - { - result << " \"" << node->print() - << ',' << node->count() - << ',' << node->subtreeCount() - << '"'; + for (const auto& node : nodes) { + result << " \"" << node->print() << ',' << node->count() << ',' << node->subtreeCount() << '"'; } result << " }"; return result.str(); } -void CQDigest::compress() -{ - for (std::size_t i = 0u; i < 3 * m_K + 2; ++i) - { +void CQDigest::compress() { + for (std::size_t i = 0u; i < 3 * m_K + 2; ++i) { TNodePtrVec compress; m_Root->postOrder(compress); - if (!this->compress(compress)) - { + if (!this->compress(compress)) { return; } } LOG_ERROR("Failed to compress tree"); } -bool CQDigest::compress(TNodePtrVec &compress) -{ +bool CQDigest::compress(TNodePtrVec& compress) { bool compressed = false; std::make_heap(compress.begin(), compress.end(), SLevelLess()); - while (!compress.empty()) - { - CNode &node = *compress.front(); + while (!compress.empty()) { + CNode& node = *compress.front(); std::pop_heap(compress.begin(), compress.end(), SLevelLess()); compress.pop_back(); - if (CNode *parent = node.compress(m_NodeAllocator, m_N / m_K)) - { + if (CNode* parent = node.compress(m_NodeAllocator, m_N / m_K)) { compressed = true; compress.push_back(parent); @@ -571,75 +462,46 @@ bool CQDigest::compress(TNodePtrVec &compress) return compressed; } - -bool CQDigest::SLevelLess::operator()(const CNode *lhs, - const CNode *rhs) const -{ - return lhs->span() > rhs->span() - || (lhs->span() == rhs->span() && lhs->max() > rhs->max()); +bool CQDigest::SLevelLess::operator()(const CNode* lhs, const CNode* rhs) const { + return lhs->span() > rhs->span() || (lhs->span() == rhs->span() && lhs->max() > rhs->max()); } -bool CQDigest::SPostLess::operator()(const CNode *lhs, - const CNode *rhs) const -{ - return lhs->max() < rhs->max() - || (lhs->max() == rhs->max() && lhs->span() < rhs->span()); +bool CQDigest::SPostLess::operator()(const CNode* lhs, 
const CNode* rhs) const { + return lhs->max() < rhs->max() || (lhs->max() == rhs->max() && lhs->span() < rhs->span()); } - const std::string CQDigest::CNode::MIN_TAG("a"); const std::string CQDigest::CNode::MAX_TAG("b"); const std::string CQDigest::CNode::COUNT_TAG("c"); -CQDigest::CNode::CNode() : - m_Ancestor(0), - m_Descendants(), - m_Min(0xDEADBEEF), - m_Max(0xDEADBEEF), - m_Count(0xDEADBEEF), - m_SubtreeCount(0xDEADBEEF) -{ -} - -CQDigest::CNode::CNode(uint32_t min, - uint32_t max, - uint64_t count, - uint64_t subtreeCount) : - m_Ancestor(0), - m_Descendants(), - m_Min(min), - m_Max(max), - m_Count(count), - m_SubtreeCount(subtreeCount) -{ -} - -std::size_t CQDigest::CNode::size() const -{ +CQDigest::CNode::CNode() + : m_Ancestor(0), m_Descendants(), m_Min(0xDEADBEEF), m_Max(0xDEADBEEF), m_Count(0xDEADBEEF), m_SubtreeCount(0xDEADBEEF) { +} + +CQDigest::CNode::CNode(uint32_t min, uint32_t max, uint64_t count, uint64_t subtreeCount) + : m_Ancestor(0), m_Descendants(), m_Min(min), m_Max(max), m_Count(count), m_SubtreeCount(subtreeCount) { +} + +std::size_t CQDigest::CNode::size() const { std::size_t size = 1u; - for (const auto &descendant : m_Descendants) - { + for (const auto& descendant : m_Descendants) { size += descendant->size(); } return size; } -uint32_t CQDigest::CNode::quantile(uint64_t leftCount, - uint64_t n) const -{ +uint32_t CQDigest::CNode::quantile(uint64_t leftCount, uint64_t n) const { // We need to find the smallest node in post-order where // the left count is greater than n. At each level we visit // the smallest, in post order, node in the q-digest for // which the left count is greater than n. Terminating when // this node doesn't have any descendants. - for (const auto &descendant : m_Descendants) - { + for (const auto& descendant : m_Descendants) { uint64_t count = descendant->subtreeCount(); - if (leftCount + count >= n) - { + if (leftCount + count >= n) { return descendant->quantile(leftCount, n); } leftCount += count; @@ -648,27 +510,20 @@ uint32_t CQDigest::CNode::quantile(uint64_t leftCount, return m_Max; } -bool CQDigest::CNode::quantileSublevelSetSupremum(uint64_t n, - uint64_t leftCount, - uint32_t &result) const -{ +bool CQDigest::CNode::quantileSublevelSetSupremum(uint64_t n, uint64_t leftCount, uint32_t& result) const { // We are looking for the right end of the rightmost node // whose count together with those nodes to the left is // less than n. - if (leftCount + m_SubtreeCount < n) - { + if (leftCount + m_SubtreeCount < n) { result = std::max(result, m_Max); return true; } leftCount += m_SubtreeCount; - for (auto i = m_Descendants.rbegin(); i != m_Descendants.rend(); ++i) - { + for (auto i = m_Descendants.rbegin(); i != m_Descendants.rend(); ++i) { leftCount -= (*i)->subtreeCount(); - if ( leftCount + (*i)->count() < n - && (*i)->quantileSublevelSetSupremum(n, leftCount, result)) - { + if (leftCount + (*i)->count() < n && (*i)->quantileSublevelSetSupremum(n, leftCount, result)) { break; } } @@ -676,117 +531,82 @@ bool CQDigest::CNode::quantileSublevelSetSupremum(uint64_t n, return false; } -void CQDigest::CNode::cdfLowerBound(uint32_t x, - uint64_t &result) const -{ +void CQDigest::CNode::cdfLowerBound(uint32_t x, uint64_t& result) const { // The lower bound is the sum of the counts at the nodes // for which the maximum value is less than or equal to x.
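// Nodes whose range straddles x are skipped here but contribute their own count in cdfUpperBound below; together the two bound the true c.d.f. however the straddling ranges' counts are split about x.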
- if (m_Max <= x) - { + if (m_Max <= x) { result += m_SubtreeCount; - } - else - { - for (const auto &descendant : m_Descendants) - { + } else { + for (const auto& descendant : m_Descendants) { descendant->cdfLowerBound(x, result); } } } -void CQDigest::CNode::cdfUpperBound(uint32_t x, - uint64_t &result) const -{ +void CQDigest::CNode::cdfUpperBound(uint32_t x, uint64_t& result) const { // The upper bound is the sum of the counts at the nodes // for which the minimum value is less than or equal to x. - if (m_Max <= x) - { + if (m_Max <= x) { result += m_SubtreeCount; - } - else if (m_Min <= x) - { + } else if (m_Min <= x) { result += m_Count; - for (const auto &descendant : m_Descendants) - { + for (const auto& descendant : m_Descendants) { descendant->cdfUpperBound(x, result); } } } -void CQDigest::CNode::sublevelSetSupremum(const int64_t x, - uint32_t &result) const -{ - for (auto i = m_Descendants.rbegin(); i != m_Descendants.rend(); ++i) - { - if (static_cast<int64_t>((*i)->max()) > x) - { +void CQDigest::CNode::sublevelSetSupremum(const int64_t x, uint32_t& result) const { + for (auto i = m_Descendants.rbegin(); i != m_Descendants.rend(); ++i) { + if (static_cast<int64_t>((*i)->max()) > x) { result = std::min(result, (*i)->max()); - } - else - { + } else { (*i)->sublevelSetSupremum(x, result); break; } } - if (static_cast<int64_t>(m_Max) > x && m_Count > 0) - { + if (static_cast<int64_t>(m_Max) > x && m_Count > 0) { result = std::min(result, m_Max); } } -void CQDigest::CNode::superlevelSetInfimum(uint32_t x, - uint32_t &result) const -{ - for (const auto &descendant : m_Descendants) - { - if (descendant->max() < x) - { +void CQDigest::CNode::superlevelSetInfimum(uint32_t x, uint32_t& result) const { + for (const auto& descendant : m_Descendants) { + if (descendant->max() < x) { result = std::max(result, descendant->max()); - } - else - { + } else { descendant->superlevelSetInfimum(x, result); break; } } - if (m_Max < x && m_Count > 0) - { + if (m_Max < x && m_Count > 0) { result = std::max(result, m_Max); } } -void CQDigest::CNode::postOrder(TNodePtrVec &nodes) const -{ - for (const auto &descendant : m_Descendants) - { +void CQDigest::CNode::postOrder(TNodePtrVec& nodes) const { + for (const auto& descendant : m_Descendants) { descendant->postOrder(nodes); } nodes.push_back(const_cast<CNode*>(this)); } -CQDigest::CNode *CQDigest::CNode::expand(CNodeAllocator &allocator, - const uint32_t &value) -{ - if (m_Max >= value) - { +CQDigest::CNode* CQDigest::CNode::expand(CNodeAllocator& allocator, const uint32_t& value) { + if (m_Max >= value) { // No expansion necessary. return 0; } - CNode *result = m_Count == 0 ? - this : &allocator.create(CNode(m_Min, m_Max, 0, 0)); + CNode* result = m_Count == 0 ?
this : &allocator.create(CNode(m_Min, m_Max, 0, 0)); uint32_t levelSpan = result->span(); - do - { + do { result->m_Max += levelSpan; levelSpan <<= 1; - } - while (result->m_Max < value); + } while (result->m_Max < value); - if (result != this) - { + if (result != this) { m_Ancestor = result; result->m_Descendants.push_back(this); result->m_SubtreeCount += m_SubtreeCount; @@ -795,27 +615,20 @@ CQDigest::CNode *CQDigest::CNode::expand(CNodeAllocator &allocator, return result; } -CQDigest::CNode &CQDigest::CNode::insert(CNodeAllocator &allocator, - const CNode &node) -{ +CQDigest::CNode& CQDigest::CNode::insert(CNodeAllocator& allocator, const CNode& node) { m_SubtreeCount += node.subtreeCount(); - if (*this == node) - { + if (*this == node) { m_Count += node.count(); return *this; } - auto next = std::lower_bound(m_Descendants.begin(), - m_Descendants.end(), - &node, SPostLess()); + auto next = std::lower_bound(m_Descendants.begin(), m_Descendants.end(), &node, SPostLess()); // If it exists the ancestor will be after the node // in post order. - for (auto i = next; i != m_Descendants.end(); ++i) - { - if ((*i)->isAncestor(node) || **i == node) - { + for (auto i = next; i != m_Descendants.end(); ++i) { + if ((*i)->isAncestor(node) || **i == node) { return (*i)->insert(allocator, node); } } @@ -823,48 +636,39 @@ CQDigest::CNode &CQDigest::CNode::insert(CNodeAllocator &allocator, // This is the lowest ancestor in the q-digest. Insert // the node below it in post order and move descendants // if necessary. - CNode &newNode = allocator.create(node); + CNode& newNode = allocator.create(node); newNode.m_Ancestor = this; m_Descendants.insert(next, &newNode); - if (!newNode.isLeaf()) - { + if (!newNode.isLeaf()) { newNode.takeDescendants(*this); } return newNode; } -CQDigest::CNode *CQDigest::CNode::compress(CNodeAllocator &allocator, - uint64_t compressionFactor) -{ - if (!m_Ancestor) - { +CQDigest::CNode* CQDigest::CNode::compress(CNodeAllocator& allocator, uint64_t compressionFactor) { + if (!m_Ancestor) { // The node is no longer in the q-digest. return 0; } // Warning this function zeros m_Ancestor copy up front. - CNode *ancestor = m_Ancestor; + CNode* ancestor = m_Ancestor; // Get the sibling of this node if it exists. - CNode *sibling = ancestor->sibling(*this); + CNode* sibling = ancestor->sibling(*this); - uint64_t count = (ancestor->isParent(*this) ? ancestor->count() : 0ull) - + this->count() - + (sibling ? sibling->count() : 0ull); + uint64_t count = (ancestor->isParent(*this) ? ancestor->count() : 0ull) + this->count() + (sibling ? sibling->count() : 0ull); // Check if we should compress this node. - if (count >= compressionFactor) - { + if (count >= compressionFactor) { return 0; } - if (ancestor->isParent(*this)) - { + if (ancestor->isParent(*this)) { ancestor->m_Count = count; this->detach(allocator); - if (sibling) - { + if (sibling) { sibling->detach(allocator); } return ancestor; @@ -875,115 +679,91 @@ CQDigest::CNode *CQDigest::CNode::compress(CNodeAllocator &allocator, m_Count = count; this->isLeftChild() ? 
m_Max += this->span() : m_Min -= this->span(); this->takeDescendants(*ancestor); - if (sibling) - { + if (sibling) { sibling->detach(allocator); } return this; } -uint64_t CQDigest::CNode::age(double factor) -{ +uint64_t CQDigest::CNode::age(double factor) { m_SubtreeCount = 0u; - for (auto &descendant : m_Descendants) - { + for (auto& descendant : m_Descendants) { m_SubtreeCount += descendant->age(factor); } - if (m_Count > 0) - { - m_Count = static_cast<uint64_t>( - std::max(static_cast<double>(m_Count) * factor + 0.5, 1.0)); + if (m_Count > 0) { + m_Count = static_cast<uint64_t>(std::max(static_cast<double>(m_Count) * factor + 0.5, 1.0)); } m_SubtreeCount += m_Count; return m_SubtreeCount; } -uint32_t CQDigest::CNode::span() const -{ +uint32_t CQDigest::CNode::span() const { return m_Max - m_Min + 1u; } -uint32_t CQDigest::CNode::min() const -{ +uint32_t CQDigest::CNode::min() const { return m_Min; } -uint32_t CQDigest::CNode::max() const -{ +uint32_t CQDigest::CNode::max() const { return m_Max; } -const uint64_t &CQDigest::CNode::count() const -{ +const uint64_t& CQDigest::CNode::count() const { return m_Count; } -const uint64_t &CQDigest::CNode::subtreeCount() const -{ +const uint64_t& CQDigest::CNode::subtreeCount() const { return m_SubtreeCount; } -void CQDigest::CNode::persistRecursive(const std::string &nodeTag, - core::CStatePersistInserter &inserter) const -{ +void CQDigest::CNode::persistRecursive(const std::string& nodeTag, core::CStatePersistInserter& inserter) const { inserter.insertLevel(NODE_TAG, boost::bind(&CNode::acceptPersistInserter, this, _1)); // Note the tree is serialized flat in pre-order. - for (const auto &descendant : m_Descendants) - { + for (const auto& descendant : m_Descendants) { descendant->persistRecursive(nodeTag, inserter); } } -void CQDigest::CNode::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CQDigest::CNode::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(MIN_TAG, m_Min); inserter.insertValue(MAX_TAG, m_Max); inserter.insertValue(COUNT_TAG, m_Count); } -bool CQDigest::CNode::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == MIN_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), m_Min) == false) - { +bool CQDigest::CNode::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == MIN_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), m_Min) == false) { LOG_ERROR("Invalid min in " << traverser.value()); return false; } } - if (name == MAX_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), m_Max) == false) - { + if (name == MAX_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), m_Max) == false) { LOG_ERROR("Invalid max in " << traverser.value()); return false; } } - if (name == COUNT_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), m_Count) == false) - { + if (name == COUNT_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), m_Count) == false) { LOG_ERROR("Invalid count in " << traverser.value()); return false; } m_SubtreeCount = m_Count; } - } - while (traverser.next()); + } while (traverser.next()); return true; } -bool CQDigest::CNode::checkInvariants(uint64_t compressionFactor) const -{ +bool CQDigest::CNode::checkInvariants(uint64_t compressionFactor) const { // 1) span is a power of 2 // 2) q-digest connectivity is consistent. // 3) subtree counts are consistent.
@@ -999,8 +779,7 @@ bool CQDigest::CNode::checkInvariants(uint64_t compressionFactor) const // results. uint32_t span(this->span()); uint32_t spanMinusOne(span - 1); - if ((span | spanMinusOne) != (span ^ spanMinusOne)) - { + if ((span | spanMinusOne) != (span ^ spanMinusOne)) { LOG_ERROR("Bad span: " << this->print()); return false; } @@ -1008,58 +787,40 @@ bool CQDigest::CNode::checkInvariants(uint64_t compressionFactor) const SPostLess postLess; uint64_t subtreeCount = m_Count; - for (std::size_t i = 0u; i < m_Descendants.size(); ++i) - { - if (m_Descendants[i]->m_Ancestor != this) - { - LOG_ERROR("Bad connectivity: " << this->print() - << " -> " << m_Descendants[i]->print() - << " <- " << m_Descendants[i]->m_Ancestor->print()); + for (std::size_t i = 0u; i < m_Descendants.size(); ++i) { + if (m_Descendants[i]->m_Ancestor != this) { + LOG_ERROR("Bad connectivity: " << this->print() << " -> " << m_Descendants[i]->print() << " <- " + << m_Descendants[i]->m_Ancestor->print()); } - if (!this->isAncestor(*m_Descendants[i])) - { - LOG_ERROR("Bad connectivity: " << this->print() - << " -> " << m_Descendants[i]->print()); + if (!this->isAncestor(*m_Descendants[i])) { + LOG_ERROR("Bad connectivity: " << this->print() << " -> " << m_Descendants[i]->print()); return false; } - if (i + 1u < m_Descendants.size() - && !postLess(m_Descendants[i], m_Descendants[i + 1u])) - { - LOG_ERROR("Bad order: " << m_Descendants[i]->print() - << " >= " << m_Descendants[i + 1u]->print()); + if (i + 1u < m_Descendants.size() && !postLess(m_Descendants[i], m_Descendants[i + 1u])) { + LOG_ERROR("Bad order: " << m_Descendants[i]->print() << " >= " << m_Descendants[i + 1u]->print()); return false; } - if (!m_Descendants[i]->checkInvariants(compressionFactor)) - { + if (!m_Descendants[i]->checkInvariants(compressionFactor)) { return false; } subtreeCount += m_Descendants[i]->subtreeCount(); } - if (subtreeCount != m_SubtreeCount) - { - LOG_ERROR("Bad subtree count: expected " << subtreeCount - << " got " << m_SubtreeCount); + if (subtreeCount != m_SubtreeCount) { + LOG_ERROR("Bad subtree count: expected " << subtreeCount << " got " << m_SubtreeCount); return false; } - if (!this->isLeaf() && !this->isRoot() && m_Count > compressionFactor) - { - LOG_ERROR("Bad count: " << m_Count - << ", floor(n/k) = " << compressionFactor); + if (!this->isLeaf() && !this->isRoot() && m_Count > compressionFactor) { + LOG_ERROR("Bad count: " << m_Count << ", floor(n/k) = " << compressionFactor); return false; } - if (!this->isRoot()) - { - const CNode *sibling = m_Ancestor->sibling(*this); - uint64_t count = m_Count - + (sibling ? sibling->count() : 0ull) - + (m_Ancestor->isParent(*this) ? m_Ancestor->count() : 0ull); - if (count < compressionFactor) - { - LOG_ERROR("Bad triple count: " << count - << ", floor(n/k) = " << compressionFactor); + if (!this->isRoot()) { + const CNode* sibling = m_Ancestor->sibling(*this); + uint64_t count = m_Count + (sibling ? sibling->count() : 0ull) + (m_Ancestor->isParent(*this) ? 
m_Ancestor->count() : 0ull); + if (count < compressionFactor) { + LOG_ERROR("Bad triple count: " << count << ", floor(n/k) = " << compressionFactor); return false; } } @@ -1067,86 +828,68 @@ bool CQDigest::CNode::checkInvariants(uint64_t compressionFactor) const return true; } -std::string CQDigest::CNode::print() const -{ +std::string CQDigest::CNode::print() const { std::ostringstream result; result << '[' << m_Min << ',' << m_Max << ']'; return result.str(); } -bool CQDigest::CNode::operator==(const CNode &node) const -{ +bool CQDigest::CNode::operator==(const CNode& node) const { return m_Min == node.m_Min && m_Max == node.m_Max; } -std::size_t CQDigest::CNode::numberDescendants() const -{ +std::size_t CQDigest::CNode::numberDescendants() const { return m_Descendants.size(); } -CQDigest::TNodePtrVecCItr CQDigest::CNode::beginDescendants() const -{ +CQDigest::TNodePtrVecCItr CQDigest::CNode::beginDescendants() const { return m_Descendants.begin(); } -CQDigest::TNodePtrVecCItr CQDigest::CNode::endDescendants() const -{ +CQDigest::TNodePtrVecCItr CQDigest::CNode::endDescendants() const { return m_Descendants.end(); } -CQDigest::CNode *CQDigest::CNode::sibling(const CNode &node) const -{ +CQDigest::CNode* CQDigest::CNode::sibling(const CNode& node) const { uint32_t min = node.min(); node.isLeftChild() ? min += node.span() : min -= node.span(); uint32_t max = node.max(); node.isLeftChild() ? max += node.span() : max -= node.span(); CNode sibling(min, max, 0u, 0u); - auto next = std::lower_bound(m_Descendants.begin(), - m_Descendants.end(), - &sibling, SPostLess()); + auto next = std::lower_bound(m_Descendants.begin(), m_Descendants.end(), &sibling, SPostLess()); - if (next != m_Descendants.end() && (*next)->isSibling(node)) - { + if (next != m_Descendants.end() && (*next)->isSibling(node)) { return *next; } return 0; } -bool CQDigest::CNode::isSibling(const CNode &node) const -{ +bool CQDigest::CNode::isSibling(const CNode& node) const { // Check if the nodes are on the same level and share a parent. - return this->span() == node.span() - && (this->isLeftChild() ? - m_Max + 1u == node.m_Min : m_Min == node.m_Max + 1u); + return this->span() == node.span() && (this->isLeftChild() ? m_Max + 1u == node.m_Min : m_Min == node.m_Max + 1u); } -bool CQDigest::CNode::isParent(const CNode &node) const -{ +bool CQDigest::CNode::isParent(const CNode& node) const { // Check is ancestor and is in level above. return this->isAncestor(node) && this->span() == 2 * node.span(); } -bool CQDigest::CNode::isAncestor(const CNode &node) const -{ +bool CQDigest::CNode::isAncestor(const CNode& node) const { // Check for inclusion of node range. - return (m_Min < node.m_Min && m_Max >= node.m_Max) - || (m_Min <= node.m_Min && m_Max > node.m_Max); + return (m_Min < node.m_Min && m_Max >= node.m_Max) || (m_Min <= node.m_Min && m_Max > node.m_Max); } -bool CQDigest::CNode::isRoot() const -{ +bool CQDigest::CNode::isRoot() const { return m_Ancestor == 0; } -bool CQDigest::CNode::isLeaf() const -{ +bool CQDigest::CNode::isLeaf() const { return this->span() == 1; } -bool CQDigest::CNode::isLeftChild() const -{ +bool CQDigest::CNode::isLeftChild() const { // The left child nodes are always an even multiple of the // level range from the start of the overall range and the // right child nodes an odd multiple. 
To reduce storage we @@ -1155,44 +898,33 @@ bool CQDigest::CNode::isLeftChild() const return (m_Min / this->span()) % 2 == 0; } -void CQDigest::CNode::detach(CNodeAllocator &allocator) -{ +void CQDigest::CNode::detach(CNodeAllocator& allocator) { m_Ancestor->removeDescendant(*this); m_Ancestor->takeDescendants(*this); m_Ancestor = 0; allocator.release(*this); } -void CQDigest::CNode::removeDescendant(CNode &node) -{ +void CQDigest::CNode::removeDescendant(CNode& node) { // Remove node from the descendants. - m_Descendants.erase(std::remove(m_Descendants.begin(), - m_Descendants.end(), &node), - m_Descendants.end()); + m_Descendants.erase(std::remove(m_Descendants.begin(), m_Descendants.end(), &node), m_Descendants.end()); } -bool CQDigest::CNode::takeDescendants(CNode &node) -{ - if (node.numberDescendants() == 0) - { +bool CQDigest::CNode::takeDescendants(CNode& node) { + if (node.numberDescendants() == 0) { return false; } - if (!this->isAncestor(node)) - { + if (!this->isAncestor(node)) { // Find our descendants among the descendants of node. TNodePtrVec nodesToTake; TNodePtrVec nodesToLeave; - for (auto i = node.beginDescendants(); i != node.endDescendants(); ++i) - { - if (this->isAncestor(**i)) - { + for (auto i = node.beginDescendants(); i != node.endDescendants(); ++i) { + if (this->isAncestor(**i)) { nodesToTake.push_back(*i); (*i)->m_Ancestor = this; m_SubtreeCount += (*i)->subtreeCount(); - } - else - { + } else { nodesToLeave.push_back(*i); } } @@ -1200,9 +932,12 @@ bool CQDigest::CNode::takeDescendants(CNode &node) // Merge the descendants. TNodePtrVec descendants; descendants.reserve(m_Descendants.size() + nodesToTake.size()); - std::merge(m_Descendants.begin(), m_Descendants.end(), - nodesToTake.begin(), nodesToTake.end(), - std::back_inserter(descendants), SPostLess()); + std::merge(m_Descendants.begin(), + m_Descendants.end(), + nodesToTake.begin(), + nodesToTake.end(), + std::back_inserter(descendants), + SPostLess()); // Update the node's descendants. nodesToLeave.swap(node.m_Descendants); @@ -1213,17 +948,19 @@ bool CQDigest::CNode::takeDescendants(CNode &node) return !nodesToTake.empty(); } - for (auto i = node.beginDescendants(); i != node.endDescendants(); ++i) - { + for (auto i = node.beginDescendants(); i != node.endDescendants(); ++i) { (*i)->m_Ancestor = this; } // Merge the descendants. TNodePtrVec descendants; descendants.reserve(m_Descendants.size() + node.numberDescendants()); - std::merge(m_Descendants.begin(), m_Descendants.end(), - node.beginDescendants(), node.endDescendants(), - std::back_inserter(descendants), SPostLess()); + std::merge(m_Descendants.begin(), + m_Descendants.end(), + node.beginDescendants(), + node.endDescendants(), + std::back_inserter(descendants), + SPostLess()); // Clear out the node's descendants. TNodePtrVec empty; @@ -1235,59 +972,50 @@ bool CQDigest::CNode::takeDescendants(CNode &node) return true; } -CQDigest::CNodeAllocator::CNodeAllocator(std::size_t size) -{ +CQDigest::CNodeAllocator::CNodeAllocator(std::size_t size) { m_Nodes.push_back(TNodeVec()); m_Nodes.back().reserve(size); m_FreeNodes.push_back(TNodePtrVec()); } -CQDigest::CNode &CQDigest::CNodeAllocator::create(const CNode &node) -{ - if (m_FreeNodes.front().empty()) - { +CQDigest::CNode& CQDigest::CNodeAllocator::create(const CNode& node) { + if (m_FreeNodes.front().empty()) { // Add a new collection if necessary. This should // only happen when merging two q-digests. 
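// Nodes are linked by raw CNode* pointers, so blocks are only ever appended, never grown in place: reallocating a block's vector here would invalidate every pointer into it.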
std::size_t size = m_Nodes.back().size(); - if (size == m_Nodes.back().capacity()) - { + if (size == m_Nodes.back().capacity()) { m_Nodes.push_back(TNodeVec()); m_Nodes.back().reserve(size); m_FreeNodes.push_back(TNodePtrVec()); LOG_TRACE("Added new block " << m_Nodes.size()); } - TNodeVec &nodes = m_Nodes.back(); + TNodeVec& nodes = m_Nodes.back(); nodes.resize(nodes.size() + 1u); nodes.back() = node; return nodes.back(); } - CNode *freeNode = m_FreeNodes.front().back(); + CNode* freeNode = m_FreeNodes.front().back(); *freeNode = node; m_FreeNodes.front().pop_back(); return *freeNode; } -void CQDigest::CNodeAllocator::release(CNode &node) -{ +void CQDigest::CNodeAllocator::release(CNode& node) { std::size_t block = this->findBlock(node); - if (block >= m_FreeNodes.size()) - { - LOG_ABORT("Bad block address = " << block - << ", max = " << m_FreeNodes.size() - 1u); + if (block >= m_FreeNodes.size()) { + LOG_ABORT("Bad block address = " << block << ", max = " << m_FreeNodes.size() - 1u); } m_FreeNodes[block].push_back(&node); - if (m_Nodes.size() > 1u) - { + if (m_Nodes.size() > 1u) { auto nodeItr = m_Nodes.begin(); std::advance(nodeItr, block); // Remove the block if none of its nodes are in use. - if (m_FreeNodes[block].size() > nodeItr->size()) - { + if (m_FreeNodes[block].size() > nodeItr->size()) { LOG_TRACE("Removing block " << block); m_FreeNodes.erase(m_FreeNodes.begin() + block); m_Nodes.erase(nodeItr); @@ -1295,35 +1023,29 @@ void CQDigest::CNodeAllocator::release(CNode &node) } } -std::size_t CQDigest::CNodeAllocator::findBlock(const CNode &node) const -{ +std::size_t CQDigest::CNodeAllocator::findBlock(const CNode& node) const { std::size_t result = 0u; - if (m_Nodes.size() == 1u) - { + if (m_Nodes.size() == 1u) { return result; } const auto le = std::less_equal(); - for (auto i = m_Nodes.begin(); i != m_Nodes.end(); ++i, ++result) - { + for (auto i = m_Nodes.begin(); i != m_Nodes.end(); ++i, ++result) { auto first = i->begin(); - auto last = i->end(); - if (first == last) - { + auto last = i->end(); + if (first == last) { continue; } --last; - if (le(&(*first), &node) && le(&node, &(*last))) - { + if (le(&(*first), &node) && le(&node, &(*last))) { break; } } return result; } - } } diff --git a/lib/maths/CQuantileSketch.cc b/lib/maths/CQuantileSketch.cc index e33c63bf8f..8f9e32c76a 100644 --- a/lib/maths/CQuantileSketch.cc +++ b/lib/maths/CQuantileSketch.cc @@ -22,13 +22,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; @@ -36,79 +33,66 @@ using TFloatFloatPr = CQuantileSketch::TFloatFloatPr; using TFloatFloatPrVec = CQuantileSketch::TFloatFloatPrVec; //! \brief Orders two indices of a value vector by increasing value. 
-class CIndexingGreater -{ - public: - CIndexingGreater(const TDoubleDoublePrVec &values) : m_Values(&values) {} - - bool operator()(std::size_t lhs, std::size_t rhs) const - { - return COrderings::lexicographical_compare(-(*m_Values)[lhs].first, - (*m_Values)[lhs].second, - -(*m_Values)[rhs].first, - (*m_Values)[rhs].second); - } +class CIndexingGreater { +public: + CIndexingGreater(const TDoubleDoublePrVec& values) : m_Values(&values) {} + + bool operator()(std::size_t lhs, std::size_t rhs) const { + return COrderings::lexicographical_compare( + -(*m_Values)[lhs].first, (*m_Values)[lhs].second, -(*m_Values)[rhs].first, (*m_Values)[rhs].second); + } - private: - const TDoubleDoublePrVec *m_Values; +private: + const TDoubleDoublePrVec* m_Values; }; //! \brief An iterator over just the unique knot values. -class CUniqueIterator : private boost::addable2< CUniqueIterator, ptrdiff_t, - boost::subtractable2< CUniqueIterator, ptrdiff_t, - boost::equality_comparable< CUniqueIterator >> > -{ - public: - CUniqueIterator(TFloatFloatPrVec &knots, std::size_t i) : - m_Knots(&knots), m_I(i) - {} - - bool operator==(const CUniqueIterator &rhs) const - { - return m_I == rhs.m_I && m_Knots == rhs.m_Knots; - } - - TFloatFloatPr &operator*() const { return (*m_Knots)[m_I]; } - TFloatFloatPr *operator->() const { return &(*m_Knots)[m_I]; } - - const CUniqueIterator &operator++() - { - double x = (*m_Knots)[m_I].first; - ptrdiff_t n = m_Knots->size(); - while (++m_I < n && (*m_Knots)[m_I].first == x) {} - return *this; +class CUniqueIterator + : private boost::addable2>> { +public: + CUniqueIterator(TFloatFloatPrVec& knots, std::size_t i) : m_Knots(&knots), m_I(i) {} + + bool operator==(const CUniqueIterator& rhs) const { return m_I == rhs.m_I && m_Knots == rhs.m_Knots; } + + TFloatFloatPr& operator*() const { return (*m_Knots)[m_I]; } + TFloatFloatPr* operator->() const { return &(*m_Knots)[m_I]; } + + const CUniqueIterator& operator++() { + double x = (*m_Knots)[m_I].first; + ptrdiff_t n = m_Knots->size(); + while (++m_I < n && (*m_Knots)[m_I].first == x) { } + return *this; + } - const CUniqueIterator &operator--() - { - double x = (*m_Knots)[m_I].first; - while (--m_I >= 0 && (*m_Knots)[m_I].first == x) {} - return *this; + const CUniqueIterator& operator--() { + double x = (*m_Knots)[m_I].first; + while (--m_I >= 0 && (*m_Knots)[m_I].first == x) { } + return *this; + } - const CUniqueIterator &operator+=(ptrdiff_t i) - { - while (--i >= 0) - { - this->operator++(); - } - return *this; + const CUniqueIterator& operator+=(ptrdiff_t i) { + while (--i >= 0) { + this->operator++(); } + return *this; + } - const CUniqueIterator &operator-=(ptrdiff_t i) - { - while (--i >= 0) - { - this->operator--(); - } - return *this; + const CUniqueIterator& operator-=(ptrdiff_t i) { + while (--i >= 0) { + this->operator--(); } + return *this; + } - ptrdiff_t index() const { return m_I; } + ptrdiff_t index() const { return m_I; } - private: - TFloatFloatPrVec *m_Knots; - ptrdiff_t m_I; +private: + TFloatFloatPrVec* m_Knots; + ptrdiff_t m_I; }; const double EPS = static_cast(std::numeric_limits::epsilon()); @@ -116,40 +100,30 @@ const std::size_t MINIMUM_MAX_SIZE = 3u; const std::string UNSORTED_TAG("a"); const std::string KNOTS_TAG("b"); const std::string COUNT_TAG("c"); - } -CQuantileSketch::CQuantileSketch(EInterpolation interpolation, std::size_t size) : - m_Interpolation(interpolation), - m_MaxSize(std::max(size, MINIMUM_MAX_SIZE)), - m_Unsorted(0), - m_Count(0.0) -{ +CQuantileSketch::CQuantileSketch(EInterpolation 
interpolation, std::size_t size) + : m_Interpolation(interpolation), m_MaxSize(std::max(size, MINIMUM_MAX_SIZE)), m_Unsorted(0), m_Count(0.0) { m_Knots.reserve(m_MaxSize + 1); } -bool CQuantileSketch::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CQuantileSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(UNSORTED_TAG, m_Unsorted) RESTORE(KNOTS_TAG, core::CPersistUtils::fromString(traverser.value(), m_Knots)) RESTORE_BUILT_IN(COUNT_TAG, m_Count) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CQuantileSketch::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CQuantileSketch::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(UNSORTED_TAG, m_Unsorted); inserter.insertValue(KNOTS_TAG, core::CPersistUtils::toString(m_Knots)); inserter.insertValue(COUNT_TAG, m_Count, core::CIEEE754::E_SinglePrecision); } -const CQuantileSketch &CQuantileSketch::operator+=(const CQuantileSketch &rhs) -{ +const CQuantileSketch& CQuantileSketch::operator+=(const CQuantileSketch& rhs) { m_Knots.insert(m_Knots.end(), rhs.m_Knots.begin(), rhs.m_Knots.end()); std::sort(m_Knots.begin(), m_Knots.end()); m_Unsorted = 0; @@ -164,126 +138,96 @@ const CQuantileSketch &CQuantileSketch::operator+=(const CQuantileSketch &rhs) return *this; } -void CQuantileSketch::add(double x, double n) -{ +void CQuantileSketch::add(double x, double n) { ++m_Unsorted; m_Knots.emplace_back(x, n); m_Count += n; - if (m_Knots.size() > m_MaxSize) - { + if (m_Knots.size() > m_MaxSize) { this->reduce(); } } -void CQuantileSketch::age(double factor) -{ - for (std::size_t i = 0u; i < m_Knots.size(); ++i) - { +void CQuantileSketch::age(double factor) { + for (std::size_t i = 0u; i < m_Knots.size(); ++i) { m_Knots[i].second *= factor; } m_Count *= factor; } -bool CQuantileSketch::cdf(double x_, double &result) const -{ +bool CQuantileSketch::cdf(double x_, double& result) const { result = 0.0; - if (m_Knots.empty()) - { + if (m_Knots.empty()) { LOG_ERROR("No values added to quantile sketch"); return false; } - if (m_Unsorted > 0) - { + if (m_Unsorted > 0) { const_cast(this)->reduce(); } CFloatStorage x = x_; ptrdiff_t n = m_Knots.size(); - if (n == 1) - { + if (n == 1) { result = x < m_Knots[0].first ? 0.0 : (x > m_Knots[0].first ? 
1.0 : 0.5); return true; } - ptrdiff_t k = std::lower_bound(m_Knots.begin(), m_Knots.end(), - x, COrderings::SFirstLess()) - m_Knots.begin(); + ptrdiff_t k = std::lower_bound(m_Knots.begin(), m_Knots.end(), x, COrderings::SFirstLess()) - m_Knots.begin(); LOG_TRACE("k = " << k); - switch (m_Interpolation) - { - case E_Linear: - { - if (k == 0) - { + switch (m_Interpolation) { + case E_Linear: { + if (k == 0) { double xl = m_Knots[0].first; double xr = m_Knots[1].first; - double f = m_Knots[0].second / m_Count; + double f = m_Knots[0].second / m_Count; LOG_TRACE("xl = " << xl << ", xr = " << xr << ", f = " << f); result = f * std::max(x - 1.5 * xl + 0.5 * xr, 0.0) / (xr - xl); - } - else if (k == n) - { - double xl = m_Knots[n-2].first; - double xr = m_Knots[n-1].first; - double f = m_Knots[n-1].second / m_Count; + } else if (k == n) { + double xl = m_Knots[n - 2].first; + double xr = m_Knots[n - 1].first; + double f = m_Knots[n - 1].second / m_Count; LOG_TRACE("xl = " << xl << ", xr = " << xr << ", f = " << f); result = 1.0 - f * std::max(1.5 * xr - 0.5 * xl - x, 0.0) / (xr - xl); - } - else - { - double xl = m_Knots[k-1].first; + } else { + double xl = m_Knots[k - 1].first; double xr = m_Knots[k].first; bool left = (2 * k < n); - bool loc = (2.0 * x < xl + xr); + bool loc = (2.0 * x < xl + xr); double partial = 0.0; - for (ptrdiff_t i = left ? 0 : (loc ? k : k+1), - m = left ? (loc ? k-1 : k) : n; i < m; ++i) - { + for (ptrdiff_t i = left ? 0 : (loc ? k : k + 1), m = left ? (loc ? k - 1 : k) : n; i < m; ++i) { partial += m_Knots[i].second; } partial /= m_Count; double dn; - if (loc) - { - double xll = k > 1 ? static_cast(m_Knots[k-2].first) : 2.0 * xl - xr; - xr = 0.5 * (xl + xr); + if (loc) { + double xll = k > 1 ? static_cast(m_Knots[k - 2].first) : 2.0 * xl - xr; + xr = 0.5 * (xl + xr); xl = 0.5 * (xll + xl); - dn = m_Knots[k-1].second / m_Count; - } - else - { - double xrr = k+1 < n ? static_cast(m_Knots[k+1].first) : 2.0 * xr - xl; + dn = m_Knots[k - 1].second / m_Count; + } else { + double xrr = k + 1 < n ? static_cast(m_Knots[k + 1].first) : 2.0 * xr - xl; xl = 0.5 * (xl + xr); xr = 0.5 * (xr + xrr); dn = m_Knots[k].second / m_Count; } - LOG_TRACE("left = " << left << ", loc = " << loc - << ", partial = " << partial - << ", xl = " << xl << ", xr = " << xr << ", dn = " << dn); - result = left ? partial + dn * (x - xl) / (xr - xl) : - 1.0 - partial - dn * (xr - x) / (xr - xl); + LOG_TRACE("left = " << left << ", loc = " << loc << ", partial = " << partial << ", xl = " << xl << ", xr = " << xr + << ", dn = " << dn); + result = left ? partial + dn * (x - xl) / (xr - xl) : 1.0 - partial - dn * (xr - x) / (xr - xl); } return true; } - case E_PiecewiseConstant: - { - if (k == 0) - { + case E_PiecewiseConstant: { + if (k == 0) { double f = m_Knots[0].second / m_Count; result = x < m_Knots[0].first ? 0.0 : 0.5 * f; - } - else if (k == n) - { - double f = m_Knots[n-1].second / m_Count; + } else if (k == n) { + double f = m_Knots[n - 1].second / m_Count; result = x > m_Knots[0].first ? 1.0 : 1.0 - 0.5 * f; - } - else - { + } else { bool left = (2 * k < n); double partial = x < m_Knots[0].first ? 0.0 : 0.5 * m_Knots[0].second; - for (ptrdiff_t i = left ? 0 : k+1, m = left ? k : n; i < m; ++i) - { + for (ptrdiff_t i = left ? 0 : k + 1, m = left ? 
k : n; i < m; ++i) { partial += m_Knots[i].second; } partial /= m_Count; @@ -296,10 +240,8 @@ bool CQuantileSketch::cdf(double x_, double &result) const return true; } -bool CQuantileSketch::minimum(double &result) const -{ - if (m_Knots.empty()) - { +bool CQuantileSketch::minimum(double& result) const { + if (m_Knots.empty()) { LOG_ERROR("No values added to quantile sketch"); return false; } @@ -308,10 +250,8 @@ bool CQuantileSketch::minimum(double &result) const return true; } -bool CQuantileSketch::maximum(double &result) const -{ - if (m_Knots.empty()) - { +bool CQuantileSketch::maximum(double& result) const { + if (m_Knots.empty()) { LOG_ERROR("No values added to quantile sketch"); return false; } @@ -320,22 +260,18 @@ bool CQuantileSketch::maximum(double &result) const return true; } -bool CQuantileSketch::quantile(double percentage, double &result) const -{ +bool CQuantileSketch::quantile(double percentage, double& result) const { result = 0.0; - if (m_Knots.empty()) - { + if (m_Knots.empty()) { LOG_ERROR("No values added to quantile sketch"); return false; } - if (m_Unsorted > 0) - { + if (m_Unsorted > 0) { const_cast(this)->reduce(); } - if (percentage < 0.0 || percentage > 100.0) - { + if (percentage < 0.0 || percentage > 100.0) { LOG_ERROR("Invalid percentile " << percentage) return false; } @@ -345,42 +281,33 @@ bool CQuantileSketch::quantile(double percentage, double &result) const percentage /= 100.0; double partial = 0.0; - double cutoff = percentage * m_Count; - for (std::size_t i = 0u; i < n; ++i) - { + double cutoff = percentage * m_Count; + for (std::size_t i = 0u; i < n; ++i) { partial += m_Knots[i].second; - if (partial >= cutoff - m_Count * EPS) - { - switch (m_Interpolation) - { + if (partial >= cutoff - m_Count * EPS) { + switch (m_Interpolation) { case E_Linear: - if (n == 1) - { + if (n == 1) { result = m_Knots[0].first; - } - else - { + } else { double x0 = m_Knots[0].first; double x1 = m_Knots[1].first; - double xa = i == 0 ? 2.0 * x0 - x1 : static_cast(m_Knots[i-1].first); + double xa = i == 0 ? 2.0 * x0 - x1 : static_cast(m_Knots[i - 1].first); double xb = m_Knots[i].first; - double xc = i+1 == n ? 2.0 * xb - xa : static_cast(m_Knots[i+1].first); + double xc = i + 1 == n ? 
2.0 * xb - xa : static_cast(m_Knots[i + 1].first); xa += 0.5 * (xb - xa); xb += 0.5 * (xc - xb); double dx = (xb - xa); double nb = m_Knots[i].second; - double m = nb / dx; + double m = nb / dx; result = xb + (cutoff - partial) / m; } return true; case E_PiecewiseConstant: - if (i+1 == n || partial > cutoff + m_Count * EPS) - { - result = m_Knots[i].first; - } - else - { + if (i + 1 == n || partial > cutoff + m_Count * EPS) { + result = m_Knots[i].first; + } else { result = (m_Knots[i].first + m_Knots[i + 1].first) / 2.0; } return true; @@ -392,72 +319,58 @@ bool CQuantileSketch::quantile(double percentage, double &result) const return true; } -const CQuantileSketch::TFloatFloatPrVec &CQuantileSketch::knots() const -{ +const CQuantileSketch::TFloatFloatPrVec& CQuantileSketch::knots() const { return m_Knots; } -double CQuantileSketch::count() const -{ +double CQuantileSketch::count() const { return m_Count; } -uint64_t CQuantileSketch::checksum(uint64_t seed) const -{ +uint64_t CQuantileSketch::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_MaxSize); seed = CChecksum::calculate(seed, m_Knots); return CChecksum::calculate(seed, m_Count); } -void CQuantileSketch::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CQuantileSketch::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CQuantileSketch"); core::CMemoryDebug::dynamicSize("m_Knots", m_Knots, mem); } -std::size_t CQuantileSketch::memoryUsage() const -{ +std::size_t CQuantileSketch::memoryUsage() const { return core::CMemory::dynamicSize(m_Knots); } -bool CQuantileSketch::checkInvariants() const -{ - if (m_Knots.size() > m_MaxSize) - { +bool CQuantileSketch::checkInvariants() const { + if (m_Knots.size() > m_MaxSize) { LOG_ERROR("Too many knots: " << m_Knots.size() << " > " << m_MaxSize); return false; } - if (m_Unsorted > m_Knots.size()) - { + if (m_Unsorted > m_Knots.size()) { LOG_ERROR("Invalid unsorted count: " << m_Unsorted << "/" << m_Knots.size()); return false; } - if (!boost::algorithm::is_sorted(m_Knots.begin(), m_Knots.end() - m_Unsorted)) - { - LOG_ERROR("Unordered knots: " << core::CContainerPrinter::print(m_Knots.begin(), - m_Knots.end() - m_Unsorted)); + if (!boost::algorithm::is_sorted(m_Knots.begin(), m_Knots.end() - m_Unsorted)) { + LOG_ERROR("Unordered knots: " << core::CContainerPrinter::print(m_Knots.begin(), m_Knots.end() - m_Unsorted)); return false; } double count = 0.0; - for (std::size_t i = 0u; i < m_Knots.size(); ++i) - { + for (std::size_t i = 0u; i < m_Knots.size(); ++i) { count += m_Knots[i].second; } - if (std::fabs(m_Count - count) > 10.0 * EPS * m_Count) - { + if (std::fabs(m_Count - count) > 10.0 * EPS * m_Count) { LOG_ERROR("Count mismatch: error " << std::fabs(m_Count - count) << "/" << m_Count); return false; } return true; } -std::string CQuantileSketch::print() const -{ +std::string CQuantileSketch::print() const { return core::CContainerPrinter::print(m_Knots); } -void CQuantileSketch::reduce() -{ +void CQuantileSketch::reduce() { using TSizeVec = std::vector; CPRNG::CXorOShiro128Plus rng(static_cast(m_Count)); @@ -466,15 +379,13 @@ void CQuantileSketch::reduce() std::size_t target = this->target(); this->orderAndDeduplicate(); - if (m_Knots.size() > target) - { + if (m_Knots.size() > target) { TDoubleDoublePrVec costs; TSizeVec indexing; costs.reserve(m_Knots.size()); indexing.reserve(m_Knots.size()); - for (std::size_t i = 1u; i+2 < m_Knots.size(); ++i) - { - costs.emplace_back(this->cost(m_Knots[i], m_Knots[i+1]), 
u01(rng)); + for (std::size_t i = 1u; i + 2 < m_Knots.size(); ++i) { + costs.emplace_back(this->cost(m_Knots[i], m_Knots[i + 1]), u01(rng)); indexing.push_back(i - 1); } LOG_TRACE("costs = " << core::CContainerPrinter::print(costs)); @@ -482,21 +393,18 @@ void CQuantileSketch::reduce() std::size_t merged = 0u; std::make_heap(indexing.begin(), indexing.end(), CIndexingGreater(costs)); - for (TSizeVec stale; m_Knots.size() > target + merged; /**/) - { + for (TSizeVec stale; m_Knots.size() > target + merged; /**/) { LOG_TRACE("indexing = " << core::CContainerPrinter::print(indexing)); std::size_t l = indexing[0] + 1; std::size_t r = (CUniqueIterator(m_Knots, l) + 1).index(); - LOG_TRACE("Considering merging " << l << " and " << r - << ", cost = " << costs[l - 1].first); + LOG_TRACE("Considering merging " << l << " and " << r << ", cost = " << costs[l - 1].first); std::pop_heap(indexing.begin(), indexing.end(), CIndexingGreater(costs)); indexing.pop_back(); LOG_TRACE("stale = " << core::CContainerPrinter::print(stale)); - if (std::find(stale.begin(), stale.end(), l) == stale.end()) - { + if (std::find(stale.begin(), stale.end(), l) == stale.end()) { LOG_TRACE("Merging"); double xl = m_Knots[l].first; @@ -510,8 +418,7 @@ void CQuantileSketch::reduce() std::size_t rr = (CUniqueIterator(m_Knots, r) + 1).index(); double xm = 0.0, nm = 0.0; - switch (m_Interpolation) - { + switch (m_Interpolation) { case E_Linear: xm = (nl * xl + nr * xr) / (nl + nr); nm = nl + nr; @@ -521,26 +428,21 @@ void CQuantileSketch::reduce() nm = nl + nr; break; } - for (std::size_t i = ll + 1; i < rr; ++i) - { - m_Knots[i].first = xm; + for (std::size_t i = ll + 1; i < rr; ++i) { + m_Knots[i].first = xm; m_Knots[i].second = nm; } LOG_TRACE("merged = " << core::CContainerPrinter::print(&m_Knots[ll + 1], &m_Knots[rr])); LOG_TRACE("right = " << core::CContainerPrinter::print(m_Knots[rr])); - if (ll > 0) - { + if (ll > 0) { stale.push_back(ll); } - if (rr < m_Knots.size() - 2) - { + if (rr < m_Knots.size() - 2) { stale.push_back(rr - 1); } ++merged; - } - else - { + } else { CUniqueIterator ll(m_Knots, l); costs[l - 1].first = this->cost(*(ll), *(ll + 1)); indexing.push_back(l - 1); @@ -554,10 +456,8 @@ void CQuantileSketch::reduce() } } -void CQuantileSketch::orderAndDeduplicate() -{ - if (m_Unsorted > 0) - { +void CQuantileSketch::orderAndDeduplicate() { + if (m_Unsorted > 0) { std::sort(m_Knots.end() - m_Unsorted, m_Knots.end()); std::inplace_merge(m_Knots.begin(), m_Knots.end() - m_Unsorted, m_Knots.end()); } @@ -565,13 +465,11 @@ void CQuantileSketch::orderAndDeduplicate() // Combine any duplicate points. 
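// (Aside: the sorted-prefix/unsorted-tail pattern used just above, in
// isolation. Only the tail is sorted, then std::inplace_merge splices
// the two already-sorted runs in linear time; values are made up.)
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    //                  sorted prefix  |  unsorted tail
    std::vector<double> knots{1.0, 3.0, 7.0, 5.0, 2.0};
    std::size_t unsorted = 2;
    std::sort(knots.end() - unsorted, knots.end());
    std::inplace_merge(knots.begin(), knots.end() - unsorted, knots.end());
    for (double x : knots) {
        std::cout << x << ' '; // 1 2 3 5 7
    }
    std::cout << '\n';
}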
std::size_t end = 0u; - for (std::size_t i = 1u; i <= m_Knots.size(); ++end, ++i) - { - TFloatFloatPr &knot = m_Knots[end]; - knot = m_Knots[i-1]; + for (std::size_t i = 1u; i <= m_Knots.size(); ++end, ++i) { + TFloatFloatPr& knot = m_Knots[end]; + knot = m_Knots[i - 1]; double x = knot.first; - for (/**/; i < m_Knots.size() && m_Knots[i].first == x; ++i) - { + for (/**/; i < m_Knots.size() && m_Knots[i].first == x; ++i) { knot.second += m_Knots[i].second; } } @@ -581,13 +479,11 @@ void CQuantileSketch::orderAndDeduplicate() m_Unsorted = 0; } -std::size_t CQuantileSketch::target() const -{ +std::size_t CQuantileSketch::target() const { return static_cast(0.9 * static_cast(m_MaxSize) + 1.0); } -double CQuantileSketch::cost(const TFloatFloatPr &vl, const TFloatFloatPr &vr) const -{ +double CQuantileSketch::cost(const TFloatFloatPr& vl, const TFloatFloatPr& vr) const { // Interestingly, minimizing the approximation error (area between // curve before and after merging) produces good summary for the // piecewise constant objective, but a very bad summary for the linear @@ -608,6 +504,5 @@ double CQuantileSketch::cost(const TFloatFloatPr &vl, const TFloatFloatPr &vr) c return (10.0 * std::min(nl, nr) / m_Count) * (10.0 * (xr - xl) / width); } - } } diff --git a/lib/maths/CRadialBasisFunction.cc b/lib/maths/CRadialBasisFunction.cc index d57b25b59b..22d031a85c 100644 --- a/lib/maths/CRadialBasisFunction.cc +++ b/lib/maths/CRadialBasisFunction.cc @@ -15,17 +15,13 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { //! Checks of the interval [\p a, \p b] contains the point \p x. -inline bool contains(double a, double b, double x) -{ +inline bool contains(double a, double b, double x) { return x >= a && x <= b; } @@ -33,136 +29,93 @@ inline bool contains(double a, double b, double x) //!
 //!   \f$\displaystyle \int_{-\infty}^x{(2\epsilon(u - c))^2e^{-2(\epsilon(u-c))^2}}du\f$
 //! 
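// (Aside: a numeric cross-check of the closed form defined below. Its
// derivative should reproduce the squared derivative of the Gaussian
// basis, (-2 s y e^{-y^2})^2 with y = s (x - c). A minimal sketch:
// the formula is copied as written and only <cmath> is assumed.)
#include <cmath>
#include <iostream>

double gaussianSquareDerivative(double x, double centre, double scale) {
    const double rootTwoPi = std::sqrt(2.0 * std::acos(-1.0));
    double r = scale * (x - centre);
    return scale * (rootTwoPi * std::erf(std::sqrt(2.0) * r) -
                    4.0 * r * std::exp(-2.0 * r * r)) / 4.0;
}

int main() {
    double c = 0.5, s = 2.0, x = 1.2, h = 1e-5;
    double numeric = (gaussianSquareDerivative(x + h, c, s) -
                      gaussianSquareDerivative(x - h, c, s)) / (2.0 * h);
    double y = s * (x - c);
    double exact = std::pow(-2.0 * s * y * std::exp(-y * y), 2.0);
    std::cout << numeric << " ~= " << exact << '\n';
}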
-double gaussianSquareDerivative(double x, - double centre, - double scale) -{ +double gaussianSquareDerivative(double x, double centre, double scale) { double r = scale * (x - centre); - return scale * (boost::math::double_constants::root_two_pi - * boost::math::erf(boost::math::double_constants::root_two * r) - - 4.0 * r * std::exp(-2.0 * r * r)) / 4.0; + return scale * + (boost::math::double_constants::root_two_pi * boost::math::erf(boost::math::double_constants::root_two * r) - + 4.0 * r * std::exp(-2.0 * r * r)) / + 4.0; } //! The indefinite integral //!
 //!   \f$\displaystyle \int_{-\infty}^x{e^{-(\epsilon_1(u - c_1))^2 - (\epsilon_2(u - c_2))^2}}du\f$
 //! 
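// (Aside: the completing-the-square identity this integral rests on,
// checked numerically. With S^2 = s1^2 + s2^2, m = (s1^2 c1 + s2^2 c2)/S^2
// and d = s1 s2 (c2 - c1), the product of the two Gaussian bases is a
// single Gaussian about m damped by e^{-d^2/S^2}. Values are arbitrary.)
#include <cmath>
#include <iostream>

int main() {
    double c1 = 0.2, c2 = 1.1, s1 = 1.5, s2 = 0.8, x = 0.7;
    double S2 = s1 * s1 + s2 * s2;
    double m = (s1 * s1 * c1 + s2 * s2 * c2) / S2;
    double d = s1 * s2 * (c2 - c1);
    double lhs = std::exp(-std::pow(s1 * (x - c1), 2.0)) *
                 std::exp(-std::pow(s2 * (x - c2), 2.0));
    double rhs = std::exp(-d * d / S2) *
                 std::exp(-std::pow(std::sqrt(S2) * (x - m), 2.0));
    std::cout << lhs << " == " << rhs << '\n'; // agree to rounding
}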
-double gaussianProduct(double x, - double centre1, - double centre2, - double scale1, - double scale2) -{ +double gaussianProduct(double x, double centre1, double centre2, double scale1, double scale2) { double ss = scale1 + scale2; double sd = scale2 - scale1; double scale = std::sqrt((ss * ss + sd * sd) / 2.0); - double m = (scale1 * scale1 * centre1 - + scale2 * scale2 * centre2) / (scale * scale); + double m = (scale1 * scale1 * centre1 + scale2 * scale2 * centre2) / (scale * scale); double d = scale1 * scale2 * (centre2 - centre1); - return boost::math::double_constants::root_pi - * std::exp(-d * d / (scale * scale)) - * boost::math::erf(scale * (x - m)) - / (2.0 * scale); + return boost::math::double_constants::root_pi * std::exp(-d * d / (scale * scale)) * boost::math::erf(scale * (x - m)) / (2.0 * scale); } //! The indefinite integral //!
 //!   \f$\displaystyle \int_{-\infty}^x{\frac{(2\epsilon(u - c))^2}{(1+(\epsilon(u - c))^2)^2}}du\f$
 //! 
-double inverseQuadraticSquareDerivative(double x, - double centre, - double scale) -{ +double inverseQuadraticSquareDerivative(double x, double centre, double scale) { double r = scale * (x - centre); double d = (1.0 + r * r); - return scale * ( 3.0 * r / d - + 2.0 * r / (d * d) - - 8.0 * r / (d * d * d) - + 3.0 * std::atan(r)) / 12.0; + return scale * (3.0 * r / d + 2.0 * r / (d * d) - 8.0 * r / (d * d * d) + 3.0 * std::atan(r)) / 12.0; } //! The indefinite integral //!
 //!   \f$\displaystyle \int_{-\infty}^x{\frac{1}{(1+(\epsilon_1(u-c_1))^2)(1+(\epsilon_2(u-c_2))^2)}}du\f$
 //! 
-double inverseQuadraticProduct(double x, - double centre1, - double centre2, - double scale1, - double scale2) -{ +double inverseQuadraticProduct(double x, double centre1, double centre2, double scale1, double scale2) { double r1 = scale1 * (x - centre1); double r2 = scale2 * (x - centre2); double ss = scale1 + scale2; double sd = scale2 - scale1; double d = scale1 * scale2 * (centre2 - centre1); - if (sd == 0.0 && d == 0.0) - { + if (sd == 0.0 && d == 0.0) { return (r1 / (1.0 + r1 * r1) + std::atan(r1)) / (2.0 * scale1); } - if ((d * d) > 1.0) - { - return ( scale1 * scale2 / d * std::log((1.0 + r1 * r1) / (1.0 + r2 * r2)) - + scale1 * (1.0 - (ss * sd) / (d * d)) * std::atan(r1) - + scale2 * (1.0 + (ss * sd) / (d * d)) * std::atan(r2)) - / ((1.0 + (ss * ss) / (d * d)) * (d * d + sd * sd)); + if ((d * d) > 1.0) { + return (scale1 * scale2 / d * std::log((1.0 + r1 * r1) / (1.0 + r2 * r2)) + scale1 * (1.0 - (ss * sd) / (d * d)) * std::atan(r1) + + scale2 * (1.0 + (ss * sd) / (d * d)) * std::atan(r2)) / + ((1.0 + (ss * ss) / (d * d)) * (d * d + sd * sd)); } - return ( scale1 * scale2 * d * std::log((1.0 + r1 * r1) / (1.0 + r2 * r2)) - + (d * d - ss * sd) * scale1 * std::atan(r1) - + (d * d + ss * sd) * scale2 * std::atan(r2)) - / ((d * d + ss * ss) * (d * d + sd * sd)); + return (scale1 * scale2 * d * std::log((1.0 + r1 * r1) / (1.0 + r2 * r2)) + (d * d - ss * sd) * scale1 * std::atan(r1) + + (d * d + ss * sd) * scale2 * std::atan(r2)) / + ((d * d + ss * ss) * (d * d + sd * sd)); } - } -CRadialBasisFunction::~CRadialBasisFunction() -{ +CRadialBasisFunction::~CRadialBasisFunction() { } -CGaussianBasisFunction *CGaussianBasisFunction::clone() const -{ +CGaussianBasisFunction* CGaussianBasisFunction::clone() const { return new CGaussianBasisFunction(); } -double CGaussianBasisFunction::value(double x, - double centre, - double scale) const -{ +double CGaussianBasisFunction::value(double x, double centre, double scale) const { double r = x - centre; double y = scale * r; return std::exp(-y * y); } -double CGaussianBasisFunction::derivative(double x, - double centre, - double scale) const -{ +double CGaussianBasisFunction::derivative(double x, double centre, double scale) const { double r = x - centre; double y = scale * r; return -2.0 * scale * y * std::exp(-y * y); } -bool CGaussianBasisFunction::scale(double distance, - double value, - double &result) const -{ - if (value <= 0.0 || value >= 1.0) - { +bool CGaussianBasisFunction::scale(double distance, double value, double& result) const { + if (value <= 0.0 || value >= 1.0) { return false; } result = std::sqrt(-std::log(value)) / distance; return true; } -double CGaussianBasisFunction::mean(double a, - double b, - double centre, - double scale) const -{ +double CGaussianBasisFunction::mean(double a, double b, double centre, double scale) const { // The maximum function value is at the minimum of |x - c| // in the range [a,b] and the maximum is at the maximum of // |x - c|. Denoting these x+ and x-, respectively, we can @@ -181,21 +134,16 @@ double CGaussianBasisFunction::mean(double a, double fmin = this->value(centre < m ? 
b : a, centre, scale); double fmax = this->value(CTools::truncate(centre, a, b), centre, scale); - if (fmax - fmin <= 2.0 * EPS * fmin * (b - a)) - { + if (fmax - fmin <= 2.0 * EPS * fmin * (b - a)) { return (fmax + fmin) / 2.0; } - return std::max(boost::math::double_constants::root_pi / 2.0 / scale - * (boost::math::erf(scale * (b - centre)) - - boost::math::erf(scale * (a - centre))) / (b - a), 0.0); + return std::max(boost::math::double_constants::root_pi / 2.0 / scale * + (boost::math::erf(scale * (b - centre)) - boost::math::erf(scale * (a - centre))) / (b - a), + 0.0); } -double CGaussianBasisFunction::meanSquareDerivative(double a, - double b, - double centre, - double scale) const -{ +double CGaussianBasisFunction::meanSquareDerivative(double a, double b, double centre, double scale) const { // The maximum of the derivative function is at the point // c +/- 1 / sqrt(2) / s. To find the maximum and minimum // values of the derivative function x+ and x- we need to @@ -211,38 +159,25 @@ double CGaussianBasisFunction::meanSquareDerivative(double a, static const double EPS = std::numeric_limits::epsilon(); - double maxima[] = - { - centre - 1.0 / (boost::math::double_constants::root_two * scale), - centre + 1.0 / (boost::math::double_constants::root_two * scale) - }; + double maxima[] = {centre - 1.0 / (boost::math::double_constants::root_two * scale), + centre + 1.0 / (boost::math::double_constants::root_two * scale)}; double fa = this->derivative(a, centre, scale); double fb = this->derivative(b, centre, scale); double fmin = contains(a, b, centre) ? 0.0 : std::min(fa, fb); - double fmax = (contains(a, b, maxima[0]) || contains(a, b, maxima[1])) ? - this->derivative(maxima[0], centre, scale) : - std::max(fa, fb); + double fmax = (contains(a, b, maxima[0]) || contains(a, b, maxima[1])) ? this->derivative(maxima[0], centre, scale) : std::max(fa, fb); double smin = fmin * fmin; double smax = fmax * fmax; - if (smax - smin <= 2.0 * EPS * smin * (b - a)) - { + if (smax - smin <= 2.0 * EPS * smin * (b - a)) { return (smin + smax) / 2.0; } - return std::max((gaussianSquareDerivative(b, centre, scale) - - gaussianSquareDerivative(a, centre, scale)) / (b - a), 0.0); + return std::max((gaussianSquareDerivative(b, centre, scale) - gaussianSquareDerivative(a, centre, scale)) / (b - a), 0.0); } -double CGaussianBasisFunction::product(double a, - double b, - double centre1, - double centre2, - double scale1, - double scale2) const -{ +double CGaussianBasisFunction::product(double a, double b, double centre1, double centre2, double scale1, double scale2) const { // The maximum function value is at the minimum of |x - c| // in the range [a,b] and the maximum is at the maximum of // |x - c|. 
Denoting these x+ and x-, respectively, we can @@ -267,46 +202,32 @@ double CGaussianBasisFunction::product(double a, double pmin = f1min * f2min; double pmax = f1max * f2max; - if (pmax - pmin <= 2.0 * EPS * pmin * (b - a)) - { + if (pmax - pmin <= 2.0 * EPS * pmin * (b - a)) { return (pmin + pmax) / 2.0; } - return std::max((gaussianProduct(b, centre1, centre2, scale1, scale2) - - gaussianProduct(a, centre1, centre2, scale1, scale2)) / (b - a), 0.0); + return std::max((gaussianProduct(b, centre1, centre2, scale1, scale2) - gaussianProduct(a, centre1, centre2, scale1, scale2)) / (b - a), + 0.0); } - -CInverseQuadraticBasisFunction * -CInverseQuadraticBasisFunction::clone() const -{ +CInverseQuadraticBasisFunction* CInverseQuadraticBasisFunction::clone() const { return new CInverseQuadraticBasisFunction(); } -double CInverseQuadraticBasisFunction::value(double x, - double centre, - double scale) const -{ +double CInverseQuadraticBasisFunction::value(double x, double centre, double scale) const { double r = x - centre; double y = scale * r; return 1.0 / (1.0 + y * y); } -double CInverseQuadraticBasisFunction::derivative(double x, - double centre, - double scale) const -{ +double CInverseQuadraticBasisFunction::derivative(double x, double centre, double scale) const { double r = x - centre; double y = scale * r; double yy = (1.0 + y * y); return -2.0 * scale * y / yy / yy; } -double CInverseQuadraticBasisFunction::mean(double a, - double b, - double centre, - double scale) const -{ +double CInverseQuadraticBasisFunction::mean(double a, double b, double centre, double scale) const { // The maximum function value is at the minimum of |x - c| // in the range [a,b] and the maximum is at the maximum of // |x - c|. Denoting these x+ and x-, respectively, we can @@ -325,20 +246,14 @@ double CInverseQuadraticBasisFunction::mean(double a, double fmin = this->value(centre < m ? b : a, centre, scale); double fmax = this->value(CTools::truncate(centre, a, b), centre, scale); - if (fmax - fmin <= 2.0 * EPS * fmin * (b - a)) - { + if (fmax - fmin <= 2.0 * EPS * fmin * (b - a)) { return (fmax + fmin) / 2.0; } - return std::max((std::atan(scale * (b - centre)) - - std::atan(scale * (a - centre))) / scale / (b - a), 0.0); + return std::max((std::atan(scale * (b - centre)) - std::atan(scale * (a - centre))) / scale / (b - a), 0.0); } -double CInverseQuadraticBasisFunction::meanSquareDerivative(double a, - double b, - double centre, - double scale) const -{ +double CInverseQuadraticBasisFunction::meanSquareDerivative(double a, double b, double centre, double scale) const { // The maximum of the derivative function is at the point // c +/- 1 / sqrt(3) / s. To find the maximum and minimum // values of the derivative function x+ and x- we need to @@ -354,49 +269,34 @@ double CInverseQuadraticBasisFunction::meanSquareDerivative(double a, static const double EPS = std::numeric_limits::epsilon(); - double maxima[] = - { - centre - 1.0 / (boost::math::double_constants::root_three * scale), - centre + 1.0 / (boost::math::double_constants::root_three * scale) - }; + double maxima[] = {centre - 1.0 / (boost::math::double_constants::root_three * scale), + centre + 1.0 / (boost::math::double_constants::root_three * scale)}; double fa = this->derivative(a, centre, scale); double fb = this->derivative(b, centre, scale); double fmin = contains(a, b, centre) ? 0.0 : std::min(fa, fb); - double fmax = (contains(a, b, maxima[0]) || contains(a, b, maxima[1])) ? 
- this->derivative(maxima[0], centre, scale) : std::max(fa, fb); + double fmax = (contains(a, b, maxima[0]) || contains(a, b, maxima[1])) ? this->derivative(maxima[0], centre, scale) : std::max(fa, fb); double smin = fmin * fmin; double smax = fmax * fmax; - if (smax - smin <= 2.0 * EPS * smin * (b - a)) - { + if (smax - smin <= 2.0 * EPS * smin * (b - a)) { return (smin + smax) / 2.0; } - return std::max((inverseQuadraticSquareDerivative(b, centre, scale) - - inverseQuadraticSquareDerivative(a, centre, scale)) / (b - a), 0.0); + return std::max((inverseQuadraticSquareDerivative(b, centre, scale) - inverseQuadraticSquareDerivative(a, centre, scale)) / (b - a), + 0.0); } -bool CInverseQuadraticBasisFunction::scale(double distance, - double value, - double &result) const -{ - if (value <= 0.0 || value >= 1.0) - { +bool CInverseQuadraticBasisFunction::scale(double distance, double value, double& result) const { + if (value <= 0.0 || value >= 1.0) { return false; } result = std::sqrt((1.0 - value) / value) / distance; return true; } -double CInverseQuadraticBasisFunction::product(double a, - double b, - double centre1, - double centre2, - double scale1, - double scale2) const -{ +double CInverseQuadraticBasisFunction::product(double a, double b, double centre1, double centre2, double scale1, double scale2) const { // The maximum function value is at the minimum of |x - c| // in the range [a,b] and the maximum is at the maximum of // |x - c|. Denoting these x+ and x-, respectively, we can @@ -421,14 +321,14 @@ double CInverseQuadraticBasisFunction::product(double a, double pmin = f1min * f2min; double pmax = f1max * f2max; - if (pmax - pmin <= 2.0 * EPS * pmin * (b - a)) - { + if (pmax - pmin <= 2.0 * EPS * pmin * (b - a)) { return (pmin + pmax) / 2.0; } - return std::max((inverseQuadraticProduct(b, centre1, centre2, scale1, scale2) - - inverseQuadraticProduct(a, centre1, centre2, scale1, scale2)) / (b - a), 0.0); + return std::max( + (inverseQuadraticProduct(b, centre1, centre2, scale1, scale2) - inverseQuadraticProduct(a, centre1, centre2, scale1, scale2)) / + (b - a), + 0.0); } - } } diff --git a/lib/maths/CRegression.cc b/lib/maths/CRegression.cc index 0aeaae43e9..c1998593c5 100644 --- a/lib/maths/CRegression.cc +++ b/lib/maths/CRegression.cc @@ -6,12 +6,9 @@ #include -namespace ml -{ -namespace maths -{ -namespace regression_detail -{ +namespace ml { +namespace maths { +namespace regression_detail { const double CMaxCondition::VALUE = 1e7; } } diff --git a/lib/maths/CRestoreParams.cc b/lib/maths/CRestoreParams.cc index b1d656ce4a..25776c9fa6 100644 --- a/lib/maths/CRestoreParams.cc +++ b/lib/maths/CRestoreParams.cc @@ -8,49 +8,44 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { SDistributionRestoreParams::SDistributionRestoreParams(maths_t::EDataType dataType, double decayRate, double minimumClusterFraction, double minimumClusterCount, - double minimumCategoryCount) : - s_DataType{dataType}, - s_DecayRate{decayRate}, - s_MinimumClusterFraction{minimumClusterFraction}, - s_MinimumClusterCount{minimumClusterCount}, - s_MinimumCategoryCount{minimumCategoryCount} -{} + double minimumCategoryCount) + : s_DataType{dataType}, + s_DecayRate{decayRate}, + s_MinimumClusterFraction{minimumClusterFraction}, + s_MinimumClusterCount{minimumClusterCount}, + s_MinimumCategoryCount{minimumCategoryCount} { +} STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams(double decayRate, core_t::TTime minimumBucketLength, std::size_t componentSize, - const 
SDistributionRestoreParams &changeModelParams) : - s_DecayRate{decayRate}, - s_MinimumBucketLength{minimumBucketLength}, - s_ComponentSize{componentSize}, - s_ChangeModelParams{changeModelParams} -{} + const SDistributionRestoreParams& changeModelParams) + : s_DecayRate{decayRate}, + s_MinimumBucketLength{minimumBucketLength}, + s_ComponentSize{componentSize}, + s_ChangeModelParams{changeModelParams} { +} STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams(double decayRate, core_t::TTime minimumBucketLength, - const SDistributionRestoreParams &changeModelParams) : - s_DecayRate{decayRate}, - s_MinimumBucketLength{minimumBucketLength}, - s_ComponentSize{DECOMPOSITION_COMPONENT_SIZE}, - s_ChangeModelParams{changeModelParams} -{} - -SModelRestoreParams::SModelRestoreParams(const CModelParams ¶ms, - const STimeSeriesDecompositionRestoreParams &decompositionParams, - const SDistributionRestoreParams &distributionParams) : - s_Params{params}, - s_DecompositionParams{decompositionParams}, - s_DistributionParams{distributionParams} -{} + const SDistributionRestoreParams& changeModelParams) + : s_DecayRate{decayRate}, + s_MinimumBucketLength{minimumBucketLength}, + s_ComponentSize{DECOMPOSITION_COMPONENT_SIZE}, + s_ChangeModelParams{changeModelParams} { +} +SModelRestoreParams::SModelRestoreParams(const CModelParams& params, + const STimeSeriesDecompositionRestoreParams& decompositionParams, + const SDistributionRestoreParams& distributionParams) + : s_Params{params}, s_DecompositionParams{decompositionParams}, s_DistributionParams{distributionParams} { +} } } diff --git a/lib/maths/CSampling.cc b/lib/maths/CSampling.cc index d0e48928e6..f9dd64e164 100644 --- a/lib/maths/CSampling.cc +++ b/lib/maths/CSampling.cc @@ -8,17 +8,17 @@ #include #include +#include #include #include -#include #include #include #include #include -#include #include +#include #include #include #include @@ -33,13 +33,10 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; @@ -47,16 +44,14 @@ using TSizeVec = std::vector; //! Defines the appropriate integer random number generator. template -struct SRng -{ +struct SRng { using Type = boost::random::uniform_int_distribution; static INTEGER min(INTEGER a) { return a; } static INTEGER max(INTEGER b) { return b - 1; } }; //! Specialization for a real uniform random number generator. template<> -struct SRng -{ +struct SRng { using Type = boost::random::uniform_real_distribution; static double min(double a) { return a; } static double max(double b) { return b; } @@ -64,31 +59,26 @@ struct SRng //! Implementation of uniform sampling. template -TYPE doUniformSample(RNG &rng, TYPE a, TYPE b) -{ +TYPE doUniformSample(RNG& rng, TYPE a, TYPE b) { typename SRng::Type uniform(SRng::min(a), SRng::max(b)); return uniform(rng); } //! Implementation of uniform sampling. template -void doUniformSample(RNG &rng, TYPE a, TYPE b, std::size_t n, std::vector &result) -{ +void doUniformSample(RNG& rng, TYPE a, TYPE b, std::size_t n, std::vector& result) { result.clear(); result.reserve(n); typename SRng::Type uniform(SRng::min(a), SRng::max(b)); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { result.push_back(uniform(rng)); } } //! Implementation of normal sampling. 
template -double doNormalSample(RNG &rng, double mean, double variance) -{ - if (variance < 0.0) - { +double doNormalSample(RNG& rng, double mean, double variance) { + if (variance < 0.0) { LOG_ERROR("Invalid variance " << variance); return mean; } @@ -98,174 +88,132 @@ double doNormalSample(RNG &rng, double mean, double variance) //! Implementation of normal sampling. template -void doNormalSample(RNG &rng, double mean, double variance, std::size_t n, TDoubleVec &result) -{ +void doNormalSample(RNG& rng, double mean, double variance, std::size_t n, TDoubleVec& result) { result.clear(); - if (variance < 0.0) - { + if (variance < 0.0) { LOG_ERROR("Invalid variance " << variance); return; - } - else if (variance == 0.0) - { + } else if (variance == 0.0) { result.resize(n, mean); } result.reserve(n); boost::random::normal_distribution normal(mean, std::sqrt(variance)); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { result.push_back(normal(rng)); } } //! Implementation of chi^2 sampling. template -void doChiSquaredSample(RNG &rng, double f, std::size_t n, TDoubleVec &result) -{ +void doChiSquaredSample(RNG& rng, double f, std::size_t n, TDoubleVec& result) { result.clear(); result.reserve(n); boost::random::chi_squared_distribution chi2(f); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { result.push_back(chi2(rng)); } } //! Implementation of categorical sampling. template -std::size_t doCategoricalSample(RNG &rng, TDoubleVec &probabilities) -{ +std::size_t doCategoricalSample(RNG& rng, TDoubleVec& probabilities) { // We use inverse transform sampling to generate the categorical // samples from a random samples on [0,1]. std::size_t p = probabilities.size(); // Construct the transform function. - for (std::size_t i = 1u; i < p; ++i) - { + for (std::size_t i = 1u; i < p; ++i) { probabilities[i] += probabilities[i - 1]; } double uniform0X; - if (probabilities[p - 1] == 0.0) - { + if (probabilities[p - 1] == 0.0) { return doUniformSample(rng, std::size_t(0), p); - } - else - { + } else { boost::random::uniform_real_distribution<> uniform(0.0, probabilities[p - 1]); uniform0X = uniform(rng); } - return std::min(static_cast( - std::lower_bound(probabilities.begin(), - probabilities.end(), - uniform0X) - probabilities.begin()), - probabilities.size() - 1); + return std::min( + static_cast(std::lower_bound(probabilities.begin(), probabilities.end(), uniform0X) - probabilities.begin()), + probabilities.size() - 1); } //! Implementation of categorical sampling with replacement. template -void doCategoricalSampleWithReplacement(RNG &rng, - TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result) -{ +void doCategoricalSampleWithReplacement(RNG& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result) { // We use inverse transform sampling to generate the categorical // samples from random samples on [0,1]. result.clear(); - if (n == 0) - { + if (n == 0) { return; } std::size_t p = probabilities.size(); // Construct the transform function. 
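// (Aside: the inverse transform sampling technique used here, in
// miniature with the standard library. Turn the probabilities into a
// running sum, draw u uniformly on [0, total), and pick the first
// bucket whose cumulative weight is not less than u; the weights and
// seed are arbitrary.)
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

int main() {
    std::vector<double> cdf{0.2, 0.5, 0.3};
    for (std::size_t i = 1; i < cdf.size(); ++i) {
        cdf[i] += cdf[i - 1]; // now {0.2, 0.7, 1.0}
    }
    std::mt19937 rng(42);
    std::uniform_real_distribution<double> uniform(0.0, cdf.back());
    for (int i = 0; i < 5; ++i) {
        double u = uniform(rng);
        std::size_t sample =
            std::lower_bound(cdf.begin(), cdf.end(), u) - cdf.begin();
        std::cout << sample << ' ';
    }
    std::cout << '\n';
}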
- for (std::size_t i = 1u; i < p; ++i) - { + for (std::size_t i = 1u; i < p; ++i) { probabilities[i] += probabilities[i - 1]; } - if (probabilities[p - 1] == 0.0) - { + if (probabilities[p - 1] == 0.0) { doUniformSample(rng, std::size_t(0), p, n, result); - } - else - { + } else { result.reserve(n); boost::random::uniform_real_distribution<> uniform(0.0, probabilities[p - 1]); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { double uniform0X = uniform(rng); - result.push_back(std::min(static_cast( - std::lower_bound(probabilities.begin(), - probabilities.end(), - uniform0X) - probabilities.begin()), - probabilities.size() - 1)); + result.push_back(std::min( + static_cast(std::lower_bound(probabilities.begin(), probabilities.end(), uniform0X) - probabilities.begin()), + probabilities.size() - 1)); } } } //! Implementation of categorical sampling without replacement. template -void doCategoricalSampleWithoutReplacement(RNG &rng, - TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result) -{ +void doCategoricalSampleWithoutReplacement(RNG& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result) { // We use inverse transform sampling to generate the categorical // samples from random samples on [0,1] and update the probabilities // throughout the sampling to exclude the values already taken. result.clear(); - if (n == 0) - { + if (n == 0) { return; } std::size_t p = probabilities.size(); - if (n >= p) - { - result.assign(boost::counting_iterator(0), - boost::counting_iterator(p)); + if (n >= p) { + result.assign(boost::counting_iterator(0), boost::counting_iterator(p)); } // Construct the transform function. - for (std::size_t i = 1u; i < p; ++i) - { + for (std::size_t i = 1u; i < p; ++i) { probabilities[i] += probabilities[i - 1]; } result.reserve(n); - TSizeVec indices(boost::counting_iterator(0), - boost::counting_iterator(p)); + TSizeVec indices(boost::counting_iterator(0), boost::counting_iterator(p)); TSizeVec s(1); - for (std::size_t i = 0u; i < n; ++i, --p) - { - if (probabilities[p - 1] <= 0.0) - { + for (std::size_t i = 0u; i < n; ++i, --p) { + if (probabilities[p - 1] <= 0.0) { doUniformSample(rng, std::size_t(0), indices.size(), 1, s); result.push_back(indices[s[0]]); - } - else - { + } else { boost::random::uniform_real_distribution<> uniform(0.0, probabilities[p - 1]); double uniform0X = uniform(rng); - s[0] = std::min(static_cast( - std::lower_bound(probabilities.begin(), - probabilities.end(), - uniform0X) - probabilities.begin()), - probabilities.size() - 1); + s[0] = std::min( + static_cast(std::lower_bound(probabilities.begin(), probabilities.end(), uniform0X) - probabilities.begin()), + probabilities.size() - 1); result.push_back(indices[s[0]]); double ps = probabilities[s[0]] - (s[0] == 0 ? 0.0 : probabilities[s[0] - 1]); - for (std::size_t j = s[0] + 1; j < p; ++j) - { + for (std::size_t j = s[0] + 1; j < p; ++j) { probabilities[j - 1] = probabilities[j] - ps; } probabilities.pop_back(); @@ -276,26 +224,17 @@ void doCategoricalSampleWithoutReplacement(RNG &rng, //! Implementation of multivariate normal sampling. 
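// (Aside: the scheme the function below implements, sketched in two
// dimensions with plain Eigen, which is assumed available as this
// file already uses it. Factor the covariance C = U S U^T with an
// SVD, sample independent normals with variances S(i), rotate by U
// and add the mean; the matrix, mean and seed are made up.)
#include <Eigen/Dense>
#include <cmath>
#include <iostream>
#include <random>

int main() {
    Eigen::Matrix2d C;
    C << 2.0, 0.6, 0.6, 1.0;
    Eigen::JacobiSVD<Eigen::Matrix2d> svd(C, Eigen::ComputeFullU);
    Eigen::Vector2d mean(1.0, -2.0);
    std::mt19937 rng(42);
    std::normal_distribution<double> normal(0.0, 1.0);
    // Independent draws scaled by the singular values' square roots...
    Eigen::Vector2d z(normal(rng) * std::sqrt(svd.singularValues()(0)),
                      normal(rng) * std::sqrt(svd.singularValues()(1)));
    // ...then rotated into the covariance's frame and shifted.
    Eigen::Vector2d sample = mean + svd.matrixU() * z;
    std::cout << sample.transpose() << '\n';
}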
template -bool doMultivariateNormalSample(RNG &rng, - const TDoubleVec &mean, - const TDoubleVecVec &covariance, - std::size_t n, - TDoubleVecVec &samples) -{ +bool doMultivariateNormalSample(RNG& rng, const TDoubleVec& mean, const TDoubleVecVec& covariance, std::size_t n, TDoubleVecVec& samples) { using TJacobiSvd = Eigen::JacobiSVD>; - if (mean.size() != covariance.size()) - { - LOG_ERROR("Incompatible mean and covariance: " - << core::CContainerPrinter::print(mean) - << ", " - << core::CContainerPrinter::print(covariance)); + if (mean.size() != covariance.size()) { + LOG_ERROR("Incompatible mean and covariance: " << core::CContainerPrinter::print(mean) << ", " + << core::CContainerPrinter::print(covariance)); return false; } samples.clear(); - if (n == 0) - { + if (n == 0) { return true; } @@ -319,18 +258,14 @@ bool doMultivariateNormalSample(RNG &rng, LOG_TRACE("Dimension = " << d); LOG_TRACE("mean = " << core::CContainerPrinter::print(mean)); - CDenseMatrix C(d,d); - for (std::size_t i = 0u; i < d; ++i) - { + CDenseMatrix C(d, d); + for (std::size_t i = 0u; i < d; ++i) { C(i, i) = covariance[i][i]; - if (covariance[i].size() < d - i) - { - LOG_ERROR("Bad covariance matrix: " - << core::CContainerPrinter::print(covariance)); + if (covariance[i].size() < d - i) { + LOG_ERROR("Bad covariance matrix: " << core::CContainerPrinter::print(covariance)); return false; } - for (std::size_t j = 0; j < i; ++j) - { + for (std::size_t j = 0; j < i; ++j) { C(i, j) = covariance[i][j]; C(j, i) = covariance[i][j]; } @@ -341,12 +276,11 @@ bool doMultivariateNormalSample(RNG &rng, // Get the singular values, these are the variances of the normals // to sample. - const CDenseVector &S = svd.singularValues(); - const CDenseMatrix &U = svd.matrixU(); + const CDenseVector& S = svd.singularValues(); + const CDenseMatrix& U = svd.matrixU(); TDoubleVec stddevs; stddevs.reserve(d); - for (std::size_t i = 0u; i < d; ++i) - { + for (std::size_t i = 0u; i < d; ++i) { stddevs.push_back(std::sqrt(std::max(S(i), 0.0))); } LOG_TRACE("Singular values of C = " << S.transpose()); @@ -357,23 +291,17 @@ bool doMultivariateNormalSample(RNG &rng, { samples.resize(n, mean); CDenseVector sample(d); - for (std::size_t i = 0u; i < n; ++i) - { - for (std::size_t j = 0u; j < d; ++j) - { - if (stddevs[j] == 0.0) - { + for (std::size_t i = 0u; i < n; ++i) { + for (std::size_t j = 0u; j < d; ++j) { + if (stddevs[j] == 0.0) { sample(j) = 0.0; - } - else - { + } else { boost::random::normal_distribution<> normal(0.0, stddevs[j]); sample(j) = normal(rng); } } sample = U * sample; - for (std::size_t j = 0u; j < d; ++j) - { + for (std::size_t j = 0u; j < d; ++j) { samples[i][j] += sample(j); } } @@ -384,19 +312,17 @@ bool doMultivariateNormalSample(RNG &rng, //! Implementation of multivariate normal sampling. template -void doMultivariateNormalSample(RNG &rng, - const CVectorNx1 &mean, - const CSymmetricMatrixNxN &covariance, +void doMultivariateNormalSample(RNG& rng, + const CVectorNx1& mean, + const CSymmetricMatrixNxN& covariance, std::size_t n, - std::vector> &samples) -{ + std::vector>& samples) { using TDenseVector = typename SDenseVector>::Type; using TDenseMatrix = typename SDenseMatrix>::Type; using TJacobiSvd = Eigen::JacobiSVD; samples.clear(); - if (n == 0) - { + if (n == 0) { return; } @@ -406,27 +332,21 @@ void doMultivariateNormalSample(RNG &rng, // Get the singular values, these are the variances of the normals // to sample. 
- const TDenseVector &S = svd.singularValues(); - const TDenseMatrix &U = svd.matrixU(); + const TDenseVector& S = svd.singularValues(); + const TDenseMatrix& U = svd.matrixU(); T stddevs[N] = {}; - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { stddevs[i] = std::sqrt(std::max(S(i), 0.0)); } { samples.resize(n, mean); TDenseVector sample(N); - for (std::size_t i = 0u; i < n; ++i) - { - for (std::size_t j = 0u; j < N; ++j) - { - if (stddevs[j] == 0.0) - { + for (std::size_t i = 0u; i < n; ++i) { + for (std::size_t j = 0u; j < N; ++j) { + if (stddevs[j] == 0.0) { sample(j) = 0.0; - } - else - { + } else { boost::random::normal_distribution<> normal(0.0, stddevs[j]); sample(j) = normal(rng); } @@ -439,14 +359,12 @@ void doMultivariateNormalSample(RNG &rng, //! Implementation of distribution quantile sampling. template -void sampleQuantiles(const DISTRIBUTION &distribution, std::size_t n, TDoubleVec &result) -{ +void sampleQuantiles(const DISTRIBUTION& distribution, std::size_t n, TDoubleVec& result) { CTools::SIntervalExpectation expectation; double dq = 1.0 / static_cast(n); double a = boost::numeric::bounds::lowest(); - for (std::size_t i = 1u; i < n; ++i) - { + for (std::size_t i = 1u; i < n; ++i) { double q = static_cast(i) * dq; double b = boost::math::quantile(distribution, q); result.push_back(expectation(distribution, a, b)); @@ -457,19 +375,15 @@ void sampleQuantiles(const DISTRIBUTION &distribution, std::size_t n, TDoubleVec } static const std::string RNG_TAG("a"); - } -bool CSampling::staticsAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CSampling::staticsAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { // Note we require that we only ever do one persistence per process. - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); - if (name == RNG_TAG) - { + if (name == RNG_TAG) { std::string value(traverser.value()); // See acceptPersistInserter std::replace(value.begin(), value.end(), '_', ' '); @@ -477,14 +391,12 @@ bool CSampling::staticsAcceptRestoreTraverser(core::CStateRestoreTraverser &trav core::CScopedFastLock scopedLock(ms_Lock); ss >> ms_Rng; } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CSampling::staticsAcceptPersistInserter(core::CStatePersistInserter &inserter) -{ +void CSampling::staticsAcceptPersistInserter(core::CStatePersistInserter& inserter) { // Note we require that we only ever do one persistence per process. 
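// (Aside: why the underscore substitution seen in the restore path
// above. The generator's state streams as space-separated words, and
// spaces are awkward in the persisted value, so persistence swaps
// ' ' for '_' and restoration swaps back. A round-trip sketch with a
// standard engine, which streams and compares the same way.)
#include <algorithm>
#include <iostream>
#include <random>
#include <sstream>
#include <string>

int main() {
    std::mt19937 rng(7);
    std::ostringstream out;
    out << rng; // state as space-separated integers
    std::string persisted = out.str();
    std::replace(persisted.begin(), persisted.end(), ' ', '_');

    std::replace(persisted.begin(), persisted.end(), '_', ' ');
    std::istringstream in(persisted);
    std::mt19937 restored;
    in >> restored;
    std::cout << (rng == restored) << '\n'; // 1: identical state
}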
std::ostringstream ss; @@ -499,242 +411,176 @@ void CSampling::staticsAcceptPersistInserter(core::CStatePersistInserter &insert inserter.insertValue(RNG_TAG, rng); } -void CSampling::seed() -{ +void CSampling::seed() { core::CScopedFastLock scopedLock(ms_Lock); ms_Rng.seed(); } -#define UNIFORM_SAMPLE(TYPE) \ -TYPE CSampling::uniformSample(TYPE a, TYPE b) \ -{ \ - core::CScopedFastLock scopedLock(ms_Lock); \ - return doUniformSample(ms_Rng, a, b); \ -} \ -TYPE CSampling::uniformSample(CPRNG::CXorOShiro128Plus &rng, TYPE a, TYPE b) \ -{ \ - return doUniformSample(rng, a, b); \ -} \ -TYPE CSampling::uniformSample(CPRNG::CXorShift1024Mult &rng, TYPE a, TYPE b) \ -{ \ - return doUniformSample(rng, a, b); \ -} \ -void CSampling::uniformSample(TYPE a, TYPE b, std::size_t n, \ - std::vector &result) \ -{ \ - core::CScopedFastLock scopedLock(ms_Lock); \ - doUniformSample(ms_Rng, a, b, n, result); \ -} \ -void CSampling::uniformSample(CPRNG::CXorOShiro128Plus &rng, \ - TYPE a, TYPE b, std::size_t n, \ - std::vector &result) \ -{ \ - doUniformSample(rng, a, b, n, result); \ -} \ -void CSampling::uniformSample(CPRNG::CXorShift1024Mult &rng, \ - TYPE a, TYPE b, std::size_t n, \ - std::vector &result) \ -{ \ - doUniformSample(rng, a, b, n, result); \ -} +#define UNIFORM_SAMPLE(TYPE) \ + TYPE CSampling::uniformSample(TYPE a, TYPE b) { \ + core::CScopedFastLock scopedLock(ms_Lock); \ + return doUniformSample(ms_Rng, a, b); \ + } \ + TYPE CSampling::uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b) { return doUniformSample(rng, a, b); } \ + TYPE CSampling::uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b) { return doUniformSample(rng, a, b); } \ + void CSampling::uniformSample(TYPE a, TYPE b, std::size_t n, std::vector& result) { \ + core::CScopedFastLock scopedLock(ms_Lock); \ + doUniformSample(ms_Rng, a, b, n, result); \ + } \ + void CSampling::uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b, std::size_t n, std::vector& result) { \ + doUniformSample(rng, a, b, n, result); \ + } \ + void CSampling::uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b, std::size_t n, std::vector& result) { \ + doUniformSample(rng, a, b, n, result); \ + } UNIFORM_SAMPLE(std::size_t) UNIFORM_SAMPLE(std::ptrdiff_t) UNIFORM_SAMPLE(double) #undef UNIFORM_SAMPLE -double CSampling::normalSample(double mean, double variance) -{ +double CSampling::normalSample(double mean, double variance) { core::CScopedFastLock scopedLock(ms_Lock); return doNormalSample(ms_Rng, mean, variance); } -double CSampling::normalSample(CPRNG::CXorOShiro128Plus &rng, double mean, double variance) -{ +double CSampling::normalSample(CPRNG::CXorOShiro128Plus& rng, double mean, double variance) { return doNormalSample(rng, mean, variance); } -double CSampling::normalSample(CPRNG::CXorShift1024Mult &rng, double mean, double variance) -{ +double CSampling::normalSample(CPRNG::CXorShift1024Mult& rng, double mean, double variance) { return doNormalSample(rng, mean, variance); } -void CSampling::normalSample(double mean, double variance, std::size_t n, TDoubleVec &result) -{ +void CSampling::normalSample(double mean, double variance, std::size_t n, TDoubleVec& result) { core::CScopedFastLock scopedLock(ms_Lock); doNormalSample(ms_Rng, mean, variance, n, result); } -void CSampling::normalSample(CPRNG::CXorOShiro128Plus &rng, - double mean, - double variance, - std::size_t n, - TDoubleVec &result) -{ +void CSampling::normalSample(CPRNG::CXorOShiro128Plus& rng, double mean, double variance, std::size_t n, TDoubleVec& result) 
{ doNormalSample(rng, mean, variance, n, result); } -void CSampling::normalSample(CPRNG::CXorShift1024Mult &rng, - double mean, - double variance, - std::size_t n, - TDoubleVec &result) -{ +void CSampling::normalSample(CPRNG::CXorShift1024Mult& rng, double mean, double variance, std::size_t n, TDoubleVec& result) { doNormalSample(rng, mean, variance, n, result); } -void CSampling::chiSquaredSample(double f, std::size_t n, TDoubleVec &result) -{ +void CSampling::chiSquaredSample(double f, std::size_t n, TDoubleVec& result) { core::CScopedFastLock scopedLock(ms_Lock); doChiSquaredSample(ms_Rng, f, n, result); } -void CSampling::chiSquaredSample(CPRNG::CXorOShiro128Plus &rng, - double f, - std::size_t n, - TDoubleVec &result) -{ +void CSampling::chiSquaredSample(CPRNG::CXorOShiro128Plus& rng, double f, std::size_t n, TDoubleVec& result) { doChiSquaredSample(rng, f, n, result); } -void CSampling::chiSquaredSample(CPRNG::CXorShift1024Mult &rng, - double f, - std::size_t n, - TDoubleVec &result) -{ +void CSampling::chiSquaredSample(CPRNG::CXorShift1024Mult& rng, double f, std::size_t n, TDoubleVec& result) { doChiSquaredSample(rng, f, n, result); } -bool CSampling::multivariateNormalSample(const TDoubleVec &mean, - const TDoubleVecVec &covariance, - std::size_t n, - TDoubleVecVec &samples) -{ +bool CSampling::multivariateNormalSample(const TDoubleVec& mean, const TDoubleVecVec& covariance, std::size_t n, TDoubleVecVec& samples) { core::CScopedFastLock scopedLock(ms_Lock); return doMultivariateNormalSample(ms_Rng, mean, covariance, n, samples); } -bool CSampling::multivariateNormalSample(CPRNG::CXorOShiro128Plus &rng, - const TDoubleVec &mean, - const TDoubleVecVec &covariance, +bool CSampling::multivariateNormalSample(CPRNG::CXorOShiro128Plus& rng, + const TDoubleVec& mean, + const TDoubleVecVec& covariance, std::size_t n, - TDoubleVecVec &samples) -{ + TDoubleVecVec& samples) { return doMultivariateNormalSample(rng, mean, covariance, n, samples); } -bool CSampling::multivariateNormalSample(CPRNG::CXorShift1024Mult &rng, - const TDoubleVec &mean, - const TDoubleVecVec &covariance, +bool CSampling::multivariateNormalSample(CPRNG::CXorShift1024Mult& rng, + const TDoubleVec& mean, + const TDoubleVecVec& covariance, std::size_t n, - TDoubleVecVec &samples) -{ + TDoubleVecVec& samples) { return doMultivariateNormalSample(rng, mean, covariance, n, samples); } -#define MULTIVARIATE_NORMAL_SAMPLE(N) \ -void CSampling::multivariateNormalSample(const CVectorNx1 &mean, \ - const CSymmetricMatrixNxN &covariance, \ - std::size_t n, \ - std::vector > &samples) \ -{ \ - core::CScopedFastLock scopedLock(ms_Lock); \ - doMultivariateNormalSample(ms_Rng, mean, covariance, n, samples); \ -} \ -void CSampling::multivariateNormalSample(CPRNG::CXorOShiro128Plus &rng, \ - const CVectorNx1 &mean, \ - const CSymmetricMatrixNxN &covariance, \ - std::size_t n, \ - std::vector > &samples) \ -{ \ - doMultivariateNormalSample(rng, mean, covariance, n, samples); \ -} \ -void CSampling::multivariateNormalSample(CPRNG::CXorShift1024Mult &rng, \ - const CVectorNx1 &mean, \ - const CSymmetricMatrixNxN &covariance, \ - std::size_t n, \ - std::vector > &samples) \ -{ \ - doMultivariateNormalSample(rng, mean, covariance, n, samples); \ -} +#define MULTIVARIATE_NORMAL_SAMPLE(N) \ + void CSampling::multivariateNormalSample(const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, \ + std::size_t n, \ + std::vector>& samples) { \ + core::CScopedFastLock scopedLock(ms_Lock); \ + doMultivariateNormalSample(ms_Rng, mean, 
covariance, n, samples); \ + } \ + void CSampling::multivariateNormalSample(CPRNG::CXorOShiro128Plus& rng, \ + const CVectorNx1<double, N>& mean, \ + const CSymmetricMatrixNxN<double, N>& covariance, \ + std::size_t n, \ + std::vector<CVectorNx1<double, N>>& samples) { \ + doMultivariateNormalSample(rng, mean, covariance, n, samples); \ + } \ + void CSampling::multivariateNormalSample(CPRNG::CXorShift1024Mult& rng, \ + const CVectorNx1<double, N>& mean, \ + const CSymmetricMatrixNxN<double, N>& covariance, \ + std::size_t n, \ + std::vector<CVectorNx1<double, N>>& samples) { \ + doMultivariateNormalSample(rng, mean, covariance, n, samples); \ + }
MULTIVARIATE_NORMAL_SAMPLE(2) MULTIVARIATE_NORMAL_SAMPLE(3) MULTIVARIATE_NORMAL_SAMPLE(4) MULTIVARIATE_NORMAL_SAMPLE(5) #undef MULTIVARIATE_NORMAL_SAMPLE
-std::size_t CSampling::categoricalSample(TDoubleVec &probabilities) -{ +std::size_t CSampling::categoricalSample(TDoubleVec& probabilities) { core::CScopedFastLock scopedLock(ms_Lock); return doCategoricalSample(ms_Rng, probabilities); }
-std::size_t CSampling::categoricalSample(CPRNG::CXorOShiro128Plus &rng, TDoubleVec &probabilities) -{ +std::size_t CSampling::categoricalSample(CPRNG::CXorOShiro128Plus& rng, TDoubleVec& probabilities) { return doCategoricalSample(rng, probabilities); }
-std::size_t CSampling::categoricalSample(CPRNG::CXorShift1024Mult &rng, TDoubleVec &probabilities) -{ +std::size_t CSampling::categoricalSample(CPRNG::CXorShift1024Mult& rng, TDoubleVec& probabilities) { return doCategoricalSample(rng, probabilities); }
-void CSampling::categoricalSampleWithReplacement(TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result) -{ +void CSampling::categoricalSampleWithReplacement(TDoubleVec& probabilities, std::size_t n, TSizeVec& result) { core::CScopedFastLock scopedLock(ms_Lock); doCategoricalSampleWithReplacement(ms_Rng, probabilities, n, result); }
-void CSampling::categoricalSampleWithReplacement(CPRNG::CXorOShiro128Plus &rng, - TDoubleVec &probabilities, +void CSampling::categoricalSampleWithReplacement(CPRNG::CXorOShiro128Plus& rng, + TDoubleVec& probabilities, std::size_t n, - TSizeVec &result) -{ + TSizeVec& result) { doCategoricalSampleWithReplacement(rng, probabilities, n, result); }
-void CSampling::categoricalSampleWithReplacement(CPRNG::CXorShift1024Mult &rng, - TDoubleVec &probabilities, +void CSampling::categoricalSampleWithReplacement(CPRNG::CXorShift1024Mult& rng, + TDoubleVec& probabilities, std::size_t n, - TSizeVec &result) -{ + TSizeVec& result) { doCategoricalSampleWithReplacement(rng, probabilities, n, result); }
-void CSampling::categoricalSampleWithoutReplacement(TDoubleVec &probabilities, - std::size_t n, - TSizeVec &result) -{ +void CSampling::categoricalSampleWithoutReplacement(TDoubleVec& probabilities, std::size_t n, TSizeVec& result) { core::CScopedFastLock scopedLock(ms_Lock); doCategoricalSampleWithoutReplacement(ms_Rng, probabilities, n, result); }
-void CSampling::categoricalSampleWithoutReplacement(CPRNG::CXorOShiro128Plus &rng, - TDoubleVec &probabilities, +void CSampling::categoricalSampleWithoutReplacement(CPRNG::CXorOShiro128Plus& rng, + TDoubleVec& probabilities, std::size_t n, - TSizeVec &result) -{ - doCategoricalSampleWithReplacement(rng, probabilities, n, result); + TSizeVec& result) { + doCategoricalSampleWithoutReplacement(rng, probabilities, n, result); }
-void CSampling::categoricalSampleWithoutReplacement(CPRNG::CXorShift1024Mult &rng, - TDoubleVec &probabilities, +void CSampling::categoricalSampleWithoutReplacement(CPRNG::CXorShift1024Mult& rng, + TDoubleVec& probabilities, std::size_t n, - TSizeVec &result) -{ - doCategoricalSampleWithReplacement(rng, probabilities, n, result); + doCategoricalSampleWithoutReplacement(rng, probabilities, n, result); }
-void CSampling::multinomialSampleFast(TDoubleVec &probabilities, - std::size_t n, - TSizeVec &sample, - bool sorted) -{ +void CSampling::multinomialSampleFast(TDoubleVec& probabilities, std::size_t n, TSizeVec& sample, bool sorted) { sample.clear(); - if (n == 0 || probabilities.empty()) - { + if (n == 0 || probabilities.empty()) { return; } @@ -764,8 +610,7 @@ void CSampling::multinomialSampleFast(TDoubleVec &probabilities, // as efficient as possible (since this means that the loop // will often terminate as early as possible on average). - if (!sorted) - { + if (!sorted) { std::sort(probabilities.begin(), probabilities.end(), std::greater<double>()); } @@ -774,30 +619,23 @@ double p = 1.0; std::size_t m = probabilities.size() - 1; core::CScopedFastLock scopedLock(ms_Lock); - for (std::size_t i = 0u; r > 0 && i < m; ++i) - { - boost::random::binomial_distribution<> binomial(static_cast<int>(r), - probabilities[i] / p); + for (std::size_t i = 0u; r > 0 && i < m; ++i) { + boost::random::binomial_distribution<> binomial(static_cast<int>(r), probabilities[i] / p); std::size_t ni = static_cast<std::size_t>(binomial(ms_Rng)); sample.push_back(ni); r -= ni; p -= probabilities[i]; } - if (r > 0) - { + if (r > 0) { sample.push_back(r); } } }
-void CSampling::multinomialSampleStable(TDoubleVec probabilities, - std::size_t n, - TSizeVec &sample) -{ +void CSampling::multinomialSampleStable(TDoubleVec probabilities, std::size_t n, TSizeVec& sample) { TSizeVec indices; indices.reserve(probabilities.size()); - for (std::size_t i = 0u; i < probabilities.size(); ++i) - { + for (std::size_t i = 0u; i < probabilities.size(); ++i) { indices.push_back(i); } COrderings::simultaneousSort(probabilities, indices, std::greater<double>()); @@ -807,25 +645,18 @@ void CSampling::multinomialSampleStable(TDoubleVec probabilities, multinomialSampleFast(probabilities, n, sample); sample.resize(probabilities.size(), 0); - for (std::size_t i = 0u; i < sample.size(); /**/) - { + for (std::size_t i = 0u; i < sample.size(); /**/) { std::size_t j = indices[i]; - if (i != j) - { + if (i != j) { std::swap(sample[i], sample[j]); std::swap(indices[i], indices[j]); - } - else - { + } else { ++i; } } }
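// The conditional-binomial scheme that multinomialSampleFast above relies on
// fits in a dozen lines. This is a minimal standalone sketch, not library
// code: it uses std::mt19937 and std::binomial_distribution in place of the
// CPRNG generators and boost::random, and the function name is illustrative.
#include <algorithm>
#include <cstddef>
#include <functional>
#include <random>
#include <vector>

std::vector<std::size_t> multinomialSketch(std::vector<double> probabilities, std::size_t n, std::mt19937& rng) {
    // Sorting descending makes the largest counts come out first, so the
    // number of trials still to assign shrinks as fast as possible on average.
    std::sort(probabilities.begin(), probabilities.end(), std::greater<double>());
    std::vector<std::size_t> sample;
    std::size_t r = n; // trials still to assign
    double p = 1.0;    // probability mass still to assign
    for (std::size_t i = 0; r > 0 && i + 1 < probabilities.size(); ++i) {
        // Conditioned on the counts drawn so far, the next category's count
        // is Binomial(r, p[i] / p) with the probability renormalized.
        std::binomial_distribution<std::size_t> binomial(r, std::min(probabilities[i] / p, 1.0));
        std::size_t ni = binomial(rng);
        sample.push_back(ni);
        r -= ni;
        p -= probabilities[i];
    }
    if (r > 0) {
        sample.push_back(r); // the remainder lands in the last category
    }
    return sample;
}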
-void CSampling::weightedSample(std::size_t n, - const TDoubleVec &weights, - TSizeVec &sampling) -{ +void CSampling::weightedSample(std::size_t n, const TDoubleVec& weights, TSizeVec& sampling) { // We sample each category, corresponding to the index i of its // weight, according to its weight. // @@ -858,22 +689,19 @@ void CSampling::weightedSample, double totalWeight = std::accumulate(weights.begin(), weights.end(), 0.0); - n = std::max(static_cast<std::size_t>(totalWeight - * static_cast<double>(n) + 0.5), - static_cast<std::size_t>(1u)); + n = std::max(static_cast<std::size_t>(totalWeight * static_cast<double>(n) + 0.5), static_cast<std::size_t>(1u)); LOG_TRACE("totalWeight = " << totalWeight << ", n = " << n); TUIntVec choices; - TDoubleVec remainders[] = { TDoubleVec(), TDoubleVec() }; + TDoubleVec remainders[] = {TDoubleVec(), TDoubleVec()}; choices.reserve(weights.size()); remainders[0].reserve(weights.size()); remainders[1].reserve(weights.size()); double totalRemainder = 0.0; - for (std::size_t i = 0u; i < weights.size(); ++i) - { + for (std::size_t i = 0u; i < weights.size(); ++i) { // We need to re-normalize so that the probabilities sum to one. double number = weights[i] * static_cast<double>(n) / totalWeight; choices.push_back((number - std::floor(number) < 0.5) ? 0u : 1u); @@ -885,99 +713,66 @@ void CSampling::weightedSample, // The remainder will be integral so checking against 0.5 avoids // floating point problems. - if (std::fabs(totalRemainder) > 0.5) - { - LOG_TRACE("ideal choice function = " - << core::CContainerPrinter::print(choices)); + if (std::fabs(totalRemainder) > 0.5) { + LOG_TRACE("ideal choice function = " << core::CContainerPrinter::print(choices)); TDoubleSizePrVec candidates; - for (std::size_t i = 0u; i < choices.size(); ++i) - { - if ( (totalRemainder > 0.0 && choices[i] == 0u) - || (totalRemainder < 0.0 && choices[i] == 1u)) - { + for (std::size_t i = 0u; i < choices.size(); ++i) { + if ((totalRemainder > 0.0 && choices[i] == 0u) || (totalRemainder < 0.0 && choices[i] == 1u)) { candidates.emplace_back(-std::fabs(remainders[choices[i]][i]), i); } } std::sort(candidates.begin(), candidates.end()); - LOG_TRACE("candidates = " - << core::CContainerPrinter::print(candidates)); + LOG_TRACE("candidates = " << core::CContainerPrinter::print(candidates)); - for (std::size_t i = 0u; - i < candidates.size() && std::fabs(totalRemainder) > 0.5; - ++i) - { + for (std::size_t i = 0u; i < candidates.size() && std::fabs(totalRemainder) > 0.5; ++i) { std::size_t j = candidates[i].second; unsigned int choice = choices[j]; choices[j] = (choice + 1u) % 2u; totalRemainder += remainders[choices[j]][j] - remainders[choice][j]; } } - LOG_TRACE("choice function = " - << core::CContainerPrinter::print(choices)); + LOG_TRACE("choice function = " << core::CContainerPrinter::print(choices)); sampling.reserve(weights.size()); - for (std::size_t i = 0u; i < weights.size(); ++i) - { + for (std::size_t i = 0u; i < weights.size(); ++i) { double number = weights[i] * static_cast<double>(n) / totalWeight; - sampling.push_back(static_cast<std::size_t>( - choices[i] == 0u ? std::floor(number) : std::ceil(number))); + sampling.push_back(static_cast<std::size_t>(choices[i] == 0u ? std::floor(number) : std::ceil(number))); } }
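// weightedSample above is essentially largest-remainder rounding: start from
// each category's ideal fractional share, round each one, then flip roundings
// with the largest remainders until the total matches. A minimal standalone
// sketch of that rounding step; the name and use of plain std types are
// illustrative, not the library's API.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

std::vector<std::size_t> largestRemainderRound(const std::vector<double>& shares) {
    long long target = std::llround(std::accumulate(shares.begin(), shares.end(), 0.0));
    std::vector<std::size_t> counts(shares.size());
    std::vector<std::pair<double, std::size_t>> remainders;
    long long total = 0;
    for (std::size_t i = 0; i < shares.size(); ++i) {
        counts[i] = static_cast<std::size_t>(std::floor(shares[i]));
        total += static_cast<long long>(counts[i]);
        remainders.emplace_back(shares[i] - std::floor(shares[i]), i);
    }
    // Hand the leftover units to the largest fractional parts first.
    std::sort(remainders.begin(), remainders.end(), std::greater<>());
    for (std::size_t i = 0; total < target && i < remainders.size(); ++i, ++total) {
        ++counts[remainders[i].second];
    }
    return counts;
}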
-void CSampling::normalSampleQuantiles(double mean, - double variance, - std::size_t n, - TDoubleVec &result) -{ +void CSampling::normalSampleQuantiles(double mean, double variance, std::size_t n, TDoubleVec& result) { result.clear(); - if (n == 0) - { + if (n == 0) { return; } - if (variance == 0.0) - { + if (variance == 0.0) { result.resize(n, mean); return; } - try - { + try { boost::math::normal_distribution<> normal(mean, std::sqrt(variance)); sampleQuantiles(normal, n, result); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to sample normal quantiles: " << e.what() - << ", mean = " << mean - << ", variance = " << variance); + } catch (const std::exception& e) { + LOG_ERROR("Failed to sample normal quantiles: " << e.what() << ", mean = " << mean << ", variance = " << variance); result.clear(); } }
-void CSampling::gammaSampleQuantiles(double shape, - double rate, - std::size_t n, - TDoubleVec &result) -{ +void CSampling::gammaSampleQuantiles(double shape, double rate, std::size_t n, TDoubleVec& result) { result.clear(); - if (n == 0) - { + if (n == 0) { return; } - try - { + try { boost::math::gamma_distribution<> gamma(shape, 1.0 / rate); sampleQuantiles(gamma, n, result); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to sample normal quantiles: " << e.what() - << ", shape = " << shape - << ", rate = " << rate); + } catch (const std::exception& e) { + LOG_ERROR("Failed to sample gamma quantiles: " << e.what() << ", shape = " << shape << ", rate = " << rate); result.clear(); } } @@ -985,31 +780,24 @@ void CSampling::gammaSampleQuantiles(double shape, core::CFastMutex CSampling::ms_Lock; CSampling::CRandomNumberGenerator CSampling::ms_Rng; -
-void CSampling::CRandomNumberGenerator::mock() -{ +void CSampling::CRandomNumberGenerator::mock() { m_Mock.reset((min() + max()) / 2); }
-void CSampling::CRandomNumberGenerator::unmock() -{ +void CSampling::CRandomNumberGenerator::unmock() { m_Mock.reset(); }
-void CSampling::CRandomNumberGenerator::seed() -{ +void CSampling::CRandomNumberGenerator::seed() { m_Rng.seed(); }
-CSampling::CScopeMockRandomNumberGenerator::CScopeMockRandomNumberGenerator() -{ +CSampling::CScopeMockRandomNumberGenerator::CScopeMockRandomNumberGenerator() { CSampling::ms_Rng.mock(); }
-CSampling::CScopeMockRandomNumberGenerator::~CScopeMockRandomNumberGenerator() -{ +CSampling::CScopeMockRandomNumberGenerator::~CScopeMockRandomNumberGenerator() { CSampling::ms_Rng.unmock(); } - } }
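// Both quantile samplers above delegate to a sampleQuantiles helper that is
// not shown in this hunk. The idea is to place the n samples at evenly spaced
// quantiles of the fitted distribution rather than drawing them at random.
// A standalone sketch for the normal case: the midpoint spacing (i + 0.5) / n
// and the bisection inverse are assumptions for illustration, where the real
// code uses boost::math quantile functions.
#include <cmath>
#include <cstddef>
#include <vector>

double normalCdf(double x) {
    return 0.5 * std::erfc(-x / std::sqrt(2.0)); // standard normal CDF
}

double normalQuantileByBisection(double q) {
    double lo = -10.0, hi = 10.0; // +/- 10 standard deviations is ample
    for (int i = 0; i < 60; ++i) {
        double mid = 0.5 * (lo + hi);
        (normalCdf(mid) < q ? lo : hi) = mid;
    }
    return 0.5 * (lo + hi);
}

std::vector<double> sampleNormalQuantilesSketch(double mean, double variance, std::size_t n) {
    std::vector<double> result;
    result.reserve(n);
    for (std::size_t i = 0; i < n; ++i) {
        double q = (static_cast<double>(i) + 0.5) / static_cast<double>(n);
        result.push_back(mean + std::sqrt(variance) * normalQuantileByBisection(q));
    }
    return result;
}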
diff --git a/lib/maths/CSeasonalComponent.cc b/lib/maths/CSeasonalComponent.cc index d68c6ce637..cb616b67cc 100644 --- a/lib/maths/CSeasonalComponent.cc +++ b/lib/maths/CSeasonalComponent.cc @@ -7,10 +7,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -26,12 +26,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleDoublePr = maths_t::TDoubleDoublePr; @@ -39,82 +36,66 @@ const std::string DECOMPOSITION_COMPONENT_TAG{"a"}; const std::string RNG_TAG{"b"}; const std::string BUCKETING_TAG{"c"}; const std::string EMPTY_STRING; - }
-CSeasonalComponent::CSeasonalComponent(const CSeasonalTime &time, +CSeasonalComponent::CSeasonalComponent(const CSeasonalTime& time, std::size_t maxSize, double decayRate, double minimumBucketLength, CSplineTypes::EBoundaryCondition boundaryCondition, CSplineTypes::EType valueInterpolationType, - CSplineTypes::EType varianceInterpolationType) : - CDecompositionComponent{maxSize, boundaryCondition, valueInterpolationType, varianceInterpolationType}, - m_Bucketing{time, decayRate, minimumBucketLength} -{} + CSplineTypes::EType varianceInterpolationType) + : CDecompositionComponent{maxSize, boundaryCondition, valueInterpolationType, varianceInterpolationType}, + m_Bucketing{time, decayRate, minimumBucketLength} { +}
CSeasonalComponent::CSeasonalComponent(double decayRate, double minimumBucketLength, - core::CStateRestoreTraverser &traverser, + core::CStateRestoreTraverser& traverser, CSplineTypes::EType valueInterpolationType, - CSplineTypes::EType varianceInterpolationType) : - CDecompositionComponent{0, CSplineTypes::E_Periodic, valueInterpolationType, varianceInterpolationType} -{ - traverser.traverseSubLevel(boost::bind(&CSeasonalComponent::acceptRestoreTraverser, - this, decayRate, minimumBucketLength, _1)); + CSplineTypes::EType varianceInterpolationType) + : CDecompositionComponent{0, CSplineTypes::E_Periodic, valueInterpolationType, varianceInterpolationType} { + traverser.traverseSubLevel(boost::bind(&CSeasonalComponent::acceptRestoreTraverser, this, decayRate, minimumBucketLength, _1)); }
-void CSeasonalComponent::swap(CSeasonalComponent &other) -{ +void CSeasonalComponent::swap(CSeasonalComponent& other) { this->CDecompositionComponent::swap(other); std::swap(m_Rng, other.m_Rng); m_Bucketing.swap(other.m_Bucketing); }
-bool CSeasonalComponent::acceptRestoreTraverser(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; +bool CSeasonalComponent::acceptRestoreTraverser(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE(DECOMPOSITION_COMPONENT_TAG, - traverser.traverseSubLevel(boost::bind(&CDecompositionComponent::acceptRestoreTraverser, - static_cast<CDecompositionComponent*>(this), _1))) + traverser.traverseSubLevel( + boost::bind(&CDecompositionComponent::acceptRestoreTraverser, static_cast<CDecompositionComponent*>(this), _1))) RESTORE(RNG_TAG, m_Rng.fromString(traverser.value())) RESTORE_SETUP_TEARDOWN(BUCKETING_TAG, CSeasonalComponentAdaptiveBucketing bucketing(decayRate, minimumBucketLength, traverser), true, m_Bucketing.swap(bucketing)) - } - while (traverser.next()); + } while (traverser.next()); return true; }
-void CSeasonalComponent::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(DECOMPOSITION_COMPONENT_TAG, - boost::bind(&CDecompositionComponent::acceptPersistInserter, - static_cast<const CDecompositionComponent*>(this), _1)); +void CSeasonalComponent::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel( + DECOMPOSITION_COMPONENT_TAG, + boost::bind(&CDecompositionComponent::acceptPersistInserter, static_cast<const CDecompositionComponent*>(this), _1)); inserter.insertValue(RNG_TAG, m_Rng.toString()); - inserter.insertLevel(BUCKETING_TAG, boost::bind( - &CSeasonalComponentAdaptiveBucketing::acceptPersistInserter, &m_Bucketing, _1)); + inserter.insertLevel(BUCKETING_TAG, boost::bind(&CSeasonalComponentAdaptiveBucketing::acceptPersistInserter, &m_Bucketing, _1)); }
-bool CSeasonalComponent::initialized() const -{ +bool CSeasonalComponent::initialized() const { return this->CDecompositionComponent::initialized(); }
-bool CSeasonalComponent::initialize(core_t::TTime startTime, - core_t::TTime endTime, - const TFloatMeanAccumulatorVec &values) -{ +bool CSeasonalComponent::initialize(core_t::TTime startTime, core_t::TTime endTime, const TFloatMeanAccumulatorVec&
values) { this->clear(); - if (!m_Bucketing.initialize(this->maxSize())) - { + if (!m_Bucketing.initialize(this->maxSize())) { LOG_ERROR("Bad input size: " << this->maxSize()); return false; } @@ -124,100 +105,80 @@ bool CSeasonalComponent::initialize(core_t::TTime startTime, return true; } -std::size_t CSeasonalComponent::size() const -{ +std::size_t CSeasonalComponent::size() const { return m_Bucketing.size(); } -void CSeasonalComponent::clear() -{ +void CSeasonalComponent::clear() { this->CDecompositionComponent::clear(); - if (m_Bucketing.initialized()) - { + if (m_Bucketing.initialized()) { m_Bucketing.clear(); } } -void CSeasonalComponent::shiftOrigin(core_t::TTime time) -{ +void CSeasonalComponent::shiftOrigin(core_t::TTime time) { m_Bucketing.shiftOrigin(time); } -void CSeasonalComponent::shiftLevel(double shift) -{ +void CSeasonalComponent::shiftLevel(double shift) { this->CDecompositionComponent::shiftLevel(shift); m_Bucketing.shiftLevel(shift); } -void CSeasonalComponent::shiftSlope(double shift) -{ +void CSeasonalComponent::shiftSlope(double shift) { m_Bucketing.shiftSlope(shift); } -void CSeasonalComponent::linearScale(core_t::TTime time, double scale) -{ +void CSeasonalComponent::linearScale(core_t::TTime time, double scale) { m_Bucketing.linearScale(scale); this->interpolate(time, false); } -void CSeasonalComponent::add(core_t::TTime time, double value, double weight) -{ +void CSeasonalComponent::add(core_t::TTime time, double value, double weight) { double predicted{CBasicStatistics::mean(this->value(this->jitter(time), 0.0))}; m_Bucketing.add(time, value, predicted, weight); } -void CSeasonalComponent::interpolate(core_t::TTime time, bool refine) -{ - if (refine) - { +void CSeasonalComponent::interpolate(core_t::TTime time, bool refine) { + if (refine) { m_Bucketing.refine(time); } TDoubleVec knots; TDoubleVec values; TDoubleVec variances; - if (m_Bucketing.knots(time, this->boundaryCondition(), knots, values, variances)) - { + if (m_Bucketing.knots(time, this->boundaryCondition(), knots, values, variances)) { this->CDecompositionComponent::interpolate(knots, values, variances); } } -double CSeasonalComponent::decayRate() const -{ +double CSeasonalComponent::decayRate() const { return m_Bucketing.decayRate(); } -void CSeasonalComponent::decayRate(double decayRate) -{ +void CSeasonalComponent::decayRate(double decayRate) { return m_Bucketing.decayRate(decayRate); } -void CSeasonalComponent::propagateForwardsByTime(double time, bool meanRevert) -{ +void CSeasonalComponent::propagateForwardsByTime(double time, bool meanRevert) { m_Bucketing.propagateForwardsByTime(time, meanRevert); } -const CSeasonalTime &CSeasonalComponent::time() const -{ +const CSeasonalTime& CSeasonalComponent::time() const { return m_Bucketing.time(); } -TDoubleDoublePr CSeasonalComponent::value(core_t::TTime time, double confidence) const -{ +TDoubleDoublePr CSeasonalComponent::value(core_t::TTime time, double confidence) const { double offset{this->time().periodic(time)}; double n{m_Bucketing.count(time)}; return this->CDecompositionComponent::value(offset, n, confidence); } -double CSeasonalComponent::meanValue() const -{ +double CSeasonalComponent::meanValue() const { return this->CDecompositionComponent::meanValue(); } -double CSeasonalComponent::delta(core_t::TTime time, - core_t::TTime shortPeriod, - double shortPeriodValue) const -{ +double CSeasonalComponent::delta(core_t::TTime time, core_t::TTime shortPeriod, double shortPeriodValue) const { using TMinAccumulator = 
CBasicStatistics::SMin<double>::TAccumulator; using TMinMaxAccumulator = CBasicStatistics::CMinMax<double>; @@ -238,26 +199,22 @@ double CSeasonalComponent::delta(core_t::TTime time, // periodic features in long component. We can achieve this by // reducing the value in the short seasonal component. - const CSeasonalTime &time_{this->time()}; + const CSeasonalTime& time_{this->time()}; core_t::TTime longPeriod{time_.period()}; - if (longPeriod > shortPeriod && longPeriod % shortPeriod == 0) - { + if (longPeriod > shortPeriod && longPeriod % shortPeriod == 0) { TMinAccumulator min; TMinMaxAccumulator minmax; double mean{this->CDecompositionComponent::meanValue()}; - for (core_t::TTime t = time; t < time + longPeriod; t += shortPeriod) - { - if (time_.inWindow(t)) - { + for (core_t::TTime t = time; t < time + longPeriod; t += shortPeriod) { + if (time_.inWindow(t)) { double difference{CBasicStatistics::mean(this->value(t, 0.0)) - mean}; min.add(std::fabs(difference)); minmax.add(difference); } } - if (std::fabs(minmax.signMargin()) > 0.0) - { + if (std::fabs(minmax.signMargin()) > 0.0) { return minmax.signMargin(); } @@ -274,34 +231,28 @@ double CSeasonalComponent::delta(core_t::TTime time, return 0.0; }
-TDoubleDoublePr CSeasonalComponent::variance(core_t::TTime time, double confidence) const -{ +TDoubleDoublePr CSeasonalComponent::variance(core_t::TTime time, double confidence) const { double offset{this->time().periodic(time)}; double n{m_Bucketing.count(time)}; return this->CDecompositionComponent::variance(offset, n, confidence); }
-double CSeasonalComponent::meanVariance() const -{ +double CSeasonalComponent::meanVariance() const { return this->CDecompositionComponent::meanVariance(); }
-double CSeasonalComponent::heteroscedasticity() const -{ +double CSeasonalComponent::heteroscedasticity() const { return this->CDecompositionComponent::heteroscedasticity(); }
-bool CSeasonalComponent::covariances(core_t::TTime time, TMatrix &result) const -{ +bool CSeasonalComponent::covariances(core_t::TTime time, TMatrix& result) const { result = TMatrix(0.0); - if (!this->initialized()) - { + if (!this->initialized()) { return false; } - if (auto r = m_Bucketing.regression(time)) - { + if (auto r = m_Bucketing.regression(time)) { double variance{CBasicStatistics::mean(this->variance(time, 0.0))}; return r->covariances(variance, result); } @@ -309,54 +260,44 @@ bool CSeasonalComponent::covariances(core_t::TTime time, TMatrix &result) const return false; }
-CSeasonalComponent::TSplineCRef CSeasonalComponent::valueSpline() const -{ +CSeasonalComponent::TSplineCRef CSeasonalComponent::valueSpline() const { return this->CDecompositionComponent::valueSpline(); }
-double CSeasonalComponent::slope() const -{ +double CSeasonalComponent::slope() const { return m_Bucketing.slope(); }
-bool CSeasonalComponent::slopeAccurate(core_t::TTime time) const -{ +bool CSeasonalComponent::slopeAccurate(core_t::TTime time) const { return m_Bucketing.slopeAccurate(time); }
-uint64_t CSeasonalComponent::checksum(uint64_t seed) const -{ +uint64_t CSeasonalComponent::checksum(uint64_t seed) const { seed = this->CDecompositionComponent::checksum(seed); return CChecksum::calculate(seed, m_Bucketing); }
-void CSeasonalComponent::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CSeasonalComponent::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CSeasonalComponent"); core::CMemoryDebug::dynamicSize("m_Bucketing", m_Bucketing, mem); core::CMemoryDebug::dynamicSize("m_Splines", this->splines(), mem); }
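// The interesting step in CSeasonalComponent::delta above is the sign-margin
// test: sample the long component at the short component's period and, if
// every difference from the long component's mean has the same sign, that
// common offset belongs in the shorter component and can be transferred.
// A standalone sketch of just that test; the differences vector stands in
// for evaluating the component, and the name is illustrative.
#include <algorithm>
#include <vector>

double signMarginSketch(const std::vector<double>& differences) {
    if (differences.empty()) {
        return 0.0;
    }
    auto bounds = std::minmax_element(differences.begin(), differences.end());
    if (*bounds.first > 0.0) {
        return *bounds.first;  // all positive: the smallest is a safe shift
    }
    if (*bounds.second < 0.0) {
        return *bounds.second; // all negative: the largest is a safe shift
    }
    return 0.0;                // mixed signs: nothing common to transfer
}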
-std::size_t CSeasonalComponent::memoryUsage() const -{ +std::size_t CSeasonalComponent::memoryUsage() const { return core::CMemory::dynamicSize(m_Bucketing) + core::CMemory::dynamicSize(this->splines()); }
-core_t::TTime CSeasonalComponent::jitter(core_t::TTime time) -{ +core_t::TTime CSeasonalComponent::jitter(core_t::TTime time) { core_t::TTime result{time}; - if (m_Bucketing.minimumBucketLength() > 0.0) - { - const CSeasonalTime &time_{this->time()}; + if (m_Bucketing.minimumBucketLength() > 0.0) { + const CSeasonalTime& time_{this->time()}; double f{CSampling::uniformSample(m_Rng, 0.0, 1.0)}; core_t::TTime a{time_.startOfWindow(time)}; core_t::TTime b{a + time_.windowLength() - 1}; - double jitter{0.5 * m_Bucketing.minimumBucketLength() - * (f <= 0.5 ? std::sqrt(2.0 * f) - 1.0 : std::sqrt(2.0 * (f - 0.5)))}; + double jitter{0.5 * m_Bucketing.minimumBucketLength() * (f <= 0.5 ? std::sqrt(2.0 * f) - 1.0 : std::sqrt(2.0 * (f - 0.5)))}; result = CTools::truncate(result + static_cast<core_t::TTime>(jitter + 0.5), a, b); } return result; } - } }
diff --git a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc index aa8beab959..09d3d0f939 100644 --- a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc +++ b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc @@ -8,10 +8,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -33,27 +33,22 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator; using TRegression = CSeasonalComponentAdaptiveBucketing::TRegression; //! Clear a vector and recover its memory. template<typename T> -void clearAndShrink(std::vector<T> &vector) -{ +void clearAndShrink(std::vector<T>& vector) { std::vector<T> empty; empty.swap(vector); } //! Get the gradient of \p r.
-double gradient(const TRegression &r) -{ +double gradient(const TRegression& r) { TRegression::TArray params; r.parameters(params); return params[1]; @@ -79,76 +74,59 @@ const std::string LAST_UPDATES_OLD_TAG{"f"}; const std::string EMPTY_STRING; const core_t::TTime UNSET_TIME{0}; const double SUFFICIENT_INTERVAL_TO_ESTIMATE_SLOPE{2.5}; - } -CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing() : - CAdaptiveBucketing{0.0, 0.0} -{} +CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing() : CAdaptiveBucketing{0.0, 0.0} { +} -CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(const CSeasonalTime &time, +CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(const CSeasonalTime& time, double decayRate, - double minimumBucketLength) : - CAdaptiveBucketing{decayRate, minimumBucketLength}, - m_Time{time.clone()} -{} + double minimumBucketLength) + : CAdaptiveBucketing{decayRate, minimumBucketLength}, m_Time{time.clone()} { +} -CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(const CSeasonalComponentAdaptiveBucketing &other) : - CAdaptiveBucketing(other), - m_Time{other.m_Time->clone()}, - m_Buckets(other.m_Buckets) -{} +CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(const CSeasonalComponentAdaptiveBucketing& other) + : CAdaptiveBucketing(other), m_Time{other.m_Time->clone()}, m_Buckets(other.m_Buckets) { +} CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(double decayRate, double minimumBucketLength, - core::CStateRestoreTraverser &traverser) : - CAdaptiveBucketing{decayRate, minimumBucketLength} -{ + core::CStateRestoreTraverser& traverser) + : CAdaptiveBucketing{decayRate, minimumBucketLength} { traverser.traverseSubLevel(boost::bind(&CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser, this, _1)); } -const CSeasonalComponentAdaptiveBucketing & -CSeasonalComponentAdaptiveBucketing::operator=(const CSeasonalComponentAdaptiveBucketing &rhs) -{ - if (&rhs != this) - { +const CSeasonalComponentAdaptiveBucketing& CSeasonalComponentAdaptiveBucketing::operator=(const CSeasonalComponentAdaptiveBucketing& rhs) { + if (&rhs != this) { CSeasonalComponentAdaptiveBucketing tmp(rhs); this->swap(tmp); } return *this; } -void CSeasonalComponentAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CSeasonalComponentAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); inserter.insertLevel(ADAPTIVE_BUCKETING_6_3_TAG, - boost::bind(&CAdaptiveBucketing::acceptPersistInserter, - static_cast(this), _1)); - inserter.insertLevel(TIME_6_3_TAG, boost::bind(&CSeasonalTimeStateSerializer::acceptPersistInserter, - boost::cref(*m_Time), _1)); + boost::bind(&CAdaptiveBucketing::acceptPersistInserter, static_cast(this), _1)); + inserter.insertLevel(TIME_6_3_TAG, boost::bind(&CSeasonalTimeStateSerializer::acceptPersistInserter, boost::cref(*m_Time), _1)); core::CPersistUtils::persist(BUCKETS_6_3_TAG, m_Buckets, inserter); } -void CSeasonalComponentAdaptiveBucketing::swap(CSeasonalComponentAdaptiveBucketing &other) -{ +void CSeasonalComponentAdaptiveBucketing::swap(CSeasonalComponentAdaptiveBucketing& other) { this->CAdaptiveBucketing::swap(other); m_Time.swap(other.m_Time); m_Buckets.swap(other.m_Buckets); } -bool CSeasonalComponentAdaptiveBucketing::initialized() const -{ +bool CSeasonalComponentAdaptiveBucketing::initialized() const { return 
this->CAdaptiveBucketing::initialized(); } -bool CSeasonalComponentAdaptiveBucketing::initialize(std::size_t n) -{ +bool CSeasonalComponentAdaptiveBucketing::initialize(std::size_t n) { double a{0.0}; - double b{static_cast(std::min(this->time().windowLength(), - this->time().period()))}; + double b{static_cast(std::min(this->time().windowLength(), this->time().period()))}; - if (this->CAdaptiveBucketing::initialize(a, b, n)) - { + if (this->CAdaptiveBucketing::initialize(a, b, n)) { n = this->size(); m_Buckets.assign(n, SBucket()); return true; @@ -158,162 +136,124 @@ bool CSeasonalComponentAdaptiveBucketing::initialize(std::size_t n) void CSeasonalComponentAdaptiveBucketing::initialValues(core_t::TTime startTime, core_t::TTime endTime, - const TFloatMeanAccumulatorVec &values) -{ - if (this->initialized()) - { + const TFloatMeanAccumulatorVec& values) { + if (this->initialized()) { this->shiftOrigin(startTime); - if (!values.empty()) - { + if (!values.empty()) { this->CAdaptiveBucketing::initialValues(startTime, endTime, values); this->shiftSlope(-this->slope()); } } } -std::size_t CSeasonalComponentAdaptiveBucketing::size() const -{ +std::size_t CSeasonalComponentAdaptiveBucketing::size() const { return this->CAdaptiveBucketing::size(); } -void CSeasonalComponentAdaptiveBucketing::clear() -{ +void CSeasonalComponentAdaptiveBucketing::clear() { this->CAdaptiveBucketing::clear(); clearAndShrink(m_Buckets); } -void CSeasonalComponentAdaptiveBucketing::shiftOrigin(core_t::TTime time) -{ +void CSeasonalComponentAdaptiveBucketing::shiftOrigin(core_t::TTime time) { time = CIntegerTools::floor(time, core::constants::WEEK); double shift{m_Time->regression(time)}; - if (shift > 0.0) - { - for (auto &bucket : m_Buckets) - { + if (shift > 0.0) { + for (auto& bucket : m_Buckets) { bucket.s_Regression.shiftAbscissa(-shift); } m_Time->regressionOrigin(time); } } -void CSeasonalComponentAdaptiveBucketing::shiftLevel(double shift) -{ - for (auto &bucket : m_Buckets) - { +void CSeasonalComponentAdaptiveBucketing::shiftLevel(double shift) { + for (auto& bucket : m_Buckets) { bucket.s_Regression.shiftOrdinate(shift); } } -void CSeasonalComponentAdaptiveBucketing::shiftSlope(double shift) -{ - for (auto &bucket : m_Buckets) - { +void CSeasonalComponentAdaptiveBucketing::shiftSlope(double shift) { + for (auto& bucket : m_Buckets) { bucket.s_Regression.shiftGradient(shift); } } -void CSeasonalComponentAdaptiveBucketing::linearScale(double scale) -{ - for (auto &bucket : m_Buckets) - { +void CSeasonalComponentAdaptiveBucketing::linearScale(double scale) { + for (auto& bucket : m_Buckets) { bucket.s_Regression.linearScale(scale); } } -void CSeasonalComponentAdaptiveBucketing::add(core_t::TTime time, - double value, - double prediction, - double weight) -{ +void CSeasonalComponentAdaptiveBucketing::add(core_t::TTime time, double value, double prediction, double weight) { std::size_t bucket{0}; - if (!this->initialized() || !this->bucket(time, bucket)) - { + if (!this->initialized() || !this->bucket(time, bucket)) { return; } this->CAdaptiveBucketing::add(bucket, time, weight); - SBucket &bucket_{m_Buckets[bucket]}; + SBucket& bucket_{m_Buckets[bucket]}; double t{m_Time->regression(time)}; - TRegression ®ression{bucket_.s_Regression}; + TRegression& regression{bucket_.s_Regression}; TDoubleMeanVarAccumulator moments = - CBasicStatistics::accumulator(regression.count(), - prediction, - static_cast(bucket_.s_Variance)); + CBasicStatistics::accumulator(regression.count(), prediction, 
static_cast(bucket_.s_Variance)); moments.add(value, weight * weight); regression.add(t, value, weight); bucket_.s_Variance = CBasicStatistics::maximumLikelihoodVariance(moments); - if (m_Time->regressionInterval(bucket_.s_FirstUpdate, - bucket_.s_LastUpdate) < SUFFICIENT_INTERVAL_TO_ESTIMATE_SLOPE) - { + if (m_Time->regressionInterval(bucket_.s_FirstUpdate, bucket_.s_LastUpdate) < SUFFICIENT_INTERVAL_TO_ESTIMATE_SLOPE) { double delta{regression.predict(t)}; regression.shiftGradient(-gradient(regression)); delta -= regression.predict(t); regression.shiftOrdinate(delta); } - bucket_.s_FirstUpdate = bucket_.s_FirstUpdate == UNSET_TIME ? - time : std::min(bucket_.s_FirstUpdate, time); - bucket_.s_LastUpdate = bucket_.s_LastUpdate == UNSET_TIME ? - time : std::max(bucket_.s_LastUpdate, time); + bucket_.s_FirstUpdate = bucket_.s_FirstUpdate == UNSET_TIME ? time : std::min(bucket_.s_FirstUpdate, time); + bucket_.s_LastUpdate = bucket_.s_LastUpdate == UNSET_TIME ? time : std::max(bucket_.s_LastUpdate, time); } -const CSeasonalTime &CSeasonalComponentAdaptiveBucketing::time() const -{ +const CSeasonalTime& CSeasonalComponentAdaptiveBucketing::time() const { return *m_Time; } -void CSeasonalComponentAdaptiveBucketing::decayRate(double value) -{ +void CSeasonalComponentAdaptiveBucketing::decayRate(double value) { this->CAdaptiveBucketing::decayRate(value); } -double CSeasonalComponentAdaptiveBucketing::decayRate() const -{ +double CSeasonalComponentAdaptiveBucketing::decayRate() const { return this->CAdaptiveBucketing::decayRate(); } -void CSeasonalComponentAdaptiveBucketing::propagateForwardsByTime(double time, bool meanRevert) -{ - if (time < 0.0) - { +void CSeasonalComponentAdaptiveBucketing::propagateForwardsByTime(double time, bool meanRevert) { + if (time < 0.0) { LOG_ERROR("Can't propagate bucketing backwards in time"); - } - else if (this->initialized()) - { + } else if (this->initialized()) { double factor{std::exp(-this->CAdaptiveBucketing::decayRate() * time)}; this->CAdaptiveBucketing::age(factor); - for (auto &bucket : m_Buckets) - { + for (auto& bucket : m_Buckets) { bucket.s_Regression.age(factor, meanRevert); } } } -double CSeasonalComponentAdaptiveBucketing::minimumBucketLength() const -{ +double CSeasonalComponentAdaptiveBucketing::minimumBucketLength() const { return this->CAdaptiveBucketing::minimumBucketLength(); } -void CSeasonalComponentAdaptiveBucketing::refine(core_t::TTime time) -{ +void CSeasonalComponentAdaptiveBucketing::refine(core_t::TTime time) { this->CAdaptiveBucketing::refine(time); } -double CSeasonalComponentAdaptiveBucketing::count(core_t::TTime time) const -{ - const TRegression *regression = this->regression(time); +double CSeasonalComponentAdaptiveBucketing::count(core_t::TTime time) const { + const TRegression* regression = this->regression(time); return regression ? 
regression->count() : 0.0; } -const TRegression *CSeasonalComponentAdaptiveBucketing::regression(core_t::TTime time) const -{ - const TRegression *result{0}; - if (this->initialized()) - { +const TRegression* CSeasonalComponentAdaptiveBucketing::regression(core_t::TTime time) const { + const TRegression* result{0}; + if (this->initialized()) { std::size_t bucket{0}; this->bucket(time, bucket); bucket = CTools::truncate(bucket, std::size_t(0), m_Buckets.size() - 1); @@ -324,89 +264,71 @@ const TRegression *CSeasonalComponentAdaptiveBucketing::regression(core_t::TTime bool CSeasonalComponentAdaptiveBucketing::knots(core_t::TTime time, CSplineTypes::EBoundaryCondition boundary, - TDoubleVec &knots, - TDoubleVec &values, - TDoubleVec &variances) const -{ + TDoubleVec& knots, + TDoubleVec& values, + TDoubleVec& variances) const { return this->CAdaptiveBucketing::knots(time, boundary, knots, values, variances); } -double CSeasonalComponentAdaptiveBucketing::slope() const -{ +double CSeasonalComponentAdaptiveBucketing::slope() const { CBasicStatistics::CMinMax minmax; - for (const auto &bucket : m_Buckets) - { - if (bucket.s_Regression.count() > 0.0) - { + for (const auto& bucket : m_Buckets) { + if (bucket.s_Regression.count() > 0.0) { minmax.add(gradient(bucket.s_Regression)); } } return minmax.initialized() ? minmax.signMargin() : 0.0; } -bool CSeasonalComponentAdaptiveBucketing::slopeAccurate(core_t::TTime time) const -{ +bool CSeasonalComponentAdaptiveBucketing::slopeAccurate(core_t::TTime time) const { return this->observedInterval(time) >= SUFFICIENT_INTERVAL_TO_ESTIMATE_SLOPE; } -uint64_t CSeasonalComponentAdaptiveBucketing::checksum(uint64_t seed) const -{ +uint64_t CSeasonalComponentAdaptiveBucketing::checksum(uint64_t seed) const { seed = this->CAdaptiveBucketing::checksum(seed); seed = CChecksum::calculate(seed, m_Time); return CChecksum::calculate(seed, m_Buckets); } -void CSeasonalComponentAdaptiveBucketing::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CSeasonalComponentAdaptiveBucketing::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CSeasonalComponentAdaptiveBucketing"); core::CMemoryDebug::dynamicSize("m_Endpoints", this->CAdaptiveBucketing::endpoints(), mem); core::CMemoryDebug::dynamicSize("m_Centres", this->CAdaptiveBucketing::centres(), mem); core::CMemoryDebug::dynamicSize("m_Buckets", m_Buckets, mem); } -std::size_t CSeasonalComponentAdaptiveBucketing::memoryUsage() const -{ - return this->CAdaptiveBucketing::memoryUsage() - + core::CMemory::dynamicSize(m_Buckets); +std::size_t CSeasonalComponentAdaptiveBucketing::memoryUsage() const { + return this->CAdaptiveBucketing::memoryUsage() + core::CMemory::dynamicSize(m_Buckets); } -const CSeasonalComponentAdaptiveBucketing::TFloatVec &CSeasonalComponentAdaptiveBucketing::endpoints() const -{ +const CSeasonalComponentAdaptiveBucketing::TFloatVec& CSeasonalComponentAdaptiveBucketing::endpoints() const { return this->CAdaptiveBucketing::endpoints(); } -double CSeasonalComponentAdaptiveBucketing::count() const -{ +double CSeasonalComponentAdaptiveBucketing::count() const { return this->CAdaptiveBucketing::count(); } -CSeasonalComponentAdaptiveBucketing::TDoubleVec CSeasonalComponentAdaptiveBucketing::values(core_t::TTime time) const -{ +CSeasonalComponentAdaptiveBucketing::TDoubleVec CSeasonalComponentAdaptiveBucketing::values(core_t::TTime time) const { return this->CAdaptiveBucketing::values(time); } -CSeasonalComponentAdaptiveBucketing::TDoubleVec 
CSeasonalComponentAdaptiveBucketing::variances() const -{ +CSeasonalComponentAdaptiveBucketing::TDoubleVec CSeasonalComponentAdaptiveBucketing::variances() const { return this->CAdaptiveBucketing::variances(); } -bool CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - if (traverser.name() == VERSION_6_3_TAG) - { - while (traverser.next()) - { - const std::string &name{traverser.name()}; - RESTORE(ADAPTIVE_BUCKETING_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, - static_cast(this), _1))); - RESTORE(TIME_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&CSeasonalTimeStateSerializer::acceptRestoreTraverser, boost::ref(m_Time), _1))) +bool CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + if (traverser.name() == VERSION_6_3_TAG) { + while (traverser.next()) { + const std::string& name{traverser.name()}; + RESTORE(ADAPTIVE_BUCKETING_6_3_TAG, + traverser.traverseSubLevel( + boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, static_cast(this), _1))); + RESTORE(TIME_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&CSeasonalTimeStateSerializer::acceptRestoreTraverser, boost::ref(m_Time), _1))) RESTORE(BUCKETS_6_3_TAG, core::CPersistUtils::restore(BUCKETS_6_3_TAG, m_Buckets, traverser)) } - } - else - { + } else { // There is no version string this is historic state. using TTimeVec = std::vector; @@ -416,29 +338,25 @@ bool CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRes TRegressionVec regressions; TFloatVec variances; TTimeVec lastUpdates; - do - { - const std::string &name{traverser.name()}; - RESTORE(ADAPTIVE_BUCKETING_OLD_TAG, traverser.traverseSubLevel( - boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, - static_cast(this), _1))); - RESTORE(TIME_OLD_TAG, traverser.traverseSubLevel( - boost::bind(&CSeasonalTimeStateSerializer::acceptRestoreTraverser, boost::ref(m_Time), _1))) + do { + const std::string& name{traverser.name()}; + RESTORE(ADAPTIVE_BUCKETING_OLD_TAG, + traverser.traverseSubLevel( + boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, static_cast(this), _1))); + RESTORE(TIME_OLD_TAG, + traverser.traverseSubLevel(boost::bind(&CSeasonalTimeStateSerializer::acceptRestoreTraverser, boost::ref(m_Time), _1))) RESTORE_BUILT_IN(INITIAL_TIME_OLD_TAG, initialTime) RESTORE_SETUP_TEARDOWN(REGRESSION_OLD_TAG, TRegression regression, - traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, - ®ression, _1)), + traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, ®ression, _1)), regressions.push_back(regression)) RESTORE(VARIANCES_OLD_TAG, core::CPersistUtils::fromString(traverser.value(), variances)) RESTORE(LAST_UPDATES_OLD_TAG, core::CPersistUtils::fromString(traverser.value(), lastUpdates)) - } - while (traverser.next()); + } while (traverser.next()); m_Buckets.clear(); m_Buckets.reserve(regressions.size()); - for (std::size_t i = 0u; i < regressions.size(); ++i) - { + for (std::size_t i = 0u; i < regressions.size(); ++i) { m_Buckets.emplace_back(regressions[i], variances[i], initialTime, lastUpdates[i]); } } @@ -448,8 +366,7 @@ bool CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRes return true; } -void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec &endpoints) -{ +void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { // Values are assigned based on their intersection with each 
// bucket in the previous configuration. The regression and // variance are computed using the appropriate combination @@ -478,98 +395,79 @@ void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec &endpoints) std::size_t m{m_Buckets.size()}; std::size_t n{endpoints.size()}; - if (m+1 != n) - { + if (m + 1 != n) { LOG_ERROR("Inconsistent end points and regressions"); return; } - TFloatVec &m_Endpoints{this->CAdaptiveBucketing::endpoints()}; - TFloatVec &m_Centres{this->CAdaptiveBucketing::centres()}; + TFloatVec& m_Endpoints{this->CAdaptiveBucketing::endpoints()}; + TFloatVec& m_Centres{this->CAdaptiveBucketing::centres()}; TBucketVec buckets; TFloatVec centres; buckets.reserve(m); centres.reserve(m); - for (std::size_t i = 1u; i < n; ++i) - { - double yl{m_Endpoints[i-1]}; + for (std::size_t i = 1u; i < n; ++i) { + double yl{m_Endpoints[i - 1]}; double yr{m_Endpoints[i]}; - std::size_t r = std::lower_bound(endpoints.begin(), - endpoints.end(), yr) - endpoints.begin(); + std::size_t r = std::lower_bound(endpoints.begin(), endpoints.end(), yr) - endpoints.begin(); r = CTools::truncate(r, std::size_t(1), n - 1); - std::size_t l = std::upper_bound(endpoints.begin(), - endpoints.end(), yl) - endpoints.begin(); + std::size_t l = std::upper_bound(endpoints.begin(), endpoints.end(), yl) - endpoints.begin(); l = CTools::truncate(l, std::size_t(1), r); LOG_TRACE("interval = [" << yl << "," << yr << "]"); LOG_TRACE("l = " << l << ", r = " << r); - LOG_TRACE("[x(l), x(r)] = [" << endpoints[l-1] << "," << endpoints[r] << "]"); + LOG_TRACE("[x(l), x(r)] = [" << endpoints[l - 1] << "," << endpoints[r] << "]"); - double xl{endpoints[l-1]}; + double xl{endpoints[l - 1]}; double xr{endpoints[l]}; - if (l == r) - { - double interval{m_Endpoints[i] - m_Endpoints[i-1]}; + if (l == r) { + double interval{m_Endpoints[i] - m_Endpoints[i - 1]}; double w{CTools::truncate(interval / (xr - xl), 0.0, 1.0)}; - const SBucket &bucket{m_Buckets[l-1]}; - buckets.emplace_back(bucket.s_Regression.scaled(w * w), - bucket.s_Variance, - bucket.s_FirstUpdate, - bucket.s_LastUpdate); - centres.push_back(CTools::truncate(static_cast(m_Centres[l-1]), yl, yr)); - } - else - { - double interval{xr - m_Endpoints[i-1]}; + const SBucket& bucket{m_Buckets[l - 1]}; + buckets.emplace_back(bucket.s_Regression.scaled(w * w), bucket.s_Variance, bucket.s_FirstUpdate, bucket.s_LastUpdate); + centres.push_back(CTools::truncate(static_cast(m_Centres[l - 1]), yl, yr)); + } else { + double interval{xr - m_Endpoints[i - 1]}; double w{CTools::truncate(interval / (xr - xl), 0.0, 1.0)}; - const SBucket *bucket{&m_Buckets[l-1]}; + const SBucket* bucket{&m_Buckets[l - 1]}; TMinAccumulator firstUpdate; TMinAccumulator lastUpdate; TDoubleRegression regression{bucket->s_Regression.scaled(w)}; - TDoubleMeanVarAccumulator variance{ - CBasicStatistics::accumulator(w * bucket->s_Regression.count(), - bucket->s_Regression.mean(), - static_cast(bucket->s_Variance))}; + TDoubleMeanVarAccumulator variance{CBasicStatistics::accumulator( + w * bucket->s_Regression.count(), bucket->s_Regression.mean(), static_cast(bucket->s_Variance))}; firstUpdate.add(bucket->s_FirstUpdate); lastUpdate.add(bucket->s_LastUpdate); TDoubleMeanAccumulator centre{ - CBasicStatistics::accumulator(w * bucket->s_Regression.count(), - static_cast(m_Centres[l-1]))}; + CBasicStatistics::accumulator(w * bucket->s_Regression.count(), static_cast(m_Centres[l - 1]))}; double count{w * w * bucket->s_Regression.count()}; - while (++l < r) - { - bucket = &m_Buckets[l-1]; + while (++l < r) { 
+ bucket = &m_Buckets[l - 1]; regression += bucket->s_Regression; - variance += CBasicStatistics::accumulator(bucket->s_Regression.count(), - bucket->s_Regression.mean(), - static_cast(bucket->s_Variance)); + variance += CBasicStatistics::accumulator( + bucket->s_Regression.count(), bucket->s_Regression.mean(), static_cast(bucket->s_Variance)); firstUpdate.add(bucket->s_FirstUpdate); lastUpdate.add(bucket->s_LastUpdate); - centre += CBasicStatistics::accumulator(bucket->s_Regression.count(), - static_cast(m_Centres[l-1])); + centre += CBasicStatistics::accumulator(bucket->s_Regression.count(), static_cast(m_Centres[l - 1])); count += bucket->s_Regression.count(); } - xl = endpoints[l-1]; + xl = endpoints[l - 1]; xr = endpoints[l]; - bucket = &m_Buckets[l-1]; + bucket = &m_Buckets[l - 1]; interval = m_Endpoints[i] - xl; w = CTools::truncate(interval / (xr - xl), 0.0, 1.0); regression += bucket->s_Regression.scaled(w); - variance += CBasicStatistics::accumulator(w * bucket->s_Regression.count(), - bucket->s_Regression.mean(), - static_cast(bucket->s_Variance)); + variance += CBasicStatistics::accumulator( + w * bucket->s_Regression.count(), bucket->s_Regression.mean(), static_cast(bucket->s_Variance)); firstUpdate.add(bucket->s_FirstUpdate); lastUpdate.add(bucket->s_LastUpdate); - centre += CBasicStatistics::accumulator(w * bucket->s_Regression.count(), - static_cast(m_Centres[l-1])); + centre += CBasicStatistics::accumulator(w * bucket->s_Regression.count(), static_cast(m_Centres[l - 1])); count += w * w * bucket->s_Regression.count(); double scale{count == regression.count() ? 1.0 : count / regression.count()}; - buckets.emplace_back(regression.scaled(scale), - CBasicStatistics::maximumLikelihoodVariance(variance), - firstUpdate[0], lastUpdate[0]); + buckets.emplace_back( + regression.scaled(scale), CBasicStatistics::maximumLikelihoodVariance(variance), firstUpdate[0], lastUpdate[0]); centres.push_back(CTools::truncate(CBasicStatistics::mean(centre), yl, yr)); } } @@ -579,17 +477,14 @@ void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec &endpoints) // that is equal to the number of points they will receive in one // period. 
double count{0.0}; - for (const auto &bucket : buckets) - { + for (const auto& bucket : buckets) { count += bucket.s_Regression.count(); } count /= (endpoints[m] - endpoints[0]); - for (std::size_t i = 0u; i < m; ++i) - { + for (std::size_t i = 0u; i < m; ++i) { double c{buckets[i].s_Regression.count()}; - if (c > 0.0) - { - buckets[i].s_Regression.scale(count * (endpoints[i+1] - endpoints[i]) / c); + if (c > 0.0) { + buckets[i].s_Regression.scale(count * (endpoints[i + 1] - endpoints[i]) / c); } } @@ -601,54 +496,44 @@ void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec &endpoints) m_Centres.swap(centres); } -bool CSeasonalComponentAdaptiveBucketing::inWindow(core_t::TTime time) const -{ +bool CSeasonalComponentAdaptiveBucketing::inWindow(core_t::TTime time) const { return m_Time->inWindow(time); } -void CSeasonalComponentAdaptiveBucketing::add(std::size_t bucket, core_t::TTime time, double value, double weight) -{ - SBucket &bucket_{m_Buckets[bucket]}; - TRegression ®ression{bucket_.s_Regression}; - CFloatStorage &variance{bucket_.s_Variance}; +void CSeasonalComponentAdaptiveBucketing::add(std::size_t bucket, core_t::TTime time, double value, double weight) { + SBucket& bucket_{m_Buckets[bucket]}; + TRegression& regression{bucket_.s_Regression}; + CFloatStorage& variance{bucket_.s_Variance}; TDoubleMeanVarAccumulator variance_{ - CBasicStatistics::accumulator(regression.count(), - regression.mean(), - static_cast(variance))}; + CBasicStatistics::accumulator(regression.count(), regression.mean(), static_cast(variance))}; variance_.add(value, weight); regression.add(m_Time->regression(time), value, weight); variance = CBasicStatistics::maximumLikelihoodVariance(variance_); } -double CSeasonalComponentAdaptiveBucketing::offset(core_t::TTime time) const -{ +double CSeasonalComponentAdaptiveBucketing::offset(core_t::TTime time) const { return m_Time->periodic(time); } -double CSeasonalComponentAdaptiveBucketing::count(std::size_t bucket) const -{ +double CSeasonalComponentAdaptiveBucketing::count(std::size_t bucket) const { return m_Buckets[bucket].s_Regression.count(); } -double CSeasonalComponentAdaptiveBucketing::predict(std::size_t bucket, core_t::TTime time, double offset) const -{ - const SBucket &bucket_{m_Buckets[bucket]}; +double CSeasonalComponentAdaptiveBucketing::predict(std::size_t bucket, core_t::TTime time, double offset) const { + const SBucket& bucket_{m_Buckets[bucket]}; core_t::TTime firstUpdate{bucket_.s_FirstUpdate}; core_t::TTime lastUpdate{bucket_.s_LastUpdate}; - const TRegression ®ression{bucket_.s_Regression}; + const TRegression& regression{bucket_.s_Regression}; double interval{static_cast(lastUpdate - firstUpdate)}; - if (interval == 0) - { + if (interval == 0) { return regression.mean(); } double t{m_Time->regression(time + static_cast(offset + 0.5))}; - double extrapolateInterval{static_cast( - CBasicStatistics::max(time - lastUpdate, firstUpdate - time, core_t::TTime(0)))}; - if (extrapolateInterval == 0.0) - { + double extrapolateInterval{static_cast(CBasicStatistics::max(time - lastUpdate, firstUpdate - time, core_t::TTime(0)))}; + if (extrapolateInterval == 0.0) { return regression.predict(t); } @@ -659,66 +544,52 @@ double CSeasonalComponentAdaptiveBucketing::predict(std::size_t bucket, core_t:: return alpha * regression.predict(t) + beta * regression.mean(); } -double CSeasonalComponentAdaptiveBucketing::variance(std::size_t bucket) const -{ +double CSeasonalComponentAdaptiveBucketing::variance(std::size_t bucket) const { return 
m_Buckets[bucket].s_Variance; } -double CSeasonalComponentAdaptiveBucketing::observedInterval(core_t::TTime time) const -{ - return m_Time->regressionInterval(std::min_element( - m_Buckets.begin(), m_Buckets.end(), - [](const SBucket &lhs, const SBucket &rhs) - { return lhs.s_FirstUpdate < rhs.s_FirstUpdate; })->s_FirstUpdate, time); +double CSeasonalComponentAdaptiveBucketing::observedInterval(core_t::TTime time) const { + return m_Time->regressionInterval( + std::min_element(m_Buckets.begin(), + m_Buckets.end(), + [](const SBucket& lhs, const SBucket& rhs) { return lhs.s_FirstUpdate < rhs.s_FirstUpdate; }) + ->s_FirstUpdate, + time); } -CSeasonalComponentAdaptiveBucketing::SBucket::SBucket() : - s_Variance{0.0}, - s_FirstUpdate{UNSET_TIME}, - s_LastUpdate{UNSET_TIME} -{} +CSeasonalComponentAdaptiveBucketing::SBucket::SBucket() : s_Variance{0.0}, s_FirstUpdate{UNSET_TIME}, s_LastUpdate{UNSET_TIME} { +} -CSeasonalComponentAdaptiveBucketing::SBucket::SBucket(const TRegression ®ression, +CSeasonalComponentAdaptiveBucketing::SBucket::SBucket(const TRegression& regression, double variance, core_t::TTime firstUpdate, - core_t::TTime lastUpdate) : - s_Regression{regression}, - s_Variance{variance}, - s_FirstUpdate{firstUpdate}, - s_LastUpdate{lastUpdate} -{} - -bool CSeasonalComponentAdaptiveBucketing::SBucket::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; - RESTORE(REGRESSION_6_3_TAG, traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, - &s_Regression, _1))) + core_t::TTime lastUpdate) + : s_Regression{regression}, s_Variance{variance}, s_FirstUpdate{firstUpdate}, s_LastUpdate{lastUpdate} { +} + +bool CSeasonalComponentAdaptiveBucketing::SBucket::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE(REGRESSION_6_3_TAG, traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, &s_Regression, _1))) RESTORE(VARIANCE_6_3_TAG, s_Variance.fromString(traverser.value())) RESTORE_BUILT_IN(FIRST_UPDATE_6_3_TAG, s_FirstUpdate) RESTORE_BUILT_IN(LAST_UPDATE_6_3_TAG, s_LastUpdate) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CSeasonalComponentAdaptiveBucketing::SBucket::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(REGRESSION_6_3_TAG, boost::bind(&TRegression::acceptPersistInserter, - &s_Regression, _1)); +void CSeasonalComponentAdaptiveBucketing::SBucket::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(REGRESSION_6_3_TAG, boost::bind(&TRegression::acceptPersistInserter, &s_Regression, _1)); inserter.insertValue(VARIANCE_6_3_TAG, s_Variance.toString()); inserter.insertValue(FIRST_UPDATE_6_3_TAG, s_FirstUpdate); inserter.insertValue(LAST_UPDATE_6_3_TAG, s_LastUpdate); } -uint64_t CSeasonalComponentAdaptiveBucketing::SBucket::checksum(uint64_t seed) const -{ +uint64_t CSeasonalComponentAdaptiveBucketing::SBucket::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, s_Regression); seed = CChecksum::calculate(seed, s_Variance); seed = CChecksum::calculate(seed, s_FirstUpdate); return CChecksum::calculate(seed, s_LastUpdate); } - } } diff --git a/lib/maths/CSeasonalTime.cc b/lib/maths/CSeasonalTime.cc index 8d44c34319..9f62c05c93 100644 --- a/lib/maths/CSeasonalTime.cc +++ b/lib/maths/CSeasonalTime.cc @@ -7,10 +7,10 @@ #include #include -#include #include #include #include +#include 
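// CSeasonalComponentAdaptiveBucketing::predict above guards against trusting
// a fitted trend outside the interval it was fitted on: with no observed
// interval it returns the mean, with no extrapolation it returns the raw
// regression prediction, and in between it blends the two. A standalone
// sketch of such a blend; the exact weighting predict uses for alpha and
// beta is elided from this hunk, so the ratio below is an assumption for
// illustration only.
double dampedPredictSketch(double prediction, double mean, double observedInterval, double extrapolateInterval) {
    if (observedInterval <= 0.0) {
        return mean; // nothing fitted: fall back to the mean
    }
    // Trust in the regression decays as the extrapolation interval grows
    // relative to the observed interval (illustrative weighting).
    double alpha = observedInterval / (observedInterval + 2.0 * extrapolateInterval);
    double beta = 1.0 - alpha;
    return alpha * prediction + beta * mean;
}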
#include #include @@ -21,12 +21,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { // DO NOT change the existing tags if new sub-classes are added. const std::string DIURNAL_TIME_TAG("a"); const std::string ARBITRARY_PERIOD_TIME_TAG("b"); @@ -34,135 +31,104 @@ const std::string ARBITRARY_PERIOD_TIME_TAG("b"); //////// CSeasonalTime ////////
-CSeasonalTime::CSeasonalTime() : - m_Period(0), m_RegressionOrigin(0), m_Precedence(0) -{} +CSeasonalTime::CSeasonalTime() : m_Period(0), m_RegressionOrigin(0), m_Precedence(0) { +}
-CSeasonalTime::CSeasonalTime(core_t::TTime period, double precedence) : - m_Period(period), m_RegressionOrigin(0), m_Precedence(precedence) -{} +CSeasonalTime::CSeasonalTime(core_t::TTime period, double precedence) : m_Period(period), m_RegressionOrigin(0), m_Precedence(precedence) { +}
-bool CSeasonalTime::operator<(const CSeasonalTime &rhs) const -{ - return COrderings::lexicographical_compare(m_Period, -m_Precedence, - rhs.m_Period, -rhs.m_Precedence); +bool CSeasonalTime::operator<(const CSeasonalTime& rhs) const { + return COrderings::lexicographical_compare(m_Period, -m_Precedence, rhs.m_Period, -rhs.m_Precedence); }
-double CSeasonalTime::periodic(core_t::TTime time) const -{ +double CSeasonalTime::periodic(core_t::TTime time) const { return static_cast<double>((time - this->startOfWindow(time)) % m_Period); }
-double CSeasonalTime::regression(core_t::TTime time) const -{ - return static_cast<double>(time - m_RegressionOrigin) - / static_cast<double>(this->regressionTimeScale()); +double CSeasonalTime::regression(core_t::TTime time) const { + return static_cast<double>(time - m_RegressionOrigin) / static_cast<double>(this->regressionTimeScale()); }
-double CSeasonalTime::regressionInterval(core_t::TTime start, core_t::TTime end) const -{ - return static_cast<double>(end - start) - / static_cast<double>(this->regressionTimeScale()); +double CSeasonalTime::regressionInterval(core_t::TTime start, core_t::TTime end) const { + return static_cast<double>(end - start) / static_cast<double>(this->regressionTimeScale()); }
-core_t::TTime CSeasonalTime::startOfWindowRepeat(core_t::TTime time) const -{ +core_t::TTime CSeasonalTime::startOfWindowRepeat(core_t::TTime time) const { return this->startOfWindowRepeat(this->windowRepeatStart(), time); }
-core_t::TTime CSeasonalTime::startOfWindow(core_t::TTime time) const -{ +core_t::TTime CSeasonalTime::startOfWindow(core_t::TTime time) const { return this->startOfWindowRepeat(this->windowRepeatStart() + this->windowStart(), time); }
-bool CSeasonalTime::inWindow(core_t::TTime time) const -{ +bool CSeasonalTime::inWindow(core_t::TTime time) const { time = time - this->startOfWindowRepeat(time); return time >= this->windowStart() && time < this->windowEnd(); }
-core_t::TTime CSeasonalTime::period() const -{ +core_t::TTime CSeasonalTime::period() const { return m_Period; }
-void CSeasonalTime::period(core_t::TTime period) -{ +void CSeasonalTime::period(core_t::TTime period) { m_Period = period; }
-core_t::TTime CSeasonalTime::regressionOrigin() const -{ +core_t::TTime CSeasonalTime::regressionOrigin() const { return m_RegressionOrigin; }
-void CSeasonalTime::regressionOrigin(core_t::TTime origin) -{ +void CSeasonalTime::regressionOrigin(core_t::TTime origin) { m_RegressionOrigin = origin; }
-CSeasonalTime::TTimeTimePr CSeasonalTime::window() const -{ +CSeasonalTime::TTimeTimePr CSeasonalTime::window() const { return {this->windowStart(), this->windowEnd()}; }
-core_t::TTime CSeasonalTime::windowLength() const -{ +core_t::TTime
CSeasonalTime::windowLength() const {
    return this->windowEnd() - this->windowStart();
}

-bool CSeasonalTime::windowed() const
-{
+bool CSeasonalTime::windowed() const {
    return this->windowLength() < this->windowRepeat();
}

-double CSeasonalTime::fractionInWindow() const
-{
-    return static_cast<double>(std::max(this->period(), this->windowLength()))
-           / static_cast<double>(this->windowRepeat());
+double CSeasonalTime::fractionInWindow() const {
+    return static_cast<double>(std::max(this->period(), this->windowLength())) / static_cast<double>(this->windowRepeat());
}

-bool CSeasonalTime::excludes(const CSeasonalTime &other) const
-{
-    return std::abs(other.m_Period - m_Period) < std::max(other.m_Period, m_Period) / 20
-           && m_Precedence >= other.m_Precedence;
+bool CSeasonalTime::excludes(const CSeasonalTime& other) const {
+    return std::abs(other.m_Period - m_Period) < std::max(other.m_Period, m_Period) / 20 && m_Precedence >= other.m_Precedence;
}

-core_t::TTime CSeasonalTime::startOfWindowRepeat(core_t::TTime offset, core_t::TTime time) const
-{
+core_t::TTime CSeasonalTime::startOfWindowRepeat(core_t::TTime offset, core_t::TTime time) const {
    return offset + CIntegerTools::floor(time - offset, this->windowRepeat());
}

//////// CDiurnalTime ////////

-CDiurnalTime::CDiurnalTime() :
-    m_StartOfWeek(0), m_WindowStart(0), m_WindowEnd(0)
-{}
+CDiurnalTime::CDiurnalTime() : m_StartOfWeek(0), m_WindowStart(0), m_WindowEnd(0) {
+}

CDiurnalTime::CDiurnalTime(core_t::TTime startOfWeek,
                           core_t::TTime windowStart,
                           core_t::TTime windowEnd,
                           core_t::TTime period,
-                          double precedence) :
-    CSeasonalTime(period, precedence),
-    m_StartOfWeek(startOfWeek),
-    m_WindowStart(windowStart),
-    m_WindowEnd(windowEnd)
-{}
-
-CDiurnalTime *CDiurnalTime::clone() const
-{
+                          double precedence)
+    : CSeasonalTime(period, precedence), m_StartOfWeek(startOfWeek), m_WindowStart(windowStart), m_WindowEnd(windowEnd) {
+}
+
+CDiurnalTime* CDiurnalTime::clone() const {
    return new CDiurnalTime(*this);
}

-bool CDiurnalTime::fromString(const std::string &value)
-{
+bool CDiurnalTime::fromString(const std::string& value) {
    boost::array<core_t::TTime, 5> times;
-    if (core::CPersistUtils::fromString(value, times))
-    {
+    if (core::CPersistUtils::fromString(value, times)) {
        m_StartOfWeek = times[0];
        m_WindowStart = times[1];
-        m_WindowEnd   = times[2];
+        m_WindowEnd = times[2];
        this->period(times[3]);
        this->regressionOrigin(times[4]);
        return true;
@@ -170,8 +136,7 @@ bool CDiurnalTime::fromString(const std::string &value)
    return false;
}

-std::string CDiurnalTime::toString() const
-{
+std::string CDiurnalTime::toString() const {
    boost::array<core_t::TTime, 5> times;
    times[0] = m_StartOfWeek;
    times[1] = m_WindowStart;
@@ -181,61 +146,49 @@ std::string CDiurnalTime::toString() const
    return core::CPersistUtils::toString(times);
}

-core_t::TTime CDiurnalTime::windowRepeat() const
-{
+core_t::TTime CDiurnalTime::windowRepeat() const {
    return core::constants::WEEK;
}

-core_t::TTime CDiurnalTime::windowRepeatStart() const
-{
+core_t::TTime CDiurnalTime::windowRepeatStart() const {
    return m_StartOfWeek;
}

-core_t::TTime CDiurnalTime::windowStart() const
-{
+core_t::TTime CDiurnalTime::windowStart() const {
    return m_WindowStart;
}

-core_t::TTime CDiurnalTime::windowEnd() const
-{
+core_t::TTime CDiurnalTime::windowEnd() const {
    return m_WindowEnd;
}

-bool CDiurnalTime::hasWeekend() const
-{
-    return this->windowLength() == core::constants::WEEKEND
-           || this->windowLength() == core::constants::WEEKDAYS;
+bool CDiurnalTime::hasWeekend() const {
+    return this->windowLength() == core::constants::WEEKEND ||
this->windowLength() == core::constants::WEEKDAYS; } -uint64_t CDiurnalTime::checksum(uint64_t seed) const -{ +uint64_t CDiurnalTime::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_StartOfWeek); seed = CChecksum::calculate(seed, m_WindowStart); seed = CChecksum::calculate(seed, m_WindowEnd); return CChecksum::calculate(seed, this->period()); } -core_t::TTime CDiurnalTime::regressionTimeScale() const -{ +core_t::TTime CDiurnalTime::regressionTimeScale() const { return core::constants::WEEK; } //////// CGeneralPeriodTime //////// -CGeneralPeriodTime::CGeneralPeriodTime(core_t::TTime period, double precedence) : - CSeasonalTime(period, precedence) -{} +CGeneralPeriodTime::CGeneralPeriodTime(core_t::TTime period, double precedence) : CSeasonalTime(period, precedence) { +} -CGeneralPeriodTime *CGeneralPeriodTime::clone() const -{ +CGeneralPeriodTime* CGeneralPeriodTime::clone() const { return new CGeneralPeriodTime(*this); } -bool CGeneralPeriodTime::fromString(const std::string &value) -{ +bool CGeneralPeriodTime::fromString(const std::string& value) { boost::array times; - if (core::CPersistUtils::fromString(value, times)) - { + if (core::CPersistUtils::fromString(value, times)) { this->period(times[0]); this->regressionOrigin(times[1]); return true; @@ -243,81 +196,63 @@ bool CGeneralPeriodTime::fromString(const std::string &value) return false; } -std::string CGeneralPeriodTime::toString() const -{ +std::string CGeneralPeriodTime::toString() const { boost::array times; times[0] = this->period(); times[1] = this->regressionOrigin(); return core::CPersistUtils::toString(times); } -core_t::TTime CGeneralPeriodTime::windowRepeat() const -{ +core_t::TTime CGeneralPeriodTime::windowRepeat() const { return this->period(); } -core_t::TTime CGeneralPeriodTime::windowRepeatStart() const -{ +core_t::TTime CGeneralPeriodTime::windowRepeatStart() const { return 0; } -core_t::TTime CGeneralPeriodTime::windowStart() const -{ +core_t::TTime CGeneralPeriodTime::windowStart() const { return 0; } -core_t::TTime CGeneralPeriodTime::windowEnd() const -{ +core_t::TTime CGeneralPeriodTime::windowEnd() const { return this->period(); } -bool CGeneralPeriodTime::hasWeekend() const -{ +bool CGeneralPeriodTime::hasWeekend() const { return false; } -uint64_t CGeneralPeriodTime::checksum(uint64_t seed) const -{ +uint64_t CGeneralPeriodTime::checksum(uint64_t seed) const { return CChecksum::calculate(seed, this->period()); } -core_t::TTime CGeneralPeriodTime::regressionTimeScale() const -{ +core_t::TTime CGeneralPeriodTime::regressionTimeScale() const { return std::max(core::constants::WEEK, this->period()); } //////// CSeasonalTimeStateSerializer //////// -bool CSeasonalTimeStateSerializer::acceptRestoreTraverser(TSeasonalTimePtr &result, - core::CStateRestoreTraverser &traverser) -{ +bool CSeasonalTimeStateSerializer::acceptRestoreTraverser(TSeasonalTimePtr& result, core::CStateRestoreTraverser& traverser) { std::size_t numResults = 0; - do - { - const std::string &name = traverser.name(); - if (name == DIURNAL_TIME_TAG) - { + do { + const std::string& name = traverser.name(); + if (name == DIURNAL_TIME_TAG) { result.reset(new CDiurnalTime); result->fromString(traverser.value()); ++numResults; - } - else if (name == ARBITRARY_PERIOD_TIME_TAG) - { + } else if (name == ARBITRARY_PERIOD_TIME_TAG) { result.reset(new CGeneralPeriodTime); result->fromString(traverser.value()); ++numResults; - } - else - { + } else { LOG_ERROR("No seasonal time corresponds to name " << traverser.name()); return false; } - 
}
-    while (traverser.next());
+    } while (traverser.next());

-    if (numResults != 1)
-    {
+    if (numResults != 1) {
        LOG_ERROR("Expected 1 (got " << numResults << ") seasonal time tags");
        result.reset();
        return false;
@@ -326,22 +261,14 @@ bool CSeasonalTimeStateSerializer::acceptRestoreTraverser(TSeasonalTimePtr &resu
    return true;
}

-void CSeasonalTimeStateSerializer::acceptPersistInserter(const CSeasonalTime &time,
-                                                         core::CStatePersistInserter &inserter)
-{
-    if (dynamic_cast<const CDiurnalTime*>(&time) != 0)
-    {
+void CSeasonalTimeStateSerializer::acceptPersistInserter(const CSeasonalTime& time, core::CStatePersistInserter& inserter) {
+    if (dynamic_cast<const CDiurnalTime*>(&time) != 0) {
        inserter.insertValue(DIURNAL_TIME_TAG, time.toString());
-    }
-    else if (dynamic_cast<const CGeneralPeriodTime*>(&time) != 0)
-    {
+    } else if (dynamic_cast<const CGeneralPeriodTime*>(&time) != 0) {
        inserter.insertValue(ARBITRARY_PERIOD_TIME_TAG, time.toString());
-    }
-    else
-    {
+    } else {
        LOG_ERROR("Seasonal time with type " << typeid(time).name() << " has no defined name");
    }
}
-
}
}
diff --git a/lib/maths/CSignal.cc b/lib/maths/CSignal.cc
index 46cba5a83b..23b32eae1d 100644
--- a/lib/maths/CSignal.cc
+++ b/lib/maths/CSignal.cc
@@ -15,13 +15,10 @@
#include
#include

-namespace ml
-{
-namespace maths
-{
+namespace ml {
+namespace maths {

-namespace
-{
+namespace {
using TComplex = std::complex<double>;
using TComplexVec = std::vector<TComplex>;

@@ -29,26 +26,21 @@ using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;

//! Scale \p f by \p scale.
-void scale(double scale, TComplexVec &f)
-{
-    for (std::size_t i = 0u; i < f.size(); ++i)
-    {
+void scale(double scale, TComplexVec& f) {
+    for (std::size_t i = 0u; i < f.size(); ++i) {
        f[i] *= scale;
    }
}

//! Compute the radix 2 FFT of \p f in-place.
-void radix2fft(TComplexVec &f)
-{
+void radix2fft(TComplexVec& f) {
    // Perform the appropriate permutation of f(x) by swapping
    // each i in [0, N] with its bit reversal.
    uint64_t bits = CIntegerTools::nextPow2(f.size()) - 1;
-    for (uint64_t i = 0; i < f.size(); ++i)
-    {
+    for (uint64_t i = 0; i < f.size(); ++i) {
        uint64_t j = CIntegerTools::reverseBits(i) >> (64 - bits);
-        if (j > i)
-        {
+        if (j > i) {
            LOG_TRACE(j << " -> " << i);
            std::swap(f[i], f[j]);
        }
@@ -56,15 +48,11 @@ void radix2fft(TComplexVec &f)

    // Apply the twiddle factors.
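    // In outline (a sketch for orientation, not part of the change): each
    // pass below merges pairs of sub-transforms of length `stride` with the
    // standard radix-2 butterfly. For twiddle factor
    // w = exp(i * pi * k / stride),
    //
    //     f[start]          <- f[start] + w * f[start + stride]
    //     f[start + stride] <- f[start] - w * f[start + stride]
    //
    // so log2(N) passes merge the N length-1 transforms left by the bit
    // reversal into a single length-N transform. The sign of the exponent
    // plus the final std::reverse is this implementation's convention; a
    // textbook decimation-in-time FFT applies w = exp(-2 * pi * i * k / N)
    // directly.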
- for (std::size_t stride = 1; stride < f.size(); stride <<= 1) - { - for (std::size_t k = 0u; k < stride; ++k) - { - double t = boost::math::double_constants::pi * static_cast(k) - / static_cast(stride); + for (std::size_t stride = 1; stride < f.size(); stride <<= 1) { + for (std::size_t k = 0u; k < stride; ++k) { + double t = boost::math::double_constants::pi * static_cast(k) / static_cast(stride); TComplex w(std::cos(t), std::sin(t)); - for (std::size_t start = k; start < f.size(); start += 2 * stride) - { + for (std::size_t start = k; start < f.size(); start += 2 * stride) { TComplex fs = f[start]; TComplex tw = w * f[start + stride]; f[start] = fs + tw; @@ -75,39 +63,30 @@ void radix2fft(TComplexVec &f) std::reverse(f.begin() + 1, f.end()); } - } -void CSignal::conj(TComplexVec &f) -{ - for (std::size_t i = 0u; i < f.size(); ++i) - { +void CSignal::conj(TComplexVec& f) { + for (std::size_t i = 0u; i < f.size(); ++i) { f[i] = std::conj(f[i]); } } -void CSignal::hadamard(const TComplexVec &fx, TComplexVec &fy) -{ - for (std::size_t i = 0u; i < fx.size(); ++i) - { +void CSignal::hadamard(const TComplexVec& fx, TComplexVec& fy) { + for (std::size_t i = 0u; i < fx.size(); ++i) { fy[i] *= fx[i]; } } -void CSignal::fft(TComplexVec &f) -{ +void CSignal::fft(TComplexVec& f) { std::size_t n = f.size(); std::size_t p = CIntegerTools::nextPow2(n); std::size_t m = 1 << p; LOG_TRACE("n = " << n << ", m = " << m); - if ((m >> 1) == n) - { + if ((m >> 1) == n) { radix2fft(f); - } - else - { + } else { // We use Bluestein's trick to reformulate as a convolution // which can be computed by padding to a power of 2. @@ -125,10 +104,8 @@ void CSignal::fft(TComplexVec &f) chirp.emplace_back(1.0, 0.0); a[0] = f[0] * chirp[0]; b[0] = chirp[0]; - for (std::size_t i = 1u; i < n; ++i) - { - double t = boost::math::double_constants::pi * static_cast(i * i) - / static_cast(n); + for (std::size_t i = 1u; i < n; ++i) { + double t = boost::math::double_constants::pi * static_cast(i * i) / static_cast(n); chirp.emplace_back(std::cos(t), std::sin(t)); a[i] = f[i] * std::conj(chirp[i]); b[i] = b[m - i] = chirp[i]; @@ -139,35 +116,29 @@ void CSignal::fft(TComplexVec &f) hadamard(a, b); ifft(b); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { f[i] = std::conj(chirp[i]) * b[i]; } } } -void CSignal::ifft(TComplexVec &f) -{ +void CSignal::ifft(TComplexVec& f) { conj(f); fft(f); conj(f); scale(1.0 / static_cast(f.size()), f); } -double CSignal::autocorrelation(std::size_t offset, const TFloatMeanAccumulatorVec &values) -{ +double CSignal::autocorrelation(std::size_t offset, const TFloatMeanAccumulatorVec& values) { return autocorrelation(offset, TFloatMeanAccumulatorCRng(values, 0, values.size())); } -double CSignal::autocorrelation(std::size_t offset, TFloatMeanAccumulatorCRng values) -{ +double CSignal::autocorrelation(std::size_t offset, TFloatMeanAccumulatorCRng values) { std::size_t n = values.size(); TMeanVarAccumulator moments; - for (const auto &value : values) - { - if (CBasicStatistics::count(value) > 0.0) - { + for (const auto& value : values) { + if (CBasicStatistics::count(value) > 0.0) { moments.add(CBasicStatistics::mean(value)); } } @@ -175,15 +146,12 @@ double CSignal::autocorrelation(std::size_t offset, TFloatMeanAccumulatorCRng va double mean = CBasicStatistics::mean(moments); TMeanAccumulator autocorrelation; - for (std::size_t i = 0u; i < values.size(); ++i) - { + for (std::size_t i = 0u; i < values.size(); ++i) { std::size_t j = (i + offset) % n; double ni = 
CBasicStatistics::count(values[i]); double nj = CBasicStatistics::count(values[j]); - if (ni > 0.0 && nj > 0.0) - { - autocorrelation.add( (CBasicStatistics::mean(values[i]) - mean) - * (CBasicStatistics::mean(values[j]) - mean)); + if (ni > 0.0 && nj > 0.0) { + autocorrelation.add((CBasicStatistics::mean(values[i]) - mean) * (CBasicStatistics::mean(values[j]) - mean)); } } @@ -193,20 +161,16 @@ double CSignal::autocorrelation(std::size_t offset, TFloatMeanAccumulatorCRng va return a == v ? 1.0 : a / v; } -void CSignal::autocorrelations(const TFloatMeanAccumulatorVec &values, TDoubleVec &result) -{ - if (values.empty()) - { +void CSignal::autocorrelations(const TFloatMeanAccumulatorVec& values, TDoubleVec& result) { + if (values.empty()) { return; } std::size_t n = values.size(); TMeanVarAccumulator moments; - for (const auto &value : values) - { - if (CBasicStatistics::count(value) > 0.0) - { + for (const auto& value : values) { + if (CBasicStatistics::count(value) > 0.0) { moments.add(CBasicStatistics::mean(value)); } } @@ -215,29 +179,22 @@ void CSignal::autocorrelations(const TFloatMeanAccumulatorVec &values, TDoubleVe TComplexVec f; f.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { std::size_t j = i; - for (/**/; j < n && CBasicStatistics::count(values[j]) == 0; ++j); - if (i != j) - { + for (/**/; j < n && CBasicStatistics::count(values[j]) == 0; ++j) + ; + if (i != j) { // Infer missing values by linearly interpolating. - if (j == n) - { + if (j == n) { f.resize(n, TComplex(0.0, 0.0)); break; - } - else if (i == 0) - { + } else if (i == 0) { f.resize(j - 1, TComplex(0.0, 0.0)); - } - else - { - for (std::size_t k = i; k < j; ++k) - { + } else { + for (std::size_t k = i; k < j; ++k) { double alpha = static_cast(k - i + 1) / static_cast(j - i + 1); - double real = CBasicStatistics::mean(values[j]) - mean; - f.push_back((1.0 - alpha) * f[i-1] + alpha * TComplex(real, 0.0)); + double real = CBasicStatistics::mean(values[j]) - mean; + f.push_back((1.0 - alpha) * f[i - 1] + alpha * TComplex(real, 0.0)); } } i = j; @@ -252,11 +209,9 @@ void CSignal::autocorrelations(const TFloatMeanAccumulatorVec &values, TDoubleVe ifft(f); result.reserve(n); - for (std::size_t i = 1u; i < n; ++i) - { + for (std::size_t i = 1u; i < n; ++i) { result.push_back(f[i].real() / variance / static_cast(n)); } } - } } diff --git a/lib/maths/CSpline.cc b/lib/maths/CSpline.cc index bff5f5e699..462e7f1977 100644 --- a/lib/maths/CSpline.cc +++ b/lib/maths/CSpline.cc @@ -9,56 +9,36 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace spline_detail -{ +namespace ml { +namespace maths { +namespace spline_detail { -namespace -{ +namespace { //! Sanity check the diagonals and the vector dimensions are //! consistent. 
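//! The system is stored in compact form: \p a holds the n - 1 subdiagonal
//! entries, \p b the n main diagonal entries and \p c the n - 1
//! superdiagonal entries, which is why the checks below require
//! a.size() + 1 == b.size() and c.size() + 1 == b.size(); the right-hand
//! side \p x must match the main diagonal's length.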
-bool checkTridiagonal(const TDoubleVec &a, - const TDoubleVec &b, - const TDoubleVec &c, - const TDoubleVec &x) -{ - if (a.size() + 1 != b.size()) - { +bool checkTridiagonal(const TDoubleVec& a, const TDoubleVec& b, const TDoubleVec& c, const TDoubleVec& x) { + if (a.size() + 1 != b.size()) { LOG_ERROR("Lower diagonal and main diagonal inconsistent:" - << " a = " << core::CContainerPrinter::print(a) - << " b = " << core::CContainerPrinter::print(b)); + << " a = " << core::CContainerPrinter::print(a) << " b = " << core::CContainerPrinter::print(b)); return false; } - if (c.size() + 1 != b.size()) - { + if (c.size() + 1 != b.size()) { LOG_ERROR("Upper diagonal and main diagonal inconsistent:" - << " b = " << core::CContainerPrinter::print(b) - << " c = " << core::CContainerPrinter::print(c)); + << " b = " << core::CContainerPrinter::print(b) << " c = " << core::CContainerPrinter::print(c)); return false; } - if (b.size() != x.size()) - { + if (b.size() != x.size()) { LOG_ERROR("Dimension mismatch:" - << " x = " << core::CContainerPrinter::print(x) - << ", b = " << core::CContainerPrinter::print(b)) + << " x = " << core::CContainerPrinter::print(x) << ", b = " << core::CContainerPrinter::print(b)) return false; } return true; } - } -bool solveTridiagonal(const TDoubleVec &a, - const TDoubleVec &b, - TDoubleVec &c, - TDoubleVec &x) -{ - if (!checkTridiagonal(a, b, c, x)) - { +bool solveTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDoubleVec& c, TDoubleVec& x) { + if (!checkTridiagonal(a, b, c, x)) { return false; } @@ -78,39 +58,31 @@ bool solveTridiagonal(const TDoubleVec &a, std::size_t n = x.size(); // Eliminate the lower diagonal. - if (b[0] == 0.0) - { - LOG_ERROR("Badly conditioned: " - << core::CContainerPrinter::print(b)); + if (b[0] == 0.0) { + LOG_ERROR("Badly conditioned: " << core::CContainerPrinter::print(b)); return false; } c[0] = c[0] / b[0]; x[0] = x[0] / b[0]; - for (std::size_t i = 1; i + 1 < n; ++i) - { - double m = (b[i] - a[i-1] * c[i-1]); - if (m == 0.0) - { - LOG_ERROR("Badly conditioned: " - << core::CContainerPrinter::print(b)); + for (std::size_t i = 1; i + 1 < n; ++i) { + double m = (b[i] - a[i - 1] * c[i - 1]); + if (m == 0.0) { + LOG_ERROR("Badly conditioned: " << core::CContainerPrinter::print(b)); return false; } c[i] = c[i] / m; - x[i] = (x[i] - a[i-1] * x[i-1]) / m; + x[i] = (x[i] - a[i - 1] * x[i - 1]) / m; } - double m = (b[n-1] - a[n-2] * c[n-2]); - if (m == 0.0) - { - LOG_ERROR("Badly conditioned: " - << core::CContainerPrinter::print(b)); + double m = (b[n - 1] - a[n - 2] * c[n - 2]); + if (m == 0.0) { + LOG_ERROR("Badly conditioned: " << core::CContainerPrinter::print(b)); return false; } - x[n-1] = (x[n-1] - a[n-2] * x[n-2]) / m; + x[n - 1] = (x[n - 1] - a[n - 2] * x[n - 2]) / m; // Back substitution. 
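    // After the elimination sweep the system is upper bidiagonal with a
    // unit main diagonal: row i reads x[i] + c[i] * x[i + 1] = d[i], where
    // the updated superdiagonal and right-hand side have been stored in
    // place in c and x. The loop below therefore walks right to left
    // applying x[i] -= c[i] * x[i + 1], i.e. the back substitution half of
    // the Thomas algorithm.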
- for (std::size_t i = n - 1; i-- > 0; /**/) - { - x[i] -= c[i] * x[i+1]; + for (std::size_t i = n - 1; i-- > 0; /**/) { + x[i] -= c[i] * x[i + 1]; } LOG_TRACE("x = " << core::CContainerPrinter::print(x)); @@ -118,15 +90,8 @@ bool solveTridiagonal(const TDoubleVec &a, return true; } -bool solvePeturbedTridiagonal(const TDoubleVec &a, - const TDoubleVec &b, - TDoubleVec &c, - TDoubleVec &u, - const TDoubleVec &v, - TDoubleVec &x) -{ - if (!checkTridiagonal(a, b, c, x)) - { +bool solvePeturbedTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDoubleVec& c, TDoubleVec& u, const TDoubleVec& v, TDoubleVec& x) { + if (!checkTridiagonal(a, b, c, x)) { return false; } @@ -157,56 +122,46 @@ bool solvePeturbedTridiagonal(const TDoubleVec &a, std::size_t n = x.size(); // Eliminate the lower diagonal. - if (b[0] == 0.0) - { - LOG_ERROR("Badly conditioned: " - << core::CContainerPrinter::print(b)); + if (b[0] == 0.0) { + LOG_ERROR("Badly conditioned: " << core::CContainerPrinter::print(b)); return false; } c[0] = c[0] / b[0]; x[0] = x[0] / b[0]; u[0] = u[0] / b[0]; - for (std::size_t i = 1; i + 1 < n; ++i) - { - double m = (b[i] - a[i-1] * c[i-1]); - if (m == 0.0) - { - LOG_ERROR("Badly conditioned: " - << core::CContainerPrinter::print(b)); + for (std::size_t i = 1; i + 1 < n; ++i) { + double m = (b[i] - a[i - 1] * c[i - 1]); + if (m == 0.0) { + LOG_ERROR("Badly conditioned: " << core::CContainerPrinter::print(b)); return false; } c[i] = c[i] / m; - x[i] = (x[i] - a[i-1] * x[i-1]) / m; - u[i] = (u[i] - a[i-1] * u[i-1]) / m; + x[i] = (x[i] - a[i - 1] * x[i - 1]) / m; + u[i] = (u[i] - a[i - 1] * u[i - 1]) / m; } - double m = (b[n-1] - a[n-2] * c[n-2]); - if (m == 0.0) - { - LOG_ERROR("Badly conditioned: " - << core::CContainerPrinter::print(b)); + double m = (b[n - 1] - a[n - 2] * c[n - 2]); + if (m == 0.0) { + LOG_ERROR("Badly conditioned: " << core::CContainerPrinter::print(b)); return false; } - x[n-1] = (x[n-1] - a[n-2] * x[n-2]) / m; - u[n-1] = (u[n-1] - a[n-2] * u[n-2]) / m; + x[n - 1] = (x[n - 1] - a[n - 2] * x[n - 2]) / m; + u[n - 1] = (u[n - 1] - a[n - 2] * u[n - 2]) / m; // Back substitution. - for (std::size_t i = n - 1; i-- > 0; /**/) - { - x[i] = x[i] - c[i] * x[i+1]; - u[i] = u[i] - c[i] * u[i+1]; + for (std::size_t i = n - 1; i-- > 0; /**/) { + x[i] = x[i] - c[i] * x[i + 1]; + u[i] = u[i] - c[i] * u[i + 1]; } // Apply the correction. double vx = 0.0; double vu = 0.0; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { vx += v[i] * x[i]; vu += v[i] * u[i]; } double delta = vx / (1.0 + vu); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { x[i] -= delta * u[i]; } @@ -214,7 +169,6 @@ bool solvePeturbedTridiagonal(const TDoubleVec &a, return true; } - } } } diff --git a/lib/maths/CStatisticalTests.cc b/lib/maths/CStatisticalTests.cc index c80a92a13b..9814a2d62f 100644 --- a/lib/maths/CStatisticalTests.cc +++ b/lib/maths/CStatisticalTests.cc @@ -23,13 +23,10 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { //! Compute the significance of the Kolmogorov-Smirnov test //! statistic, \p lambda. In particular, the significance is @@ -43,20 +40,17 @@ namespace //!
//! \see Journal of the Royal Statistical Society, ser. B, //! vol. 32, pp 115-122 M.A. Stephens. -double significance(double lambda) -{ +double significance(double lambda) { static const double EPS1 = 0.001; static const double EPS2 = 1e-8; double sum = 0.0; double fac = 2.0; double t2 = -2.0 * lambda * lambda; double termbf = 0.0; - for (std::size_t j = 1; j <= 100; ++j) - { + for (std::size_t j = 1; j <= 100; ++j) { double term = fac * std::exp(t2 * static_cast(j * j)); sum += term; - if (std::fabs(term) <= EPS1 * termbf || std::fabs(term) <= EPS2 * sum) - { + if (std::fabs(term) <= EPS1 * termbf || std::fabs(term) <= EPS2 * sum) { return sum; } fac = -fac; @@ -70,59 +64,42 @@ const std::string SIZE_TAG("a"); const std::string T_TAG("b"); const std::string F_TAG("c"); const std::string EMPTY_STRING; - } -double CStatisticalTests::leftTailFTest(double x, double d1, double d2) -{ - if (x < 0.0) - { +double CStatisticalTests::leftTailFTest(double x, double d1, double d2) { + if (x < 0.0) { return 0.0; } - if (boost::math::isinf(x)) - { + if (boost::math::isinf(x)) { return 1.0; } - try - { + try { boost::math::fisher_f_distribution<> F(d1, d2); return boost::math::cdf(F, x); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute significance " << e.what() - << " d1 = " << d1 << ", d2 = " << d2 << ", x = " << x); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute significance " << e.what() << " d1 = " << d1 << ", d2 = " << d2 << ", x = " << x); } return 1.0; } -double CStatisticalTests::rightTailFTest(double x, double d1, double d2) -{ - if (x < 0.0) - { +double CStatisticalTests::rightTailFTest(double x, double d1, double d2) { + if (x < 0.0) { return 1.0; } - if (boost::math::isinf(x)) - { + if (boost::math::isinf(x)) { return 0.0; } - try - { + try { boost::math::fisher_f_distribution<> F(d1, d2); return boost::math::cdf(boost::math::complement(F, x)); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute significance " << e.what() - << " d1 = " << d1 << ", d2 = " << d2 << ", x = " << x); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute significance " << e.what() << " d1 = " << d1 << ", d2 = " << d2 << ", x = " << x); } return 1.0; } -double CStatisticalTests::twoSampleKS(TDoubleVec x, TDoubleVec y) -{ - if (x.empty() || y.empty()) - { +double CStatisticalTests::twoSampleKS(TDoubleVec x, TDoubleVec y) { + if (x.empty() || y.empty()) { return 1.0; } @@ -136,16 +113,13 @@ double CStatisticalTests::twoSampleKS(TDoubleVec x, TDoubleVec y) y.push_back(boost::numeric::bounds::highest()); double D = 0.0; - for (std::size_t i = 0, j = 0; i < nx && j < ny; /**/) - { + for (std::size_t i = 0, j = 0; i < nx && j < ny; /**/) { double xi = x[i]; double yj = y[j]; - if (xi <= yj) - { + if (xi <= yj) { ++i; } - if (yj <= xi) - { + if (yj <= xi) { ++j; } double Fx = static_cast(i) / static_cast(nx); @@ -153,69 +127,50 @@ double CStatisticalTests::twoSampleKS(TDoubleVec x, TDoubleVec y) D = std::max(D, std::fabs(Fx - Fy)); } - double neff = std::sqrt( static_cast(nx) - * static_cast(ny) - / static_cast(nx + ny)); - double result = significance((neff + 0.12 + 0.11/neff) * D); - LOG_TRACE("nx = " << nx - << ", ny = " << ny - << ", D = " << D - << ", significance = " << result); + double neff = std::sqrt(static_cast(nx) * static_cast(ny) / static_cast(nx + ny)); + double result = significance((neff + 0.12 + 0.11 / neff) * D); + LOG_TRACE("nx = " << nx << ", ny = " << ny << ", D = " << D << ", significance = " << result); return 
result; } -CStatisticalTests::CCramerVonMises::CCramerVonMises(std::size_t size) : - m_Size(CTools::truncate(size, N[0] - 1, N[12] - 1)) -{ +CStatisticalTests::CCramerVonMises::CCramerVonMises(std::size_t size) : m_Size(CTools::truncate(size, N[0] - 1, N[12] - 1)) { m_F.reserve(size); } -CStatisticalTests::CCramerVonMises::CCramerVonMises(core::CStateRestoreTraverser &traverser) -{ - traverser.traverseSubLevel(boost::bind(&CStatisticalTests::CCramerVonMises::acceptRestoreTraverser, - this, - _1)); +CStatisticalTests::CCramerVonMises::CCramerVonMises(core::CStateRestoreTraverser& traverser) { + traverser.traverseSubLevel(boost::bind(&CStatisticalTests::CCramerVonMises::acceptRestoreTraverser, this, _1)); } -bool CStatisticalTests::CCramerVonMises::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CStatisticalTests::CCramerVonMises::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE_SETUP_TEARDOWN(SIZE_TAG, /*no-op*/, core::CStringUtils::stringToType(traverser.value(), m_Size), m_F.reserve(m_Size)) RESTORE(T_TAG, m_T.fromDelimited(traverser.value())) - RESTORE_SETUP_TEARDOWN(F_TAG, int f, - core::CStringUtils::stringToType(traverser.value(), f), - m_F.push_back(static_cast(f))) - } - while (traverser.next()); + RESTORE_SETUP_TEARDOWN( + F_TAG, int f, core::CStringUtils::stringToType(traverser.value(), f), m_F.push_back(static_cast(f))) + } while (traverser.next()); return true; } -void CStatisticalTests::CCramerVonMises::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CStatisticalTests::CCramerVonMises::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(SIZE_TAG, m_Size); inserter.insertValue(T_TAG, m_T.toDelimited()); - for (std::size_t i= 0u; i < m_F.size(); ++i) - { + for (std::size_t i = 0u; i < m_F.size(); ++i) { inserter.insertValue(F_TAG, static_cast(m_F[i])); } } -void CStatisticalTests::CCramerVonMises::addF(double f) -{ +void CStatisticalTests::CCramerVonMises::addF(double f) { using TDoubleVec = std::vector; - if (m_F.size() == m_Size) - { + if (m_F.size() == m_Size) { TDoubleVec ff; ff.reserve(m_F.size() + 1); - for (std::size_t i = 0u; i < m_F.size(); ++i) - { + for (std::size_t i = 0u; i < m_F.size(); ++i) { ff.push_back(static_cast(m_F[i]) / SCALE); } ff.push_back(f); @@ -225,63 +180,46 @@ void CStatisticalTests::CCramerVonMises::addF(double f) // Compute the test statistic. double n = static_cast(ff.size()); double t = 1.0 / (12.0 * n); - for (std::size_t i = 0u; i < ff.size(); ++i) - { + for (std::size_t i = 0u; i < ff.size(); ++i) { double r = (2.0 * static_cast(i) + 1.0) / (2.0 * n) - ff[i]; t += r * r; } m_T.add(t); - } - else - { + } else { m_F.push_back(static_cast(SCALE * f)); } } -double CStatisticalTests::CCramerVonMises::pValue() const -{ - if (CBasicStatistics::count(m_T) == 0.0) - { +double CStatisticalTests::CCramerVonMises::pValue() const { + if (CBasicStatistics::count(m_T) == 0.0) { return 1.0; } // Linearly interpolate between the rows of the T statistic // values. 
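    // T_VALUES tabulates critical values of the test statistic for the
    // sample sizes in N, so for the actual size m_Size + 1 the bracketing
    // rows are blended linearly. For example, a size of 15 lies halfway
    // between N[8] = 10 and N[9] = 20, giving alpha = beta = 0.5 and
    // tt[i] = 0.5 * T_VALUES[9][i] + 0.5 * T_VALUES[8][i].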
double tt[16]; - ptrdiff_t row = CTools::truncate(std::lower_bound(boost::begin(N), - boost::end(N), - m_Size + 1) - N, - ptrdiff_t(1), - ptrdiff_t(12)); - double alpha = static_cast(m_Size + 1 - N[row-1]) - / static_cast(N[row] - N[row-1]); + ptrdiff_t row = CTools::truncate(std::lower_bound(boost::begin(N), boost::end(N), m_Size + 1) - N, ptrdiff_t(1), ptrdiff_t(12)); + double alpha = static_cast(m_Size + 1 - N[row - 1]) / static_cast(N[row] - N[row - 1]); double beta = 1.0 - alpha; - for (std::size_t i = 0u; i < 16; ++i) - { - tt[i] = alpha * T_VALUES[row][i] + beta * T_VALUES[row-1][i]; + for (std::size_t i = 0u; i < 16; ++i) { + tt[i] = alpha * T_VALUES[row][i] + beta * T_VALUES[row - 1][i]; } - LOG_TRACE("n = " << m_Size + 1 - << ", tt = " << core::CContainerPrinter::print(tt)); + LOG_TRACE("n = " << m_Size + 1 << ", tt = " << core::CContainerPrinter::print(tt)); double t = CBasicStatistics::mean(m_T); LOG_TRACE("t = " << t); - if (t == 0.0) - { + if (t == 0.0) { return 1.0; } - ptrdiff_t col = CTools::truncate(std::lower_bound(boost::begin(tt), - boost::end(tt), t) - tt, - ptrdiff_t(1), - ptrdiff_t(15)); - double a = tt[col-1]; + ptrdiff_t col = CTools::truncate(std::lower_bound(boost::begin(tt), boost::end(tt), t) - tt, ptrdiff_t(1), ptrdiff_t(15)); + double a = tt[col - 1]; double b = tt[col]; - double fa = P_VALUES[col-1]; + double fa = P_VALUES[col - 1]; double fb = P_VALUES[col]; - if (fb <= 0.5) - { + if (fb <= 0.5) { // The following fits p(T) = exp(-m/T + c) to extrapolate // for t less than the first knot point. Solving for m and // c using the value at the first two knot points (a, p(a)) @@ -289,7 +227,7 @@ double CStatisticalTests::CCramerVonMises::pValue() const // m = ab/(b-a) * log(f(b)/f(a)) // c = b/(b-a) * log(f(b)/f(a)) + log(f(a)) - double m = a*b / (b - a) * std::log((fb) / (fa)); + double m = a * b / (b - a) * std::log((fb) / (fa)); double c = b / (b - a) * std::log((fb) / (fa)) + std::log(fa); double p = 1.0 - std::exp(-m / t + c); @@ -312,43 +250,227 @@ double CStatisticalTests::CCramerVonMises::pValue() const return p; } -void CStatisticalTests::CCramerVonMises::age(double factor) -{ +void CStatisticalTests::CCramerVonMises::age(double factor) { m_T.age(factor); } -uint64_t CStatisticalTests::CCramerVonMises::checksum(uint64_t seed) const -{ +uint64_t CStatisticalTests::CCramerVonMises::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_Size); seed = CChecksum::calculate(seed, m_T); return CChecksum::calculate(seed, m_F); } const double CStatisticalTests::CCramerVonMises::P_VALUES[16] = - { - 0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99, 0.999 - }; -const std::size_t CStatisticalTests::CCramerVonMises::N[13] = - { - 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 50, 200, 1000 - }; -const double CStatisticalTests::CCramerVonMises::T_VALUES[13][16] = - { - {0.04326, 0.04565, 0.04962, 0.05758, 0.06554, 0.07350, 0.08146, 0.12659, 0.21522, 0.24743, 0.28854, 0.34343, 0.42480, 0.48901, 0.55058, 0.62858}, - {0.03319, 0.03774, 0.04360, 0.05205, 0.06091, 0.06887, 0.07683, 0.12542, 0.21338, 0.24167, 0.27960, 0.33785, 0.43939, 0.53318, 0.63980, 0.82240}, - {0.03002, 0.03536, 0.04149, 0.05093, 0.05896, 0.06681, 0.07493, 0.12406, 0.21171, 0.24260, 0.28336, 0.34184, 0.44206, 0.54200, 0.67017, 0.92970}, - {0.02869, 0.03422, 0.04036, 0.04969, 0.05800, 0.06610, 0.07427, 0.12250, 0.21164, 0.24237, 0.28305, 0.34238, 0.44697, 0.55056, 0.68352, 0.98730}, - {0.02796, 0.03344, 0.03959, 0.04911, 0.05747, 0.06548, 0.07351, 0.12200, 0.21110, 
0.24198, 0.28331, 0.34352, 0.44911, 0.55572, 0.69443, 1.02000}, - {0.02741, 0.03292, 0.03914, 0.04869, 0.05698, 0.06492, 0.07297, 0.12158, 0.21087, 0.24197, 0.28345, 0.34397, 0.45100, 0.55935, 0.70154, 1.04250}, - {0.02702, 0.03257, 0.03875, 0.04823, 0.05650, 0.06448, 0.07254, 0.12113, 0.21065, 0.24186, 0.28356, 0.34458, 0.45240, 0.56220, 0.70720, 1.05910}, - {0.02679, 0.03230, 0.03850, 0.04798, 0.05625, 0.06423, 0.07228, 0.12088, 0.21051, 0.24179, 0.28361, 0.34487, 0.45367, 0.56493, 0.71233, 1.07220}, - {0.02657, 0.03209, 0.03830, 0.04778, 0.05605, 0.06403, 0.07208, 0.12068, 0.21040, 0.24173, 0.28365, 0.34510, 0.45441, 0.56643, 0.71531, 1.08220}, - {0.02564, 0.03120, 0.03742, 0.04689, 0.05515, 0.06312, 0.07117, 0.11978, 0.20989, 0.24148, 0.28384, 0.34617, 0.45778, 0.57331, 0.72895, 1.11898}, - {0.02512, 0.03068, 0.03690, 0.04636, 0.05462, 0.06258, 0.07062, 0.11924, 0.20958, 0.24132, 0.28396, 0.34682, 0.45986, 0.57754, 0.73728, 1.14507}, - {0.02488, 0.03043, 0.03665, 0.04610, 0.05435, 0.06231, 0.07035, 0.11897, 0.20943, 0.24125, 0.28402, 0.34715, 0.46091, 0.57968, 0.74149, 1.15783}, - {0.02481, 0.03037, 0.03658, 0.04603, 0.05428, 0.06224, 0.07027, 0.11889, 0.20938, 0.24123, 0.28403, 0.34724, 0.46119, 0.58026, 0.74262, 1.16120} - }; + {0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99, 0.999}; +const std::size_t CStatisticalTests::CCramerVonMises::N[13] = {2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 50, 200, 1000}; +const double CStatisticalTests::CCramerVonMises::T_VALUES[13][16] = {{0.04326, + 0.04565, + 0.04962, + 0.05758, + 0.06554, + 0.07350, + 0.08146, + 0.12659, + 0.21522, + 0.24743, + 0.28854, + 0.34343, + 0.42480, + 0.48901, + 0.55058, + 0.62858}, + {0.03319, + 0.03774, + 0.04360, + 0.05205, + 0.06091, + 0.06887, + 0.07683, + 0.12542, + 0.21338, + 0.24167, + 0.27960, + 0.33785, + 0.43939, + 0.53318, + 0.63980, + 0.82240}, + {0.03002, + 0.03536, + 0.04149, + 0.05093, + 0.05896, + 0.06681, + 0.07493, + 0.12406, + 0.21171, + 0.24260, + 0.28336, + 0.34184, + 0.44206, + 0.54200, + 0.67017, + 0.92970}, + {0.02869, + 0.03422, + 0.04036, + 0.04969, + 0.05800, + 0.06610, + 0.07427, + 0.12250, + 0.21164, + 0.24237, + 0.28305, + 0.34238, + 0.44697, + 0.55056, + 0.68352, + 0.98730}, + {0.02796, + 0.03344, + 0.03959, + 0.04911, + 0.05747, + 0.06548, + 0.07351, + 0.12200, + 0.21110, + 0.24198, + 0.28331, + 0.34352, + 0.44911, + 0.55572, + 0.69443, + 1.02000}, + {0.02741, + 0.03292, + 0.03914, + 0.04869, + 0.05698, + 0.06492, + 0.07297, + 0.12158, + 0.21087, + 0.24197, + 0.28345, + 0.34397, + 0.45100, + 0.55935, + 0.70154, + 1.04250}, + {0.02702, + 0.03257, + 0.03875, + 0.04823, + 0.05650, + 0.06448, + 0.07254, + 0.12113, + 0.21065, + 0.24186, + 0.28356, + 0.34458, + 0.45240, + 0.56220, + 0.70720, + 1.05910}, + {0.02679, + 0.03230, + 0.03850, + 0.04798, + 0.05625, + 0.06423, + 0.07228, + 0.12088, + 0.21051, + 0.24179, + 0.28361, + 0.34487, + 0.45367, + 0.56493, + 0.71233, + 1.07220}, + {0.02657, + 0.03209, + 0.03830, + 0.04778, + 0.05605, + 0.06403, + 0.07208, + 0.12068, + 0.21040, + 0.24173, + 0.28365, + 0.34510, + 0.45441, + 0.56643, + 0.71531, + 1.08220}, + {0.02564, + 0.03120, + 0.03742, + 0.04689, + 0.05515, + 0.06312, + 0.07117, + 0.11978, + 0.20989, + 0.24148, + 0.28384, + 0.34617, + 0.45778, + 0.57331, + 0.72895, + 1.11898}, + {0.02512, + 0.03068, + 0.03690, + 0.04636, + 0.05462, + 0.06258, + 0.07062, + 0.11924, + 0.20958, + 0.24132, + 0.28396, + 0.34682, + 0.45986, + 0.57754, + 0.73728, + 1.14507}, + {0.02488, + 0.03043, + 0.03665, + 0.04610, + 0.05435, + 0.06231, 
+ 0.07035, + 0.11897, + 0.20943, + 0.24125, + 0.28402, + 0.34715, + 0.46091, + 0.57968, + 0.74149, + 1.15783}, + {0.02481, + 0.03037, + 0.03658, + 0.04603, + 0.05428, + 0.06224, + 0.07027, + 0.11889, + 0.20938, + 0.24123, + 0.28403, + 0.34724, + 0.46119, + 0.58026, + 0.74262, + 1.16120}}; const double CStatisticalTests::CCramerVonMises::SCALE(65536.0); - } } diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index 12f66787a5..a696bf7369 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -6,11 +6,11 @@ #include -#include -#include +#include #include #include -#include +#include +#include #include #include @@ -21,8 +21,8 @@ #include #include #include -#include #include +#include #include #include @@ -31,14 +31,11 @@ #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { using namespace time_series_change_detector_detail; -namespace -{ +namespace { using TDouble1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; using TDouble4Vec1Vec = core::CSmallVector; @@ -64,148 +61,122 @@ const double MINIMUM_SCALE{0.1}; const double MAXIMUM_SCALE{10.0}; } -SChangeDescription::SChangeDescription(EDescription description, - double value, - const TPriorPtr &residualModel) : - s_Description{description}, - s_Value{value}, - s_ResidualModel{residualModel} -{} +SChangeDescription::SChangeDescription(EDescription description, double value, const TPriorPtr& residualModel) + : s_Description{description}, s_Value{value}, s_ResidualModel{residualModel} { +} -std::string SChangeDescription::print() const -{ +std::string SChangeDescription::print() const { std::string result; - switch (s_Description) - { - case E_LevelShift: result += "level shift by "; break; - case E_LinearScale: result += "linear scale by "; break; - case E_TimeShift: result += "time shift by "; break; + switch (s_Description) { + case E_LevelShift: + result += "level shift by "; + break; + case E_LinearScale: + result += "linear scale by "; + break; + case E_TimeShift: + result += "time shift by "; + break; } return result + core::CStringUtils::typeToString(s_Value[0]); } -CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel, +CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector(const TDecompositionPtr& trendModel, + const TPriorPtr& residualModel, core_t::TTime minimumTimeToDetect, core_t::TTime maximumTimeToDetect, - double minimumDeltaBicToDetect) : - m_MinimumTimeToDetect{minimumTimeToDetect}, - m_MaximumTimeToDetect{maximumTimeToDetect}, - m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, - m_SampleCount{0}, m_CurrentEvidenceOfChange{0.0}, - m_ChangeModels{boost::make_shared(trendModel, residualModel), - boost::make_shared(trendModel, residualModel), - boost::make_shared(trendModel, residualModel, -core::constants::HOUR), - boost::make_shared(trendModel, residualModel, +core::constants::HOUR)} -{ - if (trendModel->seasonalComponents().size() > 0) - { + double minimumDeltaBicToDetect) + : m_MinimumTimeToDetect{minimumTimeToDetect}, + m_MaximumTimeToDetect{maximumTimeToDetect}, + m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, + m_SampleCount{0}, + m_CurrentEvidenceOfChange{0.0}, + m_ChangeModels{boost::make_shared(trendModel, residualModel), + boost::make_shared(trendModel, residualModel), + boost::make_shared(trendModel, residualModel, -core::constants::HOUR), + 
boost::make_shared(trendModel, residualModel, +core::constants::HOUR)} { + if (trendModel->seasonalComponents().size() > 0) { m_ChangeModels.push_back(boost::make_shared(trendModel, residualModel)); } } -bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ +bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { auto model = m_ChangeModels.begin(); - do - { + do { const std::string name{traverser.name()}; RESTORE_BUILT_IN(MINIMUM_TIME_TO_DETECT, m_MinimumTimeToDetect) RESTORE_BUILT_IN(MAXIMUM_TIME_TO_DETECT, m_MaximumTimeToDetect) RESTORE_BUILT_IN(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) RESTORE_BUILT_IN(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange) - RESTORE_SETUP_TEARDOWN(MIN_TIME_TAG, - core_t::TTime time, - core::CStringUtils::stringToType(traverser.value(), time), - m_TimeRange.add(time)) - RESTORE_SETUP_TEARDOWN(MAX_TIME_TAG, - core_t::TTime time, - core::CStringUtils::stringToType(traverser.value(), time), - m_TimeRange.add(time)) - RESTORE(CHANGE_MODEL_TAG, traverser.traverseSubLevel(boost::bind( - &CUnivariateChangeModel::acceptRestoreTraverser, - (model++)->get(), boost::cref(params), _1))) - } - while (traverser.next()); + RESTORE_SETUP_TEARDOWN( + MIN_TIME_TAG, core_t::TTime time, core::CStringUtils::stringToType(traverser.value(), time), m_TimeRange.add(time)) + RESTORE_SETUP_TEARDOWN( + MAX_TIME_TAG, core_t::TTime time, core::CStringUtils::stringToType(traverser.value(), time), m_TimeRange.add(time)) + RESTORE(CHANGE_MODEL_TAG, + traverser.traverseSubLevel( + boost::bind(&CUnivariateChangeModel::acceptRestoreTraverser, (model++)->get(), boost::cref(params), _1))) + } while (traverser.next()); return true; } -void CUnivariateTimeSeriesChangeDetector::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CUnivariateTimeSeriesChangeDetector::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(MINIMUM_TIME_TO_DETECT, m_MinimumTimeToDetect); inserter.insertValue(MAXIMUM_TIME_TO_DETECT, m_MaximumTimeToDetect); - inserter.insertValue(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect, - core::CIEEE754::E_SinglePrecision); + inserter.insertValue(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect, core::CIEEE754::E_SinglePrecision); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); - inserter.insertValue(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange, - core::CIEEE754::E_SinglePrecision); - if (m_TimeRange.initialized()) - { + inserter.insertValue(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange, core::CIEEE754::E_SinglePrecision); + if (m_TimeRange.initialized()) { inserter.insertValue(MIN_TIME_TAG, m_TimeRange.min()); inserter.insertValue(MAX_TIME_TAG, m_TimeRange.max()); } - for (const auto &model : m_ChangeModels) - { - inserter.insertLevel(CHANGE_MODEL_TAG, - boost::bind(&CUnivariateChangeModel::acceptPersistInserter, - model.get(), _1)); + for (const auto& model : m_ChangeModels) { + inserter.insertLevel(CHANGE_MODEL_TAG, boost::bind(&CUnivariateChangeModel::acceptPersistInserter, model.get(), _1)); } } -TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() -{ - if (m_TimeRange.range() > m_MinimumTimeToDetect) - { +TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() { + if (m_TimeRange.range() > 
m_MinimumTimeToDetect) { std::size_t candidate{}; double p{this->decisionFunction(candidate)}; - if (p > 1.0) - { + if (p > 1.0) { return m_ChangeModels[candidate]->change(); } - m_CurrentEvidenceOfChange = - m_ChangeModels[0]->bic() - m_ChangeModels[candidate]->bic(); + m_CurrentEvidenceOfChange = m_ChangeModels[0]->bic() - m_ChangeModels[candidate]->bic(); } return TOptionalChangeDescription(); } -double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t &change) const -{ +double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t& change) const { using TChangeModelPtr5VecCItr = TChangeModelPtr5Vec::const_iterator; using TDoubleChangeModelPtr5VecCItrPr = std::pair; using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; - if (m_SampleCount <= COUNT_TO_INITIALIZE) - { + if (m_SampleCount <= COUNT_TO_INITIALIZE) { return 0.0; } double noChangeBic{m_ChangeModels[0]->bic()}; TMinAccumulator candidates; - for (auto i = m_ChangeModels.begin() + 1; i != m_ChangeModels.end(); ++i) - { + for (auto i = m_ChangeModels.begin() + 1; i != m_ChangeModels.end(); ++i) { candidates.add({(*i)->bic(), i}); } candidates.sort(); - double evidences[]{noChangeBic - candidates[0].first, - noChangeBic - candidates[1].first}; + double evidences[]{noChangeBic - candidates[0].first, noChangeBic - candidates[1].first}; double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()}; double x[]{evidences[0] / m_MinimumDeltaBicToDetect, 2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect, evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence, - static_cast(m_TimeRange.range() - m_MinimumTimeToDetect) - / static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; - double p{ CTools::logisticFunction(x[0], 0.05, 1.0) - * CTools::logisticFunction(x[1], 0.1, 1.0) - * (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) - * CTools::logisticFunction(x[3], 0.2, 0.5)}; - LOG_TRACE("p(" << (*candidates[0].second)->change()->print() << ") = " << p - << " | x = " << core::CContainerPrinter::print(x)); + static_cast(m_TimeRange.range() - m_MinimumTimeToDetect) / + static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; + double p{CTools::logisticFunction(x[0], 0.05, 1.0) * CTools::logisticFunction(x[1], 0.1, 1.0) * + (x[2] < 0.0 ? 
1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) * CTools::logisticFunction(x[3], 0.2, 0.5)}; + LOG_TRACE("p(" << (*candidates[0].second)->change()->print() << ") = " << p << " | x = " << core::CContainerPrinter::print(x)); change = candidates[0].second - m_ChangeModels.begin(); @@ -219,49 +190,39 @@ double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t &change return p / 0.03125; } -bool CUnivariateTimeSeriesChangeDetector::stopTesting() const -{ +bool CUnivariateTimeSeriesChangeDetector::stopTesting() const { core_t::TTime range{m_TimeRange.range()}; - if (range > m_MinimumTimeToDetect) - { - double scale{0.5 + CTools::logisticFunction(2.0 * m_CurrentEvidenceOfChange - / m_MinimumDeltaBicToDetect, 0.2, 1.0)}; - return static_cast(range) - > m_MinimumTimeToDetect + scale * static_cast( - m_MaximumTimeToDetect - m_MinimumTimeToDetect); + if (range > m_MinimumTimeToDetect) { + double scale{0.5 + CTools::logisticFunction(2.0 * m_CurrentEvidenceOfChange / m_MinimumDeltaBicToDetect, 0.2, 1.0)}; + return static_cast(range) > + m_MinimumTimeToDetect + scale * static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect); } return false; } -void CUnivariateTimeSeriesChangeDetector::addSamples(const TWeightStyleVec &weightStyles, - const TTimeDoublePr1Vec &samples, - const TDouble4Vec1Vec &weights) -{ - for (const auto &sample : samples) - { +void CUnivariateTimeSeriesChangeDetector::addSamples(const TWeightStyleVec& weightStyles, + const TTimeDoublePr1Vec& samples, + const TDouble4Vec1Vec& weights) { + for (const auto& sample : samples) { m_TimeRange.add(sample.first); } ++m_SampleCount; - for (auto &model : m_ChangeModels) - { + for (auto& model : m_ChangeModels) { model->addSamples(m_SampleCount, weightStyles, samples, weights); } } -void CUnivariateTimeSeriesChangeDetector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CUnivariateTimeSeriesChangeDetector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { core::CMemoryDebug::dynamicSize("m_ChangeModels", m_ChangeModels, mem); } -std::size_t CUnivariateTimeSeriesChangeDetector::memoryUsage() const -{ +std::size_t CUnivariateTimeSeriesChangeDetector::memoryUsage() const { return core::CMemory::dynamicSize(m_ChangeModels); } -uint64_t CUnivariateTimeSeriesChangeDetector::checksum(uint64_t seed) const -{ +uint64_t CUnivariateTimeSeriesChangeDetector::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_MinimumTimeToDetect); seed = CChecksum::calculate(seed, m_MaximumTimeToDetect); seed = CChecksum::calculate(seed, m_MinimumDeltaBicToDetect); @@ -271,41 +232,28 @@ uint64_t CUnivariateTimeSeriesChangeDetector::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_ChangeModels); } -namespace time_series_change_detector_detail -{ +namespace time_series_change_detector_detail { -CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel) : - m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0}, - m_TrendModel{trendModel}, m_ResidualModel{residualModel} -{} +CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel) + : m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel} { +} -bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/, - core::CStateRestoreTraverser &traverser) -{ - do - { +bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams& 
/*params*/, core::CStateRestoreTraverser& traverser) { + do { const std::string name{traverser.name()}; RESTORE_BUILT_IN(LOG_LIKELIHOOD_TAG, m_LogLikelihood); RESTORE_BUILT_IN(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood); return true; - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertValue(LOG_LIKELIHOOD_TAG, - m_LogLikelihood, - core::CIEEE754::E_SinglePrecision); - inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG, - m_ExpectedLogLikelihood, - core::CIEEE754::E_SinglePrecision); +void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertValue(LOG_LIKELIHOOD_TAG, m_LogLikelihood, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood, core::CIEEE754::E_SinglePrecision); } -void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { // Note if the trend and residual models are shallow copied their // reference count will be updated so core::CMemory::dynamicSize // will give the correct contribution for these reference. @@ -313,137 +261,108 @@ void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePt core::CMemoryDebug::dynamicSize("m_ResidualModel", m_ResidualModel, mem); } -std::size_t CUnivariateChangeModel::memoryUsage() const -{ +std::size_t CUnivariateChangeModel::memoryUsage() const { // See above. - return core::CMemory::dynamicSize(m_TrendModel) - + core::CMemory::dynamicSize(m_ResidualModel); + return core::CMemory::dynamicSize(m_TrendModel) + core::CMemory::dynamicSize(m_ResidualModel); } -uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const -{ +uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_LogLikelihood); seed = CChecksum::calculate(seed, m_ExpectedLogLikelihood); seed = CChecksum::calculate(seed, m_TrendModel); return CChecksum::calculate(seed, m_ResidualModel); } -bool CUnivariateChangeModel::restoreResidualModel(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - return traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), - boost::cref(params), - boost::ref(m_ResidualModel), _1)); +bool CUnivariateChangeModel::restoreResidualModel(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + return traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(m_ResidualModel), _1)); } -double CUnivariateChangeModel::logLikelihood() const -{ +double CUnivariateChangeModel::logLikelihood() const { return m_LogLikelihood; } -double CUnivariateChangeModel::expectedLogLikelihood() const -{ +double CUnivariateChangeModel::expectedLogLikelihood() const { return m_ExpectedLogLikelihood; } -void CUnivariateChangeModel::updateLogLikelihood(const TWeightStyleVec &weightStyles, - const TDouble1Vec &samples, - const TDouble4Vec1Vec &weights) -{ +void CUnivariateChangeModel::updateLogLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { double logLikelihood{}; - if (m_ResidualModel->jointLogMarginalLikelihood(weightStyles, samples, weights, - logLikelihood) == maths_t::E_FpNoErrors) - { + if (m_ResidualModel->jointLogMarginalLikelihood(weightStyles, 
samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { m_LogLikelihood += logLikelihood; } } -void CUnivariateChangeModel::updateExpectedLogLikelihood(const TWeightStyleVec &weightStyles, - const TDouble4Vec1Vec &weights) -{ - for (const auto &weight : weights) - { +void CUnivariateChangeModel::updateExpectedLogLikelihood(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights) { + for (const auto& weight : weights) { double expectedLogLikelihood{}; TDouble4Vec1Vec weight_{weight}; - if (m_ResidualModel->expectation(maths::CPrior::CLogMarginalLikelihood{ - *m_ResidualModel, weightStyles, weight_}, + if (m_ResidualModel->expectation(maths::CPrior::CLogMarginalLikelihood{*m_ResidualModel, weightStyles, weight_}, EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, - expectedLogLikelihood, weightStyles, weight)) - { + expectedLogLikelihood, + weightStyles, + weight)) { m_ExpectedLogLikelihood += expectedLogLikelihood; } } } -const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const -{ +const CTimeSeriesDecompositionInterface& CUnivariateChangeModel::trendModel() const { return *m_TrendModel; } -const CPrior &CUnivariateChangeModel::residualModel() const -{ +const CPrior& CUnivariateChangeModel::residualModel() const { return *m_ResidualModel; } -CPrior &CUnivariateChangeModel::residualModel() -{ +CPrior& CUnivariateChangeModel::residualModel() { return *m_ResidualModel; } -CUnivariateChangeModel::TPriorPtr CUnivariateChangeModel::residualModelPtr() const -{ +CUnivariateChangeModel::TPriorPtr CUnivariateChangeModel::residualModelPtr() const { return m_ResidualModel; } -CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel) : - CUnivariateChangeModel{trendModel, residualModel} -{} +CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel) + : CUnivariateChangeModel{trendModel, residualModel} { +} -bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ +bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { return this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser); } -void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { this->CUnivariateChangeModel::acceptPersistInserter(inserter); } -double CUnivariateNoChangeModel::bic() const -{ +double CUnivariateNoChangeModel::bic() const { return -2.0 * this->logLikelihood(); } -double CUnivariateNoChangeModel::expectedBic() const -{ +double CUnivariateNoChangeModel::expectedBic() const { // This is irrelevant since this is only used for deciding // whether to accept a change. return this->bic(); } -TOptionalChangeDescription CUnivariateNoChangeModel::change() const -{ +TOptionalChangeDescription CUnivariateNoChangeModel::change() const { return TOptionalChangeDescription(); } void CUnivariateNoChangeModel::addSamples(const std::size_t count, TWeightStyleVec weightStyles, - const TTimeDoublePr1Vec &samples_, - TDouble4Vec1Vec weights) -{ + const TTimeDoublePr1Vec& samples_, + TDouble4Vec1Vec weights) { // See, for example, CUnivariateLevelShiftModel::addSamples // for an explanation of the delay updating the log-likelihood. 
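    // In other words: none of the change models contributes to its
    // log-likelihood sum until COUNT_TO_INITIALIZE samples have been seen,
    // so the BIC values compared in decisionFunction are accumulated over
    // the same samples for the no-change and change hypotheses, and only
    // after the change models have had a chance to fit their shift or
    // scale.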
- if (count >= COUNT_TO_INITIALIZE) - { - CPrior &residualModel{this->residualModel()}; + if (count >= COUNT_TO_INITIALIZE) { + CPrior& residualModel{this->residualModel()}; TDouble1Vec samples; samples.reserve(samples_.size()); - for (std::size_t i = 0u; i < samples_.size(); ++i) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; @@ -453,82 +372,65 @@ void CUnivariateNoChangeModel::addSamples(const std::size_t count, maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); } - for (auto &weight : weights) - { + for (auto& weight : weights) { maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); } this->updateLogLikelihood(weightStyles, samples, weights); } } -std::size_t CUnivariateNoChangeModel::staticSize() const -{ +std::size_t CUnivariateNoChangeModel::staticSize() const { return sizeof(*this); } -uint64_t CUnivariateNoChangeModel::checksum(uint64_t seed) const -{ +uint64_t CUnivariateNoChangeModel::checksum(uint64_t seed) const { return this->CUnivariateChangeModel::checksum(seed); } -CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel) : - CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, - m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, - m_SampleCount{0.0} -{} +CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel) + : CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, + m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, + m_SampleCount{0.0} { +} -bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) - { +bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { + if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) { return false; } - do - { + do { const std::string name{traverser.name()}; RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value())) RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { this->CUnivariateChangeModel::acceptPersistInserter(inserter); inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited()); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); - inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(this->residualModel()), _1)); + inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(this->residualModel()), _1)); } -double CUnivariateLevelShiftModel::bic() const -{ +double CUnivariateLevelShiftModel::bic() const { return -2.0 * this->logLikelihood() + CTools::fastLog(m_SampleCount); } -double CUnivariateLevelShiftModel::expectedBic() const -{ +double 
CUnivariateLevelShiftModel::expectedBic() const { return -2.0 * this->expectedLogLikelihood() + CTools::fastLog(m_SampleCount); } -TOptionalChangeDescription CUnivariateLevelShiftModel::change() const -{ - return SChangeDescription{SChangeDescription::E_LevelShift, - CBasicStatistics::mean(m_Shift), - this->residualModelPtr()}; +TOptionalChangeDescription CUnivariateLevelShiftModel::change() const { + return SChangeDescription{SChangeDescription::E_LevelShift, CBasicStatistics::mean(m_Shift), this->residualModelPtr()}; } void CUnivariateLevelShiftModel::addSamples(const std::size_t count, TWeightStyleVec weightStyles, - const TTimeDoublePr1Vec &samples_, - TDouble4Vec1Vec weights) -{ - const CTimeSeriesDecompositionInterface &trendModel{this->trendModel()}; + const TTimeDoublePr1Vec& samples_, + TDouble4Vec1Vec weights) { + const CTimeSeriesDecompositionInterface& trendModel{this->trendModel()}; // We delay updating the log-likelihood because early on the // level can change giving us a better apparent fit to the @@ -536,15 +438,13 @@ void CUnivariateLevelShiftModel::addSamples(const std::size_t count, // minimum to get empirically similar sum log-likelihood if // there is no change in the data. - if (count >= COUNT_TO_INITIALIZE) - { - CPrior &residualModel{this->residualModel()}; + if (count >= COUNT_TO_INITIALIZE) { + CPrior& residualModel{this->residualModel()}; TDouble1Vec samples; samples.reserve(samples_.size()); double shift{CBasicStatistics::mean(m_Shift)}; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; @@ -558,16 +458,14 @@ void CUnivariateLevelShiftModel::addSamples(const std::size_t count, residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); - for (auto &weight : weights) - { + for (auto& weight : weights) { maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); } this->updateLogLikelihood(weightStyles, samples, weights); this->updateExpectedLogLikelihood(weightStyles, weights); } - for (std::size_t i = 0u; i < samples_.size(); ++i) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; double shift{trendModel.detrend(time, value, 0.0) - m_ResidualModelMode}; @@ -575,76 +473,60 @@ void CUnivariateLevelShiftModel::addSamples(const std::size_t count, } } -std::size_t CUnivariateLevelShiftModel::staticSize() const -{ +std::size_t CUnivariateLevelShiftModel::staticSize() const { return sizeof(*this); } -uint64_t CUnivariateLevelShiftModel::checksum(uint64_t seed) const -{ +uint64_t CUnivariateLevelShiftModel::checksum(uint64_t seed) const { seed = this->CUnivariateChangeModel::checksum(seed); seed = CChecksum::calculate(seed, m_Shift); return CChecksum::calculate(seed, m_SampleCount); } -CUnivariateLinearScaleModel::CUnivariateLinearScaleModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel) : - CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, - m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, - m_SampleCount{0.0} -{} +CUnivariateLinearScaleModel::CUnivariateLinearScaleModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel) + : CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, + 
m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, + m_SampleCount{0.0} { +} -bool CUnivariateLinearScaleModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) - { +bool CUnivariateLinearScaleModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { + if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) { return false; } - do - { + do { const std::string name{traverser.name()}; RESTORE(SCALE_TAG, m_Scale.fromDelimited(traverser.value())) RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CUnivariateLinearScaleModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CUnivariateLinearScaleModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { this->CUnivariateChangeModel::acceptPersistInserter(inserter); inserter.insertValue(SCALE_TAG, m_Scale.toDelimited()); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); - inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(this->residualModel()), _1)); + inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(this->residualModel()), _1)); } -double CUnivariateLinearScaleModel::bic() const -{ +double CUnivariateLinearScaleModel::bic() const { return -2.0 * this->logLikelihood() + CTools::fastLog(m_SampleCount); } -double CUnivariateLinearScaleModel::expectedBic() const -{ +double CUnivariateLinearScaleModel::expectedBic() const { return -2.0 * this->expectedLogLikelihood() + CTools::fastLog(m_SampleCount); } -CUnivariateLinearScaleModel::TOptionalChangeDescription CUnivariateLinearScaleModel::change() const -{ - return SChangeDescription{SChangeDescription::E_LinearScale, - CBasicStatistics::mean(m_Scale), - this->residualModelPtr()}; +CUnivariateLinearScaleModel::TOptionalChangeDescription CUnivariateLinearScaleModel::change() const { + return SChangeDescription{SChangeDescription::E_LinearScale, CBasicStatistics::mean(m_Scale), this->residualModelPtr()}; } void CUnivariateLinearScaleModel::addSamples(const std::size_t count, TWeightStyleVec weightStyles, - const TTimeDoublePr1Vec &samples_, - TDouble4Vec1Vec weights) -{ - const CTimeSeriesDecompositionInterface &trendModel{this->trendModel()}; + const TTimeDoublePr1Vec& samples_, + TDouble4Vec1Vec weights) { + const CTimeSeriesDecompositionInterface& trendModel{this->trendModel()}; // We delay updating the log-likelihood because early on the // scale can change giving us a better apparent fit to the @@ -652,26 +534,22 @@ void CUnivariateLinearScaleModel::addSamples(const std::size_t count, // minimum to get empirically similar sum log-likelihood if // there is no change in the data. - for (std::size_t i = 0u; i < samples_.size(); ++i) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second - m_ResidualModelMode}; double prediction{CBasicStatistics::mean(trendModel.value(time, 0.0))}; double scale{std::fabs(value) / std::fabs(prediction)}; - m_Scale.add(value * prediction < 0.0 ? 
- MINIMUM_SCALE : CTools::truncate(scale, MINIMUM_SCALE, MAXIMUM_SCALE), + m_Scale.add(value * prediction < 0.0 ? MINIMUM_SCALE : CTools::truncate(scale, MINIMUM_SCALE, MAXIMUM_SCALE), std::fabs(prediction)); } - if (count >= COUNT_TO_INITIALIZE) - { - CPrior &residualModel{this->residualModel()}; + if (count >= COUNT_TO_INITIALIZE) { + CPrior& residualModel{this->residualModel()}; TDouble1Vec samples; samples.reserve(samples_.size()); double scale{CBasicStatistics::mean(m_Scale)}; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; @@ -686,8 +564,7 @@ void CUnivariateLinearScaleModel::addSamples(const std::size_t count, residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); - for (auto &weight : weights) - { + for (auto& weight : weights) { maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); } this->updateLogLikelihood(weightStyles, samples, weights); @@ -695,81 +572,63 @@ void CUnivariateLinearScaleModel::addSamples(const std::size_t count, } } -std::size_t CUnivariateLinearScaleModel::staticSize() const -{ +std::size_t CUnivariateLinearScaleModel::staticSize() const { return sizeof(*this); } -uint64_t CUnivariateLinearScaleModel::checksum(uint64_t seed) const -{ +uint64_t CUnivariateLinearScaleModel::checksum(uint64_t seed) const { seed = this->CUnivariateChangeModel::checksum(seed); seed = CChecksum::calculate(seed, m_Scale); return CChecksum::calculate(seed, m_SampleCount); } -CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &trendModel, - const TPriorPtr &residualModel, - core_t::TTime shift) : - CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, - m_Shift{shift} -{} +CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr& trendModel, + const TPriorPtr& residualModel, + core_t::TTime shift) + : CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, m_Shift{shift} { +} -bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) - { +bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { + if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) { return false; } - do - { + do { const std::string name{traverser.name()}; RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { this->CUnivariateChangeModel::acceptPersistInserter(inserter); - inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(this->residualModel()), _1)); + inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(this->residualModel()), _1)); } -double CUnivariateTimeShiftModel::bic() const -{ +double CUnivariateTimeShiftModel::bic() const { return -2.0 * this->logLikelihood(); } -double 
CUnivariateTimeShiftModel::expectedBic() const
-{
+double CUnivariateTimeShiftModel::expectedBic() const {
     return -2.0 * this->expectedLogLikelihood();
 }
 
-TOptionalChangeDescription CUnivariateTimeShiftModel::change() const
-{
-    return SChangeDescription{SChangeDescription::E_TimeShift,
-                              static_cast<double>(m_Shift),
-                              this->residualModelPtr()};
+TOptionalChangeDescription CUnivariateTimeShiftModel::change() const {
+    return SChangeDescription{SChangeDescription::E_TimeShift, static_cast<double>(m_Shift), this->residualModelPtr()};
 }
 
 void CUnivariateTimeShiftModel::addSamples(const std::size_t count,
                                            TWeightStyleVec weightStyles,
-                                           const TTimeDoublePr1Vec &samples_,
-                                           TDouble4Vec1Vec weights)
-{
+                                           const TTimeDoublePr1Vec& samples_,
+                                           TDouble4Vec1Vec weights) {
     // See, for example, CUnivariateLevelShiftModel::addSamples
     // for an explanation of the delay updating the log-likelihood.
 
-    if (count >= COUNT_TO_INITIALIZE)
-    {
-        CPrior &residualModel{this->residualModel()};
+    if (count >= COUNT_TO_INITIALIZE) {
+        CPrior& residualModel{this->residualModel()};
 
         TDouble1Vec samples;
         samples.reserve(samples_.size());
-        for (std::size_t i = 0u; i < samples_.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples_.size(); ++i) {
             core_t::TTime time{samples_[i].first};
             double value{samples_[i].second};
             double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])};
@@ -782,8 +641,7 @@ void CUnivariateTimeShiftModel::addSamples(const std::size_t count,
         residualModel.addSamples(weightStyles, samples, weights);
         residualModel.propagateForwardsByTime(1.0);
 
-        for (auto &weight : weights)
-        {
+        for (auto& weight : weights) {
             maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight);
         }
         this->updateLogLikelihood(weightStyles, samples, weights);
@@ -791,18 +649,14 @@ void CUnivariateTimeShiftModel::addSamples(const std::size_t count,
     }
 }
 
-std::size_t CUnivariateTimeShiftModel::staticSize() const
-{
+std::size_t CUnivariateTimeShiftModel::staticSize() const {
     return sizeof(*this);
 }
 
-uint64_t CUnivariateTimeShiftModel::checksum(uint64_t seed) const
-{
+uint64_t CUnivariateTimeShiftModel::checksum(uint64_t seed) const {
     seed = this->CUnivariateChangeModel::checksum(seed);
     return CChecksum::calculate(seed, m_Shift);
 }
-
 }
-
 }
 }
diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc
index 9031f277c8..684cfd90eb 100644
--- a/lib/maths/CTimeSeriesDecomposition.cc
+++ b/lib/maths/CTimeSeriesDecomposition.cc
@@ -8,9 +8,9 @@
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
@@ -35,19 +35,15 @@
 #include
 #include
 
-namespace ml
-{
-namespace maths
-{
-namespace
-{
+namespace ml {
+namespace maths {
+namespace {
 
 using TDoubleDoublePr = maths_t::TDoubleDoublePr;
 using TVector2x1 = CVectorNx1<double, 2>;
 
 //! Convert a double pair to a 2x1 vector.
-TVector2x1 vector2x1(const TDoubleDoublePr &p)
-{
+TVector2x1 vector2x1(const TDoubleDoublePr& p) {
     TVector2x1 result;
     result(0) = p.first;
     result(1) = p.second;
@@ -55,28 +51,21 @@ TVector2x1 vector2x1(const TDoubleDoublePr &p)
 }
 
 //! Convert a 2x1 vector to a double pair.
-TDoubleDoublePr pair(const TVector2x1 &v)
-{
+TDoubleDoublePr pair(const TVector2x1& v) {
     return {v(0), v(1)};
 }
 
 //! Get the normal confidence interval.
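For reference, the interval computed by confidenceInterval below is just a symmetric quantile pair of a centred normal: for a confidence of c percent it evaluates the inverse CDF at (100 - c)/200 and (100 + c)/200. A minimal standalone sketch of the same calculation, assuming only boost::math (the free-standing function name normalConfidenceInterval is hypothetical):

    #include <boost/math/distributions/normal.hpp>
    #include <cmath>
    #include <iostream>
    #include <utility>

    // Symmetric confidence interval of a centred normal with the given
    // variance, mirroring the quantile arithmetic used in the diff below.
    std::pair<double, double> normalConfidenceInterval(double confidence, double variance) {
        if (variance <= 0.0) {
            return {0.0, 0.0};
        }
        boost::math::normal normal{0.0, std::sqrt(variance)};
        return {boost::math::quantile(normal, (100.0 - confidence) / 200.0),
                boost::math::quantile(normal, (100.0 + confidence) / 200.0)};
    }

    int main() {
        auto interval = normalConfidenceInterval(95.0, 4.0);
        // Standard deviation 2.0, so roughly (-3.92, +3.92).
        std::cout << interval.first << " " << interval.second << std::endl;
        return 0;
    }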
-TDoubleDoublePr confidenceInterval(double confidence, double variance) -{ - if (variance > 0.0) - { - try - { +TDoubleDoublePr confidenceInterval(double confidence, double variance) { + if (variance > 0.0) { + try { boost::math::normal normal(0.0, std::sqrt(variance)); double ql{boost::math::quantile(normal, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; return {ql, qu}; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed calculating confidence interval: " << e.what() - << ", variance = " << variance - << ", confidence = " << confidence); + } catch (const std::exception& e) { + LOG_ERROR("Failed calculating confidence interval: " << e.what() << ", variance = " << variance + << ", confidence = " << confidence); } } return {0.0, 0.0}; @@ -98,95 +87,74 @@ const std::string COMPONENTS_OLD_TAG{"g"}; const std::string LAST_PROPAGATION_TIME_OLD_TAG{"h"}; const std::string EMPTY_STRING; - } -CTimeSeriesDecomposition::CTimeSeriesDecomposition(double decayRate, - core_t::TTime bucketLength, - std::size_t seasonalComponentSize) : - m_TimeShift{0}, - m_LastValueTime{0}, - m_LastPropagationTime{0}, - m_PeriodicityTest{decayRate, bucketLength}, - m_CalendarCyclicTest{decayRate, bucketLength}, - m_Components{decayRate, bucketLength, seasonalComponentSize} -{ +CTimeSeriesDecomposition::CTimeSeriesDecomposition(double decayRate, core_t::TTime bucketLength, std::size_t seasonalComponentSize) + : m_TimeShift{0}, + m_LastValueTime{0}, + m_LastPropagationTime{0}, + m_PeriodicityTest{decayRate, bucketLength}, + m_CalendarCyclicTest{decayRate, bucketLength}, + m_Components{decayRate, bucketLength, seasonalComponentSize} { this->initializeMediator(); } -CTimeSeriesDecomposition::CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) : - m_TimeShift{0}, - m_LastValueTime{0}, - m_LastPropagationTime{0}, - m_PeriodicityTest{params.s_DecayRate, params.s_MinimumBucketLength}, - m_CalendarCyclicTest{params.s_DecayRate, params.s_MinimumBucketLength}, - m_Components{params.s_DecayRate, params.s_MinimumBucketLength, params.s_ComponentSize} -{ - traverser.traverseSubLevel(boost::bind(&CTimeSeriesDecomposition::acceptRestoreTraverser, - this, boost::cref(params.s_ChangeModelParams), _1)); +CTimeSeriesDecomposition::CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams& params, + core::CStateRestoreTraverser& traverser) + : m_TimeShift{0}, + m_LastValueTime{0}, + m_LastPropagationTime{0}, + m_PeriodicityTest{params.s_DecayRate, params.s_MinimumBucketLength}, + m_CalendarCyclicTest{params.s_DecayRate, params.s_MinimumBucketLength}, + m_Components{params.s_DecayRate, params.s_MinimumBucketLength, params.s_ComponentSize} { + traverser.traverseSubLevel( + boost::bind(&CTimeSeriesDecomposition::acceptRestoreTraverser, this, boost::cref(params.s_ChangeModelParams), _1)); this->initializeMediator(); } -CTimeSeriesDecomposition::CTimeSeriesDecomposition(const CTimeSeriesDecomposition &other, - bool isForForecast) : - m_TimeShift{other.m_TimeShift}, - m_LastValueTime{other.m_LastValueTime}, - m_LastPropagationTime{other.m_LastPropagationTime}, - m_PeriodicityTest{other.m_PeriodicityTest, isForForecast}, - m_CalendarCyclicTest{other.m_CalendarCyclicTest, isForForecast}, - m_Components{other.m_Components} -{ +CTimeSeriesDecomposition::CTimeSeriesDecomposition(const CTimeSeriesDecomposition& other, bool isForForecast) + : m_TimeShift{other.m_TimeShift}, + m_LastValueTime{other.m_LastValueTime}, 
+ m_LastPropagationTime{other.m_LastPropagationTime}, + m_PeriodicityTest{other.m_PeriodicityTest, isForForecast}, + m_CalendarCyclicTest{other.m_CalendarCyclicTest, isForForecast}, + m_Components{other.m_Components} { this->initializeMediator(); } -bool CTimeSeriesDecomposition::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - if (traverser.name() == VERSION_6_3_TAG) - { - while (traverser.next()) - { - const std::string &name{traverser.name()}; +bool CTimeSeriesDecomposition::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + if (traverser.name() == VERSION_6_3_TAG) { + while (traverser.next()) { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(TIME_SHIFT_6_3_TAG, m_TimeShift) RESTORE_BUILT_IN(LAST_VALUE_TIME_6_3_TAG, m_LastValueTime) RESTORE_BUILT_IN(LAST_PROPAGATION_TIME_6_3_TAG, m_LastPropagationTime) - RESTORE(PERIODICITY_TEST_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&CPeriodicityTest::acceptRestoreTraverser, - &m_PeriodicityTest, _1))) - RESTORE(CALENDAR_CYCLIC_TEST_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&CCalendarTest::acceptRestoreTraverser, - &m_CalendarCyclicTest, _1))) - RESTORE(COMPONENTS_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&CComponents::acceptRestoreTraverser, - &m_Components, boost::cref(params), _1))) + RESTORE(PERIODICITY_TEST_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&CPeriodicityTest::acceptRestoreTraverser, &m_PeriodicityTest, _1))) + RESTORE(CALENDAR_CYCLIC_TEST_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&CCalendarTest::acceptRestoreTraverser, &m_CalendarCyclicTest, _1))) + RESTORE(COMPONENTS_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&CComponents::acceptRestoreTraverser, &m_Components, boost::cref(params), _1))) } - } - else - { + } else { // There is no version string this is historic state. 
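The branch above is the whole versioning story for this class: state written by 6.3 and later leads with a version tag and is restored with the new tag names, while anything else is treated as legacy state and restored with the old single-letter tags. A schematic of that dispatch, detached from the ml traverser machinery (the flat tag-list representation and the function names here are illustrative only):

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    using TTagValueVec = std::vector<std::pair<std::string, std::string>>;

    // Dispatch on a leading version tag; its absence marks pre-6.3 state.
    bool restoreDecomposition(const TTagValueVec& state) {
        if (!state.empty() && state.front().first == "6.3") {
            // Restore with the 6.3 tag names; defaults already match 6.3 semantics.
            std::cout << "restoring 6.3 state" << std::endl;
            return true;
        }
        // Historic state: restore with the legacy tags and re-derive settings,
        // such as a decay rate that older versions persisted explicitly.
        double decayRate{0.012};
        std::cout << "restoring legacy state, decay rate " << decayRate << std::endl;
        return true;
    }

    int main() {
        restoreDecomposition({{"6.3", ""}, {"a", "0"}});
        restoreDecomposition({{"a", "0"}});
        return 0;
    }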
double decayRate{0.012}; - do - { - const std::string &name{traverser.name()}; + do { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(DECAY_RATE_OLD_TAG, decayRate) RESTORE_BUILT_IN(LAST_VALUE_TIME_OLD_TAG, m_LastValueTime) RESTORE_BUILT_IN(LAST_PROPAGATION_TIME_OLD_TAG, m_LastPropagationTime) - RESTORE(CALENDAR_CYCLIC_TEST_OLD_TAG, traverser.traverseSubLevel( - boost::bind(&CCalendarTest::acceptRestoreTraverser, - &m_CalendarCyclicTest, _1))) - RESTORE(COMPONENTS_OLD_TAG, traverser.traverseSubLevel( - boost::bind(&CComponents::acceptRestoreTraverser, - &m_Components, boost::cref(params), _1))) - } - while (traverser.next()); + RESTORE(CALENDAR_CYCLIC_TEST_OLD_TAG, + traverser.traverseSubLevel(boost::bind(&CCalendarTest::acceptRestoreTraverser, &m_CalendarCyclicTest, _1))) + RESTORE(COMPONENTS_OLD_TAG, + traverser.traverseSubLevel(boost::bind(&CComponents::acceptRestoreTraverser, &m_Components, boost::cref(params), _1))) + } while (traverser.next()); this->decayRate(decayRate); } return true; } -void CTimeSeriesDecomposition::swap(CTimeSeriesDecomposition &other) -{ +void CTimeSeriesDecomposition::swap(CTimeSeriesDecomposition& other) { std::swap(m_TimeShift, other.m_TimeShift); std::swap(m_LastValueTime, other.m_LastValueTime); std::swap(m_LastPropagationTime, other.m_LastPropagationTime); @@ -195,61 +163,49 @@ void CTimeSeriesDecomposition::swap(CTimeSeriesDecomposition &other) m_Components.swap(other.m_Components); } -CTimeSeriesDecomposition &CTimeSeriesDecomposition::operator=(const CTimeSeriesDecomposition &other) -{ - if (this != &other) - { +CTimeSeriesDecomposition& CTimeSeriesDecomposition::operator=(const CTimeSeriesDecomposition& other) { + if (this != &other) { CTimeSeriesDecomposition copy{other}; this->swap(copy); } return *this; } -void CTimeSeriesDecomposition::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CTimeSeriesDecomposition::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); inserter.insertValue(TIME_SHIFT_6_3_TAG, m_TimeShift); inserter.insertValue(LAST_VALUE_TIME_6_3_TAG, m_LastValueTime); inserter.insertValue(LAST_PROPAGATION_TIME_6_3_TAG, m_LastPropagationTime); - inserter.insertLevel(PERIODICITY_TEST_6_3_TAG, boost::bind(&CPeriodicityTest::acceptPersistInserter, - &m_PeriodicityTest, _1)); - inserter.insertLevel(CALENDAR_CYCLIC_TEST_6_3_TAG, boost::bind(&CCalendarTest::acceptPersistInserter, - &m_CalendarCyclicTest, _1)); - inserter.insertLevel(COMPONENTS_6_3_TAG, boost::bind(&CComponents::acceptPersistInserter, - &m_Components, _1)); + inserter.insertLevel(PERIODICITY_TEST_6_3_TAG, boost::bind(&CPeriodicityTest::acceptPersistInserter, &m_PeriodicityTest, _1)); + inserter.insertLevel(CALENDAR_CYCLIC_TEST_6_3_TAG, boost::bind(&CCalendarTest::acceptPersistInserter, &m_CalendarCyclicTest, _1)); + inserter.insertLevel(COMPONENTS_6_3_TAG, boost::bind(&CComponents::acceptPersistInserter, &m_Components, _1)); } -CTimeSeriesDecomposition *CTimeSeriesDecomposition::clone(bool isForForecast) const -{ +CTimeSeriesDecomposition* CTimeSeriesDecomposition::clone(bool isForForecast) const { return new CTimeSeriesDecomposition{*this, isForForecast}; } -void CTimeSeriesDecomposition::dataType(maths_t::EDataType dataType) -{ +void CTimeSeriesDecomposition::dataType(maths_t::EDataType dataType) { m_Components.dataType(dataType); } -void CTimeSeriesDecomposition::decayRate(double decayRate) -{ +void CTimeSeriesDecomposition::decayRate(double decayRate) { // Periodic component 
tests use a fixed decay rate. m_Components.decayRate(decayRate); } -double CTimeSeriesDecomposition::decayRate() const -{ +double CTimeSeriesDecomposition::decayRate() const { return m_Components.decayRate(); } -bool CTimeSeriesDecomposition::initialized() const -{ +bool CTimeSeriesDecomposition::initialized() const { return m_Components.initialized(); } bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec &weightStyles, - const maths_t::TDouble4Vec &weights) -{ + const maths_t::TWeightStyleVec& weightStyles, + const maths_t::TDouble4Vec& weights) { CComponents::CScopeNotifyOnStateChange result{m_Components}; time += m_TimeShift; @@ -259,14 +215,15 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, m_LastValueTime = std::max(m_LastValueTime, time); this->propagateForwardsTo(time); - SAddValue message{time, lastTime, value, weightStyles, weights, + SAddValue message{time, + lastTime, + value, + weightStyles, + weights, CBasicStatistics::mean(this->value(time, 0.0, E_TrendForced)), CBasicStatistics::mean(this->value(time, 0.0, E_Seasonal)), CBasicStatistics::mean(this->value(time, 0.0, E_Calendar)), - [this](core_t::TTime time_) - { - return CBasicStatistics::mean(this->value(time_, 0.0, E_Seasonal | E_Calendar)); - }, + [this](core_t::TTime time_) { return CBasicStatistics::mean(this->value(time_, 0.0, E_Seasonal | E_Calendar)); }, m_Components.periodicityTestConfig()}; m_Components.handle(message); @@ -276,15 +233,11 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, return result.changed(); } -bool CTimeSeriesDecomposition::applyChange(core_t::TTime time, - double value, - const SChangeDescription &change) -{ +bool CTimeSeriesDecomposition::applyChange(core_t::TTime time, double value, const SChangeDescription& change) { bool result{m_Components.usingTrendForPrediction() == false}; m_Components.useTrendForPrediction(); - switch (change.s_Description) - { + switch (change.s_Description) { case SChangeDescription::E_LevelShift: m_Components.shiftLevel(time, value, change.s_Value[0]); break; @@ -299,10 +252,8 @@ bool CTimeSeriesDecomposition::applyChange(core_t::TTime time, return result; } -void CTimeSeriesDecomposition::propagateForwardsTo(core_t::TTime time) -{ - if (time > m_LastPropagationTime) - { +void CTimeSeriesDecomposition::propagateForwardsTo(core_t::TTime time) { + if (time > m_LastPropagationTime) { m_PeriodicityTest.propagateForwards(m_LastPropagationTime, time); m_CalendarCyclicTest.propagateForwards(m_LastPropagationTime, time); m_Components.propagateForwards(m_LastPropagationTime, time); @@ -310,60 +261,44 @@ void CTimeSeriesDecomposition::propagateForwardsTo(core_t::TTime time) m_LastPropagationTime = std::max(m_LastPropagationTime, time); } -double CTimeSeriesDecomposition::meanValue(core_t::TTime time) const -{ +double CTimeSeriesDecomposition::meanValue(core_t::TTime time) const { return m_Components.meanValue(time); } -TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, - double confidence, - int components, - bool smooth) const -{ +TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, double confidence, int components, bool smooth) const { TVector2x1 baseline{0.0}; time += m_TimeShift; - if (components & E_TrendForced) - { + if (components & E_TrendForced) { baseline += vector2x1(m_Components.trend().value(time, confidence)); - } - else if (components & E_Trend) - { - if (m_Components.usingTrendForPrediction()) - { + } else if (components & E_Trend) { + if 
(m_Components.usingTrendForPrediction()) { baseline += vector2x1(m_Components.trend().value(time, confidence)); } } - if (components & E_Seasonal) - { - for (const auto &component : m_Components.seasonal()) - { - if (this->selected(time, components, component)) - { + if (components & E_Seasonal) { + for (const auto& component : m_Components.seasonal()) { + if (this->selected(time, components, component)) { baseline += vector2x1(component.value(time, confidence)); } } } - if (components & E_Calendar) - { - for (const auto &component : m_Components.calendar()) - { - if (component.initialized() && component.feature().inWindow(time)) - { + if (components & E_Calendar) { + for (const auto& component : m_Components.calendar()) { + if (component.initialized() && component.feature().inWindow(time)) { baseline += vector2x1(component.value(time, confidence)); } } } - if (smooth) - { - baseline += vector2x1(this->smooth( - boost::bind(&CTimeSeriesDecomposition::value, - this, _1, confidence, components & E_Seasonal, false), - time - m_TimeShift, components)); + if (smooth) { + baseline += + vector2x1(this->smooth(boost::bind(&CTimeSeriesDecomposition::value, this, _1, confidence, components & E_Seasonal, false), + time - m_TimeShift, + components)); } return pair(baseline); @@ -374,95 +309,70 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime, core_t::TTime step, double confidence, double minimumScale, - const TWriteForecastResult &writer) -{ - if (endTime < startTime) - { + const TWriteForecastResult& writer) { + if (endTime < startTime) { LOG_ERROR("Bad forecast range: [" << startTime << "," << endTime << "]"); return; } - if (confidence < 0.0 || confidence >= 100.0) - { + if (confidence < 0.0 || confidence >= 100.0) { LOG_ERROR("Bad confidence interval: " << confidence << "%"); return; } - auto seasonal = [this, confidence](core_t::TTime time) - { - TVector2x1 prediction(0.0); - for (const auto &component : m_Components.seasonal()) - { - if (component.initialized() && component.time().inWindow(time)) - { - prediction += vector2x1(component.value(time, confidence)); - } + auto seasonal = [this, confidence](core_t::TTime time) { + TVector2x1 prediction(0.0); + for (const auto& component : m_Components.seasonal()) { + if (component.initialized() && component.time().inWindow(time)) { + prediction += vector2x1(component.value(time, confidence)); } - for (const auto &component : m_Components.calendar()) - { - if (component.initialized() && component.feature().inWindow(time)) - { - prediction += vector2x1(component.value(time, confidence)); - } + } + for (const auto& component : m_Components.calendar()) { + if (component.initialized() && component.feature().inWindow(time)) { + prediction += vector2x1(component.value(time, confidence)); } - return pair(prediction); - }; + } + return pair(prediction); + }; startTime += m_TimeShift; - endTime += m_TimeShift; - endTime = startTime + CIntegerTools::ceil(endTime - startTime, step); + endTime += m_TimeShift; + endTime = startTime + CIntegerTools::ceil(endTime - startTime, step); double trendVariance{CBasicStatistics::mean(m_Components.trend().variance(0.0))}; double seasonalVariance{m_Components.meanVariance() - trendVariance}; double variance{this->meanVariance()}; - double scale0{std::sqrt(std::max(CBasicStatistics::mean( - this->scale(startTime, variance, 0.0)), minimumScale))}; + double scale0{std::sqrt(std::max(CBasicStatistics::mean(this->scale(startTime, variance, 0.0)), minimumScale))}; TVector2x1 
i0{vector2x1(confidenceInterval(confidence, seasonalVariance))}; - auto forecastSeasonal = [&](core_t::TTime time) - { - m_Components.interpolateForForecast(time); - double scale{std::sqrt(std::max(CBasicStatistics::mean( - this->scale(time, variance, 0.0)), minimumScale))}; - TVector2x1 prediction{ vector2x1(seasonal(time)) - + vector2x1(this->smooth(seasonal, time, E_Seasonal)) - + (scale - scale0) * i0}; - return TDouble3Vec{prediction(0), (prediction(0) + prediction(1)) / 2.0, prediction(1)}; - }; + auto forecastSeasonal = [&](core_t::TTime time) { + m_Components.interpolateForForecast(time); + double scale{std::sqrt(std::max(CBasicStatistics::mean(this->scale(time, variance, 0.0)), minimumScale))}; + TVector2x1 prediction{vector2x1(seasonal(time)) + vector2x1(this->smooth(seasonal, time, E_Seasonal)) + (scale - scale0) * i0}; + return TDouble3Vec{prediction(0), (prediction(0) + prediction(1)) / 2.0, prediction(1)}; + }; m_Components.trend().forecast(startTime, endTime, step, confidence, forecastSeasonal, writer); } -double CTimeSeriesDecomposition::detrend(core_t::TTime time, - double value, - double confidence, - int components) const -{ - if (!this->initialized()) - { +double CTimeSeriesDecomposition::detrend(core_t::TTime time, double value, double confidence, int components) const { + if (!this->initialized()) { return value; } TDoubleDoublePr interval{this->value(time, confidence, components)}; return std::min(value - interval.first, 0.0) + std::max(value - interval.second, 0.0); } -double CTimeSeriesDecomposition::meanVariance() const -{ +double CTimeSeriesDecomposition::meanVariance() const { return m_Components.meanVarianceScale() * m_Components.meanVariance(); } -TDoubleDoublePr CTimeSeriesDecomposition::scale(core_t::TTime time, - double variance, - double confidence, - bool smooth) const -{ - if (!this->initialized()) - { +TDoubleDoublePr CTimeSeriesDecomposition::scale(core_t::TTime time, double variance, double confidence, bool smooth) const { + if (!this->initialized()) { return {1.0, 1.0}; } double mean{this->meanVariance()}; - if (mean == 0.0) - { + if (mean == 0.0) { return {1.0, 1.0}; } @@ -470,58 +380,45 @@ TDoubleDoublePr CTimeSeriesDecomposition::scale(core_t::TTime time, double components{0.0}; TVector2x1 scale(0.0); - if (m_Components.usingTrendForPrediction()) - { + if (m_Components.usingTrendForPrediction()) { scale += vector2x1(m_Components.trend().variance(confidence)); } - for (const auto &component : m_Components.seasonal()) - { - if (component.initialized() && component.time().inWindow(time)) - { + for (const auto& component : m_Components.seasonal()) { + if (component.initialized() && component.time().inWindow(time)) { scale += vector2x1(component.variance(time, confidence)); components += 1.0; } } - for (const auto &component : m_Components.calendar()) - { - if (component.initialized() && component.feature().inWindow(time)) - { + for (const auto& component : m_Components.calendar()) { + if (component.initialized() && component.feature().inWindow(time)) { scale += vector2x1(component.variance(time, confidence)); components += 1.0; } } double bias{std::min(2.0 * mean / variance, 1.0)}; - if (m_Components.usingTrendForPrediction()) - { + if (m_Components.usingTrendForPrediction()) { bias *= (components + 1.0) / std::max(components, 1.0); } - LOG_TRACE("mean = " << mean - << " variance = " << variance - << " bias = " << bias - << " scale = " << core::CContainerPrinter::print(scale)); + LOG_TRACE("mean = " << mean << " variance = " << variance << " bias = " 
<< bias + << " scale = " << core::CContainerPrinter::print(scale)); scale *= m_Components.meanVarianceScale() / mean; - scale = TVector2x1{1.0} + bias * (scale - TVector2x1{1.0}); + scale = TVector2x1{1.0} + bias * (scale - TVector2x1{1.0}); - if (smooth) - { - scale += vector2x1(this->smooth( - boost::bind(&CTimeSeriesDecomposition::scale, - this, _1, variance, confidence, false), time, E_All)); + if (smooth) { + scale += vector2x1(this->smooth(boost::bind(&CTimeSeriesDecomposition::scale, this, _1, variance, confidence, false), time, E_All)); } return pair(scale); } -void CTimeSeriesDecomposition::skipTime(core_t::TTime skipInterval) -{ +void CTimeSeriesDecomposition::skipTime(core_t::TTime skipInterval) { m_LastValueTime += skipInterval; m_LastPropagationTime += skipInterval; } -uint64_t CTimeSeriesDecomposition::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesDecomposition::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_LastValueTime); seed = CChecksum::calculate(seed, m_LastPropagationTime); seed = CChecksum::calculate(seed, m_PeriodicityTest); @@ -529,8 +426,7 @@ uint64_t CTimeSeriesDecomposition::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_Components); } -void CTimeSeriesDecomposition::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CTimeSeriesDecomposition::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CTimeSeriesDecomposition"); core::CMemoryDebug::dynamicSize("m_Mediator", m_Mediator, mem); core::CMemoryDebug::dynamicSize("m_PeriodicityTest", m_PeriodicityTest, mem); @@ -538,31 +434,24 @@ void CTimeSeriesDecomposition::debugMemoryUsage(core::CMemoryUsage::TMemoryUsage core::CMemoryDebug::dynamicSize("m_Components", m_Components, mem); } -std::size_t CTimeSeriesDecomposition::memoryUsage() const -{ - return core::CMemory::dynamicSize(m_Mediator) - + core::CMemory::dynamicSize(m_PeriodicityTest) - + core::CMemory::dynamicSize(m_CalendarCyclicTest) - + core::CMemory::dynamicSize(m_Components); +std::size_t CTimeSeriesDecomposition::memoryUsage() const { + return core::CMemory::dynamicSize(m_Mediator) + core::CMemory::dynamicSize(m_PeriodicityTest) + + core::CMemory::dynamicSize(m_CalendarCyclicTest) + core::CMemory::dynamicSize(m_Components); } -std::size_t CTimeSeriesDecomposition::staticSize() const -{ +std::size_t CTimeSeriesDecomposition::staticSize() const { return sizeof(*this); } -core_t::TTime CTimeSeriesDecomposition::timeShift() const -{ +core_t::TTime CTimeSeriesDecomposition::timeShift() const { return m_TimeShift; } -const maths_t::TSeasonalComponentVec &CTimeSeriesDecomposition::seasonalComponents() const -{ +const maths_t::TSeasonalComponentVec& CTimeSeriesDecomposition::seasonalComponents() const { return m_Components.seasonal(); } -void CTimeSeriesDecomposition::initializeMediator() -{ +void CTimeSeriesDecomposition::initializeMediator() { m_Mediator = boost::make_shared(); m_Mediator->registerHandler(m_PeriodicityTest); m_Mediator->registerHandler(m_CalendarCyclicTest); @@ -570,45 +459,31 @@ void CTimeSeriesDecomposition::initializeMediator() } template -TDoubleDoublePr CTimeSeriesDecomposition::smooth(const F &f, - core_t::TTime time, - int components) const -{ - auto offset = [&f, time](core_t::TTime discontinuity) - { - TVector2x1 baselineMinusEps{vector2x1(f(discontinuity - 1))}; - TVector2x1 baselinePlusEps{ vector2x1(f(discontinuity + 1))}; - return 0.5 * (1.0 - static_cast(std::abs(time - discontinuity)) - / static_cast(SMOOTHING_INTERVAL)) - * 
(baselinePlusEps - baselineMinusEps); - }; - - for (const auto &component : m_Components.seasonal()) - { - if ( !component.initialized() - || !this->matches(components, component) - || component.time().windowRepeat() <= SMOOTHING_INTERVAL) - { +TDoubleDoublePr CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, int components) const { + auto offset = [&f, time](core_t::TTime discontinuity) { + TVector2x1 baselineMinusEps{vector2x1(f(discontinuity - 1))}; + TVector2x1 baselinePlusEps{vector2x1(f(discontinuity + 1))}; + return 0.5 * (1.0 - static_cast(std::abs(time - discontinuity)) / static_cast(SMOOTHING_INTERVAL)) * + (baselinePlusEps - baselineMinusEps); + }; + + for (const auto& component : m_Components.seasonal()) { + if (!component.initialized() || !this->matches(components, component) || component.time().windowRepeat() <= SMOOTHING_INTERVAL) { continue; } - const CSeasonalTime ×{component.time()}; + const CSeasonalTime& times{component.time()}; bool timeInWindow{times.inWindow(time)}; bool inWindowBefore{times.inWindow(time - SMOOTHING_INTERVAL)}; bool inWindowAfter{times.inWindow(time + SMOOTHING_INTERVAL)}; - if ( (!timeInWindow && inWindowBefore) - || (timeInWindow && inWindowBefore && times.startOfWindow(time) != - times.startOfWindow(time + SMOOTHING_INTERVAL))) - { - core_t::TTime discontinuity{ times.startOfWindow(time - SMOOTHING_INTERVAL) - + times.windowLength()}; + if ((!timeInWindow && inWindowBefore) || + (timeInWindow && inWindowBefore && times.startOfWindow(time) != times.startOfWindow(time + SMOOTHING_INTERVAL))) { + core_t::TTime discontinuity{times.startOfWindow(time - SMOOTHING_INTERVAL) + times.windowLength()}; return pair(-offset(discontinuity)); } - if ( (!timeInWindow && inWindowAfter) - || (timeInWindow && inWindowAfter && times.startOfWindow(time) != - times.startOfWindow(time + SMOOTHING_INTERVAL))) - { + if ((!timeInWindow && inWindowAfter) || + (timeInWindow && inWindowAfter && times.startOfWindow(time) != times.startOfWindow(time + SMOOTHING_INTERVAL))) { core_t::TTime discontinuity{component.time().startOfWindow(time + SMOOTHING_INTERVAL)}; return pair(offset(discontinuity)); } @@ -617,21 +492,13 @@ TDoubleDoublePr CTimeSeriesDecomposition::smooth(const F &f, return {0.0, 0.0}; } -bool CTimeSeriesDecomposition::selected(core_t::TTime time, - int components, - const CSeasonalComponent &component) const -{ - return component.initialized() - && this->matches(components, component) - && component.time().inWindow(time); +bool CTimeSeriesDecomposition::selected(core_t::TTime time, int components, const CSeasonalComponent& component) const { + return component.initialized() && this->matches(components, component) && component.time().inWindow(time); } -bool CTimeSeriesDecomposition::matches(int components, - const CSeasonalComponent &component) const -{ +bool CTimeSeriesDecomposition::matches(int components, const CSeasonalComponent& component) const { int seasonal{components & E_Seasonal}; - if (seasonal == E_Seasonal) - { + if (seasonal == E_Seasonal) { return true; } core_t::TTime period{component.time().period()}; @@ -639,12 +506,10 @@ bool CTimeSeriesDecomposition::matches(int components, return (seasonal == E_Diurnal && diurnal) || (seasonal == E_NonDiurnal && !diurnal); } -core_t::TTime CTimeSeriesDecomposition::lastValueTime() const -{ +core_t::TTime CTimeSeriesDecomposition::lastValueTime() const { return m_LastValueTime; } const core_t::TTime CTimeSeriesDecomposition::SMOOTHING_INTERVAL{7200}; - } } diff --git 
a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index 04007b6454..931510fc72 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -9,11 +9,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include @@ -33,7 +33,6 @@ #include #include -#include #include #include #include @@ -48,12 +47,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleDoublePr = maths_t::TDoubleDoublePr; using TSeasonalComponentVec = maths_t::TSeasonalComponentVec; @@ -72,20 +68,18 @@ using TComponent5Vec = CPeriodicityHypothesisTestsResult::TComponent5Vec; using TSeasonalComponentPtrVec = std::vector; using TCalendarComponentPtrVec = std::vector; -const core_t::TTime DAY = core::constants::DAY; -const core_t::TTime WEEK = core::constants::WEEK; +const core_t::TTime DAY = core::constants::DAY; +const core_t::TTime WEEK = core::constants::WEEK; const core_t::TTime MONTH = 4 * WEEK; //! Get the square of \p x. -double pow2(double x) -{ +double pow2(double x) { return x * x; } //! Compute the mean of \p mean of \p components. template -double meanOf(MEAN_FUNCTION mean, const TSeasonalComponentVec &components) -{ +double meanOf(MEAN_FUNCTION mean, const TSeasonalComponentVec& components) { // We can choose to partition the trend model into windows. // In particular, we check for the presence of weekday/end // patterns. In this function we want to compute the sum of @@ -99,27 +93,20 @@ double meanOf(MEAN_FUNCTION mean, const TSeasonalComponentVec &components) double unwindowed{0.0}; TTimeTimePrDoubleFMap windows; windows.reserve(components.size()); - for (const auto &component : components) - { - if (component.initialized()) - { + for (const auto& component : components) { + if (component.initialized()) { TTimeTimePr window{component.time().window()}; - if (window.second - window.first == component.time().windowRepeat()) - { + if (window.second - window.first == component.time().windowRepeat()) { unwindowed += (component.*mean)(); - } - else - { + } else { windows[window] += (component.*mean)(); } } } TMeanAccumulator windowed; - for (const auto &window : windows) - { - double weight{static_cast( - window.first.second - window.first.first)}; + for (const auto& window : windows) { + double weight{static_cast(window.first.second - window.first.first)}; windowed.add(window.second, weight); } @@ -140,30 +127,27 @@ double meanOf(MEAN_FUNCTION mean, const TSeasonalComponentVec &components) //! \param[out] predictions Filled in with the component predictions. //! \param[out] error Filled in with the prediction error. //! \param[out] scale Filled in with the normalization scaling. 
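The weighting in meanOf above is worth spelling out: components that apply everywhere contribute their mean directly, while components confined to windows (for example a weekday/weekend split) have their per-window sums averaged with weights proportional to window length. A reduced sketch of just that weighted average, with windows simplified to integer ranges (all names hypothetical, windows assumed non-overlapping):

    #include <iostream>
    #include <map>
    #include <utility>

    using TWindow = std::pair<long, long>; // [start, end) offsets of a window

    // Average per-window component sums, weighting each window by its length.
    double windowedMean(const std::map<TWindow, double>& windows) {
        double weightedSum{0.0};
        double weightTotal{0.0};
        for (const auto& w : windows) {
            double weight{static_cast<double>(w.first.second - w.first.first)};
            weightedSum += weight * w.second;
            weightTotal += weight;
        }
        return weightTotal > 0.0 ? weightedSum / weightTotal : 0.0;
    }

    int main() {
        // Weekday window five days long, weekend window two days long.
        std::map<TWindow, double> windows{{{0, 5}, 10.0}, {{5, 7}, 3.0}};
        std::cout << windowedMean(windows) << std::endl; // (5*10 + 2*3) / 7 = 8
        return 0;
    }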
-void decompose(const CTrendComponent &trend, - const TSeasonalComponentPtrVec &seasonal, - const TCalendarComponentPtrVec &calendar, +void decompose(const CTrendComponent& trend, + const TSeasonalComponentPtrVec& seasonal, + const TCalendarComponentPtrVec& calendar, core_t::TTime time, - const TDoubleVec &deltas, - TDoubleVec &decomposition, - TDoubleVec &predictions, - double &error, - double &scale) -{ + const TDoubleVec& deltas, + TDoubleVec& decomposition, + TDoubleVec& predictions, + double& error, + double& scale) { std::size_t m{seasonal.size()}; std::size_t n{calendar.size()}; double x0{CBasicStatistics::mean(trend.value(time, 0.0))}; TDoubleVec x(m + n); double xhat{x0}; - for (std::size_t i = 0u; i < m; ++i) - { - x[i] = CBasicStatistics::mean(seasonal[i]->value(time, 0.0)); + for (std::size_t i = 0u; i < m; ++i) { + x[i] = CBasicStatistics::mean(seasonal[i]->value(time, 0.0)); xhat += x[i]; } - for (std::size_t i = m; i < m + n; ++i) - { - x[i] = CBasicStatistics::mean(calendar[i - m]->value(time, 0.0)); + for (std::size_t i = m; i < m + n; ++i) { + x[i] = CBasicStatistics::mean(calendar[i - m]->value(time, 0.0)); xhat += x[i]; } @@ -182,13 +166,11 @@ void decompose(const CTrendComponent &trend, error = decomposition[0] - xhat; decomposition[0] = x0 + (decomposition[0] - xhat) / Z; - for (std::size_t i = 0u; i < m; ++i) - { + for (std::size_t i = 0u; i < m; ++i) { predictions[i] = x[i] - seasonal[i]->meanValue(); decomposition[i + 1] = x[i] + (decomposition[i + 1] - xhat) / Z + deltas[i]; } - for (std::size_t i = m; i < m + n; ++i) - { + for (std::size_t i = m; i < m + n; ++i) { predictions[i] = x[i] - calendar[i - m]->meanValue(); decomposition[i + 1] = x[i] + (decomposition[i + 1] - xhat) / Z; } @@ -204,17 +186,11 @@ void decompose(const CTrendComponent &trend, //! Propagate a test forwards to account for \p end - \p start //! elapsed time in steps or size \p step. template -void stepwisePropagateForwards(core_t::TTime step, - core_t::TTime start, - core_t::TTime end, - const T &target) -{ - if (target) - { +void stepwisePropagateForwards(core_t::TTime step, core_t::TTime start, core_t::TTime end, const T& target) { + if (target) { start = CIntegerTools::floor(start, step); - end = CIntegerTools::floor(end, step); - if (end > start) - { + end = CIntegerTools::floor(end, step); + if (end > start) { double time{static_cast(end - start) / static_cast(step)}; target->propagateForwardsByTime(time); } @@ -222,18 +198,13 @@ void stepwisePropagateForwards(core_t::TTime step, } //! Apply the common shift to the slope of \p trend. 
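stepwisePropagateForwards above deliberately quantizes both ends of the elapsed interval to whole steps before ageing its target, so that many small updates age a model by exactly as much as one coarse update over the same interval. A self-contained sketch of that behaviour under multiplicative ageing (function names hypothetical; the real code delegates the ageing itself to the target's propagateForwardsByTime):

    #include <cmath>
    #include <iostream>

    // Floor x to a multiple of step; non-negative inputs assumed.
    long floorToStep(long x, long step) {
        return (x / step) * step;
    }

    // Age a value by a constant factor per whole step elapsed in [start, end).
    double stepwisePropagate(long step, long start, long end, double value, double factorPerStep) {
        start = floorToStep(start, step);
        end = floorToStep(end, step);
        if (end > start) {
            double steps{static_cast<double>(end - start) / static_cast<double>(step)};
            value *= std::pow(factorPerStep, steps);
        }
        return value;
    }

    int main() {
        const long day{86400};
        // Two half-day updates age exactly as much as one full-day update.
        double a{stepwisePropagate(day, 0, day / 2, 1.0, 0.5)};  // crosses no day boundary
        double b{stepwisePropagate(day, day / 2, day, a, 0.5)};  // crosses one boundary
        double c{stepwisePropagate(day, 0, day, 1.0, 0.5)};      // one update, same interval
        std::cout << b << " == " << c << std::endl;              // both 0.5
        return 0;
    }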
-void shiftSlope(const TTimeTimePrDoubleFMap &slopes, - double decayRate, - CTrendComponent &trend) -{ +void shiftSlope(const TTimeTimePrDoubleFMap& slopes, double decayRate, CTrendComponent& trend) { CBasicStatistics::CMinMax minmax; - for (const auto &slope : slopes) - { + for (const auto& slope : slopes) { minmax.add(slope.second); } double shift{minmax.signMargin()}; - if (shift != 0.0) - { + if (shift != 0.0) { trend.shiftSlope(decayRate, shift); } } @@ -241,61 +212,52 @@ void shiftSlope(const TTimeTimePrDoubleFMap &slopes, // Periodicity Test State Machine // States -const std::size_t PT_INITIAL = 0; -const std::size_t PT_TEST = 1; +const std::size_t PT_INITIAL = 0; +const std::size_t PT_TEST = 1; const std::size_t PT_NOT_TESTING = 2; -const std::size_t PT_ERROR = 3; -const TStrVec PT_STATES{"INITIAL", "TEST", "NOT_TESTING", "ERROR" }; +const std::size_t PT_ERROR = 3; +const TStrVec PT_STATES{"INITIAL", "TEST", "NOT_TESTING", "ERROR"}; // Alphabet const std::size_t PT_NEW_VALUE = 0; -const std::size_t PT_RESET = 1; +const std::size_t PT_RESET = 1; const TStrVec PT_ALPHABET{"NEW_VALUE", "RESET"}; // Transition Function -const TSizeVecVec PT_TRANSITION_FUNCTION - { - TSizeVec{PT_TEST, PT_TEST, PT_NOT_TESTING, PT_ERROR }, - TSizeVec{PT_INITIAL, PT_INITIAL, PT_NOT_TESTING, PT_INITIAL} - }; +const TSizeVecVec PT_TRANSITION_FUNCTION{TSizeVec{PT_TEST, PT_TEST, PT_NOT_TESTING, PT_ERROR}, + TSizeVec{PT_INITIAL, PT_INITIAL, PT_NOT_TESTING, PT_INITIAL}}; // Calendar Cyclic Test State Machine // States -const std::size_t CC_INITIAL = 0; -const std::size_t CC_TEST = 1; +const std::size_t CC_INITIAL = 0; +const std::size_t CC_TEST = 1; const std::size_t CC_NOT_TESTING = 2; -const std::size_t CC_ERROR = 3; +const std::size_t CC_ERROR = 3; const TStrVec CC_STATES{"INITIAL", "TEST", "NOT_TESTING", "ERROR"}; // Alphabet const std::size_t CC_NEW_VALUE = 0; -const std::size_t CC_RESET = 1; +const std::size_t CC_RESET = 1; const TStrVec CC_ALPHABET{"NEW_VALUE", "RESET"}; // Transition Function -const TSizeVecVec CC_TRANSITION_FUNCTION - { - TSizeVec{CC_TEST, CC_TEST, CC_NOT_TESTING, CC_ERROR }, - TSizeVec{CC_INITIAL, CC_INITIAL, CC_NOT_TESTING, CC_INITIAL} - }; +const TSizeVecVec CC_TRANSITION_FUNCTION{TSizeVec{CC_TEST, CC_TEST, CC_NOT_TESTING, CC_ERROR}, + TSizeVec{CC_INITIAL, CC_INITIAL, CC_NOT_TESTING, CC_INITIAL}}; // Components State Machine // States const std::size_t SC_NEW_COMPONENTS = 0; -const std::size_t SC_NORMAL = 1; -const std::size_t SC_DISABLED = 2; -const std::size_t SC_ERROR = 3; +const std::size_t SC_NORMAL = 1; +const std::size_t SC_DISABLED = 2; +const std::size_t SC_ERROR = 3; const TStrVec SC_STATES{"NEW_COMPONENTS", "NORMAL", "DISABLED", "ERROR"}; // Alphabet const std::size_t SC_ADDED_COMPONENTS = 0; -const std::size_t SC_INTERPOLATED = 1; -const std::size_t SC_RESET = 2; +const std::size_t SC_INTERPOLATED = 1; +const std::size_t SC_RESET = 2; const TStrVec SC_ALPHABET{"ADDED_COMPONENTS", "INTERPOLATED", "RESET"}; // Transition Function -const TSizeVecVec SC_TRANSITION_FUNCTION - { - TSizeVec{SC_NEW_COMPONENTS, SC_NEW_COMPONENTS, SC_DISABLED, SC_ERROR }, - TSizeVec{SC_NORMAL, SC_NORMAL, SC_DISABLED, SC_ERROR }, - TSizeVec{SC_NORMAL, SC_NORMAL, SC_NORMAL, SC_NORMAL} - }; +const TSizeVecVec SC_TRANSITION_FUNCTION{TSizeVec{SC_NEW_COMPONENTS, SC_NEW_COMPONENTS, SC_DISABLED, SC_ERROR}, + TSizeVec{SC_NORMAL, SC_NORMAL, SC_DISABLED, SC_ERROR}, + TSizeVec{SC_NORMAL, SC_NORMAL, SC_NORMAL, SC_NORMAL}}; const std::string VERSION_6_3_TAG("6.3"); @@ -342,40 +304,29 @@ const std::string 
LAST_UPDATE_OLD_TAG{"j"}; const double MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3{48.0}; -bool upgradeTrendModelToVersion6p3(const core_t::TTime bucketLength, - CTrendComponent &trend, - core::CStateRestoreTraverser &traverser) -{ +bool upgradeTrendModelToVersion6p3(const core_t::TTime bucketLength, CTrendComponent& trend, core::CStateRestoreTraverser& traverser) { using TRegression = CRegression::CLeastSquaresOnline<3, double>; TRegression regression; double variance{0.0}; core_t::TTime origin{0}; core_t::TTime lastUpdate{0}; - do - { - const std::string &name{traverser.name()}; - RESTORE(REGRESSION_OLD_TAG, traverser.traverseSubLevel(boost::bind( - &TRegression::acceptRestoreTraverser, ®ression, _1))) + do { + const std::string& name{traverser.name()}; + RESTORE(REGRESSION_OLD_TAG, traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, ®ression, _1))) RESTORE_BUILT_IN(VARIANCE_OLD_TAG, variance) RESTORE_BUILT_IN(TIME_ORIGIN_OLD_TAG, origin) RESTORE_BUILT_IN(LAST_UPDATE_OLD_TAG, lastUpdate) - } - while (traverser.next()); + } while (traverser.next()); // Generate some samples from the old trend model. - double weight{MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3 * static_cast(bucketLength) - / static_cast(4 * WEEK)}; + double weight{MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3 * static_cast(bucketLength) / static_cast(4 * WEEK)}; CPRNG::CXorOShiro128Plus rng; - for (core_t::TTime time = lastUpdate - 4 * WEEK; - time < lastUpdate; - time += bucketLength) - { + for (core_t::TTime time = lastUpdate - 4 * WEEK; time < lastUpdate; time += bucketLength) { double time_{static_cast(time - origin) / static_cast(WEEK)}; - double sample{ regression.predict(time_) - + CSampling::normalSample(rng, 0.0, variance)}; + double sample{regression.predict(time_) + CSampling::normalSample(rng, 0.0, variance)}; trend.add(time, sample, weight); } @@ -389,187 +340,159 @@ const core_t::TTime FOREVER{boost::numeric::bounds::highest()}; const std::size_t MAXIMUM_COMPONENTS{8}; const TSeasonalComponentVec NO_SEASONAL_COMPONENTS; const TCalendarComponentVec NO_CALENDAR_COMPONENTS; - } //////// SMessage //////// -CTimeSeriesDecompositionDetail::SMessage::SMessage(core_t::TTime time, core_t::TTime lastTime) : - s_Time{time}, s_LastTime{lastTime} -{} +CTimeSeriesDecompositionDetail::SMessage::SMessage(core_t::TTime time, core_t::TTime lastTime) : s_Time{time}, s_LastTime{lastTime} { +} //////// SAddValue //////// CTimeSeriesDecompositionDetail::SAddValue::SAddValue(core_t::TTime time, core_t::TTime lastTime, double value, - const maths_t::TWeightStyleVec &weightStyles, - const maths_t::TDouble4Vec &weights, + const maths_t::TWeightStyleVec& weightStyles, + const maths_t::TDouble4Vec& weights, double trend, double seasonal, double calendar, - const TPredictor &predictor, - const CPeriodicityHypothesisTestsConfig &periodicityTestConfig) : - SMessage{time, lastTime}, - s_Value{value}, - s_WeightStyles{weightStyles}, - s_Weights{weights}, - s_Trend{trend}, - s_Seasonal{seasonal}, - s_Calendar{calendar}, - s_Predictor{predictor}, - s_PeriodicityTestConfig{periodicityTestConfig} -{} + const TPredictor& predictor, + const CPeriodicityHypothesisTestsConfig& periodicityTestConfig) + : SMessage{time, lastTime}, + s_Value{value}, + s_WeightStyles{weightStyles}, + s_Weights{weights}, + s_Trend{trend}, + s_Seasonal{seasonal}, + s_Calendar{calendar}, + s_Predictor{predictor}, + s_PeriodicityTestConfig{periodicityTestConfig} { +} //////// SDetectedSeasonal //////// 
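upgradeTrendModelToVersion6p3 above cannot translate the old regression state field-for-field, so it replays it into the new trend component: one synthetic sample per bucket over the trailing four weeks, drawn from the old prediction plus Gaussian noise, with the per-sample weight scaled so the total replayed weight is independent of bucket length. A simplified sketch of that replay, using the standard library RNG in place of the ml sampling utilities (note std::normal_distribution takes a standard deviation where the original passes a variance):

    #include <cmath>
    #include <functional>
    #include <iostream>
    #include <random>

    // Replay an old trend fit into a new model as weighted noisy samples: one
    // sample per bucket over the trailing window, each carrying an equal share
    // of a fixed total weight.
    void replayTrend(const std::function<double(long)>& oldPrediction,
                     double noiseVariance,
                     long bucketLength,
                     long lastUpdate,
                     long window,
                     double totalWeight,
                     const std::function<void(long, double, double)>& addSample) {
        std::mt19937 rng{42};
        std::normal_distribution<double> noise{0.0, std::sqrt(noiseVariance)};
        double weight{totalWeight * static_cast<double>(bucketLength) / static_cast<double>(window)};
        for (long time = lastUpdate - window; time < lastUpdate; time += bucketLength) {
            addSample(time, oldPrediction(time) + noise(rng), weight);
        }
    }

    int main() {
        const long week{604800};
        double replayed{0.0};
        replayTrend([](long t) { return 0.001 * static_cast<double>(t); },
                    1.0, 3600, 4 * week, 4 * week, 48.0,
                    [&replayed](long /*time*/, double /*sample*/, double weight) { replayed += weight; });
        std::cout << "replayed weight " << replayed << std::endl; // ~48, independent of bucket length
        return 0;
    }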
CTimeSeriesDecompositionDetail::SDetectedSeasonal::SDetectedSeasonal(core_t::TTime time, core_t::TTime lastTime, - const CPeriodicityHypothesisTestsResult &result, - const CExpandingWindow &window, - const TPredictor &predictor) : - SMessage{time, lastTime}, - s_Result{result}, - s_Window{window}, - s_Predictor{predictor} -{} + const CPeriodicityHypothesisTestsResult& result, + const CExpandingWindow& window, + const TPredictor& predictor) + : SMessage{time, lastTime}, s_Result{result}, s_Window{window}, s_Predictor{predictor} { +} //////// SDetectedCalendar //////// -CTimeSeriesDecompositionDetail::SDetectedCalendar::SDetectedCalendar(core_t::TTime time, - core_t::TTime lastTime, - CCalendarFeature feature) : - SMessage{time, lastTime}, s_Feature{feature} -{} +CTimeSeriesDecompositionDetail::SDetectedCalendar::SDetectedCalendar(core_t::TTime time, core_t::TTime lastTime, CCalendarFeature feature) + : SMessage{time, lastTime}, s_Feature{feature} { +} //////// SNewComponent //////// -CTimeSeriesDecompositionDetail::SNewComponents::SNewComponents(core_t::TTime time, - core_t::TTime lastTime, - EComponent component) : - SMessage{time, lastTime}, s_Component{component} -{} +CTimeSeriesDecompositionDetail::SNewComponents::SNewComponents(core_t::TTime time, core_t::TTime lastTime, EComponent component) + : SMessage{time, lastTime}, s_Component{component} { +} //////// CHandler //////// -CTimeSeriesDecompositionDetail::CHandler::CHandler() : m_Mediator{0} {} -CTimeSeriesDecompositionDetail::CHandler::~CHandler() {} +CTimeSeriesDecompositionDetail::CHandler::CHandler() : m_Mediator{0} { +} +CTimeSeriesDecompositionDetail::CHandler::~CHandler() { +} -void CTimeSeriesDecompositionDetail::CHandler::handle(const SAddValue &/*message*/) {} +void CTimeSeriesDecompositionDetail::CHandler::handle(const SAddValue& /*message*/) { +} -void CTimeSeriesDecompositionDetail::CHandler::handle(const SDetectedSeasonal &/*message*/) {} +void CTimeSeriesDecompositionDetail::CHandler::handle(const SDetectedSeasonal& /*message*/) { +} -void CTimeSeriesDecompositionDetail::CHandler::handle(const SDetectedCalendar &/*message*/) {} +void CTimeSeriesDecompositionDetail::CHandler::handle(const SDetectedCalendar& /*message*/) { +} -void CTimeSeriesDecompositionDetail::CHandler::handle(const SNewComponents &/*message*/) {} +void CTimeSeriesDecompositionDetail::CHandler::handle(const SNewComponents& /*message*/) { +} -void CTimeSeriesDecompositionDetail::CHandler::mediator(CMediator *mediator) -{ +void CTimeSeriesDecompositionDetail::CHandler::mediator(CMediator* mediator) { m_Mediator = mediator; } -CTimeSeriesDecompositionDetail::CMediator *CTimeSeriesDecompositionDetail::CHandler::mediator() const -{ +CTimeSeriesDecompositionDetail::CMediator* CTimeSeriesDecompositionDetail::CHandler::mediator() const { return m_Mediator; } //////// CMediator //////// template -void CTimeSeriesDecompositionDetail::CMediator::forward(const M &message) const -{ - for (CHandler &handler : m_Handlers) - { +void CTimeSeriesDecompositionDetail::CMediator::forward(const M& message) const { + for (CHandler& handler : m_Handlers) { handler.handle(message); } } -void CTimeSeriesDecompositionDetail::CMediator::registerHandler(CHandler &handler) -{ +void CTimeSeriesDecompositionDetail::CMediator::registerHandler(CHandler& handler) { m_Handlers.push_back(boost::ref(handler)); handler.mediator(this); } -void CTimeSeriesDecompositionDetail::CMediator::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void 
CTimeSeriesDecompositionDetail::CMediator::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMediator"); core::CMemoryDebug::dynamicSize("m_Handlers", m_Handlers, mem); } -std::size_t CTimeSeriesDecompositionDetail::CMediator::memoryUsage() const -{ +std::size_t CTimeSeriesDecompositionDetail::CMediator::memoryUsage() const { return core::CMemory::dynamicSize(m_Handlers); } //////// CPeriodicityTest //////// -CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(double decayRate, - core_t::TTime bucketLength) : - m_Machine{core::CStateMachine::create( - PT_ALPHABET, PT_STATES, PT_TRANSITION_FUNCTION, - bucketLength > LONG_BUCKET_LENGTHS.back() ? PT_NOT_TESTING : PT_INITIAL)}, - m_DecayRate{decayRate}, - m_BucketLength{bucketLength} -{} - -CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(const CPeriodicityTest &other, - bool isForForecast) : - m_Machine{other.m_Machine}, - m_DecayRate{other.m_DecayRate}, - m_BucketLength{other.m_BucketLength} -{ +CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(double decayRate, core_t::TTime bucketLength) + : m_Machine{core::CStateMachine::create(PT_ALPHABET, + PT_STATES, + PT_TRANSITION_FUNCTION, + bucketLength > LONG_BUCKET_LENGTHS.back() ? PT_NOT_TESTING : PT_INITIAL)}, + m_DecayRate{decayRate}, + m_BucketLength{bucketLength} { +} + +CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(const CPeriodicityTest& other, bool isForForecast) + : m_Machine{other.m_Machine}, m_DecayRate{other.m_DecayRate}, m_BucketLength{other.m_BucketLength} { // Note that m_Windows is an array. - for (std::size_t i = 0u; !isForForecast && i < other.m_Windows.size(); ++i) - { - if (other.m_Windows[i]) - { + for (std::size_t i = 0u; !isForForecast && i < other.m_Windows.size(); ++i) { + if (other.m_Windows[i]) { m_Windows[i] = boost::make_shared(*other.m_Windows[i]); } } } -bool CTimeSeriesDecompositionDetail::CPeriodicityTest::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; - RESTORE(PERIODICITY_TEST_MACHINE_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))) +bool CTimeSeriesDecompositionDetail::CPeriodicityTest::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE(PERIODICITY_TEST_MACHINE_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))) RESTORE_SETUP_TEARDOWN(SHORT_WINDOW_6_3_TAG, m_Windows[E_Short].reset(this->newWindow(E_Short)), - m_Windows[E_Short] && traverser.traverseSubLevel( - boost::bind(&CExpandingWindow::acceptRestoreTraverser, - m_Windows[E_Short].get(), _1)), + m_Windows[E_Short] && traverser.traverseSubLevel(boost::bind( + &CExpandingWindow::acceptRestoreTraverser, m_Windows[E_Short].get(), _1)), /**/) RESTORE_SETUP_TEARDOWN(LONG_WINDOW_6_3_TAG, m_Windows[E_Long].reset(this->newWindow(E_Long)), - m_Windows[E_Long] && traverser.traverseSubLevel( - boost::bind(&CExpandingWindow::acceptRestoreTraverser, - m_Windows[E_Long].get(), _1)), + m_Windows[E_Long] && traverser.traverseSubLevel(boost::bind( + &CExpandingWindow::acceptRestoreTraverser, m_Windows[E_Long].get(), _1)), /**/) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - 
inserter.insertLevel(PERIODICITY_TEST_MACHINE_6_3_TAG, - boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); - if (m_Windows[E_Short]) - { - inserter.insertLevel(SHORT_WINDOW_6_3_TAG, boost::bind( - &CExpandingWindow::acceptPersistInserter, m_Windows[E_Short].get(), _1)); +void CTimeSeriesDecompositionDetail::CPeriodicityTest::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(PERIODICITY_TEST_MACHINE_6_3_TAG, boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); + if (m_Windows[E_Short]) { + inserter.insertLevel(SHORT_WINDOW_6_3_TAG, boost::bind(&CExpandingWindow::acceptPersistInserter, m_Windows[E_Short].get(), _1)); } - if (m_Windows[E_Long]) - { - inserter.insertLevel(LONG_WINDOW_6_3_TAG, boost::bind( - &CExpandingWindow::acceptPersistInserter, m_Windows[E_Long].get(), _1)); + if (m_Windows[E_Long]) { + inserter.insertLevel(LONG_WINDOW_6_3_TAG, boost::bind(&CExpandingWindow::acceptPersistInserter, m_Windows[E_Long].get(), _1)); } } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::swap(CPeriodicityTest &other) -{ +void CTimeSeriesDecompositionDetail::CPeriodicityTest::swap(CPeriodicityTest& other) { std::swap(m_Machine, other.m_Machine); std::swap(m_DecayRate, other.m_DecayRate); std::swap(m_BucketLength, other.m_BucketLength); @@ -577,23 +500,19 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::swap(CPeriodicityTest &ot m_Windows[E_Long].swap(other.m_Windows[E_Long]); } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::handle(const SAddValue &message) -{ +void CTimeSeriesDecompositionDetail::CPeriodicityTest::handle(const SAddValue& message) { core_t::TTime time{message.s_Time}; double value{message.s_Value}; - const maths_t::TWeightStyleVec &weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec &weights{message.s_Weights}; + const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; + const maths_t::TDouble4Vec& weights{message.s_Weights}; double weight{maths_t::countForUpdate(weightStyles, weights)}; this->test(message); - switch (m_Machine.state()) - { + switch (m_Machine.state()) { case PT_TEST: - for (auto &window : m_Windows) - { - if (window) - { + for (auto& window : m_Windows) { + if (window) { window->add(time, value, weight); } } @@ -611,33 +530,27 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::handle(const SAddValue &m } } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::handle(const SNewComponents &/*message*/) -{ +void CTimeSeriesDecompositionDetail::CPeriodicityTest::handle(const SNewComponents& /*message*/) { // This can be a no-op because we always maintain the raw time // series values in the windows and apply corrections for other // components only when we test. 
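    // For example, when a new daily component is accepted the windows keep
    // their raw values; the next call to test() subtracts the up-to-date
    // prediction via valuesMinusPrediction(), so no window reset is needed
    // here.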
} -void CTimeSeriesDecompositionDetail::CPeriodicityTest::test(const SAddValue &message) -{ +void CTimeSeriesDecompositionDetail::CPeriodicityTest::test(const SAddValue& message) { core_t::TTime time{message.s_Time}; core_t::TTime lastTime{message.s_LastTime}; - const TPredictor &predictor{message.s_Predictor}; - const CPeriodicityHypothesisTestsConfig &config{message.s_PeriodicityTestConfig}; + const TPredictor& predictor{message.s_Predictor}; + const CPeriodicityHypothesisTestsConfig& config{message.s_PeriodicityTestConfig}; - switch (m_Machine.state()) - { + switch (m_Machine.state()) { case PT_TEST: - for (const auto &window : m_Windows) - { - if (this->shouldTest(window, time)) - { + for (const auto& window : m_Windows) { + if (this->shouldTest(window, time)) { TFloatMeanAccumulatorVec values(window->valuesMinusPrediction(predictor)); core_t::TTime start{CIntegerTools::floor(window->startTime(), m_BucketLength)}; core_t::TTime bucketLength{window->bucketLength()}; CPeriodicityHypothesisTestsResult result{testForPeriods(config, start, bucketLength, values)}; - if (result.periodic()) - { + if (result.periodic()) { this->mediator()->forward(SDetectedSeasonal{time, lastTime, result, *window, predictor}); } } @@ -653,44 +566,35 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::test(const SAddValue &mes } } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::propagateForwards(core_t::TTime start, - core_t::TTime end) -{ - stepwisePropagateForwards(DAY, start, end, m_Windows[E_Short]); +void CTimeSeriesDecompositionDetail::CPeriodicityTest::propagateForwards(core_t::TTime start, core_t::TTime end) { + stepwisePropagateForwards(DAY, start, end, m_Windows[E_Short]); stepwisePropagateForwards(WEEK, start, end, m_Windows[E_Long]); } -uint64_t CTimeSeriesDecompositionDetail::CPeriodicityTest::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesDecompositionDetail::CPeriodicityTest::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_Machine); seed = CChecksum::calculate(seed, m_DecayRate); seed = CChecksum::calculate(seed, m_BucketLength); return CChecksum::calculate(seed, m_Windows); } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CTimeSeriesDecompositionDetail::CPeriodicityTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CPeriodicityTest"); core::CMemoryDebug::dynamicSize("m_Windows", m_Windows, mem); } -std::size_t CTimeSeriesDecompositionDetail::CPeriodicityTest::memoryUsage() const -{ +std::size_t CTimeSeriesDecompositionDetail::CPeriodicityTest::memoryUsage() const { std::size_t usage{core::CMemory::dynamicSize(m_Windows)}; - if (m_Machine.state() == PT_INITIAL) - { + if (m_Machine.state() == PT_INITIAL) { usage += this->extraMemoryOnInitialization(); } return usage; } -std::size_t CTimeSeriesDecompositionDetail::CPeriodicityTest::extraMemoryOnInitialization() const -{ +std::size_t CTimeSeriesDecompositionDetail::CPeriodicityTest::extraMemoryOnInitialization() const { static std::size_t result{0}; - if (result == 0) - { - for (auto i : {E_Short, E_Long}) - { + if (result == 0) { + for (auto i : {E_Short, E_Long}) { TExpandingWindowPtr window(this->newWindow(i)); result += core::CMemory::dynamicSize(window); } @@ -698,37 +602,28 @@ std::size_t CTimeSeriesDecompositionDetail::CPeriodicityTest::extraMemoryOnIniti return result; } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, - const SMessage &message) -{ +void 
CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, const SMessage& message) { core_t::TTime time{message.s_Time}; std::size_t old{m_Machine.state()}; m_Machine.apply(symbol); std::size_t state{m_Machine.state()}; - if (state != old) - { + if (state != old) { LOG_TRACE(PT_STATES[old] << "," << PT_ALPHABET[symbol] << " -> " << PT_STATES[state]); - auto initialize = [this](core_t::TTime time_) - { - for (auto i : {E_Short, E_Long}) - { - m_Windows[i].reset(this->newWindow(i)); - if (m_Windows[i]) - { - m_Windows[i]->initialize(time_); - } + auto initialize = [this](core_t::TTime time_) { + for (auto i : {E_Short, E_Long}) { + m_Windows[i].reset(this->newWindow(i)); + if (m_Windows[i]) { + m_Windows[i]->initialize(time_); } - }; + } + }; - switch (state) - { + switch (state) { case PT_TEST: - if (std::all_of(m_Windows.begin(), m_Windows.end(), - [](const TExpandingWindowPtr &window) { return !window; })) - { + if (std::all_of(m_Windows.begin(), m_Windows.end(), [](const TExpandingWindowPtr& window) { return !window; })) { initialize(time); } break; @@ -747,132 +642,105 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, } } -bool CTimeSeriesDecompositionDetail::CPeriodicityTest::shouldTest(const TExpandingWindowPtr &window, - core_t::TTime time) const -{ +bool CTimeSeriesDecompositionDetail::CPeriodicityTest::shouldTest(const TExpandingWindowPtr& window, core_t::TTime time) const { // We need to test more frequently than we compress the window, because // compression only happens after we've seen 336 buckets; otherwise we // would significantly delay the first detection of a daily periodicity // for longer bucket lengths. - auto shouldTest = [this, time](const TExpandingWindowPtr &window_) - { - core_t::TTime length{time - window_->startTime()}; - for (auto lengthToTest : {3 * DAY, 1 * WEEK, 2 * WEEK}) - { - if (length >= lengthToTest && length < lengthToTest + m_BucketLength) - { - return true; - } + auto shouldTest = [this, time](const TExpandingWindowPtr& window_) { + core_t::TTime length{time - window_->startTime()}; + for (auto lengthToTest : {3 * DAY, 1 * WEEK, 2 * WEEK}) { + if (length >= lengthToTest && length < lengthToTest + m_BucketLength) { + return true; } - return false; - }; + } + return false; + }; return window && (window->needToCompress(time) || shouldTest(window)); } -CExpandingWindow *CTimeSeriesDecompositionDetail::CPeriodicityTest::newWindow(ETest test) const -{ +CExpandingWindow* CTimeSeriesDecompositionDetail::CPeriodicityTest::newWindow(ETest test) const { using TTimeCRng = CExpandingWindow::TTimeCRng; - auto newWindow = [this](const TTimeVec &bucketLengths) - { - if (m_BucketLength <= bucketLengths.back()) - { - std::ptrdiff_t a{std::lower_bound(bucketLengths.begin(), - bucketLengths.end(), - m_BucketLength) - bucketLengths.begin()}; - std::size_t b{bucketLengths.size()}; - TTimeCRng bucketLengths_(bucketLengths, a, b); - return new CExpandingWindow(m_BucketLength, bucketLengths_, 336, m_DecayRate); - } - return static_cast<CExpandingWindow*>(0); - }; + auto newWindow = [this](const TTimeVec& bucketLengths) { + if (m_BucketLength <= bucketLengths.back()) { + std::ptrdiff_t a{std::lower_bound(bucketLengths.begin(), bucketLengths.end(), m_BucketLength) - bucketLengths.begin()}; + std::size_t b{bucketLengths.size()}; + TTimeCRng bucketLengths_(bucketLengths, a, b); + return new CExpandingWindow(m_BucketLength, bucketLengths_, 336, m_DecayRate); + } + return static_cast<CExpandingWindow*>(0); + }; - switch (test) - { - case E_Short: return 
newWindow(SHORT_BUCKET_LENGTHS); - case E_Long: return newWindow(LONG_BUCKET_LENGTHS); + switch (test) { + case E_Short: + return newWindow(SHORT_BUCKET_LENGTHS); + case E_Long: + return newWindow(LONG_BUCKET_LENGTHS); } return 0; } -const TTimeVec CTimeSeriesDecompositionDetail::CPeriodicityTest::SHORT_BUCKET_LENGTHS - { - 1, 5, 10, 30, 60, 300, 600, 1800, 3600 - }; -const TTimeVec CTimeSeriesDecompositionDetail::CPeriodicityTest::LONG_BUCKET_LENGTHS - { - 7200, 21600, 43200, 86400, 172800, 345600 - }; +const TTimeVec CTimeSeriesDecompositionDetail::CPeriodicityTest::SHORT_BUCKET_LENGTHS{1, 5, 10, 30, 60, 300, 600, 1800, 3600}; +const TTimeVec CTimeSeriesDecompositionDetail::CPeriodicityTest::LONG_BUCKET_LENGTHS{7200, 21600, 43200, 86400, 172800, 345600}; //////// CCalendarTest //////// -CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(double decayRate, - core_t::TTime bucketLength) : - m_Machine{core::CStateMachine::create(CC_ALPHABET, CC_STATES, CC_TRANSITION_FUNCTION, - bucketLength > DAY ? CC_NOT_TESTING : CC_INITIAL)}, - m_DecayRate{decayRate}, - m_LastMonth{} -{} - -CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(const CCalendarTest &other, - bool isForForecast) : - m_Machine{other.m_Machine}, - m_DecayRate{other.m_DecayRate}, - m_LastMonth{other.m_LastMonth}, - m_Test{!isForForecast && other.m_Test ? - boost::make_shared<CCalendarCyclicTest>(*other.m_Test) : 0} -{} - -bool CTimeSeriesDecompositionDetail::CCalendarTest::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; - RESTORE(CALENDAR_TEST_MACHINE_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))) +CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(double decayRate, core_t::TTime bucketLength) + : m_Machine{core::CStateMachine::create(CC_ALPHABET, + CC_STATES, + CC_TRANSITION_FUNCTION, + bucketLength > DAY ? CC_NOT_TESTING : CC_INITIAL)}, + m_DecayRate{decayRate}, + m_LastMonth{} { +} + +CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(const CCalendarTest& other, bool isForForecast) + : m_Machine{other.m_Machine}, + m_DecayRate{other.m_DecayRate}, + m_LastMonth{other.m_LastMonth}, + m_Test{!isForForecast && other.m_Test ? 
boost::make_shared<CCalendarCyclicTest>(*other.m_Test) : 0} { +} +bool CTimeSeriesDecompositionDetail::CCalendarTest::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE(CALENDAR_TEST_MACHINE_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))) RESTORE_BUILT_IN(LAST_MONTH_6_3_TAG, m_LastMonth); RESTORE_SETUP_TEARDOWN(CALENDAR_TEST_6_3_TAG, m_Test = boost::make_shared<CCalendarCyclicTest>(m_DecayRate), - traverser.traverseSubLevel( - boost::bind(&CCalendarCyclicTest::acceptRestoreTraverser, m_Test.get(), _1)), + traverser.traverseSubLevel(boost::bind(&CCalendarCyclicTest::acceptRestoreTraverser, m_Test.get(), _1)), /**/) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CTimeSeriesDecompositionDetail::CCalendarTest::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(CALENDAR_TEST_MACHINE_6_3_TAG, - boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); +void CTimeSeriesDecompositionDetail::CCalendarTest::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(CALENDAR_TEST_MACHINE_6_3_TAG, boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); inserter.insertValue(LAST_MONTH_6_3_TAG, m_LastMonth); - if (m_Test) - { - inserter.insertLevel(CALENDAR_TEST_6_3_TAG, boost::bind( - &CCalendarCyclicTest::acceptPersistInserter, m_Test.get(), _1)); + if (m_Test) { + inserter.insertLevel(CALENDAR_TEST_6_3_TAG, boost::bind(&CCalendarCyclicTest::acceptPersistInserter, m_Test.get(), _1)); } } -void CTimeSeriesDecompositionDetail::CCalendarTest::swap(CCalendarTest &other) -{ +void CTimeSeriesDecompositionDetail::CCalendarTest::swap(CCalendarTest& other) { std::swap(m_Machine, other.m_Machine); std::swap(m_DecayRate, other.m_DecayRate); std::swap(m_LastMonth, other.m_LastMonth); m_Test.swap(other.m_Test); } -void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SAddValue &message) -{ +void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SAddValue& message) { core_t::TTime time{message.s_Time}; double error{message.s_Value - message.s_Trend - message.s_Seasonal - message.s_Calendar}; - const maths_t::TWeightStyleVec &weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec &weights{message.s_Weights}; + const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; + const maths_t::TDouble4Vec& weights{message.s_Weights}; this->test(message); - switch (m_Machine.state()) - { + switch (m_Machine.state()) { case CC_TEST: m_Test->add(time, error, maths_t::countForUpdate(weightStyles, weights)); break; @@ -889,12 +757,9 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SAddValue &mess } } -void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SNewComponents &message) -{ - if (m_Machine.state() != CC_NOT_TESTING) - { - switch (message.s_Component) - { +void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SNewComponents& message) { + if (m_Machine.state() != CC_NOT_TESTING) { + switch (message.s_Component) { case SNewComponents::E_GeneralSeasonal: case SNewComponents::E_DiurnalSeasonal: this->apply(CC_RESET, message); @@ -905,19 +770,14 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SNewComponents } } -void CTimeSeriesDecompositionDetail::CCalendarTest::test(const SMessage &message) -{ +void CTimeSeriesDecompositionDetail::CCalendarTest::test(const SMessage& message) { 
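    // Errors are accumulated on every sample (see handle above), but the
    // cyclic test itself is only evaluated once per calendar month via
    // shouldTest below, presumably because calendar features recur on a
    // monthly cycle at the fastest.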
core_t::TTime time{message.s_Time}; core_t::TTime lastTime{message.s_LastTime}; - if (this->shouldTest(time)) - { - switch (m_Machine.state()) - { - case CC_TEST: - { - if (CCalendarCyclicTest::TOptionalFeature feature = m_Test->test()) - { + if (this->shouldTest(time)) { + switch (m_Machine.state()) { + case CC_TEST: { + if (CCalendarCyclicTest::TOptionalFeature feature = m_Test->test()) { this->mediator()->forward(SDetectedCalendar(time, lastTime, *feature)); } break; @@ -933,64 +793,52 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::test(const SMessage &message } } -void CTimeSeriesDecompositionDetail::CCalendarTest::propagateForwards(core_t::TTime start, - core_t::TTime end) -{ +void CTimeSeriesDecompositionDetail::CCalendarTest::propagateForwards(core_t::TTime start, core_t::TTime end) { stepwisePropagateForwards(DAY, start, end, m_Test); } -uint64_t CTimeSeriesDecompositionDetail::CCalendarTest::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesDecompositionDetail::CCalendarTest::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_Machine); seed = CChecksum::calculate(seed, m_DecayRate); seed = CChecksum::calculate(seed, m_LastMonth); return CChecksum::calculate(seed, m_Test); } -void CTimeSeriesDecompositionDetail::CCalendarTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CTimeSeriesDecompositionDetail::CCalendarTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCalendarTest"); core::CMemoryDebug::dynamicSize("m_Test", m_Test, mem); } -std::size_t CTimeSeriesDecompositionDetail::CCalendarTest::memoryUsage() const -{ +std::size_t CTimeSeriesDecompositionDetail::CCalendarTest::memoryUsage() const { std::size_t usage{core::CMemory::dynamicSize(m_Test)}; - if (m_Machine.state() == CC_INITIAL) - { + if (m_Machine.state() == CC_INITIAL) { usage += this->extraMemoryOnInitialization(); } return usage; } -std::size_t CTimeSeriesDecompositionDetail::CCalendarTest::extraMemoryOnInitialization() const -{ +std::size_t CTimeSeriesDecompositionDetail::CCalendarTest::extraMemoryOnInitialization() const { static std::size_t result{0}; - if (result == 0) - { + if (result == 0) { TCalendarCyclicTestPtr test(new CCalendarCyclicTest(m_DecayRate)); result = core::CMemory::dynamicSize(test); } return result; } -void CTimeSeriesDecompositionDetail::CCalendarTest::apply(std::size_t symbol, const SMessage &message) -{ +void CTimeSeriesDecompositionDetail::CCalendarTest::apply(std::size_t symbol, const SMessage& message) { core_t::TTime time{message.s_Time}; std::size_t old{m_Machine.state()}; m_Machine.apply(symbol); std::size_t state{m_Machine.state()}; - if (state != old) - { + if (state != old) { LOG_TRACE(CC_STATES[old] << "," << CC_ALPHABET[symbol] << " -> " << CC_STATES[state]); - switch (state) - { + switch (state) { case CC_TEST: - if (!m_Test) - { + if (!m_Test) { m_Test = boost::make_shared<CCalendarCyclicTest>(m_DecayRate); m_LastMonth = this->month(time) + 2; } @@ -1008,19 +856,16 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::apply(std::size_t symbol, co } } -bool CTimeSeriesDecompositionDetail::CCalendarTest::shouldTest(core_t::TTime time) -{ +bool CTimeSeriesDecompositionDetail::CCalendarTest::shouldTest(core_t::TTime time) { int month{this->month(time)}; - if (month == (m_LastMonth + 1) % 12) - { + if (month == (m_LastMonth + 1) % 12) { m_LastMonth = month; return true; } return false; } -int CTimeSeriesDecompositionDetail::CCalendarTest::month(core_t::TTime time) const -{ +int 
CTimeSeriesDecompositionDetail::CCalendarTest::month(core_t::TTime time) const { int dummy; int month; core::CTimezone::instance().dateFields(time, dummy, dummy, dummy, month, dummy, dummy); @@ -1029,60 +874,52 @@ int CTimeSeriesDecompositionDetail::CCalendarTest::month(core_t::TTime time) con //////// CComponents //////// -CTimeSeriesDecompositionDetail::CComponents::CComponents(double decayRate, - core_t::TTime bucketLength, - std::size_t seasonalComponentSize) : - m_Machine{core::CStateMachine::create(SC_ALPHABET, SC_STATES, SC_TRANSITION_FUNCTION, SC_NORMAL)}, - m_DecayRate{decayRate}, - m_BucketLength{bucketLength}, - m_SeasonalComponentSize{seasonalComponentSize}, - m_CalendarComponentSize{seasonalComponentSize / 3}, - m_Trend{decayRate}, - m_UsingTrendForPrediction{false}, - m_Watcher{0} -{} - -CTimeSeriesDecompositionDetail::CComponents::CComponents(const CComponents &other) : - m_Machine{other.m_Machine}, - m_DecayRate{other.m_DecayRate}, - m_BucketLength{other.m_BucketLength}, - m_SeasonalComponentSize{other.m_SeasonalComponentSize}, - m_CalendarComponentSize{other.m_CalendarComponentSize}, - m_Trend{other.m_Trend}, - m_Seasonal{other.m_Seasonal ? new SSeasonal{*other.m_Seasonal} : 0}, - m_Calendar{other.m_Calendar ? new SCalendar{*other.m_Calendar} : 0}, - m_MeanVarianceScale{other.m_MeanVarianceScale}, - m_Moments{other.m_Moments}, - m_MomentsMinusTrend{other.m_MomentsMinusTrend}, - m_UsingTrendForPrediction{other.m_UsingTrendForPrediction}, - m_Watcher{0} -{} - -bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - if (traverser.name() == VERSION_6_3_TAG) - { - while (traverser.next()) - { - const std::string &name{traverser.name()}; - RESTORE(COMPONENTS_MACHINE_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); +CTimeSeriesDecompositionDetail::CComponents::CComponents(double decayRate, core_t::TTime bucketLength, std::size_t seasonalComponentSize) + : m_Machine{core::CStateMachine::create(SC_ALPHABET, SC_STATES, SC_TRANSITION_FUNCTION, SC_NORMAL)}, + m_DecayRate{decayRate}, + m_BucketLength{bucketLength}, + m_SeasonalComponentSize{seasonalComponentSize}, + m_CalendarComponentSize{seasonalComponentSize / 3}, + m_Trend{decayRate}, + m_UsingTrendForPrediction{false}, + m_Watcher{0} { +} + +CTimeSeriesDecompositionDetail::CComponents::CComponents(const CComponents& other) + : m_Machine{other.m_Machine}, + m_DecayRate{other.m_DecayRate}, + m_BucketLength{other.m_BucketLength}, + m_SeasonalComponentSize{other.m_SeasonalComponentSize}, + m_CalendarComponentSize{other.m_CalendarComponentSize}, + m_Trend{other.m_Trend}, + m_Seasonal{other.m_Seasonal ? new SSeasonal{*other.m_Seasonal} : 0}, + m_Calendar{other.m_Calendar ? 
new SCalendar{*other.m_Calendar} : 0}, + m_MeanVarianceScale{other.m_MeanVarianceScale}, + m_Moments{other.m_Moments}, + m_MomentsMinusTrend{other.m_MomentsMinusTrend}, + m_UsingTrendForPrediction{other.m_UsingTrendForPrediction}, + m_Watcher{0} { +} +bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { + if (traverser.name() == VERSION_6_3_TAG) { + while (traverser.next()) { + const std::string& name{traverser.name()}; + RESTORE(COMPONENTS_MACHINE_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); RESTORE_BUILT_IN(DECAY_RATE_6_3_TAG, m_DecayRate); - RESTORE(TREND_6_3_TAG, traverser.traverseSubLevel(boost::bind( - &CTrendComponent::acceptRestoreTraverser, &m_Trend, - boost::cref(params), _1))) + RESTORE(TREND_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&CTrendComponent::acceptRestoreTraverser, &m_Trend, boost::cref(params), _1))) RESTORE_SETUP_TEARDOWN(SEASONAL_6_3_TAG, m_Seasonal.reset(new SSeasonal), - traverser.traverseSubLevel(boost::bind( - &SSeasonal::acceptRestoreTraverser, - m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), + traverser.traverseSubLevel( + boost::bind(&SSeasonal::acceptRestoreTraverser, m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), /**/) RESTORE_SETUP_TEARDOWN(CALENDAR_6_3_TAG, m_Calendar.reset(new SCalendar), - traverser.traverseSubLevel(boost::bind( - &SCalendar::acceptRestoreTraverser, - m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), + traverser.traverseSubLevel( + boost::bind(&SCalendar::acceptRestoreTraverser, m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), /**/) RESTORE(MEAN_VARIANCE_SCALE_6_3_TAG, m_MeanVarianceScale.fromDelimited(traverser.value())) RESTORE(MOMENTS_6_3_TAG, m_Moments.fromDelimited(traverser.value())); @@ -1091,54 +928,43 @@ bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(const S } this->decayRate(m_DecayRate); - } - else - { + } else { // There is no version string; this is historic state. 
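        // Pre-6.3 state is recognised purely by the absence of VERSION_6_3_TAG:
        // the old trend model is converted in place by upgradeTrendModelToVersion6p3
        // below and m_UsingTrendForPrediction is forced to true, which appears to
        // match the pre-6.3 behaviour of always predicting with the trend.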
- do - { - const std::string &name{traverser.name()}; - RESTORE(COMPONENTS_MACHINE_OLD_TAG, traverser.traverseSubLevel( - boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); - RESTORE_SETUP_TEARDOWN(TREND_OLD_TAG, - /**/, - traverser.traverseSubLevel(boost::bind( - upgradeTrendModelToVersion6p3, - m_BucketLength, boost::ref(m_Trend), _1)), - m_UsingTrendForPrediction = true) + do { + const std::string& name{traverser.name()}; + RESTORE(COMPONENTS_MACHINE_OLD_TAG, + traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); + RESTORE_SETUP_TEARDOWN( + TREND_OLD_TAG, + /**/, + traverser.traverseSubLevel(boost::bind(upgradeTrendModelToVersion6p3, m_BucketLength, boost::ref(m_Trend), _1)), + m_UsingTrendForPrediction = true) RESTORE_SETUP_TEARDOWN(SEASONAL_OLD_TAG, m_Seasonal.reset(new SSeasonal), - traverser.traverseSubLevel(boost::bind( - &SSeasonal::acceptRestoreTraverser, - m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), + traverser.traverseSubLevel( + boost::bind(&SSeasonal::acceptRestoreTraverser, m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), /**/) RESTORE_SETUP_TEARDOWN(CALENDAR_OLD_TAG, m_Calendar.reset(new SCalendar), - traverser.traverseSubLevel(boost::bind( - &SCalendar::acceptRestoreTraverser, - m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), + traverser.traverseSubLevel( + boost::bind(&SCalendar::acceptRestoreTraverser, m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), /**/) - } - while (traverser.next()); + } while (traverser.next()); m_MeanVarianceScale.add(1.0, MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3); } return true; } -void CTimeSeriesDecompositionDetail::CComponents::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CTimeSeriesDecompositionDetail::CComponents::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); - inserter.insertLevel(COMPONENTS_MACHINE_6_3_TAG, - boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); + inserter.insertLevel(COMPONENTS_MACHINE_6_3_TAG, boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); inserter.insertValue(DECAY_RATE_6_3_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); inserter.insertLevel(TREND_6_3_TAG, boost::bind(&CTrendComponent::acceptPersistInserter, m_Trend, _1)); - if (m_Seasonal) - { + if (m_Seasonal) { inserter.insertLevel(SEASONAL_6_3_TAG, boost::bind(&SSeasonal::acceptPersistInserter, m_Seasonal.get(), _1)); } - if (m_Calendar) - { + if (m_Calendar) { inserter.insertLevel(CALENDAR_6_3_TAG, boost::bind(&SCalendar::acceptPersistInserter, m_Calendar.get(), _1)); } inserter.insertValue(MEAN_VARIANCE_SCALE_6_3_TAG, m_MeanVarianceScale.toDelimited()); @@ -1147,8 +973,7 @@ void CTimeSeriesDecompositionDetail::CComponents::acceptPersistInserter(core::CS inserter.insertValue(USING_TREND_FOR_PREDICTION_6_3_TAG, m_UsingTrendForPrediction); } -void CTimeSeriesDecompositionDetail::CComponents::swap(CComponents &other) -{ +void CTimeSeriesDecompositionDetail::CComponents::swap(CComponents& other) { std::swap(m_Machine, other.m_Machine); std::swap(m_DecayRate, other.m_DecayRate); std::swap(m_BucketLength, other.m_BucketLength); @@ -1163,91 +988,78 @@ void CTimeSeriesDecompositionDetail::CComponents::swap(CComponents &other) std::swap(m_UsingTrendForPrediction, other.m_UsingTrendForPrediction); } -void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue &message) -{ - switch (m_Machine.state()) - { +void 
CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& message) { + switch (m_Machine.state()) { case SC_NORMAL: - case SC_NEW_COMPONENTS: - { - this->interpolate(message); + case SC_NEW_COMPONENTS: { + this->interpolate(message); - core_t::TTime time{message.s_Time}; - double value{message.s_Value}; - double trend{message.s_Trend}; - double seasonal{message.s_Seasonal}; - double calendar{message.s_Calendar}; - const maths_t::TWeightStyleVec &weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec &weights{message.s_Weights}; - - TSeasonalComponentPtrVec seasonalComponents; - TCalendarComponentPtrVec calendarComponents; - TComponentErrorsPtrVec seasonalErrors; - TComponentErrorsPtrVec calendarErrors; - TDoubleVec deltas; - - if (m_Seasonal) - { - m_Seasonal->componentsErrorsAndDeltas(time, seasonalComponents, seasonalErrors, deltas); - } - if (m_Calendar) - { - m_Calendar->componentsAndErrors(time, calendarComponents, calendarErrors); - } + core_t::TTime time{message.s_Time}; + double value{message.s_Value}; + double trend{message.s_Trend}; + double seasonal{message.s_Seasonal}; + double calendar{message.s_Calendar}; + const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; + const maths_t::TDouble4Vec& weights{message.s_Weights}; + + TSeasonalComponentPtrVec seasonalComponents; + TCalendarComponentPtrVec calendarComponents; + TComponentErrorsPtrVec seasonalErrors; + TComponentErrorsPtrVec calendarErrors; + TDoubleVec deltas; + + if (m_Seasonal) { + m_Seasonal->componentsErrorsAndDeltas(time, seasonalComponents, seasonalErrors, deltas); + } + if (m_Calendar) { + m_Calendar->componentsAndErrors(time, calendarComponents, calendarErrors); + } - double weight{maths_t::countForUpdate(weightStyles, weights)}; - std::size_t m{seasonalComponents.size()}; - std::size_t n{calendarComponents.size()}; - - TDoubleVec values(m + n + 1, value); - TDoubleVec predictions(m + n); - double error; - double scale; - decompose(m_Trend, seasonalComponents, calendarComponents, - time, deltas, values, predictions, error, scale); - - core_t::TTime observedInterval{m_Trend.observedInterval()}; - - m_Trend.add(time, values[0], weight); - m_Trend.dontShiftLevel(time, value); - for (std::size_t i = 1u; i <= m; ++i) - { - CSeasonalComponent *component{seasonalComponents[i - 1]}; - CComponentErrors *error_{seasonalErrors[i - 1]}; - double wi{weight / component->time().fractionInWindow()}; - component->add(time, values[i], wi); - error_->add(error, predictions[i - 1], wi); - } - for (std::size_t i = m + 1; i <= m + n; ++i) - { - CCalendarComponent *component{calendarComponents[i - m - 1]}; - CComponentErrors *error_{calendarErrors[i - m - 1]}; - component->add(time, values[i], weight); - error_->add(error, predictions[i - 1], weight); - } + double weight{maths_t::countForUpdate(weightStyles, weights)}; + std::size_t m{seasonalComponents.size()}; + std::size_t n{calendarComponents.size()}; + + TDoubleVec values(m + n + 1, value); + TDoubleVec predictions(m + n); + double error; + double scale; + decompose(m_Trend, seasonalComponents, calendarComponents, time, deltas, values, predictions, error, scale); + + core_t::TTime observedInterval{m_Trend.observedInterval()}; + + m_Trend.add(time, values[0], weight); + m_Trend.dontShiftLevel(time, value); + for (std::size_t i = 1u; i <= m; ++i) { + CSeasonalComponent* component{seasonalComponents[i - 1]}; + CComponentErrors* error_{seasonalErrors[i - 1]}; + double wi{weight / component->time().fractionInWindow()}; + component->add(time, 
values[i], wi); + error_->add(error, predictions[i - 1], wi); + } + for (std::size_t i = m + 1; i <= m + n; ++i) { + CCalendarComponent* component{calendarComponents[i - m - 1]}; + CComponentErrors* error_{calendarErrors[i - m - 1]}; + component->add(time, values[i], weight); + error_->add(error, predictions[i - 1], weight); + } - m_MeanVarianceScale.add(scale, weight); - m_Moments.add(value - seasonal - calendar, weight); - m_MomentsMinusTrend.add(value - trend - seasonal - calendar, weight); - - if (!m_UsingTrendForPrediction && observedInterval > 6 * m_BucketLength) - { - double v0{CBasicStatistics::variance(m_Moments)}; - double v1{CBasicStatistics::variance(m_MomentsMinusTrend)}; - double df0{CBasicStatistics::count(m_Moments) - 1.0}; - double df1{CBasicStatistics::count(m_MomentsMinusTrend) - m_Trend.parameters()}; - m_UsingTrendForPrediction = - v1 < SIGNIFICANT_VARIANCE_REDUCTION[0] * v0 - && df0 > 0.0 && df1 > 0.0 - && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE; - if (m_UsingTrendForPrediction) - { - LOG_DEBUG("Detected trend at " << time); - } - *m_Watcher = m_UsingTrendForPrediction; + m_MeanVarianceScale.add(scale, weight); + m_Moments.add(value - seasonal - calendar, weight); + m_MomentsMinusTrend.add(value - trend - seasonal - calendar, weight); + + if (!m_UsingTrendForPrediction && observedInterval > 6 * m_BucketLength) { + double v0{CBasicStatistics::variance(m_Moments)}; + double v1{CBasicStatistics::variance(m_MomentsMinusTrend)}; + double df0{CBasicStatistics::count(m_Moments) - 1.0}; + double df1{CBasicStatistics::count(m_MomentsMinusTrend) - m_Trend.parameters()}; + m_UsingTrendForPrediction = v1 < SIGNIFICANT_VARIANCE_REDUCTION[0] * v0 && df0 > 0.0 && df1 > 0.0 && + CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE; + if (m_UsingTrendForPrediction) { + LOG_DEBUG("Detected trend at " << time); } + *m_Watcher = m_UsingTrendForPrediction; } - break; + } break; case SC_DISABLED: break; default: @@ -1257,38 +1069,31 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue &messag } } -void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedSeasonal &message) -{ - if (this->size() + m_SeasonalComponentSize > this->maxSize()) - { +void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedSeasonal& message) { + if (this->size() + m_SeasonalComponentSize > this->maxSize()) { return; } - switch (m_Machine.state()) - { + switch (m_Machine.state()) { case SC_NORMAL: - case SC_NEW_COMPONENTS: - { - if (!m_Seasonal) - { + case SC_NEW_COMPONENTS: { + if (!m_Seasonal) { m_Seasonal.reset(new SSeasonal); } core_t::TTime time{message.s_Time}; core_t::TTime lastTime{message.s_LastTime}; - const CPeriodicityHypothesisTestsResult &result{message.s_Result}; - const CExpandingWindow &window{message.s_Window}; - const TPredictor &predictor{message.s_Predictor}; + const CPeriodicityHypothesisTestsResult& result{message.s_Result}; + const CExpandingWindow& window{message.s_Window}; + const TPredictor& predictor{message.s_Predictor}; - TSeasonalComponentVec &components{m_Seasonal->s_Components}; - TComponentErrorsVec &errors{m_Seasonal->s_PredictionErrors}; + TSeasonalComponentVec& components{m_Seasonal->s_Components}; + TComponentErrorsVec& errors{m_Seasonal->s_PredictionErrors}; - if (!this->addSeasonalComponents(result, window, predictor, m_Trend, components, errors)) - { + if (!this->addSeasonalComponents(result, window, predictor, m_Trend, components, errors)) { break; } - if 
(m_Watcher) - { + if (m_Watcher) { *m_Watcher = true; } LOG_DEBUG("Detected seasonal components at " << time); @@ -1308,20 +1113,15 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedSeasonal } } -void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar &message) -{ - if (this->size() + m_CalendarComponentSize > this->maxSize()) - { +void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar& message) { + if (this->size() + m_CalendarComponentSize > this->maxSize()) { return; } - switch (m_Machine.state()) - { + switch (m_Machine.state()) { case SC_NORMAL: - case SC_NEW_COMPONENTS: - { - if (!m_Calendar) - { + case SC_NEW_COMPONENTS: { + if (!m_Calendar) { m_Calendar.reset(new SCalendar); } @@ -1329,13 +1129,12 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar core_t::TTime lastTime{message.s_LastTime}; CCalendarFeature feature{message.s_Feature}; - if (m_Calendar->haveComponent(feature)) - { + if (m_Calendar->haveComponent(feature)) { break; } - TCalendarComponentVec &components{m_Calendar->s_Components}; - TComponentErrorsVec &errors{m_Calendar->s_PredictionErrors}; + TCalendarComponentVec& components{m_Calendar->s_Components}; + TComponentErrorsVec& errors{m_Calendar->s_PredictionErrors}; this->addCalendarComponent(feature, time, components, errors); this->apply(SC_ADDED_COMPONENTS, message); @@ -1351,51 +1150,41 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar } } -void CTimeSeriesDecompositionDetail::CComponents::useTrendForPrediction(void) -{ +void CTimeSeriesDecompositionDetail::CComponents::useTrendForPrediction(void) { m_UsingTrendForPrediction = true; } -void CTimeSeriesDecompositionDetail::CComponents::shiftLevel(core_t::TTime time, double value, double shift) -{ +void CTimeSeriesDecompositionDetail::CComponents::shiftLevel(core_t::TTime time, double value, double shift) { m_Trend.shiftLevel(time, value, shift); } -void CTimeSeriesDecompositionDetail::CComponents::linearScale(core_t::TTime time, double scale) -{ +void CTimeSeriesDecompositionDetail::CComponents::linearScale(core_t::TTime time, double scale) { m_Trend.linearScale(scale); - if (m_Seasonal) - { + if (m_Seasonal) { m_Seasonal->linearScale(time, scale); } - if (m_Calendar) - { + if (m_Calendar) { m_Calendar->linearScale(time, scale); } } -void CTimeSeriesDecompositionDetail::CComponents::interpolate(const SMessage &message) -{ +void CTimeSeriesDecompositionDetail::CComponents::interpolate(const SMessage& message) { core_t::TTime time{message.s_Time}; core_t::TTime lastTime{message.s_LastTime}; std::size_t state{m_Machine.state()}; - switch (state) - { + switch (state) { case SC_NORMAL: case SC_NEW_COMPONENTS: this->canonicalize(time); - if (this->shouldInterpolate(time, lastTime)) - { + if (this->shouldInterpolate(time, lastTime)) { LOG_TRACE("Interpolating values at " << time); - if (m_Seasonal) - { + if (m_Seasonal) { m_Seasonal->interpolate(time, lastTime, true); } - if (m_Calendar) - { + if (m_Calendar) { m_Calendar->interpolate(time, lastTime, true); } @@ -1411,131 +1200,104 @@ void CTimeSeriesDecompositionDetail::CComponents::interpolate(const SMessage &me } } -void CTimeSeriesDecompositionDetail::CComponents::interpolateForForecast(core_t::TTime time) -{ - if (this->shouldInterpolate(time, time - m_BucketLength)) - { - if (m_Seasonal) - { +void CTimeSeriesDecompositionDetail::CComponents::interpolateForForecast(core_t::TTime time) { + if (this->shouldInterpolate(time, time 
- m_BucketLength)) { - if (m_Seasonal) - { + if (m_Seasonal) { m_Seasonal->interpolate(time, time - m_BucketLength, false); } - if (m_Calendar) - { + if (m_Calendar) { m_Calendar->interpolate(time, time - m_BucketLength, true); } } } - -void CTimeSeriesDecompositionDetail::CComponents::dataType(maths_t::EDataType dataType) -{ +void CTimeSeriesDecompositionDetail::CComponents::dataType(maths_t::EDataType dataType) { m_Trend.dataType(dataType); } -void CTimeSeriesDecompositionDetail::CComponents::decayRate(double decayRate) -{ +void CTimeSeriesDecompositionDetail::CComponents::decayRate(double decayRate) { m_DecayRate = decayRate; m_Trend.decayRate(decayRate); - if (m_Seasonal) - { + if (m_Seasonal) { m_Seasonal->decayRate(decayRate); } - if (m_Calendar) - { + if (m_Calendar) { m_Calendar->decayRate(decayRate); } } -double CTimeSeriesDecompositionDetail::CComponents::decayRate() const -{ +double CTimeSeriesDecompositionDetail::CComponents::decayRate() const { return m_DecayRate; } -void CTimeSeriesDecompositionDetail::CComponents::propagateForwards(core_t::TTime start, - core_t::TTime end) -{ +void CTimeSeriesDecompositionDetail::CComponents::propagateForwards(core_t::TTime start, core_t::TTime end) { m_Trend.propagateForwardsByTime(end - start); - if (m_Seasonal) - { + if (m_Seasonal) { m_Seasonal->propagateForwards(start, end); } - if (m_Calendar) - { + if (m_Calendar) { m_Calendar->propagateForwards(start, end); } - double factor{std::exp(-m_DecayRate * static_cast<double>(end - start) - / static_cast<double>(DAY))}; + double factor{std::exp(-m_DecayRate * static_cast<double>(end - start) / static_cast<double>(DAY))}; m_MeanVarianceScale.age(factor); m_Moments.age(factor); m_MomentsMinusTrend.age(factor); } -bool CTimeSeriesDecompositionDetail::CComponents::initialized() const -{ - return m_UsingTrendForPrediction && m_Trend.initialized() ? true : - (m_Seasonal && m_Calendar ? m_Seasonal->initialized() || m_Calendar->initialized() : - (m_Seasonal ? m_Seasonal->initialized() : - (m_Calendar ? m_Calendar->initialized() : false))); +bool CTimeSeriesDecompositionDetail::CComponents::initialized() const { + return m_UsingTrendForPrediction && m_Trend.initialized() + ? true + : (m_Seasonal && m_Calendar ? m_Seasonal->initialized() || m_Calendar->initialized() + : (m_Seasonal ? m_Seasonal->initialized() : (m_Calendar ? m_Calendar->initialized() : false))); } -const CTrendComponent &CTimeSeriesDecompositionDetail::CComponents::trend() const -{ +const CTrendComponent& CTimeSeriesDecompositionDetail::CComponents::trend() const { return m_Trend; } -const TSeasonalComponentVec &CTimeSeriesDecompositionDetail::CComponents::seasonal() const -{ +const TSeasonalComponentVec& CTimeSeriesDecompositionDetail::CComponents::seasonal() const { return m_Seasonal ? m_Seasonal->s_Components : NO_SEASONAL_COMPONENTS; } -const maths_t::TCalendarComponentVec &CTimeSeriesDecompositionDetail::CComponents::calendar() const -{ +const maths_t::TCalendarComponentVec& CTimeSeriesDecompositionDetail::CComponents::calendar() const { return m_Calendar ? 
m_Calendar->s_Components : NO_CALENDAR_COMPONENTS; } -bool CTimeSeriesDecompositionDetail::CComponents::usingTrendForPrediction() const -{ +bool CTimeSeriesDecompositionDetail::CComponents::usingTrendForPrediction() const { return m_UsingTrendForPrediction; } -CPeriodicityHypothesisTestsConfig CTimeSeriesDecompositionDetail::CComponents::periodicityTestConfig() const -{ +CPeriodicityHypothesisTestsConfig CTimeSeriesDecompositionDetail::CComponents::periodicityTestConfig() const { CPeriodicityHypothesisTestsConfig result; - for (const auto &component : this->seasonal()) - { - const CSeasonalTime &time{component.time()}; - result.hasDaily( result.hasDaily() || time.period() == DAY); + for (const auto& component : this->seasonal()) { + const CSeasonalTime& time{component.time()}; + result.hasDaily(result.hasDaily() || time.period() == DAY); result.hasWeekend(result.hasWeekend() || time.hasWeekend()); - result.hasWeekly( result.hasWeekly() || time.period() == WEEK); - if (time.hasWeekend()) - { + result.hasWeekly(result.hasWeekly() || time.period() == WEEK); + if (time.hasWeekend()) { result.startOfWeek(time.windowRepeatStart()); } } return result; } -double CTimeSeriesDecompositionDetail::CComponents::meanValue(core_t::TTime time) const -{ - return this->initialized() ? ( (m_UsingTrendForPrediction ? - CBasicStatistics::mean(m_Trend.value(time, 0.0)) : 0.0) - + meanOf(&CSeasonalComponent::meanValue, this->seasonal())) : 0.0; +double CTimeSeriesDecompositionDetail::CComponents::meanValue(core_t::TTime time) const { + return this->initialized() ? ((m_UsingTrendForPrediction ? CBasicStatistics::mean(m_Trend.value(time, 0.0)) : 0.0) + + meanOf(&CSeasonalComponent::meanValue, this->seasonal())) + : 0.0; } -double CTimeSeriesDecompositionDetail::CComponents::meanVariance() const -{ - return this->initialized() ? ( (m_UsingTrendForPrediction ? - CBasicStatistics::mean(this->trend().variance(0.0)) : 0.0) - + meanOf(&CSeasonalComponent::meanVariance, this->seasonal())) : 0.0; +double CTimeSeriesDecompositionDetail::CComponents::meanVariance() const { + return this->initialized() ? ((m_UsingTrendForPrediction ? 
CBasicStatistics::mean(this->trend().variance(0.0)) : 0.0) + + meanOf(&CSeasonalComponent::meanVariance, this->seasonal())) + : 0.0; } -double CTimeSeriesDecompositionDetail::CComponents::meanVarianceScale() const -{ +double CTimeSeriesDecompositionDetail::CComponents::meanVarianceScale() const { return CBasicStatistics::mean(m_MeanVarianceScale); } -uint64_t CTimeSeriesDecompositionDetail::CComponents::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesDecompositionDetail::CComponents::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_Machine); seed = CChecksum::calculate(seed, m_DecayRate); seed = CChecksum::calculate(seed, m_BucketLength); @@ -1550,92 +1312,74 @@ uint64_t CTimeSeriesDecompositionDetail::CComponents::checksum(uint64_t seed) co return CChecksum::calculate(seed, m_UsingTrendForPrediction); } -void CTimeSeriesDecompositionDetail::CComponents::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CTimeSeriesDecompositionDetail::CComponents::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CComponents"); core::CMemoryDebug::dynamicSize("m_Trend", m_Trend, mem); core::CMemoryDebug::dynamicSize("m_Seasonal", m_Seasonal, mem); core::CMemoryDebug::dynamicSize("m_Calendar", m_Calendar, mem); } -std::size_t CTimeSeriesDecompositionDetail::CComponents::memoryUsage() const -{ - return core::CMemory::dynamicSize(m_Trend) - + core::CMemory::dynamicSize(m_Seasonal) - + core::CMemory::dynamicSize(m_Calendar); +std::size_t CTimeSeriesDecompositionDetail::CComponents::memoryUsage() const { + return core::CMemory::dynamicSize(m_Trend) + core::CMemory::dynamicSize(m_Seasonal) + core::CMemory::dynamicSize(m_Calendar); } -std::size_t CTimeSeriesDecompositionDetail::CComponents::size() const -{ +std::size_t CTimeSeriesDecompositionDetail::CComponents::size() const { return (m_Seasonal ? m_Seasonal->size() : 0) + (m_Calendar ? 
m_Calendar->size() : 0); } -std::size_t CTimeSeriesDecompositionDetail::CComponents::maxSize() const -{ +std::size_t CTimeSeriesDecompositionDetail::CComponents::maxSize() const { return MAXIMUM_COMPONENTS * m_SeasonalComponentSize; } -bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CPeriodicityHypothesisTestsResult &result, - const CExpandingWindow &window, - const TPredictor &predictor, - CTrendComponent &trend, - TSeasonalComponentVec &components, - TComponentErrorsVec &errors) const -{ +bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CPeriodicityHypothesisTestsResult& result, + const CExpandingWindow& window, + const TPredictor& predictor, + CTrendComponent& trend, + TSeasonalComponentVec& components, + TComponentErrorsVec& errors) const { using TSeasonalTimePtr = boost::shared_ptr<CSeasonalTime>; using TSeasonalTimePtrVec = std::vector<TSeasonalTimePtr>; TSeasonalTimePtrVec newSeasonalTimes; - for (const auto &candidate_ : result.components()) - { + for (const auto& candidate_ : result.components()) { TSeasonalTimePtr seasonalTime(candidate_.seasonalTime()); - if (std::find_if(components.begin(), components.end(), - [&seasonalTime](const CSeasonalComponent &component) - { - return component.time().excludes(*seasonalTime); - }) == components.end()) - { + if (std::find_if(components.begin(), components.end(), [&seasonalTime](const CSeasonalComponent& component) { + return component.time().excludes(*seasonalTime); + }) == components.end()) { LOG_DEBUG("Detected '" << candidate_.s_Description << "'"); newSeasonalTimes.push_back(seasonalTime); } } - if (newSeasonalTimes.size() > 0) - { - for (const auto &seasonalTime : newSeasonalTimes) - { - components.erase(std::remove_if(components.begin(), components.end(), - [&seasonalTime](const CSeasonalComponent &component) - { - return seasonalTime->excludes(component.time()); - }), components.end()); + if (newSeasonalTimes.size() > 0) { + for (const auto& seasonalTime : newSeasonalTimes) { + components.erase( + std::remove_if(components.begin(), + components.end(), + [&seasonalTime](const CSeasonalComponent& component) { return seasonalTime->excludes(component.time()); }), + components.end()); } std::sort(newSeasonalTimes.begin(), newSeasonalTimes.end(), maths::COrderings::SLess()); TFloatMeanAccumulatorVec values; - for (const auto &seasonalTime : newSeasonalTimes) - { + for (const auto& seasonalTime : newSeasonalTimes) { values = window.valuesMinusPrediction(predictor); - components.emplace_back(*seasonalTime, m_SeasonalComponentSize, - m_DecayRate, static_cast<double>(m_BucketLength), - CSplineTypes::E_Natural); + components.emplace_back( + *seasonalTime, m_SeasonalComponentSize, m_DecayRate, static_cast<double>(m_BucketLength), CSplineTypes::E_Natural); components.back().initialize(window.startTime(), window.endTime(), values); - components.back().interpolate(CIntegerTools::floor(window.endTime(), - seasonalTime->period())); + components.back().interpolate(CIntegerTools::floor(window.endTime(), seasonalTime->period())); } CTrendComponent windowTrend{trend.defaultDecayRate()}; values = window.valuesMinusPrediction(predictor); core_t::TTime time{window.startTime() + window.bucketLength() / 2}; - for (const auto &value : values) - { + for (const auto& value : values) { // Because we now test before the window is fully compressed, // we can get a run of unset values at the end of the window; // we should just ignore these. 
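            // (A bucket the window never received a value for has zero count,
            // so its mean is not meaningful; hence the count > 0 guard below.)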
- if (CBasicStatistics::count(value) > 0.0) - { + if (CBasicStatistics::count(value) > 0.0) { windowTrend.add(time, CBasicStatistics::mean(value), CBasicStatistics::count(value)); windowTrend.propagateForwardsByTime(window.bucketLength()); } @@ -1644,52 +1388,40 @@ bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CP trend.swap(windowTrend); errors.resize(components.size()); - COrderings::simultaneousSort(components, errors, - [](const CSeasonalComponent &lhs, const CSeasonalComponent &rhs) - { - return lhs.time() < rhs.time(); - }); + COrderings::simultaneousSort( + components, errors, [](const CSeasonalComponent& lhs, const CSeasonalComponent& rhs) { return lhs.time() < rhs.time(); }); } return newSeasonalTimes.size() > 0; } -bool CTimeSeriesDecompositionDetail::CComponents::addCalendarComponent(const CCalendarFeature &feature, +bool CTimeSeriesDecompositionDetail::CComponents::addCalendarComponent(const CCalendarFeature& feature, core_t::TTime time, - maths_t::TCalendarComponentVec &components, - TComponentErrorsVec &errors) const -{ + maths_t::TCalendarComponentVec& components, + TComponentErrorsVec& errors) const { double bucketLength{static_cast<double>(m_BucketLength)}; - components.emplace_back(feature, m_CalendarComponentSize, - m_DecayRate, bucketLength, CSplineTypes::E_Natural); + components.emplace_back(feature, m_CalendarComponentSize, m_DecayRate, bucketLength, CSplineTypes::E_Natural); components.back().initialize(); errors.resize(components.size()); LOG_DEBUG("Detected feature '" << feature.print() << "' at " << time); return true; } -void CTimeSeriesDecompositionDetail::CComponents::clearComponentErrors() -{ - if (m_Seasonal) - { - for (auto &errors : m_Seasonal->s_PredictionErrors) - { +void CTimeSeriesDecompositionDetail::CComponents::clearComponentErrors() { + if (m_Seasonal) { + for (auto& errors : m_Seasonal->s_PredictionErrors) { errors.clear(); } } - if (m_Calendar) - { - for (auto &errors : m_Calendar->s_PredictionErrors) - { + if (m_Calendar) { + for (auto& errors : m_Calendar->s_PredictionErrors) { errors.clear(); } } } -void CTimeSeriesDecompositionDetail::CComponents::apply(std::size_t symbol, const SMessage &message) -{ - if (symbol == SC_RESET) - { +void CTimeSeriesDecompositionDetail::CComponents::apply(std::size_t symbol, const SMessage& message) { + if (symbol == SC_RESET) { m_Trend.clear(); m_Seasonal.reset(); m_Calendar.reset(); @@ -1699,12 +1431,10 @@ void CTimeSeriesDecompositionDetail::CComponents::apply(std::size_t symbol, cons m_Machine.apply(symbol); std::size_t state{m_Machine.state()}; - if (state != old) - { + if (state != old) { LOG_TRACE(SC_STATES[old] << "," << SC_ALPHABET[symbol] << " -> " << SC_STATES[state]); - switch (state) - { + switch (state) { case SC_NORMAL: case SC_NEW_COMPONENTS: this->interpolate(message); @@ -1722,49 +1452,38 @@ void CTimeSeriesDecompositionDetail::CComponents::apply(std::size_t symbol, cons } } -bool CTimeSeriesDecompositionDetail::CComponents::shouldInterpolate(core_t::TTime time, - core_t::TTime last) -{ - return m_Machine.state() == SC_NEW_COMPONENTS - || (m_Seasonal && m_Seasonal->shouldInterpolate(time, last)) - || (m_Calendar && m_Calendar->shouldInterpolate(time, last)); +bool CTimeSeriesDecompositionDetail::CComponents::shouldInterpolate(core_t::TTime time, core_t::TTime last) { + return m_Machine.state() == SC_NEW_COMPONENTS || (m_Seasonal && m_Seasonal->shouldInterpolate(time, last)) || + (m_Calendar && m_Calendar->shouldInterpolate(time, last)); } -void 
CTimeSeriesDecompositionDetail::CComponents::shiftOrigin(core_t::TTime time) -{ +void CTimeSeriesDecompositionDetail::CComponents::shiftOrigin(core_t::TTime time) { time -= static_cast<core_t::TTime>(static_cast<double>(DAY) / m_DecayRate / 2.0); m_Trend.shiftOrigin(time); - if (m_Seasonal) - { + if (m_Seasonal) { m_Seasonal->shiftOrigin(time); } } -void CTimeSeriesDecompositionDetail::CComponents::canonicalize(core_t::TTime time) -{ +void CTimeSeriesDecompositionDetail::CComponents::canonicalize(core_t::TTime time) { this->shiftOrigin(time); - if (m_Seasonal && m_Seasonal->prune(time, m_BucketLength)) - { + if (m_Seasonal && m_Seasonal->prune(time, m_BucketLength)) { m_Seasonal.reset(); } - if (m_Calendar && m_Calendar->prune(time, m_BucketLength)) - { + if (m_Calendar && m_Calendar->prune(time, m_BucketLength)) { m_Calendar.reset(); } - if (m_Seasonal) - { - TSeasonalComponentVec &seasonal{m_Seasonal->s_Components}; + if (m_Seasonal) { + TSeasonalComponentVec& seasonal{m_Seasonal->s_Components}; TTimeTimePrDoubleFMap slope; slope.reserve(seasonal.size()); - for (auto &component : seasonal) - { - if (component.slopeAccurate(time)) - { - const CSeasonalTime &time_{component.time()}; + for (auto& component : seasonal) { + if (component.slopeAccurate(time)) { + const CSeasonalTime& time_{component.time()}; double si{component.slope()}; component.shiftSlope(-si); slope[time_.window()] += si; @@ -1776,41 +1495,30 @@ void CTimeSeriesDecompositionDetail::CComponents::canonicalize(core_t::TTim tim } } -void CTimeSeriesDecompositionDetail::CComponents::notifyOnNewComponents(bool *watcher) -{ +void CTimeSeriesDecompositionDetail::CComponents::notifyOnNewComponents(bool* watcher) { m_Watcher = watcher; } -CTimeSeriesDecompositionDetail::CComponents::CScopeNotifyOnStateChange::CScopeNotifyOnStateChange(CComponents &components) : - m_Components{components}, m_Watcher{false} -{ +CTimeSeriesDecompositionDetail::CComponents::CScopeNotifyOnStateChange::CScopeNotifyOnStateChange(CComponents& components) + : m_Components{components}, m_Watcher{false} { m_Components.notifyOnNewComponents(&m_Watcher); } -CTimeSeriesDecompositionDetail::CComponents::CScopeNotifyOnStateChange::~CScopeNotifyOnStateChange() -{ +CTimeSeriesDecompositionDetail::CComponents::CScopeNotifyOnStateChange::~CScopeNotifyOnStateChange() { m_Components.notifyOnNewComponents(0); } -bool CTimeSeriesDecompositionDetail::CComponents::CScopeNotifyOnStateChange::changed() const -{ +bool CTimeSeriesDecompositionDetail::CComponents::CScopeNotifyOnStateChange::changed() const { return m_Watcher; } -bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::fromDelimited(const std::string &str) -{ - TFloatMeanAccumulator *state[] = - { - &m_MeanErrorWithComponent, - &m_MeanErrorWithoutComponent - }; +bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::fromDelimited(const std::string& str) { + TFloatMeanAccumulator* state[] = {&m_MeanErrorWithComponent, &m_MeanErrorWithoutComponent}; std::string suffix = str; - for (std::size_t i = 0u, n = 0; i < 2; ++i, suffix = suffix.substr(n + 1)) - { + for (std::size_t i = 0u, n = 0; i < 2; ++i, suffix = suffix.substr(n + 1)) { n = suffix.find(CBasicStatistics::EXTERNAL_DELIMITER); - if (!state[i]->fromDelimited(suffix.substr(0, n))) - { + if (!state[i]->fromDelimited(suffix.substr(0, n))) { LOG_ERROR("Failed to parse '" << str << "'"); return false; } @@ -1819,176 +1527,131 @@ bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::fromDelimite return true; } -std::string 
CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::toDelimited() const -{ - return m_MeanErrorWithComponent.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER - + m_MeanErrorWithoutComponent.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER; +std::string CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::toDelimited() const { + return m_MeanErrorWithComponent.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER + m_MeanErrorWithoutComponent.toDelimited() + + CBasicStatistics::EXTERNAL_DELIMITER; } -void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::add(double error, - double prediction, - double weight) -{ +void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::add(double error, double prediction, double weight) { double errorWithComponent{winsorise(pow2(error), m_MeanErrorWithComponent)}; double errorWithoutComponent{winsorise(pow2(error - prediction), m_MeanErrorWithoutComponent)}; m_MeanErrorWithComponent.add(errorWithComponent, weight); m_MeanErrorWithoutComponent.add(errorWithoutComponent, weight); } -void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::clear() -{ +void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::clear() { m_MeanErrorWithComponent = TFloatMeanAccumulator(); m_MeanErrorWithoutComponent = TFloatMeanAccumulator(); } -bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove(core_t::TTime bucketLength, - CSeasonalComponent &seasonal) const -{ +bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove(core_t::TTime bucketLength, CSeasonalComponent& seasonal) const { double count{CBasicStatistics::count(m_MeanErrorWithComponent)}; double errorWithComponent{CBasicStatistics::mean(m_MeanErrorWithComponent)}; double errorWithoutComponent{CBasicStatistics::mean(m_MeanErrorWithoutComponent)}; - return count > static_cast<double>(10 * seasonal.time().period() / bucketLength) - && std::max( errorWithoutComponent - / errorWithComponent, seasonal.heteroscedasticity()) < 1.5; + return count > static_cast<double>(10 * seasonal.time().period() / bucketLength) && + std::max(errorWithoutComponent / errorWithComponent, seasonal.heteroscedasticity()) < 1.5; } -bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove(core_t::TTime bucketLength, - CCalendarComponent &calendar) const -{ +bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove(core_t::TTime bucketLength, CCalendarComponent& calendar) const { double count{CBasicStatistics::count(m_MeanErrorWithComponent)}; double errorWithComponent{CBasicStatistics::mean(m_MeanErrorWithComponent)}; double errorWithoutComponent{CBasicStatistics::mean(m_MeanErrorWithoutComponent)}; - return count > static_cast<double>(5 * calendar.feature().window() / bucketLength) - && std::max( errorWithoutComponent - / errorWithComponent, calendar.heteroscedasticity()) < 1.5; + return count > static_cast<double>(5 * calendar.feature().window() / bucketLength) && + std::max(errorWithoutComponent / errorWithComponent, calendar.heteroscedasticity()) < 1.5; } -void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::age(double factor) -{ +void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::age(double factor) { m_MeanErrorWithComponent.age(factor); m_MeanErrorWithoutComponent.age(factor); } -uint64_t CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::checksum(uint64_t seed) const { seed = 
CChecksum::calculate(seed, m_MeanErrorWithComponent); return CChecksum::calculate(seed, m_MeanErrorWithoutComponent); } -double CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::winsorise(double squareError, - const TFloatMeanAccumulator &variance) -{ - return CBasicStatistics::count(variance) > 10.0 ? - std::min(squareError, 36.0 * CBasicStatistics::mean(variance)) : squareError; +double CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::winsorise(double squareError, const TFloatMeanAccumulator& variance) { + return CBasicStatistics::count(variance) > 10.0 ? std::min(squareError, 36.0 * CBasicStatistics::mean(variance)) : squareError; } bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptRestoreTraverser(double decayRate, core_t::TTime bucketLength_, - core::CStateRestoreTraverser &traverser) -{ + core::CStateRestoreTraverser& traverser) { double bucketLength{static_cast<double>(bucketLength_)}; - if (traverser.name() == VERSION_6_3_TAG) - { - while (traverser.next()) - { - const std::string &name{traverser.name()}; - RESTORE_NO_ERROR(COMPONENT_6_3_TAG, s_Components.emplace_back( - decayRate, bucketLength, traverser)) - RESTORE(ERRORS_6_3_TAG, core::CPersistUtils::restore( - ERRORS_6_3_TAG, s_PredictionErrors, traverser)) + if (traverser.name() == VERSION_6_3_TAG) { + while (traverser.next()) { + const std::string& name{traverser.name()}; + RESTORE_NO_ERROR(COMPONENT_6_3_TAG, s_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_6_3_TAG, core::CPersistUtils::restore(ERRORS_6_3_TAG, s_PredictionErrors, traverser)) } - } - else - { + } else { // There is no version string this is historic state. - do - { - const std::string &name{traverser.name()}; - RESTORE_NO_ERROR(COMPONENT_OLD_TAG, s_Components.emplace_back( - decayRate, bucketLength, traverser)) - RESTORE(ERRORS_OLD_TAG, core::CPersistUtils::restore( - ERRORS_OLD_TAG, s_PredictionErrors, traverser)) - } - while (traverser.next()); + do { + const std::string& name{traverser.name()}; + RESTORE_NO_ERROR(COMPONENT_OLD_TAG, s_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_OLD_TAG, core::CPersistUtils::restore(ERRORS_OLD_TAG, s_PredictionErrors, traverser)) + } while (traverser.next()); } return true; } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); - for (const auto &component : s_Components) - { - inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind( - &CSeasonalComponent::acceptPersistInserter, &component, _1)); + for (const auto& component : s_Components) { + inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind(&CSeasonalComponent::acceptPersistInserter, &component, _1)); } core::CPersistUtils::persist(ERRORS_6_3_TAG, s_PredictionErrors, inserter); } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::decayRate(double decayRate) -{ - for (auto &component : s_Components) - { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::decayRate(double decayRate) { + for (auto& component : s_Components) { component.decayRate(decayRate); } }
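The winsorise() helper shown above caps each new squared error at 36 times the running mean squared error, i.e. roughly a six-standard-deviation clamp, and only once the mean is based on more than 10 observations. A self-contained restatement of the same rule, with plain doubles standing in for the ml TFloatMeanAccumulator:

#include <algorithm>
#include <iostream>

// Cap a squared error at 36 * mean once the mean is trustworthy (> 10 samples).
double winsorise(double squareError, double count, double meanSquareError) {
    return count > 10.0 ? std::min(squareError, 36.0 * meanSquareError) : squareError;
}

int main() {
    std::cout << winsorise(400.0, 5.0, 1.0) << '\n';  // 400: too few samples to trust the mean
    std::cout << winsorise(400.0, 50.0, 1.0) << '\n'; // 36: capped at 36 * mean
    std::cout << winsorise(9.0, 50.0, 1.0) << '\n';   // 9: already under the cap
    return 0;
}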
-void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::propagateForwards(core_t::TTime start, - core_t::TTime end) -{ - for (std::size_t i = 0u; i < s_Components.size(); ++i) - { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::propagateForwards(core_t::TTime start, core_t::TTime end) { + for (std::size_t i = 0u; i < s_Components.size(); ++i) { core_t::TTime period{s_Components[i].time().period()}; core_t::TTime a{CIntegerTools::floor(start, period)}; core_t::TTime b{CIntegerTools::floor(end, period)}; - if (b > a) - { - double time{ static_cast<double>(b - a) - / static_cast<double>(CTools::truncate(period, DAY, WEEK))}; + if (b > a) { + double time{static_cast<double>(b - a) / static_cast<double>(CTools::truncate(period, DAY, WEEK))}; s_Components[i].propagateForwardsByTime(time); s_PredictionErrors[i].age(std::exp(-s_Components[i].decayRate() * time)); } } } -std::size_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::size() const -{ +std::size_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::size() const { std::size_t result{0}; - for (const auto &component : s_Components) - { + for (const auto& component : s_Components) { result += component.size(); } return result; } void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::componentsErrorsAndDeltas(core_t::TTime time, - TSeasonalComponentPtrVec &components, - TComponentErrorsPtrVec &errors, - TDoubleVec &deltas) -{ + TSeasonalComponentPtrVec& components, + TComponentErrorsPtrVec& errors, + TDoubleVec& deltas) { std::size_t n{s_Components.size()}; components.reserve(n); errors.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { - if (s_Components[i].time().inWindow(time)) - { + for (std::size_t i = 0u; i < n; ++i) { + if (s_Components[i].time().inWindow(time)) { components.push_back(&s_Components[i]); errors.push_back(&s_PredictionErrors[i]); } } deltas.resize(components.size(), 0.0); - for (std::size_t i = 1u; i < components.size(); ++i) - { + for (std::size_t i = 1u; i < components.size(); ++i) { int j{static_cast<int>(i - 1)}; - for (core_t::TTime period{components[i]->time().period()}; j > -1; --j) - { + for (core_t::TTime period{components[i]->time().period()}; j > -1; --j) { core_t::TTime period_{components[j]->time().period()}; - if (period % period_ == 0) - { + if (period % period_ == 0) { double value{CBasicStatistics::mean(components[j]->value(time, 0.0))}; double delta{0.2 * components[i]->delta(time, period_, value)}; deltas[j] += delta; @@ -1999,64 +1662,47 @@ void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::componentsErrorsAnd } } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shouldInterpolate(core_t::TTime time, - core_t::TTime last) const -{ - for (const auto &component : s_Components) - { +bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shouldInterpolate(core_t::TTime time, core_t::TTime last) const { + for (const auto& component : s_Components) { core_t::TTime period{component.time().period()}; core_t::TTime a{CIntegerTools::floor(last, period)}; core_t::TTime b{CIntegerTools::floor(time, period)}; - if (b > a) - { + if (b > a) { return true; } } return false; } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::interpolate(core_t::TTime time, - core_t::TTime last, - bool refine) -{ - for (auto &component : s_Components) - { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::interpolate(core_t::TTime time, core_t::TTime last, bool refine) { + for (auto& component : s_Components) { core_t::TTime period{component.time().period()}; core_t::TTime a{CIntegerTools::floor(last, period)}; core_t::TTime b{CIntegerTools::floor(time, period)}; - if (b > a || !component.initialized()) - { + if (b > a || !component.initialized()) { 
component.interpolate(b, refine); } } } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::initialized() const -{ - for (const auto &component : s_Components) - { - if (component.initialized()) - { +bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::initialized() const { + for (const auto& component : s_Components) { + if (component.initialized()) { return true; } } return false; } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime time, - core_t::TTime bucketLength) -{ +bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime time, core_t::TTime bucketLength) { std::size_t n = s_Components.size(); - if (n > 1) - { + if (n > 1) { TTimeTimePrSizeFMap windowed; windowed.reserve(n); - for (const auto &component : s_Components) - { - const CSeasonalTime &time_ = component.time(); - if (time_.windowed()) - { + for (const auto& component : s_Components) { + const CSeasonalTime& time_ = component.time(); + if (time_.windowed()) { ++windowed[time_.window()]; } } @@ -2064,14 +1710,11 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime TBoolVec remove(n, false); TTimeTimePrDoubleFMap shifts; shifts.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { - const CSeasonalTime &time_ = s_Components[i].time(); + for (std::size_t i = 0u; i < n; ++i) { + const CSeasonalTime& time_ = s_Components[i].time(); auto j = windowed.find(time_.window()); - if (j == windowed.end() || j->second > 1) - { - if (s_PredictionErrors[i].remove(bucketLength, s_Components[i])) - { + if (j == windowed.end() || j->second > 1) { + if (s_PredictionErrors[i].remove(bucketLength, s_Components[i])) { LOG_DEBUG("Removing seasonal component" << " with period '" << time_.period() << "' at " << time); remove[i] = true; @@ -2081,48 +1724,33 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime } } - CSetTools::simultaneousRemoveIf( - remove, s_Components, s_PredictionErrors, [](bool remove_) { return remove_; }); - - for (auto &shift : shifts) - { - if (windowed.count(shift.first) > 0) - { - for (auto &component : s_Components) - { - if (shift.first == component.time().window()) - { + CSetTools::simultaneousRemoveIf(remove, s_Components, s_PredictionErrors, [](bool remove_) { return remove_; }); + + for (auto& shift : shifts) { + if (windowed.count(shift.first) > 0) { + for (auto& component : s_Components) { + if (shift.first == component.time().window()) { component.shiftLevel(shift.second); break; } } - } - else - { + } else { bool fallback = true; - for (auto &component : s_Components) - { - if (!component.time().windowed()) - { + for (auto& component : s_Components) { + if (!component.time().windowed()) { component.shiftLevel(shift.second); fallback = false; break; } } - if (fallback) - { + if (fallback) { TTimeTimePrVec shifted; shifted.reserve(s_Components.size()); - for (auto &component : s_Components) - { - const CSeasonalTime &time_ = component.time(); - if (std::find_if(shifted.begin(), shifted.end(), - [&time_](const TTimeTimePr &window) - { - return !( time_.windowEnd() <= window.first - || time_.windowStart() >= window.second); - }) == shifted.end()) - { + for (auto& component : s_Components) { + const CSeasonalTime& time_ = component.time(); + if (std::find_if(shifted.begin(), shifted.end(), [&time_](const TTimeTimePr& window) { + return !(time_.windowEnd() <= window.first || time_.windowStart() >= window.second); + }) == shifted.end()) { component.shiftLevel(shift.second); } } @@ 
-2134,100 +1762,73 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime return s_Components.empty(); } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shiftOrigin(core_t::TTime time) -{ - for (auto &component : s_Components) - { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shiftOrigin(core_t::TTime time) { + for (auto& component : s_Components) { component.shiftOrigin(time); } } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::linearScale(core_t::TTime time, double scale) -{ - for (auto &component : s_Components) - { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::linearScale(core_t::TTime time, double scale) { + for (auto& component : s_Components) { component.linearScale(time, scale); } } -uint64_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, s_Components); return CChecksum::calculate(seed, s_PredictionErrors); } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SSeasonal"); core::CMemoryDebug::dynamicSize("s_Components", s_Components, mem); core::CMemoryDebug::dynamicSize("s_PredictionErrors", s_PredictionErrors, mem); } -std::size_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::memoryUsage() const -{ +std::size_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::memoryUsage() const { return core::CMemory::dynamicSize(s_Components) + core::CMemory::dynamicSize(s_PredictionErrors); } bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptRestoreTraverser(double decayRate, core_t::TTime bucketLength_, - core::CStateRestoreTraverser &traverser) -{ + core::CStateRestoreTraverser& traverser) { double bucketLength{static_cast(bucketLength_)}; - if (traverser.name() == VERSION_6_3_TAG) - { - while (traverser.next()) - { - const std::string &name{traverser.name()}; - RESTORE_NO_ERROR(COMPONENT_6_3_TAG, s_Components.emplace_back( - decayRate, bucketLength, traverser)) - RESTORE(ERRORS_6_3_TAG, core::CPersistUtils::restore( - ERRORS_6_3_TAG, s_PredictionErrors, traverser)) + if (traverser.name() == VERSION_6_3_TAG) { + while (traverser.next()) { + const std::string& name{traverser.name()}; + RESTORE_NO_ERROR(COMPONENT_6_3_TAG, s_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_6_3_TAG, core::CPersistUtils::restore(ERRORS_6_3_TAG, s_PredictionErrors, traverser)) } - } - else - { + } else { // There is no version string this is historic state. 
- do - { - const std::string &name{traverser.name()}; - RESTORE_NO_ERROR(COMPONENT_OLD_TAG, s_Components.emplace_back( - decayRate, bucketLength, traverser)) - RESTORE(ERRORS_OLD_TAG, core::CPersistUtils::restore( - ERRORS_OLD_TAG, s_PredictionErrors, traverser)) - } - while (traverser.next()); + do { + const std::string& name{traverser.name()}; + RESTORE_NO_ERROR(COMPONENT_OLD_TAG, s_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_OLD_TAG, core::CPersistUtils::restore(ERRORS_OLD_TAG, s_PredictionErrors, traverser)) + } while (traverser.next()); } return true; } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); - for (const auto &component : s_Components) - { - inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind( - &CCalendarComponent::acceptPersistInserter, &component, _1)); + for (const auto& component : s_Components) { + inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind(&CCalendarComponent::acceptPersistInserter, &component, _1)); } core::CPersistUtils::persist(ERRORS_6_3_TAG, s_PredictionErrors, inserter); } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::decayRate(double decayRate) -{ - for (auto &component : s_Components) - { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::decayRate(double decayRate) { + for (auto& component : s_Components) { component.decayRate(decayRate); } } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::propagateForwards(core_t::TTime start, - core_t::TTime end) -{ - for (std::size_t i = 0u; i < s_Components.size(); ++i) - { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::propagateForwards(core_t::TTime start, core_t::TTime end) { + for (std::size_t i = 0u; i < s_Components.size(); ++i) { core_t::TTime a{CIntegerTools::floor(start, MONTH)}; core_t::TTime b{CIntegerTools::floor(end, MONTH)}; - if (b > a) - { + if (b > a) { double time{static_cast(b - a) / static_cast(MONTH)}; s_Components[i].propagateForwardsByTime(time); s_PredictionErrors[i].age(std::exp(-s_Components[i].decayRate() * time)); @@ -2235,22 +1836,17 @@ void CTimeSeriesDecompositionDetail::CComponents::SCalendar::propagateForwards(c } } -std::size_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::size() const -{ +std::size_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::size() const { std::size_t result{0}; - for (const auto &component : s_Components) - { + for (const auto& component : s_Components) { result += component.size(); } return result; } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::haveComponent(CCalendarFeature feature) const -{ - for (const auto &component : s_Components) - { - if (component.feature() == feature) - { +bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::haveComponent(CCalendarFeature feature) const { + for (const auto& component : s_Components) { + if (component.feature() == feature) { return true; } } @@ -2258,107 +1854,81 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::haveComponent(CCale } void CTimeSeriesDecompositionDetail::CComponents::SCalendar::componentsAndErrors(core_t::TTime time, - TCalendarComponentPtrVec &components, - TComponentErrorsPtrVec &errors) -{ + TCalendarComponentPtrVec& components, + TComponentErrorsPtrVec& errors) { std::size_t n = 
s_Components.size(); components.reserve(n); errors.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { - if (s_Components[i].feature().inWindow(time)) - { + for (std::size_t i = 0u; i < n; ++i) { + if (s_Components[i].feature().inWindow(time)) { components.push_back(&s_Components[i]); errors.push_back(&s_PredictionErrors[i]); } } } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::shouldInterpolate(core_t::TTime time, - core_t::TTime last) const -{ - for (const auto &component : s_Components) - { +bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::shouldInterpolate(core_t::TTime time, core_t::TTime last) const { + for (const auto& component : s_Components) { CCalendarFeature feature = component.feature(); - if (!feature.inWindow(time) && feature.inWindow(last)) - { + if (!feature.inWindow(time) && feature.inWindow(last)) { return true; } } return false; } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::interpolate(core_t::TTime time, - core_t::TTime last, - bool refine) -{ - for (auto &component : s_Components) - { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::interpolate(core_t::TTime time, core_t::TTime last, bool refine) { + for (auto& component : s_Components) { CCalendarFeature feature = component.feature(); - if (!feature.inWindow(time) && feature.inWindow(last)) - { + if (!feature.inWindow(time) && feature.inWindow(last)) { component.interpolate(time - feature.offset(time), refine); } } } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::initialized() const -{ - for (const auto &component : s_Components) - { - if (component.initialized()) - { +bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::initialized() const { + for (const auto& component : s_Components) { + if (component.initialized()) { return true; } } return false; } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::prune(core_t::TTime time, - core_t::TTime bucketLength) -{ +bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::prune(core_t::TTime time, core_t::TTime bucketLength) { TBoolVec remove(s_Components.size(), false); - for (std::size_t i = 0u; i < s_Components.size(); ++i) - { - if (s_PredictionErrors[i].remove(bucketLength, s_Components[i])) - { + for (std::size_t i = 0u; i < s_Components.size(); ++i) { + if (s_PredictionErrors[i].remove(bucketLength, s_Components[i])) { LOG_DEBUG("Removing calendar component" << " '" << s_Components[i].feature().print() << "' at " << time); remove[i] = true; } } - CSetTools::simultaneousRemoveIf( - remove, s_Components, s_PredictionErrors, [](bool remove_) { return remove_; }); + CSetTools::simultaneousRemoveIf(remove, s_Components, s_PredictionErrors, [](bool remove_) { return remove_; }); return s_Components.empty(); } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::linearScale(core_t::TTime time, double scale) -{ - for (auto &component : s_Components) - { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::linearScale(core_t::TTime time, double scale) { + for (auto& component : s_Components) { component.linearScale(time, scale); } } -uint64_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, s_Components); return CChecksum::calculate(seed, s_PredictionErrors); } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ 
+void CTimeSeriesDecompositionDetail::CComponents::SCalendar::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SCalendar"); core::CMemoryDebug::dynamicSize("s_Components", s_Components, mem); core::CMemoryDebug::dynamicSize("s_PredictionErrors", s_PredictionErrors, mem); } -std::size_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::memoryUsage() const -{ +std::size_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::memoryUsage() const { return core::CMemory::dynamicSize(s_Components) + core::CMemory::dynamicSize(s_PredictionErrors); } - } } diff --git a/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc b/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc index b6a4a07962..5459085963 100644 --- a/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc +++ b/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc @@ -19,14 +19,10 @@ #include #include +namespace ml { +namespace maths { -namespace ml -{ -namespace maths -{ - -namespace -{ +namespace { // We use short field names to reduce the state size // There needs to be one constant here per sub-class @@ -36,38 +32,27 @@ const std::string TIME_SERIES_DECOMPOSITION_TAG("a"); const std::string TIME_SERIES_DECOMPOSITION_STUB_TAG("b"); const std::string EMPTY_STRING; - } -bool CTimeSeriesDecompositionStateSerialiser::operator()(const STimeSeriesDecompositionRestoreParams ¶ms, - TDecompositionPtr &result, - core::CStateRestoreTraverser &traverser) const -{ +bool CTimeSeriesDecompositionStateSerialiser:: +operator()(const STimeSeriesDecompositionRestoreParams& params, TDecompositionPtr& result, core::CStateRestoreTraverser& traverser) const { std::size_t numResults = 0; - do - { - const std::string &name = traverser.name(); - if (name == TIME_SERIES_DECOMPOSITION_TAG) - { + do { + const std::string& name = traverser.name(); + if (name == TIME_SERIES_DECOMPOSITION_TAG) { result.reset(new CTimeSeriesDecomposition(params, traverser)); ++numResults; - } - else if (name == TIME_SERIES_DECOMPOSITION_STUB_TAG) - { + } else if (name == TIME_SERIES_DECOMPOSITION_STUB_TAG) { result.reset(new CTimeSeriesDecompositionStub()); ++numResults; - } - else - { + } else { LOG_ERROR("No decomposition corresponds to name " << traverser.name()); return false; } - } - while (traverser.next()); + } while (traverser.next()); - if (numResults != 1) - { + if (numResults != 1) { LOG_ERROR("Expected 1 (got " << numResults << ") decomposition tags"); result.reset(); return false; @@ -76,26 +61,18 @@ bool CTimeSeriesDecompositionStateSerialiser::operator()(const STimeSeriesDecomp return true; } -void CTimeSeriesDecompositionStateSerialiser::operator()(const CTimeSeriesDecompositionInterface &decomposition, - core::CStatePersistInserter &inserter) const -{ - if (dynamic_cast(&decomposition) != 0) - { +void CTimeSeriesDecompositionStateSerialiser::operator()(const CTimeSeriesDecompositionInterface& decomposition, + core::CStatePersistInserter& inserter) const { + if (dynamic_cast(&decomposition) != 0) { inserter.insertLevel(TIME_SERIES_DECOMPOSITION_TAG, boost::bind(&CTimeSeriesDecomposition::acceptPersistInserter, - dynamic_cast(&decomposition), _1)); - } - else if (dynamic_cast(&decomposition) != 0) - { + dynamic_cast(&decomposition), + _1)); + } else if (dynamic_cast(&decomposition) != 0) { inserter.insertValue(TIME_SERIES_DECOMPOSITION_STUB_TAG, ""); - } - else - { - LOG_ERROR("Decomposition with type '" << typeid(decomposition).name() - << "' has no defined name"); + } else { + LOG_ERROR("Decomposition with type '" << 
typeid(decomposition).name() << "' has no defined name"); } } - } } - diff --git a/lib/maths/CTimeSeriesDecompositionStub.cc b/lib/maths/CTimeSeriesDecompositionStub.cc index 0978a34889..5f66299842 100644 --- a/lib/maths/CTimeSeriesDecompositionStub.cc +++ b/lib/maths/CTimeSeriesDecompositionStub.cc @@ -8,67 +8,50 @@ #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { const maths_t::TSeasonalComponentVec NO_COMPONENTS; } -CTimeSeriesDecompositionStub *CTimeSeriesDecompositionStub::clone(bool /*isForForecast*/) const -{ +CTimeSeriesDecompositionStub* CTimeSeriesDecompositionStub::clone(bool /*isForForecast*/) const { return new CTimeSeriesDecompositionStub(*this); } -void CTimeSeriesDecompositionStub::dataType(maths_t::EDataType /*dataType*/) -{ +void CTimeSeriesDecompositionStub::dataType(maths_t::EDataType /*dataType*/) { } -void CTimeSeriesDecompositionStub::decayRate(double /*decayRate*/) -{ +void CTimeSeriesDecompositionStub::decayRate(double /*decayRate*/) { } -double CTimeSeriesDecompositionStub::decayRate() const -{ +double CTimeSeriesDecompositionStub::decayRate() const { return 0.0; } -bool CTimeSeriesDecompositionStub::initialized() const -{ +bool CTimeSeriesDecompositionStub::initialized() const { return false; } bool CTimeSeriesDecompositionStub::addPoint(core_t::TTime /*time*/, double /*value*/, - const maths_t::TWeightStyleVec &/*weightStyles*/, - const maths_t::TDouble4Vec &/*weights*/) -{ + const maths_t::TWeightStyleVec& /*weightStyles*/, + const maths_t::TDouble4Vec& /*weights*/) { return false; } -bool CTimeSeriesDecompositionStub::applyChange(core_t::TTime /*time*/, - double /*value*/, - const SChangeDescription &/*change*/) -{ +bool CTimeSeriesDecompositionStub::applyChange(core_t::TTime /*time*/, double /*value*/, const SChangeDescription& /*change*/) { return false; } -void CTimeSeriesDecompositionStub::propagateForwardsTo(core_t::TTime /*time*/) -{ +void CTimeSeriesDecompositionStub::propagateForwardsTo(core_t::TTime /*time*/) { } -double CTimeSeriesDecompositionStub::meanValue(core_t::TTime /*time*/) const -{ +double CTimeSeriesDecompositionStub::meanValue(core_t::TTime /*time*/) const { return 0.0; } -maths_t::TDoubleDoublePr CTimeSeriesDecompositionStub::value(core_t::TTime /*time*/, - double /*confidence*/, - int /*components*/, - bool /*smooth*/) const -{ +maths_t::TDoubleDoublePr +CTimeSeriesDecompositionStub::value(core_t::TTime /*time*/, double /*confidence*/, int /*components*/, bool /*smooth*/) const { return {0.0, 0.0}; } @@ -77,69 +60,51 @@ void CTimeSeriesDecompositionStub::forecast(core_t::TTime /*startTime*/, core_t::TTime /*step*/, double /*confidence*/, double /*minimumScale*/, - const TWriteForecastResult &/*writer*/) -{ + const TWriteForecastResult& /*writer*/) { } -double CTimeSeriesDecompositionStub::detrend(core_t::TTime /*time*/, - double value, - double /*confidence*/, - int /*components*/) const -{ +double CTimeSeriesDecompositionStub::detrend(core_t::TTime /*time*/, double value, double /*confidence*/, int /*components*/) const { return value; } -double CTimeSeriesDecompositionStub::meanVariance() const -{ +double CTimeSeriesDecompositionStub::meanVariance() const { return 0.0; } -maths_t::TDoubleDoublePr CTimeSeriesDecompositionStub::scale(core_t::TTime /*time*/, - double /*variance*/, - double /*confidence*/, - bool /*smooth*/) const -{ +maths_t::TDoubleDoublePr +CTimeSeriesDecompositionStub::scale(core_t::TTime /*time*/, double /*variance*/, double /*confidence*/, bool /*smooth*/) 
const { return {1.0, 1.0}; } -void CTimeSeriesDecompositionStub::skipTime(core_t::TTime /*skipInterval*/) -{ +void CTimeSeriesDecompositionStub::skipTime(core_t::TTime /*skipInterval*/) { } -uint64_t CTimeSeriesDecompositionStub::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesDecompositionStub::checksum(uint64_t seed) const { return seed; } -void CTimeSeriesDecompositionStub::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CTimeSeriesDecompositionStub::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CTimeSeriesDecompositionStub"); } -std::size_t CTimeSeriesDecompositionStub::memoryUsage() const -{ +std::size_t CTimeSeriesDecompositionStub::memoryUsage() const { return 0; } -std::size_t CTimeSeriesDecompositionStub::staticSize() const -{ +std::size_t CTimeSeriesDecompositionStub::staticSize() const { return sizeof(*this); } -core_t::TTime CTimeSeriesDecompositionStub::timeShift() const -{ +core_t::TTime CTimeSeriesDecompositionStub::timeShift() const { return 0; } -const maths_t::TSeasonalComponentVec &CTimeSeriesDecompositionStub::seasonalComponents() const -{ +const maths_t::TSeasonalComponentVec& CTimeSeriesDecompositionStub::seasonalComponents() const { return NO_COMPONENTS; } -core_t::TTime CTimeSeriesDecompositionStub::lastValueTime() const -{ +core_t::TTime CTimeSeriesDecompositionStub::lastValueTime() const { return 0; } - } } diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index d988d202e7..4e07ad1fe4 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -14,17 +14,17 @@ #include #include #include -#include #include -#include +#include #include #include #include #include +#include #include #include -#include #include +#include #include #include @@ -34,12 +34,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleDoublePr = std::pair<double, double>; using TSizeDoublePr = std::pair<std::size_t, double>; @@ -67,12 +64,7 @@ using TChangeDetectorPtr = boost::shared_ptr::epsilon())}; //! Derate the minimum Winsorisation weight. -double deratedMinimumWinsorisationWeight(double derate) -{ +double deratedMinimumWinsorisationWeight(double derate) { derate = CTools::truncate(derate, 0.0, 1.0); return MINIMUM_TAIL_WINSORISATION_WEIGHT + (0.5 - MINIMUM_TAIL_WINSORISATION_WEIGHT) * derate; } //! Get the one tail p-value from a specified Winsorisation weight. -double pValueFromTailWinsorisationWeight(double weight) -{ - if (weight >= 1.0) - { +double pValueFromTailWinsorisationWeight(double weight) { + if (weight >= 1.0) { return 1.0; } double logw{std::log(std::max(weight, MINIMUM_TAIL_WINSORISATION_WEIGHT))}; - return std::exp(0.5 * ( LOG_WINSORISED_FRACTION - - std::sqrt( CTools::pow2(LOG_WINSORISED_FRACTION) - + 4.0 * logw / LOG_MINIMUM_TAIL_WINSORISATION_WEIGHT - * LOG_MINIMUM_WEIGHT_FRACTION - * (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)))); + return std::exp(0.5 * + (LOG_WINSORISED_FRACTION - std::sqrt(CTools::pow2(LOG_WINSORISED_FRACTION) + + 4.0 * logw / LOG_MINIMUM_TAIL_WINSORISATION_WEIGHT * LOG_MINIMUM_WEIGHT_FRACTION * + (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)))); }
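pValueFromTailWinsorisationWeight above is the closed-form inverse of the weight curve used by tailWinsorisationWeight further down: w = (f / WF)^(-c log f), with c fixed so that the weight is 1 at the Winsorised fraction WF and equals the minimum weight at the minimum weight fraction MWF; solving the resulting quadratic in log f and keeping the tail root recovers f. A numeric round-trip check, with made-up constants standing in for the real WINSORISED_FRACTION, MINIMUM_WINSORISATION_WEIGHT_FRACTION and MINIMUM_TAIL_WINSORISATION_WEIGHT values, which are defined elsewhere in this file:

#include <cmath>
#include <iostream>

// Assumed illustrative constants, not the library's actual values.
const double WF{1e-4};          // stands in for WINSORISED_FRACTION
const double MWF{1e-12};        // stands in for MINIMUM_WINSORISATION_WEIGHT_FRACTION
const double MIN_WEIGHT{0.005}; // stands in for MINIMUM_TAIL_WINSORISATION_WEIGHT
const double LOG_WF{std::log(WF)};
const double LOG_MWF{std::log(MWF)};
const double LOG_MIN_WEIGHT{std::log(MIN_WEIGHT)};

// Forward map: tail fraction f -> weight, i.e. w = (f / WF)^(-c log f) with
// c chosen so that w(MWF) = MIN_WEIGHT and w(WF) = 1.
double weightFromFraction(double f) {
    double c{-LOG_MIN_WEIGHT / LOG_MWF / (LOG_MWF - LOG_WF)};
    double logf{std::log(f)};
    return std::exp(-c * logf * (logf - LOG_WF));
}

// Inverse map, mirroring pValueFromTailWinsorisationWeight(): solve the
// quadratic in log f and keep the root in the tail.
double fractionFromWeight(double w) {
    double logw{std::log(w)};
    return std::exp(0.5 * (LOG_WF - std::sqrt(LOG_WF * LOG_WF +
                           4.0 * logw / LOG_MIN_WEIGHT * LOG_MWF * (LOG_MWF - LOG_WF))));
}

int main() {
    double f{1e-6};
    double w{weightFromFraction(f)};
    // The recovered fraction matches f, confirming the two maps are inverses.
    std::cout << "w(" << f << ") = " << w << ", recovered f = " << fractionFromWeight(w) << '\n';
    return 0;
}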
//! Optionally randomly sample from \p indices. -TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus &rng, - const CModelAddSamplesParams &params, - core_t::TTime bucketLength, - const TSizeVec &indices) -{ +TOptionalSize +randomlySample(CPRNG::CXorOShiro128Plus& rng, const CModelAddSamplesParams& params, core_t::TTime bucketLength, const TSizeVec& indices) { using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>; double weight{1.0}; { - auto i = std::find(params.weightStyles().begin(), - params.weightStyles().end(), - maths_t::E_SampleWinsorisationWeight); - if (i != params.weightStyles().end()) - { + auto i = std::find(params.weightStyles().begin(), params.weightStyles().end(), maths_t::E_SampleWinsorisationWeight); + if (i != params.weightStyles().end()) { std::ptrdiff_t index{i - params.weightStyles().begin()}; - auto addWeight = [index](TMeanAccumulator mean, const TDouble2Vec4Vec &weight_) - { - mean.add(weight_[index]); - return mean; - }; - TMeanAccumulator mean{std::accumulate(params.trendWeights().begin(), - params.trendWeights().end(), - TMeanAccumulator{}, addWeight)}; + auto addWeight = [index](TMeanAccumulator mean, const TDouble2Vec4Vec& weight_) { + mean.add(weight_[index]); + return mean; + }; + TMeanAccumulator mean{ + std::accumulate(params.trendWeights().begin(), params.trendWeights().end(), TMeanAccumulator{}, addWeight)}; weight = CBasicStatistics::mean(mean); } } - double p{SLIDING_WINDOW_SIZE * static_cast<double>(bucketLength) - / static_cast<double>(core::constants::DAY) - * weight}; - if (p >= 1.0 || CSampling::uniformSample(rng, 0.0, 1.0) < p) - { + double p{SLIDING_WINDOW_SIZE * static_cast<double>(bucketLength) / static_cast<double>(core::constants::DAY) * weight}; + if (p >= 1.0 || CSampling::uniformSample(rng, 0.0, 1.0) < p) { std::size_t i{CSampling::uniformSample(rng, 0, indices.size())}; return indices[i]; } @@ -153,33 +130,25 @@ TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus &rng, //! Computes a Winsorisation weight based on the chance that the //! time series is currently undergoing a change. -double changeWinsorisationWeight(const TChangeDetectorPtr &detector) -{ - if (detector != nullptr) - { +double changeWinsorisationWeight(const TChangeDetectorPtr& detector) { + if (detector != nullptr) { std::size_t dummy; - return std::max(CTools::logisticFunction( - detector->decisionFunction(dummy), 0.1, 1.0, -1.0), - MINIMUM_CHANGE_WINSORISATION_WEIGHT); + return std::max(CTools::logisticFunction(detector->decisionFunction(dummy), 0.1, 1.0, -1.0), MINIMUM_CHANGE_WINSORISATION_WEIGHT); } return 1.0; } //! Convert \p value to comma separated string. -std::string toDelimited(const TTimeDoublePr &value) -{ - return core::CStringUtils::typeToString(value.first) + ',' - + core::CStringUtils::typeToStringPrecise( - value.second, core::CIEEE754::E_SinglePrecision); +std::string toDelimited(const TTimeDoublePr& value) { + return core::CStringUtils::typeToString(value.first) + ',' + + core::CStringUtils::typeToStringPrecise(value.second, core::CIEEE754::E_SinglePrecision); } //! Extract \p value from comma separated string. 
-bool fromDelimited(const std::string &str, TTimeDoublePr &value) -{ +bool fromDelimited(const std::string& str, TTimeDoublePr& value) { std::size_t pos{str.find(',')}; - return pos != std::string::npos - && core::CStringUtils::stringToType(str.substr(0, pos), value.first) - && core::CStringUtils::stringToType(str.substr(pos + 1), value.second); + return pos != std::string::npos && core::CStringUtils::stringToType(str.substr(0, pos), value.first) && + core::CStringUtils::stringToType(str.substr(pos + 1), value.second); } // Models @@ -226,43 +195,31 @@ const std::string SECOND_CORRELATE_ID_TAG{"b"}; const std::string CORRELATION_MODEL_TAG{"c"}; const std::string CORRELATION_TAG{"d"}; -namespace forecast -{ +namespace forecast { const std::string INFO_INSUFFICIENT_HISTORY("Insufficient history to forecast"); const std::string ERROR_MULTIVARIATE("Forecast not supported for multivariate features"); } - } -double tailWinsorisationWeight(const CPrior &prior, - double derate, - double scale, - double value) -{ +double tailWinsorisationWeight(const CPrior& prior, double derate, double scale, double value) { double deratedMinimumWeight{deratedMinimumWinsorisationWeight(derate)}; double lowerBound; double upperBound; - if (!prior.minusLogJointCdf(CConstantWeights::SEASONAL_VARIANCE, - {value}, {{scale}}, lowerBound, upperBound)) - { + if (!prior.minusLogJointCdf(CConstantWeights::SEASONAL_VARIANCE, {value}, {{scale}}, lowerBound, upperBound)) { return 1.0; } - if ( upperBound < MINUS_LOG_TOLERANCE - && !prior.minusLogJointCdfComplement(CConstantWeights::SEASONAL_VARIANCE, - {value}, {{scale}}, lowerBound, upperBound)) - { + if (upperBound < MINUS_LOG_TOLERANCE && + !prior.minusLogJointCdfComplement(CConstantWeights::SEASONAL_VARIANCE, {value}, {{scale}}, lowerBound, upperBound)) { return 1.0; } double f{std::exp(-(lowerBound + upperBound) / 2.0)}; f = std::min(f, 1.0 - f); - if (f >= WINSORISED_FRACTION) - { + if (f >= WINSORISED_FRACTION) { return 1.0; } - if (f <= MINIMUM_WINSORISATION_WEIGHT_FRACTION) - { + if (f <= MINIMUM_WINSORISATION_WEIGHT_FRACTION) { return deratedMinimumWeight; } @@ -272,14 +229,12 @@ double tailWinsorisationWeight(const CPrior &prior, // fraction and c is determined by solving: // MW = (MWF / WF)^(-c log(MWF)) - double deratedExponent{ -std::log(deratedMinimumWeight) - / LOG_MINIMUM_WEIGHT_FRACTION - / (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)}; + double deratedExponent{-std::log(deratedMinimumWeight) / LOG_MINIMUM_WEIGHT_FRACTION / + (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)}; double logf{std::log(f)}; double result{std::exp(-deratedExponent * logf * (logf - LOG_WINSORISED_FRACTION))}; - if (CMathsFuncs::isNan(result)) - { + if (CMathsFuncs::isNan(result)) { return 1.0; } @@ -288,332 +243,276 @@ double tailWinsorisationWeight(const CPrior &prior, return result; } -double tailWinsorisationWeight(const CMultivariatePrior &prior, +double tailWinsorisationWeight(const CMultivariatePrior& prior, std::size_t dimension, double derate, double scale, - const core::CSmallVector &value) -{ + const core::CSmallVector& value) { std::size_t dimensions = prior.dimension(); TSizeDoublePr10Vec condition(dimensions - 1); - for (std::size_t i = 0u, j = 0u; i < dimensions; ++i) - { - if (i != dimension) - { + for (std::size_t i = 0u, j = 0u; i < dimensions; ++i) { + if (i != dimension) { condition[j++] = std::make_pair(i, value[i]); } } - boost::shared_ptr conditional( - prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); + boost::shared_ptr 
conditional(prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); return tailWinsorisationWeight(*conditional, derate, scale, value[dimension]); } //! \brief A model of anomalous sections of a time series. -class CTimeSeriesAnomalyModel -{ - public: - CTimeSeriesAnomalyModel(); - CTimeSeriesAnomalyModel(core_t::TTime bucketLength, double decayRate); +class CTimeSeriesAnomalyModel { +public: + CTimeSeriesAnomalyModel(); + CTimeSeriesAnomalyModel(core_t::TTime bucketLength, double decayRate); - //! Update the anomaly with prediction error and probability. - //! - //! This extends the current anomaly if \p probability is small. - //! Otherwise it closes it. - void updateAnomaly(const CModelProbabilityParams ¶ms, - core_t::TTime time, TDouble2Vec errors, double probability); + //! Update the anomaly with prediction error and probability. + //! + //! This extends the current anomaly if \p probability is small. + //! Otherwise it closes it. + void updateAnomaly(const CModelProbabilityParams& params, core_t::TTime time, TDouble2Vec errors, double probability); - //! If the time series is currently anomalous, update the model - //! with the anomaly feature vector. - void sampleAnomaly(const CModelProbabilityParams ¶ms, core_t::TTime time); + //! If the time series is currently anomalous, update the model + //! with the anomaly feature vector. + void sampleAnomaly(const CModelProbabilityParams& params, core_t::TTime time); - //! Reset the mean error norm. - void reset(); + //! Reset the mean error norm. + void reset(); - //! If the time series is currently anomalous, compute the anomalousness - //! of the anomaly feature vector. - void probability(const CModelProbabilityParams ¶ms, core_t::TTime time, double &probability) const; + //! If the time series is currently anomalous, compute the anomalousness + //! of the anomaly feature vector. + void probability(const CModelProbabilityParams& params, core_t::TTime time, double& probability) const; - //! Age the model to account for \p time elapsed time. - void propagateForwardsByTime(double time); + //! Age the model to account for \p time elapsed time. + void propagateForwardsByTime(double time); - //! Compute a checksum for this object. - uint64_t checksum(uint64_t seed) const; + //! Compute a checksum for this object. + uint64_t checksum(uint64_t seed) const; - //! Debug the memory used by this object. - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + //! Debug the memory used by this object. + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - //! Get the memory used by this object. - std::size_t memoryUsage() const; + //! Get the memory used by this object. + std::size_t memoryUsage() const; - //! Initialize reading state from \p traverser. - bool acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser); + //! Initialize reading state from \p traverser. + bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); - //! Persist by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + //! Persist by passing information to \p inserter. 
+ void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - private: - using TDouble10Vec = core::CSmallVector<double, 10>; - using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator; - using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator; - using TMultivariateNormalConjugate = CMultivariateNormalConjugate<2>; - using TMultivariateNormalConjugateVec = std::vector<TMultivariateNormalConjugate>; - - //! \brief Extracts features related to anomalous time periods. - class CAnomaly - { - public: - //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return true; } - - public: - CAnomaly() : m_Tag(0), m_OpenTime(0), m_Sign(0.0) {} - CAnomaly(std::size_t tag, core_t::TTime time) : - m_Tag(tag), m_OpenTime(time), m_Sign(0.0) - {} - - //! Get the anomaly tag. - std::size_t tag() const { return m_Tag; } - - //! Add a result to the anomaly. - void update(const TDouble2Vec &errors) - { - double norm{0.0}; - for (const auto &error : errors) - { - norm += std::pow(error, 2.0); - m_Sign += error; - } - m_MeanErrorNorm.add(std::sqrt(norm)); - } - - //! Get the weight to apply to this anomaly on update. - double weight(core_t::TTime time) const - { - return 1.0 / (1.0 + std::max(static_cast<double>(time - m_OpenTime), 0.0)); - } +private: + using TDouble10Vec = core::CSmallVector<double, 10>; + using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator; + using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator; + using TMultivariateNormalConjugate = CMultivariateNormalConjugate<2>; + using TMultivariateNormalConjugateVec = std::vector<TMultivariateNormalConjugate>; - //! Check if this anomaly is positive or negative. - bool positive() const { return m_Sign > 0.0; } - - //! Get the feature vector for this anomaly. - TDouble10Vec features(core_t::TTime time) const - { - return {static_cast<double>(time - m_OpenTime), - CBasicStatistics::mean(m_MeanErrorNorm)}; - } - - //! Compute a checksum for this object. - uint64_t checksum(uint64_t seed) const - { - seed = CChecksum::calculate(seed, m_Tag); - seed = CChecksum::calculate(seed, m_OpenTime); - seed = CChecksum::calculate(seed, m_Sign); - return CChecksum::calculate(seed, m_MeanErrorNorm); - } - - //! Initialize reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) - { - do - { - const std::string &name{traverser.name()}; - RESTORE_BUILT_IN(TAG_TAG, m_Tag) - RESTORE_BUILT_IN(OPEN_TIME_TAG, m_OpenTime) - RESTORE_BUILT_IN(SIGN_TAG, m_Sign) - RESTORE(MEAN_ERROR_NORM_TAG, m_MeanErrorNorm.fromDelimited(traverser.value())) - } - while (traverser.next()); - return true; - } - - //! Persist by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter &inserter) const - { - inserter.insertValue(TAG_TAG, m_Tag); - inserter.insertValue(OPEN_TIME_TAG, m_OpenTime); - inserter.insertValue(SIGN_TAG, m_Sign, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(MEAN_ERROR_NORM_TAG, m_MeanErrorNorm.toDelimited()); - } + //! \brief Extracts features related to anomalous time periods. + class CAnomaly { + public: + //! See core::CMemory. + static bool dynamicSizeAlwaysZero() { return true; } - private: - //! The anomaly tag. - std::size_t m_Tag; + public: + CAnomaly() : m_Tag(0), m_OpenTime(0), m_Sign(0.0) {} + CAnomaly(std::size_t tag, core_t::TTime time) : m_Tag(tag), m_OpenTime(time), m_Sign(0.0) {} + + //! Get the anomaly tag. + std::size_t tag() const { return m_Tag; } + + //! Add a result to the anomaly. 
+ void update(const TDouble2Vec& errors) { + double norm{0.0}; + for (const auto& error : errors) { + norm += std::pow(error, 2.0); + m_Sign += error; + } + m_MeanErrorNorm.add(std::sqrt(norm)); + } - //! The time at which the anomaly started. - core_t::TTime m_OpenTime; + //! Get the weight to apply to this anomaly on update. + double weight(core_t::TTime time) const { return 1.0 / (1.0 + std::max(static_cast<double>(time - m_OpenTime), 0.0)); } - //! The anomaly sign, i.e. is the mean error positive or negative. - double m_Sign; + //! Check if this anomaly is positive or negative. + bool positive() const { return m_Sign > 0.0; } - //! The mean deviation from predictions. - TMeanAccumulator m_MeanErrorNorm; - }; - using TAnomaly1Vec = core::CSmallVector<CAnomaly, 1>; + //! Get the feature vector for this anomaly. + TDouble10Vec features(core_t::TTime time) const { + return {static_cast<double>(time - m_OpenTime), CBasicStatistics::mean(m_MeanErrorNorm)}; + } - private: - //! The largest anomalous probability. - static const double LARGEST_ANOMALOUS_PROBABILITY; - //! The log of the largest anomalous probability. - static const double LOG_LARGEST_ANOMALOUS_PROBABILITY; - //! The log of the largest probability that it is deemed - //! significantly anomalous. - static const double LOG_SMALL_PROBABILITY; - //! A unit weight. - static const TDouble10Vec4Vec1Vec UNIT; + //! Compute a checksum for this object. + uint64_t checksum(uint64_t seed) const { + seed = CChecksum::calculate(seed, m_Tag); + seed = CChecksum::calculate(seed, m_OpenTime); + seed = CChecksum::calculate(seed, m_Sign); + return CChecksum::calculate(seed, m_MeanErrorNorm); + } - private: - //! Update the appropriate anomaly model with \p anomaly. - void sample(core_t::TTime time, const CAnomaly &anomaly, double weight) - { - std::size_t index(anomaly.positive() ? 0 : 1); - TDouble10Vec1Vec features{anomaly.features(this->scale(time))}; - m_AnomalyFeatureModels[index].addSamples(CConstantWeights::COUNT, features, - {{TDouble10Vec(2, weight)}}); + //! Initialize reading state from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE_BUILT_IN(TAG_TAG, m_Tag) + RESTORE_BUILT_IN(OPEN_TIME_TAG, m_OpenTime) + RESTORE_BUILT_IN(SIGN_TAG, m_Sign) + RESTORE(MEAN_ERROR_NORM_TAG, m_MeanErrorNorm.fromDelimited(traverser.value())) + } while (traverser.next()); + return true; } - //! Get the scaled time. - core_t::TTime scale(core_t::TTime time) const { return time / m_BucketLength; } + //! Persist by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertValue(TAG_TAG, m_Tag); + inserter.insertValue(OPEN_TIME_TAG, m_OpenTime); + inserter.insertValue(SIGN_TAG, m_Sign, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(MEAN_ERROR_NORM_TAG, m_MeanErrorNorm.toDelimited()); + }
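The CAnomaly bookkeeping above reduces an open anomaly to a two-dimensional feature vector, namely how long it has been open and the mean prediction error norm accumulated while open, which is what the 2-D normal conjugate models are later fed. A toy restatement with plain doubles standing in for the ml accumulator and small-vector types:

#include <cmath>
#include <iostream>
#include <utility>
#include <vector>

struct ToyAnomaly {
    long openTime{0};
    double sign{0.0};
    double sumNorm{0.0};
    double count{0.0};

    void update(const std::vector<double>& errors) {
        double norm{0.0};
        for (double error : errors) {
            norm += error * error; // squared L2 norm of this bucket's errors
            sign += error;         // running sign of the deviation
        }
        sumNorm += std::sqrt(norm);
        count += 1.0;
    }

    // Feature vector: how long the anomaly has been open (in scaled time)
    // and the mean prediction error norm over that period.
    std::pair<double, double> features(long time) const {
        return {static_cast<double>(time - openTime), count > 0.0 ? sumNorm / count : 0.0};
    }
};

int main() {
    ToyAnomaly anomaly;
    anomaly.openTime = 100;
    anomaly.update({3.0, 4.0}); // error norm 5
    anomaly.update({6.0, 8.0}); // error norm 10
    auto f = anomaly.features(104);
    std::cout << "duration = " << f.first << ", mean error norm = " << f.second << '\n'; // 4, 7.5
    return 0;
}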
private: - //! The data bucketing interval. - core_t::TTime m_BucketLength; - - //! The mean prediction error. - TMeanAccumulator m_MeanError; - - //! The current anomalies (if there are any). - TAnomaly1Vec m_Anomalies; - - //! The model describing features of anomalous time periods. - TMultivariateNormalConjugateVec m_AnomalyFeatureModels; + //! The anomaly tag. + std::size_t m_Tag; + + //! The time at which the anomaly started. + core_t::TTime m_OpenTime; + + //! The anomaly sign, i.e. is the mean error positive or negative. + double m_Sign; + + //! The mean deviation from predictions. + TMeanAccumulator m_MeanErrorNorm; + }; + using TAnomaly1Vec = core::CSmallVector<CAnomaly, 1>; + +private: + //! The largest anomalous probability. + static const double LARGEST_ANOMALOUS_PROBABILITY; + //! The log of the largest anomalous probability. + static const double LOG_LARGEST_ANOMALOUS_PROBABILITY; + //! The log of the largest probability that it is deemed + //! significantly anomalous. + static const double LOG_SMALL_PROBABILITY; + //! A unit weight. + static const TDouble10Vec4Vec1Vec UNIT; + +private: + //! Update the appropriate anomaly model with \p anomaly. + void sample(core_t::TTime time, const CAnomaly& anomaly, double weight) { + std::size_t index(anomaly.positive() ? 0 : 1); + TDouble10Vec1Vec features{anomaly.features(this->scale(time))}; + m_AnomalyFeatureModels[index].addSamples(CConstantWeights::COUNT, features, {{TDouble10Vec(2, weight)}}); + } + + //! Get the scaled time. + core_t::TTime scale(core_t::TTime time) const { return time / m_BucketLength; } + +private: + //! The data bucketing interval. + core_t::TTime m_BucketLength; + + //! The mean prediction error. + TMeanAccumulator m_MeanError; + + //! The current anomalies (if there are any). + TAnomaly1Vec m_Anomalies; + + //! The model describing features of anomalous time periods. + TMultivariateNormalConjugateVec m_AnomalyFeatureModels; }; -CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel() : m_BucketLength(0) -{ +CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel() : m_BucketLength(0) { m_AnomalyFeatureModels.reserve(2); - m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( - maths_t::E_ContinuousData)); - m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( - maths_t::E_ContinuousData)); + m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); } -CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel(core_t::TTime bucketLength, double decayRate) : - m_BucketLength(bucketLength) -{ +CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel(core_t::TTime bucketLength, double decayRate) : m_BucketLength(bucketLength) { m_AnomalyFeatureModels.reserve(2); - m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( - maths_t::E_ContinuousData, - 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); - m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( - maths_t::E_ContinuousData, - 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); + m_AnomalyFeatureModels.push_back( + TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); + m_AnomalyFeatureModels.push_back( + TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); } -void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams &params, +void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams& params, core_t::TTime time, TDouble2Vec errors, - double probability) -{ - if (params.updateAnomalyModel()) - { + double probability) { + if (params.updateAnomalyModel()) { std::size_t tag{params.tag()}; - auto anomaly = std::find_if(m_Anomalies.begin(), m_Anomalies.end(), - [tag](const CAnomaly &anomaly_) { return anomaly_.tag() == tag; }); + auto anomaly = + std::find_if(m_Anomalies.begin(), m_Anomalies.end(), [tag](const CAnomaly& anomaly_) { 
return anomaly_.tag() == tag; }); - if (probability < LARGEST_ANOMALOUS_PROBABILITY) - { - m_MeanError.add(std::sqrt(std::accumulate(errors.begin(), errors.end(), 0.0, - [](double n, double x) { return n + x*x; }))); + if (probability < LARGEST_ANOMALOUS_PROBABILITY) { + m_MeanError.add(std::sqrt(std::accumulate(errors.begin(), errors.end(), 0.0, [](double n, double x) { return n + x * x; }))); double scale{CBasicStatistics::mean(m_MeanError)}; - for (auto &error : errors) - { + for (auto& error : errors) { error = scale == 0.0 ? 1.0 : error / scale; } - if (anomaly == m_Anomalies.end()) - { + if (anomaly == m_Anomalies.end()) { m_Anomalies.emplace_back(tag, this->scale(time)); anomaly = m_Anomalies.end() - 1; } anomaly->update(errors); - } - else if (anomaly != m_Anomalies.end()) - { + } else if (anomaly != m_Anomalies.end()) { this->sample(time, *anomaly, 1.0 - anomaly->weight(this->scale(time))); m_Anomalies.erase(anomaly); } } } -void CTimeSeriesAnomalyModel::sampleAnomaly(const CModelProbabilityParams ¶ms, core_t::TTime time) -{ - if (params.updateAnomalyModel()) - { +void CTimeSeriesAnomalyModel::sampleAnomaly(const CModelProbabilityParams& params, core_t::TTime time) { + if (params.updateAnomalyModel()) { std::size_t tag{params.tag()}; - auto anomaly = std::find_if(m_Anomalies.begin(), m_Anomalies.end(), - [tag](const CAnomaly &anomaly_) { return anomaly_.tag() == tag; }); - if (anomaly != m_Anomalies.end()) - { + auto anomaly = + std::find_if(m_Anomalies.begin(), m_Anomalies.end(), [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); + if (anomaly != m_Anomalies.end()) { this->sample(time, *anomaly, anomaly->weight(this->scale(time))); } } } -void CTimeSeriesAnomalyModel::reset() -{ +void CTimeSeriesAnomalyModel::reset() { m_MeanError = TMeanAccumulator(); - for (auto &model : m_AnomalyFeatureModels) - { - model = TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, - model.decayRate()); + for (auto& model : m_AnomalyFeatureModels) { + model = TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, model.decayRate()); } } -void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams ¶ms, - core_t::TTime time, double &probability) const -{ +void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams& params, core_t::TTime time, double& probability) const { std::size_t tag{params.tag()}; - auto anomaly = std::find_if(m_Anomalies.begin(), m_Anomalies.end(), - [tag](const CAnomaly &anomaly_) { return anomaly_.tag() == tag; }); - if (anomaly != m_Anomalies.end()) - { + auto anomaly = std::find_if(m_Anomalies.begin(), m_Anomalies.end(), [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); + if (anomaly != m_Anomalies.end()) { std::size_t index(anomaly->positive() ? 
0 : 1); TDouble10Vec1Vec features{anomaly->features(this->scale(time))}; double pl, pu; TTail10Vec tail; - if ( probability < LARGEST_ANOMALOUS_PROBABILITY - && !m_AnomalyFeatureModels[index].isNonInformative() - && m_AnomalyFeatureModels[index].probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, - CConstantWeights::COUNT, features, UNIT, - pl, pu, tail)) - { + if (probability < LARGEST_ANOMALOUS_PROBABILITY && !m_AnomalyFeatureModels[index].isNonInformative() && + m_AnomalyFeatureModels[index].probabilityOfLessLikelySamples( + maths_t::E_OneSidedAbove, CConstantWeights::COUNT, features, UNIT, pl, pu, tail)) { double logp{CTools::fastLog(probability)}; - double alpha{0.5 * std::min( (logp - LOG_LARGEST_ANOMALOUS_PROBABILITY) - / (LOG_SMALL_PROBABILITY - LOG_LARGEST_ANOMALOUS_PROBABILITY), 1.0)}; + double alpha{ + 0.5 * + std::min((logp - LOG_LARGEST_ANOMALOUS_PROBABILITY) / (LOG_SMALL_PROBABILITY - LOG_LARGEST_ANOMALOUS_PROBABILITY), 1.0)}; double pGivenAnomalous{(pl + pu) / 2.0}; double pScore{CTools::anomalyScore(probability)}; double pScoreGivenAnomalous{CTools::anomalyScore(pGivenAnomalous)}; - LOG_TRACE("features = " << features - << " score(.) = " << pScore - << " score(.|anomalous) = " << pScoreGivenAnomalous - << " p = " << probability); - probability = std::min(CTools::inverseAnomalyScore( (1.0 - alpha) * pScore - + alpha * pScoreGivenAnomalous), - LARGEST_ANOMALOUS_PROBABILITY); + LOG_TRACE("features = " << features << " score(.) = " << pScore << " score(.|anomalous) = " << pScoreGivenAnomalous + << " p = " << probability); + probability = + std::min(CTools::inverseAnomalyScore((1.0 - alpha) * pScore + alpha * pScoreGivenAnomalous), LARGEST_ANOMALOUS_PROBABILITY); } } } -void CTimeSeriesAnomalyModel::propagateForwardsByTime(double time) -{ +void CTimeSeriesAnomalyModel::propagateForwardsByTime(double time) { m_AnomalyFeatureModels[0].propagateForwardsByTime(time); m_AnomalyFeatureModels[1].propagateForwardsByTime(time); } -uint64_t CTimeSeriesAnomalyModel::checksum(uint64_t seed) const -{ +uint64_t CTimeSeriesAnomalyModel::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_BucketLength); seed = CChecksum::calculate(seed, m_MeanError); seed = CChecksum::calculate(seed, m_Anomalies); @@ -621,48 +520,37 @@ uint64_t CTimeSeriesAnomalyModel::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_AnomalyFeatureModels[1]); } -void CTimeSeriesAnomalyModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CTimeSeriesAnomalyModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CTimeSeriesAnomalyModel"); core::CMemoryDebug::dynamicSize("m_Anomalies", m_Anomalies, mem); core::CMemoryDebug::dynamicSize("m_AnomalyFeatureModels", m_AnomalyFeatureModels, mem); } -std::size_t CTimeSeriesAnomalyModel::memoryUsage() const -{ - return core::CMemory::dynamicSize(m_Anomalies) - + core::CMemory::dynamicSize(m_AnomalyFeatureModels); +std::size_t CTimeSeriesAnomalyModel::memoryUsage() const { + return core::CMemory::dynamicSize(m_Anomalies) + core::CMemory::dynamicSize(m_AnomalyFeatureModels); } -bool CTimeSeriesAnomalyModel::acceptRestoreTraverser(const SModelRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ +bool CTimeSeriesAnomalyModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { m_BucketLength = boost::unwrap_ref(params.s_Params).bucketLength(); std::size_t index{0}; - do - { - const std::string &name{traverser.name()}; + do { + const 
+        const std::string& name{traverser.name()};
         RESTORE(MEAN_ERROR_TAG, m_MeanError.fromDelimited(traverser.value()));
         RESTORE(ANOMALIES_TAG, core::CPersistUtils::restore(ANOMALIES_TAG, m_Anomalies, traverser));
         RESTORE(ANOMALY_FEATURE_MODEL_TAG,
                 traverser.traverseSubLevel(
-                    boost::bind(&TMultivariateNormalConjugate::acceptRestoreTraverser,
-                                &m_AnomalyFeatureModels[index++], _1)))
-    }
-    while (traverser.next());
+                    boost::bind(&TMultivariateNormalConjugate::acceptRestoreTraverser, &m_AnomalyFeatureModels[index++], _1)))
+    } while (traverser.next());
     return true;
 }
 
-void CTimeSeriesAnomalyModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void CTimeSeriesAnomalyModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     inserter.insertValue(MEAN_ERROR_TAG, m_MeanError.toDelimited());
     core::CPersistUtils::persist(ANOMALIES_TAG, m_Anomalies, inserter);
     inserter.insertLevel(ANOMALY_FEATURE_MODEL_TAG,
-                         boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter,
-                                     &m_AnomalyFeatureModels[0], _1));
+                         boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, &m_AnomalyFeatureModels[0], _1));
     inserter.insertLevel(ANOMALY_FEATURE_MODEL_TAG,
-                         boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter,
-                                     &m_AnomalyFeatureModels[1], _1));
+                         boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, &m_AnomalyFeatureModels[1], _1));
 }
 
 const double CTimeSeriesAnomalyModel::LARGEST_ANOMALOUS_PROBABILITY{0.1};
@@ -670,90 +558,69 @@ const double CTimeSeriesAnomalyModel::LOG_LARGEST_ANOMALOUS_PROBABILITY{CTools::
 const double CTimeSeriesAnomalyModel::LOG_SMALL_PROBABILITY{CTools::fastLog(SMALL_PROBABILITY)};
 const TDouble10Vec4Vec1Vec CTimeSeriesAnomalyModel::UNIT{CConstantWeights::unit<TDouble10Vec>(2)};
 
-
-CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams &params,
+CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams& params,
                                                        std::size_t id,
-                                                       const CTimeSeriesDecompositionInterface &trendModel,
-                                                       const CPrior &residualModel,
-                                                       const TDecayRateController2Ary *controllers,
-                                                       bool modelAnomalies) :
-        CModel(params),
-        m_Id(id),
-        m_IsNonNegative(false),
-        m_IsForecastable(true),
-        m_TrendModel(trendModel.clone()),
-        m_ResidualModel(residualModel.clone()),
-        m_AnomalyModel(modelAnomalies ?
-                       boost::make_shared<CTimeSeriesAnomalyModel>(params.bucketLength(),
                                                                   params.decayRate()) :
-                       TAnomalyModelPtr()),
-        m_CurrentChangeInterval(0),
-        m_SlidingWindow(SLIDING_WINDOW_SIZE),
-        m_Correlations(0)
-{
-    if (controllers)
-    {
+                                                       const CTimeSeriesDecompositionInterface& trendModel,
+                                                       const CPrior& residualModel,
+                                                       const TDecayRateController2Ary* controllers,
+                                                       bool modelAnomalies)
+    : CModel(params),
+      m_Id(id),
+      m_IsNonNegative(false),
+      m_IsForecastable(true),
+      m_TrendModel(trendModel.clone()),
+      m_ResidualModel(residualModel.clone()),
+      m_AnomalyModel(modelAnomalies ? boost::make_shared<CTimeSeriesAnomalyModel>(params.bucketLength(), params.decayRate())
+                                    : TAnomalyModelPtr()),
+      m_CurrentChangeInterval(0),
+      m_SlidingWindow(SLIDING_WINDOW_SIZE),
+      m_Correlations(0) {
+    if (controllers) {
         m_Controllers = boost::make_shared<TDecayRateController2Ary>(*controllers);
     }
 }
 
-CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const SModelRestoreParams &params,
-                                                       core::CStateRestoreTraverser &traverser):
-        CModel(params.s_Params),
-        m_IsForecastable(false),
-        m_SlidingWindow(SLIDING_WINDOW_SIZE),
-        m_Correlations(0)
-{
-    traverser.traverseSubLevel(boost::bind(&CUnivariateTimeSeriesModel::acceptRestoreTraverser,
-                                           this, boost::cref(params), _1));
+CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser)
+    : CModel(params.s_Params), m_IsForecastable(false), m_SlidingWindow(SLIDING_WINDOW_SIZE), m_Correlations(0) {
+    traverser.traverseSubLevel(boost::bind(&CUnivariateTimeSeriesModel::acceptRestoreTraverser, this, boost::cref(params), _1));
 }
 
-CUnivariateTimeSeriesModel::~CUnivariateTimeSeriesModel()
-{
-    if (m_Correlations != nullptr)
-    {
+CUnivariateTimeSeriesModel::~CUnivariateTimeSeriesModel() {
+    if (m_Correlations != nullptr) {
         m_Correlations->removeTimeSeries(m_Id);
     }
 }
 
-std::size_t CUnivariateTimeSeriesModel::identifier() const
-{
+std::size_t CUnivariateTimeSeriesModel::identifier() const {
     return m_Id;
 }
 
-CUnivariateTimeSeriesModel *CUnivariateTimeSeriesModel::clone(std::size_t id) const
-{
-    CUnivariateTimeSeriesModel *result{new CUnivariateTimeSeriesModel{*this, id}};
-    if (m_Correlations != nullptr)
-    {
+CUnivariateTimeSeriesModel* CUnivariateTimeSeriesModel::clone(std::size_t id) const {
+    CUnivariateTimeSeriesModel* result{new CUnivariateTimeSeriesModel{*this, id}};
+    if (m_Correlations != nullptr) {
         result->modelCorrelations(*m_Correlations);
     }
     return result;
 }
 
-CUnivariateTimeSeriesModel *CUnivariateTimeSeriesModel::cloneForPersistence() const
-{
+CUnivariateTimeSeriesModel* CUnivariateTimeSeriesModel::cloneForPersistence() const {
     return new CUnivariateTimeSeriesModel{*this, m_Id};
 }
 
-CUnivariateTimeSeriesModel *CUnivariateTimeSeriesModel::cloneForForecast() const
-{
+CUnivariateTimeSeriesModel* CUnivariateTimeSeriesModel::cloneForForecast() const {
     return new CUnivariateTimeSeriesModel{*this, m_Id, true};
 }
 
-bool CUnivariateTimeSeriesModel::isForecastPossible() const
-{
+bool CUnivariateTimeSeriesModel::isForecastPossible() const {
     return m_IsForecastable && !m_ResidualModel->isNonInformative();
 }
 
-void CUnivariateTimeSeriesModel::modelCorrelations(CTimeSeriesCorrelations &model)
-{
+void CUnivariateTimeSeriesModel::modelCorrelations(CTimeSeriesCorrelations& model) {
     m_Correlations = &model;
     m_Correlations->addTimeSeries(m_Id, *this);
 }
 
-TSize2Vec1Vec CUnivariateTimeSeriesModel::correlates() const
-{
+TSize2Vec1Vec CUnivariateTimeSeriesModel::correlates() const {
     TSize2Vec1Vec result;
     TSize1Vec correlated;
     TSize2Vec1Vec variables;
@@ -761,30 +628,23 @@ TSize2Vec1Vec CUnivariateTimeSeriesModel::correlates() const
     TModelCPtr1Vec correlatedTimeSeriesModels;
     this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels);
     result.resize(correlated.size(), TSize2Vec(2));
-    for (std::size_t i = 0u; i < correlated.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < correlated.size(); ++i) {
         result[i][variables[i][0]] = m_Id;
         result[i][variables[i][1]] = correlated[i];
     }
     return result;
 }
 
-void CUnivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec &values)
-{
-    for (const auto &value : values)
-    {
-        m_ResidualModel->adjustOffset(CConstantWeights::COUNT,
-                                      {m_TrendModel->detrend(value.first, value.second[0], 0.0)},
-                                      CConstantWeights::SINGLE_UNIT);
+void CUnivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec& values) {
+    for (const auto& value : values) {
+        m_ResidualModel->adjustOffset(
+            CConstantWeights::COUNT, {m_TrendModel->detrend(value.first, value.second[0], 0.0)}, CConstantWeights::SINGLE_UNIT);
     }
 }
 
-CUnivariateTimeSeriesModel::EUpdateResult
-CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
-                                       TTimeDouble2VecSizeTrVec samples)
-{
-    if (samples.empty())
-    {
+CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params,
+                                                                                 TTimeDouble2VecSizeTrVec samples) {
+    if (samples.empty()) {
         return E_Success;
     }
@@ -792,17 +652,12 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
     TSizeVec valueorder(samples.size());
     std::iota(valueorder.begin(), valueorder.end(), 0);
-    std::stable_sort(valueorder.begin(), valueorder.end(),
-                     [&samples](std::size_t lhs, std::size_t rhs)
-                     {
-                         return samples[lhs].second < samples[rhs].second;
-                     });
+    std::stable_sort(valueorder.begin(), valueorder.end(), [&samples](std::size_t lhs, std::size_t rhs) {
+        return samples[lhs].second < samples[rhs].second;
+    });
 
     TOptionalTimeDoublePr randomSample;
-    if (TOptionalSize index = randomlySample(m_Rng, params,
-                                             this->params().bucketLength(),
-                                             valueorder))
-    {
+    if (TOptionalSize index = randomlySample(m_Rng, params, this->params().bucketLength(), valueorder)) {
         randomSample.reset({samples[*index].first, samples[*index].second[0]});
     }
@@ -814,19 +669,15 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
     m_ResidualModel->dataType(type);
     m_TrendModel->dataType(type);
 
-    result = CModel::combine(result, this->updateTrend(params.weightStyles(),
-                                                       samples, params.trendWeights()));
+    result = CModel::combine(result, this->updateTrend(params.weightStyles(), samples, params.trendWeights()));
 
-    for (auto &sample : samples)
-    {
+    for (auto& sample : samples) {
         sample.second[0] = m_TrendModel->detrend(sample.first, sample.second[0], 0.0);
     }
 
-    std::stable_sort(valueorder.begin(), valueorder.end(),
-                     [&samples](std::size_t lhs, std::size_t rhs)
-                     {
-                         return samples[lhs].second < samples[rhs].second;
-                     });
+    std::stable_sort(valueorder.begin(), valueorder.end(), [&samples](std::size_t lhs, std::size_t rhs) {
+        return samples[lhs].second < samples[rhs].second;
+    });
 
     TDouble1Vec samples_;
     TDouble4Vec1Vec weights_;
@@ -834,8 +685,7 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
     weights_.reserve(samples.size());
     TMeanAccumulator averageTime;
-    for (auto i : valueorder)
-    {
+    for (auto i : valueorder) {
         samples_.push_back(samples[i].second[0]);
         weights_.push_back(unpack(params.priorWeights()[i]));
         averageTime.add(static_cast<double>(samples[i].first));
     }
@@ -843,99 +693,82 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params,
     m_ResidualModel->addSamples(params.weightStyles(), samples_, weights_);
     m_ResidualModel->propagateForwardsByTime(params.propagationInterval());
-    if (m_AnomalyModel != nullptr)
-    {
+    if (m_AnomalyModel != nullptr) {
         m_AnomalyModel->propagateForwardsByTime(params.propagationInterval());
     }
 
     double multiplier{1.0};
-    if (m_Controllers != nullptr)
-    {
+    if (m_Controllers != nullptr) {
         TDouble1VecVec errors[2];
         errors[0].reserve(samples.size());
         errors[1].reserve(samples.size());
-        for (auto sample : samples_)
-        {
+        for (auto sample : samples_) {
             this->appendPredictionErrors(params.propagationInterval(), sample, errors);
         }
         {
-            CDecayRateController &controller{(*m_Controllers)[E_TrendControl]};
+            CDecayRateController& controller{(*m_Controllers)[E_TrendControl]};
             core_t::TTime time{static_cast<core_t::TTime>(CBasicStatistics::mean(averageTime))};
             TDouble1Vec trendMean{m_TrendModel->meanValue(time)};
-            multiplier = controller.multiplier(trendMean, errors[E_TrendControl],
-                                               this->params().bucketLength(),
-                                               this->params().learnRate(),
-                                               this->params().decayRate());
-            if (multiplier != 1.0)
-            {
+            multiplier = controller.multiplier(
+                trendMean, errors[E_TrendControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate());
+            if (multiplier != 1.0) {
                 m_TrendModel->decayRate(multiplier * m_TrendModel->decayRate());
                 LOG_TRACE("trend decay rate = " << m_TrendModel->decayRate());
             }
         }
         {
-            CDecayRateController &controller{(*m_Controllers)[E_ResidualControl]};
+            CDecayRateController& controller{(*m_Controllers)[E_ResidualControl]};
             TDouble1Vec residualMean{m_ResidualModel->marginalLikelihoodMean()};
-            multiplier = controller.multiplier(residualMean, errors[E_ResidualControl],
+            multiplier = controller.multiplier(residualMean,
+                                               errors[E_ResidualControl],
                                                this->params().bucketLength(),
                                                this->params().learnRate(),
                                                this->params().decayRate());
-            if (multiplier != 1.0)
-            {
+            if (multiplier != 1.0) {
                 m_ResidualModel->decayRate(multiplier * m_ResidualModel->decayRate());
                 LOG_TRACE("prior decay rate = " << m_ResidualModel->decayRate());
             }
         }
     }
 
-    if (m_Correlations != nullptr)
-    {
+    if (m_Correlations != nullptr) {
         m_Correlations->addSamples(m_Id, params, samples, multiplier);
     }
 
-    if (randomSample)
-    {
+    if (randomSample) {
         m_SlidingWindow.push_back({randomSample->first, randomSample->second});
     }
 
     return result;
 }
 
-void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap)
-{
+void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap) {
     m_TrendModel->skipTime(gap);
 }
 
 CUnivariateTimeSeriesModel::TDouble2Vec
-CUnivariateTimeSeriesModel::mode(core_t::TTime time,
-                                 const maths_t::TWeightStyleVec &weightStyles,
-                                 const TDouble2Vec4Vec &weights) const
-{
-    return {  m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights))
-            + CBasicStatistics::mean(m_TrendModel->value(time))};
-}
-
-CUnivariateTimeSeriesModel::TDouble2Vec1Vec
-CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time,
-                                           const maths_t::TWeightStyleVec &weightStyles,
-                                           const TDouble2Vec4Vec1Vec &weights) const
-{
+CUnivariateTimeSeriesModel::mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const {
+    return {m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights)) + CBasicStatistics::mean(m_TrendModel->value(time))};
+}
+
+CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time,
+                                                                                       const maths_t::TWeightStyleVec& weightStyles,
+                                                                                       const TDouble2Vec4Vec1Vec& weights) const {
     TDouble2Vec1Vec result;
 
     TSize1Vec correlated;
     TSize2Vec1Vec variables;
     TMultivariatePriorCPtrSizePr1Vec correlationModels;
     TModelCPtr1Vec correlatedTimeSeriesModels;
-    if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels))
-    {
+    if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) {
         result.resize(correlated.size(), TDouble10Vec(2));
 
         double baseline[2];
         baseline[0] = CBasicStatistics::mean(m_TrendModel->value(time));
-        for (std::size_t i = 0u; i < correlated.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < correlated.size(); ++i) {
             baseline[1] = CBasicStatistics::mean(correlatedTimeSeriesModels[i]->m_TrendModel->value(time));
-            TDouble10Vec mode(correlationModels[i].first->marginalLikelihoodMode(
-                                  weightStyles, CMultivariateTimeSeriesModel::unpack(weights[i])));
+            TDouble10Vec mode(
+                correlationModels[i].first->marginalLikelihoodMode(weightStyles, CMultivariateTimeSeriesModel::unpack(weights[i])));
             result[i][variables[i][0]] = baseline[0] + mode[variables[i][0]];
             result[i][variables[i][1]] = baseline[1] + mode[variables[i][1]];
         }
@@ -944,54 +777,38 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time,
     return result;
 }
 
-CUnivariateTimeSeriesModel::TDouble2Vec1Vec
-CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec &weightStyles,
-                                          const TDouble2Vec4Vec &weights) const
-{
+CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles,
+                                                                                      const TDouble2Vec4Vec& weights) const {
     TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights)));
     TDouble2Vec1Vec result;
     result.reserve(modes.size());
-    for (auto mode : modes)
-    {
+    for (auto mode : modes) {
         result.push_back({mode});
     }
     return result;
 }
 
-void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec &time,
-                                         double confidenceInterval,
-                                         TDouble2Vec1Vec &value) const
-{
-    if (value.empty())
-    {
+void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const {
+    if (value.empty()) {
         return;
     }
-    if (value[0].size() == 1)
-    {
+    if (value[0].size() == 1) {
         value[0][0] = m_TrendModel->detrend(time[0][0], value[0][0], confidenceInterval);
-    }
-    else
-    {
+    } else {
         TSize1Vec correlated;
         TSize2Vec1Vec variables;
         TMultivariatePriorCPtrSizePr1Vec correlationModels;
         TModelCPtr1Vec correlatedTimeSeriesModels;
-        if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels))
-        {
-            for (std::size_t i = 0u; i < variables.size(); ++i)
-            {
-                if (!value[i].empty())
-                {
-                    value[i][variables[i][0]] = m_TrendModel->detrend(time[i][variables[i][0]],
-                                                                      value[i][variables[i][0]],
-                                                                      confidenceInterval);
-                    value[i][variables[i][1]] =
-                        correlatedTimeSeriesModels[i]->m_TrendModel->detrend(time[i][variables[i][1]],
-                                                                             value[i][variables[i][1]],
-                                                                             confidenceInterval);
+        if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) {
+            for (std::size_t i = 0u; i < variables.size(); ++i) {
+                if (!value[i].empty()) {
+                    value[i][variables[i][0]] =
+                        m_TrendModel->detrend(time[i][variables[i][0]], value[i][variables[i][0]], confidenceInterval);
+                    value[i][variables[i][1]] = correlatedTimeSeriesModels[i]->m_TrendModel->detrend(
+                        time[i][variables[i][1]], value[i][variables[i][1]], confidenceInterval);
                 }
             }
         }
@@ -999,24 +816,18 @@ void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec &time,
 }
 
 CUnivariateTimeSeriesModel::TDouble2Vec
-CUnivariateTimeSeriesModel::predict(core_t::TTime time,
-                                    const TSizeDoublePr1Vec &correlatedValue,
-                                    TDouble2Vec hint) const
-{
+CUnivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& correlatedValue, TDouble2Vec hint) const {
     double correlateCorrection{0.0};
-    if (!correlatedValue.empty())
-    {
+    if (!correlatedValue.empty()) {
         TSize1Vec correlated{correlatedValue[0].first};
         TSize2Vec1Vec variables;
         TMultivariatePriorCPtrSizePr1Vec correlationModel;
         TModelCPtr1Vec correlatedModel;
-        if (m_Correlations->correlationModels(m_Id, correlated, variables,
-                                              correlationModel, correlatedModel))
-        {
+        if (m_Correlations->correlationModels(m_Id, correlated, variables, correlationModel, correlatedModel)) {
             double sample{correlatedModel[0]->m_TrendModel->detrend(time, correlatedValue[0].second, 0.0)};
             TSize10Vec marginalize{variables[0][1]};
             TSizeDoublePr10Vec condition{{variables[0][1], sample}};
-            const CMultivariatePrior *joint{correlationModel[0].first};
+            const CMultivariatePrior* joint{correlationModel[0].first};
             TPriorPtr margin{joint->univariate(marginalize, NOTHING_TO_CONDITION).first};
             TPriorPtr conditional{joint->univariate(NOTHING_TO_MARGINALIZE, condition).first};
             correlateCorrection = conditional->marginalLikelihoodMean() - margin->marginalLikelihoodMean();
@@ -1026,50 +837,40 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time,
 
     double scale{1.0 - this->params().probabilityBucketEmpty()};
 
     double trend{0.0};
-    if (m_TrendModel->initialized())
-    {
+    if (m_TrendModel->initialized()) {
         trend = CBasicStatistics::mean(m_TrendModel->value(time));
     }
 
-    if (hint.size() == 1)
-    {
+    if (hint.size() == 1) {
         hint[0] = m_TrendModel->detrend(time, hint[0], 0.0);
     }
 
-    double median{m_ResidualModel->isNonInformative() ?
-                  m_ResidualModel->marginalLikelihoodMean() :
-                  (hint.empty() ? CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0)) :
-                                  m_ResidualModel->nearestMarginalLikelihoodMean(hint[0]))};
+    double median{m_ResidualModel->isNonInformative()
+                      ? m_ResidualModel->marginalLikelihoodMean()
+                      : (hint.empty() ? CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0))
+                                      : m_ResidualModel->nearestMarginalLikelihoodMean(hint[0]))};
     double result{scale * (trend + median + correlateCorrection)};
 
     return {m_IsNonNegative ? std::max(result, 0.0) : result};
 }
 
-CUnivariateTimeSeriesModel::TDouble2Vec3Vec
-CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time,
-                                               double confidenceInterval,
-                                               const maths_t::TWeightStyleVec &weightStyles,
-                                               const TDouble2Vec4Vec &weights_) const
-{
-    if (m_ResidualModel->isNonInformative())
-    {
+CUnivariateTimeSeriesModel::TDouble2Vec3Vec CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time,
                                                                                           double confidenceInterval,
                                                                                           const maths_t::TWeightStyleVec& weightStyles,
                                                                                           const TDouble2Vec4Vec& weights_) const {
+    if (m_ResidualModel->isNonInformative()) {
         return TDouble2Vec3Vec();
     }
 
     double scale{1.0 - this->params().probabilityBucketEmpty()};
 
-    double trend{m_TrendModel->initialized() ?
-                 CBasicStatistics::mean(m_TrendModel->value(time, confidenceInterval)) : 0.0};
+    double trend{m_TrendModel->initialized() ? CBasicStatistics::mean(m_TrendModel->value(time, confidenceInterval)) : 0.0};
 
     TDouble4Vec weights(unpack(weights_));
-    double median{CBasicStatistics::mean(
-            m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))};
-    TDoubleDoublePr interval{
-            m_ResidualModel->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)};
+    double median{CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))};
+    TDoubleDoublePr interval{m_ResidualModel->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)};
 
-    double result[]{scale * (trend + interval.first),
-                    scale * (trend + median),
-                    scale * (trend + interval.second)};
+    double result[]{scale * (trend + interval.first), scale * (trend + median), scale * (trend + interval.second)};
     return {{m_IsNonNegative ? std::max(result[0], 0.0) : result[0]},
             {m_IsNonNegative ? std::max(result[1], 0.0) : result[1]},
@@ -1079,13 +880,11 @@ CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time,
 bool CUnivariateTimeSeriesModel::forecast(core_t::TTime startTime,
                                           core_t::TTime endTime,
                                           double confidenceInterval,
-                                          const TDouble2Vec &minimum_,
-                                          const TDouble2Vec &maximum_,
-                                          const TForecastPushDatapointFunc &forecastPushDataPointFunc,
-                                          std::string &messageOut)
-{
-    if (m_ResidualModel->isNonInformative())
-    {
+                                          const TDouble2Vec& minimum_,
+                                          const TDouble2Vec& maximum_,
+                                          const TForecastPushDatapointFunc& forecastPushDataPointFunc,
+                                          std::string& messageOut) {
+    if (m_ResidualModel->isNonInformative()) {
         messageOut = forecast::INFO_INSUFFICIENT_HISTORY;
         return true;
     }
@@ -1096,87 +895,66 @@ bool CUnivariateTimeSeriesModel::forecast(core_t::TTime startTime,
     double minimum{m_IsNonNegative ? std::max(minimum_[0], 0.0) : minimum_[0]};
     double maximum{m_IsNonNegative ? std::max(maximum_[0], 0.0) : maximum_[0]};
 
-    auto writer = [&](core_t::TTime time, const TDouble3Vec &prediction)
-        {
-            SErrorBar errorBar{time, bucketLength,
-                               CTools::truncate(prediction[0],
-                                                minimum,
-                                                maximum + prediction[0] - prediction[1]),
-                               CTools::truncate(prediction[1], minimum, maximum),
-                               CTools::truncate(prediction[2],
-                                                minimum + prediction[2] - prediction[1],
-                                                maximum)};
-            forecastPushDataPointFunc(errorBar);
-        };
-
-    m_TrendModel->forecast(startTime, endTime, bucketLength, confidenceInterval,
-                           this->params().minimumSeasonalVarianceScale(), writer);
+    auto writer = [&](core_t::TTime time, const TDouble3Vec& prediction) {
+        SErrorBar errorBar{time,
+                           bucketLength,
+                           CTools::truncate(prediction[0], minimum, maximum + prediction[0] - prediction[1]),
+                           CTools::truncate(prediction[1], minimum, maximum),
+                           CTools::truncate(prediction[2], minimum + prediction[2] - prediction[1], maximum)};
+        forecastPushDataPointFunc(errorBar);
+    };
+
+    m_TrendModel->forecast(startTime, endTime, bucketLength, confidenceInterval, this->params().minimumSeasonalVarianceScale(), writer);
 
     return true;
 }
 
-bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams &params,
-                                             const TTime2Vec1Vec &time_,
-                                             const TDouble2Vec1Vec &value,
-                                             double &probability,
-                                             TTail2Vec &tail,
-                                             bool &conditional,
-                                             TSize1Vec &mostAnomalousCorrelate) const
-{
+bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& params,
+                                             const TTime2Vec1Vec& time_,
+                                             const TDouble2Vec1Vec& value,
+                                             double& probability,
+                                             TTail2Vec& tail,
+                                             bool& conditional,
+                                             TSize1Vec& mostAnomalousCorrelate) const {
     probability = 1.0;
     tail.resize(1, maths_t::E_UndeterminedTail);
     conditional = false;
     mostAnomalousCorrelate.clear();
-    if (value.empty())
-    {
+    if (value.empty()) {
         return true;
     }
 
-    if (value[0].size() == 1)
-    {
+    if (value[0].size() == 1) {
         core_t::TTime time{time_[0][0]};
         TDouble1Vec sample{m_TrendModel->detrend(time, value[0][0], params.seasonalConfidenceInterval())};
         TDouble4Vec1Vec weights{unpack(params.weights()[0])};
 
         double pl, pu;
         maths_t::ETail tail_;
-        if (m_ResidualModel->probabilityOfLessLikelySamples(params.calculation(0),
-                                                            params.weightStyles(),
-                                                            sample, weights, pl, pu, tail_))
-        {
-            LOG_TRACE("P(" << sample << " | weight = " << weights
-                      << ", time = " << time << ") = " << (pl + pu) / 2.0);
-        }
-        else
-        {
-            LOG_ERROR("Failed to compute P(" << sample
-                      << " | weight = " << weights << ", time = " << time << ")");
+        if (m_ResidualModel->probabilityOfLessLikelySamples(params.calculation(0), params.weightStyles(), sample, weights, pl, pu, tail_)) {
+            LOG_TRACE("P(" << sample << " | weight = " << weights << ", time = " << time << ") = " << (pl + pu) / 2.0);
+        } else {
+            LOG_ERROR("Failed to compute P(" << sample << " | weight = " << weights << ", time = " << time << ")");
             return false;
         }
-        probability = correctForEmptyBucket(params.calculation(0), value[0],
-                                            params.bucketEmpty()[0][0],
-                                            this->params().probabilityBucketEmpty(),
-                                            (pl + pu) / 2.0);
+        probability = correctForEmptyBucket(
+            params.calculation(0), value[0], params.bucketEmpty()[0][0], this->params().probabilityBucketEmpty(), (pl + pu) / 2.0);
 
-        if (m_AnomalyModel != nullptr)
-        {
-            TDouble2Vec residual{  (sample[0] - m_ResidualModel->nearestMarginalLikelihoodMean(sample[0]))
-                                 / std::max(std::sqrt(this->seasonalWeight(0.0, time)[0]), 1.0)};
+        if (m_AnomalyModel != nullptr) {
+            TDouble2Vec residual{(sample[0] - m_ResidualModel->nearestMarginalLikelihoodMean(sample[0])) /
+                                 std::max(std::sqrt(this->seasonalWeight(0.0, time)[0]), 1.0)};
             m_AnomalyModel->updateAnomaly(params, time, residual, probability);
             m_AnomalyModel->probability(params, time, probability);
             m_AnomalyModel->sampleAnomaly(params, time);
         }
         tail[0] = tail_;
-    }
-    else
-    {
+    } else {
         TSize1Vec correlated;
         TSize2Vec1Vec variables;
         TMultivariatePriorCPtrSizePr1Vec correlationModels;
         TModelCPtr1Vec correlatedTimeSeriesModels;
-        if (!this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels))
-        {
+        if (!this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) {
            return false;
        }
@@ -1195,77 +973,55 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams &params,
         double mostAnomalousSample{0.0};
         TPriorPtr mostAnomalousCorrelationModel;
-        for (std::size_t i = 0u; i < variables.size(); ++i)
-        {
-            if (!value[i].empty() || (!params.mostAnomalousCorrelate() || i == *params.mostAnomalousCorrelate()))
-            {
+        for (std::size_t i = 0u; i < variables.size(); ++i) {
+            if (!value[i].empty() || (!params.mostAnomalousCorrelate() || i == *params.mostAnomalousCorrelate())) {
                 variable[0] = variables[i][0];
-                sample[0][variables[i][0]] = m_TrendModel->detrend(time_[i][variables[i][0]],
-                                                                   value[i][variables[i][0]],
-                                                                   params.seasonalConfidenceInterval());
-                sample[0][variables[i][1]] =
-                    correlatedTimeSeriesModels[i]->m_TrendModel->detrend(time_[i][variables[i][1]],
-                                                                         value[i][variables[i][1]],
-                                                                         params.seasonalConfidenceInterval());
+                sample[0][variables[i][0]] =
+                    m_TrendModel->detrend(time_[i][variables[i][0]], value[i][variables[i][0]], params.seasonalConfidenceInterval());
+                sample[0][variables[i][1]] = correlatedTimeSeriesModels[i]->m_TrendModel->detrend(
+                    time_[i][variables[i][1]], value[i][variables[i][1]], params.seasonalConfidenceInterval());
                 weights[0] = CMultivariateTimeSeriesModel::unpack(params.weights()[i]);
 
-                if (correlationModels[i].first->probabilityOfLessLikelySamples(params.calculation(0),
-                                                                               params.weightStyles(),
-                                                                               sample, weights,
-                                                                               variable, pli, pui, ti))
-                {
-                    LOG_TRACE("Marginal P(" << sample << " | weight = " << weights
-                              << ", coordinate = " << variable
-                              << ") = " << (pli[0][0] + pui[0][0]) / 2.0);
-                    LOG_TRACE("Conditional P(" << sample << " | weight = " << weights
-                              << ", coordinate = " << variable
-                              << ") = " << (pli[1][0] + pui[1][0]) / 2.0);
-                }
-                else
-                {
-                    LOG_ERROR("Failed to compute P(" << sample
-                              << " | weight = " << weights
-                              << ", coordinate = " << variable << ")");
+                if (correlationModels[i].first->probabilityOfLessLikelySamples(
+                        params.calculation(0), params.weightStyles(), sample, weights, variable, pli, pui, ti)) {
+                    LOG_TRACE("Marginal P(" << sample << " | weight = " << weights << ", coordinate = " << variable
+                                            << ") = " << (pli[0][0] + pui[0][0]) / 2.0);
+                    LOG_TRACE("Conditional P(" << sample << " | weight = " << weights << ", coordinate = " << variable
+                                               << ") = " << (pli[1][0] + pui[1][0]) / 2.0);
+                } else {
+                    LOG_ERROR("Failed to compute P(" << sample << " | weight = " << weights << ", coordinate = " << variable << ")");
                     continue;
                 }
 
                 probabilityBucketEmpty[variables[i][0]] = this->params().probabilityBucketEmpty();
-                probabilityBucketEmpty[variables[i][1]] =
-                        correlatedTimeSeriesModels[i]->params().probabilityBucketEmpty();
+                probabilityBucketEmpty[variables[i][1]] = correlatedTimeSeriesModels[i]->params().probabilityBucketEmpty();
                 double pl{std::sqrt(pli[0][0] * pli[1][0])};
                 double pu{std::sqrt(pui[0][0] * pui[1][0])};
-                double p{correctForEmptyBucket(params.calculation(0), value[0][variable[0]],
-                                               params.bucketEmpty()[i], probabilityBucketEmpty,
-                                               (pl + pu) / 2.0)};
+                double p{correctForEmptyBucket(
+                    params.calculation(0), value[0][variable[0]], params.bucketEmpty()[i], probabilityBucketEmpty, (pl + pu) / 2.0)};
                 aggregator.add(p, neff);
-                if (minProbability.add(p))
-                {
+                if (minProbability.add(p)) {
                     tail[0] = ti[0];
                     mostAnomalousCorrelate.assign(1, i);
                     conditional = ((pli[1][0] + pui[1][0]) < (pli[0][0] + pui[0][0]));
                     mostAnomalousTime = time_[0][variables[i][0]];
                     mostAnomalousSample = sample[0][variables[i][0]];
-                    mostAnomalousCorrelationModel = conditional ?
-                            correlationModels[i].first->univariate({variables[i][1]},
                                                                   NOTHING_TO_CONDITION).first :
-                            correlationModels[i].first->univariate(NOTHING_TO_MARGINALIZE,
                                                                   {{variables[i][1],
                                                                     sample[0][variables[i][1]]}}).first;
+                    mostAnomalousCorrelationModel =
+                        conditional ? correlationModels[i].first->univariate({variables[i][1]}, NOTHING_TO_CONDITION).first
+                                    : correlationModels[i]
+                                          .first->univariate(NOTHING_TO_MARGINALIZE, {{variables[i][1], sample[0][variables[i][1]]}})
+                                          .first;
                 }
-            }
-            else
-            {
+            } else {
                 aggregator.add(1.0, neff);
             }
         }
         aggregator.calculate(probability);
 
-        if (m_AnomalyModel != nullptr)
-        {
-            TDouble2Vec residual{  (  mostAnomalousSample
                                    - mostAnomalousCorrelationModel->nearestMarginalLikelihoodMean(mostAnomalousSample))
-                                 / std::max(std::sqrt(this->seasonalWeight(0.0, mostAnomalousTime)[0]), 1.0)};
+        if (m_AnomalyModel != nullptr) {
+            TDouble2Vec residual{(mostAnomalousSample - mostAnomalousCorrelationModel->nearestMarginalLikelihoodMean(mostAnomalousSample)) /
+                                 std::max(std::sqrt(this->seasonalWeight(0.0, mostAnomalousTime)[0]), 1.0)};
             m_AnomalyModel->updateAnomaly(params, mostAnomalousTime, residual, probability);
             m_AnomalyModel->probability(params, mostAnomalousTime, probability);
             m_AnomalyModel->sampleAnomaly(params, mostAnomalousTime);
@@ -1276,25 +1032,18 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams &params,
 }
 
 CUnivariateTimeSeriesModel::TDouble2Vec
-CUnivariateTimeSeriesModel::winsorisationWeight(double derate,
                                                core_t::TTime time,
                                                const TDouble2Vec &value) const
-{
+CUnivariateTimeSeriesModel::winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const {
     double scale{this->seasonalWeight(0.0, time)[0]};
     double sample{m_TrendModel->detrend(time, value[0], 0.0)};
-    return {  tailWinsorisationWeight(*m_ResidualModel, derate, scale, sample)
-            * changeWinsorisationWeight(m_ChangeDetector)};
+    return {tailWinsorisationWeight(*m_ResidualModel, derate, scale, sample) * changeWinsorisationWeight(m_ChangeDetector)};
 }
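For reference, the score blending performed in CTimeSeriesAnomalyModel::probability above amounts to the following minimal standalone sketch. It is an approximation under stated assumptions: std::log and std::exp stand in for CTools::fastLog, CTools::anomalyScore and CTools::inverseAnomalyScore, and the value of SMALL_PROBABILITY is assumed rather than taken from the library headers.

    #include <algorithm>
    #include <cmath>

    double blendedProbability(double p, double pGivenAnomalous) {
        const double LARGEST_ANOMALOUS_PROBABILITY{0.1};
        const double SMALL_PROBABILITY{1e-4}; // assumed value
        double logLargest{std::log(LARGEST_ANOMALOUS_PROBABILITY)};
        double logSmall{std::log(SMALL_PROBABILITY)};
        // alpha ramps from 0 to 0.5 as p falls from 0.1 towards SMALL_PROBABILITY.
        double alpha{0.5 * std::min((std::log(p) - logLargest) / (logSmall - logLargest), 1.0)};
        // Hypothetical stand-ins for CTools::anomalyScore / CTools::inverseAnomalyScore.
        auto score = [](double q) { return -std::log(q); };
        auto inverseScore = [](double s) { return std::exp(-s); };
        // Interpolate between the raw score and the score given the anomaly feature
        // model, then cap the result at the largest probability treated as anomalous.
        return std::min(inverseScore((1.0 - alpha) * score(p) + alpha * score(pGivenAnomalous)),
                        LARGEST_ANOMALOUS_PROBABILITY);
    }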
-CUnivariateTimeSeriesModel::TDouble2Vec
-CUnivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const
-{
+CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const {
     double scale{m_TrendModel->scale(time, m_ResidualModel->marginalLikelihoodVariance(), confidence).second};
     return {std::max(scale, this->params().minimumSeasonalVarianceScale())};
 }
 
-uint64_t CUnivariateTimeSeriesModel::checksum(uint64_t seed) const
-{
+uint64_t CUnivariateTimeSeriesModel::checksum(uint64_t seed) const {
     seed = CChecksum::calculate(seed, m_IsNonNegative);
     seed = CChecksum::calculate(seed, m_Controllers);
     seed = CChecksum::calculate(seed, m_TrendModel);
@@ -1307,8 +1056,7 @@ uint64_t CUnivariateTimeSeriesModel::checksum(uint64_t seed) const
     return CChecksum::calculate(seed, m_Correlations != 0);
 }
 
-void CUnivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CUnivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CUnivariateTimeSeriesModel");
     core::CMemoryDebug::dynamicSize("m_Controllers", m_Controllers, mem);
     core::CMemoryDebug::dynamicSize("m_TrendModel", m_TrendModel, mem);
@@ -1318,24 +1066,16 @@ void CUnivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsa
     core::CMemoryDebug::dynamicSize("m_SlidingWindow", m_SlidingWindow, mem);
 }
 
-std::size_t CUnivariateTimeSeriesModel::memoryUsage() const
-{
-    return   core::CMemory::dynamicSize(m_Controllers)
-           + core::CMemory::dynamicSize(m_TrendModel)
-           + core::CMemory::dynamicSize(m_ResidualModel)
-           + core::CMemory::dynamicSize(m_AnomalyModel)
-           + core::CMemory::dynamicSize(m_ChangeDetector)
-           + core::CMemory::dynamicSize(m_SlidingWindow);
+std::size_t CUnivariateTimeSeriesModel::memoryUsage() const {
+    return core::CMemory::dynamicSize(m_Controllers) + core::CMemory::dynamicSize(m_TrendModel) +
+           core::CMemory::dynamicSize(m_ResidualModel) + core::CMemory::dynamicSize(m_AnomalyModel) +
+           core::CMemory::dynamicSize(m_ChangeDetector) + core::CMemory::dynamicSize(m_SlidingWindow);
 }
 
-bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParams &params,
-                                                        core::CStateRestoreTraverser &traverser)
-{
-    if (traverser.name() == VERSION_6_3_TAG)
-    {
-        while (traverser.next())
-        {
-            const std::string &name{traverser.name()};
+bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+    if (traverser.name() == VERSION_6_3_TAG) {
+        while (traverser.next()) {
+            const std::string& name{traverser.name()};
             RESTORE_BUILT_IN(ID_6_3_TAG, m_Id)
             RESTORE_BOOL(IS_NON_NEGATIVE_6_3_TAG, m_IsNonNegative)
             RESTORE_BOOL(IS_FORECASTABLE_6_3_TAG, m_IsForecastable)
@@ -1344,38 +1084,32 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam
                                    m_Controllers = boost::make_shared<TDecayRateController2Ary>(),
                                    core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser),
                                    /**/)
-            RESTORE(TREND_MODEL_6_3_TAG, traverser.traverseSubLevel(boost::bind(
-                                             CTimeSeriesDecompositionStateSerialiser(),
-                                             boost::cref(params.s_DecompositionParams),
-                                             boost::ref(m_TrendModel), _1)))
-            RESTORE(RESIDUAL_MODEL_6_3_TAG, traverser.traverseSubLevel(boost::bind(
-                                                CPriorStateSerialiser(),
-                                                boost::cref(params.s_DistributionParams),
-                                                boost::ref(m_ResidualModel), _1)))
+            RESTORE(
+                TREND_MODEL_6_3_TAG,
+                traverser.traverseSubLevel(boost::bind(
+                    CTimeSeriesDecompositionStateSerialiser(), boost::cref(params.s_DecompositionParams), boost::ref(m_TrendModel), _1)))
+            RESTORE(RESIDUAL_MODEL_6_3_TAG,
+                    traverser.traverseSubLevel(boost::bind(
+                        CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), boost::ref(m_ResidualModel), _1)))
             RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_6_3_TAG,
                                    m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(),
-                                   traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser,
                                                                          m_AnomalyModel.get(), boost::cref(params), _1)),
+                                   traverser.traverseSubLevel(boost::bind(
+                                       &CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)),
                                    /**/)
             RESTORE(CANDIDATE_CHANGE_POINT_6_3_TAG, fromDelimited(traverser.value(), m_CandidateChangePoint))
             RESTORE_BUILT_IN(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval)
-            RESTORE_SETUP_TEARDOWN(CHANGE_DETECTOR_6_3_TAG,
-                                   m_ChangeDetector = boost::make_shared<CUnivariateTimeSeriesChangeDetector>(
                                                          m_TrendModel, m_ResidualModel),
-                                   traverser.traverseSubLevel(boost::bind(
-                                       &CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser,
                                       m_ChangeDetector.get(), boost::cref(params), _1)),
-                                   /**/)
-            RESTORE(SLIDING_WINDOW_6_3_TAG,
-                    core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, traverser))
-        }
-    }
-    else
-    {
+            RESTORE_SETUP_TEARDOWN(
+                CHANGE_DETECTOR_6_3_TAG,
+                m_ChangeDetector = boost::make_shared<CUnivariateTimeSeriesChangeDetector>(m_TrendModel, m_ResidualModel),
+                traverser.traverseSubLevel(boost::bind(
+                    &CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, m_ChangeDetector.get(), boost::cref(params), _1)),
+                /**/)
+            RESTORE(SLIDING_WINDOW_6_3_TAG, core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, traverser))
+        }
+    } else {
         // There is no version string this is historic state.
-        do
-        {
-            const std::string &name{traverser.name()};
+        do {
+            const std::string& name{traverser.name()};
             RESTORE_BUILT_IN(ID_OLD_TAG, m_Id)
             RESTORE_BOOL(IS_NON_NEGATIVE_OLD_TAG, m_IsNonNegative)
             RESTORE_BOOL(IS_FORECASTABLE_OLD_TAG, m_IsForecastable)
@@ -1383,27 +1117,24 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam
                                    m_Controllers = boost::make_shared<TDecayRateController2Ary>(),
                                    core::CPersistUtils::restore(CONTROLLER_OLD_TAG, *m_Controllers, traverser),
                                    /**/)
-            RESTORE(TREND_OLD_TAG, traverser.traverseSubLevel(boost::bind(
-                                       CTimeSeriesDecompositionStateSerialiser(),
-                                       boost::cref(params.s_DecompositionParams),
-                                       boost::ref(m_TrendModel), _1)))
-            RESTORE(PRIOR_OLD_TAG, traverser.traverseSubLevel(boost::bind(
-                                       CPriorStateSerialiser(),
-                                       boost::cref(params.s_DistributionParams),
-                                       boost::ref(m_ResidualModel), _1)))
+            RESTORE(
+                TREND_OLD_TAG,
+                traverser.traverseSubLevel(boost::bind(
+                    CTimeSeriesDecompositionStateSerialiser(), boost::cref(params.s_DecompositionParams), boost::ref(m_TrendModel), _1)))
+            RESTORE(PRIOR_OLD_TAG,
+                    traverser.traverseSubLevel(boost::bind(
+                        CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), boost::ref(m_ResidualModel), _1)))
             RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_OLD_TAG,
                                    m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(),
-                                   traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser,
                                                                          m_AnomalyModel.get(), boost::cref(params), _1)),
+                                   traverser.traverseSubLevel(boost::bind(
+                                       &CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)),
                                    /**/)
-        }
-        while (traverser.next());
+        } while (traverser.next());
     }
     return true;
 }
 
-void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     // Note that we don't persist this->params() or the correlations
     // because that state is reinitialized.
     inserter.insertValue(VERSION_6_3_TAG, "");
@@ -1411,150 +1142,115 @@ void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInsert
     inserter.insertValue(IS_NON_NEGATIVE_6_3_TAG, static_cast<int>(m_IsNonNegative));
     inserter.insertValue(IS_FORECASTABLE_6_3_TAG, static_cast<int>(m_IsForecastable));
     inserter.insertValue(RNG_6_3_TAG, m_Rng.toString());
-    if (m_Controllers)
-    {
+    if (m_Controllers) {
         core::CPersistUtils::persist(CONTROLLER_6_3_TAG, *m_Controllers, inserter);
     }
-    inserter.insertLevel(TREND_MODEL_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(),
                                                          boost::cref(*m_TrendModel), _1));
-    inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(),
                                                             boost::cref(*m_ResidualModel), _1));
+    inserter.insertLevel(TREND_MODEL_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(), boost::cref(*m_TrendModel), _1));
+    inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_ResidualModel), _1));
     inserter.insertValue(CANDIDATE_CHANGE_POINT_6_3_TAG, toDelimited(m_CandidateChangePoint));
     inserter.insertValue(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval);
-    if (m_ChangeDetector != nullptr)
-    {
-        inserter.insertLevel(CHANGE_DETECTOR_6_3_TAG, boost::bind(
-                                 &CUnivariateTimeSeriesChangeDetector::acceptPersistInserter, m_ChangeDetector.get(), _1));
+    if (m_ChangeDetector != nullptr) {
+        inserter.insertLevel(CHANGE_DETECTOR_6_3_TAG,
+                             boost::bind(&CUnivariateTimeSeriesChangeDetector::acceptPersistInserter, m_ChangeDetector.get(), _1));
     }
-    if (m_AnomalyModel != nullptr)
-    {
-        inserter.insertLevel(ANOMALY_MODEL_6_3_TAG,
-                             boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter,
-                                         m_AnomalyModel.get(), _1));
+    if (m_AnomalyModel != nullptr) {
+        inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter, m_AnomalyModel.get(), _1));
     }
     core::CPersistUtils::persist(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, inserter);
 }
 
-maths_t::EDataType CUnivariateTimeSeriesModel::dataType() const
-{
+maths_t::EDataType CUnivariateTimeSeriesModel::dataType() const {
     return m_ResidualModel->dataType();
 }
 
-CUnivariateTimeSeriesModel::TDouble4Vec CUnivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec &weights)
-{
+CUnivariateTimeSeriesModel::TDouble4Vec CUnivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec& weights) {
     TDouble4Vec result;
     result.reserve(weights.size());
-    for (const auto &weight : weights)
-    {
+    for (const auto& weight : weights) {
         result.push_back(weight[0]);
     }
     return result;
 }
 
 void CUnivariateTimeSeriesModel::reinitializeResidualModel(double learnRate,
-                                                           const TDecompositionPtr &trend,
-                                                           const TTimeDoublePrCBuf &slidingWindow,
-                                                           CPrior &residualModel)
-{
+                                                           const TDecompositionPtr& trend,
+                                                           const TTimeDoublePrCBuf& slidingWindow,
+                                                           CPrior& residualModel) {
     residualModel.setToNonInformative(0.0, residualModel.decayRate());
-    if (!slidingWindow.empty())
-    {
+    if (!slidingWindow.empty()) {
         double slidingWindowLength{static_cast<double>(slidingWindow.size())};
         TDouble4Vec1Vec weight{{std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0))}};
-        for (const auto &value : slidingWindow)
-        {
+        for (const auto& value : slidingWindow) {
            TDouble1Vec sample{trend->detrend(value.first, value.second, 0.0)};
            residualModel.addSamples(CConstantWeights::COUNT, sample, weight);
        }
    }
}
-const CUnivariateTimeSeriesModel::TTimeDoublePrCBuf &CUnivariateTimeSeriesModel::slidingWindow() const
-{
+const CUnivariateTimeSeriesModel::TTimeDoublePrCBuf& CUnivariateTimeSeriesModel::slidingWindow() const {
     return m_SlidingWindow;
 }
 
-const CTimeSeriesDecompositionInterface &CUnivariateTimeSeriesModel::trendModel() const
-{
+const CTimeSeriesDecompositionInterface& CUnivariateTimeSeriesModel::trendModel() const {
     return *m_TrendModel;
 }
 
-const CPrior &CUnivariateTimeSeriesModel::residualModel() const
-{
+const CPrior& CUnivariateTimeSeriesModel::residualModel() const {
     return *m_ResidualModel;
 }
 
-CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel &other,
-                                                       std::size_t id,
-                                                       bool isForForecast) :
-        CModel(other.params()),
-        m_Id(id),
-        m_IsNonNegative(other.m_IsNonNegative),
-        m_IsForecastable(other.m_IsForecastable),
-        m_Rng(other.m_Rng),
-        m_TrendModel(other.m_TrendModel->clone()),
-        m_ResidualModel(other.m_ResidualModel->clone()),
-        m_AnomalyModel(!isForForecast && other.m_AnomalyModel ?
-                       boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel) :
-                       TAnomalyModelPtr()),
-        m_CandidateChangePoint(other.m_CandidateChangePoint),
-        m_CurrentChangeInterval(other.m_CurrentChangeInterval),
-        m_ChangeDetector(!isForForecast && other.m_ChangeDetector ?
-                         boost::make_shared<CUnivariateTimeSeriesChangeDetector>(*other.m_ChangeDetector) :
-                         TChangeDetectorPtr()),
-        m_SlidingWindow(!isForForecast ? other.m_SlidingWindow : TTimeDoublePrCBuf{}),
-        m_Correlations(0)
-{
-    if (!isForForecast && other.m_Controllers != nullptr)
-    {
+CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel& other, std::size_t id, bool isForForecast)
+    : CModel(other.params()),
+      m_Id(id),
+      m_IsNonNegative(other.m_IsNonNegative),
+      m_IsForecastable(other.m_IsForecastable),
+      m_Rng(other.m_Rng),
+      m_TrendModel(other.m_TrendModel->clone()),
+      m_ResidualModel(other.m_ResidualModel->clone()),
+      m_AnomalyModel(!isForForecast && other.m_AnomalyModel ? boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel)
+                                                            : TAnomalyModelPtr()),
+      m_CandidateChangePoint(other.m_CandidateChangePoint),
+      m_CurrentChangeInterval(other.m_CurrentChangeInterval),
+      m_ChangeDetector(!isForForecast && other.m_ChangeDetector
+                           ? boost::make_shared<CUnivariateTimeSeriesChangeDetector>(*other.m_ChangeDetector)
+                           : TChangeDetectorPtr()),
+      m_SlidingWindow(!isForForecast ? other.m_SlidingWindow : TTimeDoublePrCBuf{}),
+      m_Correlations(0) {
+    if (!isForForecast && other.m_Controllers != nullptr) {
         m_Controllers = boost::make_shared<TDecayRateController2Ary>(*other.m_Controllers);
     }
 }
 
-CUnivariateTimeSeriesModel::EUpdateResult
-CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams &params,
-                                               const TSizeVec &order,
-                                               const TTimeDouble2VecSizeTrVec &values)
-{
+CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& params,
+                                                                                         const TSizeVec& order,
+                                                                                         const TTimeDouble2VecSizeTrVec& values) {
     std::size_t median{order[order.size() / 2]};
     TDouble4Vec weights(unpack(params.priorWeights()[median]));
     core_t::TTime time{values[median].first};
 
-    if (m_ChangeDetector == nullptr)
-    {
+    if (m_ChangeDetector == nullptr) {
         core_t::TTime minimumTimeToDetect{this->params().minimumTimeToDetectChange()};
         core_t::TTime maximumTimeToTest{this->params().maximumTimeToTestForChange()};
         double weight{maths_t::winsorisationWeight(params.weightStyles(), {weights})};
-        if (   minimumTimeToDetect < maximumTimeToTest
-            && pValueFromTailWinsorisationWeight(weight) <= 1e-5)
-        {
+        if (minimumTimeToDetect < maximumTimeToTest && pValueFromTailWinsorisationWeight(weight) <= 1e-5) {
             m_CurrentChangeInterval += this->params().bucketLength();
-            if (this->params().testForChange(m_CurrentChangeInterval))
-            {
+            if (this->params().testForChange(m_CurrentChangeInterval)) {
                 m_ChangeDetector = boost::make_shared<CUnivariateTimeSeriesChangeDetector>(
-                                       m_TrendModel, m_ResidualModel,
-                                       minimumTimeToDetect, maximumTimeToTest);
+                    m_TrendModel, m_ResidualModel, minimumTimeToDetect, maximumTimeToTest);
                 m_CurrentChangeInterval = 0;
             }
-        }
-        else
-        {
+        } else {
             m_CandidateChangePoint = {time, values[median].second[0]};
             m_CurrentChangeInterval = 0;
         }
     }
 
-    if (m_ChangeDetector != nullptr)
-    {
-        m_ChangeDetector->addSamples(params.weightStyles(),
-                                     {std::make_pair(time, values[median].second[0])}, {weights});
+    if (m_ChangeDetector != nullptr) {
+        m_ChangeDetector->addSamples(params.weightStyles(), {std::make_pair(time, values[median].second[0])}, {weights});
 
-        if (m_ChangeDetector->stopTesting())
-        {
+        if (m_ChangeDetector->stopTesting()) {
             m_ChangeDetector.reset();
-        }
-        else if (auto change = m_ChangeDetector->change())
-        {
+        } else if (auto change = m_ChangeDetector->change()) {
             LOG_DEBUG("Detected " << change->print() << " at " << values[median].first);
             m_ChangeDetector.reset();
             return this->applyChange(*change);
@@ -1564,13 +1260,9 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams &par
     return E_Success;
 }
 
-CUnivariateTimeSeriesModel::EUpdateResult
-CUnivariateTimeSeriesModel::applyChange(const SChangeDescription &change)
-{
-    for (auto &value : m_SlidingWindow)
-    {
-        switch (change.s_Description)
-        {
+CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::applyChange(const SChangeDescription& change) {
+    for (auto& value : m_SlidingWindow) {
+        switch (change.s_Description) {
         case SChangeDescription::E_LevelShift:
             value.second += change.s_Value[0];
             break;
@@ -1578,18 +1270,14 @@ CUnivariateTimeSeriesModel::applyChange(const SChangeDescription &change)
             value.second *= change.s_Value[0];
             break;
         case SChangeDescription::E_TimeShift:
-            value.first += static_cast<core_t::TTime>(change.s_Value[0]);
+            value.first += static_cast<core_t::TTime>(change.s_Value[0]);
             break;
         }
     }
 
-    if (m_TrendModel->applyChange(m_CandidateChangePoint.first,
-                                  m_CandidateChangePoint.second, change))
-    {
+    if (m_TrendModel->applyChange(m_CandidateChangePoint.first, m_CandidateChangePoint.second, change)) {
         this->reinitializeStateGivenNewComponent();
-    }
-    else
-    {
+    } else {
         change.s_ResidualModel->decayRate(m_ResidualModel->decayRate());
         m_ResidualModel = change.s_ResidualModel;
     }
@@ -1597,15 +1285,11 @@ CUnivariateTimeSeriesModel::applyChange(const SChangeDescription &change)
     return E_Success;
 }
 
-CUnivariateTimeSeriesModel::EUpdateResult
-CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightStyles,
-                                        const TTimeDouble2VecSizeTrVec &samples,
-                                        const TDouble2Vec4VecVec &weights)
-{
-    for (const auto &sample : samples)
-    {
-        if (sample.second.size() != 1)
-        {
+CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles,
                                                                                  const TTimeDouble2VecSizeTrVec& samples,
                                                                                  const TDouble2Vec4VecVec& weights) {
+    for (const auto& sample : samples) {
+        if (sample.second.size() != 1) {
             LOG_ERROR("Dimension mismatch: '" << sample.second.size() << " != 1'");
             return E_Failure;
         }
@@ -1617,127 +1301,92 @@ CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightSt
     // or for count feature, the times of all samples will be the same.
     TSizeVec timeorder(samples.size());
     std::iota(timeorder.begin(), timeorder.end(), 0);
-    std::stable_sort(timeorder.begin(), timeorder.end(),
-                     [&samples](std::size_t lhs, std::size_t rhs)
-                     {
-                         return COrderings::lexicographical_compare(samples[lhs].first,
-                                                                    samples[lhs].second,
-                                                                    samples[rhs].first,
-                                                                    samples[rhs].second);
-                     });
-
-    for (auto i : timeorder)
-    {
+    std::stable_sort(timeorder.begin(), timeorder.end(), [&samples](std::size_t lhs, std::size_t rhs) {
+        return COrderings::lexicographical_compare(samples[lhs].first, samples[lhs].second, samples[rhs].first, samples[rhs].second);
+    });
+
+    for (auto i : timeorder) {
         core_t::TTime time{samples[i].first};
         double value{samples[i].second[0]};
         TDouble4Vec weight(unpack(weights[i]));
-        if (m_TrendModel->addPoint(time, value, weightStyles, weight))
-        {
+        if (m_TrendModel->addPoint(time, value, weightStyles, weight)) {
            result = E_Reset;
        }
    }
 
-    if (result == E_Reset)
-    {
+    if (result == E_Reset) {
        this->reinitializeStateGivenNewComponent();
    }
 
    return result;
}
 
-void CUnivariateTimeSeriesModel::appendPredictionErrors(double interval,
-                                                        double sample_,
-                                                        TDouble1VecVec (&result)[2])
-{
+void CUnivariateTimeSeriesModel::appendPredictionErrors(double interval, double sample_, TDouble1VecVec (&result)[2]) {
     using TDecompositionPtr1Vec = core::CSmallVector<TDecompositionPtr, 1>;
     TDouble1Vec sample{sample_};
     TDecompositionPtr1Vec trend{m_TrendModel};
-    if (auto error = predictionError(interval, m_ResidualModel, sample))
-    {
+    if (auto error = predictionError(interval, m_ResidualModel, sample)) {
         result[E_ResidualControl].push_back(*error);
     }
-    if (auto error = predictionError(trend, sample))
-    {
+    if (auto error = predictionError(trend, sample)) {
         result[E_TrendControl].push_back(*error);
     }
 }
 
-void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent()
-{
-    reinitializeResidualModel(this->params().learnRate(),
-                              m_TrendModel, m_SlidingWindow, *m_ResidualModel);
-    if (m_Correlations != nullptr)
-    {
+void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent() {
+    reinitializeResidualModel(this->params().learnRate(), m_TrendModel, m_SlidingWindow, *m_ResidualModel);
+    if (m_Correlations != nullptr) {
         m_Correlations->removeTimeSeries(m_Id);
     }
-    if (m_Controllers != nullptr)
-    {
-        m_ResidualModel->decayRate(  m_ResidualModel->decayRate()
-                                   / (*m_Controllers)[E_ResidualControl].multiplier());
-        m_TrendModel->decayRate(  m_TrendModel->decayRate()
-                                / (*m_Controllers)[E_TrendControl].multiplier());
-        for (auto &controller : *m_Controllers)
-        {
+    if (m_Controllers != nullptr) {
+        m_ResidualModel->decayRate(m_ResidualModel->decayRate() / (*m_Controllers)[E_ResidualControl].multiplier());
+        m_TrendModel->decayRate(m_TrendModel->decayRate() / (*m_Controllers)[E_TrendControl].multiplier());
+        for (auto& controller : *m_Controllers) {
             controller.reset();
         }
     }
-    if (m_AnomalyModel != nullptr)
-    {
+    if (m_AnomalyModel != nullptr) {
         m_AnomalyModel->reset();
     }
     m_ChangeDetector.reset();
 }
 
-bool CUnivariateTimeSeriesModel::correlationModels(TSize1Vec &correlated,
-                                                   TSize2Vec1Vec &variables,
-                                                   TMultivariatePriorCPtrSizePr1Vec &correlationModels,
-                                                   TModelCPtr1Vec &correlatedTimeSeriesModels) const
-{
-    if (m_Correlations)
-    {
+bool CUnivariateTimeSeriesModel::correlationModels(TSize1Vec& correlated,
                                                   TSize2Vec1Vec& variables,
                                                   TMultivariatePriorCPtrSizePr1Vec& correlationModels,
                                                   TModelCPtr1Vec& correlatedTimeSeriesModels) const {
+    if (m_Correlations) {
         correlated = m_Correlations->correlated(m_Id);
-        m_Correlations->correlationModels(m_Id, correlated, variables,
-                                          correlationModels,
-                                          correlatedTimeSeriesModels);
+        m_Correlations->correlationModels(m_Id, correlated, variables, correlationModels, correlatedTimeSeriesModels);
     }
     return correlated.size() > 0;
 }
 
+CTimeSeriesCorrelations::CTimeSeriesCorrelations(double minimumSignificantCorrelation, double decayRate)
+    : m_MinimumSignificantCorrelation(minimumSignificantCorrelation), m_Correlations(MAXIMUM_CORRELATIONS, decayRate) {
+}
 
-CTimeSeriesCorrelations::CTimeSeriesCorrelations(double minimumSignificantCorrelation,
-                                                 double decayRate) :
-        m_MinimumSignificantCorrelation(minimumSignificantCorrelation),
-        m_Correlations(MAXIMUM_CORRELATIONS, decayRate)
-{}
-
-CTimeSeriesCorrelations::CTimeSeriesCorrelations(const CTimeSeriesCorrelations &other,
-                                                 bool isForPersistence) :
-        m_MinimumSignificantCorrelation(other.m_MinimumSignificantCorrelation),
-        m_SampleData(other.m_SampleData),
-        m_Correlations(other.m_Correlations),
-        m_CorrelatedLookup(other.m_CorrelatedLookup),
-        m_TimeSeriesModels(isForPersistence ? TModelCPtrVec() : other.m_TimeSeriesModels)
-{
-    for (const auto &model : other.m_CorrelationDistributionModels)
-    {
-        m_CorrelationDistributionModels.emplace(
-                model.first, std::make_pair(TMultivariatePriorPtr(model.second.first->clone()),
                                            model.second.second));
+CTimeSeriesCorrelations::CTimeSeriesCorrelations(const CTimeSeriesCorrelations& other, bool isForPersistence)
+    : m_MinimumSignificantCorrelation(other.m_MinimumSignificantCorrelation),
+      m_SampleData(other.m_SampleData),
+      m_Correlations(other.m_Correlations),
+      m_CorrelatedLookup(other.m_CorrelatedLookup),
+      m_TimeSeriesModels(isForPersistence ? TModelCPtrVec() : other.m_TimeSeriesModels) {
+    for (const auto& model : other.m_CorrelationDistributionModels) {
+        m_CorrelationDistributionModels.emplace(model.first,
+                                                std::make_pair(TMultivariatePriorPtr(model.second.first->clone()), model.second.second));
     }
 }
 
-CTimeSeriesCorrelations *CTimeSeriesCorrelations::clone() const
-{
+CTimeSeriesCorrelations* CTimeSeriesCorrelations::clone() const {
     return new CTimeSeriesCorrelations(*this);
 }
 
-CTimeSeriesCorrelations *CTimeSeriesCorrelations::cloneForPersistence() const
-{
+CTimeSeriesCorrelations* CTimeSeriesCorrelations::cloneForPersistence() const {
     return new CTimeSeriesCorrelations(*this, true);
 }
 
-void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec &weightStyles)
-{
+void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& weightStyles) {
     using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItr = TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator;
     using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = std::vector<TSizeSizePrMultivariatePriorPtrDoublePrUMapCItr>;
 
@@ -1751,34 +1400,29 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec &wei
     // preserve the random number sequence.
     TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec iterators;
     iterators.reserve(m_CorrelationDistributionModels.size());
-    for (auto i = m_CorrelationDistributionModels.begin(); i != m_CorrelationDistributionModels.end(); ++i)
-    {
+    for (auto i = m_CorrelationDistributionModels.begin(); i != m_CorrelationDistributionModels.end(); ++i) {
         iterators.push_back(i);
     }
-    std::sort(iterators.begin(), iterators.end(),
-              core::CFunctional::SDereference<COrderings::SFirstLess>());
+    std::sort(iterators.begin(), iterators.end(), core::CFunctional::SDereference<COrderings::SFirstLess>());
 
     TDouble10Vec1Vec multivariateSamples;
     TDouble10Vec4Vec1Vec multivariateWeights;
-    for (auto i : iterators)
-    {
+    for (auto i : iterators) {
         std::size_t pid1{i->first.first};
         std::size_t pid2{i->first.second};
         auto i1 = m_SampleData.find(pid1);
         auto i2 = m_SampleData.find(pid2);
-        if (i1 == m_SampleData.end() || i2 == m_SampleData.end())
-        {
+        if (i1 == m_SampleData.end() || i2 == m_SampleData.end()) {
             continue;
         }
 
-        const TMultivariatePriorPtr &prior{i->second.first};
-        SSampleData *samples1{&i1->second};
-        SSampleData *samples2{&i2->second};
+        const TMultivariatePriorPtr& prior{i->second.first};
+        SSampleData* samples1{&i1->second};
+        SSampleData* samples2{&i2->second};
         std::size_t n1{samples1->s_Times.size()};
         std::size_t n2{samples2->s_Times.size()};
-        std::size_t indices[] = { 0, 1 };
-        if (n1 < n2)
-        {
+        std::size_t indices[] = {0, 1};
+        if (n1 < n2) {
             std::swap(samples1, samples2);
             std::swap(n1, n2);
             std::swap(indices[0], indices[1]);
         }
@@ -1786,51 +1430,46 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec &wei
         multivariateSamples.assign(n1, TDouble10Vec(2));
         multivariateWeights.assign(n1, TDouble10Vec4Vec(weightStyles.size(), TDouble10Vec(2)));
 
-        TSize1Vec &tags2{samples2->s_Tags};
-        TTime1Vec &times2{samples2->s_Times};
+        TSize1Vec& tags2{samples2->s_Tags};
+        TTime1Vec& times2{samples2->s_Times};
 
         COrderings::simultaneousSort(tags2, times2, samples2->s_Samples, samples2->s_Weights);
-        for (auto j = tags2.begin(); j != tags2.end(); /**/)
-        {
+        for (auto j = tags2.begin(); j != tags2.end(); /**/) {
             auto k = std::upper_bound(j, tags2.end(), *j);
             std::size_t a = j - tags2.begin();
             std::size_t b = k - tags2.begin();
-            COrderings::simultaneousSort(core::make_range(times2, a, b),
-                                         core::make_range(samples2->s_Samples, a, b),
-                                         core::make_range(samples2->s_Weights, a, b));
TDouble10Vec1Vec multivariateSamples; TDouble10Vec4Vec1Vec multivariateWeights; - for (auto i : iterators) - { + for (auto i : iterators) { std::size_t pid1{i->first.first}; std::size_t pid2{i->first.second}; auto i1 = m_SampleData.find(pid1); auto i2 = m_SampleData.find(pid2); - if (i1 == m_SampleData.end() || i2 == m_SampleData.end()) - { + if (i1 == m_SampleData.end() || i2 == m_SampleData.end()) { continue; } - const TMultivariatePriorPtr &prior{i->second.first}; - SSampleData *samples1{&i1->second}; - SSampleData *samples2{&i2->second}; + const TMultivariatePriorPtr& prior{i->second.first}; + SSampleData* samples1{&i1->second}; + SSampleData* samples2{&i2->second}; std::size_t n1{samples1->s_Times.size()}; std::size_t n2{samples2->s_Times.size()}; - std::size_t indices[] = { 0, 1 }; - if (n1 < n2) - { + std::size_t indices[] = {0, 1}; + if (n1 < n2) { std::swap(samples1, samples2); std::swap(n1, n2); std::swap(indices[0], indices[1]); @@ -1786,51 +1430,46 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec &wei multivariateSamples.assign(n1, TDouble10Vec(2)); multivariateWeights.assign(n1, TDouble10Vec4Vec(weightStyles.size(), TDouble10Vec(2))); - TSize1Vec &tags2{samples2->s_Tags}; - TTime1Vec &times2{samples2->s_Times}; + TSize1Vec& tags2{samples2->s_Tags}; + TTime1Vec& times2{samples2->s_Times}; COrderings::simultaneousSort(tags2, times2, samples2->s_Samples, samples2->s_Weights); - for (auto j = tags2.begin(); j != tags2.end(); /**/) - { + for (auto j = tags2.begin(); j != tags2.end(); /**/) { auto k = std::upper_bound(j, tags2.end(), *j); std::size_t a = j - tags2.begin(); std::size_t b = k - tags2.begin(); - COrderings::simultaneousSort(core::make_range(times2, a, b), - core::make_range(samples2->s_Samples, a, b), - core::make_range(samples2->s_Weights, a, b)); + COrderings::simultaneousSort( + core::make_range(times2, a, b), core::make_range(samples2->s_Samples, a, b), core::make_range(samples2->s_Weights, a, b)); j = k; } - for (std::size_t j1 = 0u; j1 < n1; ++j1) - { + for (std::size_t j1 = 0u; j1 < n1; ++j1) { std::size_t j2{0u}; - if (n2 > 1) - { + if (n2 > 1) { std::size_t tag{samples1->s_Tags[j1]}; core_t::TTime time{samples1->s_Times[j1]}; std::size_t a_ = std::lower_bound(tags2.begin(), tags2.end(), tag) - tags2.begin(); std::size_t b_ = std::upper_bound(tags2.begin(), tags2.end(), tag) - tags2.begin(); - std::size_t b{CTools::truncate(static_cast<std::size_t>( - std::lower_bound(times2.begin() + a_, - times2.begin() + b_, time) - times2.begin()), - std::size_t(1), n2 - 1)}; + std::size_t b{CTools::truncate( + static_cast<std::size_t>(std::lower_bound(times2.begin() + a_, times2.begin() + b_, time) - times2.begin()), + std::size_t(1), + n2 - 1)}; std::size_t a{b - 1}; j2 = std::abs(times2[a] - time) < std::abs(times2[b] - time) ? a : b; } multivariateSamples[j1][indices[0]] = samples1->s_Samples[j1]; multivariateSamples[j1][indices[1]] = samples2->s_Samples[j2]; - for (std::size_t w = 0u; w < weightStyles.size(); ++w) - { + for (std::size_t w = 0u; w < weightStyles.size(); ++w) { multivariateWeights[j1][w][indices[0]] = samples1->s_Weights[j1][w]; multivariateWeights[j1][w][indices[1]] = samples2->s_Weights[j2][w]; } } LOG_TRACE("correlate samples = " << core::CContainerPrinter::print(multivariateSamples) - << ", correlate weights = " << core::CContainerPrinter::print(multivariateWeights)); + << ", correlate weights = " << core::CContainerPrinter::print(multivariateWeights)); - prior->dataType( samples1->s_Type == maths_t::E_IntegerData - || samples2->s_Type == maths_t::E_IntegerData ? - maths_t::E_IntegerData : maths_t::E_ContinuousData); + prior->dataType(samples1->s_Type == maths_t::E_IntegerData || samples2->s_Type == maths_t::E_IntegerData + ? maths_t::E_IntegerData + : maths_t::E_ContinuousData);
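// Each matched pair of samples now updates the bivariate prior below: the
// prior is aged by the shorter of the two series' propagation intervals, and
// its decay rate is scaled by the geometric mean of the two univariate
// multipliers so both marginals forget at a compatible rate.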
prior->addSamples(weightStyles, multivariateSamples, multivariateWeights); prior->propagateForwardsByTime(std::min(samples1->s_Interval, samples2->s_Interval)); prior->decayRate(std::sqrt(samples1->s_Multiplier * samples2->s_Multiplier) * prior->decayRate()); @@ -1842,33 +1481,27 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec &wei m_SampleData.clear(); } -void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator &allocator) -{ +void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator& allocator) { using TDoubleVec = std::vector<double>; using TSizeSizePrVec = std::vector<TSizeSizePr>; - if (m_Correlations.changed()) - { + if (m_Correlations.changed()) { TSizeSizePrVec correlated; TDoubleVec correlationCoeffs; - m_Correlations.mostCorrelated(static_cast<std::size_t>( - 1.2 * static_cast<double>(allocator.maxNumberCorrelations())), - correlated, - &correlationCoeffs); + m_Correlations.mostCorrelated( + static_cast<std::size_t>(1.2 * static_cast<double>(allocator.maxNumberCorrelations())), correlated, &correlationCoeffs); LOG_TRACE("correlated = " << core::CContainerPrinter::print(correlated)); LOG_TRACE("correlationCoeffs = " << core::CContainerPrinter::print(correlationCoeffs)); - ptrdiff_t cutoff{std::upper_bound(correlationCoeffs.begin(), correlationCoeffs.end(), + ptrdiff_t cutoff{std::upper_bound(correlationCoeffs.begin(), + correlationCoeffs.end(), 0.5 * m_MinimumSignificantCorrelation, - [](double lhs, double rhs) - { - return std::fabs(lhs) > std::fabs(rhs); - }) - correlationCoeffs.begin()}; + [](double lhs, double rhs) { return std::fabs(lhs) > std::fabs(rhs); }) - + correlationCoeffs.begin()}; LOG_TRACE("cutoff = " << cutoff);
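// The cutoff computation above relies on correlationCoeffs being sorted by
// decreasing absolute value: upper_bound with a greater-|x| comparator finds
// the first coefficient weaker than half the significance threshold. A
// minimal sketch of the same idiom on hypothetical values:
// \code
// std::vector<double> coeffs{-0.9, 0.8, -0.6, 0.3, 0.1};
// auto firstWeak = std::upper_bound(coeffs.begin(), coeffs.end(), 0.5,
//                                   [](double lhs, double rhs) { return std::fabs(lhs) > std::fabs(rhs); });
// // firstWeak points at 0.3; that coefficient and all weaker ones are dropped.
// \endcode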
correlated.erase(correlated.begin() + cutoff, correlated.end()); - if (correlated.empty()) - { + if (correlated.empty()) { m_CorrelationDistributionModels.clear(); this->refreshLookup(); return; } @@ -1880,16 +1513,13 @@ void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator & TSizeVec presentRank; TSizeSizePrVec missing; TSizeVec missingRank; - std::size_t np{static_cast<std::size_t>( - std::max(0.9 * static_cast<double>(correlated.size()), 1.0))}; - std::size_t nm{static_cast<std::size_t>( - std::max(0.1 * static_cast<double>(correlated.size()), 1.0))}; + std::size_t np{static_cast<std::size_t>(std::max(0.9 * static_cast<double>(correlated.size()), 1.0))}; + std::size_t nm{static_cast<std::size_t>(std::max(0.1 * static_cast<double>(correlated.size()), 1.0))}; present.reserve(np); presentRank.reserve(np); missing.reserve(nm); missingRank.reserve(nm); - for (std::size_t j = 0u; j < correlated.size(); ++j) - { + for (std::size_t j = 0u; j < correlated.size(); ++j) { bool isPresent{m_CorrelationDistributionModels.count(correlated[j]) > 0}; (isPresent ? present : missing).push_back(correlated[j]); (isPresent ? presentRank : missingRank).push_back(j); } @@ -1898,16 +1528,11 @@ // Remove any weakly correlated models. std::size_t initial{m_CorrelationDistributionModels.size()}; COrderings::simultaneousSort(present, presentRank); - for (auto i = m_CorrelationDistributionModels.begin(); i != m_CorrelationDistributionModels.end(); /**/) - { - std::size_t j = std::lower_bound(present.begin(), - present.end(), i->first) - present.begin(); - if (j == present.size() || i->first != present[j]) - { + for (auto i = m_CorrelationDistributionModels.begin(); i != m_CorrelationDistributionModels.end(); /**/) { + std::size_t j = std::lower_bound(present.begin(), present.end(), i->first) - present.begin(); + if (j == present.size() || i->first != present[j]) { i = m_CorrelationDistributionModels.erase(i); - } - else - { + } else { i->second.second = correlationCoeffs[presentRank[j]]; ++i; } } @@ -1916,26 +1541,17 @@ // Remove the remaining most weakly correlated models subject // to the capacity constraint. COrderings::simultaneousSort(presentRank, present, std::greater<std::size_t>()); - for (std::size_t i = 0u; m_CorrelationDistributionModels.size() > allocator.maxNumberCorrelations(); ++i) - { + for (std::size_t i = 0u; m_CorrelationDistributionModels.size() > allocator.maxNumberCorrelations(); ++i) { m_CorrelationDistributionModels.erase(present[i]); } - if (allocator.areAllocationsAllowed()) - { - for (std::size_t i = 0u, nextChunk = std::min(allocator.maxNumberCorrelations(), - initial + allocator.chunkSize()); - m_CorrelationDistributionModels.size() < allocator.maxNumberCorrelations() - && i < missing.size() - && ( m_CorrelationDistributionModels.size() <= initial - || !allocator.exceedsLimit(m_CorrelationDistributionModels.size())); - nextChunk = std::min(allocator.maxNumberCorrelations(), - nextChunk + allocator.chunkSize())) - { - for (/**/; i < missing.size() && m_CorrelationDistributionModels.size() < nextChunk; ++i) - { - m_CorrelationDistributionModels.insert({missing[i], {allocator.newPrior(), - correlationCoeffs[missingRank[i]]}}); + if (allocator.areAllocationsAllowed()) { + for (std::size_t i = 0u, nextChunk = std::min(allocator.maxNumberCorrelations(), initial + allocator.chunkSize()); + m_CorrelationDistributionModels.size() < allocator.maxNumberCorrelations() && i < missing.size() && + (m_CorrelationDistributionModels.size() <= initial || !allocator.exceedsLimit(m_CorrelationDistributionModels.size())); + nextChunk = std::min(allocator.maxNumberCorrelations(), nextChunk + allocator.chunkSize())) { + for (/**/; i < missing.size() && m_CorrelationDistributionModels.size() < nextChunk; ++i) { + m_CorrelationDistributionModels.insert({missing[i], {allocator.newPrior(), correlationCoeffs[missingRank[i]]}}); } } } @@ -1944,14 +1560,11 @@ } } -const CTimeSeriesCorrelations::TSizeSizePrMultivariatePriorPtrDoublePrUMap & -CTimeSeriesCorrelations::correlationModels() const -{ +const CTimeSeriesCorrelations::TSizeSizePrMultivariatePriorPtrDoublePrUMap& CTimeSeriesCorrelations::correlationModels() const { return m_CorrelationDistributionModels; } -void CTimeSeriesCorrelations::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CTimeSeriesCorrelations::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CTimeSeriesCorrelations"); core::CMemoryDebug::dynamicSize("m_SampleData", m_SampleData, mem); core::CMemoryDebug::dynamicSize("m_Correlations", m_Correlations, mem); @@ -1959,128 +1572,92 @@ void 
CTimeSeriesCorrelations::debugMemoryUsage(core::CMemoryUsage::TMemoryUsageP core::CMemoryDebug::dynamicSize("m_CorrelationDistributionModels", m_CorrelationDistributionModels, mem); } -std::size_t CTimeSeriesCorrelations::memoryUsage() const -{ - return core::CMemory::dynamicSize(m_SampleData) - + core::CMemory::dynamicSize(m_Correlations) - + core::CMemory::dynamicSize(m_CorrelatedLookup) - + core::CMemory::dynamicSize(m_CorrelationDistributionModels); +std::size_t CTimeSeriesCorrelations::memoryUsage() const { + return core::CMemory::dynamicSize(m_SampleData) + core::CMemory::dynamicSize(m_Correlations) + + core::CMemory::dynamicSize(m_CorrelatedLookup) + core::CMemory::dynamicSize(m_CorrelationDistributionModels); } -bool CTimeSeriesCorrelations::acceptRestoreTraverser(const SDistributionRestoreParams &params, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; +bool CTimeSeriesCorrelations::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE(K_MOST_CORRELATED_TAG, - traverser.traverseSubLevel(boost::bind(&CKMostCorrelated::acceptRestoreTraverser, - &m_Correlations, _1))) - RESTORE(CORRELATED_LOOKUP_TAG, - core::CPersistUtils::restore(CORRELATED_LOOKUP_TAG, m_CorrelatedLookup, traverser)) + traverser.traverseSubLevel(boost::bind(&CKMostCorrelated::acceptRestoreTraverser, &m_Correlations, _1))) + RESTORE(CORRELATED_LOOKUP_TAG, core::CPersistUtils::restore(CORRELATED_LOOKUP_TAG, m_CorrelatedLookup, traverser)) RESTORE(CORRELATION_MODELS_TAG, - traverser.traverseSubLevel(boost::bind(&CTimeSeriesCorrelations::restoreCorrelationModels, - this, boost::cref(params), _1))) + traverser.traverseSubLevel(boost::bind(&CTimeSeriesCorrelations::restoreCorrelationModels, this, boost::cref(params), _1))) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CTimeSeriesCorrelations::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CTimeSeriesCorrelations::acceptPersistInserter(core::CStatePersistInserter& inserter) const { // Note we don't persist the minimum significant correlation or the // models because that state is reinitialized. The sample is only // maintained transiently during an update at the end of a bucket // and so is always empty at the point persistence occurs.
- inserter.insertLevel(K_MOST_CORRELATED_TAG, - boost::bind(&CKMostCorrelated::acceptPersistInserter, &m_Correlations, _1)); + inserter.insertLevel(K_MOST_CORRELATED_TAG, boost::bind(&CKMostCorrelated::acceptPersistInserter, &m_Correlations, _1)); core::CPersistUtils::persist(CORRELATED_LOOKUP_TAG, m_CorrelatedLookup, inserter); - inserter.insertLevel(CORRELATION_MODELS_TAG, - boost::bind(&CTimeSeriesCorrelations::persistCorrelationModels, this, _1)); + inserter.insertLevel(CORRELATION_MODELS_TAG, boost::bind(&CTimeSeriesCorrelations::persistCorrelationModels, this, _1)); } -bool CTimeSeriesCorrelations::restoreCorrelationModels(const SDistributionRestoreParams &params, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; +bool CTimeSeriesCorrelations::restoreCorrelationModels(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE_SETUP_TEARDOWN(CORRELATION_MODEL_TAG, TSizeSizePrMultivariatePriorPtrDoublePrPr prior, - traverser.traverseSubLevel( - boost::bind(&restore, boost::cref(params), boost::ref(prior), _1)), + traverser.traverseSubLevel(boost::bind(&restore, boost::cref(params), boost::ref(prior), _1)), m_CorrelationDistributionModels.insert(prior)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CTimeSeriesCorrelations::persistCorrelationModels(core::CStatePersistInserter &inserter) const -{ - using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = - std::vector<TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator>; +void CTimeSeriesCorrelations::persistCorrelationModels(core::CStatePersistInserter& inserter) const { + using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = std::vector<TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator>; TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec ordered; ordered.reserve(m_CorrelationDistributionModels.size()); - for (auto prior = m_CorrelationDistributionModels.begin(); prior != m_CorrelationDistributionModels.end(); ++prior) - { + for (auto prior = m_CorrelationDistributionModels.begin(); prior != m_CorrelationDistributionModels.end(); ++prior) { ordered.push_back(prior); } - std::sort(ordered.begin(), ordered.end(), - core::CFunctional::SDereference<COrderings::SFirstLess>()); - for (auto prior : ordered) - { + std::sort(ordered.begin(), ordered.end(), core::CFunctional::SDereference<COrderings::SFirstLess>()); + for (auto prior : ordered) { inserter.insertLevel(CORRELATION_MODEL_TAG, boost::bind(&persist, boost::cref(*prior), _1)); } } -bool CTimeSeriesCorrelations::restore(const SDistributionRestoreParams &params, - TSizeSizePrMultivariatePriorPtrDoublePrPr &model, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; +bool CTimeSeriesCorrelations::restore(const SDistributionRestoreParams& params, + TSizeSizePrMultivariatePriorPtrDoublePrPr& model, + core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(FIRST_CORRELATE_ID_TAG, model.first.first) RESTORE_BUILT_IN(SECOND_CORRELATE_ID_TAG, model.first.second) - RESTORE(CORRELATION_MODEL_TAG, - traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), - boost::cref(params), - boost::ref(model.second.first), _1))) + RESTORE( + CORRELATION_MODEL_TAG, + traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(model.second.first), _1))) RESTORE_BUILT_IN(CORRELATION_TAG, model.second.second) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CTimeSeriesCorrelations::persist(const 
TSizeSizePrMultivariatePriorPtrDoublePrPr &model, - core::CStatePersistInserter &inserter) -{ +void CTimeSeriesCorrelations::persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr& model, core::CStatePersistInserter& inserter) { inserter.insertValue(FIRST_CORRELATE_ID_TAG, model.first.first); inserter.insertValue(SECOND_CORRELATE_ID_TAG, model.first.second); - inserter.insertLevel(CORRELATION_MODEL_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(*model.second.first), _1)); - inserter.insertValue(CORRELATION_TAG, - model.second.second, - core::CIEEE754::E_SinglePrecision); + inserter.insertLevel(CORRELATION_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*model.second.first), _1)); + inserter.insertValue(CORRELATION_TAG, model.second.second, core::CIEEE754::E_SinglePrecision); } -void CTimeSeriesCorrelations::addTimeSeries(std::size_t id, const CUnivariateTimeSeriesModel &model) -{ +void CTimeSeriesCorrelations::addTimeSeries(std::size_t id, const CUnivariateTimeSeriesModel& model) { m_Correlations.addVariables(id + 1); core::CAllocationStrategy::resize(m_TimeSeriesModels, std::max(id + 1, m_TimeSeriesModels.size())); m_TimeSeriesModels[id] = &model; } -void CTimeSeriesCorrelations::removeTimeSeries(std::size_t id) -{ +void CTimeSeriesCorrelations::removeTimeSeries(std::size_t id) { auto correlated_ = m_CorrelatedLookup.find(id); - if (correlated_ != m_CorrelatedLookup.end()) - { - TSize1Vec &correlated{correlated_->second}; - for (const auto &correlate : correlated) - { + if (correlated_ != m_CorrelatedLookup.end()) { + TSize1Vec& correlated{correlated_->second}; + for (const auto& correlate : correlated) { m_CorrelationDistributionModels.erase({id, correlate}); m_CorrelationDistributionModels.erase({correlate, id}); } @@ -2091,46 +1668,40 @@ void CTimeSeriesCorrelations::removeTimeSeries(std::size_t id) } void CTimeSeriesCorrelations::addSamples(std::size_t id, - const CModelAddSamplesParams &params, - const TTimeDouble2VecSizeTrVec &samples, - double multiplier) -{ - SSampleData &data{m_SampleData[id]}; + const CModelAddSamplesParams& params, + const TTimeDouble2VecSizeTrVec& samples, + double multiplier) { + SSampleData& data{m_SampleData[id]}; data.s_Type = params.type(); data.s_Times.reserve(samples.size()); data.s_Samples.reserve(samples.size()); data.s_Tags.reserve(samples.size()); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { data.s_Times.push_back(samples[i].first); data.s_Samples.push_back(samples[i].second[0]); data.s_Tags.push_back(samples[i].third); - data.s_Weights.push_back( - CUnivariateTimeSeriesModel::unpack(params.priorWeights()[i])); + data.s_Weights.push_back(CUnivariateTimeSeriesModel::unpack(params.priorWeights()[i])); } data.s_Interval = params.propagationInterval(); data.s_Multiplier = multiplier; m_Correlations.add(id, CBasicStatistics::median(data.s_Samples)); } -TSize1Vec CTimeSeriesCorrelations::correlated(std::size_t id) const -{ +TSize1Vec CTimeSeriesCorrelations::correlated(std::size_t id) const { auto correlated = m_CorrelatedLookup.find(id); return correlated != m_CorrelatedLookup.end() ? 
correlated->second : TSize1Vec(); } bool CTimeSeriesCorrelations::correlationModels(std::size_t id, - TSize1Vec &correlated, - TSize2Vec1Vec &variables, - TMultivariatePriorCPtrSizePr1Vec &correlationModels, - TModelCPtr1Vec &correlatedTimeSeriesModels) const -{ + TSize1Vec& correlated, + TSize2Vec1Vec& variables, + TMultivariatePriorCPtrSizePr1Vec& correlationModels, + TModelCPtr1Vec& correlatedTimeSeriesModels) const { variables.clear(); correlationModels.clear(); correlatedTimeSeriesModels.clear(); - if (correlated.empty()) - { + if (correlated.empty()) { return false; } @@ -2138,28 +1709,22 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id, correlationModels.reserve(correlated.size()); correlatedTimeSeriesModels.reserve(correlated.size()); std::size_t end{0u}; - for (auto correlate : correlated) - { + for (auto correlate : correlated) { auto i = m_CorrelationDistributionModels.find({id, correlate}); TSize2Vec variable{0, 1}; - if (i == m_CorrelationDistributionModels.end()) - { + if (i == m_CorrelationDistributionModels.end()) { i = m_CorrelationDistributionModels.find({correlate, id}); std::swap(variable[0], variable[1]); } - if (i == m_CorrelationDistributionModels.end()) - { - LOG_ERROR("Unexpectedly missing prior for correlation (" << id - << "," << correlate << ")"); + if (i == m_CorrelationDistributionModels.end()) { + LOG_ERROR("Unexpectedly missing prior for correlation (" << id << "," << correlate << ")"); continue; } - if (std::fabs(i->second.second) < m_MinimumSignificantCorrelation) - { + if (std::fabs(i->second.second) < m_MinimumSignificantCorrelation) { LOG_TRACE("Correlation " << i->second.second << " is too small to model"); continue; } - if (i->second.first->numberSamples() < MINIMUM_CORRELATE_PRIOR_SAMPLE_COUNT) - { + if (i->second.first->numberSamples() < MINIMUM_CORRELATE_PRIOR_SAMPLE_COUNT) { LOG_TRACE("Too few samples in correlate model"); continue; } @@ -2170,131 +1735,101 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id, } correlated.resize(variables.size()); - for (auto correlate : correlated) - { + for (auto correlate : correlated) { correlatedTimeSeriesModels.push_back(m_TimeSeriesModels[correlate]); } return correlationModels.size() > 0; } -void CTimeSeriesCorrelations::refreshLookup() -{ +void CTimeSeriesCorrelations::refreshLookup() { m_CorrelatedLookup.clear(); - for (const auto &prior : m_CorrelationDistributionModels) - { + for (const auto& prior : m_CorrelationDistributionModels) { std::size_t x0{prior.first.first}; std::size_t x1{prior.first.second}; m_CorrelatedLookup[x0].push_back(x1); m_CorrelatedLookup[x1].push_back(x0); } - for (auto &prior : m_CorrelatedLookup) - { + for (auto& prior : m_CorrelatedLookup) { std::sort(prior.second.begin(), prior.second.end()); } } - -CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CModelParams &params, - const CTimeSeriesDecompositionInterface &trend, - const CMultivariatePrior &residualModel, - const TDecayRateController2Ary *controllers, - bool modelAnomalies) : - CModel(params), - m_IsNonNegative(false), - m_ResidualModel(residualModel.clone()), - m_AnomalyModel(modelAnomalies ? 
- boost::make_shared<CTimeSeriesAnomalyModel>(params.bucketLength(), - params.decayRate()) : - TAnomalyModelPtr()), - m_SlidingWindow(SLIDING_WINDOW_SIZE) -{ - if (controllers) - { +CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CModelParams& params, + const CTimeSeriesDecompositionInterface& trend, + const CMultivariatePrior& residualModel, + const TDecayRateController2Ary* controllers, + bool modelAnomalies) + : CModel(params), + m_IsNonNegative(false), + m_ResidualModel(residualModel.clone()), + m_AnomalyModel(modelAnomalies ? boost::make_shared<CTimeSeriesAnomalyModel>(params.bucketLength(), params.decayRate()) + : TAnomalyModelPtr()), + m_SlidingWindow(SLIDING_WINDOW_SIZE) { + if (controllers) { m_Controllers = boost::make_shared<TDecayRateController2Ary>(*controllers); } - for (std::size_t d = 0u; d < this->dimension(); ++d) - { + for (std::size_t d = 0u; d < this->dimension(); ++d) { m_TrendModel.emplace_back(trend.clone()); } } -CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel &other) : - CModel(other.params()), - m_IsNonNegative(other.m_IsNonNegative), - m_ResidualModel(other.m_ResidualModel->clone()), - m_AnomalyModel(other.m_AnomalyModel ? - boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel) : - TAnomalyModelPtr()), - m_SlidingWindow(other.m_SlidingWindow) -{ - if (other.m_Controllers) - { +CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel& other) + : CModel(other.params()), + m_IsNonNegative(other.m_IsNonNegative), + m_ResidualModel(other.m_ResidualModel->clone()), + m_AnomalyModel(other.m_AnomalyModel ? boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel) : TAnomalyModelPtr()), + m_SlidingWindow(other.m_SlidingWindow) { + if (other.m_Controllers) { m_Controllers = boost::make_shared<TDecayRateController2Ary>(*other.m_Controllers); } m_TrendModel.reserve(other.m_TrendModel.size()); - for (const auto &trend : other.m_TrendModel) - { + for (const auto& trend : other.m_TrendModel) { m_TrendModel.emplace_back(trend->clone()); } } -CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const SModelRestoreParams &params, - core::CStateRestoreTraverser &traverser) : - CModel(params.s_Params), - m_SlidingWindow(SLIDING_WINDOW_SIZE) -{ - traverser.traverseSubLevel(boost::bind(&CMultivariateTimeSeriesModel::acceptRestoreTraverser, - this, boost::cref(params), _1)); +CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) + : CModel(params.s_Params), m_SlidingWindow(SLIDING_WINDOW_SIZE) { + traverser.traverseSubLevel(boost::bind(&CMultivariateTimeSeriesModel::acceptRestoreTraverser, this, boost::cref(params), _1)); } -std::size_t CMultivariateTimeSeriesModel::identifier() const -{ +std::size_t CMultivariateTimeSeriesModel::identifier() const { return 0; } -CMultivariateTimeSeriesModel *CMultivariateTimeSeriesModel::clone(std::size_t /*id*/) const -{ +CMultivariateTimeSeriesModel* CMultivariateTimeSeriesModel::clone(std::size_t /*id*/) const { return new CMultivariateTimeSeriesModel{*this}; } -CMultivariateTimeSeriesModel *CMultivariateTimeSeriesModel::cloneForPersistence() const -{ +CMultivariateTimeSeriesModel* CMultivariateTimeSeriesModel::cloneForPersistence() const { return new CMultivariateTimeSeriesModel{*this}; } -CMultivariateTimeSeriesModel *CMultivariateTimeSeriesModel::cloneForForecast() const -{ +CMultivariateTimeSeriesModel* CMultivariateTimeSeriesModel::cloneForForecast() const { // Note: placeholder as there is no forecast support for multivariate time series for now return new 
CMultivariateTimeSeriesModel{*this}; } -bool CMultivariateTimeSeriesModel::isForecastPossible() const -{ +bool CMultivariateTimeSeriesModel::isForecastPossible() const { return false; } -void CMultivariateTimeSeriesModel::modelCorrelations(CTimeSeriesCorrelations &/*model*/) -{ +void CMultivariateTimeSeriesModel::modelCorrelations(CTimeSeriesCorrelations& /*model*/) { // no-op } -TSize2Vec1Vec CMultivariateTimeSeriesModel::correlates() const -{ +TSize2Vec1Vec CMultivariateTimeSeriesModel::correlates() const { return TSize2Vec1Vec(); } -void CMultivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec &/*value*/) -{ +void CMultivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec& /*value*/) { // no-op } -CMultivariateTimeSeriesModel::EUpdateResult -CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params, - TTimeDouble2VecSizeTrVec samples) -{ - if (samples.empty()) - { +CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, + TTimeDouble2VecSizeTrVec samples) { + if (samples.empty()) { return E_Success; } @@ -2302,17 +1837,12 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params, TSizeVec valueorder(samples.size()); std::iota(valueorder.begin(), valueorder.end(), 0); - std::stable_sort(valueorder.begin(), valueorder.end(), - [&samples](std::size_t lhs, std::size_t rhs) - { - return samples[lhs].second < samples[rhs].second; - }); + std::stable_sort(valueorder.begin(), valueorder.end(), [&samples](std::size_t lhs, std::size_t rhs) { + return samples[lhs].second < samples[rhs].second; + }); TOptionalTimeDouble2VecPr randomSample; - if (TOptionalSize index = randomlySample(m_Rng, params, - this->params().bucketLength(), - valueorder)) - { + if (TOptionalSize index = randomlySample(m_Rng, params, this->params().bucketLength(), valueorder)) { randomSample.reset({samples[*index].first, samples[*index].second}); } @@ -2320,8 +1850,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params, maths_t::EDataType type{params.type()}; m_ResidualModel->dataType(type); - for (auto &trendModel : m_TrendModel) - { + for (auto& trendModel : m_TrendModel) { trendModel->dataType(type); } @@ -2329,26 +1858,20 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params, EUpdateResult result{this->updateTrend(params.weightStyles(), samples, params.trendWeights())}; - for (auto &sample : samples) - { - if (sample.second.size() != dimension) - { - LOG_ERROR("Unexpected sample dimension: '" - << sample.second.size() << " != " << this->dimension() << "' discarding"); + for (auto& sample : samples) { + if (sample.second.size() != dimension) { + LOG_ERROR("Unexpected sample dimension: '" << sample.second.size() << " != " << this->dimension() << "' discarding"); continue; } core_t::TTime time{sample.first}; - for (std::size_t d = 0u; d < sample.second.size(); ++d) - { + for (std::size_t d = 0u; d < sample.second.size(); ++d) { sample.second[d] = m_TrendModel[d]->detrend(time, sample.second[d], 0.0); } } - std::stable_sort(valueorder.begin(), valueorder.end(), - [&samples](std::size_t lhs, std::size_t rhs) - { - return samples[lhs].second < samples[rhs].second; - }); + std::stable_sort(valueorder.begin(), valueorder.end(), [&samples](std::size_t lhs, std::size_t rhs) { + return samples[lhs].second < samples[rhs].second; + }); TDouble10Vec1Vec samples_; TDouble10Vec4Vec1Vec weights_; @@ -2356,8 +1879,7 @@ 
CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params, weights_.reserve(samples.size()); TMeanAccumulator averageTime; - for (auto i : valueorder) - { + for (auto i : valueorder) { samples_.push_back(samples[i].second); weights_.push_back(unpack(params.priorWeights()[i])); averageTime.add(static_cast<double>(samples[i].first)); } @@ -2365,135 +1887,106 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams &params, m_ResidualModel->addSamples(params.weightStyles(), samples_, weights_); m_ResidualModel->propagateForwardsByTime(params.propagationInterval()); - if (m_AnomalyModel != nullptr) - { + if (m_AnomalyModel != nullptr) { m_AnomalyModel->propagateForwardsByTime(params.propagationInterval()); } - if (m_Controllers != nullptr) - { + if (m_Controllers != nullptr) { TDouble1VecVec errors[2]; errors[0].reserve(samples.size()); errors[1].reserve(samples.size()); - for (auto i : valueorder) - { + for (auto i : valueorder) { this->appendPredictionErrors(params.propagationInterval(), samples[i].second, errors); } { - CDecayRateController &controller{(*m_Controllers)[E_TrendControl]}; + CDecayRateController& controller{(*m_Controllers)[E_TrendControl]}; TDouble1Vec trendMean(dimension); core_t::TTime time{static_cast<core_t::TTime>(CBasicStatistics::mean(averageTime))}; - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { trendMean[d] = m_TrendModel[d]->meanValue(time); } - double multiplier{controller.multiplier(trendMean, errors[E_TrendControl], - this->params().bucketLength(), - this->params().learnRate(), - this->params().decayRate())}; - if (multiplier != 1.0) - { - for (const auto &trend : m_TrendModel) - { + double multiplier{controller.multiplier( + trendMean, errors[E_TrendControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate())}; + if (multiplier != 1.0) { + for (const auto& trend : m_TrendModel) { trend->decayRate(multiplier * trend->decayRate()); } LOG_TRACE("trend decay rate = " << m_TrendModel[0]->decayRate()); } } { - CDecayRateController &controller{(*m_Controllers)[E_ResidualControl]}; + CDecayRateController& controller{(*m_Controllers)[E_ResidualControl]}; TDouble1Vec residualMean(m_ResidualModel->marginalLikelihoodMean()); - double multiplier{controller.multiplier(residualMean, errors[E_ResidualControl], + double multiplier{controller.multiplier(residualMean, + errors[E_ResidualControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate())}; - if (multiplier != 1.0) - { + if (multiplier != 1.0) { m_ResidualModel->decayRate(multiplier * m_ResidualModel->decayRate()); LOG_TRACE("prior decay rate = " << m_ResidualModel->decayRate()); } } } - if (randomSample) - { + if (randomSample) { m_SlidingWindow.push_back({randomSample->first, randomSample->second}); } return result; } -void CMultivariateTimeSeriesModel::skipTime(core_t::TTime gap) -{ - for (const auto &trend : m_TrendModel) - { +void CMultivariateTimeSeriesModel::skipTime(core_t::TTime gap) { + for (const auto& trend : m_TrendModel) { trend->skipTime(gap); } }
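// The decay rate control pattern used above, in essence: multiplier() maps the
// recent prediction errors to a factor and the model's decay rate is scaled by
// it, i.e. decayRate <- multiplier * decayRate, so forgetting speeds up while
// the model is persistently wrong and slows down while it is tracking well.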
CMultivariateTimeSeriesModel::TDouble2Vec -CMultivariateTimeSeriesModel::mode(core_t::TTime time, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const -{ +CMultivariateTimeSeriesModel::mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const { std::size_t dimension = this->dimension(); TDouble2Vec result(dimension); TDouble10Vec mode(m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights))); - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { result[d] = mode[d] + CBasicStatistics::mean(m_TrendModel[d]->value(time)); } return result; } -CMultivariateTimeSeriesModel::TDouble2Vec1Vec -CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec &/*weightStyles*/, - const TDouble2Vec4Vec1Vec &/*weights*/) const -{ +CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, + const maths_t::TWeightStyleVec& /*weightStyles*/, + const TDouble2Vec4Vec1Vec& /*weights*/) const { return TDouble2Vec1Vec(); } -CMultivariateTimeSeriesModel::TDouble2Vec1Vec -CMultivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights) const -{ +CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const { TDouble10Vec1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights))); TDouble2Vec1Vec result; result.reserve(modes.size()); - for (const auto &mode : modes) - { + for (const auto& mode : modes) { result.push_back(TDouble2Vec(mode)); } return result; } -void CMultivariateTimeSeriesModel::detrend(const TTime2Vec1Vec &time_, - double confidenceInterval, - TDouble2Vec1Vec &value) const -{ +void CMultivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time_, double confidenceInterval, TDouble2Vec1Vec& value) const { std::size_t dimension{this->dimension()}; core_t::TTime time{time_[0][0]}; - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { value[0][d] = m_TrendModel[d]->detrend(time, value[0][d], confidenceInterval); } } CMultivariateTimeSeriesModel::TDouble2Vec -CMultivariateTimeSeriesModel::predict(core_t::TTime time, - const TSizeDoublePr1Vec &/*correlated*/, - TDouble2Vec hint) const -{ +CMultivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& /*correlated*/, TDouble2Vec hint) const { using TUnivariatePriorPtr = boost::shared_ptr<CPrior>; std::size_t dimension{this->dimension()}; double scale{1.0 - this->params().probabilityBucketEmpty()}; - if (hint.size() == dimension) - { - for (std::size_t d = 0u; d < dimension; ++d) - { + if (hint.size() == dimension) { + for (std::size_t d = 0u; d < dimension; ++d) { hint[d] = m_TrendModel[d]->detrend(time, hint[d], 0.0); } } @@ -2503,23 +1996,19 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, TDouble2Vec result(dimension); TDouble10Vec mean(m_ResidualModel->marginalLikelihoodMean()); - for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) - { + for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { double trend{0.0}; - if (m_TrendModel[d]->initialized()) - { + if (m_TrendModel[d]->initialized()) { trend = CBasicStatistics::mean(m_TrendModel[d]->value(time)); } double median{mean[d]}; - if (!m_ResidualModel->isNonInformative()) - { + if (!m_ResidualModel->isNonInformative()) { TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; - median = hint.empty() ? CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0)) : - marginal->nearestMarginalLikelihoodMean(hint[d]); + median = hint.empty() ? 
CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0)) + : marginal->nearestMarginalLikelihoodMean(hint[d]); } result[d] = scale * (trend + median); - if (m_IsNonNegative) - { + if (m_IsNonNegative) { result[d] = std::max(result[d], 0.0); } } @@ -2527,14 +2016,11 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, return result; } -CMultivariateTimeSeriesModel::TDouble2Vec3Vec -CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, - double confidenceInterval, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4Vec &weights_) const -{ - if (m_ResidualModel->isNonInformative()) - { +CMultivariateTimeSeriesModel::TDouble2Vec3Vec CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, + double confidenceInterval, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights_) const { + if (m_ResidualModel->isNonInformative()) { return TDouble2Vec3Vec(); } @@ -2549,29 +2035,23 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, TDouble2Vec3Vec result(3, TDouble2Vec(dimension)); TDouble4Vec weights; - for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) - { - double trend{m_TrendModel[d]->initialized() ? - CBasicStatistics::mean(m_TrendModel[d]->value(time, confidenceInterval)) : 0.0}; + for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { + double trend{m_TrendModel[d]->initialized() ? CBasicStatistics::mean(m_TrendModel[d]->value(time, confidenceInterval)) : 0.0}; weights.clear(); weights.reserve(weights_.size()); - for (const auto &weight : weights_) - { + for (const auto& weight : weights_) { weights.push_back(weight[d]); } TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; - double median{CBasicStatistics::mean( - marginal->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; - TDoubleDoublePr interval{ - marginal->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; + double median{CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; + TDoubleDoublePr interval{marginal->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; result[0][d] = scale * (trend + interval.first); result[1][d] = scale * (trend + median); result[2][d] = scale * (trend + interval.second); - if (m_IsNonNegative) - { + if (m_IsNonNegative) { result[0][d] = std::max(result[0][d], 0.0); result[1][d] = std::max(result[1][d], 0.0); result[2][d] = std::max(result[2][d], 0.0); @@ -2584,27 +2064,24 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, bool CMultivariateTimeSeriesModel::forecast(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/, double /*confidenceInterval*/, - const TDouble2Vec &/*minimum*/, - const TDouble2Vec &/*maximum*/, - const TForecastPushDatapointFunc &/*forecastPushDataPointFunc*/, - std::string &messageOut) -{ + const TDouble2Vec& /*minimum*/, + const TDouble2Vec& /*maximum*/, + const TForecastPushDatapointFunc& /*forecastPushDataPointFunc*/, + std::string& messageOut) { LOG_DEBUG(forecast::ERROR_MULTIVARIATE); messageOut = forecast::ERROR_MULTIVARIATE; return false; } -bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &params, - const TTime2Vec1Vec &time_, - const TDouble2Vec1Vec &value, - double &probability, - TTail2Vec &tail, - bool &conditional, - TSize1Vec &mostAnomalousCorrelate) const -{ +bool 
CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& params, + const TTime2Vec1Vec& time_, + const TDouble2Vec1Vec& value, + double& probability, + TTail2Vec& tail, + bool& conditional, + TSize1Vec& mostAnomalousCorrelate) const { TSize2Vec coordinates(params.coordinates()); - if (coordinates.empty()) - { + if (coordinates.empty()) { coordinates.resize(this->dimension()); std::iota(coordinates.begin(), coordinates.end(), 0); } @@ -2617,10 +2094,8 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &pa std::size_t dimension{this->dimension()}; core_t::TTime time{time_[0][0]}; TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; - for (std::size_t d = 0u; d < dimension; ++d) - { - sample[0][d] = m_TrendModel[d]->detrend(time, value[0][d], - params.seasonalConfidenceInterval()); + for (std::size_t d = 0u; d < dimension; ++d) { + sample[0][d] = m_TrendModel[d]->detrend(time, value[0][d], params.seasonalConfidenceInterval()); } TDouble10Vec4Vec1Vec weights{unpack(params.weights()[0])}; bool bucketEmpty{params.bucketEmpty()[0][0]}; @@ -2633,48 +2108,32 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &pa TDouble10Vec2Vec pls; TDouble10Vec2Vec pus; TTail10Vec tail_; - for (std::size_t i = 0u; i < coordinates.size(); ++i) - { + for (std::size_t i = 0u; i < coordinates.size(); ++i) { maths_t::EProbabilityCalculation calculation = params.calculation(i); coordinate[0] = coordinates[i]; - if (!m_ResidualModel->probabilityOfLessLikelySamples(calculation, - params.weightStyles(), - sample, weights, coordinate, - pls, pus, tail_)) - { + if (!m_ResidualModel->probabilityOfLessLikelySamples( + calculation, params.weightStyles(), sample, weights, coordinate, pls, pus, tail_)) { LOG_ERROR("Failed to compute P(" << sample << " | weight = " << weights << ")"); return false; } - pl_[0].add(correctForEmptyBucket(calculation, value[0], - bucketEmpty, probabilityBucketEmpty, - pls[0][0])); - pu_[0].add(correctForEmptyBucket(calculation, value[0], - bucketEmpty, probabilityBucketEmpty, - pus[0][0])); - pl_[1].add(correctForEmptyBucket(calculation, value[0], - bucketEmpty, probabilityBucketEmpty, - pls[1][0])); - pu_[1].add(correctForEmptyBucket(calculation, value[0], - bucketEmpty, probabilityBucketEmpty, - pus[1][0])); + pl_[0].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, probabilityBucketEmpty, pls[0][0])); + pu_[0].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, probabilityBucketEmpty, pus[0][0])); + pl_[1].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, probabilityBucketEmpty, pls[1][0])); + pu_[1].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, probabilityBucketEmpty, pus[1][0])); tail[i] = tail_[0]; } double pl[2], pu[2]; - if ( !pl_[0].calculate(pl[0]) || !pu_[0].calculate(pu[0]) - || !pl_[1].calculate(pl[1]) || !pu_[1].calculate(pu[1])) - { + if (!pl_[0].calculate(pl[0]) || !pu_[0].calculate(pu[0]) || !pl_[1].calculate(pl[1]) || !pu_[1].calculate(pu[1])) { return false; } probability = (std::sqrt(pl[0] * pl[1]) + std::sqrt(pu[0] * pu[1])) / 2.0; - if (m_AnomalyModel != nullptr) - { + if (m_AnomalyModel != nullptr) { TDouble2Vec residual(dimension); TDouble10Vec nearest(m_ResidualModel->nearestMarginalLikelihoodMean(sample[0])); TDouble2Vec scale(this->seasonalWeight(0.0, time)); - for (std::size_t i = 0u; i < dimension; ++i) - { + for (std::size_t i = 0u; i < dimension; ++i) { residual[i] = (sample[0][i] - nearest[i]) / std::max(std::sqrt(scale[i]), 1.0); } 
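// The residual fed to the anomaly model below is the distance from the nearest
// marginal likelihood mean, normalised by the seasonal scale (floored at one),
// so that anomaly scores are comparable across seasonally varying series.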
m_AnomalyModel->updateAnomaly(params, time, residual, probability); @@ -2686,43 +2145,34 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams &pa } CMultivariateTimeSeriesModel::TDouble2Vec -CMultivariateTimeSeriesModel::winsorisationWeight(double derate, - core_t::TTime time, - const TDouble2Vec &value) const -{ +CMultivariateTimeSeriesModel::winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const { TDouble2Vec result(this->dimension()); std::size_t dimension{this->dimension()}; TDouble2Vec scale(this->seasonalWeight(0.0, time)); TDouble10Vec sample(dimension); - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { sample[d] = m_TrendModel[d]->detrend(time, value[d], 0.0); } - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { result[d] = tailWinsorisationWeight(*m_ResidualModel, d, derate, scale[d], sample); } return result; } -CMultivariateTimeSeriesModel::TDouble2Vec -CMultivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const -{ +CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const { TDouble2Vec result(this->dimension()); TDouble10Vec variances(m_ResidualModel->marginalLikelihoodVariances()); - for (std::size_t d = 0u, dimension = this->dimension(); d < dimension; ++d) - { + for (std::size_t d = 0u, dimension = this->dimension(); d < dimension; ++d) { double scale{m_TrendModel[d]->scale(time, variances[d], confidence).second}; result[d] = std::max(scale, this->params().minimumSeasonalVarianceScale()); } return result; } -uint64_t CMultivariateTimeSeriesModel::checksum(uint64_t seed) const -{ +uint64_t CMultivariateTimeSeriesModel::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_IsNonNegative); seed = CChecksum::calculate(seed, m_Controllers); seed = CChecksum::calculate(seed, m_TrendModel); @@ -2731,8 +2181,7 @@ uint64_t CMultivariateTimeSeriesModel::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_SlidingWindow); } -void CMultivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CMultivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CUnivariateTimeSeriesModel"); core::CMemoryDebug::dynamicSize("m_Controllers", m_Controllers, mem); core::CMemoryDebug::dynamicSize("m_TrendModel", m_TrendModel, mem); @@ -2741,23 +2190,16 @@ void CMultivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryU core::CMemoryDebug::dynamicSize("m_SlidingWindow", m_SlidingWindow, mem); } -std::size_t CMultivariateTimeSeriesModel::memoryUsage() const -{ - return core::CMemory::dynamicSize(m_Controllers) - + core::CMemory::dynamicSize(m_TrendModel) - + core::CMemory::dynamicSize(m_ResidualModel) - + core::CMemory::dynamicSize(m_AnomalyModel) - + core::CMemory::dynamicSize(m_SlidingWindow); +std::size_t CMultivariateTimeSeriesModel::memoryUsage() const { + return core::CMemory::dynamicSize(m_Controllers) + core::CMemory::dynamicSize(m_TrendModel) + + core::CMemory::dynamicSize(m_ResidualModel) + core::CMemory::dynamicSize(m_AnomalyModel) + + core::CMemory::dynamicSize(m_SlidingWindow); } -bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParams &params, - core::CStateRestoreTraverser &traverser) -{ - if (traverser.name() == VERSION_6_3_TAG) - { - while (traverser.next()) - { - const 
std::string &name{traverser.name()}; +bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { + if (traverser.name() == VERSION_6_3_TAG) { + while (traverser.next()) { + const std::string& name{traverser.name()}; RESTORE_BOOL(IS_NON_NEGATIVE_6_3_TAG, m_IsNonNegative) RESTORE(RNG_6_3_TAG, m_Rng.fromString(traverser.value())) RESTORE_SETUP_TEARDOWN(CONTROLLER_6_3_TAG, @@ -2766,29 +2208,24 @@ bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestorePar /**/) RESTORE_SETUP_TEARDOWN(TREND_MODEL_6_3_TAG, m_TrendModel.push_back(TDecompositionPtr()), - traverser.traverseSubLevel(boost::bind( - CTimeSeriesDecompositionStateSerialiser(), - boost::cref(params.s_DecompositionParams), - boost::ref(m_TrendModel.back()), _1)), + traverser.traverseSubLevel(boost::bind(CTimeSeriesDecompositionStateSerialiser(), + boost::cref(params.s_DecompositionParams), + boost::ref(m_TrendModel.back()), + _1)), /**/) - RESTORE(RESIDUAL_MODEL_6_3_TAG, traverser.traverseSubLevel(boost::bind( - CPriorStateSerialiser(), - boost::cref(params.s_DistributionParams), - boost::ref(m_ResidualModel), _1))) + RESTORE(RESIDUAL_MODEL_6_3_TAG, + traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), boost::ref(m_ResidualModel), _1))) RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_6_3_TAG, m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(), - traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser, - m_AnomalyModel.get(), boost::cref(params), _1)), + traverser.traverseSubLevel(boost::bind( + &CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)), /**/) - RESTORE(SLIDING_WINDOW_6_3_TAG, - core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, traverser)) + RESTORE(SLIDING_WINDOW_6_3_TAG, core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, traverser)) } - } - else - { - do - { - const std::string &name{traverser.name()}; + } else { + do { + const std::string& name{traverser.name()}; RESTORE_BOOL(IS_NON_NEGATIVE_OLD_TAG, m_IsNonNegative) RESTORE_SETUP_TEARDOWN(CONTROLLER_OLD_TAG, m_Controllers = boost::make_shared<TDecayRateController2Ary>(), @@ -2796,85 +2233,67 @@ bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestorePar /**/) RESTORE_SETUP_TEARDOWN(TREND_OLD_TAG, m_TrendModel.push_back(TDecompositionPtr()), - traverser.traverseSubLevel(boost::bind( - CTimeSeriesDecompositionStateSerialiser(), - boost::cref(params.s_DecompositionParams), - boost::ref(m_TrendModel.back()), _1)), + traverser.traverseSubLevel(boost::bind(CTimeSeriesDecompositionStateSerialiser(), + boost::cref(params.s_DecompositionParams), + boost::ref(m_TrendModel.back()), + _1)), /**/) - RESTORE(PRIOR_OLD_TAG, traverser.traverseSubLevel(boost::bind( - CPriorStateSerialiser(), - boost::cref(params.s_DistributionParams), - boost::ref(m_ResidualModel), _1))) + RESTORE(PRIOR_OLD_TAG, + traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), boost::ref(m_ResidualModel), _1))) RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_OLD_TAG, m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(), - traverser.traverseSubLevel(boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser, - m_AnomalyModel.get(), boost::cref(params), _1)), + traverser.traverseSubLevel(boost::bind( + &CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)), /**/) - } - while (traverser.next()); + } while 
(traverser.next()); } return true; } -void CMultivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CMultivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { // Note that we don't persist this->params() because that state // is reinitialized. inserter.insertValue(VERSION_6_3_TAG, ""); inserter.insertValue(IS_NON_NEGATIVE_6_3_TAG, static_cast<int>(m_IsNonNegative)); - if (m_Controllers) - { + if (m_Controllers) { core::CPersistUtils::persist(CONTROLLER_6_3_TAG, *m_Controllers, inserter); } - for (const auto &trend : m_TrendModel) - { - inserter.insertLevel(TREND_MODEL_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(), - boost::cref(*trend), _1)); + for (const auto& trend : m_TrendModel) { + inserter.insertLevel(TREND_MODEL_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(), boost::cref(*trend), _1)); } - inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(), - boost::cref(*m_ResidualModel), _1)); - if (m_AnomalyModel != nullptr) - { - inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, - boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter, - m_AnomalyModel.get(), _1)); + inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_ResidualModel), _1)); + if (m_AnomalyModel != nullptr) { + inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter, m_AnomalyModel.get(), _1)); } core::CPersistUtils::persist(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, inserter); } -maths_t::EDataType CMultivariateTimeSeriesModel::dataType() const -{ +maths_t::EDataType CMultivariateTimeSeriesModel::dataType() const { return m_ResidualModel->dataType(); } -CMultivariateTimeSeriesModel::TDouble10Vec4Vec CMultivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec &weights) -{ +CMultivariateTimeSeriesModel::TDouble10Vec4Vec CMultivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec& weights) { TDouble10Vec4Vec result; result.reserve(weights.size()); - for (const auto &weight : weights) - { + for (const auto& weight : weights) { result.emplace_back(weight); } return result; } void CMultivariateTimeSeriesModel::reinitializeResidualModel(double learnRate, - const TDecompositionPtr10Vec &trend, - const TTimeDouble2VecPrCBuf &slidingWindow, - CMultivariatePrior &residualModel) -{ + const TDecompositionPtr10Vec& trend, + const TTimeDouble2VecPrCBuf& slidingWindow, + CMultivariatePrior& residualModel) { residualModel.setToNonInformative(0.0, residualModel.decayRate()); - if (!slidingWindow.empty()) - { + if (!slidingWindow.empty()) { std::size_t dimension{residualModel.dimension()}; double slidingWindowLength{static_cast<double>(slidingWindow.size())}; - TDouble10Vec4Vec1Vec weight{{TDouble10Vec( - dimension, std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0)))}}; - for (const auto &value : slidingWindow) - { + TDouble10Vec4Vec1Vec weight{{TDouble10Vec(dimension, std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0)))}}; + for (const auto& value : slidingWindow) { TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; - for (std::size_t i = 0u; i < dimension; ++i) - { + for (std::size_t i = 0u; i < dimension; ++i) { sample[0][i] = trend[i]->detrend(value.first, value.second[i], 0.0); } residualModel.addSamples(CConstantWeights::COUNT, sample, weight); @@ -2882,34 +2301,26 @@ void CMultivariateTimeSeriesModel::reinitializeResidualModel(double learnRate, } } -const 
CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf &CMultivariateTimeSeriesModel::slidingWindow() const -{ +const CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf& CMultivariateTimeSeriesModel::slidingWindow() const { return m_SlidingWindow; } -const CMultivariateTimeSeriesModel::TDecompositionPtr10Vec &CMultivariateTimeSeriesModel::trendModel() const -{ +const CMultivariateTimeSeriesModel::TDecompositionPtr10Vec& CMultivariateTimeSeriesModel::trendModel() const { return m_TrendModel; } -const CMultivariatePrior &CMultivariateTimeSeriesModel::residualModel() const -{ +const CMultivariatePrior& CMultivariateTimeSeriesModel::residualModel() const { return *m_ResidualModel; } -CMultivariateTimeSeriesModel::EUpdateResult -CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weightStyles, - const TTimeDouble2VecSizeTrVec &samples, - const TDouble2Vec4VecVec &weights) -{ +CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles, + const TTimeDouble2VecSizeTrVec& samples, + const TDouble2Vec4VecVec& weights) { std::size_t dimension{this->dimension()}; - for (const auto &sample : samples) - { - if (sample.second.size() != dimension) - { - LOG_ERROR("Dimension mismatch: '" - << sample.second.size() << " != " << m_TrendModel.size() << "'"); + for (const auto& sample : samples) { + if (sample.second.size() != dimension) { + LOG_ERROR("Dimension mismatch: '" << sample.second.size() << " != " << m_TrendModel.size() << "'"); return E_Failure; } } @@ -2918,86 +2329,61 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec &weight // or for count feature, the times of all samples will be the same. TSizeVec timeorder(samples.size()); std::iota(timeorder.begin(), timeorder.end(), 0); - std::stable_sort(timeorder.begin(), timeorder.end(), - [&samples](std::size_t lhs, std::size_t rhs) - { - return COrderings::lexicographical_compare(samples[lhs].first, - samples[lhs].second, - samples[rhs].first, - samples[rhs].second); - }); + std::stable_sort(timeorder.begin(), timeorder.end(), [&samples](std::size_t lhs, std::size_t rhs) { + return COrderings::lexicographical_compare(samples[lhs].first, samples[lhs].second, samples[rhs].first, samples[rhs].second); + }); EUpdateResult result{E_Success}; { TDouble4Vec weight(weightStyles.size()); - for (auto i : timeorder) - { + for (auto i : timeorder) { core_t::TTime time{samples[i].first}; TDouble10Vec value(samples[i].second); - for (std::size_t d = 0u; d < dimension; ++d) - { - for (std::size_t j = 0u; j < weights[i].size(); ++j) - { + for (std::size_t d = 0u; d < dimension; ++d) { + for (std::size_t j = 0u; j < weights[i].size(); ++j) { weight[j] = weights[i][j][d]; } - if (m_TrendModel[d]->addPoint(time, value[d], weightStyles, weight)) - { + if (m_TrendModel[d]->addPoint(time, value[d], weightStyles, weight)) { result = E_Reset; } } } } - if (result == E_Reset) - { + if (result == E_Reset) { this->reinitializeStateGivenNewComponent(); } return result; } -void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, - const TDouble2Vec &sample, - TDouble1VecVec (&result)[2]) -{ - if (auto error = predictionError(interval, m_ResidualModel, sample)) - { +void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, const TDouble2Vec& sample, TDouble1VecVec (&result)[2]) { + if (auto error = predictionError(interval, m_ResidualModel, sample)) { result[E_ResidualControl].push_back(*error); } - if (auto error = 
predictionError(m_TrendModel, sample)) - { + if (auto error = predictionError(m_TrendModel, sample)) { result[E_TrendControl].push_back(*error); } } -void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent() -{ - reinitializeResidualModel(this->params().learnRate(), - m_TrendModel, m_SlidingWindow, *m_ResidualModel); - if (m_Controllers != nullptr) - { - m_ResidualModel->decayRate( m_ResidualModel->decayRate() - / (*m_Controllers)[E_ResidualControl].multiplier()); - for (auto &trend : m_TrendModel) - { - trend->decayRate( trend->decayRate() - / (*m_Controllers)[E_TrendControl].multiplier()); - } - for (auto &controller : *m_Controllers) - { +void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent() { + reinitializeResidualModel(this->params().learnRate(), m_TrendModel, m_SlidingWindow, *m_ResidualModel); + if (m_Controllers != nullptr) { + m_ResidualModel->decayRate(m_ResidualModel->decayRate() / (*m_Controllers)[E_ResidualControl].multiplier()); + for (auto& trend : m_TrendModel) { + trend->decayRate(trend->decayRate() / (*m_Controllers)[E_TrendControl].multiplier()); + } + for (auto& controller : *m_Controllers) { controller.reset(); } } - if (m_AnomalyModel != nullptr) - { + if (m_AnomalyModel != nullptr) { m_AnomalyModel->reset(); } } -std::size_t CMultivariateTimeSeriesModel::dimension() const -{ +std::size_t CMultivariateTimeSeriesModel::dimension() const { return m_ResidualModel->dimension(); } - } } diff --git a/lib/maths/CTools.cc b/lib/maths/CTools.cc index f69858676e..ade2194afc 100644 --- a/lib/maths/CTools.cc +++ b/lib/maths/CTools.cc @@ -8,13 +8,13 @@ #include #include -#include #include +#include #include #include -#include #include +#include #include #include #include @@ -30,68 +30,53 @@ #include #include #include -#include #include #include #include +#include #include #include #include - -namespace boost -{ -namespace math -{ -namespace policies -{ +namespace boost { +namespace math { +namespace policies { template<class T> -T user_overflow_error(const char */*function*/, const char */*message*/, const T &/*val*/) -{ +T user_overflow_error(const char* /*function*/, const char* /*message*/, const T& /*val*/) { return boost::numeric::bounds<T>::highest(); } - } } } -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TDoubleBoolPr = std::pair<double, bool>; using TDoubleDoublePr = std::pair<double, double>; using TOptionalDoubleDoublePr = boost::optional<TDoubleDoublePr>; -namespace adapters -{ +namespace adapters { template<typename DISTRIBUTION> -inline double pdf(const DISTRIBUTION &distribution, double x) -{ +inline double pdf(const DISTRIBUTION& distribution, double x) { return CTools::safePdf(distribution, x); } -inline double pdf(const CLogTDistribution &distribution, double x) -{ +inline double pdf(const CLogTDistribution& distribution, double x) { return ml::maths::pdf(distribution, x); } } // adapters:: -inline TDoubleBoolPr stationaryPoint(const boost::math::beta_distribution<> &beta) -{ - if (beta.alpha() < 1.0 && beta.beta() < 1.0) - { +inline TDoubleBoolPr stationaryPoint(const boost::math::beta_distribution<>& beta) { + if (beta.alpha() < 1.0 && beta.beta() < 1.0) { // This is the unique minimum of the p.d.f. - return { (beta.alpha() - 1.0) - / (beta.alpha() + beta.beta() - 2.0), false}; + return {(beta.alpha() - 1.0) / (beta.alpha() + beta.beta() - 2.0), false}; } return {boost::math::mode(beta), true}; }
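// For Beta(a, b) the derivative of the log-density is (a - 1)/x - (b - 1)/(1 - x);
// setting it to zero gives the stationary point x = (a - 1)/(a + b - 2). When
// a < 1 and b < 1 the density is U-shaped and this point is its unique interior
// minimum, which is the case the branch above handles.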
This is used to adapt the function for use //! with the boost::math solvers. template -class CPdf -{ - public: - CPdf(const DISTRIBUTION &distribution, double target) : - m_Distribution(distribution), - m_Target(target) - {} - - double operator()(double x) const - { - return adapters::pdf(m_Distribution, x) - m_Target; - } +class CPdf { +public: + CPdf(const DISTRIBUTION& distribution, double target) : m_Distribution(distribution), m_Target(target) {} + + double operator()(double x) const { return adapters::pdf(m_Distribution, x) - m_Target; } - private: - DISTRIBUTION m_Distribution; - double m_Target; +private: + DISTRIBUTION m_Distribution; + double m_Target; }; //! Convenience factory method for the CPdf object for \p distribution. template -inline CPdf makePdf(const DISTRIBUTION &distribution, double target) -{ +inline CPdf makePdf(const DISTRIBUTION& distribution, double target) { return CPdf(distribution, target); } template -inline double continuousSafePdf(const Distribution &distribution, double x) -{ +inline double continuousSafePdf(const Distribution& distribution, double x) { TDoubleDoublePr support = boost::math::support(distribution); - if (x <= support.first || x >= support.second) - { + if (x <= support.first || x >= support.second) { return 0.0; - } - else if (CMathsFuncs::isNan(x)) - { + } else if (CMathsFuncs::isNan(x)) { LOG_ERROR("x = NaN, distribution = " << typeid(Distribution).name()); return 0.0; } @@ -145,19 +118,15 @@ inline double continuousSafePdf(const Distribution &distribution, double x) } template -inline double discreteSafePdf(const Distribution &distribution, double x) -{ +inline double discreteSafePdf(const Distribution& distribution, double x) { // Note that the inequalities are strict this is needed because // the distribution is discrete and can have mass at the support // end points. TDoubleDoublePr support = boost::math::support(distribution); - if (x < support.first || x > support.second) - { + if (x < support.first || x > support.second) { return 0.0; - } - else if (CMathsFuncs::isNan(x)) - { + } else if (CMathsFuncs::isNan(x)) { LOG_ERROR("x = NaN, distribution = " << typeid(Distribution).name()); return 0.0; } @@ -165,19 +134,13 @@ inline double discreteSafePdf(const Distribution &distribution, double x) } template -inline double continuousSafeCdf(const Distribution &distribution, double x) -{ +inline double continuousSafeCdf(const Distribution& distribution, double x) { TDoubleDoublePr support = boost::math::support(distribution); - if (x <= support.first) - { + if (x <= support.first) { return 0.0; - } - else if (x >= support.second) - { + } else if (x >= support.second) { return 1.0; - } - else if (CMathsFuncs::isNan(x)) - { + } else if (CMathsFuncs::isNan(x)) { LOG_ERROR("x = NaN, distribution = " << typeid(Distribution).name()); return 0.0; } @@ -185,23 +148,17 @@ inline double continuousSafeCdf(const Distribution &distribution, double x) } template -inline double discreteSafeCdf(const Distribution &distribution, double x) -{ +inline double discreteSafeCdf(const Distribution& distribution, double x) { // Note that the inequalities are strict this is needed because // the distribution is discrete and can have mass at the support // end points. 
TDoubleDoublePr support = boost::math::support(distribution); - if (x < support.first) - { + if (x < support.first) { return 0.0; - } - else if (x > support.second) - { + } else if (x > support.second) { return 1.0; - } - else if (CMathsFuncs::isNan(x)) - { + } else if (CMathsFuncs::isNan(x)) { LOG_ERROR("x = NaN, distribution = " << typeid(Distribution).name()); return 0.0; } @@ -209,19 +166,13 @@ inline double discreteSafeCdf(const Distribution &distribution, double x) } template -inline double continuousSafeCdfComplement(const Distribution &distribution, double x) -{ +inline double continuousSafeCdfComplement(const Distribution& distribution, double x) { TDoubleDoublePr support = boost::math::support(distribution); - if (x <= support.first) - { + if (x <= support.first) { return 1.0; - } - else if (x >= support.second) - { + } else if (x >= support.second) { return 0.0; - } - else if (CMathsFuncs::isNan(x)) - { + } else if (CMathsFuncs::isNan(x)) { LOG_ERROR("x = NaN, distribution = " << typeid(Distribution).name()); return 0.0; } @@ -229,23 +180,17 @@ inline double continuousSafeCdfComplement(const Distribution &distribution, doub } template -inline double discreteSafeCdfComplement(const Distribution &distribution, double x) -{ +inline double discreteSafeCdfComplement(const Distribution& distribution, double x) { // Note that the inequalities are strict this is needed because // the distribution is discrete and can have mass at the support // end points. TDoubleDoublePr support = boost::math::support(distribution); - if (x < support.first) - { + if (x < support.first) { return 1.0; - } - else if (x > support.second) - { + } else if (x > support.second) { return 0.0; - } - else if (CMathsFuncs::isNan(x)) - { + } else if (CMathsFuncs::isNan(x)) { LOG_ERROR("x = NaN distribution = " << typeid(Distribution).name()); return 0.0; } @@ -259,17 +204,13 @@ const double POS_INF = boost::numeric::bounds::highest(); } // unnamed:: - //////// SMinusLogCdf Implementation //////// -namespace -{ +namespace { //! Computes -log(\p cdf) enforces limits and avoids underflow. -inline double safeMinusLogCdf(double cdf) -{ - if (cdf == 0.0) - { +inline double safeMinusLogCdf(double cdf) { + if (cdf == 0.0) { // log(0.0) == -HUGE_VALUE, which is too big for our purposes // and causes problems on Windows. 
In fact, we want to avoid // underflow since this will pollute the floating point @@ -281,133 +222,103 @@ inline double safeMinusLogCdf(double cdf) } return std::max(-std::log(cdf), 0.0); } - } const double CTools::IMPROPER_CDF(0.5); -double CTools::SMinusLogCdf::operator()(const SImproperDistribution &, double) const -{ +double CTools::SMinusLogCdf::operator()(const SImproperDistribution&, double) const { return -std::log(IMPROPER_CDF); } -double CTools::SMinusLogCdf::operator()(const normal &normal_, double x) const -{ +double CTools::SMinusLogCdf::operator()(const normal& normal_, double x) const { return safeMinusLogCdf(safeCdf(normal_, x)); } -double CTools::SMinusLogCdf::operator()(const students_t &students, double x) const -{ +double CTools::SMinusLogCdf::operator()(const students_t& students, double x) const { return safeMinusLogCdf(safeCdf(students, x)); } -double CTools::SMinusLogCdf::operator()(const negative_binomial &negativeBinomial, double x) const -{ +double CTools::SMinusLogCdf::operator()(const negative_binomial& negativeBinomial, double x) const { return safeMinusLogCdf(safeCdf(negativeBinomial, x)); } -double CTools::SMinusLogCdf::operator()(const lognormal &logNormal, double x) const -{ +double CTools::SMinusLogCdf::operator()(const lognormal& logNormal, double x) const { return safeMinusLogCdf(safeCdf(logNormal, x)); } -double CTools::SMinusLogCdf::operator()(const CLogTDistribution &logt, double x) const -{ +double CTools::SMinusLogCdf::operator()(const CLogTDistribution& logt, double x) const { return safeMinusLogCdf(maths::cdf(logt, x)); } -double CTools::SMinusLogCdf::operator()(const gamma &gamma_, double x) const -{ +double CTools::SMinusLogCdf::operator()(const gamma& gamma_, double x) const { return safeMinusLogCdf(safeCdf(gamma_, x)); } -double CTools::SMinusLogCdf::operator()(const beta &beta_, double x) const -{ +double CTools::SMinusLogCdf::operator()(const beta& beta_, double x) const { return safeMinusLogCdf(safeCdf(beta_, x)); } - //////// SMinusLogCdfComplement Implementation //////// -double CTools::SMinusLogCdfComplement::operator()(const SImproperDistribution &, double) const -{ +double CTools::SMinusLogCdfComplement::operator()(const SImproperDistribution&, double) const { return -std::log(1.0 - IMPROPER_CDF); } -double CTools::SMinusLogCdfComplement::operator()(const normal &normal_, double x) const -{ +double CTools::SMinusLogCdfComplement::operator()(const normal& normal_, double x) const { return safeMinusLogCdf(safeCdfComplement(normal_, x)); } -double CTools::SMinusLogCdfComplement::operator()(const students_t &students, double x) const -{ +double CTools::SMinusLogCdfComplement::operator()(const students_t& students, double x) const { return safeMinusLogCdf(safeCdfComplement(students, x)); } -double CTools::SMinusLogCdfComplement::operator()(const negative_binomial &negativeBinomial, double x) const -{ +double CTools::SMinusLogCdfComplement::operator()(const negative_binomial& negativeBinomial, double x) const { return safeMinusLogCdf(safeCdfComplement(negativeBinomial, x)); } -double CTools::SMinusLogCdfComplement::operator()(const lognormal &logNormal, double x) const -{ +double CTools::SMinusLogCdfComplement::operator()(const lognormal& logNormal, double x) const { return safeMinusLogCdf(safeCdfComplement(logNormal, x)); } -double CTools::SMinusLogCdfComplement::operator()(const CLogTDistribution &logt, double x) const -{ +double CTools::SMinusLogCdfComplement::operator()(const CLogTDistribution& logt, double x) const { return 
safeMinusLogCdf(maths::cdfComplement(logt, x)); } -double CTools::SMinusLogCdfComplement::operator()(const gamma &gamma_, double x) const -{ +double CTools::SMinusLogCdfComplement::operator()(const gamma& gamma_, double x) const { return safeMinusLogCdf(safeCdfComplement(gamma_, x)); } -double CTools::SMinusLogCdfComplement::operator()(const beta &beta_, double x) const -{ +double CTools::SMinusLogCdfComplement::operator()(const beta& beta_, double x) const { return safeMinusLogCdf(safeCdfComplement(beta_, x)); } - //////// SProbabilityLessLikelySample Implementation //////// -CTools::CProbabilityOfLessLikelySample::CProbabilityOfLessLikelySample( - maths_t::EProbabilityCalculation calculation) : - m_Calculation(calculation) -{ +CTools::CProbabilityOfLessLikelySample::CProbabilityOfLessLikelySample(maths_t::EProbabilityCalculation calculation) + : m_Calculation(calculation) { } -double CTools::CProbabilityOfLessLikelySample::operator()(const SImproperDistribution &, - double, - maths_t::ETail &tail) const -{ +double CTools::CProbabilityOfLessLikelySample::operator()(const SImproperDistribution&, double, maths_t::ETail& tail) const { // For any finite sample this is one. tail = maths_t::E_MixedOrNeitherTail; return 1.0; } -double CTools::CProbabilityOfLessLikelySample::operator()(const normal &normal_, - double x, - maths_t::ETail &tail) const -{ +double CTools::CProbabilityOfLessLikelySample::operator()(const normal& normal_, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = boost::math::support(normal_); - if (!this->check(support, x, px, tail)) - { + if (!this->check(support, x, px, tail)) { return px; } - switch (m_Calculation) - { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: px = truncate(2.0 * safeCdf(normal_, x), 0.0, 1.0); tail = static_cast(tail | maths_t::E_LeftTail); break; - case maths_t::E_TwoSided: - { + case maths_t::E_TwoSided: { // The normal distribution is symmetric and single mode so the // probability of less likely events than x is: // 2 * std::min(cdf(x), 1 - cdf(x)). @@ -416,12 +327,9 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const normal &normal_, // so that we aren't restricted to epsilon precision. double m = boost::math::mode(normal_); - if (x < m) - { + if (x < m) { px = truncate(2.0 * safeCdf(normal_, x), 0.0, 1.0); - } - else - { + } else { px = truncate(2.0 * safeCdfComplement(normal_, x), 0.0, 1.0); } this->tail(x, m, tail); @@ -436,27 +344,21 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const normal &normal_, return px; } -double CTools::CProbabilityOfLessLikelySample::operator()(const students_t &students, - double x, - maths_t::ETail &tail) const -{ +double CTools::CProbabilityOfLessLikelySample::operator()(const students_t& students, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = boost::math::support(students); - if (!this->check(support, x, px, tail)) - { + if (!this->check(support, x, px, tail)) { return px; } - switch (m_Calculation) - { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: px = truncate(2.0 * safeCdf(students, x), 0.0, 1.0); tail = static_cast(tail | maths_t::E_LeftTail); break; - case maths_t::E_TwoSided: - { + case maths_t::E_TwoSided: { // Student's t distribution is symmetric and single mode so the // probability of less likely events than x is: // 2 * std::min(cdf(x), 1 - cdf(x)). 
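// --------------------------------------------------------------------------
// [Editor's note] A minimal, self-contained sketch (not part of the patch)
// of the calculation described in the comments above and below: for a
// symmetric, single-mode distribution the probability of a less likely
// sample is 2 * std::min(cdf(x), 1 - cdf(x)), with the right tail evaluated
// via the cdf complement so the result is not limited to machine-epsilon
// precision. Only Boost.Math is assumed; the function name is hypothetical.

#include <boost/math/distributions/normal.hpp>
#include <algorithm>

double twoSidedTailProbability(const boost::math::normal_distribution<>& d, double x) {
    double lower = boost::math::cdf(d, x);
    // cdf(complement(d, x)) evaluates 1 - cdf(x) directly, keeping precision
    // deep in the right tail where the subtraction 1.0 - cdf(d, x) rounds
    // to zero.
    double upper = boost::math::cdf(boost::math::complement(d, x));
    return std::min(2.0 * std::min(lower, upper), 1.0);
}

// Usage note: for a standard normal at x = 9.0 this returns ~2.3e-19,
// whereas the naive 2.0 * (1.0 - cdf(d, 9.0)) evaluates to exactly 0.0.
// --------------------------------------------------------------------------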
@@ -464,12 +366,9 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const students_t &stud // Note, we use the complement function to compute the 1 - cdf(x) // so that we aren't restricted to epsilon precision. double m = boost::math::mode(students); - if (x < m) - { + if (x < m) { px = truncate(2.0 * safeCdf(students, x), 0.0, 1.0); - } - else - { + } else { px = truncate(2.0 * safeCdfComplement(students, x), 0.0, 1.0); } this->tail(x, m, tail); @@ -484,22 +383,17 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const students_t &stud return px; } -double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomial &negativeBinomial, - double x, - maths_t::ETail &tail) const -{ +double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomial& negativeBinomial, double x, maths_t::ETail& tail) const { x = std::floor(x); double px = 0.0; TDoubleDoublePr support = boost::math::support(negativeBinomial); - if (!this->check(support, x, px, tail)) - { + if (!this->check(support, x, px, tail)) { return px; } - switch (m_Calculation) - { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: tail = static_cast(tail | maths_t::E_LeftTail); return truncate(2.0 * safeCdf(negativeBinomial, x), 0.0, 1.0); @@ -510,8 +404,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia case maths_t::E_OneSidedAbove: tail = static_cast(tail | maths_t::E_RightTail); - return truncate(2.0 * ( safeCdfComplement(negativeBinomial, x) - + safePdf(negativeBinomial, x)), 0.0, 1.0); + return truncate(2.0 * (safeCdfComplement(negativeBinomial, x) + safePdf(negativeBinomial, x)), 0.0, 1.0); } double fx = safePdf(negativeBinomial, x); @@ -520,10 +413,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia double m = boost::math::mode(negativeBinomial); LOG_TRACE("x = " << x << ", f(x) = " << fx); - // If the number of successes <= 1 the distribution is single sided. - if (r <= 1.0) - { + if (r <= 1.0) { tail = maths_t::E_RightTail; return truncate(safeCdfComplement(negativeBinomial, x) + fx, 0.0, 1.0); } @@ -532,12 +423,10 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia // If the f(x) <= f(0) and x is greater than the mode the probability // is just P(y > x). - if (x > m) - { + if (x > m) { double f0 = safePdf(negativeBinomial, 0.0); LOG_TRACE("f(0) = " << f0); - if (fx <= f0) - { + if (fx <= f0) { return truncate(safeCdfComplement(negativeBinomial, x) + fx, 0.0, 1.0); } } @@ -547,8 +436,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia LOG_TRACE("m = " << m << ", f(m) = " << fm); - if (fx >= fm) - { + if (fx >= fm) { return 1.0; } @@ -556,17 +444,14 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia std::size_t maxIterations = MAX_ITERATIONS; double b1, b2, f1, f2; - if (x > m) - { + if (x > m) { b1 = b2 = m; f1 = f2 = fm; double shrinkFactor = 1.5; double step = (1.0 / shrinkFactor - 1.0) * b1; - for (;;) - { - if (maxIterations == 0 || f1 <= fx) - { + for (;;) { + if (maxIterations == 0 || f1 <= fx) { break; } @@ -576,17 +461,13 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia f1 = safePdf(negativeBinomial, b1); --maxIterations; - if (maxIterations <= 3 * MAX_ITERATIONS / 4) - { + if (maxIterations <= 3 * MAX_ITERATIONS / 4) { shrinkFactor *= 2.0; } - step = (maxIterations == MAX_ITERATIONS / 2 ? 
- b1 : (1.0 / shrinkFactor - 1.0) * b1); + step = (maxIterations == MAX_ITERATIONS / 2 ? b1 : (1.0 / shrinkFactor - 1.0) * b1); } - } - else - { + } else { // Noting that the binomial coefficient (k + r - 1)! / k! / (r - 1)! // is a monotonic increasing function of k, we have for any k': // f(k') * (1 - p)^(k - k') < f(k) for k > k' @@ -596,8 +477,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia double logOneMinusP = std::log(1 - p); - b1 = std::floor(m + std::log( std::max(fx, MIN_DOUBLE) - / std::max(fm, MIN_DOUBLE)) / logOneMinusP); + b1 = std::floor(m + std::log(std::max(fx, MIN_DOUBLE) / std::max(fm, MIN_DOUBLE)) / logOneMinusP); f1 = safePdf(negativeBinomial, b1); b2 = b1; f2 = f1; @@ -606,10 +486,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia double growthFactor = 0.25; double step = growthFactor * b2; - for (;;) - { - if (maxIterations == 0 || f2 <= fx) - { + for (;;) { + if (maxIterations == 0 || f2 <= fx) { break; } @@ -621,29 +499,23 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia // We compute successively tighter lower bounds on the // bracket point. - double lowerBound = b2 + std::log( std::max(fx, MIN_DOUBLE) - / std::max(f2, MIN_DOUBLE)) - / logOneMinusP; - LOG_TRACE("b2 = " << b2 - << ", f2 = " - << f2 << ", bound = " << lowerBound); - - if (maxIterations <= 3 * MAX_ITERATIONS / 4) - { + double lowerBound = b2 + std::log(std::max(fx, MIN_DOUBLE) / std::max(f2, MIN_DOUBLE)) / logOneMinusP; + LOG_TRACE("b2 = " << b2 << ", f2 = " << f2 << ", bound = " << lowerBound); + + if (maxIterations <= 3 * MAX_ITERATIONS / 4) { growthFactor *= 4.0; } step = std::max(growthFactor * b2, lowerBound - b2); - } + } } LOG_TRACE("Initial bracket = (" << b1 << "," << b2 << ")" - << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); + << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); px = x < m ? 
safeCdf(negativeBinomial, x) : safeCdfComplement(negativeBinomial, x); double y = POS_INF; - try - { + try { // Note that this form of epsilon controls the maximum // relative error in the probability since p > px and // the error will be order eps * f(x) so we require that @@ -651,69 +523,46 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia double eps = 0.05 * px / std::max(fx, MIN_DOUBLE); eps = std::max(eps, EPSILON * std::min(b1, b2)); CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); - CSolvers::solve(b1, b2, f1 - fx, f2 - fx, - makePdf(negativeBinomial, fx), - maxIterations, equal, y); + CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(negativeBinomial, fx), maxIterations, equal, y); LOG_TRACE("bracket = (" << b1 << "," << b2 << ")" - << ", iterations = " << maxIterations - << ", f(y) = " << safePdf(negativeBinomial, y) - fx - << ", eps = " << eps); - } - catch (const std::exception &e) - { - if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) - { + << ", iterations = " << maxIterations << ", f(y) = " << safePdf(negativeBinomial, y) - fx + << ", eps = " << eps); + } catch (const std::exception& e) { + if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; - } - else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) - { + } else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; - } - else - { - LOG_ERROR("Failed in root finding: " << e.what() - << ", x = " << x - << ", bracket = (" << b1 << "," << b2 << ")" - << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); + } else { + LOG_ERROR("Failed in root finding: " << e.what() << ", x = " << x << ", bracket = (" << b1 << "," << b2 << ")" + << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); return truncate(px, 0.0, 1.0); } } - if ( (x < m && y < m) - || (x > m && y > m) - || !(x >= support.first && x <= support.second)) - { + if ((x < m && y < m) || (x > m && y > m) || !(x >= support.first && x <= support.second)) { LOG_ERROR("Bad root " << y << " (x = " << x << ")"); } - double py = x < m ? - safeCdfComplement(negativeBinomial, y) : - safeCdf(negativeBinomial, y); + double py = x < m ? safeCdfComplement(negativeBinomial, y) : safeCdf(negativeBinomial, y); return truncate(px + py + fx, 0.0, 1.0); } -double CTools::CProbabilityOfLessLikelySample::operator()(const lognormal &logNormal, - double x, - maths_t::ETail &tail) const -{ +double CTools::CProbabilityOfLessLikelySample::operator()(const lognormal& logNormal, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = boost::math::support(logNormal); - if (!this->check(support, x, px, tail)) - { + if (!this->check(support, x, px, tail)) { return px; } - switch (m_Calculation) - { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: px = truncate(2.0 * safeCdf(logNormal, x), 0.0, 1.0); tail = static_cast(tail | maths_t::E_LeftTail); break; - case maths_t::E_TwoSided: - { + case maths_t::E_TwoSided: { // Changing variables to x = exp(m) * exp(x') where m is the location // of the log normal distribution it is possible to show that the // equal point on the p.d.f. 
is at: @@ -727,14 +576,11 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const lognormal &logNo double logx = std::log(x); double squareScale = pow2(logNormal.scale()); double discriminant = - std::sqrt(pow2(squareScale) - + (logx - logNormal.location() + 2.0 * squareScale) - * (logx - logNormal.location())); + std::sqrt(pow2(squareScale) + (logx - logNormal.location() + 2.0 * squareScale) * (logx - logNormal.location())); double m = boost::math::mode(logNormal); this->tail(x, m, tail); double y = m * std::exp(x > m ? -discriminant : discriminant); - if (x > y) - { + if (x > y) { std::swap(x, y); } px = truncate(safeCdf(logNormal, x) + safeCdfComplement(logNormal, y), 0.0, 1.0); @@ -749,20 +595,15 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const lognormal &logNo return px; } -double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistribution &logt, - double x, - maths_t::ETail &tail) const -{ +double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistribution& logt, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = maths::support(logt); - if (!this->check(support, x, px, tail)) - { + if (!this->check(support, x, px, tail)) { return px; } - switch (m_Calculation) - { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: tail = static_cast(tail | maths_t::E_LeftTail); return truncate(2.0 * cdf(logt, x), 0.0, 1.0); @@ -789,33 +630,26 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio // Note that the p.d.f. can have a local minimum between zero // and the mode of the distribution. CLogTDistribution::TOptionalDouble localMinimum = maths::localMinimum(logt); - if (!localMinimum) - { + if (!localMinimum) { // If there is no local minimum the distribution is single sided. return truncate(cdfComplement(logt, x), 0.0, 1.0); - } - else - { + } else { double b1 = *localMinimum; double f1 = pdf(logt, b1); LOG_TRACE("b1 = " << b1 << ", f(b1) = " << f1); - if (f1 > fx) - { + if (f1 > fx) { return truncate(cdfComplement(logt, x), 0.0, 1.0); - } - else if (x > m) - { + } else if (x > m) { px = cdfComplement(logt, x); double b2 = m; double f2 = pdf(logt, m); LOG_TRACE("Initial bracket = (" << b1 << "," << b2 << ")" - << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); + << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); double y = 0.0; - try - { + try { // The gradient of the log normal p.d.f. 
can be very // large near the origin so we use the maximum of f1 and // f2 to be safe here rather that the value of f at the @@ -827,30 +661,17 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio eps = std::max(eps, EPSILON * std::min(b1, b2)); CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); std::size_t maxIterations = MAX_ITERATIONS; - CSolvers::solve(b1, b2, f1 - fx, f2 - fx, - makePdf(logt, fx), - maxIterations, equal, y); + CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(logt, fx), maxIterations, equal, y); LOG_TRACE("bracket = (" << b1 << "," << b2 << ")" - << ", iterations = " << maxIterations - << ", f(y) = " << pdf(logt, y) - fx - << ", eps = " << eps); - } - catch (const std::exception &e) - { - if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) - { + << ", iterations = " << maxIterations << ", f(y) = " << pdf(logt, y) - fx << ", eps = " << eps); + } catch (const std::exception& e) { + if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; - } - else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) - { + } else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; - } - else - { - LOG_ERROR("Failed in root finding: " << e.what() - << ", x = " << x - << ", bracket = (" << b1 << "," << b2 << ")" - << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); + } else { + LOG_ERROR("Failed in root finding: " << e.what() << ", x = " << x << ", bracket = (" << b1 << "," << b2 << ")" + << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); return truncate(px, 0.0, 1.0); } } @@ -864,8 +685,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio // For small x the density can be greater than the local mode. double fm = pdf(logt, m); LOG_TRACE("f(m) = " << fm); - if (fx > fm) - { + if (fx > fm) { return truncate(cdfComplement(logt, x), 0.0, 1.0); } @@ -899,13 +719,10 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio double scale = std::sqrt(v) * s; double bound = 0.0; double fBound = POS_INF; - if (fl < fx) - { + if (fl < fx) { bound = std::exp(l); fBound = fl; - } - else - { + } else { double t1 = l + std::log(fl / fx); double t2 = (l - scale) / 8.0 + std::log(scale / 3.0); double k0 = 8.0 * (t1 + (v + 1.0) * t2) / (v + 9.0); @@ -916,8 +733,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio double f1 = fBound < fx ? 
fm : fBound; double b2 = bound; double f2 = fBound; - LOG_TRACE("b1 = " << b1 << ", f(b1) = " << f1 - << ", b2 = " << b2 << ", f(b2) = " << f2); + LOG_TRACE("b1 = " << b1 << ", f(b1) = " << f1 << ", b2 = " << b2 << ", f(b2) = " << f2); std::size_t maxIterations = MAX_ITERATIONS; @@ -927,10 +743,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio // quickly and fallback to the bound if we haven't bracketed double step = std::max(b2, std::exp(l) - b2); double growthFactor = 1.0; - for (;;) - { - if (maxIterations == 0 || f2 <= fx) - { + for (;;) { + if (maxIterations == 0 || f2 <= fx) { break; } @@ -945,29 +759,23 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio double upperBound = b2 * f2 / fx; LOG_TRACE("Bound = " << upperBound); - if (maxIterations <= 3 * MAX_ITERATIONS / 4) - { + if (maxIterations <= 3 * MAX_ITERATIONS / 4) { growthFactor *= 3.0; } - if ( maxIterations <= MAX_ITERATIONS / 2 - || upperBound - b2 < 2.0 * growthFactor * b2) - { + if (maxIterations <= MAX_ITERATIONS / 2 || upperBound - b2 < 2.0 * growthFactor * b2) { step = upperBound - b2; - } - else - { + } else { step = growthFactor * b2; } } LOG_TRACE("Initial bracket = (" << b1 << "," << b2 << ")" - << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); + << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); px = cdf(logt, x); double y = POS_INF; - try - { + try { // Note that this form of epsilon controls the maximum // relative error in the probability since p > px and // the error will be order eps * f(x) so we require that @@ -975,30 +783,17 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio double eps = 0.05 * px / std::max(fx, MIN_DOUBLE); eps = std::max(eps, EPSILON * std::min(b1, b2)); CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); - CSolvers::solve(b1, b2, f1 - fx, f2 - fx, - makePdf(logt, fx), - maxIterations, equal, y); + CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(logt, fx), maxIterations, equal, y); LOG_TRACE("bracket = (" << b1 << "," << b2 << ")" - << ", iterations = " << maxIterations - << ", f(y) = " << pdf(logt, y) - fx - << ", eps = " << eps); - } - catch (const std::exception &e) - { - if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) - { + << ", iterations = " << maxIterations << ", f(y) = " << pdf(logt, y) - fx << ", eps = " << eps); + } catch (const std::exception& e) { + if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; - } - else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) - { + } else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; - } - else - { - LOG_ERROR("Failed in root finding: " << e.what() - << ", x = " << x - << ", bracket = (" << b1 << "," << b2 << ")" - << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); + } else { + LOG_ERROR("Failed in root finding: " << e.what() << ", x = " << x << ", bracket = (" << b1 << "," << b2 << ")" + << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); return truncate(px, 0.0, 1.0); } } @@ -1006,20 +801,15 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio return truncate(px + cdfComplement(logt, y), 0.0, 1.0); } -double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, - double x, - maths_t::ETail &tail) const -{ +double CTools::CProbabilityOfLessLikelySample::operator()(const gamma& gamma_, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = boost::math::support(gamma_); - if (!this->check(support, x, px, 
tail)) - { + if (!this->check(support, x, px, tail)) { return px; } - switch (m_Calculation) - { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: tail = static_cast(tail | maths_t::E_LeftTail); return truncate(2.0 * safeCdf(gamma_, x), 0.0, 1.0); @@ -1034,8 +824,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, } // For alpha <= 1 the distribution is single sided. - if (gamma_.shape() <= 1.0) - { + if (gamma_.shape() <= 1.0) { tail = static_cast(tail | maths_t::E_RightTail); return truncate(safeCdfComplement(gamma_, x), 0.0, 1.0); } @@ -1049,15 +838,12 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, this->tail(x, m, tail); - double y[] = { 2.0 * m - x, 0.0 }; + double y[] = {2.0 * m - x, 0.0}; unsigned int i = 0u; - if (x == m) - { + if (x == m) { return 1.0; - } - else if (x < m) - { + } else if (x < m) { // For x < m we use the recurrence relation: // y(n+1) = x + m * log(y(n) / x) // @@ -1074,19 +860,14 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, // initializing with a second order Taylor expansion about the // mode. - for (;;) - { + for (;;) { y[(i + 1) % 2] = x + m * std::log(y[i % 2] / x); LOG_TRACE("y = " << y[(i + 1) % 2]); - if (++i == MAX_ITERATIONS - || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) - { + if (++i == MAX_ITERATIONS || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) { break; } } - } - else - { + } else { // For x > m we use the recurrence relation: // y(n+1) = m - x * exp(-(x - y(n)) / m) // @@ -1105,13 +886,10 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, y[0] = std::max(y[0], m / 2.0); - for (;;) - { + for (;;) { y[(i + 1) % 2] = x * std::exp(-(x - y[i % 2]) / m); LOG_TRACE("y = " << y[(i + 1) % 2]); - if (++i == MAX_ITERATIONS - || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) - { + if (++i == MAX_ITERATIONS || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) { break; } } @@ -1121,10 +899,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, double fy = safePdf(gamma_, y[i % 2]); LOG_TRACE("f(x) = " << fx << ", f(y) = " << fy); - if (std::fabs(fx - fy) <= PDF_TOLERANCE * std::max(fx, fy)) - { - if (x > y[i % 2]) - { + if (std::fabs(fx - fy) <= PDF_TOLERANCE * std::max(fx, fy)) { + if (x > y[i % 2]) { std::swap(x, y[i % 2]); } return truncate(safeCdf(gamma_, x) + safeCdfComplement(gamma_, y[i % 2]), 0.0, 1.0); @@ -1148,38 +924,30 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, double b = a; double fa = fy; double fb = fa; - if (x > m && fy < fx) - { + if (x > m && fy < fx) { b = m; fb = safePdf(gamma_, m); - } - else if (x > m && fy > fx) - { + } else if (x > m && fy > fx) { b = (1.0 + gamma_.scale() / (a - m) * std::log(fa / fx)) * a; fb = safePdf(gamma_, b); std::swap(a, b); std::swap(fa, fb); - } - else if (fy < fx) - { + } else if (fy < fx) { b = m; fb = safePdf(gamma_, m); std::swap(a, b); std::swap(fa, fb); - } - else - { + } else { b = (1.0 + gamma_.scale() / (a - m) * std::log(fa / fx)) * a; fb = safePdf(gamma_, b); } LOG_TRACE("Initial bracket = (" << a << ", " << b << ")" - << ", f(bracket) = (" << fa - fx << "," << fb - fx << ")"); + << ", f(bracket) = (" << fa - fx << "," << fb - fx << ")"); px = x > m ? safeCdfComplement(gamma_, x) : safeCdf(gamma_, x); - try - { + try { // The gradient of the gamma p.d.f. 
can be very large // near the origin so we use the maximum of fa and // fb to be safe here rather that the value of f at the @@ -1194,30 +962,19 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, double candidate; CSolvers::solve(a, b, fa - fx, fb - fx, makePdf(gamma_, fx), maxIterations, equal, candidate); LOG_TRACE("bracket = (" << a << "," << b << ")" - << ", iterations = " << maxIterations - << ", f(candidate) = " << safePdf(gamma_, candidate) - fx); + << ", iterations = " << maxIterations << ", f(candidate) = " << safePdf(gamma_, candidate) - fx); - if (std::fabs(safePdf(gamma_, candidate) - fx) < std::fabs(fy - fx)) - { + if (std::fabs(safePdf(gamma_, candidate) - fx) < std::fabs(fy - fx)) { y[i % 2] = candidate; } - } - catch (const std::exception &e) - { - if (std::fabs(fa - fx) < 10.0 * EPSILON * fx) - { + } catch (const std::exception& e) { + if (std::fabs(fa - fx) < 10.0 * EPSILON * fx) { y[i % 2] = a; - } - else if (std::fabs(fb - fx) < 10.0 * EPSILON * fx) - { + } else if (std::fabs(fb - fx) < 10.0 * EPSILON * fx) { y[i % 2] = b; - } - else - { - LOG_ERROR("Failed in bracketed solver: " << e.what() - << ", x = " << x - << ", bracket = (" << a << ", " << b << ")" - << ", f(bracket) = (" << fa - fx << "," << fb - fx << ")"); + } else { + LOG_ERROR("Failed in bracketed solver: " << e.what() << ", x = " << x << ", bracket = (" << a << ", " << b << ")" + << ", f(bracket) = (" << fa - fx << "," << fb - fx << ")"); return truncate(px, 0.0, 1.0); } } @@ -1229,20 +986,15 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma &gamma_, return truncate(px + py, 0.0, 1.0); } -double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, - double x, - maths_t::ETail &tail) const -{ +double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support(0.0, 1.0); - if (!this->check(support, x, px, tail)) - { + if (!this->check(support, x, px, tail)) { return px; } - switch (m_Calculation) - { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: tail = static_cast(tail | maths_t::E_LeftTail); return truncate(2.0 * safeCdf(beta_, x), 0.0, 1.0); @@ -1256,8 +1008,7 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, return truncate(2.0 * safeCdfComplement(beta_, x), 0.0, 1.0); } - if (beta_.alpha() < 1.0 && beta_.beta() < 1.0) - { + if (beta_.alpha() < 1.0 && beta_.beta() < 1.0) { // The probability density function tends to infinity at x = 0 // and x = 1 and has a unique minimum in the interval (0,1). // @@ -1267,32 +1018,23 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, tail = maths_t::E_MixedOrNeitherTail; double eps = boost::math::tools::epsilon(); - if (x <= eps || x >= 1.0 - eps) - { + if (x <= eps || x >= 1.0 - eps) { return 1.0; } support = std::make_pair(eps, 1.0 - eps); - } - else if (beta_.alpha() == 1.0 && beta_.beta() == 1.0) - { + } else if (beta_.alpha() == 1.0 && beta_.beta() == 1.0) { // The distribution is flat. tail = maths_t::E_MixedOrNeitherTail; return 1.0; - } - else if (beta_.alpha() <= 1.0 && beta_.beta() >= 1.0) - { + } else if (beta_.alpha() <= 1.0 && beta_.beta() >= 1.0) { // The distribution is monotone decreasing. 
tail = static_cast(tail | maths_t::E_RightTail); return truncate(safeCdfComplement(beta_, x), 0.0, 1.0); - } - else if (beta_.alpha() >= 1.0 && beta_.beta() <= 1.0) - { + } else if (beta_.alpha() >= 1.0 && beta_.beta() <= 1.0) { // The distribution is monotone increasing. tail = static_cast(tail | maths_t::E_LeftTail); return truncate(safeCdf(beta_, x), 0.0, 1.0); - } - else - { + } else { // If alpha > 1 and beta > 1 the probability density function // tends to zero at x = 0 and x = 1 and has a unique maximum in // the interval (0,1). @@ -1310,13 +1052,12 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, const unsigned int MAX_ITERATIONS = 6u; TDoubleBoolPr sp = stationaryPoint(beta_); - double y[] = { 2.0 * sp.first - x, 0.0 }; + double y[] = {2.0 * sp.first - x, 0.0}; unsigned int i = 0u; this->tail(x, sp.first, tail); - if (x < sp.first) - { + if (x < sp.first) { // For x < mode we use the recurrence relation: // y(n+1) = 1 - (x / y(n))^((a-1)/(b-1)) * (1 - x) // @@ -1336,28 +1077,20 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, y[0] = std::min(y[0], (1.0 + sp.first) / 2.0); double k = (beta_.alpha() - 1.0) / (beta_.beta() - 1.0); - for (;;) - { + for (;;) { y[(i + 1) % 2] = 1.0 - std::exp(k * std::log(x / y[i % 2])) * (1.0 - x); - if (++i == MAX_ITERATIONS - || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE) - { + if (++i == MAX_ITERATIONS || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE) { break; } } // Max sure y is supported by the p.d.f. - if (y[i % 2] > support.second) - { - return truncate(sp.second ? - safeCdf(beta_, x) : - safeCdfComplement(beta_, x), 0.0, 1.0); + if (y[i % 2] > support.second) { + return truncate(sp.second ? safeCdf(beta_, x) : safeCdfComplement(beta_, x), 0.0, 1.0); } y[i % 2] = std::max(y[i % 2], sp.first); - } - else - { + } else { // For x > mode we use the recurrence relation: // y(n+1) = ((1 - x) / (1 - y(n)))^((b-1)/(a-1)) * x // @@ -1377,22 +1110,16 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, y[0] = std::max(y[0], sp.first / 2.0); double k = (beta_.beta() - 1.0) / (beta_.alpha() - 1.0); - for (;;) - { - y[(i + 1) % 2] = std::exp(k * std::log((1.0 - x) / (1.0 - y[i % 2]))) * x; - if (++i == MAX_ITERATIONS - || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE) - { + for (;;) { + y[(i + 1) % 2] = std::exp(k * std::log((1.0 - x) / (1.0 - y[i % 2]))) * x; + if (++i == MAX_ITERATIONS || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE) { break; } } // Max sure y is supported by the p.d.f. - if (y[i % 2] < support.first) - { - return truncate(sp.second ? - safeCdfComplement(beta_, x) : - safeCdf(beta_, x), 0.0, 1.0); + if (y[i % 2] < support.first) { + return truncate(sp.second ? safeCdfComplement(beta_, x) : safeCdf(beta_, x), 0.0, 1.0); } y[i % 2] = std::min(y[i % 2], sp.first); @@ -1405,147 +1132,109 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta &beta_, TDoubleDoublePr bracket(support); TDoubleDoublePr fBracket(0.0, 0.0); - try - { + try { double error = sp.second ? fy - fx : fx - fy; - if (std::fabs(error) <= PDF_TOLERANCE * std::max(fx, fy)) - { - if (x > y[i % 2]) - { + if (std::fabs(error) <= PDF_TOLERANCE * std::max(fx, fy)) { + if (x > y[i % 2]) { std::swap(x, y[i % 2]); } - return truncate(sp.second ? - safeCdf(beta_, x) + safeCdfComplement(beta_, y[i % 2]) : - safeCdf(beta_, y[i % 2]) - safeCdf(beta_, x), 0.0, 1.0); - } - else if (error > 0.0) - { - if (x < sp.first) - { + return truncate(sp.second ? 
safeCdf(beta_, x) + safeCdfComplement(beta_, y[i % 2]) + : safeCdf(beta_, y[i % 2]) - safeCdf(beta_, x), + 0.0, + 1.0); + } else if (error > 0.0) { + if (x < sp.first) { bracket = std::make_pair(y[i % 2], bracket.second); double fa = fy - fx; double fb = safePdf(beta_, bracket.second) - fx; fBracket = std::make_pair(fa, fb); - } - else - { + } else { bracket = std::make_pair(bracket.first, y[i % 2]); double fa = safePdf(beta_, bracket.first) - fx; double fb = fy - fx; fBracket = std::make_pair(fa, fb); } - } - else - { + } else { bracket = std::make_pair(sp.first, y[i % 2]); double fa = safePdf(beta_, sp.first) - fx; double fb = fy - fx; fBracket = std::make_pair(fa, fb); } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to evaluate p.d.f.: " << e.what() - << ", alpha = " << beta_.alpha() - << ", beta = " << beta_.beta() - << ", x = " << x << ", y = " << y[i % 2]); + } catch (const std::exception& e) { + LOG_ERROR("Failed to evaluate p.d.f.: " << e.what() << ", alpha = " << beta_.alpha() << ", beta = " << beta_.beta() << ", x = " << x + << ", y = " << y[i % 2]); return 1.0; } - if (bracket.first > bracket.second) - { + if (bracket.first > bracket.second) { std::swap(bracket.first, bracket.second); std::swap(fBracket.first, fBracket.second); } LOG_TRACE("Initial bracket = " << core::CContainerPrinter::print(bracket) - << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)); + << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)); - try - { + try { double eps = 0.05 / fx; eps = std::max(eps, EPSILON * std::min(bracket.first, bracket.second)); CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); std::size_t maxIterations = MAX_ITERATIONS; double candidate; - CSolvers::solve(bracket.first, bracket.second, - fBracket.first, fBracket.second, - makePdf(beta_, fx), - maxIterations, equal, candidate); - - LOG_TRACE("bracket = " << core::CContainerPrinter::print(bracket) - << ", iterations = " << maxIterations - << ", f(candidate) = " << safePdf(beta_, candidate) - fx - << ", eps = " << eps); - - if (std::fabs(safePdf(beta_, candidate) - fx) < std::fabs(fy - fx)) - { + CSolvers::solve( + bracket.first, bracket.second, fBracket.first, fBracket.second, makePdf(beta_, fx), maxIterations, equal, candidate); + + LOG_TRACE("bracket = " << core::CContainerPrinter::print(bracket) << ", iterations = " << maxIterations + << ", f(candidate) = " << safePdf(beta_, candidate) - fx << ", eps = " << eps); + + if (std::fabs(safePdf(beta_, candidate) - fx) < std::fabs(fy - fx)) { y[i % 2] = candidate; } - } - catch (const std::exception &e) - { - if (std::fabs(fBracket.first - fx) < 10.0 * EPSILON * fx) - { + } catch (const std::exception& e) { + if (std::fabs(fBracket.first - fx) < 10.0 * EPSILON * fx) { y[i % 2] = bracket.first; - } - else if (std::fabs(fBracket.second - fx) < 10.0 * EPSILON * fx) - { + } else if (std::fabs(fBracket.second - fx) < 10.0 * EPSILON * fx) { y[i % 2] = bracket.second; - } - else - { - LOG_ERROR("Failed in bracketed solver: " << e.what() - << ", x = " << x - << ", bracket " << core::CContainerPrinter::print(bracket) - << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)); + } else { + LOG_ERROR("Failed in bracketed solver: " << e.what() << ", x = " << x << ", bracket " << core::CContainerPrinter::print(bracket) + << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)); return 1.0; } } - if (x > y[i % 2]) - { + if (x > y[i % 2]) { std::swap(x, y[i % 2]); } - return truncate(sp.second ? 
- safeCdf(beta_, x) + safeCdfComplement(beta_, y[i % 2]) : - safeCdf(beta_, y[i % 2]) - safeCdf(beta_, x), 0.0, 1.0); + return truncate( + sp.second ? safeCdf(beta_, x) + safeCdfComplement(beta_, y[i % 2]) : safeCdf(beta_, y[i % 2]) - safeCdf(beta_, x), 0.0, 1.0); } -bool CTools::CProbabilityOfLessLikelySample::check(const TDoubleDoublePr &support, - double x, - double &px, - maths_t::ETail &tail) const -{ - if (CMathsFuncs::isNan(x)) - { +bool CTools::CProbabilityOfLessLikelySample::check(const TDoubleDoublePr& support, double x, double& px, maths_t::ETail& tail) const { + if (CMathsFuncs::isNan(x)) { LOG_ERROR("Bad argument x = " << x); tail = maths_t::E_MixedOrNeitherTail; return false; - } - else if (x < support.first) - { - switch (m_Calculation) - { + } else if (x < support.first) { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: case maths_t::E_TwoSided: - px = 0.0; break; + px = 0.0; + break; case maths_t::E_OneSidedAbove: - px = 1.0; break; + px = 1.0; + break; } tail = static_cast(tail | maths_t::E_LeftTail); return false; - } - else if (x > support.second) - { - switch (m_Calculation) - { + } else if (x > support.second) { + switch (m_Calculation) { case maths_t::E_OneSidedBelow: - px = 1.0; break; + px = 1.0; + break; case maths_t::E_TwoSided: case maths_t::E_OneSidedAbove: - px = 0.0; break; + px = 0.0; + break; } tail = static_cast(tail | maths_t::E_RightTail); return false; @@ -1553,37 +1242,29 @@ bool CTools::CProbabilityOfLessLikelySample::check(const TDoubleDoublePr &suppor return true; } -void CTools::CProbabilityOfLessLikelySample::tail(double x, - double mode, - maths_t::ETail &tail) const -{ - if (x <= mode) - { +void CTools::CProbabilityOfLessLikelySample::tail(double x, double mode, maths_t::ETail& tail) const { + if (x <= mode) { tail = static_cast(tail | maths_t::E_LeftTail); } - if (x >= mode) - { + if (x >= mode) { tail = static_cast(tail | maths_t::E_RightTail); } } - //////// CMixtureProbabilityOfLessLikelySample Implementation //////// CTools::CMixtureProbabilityOfLessLikelySample::CMixtureProbabilityOfLessLikelySample(std::size_t n, double x, double logFx, double a, - double b) : - m_X(x), m_LogFx(logFx), m_A(a), m_B(b) -{ + double b) + : m_X(x), m_LogFx(logFx), m_A(a), m_B(b) { m_Endpoints.reserve(4 * n + 2); m_Endpoints.push_back(a); m_Endpoints.push_back(b); } -void CTools::CMixtureProbabilityOfLessLikelySample::reinitialize(double x, double logFx) -{ +void CTools::CMixtureProbabilityOfLessLikelySample::reinitialize(double x, double logFx) { m_X = x; m_LogFx = logFx; m_Endpoints.clear(); @@ -1591,24 +1272,18 @@ void CTools::CMixtureProbabilityOfLessLikelySample::reinitialize(double x, doubl m_Endpoints.push_back(m_B); } -void CTools::CMixtureProbabilityOfLessLikelySample::addMode(double weight, double modeMean, double modeSd) -{ +void CTools::CMixtureProbabilityOfLessLikelySample::addMode(double weight, double modeMean, double modeSd) { double deviation = m_LogFx - fastLog(weight) + LOG_ROOT_TWO_PI + fastLog(modeSd); - if (deviation >= 0.0) - { + if (deviation >= 0.0) { deviation = 0.0; m_Endpoints.push_back(truncate(modeMean - 2.0 * modeSd, m_A, m_B)); m_Endpoints.push_back(truncate(modeMean + 2.0 * modeSd, m_A, m_B)); - } - else if (deviation >= -0.5) - { + } else if (deviation >= -0.5) { deviation = std::sqrt(-2.0 * deviation); m_Endpoints.push_back(truncate(modeMean - (deviation + 2.0) * modeSd, m_A, m_B)); m_Endpoints.push_back(truncate(modeMean, m_A, m_B)); m_Endpoints.push_back(truncate(modeMean + (deviation + 2.0) * modeSd, m_A, m_B)); - 
} - else - { + } else { deviation = std::sqrt(-2.0 * deviation); m_Endpoints.push_back(truncate(modeMean - (deviation + 2.0) * modeSd, m_A, m_B)); m_Endpoints.push_back(truncate(modeMean - (deviation - 1.0) * modeSd, m_A, m_B)); @@ -1618,129 +1293,98 @@ void CTools::CMixtureProbabilityOfLessLikelySample::addMode(double weight, doubl m_MaxDeviation.add((2.0 + deviation) * modeSd); } -void CTools::CMixtureProbabilityOfLessLikelySample::intervals(TDoubleDoublePrVec &intervals) -{ +void CTools::CMixtureProbabilityOfLessLikelySample::intervals(TDoubleDoublePrVec& intervals) { std::sort(m_Endpoints.begin(), m_Endpoints.end()); - m_Endpoints.erase(std::unique(m_Endpoints.begin(), - m_Endpoints.end()), m_Endpoints.end()); + m_Endpoints.erase(std::unique(m_Endpoints.begin(), m_Endpoints.end()), m_Endpoints.end()); intervals.reserve(m_Endpoints.size() - 1); - for (std::size_t i = 1u; i < m_Endpoints.size(); ++i) - { + for (std::size_t i = 1u; i < m_Endpoints.size(); ++i) { intervals.emplace_back(m_Endpoints[i - 1], m_Endpoints[i]); } LOG_TRACE("intervals = " << core::CContainerPrinter::print(intervals)); } -const double CTools::CMixtureProbabilityOfLessLikelySample::LOG_ROOT_TWO_PI = - 0.5 * std::log(boost::math::double_constants::two_pi); - +const double CTools::CMixtureProbabilityOfLessLikelySample::LOG_ROOT_TWO_PI = 0.5 * std::log(boost::math::double_constants::two_pi); //////// SIntervalExpectation Implementation //////// -double CTools::SIntervalExpectation::operator()(const normal &normal_, - double a, - double b) const -{ - if (a > b) - { +double CTools::SIntervalExpectation::operator()(const normal& normal_, double a, double b) const { + if (a > b) { std::swap(a, b); } - if (a == POS_INF) - { + if (a == POS_INF) { return POS_INF; } double mean = normal_.mean(); double sd = normal_.standard_deviation(); - double s = std::sqrt(2.0) * sd; + double s = std::sqrt(2.0) * sd; double a_ = a == NEG_INF ? a : (a - mean) / s; double b_ = b == POS_INF ? b : (b - mean) / s; double expa = a_ == NEG_INF ? 0.0 : std::exp(-a_ * a_); double expb = b_ == POS_INF ? 0.0 : std::exp(-b_ * b_); double erfa = a_ == NEG_INF ? -1.0 : boost::math::erf(a_); - double erfb = b_ == POS_INF ? 1.0 : boost::math::erf(b_); + double erfb = b_ == POS_INF ? 1.0 : boost::math::erf(b_); - if (erfb - erfa < std::sqrt(EPSILON)) - { - return expa == expb ? - (a + b) / 2.0 : - (a * expa + b * expb) / (expa + expb); + if (erfb - erfa < std::sqrt(EPSILON)) { + return expa == expb ? (a + b) / 2.0 : (a * expa + b * expb) / (expa + expb); } - return mean + 2.0 * sd * (expa - expb) - / boost::math::double_constants::root_two_pi - / (erfb - erfa); + return mean + 2.0 * sd * (expa - expb) / boost::math::double_constants::root_two_pi / (erfb - erfa); } -double CTools::SIntervalExpectation::operator()(const lognormal &logNormal, - double a, - double b) const -{ - if (a > b) - { +double CTools::SIntervalExpectation::operator()(const lognormal& logNormal, double a, double b) const { + if (a > b) { std::swap(a, b); } - if (a == POS_INF) - { + if (a == POS_INF) { return POS_INF; } - if (b <= 0.0) - { + if (b <= 0.0) { return 0.0; } double location = logNormal.location(); double scale = logNormal.scale(); double mean = boost::math::mean(logNormal); - double loga = a <= 0.0 ? NEG_INF : std::log(a); + double loga = a <= 0.0 ? NEG_INF : std::log(a); double logb = b == POS_INF ? 
POS_INF : std::log(b); - double c = location + scale * scale; - double s = std::sqrt(2.0) * scale; + double c = location + scale * scale; + double s = std::sqrt(2.0) * scale; double a_ = loga == NEG_INF ? NEG_INF : (loga - location) / s; double b_ = logb == POS_INF ? POS_INF : (logb - location) / s; double erfa = loga == NEG_INF ? -1.0 : boost::math::erf((loga - c) / s); - double erfb = logb == POS_INF ? 1.0 : boost::math::erf((logb - c) / s); + double erfb = logb == POS_INF ? 1.0 : boost::math::erf((logb - c) / s); - if (erfb - erfa < std::sqrt(EPSILON)) - { + if (erfb - erfa < std::sqrt(EPSILON)) { double expa = loga == NEG_INF ? 0.0 : std::exp(-a_ * a_); double expb = logb == POS_INF ? 0.0 : std::exp(-b_ * b_); - return expa == expb ? - (2.0 * a / (a + b)) * b : - (expa + expb) / (expa / a + expb / b); + return expa == expb ? (2.0 * a / (a + b)) * b : (expa + expb) / (expa / a + expb / b); } double erfa_ = a_ == NEG_INF ? -1.0 : boost::math::erf(a_); - double erfb_ = b_ == POS_INF ? 1.0 : boost::math::erf(b_); + double erfb_ = b_ == POS_INF ? 1.0 : boost::math::erf(b_); return mean * (erfb - erfa) / (erfb_ - erfa_); } -double CTools::SIntervalExpectation::operator()(const gamma &gamma_, - double a, - double b) const -{ - if (a > b) - { +double CTools::SIntervalExpectation::operator()(const gamma& gamma_, double a, double b) const { + if (a > b) { std::swap(a, b); } - if (a == POS_INF) - { + if (a == POS_INF) { return POS_INF; } - if (b <= 0.0) - { + if (b <= 0.0) { return 0.0; } double shape = gamma_.shape(); double rate = 1.0 / gamma_.scale(); double mean = boost::math::mean(gamma_); - double gama = a <= 0.0 ? 0.0 : boost::math::gamma_p(shape + 1.0, rate * a); + double gama = a <= 0.0 ? 0.0 : boost::math::gamma_p(shape + 1.0, rate * a); double gamb = b == POS_INF ? 1.0 : boost::math::gamma_p(shape + 1.0, rate * b); - if (gamb - gama < std::sqrt(EPSILON)) - { - double expa = a <= 0.0 ? 0.0 : std::exp((shape - 1.0) * std::log(a) - rate * a); + if (gamb - gama < std::sqrt(EPSILON)) { + double expa = a <= 0.0 ? 0.0 : std::exp((shape - 1.0) * std::log(a) - rate * a); double expb = b == POS_INF ? 
0.0 : std::exp((shape - 1.0) * std::log(b) - rate * b); return (a * expa + b * expb) / (expa + expb); } @@ -1750,111 +1394,87 @@ double CTools::SIntervalExpectation::operator()(const gamma &gamma_, return mean * (gamb - gama) / (gamb_ - gama_); } - //////// smallestProbability Implementation //////// -double CTools::smallestProbability() -{ +double CTools::smallestProbability() { return MIN_DOUBLE; } - //////// safePdf Implementation //////// -namespace -{ +namespace { -namespace math_policy -{ +namespace math_policy { using namespace boost::math::policies; using AllowOverflow = policy>; } inline boost::math::normal_distribution - allowOverflow(const boost::math::normal_distribution<> &normal) -{ +allowOverflow(const boost::math::normal_distribution<>& normal) { return boost::math::normal_distribution(normal.mean(), normal.standard_deviation()); } inline boost::math::students_t_distribution - allowOverflow(const boost::math::students_t_distribution<> &students) -{ +allowOverflow(const boost::math::students_t_distribution<>& students) { return boost::math::students_t_distribution(students.degrees_of_freedom()); } inline boost::math::poisson_distribution - allowOverflow(const boost::math::poisson_distribution<> &poisson) -{ +allowOverflow(const boost::math::poisson_distribution<>& poisson) { return boost::math::poisson_distribution(poisson.mean()); } inline boost::math::negative_binomial_distribution - allowOverflow(const boost::math::negative_binomial_distribution<> &negativeBinomial) -{ +allowOverflow(const boost::math::negative_binomial_distribution<>& negativeBinomial) { return boost::math::negative_binomial_distribution(negativeBinomial.successes(), negativeBinomial.success_fraction()); } inline boost::math::lognormal_distribution - allowOverflow(const boost::math::lognormal_distribution<> &logNormal) -{ +allowOverflow(const boost::math::lognormal_distribution<>& logNormal) { return boost::math::lognormal_distribution(logNormal.location(), logNormal.scale()); } -inline boost::math::gamma_distribution - allowOverflow(const boost::math::gamma_distribution<> &gamma) -{ +inline boost::math::gamma_distribution allowOverflow(const boost::math::gamma_distribution<>& gamma) { return boost::math::gamma_distribution(gamma.shape(), gamma.scale()); } -inline boost::math::beta_distribution - allowOverflow(const boost::math::beta_distribution<> &beta) -{ +inline boost::math::beta_distribution allowOverflow(const boost::math::beta_distribution<>& beta) { return boost::math::beta_distribution(beta.alpha(), beta.beta()); } inline boost::math::binomial_distribution - allowOverflow(const boost::math::binomial_distribution<> &binomial) -{ - return boost::math::binomial_distribution(binomial.trials(), - binomial.success_fraction()); +allowOverflow(const boost::math::binomial_distribution<>& binomial) { + return boost::math::binomial_distribution(binomial.trials(), binomial.success_fraction()); } inline boost::math::chi_squared_distribution - allowOverflow(const boost::math::chi_squared_distribution<> &chi2) -{ +allowOverflow(const boost::math::chi_squared_distribution<>& chi2) { return boost::math::chi_squared_distribution(chi2.degrees_of_freedom()); } - } -double CTools::safePdf(const normal &normal_, double x) -{ +double CTools::safePdf(const normal& normal_, double x) { return continuousSafePdf(allowOverflow(normal_), x); } -double CTools::safePdf(const students_t &students, double x) -{ +double CTools::safePdf(const students_t& students, double x) { return continuousSafePdf(allowOverflow(students), x); 
} -double CTools::safePdf(const poisson &poisson_, double x) -{ +double CTools::safePdf(const poisson& poisson_, double x) { return discreteSafePdf(allowOverflow(poisson_), x); } -double CTools::safePdf(const negative_binomial &negativeBinomial, double x) -{ +double CTools::safePdf(const negative_binomial& negativeBinomial, double x) { return discreteSafePdf(allowOverflow(negativeBinomial), x); } -double CTools::safePdf(const lognormal &logNormal, double x) -{ +double CTools::safePdf(const lognormal& logNormal, double x) { return continuousSafePdf(allowOverflow(logNormal), x); } -double CTools::safePdf(const gamma &gamma_, double x) -{ +double CTools::safePdf(const gamma& gamma_, double x) { TDoubleDoublePr support = boost::math::support(gamma_); // The distribution at the 0 is either: @@ -1864,14 +1484,10 @@ double CTools::safePdf(const gamma &gamma_, double x) // // depending on the shape parameter. - if (x == support.first) - { - if (gamma_.shape() < 1.0) - { + if (x == support.first) { + if (gamma_.shape() < 1.0) { return POS_INF; - } - else if (gamma_.shape() == 1.0) - { + } else if (gamma_.shape() == 1.0) { return 1.0 / gamma_.scale(); } return 0.0; @@ -1880,8 +1496,7 @@ double CTools::safePdf(const gamma &gamma_, double x) return continuousSafePdf(allowOverflow(gamma_), x); } -double CTools::safePdf(const beta &beta_, double x) -{ +double CTools::safePdf(const beta& beta_, double x) { TDoubleDoublePr support = boost::math::support(beta_); // The distribution is either: @@ -1896,33 +1511,20 @@ double CTools::safePdf(const beta &beta_, double x) // introduced by using a constant continuation of the function // from eps and 1 - eps can be very large. - if (x == support.first) - { - if (beta_.alpha() < 1.0) - { + if (x == support.first) { + if (beta_.alpha() < 1.0) { return POS_INF; - } - else if (beta_.alpha() == 1.0) - { + } else if (beta_.alpha() == 1.0) { return 1.0 / boost::math::beta(beta_.alpha(), beta_.beta()); - } - else - { + } else { return 0.0; } - } - else if (x == support.second) - { - if (beta_.beta() < 1.0) - { + } else if (x == support.second) { + if (beta_.beta() < 1.0) { return POS_INF; - } - else if (beta_.beta() == 1.0) - { + } else if (beta_.beta() == 1.0) { return 1.0 / boost::math::beta(beta_.alpha(), beta_.beta()); - } - else - { + } else { return 0.0; } } @@ -1930,13 +1532,11 @@ double CTools::safePdf(const beta &beta_, double x) return continuousSafePdf(allowOverflow(beta_), x); } -double CTools::safePdf(const binomial &binomial_, double x) -{ +double CTools::safePdf(const binomial& binomial_, double x) { return discreteSafePdf(allowOverflow(binomial_), x); } -double CTools::safePdf(const chi_squared &chi2, double x) -{ +double CTools::safePdf(const chi_squared& chi2, double x) { TDoubleDoublePr support = boost::math::support(chi2); // Depending on the degrees of freedom the pdf at zero is either: @@ -1948,18 +1548,12 @@ double CTools::safePdf(const chi_squared &chi2, double x) double df = chi2.degrees_of_freedom(); - if (x == support.first) - { - if (df < 2.0) - { + if (x == support.first) { + if (df < 2.0) { return POS_INF; - } - else if (df == 2.0) - { + } else if (df == 2.0) { return 0.5; - } - else - { + } else { return 0.0; } } @@ -1967,107 +1561,85 @@ double CTools::safePdf(const chi_squared &chi2, double x) return continuousSafePdf(allowOverflow(chi2), x); } - //////// safeCdf Implementation //////// -double CTools::safeCdf(const normal &normal_, double x) -{ +double CTools::safeCdf(const normal& normal_, double x) { return 
continuousSafeCdf(allowOverflow(normal_), x); } -double CTools::safeCdf(const students_t &students, double x) -{ +double CTools::safeCdf(const students_t& students, double x) { return continuousSafeCdf(allowOverflow(students), x); } -double CTools::safeCdf(const poisson &poisson_, double x) -{ +double CTools::safeCdf(const poisson& poisson_, double x) { return discreteSafeCdf(allowOverflow(poisson_), x); } -double CTools::safeCdf(const negative_binomial &negativeBinomial, double x) -{ +double CTools::safeCdf(const negative_binomial& negativeBinomial, double x) { return discreteSafeCdf(allowOverflow(negativeBinomial), x); } -double CTools::safeCdf(const lognormal &logNormal, double x) -{ +double CTools::safeCdf(const lognormal& logNormal, double x) { return continuousSafeCdf(allowOverflow(logNormal), x); } -double CTools::safeCdf(const gamma &gamma_, double x) -{ +double CTools::safeCdf(const gamma& gamma_, double x) { return continuousSafeCdf(allowOverflow(gamma_), x); } -double CTools::safeCdf(const beta &beta_, double x) -{ +double CTools::safeCdf(const beta& beta_, double x) { return continuousSafeCdf(allowOverflow(beta_), x); } -double CTools::safeCdf(const binomial &binomial_, double x) -{ +double CTools::safeCdf(const binomial& binomial_, double x) { return discreteSafeCdf(allowOverflow(binomial_), x); } -double CTools::safeCdf(const chi_squared &chi2, double x) -{ +double CTools::safeCdf(const chi_squared& chi2, double x) { return continuousSafeCdf(allowOverflow(chi2), x); } - //////// safeCdfComplement Implementation //////// -double CTools::safeCdfComplement(const normal &normal_, double x) -{ +double CTools::safeCdfComplement(const normal& normal_, double x) { return continuousSafeCdfComplement(allowOverflow(normal_), x); } -double CTools::safeCdfComplement(const students_t &students, double x) -{ +double CTools::safeCdfComplement(const students_t& students, double x) { return continuousSafeCdfComplement(allowOverflow(students), x); } -double CTools::safeCdfComplement(const poisson &poisson_, double x) -{ +double CTools::safeCdfComplement(const poisson& poisson_, double x) { return discreteSafeCdfComplement(allowOverflow(poisson_), x); } -double CTools::safeCdfComplement(const negative_binomial &negativeBinomial, double x) -{ +double CTools::safeCdfComplement(const negative_binomial& negativeBinomial, double x) { return discreteSafeCdfComplement(allowOverflow(negativeBinomial), x); } -double CTools::safeCdfComplement(const lognormal &logNormal, double x) -{ +double CTools::safeCdfComplement(const lognormal& logNormal, double x) { return continuousSafeCdfComplement(allowOverflow(logNormal), x); } -double CTools::safeCdfComplement(const gamma &gamma_, double x) -{ +double CTools::safeCdfComplement(const gamma& gamma_, double x) { return continuousSafeCdfComplement(allowOverflow(gamma_), x); } -double CTools::safeCdfComplement(const beta &beta_, double x) -{ +double CTools::safeCdfComplement(const beta& beta_, double x) { return continuousSafeCdfComplement(allowOverflow(beta_), x); } -double CTools::safeCdfComplement(const binomial &binomial_, double x) -{ +double CTools::safeCdfComplement(const binomial& binomial_, double x) { return discreteSafeCdfComplement(allowOverflow(binomial_), x); } -double CTools::safeCdfComplement(const chi_squared &chi2, double x) -{ +double CTools::safeCdfComplement(const chi_squared& chi2, double x) { return continuousSafeCdfComplement(allowOverflow(chi2), x); } - //////// deviation Implementation //////// -namespace -{ +namespace { const double 
SMALL_PROBABILITY_ANOMALY_SCORE = 1.0; const double MINUSCULE_PROBABILITY_ANOMALY_SCORE = 50.0; const double MAX_ANOMALY_SCORE = 100.0; @@ -2077,104 +1649,77 @@ const double MINUS_LOG_SMALL_PROBABILITY = -std::log(SMALL_PROBABILITY); const double MINUS_LOG_MINUSCULE_PROBABILITY = -std::log(MINUSCULE_PROBABILITY); } -double CTools::anomalyScore(double p) -{ +double CTools::anomalyScore(double p) { const double MINUS_LOG_SMALLEST_PROBABILITY = -std::log(smallestProbability()); double result = 0.0; double adjP = std::max(p, smallestProbability()); - if (adjP < LARGEST_SIGNIFICANT_PROBABILITY) - { - if (adjP >= SMALL_PROBABILITY) - { + if (adjP < LARGEST_SIGNIFICANT_PROBABILITY) { + if (adjP >= SMALL_PROBABILITY) { // We use a linear scaling based on the inverse probability // into the range (0.0, 1.0]. - result = SMALL_PROBABILITY_ANOMALY_SCORE - * (1.0 / adjP - INV_LARGEST_SIGNIFICANT_PROBABILITY) - / (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY); - } - else if (adjP >= MINUSCULE_PROBABILITY) - { + result = SMALL_PROBABILITY_ANOMALY_SCORE * (1.0 / adjP - INV_LARGEST_SIGNIFICANT_PROBABILITY) / + (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY); + } else if (adjP >= MINUSCULE_PROBABILITY) { // We use a linear scaling based on the log probability into // the range (1.0, 50.0]. - result = SMALL_PROBABILITY_ANOMALY_SCORE - + (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE) - * (-std::log(adjP) - MINUS_LOG_SMALL_PROBABILITY) - / (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY); - } - else - { + result = SMALL_PROBABILITY_ANOMALY_SCORE + (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE) * + (-std::log(adjP) - MINUS_LOG_SMALL_PROBABILITY) / + (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY); + } else { // We use a linear scaling based on the log probability into // the range (50.0, 100.0]. - result = MINUSCULE_PROBABILITY_ANOMALY_SCORE - + (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE) - * (-std::log(adjP) - MINUS_LOG_MINUSCULE_PROBABILITY) - / (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY); + result = MINUSCULE_PROBABILITY_ANOMALY_SCORE + (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE) * + (-std::log(adjP) - MINUS_LOG_MINUSCULE_PROBABILITY) / + (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY); } } - if (!(result >= 0.0 && result <= MAX_ANOMALY_SCORE)) - { + if (!(result >= 0.0 && result <= MAX_ANOMALY_SCORE)) { LOG_ERROR("Deviation " << result << " out of range, p =" << p); } return result; } -double CTools::inverseAnomalyScore(double deviation) -{ +double CTools::inverseAnomalyScore(double deviation) { const double MINUS_LOG_SMALLEST_PROBABILITY = -std::log(smallestProbability()); double result = 0.0; double adjDeviation = truncate(deviation, 0.0, MAX_ANOMALY_SCORE); - if (adjDeviation == 0.0) - { + if (adjDeviation == 0.0) { result = (1.0 + LARGEST_SIGNIFICANT_PROBABILITY) / 2.0; - } - else if (adjDeviation <= SMALL_PROBABILITY_ANOMALY_SCORE) - { + } else if (adjDeviation <= SMALL_PROBABILITY_ANOMALY_SCORE) { // We invert the linear scaling of the inverse probability // into the range (0.0, 1.0]. 
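The forward mapping being inverted here is anomalyScore() above, which maps a probability onto [0, 100] in three bands: linear in 1/p up to a score of 1, then linear in -log(p) up to 50 and again up to 100. A compact toy version of that scheme follows; the band boundaries below are assumptions made for illustration, since the real constants (SMALL_PROBABILITY and friends) are defined elsewhere in CTools:

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    namespace {
    const double LARGEST_SIGNIFICANT = 0.05; // assumed band boundary
    const double SMALL = 1e-4;               // assumed band boundary
    const double MINUSCULE = 1e-50;          // assumed band boundary
    const double SMALLEST = 1e-300;          // assumed stand-in for MIN_DOUBLE
    }

    double toyAnomalyScore(double p) {
        p = std::max(p, SMALLEST);
        if (p >= LARGEST_SIGNIFICANT) {
            return 0.0;
        }
        if (p >= SMALL) {
            // (0, 1]: linear in the inverse probability.
            return (1.0 / p - 1.0 / LARGEST_SIGNIFICANT) / (1.0 / SMALL - 1.0 / LARGEST_SIGNIFICANT);
        }
        if (p >= MINUSCULE) {
            // (1, 50]: linear in the log probability.
            return 1.0 + 49.0 * std::log(SMALL / p) / std::log(SMALL / MINUSCULE);
        }
        // (50, 100]: linear in the log probability.
        return 50.0 + 50.0 * std::log(MINUSCULE / p) / std::log(MINUSCULE / SMALLEST);
    }

    int main() {
        for (double p : {0.1, 1e-3, 1e-10, 1e-100}) {
            std::cout << "p = " << p << " -> score " << toyAnomalyScore(p) << '\n';
        }
    }

Each band boundary maps exactly onto the score breakpoints 1 and 50, which is why the inversion below can be done band by band.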
- result = 1.0 / (INV_LARGEST_SIGNIFICANT_PROBABILITY - + (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY) - * deviation - / SMALL_PROBABILITY_ANOMALY_SCORE); - } - else if (adjDeviation <= MINUSCULE_PROBABILITY_ANOMALY_SCORE) - { + result = 1.0 / (INV_LARGEST_SIGNIFICANT_PROBABILITY + + (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY) * deviation / SMALL_PROBABILITY_ANOMALY_SCORE); + } else if (adjDeviation <= MINUSCULE_PROBABILITY_ANOMALY_SCORE) { // We invert the linear scaling of the log probability // into the range (1.0, 50.0]. - result = std::exp(-(MINUS_LOG_SMALL_PROBABILITY - + (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY) - * (deviation - SMALL_PROBABILITY_ANOMALY_SCORE) - / (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE))); - } - else - { + result = std::exp(-(MINUS_LOG_SMALL_PROBABILITY + (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY) * + (deviation - SMALL_PROBABILITY_ANOMALY_SCORE) / + (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE))); + } else { // We invert the linear scaling of the log probability // into the range (50.0, 100.0]. - result = std::exp(-(MINUS_LOG_MINUSCULE_PROBABILITY - + (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY) - * (deviation - MINUSCULE_PROBABILITY_ANOMALY_SCORE) - / (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE))); + result = std::exp(-(MINUS_LOG_MINUSCULE_PROBABILITY + (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY) * + (deviation - MINUSCULE_PROBABILITY_ANOMALY_SCORE) / + (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE))); } - if (!(result >= 0.0 && result <= 1.0)) - { - LOG_ERROR("Probability " << result - << " out of range, deviation =" << deviation); + if (!(result >= 0.0 && result <= 1.0)) { + LOG_ERROR("Probability " << result << " out of range, deviation =" << deviation); } return result; } - //////// differentialEntropy Implementation //////// -double CTools::differentialEntropy(const poisson &poisson_) -{ +double CTools::differentialEntropy(const poisson& poisson_) { // Approximate as sum over [mean - 5 * std, mean + 5 * std]. double mean = boost::math::mean(poisson_); @@ -2185,8 +1730,7 @@ double CTools::differentialEntropy(const poisson &poisson_) double result = 0.0; - for (unsigned int x = a; x <= b; ++x) - { + for (unsigned int x = a; x <= b; ++x) { double pdf = safePdf(poisson_, x); result -= log(pdf) * pdf; } @@ -2194,21 +1738,17 @@ double CTools::differentialEntropy(const poisson &poisson_) return result; } -double CTools::differentialEntropy(const normal &normal_) -{ +double CTools::differentialEntropy(const normal& normal_) { // Equals log(2 * pi * e * v) / 2 // // where, // v is the variance of the normal distribution. double variance = boost::math::variance(normal_); - return 0.5 * std::log( boost::math::double_constants::two_pi - * boost::math::double_constants::e - * variance); + return 0.5 * std::log(boost::math::double_constants::two_pi * boost::math::double_constants::e * variance); } -double CTools::differentialEntropy(const lognormal &logNormal) -{ +double CTools::differentialEntropy(const lognormal& logNormal) { // Equals log(2 * pi * e * v) / 2 + m.
// // where, @@ -2217,13 +1757,10 @@ double CTools::differentialEntropy(const lognormal &logNormal) double location = logNormal.location(); double scale = logNormal.scale(); - return 0.5 * std::log( boost::math::double_constants::two_pi - * boost::math::double_constants::e - * pow2(scale)) + location; + return 0.5 * std::log(boost::math::double_constants::two_pi * boost::math::double_constants::e * pow2(scale)) + location; } -double CTools::differentialEntropy(const gamma &gamma_) -{ +double CTools::differentialEntropy(const gamma& gamma_) { // Equals k + log(t) + log(g(k)) + (1 - k) * f(k) // // where, @@ -2233,16 +1770,12 @@ double CTools::differentialEntropy(const gamma &gamma_) double shape = gamma_.shape(); double scale = gamma_.scale(); - return shape + std::log(scale) - + boost::math::lgamma(shape) - + (1 - shape) * boost::math::digamma(shape); + return shape + std::log(scale) + boost::math::lgamma(shape) + (1 - shape) * boost::math::digamma(shape); } - //////// CGroup Implementation //////// -void CTools::CGroup::merge(const CGroup &other, double separation, double min, double max) -{ +void CTools::CGroup::merge(const CGroup& other, double separation, double min, double max) { m_A = std::min(m_A, other.m_A); m_B = std::max(m_B, other.m_B); @@ -2255,72 +1788,49 @@ void CTools::CGroup::merge(const CGroup &other, double separation, double min, d m_Centre.s_Moments[0] += std::min(max - r, 0.0); } -bool CTools::CGroup::overlap(const CGroup &other, double separation) const -{ +bool CTools::CGroup::overlap(const CGroup& other, double separation) const { const double TOL{1.0 + EPSILON}; double ll{this->leftEndpoint(separation)}; double lr{this->rightEndpoint(separation)}; double rl{other.leftEndpoint(separation)}; double rr{other.rightEndpoint(separation)}; - return !( TOL * (lr + separation) <= rl - || ll >= TOL * (rr + separation) - || TOL * (rr + separation) <= ll - || rl >= TOL * (lr + separation)); + return !(TOL * (lr + separation) <= rl || ll >= TOL * (rr + separation) || TOL * (rr + separation) <= ll || + rl >= TOL * (lr + separation)); } -double CTools::CGroup::leftEndpoint(double separation) const -{ - return CBasicStatistics::mean(m_Centre) - - static_cast(m_B - m_A) * separation / 2.0; +double CTools::CGroup::leftEndpoint(double separation) const { + return CBasicStatistics::mean(m_Centre) - static_cast(m_B - m_A) * separation / 2.0; } -double CTools::CGroup::rightEndpoint(double separation) const -{ - return CBasicStatistics::mean(m_Centre) - + static_cast(m_B - m_A) * separation / 2.0; +double CTools::CGroup::rightEndpoint(double separation) const { + return CBasicStatistics::mean(m_Centre) + static_cast(m_B - m_A) * separation / 2.0; } - const CTools::CLookupTableForFastLog CTools::FAST_LOG_TABLE; - //////// Miscellaneous Implementations //////// -namespace -{ +namespace { const double EPS{0.1}; -const double COEFFS[] - { - -1.0, - +1.0 / 2.0, - -1.0 / 6.0, - +1.0 / 24.0, - -1.0 / 120.0, - +1.0 / 720.0 - }; +const double COEFFS[]{-1.0, +1.0 / 2.0, -1.0 / 6.0, +1.0 / 24.0, -1.0 / 120.0, +1.0 / 720.0}; const std::size_t N{boost::size(COEFFS)}; } -double CTools::shiftLeft(double x, double eps) -{ - if (x == NEG_INF) - { +double CTools::shiftLeft(double x, double eps) { + if (x == NEG_INF) { return x; } return (x < 0.0 ? 1.0 + eps : 1.0 - eps) * x; } -double CTools::shiftRight(double x, double eps) -{ - if (x == POS_INF) - { +double CTools::shiftRight(double x, double eps) { + if (x == POS_INF) { return x; } return (x < 0.0 ? 
1.0 - eps : 1.0 + eps) * x; } -double CTools::powOneMinusX(double x, double p) -{ +double CTools::powOneMinusX(double x, double p) { // For large p, // (1 - x) ^ p ~= exp(-p * x). // @@ -2331,34 +1841,27 @@ double CTools::powOneMinusX(double x, double p) // // and canceling the leading terms. - if (x == 1.0) - { + if (x == 1.0) { return 0.0; } - if (p == 1.0) - { + if (p == 1.0) { return 1.0 - x; } double y = p * x; - if (std::fabs(y) < EPS) - { + if (std::fabs(y) < EPS) { double remainder = 0.0; double ti = 1.0; - for (std::size_t i = 0u; i < N && p != 0.0; ++i, p -= 1.0) - { + for (std::size_t i = 0u; i < N && p != 0.0; ++i, p -= 1.0) { ti *= p * x; remainder += COEFFS[i] * ti; } return 1.0 + remainder; - } - else if (p > 1000.0) - { + } else if (p > 1000.0) { return std::exp(-y); } - if (x > 1.0) - { + if (x > 1.0) { double sign = static_cast<int>(p) % 2 ? -1.0 : 1.0; return sign * std::exp(p * std::log(x - 1.0)); } @@ -2366,8 +1869,7 @@ double CTools::powOneMinusX(double x, double p) return std::exp(p * std::log(1.0 - x)); } -double CTools::oneMinusPowOneMinusX(double x, double p) -{ +double CTools::oneMinusPowOneMinusX(double x, double p) { // For large p, // (1 - x) ^ p ~= exp(-p * x). // @@ -2382,34 +1884,27 @@ double CTools::oneMinusPowOneMinusX(double x, double p) // // when p * x is small. - if (x == 1.0) - { + if (x == 1.0) { return 1.0; } - if (p == 1.0) - { + if (p == 1.0) { return x; } double y = p * x; - if (std::fabs(y) < EPS) - { + if (std::fabs(y) < EPS) { double result = 0.0; double ti = 1.0; - for (std::size_t i = 0u; i < N && p != 0.0; ++i, p -= 1.0) - { + for (std::size_t i = 0u; i < N && p != 0.0; ++i, p -= 1.0) { ti *= p * x; result -= COEFFS[i] * ti; } return result; - } - else if (p > 1000.0) - { + } else if (p > 1000.0) { return 1.0 - std::exp(-y); } - if (x > 1.0) - { + if (x > 1.0) { double sign = static_cast<int>(p) % 2 ? -1.0 : 1.0; return 1.0 - sign * std::exp(p * std::log(x - 1.0)); } @@ -2417,25 +1912,19 @@ double CTools::oneMinusPowOneMinusX(double x, double p) return 1.0 - std::exp(p * std::log(1.0 - x)); } -double CTools::logOneMinusX(double x) -{ +double CTools::logOneMinusX(double x) { double result = 0.0; - if (std::fabs(x) < EPS) - { + if (std::fabs(x) < EPS) { double xi = -x; - for (std::size_t i = 0u; i < 6; ++i, xi *= -x) - { + for (std::size_t i = 0u; i < 6; ++i, xi *= -x) { result += xi / static_cast<double>(i + 1); } - } - else - { + } else { result = std::log(1.0 - x); } return result; } - } } diff --git a/lib/maths/CTrendComponent.cc b/lib/maths/CTrendComponent.cc index a661cb61a8..d09a7cc77e 100644 --- a/lib/maths/CTrendComponent.cc +++ b/lib/maths/CTrendComponent.cc @@ -6,9 +6,9 @@ #include -#include #include #include +#include #include #include @@ -19,20 +19,17 @@ #include #include -#include #include +#include #include #include #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TOptionalDoubleDoublePr = boost::optional<std::pair<double, double>>; const double TIME_SCALES[]{144.0, 72.0, 36.0, 12.0, 4.0, 1.0, 0.25, 0.05}; @@ -43,52 +40,39 @@ const std::size_t NO_CHANGE_LABEL{0}; const std::size_t LEVEL_CHANGE_LABEL{1}; //! Get the desired weight for the regression model. -double modelWeight(double targetDecayRate, double modelDecayRate) -{ - return targetDecayRate == modelDecayRate ? - 1.0 : std::min(targetDecayRate, modelDecayRate) - / std::max(targetDecayRate, modelDecayRate); +double modelWeight(double targetDecayRate, double modelDecayRate) { return targetDecayRate == modelDecayRate ? 
1.0 : std::min(targetDecayRate, modelDecayRate) / std::max(targetDecayRate, modelDecayRate); } //! We scale the time used for the regression model to improve //! the condition of the design matrix. -double scaleTime(core_t::TTime time, core_t::TTime origin) -{ +double scaleTime(core_t::TTime time, core_t::TTime origin) { return static_cast(time - origin) / static_cast(core::constants::WEEK); } //! Get the \p confidence interval for \p prediction and \p variance. -TOptionalDoubleDoublePr confidenceInterval(double prediction, - double variance, - double confidence) -{ - try - { +TOptionalDoubleDoublePr confidenceInterval(double prediction, double variance, double confidence) { + try { boost::math::normal normal{prediction, std::sqrt(variance)}; double ql{boost::math::quantile(normal, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; return std::make_pair(ql, qu); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed calculating confidence interval: " << e.what() - << ", prediction = " << prediction - << ", variance = " << variance - << ", confidence = " << confidence); + } catch (const std::exception& e) { + LOG_ERROR("Failed calculating confidence interval: " << e.what() << ", prediction = " << prediction << ", variance = " << variance + << ", confidence = " << confidence); } return TOptionalDoubleDoublePr{}; } -CNaiveBayes initialProbabilityOfChangeModel(double decayRate) -{ +CNaiveBayes initialProbabilityOfChangeModel(double decayRate) { decayRate *= TIME_SCALES[NUMBER_MODELS - 1]; - return CNaiveBayes{CNaiveBayesFeatureDensityFromPrior{ - CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, - decayRate)}, decayRate, -20.0}; + return CNaiveBayes{ + CNaiveBayesFeatureDensityFromPrior{CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate)}, + decayRate, + -20.0}; } -CNormalMeanPrecConjugate initialMagnitudeOfChangeModel(double decayRate) -{ +CNormalMeanPrecConjugate initialMagnitudeOfChangeModel(double decayRate) { return CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate); } @@ -105,28 +89,24 @@ const std::string MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG{"j"}; const std::string WEIGHT_TAG{"a"}; const std::string REGRESSION_TAG{"b"}; const std::string RESIDUAL_MOMENTS_TAG{"c"}; - } -CTrendComponent::CTrendComponent(double decayRate) : - m_DefaultDecayRate(decayRate), - m_TargetDecayRate(decayRate), - m_FirstUpdate(UNSET_TIME), - m_LastUpdate(UNSET_TIME), - m_RegressionOrigin(UNSET_TIME), - m_PredictionErrorVariance(0.0), - m_TimeOfLastLevelChange(UNSET_TIME), - m_ProbabilityOfLevelChangeModel(initialProbabilityOfChangeModel(decayRate)), - m_MagnitudeOfLevelChangeModel(initialMagnitudeOfChangeModel(decayRate)) -{ - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { +CTrendComponent::CTrendComponent(double decayRate) + : m_DefaultDecayRate(decayRate), + m_TargetDecayRate(decayRate), + m_FirstUpdate(UNSET_TIME), + m_LastUpdate(UNSET_TIME), + m_RegressionOrigin(UNSET_TIME), + m_PredictionErrorVariance(0.0), + m_TimeOfLastLevelChange(UNSET_TIME), + m_ProbabilityOfLevelChangeModel(initialProbabilityOfChangeModel(decayRate)), + m_MagnitudeOfLevelChangeModel(initialMagnitudeOfChangeModel(decayRate)) { + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { m_TrendModels.emplace_back(modelWeight(1.0, TIME_SCALES[i])); } } -void CTrendComponent::swap(CTrendComponent &other) -{ +void CTrendComponent::swap(CTrendComponent& other) { std::swap(m_DefaultDecayRate, 
other.m_DefaultDecayRate); std::swap(m_TargetDecayRate, other.m_TargetDecayRate); std::swap(m_FirstUpdate, other.m_FirstUpdate); @@ -140,66 +120,50 @@ void CTrendComponent::swap(CTrendComponent &other) m_MagnitudeOfLevelChangeModel.swap(other.m_MagnitudeOfLevelChangeModel); } -void CTrendComponent::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CTrendComponent::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(TARGET_DECAY_RATE_TAG, m_TargetDecayRate); inserter.insertValue(FIRST_UPDATE_TAG, m_FirstUpdate); inserter.insertValue(LAST_UPDATE_TAG, m_LastUpdate); inserter.insertValue(REGRESSION_ORIGIN_TAG, m_RegressionOrigin); - for (const auto &model : m_TrendModels) - { + for (const auto& model : m_TrendModels) { inserter.insertLevel(MODEL_TAG, boost::bind(&SModel::acceptPersistInserter, &model, _1)); } - inserter.insertValue(PREDICTION_ERROR_VARIANCE_TAG, - m_PredictionErrorVariance, - core::CIEEE754::E_DoublePrecision); + inserter.insertValue(PREDICTION_ERROR_VARIANCE_TAG, m_PredictionErrorVariance, core::CIEEE754::E_DoublePrecision); inserter.insertValue(VALUE_MOMENTS_TAG, m_ValueMoments.toDelimited()); inserter.insertValue(TIME_OF_LAST_LEVEL_CHANGE_TAG, m_TimeOfLastLevelChange); inserter.insertLevel(PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG, - boost::bind(&CNaiveBayes::acceptPersistInserter, - &m_ProbabilityOfLevelChangeModel, _1)); + boost::bind(&CNaiveBayes::acceptPersistInserter, &m_ProbabilityOfLevelChangeModel, _1)); inserter.insertLevel(MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG, - boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, - &m_MagnitudeOfLevelChangeModel, _1)); + boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, &m_MagnitudeOfLevelChangeModel, _1)); } -bool CTrendComponent::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ +bool CTrendComponent::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { std::size_t i{0}; - do - { - const std::string &name{traverser.name()}; + do { + const std::string& name{traverser.name()}; RESTORE_BUILT_IN(TARGET_DECAY_RATE_TAG, m_TargetDecayRate) RESTORE_BUILT_IN(FIRST_UPDATE_TAG, m_FirstUpdate) RESTORE_BUILT_IN(LAST_UPDATE_TAG, m_LastUpdate) RESTORE_BUILT_IN(REGRESSION_ORIGIN_TAG, m_RegressionOrigin) - RESTORE(MODEL_TAG, traverser.traverseSubLevel(boost::bind( - &SModel::acceptRestoreTraverser, &m_TrendModels[i++], _1))) + RESTORE(MODEL_TAG, traverser.traverseSubLevel(boost::bind(&SModel::acceptRestoreTraverser, &m_TrendModels[i++], _1))) RESTORE_BUILT_IN(PREDICTION_ERROR_VARIANCE_TAG, m_PredictionErrorVariance) RESTORE(VALUE_MOMENTS_TAG, m_ValueMoments.fromDelimited(traverser.value())) RESTORE_BUILT_IN(TIME_OF_LAST_LEVEL_CHANGE_TAG, m_TimeOfLastLevelChange) - RESTORE_NO_ERROR(PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG, - m_ProbabilityOfLevelChangeModel = CNaiveBayes(params, traverser)) - RESTORE_NO_ERROR(MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG, - m_MagnitudeOfLevelChangeModel = CNormalMeanPrecConjugate(params, traverser)) - } - while (traverser.next()); + RESTORE_NO_ERROR(PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG, m_ProbabilityOfLevelChangeModel = CNaiveBayes(params, traverser)) + RESTORE_NO_ERROR(MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG, m_MagnitudeOfLevelChangeModel = CNormalMeanPrecConjugate(params, traverser)) + } while (traverser.next()); return true; } -bool CTrendComponent::initialized() const -{ +bool CTrendComponent::initialized() const { return m_LastUpdate 
!= UNSET_TIME; } -void CTrendComponent::clear() -{ +void CTrendComponent::clear() { m_FirstUpdate = UNSET_TIME; - m_LastUpdate = UNSET_TIME; + m_LastUpdate = UNSET_TIME; m_RegressionOrigin = UNSET_TIME; - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { m_TrendModels[i] = SModel(modelWeight(1.0, TIME_SCALES[i])); } m_PredictionErrorVariance = 0.0; @@ -209,38 +173,29 @@ void CTrendComponent::clear() m_MagnitudeOfLevelChangeModel = initialMagnitudeOfChangeModel(m_DefaultDecayRate); } -void CTrendComponent::shiftOrigin(core_t::TTime time) -{ +void CTrendComponent::shiftOrigin(core_t::TTime time) { time = CIntegerTools::floor(time, core::constants::WEEK); double scaledShift{scaleTime(time, m_RegressionOrigin)}; - if (scaledShift > 0.0) - { - for (auto &model : m_TrendModels) - { + if (scaledShift > 0.0) { + for (auto& model : m_TrendModels) { model.s_Regression.shiftAbscissa(-scaledShift); } m_RegressionOrigin = time; } } -void CTrendComponent::shiftSlope(double decayRate, double shift) -{ - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { - double shift_{std::min(m_DefaultDecayRate * TIME_SCALES[i] - / decayRate, 1.0) * shift}; +void CTrendComponent::shiftSlope(double decayRate, double shift) { + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { + double shift_{std::min(m_DefaultDecayRate * TIME_SCALES[i] / decayRate, 1.0) * shift}; m_TrendModels[i].s_Regression.shiftGradient(shift_); } } -void CTrendComponent::shiftLevel(core_t::TTime time, double value, double shift) -{ - for (auto &model : m_TrendModels) - { +void CTrendComponent::shiftLevel(core_t::TTime time, double value, double shift) { + for (auto& model : m_TrendModels) { model.s_Regression.shiftOrdinate(shift); } - if (m_TimeOfLastLevelChange != UNSET_TIME) - { + if (m_TimeOfLastLevelChange != UNSET_TIME) { double dt{static_cast(time - m_TimeOfLastLevelChange)}; m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, {{dt}, {value}}); } @@ -248,101 +203,81 @@ void CTrendComponent::shiftLevel(core_t::TTime time, double value, double shift) m_TimeOfLastLevelChange = time; } -void CTrendComponent::dontShiftLevel(core_t::TTime time, double value) -{ - if (m_TimeOfLastLevelChange != UNSET_TIME) - { +void CTrendComponent::dontShiftLevel(core_t::TTime time, double value) { + if (m_TimeOfLastLevelChange != UNSET_TIME) { double dt{static_cast(time - m_TimeOfLastLevelChange)}; m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(NO_CHANGE_LABEL, {{dt}, {value}}); } } -void CTrendComponent::linearScale(double scale) -{ - for (auto &model : m_TrendModels) - { +void CTrendComponent::linearScale(double scale) { + for (auto& model : m_TrendModels) { model.s_Regression.linearScale(scale); } } -void CTrendComponent::add(core_t::TTime time, double value, double weight) -{ +void CTrendComponent::add(core_t::TTime time, double value, double weight) { // Update the model weights: we weight the components based on the // relative difference in the component scale and the target scale. - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { - m_TrendModels[i].s_Weight.add(modelWeight(m_TargetDecayRate, - m_DefaultDecayRate * TIME_SCALES[i])); + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { + m_TrendModels[i].s_Weight.add(modelWeight(m_TargetDecayRate, m_DefaultDecayRate * TIME_SCALES[i])); } // Update the models. 
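Before the model update that the comment above introduces, it is worth seeing what the weight update just applied actually does. modelWeight() (reproduced verbatim from the anonymous namespace earlier in this file) gives each of the NUMBER_MODELS regressions a weight equal to the ratio of the slower decay rate to the faster one, so the model whose decay rate matches the target dominates the blend. A small sketch, in which the decay-rate values are assumptions chosen for illustration:

    #include <algorithm>
    #include <iostream>

    // Copied from CTrendComponent.cc above.
    double modelWeight(double targetDecayRate, double modelDecayRate) {
        return targetDecayRate == modelDecayRate
                   ? 1.0
                   : std::min(targetDecayRate, modelDecayRate) / std::max(targetDecayRate, modelDecayRate);
    }

    int main() {
        const double TIME_SCALES[]{144.0, 72.0, 36.0, 12.0, 4.0, 1.0, 0.25, 0.05};
        double defaultDecayRate = 0.01; // assumed value for illustration
        double targetDecayRate = 0.01;  // favours the TIME_SCALES[i] == 1.0 model
        for (double scale : TIME_SCALES) {
            // Weight decays towards zero as the model's scale moves away from 1.
            std::cout << "scale " << scale << " -> weight "
                      << modelWeight(targetDecayRate, defaultDecayRate * scale) << '\n';
        }
    }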
- if (m_FirstUpdate == UNSET_TIME) - { + if (m_FirstUpdate == UNSET_TIME) { m_RegressionOrigin = CIntegerTools::floor(time, core::constants::WEEK); } double prediction{CBasicStatistics::mean(this->value(time, 0.0))}; double count{this->count()}; - if (count > 0.0) - { - TMeanVarAccumulator moments{CBasicStatistics::accumulator( - count, prediction, m_PredictionErrorVariance)}; + if (count > 0.0) { + TMeanVarAccumulator moments{CBasicStatistics::accumulator(count, prediction, m_PredictionErrorVariance)}; moments.add(value, weight); m_PredictionErrorVariance = CBasicStatistics::maximumLikelihoodVariance(moments); } double scaledTime{scaleTime(time, m_RegressionOrigin)}; - for (auto &model : m_TrendModels) - { + for (auto& model : m_TrendModels) { model.s_Regression.add(scaledTime, value, weight); - model.s_ResidualMoments.add( - value - model.s_Regression.predict(scaledTime, MAX_CONDITION)); + model.s_ResidualMoments.add(value - model.s_Regression.predict(scaledTime, MAX_CONDITION)); } m_ValueMoments.add(value); m_FirstUpdate = m_FirstUpdate == UNSET_TIME ? time : std::min(m_FirstUpdate, time); - m_LastUpdate = std::max(m_LastUpdate, time); + m_LastUpdate = std::max(m_LastUpdate, time); } -void CTrendComponent::dataType(maths_t::EDataType dataType) -{ +void CTrendComponent::dataType(maths_t::EDataType dataType) { m_ProbabilityOfLevelChangeModel.dataType(dataType); m_MagnitudeOfLevelChangeModel.dataType(dataType); } -double CTrendComponent::defaultDecayRate() const -{ +double CTrendComponent::defaultDecayRate() const { return m_DefaultDecayRate; } -void CTrendComponent::decayRate(double decayRate) -{ +void CTrendComponent::decayRate(double decayRate) { m_TargetDecayRate = decayRate; } -void CTrendComponent::propagateForwardsByTime(core_t::TTime interval) -{ +void CTrendComponent::propagateForwardsByTime(core_t::TTime interval) { TDoubleVec factors(this->factors(interval)); double median{CBasicStatistics::median(factors)}; - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { m_TrendModels[i].s_Weight.age(median); m_TrendModels[i].s_Regression.age(factors[i]); m_TrendModels[i].s_ResidualMoments.age(std::sqrt(factors[i])); } - double interval_{ static_cast(interval) - / static_cast(core::constants::DAY)}; + double interval_{static_cast(interval) / static_cast(core::constants::DAY)}; m_ProbabilityOfLevelChangeModel.propagateForwardsByTime(interval_); m_MagnitudeOfLevelChangeModel.propagateForwardsByTime(interval_); } -CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, double confidence) const -{ - if (!this->initialized()) - { +CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, double confidence) const { + if (!this->initialized()) { return {0.0, 0.0}; } @@ -353,24 +288,18 @@ CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, doub TMeanAccumulator prediction_; { TDoubleVec factors(this->factors(std::abs(time - m_LastUpdate))); - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { prediction_.add(m_TrendModels[i].s_Regression.predict(scaledTime, MAX_CONDITION), factors[i] * CBasicStatistics::mean(m_TrendModels[i].s_Weight)); } } - double prediction{ a * CBasicStatistics::mean(prediction_) - + b * CBasicStatistics::mean(m_ValueMoments)}; + double prediction{a * CBasicStatistics::mean(prediction_) + b * CBasicStatistics::mean(m_ValueMoments)}; - if (confidence > 0.0 && m_PredictionErrorVariance > 0.0) - { - double 
variance{ a * m_PredictionErrorVariance - / std::max(this->count(), 1.0) - + b * CBasicStatistics::variance(m_ValueMoments) - / std::max(CBasicStatistics::count(m_ValueMoments), 1.0)}; - if (auto interval = confidenceInterval(prediction, variance, confidence)) - { + if (confidence > 0.0 && m_PredictionErrorVariance > 0.0) { + double variance{a * m_PredictionErrorVariance / std::max(this->count(), 1.0) + + b * CBasicStatistics::variance(m_ValueMoments) / std::max(CBasicStatistics::count(m_ValueMoments), 1.0)}; + if (auto interval = confidenceInterval(prediction, variance, confidence)) { return *interval; } } @@ -378,30 +307,22 @@ CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, doub return {prediction, prediction}; } -CTrendComponent::TDoubleDoublePr CTrendComponent::variance(double confidence) const -{ - if (!this->initialized()) - { +CTrendComponent::TDoubleDoublePr CTrendComponent::variance(double confidence) const { + if (!this->initialized()) { return {0.0, 0.0}; } double variance{m_PredictionErrorVariance}; - if (confidence > 0.0 && m_PredictionErrorVariance > 0.0) - { + if (confidence > 0.0 && m_PredictionErrorVariance > 0.0) { double df{std::max(this->count(), 2.0) - 1.0}; - try - { + try { boost::math::chi_squared chi{df}; double ql{boost::math::quantile(chi, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(chi, (100.0 + confidence) / 200.0)}; return {ql * variance / df, qu * variance / df}; - } - catch (const std::exception &e) - { - LOG_ERROR("Failed calculating confidence interval: " << e.what() - << ", df = " << df - << ", confidence = " << confidence); + } catch (const std::exception& e) { + LOG_ERROR("Failed calculating confidence interval: " << e.what() << ", df = " << df << ", confidence = " << confidence); } } @@ -412,16 +333,13 @@ void CTrendComponent::forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, - const TSeasonalForecast &seasonal, - const TWriteForecastResult &writer) const -{ - if (endTime < startTime) - { + const TSeasonalForecast& seasonal, + const TWriteForecastResult& writer) const { + if (endTime < startTime) { LOG_ERROR("Bad forecast range: [" << startTime << "," << endTime << "]"); return; } - if (confidence < 0.0 || confidence >= 100.0) - { + if (confidence < 0.0 || confidence >= 100.0) { LOG_ERROR("Bad confidence interval: " << confidence << "%"); return; } @@ -436,52 +354,43 @@ void CTrendComponent::forecast(core_t::TTime startTime, TRegressionArrayVec models(NUMBER_MODELS); TMatrixVec modelCovariances(NUMBER_MODELS); TDoubleVec residualVariances(NUMBER_MODELS); - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { - const SModel &model{m_TrendModels[i]}; + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { + const SModel& model{m_TrendModels[i]}; model.s_Regression.parameters(models[i], MAX_CONDITION); - model.s_Regression.covariances(m_PredictionErrorVariance, - modelCovariances[i], MAX_CONDITION); + model.s_Regression.covariances(m_PredictionErrorVariance, modelCovariances[i], MAX_CONDITION); modelCovariances[i] /= std::max(model.s_Regression.count(), 1.0); - residualVariances[i] = CTools::pow2(CBasicStatistics::mean(model.s_ResidualMoments)) - + CBasicStatistics::variance(model.s_ResidualMoments); + residualVariances[i] = + CTools::pow2(CBasicStatistics::mean(model.s_ResidualMoments)) + CBasicStatistics::variance(model.s_ResidualMoments); LOG_TRACE("params = " << core::CContainerPrinter::print(models[i])); LOG_TRACE("covariances = " << 
modelCovariances[i].toDelimited()) LOG_TRACE("variances = " << residualVariances[i]); } LOG_TRACE("long time variance = " << CBasicStatistics::variance(m_ValueMoments)); - CForecastLevel level{m_ProbabilityOfLevelChangeModel, - m_MagnitudeOfLevelChangeModel, - m_TimeOfLastLevelChange}; + CForecastLevel level{m_ProbabilityOfLevelChangeModel, m_MagnitudeOfLevelChangeModel, m_TimeOfLastLevelChange}; TDoubleVec variances(NUMBER_MODELS + 1); - for (core_t::TTime time = startTime; time < endTime; time += step) - { + for (core_t::TTime time = startTime; time < endTime; time += step) { double scaledDt{scaleTime(time, startTime)}; TVector times({0.0, scaledDt, scaledDt * scaledDt}); double a{this->weightOfPrediction(time)}; double b{1.0 - a}; - for (std::size_t j = 0u; j < NUMBER_MODELS; ++j) - { + for (std::size_t j = 0u; j < NUMBER_MODELS; ++j) { modelWeights[j] *= factors[j]; errorWeights[j] *= CTools::pow2(factors[j]); } - for (std::size_t j = 0u; j < NUMBER_MODELS; ++j) - { + for (std::size_t j = 0u; j < NUMBER_MODELS; ++j) { variances[j] = times.inner(modelCovariances[j] * times) + residualVariances[j]; } variances[NUMBER_MODELS] = CBasicStatistics::variance(m_ValueMoments); - for (auto v = variances.rbegin(); v != variances.rend(); ++v) - { - *v = *std::min_element(variances.rbegin(), v+1); + for (auto v = variances.rbegin(); v != variances.rend(); ++v) { + *v = *std::min_element(variances.rbegin(), v + 1); } TMeanAccumulator variance_; - for (std::size_t j = 0u; j < NUMBER_MODELS; ++j) - { + for (std::size_t j = 0u; j < NUMBER_MODELS; ++j) { variance_.add(variances[j], errorWeights[j]); } @@ -491,31 +400,27 @@ void CTrendComponent::forecast(core_t::TTime startTime, double ql{0.0}; double qu{0.0}; - double variance{ a * CBasicStatistics::mean(variance_) - + b * CBasicStatistics::variance(m_ValueMoments)}; - if (auto interval = confidenceInterval(0.0, variance, confidence)) - { + double variance{a * CBasicStatistics::mean(variance_) + b * CBasicStatistics::variance(m_ValueMoments)}; + if (auto interval = confidenceInterval(0.0, variance, confidence)) { boost::tie(ql, qu) = *interval; } - writer(time, {level_[0] + seasonal_[0] + prediction + ql, - level_[1] + seasonal_[1] + prediction, - level_[2] + seasonal_[2] + prediction + qu}); + writer(time, + {level_[0] + seasonal_[0] + prediction + ql, + level_[1] + seasonal_[1] + prediction, + level_[2] + seasonal_[2] + prediction + qu}); } } -core_t::TTime CTrendComponent::observedInterval() const -{ +core_t::TTime CTrendComponent::observedInterval() const { return m_LastUpdate - m_FirstUpdate; } -double CTrendComponent::parameters() const -{ +double CTrendComponent::parameters() const { return static_cast(TRegression::N); } -uint64_t CTrendComponent::checksum(uint64_t seed) const -{ +uint64_t CTrendComponent::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_TargetDecayRate); seed = CChecksum::calculate(seed, m_FirstUpdate); seed = CChecksum::calculate(seed, m_LastUpdate); @@ -527,13 +432,11 @@ uint64_t CTrendComponent::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_MagnitudeOfLevelChangeModel); } -std::string CTrendComponent::print() const -{ +std::string CTrendComponent::print() const { std::ostringstream result; result << "\n===\n"; result << "Trend Models:"; - for (const auto &model : m_TrendModels) - { + for (const auto& model : m_TrendModels) { result << "\n" << model.s_Regression.print(); } result << "\n===\n"; @@ -545,147 +448,112 @@ std::string CTrendComponent::print() const return result.str(); } 
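The factors() method below drives the multi-timescale forgetting used by propagateForwardsByTime() and forecast(): over an interval dt each model ages by exp(-TIME_SCALES[i] * decayRate * dt / DAY), so the 144x model forgets in hours what the 0.05x model retains for months. A minimal sketch of that formula, where the decay rate value is an assumption chosen for illustration:

    #include <cmath>
    #include <iostream>

    int main() {
        const double TIME_SCALES[]{144.0, 72.0, 36.0, 12.0, 4.0, 1.0, 0.25, 0.05};
        const double DAY = 86400.0;      // seconds, mirroring core::constants::DAY
        double decayRate = 0.01;         // assumed default decay rate
        double interval = 7.0 * DAY;     // age the models by one week
        double factor = decayRate * interval / DAY;
        for (double scale : TIME_SCALES) {
            // Matches factors(): result[i] = std::exp(-TIME_SCALES[i] * factor).
            std::cout << "scale " << scale << " -> aging factor "
                      << std::exp(-scale * factor) << '\n';
        }
    }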
-CTrendComponent::TDoubleVec CTrendComponent::factors(core_t::TTime interval) const -{ +CTrendComponent::TDoubleVec CTrendComponent::factors(core_t::TTime interval) const { TDoubleVec result(NUMBER_MODELS); - double factor{ m_DefaultDecayRate - * static_cast<double>(interval) - / static_cast<double>(core::constants::DAY)}; - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { + double factor{m_DefaultDecayRate * static_cast<double>(interval) / static_cast<double>(core::constants::DAY)}; + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { result[i] = std::exp(-TIME_SCALES[i] * factor); } return result; } -CTrendComponent::TDoubleVec CTrendComponent::initialForecastModelWeights() const -{ +CTrendComponent::TDoubleVec CTrendComponent::initialForecastModelWeights() const { TDoubleVec result(NUMBER_MODELS); - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { - result[i] = std::exp( static_cast<double>(NUMBER_MODELS / 2) - - static_cast<double>(i)); + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { + result[i] = std::exp(static_cast<double>(NUMBER_MODELS / 2) - static_cast<double>(i)); } return result; } -CTrendComponent::TDoubleVec CTrendComponent::initialForecastErrorWeights() const -{ +CTrendComponent::TDoubleVec CTrendComponent::initialForecastErrorWeights() const { TDoubleVec result(NUMBER_MODELS + 1); - for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) - { - result[i] = std::exp( static_cast<double>(NUMBER_MODELS / 2) - - static_cast<double>(i)); + for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { + result[i] = std::exp(static_cast<double>(NUMBER_MODELS / 2) - static_cast<double>(i)); } result[NUMBER_MODELS] = result[NUMBER_MODELS - 1] / std::exp(1.0); return result; } -double CTrendComponent::count() const -{ +double CTrendComponent::count() const { TMeanAccumulator result; - for (const auto &model : m_TrendModels) - { - result.add(CTools::fastLog(model.s_Regression.count()), - CBasicStatistics::mean(model.s_Weight)); + for (const auto& model : m_TrendModels) { + result.add(CTools::fastLog(model.s_Regression.count()), CBasicStatistics::mean(model.s_Weight)); } return std::exp(CBasicStatistics::mean(result)); } -double CTrendComponent::value(const TDoubleVec &weights, - const TRegressionArrayVec &models, - double time) const -{ +double CTrendComponent::value(const TDoubleVec& weights, const TRegressionArrayVec& models, double time) const { TMeanAccumulator prediction; - for (std::size_t i = 0u; i < models.size(); ++i) - { + for (std::size_t i = 0u; i < models.size(); ++i) { prediction.add(CRegression::predict(models[i], time), weights[i]); } return CBasicStatistics::mean(prediction); } -double CTrendComponent::weightOfPrediction(core_t::TTime time) const -{ +double CTrendComponent::weightOfPrediction(core_t::TTime time) const { double interval{static_cast<double>(m_LastUpdate - m_FirstUpdate)}; - if (interval == 0.0) - { + if (interval == 0.0) { return 0.0; } - double extrapolateInterval{static_cast<double>( - CBasicStatistics::max(time - m_LastUpdate, m_FirstUpdate - time, core_t::TTime(0)))}; - if (extrapolateInterval == 0.0) - { + double extrapolateInterval{static_cast<double>(CBasicStatistics::max(time - m_LastUpdate, m_FirstUpdate - time, core_t::TTime(0)))}; + if (extrapolateInterval == 0.0) { return 1.0; } return CTools::logisticFunction(extrapolateInterval / interval, 0.1, 1.0, -1.0); } -CTrendComponent::SModel::SModel(double weight) -{ +CTrendComponent::SModel::SModel(double weight) { s_Weight.add(weight, 0.01); } -void CTrendComponent::SModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void 
CTrendComponent::SModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(WEIGHT_TAG, s_Weight.toDelimited()); - inserter.insertLevel(REGRESSION_TAG, boost::bind(&TRegression::acceptPersistInserter, - &s_Regression, _1)); + inserter.insertLevel(REGRESSION_TAG, boost::bind(&TRegression::acceptPersistInserter, &s_Regression, _1)); inserter.insertValue(RESIDUAL_MOMENTS_TAG, s_ResidualMoments.toDelimited()); } -bool CTrendComponent::SModel::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name{traverser.name()}; +bool CTrendComponent::SModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; RESTORE(WEIGHT_TAG, s_Weight.fromDelimited(traverser.value())) - RESTORE(REGRESSION_TAG, traverser.traverseSubLevel(boost::bind( - &TRegression::acceptRestoreTraverser, &s_Regression, _1))) + RESTORE(REGRESSION_TAG, traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, &s_Regression, _1))) RESTORE(RESIDUAL_MOMENTS_TAG, s_ResidualMoments.fromDelimited(traverser.value())) - } - while (traverser.next()); + } while (traverser.next()); return true; } -uint64_t CTrendComponent::SModel::checksum(uint64_t seed) const -{ +uint64_t CTrendComponent::SModel::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, s_Weight); seed = CChecksum::calculate(seed, s_Regression); return CChecksum::calculate(seed, s_ResidualMoments); } -CTrendComponent::CForecastLevel::CForecastLevel(const CNaiveBayes &probability, - const CNormalMeanPrecConjugate &magnitude, +CTrendComponent::CForecastLevel::CForecastLevel(const CNaiveBayes& probability, + const CNormalMeanPrecConjugate& magnitude, core_t::TTime timeOfLastChange, - std::size_t numberPaths) : - m_Probability(probability), - m_Magnitude(magnitude), - m_Levels(numberPaths), - m_TimesOfLastChange(numberPaths, timeOfLastChange), - m_ProbabilitiesOfChange(numberPaths, 0.0) -{ + std::size_t numberPaths) + : m_Probability(probability), + m_Magnitude(magnitude), + m_Levels(numberPaths), + m_TimesOfLastChange(numberPaths, timeOfLastChange), + m_ProbabilitiesOfChange(numberPaths, 0.0) { m_Uniform01.reserve(numberPaths); } -CTrendComponent::TDouble3Vec -CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, double confidence) -{ +CTrendComponent::TDouble3Vec CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, double confidence) { TDouble3Vec result{0.0, 0.0, 0.0}; - if (m_Probability.initialized()) - { + if (m_Probability.initialized()) { CSampling::uniformSample(0.0, 1.0, m_Levels.size(), m_Uniform01); bool reorder{false}; - for (std::size_t i = 0u; i < m_Levels.size(); ++i) - { + for (std::size_t i = 0u; i < m_Levels.size(); ++i) { double dt{static_cast(time - m_TimesOfLastChange[i])}; double x = m_Levels[i] + prediction; double p{m_Probability.classProbability(LEVEL_CHANGE_LABEL, {{dt}, {x}})}; m_ProbabilitiesOfChange[i] = std::max(m_ProbabilitiesOfChange[i], p); - if (m_Uniform01[i] < m_ProbabilitiesOfChange[i]) - { + if (m_Uniform01[i] < m_ProbabilitiesOfChange[i]) { double stepMean{m_Magnitude.marginalLikelihoodMean()}; double stepVariance{m_Magnitude.marginalLikelihoodVariance()}; m_Levels[i] += CSampling::normalSample(m_Rng, stepMean, stepVariance); @@ -694,18 +562,13 @@ CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, reorder = true; } } - if (reorder) - { + if (reorder) { 
COrderings::simultaneousSort(m_Levels, m_TimesOfLastChange, m_ProbabilitiesOfChange); } double rollouts{static_cast<double>(m_Levels.size())}; - std::size_t lower{std::min(static_cast<std::size_t>( - (100.0 - confidence) / 200.0 * rollouts + 0.5), - m_Levels.size())}; - std::size_t upper{std::min(static_cast<std::size_t>( - (100.0 + confidence) / 200.0 * rollouts + 0.5), - m_Levels.size() - 1)}; + std::size_t lower{std::min(static_cast<std::size_t>((100.0 - confidence) / 200.0 * rollouts + 0.5), m_Levels.size())}; + std::size_t upper{std::min(static_cast<std::size_t>((100.0 + confidence) / 200.0 * rollouts + 0.5), m_Levels.size() - 1)}; result[0] = m_Levels[lower]; result[1] = CBasicStatistics::median(m_Levels); @@ -714,6 +577,5 @@ CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, return result; } - } } diff --git a/lib/maths/CTrendTests.cc b/lib/maths/CTrendTests.cc index 70d1f047b9..cd298da020 100644 --- a/lib/maths/CTrendTests.cc +++ b/lib/maths/CTrendTests.cc @@ -8,13 +8,13 @@ #include #include -#include #include #include #include #include #include #include +#include #include #include @@ -38,9 +38,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -48,12 +48,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleVec = std::vector<double>; using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator; @@ -61,36 +58,24 @@ using TTimeVec = std::vector<core_t::TTime>; //! \brief Sets the timezone to a specified value in a constructor //! call so it can be called once by static initialisation. -struct SSetTimeZone -{ - SSetTimeZone(const std::string &zone) - { - core::CTimezone::instance().timezoneName(zone); - } +struct SSetTimeZone { + SSetTimeZone(const std::string& zone) { core::CTimezone::instance().timezoneName(zone); } }; //! Generate \p n samples uniformly in the interval [\p a, \p b]. template<typename ITR> -void generateUniformSamples(boost::random::mt19937_64 &rng, - double a, - double b, - std::size_t n, - ITR samples) -{ +void generateUniformSamples(boost::random::mt19937_64& rng, double a, double b, std::size_t n, ITR samples) { boost::random::uniform_real_distribution<> uniform(a, b); std::generate_n(samples, n, boost::bind(uniform, boost::ref(rng))); } //! Force the sample mean to zero. -void zeroMean(TDoubleVec &samples) -{ +void zeroMean(TDoubleVec& samples) { TMeanAccumulator mean; - for (auto sample : samples) - { + for (auto sample : samples) { mean.add(sample); } - for (auto &sample : samples) - { + for (auto& sample : samples) { sample -= CBasicStatistics::mean(mean); } } @@ -122,23 +107,19 @@ const std::string ERROR_SUMS_TAG("e"); //! The maximum significance of a test statistic. const double MAXIMUM_SIGNIFICANCE = 0.001; //! Forward day in seconds into scope. -const core_t::TTime DAY = core::constants::DAY; +const core_t::TTime DAY = core::constants::DAY; //! Forward week in seconds into scope. 
const core_t::TTime WEEK = core::constants::WEEK; - } //////// CRandomizedPeriodicityTest //////// -CRandomizedPeriodicityTest::CRandomizedPeriodicityTest() : - m_DayRefreshedProjections(-DAY_RESAMPLE_INTERVAL), - m_WeekRefreshedProjections(-DAY_RESAMPLE_INTERVAL) -{ +CRandomizedPeriodicityTest::CRandomizedPeriodicityTest() + : m_DayRefreshedProjections(-DAY_RESAMPLE_INTERVAL), m_WeekRefreshedProjections(-DAY_RESAMPLE_INTERVAL) { resample(0); } -bool CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { // Note we require that we only ever do one persistence per process. std::size_t index = 0; @@ -146,12 +127,10 @@ bool CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser(core::CStateResto core::CScopedLock lock(ms_Lock); - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); - if (name == RNG_TAG) - { + if (name == RNG_TAG) { // Replace '_' with space std::string value(traverser.value()); std::replace(value.begin(), value.end(), '_', ' '); @@ -185,14 +164,12 @@ bool CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser(core::CStateResto double d, core::CStringUtils::stringToType(traverser.value(), d), ms_WeekPeriodicProjections[index].push_back(d)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CRandomizedPeriodicityTest::staticsAcceptPersistInserter(core::CStatePersistInserter &inserter) -{ +void CRandomizedPeriodicityTest::staticsAcceptPersistInserter(core::CStatePersistInserter& inserter) { // Note we require that we only ever do one persistence per process. core::CScopedLock lock(ms_Lock); @@ -205,53 +182,40 @@ void CRandomizedPeriodicityTest::staticsAcceptPersistInserter(core::CStatePersis inserter.insertValue(RNG_TAG, rng); inserter.insertValue(DAY_RESAMPLED_TAG, ms_DayResampled.load()); inserter.insertValue(WEEK_RESAMPLED_TAG, ms_WeekResampled.load()); - for (std::size_t i = 0; i < N; ++i) - { + for (std::size_t i = 0; i < N; ++i) { inserter.insertValue(ARRAY_INDEX_TAG, i); - for (auto rand : ms_DayRandomProjections[i]) - { + for (auto rand : ms_DayRandomProjections[i]) { inserter.insertValue(DAY_RANDOM_PROJECTIONS_TAG, rand); } - for (auto rand : ms_DayPeriodicProjections[i]) - { + for (auto rand : ms_DayPeriodicProjections[i]) { inserter.insertValue(DAY_PERIODIC_PROJECTIONS_TAG, rand); } - for (auto rand : ms_WeekRandomProjections[i]) - { + for (auto rand : ms_WeekRandomProjections[i]) { inserter.insertValue(WEEK_RANDOM_PROJECTIONS_TAG, rand); } - for (auto rand : ms_WeekPeriodicProjections[i]) - { + for (auto rand : ms_WeekPeriodicProjections[i]) { inserter.insertValue(WEEK_PERIODIC_PROJECTIONS_TAG, rand); } } } -bool CRandomizedPeriodicityTest::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CRandomizedPeriodicityTest::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE(DAY_PROJECTIONS_TAG, m_DayProjections.fromDelimited(traverser.value())) RESTORE(DAY_STATISTICS_TAG, m_DayStatistics.fromDelimited(traverser.value())) - RESTORE(DAY_REFRESHED_PROJECTIONS_TAG, - core::CStringUtils::stringToType(traverser.value(), - m_DayRefreshedProjections)) + RESTORE(DAY_REFRESHED_PROJECTIONS_TAG, core::CStringUtils::stringToType(traverser.value(), m_DayRefreshedProjections))
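The decision rule in test() further below compares the mean squared periodic projection S(1) with the mean squared random projection S(0) via a right-tail F-test: a small p-value means the periodic projections capture significantly more variance than chance. A sketch of that statistic follows; approximating CStatisticalTests::rightTailFTest with Boost's F distribution is an assumption made for illustration, as are the sample values:

    #include <boost/math/distributions/fisher_f.hpp>
    #include <iostream>

    // Right-tail p-value of an F statistic with d1 and d2 degrees of freedom.
    double rightTailFTestSketch(double ratio, double d1, double d2) {
        boost::math::fisher_f_distribution<> F(d1, d2);
        return boost::math::cdf(boost::math::complement(F, ratio));
    }

    int main() {
        double s0 = 1.0; // mean squared random projection (assumed)
        double s1 = 4.0; // mean squared periodic projection (assumed)
        double n = 40.0; // effective sample count (assumed)
        // Significance well below 1e-3 would make test() return true.
        std::cout << "significance = " << rightTailFTestSketch(s1 / s0, n, n) << '\n';
    }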
RESTORE(WEEK_PROJECTIONS_TAG, m_WeekProjections.fromDelimited(traverser.value())) RESTORE(WEEK_STATISTICS_TAG, m_WeekStatistics.fromDelimited(traverser.value())) RESTORE(DAY_STATISTICS_TAG, m_DayStatistics.fromDelimited(traverser.value())) - RESTORE(WEEK_REFRESHED_PROJECTIONS_TAG, - core::CStringUtils::stringToType(traverser.value(), - m_WeekRefreshedProjections)) - } - while (traverser.next()); + RESTORE(WEEK_REFRESHED_PROJECTIONS_TAG, core::CStringUtils::stringToType(traverser.value(), m_WeekRefreshedProjections)) + } while (traverser.next()); return true; } -void CRandomizedPeriodicityTest::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CRandomizedPeriodicityTest::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DAY_PROJECTIONS_TAG, m_DayProjections.toDelimited()); inserter.insertValue(DAY_STATISTICS_TAG, m_DayStatistics.toDelimited()); inserter.insertValue(DAY_REFRESHED_PROJECTIONS_TAG, m_DayRefreshedProjections); @@ -260,18 +224,15 @@ void CRandomizedPeriodicityTest::acceptPersistInserter(core::CStatePersistInsert inserter.insertValue(WEEK_REFRESHED_PROJECTIONS_TAG, m_WeekRefreshedProjections); } -void CRandomizedPeriodicityTest::add(core_t::TTime time, double value) -{ +void CRandomizedPeriodicityTest::add(core_t::TTime time, double value) { resample(time); - if (time >= m_DayRefreshedProjections + DAY_RESAMPLE_INTERVAL) - { + if (time >= m_DayRefreshedProjections + DAY_RESAMPLE_INTERVAL) { LOG_TRACE("Updating day statistics"); updateStatistics(m_DayProjections, m_DayStatistics); m_DayRefreshedProjections = CIntegerTools::floor(time, DAY_RESAMPLE_INTERVAL); } - if (time >= m_WeekRefreshedProjections + WEEK_RESAMPLE_INTERVAL) - { + if (time >= m_WeekRefreshedProjections + WEEK_RESAMPLE_INTERVAL) { LOG_TRACE("Updating week statistics"); updateStatistics(m_WeekProjections, m_WeekStatistics); m_WeekRefreshedProjections = CIntegerTools::floor(time, WEEK_RESAMPLE_INTERVAL); @@ -279,80 +240,61 @@ void CRandomizedPeriodicityTest::add(core_t::TTime time, double value) TVector2N daySample; TVector2N weekSample; - std::size_t td = static_cast<std::size_t>( (time % DAY_RESAMPLE_INTERVAL) - / SAMPLE_INTERVAL); - std::size_t d = static_cast<std::size_t>( (time % DAY) - / SAMPLE_INTERVAL); - std::size_t tw = static_cast<std::size_t>( (time % WEEK_RESAMPLE_INTERVAL) - / SAMPLE_INTERVAL); - std::size_t w = static_cast<std::size_t>( (time % WEEK) - / SAMPLE_INTERVAL); - - for (std::size_t i = 0u; i < N; ++i) - { - daySample(2*i+0) = ms_DayRandomProjections[i][td] * value; - daySample(2*i+1) = ms_DayPeriodicProjections[i][d] * value; - weekSample(2*i+0) = ms_WeekRandomProjections[i][tw] * value; - weekSample(2*i+1) = ms_WeekPeriodicProjections[i][w] * value; + std::size_t td = static_cast<std::size_t>((time % DAY_RESAMPLE_INTERVAL) / SAMPLE_INTERVAL); + std::size_t d = static_cast<std::size_t>((time % DAY) / SAMPLE_INTERVAL); + std::size_t tw = static_cast<std::size_t>((time % WEEK_RESAMPLE_INTERVAL) / SAMPLE_INTERVAL); + std::size_t w = static_cast<std::size_t>((time % WEEK) / SAMPLE_INTERVAL); + + for (std::size_t i = 0u; i < N; ++i) { + daySample(2 * i + 0) = ms_DayRandomProjections[i][td] * value; + daySample(2 * i + 1) = ms_DayPeriodicProjections[i][d] * value; + weekSample(2 * i + 0) = ms_WeekRandomProjections[i][tw] * value; + weekSample(2 * i + 1) = ms_WeekPeriodicProjections[i][w] * value; } m_DayProjections.add(daySample); m_WeekProjections.add(weekSample); } -bool CRandomizedPeriodicityTest::test() const -{ +bool CRandomizedPeriodicityTest::test() const { static const double SIGNIFICANCE = 1e-3; - try - { + try { double nd = 
CBasicStatistics::count(m_DayStatistics); - if (nd >= 1.0) - { + if (nd >= 1.0) { TVector2 S = CBasicStatistics::mean(m_DayStatistics); LOG_TRACE("Day test statistic, S = " << S << ", n = " << nd); - double ratio = S(0) == S(1) ? - 1.0 : (S(0) == 0.0 ? boost::numeric::bounds<double>::highest() : - static_cast<double>(S(1) / S(0))); + double ratio = + S(0) == S(1) ? 1.0 : (S(0) == 0.0 ? boost::numeric::bounds<double>::highest() : static_cast<double>(S(1) / S(0))); double significance = CStatisticalTests::rightTailFTest(ratio, nd, nd); LOG_TRACE("Daily significance = " << significance); - if (significance < SIGNIFICANCE) - { + if (significance < SIGNIFICANCE) { return true; } } double nw = CBasicStatistics::count(m_WeekStatistics); - if (nw >= 1.0) - { + if (nw >= 1.0) { TVector2 S = CBasicStatistics::mean(m_WeekStatistics); LOG_TRACE("Week test statistic, S = " << S); - double ratio = S(0) == S(1) ? - 1.0 : (S(0) == 0.0 ? boost::numeric::bounds<double>::highest() : - static_cast<double>(S(1) / S(0))); + double ratio = + S(0) == S(1) ? 1.0 : (S(0) == 0.0 ? boost::numeric::bounds<double>::highest() : static_cast<double>(S(1) / S(0))); double significance = CStatisticalTests::rightTailFTest(ratio, nw, nw); LOG_TRACE("Weekly significance = " << significance); - if (significance < SIGNIFICANCE) - { + if (significance < SIGNIFICANCE) { return true; } } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to test for periodicity: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to test for periodicity: " << e.what()); } return false; } -void CRandomizedPeriodicityTest::reset() -{ +void CRandomizedPeriodicityTest::reset() { core::CScopedLock lock(ms_Lock); ms_Rng = boost::random::mt19937_64(); - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { ms_DayRandomProjections[i].clear(); ms_DayPeriodicProjections[i].clear(); ms_WeekRandomProjections[i].clear(); @@ -362,8 +304,7 @@ void CRandomizedPeriodicityTest::reset() ms_WeekResampled = -WEEK_RESAMPLE_INTERVAL; } -uint64_t CRandomizedPeriodicityTest::checksum(uint64_t seed) const -{ +uint64_t CRandomizedPeriodicityTest::checksum(uint64_t seed) const { // This checksum is problematic until we switch to using our // own rng for each test. 
//seed = CChecksum::calculate(seed, m_DayProjections); @@ -375,22 +316,18 @@ uint64_t CRandomizedPeriodicityTest::checksum(uint64_t seed) const return seed; } -void CRandomizedPeriodicityTest::updateStatistics(TVector2NMeanAccumulator &projections, - TVector2MeanAccumulator &statistics) -{ +void CRandomizedPeriodicityTest::updateStatistics(TVector2NMeanAccumulator& projections, TVector2MeanAccumulator& statistics) { static const double ALPHA = 0.1; - if (CBasicStatistics::count(projections) > 0.0) - { - const TVector2N &mean = CBasicStatistics::mean(projections); + if (CBasicStatistics::count(projections) > 0.0) { + const TVector2N& mean = CBasicStatistics::mean(projections); LOG_TRACE("mean = " << mean); TVector2MeanAccumulator statistic; - for (std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { TVector2 s; - s(0) = mean(2*i+0) * mean(2*i+0); - s(1) = mean(2*i+1) * mean(2*i+1); + s(0) = mean(2 * i + 0) * mean(2 * i + 0); + s(1) = mean(2 * i + 1) * mean(2 * i + 1); statistic.add(s); } statistics += statistic; @@ -401,37 +338,24 @@ void CRandomizedPeriodicityTest::updateStatistics(TVector2NMeanAccumulator &proj projections = TVector2NMeanAccumulator(); } -void CRandomizedPeriodicityTest::resample(core_t::TTime time) -{ - if (time >= ms_DayResampled.load(std::memory_order_acquire) + DAY_RESAMPLE_INTERVAL) - { +void CRandomizedPeriodicityTest::resample(core_t::TTime time) { + if (time >= ms_DayResampled.load(std::memory_order_acquire) + DAY_RESAMPLE_INTERVAL) { core::CScopedLock lock(ms_Lock); LOG_TRACE("Updating daily random projections at " << time); - if (time >= ms_DayResampled.load(std::memory_order_relaxed) + DAY_RESAMPLE_INTERVAL) - { - resample(DAY, - DAY_RESAMPLE_INTERVAL, - ms_DayPeriodicProjections, - ms_DayRandomProjections); - ms_DayResampled.store(CIntegerTools::floor(time, DAY_RESAMPLE_INTERVAL), - std::memory_order_release); + if (time >= ms_DayResampled.load(std::memory_order_relaxed) + DAY_RESAMPLE_INTERVAL) { + resample(DAY, DAY_RESAMPLE_INTERVAL, ms_DayPeriodicProjections, ms_DayRandomProjections); + ms_DayResampled.store(CIntegerTools::floor(time, DAY_RESAMPLE_INTERVAL), std::memory_order_release); } } - if (time >= ms_WeekResampled.load(std::memory_order_acquire) + WEEK_RESAMPLE_INTERVAL) - { + if (time >= ms_WeekResampled.load(std::memory_order_acquire) + WEEK_RESAMPLE_INTERVAL) { core::CScopedLock lock(ms_Lock); LOG_TRACE("Updating weekly random projections at " << time); - if (time >= ms_WeekResampled.load(std::memory_order_relaxed) + WEEK_RESAMPLE_INTERVAL) - { - resample(WEEK, - WEEK_RESAMPLE_INTERVAL, - ms_WeekPeriodicProjections, - ms_WeekRandomProjections); - ms_WeekResampled.store(CIntegerTools::floor(time, WEEK_RESAMPLE_INTERVAL), - std::memory_order_release); + if (time >= ms_WeekResampled.load(std::memory_order_relaxed) + WEEK_RESAMPLE_INTERVAL) { + resample(WEEK, WEEK_RESAMPLE_INTERVAL, ms_WeekPeriodicProjections, ms_WeekRandomProjections); + ms_WeekResampled.store(CIntegerTools::floor(time, WEEK_RESAMPLE_INTERVAL), std::memory_order_release); } } } @@ -439,25 +363,18 @@ void CRandomizedPeriodicityTest::resample(core_t::TTime time) void CRandomizedPeriodicityTest::resample(core_t::TTime period, core_t::TTime resampleInterval, TDoubleVec (&periodicProjections)[N], - TDoubleVec (&randomProjections)[N]) -{ + TDoubleVec (&randomProjections)[N]) { std::size_t n = static_cast<std::size_t>(period / SAMPLE_INTERVAL); std::size_t t = static_cast<std::size_t>(resampleInterval / SAMPLE_INTERVAL); std::size_t p = static_cast<std::size_t>(resampleInterval / period); - for 
(std::size_t i = 0u; i < N; ++i) - { + for (std::size_t i = 0u; i < N; ++i) { periodicProjections[i].resize(n); generateUniformSamples(ms_Rng, -1.0, 1.0, n, periodicProjections[i].begin()); zeroMean(periodicProjections[i]); randomProjections[i].resize(t); - for (std::size_t j = 0u; j < p; ++j) - { - std::copy(periodicProjections[i].begin(), - periodicProjections[i].end(), - randomProjections[i].begin() + j * n); - CSampling::random_shuffle(ms_Rng, - randomProjections[i].begin() + j * n, - randomProjections[i].begin() + (j+1) * n); + for (std::size_t j = 0u; j < p; ++j) { + std::copy(periodicProjections[i].begin(), periodicProjections[i].end(), randomProjections[i].begin() + j * n); + CSampling::random_shuffle(ms_Rng, randomProjections[i].begin() + j * n, randomProjections[i].begin() + (j + 1) * n); } } } @@ -476,83 +393,64 @@ core::CMutex CRandomizedPeriodicityTest::ms_Lock; //////// CCalendarCyclicTest //////// -CCalendarCyclicTest::CCalendarCyclicTest(double decayRate) : - m_DecayRate(decayRate), - m_Bucket(0), - m_ErrorQuantiles(CQuantileSketch::E_Linear, 20), - m_ErrorCounts(WINDOW / BUCKET) -{ +CCalendarCyclicTest::CCalendarCyclicTest(double decayRate) + : m_DecayRate(decayRate), m_Bucket(0), m_ErrorQuantiles(CQuantileSketch::E_Linear, 20), m_ErrorCounts(WINDOW / BUCKET) { static const SSetTimeZone timezone("GMT"); m_ErrorSums.reserve(WINDOW / BUCKET / 10); } -bool CCalendarCyclicTest::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CCalendarCyclicTest::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(BUCKET_TAG, m_Bucket) - RESTORE(ERROR_QUANTILES_TAG, traverser.traverseSubLevel( - boost::bind(&CQuantileSketch::acceptRestoreTraverser, &m_ErrorQuantiles, _1))) + RESTORE(ERROR_QUANTILES_TAG, + traverser.traverseSubLevel(boost::bind(&CQuantileSketch::acceptRestoreTraverser, &m_ErrorQuantiles, _1))) RESTORE(ERROR_COUNTS_TAG, core::CPersistUtils::restore(ERROR_COUNTS_TAG, m_ErrorCounts, traverser)) RESTORE(ERROR_SUMS_TAG, core::CPersistUtils::fromString(traverser.value(), m_ErrorSums)) - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CCalendarCyclicTest::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CCalendarCyclicTest::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(BUCKET_TAG, m_Bucket); - inserter.insertLevel(ERROR_QUANTILES_TAG, - boost::bind(&CQuantileSketch::acceptPersistInserter, &m_ErrorQuantiles, _1)); + inserter.insertLevel(ERROR_QUANTILES_TAG, boost::bind(&CQuantileSketch::acceptPersistInserter, &m_ErrorQuantiles, _1)); core::CPersistUtils::persist(ERROR_COUNTS_TAG, m_ErrorCounts, inserter); inserter.insertValue(ERROR_SUMS_TAG, core::CPersistUtils::toString(m_ErrorSums)); } -void CCalendarCyclicTest::propagateForwardsByTime(double time) -{ - if (!CMathsFuncs::isFinite(time) || time < 0.0) - { +void CCalendarCyclicTest::propagateForwardsByTime(double time) { + if (!CMathsFuncs::isFinite(time) || time < 0.0) { LOG_ERROR("Bad propagation time " << time); return; } m_ErrorQuantiles.age(std::exp(-m_DecayRate * time)); } -void CCalendarCyclicTest::add(core_t::TTime time, double error, double weight) -{ +void CCalendarCyclicTest::add(core_t::TTime time, double error, double weight) { error = std::fabs(error); m_ErrorQuantiles.add(error, weight); - if (m_ErrorQuantiles.count() > 100.0) - { + if 
(m_ErrorQuantiles.count() > 100.0) { core_t::TTime bucket = CIntegerTools::floor(time, BUCKET); - if (m_ErrorCounts.empty()) - { + if (m_ErrorCounts.empty()) { m_ErrorCounts.push_back(0); - } - else - { - for (core_t::TTime i = m_Bucket; i < bucket; i += BUCKET) - { + } else { + for (core_t::TTime i = m_Bucket; i < bucket; i += BUCKET) { m_ErrorCounts.push_back(0); } } - uint32_t &count = m_ErrorCounts.back(); + uint32_t& count = m_ErrorCounts.back(); count += (count % COUNT_BITS < COUNT_BITS - 1) ? 1 : 0; double high; m_ErrorQuantiles.quantile(LARGE_ERROR_PERCENTILE, high); - m_ErrorSums.erase(m_ErrorSums.begin(), - std::find_if(m_ErrorSums.begin(), m_ErrorSums.end(), - [bucket](const TTimeFloatPr &error_) - { return error_.first + WINDOW > bucket; })); - if (error >= high) - { + m_ErrorSums.erase(m_ErrorSums.begin(), std::find_if(m_ErrorSums.begin(), m_ErrorSums.end(), [bucket](const TTimeFloatPr& error_) { + return error_.first + WINDOW > bucket; + })); + if (error >= high) { count += (count < 0x100000000 - COUNT_BITS) ? COUNT_BITS : 0; m_ErrorSums[bucket] += this->winsorise(error); } @@ -561,15 +459,11 @@ void CCalendarCyclicTest::add(core_t::TTime time, double error, double weight) } } -CCalendarCyclicTest::TOptionalFeature CCalendarCyclicTest::test() const -{ +CCalendarCyclicTest::TOptionalFeature CCalendarCyclicTest::test() const { // The statistics we need in order to be able to test for calendar // features. - struct SStats - { - SStats() : - s_Offset(0), s_Repeats(0), s_Sum(0.0), s_Count(0.0), s_Significance(0.0) - {} + struct SStats { + SStats() : s_Offset(0), s_Repeats(0), s_Sum(0.0), s_Count(0.0), s_Significance(0.0) {} core_t::TTime s_Offset; unsigned int s_Repeats; double s_Sum; @@ -591,18 +485,14 @@ CCalendarCyclicTest::TOptionalFeature CCalendarCyclicTest::test() const TFeatureStatsFMap stats; stats.reserve(m_ErrorSums.size()); - for (auto offset : TIMEZONE_OFFSETS) - { - for (const auto &error : m_ErrorSums) - { - std::size_t i = m_ErrorCounts.size() - 1 - - static_cast<std::size_t>((m_Bucket - error.first) / BUCKET); + for (auto offset : TIMEZONE_OFFSETS) { + for (const auto& error : m_ErrorSums) { + std::size_t i = m_ErrorCounts.size() - 1 - static_cast<std::size_t>((m_Bucket - error.first) / BUCKET); double n = static_cast<double>(m_ErrorCounts[i] % COUNT_BITS); double x = static_cast<double>(m_ErrorCounts[i] / COUNT_BITS); double s = this->significance(n, x); - for (auto feature : CCalendarFeature::features(error.first + BUCKET / 2 + offset)) - { - SStats &stat = stats[feature]; + for (auto feature : CCalendarFeature::features(error.first + BUCKET / 2 + offset)) { + SStats& stat = stats[feature]; ++stat.s_Repeats; stat.s_Offset = offset; stat.s_Sum += error.second; @@ -616,17 +506,13 @@ CCalendarCyclicTest::TOptionalFeature CCalendarCyclicTest::test() const m_ErrorQuantiles.quantile(50.0, errorThreshold); errorThreshold *= 2.0; - for (const auto &stat : stats) - { + for (const auto& stat : stats) { CCalendarFeature feature = stat.first; double r = static_cast<double>(stat.second.s_Repeats); double x = stat.second.s_Count; double e = stat.second.s_Sum; double s = stat.second.s_Significance; - if ( stat.second.s_Repeats >= MINIMUM_REPEATS - && e > errorThreshold * x - && std::pow(s, r) < MAXIMUM_SIGNIFICANCE) - { + if (stat.second.s_Repeats >= MINIMUM_REPEATS && e > errorThreshold * x && std::pow(s, r) < MAXIMUM_SIGNIFICANCE) { result.add({e, stat.second.s_Offset, feature}); } } @@ -634,47 +520,35 @@ CCalendarCyclicTest::TOptionalFeature CCalendarCyclicTest::test() const return result.count() > 0 ? 
result[0].third : TOptionalFeature(); } -uint64_t CCalendarCyclicTest::checksum(uint64_t seed) const -{ +uint64_t CCalendarCyclicTest::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_ErrorQuantiles); seed = CChecksum::calculate(seed, m_ErrorCounts); return CChecksum::calculate(seed, m_ErrorSums); } -void CCalendarCyclicTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CCalendarCyclicTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCalendarCyclicTest"); core::CMemoryDebug::dynamicSize("m_ErrorQuantiles", m_ErrorQuantiles, mem); core::CMemoryDebug::dynamicSize("m_ErrorCounts", m_ErrorCounts, mem); core::CMemoryDebug::dynamicSize("m_ErrorSums", m_ErrorSums, mem); } -std::size_t CCalendarCyclicTest::memoryUsage() const -{ - return core::CMemory::dynamicSize(m_ErrorQuantiles) - + core::CMemory::dynamicSize(m_ErrorCounts) - + core::CMemory::dynamicSize(m_ErrorSums); +std::size_t CCalendarCyclicTest::memoryUsage() const { + return core::CMemory::dynamicSize(m_ErrorQuantiles) + core::CMemory::dynamicSize(m_ErrorCounts) + + core::CMemory::dynamicSize(m_ErrorSums); } -double CCalendarCyclicTest::winsorise(double error) const -{ +double CCalendarCyclicTest::winsorise(double error) const { double high; m_ErrorQuantiles.quantile(99.5, high); return std::min(error, high); } -double CCalendarCyclicTest::significance(double n, double x) const -{ - try - { +double CCalendarCyclicTest::significance(double n, double x) const { + try { boost::math::binomial binom(n, 1.0 - LARGE_ERROR_PERCENTILE / 100.0); return std::min(2.0 * CTools::safeCdfComplement(binom, x - 1.0), 1.0); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to calculate significance: " << e.what() - << " n = " << n << " x = " << x); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to calculate significance: " << e.what() << " n = " << n << " x = " << x); } return 1.0; } @@ -685,6 +559,5 @@ const unsigned int CCalendarCyclicTest::MINIMUM_REPEATS{4}; const uint32_t CCalendarCyclicTest::COUNT_BITS{0x100000}; // TODO support offsets of +/- 12hrs for time zones. const TTimeVec CCalendarCyclicTest::TIMEZONE_OFFSETS{0}; - } } diff --git a/lib/maths/CXMeansOnline1d.cc b/lib/maths/CXMeansOnline1d.cc index 81898071fd..8370489516 100644 --- a/lib/maths/CXMeansOnline1d.cc +++ b/lib/maths/CXMeansOnline1d.cc @@ -9,21 +9,21 @@ #include #include #include -#include #include #include #include +#include #include #include #include #include #include -#include #include #include #include #include +#include #include #include @@ -35,18 +35,15 @@ #include #include #include -#include #include +#include #include #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { -namespace -{ +namespace { using TDouble1Vec = core::CSmallVector<double, 1>; using TDouble4Vec = core::CSmallVector<double, 4>; @@ -56,64 +53,43 @@ using TSizeVec = std::vector<std::size_t>; using TTuple = CNaturalBreaksClassifier::TTuple; using TTupleVec = CNaturalBreaksClassifier::TTupleVec; -namespace detail -{ +namespace detail { using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator; //! \brief Orders two clusters by their centres. 
-struct SClusterCentreLess -{ - bool operator()(const CXMeansOnline1d::CCluster &lhs, - const CXMeansOnline1d::CCluster &rhs) const - { +struct SClusterCentreLess { + bool operator()(const CXMeansOnline1d::CCluster& lhs, const CXMeansOnline1d::CCluster& rhs) const { return lhs.centre() < rhs.centre(); } - bool operator()(double lhs, - const CXMeansOnline1d::CCluster &rhs) const - { - return lhs < rhs.centre(); - } - bool operator()(const CXMeansOnline1d::CCluster &lhs, - double rhs) const - { - return lhs.centre() < rhs; - } + bool operator()(double lhs, const CXMeansOnline1d::CCluster& rhs) const { return lhs < rhs.centre(); } + bool operator()(const CXMeansOnline1d::CCluster& lhs, double rhs) const { return lhs.centre() < rhs; } }; //! Get \p x times \p x. -double pow2(double x) -{ +double pow2(double x) { return x * x; } //! Get the minimum of \p x, \p y and \p z. -double min(double x, double y, double z) -{ +double min(double x, double y, double z) { return std::min(std::min(x, y), z); } //! Get the log of the likelihood that \p point is from the \p normal. maths_t::EFloatingPointErrorStatus -logLikelihoodFromCluster(double point, - const CNormalMeanPrecConjugate &normal, - double probability, - double &result) -{ +logLikelihoodFromCluster(double point, const CNormalMeanPrecConjugate& normal, double probability, double& result) { result = core::constants::LOG_MIN_DOUBLE - 1.0; double likelihood; maths_t::EFloatingPointErrorStatus status = - normal.jointLogMarginalLikelihood(CConstantWeights::COUNT, {point}, - CConstantWeights::SINGLE_UNIT, likelihood); + normal.jointLogMarginalLikelihood(CConstantWeights::COUNT, {point}, CConstantWeights::SINGLE_UNIT, likelihood); - if (status & maths_t::E_FpFailed) - { + if (status & maths_t::E_FpFailed) { LOG_ERROR("Unable to compute likelihood for: " << point); return status; } - if (status & maths_t::E_FpOverflowed) - { + if (status & maths_t::E_FpOverflowed) { result = likelihood; return status; } @@ -124,27 +100,22 @@ logLikelihoodFromCluster(double point, //! Get the moments of \p categories and the splits into //! [\p start, \p split) and [\p split, \p end). -void candidates(const TTupleVec &categories, +void candidates(const TTupleVec& categories, std::size_t start, std::size_t split, std::size_t end, - TMeanVarAccumulator &mv, - TMeanVarAccumulator &mvl, - TMeanVarAccumulator &mvr) -{ - LOG_TRACE("categories = " - << core::CContainerPrinter::print(categories.begin() + start, - categories.begin() + end)); + TMeanVarAccumulator& mv, + TMeanVarAccumulator& mvl, + TMeanVarAccumulator& mvr) { + LOG_TRACE("categories = " << core::CContainerPrinter::print(categories.begin() + start, categories.begin() + end)); LOG_TRACE("split at = " << split); - for (std::size_t i = start; i < split; ++i) - { - mv += categories[i]; + for (std::size_t i = start; i < split; ++i) { + mv += categories[i]; mvl += categories[i]; } - for (std::size_t i = split; i < end; ++i) - { - mv += categories[i]; + for (std::size_t i = split; i < end; ++i) { + mv += categories[i]; mvr += categories[i]; } @@ -152,30 +123,36 @@ void candidates(const TTupleVec &categories, } //! Compute the mean of \p category. 
-double mean(maths_t::EDataType dataType, const TTuple &category) -{ +double mean(maths_t::EDataType dataType, const TTuple& category) { double result = CBasicStatistics::mean(category); - switch (dataType) - { - case maths_t::E_DiscreteData: break; - case maths_t::E_IntegerData: result += 0.5; break; - case maths_t::E_ContinuousData: break; - case maths_t::E_MixedData: break; + switch (dataType) { + case maths_t::E_DiscreteData: + break; + case maths_t::E_IntegerData: + result += 0.5; + break; + case maths_t::E_ContinuousData: + break; + case maths_t::E_MixedData: + break; } return result; } //! Compute the variance of \p category. -double variance(maths_t::EDataType dataType, const TTuple &category) -{ +double variance(maths_t::EDataType dataType, const TTuple& category) { double n = CBasicStatistics::count(category); double result = (1.0 + 1.0 / n) * CBasicStatistics::maximumLikelihoodVariance(category); - switch (dataType) - { - case maths_t::E_DiscreteData: break; - case maths_t::E_IntegerData: result += 1.0 / 12.0; break; - case maths_t::E_ContinuousData: break; - case maths_t::E_MixedData: break; + switch (dataType) { + case maths_t::E_DiscreteData: + break; + case maths_t::E_IntegerData: + result += 1.0 / 12.0; + break; + case maths_t::E_ContinuousData: + break; + case maths_t::E_MixedData: + break; } return result; } @@ -196,12 +173,13 @@ double variance(maths_t::EDataType dataType, const TTuple &category) void BICGain(maths_t::EDataType dataType, CAvailableModeDistributions distributions, double smallest, - const TTupleVec &categories, + const TTupleVec& categories, std::size_t start, std::size_t split, std::size_t end, - double &distance, double &nl, double &nr) -{ + double& distance, + double& nl, + double& nr) { // The basic idea is to compute the difference between the // Bayes Information Criterion (BIC) for one and two clusters // for the sketch defined by the categories passed to this @@ -254,8 +232,7 @@ void BICGain(maths_t::EDataType dataType, candidates(categories, start, split, end, mv, mvl, mvr); double logNormalOffset = std::max(0.0, LOG_NORMAL_OFFSET_MARGIN - smallest); double gammaOffset = std::max(0.0, GAMMA_OFFSET_MARGIN - smallest); - for (std::size_t i = start; i < end; ++i) - { + for (std::size_t i = start; i < end; ++i) { double x = mean(dataType, categories[i]); logNormalOffset = std::max(logNormalOffset, LOG_NORMAL_OFFSET_MARGIN - x); gammaOffset = std::max(gammaOffset, GAMMA_OFFSET_MARGIN - x); @@ -268,9 +245,9 @@ void BICGain(maths_t::EDataType dataType, // Compute the BIC gain for splitting the mode. 
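For orientation: with k free parameters fitted to n points, BIC = -2 log(L) + k log(n), and the quantity this function reports in distance is max(BIC(1) - BIC(2), 0). A self-contained sketch of the same gain for the normal candidate only (the function here also prices log-normal and gamma fits); minusTwoLogL and bicGain are names invented for this sketch:

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    const double TWO_PI = 6.283185307179586;

    // -2 * maximised normal log-likelihood of n points whose maximum
    // likelihood variance is v (the data-dependent part only).
    double minusTwoLogL(double n, double v) {
        return n * (std::log(TWO_PI * v) + 1.0);
    }

    // BIC(1) - BIC(2) for splitting n = nl + nr points with pooled ML
    // variance v into parts with ML variances vl and vr. One normal has
    // two parameters; two normals plus a mixture weight have five, and
    // hard assignment charges each point its cluster's log-weight.
    double bicGain(double nl, double vl, double nr, double vr, double v) {
        double n = nl + nr;
        double bic1 = minusTwoLogL(n, v) + 2.0 * std::log(n);
        double bic2 = minusTwoLogL(nl, vl) - 2.0 * nl * std::log(nl / n) +
                      minusTwoLogL(nr, vr) - 2.0 * nr * std::log(nr / n) +
                      5.0 * std::log(n);
        return std::max(bic1 - bic2, 0.0);
    }

    int main() {
        // Two unit-variance clusters with centres 10 apart: the pooled ML
        // variance is about 1 + 5^2 = 26 and the gain is large.
        std::cout << bicGain(100.0, 1.0, 100.0, 1.0, 26.0) << '\n';
    }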
- double ll1n = 0.0; - double ll1l = 0.0; - double ll1g = 0.0; + double ll1n = 0.0; + double ll1l = 0.0; + double ll1g = 0.0; double ll2nl = 0.0; double ll2ll = 0.0; double ll2gl = 0.0; @@ -282,8 +259,7 @@ void BICGain(maths_t::EDataType dataType, double n = CBasicStatistics::count(mv); double m = mean(dataType, mv); double v = variance(dataType, mv); - if (v <= MINIMUM_COEFFICIENT_OF_VARIATION * std::fabs(m)) - { + if (v <= MINIMUM_COEFFICIENT_OF_VARIATION * std::fabs(m)) { return; } @@ -305,8 +281,7 @@ void BICGain(maths_t::EDataType dataType, double mr = mean(dataType, mvr); double vr = std::max(variance(dataType, mvr), vmin); - try - { + try { // Mixture of log-normals (method of moments) double sl = std::log(1.0 + vl / pow2(ml + logNormalOffset)); double ll = std::log(ml + logNormalOffset) - sl / 2.0; @@ -318,95 +293,84 @@ void BICGain(maths_t::EDataType dataType, double ar = pow2(mr + gammaOffset) / vr; double br = (mr + gammaOffset) / vr; - double log2piv = std::log(boost::math::double_constants::two_pi * v); - double log2pis = std::log(boost::math::double_constants::two_pi * s); - double loggn = boost::math::lgamma(a) - a * std::log(b); + double log2piv = std::log(boost::math::double_constants::two_pi * v); + double log2pis = std::log(boost::math::double_constants::two_pi * s); + double loggn = boost::math::lgamma(a) - a * std::log(b); double log2pivl = std::log(boost::math::double_constants::two_pi * vl / pow2(wl)); double log2pivr = std::log(boost::math::double_constants::two_pi * vr / pow2(wr)); double log2pisl = std::log(boost::math::double_constants::two_pi * sl / pow2(wl)); double log2pisr = std::log(boost::math::double_constants::two_pi * sr / pow2(wr)); - double loggnl = boost::math::lgamma(al) - al * std::log(bl) - std::log(wl); - double loggnr = boost::math::lgamma(ar) - ar * std::log(br) - std::log(wr); + double loggnl = boost::math::lgamma(al) - al * std::log(bl) - std::log(wl); + double loggnr = boost::math::lgamma(ar) - ar * std::log(br) - std::log(wr); - for (std::size_t i = start; i < split; ++i) - { + for (std::size_t i = start; i < split; ++i) { double ni = CBasicStatistics::count(categories[i]); double mi = mean(dataType, categories[i]); double vi = variance(dataType, categories[i]); - if (vi == 0.0) - { + if (vi == 0.0) { double li = std::log(mi + logNormalOffset); - ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); - ll1l += ni * (pow2(li - l) / s + 2.0 * li + log2pis); - ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); + ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); + ll1l += ni * (pow2(li - l) / s + 2.0 * li + log2pis); + ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); ll2nl += ni * ((vi + pow2(mi - ml)) / vl + log2pivl); ll2ll += ni * (pow2(li - ll) / sl + 2.0 * li + log2pisl); ll2gl += ni * 2.0 * (bl * (mi + gammaOffset) - (al - 1.0) * li + loggnl); - } - else - { + } else { double si = std::log(1.0 + vi / pow2(mi + logNormalOffset)); double li = std::log(mi + logNormalOffset) - si / 2.0; - ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); - ll1l += ni * ((si + pow2(li - l)) / s + 2.0 * li + log2pis); - ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); + ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); + ll1l += ni * ((si + pow2(li - l)) / s + 2.0 * li + log2pis); + ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); ll2nl += ni * ((vi + pow2(mi - ml)) / vl + log2pivl); ll2ll += ni * ((si + pow2(li - ll)) / sl + 2.0 * li + log2pisl); ll2gl += ni * 2.0 * (bl * (mi + 
gammaOffset) - (al - 1.0) * li + loggnl); } } - for (std::size_t i = split; i < end; ++i) - { + for (std::size_t i = split; i < end; ++i) { double ni = CBasicStatistics::count(categories[i]); double mi = mean(dataType, categories[i]); double vi = variance(dataType, categories[i]); - if (vi == 0.0) - { + if (vi == 0.0) { double li = std::log(mi + logNormalOffset); - ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); - ll1l += ni * (pow2(li - l) / s + 2.0 * li + log2pis); - ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); + ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); + ll1l += ni * (pow2(li - l) / s + 2.0 * li + log2pis); + ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); ll2nr += ni * ((vi + pow2(mi - mr)) / vr + log2pivr); ll2lr += ni * (pow2(li - lr) / sr + 2.0 * li + log2pisr); ll2gr += ni * 2.0 * (br * (mi + gammaOffset) - (ar - 1.0) * li + loggnr); - } - else - { + } else { double si = std::log(1.0 + vi / pow2(mi + logNormalOffset)); double li = std::log(mi + logNormalOffset) - si / 2.0; - ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); - ll1l += ni * ((si + pow2(li - l)) / s + 2.0 * li + log2pis); - ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); + ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); + ll1l += ni * ((si + pow2(li - l)) / s + 2.0 * li + log2pis); + ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); ll2nr += ni * ((vi + pow2(mi - mr)) / vr + log2pivr); ll2lr += ni * ((si + pow2(li - lr)) / sr + 2.0 * li + log2pisr); ll2gr += ni * 2.0 * (br * (mi + gammaOffset) - (ar - 1.0) * li + loggnr); } } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute BIC gain: " << e.what() - << ", n = " << n << ", m = " << m << ", v = " << v - << ", wl = " << wl << ", ml = " << ml << ", vl = " << vl - << ", wr = " << wr << ", mr = " << mr << ", vr = " << vr); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute BIC gain: " << e.what() << ", n = " << n << ", m = " << m << ", v = " << v << ", wl = " << wl + << ", ml = " << ml << ", vl = " << vl << ", wr = " << wr << ", mr = " << mr + << ", vr = " << vr); return; } double logn = std::log(n); - double ll1 = min(distributions.haveNormal() ? ll1n : boost::numeric::bounds<double>::highest(), - distributions.haveLogNormal() ? ll1l : boost::numeric::bounds<double>::highest(), - distributions.haveGamma() ? ll1g : boost::numeric::bounds<double>::highest()) - + distributions.parameters() * logn; - double ll2 = min(distributions.haveNormal() ? ll2nl : boost::numeric::bounds<double>::highest(), - distributions.haveLogNormal() ? ll2ll : boost::numeric::bounds<double>::highest(), - distributions.haveGamma() ? ll2gl : boost::numeric::bounds<double>::highest()) - + min(distributions.haveNormal() ? ll2nr : boost::numeric::bounds<double>::highest(), - distributions.haveLogNormal() ? ll2lr : boost::numeric::bounds<double>::highest(), - distributions.haveGamma() ? ll2gr : boost::numeric::bounds<double>::highest()) - + (2.0 * distributions.parameters() + 1.0) * logn; + double ll1 = min(distributions.haveNormal() ? ll1n : boost::numeric::bounds<double>::highest(), + distributions.haveLogNormal() ? ll1l : boost::numeric::bounds<double>::highest(), + distributions.haveGamma() ? ll1g : boost::numeric::bounds<double>::highest()) + + distributions.parameters() * logn; + double ll2 = min(distributions.haveNormal() ? ll2nl : boost::numeric::bounds<double>::highest(), + distributions.haveLogNormal() ? ll2ll : boost::numeric::bounds<double>::highest(), + distributions.haveGamma() ? ll2gl : boost::numeric::bounds<double>::highest()) + + min(distributions.haveNormal() ? 
ll2nr : boost::numeric::bounds<double>::highest(), + distributions.haveLogNormal() ? ll2lr : boost::numeric::bounds<double>::highest(), + distributions.haveGamma() ? ll2gr : boost::numeric::bounds<double>::highest()) + + (2.0 * distributions.parameters() + 1.0) * logn; LOG_TRACE("BIC(1) = " << ll1 << ", BIC(2) = " << ll2); @@ -430,8 +394,7 @@ void BICGain(maths_t::EDataType dataType, //! //! \param[in] interval The Winsorisation interval. //! \param[in,out] category The category to Winsorise. -void winsorise(const TDoubleDoublePr &interval, TTuple &category) -{ +void winsorise(const TDoubleDoublePr& interval, TTuple& category) { double a = interval.first; double b = interval.second; double m = CBasicStatistics::mean(category); @@ -441,13 +404,11 @@ void winsorise(const TDoubleDoublePr &interval, TTuple &category) double xa = m - a; double xb = b - m; - if (sigma == 0.0 || (xa > t && xb > t)) - { + if (sigma == 0.0 || (xa > t && xb > t)) { return; } - try - { + try { boost::math::normal_distribution<> normal(m, sigma); double pa = xa > t ? 0.0 : CTools::safeCdf(normal, a); double pb = xb > t ? 0.0 : CTools::safeCdfComplement(normal, b); @@ -455,34 +416,24 @@ void winsorise(const TDoubleDoublePr &interval, TTuple &category) xa /= sigma; xb /= sigma; - double ea = xa > t ? 0.0 : std::exp(-xa*xa / 2.0); - double eb = xb > t ? 0.0 : std::exp(-xb*xb / 2.0); + double ea = xa > t ? 0.0 : std::exp(-xa * xa / 2.0); + double eb = xb > t ? 0.0 : std::exp(-xb * xb / 2.0); - double km = sigma - / boost::math::double_constants::root_two_pi - * (ea - eb); - double kv = -sigma * sigma - / boost::math::double_constants::root_two_pi - * (xa * ea + xb * eb); + double km = sigma / boost::math::double_constants::root_two_pi * (ea - eb); + double kv = -sigma * sigma / boost::math::double_constants::root_two_pi * (xa * ea + xb * eb); double wm = pa * a + pb * b + m * (1.0 - pb - pa) + km; xa = a - wm; xb = b - wm; double xm = wm - m; - double wv = xa * xa * pa + xb * xb * pb - + (sigma * sigma + xm * xm) * (1.0 - pb - pa) - + 2.0 * xm * km + kv; + double wv = xa * xa * pa + xb * xb * pb + (sigma * sigma + xm * xm) * (1.0 - pb - pa) + 2.0 * xm * km + kv; double n = CBasicStatistics::count(category); category.s_Moments[0] = wm; category.s_Moments[1] = std::max((n - 1.0) / n * wv, 0.0); - } - catch (const std::exception &e) - { - LOG_ERROR("Bad category = " << category << ": " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Bad category = " << category << ": " << e.what()); } } //! Search for a split of the data that satisfies the constraints @@ -508,9 +459,8 @@ bool splitSearch(double minimumCount, maths_t::EDataType dataType, CAvailableModeDistributions distributions, double smallest, - const TTupleVec &categories, - TSizeVec &result) -{ + const TTupleVec& categories, + TSizeVec& result) { using TSizeSizePr = std::pair<std::size_t, std::size_t>; LOG_TRACE("begin split search"); @@ -531,26 +481,20 @@ bool splitSearch(double minimumCount, // the corresponding full 2-split can be split subject to the // same constraints (to avoid merging the split straight away). 
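Stripped of the sketch bookkeeping, the search the comment describes has the following shape. In this sketch, bestSplit and gain are toy stand-ins for naturalBreaks and BICGain (a largest-gap rule on sorted points), so only the recursion and the final full-range check mirror the real function:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    using TDoubleVec = std::vector<double>;
    using TSizeVec = std::vector<std::size_t>;

    // Toy stand-in for naturalBreaks: split [begin, end) at the widest gap.
    std::size_t bestSplit(const TDoubleVec& x, std::size_t begin, std::size_t end) {
        std::size_t split = begin;
        double widest = 0.0;
        for (std::size_t i = begin + 1; i < end; ++i) {
            if (x[i] - x[i - 1] > widest) {
                widest = x[i] - x[i - 1];
                split = i;
            }
        }
        return split;
    }

    // Toy stand-in for BICGain: the "distance" is just the gap at the split.
    void gain(const TDoubleVec& x, std::size_t begin, std::size_t split, std::size_t end,
              double& distance, double& nl, double& nr) {
        distance = x[split] - x[split - 1];
        nl = static_cast<double>(split - begin);
        nr = static_cast<double>(end - split);
    }

    TSizeVec splitSearch(const TDoubleVec& x, double minimumCount, double minimumDistance) {
        TSizeVec result;
        for (std::size_t begin = 0, end = x.size();;) {
            std::size_t split = bestSplit(x, begin, end);
            if (split == begin) {
                break; // co-located points: nothing to split
            }
            double distance, nl, nr;
            gain(x, begin, split, end, distance, nl, nr);
            if (std::min(nl, nr) < minimumCount) {
                // Recurse to the (one) side with sufficient count.
                if (nl > minimumCount && split - begin > 1) { end = split; continue; }
                if (nr > minimumCount && end - split > 1) { begin = split; continue; }
            } else if (distance > minimumDistance) {
                // Accept only if the candidate also splits the full range,
                // so the new clusters aren't merged straight back together.
                gain(x, 0, split, x.size(), distance, nl, nr);
                if (distance > minimumDistance) {
                    result = {split, x.size()};
                }
            }
            break;
        }
        return result;
    }

    int main() {
        TDoubleVec x{0.0, 0.1, 0.2, 5.0, 5.1, 5.2, 5.3};
        for (auto i : splitSearch(x, 2.0, 1.0)) {
            std::cout << i << ' '; // prints "3 7"
        }
    }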
- for (;;) - { + for (;;) { LOG_TRACE("node = " << core::CContainerPrinter::print(node)); LOG_TRACE("categories = " << core::CContainerPrinter::print(categories)); - nodeCategories.assign(categories.begin() + node.first, - categories.begin() + node.second); + nodeCategories.assign(categories.begin() + node.first, categories.begin() + node.second); - CNaturalBreaksClassifier::naturalBreaks(nodeCategories, 2, 0, - CNaturalBreaksClassifier::E_TargetDeviation, - candidate); + CNaturalBreaksClassifier::naturalBreaks(nodeCategories, 2, 0, CNaturalBreaksClassifier::E_TargetDeviation, candidate); LOG_TRACE("candidate = " << core::CContainerPrinter::print(candidate)); - if (candidate.size() != 2) - { + if (candidate.size() != 2) { LOG_ERROR("Expected 2-split: " << core::CContainerPrinter::print(candidate)); break; } - if (candidate[0] == 0 || candidate[0] == nodeCategories.size()) - { + if (candidate[0] == 0 || candidate[0] == nodeCategories.size()) { // This can happen if all the points are co-located, // in which case we can't split this node anyway. break; @@ -562,45 +506,34 @@ bool splitSearch(double minimumCount, double distance; double nl; double nr; - BICGain(dataType, distributions, smallest, categories, - node.first, candidate[0], node.second, distance, nl, nr); + BICGain(dataType, distributions, smallest, categories, node.first, candidate[0], node.second, distance, nl, nr); // Check the count constraint. bool satisfiesCount = (std::min(nl, nr) >= minimumCount); - LOG_TRACE("count = " << std::min(nl, nr) - << " (to split " << minimumCount << ")"); + LOG_TRACE("count = " << std::min(nl, nr) << " (to split " << minimumCount << ")"); // Check the distance constraint. bool satisfiesDistance = (distance > minimumDistance); - LOG_TRACE("max(BIC(1) - BIC(2), 0) = " << distance - << " (to split " << minimumDistance << ")"); + LOG_TRACE("max(BIC(1) - BIC(2), 0) = " << distance << " (to split " << minimumDistance << ")"); - if (!satisfiesCount) - { + if (!satisfiesCount) { // Recurse to the (one) node with sufficient count. 
- if (nl > minimumCount && candidate[0] - node.first > 1) - { + if (nl > minimumCount && candidate[0] - node.first > 1) { node = std::make_pair(node.first, candidate[0]); continue; } - if (nr > minimumCount && node.second - candidate[0] > 1) - { + if (nr > minimumCount && node.second - candidate[0] > 1) { node = std::make_pair(candidate[0], node.second); continue; } - } - else if (satisfiesDistance) - { + } else if (satisfiesDistance) { LOG_TRACE("Checking full split"); - BICGain(dataType, distributions, smallest, categories, - 0, candidate[0], categories.size(), distance, nl, nr); + BICGain(dataType, distributions, smallest, categories, 0, candidate[0], categories.size(), distance, nl, nr); - LOG_TRACE("max(BIC(1) - BIC(2), 0) = " << distance - << " (to split " << minimumDistance << ")"); + LOG_TRACE("max(BIC(1) - BIC(2), 0) = " << distance << " (to split " << minimumDistance << ")"); - if (distance > minimumDistance) - { + if (distance > minimumDistance) { result.push_back(candidate[0]); result.push_back(categories.size()); } @@ -613,7 +546,6 @@ bool splitSearch(double minimumCount, return !result.empty(); } - } // detail:: // 1 - "smallest hard assignment weight" @@ -638,49 +570,37 @@ static const std::string STRUCTURE_TAG("b"); static const std::string PRIOR_TAG("c"); const std::string EMPTY_STRING; - } -CAvailableModeDistributions::CAvailableModeDistributions(int value) : m_Value(value) -{ +CAvailableModeDistributions::CAvailableModeDistributions(int value) : m_Value(value) { } -const CAvailableModeDistributions & -CAvailableModeDistributions::operator+(const CAvailableModeDistributions &rhs) -{ +const CAvailableModeDistributions& CAvailableModeDistributions::operator+(const CAvailableModeDistributions& rhs) { m_Value = m_Value | rhs.m_Value; return *this; } -double CAvailableModeDistributions::parameters() const -{ - return (this->haveNormal() ? 2.0 : 0.0) - + (this->haveGamma() ? 2.0 : 0.0) - + (this->haveLogNormal() ? 2.0 : 0.0); +double CAvailableModeDistributions::parameters() const { + return (this->haveNormal() ? 2.0 : 0.0) + (this->haveGamma() ? 2.0 : 0.0) + (this->haveLogNormal() ? 
2.0 : 0.0); } -bool CAvailableModeDistributions::haveNormal() const -{ +bool CAvailableModeDistributions::haveNormal() const { return (m_Value & NORMAL) != 0; } -bool CAvailableModeDistributions::haveGamma() const -{ +bool CAvailableModeDistributions::haveGamma() const { return (m_Value & GAMMA) != 0; } -bool CAvailableModeDistributions::haveLogNormal() const -{ +bool CAvailableModeDistributions::haveLogNormal() const { return (m_Value & LOG_NORMAL) != 0; } -std::string CAvailableModeDistributions::toString() const -{ +std::string CAvailableModeDistributions::toString() const { return core::CStringUtils::typeToString(m_Value); } -bool CAvailableModeDistributions::fromString(const std::string &value) -{ +bool CAvailableModeDistributions::fromString(const std::string& value) { return core::CStringUtils::stringToType(value, m_Value); } @@ -692,92 +612,82 @@ CXMeansOnline1d::CXMeansOnline1d(maths_t::EDataType dataType, double minimumClusterCount, double minimumCategoryCount, double winsorisationConfidenceInterval, - const TSplitFunc &splitFunc, - const TMergeFunc &mergeFunc) : - CClusterer1d(splitFunc, mergeFunc), - m_DataType(dataType), - m_AvailableDistributions(availableDistributions), - m_InitialDecayRate(decayRate), - m_DecayRate(decayRate), - m_HistoryLength(0.0), - m_WeightCalc(weightCalc), - m_MinimumClusterFraction(minimumClusterFraction), - m_MinimumClusterCount(minimumClusterCount), - m_MinimumCategoryCount(minimumCategoryCount), - m_WinsorisationConfidenceInterval(winsorisationConfidenceInterval), - m_Clusters(1, CCluster(*this)) -{ -} - -CXMeansOnline1d::CXMeansOnline1d(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) : - CClusterer1d(CDoNothing(), CDoNothing()), - m_DataType(params.s_DataType), - m_AvailableDistributions(CAvailableModeDistributions::ALL), - m_InitialDecayRate(params.s_DecayRate), - m_DecayRate(params.s_DecayRate), - m_HistoryLength(), - m_WeightCalc(maths_t::E_ClustersEqualWeight), - m_MinimumClusterFraction(), - m_MinimumClusterCount(), - m_MinimumCategoryCount(params.s_MinimumCategoryCount), - m_WinsorisationConfidenceInterval() -{ - traverser.traverseSubLevel(boost::bind(&CXMeansOnline1d::acceptRestoreTraverser, - this, boost::cref(params), _1)); -} - -CXMeansOnline1d::CXMeansOnline1d(const SDistributionRestoreParams ¶ms, - const TSplitFunc &splitFunc, - const TMergeFunc &mergeFunc, - core::CStateRestoreTraverser &traverser) : - CClusterer1d(splitFunc, mergeFunc), - m_DataType(params.s_DataType), - m_AvailableDistributions(CAvailableModeDistributions::ALL), - m_InitialDecayRate(params.s_DecayRate), - m_DecayRate(params.s_DecayRate), - m_HistoryLength(), - m_WeightCalc(maths_t::E_ClustersEqualWeight), - m_MinimumClusterFraction(), - m_MinimumClusterCount(), - m_MinimumCategoryCount(params.s_MinimumCategoryCount), - m_WinsorisationConfidenceInterval() -{ - traverser.traverseSubLevel(boost::bind(&CXMeansOnline1d::acceptRestoreTraverser, - this, boost::cref(params), _1)); -} - -CXMeansOnline1d::CXMeansOnline1d(const CXMeansOnline1d &other) : - CClusterer1d(other.splitFunc(), other.mergeFunc()), - m_DataType(other.m_DataType), - m_AvailableDistributions(other.m_AvailableDistributions), - m_InitialDecayRate(other.m_InitialDecayRate), - m_DecayRate(other.m_DecayRate), - m_HistoryLength(other.m_HistoryLength), - m_WeightCalc(other.m_WeightCalc), - m_MinimumClusterFraction(other.m_MinimumClusterFraction), - m_MinimumClusterCount(other.m_MinimumClusterCount), - m_MinimumCategoryCount(other.m_MinimumCategoryCount), - 
m_WinsorisationConfidenceInterval(other.m_WinsorisationConfidenceInterval), - m_ClusterIndexGenerator(other.m_ClusterIndexGenerator.deepCopy()), - m_Smallest(other.m_Smallest), - m_Largest(other.m_Largest), - m_Clusters(other.m_Clusters) -{ -} - -CXMeansOnline1d &CXMeansOnline1d::operator=(const CXMeansOnline1d &other) -{ - if (this != &other) - { + const TSplitFunc& splitFunc, + const TMergeFunc& mergeFunc) + : CClusterer1d(splitFunc, mergeFunc), + m_DataType(dataType), + m_AvailableDistributions(availableDistributions), + m_InitialDecayRate(decayRate), + m_DecayRate(decayRate), + m_HistoryLength(0.0), + m_WeightCalc(weightCalc), + m_MinimumClusterFraction(minimumClusterFraction), + m_MinimumClusterCount(minimumClusterCount), + m_MinimumCategoryCount(minimumCategoryCount), + m_WinsorisationConfidenceInterval(winsorisationConfidenceInterval), + m_Clusters(1, CCluster(*this)) { +} + +CXMeansOnline1d::CXMeansOnline1d(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) + : CClusterer1d(CDoNothing(), CDoNothing()), + m_DataType(params.s_DataType), + m_AvailableDistributions(CAvailableModeDistributions::ALL), + m_InitialDecayRate(params.s_DecayRate), + m_DecayRate(params.s_DecayRate), + m_HistoryLength(), + m_WeightCalc(maths_t::E_ClustersEqualWeight), + m_MinimumClusterFraction(), + m_MinimumClusterCount(), + m_MinimumCategoryCount(params.s_MinimumCategoryCount), + m_WinsorisationConfidenceInterval() { + traverser.traverseSubLevel(boost::bind(&CXMeansOnline1d::acceptRestoreTraverser, this, boost::cref(params), _1)); +} + +CXMeansOnline1d::CXMeansOnline1d(const SDistributionRestoreParams& params, + const TSplitFunc& splitFunc, + const TMergeFunc& mergeFunc, + core::CStateRestoreTraverser& traverser) + : CClusterer1d(splitFunc, mergeFunc), + m_DataType(params.s_DataType), + m_AvailableDistributions(CAvailableModeDistributions::ALL), + m_InitialDecayRate(params.s_DecayRate), + m_DecayRate(params.s_DecayRate), + m_HistoryLength(), + m_WeightCalc(maths_t::E_ClustersEqualWeight), + m_MinimumClusterFraction(), + m_MinimumClusterCount(), + m_MinimumCategoryCount(params.s_MinimumCategoryCount), + m_WinsorisationConfidenceInterval() { + traverser.traverseSubLevel(boost::bind(&CXMeansOnline1d::acceptRestoreTraverser, this, boost::cref(params), _1)); +} + +CXMeansOnline1d::CXMeansOnline1d(const CXMeansOnline1d& other) + : CClusterer1d(other.splitFunc(), other.mergeFunc()), + m_DataType(other.m_DataType), + m_AvailableDistributions(other.m_AvailableDistributions), + m_InitialDecayRate(other.m_InitialDecayRate), + m_DecayRate(other.m_DecayRate), + m_HistoryLength(other.m_HistoryLength), + m_WeightCalc(other.m_WeightCalc), + m_MinimumClusterFraction(other.m_MinimumClusterFraction), + m_MinimumClusterCount(other.m_MinimumClusterCount), + m_MinimumCategoryCount(other.m_MinimumCategoryCount), + m_WinsorisationConfidenceInterval(other.m_WinsorisationConfidenceInterval), + m_ClusterIndexGenerator(other.m_ClusterIndexGenerator.deepCopy()), + m_Smallest(other.m_Smallest), + m_Largest(other.m_Largest), + m_Clusters(other.m_Clusters) { +} + +CXMeansOnline1d& CXMeansOnline1d::operator=(const CXMeansOnline1d& other) { + if (this != &other) { CXMeansOnline1d tmp(other); this->swap(tmp); } return *this; } -void CXMeansOnline1d::swap(CXMeansOnline1d &other) -{ +void CXMeansOnline1d::swap(CXMeansOnline1d& other) { this->CClusterer1d::swap(other); std::swap(m_DataType, other.m_DataType); std::swap(m_AvailableDistributions, other.m_AvailableDistributions); @@ -795,15 +705,12 @@ void 
CXMeansOnline1d::swap(CXMeansOnline1d &other) m_Clusters.swap(other.m_Clusters); } -std::string CXMeansOnline1d::persistenceTag() const -{ +std::string CXMeansOnline1d::persistenceTag() const { return X_MEANS_ONLINE_1D_TAG; } -void CXMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { +void CXMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { inserter.insertLevel(CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter, &m_Clusters[i], _1)); } inserter.insertValue(AVAILABLE_DISTRIBUTIONS_TAG, m_AvailableDistributions.toString()); @@ -814,20 +721,15 @@ void CXMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter &inserte inserter.insertValue(WEIGHT_CALC_TAG, static_cast<int>(m_WeightCalc)); inserter.insertValue(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction); inserter.insertValue(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount); - inserter.insertValue(WINSORISATION_CONFIDENCE_INTERVAL_TAG, - m_WinsorisationConfidenceInterval); - inserter.insertLevel(CLUSTER_INDEX_GENERATOR_TAG, - boost::bind(&CIndexGenerator::acceptPersistInserter, - &m_ClusterIndexGenerator, _1)); + inserter.insertValue(WINSORISATION_CONFIDENCE_INTERVAL_TAG, m_WinsorisationConfidenceInterval); + inserter.insertLevel(CLUSTER_INDEX_GENERATOR_TAG, boost::bind(&CIndexGenerator::acceptPersistInserter, &m_ClusterIndexGenerator, _1)); } -CXMeansOnline1d *CXMeansOnline1d::clone() const -{ +CXMeansOnline1d* CXMeansOnline1d::clone() const { return new CXMeansOnline1d(*this); } -void CXMeansOnline1d::clear() -{ +void CXMeansOnline1d::clear() { *this = CXMeansOnline1d(m_DataType, m_AvailableDistributions, m_WeightCalc, @@ -840,39 +742,31 @@ void CXMeansOnline1d::clear() this->mergeFunc()); } -std::size_t CXMeansOnline1d::numberClusters() const -{ +std::size_t CXMeansOnline1d::numberClusters() const { return m_Clusters.size(); } -void CXMeansOnline1d::dataType(maths_t::EDataType dataType) -{ +void CXMeansOnline1d::dataType(maths_t::EDataType dataType) { m_DataType = dataType; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { m_Clusters[i].dataType(dataType); } } -void CXMeansOnline1d::decayRate(double decayRate) -{ +void CXMeansOnline1d::decayRate(double decayRate) { m_DecayRate = decayRate; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { m_Clusters[i].decayRate(decayRate); } } -bool CXMeansOnline1d::hasCluster(std::size_t index) const -{ +bool CXMeansOnline1d::hasCluster(std::size_t index) const { return this->cluster(index) != 0; } -bool CXMeansOnline1d::clusterCentre(std::size_t index, double &result) const -{ - const CCluster *cluster = this->cluster(index); - if (!cluster) - { +bool CXMeansOnline1d::clusterCentre(std::size_t index, double& result) const { + const CCluster* cluster = this->cluster(index); + if (!cluster) { LOG_ERROR("Cluster " << index << " doesn't exist"); return false; } @@ -880,11 +774,9 @@ bool CXMeansOnline1d::clusterCentre(std::size_t index, double &result) const return true; } -bool CXMeansOnline1d::clusterSpread(std::size_t index, double &result) const -{ - const CCluster *cluster = this->cluster(index); - if (!cluster) - { +bool CXMeansOnline1d::clusterSpread(std::size_t index, double& result) const { + const CCluster* cluster = this->cluster(index); + if (!cluster) { 
LOG_ERROR("Cluster " << index << " doesn't exist"); return false; } @@ -892,34 +784,22 @@ bool CXMeansOnline1d::clusterSpread(std::size_t index, double &result) const return true; } -void CXMeansOnline1d::cluster(const double &point, - TSizeDoublePr2Vec &result, - double count) const -{ +void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, double count) const { result.clear(); - if (m_Clusters.empty()) - { + if (m_Clusters.empty()) { LOG_ERROR("No clusters"); return; } - TClusterVecCItr rightCluster = std::lower_bound(m_Clusters.begin(), - m_Clusters.end(), - point, - detail::SClusterCentreLess()); + TClusterVecCItr rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); - if (rightCluster == m_Clusters.end()) - { + if (rightCluster == m_Clusters.end()) { --rightCluster; result.emplace_back(rightCluster->index(), count); - } - else if (rightCluster == m_Clusters.begin()) - { + } else if (rightCluster == m_Clusters.begin()) { result.emplace_back(rightCluster->index(), count); - } - else - { + } else { // This does a soft assignment. Given we are finding a // partitioning clustering (as a result of targeting // the k-means objective) we only consider the case that @@ -942,187 +822,139 @@ void CXMeansOnline1d::cluster(const double &point, TClusterVecCItr leftCluster = rightCluster; --leftCluster; - double likelihoodLeft = leftCluster->logLikelihoodFromCluster(m_WeightCalc, point); + double likelihoodLeft = leftCluster->logLikelihoodFromCluster(m_WeightCalc, point); double likelihoodRight = rightCluster->logLikelihoodFromCluster(m_WeightCalc, point); double renormalizer = std::max(likelihoodLeft, likelihoodRight); - double pLeft = std::exp(likelihoodLeft - renormalizer); + double pLeft = std::exp(likelihoodLeft - renormalizer); double pRight = std::exp(likelihoodRight - renormalizer); double normalizer = pLeft + pRight; - pLeft /= normalizer; + pLeft /= normalizer; pRight /= normalizer; - if (pLeft < HARD_ASSIGNMENT_THRESHOLD * pRight) - { + if (pLeft < HARD_ASSIGNMENT_THRESHOLD * pRight) { result.emplace_back(rightCluster->index(), count); - } - else if (pRight < HARD_ASSIGNMENT_THRESHOLD * pLeft) - { + } else if (pRight < HARD_ASSIGNMENT_THRESHOLD * pLeft) { result.emplace_back(leftCluster->index(), count); - } - else - { + } else { result.emplace_back(leftCluster->index(), pLeft * count); result.emplace_back(rightCluster->index(), pRight * count); } } } -void CXMeansOnline1d::add(const double &point, - TSizeDoublePr2Vec &clusters, - double count) -{ +void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, double count) { m_HistoryLength += 1.0; m_Smallest.add(point); m_Largest.add(point); clusters.clear(); - TClusterVecItr rightCluster = std::lower_bound(m_Clusters.begin(), - m_Clusters.end(), - point, - detail::SClusterCentreLess()); + TClusterVecItr rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); - if (rightCluster == m_Clusters.end()) - { + if (rightCluster == m_Clusters.end()) { --rightCluster; LOG_TRACE("Adding " << point << " to " << rightCluster->centre()); rightCluster->add(point, count); clusters.emplace_back(rightCluster->index(), count); - if (this->maybeSplit(rightCluster)) - { + if (this->maybeSplit(rightCluster)) { this->cluster(point, clusters, count); - } - else if (rightCluster != m_Clusters.begin()) - { + } else if (rightCluster != m_Clusters.begin()) { TClusterVecItr leftCluster = rightCluster; --leftCluster; - if 
(this->maybeMerge(leftCluster, rightCluster)) - { + if (this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); } } - } - else if (rightCluster == m_Clusters.begin()) - { + } else if (rightCluster == m_Clusters.begin()) { LOG_TRACE("Adding " << point << " to " << rightCluster->centre()); rightCluster->add(point, count); clusters.emplace_back(rightCluster->index(), count); - if (this->maybeSplit(rightCluster)) - { + if (this->maybeSplit(rightCluster)) { this->cluster(point, clusters, count); - } - else - { + } else { TClusterVecItr leftCluster = rightCluster; ++rightCluster; - if (this->maybeMerge(leftCluster, rightCluster)) - { + if (this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); } } - } - else - { + } else { // See the cluster member function for more details on // soft assignment. TClusterVecItr leftCluster = rightCluster; --leftCluster; - double likelihoodLeft = leftCluster->logLikelihoodFromCluster(m_WeightCalc, point); + double likelihoodLeft = leftCluster->logLikelihoodFromCluster(m_WeightCalc, point); double likelihoodRight = rightCluster->logLikelihoodFromCluster(m_WeightCalc, point); // Normalize the likelihood values. double renormalizer = std::max(likelihoodLeft, likelihoodRight); - double pLeft = std::exp(likelihoodLeft - renormalizer); + double pLeft = std::exp(likelihoodLeft - renormalizer); double pRight = std::exp(likelihoodRight - renormalizer); double normalizer = pLeft + pRight; - pLeft /= normalizer; + pLeft /= normalizer; pRight /= normalizer; - if (pLeft < HARD_ASSIGNMENT_THRESHOLD * pRight) - { + if (pLeft < HARD_ASSIGNMENT_THRESHOLD * pRight) { LOG_TRACE("Adding " << point << " to " << rightCluster->centre()); rightCluster->add(point, count); clusters.emplace_back(rightCluster->index(), count); - if ( this->maybeSplit(rightCluster) - || this->maybeMerge(leftCluster, rightCluster)) - { + if (this->maybeSplit(rightCluster) || this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); } - } - else if (pRight < HARD_ASSIGNMENT_THRESHOLD * pLeft) - { + } else if (pRight < HARD_ASSIGNMENT_THRESHOLD * pLeft) { LOG_TRACE("Adding " << point << " to " << leftCluster->centre()); leftCluster->add(point, count); clusters.emplace_back(leftCluster->index(), count); - if ( this->maybeSplit(leftCluster) - || this->maybeMerge(leftCluster, rightCluster)) - { + if (this->maybeSplit(leftCluster) || this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); } - } - else - { + } else { // Get the weighted counts. 
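The pLeft and pRight used above and in cluster() come from the standard two-way normalisation of exponentiated log-likelihoods, with the maximum subtracted first so neither exponential can overflow. A self-contained sketch; responsibilities is an invented name and the threshold value is illustrative (the class defines its own HARD_ASSIGNMENT_THRESHOLD):

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <utility>

    // Turn two cluster log-likelihoods into assignment fractions summing
    // to one; subtracting the larger first keeps the exponentials finite.
    std::pair<double, double> responsibilities(double logLeft, double logRight) {
        double renormalizer = std::max(logLeft, logRight);
        double pLeft = std::exp(logLeft - renormalizer);
        double pRight = std::exp(logRight - renormalizer);
        double normalizer = pLeft + pRight;
        return {pLeft / normalizer, pRight / normalizer};
    }

    int main() {
        static const double HARD_ASSIGNMENT_THRESHOLD = 0.01; // illustrative value
        std::pair<double, double> p = responsibilities(-3.0, -7.0);
        if (p.first < HARD_ASSIGNMENT_THRESHOLD * p.second) {
            std::cout << "hard assign right\n";
        } else if (p.second < HARD_ASSIGNMENT_THRESHOLD * p.first) {
            std::cout << "hard assign left\n";
        } else {
            std::cout << "soft assign: " << p.first << ", " << p.second << '\n';
        }
    }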
double countLeft = count * pLeft; double countRight = count * pRight; - LOG_TRACE("Soft adding " << point - << " " << countLeft << " to " << leftCluster->centre() - << " and " << countRight << " to " << rightCluster->centre()); + LOG_TRACE("Soft adding " << point << " " << countLeft << " to " << leftCluster->centre() << " and " << countRight << " to " + << rightCluster->centre()); leftCluster->add(point, countLeft); rightCluster->add(point, countRight); clusters.emplace_back(leftCluster->index(), countLeft); clusters.emplace_back(rightCluster->index(), countRight); - if ( this->maybeSplit(leftCluster) - || this->maybeSplit(rightCluster) - || this->maybeMerge(leftCluster, rightCluster)) - { + if (this->maybeSplit(leftCluster) || this->maybeSplit(rightCluster) || this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); } } } - if (this->prune()) - { + if (this->prune()) { this->cluster(point, clusters, count); } } -void CXMeansOnline1d::add(const TDoubleDoublePrVec &points) -{ - if (m_Clusters.empty()) - { +void CXMeansOnline1d::add(const TDoubleDoublePrVec& points) { + if (m_Clusters.empty()) { m_Clusters.push_back(CCluster(*this)); } TSizeDoublePr2Vec dummy; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { this->add(points[i].first, dummy, points[i].second); } } -void CXMeansOnline1d::propagateForwardsByTime(double time) -{ - if (time < 0.0) - { +void CXMeansOnline1d::propagateForwardsByTime(double time) { + if (time < 0.0) { LOG_ERROR("Can't propagate backwards in time"); return; } m_HistoryLength *= std::exp(-m_DecayRate * time); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { m_Clusters[i].propagateForwardsByTime(time); } } -bool CXMeansOnline1d::sample(std::size_t index, - std::size_t numberSamples, - TDoubleVec &samples) const -{ - const CCluster *cluster = this->cluster(index); - if (!cluster) - { +bool CXMeansOnline1d::sample(std::size_t index, std::size_t numberSamples, TDoubleVec& samples) const { + const CCluster* cluster = this->cluster(index); + if (!cluster) { LOG_ERROR("Cluster " << index << " doesn't exist"); return false; } @@ -1130,15 +962,12 @@ bool CXMeansOnline1d::sample(std::size_t index, return true; } -double CXMeansOnline1d::probability(std::size_t index) const -{ +double CXMeansOnline1d::probability(std::size_t index) const { double weight = 0.0; double weightSum = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - const CCluster &cluster = m_Clusters[i]; - if (cluster.index() == index) - { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + const CCluster& cluster = m_Clusters[i]; + if (cluster.index() == index) { weight = cluster.weight(maths_t::E_ClustersFractionWeight); } weightSum += cluster.weight(maths_t::E_ClustersFractionWeight); @@ -1146,27 +975,23 @@ double CXMeansOnline1d::probability(std::size_t index) const return weightSum == 0.0 ? 
0.0 : weight / weightSum; } -void CXMeansOnline1d::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CXMeansOnline1d::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CXMeansOnline1d"); core::CMemoryDebug::dynamicSize("m_ClusterIndexGenerator", m_ClusterIndexGenerator, mem); core::CMemoryDebug::dynamicSize("m_Clusters", m_Clusters, mem); } -std::size_t CXMeansOnline1d::memoryUsage() const -{ +std::size_t CXMeansOnline1d::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_ClusterIndexGenerator); mem += core::CMemory::dynamicSize(m_Clusters); return mem; } -std::size_t CXMeansOnline1d::staticSize() const -{ +std::size_t CXMeansOnline1d::staticSize() const { return sizeof(*this); } -uint64_t CXMeansOnline1d::checksum(uint64_t seed) const -{ +uint64_t CXMeansOnline1d::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_DataType); seed = CChecksum::calculate(seed, m_DecayRate); seed = CChecksum::calculate(seed, m_HistoryLength); @@ -1174,25 +999,20 @@ uint64_t CXMeansOnline1d::checksum(uint64_t seed) const return CChecksum::calculate(seed, m_Clusters); } -double CXMeansOnline1d::count() const -{ +double CXMeansOnline1d::count() const { double result = 0.0; - for (std::size_t i = 0; i < m_Clusters.size(); ++i) - { + for (std::size_t i = 0; i < m_Clusters.size(); ++i) { result += m_Clusters[i].count(); } return result; } -const CXMeansOnline1d::TClusterVec &CXMeansOnline1d::clusters() const -{ +const CXMeansOnline1d::TClusterVec& CXMeansOnline1d::clusters() const { return m_Clusters; } -std::string CXMeansOnline1d::printClusters() const -{ - if (m_Clusters.empty()) - { +std::string CXMeansOnline1d::printClusters() const { + if (m_Clusters.empty()) { return std::string(); } @@ -1204,20 +1024,17 @@ std::string CXMeansOnline1d::printClusters() const static const double RANGE = 99.9; static const unsigned int POINTS = 201; - TDoubleDoublePr range(boost::numeric::bounds<double>::highest(), - boost::numeric::bounds<double>::lowest()); + TDoubleDoublePr range(boost::numeric::bounds<double>::highest(), boost::numeric::bounds<double>::lowest()); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - const CPrior &prior = m_Clusters[i].prior(); + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + const CPrior& prior = m_Clusters[i].prior(); TDoubleDoublePr clusterRange = prior.marginalLikelihoodConfidenceInterval(RANGE); range.first = std::min(range.first, clusterRange.first); range.second = std::max(range.second, clusterRange.second); } double weightSum = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { weightSum += m_Clusters[i].weight(m_WeightCalc); } @@ -1231,19 +1048,14 @@ std::string CXMeansOnline1d::printClusters() const std::ostringstream likelihoodStr; coordinatesStr << "x = ["; likelihoodStr << "likelihood = ["; - for (unsigned int i = 0u; i < POINTS; ++i, x[0] += increment) - { + for (unsigned int i = 0u; i < POINTS; ++i, x[0] += increment) { double likelihood = 0.0; - for (std::size_t j = 0u; j < m_Clusters.size(); ++j) - { + for (std::size_t j = 0u; j < m_Clusters.size(); ++j) { double logLikelihood; - const CPrior &prior = m_Clusters[j].prior(); - if (!( prior.jointLogMarginalLikelihood(COUNT_WEIGHT, x, UNIT_WEIGHT, logLikelihood) - & (maths_t::E_FpFailed | maths_t::E_FpOverflowed))) - { - likelihood += m_Clusters[j].weight(m_WeightCalc) - / weightSum - * std::exp(logLikelihood); + const CPrior& prior = m_Clusters[j].prior(); + if 
(!(prior.jointLogMarginalLikelihood(COUNT_WEIGHT, x, UNIT_WEIGHT, logLikelihood) & + (maths_t::E_FpFailed | maths_t::E_FpOverflowed))) { + likelihood += m_Clusters[j].weight(m_WeightCalc) / weightSum * std::exp(logLikelihood); } } coordinatesStr << x[0] << " "; @@ -1255,33 +1067,26 @@ std::string CXMeansOnline1d::printClusters() const return coordinatesStr.str() + likelihoodStr.str(); } -CXMeansOnline1d::CIndexGenerator &CXMeansOnline1d::indexGenerator() -{ +CXMeansOnline1d::CIndexGenerator& CXMeansOnline1d::indexGenerator() { return m_ClusterIndexGenerator; } -bool CXMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - RESTORE_SETUP_TEARDOWN(CLUSTER_TAG, - CCluster cluster(*this), - traverser.traverseSubLevel(boost::bind(&CCluster::acceptRestoreTraverser, - &cluster, boost::cref(params), _1)), - m_Clusters.push_back(cluster)) +bool CXMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN( + CLUSTER_TAG, + CCluster cluster(*this), + traverser.traverseSubLevel(boost::bind(&CCluster::acceptRestoreTraverser, &cluster, boost::cref(params), _1)), + m_Clusters.push_back(cluster)) RESTORE(AVAILABLE_DISTRIBUTIONS_TAG, m_AvailableDistributions.fromString(traverser.value())) - RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, - double decayRate, - core::CStringUtils::stringToType(traverser.value(), decayRate), - this->decayRate(decayRate)) + RESTORE_SETUP_TEARDOWN( + DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) RESTORE_BUILT_IN(HISTORY_LENGTH_TAG, m_HistoryLength); RESTORE(SMALLEST_TAG, m_Smallest.fromDelimited(traverser.value())) RESTORE(LARGEST_TAG, m_Largest.fromDelimited(traverser.value())) RESTORE(CLUSTER_INDEX_GENERATOR_TAG, - traverser.traverseSubLevel(boost::bind(&CIndexGenerator::acceptRestoreTraverser, - &m_ClusterIndexGenerator, _1))) + traverser.traverseSubLevel(boost::bind(&CIndexGenerator::acceptRestoreTraverser, &m_ClusterIndexGenerator, _1))) RESTORE_SETUP_TEARDOWN(WEIGHT_CALC_TAG, int weightCalc, core::CStringUtils::stringToType(traverser.value(), weightCalc), @@ -1289,32 +1094,25 @@ bool CXMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams &p RESTORE_BUILT_IN(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction) RESTORE_BUILT_IN(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount) RESTORE_BUILT_IN(WINSORISATION_CONFIDENCE_INTERVAL_TAG, m_WinsorisationConfidenceInterval) - } - while (traverser.next()); + } while (traverser.next()); return true; } -const CXMeansOnline1d::CCluster *CXMeansOnline1d::cluster(std::size_t index) const -{ - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { - if (m_Clusters[i].index() == index) - { +const CXMeansOnline1d::CCluster* CXMeansOnline1d::cluster(std::size_t index) const { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + if (m_Clusters[i].index() == index) { return &m_Clusters[i]; } } return 0; } -double CXMeansOnline1d::minimumSplitCount() const -{ +double CXMeansOnline1d::minimumSplitCount() const { double result = m_MinimumClusterCount; - if (m_MinimumClusterFraction > 0.0) - { + if (m_MinimumClusterFraction > 0.0) { double count = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { count += 
m_Clusters[i].count(); } double scale = std::max(m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0); @@ -1325,21 +1123,15 @@ double CXMeansOnline1d::minimumSplitCount() const return result; } -bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) -{ - if (cluster == m_Clusters.end()) - { +bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) { + if (cluster == m_Clusters.end()) { return false; } TDoubleDoublePr interval = this->winsorisationInterval(); - if (TOptionalClusterClusterPr split = cluster->split(m_AvailableDistributions, - this->minimumSplitCount(), - m_Smallest[0], interval, - m_ClusterIndexGenerator)) - { - LOG_TRACE("Splitting cluster " << cluster->index() - << " at " << cluster->centre()); + if (TOptionalClusterClusterPr split = + cluster->split(m_AvailableDistributions, this->minimumSplitCount(), m_Smallest[0], interval, m_ClusterIndexGenerator)) { + LOG_TRACE("Splitting cluster " << cluster->index() << " at " << cluster->centre()); std::size_t index = cluster->index(); *cluster = split->second; m_Clusters.insert(cluster, split->first); @@ -1350,21 +1142,15 @@ bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) return false; } -bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, - TClusterVecItr cluster2) -{ - if (cluster1 == m_Clusters.end() || cluster2 == m_Clusters.end()) - { +bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, TClusterVecItr cluster2) { + if (cluster1 == m_Clusters.end() || cluster2 == m_Clusters.end()) { return false; } TDoubleDoublePr interval = this->winsorisationInterval(); - if (cluster1->shouldMerge(*cluster2, m_AvailableDistributions, m_Smallest[0], interval)) - { - LOG_TRACE("Merging cluster " << cluster1->index() - << " at " << cluster1->centre() - << " and cluster " << cluster2->index() - << " at " << cluster2->centre()); + if (cluster1->shouldMerge(*cluster2, m_AvailableDistributions, m_Smallest[0], interval)) { + LOG_TRACE("Merging cluster " << cluster1->index() << " at " << cluster1->centre() << " and cluster " << cluster2->index() << " at " + << cluster2->centre()); std::size_t index1 = cluster1->index(); std::size_t index2 = cluster2->index(); CCluster merged = cluster1->merge(*cluster2, m_ClusterIndexGenerator); @@ -1377,36 +1163,28 @@ bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, return false; } -bool CXMeansOnline1d::prune() -{ - if (m_Clusters.size() <= 1) - { +bool CXMeansOnline1d::prune() { + if (m_Clusters.size() <= 1) { return false; } bool result = false; double minimumCount = this->minimumSplitCount() * CLUSTER_DELETE_FRACTION; - for (std::size_t i = 1u; i < m_Clusters.size(); /**/) - { - CCluster &left = m_Clusters[i-1]; - CCluster &right = m_Clusters[i]; - if (left.count() < minimumCount || right.count() < minimumCount) - { - std::size_t leftIndex = left.index(); + for (std::size_t i = 1u; i < m_Clusters.size(); /**/) { + CCluster& left = m_Clusters[i - 1]; + CCluster& right = m_Clusters[i]; + if (left.count() < minimumCount || right.count() < minimumCount) { + std::size_t leftIndex = left.index(); std::size_t rightIndex = right.index(); - LOG_TRACE("Merging cluster " << leftIndex - << " at " << left.centre() - << " and cluster " << rightIndex - << " at "<< right.centre()); + LOG_TRACE("Merging cluster " << leftIndex << " at " << left.centre() << " and cluster " << rightIndex << " at " + << right.centre()); CCluster merge = left.merge(right, m_ClusterIndexGenerator); left = merge; m_Clusters.erase(m_Clusters.begin() + i); (this->mergeFunc())(leftIndex, rightIndex, merge.index()); 
result = true; - } - else - { + } else { ++i; } } @@ -1414,16 +1192,13 @@ bool CXMeansOnline1d::prune() return result; } -TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const -{ +TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const { double f = (1.0 - m_WinsorisationConfidenceInterval) / 2.0; - if (f * this->count() < 1.0) - { + if (f * this->count() < 1.0) { // Don't bother if we don't expect a sample outside the // Winsorisation interval. - return std::make_pair(boost::numeric::bounds<double>::lowest() / 2.0, - boost::numeric::bounds<double>::highest() / 2.0); + return std::make_pair(boost::numeric::bounds<double>::lowest() / 2.0, boost::numeric::bounds<double>::highest() / 2.0); } // The Winsorisation interval ends are the positions corresponding @@ -1432,32 +1207,25 @@ TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const // data to the 1 - f central confidence interval. double totalCount = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { totalCount += m_Clusters[i].count(); } double leftCount = f * totalCount; double rightCount = (1.0 - f) * totalCount; - LOG_TRACE("totalCount = " << totalCount - << " interval = [" << leftCount << "," << rightCount << "]" - << " # clusters = " << m_Clusters.size()); + LOG_TRACE("totalCount = " << totalCount << " interval = [" << leftCount << "," << rightCount << "]" + << " # clusters = " << m_Clusters.size()); TDoubleDoublePr result; double partialCount = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) - { + for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { double count = m_Clusters[i].count(); - if (partialCount < leftCount - && partialCount + count >= leftCount) - { + if (partialCount < leftCount && partialCount + count >= leftCount) { double p = 100.0 * (leftCount - partialCount) / count; result.first = m_Clusters[i].percentile(p); } - if (partialCount < rightCount - && partialCount + count >= rightCount) - { + if (partialCount < rightCount && partialCount + count >= rightCount) { double p = 100.0 * (rightCount - partialCount) / count; result.second = m_Clusters[i].percentile(p); break; @@ -1465,144 +1233,108 @@ TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const partialCount += count; } - LOG_TRACE("Winsorisation interval = [" - << result.first << "," << result.second << "]"); + LOG_TRACE("Winsorisation interval = [" << result.first << "," << result.second << "]"); return result; } //////////// CCluster Implementation //////////// -CXMeansOnline1d::CCluster::CCluster(const CXMeansOnline1d &clusterer) : - m_Index(clusterer.m_ClusterIndexGenerator.next()), - m_Prior(CNormalMeanPrecConjugate::nonInformativePrior(clusterer.m_DataType, - clusterer.m_DecayRate)), - m_Structure(STRUCTURE_SIZE, clusterer.m_DecayRate, clusterer.m_MinimumCategoryCount) -{ +CXMeansOnline1d::CCluster::CCluster(const CXMeansOnline1d& clusterer) + : m_Index(clusterer.m_ClusterIndexGenerator.next()), + m_Prior(CNormalMeanPrecConjugate::nonInformativePrior(clusterer.m_DataType, clusterer.m_DecayRate)), + m_Structure(STRUCTURE_SIZE, clusterer.m_DecayRate, clusterer.m_MinimumCategoryCount) { } -CXMeansOnline1d::CCluster::CCluster(std::size_t index, - const CNormalMeanPrecConjugate &prior, - const CNaturalBreaksClassifier &structure) : - m_Index(index), - m_Prior(prior), - m_Structure(structure) +CXMeansOnline1d::CCluster::CCluster(std::size_t index, const CNormalMeanPrecConjugate& prior, const CNaturalBreaksClassifier& structure) + : m_Index(index), m_Prior(prior), m_Structure(structure)
{ } -bool CXMeansOnline1d::CCluster::acceptRestoreTraverser(const SDistributionRestoreParams ¶ms, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CXMeansOnline1d::CCluster::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(INDEX_TAG, m_Index) RESTORE_NO_ERROR(PRIOR_TAG, m_Prior = CNormalMeanPrecConjugate(params, traverser)) RESTORE(STRUCTURE_TAG, - traverser.traverseSubLevel(boost::bind(&CNaturalBreaksClassifier::acceptRestoreTraverser, - &m_Structure, boost::cref(params), _1))) - } - while (traverser.next()); + traverser.traverseSubLevel( + boost::bind(&CNaturalBreaksClassifier::acceptRestoreTraverser, &m_Structure, boost::cref(params), _1))) + } while (traverser.next()); return true; } - -void CXMeansOnline1d::CCluster::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CXMeansOnline1d::CCluster::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(INDEX_TAG, m_Index); inserter.insertLevel(PRIOR_TAG, boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, &m_Prior, _1)); inserter.insertLevel(STRUCTURE_TAG, boost::bind(&CNaturalBreaksClassifier::acceptPersistInserter, &m_Structure, _1)); } -void CXMeansOnline1d::CCluster::dataType(maths_t::EDataType dataType) -{ +void CXMeansOnline1d::CCluster::dataType(maths_t::EDataType dataType) { m_Prior.dataType(dataType); } -void CXMeansOnline1d::CCluster::add(double point, double count) -{ - m_Prior.addSamples(CConstantWeights::COUNT, - TDouble1Vec(1, point), - TDouble4Vec1Vec(1, TDouble4Vec(1, count))); +void CXMeansOnline1d::CCluster::add(double point, double count) { + m_Prior.addSamples(CConstantWeights::COUNT, TDouble1Vec(1, point), TDouble4Vec1Vec(1, TDouble4Vec(1, count))); m_Structure.add(point, count); } -void CXMeansOnline1d::CCluster::decayRate(double decayRate) -{ +void CXMeansOnline1d::CCluster::decayRate(double decayRate) { m_Prior.decayRate(decayRate); m_Structure.decayRate(decayRate); } -void CXMeansOnline1d::CCluster::propagateForwardsByTime(double time) -{ +void CXMeansOnline1d::CCluster::propagateForwardsByTime(double time) { m_Prior.propagateForwardsByTime(time); m_Structure.propagateForwardsByTime(time); } -std::size_t CXMeansOnline1d::CCluster::index() const -{ +std::size_t CXMeansOnline1d::CCluster::index() const { return m_Index; } -double CXMeansOnline1d::CCluster::centre() const -{ +double CXMeansOnline1d::CCluster::centre() const { return m_Prior.marginalLikelihoodMean(); } -double CXMeansOnline1d::CCluster::spread() const -{ +double CXMeansOnline1d::CCluster::spread() const { return std::sqrt(m_Prior.marginalLikelihoodVariance()); } -double CXMeansOnline1d::CCluster::percentile(double p) const -{ +double CXMeansOnline1d::CCluster::percentile(double p) const { return m_Structure.percentile(p); } -double CXMeansOnline1d::CCluster::count() const -{ +double CXMeansOnline1d::CCluster::count() const { return m_Prior.numberSamples(); } -double CXMeansOnline1d::CCluster::weight(maths_t::EClusterWeightCalc calc) const -{ - switch (calc) - { - case maths_t::E_ClustersEqualWeight: return 1.0; - case maths_t::E_ClustersFractionWeight: return m_Prior.numberSamples(); +double CXMeansOnline1d::CCluster::weight(maths_t::EClusterWeightCalc calc) const { + switch (calc) { + case maths_t::E_ClustersEqualWeight: + return 1.0; + case maths_t::E_ClustersFractionWeight: + return 
m_Prior.numberSamples(); } LOG_ABORT("Unexpected calculation style " << calc); } -double CXMeansOnline1d::CCluster::logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc, - double point) const -{ +double CXMeansOnline1d::CCluster::logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc, double point) const { double result; - if (detail::logLikelihoodFromCluster(point, m_Prior, - this->weight(calc), - result) & maths_t::E_FpFailed) - { + if (detail::logLikelihoodFromCluster(point, m_Prior, this->weight(calc), result) & maths_t::E_FpFailed) { LOG_ERROR("Unable to compute likelihood for: " << m_Index); } return result; } -void CXMeansOnline1d::CCluster::sample(std::size_t numberSamples, - double smallest, - double largest, - TDoubleVec &samples) const -{ +void CXMeansOnline1d::CCluster::sample(std::size_t numberSamples, double smallest, double largest, TDoubleVec& samples) const { m_Structure.sample(numberSamples, smallest, largest, samples); } -CXMeansOnline1d::TOptionalClusterClusterPr -CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, - double minimumCount, - double smallest, - const TDoubleDoublePr &interval, - CIndexGenerator &indexGenerator) -{ +CXMeansOnline1d::TOptionalClusterClusterPr CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, + double minimumCount, + double smallest, + const TDoubleDoublePr& interval, + CIndexGenerator& indexGenerator) { // We do our clustering top down to minimize space and avoid // making splits before we are confident they exist. This is // important for anomaly detection because we do *not* want @@ -1619,8 +1351,7 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, LOG_TRACE("split"); - if (m_Structure.buffering()) - { + if (m_Structure.buffering()) { return TOptionalClusterClusterPr(); } @@ -1628,8 +1359,7 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, double decayRate = m_Prior.decayRate(); std::size_t n = m_Structure.size(); - if (n < 2) - { + if (n < 2) { return TOptionalClusterClusterPr(); } @@ -1637,13 +1367,10 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, { TTupleVec categories; m_Structure.categories(n, 0, categories); - for (std::size_t i = 0u; i < categories.size(); ++i) - { + for (std::size_t i = 0u; i < categories.size(); ++i) { detail::winsorise(interval, categories[i]); } - if (!detail::splitSearch(minimumCount, MINIMUM_SPLIT_DISTANCE, - dataType, distributions, smallest, categories, split)) - { + if (!detail::splitSearch(minimumCount, MINIMUM_SPLIT_DISTANCE, dataType, distributions, smallest, categories, split)) { return TOptionalClusterClusterPr(); } } @@ -1653,10 +1380,8 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, CNaturalBreaksClassifier::TClassifierVec classifiers; m_Structure.split(split, classifiers); - LOG_TRACE("Splitting cluster " << this->index() - << " at " << this->centre() - << " left = " << classifiers[0].print() - << ", right = " << classifiers[1].print()); + LOG_TRACE("Splitting cluster " << this->index() << " at " << this->centre() << " left = " << classifiers[0].print() + << ", right = " << classifiers[1].print()); std::size_t index1 = indexGenerator.next(); std::size_t index2 = indexGenerator.next(); @@ -1664,53 +1389,41 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, CNormalMeanPrecConjugate leftNormal(dataType, categories[0], decayRate); CNormalMeanPrecConjugate rightNormal(dataType, categories[1], 
decayRate); - return TClusterClusterPr(CCluster(index1, leftNormal, classifiers[0]), - CCluster(index2, rightNormal, classifiers[1])); + return TClusterClusterPr(CCluster(index1, leftNormal, classifiers[0]), CCluster(index2, rightNormal, classifiers[1])); } -bool CXMeansOnline1d::CCluster::shouldMerge(CCluster &other, +bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, CAvailableModeDistributions distributions, double smallest, - const TDoubleDoublePr &interval) -{ - if ( m_Structure.buffering() - || m_Structure.size() == 0 - || other.m_Structure.size() == 0) - { + const TDoubleDoublePr& interval) { + if (m_Structure.buffering() || m_Structure.size() == 0 || other.m_Structure.size() == 0) { return false; } maths_t::EDataType dataType = m_Prior.dataType(); TTupleVec categories; - if (!m_Structure.categories(m_Structure.size(), 0, categories)) - { + if (!m_Structure.categories(m_Structure.size(), 0, categories)) { return false; } std::size_t split = categories.size(); - if (!other.m_Structure.categories(other.m_Structure.size(), 0, categories, true)) - { + if (!other.m_Structure.categories(other.m_Structure.size(), 0, categories, true)) { return false; } - for (std::size_t i = 0u; i < categories.size(); ++i) - { + for (std::size_t i = 0u; i < categories.size(); ++i) { detail::winsorise(interval, categories[i]); } double distance; double nl; double nr; - detail::BICGain(dataType, distributions, smallest, categories, - 0, split, categories.size(), distance, nl, nr); - LOG_TRACE("max(BIC(1) - BIC(2), 0) = " << distance - << " (to merge " << MAXIMUM_MERGE_DISTANCE << ")"); + detail::BICGain(dataType, distributions, smallest, categories, 0, split, categories.size(), distance, nl, nr); + LOG_TRACE("max(BIC(1) - BIC(2), 0) = " << distance << " (to merge " << MAXIMUM_MERGE_DISTANCE << ")"); return distance <= MAXIMUM_MERGE_DISTANCE; } -CXMeansOnline1d::CCluster -CXMeansOnline1d::CCluster::merge(CCluster &other, CIndexGenerator &indexGenerator) -{ +CXMeansOnline1d::CCluster CXMeansOnline1d::CCluster::merge(CCluster& other, CIndexGenerator& indexGenerator) { TTupleVec left, right; m_Structure.categories(1, 0, left); other.m_Structure.categories(1, 0, right); @@ -1719,21 +1432,17 @@ CXMeansOnline1d::CCluster::merge(CCluster &other, CIndexGenerator &indexGenerato CNormalMeanPrecConjugate::TMeanVarAccumulator mergedCategories; - if (left.size() > 0) - { + if (left.size() > 0) { LOG_TRACE("left = " << left[0]); mergedCategories += left[0]; } - if (right.size() > 0) - { + if (right.size() > 0) { LOG_TRACE("right = " << right[0]); mergedCategories += right[0]; } - CNormalMeanPrecConjugate prior(m_Prior.dataType(), - mergedCategories, - m_Prior.decayRate()); + CNormalMeanPrecConjugate prior(m_Prior.dataType(), mergedCategories, m_Prior.decayRate()); CNaturalBreaksClassifier structure(m_Structure); structure.merge(other.m_Structure); @@ -1746,27 +1455,23 @@ CXMeansOnline1d::CCluster::merge(CCluster &other, CIndexGenerator &indexGenerato return result; } -const CNormalMeanPrecConjugate &CXMeansOnline1d::CCluster::prior() const -{ +const CNormalMeanPrecConjugate& CXMeansOnline1d::CCluster::prior() const { return m_Prior; } -uint64_t CXMeansOnline1d::CCluster::checksum(uint64_t seed) const -{ +uint64_t CXMeansOnline1d::CCluster::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_Index); seed = CChecksum::calculate(seed, m_Prior); return CChecksum::calculate(seed, m_Structure); } -void CXMeansOnline1d::CCluster::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void 
CXMeansOnline1d::CCluster::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CXMeansOnline1d::CCluster"); core::CMemoryDebug::dynamicSize("m_Prior", m_Prior, mem); core::CMemoryDebug::dynamicSize("m_Structure", m_Structure, mem); } -std::size_t CXMeansOnline1d::CCluster::memoryUsage() const -{ +std::size_t CXMeansOnline1d::CCluster::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_Prior); mem += core::CMemory::dynamicSize(m_Structure); return mem; @@ -1777,6 +1482,5 @@ const double CXMeansOnline1d::MINIMUM_SPLIT_DISTANCE(6.0); const double CXMeansOnline1d::MAXIMUM_MERGE_DISTANCE(2.0); const double CXMeansOnline1d::CLUSTER_DELETE_FRACTION(0.8); const std::size_t CXMeansOnline1d::STRUCTURE_SIZE(12u); - } } diff --git a/lib/maths/CXMeansOnlineFactory.cc b/lib/maths/CXMeansOnlineFactory.cc index b407295d57..b41e9f2c63 100644 --- a/lib/maths/CXMeansOnlineFactory.cc +++ b/lib/maths/CXMeansOnlineFactory.cc @@ -10,37 +10,31 @@ #include -namespace ml -{ -namespace maths -{ -namespace xmeans_online_factory_detail -{ +namespace ml { +namespace maths { +namespace xmeans_online_factory_detail { -#define XMEANS_FACTORY(T, N) \ -CClusterer<CVectorNx1<T, N> > *CFactory<T, N>::make(maths_t::EDataType dataType, \ - maths_t::EClusterWeightCalc weightCalc, \ - double decayRate, \ - double minimumClusterFraction, \ - double minimumClusterCount, \ - double minimumCategoryCount) \ -{ \ - return new CXMeansOnline<T, N>(dataType, weightCalc, decayRate, \ - minimumClusterFraction, minimumClusterCount, minimumCategoryCount); \ -} \ -CClusterer<CVectorNx1<T, N> > *CFactory<T, N>::restore(const SDistributionRestoreParams &params, \ - const CClustererTypes::TSplitFunc &splitFunc, \ - const CClustererTypes::TMergeFunc &mergeFunc, \ - core::CStateRestoreTraverser &traverser) \ -{ \ - return new CXMeansOnline<T, N>(params, splitFunc, mergeFunc, traverser); \ -} +#define XMEANS_FACTORY(T, N) \ + CClusterer<CVectorNx1<T, N>>* CFactory<T, N>::make(maths_t::EDataType dataType, \ + maths_t::EClusterWeightCalc weightCalc, \ + double decayRate, \ + double minimumClusterFraction, \ + double minimumClusterCount, \ + double minimumCategoryCount) { \ + return new CXMeansOnline<T, N>( \ + dataType, weightCalc, decayRate, minimumClusterFraction, minimumClusterCount, minimumCategoryCount); \ + } \ + CClusterer<CVectorNx1<T, N>>* CFactory<T, N>::restore(const SDistributionRestoreParams& params, \ + const CClustererTypes::TSplitFunc& splitFunc, \ + const CClustererTypes::TMergeFunc& mergeFunc, \ + core::CStateRestoreTraverser& traverser) { \ + return new CXMeansOnline<T, N>(params, splitFunc, mergeFunc, traverser); \ + } XMEANS_FACTORY(CFloatStorage, 2) XMEANS_FACTORY(CFloatStorage, 3) XMEANS_FACTORY(CFloatStorage, 4) XMEANS_FACTORY(CFloatStorage, 5) #undef XMEANS_FACTORY - } } } diff --git a/lib/maths/Constants.cc b/lib/maths/Constants.cc index 79bab92268..6884d7f2af 100644 --- a/lib/maths/Constants.cc +++ b/lib/maths/Constants.cc @@ -6,10 +6,8 @@ #include -namespace ml -{ -namespace maths -{ +namespace ml { +namespace maths { const maths_t::TWeightStyleVec CConstantWeights::COUNT{maths_t::E_SampleCountWeight}; const maths_t::TWeightStyleVec CConstantWeights::COUNT_VARIANCE{maths_t::E_SampleCountVarianceScaleWeight}; @@ -17,12 +15,8 @@ const maths_t::TWeightStyleVec CConstantWeights::SEASONAL_VARIANCE{maths_t::E_Sa const CConstantWeights::TDouble4Vec CConstantWeights::UNIT{1.0}; const CConstantWeights::TDouble4Vec1Vec CConstantWeights::SINGLE_UNIT{UNIT}; -double maxModelPenalty(double numberSamples) -{ +double maxModelPenalty(double numberSamples) { return 10.0 + numberSamples; } - } } - - diff --git
a/lib/maths/MathsTypes.cc b/lib/maths/MathsTypes.cc index 3403164cd3..3cc5b58c7e 100644 --- a/lib/maths/MathsTypes.cc +++ b/lib/maths/MathsTypes.cc @@ -16,59 +16,43 @@ #include -namespace ml -{ -namespace maths_t -{ -namespace -{ -namespace detail -{ +namespace ml { +namespace maths_t { +namespace { +namespace detail { //! Check that the weights styles and weights are consistent. template<typename T> -inline bool check(const TWeightStyleVec &weightStyles, - const core::CSmallVector<T, 4> &weights) -{ - if (weightStyles.size() == weights.size()) - { +inline bool check(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights) { + if (weightStyles.size() == weights.size()) { return true; } - LOG_ERROR("Mismatch in weight styles '" - << core::CContainerPrinter::print(weightStyles) - << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + LOG_ERROR("Mismatch in weight styles '" << core::CContainerPrinter::print(weightStyles) << "' and weights '" + << core::CContainerPrinter::print(weights) << "'"); return false; } //! Multiply \p lhs by \p rhs. -inline void multiplyEquals(double rhs, double &lhs) -{ +inline void multiplyEquals(double rhs, double& lhs) { lhs *= rhs; } //! Elementwise multiply \p lhs by \p rhs. -inline void multiplyEquals(const TDouble10Vec &rhs, TDouble10Vec &lhs) -{ - for (std::size_t i = 0u; i < lhs.size(); ++i) - { +inline void multiplyEquals(const TDouble10Vec& rhs, TDouble10Vec& lhs) { + for (std::size_t i = 0u; i < lhs.size(); ++i) { lhs[i] *= rhs[i]; } } //! Check if less than zero. -inline bool isNegative(double value) -{ +inline bool isNegative(double value) { return value < 0.0; } //! Elementwise check if less than zero. -inline bool isNegative(const TDouble10Vec &values) -{ - for (auto value : values) - { - if (value < 0.0) - { +inline bool isNegative(const TDouble10Vec& values) { + for (auto value : values) { + if (value < 0.0) { return true; } } @@ -76,18 +60,14 @@ inline bool isNegative(const TDouble10Vec &values) } //! Check if less than or equal to zero. -inline bool isNonPostive(double value) -{ +inline bool isNonPostive(double value) { return value <= 0.0; } //! Elementwise check if less than or equal to zero. -inline bool isNonPostive(const TDouble10Vec &values) -{ - for (auto value : values) - { - if (value < 0.0) - { +inline bool isNonPostive(const TDouble10Vec& values) { + for (auto value : values) { + if (value < 0.0) { return true; } } @@ -96,29 +76,25 @@ inline bool isNonPostive(const TDouble10Vec &values) //! Extract the effective sample count from a collection of weights.
template<typename T> -void count(const TWeightStyleVec &weightStyles, - const core::CSmallVector<T, 4> &weights, - T &result) -{ - if (check(weightStyles, weights)) - { +void count(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights, T& result) { + if (check(weightStyles, weights)) { T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) - { - switch (weightStyles[i]) - { - case E_SampleCountWeight: multiplyEquals(weights[i], candidate); break; - case E_SampleSeasonalVarianceScaleWeight: break; - case E_SampleCountVarianceScaleWeight: break; - case E_SampleWinsorisationWeight: break; + for (std::size_t i = 0u; i < weightStyles.size(); ++i) { + switch (weightStyles[i]) { + case E_SampleCountWeight: + multiplyEquals(weights[i], candidate); + break; + case E_SampleSeasonalVarianceScaleWeight: + break; + case E_SampleCountVarianceScaleWeight: + break; + case E_SampleWinsorisationWeight: + break; } } - if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) - { + if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) { LOG_ERROR("Ignoring bad count weight: " << result); - } - else - { + } else { result = std::move(candidate); } } @@ -127,29 +103,26 @@ void count(const TWeightStyleVec &weightStyles, //! Extract the effective sample count with which to update a model //! from a collection of weights. template<typename T> -void countForUpdate(const TWeightStyleVec &weightStyles, - const core::CSmallVector<T, 4> &weights, - T &result) -{ - if (check(weightStyles, weights)) - { +void countForUpdate(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights, T& result) { + if (check(weightStyles, weights)) { T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) - { - switch (weightStyles[i]) - { - case E_SampleCountWeight: multiplyEquals(weights[i], candidate); break; - case E_SampleSeasonalVarianceScaleWeight: break; - case E_SampleCountVarianceScaleWeight: break; - case E_SampleWinsorisationWeight: multiplyEquals(weights[i], candidate); break; + for (std::size_t i = 0u; i < weightStyles.size(); ++i) { + switch (weightStyles[i]) { + case E_SampleCountWeight: + multiplyEquals(weights[i], candidate); + break; + case E_SampleSeasonalVarianceScaleWeight: + break; + case E_SampleCountVarianceScaleWeight: + break; + case E_SampleWinsorisationWeight: + multiplyEquals(weights[i], candidate); + break; } } - if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) - { + if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) { LOG_ERROR("Ignoring bad count weight: " << result); - } - else - { + } else { result = std::move(candidate); } } @@ -157,29 +130,25 @@ void countForUpdate(const TWeightStyleVec &weightStyles, //! Extract the Winsorisation weight from a collection of weights.
template<typename T> -void winsorisationWeight(const TWeightStyleVec &weightStyles, - const core::CSmallVector<T, 4> &weights, - T &result) -{ - if (check(weightStyles, weights)) - { +void winsorisationWeight(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights, T& result) { + if (check(weightStyles, weights)) { T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) - { - switch (weightStyles[i]) - { - case E_SampleCountWeight: break; - case E_SampleSeasonalVarianceScaleWeight: break; - case E_SampleCountVarianceScaleWeight: break; - case E_SampleWinsorisationWeight: multiplyEquals(weights[i], candidate); break; + for (std::size_t i = 0u; i < weightStyles.size(); ++i) { + switch (weightStyles[i]) { + case E_SampleCountWeight: + break; + case E_SampleSeasonalVarianceScaleWeight: + break; + case E_SampleCountVarianceScaleWeight: + break; + case E_SampleWinsorisationWeight: + multiplyEquals(weights[i], candidate); + break; } } - if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) - { + if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) { LOG_ERROR("Ignoring bad Winsorisation weight: " << result); - } - else - { + } else { result = std::move(candidate); } } @@ -187,29 +156,25 @@ void winsorisationWeight(const TWeightStyleVec &weightStyles, //! Extract the seasonal variance scale from a collection of weights. template<typename T> -void seasonalVarianceScale(const TWeightStyleVec &weightStyles, - const core::CSmallVector<T, 4> &weights, - T &result) -{ - if (check(weightStyles, weights)) - { +void seasonalVarianceScale(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights, T& result) { + if (check(weightStyles, weights)) { T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) - { - switch (weightStyles[i]) - { - case E_SampleCountWeight: break; - case E_SampleSeasonalVarianceScaleWeight: multiplyEquals(weights[i], candidate); break; - case E_SampleCountVarianceScaleWeight: break; - case E_SampleWinsorisationWeight: break; + for (std::size_t i = 0u; i < weightStyles.size(); ++i) { + switch (weightStyles[i]) { + case E_SampleCountWeight: + break; + case E_SampleSeasonalVarianceScaleWeight: + multiplyEquals(weights[i], candidate); + break; + case E_SampleCountVarianceScaleWeight: + break; + case E_SampleWinsorisationWeight: + break; } } - if (!maths::CMathsFuncs::isFinite(result) || isNonPostive(result)) - { + if (!maths::CMathsFuncs::isFinite(result) || isNonPostive(result)) { LOG_ERROR("Ignoring bad variance scale: " << result); - } - else - { + } else { result = std::move(candidate); } } @@ -217,160 +182,116 @@ void seasonalVarianceScale(const TWeightStyleVec &weightStyles, //! Extract the count variance scale from a collection of weights.
template<typename T> -void countVarianceScale(const TWeightStyleVec &weightStyles, - const core::CSmallVector<T, 4> &weights, - T &result) -{ - if (check(weightStyles, weights)) - { +void countVarianceScale(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights, T& result) { + if (check(weightStyles, weights)) { T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) - { - switch (weightStyles[i]) - { - case E_SampleCountWeight: break; - case E_SampleSeasonalVarianceScaleWeight: break; - case E_SampleCountVarianceScaleWeight: multiplyEquals(weights[i], candidate); break; - case E_SampleWinsorisationWeight: break; + for (std::size_t i = 0u; i < weightStyles.size(); ++i) { + switch (weightStyles[i]) { + case E_SampleCountWeight: + break; + case E_SampleSeasonalVarianceScaleWeight: + break; + case E_SampleCountVarianceScaleWeight: + multiplyEquals(weights[i], candidate); + break; + case E_SampleWinsorisationWeight: + break; } } - if (!maths::CMathsFuncs::isFinite(result) || isNonPostive(result)) - { + if (!maths::CMathsFuncs::isFinite(result) || isNonPostive(result)) { LOG_ERROR("Ignoring bad variance scale: " << result); - } - else - { + } else { result = std::move(candidate); } } } - } } -double count(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) -{ +double count(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { double result{1.0}; detail::count(weightStyles, weights, result); return result; } -TDouble10Vec count(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) -{ +TDouble10Vec count(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::count(weightStyles, weights, result); return result; - } -double countForUpdate(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) -{ +double countForUpdate(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { double result{1.0}; detail::countForUpdate(weightStyles, weights, result); return result; } -TDouble10Vec countForUpdate(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) -{ +TDouble10Vec countForUpdate(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::countForUpdate(weightStyles, weights, result); return result; } -double winsorisationWeight(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) -{ +double winsorisationWeight(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { double result{1.0}; detail::winsorisationWeight(weightStyles, weights, result); return result; } -TDouble10Vec winsorisationWeight(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) -{ +TDouble10Vec winsorisationWeight(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::winsorisationWeight(weightStyles, weights, result); return result; } -double seasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) -{ +double seasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { double result{1.0}; detail::seasonalVarianceScale(weightStyles, weights, result); return result; } -TDouble10Vec seasonalVarianceScale(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) -{
+TDouble10Vec seasonalVarianceScale(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::seasonalVarianceScale(weightStyles, weights, result); return result; } -double countVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) -{ +double countVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { double result{1.0}; detail::countVarianceScale(weightStyles, weights, result); return result; } -TDouble10Vec countVarianceScale(std::size_t dimension, - const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) -{ +TDouble10Vec countVarianceScale(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::countVarianceScale(weightStyles, weights, result); return result; } -bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) -{ +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { return seasonalVarianceScale(weightStyles, weights) != 1.0; } -bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec1Vec &weights) -{ - for (std::size_t i = 0u; i < weights.size(); ++i) - { - if (hasSeasonalVarianceScale(weightStyles, weights[i])) - { +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights) { + for (std::size_t i = 0u; i < weights.size(); ++i) { + if (hasSeasonalVarianceScale(weightStyles, weights[i])) { return true; } } return false; } -bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) -{ - if (!detail::check(weightStyles, weights)) - { +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { + if (!detail::check(weightStyles, weights)) { return false; } - for (std::size_t i = 0u; i < weightStyles.size(); ++i) - { - switch (weightStyles[i]) - { + for (std::size_t i = 0u; i < weightStyles.size(); ++i) { + switch (weightStyles[i]) { case E_SampleCountWeight: break; case E_SampleSeasonalVarianceScaleWeight: - for (std::size_t j = 0u; j < weights[i].size(); ++j) - { - if (weights[i][j] != 1.0) - { + for (std::size_t j = 0u; j < weights[i].size(); ++j) { + if (weights[i][j] != 1.0) { return true; } } @@ -384,58 +305,41 @@ bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, return false; } -bool hasSeasonalVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec1Vec &weights) -{ - for (std::size_t i = 0u; i < weights.size(); ++i) - { - if (hasSeasonalVarianceScale(weightStyles, weights[i])) - { +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec1Vec& weights) { + for (std::size_t i = 0u; i < weights.size(); ++i) { + if (hasSeasonalVarianceScale(weightStyles, weights[i])) { return true; } } return false; } -bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec &weights) -{ +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { return countVarianceScale(weightStyles, weights) != 1.0; } -bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble4Vec1Vec &weights) -{ - for (std::size_t i = 0u; i < weights.size(); ++i) - { - if (hasCountVarianceScale(weightStyles, weights[i])) - { +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const 
TDouble4Vec1Vec& weights) { + for (std::size_t i = 0u; i < weights.size(); ++i) { + if (hasCountVarianceScale(weightStyles, weights[i])) { return true; } } return false; } -bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec &weights) -{ - if (!detail::check(weightStyles, weights)) - { +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { + if (!detail::check(weightStyles, weights)) { return false; } - for (std::size_t i = 0u; i < weightStyles.size(); ++i) - { - switch (weightStyles[i]) - { + for (std::size_t i = 0u; i < weightStyles.size(); ++i) { + switch (weightStyles[i]) { case E_SampleCountWeight: break; case E_SampleSeasonalVarianceScaleWeight: break; case E_SampleCountVarianceScaleWeight: - for (std::size_t j = 0u; j < weights[i].size(); ++j) - { - if (weights[i][j] != 1.0) - { + for (std::size_t j = 0u; j < weights[i].size(); ++j) { + if (weights[i][j] != 1.0) { return true; } } @@ -447,55 +351,33 @@ bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, return false; } -bool hasCountVarianceScale(const TWeightStyleVec &weightStyles, - const TDouble10Vec4Vec1Vec &weights) -{ - for (std::size_t i = 0u; i < weights.size(); ++i) - { - if (hasCountVarianceScale(weightStyles, weights[i])) - { +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec1Vec& weights) { + for (std::size_t i = 0u; i < weights.size(); ++i) { + if (hasCountVarianceScale(weightStyles, weights[i])) { return true; } } return false; } -void setWeight(ESampleWeightStyle style, - double weight, - TWeightStyleVec &weightStyles, - TDouble4Vec &weights) -{ - std::ptrdiff_t i{std::find(weightStyles.begin(), - weightStyles.end(), style) - weightStyles.begin()}; - if (static_cast<std::size_t>(i) < weightStyles.size()) - { +void setWeight(ESampleWeightStyle style, double weight, TWeightStyleVec& weightStyles, TDouble4Vec& weights) { + std::ptrdiff_t i{std::find(weightStyles.begin(), weightStyles.end(), style) - weightStyles.begin()}; + if (static_cast<std::size_t>(i) < weightStyles.size()) { weights[i] = weight; - } - else - { + } else { weightStyles.push_back(style); weights.push_back(weight); } } -void setWeight(ESampleWeightStyle style, - double weight, - std::size_t dimension, - TWeightStyleVec &weightStyles, - TDouble10Vec4Vec &weights) -{ - std::ptrdiff_t i{std::find(weightStyles.begin(), - weightStyles.end(), style) - weightStyles.begin()}; - if (static_cast<std::size_t>(i) < weightStyles.size()) - { +void setWeight(ESampleWeightStyle style, double weight, std::size_t dimension, TWeightStyleVec& weightStyles, TDouble10Vec4Vec& weights) { + std::ptrdiff_t i{std::find(weightStyles.begin(), weightStyles.end(), style) - weightStyles.begin()}; + if (static_cast<std::size_t>(i) < weightStyles.size()) { weights[i].assign(dimension, weight); - } - else - { + } else { weightStyles.push_back(style); weights.push_back(TDouble10Vec(dimension, weight)); } } - } } diff --git a/lib/maths/ProbabilityAggregators.cc b/lib/maths/ProbabilityAggregators.cc index 37d786e7b9..17f0fbb026 100644 --- a/lib/maths/ProbabilityAggregators.cc +++ b/lib/maths/ProbabilityAggregators.cc @@ -6,8 +6,8 @@ #include -#include #include +#include #include #include @@ -21,12 +21,9 @@ #include #include -namespace ml -{ -namespace maths -{ -namespace -{ +namespace ml { +namespace maths { +namespace { using TDoubleDoublePr = std::pair<double, double>; using TDoubleVec = std::vector<double>; @@ -45,19 +42,12 @@ using TDoubleVec = std::vector<double>; //!
avoids loss of precision which we'd get when subtracting \f$P(R(i))\f$ //! from 1. See CJointProbabilityOfLessLikelySamples::calculate for details //! of how the \f$z(i)\f$ are used to compute the joint probability. -bool deviation(double p, double &result) -{ - try - { +bool deviation(double p, double& result) { + try { boost::math::normal_distribution<> normal(0.0, 1.0); result = CTools::pow2(boost::math::quantile(normal, p / 2.0)); return true; - } - catch (const std::exception &e) - { - LOG_ERROR("Unable to compute quantile: " << e.what() - << ", probability = " << p); - } + } catch (const std::exception& e) { LOG_ERROR("Unable to compute quantile: " << e.what() << ", probability = " << p); } return false; } @@ -77,109 +67,85 @@ bool deviation(double p, double &result) //!
 //! \f$\displaystyle \frac{N!}{(N-m)!}\int_{0}^{P_m}\int_{t_m}^{P_{m-1}}...\int_{t_2}^{P_1}(1-t_1)^{N-m}dt_1...dt_m\f$
 //! 
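For intuition, the nested integral above can be evaluated directly in linear space when m is small. The following standalone sketch is an editorial illustration, not part of this patch: it mirrors the recursion of the CLogIntegrand class that follows, but substitutes plain composite Simpson quadrature for CIntegration::logGaussLegendre and omits the log-space corrections; the helper names are hypothetical.

    #include <cmath>
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <vector>

    namespace {
    // Composite Simpson's rule on [a, b]; "intervals" must be even.
    double simpson(const std::function<double(double)>& f, double a, double b, int intervals = 64) {
        double h = (b - a) / intervals;
        double sum = f(a) + f(b);
        for (int i = 1; i < intervals; ++i) {
            sum += (i % 2 == 1 ? 4.0 : 2.0) * f(a + i * h);
        }
        return sum * h / 3.0;
    }

    // eval(i, x) plays the role of CLogIntegrand::evaluate: the innermost
    // (m'th) integrand is (1 - x)^(n - m) and variable i + 1 runs over
    // [x, p[i]], where p holds the m smallest probabilities in ascending order.
    double eval(std::size_t i, double x, const std::vector<double>& p, std::size_t n) {
        if (i == p.size()) {
            return std::pow(1.0 - x, static_cast<double>(n - p.size()));
        }
        return simpson([&](double t) { return eval(i + 1, t, p, n); }, x, p[i]);
    }

    // P(m from n extreme samples) = n! / (n - m)! times the nested integral.
    double probabilityOfMFromNExtremeSamples(const std::vector<double>& p, std::size_t n) {
        double result = simpson([&](double t) { return eval(1, t, p, n); }, 0.0, p[0]);
        for (std::size_t k = 0; k < p.size(); ++k) {
            result *= static_cast<double>(n - k);
        }
        return result;
    }
    }

    int main() {
        // Arbitrary example: the two smallest of n = 20 sample probabilities.
        std::cout << probabilityOfMFromNExtremeSamples({0.001, 0.01}, 20) << '\n';
        return 0;
    }

A convenient sanity check: for m = 1 this reduces to n times the integral of (1 - t)^(n - 1) over [0, p[0]], which is 1 - (1 - p[0])^n, the probability that the smallest of n p-values is at most p[0].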
-class CNumericalLogProbabilityOfMFromNExtremeSamples -{ - public: - using TMinValueAccumulator = CBasicStatistics::COrderStatisticsHeap<double>; - - //! A recursive integrand for the multi-variable integration. - class CLogIntegrand - { - public: - //! \param limits The upper limits of integration. - //! \param n The total number of samples. - //! \param m The number of extreme samples. - //! \param i The variable being integrated, i.e. \f$t_i\f$. - CLogIntegrand(const TDoubleVec &limits, - const TDoubleVec &corrections, - std::size_t n, - std::size_t m, - std::size_t i) : - m_Limits(&limits), - m_Corrections(&corrections), - m_N(n), m_M(m), m_I(i) - { - } - - //! Wrapper around evaluate which adapts it for CIntegration::gaussLegendre. - bool operator()(double x, double &result) const - { - result = this->evaluate(x); - return true; - } - - private: - //! Evaluate the i'th integral at \p x. - double evaluate(double x) const - { - if (m_I == m_M) - { - return static_cast<double>(m_N - m_M) * CTools::logOneMinusX(x); - } - double result; - CLogIntegrand f(*m_Limits, *m_Corrections, m_N, m_M, m_I + 1u); - CIntegration::logGaussLegendre(f, x, (*m_Limits)[m_I], result); - result += (*m_Corrections)[m_I]; - return result; - } - - const TDoubleVec *m_Limits; - const TDoubleVec *m_Corrections; - std::size_t m_N; - std::size_t m_M; - std::size_t m_I; - }; +class CNumericalLogProbabilityOfMFromNExtremeSamples { +public: + using TMinValueAccumulator = CBasicStatistics::COrderStatisticsHeap<double>; + //! A recursive integrand for the multi-variable integration. + class CLogIntegrand { public: + //! \param limits The upper limits of integration. + //! \param n The total number of samples. + //! \param m The number of extreme samples. + //! \param i The variable being integrated, i.e. \f$t_i\f$. + CLogIntegrand(const TDoubleVec& limits, const TDoubleVec& corrections, std::size_t n, std::size_t m, std::size_t i) + : m_Limits(&limits), m_Corrections(&corrections), m_N(n), m_M(m), m_I(i) {} + + //! Wrapper around evaluate which adapts it for CIntegration::gaussLegendre. + bool operator()(double x, double& result) const { + result = this->evaluate(x); + return true; + } + private: + //! Evaluate the i'th integral at \p x.
+ double evaluate(double x) const { + if (m_I == m_M) { + return static_cast<double>(m_N - m_M) * CTools::logOneMinusX(x); + } double result; - CLogIntegrand f(m_P, m_Corrections, m_N, m_P.size(), 1u); - CIntegration::logGaussLegendre(f, 0, m_P[0], result); - result += boost::math::lgamma(static_cast<double>(m_N) + 1.0) - - boost::math::lgamma(static_cast<double>(m_N - m_P.size()) + 1.0); + CLogIntegrand f(*m_Limits, *m_Corrections, m_N, m_M, m_I + 1u); + CIntegration::logGaussLegendre(f, x, (*m_Limits)[m_I], result); + result += (*m_Corrections)[m_I]; return result; } - private: - double truncate(double p, double pMinus1) const - { - static const double CUTOFF[] = - { - 1.0e32, 1.0e16, 1.0e8, 1.0e4, 100.0 - }; - return std::min(p, (m_N >= boost::size(CUTOFF) ? 100.0 : CUTOFF[m_N]) * pMinus1); + const TDoubleVec* m_Limits; + const TDoubleVec* m_Corrections; + std::size_t m_N; + std::size_t m_M; + std::size_t m_I; + }; +public: + //! The maximum integral dimension. + static const std::size_t MAX_DIMENSION; +public: + //! \param p The probabilities (in sorted order). + //! \param n The total number of samples. + CNumericalLogProbabilityOfMFromNExtremeSamples(const TMinValueAccumulator& p, std::size_t n) : m_N(n) { + if (p.count() > 0) { + // For large n the integral is dominated by the contributions + // near the lowest probability. + m_P.push_back(p[0]); + m_Corrections.push_back(0.0); + for (std::size_t i = 1u; i < std::min(p.count(), MAX_DIMENSION); ++i) { + m_P.push_back(truncate(p[i], m_P[i - 1])); + m_Corrections.push_back(p[i] == p[i - 1] ? 0.0 : std::log(p[i] - p[i - 1]) - std::log(m_P[i] - m_P[i - 1])); + } } } - //! Calculate the probability (by numerical integration). - double calculate() - { + //! Calculate the probability (by numerical integration). + double calculate() { double result; + CLogIntegrand f(m_P, m_Corrections, m_N, m_P.size(), 1u); + CIntegration::logGaussLegendre(f, 0, m_P[0], result); + result += boost::math::lgamma(static_cast<double>(m_N) + 1.0) - boost::math::lgamma(static_cast<double>(m_N - m_P.size()) + 1.0); return result; } +private: + double truncate(double p, double pMinus1) const { + static const double CUTOFF[] = {1.0e32, 1.0e16, 1.0e8, 1.0e4, 100.0}; + return std::min(p, (m_N >= boost::size(CUTOFF) ?
100.0 : CUTOFF[m_N]) * pMinus1); + } + +private: + TDoubleVec m_P; + TDoubleVec m_Corrections; + std::size_t m_N; }; const std::size_t CNumericalLogProbabilityOfMFromNExtremeSamples::MAX_DIMENSION(10u); @@ -188,29 +154,23 @@ const char DELIMITER(':'); } // unnamed:: - //////// CJointProbabilityOfLessLikelySample Implementation //////// -CJointProbabilityOfLessLikelySamples::CJointProbabilityOfLessLikelySamples() : - m_Distance(0.0), m_NumberSamples(0.0) -{ +CJointProbabilityOfLessLikelySamples::CJointProbabilityOfLessLikelySamples() : m_Distance(0.0), m_NumberSamples(0.0) { } -bool CJointProbabilityOfLessLikelySamples::fromDelimited(const std::string &value) -{ +bool CJointProbabilityOfLessLikelySamples::fromDelimited(const std::string& value) { core::CPersistUtils::CBuiltinFromString converter(DELIMITER); TDoubleDoublePr distanceAndNumberSamples; - if (converter(value, distanceAndNumberSamples)) - { + if (converter(value, distanceAndNumberSamples)) { m_Distance = distanceAndNumberSamples.first; m_NumberSamples = distanceAndNumberSamples.second; return true; } double onlySample; - if (converter(value, onlySample)) - { + if (converter(value, onlySample)) { m_OnlyProbability.reset(onlySample); return true; } @@ -220,103 +180,78 @@ bool CJointProbabilityOfLessLikelySamples::fromDelimited(const std::string &valu return false; } -std::string CJointProbabilityOfLessLikelySamples::toDelimited() const -{ +std::string CJointProbabilityOfLessLikelySamples::toDelimited() const { core::CPersistUtils::CBuiltinToString converter(DELIMITER); - if (m_OnlyProbability) - { + if (m_OnlyProbability) { return converter(*m_OnlyProbability); } TDoubleDoublePr distanceAndNumberSamples(m_Distance, m_NumberSamples); return converter(distanceAndNumberSamples); } -const CJointProbabilityOfLessLikelySamples & - CJointProbabilityOfLessLikelySamples::operator+=(const CJointProbabilityOfLessLikelySamples &other) -{ - if (m_NumberSamples == 0.0) - { +const CJointProbabilityOfLessLikelySamples& CJointProbabilityOfLessLikelySamples:: +operator+=(const CJointProbabilityOfLessLikelySamples& other) { + if (m_NumberSamples == 0.0) { m_OnlyProbability = other.m_OnlyProbability; - } - else if (other.m_NumberSamples == 0.0) - { + } else if (other.m_NumberSamples == 0.0) { // Nothing to do. - } - else if (m_OnlyProbability && other.m_OnlyProbability) - { + } else if (m_OnlyProbability && other.m_OnlyProbability) { double d; - if (deviation(*m_OnlyProbability, d)) - { + if (deviation(*m_OnlyProbability, d)) { m_Distance += d; } - if (deviation(*other.m_OnlyProbability, d)) - { + if (deviation(*other.m_OnlyProbability, d)) { m_Distance += d; } m_OnlyProbability.reset(); - } - else if (m_OnlyProbability) - { + } else if (m_OnlyProbability) { double d; - if (deviation(*m_OnlyProbability, d)) - { + if (deviation(*m_OnlyProbability, d)) { m_Distance += d; } m_Distance += other.m_Distance; m_OnlyProbability.reset(); - } - else if (other.m_OnlyProbability) - { + } else if (other.m_OnlyProbability) { double d; - if (deviation(*other.m_OnlyProbability, d)) - { + if (deviation(*other.m_OnlyProbability, d)) { m_Distance += d; } - } - else - { + } else { m_Distance += other.m_Distance; } m_NumberSamples += other.m_NumberSamples; return *this; } -void CJointProbabilityOfLessLikelySamples::add(double probability, double weight) -{ +void CJointProbabilityOfLessLikelySamples::add(double probability, double weight) { // Round up to epsilon to stop z overflowing in the case the probability // is very small. 
- if (probability < CTools::smallestProbability()) - { + if (probability < CTools::smallestProbability()) { probability = CTools::smallestProbability(); } - if (m_NumberSamples == 0.0 && weight == 1.0) - { + if (m_NumberSamples == 0.0 && weight == 1.0) { m_OnlyProbability = probability; m_NumberSamples = weight; return; } double d; - if (m_OnlyProbability && deviation(*m_OnlyProbability, d)) - { + if (m_OnlyProbability && deviation(*m_OnlyProbability, d)) { m_Distance += d; m_OnlyProbability.reset(); } - if (deviation(probability, d)) - { + if (deviation(probability, d)) { m_Distance += d * weight; m_NumberSamples += weight; } } -bool CJointProbabilityOfLessLikelySamples::calculate(double &result) const -{ +bool CJointProbabilityOfLessLikelySamples::calculate(double& result) const { result = 1.0; // This is defined as one for the case there are no samples. - if (m_OnlyProbability) - { + if (m_OnlyProbability) { result = CTools::truncate(*m_OnlyProbability, 0.0, 1.0); return true; } @@ -324,8 +259,7 @@ bool CJointProbabilityOfLessLikelySamples::calculate(double &result) const // We use a small positive threshold on the distance because of overflow // in the method boost uses to compute the incomplete gamma function. The // result will be very close to one in this case anyway. - if (m_NumberSamples == 0.0 || m_Distance / m_NumberSamples < 1e-8) - { + if (m_NumberSamples == 0.0 || m_Distance / m_NumberSamples < 1e-8) { return true; } @@ -344,23 +278,16 @@ bool CJointProbabilityOfLessLikelySamples::calculate(double &result) const // gi(., .) is the upper incomplete gamma function. // g(.) is the gamma function. - try - { + try { result = boost::math::gamma_q(m_NumberSamples / 2.0, m_Distance / 2.0); - } - catch (const std::exception &e) - { - LOG_ERROR("Unable to compute probability: " << e.what() - << ", m_NumberSamples = " << m_NumberSamples - << ", m_Distance = " << m_Distance); + } catch (const std::exception& e) { + LOG_ERROR("Unable to compute probability: " << e.what() << ", m_NumberSamples = " << m_NumberSamples + << ", m_Distance = " << m_Distance); return false; } - if (!(result >= 0.0 && result <= 1.0)) - { - LOG_ERROR("Invalid joint probability = " << result - << ", m_NumberSamples = " << m_NumberSamples - << ", m_Distance = " << m_Distance); + if (!(result >= 0.0 && result <= 1.0)) { + LOG_ERROR("Invalid joint probability = " << result << ", m_NumberSamples = " << m_NumberSamples << ", m_Distance = " << m_Distance); } result = CTools::truncate(result, 0.0, 1.0); @@ -368,18 +295,15 @@ bool CJointProbabilityOfLessLikelySamples::calculate(double &result) const return true; } -bool CJointProbabilityOfLessLikelySamples::averageProbability(double &result) const -{ +bool CJointProbabilityOfLessLikelySamples::averageProbability(double& result) const { result = 1.0; // This is defined as one for the case there are no samples. - if (m_OnlyProbability) - { + if (m_OnlyProbability) { result = CTools::truncate(*m_OnlyProbability, 0.0, 1.0); return true; } - if (m_NumberSamples == 0.0 || m_Distance == 0.0) - { + if (m_NumberSamples == 0.0 || m_Distance == 0.0) { return true; } @@ -387,24 +311,18 @@ bool CJointProbabilityOfLessLikelySamples::averageProbability(double &result) co // get the same joint probability and is a measurement of the typical // probability in a set of independent samples. 
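Concretely, the quantity accumulated by add() is the sum of squared normal quantiles, and the joint probability described by the comments above is the chi-squared survival function of that sum. A minimal standalone sketch follows (an editorial illustration with arbitrary example values, not the patch's code), assuming only boost.math:

    #include <boost/math/distributions/normal.hpp>
    #include <boost/math/special_functions/gamma.hpp>
    #include <iostream>
    #include <vector>

    int main() {
        // Arbitrary example probabilities of seeing less likely samples.
        std::vector<double> probabilities{0.04, 0.2, 0.6};
        boost::math::normal_distribution<> normal(0.0, 1.0);
        // distance = sum of z(i) = quantile(N(0,1), p(i) / 2)^2.
        double distance = 0.0;
        for (double p : probabilities) {
            double q = boost::math::quantile(normal, p / 2.0);
            distance += q * q;
        }
        double n = static_cast<double>(probabilities.size());
        // P = gamma_q(n / 2, distance / 2), i.e. the survival function of a
        // chi-squared distribution with n degrees of freedom at the total
        // squared deviation.
        std::cout << "joint probability = " << boost::math::gamma_q(n / 2.0, distance / 2.0) << '\n';
        return 0;
    }

This sketch assumes unit weights throughout; the class additionally handles weighted samples and the single-sample shortcut held in m_OnlyProbability.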
- try - { + try { boost::math::normal_distribution<> normal(0.0, 1.0); result = 2.0 * boost::math::cdf(normal, -std::sqrt(m_Distance / m_NumberSamples)); - } - catch (const std::exception &e) - { - LOG_ERROR("Unable to compute probability: " << e.what() - << ", m_NumberSamples = " << m_NumberSamples - << ", m_Distance = " << m_Distance); + } catch (const std::exception& e) { + LOG_ERROR("Unable to compute probability: " << e.what() << ", m_NumberSamples = " << m_NumberSamples + << ", m_Distance = " << m_Distance); return false; } - if (!(result >= 0.0 && result <= 1.0)) - { - LOG_ERROR("Invalid average probability = " << result - << ", m_NumberSamples = " << m_NumberSamples - << ", m_Distance = " << m_Distance); + if (!(result >= 0.0 && result <= 1.0)) { + LOG_ERROR("Invalid average probability = " << result << ", m_NumberSamples = " << m_NumberSamples + << ", m_Distance = " << m_Distance); } result = CTools::truncate(result, 0.0, 1.0); @@ -412,81 +330,62 @@ bool CJointProbabilityOfLessLikelySamples::averageProbability(double &result) co return true; } -CJointProbabilityOfLessLikelySamples::TOptionalDouble - CJointProbabilityOfLessLikelySamples::onlyProbability() const -{ +CJointProbabilityOfLessLikelySamples::TOptionalDouble CJointProbabilityOfLessLikelySamples::onlyProbability() const { return m_OnlyProbability; } -double CJointProbabilityOfLessLikelySamples::distance() const -{ +double CJointProbabilityOfLessLikelySamples::distance() const { return m_Distance; } -double CJointProbabilityOfLessLikelySamples::numberSamples() const -{ +double CJointProbabilityOfLessLikelySamples::numberSamples() const { return m_NumberSamples; } -uint64_t CJointProbabilityOfLessLikelySamples::checksum(uint64_t seed) const -{ +uint64_t CJointProbabilityOfLessLikelySamples::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_OnlyProbability); seed = CChecksum::calculate(seed, m_Distance); return CChecksum::calculate(seed, m_NumberSamples); } -std::ostream &CJointProbabilityOfLessLikelySamples::print(std::ostream &o) const -{ +std::ostream& CJointProbabilityOfLessLikelySamples::print(std::ostream& o) const { return o << '(' << m_NumberSamples << ", " << m_Distance << ')'; } -std::ostream &operator<<(std::ostream &o, - const CJointProbabilityOfLessLikelySamples &probability) -{ +std::ostream& operator<<(std::ostream& o, const CJointProbabilityOfLessLikelySamples& probability) { return probability.print(o); } -CJointProbabilityOfLessLikelySamples & - CJointProbabilityOfLessLikelySamples::SAddProbability::operator()( - CJointProbabilityOfLessLikelySamples &jointProbability, - const double probability, - const double weight) const -{ +CJointProbabilityOfLessLikelySamples& CJointProbabilityOfLessLikelySamples::SAddProbability:: +operator()(CJointProbabilityOfLessLikelySamples& jointProbability, const double probability, const double weight) const { jointProbability.add(probability, weight); return jointProbability; } - //////// CLogJointProbabilityOfLessLikelySample Implementation //////// -CLogJointProbabilityOfLessLikelySamples::CLogJointProbabilityOfLessLikelySamples() -{ +CLogJointProbabilityOfLessLikelySamples::CLogJointProbabilityOfLessLikelySamples() { } -const CLogJointProbabilityOfLessLikelySamples & - CLogJointProbabilityOfLessLikelySamples::operator+=(const CLogJointProbabilityOfLessLikelySamples &other) -{ +const CLogJointProbabilityOfLessLikelySamples& CLogJointProbabilityOfLessLikelySamples:: +operator+=(const CLogJointProbabilityOfLessLikelySamples& other) { 
     this->CJointProbabilityOfLessLikelySamples::operator+=(other);
     return *this;
 }

-void CLogJointProbabilityOfLessLikelySamples::add(double probability, double weight)
-{
+void CLogJointProbabilityOfLessLikelySamples::add(double probability, double weight) {
     this->CJointProbabilityOfLessLikelySamples::add(probability, weight);
 }

-bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result) const
-{
+bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double& result) const {
     result = 0.0; // This is defined as log(1) = 0 for the case there are no samples.

-    if (this->onlyProbability())
-    {
+    if (this->onlyProbability()) {
         result = std::min(std::log(*this->onlyProbability()), 0.0);
         return true;
     }

-    if (this->numberSamples() == 0.0 || this->distance() == 0.0)
-    {
+    if (this->numberSamples() == 0.0 || this->distance() == 0.0) {
         return true;
     }
@@ -559,8 +458,7 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result
     // If upper incomplete gamma function doesn't underflow use the "exact" value
     // (we want 1 d.p. of precision).
     double probability;
-    if (this->calculate(probability) && probability > 10.0 * boost::numeric::bounds<double>::smallest())
-    {
+    if (this->calculate(probability) && probability > 10.0 * boost::numeric::bounds<double>::smallest()) {
         LOG_TRACE("probability = " << probability);
         result = std::log(probability);
         return true;
@@ -574,8 +472,7 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result

     double bound = boost::numeric::bounds<double>::lowest();

-    try
-    {
+    try {
         double logx = std::log(x);
         double p = std::floor(s - 1.0);
         double logPFactorial = boost::math::lgamma(p + 1.0);
@@ -584,43 +481,30 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result

         double b1 = 0.0;

-        if ((m + 1.0) * (1.0 + logx - logm) >= LOG_DOUBLE_MAX)
-        {
+        if ((m + 1.0) * (1.0 + logx - logm) >= LOG_DOUBLE_MAX) {
             // Handle the case that (e*x/m)^(m+1) overflows.
             b1 = -1.0 - 0.5 * logm + m * (1.0 + logx - logm);
-        }
-        else if (E * x / m != 1.0)
-        {
+        } else if (E * x / m != 1.0) {
             double r = 1.0 - E * x / m;
             b1 = -1.0 - 0.5 * logm + std::log(CTools::oneMinusPowOneMinusX(r, m + 1.0) / r);
-        }
-        else
-        {
+        } else {
             // Use L'Hopital's rule to show that:
             //   lim   { (1 - r^(m+1)) / (1 - r) } = m + 1
             //  r -> 1
             b1 = -1.0 - 0.5 * logm + std::log(m + 1.0);
         }

-        if (p > m)
-        {
+        if (p > m) {
             double t = 0.0;
             double logp = std::log(p);

-            if ((p - m) * (1.0 + logx - logp) >= LOG_DOUBLE_MAX)
-            {
+            if ((p - m) * (1.0 + logx - logp) >= LOG_DOUBLE_MAX) {
                 // Handle the case that (e*x/p)^(p-m) overflows.
-                t = m + (m + 1.0) * logx - (m + 1.5) * logp
-                    + (p - m - 1.0) * (1.0 + logx - logp);
-            }
-            else if (E * x / p != 1.0)
-            {
+                t = m + (m + 1.0) * logx - (m + 1.5) * logp + (p - m - 1.0) * (1.0 + logx - logp);
+            } else if (E * x / p != 1.0) {
                 double r = 1.0 - E * x / p;
-                t = m + (m + 1.0) * logx - (m + 1.5) * logp
-                    + std::log(CTools::oneMinusPowOneMinusX(r, p - m) / r);
-            }
-            else
-            {
+                t = m + (m + 1.0) * logx - (m + 1.5) * logp + std::log(CTools::oneMinusPowOneMinusX(r, p - m) / r);
+            } else {
                 // Use L'Hopital's rule to show that:
                 //   lim   { (1 - r^(p - m)) / (1 - r) } = p - m
                 //  r -> 1
@@ -632,8 +516,7 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result
         }

         double b2 = 0.0;
-        if ((p + 1.0) * std::log(x) < logPFactorial + std::log(p + 1.0))
-        {
+        if ((p + 1.0) * std::log(x) < logPFactorial + std::log(p + 1.0)) {
             b2 = std::log(1.0 - std::exp((p + 1.0) * logx - logPFactorial) / (p + 1.0)) + x;
         }
@@ -641,17 +524,9 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result

         bound = (s - 1.0) * logx - x + logSum - boost::math::lgamma(s);

-        LOG_TRACE("s = " << s << ", x = " << x
-                  << ", p = " << p << ", m = " << m
-                  << ", b1 = " << b1 << ", b2 = " << b2
-                  << ", log(sum) = " << logSum
-                  << ", bound = " << bound);
-    }
-    catch (const std::exception &e)
-    {
-        LOG_ERROR("Failed computing bound: " << e.what()
-                  << ", s = " << s << ", x = " << x);
-    }
+        LOG_TRACE("s = " << s << ", x = " << x << ", p = " << p << ", m = " << m << ", b1 = " << b1 << ", b2 = " << b2
+                         << ", log(sum) = " << logSum << ", bound = " << bound);
+    } catch (const std::exception& e) { LOG_ERROR("Failed computing bound: " << e.what() << ", s = " << s << ", x = " << x); }

     result = std::min(bound, 0.0);
     LOG_TRACE("result = " << result);
@@ -659,18 +534,15 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double &result
     return true;
 }

-bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double &result) const
-{
+bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double& result) const {
     result = 0.0; // This is defined as log(1) = 0 for the case there are no samples.

-    if (this->onlyProbability())
-    {
+    if (this->onlyProbability()) {
         result = std::min(std::log(*this->onlyProbability()), 0.0);
         return true;
     }

-    if (this->numberSamples() == 0.0 || this->distance() == 0.0)
-    {
+    if (this->numberSamples() == 0.0 || this->distance() == 0.0) {
         return true;
     }
@@ -723,8 +595,7 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double &result
     // If upper incomplete gamma function likely isn't going to underflow
     // use the "exact" value. Note that we want 1 d.p. of precision.
     double probability;
-    if (this->calculate(probability) && probability > 10.0 * boost::numeric::bounds<double>::smallest())
-    {
+    if (this->calculate(probability) && probability > 10.0 * boost::numeric::bounds<double>::smallest()) {
         LOG_TRACE("probability = " << probability);
         result = std::log(probability);
         return true;
@@ -737,25 +608,19 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double &result

     double bound = 0.0;

-    try
-    {
+    try {
         double p = std::ceil(s - 1.0);

         double b1 = 0.0;
-        if ((p + 1.0) * std::log(p / x) >= LOG_DOUBLE_MAX)
-        {
+        if ((p + 1.0) * std::log(p / x) >= LOG_DOUBLE_MAX) {
             // Handle the case that (p/x)^(p+1) is going to overflow. In this case
             // (1 - (p/x)^(p+1)) / (1 - p/x) < (p/x)^(p+1) / (p/x - 1) but they are
             // essentially equal.
b1 = (p + 1.0) * std::log(p / x) - std::log(p / x - 1.0); - } - else if (p != x) - { + } else if (p != x) { double r = 1.0 - p / x; b1 = std::log(CTools::oneMinusPowOneMinusX(r, p + 1.0) / r); - } - else - { + } else { // Use L'Hopital's rule to show that: // lim { (1 - r^(p+1)) / (1 - r) } = p + 1 // r -> 1 @@ -768,16 +633,8 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double &result bound = (s - 1.0) * std::log(x) - x + logSum - boost::math::lgamma(s); - LOG_TRACE("s = " << s << ", x = " << x - << ", b1 = " << b1 << ", b2 = " << b2 - << ", log(sum) = " << logSum - << ", bound = " << bound); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed computing bound: " << e.what() - << ", s = " << s << ", x = " << x); - } + LOG_TRACE("s = " << s << ", x = " << x << ", b1 = " << b1 << ", b2 = " << b2 << ", log(sum) = " << logSum << ", bound = " << bound); + } catch (const std::exception& e) { LOG_ERROR("Failed computing bound: " << e.what() << ", s = " << s << ", x = " << x); } result = std::min(bound, 0.0); LOG_TRACE("result = " << result); @@ -785,121 +642,91 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double &result return true; } - //////// CProbabilityOfExtremeSample Implementation //////// -CProbabilityOfExtremeSample::CProbabilityOfExtremeSample() : - m_NumberSamples(0.0) -{ +CProbabilityOfExtremeSample::CProbabilityOfExtremeSample() : m_NumberSamples(0.0) { } -bool CProbabilityOfExtremeSample::fromDelimited(const std::string &value) -{ +bool CProbabilityOfExtremeSample::fromDelimited(const std::string& value) { std::size_t i = value.find_first_of(DELIMITER); - if (!core::CStringUtils::stringToType(value.substr(0, i), m_NumberSamples)) - { + if (!core::CStringUtils::stringToType(value.substr(0, i), m_NumberSamples)) { LOG_ERROR("Failed to extract number samples from " << value); return false; } - return m_MinValue.fromDelimited(value.substr(i+1)); + return m_MinValue.fromDelimited(value.substr(i + 1)); } -std::string CProbabilityOfExtremeSample::toDelimited() const -{ - return core::CStringUtils::typeToString(m_NumberSamples) - + DELIMITER - + m_MinValue.toDelimited(); +std::string CProbabilityOfExtremeSample::toDelimited() const { + return core::CStringUtils::typeToString(m_NumberSamples) + DELIMITER + m_MinValue.toDelimited(); } -const CProbabilityOfExtremeSample & - CProbabilityOfExtremeSample::operator+=(const CProbabilityOfExtremeSample &other) -{ +const CProbabilityOfExtremeSample& CProbabilityOfExtremeSample::operator+=(const CProbabilityOfExtremeSample& other) { m_MinValue += other.m_MinValue; m_NumberSamples += other.m_NumberSamples; return *this; } -bool CProbabilityOfExtremeSample::add(double probability, double weight) -{ +bool CProbabilityOfExtremeSample::add(double probability, double weight) { bool result = m_MinValue.add(probability); m_NumberSamples += weight; return result; } -bool CProbabilityOfExtremeSample::calculate(double &result) const -{ +bool CProbabilityOfExtremeSample::calculate(double& result) const { result = 1.0; - if (m_NumberSamples > 0) - { + if (m_NumberSamples > 0) { result = CTools::truncate(CTools::oneMinusPowOneMinusX(m_MinValue[0], m_NumberSamples), 0.0, 1.0); } return true; } -uint64_t CProbabilityOfExtremeSample::checksum(uint64_t seed) const -{ +uint64_t CProbabilityOfExtremeSample::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_MinValue); return CChecksum::calculate(seed, m_NumberSamples); } -std::ostream &CProbabilityOfExtremeSample::print(std::ostream &o) const -{ 
+std::ostream& CProbabilityOfExtremeSample::print(std::ostream& o) const { return o << "(" << m_NumberSamples << ", " << m_MinValue.print() << ")"; } -std::ostream &operator<<(std::ostream &o, - const CProbabilityOfExtremeSample &probability) -{ +std::ostream& operator<<(std::ostream& o, const CProbabilityOfExtremeSample& probability) { return probability.print(o); } - //////// CProbabilityOfMFromNMostExtremeSamples Implementation //////// -CLogProbabilityOfMFromNExtremeSamples::CLogProbabilityOfMFromNExtremeSamples(std::size_t m) : - m_MinValues(m), - m_NumberSamples(0u) -{ +CLogProbabilityOfMFromNExtremeSamples::CLogProbabilityOfMFromNExtremeSamples(std::size_t m) : m_MinValues(m), m_NumberSamples(0u) { } -bool CLogProbabilityOfMFromNExtremeSamples::fromDelimited(const std::string &value) -{ +bool CLogProbabilityOfMFromNExtremeSamples::fromDelimited(const std::string& value) { std::size_t i = value.find_first_of(DELIMITER); - if (!core::CStringUtils::stringToType(value.substr(0, i), m_NumberSamples)) - { + if (!core::CStringUtils::stringToType(value.substr(0, i), m_NumberSamples)) { LOG_ERROR("Failed to extract number samples from " << value); return false; } - return m_MinValues.fromDelimited(value.substr(i+1)); + return m_MinValues.fromDelimited(value.substr(i + 1)); } -std::string CLogProbabilityOfMFromNExtremeSamples::toDelimited() const -{ - return core::CStringUtils::typeToString(m_NumberSamples) - + DELIMITER - + m_MinValues.toDelimited(); +std::string CLogProbabilityOfMFromNExtremeSamples::toDelimited() const { + return core::CStringUtils::typeToString(m_NumberSamples) + DELIMITER + m_MinValues.toDelimited(); } -const CLogProbabilityOfMFromNExtremeSamples & - CLogProbabilityOfMFromNExtremeSamples::operator+=(const CLogProbabilityOfMFromNExtremeSamples &other) -{ +const CLogProbabilityOfMFromNExtremeSamples& CLogProbabilityOfMFromNExtremeSamples:: +operator+=(const CLogProbabilityOfMFromNExtremeSamples& other) { m_MinValues += other.m_MinValues; m_NumberSamples += other.m_NumberSamples; return *this; } -void CLogProbabilityOfMFromNExtremeSamples::add(const double probability) -{ +void CLogProbabilityOfMFromNExtremeSamples::add(const double probability) { m_MinValues.add(probability); ++m_NumberSamples; } -bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result) -{ +bool CLogProbabilityOfMFromNExtremeSamples::calculate(double& result) { result = 0.0; - if (m_NumberSamples == 0) - { + if (m_NumberSamples == 0) { return true; } @@ -934,19 +761,16 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result) double logLargestCoeff = 0.0; TDoubleVec coeffs; - if (M > 1) - { + if (M > 1) { coeffs.reserve(M - 1); } m_MinValues.sort(); - for (std::size_t i = 0u; i < M; ++i) - { + for (std::size_t i = 0u; i < M; ++i) { m_MinValues[i] = CTools::truncate(m_MinValues[i], CTools::smallestProbability(), 1.0); } - for (std::size_t m = 1u; m < M; ++m) - { + for (std::size_t m = 1u; m < M; ++m) { double p = m_MinValues[M - m]; LOG_TRACE("p(" << m << ") = " << p); @@ -954,8 +778,7 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result) // Update the coefficients (they are stored in reverse order). 
         double sum = 0.0;
-        for (std::size_t i = 0u; i < coeffs.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < coeffs.size(); ++i) {
             double index = static_cast<double>(coeffs.size() - i);
             coeffs[i] /= index;
             sum += coeffs[i] * CTools::powOneMinusX(p / 2.0, index);
@@ -974,18 +797,14 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result)

         // Re-normalize the coefficients if they aren't all identically zero.
         double cmax = 0.0;
-        for (std::size_t i = 0u; i < coeffs.size(); ++i)
-        {
-            if (std::fabs(coeffs[i]) > 1.0 / boost::numeric::bounds<double>::highest())
-            {
+        for (std::size_t i = 0u; i < coeffs.size(); ++i) {
+            if (std::fabs(coeffs[i]) > 1.0 / boost::numeric::bounds<double>::highest()) {
                 cmax = std::max(cmax, std::fabs(coeffs[i]));
             }
         }
-        if (cmax > 0.0)
-        {
+        if (cmax > 0.0) {
             LOG_TRACE("cmax = " << cmax);
-            for (std::size_t i = 0u; i < coeffs.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < coeffs.size(); ++i) {
                 coeffs[i] /= cmax;
             }
             logLargestCoeff += std::log(cmax);
@@ -996,15 +815,12 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result)
     // Re-normalize in the case that we haven't been able to in the loop
     // because of overflow.
     double cmax = 0.0;
-    for (std::size_t i = 0u; i < coeffs.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < coeffs.size(); ++i) {
         cmax = std::max(cmax, std::fabs(coeffs[i]));
     }
-    if (cmax > 0.0 && cmax < 1.0 / boost::numeric::bounds<double>::highest())
-    {
+    if (cmax > 0.0 && cmax < 1.0 / boost::numeric::bounds<double>::highest()) {
         logLargestCoeff = std::log(cmax);
-        for (std::size_t i = 0u; i < coeffs.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < coeffs.size(); ++i) {
             coeffs[i] /= cmax;
         }
     }
@@ -1016,12 +832,9 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result)
     double pMin = CTools::oneMinusPowOneMinusX(pM, static_cast<double>(N));
     LOG_TRACE("1 - (1 - p(" << M << "))^" << N << " = " << pMin);

-    if (M > 1)
-    {
-        double logScale =   static_cast<double>(M) * std::log(2.0)
-                          + boost::math::lgamma(static_cast<double>(N + 1))
-                          - boost::math::lgamma(static_cast<double>(N - M + 1))
-                          + logLargestCoeff;
+    if (M > 1) {
+        double logScale = static_cast<double>(M) * std::log(2.0) + boost::math::lgamma(static_cast<double>(N + 1)) -
+                          boost::math::lgamma(static_cast<double>(N - M + 1)) + logLargestCoeff;
         LOG_TRACE("log(scale) = " << logScale);

         double sum = 0.0;
@@ -1029,26 +842,21 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result)
         double negative = 0.0;
         TDoubleVec terms;
         terms.reserve(coeffs.size());
-        for (std::size_t i = 0u; i < coeffs.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < coeffs.size(); ++i) {
             double index = static_cast<double>(coeffs.size() - i);
             double c = coeffs[i] / index;
             double p = CTools::oneMinusPowOneMinusX(pM / 2.0, index);
-            LOG_TRACE("term(" << index << ") = " << (c * p)
-                      << " (c(" << index << ") = " << c
-                      << ", 1 - (1 - p(M)/2)^" << index << " = " << p << ")");
+            LOG_TRACE("term(" << index << ") = " << (c * p) << " (c(" << index << ") = " << c << ", 1 - (1 - p(M)/2)^" << index << " = "
+                              << p << ")");
             terms.push_back(c * p);
             sum += std::fabs(c * p);
             (c * p < 0.0 ? negative : positive) += std::fabs(c * p);
         }
         LOG_TRACE("negative = " << negative << ", positive = " << positive);

-        if (sum == 0.0)
-        {
+        if (sum == 0.0) {
             result = std::log(pMin);
-        }
-        else
-        {
+        } else {
             // To minimize cancellation errors we add pMin inside the loop
             // and compute weights s.t. Sum_i( w(i) ) = 1.0 and w(i) * pMin
             // is roughly the same size as the i'th coefficient.
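// Aside: pMin above is 1 - (1 - p)^N, which a naive evaluation destroys for
// small p. A cancellation-safe formulation (a sketch of the idea behind
// CTools::oneMinusPowOneMinusX, not necessarily its actual implementation):
#include <cmath>
#include <iostream>

// Computes 1 - (1 - p)^n without catastrophic cancellation for small p.
double oneMinusPowOneMinusP(double p, double n) {
    // (1 - p)^n = exp(n * log1p(-p)) and 1 - exp(y) = -expm1(y).
    return -std::expm1(n * std::log1p(-p));
}

int main() {
    double p = 1e-12;
    double n = 20.0;
    std::cout << oneMinusPowOneMinusP(p, n) << '\n'; // ~2e-11, full precision
    std::cout << 1.0 - std::pow(1.0 - p, n) << '\n'; // same magnitude, few good digits
    return 0;
}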
@@ -1058,59 +866,45 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result) result = 0.0; double condition = 0.0; double logPMin = std::log(pMin); - if (logPMin - logScale > core::constants::LOG_MAX_DOUBLE) - { - for (std::size_t i = 0u; i < terms.size(); ++i) - { + if (logPMin - logScale > core::constants::LOG_MAX_DOUBLE) { + for (std::size_t i = 0u; i < terms.size(); ++i) { LOG_TRACE("remainder(" << i << ") = " << std::fabs(terms[i])); result += std::fabs(terms[i]); } result = std::log(result * pMin / sum); - } - else - { - if (logPMin - logScale < core::constants::LOG_MIN_DOUBLE) - { + } else { + if (logPMin - logScale < core::constants::LOG_MIN_DOUBLE) { pMin = 0.0; - for (std::size_t i = 0u; i < terms.size(); ++i) - { + for (std::size_t i = 0u; i < terms.size(); ++i) { result += terms[i]; condition = std::max(condition, std::fabs(terms[i])); } - } - else - { + } else { pMin /= std::exp(logScale); LOG_TRACE("pMin = " << pMin); - for (std::size_t i = 0u; i < terms.size(); ++i) - { + for (std::size_t i = 0u; i < terms.size(); ++i) { double remainder = std::fabs(terms[i]) * pMin / sum + terms[i]; result += remainder; - double absTerms[] = { std::fabs(terms[i]), std::fabs(terms[i] * pMin / sum), std::fabs(remainder) }; + double absTerms[] = {std::fabs(terms[i]), std::fabs(terms[i] * pMin / sum), std::fabs(remainder)}; condition = std::max(condition, *std::max_element(absTerms, absTerms + 3)); } } LOG_TRACE("result = " << result << ", condition = " << condition); - if (result <= 0.0 || condition > PRECISION * result) - { + if (result <= 0.0 || condition > PRECISION * result) { // Whoops we've lost all our precision. Fall back to numerical // integration (note this caps M <= 10 so the runtime doesn't // blow up). LOG_TRACE("Falling back to numerical integration"); CNumericalLogProbabilityOfMFromNExtremeSamples numerical(m_MinValues, N); result = numerical.calculate(); - } - else - { + } else { result = logScale + std::log(result); } } } - } - else - { + } else { result = std::log(pMin); } @@ -1118,28 +912,20 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result) // than one on occasion we use a tolerance which should be much // larger than necessary, but we are only interested in values // well outside the range as indicative of a genuine problem. 
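// Aside: a small Monte Carlo cross-check (a hypothetical standalone harness)
// of the building block used above: if N probabilities are independent and
// uniformly distributed on [0, 1], the chance that the smallest is at most p
// is 1 - (1 - p)^N, the pMin term in the calculation.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <random>

int main() {
    std::mt19937 rng(42);
    std::uniform_real_distribution<double> u01(0.0, 1.0);
    const int N = 20;          // probabilities per trial
    const int trials = 200000; // Monte Carlo trials
    const double p = 0.05;
    int hits = 0;
    for (int t = 0; t < trials; ++t) {
        double smallest = 1.0;
        for (int i = 0; i < N; ++i) {
            smallest = std::min(smallest, u01(rng));
        }
        if (smallest <= p) {
            ++hits;
        }
    }
    double expected = 1.0 - std::pow(1.0 - p, N); // ~0.6415 for N = 20, p = 0.05
    std::cout << "simulated = " << static_cast<double>(hits) / trials
              << ", closed form = " << expected << '\n';
    return 0;
}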
-    for (std::size_t i = 0u; i < 2; ++i)
-    {
-        if (!(result < 0.001))
-        {
+    for (std::size_t i = 0u; i < 2; ++i) {
+        if (!(result < 0.001)) {
             std::ostringstream minValues;
             minValues << std::setprecision(16) << "[" << m_MinValues[0];
-            for (std::size_t j = 1u; j < m_MinValues.count(); ++j)
-            {
+            for (std::size_t j = 1u; j < m_MinValues.count(); ++j) {
                 minValues << " " << m_MinValues[j];
             }
             minValues << "]";
-            LOG_ERROR("Invalid log(extreme probability) = " << result
-                      << ", m_NumberSamples = " << m_NumberSamples
-                      << ", m_MinValues = " << minValues.str()
-                      << ", coeffs = " << core::CContainerPrinter::print(coeffs)
-                      << ", log(max{coeffs}) = " << logLargestCoeff
-                      << ", pM = " << pM
-                      << ", pMin = " << pMin);
+            LOG_ERROR("Invalid log(extreme probability) = " << result << ", m_NumberSamples = " << m_NumberSamples << ", m_MinValues = "
+                                                            << minValues.str() << ", coeffs = " << core::CContainerPrinter::print(coeffs)
+                                                            << ", log(max{coeffs}) = " << logLargestCoeff << ", pM = " << pM
+                                                            << ", pMin = " << pMin);
             result = 0.0;
-        }
-        else
-        {
+        } else {
             break;
         }
         LOG_TRACE("Falling back to numerical integration");
@@ -1153,8 +939,7 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double &result)
     return true;
 }

-bool CLogProbabilityOfMFromNExtremeSamples::calibrated(double &result)
-{
+bool CLogProbabilityOfMFromNExtremeSamples::calibrated(double& result) {
     // This probability systematically decreases for increasing min(M, N).
     // Ideally, we would like the probability to be calibrated, such that,
     // with probability P it is less than or equal to P for individual
@@ -1164,11 +949,9 @@ bool CLogProbabilityOfMFromNExtremeSamples::calibrated(double &result)
     // a function of min(M, N). The following is a fit to the empirical
     // function.

-    if (this->calculate(result))
-    {
+    if (this->calculate(result)) {
         std::size_t n = std::min(m_MinValues.count(), m_NumberSamples);
-        if (n == 0)
-        {
+        if (n == 0) {
             return true;
         }
         result /= 1.0 + std::log(static_cast<double>(n)) / 2.1;
@@ -1178,11 +961,9 @@ bool CLogProbabilityOfMFromNExtremeSamples::calibrated(double &result)
     return false;
 }

-uint64_t CLogProbabilityOfMFromNExtremeSamples::checksum(uint64_t seed) const
-{
+uint64_t CLogProbabilityOfMFromNExtremeSamples::checksum(uint64_t seed) const {
     seed = CChecksum::calculate(seed, m_MinValues);
     return CChecksum::calculate(seed, m_NumberSamples);
 }
-
 }
 }
diff --git a/lib/maths/unittest/CAgglomerativeClustererTest.cc b/lib/maths/unittest/CAgglomerativeClustererTest.cc
index e0f1495bb4..b15f812562 100644
--- a/lib/maths/unittest/CAgglomerativeClustererTest.cc
+++ b/lib/maths/unittest/CAgglomerativeClustererTest.cc
@@ -22,8 +22,7 @@

 using namespace ml;

-namespace
-{
+namespace {
 using TDoubleVec = std::vector<double>;
 using TDoubleVecVec = std::vector<TDoubleVec>;
@@ -32,129 +31,93 @@ using TSizeVecVec = std::vector<TSizeVec>;
 using TDoubleSizeVecPr = std::pair<double, TSizeVec>;
 using TDoubleSizeVecPrVec = std::vector<TDoubleSizeVecPr>;

-class CCluster
-{
-    public:
-        explicit CCluster(std::size_t p) :
-            m_Height(0.0),
-            m_Points(1, p)
-        {}
-
-        void swap(CCluster &other)
-        {
-            m_Points.swap(other.m_Points);
-        }
+class CCluster {
+public:
+    explicit CCluster(std::size_t p) : m_Height(0.0), m_Points(1, p) {}

-        static CCluster merge(double height,
-                              const CCluster &lhs,
-                              const CCluster &rhs)
-        {
-            CCluster result;
-            result.m_Height = height;
-            result.m_Points.reserve(lhs.m_Points.size() + rhs.m_Points.size());
-            result.m_Points.insert(result.m_Points.end(),
-                                   lhs.m_Points.begin(),
-                                   lhs.m_Points.end());
-            result.m_Points.insert(result.m_Points.end(),
-                                   rhs.m_Points.begin(),
-
rhs.m_Points.end()); - std::sort(result.m_Points.begin(), result.m_Points.end()); - return result; - } + void swap(CCluster& other) { m_Points.swap(other.m_Points); } - void add(TDoubleSizeVecPrVec &result) - { - result.push_back(TDoubleSizeVecPr(m_Height, m_Points)); - } + static CCluster merge(double height, const CCluster& lhs, const CCluster& rhs) { + CCluster result; + result.m_Height = height; + result.m_Points.reserve(lhs.m_Points.size() + rhs.m_Points.size()); + result.m_Points.insert(result.m_Points.end(), lhs.m_Points.begin(), lhs.m_Points.end()); + result.m_Points.insert(result.m_Points.end(), rhs.m_Points.begin(), rhs.m_Points.end()); + std::sort(result.m_Points.begin(), result.m_Points.end()); + return result; + } - const TSizeVec &points() const - { - return m_Points; - } + void add(TDoubleSizeVecPrVec& result) { result.push_back(TDoubleSizeVecPr(m_Height, m_Points)); } + + const TSizeVec& points() const { return m_Points; } - private: - explicit CCluster() : m_Height(0.0) {} +private: + explicit CCluster() : m_Height(0.0) {} - private: - double m_Height; - TSizeVec m_Points; +private: + double m_Height; + TSizeVec m_Points; }; using TClusterVec = std::vector; -class CSlinkObjective -{ - public: - CSlinkObjective(const TDoubleVecVec &distanceMatrix) : - m_DistanceMatrix(&distanceMatrix) - {} - - double operator()(const CCluster &lhs, const CCluster &rhs) - { - double result = std::numeric_limits::max(); - const TSizeVec &lp = lhs.points(); - const TSizeVec &rp = rhs.points(); - for (std::size_t i = 0u; i < lp.size(); ++i) - { - for (std::size_t j = 0u; j < rp.size(); ++j) - { - std::size_t pi = lp[i]; - std::size_t pj = rp[j]; - if (pj > pi) - { - std::swap(pi, pj); - } - result = std::min(result, (*m_DistanceMatrix)[pi][pj]); +class CSlinkObjective { +public: + CSlinkObjective(const TDoubleVecVec& distanceMatrix) : m_DistanceMatrix(&distanceMatrix) {} + + double operator()(const CCluster& lhs, const CCluster& rhs) { + double result = std::numeric_limits::max(); + const TSizeVec& lp = lhs.points(); + const TSizeVec& rp = rhs.points(); + for (std::size_t i = 0u; i < lp.size(); ++i) { + for (std::size_t j = 0u; j < rp.size(); ++j) { + std::size_t pi = lp[i]; + std::size_t pj = rp[j]; + if (pj > pi) { + std::swap(pi, pj); } + result = std::min(result, (*m_DistanceMatrix)[pi][pj]); } - return result; } + return result; + } - private: - const TDoubleVecVec *m_DistanceMatrix; +private: + const TDoubleVecVec* m_DistanceMatrix; }; -class CClinkObjective -{ - public: - CClinkObjective(const TDoubleVecVec &distanceMatrix) : - m_DistanceMatrix(&distanceMatrix) - {} - - double operator()(const CCluster &lhs, const CCluster &rhs) - { - double result = -std::numeric_limits::max(); - const TSizeVec &lp = lhs.points(); - const TSizeVec &rp = rhs.points(); - for (std::size_t i = 0u; i < lp.size(); ++i) - { - for (std::size_t j = 0u; j < rp.size(); ++j) - { - std::size_t pi = lp[i]; - std::size_t pj = rp[j]; - if (pj > pi) - { - std::swap(pi, pj); - } - result = std::max(result, (*m_DistanceMatrix)[pi][pj]); +class CClinkObjective { +public: + CClinkObjective(const TDoubleVecVec& distanceMatrix) : m_DistanceMatrix(&distanceMatrix) {} + + double operator()(const CCluster& lhs, const CCluster& rhs) { + double result = -std::numeric_limits::max(); + const TSizeVec& lp = lhs.points(); + const TSizeVec& rp = rhs.points(); + for (std::size_t i = 0u; i < lp.size(); ++i) { + for (std::size_t j = 0u; j < rp.size(); ++j) { + std::size_t pi = lp[i]; + std::size_t pj = rp[j]; + if (pj > pi) { + 
std::swap(pi, pj); } + result = std::max(result, (*m_DistanceMatrix)[pi][pj]); } - return result; } + return result; + } - private: - const TDoubleVecVec *m_DistanceMatrix; +private: + const TDoubleVecVec* m_DistanceMatrix; }; template -TClusterVec agglomerativeCluster(const TDoubleVecVec &distanceMatrix) -{ +TClusterVec agglomerativeCluster(const TDoubleVecVec& distanceMatrix) { std::size_t n = distanceMatrix.size(); TClusterVec clusters; clusters.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { clusters.push_back(CCluster(i)); } @@ -163,19 +126,15 @@ TClusterVec agglomerativeCluster(const TDoubleVecVec &distanceMatrix) TClusterVec tree; tree.reserve(n); - while (clusters.size() > 1) - { + while (clusters.size() > 1) { double fmin = std::numeric_limits::max(); std::size_t mi = 0; std::size_t mj = 0; - for (std::size_t i = 0u; i < clusters.size(); ++i) - { - for (std::size_t j = i+1; j < clusters.size(); ++j) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { + for (std::size_t j = i + 1; j < clusters.size(); ++j) { double fij = f(clusters[i], clusters[j]); - if (fij < fmin) - { + if (fij < fmin) { fmin = fij; mi = i; mj = j; @@ -183,8 +142,7 @@ TClusterVec agglomerativeCluster(const TDoubleVecVec &distanceMatrix) } } - if (mi < mj) - { + if (mi < mj) { std::swap(mi, mj); } LOG_DEBUG("fmin = " << fmin << ", mi = " << mi << ", mj = " << mj); @@ -200,52 +158,39 @@ TClusterVec agglomerativeCluster(const TDoubleVecVec &distanceMatrix) return tree; } -std::string print(maths::CAgglomerativeClusterer::EObjective o) -{ - switch (o) - { - case maths::CAgglomerativeClusterer::E_Single: return "slink"; - case maths::CAgglomerativeClusterer::E_Complete: return "clink"; - case maths::CAgglomerativeClusterer::E_Average: return "average"; - case maths::CAgglomerativeClusterer::E_Weighted: return "weighted"; - case maths::CAgglomerativeClusterer::E_Ward: return "ward"; +std::string print(maths::CAgglomerativeClusterer::EObjective o) { + switch (o) { + case maths::CAgglomerativeClusterer::E_Single: + return "slink"; + case maths::CAgglomerativeClusterer::E_Complete: + return "clink"; + case maths::CAgglomerativeClusterer::E_Average: + return "average"; + case maths::CAgglomerativeClusterer::E_Weighted: + return "weighted"; + case maths::CAgglomerativeClusterer::E_Ward: + return "ward"; } return "unexpected"; } - } -void CAgglomerativeClustererTest::testNode() -{ +void CAgglomerativeClustererTest::testNode() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CAgglomerativeClustererTest::testNode |"); LOG_DEBUG("+-----------------------------------------+"); - double heights[] = - { - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 1.0, - 1.5, - 1.9, - 4.0 - }; - - maths::CAgglomerativeClusterer::CNode nodes[] = - { - maths::CAgglomerativeClusterer::CNode(0, heights[0]), - maths::CAgglomerativeClusterer::CNode(1, heights[1]), - maths::CAgglomerativeClusterer::CNode(2, heights[2]), - maths::CAgglomerativeClusterer::CNode(3, heights[3]), - maths::CAgglomerativeClusterer::CNode(4, heights[4]), - maths::CAgglomerativeClusterer::CNode(5, heights[5]), - maths::CAgglomerativeClusterer::CNode(6, heights[6]), - maths::CAgglomerativeClusterer::CNode(7, heights[7]), - maths::CAgglomerativeClusterer::CNode(8, heights[8]) - }; + double heights[] = {0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.5, 1.9, 4.0}; + + maths::CAgglomerativeClusterer::CNode nodes[] = {maths::CAgglomerativeClusterer::CNode(0, heights[0]), + maths::CAgglomerativeClusterer::CNode(1, heights[1]), + 
maths::CAgglomerativeClusterer::CNode(2, heights[2]), + maths::CAgglomerativeClusterer::CNode(3, heights[3]), + maths::CAgglomerativeClusterer::CNode(4, heights[4]), + maths::CAgglomerativeClusterer::CNode(5, heights[5]), + maths::CAgglomerativeClusterer::CNode(6, heights[6]), + maths::CAgglomerativeClusterer::CNode(7, heights[7]), + maths::CAgglomerativeClusterer::CNode(8, heights[8])}; nodes[5].addChild(nodes[0]); nodes[5].addChild(nodes[1]); @@ -256,26 +201,23 @@ void CAgglomerativeClustererTest::testNode() nodes[8].addChild(nodes[5]); nodes[8].addChild(nodes[7]); - const maths::CAgglomerativeClusterer::CNode &root = nodes[8]; + const maths::CAgglomerativeClusterer::CNode& root = nodes[8]; LOG_DEBUG("tree = \n" << root.print()); - for (std::size_t i = 0u; i < 8; ++i) - { + for (std::size_t i = 0u; i < 8; ++i) { CPPUNIT_ASSERT_EQUAL(root.index(), nodes[i].root().index()); } TSizeVec points; root.points(points); std::sort(points.begin(), points.end()); - CPPUNIT_ASSERT_EQUAL(std::string("[0, 1, 2, 3, 4]"), - core::CContainerPrinter::print(points)); + CPPUNIT_ASSERT_EQUAL(std::string("[0, 1, 2, 3, 4]"), core::CContainerPrinter::print(points)); points.clear(); nodes[7].points(points); std::sort(points.begin(), points.end()); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3, 4]"), - core::CContainerPrinter::print(points)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3, 4]"), core::CContainerPrinter::print(points)); { TDoubleSizeVecPrVec clusters; @@ -285,60 +227,44 @@ void CAgglomerativeClustererTest::testNode() core::CContainerPrinter::print(clusters)); } - std::string expected[] = - { - std::string("[[0, 1], [2], [3], [4]]"), - std::string("[[0, 1], [2, 3], [4]]"), - std::string("[[0, 1], [4, 2, 3]]"), - std::string("[[0, 1, 4, 2, 3]]") - }; - for (std::size_t h = 5; h < 9; ++h) - { + std::string expected[] = {std::string("[[0, 1], [2], [3], [4]]"), + std::string("[[0, 1], [2, 3], [4]]"), + std::string("[[0, 1], [4, 2, 3]]"), + std::string("[[0, 1, 4, 2, 3]]")}; + for (std::size_t h = 5; h < 9; ++h) { TSizeVecVec clusters; root.clusteringAt(heights[h], clusters); std::sort(clusters.begin(), clusters.end()); - LOG_DEBUG("Clusters at " << heights[h] - << " are " << core::CContainerPrinter::print(clusters)); + LOG_DEBUG("Clusters at " << heights[h] << " are " << core::CContainerPrinter::print(clusters)); CPPUNIT_ASSERT_EQUAL(expected[h - 5], core::CContainerPrinter::print(clusters)); } } -void CAgglomerativeClustererTest::testSimplePermutations() -{ +void CAgglomerativeClustererTest::testSimplePermutations() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CAgglomerativeClustererTest::testSimplePermutations |"); LOG_DEBUG("+-------------------------------------------------------+"); - double x[] = { 1.0, 3.2, 4.5, 7.8 }; + double x[] = {1.0, 3.2, 4.5, 7.8}; std::size_t n = boost::size(x); - maths::CAgglomerativeClusterer::EObjective objectives[] = - { - maths::CAgglomerativeClusterer::E_Single, - maths::CAgglomerativeClusterer::E_Complete - }; + maths::CAgglomerativeClusterer::EObjective objectives[] = {maths::CAgglomerativeClusterer::E_Single, + maths::CAgglomerativeClusterer::E_Complete}; - std::string expected[] = - { - std::string("[(3.3, [0, 1, 2, 3]), (2.2, [0, 1, 2]), (1.3, [1, 2])]"), - std::string("[(6.8, [0, 1, 2, 3]), (3.5, [0, 1, 2]), (1.3, [1, 2])]") - }; + std::string expected[] = {std::string("[(3.3, [0, 1, 2, 3]), (2.2, [0, 1, 2]), (1.3, [1, 2])]"), + std::string("[(6.8, [0, 1, 2, 3]), (3.5, [0, 1, 2]), (1.3, [1, 2])]")}; - for (std::size_t o = 0u; o 
< boost::size(objectives); ++o) - { + for (std::size_t o = 0u; o < boost::size(objectives); ++o) { LOG_DEBUG("****** " << print(objectives[o]) << " ******"); - std::size_t p[] = { 0, 1, 2, 3 }; + std::size_t p[] = {0, 1, 2, 3}; - do - { + do { LOG_DEBUG("*** " << core::CContainerPrinter::print(p) << " ***"); TDoubleVecVec distanceMatrix(n); - for (std::size_t i = 0u; i < n; ++i) - { - for (std::size_t j = i; j < n; ++j) - { + for (std::size_t i = 0u; i < n; ++i) { + for (std::size_t j = i; j < n; ++j) { distanceMatrix[j].push_back(std::fabs(x[p[i]] - x[p[j]])); } LOG_DEBUG("D = " << core::CContainerPrinter::print(distanceMatrix[i])); @@ -355,10 +281,8 @@ void CAgglomerativeClustererTest::testSimplePermutations() LOG_DEBUG("clusters = " << core::CContainerPrinter::print(clusters)); - for (std::size_t i = 0u; i < clusters.size(); ++i) - { - for (std::size_t j = 0u; j < clusters[i].second.size(); ++j) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { + for (std::size_t j = 0u; j < clusters[i].second.size(); ++j) { clusters[i].second[j] = p[clusters[i].second[j]]; } std::sort(clusters[i].second.begin(), clusters[i].second.end()); @@ -367,62 +291,45 @@ void CAgglomerativeClustererTest::testSimplePermutations() LOG_DEBUG("canonical clusters = " << core::CContainerPrinter::print(clusters)); CPPUNIT_ASSERT_EQUAL(expected[o], core::CContainerPrinter::print(clusters)); - } - while (std::next_permutation(boost::begin(p), boost::end(p))); + } while (std::next_permutation(boost::begin(p), boost::end(p))); } } -void CAgglomerativeClustererTest::testDegenerate() -{ +void CAgglomerativeClustererTest::testDegenerate() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CAgglomerativeClustererTest::testDegenerate |"); LOG_DEBUG("+-----------------------------------------------+"); - double x[] = { 1.0, 3.2, 3.2, 3.2, 4.5, 7.8 }; - std::size_t n = boost::size(x); - - maths::CAgglomerativeClusterer::EObjective objectives[] = - { - maths::CAgglomerativeClusterer::E_Single, - maths::CAgglomerativeClusterer::E_Complete - }; - - std::string expected[][3] = - { - { - std::string("[(3.3, [0, 1, 2, 3, 4, 5]), (2.2, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 2])]"), - std::string("[(3.3, [0, 1, 2, 3, 4, 5]), (2.2, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 3])]"), - std::string("[(3.3, [0, 1, 2, 3, 4, 5]), (2.2, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [2, 3])]") - }, - { - std::string("[(6.8, [0, 1, 2, 3, 4, 5]), (3.5, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 2])]"), - std::string("[(6.8, [0, 1, 2, 3, 4, 5]), (3.5, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 3])]"), - std::string("[(6.8, [0, 1, 2, 3, 4, 5]), (3.5, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [2, 3])]") - } - }; + double x[] = {1.0, 3.2, 3.2, 3.2, 4.5, 7.8}; + std::size_t n = boost::size(x); - for (std::size_t o = 0u, count = 0u; o < boost::size(objectives); ++o) - { + maths::CAgglomerativeClusterer::EObjective objectives[] = {maths::CAgglomerativeClusterer::E_Single, + maths::CAgglomerativeClusterer::E_Complete}; + + std::string expected[][3] = { + {std::string("[(3.3, [0, 1, 2, 3, 4, 5]), (2.2, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 2])]"), + std::string("[(3.3, [0, 1, 2, 3, 4, 5]), (2.2, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 3])]"), + std::string("[(3.3, [0, 1, 2, 3, 4, 5]), (2.2, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [2, 3])]")}, + 
{std::string("[(6.8, [0, 1, 2, 3, 4, 5]), (3.5, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 2])]"), + std::string("[(6.8, [0, 1, 2, 3, 4, 5]), (3.5, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 3])]"), + std::string("[(6.8, [0, 1, 2, 3, 4, 5]), (3.5, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [2, 3])]")}}; + + for (std::size_t o = 0u, count = 0u; o < boost::size(objectives); ++o) { LOG_DEBUG("****** " << print(objectives[o]) << " ******"); - std::size_t p[] = { 0, 1, 2, 3, 4, 5 }; + std::size_t p[] = {0, 1, 2, 3, 4, 5}; - do - { - if (count % 10 == 0) - { + do { + if (count % 10 == 0) { LOG_DEBUG("*** " << core::CContainerPrinter::print(p) << " ***"); } TDoubleVecVec distanceMatrix(n); - for (std::size_t i = 0u; i < n; ++i) - { - for (std::size_t j = i; j < n; ++j) - { + for (std::size_t i = 0u; i < n; ++i) { + for (std::size_t j = i; j < n; ++j) { distanceMatrix[j].push_back(std::fabs(x[p[i]] - x[p[j]])); } - if (count % 10 == 0) - { + if (count % 10 == 0) { LOG_DEBUG("D = " << core::CContainerPrinter::print(distanceMatrix[i])); } } @@ -436,36 +343,30 @@ void CAgglomerativeClustererTest::testDegenerate() TDoubleSizeVecPrVec clusters; tree.back().clusters(clusters); - if (count % 10 == 0) - { + if (count % 10 == 0) { LOG_DEBUG("clusters = " << core::CContainerPrinter::print(clusters)); } - for (std::size_t i = 0u; i < clusters.size(); ++i) - { - for (std::size_t j = 0u; j < clusters[i].second.size(); ++j) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { + for (std::size_t j = 0u; j < clusters[i].second.size(); ++j) { clusters[i].second[j] = p[clusters[i].second[j]]; } std::sort(clusters[i].second.begin(), clusters[i].second.end()); } - if (count % 10 == 0) - { + if (count % 10 == 0) { LOG_DEBUG("canonical clusters = " << core::CContainerPrinter::print(clusters)); } - CPPUNIT_ASSERT( expected[o][0] == core::CContainerPrinter::print(clusters) - || expected[o][1] == core::CContainerPrinter::print(clusters) - || expected[o][2] == core::CContainerPrinter::print(clusters)); + CPPUNIT_ASSERT(expected[o][0] == core::CContainerPrinter::print(clusters) || + expected[o][1] == core::CContainerPrinter::print(clusters) || + expected[o][2] == core::CContainerPrinter::print(clusters)); ++count; - } - while (std::next_permutation(boost::begin(p), boost::end(p))); + } while (std::next_permutation(boost::begin(p), boost::end(p))); } } -void CAgglomerativeClustererTest::testRandom() -{ +void CAgglomerativeClustererTest::testRandom() { LOG_DEBUG("+-------------------------------------------+"); LOG_DEBUG("| CAgglomerativeClustererTest::testRandom |"); LOG_DEBUG("+-------------------------------------------+"); @@ -474,34 +375,26 @@ void CAgglomerativeClustererTest::testRandom() std::size_t n = 20u; - maths::CAgglomerativeClusterer::EObjective objectives[] = - { - maths::CAgglomerativeClusterer::E_Single, - maths::CAgglomerativeClusterer::E_Complete - }; + maths::CAgglomerativeClusterer::EObjective objectives[] = {maths::CAgglomerativeClusterer::E_Single, + maths::CAgglomerativeClusterer::E_Complete}; - for (std::size_t o = 0u; o < boost::size(objectives); ++o) - { + for (std::size_t o = 0u; o < boost::size(objectives); ++o) { LOG_DEBUG("*** " << print(objectives[o]) << " ***"); - for (std::size_t t = 0u; t < 10; ++t) - { + for (std::size_t t = 0u; t < 10; ++t) { TDoubleVec dij; rng.generateUniformSamples(0.0, 100.0, n * (n - 1) / 2, dij); TDoubleVecVec distanceMatrix(n); - for (std::size_t i = 0u, k = 0u; i < n; ++i) - { - for (std::size_t j = i; j 
< n; ++j) - { + for (std::size_t i = 0u, k = 0u; i < n; ++i) { + for (std::size_t j = i; j < n; ++j) { distanceMatrix[j].push_back(i == j ? 0.0 : dij[k++]); } LOG_DEBUG("D = " << core::CContainerPrinter::print(distanceMatrix[i])); } TClusterVec expectedTree; - switch (objectives[o]) - { + switch (objectives[o]) { case maths::CAgglomerativeClusterer::E_Single: expectedTree = agglomerativeCluster(distanceMatrix); break; @@ -518,8 +411,7 @@ void CAgglomerativeClustererTest::testRandom() TDoubleSizeVecPrVec expectedClusters; expectedClusters.reserve(expectedTree.size()); - for (std::size_t i = 0u; i < expectedTree.size(); ++i) - { + for (std::size_t i = 0u; i < expectedTree.size(); ++i) { expectedTree[i].add(expectedClusters); } std::sort(expectedClusters.begin(), expectedClusters.end()); @@ -534,36 +426,29 @@ void CAgglomerativeClustererTest::testRandom() TDoubleSizeVecPrVec clusters; tree.back().clusters(clusters); - for (std::size_t i = 0u; i < clusters.size(); ++i) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { std::sort(clusters[i].second.begin(), clusters[i].second.end()); } std::sort(clusters.begin(), clusters.end()); LOG_DEBUG("clusters = " << core::CContainerPrinter::print(clusters)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedClusters), - core::CContainerPrinter::print(clusters)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedClusters), core::CContainerPrinter::print(clusters)); } } } -CppUnit::Test *CAgglomerativeClustererTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAgglomerativeClustererTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAgglomerativeClustererTest::testNode", - &CAgglomerativeClustererTest::testNode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAgglomerativeClustererTest::testSimplePermutations", - &CAgglomerativeClustererTest::testSimplePermutations) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAgglomerativeClustererTest::testDegenerate", - &CAgglomerativeClustererTest::testDegenerate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAgglomerativeClustererTest::testRandom", - &CAgglomerativeClustererTest::testRandom) ); +CppUnit::Test* CAgglomerativeClustererTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAgglomerativeClustererTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CAgglomerativeClustererTest::testNode", + &CAgglomerativeClustererTest::testNode)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAgglomerativeClustererTest::testSimplePermutations", + &CAgglomerativeClustererTest::testSimplePermutations)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAgglomerativeClustererTest::testDegenerate", + &CAgglomerativeClustererTest::testDegenerate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAgglomerativeClustererTest::testRandom", + &CAgglomerativeClustererTest::testRandom)); return suiteOfTests; } diff --git a/lib/maths/unittest/CAgglomerativeClustererTest.h b/lib/maths/unittest/CAgglomerativeClustererTest.h index fc1d5d7f09..04154ae4e5 100644 --- a/lib/maths/unittest/CAgglomerativeClustererTest.h +++ b/lib/maths/unittest/CAgglomerativeClustererTest.h @@ -9,15 +9,14 @@ #include -class CAgglomerativeClustererTest : public CppUnit::TestFixture -{ - public: - void testNode(); - void testSimplePermutations(); - void testDegenerate(); - void testRandom(); +class CAgglomerativeClustererTest : public CppUnit::TestFixture { +public: + void testNode(); + void testSimplePermutations(); + void 
testDegenerate(); + void testRandom(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CAgglomerativeClustererTest_h diff --git a/lib/maths/unittest/CAssignmentTest.cc b/lib/maths/unittest/CAssignmentTest.cc index 0e0194e52d..dd040f4534 100644 --- a/lib/maths/unittest/CAssignmentTest.cc +++ b/lib/maths/unittest/CAssignmentTest.cc @@ -21,8 +21,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; @@ -31,96 +30,73 @@ using TSizeSizePr = std::pair; using TSizeSizePrVec = std::vector; template -void fill(const double (&costs)[N][M], TDoubleVecVec &result) -{ - for (std::size_t i = 0u; i < N; ++i) - { +void fill(const double (&costs)[N][M], TDoubleVecVec& result) { + for (std::size_t i = 0u; i < N; ++i) { result.push_back(TDoubleVec()); - for (std::size_t j = 0u; j < M; ++j) - { + for (std::size_t j = 0u; j < M; ++j) { result.back().push_back(costs[i][j]); } } } -void fill(const TDoubleVec &costs, TDoubleVecVec &result) -{ +void fill(const TDoubleVec& costs, TDoubleVecVec& result) { std::size_t n = static_cast(std::sqrt(static_cast(costs.size()))); result.reserve(n); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { result.push_back(TDoubleVec()); result.back().reserve(n); - for (std::size_t j = 0u; j < n; ++j) - { + for (std::size_t j = 0u; j < n; ++j) { result.back().push_back(costs[i * n + j]); } } } -double cost(const TDoubleVecVec &costs, - const TSizeSizePrVec &matching) -{ +double cost(const TDoubleVecVec& costs, const TSizeSizePrVec& matching) { double result = 0.0; - for (std::size_t i = 0u; i < matching.size(); ++i) - { + for (std::size_t i = 0u; i < matching.size(); ++i) { result += costs[matching[i].first][matching[i].second]; } return result; } -double match(const TDoubleVecVec &costs, - TSizeSizePrVec &matching) -{ +double match(const TDoubleVecVec& costs, TSizeSizePrVec& matching) { std::size_t n = costs.size(); TSizeVec permutation; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { permutation.push_back(i); } double minCost = std::numeric_limits::max(); - do - { + do { double cost = 0.0; - for (std::size_t i = 0u; i < costs.size(); ++i) - { + for (std::size_t i = 0u; i < costs.size(); ++i) { cost += costs[i][permutation[i]]; } - if (cost < minCost) - { + if (cost < minCost) { minCost = cost; matching.clear(); - for (std::size_t i = 0u; i < permutation.size(); ++i) - { + for (std::size_t i = 0u; i < permutation.size(); ++i) { matching.push_back(TSizeSizePr(i, permutation[i])); } } - } - while (std::next_permutation(permutation.begin(), permutation.end())); + } while (std::next_permutation(permutation.begin(), permutation.end())); return minCost; } - } -void CAssignmentTest::testKuhnMunkres() -{ +void CAssignmentTest::testKuhnMunkres() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CAssignmentTest::testKuhnMunkres |"); LOG_DEBUG("+-----------------------------------+"); { LOG_DEBUG("test 1: bad input"); - const double test11[][5] = - { - { 2.0, 1.0, 1.0, 2.0, 2.0 }, - { 1.0, 2.0, 2.0, 2.0, 2.0 }, - }; - const double test12[][6] = - { - { 1.0, 1.0, 2.0, 2.0, 2.0, 3.0 } - }; + const double test11[][5] = { + {2.0, 1.0, 1.0, 2.0, 2.0}, + {1.0, 2.0, 2.0, 2.0, 2.0}, + }; + const double test12[][6] = {{1.0, 1.0, 2.0, 2.0, 2.0, 3.0}}; TDoubleVecVec costs; fill(test11, costs); fill(test12, costs); @@ -130,14 +106,11 @@ void CAssignmentTest::testKuhnMunkres() } { LOG_DEBUG("test 2: 5x5"); - const 
double test2[][5] = - { - { 2.0, 1.0, 1.0, 2.0, 2.0 }, - { 1.0, 2.0, 2.0, 2.0, 2.0 }, - { 2.0, 2.0, 2.0, 1.0, 2.0 }, - { 1.0, 1.0, 2.0, 2.0, 2.0 }, - { 2.0, 2.0, 2.0, 2.0, 1.0 } - }; + const double test2[][5] = {{2.0, 1.0, 1.0, 2.0, 2.0}, + {1.0, 2.0, 2.0, 2.0, 2.0}, + {2.0, 2.0, 2.0, 1.0, 2.0}, + {1.0, 1.0, 2.0, 2.0, 2.0}, + {2.0, 2.0, 2.0, 2.0, 1.0}}; TDoubleVecVec costs; fill(test2, costs); @@ -149,14 +122,8 @@ void CAssignmentTest::testKuhnMunkres() } { LOG_DEBUG("test 3: 5x4"); - const double test3[][4] = - { - { 2.0, 1.0, 1.0, 2.0 }, - { 1.0, 2.0, 2.0, 2.0 }, - { 2.0, 2.0, 2.0, 1.0 }, - { 1.0, 1.0, 2.0, 2.0 }, - { 2.0, 2.0, 2.0, 2.0 } - }; + const double test3[][4] = { + {2.0, 1.0, 1.0, 2.0}, {1.0, 2.0, 2.0, 2.0}, {2.0, 2.0, 2.0, 1.0}, {1.0, 1.0, 2.0, 2.0}, {2.0, 2.0, 2.0, 2.0}}; TDoubleVecVec costs; fill(test3, costs); @@ -168,13 +135,12 @@ void CAssignmentTest::testKuhnMunkres() } { LOG_DEBUG("test 4: 4x5"); - const double test4[][5] = - { - { 2.0, 1.0, 1.0, 2.0, 2.0 }, - { 1.0, 2.0, 2.0, 2.0, 2.0 }, - { 2.0, 2.0, 2.0, 1.0, 2.0 }, - { 1.0, 1.0, 2.0, 2.0, 2.0 }, - }; + const double test4[][5] = { + {2.0, 1.0, 1.0, 2.0, 2.0}, + {1.0, 2.0, 2.0, 2.0, 2.0}, + {2.0, 2.0, 2.0, 1.0, 2.0}, + {1.0, 1.0, 2.0, 2.0, 2.0}, + }; TDoubleVecVec costs; fill(test4, costs); @@ -189,33 +155,28 @@ void CAssignmentTest::testKuhnMunkres() { LOG_DEBUG("test 5: small random"); - for (std::size_t i = 2u; i < 9; ++i) - { + for (std::size_t i = 2u; i < 9; ++i) { LOG_DEBUG("***" << i << "x" << i); - for (std::size_t test = 0u; test < 100; ++test) - { + for (std::size_t test = 0u; test < 100; ++test) { TDoubleVec samples; - rng.generateUniformSamples(0.1, 1000.0, i*i, samples); + rng.generateUniformSamples(0.1, 1000.0, i * i, samples); TDoubleVecVec costs; fill(samples, costs); - if (test % 10 == 0) - { + if (test % 10 == 0) { LOG_DEBUG("costs = " << core::CContainerPrinter::print(costs)); } TSizeSizePrVec expectedMatching; double expectedCost = match(costs, expectedMatching); - if (test % 10 == 0) - { + if (test % 10 == 0) { LOG_DEBUG("expectedCost = " << expectedCost); } TSizeSizePrVec matching; maths::CAssignment::kuhnMunkres(costs, matching); - if (test % 10 == 0) - { + if (test % 10 == 0) { LOG_DEBUG("cost = " << cost(costs, matching)); } @@ -246,25 +207,21 @@ void CAssignmentTest::testKuhnMunkres() // Try some random permutations random permutations and check // we don't find a lower cost solution. TSizeVec randomMatching; - for (std::size_t i = 0u; i < costs.size(); ++i) - { + for (std::size_t i = 0u; i < costs.size(); ++i) { randomMatching.push_back(i); } double lowestRandomCost = std::numeric_limits::max(); - for (std::size_t i = 0u; i < 1000; ++i) - { + for (std::size_t i = 0u; i < 1000; ++i) { rng.random_shuffle(randomMatching.begin(), randomMatching.end()); double cost = 0.0; - for (std::size_t j = 0u; j < costs.size(); ++j) - { + for (std::size_t j = 0u; j < costs.size(); ++j) { cost += costs[j][randomMatching[j]]; } lowestRandomCost = std::min(lowestRandomCost, cost); } - LOG_DEBUG("optimal cost = " << optimalCost - << ", lowest random cost = " << lowestRandomCost); + LOG_DEBUG("optimal cost = " << optimalCost << ", lowest random cost = " << lowestRandomCost); CPPUNIT_ASSERT(lowestRandomCost >= optimalCost); // Check adding higher cost row has no effect. 
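// Aside: the interface exercised above, reduced to a minimal sketch; the
// brute-force match() helper exists only to cross-check it on small inputs.
// This is hypothetical standalone usage, assuming the maths/CAssignment.h
// include path used by the library headers in this patch.
#include <maths/CAssignment.h>

#include <iostream>
#include <utility>
#include <vector>

int main() {
    using TDoubleVec = std::vector<double>;
    using TSizeSizePr = std::pair<std::size_t, std::size_t>;
    std::vector<TDoubleVec> costs{{4.0, 1.0, 3.0}, {2.0, 0.0, 5.0}, {3.0, 2.0, 2.0}};
    std::vector<TSizeSizePr> matching;
    ml::maths::CAssignment::kuhnMunkres(costs, matching);
    double total = 0.0;
    for (const auto& rc : matching) {
        total += costs[rc.first][rc.second]; // row rc.first matched to column rc.second
    }
    std::cout << "minimum cost = " << total << '\n'; // 5.0 = 1 + 2 + 2
    return 0;
}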
@@ -277,30 +234,25 @@ void CAssignmentTest::testKuhnMunkres()
     {
         LOG_DEBUG("test 7: euler 345");

-        const double euler345[][15] =
-            {
-                { 7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583},
-                {627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583, 639, 913},
-                {447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423, 159, 743},
-                {217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462, 350},
-                {960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172, 812, 350},
-                {870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425, 329, 803},
-                {973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165, 992, 326},
-                {322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601, 95, 973},
-                {445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448, 848},
-                {414, 456, 310, 312, 798, 104, 566, 520, 302, 248, 694, 976, 430, 392, 198},
-                {184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284, 760, 390},
-                {821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192, 156, 574},
-                { 34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329, 699},
-                {815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251, 631, 107},
-                {813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302, 35, 805}
-            };
+        const double euler345[][15] = {{7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583},
+                                       {627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583, 639, 913},
+                                       {447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423, 159, 743},
+                                       {217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462, 350},
+                                       {960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172, 812, 350},
+                                       {870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425, 329, 803},
+                                       {973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165, 992, 326},
+                                       {322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601, 95, 973},
+                                       {445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448, 848},
+                                       {414, 456, 310, 312, 798, 104, 566, 520, 302, 248, 694, 976, 430, 392, 198},
+                                       {184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284, 760, 390},
+                                       {821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192, 156, 574},
+                                       {34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329, 699},
+                                       {815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251, 631, 107},
+                                       {813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302, 35, 805}};
         TDoubleVecVec costs;
         fill(euler345, costs);
-        for (std::size_t i = 0u; i < costs.size(); ++i)
-        {
-            for (std::size_t j = 0u; j < costs[i].size(); ++j)
-            {
+        for (std::size_t i = 0u; i < costs.size(); ++i) {
+            for (std::size_t j = 0u; j < costs[i].size(); ++j) {
                 costs[i][j] = 1000 - costs[i][j];
             }
         }
@@ -312,13 +264,10 @@ void CAssignmentTest::testKuhnMunkres()
     }
 }

-CppUnit::Test *CAssignmentTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAssignmentTest");
+CppUnit::Test* CAssignmentTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAssignmentTest");

-    suiteOfTests->addTest( new CppUnit::TestCaller<CAssignmentTest>(
-                               "CAssignmentTest::testKuhnMunkres",
-                               &CAssignmentTest::testKuhnMunkres) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAssignmentTest>("CAssignmentTest::testKuhnMunkres", &CAssignmentTest::testKuhnMunkres));

     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CAssignmentTest.h b/lib/maths/unittest/CAssignmentTest.h
index 6162e0795e..7f3b670123 100644
--- a/lib/maths/unittest/CAssignmentTest.h
+++ b/lib/maths/unittest/CAssignmentTest.h
@@ -9,12 +9,11 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CAssignmentTest : public CppUnit::TestFixture
-{
-    public:
-        void testKuhnMunkres();
+class CAssignmentTest : public CppUnit::TestFixture {
+public:
+    void testKuhnMunkres();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CAssignmentTest_h
diff --git a/lib/maths/unittest/CBasicStatisticsTest.cc b/lib/maths/unittest/CBasicStatisticsTest.cc
index c10d84257f..ab00ef5206 100644
--- a/lib/maths/unittest/CBasicStatisticsTest.cc
+++ b/lib/maths/unittest/CBasicStatisticsTest.cc
@@ -25,8 +25,7 @@

 #include

-namespace
-{
+namespace {
 using TMeanAccumulator = ml::maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 using TMeanVarAccumulator = ml::maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
@@ -40,67 +39,51 @@ using TMeanVarSkewAccumulatorVec = std::vector<TMeanVarSkewAccumulator>;

 const std::string TAG("a");

-struct SRestore
-{
+struct SRestore {
     using result_type = bool;

     template<typename T>
-    bool operator()(std::vector<T> &restored, ml::core::CStateRestoreTraverser &traverser) const
-    {
+    bool operator()(std::vector<T>& restored, ml::core::CStateRestoreTraverser& traverser) const {
         return ml::core::CPersistUtils::restore(TAG, restored, traverser);
     }

     template<typename T>
-    bool operator()(T &restored, ml::core::CStateRestoreTraverser &traverser) const
-    {
+    bool operator()(T& restored, ml::core::CStateRestoreTraverser& traverser) const {
         return restored.fromDelimited(traverser.value());
     }
 };
-
 }
-
-CppUnit::Test *CBasicStatisticsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBasicStatisticsTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBasicStatisticsTest>(
-                               "CBasicStatisticsTest::testMean",
-                               &CBasicStatisticsTest::testMean) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBasicStatisticsTest>(
-                               "CBasicStatisticsTest::testCentralMoments",
-                               &CBasicStatisticsTest::testCentralMoments) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBasicStatisticsTest>(
-                               "CBasicStatisticsTest::testVectorCentralMoments",
-                               &CBasicStatisticsTest::testVectorCentralMoments) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBasicStatisticsTest>(
-                               "CBasicStatisticsTest::testCovariances",
-                               &CBasicStatisticsTest::testCovariances) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBasicStatisticsTest>(
-                               "CBasicStatisticsTest::testCovariancesLedoitWolf",
-                               &CBasicStatisticsTest::testCovariancesLedoitWolf) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBasicStatisticsTest>(
-                               "CBasicStatisticsTest::testMedian",
-                               &CBasicStatisticsTest::testMedian) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBasicStatisticsTest>(
-                               "CBasicStatisticsTest::testOrderStatistics",
-                               &CBasicStatisticsTest::testOrderStatistics) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CBasicStatisticsTest>(
-                               "CBasicStatisticsTest::testMinMax",
-                               &CBasicStatisticsTest::testMinMax) );
+CppUnit::Test* CBasicStatisticsTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBasicStatisticsTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>("CBasicStatisticsTest::testMean", &CBasicStatisticsTest::testMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>("CBasicStatisticsTest::testCentralMoments",
+                                                                        &CBasicStatisticsTest::testCentralMoments));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>("CBasicStatisticsTest::testVectorCentralMoments",
+                                                                        &CBasicStatisticsTest::testVectorCentralMoments));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CBasicStatisticsTest>("CBasicStatisticsTest::testCovariances", &CBasicStatisticsTest::testCovariances));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>("CBasicStatisticsTest::testCovariancesLedoitWolf",
+                                                                        &CBasicStatisticsTest::testCovariancesLedoitWolf));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CBasicStatisticsTest>("CBasicStatisticsTest::testMedian", &CBasicStatisticsTest::testMedian));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>("CBasicStatisticsTest::testOrderStatistics",
+                                                                        &CBasicStatisticsTest::testOrderStatistics));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CBasicStatisticsTest>("CBasicStatisticsTest::testMinMax", &CBasicStatisticsTest::testMinMax));

     return suiteOfTests;
 }

-void CBasicStatisticsTest::testMean()
-{
+void CBasicStatisticsTest::testMean() {
     LOG_DEBUG("+---------------------------------+");
     LOG_DEBUG("| CBasicStatisticsTest::testMean |");
     LOG_DEBUG("+---------------------------------+");

-    double sample[] = { 0.9, 10.0, 5.6, 1.23, -12.3, 445.2, 0.0, 1.2 };
+    double sample[] = {0.9, 10.0, 5.6, 1.23, -12.3, 445.2, 0.0, 1.2};

-    ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample+sizeof(sample)/sizeof(sample[0]));
+    ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0]));

     double mean = ml::maths::CBasicStatistics::mean(sampleVec);
@@ -108,8 +91,7 @@ void CBasicStatisticsTest::testMean()
     CPPUNIT_ASSERT_EQUAL(56.47875, mean);
 }

-void CBasicStatisticsTest::testCentralMoments()
-{
+void CBasicStatisticsTest::testCentralMoments() {
     LOG_DEBUG("+--------------------------------------------+");
     LOG_DEBUG("| CBasicStatisticsTest::testCentralMoments |");
     LOG_DEBUG("+--------------------------------------------+");
@@ -118,18 +100,15 @@ void CBasicStatisticsTest::testCentralMoments()
     LOG_DEBUG("Test mean double");
     {
-        double samples[] = { 0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2 };
+        double samples[] = {0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2};

         TMeanAccumulator acc;

-        size_t count = sizeof(samples)/sizeof(samples[0]);
+        size_t count = sizeof(samples) / sizeof(samples[0]);
         acc = std::for_each(samples, samples + count, acc);

-        CPPUNIT_ASSERT_EQUAL(count,
-                             static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
+        CPPUNIT_ASSERT_EQUAL(count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));

-        CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875,
-                                     ml::maths::CBasicStatistics::mean(acc),
-                                     0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc), 0.000005);

         double n0 = ml::maths::CBasicStatistics::count(acc);
         ml::maths::CBasicStatistics::scale(0.5, acc);
@@ -141,40 +120,32 @@ void CBasicStatisticsTest::testCentralMoments()
     {
         using TFloatMeanAccumulator = ml::maths::CBasicStatistics::SSampleMean<float>::TAccumulator;

-        float samples[] = { 0.9f, 10.0f, 5.6f, 1.23f, -12.3f, 7.2f, 0.0f, 1.2f };
+        float samples[] = {0.9f, 10.0f, 5.6f, 1.23f, -12.3f, 7.2f, 0.0f, 1.2f};

         TFloatMeanAccumulator acc;

-        size_t count = sizeof(samples)/sizeof(samples[0]);
+        size_t count = sizeof(samples) / sizeof(samples[0]);
         acc = std::for_each(samples, samples + count, acc);

-        CPPUNIT_ASSERT_EQUAL(count,
-                             static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
+        CPPUNIT_ASSERT_EQUAL(count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));

-        CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875,
-                                     ml::maths::CBasicStatistics::mean(acc),
-                                     0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc), 0.000005);
     }

     LOG_DEBUG("Test mean and variance");
     {
-        double samples[] = { 0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2 };
+        double samples[] = {0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2};

         TMeanVarAccumulator acc;

-        size_t count = sizeof(samples)/sizeof(samples[0]);
+        size_t count = sizeof(samples) / sizeof(samples[0]);
         acc = std::for_each(samples, samples + count, acc);

-        CPPUNIT_ASSERT_EQUAL(count,
-                             static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
+        CPPUNIT_ASSERT_EQUAL(count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));

-        CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875,
-                                     ml::maths::CBasicStatistics::mean(acc),
-                                     0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc), 0.000005);

-        CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633,
-                                     ml::maths::CBasicStatistics::variance(acc),
-                                     0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, ml::maths::CBasicStatistics::variance(acc), 0.000005);

         double n0 = ml::maths::CBasicStatistics::count(acc);
         ml::maths::CBasicStatistics::scale(0.5, acc);
@@ -184,27 +155,20 @@ void CBasicStatisticsTest::testCentralMoments()

     LOG_DEBUG("Test mean, variance and skew");
     {
-        double samples[] = { 0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2 };
+        double samples[] = {0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2};

         TMeanVarSkewAccumulator acc;

-        size_t count = sizeof(samples)/sizeof(samples[0]);
+        size_t count = sizeof(samples) / sizeof(samples[0]);
         acc = std::for_each(samples, samples + count, acc);

-        CPPUNIT_ASSERT_EQUAL(count,
-                             static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
+        CPPUNIT_ASSERT_EQUAL(count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));

-        CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875,
-                                     ml::maths::CBasicStatistics::mean(acc),
-                                     0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc), 0.000005);

-        CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633,
-                                     ml::maths::CBasicStatistics::variance(acc),
-                                     0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, ml::maths::CBasicStatistics::variance(acc), 0.000005);

-        CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.82216,
-                                     ml::maths::CBasicStatistics::skewness(acc),
-                                     0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.82216, ml::maths::CBasicStatistics::skewness(acc), 0.000005);

         double n0 = ml::maths::CBasicStatistics::count(acc);
         ml::maths::CBasicStatistics::scale(0.5, acc);
@@ -214,81 +178,63 @@ void CBasicStatisticsTest::testCentralMoments()

     LOG_DEBUG("Test weighted update");
     {
-        double samples[] = { 0.9, 1.0, 2.3, 1.5 };
-        std::size_t weights[] = { 1, 4, 2, 3 };
+        double samples[] = {0.9, 1.0, 2.3, 1.5};
+        std::size_t weights[] = {1, 4, 2, 3};

         {
             TMeanAccumulator acc1;
             TMeanAccumulator acc2;

-            for (size_t i = 0; i < boost::size(samples); ++i)
-            {
+            for (size_t i = 0; i < boost::size(samples); ++i) {
                 acc1.add(samples[i], static_cast<double>(weights[i]));
-                for (std::size_t j = 0u; j < weights[i]; ++j)
-                {
+                for (std::size_t j = 0u; j < weights[i]; ++j) {
                     acc2.add(samples[i]);
                 }
             }

-            CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1),
-                                         ml::maths::CBasicStatistics::mean(acc2),
-                                         1e-10);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(acc2), 1e-10);
         }
         {
             TMeanVarAccumulator acc1;
             TMeanVarAccumulator acc2;

-            for (size_t i = 0; i < boost::size(samples); ++i)
-            {
+            for (size_t i = 0; i < boost::size(samples); ++i) {
                 acc1.add(samples[i], static_cast<double>(weights[i]));
-                for (std::size_t j = 0u; j < weights[i]; ++j)
-                {
+                for (std::size_t j = 0u; j < weights[i]; ++j) {
                     acc2.add(samples[i]);
                 }
             }

-            CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1),
-                                         ml::maths::CBasicStatistics::mean(acc2),
-                                         1e-10);
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1),
-                                         ml::maths::CBasicStatistics::variance(acc2),
-                                         1e-10);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(acc2), 1e-10);
+
CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), ml::maths::CBasicStatistics::variance(acc2), 1e-10); } { TMeanVarSkewAccumulator acc1; TMeanVarSkewAccumulator acc2; - for (size_t i = 0; i < boost::size(samples); ++i) - { + for (size_t i = 0; i < boost::size(samples); ++i) { acc1.add(samples[i], static_cast(weights[i])); - for (std::size_t j = 0u; j < weights[i]; ++j) - { + for (std::size_t j = 0u; j < weights[i]; ++j) { acc2.add(samples[i]); } } - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), - ml::maths::CBasicStatistics::mean(acc2), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), - ml::maths::CBasicStatistics::variance(acc2), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::skewness(acc1), - ml::maths::CBasicStatistics::skewness(acc2), - 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), ml::maths::CBasicStatistics::variance(acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::skewness(acc1), ml::maths::CBasicStatistics::skewness(acc2), 1e-10); } } LOG_DEBUG("Test addition"); { // Test addition. - double samples1[] = { 0.9, 10.0, 5.6, 1.23 }; - double samples2[] = { -12.3, 7.2, 0.0, 1.2 }; + double samples1[] = {0.9, 10.0, 5.6, 1.23}; + double samples2[] = {-12.3, 7.2, 0.0, 1.2}; - size_t count1 = sizeof(samples1)/sizeof(samples1[0]); - size_t count2 = sizeof(samples2)/sizeof(samples2[0]); + size_t count1 = sizeof(samples1) / sizeof(samples1[0]); + size_t count2 = sizeof(samples2) / sizeof(samples2[0]); { TMeanAccumulator acc1; @@ -297,12 +243,9 @@ void CBasicStatisticsTest::testCentralMoments() acc1 = std::for_each(samples1, samples1 + count1, acc1); acc2 = std::for_each(samples2, samples2 + count2, acc2); - CPPUNIT_ASSERT_EQUAL(count1 + count2, - static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); + CPPUNIT_ASSERT_EQUAL(count1 + count2, static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, - ml::maths::CBasicStatistics::mean(acc1 + acc2), - 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); } { @@ -312,16 +255,11 @@ void CBasicStatisticsTest::testCentralMoments() acc1 = std::for_each(samples1, samples1 + count1, acc1); acc2 = std::for_each(samples2, samples2 + count2, acc2); - CPPUNIT_ASSERT_EQUAL(count1 + count2, - static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); + CPPUNIT_ASSERT_EQUAL(count1 + count2, static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, - ml::maths::CBasicStatistics::mean(acc1 + acc2), - 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); - CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, - ml::maths::CBasicStatistics::variance(acc1 + acc2), - 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, ml::maths::CBasicStatistics::variance(acc1 + acc2), 0.000005); } { @@ -331,20 +269,13 @@ void CBasicStatisticsTest::testCentralMoments() acc1 = std::for_each(samples1, samples1 + count1, acc1); acc2 = std::for_each(samples2, samples2 + count2, acc2); - CPPUNIT_ASSERT_EQUAL(count1 + count2, - static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); + CPPUNIT_ASSERT_EQUAL(count1 + count2, static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); - 
CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, - ml::maths::CBasicStatistics::mean(acc1 + acc2), - 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); - CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, - ml::maths::CBasicStatistics::variance(acc1 + acc2), - 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, ml::maths::CBasicStatistics::variance(acc1 + acc2), 0.000005); - CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.82216, - ml::maths::CBasicStatistics::skewness(acc1 + acc2),\ - 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.82216, ml::maths::CBasicStatistics::skewness(acc1 + acc2), 0.000005); } } @@ -360,31 +291,22 @@ void CBasicStatisticsTest::testCentralMoments() TDoubleVec samples; rng.generateNormalSamples(2.0, 3.0, 40u, samples); - for (std::size_t j = 1u; j < samples.size(); ++j) - { + for (std::size_t j = 1u; j < samples.size(); ++j) { LOG_DEBUG("split = " << j << "/" << samples.size() - j); - for (std::size_t i = 0u; i < j; ++i) - { + for (std::size_t i = 0u; i < j; ++i) { acc1.add(samples[i]); } - for (std::size_t i = j; i < samples.size(); ++i) - { + for (std::size_t i = j; i < samples.size(); ++i) { acc2.add(samples[i]); } TMeanAccumulator sum = acc1 + acc2; - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), - ml::maths::CBasicStatistics::count(sum - acc2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), - ml::maths::CBasicStatistics::mean(sum - acc2), - 1e-10); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), - ml::maths::CBasicStatistics::count(sum - acc1)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), - ml::maths::CBasicStatistics::mean(sum - acc1), - 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), ml::maths::CBasicStatistics::count(sum - acc2)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), ml::maths::CBasicStatistics::count(sum - acc1)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); } } LOG_DEBUG("Test mean and variance"); @@ -395,37 +317,26 @@ void CBasicStatisticsTest::testCentralMoments() TDoubleVec samples; rng.generateGammaSamples(3.0, 3.0, 40u, samples); - for (std::size_t j = 1u; j < samples.size(); ++j) - { + for (std::size_t j = 1u; j < samples.size(); ++j) { LOG_DEBUG("split = " << j << "/" << samples.size() - j); - for (std::size_t i = 0u; i < j; ++i) - { + for (std::size_t i = 0u; i < j; ++i) { acc1.add(samples[i]); } - for (std::size_t i = j; i < samples.size(); ++i) - { + for (std::size_t i = j; i < samples.size(); ++i) { acc2.add(samples[i]); } TMeanVarAccumulator sum = acc1 + acc2; - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), - ml::maths::CBasicStatistics::count(sum - acc2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), - ml::maths::CBasicStatistics::mean(sum - acc2), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), - ml::maths::CBasicStatistics::variance(sum - acc2), - 1e-10); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), - ml::maths::CBasicStatistics::count(sum - acc1)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), - ml::maths::CBasicStatistics::mean(sum - acc1), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc2), - 
ml::maths::CBasicStatistics::variance(sum - acc1), - 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), ml::maths::CBasicStatistics::count(sum - acc2)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::variance(acc1), ml::maths::CBasicStatistics::variance(sum - acc2), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), ml::maths::CBasicStatistics::count(sum - acc1)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::variance(acc2), ml::maths::CBasicStatistics::variance(sum - acc1), 1e-10); } } LOG_DEBUG("Test mean, variance and skew"); @@ -436,52 +347,39 @@ void CBasicStatisticsTest::testCentralMoments() TDoubleVec samples; rng.generateLogNormalSamples(1.1, 1.0, 40u, samples); - for (std::size_t j = 1u; j < samples.size(); ++j) - { + for (std::size_t j = 1u; j < samples.size(); ++j) { LOG_DEBUG("split = " << j << "/" << samples.size() - j); - for (std::size_t i = 0u; i < j; ++i) - { + for (std::size_t i = 0u; i < j; ++i) { acc1.add(samples[i]); } - for (std::size_t i = j; i < samples.size(); ++i) - { + for (std::size_t i = j; i < samples.size(); ++i) { acc2.add(samples[i]); } TMeanVarSkewAccumulator sum = acc1 + acc2; - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), - ml::maths::CBasicStatistics::count(sum - acc2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), - ml::maths::CBasicStatistics::mean(sum - acc2), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), - ml::maths::CBasicStatistics::variance(sum - acc2), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::skewness(acc1), - ml::maths::CBasicStatistics::skewness(sum - acc2), - 1e-10); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), - ml::maths::CBasicStatistics::count(sum - acc1)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), - ml::maths::CBasicStatistics::mean(sum - acc1), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc2), - ml::maths::CBasicStatistics::variance(sum - acc1), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::skewness(acc2), - ml::maths::CBasicStatistics::skewness(sum - acc1), - 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), ml::maths::CBasicStatistics::count(sum - acc2)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::variance(acc1), ml::maths::CBasicStatistics::variance(sum - acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::skewness(acc1), ml::maths::CBasicStatistics::skewness(sum - acc2), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), ml::maths::CBasicStatistics::count(sum - acc1)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::variance(acc2), ml::maths::CBasicStatistics::variance(sum - acc1), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::skewness(acc2), ml::maths::CBasicStatistics::skewness(sum - acc1), 1e-10); } } } - 
LOG_DEBUG("test vector") - { + LOG_DEBUG("test vector") { using TVectorMeanAccumulator = ml::maths::CBasicStatistics::SSampleMean>::TAccumulator; using TVectorMeanVarAccumulator = ml::maths::CBasicStatistics::SSampleMeanVar>::TAccumulator; - using TVectorMeanVarSkewAccumulator = ml::maths::CBasicStatistics::SSampleMeanVarSkew>::TAccumulator; + using TVectorMeanVarSkewAccumulator = + ml::maths::CBasicStatistics::SSampleMeanVarSkew>::TAccumulator; ml::test::CRandomNumbers rng; @@ -494,23 +392,18 @@ void CBasicStatisticsTest::testCentralMoments() TMeanAccumulator means[4]; TVectorMeanAccumulator vectorMean; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { ml::maths::CVectorNx1 v; - for (std::size_t j = 0u; j < 4; ++i, ++j) - { + for (std::size_t j = 0u; j < 4; ++i, ++j) { means[j].add(samples[i]); v(j) = samples[i]; } LOG_DEBUG("v = " << v); vectorMean.add(v); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(means[0]), - ml::maths::CBasicStatistics::count(vectorMean)); - for (std::size_t j = 0u; j < 4; ++j) - { - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::mean(means[j]), - (ml::maths::CBasicStatistics::mean(vectorMean))(j)); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(means[0]), ml::maths::CBasicStatistics::count(vectorMean)); + for (std::size_t j = 0u; j < 4; ++j) { + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::mean(means[j]), (ml::maths::CBasicStatistics::mean(vectorMean))(j)); } } } @@ -523,11 +416,9 @@ void CBasicStatisticsTest::testCentralMoments() TMeanVarAccumulator meansAndVariances[4]; TVectorMeanVarAccumulator vectorMeanAndVariances; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { ml::maths::CVectorNx1 v; - for (std::size_t j = 0u; j < 4; ++i, ++j) - { + for (std::size_t j = 0u; j < 4; ++i, ++j) { meansAndVariances[j].add(samples[i]); v(j) = samples[i]; } @@ -536,8 +427,7 @@ void CBasicStatisticsTest::testCentralMoments() CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(meansAndVariances[0]), ml::maths::CBasicStatistics::count(vectorMeanAndVariances)); - for (std::size_t j = 0u; j < 4; ++j) - { + for (std::size_t j = 0u; j < 4; ++j) { CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::mean(meansAndVariances[j]), (ml::maths::CBasicStatistics::mean(vectorMeanAndVariances))(j)); CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::variance(meansAndVariances[j]), @@ -554,11 +444,9 @@ void CBasicStatisticsTest::testCentralMoments() TMeanVarSkewAccumulator meansVariancesAndSkews[4]; TVectorMeanVarSkewAccumulator vectorMeanVarianceAndSkew; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { ml::maths::CVectorNx1 v; - for (std::size_t j = 0u; j < 4; ++i, ++j) - { + for (std::size_t j = 0u; j < 4; ++i, ++j) { meansVariancesAndSkews[j].add(samples[i]); v(j) = samples[i]; } @@ -567,8 +455,7 @@ void CBasicStatisticsTest::testCentralMoments() CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(meansVariancesAndSkews[0]), ml::maths::CBasicStatistics::count(vectorMeanVarianceAndSkew)); - for (std::size_t j = 0u; j < 4; ++j) - { + for (std::size_t j = 0u; j < 4; ++j) { CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::mean(meansVariancesAndSkews[j]), (ml::maths::CBasicStatistics::mean(vectorMeanVarianceAndSkew))(j)); CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::variance(meansVariancesAndSkews[j]), @@ -602,8 +489,7 @@ void CBasicStatisticsTest::testCentralMoments() 
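// ---------------------------------------------------------------------------
// Illustrative aside, not part of the patch: the persistence hunks that
// follow all apply the same round-trip check, which is to persist the
// accumulator to XML, restore it into a fresh object, and require the
// restored object's checksum and re-persisted form to match the original.
// This catches both lossy persistence and lossy restoration. A minimal
// sketch of the pattern with a toy delimited format; the type here is
// hypothetical:
#include <cassert>
#include <sstream>
#include <string>

struct SToyMoments {
    double count = 0.0;
    double mean = 0.0;

    std::string toDelimited() const {
        std::ostringstream o;
        o << count << ':' << mean;
        return o.str();
    }
    bool fromDelimited(const std::string& s) {
        char sep = '\0';
        std::istringstream i(s);
        return static_cast<bool>(i >> count >> sep >> mean) && sep == ':';
    }
};

void checkPersistRoundTrip(const SToyMoments& original) {
    std::string once = original.toDelimited();
    SToyMoments restored;
    assert(restored.fromDelimited(once));
    // Re-persisting the restored object must reproduce the first encoding.
    assert(restored.toDelimited() == once);
}
// ---------------------------------------------------------------------------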
CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG("restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); - for (std::size_t i = 0u; i < restored.size(); ++i) - { + for (std::size_t i = 0u; i < restored.size(); ++i) { CPPUNIT_ASSERT_EQUAL(moments[i].checksum(), restored[i].checksum()); } } @@ -630,8 +516,7 @@ void CBasicStatisticsTest::testCentralMoments() CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG("restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); - for (std::size_t i = 0u; i < restored.size(); ++i) - { + for (std::size_t i = 0u; i < restored.size(); ++i) { CPPUNIT_ASSERT_EQUAL(moments[i].checksum(), restored[i].checksum()); } } @@ -656,8 +541,7 @@ void CBasicStatisticsTest::testCentralMoments() CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG("restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); - for (std::size_t i = 0u; i < restored.size(); ++i) - { + for (std::size_t i = 0u; i < restored.size(); ++i) { CPPUNIT_ASSERT_EQUAL(moments[i].checksum(), restored[i].checksum()); } } @@ -685,8 +569,7 @@ void CBasicStatisticsTest::testCentralMoments() CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG("restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); - for (std::size_t i = 0u; i < restored.size(); ++i) - { + for (std::size_t i = 0u; i < restored.size(); ++i) { CPPUNIT_ASSERT_EQUAL(moments[i].checksum(), restored[i].checksum()); } } @@ -711,8 +594,7 @@ void CBasicStatisticsTest::testCentralMoments() CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG("restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); - for (std::size_t i = 0u; i < restored.size(); ++i) - { + for (std::size_t i = 0u; i < restored.size(); ++i) { CPPUNIT_ASSERT_EQUAL(moments[i].checksum(), restored[i].checksum()); } } @@ -740,8 +622,7 @@ void CBasicStatisticsTest::testCentralMoments() CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG("restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); - for (std::size_t i = 0u; i < restored.size(); ++i) - { + for (std::size_t i = 0u; i < restored.size(); ++i) { CPPUNIT_ASSERT_EQUAL(moments[i].checksum(), restored[i].checksum()); } } @@ -753,8 +634,7 @@ void CBasicStatisticsTest::testCentralMoments() CPPUNIT_ASSERT_EQUAL(true, ml::core::memory_detail::SDynamicSizeAlwaysZero::value()); } -void CBasicStatisticsTest::testVectorCentralMoments() -{ +void CBasicStatisticsTest::testVectorCentralMoments() { LOG_DEBUG("+--------------------------------------------------+"); LOG_DEBUG("| CBasicStatisticsTest::testVectorCentralMoments |"); LOG_DEBUG("+--------------------------------------------------+"); @@ -765,8 +645,16 @@ void CBasicStatisticsTest::testVectorCentralMoments() { TMeanAccumulator2Vec moments1(2); TMeanAccumulatorVec moments2(2); - moments1[0].add(2.0); moments1[0].add(5.0); moments1[0].add(2.9); moments1[1].add(4.0); moments1[1].add(3.0); - moments2[0].add(2.0); 
moments2[0].add(5.0); moments2[0].add(2.9); moments2[1].add(4.0); moments2[1].add(3.0); + moments1[0].add(2.0); + moments1[0].add(5.0); + moments1[0].add(2.9); + moments1[1].add(4.0); + moments1[1].add(3.0); + moments2[0].add(2.0); + moments2[0].add(5.0); + moments2[0].add(2.9); + moments2[1].add(4.0); + moments2[1].add(3.0); TDouble2Vec counts1 = ml::maths::CBasicStatistics::count(moments1); TDouble2Vec means1 = ml::maths::CBasicStatistics::mean(moments1); TDoubleVec counts2 = ml::maths::CBasicStatistics::count(moments2); @@ -779,8 +667,16 @@ void CBasicStatisticsTest::testVectorCentralMoments() { TMeanVarAccumulator2Vec moments1(2); TMeanVarAccumulatorVec moments2(2); - moments1[0].add(2.0); moments1[0].add(4.0); moments1[1].add(3.0); moments1[1].add(4.0); moments1[1].add(5.0); - moments2[0].add(2.0); moments2[0].add(4.0); moments2[1].add(3.0); moments2[1].add(4.0); moments2[1].add(5.0); + moments1[0].add(2.0); + moments1[0].add(4.0); + moments1[1].add(3.0); + moments1[1].add(4.0); + moments1[1].add(5.0); + moments2[0].add(2.0); + moments2[0].add(4.0); + moments2[1].add(3.0); + moments2[1].add(4.0); + moments2[1].add(5.0); TDouble2Vec counts1 = ml::maths::CBasicStatistics::count(moments1); TDouble2Vec means1 = ml::maths::CBasicStatistics::mean(moments1); TDouble2Vec vars1 = ml::maths::CBasicStatistics::variance(moments1); @@ -801,8 +697,16 @@ void CBasicStatisticsTest::testVectorCentralMoments() { TMeanVarSkewAccumulator2Vec moments1(2); TMeanVarSkewAccumulatorVec moments2(2); - moments1[0].add(2.0); moments1[0].add(4.0); moments1[1].add(2.0); moments1[1].add(5.0); moments1[1].add(5.0); - moments2[0].add(2.0); moments2[0].add(4.0); moments2[1].add(2.0); moments2[1].add(5.0); moments2[1].add(5.0); + moments1[0].add(2.0); + moments1[0].add(4.0); + moments1[1].add(2.0); + moments1[1].add(5.0); + moments1[1].add(5.0); + moments2[0].add(2.0); + moments2[0].add(4.0); + moments2[1].add(2.0); + moments2[1].add(5.0); + moments2[1].add(5.0); TDouble2Vec counts1 = ml::maths::CBasicStatistics::count(moments1); TDouble2Vec means1 = ml::maths::CBasicStatistics::mean(moments1); TDouble2Vec vars1 = ml::maths::CBasicStatistics::variance(moments1); @@ -826,60 +730,30 @@ void CBasicStatisticsTest::testVectorCentralMoments() } } -void CBasicStatisticsTest::testCovariances() -{ +void CBasicStatisticsTest::testCovariances() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CBasicStatisticsTest::testCovariances |"); LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("N(3,I)"); { - const double raw[][3] = - { - { 2.58894, 2.87211, 1.62609 }, - { 3.88246, 2.98577, 2.70981 }, - { 2.03317, 3.33715, 2.93560 }, - { 3.30100, 4.38844, 1.65705 }, - { 2.12426, 2.21127, 2.57000 }, - { 4.21041, 4.20745, 1.90752 }, - { 3.56139, 3.14454, 0.89316 }, - { 4.29444, 1.58715, 3.58402 }, - { 3.06731, 3.91581, 2.85951 }, - { 3.62798, 2.28786, 2.89994 }, - { 2.05834, 2.96137, 3.57654 }, - { 2.72185, 3.36003, 3.09708 }, - { 0.94924, 2.19797, 3.30941 }, - { 2.11159, 2.49182, 3.56793 }, - { 3.10364, 0.32747, 3.62487 }, - { 2.28235, 3.83542, 3.35942 }, - { 3.30549, 2.95951, 2.97006 }, - { 3.05787, 2.94188, 2.64095 }, - { 3.98245, 2.02892, 3.07909 }, - { 3.81189, 2.89389, 3.81389 }, - { 3.32811, 3.88484, 4.17866 }, - { 2.06964, 3.80683, 2.46835 }, - { 4.58989, 2.00321, 1.93029 }, - { 2.51484, 4.46106, 3.71248 }, - { 3.30729, 2.44768, 3.43241 }, - { 3.52222, 2.91724, 1.49631 }, - { 1.71826, 4.79752, 4.38398 }, - { 3.14173, 3.16237, 2.49654 }, - { 3.26538, 2.21858, 5.05477 }, - { 2.88352, 1.94396, 
3.08744 } - }; - - const double expectedMean[] = { 3.013898, 2.952637, 2.964104 }; - const double expectedCovariances[][3] = - { - { 0.711903, -0.174535, -0.199460 }, - { -0.174535, 0.935285, -0.091192 }, - { -0.199460, -0.091192, 0.833710 } - }; + const double raw[][3] = { + {2.58894, 2.87211, 1.62609}, {3.88246, 2.98577, 2.70981}, {2.03317, 3.33715, 2.93560}, {3.30100, 4.38844, 1.65705}, + {2.12426, 2.21127, 2.57000}, {4.21041, 4.20745, 1.90752}, {3.56139, 3.14454, 0.89316}, {4.29444, 1.58715, 3.58402}, + {3.06731, 3.91581, 2.85951}, {3.62798, 2.28786, 2.89994}, {2.05834, 2.96137, 3.57654}, {2.72185, 3.36003, 3.09708}, + {0.94924, 2.19797, 3.30941}, {2.11159, 2.49182, 3.56793}, {3.10364, 0.32747, 3.62487}, {2.28235, 3.83542, 3.35942}, + {3.30549, 2.95951, 2.97006}, {3.05787, 2.94188, 2.64095}, {3.98245, 2.02892, 3.07909}, {3.81189, 2.89389, 3.81389}, + {3.32811, 3.88484, 4.17866}, {2.06964, 3.80683, 2.46835}, {4.58989, 2.00321, 1.93029}, {2.51484, 4.46106, 3.71248}, + {3.30729, 2.44768, 3.43241}, {3.52222, 2.91724, 1.49631}, {1.71826, 4.79752, 4.38398}, {3.14173, 3.16237, 2.49654}, + {3.26538, 2.21858, 5.05477}, {2.88352, 1.94396, 3.08744}}; + + const double expectedMean[] = {3.013898, 2.952637, 2.964104}; + const double expectedCovariances[][3] = { + {0.711903, -0.174535, -0.199460}, {-0.174535, 0.935285, -0.091192}, {-0.199460, -0.091192, 0.833710}}; ml::maths::CBasicStatistics::SSampleCovariances covariances; - for (std::size_t i = 0u; i < boost::size(raw); ++i) - { + for (std::size_t i = 0u; i < boost::size(raw); ++i) { ml::maths::CVectorNx1 v(raw[i]); LOG_DEBUG("v = " << v); covariances.add(v); @@ -889,36 +763,30 @@ void CBasicStatisticsTest::testCovariances() LOG_DEBUG("mean = " << ml::maths::CBasicStatistics::mean(covariances)); LOG_DEBUG("covariances = " << ml::maths::CBasicStatistics::covariances(covariances)); - CPPUNIT_ASSERT_EQUAL(static_cast(boost::size(raw)), - ml::maths::CBasicStatistics::count(covariances)); - for (std::size_t i = 0u; i < 3; ++i) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean[i], - (ml::maths::CBasicStatistics::mean(covariances))(i), - 2e-6); - for (std::size_t j = 0u; j < 3; ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedCovariances[i][j], - (ml::maths::CBasicStatistics::covariances(covariances))(i, j), - 2e-6); + CPPUNIT_ASSERT_EQUAL(static_cast(boost::size(raw)), ml::maths::CBasicStatistics::count(covariances)); + for (std::size_t i = 0u; i < 3; ++i) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean[i], (ml::maths::CBasicStatistics::mean(covariances))(i), 2e-6); + for (std::size_t j = 0u; j < 3; ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL( + expectedCovariances[i][j], (ml::maths::CBasicStatistics::covariances(covariances))(i, j), 2e-6); } } - bool dynamicSizeAlwaysZero = ml::core::memory_detail::SDynamicSizeAlwaysZero< - ml::maths::CBasicStatistics::SSampleCovariances>::value(); + bool dynamicSizeAlwaysZero = + ml::core::memory_detail::SDynamicSizeAlwaysZero>::value(); CPPUNIT_ASSERT_EQUAL(true, dynamicSizeAlwaysZero); } { using TVectorVec = std::vector>; - double mean_[] = { 1.0, 3.0, 2.0, 7.0 }; + double mean_[] = {1.0, 3.0, 2.0, 7.0}; ml::maths::CVectorNx1 mean(mean_); - double covariances1_[] = { 1.0, 1.0, 1.0, 1.0 }; - double covariances2_[] = { -1.0, 1.0, 0.0, 0.0 }; - double covariances3_[] = { -1.0, -1.0, 2.0, 0.0 }; - double covariances4_[] = { -1.0, -1.0, -1.0, 3.0 }; + double covariances1_[] = {1.0, 1.0, 1.0, 1.0}; + double covariances2_[] = {-1.0, 1.0, 0.0, 0.0}; + double covariances3_[] = {-1.0, -1.0, 2.0, 0.0}; + double covariances4_[] = {-1.0, 
-1.0, -1.0, 3.0}; ml::maths::CVectorNx1 covariances1(covariances1_); ml::maths::CVectorNx1 covariances2(covariances2_); @@ -926,14 +794,10 @@ void CBasicStatisticsTest::testCovariances() ml::maths::CVectorNx1 covariances4(covariances4_); ml::maths::CSymmetricMatrixNxN covariance( - 10.0 * ml::maths::CSymmetricMatrixNxN(ml::maths::E_OuterProduct, - covariances1 / covariances1.euclidean()) - + 5.0 * ml::maths::CSymmetricMatrixNxN(ml::maths::E_OuterProduct, - covariances2 / covariances2.euclidean()) - + 5.0 * ml::maths::CSymmetricMatrixNxN(ml::maths::E_OuterProduct, - covariances3 / covariances3.euclidean()) - + 2.0 * ml::maths::CSymmetricMatrixNxN(ml::maths::E_OuterProduct, - covariances4 / covariances4.euclidean())); + 10.0 * ml::maths::CSymmetricMatrixNxN(ml::maths::E_OuterProduct, covariances1 / covariances1.euclidean()) + + 5.0 * ml::maths::CSymmetricMatrixNxN(ml::maths::E_OuterProduct, covariances2 / covariances2.euclidean()) + + 5.0 * ml::maths::CSymmetricMatrixNxN(ml::maths::E_OuterProduct, covariances3 / covariances3.euclidean()) + + 2.0 * ml::maths::CSymmetricMatrixNxN(ml::maths::E_OuterProduct, covariances4 / covariances4.euclidean())); std::size_t n = 10000u; @@ -942,8 +806,7 @@ void CBasicStatisticsTest::testCovariances() ml::maths::CBasicStatistics::SSampleCovariances sampleCovariance; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { sampleCovariance.add(samples[i]); } @@ -953,17 +816,10 @@ void CBasicStatisticsTest::testCovariances() LOG_DEBUG("mean = " << ml::maths::CBasicStatistics::mean(sampleCovariance)); LOG_DEBUG("covariances = " << ml::maths::CBasicStatistics::covariances(sampleCovariance)); - for (std::size_t i = 0u; i < 4; ++i) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean(i), - (ml::maths::CBasicStatistics::mean(sampleCovariance))(i), - 0.05); - for (std::size_t j = 0u; j < 4; ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(covariance(i, j), - (ml::maths::CBasicStatistics::covariances(sampleCovariance))(i, j), - 0.16); - + for (std::size_t i = 0u; i < 4; ++i) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(mean(i), (ml::maths::CBasicStatistics::mean(sampleCovariance))(i), 0.05); + for (std::size_t j = 0u; j < 4; ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(covariance(i, j), (ml::maths::CBasicStatistics::covariances(sampleCovariance))(i, j), 0.16); } } } @@ -975,21 +831,13 @@ void CBasicStatisticsTest::testCovariances() rng.generateUniformSamples(5.0, 10.0, 400, coordinates); std::vector> points; - for (std::size_t i = 0u; i < coordinates.size(); i += 4) - { - double c[] = - { - coordinates[i+0], - coordinates[i+1], - coordinates[i+2], - coordinates[i+3] - }; + for (std::size_t i = 0u; i < coordinates.size(); i += 4) { + double c[] = {coordinates[i + 0], coordinates[i + 1], coordinates[i + 2], coordinates[i + 3]}; points.push_back(ml::maths::CVectorNx1(c)); } ml::maths::CBasicStatistics::SSampleCovariances expectedSampleCovariances; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { expectedSampleCovariances.add(points[i]); } @@ -1006,8 +854,7 @@ void CBasicStatisticsTest::testCovariances() } } -void CBasicStatisticsTest::testCovariancesLedoitWolf() -{ +void CBasicStatisticsTest::testCovariancesLedoitWolf() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CBasicStatisticsTest::testCovariancesLedoitWolf |"); LOG_DEBUG("+---------------------------------------------------+"); @@ -1020,37 +867,24 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() ml::test::CRandomNumbers rng; 
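// ---------------------------------------------------------------------------
// Illustrative aside, not part of the patch: the test below compares the
// maximum likelihood covariance estimate with a Ledoit-Wolf style shrinkage
// estimate. The idea is to pull the sample covariance S towards a highly
// structured target, here the scaled identity (tr(S) / d) * I, trading a
// little bias for a large variance reduction when samples are scarce, which
// is why the shrunk estimate wins at the small sample counts tested here.
// A minimal sketch, assuming the shrinkage weight lambda is supplied by the
// caller (the real estimator derives lambda from the data):
//
//     S_hat = (1 - lambda) * S + lambda * (tr(S) / d) * I
//
#include <cstddef>
#include <vector>

using TDoubleMatrix = std::vector<std::vector<double>>;

TDoubleMatrix shrinkTowardsIdentity(const TDoubleMatrix& s, double lambda) {
    std::size_t d = s.size();
    double trace = 0.0;
    for (std::size_t i = 0; i < d; ++i) {
        trace += s[i][i];
    }
    double mu = trace / static_cast<double>(d); // mean eigenvalue of S
    TDoubleMatrix shrunk(s);
    for (std::size_t i = 0; i < d; ++i) {
        for (std::size_t j = 0; j < d; ++j) {
            shrunk[i][j] *= 1.0 - lambda; // damp every entry of S ...
        }
        shrunk[i][i] += lambda * mu;      // ... and mix mu * I back in
    }
    return shrunk;
}
// ---------------------------------------------------------------------------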
- double means[][2] = - { - { 10.0, 10.0 }, - { 20.0, 150.0 }, - { -10.0, -20.0 }, - { -20.0, 40.0 }, - { 40.0, 90.0 } - }; - - double covariances[][2][2] = - { - { { 40.0, 0.0 }, { 0.0, 40.0 } }, - { { 20.0, 5.0 }, { 5.0, 10.0 } }, - { { 300.0, -70.0 }, { -70.0, 60.0 } }, - { { 100.0, 20.0 }, { 20.0, 60.0 } }, - { { 50.0, -10.0 }, { -10.0, 60.0 } } - }; + double means[][2] = {{10.0, 10.0}, {20.0, 150.0}, {-10.0, -20.0}, {-20.0, 40.0}, {40.0, 90.0}}; + + double covariances[][2][2] = {{{40.0, 0.0}, {0.0, 40.0}}, + {{20.0, 5.0}, {5.0, 10.0}}, + {{300.0, -70.0}, {-70.0, 60.0}}, + {{100.0, 20.0}, {20.0, 60.0}}, + {{50.0, -10.0}, {-10.0, 60.0}}}; ml::maths::CBasicStatistics::SSampleMean::TAccumulator error; ml::maths::CBasicStatistics::SSampleMean::TAccumulator errorLW; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { LOG_DEBUG("*** test " << i << " ***"); TDoubleVec mean(boost::begin(means[i]), boost::end(means[i])); TDoubleVecVec covariance; - for (std::size_t j = 0u; j < boost::size(covariances[i]); ++j) - { - covariance.push_back(TDoubleVec(boost::begin(covariances[i][j]), - boost::end(covariances[i][j]))); + for (std::size_t j = 0u; j < boost::size(covariances[i]); ++j) { + covariance.push_back(TDoubleVec(boost::begin(covariances[i][j]), boost::end(covariances[i][j]))); } TMatrix2 covExpected(covariance); LOG_DEBUG("cov expected = " << covExpected); @@ -1060,11 +894,9 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() // Test the frobenius norm of the error in the covariance matrix. - for (std::size_t j = 3u; j < samples.size(); ++j) - { + for (std::size_t j = 3u; j < samples.size(); ++j) { TVector2Vec jsamples; - for (std::size_t k = 0u; k < j; ++k) - { + for (std::size_t k = 0u; k < j; ++k) { jsamples.push_back(TVector2(samples[k])); } @@ -1074,14 +906,13 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() ml::maths::CBasicStatistics::SSampleCovariances covLW; ml::maths::CBasicStatistics::covariancesLedoitWolf(jsamples, covLW); - const TMatrix2 &covML = ml::maths::CBasicStatistics::maximumLikelihoodCovariances(cov); - const TMatrix2 &covLWML = ml::maths::CBasicStatistics::maximumLikelihoodCovariances(covLW); + const TMatrix2& covML = ml::maths::CBasicStatistics::maximumLikelihoodCovariances(cov); + const TMatrix2& covLWML = ml::maths::CBasicStatistics::maximumLikelihoodCovariances(covLW); - double errorML = (covML - covExpected).frobenius(); + double errorML = (covML - covExpected).frobenius(); double errorLWML = (covLWML - covExpected).frobenius(); - if (j % 5 == 0) - { + if (j % 5 == 0) { LOG_DEBUG("cov ML = " << covML); LOG_DEBUG("cov LWML = " << covLWML); LOG_DEBUG("error ML = " << errorML << ", error LWML = " << errorLWML); @@ -1094,12 +925,10 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() LOG_DEBUG("error = " << error); LOG_DEBUG("error LW = " << errorLW); - CPPUNIT_ASSERT( ml::maths::CBasicStatistics::mean(errorLW) - < 0.9 * ml::maths::CBasicStatistics::mean(error)); + CPPUNIT_ASSERT(ml::maths::CBasicStatistics::mean(errorLW) < 0.9 * ml::maths::CBasicStatistics::mean(error)); } -void CBasicStatisticsTest::testMedian() -{ +void CBasicStatisticsTest::testMedian() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CBasicStatisticsTest::testMedian |"); LOG_DEBUG("+------------------------------------+"); @@ -1112,45 +941,45 @@ void CBasicStatisticsTest::testMedian() CPPUNIT_ASSERT_EQUAL(0.0, median); } { - double sample[] = { 1.0 }; + double sample[] = {1.0}; - 
ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample+sizeof(sample)/sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); CPPUNIT_ASSERT_EQUAL(1.0, median); } { - double sample[] = { 2.0, 1.0 }; + double sample[] = {2.0, 1.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample+sizeof(sample)/sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); CPPUNIT_ASSERT_EQUAL(1.5, median); } { - double sample[] = { 3.0, 1.0, 2.0 }; + double sample[] = {3.0, 1.0, 2.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample+sizeof(sample)/sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); CPPUNIT_ASSERT_EQUAL(2.0, median); } { - double sample[] = { 3.0, 5.0, 9.0, 1.0, 2.0, 6.0, 7.0, 4.0, 8.0 }; + double sample[] = {3.0, 5.0, 9.0, 1.0, 2.0, 6.0, 7.0, 4.0, 8.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample+sizeof(sample)/sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); CPPUNIT_ASSERT_EQUAL(5.0, median); } { - double sample[] = { 3.0, 5.0, 10.0, 2.0, 6.0, 7.0, 1.0, 9.0, 4.0, 8.0 }; + double sample[] = {3.0, 5.0, 10.0, 2.0, 6.0, 7.0, 1.0, 9.0, 4.0, 8.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample+sizeof(sample)/sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); @@ -1158,8 +987,7 @@ void CBasicStatisticsTest::testMedian() } } -void CBasicStatisticsTest::testOrderStatistics() -{ +void CBasicStatisticsTest::testOrderStatistics() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CBasicStatisticsTest::testOrderStatistics |"); LOG_DEBUG("+---------------------------------------------+"); @@ -1175,15 +1003,14 @@ void CBasicStatisticsTest::testOrderStatistics() { // Test on the stack min, max, combine and persist and restore. - double data[] = { 1.0, 2.3, 1.1, 1.0, 5.0, 3.0, 11.0, 0.2, 15.8, 12.3 }; + double data[] = {1.0, 2.3, 1.1, 1.0, 5.0, 3.0, 11.0, 0.2, 15.8, 12.3}; TMinStatsStack minValues; TMaxStatsStack maxValues; TMinStatsStack minFirstHalf; TMinStatsStack minSecondHalf; - for (size_t i = 0; i < boost::size(data); ++i) - { + for (size_t i = 0; i < boost::size(data); ++i) { minValues.add(data[i]); maxValues.add(data[i]); (2 * i < boost::size(data) ? 
minFirstHalf : minSecondHalf).add(data[i]); @@ -1191,15 +1018,12 @@ void CBasicStatisticsTest::testOrderStatistics() std::sort(boost::begin(data), boost::end(data)); minValues.sort(); - LOG_DEBUG("x_1 = " << minValues[0] - << ", x_2 = " << minValues[1]); + LOG_DEBUG("x_1 = " << minValues[0] << ", x_2 = " << minValues[1]); CPPUNIT_ASSERT(std::equal(minValues.begin(), minValues.end(), data)); std::sort(boost::begin(data), boost::end(data), std::greater()); maxValues.sort(); - LOG_DEBUG("x_n = " << maxValues[0] - << ", x_(n-1) = " << maxValues[1] - << ", x_(n-2) = " << maxValues[2]); + LOG_DEBUG("x_n = " << maxValues[0] << ", x_(n-1) = " << maxValues[1] << ", x_(n-2) = " << maxValues[2]); CPPUNIT_ASSERT(std::equal(maxValues.begin(), maxValues.end(), data)); CPPUNIT_ASSERT_EQUAL(static_cast(2), minValues.count()); @@ -1207,8 +1031,7 @@ void CBasicStatisticsTest::testOrderStatistics() TMinStatsStack minFirstPlusSecondHalf = (minFirstHalf + minSecondHalf); minFirstPlusSecondHalf.sort(); - CPPUNIT_ASSERT(std::equal(minValues.begin(), minValues.end(), - minFirstPlusSecondHalf.begin())); + CPPUNIT_ASSERT(std::equal(minValues.begin(), minValues.end(), minFirstPlusSecondHalf.begin())); // Test persist is idempotent. @@ -1227,8 +1050,7 @@ void CBasicStatisticsTest::testOrderStatistics() ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel( - boost::bind(SRestore(), boost::ref(restoredMinValues), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restoredMinValues), _1))); } // The XML representation of the new stats object should be unchanged. @@ -1244,14 +1066,13 @@ void CBasicStatisticsTest::testOrderStatistics() { // Test on the heap min, max, combine and persist and restore. - double data[] = { 1.0, 2.3, 1.1, 1.0, 5.0, 3.0, 11.0, 0.2, 15.8, 12.3 }; + double data[] = {1.0, 2.3, 1.1, 1.0, 5.0, 3.0, 11.0, 0.2, 15.8, 12.3}; TMinStatsHeap min2Values(2); TMaxStatsHeap max3Values(3); TMaxStatsHeap max20Values(20); - for (size_t i = 0; i < boost::size(data); ++i) - { + for (size_t i = 0; i < boost::size(data); ++i) { min2Values.add(data[i]); max3Values.add(data[i]); max20Values.add(data[i]); @@ -1259,15 +1080,12 @@ void CBasicStatisticsTest::testOrderStatistics() std::sort(boost::begin(data), boost::end(data)); min2Values.sort(); - LOG_DEBUG("x_1 = " << min2Values[0] - << ", x_2 = " << min2Values[1]); + LOG_DEBUG("x_1 = " << min2Values[0] << ", x_2 = " << min2Values[1]); CPPUNIT_ASSERT(std::equal(min2Values.begin(), min2Values.end(), data)); std::sort(boost::begin(data), boost::end(data), std::greater()); max3Values.sort(); - LOG_DEBUG("x_n = " << max3Values[0] - << ", x_(n-1) = " << max3Values[1] - << ", x_(n-2) = " << max3Values[2]); + LOG_DEBUG("x_n = " << max3Values[0] << ", x_(n-1) = " << max3Values[1] << ", x_(n-2) = " << max3Values[2]); CPPUNIT_ASSERT(std::equal(max3Values.begin(), max3Values.end(), data)); max20Values.sort(); @@ -1291,8 +1109,7 @@ void CBasicStatisticsTest::testOrderStatistics() ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel( - boost::bind(SRestore(), boost::ref(restoredMaxValues), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restoredMaxValues), _1))); } // The XML representation of the new stats object should be unchanged. 
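// ---------------------------------------------------------------------------
// Illustrative aside, not part of the patch: the order statistics
// accumulators exercised in this test retain only the n smallest (or
// largest) of the values added, so memory stays O(n) however many points
// stream through. A minimal sketch of the "n smallest" case built on
// std::priority_queue as a max-heap; the class name is hypothetical and the
// real accumulators also support combining and persistence:
#include <cstddef>
#include <queue>
#include <vector>

class CSmallestN {
public:
    explicit CSmallestN(std::size_t n) : m_N(n) {}

    void add(double x) {
        if (m_Heap.size() < m_N) {
            m_Heap.push(x);
        } else if (x < m_Heap.top()) {
            m_Heap.pop(); // evict the largest of the n smallest so far
            m_Heap.push(x);
        }
    }

    // The retained values in ascending order.
    std::vector<double> sorted() const {
        std::priority_queue<double> heap(m_Heap);
        std::vector<double> result(heap.size());
        for (std::size_t i = result.size(); i > 0; --i) {
            result[i - 1] = heap.top(); // pop largest first, fill backwards
            heap.pop();
        }
        return result;
    }

private:
    std::size_t m_N;
    std::priority_queue<double> m_Heap; // top() is the largest retained value
};
// ---------------------------------------------------------------------------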
@@ -1327,18 +1144,14 @@ void CBasicStatisticsTest::testOrderStatistics() CPPUNIT_ASSERT_EQUAL(1.0, min.biggest()); CPPUNIT_ASSERT_EQUAL(1.0, max.biggest()); std::size_t i{0}; - for (auto value : { 3.6, -6.1, 1.0, 3.4 }) - { + for (auto value : {3.6, -6.1, 1.0, 3.4}) { min.add(value); max.add(value); - if (i++ == 0) - { + if (i++ == 0) { CPPUNIT_ASSERT_EQUAL(3.6, min.biggest()); CPPUNIT_ASSERT_EQUAL(1.0, max.biggest()); - } - else - { - CPPUNIT_ASSERT_EQUAL( 3.6, min.biggest()); + } else { + CPPUNIT_ASSERT_EQUAL(3.6, min.biggest()); CPPUNIT_ASSERT_EQUAL(-6.1, max.biggest()); } } @@ -1356,8 +1169,7 @@ void CBasicStatisticsTest::testOrderStatistics() } } -void CBasicStatisticsTest::testMinMax() -{ +void CBasicStatisticsTest::testMinMax() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CBasicStatisticsTest::testMinMax |"); LOG_DEBUG("+------------------------------------+"); @@ -1373,9 +1185,9 @@ void CBasicStatisticsTest::testMinMax() CPPUNIT_ASSERT(!minmax.initialized()); minmax.add(positive); CPPUNIT_ASSERT(minmax.initialized()); - CPPUNIT_ASSERT_EQUAL(0.3, minmax.min()); + CPPUNIT_ASSERT_EQUAL(0.3, minmax.min()); CPPUNIT_ASSERT_EQUAL(11.7, minmax.max()); - CPPUNIT_ASSERT_EQUAL(0.3, minmax.signMargin()); + CPPUNIT_ASSERT_EQUAL(0.3, minmax.signMargin()); } { ml::maths::CBasicStatistics::CMinMax minmax; @@ -1383,8 +1195,8 @@ void CBasicStatisticsTest::testMinMax() minmax.add(negative); CPPUNIT_ASSERT(minmax.initialized()); CPPUNIT_ASSERT_EQUAL(-18.2, minmax.min()); - CPPUNIT_ASSERT_EQUAL(-0.8, minmax.max()); - CPPUNIT_ASSERT_EQUAL(-0.8, minmax.signMargin()); + CPPUNIT_ASSERT_EQUAL(-0.8, minmax.max()); + CPPUNIT_ASSERT_EQUAL(-0.8, minmax.signMargin()); } { ml::maths::CBasicStatistics::CMinMax minmax; @@ -1392,8 +1204,8 @@ void CBasicStatisticsTest::testMinMax() minmax.add(mixed); CPPUNIT_ASSERT(minmax.initialized()); CPPUNIT_ASSERT_EQUAL(-8.0, minmax.min()); - CPPUNIT_ASSERT_EQUAL( 2.1, minmax.max()); - CPPUNIT_ASSERT_EQUAL( 0.0, minmax.signMargin()); + CPPUNIT_ASSERT_EQUAL(2.1, minmax.max()); + CPPUNIT_ASSERT_EQUAL(0.0, minmax.signMargin()); } { ml::maths::CBasicStatistics::CMinMax minmax1; diff --git a/lib/maths/unittest/CBasicStatisticsTest.h b/lib/maths/unittest/CBasicStatisticsTest.h index 936de50a99..b1cad731e8 100644 --- a/lib/maths/unittest/CBasicStatisticsTest.h +++ b/lib/maths/unittest/CBasicStatisticsTest.h @@ -8,21 +8,18 @@ #include +class CBasicStatisticsTest : public CppUnit::TestFixture { +public: + void testMean(); + void testCentralMoments(); + void testVectorCentralMoments(); + void testCovariances(); + void testCovariancesLedoitWolf(); + void testMedian(); + void testOrderStatistics(); + void testMinMax(); -class CBasicStatisticsTest : public CppUnit::TestFixture -{ - public: - void testMean(); - void testCentralMoments(); - void testVectorCentralMoments(); - void testCovariances(); - void testCovariancesLedoitWolf(); - void testMedian(); - void testOrderStatistics(); - void testMinMax(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CBasicStatisticsTest_h - diff --git a/lib/maths/unittest/CBjkstUniqueValuesTest.cc b/lib/maths/unittest/CBjkstUniqueValuesTest.cc index 802bfeb73a..31ab39b35c 100644 --- a/lib/maths/unittest/CBjkstUniqueValuesTest.cc +++ b/lib/maths/unittest/CBjkstUniqueValuesTest.cc @@ -20,56 +20,44 @@ using namespace ml; using namespace maths; using namespace test; -namespace -{ +namespace { using TDoubleVec = std::vector; using TSizeVec = std::vector; using TUInt32Set = std::set; using 
TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; -uint8_t trailingZeros(uint32_t x) -{ +uint8_t trailingZeros(uint32_t x) { uint8_t result = 0; - for (/**/; (x & 0x1) == 0; x >>= 1) - { + for (/**/; (x & 0x1) == 0; x >>= 1) { ++result; } return result; } - } -void CBjkstUniqueValuesTest::testTrailingZeros() -{ +void CBjkstUniqueValuesTest::testTrailingZeros() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CBjkstUniqueValuesTest::testTrailingZeros |"); LOG_DEBUG("+---------------------------------------------+"); uint32_t n = 1; - for (uint8_t i = 0; i < 32; n <<= 1, ++i) - { + for (uint8_t i = 0; i < 32; n <<= 1, ++i) { CPPUNIT_ASSERT_EQUAL(i, CBjkstUniqueValues::trailingZeros(n)); } TDoubleVec samples; CRandomNumbers rng; - rng.generateUniformSamples(0.0, - std::numeric_limits::max(), - 10000, - samples); + rng.generateUniformSamples(0.0, std::numeric_limits::max(), 10000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { uint32_t sample = static_cast(samples[i]); - CPPUNIT_ASSERT_EQUAL(trailingZeros(sample), - CBjkstUniqueValues::trailingZeros(sample)); + CPPUNIT_ASSERT_EQUAL(trailingZeros(sample), CBjkstUniqueValues::trailingZeros(sample)); } } -void CBjkstUniqueValuesTest::testNumber() -{ +void CBjkstUniqueValuesTest::testNumber() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CBjkstUniqueValuesTest::testNumber |"); LOG_DEBUG("+--------------------------------------+"); @@ -85,8 +73,7 @@ void CBjkstUniqueValuesTest::testNumber() double totalError6 = 0.0; std::size_t largeError6Count = 0u; - for (std::size_t i = 0u; i < numberTests; ++i) - { + for (std::size_t i = 0u; i < numberTests; ++i) { CBjkstUniqueValues approxUniqueValues5(5, 60); CBjkstUniqueValues approxUniqueValues6(6, 60); TUInt32Set uniqueValues; @@ -94,8 +81,7 @@ void CBjkstUniqueValuesTest::testNumber() TDoubleVec samples; rng.generateUniformSamples(0.0, 20000.0, 500u + i, samples); - for (std::size_t j = 0u; j < 2 * samples.size(); ++j) - { + for (std::size_t j = 0u; j < 2 * samples.size(); ++j) { uint32_t sample = static_cast(samples[j % samples.size()]); approxUniqueValues5.add(sample); approxUniqueValues6.add(sample); @@ -114,14 +100,12 @@ void CBjkstUniqueValuesTest::testNumber() CPPUNIT_ASSERT(error5 < 0.35); CPPUNIT_ASSERT(error6 < 0.30); - if (error5 > 0.14) - { + if (error5 > 0.14) { ++largeError5Count; } totalError5 += error5; - if (error6 > 0.12) - { + if (error6 > 0.12) { ++largeError6Count; } totalError6 += error6; @@ -130,10 +114,8 @@ void CBjkstUniqueValuesTest::testNumber() totalError5 /= static_cast(numberTests); totalError6 /= static_cast(numberTests); - LOG_DEBUG("totalError5 = " << totalError5 - << ", largeErrorCount5 = " << largeError5Count); - LOG_DEBUG("totalError6 = " << totalError6 - << ", largeErrorCount6 = " << largeError6Count); + LOG_DEBUG("totalError5 = " << totalError5 << ", largeErrorCount5 = " << largeError5Count); + LOG_DEBUG("totalError6 = " << totalError6 << ", largeErrorCount6 = " << largeError6Count); CPPUNIT_ASSERT(totalError5 < 0.07); CPPUNIT_ASSERT(largeError5Count < 80); @@ -142,8 +124,7 @@ void CBjkstUniqueValuesTest::testNumber() CPPUNIT_ASSERT(largeError6Count < 85); } -void CBjkstUniqueValuesTest::testRemove() -{ +void CBjkstUniqueValuesTest::testRemove() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CBjkstUniqueValuesTest::testRemove |"); LOG_DEBUG("+--------------------------------------+"); @@ -165,56 +146,45 
@@ void CBjkstUniqueValuesTest::testRemove() TMeanAccumulator meanRelativeErrorBeforeRemove; TMeanAccumulator meanRelativeErrorAfterRemove; - for (std::size_t t = 0u; t < numberTests; ++t) - { - LOG_DEBUG("*** test = " << t+1 << " ***"); + for (std::size_t t = 0u; t < numberTests; ++t) { + LOG_DEBUG("*** test = " << t + 1 << " ***"); maths::CBjkstUniqueValues sketch(2, 150); TUInt32Set unique; - for (std::size_t i = 0u; i < categories.size(); ++i) - { + for (std::size_t i = 0u; i < categories.size(); ++i) { uint32_t category = static_cast(categories[i]); sketch.add(category); unique.insert(category); } LOG_DEBUG("exact = " << unique.size()); LOG_DEBUG("approx = " << sketch.number()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast(unique.size()), - static_cast(sketch.number()), - 0.3 * static_cast(unique.size())); - meanRelativeErrorBeforeRemove.add(std::fabs( static_cast(unique.size()) - - static_cast(sketch.number())) - / static_cast(unique.size())); - + CPPUNIT_ASSERT_DOUBLES_EQUAL( + static_cast(unique.size()), static_cast(sketch.number()), 0.3 * static_cast(unique.size())); + meanRelativeErrorBeforeRemove.add(std::fabs(static_cast(unique.size()) - static_cast(sketch.number())) / + static_cast(unique.size())); rng.random_shuffle(categories.begin(), categories.end()); - for (std::size_t i = 0u; i < toRemove[t]; ++i) - { + for (std::size_t i = 0u; i < toRemove[t]; ++i) { uint32_t category = static_cast(categories[i]); sketch.remove(category); unique.erase(category); } LOG_DEBUG("exact = " << unique.size()); LOG_DEBUG("approx = " << sketch.number()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast(unique.size()), - static_cast(sketch.number()), - 0.25 * static_cast(unique.size())); - meanRelativeErrorAfterRemove.add(std::fabs( static_cast(unique.size()) - - static_cast(sketch.number())) - / static_cast(unique.size())); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + static_cast(unique.size()), static_cast(sketch.number()), 0.25 * static_cast(unique.size())); + meanRelativeErrorAfterRemove.add(std::fabs(static_cast(unique.size()) - static_cast(sketch.number())) / + static_cast(unique.size())); } - LOG_DEBUG("meanRelativeErrorBeforeRemove = " - << maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove)); - LOG_DEBUG("meanRelativeErrorAfterRemove = " - << maths::CBasicStatistics::mean(meanRelativeErrorAfterRemove)); + LOG_DEBUG("meanRelativeErrorBeforeRemove = " << maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove)); + LOG_DEBUG("meanRelativeErrorAfterRemove = " << maths::CBasicStatistics::mean(meanRelativeErrorAfterRemove)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove) < 0.05); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelativeErrorAfterRemove) - < 1.3 * maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelativeErrorAfterRemove) < + 1.3 * maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove)); } -void CBjkstUniqueValuesTest::testSwap() -{ +void CBjkstUniqueValuesTest::testSwap() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CBjkstUniqueValuesTest::testSwap |"); LOG_DEBUG("+------------------------------------+"); @@ -234,20 +204,16 @@ void CBjkstUniqueValuesTest::testSwap() maths::CBjkstUniqueValues sketch2(2, 110); maths::CBjkstUniqueValues sketch3(3, 120); maths::CBjkstUniqueValues sketch4(2, 180); - for (std::size_t i = 0u; i < categories1.size(); ++i) - { + for (std::size_t i = 0u; i < categories1.size(); ++i) { sketch1.add(static_cast(categories1[i])); } - for 
(std::size_t i = 0u; i < categories2.size(); ++i) - { + for (std::size_t i = 0u; i < categories2.size(); ++i) { sketch2.add(static_cast(categories2[i])); } - for (std::size_t i = 0u; i < categories3.size(); ++i) - { + for (std::size_t i = 0u; i < categories3.size(); ++i) { sketch3.add(static_cast(categories3[i])); } - for (std::size_t i = 0u; i < categories4.size(); ++i) - { + for (std::size_t i = 0u; i < categories4.size(); ++i) { sketch4.add(static_cast(categories4[i])); } @@ -281,8 +247,7 @@ void CBjkstUniqueValuesTest::testSwap() sketch3.swap(sketch4); } -void CBjkstUniqueValuesTest::testSmall() -{ +void CBjkstUniqueValuesTest::testSmall() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CBjkstUniqueValuesTest::testSmall |"); LOG_DEBUG("+-------------------------------------+"); @@ -300,8 +265,7 @@ void CBjkstUniqueValuesTest::testSmall() maths::CBjkstUniqueValues sketch(3, 100); TUInt32Set unique; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { uint32_t category = static_cast(categories[i]); sketch.add(category); unique.insert(category); @@ -310,27 +274,23 @@ void CBjkstUniqueValuesTest::testSmall() } LOG_DEBUG("# categories = " << sketch.number()); - for (std::size_t i = 100u; i < categories.size(); ++i) - { + for (std::size_t i = 100u; i < categories.size(); ++i) { uint32_t category = static_cast(categories[i]); sketch.add(category); unique.insert(category); LOG_DEBUG("exact = " << unique.size()); LOG_DEBUG("approx = " << sketch.number()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast(unique.size()), - static_cast(sketch.number()), - 0.15 * static_cast(unique.size())); - meanRelativeError.add(std::fabs( static_cast(unique.size()) - - static_cast(sketch.number())) - / static_cast(unique.size())); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + static_cast(unique.size()), static_cast(sketch.number()), 0.15 * static_cast(unique.size())); + meanRelativeError.add(std::fabs(static_cast(unique.size()) - static_cast(sketch.number())) / + static_cast(unique.size())); } LOG_DEBUG("meanRelativeError = " << maths::CBasicStatistics::mean(meanRelativeError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelativeError) < 0.05); } -void CBjkstUniqueValuesTest::testPersist() -{ +void CBjkstUniqueValuesTest::testPersist() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CBjkstUniqueValuesTest::testPersist |"); LOG_DEBUG("+---------------------------------------+"); @@ -341,8 +301,7 @@ void CBjkstUniqueValuesTest::testPersist() rng.generateUniformSamples(0, 50000, 1000, categories); maths::CBjkstUniqueValues origSketch(2, 100); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { origSketch.add(static_cast(categories[i])); } @@ -361,10 +320,8 @@ void CBjkstUniqueValuesTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CBjkstUniqueValues restoredSketch(traverser); - LOG_DEBUG("orig checksum = " << origSketch.checksum() - << ", new checksum = " << restoredSketch.checksum()); - CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), - restoredSketch.checksum()); + LOG_DEBUG("orig checksum = " << origSketch.checksum() << ", new checksum = " << restoredSketch.checksum()); + CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), restoredSketch.checksum()); std::string newXml; core::CRapidXmlStatePersistInserter inserter("root"); @@ -374,8 +331,7 @@ void CBjkstUniqueValuesTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } - for (std::size_t i = 100u; i < categories.size(); ++i) - { + for 
(std::size_t i = 100u; i < categories.size(); ++i) { origSketch.add(static_cast(categories[i])); } @@ -394,10 +350,8 @@ void CBjkstUniqueValuesTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CBjkstUniqueValues restoredSketch(traverser); - LOG_DEBUG("orig checksum = " << origSketch.checksum() - << ", new checksum = " << restoredSketch.checksum()); - CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), - restoredSketch.checksum()); + LOG_DEBUG("orig checksum = " << origSketch.checksum() << ", new checksum = " << restoredSketch.checksum()); + CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), restoredSketch.checksum()); std::string newXml; core::CRapidXmlStatePersistInserter inserter("root"); @@ -408,28 +362,21 @@ void CBjkstUniqueValuesTest::testPersist() } } -CppUnit::Test *CBjkstUniqueValuesTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBjkstUniqueValuesTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBjkstUniqueValuesTest::testTrailingZeros", - &CBjkstUniqueValuesTest::testTrailingZeros) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBjkstUniqueValuesTest::testNumber", - &CBjkstUniqueValuesTest::testNumber) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBjkstUniqueValuesTest::testRemove", - &CBjkstUniqueValuesTest::testRemove) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBjkstUniqueValuesTest::testSwap", - &CBjkstUniqueValuesTest::testSwap) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBjkstUniqueValuesTest::testSmall", - &CBjkstUniqueValuesTest::testSmall) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBjkstUniqueValuesTest::testPersist", - &CBjkstUniqueValuesTest::testPersist) ); +CppUnit::Test* CBjkstUniqueValuesTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBjkstUniqueValuesTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CBjkstUniqueValuesTest::testTrailingZeros", + &CBjkstUniqueValuesTest::testTrailingZeros)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBjkstUniqueValuesTest::testNumber", &CBjkstUniqueValuesTest::testNumber)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBjkstUniqueValuesTest::testRemove", &CBjkstUniqueValuesTest::testRemove)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBjkstUniqueValuesTest::testSwap", &CBjkstUniqueValuesTest::testSwap)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBjkstUniqueValuesTest::testSmall", &CBjkstUniqueValuesTest::testSmall)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBjkstUniqueValuesTest::testPersist", &CBjkstUniqueValuesTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CBjkstUniqueValuesTest.h b/lib/maths/unittest/CBjkstUniqueValuesTest.h index 35ff480e30..496ee38bc5 100644 --- a/lib/maths/unittest/CBjkstUniqueValuesTest.h +++ b/lib/maths/unittest/CBjkstUniqueValuesTest.h @@ -9,17 +9,16 @@ #include -class CBjkstUniqueValuesTest : public CppUnit::TestFixture -{ - public: - void testTrailingZeros(); - void testNumber(); - void testRemove(); - void testSwap(); - void testSmall(); - void testPersist(); +class CBjkstUniqueValuesTest : public CppUnit::TestFixture { +public: + void testTrailingZeros(); + void testNumber(); + void testRemove(); + void testSwap(); + void testSmall(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CBjkstUniqueValuesTest_h diff --git a/lib/maths/unittest/CBootstrapClustererTest.cc 
b/lib/maths/unittest/CBootstrapClustererTest.cc index 874b671d86..98fc4f1e35 100644 --- a/lib/maths/unittest/CBootstrapClustererTest.cc +++ b/lib/maths/unittest/CBootstrapClustererTest.cc @@ -20,8 +20,7 @@ using namespace ml; -namespace -{ +namespace { using TBoolVec = std::vector; using TDoubleVec = std::vector; @@ -34,61 +33,40 @@ using TMatrix2 = maths::CSymmetricMatrixNxN; using TMatrix2Vec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; -struct SVector2Hash -{ - std::size_t operator()(const TVector2 &x) const - { - return static_cast(x.checksum()); - } +struct SVector2Hash { + std::size_t operator()(const TVector2& x) const { return static_cast(x.checksum()); } }; using TVector2SizeUMap = boost::unordered_map; template -class CBootstrapClustererForTest : public maths::CBootstrapClusterer -{ - public: - using TBoolVec = typename maths::CBootstrapClusterer::TBoolVec; - using TSizeVec = typename maths::CBootstrapClusterer::TSizeVec; - using TSizeVecVecVec = typename maths::CBootstrapClusterer::TSizeVecVecVec; - using TPointVec = typename maths::CBootstrapClusterer::TPointVec; - using TGraph = typename maths::CBootstrapClusterer::TGraph; - - public: - CBootstrapClustererForTest(double overlapThreshold, double chainingFactor) : - maths::CBootstrapClusterer(overlapThreshold, chainingFactor) - {} - - void buildClusterGraph(TSizeVecVecVec &bootstrapClusters, - TGraph &graph) const - { - TPointVec dummy(1); // only used for reserving memory. - this->maths::CBootstrapClusterer::buildClusterGraph(dummy, bootstrapClusters, graph); - } +class CBootstrapClustererForTest : public maths::CBootstrapClusterer { +public: + using TBoolVec = typename maths::CBootstrapClusterer::TBoolVec; + using TSizeVec = typename maths::CBootstrapClusterer::TSizeVec; + using TSizeVecVecVec = typename maths::CBootstrapClusterer::TSizeVecVecVec; + using TPointVec = typename maths::CBootstrapClusterer::TPointVec; + using TGraph = typename maths::CBootstrapClusterer::TGraph; + +public: + CBootstrapClustererForTest(double overlapThreshold, double chainingFactor) + : maths::CBootstrapClusterer(overlapThreshold, chainingFactor) {} + + void buildClusterGraph(TSizeVecVecVec& bootstrapClusters, TGraph& graph) const { + TPointVec dummy(1); // only used for reserving memory. 
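For orientation, the forwarding call on the next line is the whole point of this shim: CBootstrapClustererForTest exists only to re-export protected members of maths::CBootstrapClusterer so the unit tests can drive them directly. A minimal sketch of the idiom, using illustrative names (CWidget, doWork) that are not from this codebase:

    class CWidget {
    protected:
        int doWork(int x) const { return 2 * x; } // implementation detail under test
    };

    class CWidgetForTest : public CWidget {
    public:
        // Re-export: forward to the protected base implementation so tests can call it.
        int doWork(int x) const { return this->CWidget::doWork(x); }
    };

The explicitly qualified this->CWidget::doWork call selects the base implementation rather than recursing into the re-exported name, which is why the methods in this class spell out this->maths::CBootstrapClusterer::... in full.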
+ this->maths::CBootstrapClusterer::buildClusterGraph(dummy, bootstrapClusters, graph); + } - std::size_t thickets(std::size_t n, const TGraph &graph, TSizeVec &components) const - { - return this->maths::CBootstrapClusterer::thickets(n, graph, components); - } + std::size_t thickets(std::size_t n, const TGraph& graph, TSizeVec& components) const { + return this->maths::CBootstrapClusterer::thickets(n, graph, components); + } - bool separate(TGraph &graph, TBoolVec &parity) const - { - return this->maths::CBootstrapClusterer::separate(graph, parity); - } + bool separate(TGraph& graph, TBoolVec& parity) const { return this->maths::CBootstrapClusterer::separate(graph, parity); } - bool cutSearch(std::size_t u, - std::size_t v, - const TGraph &graph, - double threshold, - double &cost, - TBoolVec &parities) const - { - return this->maths::CBootstrapClusterer::cutSearch(u, v, graph, threshold, cost, parities); - } + bool cutSearch(std::size_t u, std::size_t v, const TGraph& graph, double threshold, double& cost, TBoolVec& parities) const { + return this->maths::CBootstrapClusterer::cutSearch(u, v, graph, threshold, cost, parities); + } - TSizeVec &offsets() - { - return this->maths::CBootstrapClusterer::offsets(); - } + TSizeVec& offsets() { return this->maths::CBootstrapClusterer::offsets(); } }; using TBootstrapClustererForTest2 = CBootstrapClustererForTest; @@ -97,30 +75,23 @@ using TVertexItr = boost::graph_traits::vertex_iterator; using TEdgeItr = boost::graph_traits::edge_iterator; using TAdjacencyItr = boost::graph_traits::adjacency_iterator; -void clique(std::size_t a, std::size_t b, TGraph &graph) -{ - for (std::size_t i = a; i < b; ++i) - { - for (std::size_t j = i+1; j < b; ++j) - { +void clique(std::size_t a, std::size_t b, TGraph& graph) { + for (std::size_t i = a; i < b; ++i) { + for (std::size_t j = i + 1; j < b; ++j) { boost::put(boost::edge_weight, graph, boost::add_edge(i, j, graph).first, 1.0); } } } -void connect(const TSizeVec &U, const TSizeVec &V, TGraph &graph) -{ +void connect(const TSizeVec& U, const TSizeVec& V, TGraph& graph) { CPPUNIT_ASSERT_EQUAL(U.size(), V.size()); - for (std::size_t i = 0u; i < U.size(); ++i) - { + for (std::size_t i = 0u; i < U.size(); ++i) { boost::put(boost::edge_weight, graph, boost::add_edge(U[i], V[i], graph).first, 1.0); } } - } -void CBootstrapClustererTest::testFacade() -{ +void CBootstrapClustererTest::testFacade() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CBootstrapClustererTest::testFacade |"); LOG_DEBUG("+---------------------------------------+"); @@ -131,18 +102,17 @@ void CBootstrapClustererTest::testFacade() std::size_t improveStructureClusterSeeds = 2; std::size_t improveStructureKmeansIterations = 3; - for (std::size_t t = 0u; t < 10; ++t) - { + for (std::size_t t = 0u; t < 10; ++t) { LOG_DEBUG("Trial " << t); - double m1_[] = { 2.0, 2.0 }; - double v1_[] = { 4.0, 2.0, 4.0 }; + double m1_[] = {2.0, 2.0}; + double v1_[] = {4.0, 2.0, 4.0}; TVector2 m1(&m1_[0], &m1_[2]); TMatrix2 v1(&v1_[0], &v1_[3]); TVector2Vec points1; maths::CSampling::multivariateNormalSample(m1, v1, 50, points1); - double m2_[] = { 10.0, 5.0 }; - double v2_[] = { 4.0, 0.0, 1.0 }; + double m2_[] = {10.0, 5.0}; + double v2_[] = {4.0, 0.0, 1.0}; TVector2 m2(&m2_[0], &m2_[2]); TMatrix2 v2(&v2_[0], &v2_[3]); TVector2Vec points2; @@ -157,22 +127,17 @@ void CBootstrapClustererTest::testFacade() maths::CSampling::seed(); - maths::CBootstrapClustererFacade> > clusterer( - xmeans, - improveParamsKmeansIterations, - 
improveStructureClusterSeeds, - improveStructureKmeansIterations); + maths::CBootstrapClustererFacade>> clusterer( + xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations); TVector2VecVec actual; { TSizeVecVec clusters; clusterer.cluster(points, clusters); actual.resize(clusters.size()); - for (std::size_t i = 0u; i < clusters.size(); ++i) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { std::sort(clusters[i].begin(), clusters[i].end()); - for (std::size_t j = 0u; j < clusters[i].size(); ++j) - { + for (std::size_t j = 0u; j < clusters[i].size(); ++j) { actual[i].push_back(points[clusters[i][j]]); } } @@ -181,29 +146,23 @@ void CBootstrapClustererTest::testFacade() maths::CSampling::seed(); xmeans.setPoints(points); - xmeans.run(improveParamsKmeansIterations, - improveStructureClusterSeeds, - improveStructureKmeansIterations); + xmeans.run(improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations); TVector2VecVec expected(xmeans.clusters().size()); - for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) - { + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { expected[i] = xmeans.clusters()[i].points(); std::sort(expected[i].begin(), expected[i].end()); } CPPUNIT_ASSERT_EQUAL(expected.size(), actual.size()); - for (std::size_t i = 0u; i < expected.size(); ++i) - { - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected[i]), - core::CContainerPrinter::print(actual[i])); + for (std::size_t i = 0u; i < expected.size(); ++i) { + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected[i]), core::CContainerPrinter::print(actual[i])); } } } } -void CBootstrapClustererTest::testBuildClusterGraph() -{ +void CBootstrapClustererTest::testBuildClusterGraph() { LOG_DEBUG("+--------------------------------------------------+"); LOG_DEBUG("| CBootstrapClustererTest::testBuildClusterGraph |"); LOG_DEBUG("+--------------------------------------------------+"); @@ -212,88 +171,76 @@ void CBootstrapClustererTest::testBuildClusterGraph() // thresholds. 
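This patch does not show how buildClusterGraph scores the overlap between two bootstrap clusters, so the following is only a plausible sketch for orientation, not the library's implementation; the helper name overlap is hypothetical. One natural measure treats two clusters, represented as sorted point-index vectors, as connected when the shared fraction of the smaller one exceeds the threshold:

    #include <algorithm>
    #include <iterator>
    #include <vector>

    // Shared fraction of the smaller cluster; a and b must be sorted.
    double overlap(const std::vector<std::size_t>& a, const std::vector<std::size_t>& b) {
        std::vector<std::size_t> common;
        std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::back_inserter(common));
        return static_cast<double>(common.size()) / static_cast<double>(std::min(a.size(), b.size()));
    }

Under that reading, the 0.1, 0.5 and 0.9 thresholds exercised below should produce progressively sparser graphs, which is what the expected adjacency lists in this test encode.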
const std::size_t _ = 15; - std::size_t clusters_[][5][5] = - { - { { 0, 1, 2, 3, 4 }, { 5, 6, 7, 8, 9 }, { 10, 11, 12, 13, 14 }, { _, _, _, _, _ }, { _, _, _, _, _ } }, - { { 0, 1, _, 3, 4 }, { 5, 6, _, _, _ }, { 10, 11, 12, 13, 14 }, { 2, 7, 8, 9, _ }, { _, _, _, _, _ } }, - { { 0, 1, 2, 3, _ }, { 5, 6, 7, 8, 9 }, { _, _, 12, 13, 14 }, { 4, _, _, _, _ }, { 10, 11, _, _, _ } }, - { { _, _, 2, 3, 4 }, { _, _, _, 8, 9 }, { 10, 11, 12, 13, 14 }, { 0, 1, 5, 6, 7 }, { _, _, _, _, _ } } - }; + std::size_t clusters_[][5][5] = {{{0, 1, 2, 3, 4}, {5, 6, 7, 8, 9}, {10, 11, 12, 13, 14}, {_, _, _, _, _}, {_, _, _, _, _}}, + {{0, 1, _, 3, 4}, {5, 6, _, _, _}, {10, 11, 12, 13, 14}, {2, 7, 8, 9, _}, {_, _, _, _, _}}, + {{0, 1, 2, 3, _}, {5, 6, 7, 8, 9}, {_, _, 12, 13, 14}, {4, _, _, _, _}, {10, 11, _, _, _}}, + {{_, _, 2, 3, 4}, {_, _, _, 8, 9}, {10, 11, 12, 13, 14}, {0, 1, 5, 6, 7}, {_, _, _, _, _}}}; TBootstrapClustererForTest2::TSizeVecVecVec clusters(boost::size(clusters_)); - for (std::size_t i = 0u; i < boost::size(clusters_); ++i) - { - for (std::size_t j = 0u; j < boost::size(clusters_[i]); ++j) - { + for (std::size_t i = 0u; i < boost::size(clusters_); ++i) { + for (std::size_t j = 0u; j < boost::size(clusters_[i]); ++j) { TSizeVec cluster; - for (std::size_t k = 0u; k < boost::size(clusters_[i][j]); ++k) - { - if (clusters_[i][j][k] != _) - { + for (std::size_t k = 0u; k < boost::size(clusters_[i][j]); ++k) { + if (clusters_[i][j][k] != _) { cluster.push_back(clusters_[i][j][k]); } } - if (!cluster.empty()) - { + if (!cluster.empty()) { clusters[i].push_back(cluster); } } } - double overlaps[] = { 0.1, 0.5, 0.9 }; - std::string expected[] = - { - std::string("0: [3, 6, 7, 10, 12, 15]\n" - "1: [4, 6, 8, 13, 15]\n" - "2: [5, 9, 11, 14]\n" - "3: [0, 7, 10, 12, 15]\n" - "4: [1, 8, 15]\n" - "5: [2, 9, 11, 14]\n" - "6: [0, 1, 7, 8, 12, 13, 15]\n" - "7: [0, 3, 6, 12, 15]\n" - "8: [1, 4, 6, 13, 15]\n" - "9: [2, 5, 14]\n" - "10: [0, 3, 12]\n" - "11: [2, 5, 14]\n" - "12: [0, 3, 6, 7, 10]\n" - "13: [1, 6, 8]\n" - "14: [2, 5, 9, 11]\n" - "15: [0, 1, 3, 4, 6, 7, 8]\n"), - std::string("0: [3, 7, 10, 12]\n" - "1: [4, 6, 8, 13, 15]\n" - "2: [5, 9, 11, 14]\n" - "3: [0, 7, 10, 12]\n" - "4: [1, 8, 15]\n" - "5: [2, 9, 11, 14]\n" - "6: [1, 8, 13]\n" - "7: [0, 3, 12]\n" - "8: [1, 4, 6, 13, 15]\n" - "9: [2, 5, 14]\n" - "10: [0, 3, 12]\n" - "11: [2, 5, 14]\n" - "12: [0, 3, 7, 10]\n" - "13: [1, 6, 8]\n" - "14: [2, 5, 9, 11]\n" - "15: [1, 4, 8]\n"), - std::string("0: [3, 7, 10, 12]\n" - "1: [4, 8, 13]\n" - "2: [5, 9, 11, 14]\n" - "3: [0, 10]\n" - "4: [1, 8, 15]\n" - "5: [2, 9, 11, 14]\n" - "6: [13]\n" - "7: [0]\n" - "8: [1, 4, 13]\n" - "9: [2, 5, 14]\n" - "10: [0, 3, 12]\n" - "11: [2, 5, 14]\n" - "12: [0, 10]\n" - "13: [1, 6, 8]\n" - "14: [2, 5, 9, 11]\n" - "15: [4]\n") - }; - - for (std::size_t i = 0u; i < boost::size(overlaps); ++i) - { + double overlaps[] = {0.1, 0.5, 0.9}; + std::string expected[] = {std::string("0: [3, 6, 7, 10, 12, 15]\n" + "1: [4, 6, 8, 13, 15]\n" + "2: [5, 9, 11, 14]\n" + "3: [0, 7, 10, 12, 15]\n" + "4: [1, 8, 15]\n" + "5: [2, 9, 11, 14]\n" + "6: [0, 1, 7, 8, 12, 13, 15]\n" + "7: [0, 3, 6, 12, 15]\n" + "8: [1, 4, 6, 13, 15]\n" + "9: [2, 5, 14]\n" + "10: [0, 3, 12]\n" + "11: [2, 5, 14]\n" + "12: [0, 3, 6, 7, 10]\n" + "13: [1, 6, 8]\n" + "14: [2, 5, 9, 11]\n" + "15: [0, 1, 3, 4, 6, 7, 8]\n"), + std::string("0: [3, 7, 10, 12]\n" + "1: [4, 6, 8, 13, 15]\n" + "2: [5, 9, 11, 14]\n" + "3: [0, 7, 10, 12]\n" + "4: [1, 8, 15]\n" + "5: [2, 9, 11, 14]\n" + "6: [1, 8, 13]\n" + "7: [0, 3, 12]\n" + "8: [1, 4, 
6, 13, 15]\n" + "9: [2, 5, 14]\n" + "10: [0, 3, 12]\n" + "11: [2, 5, 14]\n" + "12: [0, 3, 7, 10]\n" + "13: [1, 6, 8]\n" + "14: [2, 5, 9, 11]\n" + "15: [1, 4, 8]\n"), + std::string("0: [3, 7, 10, 12]\n" + "1: [4, 8, 13]\n" + "2: [5, 9, 11, 14]\n" + "3: [0, 10]\n" + "4: [1, 8, 15]\n" + "5: [2, 9, 11, 14]\n" + "6: [13]\n" + "7: [0]\n" + "8: [1, 4, 13]\n" + "9: [2, 5, 14]\n" + "10: [0, 3, 12]\n" + "11: [2, 5, 14]\n" + "12: [0, 10]\n" + "13: [1, 6, 8]\n" + "14: [2, 5, 9, 11]\n" + "15: [4]\n")}; + + for (std::size_t i = 0u; i < boost::size(overlaps); ++i) { LOG_DEBUG("*** overlap threshold = " << overlaps[i] << " ***"); TGraph graph; @@ -307,8 +254,7 @@ void CBootstrapClustererTest::testBuildClusterGraph() std::string rep; TVertexItr j, endj; - for (boost::tie(j, endj) = boost::vertices(graph); j != endj; ++j) - { + for (boost::tie(j, endj) = boost::vertices(graph); j != endj; ++j) { rep += core::CStringUtils::typeToString(*j); TAdjacencyItr k, endk; boost::tie(k, endk) = boost::adjacent_vertices(*j, graph); @@ -322,8 +268,7 @@ void CBootstrapClustererTest::testBuildClusterGraph() } } -void CBootstrapClustererTest::testCutSearch() -{ +void CBootstrapClustererTest::testCutSearch() { LOG_DEBUG("+------------------------------------------+"); LOG_DEBUG("| CBootstrapClustererTest::testCutSearch |"); LOG_DEBUG("+------------------------------------------+"); @@ -340,8 +285,7 @@ void CBootstrapClustererTest::testCutSearch() rng.generateUniformSamples(1, 15, trials, connections); TMeanAccumulator quality; - for (std::size_t t = 0u; t < trials; ++t) - { + for (std::size_t t = 0u; t < trials; ++t) { std::size_t v = 20u; TGraph graph(v); @@ -364,12 +308,9 @@ void CBootstrapClustererTest::testCutSearch() TBoolVec parities; clusterer.cutSearch(0, 1, graph, 0.0, cost, parities); - LOG_DEBUG("cost = " << cost - << ", parities = " << core::CContainerPrinter::print(parities)); + LOG_DEBUG("cost = " << cost << ", parities = " << core::CContainerPrinter::print(parities)); - double sparsestCut = static_cast(connections[t]) - / static_cast(20 - splits[t]) - / static_cast(splits[t]); + double sparsestCut = static_cast(connections[t]) / static_cast(20 - splits[t]) / static_cast(splits[t]); LOG_DEBUG("sparsest = " << sparsestCut); quality.add(cost - sparsestCut); @@ -379,8 +320,7 @@ void CBootstrapClustererTest::testCutSearch() CPPUNIT_ASSERT(1.0 - maths::CBasicStatistics::mean(quality) > 0.98); } -void CBootstrapClustererTest::testSeparate() -{ +void CBootstrapClustererTest::testSeparate() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CBootstrapClustererTest::testSeparate |"); LOG_DEBUG("+-----------------------------------------+"); @@ -400,29 +340,28 @@ void CBootstrapClustererTest::testSeparate() std::size_t errors = 0; TMeanAccumulator quality; - for (std::size_t t = 0u; t < trials; ++t) - { + for (std::size_t t = 0u; t < trials; ++t) { std::size_t v = 40u; TGraph graph(v); - std::size_t k[] = { splits1[t], splits2[t] }; - clique(0, k[0], graph); + std::size_t k[] = {splits1[t], splits2[t]}; + clique(0, k[0], graph); clique(k[0], k[1], graph); - clique(k[1], v, graph); + clique(k[1], v, graph); TSizeVec S, T, U, V; - rng.generateUniformSamples( 0, k[0], connections[2*t ], S); - rng.generateUniformSamples(k[0], k[1], connections[2*t ], T); - rng.generateUniformSamples(k[0], k[1], connections[2*t + 1], U); - rng.generateUniformSamples(k[1], v, connections[2*t + 1], V); + rng.generateUniformSamples(0, k[0], connections[2 * t], S); + rng.generateUniformSamples(k[0], k[1], connections[2 * 
t], T);
+        rng.generateUniformSamples(k[0], k[1], connections[2 * t + 1], U);
+        rng.generateUniformSamples(k[1], v, connections[2 * t + 1], V);
         connect(S, T, graph);
         connect(U, V, graph);

         std::size_t e = boost::num_edges(graph);

         LOG_DEBUG("split = " << splits1[t] << ":" << splits2[t] << ":" << v - splits2[t]);
-        LOG_DEBUG("# connections = " << connections[2*t] << " " << connections[2*t + 1]);
+        LOG_DEBUG("# connections = " << connections[2 * t] << " " << connections[2 * t + 1]);

         TBootstrapClustererForTest2 clusterer(0.3, 3.0);

@@ -430,35 +369,27 @@ void CBootstrapClustererTest::testSeparate()
         bool separable = clusterer.separate(graph, parities);
         LOG_DEBUG("parities = " << core::CContainerPrinter::print(parities));

-        double a = 0.0;
-        double b = 0.0;
+        double a = 0.0;
+        double b = 0.0;
         double cut = 0.0;
-        for (std::size_t i = 0u; i < v; ++i)
-        {
+        for (std::size_t i = 0u; i < v; ++i) {
             (parities[i] ? a : b) += 1.0;
         }
         TEdgeItr i, end;
-        for (boost::tie(i, end) = boost::edges(graph); i != end; ++i)
-        {
-            if (parities[boost::source(*i, graph)] != parities[boost::target(*i, graph)])
-            {
+        for (boost::tie(i, end) = boost::edges(graph); i != end; ++i) {
+            if (parities[boost::source(*i, graph)] != parities[boost::target(*i, graph)]) {
                 cut += 1.0;
             }
         }
         LOG_DEBUG("cost = " << cut / (a * b));

-        double sparsestCut = std::min( static_cast<double>(connections[2*t])
-                                      / static_cast<double>(k[0])
-                                      / static_cast<double>(v - k[0]),
-                                       static_cast<double>(connections[2*t + 1])
-                                      / static_cast<double>(k[1])
-                                      / static_cast<double>(v - k[1]));
+        double sparsestCut =
+            std::min(static_cast<double>(connections[2 * t]) / static_cast<double>(k[0]) / static_cast<double>(v - k[0]),
+                     static_cast<double>(connections[2 * t + 1]) / static_cast<double>(k[1]) / static_cast<double>(v - k[1]));

-        double threshold = 0.1 * static_cast<double>(2 * e)
-                               / static_cast<double>(v * (v - 1));
+        double threshold = 0.1 * static_cast<double>(2 * e) / static_cast<double>(v * (v - 1));

-        LOG_DEBUG("sparsest = " << sparsestCut
-                  << " need " << threshold << " to separate");
+        LOG_DEBUG("sparsest = " << sparsestCut << " need " << threshold << " to separate");

         errors += static_cast<std::size_t>((sparsestCut < threshold) != separable);
         quality.add(cut / (a * b) - sparsestCut);
@@ -470,8 +401,7 @@ void CBootstrapClustererTest::testSeparate()
     CPPUNIT_ASSERT(1.0 - maths::CBasicStatistics::mean(quality) > 0.99);
 }

-void CBootstrapClustererTest::testThickets()
-{
+void CBootstrapClustererTest::testThickets() {
     LOG_DEBUG("+-----------------------------------------+");
     LOG_DEBUG("| CBootstrapClustererTest::testThickets |");
     LOG_DEBUG("+-----------------------------------------+");
@@ -493,28 +423,30 @@ void CBootstrapClustererTest::testThickets()
     int error = 0;
     TMeanAccumulator meanJaccard;
-    for (std::size_t t = 0u; t < trials; ++t)
-    {
+    for (std::size_t t = 0u; t < trials; ++t) {
         std::size_t v = 40u;
         TGraph graph(v);

-        std::size_t k[] = { splits1[t], splits2[t] };
-        clique(0, k[0], graph);
+        std::size_t k[] = {splits1[t], splits2[t]};
+        clique(0, k[0], graph);
         clique(k[0], k[1], graph);
-        clique(k[1], v, graph);
+        clique(k[1], v, graph);

         TSizeVecVec expectedClusters(3);
-        for (std::size_t i = 0u; i < v; ++i)
-        {
-            if (i < k[0]) { expectedClusters[0].push_back(i); }
-            else if (i < k[1]) { expectedClusters[1].push_back(i); }
-            else { expectedClusters[2].push_back(i); }
+        for (std::size_t i = 0u; i < v; ++i) {
+            if (i < k[0]) {
+                expectedClusters[0].push_back(i);
+            } else if (i < k[1]) {
+                expectedClusters[1].push_back(i);
+            } else {
+                expectedClusters[2].push_back(i);
+            }
         }
         std::sort(expectedClusters.begin(), expectedClusters.end());

         TSizeVec U, V;
-        rng.generateUniformSamples( 0, k[0],
connections[t], U); + rng.generateUniformSamples(0, k[0], connections[t], U); rng.generateUniformSamples(k[0], k[1], connections[t], V); connect(U, V, graph); @@ -530,21 +462,16 @@ void CBootstrapClustererTest::testThickets() LOG_DEBUG("components = " << core::CContainerPrinter::print(components)); error += std::abs(3 - static_cast(c)); - if (c == 3) - { + if (c == 3) { TSizeVecVec clusters(3); - for (std::size_t i = 0; i < v; ++i) - { + for (std::size_t i = 0; i < v; ++i) { clusters[components[i]].push_back(i); } std::sort(clusters.begin(), clusters.end()); - for (std::size_t i = 0u; i < 3; ++i) - { - double jaccard = maths::CSetTools::jaccard(expectedClusters[i].begin(), - expectedClusters[i].end(), - clusters[i].begin(), - clusters[i].end()); + for (std::size_t i = 0u; i < 3; ++i) { + double jaccard = maths::CSetTools::jaccard( + expectedClusters[i].begin(), expectedClusters[i].end(), clusters[i].begin(), clusters[i].end()); CPPUNIT_ASSERT(jaccard > 0.8); meanJaccard.add(jaccard); } @@ -557,8 +484,7 @@ void CBootstrapClustererTest::testThickets() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanJaccard) > 0.99); } -void CBootstrapClustererTest::testNonConvexClustering() -{ +void CBootstrapClustererTest::testNonConvexClustering() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CBootstrapClustererTest::testNonConvexClustering |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -573,110 +499,33 @@ void CBootstrapClustererTest::testNonConvexClustering() // has mean equal to half a sine wave which poses problems for // x-means. - double x[][2] = - { - { 2.00000, 1.99667 }, // Cluster 1 - { 4.00000, 3.97339 }, - { 6.00000, 5.91040 }, - { 8.00000, 7.78837 }, - { 10.00000, 9.58851 }, - { 12.00000, 11.29285 }, - { 14.00000, 12.88435 }, - { 16.00000, 14.34712 }, - { 18.00000, 15.66654 }, - { 20.00000, 16.82942 }, - { 22.00000, 17.82415 }, - { 24.00000, 18.64078 }, - { 26.00000, 19.27116 }, - { 28.00000, 19.70899 }, - { 30.00000, 19.94990 }, - { 32.00000, 19.99147 }, - { 34.00000, 19.83330 }, - { 36.00000, 19.47695 }, - { 38.00000, 18.92600 }, - { 40.00000, 18.18595 }, - { 42.00000, 17.26419 }, - { 44.00000, 16.16993 }, - { 46.00000, 14.91410 }, - { 48.00000, 13.50926 }, - { 50.00000, 11.96944 }, - { 52.00000, 10.31003 }, - { 54.00000, 8.54760 }, - { 56.00000, 6.69976 }, - { 58.00000, 4.78499 }, - { 60.00000, 2.82240 }, - { 62.00000, 0.83161 }, - { 181.00000, 9.95004 }, // Cluster 2 - { 182.00000, 9.80067 }, - { 183.00000, 9.55336 }, - { 184.00000, 9.21061 }, - { 185.00000, 8.77583 }, - { 186.00000, 8.25336 }, - { 187.00000, 7.64842 }, - { 188.00000, 6.96707 }, - { 189.00000, 6.21610 }, - { 190.00000, 5.40302 }, - { 191.00000, 4.53596 }, - { 192.00000, 3.62358 }, - { 193.00000, 2.67499 }, - { 194.00000, 1.69967 }, - { 195.00000, 0.70737 }, - { 196.00000, -0.29200 }, - { 197.00000, -1.28844 }, - { 198.00000, -2.27202 }, - { 199.00000, -3.23290 }, - { 200.00000, -4.16147 }, - { 201.00000, -5.04846 }, - { 202.00000, -5.88501 }, - { 203.00000, -6.66276 }, - { 204.00000, -7.37394 }, - { 205.00000, -8.01144 }, - { 206.00000, -8.56889 }, - { 207.00000, -9.04072 }, - { 208.00000, -9.42222 }, - { 209.00000, -9.70958 }, - { 210.00000, -9.89992 }, - { 211.00000, -9.99135 }, - { 232.41593, -9.95004 }, // Cluster 3 - { 233.41593, -9.80067 }, - { 234.41593, -9.55336 }, - { 235.41593, -9.21061 }, - { 236.41593, -8.77583 }, - { 237.41593, -8.25336 }, - { 238.41593, -7.64842 }, - { 239.41593, -6.96707 }, - { 240.41593, -6.21610 }, - { 241.41593, -5.40302 
}, - { 242.41593, -4.53596 }, - { 243.41593, -3.62358 }, - { 244.41593, -2.67499 }, - { 245.41593, -1.69967 }, - { 246.41593, -0.70737 }, - { 247.41593, 0.29200 }, - { 248.41593, 1.28844 }, - { 249.41593, 2.27202 }, - { 250.41593, 3.23290 }, - { 251.41593, 4.16147 }, - { 252.41593, 5.04846 }, - { 253.41593, 5.88501 }, - { 254.41593, 6.66276 }, - { 255.41593, 7.37394 }, - { 256.41593, 8.01144 }, - { 257.41593, 8.56889 }, - { 258.41593, 9.04072 }, - { 259.41593, 9.42222 }, - { 260.41593, 9.70958 }, - { 261.41593, 9.89992 }, - { 262.41593, 9.99135 } - }; - std::size_t clusters[] = { 0, 31, 62, boost::size(x) }; + double x[][2] = {{2.00000, 1.99667}, // Cluster 1 + {4.00000, 3.97339}, {6.00000, 5.91040}, {8.00000, 7.78837}, {10.00000, 9.58851}, {12.00000, 11.29285}, + {14.00000, 12.88435}, {16.00000, 14.34712}, {18.00000, 15.66654}, {20.00000, 16.82942}, {22.00000, 17.82415}, + {24.00000, 18.64078}, {26.00000, 19.27116}, {28.00000, 19.70899}, {30.00000, 19.94990}, {32.00000, 19.99147}, + {34.00000, 19.83330}, {36.00000, 19.47695}, {38.00000, 18.92600}, {40.00000, 18.18595}, {42.00000, 17.26419}, + {44.00000, 16.16993}, {46.00000, 14.91410}, {48.00000, 13.50926}, {50.00000, 11.96944}, {52.00000, 10.31003}, + {54.00000, 8.54760}, {56.00000, 6.69976}, {58.00000, 4.78499}, {60.00000, 2.82240}, {62.00000, 0.83161}, + {181.00000, 9.95004}, // Cluster 2 + {182.00000, 9.80067}, {183.00000, 9.55336}, {184.00000, 9.21061}, {185.00000, 8.77583}, {186.00000, 8.25336}, + {187.00000, 7.64842}, {188.00000, 6.96707}, {189.00000, 6.21610}, {190.00000, 5.40302}, {191.00000, 4.53596}, + {192.00000, 3.62358}, {193.00000, 2.67499}, {194.00000, 1.69967}, {195.00000, 0.70737}, {196.00000, -0.29200}, + {197.00000, -1.28844}, {198.00000, -2.27202}, {199.00000, -3.23290}, {200.00000, -4.16147}, {201.00000, -5.04846}, + {202.00000, -5.88501}, {203.00000, -6.66276}, {204.00000, -7.37394}, {205.00000, -8.01144}, {206.00000, -8.56889}, + {207.00000, -9.04072}, {208.00000, -9.42222}, {209.00000, -9.70958}, {210.00000, -9.89992}, {211.00000, -9.99135}, + {232.41593, -9.95004}, // Cluster 3 + {233.41593, -9.80067}, {234.41593, -9.55336}, {235.41593, -9.21061}, {236.41593, -8.77583}, {237.41593, -8.25336}, + {238.41593, -7.64842}, {239.41593, -6.96707}, {240.41593, -6.21610}, {241.41593, -5.40302}, {242.41593, -4.53596}, + {243.41593, -3.62358}, {244.41593, -2.67499}, {245.41593, -1.69967}, {246.41593, -0.70737}, {247.41593, 0.29200}, + {248.41593, 1.28844}, {249.41593, 2.27202}, {250.41593, 3.23290}, {251.41593, 4.16147}, {252.41593, 5.04846}, + {253.41593, 5.88501}, {254.41593, 6.66276}, {255.41593, 7.37394}, {256.41593, 8.01144}, {257.41593, 8.56889}, + {258.41593, 9.04072}, {259.41593, 9.42222}, {260.41593, 9.70958}, {261.41593, 9.89992}, {262.41593, 9.99135}}; + std::size_t clusters[] = {0, 31, 62, boost::size(x)}; TSizeVecVec perfect(3); - for (std::size_t i = 1u; i < boost::size(clusters); ++i) - { - for (std::size_t j = clusters[i-1]; j < clusters[i]; ++j) - { - perfect[i-1].push_back(j); + for (std::size_t i = 1u; i < boost::size(clusters); ++i) { + for (std::size_t j = clusters[i - 1]; j < clusters[i]; ++j) { + perfect[i - 1].push_back(j); } } TSizeVecVec bootstrap; @@ -694,18 +543,16 @@ void CBootstrapClustererTest::testNonConvexClustering() TVector2Vec flatPoints; TVector2SizeUMap lookup; TDoubleVec noise; - for (std::size_t t = 0u; t < 10; ++t) - { + for (std::size_t t = 0u; t < 10; ++t) { LOG_DEBUG("Trial " << t); flatPoints.clear(); lookup.clear(); rng.generateUniformSamples(0, 4.0, 2 * boost::size(x), noise); - 
for (std::size_t i = 0u; i < boost::size(x); ++i) - { + for (std::size_t i = 0u; i < boost::size(x); ++i) { TVector2 point(&x[i][0], &x[i][2]); - point(0) += noise[2*i]; - point(1) += noise[2*i + 1]; + point(0) += noise[2 * i]; + point(1) += noise[2 * i + 1]; flatPoints.push_back(point); lookup[point] = i; } @@ -713,7 +560,7 @@ void CBootstrapClustererTest::testNonConvexClustering() TVector2VecVec bootstrapClusters; maths::bootstrapCluster(flatPoints, - 20, // trials + 20, // trials xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, @@ -723,11 +570,9 @@ void CBootstrapClustererTest::testNonConvexClustering() bootstrapClusters); bootstrap.resize(bootstrapClusters.size()); - for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) - { + for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) { bootstrap[i].clear(); - for (std::size_t j = 0u; j < bootstrapClusters[i].size(); ++j) - { + for (std::size_t j = 0u; j < bootstrapClusters[i].size(); ++j) { auto k = lookup.find(bootstrapClusters[i][j]); CPPUNIT_ASSERT(k != lookup.end()); bootstrap[i].push_back(k->second); @@ -735,35 +580,26 @@ void CBootstrapClustererTest::testNonConvexClustering() std::sort(bootstrap[i].begin(), bootstrap[i].end()); } TDoubleVec jaccard; - for (std::size_t i = 0u; i < perfect.size(); ++i) - { + for (std::size_t i = 0u; i < perfect.size(); ++i) { double jmax = 0.0; - for (std::size_t j = 0u; j < bootstrap.size(); ++j) - { - jmax = std::max(jmax, maths::CSetTools::jaccard(bootstrap[j].begin(), - bootstrap[j].end(), - perfect[i].begin(), - perfect[i].end())); + for (std::size_t j = 0u; j < bootstrap.size(); ++j) { + jmax = std::max(jmax, + maths::CSetTools::jaccard(bootstrap[j].begin(), bootstrap[j].end(), perfect[i].begin(), perfect[i].end())); } jaccard.push_back(jmax); } - LOG_DEBUG("# clusters bootstrap = " << bootstrap.size() - << ", Jaccard bootstrap = " << core::CContainerPrinter::print(jaccard)); + LOG_DEBUG("# clusters bootstrap = " << bootstrap.size() << ", Jaccard bootstrap = " << core::CContainerPrinter::print(jaccard)); numberClustersBootstrap.add(static_cast(bootstrap.size())); jaccardBootstrapToPerfect.add(jaccard); TVector2Vec flatPoints_(flatPoints); xmeans.setPoints(flatPoints_); - xmeans.run(improveParamsKmeansIterations, - improveStructureClusterSeeds, - improveStructureKmeansIterations); + xmeans.run(improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations); vanilla.resize(xmeans.clusters().size()); - for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) - { + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { vanilla[i].clear(); - for (std::size_t j = 0u; j < xmeans.clusters()[i].points().size(); ++j) - { + for (std::size_t j = 0u; j < xmeans.clusters()[i].points().size(); ++j) { auto k = lookup.find(xmeans.clusters()[i].points()[j]); CPPUNIT_ASSERT(k != lookup.end()); vanilla[i].push_back(k->second); @@ -771,20 +607,15 @@ void CBootstrapClustererTest::testNonConvexClustering() std::sort(vanilla[i].begin(), vanilla[i].end()); } jaccard.clear(); - for (std::size_t i = 0u; i < perfect.size(); ++i) - { + for (std::size_t i = 0u; i < perfect.size(); ++i) { double jmax = 0.0; - for (std::size_t j = 0u; j < vanilla.size(); ++j) - { - jmax = std::max(jmax, maths::CSetTools::jaccard(vanilla[j].begin(), - vanilla[j].end(), - perfect[i].begin(), - perfect[i].end())); + for (std::size_t j = 0u; j < vanilla.size(); ++j) { + jmax = + std::max(jmax, maths::CSetTools::jaccard(vanilla[j].begin(), vanilla[j].end(), perfect[i].begin(), 
perfect[i].end())); } jaccard.push_back(jmax); } - LOG_DEBUG("# clusters vanilla = " << vanilla.size() - << ", Jaccard vanilla = " << core::CContainerPrinter::print(jaccard)); + LOG_DEBUG("# clusters vanilla = " << vanilla.size() << ", Jaccard vanilla = " << core::CContainerPrinter::print(jaccard)); numberClustersVanilla.add(static_cast(vanilla.size())); jaccardVanillaToPerfect.add(jaccard); } @@ -796,12 +627,10 @@ void CBootstrapClustererTest::testNonConvexClustering() CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, maths::CBasicStatistics::mean(jaccardBootstrapToPerfect), 0.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, maths::CBasicStatistics::mean(numberClustersBootstrap), 0.6); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(jaccardBootstrapToPerfect) - > maths::CBasicStatistics::mean(jaccardVanillaToPerfect)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(jaccardBootstrapToPerfect) > maths::CBasicStatistics::mean(jaccardVanillaToPerfect)); } -void CBootstrapClustererTest::testClusteringStability() -{ +void CBootstrapClustererTest::testClusteringStability() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CBootstrapClustererTest::testClusteringStability |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -812,26 +641,24 @@ void CBootstrapClustererTest::testClusteringStability() test::CRandomNumbers rng; - double m1_[] = { 2.0, 2.0 }; - double v1_[] = { 4.0, 2.0, 4.0 }; + double m1_[] = {2.0, 2.0}; + double v1_[] = {4.0, 2.0, 4.0}; TVector2 m1(&m1_[0], &m1_[2]); TMatrix2 v1(&v1_[0], &v1_[3]); TVector2Vec points1; maths::CSampling::multivariateNormalSample(m1, v1, 50, points1); - double m2_[] = { 10.0, 5.0 }; - double v2_[] = { 4.0, 0.0, 1.0 }; + double m2_[] = {10.0, 5.0}; + double v2_[] = {4.0, 0.0, 1.0}; TVector2 m2(&m2_[0], &m2_[2]); TMatrix2 v2(&v2_[0], &v2_[3]); TVector2Vec points2; maths::CSampling::multivariateNormalSample(m2, v2, 50, points2); TSizeVecVec perfect(2); - for (std::size_t i = 0u; i < points1.size(); ++i) - { + for (std::size_t i = 0u; i < points1.size(); ++i) { perfect[0].push_back(i); } - for (std::size_t i = 0u; i < points2.size(); ++i) - { + for (std::size_t i = 0u; i < points2.size(); ++i) { perfect[1].push_back(points1.size() + i); } @@ -842,30 +669,25 @@ void CBootstrapClustererTest::testClusteringStability() points.insert(points.end(), points2.begin(), points2.end()); TVector2SizeUMap lookup; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { lookup[points[i]] = i; } TSizeVecVec clusterCounts(perfect.size(), TSizeVec(points.size(), 0)); - for (std::size_t t = 0u; t < 10; ++t) - { + for (std::size_t t = 0u; t < 10; ++t) { LOG_DEBUG("Trial " << t); rng.random_shuffle(points1.begin(), points1.end()); rng.random_shuffle(points2.begin(), points2.end()); - points.assign(points1.begin(), - points1.begin() + (3 * points1.size()) / 4); - points.insert(points.end(), - points2.begin(), - points2.begin() + (3 * points2.size()) / 4); + points.assign(points1.begin(), points1.begin() + (3 * points1.size()) / 4); + points.insert(points.end(), points2.begin(), points2.begin() + (3 * points2.size()) / 4); TVector2VecVec bootstrapClusters; maths::CXMeans> xmeans(20); maths::bootstrapCluster(points, - 20, // trials + 20, // trials xmeans, 4, // improve params 2, // improve structure seeds @@ -875,14 +697,11 @@ void CBootstrapClustererTest::testClusteringStability() bootstrapClusters); LOG_DEBUG("# clusters = " << bootstrapClusters.size()); - if (bootstrapClusters.size() > 1) - { + if 
(bootstrapClusters.size() > 1) { bootstrap.resize(bootstrapClusters.size()); - for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) - { + for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) { bootstrap[i].clear(); - for (std::size_t j = 0u; j < bootstrapClusters[i].size(); ++j) - { + for (std::size_t j = 0u; j < bootstrapClusters[i].size(); ++j) { auto k = lookup.find(bootstrapClusters[i][j]); CPPUNIT_ASSERT(k != lookup.end()); bootstrap[i].push_back(k->second); @@ -891,21 +710,14 @@ void CBootstrapClustererTest::testClusteringStability() } LOG_DEBUG("clusters = " << core::CContainerPrinter::print(bootstrap)); - for (std::size_t i = 0u; i < bootstrap.size(); ++i) - { + for (std::size_t i = 0u; i < bootstrap.size(); ++i) { double Jmax = 0.0; std::size_t cluster = 0; - for (std::size_t j = 0u; j < perfect.size(); ++j) - { - double J = maths::CSetTools::jaccard(bootstrap[i].begin(), - bootstrap[i].end(), - perfect[j].begin(), - perfect[j].end()); - boost::tie(Jmax, cluster) = std::max(std::make_pair(Jmax, cluster), - std::make_pair(J, j)); + for (std::size_t j = 0u; j < perfect.size(); ++j) { + double J = maths::CSetTools::jaccard(bootstrap[i].begin(), bootstrap[i].end(), perfect[j].begin(), perfect[j].end()); + boost::tie(Jmax, cluster) = std::max(std::make_pair(Jmax, cluster), std::make_pair(J, j)); } - for (std::size_t j = 0u; j < bootstrap[i].size(); ++j) - { + for (std::size_t j = 0u; j < bootstrap[i].size(); ++j) { ++clusterCounts[cluster][bootstrap[i][j]]; } } @@ -913,18 +725,15 @@ void CBootstrapClustererTest::testClusteringStability() } TDoubleVec consistency(points.size(), 1.0); - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { double c0 = static_cast(clusterCounts[0][i]); double c1 = static_cast(clusterCounts[1][i]); - if (c0 > 0.0 || c1 > 0.0) - { + if (c0 > 0.0 || c1 > 0.0) { consistency[i] = (std::max(c0, c1) - std::min(c0, c1)) / (c0 + c1); } } - LOG_DEBUG("consistency = " - << core::CContainerPrinter::print(consistency)); + LOG_DEBUG("consistency = " << core::CContainerPrinter::print(consistency)); TMeanAccumulator meanConsistency; meanConsistency.add(consistency); @@ -932,31 +741,23 @@ void CBootstrapClustererTest::testClusteringStability() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanConsistency) > 0.95); } -CppUnit::Test *CBootstrapClustererTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBootstrapClustererTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBootstrapClustererTest::testFacade", - &CBootstrapClustererTest::testFacade) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBootstrapClustererTest::testBuildClusterGraph", - &CBootstrapClustererTest::testBuildClusterGraph) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBootstrapClustererTest::testCutSearch", - &CBootstrapClustererTest::testCutSearch) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBootstrapClustererTest::testSeparate", - &CBootstrapClustererTest::testSeparate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBootstrapClustererTest::testThickets", - &CBootstrapClustererTest::testThickets) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBootstrapClustererTest::testNonConvexClustering", - &CBootstrapClustererTest::testNonConvexClustering) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBootstrapClustererTest::testClusteringStability", - &CBootstrapClustererTest::testClusteringStability) ); +CppUnit::Test* 
CBootstrapClustererTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBootstrapClustererTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CBootstrapClustererTest::testFacade", &CBootstrapClustererTest::testFacade)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBootstrapClustererTest::testBuildClusterGraph", + &CBootstrapClustererTest::testBuildClusterGraph)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBootstrapClustererTest::testCutSearch", + &CBootstrapClustererTest::testCutSearch)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBootstrapClustererTest::testSeparate", &CBootstrapClustererTest::testSeparate)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBootstrapClustererTest::testThickets", &CBootstrapClustererTest::testThickets)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBootstrapClustererTest::testNonConvexClustering", + &CBootstrapClustererTest::testNonConvexClustering)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBootstrapClustererTest::testClusteringStability", + &CBootstrapClustererTest::testClusteringStability)); return suiteOfTests; } diff --git a/lib/maths/unittest/CBootstrapClustererTest.h b/lib/maths/unittest/CBootstrapClustererTest.h index d448b36c7b..4030893141 100644 --- a/lib/maths/unittest/CBootstrapClustererTest.h +++ b/lib/maths/unittest/CBootstrapClustererTest.h @@ -9,18 +9,17 @@ #include -class CBootstrapClustererTest : public CppUnit::TestFixture -{ - public: - void testFacade(); - void testBuildClusterGraph(); - void testCutSearch(); - void testSeparate(); - void testThickets(); - void testNonConvexClustering(); - void testClusteringStability(); +class CBootstrapClustererTest : public CppUnit::TestFixture { +public: + void testFacade(); + void testBuildClusterGraph(); + void testCutSearch(); + void testSeparate(); + void testThickets(); + void testNonConvexClustering(); + void testClusteringStability(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CBootstrapClustererTest_h diff --git a/lib/maths/unittest/CBoundingBoxTest.cc b/lib/maths/unittest/CBoundingBoxTest.cc index d14bc1a636..c4351c4720 100644 --- a/lib/maths/unittest/CBoundingBoxTest.cc +++ b/lib/maths/unittest/CBoundingBoxTest.cc @@ -22,59 +22,39 @@ using TVector4 = maths::CVectorNx1; using TBoundingBox2 = maths::CBoundingBox; using TBoundingBox4 = maths::CBoundingBox; -namespace -{ - -bool closerToX(const TBoundingBox2 &bb, - const TVector2 &x, - const TVector2 &y) -{ - TVector2 cc[] = { bb.blc(), bb.trc() }; - for (std::size_t c = 0u; c < 4; ++c) - { - double p[] = { cc[c / 2](0), cc[c % 2](1) }; +namespace { + +bool closerToX(const TBoundingBox2& bb, const TVector2& x, const TVector2& y) { + TVector2 cc[] = {bb.blc(), bb.trc()}; + for (std::size_t c = 0u; c < 4; ++c) { + double p[] = {cc[c / 2](0), cc[c % 2](1)}; TVector2 corner(p, p + 2); - if ((x - corner).euclidean() > (y - corner).euclidean()) - { + if ((x - corner).euclidean() > (y - corner).euclidean()) { return false; } } return true; } -bool closerToX(const TBoundingBox4 &bb, - const TVector4 &x, - const TVector4 &y) -{ - TVector4 cc[] = { bb.blc(), bb.trc() }; - for (std::size_t c = 0u; c < 16; ++c) - { - double p[] = { cc[c / 8](0), cc[(c / 4) % 2](1), cc[(c / 2) % 2](2), cc[c % 2](3) }; +bool closerToX(const TBoundingBox4& bb, const TVector4& x, const TVector4& y) { + TVector4 cc[] = {bb.blc(), bb.trc()}; + for (std::size_t c = 0u; c < 16; ++c) { + double p[] = {cc[c / 8](0), cc[(c / 4) % 2](1), cc[(c / 2) % 2](2), cc[c % 
2](3)}; TVector4 corner(p, p + 4); - if ((x - corner).euclidean() > (y - corner).euclidean()) - { + if ((x - corner).euclidean() > (y - corner).euclidean()) { return false; } } return true; } - } -void CBoundingBoxTest::testAdd() -{ +void CBoundingBoxTest::testAdd() { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| CBoundingBoxTest::testAdd |"); LOG_DEBUG("+-----------------------------+"); - double points[][2] = - { - { -1.0, 5.0 }, - { 2.0, 20.0 }, - { 10.0, 4.0 }, - { -10.0, -3.0 }, - { 200.0, 50.0 } - }; + double points[][2] = {{-1.0, 5.0}, {2.0, 20.0}, {10.0, 4.0}, {-10.0, -3.0}, {200.0, 50.0}}; TBoundingBox2 bb(TVector2(&points[0][0], &points[0][0] + 2)); CPPUNIT_ASSERT_EQUAL(-1.0, bb.blc()(0)); @@ -118,8 +98,7 @@ void CBoundingBoxTest::testAdd() CPPUNIT_ASSERT_EQUAL((-3.0 + 50.0) / 2.0, bb.centre()(1)); } -void CBoundingBoxTest::testCloserTo() -{ +void CBoundingBoxTest::testCloserTo() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CBoundingBoxTest::testCloserTo |"); LOG_DEBUG("+----------------------------------+"); @@ -134,75 +113,58 @@ void CBoundingBoxTest::testCloserTo() TDoubleVec probes; rng.generateUniformSamples(-1000.0, 1000.0, 160, probes); - for (std::size_t i = 0u; i < n; i += 4) - { - TVector2 x1(&points[i ], &points[i + 2]); + for (std::size_t i = 0u; i < n; i += 4) { + TVector2 x1(&points[i], &points[i + 2]); TVector2 x2(&points[i + 2], &points[i + 4]); TBoundingBox2 bb(x1); bb.add(x2); - for (std::size_t j = 0u; j < probes.size(); j += 4) - { - TVector2 y1(&probes[j ], &probes[j + 2]); + for (std::size_t j = 0u; j < probes.size(); j += 4) { + TVector2 y1(&probes[j], &probes[j + 2]); TVector2 y2(&probes[j + 2], &probes[j + 4]); bool closer = closerToX(bb, y1, y2); - if (closer) - { - LOG_DEBUG("bb = " << bb.print() - << " is closer to " << y1 << " than " << y2); + if (closer) { + LOG_DEBUG("bb = " << bb.print() << " is closer to " << y1 << " than " << y2); } CPPUNIT_ASSERT_EQUAL(closer, bb.closerToX(y1, y2)); closer = closerToX(bb, y2, y1); - if (closer) - { - LOG_DEBUG("bb = " << bb.print() - << " is closer to " << y2 << " than " << y1); + if (closer) { + LOG_DEBUG("bb = " << bb.print() << " is closer to " << y2 << " than " << y1); } CPPUNIT_ASSERT_EQUAL(closer, bb.closerToX(y2, y1)); } } - for (std::size_t i = 0u; i < n; i += 8) - { - TVector4 x1(&points[i ], &points[i + 4]); + for (std::size_t i = 0u; i < n; i += 8) { + TVector4 x1(&points[i], &points[i + 4]); TVector4 x2(&points[i + 4], &points[i + 8]); TBoundingBox4 bb(x1); bb.add(x2); - for (std::size_t j = 0u; j < probes.size(); j += 4) - { - TVector4 y1(&probes[j ], &probes[j + 4]); + for (std::size_t j = 0u; j < probes.size(); j += 4) { + TVector4 y1(&probes[j], &probes[j + 4]); TVector4 y2(&probes[j + 4], &probes[j + 8]); bool closer = closerToX(bb, y1, y2); - if (closer) - { - LOG_DEBUG("bb = " << bb.print() - << " is closer to " << y1 << " than " << y2); + if (closer) { + LOG_DEBUG("bb = " << bb.print() << " is closer to " << y1 << " than " << y2); } CPPUNIT_ASSERT_EQUAL(closer, bb.closerToX(y1, y2)); closer = closerToX(bb, y2, y1); - if (closer) - { - LOG_DEBUG("bb = " << bb.print() - << " is closer to " << y2 << " than " << y1); + if (closer) { + LOG_DEBUG("bb = " << bb.print() << " is closer to " << y2 << " than " << y1); } CPPUNIT_ASSERT_EQUAL(closer, bb.closerToX(y2, y1)); } } } -CppUnit::Test *CBoundingBoxTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBoundingBoxTest"); +CppUnit::Test* CBoundingBoxTest::suite() { + CppUnit::TestSuite* 
suiteOfTests = new CppUnit::TestSuite("CBoundingBoxTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBoundingBoxTest::testAdd", - &CBoundingBoxTest::testAdd) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBoundingBoxTest::testCloserTo", - &CBoundingBoxTest::testCloserTo) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CBoundingBoxTest::testAdd", &CBoundingBoxTest::testAdd)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBoundingBoxTest::testCloserTo", &CBoundingBoxTest::testCloserTo)); return suiteOfTests; } diff --git a/lib/maths/unittest/CBoundingBoxTest.h b/lib/maths/unittest/CBoundingBoxTest.h index 0d70ac71a7..4275a1ae9e 100644 --- a/lib/maths/unittest/CBoundingBoxTest.h +++ b/lib/maths/unittest/CBoundingBoxTest.h @@ -9,13 +9,12 @@ #include -class CBoundingBoxTest : public CppUnit::TestFixture -{ - public: - void testAdd(); - void testCloserTo(); +class CBoundingBoxTest : public CppUnit::TestFixture { +public: + void testAdd(); + void testCloserTo(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CBoundingBostTest_h diff --git a/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc b/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc index 62e5f5dde8..0ee0060a64 100644 --- a/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc +++ b/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc @@ -6,15 +6,15 @@ #include "CCalendarComponentAdaptiveBucketingTest.h" #include -#include #include #include #include #include #include +#include -#include #include +#include #include @@ -22,8 +22,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TFloatVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; @@ -31,19 +30,16 @@ using TMinAccumulator = maths::CBasicStatistics::SMin::TAccumulator; using TMaxAccumulator = maths::CBasicStatistics::SMax::TAccumulator; } -void CCalendarComponentAdaptiveBucketingTest::setUp() -{ +void CCalendarComponentAdaptiveBucketingTest::setUp() { m_Timezone = core::CTimezone::instance().timezoneName(); core::CTimezone::instance().setTimezone("GMT"); } -void CCalendarComponentAdaptiveBucketingTest::tearDown() -{ +void CCalendarComponentAdaptiveBucketingTest::tearDown() { core::CTimezone::instance().setTimezone(m_Timezone); } -void CCalendarComponentAdaptiveBucketingTest::testInitialize() -{ +void CCalendarComponentAdaptiveBucketingTest::testInitialize() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CCalendarComponentAdaptiveBucketingTest::testInitialize |"); LOG_DEBUG("+-----------------------------------------------------------+"); @@ -55,14 +51,14 @@ void CCalendarComponentAdaptiveBucketingTest::testInitialize() const std::string expectedEndpoints{"[0, 7200, 14400, 21600, 28800, 36000, 43200, 50400, 57600, 64800, 72000, 79200, 86400]"}; const std::string expectedKnots{"[0, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400]"}; - const std::string expectedValues{"[129600, 90000, 97200, 104400, 111600, 118800, 126000, 133200, 140400, 147600, 154800, 162000, 169200, 129600]"}; + const std::string expectedValues{ + "[129600, 90000, 97200, 104400, 111600, 118800, 126000, 133200, 140400, 147600, 154800, 162000, 169200, 129600]"}; CPPUNIT_ASSERT(bucketing.initialize(12)); - const TFloatVec &endpoints{bucketing.endpoints()}; + const TFloatVec& endpoints{bucketing.endpoints()}; CPPUNIT_ASSERT_EQUAL(expectedEndpoints, 
core::CContainerPrinter::print(endpoints)); - for (core_t::TTime t = 86400 + 3600; t < 172800; t += 7200) - { + for (core_t::TTime t = 86400 + 3600; t < 172800; t += 7200) { bucketing.add(t, static_cast(t)); } TDoubleVec knots; @@ -73,8 +69,7 @@ void CCalendarComponentAdaptiveBucketingTest::testInitialize() CPPUNIT_ASSERT_EQUAL(expectedValues, core::CContainerPrinter::print(values)); } -void CCalendarComponentAdaptiveBucketingTest::testSwap() -{ +void CCalendarComponentAdaptiveBucketingTest::testSwap() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CCalendarComponentAdaptiveBucketingTest::testSwap |"); LOG_DEBUG("+-----------------------------------------------------+"); @@ -87,19 +82,15 @@ void CCalendarComponentAdaptiveBucketingTest::testSwap() test::CRandomNumbers rng; bucketing1.initialize(10); - for (std::size_t p = 0; p < 50; ++p) - { + for (std::size_t p = 0; p < 50; ++p) { TDoubleVec noise; rng.generateNormalSamples(0.0, 2.0, 100, noise); core_t::TTime start{now + static_cast(86400 * p)}; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime t{start + static_cast(864 * i)}; - if (bucketing1.feature().inWindow(t)) - { - double y{0.02 * (static_cast(i) - 50.0) - * (static_cast(i) - 50.0)}; + if (bucketing1.feature().inWindow(t)) { + double y{0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0)}; bucketing1.add(t, y + noise[i]); } } @@ -107,8 +98,7 @@ void CCalendarComponentAdaptiveBucketingTest::testSwap() bucketing1.propagateForwardsByTime(1.0); } - maths::CCalendarFeature feature2{maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, - now - core::constants::WEEK}; + maths::CCalendarFeature feature2{maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, now - core::constants::WEEK}; maths::CCalendarComponentAdaptiveBucketing bucketing2{feature2, 0.1}; uint64_t checksum1{bucketing1.checksum()}; @@ -123,19 +113,15 @@ void CCalendarComponentAdaptiveBucketingTest::testSwap() CPPUNIT_ASSERT_EQUAL(checksum2, bucketing1.checksum()); } -void CCalendarComponentAdaptiveBucketingTest::testRefine() -{ +void CCalendarComponentAdaptiveBucketingTest::testRefine() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CCalendarComponentAdaptiveBucketingTest::testRefine |"); LOG_DEBUG("+-------------------------------------------------------+"); // Test that refine reduces the function approximation error. 
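The target signal in this test is piecewise linear between the (times[i], function[i]) pairs declared just below, and the x0, x1, y0, y1 arithmetic in the loop bodies is, presumably, ordinary linear interpolation between adjacent knots. As a reading aid, the computation has this shape (a sketch, not code from the patch):

    // Linear interpolation between knots (x0, y0) and (x1, y1), evaluated at x.
    double interpolate(double x, double x0, double x1, double y0, double y1) {
        return y0 + (y1 - y0) * (x - x0) / (x1 - x0);
    }

so the expected value at offset t is interpolate(t, times[i - 1], times[i], function[i - 1], function[i]), where i is located with std::lower_bound over the knot times.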
- core_t::TTime times[] = - { - -1, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400 - }; - double function[] = { 10, 10, 10, 10, 100, 90, 80, 90, 100, 20, 10, 10, 10, 10 }; + core_t::TTime times[] = {-1, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400}; + double function[] = {10, 10, 10, 10, 100, 90, 80, 90, 100, 20, 10, 10, 10, 10}; maths::CCalendarFeature feature{maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, 0}; maths::CCalendarComponentAdaptiveBucketing bucketing1{feature}; @@ -147,15 +133,11 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() test::CRandomNumbers rng; bool lastInWindow{true}; - for (core_t::TTime t = 0; t < 31536000; t += 1800) - { + for (core_t::TTime t = 0; t < 31536000; t += 1800) { bool inWindow{bucketing1.feature().inWindow(t)}; - if (inWindow) - { + if (inWindow) { core_t::TTime x{bucketing1.feature().offset(t)}; - ptrdiff_t i{std::lower_bound(boost::begin(times), - boost::end(times), - x) - boost::begin(times)}; + ptrdiff_t i{std::lower_bound(boost::begin(times), boost::end(times), x) - boost::begin(times)}; double x0{static_cast(times[i - 1])}; double x1{static_cast(times[i])}; double y0{function[i - 1]}; @@ -165,9 +147,7 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() rng.generateNormalSamples(0.0, 4.0, 1, noise); bucketing1.add(t, y + noise[0]); bucketing2.add(t, y + noise[0]); - } - else if (lastInWindow && !inWindow) - { + } else if (lastInWindow && !inWindow) { bucketing2.refine(t); } lastInWindow = inWindow; @@ -175,15 +155,11 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() TMeanAccumulator meanError1; TMaxAccumulator maxError1; - const TFloatVec &endpoints1{bucketing1.endpoints()}; + const TFloatVec& endpoints1{bucketing1.endpoints()}; TDoubleVec values1{bucketing1.values(20 * 86400)}; - for (std::size_t i = 1; i < endpoints1.size(); ++i) - { - core_t::TTime t{static_cast( - 0.5 * (endpoints1[i] + endpoints1[i-1] + 1.0))}; - ptrdiff_t j{std::lower_bound(boost::begin(times), - boost::end(times), - t) - boost::begin(times)}; + for (std::size_t i = 1; i < endpoints1.size(); ++i) { + core_t::TTime t{static_cast(0.5 * (endpoints1[i] + endpoints1[i - 1] + 1.0))}; + ptrdiff_t j{std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times)}; double x0{static_cast(times[j - 1])}; double x1{static_cast(times[j])}; double y0{function[j - 1]}; @@ -195,15 +171,11 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() TMeanAccumulator meanError2; TMaxAccumulator maxError2; - const TFloatVec &endpoints2{bucketing2.endpoints()}; + const TFloatVec& endpoints2{bucketing2.endpoints()}; TDoubleVec values2{bucketing2.values(20 * 86400)}; - for (std::size_t i = 1; i < endpoints1.size(); ++i) - { - core_t::TTime t{static_cast( - 0.5 * (endpoints2[i] + endpoints2[i-1] + 1.0))}; - ptrdiff_t j{std::lower_bound(boost::begin(times), - boost::end(times), - t) - boost::begin(times)}; + for (std::size_t i = 1; i < endpoints1.size(); ++i) { + core_t::TTime t{static_cast(0.5 * (endpoints2[i] + endpoints2[i - 1] + 1.0))}; + ptrdiff_t j{std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times)}; double x0{static_cast(times[j - 1])}; double x1{static_cast(times[j])}; double y0{function[j - 1]}; @@ -221,8 +193,7 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() CPPUNIT_ASSERT(maxError2[0] < 0.65 * maxError1[0]); } -void CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime() -{ +void 
CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime() { LOG_DEBUG("+------------------------------------------------------------------------+"); LOG_DEBUG("| CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime |"); LOG_DEBUG("+------------------------------------------------------------------------+"); @@ -235,40 +206,31 @@ void CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime() maths::CCalendarComponentAdaptiveBucketing bucketing{feature, 0.2}; bucketing.initialize(10); - for (core_t::TTime t = 0; t < 86400; t += 1800) - { - double y = 10.0 * (static_cast(t) - 43200.0) / 43200.0 - * (static_cast(t) - 43200.0) / 43200.0; + for (core_t::TTime t = 0; t < 86400; t += 1800) { + double y = 10.0 * (static_cast(t) - 43200.0) / 43200.0 * (static_cast(t) - 43200.0) / 43200.0; bucketing.add(t, y); } bucketing.refine(86400); bucketing.propagateForwardsByTime(1.0); double lastCount = bucketing.count(); - for (std::size_t i = 0u; i < 20; ++i) - { + for (std::size_t i = 0u; i < 20; ++i) { bucketing.propagateForwardsByTime(1.0); double count = bucketing.count(); - LOG_DEBUG("count = " << count - << ", lastCount = " << lastCount - << " count/lastCount = " << count/lastCount); + LOG_DEBUG("count = " << count << ", lastCount = " << lastCount << " count/lastCount = " << count / lastCount); CPPUNIT_ASSERT(count < lastCount); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.81873, count/lastCount, 5e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.81873, count / lastCount, 5e-6); lastCount = count; } } -void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() -{ +void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() { LOG_DEBUG("+--------------------------------------------------------------------+"); LOG_DEBUG("| CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength |"); LOG_DEBUG("+--------------------------------------------------------------------+"); - double function[] = - { - 0.0, 0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 - }; + double function[] = {0.0, 0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; std::size_t n{boost::size(function)}; test::CRandomNumbers rng; @@ -279,15 +241,12 @@ void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() bucketing1.initialize(n); bucketing2.initialize(n); - for (std::size_t i = 0u; i < 10; ++i) - { - for (core_t::TTime t = 0; t < 86400; t += 3600) - { + for (std::size_t i = 0u; i < 10; ++i) { + for (core_t::TTime t = 0; t < 86400; t += 3600) { TDoubleVec values; rng.generateNormalSamples(function[t / 3600], 1.0, 6, values); - for (core_t::TTime dt = 0; dt < 3600; dt += 600) - { + for (core_t::TTime dt = 0; dt < 3600; dt += 600) { bucketing1.add(t + dt, values[dt / 600]); bucketing2.add(t + dt, values[dt / 600]); } @@ -295,18 +254,17 @@ void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() bucketing1.refine(86400); bucketing2.refine(86400); - const TFloatVec &endpoints1{bucketing1.endpoints()}; - const TFloatVec &endpoints2{bucketing2.endpoints()}; + const TFloatVec& endpoints1{bucketing1.endpoints()}; + const TFloatVec& endpoints2{bucketing2.endpoints()}; CPPUNIT_ASSERT_EQUAL(endpoints1.size(), endpoints2.size()); TMinAccumulator minimumBucketLength1; TMinAccumulator minimumBucketLength2; double minimumTotalError{0.0}; - for (std::size_t j = 1u; j < endpoints1.size(); ++j) - { - 
minimumBucketLength1.add(endpoints1[j] - endpoints1[j-1]); - minimumBucketLength2.add(endpoints2[j] - endpoints2[j-1]); - double minimumShift{std::max(1500.0 - (endpoints1[j] - endpoints1[j-1]), 0.0) / 2.0}; + for (std::size_t j = 1u; j < endpoints1.size(); ++j) { + minimumBucketLength1.add(endpoints1[j] - endpoints1[j - 1]); + minimumBucketLength2.add(endpoints2[j] - endpoints2[j - 1]); + double minimumShift{std::max(1500.0 - (endpoints1[j] - endpoints1[j - 1]), 0.0) / 2.0}; minimumTotalError += minimumShift; } LOG_DEBUG("minimumBucketLength1 = " << minimumBucketLength1); @@ -317,8 +275,7 @@ void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() LOG_DEBUG(core::CContainerPrinter::print(endpoints2)); double totalError{0.0}; - for (std::size_t j = 0u; j < endpoints1.size(); ++j) - { + for (std::size_t j = 0u; j < endpoints1.size(); ++j) { totalError += std::fabs(endpoints2[j] - endpoints1[j]); } @@ -328,8 +285,7 @@ void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() } } -void CCalendarComponentAdaptiveBucketingTest::testUnintialized() -{ +void CCalendarComponentAdaptiveBucketingTest::testUnintialized() { LOG_DEBUG("+-------------------------------------------------------------+"); LOG_DEBUG("| CCalendarComponentAdaptiveBucketingTest::testUnintialized |"); LOG_DEBUG("+-------------------------------------------------------------+"); @@ -360,8 +316,7 @@ void CCalendarComponentAdaptiveBucketingTest::testUnintialized() bucketing.initialize(10); CPPUNIT_ASSERT(bucketing.initialized()); - for (core_t::TTime t = 0; t < 86400; t += 8640) - { + for (core_t::TTime t = 0; t < 86400; t += 8640) { bucketing.add(t, static_cast(t * t)); } @@ -377,8 +332,7 @@ void CCalendarComponentAdaptiveBucketingTest::testUnintialized() CPPUNIT_ASSERT(bucketing.variances().empty()); } -void CCalendarComponentAdaptiveBucketingTest::testKnots() -{ +void CCalendarComponentAdaptiveBucketingTest::testKnots() { LOG_DEBUG("+------------------------------------------------------+"); LOG_DEBUG("| CCalendarComponentAdaptiveBucketingTest::testKnots |"); LOG_DEBUG("+------------------------------------------------------+"); @@ -395,10 +349,8 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() bucketing.initialize(24); - for (core_t::TTime t = 0; t < 86400; t += 600) - { - double y{0.0002 * (static_cast(t) - 43800.0) - * (static_cast(t) - 43800.0)}; + for (core_t::TTime t = 0; t < 86400; t += 600) { + double y{0.0002 * (static_cast(t) - 43800.0) * (static_cast(t) - 43800.0)}; TDoubleVec noise; rng.generateNormalSamples(0.0, 4.0, 1, noise); bucketing.add(t, y + noise[0]); @@ -414,10 +366,8 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() TMeanAccumulator meanError; TMeanAccumulator meanValue; - for (std::size_t i = 0u; i < knots.size(); ++i) - { - double expectedValue{0.0002 * (knots[i] - 43800.0) - * (knots[i] - 43800.0)}; + for (std::size_t i = 0u; i < knots.size(); ++i) { + double expectedValue{0.0002 * (knots[i] - 43800.0) * (knots[i] - 43800.0)}; LOG_DEBUG("expected = " << expectedValue << ", value = " << values[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue, values[i], 50000.0); meanError.add(std::fabs(values[i] - expectedValue)); @@ -425,8 +375,7 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() } LOG_DEBUG("meanError = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG("meanValue = " << maths::CBasicStatistics::mean(meanValue)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanError) - / maths::CBasicStatistics::mean(meanValue) < 0.02); + 
CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanValue) < 0.02); } LOG_DEBUG("*** Variances ***"); @@ -436,19 +385,15 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() bucketing.initialize(24); bool lastInWindow{true}; - for (core_t::TTime t = 0; t < 15638400; t += 600) - { + for (core_t::TTime t = 0; t < 15638400; t += 600) { bool inWindow{bucketing.feature().inWindow(t)}; - if (inWindow) - { + if (inWindow) { double x = static_cast(bucketing.feature().offset(t)); double v{0.001 * (x - 43800.0) * (x - 43800.0) / 86400}; TDoubleVec noise; rng.generateNormalSamples(0.0, v, 1, noise); bucketing.add(t, noise[0]); - } - else if (lastInWindow && !inWindow) - { + } else if (lastInWindow && !inWindow) { bucketing.refine(t); } lastInWindow = inWindow; @@ -463,25 +408,20 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() TMeanAccumulator meanError; TMeanAccumulator meanVariance; - for (std::size_t i = 0u; i < knots.size(); ++i) - { - double expectedVariance{0.001 * (static_cast(knots[i]) - 43800.0) - * (static_cast(knots[i]) - 43800.0) / 86400}; - LOG_DEBUG("expected = " << expectedVariance - << ", variance = " << variances[i]); + for (std::size_t i = 0u; i < knots.size(); ++i) { + double expectedVariance{0.001 * (static_cast(knots[i]) - 43800.0) * (static_cast(knots[i]) - 43800.0) / 86400}; + LOG_DEBUG("expected = " << expectedVariance << ", variance = " << variances[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, variances[i], 5.0); meanError.add(std::fabs(variances[i] - expectedVariance)); meanVariance.add(std::fabs(expectedVariance)); } LOG_DEBUG("meanError = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG("meanVariance = " << maths::CBasicStatistics::mean(meanVariance)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanError) - / maths::CBasicStatistics::mean(meanVariance) < 0.16); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanVariance) < 0.16); } } -void CCalendarComponentAdaptiveBucketingTest::testPersist() -{ +void CCalendarComponentAdaptiveBucketingTest::testPersist() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CCalendarComponentAdaptiveBucketingTest::testPersist |"); LOG_DEBUG("+--------------------------------------------------------+"); @@ -495,15 +435,11 @@ void CCalendarComponentAdaptiveBucketingTest::testPersist() maths::CCalendarComponentAdaptiveBucketing bucketing{feature, decayRate, minimumBucketLength}; bucketing.initialize(10); - for (std::size_t p = 0; p < 10; ++p) - { - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t p = 0; p < 10; ++p) { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime t{static_cast(p * 86400 + 864 * i)}; - if (bucketing.feature().inWindow(t)) - { - double y{0.02 * (static_cast(i) - 50.0) - * (static_cast(i) - 50.0)}; + if (bucketing.feature().inWindow(t)) { + double y{0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0)}; bucketing.add(t, y); } } @@ -526,12 +462,9 @@ void CCalendarComponentAdaptiveBucketingTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); // Restore the XML into a new bucketing. 
- maths::CCalendarComponentAdaptiveBucketing restoredBucketing{decayRate + 0.1,
- minimumBucketLength,
- traverser};
+ maths::CCalendarComponentAdaptiveBucketing restoredBucketing{decayRate + 0.1, minimumBucketLength, traverser};
- LOG_DEBUG("orig checksum = " << checksum
- << " restored checksum = " << restoredBucketing.checksum());
+ LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredBucketing.checksum());
 CPPUNIT_ASSERT_EQUAL(checksum, restoredBucketing.checksum());
 // The XML representation of the new bucketing should be the
@@ -545,34 +478,27 @@ void CCalendarComponentAdaptiveBucketingTest::testPersist()
 CPPUNIT_ASSERT_EQUAL(origXml, newXml);
}
-CppUnit::Test *CCalendarComponentAdaptiveBucketingTest::suite()
-{
- CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCalendarComponentAdaptiveBucketingTest");
-
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
- "CCalendarComponentAdaptiveBucketingTest::testInitialize",
- &CCalendarComponentAdaptiveBucketingTest::testInitialize) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
- "CCalendarComponentAdaptiveBucketingTest::testSwap",
- &CCalendarComponentAdaptiveBucketingTest::testSwap) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
- "CCalendarComponentAdaptiveBucketingTest::testRefine",
- &CCalendarComponentAdaptiveBucketingTest::testRefine) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
- "CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime",
- &CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
- "CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength",
- &CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
- "CCalendarComponentAdaptiveBucketingTest::testUnintialized",
- &CCalendarComponentAdaptiveBucketingTest::testUnintialized) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
- "CCalendarComponentAdaptiveBucketingTest::testKnots",
- &CCalendarComponentAdaptiveBucketingTest::testKnots) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
- "CCalendarComponentAdaptiveBucketingTest::testPersist",
- &CCalendarComponentAdaptiveBucketingTest::testPersist) );
+CppUnit::Test* CCalendarComponentAdaptiveBucketingTest::suite() {
+ CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCalendarComponentAdaptiveBucketingTest");
+
+ suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
+ "CCalendarComponentAdaptiveBucketingTest::testInitialize", &CCalendarComponentAdaptiveBucketingTest::testInitialize));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
+ "CCalendarComponentAdaptiveBucketingTest::testSwap", &CCalendarComponentAdaptiveBucketingTest::testSwap));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
+ "CCalendarComponentAdaptiveBucketingTest::testRefine", &CCalendarComponentAdaptiveBucketingTest::testRefine));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
+ "CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime",
+ &CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
+ "CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength",
+ &CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
+ "CCalendarComponentAdaptiveBucketingTest::testUnintialized", &CCalendarComponentAdaptiveBucketingTest::testUnintialized));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
+ "CCalendarComponentAdaptiveBucketingTest::testKnots", &CCalendarComponentAdaptiveBucketingTest::testKnots));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>(
+ "CCalendarComponentAdaptiveBucketingTest::testPersist", &CCalendarComponentAdaptiveBucketingTest::testPersist));
 return suiteOfTests;
}
diff --git a/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.h b/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.h
index 20d031e8e8..5887cfd0ba 100644
--- a/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.h
+++ b/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.h
@@ -8,25 +8,24 @@
 #include
-class CCalendarComponentAdaptiveBucketingTest : public CppUnit::TestFixture
-{
- public:
- void setUp();
- void tearDown();
+class CCalendarComponentAdaptiveBucketingTest : public CppUnit::TestFixture {
+public:
+ void setUp();
+ void tearDown();
- void testInitialize();
- void testSwap();
- void testRefine();
- void testPropagateForwardsByTime();
- void testMinimumBucketLength();
- void testUnintialized();
- void testKnots();
- void testPersist();
+ void testInitialize();
+ void testSwap();
+ void testRefine();
+ void testPropagateForwardsByTime();
+ void testMinimumBucketLength();
+ void testUnintialized();
+ void testKnots();
+ void testPersist();
- static CppUnit::Test *suite();
+ static CppUnit::Test* suite();
- private:
- std::string m_Timezone;
+private:
+ std::string m_Timezone;
};
#endif // INCLUDED_CCalendarComponentAdaptiveBucketingTest_h
diff --git a/lib/maths/unittest/CCalendarFeatureTest.cc b/lib/maths/unittest/CCalendarFeatureTest.cc
index c525aff9e0..e828a9776e 100644
--- a/lib/maths/unittest/CCalendarFeatureTest.cc
+++ b/lib/maths/unittest/CCalendarFeatureTest.cc
@@ -5,11 +5,11 @@
 */
 #include "CCalendarFeatureTest.h"
-#include
 #include
-#include
 #include
 #include
+#include
 #include
@@ -23,31 +23,24 @@
 using TSizeVec = std::vector<std::size_t>;
 using TTimeVec = std::vector<core_t::TTime>;
 using TCalendarFeatureVec = std::vector<maths::CCalendarFeature>;
-namespace
-{
+namespace {
 const core_t::TTime DAY = 86400;
 //! \brief Sets the timezone to GMT for the lifetime of the object.
-class CScopeGMT -{ - public: - CScopeGMT() - { - m_Timezone = core::CTimezone::instance().timezoneName(); - core::CTimezone::instance().timezoneName("GMT"); - } - ~CScopeGMT() - { - core::CTimezone::instance().timezoneName(m_Timezone); - } +class CScopeGMT { +public: + CScopeGMT() { + m_Timezone = core::CTimezone::instance().timezoneName(); + core::CTimezone::instance().timezoneName("GMT"); + } + ~CScopeGMT() { core::CTimezone::instance().timezoneName(m_Timezone); } - private: - std::string m_Timezone; +private: + std::string m_Timezone; }; } -void CCalendarFeatureTest::testInitialize() -{ +void CCalendarFeatureTest::testInitialize() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CCalendarFeatureTest::testInitialize |"); LOG_DEBUG("+----------------------------------------+"); @@ -59,8 +52,7 @@ void CCalendarFeatureTest::testInitialize() TSizeVec times; rng.generateUniformSamples(100000, 10000000, 10, times); - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { core_t::TTime time{static_cast(times[i])}; maths::CCalendarFeature::TCalendarFeature4Ary expected; @@ -75,8 +67,7 @@ void CCalendarFeatureTest::testInitialize() } } -void CCalendarFeatureTest::testComparison() -{ +void CCalendarFeatureTest::testComparison() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CCalendarFeatureTest::testComparison |"); LOG_DEBUG("+----------------------------------------+"); @@ -86,8 +77,7 @@ void CCalendarFeatureTest::testComparison() test::CRandomNumbers rng; TSizeVec times; - for (core_t::TTime time = 1; time < 31 * DAY; time += DAY) - { + for (core_t::TTime time = 1; time < 31 * DAY; time += DAY) { times.push_back(time); } @@ -95,8 +85,7 @@ void CCalendarFeatureTest::testComparison() TCalendarFeatureVec features; - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { core_t::TTime time{static_cast(times[i])}; maths::CCalendarFeature::TCalendarFeature4Ary fi = maths::CCalendarFeature::features(time); features.insert(features.end(), fi.begin(), fi.end()); @@ -105,20 +94,17 @@ void CCalendarFeatureTest::testComparison() std::sort(features.begin(), features.end()); features.erase(std::unique(features.begin(), features.end()), features.end()); - for (std::size_t i = 0u; i < features.size(); ++i) - { + for (std::size_t i = 0u; i < features.size(); ++i) { CPPUNIT_ASSERT(features[i] == features[i]); CPPUNIT_ASSERT(!(features[i] < features[i] || features[i] > features[i])); - for (std::size_t j = i+1; j < features.size(); ++j) - { + for (std::size_t j = i + 1; j < features.size(); ++j) { CPPUNIT_ASSERT(features[i] != features[j]); CPPUNIT_ASSERT(features[i] < features[j] || features[i] > features[j]); } } } -void CCalendarFeatureTest::testOffset() -{ +void CCalendarFeatureTest::testOffset() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CCalendarFeatureTest::testOffset |"); LOG_DEBUG("+------------------------------------+"); @@ -139,8 +125,7 @@ void CCalendarFeatureTest::testOffset() std::size_t tests = 0u; - for (const auto &time_ : times) - { + for (const auto& time_ : times) { core_t::TTime time{start + static_cast(time_)}; maths::CCalendarFeature::TCalendarFeature4Ary features = maths::CCalendarFeature::features(time); @@ -150,19 +135,16 @@ void CCalendarFeatureTest::testOffset() TTimeVec offsets{-86400, -43400, -12800, -3600, 0, 3600, 12800, 43400, 86400}; - for (const auto &offset : offsets) - { + for (const auto& offset : offsets) 
{ core_t::TTime offsetTime = time + offset; int offsetMonth; core::CTimezone::instance().dateFields(offsetTime, dummy, dummy, dummy, offsetMonth, dummy, dummy); - if (month == offsetMonth) - { - for (const auto &feature : features) - { - CPPUNIT_ASSERT( feature.offset(time) + offset == feature.offset(offsetTime) - || feature.offset(time) + offset == feature.offset(offsetTime) - 3600 - || feature.offset(time) + offset == feature.offset(offsetTime) + 3600); + if (month == offsetMonth) { + for (const auto& feature : features) { + CPPUNIT_ASSERT(feature.offset(time) + offset == feature.offset(offsetTime) || + feature.offset(time) + offset == feature.offset(offsetTime) - 3600 || + feature.offset(time) + offset == feature.offset(offsetTime) + 3600); ++tests; } } @@ -174,42 +156,37 @@ void CCalendarFeatureTest::testOffset() LOG_DEBUG("# tests = " << tests); CPPUNIT_ASSERT(tests > 30000); - core_t::TTime feb1st = 31 * DAY; + core_t::TTime feb1st = 31 * DAY; core_t::TTime march1st = feb1st + 28 * DAY; core_t::TTime april1st = march1st + 31 * DAY; - core_t::TTime may1st = april1st + 30 * DAY; + core_t::TTime may1st = april1st + 30 * DAY; LOG_DEBUG("Test days since start of month"); { maths::CCalendarFeature feature(maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, feb1st); - for (core_t::TTime time = march1st; time < april1st; time += DAY) - { + for (core_t::TTime time = march1st; time < april1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - march1st, feature.offset(time)); CPPUNIT_ASSERT_EQUAL(time - march1st + 4800, feature.offset(time + 4800)); } } { maths::CCalendarFeature feature(maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, feb1st + 12 * DAY); - for (core_t::TTime time = march1st; time < april1st; time += DAY) - { + for (core_t::TTime time = march1st; time < april1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - march1st - 12 * DAY, feature.offset(time)); CPPUNIT_ASSERT_EQUAL(time - march1st - 12 * DAY + 43400, feature.offset(time + 43400)); } } - LOG_DEBUG("Test days before end of month") - { + LOG_DEBUG("Test days before end of month") { maths::CCalendarFeature feature(maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, feb1st); - for (core_t::TTime time = march1st; time < april1st; time += DAY) - { + for (core_t::TTime time = march1st; time < april1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - march1st - 3 * DAY, feature.offset(time)); CPPUNIT_ASSERT_EQUAL(time - march1st - 3 * DAY + 7200, feature.offset(time + 7200)); } } { maths::CCalendarFeature feature(maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, feb1st + 10 * DAY); - for (core_t::TTime time = march1st; time < april1st; time += DAY) - { + for (core_t::TTime time = march1st; time < april1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - march1st - 13 * DAY, feature.offset(time)); CPPUNIT_ASSERT_EQUAL(time - march1st - 13 * DAY + 86399, feature.offset(time + 86399)); } @@ -219,8 +196,7 @@ void CCalendarFeatureTest::testOffset() { // Feb 1 1970 is a Sunday and April 1st 1970 is a Wednesday. maths::CCalendarFeature feature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, feb1st); - for (core_t::TTime time = april1st; time < may1st; time += DAY) - { + for (core_t::TTime time = april1st; time < may1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - april1st - 4 * DAY, feature.offset(time)); CPPUNIT_ASSERT_EQUAL(time - april1st - 4 * DAY + 7200, feature.offset(time + 7200)); } @@ -228,8 +204,7 @@ void CCalendarFeatureTest::testOffset() { // Feb 13 1970 is a Friday and April 1st 1970 is a Wednesday. 
maths::CCalendarFeature feature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, feb1st + 12 * DAY);
- for (core_t::TTime time = april1st; time < may1st; time += DAY)
- {
+ for (core_t::TTime time = april1st; time < may1st; time += DAY) {
 CPPUNIT_ASSERT_EQUAL(time - april1st - 9 * DAY, feature.offset(time));
 CPPUNIT_ASSERT_EQUAL(time - april1st - 9 * DAY + 73000, feature.offset(time + 73000));
 }
 }
}
-void CCalendarFeatureTest::testPersist()
-{
+void CCalendarFeatureTest::testPersist() {
 LOG_DEBUG("+-------------------------------------+");
 LOG_DEBUG("| CCalendarFeatureTest::testPersist |");
 LOG_DEBUG("+-------------------------------------+");
- maths::CCalendarFeature::TCalendarFeature4Ary features =
- maths::CCalendarFeature::features(core::CTimeUtils::now());
+ maths::CCalendarFeature::TCalendarFeature4Ary features = maths::CCalendarFeature::features(core::CTimeUtils::now());
- for (std::size_t i = 0u; i < 4; ++i)
- {
+ for (std::size_t i = 0u; i < 4; ++i) {
 std::string state = features[i].toDelimited();
 LOG_DEBUG("state = " << state);
@@ -278,22 +248,17 @@ void CCalendarFeatureTest::testPersist()
 }
}
-CppUnit::Test *CCalendarFeatureTest::suite()
-{
- CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCalendarFeatureTest");
-
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarFeatureTest>(
- "CCalendarFeatureTest::testInitialize",
- &CCalendarFeatureTest::testInitialize) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarFeatureTest>(
- "CCalendarFeatureTest::testComparison",
- &CCalendarFeatureTest::testComparison) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarFeatureTest>(
- "CCalendarFeatureTest::testOffset",
- &CCalendarFeatureTest::testOffset) );
- suiteOfTests->addTest( new CppUnit::TestCaller<CCalendarFeatureTest>(
- "CCalendarFeatureTest::testPersist",
- &CCalendarFeatureTest::testPersist) );
+CppUnit::Test* CCalendarFeatureTest::suite() {
+ CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCalendarFeatureTest");
+
+ suiteOfTests->addTest(
+ new CppUnit::TestCaller<CCalendarFeatureTest>("CCalendarFeatureTest::testInitialize", &CCalendarFeatureTest::testInitialize));
+ suiteOfTests->addTest(
+ new CppUnit::TestCaller<CCalendarFeatureTest>("CCalendarFeatureTest::testComparison", &CCalendarFeatureTest::testComparison));
+ suiteOfTests->addTest(
+ new CppUnit::TestCaller<CCalendarFeatureTest>("CCalendarFeatureTest::testOffset", &CCalendarFeatureTest::testOffset));
+ suiteOfTests->addTest(
+ new CppUnit::TestCaller<CCalendarFeatureTest>("CCalendarFeatureTest::testPersist", &CCalendarFeatureTest::testPersist));
 return suiteOfTests;
}
diff --git a/lib/maths/unittest/CCalendarFeatureTest.h b/lib/maths/unittest/CCalendarFeatureTest.h index 6d45a8f297..d9fe32201e 100644 --- a/lib/maths/unittest/CCalendarFeatureTest.h +++ b/lib/maths/unittest/CCalendarFeatureTest.h @@ -8,15 +8,14 @@ #include -class CCalendarFeatureTest : public CppUnit::TestFixture -{ - public: - void testInitialize(); - void testComparison(); - void testOffset(); - void testPersist(); +class CCalendarFeatureTest : public CppUnit::TestFixture { +public: + void testInitialize(); + void testComparison(); + void testOffset(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CCalendarFeatureTest_h diff --git a/lib/maths/unittest/CCategoricalToolsTest.cc b/lib/maths/unittest/CCategoricalToolsTest.cc index 884e67728e..7a18743ab8 100644 --- a/lib/maths/unittest/CCategoricalToolsTest.cc +++ b/lib/maths/unittest/CCategoricalToolsTest.cc @@ -23,24 +23,19 @@ using TSizeVec = std::vector; using namespace ml; -void CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample() -{ +void CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample() { LOG_DEBUG("+-----------------------------------------------------------------------+"); LOG_DEBUG("| CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample |"); LOG_DEBUG("+-----------------------------------------------------------------------+"); - } -void CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount() -{ +void CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount() { LOG_DEBUG("+-------------------------------------------------------------------+"); LOG_DEBUG("| CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount |"); LOG_DEBUG("+-------------------------------------------------------------------+"); - } -void CCategoricalToolsTest::testExpectedDistinctCategories() -{ +void CCategoricalToolsTest::testExpectedDistinctCategories() { LOG_DEBUG("+---------------------------------------------------------+"); LOG_DEBUG("| CCategoricalToolsTest::testExpectedDistinctCategories |"); LOG_DEBUG("+---------------------------------------------------------+"); @@ -53,18 +48,15 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() test::CRandomNumbers rng; { - double categories[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double categories[] = {1.0, 2.0, 3.0, 4.0, 5.0}; { - double probabilities[] = { 0.2, 0.2, 0.2, 0.2, 0.2 }; + double probabilities[] = {0.2, 0.2, 0.2, 0.2, 0.2}; TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t i = 0u; i < nTrials; ++i) - { + for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), - boost::end(categories)), - TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); @@ -73,34 +65,29 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() } LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities)); LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " 
(deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG("distinctCategories = " << distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } { - double probabilities[] = { 0.1, 0.3, 0.4, 0.1, 0.1 }; + double probabilities[] = {0.1, 0.3, 0.4, 0.1, 0.1}; TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t i = 0u; i < nTrials; ++i) - { + for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), - boost::end(categories)), - TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); @@ -109,33 +96,28 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() } LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities)); LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG("distinctCategories = " << distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } { - double probabilities[] = { 0.35, 0.1, 0.25, 0.25, 0.05 }; + double probabilities[] = {0.35, 0.1, 0.25, 0.25, 0.05}; TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t i = 0u; i < nTrials; ++i) - { + for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), - boost::end(categories)), - TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + 
rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); @@ -144,37 +126,32 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() } LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities)); LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG("distinctCategories = " << distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } } { - double categories[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; + double categories[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; { - double probabilities[] = { 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1 }; + double probabilities[] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t i = 0u; i < nTrials; ++i) - { + for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), - boost::end(categories)), - TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); @@ -183,34 +160,29 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() } LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities)); LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG("distinctCategories = " << 
distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } { - double probabilities[] = { 0.05, 0.3, 0.4, 0.02, 0.03, 0.05, 0.05, 0.01, 0.02, 0.07 }; + double probabilities[] = {0.05, 0.3, 0.4, 0.02, 0.03, 0.05, 0.05, 0.01, 0.02, 0.07}; TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t i = 0u; i < nTrials; ++i) - { + for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), - boost::end(categories)), - TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); @@ -219,33 +191,28 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() } LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities)); LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG("distinctCategories = " << distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } { - double probabilities[] = { 0.05, 0.1, 0.15, 0.15, 0.05, 0.05, 0.1, 0.15, 0.15, 0.05 }; + double probabilities[] = {0.05, 0.1, 0.15, 0.15, 0.05, 0.05, 0.1, 0.15, 0.15, 0.05}; TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t i = 0u; i < nTrials; ++i) - { + for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), - boost::end(categories)), - TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); @@ -254,27 +221,24 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() } LOG_DEBUG("probabilities = " << 
core::CContainerPrinter::print(probabilities)); LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), - boost::end(probabilities)), + maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG("distinctCategories = " << distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } } { TDoubleVec categories; - for (std::size_t i = 1u; i < 101; ++i) - { + for (std::size_t i = 1u; i < 101; ++i) { categories.push_back(static_cast(i)); } @@ -284,11 +248,9 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() TDoubleVecVec probabilities; rng.generateDirichletSamples(concentrations, 50u, probabilities); - for (std::size_t i = 0u; i < 50; ++i) - { + for (std::size_t i = 0u; i < 50; ++i) { TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t j = 0u; j < nTrials; ++j) - { + for (std::size_t j = 0u; j < nTrials; ++j) { TDoubleVec samples; rng.generateMultinomialSamples(categories, probabilities[i], categories.size(), samples); std::sort(samples.begin(), samples.end()); @@ -296,20 +258,19 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(probabilities[i], - static_cast(categories.size()), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + probabilities[i], static_cast(categories.size()), distinctCategories); LOG_DEBUG("distinctCategories = " << distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 3.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 3.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } } { @@ -321,11 +282,9 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() TDoubleVecVec probabilities; 
rng.generateDirichletSamples(concentrations, 50u, probabilities); - for (std::size_t i = 0u; i < 50; ++i) - { + for (std::size_t i = 0u; i < 50; ++i) { TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t j = 0u; j < nTrials; ++j) - { + for (std::size_t j = 0u; j < nTrials; ++j) { TDoubleVec samples; rng.generateMultinomialSamples(categories, probabilities[i], categories.size(), samples); std::sort(samples.begin(), samples.end()); @@ -333,20 +292,19 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(probabilities[i], - static_cast(categories.size()), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + probabilities[i], static_cast(categories.size()), distinctCategories); LOG_DEBUG("distinctCategories = " << distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 3.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 3.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } } { @@ -362,11 +320,9 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() TDoubleVecVec probabilities; rng.generateDirichletSamples(concentrations, 50u, probabilities); - for (std::size_t i = 0u; i < 50; ++i) - { + for (std::size_t i = 0u; i < 50; ++i) { TMeanVarAccumulator expectedDistinctCategories; - for (std::size_t j = 0u; j < nTrials; ++j) - { + for (std::size_t j = 0u; j < nTrials; ++j) { TDoubleVec samples; rng.generateMultinomialSamples(categories, probabilities[i], categories.size(), samples); std::sort(samples.begin(), samples.end()); @@ -374,80 +330,67 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG("expectedDistinctCategories = " - << maths::CBasicStatistics::mean(expectedDistinctCategories) - << " (deviation = " << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials)) << ")"); + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(probabilities[i], - static_cast(categories.size()), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + probabilities[i], static_cast(categories.size()), distinctCategories); LOG_DEBUG("distinctCategories = " << distinctCategories); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.5 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) - / static_cast(nTrials))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 
maths::CBasicStatistics::mean(expectedDistinctCategories), + distinctCategories, + 2.5 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); } } } } -void CCategoricalToolsTest::testLogBinomialProbability() -{ +void CCategoricalToolsTest::testLogBinomialProbability() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CCategoricalToolsTest::testLogBinomialProbability |"); LOG_DEBUG("+-----------------------------------------------------+"); // Test the calculation matches the boost::binomial_distribution. - double n[] = { 10, 100, 10000 }; - double p[] = { 0.1, 0.5, 0.9 }; + double n[] = {10, 100, 10000}; + double p[] = {0.1, 0.5, 0.9}; - for (std::size_t i = 0u; i < boost::size(n); ++i) - { - for (std::size_t j = 0u; j < boost::size(p); ++j) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { + for (std::size_t j = 0u; j < boost::size(p); ++j) { LOG_DEBUG("n = " << n[i] << ", p = " << p[j]); boost::math::binomial_distribution<> binomial(n[i], p[j]); double median = boost::math::median(binomial); - for (std::size_t f = 1u; f < 10; ++f) - { + for (std::size_t f = 1u; f < 10; ++f) { double f_ = static_cast(f) / 10.0; double m = std::floor(f_ * median); double pdf = boost::math::pdf(binomial, m); double logpdf; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - maths::CCategoricalTools::logBinomialProbability(static_cast(n[i]), - p[j], - static_cast(m), - logpdf)); - LOG_DEBUG("f(" << m << "), expected = " << pdf - << ", actual = " << std::exp(logpdf)); + maths::CCategoricalTools::logBinomialProbability( + static_cast(n[i]), p[j], static_cast(m), logpdf)); + LOG_DEBUG("f(" << m << "), expected = " << pdf << ", actual = " << std::exp(logpdf)); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, std::exp(logpdf), 1e-6 * pdf); } - for (std::size_t f = 1u; f < 10; ++f) - { + for (std::size_t f = 1u; f < 10; ++f) { double f_ = static_cast(f) / 10.0; double m = median + std::floor(f_ * (n[i] - median)); double pdf = boost::math::pdf(binomial, m); double logpdf; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - maths::CCategoricalTools::logBinomialProbability(static_cast(n[i]), - p[j], - static_cast(m), - logpdf)); - LOG_DEBUG("f(" << m << "), expected = " << pdf - << ", actual = " << std::exp(logpdf)); + maths::CCategoricalTools::logBinomialProbability( + static_cast(n[i]), p[j], static_cast(m), logpdf)); + LOG_DEBUG("f(" << m << "), expected = " << pdf << ", actual = " << std::exp(logpdf)); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, std::exp(logpdf), 1e-6 * pdf); } } } } -void CCategoricalToolsTest::testLogMultinomialProbability() -{ +void CCategoricalToolsTest::testLogMultinomialProbability() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CCategoricalToolsTest::testLogMultinomialProbability |"); LOG_DEBUG("+--------------------------------------------------------+"); @@ -459,19 +402,16 @@ void CCategoricalToolsTest::testLogMultinomialProbability() LOG_DEBUG(""); LOG_DEBUG("*** Test two categories ***"); { - double n[] = { 10, 100, 10000 }; - double p[] = { 0.1, 0.5, 0.9 }; + double n[] = {10, 100, 10000}; + double p[] = {0.1, 0.5, 0.9}; - for (std::size_t i = 0u; i < boost::size(n); ++i) - { - for (std::size_t j = 0u; j < boost::size(p); ++j) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { + for (std::size_t j = 0u; j < boost::size(p); ++j) { LOG_DEBUG("n = " << n[i] << ", p = " << p[j]); boost::math::binomial_distribution<> binomial(n[i], p[j]); double median = boost::math::median(binomial); - for 
(std::size_t f = 1u; f < 10; ++f) - { + for (std::size_t f = 1u; f < 10; ++f) { double f_ = static_cast(f) / 10.0; double m = std::floor(f_ * median); double pdf = boost::math::pdf(binomial, m); @@ -482,14 +422,11 @@ void CCategoricalToolsTest::testLogMultinomialProbability() TSizeVec ni; ni.push_back(static_cast(m)); ni.push_back(static_cast(n[i] - m)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); - LOG_DEBUG("f(" << m << "), expected = " << pdf - << ", actual = " << std::exp(logpdf)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); + LOG_DEBUG("f(" << m << "), expected = " << pdf << ", actual = " << std::exp(logpdf)); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, std::exp(logpdf), 1e-6 * pdf); } - for (std::size_t f = 1u; f < 10; ++f) - { + for (std::size_t f = 1u; f < 10; ++f) { double f_ = static_cast(f) / 10.0; double m = median + std::floor(f_ * (n[i] - median)); double pdf = boost::math::pdf(binomial, m); @@ -500,10 +437,8 @@ void CCategoricalToolsTest::testLogMultinomialProbability() TSizeVec ni; ni.push_back(static_cast(m)); ni.push_back(static_cast(n[i] - m)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); - LOG_DEBUG("f(" << m << "), expected = " << pdf - << ", actual = " << std::exp(logpdf)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); + LOG_DEBUG("f(" << m << "), expected = " << pdf << ", actual = " << std::exp(logpdf)); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, std::exp(logpdf), 1e-6 * pdf); } } @@ -519,18 +454,15 @@ void CCategoricalToolsTest::testLogMultinomialProbability() pi.push_back(0.6); std::size_t n = 10; - for (std::size_t m = 0u; m <= n; ++m) - { + for (std::size_t m = 0u; m <= n; ++m) { double marginal = 0.0; - for (std::size_t i = 0u; i <= n - m; ++i) - { + for (std::size_t i = 0u; i <= n - m; ++i) { double logpdf; TSizeVec ni; ni.push_back(m); ni.push_back(i); ni.push_back(n - m - i); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); marginal += std::exp(logpdf); } @@ -542,25 +474,20 @@ void CCategoricalToolsTest::testLogMultinomialProbability() } } -CppUnit::Test *CCategoricalToolsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCategoricalToolsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample", - &CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount", - &CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCategoricalToolsTest::testExpectedDistinctCategories", - &CCategoricalToolsTest::testExpectedDistinctCategories) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCategoricalToolsTest::testLogBinomialProbability", - &CCategoricalToolsTest::testLogBinomialProbability) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCategoricalToolsTest::testLogMultinomialProbability", - &CCategoricalToolsTest::testLogMultinomialProbability) ); +CppUnit::Test* CCategoricalToolsTest::suite() { + 
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCategoricalToolsTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample", + &CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount", + &CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount)); + suiteOfTests->addTest(new CppUnit::TestCaller("CCategoricalToolsTest::testExpectedDistinctCategories", + &CCategoricalToolsTest::testExpectedDistinctCategories)); + suiteOfTests->addTest(new CppUnit::TestCaller("CCategoricalToolsTest::testLogBinomialProbability", + &CCategoricalToolsTest::testLogBinomialProbability)); + suiteOfTests->addTest(new CppUnit::TestCaller("CCategoricalToolsTest::testLogMultinomialProbability", + &CCategoricalToolsTest::testLogMultinomialProbability)); return suiteOfTests; } diff --git a/lib/maths/unittest/CCategoricalToolsTest.h b/lib/maths/unittest/CCategoricalToolsTest.h index 0355422888..eab87d9add 100644 --- a/lib/maths/unittest/CCategoricalToolsTest.h +++ b/lib/maths/unittest/CCategoricalToolsTest.h @@ -9,16 +9,15 @@ #include -class CCategoricalToolsTest : public CppUnit::TestFixture -{ - public: - void testProbabilityOfLessLikelyMultinomialSample(); - void testProbabilityOfLessLikelyCategoryCount(); - void testExpectedDistinctCategories(); - void testLogBinomialProbability(); - void testLogMultinomialProbability(); +class CCategoricalToolsTest : public CppUnit::TestFixture { +public: + void testProbabilityOfLessLikelyMultinomialSample(); + void testProbabilityOfLessLikelyCategoryCount(); + void testExpectedDistinctCategories(); + void testLogBinomialProbability(); + void testLogMultinomialProbability(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CCategoricalToolsTest_h diff --git a/lib/maths/unittest/CChecksumTest.cc b/lib/maths/unittest/CChecksumTest.cc index a80f349c26..111f2327ee 100644 --- a/lib/maths/unittest/CChecksumTest.cc +++ b/lib/maths/unittest/CChecksumTest.cc @@ -30,33 +30,19 @@ using namespace ml; -namespace -{ - -enum EAnEnum -{ - E_1, - E_2, - E_3 -}; +namespace { + +enum EAnEnum { E_1, E_2, E_3 }; -struct SFoo -{ +struct SFoo { SFoo(uint64_t key) : s_Key(key) {} - uint64_t checksum() const - { - return s_Key; - } + uint64_t checksum() const { return s_Key; } uint64_t s_Key; }; -struct SBar -{ +struct SBar { SBar(uint64_t key) : s_Key(key) {} - uint64_t checksum(uint64_t seed) const - { - return core::CHashing::hashCombine(seed, s_Key); - } + uint64_t checksum(uint64_t seed) const { return core::CHashing::hashCombine(seed, s_Key); } uint64_t s_Key; }; @@ -72,11 +58,9 @@ using TDoubleMeanVarAccumulatorPr = std::pair; using TDoubleMeanVarAccumulatorPrList = std::list; using TFooDeque = std::deque; using TBarVec = std::vector; - } -void CChecksumTest::testMemberChecksum() -{ +void CChecksumTest::testMemberChecksum() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CChecksumTest::testMemberChecksum |"); LOG_DEBUG("+-------------------------------------+"); @@ -89,16 +73,13 @@ void CChecksumTest::testMemberChecksum() // Test that member functions are invoked. 
SFoo foo(100); LOG_DEBUG("checksum foo = " << maths::CChecksum::calculate(seed, foo)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, foo), - core::CHashing::hashCombine(seed, foo.checksum())); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, foo), core::CHashing::hashCombine(seed, foo.checksum())); SBar bar(200); LOG_DEBUG("checksum bar = " << maths::CChecksum::calculate(seed, bar)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, bar), - bar.checksum(seed)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, bar), bar.checksum(seed)); } -void CChecksumTest::testContainers() -{ +void CChecksumTest::testContainers() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CChecksumTest::testContainers |"); LOG_DEBUG("+---------------------------------+"); @@ -113,73 +94,58 @@ void CChecksumTest::testContainers() // slightly, i.e. by changing an element value, permuting elements, // etc. { - int values[] = { -1, 20, 10, 15, 2, 2 }; + int values[] = {-1, 20, 10, 15, 2, 2}; TIntVec a(boost::begin(values), boost::end(values)); TIntVec b(boost::begin(values), boost::end(values)); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), - maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); b[2] = 3; LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); rng.random_shuffle(b.begin(), b.end()); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); b[b.size() - 1] = 3; LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); } { - TSizeAnEnumMap::value_type values[] = - { - TSizeAnEnumMap::value_type(-1, E_2), - TSizeAnEnumMap::value_type(20, E_1), - TSizeAnEnumMap::value_type(10, E_3), - TSizeAnEnumMap::value_type(15, E_1), - TSizeAnEnumMap::value_type(2, E_2), - TSizeAnEnumMap::value_type(3, E_1) - }; + TSizeAnEnumMap::value_type values[] = {TSizeAnEnumMap::value_type(-1, E_2), + TSizeAnEnumMap::value_type(20, E_1), + TSizeAnEnumMap::value_type(10, E_3), + TSizeAnEnumMap::value_type(15, E_1), + TSizeAnEnumMap::value_type(2, E_2), + TSizeAnEnumMap::value_type(3, E_1)}; TSizeAnEnumMap a(boost::begin(values), boost::end(values)); TSizeAnEnumMap b(boost::begin(values), boost::end(values)); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), - maths::CChecksum::calculate(seed, b)); + 
CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); b[2] = E_1; LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); b.clear(); - std::copy(boost::begin(values), - boost::end(values), - std::inserter(b, b.end())); + std::copy(boost::begin(values), boost::end(values), std::inserter(b, b.end())); b.erase(2); b[4] = E_2; LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); } { - std::string values[] = { "rain", "in", "spain" }; + std::string values[] = {"rain", "in", "spain"}; TStrSet a(boost::begin(values), boost::end(values)); uint64_t expected = seed; core::CHashing::CSafeMurmurHash2String64 hasher; - for (TStrSetCItr itr = a.begin(); itr != a.end(); ++itr) - { - expected = core::CHashing::safeMurmurHash64(itr->data(), - static_cast(itr->size()), - expected); + for (TStrSetCItr itr = a.begin(); itr != a.end(); ++itr) { + expected = core::CHashing::safeMurmurHash64(itr->data(), static_cast(itr->size()), expected); } LOG_DEBUG("checksum expected = " << expected); LOG_DEBUG("checksum actual = " << maths::CChecksum::calculate(seed, a)); @@ -187,40 +153,35 @@ void CChecksumTest::testContainers() } // Test that unordered containers are sorted. - std::string keys[] = { "the", "quick", "brown", "fox" }; - double values[] = { 5.6, 2.1, -3.0, 22.1 }; + std::string keys[] = {"the", "quick", "brown", "fox"}; + double values[] = {5.6, 2.1, -3.0, 22.1}; { boost::unordered_set a; std::set b; - for (std::size_t i = 0u; i < boost::size(values); ++i) - { + for (std::size_t i = 0u; i < boost::size(values); ++i) { a.insert(values[i]); b.insert(values[i]); } LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), - maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); } { boost::unordered_map a; std::map b; - for (std::size_t i = 0u; i < boost::size(keys); ++i) - { + for (std::size_t i = 0u; i < boost::size(keys); ++i) { a.insert(std::make_pair(keys[i], values[i])); b.insert(std::make_pair(keys[i], values[i])); } LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), - maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); } } -void CChecksumTest::testNullable() -{ +void CChecksumTest::testNullable() { LOG_DEBUG("+-------------------------------+"); LOG_DEBUG("| CChecksumTest::testNullable |"); LOG_DEBUG("+-------------------------------+"); @@ -240,8 +201,7 @@ void CChecksumTest::testNullable() TOptionalDouble optional(value); LOG_DEBUG("checksum expected = " << maths::CChecksum::calculate(seed, value)); LOG_DEBUG("checksum actual = " << maths::CChecksum::calculate(seed, 
optional)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, value), - maths::CChecksum::calculate(seed, optional)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, value), maths::CChecksum::calculate(seed, optional)); } { TMeanVarAccumulator value; @@ -252,13 +212,11 @@ void CChecksumTest::testNullable() TMeanVarAccumulatorPtr pointer(new TMeanVarAccumulator(value)); LOG_DEBUG("checksum expected = " << maths::CChecksum::calculate(seed, value)); LOG_DEBUG("checksum actual = " << maths::CChecksum::calculate(seed, pointer)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, value), - maths::CChecksum::calculate(seed, pointer)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, value), maths::CChecksum::calculate(seed, pointer)); } } -void CChecksumTest::testAccumulators() -{ +void CChecksumTest::testAccumulators() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CChecksumTest::testAccumulators |"); LOG_DEBUG("+-----------------------------------+"); @@ -273,13 +231,11 @@ void CChecksumTest::testAccumulators() value.add(653.0); LOG_DEBUG("checksum expected = " << core::CHashing::hashCombine(seed, value.checksum())); LOG_DEBUG("checksum actual = " << maths::CChecksum::calculate(seed, value)); - CPPUNIT_ASSERT_EQUAL(core::CHashing::hashCombine(seed, value.checksum()), - maths::CChecksum::calculate(seed, value)); + CPPUNIT_ASSERT_EQUAL(core::CHashing::hashCombine(seed, value.checksum()), maths::CChecksum::calculate(seed, value)); } } -void CChecksumTest::testPair() -{ +void CChecksumTest::testPair() { LOG_DEBUG("+---------------------------+"); LOG_DEBUG("| CChecksumTest::testPair |"); LOG_DEBUG("+---------------------------+"); @@ -298,14 +254,12 @@ void CChecksumTest::testPair() b.first = 4790.0; LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); b = a; b.second.add(678629.0); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); TDoubleMeanVarAccumulatorPrList collection; collection.push_back(a); @@ -315,35 +269,30 @@ void CChecksumTest::testPair() LOG_DEBUG("expected checksum = " << expected); LOG_DEBUG("actual checksum = " << maths::CChecksum::calculate(seed, collection)); CPPUNIT_ASSERT_EQUAL(expected, maths::CChecksum::calculate(seed, collection)); - } } -void CChecksumTest::testArray() -{ +void CChecksumTest::testArray() { LOG_DEBUG("+----------------------------+"); LOG_DEBUG("| CChecksumTest::testArray |"); LOG_DEBUG("+----------------------------+"); uint64_t seed = 1679023009937ull; - double a[] = { 1.0, 23.8, 15.2, 14.7 }; - double b[] = { 1.0, 23.8, 15.2, 14.7 }; + double a[] = {1.0, 23.8, 15.2, 14.7}; + double b[] = {1.0, 23.8, 15.2, 14.7}; LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - == maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) == maths::CChecksum::calculate(seed, b)); b[1] = 23.79; 
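    // (Editor's note, illustrative only, not part of the original patch: built-in
    // arrays are checksummed element by element, each value being folded into the
    // running hash, so perturbing a single element, as above, must change the result:
    //
    //     double a[] = {1.0, 23.8, 15.2, 14.7};
    //     double b[] = {1.0, 23.79, 15.2, 14.7};
    //     // maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)
    // )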
LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); } -void CChecksumTest::testCombinations() -{ +void CChecksumTest::testCombinations() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CChecksumTest::testCombinations |"); LOG_DEBUG("+-----------------------------------+"); @@ -361,100 +310,61 @@ void CChecksumTest::testCombinations() // slightly, i.e. by changing an element value, permuting elements, // etc. { - SFoo values[] = - { - SFoo(static_cast(-1)), - SFoo(20), - SFoo(10), - SFoo(15), - SFoo(2), - SFoo(2) - }; + SFoo values[] = {SFoo(static_cast(-1)), SFoo(20), SFoo(10), SFoo(15), SFoo(2), SFoo(2)}; TFooDeque a(boost::begin(values), boost::end(values)); TFooDeque b(boost::begin(values), boost::end(values)); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), - maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); b[2] = SFoo(3); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); rng.random_shuffle(b.begin(), b.end()); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); b[b.size() - 1] = 3; LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); } { - SBar values[] = - { - SBar(static_cast(-1)), - SBar(20), - SBar(10), - SBar(15), - SBar(2), - SBar(2) - }; + SBar values[] = {SBar(static_cast(-1)), SBar(20), SBar(10), SBar(15), SBar(2), SBar(2)}; TBarVec a(boost::begin(values), boost::end(values)); TBarVec b(boost::begin(values), boost::end(values)); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), - maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); b[2] = SBar(3); LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) - != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), 
boost::end(values));
         rng.random_shuffle(b.begin(), b.end());
         LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a));
         LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b));
-        CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a)
-                       != maths::CChecksum::calculate(seed, b));
+        CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b));
 
         b.assign(boost::begin(values), boost::end(values));
         b[b.size() - 1] = 3;
         LOG_DEBUG("checksum a = " << maths::CChecksum::calculate(seed, a));
         LOG_DEBUG("checksum b = " << maths::CChecksum::calculate(seed, b));
-        CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a)
-                       != maths::CChecksum::calculate(seed, b));
+        CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b));
     }
 }
 
-CppUnit::Test *CChecksumTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CChecksumTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CChecksumTest>(
-                               "CChecksumTest::testMemberChecksum",
-                               &CChecksumTest::testMemberChecksum) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CChecksumTest>(
-                               "CChecksumTest::testContainers",
-                               &CChecksumTest::testContainers) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CChecksumTest>(
-                               "CChecksumTest::testNullable",
-                               &CChecksumTest::testNullable) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CChecksumTest>(
-                               "CChecksumTest::testAccumulators",
-                               &CChecksumTest::testAccumulators) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CChecksumTest>(
-                               "CChecksumTest::testPair",
-                               &CChecksumTest::testPair) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CChecksumTest>(
-                               "CChecksumTest::testArray",
-                               &CChecksumTest::testArray) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CChecksumTest>(
-                               "CChecksumTest::testCombinations",
-                               &CChecksumTest::testCombinations) );
+CppUnit::Test* CChecksumTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CChecksumTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testMemberChecksum", &CChecksumTest::testMemberChecksum));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testContainers", &CChecksumTest::testContainers));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testNullable", &CChecksumTest::testNullable));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testAccumulators", &CChecksumTest::testAccumulators));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testPair", &CChecksumTest::testPair));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testArray", &CChecksumTest::testArray));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testCombinations", &CChecksumTest::testCombinations));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CChecksumTest.h b/lib/maths/unittest/CChecksumTest.h
index 4ba7374ae5..05e34debd9 100644
--- a/lib/maths/unittest/CChecksumTest.h
+++ b/lib/maths/unittest/CChecksumTest.h
@@ -9,18 +9,17 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CChecksumTest : public CppUnit::TestFixture
-{
-    public:
-        void testMemberChecksum();
-        void testContainers();
-        void testNullable();
-        void testAccumulators();
-        void testPair();
-        void testArray();
-        void testCombinations();
+class CChecksumTest : public CppUnit::TestFixture {
+public:
+    void testMemberChecksum();
+    void testContainers();
+    void testNullable();
+    void testAccumulators();
+    void testPair();
+    void testArray();
+    void testCombinations();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CChecksumTest_h
diff --git a/lib/maths/unittest/CClustererTest.cc
b/lib/maths/unittest/CClustererTest.cc index 50d0b4aeb0..c672c5b00a 100644 --- a/lib/maths/unittest/CClustererTest.cc +++ b/lib/maths/unittest/CClustererTest.cc @@ -17,8 +17,7 @@ using namespace ml; -void CClustererTest::testIndexGenerator() -{ +void CClustererTest::testIndexGenerator() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CClustererTest::testIndexGenerator |"); LOG_DEBUG("+--------------------------------------+"); @@ -41,8 +40,7 @@ void CClustererTest::testIndexGenerator() rng.generateUniformSamples(0.0, 1.0, numberOperations, tmp); TSizeVec nexts; nexts.reserve(tmp.size()); - for (std::size_t i = 0u; i < tmp.size(); ++i) - { + for (std::size_t i = 0u; i < tmp.size(); ++i) { nexts.push_back(static_cast(tmp[i] + 0.5)); } @@ -51,26 +49,19 @@ void CClustererTest::testIndexGenerator() TSizeSet indices; std::size_t maxSetSize = 0u; - for (std::size_t i = 0u; i < numberOperations; ++i) - { - if (i % 1000 == 0) - { + for (std::size_t i = 0u; i < numberOperations; ++i) { + if (i % 1000 == 0) { LOG_DEBUG("maxSetSize = " << maxSetSize); LOG_DEBUG("indices = " << core::CContainerPrinter::print(indices)); } - if (nexts[i] == 1) - { + if (nexts[i] == 1) { CPPUNIT_ASSERT(indices.insert(generator.next()).second); maxSetSize = std::max(maxSetSize, indices.size()); - if (*indices.begin() >= maxSetSize) - { - LOG_DEBUG("index = " << *indices.begin() - << ", maxSetSize = " << maxSetSize); + if (*indices.begin() >= maxSetSize) { + LOG_DEBUG("index = " << *indices.begin() << ", maxSetSize = " << maxSetSize); } CPPUNIT_ASSERT(*indices.begin() < maxSetSize); - } - else if (!indices.empty()) - { + } else if (!indices.empty()) { TDoubleVec indexToErase; double max = static_cast(indices.size()) - 1e-3; rng.generateUniformSamples(0.0, max, 1u, indexToErase); @@ -83,13 +74,11 @@ void CClustererTest::testIndexGenerator() } } -CppUnit::Test *CClustererTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CClustererTest"); +CppUnit::Test* CClustererTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CClustererTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CClustererTest::testIndexGenerator", - &CClustererTest::testIndexGenerator) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CClustererTest::testIndexGenerator", &CClustererTest::testIndexGenerator)); return suiteOfTests; } diff --git a/lib/maths/unittest/CClustererTest.h b/lib/maths/unittest/CClustererTest.h index 180efd1490..471155130d 100644 --- a/lib/maths/unittest/CClustererTest.h +++ b/lib/maths/unittest/CClustererTest.h @@ -9,12 +9,11 @@ #include -class CClustererTest : public CppUnit::TestFixture -{ - public: - void testIndexGenerator(); +class CClustererTest : public CppUnit::TestFixture { +public: + void testIndexGenerator(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CClustererTest_h diff --git a/lib/maths/unittest/CCountMinSketchTest.cc b/lib/maths/unittest/CCountMinSketchTest.cc index 71f04419e1..5d9a46fadd 100644 --- a/lib/maths/unittest/CCountMinSketchTest.cc +++ b/lib/maths/unittest/CCountMinSketchTest.cc @@ -19,8 +19,7 @@ using namespace ml; using TDoubleVec = std::vector; -void CCountMinSketchTest::testCounts() -{ +void CCountMinSketchTest::testCounts() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CCountMinSketchTest::testCounts |"); LOG_DEBUG("+-----------------------------------+"); @@ -36,8 +35,7 @@ void CCountMinSketchTest::testCounts() LOG_DEBUG("") LOG_DEBUG("Test Uniform") - 
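    // (Editor's note, illustrative only, not part of the original patch: a count-min
    // sketch can only overestimate, because hash collisions add other categories'
    // counts on top of the true value. For a sketch of width w and depth d one
    // expects, with high probability, something like
    //
    //     trueCount <= sketch.count(i) <= trueCount + eps * totalCount
    //
    // where eps shrinks as w grows; the loop below measures how often the estimate
    // exceeds the true count by more than the implementation's own bound,
    // sketch.oneMinusDeltaError().)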
for (std::size_t t = 0u, n = 100u; n < 1500; ++t, n += 100) - { + for (std::size_t t = 0u, n = 100u; n < 1500; ++t, n += 100) { LOG_DEBUG("*** number categories = " << n << " ***"); TDoubleVec counts; @@ -45,8 +43,7 @@ void CCountMinSketchTest::testCounts() maths::CCountMinSketch sketch(2, 751); - for (std::size_t i = 0u; i < counts.size(); ++i) - { + for (std::size_t i = 0u; i < counts.size(); ++i) { counts[i] = std::floor(counts[i]); sketch.add(static_cast(i), counts[i]); } @@ -54,31 +51,23 @@ void CCountMinSketchTest::testCounts() TMeanAccumulator meanError; double errorCount = 0.0; - for (std::size_t i = 0u; i < counts.size(); ++i) - { + for (std::size_t i = 0u; i < counts.size(); ++i) { double count = counts[i]; double estimated = sketch.count(static_cast(i)); - if (i % 50 == 0) - { - LOG_DEBUG("category = " << i - << ", true count = " << count - << ", estimated count = " << estimated); + if (i % 50 == 0) { + LOG_DEBUG("category = " << i << ", true count = " << count << ", estimated count = " << estimated); } meanError.add(std::fabs(estimated - count)); - if (count + sketch.oneMinusDeltaError() < estimated) - { + if (count + sketch.oneMinusDeltaError() < estimated) { errorCount += 1.0; } } LOG_DEBUG("mean error = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG("error count = " << errorCount); - if (sketch.oneMinusDeltaError() == 0.0) - { + if (sketch.oneMinusDeltaError() == 0.0) { CPPUNIT_ASSERT_EQUAL(0.0, maths::CBasicStatistics::mean(meanError)); - } - else - { + } else { //CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) // < 0.1 * static_cast(n)); } @@ -99,26 +88,21 @@ void CCountMinSketchTest::testCounts() maths::CCountMinSketch sketch(2, 751); - for (std::size_t i = 0u; i < heavyHitters.size(); ++i) - { + for (std::size_t i = 0u; i < heavyHitters.size(); ++i) { heavyHitters[i] = std::floor(heavyHitters[i]); sketch.add(static_cast(i), heavyHitters[i]); } - for (std::size_t i = 0u; i < counts.size(); ++i) - { + for (std::size_t i = 0u; i < counts.size(); ++i) { counts[i] = std::floor(counts[i]); sketch.add(static_cast(i + heavyHitters.size()), counts[i]); } LOG_DEBUG("error = " << sketch.oneMinusDeltaError()); TMeanAccumulator meanRelativeError; - for (std::size_t i = 0u; i < heavyHitters.size(); ++i) - { + for (std::size_t i = 0u; i < heavyHitters.size(); ++i) { double count = heavyHitters[i]; double estimated = sketch.count(static_cast(i)); - LOG_DEBUG("category = " << i - << ", true count = " << count - << ", estimated count = " << estimated); + LOG_DEBUG("category = " << i << ", true count = " << count << ", estimated count = " << estimated); double relativeError = std::fabs(estimated - count) / count; CPPUNIT_ASSERT(relativeError < 0.01); @@ -131,8 +115,7 @@ void CCountMinSketchTest::testCounts() } } -void CCountMinSketchTest::testSwap() -{ +void CCountMinSketchTest::testSwap() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CCountMinSketchTest::testSwap |"); LOG_DEBUG("+---------------------------------+"); @@ -152,20 +135,16 @@ void CCountMinSketchTest::testSwap() maths::CCountMinSketch sketch2(2, 750); maths::CCountMinSketch sketch3(3, 300); maths::CCountMinSketch sketch4(2, 400); - for (std::size_t i = 0u; i < counts1.size(); ++i) - { + for (std::size_t i = 0u; i < counts1.size(); ++i) { sketch1.add(static_cast(i), counts1[i]); } - for (std::size_t i = 0u; i < counts2.size(); ++i) - { + for (std::size_t i = 0u; i < counts2.size(); ++i) { sketch2.add(static_cast(i), counts2[i]); } - for (std::size_t i = 0u; i < counts3.size(); ++i) - { + 
for (std::size_t i = 0u; i < counts3.size(); ++i) { sketch3.add(static_cast(i), counts3[i]); } - for (std::size_t i = 0u; i < counts4.size(); ++i) - { + for (std::size_t i = 0u; i < counts4.size(); ++i) { sketch4.add(static_cast(i), counts4[i]); } @@ -199,8 +178,7 @@ void CCountMinSketchTest::testSwap() sketch3.swap(sketch4); } -void CCountMinSketchTest::testPersist() -{ +void CCountMinSketchTest::testPersist() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CCountMinSketchTest::testPersist |"); LOG_DEBUG("+------------------------------------+"); @@ -211,8 +189,7 @@ void CCountMinSketchTest::testPersist() rng.generateUniformSamples(2.0, 301.0, 500, counts); maths::CCountMinSketch origSketch(2, 600); - for (std::size_t i = 0u; i < counts.size(); ++i) - { + for (std::size_t i = 0u; i < counts.size(); ++i) { origSketch.add(static_cast(i), counts[i]); } @@ -231,10 +208,8 @@ void CCountMinSketchTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CCountMinSketch restoredSketch(traverser); - LOG_DEBUG("orig checksum = " << origSketch.checksum() - << ", new checksum = " << restoredSketch.checksum()); - CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), - restoredSketch.checksum()); + LOG_DEBUG("orig checksum = " << origSketch.checksum() << ", new checksum = " << restoredSketch.checksum()); + CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), restoredSketch.checksum()); std::string newXml; core::CRapidXmlStatePersistInserter inserter("root"); @@ -247,8 +222,7 @@ void CCountMinSketchTest::testPersist() // Sketch. TDoubleVec moreCounts; rng.generateUniformSamples(2.0, 301.0, 500, moreCounts); - for (std::size_t i = 0u; i < moreCounts.size(); ++i) - { + for (std::size_t i = 0u; i < moreCounts.size(); ++i) { origSketch.add(static_cast(counts.size() + i), moreCounts[i]); } @@ -267,10 +241,8 @@ void CCountMinSketchTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CCountMinSketch restoredSketch(traverser); - LOG_DEBUG("orig checksum = " << origSketch.checksum() - << ", new checksum = " << restoredSketch.checksum()); - CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), - restoredSketch.checksum()); + LOG_DEBUG("orig checksum = " << origSketch.checksum() << ", new checksum = " << restoredSketch.checksum()); + CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), restoredSketch.checksum()); std::string newXml; core::CRapidXmlStatePersistInserter inserter("root"); @@ -281,19 +253,14 @@ void CCountMinSketchTest::testPersist() } } -CppUnit::Test *CCountMinSketchTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCountMinSketchTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCountMinSketchTest::testCounts", - &CCountMinSketchTest::testCounts) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCountMinSketchTest::testSwap", - &CCountMinSketchTest::testSwap) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CCountMinSketchTest::testPersist", - &CCountMinSketchTest::testPersist) ); +CppUnit::Test* CCountMinSketchTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCountMinSketchTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CCountMinSketchTest::testCounts", &CCountMinSketchTest::testCounts)); + suiteOfTests->addTest(new CppUnit::TestCaller("CCountMinSketchTest::testSwap", &CCountMinSketchTest::testSwap)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CCountMinSketchTest::testPersist", &CCountMinSketchTest::testPersist)); return suiteOfTests; } diff --git 
a/lib/maths/unittest/CCountMinSketchTest.h b/lib/maths/unittest/CCountMinSketchTest.h index d6ff8b33b6..551798df71 100644 --- a/lib/maths/unittest/CCountMinSketchTest.h +++ b/lib/maths/unittest/CCountMinSketchTest.h @@ -9,14 +9,13 @@ #include -class CCountMinSketchTest : public CppUnit::TestFixture -{ - public: - void testCounts(); - void testSwap(); - void testPersist(); +class CCountMinSketchTest : public CppUnit::TestFixture { +public: + void testCounts(); + void testSwap(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CCountMinSketchTest_h diff --git a/lib/maths/unittest/CDecayRateControllerTest.cc b/lib/maths/unittest/CDecayRateControllerTest.cc index 6fc96469a4..d5f2d5067f 100644 --- a/lib/maths/unittest/CDecayRateControllerTest.cc +++ b/lib/maths/unittest/CDecayRateControllerTest.cc @@ -21,8 +21,7 @@ using namespace ml; using namespace handy_typedefs; -void CDecayRateControllerTest::testLowCov() -{ +void CDecayRateControllerTest::testLowCov() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CDecayRateControllerTest::testLowCov |"); LOG_DEBUG("+----------------------------------------+"); @@ -35,16 +34,14 @@ void CDecayRateControllerTest::testLowCov() maths::CDecayRateController controller(maths::CDecayRateController::E_PredictionBias, 1); double decayRate{0.0005}; - for (std::size_t i = 0u; i < 1000; ++i) - { + for (std::size_t i = 0u; i < 1000; ++i) { double multiplier{controller.multiplier({10000.0}, {{1.0}}, 3600, 1.0, 0.0005)}; decayRate *= multiplier; } LOG_DEBUG("Controlled decay = " << decayRate); CPPUNIT_ASSERT(decayRate > 0.0005); - for (std::size_t i = 0u; i < 1000; ++i) - { + for (std::size_t i = 0u; i < 1000; ++i) { double multiplier{controller.multiplier({10000.0}, {{0.0}}, 3600, 1.0, 0.0005)}; decayRate *= multiplier; } @@ -52,8 +49,7 @@ void CDecayRateControllerTest::testLowCov() CPPUNIT_ASSERT(decayRate < 0.0005); } -void CDecayRateControllerTest::testOrderedErrors() -{ +void CDecayRateControllerTest::testOrderedErrors() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CDecayRateControllerTest::testOrderedErrors |"); LOG_DEBUG("+-----------------------------------------------+"); @@ -68,10 +64,8 @@ void CDecayRateControllerTest::testOrderedErrors() double decayRate{0.0005}; TDouble1VecVec predictionErrors; - for (std::size_t i = 0u; i < 500; ++i) - { - for (int j = 0; j < 100; ++j) - { + for (std::size_t i = 0u; i < 500; ++i) { + for (int j = 0; j < 100; ++j) { predictionErrors.push_back({static_cast(j - 50)}); } double multiplier{controller.multiplier({100.0}, predictionErrors, 3600, 1.0, 0.0005)}; @@ -79,11 +73,9 @@ void CDecayRateControllerTest::testOrderedErrors() } LOG_DEBUG("Controlled decay = " << decayRate); CPPUNIT_ASSERT(decayRate <= 0.0005); - } -void CDecayRateControllerTest::testPersist() -{ +void CDecayRateControllerTest::testPersist() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CDecayRateControllerTest::testPersist |"); LOG_DEBUG("+-----------------------------------------+"); @@ -99,8 +91,7 @@ void CDecayRateControllerTest::testPersist() rng.generateUniformSamples(-2.0, 6.0, 1000, errors); maths::CDecayRateController origController(maths::CDecayRateController::E_PredictionBias, 1); - for (std::size_t i = 0u; i < values.size(); ++i) - { + for (std::size_t i = 0u; i < values.size(); ++i) { origController.multiplier({values[i]}, {{errors[i]}}, 3600, 1.0, 0.0005); } @@ -119,30 +110,23 @@ void 
CDecayRateControllerTest::testPersist()
         CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
         core::CRapidXmlStateRestoreTraverser traverser(parser);
         maths::CDecayRateController restoredController;
-        CPPUNIT_ASSERT_EQUAL(true, traverser.traverseSubLevel(
-                                       boost::bind(&maths::CDecayRateController::acceptRestoreTraverser,
-                                                   &restoredController, _1)));
-
-        LOG_DEBUG("orig checksum = " << origController.checksum()
-                  << ", new checksum = " << restoredController.checksum());
-        CPPUNIT_ASSERT_EQUAL(origController.checksum(),
-                             restoredController.checksum());
+        CPPUNIT_ASSERT_EQUAL(
+            true, traverser.traverseSubLevel(boost::bind(&maths::CDecayRateController::acceptRestoreTraverser, &restoredController, _1)));
+
+        LOG_DEBUG("orig checksum = " << origController.checksum() << ", new checksum = " << restoredController.checksum());
+        CPPUNIT_ASSERT_EQUAL(origController.checksum(), restoredController.checksum());
     }
 }
 
-CppUnit::Test *CDecayRateControllerTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDecayRateControllerTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDecayRateControllerTest>(
-                               "CDecayRateControllerTest::testLowCov",
-                               &CDecayRateControllerTest::testLowCov) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDecayRateControllerTest>(
-                               "CDecayRateControllerTest::testOrderedErrors",
-                               &CDecayRateControllerTest::testOrderedErrors) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CDecayRateControllerTest>(
-                               "CDecayRateControllerTest::testPersist",
-                               &CDecayRateControllerTest::testPersist) );
+CppUnit::Test* CDecayRateControllerTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDecayRateControllerTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDecayRateControllerTest>("CDecayRateControllerTest::testLowCov", &CDecayRateControllerTest::testLowCov));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDecayRateControllerTest>("CDecayRateControllerTest::testOrderedErrors",
+                                                                            &CDecayRateControllerTest::testOrderedErrors));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CDecayRateControllerTest>("CDecayRateControllerTest::testPersist", &CDecayRateControllerTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CDecayRateControllerTest.h b/lib/maths/unittest/CDecayRateControllerTest.h
index b6e48a2613..0451ef1c24 100644
--- a/lib/maths/unittest/CDecayRateControllerTest.h
+++ b/lib/maths/unittest/CDecayRateControllerTest.h
@@ -9,14 +9,13 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CDecayRateControllerTest : public CppUnit::TestFixture
-{
-    public:
-        void testLowCov();
-        void testOrderedErrors();
-        void testPersist();
+class CDecayRateControllerTest : public CppUnit::TestFixture {
+public:
+    void testLowCov();
+    void testOrderedErrors();
+    void testPersist();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CDecayRateControllerTest_h
diff --git a/lib/maths/unittest/CEntropySketchTest.cc b/lib/maths/unittest/CEntropySketchTest.cc
index 8df13d6df3..9717fbefe5 100644
--- a/lib/maths/unittest/CEntropySketchTest.cc
+++ b/lib/maths/unittest/CEntropySketchTest.cc
@@ -21,8 +21,7 @@
 
 using namespace ml;
 
-void CEntropySketchTest::testAll()
-{
+void CEntropySketchTest::testAll() {
     LOG_DEBUG("+-----------------------------+");
     LOG_DEBUG("| CEntropySketchTest::testAll |");
     LOG_DEBUG("+-----------------------------+");
 
@@ -39,85 +38,64 @@
     maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>> maxError[3];
     maths::CBasicStatistics::SSampleMean<double>::TAccumulator meanError[3];
 
-    double K[] = { 20.0, 40.0, 60.0 };
-    double eps[] = { 0.2, 0.4, 0.6 };
-    double epsDeviations[][3] =
-        {
-            { 0.0, 0.0, 0.0 },
-            { 0.0, 0.0, 0.0 },
-            { 0.0, 0.0, 0.0 }
-        };
+    double K[] = {20.0, 40.0, 60.0};
+    double eps[] = {0.2, 0.4, 0.6};
+    double epsDeviations[][3] = {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}};
 
     TSizeVec counts;
-    for (std::size_t t = 0u; t < numberCategories.size(); ++t)
-    {
+    for (std::size_t t = 0u; t < numberCategories.size(); ++t) {
         rng.generateUniformSamples(1, 10, numberCategories[t], counts);
         std::size_t Z = std::accumulate(counts.begin(), counts.end(), 0);
 
-        maths::CEntropySketch entropy[] =
-            {
-                maths::CEntropySketch(static_cast<std::size_t>(K[0])),
-                maths::CEntropySketch(static_cast<std::size_t>(K[1])),
-                maths::CEntropySketch(static_cast<std::size_t>(K[2]))
-            };
+        maths::CEntropySketch entropy[] = {maths::CEntropySketch(static_cast<std::size_t>(K[0])),
+                                           maths::CEntropySketch(static_cast<std::size_t>(K[1])),
+                                           maths::CEntropySketch(static_cast<std::size_t>(K[2]))};
 
-        for (std::size_t i = 0u; i < 3; ++i)
-        {
+        for (std::size_t i = 0u; i < 3; ++i) {
             TSizeDoubleUMap p;
-            for (std::size_t j = 0u; j < numberCategories[t]; ++j)
-            {
+            for (std::size_t j = 0u; j < numberCategories[t]; ++j) {
                 entropy[i].add(j, counts[j]);
                 p[j] += static_cast<double>(counts[j]) / static_cast<double>(Z);
             }
 
             double ha = entropy[i].calculate();
-            double h  = 0.0;
-            for (TSizeDoubleUMapCItr j = p.begin(); j != p.end(); ++j)
-            {
+            double h = 0.0;
+            for (TSizeDoubleUMapCItr j = p.begin(); j != p.end(); ++j) {
                 h -= j->second * std::log(j->second);
             }
 
-            if (t % 30 == 0)
-            {
+            if (t % 30 == 0) {
                 LOG_DEBUG("H_approx = " << ha << ", H_exact = " << h);
             }
 
             meanError[i].add(std::fabs(ha - h) / h);
-            maxError[i].add( std::fabs(ha - h) / h);
-            for (std::size_t k = 0u; k < 3; ++k)
-            {
-                if (std::fabs(ha - h) > eps[k])
-                {
+            maxError[i].add(std::fabs(ha - h) / h);
+            for (std::size_t k = 0u; k < 3; ++k) {
+                if (std::fabs(ha - h) > eps[k]) {
                     epsDeviations[i][k] += 1.0;
                 }
             }
         }
     }
 
-    double maxMaxErrors[] = { 0.14, 0.11, 0.08 };
-    double maxMeanErrors[] = { 0.05, 0.04, 0.03 };
-    for (std::size_t i = 0u; i < 3; ++i)
-    {
+    double maxMaxErrors[] = {0.14, 0.11, 0.08};
+    double maxMeanErrors[] = {0.05, 0.04, 0.03};
+    for (std::size_t i = 0u; i < 3; ++i) {
         LOG_DEBUG("max error = " << maxError[i][0]);
         LOG_DEBUG("mean error = " << maths::CBasicStatistics::mean(meanError[i]));
         LOG_DEBUG("large deviations = " << core::CContainerPrinter::print(epsDeviations[i]));
         CPPUNIT_ASSERT(maxError[i][0] < maxMaxErrors[i]);
         CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError[i]) < maxMeanErrors[i]);
 
         // Test additive approximation bounds.
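        // (Editor's note, illustrative only, not part of the original patch: the check
        // below is a Chernoff-style tail bound on the sketch's additive error. Writing
        // H for the exact entropy, computed above as h = -sum_j p_j * log(p_j), and
        // H_approx for the sketched value, a sketch of size K is expected to satisfy
        // roughly
        //
        //     P(|H_approx - H| > eps) <= 2 * exp(-K * eps^2 / 6)
        //
        // so, assuming the divisor 1000.0 is the number of trials per configuration,
        // epsDeviations[i][j] / 1000.0 estimates the left-hand side for K[i], eps[j].)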
- for (std::size_t j = 0u; j < 3; ++j) - { - CPPUNIT_ASSERT(epsDeviations[i][j] / 1000.0 < 2.0 * std::exp(-K[i]*eps[j]*eps[j] / 6.0)); + for (std::size_t j = 0u; j < 3; ++j) { + CPPUNIT_ASSERT(epsDeviations[i][j] / 1000.0 < 2.0 * std::exp(-K[i] * eps[j] * eps[j] / 6.0)); } } } -CppUnit::Test *CEntropySketchTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CEntropySketchTest"); +CppUnit::Test* CEntropySketchTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEntropySketchTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEntropySketchTest::testAll", - &CEntropySketchTest::testAll) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CEntropySketchTest::testAll", &CEntropySketchTest::testAll)); return suiteOfTests; } diff --git a/lib/maths/unittest/CEntropySketchTest.h b/lib/maths/unittest/CEntropySketchTest.h index d293e804d4..ac777a5ed0 100644 --- a/lib/maths/unittest/CEntropySketchTest.h +++ b/lib/maths/unittest/CEntropySketchTest.h @@ -9,12 +9,11 @@ #include -class CEntropySketchTest : public CppUnit::TestFixture -{ - public: - void testAll(); +class CEntropySketchTest : public CppUnit::TestFixture { +public: + void testAll(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CEntropySketchTest_h diff --git a/lib/maths/unittest/CEqualWithToleranceTest.cc b/lib/maths/unittest/CEqualWithToleranceTest.cc index c88e502558..e2b4fc406f 100644 --- a/lib/maths/unittest/CEqualWithToleranceTest.cc +++ b/lib/maths/unittest/CEqualWithToleranceTest.cc @@ -13,8 +13,7 @@ using namespace ml; -void CEqualWithToleranceTest::testScalar() -{ +void CEqualWithToleranceTest::testScalar() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CEqualWithToleranceTest::testScalar |"); LOG_DEBUG("+---------------------------------------+"); @@ -22,213 +21,199 @@ void CEqualWithToleranceTest::testScalar() { maths::CEqualWithTolerance abs(maths::CToleranceTypes::E_AbsoluteTolerance, 0.31); maths::CEqualWithTolerance rel(maths::CToleranceTypes::E_RelativeTolerance, 0.01); - maths::CEqualWithTolerance absAndRel( maths::CToleranceTypes::E_AbsoluteTolerance - & maths::CToleranceTypes::E_RelativeTolerance, - 0.31, 0.01); - maths::CEqualWithTolerance absOrRel( maths::CToleranceTypes::E_AbsoluteTolerance - | maths::CToleranceTypes::E_RelativeTolerance, - 0.31, 0.01); + maths::CEqualWithTolerance absAndRel( + maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, 0.31, 0.01); + maths::CEqualWithTolerance absOrRel( + maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, 0.31, 0.01); { double a = 1.1; double b = 1.4; double c = 200.6; double d = 202.61; - CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); + CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); CPPUNIT_ASSERT_EQUAL(false, abs(c, d)); CPPUNIT_ASSERT_EQUAL(false, rel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(a, b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(c, d)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); } { double a = -1.1; double b = -1.4; double c = -200.6; double d = -202.61; - CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); + CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); CPPUNIT_ASSERT_EQUAL(false, abs(c, d)); CPPUNIT_ASSERT_EQUAL(false, rel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); 
+ CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(a, b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(c, d)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); } } { maths::CEqualWithTolerance abs(maths::CToleranceTypes::E_AbsoluteTolerance, 0.31f); maths::CEqualWithTolerance rel(maths::CToleranceTypes::E_RelativeTolerance, 0.01f); - maths::CEqualWithTolerance absAndRel( maths::CToleranceTypes::E_AbsoluteTolerance - & maths::CToleranceTypes::E_RelativeTolerance, - 0.31f, 0.01f); - maths::CEqualWithTolerance absOrRel( maths::CToleranceTypes::E_AbsoluteTolerance - | maths::CToleranceTypes::E_RelativeTolerance, - 0.31f, 0.01f); + maths::CEqualWithTolerance absAndRel( + maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, 0.31f, 0.01f); + maths::CEqualWithTolerance absOrRel( + maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, 0.31f, 0.01f); float a = 1.1f; float b = 1.4f; float c = 200.6f; float d = 202.61f; - CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); + CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); CPPUNIT_ASSERT_EQUAL(false, abs(c, d)); CPPUNIT_ASSERT_EQUAL(false, rel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(a, b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(c, d)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); } } -void CEqualWithToleranceTest::testVector() -{ +void CEqualWithToleranceTest::testVector() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CEqualWithToleranceTest::testVector |"); LOG_DEBUG("+---------------------------------------+"); - float a_[] = { 1.1f, 1.2f }; - float b_[] = { 1.2f, 1.3f }; - float c_[] = { 201.1f, 202.2f }; - float d_[] = { 202.1f, 203.2f }; + float a_[] = {1.1f, 1.2f}; + float b_[] = {1.2f, 1.3f}; + float c_[] = {201.1f, 202.2f}; + float d_[] = {202.1f, 203.2f}; maths::CVector epsAbs(2, 0.15 / std::sqrt(2.0)); maths::CVector epsRel(2, 0.0062 / std::sqrt(2.0)); maths::CEqualWithTolerance> abs(maths::CToleranceTypes::E_AbsoluteTolerance, epsAbs); maths::CEqualWithTolerance> rel(maths::CToleranceTypes::E_RelativeTolerance, epsRel); - maths::CEqualWithTolerance> absAndRel( maths::CToleranceTypes::E_AbsoluteTolerance - & maths::CToleranceTypes::E_RelativeTolerance, - epsAbs, epsRel); - maths::CEqualWithTolerance> absOrRel( maths::CToleranceTypes::E_AbsoluteTolerance - | maths::CToleranceTypes::E_RelativeTolerance, - epsAbs, epsRel); + maths::CEqualWithTolerance> absAndRel( + maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, epsAbs, epsRel); + maths::CEqualWithTolerance> absOrRel( + maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, epsAbs, epsRel); { maths::CVector a(a_, a_ + 2); maths::CVector b(b_, b_ + 2); maths::CVector c(c_, c_ + 2); maths::CVector d(d_, d_ + 2); - CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); + CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); CPPUNIT_ASSERT_EQUAL(false, abs(c, d)); CPPUNIT_ASSERT_EQUAL(false, rel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(a, b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(c, d)); - 
CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); } { maths::CVector a(a_, a_ + 2); maths::CVector b(b_, b_ + 2); maths::CVector c(c_, c_ + 2); maths::CVector d(d_, d_ + 2); - CPPUNIT_ASSERT_EQUAL(true, abs(-a, -b)); + CPPUNIT_ASSERT_EQUAL(true, abs(-a, -b)); CPPUNIT_ASSERT_EQUAL(false, abs(-c, -d)); CPPUNIT_ASSERT_EQUAL(false, rel(-a, -b)); - CPPUNIT_ASSERT_EQUAL(true, rel(-c, -d)); + CPPUNIT_ASSERT_EQUAL(true, rel(-c, -d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(-a, -b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(-c, -d)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(-a, -b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(-c, -d)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(-a, -b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(-c, -d)); } { maths::CVector a(a_, a_ + 2); maths::CVector b(b_, b_ + 2); maths::CVector c(c_, c_ + 2); maths::CVector d(d_, d_ + 2); - CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); + CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); CPPUNIT_ASSERT_EQUAL(false, abs(c, d)); CPPUNIT_ASSERT_EQUAL(false, rel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(a, b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(c, d)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); } } -void CEqualWithToleranceTest::testMatrix() -{ +void CEqualWithToleranceTest::testMatrix() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CEqualWithToleranceTest::testMatrix |"); LOG_DEBUG("+---------------------------------------+"); - float a_[] = { 1.1f, 1.2f, 1.3f }; - float b_[] = { 1.2f, 1.3f, 1.4f }; - float c_[] = { 201.1f, 202.2f, 203.4f }; - float d_[] = { 202.1f, 203.2f, 204.4f }; + float a_[] = {1.1f, 1.2f, 1.3f}; + float b_[] = {1.2f, 1.3f, 1.4f}; + float c_[] = {201.1f, 202.2f, 203.4f}; + float d_[] = {202.1f, 203.2f, 204.4f}; maths::CSymmetricMatrix epsAbs(2, 0.21 / 2.0); maths::CSymmetricMatrix epsRel(2, 0.005 / 2.0); maths::CEqualWithTolerance> abs(maths::CToleranceTypes::E_AbsoluteTolerance, epsAbs); maths::CEqualWithTolerance> rel(maths::CToleranceTypes::E_RelativeTolerance, epsRel); - maths::CEqualWithTolerance> absAndRel( maths::CToleranceTypes::E_AbsoluteTolerance - & maths::CToleranceTypes::E_RelativeTolerance, - epsAbs, epsRel); - maths::CEqualWithTolerance> absOrRel( maths::CToleranceTypes::E_AbsoluteTolerance - | maths::CToleranceTypes::E_RelativeTolerance, - epsAbs, epsRel); + maths::CEqualWithTolerance> absAndRel( + maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, epsAbs, epsRel); + maths::CEqualWithTolerance> absOrRel( + maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, epsAbs, epsRel); { maths::CSymmetricMatrix a(a_, a_ + 3); maths::CSymmetricMatrix b(b_, b_ + 3); maths::CSymmetricMatrix c(c_, c_ + 3); maths::CSymmetricMatrix d(d_, d_ + 3); - CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); + CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); CPPUNIT_ASSERT_EQUAL(false, abs(c, d)); CPPUNIT_ASSERT_EQUAL(false, rel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(a, b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(c, d)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); + 
CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); } { maths::CSymmetricMatrix a(a_, a_ + 3); maths::CSymmetricMatrix b(b_, b_ + 3); maths::CSymmetricMatrix c(c_, c_ + 3); maths::CSymmetricMatrix d(d_, d_ + 3); - CPPUNIT_ASSERT_EQUAL(true, abs(-a, -b)); + CPPUNIT_ASSERT_EQUAL(true, abs(-a, -b)); CPPUNIT_ASSERT_EQUAL(false, abs(-c, -d)); CPPUNIT_ASSERT_EQUAL(false, rel(-a, -b)); - CPPUNIT_ASSERT_EQUAL(true, rel(-c, -d)); + CPPUNIT_ASSERT_EQUAL(true, rel(-c, -d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(-a, -b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(-c, -d)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); } { maths::CSymmetricMatrix a(a_, a_ + 3); maths::CSymmetricMatrix b(b_, b_ + 3); maths::CSymmetricMatrix c(c_, c_ + 3); maths::CSymmetricMatrix d(d_, d_ + 3); - CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); + CPPUNIT_ASSERT_EQUAL(true, abs(a, b)); CPPUNIT_ASSERT_EQUAL(false, abs(c, d)); CPPUNIT_ASSERT_EQUAL(false, rel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, rel(c, d)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(a, b)); CPPUNIT_ASSERT_EQUAL(false, absAndRel(c, d)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); - CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(a, b)); + CPPUNIT_ASSERT_EQUAL(true, absOrRel(c, d)); } } -CppUnit::Test *CEqualWithToleranceTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CEqualWithToleranceTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEqualWithToleranceTest::testScalar", - &CEqualWithToleranceTest::testScalar) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEqualWithToleranceTest::testVector", - &CEqualWithToleranceTest::testVector) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEqualWithToleranceTest::testMatrix", - &CEqualWithToleranceTest::testMatrix) ); +CppUnit::Test* CEqualWithToleranceTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEqualWithToleranceTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CEqualWithToleranceTest::testScalar", &CEqualWithToleranceTest::testScalar)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CEqualWithToleranceTest::testVector", &CEqualWithToleranceTest::testVector)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CEqualWithToleranceTest::testMatrix", &CEqualWithToleranceTest::testMatrix)); return suiteOfTests; } diff --git a/lib/maths/unittest/CEqualWithToleranceTest.h b/lib/maths/unittest/CEqualWithToleranceTest.h index 9cde078c81..d0af5ca7c4 100644 --- a/lib/maths/unittest/CEqualWithToleranceTest.h +++ b/lib/maths/unittest/CEqualWithToleranceTest.h @@ -9,14 +9,13 @@ #include -class CEqualWithToleranceTest : public CppUnit::TestFixture -{ - public: - void testScalar(); - void testVector(); - void testMatrix(); +class CEqualWithToleranceTest : public CppUnit::TestFixture { +public: + void testScalar(); + void testVector(); + void testMatrix(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CEqualWithToleranceTest_h diff --git a/lib/maths/unittest/CForecastTest.cc b/lib/maths/unittest/CForecastTest.cc index 933f40a3e3..e075d95b9b 100644 --- a/lib/maths/unittest/CForecastTest.cc +++ b/lib/maths/unittest/CForecastTest.cc @@ -33,8 +33,7 @@ using namespace ml; using namespace handy_typedefs; -namespace -{ +namespace { using TDoubleVec = 
std::vector; using TTimeDoublePr = std::pair; using TTimeDoublePrVec = std::vector; @@ -52,186 +51,159 @@ const std::size_t TAG{0u}; const TDouble2Vec MINIMUM_VALUE{boost::numeric::bounds::lowest()}; const TDouble2Vec MAXIMUM_VALUE{boost::numeric::bounds::highest()}; -maths::CModelParams params(core_t::TTime bucketLength) -{ +maths::CModelParams params(core_t::TTime bucketLength) { using TTimeDoubleMap = std::map; static TTimeDoubleMap learnRates; learnRates[bucketLength] = static_cast(bucketLength) / 1800.0; double minimumSeasonalVarianceScale{0.25}; - return maths::CModelParams{bucketLength, learnRates[bucketLength], - DECAY_RATE, minimumSeasonalVarianceScale, - 6 * core::constants::HOUR, core::constants::DAY}; + return maths::CModelParams{ + bucketLength, learnRates[bucketLength], DECAY_RATE, minimumSeasonalVarianceScale, 6 * core::constants::HOUR, core::constants::DAY}; } -maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary decayRateControllers() -{ - return {{maths::CDecayRateController( maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease, 1), - maths::CDecayRateController( maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease - | maths::CDecayRateController::E_PredictionErrorDecrease, 1)}}; +maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary decayRateControllers() { + return {{maths::CDecayRateController( + maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1), + maths::CDecayRateController(maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease | + maths::CDecayRateController::E_PredictionErrorDecrease, + 1)}}; } } -void mockSink(maths::SErrorBar errorBar, TErrorBarVec &prediction) -{ +void mockSink(maths::SErrorBar errorBar, TErrorBarVec& prediction) { prediction.push_back(errorBar); } -void CForecastTest::testDailyNoLongTermTrend() -{ +void CForecastTest::testDailyNoLongTermTrend() { LOG_DEBUG("+-------------------------------------------+"); LOG_DEBUG("| CForecastTest::testDailyNoLongTermTrend |"); LOG_DEBUG("+-------------------------------------------+"); core_t::TTime bucketLength{600}; - TDoubleVec y{ 0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, - 120.0, 120.0, 110.0, 100.0, 90.0, 100.0, 130.0, 80.0, - 30.0, 15.0, 10.0, 8.0, 5.0, 3.0, 2.0, 0.0}; + TDoubleVec y{0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, 120.0, 120.0, 110.0, 100.0, + 90.0, 100.0, 130.0, 80.0, 30.0, 15.0, 10.0, 8.0, 5.0, 3.0, 2.0, 0.0}; test::CRandomNumbers rng; - auto trend = [&y, bucketLength](core_t::TTime time, double noise) - { - core_t::TTime i{(time % 86400) / bucketLength}; - double alpha{static_cast(i % 6) / 6.0}; - double beta{1.0 - alpha}; - return 40.0 + alpha * y[i/6] + beta * y[(i/6 + 1) % y.size()] + noise; - }; + auto trend = [&y, bucketLength](core_t::TTime time, double noise) { + core_t::TTime i{(time % 86400) / bucketLength}; + double alpha{static_cast(i % 6) / 6.0}; + double beta{1.0 - alpha}; + return 40.0 + alpha * y[i / 6] + beta * y[(i / 6 + 1) % y.size()] + noise; + }; this->test(trend, bucketLength, 60, 64.0, 4.0, 0.13); } -void CForecastTest::testDailyConstantLongTermTrend() -{ +void CForecastTest::testDailyConstantLongTermTrend() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CForecastTest::testDailyConstantLongTermTrend |"); LOG_DEBUG("+-------------------------------------------------+"); core_t::TTime bucketLength{3600}; - 
TDoubleVec y{ 0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, - 80.0, 100.0, 110.0, 120.0, 110.0, 100.0, 90.0, 80.0, - 30.0, 15.0, 10.0, 8.0, 5.0, 3.0, 2.0, 0.0}; + TDoubleVec y{0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, 80.0, 100.0, 110.0, 120.0, + 110.0, 100.0, 90.0, 80.0, 30.0, 15.0, 10.0, 8.0, 5.0, 3.0, 2.0, 0.0}; - auto trend = [&y, bucketLength](core_t::TTime time, double noise) - { - core_t::TTime i{(time % 86400) / bucketLength}; - return 0.25 * static_cast(time) - / static_cast(bucketLength) + y[i] + noise; - }; + auto trend = [&y, bucketLength](core_t::TTime time, double noise) { + core_t::TTime i{(time % 86400) / bucketLength}; + return 0.25 * static_cast(time) / static_cast(bucketLength) + y[i] + noise; + }; this->test(trend, bucketLength, 60, 64.0, 15.0, 0.02); } -void CForecastTest::testDailyVaryingLongTermTrend() -{ +void CForecastTest::testDailyVaryingLongTermTrend() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CForecastTest::testDailyVaryingLongTermTrend |"); LOG_DEBUG("+------------------------------------------------+"); core_t::TTime bucketLength{3600}; double day{86400.0}; - TDoubleVec times{ 0.0 , 5.0 * day, 10.0 * day, 15.0 * day, 20.0 * day, 25.0 * day, - 30.0 * day, 35.0 * day, 40.0 * day, 45.0 * day, 50.0 * day, 55.0 * day, - 60.0 * day, 65.0 * day, 70.0 * day, 75.0 * day, 80.0 * day, 85.0 * day, - 90.0 * day, 95.0 * day, 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; - TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, 35.0, 25.0, 35.0, 45.0, + TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, 20.0 * day, 25.0 * day, 30.0 * day, 35.0 * day, + 40.0 * day, 45.0 * day, 50.0 * day, 55.0 * day, 60.0 * day, 65.0 * day, 70.0 * day, 75.0 * day, + 80.0 * day, 85.0 * day, 90.0 * day, 95.0 * day, 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; + TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, 35.0, 25.0, 35.0, 45.0, 55.0, 62.0, 70.0, 76.0, 79.0, 82.0, 86.0, 90.0, 95.0, 100.0, 106.0, 112.0}; maths::CSpline<> trend_(maths::CSplineTypes::E_Cubic); trend_.interpolate(times, values, maths::CSplineTypes::E_Natural); - auto trend = [&trend_](core_t::TTime time, double noise) - { - double time_{static_cast(time)}; - return trend_.value(time_) - + 8.0 * std::sin(boost::math::double_constants::two_pi * time_ / 43200.0) - + noise; - }; + auto trend = [&trend_](core_t::TTime time, double noise) { + double time_{static_cast(time)}; + return trend_.value(time_) + 8.0 * std::sin(boost::math::double_constants::two_pi * time_ / 43200.0) + noise; + }; this->test(trend, bucketLength, 100, 9.0, 13.0, 0.04); } -void CForecastTest::testComplexNoLongTermTrend() -{ +void CForecastTest::testComplexNoLongTermTrend() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CForecastTest::testComplexNoLongTermTrend |"); LOG_DEBUG("+---------------------------------------------+"); core_t::TTime bucketLength{3600}; - TDoubleVec y{ 0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, - 80.0, 100.0, 110.0, 120.0, 110.0, 100.0, 90.0, 80.0, - 60.0, 40.0, 30.0, 20.0, 10.0, 10.0, 5.0, 0.0}; + TDoubleVec y{0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, 80.0, 100.0, 110.0, 120.0, + 110.0, 100.0, 90.0, 80.0, 60.0, 40.0, 30.0, 20.0, 10.0, 10.0, 5.0, 0.0}; TDoubleVec scale{1.0, 1.1, 1.05, 0.95, 0.9, 0.3, 0.2}; - auto trend = [&y, &scale, bucketLength](core_t::TTime time, double noise) - { - core_t::TTime d{(time % 604800) / 86400}; - core_t::TTime h{(time % 86400) / bucketLength}; - return scale[d] * (20.0 + y[h] + 
noise); - }; + auto trend = [&y, &scale, bucketLength](core_t::TTime time, double noise) { + core_t::TTime d{(time % 604800) / 86400}; + core_t::TTime h{(time % 86400) / bucketLength}; + return scale[d] * (20.0 + y[h] + noise); + }; this->test(trend, bucketLength, 60, 24.0, 34.0, 0.13); } -void CForecastTest::testComplexConstantLongTermTrend() -{ +void CForecastTest::testComplexConstantLongTermTrend() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CForecastTest::testComplexConstantLongTermTrend |"); LOG_DEBUG("+---------------------------------------------------+"); core_t::TTime bucketLength{3600}; - TDoubleVec y{ 0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, - 80.0, 100.0, 110.0, 120.0, 110.0, 100.0, 90.0, 80.0, - 60.0, 40.0, 30.0, 20.0, 10.0, 10.0, 5.0, 0.0}; + TDoubleVec y{0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, 80.0, 100.0, 110.0, 120.0, + 110.0, 100.0, 90.0, 80.0, 60.0, 40.0, 30.0, 20.0, 10.0, 10.0, 5.0, 0.0}; TDoubleVec scale{1.0, 1.1, 1.05, 0.95, 0.9, 0.3, 0.2}; - auto trend = [&y, &scale, bucketLength](core_t::TTime time, double noise) - { - core_t::TTime d{(time % 604800) / 86400}; - core_t::TTime h{(time % 86400) / bucketLength}; - return 0.25 * static_cast(time) - / static_cast(bucketLength) + scale[d] * (20.0 + y[h] + noise); - }; + auto trend = [&y, &scale, bucketLength](core_t::TTime time, double noise) { + core_t::TTime d{(time % 604800) / 86400}; + core_t::TTime h{(time % 86400) / bucketLength}; + return 0.25 * static_cast(time) / static_cast(bucketLength) + scale[d] * (20.0 + y[h] + noise); + }; this->test(trend, bucketLength, 60, 24.0, 17.0, 0.04); } -void CForecastTest::testComplexVaryingLongTermTrend() -{ +void CForecastTest::testComplexVaryingLongTermTrend() { LOG_DEBUG("+--------------------------------------------------+"); LOG_DEBUG("| CForecastTest::testComplexVaryingLongTermTrend |"); LOG_DEBUG("+--------------------------------------------------+"); core_t::TTime bucketLength{3600}; double day{86400.0}; - TDoubleVec times{ 0.0 , 5.0 * day, 10.0 * day, 15.0 * day, 20.0 * day, 25.0 * day, - 30.0 * day, 35.0 * day, 40.0 * day, 45.0 * day, 50.0 * day, 55.0 * day, - 60.0 * day, 65.0 * day, 70.0 * day, 75.0 * day, 80.0 * day, 85.0 * day, - 90.0 * day, 95.0 * day, 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; - TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, 35.0, 25.0, 35.0, 45.0, + TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, 20.0 * day, 25.0 * day, 30.0 * day, 35.0 * day, + 40.0 * day, 45.0 * day, 50.0 * day, 55.0 * day, 60.0 * day, 65.0 * day, 70.0 * day, 75.0 * day, + 80.0 * day, 85.0 * day, 90.0 * day, 95.0 * day, 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; + TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, 35.0, 25.0, 35.0, 45.0, 55.0, 62.0, 70.0, 76.0, 79.0, 82.0, 86.0, 90.0, 95.0, 100.0, 106.0, 112.0}; - TDoubleVec y{0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, - 8.0, 10.0, 11.0, 12.0, 11.0, 10.0, 9.0, 8.0, - 6.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.5, 0.0}; + TDoubleVec y{0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 11.0, 12.0, + 11.0, 10.0, 9.0, 8.0, 6.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.5, 0.0}; TDoubleVec scale{1.0, 1.1, 1.05, 0.95, 0.9, 0.3, 0.2}; maths::CSpline<> trend_(maths::CSplineTypes::E_Cubic); trend_.interpolate(times, values, maths::CSplineTypes::E_Natural); - auto trend = [&trend_, &y, &scale, bucketLength](core_t::TTime time, double noise) - { - core_t::TTime d{(time % 604800) / 86400}; - core_t::TTime h{(time % 86400) / bucketLength}; - double 
time_{static_cast(time)}; - return trend_.value(time_) + scale[d] * (20.0 + y[h] + noise); - }; + auto trend = [&trend_, &y, &scale, bucketLength](core_t::TTime time, double noise) { + core_t::TTime d{(time % 604800) / 86400}; + core_t::TTime h{(time % 86400) / bucketLength}; + double time_{static_cast(time)}; + return trend_.value(time_) + scale[d] * (20.0 + y[h] + noise); + }; this->test(trend, bucketLength, 60, 4.0, 23.0, 0.05); } -void CForecastTest::testNonNegative() -{ +void CForecastTest::testNonNegative() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CForecastTest::testNonNegative |"); LOG_DEBUG("+----------------------------------+"); @@ -241,8 +213,7 @@ void CForecastTest::testNonNegative() test::CRandomNumbers rng; maths::CTimeSeriesDecomposition trend(0.012, bucketLength); - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{decayRateControllers()}; maths::CUnivariateTimeSeriesModel model(params(bucketLength), TAG, trend, prior, &controllers); @@ -257,19 +228,17 @@ void CForecastTest::testNonNegative() core_t::TTime time{0}; TDouble2Vec4VecVec weights{{{1.0}}}; - for (std::size_t d = 0u; d < 20; ++d) - { + for (std::size_t d = 0u; d < 20; ++d) { TDoubleVec noise; rng.generateNormalSamples(2.0, 3.0, 48, noise); - for (auto value = noise.begin(); value != noise.end(); ++value, time += bucketLength) - { + for (auto value = noise.begin(); value != noise.end(); ++value, time += bucketLength) { maths::CModelAddSamplesParams params; params.integer(false) - .nonNegative(true) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + .nonNegative(true) + .propagationInterval(1.0) + .weightStyles(maths::CConstantWeights::COUNT) + .trendWeights(weights) + .priorWeights(weights); double y{std::max(*value, 0.0)}; model.addSamples(params, {core::make_triple(time, TDouble2Vec{y}, TAG)}); //actual.push_back(y); @@ -283,28 +252,21 @@ void CForecastTest::testNonNegative() core_t::TTime end{time + 20 * core::constants::DAY}; std::string m; TModelPtr forecastModel(model.cloneForForecast()); - forecastModel->forecast(start, end, 95.0, - MINIMUM_VALUE, MAXIMUM_VALUE, - boost::bind(&mockSink, _1, boost::ref(prediction)), m); + forecastModel->forecast(start, end, 95.0, MINIMUM_VALUE, MAXIMUM_VALUE, boost::bind(&mockSink, _1, boost::ref(prediction)), m); std::size_t outOfBounds{0}; std::size_t count{0}; - for (std::size_t i = 0u; i < prediction.size(); ++i) - { + for (std::size_t i = 0u; i < prediction.size(); ++i) { TDoubleVec noise; rng.generateNormalSamples(2.0, 3.0, 48, noise); - for (auto value = noise.begin(); - i < prediction.size() && value != noise.end(); - ++i, ++value, time += bucketLength) - { + for (auto value = noise.begin(); i < prediction.size() && value != noise.end(); ++i, ++value, time += bucketLength) { CPPUNIT_ASSERT(prediction[i].s_LowerBound >= 0); - CPPUNIT_ASSERT(prediction[i].s_Predicted >= 0); + CPPUNIT_ASSERT(prediction[i].s_Predicted >= 0); CPPUNIT_ASSERT(prediction[i].s_UpperBound >= 0); double y{std::max(*value, 0.0)}; - outOfBounds += ( y < prediction[i].s_LowerBound - || y > prediction[i].s_UpperBound ? 1 : 0); + outOfBounds += (y < prediction[i].s_LowerBound || y > prediction[i].s_UpperBound ? 
1 : 0); ++count; //actual.push_back(y); //ly.push_back(prediction[i].s_LowerBound); @@ -313,8 +275,7 @@ void CForecastTest::testNonNegative() } } - double percentageOutOfBounds{100.0 * static_cast(outOfBounds) - / static_cast(count)}; + double percentageOutOfBounds{100.0 * static_cast(outOfBounds) / static_cast(count)}; LOG_DEBUG("% out of bounds = " << percentageOutOfBounds); //file << "actual = " << core::CContainerPrinter::print(actual) << ";\n"; @@ -325,8 +286,7 @@ void CForecastTest::testNonNegative() CPPUNIT_ASSERT(percentageOutOfBounds < 8.0); } -void CForecastTest::testFinancialIndex() -{ +void CForecastTest::testFinancialIndex() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CForecastTest::testFinancialIndex |"); LOG_DEBUG("+-------------------------------------+"); @@ -336,20 +296,14 @@ void CForecastTest::testFinancialIndex() TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; - CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/financial_index.csv", - timeseries, - startTime, - endTime, - "^([0-9]+),([0-9\\.]+)")); + CPPUNIT_ASSERT( + test::CTimeSeriesTestData::parse("testfiles/financial_index.csv", timeseries, startTime, endTime, "^([0-9]+),([0-9\\.]+)")); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); maths::CTimeSeriesDecomposition trend(0.012, bucketLength); - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{decayRateControllers()}; maths::CUnivariateTimeSeriesModel model(params(bucketLength), TAG, trend, prior, &controllers); @@ -365,17 +319,14 @@ void CForecastTest::testFinancialIndex() std::size_t n{5 * timeseries.size() / 6}; TDouble2Vec4VecVec weights{{{1.0}}}; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { maths::CModelAddSamplesParams params; params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - model.addSamples(params, {core::make_triple(timeseries[i].first, - TDouble2Vec{timeseries[i].second}, - TAG)}); + .propagationInterval(1.0) + .weightStyles(maths::CConstantWeights::COUNT) + .trendWeights(weights) + .priorWeights(weights); + model.addSamples(params, {core::make_triple(timeseries[i].first, TDouble2Vec{timeseries[i].second}, TAG)}); //actual.push_back(timeseries[i].second); } @@ -386,21 +337,15 @@ void CForecastTest::testFinancialIndex() core_t::TTime end{timeseries[timeseries.size() - 1].first}; std::string m; TModelPtr forecastModel(model.cloneForForecast()); - forecastModel->forecast(start, end, 99.0, - MINIMUM_VALUE, MAXIMUM_VALUE, - boost::bind(&mockSink, _1, boost::ref(prediction)), m); + forecastModel->forecast(start, end, 99.0, MINIMUM_VALUE, MAXIMUM_VALUE, boost::bind(&mockSink, _1, boost::ref(prediction)), m); std::size_t outOfBounds{0}; std::size_t count{0}; TMeanAccumulator error; - for (std::size_t i = n, j = 0u; - i < timeseries.size() && j < prediction.size(); - ++i, ++j) - { + for (std::size_t i = n, j = 0u; i < timeseries.size() && j < 
prediction.size(); ++i, ++j) {
         double yi{timeseries[i].second};
-        outOfBounds += (   yi < prediction[j].s_LowerBound
-                        || yi > prediction[j].s_UpperBound ? 1 : 0);
+        outOfBounds += (yi < prediction[j].s_LowerBound || yi > prediction[j].s_UpperBound ? 1 : 0);
         ++count;
         error.add(std::fabs(yi - prediction[j].s_Predicted) / std::fabs(yi));
         //actual.push_back(yi);
@@ -409,8 +354,7 @@ void CForecastTest::testFinancialIndex()
         //uy.push_back(prediction[j].s_UpperBound);
     }
 
-    double percentageOutOfBounds{100.0 * static_cast<double>(outOfBounds)
-                                       / static_cast<double>(count)};
+    double percentageOutOfBounds{100.0 * static_cast<double>(outOfBounds) / static_cast<double>(count)};
 
     LOG_DEBUG("% out of bounds = " << percentageOutOfBounds);
     LOG_DEBUG("error = " << maths::CBasicStatistics::mean(error));
@@ -423,34 +367,23 @@ void CForecastTest::testFinancialIndex()
     CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.1);
 }
 
-CppUnit::Test *CForecastTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CForecastTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CForecastTest>(
-                                   "CForecastTest::testDailyNoLongTermTrend",
-                                   &CForecastTest::testDailyNoLongTermTrend) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CForecastTest>(
-                                   "CForecastTest::testDailyConstantLongTermTrend",
-                                   &CForecastTest::testDailyConstantLongTermTrend) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CForecastTest>(
-                                   "CForecastTest::testDailyVaryingLongTermTrend",
-                                   &CForecastTest::testDailyVaryingLongTermTrend) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CForecastTest>(
-                                   "CForecastTest::testComplexNoLongTermTrend",
-                                   &CForecastTest::testComplexNoLongTermTrend) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CForecastTest>(
-                                   "CForecastTest::testComplexConstantLongTermTrend",
-                                   &CForecastTest::testComplexConstantLongTermTrend) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CForecastTest>(
-                                   "CForecastTest::testComplexVaryingLongTermTrend",
-                                   &CForecastTest::testComplexVaryingLongTermTrend) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CForecastTest>(
-                                   "CForecastTest::testNonNegative",
-                                   &CForecastTest::testNonNegative) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CForecastTest>(
-                                   "CForecastTest::testFinancialIndex",
-                                   &CForecastTest::testFinancialIndex) );
+CppUnit::Test* CForecastTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CForecastTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CForecastTest>("CForecastTest::testDailyNoLongTermTrend", &CForecastTest::testDailyNoLongTermTrend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastTest>("CForecastTest::testDailyConstantLongTermTrend",
+                                                                 &CForecastTest::testDailyConstantLongTermTrend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastTest>("CForecastTest::testDailyVaryingLongTermTrend",
+                                                                 &CForecastTest::testDailyVaryingLongTermTrend));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CForecastTest>("CForecastTest::testComplexNoLongTermTrend", &CForecastTest::testComplexNoLongTermTrend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastTest>("CForecastTest::testComplexConstantLongTermTrend",
+                                                                 &CForecastTest::testComplexConstantLongTermTrend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastTest>("CForecastTest::testComplexVaryingLongTermTrend",
+                                                                 &CForecastTest::testComplexVaryingLongTermTrend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastTest>("CForecastTest::testNonNegative", &CForecastTest::testNonNegative));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastTest>("CForecastTest::testFinancialIndex", &CForecastTest::testFinancialIndex));
 
     return suiteOfTests;
 }
@@ -460,8 +393,7 @@ void CForecastTest::test(TTrend trend,
                          std::size_t daysToLearn,
                          double noiseVariance,
                          double
maximumPercentageOutOfBounds, - double maximumError) -{ + double maximumError) { //std::ofstream file; //file.open("results.m"); @@ -475,27 +407,25 @@ void CForecastTest::test(TTrend trend, test::CRandomNumbers rng; maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{decayRateControllers()}; - maths::CUnivariateTimeSeriesModel model( - params(bucketLength), TAG, - maths::CTimeSeriesDecomposition(0.012, bucketLength), - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE), - &controllers); + maths::CUnivariateTimeSeriesModel model(params(bucketLength), + TAG, + maths::CTimeSeriesDecomposition(0.012, bucketLength), + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE), + &controllers); core_t::TTime time{0}; TDouble2Vec4VecVec weights{{{1.0}}}; - for (std::size_t d = 0u; d < daysToLearn; ++d) - { + for (std::size_t d = 0u; d < daysToLearn; ++d) { TDoubleVec noise; rng.generateNormalSamples(0.0, noiseVariance, 86400 / bucketLength, noise); - for (std::size_t i = 0u; i < noise.size(); ++i, time += bucketLength) - { + for (std::size_t i = 0u; i < noise.size(); ++i, time += bucketLength) { maths::CModelAddSamplesParams params; params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + .propagationInterval(1.0) + .weightStyles(maths::CConstantWeights::COUNT) + .trendWeights(weights) + .priorWeights(weights); double yi{trend(time, noise[i])}; model.addSamples(params, {core::make_triple(time, TDouble2Vec{yi}, TAG)}); //actual.push_back(yi); @@ -509,27 +439,20 @@ void CForecastTest::test(TTrend trend, core_t::TTime end{time + 2 * core::constants::WEEK}; TModelPtr forecastModel(model.cloneForForecast()); std::string m; - forecastModel->forecast(start, end, 80.0, - MINIMUM_VALUE, MAXIMUM_VALUE, - boost::bind(&mockSink, _1, boost::ref(prediction)), m); + forecastModel->forecast(start, end, 80.0, MINIMUM_VALUE, MAXIMUM_VALUE, boost::bind(&mockSink, _1, boost::ref(prediction)), m); std::size_t outOfBounds{0}; std::size_t count{0}; TMeanAccumulator error; - for (std::size_t i = 0u; i < prediction.size(); /**/) - { + for (std::size_t i = 0u; i < prediction.size(); /**/) { TDoubleVec noise; rng.generateNormalSamples(0.0, noiseVariance, 86400 / bucketLength, noise); TDoubleVec day; - for (std::size_t j = 0u; - i < prediction.size() && j < noise.size(); - ++i, ++j, time += bucketLength) - { + for (std::size_t j = 0u; i < prediction.size() && j < noise.size(); ++i, ++j, time += bucketLength) { double yj{trend(time, noise[j])}; day.push_back(yj); - outOfBounds += ( yj < prediction[i].s_LowerBound - || yj > prediction[i].s_UpperBound ? 1 : 0); + outOfBounds += (yj < prediction[i].s_LowerBound || yj > prediction[i].s_UpperBound ? 
1 : 0);
                 ++count;
                 error.add(std::fabs(yj - prediction[i].s_Predicted) / std::fabs(yj));
                 //actual.push_back(yj);
@@ -539,8 +462,7 @@ void CForecastTest::test(TTrend trend,
         }
     }
 
-    double percentageOutOfBounds{100.0 * static_cast<double>(outOfBounds)
-                                       / static_cast<double>(count)};
+    double percentageOutOfBounds{100.0 * static_cast<double>(outOfBounds) / static_cast<double>(count)};
 
     LOG_DEBUG("% out of bounds = " << percentageOutOfBounds);
     LOG_DEBUG("error = " << maths::CBasicStatistics::mean(error));
diff --git a/lib/maths/unittest/CForecastTest.h b/lib/maths/unittest/CForecastTest.h
index 0d9505fc44..70ccde4669 100644
--- a/lib/maths/unittest/CForecastTest.h
+++ b/lib/maths/unittest/CForecastTest.h
@@ -13,30 +13,29 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CForecastTest : public CppUnit::TestFixture
-{
-    public:
-        void testDailyNoLongTermTrend();
-        void testDailyConstantLongTermTrend();
-        void testDailyVaryingLongTermTrend();
-        void testComplexNoLongTermTrend();
-        void testComplexConstantLongTermTrend();
-        void testComplexVaryingLongTermTrend();
-        void testNonNegative();
-        void testFinancialIndex();
-
-        static CppUnit::Test *suite();
-
-    private:
-        using TTrend = std::function<double (ml::core_t::TTime, double)>;
-
-    private:
-        void test(TTrend trend,
-                  ml::core_t::TTime bucketLength,
-                  std::size_t daysToLearn,
-                  double noiseVariance,
-                  double maximumPercentageOutOfBounds,
-                  double maximumError);
+class CForecastTest : public CppUnit::TestFixture {
+public:
+    void testDailyNoLongTermTrend();
+    void testDailyConstantLongTermTrend();
+    void testDailyVaryingLongTermTrend();
+    void testComplexNoLongTermTrend();
+    void testComplexConstantLongTermTrend();
+    void testComplexVaryingLongTermTrend();
+    void testNonNegative();
+    void testFinancialIndex();
+
+    static CppUnit::Test* suite();
+
+private:
+    using TTrend = std::function<double(ml::core_t::TTime, double)>;
+
+private:
+    void test(TTrend trend,
+              ml::core_t::TTime bucketLength,
+              std::size_t daysToLearn,
+              double noiseVariance,
+              double maximumPercentageOutOfBounds,
+              double maximumError);
 };
 
 #endif // INCLUDED_CForecastTest_h
diff --git a/lib/maths/unittest/CGammaRateConjugateTest.cc b/lib/maths/unittest/CGammaRateConjugateTest.cc
index 5ab400d505..91e518f4dd 100644
--- a/lib/maths/unittest/CGammaRateConjugateTest.cc
+++ b/lib/maths/unittest/CGammaRateConjugateTest.cc
@@ -33,8 +33,7 @@
 using namespace ml;
 using namespace handy_typedefs;
 
-namespace
-{
+namespace {
 
 using TDoubleVec = std::vector<double>;
 using TDoubleDoublePr = std::pair<double, double>;
@@ -43,17 +42,13 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumula
 using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
 using CGammaRateConjugate = CPriorTestInterfaceMixin<maths::CGammaRateConjugate>;
 
-CGammaRateConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData,
-                              const double &offset = 0.0,
-                              const double &decayRate = 0.0)
-{
+CGammaRateConjugate
+makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& offset = 0.0, const double& decayRate = 0.0) {
     return CGammaRateConjugate::nonInformativePrior(dataType, offset, decayRate, 0.0);
 }
-
 }
 
-void CGammaRateConjugateTest::testMultipleUpdate()
-{
+void CGammaRateConjugateTest::testMultipleUpdate() {
     LOG_DEBUG("+-----------------------------------------------+");
     LOG_DEBUG("| CGammaRateConjugateTest::testMultipleUpdate  |");
     LOG_DEBUG("+-----------------------------------------------+");
@@ -61,11 +56,7 @@ void CGammaRateConjugateTest::testMultipleUpdate()
 
     // Test that we get the same result updating once with a vector of 100
     // samples of an R.V. versus updating individually 100 times.
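The comment above states the invariant precisely: conjugate Bayesian updates commute, so folding in a batch of samples must produce the same posterior as folding in the same samples one at a time. A minimal sketch of why this holds, using a gamma prior on an exponential rate as a stand-in (toy type, invented for illustration, not this library's API):

    #include <cassert>
    #include <vector>

    // Gamma(a, b) prior on the rate of an exponential likelihood: each
    // observation x adds 1 to the shape and x to the rate, so the posterior
    // depends only on the totals (n, sum x), not on the order or batching
    // of the updates.
    struct GammaRatePosterior {
        double a, b;
        void add(const std::vector<double>& samples) {
            for (double x : samples) {
                a += 1.0;
                b += x;
            }
        }
    };

    int main() {
        std::vector<double> samples{1.2, 0.7, 3.4, 0.1};
        GammaRatePosterior batch{2.0, 3.0};
        batch.add(samples);
        GammaRatePosterior incremental{2.0, 3.0};
        for (double x : samples) {
            incremental.add({x});
        }
        assert(batch.a == incremental.a && batch.b == incremental.b);
        return 0;
    }

The test below asserts the same property on the real prior, but only up to a tolerance, which is why it compares the filters with CEqualWithTolerance rather than exact equality.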
- const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; const double shape = 2.0; const double scale = 3.0; @@ -75,13 +66,11 @@ void CGammaRateConjugateTest::testMultipleUpdate() TDoubleVec samples; rng.generateGammaSamples(shape, scale, 100, samples); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { CGammaRateConjugate filter1(makePrior(dataTypes[i])); CGammaRateConjugate filter2(filter1); - for (std::size_t j = 0; j < samples.size(); ++j) - { + for (std::size_t j = 0; j < samples.size(); ++j) { filter1.addSamples(TDouble1Vec(1, samples[j])); } filter2.addSamples(samples); @@ -96,22 +85,16 @@ void CGammaRateConjugateTest::testMultipleUpdate() TDoubleVec scaledSamples; rng.generateGammaSamples(shape / 2.0, 2.0 * scale, 100, scaledSamples); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { CGammaRateConjugate filter1(makePrior(dataTypes[i])); filter1.addSamples(samples); CGammaRateConjugate filter2(filter1); maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t j = 0u; j < scaledSamples.size(); ++j) - { - filter1.addSamples(weightStyle, - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { + filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); } - filter2.addSamples(weightStyle, - scaledSamples, - TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(weightStyle, scaledSamples, TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_RelativeTolerance, 0.03); @@ -120,16 +103,14 @@ void CGammaRateConjugateTest::testMultipleUpdate() // Test the count weight is equivalent to adding repeated samples. 
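A count weight of c is defined to contribute exactly c observations' worth of evidence, which is why the block below compares one weighted update against ten identical unweighted ones. A toy version of that equivalence on raw sufficient statistics (hypothetical Stats type, for illustration only):

    #include <cassert>

    // A count weight scales the (count, sum) sufficient statistics, so one
    // update with weight 10 matches ten unit-weight updates exactly.
    struct Stats {
        double n = 0.0, sum = 0.0;
        void add(double x, double countWeight = 1.0) {
            n += countWeight;
            sum += countWeight * x;
        }
    };

    int main() {
        Stats repeated, weighted;
        for (int i = 0; i < 10; ++i) {
            repeated.add(3.0); // ten unit-weight samples
        }
        weighted.add(3.0, 10.0); // one sample with count weight ten
        assert(repeated.n == weighted.n && repeated.sum == weighted.sum);
        return 0;
    }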
- for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { CGammaRateConjugate filter1(makePrior(dataTypes[i])); CGammaRateConjugate filter2(filter1); double x = 3.0; std::size_t count = 10; - for (std::size_t j = 0u; j < count; ++j) - { + for (std::size_t j = 0u; j < count; ++j) { filter1.addSamples(TDouble1Vec(1, x)); } filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), @@ -142,8 +123,7 @@ void CGammaRateConjugateTest::testMultipleUpdate() } } -void CGammaRateConjugateTest::testPropagation() -{ +void CGammaRateConjugateTest::testPropagation() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testPropagation |"); LOG_DEBUG("+--------------------------------------------+"); @@ -160,8 +140,7 @@ void CGammaRateConjugateTest::testPropagation() CGammaRateConjugate filter(makePrior(maths_t::E_ContinuousData, 0.1)); - for (std::size_t i = 0; i < samples.size(); ++i) - { + for (std::size_t i = 0; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); } @@ -173,17 +152,14 @@ void CGammaRateConjugateTest::testPropagation() double propagatedShape = filter.likelihoodShape(); double propagatedRate = filter.likelihoodRate(); - LOG_DEBUG("shape = " << shape - << ", rate = " << rate - << ", propagatedShape = " << propagatedShape - << ", propagatedRate = " << propagatedRate); + LOG_DEBUG("shape = " << shape << ", rate = " << rate << ", propagatedShape = " << propagatedShape + << ", propagatedRate = " << propagatedRate); CPPUNIT_ASSERT_DOUBLES_EQUAL(shape, propagatedShape, eps); CPPUNIT_ASSERT_DOUBLES_EQUAL(rate, propagatedRate, eps); } -void CGammaRateConjugateTest::testShapeEstimation() -{ +void CGammaRateConjugateTest::testShapeEstimation() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testShapeEstimation |"); LOG_DEBUG("+------------------------------------------------+"); @@ -193,17 +169,15 @@ void CGammaRateConjugateTest::testShapeEstimation() // use any explicit bounds on the convergence rates so simply check that // we do get closer as the number of samples increases. 
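Because no explicit convergence rate is asserted, the test only checks that the shape estimate improves as more data arrives. The same qualitative behaviour can be seen with a plain method-of-moments estimator, shape = mean^2 / variance (illustrative sketch using only the standard library; the true shape here is 5):

    #include <cmath>
    #include <iostream>
    #include <random>
    #include <vector>

    // Method-of-moments gamma shape estimate; its error should
    // (stochastically) shrink as the sample count grows.
    double estimateShape(const std::vector<double>& x) {
        double mean = 0.0;
        for (double xi : x) { mean += xi; }
        mean /= static_cast<double>(x.size());
        double variance = 0.0;
        for (double xi : x) { variance += (xi - mean) * (xi - mean); }
        variance /= static_cast<double>(x.size());
        return mean * mean / variance;
    }

    int main() {
        std::mt19937 rng(42);
        std::gamma_distribution<> gamma(5.0, 2.0); // shape 5, scale 2
        std::vector<double> samples;
        for (std::size_t n : {100u, 1000u, 10000u}) {
            while (samples.size() < n) { samples.push_back(gamma(rng)); }
            std::cout << "n = " << n << ", |shape error| = "
                      << std::fabs(estimateShape(samples) - 5.0) << '\n';
        }
        return 0;
    }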
- const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; - for (size_t i = 0; i < boost::size(decayRates); ++i) - { + for (size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; double tests = 0.0; double errorIncreased = 0.0; - for (unsigned int test = 0u; test < 100u; ++test) - { + for (unsigned int test = 0u; test < 100u; ++test) { double shape = 0.5 * (test + 1.0); double scale = 2.0; @@ -218,12 +192,10 @@ void CGammaRateConjugateTest::testShapeEstimation() double previousError = std::numeric_limits::max(); double averageShape = 0.0; - for (std::size_t j = 0u; j < samples.size() / nAggregate; ++j) - { + for (std::size_t j = 0u; j < samples.size() / nAggregate; ++j) { double error = 0.0; averageShape = 0.0; - for (std::size_t k = 0u; k < nAggregate; ++k) - { + for (std::size_t k = 0u; k < nAggregate; ++k) { filters[k].addSamples(TDouble1Vec(1, samples[nAggregate * j + k])); filters[k].propagateForwardsByTime(1.0); @@ -233,10 +205,8 @@ void CGammaRateConjugateTest::testShapeEstimation() error /= static_cast(nAggregate); averageShape /= static_cast(nAggregate); - if (j > 0u && j % 20u == 0u) - { - if (error > previousError) - { + if (j > 0u && j % 20u == 0u) { + if (error > previousError) { errorIncreased += 1.0; } tests += 1.0; @@ -252,11 +222,10 @@ void CGammaRateConjugateTest::testShapeEstimation() // Error should only increase in at most 7% of measurements. CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, errorIncreased, 0.07 * tests); - } + } } -void CGammaRateConjugateTest::testRateEstimation() -{ +void CGammaRateConjugateTest::testRateEstimation() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testRateEstimation |"); LOG_DEBUG("+-----------------------------------------------+"); @@ -266,19 +235,17 @@ void CGammaRateConjugateTest::testRateEstimation() // rate of a gamma process lies in various confidence intervals // the correct percentage of the times. 
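The coverage property being tested generalizes beyond this prior: if an interval procedure is calibrated, the true parameter should fall inside a p% interval in roughly p% of repeated experiments, which is exactly how the error counts per test interval are tallied below. A self-contained coverage check for the textbook case of a normal mean with known variance (sketch; 1.6449 is the standard normal 0.95 quantile, giving a two-sided 90% interval):

    #include <cmath>
    #include <iostream>
    #include <random>

    int main() {
        const double mu = 2.0;
        const int numberTests = 1000;
        const int n = 50;
        const double z90 = 1.6449; // standard normal 0.95 quantile
        std::mt19937 rng(42);
        std::normal_distribution<> normal(mu, 1.0);
        int covered = 0;
        for (int t = 0; t < numberTests; ++t) {
            double mean = 0.0;
            for (int i = 0; i < n; ++i) { mean += normal(rng); }
            mean /= n;
            double halfWidth = z90 / std::sqrt(static_cast<double>(n));
            if (mu >= mean - halfWidth && mu <= mean + halfWidth) {
                ++covered;
            }
        }
        // Should print a value close to 90%.
        std::cout << "coverage = " << 100.0 * covered / numberTests << "%\n";
        return 0;
    }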
- const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nTests = 100u; - const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 }; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; - for (size_t i = 0; i < boost::size(decayRates); ++i) - { + for (size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; - unsigned int errors[] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }; + unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; - for (unsigned int test = 0; test < nTests; ++test) - { + for (unsigned int test = 0; test < nTests; ++test) { double shape = 2.0; double scale = 0.2 * (test + 1.0); double rate = 1.0 / scale; @@ -288,30 +255,23 @@ void CGammaRateConjugateTest::testRateEstimation() CGammaRateConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); filter.propagateForwardsByTime(1.0); } - for (size_t j = 0; j < boost::size(testIntervals); ++j) - { - TDoubleDoublePr confidenceInterval = - filter.confidenceIntervalRate(testIntervals[j]); + for (size_t j = 0; j < boost::size(testIntervals); ++j) { + TDoubleDoublePr confidenceInterval = filter.confidenceIntervalRate(testIntervals[j]); - if (rate < confidenceInterval.first || - rate > confidenceInterval.second) - { + if (rate < confidenceInterval.first || rate > confidenceInterval.second) { ++errors[j]; } } } - for (size_t j = 0; j < boost::size(testIntervals); ++j) - { + for (size_t j = 0; j < boost::size(testIntervals); ++j) { // The number of errors should be inside the percentile bounds. - unsigned int maximumErrors = - static_cast(std::ceil((1.0 - testIntervals[j]/100.0) * nTests)); + unsigned int maximumErrors = static_cast(std::ceil((1.0 - testIntervals[j] / 100.0) * nTests)); LOG_DEBUG("errors = " << errors[j] << ", maximumErrors = " << maximumErrors); @@ -320,20 +280,14 @@ void CGammaRateConjugateTest::testRateEstimation() } } -void CGammaRateConjugateTest::testMarginalLikelihood() -{ +void CGammaRateConjugateTest::testMarginalLikelihood() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testMarginalLikelihood |"); LOG_DEBUG("+---------------------------------------------------+"); // Check that the c.d.f. <= 1 at extreme. 
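The extreme-value check is equivalently a statement about the minus log c.d.f.: since F(x) <= 1 for any valid distribution, -log(F(x)) must stay non-negative arbitrarily far into the tail, which is the form the lb/ub assertions below use. A standalone version of the same sanity check against the standard normal c.d.f. (sketch only, erfc-based):

    #include <cassert>
    #include <cmath>

    // Standard normal c.d.f. via the complementary error function.
    double normalCdf(double x) {
        return 0.5 * std::erfc(-x / std::sqrt(2.0));
    }

    int main() {
        // -log(F(x)) >= 0 everywhere, including deep in the upper tail.
        for (double x = -10.0; x <= 1000.0; x += 10.0) {
            assert(-std::log(normalCdf(x)) >= 0.0);
        }
        return 0;
    }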
- maths_t::EDataType dataTypes[] = - { - maths_t::E_ContinuousData, - maths_t::E_IntegerData - }; - for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) - { + maths_t::EDataType dataTypes[] = {maths_t::E_ContinuousData, maths_t::E_IntegerData}; + for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { CGammaRateConjugate filter(makePrior()); const double shape = 1.0; @@ -345,23 +299,18 @@ void CGammaRateConjugateTest::testMarginalLikelihood() rng.generateGammaSamples(shape, scale, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = - { - maths_t::E_SampleCountWeight, - maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight - }; - double weights[] = { 0.1, 1.0, 10.0 }; + maths_t::ESampleWeightStyle weightStyles[] = { + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight}; + double weights[] = {0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) - { - for (std::size_t j = 0u; j < boost::size(weights); ++j) - { + for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 1000.0), TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), - lb, ub); + lb, + ub); LOG_DEBUG("-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -372,7 +321,7 @@ void CGammaRateConjugateTest::testMarginalLikelihood() // Check that the marginal likelihood and c.d.f. agree for some // test data and that the c.d.f. <= 1. - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; const double shape = 5.0; const double scale = 1.0; @@ -383,20 +332,17 @@ void CGammaRateConjugateTest::testMarginalLikelihood() test::CRandomNumbers rng; - unsigned int numberSamples[] = { 4u, 10u, 300u, 500u }; - const double tolerances[] = { 1e-8, 1e-8, 0.01, 0.001 }; + unsigned int numberSamples[] = {4u, 10u, 300u, 500u}; + const double tolerances[] = {1e-8, 1e-8, 0.01, 0.001}; - for (size_t i = 0; i < boost::size(numberSamples); ++i) - { + for (size_t i = 0; i < boost::size(numberSamples); ++i) { TDoubleVec samples; rng.generateGammaSamples(shape, scale, numberSamples[i], samples); - for (size_t j = 0; j < boost::size(decayRates); ++j) - { + for (size_t j = 0; j < boost::size(decayRates); ++j) { CGammaRateConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[j])); - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); filter.propagateForwardsByTime(1.0); } @@ -405,22 +351,16 @@ void CGammaRateConjugateTest::testMarginalLikelihood() // of the c.d.f. at a range of deltas from the true mean. 
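The agreement test that follows leans on the defining relationship f(x) = dF/dx: the density returned by the marginal likelihood is compared against a central finite difference of the c.d.f., (F(x + eps) - F(x - eps)) / (2 eps). The same check in isolation for a standard normal (sketch, standard library only):

    #include <cassert>
    #include <cmath>

    const double pi = 3.14159265358979323846;

    double normalCdf(double x) { return 0.5 * std::erfc(-x / std::sqrt(2.0)); }
    double normalPdf(double x) { return std::exp(-0.5 * x * x) / std::sqrt(2.0 * pi); }

    int main() {
        const double eps = 1e-4;
        for (double x = -3.0; x <= 3.0; x += 0.5) {
            double dFdx = (normalCdf(x + eps) - normalCdf(x - eps)) / (2.0 * eps);
            // Central differences are second order accurate, so agreement is
            // much tighter than the 1e-6 asserted here.
            assert(std::fabs(dFdx - normalPdf(x)) < 1e-6);
        }
        return 0;
    }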
const double eps = 1e-4; - double deltas[] = - { - -2.0, -1.6, -1.2, -0.8, -0.4, -0.2, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0 - }; + double deltas[] = {-2.0, -1.6, -1.2, -0.8, -0.4, -0.2, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0}; - for (size_t k = 0; k < boost::size(deltas); ++k) - { + for (size_t k = 0; k < boost::size(deltas); ++k) { double x = mean + deltas[k] * std::sqrt(variance); TDouble1Vec sample(1, x); - LOG_DEBUG("number = " << numberSamples[i] - << ", sample = " << sample[0]); + LOG_DEBUG("number = " << numberSamples[i] << ", sample = " << sample[0]); double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); double pdf = std::exp(logLikelihood); double lowerBound = 0.0, upperBound = 0.0; @@ -465,52 +405,42 @@ void CGammaRateConjugateTest::testMarginalLikelihood() TDoubleVec samples; rng.generateGammaSamples(shape, scale, 100000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { TDouble1Vec sample(1, samples[i]); filter.addSamples(sample); double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); differentialEntropy -= logLikelihood; } differentialEntropy /= static_cast(samples.size()); - LOG_DEBUG("differentialEntropy = " << differentialEntropy - << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); + LOG_DEBUG("differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.0025); } - const double varianceScales[] = - { - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0 - }; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0}; CGammaRateConjugate filter(makePrior()); TDoubleVec samples; rng.generateGammaSamples(shape, scale, 1000, samples); filter.addSamples(samples); - const double percentages[] = - { - 5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0 - }; + const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0}; { // Test that marginal likelihood confidence intervals are // what we'd expect for various variance scales. 
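The expected endpoints are nothing more than central quantiles: a p% two-sided interval runs from the (50 - p/2)th to the (50 + p/2)th percentile, which is exactly how q1 and q2 are computed below. Stated on its own with boost.math, which these tests already use (sketch; the gamma(5, 1) parameters are chosen arbitrarily):

    #include <boost/math/distributions/gamma.hpp>
    #include <iostream>

    int main() {
        boost::math::gamma_distribution<> gamma(5.0, 1.0);
        double percentage = 80.0;
        // Central 80% interval: the 10th and 90th percentiles.
        double q1 = boost::math::quantile(gamma, (50.0 - percentage / 2.0) / 100.0);
        double q2 = boost::math::quantile(gamma, (50.0 + percentage / 2.0) / 100.0);
        std::cout << "80% interval = [" << q1 << ", " << q2 << "]\n";
        return 0;
    }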
TMeanAccumulator error; - for (std::size_t i = 0u; i < boost::size(percentages); ++i) - { + for (std::size_t i = 0u; i < boost::size(percentages); ++i) { double q1, q2; filter.marginalLikelihoodQuantileForTest(50.0 - percentages[i] / 2.0, 1e-3, q1); filter.marginalLikelihoodQuantileForTest(50.0 + percentages[i] / 2.0, 1e-3, q2); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[i]); LOG_DEBUG("[q1, q2] = [" << q1 << ", " << q2 << "]" - << ", interval = " << core::CContainerPrinter::print(interval)); + << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.02); CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 0.02); error.add(std::fabs(interval.first - q1)); @@ -523,21 +453,18 @@ void CGammaRateConjugateTest::testMarginalLikelihood() maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); TDouble4Vec weight(1, 1.0); TMeanAccumulator totalError; - for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) - { + for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { TMeanAccumulator error; double vs = varianceScales[i]; weight[0] = vs; LOG_DEBUG("*** vs = " << vs << " ***"); - for (std::size_t j = 0u; j < boost::size(percentages); ++j) - { + for (std::size_t j = 0u; j < boost::size(percentages); ++j) { boost::math::gamma_distribution<> scaledGamma(shape / vs, vs * scale); double q1 = boost::math::quantile(scaledGamma, (50.0 - percentages[j] / 2.0) / 100.0); double q2 = boost::math::quantile(scaledGamma, (50.0 + percentages[j] / 2.0) / 100.0); - TDoubleDoublePr interval = - filter.marginalLikelihoodConfidenceInterval(percentages[j], weightStyle, weight); + TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[j], weightStyle, weight); LOG_DEBUG("[q1, q2] = [" << q1 << ", " << q2 << "]" - << ", interval = " << core::CContainerPrinter::print(interval)); + << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.4); CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 0.4); error.add(std::fabs(interval.first - q1)); @@ -552,8 +479,7 @@ void CGammaRateConjugateTest::testMarginalLikelihood() } } -void CGammaRateConjugateTest::testMarginalLikelihoodMean() -{ +void CGammaRateConjugateTest::testMarginalLikelihoodMean() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testMarginalLikelihoodMean |"); LOG_DEBUG("+-------------------------------------------------------+"); @@ -561,17 +487,14 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMean() // Test that the expectation of the marginal likelihood matches // the expected mean of the marginal likelihood. 
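The "ForTest" expectation referenced below integrates x f(x) over a truncated range, so the comparison is only exact up to the discarded tail mass, which is where the test's 1e-3 relative tolerance comes from. A freestanding version of that numerical check for a gamma(5, 2), whose mean is shape * scale = 10 (sketch):

    #include <cmath>
    #include <iostream>

    int main() {
        const double a = 5.0, s = 2.0;
        // Gamma density written out directly.
        auto pdf = [&](double x) {
            return std::pow(x, a - 1.0) * std::exp(-x / s) /
                   (std::tgamma(a) * std::pow(s, a));
        };
        // Midpoint rule on [0, 200]; the mass beyond 200 is negligible.
        const double dx = 1e-3;
        double mean = 0.0;
        for (double x = dx / 2.0; x < 200.0; x += dx) {
            mean += x * pdf(x) * dx;
        }
        std::cout << "numerical mean = " << mean << ", a * s = " << a * s << '\n';
        return 0;
    }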
- const double shapes[] = { 5.0, 20.0, 40.0 }; - const double scales[] = { 1.0, 10.0, 20.0 }; + const double shapes[] = {5.0, 20.0, 40.0}; + const double scales[] = {1.0, 10.0, 20.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(shapes); ++i) - { - for (std::size_t j = 0u; j < boost::size(scales); ++j) - { - LOG_DEBUG("*** shape = " << shapes[i] - << ", scale = " << scales[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(shapes); ++i) { + for (std::size_t j = 0u; j < boost::size(scales); ++j) { + LOG_DEBUG("*** shape = " << shapes[i] << ", scale = " << scales[j] << " ***"); CGammaRateConjugate filter(makePrior()); @@ -582,31 +505,25 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMean() TDoubleVec samples; rng.generateGammaSamples(shapes[i], scales[j], 100, samples); - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedMean; CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); - if (k % 10 == 0) - { - LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() - << ", expectedMean = " << expectedMean); + if (k % 10 == 0) { + LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); } // The error is mainly due to the truncation in the // integration range used to compute the expected mean. - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, - filter.marginalLikelihoodMean(), - 1e-3 * expectedMean); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 1e-3 * expectedMean); } } } } -void CGammaRateConjugateTest::testMarginalLikelihoodMode() -{ +void CGammaRateConjugateTest::testMarginalLikelihoodMode() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testMarginalLikelihoodMode |"); LOG_DEBUG("+-------------------------------------------------------+"); @@ -614,19 +531,14 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMode() // Test that the marginal likelihood mode is what we'd expect // with variances variance scales. 
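The variance scaling used below keeps the mean fixed: a gamma with shape a and scale s becomes gamma(a / v, v * s) under a variance scale v, and for a / v >= 1 its mode is (a / v - 1) * v * s. A direct check of that closed form against boost.math (sketch; the shape/scale pair mirrors one used in the test):

    #include <boost/math/distributions/gamma.hpp>
    #include <cassert>
    #include <cmath>

    int main() {
        const double a = 20.0, s = 10.0;
        for (double v : {0.5, 1.0, 2.0}) {
            boost::math::gamma_distribution<> scaled(a / v, v * s);
            double expectedMode = (a / v - 1.0) * (v * s);
            assert(std::fabs(boost::math::mode(scaled) - expectedMode) <
                   1e-6 * expectedMode);
        }
        return 0;
    }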
- const double shapes[] = { 5.0, 20.0, 40.0 }; - const double scales[] = { 1.0, 10.0, 20.0 }; - const double varianceScales[] = - { - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0 - }; + const double shapes[] = {5.0, 20.0, 40.0}; + const double scales[] = {1.0, 10.0, 20.0}; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(shapes); ++i) - { - for (std::size_t j = 0u; j < boost::size(scales); ++j) - { + for (std::size_t i = 0u; i < boost::size(shapes); ++i) { + for (std::size_t j = 0u; j < boost::size(scales); ++j) { LOG_DEBUG("*** shape = " << shapes[i] << ", scale = " << scales[j] << " ***"); CGammaRateConjugate filter(makePrior()); @@ -638,17 +550,14 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMode() TMeanAccumulator relativeError; maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); TDouble4Vec weight(1, 1.0); - for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) - { + for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; weight[0] = vs; boost::math::gamma_distribution<> scaledGamma(shapes[i] / vs, vs * scales[j]); double expectedMode = boost::math::mode(scaledGamma); LOG_DEBUG("marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight) - << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, - filter.marginalLikelihoodMode(weightStyle, weight), - 0.28 * expectedMode + 0.3); + << ", expectedMode = " << expectedMode); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 0.28 * expectedMode + 0.3); double error = std::fabs(filter.marginalLikelihoodMode(weightStyle, weight) - expectedMode); relativeError.add(error == 0.0 ? 0.0 : error / expectedMode); } @@ -658,8 +567,7 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMode() } } -void CGammaRateConjugateTest::testMarginalLikelihoodVariance() -{ +void CGammaRateConjugateTest::testMarginalLikelihoodVariance() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testMarginalLikelihoodVariance |"); LOG_DEBUG("+-----------------------------------------------------------+"); @@ -668,17 +576,14 @@ void CGammaRateConjugateTest::testMarginalLikelihoodVariance() // the marginal likelihood matches the expected variance of the // marginal likelihood. 
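As with the mean, the expected variance below is computed by truncated numerical integration, hence the 1% relative tolerance. The identity being verified, Var[X] = E[X^2] - E[X]^2 = a * s^2 for a gamma(a, s), can be checked in isolation (sketch, same midpoint-rule setup as the mean example above):

    #include <cmath>
    #include <iostream>

    int main() {
        const double a = 5.0, s = 2.0;
        auto pdf = [&](double x) {
            return std::pow(x, a - 1.0) * std::exp(-x / s) /
                   (std::tgamma(a) * std::pow(s, a));
        };
        const double dx = 1e-3;
        double secondMoment = 0.0;
        for (double x = dx / 2.0; x < 200.0; x += dx) {
            secondMoment += x * x * pdf(x) * dx;
        }
        double variance = secondMoment - (a * s) * (a * s);
        // For gamma(5, 2): a * s^2 = 20.
        std::cout << "numerical variance = " << variance
                  << ", a * s^2 = " << a * s * s << '\n';
        return 0;
    }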
- const double shapes[] = { 5.0, 20.0, 40.0 }; - const double scales[] = { 1.0, 10.0, 20.0 }; + const double shapes[] = {5.0, 20.0, 40.0}; + const double scales[] = {1.0, 10.0, 20.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(shapes); ++i) - { - for (std::size_t j = 0u; j < boost::size(scales); ++j) - { - LOG_DEBUG("*** shape = " << shapes[i] - << ", scale = " << scales[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(shapes); ++i) { + for (std::size_t j = 0u; j < boost::size(scales); ++j) { + LOG_DEBUG("*** shape = " << shapes[i] << ", scale = " << scales[j] << " ***"); CGammaRateConjugate filter(makePrior()); @@ -691,27 +596,22 @@ void CGammaRateConjugateTest::testMarginalLikelihoodVariance() TMeanAccumulator relativeError; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedVariance; CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); - if (k % 10 == 0) - { + if (k % 10 == 0) { LOG_DEBUG("marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() - << ", expectedVariance = " << expectedVariance); + << ", expectedVariance = " << expectedVariance); } // The error is mainly due to the truncation in the // integration range used to compute the expected mean. - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, - filter.marginalLikelihoodVariance(), - 0.01 * expectedVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.01 * expectedVariance); - relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) - / expectedVariance); + relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance); } LOG_DEBUG("relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -720,8 +620,7 @@ void CGammaRateConjugateTest::testMarginalLikelihoodVariance() } } -void CGammaRateConjugateTest::testSampleMarginalLikelihood() -{ +void CGammaRateConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG("+---------------------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testSampleMarginalLikelihood |"); LOG_DEBUG("+---------------------------------------------------------+"); @@ -751,8 +650,7 @@ void CGammaRateConjugateTest::testSampleMarginalLikelihood() TMeanVarAccumulator sampleMeanVar; - for (std::size_t i = 0u; i < 3; ++i) - { + for (std::size_t i = 0u; i < 3; ++i) { sampleMeanVar.add(samples[i]); filter.addSamples(TDouble1Vec(1, samples[i])); @@ -764,17 +662,15 @@ void CGammaRateConjugateTest::testSampleMarginalLikelihood() sampledMeanVar = std::for_each(sampled.begin(), sampled.end(), sampledMeanVar); CPPUNIT_ASSERT_EQUAL(i + 1, sampled.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(sampleMeanVar), - maths::CBasicStatistics::mean(sampledMeanVar), eps); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::variance(sampleMeanVar), - maths::CBasicStatistics::variance(sampledMeanVar), eps); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(sampleMeanVar), maths::CBasicStatistics::mean(sampledMeanVar), eps); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::variance(sampleMeanVar), maths::CBasicStatistics::variance(sampledMeanVar), eps); } TMeanAccumulator meanVarError; std::size_t numberSampled = 20u; - for (std::size_t i = 3u; i < samples.size(); ++i) - { + for (std::size_t i = 3u; i < samples.size(); ++i) { 
filter.addSamples(TDouble1Vec(1, samples[i])); sampled.clear(); @@ -785,32 +681,26 @@ void CGammaRateConjugateTest::testSampleMarginalLikelihood() sampledMoments = std::for_each(sampled.begin(), sampled.end(), sampledMoments); LOG_DEBUG("expectedMean = " << filter.marginalLikelihoodMean() - << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); + << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); LOG_DEBUG("expectedVar = " << filter.marginalLikelihoodVariance() - << ", sampledVar = " << maths::CBasicStatistics::variance(sampledMoments)); + << ", sampledVar = " << maths::CBasicStatistics::variance(sampledMoments)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), - maths::CBasicStatistics::mean(sampledMoments), - 1e-8); + CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMoments), 1e-8); CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(), maths::CBasicStatistics::variance(sampledMoments), 0.25 * filter.marginalLikelihoodVariance()); - meanVarError.add( std::fabs( filter.marginalLikelihoodVariance() - - maths::CBasicStatistics::variance(sampledMoments)) - / filter.marginalLikelihoodVariance()); + meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - maths::CBasicStatistics::variance(sampledMoments)) / + filter.marginalLikelihoodVariance()); std::sort(sampled.begin(), sampled.end()); - for (std::size_t j = 1u; j < sampled.size(); ++j) - { - double q = 100.0 * static_cast(j) - / static_cast(numberSampled); + for (std::size_t j = 1u; j < sampled.size(); ++j) { + double q = 100.0 * static_cast(j) / static_cast(numberSampled); double expectedQuantile; CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile)); - LOG_DEBUG("quantile = " << q - << ", x_quantile = " << expectedQuantile - << ", quantile range = [" << sampled[j - 1u] << "," << sampled[j] << "]"); + LOG_DEBUG("quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[j - 1u] << "," + << sampled[j] << "]"); CPPUNIT_ASSERT(expectedQuantile >= sampled[j - 1u]); CPPUNIT_ASSERT(expectedQuantile <= sampled[j]); @@ -821,8 +711,7 @@ void CGammaRateConjugateTest::testSampleMarginalLikelihood() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanVarError) < 0.025); } -void CGammaRateConjugateTest::testCdf() -{ +void CGammaRateConjugateTest::testCdf() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testCdf |"); LOG_DEBUG("+------------------------------------+"); @@ -856,13 +745,11 @@ void CGammaRateConjugateTest::testCdf() double f = (lowerBound + upperBound) / 2.0; CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), lowerBound, upperBound)); double fComplement = (lowerBound + upperBound) / 2.0; - LOG_DEBUG("log(F(x)) = " << -f - << ", log(1 - F(x)) = " << fComplement); + LOG_DEBUG("log(F(x)) = " << -f << ", log(1 - F(x)) = " << fComplement); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(std::numeric_limits::min()), -f, 1e-10); CPPUNIT_ASSERT_EQUAL(1.0, std::exp(-fComplement)); - for (std::size_t i = 1u; i < 500; ++i) - { + for (std::size_t i = 1u; i < 500; ++i) { double x = static_cast(i) / 5.0; CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lowerBound, upperBound)); @@ -870,14 +757,12 @@ void CGammaRateConjugateTest::testCdf() CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lowerBound, upperBound)); fComplement = (lowerBound + upperBound) / 2.0; - LOG_DEBUG("log(F(x)) = " 
<< (f == 0.0 ? f : -f) - << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); + LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10); } } -void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() -{ +void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { LOG_DEBUG("+---------------------------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testProbabilityOfLessLikelySamples |"); LOG_DEBUG("+---------------------------------------------------------------+"); @@ -889,18 +774,16 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() // We also check that the tail calculation attributes samples to // the appropriate tail of the distribution. - const double shapes[] = { 0.4, 10.0, 200.0 }; - const double scales[] = { 0.1, 5.0, 50.0 }; - const double vs[] = { 0.5, 1.0, 2.0 }; + const double shapes[] = {0.4, 10.0, 200.0}; + const double scales[] = {0.1, 5.0, 50.0}; + const double vs[] = {0.5, 1.0, 2.0}; test::CRandomNumbers rng; TMeanAccumulator meanError; - for (size_t i = 0; i < boost::size(shapes); ++i) - { - for (size_t j = 0; j < boost::size(scales); ++j) - { + for (size_t i = 0; i < boost::size(shapes); ++i) { + for (size_t j = 0; j < boost::size(scales); ++j) { LOG_DEBUG("shape = " << shapes[i] << ", scale = " << scales[j]); TDoubleVec samples; @@ -913,8 +796,7 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() double rate_ = filter.likelihoodRate(); TDoubleVec likelihoods; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { double likelihood; filter.jointLogMarginalLikelihood(TDouble1Vec(1, samples[k]), likelihood); likelihoods.push_back(likelihood); @@ -922,27 +804,22 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() std::sort(likelihoods.begin(), likelihoods.end()); boost::math::gamma_distribution<> gamma(shape_, 1.0 / rate_); - for (std::size_t k = 1u; k < 10; ++k) - { + for (std::size_t k = 1u; k < 10; ++k) { double x = boost::math::quantile(gamma, static_cast(k) / 10.0); TDouble1Vec sample(1, x); double fx; filter.jointLogMarginalLikelihood(sample, fx); - double px = static_cast(std::lower_bound(likelihoods.begin(), - likelihoods.end(), fx) - - likelihoods.begin()) - / static_cast(likelihoods.size()); + double px = static_cast(std::lower_bound(likelihoods.begin(), likelihoods.end(), fx) - likelihoods.begin()) / + static_cast(likelihoods.size()); double lb, ub; filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb, ub); double ssd = std::sqrt(px * (1.0 - px) / static_cast(samples.size())); - LOG_DEBUG("expected P(x) = " << px - << ", actual P(x) = " << (lb + ub) / 2.0 - << " sample sd = " << ssd); + LOG_DEBUG("expected P(x) = " << px << ", actual P(x) = " << (lb + ub) / 2.0 << " sample sd = " << ssd); CPPUNIT_ASSERT_DOUBLES_EQUAL(px, (lb + ub) / 2.0, 3.0 * ssd); @@ -951,11 +828,9 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) - { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble1Vec(1, vs[k])); - double ss[] = { 0.9 * mode, 1.1 * mode }; + for (std::size_t k = 0u; k < boost::size(vs); ++k) { + double mode = filter.marginalLikelihoodMode(weightStyle, 
TDouble1Vec(1, vs[k])); + double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG("vs = " << vs[k] << ", mode = " << mode); @@ -963,59 +838,58 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() maths_t::ETail tail; { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - if (mode > 0.0) - { + if (mode > 0.0) { filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } - if (mode > 0.0) - { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), - lb, ub, tail); + if (mode > 0.0) { + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -1026,8 +900,7 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.01); } -void CGammaRateConjugateTest::testAnomalyScore() -{ +void CGammaRateConjugateTest::testAnomalyScore() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testAnomalyScore |"); LOG_DEBUG("+---------------------------------------------+"); @@ -1041,15 +914,15 @@ void CGammaRateConjugateTest::testAnomalyScore() using TUIntVec = std::vector; - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; - const double shapes[] = { 0.4, 10.0, 200.0 }; - const double scales[] = { 0.1, 5.0, 50.0 }; + const double shapes[] = {0.4, 10.0, 200.0}; + const double scales[] = {0.1, 5.0, 50.0}; const double threshold = 0.02; - const unsigned int anomalyTimes[] = { 30u, 120u, 300u, 420u }; - const 
double anomalies[] = { 4.0, 5.0, 10.0, 15.0, 0.0 }; + const unsigned int anomalyTimes[] = {30u, 120u, 300u, 420u}; + const double anomalies[] = {4.0, 5.0, 10.0, 15.0, 0.0}; test::CRandomNumbers rng; @@ -1059,12 +932,10 @@ void CGammaRateConjugateTest::testAnomalyScore() file.open("results.m"); double totalFalsePositiveRate = 0.0; - std::size_t totalPositives[] = { 0u, 0u, 0u }; + std::size_t totalPositives[] = {0u, 0u, 0u}; - for (size_t i = 0; i < boost::size(shapes); ++i) - { - for (size_t j = 0; j < boost::size(scales); ++j) - { + for (size_t i = 0; i < boost::size(shapes); ++i) { + for (size_t j = 0; j < boost::size(scales); ++j) { LOG_DEBUG("shape = " << shapes[i] << ", scale = " << scales[j]); boost::math::gamma_distribution<> gamma(shapes[i], scales[j]); @@ -1072,8 +943,7 @@ void CGammaRateConjugateTest::testAnomalyScore() TDoubleVec samples; rng.generateGammaSamples(shapes[i], scales[j], 500, samples); - for (size_t k = 0; k < boost::size(decayRates); ++k) - { + for (size_t k = 0; k < boost::size(decayRates); ++k) { CGammaRateConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[k])); ++test; @@ -1084,21 +954,18 @@ void CGammaRateConjugateTest::testAnomalyScore() scores << "score" << test << " = ["; TUIntVec candidateAnomalies; - for (unsigned int time = 0; time < samples.size(); ++time) - { - double sample = samples[time] - + (anomalies[std::find(boost::begin(anomalyTimes), - boost::end(anomalyTimes), time) - - boost::begin(anomalyTimes)] - * boost::math::standard_deviation(gamma)); + for (unsigned int time = 0; time < samples.size(); ++time) { + double sample = + samples[time] + + (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - boost::begin(anomalyTimes)] * + boost::math::standard_deviation(gamma)); TDouble1Vec sampleVec(1, sample); filter.addSamples(sampleVec); double score; filter.anomalyScore(maths_t::E_TwoSided, sampleVec, score); - if (score > threshold) - { + if (score > threshold) { candidateAnomalies.push_back(time); } @@ -1110,8 +977,7 @@ void CGammaRateConjugateTest::testAnomalyScore() x << "];\n"; scores << "];\n"; - file << x.str() << scores.str() - << "plot(x" << test << ", score" << test << ");\n" + file << x.str() << scores.str() << "plot(x" << test << ", score" << test << ");\n" << "input(\"Hit any key for next test\");\n\n"; TUIntVec falsePositives; @@ -1121,9 +987,7 @@ void CGammaRateConjugateTest::testAnomalyScore() boost::end(anomalyTimes), std::back_inserter(falsePositives)); - double falsePositiveRate = - static_cast(falsePositives.size()) - / static_cast(samples.size()); + double falsePositiveRate = static_cast(falsePositives.size()) / static_cast(samples.size()); totalFalsePositiveRate += falsePositiveRate; @@ -1134,8 +998,7 @@ void CGammaRateConjugateTest::testAnomalyScore() boost::end(anomalyTimes), std::back_inserter(positives)); - LOG_DEBUG("falsePositiveRate = " << falsePositiveRate - << ", positives = " << positives.size()); + LOG_DEBUG("falsePositiveRate = " << falsePositiveRate << ", positives = " << positives.size()); // False alarm rate should be less than 0.6%. 
CPPUNIT_ASSERT(falsePositiveRate <= 0.006); @@ -1151,8 +1014,7 @@ void CGammaRateConjugateTest::testAnomalyScore() totalFalsePositiveRate /= static_cast(test); LOG_DEBUG("totalFalsePositiveRate = " << totalFalsePositiveRate); - for (size_t i = 0; i < boost::size(totalPositives); ++i) - { + for (size_t i = 0; i < boost::size(totalPositives); ++i) { LOG_DEBUG("positives = " << totalPositives[i]); CPPUNIT_ASSERT(totalPositives[i] >= 24); @@ -1162,8 +1024,7 @@ void CGammaRateConjugateTest::testAnomalyScore() CPPUNIT_ASSERT(totalFalsePositiveRate < 0.0011); } -void CGammaRateConjugateTest::testOffset() -{ +void CGammaRateConjugateTest::testOffset() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testOffset |"); LOG_DEBUG("+---------------------------------------+"); @@ -1171,13 +1032,9 @@ void CGammaRateConjugateTest::testOffset() // The idea of this test is to check that the offset correctly cancels // out a translation applied to a gamma distributed data set. - const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; - const double offsets[] = { -0.5, 0.5 }; - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; + const double offsets[] = {-0.5, 0.5}; + const double decayRates[] = {0.0, 0.001, 0.01}; const double shape = 5.0; const double scale = 1.0; @@ -1189,17 +1046,13 @@ void CGammaRateConjugateTest::testOffset() TDoubleVec samples; rng.generateGammaSamples(shape, scale, 100, samples); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { - for (size_t j = 0; j < boost::size(offsets); ++j) - { - for (size_t k = 0; k < boost::size(decayRates); ++k) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { + for (size_t j = 0; j < boost::size(offsets); ++j) { + for (size_t k = 0; k < boost::size(decayRates); ++k) { CGammaRateConjugate filter1(makePrior(dataTypes[i], offsets[j], decayRates[k])); CGammaRateConjugate filter2(makePrior(dataTypes[i], 0.0, decayRates[k])); - for (std::size_t l = 0u; l < samples.size(); ++l) - { + for (std::size_t l = 0u; l < samples.size(); ++l) { double offsetSample = samples[l] - offsets[j]; TDouble1Vec offsetSampleVec(1, offsetSample); filter1.addSamples(offsetSampleVec); @@ -1213,20 +1066,14 @@ void CGammaRateConjugateTest::testOffset() double likelihood1; filter1.jointLogMarginalLikelihood(offsetSampleVec, likelihood1); double lowerBound1, upperBound1; - filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - offsetSampleVec, - lowerBound1, - upperBound1); + filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, offsetSampleVec, lowerBound1, upperBound1); CPPUNIT_ASSERT_EQUAL(lowerBound1, upperBound1); double probability1 = (lowerBound1 + upperBound1) / 2.0; double likelihood2; filter2.jointLogMarginalLikelihood(sample, likelihood2); double lowerBound2, upperBound2; - filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - sample, - lowerBound2, - upperBound2); + filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound2, upperBound2); CPPUNIT_ASSERT_EQUAL(lowerBound2, upperBound2); double probability2 = (lowerBound2 + upperBound2) / 2.0; @@ -1242,8 +1089,7 @@ void CGammaRateConjugateTest::testOffset() } } -void CGammaRateConjugateTest::testIntegerData() -{ +void CGammaRateConjugateTest::testIntegerData() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testIntegerData |"); 
LOG_DEBUG("+--------------------------------------------+"); @@ -1253,14 +1099,12 @@ void CGammaRateConjugateTest::testIntegerData() // The idea of this test is to check that the inferred model agrees in the // limit (large n) with the model inferred from such data. - const double shapes[] = { 0.2, 1.0, 4.5 }; - const double scales[] = { 0.2, 1.0, 4.5 }; + const double shapes[] = {0.2, 1.0, 4.5}; + const double scales[] = {0.2, 1.0, 4.5}; const std::size_t nSamples = 25000u; - for (size_t i = 0; i < boost::size(shapes); ++i) - { - for (size_t j = 0; j < boost::size(scales); ++j) - { + for (size_t i = 0; i < boost::size(shapes); ++i) { + for (size_t j = 0; j < boost::size(scales); ++j) { test::CRandomNumbers rng; TDoubleVec samples; @@ -1272,8 +1116,7 @@ void CGammaRateConjugateTest::testIntegerData() CGammaRateConjugate filter1(makePrior(maths_t::E_IntegerData, 0.1)); CGammaRateConjugate filter2(makePrior(maths_t::E_ContinuousData, 0.1)); - for (std::size_t k = 0u; k < nSamples; ++k) - { + for (std::size_t k = 0u; k < nSamples; ++k) { double x = std::floor(samples[k]); TDouble1Vec sample(1, x); @@ -1291,10 +1134,8 @@ void CGammaRateConjugateTest::testIntegerData() TMeanAccumulator meanError; - for (size_t i = 0; i < boost::size(shapes); ++i) - { - for (size_t j = 0; j < boost::size(scales); ++j) - { + for (size_t i = 0; i < boost::size(shapes); ++i) { + for (size_t j = 0; j < boost::size(scales); ++j) { test::CRandomNumbers rng; TDoubleVec seedSamples; @@ -1314,8 +1155,7 @@ void CGammaRateConjugateTest::testIntegerData() TMeanAccumulator meanProbability1; TMeanAccumulator meanProbability2; - for (std::size_t k = 0u; k < nSamples; ++k) - { + for (std::size_t k = 0u; k < nSamples; ++k) { double x = std::floor(samples[k]); TDouble1Vec sample(1, x); @@ -1336,10 +1176,7 @@ void CGammaRateConjugateTest::testIntegerData() double p1 = maths::CBasicStatistics::mean(meanProbability1); double p2 = maths::CBasicStatistics::mean(meanProbability2); - LOG_DEBUG("shape = " << shapes[i] - << ", rate = " << scales[j] - << ", p1 = " << p1 - << ", p2 = " << p2); + LOG_DEBUG("shape = " << shapes[i] << ", rate = " << scales[j] << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 0.15 * p1); meanError.add(fabs(p1 - p2)); @@ -1350,45 +1187,37 @@ void CGammaRateConjugateTest::testIntegerData() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.016); } -void CGammaRateConjugateTest::testLowVariationData() -{ +void CGammaRateConjugateTest::testLowVariationData() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testLowVariationData |"); LOG_DEBUG("+-------------------------------------------------+"); { CGammaRateConjugate filter(makePrior(maths_t::E_IntegerData)); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { filter.addSamples(TDouble1Vec(1, 430.0)); } TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 2.0; - LOG_DEBUG("68% confidence interval " - << core::CContainerPrinter::print(interval) - << ", approximate variance = " << sigma * sigma); + LOG_DEBUG("68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate variance = " << sigma * sigma); CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, 1.0 / (sigma * sigma), 0.5); } { CGammaRateConjugate filter(makePrior(maths_t::E_ContinuousData)); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { 
filter.addSamples(TDouble1Vec(1, 430.0)); } TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 2.0; - LOG_DEBUG("68% confidence interval " - << core::CContainerPrinter::print(interval) - << ", approximate s.t.d. = " << sigma); + LOG_DEBUG("68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate s.t.d. = " << sigma); CPPUNIT_ASSERT_DOUBLES_EQUAL(1e-4, sigma / 430.5, 5e-6); } } -void CGammaRateConjugateTest::testPersist() -{ +void CGammaRateConjugateTest::testPersist() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testPersist |"); LOG_DEBUG("+----------------------------------------+"); @@ -1399,11 +1228,9 @@ void CGammaRateConjugateTest::testPersist() rng.generateGammaSamples(1.0, 3.0, 500, samples); maths::CGammaRateConjugate origFilter(makePrior(maths_t::E_ContinuousData, 0.1)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + for (std::size_t i = 0u; i < samples.size(); ++i) { + origFilter.addSamples( + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); @@ -1429,8 +1256,7 @@ void CGammaRateConjugateTest::testPersist() maths::CGammaRateConjugate restoredFilter(params, traverser); uint64_t checksum = origFilter.checksum(); - LOG_DEBUG("orig checksum = " << checksum - << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1443,8 +1269,7 @@ void CGammaRateConjugateTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CGammaRateConjugateTest::testVarianceScale() -{ +void CGammaRateConjugateTest::testVarianceScale() { LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CGammaRateConjugateTest::testVarianceScale |"); LOG_DEBUG("+----------------------------------------------+"); @@ -1467,21 +1292,13 @@ void CGammaRateConjugateTest::testVarianceScale() // Finally, we test update with scaled samples produces the // correct posterior. 
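// Aside (sketch only, not part of this patch): the variance-scale logic exercised below relies
// on the gamma reparameterisation scaledShape = shape / varianceScale, scaledScale =
// varianceScale * scale. Scaling the variance by g while holding the mean fixed works because
// mean = shape * scale and variance = shape * scale^2. A minimal standalone check using
// boost.math (the variable names here are illustrative only):

#include <boost/math/distributions/gamma.hpp>
#include <cassert>
#include <cmath>

int main() {
    const double shape = 3.0;
    const double scale = 3.0;
    const double g = 2.0; // illustrative variance scale
    boost::math::gamma_distribution<> original(shape, scale);
    boost::math::gamma_distribution<> scaled(shape / g, g * scale);
    // The mean is unchanged and the variance is multiplied by g.
    assert(std::fabs(boost::math::mean(scaled) - boost::math::mean(original)) < 1e-12);
    assert(std::fabs(boost::math::variance(scaled) - g * boost::math::variance(original)) < 1e-12);
    return 0;
}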
-    maths_t::ESampleWeightStyle scales[] =
-        {
-            maths_t::E_SampleSeasonalVarianceScaleWeight,
-            maths_t::E_SampleCountVarianceScaleWeight
-        };
+    maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight};

-    for (std::size_t s = 0u; s < boost::size(scales); ++s)
-    {
+    for (std::size_t s = 0u; s < boost::size(scales); ++s) {
        const double shape = 3.0;
        const double scale = 3.0;

-        const double varianceScales[] =
-            {
-                0.20, 0.50, 0.75, 1.50, 2.00, 5.00
-            };
+        const double varianceScales[] = {0.20, 0.50, 0.75, 1.50, 2.00, 5.00};

        test::CRandomNumbers rng;

@@ -1489,10 +1306,7 @@
        LOG_DEBUG("****** probabilityOfLessLikelySamples ******");
        {
-            const double percentiles[] =
-                {
-                    10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0
-                };
+            const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0};

            const std::size_t nSamples = 1000u;
            const std::size_t nScaledSamples = 10000u;
@@ -1512,41 +1326,33 @@
            TDoubleVec probabilities;
            probabilities.reserve(nScaledSamples);
-            for (std::size_t i = 0; i < unscaledSamples.size(); ++i)
-            {
+            for (std::size_t i = 0; i < unscaledSamples.size(); ++i) {
                TDouble1Vec sample(1, unscaledSamples[i]);

                double lowerBound, upperBound;
-                CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                                     sample,
-                                                                     lowerBound,
-                                                                     upperBound));
+                CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound));
                CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
                double probability = (lowerBound + upperBound) / 2.0;
                probabilities.push_back(probability);
            }
            std::sort(probabilities.begin(), probabilities.end());

-            for (size_t i = 0; i < boost::size(percentiles); ++i)
-            {
-                std::size_t index = static_cast<std::size_t>(
-                        static_cast<double>(nScaledSamples) * percentiles[i]/100.0);
-                double error = fabs(probabilities[index] - percentiles[i]/100.0);
+            for (size_t i = 0; i < boost::size(percentiles); ++i) {
+                std::size_t index = static_cast<std::size_t>(static_cast<double>(nScaledSamples) * percentiles[i] / 100.0);
+                double error = fabs(probabilities[index] - percentiles[i] / 100.0);
                expectedPercentileErrors.push_back(error);
                expectedTotalError += error;
            }
        }

-        for (size_t i = 0; i < boost::size(varianceScales); ++i)
-        {
+        for (size_t i = 0; i < boost::size(varianceScales); ++i) {
            LOG_DEBUG("**** variance scale = " << varianceScales[i] << " ****");

            double scaledShape = shape / varianceScales[i];
            double ss = varianceScales[i] * scale;
            {
                boost::math::gamma_distribution<> gamma(scaledShape, ss);
-                LOG_DEBUG("mean = " << boost::math::mean(gamma)
-                          << ", variance = " << boost::math::variance(gamma));
+                LOG_DEBUG("mean = " << boost::math::mean(gamma) << ", variance = " << boost::math::variance(gamma));
            }

            TDoubleVec scaledSamples;
@@ -1554,18 +1360,16 @@
            TDoubleVec probabilities;
            probabilities.reserve(nScaledSamples);
-            for (std::size_t j = 0; j < scaledSamples.size(); ++j)
-            {
+            for (std::size_t j = 0; j < scaledSamples.size(); ++j) {
                double lowerBound, upperBound;
                maths_t::ETail tail;
-                CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(
-                                       maths_t::E_TwoSided,
-                                       maths_t::TWeightStyleVec(1, scales[s]),
-                                       TDouble1Vec(1, scaledSamples[j]),
-                                       TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])),
-                                       lowerBound,
-                                       upperBound,
-                                       tail));
+                CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
+                                                                     maths_t::TWeightStyleVec(1, scales[s]),
+                                                                     TDouble1Vec(1, scaledSamples[j]),
+                                                                     TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])),
+                                                                     lowerBound,
+                                                                     upperBound,
+                                                                     tail));
                CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
                double probability = (lowerBound + upperBound) / 2.0;
                probabilities.push_back(probability);
@@ -1573,26 +1377,21 @@
            std::sort(probabilities.begin(), probabilities.end());

            double totalError = 0.0;
-            for (size_t j = 0; j < boost::size(percentiles); ++j)
-            {
-                std::size_t index = static_cast<std::size_t>(
-                        static_cast<double>(nScaledSamples) * percentiles[j]/100.0);
-                double error = fabs(probabilities[index] - percentiles[j]/100.0);
+            for (size_t j = 0; j < boost::size(percentiles); ++j) {
+                std::size_t index = static_cast<std::size_t>(static_cast<double>(nScaledSamples) * percentiles[j] / 100.0);
+                double error = fabs(probabilities[index] - percentiles[j] / 100.0);
                totalError += error;

                double errorThreshold = 0.017 + expectedPercentileErrors[j];

-                LOG_DEBUG("percentile = " << percentiles[j]
-                          << ", probability = " << probabilities[index]
-                          << ", error = " << error
-                          << ", error threshold = " << errorThreshold);
+                LOG_DEBUG("percentile = " << percentiles[j] << ", probability = " << probabilities[index] << ", error = " << error
+                          << ", error threshold = " << errorThreshold);

                CPPUNIT_ASSERT(error < errorThreshold);
            }

            double totalErrorThreshold = 0.1 + expectedTotalError;

-            LOG_DEBUG("total error = " << totalError
-                      << ", totalError threshold = " << totalErrorThreshold);
+            LOG_DEBUG("total error = " << totalError << ", totalError threshold = " << totalErrorThreshold);

            CPPUNIT_ASSERT(totalError < totalErrorThreshold);
        }
@@ -1601,17 +1400,13 @@
        LOG_DEBUG("");
        LOG_DEBUG("****** jointLogMarginalLikelihood ******");
-        for (size_t i = 0; i < boost::size(varianceScales); ++i)
-        {
+        for (size_t i = 0; i < boost::size(varianceScales); ++i) {
            LOG_DEBUG("**** variance scale = " << varianceScales[i] << " ****");

            double scaledShape = shape / varianceScales[i];
            double scaledScale = varianceScales[i] * scale;
            boost::math::gamma_distribution<> gamma(scaledShape, scaledScale);
-            {
-                LOG_DEBUG("mean = " << boost::math::mean(gamma)
-                          << ", variance = " << boost::math::variance(gamma));
-            }
+            { LOG_DEBUG("mean = " << boost::math::mean(gamma) << ", variance = " << boost::math::variance(gamma)); }

            double expectedDifferentialEntropy = maths::CTools::differentialEntropy(gamma);

            CGammaRateConjugate filter(makePrior());
@@ -1625,45 +1420,36 @@
            TDoubleVec scaledSamples;
            rng.generateGammaSamples(scaledShape, scaledScale, 50000, scaledSamples);

-            for (std::size_t j = 0u; j < scaledSamples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < scaledSamples.size(); ++j) {
                double logLikelihood = 0.0;
                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter.jointLogMarginalLikelihood(
-                                             maths_t::TWeightStyleVec(1, scales[s]),
-                                             TDouble1Vec(1, scaledSamples[j]),
-                                             TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])),
-                                             logLikelihood));
+                                     filter.jointLogMarginalLikelihood(maths_t::TWeightStyleVec(1, scales[s]),
+                                                                       TDouble1Vec(1, scaledSamples[j]),
+                                                                       TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])),
+                                                                       logLikelihood));
                differentialEntropy -= logLikelihood;
            }

            differentialEntropy /= static_cast<double>(scaledSamples.size());

-            LOG_DEBUG("differentialEntropy = " << differentialEntropy
-                      << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
+            LOG_DEBUG("differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " <<
expectedDifferentialEntropy); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.05); } } - const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; - const double maximumMeanError[] = { 0.08, 0.11 }; - const double maximumVarianceError[] = { 1.0, 0.2 }; - const double maximumMeanMeanError[] = { 0.01, 0.01 }; - const double maximumMeanVarianceError[] = { 0.08, 0.05 }; + const double maximumMeanError[] = {0.08, 0.11}; + const double maximumVarianceError[] = {1.0, 0.2}; + const double maximumMeanMeanError[] = {0.01, 0.01}; + const double maximumMeanVarianceError[] = {0.08, 0.05}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) - { - for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) - { - const double shapes[] = { 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0 }; - const double rates[] = { 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0 }; - const double varianceScales[] = { 0.1, 0.5, 1.0, 2.0, 10.0, 100.0 }; + for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { + const double shapes[] = {1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; + const double rates[] = {1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; + const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0, 100.0}; maths_t::TWeightStyleVec weightStyle(1, scales[s]); TDoubleVec samples; @@ -1674,40 +1460,33 @@ void CGammaRateConjugateTest::testVarianceScale() TMeanAccumulator meanMeanError; TMeanAccumulator meanVarianceError; - for (std::size_t i = 0u; i < boost::size(shapes); ++i) - { - for (std::size_t j = 0u; j < boost::size(rates); ++j) - { + for (std::size_t i = 0u; i < boost::size(shapes); ++i) { + for (std::size_t j = 0u; j < boost::size(rates); ++j) { double shape = shapes[i]; double rate = rates[j]; // We purposely don't estimate true variance in this case. - if (shape < rate * rate * maths::MINIMUM_COEFFICIENT_OF_VARIATION) - { + if (shape < rate * rate * maths::MINIMUM_COEFFICIENT_OF_VARIATION) { continue; } LOG_DEBUG(""); - LOG_DEBUG("****** shape = " << shape - << ", rate = " << rate << " ******"); + LOG_DEBUG("****** shape = " << shape << ", rate = " << rate << " ******"); double mean = shape / rate; double variance = mean / rate; - for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) - { + for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double scale = varianceScales[k]; LOG_DEBUG("*** scale = " << scale << " ***"); double scaledShape = shape / scale; double scaledRate = rate / scale; - LOG_DEBUG("scaled shape = " << scaledShape - << ", scaled rate = " << scaledRate); + LOG_DEBUG("scaled shape = " << scaledShape << ", scaled rate = " << scaledRate); TMeanAccumulator meanError; TMeanAccumulator varianceError; - for (unsigned int test = 0u; test < 5; ++test) - { + for (unsigned int test = 0u; test < 5; ++test) { CGammaRateConjugate filter(makePrior(dataTypes[t])); rng.generateGammaSamples(shape, 1.0 / rate, 200, samples); @@ -1723,10 +1502,8 @@ void CGammaRateConjugateTest::testVarianceScale() double estimatedVariance = estimatedMean / filter.likelihoodRate(); double dm = (dataTypes[t] == maths_t::E_IntegerData ? 0.5 : 0.0); double dv = (dataTypes[t] == maths_t::E_IntegerData ? 
1.0 / 12.0 : 0.0);
-                            double trialMeanError = std::fabs(estimatedMean - (mean + dm))
-                                                    / std::max(1.0, mean + dm);
-                            double trialVarianceError = std::fabs(estimatedVariance - (variance + dv))
-                                                        / std::max(1.0, variance + dv);
+                            double trialMeanError = std::fabs(estimatedMean - (mean + dm)) / std::max(1.0, mean + dm);
+                            double trialVarianceError = std::fabs(estimatedVariance - (variance + dv)) / std::max(1.0, variance + dv);

                            LOG_DEBUG("trial mean error = " << trialMeanError);
                            LOG_DEBUG("trial variance error = " << trialVarianceError);
@@ -1735,10 +1512,8 @@
                            varianceError.add(trialVarianceError);
                        }

-                        LOG_DEBUG("mean error = "
-                                  << maths::CBasicStatistics::mean(meanError));
-                        LOG_DEBUG("variance error = "
-                                  << maths::CBasicStatistics::mean(varianceError));
+                        LOG_DEBUG("mean error = " << maths::CBasicStatistics::mean(meanError));
+                        LOG_DEBUG("variance error = " << maths::CBasicStatistics::mean(varianceError));

                        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < maximumMeanError[t]);
                        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceError) < maximumVarianceError[t]);
@@ -1749,10 +1524,8 @@
                }
            }

-            LOG_DEBUG("mean mean error = "
-                      << maths::CBasicStatistics::mean(meanMeanError));
-            LOG_DEBUG("mean variance error = "
-                      << maths::CBasicStatistics::mean(meanVarianceError));
+            LOG_DEBUG("mean mean error = " << maths::CBasicStatistics::mean(meanMeanError));
+            LOG_DEBUG("mean variance error = " << maths::CBasicStatistics::mean(meanVarianceError));

            CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < maximumMeanMeanError[t]);
            CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanVarianceError) < maximumMeanVarianceError[t]);
@@ -1760,8 +1533,7 @@
    }
}

-void CGammaRateConjugateTest::testNegativeSample()
-{
+void CGammaRateConjugateTest::testNegativeSample() {
    LOG_DEBUG("+-----------------------------------------------+");
    LOG_DEBUG("| CGammaRateConjugateTest::testNegativeSample |");
    LOG_DEBUG("+-----------------------------------------------+");
@@ -1780,10 +1552,8 @@
    TDoubleVec samples;
    rng.generateGammaSamples(shape, scale, 100, samples);

-    CGammaRateConjugate filter1(
-        CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, 0.0, 0.2));
-    CGammaRateConjugate filter2(
-        CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.2586, 0.0, 0.2));
+    CGammaRateConjugate filter1(CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, 0.0, 0.2));
+    CGammaRateConjugate filter2(CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.2586, 0.0, 0.2));

    filter1.addSamples(samples);
    filter2.addSamples(samples);
@@ -1799,64 +1569,45 @@
    CPPUNIT_ASSERT(filter1.equalTolerance(filter2, equal));
}

-CppUnit::Test* CGammaRateConjugateTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CGammaRateConjugateTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testMultipleUpdate",
-                                   &CGammaRateConjugateTest::testMultipleUpdate) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testPropagation",
-                                   &CGammaRateConjugateTest::testPropagation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testShapeEstimation",
-                                   &CGammaRateConjugateTest::testShapeEstimation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testRateEstimation",
-                                   &CGammaRateConjugateTest::testRateEstimation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testMarginalLikelihood",
-                                   &CGammaRateConjugateTest::testMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testMarginalLikelihoodMean",
-                                   &CGammaRateConjugateTest::testMarginalLikelihoodMean) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testMarginalLikelihoodMode",
-                                   &CGammaRateConjugateTest::testMarginalLikelihoodMode) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testMarginalLikelihoodVariance",
-                                   &CGammaRateConjugateTest::testMarginalLikelihoodVariance) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testSampleMarginalLikelihood",
-                                   &CGammaRateConjugateTest::testSampleMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testCdf",
-                                   &CGammaRateConjugateTest::testCdf) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testProbabilityOfLessLikelySamples",
-                                   &CGammaRateConjugateTest::testProbabilityOfLessLikelySamples) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testAnomalyScore",
-                                   &CGammaRateConjugateTest::testAnomalyScore) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testOffset",
-                                   &CGammaRateConjugateTest::testOffset) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testIntegerData",
-                                   &CGammaRateConjugateTest::testIntegerData) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testLowVariationData",
-                                   &CGammaRateConjugateTest::testLowVariationData) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testPersist",
-                                   &CGammaRateConjugateTest::testPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testVarianceScale",
-                                   &CGammaRateConjugateTest::testVarianceScale) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGammaRateConjugateTest>(
-                                   "CGammaRateConjugateTest::testNegativeSample",
-                                   &CGammaRateConjugateTest::testNegativeSample) );
+CppUnit::Test* CGammaRateConjugateTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CGammaRateConjugateTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMultipleUpdate",
+                                                                           &CGammaRateConjugateTest::testMultipleUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testPropagation",
+                                                                           &CGammaRateConjugateTest::testPropagation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testShapeEstimation",
+                                                                           &CGammaRateConjugateTest::testShapeEstimation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testRateEstimation",
+                                                                           &CGammaRateConjugateTest::testRateEstimation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMarginalLikelihood",
+                                                                           &CGammaRateConjugateTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMarginalLikelihoodMean",
+                                                                           &CGammaRateConjugateTest::testMarginalLikelihoodMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMarginalLikelihoodMode",
+                                                                           &CGammaRateConjugateTest::testMarginalLikelihoodMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMarginalLikelihoodVariance",
+                                                                           &CGammaRateConjugateTest::testMarginalLikelihoodVariance));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testSampleMarginalLikelihood",
+                                                                           &CGammaRateConjugateTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testCdf", &CGammaRateConjugateTest::testCdf));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testProbabilityOfLessLikelySamples",
+                                                                           &CGammaRateConjugateTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testAnomalyScore",
+                                                                           &CGammaRateConjugateTest::testAnomalyScore));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testOffset", &CGammaRateConjugateTest::testOffset));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testIntegerData",
+                                                                           &CGammaRateConjugateTest::testIntegerData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testLowVariationData",
+                                                                           &CGammaRateConjugateTest::testLowVariationData));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testPersist", &CGammaRateConjugateTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testVarianceScale",
+                                                                           &CGammaRateConjugateTest::testVarianceScale));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testNegativeSample",
+                                                                           &CGammaRateConjugateTest::testNegativeSample));

    return suiteOfTests;
}
diff --git a/lib/maths/unittest/CGammaRateConjugateTest.h b/lib/maths/unittest/CGammaRateConjugateTest.h
index bae6912898..20d2cdf4ad 100644
--- a/lib/maths/unittest/CGammaRateConjugateTest.h
+++ b/lib/maths/unittest/CGammaRateConjugateTest.h
@@ -9,29 +9,28 @@

#include <cppunit/extensions/HelperMacros.h>

-class CGammaRateConjugateTest : public CppUnit::TestFixture
-{
-    public:
-        void testMultipleUpdate();
-        void testPropagation();
-        void testShapeEstimation();
-        void testRateEstimation();
-        void testMarginalLikelihood();
-        void testMarginalLikelihoodMean();
-        void testMarginalLikelihoodMode();
-        void testMarginalLikelihoodVariance();
-        void testSampleMarginalLikelihood();
-        void testCdf();
-        void testProbabilityOfLessLikelySamples();
-        void testAnomalyScore();
-        void testOffset();
-        void testIntegerData();
-        void testLowVariationData();
-        void testPersist();
-        void testVarianceScale();
-        void testNegativeSample();
+class CGammaRateConjugateTest : public CppUnit::TestFixture {
+public:
+    void testMultipleUpdate();
+    void testPropagation();
+    void testShapeEstimation();
+    void testRateEstimation();
+    void testMarginalLikelihood();
+    void testMarginalLikelihoodMean();
+    void testMarginalLikelihoodMode();
+    void testMarginalLikelihoodVariance();
+    void testSampleMarginalLikelihood();
+    void testCdf();
+    void testProbabilityOfLessLikelySamples();
+    void testAnomalyScore();
+    void testOffset();
+    void testIntegerData();
+    void testLowVariationData();
+    void testPersist();
+    void testVarianceScale();
+    void testNegativeSample();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
};

#endif // INCLUDED_CGammaRateConjugateTest_h
diff --git a/lib/maths/unittest/CGramSchmidtTest.cc b/lib/maths/unittest/CGramSchmidtTest.cc
index fe6b3cda06..dd5ab97837 100644
--- a/lib/maths/unittest/CGramSchmidtTest.cc
+++ b/lib/maths/unittest/CGramSchmidtTest.cc
@@ -26,91 +26,70 @@ using TDoubleVecVec = std::vector<TDoubleVec>;
using TVector4 = maths::CVectorNx1<double, 4>;
using TVector4Vec = std::vector<TVector4>;

-namespace
-{
+namespace {

template<typename T>
-void generate(test::CRandomNumbers &rng,
-              std::size_t n,
-              std::size_t d,
-              std::vector<T> &x)
-{
+void generate(test::CRandomNumbers& rng, std::size_t n, std::size_t d, std::vector<T>& x) {
    LOG_DEBUG("n = " << n << ", d = " << d);

    TDoubleVec components;
    rng.generateUniformSamples(0.0, 10.0, n * d, components);

    x.clear();
-    for (std::size_t i = 0u; i < n; ++i)
-    {
-        x.push_back(T(&components[i * d],
-                      &components[(i+1) * d]));
+    for (std::size_t i = 0u; i < n; ++i) {
+        x.push_back(T(&components[i * d], &components[(i + 1) * d]));
    }
}

-void debug(const TDoubleVecVec &x)
-{
+void debug(const TDoubleVecVec& x) {
    LOG_DEBUG("x =");
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
        LOG_DEBUG(" " << core::CContainerPrinter::print(x[i]));
    }
}

-void debug(const TVector4Vec &x)
-{
+void debug(const TVector4Vec& x) {
    LOG_DEBUG("x =");
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
        LOG_DEBUG(" " << x[i]);
    }
}

-double inner(const TDoubleVec &x, const TDoubleVec &y)
-{
+double inner(const TDoubleVec& x, const TDoubleVec& y) {
    CPPUNIT_ASSERT_EQUAL(x.size(), y.size());
    double result = 0.0;
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
        result += x[i] * y[i];
    }
    return result;
}

-TDoubleVec multiply(const TDoubleVec &x, double s)
-{
+TDoubleVec multiply(const TDoubleVec& x, double s) {
    TDoubleVec result = x;
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
        result[i] *= s;
    }
    return result;
}

-const TDoubleVec &add(TDoubleVec &x, const TDoubleVec &y)
-{
+const TDoubleVec& add(TDoubleVec& x, const TDoubleVec& y) {
    CPPUNIT_ASSERT_EQUAL(x.size(), y.size());
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
        x[i] += y[i];
    }
    return x;
}

-const TDoubleVec &subtract(TDoubleVec &x, const TDoubleVec &y)
-{
+const TDoubleVec& subtract(TDoubleVec& x, const TDoubleVec& y) {
    CPPUNIT_ASSERT_EQUAL(x.size(), y.size());
-    for (std::size_t i = 0u; i < x.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < x.size(); ++i) {
        x[i] -= y[i];
    }
    return x;
}
-
}

-void CGramSchmidtTest::testOrthogonality()
-{
+void CGramSchmidtTest::testOrthogonality() {
    LOG_DEBUG("+---------------------------------------+");
    LOG_DEBUG("| CGramSchmidtTest::testOrthogonality |");
    LOG_DEBUG("+---------------------------------------+");
@@ -121,8 +100,7 @@ void CGramSchmidtTest::testOrthogonality()
        LOG_DEBUG("*** Test vector ***");

        TDoubleVecVec x;
-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
            std::size_t d = t / 5 + 5;
            std::size_t n = t / 5 + 2;

@@ -131,14 +109,12 @@ void CGramSchmidtTest::testOrthogonality()
            generate(rng, n, d, x);

            maths::CGramSchmidt::basis(x);
-            if (t % 10 == 0) debug(x);
-            for (std::size_t i = 0u; i < x.size(); ++i)
-            {
-                for (std::size_t j = i+1; j < x.size(); ++j)
-                {
+            if (t % 10 == 0)
+                debug(x);
+            for (std::size_t i = 0u; i < x.size(); ++i) {
+                for (std::size_t j = i + 1; j < x.size(); ++j) {
                    double xiDotxj = inner(x[i], x[j]);
-                    if (t % 10 == 0)
-                    {
+                    if (t % 10 == 0) {
                        LOG_DEBUG("x(i)' x(j) = " << xiDotxj);
                    }
                    CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, xiDotxj, 1e-10);
@@ -151,19 +127,16 @@ void CGramSchmidtTest::testOrthogonality()
        LOG_DEBUG("*** Test CVectorNx1 ***");

        TVector4Vec x;
-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
            generate(rng, 4, 4, x);
maths::CGramSchmidt::basis(x); - if (t % 10 == 0) debug(x); - for (std::size_t i = 0u; i < x.size(); ++i) - { - for (std::size_t j = i+1; j < x.size(); ++j) - { + if (t % 10 == 0) + debug(x); + for (std::size_t i = 0u; i < x.size(); ++i) { + for (std::size_t j = i + 1; j < x.size(); ++j) { double xiDotxj = x[i].inner(x[j]); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("x(i)' x(j) = " << xiDotxj); } CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, xiDotxj, 1e-10); @@ -173,8 +146,7 @@ void CGramSchmidtTest::testOrthogonality() } } -void CGramSchmidtTest::testNormalisation() -{ +void CGramSchmidtTest::testNormalisation() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CGramSchmidtTest::testNormalisation |"); LOG_DEBUG("+---------------------------------------+"); @@ -185,8 +157,7 @@ void CGramSchmidtTest::testNormalisation() LOG_DEBUG("*** Test vector ***"); TDoubleVecVec x; - for (std::size_t t = 0u; t < 50; ++t) - { + for (std::size_t t = 0u; t < 50; ++t) { std::size_t d = t / 5 + 5; std::size_t n = t / 5 + 2; @@ -195,12 +166,11 @@ void CGramSchmidtTest::testNormalisation() generate(rng, n, d, x); maths::CGramSchmidt::basis(x); - if (t % 10 == 0) debug(x); - for (std::size_t i = 0u; i < x.size(); ++i) - { + if (t % 10 == 0) + debug(x); + for (std::size_t i = 0u; i < x.size(); ++i) { double normxi = std::sqrt(inner(x[i], x[i])); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("|| x(i) || = " << normxi); } CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, normxi, 1e-15); @@ -212,19 +182,16 @@ void CGramSchmidtTest::testNormalisation() LOG_DEBUG("*** Test CVectorNx1 ***"); TVector4Vec x; - for (std::size_t t = 0u; t < 50; ++t) - { + for (std::size_t t = 0u; t < 50; ++t) { generate(rng, 4, 4, x); maths::CGramSchmidt::basis(x); - if (t % 10 == 0) debug(x); - for (std::size_t i = 0u; i < x.size(); ++i) - { - for (std::size_t j = i+1; j < x.size(); ++j) - { + if (t % 10 == 0) + debug(x); + for (std::size_t i = 0u; i < x.size(); ++i) { + for (std::size_t j = i + 1; j < x.size(); ++j) { double normxi = x[i].euclidean(); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("|| x(i) || = " << normxi); } CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, normxi, 1e-15); @@ -234,8 +201,7 @@ void CGramSchmidtTest::testNormalisation() } } -void CGramSchmidtTest::testSpan() -{ +void CGramSchmidtTest::testSpan() { LOG_DEBUG("+------------------------------+"); LOG_DEBUG("| CGramSchmidtTest::testSpan |"); LOG_DEBUG("+------------------------------+"); @@ -247,8 +213,7 @@ void CGramSchmidtTest::testSpan() TDoubleVecVec x; TDoubleVecVec basis; - for (std::size_t t = 0u; t < 50; ++t) - { + for (std::size_t t = 0u; t < 50; ++t) { std::size_t d = t / 5 + 5; std::size_t n = t / 5 + 2; @@ -258,17 +223,15 @@ void CGramSchmidtTest::testSpan() basis = x; maths::CGramSchmidt::basis(basis); - if (t % 10 == 0) debug(basis); - for (std::size_t i = 0u; i < x.size(); ++i) - { + if (t % 10 == 0) + debug(basis); + for (std::size_t i = 0u; i < x.size(); ++i) { TDoubleVec r(x[i].size(), 0.0); - for (std::size_t j = 0u; j < basis.size(); ++j) - { + for (std::size_t j = 0u; j < basis.size(); ++j) { add(r, multiply(basis[j], inner(x[i], basis[j]))); } - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("x(i) = " << core::CContainerPrinter::print(x[i])); LOG_DEBUG("projection = " << core::CContainerPrinter::print(r)); } @@ -276,8 +239,7 @@ void CGramSchmidtTest::testSpan() subtract(r, x[i]); double normr = std::sqrt(inner(r, r)); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("|| r || = " << normr); } @@ -291,23 +253,20 @@ void 
CGramSchmidtTest::testSpan()
        TVector4Vec x;
        TVector4Vec basis;

-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
            generate(rng, 4, 4, x);

            basis = x;
            maths::CGramSchmidt::basis(basis);
-            if (t % 10 == 0) debug(x);
-            for (std::size_t i = 0u; i < x.size(); ++i)
-            {
+            if (t % 10 == 0)
+                debug(x);
+            for (std::size_t i = 0u; i < x.size(); ++i) {
                TVector4 r(0.0);
-                for (std::size_t j = 0u; j < basis.size(); ++j)
-                {
+                for (std::size_t j = 0u; j < basis.size(); ++j) {
                    r += basis[j] * x[i].inner(basis[j]);
                }
-                if (t % 10 == 0)
-                {
+                if (t % 10 == 0) {
                    LOG_DEBUG("x(i) = " << x[i]);
                    LOG_DEBUG("projection = " << r);
                }
@@ -315,8 +274,7 @@ void CGramSchmidtTest::testSpan()
                r -= x[i];
                double normr = r.euclidean();
-                if (t % 10 == 0)
-                {
+                if (t % 10 == 0) {
                    LOG_DEBUG("|| r || = " << normr);
                }
@@ -326,8 +284,7 @@ void CGramSchmidtTest::testSpan()
        }
    }
}

-void CGramSchmidtTest::testEdgeCases()
-{
+void CGramSchmidtTest::testEdgeCases() {
    LOG_DEBUG("+-----------------------------------+");
    LOG_DEBUG("| CGramSchmidtTest::testEdgeCases |");
    LOG_DEBUG("+-----------------------------------+");
@@ -335,21 +292,14 @@ void CGramSchmidtTest::testEdgeCases()
    {
        LOG_DEBUG("*** Test zero vector ***");

-        double x_[][5] =
-            {
-                { 0.0, 0.0, 0.0, 0.0, 0.0 },
-                { 1.0, 3.0, 4.0, 0.0, 6.0 },
-                { 0.4, 0.3, 0.6, 1.0, 7.0 }
-            };
-        std::size_t p[] = { 0, 1, 2 };
+        double x_[][5] = {{0.0, 0.0, 0.0, 0.0, 0.0}, {1.0, 3.0, 4.0, 0.0, 6.0}, {0.4, 0.3, 0.6, 1.0, 7.0}};
+        std::size_t p[] = {0, 1, 2};

-        do
-        {
+        do {
            LOG_DEBUG("permutation = " << core::CContainerPrinter::print(p));

            TDoubleVecVec x;
-            for (std::size_t i = 0u; i < boost::size(p); ++i)
-            {
+            for (std::size_t i = 0u; i < boost::size(p); ++i) {
                x.push_back(TDoubleVec(&x_[p[i]][0], &x_[p[i]][4]));
            }
            debug(x);
@@ -357,31 +307,22 @@ void CGramSchmidtTest::testEdgeCases()
            debug(x);

            CPPUNIT_ASSERT_EQUAL(std::size_t(2), x.size());
-        }
-        while (std::next_permutation(p, p + boost::size(p)));
+        } while (std::next_permutation(p, p + boost::size(p)));
    }
    {
        LOG_DEBUG("");
        LOG_DEBUG("*** Test degenerate ***");

-        double x_[][4] =
-            {
-                { 1.0, 1.0, 1.0, 1.0 },
-                { -1.0, 2.3, 1.0, 0.03 },
-                { 1.0, 1.0, 1.0, 1.0 },
-                { -1.0, 2.3, 1.0, 0.03 },
-                { -4.0, 0.3, 1.4, 1.03 }
-            };
+        double x_[][4] = {
+            {1.0, 1.0, 1.0, 1.0}, {-1.0, 2.3, 1.0, 0.03}, {1.0, 1.0, 1.0, 1.0}, {-1.0, 2.3, 1.0, 0.03}, {-4.0, 0.3, 1.4, 1.03}};

-        std::size_t p[] = { 0, 1, 2, 3, 4 };
+        std::size_t p[] = {0, 1, 2, 3, 4};

-        do
-        {
+        do {
            LOG_DEBUG("permutation = " << core::CContainerPrinter::print(p));

            TDoubleVecVec x;
-            for (std::size_t i = 0u; i < boost::size(p); ++i)
-            {
+            for (std::size_t i = 0u; i < boost::size(p); ++i) {
                x.push_back(TDoubleVec(&x_[p[i]][0], &x_[p[i]][4]));
            }

@@ -390,27 +331,19 @@ void CGramSchmidtTest::testEdgeCases()
            debug(x);

            CPPUNIT_ASSERT_EQUAL(std::size_t(3), x.size());
-        }
-        while (std::next_permutation(p, p + boost::size(p)));
+        } while (std::next_permutation(p, p + boost::size(p)));
    }
}

-CppUnit::Test *CGramSchmidtTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CGramSchmidtTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGramSchmidtTest>(
-                                   "CGramSchmidtTest::testOrthogonality",
-                                   &CGramSchmidtTest::testOrthogonality) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGramSchmidtTest>(
-                                   "CGramSchmidtTest::testNormalisation",
-                                   &CGramSchmidtTest::testNormalisation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGramSchmidtTest>(
-                                   "CGramSchmidtTest::testSpan",
-                                   &CGramSchmidtTest::testSpan) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CGramSchmidtTest>(
-                                   "CGramSchmidtTest::testEdgeCases",
-                                   &CGramSchmidtTest::testEdgeCases) );
+CppUnit::Test* CGramSchmidtTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CGramSchmidtTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CGramSchmidtTest>("CGramSchmidtTest::testOrthogonality", &CGramSchmidtTest::testOrthogonality));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CGramSchmidtTest>("CGramSchmidtTest::testNormalisation", &CGramSchmidtTest::testNormalisation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGramSchmidtTest>("CGramSchmidtTest::testSpan", &CGramSchmidtTest::testSpan));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGramSchmidtTest>("CGramSchmidtTest::testEdgeCases", &CGramSchmidtTest::testEdgeCases));

    return suiteOfTests;
}
diff --git a/lib/maths/unittest/CGramSchmidtTest.h b/lib/maths/unittest/CGramSchmidtTest.h
index b4f994a516..ab370204dc 100644
--- a/lib/maths/unittest/CGramSchmidtTest.h
+++ b/lib/maths/unittest/CGramSchmidtTest.h
@@ -9,15 +9,14 @@

#include <cppunit/extensions/HelperMacros.h>

-class CGramSchmidtTest : public CppUnit::TestFixture
-{
-    public:
-        void testOrthogonality();
-        void testNormalisation();
-        void testSpan();
-        void testEdgeCases();
+class CGramSchmidtTest : public CppUnit::TestFixture {
+public:
+    void testOrthogonality();
+    void testNormalisation();
+    void testSpan();
+    void testEdgeCases();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
};

#endif // INCLUDED_CGramSchmidtTest_h
diff --git a/lib/maths/unittest/CInformationCriteriaTest.cc b/lib/maths/unittest/CInformationCriteriaTest.cc
index 78fe31de2e..6da231a853 100644
--- a/lib/maths/unittest/CInformationCriteriaTest.cc
+++ b/lib/maths/unittest/CInformationCriteriaTest.cc
@@ -16,8 +16,7 @@

using namespace ml;

-namespace
-{
+namespace {

using TSizeVec = std::vector<std::size_t>;
using TSizeVecVec = std::vector<TSizeVec>;
@@ -35,32 +34,21 @@ using TMatrix4 = maths::CSymmetricMatrixNxN<double, 4>;
using TMatrix4Vec = std::vector<TMatrix4>;

template<typename POINT>
-double logfSphericalGaussian(const POINT &mean,
-                             double variance,
-                             const POINT &x)
-{
+double logfSphericalGaussian(const POINT& mean, double variance, const POINT& x) {
    double d = static_cast<double>(x.dimension());
    double r = (x - mean).euclidean();
-    return -0.5 * ( d * std::log(boost::math::double_constants::two_pi * variance)
-                   + r * r / variance);
+    return -0.5 * (d * std::log(boost::math::double_constants::two_pi * variance) + r * r / variance);
}
-
-
template<typename POINT, typename MATRIX>
-double logfGaussian(const POINT &mean,
-                    const MATRIX &covariance,
-                    const POINT &x)
-{
+double logfGaussian(const POINT& mean, const MATRIX& covariance, const POINT& x) {
    double result;
    maths::gaussianLogLikelihood(covariance, x - mean, result);
    return result;
}
-
}

-void CInformationCriteriaTest::testSphericalGaussian()
-{
+void CInformationCriteriaTest::testSphericalGaussian() {
    LOG_DEBUG("+---------------------------------------------------+");
    LOG_DEBUG("| CInformationCriteriaTest::testSphericalGaussian |");
    LOG_DEBUG("+---------------------------------------------------+");

    maths::CSampling::seed();

    {
-        double variance = 5.0;
-        double mean_[] = { 10.0, 20.0 };
-        double lowerTriangle[] = { variance, 0.0, variance };
+        double variance = 5.0;
+        double mean_[] = {10.0, 20.0};
+        double lowerTriangle[] = {variance, 0.0, variance};

        TVector2 mean(boost::begin(mean_), boost::end(mean_));
-        TMatrix2 covariance(boost::begin(lowerTriangle),
-                            boost::end(lowerTriangle));
+        TMatrix2 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));
        LOG_DEBUG("mean = " << mean);
        LOG_DEBUG("covariance = " << covariance);
@@
-88,38 +75,31 @@ void CInformationCriteriaTest::testSphericalGaussian()
        double upper = maths::information_criteria_detail::confidence(n - 1.0);

        double likelihood = 0.0;
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
-            likelihood += -2.0 * logfSphericalGaussian(mean, variance, samples[i])
-                          + 2.0 * std::log(upper);
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
+            likelihood += -2.0 * logfSphericalGaussian(mean, variance, samples[i]) + 2.0 * std::log(upper);
        }
        double expectedAICc = likelihood + 6.0 + 12.0 / (n - 4.0);
-        double expectedBIC  = likelihood + 3.0 * std::log(n);
+        double expectedBIC = likelihood + 3.0 * std::log(n);

        maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_BIC> bic;
        bic.add(samples);
        LOG_DEBUG("expected BIC = " << expectedBIC);
        LOG_DEBUG("BIC = " << bic.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedBIC,
-                                     bic.calculate(),
-                                     2e-3 * expectedBIC);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedBIC, bic.calculate(), 2e-3 * expectedBIC);

        maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_AICc> aic;
        aic.add(samples);
        LOG_DEBUG("expected AICc = " << expectedAICc);
        LOG_DEBUG("AICc = " << aic.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedAICc,
-                                     aic.calculate(),
-                                     2e-3 * expectedAICc);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedAICc, aic.calculate(), 2e-3 * expectedAICc);
    }
    {
-        double variance = 8.0;
-        double mean_[] = { -5.0, 30.0, 2.0, 7.9 };
-        double lowerTriangle[] = { variance, 0.0, variance, 0.0, 0.0, variance, 0.0, 0.0, 0.0, variance };
+        double variance = 8.0;
+        double mean_[] = {-5.0, 30.0, 2.0, 7.9};
+        double lowerTriangle[] = {variance, 0.0, variance, 0.0, 0.0, variance, 0.0, 0.0, 0.0, variance};

        TVector4 mean(boost::begin(mean_), boost::end(mean_));
-        TMatrix4 covariance(boost::begin(lowerTriangle),
-                            boost::end(lowerTriangle));
+        TMatrix4 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));
        LOG_DEBUG("mean = " << mean);
        LOG_DEBUG("covariance = " << covariance);
@@ -130,29 +110,23 @@ void CInformationCriteriaTest::testSphericalGaussian()
        double upper = maths::information_criteria_detail::confidence(n - 1.0);

        double likelihood = 0.0;
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
-            likelihood += -2.0 * logfSphericalGaussian(mean, variance, samples[i])
-                          + 4.0 * std::log(upper);
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
+            likelihood += -2.0 * logfSphericalGaussian(mean, variance, samples[i]) + 4.0 * std::log(upper);
        }
        double expectedAICc = likelihood + 10.0 + 30.0 / (n - 6.0);
-        double expectedBIC  = likelihood + 5.0 * std::log(n);
+        double expectedBIC = likelihood + 5.0 * std::log(n);

        maths::CSphericalGaussianInfoCriterion<TVector4, maths::E_BIC> bic;
        bic.add(samples);
        LOG_DEBUG("expected BIC = " << expectedBIC);
        LOG_DEBUG("BIC = " << bic.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedBIC,
-                                     bic.calculate(),
-                                     2e-3 * expectedBIC);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedBIC, bic.calculate(), 2e-3 * expectedBIC);

        maths::CSphericalGaussianInfoCriterion<TVector4, maths::E_AICc> aic;
        aic.add(samples);
        LOG_DEBUG("expected AICc = " << expectedAICc);
        LOG_DEBUG("AICc = " << aic.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedAICc,
-                                     aic.calculate(),
-                                     2e-3 * expectedAICc);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedAICc, aic.calculate(), 2e-3 * expectedAICc);
    }

    // Check that they correctly distinguish the best fit model.
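// Aside (sketch only, not part of this patch): the expected values above count the free
// parameters k of a d-dimensional spherical Gaussian, k = d + 1 (d mean components plus one
// shared variance), and apply the penalties these expectations encode: k * log(n) for BIC and
// 2k + k(k + 1) / (n - k - 1) for the AICc-style correction. So k = 3 yields 6.0 + 12.0 / (n - 4.0)
// and k = 5 yields 10.0 + 30.0 / (n - 6.0), matching the test. Function names below are
// illustrative only:

#include <cmath>

// Penalty terms added to -2 * log-likelihood by the two criteria.
double bicPenalty(double k, double n) {
    return k * std::log(n);
}

double aiccPenalty(double k, double n) {
    return 2.0 * k + k * (k + 1.0) / (n - k - 1.0);
}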
@@ -160,24 +134,22 @@ void CInformationCriteriaTest::testSphericalGaussian()
    test::CRandomNumbers rng;

    {
-        double variance = 40.0;
-        double mean_[] = { 15.0, 27.0 };
-        double lowerTriangle[] = { variance, 0.0, variance };
+        double variance = 40.0;
+        double mean_[] = {15.0, 27.0};
+        double lowerTriangle[] = {variance, 0.0, variance};

        TVector2 mean(boost::begin(mean_), boost::end(mean_));
-        TMatrix2 covariance(boost::begin(lowerTriangle),
-                            boost::end(lowerTriangle));
+        TMatrix2 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));
        LOG_DEBUG("mean = " << mean);
        LOG_DEBUG("covariance = " << covariance);

        TVector2Vec samples;
        maths::CSampling::multivariateNormalSample(mean, covariance, 1000, samples);

-        maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_BIC>  bic1(samples);
+        maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_BIC> bic1(samples);
        maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_AICc> aic1(samples);

-        for (std::size_t t = 0u; t < 100; ++t)
-        {
+        for (std::size_t t = 0u; t < 100; ++t) {
            rng.random_shuffle(samples.begin(), samples.end());

            TSizeVec split;
@@ -186,7 +158,7 @@ void CInformationCriteriaTest::testSphericalGaussian()
            TVector2Vec samples1(&samples[0], &samples[split[0]]);
            TVector2Vec samples2(&samples[split[0]], &samples[999]);

-            maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_BIC>  bic2(samples);
+            maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_BIC> bic2(samples);
            maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_AICc> aic2(samples);
            bic2.add(samples1);
            bic2.add(samples2);
@@ -207,16 +179,15 @@ void CInformationCriteriaTest::testSphericalGaussian()
        maths::CKMeansFast<TVector2> kmeans;
        kmeans.setPoints(samples);

-        for (std::size_t t = 0u; t < centres.size(); t += 2)
-        {
-            TVector2Vec tcentres(&centres[t], &centres[t+2]);
+        for (std::size_t t = 0u; t < centres.size(); t += 2) {
+            TVector2Vec tcentres(&centres[t], &centres[t + 2]);
            kmeans.setCentres(tcentres);
            kmeans.run(10);

            TVector2VecVec clusters;
            kmeans.clusters(clusters);

-            maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_BIC>  bic2(clusters);
+            maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_BIC> bic2(clusters);
            maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_AICc> aic2(clusters);

            LOG_DEBUG("1 cluster BIC = " << bic1.calculate());
@@ -230,8 +201,7 @@ void CInformationCriteriaTest::testSphericalGaussian()
    }
}

-void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster()
-{
+void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster() {
    LOG_DEBUG("+-----------------------------------------------------------------------+");
    LOG_DEBUG("| CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster |");
    LOG_DEBUG("+-----------------------------------------------------------------------+");
@@ -246,31 +216,27 @@ void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster()
    maths::CSampling::seed();

-    double means_[][2] = { { 10.0, 20.0 }, { 12.0, 30.0 } };
-    double lowerTriangle[] = { 5.0, 0.0, 5.0 };
+    double means_[][2] = {{10.0, 20.0}, {12.0, 30.0}};
+    double lowerTriangle[] = {5.0, 0.0, 5.0};

    TVector2Vec means;
-    for (std::size_t i = 0u; i < boost::size(means_); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(means_); ++i) {
        means.push_back(TVector2(boost::begin(means_[i]), boost::end(means_[i])));
    }
-    TMatrix2 covariance(boost::begin(lowerTriangle),
-                        boost::end(lowerTriangle));
+    TMatrix2 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));

    LOG_DEBUG("means = " << core::CContainerPrinter::print(means));
    LOG_DEBUG("covariance = " << covariance);

-    for (std::size_t t = 0u; t < 10; ++t)
-    {
-        LOG_DEBUG("*** trial = " << t+1 << " ***");
+    for (std::size_t t = 0u; t < 10; ++t) {
+        LOG_DEBUG("*** trial = " << t + 1 << " ***");

        TVector2VecVec points(means.size());
        TSphericalCluster2VecVec clusters;

-        for (std::size_t i = 0u; i < means.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < means.size(); ++i) {
            maths::CSampling::multivariateNormalSample(means[i], covariance, 1000, points[i]);
            TMeanVar2Accumulator moments;
            moments.add(points[i]);
-            double   n = maths::CBasicStatistics::count(moments);
+            double n = maths::CBasicStatistics::count(moments);
            TVector2 m = maths::CBasicStatistics::mean(moments);
            TVector2 v = maths::CBasicStatistics::maximumLikelihoodVariance(moments);
            TSphericalCluster2::TAnnotation countAndVariance(n, (v(0) + v(1)) / 2.0);
@@ -285,9 +251,7 @@ void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster()
        LOG_DEBUG("BIC points = " << bicPoints.calculate());
        LOG_DEBUG("BIC clusters = " << bicClusters.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(bicPoints.calculate(),
-                                     bicClusters.calculate(),
-                                     1e-10 * bicPoints.calculate());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(bicPoints.calculate(), bicClusters.calculate(), 1e-10 * bicPoints.calculate());

        maths::CSphericalGaussianInfoCriterion<TVector2, maths::E_AICc> aicPoints;
        aicPoints.add(points);
@@ -295,14 +259,11 @@ void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster()
        aicClusters.add(clusters);
        LOG_DEBUG("AICc points = " << aicPoints.calculate());
        LOG_DEBUG("AICc clusters = " << aicClusters.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(aicPoints.calculate(),
-                                     aicClusters.calculate(),
-                                     1e-10 * aicPoints.calculate());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(aicPoints.calculate(), aicClusters.calculate(), 1e-10 * aicPoints.calculate());
    }
}

-void CInformationCriteriaTest::testGaussian()
-{
+void CInformationCriteriaTest::testGaussian() {
    LOG_DEBUG("+------------------------------------------+");
    LOG_DEBUG("| CInformationCriteriaTest::testGaussian |");
    LOG_DEBUG("+------------------------------------------+");
@@ -310,12 +271,11 @@ void CInformationCriteriaTest::testGaussian()
    maths::CSampling::seed();

    {
-        double mean_[] = { 10.0, 20.0 };
-        double lowerTriangle[] = { 5.0, 1.0, 5.0 };
+        double mean_[] = {10.0, 20.0};
+        double lowerTriangle[] = {5.0, 1.0, 5.0};

        TVector2 mean(boost::begin(mean_), boost::end(mean_));
-        TMatrix2 covariance(boost::begin(lowerTriangle),
-                            boost::end(lowerTriangle));
+        TMatrix2 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));
        LOG_DEBUG("mean = " << mean);
        LOG_DEBUG("covariance = " << covariance);
@@ -326,37 +286,30 @@ void CInformationCriteriaTest::testGaussian()
        double upper = maths::information_criteria_detail::confidence(n - 1.0);

        double likelihood = 0.0;
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
-            likelihood += -2.0 * logfGaussian(mean, covariance, samples[i])
-                          + 2.0 * std::log(upper);
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
+            likelihood += -2.0 * logfGaussian(mean, covariance, samples[i]) + 2.0 * std::log(upper);
        }
        double expectedAICc = likelihood + 10.0 + 30.0 / (n - 6.0);
-        double expectedBIC  = likelihood + 5.0 * std::log(n);
+        double expectedBIC = likelihood + 5.0 * std::log(n);

        maths::CGaussianInfoCriterion<TVector2, maths::E_BIC> bic;
        bic.add(samples);
        LOG_DEBUG("expected BIC = " << expectedBIC);
        LOG_DEBUG("BIC = " << bic.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedBIC,
-                                     bic.calculate(),
-                                     2e-3 * expectedBIC);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedBIC, bic.calculate(), 2e-3 * expectedBIC);

        maths::CGaussianInfoCriterion<TVector2, maths::E_AICc> aic;
        aic.add(samples);
        LOG_DEBUG("expected AICc = " << expectedAICc);
        LOG_DEBUG("AICc = " << aic.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedAICc,
-                                     aic.calculate(),
-                                     2e-3 * expectedAICc);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedAICc, aic.calculate(), 2e-3 * expectedAICc);
    }
    {
-        double mean_[] = { -5.0, 30.0, 2.0, 7.9 };
-        double lowerTriangle[] = { 8.0, 1.0, 8.0, 0.0, 0.0, 8.0, 0.0, 2.0, 0.5, 8.0 };
+        double mean_[] = {-5.0, 30.0, 2.0, 7.9};
+        double lowerTriangle[] = {8.0, 1.0, 8.0, 0.0, 0.0, 8.0, 0.0, 2.0, 0.5, 8.0};

        TVector4 mean(boost::begin(mean_), boost::end(mean_));
-        TMatrix4 covariance(boost::begin(lowerTriangle),
-                            boost::end(lowerTriangle));
+        TMatrix4 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));
        LOG_DEBUG("mean = " << mean);
        LOG_DEBUG("covariance = " << covariance);
@@ -367,29 +320,23 @@ void CInformationCriteriaTest::testGaussian()
        double upper = maths::information_criteria_detail::confidence(n - 1.0);

        double likelihood = 0.0;
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
-            likelihood += -2.0 * logfGaussian(mean, covariance, samples[i])
-                          + 4.0 * std::log(upper);
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
+            likelihood += -2.0 * logfGaussian(mean, covariance, samples[i]) + 4.0 * std::log(upper);
        }
        double expectedAICc = likelihood + 28.0 + 210.0 / (n - 15.0);
-        double expectedBIC  = likelihood + 14.0 * std::log(n);
+        double expectedBIC = likelihood + 14.0 * std::log(n);

        maths::CGaussianInfoCriterion<TVector4, maths::E_BIC> bic;
        bic.add(samples);
        LOG_DEBUG("expected BIC = " << expectedBIC);
        LOG_DEBUG("BIC = " << bic.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedBIC,
-                                     bic.calculate(),
-                                     2e-3 * expectedBIC);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedBIC, bic.calculate(), 2e-3 * expectedBIC);

        maths::CGaussianInfoCriterion<TVector4, maths::E_AICc> aic;
        aic.add(samples);
        LOG_DEBUG("expected AICc = " << expectedAICc);
        LOG_DEBUG("AICc = " << aic.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedAICc,
-                                     aic.calculate(),
-                                     2e-3 * expectedAICc);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedAICc, aic.calculate(), 2e-3 * expectedAICc);
    }

    // Check that they correctly distinguish the best fit model.
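// Aside (sketch only, not part of this patch): the full-covariance expectations above follow
// from the same penalty forms as the earlier spherical sketch, now with k = d + d(d + 1)/2 free
// parameters (d mean components plus the distinct covariance entries): d = 2 gives k = 5, i.e.
// 10.0 + 30.0 / (n - 6.0) and 5 * log(n); d = 4 gives k = 14, i.e. 28.0 + 210.0 / (n - 15.0)
// and 14 * log(n).

double gaussianParameterCount(double d) {
    // Mean components plus distinct entries of a symmetric d x d covariance matrix.
    return d + d * (d + 1.0) / 2.0;
}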
@@ -397,23 +344,21 @@ void CInformationCriteriaTest::testGaussian() test::CRandomNumbers rng; { - double mean_[] = { 15.0, 27.0 }; - double lowerTriangle[] = { 40.0, 5.0, 40.0 }; + double mean_[] = {15.0, 27.0}; + double lowerTriangle[] = {40.0, 5.0, 40.0}; TVector2 mean(boost::begin(mean_), boost::end(mean_)); - TMatrix2 covariance(boost::begin(lowerTriangle), - boost::end(lowerTriangle)); + TMatrix2 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle)); LOG_DEBUG("mean = " << mean); LOG_DEBUG("covariance = " << covariance); TVector2Vec samples; maths::CSampling::multivariateNormalSample(mean, covariance, 1000, samples); - maths::CSphericalGaussianInfoCriterion bic1(samples); + maths::CSphericalGaussianInfoCriterion bic1(samples); maths::CSphericalGaussianInfoCriterion aic1(samples); - for (std::size_t t = 0u; t < 100; ++t) - { + for (std::size_t t = 0u; t < 100; ++t) { rng.random_shuffle(samples.begin(), samples.end()); TSizeVec split; @@ -422,7 +367,7 @@ void CInformationCriteriaTest::testGaussian() TVector2Vec samples1(&samples[0], &samples[split[0]]); TVector2Vec samples2(&samples[split[0]], &samples[999]); - maths::CGaussianInfoCriterion bic2(samples); + maths::CGaussianInfoCriterion bic2(samples); maths::CGaussianInfoCriterion aic2(samples); bic2.add(samples1); bic2.add(samples2); @@ -443,16 +388,15 @@ void CInformationCriteriaTest::testGaussian() maths::CKMeansFast kmeans; kmeans.setPoints(samples); - for (std::size_t t = 0u; t < centres.size(); t += 2) - { - TVector2Vec tcentres(¢res[t], ¢res[t+2]); + for (std::size_t t = 0u; t < centres.size(); t += 2) { + TVector2Vec tcentres(¢res[t], ¢res[t + 2]); kmeans.setCentres(tcentres); kmeans.run(10); TVector2VecVec clusters; kmeans.clusters(clusters); - maths::CSphericalGaussianInfoCriterion bic2(clusters); + maths::CSphericalGaussianInfoCriterion bic2(clusters); maths::CSphericalGaussianInfoCriterion aic2(clusters); LOG_DEBUG("1 cluster BIC = " << bic1.calculate()); @@ -466,8 +410,7 @@ void CInformationCriteriaTest::testGaussian() } } -void CInformationCriteriaTest::testGaussianWithSphericalCluster() -{ +void CInformationCriteriaTest::testGaussianWithSphericalCluster() { LOG_DEBUG("+--------------------------------------------------------------+"); LOG_DEBUG("| CInformationCriteriaTest::testGaussianWithSphericalCluster |"); LOG_DEBUG("+--------------------------------------------------------------+"); @@ -478,31 +421,27 @@ void CInformationCriteriaTest::testGaussianWithSphericalCluster() maths::CSampling::seed(); - double means_[][2] = { { 10.0, 20.0 }, { 12.0, 30.0 } }; - double lowerTriangle[] = { 5.0, 0.0, 5.0 }; + double means_[][2] = {{10.0, 20.0}, {12.0, 30.0}}; + double lowerTriangle[] = {5.0, 0.0, 5.0}; TVector2Vec means; - for (std::size_t i = 0u; i < boost::size(means_); ++i) - { + for (std::size_t i = 0u; i < boost::size(means_); ++i) { means.push_back(TVector2(boost::begin(means_[i]), boost::end(means_[i]))); } - TMatrix2 covariance(boost::begin(lowerTriangle), - boost::end(lowerTriangle)); + TMatrix2 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle)); LOG_DEBUG("means = " << core::CContainerPrinter::print(means)); LOG_DEBUG("covariance = " << covariance); - for (std::size_t t = 0u; t < 10; ++t) - { - LOG_DEBUG("*** trial = " << t+1 << " ***"); + for (std::size_t t = 0u; t < 10; ++t) { + LOG_DEBUG("*** trial = " << t + 1 << " ***"); TVector2VecVec points(means.size()); TSphericalCluster2VecVec clusters; - for (std::size_t i = 0u; i < means.size(); ++i) - { + for (std::size_t i = 0u; i < 
@@ -466,8 +410,7 @@ void CInformationCriteriaTest::testGaussian()
     }
 }

-void CInformationCriteriaTest::testGaussianWithSphericalCluster()
-{
+void CInformationCriteriaTest::testGaussianWithSphericalCluster() {
     LOG_DEBUG("+--------------------------------------------------------------+");
     LOG_DEBUG("|  CInformationCriteriaTest::testGaussianWithSphericalCluster  |");
     LOG_DEBUG("+--------------------------------------------------------------+");
@@ -478,31 +421,27 @@ void CInformationCriteriaTest::testGaussianWithSphericalCluster()

     maths::CSampling::seed();

-    double means_[][2] = { { 10.0, 20.0 }, { 12.0, 30.0 } };
-    double lowerTriangle[] = { 5.0, 0.0, 5.0 };
+    double means_[][2] = {{10.0, 20.0}, {12.0, 30.0}};
+    double lowerTriangle[] = {5.0, 0.0, 5.0};

     TVector2Vec means;
-    for (std::size_t i = 0u; i < boost::size(means_); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(means_); ++i) {
         means.push_back(TVector2(boost::begin(means_[i]), boost::end(means_[i])));
     }
-    TMatrix2 covariance(boost::begin(lowerTriangle),
-                        boost::end(lowerTriangle));
+    TMatrix2 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));
     LOG_DEBUG("means = " << core::CContainerPrinter::print(means));
     LOG_DEBUG("covariance = " << covariance);

-    for (std::size_t t = 0u; t < 10; ++t)
-    {
-        LOG_DEBUG("*** trial = " << t+1 << " ***");
+    for (std::size_t t = 0u; t < 10; ++t) {
+        LOG_DEBUG("*** trial = " << t + 1 << " ***");

         TVector2VecVec points(means.size());
         TSphericalCluster2VecVec clusters;

-        for (std::size_t i = 0u; i < means.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < means.size(); ++i) {
             maths::CSampling::multivariateNormalSample(means[i], covariance, 1000, points[i]);
             TMeanVar2Accumulator moments;
             moments.add(points[i]);
-            double n  = maths::CBasicStatistics::count(moments);
+            double n = maths::CBasicStatistics::count(moments);
             TVector2 m = maths::CBasicStatistics::mean(moments);
             TVector2 v = maths::CBasicStatistics::maximumLikelihoodVariance(moments);
             TSphericalCluster2::TAnnotation countAndVariance(n, (v(0) + v(1)) / 2.0);
@@ -517,9 +456,7 @@ void CInformationCriteriaTest::testGaussianWithSphericalCluster()
         LOG_DEBUG("BIC points   = " << bicPoints.calculate());
         LOG_DEBUG("BIC clusters = " << bicClusters.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(bicPoints.calculate(),
-                                     bicClusters.calculate(),
-                                     2e-3 * bicPoints.calculate());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(bicPoints.calculate(), bicClusters.calculate(), 2e-3 * bicPoints.calculate());

         maths::CGaussianInfoCriterion<TVector2, maths::E_AICc> aicPoints;
         aicPoints.add(points);
         maths::CGaussianInfoCriterion<TSphericalCluster2, maths::E_AICc> aicClusters;
         aicClusters.add(clusters);
         LOG_DEBUG("AICc points   = " << aicPoints.calculate());
         LOG_DEBUG("AICc clusters = " << aicClusters.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(aicPoints.calculate(),
-                                     aicClusters.calculate(),
-                                     2e-3 * aicPoints.calculate());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(aicPoints.calculate(), aicClusters.calculate(), 2e-3 * aicPoints.calculate());
     }
 }

-CppUnit::Test *CInformationCriteriaTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CInformationCriteriaTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CInformationCriteriaTest>(
-                               "CInformationCriteriaTest::testSphericalGaussian",
-                               &CInformationCriteriaTest::testSphericalGaussian) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CInformationCriteriaTest>(
-                               "CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster",
-                               &CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CInformationCriteriaTest>(
-                               "CInformationCriteriaTest::testGaussian",
-                               &CInformationCriteriaTest::testGaussian) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CInformationCriteriaTest>(
-                               "CInformationCriteriaTest::testGaussianWithSphericalCluster",
-                               &CInformationCriteriaTest::testGaussianWithSphericalCluster) );
+CppUnit::Test* CInformationCriteriaTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CInformationCriteriaTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>("CInformationCriteriaTest::testSphericalGaussian",
+                                                                            &CInformationCriteriaTest::testSphericalGaussian));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CInformationCriteriaTest>("CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster",
+                                                          &CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>("CInformationCriteriaTest::testGaussian",
+                                                                            &CInformationCriteriaTest::testGaussian));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>("CInformationCriteriaTest::testGaussianWithSphericalCluster",
+                                                                            &CInformationCriteriaTest::testGaussianWithSphericalCluster));

     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CInformationCriteriaTest.h b/lib/maths/unittest/CInformationCriteriaTest.h
index 3ef2e290bd..435cbd0abc 100644
--- a/lib/maths/unittest/CInformationCriteriaTest.h
+++ b/lib/maths/unittest/CInformationCriteriaTest.h
@@ -9,15 +9,14 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CInformationCriteriaTest : public CppUnit::TestFixture
-{
-    public:
-        void testSphericalGaussian();
-        void testSphericalGaussianWithSphericalCluster();
-        void testGaussian();
-        void testGaussianWithSphericalCluster();
+class CInformationCriteriaTest : public CppUnit::TestFixture { +public: + void testSphericalGaussian(); + void testSphericalGaussianWithSphericalCluster(); + void testGaussian(); + void testGaussianWithSphericalCluster(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CInformationCriteriaTest_h diff --git a/lib/maths/unittest/CIntegerToolsTest.cc b/lib/maths/unittest/CIntegerToolsTest.cc index 520320efab..25cf7ba708 100644 --- a/lib/maths/unittest/CIntegerToolsTest.cc +++ b/lib/maths/unittest/CIntegerToolsTest.cc @@ -21,28 +21,22 @@ using namespace ml; -namespace -{ +namespace { using TSizeVec = std::vector; -std::string printBits(uint64_t x) -{ +std::string printBits(uint64_t x) { std::string result(64, '0'); - for (std::size_t i = 0u; i < 64; ++i, x >>= 1) - { - if (x & 0x1) - { + for (std::size_t i = 0u; i < 64; ++i, x >>= 1) { + if (x & 0x1) { result[63 - i] = '1'; } } return result; } - } -void CIntegerToolsTest::testNextPow2() -{ +void CIntegerToolsTest::testNextPow2() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CIntegerToolsTest::testNextPow2 |"); LOG_DEBUG("+-----------------------------------+"); @@ -51,10 +45,7 @@ void CIntegerToolsTest::testNextPow2() test::CRandomNumbers rng; - for (std::size_t test = 1u, shift = 1u; - test < (std::numeric_limits::max() >> 1) + 1; - test <<= 1, ++shift) - { + for (std::size_t test = 1u, shift = 1u; test < (std::numeric_limits::max() >> 1) + 1; test <<= 1, ++shift) { LOG_DEBUG("Testing shift = " << shift); // Edge cases. @@ -63,15 +54,13 @@ void CIntegerToolsTest::testNextPow2() TSizeVec offsets; rng.generateUniformSamples(0, test, 100, offsets); - for (std::size_t i = 0u; i < offsets.size(); ++i) - { + for (std::size_t i = 0u; i < offsets.size(); ++i) { CPPUNIT_ASSERT_EQUAL(shift, maths::CIntegerTools::nextPow2(test + offsets[i])); } } } -void CIntegerToolsTest::testReverseBits() -{ +void CIntegerToolsTest::testReverseBits() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CIntegerToolsTest::testReverseBits |"); LOG_DEBUG("+--------------------------------------+"); @@ -83,14 +72,12 @@ void CIntegerToolsTest::testReverseBits() std::string expected; std::string actual; - for (std::size_t i = 0u; i < values.size(); ++i) - { + for (std::size_t i = 0u; i < values.size(); ++i) { uint64_t x = static_cast(values[i]); expected = printBits(x); std::reverse(expected.begin(), expected.end()); actual = printBits(maths::CIntegerTools::reverseBits(x)); - if (i % 500 == 0) - { + if (i % 500 == 0) { LOG_DEBUG("expected = " << expected); LOG_DEBUG("actual = " << actual); } @@ -98,8 +85,7 @@ void CIntegerToolsTest::testReverseBits() } } -void CIntegerToolsTest::testGcd() -{ +void CIntegerToolsTest::testGcd() { LOG_DEBUG("+------------------------------+"); LOG_DEBUG("| CIntegerToolsTest::testGcd |"); LOG_DEBUG("+------------------------------+"); @@ -120,9 +106,8 @@ void CIntegerToolsTest::testGcd() test::CRandomNumbers rng; LOG_DEBUG("--- gcd(a, b) ---"); - std::size_t primes[] = { 2, 3, 5, 7, 11, 13, 17, 19, 29, 97, 821, 5851, 7877 }; - for (std::size_t i = 0u; i < 1000; ++i) - { + std::size_t primes[] = {2, 3, 5, 7, 11, 13, 17, 19, 29, 97, 821, 5851, 7877}; + for (std::size_t i = 0u; i < 1000; ++i) { TSizeVec indices; TSizeVec split; rng.generateUniformSamples(0, boost::size(primes), 7, indices); @@ -131,37 +116,30 @@ void CIntegerToolsTest::testGcd() std::sort(indices.begin() + split[0], indices.end()); TSizeVec cf; - std::set_intersection(indices.begin(), indices.begin() + 
split[0],
-                                  indices.begin() + split[0], indices.end(),
-                                  std::back_inserter(cf));
+            std::set_intersection(
+                indices.begin(), indices.begin() + split[0], indices.begin() + split[0], indices.end(), std::back_inserter(cf));

             // Use 64 bit integers here otherwise overflow will occur in 32 bit code
             uint64_t bigGcd = 1;
-            for (std::size_t j = 0u; j < cf.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < cf.size(); ++j) {
                 bigGcd *= primes[cf[j]];
             }

             uint64_t big1 = 1;
-            for (std::size_t j = 0u; j < split[0]; ++j)
-            {
+            for (std::size_t j = 0u; j < split[0]; ++j) {
                 big1 *= primes[indices[j]];
             }
             uint64_t big2 = 1;
-            for (std::size_t j = split[0]; j < indices.size(); ++j)
-            {
+            for (std::size_t j = split[0]; j < indices.size(); ++j) {
                 big2 *= primes[indices[j]];
             }

-            LOG_DEBUG("big1 = " << big1
-                      << ", big2 = " << big2
-                      << " - expected gcd = " << bigGcd
-                      << ", gcd = " << maths::CIntegerTools::gcd(big1, big2));
+            LOG_DEBUG("big1 = " << big1 << ", big2 = " << big2 << " - expected gcd = " << bigGcd
+                                << ", gcd = " << maths::CIntegerTools::gcd(big1, big2));
             CPPUNIT_ASSERT_EQUAL(bigGcd, maths::CIntegerTools::gcd(big1, big2));
         }

     LOG_DEBUG("--- gcd(a, b, c) ---");
-    for (std::size_t i = 0u; i < 1000; ++i)
-    {
+    for (std::size_t i = 0u; i < 1000; ++i) {
         TSizeVec indices;
         rng.generateUniformSamples(0, 10, 9, indices);
         std::sort(indices.begin(), indices.begin() + 3);
@@ -169,37 +147,26 @@ void CIntegerToolsTest::testGcd()
         std::sort(indices.begin() + 6, indices.end());

         TSizeVec cf;
-        std::set_intersection(indices.begin(), indices.begin() + 3,
-                              indices.begin() + 3, indices.begin() + 6,
-                              std::back_inserter(cf));
+        std::set_intersection(indices.begin(), indices.begin() + 3, indices.begin() + 3, indices.begin() + 6, std::back_inserter(cf));
         TSizeVec tmp;
-        std::set_intersection(cf.begin(), cf.end(),
-                              indices.begin() + 6, indices.end(),
-                              std::back_inserter(tmp));
+        std::set_intersection(cf.begin(), cf.end(), indices.begin() + 6, indices.end(), std::back_inserter(tmp));
         cf.swap(tmp);

         std::size_t gcd = 1;
-        for (std::size_t j = 0u; j < cf.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < cf.size(); ++j) {
             gcd *= primes[cf[j]];
         }

         TSizeVec n(3, 1);
-        for (std::size_t j = 0u; j < 3; ++j)
-        {
+        for (std::size_t j = 0u; j < 3; ++j) {
             n[0] *= primes[indices[j]];
         }
-        for (std::size_t j = 3; j < 6; ++j)
-        {
+        for (std::size_t j = 3; j < 6; ++j) {
             n[1] *= primes[indices[j]];
         }
-        for (std::size_t j = 6; j < indices.size(); ++j)
-        {
+        for (std::size_t j = 6; j < indices.size(); ++j) {
             n[2] *= primes[indices[j]];
         }

-        LOG_DEBUG("n = " << core::CContainerPrinter::print(n)
-                  << " - expected gcd = " << gcd
-                  << ", gcd = " << maths::CIntegerTools::gcd(n));
-
+        LOG_DEBUG("n = " << core::CContainerPrinter::print(n) << " - expected gcd = " << gcd << ", gcd = " << maths::CIntegerTools::gcd(n));
     }

     LOG_DEBUG("--- gcd(a, b, c, d) ---");
@@ -208,50 +175,38 @@ void CIntegerToolsTest::testGcd()
     n[1] = 19 * 97;
     n[2] = 17 * 19 * 83;
     n[3] = 17 * 19 * 29 * 83;
-    LOG_DEBUG("n = " << core::CContainerPrinter::print(n)
-              << " - expected gcd = 19"
-              << ", gcd = " << maths::CIntegerTools::gcd(n));
+    LOG_DEBUG("n = " << core::CContainerPrinter::print(n) << " - expected gcd = 19"
+                     << ", gcd = " << maths::CIntegerTools::gcd(n));
     CPPUNIT_ASSERT_EQUAL(std::size_t(19), maths::CIntegerTools::gcd(n));
 }

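testGcd validates results by construction: each operand is a product of randomly chosen primes, so the expected gcd is exactly the product of the primes the operands share. The behaviour being exercised is equivalent to the textbook Euclidean algorithm plus a pairwise fold for the vector overload; a reference sketch (not ml-cpp's CIntegerTools implementation) that reproduces the hand-built gcd(a, b, c, d) case above:

    // Euclidean algorithm and the pairwise fold used for n-way gcds.
    #include <cstdint>
    #include <iostream>
    #include <vector>

    uint64_t gcd(uint64_t a, uint64_t b) {
        // gcd(a, 0) = a; otherwise replace (a, b) by (b, a mod b).
        while (b != 0) {
            uint64_t t = a % b;
            a = b;
            b = t;
        }
        return a;
    }

    // gcd of many values: gcd is associative, so fold pairwise.
    uint64_t gcd(const std::vector<uint64_t>& n) {
        uint64_t result = 0; // gcd(0, x) = x seeds the fold
        for (uint64_t x : n) {
            result = gcd(result, x);
        }
        return result;
    }

    int main() {
        // Mirrors the hand-built case in the test: the only factor common
        // to all four numbers is 19.
        std::vector<uint64_t> n{17ull * 19, 19ull * 97, 17ull * 19 * 83, 17ull * 19 * 29 * 83};
        std::cout << gcd(n) << '\n'; // prints 19
        return 0;
    }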
-void CIntegerToolsTest::testBinomial()
-{
+void CIntegerToolsTest::testBinomial() {
     LOG_DEBUG("+-----------------------------------+");
     LOG_DEBUG("|  CIntegerToolsTest::testBinomial  |");
     LOG_DEBUG("+-----------------------------------+");

-    unsigned int n[] = { 1u, 2u, 5u, 7u, 10u };
+    unsigned int n[] = {1u, 2u, 5u, 7u, 10u};

-    for (std::size_t i = 0u; i < boost::size(n); ++i)
-    {
-        for (unsigned int j = 0u; j <= n[i]; ++j)
-        {
-            LOG_DEBUG("j = " << j << ", n = " << n[i]
-                      << ", (n j) = " << maths::CIntegerTools::binomial(n[i], j));
+    for (std::size_t i = 0u; i < boost::size(n); ++i) {
+        for (unsigned int j = 0u; j <= n[i]; ++j) {
+            LOG_DEBUG("j = " << j << ", n = " << n[i] << ", (n j) = " << maths::CIntegerTools::binomial(n[i], j));

-            double expected = std::exp(  boost::math::lgamma(static_cast<double>(n[i]+1))
-                                       - boost::math::lgamma(static_cast<double>(n[i]-j+1))
-                                       - boost::math::lgamma(static_cast<double>(j+1)));
+            double expected =
+                std::exp(boost::math::lgamma(static_cast<double>(n[i] + 1)) - boost::math::lgamma(static_cast<double>(n[i] - j + 1)) -
+                         boost::math::lgamma(static_cast<double>(j + 1)));

             CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, maths::CIntegerTools::binomial(n[i], j), 1e-10);
         }
     }
 }

-CppUnit::Test *CIntegerToolsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CIntegerToolsTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CIntegerToolsTest>(
-                               "CIntegerToolsTest::testNextPow2",
-                               &CIntegerToolsTest::testNextPow2) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CIntegerToolsTest>(
-                               "CIntegerToolsTest::testReverseBits",
-                               &CIntegerToolsTest::testReverseBits) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CIntegerToolsTest>(
-                               "CIntegerToolsTest::testGcd",
-                               &CIntegerToolsTest::testGcd) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CIntegerToolsTest>(
-                               "CIntegerToolsTest::testBinomial",
-                               &CIntegerToolsTest::testBinomial) );
-
-    return suiteOfTests;}
+CppUnit::Test* CIntegerToolsTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CIntegerToolsTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>("CIntegerToolsTest::testNextPow2", &CIntegerToolsTest::testNextPow2));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CIntegerToolsTest>("CIntegerToolsTest::testReverseBits", &CIntegerToolsTest::testReverseBits));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>("CIntegerToolsTest::testGcd", &CIntegerToolsTest::testGcd));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>("CIntegerToolsTest::testBinomial", &CIntegerToolsTest::testBinomial));
+
+    return suiteOfTests;
+}
diff --git a/lib/maths/unittest/CIntegerToolsTest.h b/lib/maths/unittest/CIntegerToolsTest.h
index 033f67b48f..a924cc3113 100644
--- a/lib/maths/unittest/CIntegerToolsTest.h
+++ b/lib/maths/unittest/CIntegerToolsTest.h
@@ -9,15 +9,14 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CIntegerToolsTest : public CppUnit::TestFixture
-{
-    public:
-        void testNextPow2();
-        void testReverseBits();
-        void testGcd();
-        void testBinomial();
+class CIntegerToolsTest : public CppUnit::TestFixture {
+public:
+    void testNextPow2();
+    void testReverseBits();
+    void testGcd();
+    void testBinomial();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CIntegerToolsTest_h
diff --git a/lib/maths/unittest/CIntegrationTest.cc b/lib/maths/unittest/CIntegrationTest.cc
index ed099653c2..d959923bd9 100644
--- a/lib/maths/unittest/CIntegrationTest.cc
+++ b/lib/maths/unittest/CIntegrationTest.cc
@@ -26,67 +26,47 @@
 using namespace ml;
 using namespace maths;

-namespace
-{
+namespace {

 template<unsigned int ORDER>
-class CPolynomialFunction : public std::unary_function<double, double>
-{
-    public:
-        CPolynomialFunction(const double (&coefficients)[ORDER + 1])
-        {
-            std::copy(coefficients,
-                      coefficients + ORDER + 1,
-                      m_Coefficients);
-        }
-
-        bool operator()(const double &x, double &result) const
-        {
-            result = 0.0;
-            for (unsigned int i = 0u; i < ORDER + 1; ++i)
-            {
-                result += 
m_Coefficients[i] * std::pow(x, static_cast(i)); - } - return true; +class CPolynomialFunction : public std::unary_function { +public: + CPolynomialFunction(const double (&coefficients)[ORDER + 1]) { std::copy(coefficients, coefficients + ORDER + 1, m_Coefficients); } + + bool operator()(const double& x, double& result) const { + result = 0.0; + for (unsigned int i = 0u; i < ORDER + 1; ++i) { + result += m_Coefficients[i] * std::pow(x, static_cast(i)); } + return true; + } - const double &coefficient(unsigned int i) const - { - return m_Coefficients[i]; - } + const double& coefficient(unsigned int i) const { return m_Coefficients[i]; } - private: - double m_Coefficients[ORDER + 1]; +private: + double m_Coefficients[ORDER + 1]; }; -std::ostream &operator<<(std::ostream &o, const CPolynomialFunction<0u> &f) -{ +std::ostream& operator<<(std::ostream& o, const CPolynomialFunction<0u>& f) { return o << f.coefficient(0); } template -std::ostream &operator<<(std::ostream &o, const CPolynomialFunction &f) -{ +std::ostream& operator<<(std::ostream& o, const CPolynomialFunction& f) { o << f.coefficient(0) << " + "; - for (unsigned int i = 1u; i < ORDER; ++i) - { + for (unsigned int i = 1u; i < ORDER; ++i) { o << f.coefficient(i) << "x^" << i << " + "; } - if (ORDER > 0) - { + if (ORDER > 0) { o << f.coefficient(ORDER) << "x^" << ORDER; } return o; } template -double integrate(const CPolynomialFunction &f, - const double &a, - const double &b) -{ +double integrate(const CPolynomialFunction& f, const double& a, const double& b) { double result = 0.0; - for (unsigned int i = 0; i < ORDER + 1; ++i) - { + for (unsigned int i = 0; i < ORDER + 1; ++i) { double n = static_cast(i) + 1.0; result += f.coefficient(i) / n * (std::pow(b, n) - std::pow(a, n)); } @@ -94,83 +74,64 @@ double integrate(const CPolynomialFunction &f, } template -class CMultivariatePolynomialFunction -{ - public: - using TVector = CVectorNx1; - - struct SMonomial - { - bool operator<(const SMonomial &rhs) const - { - return std::accumulate(s_Powers, s_Powers + DIMENSION, 0.0) - < std::accumulate(rhs.s_Powers, rhs.s_Powers + DIMENSION, 0.0); - } - double s_Coefficient; - double s_Powers[DIMENSION]; - }; - - using TMonomialVec = std::vector; - - public: - void add(double coefficient, double powers[DIMENSION]) - { - m_Terms.push_back(SMonomial()); - m_Terms.back().s_Coefficient = coefficient; - std::copy(powers, powers + DIMENSION, m_Terms.back().s_Powers); - } +class CMultivariatePolynomialFunction { +public: + using TVector = CVectorNx1; - void finalize() - { - std::sort(m_Terms.begin(), m_Terms.end()); + struct SMonomial { + bool operator<(const SMonomial& rhs) const { + return std::accumulate(s_Powers, s_Powers + DIMENSION, 0.0) < std::accumulate(rhs.s_Powers, rhs.s_Powers + DIMENSION, 0.0); } + double s_Coefficient; + double s_Powers[DIMENSION]; + }; - bool operator()(const TVector &x, double &result) const - { - result = 0.0; - for (std::size_t i = 0u; i < m_Terms.size(); ++i) - { - const SMonomial &monomial = m_Terms[i]; - double term = monomial.s_Coefficient; - for (unsigned int j = 0u; j < DIMENSION; ++j) - { - if (monomial.s_Powers[j] > 0.0) - { - term *= std::pow(x(j), monomial.s_Powers[j]); - } + using TMonomialVec = std::vector; + +public: + void add(double coefficient, double powers[DIMENSION]) { + m_Terms.push_back(SMonomial()); + m_Terms.back().s_Coefficient = coefficient; + std::copy(powers, powers + DIMENSION, m_Terms.back().s_Powers); + } + + void finalize() { std::sort(m_Terms.begin(), m_Terms.end()); } + + bool 
operator()(const TVector& x, double& result) const { + result = 0.0; + for (std::size_t i = 0u; i < m_Terms.size(); ++i) { + const SMonomial& monomial = m_Terms[i]; + double term = monomial.s_Coefficient; + for (unsigned int j = 0u; j < DIMENSION; ++j) { + if (monomial.s_Powers[j] > 0.0) { + term *= std::pow(x(j), monomial.s_Powers[j]); } - result += term; } - return true; + result += term; } + return true; + } - const TMonomialVec &terms() const { return m_Terms; } + const TMonomialVec& terms() const { return m_Terms; } - private: - TMonomialVec m_Terms; +private: + TMonomialVec m_Terms; }; template -std::ostream &operator<<(std::ostream &o, const CMultivariatePolynomialFunction &f) -{ - if (!f.terms().empty()) - { +std::ostream& operator<<(std::ostream& o, const CMultivariatePolynomialFunction& f) { + if (!f.terms().empty()) { o << (f.terms())[0].s_Coefficient; - for (unsigned int j = 0u; j < DIMENSION; ++j) - { - if ((f.terms())[0].s_Powers[j] > 0.0) - { + for (unsigned int j = 0u; j < DIMENSION; ++j) { + if ((f.terms())[0].s_Powers[j] > 0.0) { o << ".x" << j << "^" << (f.terms())[0].s_Powers[j]; } } } - for (std::size_t i = 1u; i < f.terms().size(); ++i) - { + for (std::size_t i = 1u; i < f.terms().size(); ++i) { o << " + " << (f.terms())[i].s_Coefficient; - for (unsigned int j = 0u; j < DIMENSION; ++j) - { - if ((f.terms())[i].s_Powers[j] > 0.0) - { + for (unsigned int j = 0u; j < DIMENSION; ++j) { + if ((f.terms())[i].s_Powers[j] > 0.0) { o << ".x" << j << "^" << (f.terms())[i].s_Powers[j]; } } @@ -181,16 +142,11 @@ std::ostream &operator<<(std::ostream &o, const CMultivariatePolynomialFunction< using TDoubleVec = std::vector; template -double integrate(const CMultivariatePolynomialFunction &f, - const TDoubleVec &a, - const TDoubleVec &b) -{ +double integrate(const CMultivariatePolynomialFunction& f, const TDoubleVec& a, const TDoubleVec& b) { double result = 0.0; - for (std::size_t i = 0u; i < f.terms().size(); ++i) - { + for (std::size_t i = 0u; i < f.terms().size(); ++i) { double term = (f.terms())[i].s_Coefficient; - for (unsigned int j = 0; j < DIMENSION; ++j) - { + for (unsigned int j = 0; j < DIMENSION; ++j) { double n = (f.terms())[i].s_Powers[j] + 1.0; term *= (std::pow(b[j], n) - std::pow(a[j], n)) / n; } @@ -199,37 +155,29 @@ double integrate(const CMultivariatePolynomialFunction &f, return result; } - using TDoubleVecVec = std::vector; -bool readGrid(const std::string &file, - TDoubleVec &weights, - TDoubleVecVec &points) -{ +bool readGrid(const std::string& file, TDoubleVec& weights, TDoubleVecVec& points) { using TStrVec = std::vector; std::ifstream d2_l1; d2_l1.open(file.c_str()); - if (!d2_l1) - { + if (!d2_l1) { LOG_ERROR("Bad file: " << file); return false; } std::string line; - while (std::getline(d2_l1, line)) - { + while (std::getline(d2_l1, line)) { TStrVec point; std::string weight; core::CStringUtils::tokenise(", ", line, point, weight); core::CStringUtils::trimWhitespace(weight); points.push_back(TDoubleVec()); - for (std::size_t i = 0u; i < point.size(); ++i) - { + for (std::size_t i = 0u; i < point.size(); ++i) { core::CStringUtils::trimWhitespace(point[i]); double xi; - if (!core::CStringUtils::stringToType(point[i], xi)) - { + if (!core::CStringUtils::stringToType(point[i], xi)) { LOG_ERROR("Bad point: " << core::CContainerPrinter::print(point)); return false; } @@ -237,8 +185,7 @@ bool readGrid(const std::string &file, } double w; - if (!core::CStringUtils::stringToType(weight, w)) - { + if (!core::CStringUtils::stringToType(weight, w)) { LOG_ERROR("Bad 
weight: " << weight); return false; } @@ -248,62 +195,46 @@ bool readGrid(const std::string &file, return true; } -class CSmoothHeavySide -{ - public: - using result_type = double; +class CSmoothHeavySide { +public: + using result_type = double; - public: - CSmoothHeavySide(double slope, double offset) : - m_Slope(slope), - m_Offset(offset) - { - } +public: + CSmoothHeavySide(double slope, double offset) : m_Slope(slope), m_Offset(offset) {} - bool operator()(double x, double &result) const - { - result = std::exp(m_Slope * (x - m_Offset)) - / (std::exp(m_Slope * (x - m_Offset)) + 1.0); - return true; - } + bool operator()(double x, double& result) const { + result = std::exp(m_Slope * (x - m_Offset)) / (std::exp(m_Slope * (x - m_Offset)) + 1.0); + return true; + } - private: - double m_Slope; - double m_Offset; +private: + double m_Slope; + double m_Offset; }; -class CNormal -{ - public: - using result_type = double; +class CNormal { +public: + using result_type = double; - public: - CNormal(double mean, double std) : - m_Mean(mean), - m_Std(std) - { - } +public: + CNormal(double mean, double std) : m_Mean(mean), m_Std(std) {} - bool operator()(double x, double &result) const - { - if (m_Std <= 0.0) - { - return false; - } - boost::math::normal_distribution<> normal(m_Mean, m_Std); - result = boost::math::pdf(normal, x); - return true; + bool operator()(double x, double& result) const { + if (m_Std <= 0.0) { + return false; } + boost::math::normal_distribution<> normal(m_Mean, m_Std); + result = boost::math::pdf(normal, x); + return true; + } - private: - double m_Mean; - double m_Std; +private: + double m_Mean; + double m_Std; }; - } -void CIntegrationTest::testAllSingleVariate() -{ +void CIntegrationTest::testAllSingleVariate() { LOG_DEBUG("+-------------------------------------------+"); LOG_DEBUG("| CIntegerToolsTest::testAllSingleVariate |"); LOG_DEBUG("+-------------------------------------------+"); @@ -325,553 +256,410 @@ void CIntegrationTest::testAllSingleVariate() static const double EPS = 1e-6; - double ranges[][2] = - { - { -3.0, -1.0 }, - { -1.0, 5.0 }, - { 0.0, 8.0 } - }; + double ranges[][2] = {{-3.0, -1.0}, {-1.0, 5.0}, {0.0, 8.0}}; { - double coeffs[][1] = - { - { -3.2 }, - { 0.0 }, - { 1.0 }, - { 5.0 }, - { 12.1 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][1] = {{-3.2}, {0.0}, {1.0}, {5.0}, {12.1}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TConstant f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - 
CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][2] = - { - { -3.2, -1.2 }, - { 0.0, 2.0 }, - { 1.0, -1.0 }, - { 5.0, 6.4 }, - { 12.1, -8.3 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][2] = {{-3.2, -1.2}, {0.0, 2.0}, {1.0, -1.0}, {5.0, 6.4}, {12.1, -8.3}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TLinear f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + 
CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][3] = - { - { -3.2, -1.2, -3.5 }, - { 0.1, 2.0, 4.6 }, - { 1.0, -1.0, 1.0 }, - { 5.0, 6.4, -4.1 }, - { 12.1, -8.3, 10.1 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][3] = {{-3.2, -1.2, -3.5}, {0.1, 2.0, 4.6}, {1.0, -1.0, 1.0}, {5.0, 6.4, -4.1}, {12.1, -8.3, 10.1}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TQuadratic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + 
CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][4] = - { - { -1.2, -1.9, -3.0, -3.2 }, - { 0.4, 2.0, 4.6, 2.3 }, - { 1.0, -1.0, 1.0, -1.0 }, - { 4.0, 2.4, -8.1, -2.1 }, - { 10.1, -6.3, 1.1, 8.3 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][4] = { + {-1.2, -1.9, -3.0, -3.2}, {0.4, 2.0, 4.6, 2.3}, {1.0, -1.0, 1.0, -1.0}, {4.0, 2.4, -8.1, -2.1}, {10.1, -6.3, 1.1, 8.3}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TCubic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][5] = - { - { -1.1, -0.9, -4.0, -1.2, -0.2 }, - { 20.4, 2.0, 4.6, 2.3, 0.7 }, - { 1.0, -1.0, 1.0, -1.0, 1.0 }, - { 4.0, 2.4, -8.1, -2.1, 1.4 }, - { 10.1, -6.3, 1.1, 8.3, -5.1 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][5] = {{-1.1, -0.9, -4.0, -1.2, -0.2}, + {20.4, 2.0, 4.6, 2.3, 0.7}, + {1.0, -1.0, 1.0, -1.0, 1.0}, + {4.0, 2.4, -8.1, -2.1, 1.4}, + {10.1, -6.3, 1.1, 
8.3, -5.1}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TQuartic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][6] = - { - { -1.1, -0.9, -4.0, -1.2, -0.2, -1.1 }, - { 20.4, 6.0, 2.6, 0.3, 0.7, 2.3 }, - { 1.0, -1.0, 1.0, -1.0, 1.0, -1.0 }, - { 3.0, 2.4, -8.1, -2.1, 1.4, -3.1 }, - { 10.1, -5.3, 2.1, 4.3, -7.1, 0.4 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][6] = {{-1.1, -0.9, -4.0, -1.2, -0.2, -1.1}, + {20.4, 6.0, 2.6, 0.3, 0.7, 2.3}, + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0}, + {3.0, 2.4, -8.1, -2.1, 1.4, -3.1}, + {10.1, -5.3, 2.1, 4.3, -7.1, 0.4}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TQuintic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], 
actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][7] = - { - { -1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1 }, - { 20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0 }, - { 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0 }, - { 3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1 }, - { 10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][7] = {{-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1}, + {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0}, + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0}, + {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1}, + {10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { THexic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][8] = - { - { -1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0 }, - { 20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0 }, - { 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0 }, - { 3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1 }, - { 10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][8] = {{-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0}, + {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0}, + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}, + {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1}, + 
{10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { THeptic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][9] = - { - { -1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1 }, - { 20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0 }, - { 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0 }, - { 3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0 }, - { 10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][9] = {{-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1}, + {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0}, + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0}, + {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0}, + {10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TOctic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][10] = - { - { -1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1, -3.4 }, - { 20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0, 1.3 }, - { 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0 }, - { 3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0, 1.1 }, - { 10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3, -5.0 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - 
for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][10] = {{-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1, -3.4}, + {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0, 1.3}, + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}, + {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0, 1.1}, + {10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3, -5.0}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TNonic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][11] = - { - { -1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1, -3.4, -0.9 }, - { 20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0, 1.3, 2.0 }, - { 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0 }, - { 3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0, 1.1, 3.1 }, - { 10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3, -5.0, -0.1 } - }; - - for (unsigned int i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(coeffs)/sizeof(coeffs[0]); ++j) - { + double coeffs[][11] = {{-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1, -3.4, -0.9}, + {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0, 1.3, 2.0}, + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0}, + {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0, 1.1, 3.1}, + {10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3, -5.0, -0.1}}; + + for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { + for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { TDecic f(coeffs[j]); LOG_DEBUG("range = [" << ranges[i][0] << "," << ranges[i][1] << "]" - << ", f(x) = " << f); + << ", f(x) = " << f); double expected = integrate(f, ranges[i][0], ranges[i][1]); double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre( - f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } } - - -void CIntegrationTest::testAdaptive() -{ +void CIntegrationTest::testAdaptive() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CIntegerToolsTest::testAdaptive |"); LOG_DEBUG("+-----------------------------------+"); @@ -884,30 +672,16 @@ void CIntegrationTest::testAdaptive() CSmoothHeavySide heavySide(10.0, 20.0); - TDoubleDoublePr intervals_[] = - { - TDoubleDoublePr(0.0, 10.0), - TDoubleDoublePr(10.0, 20.0), - TDoubleDoublePr(20.0, 30.0), - TDoubleDoublePr(30.0, 40.0) - }; - TDoubleDoublePrVec intervals(boost::begin(intervals_), - boost::end(intervals_)); + TDoubleDoublePr intervals_[] = { + TDoubleDoublePr(0.0, 10.0), TDoubleDoublePr(10.0, 20.0), TDoubleDoublePr(20.0, 30.0), TDoubleDoublePr(30.0, 40.0)}; + TDoubleDoublePrVec intervals(boost::begin(intervals_), boost::end(intervals_)); TDoubleVec fIntervals(intervals.size()); - for 
(std::size_t i = 0u; i < intervals.size(); ++i) - { - CIntegration::gaussLegendre(heavySide, - intervals[i].first, - intervals[i].second, - fIntervals[i]); + for (std::size_t i = 0u; i < intervals.size(); ++i) { + CIntegration::gaussLegendre(heavySide, intervals[i].first, intervals[i].second, fIntervals[i]); } double result = 0.0; - CIntegration::adaptiveGaussLegendre(heavySide, - intervals, - fIntervals, - 3, 5, 0.01, - result); + CIntegration::adaptiveGaussLegendre(heavySide, intervals, fIntervals, 3, 5, 0.01, result); LOG_DEBUG("expectedResult = 20.0"); LOG_DEBUG("result = " << result); CPPUNIT_ASSERT_DOUBLES_EQUAL(20.0, result, 0.01 * 20.0); @@ -918,40 +692,23 @@ void CIntegrationTest::testAdaptive() CNormal normal(21.0, 3.0); double expectedResult = 0.0; - for (std::size_t i = 0u; i < 400; ++i) - { + for (std::size_t i = 0u; i < 400; ++i) { double fi; - CIntegration::gaussLegendre(normal, - 0.1 * static_cast(i), - 0.1 * static_cast(i+1), - fi); + CIntegration::gaussLegendre( + normal, 0.1 * static_cast(i), 0.1 * static_cast(i + 1), fi); expectedResult += fi; } - TDoubleDoublePr intervals_[] = - { - TDoubleDoublePr(0.0, 10.0), - TDoubleDoublePr(10.0, 20.0), - TDoubleDoublePr(20.0, 30.0), - TDoubleDoublePr(30.0, 40.0) - }; - TDoubleDoublePrVec intervals(boost::begin(intervals_), - boost::end(intervals_)); + TDoubleDoublePr intervals_[] = { + TDoubleDoublePr(0.0, 10.0), TDoubleDoublePr(10.0, 20.0), TDoubleDoublePr(20.0, 30.0), TDoubleDoublePr(30.0, 40.0)}; + TDoubleDoublePrVec intervals(boost::begin(intervals_), boost::end(intervals_)); TDoubleVec fIntervals(intervals.size()); - for (std::size_t i = 0u; i < intervals.size(); ++i) - { - CIntegration::gaussLegendre(normal, - intervals[i].first, - intervals[i].second, - fIntervals[i]); + for (std::size_t i = 0u; i < intervals.size(); ++i) { + CIntegration::gaussLegendre(normal, intervals[i].first, intervals[i].second, fIntervals[i]); } double result = 0.0; - CIntegration::adaptiveGaussLegendre(normal, - intervals, - fIntervals, - 3, 5, 0.0001, - result); + CIntegration::adaptiveGaussLegendre(normal, intervals, fIntervals, 3, 5, 0.0001, result); LOG_DEBUG("expectedResult = " << expectedResult); LOG_DEBUG("result = " << result); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedResult, result, 0.0001 * expectedResult); @@ -965,46 +722,28 @@ void CIntegrationTest::testAdaptive() CCompositeFunctions::CProduct f(heavySide, normal); double expectedResult = 0.0; - for (std::size_t i = 0u; i < 400; ++i) - { + for (std::size_t i = 0u; i < 400; ++i) { double fi; - CIntegration::gaussLegendre(f, - 0.1 * static_cast(i), - 0.1 * static_cast(i+1), - fi); + CIntegration::gaussLegendre(f, 0.1 * static_cast(i), 0.1 * static_cast(i + 1), fi); expectedResult += fi; } - TDoubleDoublePr intervals_[] = - { - TDoubleDoublePr(0.0, 20.0), - TDoubleDoublePr(20.0, 40.0) - }; - TDoubleDoublePrVec intervals(boost::begin(intervals_), - boost::end(intervals_)); + TDoubleDoublePr intervals_[] = {TDoubleDoublePr(0.0, 20.0), TDoubleDoublePr(20.0, 40.0)}; + TDoubleDoublePrVec intervals(boost::begin(intervals_), boost::end(intervals_)); TDoubleVec fIntervals(intervals.size()); - for (std::size_t i = 0u; i < intervals.size(); ++i) - { - CIntegration::gaussLegendre(f, - intervals[i].first, - intervals[i].second, - fIntervals[i]); + for (std::size_t i = 0u; i < intervals.size(); ++i) { + CIntegration::gaussLegendre(f, intervals[i].first, intervals[i].second, fIntervals[i]); } double result = 0.0; - CIntegration::adaptiveGaussLegendre(f, - intervals, - fIntervals, - 3, 5, 0.0001, - result); + 
CIntegration::adaptiveGaussLegendre(f, intervals, fIntervals, 3, 5, 0.0001, result); LOG_DEBUG("expectedResult = " << expectedResult); LOG_DEBUG("result = " << result); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedResult, result, 0.0001 * expectedResult); } } -void CIntegrationTest::testSparseGrid() -{ +void CIntegrationTest::testSparseGrid() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CIntegerToolsTest::testSparseGrid |"); LOG_DEBUG("+-------------------------------------+"); @@ -1017,30 +756,23 @@ void CIntegrationTest::testSparseGrid() TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l1", - expectedWeights, - expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l1", expectedWeights, expectedPoints)); - using TSparse2do1 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse2do1 = CIntegration::CSparseGaussLegendreQuadrature; - const TSparse2do1 &sparse = TSparse2do1::instance(); + const TSparse2do1& sparse = TSparse2do1::instance(); LOG_DEBUG("# points = " << sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedWeights.size(), sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedPoints.size(), sparse.points().size()); - for (std::size_t i = 0u; i < expectedWeights.size(); ++i) - { + for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { LOG_DEBUG("weight = " << (sparse.weights())[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], - (sparse.weights())[i] / 4.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / 4.0, 1e-6); LOG_DEBUG("point = " << (sparse.points())[i]); - for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], - 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -1050,30 +782,23 @@ void CIntegrationTest::testSparseGrid() TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l2", - expectedWeights, - expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l2", expectedWeights, expectedPoints)); - using TSparse2do2 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse2do2 = CIntegration::CSparseGaussLegendreQuadrature; - const TSparse2do2 &sparse = TSparse2do2::instance(); + const TSparse2do2& sparse = TSparse2do2::instance(); LOG_DEBUG("# points = " << sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedWeights.size(), sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedPoints.size(), sparse.points().size()); - for (std::size_t i = 0u; i < expectedWeights.size(); ++i) - { + for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { LOG_DEBUG("weight = " << (sparse.weights())[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], - (sparse.weights())[i] / 4.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / 4.0, 1e-6); LOG_DEBUG("point = " << (sparse.points())[i]); - for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], - 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -1083,30 +808,23 @@ void 
CIntegrationTest::testSparseGrid() TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l4", - expectedWeights, - expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l4", expectedWeights, expectedPoints)); - using TSparse2do4 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse2do4 = CIntegration::CSparseGaussLegendreQuadrature; - const TSparse2do4 &sparse = TSparse2do4::instance(); + const TSparse2do4& sparse = TSparse2do4::instance(); LOG_DEBUG("# points = " << sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedWeights.size(), sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedPoints.size(), sparse.points().size()); - for (std::size_t i = 0u; i < expectedWeights.size(); ++i) - { + for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { LOG_DEBUG("weight = " << (sparse.weights())[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], - (sparse.weights())[i] / 4.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / 4.0, 1e-6); LOG_DEBUG("point = " << (sparse.points())[i]); - for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], - 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -1116,30 +834,23 @@ void CIntegrationTest::testSparseGrid() TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d7_l3", - expectedWeights, - expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d7_l3", expectedWeights, expectedPoints)); - using TSparse7do3 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse7do3 = CIntegration::CSparseGaussLegendreQuadrature; - const TSparse7do3 &sparse = TSparse7do3::instance(); + const TSparse7do3& sparse = TSparse7do3::instance(); LOG_DEBUG("# points = " << sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedWeights.size(), sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedPoints.size(), sparse.points().size()); - for (std::size_t i = 0u; i < expectedWeights.size(); ++i) - { + for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { LOG_DEBUG("weight = " << (sparse.weights())[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], - (sparse.weights())[i] / std::pow(2.0, 7.0), 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / std::pow(2.0, 7.0), 1e-6); LOG_DEBUG("point = " << (sparse.points())[i]); - for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], - 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -1149,112 +860,117 @@ void CIntegrationTest::testSparseGrid() TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d7_l5", - expectedWeights, - expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d7_l5", expectedWeights, expectedPoints)); - using TSparse7do5 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse7do5 = CIntegration::CSparseGaussLegendreQuadrature; - const TSparse7do5 &sparse = 
TSparse7do5::instance(); + const TSparse7do5& sparse = TSparse7do5::instance(); LOG_DEBUG("# points = " << sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedWeights.size(), sparse.weights().size()); CPPUNIT_ASSERT_EQUAL(expectedPoints.size(), sparse.points().size()); - for (std::size_t i = 0u; i < expectedWeights.size(); ++i) - { - if (i % 10 == 0) - { + for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { + if (i % 10 == 0) { LOG_DEBUG("weight = " << (sparse.weights())[i]); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], - (sparse.weights())[i] / std::pow(2.0, 7.0), 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / std::pow(2.0, 7.0), 1e-6); - if (i % 10 == 0) - { + if (i % 10 == 0) { LOG_DEBUG("point = " << (sparse.points())[i]); } - for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], - 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } - unsigned int dimensions[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - unsigned int order[] = { 1, 2, 3, 4, 5 }; - - std::size_t expectedNumberPoints[][5] = - { - { 1, 2, 3, 4, 5 }, - { 1, 5, 13, 29, 53 }, - { 1, 7, 25, 69, 165 }, - { 1, 9, 41, 137, 385 }, - { 1, 11, 61, 241, 781 }, - { 1, 13, 85, 389, 1433 }, - { 1, 15, 113, 589, 2437 }, - { 1, 17, 145, 849, 3905 }, - { 1, 19, 181, 1177, 5965 }, - { 1, 21, 221, 1581, 8761 } - }; - - for (std::size_t i = 0u; i < boost::size(dimensions); ++i) - { + unsigned int dimensions[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + unsigned int order[] = {1, 2, 3, 4, 5}; + + std::size_t expectedNumberPoints[][5] = {{1, 2, 3, 4, 5}, + {1, 5, 13, 29, 53}, + {1, 7, 25, 69, 165}, + {1, 9, 41, 137, 385}, + {1, 11, 61, 241, 781}, + {1, 13, 85, 389, 1433}, + {1, 15, 113, 589, 2437}, + {1, 17, 145, 849, 3905}, + {1, 19, 181, 1177, 5965}, + {1, 21, 221, 1581, 8761}}; + + for (std::size_t i = 0u; i < boost::size(dimensions); ++i) { LOG_DEBUG("DIMENSION = " << dimensions[i]); -#define NUMBER_POINTS(dimension, n) \ - switch (order[j]) \ - { \ - case 1: n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - case 2: n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - case 3: n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - case 4: n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - case 5: n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - default: n = 0; \ - break; \ - } - for (std::size_t j = 0u; j < boost::size(order); ++j) - { +#define NUMBER_POINTS(dimension, n) \ + switch (order[j]) { \ + case 1: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ + break; \ + case 2: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ + break; \ + case 3: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ + break; \ + case 4: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ + break; \ + case 5: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ + break; \ + default: \ + n = 0; \ + break; \ + } + for (std::size_t j = 0u; j < boost::size(order); ++j) { LOG_DEBUG("ORDER = " << order[j]); std::size_t numberPoints = 0u; - 
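// The NUMBER_POINTS macro above exists because the quadrature class fixes
// dimension and order at compile time while this test iterates over them at
// run time, so every (dimension, order) pair needs its own instantiation. A
// sketch of the same runtime-to-compile-time dispatch without the macro
// (SGrid is an illustrative stand-in, not the library's API):
#include <cstddef>

template<unsigned DIMENSION, unsigned ORDER>
struct SGrid {
    static std::size_t numberPoints() {
        // Placeholder: the real class tabulates the sparse grid here.
        return static_cast<std::size_t>(DIMENSION) * ORDER;
    }
};

template<unsigned DIMENSION>
std::size_t numberPointsForOrder(unsigned order) {
    // Translate the runtime order into a compile-time template argument.
    switch (order) {
    case 1: return SGrid<DIMENSION, 1>::numberPoints();
    case 2: return SGrid<DIMENSION, 2>::numberPoints();
    case 3: return SGrid<DIMENSION, 3>::numberPoints();
    case 4: return SGrid<DIMENSION, 4>::numberPoints();
    case 5: return SGrid<DIMENSION, 5>::numberPoints();
    default: return 0;
    }
}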
switch (dimensions[i])
-            {
-            case 1: NUMBER_POINTS(CIntegration::OneDimension, numberPoints); break;
-            case 2: NUMBER_POINTS(CIntegration::TwoDimensions, numberPoints); break;
-            case 3: NUMBER_POINTS(CIntegration::ThreeDimensions, numberPoints); break;
-            case 4: NUMBER_POINTS(CIntegration::FourDimensions, numberPoints); break;
-            case 5: NUMBER_POINTS(CIntegration::FiveDimensions, numberPoints); break;
-            case 6: NUMBER_POINTS(CIntegration::SixDimensions, numberPoints); break;
-            case 7: NUMBER_POINTS(CIntegration::SevenDimensions, numberPoints); break;
-            case 8: NUMBER_POINTS(CIntegration::EightDimensions, numberPoints); break;
-            case 9: NUMBER_POINTS(CIntegration::NineDimensions, numberPoints); break;
-            case 10: NUMBER_POINTS(CIntegration::TenDimensions, numberPoints); break;
-            default: numberPoints = 0u; break;
+            switch (dimensions[i]) {
+            case 1:
+                NUMBER_POINTS(CIntegration::OneDimension, numberPoints);
+                break;
+            case 2:
+                NUMBER_POINTS(CIntegration::TwoDimensions, numberPoints);
+                break;
+            case 3:
+                NUMBER_POINTS(CIntegration::ThreeDimensions, numberPoints);
+                break;
+            case 4:
+                NUMBER_POINTS(CIntegration::FourDimensions, numberPoints);
+                break;
+            case 5:
+                NUMBER_POINTS(CIntegration::FiveDimensions, numberPoints);
+                break;
+            case 6:
+                NUMBER_POINTS(CIntegration::SixDimensions, numberPoints);
+                break;
+            case 7:
+                NUMBER_POINTS(CIntegration::SevenDimensions, numberPoints);
+                break;
+            case 8:
+                NUMBER_POINTS(CIntegration::EightDimensions, numberPoints);
+                break;
+            case 9:
+                NUMBER_POINTS(CIntegration::NineDimensions, numberPoints);
+                break;
+            case 10:
+                NUMBER_POINTS(CIntegration::TenDimensions, numberPoints);
+                break;
+            default:
+                numberPoints = 0u;
+                break;
             }
 #undef NUMBER_POINTS

-            LOG_DEBUG("number points: actual = " << numberPoints
-                      << ", expected = " << expectedNumberPoints[i][j]);
+            LOG_DEBUG("number points: actual = " << numberPoints << ", expected = " << expectedNumberPoints[i][j]);
             CPPUNIT_ASSERT_EQUAL(expectedNumberPoints[i][j], numberPoints);
         }
     }
 }

-void CIntegrationTest::testMultivariateSmooth()
-{
+void CIntegrationTest::testMultivariateSmooth() {
     LOG_DEBUG("+--------------------------------------------+");
     LOG_DEBUG("|  CIntegrationTest::testMultivariateSmooth  |");
     LOG_DEBUG("+--------------------------------------------+");
@@ -1273,12 +989,10 @@ void CIntegrationTest::testMultivariateSmooth()
     static const std::size_t DIMENSION = 2u;

-    for (std::size_t l = 2; l < 5; ++l)
-    {
+    for (std::size_t l = 2; l < 5; ++l) {
         LOG_DEBUG("ORDER = " << 1 + l);

-        for (std::size_t t = 0u; t < 20; ++t)
-        {
+        for (std::size_t t = 0u; t < 20; ++t) {
             std::size_t n = 3u;

             TSizeVec coefficients;
@@ -1288,16 +1002,10 @@ void CIntegrationTest::testMultivariateSmooth()
             rng.generateUniformSamples(0, l, DIMENSION * n, powers);

             CMultivariatePolynomialFunction polynomial;
-            for (std::size_t i = 0u; i < n; ++i)
-            {
+            for (std::size_t i = 0u; i < n; ++i) {
                 double c = static_cast<double>(coefficients[i]);
-                double p[] =
-                    {
-                        static_cast<double>(powers[DIMENSION*i + 0]),
-                        static_cast<double>(powers[DIMENSION*i + 1])
-                    };
-                if (std::accumulate(p, p + DIMENSION, 0.0) > (2.0 * static_cast<double>(l) - 1.0))
-                {
+                double p[] = {static_cast<double>(powers[DIMENSION * i + 0]), static_cast<double>(powers[DIMENSION * i + 1])};
+                if (std::accumulate(p, p + DIMENSION, 0.0) > (2.0 * static_cast<double>(l) - 1.0)) {
                     continue;
                 }
                 polynomial.add(c, p);
@@ -1318,23 +1026,22 @@ void CIntegrationTest::testMultivariateSmooth()
             double actual = 0.0;
             bool successful = false;
-            switch (l)
-            {
+            switch (l) {
             case 2:
-                successful = CIntegration::sparseGaussLegendre(polynomial, a,
b, actual); + successful = + CIntegration::sparseGaussLegendre(polynomial, a, b, actual); break; case 3: - successful = CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + successful = + CIntegration::sparseGaussLegendre(polynomial, a, b, actual); break; case 4: - successful = CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + successful = + CIntegration::sparseGaussLegendre(polynomial, a, b, actual); break; case 5: - successful = CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + successful = + CIntegration::sparseGaussLegendre(polynomial, a, b, actual); break; default: break; @@ -1353,12 +1060,10 @@ void CIntegrationTest::testMultivariateSmooth() static const std::size_t DIMENSION = 5u; - for (std::size_t l = 2; l < 5; ++l) - { + for (std::size_t l = 2; l < 5; ++l) { LOG_DEBUG("ORDER = " << l); - for (std::size_t t = 0u; t < 20; ++t) - { + for (std::size_t t = 0u; t < 20; ++t) { std::size_t n = 10u; TSizeVec coefficients; @@ -1368,19 +1073,14 @@ void CIntegrationTest::testMultivariateSmooth() rng.generateUniformSamples(0, l, DIMENSION * n, powers); CMultivariatePolynomialFunction polynomial; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { double c = static_cast(coefficients[i]); - double p[] = - { - static_cast(powers[5*i + 0]), - static_cast(powers[5*i + 1]), - static_cast(powers[5*i + 2]), - static_cast(powers[5*i + 3]), - static_cast(powers[5*i + 4]) - }; - if (std::accumulate(p, p + DIMENSION, 0.0) > (2.0 * static_cast(l) - 1.0)) - { + double p[] = {static_cast(powers[5 * i + 0]), + static_cast(powers[5 * i + 1]), + static_cast(powers[5 * i + 2]), + static_cast(powers[5 * i + 3]), + static_cast(powers[5 * i + 4])}; + if (std::accumulate(p, p + DIMENSION, 0.0) > (2.0 * static_cast(l) - 1.0)) { continue; } polynomial.add(c, p); @@ -1401,23 +1101,22 @@ void CIntegrationTest::testMultivariateSmooth() double actual = 0.0; bool successful = false; - switch (l) - { + switch (l) { case 2: - successful = CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + successful = + CIntegration::sparseGaussLegendre(polynomial, a, b, actual); break; case 3: - successful = CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + successful = + CIntegration::sparseGaussLegendre(polynomial, a, b, actual); break; case 4: - successful = CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + successful = + CIntegration::sparseGaussLegendre(polynomial, a, b, actual); break; case 5: - successful = CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + successful = + CIntegration::sparseGaussLegendre(polynomial, a, b, actual); break; default: break; @@ -1432,22 +1131,15 @@ void CIntegrationTest::testMultivariateSmooth() } } -CppUnit::Test *CIntegrationTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CIntegrationTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CIntegrationTest::testAllSingleVariate", - &CIntegrationTest::testAllSingleVariate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CIntegrationTest::testAdaptive", - &CIntegrationTest::testAdaptive) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CIntegrationTest::testSparseGrid", - &CIntegrationTest::testSparseGrid) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CIntegrationTest::testMultivariateSmooth", - &CIntegrationTest::testMultivariateSmooth) ); +CppUnit::Test* CIntegrationTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CIntegrationTest"); + + 
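// The degree filter in testMultivariateSmooth above (dropping monomials whose
// total degree exceeds 2l - 1) mirrors the defining property of Gauss-Legendre
// quadrature: an order-l rule integrates polynomials up to degree 2l - 1
// exactly, and no higher. A one-dimensional sanity check with the standard
// two-point rule, whose nodes are +/-1/sqrt(3) with unit weights on [-1, 1]:
#include <cassert>
#include <cmath>

int main() {
    const double x0 = -1.0 / std::sqrt(3.0);
    const double x1 = -x0;
    auto f = [](double x) { return x * x * x + x * x; }; // degree 3 = 2*2 - 1
    double quadrature = f(x0) + f(x1);                   // both weights are 1
    double exact = 2.0 / 3.0; // x^3 integrates to 0 and x^2 to 2/3 on [-1, 1]
    assert(std::fabs(quadrature - exact) < 1e-12);
    return 0;
}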
suiteOfTests->addTest( + new CppUnit::TestCaller("CIntegrationTest::testAllSingleVariate", &CIntegrationTest::testAllSingleVariate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CIntegrationTest::testAdaptive", &CIntegrationTest::testAdaptive)); + suiteOfTests->addTest(new CppUnit::TestCaller("CIntegrationTest::testSparseGrid", &CIntegrationTest::testSparseGrid)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CIntegrationTest::testMultivariateSmooth", &CIntegrationTest::testMultivariateSmooth)); return suiteOfTests; } diff --git a/lib/maths/unittest/CIntegrationTest.h b/lib/maths/unittest/CIntegrationTest.h index e7e7bf3168..637f677ddb 100644 --- a/lib/maths/unittest/CIntegrationTest.h +++ b/lib/maths/unittest/CIntegrationTest.h @@ -9,15 +9,14 @@ #include -class CIntegrationTest : public CppUnit::TestFixture -{ - public: - void testAllSingleVariate(); - void testAdaptive(); - void testSparseGrid(); - void testMultivariateSmooth(); +class CIntegrationTest : public CppUnit::TestFixture { +public: + void testAllSingleVariate(); + void testAdaptive(); + void testSparseGrid(); + void testMultivariateSmooth(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CIntegration_h diff --git a/lib/maths/unittest/CKMeansFastTest.cc b/lib/maths/unittest/CKMeansFastTest.cc index d88d7802d9..a9e633cfea 100644 --- a/lib/maths/unittest/CKMeansFastTest.cc +++ b/lib/maths/unittest/CKMeansFastTest.cc @@ -8,32 +8,29 @@ #include +#include +#include #include #include -#include -#include #include #include using namespace ml; -namespace -{ +namespace { //! \brief Expose internals of k-means for testing. template -class CKMeansFastForTest : maths::CKMeansFast -{ - public: - using TBoundingBox = typename maths::CKMeansFast::TBoundingBox; - using TKdTreeNodeData = typename maths::CKMeansFast::CKdTreeNodeData; - using TDataPropagator = typename maths::CKMeansFast::SDataPropagator; - using TCentreFilter = typename maths::CKMeansFast::CCentreFilter; - using TCentroidComputer = typename maths::CKMeansFast::CCentroidComputer; - using TClosestPointsCollector = typename maths::CKMeansFast::CClosestPointsCollector; +class CKMeansFastForTest : maths::CKMeansFast { +public: + using TBoundingBox = typename maths::CKMeansFast::TBoundingBox; + using TKdTreeNodeData = typename maths::CKMeansFast::CKdTreeNodeData; + using TDataPropagator = typename maths::CKMeansFast::SDataPropagator; + using TCentreFilter = typename maths::CKMeansFast::CCentreFilter; + using TCentroidComputer = typename maths::CKMeansFast::CCentroidComputer; + using TClosestPointsCollector = typename maths::CKMeansFast::CClosestPointsCollector; }; - } using TDoubleVec = std::vector; @@ -49,101 +46,80 @@ using TMean2AccumulatorVec = std::vector; using TMean4Accumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMean4AccumulatorVec = std::vector; -namespace -{ +namespace { template -struct SKdTreeDataInvariantsChecker -{ +struct SKdTreeDataInvariantsChecker { using TData = typename CKMeansFastForTest::TKdTreeNodeData; using TMeanAccumulator = typename maths::CBasicStatistics::SSampleMean::TAccumulator; using TBoundingBox = typename CKMeansFastForTest::TBoundingBox; - void operator()(const typename maths::CKdTree::SNode &node) const - { + void operator()(const typename maths::CKdTree::SNode& node) const { TMeanAccumulator centroid; TBoundingBox bb(node.s_Point); centroid.add(node.s_Point); - if (node.s_LeftChild) - { + if (node.s_LeftChild) { bb.add(node.s_LeftChild->boundingBox()); centroid += 
node.s_LeftChild->centroid(); } - if (node.s_RightChild) - { + if (node.s_RightChild) { bb.add(node.s_RightChild->boundingBox()); centroid += node.s_RightChild->centroid(); } CPPUNIT_ASSERT_EQUAL(bb.print(), node.boundingBox().print()); - CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::print(centroid), - maths::CBasicStatistics::print(node.centroid())); + CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::print(centroid), maths::CBasicStatistics::print(node.centroid())); } }; template -class CCentreFilterChecker -{ - public: - using TSizeVec = std::vector; - using TPointVec = std::vector; - using TData = typename CKMeansFastForTest::TKdTreeNodeData; - using TCentreFilter = typename CKMeansFastForTest::TCentreFilter; - - public: - CCentreFilterChecker(const TPointVec ¢res, - std::size_t &numberAdmitted) : - m_Centres(centres), - m_CentreFilter(centres), - m_NumberAdmitted(numberAdmitted) - {} - - bool operator()(const typename maths::CKdTree::SNode &node) const - { - using TDoubleSizePr = std::pair; - - m_CentreFilter.prune(node.boundingBox()); - const TSizeVec &filtered = m_CentreFilter.filter(); - maths::CBasicStatistics::COrderStatisticsStack closest; - for (std::size_t i = 0u; i < m_Centres.size(); ++i) - { - closest.add(TDoubleSizePr((m_Centres[i] - node.s_Point).euclidean(), i)); - } - closest.sort(); - if (std::find(filtered.begin(), - filtered.end(), - closest[0].second) == filtered.end()) - { - LOG_DEBUG("filtered = " << core::CContainerPrinter::print(filtered)); - LOG_DEBUG("closest = " << closest.print()); - CPPUNIT_ASSERT(false); - } - if (filtered.size() > 1) - { - m_NumberAdmitted += filtered.size(); - } - return true; +class CCentreFilterChecker { +public: + using TSizeVec = std::vector; + using TPointVec = std::vector; + using TData = typename CKMeansFastForTest::TKdTreeNodeData; + using TCentreFilter = typename CKMeansFastForTest::TCentreFilter; + +public: + CCentreFilterChecker(const TPointVec& centres, std::size_t& numberAdmitted) + : m_Centres(centres), m_CentreFilter(centres), m_NumberAdmitted(numberAdmitted) {} + + bool operator()(const typename maths::CKdTree::SNode& node) const { + using TDoubleSizePr = std::pair; + + m_CentreFilter.prune(node.boundingBox()); + const TSizeVec& filtered = m_CentreFilter.filter(); + maths::CBasicStatistics::COrderStatisticsStack closest; + for (std::size_t i = 0u; i < m_Centres.size(); ++i) { + closest.add(TDoubleSizePr((m_Centres[i] - node.s_Point).euclidean(), i)); + } + closest.sort(); + if (std::find(filtered.begin(), filtered.end(), closest[0].second) == filtered.end()) { + LOG_DEBUG("filtered = " << core::CContainerPrinter::print(filtered)); + LOG_DEBUG("closest = " << closest.print()); + CPPUNIT_ASSERT(false); } + if (filtered.size() > 1) { + m_NumberAdmitted += filtered.size(); + } + return true; + } - private: - TPointVec m_Centres; - mutable TCentreFilter m_CentreFilter; - std::size_t &m_NumberAdmitted; +private: + TPointVec m_Centres; + mutable TCentreFilter m_CentreFilter; + std::size_t& m_NumberAdmitted; }; template -std::pair closest(const std::vector &y, - const POINT &x) -{ +std::pair closest(const std::vector& y, const POINT& x) { std::size_t closest = 0u; double dmin = (x - y[0]).euclidean(); - for (std::size_t i = 1u; i < y.size(); ++i) - { + for (std::size_t i = 1u; i < y.size(); ++i) { double di = (x - y[i]).euclidean(); - if (di < dmin) - { + if (di < dmin) { closest = i; dmin = di; } @@ -152,36 +128,28 @@ std::pair closest(const std::vector &y, } template -bool kmeans(const std::vector &points, - std::size_t iterations, 
- std::vector ¢res) -{ +bool kmeans(const std::vector& points, std::size_t iterations, std::vector& centres) { using TMeanAccumlator = typename maths::CBasicStatistics::SSampleMean::TAccumulator; std::vector centroids; - for (std::size_t i = 0u; i < iterations; ++i) - { + for (std::size_t i = 0u; i < iterations; ++i) { centroids.clear(); centroids.resize(centres.size()); - for (std::size_t j = 0u; j < points.size(); ++j) - { + for (std::size_t j = 0u; j < points.size(); ++j) { std::size_t centre = closest(centres, points[j]).first; centroids[centre].add(points[j]); } bool converged = true; - for (std::size_t j = 0u; j < centres.size(); ++j) - { - if (maths::CBasicStatistics::mean(centroids[j]) != centres[j]) - { + for (std::size_t j = 0u; j < centres.size(); ++j) { + if (maths::CBasicStatistics::mean(centroids[j]) != centres[j]) { centres[j] = maths::CBasicStatistics::mean(centroids[j]); converged = false; } } - if (converged) - { + if (converged) { return true; } } @@ -189,47 +157,39 @@ bool kmeans(const std::vector &points, return false; } -double square(double x) -{ +double square(double x) { return x * x; } -double sumSquareResiduals(const TVector2VecVec &points) -{ +double sumSquareResiduals(const TVector2VecVec& points) { double result = 0.0; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { TMean2Accumulator m_; m_.add(points[i]); TVector2 m = maths::CBasicStatistics::mean(m_); - for (std::size_t j = 0u; j < points[i].size(); ++j) - { + for (std::size_t j = 0u; j < points[i].size(); ++j) { result += square((points[i][j] - m).euclidean()); } } return result; } - } -void CKMeansFastTest::testDataPropagation() -{ +void CKMeansFastTest::testDataPropagation() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CKMeansFastTest::testDataPropagation |"); LOG_DEBUG("+----------------------------------------+"); test::CRandomNumbers rng; - for (std::size_t i = 1u; i <= 100; ++i) - { + for (std::size_t i = 1u; i <= 100; ++i) { LOG_DEBUG("Test " << i); TDoubleVec samples; rng.generateUniformSamples(-400.0, 400.0, 1000, samples); { maths::CKdTree::TKdTreeNodeData> tree; TVector2Vec points; - for (std::size_t j = 0u; j < samples.size(); j += 2) - { + for (std::size_t j = 0u; j < samples.size(); j += 2) { points.push_back(TVector2(&samples[j], &samples[j + 2])); } tree.build(points); @@ -239,8 +199,7 @@ void CKMeansFastTest::testDataPropagation() { maths::CKdTree::TKdTreeNodeData> tree; TVector4Vec points; - for (std::size_t j = 0u; j < samples.size(); j += 4) - { + for (std::size_t j = 0u; j < samples.size(); j += 4) { points.push_back(TVector4(&samples[j], &samples[j + 4])); } tree.build(points); @@ -250,8 +209,7 @@ void CKMeansFastTest::testDataPropagation() } } -void CKMeansFastTest::testFilter() -{ +void CKMeansFastTest::testFilter() { LOG_DEBUG("+-------------------------------+"); LOG_DEBUG("| CKMeansFastTest::testFilter |"); LOG_DEBUG("+-------------------------------+"); @@ -262,8 +220,7 @@ void CKMeansFastTest::testFilter() test::CRandomNumbers rng; - for (std::size_t i = 1u; i <= 100; ++i) - { + for (std::size_t i = 1u; i <= 100; ++i) { LOG_DEBUG("Test " << i); TDoubleVec samples1; rng.generateUniformSamples(-400.0, 400.0, 4000, samples1); @@ -275,14 +232,12 @@ void CKMeansFastTest::testFilter() maths::CKdTree::TKdTreeNodeData> tree; TVector2Vec points; - for (std::size_t j = 0u; j < samples1.size(); j += 2) - { + for (std::size_t j = 0u; j < samples1.size(); j += 2) { points.push_back(TVector2(&samples1[j], 
&samples1[j + 2])); } tree.build(points); TVector2Vec centres; - for (std::size_t j = 0u; j < samples2.size(); j += 2) - { + for (std::size_t j = 0u; j < samples2.size(); j += 2) { centres.push_back(TVector2(&samples2[j], &samples2[j + 2])); } LOG_DEBUG(" centres = " << core::CContainerPrinter::print(centres)); @@ -291,9 +246,7 @@ void CKMeansFastTest::testFilter() std::size_t numberAdmitted = 0; CCentreFilterChecker checker(centres, numberAdmitted); tree.preorderDepthFirst(checker); - double speedup = static_cast(points.size()) - * static_cast(centres.size()) - / static_cast(numberAdmitted); + double speedup = static_cast(points.size()) * static_cast(centres.size()) / static_cast(numberAdmitted); LOG_DEBUG(" speedup = " << speedup); CPPUNIT_ASSERT(speedup > 30.0); } @@ -303,14 +256,12 @@ void CKMeansFastTest::testFilter() maths::CKdTree::TKdTreeNodeData> tree; TVector4Vec points; - for (std::size_t j = 0u; j < samples1.size(); j += 4) - { + for (std::size_t j = 0u; j < samples1.size(); j += 4) { points.push_back(TVector4(&samples1[j], &samples1[j + 4])); } tree.build(points); TVector4Vec centres; - for (std::size_t j = 0u; j < samples2.size(); j += 4) - { + for (std::size_t j = 0u; j < samples2.size(); j += 4) { centres.push_back(TVector4(&samples2[j], &samples2[j + 4])); } LOG_DEBUG(" centres = " << core::CContainerPrinter::print(centres)); @@ -319,17 +270,14 @@ void CKMeansFastTest::testFilter() std::size_t numberAdmitted = 0; CCentreFilterChecker checker(centres, numberAdmitted); tree.preorderDepthFirst(checker); - double speedup = static_cast(points.size()) - * static_cast(centres.size()) - / static_cast(numberAdmitted); + double speedup = static_cast(points.size()) * static_cast(centres.size()) / static_cast(numberAdmitted); LOG_DEBUG(" speedup = " << speedup); CPPUNIT_ASSERT(speedup > 5.5); } } } -void CKMeansFastTest::testCentroids() -{ +void CKMeansFastTest::testCentroids() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CKMeansFastTest::testCentroids |"); LOG_DEBUG("+----------------------------------+"); @@ -340,8 +288,7 @@ void CKMeansFastTest::testCentroids() test::CRandomNumbers rng; - for (std::size_t i = 1u; i <= 100; ++i) - { + for (std::size_t i = 1u; i <= 100; ++i) { LOG_DEBUG("Test " << i); TDoubleVec samples1; rng.generateUniformSamples(-400.0, 400.0, 4000, samples1); @@ -353,14 +300,12 @@ void CKMeansFastTest::testCentroids() maths::CKdTree::TKdTreeNodeData> tree; TVector2Vec points; - for (std::size_t j = 0u; j < samples1.size(); j += 2) - { + for (std::size_t j = 0u; j < samples1.size(); j += 2) { points.push_back(TVector2(&samples1[j], &samples1[j + 2])); } tree.build(points); TVector2Vec centres; - for (std::size_t j = 0u; j < samples2.size(); j += 2) - { + for (std::size_t j = 0u; j < samples2.size(); j += 2) { centres.push_back(TVector2(&samples2[j], &samples2[j + 2])); } tree.postorderDepthFirst(CKMeansFastForTest::TDataPropagator()); @@ -370,28 +315,24 @@ void CKMeansFastTest::testCentroids() tree.preorderDepthFirst(computer); TMean2AccumulatorVec expectedCentroids(centres.size()); - for (std::size_t j = 0u; j < points.size(); ++j) - { + for (std::size_t j = 0u; j < points.size(); ++j) { expectedCentroids[closest(centres, points[j]).first].add(points[j]); } LOG_DEBUG(" expected centroids = " << core::CContainerPrinter::print(expectedCentroids)); LOG_DEBUG(" centroids = " << core::CContainerPrinter::print(centroids)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentroids), - core::CContainerPrinter::print(centroids)); + 
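// The speedup assertions in testFilter rest on a pruning argument: a candidate
// centre c can be discarded for a whole kd-tree node when some other centre d
// is at least as close to every corner of the node's bounding box, because the
// region where d wins is a half-space and the box is the convex hull of its
// corners. A free-standing two-dimensional version of that domination test
// (a sketch, not the library's implementation):
#include <array>
#include <cmath>

using TPoint = std::array<double, 2>;

bool dominates(const TPoint& d, const TPoint& c, const TPoint& lo, const TPoint& hi) {
    for (int corner = 0; corner < 4; ++corner) {
        TPoint v{(corner & 1) ? hi[0] : lo[0], (corner & 2) ? hi[1] : lo[1]};
        double toD = std::hypot(v[0] - d[0], v[1] - d[1]);
        double toC = std::hypot(v[0] - c[0], v[1] - c[1]);
        if (toD > toC) {
            return false; // part of the box is closer to c: keep c
        }
    }
    return true; // c cannot own any point inside this box: prune it
}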
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentroids), core::CContainerPrinter::print(centroids)); } { LOG_DEBUG("Vector4"); maths::CKdTree::TKdTreeNodeData> tree; TVector4Vec points; - for (std::size_t j = 0u; j < samples1.size(); j += 4) - { + for (std::size_t j = 0u; j < samples1.size(); j += 4) { points.push_back(TVector4(&samples1[j], &samples1[j + 4])); } tree.build(points); TVector4Vec centres; - for (std::size_t j = 0u; j < samples2.size(); j += 4) - { + for (std::size_t j = 0u; j < samples2.size(); j += 4) { centres.push_back(TVector4(&samples2[j], &samples2[j + 4])); } tree.postorderDepthFirst(CKMeansFastForTest::TDataPropagator()); @@ -401,20 +342,17 @@ void CKMeansFastTest::testCentroids() tree.preorderDepthFirst(computer); TMean4AccumulatorVec expectedCentroids(centres.size()); - for (std::size_t j = 0u; j < points.size(); ++j) - { + for (std::size_t j = 0u; j < points.size(); ++j) { expectedCentroids[closest(centres, points[j]).first].add(points[j]); } LOG_DEBUG(" expected centroids = " << core::CContainerPrinter::print(expectedCentroids)); LOG_DEBUG(" centroids = " << core::CContainerPrinter::print(centroids)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentroids), - core::CContainerPrinter::print(centroids)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentroids), core::CContainerPrinter::print(centroids)); } } } -void CKMeansFastTest::testClosestPoints() -{ +void CKMeansFastTest::testClosestPoints() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CKMeansFastTest::testClosestPoints |"); LOG_DEBUG("+--------------------------------------+"); @@ -427,8 +365,7 @@ void CKMeansFastTest::testClosestPoints() test::CRandomNumbers rng; - for (std::size_t i = 1u; i <= 100; ++i) - { + for (std::size_t i = 1u; i <= 100; ++i) { LOG_DEBUG("Test " << i); TDoubleVec samples1; rng.generateUniformSamples(-400.0, 400.0, 4000, samples1); @@ -439,28 +376,22 @@ void CKMeansFastTest::testClosestPoints() maths::CKdTree::TKdTreeNodeData> tree; TVector2Vec points; - for (std::size_t j = 0u; j < samples1.size(); j += 2) - { + for (std::size_t j = 0u; j < samples1.size(); j += 2) { points.push_back(TVector2(&samples1[j], &samples1[j + 2])); } tree.build(points); TVector2Vec centres; - for (std::size_t j = 0u; j < samples2.size(); j += 2) - { + for (std::size_t j = 0u; j < samples2.size(); j += 2) { centres.push_back(TVector2(&samples2[j], &samples2[j + 2])); } tree.postorderDepthFirst(CKMeansFastForTest::TDataPropagator()); TVector2VecVec closestPoints; - CKMeansFastForTest::TClosestPointsCollector collector(points.size(), - centres, - closestPoints); + CKMeansFastForTest::TClosestPointsCollector collector(points.size(), centres, closestPoints); tree.postorderDepthFirst(collector); - for (std::size_t j = 0u; j < closestPoints.size(); ++j) - { - for (std::size_t k = 0u; k < closestPoints[j].size(); ++k) - { + for (std::size_t j = 0u; j < closestPoints.size(); ++j) { + for (std::size_t k = 0u; k < closestPoints[j].size(); ++k) { CPPUNIT_ASSERT_EQUAL(closest(centres, closestPoints[j][k]).first, j); } } @@ -469,28 +400,22 @@ void CKMeansFastTest::testClosestPoints() maths::CKdTree::TKdTreeNodeData> tree; TVector4Vec points; - for (std::size_t j = 0u; j < samples1.size(); j += 4) - { + for (std::size_t j = 0u; j < samples1.size(); j += 4) { points.push_back(TVector4(&samples1[j], &samples1[j + 4])); } tree.build(points); TVector4Vec centres; - for (std::size_t j = 0u; j < samples2.size(); j += 4) - { + for (std::size_t j = 0u; j < 
samples2.size(); j += 4) { centres.push_back(TVector4(&samples2[j], &samples2[j + 4])); } tree.postorderDepthFirst(CKMeansFastForTest::TDataPropagator()); TVector4VecVec closestPoints; - CKMeansFastForTest::TClosestPointsCollector collector(points.size(), - centres, - closestPoints); + CKMeansFastForTest::TClosestPointsCollector collector(points.size(), centres, closestPoints); tree.postorderDepthFirst(collector); - for (std::size_t j = 0u; j < closestPoints.size(); ++j) - { - for (std::size_t k = 0u; k < closestPoints[j].size(); ++k) - { + for (std::size_t j = 0u; j < closestPoints.size(); ++j) { + for (std::size_t k = 0u; k < closestPoints[j].size(); ++k) { CPPUNIT_ASSERT_EQUAL(closest(centres, closestPoints[j][k]).first, j); } } @@ -498,8 +423,7 @@ void CKMeansFastTest::testClosestPoints() } } -void CKMeansFastTest::testRun() -{ +void CKMeansFastTest::testRun() { LOG_DEBUG("+----------------------------+"); LOG_DEBUG("| CKMeansFastTest::testRun |"); LOG_DEBUG("+----------------------------+"); @@ -509,8 +433,7 @@ void CKMeansFastTest::testRun() test::CRandomNumbers rng; - for (std::size_t t = 1u; t <= 100; ++t) - { + for (std::size_t t = 1u; t <= 100; ++t) { LOG_DEBUG("Test " << t); TDoubleVec samples1; @@ -520,13 +443,11 @@ void CKMeansFastTest::testRun() { TVector2Vec points; - for (std::size_t i = 0u; i < samples1.size(); i += 2) - { + for (std::size_t i = 0u; i < samples1.size(); i += 2) { points.push_back(TVector2(&samples1[i], &samples1[i + 2])); } TVector2Vec centres; - for (std::size_t i = 0u; i < samples2.size(); i += 2) - { + for (std::size_t i = 0u; i < samples2.size(); i += 2) { centres.push_back(TVector2(&samples2[i], &samples2[i + 2])); } @@ -544,14 +465,12 @@ void CKMeansFastTest::testRun() LOG_DEBUG("centres = " << core::CContainerPrinter::print(centres)); LOG_DEBUG("fast centres = " << core::CContainerPrinter::print(kmeansFast.centres())); CPPUNIT_ASSERT_EQUAL(converged, fastConverged); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(centres), - core::CContainerPrinter::print(kmeansFast.centres())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(centres), core::CContainerPrinter::print(kmeansFast.centres())); } } } -void CKMeansFastTest::testRunWithSphericalClusters() -{ +void CKMeansFastTest::testRunWithSphericalClusters() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CKMeansFastTest::testRunWithSphericalClusters |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -564,43 +483,28 @@ void CKMeansFastTest::testRunWithSphericalClusters() using TSphericalCluster2Vec = std::vector; using TMeanVar2Accumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; - double means[][2] = - { - { 1.0, 1.0 }, - { 2.0, 1.5 }, - { 1.5, 1.5 }, - { 1.9, 1.5 }, - { 1.0, 1.5 }, - { 10.0, 15.0 }, - { 12.0, 13.5 }, - { 12.0, 11.5 }, - { 14.0, 10.5 } - }; - std::size_t counts[] = { 10, 15, 5, 8, 17, 10, 11, 8, 12 }; - double lowerTriangle[] = { 1.0, 0.0, 1.0 }; + double means[][2] = { + {1.0, 1.0}, {2.0, 1.5}, {1.5, 1.5}, {1.9, 1.5}, {1.0, 1.5}, {10.0, 15.0}, {12.0, 13.5}, {12.0, 11.5}, {14.0, 10.5}}; + std::size_t counts[] = {10, 15, 5, 8, 17, 10, 11, 8, 12}; + double lowerTriangle[] = {1.0, 0.0, 1.0}; test::CRandomNumbers rng; - for (std::size_t t = 0u; t < 50; ++t) - { - LOG_DEBUG("*** trial = " << t+1 << " ***"); + for (std::size_t t = 0u; t < 50; ++t) { + LOG_DEBUG("*** trial = " << t + 1 << " ***"); TVector2Vec points; TSphericalCluster2Vec clusters; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + 
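// testRunWithSphericalClusters depends on k-means over summaries agreeing with
// k-means over the raw points: a spherical cluster carries a centre, a count
// and an average variance, and the k-means centroid update is simply a
// count-weighted mean of the summaries. A sketch of that weighted update
// (field names are illustrative):
#include <vector>

struct SSphericalCluster {
    double s_X, s_Y; // centre
    double s_Count;  // number of raw points summarised
};

// Count-weighted centroid; equal to the centroid of the underlying raw points.
SSphericalCluster centroidOf(const std::vector<SSphericalCluster>& clusters) {
    SSphericalCluster result{0.0, 0.0, 0.0};
    for (const SSphericalCluster& c : clusters) {
        result.s_X += c.s_Count * c.s_X;
        result.s_Y += c.s_Count * c.s_Y;
        result.s_Count += c.s_Count;
    }
    if (result.s_Count > 0.0) {
        result.s_X /= result.s_Count;
        result.s_Y /= result.s_Count;
    }
    return result;
}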
for (std::size_t i = 0u; i < boost::size(means); ++i) { TVector2Vec pointsi; TVector2 mean(&means[i][0], &means[i][2]); TMatrix2 covariances(&lowerTriangle[0], &lowerTriangle[3]); - maths::CSampling::multivariateNormalSample(mean, - covariances, - counts[i], - pointsi); + maths::CSampling::multivariateNormalSample(mean, covariances, counts[i], pointsi); points.insert(points.end(), pointsi.begin(), pointsi.end()); TMeanVar2Accumulator moments; moments.add(pointsi); - double n = maths::CBasicStatistics::count(moments); + double n = maths::CBasicStatistics::count(moments); TVector2 m = maths::CBasicStatistics::mean(moments); TVector2 v = maths::CBasicStatistics::variance(moments); TSphericalCluster2::TAnnotation countAndVariance(n, (v(0) + v(1)) / 2.0); @@ -630,20 +534,17 @@ void CKMeansFastTest::testRunWithSphericalClusters() TVector2Vec kmeansPointsCentres = kmeansPoints.centres(); TSphericalCluster2Vec kmeansClustersCentres_ = kmeansClusters.centres(); - TVector2Vec kmeansClustersCentres(kmeansClustersCentres_.begin(), - kmeansClustersCentres_.end()); + TVector2Vec kmeansClustersCentres(kmeansClustersCentres_.begin(), kmeansClustersCentres_.end()); std::sort(kmeansPointsCentres.begin(), kmeansPointsCentres.end()); std::sort(kmeansClustersCentres.begin(), kmeansClustersCentres.end()); LOG_DEBUG("k-means points = " << core::CContainerPrinter::print(kmeansPointsCentres)); LOG_DEBUG("k-means clusters = " << core::CContainerPrinter::print(kmeansClustersCentres)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(kmeansPointsCentres), - core::CContainerPrinter::print(kmeansClustersCentres)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(kmeansPointsCentres), core::CContainerPrinter::print(kmeansClustersCentres)); } } -void CKMeansFastTest::testPlusPlus() -{ +void CKMeansFastTest::testPlusPlus() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CKMeansFastTest::testPlusPlus |"); LOG_DEBUG("+---------------------------------+"); @@ -667,8 +568,7 @@ void CKMeansFastTest::testPlusPlus() TMeanAccumulator meanSSRRatio; double maxSSRRatio = 0.0; - for (std::size_t t = 0u; t < 100; ++t) - { + for (std::size_t t = 0u; t < 100; ++t) { TSizeVec sizes; sizes.push_back(400); sizes.push_back(300); @@ -681,8 +581,7 @@ void CKMeansFastTest::testPlusPlus() rng.generateRandomMultivariateNormals(sizes, means, covariances, points); TVector2Vec flatPoints; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { flatPoints.insert(flatPoints.end(), points[i].begin(), points[i].end()); std::sort(points[i].begin(), points[i].end()); } @@ -692,8 +591,7 @@ void CKMeansFastTest::testPlusPlus() TSizeVec random; rng.generateUniformSamples(0, flatPoints.size(), k, random); LOG_DEBUG("random = " << core::CContainerPrinter::print(random)); - for (std::size_t i = 0u; i < k; ++i) - { + for (std::size_t i = 0u; i < k; ++i) { randomCentres.push_back(flatPoints[random[i]]); } @@ -703,25 +601,18 @@ void CKMeansFastTest::testPlusPlus() kmeansPlusPlus.run(flatPoints, k, plusPlusCentres); TSizeVec sampledClusters; - for (std::size_t i = 0u; i < plusPlusCentres.size(); ++i) - { + for (std::size_t i = 0u; i < plusPlusCentres.size(); ++i) { std::size_t j = 0u; - for (/**/; j < points.size(); ++j) - { - TVector2VecCItr next = std::lower_bound(points[j].begin(), - points[j].end(), - plusPlusCentres[i]); - if (next != points[j].end() && *next == plusPlusCentres[i]) - { + for (/**/; j < points.size(); ++j) { + TVector2VecCItr next = 
std::lower_bound(points[j].begin(), points[j].end(), plusPlusCentres[i]); + if (next != points[j].end() && *next == plusPlusCentres[i]) { break; } } sampledClusters.push_back(j); } std::sort(sampledClusters.begin(), sampledClusters.end()); - sampledClusters.erase(std::unique(sampledClusters.begin(), - sampledClusters.end()), - sampledClusters.end()); + sampledClusters.erase(std::unique(sampledClusters.begin(), sampledClusters.end()), sampledClusters.end()); CPPUNIT_ASSERT(sampledClusters.size() >= 2); numberClustersSampled.add(static_cast(sampledClusters.size())); @@ -765,31 +656,19 @@ void CKMeansFastTest::testPlusPlus() CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0, maths::CBasicStatistics::mean(numberClustersSampled), 0.3); } -CppUnit::Test *CKMeansFastTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CKMeansFastTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMeansFastTest::testDataPropagation", - &CKMeansFastTest::testDataPropagation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMeansFastTest::testFilter", - &CKMeansFastTest::testFilter) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMeansFastTest::testCentroids", - &CKMeansFastTest::testCentroids) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMeansFastTest::testClosestPoints", - &CKMeansFastTest::testClosestPoints) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMeansFastTest::testRun", - &CKMeansFastTest::testRun) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMeansFastTest::testRunWithSphericalClusters", - &CKMeansFastTest::testRunWithSphericalClusters) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMeansFastTest::testPlusPlus", - &CKMeansFastTest::testPlusPlus) ); +CppUnit::Test* CKMeansFastTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CKMeansFastTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CKMeansFastTest::testDataPropagation", &CKMeansFastTest::testDataPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testFilter", &CKMeansFastTest::testFilter)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testCentroids", &CKMeansFastTest::testCentroids)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CKMeansFastTest::testClosestPoints", &CKMeansFastTest::testClosestPoints)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testRun", &CKMeansFastTest::testRun)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testRunWithSphericalClusters", + &CKMeansFastTest::testRunWithSphericalClusters)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testPlusPlus", &CKMeansFastTest::testPlusPlus)); return suiteOfTests; } diff --git a/lib/maths/unittest/CKMeansFastTest.h b/lib/maths/unittest/CKMeansFastTest.h index b5dfd0a63e..2f2a153e66 100644 --- a/lib/maths/unittest/CKMeansFastTest.h +++ b/lib/maths/unittest/CKMeansFastTest.h @@ -9,18 +9,17 @@ #include -class CKMeansFastTest : public CppUnit::TestFixture -{ - public: - void testDataPropagation(); - void testFilter(); - void testCentroids(); - void testClosestPoints(); - void testRun(); - void testRunWithSphericalClusters(); - void testPlusPlus(); +class CKMeansFastTest : public CppUnit::TestFixture { +public: + void testDataPropagation(); + void testFilter(); + void testCentroids(); + void testClosestPoints(); + void testRun(); + void testRunWithSphericalClusters(); + void testPlusPlus(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); 
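// testPlusPlus above exercises D^2 seeding: the first centre is drawn
// uniformly, then each further centre is drawn with probability proportional
// to the squared distance from the nearest centre chosen so far, which is why
// the test expects the seeds to land in several distinct true clusters. A
// compact one-dimensional sketch of the sampling loop (an assumed helper, not
// CKMeansPlusPlusInitialization itself):
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <random>
#include <vector>

std::vector<double> plusPlusSeed(const std::vector<double>& points, std::size_t k, std::mt19937& rng) {
    std::uniform_int_distribution<std::size_t> first(0, points.size() - 1);
    std::vector<double> centres{points[first(rng)]};
    while (centres.size() < k) {
        std::vector<double> d2(points.size());
        for (std::size_t i = 0; i < points.size(); ++i) {
            double dmin = std::numeric_limits<double>::max();
            for (double c : centres) {
                dmin = std::min(dmin, std::fabs(points[i] - c));
            }
            d2[i] = dmin * dmin; // D^2 weight for point i
        }
        // Draw index i with probability d2[i] / sum(d2).
        std::discrete_distribution<std::size_t> draw(d2.begin(), d2.end());
        centres.push_back(points[draw(rng)]);
    }
    return centres;
}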
}; #endif // INCLUDED_CKMeansFastTest_h diff --git a/lib/maths/unittest/CKMeansOnlineTest.cc b/lib/maths/unittest/CKMeansOnlineTest.cc index b1ff1f5b8c..a7ecfedbdc 100644 --- a/lib/maths/unittest/CKMeansOnlineTest.cc +++ b/lib/maths/unittest/CKMeansOnlineTest.cc @@ -21,8 +21,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TSizeVec = std::vector; using TSizeVecVec = std::vector; @@ -37,43 +36,31 @@ using TVector5Vec = std::vector; using TMeanVar5Accumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; template -class CKMeansOnlineTestForTest : public maths::CKMeansOnline -{ - public: - using TSphericalClusterVec = typename maths::CKMeansOnline::TSphericalClusterVec; - using TDoubleMeanVarAccumulator = typename maths::CKMeansOnline::TDoubleMeanVarAccumulator; - using TFloatMeanAccumulatorDoublePr = typename maths::CKMeansOnline::TFloatMeanAccumulatorDoublePr; - - public: - CKMeansOnlineTestForTest(std::size_t k, double decayRate = 0.0) : - maths::CKMeansOnline(k, decayRate) - {} - - static void add(const POINT &x, - double count, - TFloatMeanAccumulatorDoublePr &cluster) - { - maths::CKMeansOnline::add(x, count, cluster); - } +class CKMeansOnlineTestForTest : public maths::CKMeansOnline { +public: + using TSphericalClusterVec = typename maths::CKMeansOnline::TSphericalClusterVec; + using TDoubleMeanVarAccumulator = typename maths::CKMeansOnline::TDoubleMeanVarAccumulator; + using TFloatMeanAccumulatorDoublePr = typename maths::CKMeansOnline::TFloatMeanAccumulatorDoublePr; - static double variance(const TDoubleMeanVarAccumulator &moments) - { - return maths::CKMeansOnline::variance(moments); - } +public: + CKMeansOnlineTestForTest(std::size_t k, double decayRate = 0.0) : maths::CKMeansOnline(k, decayRate) {} + + static void add(const POINT& x, double count, TFloatMeanAccumulatorDoublePr& cluster) { + maths::CKMeansOnline::add(x, count, cluster); + } + + static double variance(const TDoubleMeanVarAccumulator& moments) { return maths::CKMeansOnline::variance(moments); } }; template -std::string print(const POINT &point) -{ +std::string print(const POINT& point) { std::ostringstream result; result << point; return result.str(); } - } -void CKMeansOnlineTest::testVariance() -{ +void CKMeansOnlineTest::testVariance() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CKMeansOnlineTest::testVariance |"); LOG_DEBUG("+-----------------------------------+"); @@ -83,23 +70,14 @@ void CKMeansOnlineTest::testVariance() test::CRandomNumbers rng; - for (std::size_t t = 1u; t <= 50; ++t) - { + for (std::size_t t = 1u; t <= 50; ++t) { LOG_DEBUG("*** test = " << t << " ***"); TDoubleVec coordinates; rng.generateUniformSamples(0.0, 10.0, 50, coordinates); TVector5Vec points; - for (std::size_t i = 0u; i < coordinates.size(); i += 5) - { - double c[] = - { - coordinates[i+0], - coordinates[i+1], - coordinates[i+2], - coordinates[i+3], - coordinates[i+4] - }; + for (std::size_t i = 0u; i < coordinates.size(); i += 5) { + double c[] = {coordinates[i + 0], coordinates[i + 1], coordinates[i + 2], coordinates[i + 3], coordinates[i + 4]}; points.push_back(TVector5(c)); } @@ -107,8 +85,7 @@ void CKMeansOnlineTest::testVariance() actual.add(points); TMeanVarAccumulator expected; - for (std::size_t i = 0u; i < coordinates.size(); ++i) - { + for (std::size_t i = 0u; i < coordinates.size(); ++i) { expected.add(coordinates[i] - maths::CBasicStatistics::mean(actual)(i % 5)); } @@ -121,8 +98,7 @@ void CKMeansOnlineTest::testVariance() } } -void 
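// CKMeansOnlineTestForTest below is the usual unit-test seam: derive from the
// class under test and re-export the protected pieces the assertions need.
// The minimal form of the pattern (CWidget is a made-up example class):
class CWidget {
protected:
    static double halve(double x) { return x / 2.0; }
};

class CWidgetForTest : public CWidget {
public:
    using CWidget::halve; // widen access so tests can call it directly
};
// A test can now assert on CWidgetForTest::halve(3.0) without making the
// production member public.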
CKMeansOnlineTest::testAdd() -{ +void CKMeansOnlineTest::testAdd() { LOG_DEBUG("+------------------------------+"); LOG_DEBUG("| CKMeansOnlineTest::testAdd |"); LOG_DEBUG("+------------------------------+"); @@ -134,8 +110,7 @@ void CKMeansOnlineTest::testAdd() test::CRandomNumbers rng; - for (std::size_t t = 1u; t <= 50; ++t) - { + for (std::size_t t = 1u; t <= 50; ++t) { LOG_DEBUG("*** test = " << t << " ***"); TDoubleVec coordinates; @@ -143,45 +118,34 @@ void CKMeansOnlineTest::testAdd() TDoubleVec counts; rng.generateUniformSamples(1.0, 2.0, 20, counts); TVector2Vec points; - for (std::size_t i = 0u; i < coordinates.size(); i += 2) - { - double c[] = - { - coordinates[i+0], - coordinates[i+1] - }; + for (std::size_t i = 0u; i < coordinates.size(); i += 2) { + double c[] = {coordinates[i + 0], coordinates[i + 1]}; points.push_back(TVector2(c)); } TMean2AccumulatorDoublePr actual; TMeanVar2Accumulator expected; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { CKMeansOnlineTestForTest::add(points[i], counts[i], actual); expected.add(points[i], counts[i]); } TVector2 ones(1.0); - LOG_DEBUG("actual = " << maths::CBasicStatistics::mean(actual.first) - << "," << actual.second); - LOG_DEBUG("expected = " << maths::CBasicStatistics::mean(expected) - << "," << maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) - / static_cast(ones.dimension())); + LOG_DEBUG("actual = " << maths::CBasicStatistics::mean(actual.first) << "," << actual.second); + LOG_DEBUG("expected = " + << maths::CBasicStatistics::mean(expected) << "," + << maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / static_cast(ones.dimension())); - CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), - print(maths::CBasicStatistics::mean(actual.first))); + CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), print(maths::CBasicStatistics::mean(actual.first))); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) - / static_cast(ones.dimension()), - actual.second, - 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) - / static_cast(ones.dimension())); + maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / static_cast(ones.dimension()), + actual.second, + 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / static_cast(ones.dimension())); } } -void CKMeansOnlineTest::testReduce() -{ +void CKMeansOnlineTest::testReduce() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CKMeansOnlineTest::testReduce |"); LOG_DEBUG("+---------------------------------+"); @@ -196,8 +160,7 @@ void CKMeansOnlineTest::testReduce() test::CRandomNumbers rng; - for (std::size_t t = 1u; t <= 10; ++t) - { + for (std::size_t t = 1u; t <= 10; ++t) { LOG_DEBUG("*** test = " << t << " ***"); TDoubleVec coordinates; @@ -205,13 +168,8 @@ void CKMeansOnlineTest::testReduce() TDoubleVec counts; rng.generateUniformSamples(1.0, 2.0, 21, counts); TVector2Vec points; - for (std::size_t i = 0u; i < coordinates.size(); i += 2) - { - double c[] = - { - coordinates[i+0], - coordinates[i+1] - }; + for (std::size_t i = 0u; i < coordinates.size(); i += 2) { + double c[] = {coordinates[i + 0], coordinates[i + 1]}; points.push_back(TVector2(c)); } @@ -221,40 +179,33 @@ void CKMeansOnlineTest::testReduce() TVector2 ones(1.0); - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i 
< points.size(); ++i) { kmeans.add(points[i], counts[i]); expected.add(points[i], counts[i]); - if ((i+1) % 7 == 0) - { + if ((i + 1) % 7 == 0) { CKMeansOnlineTestForTest::TSphericalClusterVec clusters; kmeans.clusters(clusters); CPPUNIT_ASSERT(clusters.size() <= 10); TMeanVar2Accumulator actual; - for (std::size_t j = 0u; j < clusters.size(); ++j) - { + for (std::size_t j = 0u; j < clusters.size(); ++j) { actual.add(clusters[j]); } LOG_DEBUG("expected = " << expected); LOG_DEBUG("actual = " << actual); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::count(expected), - maths::CBasicStatistics::count(actual), 1e-10); - CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), - print(maths::CBasicStatistics::mean(actual))); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones), - maths::CBasicStatistics::maximumLikelihoodVariance(actual).inner(ones), - 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::count(expected), maths::CBasicStatistics::count(actual), 1e-10); + CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), print(maths::CBasicStatistics::mean(actual))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones), + maths::CBasicStatistics::maximumLikelihoodVariance(actual).inner(ones), + 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones)); } } } } -void CKMeansOnlineTest::testClustering() -{ +void CKMeansOnlineTest::testClustering() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CKMeansOnlineTest::testClustering |"); LOG_DEBUG("+-------------------------------------+"); @@ -268,26 +219,19 @@ void CKMeansOnlineTest::testClustering() TMeanVarAccumulator cost; TMeanVarAccumulator costOnline; - double a[] = { 0.0, 20.0 }; - double b[] = { 5.0, 30.0 }; + double a[] = {0.0, 20.0}; + double b[] = {5.0, 30.0}; TVector2Vec points; - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { TDoubleVec coordinates; rng.generateUniformSamples(a[i], b[i], 200, coordinates); - for (std::size_t j = 0u; j < coordinates.size(); j += 2) - { - double c[] = - { - coordinates[j+0], - coordinates[j+1] - }; + for (std::size_t j = 0u; j < coordinates.size(); j += 2) { + double c[] = {coordinates[j + 0], coordinates[j + 1]}; points.push_back(TVector2(c)); } } - for (std::size_t t = 1u; t <= 10; ++t) - { + for (std::size_t t = 1u; t <= 10; ++t) { LOG_DEBUG("*** test = " << t << " ***"); maths::CKMeansFast kmeans; @@ -296,8 +240,7 @@ void CKMeansOnlineTest::testClustering() TVector2Vec centres; TVector2VecVec clusters; maths::CPRNG::CXorOShiro128Plus rng_; - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { maths::CKMeansPlusPlusInitialization seedCentres(rng_); seedCentres.run(points, 2, centres); kmeans.setCentres(centres); @@ -311,8 +254,7 @@ void CKMeansOnlineTest::testClustering() maths::CKMeansOnline kmeansOnline(24); double costOnline_ = std::numeric_limits::max(); { - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { kmeansOnline.add(points[i]); } maths::CKMeansOnline::TSphericalClusterVecVec clustersOnline; @@ -321,8 +263,7 @@ void CKMeansOnlineTest::testClustering() criterion.add(clustersOnline); costOnline_ = criterion.calculate(); } - LOG_DEBUG("cost = " << cost_ - << ", cost online = " << costOnline_); + 
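// testReduce above verifies that collapsing points into a bounded number of
// spherical clusters preserves the total count, the mean and the maximum
// likelihood variance. Those invariants hold because merging two summaries
// pools their moments. A one-dimensional sketch of the moment-preserving merge
// (field names are illustrative):
struct SMoments {
    double s_Count;
    double s_Mean;
    double s_Variance; // maximum likelihood, i.e. normalised by the count
};

SMoments merge(const SMoments& a, const SMoments& b) {
    double n = a.s_Count + b.s_Count;
    double m = (a.s_Count * a.s_Mean + b.s_Count * b.s_Mean) / n;
    // Pooled variance: within-cluster spread plus between-cluster spread.
    double v = (a.s_Count * (a.s_Variance + (a.s_Mean - m) * (a.s_Mean - m)) +
                b.s_Count * (b.s_Variance + (b.s_Mean - m) * (b.s_Mean - m))) / n;
    return {n, m, v};
}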
LOG_DEBUG("cost = " << cost_ << ", cost online = " << costOnline_); cost.add(cost_); costOnline.add(costOnline_); @@ -333,12 +274,9 @@ void CKMeansOnlineTest::testClustering() LOG_DEBUG("cost = " << cost); LOG_DEBUG("cost online = " << costOnline); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(costOnline), - maths::CBasicStatistics::mean(cost), - 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(costOnline)), - std::sqrt(maths::CBasicStatistics::variance(cost)), - 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(costOnline), maths::CBasicStatistics::mean(cost), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::sqrt(maths::CBasicStatistics::variance(costOnline)), std::sqrt(maths::CBasicStatistics::variance(cost)), 1e-10); } { @@ -348,18 +286,12 @@ void CKMeansOnlineTest::testClustering() TDoubleVec coordinates; rng.generateUniformSamples(0.0, 10.0, 1000, coordinates); TVector2Vec points; - for (std::size_t i = 0u; i < coordinates.size(); i += 2) - { - double v[] = - { - coordinates[i+0], - coordinates[i+1] - }; + for (std::size_t i = 0u; i < coordinates.size(); i += 2) { + double v[] = {coordinates[i + 0], coordinates[i + 1]}; points.push_back(TVector2(v)); } - for (std::size_t t = 1u; t <= 20; ++t) - { + for (std::size_t t = 1u; t <= 20; ++t) { LOG_DEBUG("*** test = " << t << " ***"); maths::CKMeansFast kmeans; @@ -370,8 +302,7 @@ void CKMeansOnlineTest::testClustering() TVector2Vec centres; TVector2VecVec clusters; maths::CPRNG::CXorOShiro128Plus rng_; - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { maths::CKMeansPlusPlusInitialization seedCentres(rng_); seedCentres.run(points, 3, centres); kmeans.setCentres(centres); @@ -384,8 +315,7 @@ void CKMeansOnlineTest::testClustering() double costOnline_ = std::numeric_limits::max(); { - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { kmeansOnline.add(points[i]); } maths::CKMeansOnline::TSphericalClusterVecVec clustersOnline; @@ -394,8 +324,7 @@ void CKMeansOnlineTest::testClustering() criterion.add(clustersOnline); costOnline_ = criterion.calculate(); } - LOG_DEBUG("cost = " << cost_ - << ", cost online = " << costOnline_); + LOG_DEBUG("cost = " << cost_ << ", cost online = " << costOnline_); cost.add(cost_); costOnline.add(costOnline_); @@ -406,15 +335,13 @@ void CKMeansOnlineTest::testClustering() LOG_DEBUG("cost = " << cost); LOG_DEBUG("cost online = " << costOnline); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(costOnline) - <= 1.01 * maths::CBasicStatistics::mean(cost)); - CPPUNIT_ASSERT(std::sqrt(maths::CBasicStatistics::variance(costOnline)) - <= 26.0 * std::sqrt(maths::CBasicStatistics::variance(cost))); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(costOnline) <= 1.01 * maths::CBasicStatistics::mean(cost)); + CPPUNIT_ASSERT(std::sqrt(maths::CBasicStatistics::variance(costOnline)) <= + 26.0 * std::sqrt(maths::CBasicStatistics::variance(cost))); } } -void CKMeansOnlineTest::testSplit() -{ +void CKMeansOnlineTest::testSplit() { LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| CKMeansOnlineTest::testSplit |"); LOG_DEBUG("+--------------------------------+"); @@ -426,36 +353,29 @@ void CKMeansOnlineTest::testSplit() test::CRandomNumbers rng; - double m[] = { 5.0, 15.0 }; - double v[] = { 5.0, 10.0 }; + double m[] = {5.0, 15.0}; + double v[] = {5.0, 10.0}; TVector2Vec points; - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { TDoubleVec 
coordinates; rng.generateNormalSamples(m[i], v[i], 350, coordinates); - for (std::size_t j = 0u; j < coordinates.size(); j += 2) - { - double c[] = - { - coordinates[j+0], - coordinates[j+1] - }; + for (std::size_t j = 0u; j < coordinates.size(); j += 2) { + double c[] = {coordinates[j + 0], coordinates[j + 1]}; points.push_back(TVector2(c)); } } maths::CKMeansOnline kmeansOnline(30); - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { kmeansOnline.add(points[i]); } CPPUNIT_ASSERT(!kmeansOnline.buffering()); - std::size_t one[] = { 0, 2, 7, 18, 19, 22 }; - std::size_t two[] = { 3, 4, 5, 6, 10, 11, 23, 24 }; - std::size_t three[] = { 1, 8, 9, 12, 13, 14, 15, 16, 17 }; - std::size_t four[] = { 20, 21, 25, 26, 27, 28 }; - std::size_t five[] = { 29 }; + std::size_t one[] = {0, 2, 7, 18, 19, 22}; + std::size_t two[] = {3, 4, 5, 6, 10, 11, 23, 24}; + std::size_t three[] = {1, 8, 9, 12, 13, 14, 15, 16, 17}; + std::size_t four[] = {20, 21, 25, 26, 27, 28}; + std::size_t five[] = {29}; TSizeVecVec split; split.push_back(TSizeVec(boost::begin(one), boost::end(one))); split.push_back(TSizeVec(boost::begin(two), boost::end(two))); @@ -469,30 +389,24 @@ void CKMeansOnlineTest::testSplit() kmeansOnline.split(split, clusterers); CPPUNIT_ASSERT_EQUAL(split.size(), clusterers.size()); - for (std::size_t i = 0u; i < split.size(); ++i) - { + for (std::size_t i = 0u; i < split.size(); ++i) { maths::CKMeansOnline::TSphericalClusterVec actual; clusterers[i].clusters(actual); CPPUNIT_ASSERT(!clusterers[i].buffering()); CPPUNIT_ASSERT_EQUAL(split[i].size(), actual.size()); maths::CKMeansOnline::TSphericalClusterVec expected; - for (std::size_t j = 0u; j < split[i].size(); ++j) - { + for (std::size_t j = 0u; j < split[i].size(); ++j) { expected.push_back(clusters[split[i][j]]); } - LOG_DEBUG("expected clusters = " - << core::CContainerPrinter::print(expected)); - LOG_DEBUG("actual clusters = " - << core::CContainerPrinter::print(actual)); + LOG_DEBUG("expected clusters = " << core::CContainerPrinter::print(expected)); + LOG_DEBUG("actual clusters = " << core::CContainerPrinter::print(actual)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), - core::CContainerPrinter::print(actual)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(actual)); } } -void CKMeansOnlineTest::testMerge() -{ +void CKMeansOnlineTest::testMerge() { LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| CKMeansOnlineTest::testMerge |"); LOG_DEBUG("+--------------------------------+"); @@ -506,44 +420,30 @@ void CKMeansOnlineTest::testMerge() test::CRandomNumbers rng; - double m[] = { 5.0, 15.0 }; - double v[] = { 5.0, 10.0 }; + double m[] = {5.0, 15.0}; + double v[] = {5.0, 10.0}; TVector2Vec points[2]; - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { TDoubleVec coordinates; rng.generateNormalSamples(m[i], v[i], 350, coordinates); - for (std::size_t j = 0u; j < coordinates.size(); j += 2) - { - double c[] = - { - coordinates[j+0], - coordinates[j+1] - }; + for (std::size_t j = 0u; j < coordinates.size(); j += 2) { + double c[] = {coordinates[j + 0], coordinates[j + 1]}; points[i].push_back(TVector2(c)); } } - maths::CKMeansOnline kmeans[] = - { - maths::CKMeansOnline(20), - maths::CKMeansOnline(25) - }; - for (std::size_t i = 0u; i < 2; ++i) - { - for (std::size_t j = 0u; j < points[i].size(); ++j) - { + maths::CKMeansOnline kmeans[] = {maths::CKMeansOnline(20), 
maths::CKMeansOnline(25)}; + for (std::size_t i = 0u; i < 2; ++i) { + for (std::size_t j = 0u; j < points[i].size(); ++j) { kmeans[i].add(points[i][j]); } } TMeanVar2Accumulator expected; - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { CKMeansOnlineTestForTest::TSphericalClusterVec clusters; kmeans[i].clusters(clusters); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { + for (std::size_t j = 0u; j < clusters.size(); ++j) { expected.add(clusters[j]); } } @@ -553,8 +453,7 @@ void CKMeansOnlineTest::testMerge() TMeanVar2Accumulator actual; CKMeansOnlineTestForTest::TSphericalClusterVec clusters; kmeans[0].clusters(clusters); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { + for (std::size_t j = 0u; j < clusters.size(); ++j) { actual.add(clusters[j]); } @@ -562,18 +461,14 @@ void CKMeansOnlineTest::testMerge() LOG_DEBUG("expected = " << expected); LOG_DEBUG("actual = " << actual); - CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::count(expected), - maths::CBasicStatistics::count(actual)); - CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), - print(maths::CBasicStatistics::mean(actual))); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones), - maths::CBasicStatistics::maximumLikelihoodVariance(actual).inner(ones), - 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones)); + CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::count(expected), maths::CBasicStatistics::count(actual)); + CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), print(maths::CBasicStatistics::mean(actual))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones), + maths::CBasicStatistics::maximumLikelihoodVariance(actual).inner(ones), + 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones)); } -void CKMeansOnlineTest::testPropagateForwardsByTime() -{ +void CKMeansOnlineTest::testPropagateForwardsByTime() { LOG_DEBUG("+--------------------------------------------------+"); LOG_DEBUG("| CKMeansOnlineTest::testPropagateForwardsByTime |"); LOG_DEBUG("+--------------------------------------------------+"); @@ -587,25 +482,18 @@ void CKMeansOnlineTest::testPropagateForwardsByTime() TVector2Vec points; TDoubleVec coordinates; rng.generateNormalSamples(m, v, 700, coordinates); - double outlier_[] = { 50.0, 20.0 }; + double outlier_[] = {50.0, 20.0}; TVector2 outlier(outlier_); - for (std::size_t i = 0u; i < coordinates.size(); i += 2) - { - double c[] = - { - coordinates[i+0], - coordinates[i+1] - }; + for (std::size_t i = 0u; i < coordinates.size(); i += 2) { + double c[] = {coordinates[i + 0], coordinates[i + 1]}; points.push_back(TVector2(c)); - if (i == 200) - { + if (i == 200) { points.push_back(outlier); } } maths::CKMeansOnline kmeans(5, 0.1); - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { kmeans.add(points[i]); } @@ -619,14 +507,12 @@ void CKMeansOnlineTest::testPropagateForwardsByTime() LOG_DEBUG("clusters after = " << core::CContainerPrinter::print(clusters)); CPPUNIT_ASSERT_EQUAL(std::size_t(4), clusters.size()); - for (std::size_t i = 0u; i < clusters.size(); ++i) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { CPPUNIT_ASSERT(clusters[i] != outlier); } } -void CKMeansOnlineTest::testSample() -{ +void CKMeansOnlineTest::testSample() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| 
CKMeansOnlineTest::testSample |"); LOG_DEBUG("+---------------------------------+"); @@ -639,30 +525,18 @@ void CKMeansOnlineTest::testSample() maths::CSampling::seed(); - std::size_t n[] = { 500, 500 }; - double means[][2] = - { - { 0.0, 10.0 }, - { 20.0, 30.0 } - }; - double covariances[][3] = - { - { 10.0, 2.0, 8.0 }, - { 15.0, 5.0, 12.0 } - }; + std::size_t n[] = {500, 500}; + double means[][2] = {{0.0, 10.0}, {20.0, 30.0}}; + double covariances[][3] = {{10.0, 2.0, 8.0}, {15.0, 5.0, 12.0}}; maths::CBasicStatistics::SSampleCovariances expectedSampleCovariances[2]; TVector2Vec samples; - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { TVector2 mean(means[i]); TMatrix2 covariance(covariances[i], covariances[i] + 3); TVector2Vec modeSamples; - maths::CSampling::multivariateNormalSample(mean, - covariance, - n[i], - modeSamples); + maths::CSampling::multivariateNormalSample(mean, covariance, n[i], modeSamples); expectedSampleCovariances[i].add(modeSamples); samples.insert(samples.end(), modeSamples.begin(), modeSamples.end()); } @@ -674,22 +548,19 @@ void CKMeansOnlineTest::testSample() maths::CKMeansOnline kmeans(10, 0.1); TVector2Vec expectedSampled; - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { expectedSampled.push_back(samples[i]); std::sort(expectedSampled.begin(), expectedSampled.end()); kmeans.add(samples[i]); TVector2Vec sampled; - kmeans.sample(i+1, sampled); + kmeans.sample(i + 1, sampled); std::sort(sampled.begin(), sampled.end()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), - core::CContainerPrinter::print(sampled)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), core::CContainerPrinter::print(sampled)); } - for (std::size_t i = 10u; i < samples.size(); ++i) - { + for (std::size_t i = 10u; i < samples.size(); ++i) { kmeans.add(samples[i]); } @@ -699,26 +570,21 @@ void CKMeansOnlineTest::testSample() LOG_DEBUG("sampled = " << core::CContainerPrinter::print(sampled)); maths::CBasicStatistics::SSampleCovariances sampleCovariances[2]; - for (std::size_t i = 0u; i < sampled.size(); ++i) - { - if ((sampled[i] - TVector2(means[0])).euclidean() - < (sampled[i] - TVector2(means[1])).euclidean()) - { + for (std::size_t i = 0u; i < sampled.size(); ++i) { + if ((sampled[i] - TVector2(means[0])).euclidean() < (sampled[i] - TVector2(means[1])).euclidean()) { sampleCovariances[0].add(sampled[i]); - } - else - { + } else { sampleCovariances[1].add(sampled[i]); } } - TVector2 expectedMean0 = maths::CBasicStatistics::mean(expectedSampleCovariances[0]); + TVector2 expectedMean0 = maths::CBasicStatistics::mean(expectedSampleCovariances[0]); TMatrix2 expectedCovariance0 = maths::CBasicStatistics::covariances(expectedSampleCovariances[0]); - TVector2 expectedMean1 = maths::CBasicStatistics::mean(expectedSampleCovariances[1]); + TVector2 expectedMean1 = maths::CBasicStatistics::mean(expectedSampleCovariances[1]); TMatrix2 expectedCovariance1 = maths::CBasicStatistics::covariances(expectedSampleCovariances[1]); - TVector2 mean0 = maths::CBasicStatistics::mean(sampleCovariances[0]); + TVector2 mean0 = maths::CBasicStatistics::mean(sampleCovariances[0]); TMatrix2 covariance0 = maths::CBasicStatistics::covariances(sampleCovariances[0]); - TVector2 mean1 = maths::CBasicStatistics::mean(sampleCovariances[1]); + TVector2 mean1 = maths::CBasicStatistics::mean(sampleCovariances[1]); TMatrix2 covariance1 = maths::CBasicStatistics::covariances(sampleCovariances[1]); LOG_DEBUG("expected 
mean, variance 0 = " << expectedMean0 << ", " << expectedCovariance0); @@ -726,27 +592,20 @@ void CKMeansOnlineTest::testSample() LOG_DEBUG("expected mean, variance 1 = " << expectedMean1 << ", " << expectedCovariance1); LOG_DEBUG("mean, variance 1 = " << mean1 << ", " << covariance1); - double meanError0 = (mean0 - expectedMean0).euclidean() - / expectedMean0.euclidean(); - double covarianceError0 = (covariance0 - expectedCovariance0).frobenius() - / expectedCovariance0.frobenius(); - LOG_DEBUG("mean error 0 = " << meanError0 - << ", covariance error 0 = " << covarianceError0); + double meanError0 = (mean0 - expectedMean0).euclidean() / expectedMean0.euclidean(); + double covarianceError0 = (covariance0 - expectedCovariance0).frobenius() / expectedCovariance0.frobenius(); + LOG_DEBUG("mean error 0 = " << meanError0 << ", covariance error 0 = " << covarianceError0); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError0, 0.01); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, covarianceError0, 0.27); - double meanError1 = (mean1 - expectedMean1).euclidean() - / expectedMean0.euclidean(); - double covarianceError1 = (covariance1 - expectedCovariance1).frobenius() - / expectedCovariance1.frobenius(); - LOG_DEBUG("mean error 1 = " << meanError1 - << ", covariance error 1 = " << covarianceError1); + double meanError1 = (mean1 - expectedMean1).euclidean() / expectedMean0.euclidean(); + double covarianceError1 = (covariance1 - expectedCovariance1).frobenius() / expectedCovariance1.frobenius(); + LOG_DEBUG("mean error 1 = " << meanError1 << ", covariance error 1 = " << covarianceError1); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError1, 0.01); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, covarianceError1, 0.24); } -void CKMeansOnlineTest::testPersist() -{ +void CKMeansOnlineTest::testPersist() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CKMeansOnlineTest::testPersist |"); LOG_DEBUG("+----------------------------------+"); @@ -756,14 +615,12 @@ void CKMeansOnlineTest::testPersist() TDoubleVec coordinates; rng.generateUniformSamples(0.0, 400.0, 998, coordinates); TVector2Vec points; - for (std::size_t i = 0u; i < coordinates.size(); i += 2) - { - points.push_back(TVector2(&coordinates[i], &coordinates[i+2])); + for (std::size_t i = 0u; i < coordinates.size(); i += 2) { + points.push_back(TVector2(&coordinates[i], &coordinates[i + 2])); } maths::CKMeansOnline origKmeans(25, 0.1); - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { origKmeans.add(points[i]); } @@ -788,16 +645,11 @@ void CKMeansOnlineTest::testPersist() maths::MINIMUM_CLUSTER_SPLIT_FRACTION, maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); - CPPUNIT_ASSERT(traverser.traverseSubLevel( - boost::bind(&maths::CKMeansOnline::acceptRestoreTraverser, - &restoredKmeans, - boost::cref(params), - _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(&maths::CKMeansOnline::acceptRestoreTraverser, &restoredKmeans, boost::cref(params), _1))); - LOG_DEBUG("orig checksum = " << origKmeans.checksum() - << ", new checksum = " << restoredKmeans.checksum()); - CPPUNIT_ASSERT_EQUAL(origKmeans.checksum(), - restoredKmeans.checksum()); + LOG_DEBUG("orig checksum = " << origKmeans.checksum() << ", new checksum = " << restoredKmeans.checksum()); + CPPUNIT_ASSERT_EQUAL(origKmeans.checksum(), restoredKmeans.checksum()); std::string newXml; core::CRapidXmlStatePersistInserter inserter("root"); @@ -808,37 +660,20 @@ void CKMeansOnlineTest::testPersist() } } -CppUnit::Test 
*CKMeansOnlineTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CKMeansOnlineTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testVariance", - &CKMeansOnlineTest::testVariance) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testAdd", - &CKMeansOnlineTest::testAdd) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testReduce", - &CKMeansOnlineTest::testReduce) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testClustering", - &CKMeansOnlineTest::testClustering) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testSplit", - &CKMeansOnlineTest::testSplit) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testMerge", - &CKMeansOnlineTest::testMerge) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testPropagateForwardsByTime", - &CKMeansOnlineTest::testPropagateForwardsByTime) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testSample", - &CKMeansOnlineTest::testSample) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKMeansOnlineTest>( - "CKMeansOnlineTest::testPersist", - &CKMeansOnlineTest::testPersist) ); +CppUnit::Test* CKMeansOnlineTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CKMeansOnlineTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testVariance", &CKMeansOnlineTest::testVariance)); + suiteOfTests->addTest(new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testAdd", &CKMeansOnlineTest::testAdd)); + suiteOfTests->addTest(new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testReduce", &CKMeansOnlineTest::testReduce)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testClustering", &CKMeansOnlineTest::testClustering)); + suiteOfTests->addTest(new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testSplit", &CKMeansOnlineTest::testSplit)); + suiteOfTests->addTest(new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testMerge", &CKMeansOnlineTest::testMerge)); + suiteOfTests->addTest(new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testPropagateForwardsByTime", + &CKMeansOnlineTest::testPropagateForwardsByTime)); + suiteOfTests->addTest(new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testSample", &CKMeansOnlineTest::testSample)); + suiteOfTests->addTest(new CppUnit::TestCaller<CKMeansOnlineTest>("CKMeansOnlineTest::testPersist", &CKMeansOnlineTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CKMeansOnlineTest.h b/lib/maths/unittest/CKMeansOnlineTest.h index d1a68e53b2..620ffaaed6 100644 --- a/lib/maths/unittest/CKMeansOnlineTest.h +++ b/lib/maths/unittest/CKMeansOnlineTest.h @@ -9,20 +9,19 @@ #include <cppunit/extensions/HelperMacros.h> -class CKMeansOnlineTest : public CppUnit::TestFixture -{ - public: - void testVariance(); - void testAdd(); - void testReduce(); - void testClustering(); - void testSplit(); - void testMerge(); - void testPropagateForwardsByTime(); - void testSample(); - void testPersist(); +class CKMeansOnlineTest : public CppUnit::TestFixture { +public: + void testVariance(); + void testAdd(); + void testReduce(); + void testClustering(); + void testSplit(); + void testMerge(); + void testPropagateForwardsByTime(); + void testSample(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CKMeansOnlineTest_h diff --git a/lib/maths/unittest/CKMostCorrelatedTest.cc b/lib/maths/unittest/CKMostCorrelatedTest.cc index 5a68fb70ae..8b8cc094d4 100644 --- 
a/lib/maths/unittest/CKMostCorrelatedTest.cc +++ b/lib/maths/unittest/CKMostCorrelatedTest.cc @@ -26,8 +26,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector<double>; using TVector2 = maths::CVectorNx1<double, 2>; @@ -36,83 +35,56 @@ using TMatrix2 = maths::CSymmetricMatrixNxN<double, 2>; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator; -class CKMostCorrelatedForTest : public maths::CKMostCorrelated -{ - public: - using TCorrelation = maths::CKMostCorrelated::SCorrelation; - using TCorrelationVec = maths::CKMostCorrelated::TCorrelationVec; - using TSizeVectorPackedBitVectorPrUMap = maths::CKMostCorrelated::TSizeVectorPackedBitVectorPrUMap; - using TSizeVectorPackedBitVectorPrUMapCItr = maths::CKMostCorrelated::TSizeVectorPackedBitVectorPrUMapCItr; - using TMeanVarAccumulatorVec = maths::CKMostCorrelated::TMeanVarAccumulatorVec; - using maths::CKMostCorrelated::mostCorrelated; - using maths::CKMostCorrelated::correlations; - - public: - CKMostCorrelatedForTest(std::size_t size, double decayRate) : - maths::CKMostCorrelated(size, decayRate) - {} - - void mostCorrelated(TCorrelationVec &result) const - { - this->maths::CKMostCorrelated::mostCorrelated(result); - } +class CKMostCorrelatedForTest : public maths::CKMostCorrelated { +public: + using TCorrelation = maths::CKMostCorrelated::SCorrelation; + using TCorrelationVec = maths::CKMostCorrelated::TCorrelationVec; + using TSizeVectorPackedBitVectorPrUMap = maths::CKMostCorrelated::TSizeVectorPackedBitVectorPrUMap; + using TSizeVectorPackedBitVectorPrUMapCItr = maths::CKMostCorrelated::TSizeVectorPackedBitVectorPrUMapCItr; + using TMeanVarAccumulatorVec = maths::CKMostCorrelated::TMeanVarAccumulatorVec; + using maths::CKMostCorrelated::correlations; + using maths::CKMostCorrelated::mostCorrelated; - const TVectorVec &projections() const - { - return this->maths::CKMostCorrelated::projections(); - } +public: + CKMostCorrelatedForTest(std::size_t size, double decayRate) : maths::CKMostCorrelated(size, decayRate) {} - const TSizeVectorPackedBitVectorPrUMap &projected() const - { - return this->maths::CKMostCorrelated::projected(); - } + void mostCorrelated(TCorrelationVec& result) const { this->maths::CKMostCorrelated::mostCorrelated(result); } - const TCorrelationVec &correlations() const - { - return this->maths::CKMostCorrelated::correlations(); - } + const TVectorVec& projections() const { return this->maths::CKMostCorrelated::projections(); } - const TMeanVarAccumulatorVec &moments() const - { - return this->maths::CKMostCorrelated::moments(); - } + const TSizeVectorPackedBitVectorPrUMap& projected() const { return this->maths::CKMostCorrelated::projected(); } + + const TCorrelationVec& correlations() const { return this->maths::CKMostCorrelated::correlations(); } + + const TMeanVarAccumulatorVec& moments() const { return this->maths::CKMostCorrelated::moments(); } }; -double mutualInformation(const TDoubleVec &p1, - const TDoubleVec &p2) -{ +double mutualInformation(const TDoubleVec& p1, const TDoubleVec& p2) { std::size_t n = p1.size(); - double f1[] = { 0.0, 0.0 }; - double f2[] = { 0.0, 0.0 }; - double f12[][2] = { { 0.0, 0.0 }, { 0.0, 0.0 } }; + double f1[] = {0.0, 0.0}; + double f2[] = {0.0, 0.0}; + double f12[][2] = {{0.0, 0.0}, {0.0, 0.0}}; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { f1[p1[i] < 0 ? 0 : 1] += 1.0; f2[p2[i] < 0 ? 0 : 1] += 1.0; f12[p1[i] < 0 ? 0 : 1][p2[i] < 0 ? 0 : 1] += 1.0; }
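    // What follows is the plug-in estimate of the mutual information between
    // the signs of the two series: with p(i, j) the empirical cell
    // frequencies from the 2x2 co-occurrence counts accumulated above,
    //   I = sum_{i,j} p(i, j) * log(p(i, j) / (p(i) * p(j)))
    // and H1, H2 are the corresponding marginal entropies. (The return
    // statement falls outside this hunk; normalizing I by min(H1, H2), which
    // bounds the statistic to [0, 1], is assumed here.)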
- double I = 0.0; + double I = 0.0; double H1 = 0.0; double H2 = 0.0; - for (std::size_t i = 0u; i < 2; ++i) - { - for (std::size_t j = 0u; j < 2; ++j) - { - if (f12[i][j] > 0.0) - { - I += f12[i][j] / static_cast<double>(n) - * std::log(f12[i][j] * static_cast<double>(n) / f1[i] / f2[j]); + for (std::size_t i = 0u; i < 2; ++i) { + for (std::size_t j = 0u; j < 2; ++j) { + if (f12[i][j] > 0.0) { + I += f12[i][j] / static_cast<double>(n) * std::log(f12[i][j] * static_cast<double>(n) / f1[i] / f2[j]); } } - if (f1[i] > 0.0) - { + if (f1[i] > 0.0) { H1 -= f1[i] / static_cast<double>(n) * std::log(f1[i] / static_cast<double>(n)); } - if (f2[i] > 0.0) - { + if (f2[i] > 0.0) { H2 -= f2[i] / static_cast<double>(n) * std::log(f2[i] / static_cast<double>(n)); } } @@ -121,10 +93,9 @@ double mutualInformation(const TDoubleVec &p1, } void estimateCorrelation(const std::size_t trials, - const TVector2 &mean, - const TMatrix2 &covariance, - TMeanVarAccumulator &correlationEstimate) -{ + const TVector2& mean, + const TMatrix2& covariance, + TMeanVarAccumulator& correlationEstimate) { using TVector10 = maths::CVectorNx1<double, 10>; using TVector10Vec = std::vector<TVector10>; using TMeanVar2Accumulator = maths::CBasicStatistics::SSampleMeanVar<TVector2>::TAccumulator; @@ -133,62 +104,53 @@ void estimateCorrelation(const std::size_t trials, TMeanVar2Accumulator sampleMoments; - for (std::size_t t = 0u; t < trials; ++t) - { + for (std::size_t t = 0u; t < trials; ++t) { TVector2Vec samples; maths::CSampling::multivariateNormalSample(mean, covariance, 50, samples); TVector10Vec projections; TDoubleVec uniform01; rng.generateUniformSamples(0.0, 1.0, 500, uniform01); - for (std::size_t i = 0u; i < uniform01.size(); i += 10) - { - double v[] = - { - uniform01[i+0] < 0.5 ? -1.0 : 1.0, - uniform01[i+1] < 0.5 ? -1.0 : 1.0, - uniform01[i+2] < 0.5 ? -1.0 : 1.0, - uniform01[i+3] < 0.5 ? -1.0 : 1.0, - uniform01[i+4] < 0.5 ? -1.0 : 1.0, - uniform01[i+5] < 0.5 ? -1.0 : 1.0, - uniform01[i+6] < 0.5 ? -1.0 : 1.0, - uniform01[i+7] < 0.5 ? -1.0 : 1.0, - uniform01[i+8] < 0.5 ? -1.0 : 1.0, - uniform01[i+9] < 0.5 ? -1.0 : 1.0 - - }; + for (std::size_t i = 0u; i < uniform01.size(); i += 10) { + double v[] = {uniform01[i + 0] < 0.5 ? -1.0 : 1.0, + uniform01[i + 1] < 0.5 ? -1.0 : 1.0, + uniform01[i + 2] < 0.5 ? -1.0 : 1.0, + uniform01[i + 3] < 0.5 ? -1.0 : 1.0, + uniform01[i + 4] < 0.5 ? -1.0 : 1.0, + uniform01[i + 5] < 0.5 ? -1.0 : 1.0, + uniform01[i + 6] < 0.5 ? -1.0 : 1.0, + uniform01[i + 7] < 0.5 ? -1.0 : 1.0, + uniform01[i + 8] < 0.5 ? -1.0 : 1.0, + uniform01[i + 9] < 0.5 ? 
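                          // Each block of ten uniform variates is mapped to
                          // one random projection vector with independent
                          // +/-1 components; CKMostCorrelated estimates
                          // correlations by accumulating the standardized
                          // series against such sign projections (see the
                          // px and py accumulation below).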
-1.0 : 1.0 + + }; projections.push_back(TVector10(boost::begin(v), boost::end(v))); } TVector10 px(0.0); TVector10 py(0.0); - for (std::size_t i = 0u; i < projections.size(); ++i) - { + for (std::size_t i = 0u; i < projections.size(); ++i) { sampleMoments.add(samples[i]); - if (maths::CBasicStatistics::count(sampleMoments) > 1.0) - { - px += projections[i] * (samples[i](0) - maths::CBasicStatistics::mean(sampleMoments)(0)) - / std::sqrt(maths::CBasicStatistics::variance(sampleMoments)(0)); - py += projections[i] * (samples[i](1) - maths::CBasicStatistics::mean(sampleMoments)(1)) - / std::sqrt(maths::CBasicStatistics::variance(sampleMoments)(1)); + if (maths::CBasicStatistics::count(sampleMoments) > 1.0) { + px += projections[i] * (samples[i](0) - maths::CBasicStatistics::mean(sampleMoments)(0)) / + std::sqrt(maths::CBasicStatistics::variance(sampleMoments)(0)); + py += projections[i] * (samples[i](1) - maths::CBasicStatistics::mean(sampleMoments)(1)) / + std::sqrt(maths::CBasicStatistics::variance(sampleMoments)(1)); } } maths::CPackedBitVector ix(50, true); maths::CPackedBitVector iy(50, true); double correlation = CKMostCorrelatedForTest::TCorrelation::correlation(px, ix, py, iy); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("correlation = " << correlation); } correlationEstimate.add(correlation); } } - } -void CKMostCorrelatedTest::testCorrelation() -{ +void CKMostCorrelatedTest::testCorrelation() { LOG_DEBUG("+-----------------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testCorrelation |") LOG_DEBUG("+-----------------------------------------+") @@ -200,8 +162,8 @@ void CKMostCorrelatedTest::testCorrelation() { LOG_DEBUG("*** Weak Correlation ***"); - double m[] = { 10.0, 15.0 }; - double c[] = { 10.0, 2.0, 10.0 }; + double m[] = {10.0, 15.0}; + double c[] = {10.0, 2.0, 10.0}; TVector2 mean(boost::begin(m), boost::end(m)); TMatrix2 covariance(boost::begin(c), boost::end(c)); @@ -216,8 +178,8 @@ void CKMostCorrelatedTest::testCorrelation() { LOG_DEBUG("*** Medium Correlation ***"); - double m[] = { 10.0, 15.0 }; - double c[] = { 10.0, 5.0, 10.0 }; + double m[] = {10.0, 15.0}; + double c[] = {10.0, 5.0, 10.0}; TVector2 mean(boost::begin(m), boost::end(m)); TMatrix2 covariance(boost::begin(c), boost::end(c)); @@ -232,8 +194,8 @@ void CKMostCorrelatedTest::testCorrelation() { LOG_DEBUG("*** Strong Correlation ***"); - double m[] = { 10.0, 15.0 }; - double c[] = { 10.0, 9.0, 10.0 }; + double m[] = {10.0, 15.0}; + double c[] = {10.0, 9.0, 10.0}; TVector2 mean(boost::begin(m), boost::end(m)); TMatrix2 covariance(boost::begin(c), boost::end(c)); @@ -247,8 +209,7 @@ void CKMostCorrelatedTest::testCorrelation() } } -void CKMostCorrelatedTest::testNextProjection() -{ +void CKMostCorrelatedTest::testNextProjection() { LOG_DEBUG("+--------------------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testNextProjection |") LOG_DEBUG("+--------------------------------------------+") @@ -260,14 +221,7 @@ void CKMostCorrelatedTest::testNextProjection() maths::CSampling::seed(); - double combinations[][2] = - { - { 1.0, 0.0 }, - { 0.9, 0.1 }, - { 0.5, 0.5 }, - { 0.1, 0.9 }, - { 0.0, 1.0 } - }; + double combinations[][2] = {{1.0, 0.0}, {0.9, 0.1}, {0.5, 0.5}, {0.1, 0.9}, {0.0, 1.0}}; test::CRandomNumbers rng; @@ -281,40 +235,31 @@ void CKMostCorrelatedTest::testNextProjection() CKMostCorrelatedForTest::TVectorVec p1 = mostCorrelated.projections(); LOG_DEBUG("projections 1 = "); - for (std::size_t i = 0u; i < p1.size(); ++i) - { + for (std::size_t i = 0u; i < 
p1.size(); ++i) { LOG_DEBUG(" " << core::CContainerPrinter::print(p1[i])); } CPPUNIT_ASSERT(!p1.empty()); CPPUNIT_ASSERT_EQUAL(std::size_t(10), p1[0].dimension()); TDoubleVecVec projections1(10, TDoubleVec(p1.size())); - for (std::size_t i = 0u; i < p1.size(); ++i) - { - for (std::size_t j = 0u; j < p1[i].dimension(); ++j) - { + for (std::size_t i = 0u; i < p1.size(); ++i) { + for (std::size_t j = 0u; j < p1[i].dimension(); ++j) { projections1[j][i] = p1[i](j); } } TMeanAccumulator I1; - for (std::size_t i = 0u; i < projections1.size(); ++i) - { - for (std::size_t j = 0u; j < i; ++j) - { + for (std::size_t i = 0u; i < projections1.size(); ++i) { + for (std::size_t j = 0u; j < i; ++j) { I1.add(mutualInformation(projections1[i], projections1[j])); } } LOG_DEBUG("I1 = " << maths::CBasicStatistics::mean(I1)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(I1) < 0.1); - for (std::size_t i = 0u; i < 19; ++i) - { - for (std::size_t j = 0u, X = 0u; j < variables; j += 2) - { - for (std::size_t k = 0u; k < boost::size(combinations); ++k, ++X) - { - double x = combinations[k][0] * samples[i * variables + j] - + combinations[k][1] * samples[i * variables + j + 1]; + for (std::size_t i = 0u; i < 19; ++i) { + for (std::size_t j = 0u, X = 0u; j < variables; j += 2) { + for (std::size_t k = 0u; k < boost::size(combinations); ++k, ++X) { + double x = combinations[k][0] * samples[i * variables + j] + combinations[k][1] * samples[i * variables + j + 1]; mostCorrelated.add(X, x); } } @@ -322,12 +267,9 @@ void CKMostCorrelatedTest::testNextProjection() } // This should trigger the next projection to be generated. - for (std::size_t i = 0u, X = 0u; i < variables; i += 2) - { - for (std::size_t j = 0u; j < boost::size(combinations); ++j, ++X) - { - double x = combinations[j][0] * samples[19 * variables + i] - + combinations[j][1] * samples[19 * variables + i + 1]; + for (std::size_t i = 0u, X = 0u; i < variables; i += 2) { + for (std::size_t j = 0u; j < boost::size(combinations); ++j, ++X) { + double x = combinations[j][0] * samples[19 * variables + i] + combinations[j][1] * samples[19 * variables + i + 1]; mostCorrelated.add(X, x); } } @@ -342,56 +284,45 @@ void CKMostCorrelatedTest::testNextProjection() CKMostCorrelatedForTest::TVectorVec p2 = mostCorrelated.projections(); LOG_DEBUG("projections 2 = "); - for (std::size_t i = 0u; i < p2.size(); ++i) - { + for (std::size_t i = 0u; i < p2.size(); ++i) { LOG_DEBUG(" " << core::CContainerPrinter::print(p2[i])); } CPPUNIT_ASSERT(!p2.empty()); CPPUNIT_ASSERT_EQUAL(std::size_t(10), p2[0].dimension()); TDoubleVecVec projections2(10, TDoubleVec(p2.size())); - for (std::size_t i = 0u; i < p2.size(); ++i) - { - for (std::size_t j = 0u; j < p2[i].dimension(); ++j) - { + for (std::size_t i = 0u; i < p2.size(); ++i) { + for (std::size_t j = 0u; j < p2[i].dimension(); ++j) { projections2[j][i] = p2[i](j); } } TMeanAccumulator I2; - for (std::size_t i = 0u; i < projections2.size(); ++i) - { - for (std::size_t j = 0u; j < i; ++j) - { + for (std::size_t i = 0u; i < projections2.size(); ++i) { + for (std::size_t j = 0u; j < i; ++j) { I2.add(mutualInformation(projections2[i], projections2[j])); } } LOG_DEBUG("I2 = " << maths::CBasicStatistics::mean(I2)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(I2) < 0.1); TMeanAccumulator I12; - for (std::size_t i = 0u; i < projections1.size(); ++i) - { - for (std::size_t j = 0u; j < projections2.size(); ++j) - { + for (std::size_t i = 0u; i < projections1.size(); ++i) { + for (std::size_t j = 0u; j < projections2.size(); ++j) { 
I12.add(mutualInformation(projections1[i], projections2[j])); } } LOG_DEBUG("I12 = " << maths::CBasicStatistics::mean(I12)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(I12) < 0.1); - for (std::size_t i = 0u; i < moments1.size(); ++i) - { - CPPUNIT_ASSERT( maths::CBasicStatistics::count(moments1[i]) - > maths::CBasicStatistics::count(moments2[i])); + for (std::size_t i = 0u; i < moments1.size(); ++i) { + CPPUNIT_ASSERT(maths::CBasicStatistics::count(moments1[i]) > maths::CBasicStatistics::count(moments2[i])); } - for (std::size_t i = 0u; i < correlations2.size(); ++i) - { + for (std::size_t i = 0u; i < correlations2.size(); ++i) { CPPUNIT_ASSERT(maths::CBasicStatistics::count(correlations2[i].s_Correlation) > 0.0); CPPUNIT_ASSERT(maths::CBasicStatistics::count(correlations2[i].s_Correlation) < 1.0); } } -void CKMostCorrelatedTest::testMostCorrelated() -{ +void CKMostCorrelatedTest::testMostCorrelated() { LOG_DEBUG("+--------------------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testMostCorrelated |") LOG_DEBUG("+--------------------------------------------+") @@ -402,14 +333,7 @@ void CKMostCorrelatedTest::testMostCorrelated() maths::CSampling::seed(); - double combinations[][2] = - { - { 1.0, 0.0 }, - { 0.9, 0.1 }, - { 0.5, 0.5 }, - { 0.1, 0.9 }, - { 0.0, 1.0 } - }; + double combinations[][2] = {{1.0, 0.0}, {0.9, 0.1}, {0.5, 0.5}, {0.1, 0.9}, {0.0, 1.0}}; test::CRandomNumbers rng; @@ -421,14 +345,10 @@ void CKMostCorrelatedTest::testMostCorrelated() CKMostCorrelatedForTest mostCorrelated(100, 0.0); mostCorrelated.addVariables((variables * boost::size(combinations)) / 2); - for (std::size_t i = 0u; i < 19; ++i) - { - for (std::size_t j = 0u, X = 0u; j < variables; j += 2) - { - for (std::size_t k = 0u; k < boost::size(combinations); ++k, ++X) - { - double x = combinations[k][0] * samples[i * variables + j] - + combinations[k][1] * samples[i * variables + j + 1]; + for (std::size_t i = 0u; i < 19; ++i) { + for (std::size_t j = 0u, X = 0u; j < variables; j += 2) { + for (std::size_t k = 0u; k < boost::size(combinations); ++k, ++X) { + double x = combinations[k][0] * samples[i * variables + j] + combinations[k][1] * samples[i * variables + j + 1]; mostCorrelated.add(X, x); } } @@ -438,15 +358,12 @@ void CKMostCorrelatedTest::testMostCorrelated() TMaxCorrelationAccumulator expected(200); for (CKMostCorrelatedForTest::TSizeVectorPackedBitVectorPrUMapCItr x = mostCorrelated.projected().begin(); x != mostCorrelated.projected().end(); - ++x) - { + ++x) { std::size_t X = x->first; CKMostCorrelatedForTest::TSizeVectorPackedBitVectorPrUMapCItr y = x; - while (++y != mostCorrelated.projected().end()) - { + while (++y != mostCorrelated.projected().end()) { std::size_t Y = y->first; - CKMostCorrelatedForTest::TCorrelation cxy(X, x->second.first, x->second.second, - Y, y->second.first, y->second.second); + CKMostCorrelatedForTest::TCorrelation cxy(X, x->second.first, x->second.second, Y, y->second.first, y->second.second); expected.add(cxy); } } @@ -456,12 +373,10 @@ void CKMostCorrelatedTest::testMostCorrelated() CKMostCorrelatedForTest::TCorrelationVec actual; mostCorrelated.mostCorrelated(actual); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), - core::CContainerPrinter::print(actual)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(actual)); } -void CKMostCorrelatedTest::testRemoveVariables() -{ +void CKMostCorrelatedTest::testRemoveVariables() { LOG_DEBUG("+---------------------------------------------+") 
LOG_DEBUG("| CKMostCorrelatedTest::testRemoveVariables |") LOG_DEBUG("+---------------------------------------------+") @@ -474,32 +389,24 @@ void CKMostCorrelatedTest::testRemoveVariables() maths::CSampling::seed(); - double combinations[][2] = - { - { 0.8, 0.2 } - }; + double combinations[][2] = {{0.8, 0.2}}; test::CRandomNumbers rng; TDoubleVec samples; rng.generateUniformSamples(0.0, 10.0, 2000, samples); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 10; j += 2) - { - samples[i + j + 1] = combinations[0][0] * samples[i + j] - + combinations[0][1] * samples[i + j + 1]; + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 10; j += 2) { + samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; } } CKMostCorrelatedForTest mostCorrelated(10, 0.0); mostCorrelated.addVariables(10); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 10; ++j) - { + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 10; ++j) { mostCorrelated.add(j, samples[i + j]); } mostCorrelated.capture(); @@ -509,21 +416,19 @@ void CKMostCorrelatedTest::testRemoveVariables() mostCorrelated.mostCorrelated(correlatedPairs); LOG_DEBUG("correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs)); - std::size_t remove_[] = { 2, 5 }; + std::size_t remove_[] = {2, 5}; CKMostCorrelatedForTest::TSizeVec remove(boost::begin(remove_), boost::end(remove_)); mostCorrelated.removeVariables(remove); mostCorrelated.mostCorrelated(correlatedPairs); LOG_DEBUG("correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs)); - for (std::size_t i = 0u; i < correlatedPairs.size(); ++i) - { - CPPUNIT_ASSERT(std::find(remove.begin(), remove.end(), correlatedPairs[i].first) == remove.end()); + for (std::size_t i = 0u; i < correlatedPairs.size(); ++i) { + CPPUNIT_ASSERT(std::find(remove.begin(), remove.end(), correlatedPairs[i].first) == remove.end()); CPPUNIT_ASSERT(std::find(remove.begin(), remove.end(), correlatedPairs[i].second) == remove.end()); } } -void CKMostCorrelatedTest::testAccuracy() -{ +void CKMostCorrelatedTest::testAccuracy() { LOG_DEBUG("+--------------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testAccuracy |") LOG_DEBUG("+--------------------------------------+") @@ -535,62 +440,47 @@ void CKMostCorrelatedTest::testAccuracy() maths::CSampling::seed(); - double combinations[][2] = - { - { 0.8, 0.2 } - }; + double combinations[][2] = {{0.8, 0.2}}; test::CRandomNumbers rng; - for (std::size_t t = 0u; t < 10; ++t) - { - LOG_DEBUG("*** test = " << t+1 << " ***"); + for (std::size_t t = 0u; t < 10; ++t) { + LOG_DEBUG("*** test = " << t + 1 << " ***"); TDoubleVec samples; rng.generateUniformSamples(0.0, 10.0, 2000, samples); - - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 10; j += 2) - { - samples[i + j + 1] = combinations[0][0] * samples[i + j] - + combinations[0][1] * samples[i + j + 1]; + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 10; j += 2) { + samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; } } CKMostCorrelatedForTest mostCorrelated(10, 0.0); mostCorrelated.addVariables(10); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 10; ++j) - { + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 
0u; j < 10; ++j) { mostCorrelated.add(j, samples[i + j]); } mostCorrelated.capture(); - if ((i + 10) % 200 == 0) - { + if ((i + 10) % 200 == 0) { CKMostCorrelatedForTest::TSizeSizePrVec correlatedPairs; mostCorrelated.mostCorrelated(correlatedPairs); TDoubleVec correlations; mostCorrelated.correlations(correlations); - LOG_DEBUG("correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs.begin(), - correlatedPairs.begin() + 5)); - LOG_DEBUG("correlations = " << core::CContainerPrinter::print(correlations.begin(), - correlations.begin() + 5)); + LOG_DEBUG("correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs.begin(), correlatedPairs.begin() + 5)); + LOG_DEBUG("correlations = " << core::CContainerPrinter::print(correlations.begin(), correlations.begin() + 5)); std::sort(correlatedPairs.begin(), correlatedPairs.begin() + 5); CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]"), - core::CContainerPrinter::print(correlatedPairs.begin(), - correlatedPairs.begin() + 5)); + core::CContainerPrinter::print(correlatedPairs.begin(), correlatedPairs.begin() + 5)); } } } } -void CKMostCorrelatedTest::testStability() -{ +void CKMostCorrelatedTest::testStability() { LOG_DEBUG("+---------------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testStability |") LOG_DEBUG("+---------------------------------------+") @@ -604,51 +494,39 @@ void CKMostCorrelatedTest::testStability() maths::CSampling::seed(); - double combinations[][2] = - { - { 0.8, 0.2 }, - { 6.0, 4.0 } - }; + double combinations[][2] = {{0.8, 0.2}, {6.0, 4.0}}; test::CRandomNumbers rng; TDoubleVec samples; rng.generateUniformSamples(0.0, 10.0, 16000, samples); - for (std::size_t i = 0u; i < samples.size(); i += 20) - { - for (std::size_t j = 0u; j < 10; j += 2) - { - samples[i + j + 1] = combinations[0][0] * samples[i + j] - + combinations[0][1] * samples[i + j + 1]; + for (std::size_t i = 0u; i < samples.size(); i += 20) { + for (std::size_t j = 0u; j < 10; j += 2) { + samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; } - for (std::size_t j = 10u; j < 20; j += 2) - { - samples[i + j + 1] = combinations[1][0] * samples[i + j] - + combinations[1][1] * samples[i + j + 1]; + for (std::size_t j = 10u; j < 20; j += 2) { + samples[i + j + 1] = combinations[1][0] * samples[i + j] + combinations[1][1] * samples[i + j + 1]; } } CKMostCorrelatedForTest mostCorrelated(10, 0.0); mostCorrelated.addVariables(20); - for (std::size_t i = 0u; i < samples.size(); i += 20) - { - for (std::size_t j = 0u; j < 20; ++j) - { + for (std::size_t i = 0u; i < samples.size(); i += 20) { + for (std::size_t j = 0u; j < 20; ++j) { mostCorrelated.add(j, samples[i + j]); } mostCorrelated.capture(); - if (i > 800 && (i + 20) % 400 == 0) - { + if (i > 800 && (i + 20) % 400 == 0) { CKMostCorrelatedForTest::TSizeSizePrVec correlatedPairs; mostCorrelated.mostCorrelated(correlatedPairs); TDoubleVec correlations; mostCorrelated.correlations(correlations); LOG_DEBUG("correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs)); LOG_DEBUG("correlations = " << core::CContainerPrinter::print(correlations)); - std::sort(correlatedPairs.begin(), correlatedPairs.begin() + 5); + std::sort(correlatedPairs.begin(), correlatedPairs.begin() + 5); std::sort(correlatedPairs.begin() + 5, correlatedPairs.begin() + 10); CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (2, 3), (4, 5), (6, 7), (8, 9), " "(10, 11), (12, 13), (14, 15), (16, 17), (18, 19)]"), @@ -657,8 +535,7 @@ void 
CKMostCorrelatedTest::testStability() } } -void CKMostCorrelatedTest::testChangingCorrelation() -{ +void CKMostCorrelatedTest::testChangingCorrelation() { LOG_DEBUG("+-------------------------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testChangingCorrelation |") LOG_DEBUG("+-------------------------------------------------+") @@ -671,37 +548,27 @@ void CKMostCorrelatedTest::testChangingCorrelation() maths::CSampling::seed(); - double combinations[][2] = - { - { 0.6, 0.4 } - }; + double combinations[][2] = {{0.6, 0.4}}; test::CRandomNumbers rng; TDoubleVec samples; rng.generateUniformSamples(0.0, 10.0, 4000, samples); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 8; j += 2) - { - samples[i + j + 1] = combinations[0][0] * samples[i + j] - + combinations[0][1] * samples[i + j + 1]; + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 8; j += 2) { + samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; } - if (i >= samples.size() / 3) - { - samples[i + 9] = combinations[0][0] * samples[i + 8] - + combinations[0][1] * samples[i + 9]; + if (i >= samples.size() / 3) { + samples[i + 9] = combinations[0][0] * samples[i + 8] + combinations[0][1] * samples[i + 9]; } } CKMostCorrelatedForTest mostCorrelated(10, 0.0); mostCorrelated.addVariables(10); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 10; ++j) - { + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 10; ++j) { mostCorrelated.add(j, samples[i + j]); } mostCorrelated.capture(); @@ -709,11 +576,8 @@ void CKMostCorrelatedTest::testChangingCorrelation() LOG_DEBUG("correlations = " << core::CContainerPrinter::print(mostCorrelated.correlations())); bool present = false; - for (std::size_t i = 0u; i < mostCorrelated.correlations().size(); ++i) - { - if ( mostCorrelated.correlations()[i].s_X == 8 - && mostCorrelated.correlations()[i].s_Y == 9) - { + for (std::size_t i = 0u; i < mostCorrelated.correlations().size(); ++i) { + if (mostCorrelated.correlations()[i].s_X == 8 && mostCorrelated.correlations()[i].s_Y == 9) { CPPUNIT_ASSERT(maths::CBasicStatistics::mean(mostCorrelated.correlations()[i].s_Correlation) > 0.7); present = true; } @@ -721,8 +585,7 @@ void CKMostCorrelatedTest::testChangingCorrelation() CPPUNIT_ASSERT(present); } -void CKMostCorrelatedTest::testMissingData() -{ +void CKMostCorrelatedTest::testMissingData() { LOG_DEBUG("+-----------------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testMissingData |") LOG_DEBUG("+-----------------------------------------+") @@ -736,38 +599,28 @@ void CKMostCorrelatedTest::testMissingData() maths::CSampling::seed(); - double combinations[][2] = - { - { 0.8, 0.2 } - }; + double combinations[][2] = {{0.8, 0.2}}; test::CRandomNumbers rng; TDoubleVec samples; rng.generateUniformSamples(0.0, 10.0, 4000, samples); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 10; j += 2) - { - samples[i + j + 1] = combinations[0][0] * samples[i + j] - + combinations[0][1] * samples[i + j + 1]; + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 10; j += 2) { + samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; } } CKMostCorrelatedForTest mostCorrelated(10, 0.0); mostCorrelated.addVariables(10); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { 
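        // Around 20% of the values for variables 4 and 6 are skipped at
        // random in the loop below, so the pairs (4, 5) and (6, 7) are
        // estimated from incomplete data and are expected to rank behind the
        // fully observed pairs in the assertion later in this test.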
- for (std::size_t j = 0u; j < 10; ++j) - { - if (j == 4 || j == 6) - { + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 10; ++j) { + if (j == 4 || j == 6) { TDoubleVec test; rng.generateUniformSamples(0.0, 1.0, 1, test); - if (test[0] < 0.2) - { + if (test[0] < 0.2) { continue; } } @@ -775,16 +628,13 @@ void CKMostCorrelatedTest::testMissingData() } mostCorrelated.capture(); - if (i > 1000 && (i + 10) % 200 == 0) - { + if (i > 1000 && (i + 10) % 200 == 0) { CKMostCorrelatedForTest::TSizeSizePrVec correlatedPairs; mostCorrelated.mostCorrelated(correlatedPairs); TDoubleVec correlations; mostCorrelated.correlations(correlations); - LOG_DEBUG("correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs.begin(), - correlatedPairs.begin() + 5)); - LOG_DEBUG("correlations = " << core::CContainerPrinter::print(correlations.begin(), - correlations.begin() + 5)); + LOG_DEBUG("correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs.begin(), correlatedPairs.begin() + 5)); + LOG_DEBUG("correlations = " << core::CContainerPrinter::print(correlations.begin(), correlations.begin() + 5)); std::sort(correlatedPairs.begin(), correlatedPairs.begin() + 3); std::sort(correlatedPairs.begin() + 3, correlatedPairs.begin() + 5); CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (2, 3), (8, 9), (4, 5), (6, 7)]"), @@ -793,8 +643,7 @@ void CKMostCorrelatedTest::testMissingData() } } -void CKMostCorrelatedTest::testScale() -{ +void CKMostCorrelatedTest::testScale() { LOG_DEBUG("+-----------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testScale |") LOG_DEBUG("+-----------------------------------+") @@ -809,25 +658,20 @@ void CKMostCorrelatedTest::testScale() test::CRandomNumbers rng; - std::size_t n[] = { 200, 400, 800, 1600, 3200 }; + std::size_t n[] = {200, 400, 800, 1600, 3200}; uint64_t elapsed[5]; - for (std::size_t s = 0u; s < boost::size(n); ++s) - { - double proportions[] = { 0.2, 0.3, 0.5 }; + for (std::size_t s = 0u; s < boost::size(n); ++s) { + double proportions[] = {0.2, 0.3, 0.5}; std::size_t b = 200; - std::size_t ns[] = - { - static_cast(static_cast(n[s] * b) * proportions[0]), - static_cast(static_cast(n[s] * b) * proportions[1]), - static_cast(static_cast(n[s] * b) * proportions[2]) - }; + std::size_t ns[] = {static_cast(static_cast(n[s] * b) * proportions[0]), + static_cast(static_cast(n[s] * b) * proportions[1]), + static_cast(static_cast(n[s] * b) * proportions[2])}; TDoubleVec scales; rng.generateUniformSamples(10.0, 40.0, n[s], scales); TSizeVec labels; - for (std::size_t i = 0; i < n[s]; ++i) - { + for (std::size_t i = 0; i < n[s]; ++i) { labels.push_back(i); } rng.random_shuffle(labels.begin(), labels.end()); @@ -840,24 +684,17 @@ void CKMostCorrelatedTest::testScale() rng.generateNormalSamples(50.0, 20.0, ns[2], normal); TDoubleVecVec samples(b, TDoubleVec(n[s])); - const TDoubleVec *samples_[] = { &uniform, &gamma, &normal }; - for (std::size_t i = 0u; i < b; ++i) - { - for (std::size_t j = 0u, l = 0u; j < 3; ++j) - { + const TDoubleVec* samples_[] = {&uniform, &gamma, &normal}; + for (std::size_t i = 0u; i < b; ++i) { + for (std::size_t j = 0u, l = 0u; j < 3; ++j) { std::size_t m = samples_[j]->size() / b; - for (std::size_t k = 0u; k < m; ++k, ++l) - { + for (std::size_t k = 0u; k < m; ++k, ++l) { samples[i][labels[l]] = scales[k] * (*samples_[j])[i * m + k]; } } } - double weights[][2] = - { - { 0.65, 0.35 }, - { 0.35, 0.65 } - }; + double weights[][2] = {{0.65, 0.35}, {0.35, 0.65}}; CKMostCorrelatedForTest 
mostCorrelated(n[s], 0.0); mostCorrelated.addVariables(n[s]); @@ -865,14 +702,12 @@ void CKMostCorrelatedTest::testScale() core::CStopWatch watch; watch.start(); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - for (std::size_t j = 0u; j < samples[i].size(); j += 2) - { - double x = weights[0][0] * samples[i][j] + weights[0][1] * samples[i][j+1]; - double y = weights[1][0] * samples[i][j] + weights[1][1] * samples[i][j+1]; - mostCorrelated.add(j, x); - mostCorrelated.add(j+1, y); + for (std::size_t i = 0u; i < samples.size(); ++i) { + for (std::size_t j = 0u; j < samples[i].size(); j += 2) { + double x = weights[0][0] * samples[i][j] + weights[0][1] * samples[i][j + 1]; + double y = weights[1][0] * samples[i][j] + weights[1][1] * samples[i][j + 1]; + mostCorrelated.add(j, x); + mostCorrelated.add(j + 1, y); } mostCorrelated.capture(); } @@ -892,31 +727,25 @@ void CKMostCorrelatedTest::testScale() // Test that the slope is subquadratic TMeanVarAccumulator slope; - for (std::size_t i = 1u; i < boost::size(elapsed); ++i) - { - slope.add(static_cast(elapsed[i]) / static_cast(elapsed[i-1])); + for (std::size_t i = 1u; i < boost::size(elapsed); ++i) { + slope.add(static_cast(elapsed[i]) / static_cast(elapsed[i - 1])); } double exponent = std::log(maths::CBasicStatistics::mean(slope)) / std::log(2.0); LOG_DEBUG("exponent = " << exponent); - double sdRatio = std::sqrt(maths::CBasicStatistics::variance(slope)) - / maths::CBasicStatistics::mean(slope); + double sdRatio = std::sqrt(maths::CBasicStatistics::variance(slope)) / maths::CBasicStatistics::mean(slope); LOG_DEBUG("sdRatio = " << sdRatio); - if (core::CUname::nodeName().compare(0, 3, "vm-") == 0) - { + if (core::CUname::nodeName().compare(0, 3, "vm-") == 0) { // Allow more leeway on a VM as non-linearity is most likely due to the // VM stalling CPPUNIT_ASSERT(exponent < 2.0); CPPUNIT_ASSERT(sdRatio < 0.75); - } - else - { + } else { CPPUNIT_ASSERT(exponent < 1.75); CPPUNIT_ASSERT(sdRatio < 0.5); } } -void CKMostCorrelatedTest::testPersistence() -{ +void CKMostCorrelatedTest::testPersistence() { LOG_DEBUG("+-----------------------------------------+") LOG_DEBUG("| CKMostCorrelatedTest::testPersistence |") LOG_DEBUG("+-----------------------------------------+") @@ -925,32 +754,24 @@ void CKMostCorrelatedTest::testPersistence() maths::CSampling::seed(); - double combinations[][2] = - { - { 0.8, 0.2 } - }; + double combinations[][2] = {{0.8, 0.2}}; test::CRandomNumbers rng; TDoubleVec samples; rng.generateUniformSamples(0.0, 10.0, 4000, samples); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 10; j += 2) - { - samples[i + j + 1] = combinations[0][0] * samples[i + j] - + combinations[0][1] * samples[i + j + 1]; + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 10; j += 2) { + samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; } } maths::CKMostCorrelated origMostCorrelated(10, 0.001); origMostCorrelated.addVariables(10); - for (std::size_t i = 0u; i < samples.size(); i += 10) - { - for (std::size_t j = 0u; j < 10; ++j) - { + for (std::size_t i = 0u; i < samples.size(); i += 10) { + for (std::size_t j = 0u; j < 10; ++j) { origMostCorrelated.add(j, samples[i + j]); } origMostCorrelated.capture(); @@ -969,15 +790,10 @@ void CKMostCorrelatedTest::testPersistence() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CKMostCorrelated 
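    // The restored object is constructed with the same parameters as the
    // original and must checksum identically; a second persist pass below
    // checks that the round trip is lossless.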
restoredMostCorrelated(10, 0.001); - CPPUNIT_ASSERT(traverser.traverseSubLevel( - boost::bind(&maths::CKMostCorrelated::acceptRestoreTraverser, - &restoredMostCorrelated, - _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CKMostCorrelated::acceptRestoreTraverser, &restoredMostCorrelated, _1))); - LOG_DEBUG("orig checksum = " << origMostCorrelated.checksum() - << ", new checksum = " << restoredMostCorrelated.checksum()); - CPPUNIT_ASSERT_EQUAL(origMostCorrelated.checksum(), - restoredMostCorrelated.checksum()); + LOG_DEBUG("orig checksum = " << origMostCorrelated.checksum() << ", new checksum = " << restoredMostCorrelated.checksum()); + CPPUNIT_ASSERT_EQUAL(origMostCorrelated.checksum(), restoredMostCorrelated.checksum()); std::string newXml; core::CRapidXmlStatePersistInserter inserter("root"); @@ -987,40 +803,29 @@ void CKMostCorrelatedTest::testPersistence() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test *CKMostCorrelatedTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CKMostCorrelatedTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testCorrelation", - &CKMostCorrelatedTest::testCorrelation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testNextProjection", - &CKMostCorrelatedTest::testNextProjection) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testMostCorrelated", - &CKMostCorrelatedTest::testMostCorrelated) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testRemoveVariables", - &CKMostCorrelatedTest::testRemoveVariables) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testAccuracy", - &CKMostCorrelatedTest::testAccuracy) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testStability", - &CKMostCorrelatedTest::testStability) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testChangingCorrelation", - &CKMostCorrelatedTest::testChangingCorrelation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testMissingData", - &CKMostCorrelatedTest::testMissingData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testScale", - &CKMostCorrelatedTest::testScale) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CKMostCorrelatedTest::testPersistence", - &CKMostCorrelatedTest::testPersistence) ); +CppUnit::Test* CKMostCorrelatedTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CKMostCorrelatedTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CKMostCorrelatedTest::testCorrelation", &CKMostCorrelatedTest::testCorrelation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMostCorrelatedTest::testNextProjection", + &CKMostCorrelatedTest::testNextProjection)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMostCorrelatedTest::testMostCorrelated", + &CKMostCorrelatedTest::testMostCorrelated)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMostCorrelatedTest::testRemoveVariables", + &CKMostCorrelatedTest::testRemoveVariables)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CKMostCorrelatedTest::testAccuracy", &CKMostCorrelatedTest::testAccuracy)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CKMostCorrelatedTest::testStability", &CKMostCorrelatedTest::testStability)); + suiteOfTests->addTest(new CppUnit::TestCaller("CKMostCorrelatedTest::testChangingCorrelation", + 
&CKMostCorrelatedTest::testChangingCorrelation)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CKMostCorrelatedTest>("CKMostCorrelatedTest::testMissingData", &CKMostCorrelatedTest::testMissingData)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CKMostCorrelatedTest>("CKMostCorrelatedTest::testScale", &CKMostCorrelatedTest::testScale)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CKMostCorrelatedTest>("CKMostCorrelatedTest::testPersistence", &CKMostCorrelatedTest::testPersistence)); return suiteOfTests; } diff --git a/lib/maths/unittest/CKMostCorrelatedTest.h b/lib/maths/unittest/CKMostCorrelatedTest.h index ac59991828..20579f25ec 100644 --- a/lib/maths/unittest/CKMostCorrelatedTest.h +++ b/lib/maths/unittest/CKMostCorrelatedTest.h @@ -9,21 +9,20 @@ #include <cppunit/extensions/HelperMacros.h> -class CKMostCorrelatedTest : public CppUnit::TestFixture -{ - public: - void testCorrelation(); - void testNextProjection(); - void testMostCorrelated(); - void testRemoveVariables(); - void testAccuracy(); - void testStability(); - void testChangingCorrelation(); - void testMissingData(); - void testPersistence(); - void testScale(); +class CKMostCorrelatedTest : public CppUnit::TestFixture { +public: + void testCorrelation(); + void testNextProjection(); + void testMostCorrelated(); + void testRemoveVariables(); + void testAccuracy(); + void testStability(); + void testChangingCorrelation(); + void testMissingData(); + void testPersistence(); + void testScale(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CKMostCorrelatedTest_h diff --git a/lib/maths/unittest/CKdTreeTest.cc b/lib/maths/unittest/CKdTreeTest.cc index f3ce1189c7..f9c1550715 100644 --- a/lib/maths/unittest/CKdTreeTest.cc +++ b/lib/maths/unittest/CKdTreeTest.cc @@ -29,15 +29,13 @@ using TDoubleVector5Pr = std::pair<double, TVector5>; using TVector5Vec = std::vector<TVector5>; template<typename T> -std::string print(const T &t) -{ +std::string print(const T& t) { std::ostringstream o; o << t; return o.str(); } -void CKdTreeTest::testBuild() -{ +void CKdTreeTest::testBuild() { LOG_DEBUG("+--------------------------+"); LOG_DEBUG("| CKdTreeTest::testBuild |"); LOG_DEBUG("+--------------------------+"); @@ -46,14 +44,12 @@ test::CRandomNumbers rng; - for (std::size_t i = 0u; i < numberTests; ++i) - { + for (std::size_t i = 0u; i < numberTests; ++i) {
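        // Each iteration draws i + 1 random 2-d points, builds a kd-tree over
        // them and verifies the tree's structural invariants below.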
TDoubleVec samples; rng.generateUniformSamples(-100.0, 100.0, 2 * (i + 1), samples); TVector2Vec points; - for (std::size_t j = 0u; j < samples.size(); j += 2) - { + for (std::size_t j = 0u; j < samples.size(); j += 2) { points.push_back(TVector2(&samples[j], &samples[j + 2])); } @@ -62,14 +58,12 @@ void CKdTreeTest::testBuild() CPPUNIT_ASSERT(kdTree.checkInvariants()); } - for (std::size_t i = 0u; i < numberTests; ++i) - { + for (std::size_t i = 0u; i < numberTests; ++i) { TDoubleVec samples; rng.generateUniformSamples(-100.0, 100.0, 5 * (i + 1), samples); TVector5Vec points; - for (std::size_t j = 0u; j < samples.size(); j += 5) - { + for (std::size_t j = 0u; j < samples.size(); j += 5) { points.push_back(TVector5(&samples[j], &samples[j + 5])); } @@ -79,8 +73,7 @@ } } -void CKdTreeTest::testNearestNeighbour() -{ +void CKdTreeTest::testNearestNeighbour() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CKdTreeTest::testNearestNeighbour |"); LOG_DEBUG("+-------------------------------------+"); @@ -89,14 +82,12 @@ test::CRandomNumbers rng; - for (std::size_t i = 0u; i < numberTests; ++i) - { + for (std::size_t i = 0u; i < numberTests; ++i) { TDoubleVec samples; rng.generateUniformSamples(-100.0, 100.0, 2 * (i + 1), samples); TVector2Vec points; - for (std::size_t j = 0u; j < samples.size(); j += 2) - { + for (std::size_t j = 0u; j < samples.size(); j += 2) { points.push_back(TVector2(&samples[j], &samples[j + 2])); } @@ -107,52 +98,37 @@ rng.generateUniformSamples(-150.0, 150.0, 2 * 10, samples); TVector2Vec tests; - for (std::size_t j = 0u; j < samples.size(); j += 2) - { + for (std::size_t j = 0u; j < samples.size(); j += 2) { tests.push_back(TVector2(&samples[j], &samples[j + 2])); } - if (i % 10 == 0) - { + if (i % 10 == 0) { LOG_DEBUG("*** Test " << i << " ***"); } - for (std::size_t j = 0u; j < tests.size(); ++j) - { - using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack< - TDoubleVector2Pr, 1, maths::COrderings::SFirstLess>; + for (std::size_t j = 0u; j < tests.size(); ++j) { + using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack<TDoubleVector2Pr, 1, maths::COrderings::SFirstLess>; TMinAccumulator expectedNearest; - for (std::size_t k = 0u; k < points.size(); ++k) - { - expectedNearest.add(TDoubleVector2Pr((tests[j] - points[k]).euclidean(), - points[k])); + for (std::size_t k = 0u; k < points.size(); ++k) { + expectedNearest.add(TDoubleVector2Pr((tests[j] - points[k]).euclidean(), points[k])); } - const TVector2 *nearest = kdTree.nearestNeighbour(tests[j]); + const TVector2* nearest = kdTree.nearestNeighbour(tests[j]); CPPUNIT_ASSERT(nearest); - if (i % 10 == 0) - { - LOG_DEBUG("Expected nearest = " << expectedNearest[0].second - << ", expected distance = " << expectedNearest[0].first); - LOG_DEBUG("Nearest = " << *nearest - << ", actual distance = " << (tests[j] - *nearest).euclidean()); + if (i % 10 == 0) { + LOG_DEBUG("Expected nearest = " << expectedNearest[0].second << ", expected distance = " << expectedNearest[0].first); + LOG_DEBUG("Nearest = " << *nearest << ", actual distance = " << (tests[j] - *nearest).euclidean()); } - CPPUNIT_ASSERT_EQUAL(print(expectedNearest[0].second), - print(*nearest)); + CPPUNIT_ASSERT_EQUAL(print(expectedNearest[0].second), print(*nearest)); } } } -CppUnit::Test *CKdTreeTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CKdTreeTest"); +CppUnit::Test* CKdTreeTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CKdTreeTest"); - suiteOfTests->addTest( new CppUnit::TestCaller<CKdTreeTest>( - "CKdTreeTest::testBuild", - &CKdTreeTest::testBuild) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CKdTreeTest>( - "CKdTreeTest::testNearestNeighbour", - &CKdTreeTest::testNearestNeighbour) ); + suiteOfTests->addTest(new CppUnit::TestCaller<CKdTreeTest>("CKdTreeTest::testBuild", &CKdTreeTest::testBuild)); + suiteOfTests->addTest(new CppUnit::TestCaller<CKdTreeTest>("CKdTreeTest::testNearestNeighbour", &CKdTreeTest::testNearestNeighbour)); return suiteOfTests; } diff --git a/lib/maths/unittest/CKdTreeTest.h b/lib/maths/unittest/CKdTreeTest.h index 32006514cd..903401b1b2 100644 --- a/lib/maths/unittest/CKdTreeTest.h +++ b/lib/maths/unittest/CKdTreeTest.h @@ -7,16 +7,14 @@ #ifndef INCLUDED_CKdTreeTest_h #define INCLUDED_CKdTreeTest_h - #include <cppunit/extensions/HelperMacros.h> -class CKdTreeTest : public CppUnit::TestFixture -{ - public: - void testBuild(); - void testNearestNeighbour(); +class CKdTreeTest : public CppUnit::TestFixture { +public: + void testBuild(); + void testNearestNeighbour(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CKdTreeTest_h diff --git a/lib/maths/unittest/CLassoLogisticRegressionTest.cc b/lib/maths/unittest/CLassoLogisticRegressionTest.cc 
index 172d3c4f1d..c84aa69018 100644 --- a/lib/maths/unittest/CLassoLogisticRegressionTest.cc +++ b/lib/maths/unittest/CLassoLogisticRegressionTest.cc @@ -21,8 +21,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector<double>; using TDoubleVecVec = std::vector<TDoubleVec>; @@ -31,69 +30,51 @@ using TSizeSizePrDoublePr = std::pair<TSizeSizePr, double>; using TSizeSizePrDoublePrVec = std::vector<TSizeSizePrDoublePr>; template<typename ARRAY> -void initializeMatrix(const ARRAY &x_, TDoubleVecVec &x) -{ +void initializeMatrix(const ARRAY& x_, TDoubleVecVec& x) { x.resize(boost::size(x_[0]), TDoubleVec(boost::size(x_), 0.0)); - for (std::size_t i = 0u; i < boost::size(x_); ++i) - { - for (std::size_t j = 0u; j < boost::size(x_[i]); ++j) - { + for (std::size_t i = 0u; i < boost::size(x_); ++i) { + for (std::size_t j = 0u; j < boost::size(x_[i]); ++j) { x[j][i] = x_[i][j]; } } } template<typename ARRAY> -void initializeMatrix(const ARRAY &x_, TSizeSizePrDoublePrVec &x) -{ - for (std::size_t i = 0u; i < boost::size(x_); ++i) - { - for (std::size_t j = 0u; j < boost::size(x_[i]); ++j) - { - if (x_[i][j] > 0.0) - { +void initializeMatrix(const ARRAY& x_, TSizeSizePrDoublePrVec& x) { + for (std::size_t i = 0u; i < boost::size(x_); ++i) { + for (std::size_t j = 0u; j < boost::size(x_[i]); ++j) { + if (x_[i][j] > 0.0) { x.push_back(TSizeSizePrDoublePr(TSizeSizePr(j, i), x_[i][j])); } } } } -double inner(const TDoubleVec &x, const TDoubleVec &y) -{ +double inner(const TDoubleVec& x, const TDoubleVec& y) { double result = 0.0; - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { result += x[i] * y[i]; } return result; } -double logLikelihood(const TDoubleVecVec &x, - const TDoubleVec &y, - const TDoubleVec &lambda, - const TDoubleVec &beta) -{ +double logLikelihood(const TDoubleVecVec& x, const TDoubleVec& y, const TDoubleVec& lambda, const TDoubleVec& beta) { double result = 0.0; - for (std::size_t i = 0u; i < y.size(); ++i) - { + for (std::size_t i = 0u; i < y.size(); ++i) { double f = 0.0; - for (std::size_t j = 0u; j < beta.size(); ++j) - { + for (std::size_t j = 0u; j < beta.size(); ++j) { f += beta[j] * x[j][i]; } result -= std::log(1.0 + std::exp(-f * y[i])); } - for (std::size_t j = 0u; j < beta.size(); ++j) - { + for (std::size_t j = 0u; j < beta.size(); ++j) { result -= lambda[j] * std::fabs(beta[j]); } return result; } - } -void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() -{ +void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() { LOG_DEBUG("+-------------------------------------------------------------+"); LOG_DEBUG("| CLassoLogisticRegressionTest::testCyclicCoordinateDescent |"); LOG_DEBUG("+-------------------------------------------------------------+"); @@ -114,18 +95,8 @@ maths::lasso_logistic_regression_detail::CCyclicCoordinateDescent clg(50, 0.001); TDoubleVec lambda(2, 0.25); - double x_[][2] = - { - { 0.1, 1.0 }, - { 0.3, 1.0 }, - { 0.4, 1.0 }, - { 0.0, 1.0 }, - { 1.0, 1.0 }, - { 0.6, 1.0 }, - { 0.7, 1.0 }, - { 0.45, 1.0 } - }; - double y_[] = { -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0 }; + double x_[][2] = {{0.1, 1.0}, {0.3, 1.0}, {0.4, 1.0}, {0.0, 1.0}, {1.0, 1.0}, {0.6, 1.0}, {0.7, 1.0}, {0.45, 1.0}}; + double y_[] = {-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0}; TDoubleVecVec x; initializeMatrix(x_, x); @@ -136,36 +107,29 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() TDoubleVec beta1; std::size_t numberIterations; clg.run(x, y, lambda, beta1, numberIterations); - LOG_DEBUG("dense beta 
= " << core::CContainerPrinter::print(beta1) - << ", numberIterations = " << numberIterations); + LOG_DEBUG("dense beta = " << core::CContainerPrinter::print(beta1) << ", numberIterations = " << numberIterations); TDoubleVec beta2; - maths::lasso_logistic_regression_detail::CSparseMatrix xs(boost::size(x_), - boost::size(x_[0]), - xs_); + maths::lasso_logistic_regression_detail::CSparseMatrix xs(boost::size(x_), boost::size(x_[0]), xs_); clg.run(xs, y, lambda, beta2, numberIterations); - LOG_DEBUG("sparse beta = " << core::CContainerPrinter::print(beta2) - << ", numberIterations = " << numberIterations); + LOG_DEBUG("sparse beta = " << core::CContainerPrinter::print(beta2) << ", numberIterations = " << numberIterations); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(beta1), - core::CContainerPrinter::print(beta2)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(beta1), core::CContainerPrinter::print(beta2)); initializeMatrix(x_, x); double ll = logLikelihood(x, y, lambda, beta1); LOG_DEBUG("log-likelihood = " << ll); double llMinusEps = 0.0; - double llPlusEps = 0.0; - for (std::size_t i = 0u; i < 10; ++i) - { + double llPlusEps = 0.0; + for (std::size_t i = 0u; i < 10; ++i) { TDoubleVec step; rng.generateUniformSamples(0.0, EPS, beta1.size(), step); TDoubleVec betaMinusEps; TDoubleVec betaPlusEps; double length = 0.0; - for (std::size_t j = 0u; j < beta1.size(); ++j) - { + for (std::size_t j = 0u; j < beta1.size(); ++j) { betaMinusEps.push_back(beta1[j] - step[j]); betaPlusEps.push_back(beta1[j] + step[j]); length += step[j] * step[j]; @@ -173,19 +137,19 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() length = 2.0 * std::sqrt(length); llMinusEps += logLikelihood(x, y, lambda, betaMinusEps); - llPlusEps += logLikelihood(x, y, lambda, betaPlusEps); - LOG_DEBUG("log-likelihood minus eps = " << llMinusEps / static_cast(i+1)); - LOG_DEBUG("log-likelihood plus eps = " << llPlusEps / static_cast(i+1)); + llPlusEps += logLikelihood(x, y, lambda, betaPlusEps); + LOG_DEBUG("log-likelihood minus eps = " << llMinusEps / static_cast(i + 1)); + LOG_DEBUG("log-likelihood plus eps = " << llPlusEps / static_cast(i + 1)); double slope = (llPlusEps - llMinusEps) / length; LOG_DEBUG("slope = " << slope); CPPUNIT_ASSERT(slope < 0.015); } CPPUNIT_ASSERT(ll > llMinusEps / 10.0); - CPPUNIT_ASSERT(ll > llPlusEps / 10.0); + CPPUNIT_ASSERT(ll > llPlusEps / 10.0); } - double lambdas[] = { 2.5, 5.0, 10.0, 15.0, 20.0 }; + double lambdas[] = {2.5, 5.0, 10.0, 15.0, 20.0}; maths::lasso_logistic_regression_detail::CCyclicCoordinateDescent clg(100, 0.001); @@ -196,30 +160,25 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() TDoubleVec decisionNormal; rng.generateUniformSamples(0.0, 1.0, 5, decisionNormal); double length = std::sqrt(inner(decisionNormal, decisionNormal)); - for (std::size_t j = 0u; j < decisionNormal.size(); ++j) - { + for (std::size_t j = 0u; j < decisionNormal.size(); ++j) { decisionNormal[j] /= length; } - LOG_DEBUG("decisionNormal = " - << core::CContainerPrinter::print(decisionNormal)); + LOG_DEBUG("decisionNormal = " << core::CContainerPrinter::print(decisionNormal)); TDoubleVecVec x_(6, TDoubleVec(100, 0.0)); TDoubleVec y_(100, 0.0); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { TDoubleVec xi; rng.generateUniformSamples(-20.0, 20.0, 5, xi); double yi = std::sqrt(inner(decisionNormal, xi)) > decision ? 
        double yi = std::sqrt(inner(decisionNormal, xi)) > decision ? 1.0 : -1.0;
-        for (std::size_t j = 0u; j < xi.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < xi.size(); ++j) {
            x_[j][i] = xi[j];
        }
        x_[5][i] = 1.0;
        y_[i] = yi;
    }
 
-    for (std::size_t k = 0u; k < boost::size(lambdas); ++k)
-    {
+    for (std::size_t k = 0u; k < boost::size(lambdas); ++k) {
        TDoubleVec lambda(6, lambdas[k]);
        TDoubleVecVec x(x_);
        TDoubleVec y(y_);
@@ -227,19 +186,16 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent()
        TDoubleVec beta;
        std::size_t numberIterations;
        clg.run(x, y, lambda, beta, numberIterations);
-        LOG_DEBUG("beta = " << core::CContainerPrinter::print(beta)
-                  << ", numberIterations = " << numberIterations);
+        LOG_DEBUG("beta = " << core::CContainerPrinter::print(beta) << ", numberIterations = " << numberIterations);
 
        TDoubleVec effectiveDecisionNormal;
-        for (std::size_t j = 0u; j < decisionNormal.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < decisionNormal.size(); ++j) {
            effectiveDecisionNormal.push_back(beta[j]);
        }
-        double theta = std::acos(inner(effectiveDecisionNormal, decisionNormal)
-                                 / std::sqrt(inner(effectiveDecisionNormal, effectiveDecisionNormal)))
-                       * 360.0
-                       / boost::math::double_constants::two_pi;
+        double theta =
+            std::acos(inner(effectiveDecisionNormal, decisionNormal) / std::sqrt(inner(effectiveDecisionNormal, effectiveDecisionNormal))) *
+            360.0 / boost::math::double_constants::two_pi;
        LOG_DEBUG("angular error = " << theta << " deg");
        CPPUNIT_ASSERT(theta < 7.5);
    }
@@ -249,48 +205,40 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent()
    // in order as we increase lambda.
 }
 
-void CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse()
-{
+void CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse() {
    LOG_DEBUG("+------------------------------------------------------------------------+");
    LOG_DEBUG("| CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse |");
    LOG_DEBUG("+------------------------------------------------------------------------+");
 
    // TODO
 }
 
-void CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental()
-{
+void CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental() {
    // TODO
 }
 
-void CLassoLogisticRegressionTest::testNormBasedLambda()
-{
+void CLassoLogisticRegressionTest::testNormBasedLambda() {
    // TODO
 }
 
-void CLassoLogisticRegressionTest::testCrossValidatedLambda()
-{
+void CLassoLogisticRegressionTest::testCrossValidatedLambda() {
    // TODO
 }
-CppUnit::Test *CLassoLogisticRegressionTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLassoLogisticRegressionTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLassoLogisticRegressionTest>(
-                                   "CLassoLogisticRegressionTest::testCyclicCoordinateDescent",
-                                   &CLassoLogisticRegressionTest::testCyclicCoordinateDescent) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLassoLogisticRegressionTest>(
-                                   "CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse",
-                                   &CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLassoLogisticRegressionTest>(
-                                   "CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental",
-                                   &CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLassoLogisticRegressionTest>(
-                                   "CLassoLogisticRegressionTest::testNormBasedLambda",
-                                   &CLassoLogisticRegressionTest::testNormBasedLambda) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLassoLogisticRegressionTest>(
-                                   "CLassoLogisticRegressionTest::testCrossValidatedLambda",
-                                   &CLassoLogisticRegressionTest::testCrossValidatedLambda) );
+CppUnit::Test* CLassoLogisticRegressionTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLassoLogisticRegressionTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>(
+        "CLassoLogisticRegressionTest::testCyclicCoordinateDescent", &CLassoLogisticRegressionTest::testCyclicCoordinateDescent));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CLassoLogisticRegressionTest>("CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse",
+                                                              &CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CLassoLogisticRegressionTest>("CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental",
+                                                              &CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>("CLassoLogisticRegressionTest::testNormBasedLambda",
+                                                                                &CLassoLogisticRegressionTest::testNormBasedLambda));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>("CLassoLogisticRegressionTest::testCrossValidatedLambda",
+                                                                                &CLassoLogisticRegressionTest::testCrossValidatedLambda));
 
    return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CLassoLogisticRegressionTest.h b/lib/maths/unittest/CLassoLogisticRegressionTest.h
index ff69cf7ae7..4657fedc7b 100644
--- a/lib/maths/unittest/CLassoLogisticRegressionTest.h
+++ b/lib/maths/unittest/CLassoLogisticRegressionTest.h
@@ -9,16 +9,15 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CLassoLogisticRegressionTest : public CppUnit::TestFixture
-{
-    public:
-        void testCyclicCoordinateDescent();
-        void testCyclicCoordinateDescentLargeSparse();
-        void testCyclicCoordinateDescentIncremental();
-        void testNormBasedLambda();
-        void testCrossValidatedLambda();
+class CLassoLogisticRegressionTest : public CppUnit::TestFixture {
+public:
+    void testCyclicCoordinateDescent();
+    void testCyclicCoordinateDescentLargeSparse();
+    void testCyclicCoordinateDescentIncremental();
+    void testNormBasedLambda();
+    void testCrossValidatedLambda();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CLassoLogisticRegressionTest_h
diff --git a/lib/maths/unittest/CLinearAlgebraTest.cc b/lib/maths/unittest/CLinearAlgebraTest.cc
index 9e8600dd02..0e1ae53fac 100644
--- a/lib/maths/unittest/CLinearAlgebraTest.cc
+++ b/lib/maths/unittest/CLinearAlgebraTest.cc
@@ -22,8 +21,7 @@
 using namespace ml;
 
 using TDoubleVec = std::vector<double>;
 using TDoubleVecVec = std::vector<TDoubleVec>;
 
-void CLinearAlgebraTest::testSymmetricMatrixNxN()
-{
+void CLinearAlgebraTest::testSymmetricMatrixNxN() {
    LOG_DEBUG("+----------------------------------------------+");
    LOG_DEBUG("| CLinearAlgebraTest::testSymmetricMatrixNxN |");
    LOG_DEBUG("+----------------------------------------------+");
@@ -32,70 +31,57 @@ void CLinearAlgebraTest::testSymmetricMatrixNxN()
    {
        maths::CSymmetricMatrixNxN<double, 3> matrix;
        LOG_DEBUG("matrix = " << matrix);
-        for (std::size_t i = 0u; i < 3; ++i)
-        {
-            for (std::size_t j = 0u; j < 3; ++j)
-            {
+        for (std::size_t i = 0u; i < 3; ++i) {
+            for (std::size_t j = 0u; j < 3; ++j) {
                CPPUNIT_ASSERT_EQUAL(0.0, matrix(i, j));
            }
        }
        CPPUNIT_ASSERT_EQUAL(0.0, matrix.trace());
-    }
+    }
    {
        maths::CSymmetricMatrixNxN<double, 3> matrix(3.0);
        LOG_DEBUG("matrix = " << matrix);
-        for (std::size_t i = 0u; i < 3; ++i)
-        {
-            for (std::size_t j = 0u; j < 3; ++j)
-            {
+        for (std::size_t i = 0u; i < 3; ++i) {
+            for (std::size_t j = 0u; j < 3; ++j) {
                CPPUNIT_ASSERT_EQUAL(3.0, matrix(i, j));
            }
        }
        CPPUNIT_ASSERT_EQUAL(9.0, matrix.trace());
    }
    { - double m[][5] = - { - { 1.1, 2.4, 1.4, 3.7, 4.0 }, - { 2.4, 3.2, 1.8, 0.7, 1.0 }, - { 1.4, 1.8, 0.8, 4.7, 3.1 }, - { 3.7, 0.7, 4.7, 4.7, 1.1 }, - { 4.0, 1.0, 3.1,
1.1, 1.0 } - }; + double m[][5] = {{1.1, 2.4, 1.4, 3.7, 4.0}, + {2.4, 3.2, 1.8, 0.7, 1.0}, + {1.4, 1.8, 0.8, 4.7, 3.1}, + {3.7, 0.7, 4.7, 4.7, 1.1}, + {4.0, 1.0, 3.1, 1.1, 1.0}}; maths::CSymmetricMatrixNxN matrix(m); LOG_DEBUG("matrix = " << matrix); - for (std::size_t i = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j < 5; ++j) - { + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < 5; ++j) { CPPUNIT_ASSERT_EQUAL(m[i][j], matrix(i, j)); } } CPPUNIT_ASSERT_EQUAL(10.8, matrix.trace()); } { - double v[] = { 1.0, 2.0, 3.0, 4.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0}; maths::CVectorNx1 vector(v); maths::CSymmetricMatrixNxN matrix(maths::E_OuterProduct, vector); LOG_DEBUG("matrix = " << matrix); - for (std::size_t i = 0u; i < 4; ++i) - { - for (std::size_t j = 0u; j < 4; ++j) - { - CPPUNIT_ASSERT_EQUAL(static_cast((i+1) * (j+1)), matrix(i, j)); + for (std::size_t i = 0u; i < 4; ++i) { + for (std::size_t j = 0u; j < 4; ++j) { + CPPUNIT_ASSERT_EQUAL(static_cast((i + 1) * (j + 1)), matrix(i, j)); } } CPPUNIT_ASSERT_EQUAL(30.0, matrix.trace()); } { - double v[] = { 1.0, 2.0, 3.0, 4.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0}; maths::CVectorNx1 vector(v); maths::CSymmetricMatrixNxN matrix(maths::E_Diagonal, vector); LOG_DEBUG("matrix = " << matrix); - for (std::size_t i = 0u; i < 4; ++i) - { - for (std::size_t j = 0u; j < 4; ++j) - { + for (std::size_t i = 0u; i < 4; ++i) { + for (std::size_t j = 0u; j < 4; ++j) { CPPUNIT_ASSERT_EQUAL(i == j ? vector(i) : 0.0, matrix(i, j)); } } @@ -105,21 +91,16 @@ void CLinearAlgebraTest::testSymmetricMatrixNxN() { LOG_DEBUG("Sum"); - double m[][5] = - { - { 1.1, 2.4, 1.4, 3.7, 4.0 }, - { 2.4, 3.2, 1.8, 0.7, 1.0 }, - { 1.4, 1.8, 0.8, 4.7, 3.1 }, - { 3.7, 0.7, 4.7, 4.7, 1.1 }, - { 4.0, 1.0, 3.1, 1.1, 1.0 } - }; + double m[][5] = {{1.1, 2.4, 1.4, 3.7, 4.0}, + {2.4, 3.2, 1.8, 0.7, 1.0}, + {1.4, 1.8, 0.8, 4.7, 3.1}, + {3.7, 0.7, 4.7, 4.7, 1.1}, + {4.0, 1.0, 3.1, 1.1, 1.0}}; maths::CSymmetricMatrixNxN matrix(m); maths::CSymmetricMatrixNxN sum = matrix + matrix; LOG_DEBUG("sum = " << sum); - for (std::size_t i = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j < 5; ++j) - { + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < 5; ++j) { CPPUNIT_ASSERT_EQUAL(2.0 * m[i][j], sum(i, j)); } } @@ -127,26 +108,14 @@ void CLinearAlgebraTest::testSymmetricMatrixNxN() { LOG_DEBUG("Difference"); - double m1[][3] = - { - { 1.1, 0.4, 1.4 }, - { 0.4, 1.2, 1.8 }, - { 1.4, 1.8, 0.8 } - }; - double m2[][3] = - { - { 2.1, 0.3, 0.4 }, - { 0.3, 1.2, 3.8 }, - { 0.4, 3.8, 0.2 } - }; + double m1[][3] = {{1.1, 0.4, 1.4}, {0.4, 1.2, 1.8}, {1.4, 1.8, 0.8}}; + double m2[][3] = {{2.1, 0.3, 0.4}, {0.3, 1.2, 3.8}, {0.4, 3.8, 0.2}}; maths::CSymmetricMatrixNxN matrix1(m1); maths::CSymmetricMatrixNxN matrix2(m2); maths::CSymmetricMatrixNxN difference = matrix1 - matrix2; LOG_DEBUG("difference = " << difference); - for (std::size_t i = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < 3; ++j) - { + for (std::size_t i = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < 3; ++j) { CPPUNIT_ASSERT_EQUAL(m1[i][j] - m2[i][j], difference(i, j)); } } @@ -154,39 +123,34 @@ void CLinearAlgebraTest::testSymmetricMatrixNxN() { LOG_DEBUG("Matrix Scalar Multiplication"); - double v[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0, 5.0}; maths::CVectorNx1 vector(v); maths::CSymmetricMatrixNxN m(maths::E_OuterProduct, vector); maths::CSymmetricMatrixNxN ms = m * 3.0; LOG_DEBUG("3 * m = " << ms); - for (std::size_t i = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j 
< 5; ++j) - { - CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i+1) * (j+1)), ms(i, j)); + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < 5; ++j) { + CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i + 1) * (j + 1)), ms(i, j)); } } } { LOG_DEBUG("Matrix Scalar Division"); - double v[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0, 5.0}; maths::CVectorNx1 vector(v); maths::CSymmetricMatrixNxN m(maths::E_OuterProduct, vector); maths::CSymmetricMatrixNxN ms = m / 4.0; LOG_DEBUG("m / 4.0 = " << ms); - for (std::size_t i = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j < 5; ++j) - { - CPPUNIT_ASSERT_EQUAL(static_cast((i+1) * (j+1)) / 4.0, ms(i, j)); + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < 5; ++j) { + CPPUNIT_ASSERT_EQUAL(static_cast((i + 1) * (j + 1)) / 4.0, ms(i, j)); } } } } -void CLinearAlgebraTest::testVectorNx1() -{ +void CLinearAlgebraTest::testVectorNx1() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CLinearAlgebraTest::testVectorNx1 |"); LOG_DEBUG("+-------------------------------------+"); @@ -195,25 +159,22 @@ void CLinearAlgebraTest::testVectorNx1() { maths::CVectorNx1 vector; LOG_DEBUG("vector = " << vector); - for (std::size_t i = 0u; i < 3; ++i) - { + for (std::size_t i = 0u; i < 3; ++i) { CPPUNIT_ASSERT_EQUAL(0.0, vector(i)); } } { maths::CVectorNx1 vector(3.0); LOG_DEBUG("vector = " << vector); - for (std::size_t i = 0u; i < 3; ++i) - { + for (std::size_t i = 0u; i < 3; ++i) { CPPUNIT_ASSERT_EQUAL(3.0, vector(i)); } } { - double v[] = { 1.1, 2.4, 1.4, 3.7, 4.0 }; + double v[] = {1.1, 2.4, 1.4, 3.7, 4.0}; maths::CVectorNx1 vector(v); LOG_DEBUG("vector = " << vector); - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { CPPUNIT_ASSERT_EQUAL(v[i], vector(i)); } } @@ -222,26 +183,24 @@ void CLinearAlgebraTest::testVectorNx1() { LOG_DEBUG("Sum"); - double v[] = { 1.1, 2.4, 1.4, 3.7, 4.0 }; + double v[] = {1.1, 2.4, 1.4, 3.7, 4.0}; maths::CVectorNx1 vector(v); maths::CVectorNx1 sum = vector + vector; LOG_DEBUG("vector = " << sum); - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { CPPUNIT_ASSERT_EQUAL(2.0 * v[i], sum(i)); } } { LOG_DEBUG("Difference"); - double v1[] = { 1.1, 0.4, 1.4 }; - double v2[] = { 2.1, 0.3, 0.4 }; + double v1[] = {1.1, 0.4, 1.4}; + double v2[] = {2.1, 0.3, 0.4}; maths::CVectorNx1 vector1(v1); maths::CVectorNx1 vector2(v2); maths::CVectorNx1 difference = vector1 - vector2; LOG_DEBUG("vector = " << difference); - for (std::size_t i = 0u; i < 3; ++i) - { + for (std::size_t i = 0u; i < 3; ++i) { CPPUNIT_ASSERT_EQUAL(v1[i] - v2[i], difference(i)); } } @@ -250,8 +209,7 @@ void CLinearAlgebraTest::testVectorNx1() Eigen::Matrix4d randomMatrix; Eigen::Vector4d randomVector; - for (std::size_t t = 0u; t < 20; ++t) - { + for (std::size_t t = 0u; t < 20; ++t) { randomMatrix.setRandom(); Eigen::Matrix4d a = randomMatrix.selfadjointView(); LOG_DEBUG("A =\n" << a); @@ -266,8 +224,7 @@ void CLinearAlgebraTest::testVectorNx1() LOG_DEBUG("y =\n" << y); maths::CVectorNx1 sy = s * y; LOG_DEBUG("Sy = " << sy); - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { CPPUNIT_ASSERT_EQUAL(expected(i), sy(i)); } } @@ -275,31 +232,28 @@ void CLinearAlgebraTest::testVectorNx1() { LOG_DEBUG("Vector Scalar Multiplication"); - double v[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0, 5.0}; maths::CVectorNx1 vector(v); maths::CVectorNx1 vs = vector * 3.0; LOG_DEBUG("3 * v = " << vs); - for 
(std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i+1)), vs(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i + 1)), vs(i)); } } { LOG_DEBUG("Matrix Scalar Division"); - double v[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0, 5.0}; maths::CVectorNx1 vector(v); maths::CVectorNx1 vs = vector / 4.0; LOG_DEBUG("v / 4.0 = " << vs); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(static_cast((i+1)) / 4.0, vs(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(static_cast((i + 1)) / 4.0, vs(i)); } } } -void CLinearAlgebraTest::testSymmetricMatrix() -{ +void CLinearAlgebraTest::testSymmetricMatrix() { LOG_DEBUG("+-------------------------------------------+"); LOG_DEBUG("| CLinearAlgebraTest::testSymmetricMatrix |"); LOG_DEBUG("+-------------------------------------------+"); @@ -310,10 +264,8 @@ void CLinearAlgebraTest::testSymmetricMatrix() LOG_DEBUG("matrix = " << matrix); CPPUNIT_ASSERT_EQUAL(std::size_t(3), matrix.rows()); CPPUNIT_ASSERT_EQUAL(std::size_t(3), matrix.columns()); - for (std::size_t i = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < 3; ++j) - { + for (std::size_t i = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < 3; ++j) { CPPUNIT_ASSERT_EQUAL(0.0, matrix(i, j)); } } @@ -323,28 +275,21 @@ void CLinearAlgebraTest::testSymmetricMatrix() LOG_DEBUG("matrix = " << matrix); CPPUNIT_ASSERT_EQUAL(std::size_t(4), matrix.rows()); CPPUNIT_ASSERT_EQUAL(std::size_t(4), matrix.columns()); - for (std::size_t i = 0u; i < 4; ++i) - { - for (std::size_t j = 0u; j < 4; ++j) - { + for (std::size_t i = 0u; i < 4; ++i) { + for (std::size_t j = 0u; j < 4; ++j) { CPPUNIT_ASSERT_EQUAL(3.0, matrix(i, j)); } } } { - double m_[][5] = - { - { 1.1, 2.4, 1.4, 3.7, 4.0 }, - { 2.4, 3.2, 1.8, 0.7, 1.0 }, - { 1.4, 1.8, 0.8, 4.7, 3.1 }, - { 3.7, 0.7, 4.7, 4.7, 1.1 }, - { 4.0, 1.0, 3.1, 1.1, 1.0 } - }; + double m_[][5] = {{1.1, 2.4, 1.4, 3.7, 4.0}, + {2.4, 3.2, 1.8, 0.7, 1.0}, + {1.4, 1.8, 0.8, 4.7, 3.1}, + {3.7, 0.7, 4.7, 4.7, 1.1}, + {4.0, 1.0, 3.1, 1.1, 1.0}}; TDoubleVecVec m(5, TDoubleVec(5)); - for (std::size_t i = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j < 5; ++j) - { + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < 5; ++j) { m[i][j] = m_[i][j]; } } @@ -352,32 +297,21 @@ void CLinearAlgebraTest::testSymmetricMatrix() LOG_DEBUG("matrix = " << matrix); CPPUNIT_ASSERT_EQUAL(std::size_t(5), matrix.rows()); CPPUNIT_ASSERT_EQUAL(std::size_t(5), matrix.columns()); - for (std::size_t i = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j < 5; ++j) - { + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < 5; ++j) { CPPUNIT_ASSERT_EQUAL(m[i][j], matrix(i, j)); } } CPPUNIT_ASSERT_EQUAL(10.8, matrix.trace()); } { - double m[] = - { - 1.1, - 2.4, 3.2, - 1.4, 1.8, 0.8, - 3.7, 0.7, 4.7, 4.7, - 4.0, 1.0, 3.1, 1.1, 1.0 - }; + double m[] = {1.1, 2.4, 3.2, 1.4, 1.8, 0.8, 3.7, 0.7, 4.7, 4.7, 4.0, 1.0, 3.1, 1.1, 1.0}; maths::CSymmetricMatrix matrix(boost::begin(m), boost::end(m)); LOG_DEBUG("matrix = " << matrix); CPPUNIT_ASSERT_EQUAL(std::size_t(5), matrix.rows()); CPPUNIT_ASSERT_EQUAL(std::size_t(5), matrix.columns()); - for (std::size_t i = 0u, k = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++k) - { + for (std::size_t i = 0u, k = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++k) { CPPUNIT_ASSERT_EQUAL(m[k], matrix(i, j)); CPPUNIT_ASSERT_EQUAL(m[k], matrix(j, i)); } @@ -385,32 +319,28 @@ void 
CLinearAlgebraTest::testSymmetricMatrix() CPPUNIT_ASSERT_EQUAL(10.8, matrix.trace()); } { - double v[] = { 1.0, 2.0, 3.0, 4.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0}; maths::CVector vector(boost::begin(v), boost::end(v)); maths::CSymmetricMatrix matrix(maths::E_OuterProduct, vector); LOG_DEBUG("matrix = " << matrix); CPPUNIT_ASSERT_EQUAL(std::size_t(4), matrix.rows()); CPPUNIT_ASSERT_EQUAL(std::size_t(4), matrix.columns()); - for (std::size_t i = 0u; i < 4; ++i) - { - for (std::size_t j = 0u; j < 4; ++j) - { - CPPUNIT_ASSERT_EQUAL(static_cast((i+1) * (j+1)), matrix(i, j)); + for (std::size_t i = 0u; i < 4; ++i) { + for (std::size_t j = 0u; j < 4; ++j) { + CPPUNIT_ASSERT_EQUAL(static_cast((i + 1) * (j + 1)), matrix(i, j)); } } CPPUNIT_ASSERT_EQUAL(30.0, matrix.trace()); } { - double v[] = { 1.0, 2.0, 3.0, 4.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0}; maths::CVector vector(boost::begin(v), boost::end(v)); maths::CSymmetricMatrix matrix(maths::E_Diagonal, vector); LOG_DEBUG("matrix = " << matrix); CPPUNIT_ASSERT_EQUAL(std::size_t(4), matrix.rows()); CPPUNIT_ASSERT_EQUAL(std::size_t(4), matrix.columns()); - for (std::size_t i = 0u; i < 4; ++i) - { - for (std::size_t j = 0u; j < 4; ++j) - { + for (std::size_t i = 0u; i < 4; ++i) { + for (std::size_t j = 0u; j < 4; ++j) { CPPUNIT_ASSERT_EQUAL(i == j ? vector(i) : 0.0, matrix(i, j)); } } @@ -420,21 +350,12 @@ void CLinearAlgebraTest::testSymmetricMatrix() { LOG_DEBUG("Sum"); - double m[] = - { - 1.1, - 2.4, 3.2, - 1.4, 1.8, 0.8, - 3.7, 0.7, 4.7, 4.7, - 4.0, 1.0, 3.1, 1.1, 1.0 - }; + double m[] = {1.1, 2.4, 3.2, 1.4, 1.8, 0.8, 3.7, 0.7, 4.7, 4.7, 4.0, 1.0, 3.1, 1.1, 1.0}; maths::CSymmetricMatrix matrix(boost::begin(m), boost::end(m)); maths::CSymmetricMatrix sum = matrix + matrix; LOG_DEBUG("sum = " << sum); - for (std::size_t i = 0u, k = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++k) - { + for (std::size_t i = 0u, k = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++k) { CPPUNIT_ASSERT_EQUAL(2.0 * m[k], sum(i, j)); CPPUNIT_ASSERT_EQUAL(2.0 * m[k], sum(j, i)); } @@ -443,26 +364,14 @@ void CLinearAlgebraTest::testSymmetricMatrix() { LOG_DEBUG("Difference"); - double m1[] = - { - 1.1, - 0.4, 1.2, - 1.4, 1.8, 0.8 - }; - double m2[] = - { - 2.1, - 0.3, 1.2, - 0.4, 3.8, 0.2 - }; + double m1[] = {1.1, 0.4, 1.2, 1.4, 1.8, 0.8}; + double m2[] = {2.1, 0.3, 1.2, 0.4, 3.8, 0.2}; maths::CSymmetricMatrix matrix1(boost::begin(m1), boost::end(m1)); maths::CSymmetricMatrix matrix2(boost::begin(m2), boost::end(m2)); maths::CSymmetricMatrix difference = matrix1 - matrix2; LOG_DEBUG("difference = " << difference); - for (std::size_t i = 0u, k = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j <= i; ++j, ++k) - { + for (std::size_t i = 0u, k = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j <= i; ++j, ++k) { CPPUNIT_ASSERT_EQUAL(m1[k] - m2[k], difference(i, j)); CPPUNIT_ASSERT_EQUAL(m1[k] - m2[k], difference(j, i)); } @@ -471,39 +380,34 @@ void CLinearAlgebraTest::testSymmetricMatrix() { LOG_DEBUG("Matrix Scalar Multiplication"); - double v[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0, 5.0}; maths::CVector vector(boost::begin(v), boost::end(v)); maths::CSymmetricMatrix m(maths::E_OuterProduct, vector); maths::CSymmetricMatrix ms = m * 3.0; LOG_DEBUG("3 * m = " << ms); - for (std::size_t i = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j < 5; ++j) - { - CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i+1) * (j+1)), ms(i, j)); + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < 5; ++j) { + 
CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i + 1) * (j + 1)), ms(i, j)); } } } { LOG_DEBUG("Matrix Scalar Division"); - double v[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0, 5.0}; maths::CVector vector(boost::begin(v), boost::end(v)); maths::CSymmetricMatrix m(maths::E_OuterProduct, vector); maths::CSymmetricMatrix ms = m / 4.0; LOG_DEBUG("m / 4.0 = " << ms); - for (std::size_t i = 0u; i < 5; ++i) - { - for (std::size_t j = 0u; j < 5; ++j) - { - CPPUNIT_ASSERT_EQUAL(static_cast((i+1) * (j+1)) / 4.0, ms(i, j)); + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < 5; ++j) { + CPPUNIT_ASSERT_EQUAL(static_cast((i + 1) * (j + 1)) / 4.0, ms(i, j)); } } } } -void CLinearAlgebraTest::testVector() -{ +void CLinearAlgebraTest::testVector() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CLinearAlgebraTest::testVector |"); LOG_DEBUG("+----------------------------------+"); @@ -513,8 +417,7 @@ void CLinearAlgebraTest::testVector() maths::CVector vector(3); LOG_DEBUG("vector = " << vector); CPPUNIT_ASSERT_EQUAL(std::size_t(3), vector.dimension()); - for (std::size_t i = 0u; i < 3; ++i) - { + for (std::size_t i = 0u; i < 3; ++i) { CPPUNIT_ASSERT_EQUAL(0.0, vector(i)); } } @@ -522,29 +425,26 @@ void CLinearAlgebraTest::testVector() maths::CVector vector(4, 3.0); LOG_DEBUG("vector = " << vector); CPPUNIT_ASSERT_EQUAL(std::size_t(4), vector.dimension()); - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { CPPUNIT_ASSERT_EQUAL(3.0, vector(i)); } } { - double v_[] = { 1.1, 2.4, 1.4, 3.7, 4.0 }; + double v_[] = {1.1, 2.4, 1.4, 3.7, 4.0}; TDoubleVec v(boost::begin(v_), boost::end(v_)); maths::CVector vector(v); CPPUNIT_ASSERT_EQUAL(std::size_t(5), vector.dimension()); LOG_DEBUG("vector = " << vector); - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { CPPUNIT_ASSERT_EQUAL(v[i], vector(i)); } } { - double v[] = { 1.1, 2.4, 1.4, 3.7, 4.0 }; + double v[] = {1.1, 2.4, 1.4, 3.7, 4.0}; maths::CVector vector(boost::begin(v), boost::end(v)); CPPUNIT_ASSERT_EQUAL(std::size_t(5), vector.dimension()); LOG_DEBUG("vector = " << vector); - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { CPPUNIT_ASSERT_EQUAL(v[i], vector(i)); } } @@ -553,26 +453,24 @@ void CLinearAlgebraTest::testVector() { LOG_DEBUG("Sum"); - double v[] = { 1.1, 2.4, 1.4, 3.7, 4.0 }; + double v[] = {1.1, 2.4, 1.4, 3.7, 4.0}; maths::CVector vector(boost::begin(v), boost::end(v)); maths::CVector sum = vector + vector; LOG_DEBUG("vector = " << sum); - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { CPPUNIT_ASSERT_EQUAL(2.0 * v[i], sum(i)); } } { LOG_DEBUG("Difference"); - double v1[] = { 1.1, 0.4, 1.4 }; - double v2[] = { 2.1, 0.3, 0.4 }; + double v1[] = {1.1, 0.4, 1.4}; + double v2[] = {2.1, 0.3, 0.4}; maths::CVector vector1(boost::begin(v1), boost::end(v1)); maths::CVector vector2(boost::begin(v2), boost::end(v2)); maths::CVector difference = vector1 - vector2; LOG_DEBUG("vector = " << difference); - for (std::size_t i = 0u; i < 3; ++i) - { + for (std::size_t i = 0u; i < 3; ++i) { CPPUNIT_ASSERT_EQUAL(v1[i] - v2[i], difference(i)); } } @@ -581,8 +479,7 @@ void CLinearAlgebraTest::testVector() Eigen::MatrixXd randomMatrix(std::size_t(4), std::size_t(4)); Eigen::VectorXd randomVector(4); - for (std::size_t t = 0u; t < 20; ++t) - { + for (std::size_t t = 0u; t < 20; ++t) { randomMatrix.setRandom(); Eigen::MatrixXd a = randomMatrix.selfadjointView(); LOG_DEBUG("A =\n" << a); 
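// Aside: the surrounding blocks verify the wrappers against Eigen directly. A
// minimal standalone version of the reference computation (illustrative only;
// Eigen::Lower and the product form of 'expected' are assumptions inferred
// from the assertions, not taken from the elided test code):
//
//     Eigen::MatrixXd randomMatrix(4, 4);
//     randomMatrix.setRandom();
//     Eigen::MatrixXd a = randomMatrix.selfadjointView<Eigen::Lower>();
//     Eigen::VectorXd y = Eigen::VectorXd::Random(4);
//     Eigen::VectorXd expected = a * y;   // reference for the wrapper product s * y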
@@ -597,8 +494,7 @@ void CLinearAlgebraTest::testVector() LOG_DEBUG("y =\n" << y); maths::CVector sy = s * y; LOG_DEBUG("Sy = " << sy); - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { CPPUNIT_ASSERT_DOUBLES_EQUAL(expected(i), sy(i), 1e-10); } } @@ -606,97 +502,53 @@ void CLinearAlgebraTest::testVector() { LOG_DEBUG("Vector Scalar Multiplication"); - double v[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0, 5.0}; maths::CVector vector(boost::begin(v), boost::end(v)); maths::CVector vs = vector * 3.0; LOG_DEBUG("3 * v = " << vs); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i+1)), vs(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i + 1)), vs(i)); } } { LOG_DEBUG("Matrix Scalar Division"); - double v[] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; + double v[] = {1.0, 2.0, 3.0, 4.0, 5.0}; maths::CVector vector(boost::begin(v), boost::end(v)); maths::CVector vs = vector / 4.0; LOG_DEBUG("v / 4.0 = " << vs); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(static_cast((i+1)) / 4.0, vs(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(static_cast((i + 1)) / 4.0, vs(i)); } } } -void CLinearAlgebraTest::testNorms() -{ +void CLinearAlgebraTest::testNorms() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CLinearAlgebraTest::testNorms |"); LOG_DEBUG("+---------------------------------+"); - double v[][5] = - { - { 1.0, 2.1, 3.2, 1.7, 0.1 }, - { 0.0, -2.1, 1.2, 1.9, 4.1 }, - { -1.0, 7.1, 5.2, 1.7, -0.1 }, - { -3.0, 1.1, -3.3, 1.8, 6.1 } - }; - double expectedEuclidean[] = - { - 4.30697, - 5.12543, - 9.01942, - 7.84538 - }; + double v[][5] = {{1.0, 2.1, 3.2, 1.7, 0.1}, {0.0, -2.1, 1.2, 1.9, 4.1}, {-1.0, 7.1, 5.2, 1.7, -0.1}, {-3.0, 1.1, -3.3, 1.8, 6.1}}; + double expectedEuclidean[] = {4.30697, 5.12543, 9.01942, 7.84538}; - for (std::size_t i = 0u; i < boost::size(v); ++i) - { + for (std::size_t i = 0u; i < boost::size(v); ++i) { maths::CVectorNx1 v_(v[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedEuclidean[i], v_.euclidean(), 5e-6); } - double m[][15] = - { - { 1.0, - 2.1, 3.2, - 1.7, 0.1, 4.2, - 0.3, 2.8, 4.1, 0.1, - 0.4, 1.2, 5.2, 0.2, 6.3 }, - { 0.0, - -2.1, 1.2, - 1.9, 4.1, 4.5, - -3.1, 0.0, 1.3, 7.5, - 0.2, 1.0, 4.5, 8.1, 0.3 }, - { -1.0, - 7.1, 5.2, - 1.7, -0.1, 3.2, - 1.8, -3.2, 4.2, 9.1, - 0.2, 0.4, 4.1, 7.2, 1.3 }, - { -3.0, - 1.1, -3.3, - 1.8, 6.1, -1.3, - 1.3, 4.2, 3.1, 1.9, - -2.3, 3.1, 2.4, 2.3, 1.0 } - }; - double expectedFrobenius[] = - { - 13.78550, - 18.00250, - 20.72052, - 14.80844 - }; + double m[][15] = {{1.0, 2.1, 3.2, 1.7, 0.1, 4.2, 0.3, 2.8, 4.1, 0.1, 0.4, 1.2, 5.2, 0.2, 6.3}, + {0.0, -2.1, 1.2, 1.9, 4.1, 4.5, -3.1, 0.0, 1.3, 7.5, 0.2, 1.0, 4.5, 8.1, 0.3}, + {-1.0, 7.1, 5.2, 1.7, -0.1, 3.2, 1.8, -3.2, 4.2, 9.1, 0.2, 0.4, 4.1, 7.2, 1.3}, + {-3.0, 1.1, -3.3, 1.8, 6.1, -1.3, 1.3, 4.2, 3.1, 1.9, -2.3, 3.1, 2.4, 2.3, 1.0}}; + double expectedFrobenius[] = {13.78550, 18.00250, 20.72052, 14.80844}; - for (std::size_t i = 0u; i < boost::size(m); ++i) - { - maths::CSymmetricMatrixNxN m_(boost::begin(m[i]), - boost::end(m[i])); + for (std::size_t i = 0u; i < boost::size(m); ++i) { + maths::CSymmetricMatrixNxN m_(boost::begin(m[i]), boost::end(m[i])); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedFrobenius[i], m_.frobenius(), 5e-6); } } -void CLinearAlgebraTest::testUtils() -{ +void CLinearAlgebraTest::testUtils() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CLinearAlgebraTest::testUtils |"); 
LOG_DEBUG("+---------------------------------+"); @@ -705,269 +557,175 @@ void CLinearAlgebraTest::testUtils() { LOG_DEBUG("Vector min, max, fabs, sqrt"); - const double v1_[] = { 1.0, 3.1, 2.2, 4.9, 12.0 }; + const double v1_[] = {1.0, 3.1, 2.2, 4.9, 12.0}; maths::CVectorNx1 v1(v1_); - const double v2_[] = { 1.5, 3.0, 2.7, 5.2, 8.0 }; + const double v2_[] = {1.5, 3.0, 2.7, 5.2, 8.0}; maths::CVectorNx1 v2(v2_); - const double v3_[] = { -1.0, 3.1, -2.2, -4.9, 12.0 }; + const double v3_[] = {-1.0, 3.1, -2.2, -4.9, 12.0}; maths::CVectorNx1 v3(v3_); { - double expected[] = { 1.0, 3.1, 2.2, 4.0, 4.0 }; + double expected[] = {1.0, 3.1, 2.2, 4.0, 4.0}; LOG_DEBUG("min(v1, 4.0) = " << maths::min(v1, 4.0)); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(expected[i], - (maths::min(v1, 4.0))(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(expected[i], (maths::min(v1, 4.0))(i)); } } - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL((maths::min(v1, 4.0))(i), - (maths::min(4.0, v1))(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL((maths::min(v1, 4.0))(i), (maths::min(4.0, v1))(i)); } { - double expected[] = { 1.0, 3.0, 2.2, 4.9, 8.0 }; + double expected[] = {1.0, 3.0, 2.2, 4.9, 8.0}; LOG_DEBUG("min(v1, v2) = " << maths::min(v1, v2)); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(expected[i], - (maths::min(v1, v2))(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(expected[i], (maths::min(v1, v2))(i)); } } { - double expected[] = { 3.0, 3.1, 3.0, 4.9, 12.0 }; + double expected[] = {3.0, 3.1, 3.0, 4.9, 12.0}; LOG_DEBUG("max(v1, 3.0) = " << maths::max(v1, 3.0)); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(expected[i], - (maths::max(v1, 3.0))(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(expected[i], (maths::max(v1, 3.0))(i)); } } - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL((maths::max(v1, 3.0))(i), - (maths::max(3.0, v1))(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL((maths::max(v1, 3.0))(i), (maths::max(3.0, v1))(i)); } { - double expected[] = { 1.5, 3.1, 2.7, 5.2, 12.0 }; + double expected[] = {1.5, 3.1, 2.7, 5.2, 12.0}; LOG_DEBUG("max(v1, v2) = " << maths::max(v1, v2)); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(expected[i], - (maths::max(v1, v2))(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(expected[i], (maths::max(v1, v2))(i)); } } { - double expected[] = { 1.0, std::sqrt(3.1), std::sqrt(2.2), std::sqrt(4.9), std::sqrt(12.0) }; + double expected[] = {1.0, std::sqrt(3.1), std::sqrt(2.2), std::sqrt(4.9), std::sqrt(12.0)}; LOG_DEBUG("sqrt(v1) = " << maths::sqrt(v1)); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(expected[i], - (maths::sqrt(v1))(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(expected[i], (maths::sqrt(v1))(i)); } } { - const double expected[] = { 1.0, 3.1, 2.2, 4.9, 12.0 }; + const double expected[] = {1.0, 3.1, 2.2, 4.9, 12.0}; LOG_DEBUG("fabs(v3) = " << maths::fabs(v3)); - for (std::size_t i = 0u; i < 5; ++i) - { - CPPUNIT_ASSERT_EQUAL(expected[i], - (maths::fabs(v3))(i)); + for (std::size_t i = 0u; i < 5; ++i) { + CPPUNIT_ASSERT_EQUAL(expected[i], (maths::fabs(v3))(i)); } } } { LOG_DEBUG("Matrix min, max, fabs, sqrt"); - double m1_[][3] = - { - { 2.1, 0.3, 0.4 }, - { 0.3, 1.2, 3.8 }, - { 0.4, 3.8, 0.2 } - }; + double m1_[][3] = {{2.1, 0.3, 0.4}, {0.3, 1.2, 3.8}, {0.4, 3.8, 0.2}}; 
maths::CSymmetricMatrixNxN m1(m1_); - double m2_[][3] = - { - { 1.1, 0.4, 1.4 }, - { 0.4, 1.2, 1.8 }, - { 1.4, 1.8, 0.8 } - }; + double m2_[][3] = {{1.1, 0.4, 1.4}, {0.4, 1.2, 1.8}, {1.4, 1.8, 0.8}}; maths::CSymmetricMatrixNxN m2(m2_); - double m3_[][3] = - { - { -2.1, 0.3, 0.4 }, - { 0.3, -1.2, -3.8 }, - { 0.4, -3.8, 0.2 } - }; + double m3_[][3] = {{-2.1, 0.3, 0.4}, {0.3, -1.2, -3.8}, {0.4, -3.8, 0.2}}; maths::CSymmetricMatrixNxN m3(m3_); { - double expected[][3] = - { - { 2.1, 0.3, 0.4 }, - { 0.3, 1.2, 3.0 }, - { 0.4, 3.0, 0.2 } - }; + double expected[][3] = {{2.1, 0.3, 0.4}, {0.3, 1.2, 3.0}, {0.4, 3.0, 0.2}}; LOG_DEBUG("min(m1, 3.0) = " << maths::min(m1, 3.0)); - for (std::size_t i = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < 3; ++j) - { - CPPUNIT_ASSERT_EQUAL(expected[i][j], - (maths::min(m1, 3.0))(i, j)); + for (std::size_t i = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < 3; ++j) { + CPPUNIT_ASSERT_EQUAL(expected[i][j], (maths::min(m1, 3.0))(i, j)); } } } - for (std::size_t i = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < 3; ++j) - { - CPPUNIT_ASSERT_EQUAL((maths::min(m1, 3.0))(i, j), - (maths::min(3.0, m1))(i, j)); + for (std::size_t i = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < 3; ++j) { + CPPUNIT_ASSERT_EQUAL((maths::min(m1, 3.0))(i, j), (maths::min(3.0, m1))(i, j)); } } { - double expected[][3] = - { - { 1.1, 0.3, 0.4 }, - { 0.3, 1.2, 1.8 }, - { 0.4, 1.8, 0.2 } - }; + double expected[][3] = {{1.1, 0.3, 0.4}, {0.3, 1.2, 1.8}, {0.4, 1.8, 0.2}}; LOG_DEBUG("min(m1, m2) = " << maths::min(m1, m2)); - for (std::size_t i = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < 3; ++j) - { - CPPUNIT_ASSERT_EQUAL(expected[i][j], - (maths::min(m1, m2))(i, j)); + for (std::size_t i = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < 3; ++j) { + CPPUNIT_ASSERT_EQUAL(expected[i][j], (maths::min(m1, m2))(i, j)); } } } { - double expected[][3] = - { - { 2.1, 2.0, 2.0 }, - { 2.0, 2.0, 3.8 }, - { 2.0, 3.8, 2.0 } - }; + double expected[][3] = {{2.1, 2.0, 2.0}, {2.0, 2.0, 3.8}, {2.0, 3.8, 2.0}}; LOG_DEBUG("max(m1, 2.0) = " << maths::max(m1, 2.0)); - for (std::size_t i = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < 3; ++j) - { - CPPUNIT_ASSERT_EQUAL(expected[i][j], - (maths::max(m1, 2.0))(i, j)); + for (std::size_t i = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < 3; ++j) { + CPPUNIT_ASSERT_EQUAL(expected[i][j], (maths::max(m1, 2.0))(i, j)); } } } - for (std::size_t i = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < 3; ++j) - { - CPPUNIT_ASSERT_EQUAL((maths::max(m1, 2.0))(i, j), - (maths::max(2.0, m1))(i, j)); + for (std::size_t i = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < 3; ++j) { + CPPUNIT_ASSERT_EQUAL((maths::max(m1, 2.0))(i, j), (maths::max(2.0, m1))(i, j)); } } { - double expected[][3] = - { - { 2.1, 0.4, 1.4 }, - { 0.4, 1.2, 3.8 }, - { 1.4, 3.8, 0.8 } - }; + double expected[][3] = {{2.1, 0.4, 1.4}, {0.4, 1.2, 3.8}, {1.4, 3.8, 0.8}}; LOG_DEBUG("max(m1, m2) = " << maths::max(m1, m2)); - for (std::size_t i = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < 3; ++j) - { - CPPUNIT_ASSERT_EQUAL(expected[i][j], - (maths::max(m1, m2))(i, j)); + for (std::size_t i = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < 3; ++j) { + CPPUNIT_ASSERT_EQUAL(expected[i][j], (maths::max(m1, m2))(i, j)); } } } { - double expected[][3] = - { - { std::sqrt(2.1), std::sqrt(0.3), std::sqrt(0.4) }, - { std::sqrt(0.3), std::sqrt(1.2), std::sqrt(3.8) }, - { std::sqrt(0.4), std::sqrt(3.8), std::sqrt(0.2) } - }; + double expected[][3] = {{std::sqrt(2.1), std::sqrt(0.3), 
std::sqrt(0.4)},
+                                {std::sqrt(0.3), std::sqrt(1.2), std::sqrt(3.8)},
+                                {std::sqrt(0.4), std::sqrt(3.8), std::sqrt(0.2)}};
            LOG_DEBUG("sqrt(m1) = " << maths::sqrt(m1));
-            for (std::size_t i = 0u; i < 3; ++i)
-            {
-                for (std::size_t j = 0u; j < 3; ++j)
-                {
-                    CPPUNIT_ASSERT_EQUAL(expected[i][j],
-                                         (maths::sqrt(m1))(i, j));
+            for (std::size_t i = 0u; i < 3; ++i) {
+                for (std::size_t j = 0u; j < 3; ++j) {
+                    CPPUNIT_ASSERT_EQUAL(expected[i][j], (maths::sqrt(m1))(i, j));
                }
            }
        }
        {
-            double expected[][3] =
-                {
-                    { 2.1, 0.3, 0.4 },
-                    { 0.3, 1.2, 3.8 },
-                    { 0.4, 3.8, 0.2 }
-                };
+            double expected[][3] = {{2.1, 0.3, 0.4}, {0.3, 1.2, 3.8}, {0.4, 3.8, 0.2}};
            LOG_DEBUG("fabs(m3) = " << maths::fabs(m3));
-            for (std::size_t i = 0u; i < 3; ++i)
-            {
-                for (std::size_t j = 0u; j < 3; ++j)
-                {
-                    CPPUNIT_ASSERT_EQUAL(expected[i][j],
-                                         (maths::fabs(m3))(i, j));
+            for (std::size_t i = 0u; i < 3; ++i) {
+                for (std::size_t j = 0u; j < 3; ++j) {
+                    CPPUNIT_ASSERT_EQUAL(expected[i][j], (maths::fabs(m3))(i, j));
                }
            }
        }
    }
 }
 
-void CLinearAlgebraTest::testGaussianLogLikelihood()
-{
+void CLinearAlgebraTest::testGaussianLogLikelihood() {
    LOG_DEBUG("+-------------------------------------------------+");
    LOG_DEBUG("| CLinearAlgebraTest::testGaussianLogLikelihood |");
    LOG_DEBUG("+-------------------------------------------------+");
 
    // Test the log likelihood (expected from octave).
    {
-        const double covariance_[][4] =
-            {
-                { 10.70779, 0.14869, 1.44263, 2.26889 },
-                { 0.14869, 10.70919, 2.56363, 1.87805 },
-                { 1.44263, 2.56363, 11.90966, 2.44121 },
-                { 2.26889, 1.87805, 2.44121, 11.53904 }
-            };
+        const double covariance_[][4] = {{10.70779, 0.14869, 1.44263, 2.26889},
+                                         {0.14869, 10.70919, 2.56363, 1.87805},
+                                         {1.44263, 2.56363, 11.90966, 2.44121},
+                                         {2.26889, 1.87805, 2.44121, 11.53904}};
        maths::CSymmetricMatrixNxN<double, 4> covariance(covariance_);
 
-        const double x_[][4] =
-            {
-                { -1.335028, -0.222988, -0.174935, -0.480772 },
-                { 0.137550, 1.286252, 0.027043, 1.349709 },
-                { -0.445561, 2.390953, 0.302770, 0.084871 },
-                { 0.275802, 0.408910, -2.247157, 0.196043 },
-                { 0.179101, 0.177340, -0.456634, 5.314863 },
-                { 0.260426, 0.325159, 1.214650, -1.267697 },
-                { -0.363917, -0.422225, 0.360000, 0.401383 },
-                { 1.492814, 3.257986, 0.065441, -0.187108 },
-                { 1.214063, 0.067988, -0.241846, -0.425730 },
-                { -0.306693, -0.188497, -1.092719, 1.288093 }
-            };
-
-        const double expected[] =
-            {
-                -8.512128, -8.569778, -8.706920, -8.700537, -9.794163,
-                -8.602336, -8.462027, -9.096402, -8.521042, -8.590054
-            };
-
-        for (std::size_t i = 0u; i < boost::size(x_); ++i)
-        {
+        const double x_[][4] = {{-1.335028, -0.222988, -0.174935, -0.480772},
+                                {0.137550, 1.286252, 0.027043, 1.349709},
+                                {-0.445561, 2.390953, 0.302770, 0.084871},
+                                {0.275802, 0.408910, -2.247157, 0.196043},
+                                {0.179101, 0.177340, -0.456634, 5.314863},
+                                {0.260426, 0.325159, 1.214650, -1.267697},
+                                {-0.363917, -0.422225, 0.360000, 0.401383},
+                                {1.492814, 3.257986, 0.065441, -0.187108},
+                                {1.214063, 0.067988, -0.241846, -0.425730},
+                                {-0.306693, -0.188497, -1.092719, 1.288093}};
+
+        const double expected[] = {
+            -8.512128, -8.569778, -8.706920, -8.700537, -9.794163, -8.602336, -8.462027, -9.096402, -8.521042, -8.590054};
+
+        for (std::size_t i = 0u; i < boost::size(x_); ++i) {
            maths::CVectorNx1<double, 4> x(x_[i]);
            double likelihood;
            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, x, likelihood));
@@ -979,40 +737,31 @@ void CLinearAlgebraTest::testGaussianLogLikelihood()
 
    // Test log likelihood singular matrix.
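// (For covariance C = sum_k e_k u_k u_k' with orthonormal u_k, the density is
// supported on span{u_k : e_k > 0} and the quantity under test is
//
//     log L(x) = -0.5 * (r * log(2 * pi) + sum_{e_k > 0} log(e_k)
//                        + sum_{e_k > 0} (u_k . x)^2 / e_k),
//
// where r is the rank. This is where the expected values below come from: for
// x = e1, which has |e1|^2 = 4 and eigenvalue 10, the quadratic term is
// 4.0 / 10.0, and r = 3 with log(e_1 e_2 e_3) = log(10.0 * 5.0 * 5.0).)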
    {
-        double e1_[] = { 1.0, 1.0, 1.0, 1.0 };
-        double e2_[] = { -1.0, 1.0, 0.0, 0.0 };
-        double e3_[] = { -1.0, -1.0, 2.0, 0.0 };
-        double e4_[] = { -1.0, -1.0, -1.0, 3.0 };
+        double e1_[] = {1.0, 1.0, 1.0, 1.0};
+        double e2_[] = {-1.0, 1.0, 0.0, 0.0};
+        double e3_[] = {-1.0, -1.0, 2.0, 0.0};
+        double e4_[] = {-1.0, -1.0, -1.0, 3.0};
        maths::CVectorNx1<double, 4> e1(e1_);
        maths::CVectorNx1<double, 4> e2(e2_);
        maths::CVectorNx1<double, 4> e3(e3_);
        maths::CVectorNx1<double, 4> e4(e4_);
        maths::CSymmetricMatrixNxN<double, 4> covariance(
-                  10.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e1 / e1.euclidean())
-                +  5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e2 / e2.euclidean())
-                +  5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e3 / e3.euclidean()));
+            10.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e1 / e1.euclidean()) +
+            5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e2 / e2.euclidean()) +
+            5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e3 / e3.euclidean()));
 
        double likelihood;
        CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e1, likelihood));
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.5 * (  3.0 * std::log(boost::math::double_constants::two_pi)
-                                             + std::log(10.0 * 5.0 * 5.0)
-                                             + 4.0 / 10.0),
-                                     likelihood,
-                                     1e-10);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(
+            -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0) + 4.0 / 10.0), likelihood, 1e-10);
        CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e2, likelihood));
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.5 * (  3.0 * std::log(boost::math::double_constants::two_pi)
-                                             + std::log(10.0 * 5.0 * 5.0)
-                                             + 2.0 / 5.0),
-                                     likelihood,
-                                     1e-10);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(
+            -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0) + 2.0 / 5.0), likelihood, 1e-10);
        CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e3, likelihood));
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.5 * (  3.0 * std::log(boost::math::double_constants::two_pi)
-                                             + std::log(10.0 * 5.0 * 5.0)
-                                             + 6.0 / 5.0),
-                                     likelihood,
-                                     1e-10);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(
+            -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0) + 6.0 / 5.0), likelihood, 1e-10);
 
        CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, maths::gaussianLogLikelihood(covariance, e1, likelihood, false));
        CPPUNIT_ASSERT(likelihood > 0.0);
@@ -1022,71 +771,66 @@ void CLinearAlgebraTest::testGaussianLogLikelihood()
 
    // Construct a matrix whose eigenvalues and vectors are known.
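// (Same construction with a fourth, nonzero eigenvalue, so the full-rank
// formula applies with r = 4 and determinant 10 * 5 * 5 * 2. A worked
// instance, for the x = e4 case below: |e4|^2 = 1 + 1 + 1 + 9 = 12 and its
// eigenvalue is 2, giving the 12.0 / 2.0 term in the corresponding assertion.)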
{ - double e1_[] = { 1.0, 1.0, 1.0, 1.0 }; - double e2_[] = { -1.0, 1.0, 0.0, 0.0 }; - double e3_[] = { -1.0, -1.0, 2.0, 0.0 }; - double e4_[] = { -1.0, -1.0, -1.0, 3.0 }; + double e1_[] = {1.0, 1.0, 1.0, 1.0}; + double e2_[] = {-1.0, 1.0, 0.0, 0.0}; + double e3_[] = {-1.0, -1.0, 2.0, 0.0}; + double e4_[] = {-1.0, -1.0, -1.0, 3.0}; maths::CVectorNx1 e1(e1_); maths::CVectorNx1 e2(e2_); maths::CVectorNx1 e3(e3_); maths::CVectorNx1 e4(e4_); maths::CSymmetricMatrixNxN covariance( - 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e1 / e1.euclidean()) - + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) - + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean()) - + 2.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e4 / e4.euclidean())); + 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e1 / e1.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean()) + + 2.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e4 / e4.euclidean())); double likelihood; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e1, likelihood)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.5 * ( 4.0 * std::log(boost::math::double_constants::two_pi) - + std::log(10.0 * 5.0 * 5.0 * 2.0) - + 4.0 / 10.0), - likelihood, - 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0 * 2.0) + 4.0 / 10.0), + likelihood, + 1e-10); CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e2, likelihood)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.5 * ( 4.0 * std::log(boost::math::double_constants::two_pi) - + std::log(10.0 * 5.0 * 5.0 * 2.0) - + 2.0 / 5.0), - likelihood, - 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0 * 2.0) + 2.0 / 5.0), + likelihood, + 1e-10); CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e3, likelihood)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.5 * ( 4.0 * std::log(boost::math::double_constants::two_pi) - + std::log(10.0 * 5.0 * 5.0 * 2.0) - + 6.0 / 5.0), - likelihood, - 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0 * 2.0) + 6.0 / 5.0), + likelihood, + 1e-10); CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e4, likelihood)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.5 * ( 4.0 * std::log(boost::math::double_constants::two_pi) - + std::log(10.0 * 5.0 * 5.0 * 2.0) - + 12.0 / 2.0), - likelihood, - 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0 * 2.0) + 12.0 / 2.0), + likelihood, + 1e-10); } } -void CLinearAlgebraTest::testSampleGaussian() -{ +void CLinearAlgebraTest::testSampleGaussian() { LOG_DEBUG("+------------------------------------------+"); LOG_DEBUG("| CLinearAlgebraTest::testSampleGaussian |"); LOG_DEBUG("+------------------------------------------+"); // Test singular matrix. 
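// (sampleGaussian is expected to cope with a rank-deficient covariance: the
// samples are confined to the span of the eigenvectors with nonzero
// eigenvalues, and their first two moments should reproduce the inputs. The
// assertions below check the relative errors
//
//     |mean - sampleMean| / |mean|      ~ 0 (to 1e-10)
//     |C - sampleCov|_F / |C|_F         < 0.01 for 100 samples.)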
{ - double m[] = { 1.0, 2.0, 3.0, 4.0 }; + double m[] = {1.0, 2.0, 3.0, 4.0}; maths::CVectorNx1 mean(m); - double e1_[] = { 1.0, 1.0, 1.0, 1.0 }; - double e2_[] = { -1.0, 1.0, 0.0, 0.0 }; - double e3_[] = { -1.0, -1.0, 2.0, 0.0 }; - double e4_[] = { -1.0, -1.0, -1.0, 3.0 }; + double e1_[] = {1.0, 1.0, 1.0, 1.0}; + double e2_[] = {-1.0, 1.0, 0.0, 0.0}; + double e3_[] = {-1.0, -1.0, 2.0, 0.0}; + double e4_[] = {-1.0, -1.0, -1.0, 3.0}; maths::CVectorNx1 e1(e1_); maths::CVectorNx1 e2(e2_); maths::CVectorNx1 e3(e3_); maths::CVectorNx1 e4(e4_); maths::CSymmetricMatrixNxN covariance( - 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e1 / e1.euclidean()) - + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) - + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean())); + 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e1 / e1.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean())); std::vector> samples; maths::sampleGaussian(100, mean, covariance, samples); @@ -1095,8 +839,7 @@ void CLinearAlgebraTest::testSampleGaussian() maths::CBasicStatistics::SSampleCovariances covariances; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { covariances.add(samples[i]); } @@ -1105,39 +848,34 @@ void CLinearAlgebraTest::testSampleGaussian() LOG_DEBUG("sample mean = " << maths::CBasicStatistics::mean(covariances)); LOG_DEBUG("sample covariance = " << maths::CBasicStatistics::maximumLikelihoodCovariances(covariances)); - maths::CVectorNx1 meanError = - maths::CVectorNx1(mean) - - maths::CBasicStatistics::mean(covariances); + maths::CVectorNx1 meanError = maths::CVectorNx1(mean) - maths::CBasicStatistics::mean(covariances); maths::CSymmetricMatrixNxN covarianceError = - maths::CSymmetricMatrixNxN(covariance) - - maths::CBasicStatistics::maximumLikelihoodCovariances(covariances); + maths::CSymmetricMatrixNxN(covariance) - maths::CBasicStatistics::maximumLikelihoodCovariances(covariances); - LOG_DEBUG("|error| / |mean| = " - << meanError.euclidean() / mean.euclidean()); - LOG_DEBUG("|error| / |covariance| = " - << covarianceError.frobenius() / covariance.frobenius()); + LOG_DEBUG("|error| / |mean| = " << meanError.euclidean() / mean.euclidean()); + LOG_DEBUG("|error| / |covariance| = " << covarianceError.frobenius() / covariance.frobenius()); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError.euclidean() / mean.euclidean(), 1e-10); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, covarianceError.frobenius() / covariance.frobenius(), 0.01); } // Construct a matrix whose eigenvalues and vectors are known. 
    {
-        double m[] = { 15.0, 0.0, 1.0, 5.0 };
+        double m[] = {15.0, 0.0, 1.0, 5.0};
        maths::CVectorNx1<double, 4> mean(m);
 
-        double e1_[] = { 1.0, 1.0, 1.0, 1.0 };
-        double e2_[] = { -1.0, 1.0, 0.0, 0.0 };
-        double e3_[] = { -1.0, -1.0, 2.0, 0.0 };
-        double e4_[] = { -1.0, -1.0, -1.0, 3.0 };
+        double e1_[] = {1.0, 1.0, 1.0, 1.0};
+        double e2_[] = {-1.0, 1.0, 0.0, 0.0};
+        double e3_[] = {-1.0, -1.0, 2.0, 0.0};
+        double e4_[] = {-1.0, -1.0, -1.0, 3.0};
        maths::CVectorNx1<double, 4> e1(e1_);
        maths::CVectorNx1<double, 4> e2(e2_);
        maths::CVectorNx1<double, 4> e3(e3_);
        maths::CVectorNx1<double, 4> e4(e4_);
        maths::CSymmetricMatrixNxN<double, 4> covariance(
-                  10.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e1 / e1.euclidean())
-                +  5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e2 / e2.euclidean())
-                +  5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e3 / e3.euclidean())
-                +  2.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e4 / e4.euclidean()));
+            10.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e1 / e1.euclidean()) +
+            5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e2 / e2.euclidean()) +
+            5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e3 / e3.euclidean()) +
+            2.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e4 / e4.euclidean()));
 
        std::vector<maths::CVectorNx1<double, 4>> samples;
        maths::sampleGaussian(100, mean, covariance, samples);
 
@@ -1146,8 +884,7 @@ void CLinearAlgebraTest::testSampleGaussian()
 
        maths::CBasicStatistics::SSampleCovariances<double, 4> covariances;
 
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
            covariances.add(samples[i]);
        }
 
@@ -1156,72 +893,39 @@ void CLinearAlgebraTest::testSampleGaussian()
        LOG_DEBUG("sample mean = " << maths::CBasicStatistics::mean(covariances));
        LOG_DEBUG("sample covariance = " << maths::CBasicStatistics::maximumLikelihoodCovariances(covariances));
 
-        maths::CVectorNx1<double, 4> meanError =
-                maths::CVectorNx1<double, 4>(mean)
-              - maths::CBasicStatistics::mean(covariances);
+        maths::CVectorNx1<double, 4> meanError = maths::CVectorNx1<double, 4>(mean) - maths::CBasicStatistics::mean(covariances);
        maths::CSymmetricMatrixNxN<double, 4> covarianceError =
-                maths::CSymmetricMatrixNxN<double, 4>(covariance)
-              - maths::CBasicStatistics::maximumLikelihoodCovariances(covariances);
+            maths::CSymmetricMatrixNxN<double, 4>(covariance) - maths::CBasicStatistics::maximumLikelihoodCovariances(covariances);
 
-        LOG_DEBUG("|error| / |mean| = "
-                  << meanError.euclidean() / mean.euclidean());
-        LOG_DEBUG("|error| / |covariance| = "
-                  << covarianceError.frobenius() / covariance.frobenius());
+        LOG_DEBUG("|error| / |mean| = " << meanError.euclidean() / mean.euclidean());
+        LOG_DEBUG("|error| / |covariance| = " << covarianceError.frobenius() / covariance.frobenius());
 
        CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError.euclidean() / mean.euclidean(), 1e-10);
        CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, covarianceError.frobenius() / covariance.frobenius(), 0.02);
    }
 }
 
-void CLinearAlgebraTest::testLogDeterminant()
-{
+void CLinearAlgebraTest::testLogDeterminant() {
    LOG_DEBUG("+------------------------------------------+");
    LOG_DEBUG("| CLinearAlgebraTest::testLogDeterminant |");
    LOG_DEBUG("+------------------------------------------+");
 
    // Test the determinant (expected from octave).
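// (The expected values here were generated externally in octave. A minimal
// way to reproduce one, assuming Eigen and a positive-definite M, uses the
// Cholesky factorisation M = L L', for which log det(M) = 2 * sum_i log(L_ii):
//
//     Eigen::Matrix3d M;
//     M << 0.25451, 0.52345, 0.61308,
//          0.52345, 1.19825, 1.12804,
//          0.61308, 1.12804, 1.78833;
//     Eigen::LLT<Eigen::Matrix3d> llt(M);
//     Eigen::Matrix3d L = llt.matrixL();
//     double logDet = 2.0 * L.diagonal().array().log().sum();   // ~ log(5.1523e-03)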
    {
-        const double matrices[][3][3] =
-            {
-                { { 0.25451, 0.52345, 0.61308 },
-                  { 0.52345, 1.19825, 1.12804 },
-                  { 0.61308, 1.12804, 1.78833 } },
-                { { 0.83654, 0.24520, 0.80310 },
-                  { 0.24520, 0.38368, 0.30554 },
-                  { 0.80310, 0.30554, 0.78936 } },
-                { { 0.73063, 0.87818, 0.85836 },
-                  { 0.87818, 1.50305, 1.17931 },
-                  { 0.85836, 1.17931, 1.05850 } },
-                { { 0.38947, 0.61062, 0.34423 },
-                  { 0.61062, 1.60437, 0.91664 },
-                  { 0.34423, 0.91664, 0.52448 } },
-                { { 1.79563, 1.78751, 2.17200 },
-                  { 1.78751, 1.83443, 2.17340 },
-                  { 2.17200, 2.17340, 2.62958 } },
-                { { 0.57023, 0.47992, 0.71581 },
-                  { 0.47992, 1.09182, 0.97989 },
-                  { 0.71581, 0.97989, 1.32316 } },
-                { { 2.31264, 0.72098, 2.38050 },
-                  { 0.72098, 0.28103, 0.78025 },
-                  { 2.38050, 0.78025, 2.49219 } },
-                { { 0.83678, 0.45230, 0.74564 },
-                  { 0.45230, 0.26482, 0.33491 },
-                  { 0.74564, 0.33491, 1.29216 } },
-                { { 0.84991, 0.85443, 0.36922 },
-                  { 0.85443, 1.12737, 0.83074 },
-                  { 0.36922, 0.83074, 1.01195 } },
-                { { 0.27156, 0.26441, 0.29726 },
-                  { 0.26441, 0.32388, 0.18895 },
-                  { 0.29726, 0.18895, 0.47884 } }
-            };
-
-        const double expected[] =
-            {
-                5.1523e-03, 6.7423e-04, 4.5641e-04, 1.5880e-04, 3.1654e-06,
-                8.5319e-02, 2.0840e-03, 6.8008e-03, 1.4755e-02, 2.6315e-05
-            };
-
-        for (std::size_t i = 0u; i < boost::size(matrices); ++i)
-        {
+        const double matrices[][3][3] = {{{0.25451, 0.52345, 0.61308}, {0.52345, 1.19825, 1.12804}, {0.61308, 1.12804, 1.78833}},
+                                         {{0.83654, 0.24520, 0.80310}, {0.24520, 0.38368, 0.30554}, {0.80310, 0.30554, 0.78936}},
+                                         {{0.73063, 0.87818, 0.85836}, {0.87818, 1.50305, 1.17931}, {0.85836, 1.17931, 1.05850}},
+                                         {{0.38947, 0.61062, 0.34423}, {0.61062, 1.60437, 0.91664}, {0.34423, 0.91664, 0.52448}},
+                                         {{1.79563, 1.78751, 2.17200}, {1.78751, 1.83443, 2.17340}, {2.17200, 2.17340, 2.62958}},
+                                         {{0.57023, 0.47992, 0.71581}, {0.47992, 1.09182, 0.97989}, {0.71581, 0.97989, 1.32316}},
+                                         {{2.31264, 0.72098, 2.38050}, {0.72098, 0.28103, 0.78025}, {2.38050, 0.78025, 2.49219}},
+                                         {{0.83678, 0.45230, 0.74564}, {0.45230, 0.26482, 0.33491}, {0.74564, 0.33491, 1.29216}},
+                                         {{0.84991, 0.85443, 0.36922}, {0.85443, 1.12737, 0.83074}, {0.36922, 0.83074, 1.01195}},
+                                         {{0.27156, 0.26441, 0.29726}, {0.26441, 0.32388, 0.18895}, {0.29726, 0.18895, 0.47884}}};
+
+        const double expected[] = {
+            5.1523e-03, 6.7423e-04, 4.5641e-04, 1.5880e-04, 3.1654e-06, 8.5319e-02, 2.0840e-03, 6.8008e-03, 1.4755e-02, 2.6315e-05};
+
+        for (std::size_t i = 0u; i < boost::size(matrices); ++i) {
            maths::CSymmetricMatrixNxN<double, 3> M(matrices[i]);
            double logDeterminant;
            maths::logDeterminant(M, logDeterminant);
@@ -1233,64 +937,53 @@ void CLinearAlgebraTest::testLogDeterminant()
 
    // Construct a matrix whose eigenvalues and vectors are known.
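// (With orthonormal u_k and M = sum_k e_k u_k u_k', the determinant is just
// the product of the eigenvalues, so the assertion below reduces to
// log det(M) = log(10.0 * 5.0 * 5.0 * 2.0) with no decomposition needed.)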
    {
-        double e1_[] = { 1.0, 1.0, 1.0, 1.0 };
-        double e2_[] = { -1.0, 1.0, 0.0, 0.0 };
-        double e3_[] = { -1.0, -1.0, 2.0, 0.0 };
-        double e4_[] = { -1.0, -1.0, -1.0, 3.0 };
+        double e1_[] = {1.0, 1.0, 1.0, 1.0};
+        double e2_[] = {-1.0, 1.0, 0.0, 0.0};
+        double e3_[] = {-1.0, -1.0, 2.0, 0.0};
+        double e4_[] = {-1.0, -1.0, -1.0, 3.0};
        maths::CVectorNx1<double, 4> e1(e1_);
        maths::CVectorNx1<double, 4> e2(e2_);
        maths::CVectorNx1<double, 4> e3(e3_);
        maths::CVectorNx1<double, 4> e4(e4_);
-        maths::CSymmetricMatrixNxN<double, 4> M(
-                  10.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e1 / e1.euclidean())
-                +  5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e2 / e2.euclidean())
-                +  5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e3 / e3.euclidean())
-                +  2.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e4 / e4.euclidean()));
+        maths::CSymmetricMatrixNxN<double, 4> M(10.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e1 / e1.euclidean()) +
+                                                5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e2 / e2.euclidean()) +
+                                                5.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e3 / e3.euclidean()) +
+                                                2.0 * maths::CSymmetricMatrixNxN<double, 4>(maths::E_OuterProduct, e4 / e4.euclidean()));
        double logDeterminant;
        maths::logDeterminant(M, logDeterminant);
        CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(10.0 * 5.0 * 5.0 * 2.0), logDeterminant, 1e-10);
    }
 }
 
-namespace
-{
+namespace {
 
 template<typename MATRIX>
-std::string print(const MATRIX &m)
-{
+std::string print(const MATRIX& m) {
    std::ostringstream result;
    result << m;
    return result.str();
 }
-
 }
 
-void CLinearAlgebraTest::testProjected()
-{
+void CLinearAlgebraTest::testProjected() {
    LOG_DEBUG("+-------------------------------------+");
    LOG_DEBUG("| CLinearAlgebraTest::testProjected |");
    LOG_DEBUG("+-------------------------------------+");
 
    using TSizeVec = std::vector<std::size_t>;
 
-    const double m[][5] =
-        {
-            { 1.2, 2.4, 1.9, 3.8, 8.3 },
-            { 2.4, 1.0, 0.2, 1.6, 3.1 },
-            { 1.9, 0.2, 8.1, 1.1, 0.1 },
-            { 3.8, 1.6, 1.1, 3.7, 7.3 },
-            { 8.3, 3.1, 0.1, 7.3, 0.9 }
-        };
-    const double v[] =
-        {
-            0.3, 3.4, 10.6, 0.9, 5.7
-        };
+    const double m[][5] = {{1.2, 2.4, 1.9, 3.8, 8.3},
+                           {2.4, 1.0, 0.2, 1.6, 3.1},
+                           {1.9, 0.2, 8.1, 1.1, 0.1},
+                           {3.8, 1.6, 1.1, 3.7, 7.3},
+                           {8.3, 3.1, 0.1, 7.3, 0.9}};
+    const double v[] = {0.3, 3.4, 10.6, 0.9, 5.7};
 
    maths::CSymmetricMatrixNxN<double, 5> matrix(m);
    maths::CVectorNx1<double, 5> vector(v);
 
    {
-        std::size_t ss[] = { 0, 1 };
+        std::size_t ss[] = {0, 1};
        TSizeVec subspace(boost::begin(ss), boost::end(ss));
 
        Eigen::MatrixXd projectedMatrix = maths::projectedMatrix(subspace, matrix);
@@ -1301,7 +994,7 @@ void CLinearAlgebraTest::testProjected()
        CPPUNIT_ASSERT_EQUAL(std::string("0.3\n3.4"), print(projectedVector));
    }
    {
-        std::size_t ss[] = { 1, 0 };
+        std::size_t ss[] = {1, 0};
        TSizeVec subspace(boost::begin(ss), boost::end(ss));
 
        Eigen::MatrixXd projectedMatrix = maths::projectedMatrix(subspace, matrix);
@@ -1312,7 +1005,7 @@ void CLinearAlgebraTest::testProjected()
        CPPUNIT_ASSERT_EQUAL(std::string("3.4\n0.3"), print(projectedVector));
    }
    {
-        std::size_t ss[] = { 1, 0, 4 };
+        std::size_t ss[] = {1, 0, 4};
        TSizeVec subspace(boost::begin(ss), boost::end(ss));
 
        Eigen::MatrixXd projectedMatrix = maths::projectedMatrix(subspace, matrix);
@@ -1324,8 +1017,7 @@ void CLinearAlgebraTest::testProjected()
    }
 }
 
-void CLinearAlgebraTest::testPersist()
-{
+void CLinearAlgebraTest::testPersist() {
    LOG_DEBUG("+-----------------------------------+");
    LOG_DEBUG("| CLinearAlgebraTest::testPersist |");
    LOG_DEBUG("+-----------------------------------+");
 
@@ -1334,13 +1026,7 @@ void CLinearAlgebraTest::testPersist()
    // bad input produces an error.
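// (The round trip under test, sketched end to end; toDelimited() is assumed
// here as the serialising counterpart of the fromDelimited() the test calls:
//
//     maths::CSymmetricMatrixNxN<double, 4> matrix(matrix_);
//     std::string str = matrix.toDelimited();             // serialise
//     maths::CSymmetricMatrixNxN<double, 4> restoredMatrix;
//     CPPUNIT_ASSERT(restoredMatrix.fromDelimited(str));  // parse it back
//
// after which every element must compare equal, as asserted below.)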
{
-        double matrix_[][4] =
-            {
-                { 1.0, 2.1, 1.5, 0.1 },
-                { 2.1, 2.2, 3.7, 0.6 },
-                { 1.5, 3.7, 0.4, 8.1 },
-                { 0.1, 0.6, 8.1, 4.3 }
-            };
+        double matrix_[][4] = {{1.0, 2.1, 1.5, 0.1}, {2.1, 2.2, 3.7, 0.6}, {1.5, 3.7, 0.4, 8.1}, {0.1, 0.6, 8.1, 4.3}};
 
         maths::CSymmetricMatrixNxN<double, 4> matrix(matrix_);
 
@@ -1351,10 +1037,8 @@ void CLinearAlgebraTest::testPersist()
 
         LOG_DEBUG("delimited = " << str);
 
-        for (std::size_t i = 0u; i < 4; ++i)
-        {
-            for (std::size_t j = 0u; j < 4; ++j)
-            {
+        for (std::size_t i = 0u; i < 4; ++i) {
+            for (std::size_t j = 0u; j < 4; ++j) {
                 CPPUNIT_ASSERT_EQUAL(matrix(i, j), restoredMatrix(i, j));
             }
         }
@@ -1367,7 +1051,7 @@ void CLinearAlgebraTest::testPersist()
         CPPUNIT_ASSERT(!restoredMatrix.fromDelimited(bad));
     }
     {
-        double vector_[] = { 11.2, 2.1, 1.5 };
+        double vector_[] = {11.2, 2.1, 1.5};
 
         maths::CVectorNx1<double, 3> vector(vector_);
 
@@ -1378,8 +1062,7 @@ void CLinearAlgebraTest::testPersist()
 
         LOG_DEBUG("delimited = " << str);
 
-        for (std::size_t i = 0u; i < 3; ++i)
-        {
+        for (std::size_t i = 0u; i < 3; ++i) {
             CPPUNIT_ASSERT_EQUAL(vector(i), restoredVector(i));
         }
 
@@ -1392,43 +1075,27 @@ void CLinearAlgebraTest::testPersist()
     }
 }
 
-CppUnit::Test *CLinearAlgebraTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLinearAlgebraTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testSymmetricMatrixNxN",
-                               &CLinearAlgebraTest::testSymmetricMatrixNxN) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testVectorNx1",
-                               &CLinearAlgebraTest::testVectorNx1) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testSymmetricMatrix",
-                               &CLinearAlgebraTest::testSymmetricMatrix) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testVector",
-                               &CLinearAlgebraTest::testVector) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testNorms",
-                               &CLinearAlgebraTest::testNorms) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testUtils",
-                               &CLinearAlgebraTest::testUtils) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testGaussianLogLikelihood",
-                               &CLinearAlgebraTest::testGaussianLogLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testSampleGaussian",
-                               &CLinearAlgebraTest::testSampleGaussian) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testLogDeterminant",
-                               &CLinearAlgebraTest::testLogDeterminant) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testProjected",
-                               &CLinearAlgebraTest::testProjected) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CLinearAlgebraTest>(
-                               "CLinearAlgebraTest::testPersist",
-                               &CLinearAlgebraTest::testPersist) );
+CppUnit::Test* CLinearAlgebraTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLinearAlgebraTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testSymmetricMatrixNxN",
+                                                                      &CLinearAlgebraTest::testSymmetricMatrixNxN));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testVectorNx1", &CLinearAlgebraTest::testVectorNx1));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testSymmetricMatrix", &CLinearAlgebraTest::testSymmetricMatrix));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testVector", &CLinearAlgebraTest::testVector));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testNorms", &CLinearAlgebraTest::testNorms));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testUtils", &CLinearAlgebraTest::testUtils));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testGaussianLogLikelihood",
+                                                                      &CLinearAlgebraTest::testGaussianLogLikelihood));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testSampleGaussian", &CLinearAlgebraTest::testSampleGaussian));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testLogDeterminant", &CLinearAlgebraTest::testLogDeterminant));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testProjected", &CLinearAlgebraTest::testProjected));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLinearAlgebraTest>("CLinearAlgebraTest::testPersist", &CLinearAlgebraTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CLinearAlgebraTest.h b/lib/maths/unittest/CLinearAlgebraTest.h
index a359aa75ac..37137f7a71 100644
--- a/lib/maths/unittest/CLinearAlgebraTest.h
+++ b/lib/maths/unittest/CLinearAlgebraTest.h
@@ -9,22 +9,21 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CLinearAlgebraTest : public CppUnit::TestFixture
-{
-    public:
-        void testSymmetricMatrixNxN();
-        void testVectorNx1();
-        void testSymmetricMatrix();
-        void testVector();
-        void testNorms();
-        void testUtils();
-        void testGaussianLogLikelihood();
-        void testSampleGaussian();
-        void testLogDeterminant();
-        void testProjected();
-        void testPersist();
+class CLinearAlgebraTest : public CppUnit::TestFixture {
+public:
+    void testSymmetricMatrixNxN();
+    void testVectorNx1();
+    void testSymmetricMatrix();
+    void testVector();
+    void testNorms();
+    void testUtils();
+    void testGaussianLogLikelihood();
+    void testSampleGaussian();
+    void testLogDeterminant();
+    void testProjected();
+    void testPersist();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CLinearAlgebraTest_h
diff --git a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc
index b26d5517ea..c1a7d6d1d9 100644
--- a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc
+++ b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc
@@ -34,8 +34,7 @@
 using namespace ml;
 using namespace handy_typedefs;
 
-namespace
-{
+namespace {
 
 using TDoubleVec = std::vector<double>;
 using TDoubleDoublePr = std::pair<double, double>;
@@ -44,17 +43,13 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumula
 using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
 using CLogNormalMeanPrecConjugate = CPriorTestInterfaceMixin<maths::CLogNormalMeanPrecConjugate>;
 
-CLogNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData,
-                                      const double &offset = 0.0,
-                                      const double &decayRate = 0.0)
-{
+CLogNormalMeanPrecConjugate
+makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& offset = 0.0, const double& decayRate = 0.0) {
     return CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, offset, decayRate, 0.0);
 }
-
 }
 
-void CLogNormalMeanPrecConjugateTest::testMultipleUpdate()
-{
+void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() {
    LOG_DEBUG("+-------------------------------------------------------+");
    LOG_DEBUG("|  CLogNormalMeanPrecConjugateTest::testMultipleUpdate  |");
    LOG_DEBUG("+-------------------------------------------------------+");
@@ -64,11 +59,7 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate()
 
     using TEqual = maths::CEqualWithTolerance<double>;
 
-    const maths_t::EDataType dataTypes[] =
-        {
-            maths_t::E_IntegerData,
-            maths_t::E_ContinuousData
-        };
+    const
maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; const double location = std::log(10.0); const double squareScale = 3.0; @@ -78,13 +69,11 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 100, samples); - for (std::size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (std::size_t i = 0; i < boost::size(dataTypes); ++i) { CLogNormalMeanPrecConjugate filter1(makePrior(dataTypes[i])); CLogNormalMeanPrecConjugate filter2(filter1); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter1.addSamples(TDouble1Vec(1, samples[j])); } filter2.addSamples(samples); @@ -108,22 +97,16 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() TDoubleVec scaledSamples; rng.generateLogNormalSamples(scaledLocation, scaledSquareScale, 100, scaledSamples); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { CLogNormalMeanPrecConjugate filter1(makePrior(dataTypes[i])); filter1.addSamples(samples); CLogNormalMeanPrecConjugate filter2(filter1); maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t j = 0u; j < scaledSamples.size(); ++j) - { - filter1.addSamples(weightStyle, - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { + filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); } - filter2.addSamples(weightStyle, - scaledSamples, - TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(weightStyle, scaledSamples, TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); LOG_DEBUG(filter1.print()); LOG_DEBUG("vs"); @@ -134,16 +117,14 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() // Test the count weight is equivalent to adding repeated samples. 
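The hunk that follows exercises the property announced in the comment above: one update carrying count weight n must leave the prior in the same state as n unit-weight updates, because both routes feed identical sufficient statistics. A toy illustration with a weighted mean accumulator (hypothetical SMoments, not the real prior):

#include <cassert>
#include <cstddef>

struct SMoments {
    double count = 0.0;
    double mean = 0.0;
    void add(double x, double weight = 1.0) {
        count += weight;
        mean += weight * (x - mean) / count; // weighted incremental mean
    }
};

int main() {
    SMoments one, repeated;
    one.add(3.0, 10.0);                                  // one update, count weight 10
    for (std::size_t i = 0; i < 10; ++i) { repeated.add(3.0); } // ten unit updates
    assert(one.count == repeated.count);
    assert(one.mean == repeated.mean);
}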
- for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { CLogNormalMeanPrecConjugate filter1(makePrior(dataTypes[i])); CLogNormalMeanPrecConjugate filter2(filter1); double x = 3.0; std::size_t count = 10; - for (std::size_t j = 0u; j < count; ++j) - { + for (std::size_t j = 0u; j < count; ++j) { filter1.addSamples(TDouble1Vec(1, x)); } filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), @@ -158,8 +139,7 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() } } -void CLogNormalMeanPrecConjugateTest::testPropagation() -{ +void CLogNormalMeanPrecConjugateTest::testPropagation() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testPropagation |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -176,8 +156,7 @@ void CLogNormalMeanPrecConjugateTest::testPropagation() CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.1)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, static_cast(samples[i]))); } @@ -189,17 +168,14 @@ void CLogNormalMeanPrecConjugateTest::testPropagation() double propagatedMean = filter.normalMean(); double propagatedPrecision = filter.normalPrecision(); - LOG_DEBUG("mean = " << mean - << ", precision = " << precision - << ", propagatedMean = " << propagatedMean - << ", propagatedPrecision = " << propagatedPrecision); + LOG_DEBUG("mean = " << mean << ", precision = " << precision << ", propagatedMean = " << propagatedMean + << ", propagatedPrecision = " << propagatedPrecision); CPPUNIT_ASSERT_DOUBLES_EQUAL(mean, propagatedMean, eps); CPPUNIT_ASSERT_DOUBLES_EQUAL(precision, propagatedPrecision, eps); } -void CLogNormalMeanPrecConjugateTest::testMeanEstimation() -{ +void CLogNormalMeanPrecConjugateTest::testMeanEstimation() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testMeanEstimation |"); LOG_DEBUG("+-------------------------------------------------------+"); @@ -209,69 +185,55 @@ void CLogNormalMeanPrecConjugateTest::testMeanEstimation() // by checking that the true mean lies in various confidence intervals // the correct percentage of the times. 
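The coverage argument behind this check, stated plainly: a correctly calibrated p% confidence interval should fail to contain the true parameter in roughly (100 - p)% of independent trials, so counting misses over many trials measures calibration. In miniature, with made-up intervals:

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main() {
    // One interval per trial; here just three illustrative trials.
    std::vector<std::pair<double, double>> intervals = {{0.9, 2.1}, {1.2, 1.9}, {0.4, 1.4}};
    double truth = 1.5;
    std::size_t misses = 0;
    for (const auto& ci : intervals) {
        if (truth < ci.first || truth > ci.second) { ++misses; }
    }
    std::cout << "empirical miss rate = " << 100.0 * misses / intervals.size() << "%\n";
}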
- const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nTests = 500u; - const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 }; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; - for (size_t i = 0; i < boost::size(decayRates); ++i) - { + for (size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; - unsigned int errors[] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }; + unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; - for (unsigned int test = 0; test < nTests; ++test) - { + for (unsigned int test = 0; test < nTests; ++test) { double location = std::log(0.5 * (test + 1)); double squareScale = 4.0; TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 500, samples); - CLogNormalMeanPrecConjugate filter( - makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); + CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); filter.propagateForwardsByTime(1.0); } - for (size_t j = 0u; j < boost::size(testIntervals); ++j) - { - TDoubleDoublePr confidenceInterval = - filter.confidenceIntervalNormalMean(testIntervals[j]); - if (location < confidenceInterval.first || - location > confidenceInterval.second) - { + for (size_t j = 0u; j < boost::size(testIntervals); ++j) { + TDoubleDoublePr confidenceInterval = filter.confidenceIntervalNormalMean(testIntervals[j]); + if (location < confidenceInterval.first || location > confidenceInterval.second) { ++errors[j]; } } } - for (size_t j = 0; j < boost::size(testIntervals); ++j) - { + for (size_t j = 0; j < boost::size(testIntervals); ++j) { double interval = 100.0 * errors[j] / static_cast(nTests); - LOG_DEBUG("interval = " << interval - << ", expectedInterval = " << (100.0 - testIntervals[j])); + LOG_DEBUG("interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j])); // If the decay rate is zero the intervals should be accurate. // Otherwise, they should be an upper bound. - if (decayRates[i] == 0.0) - { + if (decayRates[i] == 0.0) { CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[j]), 4.0); - } - else - { + } else { CPPUNIT_ASSERT(interval <= (100.0 - testIntervals[j])); } } } } -void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() -{ +void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() { LOG_DEBUG("+------------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testPrecisionEstimation |"); LOG_DEBUG("+------------------------------------------------------------+"); @@ -281,19 +243,17 @@ void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() // checking that the true precision lies in various confidence intervals // the correct percentage of the times. 
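The decay rates swept in the next hunk control exponential forgetting: propagating the prior forwards in time discounts the evidence it has already absorbed, so with a non-zero rate the posterior stays wider and interval coverage becomes conservative, an upper bound rather than exact. A sketch of the discounting mechanism under that assumption (hypothetical type; the real update also relaxes the distribution parameters themselves):

#include <cmath>
#include <iostream>

struct SEffectiveCount {
    double count = 0.0;
    void add() { count += 1.0; }
    void propagateForwardsByTime(double dt, double decayRate) {
        count *= std::exp(-decayRate * dt); // discount previously absorbed evidence
    }
};

int main() {
    SEffectiveCount evidence;
    for (int i = 0; i < 500; ++i) {
        evidence.add();
        evidence.propagateForwardsByTime(1.0, 0.01);
    }
    // Far fewer than 500 effective observations remain, so intervals stay wide.
    std::cout << "effective count = " << evidence.count << '\n';
}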
- const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nTests = 500u; - const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 }; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; - for (size_t i = 0; i < boost::size(decayRates); ++i) - { + for (size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; - unsigned int errors[] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }; + unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; - for (unsigned int test = 0; test < nTests; ++test) - { + for (unsigned int test = 0; test < nTests; ++test) { double location = 1.0; double squareScale = 0.002 * static_cast(test + 1); double precision = 1 / squareScale; @@ -301,63 +261,46 @@ void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 500, samples); - CLogNormalMeanPrecConjugate filter( - makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); + CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); filter.propagateForwardsByTime(1.0); } - for (size_t j = 0; j < boost::size(testIntervals); ++j) - { - TDoubleDoublePr confidenceInterval = - filter.confidenceIntervalNormalPrecision(testIntervals[j]); + for (size_t j = 0; j < boost::size(testIntervals); ++j) { + TDoubleDoublePr confidenceInterval = filter.confidenceIntervalNormalPrecision(testIntervals[j]); - if (precision < confidenceInterval.first || - precision > confidenceInterval.second) - { + if (precision < confidenceInterval.first || precision > confidenceInterval.second) { ++errors[j]; } } } - for (size_t j = 0; j < boost::size(testIntervals); ++j) - { + for (size_t j = 0; j < boost::size(testIntervals); ++j) { double interval = 100.0 * errors[j] / static_cast(nTests); - LOG_DEBUG("interval = " << interval - << ", expectedInterval = " << (100.0 - testIntervals[j])); + LOG_DEBUG("interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j])); // If the decay rate is zero the intervals should be accurate. // Otherwise, they should be an upper bound. - if (decayRates[i] == 0.0) - { + if (decayRates[i] == 0.0) { CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[j]), 4.0); - } - else - { + } else { CPPUNIT_ASSERT(interval <= (100.0 - testIntervals[j])); } } } } -void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() -{ +void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testMarginalLikelihood |"); LOG_DEBUG("+-----------------------------------------------------------+"); // Check that the c.d.f. <= 1 at extreme. 
- maths_t::EDataType dataTypes[] = - { - maths_t::E_ContinuousData, - maths_t::E_IntegerData - }; - for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) - { + maths_t::EDataType dataTypes[] = {maths_t::E_ContinuousData, maths_t::E_IntegerData}; + for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { CLogNormalMeanPrecConjugate filter(makePrior(dataTypes[t])); const double location = 1.0; @@ -369,23 +312,18 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() rng.generateLogNormalSamples(location, squareScale, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = - { - maths_t::E_SampleCountWeight, - maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight - }; - double weights[] = { 0.1, 1.0, 10.0 }; + maths_t::ESampleWeightStyle weightStyles[] = { + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight}; + double weights[] = {0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) - { - for (std::size_t j = 0u; j < boost::size(weights); ++j) - { + for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 10000.0), TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), - lb, ub); + lb, + ub); LOG_DEBUG("-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -397,28 +335,24 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() // test data and that the c.d.f. <= 1 and that the expected value // of the log likelihood tends to the differential entropy. - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; const double location = 3.0; const double squareScale = 1.0; - unsigned int numberSamples[] = { 2u, 10u, 500u }; + unsigned int numberSamples[] = {2u, 10u, 500u}; const double tolerance = 1e-3; test::CRandomNumbers rng; - for (size_t i = 0; i < boost::size(numberSamples); ++i) - { + for (size_t i = 0; i < boost::size(numberSamples); ++i) { TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, numberSamples[i], samples); - for (size_t j = 0; j < boost::size(decayRates); ++j) - { - CLogNormalMeanPrecConjugate filter( - makePrior(maths_t::E_ContinuousData, 0.0, decayRates[j])); + for (size_t j = 0; j < boost::size(decayRates); ++j) { + CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[j])); - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); filter.propagateForwardsByTime(1.0); } @@ -427,22 +361,16 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() // c.d.f. at a range of deltas from the true location. 
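The check described above compares the marginal likelihood density against a central finite difference of its own c.d.f., i.e. f(x) is approximately (F(x + eps) - F(x - eps)) / (2 eps) with eps = 1e-4. The same comparison, with a plain Boost.Math lognormal standing in for the marginal likelihood:

#include <boost/math/distributions/lognormal.hpp>
#include <cmath>
#include <iostream>

int main() {
    boost::math::lognormal_distribution<> d(3.0, 1.0);
    double x = std::exp(3.0); // one of the "delta" points, here at the median
    double eps = 1e-4;
    double pdf = boost::math::pdf(d, x);
    double numeric = (boost::math::cdf(d, x + eps) - boost::math::cdf(d, x - eps)) / (2.0 * eps);
    std::cout << "pdf = " << pdf << ", (F(x+eps) - F(x-eps)) / 2eps = " << numeric << '\n';
}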
const double eps = 1e-4; - double deltas[] = - { - -5.0, -4.0, -3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0 - }; + double deltas[] = {-5.0, -4.0, -3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0}; - for (size_t k = 0; k < boost::size(deltas); ++k) - { + for (size_t k = 0; k < boost::size(deltas); ++k) { double x = std::exp(location + deltas[k] * std::sqrt(squareScale)); TDouble1Vec sample(1, x); - LOG_DEBUG("number = " << numberSamples[i] - << ", sample = " << sample[0]); + LOG_DEBUG("number = " << numberSamples[i] << ", sample = " << sample[0]); double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); double pdf = std::exp(logLikelihood); double lowerBound = 0.0, upperBound = 0.0; @@ -488,32 +416,23 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 100000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { TDouble1Vec sample(1, samples[i]); filter.addSamples(sample); double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); differentialEntropy -= logLikelihood; } differentialEntropy /= static_cast(samples.size()); - LOG_DEBUG("differentialEntropy = " << differentialEntropy - << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); + LOG_DEBUG("differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 5e-3); } { - const double varianceScales[] = - { - 0.1, 0.2, 0.3, 0.4, 0.5, - 0.6, 0.7, 0.8, 0.9, 1.0, - 1.2, 1.5, 2.0, 2.5, 3.0, - 4.0, 5.0 - }; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0}; boost::math::lognormal_distribution<> logNormal(location, std::sqrt(squareScale)); CLogNormalMeanPrecConjugate filter(makePrior()); @@ -521,24 +440,20 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() rng.generateLogNormalSamples(location, squareScale, 1000, samples); filter.addSamples(samples); - const double percentages[] = - { - 5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0 - }; + const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0}; { // Test that marginal likelihood confidence intervals are // what we'd expect for various variance scales. 
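The shifted parameters that appear in the hunks that follow come from requiring that a variance scale vs multiplies the lognormal's variance while leaving its mean unchanged. Since the mean is exp(m + s^2/2) and the variance is (exp(s^2) - 1) * exp(2m + s^2), solving gives shift = log(1 + vs * (exp(s^2) - 1)) - s^2, with m' = m - shift/2 and s'^2 = s^2 + shift. A numeric confirmation:

#include <boost/math/distributions/lognormal.hpp>
#include <cmath>
#include <iostream>

int main() {
    double m = 1.0, s2 = 1.5, vs = 2.0;
    double shift = std::log(1.0 + vs * (std::exp(s2) - 1.0)) - s2;
    boost::math::lognormal_distribution<> original(m, std::sqrt(s2));
    boost::math::lognormal_distribution<> scaled(m - 0.5 * shift, std::sqrt(s2 + shift));
    // Expect mean ratio 1 and variance ratio vs.
    std::cout << "mean ratio = " << boost::math::mean(scaled) / boost::math::mean(original)
              << ", variance ratio = " << boost::math::variance(scaled) / boost::math::variance(original) << '\n';
}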
TMeanAccumulator error; - for (std::size_t i = 0u; i < boost::size(percentages); ++i) - { + for (std::size_t i = 0u; i < boost::size(percentages); ++i) { double q1, q2; filter.marginalLikelihoodQuantileForTest(50.0 - percentages[i] / 2.0, 1e-3, q1); filter.marginalLikelihoodQuantileForTest(50.0 + percentages[i] / 2.0, 1e-3, q2); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[i]); LOG_DEBUG("[q1, q2] = [" << q1 << ", " << q2 << "]" - << ", interval = " << core::CContainerPrinter::print(interval)); + << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 1e-3); CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 1e-3); error.add(std::fabs(interval.first - q1)); @@ -549,27 +464,21 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() } { TMeanAccumulator totalError; - for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) - { + for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { TMeanAccumulator error; double vs = varianceScales[i]; double shift = std::log(1.0 + vs * (std::exp(squareScale) - 1.0)) - squareScale; double shiftedLocation = location - 0.5 * shift; double shiftedSquareScale = squareScale + shift; - boost::math::lognormal_distribution<> scaledLogNormal(shiftedLocation, - std::sqrt(shiftedSquareScale)); - LOG_DEBUG("*** vs = " << boost::math::variance(scaledLogNormal) - / boost::math::variance(logNormal) << " ***"); - for (std::size_t j = 0u; j < boost::size(percentages); ++j) - { + boost::math::lognormal_distribution<> scaledLogNormal(shiftedLocation, std::sqrt(shiftedSquareScale)); + LOG_DEBUG("*** vs = " << boost::math::variance(scaledLogNormal) / boost::math::variance(logNormal) << " ***"); + for (std::size_t j = 0u; j < boost::size(percentages); ++j) { double q1 = boost::math::quantile(scaledLogNormal, (50.0 - percentages[j] / 2.0) / 100.0); double q2 = boost::math::quantile(scaledLogNormal, (50.0 + percentages[j] / 2.0) / 100.0); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble4Vec(1, vs)); + percentages[j], maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), TDouble4Vec(1, vs)); LOG_DEBUG("[q1, q2] = [" << q1 << ", " << q2 << "]" - << ", interval = " << core::CContainerPrinter::print(interval)); + << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, std::max(0.5, 0.2 * q1)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 0.1 * q2); error.add(std::fabs(interval.first - q1) / q1); @@ -585,8 +494,7 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() } } -void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() -{ +void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { LOG_DEBUG("+---------------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean |"); LOG_DEBUG("+---------------------------------------------------------------+"); @@ -594,17 +502,14 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() // Test that the expectation of the marginal likelihood matches // the expected mean of the marginal likelihood. 
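The "expected mean" matched below is the lognormal moment E[X] = exp(mu + sigma^2/2) evaluated at the posterior parameters. For intuition, a quick Monte Carlo check of the formula itself:

#include <cmath>
#include <iostream>
#include <random>

int main() {
    std::mt19937 rng(42);
    std::lognormal_distribution<double> d(1.0, std::sqrt(3.0)); // mu = 1, sigma^2 = 3
    double sum = 0.0;
    const int n = 1000000;
    for (int i = 0; i < n; ++i) { sum += d(rng); }
    std::cout << "sample mean = " << sum / n
              << ", exp(mu + sigma^2/2) = " << std::exp(1.0 + 3.0 / 2.0) << '\n';
}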
- const double locations[] = { 0.1, 1.0, 3.0 }; - const double squareScales[] = { 0.1, 1.0, 3.0 }; + const double locations[] = {0.1, 1.0, 3.0}; + const double squareScales[] = {0.1, 1.0, 3.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(locations); ++i) - { - for (std::size_t j = 0u; j < boost::size(squareScales); ++j) - { - LOG_DEBUG("*** location = " << locations[i] - << ", squareScale = " << squareScales[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(locations); ++i) { + for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { + LOG_DEBUG("*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); CLogNormalMeanPrecConjugate filter(makePrior()); @@ -617,25 +522,19 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() TMeanAccumulator relativeError; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedMean; CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); - if (k % 10 == 0) - { - LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() - << ", expectedMean = " << expectedMean); + if (k % 10 == 0) { + LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, - filter.marginalLikelihoodMean(), - 0.35 * expectedMean); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 0.35 * expectedMean); - relativeError.add(std::fabs(filter.marginalLikelihoodMean() - expectedMean) - / expectedMean); + relativeError.add(std::fabs(filter.marginalLikelihoodMean() - expectedMean) / expectedMean); } LOG_DEBUG("relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -644,8 +543,7 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() } } -void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() -{ +void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { LOG_DEBUG("+---------------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode |"); LOG_DEBUG("+---------------------------------------------------------------+"); @@ -653,24 +551,17 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() // Test that the marginal likelihood mode is what we'd expect // with variances variance scales. 
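The expected mode used in this test is the lognormal density maximum exp(m - s^2), evaluated at the variance-scaled parameters obtained with the same mean-preserving shift as above. Boost.Math agrees with the closed form:

#include <boost/math/distributions/lognormal.hpp>
#include <cmath>
#include <iostream>

int main() {
    double m = 1.0, s2 = 3.0;
    boost::math::lognormal_distribution<> d(m, std::sqrt(s2));
    std::cout << boost::math::mode(d) << " vs " << std::exp(m - s2) << '\n'; // identical
}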
- const double locations[] = { 0.1, 1.0, 3.0 }; - const double squareScales[] = { 0.1, 1.0, 3.0 }; - const double varianceScales[] = - { - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0 - }; + const double locations[] = {0.1, 1.0, 3.0}; + const double squareScales[] = {0.1, 1.0, 3.0}; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(locations); ++i) - { - for (std::size_t j = 0u; j < boost::size(squareScales); ++j) - { - LOG_DEBUG("*** location = " << locations[i] - << ", squareScale = " << squareScales[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(locations); ++i) { + for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { + LOG_DEBUG("*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); - boost::math::lognormal_distribution<> logNormal(locations[i], - std::sqrt(squareScales[j])); + boost::math::lognormal_distribution<> logNormal(locations[i], std::sqrt(squareScales[j])); CLogNormalMeanPrecConjugate filter(makePrior()); TDoubleVec samples; @@ -680,25 +571,19 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); TDouble4Vec weight(1, 1.0); TMeanAccumulator error; - for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) - { + for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; weight[0] = vs; double shift = std::log(1.0 + vs * (std::exp(squareScales[j]) - 1.0)) - squareScales[j]; double shiftedLocation = locations[i] - 0.5 * shift; double shiftedSquareScale = squareScales[j] + shift; - boost::math::lognormal_distribution<> scaledLogNormal(shiftedLocation, - std::sqrt(shiftedSquareScale)); + boost::math::lognormal_distribution<> scaledLogNormal(shiftedLocation, std::sqrt(shiftedSquareScale)); double expectedMode = boost::math::mode(scaledLogNormal); - LOG_DEBUG("dm = " << boost::math::mean(scaledLogNormal) - - boost::math::mean(logNormal) - << ", vs = " << boost::math::variance(scaledLogNormal) - / boost::math::variance(logNormal) - << ", marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight) - << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, - filter.marginalLikelihoodMode(weightStyle, weight), - 1.0); + LOG_DEBUG("dm = " << boost::math::mean(scaledLogNormal) - boost::math::mean(logNormal) + << ", vs = " << boost::math::variance(scaledLogNormal) / boost::math::variance(logNormal) + << ", marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight) + << ", expectedMode = " << expectedMode); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0); error.add(std::fabs(filter.marginalLikelihoodMode(weightStyle, weight) - expectedMode)); } LOG_DEBUG("error = " << maths::CBasicStatistics::mean(error)); @@ -707,8 +592,7 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() } } -void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() -{ +void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { LOG_DEBUG("+-------------------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance |"); LOG_DEBUG("+-------------------------------------------------------------------+"); @@ -717,17 
+601,14 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() // the marginal likelihood matches the expected variance of the // marginal likelihood. - const double locations[] = { 0.1, 1.0, 3.0 }; - const double squareScales[] = { 0.1, 1.0, 3.0 }; + const double locations[] = {0.1, 1.0, 3.0}; + const double squareScales[] = {0.1, 1.0, 3.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(locations); ++i) - { - for (std::size_t j = 0u; j < boost::size(squareScales); ++j) - { - LOG_DEBUG("*** location = " << locations[i] - << ", squareScale = " << squareScales[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(locations); ++i) { + for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { + LOG_DEBUG("*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); CLogNormalMeanPrecConjugate filter(makePrior()); @@ -740,21 +621,18 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() TMeanAccumulator relativeError; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedVariance; CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); - if (k % 10 == 0) - { + if (k % 10 == 0) { LOG_DEBUG("marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() - << ", expectedVariance = " << expectedVariance); + << ", expectedVariance = " << expectedVariance); } - relativeError.add(std::fabs(filter.marginalLikelihoodVariance() - expectedVariance) - / expectedVariance); + relativeError.add(std::fabs(filter.marginalLikelihoodVariance() - expectedVariance) / expectedVariance); } LOG_DEBUG("relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -763,8 +641,7 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() } } -void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() -{ +void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG("+-----------------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood |"); LOG_DEBUG("+-----------------------------------------------------------------+"); @@ -792,8 +669,7 @@ void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() TDouble1Vec sampled; - for (std::size_t i = 0u; i < 1u; ++i) - { + for (std::size_t i = 0u; i < 1u; ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); sampled.clear(); filter.sampleMarginalLikelihood(10, sampled); @@ -804,8 +680,7 @@ void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() TMeanAccumulator meanMeanError; std::size_t numberSampled = 20u; - for (std::size_t i = 1u; i < samples.size(); ++i) - { + for (std::size_t i = 1u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); sampled.clear(); filter.sampleMarginalLikelihood(numberSampled, sampled); @@ -813,35 +688,29 @@ void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() // The error is due to the approximation of the likelihood // function by a moment matched log-normal. This becomes // increasingly accurate as the number of updates increases. 
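A hedged illustration of what "moment matched log-normal" means in the comment above: choose (m, s^2) so the lognormal reproduces a target mean and variance. Inverting the moment formulas gives s^2 = log(1 + var / mean^2) and m = log(mean) - s^2/2; verified numerically:

#include <boost/math/distributions/lognormal.hpp>
#include <cmath>
#include <iostream>

int main() {
    double mean = 12.0, variance = 30.0; // the target moments
    double s2 = std::log(1.0 + variance / (mean * mean));
    double m = std::log(mean) - 0.5 * s2;
    boost::math::lognormal_distribution<> d(m, std::sqrt(s2));
    std::cout << boost::math::mean(d) << ' ' << boost::math::variance(d) << '\n'; // 12 30
}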
- if (i >= 10u) - { + if (i >= 10u) { TMeanVarAccumulator sampledMoments; sampledMoments = std::for_each(sampled.begin(), sampled.end(), sampledMoments); CPPUNIT_ASSERT_EQUAL(numberSampled, sampled.size()); LOG_DEBUG("expectedMean = " << filter.marginalLikelihoodMean() - << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); + << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); LOG_DEBUG("expectedVar = " << filter.marginalLikelihoodVariance() - << ", sampledVar = " << maths::CBasicStatistics::variance(sampledMoments)); + << ", sampledVar = " << maths::CBasicStatistics::variance(sampledMoments)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), - maths::CBasicStatistics::mean(sampledMoments), - 0.8); - meanMeanError.add(std::fabs( filter.marginalLikelihoodMean() - - maths::CBasicStatistics::mean(sampledMoments))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMoments), 0.8); + meanMeanError.add(std::fabs(filter.marginalLikelihoodMean() - maths::CBasicStatistics::mean(sampledMoments))); } std::sort(sampled.begin(), sampled.end()); - for (std::size_t j = 1u; j < sampled.size(); ++j) - { + for (std::size_t j = 1u; j < sampled.size(); ++j) { double q = 100.0 * static_cast(j) / static_cast(numberSampled); double expectedQuantile; CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile)); - LOG_DEBUG("quantile = " << q - << ", x_quantile = " << expectedQuantile - << ", quantile range = [" << sampled[j - 1] << "," << sampled[j] << "]"); + LOG_DEBUG("quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[j - 1] << "," + << sampled[j] << "]"); CPPUNIT_ASSERT(expectedQuantile >= sampled[j - 1] - 0.2 * std::max(6.0 - static_cast(i), 0.0)); CPPUNIT_ASSERT(expectedQuantile <= sampled[j] + 1.2 * std::max(6.0 - static_cast(i), 0.0)); } @@ -851,8 +720,7 @@ void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < 0.25); } -void CLogNormalMeanPrecConjugateTest::testCdf() -{ +void CLogNormalMeanPrecConjugateTest::testCdf() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testCdf |"); LOG_DEBUG("+--------------------------------------------+"); @@ -866,14 +734,13 @@ void CLogNormalMeanPrecConjugateTest::testCdf() const double location = 2.0; const double squareScale = 0.8; - const std::size_t n[] = { 20u, 80u }; + const std::size_t n[] = {20u, 80u}; test::CRandomNumbers rng; CLogNormalMeanPrecConjugate filter(makePrior()); - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, n[i], samples); @@ -888,28 +755,24 @@ void CLogNormalMeanPrecConjugateTest::testCdf() double f = (lowerBound + upperBound) / 2.0; CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), lowerBound, upperBound)); double fComplement = (lowerBound + upperBound) / 2.0; - LOG_DEBUG("log(F(x)) = " << -f - << ", log(1 - F(x)) = " << fComplement); + LOG_DEBUG("log(F(x)) = " << -f << ", log(1 - F(x)) = " << fComplement); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(std::numeric_limits::min()), -f, 1e-10); CPPUNIT_ASSERT_EQUAL(1.0, std::exp(-fComplement)); - for (std::size_t j = 1u; j < 500; ++j) - { + for (std::size_t j = 1u; j < 500; ++j) { double x = static_cast(j) / 2.0; 
CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lowerBound, upperBound)); f = (lowerBound + upperBound) / 2.0; CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lowerBound, upperBound)); fComplement = (lowerBound + upperBound) / 2.0; - LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) - << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); + LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10); } } } -void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() -{ +void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { LOG_DEBUG("+-----------------------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples |"); LOG_DEBUG("+-----------------------------------------------------------------------+"); @@ -921,20 +784,17 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() // We also check that the tail calculation attributes samples to // the appropriate tail of the distribution. - const double means[] = { 0.1, 1.5, 3.0 }; - const double squareScales[] = { 0.2, 0.4, 1.5 }; - const double vs[] = { 0.5, 1.0, 2.0 }; + const double means[] = {0.1, 1.5, 3.0}; + const double squareScales[] = {0.2, 0.4, 1.5}; + const double vs[] = {0.5, 1.0, 2.0}; test::CRandomNumbers rng; TMeanAccumulator meanError; - for (size_t i = 0; i < boost::size(means); ++i) - { - for (size_t j = 0; j < boost::size(squareScales); ++j) - { - LOG_DEBUG("means = " << means[i] - << ", scale = " << std::sqrt(squareScales[j])); + for (size_t i = 0; i < boost::size(means); ++i) { + for (size_t j = 0; j < boost::size(squareScales); ++j) { + LOG_DEBUG("means = " << means[i] << ", scale = " << std::sqrt(squareScales[j])); TDoubleVec samples; rng.generateLogNormalSamples(means[i], squareScales[j], 1000, samples); @@ -943,11 +803,10 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() filter.addSamples(samples); double location = filter.normalMean(); - double scale = std::sqrt(1.0 / filter.normalPrecision()); + double scale = std::sqrt(1.0 / filter.normalPrecision()); TDoubleVec likelihoods; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { double likelihood; filter.jointLogMarginalLikelihood(TDouble1Vec(1, samples[k]), likelihood); likelihoods.push_back(likelihood); @@ -955,27 +814,22 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() std::sort(likelihoods.begin(), likelihoods.end()); boost::math::lognormal_distribution<> lognormal(location, scale); - for (std::size_t k = 1u; k < 10; ++k) - { + for (std::size_t k = 1u; k < 10; ++k) { double x = boost::math::quantile(lognormal, static_cast(k) / 10.0); TDouble1Vec sample(1, x); double fx; filter.jointLogMarginalLikelihood(sample, fx); - double px = static_cast(std::lower_bound(likelihoods.begin(), - likelihoods.end(), fx) - - likelihoods.begin()) - / static_cast(likelihoods.size()); + double px = static_cast(std::lower_bound(likelihoods.begin(), likelihoods.end(), fx) - likelihoods.begin()) / + static_cast(likelihoods.size()); double lb, ub; filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb, ub); double ssd = std::sqrt(px * (1.0 - px) / static_cast(samples.size())); - LOG_DEBUG("expected P(x) = " << px - << ", actual 
P(x) = " << (lb + ub) / 2.0 - << " sample sd = " << ssd); + LOG_DEBUG("expected P(x) = " << px << ", actual P(x) = " << (lb + ub) / 2.0 << " sample sd = " << ssd); CPPUNIT_ASSERT_DOUBLES_EQUAL(px, (lb + ub) / 2.0, 3.0 * ssd); @@ -984,11 +838,9 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) - { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, vs[k])); - double ss[] = { 0.9 * mode, 1.1 * mode }; + for (std::size_t k = 0u; k < boost::size(vs); ++k) { + double mode = filter.marginalLikelihoodMode(weightStyle, TDouble4Vec(1, vs[k])); + double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG("vs = " << vs[k] << ", mode = " << mode); @@ -996,59 +848,58 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() maths_t::ETail tail; { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - if (mode > 0.0) - { + if (mode > 0.0) { filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } - if (mode > 0.0) - { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), - lb, ub, tail); + if (mode > 0.0) { + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -1059,8 +910,7 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.01); } -void CLogNormalMeanPrecConjugateTest::testAnomalyScore() -{ 
+void CLogNormalMeanPrecConjugateTest::testAnomalyScore() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testAnomalyScore |"); LOG_DEBUG("+-----------------------------------------------------+"); @@ -1074,15 +924,15 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() using TUIntVec = std::vector; - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; - const double means[] = { 0.1, 1.5, 3.0 }; - const double squareScales[] = { 0.2, 0.4, 1.5 }; + const double means[] = {0.1, 1.5, 3.0}; + const double squareScales[] = {0.2, 0.4, 1.5}; const double threshold = 0.02; - const unsigned int anomalyTimes[] = { 30u, 120u, 300u, 420u }; - const double anomalies[] = { 4.0, 5.0, 10.0, 15.0, 0.0 }; + const unsigned int anomalyTimes[] = {30u, 120u, 300u, 420u}; + const double anomalies[] = {4.0, 5.0, 10.0, 15.0, 0.0}; test::CRandomNumbers rng; @@ -1092,12 +942,10 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() file.open("results.m"); double totalFalsePositiveRate = 0.0; - std::size_t totalPositives[] = { 0u, 0u, 0u }; + std::size_t totalPositives[] = {0u, 0u, 0u}; - for (size_t i = 0; i < boost::size(means); ++i) - { - for (size_t j = 0; j < boost::size(squareScales); ++j) - { + for (size_t i = 0; i < boost::size(means); ++i) { + for (size_t j = 0; j < boost::size(squareScales); ++j) { LOG_DEBUG("mean = " << means[i] << ", scale = " << std::sqrt(squareScales[j])); boost::math::lognormal_distribution<> logNormal(means[i], std::sqrt(squareScales[j])); @@ -1105,10 +953,8 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() TDoubleVec samples; rng.generateLogNormalSamples(means[i], squareScales[j], 500, samples); - for (size_t k = 0; k < boost::size(decayRates); ++k) - { - CLogNormalMeanPrecConjugate filter( - makePrior(maths_t::E_ContinuousData, 0.0, decayRates[k])); + for (size_t k = 0; k < boost::size(decayRates); ++k) { + CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[k])); ++test; @@ -1118,12 +964,10 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() scores << "score" << test << " = ["; TUIntVec candidateAnomalies; - for (unsigned int time = 0; time < samples.size(); ++time) - { - double anomaly = anomalies[std::find(boost::begin(anomalyTimes), - boost::end(anomalyTimes), time) - - boost::begin(anomalyTimes)] - * boost::math::standard_deviation(logNormal); + for (unsigned int time = 0; time < samples.size(); ++time) { + double anomaly = + anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - boost::begin(anomalyTimes)] * + boost::math::standard_deviation(logNormal); double sample = samples[time] + anomaly; TDouble1Vec sampleVec(1, sample); @@ -1131,8 +975,7 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() double score; filter.anomalyScore(maths_t::E_TwoSided, sampleVec, score); - if (score > threshold) - { + if (score > threshold) { candidateAnomalies.push_back(time); } @@ -1144,8 +987,7 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() x << "];\n"; scores << "];\n"; - file << x.str() << scores.str() - << "plot(x" << test << ", score" << test << ");\n" + file << x.str() << scores.str() << "plot(x" << test << ", score" << test << ");\n" << "input(\"Hit any key for next test\");\n\n"; TUIntVec falsePositives; @@ -1155,8 +997,7 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() boost::end(anomalyTimes), std::back_inserter(falsePositives)); - double 
falsePositiveRate = static_cast(falsePositives.size()) - / static_cast(samples.size()); + double falsePositiveRate = static_cast(falsePositives.size()) / static_cast(samples.size()); totalFalsePositiveRate += falsePositiveRate; @@ -1167,8 +1008,7 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() boost::end(anomalyTimes), std::back_inserter(positives)); - LOG_DEBUG("falsePositiveRate = " << falsePositiveRate - << ", positives = " << positives.size()); + LOG_DEBUG("falsePositiveRate = " << falsePositiveRate << ", positives = " << positives.size()); // False alarm rate should be less than 1%. CPPUNIT_ASSERT(falsePositiveRate <= 0.01); @@ -1185,8 +1025,7 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() LOG_DEBUG("totalFalsePositiveRate = " << totalFalsePositiveRate); - for (size_t i = 0; i < boost::size(totalPositives); ++i) - { + for (size_t i = 0; i < boost::size(totalPositives); ++i) { LOG_DEBUG("positives = " << totalPositives[i]); CPPUNIT_ASSERT(totalPositives[i] >= 20u); } @@ -1195,8 +1034,7 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() CPPUNIT_ASSERT(totalFalsePositiveRate < 0.004); } -void CLogNormalMeanPrecConjugateTest::testOffset() -{ +void CLogNormalMeanPrecConjugateTest::testOffset() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testOffset |"); LOG_DEBUG("+-----------------------------------------------+"); @@ -1204,13 +1042,9 @@ void CLogNormalMeanPrecConjugateTest::testOffset() // The idea of this test is to check that the offset correctly cancels // out a translation applied to a log-normally distributed data set. - const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; - const double offsets[] = { -0.5, 0.5 }; - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; + const double offsets[] = {-0.5, 0.5}; + const double decayRates[] = {0.0, 0.001, 0.01}; const double location = 3.0; const double squareScale = 1.0; @@ -1222,17 +1056,13 @@ void CLogNormalMeanPrecConjugateTest::testOffset() TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 100, samples); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { - for (size_t j = 0; j < boost::size(offsets); ++j) - { - for (size_t k = 0; k < boost::size(decayRates); ++k) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { + for (size_t j = 0; j < boost::size(offsets); ++j) { + for (size_t k = 0; k < boost::size(decayRates); ++k) { CLogNormalMeanPrecConjugate filter1(makePrior(dataTypes[i], offsets[j], decayRates[k])); CLogNormalMeanPrecConjugate filter2(makePrior(dataTypes[i], 0.0, decayRates[k])); - for (std::size_t l = 0u; l < samples.size(); ++l) - { + for (std::size_t l = 0u; l < samples.size(); ++l) { double offsetSample = samples[l] - offsets[j]; TDouble1Vec offsetSampleVec(1, offsetSample); filter1.addSamples(offsetSampleVec); @@ -1246,20 +1076,14 @@ void CLogNormalMeanPrecConjugateTest::testOffset() double likelihood1; filter1.jointLogMarginalLikelihood(offsetSampleVec, likelihood1); double lowerBound1, upperBound1; - filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - offsetSampleVec, - lowerBound1, - upperBound1); + filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, offsetSampleVec, lowerBound1, upperBound1); CPPUNIT_ASSERT_EQUAL(lowerBound1, upperBound1); double probability1 = (lowerBound1 + upperBound1) / 2.0; double likelihood2; 
filter2.jointLogMarginalLikelihood(sample, likelihood2); double lowerBound2, upperBound2; - filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - sample, - lowerBound2, - upperBound2); + filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound2, upperBound2); CPPUNIT_ASSERT_EQUAL(lowerBound2, upperBound2); double probability2 = (lowerBound2 + upperBound2) / 2.0; @@ -1275,8 +1099,7 @@ void CLogNormalMeanPrecConjugateTest::testOffset() } } -void CLogNormalMeanPrecConjugateTest::testIntegerData() -{ +void CLogNormalMeanPrecConjugateTest::testIntegerData() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testIntegerData |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -1286,72 +1109,65 @@ void CLogNormalMeanPrecConjugateTest::testIntegerData() // The idea of this test is to check that the inferred model agrees in the // limit (large n) with the model inferred from such data. - const double locations[] = { 0.2, 1.0, 1.5 }; - const double squareScales[] = { 0.5, 2.0 }; + const double locations[] = {0.2, 1.0, 1.5}; + const double squareScales[] = {0.5, 2.0}; const std::size_t nSamples = 100000u; - for (std::size_t i = 0; i < boost::size(locations); ++i) - { - for (std::size_t j = 0; j < boost::size(squareScales); ++j) - { - test::CRandomNumbers rng; + for (std::size_t i = 0; i < boost::size(locations); ++i) { + for (std::size_t j = 0; j < boost::size(squareScales); ++j) { + test::CRandomNumbers rng; - TDoubleVec samples; - rng.generateLogNormalSamples(locations[i], squareScales[j], nSamples, samples); + TDoubleVec samples; + rng.generateLogNormalSamples(locations[i], squareScales[j], nSamples, samples); - TDoubleVec uniform; - rng.generateUniformSamples(0.0, 1.0, nSamples, uniform); + TDoubleVec uniform; + rng.generateUniformSamples(0.0, 1.0, nSamples, uniform); - CLogNormalMeanPrecConjugate filter1(makePrior(maths_t::E_IntegerData, 0.1)); - CLogNormalMeanPrecConjugate filter2(makePrior(maths_t::E_ContinuousData, 0.1)); + CLogNormalMeanPrecConjugate filter1(makePrior(maths_t::E_IntegerData, 0.1)); + CLogNormalMeanPrecConjugate filter2(makePrior(maths_t::E_ContinuousData, 0.1)); - for (std::size_t k = 0; k < nSamples; ++k) - { - double x = std::floor(samples[k]); + for (std::size_t k = 0; k < nSamples; ++k) { + double x = std::floor(samples[k]); - TDouble1Vec sample(1, x); - filter1.addSamples(sample); + TDouble1Vec sample(1, x); + filter1.addSamples(sample); - sample[0] += uniform[k]; - filter2.addSamples(sample); - } + sample[0] += uniform[k]; + filter2.addSamples(sample); + } - using TEqual = maths::CEqualWithTolerance; - TEqual equal(maths::CToleranceTypes::E_RelativeTolerance, 0.01); - CPPUNIT_ASSERT(filter1.equalTolerance(filter2, equal)); + using TEqual = maths::CEqualWithTolerance; + TEqual equal(maths::CToleranceTypes::E_RelativeTolerance, 0.01); + CPPUNIT_ASSERT(filter1.equalTolerance(filter2, equal)); - TMeanAccumulator meanLogLikelihood1; - TMeanAccumulator meanLogLikelihood2; - for (std::size_t k = 0u; k < nSamples; ++k) - { - double x = std::floor(samples[k]); + TMeanAccumulator meanLogLikelihood1; + TMeanAccumulator meanLogLikelihood2; + for (std::size_t k = 0u; k < nSamples; ++k) { + double x = std::floor(samples[k]); - TDouble1Vec sample(1, x); - double logLikelihood1; - filter1.jointLogMarginalLikelihood(sample, logLikelihood1); - meanLogLikelihood1.add(-logLikelihood1); + TDouble1Vec sample(1, x); + double logLikelihood1; + 
filter1.jointLogMarginalLikelihood(sample, logLikelihood1); + meanLogLikelihood1.add(-logLikelihood1); - sample[0] += uniform[k]; - double logLikelihood2; - filter2.jointLogMarginalLikelihood(sample, logLikelihood2); - meanLogLikelihood2.add(-logLikelihood2); - } + sample[0] += uniform[k]; + double logLikelihood2; + filter2.jointLogMarginalLikelihood(sample, logLikelihood2); + meanLogLikelihood2.add(-logLikelihood2); + } - LOG_DEBUG("meanLogLikelihood1 = " << maths::CBasicStatistics::mean(meanLogLikelihood1) - << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); + LOG_DEBUG("meanLogLikelihood1 = " << maths::CBasicStatistics::mean(meanLogLikelihood1) + << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanLogLikelihood1), - maths::CBasicStatistics::mean(meanLogLikelihood2), - 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(meanLogLikelihood1), maths::CBasicStatistics::mean(meanLogLikelihood2), 0.05); } } TMeanAccumulator meanError; - for (size_t i = 0; i < boost::size(locations); ++i) - { - for (std::size_t j = 0; j < boost::size(squareScales); ++j) - { + for (size_t i = 0; i < boost::size(locations); ++i) { + for (std::size_t j = 0; j < boost::size(squareScales); ++j) { test::CRandomNumbers rng; TDoubleVec seedSamples; @@ -1371,8 +1187,7 @@ void CLogNormalMeanPrecConjugateTest::testIntegerData() TMeanAccumulator meanProbability1; TMeanAccumulator meanProbability2; - for (std::size_t k = 0; k < nSamples; ++k) - { + for (std::size_t k = 0; k < nSamples; ++k) { double x = std::floor(samples[k]); TDouble1Vec sample(1, x); @@ -1393,9 +1208,7 @@ void CLogNormalMeanPrecConjugateTest::testIntegerData() double p1 = maths::CBasicStatistics::mean(meanProbability1); double p2 = maths::CBasicStatistics::mean(meanProbability2); - LOG_DEBUG("location = " << locations[i] - << ", p1 = " << p1 - << ", p2 = " << p2); + LOG_DEBUG("location = " << locations[i] << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 0.05 * p1); @@ -1407,45 +1220,37 @@ void CLogNormalMeanPrecConjugateTest::testIntegerData() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.005); } -void CLogNormalMeanPrecConjugateTest::testLowVariationData() -{ +void CLogNormalMeanPrecConjugateTest::testLowVariationData() { LOG_DEBUG("+---------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testLowVariationData |"); LOG_DEBUG("+---------------------------------------------------------+"); { CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_IntegerData)); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { filter.addSamples(TDouble1Vec(1, 430.0)); } TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 2.0; - LOG_DEBUG("68% confidence interval " - << core::CContainerPrinter::print(interval) - << ", approximate variance = " << sigma * sigma); + LOG_DEBUG("68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate variance = " << sigma * sigma); CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, 1.0 / (sigma * sigma), 0.15); } { CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData)); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { filter.addSamples(TDouble1Vec(1, 430.0)); } TDoubleDoublePr interval = 
filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 2.0; - LOG_DEBUG("68% confidence interval " - << core::CContainerPrinter::print(interval) - << ", approximate s.t.d. = " << sigma); + LOG_DEBUG("68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate s.t.d. = " << sigma); CPPUNIT_ASSERT_DOUBLES_EQUAL(1e-4, sigma / 430.5, 5e-5); } } -void CLogNormalMeanPrecConjugateTest::testPersist() -{ +void CLogNormalMeanPrecConjugateTest::testPersist() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testPersist |"); LOG_DEBUG("+------------------------------------------------+"); @@ -1459,11 +1264,9 @@ void CLogNormalMeanPrecConjugateTest::testPersist() rng.generateLogNormalSamples(location, squareScale, 100, samples); maths::CLogNormalMeanPrecConjugate origFilter(makePrior()); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + for (std::size_t i = 0u; i < samples.size(); ++i) { + origFilter.addSamples( + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1489,8 +1292,7 @@ void CLogNormalMeanPrecConjugateTest::testPersist() maths::MINIMUM_CATEGORY_COUNT); maths::CLogNormalMeanPrecConjugate restoredFilter(params, traverser); - LOG_DEBUG("orig checksum = " << checksum - << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1503,8 +1305,7 @@ void CLogNormalMeanPrecConjugateTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CLogNormalMeanPrecConjugateTest::testVarianceScale() -{ +void CLogNormalMeanPrecConjugateTest::testVarianceScale() { LOG_DEBUG("+------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testVarianceScale |"); LOG_DEBUG("+------------------------------------------------------+"); @@ -1527,36 +1328,24 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() // Finally, we test update with scaled samples produces the // correct posterior. 
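// [editorial note] The scaled parameters used throughout this test come from
// moment matching: for LogNormal(mu, s2) the mean is exp(mu + s2 / 2) and the
// variance is (exp(s2) - 1) * exp(2 * mu + s2). Choosing
// s2' = log(1 + gamma * (exp(s2) - 1)) and mu' = mu + (s2 - s2') / 2 keeps
// mu + s2 / 2 (hence the mean) fixed, while 2 * mu' + s2' = 2 * mu + s2, so the
// variance is multiplied by exactly gamma. A standalone check of this identity
// (a sketch, independent of the test fixture):
//
//     #include <boost/math/distributions/lognormal.hpp>
//     #include <cassert>
//     #include <cmath>
//
//     void checkVarianceScaling(double mu, double s2, double gamma) {
//         double s2Scaled = std::log(1.0 + gamma * (std::exp(s2) - 1.0));
//         double muScaled = mu + (s2 - s2Scaled) / 2.0;
//         boost::math::lognormal_distribution<> original(mu, std::sqrt(s2));
//         boost::math::lognormal_distribution<> scaled(muScaled, std::sqrt(s2Scaled));
//         // Mean preserved, variance scaled by gamma (up to rounding error).
//         assert(std::fabs(boost::math::mean(scaled) - boost::math::mean(original)) <
//                1e-9 * boost::math::mean(original));
//         assert(std::fabs(boost::math::variance(scaled) - gamma * boost::math::variance(original)) <
//                1e-9 * gamma * boost::math::variance(original));
//     }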
- maths_t::ESampleWeightStyle scales[] = - { - maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight - }; + maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) - { + for (std::size_t s = 0u; s < boost::size(scales); ++s) { const double location = 2.0; const double squareScale = 1.5; { boost::math::lognormal_distribution<> logNormal(location, std::sqrt(squareScale)); - LOG_DEBUG("mean = " << boost::math::mean(logNormal) - << ", variance = " << boost::math::variance(logNormal)); + LOG_DEBUG("mean = " << boost::math::mean(logNormal) << ", variance = " << boost::math::variance(logNormal)); } - const double varianceScales[] = - { - 0.20, 0.50, 0.75, 1.50, 2.00, 5.00 - }; + const double varianceScales[] = {0.20, 0.50, 0.75, 1.50, 2.00, 5.00}; LOG_DEBUG(""); LOG_DEBUG("****** probabilityOfLessLikelySamples ******"); - const double percentiles[] = - { - 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0 - }; + const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0}; - const std::size_t nSamples[] = { 10u, 20u, 40u, 80u, 1000u }; + const std::size_t nSamples[] = {10u, 20u, 40u, 80u, 1000u}; const std::size_t nScaledSamples = 50000u; double percentileErrorTolerance = 0.08; @@ -1566,8 +1355,7 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() double totalUnscaledMeanPercentileError = 0.0; double totalMeanPercentileError = 0.0; double trials = 0.0; - for (size_t i = 0; i < boost::size(nSamples); ++i) - { + for (size_t i = 0; i < boost::size(nSamples); ++i) { LOG_DEBUG("**** nSamples = " << nSamples[i] << " ****"); test::CRandomNumbers rng; @@ -1587,42 +1375,34 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() TDoubleVec probabilities; probabilities.reserve(nScaledSamples); - for (std::size_t j = 0; j < unscaledSamples.size(); ++j) - { + for (std::size_t j = 0; j < unscaledSamples.size(); ++j) { TDouble1Vec sample(1, unscaledSamples[j]); double lowerBound, upperBound; - CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - sample, - lowerBound, - upperBound)); + CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); } std::sort(probabilities.begin(), probabilities.end()); - for (size_t j = 0; j < boost::size(percentiles); ++j) - { - std::size_t index = static_cast( - static_cast(nScaledSamples) * percentiles[j]/100.0); - double error = std::fabs(probabilities[index] - percentiles[j]/100.0); + for (size_t j = 0; j < boost::size(percentiles); ++j) { + std::size_t index = static_cast(static_cast(nScaledSamples) * percentiles[j] / 100.0); + double error = std::fabs(probabilities[index] - percentiles[j] / 100.0); unscaledPercentileErrors.push_back(error); unscaledMeanPercentileError += error; } unscaledMeanPercentileError /= static_cast(boost::size(percentiles)); } - for (size_t j = 0; j < boost::size(varianceScales); ++j) - { + for (size_t j = 0; j < boost::size(varianceScales); ++j) { LOG_DEBUG("**** variance scale = " << varianceScales[j] << " ****"); double ss = std::log(1.0 + varianceScales[j] * (std::exp(squareScale) - 1.0)); double shiftedLocation = location + (squareScale - ss) / 2.0; { boost::math::lognormal_distribution<> logNormal(shiftedLocation, std::sqrt(ss)); - 
LOG_DEBUG("mean = " << boost::math::mean(logNormal) - << ", variance = " << boost::math::variance(logNormal)); + LOG_DEBUG("mean = " << boost::math::mean(logNormal) << ", variance = " << boost::math::variance(logNormal)); } TDoubleVec scaledSamples; @@ -1630,18 +1410,16 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() TDoubleVec probabilities; probabilities.reserve(nScaledSamples); - for (std::size_t k = 0; k < scaledSamples.size(); ++k) - { + for (std::size_t k = 0; k < scaledSamples.size(); ++k) { double lowerBound, upperBound; maths_t::ETail tail; - CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[k]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), - lowerBound, - upperBound, - tail)); + CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + maths_t::TWeightStyleVec(1, scales[s]), + TDouble1Vec(1, scaledSamples[k]), + TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), + lowerBound, + upperBound, + tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); @@ -1649,18 +1427,14 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() std::sort(probabilities.begin(), probabilities.end()); double meanPercentileError = 0.0; - for (size_t k = 0; k < boost::size(percentiles); ++k) - { - std::size_t index = static_cast( - static_cast(nScaledSamples) * percentiles[k]/100.0); - double error = std::fabs(probabilities[index] - percentiles[k]/100.0); + for (size_t k = 0; k < boost::size(percentiles); ++k) { + std::size_t index = static_cast(static_cast(nScaledSamples) * percentiles[k] / 100.0); + double error = std::fabs(probabilities[index] - percentiles[k] / 100.0); meanPercentileError += error; double threshold = percentileErrorTolerance + unscaledPercentileErrors[k]; - LOG_DEBUG("percentile = " << percentiles[k] - << ", probability = " << probabilities[index] - << ", error = " << error - << ", error threshold = " << threshold); + LOG_DEBUG("percentile = " << percentiles[k] << ", probability = " << probabilities[index] << ", error = " << error + << ", error threshold = " << threshold); CPPUNIT_ASSERT(error < threshold); } @@ -1668,8 +1442,7 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() double threshold = meanPercentileErrorTolerance + unscaledMeanPercentileError; - LOG_DEBUG("mean error = " << meanPercentileError - << ", mean error threshold = " << threshold); + LOG_DEBUG("mean error = " << meanPercentileError << ", mean error threshold = " << threshold); CPPUNIT_ASSERT(meanPercentileError < threshold); @@ -1684,8 +1457,7 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { double threshold = totalMeanPercentileErrorTolerance + totalUnscaledMeanPercentileError; LOG_DEBUG("total unscaled mean error = " << totalUnscaledMeanPercentileError); - LOG_DEBUG("total mean error = " << totalMeanPercentileError - << ", total mean error threshold = " << threshold); + LOG_DEBUG("total mean error = " << totalMeanPercentileError << ", total mean error threshold = " << threshold); CPPUNIT_ASSERT(totalMeanPercentileError < threshold); } @@ -1694,17 +1466,13 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() test::CRandomNumbers rng; - for (size_t i = 0; i < boost::size(varianceScales); ++i) - { + for (size_t i = 0; i < boost::size(varianceScales); ++i) { LOG_DEBUG("**** variance scale = " << varianceScales[i] << " ****"); double ss = 
std::log(1.0 + varianceScales[i] * (std::exp(squareScale) - 1.0)); double shiftedLocation = location + (squareScale - ss) / 2.0; boost::math::lognormal_distribution<> logNormal(shiftedLocation, std::sqrt(ss)); - { - LOG_DEBUG("mean = " << boost::math::mean(logNormal) - << ", variance = " << boost::math::variance(logNormal)); - } + { LOG_DEBUG("mean = " << boost::math::mean(logNormal) << ", variance = " << boost::math::variance(logNormal)); } double expectedDifferentialEntropy = maths::CTools::differentialEntropy(logNormal); CLogNormalMeanPrecConjugate filter(makePrior()); @@ -1718,45 +1486,36 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() TDoubleVec scaledSamples; rng.generateLogNormalSamples(shiftedLocation, ss, 100000, scaledSamples); - for (std::size_t j = 0u; j < scaledSamples.size(); ++j) - { + for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { double logLikelihood = 0.0; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood( - maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), - logLikelihood)); + filter.jointLogMarginalLikelihood(maths_t::TWeightStyleVec(1, scales[s]), + TDouble1Vec(1, scaledSamples[j]), + TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), + logLikelihood)); differentialEntropy -= logLikelihood; } differentialEntropy /= static_cast(scaledSamples.size()); - LOG_DEBUG("differentialEntropy = " << differentialEntropy - << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); + LOG_DEBUG("differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.5); } } - const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; - const double maximumMeanError[] = { 0.5, 0.5 }; - const double maximumVarianceError[] = { 1.4, 1.0 }; - const double maximumMeanMeanError[] = { 0.02, 0.01 }; - const double maximumMeanVarianceError[] = { 0.18, 0.1 }; + const double maximumMeanError[] = {0.5, 0.5}; + const double maximumVarianceError[] = {1.4, 1.0}; + const double maximumMeanMeanError[] = {0.02, 0.01}; + const double maximumMeanVarianceError[] = {0.18, 0.1}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) - { - for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) - { - const double means[] = { 0.1, 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0 }; - const double variances[] = { 0.1, 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0 }; - const double varianceScales[] = { 0.1, 0.5, 1.0, 2.0, 10.0, 100.0 }; + for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { + const double means[] = {0.1, 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; + const double variances[] = {0.1, 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; + const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0, 100.0}; maths_t::TWeightStyleVec weightStyle(1, scales[s]); TDoubleVec samples; @@ -1767,24 +1526,20 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() TMeanAccumulator meanMeanError; TMeanAccumulator meanVarianceError; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { - for (std::size_t j = 0u; j < boost::size(variances); ++j) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { + for (std::size_t j = 
0u; j < boost::size(variances); ++j) { double mean = means[i]; double variance = variances[j]; // We don't include very skewed distributions because they // are hard estimate accurately even without scaling due to // relatively frequent large outliers. - if (mean <= 0.1 * variance) - { + if (mean <= 0.1 * variance) { continue; } // We purposely don't estimate true variance in this case. - if (std::sqrt(variance) < mean * maths::MINIMUM_COEFFICIENT_OF_VARIATION) - { + if (std::sqrt(variance) < mean * maths::MINIMUM_COEFFICIENT_OF_VARIATION) { continue; } @@ -1794,17 +1549,14 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { boost::math::lognormal_distribution<> logNormal(location, std::sqrt(squareScale)); LOG_DEBUG(""); - LOG_DEBUG("****** mean = " << boost::math::mean(logNormal) - << ", variance = " << boost::math::variance(logNormal) << " ******"); - LOG_DEBUG("location = " << location - << ", precision = " << precision); + LOG_DEBUG("****** mean = " << boost::math::mean(logNormal) << ", variance = " << boost::math::variance(logNormal) + << " ******"); + LOG_DEBUG("location = " << location << ", precision = " << precision); } - for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) - { + for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double scale = varianceScales[k]; - if (scale * variance >= 100.0 * mean) - { + if (scale * variance >= 100.0 * mean) { continue; } LOG_DEBUG("*** scale = " << scale << " ***"); @@ -1815,15 +1567,13 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { boost::math::lognormal_distribution<> logNormal(scaledLocation, std::sqrt(scaledSquareScale)); LOG_DEBUG("scaled mean = " << boost::math::mean(logNormal) - << ", scaled variance = " << boost::math::variance(logNormal)); - LOG_DEBUG("scaled location = " << scaledLocation - << ", scaled precision = " << scaledPrecision); + << ", scaled variance = " << boost::math::variance(logNormal)); + LOG_DEBUG("scaled location = " << scaledLocation << ", scaled precision = " << scaledPrecision); } TMeanAccumulator meanError; TMeanAccumulator varianceError; - for (unsigned int test = 0u; test < 5; ++test) - { + for (unsigned int test = 0u; test < 5; ++test) { CLogNormalMeanPrecConjugate filter(makePrior(dataTypes[t])); rng.generateLogNormalSamples(location, squareScale, 200, samples); @@ -1835,14 +1585,12 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() weights.resize(samples.size(), TDouble4Vec(1, scale)); filter.addSamples(weightStyle, samples, weights); - boost::math::lognormal_distribution<> logNormal(filter.normalMean(), - std::sqrt(1.0 / filter.normalPrecision())); + boost::math::lognormal_distribution<> logNormal(filter.normalMean(), std::sqrt(1.0 / filter.normalPrecision())); double dm = (dataTypes[t] == maths_t::E_IntegerData ? 0.5 : 0.0); double dv = (dataTypes[t] == maths_t::E_IntegerData ? 
1.0 / 12.0 : 0.0); - double trialMeanError = std::fabs(boost::math::mean(logNormal) - (mean + dm)) - / std::max(1.0, mean); - double trialVarianceError = std::fabs(boost::math::variance(logNormal) - (variance + dv)) - / std::max(1.0, variance); + double trialMeanError = std::fabs(boost::math::mean(logNormal) - (mean + dm)) / std::max(1.0, mean); + double trialVarianceError = + std::fabs(boost::math::variance(logNormal) - (variance + dv)) / std::max(1.0, variance); LOG_DEBUG("trial mean error = " << trialMeanError); LOG_DEBUG("trial variance error = " << trialVarianceError); @@ -1851,10 +1599,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() varianceError.add(trialVarianceError); } - LOG_DEBUG("mean error = " - << maths::CBasicStatistics::mean(meanError)); - LOG_DEBUG("variance error = " - << maths::CBasicStatistics::mean(varianceError)); + LOG_DEBUG("mean error = " << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG("variance error = " << maths::CBasicStatistics::mean(varianceError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < maximumMeanError[t]); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceError) < maximumVarianceError[t]); @@ -1865,10 +1611,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() } } - LOG_DEBUG("mean mean error = " - << maths::CBasicStatistics::mean(meanMeanError)); - LOG_DEBUG("mean variance error = " - << maths::CBasicStatistics::mean(meanVarianceError)); + LOG_DEBUG("mean mean error = " << maths::CBasicStatistics::mean(meanMeanError)); + LOG_DEBUG("mean variance error = " << maths::CBasicStatistics::mean(meanVarianceError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < maximumMeanMeanError[t]); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanVarianceError) < maximumMeanVarianceError[t]); @@ -1876,8 +1620,7 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() } } -void CLogNormalMeanPrecConjugateTest::testNegativeSample() -{ +void CLogNormalMeanPrecConjugateTest::testNegativeSample() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CLogNormalMeanPrecConjugateTest::testNegativeSample |"); LOG_DEBUG("+-------------------------------------------------------+"); @@ -1896,10 +1639,8 @@ void CLogNormalMeanPrecConjugateTest::testNegativeSample() TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 100, samples); - CLogNormalMeanPrecConjugate filter1 = - CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, 0.0, 0.2); - CLogNormalMeanPrecConjugate filter2 = - CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.74524, 0.0, 0.2); + CLogNormalMeanPrecConjugate filter1 = CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, 0.0, 0.2); + CLogNormalMeanPrecConjugate filter2 = CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.74524, 0.0, 0.2); filter1.addSamples(samples); filter2.addSamples(samples); @@ -1915,65 +1656,47 @@ void CLogNormalMeanPrecConjugateTest::testNegativeSample() CPPUNIT_ASSERT(filter1.equalTolerance(filter2, equal)); } -CppUnit::Test* CLogNormalMeanPrecConjugateTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLogNormalMeanPrecConjugateTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testMultipleUpdate", - &CLogNormalMeanPrecConjugateTest::testMultipleUpdate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - 
"CLogNormalMeanPrecConjugateTest::testPropagation", - &CLogNormalMeanPrecConjugateTest::testPropagation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testMeanEstimation", - &CLogNormalMeanPrecConjugateTest::testMeanEstimation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testPrecisionEstimation", - &CLogNormalMeanPrecConjugateTest::testPrecisionEstimation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testMarginalLikelihood", - &CLogNormalMeanPrecConjugateTest::testMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean", - &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode", - &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance", - &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood", - &CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testCdf", - &CLogNormalMeanPrecConjugateTest::testCdf) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples", - &CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testAnomalyScore", - &CLogNormalMeanPrecConjugateTest::testAnomalyScore) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testOffset", - &CLogNormalMeanPrecConjugateTest::testOffset) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testIntegerData", - &CLogNormalMeanPrecConjugateTest::testIntegerData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testLowVariationData", - &CLogNormalMeanPrecConjugateTest::testLowVariationData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testPersist", - &CLogNormalMeanPrecConjugateTest::testPersist) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testVarianceScale", - &CLogNormalMeanPrecConjugateTest::testVarianceScale) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testNegativeSample", - &CLogNormalMeanPrecConjugateTest::testNegativeSample) ); +CppUnit::Test* CLogNormalMeanPrecConjugateTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLogNormalMeanPrecConjugateTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testMultipleUpdate", + &CLogNormalMeanPrecConjugateTest::testMultipleUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testPropagation", + &CLogNormalMeanPrecConjugateTest::testPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testMeanEstimation", + &CLogNormalMeanPrecConjugateTest::testMeanEstimation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + 
"CLogNormalMeanPrecConjugateTest::testPrecisionEstimation", &CLogNormalMeanPrecConjugateTest::testPrecisionEstimation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLogNormalMeanPrecConjugateTest::testMarginalLikelihood", &CLogNormalMeanPrecConjugateTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean", &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode", &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance", + &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood", &CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testCdf", + &CLogNormalMeanPrecConjugateTest::testCdf)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples", + &CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testAnomalyScore", + &CLogNormalMeanPrecConjugateTest::testAnomalyScore)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testOffset", + &CLogNormalMeanPrecConjugateTest::testOffset)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testIntegerData", + &CLogNormalMeanPrecConjugateTest::testIntegerData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testLowVariationData", + &CLogNormalMeanPrecConjugateTest::testLowVariationData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testPersist", + &CLogNormalMeanPrecConjugateTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testVarianceScale", + &CLogNormalMeanPrecConjugateTest::testVarianceScale)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testNegativeSample", + &CLogNormalMeanPrecConjugateTest::testNegativeSample)); return suiteOfTests; } - diff --git a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.h b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.h index e89646a0a0..b6a4950d9c 100644 --- a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.h +++ b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.h @@ -9,30 +9,28 @@ #include +class CLogNormalMeanPrecConjugateTest : public CppUnit::TestFixture { +public: + void testMultipleUpdate(); + void testPropagation(); + void testMeanEstimation(); + void testPrecisionEstimation(); + void testMarginalLikelihood(); + void testMarginalLikelihoodMean(); + void testMarginalLikelihoodMode(); + void testMarginalLikelihoodVariance(); + void testSampleMarginalLikelihood(); + void testCdf(); + void testProbabilityOfLessLikelySamples(); + void testAnomalyScore(); + void testOffset(); + void testIntegerData(); + void testLowVariationData(); + void testPersist(); + void testVarianceScale(); + void testNegativeSample(); -class CLogNormalMeanPrecConjugateTest : public CppUnit::TestFixture -{ - public: - void testMultipleUpdate(); - void testPropagation(); - void 
testMeanEstimation(); - void testPrecisionEstimation(); - void testMarginalLikelihood(); - void testMarginalLikelihoodMean(); - void testMarginalLikelihoodMode(); - void testMarginalLikelihoodVariance(); - void testSampleMarginalLikelihood(); - void testCdf(); - void testProbabilityOfLessLikelySamples(); - void testAnomalyScore(); - void testOffset(); - void testIntegerData(); - void testLowVariationData(); - void testPersist(); - void testVarianceScale(); - void testNegativeSample(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CLogNormalMeanVarConjugateTest_h diff --git a/lib/maths/unittest/CLogTDistributionTest.cc b/lib/maths/unittest/CLogTDistributionTest.cc index a4f42f3539..752eccf741 100644 --- a/lib/maths/unittest/CLogTDistributionTest.cc +++ b/lib/maths/unittest/CLogTDistributionTest.cc @@ -24,8 +24,7 @@ using TDoubleVec = std::vector; using TDoubleVecItr = TDoubleVec::iterator; using TDoubleVecCItr = TDoubleVec::const_iterator; -void CLogTDistributionTest::testMode() -{ +void CLogTDistributionTest::testMode() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CLogTDistributionTest::testMode |"); LOG_DEBUG("+-----------------------------------+"); @@ -36,28 +35,21 @@ void CLogTDistributionTest::testMode() const double eps = 1e-5; - const double degreesFreedoms[] = { 2.0, 10.0, 40.0 }; - const double locations[] = { 1.0, 2.0, 3.0, 4.0, 6.0 }; - const double squareScales[] = { 0.5, 1, 1.5, 2.0, 3.0 }; + const double degreesFreedoms[] = {2.0, 10.0, 40.0}; + const double locations[] = {1.0, 2.0, 3.0, 4.0, 6.0}; + const double squareScales[] = {0.5, 1, 1.5, 2.0, 3.0}; - for (size_t i = 0; i < boost::size(degreesFreedoms); ++i) - { - for (size_t j = 0; j < boost::size(locations); ++j) - { - for (size_t k = 0; k < boost::size(squareScales); ++k) - { - LOG_DEBUG("degrees freedom = " << degreesFreedoms[i] - << ", location = " << locations[j] - << ", scale = " << std::sqrt(squareScales[k])); + for (size_t i = 0; i < boost::size(degreesFreedoms); ++i) { + for (size_t j = 0; j < boost::size(locations); ++j) { + for (size_t k = 0; k < boost::size(squareScales); ++k) { + LOG_DEBUG("degrees freedom = " << degreesFreedoms[i] << ", location = " << locations[j] + << ", scale = " << std::sqrt(squareScales[k])); - CLogTDistribution logt(degreesFreedoms[i], - locations[j], - std::sqrt(squareScales[k])); + CLogTDistribution logt(degreesFreedoms[i], locations[j], std::sqrt(squareScales[k])); double x = mode(logt); - if (x != 0.0) - { + if (x != 0.0) { double pMinusEps = pdf(logt, x - eps); double p = pdf(logt, x); double pPlusEps = pdf(logt, x + eps); @@ -74,8 +66,7 @@ void CLogTDistributionTest::testMode() } } -void CLogTDistributionTest::testPdf() -{ +void CLogTDistributionTest::testPdf() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CLogTDistributionTest::testPdf |"); LOG_DEBUG("+----------------------------------+"); @@ -85,21 +76,17 @@ void CLogTDistributionTest::testPdf() const double tolerance = 1e-6; const double eps = 1e-6; - const double degreesFreedom[] = { 2.0, 10.0, 40.0 }; - const double locations[] = { 1.0, 2.0, 3.0 }; - const double squareScales[] = { 0.5, 1, 1.5 }; + const double degreesFreedom[] = {2.0, 10.0, 40.0}; + const double locations[] = {1.0, 2.0, 3.0}; + const double squareScales[] = {0.5, 1, 1.5}; size_t nTests = boost::size(degreesFreedom); nTests = std::min(nTests, boost::size(locations)); nTests = std::min(nTests, boost::size(squareScales)); - for (size_t test = 0; test < nTests; ++test) - { - 
CLogTDistribution logt(degreesFreedom[test], - locations[test], - std::sqrt(squareScales[test])); + for (size_t test = 0; test < nTests; ++test) { + CLogTDistribution logt(degreesFreedom[test], locations[test], std::sqrt(squareScales[test])); - for (unsigned int p = 1; p < 100; ++p) - { + for (unsigned int p = 1; p < 100; ++p) { double q = static_cast(p) / 100.0; double x = quantile(logt, q); @@ -107,16 +94,14 @@ void CLogTDistributionTest::testPdf() double dcdfdx = (cdf(logt, x + eps) - cdf(logt, x - eps)) / 2.0 / eps; LOG_DEBUG("percentile = " << p << "%" - << ", pdf = " << pdf - << ", dcdfdx = " << dcdfdx); + << ", pdf = " << pdf << ", dcdfdx = " << dcdfdx); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, dcdfdx, tolerance); } } } -void CLogTDistributionTest::testCdf() -{ +void CLogTDistributionTest::testCdf() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CLogTDistributionTest::testCdf |"); LOG_DEBUG("+----------------------------------+"); @@ -126,42 +111,34 @@ void CLogTDistributionTest::testCdf() const size_t nSamples = 100000u; - const double degreesFreedom[] = { 2.0, 10.0, 40.0 }; - const double locations[] = { 1.0, 2.0, 3.0 }; - const double squareScales[] = { 0.5, 1, 1.5 }; + const double degreesFreedom[] = {2.0, 10.0, 40.0}; + const double locations[] = {1.0, 2.0, 3.0}; + const double squareScales[] = {0.5, 1, 1.5}; size_t nTests = boost::size(degreesFreedom); nTests = std::min(nTests, boost::size(locations)); nTests = std::min(nTests, boost::size(squareScales)); CRandomNumbers rng; - for (size_t test = 0; test < nTests; ++test) - { + for (size_t test = 0; test < nTests; ++test) { TDoubleVec samples; rng.generateStudentsSamples(degreesFreedom[test], nSamples, samples); - for (TDoubleVecItr sampleItr = samples.begin(); - sampleItr != samples.end(); - ++sampleItr) - { + for (TDoubleVecItr sampleItr = samples.begin(); sampleItr != samples.end(); ++sampleItr) { *sampleItr = std::exp(*sampleItr * std::sqrt(squareScales[test]) + locations[test]); } // Check the data percentiles. - CLogTDistribution logt(degreesFreedom[test], - locations[test], - std::sqrt(squareScales[test])); + CLogTDistribution logt(degreesFreedom[test], locations[test], std::sqrt(squareScales[test])); std::sort(samples.begin(), samples.end()); - for (unsigned int p = 1; p < 100; ++p) - { + for (unsigned int p = 1; p < 100; ++p) { double x = samples[nSamples * p / 100]; double actualCdf = cdf(logt, x); double expectedCdf = static_cast(p) / 100; LOG_DEBUG("percentile = " << p << "%" - << ", actual cdf = " << actualCdf - << ", expected cdf = " << expectedCdf); + << ", actual cdf = " << actualCdf << ", expected cdf = " << expectedCdf); // No more than a 10% error in the sample percentile. CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedCdf, actualCdf, 0.1 * expectedCdf); @@ -169,29 +146,24 @@ void CLogTDistributionTest::testCdf() } } -void CLogTDistributionTest::testQuantile() -{ +void CLogTDistributionTest::testQuantile() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CLogTDistributionTest::testQuantile |"); LOG_DEBUG("+---------------------------------------+"); // Check that the quantile is the inverse of the c.d.f. 
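// [editorial note] The property asserted here is the round trip
// cdf(quantile(q)) == q for q in (0, 1). A generic sketch of the check for any
// distribution whose cdf()/quantile() free functions are found by ADL; as
// written it belongs at namespace scope rather than inside a test body:
//
//     #include <cassert>
//     #include <cmath>
//
//     template<typename DISTRIBUTION>
//     void checkQuantileInvertsCdf(const DISTRIBUTION& distribution, double tolerance) {
//         for (unsigned int p = 1; p < 100; ++p) {
//             double q = static_cast<double>(p) / 100.0;
//             // Map q through quantile and back through the c.d.f.
//             assert(std::fabs(cdf(distribution, quantile(distribution, q)) - q) <= tolerance);
//         }
//     }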
- const double degreesFreedom[] = { 2.0, 10.0, 40.0 }; - const double locations[] = { 1.0, 2.0, 3.0 }; - const double squareScales[] = { 0.5, 1, 1.5 }; + const double degreesFreedom[] = {2.0, 10.0, 40.0}; + const double locations[] = {1.0, 2.0, 3.0}; + const double squareScales[] = {0.5, 1, 1.5}; size_t nTests = boost::size(degreesFreedom); nTests = std::min(nTests, boost::size(locations)); nTests = std::min(nTests, boost::size(squareScales)); - for (size_t test = 0; test < nTests; ++test) - { - CLogTDistribution logt(degreesFreedom[test], - locations[test], - std::sqrt(squareScales[test])); + for (size_t test = 0; test < nTests; ++test) { + CLogTDistribution logt(degreesFreedom[test], locations[test], std::sqrt(squareScales[test])); - for (unsigned int p = 1; p < 100; ++p) - { + for (unsigned int p = 1; p < 100; ++p) { double q = static_cast(p) / 100.0; // Check that the quantile function is the inverse @@ -201,22 +173,17 @@ void CLogTDistributionTest::testQuantile() } } -CppUnit::Test *CLogTDistributionTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLogTDistributionTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogTDistributionTest::testMode", - &CLogTDistributionTest::testMode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogTDistributionTest::testPdf", - &CLogTDistributionTest::testPdf) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogTDistributionTest::testCdf", - &CLogTDistributionTest::testCdf) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLogTDistributionTest::testQuantile", - &CLogTDistributionTest::testQuantile) ); +CppUnit::Test* CLogTDistributionTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLogTDistributionTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CLogTDistributionTest::testMode", &CLogTDistributionTest::testMode)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CLogTDistributionTest::testPdf", &CLogTDistributionTest::testPdf)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CLogTDistributionTest::testCdf", &CLogTDistributionTest::testCdf)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CLogTDistributionTest::testQuantile", &CLogTDistributionTest::testQuantile)); return suiteOfTests; } diff --git a/lib/maths/unittest/CLogTDistributionTest.h b/lib/maths/unittest/CLogTDistributionTest.h index a21ce60af2..0011da12d8 100644 --- a/lib/maths/unittest/CLogTDistributionTest.h +++ b/lib/maths/unittest/CLogTDistributionTest.h @@ -9,15 +9,14 @@ #include -class CLogTDistributionTest : public CppUnit::TestFixture -{ - public: - void testMode(); - void testPdf(); - void testCdf(); - void testQuantile(); +class CLogTDistributionTest : public CppUnit::TestFixture { +public: + void testMode(); + void testPdf(); + void testCdf(); + void testQuantile(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CLogTDistributionTest_h diff --git a/lib/maths/unittest/CMathsFuncsTest.cc b/lib/maths/unittest/CMathsFuncsTest.cc index 27b776eb88..d34deedb4f 100644 --- a/lib/maths/unittest/CMathsFuncsTest.cc +++ b/lib/maths/unittest/CMathsFuncsTest.cc @@ -14,13 +14,13 @@ using namespace ml; -namespace -{ -double zero() { return 0.0; } +namespace { +double zero() { + return 0.0; +} } -void CMathsFuncsTest::testIsNan() -{ +void CMathsFuncsTest::testIsNan() { CPPUNIT_ASSERT(!maths::CMathsFuncs::isNan(0.0)); CPPUNIT_ASSERT(!maths::CMathsFuncs::isNan(1e7)); CPPUNIT_ASSERT(!maths::CMathsFuncs::isNan(-1e17)); @@ -32,8 +32,7 @@ void 
CMathsFuncsTest::testIsNan() CPPUNIT_ASSERT(maths::CMathsFuncs::isNan(1.0 / zero() - 2.0 / zero())); } -void CMathsFuncsTest::testIsInf() -{ +void CMathsFuncsTest::testIsInf() { CPPUNIT_ASSERT(!maths::CMathsFuncs::isInf(0.0)); CPPUNIT_ASSERT(!maths::CMathsFuncs::isInf(1.8738e7)); CPPUNIT_ASSERT(!maths::CMathsFuncs::isInf(-1.376e17)); @@ -47,8 +46,7 @@ void CMathsFuncsTest::testIsInf() CPPUNIT_ASSERT(maths::CMathsFuncs::isInf(std::exp(1.0 / zero()))); } -void CMathsFuncsTest::testIsFinite() -{ +void CMathsFuncsTest::testIsFinite() { using TDoubleVec = std::vector; CPPUNIT_ASSERT(maths::CMathsFuncs::isFinite(0.0)); @@ -68,8 +66,7 @@ void CMathsFuncsTest::testIsFinite() test1.push_back(2.0); test1.push_back(25.0); test1.push_back(-1e6); - CPPUNIT_ASSERT(std::equal(test1.begin(), test1.end(), - maths::CMathsFuncs::beginFinite(test1))); + CPPUNIT_ASSERT(std::equal(test1.begin(), test1.end(), maths::CMathsFuncs::beginFinite(test1))); TDoubleVec test2; test2.push_back(zero() / zero()); @@ -80,47 +77,31 @@ void CMathsFuncsTest::testIsFinite() test2.push_back(25.0); test2.push_back(-1e6); test2.push_back(zero() / zero()); - CPPUNIT_ASSERT(std::equal(test1.begin(), test1.end(), - maths::CMathsFuncs::beginFinite(test2))); + CPPUNIT_ASSERT(std::equal(test1.begin(), test1.end(), maths::CMathsFuncs::beginFinite(test2))); TDoubleVec test3; - CPPUNIT_ASSERT( maths::CMathsFuncs::beginFinite(test3) - == maths::CMathsFuncs::endFinite(test3)); + CPPUNIT_ASSERT(maths::CMathsFuncs::beginFinite(test3) == maths::CMathsFuncs::endFinite(test3)); TDoubleVec test4; test4.push_back(zero() / zero()); test4.push_back(1.0 / zero()); test4.push_back(zero() / zero()); - CPPUNIT_ASSERT( maths::CMathsFuncs::beginFinite(test4) - == maths::CMathsFuncs::endFinite(test4)); + CPPUNIT_ASSERT(maths::CMathsFuncs::beginFinite(test4) == maths::CMathsFuncs::endFinite(test4)); } -void CMathsFuncsTest::testFpStatus() -{ - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - maths::CMathsFuncs::fpStatus(3.8)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, - maths::CMathsFuncs::fpStatus(1.0 / zero())); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed, - maths::CMathsFuncs::fpStatus(zero() / zero())); +void CMathsFuncsTest::testFpStatus() { + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CMathsFuncs::fpStatus(3.8)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, maths::CMathsFuncs::fpStatus(1.0 / zero())); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed, maths::CMathsFuncs::fpStatus(zero() / zero())); } -CppUnit::Test *CMathsFuncsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMathsFuncsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMathsFuncsTest::testIsNan", - &CMathsFuncsTest::testIsNan) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMathsFuncsTest::testIsInf", - &CMathsFuncsTest::testIsInf) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMathsFuncsTest::testIsFinite", - &CMathsFuncsTest::testIsFinite) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMathsFuncsTest::testFpStatus", - &CMathsFuncsTest::testFpStatus) ); +CppUnit::Test* CMathsFuncsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMathsFuncsTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CMathsFuncsTest::testIsNan", &CMathsFuncsTest::testIsNan)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMathsFuncsTest::testIsInf", &CMathsFuncsTest::testIsInf)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMathsFuncsTest::testIsFinite", &CMathsFuncsTest::testIsFinite)); + 
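// [editorial note] The zero() helper defined at the top of this file appears
// to exist so that expressions such as 1.0 / zero() and zero() / zero() are
// evaluated at run time, yielding IEEE 754 +inf and NaN without the
// compile-time diagnostics or constant folding that literal division by zero
// can trigger; this rationale is an inference from the code. The same special
// values are available directly from the standard library:
//
//     #include <limits>
//
//     const double positiveInfinity = std::numeric_limits<double>::infinity();
//     const double quietNaN = std::numeric_limits<double>::quiet_NaN();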
suiteOfTests->addTest(new CppUnit::TestCaller("CMathsFuncsTest::testFpStatus", &CMathsFuncsTest::testFpStatus)); return suiteOfTests; } diff --git a/lib/maths/unittest/CMathsFuncsTest.h b/lib/maths/unittest/CMathsFuncsTest.h index 2624fdf554..647752ee18 100644 --- a/lib/maths/unittest/CMathsFuncsTest.h +++ b/lib/maths/unittest/CMathsFuncsTest.h @@ -9,15 +9,14 @@ #include -class CMathsFuncsTest : public CppUnit::TestFixture -{ - public: - void testIsNan(); - void testIsInf(); - void testIsFinite(); - void testFpStatus(); +class CMathsFuncsTest : public CppUnit::TestFixture { +public: + void testIsNan(); + void testIsInf(); + void testIsFinite(); + void testFpStatus(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CMathsFuncsTest_h diff --git a/lib/maths/unittest/CMathsMemoryTest.cc b/lib/maths/unittest/CMathsMemoryTest.cc index da53b310c3..af81203250 100644 --- a/lib/maths/unittest/CMathsMemoryTest.cc +++ b/lib/maths/unittest/CMathsMemoryTest.cc @@ -6,16 +6,16 @@ #include "CMathsMemoryTest.h" -#include #include #include -#include -#include +#include #include +#include +#include +#include #include #include -#include -#include +#include #include #include #include @@ -23,15 +23,13 @@ using namespace ml; using namespace maths; -void CMathsMemoryTest::testTimeSeriesDecompositions() -{ +void CMathsMemoryTest::testTimeSeriesDecompositions() { CTimeSeriesDecomposition decomp(0.95, 3600, 55); core_t::TTime time; time = 140390672; - for (unsigned i = 0; i < 600000; i += 600) - { + for (unsigned i = 0; i < 600000; i += 600) { decomp.addPoint(time + i, (0.55 * (0.2 + (i % 86400)))); } @@ -40,8 +38,7 @@ void CMathsMemoryTest::testTimeSeriesDecompositions() CPPUNIT_ASSERT_EQUAL(decomp.memoryUsage(), mem.usage()); } -void CMathsMemoryTest::testPriors() -{ +void CMathsMemoryTest::testPriors() { CConstantPrior::TOptionalDouble d; CConstantPrior constantPrior(d); CPPUNIT_ASSERT_EQUAL(std::size_t(0), constantPrior.memoryUsage()); @@ -61,41 +58,31 @@ void CMathsMemoryTest::testPriors() gammaRateConjugate.addSamples(weightStyles, samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), gammaRateConjugate.memoryUsage()); - CLogNormalMeanPrecConjugate logNormalConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7, 0.2); CPPUNIT_ASSERT_EQUAL(std::size_t(0), logNormalConjugate.memoryUsage()); logNormalConjugate.addSamples(weightStyles, samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), logNormalConjugate.memoryUsage()); - CPoissonMeanConjugate poissonConjugate(0.0, 0.8, 0.7, 0.3); CPPUNIT_ASSERT_EQUAL(std::size_t(0), poissonConjugate.memoryUsage()); poissonConjugate.addSamples(weightStyles, samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), poissonConjugate.memoryUsage()); - CNormalMeanPrecConjugate normalConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7, 0.2); CPPUNIT_ASSERT_EQUAL(std::size_t(0), normalConjugate.memoryUsage()); normalConjugate.addSamples(weightStyles, samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), normalConjugate.memoryUsage()); - CMultinomialConjugate multinomialConjugate; CPPUNIT_ASSERT_EQUAL(std::size_t(0), multinomialConjugate.memoryUsage()); multinomialConjugate.addSamples(weightStyles, samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), multinomialConjugate.memoryUsage()); - - CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersEqualWeight); + CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, 
maths_t::E_ClustersEqualWeight); // Check that the clusterer has size at least as great as the sum of it's fixed members - std::size_t clustererSize = sizeof(maths_t::EDataType) - + 4 * sizeof(double) - + sizeof(maths_t::EClusterWeightCalc) - + sizeof(CClusterer1d::CIndexGenerator) - + sizeof(CXMeansOnline1d::TClusterVec); + std::size_t clustererSize = sizeof(maths_t::EDataType) + 4 * sizeof(double) + sizeof(maths_t::EClusterWeightCalc) + + sizeof(CClusterer1d::CIndexGenerator) + sizeof(CXMeansOnline1d::TClusterVec); CPPUNIT_ASSERT(clusterer.memoryUsage() >= clustererSize); @@ -126,8 +113,7 @@ void CMathsMemoryTest::testPriors() CPPUNIT_ASSERT_EQUAL(multimodalPrior.memoryUsage(), mem.usage()); } -void CMathsMemoryTest::testBjkstVec() -{ +void CMathsMemoryTest::testBjkstVec() { using TBjkstValuesVec = std::vector; { // Test empty @@ -142,10 +128,8 @@ void CMathsMemoryTest::testBjkstVec() TBjkstValuesVec values; maths::CBjkstUniqueValues seed(3, 100); values.resize(5, seed); - for (std::size_t i = 0; i < 5; i++) - { - for (int j = 0; j < 100; j++) - { + for (std::size_t i = 0; i < 5; i++) { + for (int j = 0; j < 100; j++) { values[i].add(j); } } @@ -159,10 +143,8 @@ void CMathsMemoryTest::testBjkstVec() TBjkstValuesVec values; maths::CBjkstUniqueValues seed(3, 100); values.resize(5, seed); - for (std::size_t i = 0; i < 5; i++) - { - for (int j = 0; j < 1000; j++) - { + for (std::size_t i = 0; i < 5; i++) { + for (int j = 0; j < 1000; j++) { values[i].add(j); } } @@ -173,23 +155,15 @@ void CMathsMemoryTest::testBjkstVec() } } +CppUnit::Test* CMathsMemoryTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMathsMemoryTest"); -CppUnit::Test *CMathsMemoryTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMathsMemoryTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMathsMemoryTest::testPriors", - &CMathsMemoryTest::testPriors) ); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMathsMemoryTest::testTimeSeriesDecompositions", - &CMathsMemoryTest::testTimeSeriesDecompositions) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CMathsMemoryTest::testPriors", &CMathsMemoryTest::testPriors)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMathsMemoryTest::testBjkstVec", - &CMathsMemoryTest::testBjkstVec) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CMathsMemoryTest::testTimeSeriesDecompositions", + &CMathsMemoryTest::testTimeSeriesDecompositions)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMathsMemoryTest::testBjkstVec", &CMathsMemoryTest::testBjkstVec)); return suiteOfTests; } diff --git a/lib/maths/unittest/CMathsMemoryTest.h b/lib/maths/unittest/CMathsMemoryTest.h index 86c1ab30e4..57f9a9dffb 100644 --- a/lib/maths/unittest/CMathsMemoryTest.h +++ b/lib/maths/unittest/CMathsMemoryTest.h @@ -9,15 +9,13 @@ #include -class CMathsMemoryTest : public CppUnit::TestFixture -{ +class CMathsMemoryTest : public CppUnit::TestFixture { public: void testPriors(); void testBjkstVec(); void testTimeSeriesDecompositions(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; - #endif // INCLUDED_CMathsMemoryTest_h diff --git a/lib/maths/unittest/CMixtureDistributionTest.cc b/lib/maths/unittest/CMixtureDistributionTest.cc index 4d7652d3f6..afaf14f32d 100644 --- a/lib/maths/unittest/CMixtureDistributionTest.cc +++ b/lib/maths/unittest/CMixtureDistributionTest.cc @@ -24,8 +24,7 @@ using TNormalVec = std::vector>; using TLogNormalVec = std::vector>; using TGammaVec = std::vector>; -void 
CMixtureDistributionTest::testSupport() -{ +void CMixtureDistributionTest::testSupport() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CLogTDistributionTest::testSupport |"); LOG_DEBUG("+--------------------------------------+"); @@ -40,8 +39,7 @@ void CMixtureDistributionTest::testSupport() modes.push_back(n1); modes.push_back(n2); CMixtureDistribution> mixture(weights, modes); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(boost::math::support(n1)), - core::CContainerPrinter::print(support(mixture))); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(boost::math::support(n1)), core::CContainerPrinter::print(support(mixture))); } { boost::math::lognormal_distribution<> l1(1.0, 0.5); @@ -53,13 +51,11 @@ void CMixtureDistributionTest::testSupport() modes.push_back(l1); modes.push_back(l2); CMixtureDistribution> mixture(weights, modes); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(boost::math::support(l1)), - core::CContainerPrinter::print(support(mixture))); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(boost::math::support(l1)), core::CContainerPrinter::print(support(mixture))); } } -void CMixtureDistributionTest::testMode() -{ +void CMixtureDistributionTest::testMode() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CLogTDistributionTest::testMode |"); LOG_DEBUG("+-----------------------------------+"); @@ -73,22 +69,10 @@ void CMixtureDistributionTest::testMode() { LOG_DEBUG("Mixture Two Normals"); - double means[][2] = - { - { 0.0, 10.0 }, - { 0.0, 9.0 }, - { 0.0, 8.0 }, - { 0.0, 7.0 }, - { 0.0, 6.0 }, - { 0.0, 5.0 }, - { 0.0, 4.0 }, - { 0.0, 3.0 }, - { 0.0, 2.0 }, - { 0.0, 1.0 } - }; - - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + double means[][2] = { + {0.0, 10.0}, {0.0, 9.0}, {0.0, 8.0}, {0.0, 7.0}, {0.0, 6.0}, {0.0, 5.0}, {0.0, 4.0}, {0.0, 3.0}, {0.0, 2.0}, {0.0, 1.0}}; + + for (std::size_t i = 0u; i < boost::size(means); ++i) { LOG_DEBUG("means = " << core::CContainerPrinter::print(means[i])); TDoubleVec weights; weights.push_back(0.6); @@ -109,9 +93,7 @@ void CMixtureDistributionTest::testMode() double derivative = (pPlusEps - pMinusEps) / 2.0 / eps; double curvature = (pPlusEps - 2.0 * p + pMinusEps) / eps / eps; - LOG_DEBUG("x = " << x - << ", df/dx = " << derivative - << ", d^2f/dx^2 = " << curvature); + LOG_DEBUG("x = " << x << ", df/dx = " << derivative << ", d^2f/dx^2 = " << curvature); // Gradient zero + curvature negative => maximum. CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, derivative, 1e-6); @@ -143,9 +125,7 @@ void CMixtureDistributionTest::testMode() double derivative = (pPlusEps - pMinusEps) / 2.0 / eps; double curvature = (pPlusEps - 2.0 * p + pMinusEps) / eps / eps; - LOG_DEBUG("x = " << x - << ", df/dx = " << derivative - << ", d^2f/dx^2 = " << curvature); + LOG_DEBUG("x = " << x << ", df/dx = " << derivative << ", d^2f/dx^2 = " << curvature); // Gradient zero + curvature negative => maximum. CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, derivative, 1e-6); @@ -172,9 +152,7 @@ void CMixtureDistributionTest::testMode() double derivative = (pPlusEps - pMinusEps) / 2.0 / eps; double curvature = (pPlusEps - 2.0 * p + pMinusEps) / eps / eps; - LOG_DEBUG("x = " << x - << ", df/dx = " << derivative - << ", d^2f/dx^2 = " << curvature); + LOG_DEBUG("x = " << x << ", df/dx = " << derivative << ", d^2f/dx^2 = " << curvature); // Gradient zero + curvature negative => maximum. 
CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, derivative, 1e-6); @@ -182,8 +160,7 @@ void CMixtureDistributionTest::testMode() } } -void CMixtureDistributionTest::testPdf() -{ +void CMixtureDistributionTest::testPdf() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CMixtureDistributionTest::testPdf |"); LOG_DEBUG("+-------------------------------------+"); @@ -194,51 +171,17 @@ void CMixtureDistributionTest::testPdf() const double eps = 1e-6; { - double weights[][2] = - { - { 0.5, 0.5 }, - { 0.3, 0.7 }, - { 0.6, 0.4 }, - { 0.5, 0.5 }, - { 0.1, 0.9 }, - { 0.61, 0.39 }, - { 0.7, 0.3 }, - { 0.8, 0.2 }, - { 0.15, 0.85 }, - { 0.3, 0.7 } - }; - double means[][2] = - { - { 0.0, 10.0 }, - { 1.0, 9.0 }, - { 1.4, 6.0 }, - { 0.0, 7.0 }, - { 3.0, 7.5 }, - { 0.0, 5.0 }, - { 2.0, 4.0 }, - { 1.0, 3.0 }, - { 1.1, 2.0 }, - { 3.0, 3.2 } - }; - double variances[][2] = - { - { 0.3, 10.0 }, - { 1.0, 0.4 }, - { 1.4, 6.0 }, - { 3.0, 1.1 }, - { 3.0, 3.5 }, - { 1.0, 5.0 }, - { 2.3, 4.0 }, - { 3.0, 1.0 }, - { 1.1, 1.0 }, - { 3.0, 3.2 } - }; + double weights[][2] = { + {0.5, 0.5}, {0.3, 0.7}, {0.6, 0.4}, {0.5, 0.5}, {0.1, 0.9}, {0.61, 0.39}, {0.7, 0.3}, {0.8, 0.2}, {0.15, 0.85}, {0.3, 0.7}}; + double means[][2] = { + {0.0, 10.0}, {1.0, 9.0}, {1.4, 6.0}, {0.0, 7.0}, {3.0, 7.5}, {0.0, 5.0}, {2.0, 4.0}, {1.0, 3.0}, {1.1, 2.0}, {3.0, 3.2}}; + double variances[][2] = { + {0.3, 10.0}, {1.0, 0.4}, {1.4, 6.0}, {3.0, 1.1}, {3.0, 3.5}, {1.0, 5.0}, {2.3, 4.0}, {3.0, 1.0}, {1.1, 1.0}, {3.0, 3.2}}; CPPUNIT_ASSERT_EQUAL(boost::size(weights), boost::size(means)); CPPUNIT_ASSERT_EQUAL(boost::size(means), boost::size(variances)); - for (size_t i = 0u; i < boost::size(weights); ++i) - { + for (size_t i = 0u; i < boost::size(weights); ++i) { LOG_DEBUG("*** Test Case " << i << " ***"); TDoubleVec w; @@ -251,18 +194,15 @@ void CMixtureDistributionTest::testPdf() modes.push_back(n2); CMixtureDistribution> mixture(w, modes); - for (unsigned int p = 1; p < 100; ++p) - { + for (unsigned int p = 1; p < 100; ++p) { double q = static_cast(p) / 100.0; double x = quantile(mixture, q); double f = pdf(mixture, x); - double dFdx = ( cdf(mixture, x + eps) - - cdf(mixture, x - eps) ) / 2.0 / eps; + double dFdx = (cdf(mixture, x + eps) - cdf(mixture, x - eps)) / 2.0 / eps; LOG_DEBUG("percentile = " << p << "%" - << ", f = " << f - << ", dF/dx = " << dFdx); + << ", f = " << f << ", dF/dx = " << dFdx); CPPUNIT_ASSERT_DOUBLES_EQUAL(f, dFdx, tolerance); } @@ -270,8 +210,7 @@ void CMixtureDistributionTest::testPdf() } } -void CMixtureDistributionTest::testCdf() -{ +void CMixtureDistributionTest::testCdf() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CMixtureDistributionTest::testCdf |"); LOG_DEBUG("+-------------------------------------+"); @@ -281,50 +220,24 @@ void CMixtureDistributionTest::testCdf() const std::size_t nSamples = 100000u; - const double weights[][2] = - { - { 0.3, 0.7 }, - { 0.5, 0.5 }, - { 0.6, 0.4 }, - { 0.35, 0.65 }, - { 0.55, 0.45 } - }; - const double shapes[][2] = - { - { 10.0, 30.0 }, - { 5.0, 25.0 }, - { 20.0, 25.0 }, - { 4.0, 50.0 }, - { 11.0, 33.0 } - }; - const double scales[][2] = - { - { 0.3, 0.2 }, - { 1.0, 1.1 }, - { 0.9, 0.95 }, - { 0.4, 1.2 }, - { 2.3, 2.1 } - }; + const double weights[][2] = {{0.3, 0.7}, {0.5, 0.5}, {0.6, 0.4}, {0.35, 0.65}, {0.55, 0.45}}; + const double shapes[][2] = {{10.0, 30.0}, {5.0, 25.0}, {20.0, 25.0}, {4.0, 50.0}, {11.0, 33.0}}; + const double scales[][2] = {{0.3, 0.2}, {1.0, 1.1}, {0.9, 0.95}, {0.4, 1.2}, {2.3, 2.1}}; CPPUNIT_ASSERT_EQUAL(boost::size(weights), 
 
-void CMixtureDistributionTest::testCdf()
-{
+void CMixtureDistributionTest::testCdf() {
     LOG_DEBUG("+-------------------------------------+");
     LOG_DEBUG("| CMixtureDistributionTest::testCdf |");
     LOG_DEBUG("+-------------------------------------+");
@@ -281,50 +220,24 @@ void CMixtureDistributionTest::testCdf()
 
     const std::size_t nSamples = 100000u;
 
-    const double weights[][2] =
-        {
-            { 0.3, 0.7 },
-            { 0.5, 0.5 },
-            { 0.6, 0.4 },
-            { 0.35, 0.65 },
-            { 0.55, 0.45 }
-        };
-    const double shapes[][2] =
-        {
-            { 10.0, 30.0 },
-            { 5.0, 25.0 },
-            { 20.0, 25.0 },
-            { 4.0, 50.0 },
-            { 11.0, 33.0 }
-        };
-    const double scales[][2] =
-        {
-            { 0.3, 0.2 },
-            { 1.0, 1.1 },
-            { 0.9, 0.95 },
-            { 0.4, 1.2 },
-            { 2.3, 2.1 }
-        };
+    const double weights[][2] = {{0.3, 0.7}, {0.5, 0.5}, {0.6, 0.4}, {0.35, 0.65}, {0.55, 0.45}};
+    const double shapes[][2] = {{10.0, 30.0}, {5.0, 25.0}, {20.0, 25.0}, {4.0, 50.0}, {11.0, 33.0}};
+    const double scales[][2] = {{0.3, 0.2}, {1.0, 1.1}, {0.9, 0.95}, {0.4, 1.2}, {2.3, 2.1}};
 
     CPPUNIT_ASSERT_EQUAL(boost::size(weights), boost::size(shapes));
     CPPUNIT_ASSERT_EQUAL(boost::size(shapes), boost::size(scales));
 
     CRandomNumbers rng;
 
-    for (std::size_t i = 0u; i < boost::size(weights); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(weights); ++i) {
         LOG_DEBUG("*** Test Case " << i << " ***");
 
         TDoubleVec samples1;
-        rng.generateGammaSamples(shapes[i][0], scales[i][0],
-                                 static_cast<std::size_t>(
-                                     weights[i][0] * static_cast<double>(nSamples)),
-                                 samples1);
+        rng.generateGammaSamples(
+            shapes[i][0], scales[i][0], static_cast<std::size_t>(weights[i][0] * static_cast<double>(nSamples)), samples1);
 
         TDoubleVec samples2;
-        rng.generateGammaSamples(shapes[i][1], scales[i][1],
-                                 static_cast<std::size_t>(
-                                     weights[i][1] * static_cast<double>(nSamples)),
-                                 samples2);
+        rng.generateGammaSamples(
+            shapes[i][1], scales[i][1], static_cast<std::size_t>(weights[i][1] * static_cast<double>(nSamples)), samples2);
 
         TDoubleVec samples;
         samples.insert(samples.end(), samples1.begin(), samples1.end());
@@ -342,15 +255,13 @@ void CMixtureDistributionTest::testCdf()
         CMixtureDistribution<boost::math::gamma_distribution<>> mixture(w, modes);
 
         // Check the data percentiles.
-        for (unsigned int p = 1; p < 100; ++p)
-        {
+        for (unsigned int p = 1; p < 100; ++p) {
             double x = samples[nSamples * p / 100];
 
             double actualCdf = cdf(mixture, x);
             double expectedCdf = static_cast<double>(p) / 100;
 
             LOG_DEBUG("percentile = " << p << "%"
-                      << ", actual cdf = " << actualCdf
-                      << ", expected cdf = " << expectedCdf);
+                      << ", actual cdf = " << actualCdf << ", expected cdf = " << expectedCdf);
 
             // No more than a 10% error in the sample percentile.
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedCdf, actualCdf, 0.1 * expectedCdf);
@@ -358,44 +269,21 @@ void CMixtureDistributionTest::testCdf()
     }
 }
 
-void CMixtureDistributionTest::testQuantile()
-{
+void CMixtureDistributionTest::testQuantile() {
     LOG_DEBUG("+------------------------------------------+");
     LOG_DEBUG("| CMixtureDistributionTest::testQuantile |");
     LOG_DEBUG("+------------------------------------------+");
 
     // Check that the quantile is the inverse of the c.d.f.
 
-    const double weights[][3] =
-        {
-            { 0.4, 0.3, 0.3 },
-            { 0.1, 0.4, 0.5 },
-            { 0.6, 0.2, 0.2 },
-            { 0.1, 0.8, 0.1 },
-            { 0.25, 0.3, 0.45 }
-        };
-    const double locations[][3] =
-        {
-            { 1.0, 1.9, 2.2 },
-            { 0.9, 1.8, 3.0 },
-            { 2.0, 4.0, 4.5 },
-            { 0.1, 0.3, 0.4 },
-            { 0.2, 1.3, 4.8 }
-        };
-    const double scales[][3] =
-        {
-            { 0.1, 0.04, 0.5 },
-            { 0.8, 0.3, 0.6 },
-            { 0.5, 0.3, 0.4 },
-            { 0.3, 0.08, 0.9 },
-            { 0.1, 0.2, 1.0 }
-        };
+    const double weights[][3] = {{0.4, 0.3, 0.3}, {0.1, 0.4, 0.5}, {0.6, 0.2, 0.2}, {0.1, 0.8, 0.1}, {0.25, 0.3, 0.45}};
+    const double locations[][3] = {{1.0, 1.9, 2.2}, {0.9, 1.8, 3.0}, {2.0, 4.0, 4.5}, {0.1, 0.3, 0.4}, {0.2, 1.3, 4.8}};
+    const double scales[][3] = {{0.1, 0.04, 0.5}, {0.8, 0.3, 0.6}, {0.5, 0.3, 0.4}, {0.3, 0.08, 0.9}, {0.1, 0.2, 1.0}};
 
     CPPUNIT_ASSERT_EQUAL(boost::size(weights), boost::size(locations));
     CPPUNIT_ASSERT_EQUAL(boost::size(locations), boost::size(scales));
 
-    for (std::size_t i = 0u; i < boost::size(weights); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(weights); ++i) {
         LOG_DEBUG("*** Test " << i << " ***");
 
         TDoubleVec w;
@@ -411,8 +299,7 @@ void CMixtureDistributionTest::testQuantile()
         modes.push_back(l3);
         CMixtureDistribution<boost::math::lognormal_distribution<>> mixture(w, modes);
 
-        for (unsigned int p = 1; p < 100; ++p)
-        {
+        for (unsigned int p = 1; p < 100; ++p) {
             double q = static_cast<double>(p) / 100.0;
             double f = cdf(mixture, quantile(mixture, q));
             LOG_DEBUG("Error = " << std::fabs(q - f));
@@ -421,25 +308,19 @@ void CMixtureDistributionTest::testQuantile()
     }
 }
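Note: testQuantile above checks the round trip F(quantile(q)) == q. A minimal standalone
sketch of the same property - hypothetical code, standard library only, not from this
repository - computes the quantile of a strictly increasing c.d.f. by bisection and
verifies the round trip:

    #include <cassert>
    #include <cmath>

    // Logistic c.d.f.: strictly increasing, so its inverse is well defined.
    double cdf(double x) { return 1.0 / (1.0 + std::exp(-x)); }

    double quantileByBisection(double q) {
        double lo = -50.0, hi = 50.0;
        for (int i = 0; i < 200; ++i) { // halve the bracket until it is negligible
            double mid = 0.5 * (lo + hi);
            if (cdf(mid) < q) {
                lo = mid;
            } else {
                hi = mid;
            }
        }
        return 0.5 * (lo + hi);
    }

    int main() {
        for (int p = 1; p < 100; ++p) {
            double q = p / 100.0;
            assert(std::fabs(cdf(quantileByBisection(q)) - q) < 1e-9);
        }
        return 0;
    }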
 
-CppUnit::Test *CMixtureDistributionTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMixtureDistributionTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMixtureDistributionTest>(
-                               "CMixtureDistributionTest::testSupport",
-                               &CMixtureDistributionTest::testSupport) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMixtureDistributionTest>(
-                               "CMixtureDistributionTest::testMode",
-                               &CMixtureDistributionTest::testMode) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMixtureDistributionTest>(
-                               "CMixtureDistributionTest::testPdf",
-                               &CMixtureDistributionTest::testPdf) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMixtureDistributionTest>(
-                               "CMixtureDistributionTest::testCdf",
-                               &CMixtureDistributionTest::testCdf) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMixtureDistributionTest>(
-                               "CMixtureDistributionTest::testQuantile",
-                               &CMixtureDistributionTest::testQuantile) );
+CppUnit::Test* CMixtureDistributionTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMixtureDistributionTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testSupport", &CMixtureDistributionTest::testSupport));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testMode", &CMixtureDistributionTest::testMode));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testPdf", &CMixtureDistributionTest::testPdf));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testCdf", &CMixtureDistributionTest::testCdf));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testQuantile",
+                                                                            &CMixtureDistributionTest::testQuantile));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CMixtureDistributionTest.h b/lib/maths/unittest/CMixtureDistributionTest.h
index 96fe0b3b93..edfdfa00e5 100644
--- a/lib/maths/unittest/CMixtureDistributionTest.h
+++ b/lib/maths/unittest/CMixtureDistributionTest.h
@@ -9,16 +9,15 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CMixtureDistributionTest : public CppUnit::TestFixture
-{
-    public:
-        void testSupport();
-        void testMode();
-        void testPdf();
-        void testCdf();
-        void testQuantile();
+class CMixtureDistributionTest : public CppUnit::TestFixture {
+public:
+    void testSupport();
+    void testMode();
+    void testPdf();
+    void testCdf();
+    void testQuantile();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CMixtureDistributionTest_h
diff --git a/lib/maths/unittest/CModelTest.cc b/lib/maths/unittest/CModelTest.cc
index b95c0ee154..e6155a031c 100644
--- a/lib/maths/unittest/CModelTest.cc
+++ b/lib/maths/unittest/CModelTest.cc
@@ -14,8 +14,7 @@
 
 using namespace ml;
 
-void CModelTest::testAll()
-{
+void CModelTest::testAll() {
     LOG_DEBUG("+-----------------------+");
     LOG_DEBUG("| CModelTest::testAll |");
     LOG_DEBUG("+-----------------------+");
@@ -27,9 +26,8 @@ void CModelTest::testAll()
         double learnRate{0.5};
         double decayRate{0.001};
         double minimumSeasonalVarianceScale{0.3};
-        maths::CModelParams params(bucketLength, learnRate, decayRate,
-                                   minimumSeasonalVarianceScale,
-                                   6 * core::constants::HOUR, core::constants::DAY);
+        maths::CModelParams params(
+            bucketLength, learnRate, decayRate, minimumSeasonalVarianceScale, 6 * core::constants::HOUR, core::constants::DAY);
         CPPUNIT_ASSERT_EQUAL(bucketLength, params.bucketLength());
         CPPUNIT_ASSERT_EQUAL(learnRate, params.learnRate());
         CPPUNIT_ASSERT_EQUAL(decayRate, params.decayRate());
@@ -49,18 +47,16 @@ void CModelTest::testAll()
         maths::CModelAddSamplesParams::TDouble2Vec4VecVec priorWeights(1, weights2);
         maths::CModelAddSamplesParams params;
         params.integer(true)
-              .propagationInterval(1.5)
-              .weightStyles(maths::CConstantWeights::SEASONAL_VARIANCE)
-              .trendWeights(trendWeights)
-              .priorWeights(priorWeights);
+            .propagationInterval(1.5)
+            .weightStyles(maths::CConstantWeights::SEASONAL_VARIANCE)
+            .trendWeights(trendWeights)
+            .priorWeights(priorWeights);
         CPPUNIT_ASSERT_EQUAL(maths_t::E_IntegerData, params.type());
         CPPUNIT_ASSERT_EQUAL(1.5, params.propagationInterval());
         CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(maths::CConstantWeights::SEASONAL_VARIANCE),
                              core::CContainerPrinter::print(params.weightStyles()));
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(trendWeights),
-                             core::CContainerPrinter::print(params.trendWeights()));
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(priorWeights),
-                             core::CContainerPrinter::print(params.priorWeights()));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(trendWeights), core::CContainerPrinter::print(params.trendWeights()));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(priorWeights), core::CContainerPrinter::print(params.priorWeights()));
     }
     {
         maths::CModelProbabilityParams::TDouble2Vec weight1(2, 0.4);
@@ -71,38 +67,33 @@ void CModelTest::testAll()
         CPPUNIT_ASSERT(!params.mostAnomalousCorrelate());
         CPPUNIT_ASSERT(params.coordinates().empty());
         params.addCalculation(maths_t::E_OneSidedAbove)
-              .addCalculation(maths_t::E_TwoSided)
-              .seasonalConfidenceInterval(50.0)
-              .addBucketEmpty(maths::CModelProbabilityParams::TBool2Vec{true, true})
-              .addBucketEmpty(maths::CModelProbabilityParams::TBool2Vec{false, true})
-              .weightStyles(maths::CConstantWeights::COUNT_VARIANCE)
-              .addWeights(weights1)
-              .addWeights(weights2)
-              .mostAnomalousCorrelate(1)
-              .addCoordinate(1)
-              .addCoordinate(0);
+            .addCalculation(maths_t::E_TwoSided)
+            .seasonalConfidenceInterval(50.0)
+            .addBucketEmpty(maths::CModelProbabilityParams::TBool2Vec{true, true})
+            .addBucketEmpty(maths::CModelProbabilityParams::TBool2Vec{false, true})
+            .weightStyles(maths::CConstantWeights::COUNT_VARIANCE)
+            .addWeights(weights1)
+            .addWeights(weights2)
+            .mostAnomalousCorrelate(1)
+            .addCoordinate(1)
+            .addCoordinate(0);
         CPPUNIT_ASSERT_EQUAL(std::size_t(2), params.calculations());
         CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, params.calculation(0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, params.calculation(1));
         CPPUNIT_ASSERT_EQUAL(50.0, params.seasonalConfidenceInterval());
-        CPPUNIT_ASSERT_EQUAL(std::string("[[true, true], [false, true]]"),
-                             core::CContainerPrinter::print(params.bucketEmpty()));
+        CPPUNIT_ASSERT_EQUAL(std::string("[[true, true], [false, true]]"), core::CContainerPrinter::print(params.bucketEmpty()));
         CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(maths::CConstantWeights::COUNT_VARIANCE),
                              core::CContainerPrinter::print(params.weightStyles()));
-        CPPUNIT_ASSERT_EQUAL(std::string("[[[0.4, 0.4]], [[0.7, 0.7]]]"),
-                             core::CContainerPrinter::print(params.weights()));
+        CPPUNIT_ASSERT_EQUAL(std::string("[[[0.4, 0.4]], [[0.7, 0.7]]]"), core::CContainerPrinter::print(params.weights()));
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), *params.mostAnomalousCorrelate());
         CPPUNIT_ASSERT_EQUAL(std::string("[1, 0]"), core::CContainerPrinter::print(params.coordinates()));
     }
 }
 
-CppUnit::Test *CModelTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CModelTest");
+CppUnit::Test* CModelTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelTest");
 
-    suiteOfTests->addTest( new CppUnit::TestCaller<CModelTest>(
-                               "CModelTest::testAll",
-                               &CModelTest::testAll) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CModelTest>("CModelTest::testAll", &CModelTest::testAll));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CModelTest.h b/lib/maths/unittest/CModelTest.h
index 9d97d02b6a..9fa0c39161 100644
--- a/lib/maths/unittest/CModelTest.h
+++ b/lib/maths/unittest/CModelTest.h
@@ -9,12 +9,11 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CModelTest : public CppUnit::TestFixture
-{
-    public:
-        void testAll();
+class CModelTest : public CppUnit::TestFixture {
+public:
+    void testAll();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDE_CModelTest_h
diff --git a/lib/maths/unittest/CMultimodalPriorTest.cc b/lib/maths/unittest/CMultimodalPriorTest.cc
index 3520668dd5..52bf5d7ac4 100644
--- a/lib/maths/unittest/CMultimodalPriorTest.cc
+++ b/lib/maths/unittest/CMultimodalPriorTest.cc
@@ -39,8 +39,7 @@
 using namespace ml;
 using namespace handy_typedefs;
 
-namespace
-{
+namespace {
 
 using TDoubleVec = std::vector<double>;
 using TDoubleDoublePr = std::pair<double, double>;
@@ -53,14 +52,11 @@
 using CMultimodalPrior = CPriorTestInterfaceMixin<maths::CMultimodalPrior>;
 using COneOfNPrior = CPriorTestInterfaceMixin<maths::COneOfNPrior>;
 
 //! Make the default mode prior.
-COneOfNPrior makeModePrior(const double &decayRate = 0.0)
-{
-    CGammaRateConjugate gamma(
-        maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01, decayRate, 0.0));
+COneOfNPrior makeModePrior(const double& decayRate = 0.0) {
+    CGammaRateConjugate gamma(maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01, decayRate, 0.0));
     CLogNormalMeanPrecConjugate logNormal(
-        maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01, decayRate, 0.0));
-    CNormalMeanPrecConjugate normal(
-        maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate));
+        maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01, decayRate, 0.0));
+    CNormalMeanPrecConjugate normal(maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate));
 
     COneOfNPrior::TPriorPtrVec priors;
     priors.push_back(COneOfNPrior::TPriorPtr(gamma.clone()));
@@ -70,76 +66,40 @@ COneOfNPrior makeModePrior(const double &decayRate = 0.0)
 }
 
 //! Make a vanilla multimodal prior.
-CMultimodalPrior makePrior(const maths::CPrior *modePrior,
-                           const double &decayRate)
-{
-    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
-                                     maths::CAvailableModeDistributions::ALL,
-                                     maths_t::E_ClustersFractionWeight,
-                                     decayRate);
-
-    if (modePrior)
-    {
-        return maths::CMultimodalPrior(maths_t::E_ContinuousData,
-                                       clusterer,
-                                       *modePrior,
-                                       decayRate);
+CMultimodalPrior makePrior(const maths::CPrior* modePrior, const double& decayRate) {
+    maths::CXMeansOnline1d clusterer(
+        maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, decayRate);
+
+    if (modePrior) {
+        return maths::CMultimodalPrior(maths_t::E_ContinuousData, clusterer, *modePrior, decayRate);
     }
-    return maths::CMultimodalPrior(maths_t::E_ContinuousData,
-                                   clusterer,
-                                   makeModePrior(decayRate),
-                                   decayRate);
+    return maths::CMultimodalPrior(maths_t::E_ContinuousData, clusterer, makeModePrior(decayRate), decayRate);
 }
 
-CMultimodalPrior makePrior(const maths::CPrior *modePrior)
-{
+CMultimodalPrior makePrior(const maths::CPrior* modePrior) {
     return makePrior(modePrior, 0.0);
 }
 
-CMultimodalPrior makePrior(double decayRate)
-{
+CMultimodalPrior makePrior(double decayRate) {
    return makePrior(0, decayRate);
 }
 
-CMultimodalPrior makePrior()
-{
+CMultimodalPrior makePrior() {
    return makePrior(0, 0.0);
 }
 
 test::CRandomNumbers RNG;
 
-void sample(const boost::math::normal_distribution<> &normal,
-            std::size_t numberSamples,
-            TDoubleVec &result)
-{
-    RNG.generateNormalSamples(boost::math::mean(normal),
-                              boost::math::variance(normal),
-                              numberSamples,
-                              result);
+void sample(const boost::math::normal_distribution<>& normal, std::size_t numberSamples, TDoubleVec& result) {
    RNG.generateNormalSamples(boost::math::mean(normal), boost::math::variance(normal), numberSamples, result);
 }
 
-void sample(const boost::math::lognormal_distribution<> &lognormal,
-            std::size_t numberSamples,
-            TDoubleVec &result)
-{
-    RNG.generateLogNormalSamples(lognormal.location(),
-                                 lognormal.scale() * lognormal.scale(),
-                                 numberSamples,
-                                 result);
+void sample(const boost::math::lognormal_distribution<>& lognormal, std::size_t numberSamples, TDoubleVec& result) {
    RNG.generateLogNormalSamples(lognormal.location(), lognormal.scale() * lognormal.scale(), numberSamples, result);
 }
 
-void sample(const boost::math::gamma_distribution<> &gamma,
-            std::size_t numberSamples,
-            TDoubleVec &result)
-{
-    RNG.generateGammaSamples(gamma.shape(),
-                             gamma.scale(),
-                             numberSamples,
-                             result);
+void sample(const boost::math::gamma_distribution<>& gamma, std::size_t numberSamples, TDoubleVec& result) {
+    RNG.generateGammaSamples(gamma.shape(), gamma.scale(), numberSamples, result);
 }
 
 template<typename T>
-void probabilityOfLessLikelySample(const maths::CMixtureDistribution<T> &mixture,
-                                   const double &x,
-                                   double &probability,
-                                   double &deviation)
-{
+void probabilityOfLessLikelySample(const maths::CMixtureDistribution<T>& mixture, const double& x, double& probability, double& deviation) {
     using TModeVec = typename maths::CMixtureDistribution<T>::TModeVec;
 
     static const double NUMBER_SAMPLES = 10000.0;
@@ -147,16 +107,13 @@ void probabilityOfLessLikelySample(const maths::CMixtureDistribution<T> &mixture
     probability = 0.0;
     double fx = pdf(mixture, x);
-    const TDoubleVec &weights = mixture.weights();
-    const TModeVec &modes = mixture.modes();
-    for (std::size_t i = 0u; i < modes.size(); ++i)
-    {
+    const TDoubleVec& weights = mixture.weights();
+    const TModeVec& modes = mixture.modes();
+    for (std::size_t i = 0u; i < modes.size(); ++i) {
         TDoubleVec samples;
         sample(modes[i], static_cast<std::size_t>(NUMBER_SAMPLES * weights[i]), samples);
-        for (std::size_t j = 0u; j < samples.size(); ++j)
-        {
-            if (pdf(mixture, samples[j]) < fx)
-            {
+        for (std::size_t j = 0u; j < samples.size(); ++j) {
+            if (pdf(mixture, samples[j]) < fx) {
                 probability += 1.0 / NUMBER_SAMPLES;
             }
         }
@@ -166,11 +123,9 @@
     // "Anomaly Detection in Application Performance Monitoring Data"
     deviation = std::sqrt(probability * (1.0 - probability) / NUMBER_SAMPLES);
 }
-
 }
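Note: probabilityOfLessLikelySample above estimates P(f(X) < f(x)) by Monte Carlo - draw
points from the mixture in proportion to the mode weights and count the fraction whose
density falls below f(x) - and reports the usual binomial standard error
sqrt(p * (1 - p) / N) for that estimate. A minimal standalone sketch of the same estimator
(hypothetical code, standard library only, not from this repository):

    #include <cmath>
    #include <cstdio>
    #include <random>

    const double pi = 3.141592653589793;

    double normalPdf(double x, double m, double s) {
        double z = (x - m) / s;
        return std::exp(-0.5 * z * z) / (s * std::sqrt(2.0 * pi));
    }

    int main() {
        const double w[] = {0.5, 0.5}, m[] = {0.0, 10.0}, s[] = {1.0, 1.0};
        auto f = [&](double x) { return w[0] * normalPdf(x, m[0], s[0]) + w[1] * normalPdf(x, m[1], s[1]); };

        std::mt19937 rng(42);
        const std::size_t N = 10000;
        double x = 3.0; // point whose "probability of less likely samples" we want
        double fx = f(x);
        double p = 0.0;
        for (std::size_t i = 0; i < N; ++i) {
            // Sample the mixture: pick a mode by weight, then draw from it.
            std::size_t mode = std::bernoulli_distribution(w[1])(rng) ? 1 : 0;
            double xi = std::normal_distribution<double>(m[mode], s[mode])(rng);
            if (f(xi) < fx) {
                p += 1.0 / static_cast<double>(N);
            }
        }
        double deviation = std::sqrt(p * (1.0 - p) / static_cast<double>(N));
        std::printf("p = %.4f +/- %.4f\n", p, deviation);
        return 0;
    }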
 
-void CMultimodalPriorTest::testMultipleUpdate()
-{
+void CMultimodalPriorTest::testMultipleUpdate() {
     LOG_DEBUG("+--------------------------------------------+");
     LOG_DEBUG("| CMultimodalPriorTest::testMultipleUpdate |");
     LOG_DEBUG("+--------------------------------------------+");
 
     // Test that we get the same result updating once with a vector of 100
     // samples of an R.V. versus updating individually 100 times.
 
-    const maths_t::EDataType dataTypes[] =
-        {
-            maths_t::E_IntegerData,
-            maths_t::E_ContinuousData
-        };
+    const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData};
 
     const double shape = 2.0;
     const double scale = 3.0;
@@ -194,21 +145,14 @@ void CMultimodalPriorTest::testMultipleUpdate()
     TDoubleVec samples;
     rng.generateNormalSamples(shape, scale, 100, samples);
 
-    for (size_t i = 0; i < boost::size(dataTypes); ++i)
-    {
-        maths::CXMeansOnline1d clusterer(dataTypes[i],
-                                         maths::CAvailableModeDistributions::ALL,
-                                         maths_t::E_ClustersFractionWeight);
+    for (size_t i = 0; i < boost::size(dataTypes); ++i) {
+        maths::CXMeansOnline1d clusterer(dataTypes[i], maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight);
 
         CMultimodalPrior filter1(maths::CMultimodalPrior(
-            dataTypes[i],
-            clusterer,
-            maths::CNormalMeanPrecConjugate::nonInformativePrior(dataTypes[i],
-                                                                 decayRate)));
+            dataTypes[i], clusterer, maths::CNormalMeanPrecConjugate::nonInformativePrior(dataTypes[i], decayRate)));
         CMultimodalPrior filter2(filter1);
 
-        for (std::size_t j = 0; j < samples.size(); ++j)
-        {
+        for (std::size_t j = 0; j < samples.size(); ++j) {
             filter1.addSamples(TDouble1Vec(1, samples[j]));
         }
         filter2.addSamples(samples);
@@ -219,8 +163,7 @@ void CMultimodalPriorTest::testMultipleUpdate()
     }
 }
 
-void CMultimodalPriorTest::testPropagation()
-{
+void CMultimodalPriorTest::testPropagation() {
     LOG_DEBUG("+-----------------------------------------+");
     LOG_DEBUG("| CMultimodalPriorTest::testPropagation |");
     LOG_DEBUG("+-----------------------------------------+");
@@ -248,49 +191,38 @@ void CMultimodalPriorTest::testPropagation()
     const double decayRate = 0.1;
 
     CMultimodalPrior filter(makePrior(decayRate));
 
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
         filter.addSamples(TDouble1Vec(1, static_cast<double>(samples[i])));
         CPPUNIT_ASSERT(filter.checkInvariants());
     }
 
     double mean = filter.marginalLikelihoodMean();
-    TDoubleDoublePr percentiles[] =
-        {
-            filter.marginalLikelihoodConfidenceInterval(60.0),
-            filter.marginalLikelihoodConfidenceInterval(70.0),
-            filter.marginalLikelihoodConfidenceInterval(80.0),
-            filter.marginalLikelihoodConfidenceInterval(90.0)
-        };
+    TDoubleDoublePr percentiles[] = {filter.marginalLikelihoodConfidenceInterval(60.0),
+                                     filter.marginalLikelihoodConfidenceInterval(70.0),
+                                     filter.marginalLikelihoodConfidenceInterval(80.0),
+                                     filter.marginalLikelihoodConfidenceInterval(90.0)};
 
     filter.propagateForwardsByTime(40.0);
     CPPUNIT_ASSERT(filter.checkInvariants());
 
     double propagatedMean = filter.marginalLikelihoodMean();
-    TDoubleDoublePr propagatedPercentiles[] =
-        {
-            filter.marginalLikelihoodConfidenceInterval(60.0),
-            filter.marginalLikelihoodConfidenceInterval(70.0),
-            filter.marginalLikelihoodConfidenceInterval(80.0),
-            filter.marginalLikelihoodConfidenceInterval(90.0)
-        };
+    TDoubleDoublePr propagatedPercentiles[] = {filter.marginalLikelihoodConfidenceInterval(60.0),
+                                               filter.marginalLikelihoodConfidenceInterval(70.0),
+                                               filter.marginalLikelihoodConfidenceInterval(80.0),
+                                               filter.marginalLikelihoodConfidenceInterval(90.0)};
 
     LOG_DEBUG("mean = " << mean << ", propagatedMean = " << propagatedMean);
-    LOG_DEBUG("percentiles = "
-              << core::CContainerPrinter::print(percentiles));
-    LOG_DEBUG("propagatedPercentiles = "
-              << core::CContainerPrinter::print(propagatedPercentiles));
+    LOG_DEBUG("percentiles = " << core::CContainerPrinter::print(percentiles));
+    LOG_DEBUG("propagatedPercentiles = " << core::CContainerPrinter::print(propagatedPercentiles));
 
     CPPUNIT_ASSERT_DOUBLES_EQUAL(mean, propagatedMean, eps * mean);
 
-    for (std::size_t i = 0u; i < boost::size(percentiles); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(percentiles); ++i) {
         CPPUNIT_ASSERT(propagatedPercentiles[i].first < percentiles[i].first);
         CPPUNIT_ASSERT(propagatedPercentiles[i].second > percentiles[i].second);
     }
 }
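Note: the asserts above encode the expectation that propagating a prior forwards in time
leaves its mean roughly unchanged but widens every confidence interval. One way to see
why, under the assumption of exponential forgetting with factor lambda, is that decay
shrinks the effective sample count towards 1/(1 - lambda), and interval width scales like
1/sqrt(n_eff). A standalone sketch of that relationship (hypothetical code, standard
library only, not from this repository):

    #include <cmath>
    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t n = 1000;
        for (double lambda : {1.0, 0.999, 0.99, 0.9}) {
            double neff = 0.0;
            for (std::size_t k = 0; k < n; ++k) {
                neff += std::pow(lambda, static_cast<double>(k)); // decayed weight of the k-th newest sample
            }
            std::printf("lambda = %.3f, effective count = %.1f, relative interval width ~ %.2f\n",
                        lambda, neff, std::sqrt(static_cast<double>(n) / neff));
        }
        return 0;
    }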
 
-void CMultimodalPriorTest::testSingleMode()
-{
+void CMultimodalPriorTest::testSingleMode() {
     LOG_DEBUG("+----------------------------------------+");
     LOG_DEBUG("| CMultimodalPriorTest::testSingleMode |");
     LOG_DEBUG("+----------------------------------------+");
@@ -316,8 +248,7 @@ void CMultimodalPriorTest::testSingleMode()
         TDoubleVec samples;
         rng.generateNormalSamples(mean, std::sqrt(variance), 1000, samples);
 
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             TDouble1Vec sample(1, samples[i]);
             filter1.addSamples(sample);
             filter2.addSamples(sample);
@@ -329,27 +260,22 @@ void CMultimodalPriorTest::testSingleMode()
         TMeanAccumulator differentialEntropy;
 
         boost::math::normal_distribution<> f(mean, std::sqrt(variance));
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             double fx = boost::math::pdf(f, samples[i]);
             TDouble1Vec sample(1, samples[i]);
             double l1;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter1.jointLogMarginalLikelihood(sample, l1));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
             L1G.add(std::log(fx) - l1);
             double l2;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter2.jointLogMarginalLikelihood(sample, l2));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
             L12.add(l2 - l1);
             differentialEntropy.add(-std::log(fx));
         }
 
-        LOG_DEBUG("L1G = " << maths::CBasicStatistics::mean(L1G)
-                  << ", L12 = " << maths::CBasicStatistics::mean(L12)
-                  << ", differential entropy " << differentialEntropy);
+        LOG_DEBUG("L1G = " << maths::CBasicStatistics::mean(L1G) << ", L12 = " << maths::CBasicStatistics::mean(L12)
+                  << ", differential entropy " << differentialEntropy);
 
-        CPPUNIT_ASSERT(  maths::CBasicStatistics::mean(L1G)
-                       / maths::CBasicStatistics::mean(differentialEntropy) < 0.0);
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / maths::CBasicStatistics::mean(differentialEntropy) < 0.0);
     }
     LOG_DEBUG("Log-Normal");
     {
@@ -363,8 +289,7 @@ void CMultimodalPriorTest::testSingleMode()
         TDoubleVec samples;
         rng.generateLogNormalSamples(location, squareScale, 1000, samples);
 
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             TDouble1Vec sample(1, samples[i]);
             filter1.addSamples(sample);
             filter2.addSamples(sample);
@@ -377,27 +302,22 @@ void CMultimodalPriorTest::testSingleMode()
 
         boost::math::lognormal_distribution<> f(location, std::sqrt(squareScale));
 
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             double fx = boost::math::pdf(f, samples[i]);
             TDouble1Vec sample(1, samples[i]);
             double l1;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter1.jointLogMarginalLikelihood(sample, l1));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
             L1G.add(std::log(fx) - l1);
             double l2;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter2.jointLogMarginalLikelihood(sample, l2));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
             L12.add(l2 - l1);
             differentialEntropy.add(-std::log(fx));
         }
 
-        LOG_DEBUG("L1G = " << maths::CBasicStatistics::mean(L1G)
-                  << ", L12 = " << maths::CBasicStatistics::mean(L12)
-                  << ", differential entropy " << differentialEntropy);
+        LOG_DEBUG("L1G = " << maths::CBasicStatistics::mean(L1G) << ", L12 = " << maths::CBasicStatistics::mean(L12)
+                  << ", differential entropy " << differentialEntropy);
 
-        CPPUNIT_ASSERT(  maths::CBasicStatistics::mean(L1G)
-                       / maths::CBasicStatistics::mean(differentialEntropy) < 0.0);
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / maths::CBasicStatistics::mean(differentialEntropy) < 0.0);
     }
     LOG_DEBUG("Gamma");
     {
@@ -411,8 +331,7 @@ void CMultimodalPriorTest::testSingleMode()
         TDoubleVec samples;
         rng.generateGammaSamples(shape, scale, 1000, samples);
 
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             TDouble1Vec sample(1, samples[i]);
             filter1.addSamples(sample);
             filter2.addSamples(sample);
@@ -425,32 +344,26 @@ void CMultimodalPriorTest::testSingleMode()
 
         boost::math::gamma_distribution<> f(shape, scale);
 
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             double fx = boost::math::pdf(f, samples[i]);
             TDouble1Vec sample(1, samples[i]);
             double l1;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter1.jointLogMarginalLikelihood(sample, l1));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
             L1G.add(std::log(fx) - l1);
             double l2;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter2.jointLogMarginalLikelihood(sample, l2));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
             L12.add(l2 - l1);
             differentialEntropy.add(-std::log(fx));
         }
 
-        LOG_DEBUG("L1G = " << maths::CBasicStatistics::mean(L1G)
-                  << ", L12 = " << maths::CBasicStatistics::mean(L12)
-                  << ", differential entropy " << differentialEntropy);
+        LOG_DEBUG("L1G = " << maths::CBasicStatistics::mean(L1G) << ", L12 = " << maths::CBasicStatistics::mean(L12)
+                  << ", differential entropy " << differentialEntropy);
 
-        CPPUNIT_ASSERT(  maths::CBasicStatistics::mean(L1G)
-                       / maths::CBasicStatistics::mean(differentialEntropy) < 0.1);
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / maths::CBasicStatistics::mean(differentialEntropy) < 0.1);
    }
 }
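Note: the statistics in testSingleMode (and the testMultipleModes "loss" below) are mean
log-likelihood gaps, E[log f(X) - log f_hat(X)], which estimate the KL divergence from the
fitted model to the generating density; they are judged relative to the differential
entropy H = -E[log f(X)]. For a normal, H has the closed form
0.5 * log(2 * pi * e * sigma^2), which a Monte Carlo average recovers - a standalone
sketch (hypothetical code, standard library only, not from this repository):

    #include <cmath>
    #include <cstdio>
    #include <random>

    int main() {
        const double pi = 3.141592653589793;
        const double mean = 5.0, sigma = 2.0;

        std::mt19937 rng(42);
        std::normal_distribution<double> normal(mean, sigma);

        const std::size_t N = 100000;
        double h = 0.0;
        for (std::size_t i = 0; i < N; ++i) {
            double x = normal(rng);
            double z = (x - mean) / sigma;
            double logf = -0.5 * z * z - std::log(sigma * std::sqrt(2.0 * pi));
            h -= logf / static_cast<double>(N); // H ~ -(1/N) * sum log f(x_i)
        }

        double expected = 0.5 * std::log(2.0 * pi * std::exp(1.0) * sigma * sigma);
        std::printf("H (MC) = %.4f, H (closed form) = %.4f\n", h, expected);
        return 0;
    }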
 
-void CMultimodalPriorTest::testMultipleModes()
-{
+void CMultimodalPriorTest::testMultipleModes() {
     LOG_DEBUG("+-------------------------------------------+");
     LOG_DEBUG("| CMultimodalPriorTest::testMultipleModes |");
     LOG_DEBUG("+-------------------------------------------+");
@@ -497,24 +410,20 @@ void CMultimodalPriorTest::testMultipleModes()
         double loss = 0.0;
         TMeanAccumulator differentialEntropy_;
 
-        for (std::size_t j = 0u; j < samples.size(); ++j)
-        {
-            double fx =   w1 * boost::math::pdf(mode1Distribution, samples[j])
-                        + w2 * boost::math::pdf(mode2Distribution, samples[j]);
+        for (std::size_t j = 0u; j < samples.size(); ++j) {
+            double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]);
             differentialEntropy_.add(-std::log(fx));
         }
         double differentialEntropy = maths::CBasicStatistics::mean(differentialEntropy_);
 
-        for (std::size_t i = 0; i < 10; ++i)
-        {
+        for (std::size_t i = 0; i < 10; ++i) {
             rng.random_shuffle(samples.begin(), samples.end());
 
             COneOfNPrior modePrior(makeModePrior());
             CMultimodalPrior filter1(makePrior(&modePrior));
             COneOfNPrior filter2 = modePrior;
 
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
                 TDouble1Vec sample(1, samples[j]);
                 filter1.addSamples(sample);
                 filter2.addSamples(sample);
@@ -526,24 +435,19 @@ void CMultimodalPriorTest::testMultipleModes()
             TMeanAccumulator loss1G;
             TMeanAccumulator loss12;
 
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
-                double fx =   w1 * boost::math::pdf(mode1Distribution, samples[j])
-                            + w2 * boost::math::pdf(mode2Distribution, samples[j]);
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
+                double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]);
                 TDouble1Vec sample(1, samples[j]);
                 double l1;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter1.jointLogMarginalLikelihood(sample, l1));
+                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
                 loss1G.add(std::log(fx) - l1);
                 double l2;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter2.jointLogMarginalLikelihood(sample, l2));
+                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
                 loss12.add(l2 - l1);
             }
 
-            LOG_DEBUG("loss1G = " << maths::CBasicStatistics::mean(loss1G)
-                      << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
-                      << ", differential entropy " << differentialEntropy);
+            LOG_DEBUG("loss1G = " << maths::CBasicStatistics::mean(loss1G) << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
+                      << ", differential entropy " << differentialEntropy);
 
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss12) < 0.0);
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss1G) / differentialEntropy < 0.0);
@@ -551,8 +455,7 @@ void CMultimodalPriorTest::testMultipleModes()
         }
         loss /= 10.0;
 
-        LOG_DEBUG("loss = " << loss
-                  << ", differential entropy = " << differentialEntropy);
+        LOG_DEBUG("loss = " << loss << ", differential entropy = " << differentialEntropy);
         CPPUNIT_ASSERT(loss / differentialEntropy < 0.0);
     }
     {
@@ -593,25 +496,21 @@ void CMultimodalPriorTest::testMultipleModes()
         double loss = 0.0;
         TMeanAccumulator differentialEntropy_;
 
-        for (std::size_t j = 0u; j < samples.size(); ++j)
-        {
-            double fx =   w1 * boost::math::pdf(mode1Distribution, samples[j])
-                        + w2 * boost::math::pdf(mode2Distribution, samples[j])
-                        + w3 * boost::math::pdf(mode3Distribution, samples[j]);
+        for (std::size_t j = 0u; j < samples.size(); ++j) {
+            double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]) +
+                        w3 * boost::math::pdf(mode3Distribution, samples[j]);
             differentialEntropy_.add(-std::log(fx));
         }
         double differentialEntropy = maths::CBasicStatistics::mean(differentialEntropy_);
 
-        for (std::size_t i = 0; i < 10; ++i)
-        {
+        for (std::size_t i = 0; i < 10; ++i) {
             rng.random_shuffle(samples.begin(), samples.end());
 
             COneOfNPrior modePrior(makeModePrior());
             CMultimodalPrior filter1(makePrior(&modePrior));
             COneOfNPrior filter2 = modePrior;
 
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
                 TDouble1Vec sample(1, samples[j]);
                 filter1.addSamples(sample);
                 filter2.addSamples(sample);
@@ -623,25 +522,20 @@ void CMultimodalPriorTest::testMultipleModes()
             TMeanAccumulator loss1G;
             TMeanAccumulator loss12;
 
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
-                double fx =   w1 * boost::math::pdf(mode1Distribution, samples[j])
-                            + w2 * boost::math::pdf(mode2Distribution, samples[j])
-                            + w3 * boost::math::pdf(mode3Distribution, samples[j]);
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
+                double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]) +
+                            w3 * boost::math::pdf(mode3Distribution, samples[j]);
                 TDouble1Vec sample(1, samples[j]);
                 double l1;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter1.jointLogMarginalLikelihood(sample, l1));
+                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
                 loss1G.add(std::log(fx) - l1);
                 double l2;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter2.jointLogMarginalLikelihood(sample, l2));
+                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
                 loss12.add(l2 - l1);
             }
 
-            LOG_DEBUG("loss1G = " << maths::CBasicStatistics::mean(loss1G)
-                      << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
-                      << ", differential entropy " << differentialEntropy);
+            LOG_DEBUG("loss1G = " << maths::CBasicStatistics::mean(loss1G) << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
+                      << ", differential entropy " << differentialEntropy);
 
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss12) < 0.0);
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss1G) / differentialEntropy < 0.001);
@@ -649,8 +543,7 @@ void CMultimodalPriorTest::testMultipleModes()
         }
         loss /= 10.0;
 
-        LOG_DEBUG("loss = " << loss
-                  << ", differential entropy = " << differentialEntropy);
+        LOG_DEBUG("loss = " << loss << ", differential entropy = " << differentialEntropy);
         CPPUNIT_ASSERT(loss / differentialEntropy < 0.0);
     }
     {
@@ -691,25 +584,21 @@ void CMultimodalPriorTest::testMultipleModes()
         double loss = 0.0;
         TMeanAccumulator differentialEntropy_;
 
-        for (std::size_t j = 0u; j < samples.size(); ++j)
-        {
-            double fx =   w1 * boost::math::pdf(mode1Distribution, samples[j])
-                        + w2 * boost::math::pdf(mode2Distribution, samples[j])
-                        + w3 * boost::math::pdf(mode3Distribution, samples[j]);
+        for (std::size_t j = 0u; j < samples.size(); ++j) {
+            double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]) +
+                        w3 * boost::math::pdf(mode3Distribution, samples[j]);
            differentialEntropy_.add(-std::log(fx));
         }
         double differentialEntropy = maths::CBasicStatistics::mean(differentialEntropy_);
 
-        for (std::size_t i = 0; i < 10; ++i)
-        {
+        for (std::size_t i = 0; i < 10; ++i) {
             rng.random_shuffle(samples.begin(), samples.end());
 
             COneOfNPrior modePrior(makeModePrior());
             CMultimodalPrior filter1(makePrior(&modePrior));
             COneOfNPrior filter2 = modePrior;
 
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
                 TDouble1Vec sample(1, samples[j]);
                 filter1.addSamples(sample);
                 filter2.addSamples(sample);
@@ -721,25 +610,20 @@ void CMultimodalPriorTest::testMultipleModes()
             TMeanAccumulator loss1G;
             TMeanAccumulator loss12;
 
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
-                double fx =   w1 * boost::math::pdf(mode1Distribution, samples[j])
-                            + w2 * boost::math::pdf(mode2Distribution, samples[j])
-                            + w3 * boost::math::pdf(mode3Distribution, samples[j]);
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
+                double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]) +
+                            w3 * boost::math::pdf(mode3Distribution, samples[j]);
                 TDouble1Vec sample(1, samples[j]);
                 double l1;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter1.jointLogMarginalLikelihood(sample, l1));
+                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
                 loss1G.add(std::log(fx) - l1);
                 double l2;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter2.jointLogMarginalLikelihood(sample, l2));
+                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
                 loss12.add(l2 - l1);
             }
 
-            LOG_DEBUG("loss1G = " << maths::CBasicStatistics::mean(loss1G)
-                      << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
-                      << ", differential entropy " << differentialEntropy);
+            LOG_DEBUG("loss1G = " << maths::CBasicStatistics::mean(loss1G) << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
+                      << ", differential entropy " << differentialEntropy);
 
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss12) < 0.0);
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss1G) / differentialEntropy < 0.01);
@@ -747,14 +631,12 @@ void CMultimodalPriorTest::testMultipleModes()
         }
         loss /= 10.0;
 
-        LOG_DEBUG("loss = " << loss
-                  << ", differential entropy = " << differentialEntropy);
+        LOG_DEBUG("loss = " << loss << ", differential entropy = " << differentialEntropy);
         CPPUNIT_ASSERT(loss / differentialEntropy < 0.003);
     }
 }
 
-void CMultimodalPriorTest::testMarginalLikelihood()
-{
+void CMultimodalPriorTest::testMarginalLikelihood() {
     LOG_DEBUG("+------------------------------------------------+");
     LOG_DEBUG("| CMultimodalPriorTest::testMarginalLikelihood |");
     LOG_DEBUG("+------------------------------------------------+");
@@ -778,23 +660,18 @@ void CMultimodalPriorTest::testMarginalLikelihood()
         rng.generateLogNormalSamples(location, squareScale, 100, samples);
         filter.addSamples(samples);
 
-        maths_t::ESampleWeightStyle weightStyles[] =
-            {
-                maths_t::E_SampleCountWeight,
-                maths_t::E_SampleWinsorisationWeight,
-                maths_t::E_SampleCountWeight
-            };
-        double weights[] = { 0.1, 1.0, 10.0 };
+        maths_t::ESampleWeightStyle weightStyles[] = {
+            maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight};
+        double weights[] = {0.1, 1.0, 10.0};
 
-        for (std::size_t i = 0u; i < boost::size(weightStyles); ++i)
-        {
-            for (std::size_t j = 0u; j < boost::size(weights); ++j)
-            {
+        for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
+            for (std::size_t j = 0u; j < boost::size(weights); ++j) {
                 double lb, ub;
                 filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]),
                                         TDouble1Vec(1, 20000.0),
                                         TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])),
-                                        lb, ub);
+                                        lb,
+                                        ub);
                 LOG_DEBUG("-log(c.d.f) = " << (lb + ub) / 2.0);
                 CPPUNIT_ASSERT(lb >= 0.0);
                 CPPUNIT_ASSERT(ub >= 0.0);
@@ -806,9 +683,9 @@ void CMultimodalPriorTest::testMarginalLikelihood()
     // test data and that the c.d.f. <= 1 and that the expected value
     // of the log likelihood tends to the differential entropy.
 
-    const double decayRates[] = { 0.0, 0.001, 0.01 };
+    const double decayRates[] = {0.0, 0.001, 0.01};
 
-    unsigned int numberSamples[] = { 2u, 20u, 500u };
+    unsigned int numberSamples[] = {2u, 20u, 500u};
     const double tolerance = 0.01;
 
     test::CRandomNumbers rng;
@@ -823,31 +700,22 @@ void CMultimodalPriorTest::testMarginalLikelihood()
     const double mean3 = 25.0;
     const double variance3 = 3.0;
     TDoubleVec samples1;
-    rng.generateNormalSamples(mean1, variance1,
-                              static_cast<std::size_t>(w1 * 500.0),
-                              samples1);
+    rng.generateNormalSamples(mean1, variance1, static_cast<std::size_t>(w1 * 500.0), samples1);
     TDoubleVec samples2;
-    rng.generateNormalSamples(mean2, variance2,
-                              static_cast<std::size_t>(w2 * 500.0),
-                              samples2);
+    rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(w2 * 500.0), samples2);
     TDoubleVec samples3;
-    rng.generateNormalSamples(mean3, variance3,
-                              static_cast<std::size_t>(w3 * 500.0),
-                              samples3);
+    rng.generateNormalSamples(mean3, variance3, static_cast<std::size_t>(w3 * 500.0), samples3);
     TDoubleVec samples;
     samples.insert(samples.end(), samples1.begin(), samples1.end());
     samples.insert(samples.end(), samples2.begin(), samples2.end());
     samples.insert(samples.end(), samples3.begin(), samples3.end());
     rng.random_shuffle(samples.begin(), samples.end());
 
-    for (size_t i = 0; i < boost::size(numberSamples); ++i)
-    {
-        for (size_t j = 0; j < boost::size(decayRates); ++j)
-        {
+    for (size_t i = 0; i < boost::size(numberSamples); ++i) {
+        for (size_t j = 0; j < boost::size(decayRates); ++j) {
             CMultimodalPrior filter(makePrior(decayRates[j]));
 
-            for (std::size_t k = 0u; k < samples.size(); ++k)
-            {
+            for (std::size_t k = 0u; k < samples.size(); ++k) {
                 filter.addSamples(TDouble1Vec(1, samples[k]));
                 filter.propagateForwardsByTime(1.0);
                 CPPUNIT_ASSERT(filter.checkInvariants());
@@ -859,16 +727,13 @@ void CMultimodalPriorTest::testMarginalLikelihood()
 
             const double eps = 1e-4;
 
-            for (size_t k = 5; k < 31; ++k)
-            {
+            for (size_t k = 5; k < 31; ++k) {
                 TDouble1Vec sample(1, static_cast<double>(k));
 
-                LOG_DEBUG("number = " << numberSamples[i]
-                          << ", sample = " << sample[0]);
+                LOG_DEBUG("number = " << numberSamples[i] << ", sample = " << sample[0]);
 
                 double logLikelihood = 0.0;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter.jointLogMarginalLikelihood(sample, logLikelihood));
+                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
                 double pdf = std::exp(logLikelihood);
 
                 double lowerBound = 0.0, upperBound = 0.0;
@@ -905,17 +770,11 @@ void CMultimodalPriorTest::testMarginalLikelihood()
         LOG_DEBUG("# modes = " << filter.numberModes());
 
         TDoubleVec manySamples1;
-        rng.generateNormalSamples(mean1, variance1,
-                                  static_cast<std::size_t>(w1 * 100000.0),
-                                  manySamples1);
+        rng.generateNormalSamples(mean1, variance1, static_cast<std::size_t>(w1 * 100000.0), manySamples1);
         TDoubleVec manySamples2;
-        rng.generateNormalSamples(mean2, variance2,
-                                  static_cast<std::size_t>(w2 * 100000.0),
-                                  manySamples2);
+        rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(w2 * 100000.0), manySamples2);
        TDoubleVec manySamples3;
-        rng.generateNormalSamples(mean3, variance3,
-                                  static_cast<std::size_t>(w3 * 100000.0),
-                                  manySamples3);
+        rng.generateNormalSamples(mean3, variance3, static_cast<std::size_t>(w3 * 100000.0), manySamples3);
         TDoubleVec manySamples;
         manySamples.insert(manySamples.end(), manySamples1.begin(), manySamples1.end());
         manySamples.insert(manySamples.end(), manySamples2.begin(), manySamples2.end());
         manySamples.insert(manySamples.end(), manySamples3.begin(), manySamples3.end());
@@ -934,33 +793,26 @@ void CMultimodalPriorTest::testMarginalLikelihood()
         double expectedDifferentialEntropy = maths::CTools::differentialEntropy(f);
 
         double differentialEntropy = 0.0;
-        for (std::size_t i = 0u; i < manySamples.size(); ++i)
-        {
-            if (i % 1000 == 0)
-            {
+        for (std::size_t i = 0u; i < manySamples.size(); ++i) {
+            if (i % 1000 == 0) {
                 LOG_DEBUG("Processed " << i << " samples");
             }
             TDouble1Vec sample(1, manySamples[i]);
             filter.addSamples(sample);
             double logLikelihood = 0.0;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter.jointLogMarginalLikelihood(sample, logLikelihood));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
             differentialEntropy -= logLikelihood;
         }
         differentialEntropy /= static_cast<double>(manySamples.size());
 
-        LOG_DEBUG("differentialEntropy = " << differentialEntropy
-                  << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
+        LOG_DEBUG("differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
 
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy,
-                                     differentialEntropy,
-                                     0.05 * expectedDifferentialEntropy);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.05 * expectedDifferentialEntropy);
     }
 }
 
-void CMultimodalPriorTest::testMarginalLikelihoodMode()
-{
+void CMultimodalPriorTest::testMarginalLikelihoodMode() {
     LOG_DEBUG("+----------------------------------------------------+");
     LOG_DEBUG("| CMultimodalPriorTest::testMarginalLikelihoodMode |");
     LOG_DEBUG("+----------------------------------------------------+");
@@ -978,25 +830,15 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode()
     double mean2 = 8.0;
     double variance2 = 1.5;
     TDoubleVec samples1;
-    rng.generateNormalSamples(mean1, variance1,
-                              static_cast<std::size_t>(w1 * 500.0),
-                              samples1);
+    rng.generateNormalSamples(mean1, variance1, static_cast<std::size_t>(w1 * 500.0), samples1);
     TDoubleVec samples2;
-    rng.generateNormalSamples(mean2, variance2,
-                              static_cast<std::size_t>(w2 * 500.0),
-                              samples2);
+    rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(w2 * 500.0), samples2);
     TDoubleVec samples;
     samples.insert(samples.end(), samples1.begin(), samples1.end());
     samples.insert(samples.end(), samples2.begin(), samples2.end());
     rng.random_shuffle(samples.begin(), samples.end());
 
-    const double varianceScales[] =
-        {
-            0.1, 0.2, 0.3, 0.4, 0.5,
-            0.6, 0.7, 0.8, 0.9, 1.0,
-            1.2, 1.5, 2.0, 2.5, 3.0,
-            4.0, 5.0
-        };
+    const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0};
 
     CMultimodalPrior filter(makePrior());
     filter.addSamples(samples);
@@ -1006,8 +848,7 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode()
     TDouble4Vec1Vec weights(1, weight);
 
     std::size_t totalCount = 0u;
-    for (std::size_t i = 0u; i < boost::size(varianceScales); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) {
         double vs = varianceScales[i];
         weight[0] = vs;
         weights[0][0] = vs;
@@ -1015,32 +856,19 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode()
         double mode = filter.marginalLikelihoodMode(weightStyle, weight);
         LOG_DEBUG("marginalLikelihoodMode = " << mode);
         // Should be near 8.
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(8.0,
-                                     filter.marginalLikelihoodMode(weightStyle, weight),
-                                     2.0);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(8.0, filter.marginalLikelihoodMode(weightStyle, weight), 2.0);
         double eps = 0.01;
         double modeMinusEps = mode - eps;
-        double modePlusEps  = mode + eps;
+        double modePlusEps = mode + eps;
         double fMode, fModeMinusEps, fModePlusEps;
-        filter.jointLogMarginalLikelihood(weightStyle,
-                                          TDouble1Vec(1, mode),
-                                          weights,
-                                          fMode);
-        filter.jointLogMarginalLikelihood(weightStyle,
-                                          TDouble1Vec(1, modeMinusEps),
-                                          weights,
-                                          fModeMinusEps);
-        filter.jointLogMarginalLikelihood(weightStyle,
-                                          TDouble1Vec(1, modePlusEps),
-                                          weights,
-                                          fModePlusEps);
+        filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode), weights, fMode);
+        filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modeMinusEps), weights, fModeMinusEps);
+        filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modePlusEps), weights, fModePlusEps);
        fMode = std::exp(fMode);
         fModeMinusEps = std::exp(fModeMinusEps);
-        fModePlusEps  = std::exp(fModePlusEps);
+        fModePlusEps = std::exp(fModePlusEps);
         double gradient = (fModePlusEps - fModeMinusEps) / 2.0 / eps;
-        LOG_DEBUG("f(mode) = " << fMode
-                  << ", f(mode-eps) = " << fModeMinusEps
-                  << ", f(mode + eps) = " << fModePlusEps);
+        LOG_DEBUG("f(mode) = " << fMode << ", f(mode-eps) = " << fModeMinusEps << ", f(mode + eps) = " << fModePlusEps);
         LOG_DEBUG("gradient = " << gradient);
         CPPUNIT_ASSERT(std::fabs(gradient) < 0.05);
         CPPUNIT_ASSERT(fMode > 0.999 * fModeMinusEps);
@@ -1049,16 +877,11 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode()
         rng.generateUniformSamples(mean1, mean2, 500, trials);
         std::size_t count = 0u;
         TDoubleVec fTrials;
-        for (std::size_t j = 0u; j < trials.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < trials.size(); ++j) {
             double fTrial;
-            filter.jointLogMarginalLikelihood(weightStyle,
-                                              TDouble1Vec(1, trials[j]),
-                                              weights,
-                                              fTrial);
+            filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, trials[j]), weights, fTrial);
             fTrial = std::exp(fTrial);
-            if (fTrial > fMode)
-            {
+            if (fTrial > fMode) {
                 LOG_DEBUG("f(" << trials[j] << ") = " << fTrial << " > " << fMode);
                 ++count;
             }
@@ -1073,8 +896,7 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode()
     CPPUNIT_ASSERT(totalCount < 11);
 }
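Note: testMarginalLikelihoodMode above verifies a candidate mode two ways - the centered
difference of the density at the candidate is nearly zero, and no random trial point has
a materially higher density. A standalone sketch of both checks on a two-mode normal
mixture (hypothetical code and values, standard library only, not from this repository):

    #include <cassert>
    #include <cmath>
    #include <random>

    const double pi = 3.141592653589793;

    double normalPdf(double x, double m, double s) {
        double z = (x - m) / s;
        return std::exp(-0.5 * z * z) / (s * std::sqrt(2.0 * pi));
    }

    int main() {
        // Two well separated modes; the candidate below is near the higher one.
        auto f = [](double x) { return 0.3 * normalPdf(x, 5.0, 1.0) + 0.7 * normalPdf(x, 8.0, 1.2); };
        double mode = 8.0; // hypothetical candidate, e.g. from a numerical optimiser

        // (i) Flat gradient at the candidate.
        const double eps = 0.01;
        double gradient = (f(mode + eps) - f(mode - eps)) / (2.0 * eps);
        assert(std::fabs(gradient) < 0.05);

        // (ii) No random trial between the means beats the candidate (small slack
        // because the true mode sits marginally left of 8).
        std::mt19937 rng(42);
        std::uniform_real_distribution<double> trial(5.0, 8.0);
        for (int i = 0; i < 500; ++i) {
            assert(f(trial(rng)) <= 1.001 * f(mode));
        }
        return 0;
    }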
 
-void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval()
-{
+void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() {
     LOG_DEBUG("+------------------------------------------------------------------+");
     LOG_DEBUG("| CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval |");
     LOG_DEBUG("+------------------------------------------------------------------+");
@@ -1095,47 +917,32 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval()
     double mean2 = 8.0;
     double variance2 = 2.0;
     TDoubleVec samples1;
-    rng.generateLogNormalSamples(location1, squareScale1,
-                                 static_cast<std::size_t>(w1 * 2000.0),
-                                 samples1);
+    rng.generateLogNormalSamples(location1, squareScale1, static_cast<std::size_t>(w1 * 2000.0), samples1);
     TDoubleVec samples2;
-    rng.generateNormalSamples(mean2, variance2,
-                              static_cast<std::size_t>(w2 * 2000.0),
-                              samples2);
+    rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(w2 * 2000.0), samples2);
     TDoubleVec samples;
     samples.insert(samples.end(), samples1.begin(), samples1.end());
     samples.insert(samples.end(), samples2.begin(), samples2.end());
     rng.random_shuffle(samples.begin(), samples.end());
 
-    const double varianceScales[] =
-        {
-            0.1, 0.2, 0.3, 0.4, 0.5,
-            0.6, 0.7, 0.8, 0.9, 1.0,
-            1.2, 1.5, 2.0, 2.5, 3.0,
-            4.0, 5.0
-        };
+    const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0};
 
-    const double percentages[] =
-        {
-            5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0, 99.0, 99.9, 99.99
-        };
+    const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0, 99.0, 99.9, 99.99};
 
     CMultimodalPrior filter(makePrior());
     filter.addSamples(samples);
 
-    for (std::size_t i = 0u; i < boost::size(varianceScales); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) {
         LOG_DEBUG("*** vs = " << varianceScales[i] << " ***");
         TMeanAccumulator error;
-        for (std::size_t j = 0u; j < boost::size(percentages); ++j)
-        {
+        for (std::size_t j = 0u; j < boost::size(percentages); ++j) {
             LOG_DEBUG("** percentage = " << percentages[j] << " **");
             double q1, q2;
             filter.marginalLikelihoodQuantileForTest(50.0 - percentages[j] / 2.0, 1e-3, q1);
             filter.marginalLikelihoodQuantileForTest(50.0 + percentages[j] / 2.0, 1e-3, q2);
             TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[j]);
             LOG_DEBUG("[q1, q2] = [" << q1 << ", " << q2 << "]"
-                      << ", interval = " << core::CContainerPrinter::print(interval));
+                      << ", interval = " << core::CContainerPrinter::print(interval));
             CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.1);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 0.05);
             error.add(std::fabs(interval.first - q1));
@@ -1147,20 +954,15 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval()
 
     std::sort(samples.begin(), samples.end());
     TMeanAccumulator error;
-    for (std::size_t i = 0u; i < boost::size(percentages); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(percentages); ++i) {
         LOG_DEBUG("** percentage = " << percentages[i] << " **");
-        std::size_t i1 = static_cast<std::size_t>(
-                             static_cast<double>(samples.size())
-                             * (50.0 - percentages[i] / 2.0) / 100.0 + 0.5);
-        std::size_t i2 = static_cast<std::size_t>(
-                             static_cast<double>(samples.size())
-                             * (50.0 + percentages[i] / 2.0) / 100.0 + 0.5);
+        std::size_t i1 = static_cast<std::size_t>(static_cast<double>(samples.size()) * (50.0 - percentages[i] / 2.0) / 100.0 + 0.5);
+        std::size_t i2 = static_cast<std::size_t>(static_cast<double>(samples.size()) * (50.0 + percentages[i] / 2.0) / 100.0 + 0.5);
         double q1 = samples[i1];
         double q2 = samples[std::min(i2, samples.size() - 1)];
         TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[i]);
         LOG_DEBUG("[q1, q2] = [" << q1 << ", " << q2 << "]"
-                  << ", interval = " << core::CContainerPrinter::print(interval));
+                  << ", interval = " << core::CContainerPrinter::print(interval));
         CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, std::max(0.1 * q1, 0.15));
         CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 0.1 * q2);
         error.add(std::fabs(interval.first - q1) / q1);
@@ -1170,8 +972,7 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval()
     CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.05);
     }
 
-    LOG_DEBUG("Problem Case (Issue 439)")
-    {
+    LOG_DEBUG("Problem Case (Issue 439)") {
         std::ifstream file;
         file.open("testfiles/poorly_conditioned_multimodal.txt");
         std::ostringstream state;
@@ -1180,30 +981,30 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval()
         core::CRapidXmlParser parser;
         CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(state.str()));
         core::CRapidXmlStateRestoreTraverser traverser(parser);
-        maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, 0.0,
+        maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
+                                                 0.0,
                                                  maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
                                                  maths::MINIMUM_CLUSTER_SPLIT_COUNT,
                                                  maths::MINIMUM_CATEGORY_COUNT);
         TPriorPtr prior;
         maths::CPriorStateSerialiser restorer;
         CPPUNIT_ASSERT(restorer(params, prior, traverser));
 
-        TDoubleDoublePr median = prior->marginalLikelihoodConfidenceInterval(
-            0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT);
-        TDoubleDoublePr i90 = prior->marginalLikelihoodConfidenceInterval(
-            90, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT);
+        TDoubleDoublePr median =
+            prior->marginalLikelihoodConfidenceInterval(0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT);
+        TDoubleDoublePr i90 =
+            prior->marginalLikelihoodConfidenceInterval(90, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT);
 
         LOG_DEBUG("median = " << maths::CBasicStatistics::mean(median));
         LOG_DEBUG("confidence interval = " << core::CContainerPrinter::print(i90));
 
         CPPUNIT_ASSERT(maths::CBasicStatistics::mean(median) > i90.first);
         CPPUNIT_ASSERT(maths::CBasicStatistics::mean(median) < i90.second);
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(  -112.0, i90.first, 0.5);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(-112.0, i90.first, 0.5);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(158952.0, i90.second, 0.5);
     }
 }
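Note: the cross-check above compares a central confidence interval at level p with the
empirical (50 - p/2)% and (50 + p/2)% quantiles of the data. A standalone sketch of that
quantile bracketing (hypothetical code, standard library only, not from this repository):

    #include <algorithm>
    #include <cstdio>
    #include <random>
    #include <vector>

    int main() {
        std::mt19937 rng(42);
        std::normal_distribution<double> normal(10.0, 2.0);

        std::vector<double> samples(2000);
        for (double& x : samples) {
            x = normal(rng);
        }
        std::sort(samples.begin(), samples.end());

        const double percentage = 90.0;
        auto quantile = [&](double q) { // q in [0, 100]
            std::size_t i = static_cast<std::size_t>(static_cast<double>(samples.size()) * q / 100.0 + 0.5);
            return samples[std::min(i, samples.size() - 1)];
        };
        double q1 = quantile(50.0 - percentage / 2.0);
        double q2 = quantile(50.0 + percentage / 2.0);
        std::printf("%.0f%% interval ~ [%.3f, %.3f]\n", percentage, q1, q2);
        return 0;
    }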
meanMeanError.add(std::fabs(filter.marginalLikelihoodMean() - maths::CBasicStatistics::mean(sampledMoments)) / + filter.marginalLikelihoodMean()); + meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - maths::CBasicStatistics::variance(sampledMoments)) / + filter.marginalLikelihoodVariance()); } std::sort(sampled.begin(), sampled.end()); - for (std::size_t j = 1u; j < sampled.size(); ++j) - { - double q = 100.0 * static_cast(j) - / static_cast(sampled.size()); + for (std::size_t j = 1u; j < sampled.size(); ++j) { + double q = 100.0 * static_cast(j) / static_cast(sampled.size()); double expectedQuantile; CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile)); - LOG_DEBUG("quantile = " << q - << ", x_quantile = " << expectedQuantile - << ", quantile range = [" << sampled[j - 1] << "," << sampled[j] << "]"); + LOG_DEBUG("quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[j - 1] << "," + << sampled[j] << "]"); CPPUNIT_ASSERT(expectedQuantile >= 0.98 * sampled[j - 1]); CPPUNIT_ASSERT(expectedQuantile <= 1.02 * sampled[j]); @@ -1319,12 +1112,10 @@ void CMultimodalPriorTest::testSampleMarginalLikelihood() sampled.clear(); filter.sampleMarginalLikelihood(numberSampled, sampled); TMeanVarSkewAccumulator sampledMoments; - for (std::size_t i = 0u; i < sampled.size(); ++i) - { + for (std::size_t i = 0u; i < sampled.size(); ++i) { sampledMoments.add(sampled[i]); } - LOG_DEBUG("Sample moments = " << sampledMoments - << ", sampled moments = " << sampleMoments); + LOG_DEBUG("Sample moments = " << sampledMoments << ", sampled moments = " << sampleMoments); CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(sampleMoments), maths::CBasicStatistics::mean(sampledMoments), 1e-4 * maths::CBasicStatistics::mean(sampleMoments)); @@ -1336,8 +1127,7 @@ void CMultimodalPriorTest::testSampleMarginalLikelihood() 0.1 * maths::CBasicStatistics::skewness(sampleMoments)); } -void CMultimodalPriorTest::testCdf() -{ +void CMultimodalPriorTest::testCdf() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CMultimodalPriorTest::testCdf |"); LOG_DEBUG("+---------------------------------+"); @@ -1349,24 +1139,21 @@ void CMultimodalPriorTest::testCdf() // cdf x for x < 0 = 1 // cdf complement x for x < 0 = 0 - const double locations[] = { 1.0, 3.0 }; - const double squareScales[] = { 0.5, 0.3 }; - const std::size_t n[] = { 100u, 100u }; + const double locations[] = {1.0, 3.0}; + const double squareScales[] = {0.5, 0.3}; + const std::size_t n[] = {100u, 100u}; test::CRandomNumbers rng; - CGammaRateConjugate gamma( - maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData)); - CLogNormalMeanPrecConjugate logNormal( - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + CGammaRateConjugate gamma(maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + CLogNormalMeanPrecConjugate logNormal(maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)); COneOfNPrior::TPriorPtrVec priors; priors.push_back(COneOfNPrior::TPriorPtr(gamma.clone())); priors.push_back(COneOfNPrior::TPriorPtr(logNormal.clone())); COneOfNPrior modePrior(maths::COneOfNPrior(priors, maths_t::E_ContinuousData)); CMultimodalPrior filter(makePrior(&modePrior)); - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TDoubleVec samples; rng.generateLogNormalSamples(locations[i], squareScales[i], n[i], samples); 
filter.addSamples(samples); @@ -1381,13 +1168,11 @@ void CMultimodalPriorTest::testCdf() double f = (lowerBound + upperBound) / 2.0; CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), lowerBound, upperBound)); double fComplement = (lowerBound + upperBound) / 2.0; -        LOG_DEBUG("log(F(x)) = " << -f -                  << ", log(1 - F(x)) = " << fComplement); +        LOG_DEBUG("log(F(x)) = " << -f << ", log(1 - F(x)) = " << fComplement); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(std::numeric_limits<double>::min()), -f, 1e-8); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-fComplement), 1e-8); -        for (std::size_t j = 1u; j < 1000; ++j) -        { +        for (std::size_t j = 1u; j < 1000; ++j) { double x = static_cast<double>(j) / 2.0; CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lowerBound, upperBound)); @@ -1395,14 +1180,12 @@ void CMultimodalPriorTest::testCdf() CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lowerBound, upperBound)); fComplement = (lowerBound + upperBound) / 2.0; -            LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) -                      << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); +            LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-8); } } -void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() -{ +void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() { LOG_DEBUG("+------------------------------------------------------------+"); LOG_DEBUG("| CMultimodalPriorTest::testProbabilityOfLessLikelySamples |"); LOG_DEBUG("+------------------------------------------------------------+"); @@ -1419,13 +1202,9 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() double variance1 = 1.0, variance2 = 1.0; TDoubleVec samples1; -        rng.generateNormalSamples(mean1, variance1, -                                  static_cast<std::size_t>(10000.0 * weight1), -                                  samples1); +        rng.generateNormalSamples(mean1, variance1, static_cast<std::size_t>(10000.0 * weight1), samples1); TDoubleVec samples2; -        rng.generateNormalSamples(mean2, variance2, -                                  static_cast<std::size_t>(10000.0 * weight2), -                                  samples2); +        rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(10000.0 * weight2), samples2); TDoubleVec samples; samples.insert(samples.end(), samples1.begin(), samples1.end()); samples.insert(samples.end(), samples2.begin(), samples2.end()); @@ -1443,35 +1222,28 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() filter.addSamples(samples); LOG_DEBUG("# modes = " << filter.numberModes()); -        double x[] = { 46.0, 49.0, 54.0, 55.0, 68.0 }; +        double x[] = {46.0, 49.0, 54.0, 55.0, 68.0}; double error = 0.0; -        for (std::size_t i = 0u; i < boost::size(x); ++i) -        { +        for (std::size_t i = 0u; i < boost::size(x); ++i) { double expectedProbability; double deviation; probabilityOfLessLikelySample(mixture, x[i], expectedProbability, deviation); double lowerBound; double upperBound; -            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, -                                                  TDouble1Vec(1, x[i]), -                                                  lowerBound, upperBound); -            LOG_DEBUG("lowerBound = " << lowerBound -                      << ", upperBound = " << upperBound -                      << ", expectedProbability = " << expectedProbability -                      << ", deviation = " << deviation); +            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound); +            LOG_DEBUG("lowerBound = " << lowerBound << ", upperBound = " << upperBound << ", expectedProbability = " << expectedProbability +                      << ", deviation = " << deviation); double probability = (lowerBound + upperBound) / 2.0; -            error += 
probability < expectedProbability - 2.0 * deviation ? -                         (expectedProbability - 2.0 * deviation) - probability : -                         (probability > expectedProbability + 2.0 * deviation ? -                          probability - (expectedProbability + 2.0 * deviation) : 0.0); - -            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, -                                         probability, -                                         std::max(3.0 * deviation, 3e-5)); +            error += +                probability < expectedProbability - 2.0 * deviation +                    ? (expectedProbability - 2.0 * deviation) - probability +                    : (probability > expectedProbability + 2.0 * deviation ? probability - (expectedProbability + 2.0 * deviation) : 0.0); + +            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, std::max(3.0 * deviation, 3e-5)); } error /= static_cast<double>(boost::size(x)); @@ -1480,41 +1252,41 @@ double lb, ub; maths_t::ETail tail; -        filter.probabilityOfLessLikelySamples( -                      maths_t::E_TwoSided, -                      maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), -                      TDouble1Vec(1, 49.0), -                      TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), -                      lb, ub, tail); +        filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, +                                              maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), +                                              TDouble1Vec(1, 49.0), +                                              TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), +                                              lb, +                                              ub, +                                              tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); -        filter.probabilityOfLessLikelySamples( -                      maths_t::E_TwoSided, -                      maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), -                      TDouble1Vec(1, 54.0), -                      TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), -                      lb, ub, tail); +        filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, +                                              maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), +                                              TDouble1Vec(1, 54.0), +                                              TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), +                                              lb, +                                              ub, +                                              tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); -        filter.probabilityOfLessLikelySamples( -                      maths_t::E_TwoSided, -                      maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), -                      TDouble1Vec(1, 59.0), -                      TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), -                      lb, ub, tail); +        filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, +                                              maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), +                                              TDouble1Vec(1, 59.0), +                                              TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), +                                              lb, +                                              ub, +                                              tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } { -        double weights[] = { 0.6, 0.2, 0.2 }; -        double locations[] = { 1.0, 2.5, 4.0 }; -        double squareScales[] = { 0.1, 0.05, 0.3 }; +        double weights[] = {0.6, 0.2, 0.2}; +        double locations[] = {1.0, 2.5, 4.0}; +        double squareScales[] = {0.1, 0.05, 0.3}; TDoubleVec samples; samples.reserve(20000u); -        for (std::size_t i = 0u; i < boost::size(weights); ++i) -        { +        for (std::size_t i = 0u; i < boost::size(weights); ++i) { TDoubleVec modeSamples; -            rng.generateLogNormalSamples(locations[i], squareScales[i], -                                         static_cast<std::size_t>(20000.0 * weights[i]), -                                         modeSamples); +            rng.generateLogNormalSamples(locations[i], squareScales[i], static_cast<std::size_t>(20000.0 * weights[i]), modeSamples); samples.insert(samples.end(), modeSamples.begin(), modeSamples.end()); } rng.random_shuffle(samples.begin(), samples.end()); @@ -1530,35 +1302,29 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() filter.addSamples(samples); LOG_DEBUG("# modes = " << filter.numberModes()); -        double x[] = { 2.0, 3.0, 9.0, 15.0, 18.0, 22.0, 40.0, 60.0, 80.0, 110.0 }; +        double x[] = {2.0, 3.0, 9.0, 15.0, 18.0, 22.0, 40.0, 60.0, 80.0, 110.0}; double error = 0.0; -        for (std::size_t i = 0u; i < boost::size(x); ++i) -        { +        for (std::size_t i = 
0u; i < boost::size(x); ++i) { double expectedProbability; double deviation; probabilityOfLessLikelySample(mixture, x[i], expectedProbability, deviation); double lowerBound; double upperBound; -            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, -                                                  TDouble1Vec(1, x[i]), -                                                  lowerBound, upperBound); -            LOG_DEBUG("lowerBound = " << lowerBound -                      << ", upperBound = " << upperBound -                      << ", expectedProbability = " << expectedProbability -                      << ", deviation = " << deviation); +            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound); +            LOG_DEBUG("lowerBound = " << lowerBound << ", upperBound = " << upperBound << ", expectedProbability = " << expectedProbability +                      << ", deviation = " << deviation); double probability = (lowerBound + upperBound) / 2.0; -            error += probability < expectedProbability - 2.0 * deviation ? -                         (expectedProbability - 2.0 * deviation) - probability : -                         (probability > expectedProbability + 2.0 * deviation ? -                          probability - (expectedProbability + 2.0 * deviation) : 0.0); - -            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, -                                         probability, -                                         std::min(0.2 * expectedProbability + std::max(3.0 * deviation, 1e-10), 0.06)); +            error += +                probability < expectedProbability - 2.0 * deviation +                    ? (expectedProbability - 2.0 * deviation) - probability +                    : (probability > expectedProbability + 2.0 * deviation ? probability - (expectedProbability + 2.0 * deviation) : 0.0); + +            CPPUNIT_ASSERT_DOUBLES_EQUAL( +                expectedProbability, probability, std::min(0.2 * expectedProbability + std::max(3.0 * deviation, 1e-10), 0.06)); } error /= static_cast<double>(boost::size(x)); @@ -1566,18 +1332,15 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() CPPUNIT_ASSERT(error < 0.009); } { -        double weights[] = { 0.6, 0.4 }; -        double shapes[] = { 2.0, 300.0 }; -        double scales[] = { 0.5, 1.5 }; +        double weights[] = {0.6, 0.4}; +        double shapes[] = {2.0, 300.0}; +        double scales[] = {0.5, 1.5}; TDoubleVec samples; samples.reserve(20000u); -        for (std::size_t i = 0u; i < boost::size(weights); ++i) -        { +        for (std::size_t i = 0u; i < boost::size(weights); ++i) { TDoubleVec modeSamples; -            rng.generateGammaSamples(shapes[i], scales[i], -                                     static_cast<std::size_t>(20000.0 * weights[i]), -                                     modeSamples); +            rng.generateGammaSamples(shapes[i], scales[i], static_cast<std::size_t>(20000.0 * weights[i]), modeSamples); samples.insert(samples.end(), modeSamples.begin(), modeSamples.end()); } rng.random_shuffle(samples.begin(), samples.end()); @@ -1592,35 +1355,28 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() filter.addSamples(samples); LOG_DEBUG("# modes = " << filter.numberModes()); -        double x[] = { 0.5, 1.5, 3.0, 35.0, 100.0, 320.0, 340.0, 360.0, 380.0, 410.0 }; +        double x[] = {0.5, 1.5, 3.0, 35.0, 100.0, 320.0, 340.0, 360.0, 380.0, 410.0}; double error = 0.0; -        for (std::size_t i = 0u; i < boost::size(x); ++i) -        { +        for (std::size_t i = 0u; i < boost::size(x); ++i) { double expectedProbability; double deviation; probabilityOfLessLikelySample(mixture, x[i], expectedProbability, deviation); double lowerBound; double upperBound; -            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, -                                                  TDouble1Vec(1, x[i]), -                                                  lowerBound, upperBound); -            LOG_DEBUG("lowerBound = " << lowerBound -                      << ", upperBound = " << upperBound -                      << ", expectedProbability = " << expectedProbability -                      << ", deviation = " << deviation); +            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound); +            LOG_DEBUG("lowerBound = " << lowerBound << ", upperBound = " << 
upperBound << ", expectedProbability = " << expectedProbability + << ", deviation = " << deviation); double probability = (lowerBound + upperBound) / 2.0; - error += probability < expectedProbability - 2.0 * deviation ? - (expectedProbability - 2.0 * deviation) - probability : - (probability > expectedProbability + 2.0 * deviation ? - probability - (expectedProbability + 2.0 * deviation) : 0.0); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, - probability, - 0.18 * expectedProbability + std::max(2.5 * deviation, 1e-3)); + error += + probability < expectedProbability - 2.0 * deviation + ? (expectedProbability - 2.0 * deviation) - probability + : (probability > expectedProbability + 2.0 * deviation ? probability - (expectedProbability + 2.0 * deviation) : 0.0); + + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 0.18 * expectedProbability + std::max(2.5 * deviation, 1e-3)); } error /= static_cast(boost::size(x)); @@ -1629,8 +1385,7 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() } } -void CMultimodalPriorTest::testLargeValues() -{ +void CMultimodalPriorTest::testLargeValues() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CMultimodalPriorTest::testLargeValues |"); LOG_DEBUG("+-----------------------------------------+"); @@ -1638,108 +1393,72 @@ void CMultimodalPriorTest::testLargeValues() // Check that the confidence interval calculation stays // well conditioned for very large values. - TDoubleVec values{7.324121e+10, 7.251927e+10, 7.152208e+10, 7.089604e+10, 7.018831e+10, - 6.94266e+10, 6.890659e+10, 6.837292e+10, 6.794372e+10, 6.793463e+10, - 6.785385e+10, 6.773589e+10, 6.787609e+10, 6.760049e+10, 6.709596e+10, - 6.701824e+10, 6.672568e+10, 6.617609e+10, 6.620431e+10, 6.627069e+10, - 6.617393e+10, 6.633176e+10, 6.600326e+10, 6.530363e+10, 6.494482e+10, - 6.433443e+10, 6.362233e+10, 6.317814e+10, 6.296127e+10, 6.272491e+10, - 6.243567e+10, 6.19567e+10, 6.13123e+10, 6.150823e+10, 6.160438e+10, - 6.106396e+10, 6.128276e+10, 6.13318e+10, 6.161243e+10, 6.182719e+10, - 6.177156e+10, 6.174539e+10, 6.216147e+10, 6.272091e+10, 6.286637e+10, - 6.310137e+10, 6.315882e+10, 6.312109e+10, 6.312296e+10, 6.312432e+10, - 6.328676e+10, 6.37708e+10, 6.421867e+10, 6.490675e+10, 6.547625e+10, - 6.593425e+10, 6.67186e+10, 6.755033e+10, 6.754501e+10, 6.730381e+10, - 6.76163e+10, 6.761007e+10, 6.745505e+10, 6.777796e+10, 6.783472e+10, - 6.779558e+10, 6.787643e+10, 6.800003e+10, 6.840413e+10, 6.856255e+10, - 6.939239e+10, 6.907512e+10, 6.914988e+10, 6.901868e+10, 6.884531e+10, - 6.934499e+10, 6.955862e+10, 6.938019e+10, 6.942022e+10, 6.950912e+10, - 6.979618e+10, 7.064871e+10, 7.152501e+10, 7.178129e+10, 7.2239e+10, - 7.257321e+10, 7.28913e+10, 7.365193e+10, 7.432521e+10, 7.475098e+10, - 7.553025e+10, 7.654561e+10, 7.698032e+10, 7.768267e+10, 7.826669e+10, - 7.866854e+10, 7.924608e+10, 7.998602e+10, 8.038091e+10, 8.094976e+10, - 8.145126e+10, 8.132123e+10, 8.142747e+10, 8.148276e+10, 8.118588e+10, - 8.122279e+10, 8.078815e+10, 8.008936e+10, 7.991103e+10, 7.981722e+10, - 7.932372e+10, 7.900164e+10, 7.881053e+10, 7.837734e+10, 7.847101e+10, - 7.816575e+10, 7.789224e+10, 7.803634e+10, 7.827226e+10, 7.812112e+10, - 7.814848e+10, 7.812407e+10, 7.779805e+10, 7.783394e+10, 7.768365e+10, - 7.74484e+10, 7.740301e+10, 7.725512e+10, 7.666682e+10, 7.635862e+10, - 7.592468e+10, 7.539656e+10, 7.529974e+10, 7.501661e+10, 7.442706e+10, - 7.406878e+10, 7.347894e+10, 7.268775e+10, 7.23729e+10, 7.171337e+10, - 7.146626e+10, 7.130693e+10, 7.066356e+10, 6.977915e+10, 
6.915126e+10, - 6.830462e+10, 6.73021e+10, 6.67686e+10, 6.600806e+10, 6.504958e+10, - 6.427045e+10, 6.35093e+10, 6.277891e+10, 6.258429e+10, 6.184866e+10, - 6.114754e+10, 6.093035e+10, 6.063859e+10, 5.999596e+10, 5.952608e+10, - 5.927059e+10, 5.831014e+10, 5.763428e+10, 5.77239e+10, 5.82414e+10, - 5.911797e+10, 5.987076e+10, 5.976584e+10, 6.017487e+10, 6.023042e+10, - 6.029144e+10, 6.068466e+10, 6.139924e+10, 6.208432e+10, 6.259237e+10, - 6.300856e+10, 6.342197e+10, 6.423638e+10, 6.494938e+10, 6.478293e+10, - 6.444705e+10, 6.432593e+10, 6.437474e+10, 6.447832e+10, 6.450247e+10, - 6.398122e+10, 6.399681e+10, 6.406744e+10, 6.404553e+10, 6.417746e+10, - 6.39819e+10, 6.389218e+10, 6.453242e+10, 6.491168e+10, 6.493824e+10, - 6.524365e+10, 6.537463e+10, 6.543864e+10, 6.583769e+10, 6.596521e+10, - 6.641129e+10, 6.718787e+10, 6.741177e+10, 6.776819e+10, 6.786579e+10, - 6.783788e+10, 6.790788e+10, 6.77233e+10, 6.738099e+10, 6.718351e+10, - 6.739131e+10, 6.752051e+10, 6.747344e+10, 6.757187e+10, 6.739908e+10, - 6.702725e+10, 6.70474e+10, 6.708783e+10, 6.72989e+10, 6.75298e+10, - 6.727323e+10, 6.677787e+10, 6.686342e+10, 6.687026e+10, 6.714555e+10, - 6.750766e+10, 6.807156e+10, 6.847816e+10, 6.915895e+10, 6.958225e+10, - 6.970934e+10, 6.972807e+10, 6.973312e+10, 6.970858e+10, 6.962325e+10, - 6.968693e+10, 6.965446e+10, 6.983768e+10, 6.974386e+10, 6.992195e+10, - 7.010707e+10, 7.004337e+10, 7.006336e+10, 7.06312e+10, 7.078169e+10, - 7.080609e+10, 7.107845e+10, 7.084754e+10, 7.032667e+10, 7.052029e+10, - 7.031464e+10, 7.006906e+10, 7.018558e+10, 7.022278e+10, 7.012379e+10, - 7.043974e+10, 7.016036e+10, 6.975801e+10, 6.95197e+10, 6.92444e+10, - 6.85828e+10, 6.808828e+10, 6.74055e+10, 6.663602e+10, 6.588224e+10, - 6.52747e+10, 6.412303e+10, 6.315978e+10, 6.268569e+10, 6.219346e+10, - 6.177174e+10, 6.101807e+10, 6.018369e+10, 5.97554e+10, 5.924427e+10, - 5.867325e+10, 5.814079e+10, 5.745633e+10, 5.641881e+10, 5.608709e+10, - 5.529503e+10, 5.450575e+10, 5.383054e+10, 5.297568e+10, 5.210389e+10, - 5.139513e+10, 5.03026e+10, 4.922761e+10, 4.839502e+10, 4.739353e+10, - 4.605013e+10, 4.486422e+10, 4.369101e+10, 4.241115e+10, 4.128026e+10, - 4.025775e+10, 3.915851e+10, 3.819004e+10, 3.700971e+10, 3.581475e+10, - 3.498126e+10, 3.384422e+10, 3.224959e+10, 3.108637e+10, 2.997983e+10, - 2.86439e+10, 2.774108e+10, 2.682793e+10, 2.590098e+10, 2.500665e+10, - 2.368987e+10, 2.24582e+10, 2.158596e+10, 2.062636e+10, 1.942922e+10, - 1.873734e+10, 1.823214e+10, 1.726518e+10, 1.665115e+10, 1.582729e+10, - 1.477715e+10, 1.406265e+10, 1.285904e+10, 1.145722e+10, 1.038312e+10, - 9.181713e+09, 8.141138e+09, 7.45358e+09, 6.59996e+09, 5.72857e+09, - 5.136189e+09, 4.51829e+09, 3.649536e+09, 2.990132e+09, 2.29392e+09, - 1.390141e+09, 5.611192e+08, -1.62469e+08, -1.041465e+09, -1.804217e+09, - -2.923116e+09, -4.205691e+09, -5.09832e+09, -6.12155e+09, -7.10503e+09, - -7.957297e+09, -9.107372e+09, -1.039097e+10, -1.133152e+10, -1.221205e+10, - -1.318018e+10, -1.402195e+10, -1.512e+10, -1.634369e+10, -1.710999e+10, - -1.786548e+10, -1.866482e+10, -1.938912e+10, -2.039964e+10, -2.160603e+10, - -2.259855e+10, -2.353314e+10, -2.449689e+10, -2.52005e+10, -2.627104e+10, - -2.730019e+10, -2.815777e+10, -2.920027e+10, -3.03507e+10, -3.126021e+10, - -3.212383e+10, -3.329089e+10, -3.402306e+10, -3.475361e+10, -3.572698e+10, - -3.644467e+10, -3.721484e+10, -3.800023e+10, -3.865459e+10, -3.918282e+10, - -3.983764e+10, -4.051065e+10, -4.119051e+10, -4.202436e+10, -4.24868e+10, - -4.340278e+10, -4.418258e+10, -4.490206e+10, -4.587365e+10, 
-4.697342e+10, - -4.778222e+10, -4.882614e+10, -4.984197e+10, -5.051089e+10, -5.143766e+10, - -5.252824e+10, -5.353136e+10, -5.436329e+10, -5.533555e+10, -5.623246e+10, - -5.689744e+10, -5.798439e+10, -5.882786e+10, -5.96284e+10, -6.061507e+10, - -6.145417e+10, -6.235327e+10, -6.335978e+10, -6.405788e+10, -6.496648e+10, - -6.600807e+10, -6.686964e+10, -6.782611e+10, -6.890904e+10, -6.941638e+10, - -7.012465e+10, -7.113145e+10, -7.186233e+10, -7.2293e+10, -7.313894e+10, - -7.394114e+10, -7.475566e+10, -7.572029e+10, -7.660066e+10, -7.738602e+10, - -7.846013e+10, -7.921084e+10, -7.986093e+10, -8.07113e+10, -8.159104e+10, - -8.243174e+10, -8.305353e+10, -8.346367e+10, -8.402575e+10, -8.482895e+10, - -8.536747e+10, -8.581526e+10, -8.640365e+10, -8.683093e+10, -8.724777e+10, - -8.746026e+10, -8.760338e+10, -8.809235e+10, -8.870936e+10, -8.905536e+10, - -8.953669e+10, -9.031665e+10, -9.090067e+10, -9.135409e+10, -9.185499e+10, - -9.225697e+10, -9.253896e+10, -9.314785e+10, -9.354807e+10, -9.391591e+10, - -9.436751e+10, -9.471133e+10, -9.517393e+10, -9.587184e+10, -9.619209e+10, - -9.607482e+10, -9.593427e+10, -9.604743e+10, -9.619758e+10, -9.62449e+10, - -9.61466e+10, -9.636941e+10, -9.692289e+10, -9.735416e+10, -9.774056e+10, - -9.828883e+10, -9.859253e+10, -9.888183e+10, -9.95351e+10, -1.001142e+11}; - - maths::CGammaRateConjugate gammaPrior = - maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.2, 0.001); - maths::CNormalMeanPrecConjugate normalPrior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.001); + TDoubleVec values{ + 7.324121e+10, 7.251927e+10, 7.152208e+10, 7.089604e+10, 7.018831e+10, 6.94266e+10, 6.890659e+10, 6.837292e+10, + 6.794372e+10, 6.793463e+10, 6.785385e+10, 6.773589e+10, 6.787609e+10, 6.760049e+10, 6.709596e+10, 6.701824e+10, + 6.672568e+10, 6.617609e+10, 6.620431e+10, 6.627069e+10, 6.617393e+10, 6.633176e+10, 6.600326e+10, 6.530363e+10, + 6.494482e+10, 6.433443e+10, 6.362233e+10, 6.317814e+10, 6.296127e+10, 6.272491e+10, 6.243567e+10, 6.19567e+10, + 6.13123e+10, 6.150823e+10, 6.160438e+10, 6.106396e+10, 6.128276e+10, 6.13318e+10, 6.161243e+10, 6.182719e+10, + 6.177156e+10, 6.174539e+10, 6.216147e+10, 6.272091e+10, 6.286637e+10, 6.310137e+10, 6.315882e+10, 6.312109e+10, + 6.312296e+10, 6.312432e+10, 6.328676e+10, 6.37708e+10, 6.421867e+10, 6.490675e+10, 6.547625e+10, 6.593425e+10, + 6.67186e+10, 6.755033e+10, 6.754501e+10, 6.730381e+10, 6.76163e+10, 6.761007e+10, 6.745505e+10, 6.777796e+10, + 6.783472e+10, 6.779558e+10, 6.787643e+10, 6.800003e+10, 6.840413e+10, 6.856255e+10, 6.939239e+10, 6.907512e+10, + 6.914988e+10, 6.901868e+10, 6.884531e+10, 6.934499e+10, 6.955862e+10, 6.938019e+10, 6.942022e+10, 6.950912e+10, + 6.979618e+10, 7.064871e+10, 7.152501e+10, 7.178129e+10, 7.2239e+10, 7.257321e+10, 7.28913e+10, 7.365193e+10, + 7.432521e+10, 7.475098e+10, 7.553025e+10, 7.654561e+10, 7.698032e+10, 7.768267e+10, 7.826669e+10, 7.866854e+10, + 7.924608e+10, 7.998602e+10, 8.038091e+10, 8.094976e+10, 8.145126e+10, 8.132123e+10, 8.142747e+10, 8.148276e+10, + 8.118588e+10, 8.122279e+10, 8.078815e+10, 8.008936e+10, 7.991103e+10, 7.981722e+10, 7.932372e+10, 7.900164e+10, + 7.881053e+10, 7.837734e+10, 7.847101e+10, 7.816575e+10, 7.789224e+10, 7.803634e+10, 7.827226e+10, 7.812112e+10, + 7.814848e+10, 7.812407e+10, 7.779805e+10, 7.783394e+10, 7.768365e+10, 7.74484e+10, 7.740301e+10, 7.725512e+10, + 7.666682e+10, 7.635862e+10, 7.592468e+10, 7.539656e+10, 7.529974e+10, 7.501661e+10, 7.442706e+10, 7.406878e+10, + 7.347894e+10, 
7.268775e+10, 7.23729e+10, 7.171337e+10, 7.146626e+10, 7.130693e+10, 7.066356e+10, 6.977915e+10, + 6.915126e+10, 6.830462e+10, 6.73021e+10, 6.67686e+10, 6.600806e+10, 6.504958e+10, 6.427045e+10, 6.35093e+10, + 6.277891e+10, 6.258429e+10, 6.184866e+10, 6.114754e+10, 6.093035e+10, 6.063859e+10, 5.999596e+10, 5.952608e+10, + 5.927059e+10, 5.831014e+10, 5.763428e+10, 5.77239e+10, 5.82414e+10, 5.911797e+10, 5.987076e+10, 5.976584e+10, + 6.017487e+10, 6.023042e+10, 6.029144e+10, 6.068466e+10, 6.139924e+10, 6.208432e+10, 6.259237e+10, 6.300856e+10, + 6.342197e+10, 6.423638e+10, 6.494938e+10, 6.478293e+10, 6.444705e+10, 6.432593e+10, 6.437474e+10, 6.447832e+10, + 6.450247e+10, 6.398122e+10, 6.399681e+10, 6.406744e+10, 6.404553e+10, 6.417746e+10, 6.39819e+10, 6.389218e+10, + 6.453242e+10, 6.491168e+10, 6.493824e+10, 6.524365e+10, 6.537463e+10, 6.543864e+10, 6.583769e+10, 6.596521e+10, + 6.641129e+10, 6.718787e+10, 6.741177e+10, 6.776819e+10, 6.786579e+10, 6.783788e+10, 6.790788e+10, 6.77233e+10, + 6.738099e+10, 6.718351e+10, 6.739131e+10, 6.752051e+10, 6.747344e+10, 6.757187e+10, 6.739908e+10, 6.702725e+10, + 6.70474e+10, 6.708783e+10, 6.72989e+10, 6.75298e+10, 6.727323e+10, 6.677787e+10, 6.686342e+10, 6.687026e+10, + 6.714555e+10, 6.750766e+10, 6.807156e+10, 6.847816e+10, 6.915895e+10, 6.958225e+10, 6.970934e+10, 6.972807e+10, + 6.973312e+10, 6.970858e+10, 6.962325e+10, 6.968693e+10, 6.965446e+10, 6.983768e+10, 6.974386e+10, 6.992195e+10, + 7.010707e+10, 7.004337e+10, 7.006336e+10, 7.06312e+10, 7.078169e+10, 7.080609e+10, 7.107845e+10, 7.084754e+10, + 7.032667e+10, 7.052029e+10, 7.031464e+10, 7.006906e+10, 7.018558e+10, 7.022278e+10, 7.012379e+10, 7.043974e+10, + 7.016036e+10, 6.975801e+10, 6.95197e+10, 6.92444e+10, 6.85828e+10, 6.808828e+10, 6.74055e+10, 6.663602e+10, + 6.588224e+10, 6.52747e+10, 6.412303e+10, 6.315978e+10, 6.268569e+10, 6.219346e+10, 6.177174e+10, 6.101807e+10, + 6.018369e+10, 5.97554e+10, 5.924427e+10, 5.867325e+10, 5.814079e+10, 5.745633e+10, 5.641881e+10, 5.608709e+10, + 5.529503e+10, 5.450575e+10, 5.383054e+10, 5.297568e+10, 5.210389e+10, 5.139513e+10, 5.03026e+10, 4.922761e+10, + 4.839502e+10, 4.739353e+10, 4.605013e+10, 4.486422e+10, 4.369101e+10, 4.241115e+10, 4.128026e+10, 4.025775e+10, + 3.915851e+10, 3.819004e+10, 3.700971e+10, 3.581475e+10, 3.498126e+10, 3.384422e+10, 3.224959e+10, 3.108637e+10, + 2.997983e+10, 2.86439e+10, 2.774108e+10, 2.682793e+10, 2.590098e+10, 2.500665e+10, 2.368987e+10, 2.24582e+10, + 2.158596e+10, 2.062636e+10, 1.942922e+10, 1.873734e+10, 1.823214e+10, 1.726518e+10, 1.665115e+10, 1.582729e+10, + 1.477715e+10, 1.406265e+10, 1.285904e+10, 1.145722e+10, 1.038312e+10, 9.181713e+09, 8.141138e+09, 7.45358e+09, + 6.59996e+09, 5.72857e+09, 5.136189e+09, 4.51829e+09, 3.649536e+09, 2.990132e+09, 2.29392e+09, 1.390141e+09, + 5.611192e+08, -1.62469e+08, -1.041465e+09, -1.804217e+09, -2.923116e+09, -4.205691e+09, -5.09832e+09, -6.12155e+09, + -7.10503e+09, -7.957297e+09, -9.107372e+09, -1.039097e+10, -1.133152e+10, -1.221205e+10, -1.318018e+10, -1.402195e+10, + -1.512e+10, -1.634369e+10, -1.710999e+10, -1.786548e+10, -1.866482e+10, -1.938912e+10, -2.039964e+10, -2.160603e+10, + -2.259855e+10, -2.353314e+10, -2.449689e+10, -2.52005e+10, -2.627104e+10, -2.730019e+10, -2.815777e+10, -2.920027e+10, + -3.03507e+10, -3.126021e+10, -3.212383e+10, -3.329089e+10, -3.402306e+10, -3.475361e+10, -3.572698e+10, -3.644467e+10, + -3.721484e+10, -3.800023e+10, -3.865459e+10, -3.918282e+10, -3.983764e+10, -4.051065e+10, -4.119051e+10, -4.202436e+10, + -4.24868e+10, 
-4.340278e+10, -4.418258e+10, -4.490206e+10, -4.587365e+10, -4.697342e+10, -4.778222e+10, -4.882614e+10, + -4.984197e+10, -5.051089e+10, -5.143766e+10, -5.252824e+10, -5.353136e+10, -5.436329e+10, -5.533555e+10, -5.623246e+10, + -5.689744e+10, -5.798439e+10, -5.882786e+10, -5.96284e+10, -6.061507e+10, -6.145417e+10, -6.235327e+10, -6.335978e+10, + -6.405788e+10, -6.496648e+10, -6.600807e+10, -6.686964e+10, -6.782611e+10, -6.890904e+10, -6.941638e+10, -7.012465e+10, + -7.113145e+10, -7.186233e+10, -7.2293e+10, -7.313894e+10, -7.394114e+10, -7.475566e+10, -7.572029e+10, -7.660066e+10, + -7.738602e+10, -7.846013e+10, -7.921084e+10, -7.986093e+10, -8.07113e+10, -8.159104e+10, -8.243174e+10, -8.305353e+10, + -8.346367e+10, -8.402575e+10, -8.482895e+10, -8.536747e+10, -8.581526e+10, -8.640365e+10, -8.683093e+10, -8.724777e+10, + -8.746026e+10, -8.760338e+10, -8.809235e+10, -8.870936e+10, -8.905536e+10, -8.953669e+10, -9.031665e+10, -9.090067e+10, + -9.135409e+10, -9.185499e+10, -9.225697e+10, -9.253896e+10, -9.314785e+10, -9.354807e+10, -9.391591e+10, -9.436751e+10, + -9.471133e+10, -9.517393e+10, -9.587184e+10, -9.619209e+10, -9.607482e+10, -9.593427e+10, -9.604743e+10, -9.619758e+10, + -9.62449e+10, -9.61466e+10, -9.636941e+10, -9.692289e+10, -9.735416e+10, -9.774056e+10, -9.828883e+10, -9.859253e+10, + -9.888183e+10, -9.95351e+10, -1.001142e+11}; + + maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.2, 0.001); + maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.001); maths::CLogNormalMeanPrecConjugate logNormalPrior = - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.2, 0.001); + maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.2, 0.001); maths::COneOfNPrior::TPriorPtrVec modePriors; modePriors.reserve(3u); @@ -1747,27 +1466,17 @@ void CMultimodalPriorTest::testLargeValues() modePriors.push_back(TPriorPtr(logNormalPrior.clone())); modePriors.push_back(TPriorPtr(normalPrior.clone())); maths::COneOfNPrior modePrior(modePriors, maths_t::E_ContinuousData, 0.001); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - 0.001, 0.05, 12, 0.8 / 3.0); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001, 0.05, 12, 0.8 / 3.0); maths::CMultimodalPrior multimodalPrior(maths_t::E_ContinuousData, clusterer, modePrior, 0.001); - for (auto value : values) - { + for (auto value : values) { - multimodalPrior.addSamples(maths::CConstantWeights::COUNT, - TDouble1Vec(1, value), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0/3.0))); - if (!multimodalPrior.isNonInformative()) - { + multimodalPrior.addSamples(maths::CConstantWeights::COUNT, TDouble1Vec(1, value), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0 / 3.0))); + if (!multimodalPrior.isNonInformative()) { TDoubleDoublePr interval = - multimodalPrior.marginalLikelihoodConfidenceInterval( - 95.0, - maths::CConstantWeights::COUNT, - maths::CConstantWeights::UNIT); - if (interval.second - interval.first >= 3e11) - { + multimodalPrior.marginalLikelihoodConfidenceInterval(95.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); + if (interval.second - interval.first >= 3e11) { LOG_DEBUG("interval = " << interval.second - interval.first); LOG_DEBUG(multimodalPrior.print()); } 
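For context on the "well conditioned" remark above: with samples of magnitude ~1e11, any interval or variance calculation that accumulates raw second moments loses essentially all of the signal, because x^2 ~ 1e22 exhausts the roughly 16 significant digits of a double. The following standalone sketch (illustrative only, not the library's actual estimator) shows the failure mode and the usual shifted-accumulation remedy:

#include <iostream>
#include <vector>

int main() {
    // Values of magnitude ~7e10 with a spread of O(10), like the test data.
    std::vector<double> values;
    for (int i = 0; i < 1000; ++i) {
        values.push_back(7.0e10 + static_cast<double>(i % 10));
    }

    double n = 0.0, sum = 0.0, sumSq = 0.0; // naive E[x^2] - E[x]^2
    double mean = 0.0, m2 = 0.0;            // Welford's shifted update
    for (double x : values) {
        n += 1.0;
        sum += x;
        sumSq += x * x;
        double delta = x - mean;
        mean += delta / n;
        m2 += delta * (x - mean);
    }
    // The naive estimate cancels catastrophically; the shifted one is ~8.25.
    std::cout << "naive = " << sumSq / n - (sum / n) * (sum / n)
              << ", shifted = " << m2 / n << '\n';
}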
@@ -1776,8 +1485,7 @@ void CMultimodalPriorTest::testLargeValues() } } -void CMultimodalPriorTest::testSeasonalVarianceScale() -{ +void CMultimodalPriorTest::testSeasonalVarianceScale() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CMultimodalPriorTest::testSeasonalVarianceScale |"); LOG_DEBUG("+---------------------------------------------------+"); @@ -1807,7 +1515,7 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() TDoubleVec samples3; rng.generateNormalSamples(mean3, variance3, 100, samples3); - double varianceScales[] = { 0.2, 0.5, 1.0, 2.0, 5.0 }; + double varianceScales[] = {0.2, 0.5, 1.0, 2.0, 5.0}; maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); TDouble4Vec weight(1, 1.0); TDouble4Vec1Vec weights(1, weight); @@ -1825,19 +1533,14 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() v = filter.marginalLikelihoodVariance(); LOG_DEBUG("v = " << v); - double points[] = { 0.5, 4.0, 12.0, 20.0, 40.0, 50.0, 60.0 }; + double points[] = {0.5, 4.0, 12.0, 20.0, 40.0, 50.0, 60.0}; double unscaledExpectationVariance; - filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), - 50, - unscaledExpectationVariance); + filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 50, unscaledExpectationVariance); LOG_DEBUG("unscaledExpectationVariance = " << unscaledExpectationVariance); - CPPUNIT_ASSERT_DOUBLES_EQUAL(v, - unscaledExpectationVariance, - 1e-2 * unscaledExpectationVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(v, unscaledExpectationVariance, 1e-2 * unscaledExpectationVariance); - for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) - { + for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { double vs = varianceScales[i]; weight[0] = vs; weights[0][0] = vs; @@ -1850,22 +1553,15 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() LOG_DEBUG("sv = " << filter.marginalLikelihoodVariance(weightStyle, weight)); double expectationVariance; - filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), - 50, - expectationVariance, - weightStyle, - weight); + filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 50, expectationVariance, weightStyle, weight); LOG_DEBUG("expectationVariance = " << expectationVariance); - CPPUNIT_ASSERT_DOUBLES_EQUAL(vs * unscaledExpectationVariance, - expectationVariance, - 1e-3 * vs * unscaledExpectationVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(vs * unscaledExpectationVariance, expectationVariance, 1e-3 * vs * unscaledExpectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(weightStyle, weight), expectationVariance, 1e-3 * filter.marginalLikelihoodVariance(weightStyle, weight)); TDouble1Vec sample(1, 0.0); - for (std::size_t j = 0u; j < boost::size(points); ++j) - { + for (std::size_t j = 0u; j < boost::size(points); ++j) { TDouble1Vec x(1, points[j]); double fx; filter.jointLogMarginalLikelihood(weightStyle, x, weights, fx); @@ -1876,9 +1572,7 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() double FxPlusEps = std::exp(-(lb + ub) / 2.0); filter.minusLogJointCdf(weightStyle, xMinusEps, weights, lb, ub); double FxMinusEps = std::exp(-(lb + ub) / 2.0); - LOG_DEBUG("x = " << points[j] - << ", log(f(x)) = " << fx - << ", log(dF/dx)) = " << std::log((FxPlusEps - FxMinusEps) / 2e-3)); + LOG_DEBUG("x = " << points[j] << ", log(f(x)) = " << fx << ", log(dF/dx)) = " << std::log((FxPlusEps - FxMinusEps) / 2e-3)); CPPUNIT_ASSERT_DOUBLES_EQUAL(fx, std::log((FxPlusEps - FxMinusEps) / 
2e-3), 0.05 * std::fabs(fx)); sample[0] = m + (points[j] - m) / std::sqrt(vs); @@ -1886,22 +1580,15 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() double expectedLowerBound; double expectedUpperBound; maths_t::ETail expectedTail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - sample, - weights, - expectedLowerBound, expectedUpperBound, expectedTail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, sample, weights, expectedLowerBound, expectedUpperBound, expectedTail); sample[0] = points[j]; weights[0][0] = vs; double lowerBound; double upperBound; maths_t::ETail tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - sample, - weights, - lowerBound, upperBound, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, weightStyle, sample, weights, lowerBound, upperBound, tail); LOG_DEBUG("expectedLowerBound = " << expectedLowerBound); LOG_DEBUG("lowerBound = " << lowerBound); @@ -1910,30 +1597,20 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() LOG_DEBUG("expectedTail = " << expectedTail); LOG_DEBUG("tail = " << tail); - if ((expectedLowerBound + expectedUpperBound) < 0.02) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(expectedLowerBound), - std::log(lowerBound), - 0.1 * std::fabs(std::log(expectedLowerBound))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(expectedUpperBound), - std::log(upperBound), - 0.1 * std::fabs(std::log(expectedUpperBound))); - } - else - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLowerBound, - lowerBound, - 0.05 * expectedLowerBound); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedUpperBound, - upperBound, - 0.05 * expectedUpperBound); + if ((expectedLowerBound + expectedUpperBound) < 0.02) { + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::log(expectedLowerBound), std::log(lowerBound), 0.1 * std::fabs(std::log(expectedLowerBound))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::log(expectedUpperBound), std::log(upperBound), 0.1 * std::fabs(std::log(expectedUpperBound))); + } else { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLowerBound, lowerBound, 0.05 * expectedLowerBound); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedUpperBound, upperBound, 0.05 * expectedUpperBound); } CPPUNIT_ASSERT_EQUAL(expectedTail, tail); } } } - for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) - { + for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { double vs = varianceScales[i]; TDouble1Vec samples(samples1.begin(), samples1.end()); @@ -1943,14 +1620,13 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() CMultimodalPrior filter(makePrior()); weights[0][0] = vs; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(weightStyle, TDouble1Vec(1, samples[j]), weights); } double sm = filter.marginalLikelihoodMean(); double sv = filter.marginalLikelihoodVariance(); - LOG_DEBUG("m = " << m << ", v = " << v); + LOG_DEBUG("m = " << m << ", v = " << v); LOG_DEBUG("sm = " << sm << ", sv = " << sv); CPPUNIT_ASSERT_DOUBLES_EQUAL(m, sm, 0.12 * m); @@ -1958,8 +1634,7 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() } } -void CMultimodalPriorTest::testPersist() -{ +void CMultimodalPriorTest::testPersist() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CMultimodalPriorTest::testPersist |"); LOG_DEBUG("+-------------------------------------+"); @@ -1975,15 +1650,10 @@ void CMultimodalPriorTest::testPersist() samples.insert(samples.end(), samples2.begin(), samples2.end()); 
rng.random_shuffle(samples.begin(), samples.end()); -    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, -                                     maths::CAvailableModeDistributions::ALL, -                                     maths_t::E_ClustersFractionWeight); -    maths::CGammaRateConjugate gamma = -            maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01); -    maths::CLogNormalMeanPrecConjugate logNormal = -            maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01); -    maths::CNormalMeanPrecConjugate normal = -            maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); +    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight); +    maths::CGammaRateConjugate gamma = maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01); +    maths::CLogNormalMeanPrecConjugate logNormal = maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01); +    maths::CNormalMeanPrecConjugate normal = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); COneOfNPrior::TPriorPtrVec priors; priors.push_back(COneOfNPrior::TPriorPtr(gamma.clone())); @@ -1991,14 +1661,10 @@ void CMultimodalPriorTest::testPersist() priors.push_back(COneOfNPrior::TPriorPtr(normal.clone())); COneOfNPrior modePrior(maths::COneOfNPrior(priors, maths_t::E_ContinuousData)); -    maths::CMultimodalPrior origFilter(maths_t::E_ContinuousData, -                                       clusterer, -                                       modePrior); -    for (std::size_t i = 0u; i < samples.size(); ++i) -    { -        origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), -                              TDouble1Vec(1, samples[i]), -                              TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); +    maths::CMultimodalPrior origFilter(maths_t::E_ContinuousData, clusterer, modePrior); +    for (std::size_t i = 0u; i < samples.size(); ++i) { +        origFilter.addSamples( +            maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -2024,8 +1690,7 @@ void CMultimodalPriorTest::testPersist() maths::MINIMUM_CATEGORY_COUNT); maths::CMultimodalPrior restoredFilter(params, traverser); -    LOG_DEBUG("orig checksum = " << checksum -              << " restored checksum = " << restoredFilter.checksum()); +    LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -2038,49 +1703,34 @@ void CMultimodalPriorTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test *CMultimodalPriorTest::suite() -{ -    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMultimodalPriorTest"); - -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testMultipleUpdate", -                                   &CMultimodalPriorTest::testMultipleUpdate) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testPropagation", -                                   &CMultimodalPriorTest::testPropagation) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testSingleMode", -                                   &CMultimodalPriorTest::testSingleMode) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testMultipleModes", -                                   &CMultimodalPriorTest::testMultipleModes) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testMarginalLikelihood", -                                   &CMultimodalPriorTest::testMarginalLikelihood) ); -    
suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testMarginalLikelihoodMode", -                                   &CMultimodalPriorTest::testMarginalLikelihoodMode) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval", -                                   &CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testSampleMarginalLikelihood", -                                   &CMultimodalPriorTest::testSampleMarginalLikelihood) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testCdf", -                                   &CMultimodalPriorTest::testCdf) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testProbabilityOfLessLikelySamples", -                                   &CMultimodalPriorTest::testProbabilityOfLessLikelySamples) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testSeasonalVarianceScale", -                                   &CMultimodalPriorTest::testSeasonalVarianceScale) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testLargeValues", -                                   &CMultimodalPriorTest::testLargeValues) ); -    suiteOfTests->addTest( new CppUnit::TestCaller<CMultimodalPriorTest>( -                                   "CMultimodalPriorTest::testPersist", -                                   &CMultimodalPriorTest::testPersist) ); +CppUnit::Test* CMultimodalPriorTest::suite() { +    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultimodalPriorTest"); + +    suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testMultipleUpdate", +                                                                         &CMultimodalPriorTest::testMultipleUpdate)); +    suiteOfTests->addTest( +        new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testPropagation", &CMultimodalPriorTest::testPropagation)); +    suiteOfTests->addTest( +        new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testSingleMode", &CMultimodalPriorTest::testSingleMode)); +    suiteOfTests->addTest( +        new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testMultipleModes", &CMultimodalPriorTest::testMultipleModes)); +    suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testMarginalLikelihood", +                                                                         &CMultimodalPriorTest::testMarginalLikelihood)); +    suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testMarginalLikelihoodMode", +                                                                         &CMultimodalPriorTest::testMarginalLikelihoodMode)); +    suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval", +                                                                         &CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval)); +    suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testSampleMarginalLikelihood", +                                                                         &CMultimodalPriorTest::testSampleMarginalLikelihood)); +    suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testCdf", &CMultimodalPriorTest::testCdf)); +    suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testProbabilityOfLessLikelySamples", +                                                                         &CMultimodalPriorTest::testProbabilityOfLessLikelySamples)); +    suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testSeasonalVarianceScale", +                                                                         &CMultimodalPriorTest::testSeasonalVarianceScale)); +    suiteOfTests->addTest( +        new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testLargeValues", &CMultimodalPriorTest::testLargeValues)); +    suiteOfTests->addTest( +        new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testPersist", &CMultimodalPriorTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CMultimodalPriorTest.h b/lib/maths/unittest/CMultimodalPriorTest.h index ac26567fea..c0b77b26ea 100644 --- a/lib/maths/unittest/CMultimodalPriorTest.h +++ b/lib/maths/unittest/CMultimodalPriorTest.h @@ -9,24 +9,23 @@ #include -class 
CMultimodalPriorTest : public CppUnit::TestFixture -{ -    public: -        void testMultipleUpdate(); -        void testPropagation(); -        void testSingleMode(); -        void testMultipleModes(); -        void testMarginalLikelihood(); -        void testMarginalLikelihoodMode(); -        void testMarginalLikelihoodConfidenceInterval(); -        void testSampleMarginalLikelihood(); -        void testCdf(); -        void testProbabilityOfLessLikelySamples(); -        void testSeasonalVarianceScale(); -        void testLargeValues(); -        void testPersist(); +class CMultimodalPriorTest : public CppUnit::TestFixture { +public: +    void testMultipleUpdate(); +    void testPropagation(); +    void testSingleMode(); +    void testMultipleModes(); +    void testMarginalLikelihood(); +    void testMarginalLikelihoodMode(); +    void testMarginalLikelihoodConfidenceInterval(); +    void testSampleMarginalLikelihood(); +    void testCdf(); +    void testProbabilityOfLessLikelySamples(); +    void testSeasonalVarianceScale(); +    void testLargeValues(); +    void testPersist(); -        static CppUnit::Test *suite(); +    static CppUnit::Test* suite(); }; #endif // INCLUDED_CMultimodalPriorTest_h diff --git a/lib/maths/unittest/CMultinomialConjugateTest.cc b/lib/maths/unittest/CMultinomialConjugateTest.cc index 932fe51e0f..f0d34d5051 100644 --- a/lib/maths/unittest/CMultinomialConjugateTest.cc +++ b/lib/maths/unittest/CMultinomialConjugateTest.cc @@ -7,12 +7,12 @@ #include "CMultinomialConjugateTest.h" #include +#include +#include #include #include #include #include -#include -#include #include #include @@ -40,8 +40,7 @@ using TDoubleDoublePr = std::pair<double, double>; using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>; using CMultinomialConjugate = CPriorTestInterfaceMixin<maths::CMultinomialConjugate>; -void CMultinomialConjugateTest::testMultipleUpdate() -{ +void CMultinomialConjugateTest::testMultipleUpdate() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CMultinomialConjugateTest::testMultipleUpdate |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -49,8 +48,8 @@ void CMultinomialConjugateTest::testMultipleUpdate() // Test that we get the same result updating once with a vector of 100 // samples of an R.V. versus updating individually 100 times. -    const double rawCategories[] = { -1.2, 5.1, 2.0, 18.0, 10.3 }; -    const double rawProbabilities[] = { 0.17, 0.13, 0.35, 0.3, 0.05 }; +    const double rawCategories[] = {-1.2, 5.1, 2.0, 18.0, 10.3}; +    const double rawProbabilities[] = {0.17, 0.13, 0.35, 0.3, 0.05}; const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); @@ -62,8 +61,7 @@ void CMultinomialConjugateTest::testMultipleUpdate() CMultinomialConjugate filter1(CMultinomialConjugate::nonInformativePrior(5u)); CMultinomialConjugate filter2(filter1); -    for (std::size_t j = 0u; j < samples.size(); ++j) -    { +    for (std::size_t j = 0u; j < samples.size(); ++j) { filter1.addSamples(TDouble1Vec(1, samples[j])); } filter2.addSamples(samples); @@ -73,8 +71,7 @@ void CMultinomialConjugateTest::testMultipleUpdate() CPPUNIT_ASSERT(filter1.equalTolerance(filter2, equal)); } -void CMultinomialConjugateTest::testPropagation() -{ +void CMultinomialConjugateTest::testPropagation() { LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CMultinomialConjugateTest::testPropagation |"); LOG_DEBUG("+----------------------------------------------+"); @@ -82,8 +79,8 @@ void CMultinomialConjugateTest::testPropagation() // Test that propagation doesn't affect the expected values // of probabilities. 
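The invariant behind this test is worth spelling out. For a Dirichlet-style conjugate prior with concentration parameters \alpha_i, the expected category probabilities depend only on the relative concentrations; so if propagating forwards in time rescales every concentration by a common factor \kappa \in (0, 1] (an assumption about the decay model, not something this patch asserts), the expectations are unchanged:

\mathbb{E}[p_i] = \frac{\alpha_i}{\sum_j \alpha_j}
\quad\longrightarrow\quad
\frac{\kappa\alpha_i}{\sum_j \kappa\alpha_j} = \frac{\alpha_i}{\sum_j \alpha_j}.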
-    const double rawCategories[] = { 0.0, 1.1, 2.0 }; -    const double rawProbabilities[] = { 0.27, 0.13, 0.6 }; +    const double rawCategories[] = {0.0, 1.1, 2.0}; +    const double rawProbabilities[] = {0.27, 0.13, 0.6}; const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); @@ -94,8 +91,7 @@ CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(5u)); -    for (std::size_t i = 0u; i < samples.size(); ++i) -    { +    for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, static_cast<double>(samples[i]))); } @@ -105,20 +101,15 @@ TDoubleVec propagatedExpectedProbabilities = filter.probabilities(); -    LOG_DEBUG("expectedProbabilities = " -              << core::CContainerPrinter::print(expectedProbabilities) -              << ", propagatedExpectedProbabilities = " -              << core::CContainerPrinter::print(propagatedExpectedProbabilities)); +    LOG_DEBUG("expectedProbabilities = " << core::CContainerPrinter::print(expectedProbabilities) << ", propagatedExpectedProbabilities = " +              << core::CContainerPrinter::print(propagatedExpectedProbabilities)); using TEqual = maths::CEqualWithTolerance<double>; TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-12); -    CPPUNIT_ASSERT(std::equal(expectedProbabilities.begin(), -                              expectedProbabilities.end(), -                              propagatedExpectedProbabilities.begin(), equal)); +    CPPUNIT_ASSERT(std::equal(expectedProbabilities.begin(), expectedProbabilities.end(), propagatedExpectedProbabilities.begin(), equal)); } -void CMultinomialConjugateTest::testProbabilityEstimation() -{ +void CMultinomialConjugateTest::testProbabilityEstimation() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CMultinomialConjugateTest::testProbabilityEstimation |"); LOG_DEBUG("+--------------------------------------------------------+"); // Test that we correctly estimate the probabilities and that // the true probabilities lie in various confidence intervals the // correct percentage of the times. 
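The pattern used below is a standard interval-coverage check: refit many times, compute a c% confidence interval each time, and count how often the true parameter escapes it; a calibrated interval should escape close to (100 - c)% of the time. A self-contained sketch of the same pattern for a known-variance normal mean (all names and constants are illustrative):

#include <cmath>
#include <iostream>
#include <random>

int main() {
    std::mt19937 rng(42);
    const double trueMean = 5.0;
    const double sigma = 2.0;
    std::normal_distribution<double> sample(trueMean, sigma);

    const int nTests = 5000;
    const int n = 100;
    const double z90 = 1.6449; // two-sided 90% standard normal quantile

    int escapes = 0;
    for (int t = 0; t < nTests; ++t) {
        double sum = 0.0;
        for (int i = 0; i < n; ++i) {
            sum += sample(rng);
        }
        double mean = sum / n;
        double halfWidth = z90 * sigma / std::sqrt(static_cast<double>(n));
        if (trueMean < mean - halfWidth || trueMean > mean + halfWidth) {
            ++escapes;
        }
    }
    // Expect roughly 10% of the intervals to miss the true mean.
    std::cout << "escape rate = " << 100.0 * escapes / nTests << "%\n";
}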
-    const double rawCategories[] = { 0.0, 1.1, 2.0, 5.0, 12.0, 15.0 }; -    const double rawProbabilities[] = { 0.1, 0.15, 0.12, 0.31, 0.03, 0.29 }; -    const TDoubleVec categories(boost::begin(rawCategories), -                                boost::end(rawCategories)); -    const TDoubleVec probabilities(boost::begin(rawProbabilities), -                                   boost::end(rawProbabilities)); +    const double rawCategories[] = {0.0, 1.1, 2.0, 5.0, 12.0, 15.0}; +    const double rawProbabilities[] = {0.1, 0.15, 0.12, 0.31, 0.03, 0.29}; +    const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); +    const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); -    const double decayRates[] = { 0.0, 0.001, 0.01 }; +    const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nTests = 5000u; -    const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 }; +    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; -    for (size_t i = 0; i < boost::size(decayRates); ++i) -    { +    for (size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; -        TUIntVec errors[] = -            { -                TUIntVec(6, 0), -                TUIntVec(6, 0), -                TUIntVec(6, 0), -                TUIntVec(6, 0), -                TUIntVec(6, 0), -                TUIntVec(6, 0), -                TUIntVec(6, 0), -                TUIntVec(6, 0) -            }; - -        for (unsigned int test = 0; test < nTests; ++test) -        { +        TUIntVec errors[] = { +            TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0)}; + +        for (unsigned int test = 0; test < nTests; ++test) { TDoubleVec samples; rng.generateMultinomialSamples(categories, probabilities, 500, samples); CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6, decayRates[i])); -            for (std::size_t j = 0u; j < samples.size(); ++j) -            { +            for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); filter.propagateForwardsByTime(1.0); } -            for (size_t j = 0u; j < boost::size(testIntervals); ++j) -            { -                TDoubleDoublePrVec confidenceIntervals = -                        filter.confidenceIntervalProbabilities(testIntervals[j]); +            for (size_t j = 0u; j < boost::size(testIntervals); ++j) { +                TDoubleDoublePrVec confidenceIntervals = filter.confidenceIntervalProbabilities(testIntervals[j]); CPPUNIT_ASSERT_EQUAL(confidenceIntervals.size(), probabilities.size()); -                for (std::size_t k = 0u; k < probabilities.size(); ++k) -                { -                    if (probabilities[k] < confidenceIntervals[k].first -                        || probabilities[k] > confidenceIntervals[k].second) -                    { +                for (std::size_t k = 0u; k < probabilities.size(); ++k) { +                    if (probabilities[k] < confidenceIntervals[k].first || probabilities[k] > confidenceIntervals[k].second) { ++errors[j][k]; } } } } -        for (size_t j = 0; j < boost::size(testIntervals); ++j) -        { +        for (size_t j = 0; j < boost::size(testIntervals); ++j) { TDoubleVec intervals; intervals.reserve(errors[j].size()); -            for (std::size_t k = 0u; k < errors[j].size(); ++k) -            { +            for (std::size_t k = 0u; k < errors[j].size(); ++k) { intervals.push_back(100.0 * errors[j][k] / static_cast<double>(nTests)); } -            LOG_DEBUG("interval = " << core::CContainerPrinter::print(intervals) -                      << ", expectedInterval = " << (100.0 - testIntervals[j])); +            LOG_DEBUG("interval = " << core::CContainerPrinter::print(intervals) << ", expectedInterval = " << (100.0 - testIntervals[j])); // If the decay rate is zero the intervals should be accurate. // Otherwise, they should be an upper bound. 
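The "upper bound" expectation follows because decay makes the posterior more diffuse while leaving its mean in place: if decay rescales Dirichlet concentrations \alpha \to \kappa\alpha with \kappa < 1 (again an assumption about the decay model), each probability's posterior variance grows, the intervals widen, and the nominal escape rate becomes conservative. Writing A = \sum_j \alpha_j:

\operatorname{Var}[p_i] = \frac{\alpha_i(A - \alpha_i)}{A^2(A + 1)}
\quad\longrightarrow\quad
\frac{\alpha_i(A - \alpha_i)}{A^2(\kappa A + 1)} > \operatorname{Var}[p_i]
\quad\text{for } \kappa < 1.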
double meanError = 0.0; -            for (std::size_t k = 0u; k < intervals.size(); ++k) -            { -                if (decayRates[i] == 0.0) -                { -                    CPPUNIT_ASSERT_DOUBLES_EQUAL(intervals[k], -                                                 100.0 - testIntervals[j], -                                                 std::min(5.0, 0.4 * (100.0 - testIntervals[j]))); +            for (std::size_t k = 0u; k < intervals.size(); ++k) { +                if (decayRates[i] == 0.0) { +                    CPPUNIT_ASSERT_DOUBLES_EQUAL(intervals[k], 100.0 - testIntervals[j], std::min(5.0, 0.4 * (100.0 - testIntervals[j]))); meanError += std::fabs(intervals[k] - (100.0 - testIntervals[j])); -                } -                else -                { +                } else { CPPUNIT_ASSERT(intervals[k] <= (100.0 - testIntervals[j])); } } meanError /= static_cast<double>(intervals.size()); LOG_DEBUG("meanError = " << meanError); -            CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError, -                                         std::min(2.0, 0.2 * (100.0 - testIntervals[j]))); +            CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError, std::min(2.0, 0.2 * (100.0 - testIntervals[j]))); } } } -void CMultinomialConjugateTest::testMarginalLikelihood() -{ +void CMultinomialConjugateTest::testMarginalLikelihood() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CMultinomialConjugateTest::testMarginalLikelihood |"); LOG_DEBUG("+-----------------------------------------------------+"); @@ -235,46 +196,37 @@ void CMultinomialConjugateTest::testMarginalLikelihood() test::CRandomNumbers rng; -    const double rawCategories[] = { 0.0, 1.0, 2.0 }; -    const double rawProbabilities[] = { 0.15, 0.5, 0.35 }; -    const TDoubleVec categories(boost::begin(rawCategories), -                                boost::end(rawCategories)); -    const TDoubleVec probabilities(boost::begin(rawProbabilities), -                                   boost::end(rawProbabilities)); +    const double rawCategories[] = {0.0, 1.0, 2.0}; +    const double rawProbabilities[] = {0.15, 0.5, 0.35}; +    const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); +    const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); TDoubleVec samples; rng.generateMultinomialSamples(categories, probabilities, 50, samples); -    const double decayRates[] = { 0.0, 0.001, 0.01 }; +    const double decayRates[] = {0.0, 0.001, 0.01}; -    for (size_t i = 0; i < boost::size(decayRates); ++i) -    { +    for (size_t i = 0; i < boost::size(decayRates); ++i) { LOG_DEBUG("**** Decay rate = " << decayRates[i] << " ****"); CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(3, decayRates[i])); -        for (std::size_t j = 0u; j < samples.size(); ++j) -        { +        for (std::size_t j = 0u; j < samples.size(); ++j) { TDouble1Vec sample(1, samples[j]); filter.addSamples(sample); filter.propagateForwardsByTime(1.0); double logp; -            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, -                                 filter.jointLogMarginalLikelihood(sample, logp)); +            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logp)); -            const TDoubleVec &filterCategories = filter.categories(); -            std::size_t k = std::lower_bound(filterCategories.begin(), -                                             filterCategories.end(), -                                             samples[j]) - filterCategories.begin(); +            const TDoubleVec& filterCategories = filter.categories(); +            std::size_t k = std::lower_bound(filterCategories.begin(), filterCategories.end(), samples[j]) - filterCategories.begin(); TDoubleVec filterProbabilities(filter.probabilities()); CPPUNIT_ASSERT(k < filterProbabilities.size()); double p = filterProbabilities[k]; -            LOG_DEBUG("sample = " << samples[j] -                      << ", expected likelihood = " << p -                      << ", likelihood = " << std::exp(logp)); +            LOG_DEBUG("sample = " << samples[j] << ", expected likelihood = " << p << ", likelihood = " << std::exp(logp)); 
CPPUNIT_ASSERT_DOUBLES_EQUAL(p, std::exp(logp), 1e-12); } @@ -296,24 +248,19 @@ void CMultinomialConjugateTest::testMarginalLikelihood() test::CRandomNumbers rng; - const double rawCategories[] = { 0.0, 1.0, 2.0 }; - const double rawProbabilities[] = { 0.1, 0.6, 0.3 }; - const TDoubleVec categories(boost::begin(rawCategories), - boost::end(rawCategories)); - const TDoubleVec probabilities(boost::begin(rawProbabilities), - boost::end(rawProbabilities)); + const double rawCategories[] = {0.0, 1.0, 2.0}; + const double rawProbabilities[] = {0.1, 0.6, 0.3}; + const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); + const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); // Compute the outer products of size 2 and 3. TDoubleVecVec o2, o3; - for (std::size_t i = 0u; i < categories.size(); ++i) - { - for (std::size_t j = i; j < categories.size(); ++j) - { + for (std::size_t i = 0u; i < categories.size(); ++i) { + for (std::size_t j = i; j < categories.size(); ++j) { o2.push_back(TDoubleVec()); o2.back().push_back(categories[i]); o2.back().push_back(categories[j]); - for (std::size_t k = j; k < categories.size(); ++k) - { + for (std::size_t k = j; k < categories.size(); ++k) { o3.push_back(TDoubleVec()); o3.back().push_back(categories[i]); o3.back().push_back(categories[j]); @@ -324,9 +271,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() LOG_DEBUG("o2 = " << core::CContainerPrinter::print(o2)); LOG_DEBUG("o3 = " << core::CContainerPrinter::print(o3)); - double rawConcentrations[] = { 1000.0, 6000.0, 3000.0 }; - TDoubleVec concentrations(boost::begin(rawConcentrations), - boost::end(rawConcentrations)); + double rawConcentrations[] = {1000.0, 6000.0, 3000.0}; + TDoubleVec concentrations(boost::begin(rawConcentrations), boost::end(rawConcentrations)); CMultinomialConjugate filter(maths::CMultinomialConjugate(3, categories, concentrations)); @@ -336,15 +282,12 @@ void CMultinomialConjugateTest::testMarginalLikelihood() // Compute the likelihoods of the various 2-category combinations. 
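For reference, the quantity computed below is the standard Dirichlet-multinomial predictive (assuming the prior over category probabilities is Dirichlet with concentrations \alpha_i and A = \sum_i \alpha_i): the probability of an unordered combination with category counts n_i, where n = \sum_i n_i, is

P(n_1, \ldots, n_m) = \frac{n!}{\prod_i n_i!} \, \frac{\Gamma(A)}{\Gamma(A + n)} \prod_i \frac{\Gamma(\alpha_i + n_i)}{\Gamma(\alpha_i)},

which sums to one over all combinations of a fixed size n; that is why the p2 and p3 totals below are asserted to equal 1. With the large concentrations used here (1000, 6000, 3000) it is close to a plain multinomial with p = (0.1, 0.6, 0.3).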
         TDoubleVec p2;
-        for (std::size_t i = 0u; i < o2.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < o2.size(); ++i) {
             double p;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter.jointLogMarginalLikelihood(o2[i], p));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(o2[i], p));
             p = std::exp(p);
             p2.push_back(p);
-            LOG_DEBUG("categories = " << core::CContainerPrinter::print(o2[i])
-                      << ", p = " << p);
+            LOG_DEBUG("categories = " << core::CContainerPrinter::print(o2[i]) << ", p = " << p);
         }
         CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::accumulate(p2.begin(), p2.end(), 0.0), 1e-10);
@@ -353,8 +296,7 @@ void CMultinomialConjugateTest::testMarginalLikelihood()
         TDoubleVec samples;
         rng.generateMultinomialSamples(categories, probabilities, 2 * nTests, samples);
-        for (unsigned int test = 0u; test < nTests; ++test)
-        {
+        for (unsigned int test = 0u; test < nTests; ++test) {
             TDoubleVec sample;
             sample.push_back(samples[2 * test]);
             sample.push_back(samples[2 * test + 1]);
@@ -365,13 +307,10 @@ void CMultinomialConjugateTest::testMarginalLikelihood()
             frequencies[i] += 1.0;
         }

-        for (std::size_t i = 0u; i < o2.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < o2.size(); ++i) {
             double p = frequencies[i] / static_cast<double>(nTests);

-            LOG_DEBUG("category = " << core::CContainerPrinter::print(o2[i])
-                      << ", p = " << p
-                      << ", expected p = " << p2[i]);
+            LOG_DEBUG("category = " << core::CContainerPrinter::print(o2[i]) << ", p = " << p << ", expected p = " << p2[i]);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(p, p2[i], 0.05 * std::max(p, p2[i]));
         }
     }
@@ -379,15 +318,12 @@ void CMultinomialConjugateTest::testMarginalLikelihood()
         // Compute the likelihoods of the various 3-category combinations.

         TDoubleVec p3;
-        for (std::size_t i = 0u; i < o3.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < o3.size(); ++i) {
             double p;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter.jointLogMarginalLikelihood(o3[i], p));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(o3[i], p));
             p = std::exp(p);
             p3.push_back(p);
-            LOG_DEBUG("categories = " << core::CContainerPrinter::print(o3[i])
-                      << ", p = " << p);
+            LOG_DEBUG("categories = " << core::CContainerPrinter::print(o3[i]) << ", p = " << p);
         }
         CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::accumulate(p3.begin(), p3.end(), 0.0), 1e-10);
@@ -396,8 +332,7 @@ void CMultinomialConjugateTest::testMarginalLikelihood()
         TDoubleVec samples;
         rng.generateMultinomialSamples(categories, probabilities, 3 * nTests, samples);
-        for (unsigned int test = 0u; test < nTests; ++test)
-        {
+        for (unsigned int test = 0u; test < nTests; ++test) {
             TDoubleVec sample;
             sample.push_back(samples[3 * test]);
             sample.push_back(samples[3 * test + 1]);
@@ -409,21 +344,17 @@ void CMultinomialConjugateTest::testMarginalLikelihood()
             frequencies[i] += 1.0;
         }

-        for (std::size_t i = 0u; i < o3.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < o3.size(); ++i) {
             double p = frequencies[i] / static_cast<double>(nTests);

-            LOG_DEBUG("category = " << core::CContainerPrinter::print(o3[i])
-                      << ", p = " << p
-                      << ", expected p = " << p3[i]);
+            LOG_DEBUG("category = " << core::CContainerPrinter::print(o3[i]) << ", p = " << p << ", expected p = " << p3[i]);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(p, p3[i], 0.05 * std::max(p, p3[i]));
         }
     }
 }
}

-void CMultinomialConjugateTest::testSampleMarginalLikelihood()
-{
+void CMultinomialConjugateTest::testSampleMarginalLikelihood() {
     LOG_DEBUG("+-----------------------------------------------------------+");
     LOG_DEBUG("| CMultinomialConjugateTest::testSampleMarginalLikelihood |");
LOG_DEBUG("+-----------------------------------------------------------+"); @@ -439,9 +370,8 @@ void CMultinomialConjugateTest::testSampleMarginalLikelihood() // small as possible. { - const double rawCategories[] = { 1.1, 1.2, 2.1, 2.2 }; - const TDoubleVec categories(boost::begin(rawCategories), - boost::end(rawCategories)); + const double rawCategories[] = {1.1, 1.2, 2.1, 2.2}; + const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); // The probabilities {P(i)} are proportional to the number of samples // of each category we add to the filter. @@ -459,14 +389,12 @@ void CMultinomialConjugateTest::testSampleMarginalLikelihood() LOG_DEBUG("samples = " << core::CContainerPrinter::print(samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.1, 1.1, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2]"), - core::CContainerPrinter::print(samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.1, 1.1, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2]"), core::CContainerPrinter::print(samples)); } { - const double rawCategories[] = { 1.1, 1.2, 2.1, 2.2, 3.2, 5.1 }; - const TDoubleVec categories(boost::begin(rawCategories), - boost::end(rawCategories)); + const double rawCategories[] = {1.1, 1.2, 2.1, 2.2, 3.2, 5.1}; + const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); @@ -483,14 +411,12 @@ void CMultinomialConjugateTest::testSampleMarginalLikelihood() LOG_DEBUG("samples = " << core::CContainerPrinter::print(samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.2, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2, 5.1]"), - core::CContainerPrinter::print(samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.2, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2, 5.1]"), core::CContainerPrinter::print(samples)); } { - const double rawCategories[] = { 1.1, 1.2, 2.1, 2.2, 3.2, 5.1 }; - const TDoubleVec categories(boost::begin(rawCategories), - boost::end(rawCategories)); + const double rawCategories[] = {1.1, 1.2, 2.1, 2.2, 3.2, 5.1}; + const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); @@ -507,13 +433,11 @@ void CMultinomialConjugateTest::testSampleMarginalLikelihood() LOG_DEBUG("samples = " << core::CContainerPrinter::print(samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.2, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2, 3.2]"), - core::CContainerPrinter::print(samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.2, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2, 3.2]"), core::CContainerPrinter::print(samples)); } } -void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() -{ +void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { LOG_DEBUG("+-----------------------------------------------------------------+"); LOG_DEBUG("| CMultinomialConjugateTest::testProbabilityOfLessLikelySamples |"); LOG_DEBUG("+-----------------------------------------------------------------+"); @@ -533,9 +457,8 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() LOG_DEBUG("**** two sided ****"); { - const double rawCategories[] = { 1.1, 1.2, 2.1, 2.2, 3.2, 5.1 }; - const TDoubleVec categories(boost::begin(rawCategories), - boost::end(rawCategories)); + const double rawCategories[] = {1.1, 1.2, 2.1, 2.2, 3.2, 5.1}; + const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); { CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); @@ -555,10 +478,10 @@ void 
CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39 filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 4000.0))); // P = 0.04 + TDouble4Vec1Vec(1, TDouble4Vec(1, 4000.0))); // P = 0.04 filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 6000.0))); // P = 0.06 + TDouble4Vec1Vec(1, TDouble4Vec(1, 6000.0))); // P = 0.06 // We expect the following probabilities for each category: // P(1.1) = 0.20 @@ -567,20 +490,14 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() // P(2.2) = 1.00 // P(3.2) = 0.04 // P(5.1) = 0.10 - double expectedProbabilities[] = { 0.20, 0.32, 0.61, 1.0, 0.04, 0.10 }; + double expectedProbabilities[] = {0.20, 0.32, 0.61, 1.0, 0.04, 0.10}; - for (size_t i = 0; i < boost::size(categories); ++i) - { + for (size_t i = 0; i < boost::size(categories); ++i) { double lowerBound, upperBound; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - TDouble1Vec(1, categories[i]), - lowerBound, - upperBound); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound); - LOG_DEBUG("category = " << categories[i] - << ", lower bound = " << lowerBound - << ", upper bound = " << upperBound - << ", expected probability = " << expectedProbabilities[i]); + LOG_DEBUG("category = " << categories[i] << ", lower bound = " << lowerBound << ", upper bound = " << upperBound + << ", expected probability = " << expectedProbabilities[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(lowerBound, expectedProbabilities[i], 1e-10); CPPUNIT_ASSERT_DOUBLES_EQUAL(upperBound, expectedProbabilities[i], 1e-10); @@ -605,30 +522,24 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39 filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 + TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 + TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 // We expect the following probabilities for each category: // P(1.1) = P(1.2) = 0.32 // P(2.1) = 0.61 // P(2.2) = 1.00 // P(3.2) = P(5.1) = 0.10 - double expectedProbabilities[] = { 0.32, 0.32, 0.61, 1.0, 0.1, 0.1 }; + double expectedProbabilities[] = {0.32, 0.32, 0.61, 1.0, 0.1, 0.1}; - for (size_t i = 0; i < boost::size(categories); ++i) - { + for (size_t i = 0; i < boost::size(categories); ++i) { double lowerBound, upperBound; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - TDouble1Vec(1, categories[i]), - lowerBound, - upperBound); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound); - LOG_DEBUG("category = " << categories[i] - << ", lower bound = " << lowerBound - << ", upper bound = " << upperBound - << ", expected probability = " << expectedProbabilities[i]); + LOG_DEBUG("category = " << categories[i] << ", lower bound = " << lowerBound << ", upper bound = " << upperBound + << ", expected probability = " << expectedProbabilities[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(lowerBound, expectedProbabilities[i], 1e-10); 
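// Aside (a sketch of where the expected values in this block come from, not
// the library implementation): for a discrete distribution the two-sided
// probability of a less likely sample is the total mass of categories whose
// probability does not exceed that of the observed category.
#include <cassert>
#include <cmath>
#include <vector>

double probabilityOfLessLikely(const std::vector<double>& p, std::size_t x) {
    double result = 0.0;
    for (double pi : p) {
        if (pi <= p[x]) {
            result += pi; // categories no more likely than x
        }
    }
    return result;
}

int main() {
    // The category probabilities used in the first test case above.
    std::vector<double> p{0.10, 0.12, 0.29, 0.39, 0.04, 0.06};
    assert(std::fabs(probabilityOfLessLikely(p, 0) - 0.20) < 1e-12);
    assert(std::fabs(probabilityOfLessLikely(p, 2) - 0.61) < 1e-12);
    assert(std::fabs(probabilityOfLessLikely(p, 3) - 1.00) < 1e-12);
    return 0;
}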
                CPPUNIT_ASSERT_DOUBLES_EQUAL(upperBound, expectedProbabilities[i], 1e-10);
@@ -653,29 +564,23 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples()
                               TDouble4Vec1Vec(1, TDouble4Vec(1, 30000.0))); // P = 0.30
             filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
                               TDouble1Vec(1, categories[4]),
-                              TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0)));  // P = 0.05
+                              TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05
             filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
                               TDouble1Vec(1, categories[5]),
-                              TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0)));  // P = 0.05
+                              TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05

             // We expect the following probabilities for each category:
             //   P(1.1) = P(1.2) = 0.40
             //   P(2.1) = P(2.2) = 1.00
             //   P(3.2) = P(5.1) = 0.10
-            double expectedProbabilities[] = { 0.4, 0.4, 1.0, 1.0, 0.1, 0.1 };
+            double expectedProbabilities[] = {0.4, 0.4, 1.0, 1.0, 0.1, 0.1};

-            for (size_t i = 0; i < boost::size(categories); ++i)
-            {
+            for (size_t i = 0; i < boost::size(categories); ++i) {
                 double lowerBound, upperBound;
-                filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                      TDouble1Vec(1, categories[i]),
-                                                      lowerBound,
-                                                      upperBound);
+                filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound);

-                LOG_DEBUG("category = " << categories[i]
-                          << ", lower bound = " << lowerBound
-                          << ", upper bound = " << upperBound
-                          << ", expected probability = " << expectedProbabilities[i]);
+                LOG_DEBUG("category = " << categories[i] << ", lower bound = " << lowerBound << ", upper bound = " << upperBound
+                                        << ", expected probability = " << expectedProbabilities[i]);

                 CPPUNIT_ASSERT_DOUBLES_EQUAL(lowerBound, expectedProbabilities[i], 1e-10);
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(upperBound, expectedProbabilities[i], 1e-10);
@@ -691,20 +596,14 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples()

             // We expect the following probabilities for each category:
             //   P(1.1) = P(1.2) = P(2.1) = P(2.2) = 1.0
-            double expectedProbabilities[] = { 0.95, 0.95, 0.95, 0.95 };
+            double expectedProbabilities[] = {0.95, 0.95, 0.95, 0.95};

-            for (size_t i = 0; i < boost::size(expectedProbabilities); ++i)
-            {
+            for (size_t i = 0; i < boost::size(expectedProbabilities); ++i) {
                 double lowerBound, upperBound;
-                filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                      TDouble1Vec(1, categories[i]),
-                                                      lowerBound,
-                                                      upperBound);
+                filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound);

-                LOG_DEBUG("category = " << categories[i]
-                          << ", lower bound = " << lowerBound
-                          << ", upper bound = " << upperBound
-                          << ", expected probability = " << expectedProbabilities[i]);
+                LOG_DEBUG("category = " << categories[i] << ", lower bound = " << lowerBound << ", upper bound = " << upperBound
+                                        << ", expected probability = " << expectedProbabilities[i]);

                 CPPUNIT_ASSERT(lowerBound > expectedProbabilities[i]);
                 CPPUNIT_ASSERT(upperBound > expectedProbabilities[i]);
@@ -716,41 +615,32 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples()
         using TDoubleVecDoubleMap = std::map<TDoubleVec, double>;
         using TDoubleVecDoubleMapCItr = TDoubleVecDoubleMap::const_iterator;

-        double categoryProbabilities[] = { 0.10, 0.12, 0.29, 0.39, 0.04, 0.06 };
+        double categoryProbabilities[] = {0.10, 0.12, 0.29, 0.39, 0.04, 0.06};

         TDoubleDoubleVecMap categoryPairProbabilities;
-        for (size_t i = 0u; i < boost::size(categories); ++i)
-        {
-            for (size_t j = i; j < boost::size(categories); ++j)
-            {
-                double p = (i != j ? 2.0 : 1.0)
-                           * categoryProbabilities[i] * categoryProbabilities[j];
+        for (size_t i = 0u; i < boost::size(categories); ++i) {
+            for (size_t j = i; j < boost::size(categories); ++j) {
+                double p = (i != j ? 2.0 : 1.0) * categoryProbabilities[i] * categoryProbabilities[j];

-                TDoubleVec &categoryPair = categoryPairProbabilities.insert(
-                        TDoubleDoubleVecMap::value_type(p, TDoubleVec())).first->second;
+                TDoubleVec& categoryPair =
+                    categoryPairProbabilities.insert(TDoubleDoubleVecMap::value_type(p, TDoubleVec())).first->second;
                 categoryPair.push_back(categories[i]);
                 categoryPair.push_back(categories[j]);
             }
         }
-        LOG_DEBUG("category pair probabilities = "
-                  << core::CContainerPrinter::print(categoryPairProbabilities));
+        LOG_DEBUG("category pair probabilities = " << core::CContainerPrinter::print(categoryPairProbabilities));

         double pc = 0.0;
         TDoubleVecDoubleMap trueProbabilities;
-        for (TDoubleDoubleVecMapCItr itr = categoryPairProbabilities.begin();
-             itr != categoryPairProbabilities.end();
-             ++itr)
-        {
+        for (TDoubleDoubleVecMapCItr itr = categoryPairProbabilities.begin(); itr != categoryPairProbabilities.end(); ++itr) {
             pc += itr->first * static_cast<double>(itr->second.size() / 2u);
-            for (std::size_t i = 0u; i < itr->second.size(); i += 2u)
-            {
+            for (std::size_t i = 0u; i < itr->second.size(); i += 2u) {
                 TDoubleVec categoryPair;
                 categoryPair.push_back(itr->second[i]);
                 categoryPair.push_back(itr->second[i + 1u]);
                 trueProbabilities.insert(TDoubleVecDoubleMap::value_type(categoryPair, pc));
             }
         }
-        LOG_DEBUG("true probabilities = "
-                  << core::CContainerPrinter::print(trueProbabilities));
+        LOG_DEBUG("true probabilities = " << core::CContainerPrinter::print(trueProbabilities));

         CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u));
@@ -769,46 +659,35 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples()
                           TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39
         filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
                           TDouble1Vec(1, categories[4]),
-                          TDouble4Vec1Vec(1, TDouble4Vec(1, 4000.0)));  // P = 0.04
+                          TDouble4Vec1Vec(1, TDouble4Vec(1, 4000.0))); // P = 0.04
         filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
                           TDouble1Vec(1, categories[5]),
-                          TDouble4Vec1Vec(1, TDouble4Vec(1, 6000.0)));  // P = 0.06
+                          TDouble4Vec1Vec(1, TDouble4Vec(1, 6000.0))); // P = 0.06

-        double expectedProbabilities[] = { 0.2, 0.32, 0.61, 1.0, 0.04, 0.1 };
+        double expectedProbabilities[] = {0.2, 0.32, 0.61, 1.0, 0.04, 0.1};

-        for (TDoubleVecDoubleMapCItr itr = trueProbabilities.begin();
-             itr != trueProbabilities.end();
-             ++itr)
-        {
+        for (TDoubleVecDoubleMapCItr itr = trueProbabilities.begin(); itr != trueProbabilities.end(); ++itr) {
             TDoubleVec categoryPair;
             categoryPair.push_back(itr->first[0]);
             categoryPair.push_back(itr->first[1]);

             double lowerBound, upperBound;
-            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                  categoryPair,
-                                                  lowerBound,
-                                                  upperBound);
+            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, categoryPair, lowerBound, upperBound);
             CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
             double probability = lowerBound;

             maths::CJointProbabilityOfLessLikelySamples expectedProbabilityCalculator;
             {
-                ptrdiff_t i = std::lower_bound(categories.begin(),
-                                               categories.end(),
-                                               itr->first[0]) - categories.begin();
-                ptrdiff_t j = std::lower_bound(categories.begin(),
-                                               categories.end(),
-                                               itr->first[1]) - categories.begin();
+                ptrdiff_t i = std::lower_bound(categories.begin(), categories.end(),
itr->first[0]) - categories.begin(); + ptrdiff_t j = std::lower_bound(categories.begin(), categories.end(), itr->first[1]) - categories.begin(); expectedProbabilityCalculator.add(expectedProbabilities[i]); expectedProbabilityCalculator.add(expectedProbabilities[j]); } double expectedProbability; CPPUNIT_ASSERT(expectedProbabilityCalculator.calculate(expectedProbability)); - LOG_DEBUG("category pair = " << core::CContainerPrinter::print(itr->first) - << ", probability = " << probability - << ", expected probability = " << expectedProbability - << ", true probability = " << itr->second); + LOG_DEBUG("category pair = " << core::CContainerPrinter::print(itr->first) << ", probability = " << probability + << ", expected probability = " << expectedProbability + << ", true probability = " << itr->second); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 1e-10); } @@ -816,111 +695,80 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() } { // Test the function to compute all category probabilities. - const double rawCategories[] = - { - 1.1, 1.2, 2.1, 2.2, 3.2, 5.1, 5.5, 6.0, 6.2, 6.6, - 7.8, 8.0, 9.0, 9.9, 10.0, 10.1, 10.2, 12.0, 12.1, 12.8, - 13.1, 13.7, 15.2, 17.1, 17.5, 17.9, 18.2, 19.6, 20.0, 20.2 - }; - const double rawProbabilities[] = - { - 0.02, 0.05, 0.01, 0.2, 0.001, 0.03, 0.02, 0.005, 0.1, 0.03, - 0.04, 0.01, 0.001, 0.006, 0.02, 0.05, 0.001, 0.001, 0.01, 0.01, - 0.2, 0.01, 0.02, 0.07, 0.01, 0.002, 0.01, 0.02, 0.03, 0.013 - }; - - CPPUNIT_ASSERT_EQUAL(boost::size(rawCategories), - boost::size(rawProbabilities)); + const double rawCategories[] = {1.1, 1.2, 2.1, 2.2, 3.2, 5.1, 5.5, 6.0, 6.2, 6.6, 7.8, 8.0, 9.0, 9.9, 10.0, + 10.1, 10.2, 12.0, 12.1, 12.8, 13.1, 13.7, 15.2, 17.1, 17.5, 17.9, 18.2, 19.6, 20.0, 20.2}; + const double rawProbabilities[] = {0.02, 0.05, 0.01, 0.2, 0.001, 0.03, 0.02, 0.005, 0.1, 0.03, + 0.04, 0.01, 0.001, 0.006, 0.02, 0.05, 0.001, 0.001, 0.01, 0.01, + 0.2, 0.01, 0.02, 0.07, 0.01, 0.002, 0.01, 0.02, 0.03, 0.013}; + + CPPUNIT_ASSERT_EQUAL(boost::size(rawCategories), boost::size(rawProbabilities)); test::CRandomNumbers rng; const std::size_t numberSamples = 10000u; // Generate samples from the Dirichlet prior. TDoubleVecVec dirichletSamples(boost::size(rawProbabilities)); - for (size_t i = 0; i < boost::size(rawProbabilities); ++i) - { - TDoubleVec &samples = dirichletSamples[i]; - rng.generateGammaSamples(rawProbabilities[i] * 100.0, - 1.0, - numberSamples, - samples); + for (size_t i = 0; i < boost::size(rawProbabilities); ++i) { + TDoubleVec& samples = dirichletSamples[i]; + rng.generateGammaSamples(rawProbabilities[i] * 100.0, 1.0, numberSamples, samples); } - for (std::size_t i = 0u; i < numberSamples; ++i) - { + for (std::size_t i = 0u; i < numberSamples; ++i) { double n = 0.0; - for (std::size_t j = 0u; j < dirichletSamples.size(); ++j) - { + for (std::size_t j = 0u; j < dirichletSamples.size(); ++j) { n += dirichletSamples[j][i]; } - for (std::size_t j = 0u; j < dirichletSamples.size(); ++j) - { + for (std::size_t j = 0u; j < dirichletSamples.size(); ++j) { dirichletSamples[j][i] /= n; } } // Compute the expected probabilities w.r.t. the Dirichlet prior. 
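// Aside (a sketch of the Monte-Carlo step that follows, under the assumption of
// no tied probabilities): for each Dirichlet draw, sort the sampled category
// probabilities, accumulate them in ascending order, and credit each category
// with the cumulative mass of categories no more likely than itself; averaging
// over draws estimates the expected two-sided probability. The test's version
// below differs slightly at the boundary (it credits the cumulative sum one
// rank higher, with a sentinel of 1.0 for the most likely category).
#include <algorithm>
#include <utility>
#include <vector>

std::vector<double> perDrawProbabilities(const std::vector<double>& p) {
    std::vector<std::pair<double, std::size_t>> sorted;
    for (std::size_t i = 0; i < p.size(); ++i) {
        sorted.emplace_back(p[i], i);
    }
    std::sort(sorted.begin(), sorted.end());
    std::vector<double> result(p.size(), 0.0);
    double cumulative = 0.0;
    for (const auto& pi : sorted) {
        cumulative += pi.first; // mass of categories with probability <= p[i]
        result[pi.second] = cumulative;
    }
    return result;
}

int main() {
    std::vector<double> draw{0.2, 0.5, 0.3};
    std::vector<double> p = perDrawProbabilities(draw);
    // p == {0.2, 1.0, 0.5}: each entry is the mass no more likely than itself.
    return p.size() == draw.size() ? 0 : 1;
}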
        TDoubleVec expectedProbabilities(boost::size(rawCategories), 0.0);
-        for (std::size_t i = 0u; i < numberSamples; ++i)
-        {
+        for (std::size_t i = 0u; i < numberSamples; ++i) {
             TDoubleSizePrVec probabilities;
             probabilities.reserve(dirichletSamples.size() + 1);
-            for (std::size_t j = 0u; j < dirichletSamples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < dirichletSamples.size(); ++j) {
                 probabilities.push_back(TDoubleSizePr(dirichletSamples[j][i], j));
             }
             std::sort(probabilities.begin(), probabilities.end());
-            for (std::size_t j = 1u; j < probabilities.size(); ++j)
-            {
+            for (std::size_t j = 1u; j < probabilities.size(); ++j) {
                 probabilities[j].first += probabilities[j - 1].first;
             }
             probabilities.push_back(TDoubleSizePr(1.0, probabilities.size()));
-            for (std::size_t j = 0u; j < probabilities.size() - 1; ++j)
-            {
+            for (std::size_t j = 0u; j < probabilities.size() - 1; ++j) {
                 expectedProbabilities[probabilities[j].second] += probabilities[j + 1].first;
             }
         }
-        for (std::size_t i = 0u; i < expectedProbabilities.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < expectedProbabilities.size(); ++i) {
             expectedProbabilities[i] /= static_cast<double>(numberSamples);
         }
-        LOG_DEBUG("expectedProbabilities = "
-                  << core::CContainerPrinter::print(expectedProbabilities));
-
-        TDoubleVec categories(boost::begin(rawCategories),
-                              boost::end(rawCategories));
-        CMultinomialConjugate filter(
-                CMultinomialConjugate::nonInformativePrior(categories.size()));
-        for (std::size_t i = 0u; i < categories.size(); ++i)
-        {
+        LOG_DEBUG("expectedProbabilities = " << core::CContainerPrinter::print(expectedProbabilities));
+
+        TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories));
+        CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(categories.size()));
+        for (std::size_t i = 0u; i < categories.size(); ++i) {
             filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
                               TDouble1Vec(1, categories[i]),
                               TDouble4Vec1Vec(1, TDouble4Vec(1, rawProbabilities[i] * 100.0)));
         }

         TDoubleVec lowerBounds, upperBounds;
-        filter.probabilitiesOfLessLikelyCategories(maths_t::E_TwoSided,
-                                                   lowerBounds,
-                                                   upperBounds);
+        filter.probabilitiesOfLessLikelyCategories(maths_t::E_TwoSided, lowerBounds, upperBounds);
         LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(lowerBounds));
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(lowerBounds),
-                             core::CContainerPrinter::print(upperBounds));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(lowerBounds), core::CContainerPrinter::print(upperBounds));

         double totalError = 0.0;
-        for (std::size_t i = 0u; i < lowerBounds.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < lowerBounds.size(); ++i) {
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbabilities[i], lowerBounds[i], 0.1);
             totalError += std::fabs(lowerBounds[i] - expectedProbabilities[i]);
         }
         LOG_DEBUG("totalError = " << totalError);
         CPPUNIT_ASSERT(totalError < 0.7);

-        for (std::size_t i = 0u; i < categories.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < categories.size(); ++i) {
             double lowerBound, upperBound;
-            CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                                 TDouble1Vec(1, categories[i]),
-                                                                 lowerBound,
-                                                                 upperBound));
+            CPPUNIT_ASSERT(
+                filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound));
             CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(lowerBounds[i], lowerBound, 1e-10);
         }
@@ -932,23 +780,19 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples()
     }
 }

-void
CMultinomialConjugateTest::testAnomalyScore() -{ +void CMultinomialConjugateTest::testAnomalyScore() { } -void CMultinomialConjugateTest::testRemoveCategories() -{ +void CMultinomialConjugateTest::testRemoveCategories() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CMultinomialConjugateTest::testRemoveCategories |"); LOG_DEBUG("+---------------------------------------------------+"); - double rawCategories[] = { 1.0, 3.0, 15.0, 17.0, 19.0, 20.0 }; - double rawConcentrations[] = { 1.0, 2.0, 1.5, 12.0, 10.0, 2.0 }; + double rawCategories[] = {1.0, 3.0, 15.0, 17.0, 19.0, 20.0}; + double rawConcentrations[] = {1.0, 2.0, 1.5, 12.0, 10.0, 2.0}; - TDoubleVec categories(boost::begin(rawCategories), - boost::end(rawCategories)); - TDoubleVec concentrationParameters(boost::begin(rawConcentrations), - boost::end(rawConcentrations)); + TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); + TDoubleVec concentrationParameters(boost::begin(rawConcentrations), boost::end(rawConcentrations)); { CMultinomialConjugate prior(maths::CMultinomialConjugate(100, categories, concentrationParameters)); @@ -969,10 +813,8 @@ void CMultinomialConjugateTest::testRemoveCategories() expectedConcentrations.push_back(2.0); CMultinomialConjugate expectedPrior(maths::CMultinomialConjugate(100, expectedCategories, expectedConcentrations)); - LOG_DEBUG("expectedCategories = " - << core::CContainerPrinter::print(expectedCategories)); - LOG_DEBUG("expectedConcentrations = " - << core::CContainerPrinter::print(expectedConcentrations)); + LOG_DEBUG("expectedCategories = " << core::CContainerPrinter::print(expectedCategories)); + LOG_DEBUG("expectedConcentrations = " << core::CContainerPrinter::print(expectedConcentrations)); CPPUNIT_ASSERT_EQUAL(expectedPrior.checksum(), prior.checksum()); } @@ -995,10 +837,8 @@ void CMultinomialConjugateTest::testRemoveCategories() expectedConcentrations.push_back(10.0); CMultinomialConjugate expectedPrior(maths::CMultinomialConjugate(90, expectedCategories, expectedConcentrations)); - LOG_DEBUG("expectedCategories = " - << core::CContainerPrinter::print(expectedCategories)); - LOG_DEBUG("expectedConcentrations = " - << core::CContainerPrinter::print(expectedConcentrations)); + LOG_DEBUG("expectedCategories = " << core::CContainerPrinter::print(expectedCategories)); + LOG_DEBUG("expectedConcentrations = " << core::CContainerPrinter::print(expectedConcentrations)); CPPUNIT_ASSERT_EQUAL(expectedPrior.checksum(), prior.checksum()); } @@ -1013,29 +853,24 @@ void CMultinomialConjugateTest::testRemoveCategories() } } -void CMultinomialConjugateTest::testPersist() -{ +void CMultinomialConjugateTest::testPersist() { LOG_DEBUG("+------------------------------------------+"); LOG_DEBUG("| CMultinomialConjugateTest::testPersist |"); LOG_DEBUG("+------------------------------------------+"); - const double rawCategories[] = { -1.0, 5.0, 2.1, 78.0, 15.3}; - const double rawProbabilities[] = { 0.1, 0.2, 0.35, 0.3, 0.05 }; - const TDoubleVec categories(boost::begin(rawCategories), - boost::end(rawCategories)); - const TDoubleVec probabilities(boost::begin(rawProbabilities), - boost::end(rawProbabilities)); + const double rawCategories[] = {-1.0, 5.0, 2.1, 78.0, 15.3}; + const double rawProbabilities[] = {0.1, 0.2, 0.35, 0.3, 0.05}; + const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); + const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); test::CRandomNumbers rng; TDoubleVec 
samples;
     rng.generateMultinomialSamples(categories, probabilities, 100, samples);

     maths::CMultinomialConjugate origFilter(CMultinomialConjugate::nonInformativePrior(5));
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
-        origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
-                              TDouble1Vec(1, samples[i]),
-                              TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
+        origFilter.addSamples(
+            maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
     }
     double decayRate = origFilter.decayRate();
     uint64_t checksum = origFilter.checksum();
@@ -1061,8 +896,7 @@ void CMultinomialConjugateTest::testPersist()
                                     maths::MINIMUM_CATEGORY_COUNT);
     maths::CMultinomialConjugate restoredFilter(params, traverser);

-    LOG_DEBUG("orig checksum = " << checksum
-              << " restored checksum = " << restoredFilter.checksum());
+    LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());

     CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());

@@ -1076,64 +910,48 @@ void CMultinomialConjugateTest::testPersist()
     CPPUNIT_ASSERT_EQUAL(origXml, newXml);
 }

-void CMultinomialConjugateTest::testOverflow()
-{
+void CMultinomialConjugateTest::testOverflow() {
 }

-void CMultinomialConjugateTest::testConcentration()
-{
+void CMultinomialConjugateTest::testConcentration() {
     CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(5u));
-    for (std::size_t i = 1u; i <= 5u; ++i)
-    {
+    for (std::size_t i = 1u; i <= 5u; ++i) {
         filter.addSamples(TDouble1Vec(i, static_cast<double>(i)));
     }

     double concentration;
-    for (std::size_t i = 1u; i <= 5u; ++i)
-    {
+    for (std::size_t i = 1u; i <= 5u; ++i) {
         double category = static_cast<double>(i);
         filter.concentration(category, concentration);
         CPPUNIT_ASSERT_EQUAL(category, concentration);
     }
 }

-CppUnit::Test *CMultinomialConjugateTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMultinomialConjugateTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testMultipleUpdate",
-                               &CMultinomialConjugateTest::testMultipleUpdate) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testPropagation",
-                               &CMultinomialConjugateTest::testPropagation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testProbabilityEstimation",
-                               &CMultinomialConjugateTest::testProbabilityEstimation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testMarginalLikelihood",
-                               &CMultinomialConjugateTest::testMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testSampleMarginalLikelihood",
-                               &CMultinomialConjugateTest::testSampleMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testProbabilityOfLessLikelySamples",
-                               &CMultinomialConjugateTest::testProbabilityOfLessLikelySamples) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testAnomalyScore",
-                               &CMultinomialConjugateTest::testAnomalyScore) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testRemoveCategories",
-                               &CMultinomialConjugateTest::testRemoveCategories) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testPersist",
-                               &CMultinomialConjugateTest::testPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testOverflow",
-                               &CMultinomialConjugateTest::testOverflow) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultinomialConjugateTest>(
-                               "CMultinomialConjugateTest::testConcentration",
-                               &CMultinomialConjugateTest::testConcentration) );
+CppUnit::Test* CMultinomialConjugateTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultinomialConjugateTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testMultipleUpdate",
+                                                                             &CMultinomialConjugateTest::testMultipleUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testPropagation",
+                                                                             &CMultinomialConjugateTest::testPropagation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testProbabilityEstimation",
+                                                                             &CMultinomialConjugateTest::testProbabilityEstimation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testMarginalLikelihood",
+                                                                             &CMultinomialConjugateTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testSampleMarginalLikelihood",
+                                                                             &CMultinomialConjugateTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>(
+        "CMultinomialConjugateTest::testProbabilityOfLessLikelySamples", &CMultinomialConjugateTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testAnomalyScore",
+                                                                             &CMultinomialConjugateTest::testAnomalyScore));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testRemoveCategories",
+                                                                             &CMultinomialConjugateTest::testRemoveCategories));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testPersist",
+                                                                             &CMultinomialConjugateTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testOverflow",
+                                                                             &CMultinomialConjugateTest::testOverflow));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultinomialConjugateTest>("CMultinomialConjugateTest::testConcentration",
+                                                                             &CMultinomialConjugateTest::testConcentration));

     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CMultinomialConjugateTest.h b/lib/maths/unittest/CMultinomialConjugateTest.h
index 95b5beb0da..bfe5652d37 100644
--- a/lib/maths/unittest/CMultinomialConjugateTest.h
+++ b/lib/maths/unittest/CMultinomialConjugateTest.h
@@ -9,22 +9,21 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CMultinomialConjugateTest : public CppUnit::TestFixture
-{
-    public:
-        void testMultipleUpdate();
-        void testPropagation();
-        void testProbabilityEstimation();
-        void testMarginalLikelihood();
-        void testSampleMarginalLikelihood();
-        void testProbabilityOfLessLikelySamples();
-        void testAnomalyScore();
-        void testRemoveCategories();
-        void testPersist();
-        void testOverflow();
-        void testConcentration();
+class CMultinomialConjugateTest : public CppUnit::TestFixture {
+public:
+    void testMultipleUpdate();
+    void testPropagation();
+    void testProbabilityEstimation();
+    void testMarginalLikelihood();
+    void testSampleMarginalLikelihood();
+    void testProbabilityOfLessLikelySamples();
+    void testAnomalyScore();
+    void testRemoveCategories();
+    void testPersist();
+    void testOverflow();
+    void testConcentration();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CMultinomialConjugateTest_h
diff --git a/lib/maths/unittest/CMultivariateConstantPriorTest.cc b/lib/maths/unittest/CMultivariateConstantPriorTest.cc
index 7151135dbc..c84f302f5f 100644
--- a/lib/maths/unittest/CMultivariateConstantPriorTest.cc
+++ b/lib/maths/unittest/CMultivariateConstantPriorTest.cc
@@ -24,25 +24,20 @@
 using namespace ml;
 using namespace handy_typedefs;

-namespace
-{
+namespace {
 const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight);

-TDouble10Vec4Vec unitWeight(std::size_t dimension)
-{
+TDouble10Vec4Vec unitWeight(std::size_t dimension) {
     return TDouble10Vec4Vec(1, TDouble10Vec(dimension, 1.0));
 }

-TDouble10Vec4Vec1Vec singleUnitWeight(std::size_t dimension)
-{
+TDouble10Vec4Vec1Vec singleUnitWeight(std::size_t dimension) {
     return TDouble10Vec4Vec1Vec(1, unitWeight(dimension));
 }
-
 }

-void CMultivariateConstantPriorTest::testAddSamples()
-{
+void CMultivariateConstantPriorTest::testAddSamples() {
     LOG_DEBUG("+--------------------------------------------------+");
     LOG_DEBUG("| CMultivariateConstantPriorTest::testAddSamples |");
     LOG_DEBUG("+--------------------------------------------------+");
@@ -51,31 +46,24 @@ void CMultivariateConstantPriorTest::testAddSamples()

     maths::CMultivariateConstantPrior filter(2);

-    double wrongDimension[] = { 1.3, 2.1, 7.9 };
+    double wrongDimension[] = {1.3, 2.1, 7.9};

-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(wrongDimension),
-                                                       boost::end(wrongDimension))),
-                      singleUnitWeight(3));
+    filter.addSamples(
+        COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(wrongDimension), boost::end(wrongDimension))), singleUnitWeight(3));
     CPPUNIT_ASSERT(filter.isNonInformative());

-    double nans[] = { 1.3, std::numeric_limits<double>::quiet_NaN() };
+    double nans[] = {1.3, std::numeric_limits<double>::quiet_NaN()};

-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(nans), boost::end(nans))),
-                      singleUnitWeight(3));
+    filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(nans), boost::end(nans))), singleUnitWeight(3));
     CPPUNIT_ASSERT(filter.isNonInformative());

-    double constant[] = { 1.4, 1.0 };
+    double constant[] = {1.4, 1.0};

-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-                      singleUnitWeight(2));
+    filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(2));
     CPPUNIT_ASSERT(!filter.isNonInformative());
 }

-void CMultivariateConstantPriorTest::testMarginalLikelihood()
-{
+void CMultivariateConstantPriorTest::testMarginalLikelihood() {
     LOG_DEBUG("+----------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateConstantPriorTest::testMarginalLikelihood |");
     LOG_DEBUG("+----------------------------------------------------------+");
@@ -86,16 +74,13 @@ void CMultivariateConstantPriorTest::testMarginalLikelihood()

     maths::CMultivariateConstantPrior filter(2);

-    double constant[] = { 1.3, 17.3 };
-    double different[] = { 1.1, 17.3 };
+    double constant[] = {1.3, 17.3};
+    double different[] = {1.1, 17.3};

     double likelihood;
     CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed,
-                         filter.jointLogMarginalLikelihood(COUNT_WEIGHT,
-                                                           TDouble10Vec1Vec(),
-                                                           singleUnitWeight(2),
-                                                           likelihood));
+                         filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(), singleUnitWeight(2), likelihood));

     CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed,
                          filter.jointLogMarginalLikelihood(COUNT_WEIGHT,
                                                            TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))),
@@ -108,9 +93,7 @@ void CMultivariateConstantPriorTest::testMarginalLikelihood()
                                                            likelihood));
     CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds<double>::lowest(), likelihood);

-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-                      singleUnitWeight(2));
+    filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(2));

     CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
                          filter.jointLogMarginalLikelihood(COUNT_WEIGHT,
@@ -119,16 +102,16 @@ void CMultivariateConstantPriorTest::testMarginalLikelihood()
                                                            likelihood));
     CPPUNIT_ASSERT_EQUAL(std::log(boost::numeric::bounds<double>::highest()), likelihood);

-    CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed,
-                         filter.jointLogMarginalLikelihood(COUNT_WEIGHT,
-                                                           TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(different), boost::end(different))),
-                                                           singleUnitWeight(2),
-                                                           likelihood));
+    CPPUNIT_ASSERT_EQUAL(
+        maths_t::E_FpOverflowed,
+        filter.jointLogMarginalLikelihood(COUNT_WEIGHT,
+                                          TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(different), boost::end(different))),
+                                          singleUnitWeight(2),
+                                          likelihood));
     CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds<double>::lowest(), likelihood);
 }

-void CMultivariateConstantPriorTest::testMarginalLikelihoodMean()
-{
+void CMultivariateConstantPriorTest::testMarginalLikelihoodMean() {
     LOG_DEBUG("+--------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateConstantPriorTest::testMarginalLikelihoodMean |");
     LOG_DEBUG("+--------------------------------------------------------------+");
@@ -138,20 +121,15 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodMean()

     maths::CMultivariateConstantPrior filter(3);

-    CPPUNIT_ASSERT_EQUAL(std::string("[0, 0, 0]"),
-                         core::CContainerPrinter::print(filter.marginalLikelihoodMean()));
+    CPPUNIT_ASSERT_EQUAL(std::string("[0, 0, 0]"), core::CContainerPrinter::print(filter.marginalLikelihoodMean()));

-    double constant[] = { 1.2, 6.0, 14.1 };
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-                      singleUnitWeight(3));
+    double constant[] = {1.2, 6.0, 14.1};
+    filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(3));

-    CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 6, 14.1]"),
-                         core::CContainerPrinter::print(filter.marginalLikelihoodMean()));
+    CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 6, 14.1]"), core::CContainerPrinter::print(filter.marginalLikelihoodMean()));
 }

-void CMultivariateConstantPriorTest::testMarginalLikelihoodMode()
-{
+void CMultivariateConstantPriorTest::testMarginalLikelihoodMode() {
     LOG_DEBUG("+--------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateConstantPriorTest::testMarginalLikelihoodMode |");
     LOG_DEBUG("+--------------------------------------------------------------+");
@@ -164,17 +142,14 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodMode()
     CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()),
                          core::CContainerPrinter::print(filter.marginalLikelihoodMode(COUNT_WEIGHT, unitWeight(4))));

-    double constant[] = { 1.1, 6.5, 12.3, 14.1 };
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-                      singleUnitWeight(4));
+    double constant[] = {1.1, 6.5, 12.3, 14.1};
+    filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(4));

     CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()),
                          core::CContainerPrinter::print(filter.marginalLikelihoodMode(COUNT_WEIGHT, unitWeight(4))));
 }

-void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance()
-{
+void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() {
     LOG_DEBUG("+--------------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance |");
     LOG_DEBUG("+--------------------------------------------------------------------+");
@@ -186,39 +161,31 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance()

     TDouble10Vec10Vec covariance = filter.marginalLikelihoodCovariance();
     CPPUNIT_ASSERT_EQUAL(std::size_t(4), covariance.size());
-    for (std::size_t i = 0u; i < 4; ++i)
-    {
+    for (std::size_t i = 0u; i < 4; ++i) {
         CPPUNIT_ASSERT_EQUAL(std::size_t(4), covariance[i].size());
         CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds<double>::highest(), covariance[i][i]);
-        for (std::size_t j = 0; j < i; ++j)
-        {
+        for (std::size_t j = 0; j < i; ++j) {
             CPPUNIT_ASSERT_EQUAL(0.0, covariance[i][j]);
         }
-        for (std::size_t j = i+1; j < 4; ++j)
-        {
+        for (std::size_t j = i + 1; j < 4; ++j) {
             CPPUNIT_ASSERT_EQUAL(0.0, covariance[i][j]);
         }
     }

-    double constant[] = { 1.1, 6.5, 12.3, 14.1 };
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-                      singleUnitWeight(4));
+    double constant[] = {1.1, 6.5, 12.3, 14.1};
+    filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(4));

-    covariance = filter.marginalLikelihoodCovariance();
+    covariance = filter.marginalLikelihoodCovariance();
     CPPUNIT_ASSERT_EQUAL(std::size_t(4), covariance.size());
-    for (std::size_t i = 0u; i < 4; ++i)
-    {
+    for (std::size_t i = 0u; i < 4; ++i) {
         CPPUNIT_ASSERT_EQUAL(std::size_t(4), covariance[i].size());
-        for (std::size_t j = 0; j < 4; ++j)
-        {
+        for (std::size_t j = 0; j < 4; ++j) {
             CPPUNIT_ASSERT_EQUAL(0.0, covariance[i][j]);
         }
     }
 }

-void CMultivariateConstantPriorTest::testSampleMarginalLikelihood()
-{
+void CMultivariateConstantPriorTest::testSampleMarginalLikelihood() {
     LOG_DEBUG("+----------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateConstantPriorTest::testSampleMarginalLikelihood |");
     LOG_DEBUG("+----------------------------------------------------------------+");
@@ -232,22 +199,18 @@ void CMultivariateConstantPriorTest::testSampleMarginalLikelihood()
     filter.sampleMarginalLikelihood(3, samples);
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), samples.size());

-    double constant[] = { 1.2, 4.1 };
+    double constant[] = {1.2, 4.1};

-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-                      singleUnitWeight(2));
+    filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(2));

     filter.sampleMarginalLikelihood(4, samples);
     CPPUNIT_ASSERT_EQUAL(std::size_t(4), samples.size());
-    for (std::size_t i = 0u; i < 4; ++i)
-    {
+    for (std::size_t i = 0u; i < 4; ++i) {
         CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 4.1]"), core::CContainerPrinter::print(samples[i]));
     }
 }

-void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples()
-{
+void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() {
     LOG_DEBUG("+----------------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples |");
LOG_DEBUG("+----------------------------------------------------------------------+"); @@ -257,27 +220,14 @@ void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() maths::CMultivariateConstantPrior filter(2); - double samples_[][2] = - { - { 1.3, 1.4 }, - { 1.1, 1.6 }, - { 1.0, 5.4 } - }; - TDouble10Vec1Vec samples[] = - { - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[0]), boost::end(samples_[0]))), - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[1]), boost::end(samples_[1]))), - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[2]), boost::end(samples_[2]))) - }; - for (std::size_t i = 0u; i < boost::size(samples); ++i) - { + double samples_[][2] = {{1.3, 1.4}, {1.1, 1.6}, {1.0, 5.4}}; + TDouble10Vec1Vec samples[] = {TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[0]), boost::end(samples_[0]))), + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[1]), boost::end(samples_[1]))), + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[2]), boost::end(samples_[2])))}; + for (std::size_t i = 0u; i < boost::size(samples); ++i) { double lb, ub; maths::CMultivariateConstantPrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - COUNT_WEIGHT, - samples[i], - singleUnitWeight(2), - lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, samples[i], singleUnitWeight(2), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(1.0, lb); CPPUNIT_ASSERT_EQUAL(1.0, ub); LOG_DEBUG("tail = " << core::CContainerPrinter::print(tail)); @@ -287,21 +237,11 @@ void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() filter.addSamples(COUNT_WEIGHT, samples[0], singleUnitWeight(2)); CPPUNIT_ASSERT(!filter.isNonInformative()); - std::string expectedTails[] = - { - "[0, 0]", - "[1, 2]", - "[1, 2]" - }; - for (std::size_t i = 0u; i < boost::size(samples); ++i) - { + std::string expectedTails[] = {"[0, 0]", "[1, 2]", "[1, 2]"}; + for (std::size_t i = 0u; i < boost::size(samples); ++i) { double lb, ub; maths::CMultivariateConstantPrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - COUNT_WEIGHT, - samples[i], - singleUnitWeight(2), - lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, samples[i], singleUnitWeight(2), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(i == 0 ? 1.0 : 0.0, lb); CPPUNIT_ASSERT_EQUAL(i == 0 ? 
1.0 : 0.0, ub);
         LOG_DEBUG("tail = " << core::CContainerPrinter::print(tail));
@@ -309,8 +249,7 @@ void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples()
     }
 }

-void CMultivariateConstantPriorTest::testPersist()
-{
+void CMultivariateConstantPriorTest::testPersist() {
     LOG_DEBUG("+-----------------------------------------------+");
     LOG_DEBUG("| CMultivariateConstantPriorTest::testPersist |");
     LOG_DEBUG("+-----------------------------------------------+");
@@ -338,8 +277,7 @@ void CMultivariateConstantPriorTest::testPersist()

         maths::CMultivariateConstantPrior restoredFilter(3, traverser);

-        LOG_DEBUG("orig checksum = " << checksum
-                  << " restored checksum = " << restoredFilter.checksum());
+        LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
         CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());

         // The XML representation of the new filter should be the same as the original
@@ -354,7 +292,7 @@ void CMultivariateConstantPriorTest::testPersist()
     LOG_DEBUG("*** Constant ***");
     {
-        double constant[] = { 1.2, 4.1, 1.0 / 3.0 };
+        double constant[] = {1.2, 4.1, 1.0 / 3.0};

         maths::CMultivariateConstantPrior origFilter(3, TDouble10Vec(boost::begin(constant), boost::end(constant)));
         uint64_t checksum = origFilter.checksum();
@@ -375,8 +313,7 @@ void CMultivariateConstantPriorTest::testPersist()

         maths::CMultivariateConstantPrior restoredFilter(3, traverser);

-        LOG_DEBUG("orig checksum = " << checksum
-                  << " restored checksum = " << restoredFilter.checksum());
+        LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
         CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());

         // The XML representation of the new filter should be the same as the original
@@ -390,34 +327,27 @@ void CMultivariateConstantPriorTest::testPersist()
     }
 }

-CppUnit::Test *CMultivariateConstantPriorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMultivariateConstantPriorTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
-                               "CMultivariateConstantPriorTest::testAddSamples",
-                               &CMultivariateConstantPriorTest::testAddSamples) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
-                               "CMultivariateConstantPriorTest::testMarginalLikelihood",
-                               &CMultivariateConstantPriorTest::testMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
-                               "CMultivariateConstantPriorTest::testMarginalLikelihoodMean",
-                               &CMultivariateConstantPriorTest::testMarginalLikelihoodMean) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
-                               "CMultivariateConstantPriorTest::testMarginalLikelihoodMode",
-                               &CMultivariateConstantPriorTest::testMarginalLikelihoodMode) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
-                               "CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance",
-                               &CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
-                               "CMultivariateConstantPriorTest::testSampleMarginalLikelihood",
-                               &CMultivariateConstantPriorTest::testSampleMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
-                               "CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples",
-                               &CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
-                               "CMultivariateConstantPriorTest::testPersist",
-                               &CMultivariateConstantPriorTest::testPersist) );
+CppUnit::Test* CMultivariateConstantPriorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultivariateConstantPriorTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateConstantPriorTest>("CMultivariateConstantPriorTest::testAddSamples",
+                                                                                  &CMultivariateConstantPriorTest::testAddSamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateConstantPriorTest>("CMultivariateConstantPriorTest::testMarginalLikelihood",
+                                                                                  &CMultivariateConstantPriorTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
+        "CMultivariateConstantPriorTest::testMarginalLikelihoodMean", &CMultivariateConstantPriorTest::testMarginalLikelihoodMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
+        "CMultivariateConstantPriorTest::testMarginalLikelihoodMode", &CMultivariateConstantPriorTest::testMarginalLikelihoodMode));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMultivariateConstantPriorTest>("CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance",
+                                                                &CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateConstantPriorTest>(
+        "CMultivariateConstantPriorTest::testSampleMarginalLikelihood", &CMultivariateConstantPriorTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMultivariateConstantPriorTest>("CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples",
+                                                                &CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateConstantPriorTest>("CMultivariateConstantPriorTest::testPersist",
+                                                                                  &CMultivariateConstantPriorTest::testPersist));

     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CMultivariateConstantPriorTest.h b/lib/maths/unittest/CMultivariateConstantPriorTest.h
index 7e21dcd5ae..cc72b1684e 100644
--- a/lib/maths/unittest/CMultivariateConstantPriorTest.h
+++ b/lib/maths/unittest/CMultivariateConstantPriorTest.h
@@ -9,19 +9,18 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CMultivariateConstantPriorTest : public CppUnit::TestFixture
-{
-    public:
-        void testAddSamples();
-        void testMarginalLikelihood();
-        void testMarginalLikelihoodMean();
-        void testMarginalLikelihoodMode();
-        void testMarginalLikelihoodCovariance();
-        void testSampleMarginalLikelihood();
-        void testProbabilityOfLessLikelySamples();
-        void testPersist();
+class CMultivariateConstantPriorTest : public CppUnit::TestFixture {
+public:
+    void testAddSamples();
+    void testMarginalLikelihood();
+    void testMarginalLikelihoodMean();
+    void testMarginalLikelihoodMode();
+    void testMarginalLikelihoodCovariance();
+    void testSampleMarginalLikelihood();
+    void testProbabilityOfLessLikelySamples();
+    void testPersist();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CMultivariateConstantPriorTest_h
diff --git a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc
index 233fe7b05b..8a397ea1e8 100644
--- a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc
+++ b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc
@@ -35,8 +35,7 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumula
 using TMean2Accumulator = maths::CBasicStatistics::SSampleMean<TVector2>::TAccumulator;
 using TCovariances2 = maths::CBasicStatistics::SSampleCovariances<double, 2>;

-namespace
-{
+namespace {
 const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight);
 const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight);
@@ -44,56 +43,38 @@ const TDouble10Vec UNIT_WEIGHT_2(2, 1.0);
 const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2));

 template<std::size_t N>
-class CMultivariateMultimodalPriorForTest : public maths::CMultivariateMultimodalPrior<N>
-{
-    public:
-        using TClusterer = typename maths::CMultivariateMultimodalPrior<N>::TClusterer;
-        using TModeVec = typename maths::CMultivariateMultimodalPrior<N>::TModeVec;
-
-    public:
-        CMultivariateMultimodalPriorForTest(const maths::CMultivariateMultimodalPrior<N> &prior) :
-            maths::CMultivariateMultimodalPrior<N>(prior)
-        {}
-
-        const TModeVec &modes() const
-        {
-            return this->maths::CMultivariateMultimodalPrior<N>::modes();
-        }
+class CMultivariateMultimodalPriorForTest : public maths::CMultivariateMultimodalPrior<N> {
+public:
+    using TClusterer = typename maths::CMultivariateMultimodalPrior<N>::TClusterer;
+    using TModeVec = typename maths::CMultivariateMultimodalPrior<N>::TModeVec;
+
+public:
+    CMultivariateMultimodalPriorForTest(const maths::CMultivariateMultimodalPrior<N>& prior)
+        : maths::CMultivariateMultimodalPrior<N>(prior) {}
+
+    const TModeVec& modes() const { return this->maths::CMultivariateMultimodalPrior<N>::modes(); }
 };

 template<std::size_t N>
-maths::CMultivariateMultimodalPrior<N> makePrior(maths_t::EDataType dataType,
-                                                 double decayRate = 0.0)
-{
-    maths::CXMeansOnline<maths::CFloatStorage, N> clusterer(dataType,
-                                                            maths_t::E_ClustersFractionWeight,
-                                                            decayRate);
+maths::CMultivariateMultimodalPrior<N> makePrior(maths_t::EDataType dataType, double decayRate = 0.0) {
+    maths::CXMeansOnline<maths::CFloatStorage, N> clusterer(dataType, maths_t::E_ClustersFractionWeight, decayRate);
     return maths::CMultivariateMultimodalPrior<N>(
-            dataType,
-            clusterer,
-            maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate),
-            decayRate);
+        dataType, clusterer, maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate), decayRate);
 }

-void gaussianSamples(test::CRandomNumbers &rng,
+void gaussianSamples(test::CRandomNumbers& rng,
                      std::size_t modes,
-                     const std::size_t *n,
+                     const std::size_t* n,
                      const double (*means)[2],
                      const double (*covariances)[3],
-                     TDouble10Vec1Vec &samples)
-{
-    for (std::size_t i = 0u; i < modes; ++i)
-    {
+                     TDouble10Vec1Vec& samples) {
+    for (std::size_t i = 0u; i < modes; ++i) {
         TVector2 mean(means[i], means[i] + 2);
         TMatrix2 covariance(covariances[i], covariances[i] + 3);
         TDoubleVecVec samples_;
-        rng.generateMultivariateNormalSamples(mean.toVector<TDoubleVec>(),
-                                              covariance.toVectors<TDoubleVecVec>(),
-                                              n[i],
-                                              samples_);
+        rng.generateMultivariateNormalSamples(mean.toVector<TDoubleVec>(), covariance.toVectors<TDoubleVecVec>(), n[i], samples_);
         samples.reserve(samples.size() + samples_.size());
-        for (std::size_t j = 0u; j < samples_.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < samples_.size(); ++j) {
             samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end()));
         }
     }
@@ -101,14 +82,9 @@ void gaussianSamples(test::CRandomNumbers &rng,
 }

 template<std::size_t N>
-double logLikelihood(const double w[N],
-                     const double means[N][2],
-                     const double covariances[N][3],
-                     const TDouble10Vec &x)
-{
+double logLikelihood(const double w[N], const double means[N][2], const double covariances[N][3], const TDouble10Vec& x) {
     double lx = 0.0;
-    for (std::size_t i = 0u; i < N; ++i)
-    {
+    for (std::size_t i = 0u; i < N; ++i) {
         TVector2 mean(means[i]);
         TMatrix2 covariance(covariances[i], covariances[i] + 3);
         double ll;
@@ -118,42 +94,29 @@ double logLikelihood(const double w[N],
     return std::log(lx);
 }

-double logLikelihood(const TDoubleVec &w,
-                     const TDoubleVecVec &means,
-                     const TDoubleVecVecVec &covariances,
-                     const TDoubleVec &x)
-{
+double logLikelihood(const TDoubleVec& w, const TDoubleVecVec& means, const TDoubleVecVecVec& covariances, const TDoubleVec& x) {
     double lx = 0.0;
-    for (std::size_t i = 0u; i < w.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < w.size(); ++i) {
         double ll;
-        maths::gaussianLogLikelihood(TMatrix2(covariances[i]),
-                                     TVector2(x) - TVector2(means[i]),
-                                     ll);
+        maths::gaussianLogLikelihood(TMatrix2(covariances[i]), TVector2(x) - TVector2(means[i]), ll);
         lx += w[i] * std::exp(ll);
     }
     return std::log(lx);
 }

-void empiricalProbabilityOfLessLikelySamples(const TDoubleVec &w,
-                                             const TDoubleVecVec &means,
-                                             const TDoubleVecVecVec &covariances,
-                                             TDoubleVec &result)
-{
+void empiricalProbabilityOfLessLikelySamples(const TDoubleVec& w,
+                                             const TDoubleVecVec& means,
+                                             const TDoubleVecVecVec& covariances,
+                                             TDoubleVec& result) {
     test::CRandomNumbers rng;

     double m = static_cast<double>(w.size());
-    for (std::size_t i = 0u; i < w.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < w.size(); ++i) {
         TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples(means[i],
-                                              covariances[i],
-                                              static_cast<std::size_t>(w[i] * 1000.0 * m),
-                                              samples);
+        rng.generateMultivariateNormalSamples(means[i], covariances[i], static_cast<std::size_t>(w[i] * 1000.0 * m), samples);
         result.reserve(samples.size());
-        for (std::size_t j = 0u; j < samples.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < samples.size(); ++j) {
             result.push_back(logLikelihood(w, means, covariances, samples[j]));
         }
     }
@@ -161,22 +124,22 @@ void empiricalProbabilityOfLessLikelySamples(const TDoubleVec &w,
     std::sort(result.begin(), result.end());
 }

-std::string print(maths_t::EDataType dataType)
-{
-    switch (dataType)
-    {
-    case maths_t::E_DiscreteData: return "Discrete";
-    case maths_t::E_IntegerData: return "Integer";
-    case maths_t::E_ContinuousData: return "Continuous";
-    case maths_t::E_MixedData: return "Mixed";
+std::string print(maths_t::EDataType dataType) {
+    switch (dataType) {
+    case maths_t::E_DiscreteData:
+        return "Discrete";
+    case maths_t::E_IntegerData:
+        return "Integer";
+    case maths_t::E_ContinuousData:
+        return "Continuous";
+    case maths_t::E_MixedData:
+        return "Mixed";
     }
     return "";
 }
-
 }

-void CMultivariateMultimodalPriorTest::testMultipleUpdate()
-{
+void CMultivariateMultimodalPriorTest::testMultipleUpdate() {
     LOG_DEBUG("+--------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateMultimodalPriorTest::testMultipleUpdate |");
     LOG_DEBUG("+--------------------------------------------------------+");
@@ -184,41 +147,30 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate()

     // Test that we get the same result updating once with a vector of 100
     // samples of an R.V. versus updating individually 100 times.
- const std::size_t n[] = { 100 }; - const double means[][2] = { { 10.0, 20.0 } }; - const double covariances[][3] = { { 3.0, 1.0, 2.0 } }; + const std::size_t n[] = {100}; + const double means[][2] = {{10.0, 20.0}}; + const double covariances[][3] = {{3.0, 1.0, 2.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; gaussianSamples(rng, boost::size(n), n, means, covariances, samples); - const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; LOG_DEBUG("****** Test vanilla ******"); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG("*** data type = " << print(dataTypes[i]) << " ***"); maths::CMultivariateMultimodalPrior<2> filter1(makePrior<2>(dataTypes[i])); maths::CMultivariateMultimodalPrior<2> filter2(filter1); maths::CSampling::seed(); - for (std::size_t j = 0; j < samples.size(); ++j) - { - filter1.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + for (std::size_t j = 0; j < samples.size(); ++j) { + filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); } maths::CSampling::seed(); - filter2.addSamples(COUNT_WEIGHT, - samples, - TDouble10Vec4Vec1Vec(samples.size(), - TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter2.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); LOG_DEBUG("checksum 1 " << filter1.checksum()); LOG_DEBUG("checksum 2 " << filter2.checksum()); @@ -226,8 +178,7 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() } LOG_DEBUG("****** Test with variance scale ******"); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG("*** data type = " << print(dataTypes[i]) << " ***"); maths::CMultivariateMultimodalPrior<2> filter1(makePrior<2>(dataTypes[i])); @@ -237,8 +188,7 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5))); weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0))); maths::CSampling::seed(); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { TDouble10Vec1Vec sample(1, samples[j]); TDouble10Vec4Vec1Vec weight(1, weights[j]); filter1.addSamples(VARIANCE_WEIGHT, sample, weight); @@ -252,8 +202,7 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() } } -void CMultivariateMultimodalPriorTest::testPropagation() -{ +void CMultivariateMultimodalPriorTest::testPropagation() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CMultivariateMultimodalPriorTest::testPropagation |"); LOG_DEBUG("+-----------------------------------------------------+"); @@ -266,17 +215,9 @@ void CMultivariateMultimodalPriorTest::testPropagation() const double eps = 1e-3; - const std::size_t n[] = { 400, 600 }; - const double means[][2] = - { - { 10.0, 10.0 }, - { 20.0, 20.0 } - }; - const double covariances[][3] = - { - { 8.0, 1.0, 8.0 }, - { 20.0, -4.0, 10.0 } - }; + const std::size_t n[] = {400, 600}; + const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; + const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; @@ -288,9 +229,7 @@ void CMultivariateMultimodalPriorTest::testPropagation() const double decayRate = 0.1; 
maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData, decayRate)); - filter.addSamples(COUNT_WEIGHT, - samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); double numberSamples = filter.numberSamples(); TDouble10Vec mean = filter.marginalLikelihoodMean(); @@ -313,26 +252,22 @@ void CMultivariateMultimodalPriorTest::testPropagation() CPPUNIT_ASSERT((TVector2(propagatedMean) - TVector2(mean)).euclidean() < eps * TVector2(mean).euclidean()); Eigen::MatrixXd c(2, 2); Eigen::MatrixXd cp(2, 2); - for (std::size_t i = 0u; i < 2; ++i) - { - for (std::size_t j = 0u; j < 2; ++j) - { - c(i,j) = covariance[i][j]; - cp(i,j) = propagatedCovariance[i][j]; + for (std::size_t i = 0u; i < 2; ++i) { + for (std::size_t j = 0u; j < 2; ++j) { + c(i, j) = covariance[i][j]; + cp(i, j) = propagatedCovariance[i][j]; } } - Eigen::VectorXd sv = c.jacobiSvd().singularValues(); + Eigen::VectorXd sv = c.jacobiSvd().singularValues(); Eigen::VectorXd svp = cp.jacobiSvd().singularValues(); LOG_DEBUG("singular values = " << sv.transpose()); LOG_DEBUG("propagated singular values = " << svp.transpose()); - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { CPPUNIT_ASSERT(svp(i) > sv(i)); } } -void CMultivariateMultimodalPriorTest::testSingleMode() -{ +void CMultivariateMultimodalPriorTest::testSingleMode() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CMultivariateMultimodalPriorTest::testSingleMode |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -341,9 +276,9 @@ void CMultivariateMultimodalPriorTest::testSingleMode() maths::CSampling::seed(); - const std::size_t n[] = { 500 }; - const double means[][2] = { { 20.0, 20.0 } }; - const double covariances[][3] = { { 40.0, 10.0, 20.0 } }; + const std::size_t n[] = {500}; + const double means[][2] = {{20.0, 20.0}}; + const double covariances[][3] = {{40.0, 10.0, 20.0}}; test::CRandomNumbers rng; @@ -352,17 +287,13 @@ void CMultivariateMultimodalPriorTest::testSingleMode() maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - for (std::size_t i = 0; i < samples.size(); ++i) - { - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, samples[i]), - SINGLE_UNIT_WEIGHT_2); + for (std::size_t i = 0; i < samples.size(); ++i) { + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); CPPUNIT_ASSERT_EQUAL(std::size_t(1), filter.numberModes()); } } -void CMultivariateMultimodalPriorTest::testMultipleModes() -{ +void CMultivariateMultimodalPriorTest::testMultipleModes() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CMultivariateMultimodalPriorTest::testMultipleModes |"); LOG_DEBUG("+-------------------------------------------------------+"); @@ -382,79 +313,53 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() LOG_DEBUG("Mixture Normals"); { - const std::size_t n[] = { 400, 600 }; - const double means[][2] = - { - { 10.0, 10.0 }, - { 20.0, 20.0 } - }; - const double covariances[][3] = - { - { 4.0, 1.0, 4.0 }, - { 10.0, -4.0, 6.0 } - }; + const std::size_t n[] = {400, 600}; + const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; + const double covariances[][3] = {{4.0, 1.0, 4.0}, {10.0, -4.0, 6.0}}; TDouble10Vec1Vec samples; gaussianSamples(rng, boost::size(n), n, means, covariances, samples); - 
double w[] =
-    {
-        n[0] / static_cast<double>(n[0] + n[1]),
-        n[1] / static_cast<double>(n[0] + n[1])
-    };
+        double w[] = {n[0] / static_cast<double>(n[0] + n[1]), n[1] / static_cast<double>(n[0] + n[1])};
 
         double loss = 0.0;
         TMeanAccumulator differentialEntropy_;
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             differentialEntropy_.add(-logLikelihood<2>(w, means, covariances, samples[i]));
         }
         double differentialEntropy = maths::CBasicStatistics::mean(differentialEntropy_);
 
-        for (std::size_t i = 0; i < 10; ++i)
-        {
+        for (std::size_t i = 0; i < 10; ++i) {
             rng.random_shuffle(samples.begin(), samples.end());
 
             maths::CMultivariateMultimodalPrior<2> filter1(makePrior<2>(maths_t::E_ContinuousData));
             maths::CMultivariateNormalConjugate<2> filter2 =
-                    maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData);
+                maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData);
 
-            filter1.addSamples(COUNT_WEIGHT,
-                               samples,
-                               TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
-            filter2.addSamples(COUNT_WEIGHT,
-                               samples,
-                               TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
+            filter1.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
+            filter2.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
             CPPUNIT_ASSERT_EQUAL(std::size_t(2), filter1.numberModes());
 
             TMeanAccumulator loss1G;
             TMeanAccumulator loss12;
 
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
                 double ll = logLikelihood<2>(w, means, covariances, samples[j]);
                 TDouble10Vec1Vec sample(1, samples[j]);
                 double l1;
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter1.jointLogMarginalLikelihood(COUNT_WEIGHT,
-                                                                        sample,
-                                                                        SINGLE_UNIT_WEIGHT_2,
-                                                                        l1));
+                                     filter1.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l1));
                 loss1G.add(ll - l1);
                 double l2;
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter2.jointLogMarginalLikelihood(COUNT_WEIGHT,
-                                                                        sample,
-                                                                        SINGLE_UNIT_WEIGHT_2,
-                                                                        l2));
+                                     filter2.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l2));
                 loss12.add(l2 - l1);
             }
 
-            LOG_DEBUG("loss1G = " << maths::CBasicStatistics::mean(loss1G)
-                      << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
-                      << ", differential entropy " << differentialEntropy);
+            LOG_DEBUG("loss1G = " << maths::CBasicStatistics::mean(loss1G) << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
+                                  << ", differential entropy " << differentialEntropy);
 
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss12) < 0.0);
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss1G) / differentialEntropy < 0.0);
@@ -462,14 +367,12 @@ void CMultivariateMultimodalPriorTest::testMultipleModes()
         }
         loss /= 10.0;
 
-        LOG_DEBUG("loss = " << loss
-                  << ", differential entropy = " << differentialEntropy);
+        LOG_DEBUG("loss = " << loss << ", differential entropy = " << differentialEntropy);
 
         CPPUNIT_ASSERT(loss / differentialEntropy < 0.0);
     }
 }
 
-void CMultivariateMultimodalPriorTest::testSplitAndMerge()
-{
+void CMultivariateMultimodalPriorTest::testSplitAndMerge() {
     LOG_DEBUG("+-------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateMultimodalPriorTest::testSplitAndMerge |");
     LOG_DEBUG("+-------------------------------------------------------+");
@@ -482,41 +385,20 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge()
 
     test::CRandomNumbers
rng;
 
-    double means_[][2] =
-        {
-            { 10, 15 },
-            { 30, 10 },
-            { 10, 15 },
-            { 30, 10 }
-        };
-    double covariances_[][2][2] =
-        {
-            { { 10, 2}, {2, 15} },
-            { { 30, 8}, {8, 15} },
-            { {100, 2}, {2, 15} },
-            { {100, 2}, {2, 15} }
-        };
+    double means_[][2] = {{10, 15}, {30, 10}, {10, 15}, {30, 10}};
+    double covariances_[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{100, 2}, {2, 15}}, {{100, 2}, {2, 15}}};
 
     TDoubleVecVec means(boost::size(means_));
     TDoubleVecVecVec covariances(boost::size(means_));
-    for (std::size_t i = 0u; i < boost::size(means_); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(means_); ++i) {
         means[i].assign(&means_[i][0], &means_[i][2]);
-        for (std::size_t j = 0u; j < 2; ++j)
-        {
-            covariances[i].push_back(TDoubleVec(&covariances_[i][j][0],
-                                                &covariances_[i][j][2]));
+        for (std::size_t j = 0u; j < 2; ++j) {
+            covariances[i].push_back(TDoubleVec(&covariances_[i][j][0], &covariances_[i][j][2]));
         }
     }
 
-    LOG_DEBUG("Clusters Split and Merge")
-    {
-        std::size_t n[][4] =
-            {
-                { 200, 0, 0, 0 },
-                { 100, 100, 0, 0 },
-                { 0, 0, 300, 300 }
-            };
+    LOG_DEBUG("Clusters Split and Merge") {
+        std::size_t n[][4] = {{200, 0, 0, 0}, {100, 100, 0, 0}, {0, 0, 300, 300}};
 
         TCovariances2 totalCovariances;
         TCovariances2 modeCovariances[4];
@@ -532,15 +414,12 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge()
 
         //std::size_t subplotCounts[] = { 50, 200, 250, 450, 500, 550, 585, 615, 650, 750, 800, 1000, 10000 };
         //TDouble10Vec1Vec pointsToDate;
 
-        for (std::size_t i = 0u; i < boost::size(n); ++i)
-        {
+        for (std::size_t i = 0u; i < boost::size(n); ++i) {
             TDoubleVecVec samples;
-            for (std::size_t j = 0u; j < boost::size(n[i]); ++j)
-            {
+            for (std::size_t j = 0u; j < boost::size(n[i]); ++j) {
                 TDoubleVecVec samples_;
                 rng.generateMultivariateNormalSamples(means[j], covariances[j], n[i][j], samples_);
-                for (std::size_t k = 0u; k < samples_.size(); ++k)
-                {
+                for (std::size_t k = 0u; k < samples_.size(); ++k) {
                     modeCovariances[j].add(TVector2(samples_[k]));
                     totalCovariances.add(TVector2(samples_[k]));
                 }
@@ -549,11 +428,9 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge()
             rng.random_shuffle(samples.begin(), samples.end());
             LOG_DEBUG("# samples = " << samples.size());
 
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
-                filter.addSamples(COUNT_WEIGHT,
-                                  TDouble10Vec1Vec(1, samples[j]),
-                                  TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
+                filter.addSamples(
+                    COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
 
                 //pointsToDate.push_back(samples[j]);
                 //if (pointsToDate.size() == subplotCounts[subplot])
@@ -573,33 +450,30 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge()
                 //}
             }
 
-            const CMultivariateMultimodalPriorForTest<2>::TModeVec &modes = filter.modes();
+            const CMultivariateMultimodalPriorForTest<2>::TModeVec& modes = filter.modes();
             LOG_DEBUG("# modes = " << modes.size());
             LOG_DEBUG("prior = " << filter.print());
 
-            for (std::size_t j = 0u; j < modes.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < modes.size(); ++j) {
                 maths::CBasicStatistics::COrderStatisticsStack<double, 1> meanError;
                 maths::CBasicStatistics::COrderStatisticsStack<double, 1> covError;
-                if (modes.size() == 1)
-                {
-                    meanError.add(( TVector2(modes[j].s_Prior->marginalLikelihoodMean())
-                                   - maths::CBasicStatistics::mean(totalCovariances)).euclidean());
+                if (modes.size() == 1) {
+                    meanError.add((TVector2(modes[j].s_Prior->marginalLikelihoodMean()) - maths::CBasicStatistics::mean(totalCovariances))
+ .euclidean()); TMatrix2 mlc(modes[j].s_Prior->marginalLikelihoodCovariance()); TMatrix2 tcm = maths::CBasicStatistics::covariances(totalCovariances); covError.add((mlc - tcm).frobenius() / tcm.frobenius()); - } - else - { - for (std::size_t k = 0u; k < boost::size(modeCovariances); ++k) - { - meanError.add( ( TVector2(modes[j].s_Prior->marginalLikelihoodMean()) - - maths::CBasicStatistics::mean(modeCovariances[k])).euclidean() - / maths::CBasicStatistics::mean(modeCovariances[k]).euclidean()); - covError.add( ( TMatrix2(modes[j].s_Prior->marginalLikelihoodCovariance()) - - maths::CBasicStatistics::covariances(modeCovariances[k])).frobenius() - / maths::CBasicStatistics::covariances(modeCovariances[k]).frobenius()); + } else { + for (std::size_t k = 0u; k < boost::size(modeCovariances); ++k) { + meanError.add( + (TVector2(modes[j].s_Prior->marginalLikelihoodMean()) - maths::CBasicStatistics::mean(modeCovariances[k])) + .euclidean() / + maths::CBasicStatistics::mean(modeCovariances[k]).euclidean()); + covError.add((TMatrix2(modes[j].s_Prior->marginalLikelihoodCovariance()) - + maths::CBasicStatistics::covariances(modeCovariances[k])) + .frobenius() / + maths::CBasicStatistics::covariances(modeCovariances[k]).frobenius()); } } @@ -620,8 +494,7 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() } } -void CMultivariateMultimodalPriorTest::testMarginalLikelihood() -{ +void CMultivariateMultimodalPriorTest::testMarginalLikelihood() { LOG_DEBUG("+------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateMultimodalPriorTest::testMarginalLikelihood |"); LOG_DEBUG("+------------------------------------------------------------+"); @@ -636,24 +509,21 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() test::CRandomNumbers rng; - std::size_t sizes_[] = { 200, 150, 300 }; + std::size_t sizes_[] = {200, 150, 300}; TSizeVec sizes(boost::begin(sizes_), boost::end(sizes_)); TMeanAccumulator meanZ; TMeanAccumulator meanMeanError; TMeanAccumulator meanCovarianceError; - for (std::size_t t = 0u; t < 10; /**/) - { + for (std::size_t t = 0u; t < 10; /**/) { TVector2Vec means; TMatrix2Vec covariances; TVector2VecVec samples_; rng.generateRandomMultivariateNormals(sizes, means, covariances, samples_); TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - for (std::size_t j = 0u; j < samples_[i].size(); ++j) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { + for (std::size_t j = 0u; j < samples_[i].size(); ++j) { samples.push_back(samples_[i][j].toVector()); } } @@ -662,61 +532,55 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); filter.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); LOG_DEBUG("# modes = " << filter.numberModes()); - if (filter.numberModes() != 3) - { + if (filter.numberModes() != 3) { continue; } - LOG_DEBUG("*** Test " << t+1 << " ***"); + LOG_DEBUG("*** Test " << t + 1 << " ***"); ++t; TDouble10Vec m = filter.marginalLikelihoodMean(); TDouble10Vec10Vec v = filter.marginalLikelihoodCovariance(); TVector2 expectedMean(m.begin(), m.end()); - double elements[] = { v[0][0], v[0][1], v[1][1] }; + double elements[] = {v[0][0], v[0][1], v[1][1]}; TMatrix2 expectedCovariance(elements, elements + 3); double z = 0.0; TVector2 actualMean(0.0); TMatrix2 actualCovariance(0.0); - for (std::size_t i = 0u; i < means.size(); ++i) - { + for 
(std::size_t i = 0u; i < means.size(); ++i) {
             double trace = covariances[i].trace();
             LOG_DEBUG("m = " << means[i]);
             LOG_DEBUG("v = " << trace);
 
-            double intervals[][2] =
-                {
-                    { means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace) },
-                    { means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace) },
-                    { means[i](0) - 3.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace) },
-                    { means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace) },
-                    { means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace) },
-                    { means[i](0) - 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace) },
-                    { means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace) },
-                    { means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace) },
-                    { means[i](0) + 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace) }
-                };
+            double intervals[][2] = {{means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)},
+                                     {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)},
+                                     {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)},
+                                     {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)},
+                                     {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)},
+                                     {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)},
+                                     {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)},
+                                     {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)},
+                                     {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}};
 
             CUnitKernel<2> likelihoodKernel(filter);
             CMeanKernel<2> meanKernel(filter);
             CCovarianceKernel<2> covarianceKernel(filter, expectedMean);
 
-            for (std::size_t j = 0u; j < boost::size(intervals); ++j)
-            {
+            for (std::size_t j = 0u; j < boost::size(intervals); ++j) {
                 TDoubleVec a(boost::begin(intervals[j]), boost::end(intervals[j]));
                 TDoubleVec b(a);
                 b[0] += 2.0 * std::sqrt(trace);
                 b[1] += 2.0 * std::sqrt(trace);
 
                 double zj;
-                maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderThree, 2>(likelihoodKernel, a, b, zj);
+                maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderThree, 2>(
+                    likelihoodKernel, a, b, zj);
 
                 TVector2 mj;
-                maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderThree, 2>(meanKernel, a, b, mj);
+                maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderThree, 2>(
+                    meanKernel, a, b, mj);
 
                 TMatrix2 cj;
-                maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderThree, 2>(covarianceKernel, a, b, cj);
+                maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderThree, 2>(
+                    covarianceKernel, a, b, cj);
 
                 z += zj;
                 actualMean += mj;
@@ -752,8 +616,7 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood()
 
     CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanCovarianceError) < 0.04);
 }
 
-void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean()
-{
+void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() {
     LOG_DEBUG("+----------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean |");
     LOG_DEBUG("+----------------------------------------------------------------+");
@@ -765,17 +628,9 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean()
 
     const double eps = 0.05;
 
-    const std::size_t n[] = { 400, 600 };
-    const double means[][2] =
-        {
-            { 10.0, 10.0 },
-            { 20.0, 20.0 }
-        };
-    const double covariances[][3] =
-        {
-            { 8.0, 1.0, 8.0 },
-            { 20.0, -4.0, 10.0 }
-        };
+    const std::size_t n[] = {400, 600};
+    const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}};
+    const double covariances[][3] =
{{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; @@ -788,20 +643,17 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() TMean2Accumulator expectedMean; TMeanAccumulator meanError; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec{samples[i]}, SINGLE_UNIT_WEIGHT_2); expectedMean.add(TVector2(samples[i])); - if (i % 10 == 0) - { + if (i % 10 == 0) { LOG_DEBUG("sample mean = " << maths::CBasicStatistics::mean(expectedMean)); LOG_DEBUG("distribution mean = " << core::CContainerPrinter::print(filter.marginalLikelihoodMean())); } - double error = ( maths::CBasicStatistics::mean(expectedMean) - - TVector2(filter.marginalLikelihoodMean())).euclidean() - / maths::CBasicStatistics::mean(expectedMean).euclidean(); + double error = (maths::CBasicStatistics::mean(expectedMean) - TVector2(filter.marginalLikelihoodMean())).euclidean() / + maths::CBasicStatistics::mean(expectedMean).euclidean(); CPPUNIT_ASSERT(error < eps); meanError.add(error); } @@ -810,8 +662,7 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.002); } -void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() -{ +void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() { LOG_DEBUG("+----------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode |"); LOG_DEBUG("+----------------------------------------------------------------+"); @@ -824,48 +675,37 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() double eps = 1e-6; - std::size_t sizes_[] = { 150, 200, 100 }; + std::size_t sizes_[] = {150, 200, 100}; TSizeVec sizes(boost::begin(sizes_), boost::end(sizes_)); test::CRandomNumbers rng; TMeanAccumulator meanError; - for (std::size_t t = 0u; t < 50; ++t) - { + for (std::size_t t = 0u; t < 50; ++t) { TVector2Vec means; TMatrix2Vec covariances; TVector2VecVec samples_; rng.generateRandomMultivariateNormals(sizes, means, covariances, samples_); TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - for (std::size_t j = 0u; j < samples_[i].size(); ++j) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { + for (std::size_t j = 0u; j < samples_[i].size(); ++j) { samples.push_back(samples_[i][j].toVector()); } } rng.random_shuffle(samples.begin(), samples.end()); CMultivariateMultimodalPriorForTest<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, - samples, - TDouble10Vec4Vec1Vec(samples.size(), SINGLE_UNIT_WEIGHT_2[0])); + filter.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), SINGLE_UNIT_WEIGHT_2[0])); TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]); TVector2 expectedMode; TMaxAccumulator maxLikelihood; - for (std::size_t i = 0u; i < filter.modes().size(); ++i) - { - TDouble10Vec mi = (filter.modes())[i].s_Prior->marginalLikelihoodMode(COUNT_WEIGHT, - SINGLE_UNIT_WEIGHT_2[0]); + for (std::size_t i = 0u; i < filter.modes().size(); ++i) { + TDouble10Vec mi = (filter.modes())[i].s_Prior->marginalLikelihoodMode(COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]); double likelihood; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, - TDouble10Vec1Vec(1, mi), - SINGLE_UNIT_WEIGHT_2, - likelihood); - if (maxLikelihood.add(likelihood)) - { + filter.jointLogMarginalLikelihood(COUNT_WEIGHT, 
TDouble10Vec1Vec(1, mi), SINGLE_UNIT_WEIGHT_2, likelihood); + if (maxLikelihood.add(likelihood)) { expectedMode = TVector2(mi); } } @@ -882,8 +722,7 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.02); } -void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() -{ +void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() { LOG_DEBUG("+------------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood |"); LOG_DEBUG("+------------------------------------------------------------------+"); @@ -900,35 +739,22 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() test::CRandomNumbers rng; - const std::size_t n[] = { 400, 600 }; - const double means_[][2] = - { - { 10.0, 10.0 }, - { 20.0, 20.0 } - }; - const double covariances_[][3] = - { - { 8.0, 1.0, 8.0 }, - { 20.0, -4.0, 10.0 } - }; + const std::size_t n[] = {400, 600}; + const double means_[][2] = {{10.0, 10.0}, {20.0, 20.0}}; + const double covariances_[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; TVector2Vec means; TMatrix2Vec covariances; TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TVector2 mean(means_[i]); TMatrix2 covariance(covariances_[i], covariances_[i] + 3); means.push_back(mean); covariances.push_back(covariance); TDoubleVecVec samples_; - rng.generateMultivariateNormalSamples(mean.toVector(), - covariance.toVectors(), - n[i], - samples_); + rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n[i], samples_); samples.reserve(samples.size() + samples_.size()); - for (std::size_t j = 0u; j < samples_.size(); ++j) - { + for (std::size_t j = 0u; j < samples_.size(); ++j) { samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end())); } } @@ -936,9 +762,7 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() LOG_DEBUG("# samples = " << samples.size()); maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, - samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); TDouble10Vec1Vec sampled; filter.sampleMarginalLikelihood(300, sampled); @@ -947,8 +771,7 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() TDouble10Vec10Vec expectedCovariance_ = filter.marginalLikelihoodCovariance(); TCovariances2 sampledCovariances; - for (std::size_t i = 0u; i < sampled.size(); ++i) - { + for (std::size_t i = 0u; i < sampled.size(); ++i) { sampledCovariances.add(TVector2(sampled[i])); } @@ -964,16 +787,14 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() CPPUNIT_ASSERT((sampledCovariance - expectedCovariance).frobenius() < 5e-3 * expectedCovariance.frobenius()); TCovariances2 modeSampledCovariances[2]; - for (std::size_t i = 0u; i < sampled.size(); ++i) - { + for (std::size_t i = 0u; i < sampled.size(); ++i) { double l1, l2; maths::gaussianLogLikelihood(covariances[0], TVector2(sampled[i]) - means[0], l1); maths::gaussianLogLikelihood(covariances[1], TVector2(sampled[i]) - means[1], l2); modeSampledCovariances[l1 > l2 ? 
0 : 1].add(TVector2(sampled[i]));
     }
 
-    for (std::size_t i = 0u; i < 2; ++i)
-    {
+    for (std::size_t i = 0u; i < 2; ++i) {
         TVector2 modeSampledMean = maths::CBasicStatistics::mean(modeSampledCovariances[i]);
         TMatrix2 modeSampledCovariance = maths::CBasicStatistics::covariances(modeSampledCovariances[i]);
         LOG_DEBUG("sample mean = " << means[i]);
@@ -983,14 +804,13 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood()
         CPPUNIT_ASSERT((modeSampledMean - means[i]).euclidean() < 0.03 * means[i].euclidean());
         CPPUNIT_ASSERT((modeSampledCovariance - covariances[i]).frobenius() < 0.2 * covariances[i].frobenius());
     }
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(n[0]) / static_cast<double>(n[1]),
-                                 maths::CBasicStatistics::count(modeSampledCovariances[0])
-                                / maths::CBasicStatistics::count(modeSampledCovariances[1]),
-                                 0.02);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(
+        static_cast<double>(n[0]) / static_cast<double>(n[1]),
+        maths::CBasicStatistics::count(modeSampledCovariances[0]) / maths::CBasicStatistics::count(modeSampledCovariances[1]),
+        0.02);
 }
 
-void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples()
-{
+void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
     LOG_DEBUG("+------------------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples |");
     LOG_DEBUG("+------------------------------------------------------------------------+");
@@ -1000,48 +820,25 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples()
 
     maths::CSampling::seed();
 
-    const double w_[][3] =
-        {
-            { 0.25, 0.3, 0.45 },
-            { 0.1, 0.3, 0.6 }
-        };
-    const double means_[][3][2] =
-        {
-            { {10, 10}, { 15, 18}, {10, 60} },
-            { { 0, 0}, {-20, -30}, {40, 15} }
-        };
-    const double covariances_[][3][2][2] =
-        {
-            {{ {10, 0}, { 0, 10} }, { {10, 9}, { 9, 10} }, { {10, -9}, {-9, 10} }},
-            {{ { 5, 0}, { 0, 5} }, { {40, 9}, { 9, 40} }, { {30, -27}, {-27, 30} }}
-        };
-    const double offsets[][2] =
-        {
-            { 0.0, 0.0 },
-            { 0.0, 6.0 },
-            { 4.0, 0.0 },
-            { 6.0, 6.0 },
-            { 6.0, -6.0 },
-            { -8.0, 8.0 },
-            { -8.0, -8.0 }
-        };
+    const double w_[][3] = {{0.25, 0.3, 0.45}, {0.1, 0.3, 0.6}};
+    const double means_[][3][2] = {{{10, 10}, {15, 18}, {10, 60}}, {{0, 0}, {-20, -30}, {40, 15}}};
+    const double covariances_[][3][2][2] = {{{{10, 0}, {0, 10}}, {{10, 9}, {9, 10}}, {{10, -9}, {-9, 10}}},
+                                            {{{5, 0}, {0, 5}}, {{40, 9}, {9, 40}}, {{30, -27}, {-27, 30}}}};
+    const double offsets[][2] = {{0.0, 0.0}, {0.0, 6.0}, {4.0, 0.0}, {6.0, 6.0}, {6.0, -6.0}, {-8.0, 8.0}, {-8.0, -8.0}};
 
     test::CRandomNumbers rng;
 
-    for (std::size_t i = 0u; i < boost::size(w_); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(w_); ++i) {
         std::size_t n = (boost::size(w_[i]));
         TDoubleVec w(n);
         TDoubleVecVec means(n);
         TDoubleVecVecVec covariances(n);
-        for (std::size_t j = 0u; j < boost::size(w_[i]); ++j)
-        {
+        for (std::size_t j = 0u; j < boost::size(w_[i]); ++j) {
             w[j] = w_[i][j];
             means[j].assign(means_[i][j], means_[i][j] + 2);
             covariances[j].resize(2);
-            for (std::size_t k = 0u; k < 2; ++k)
-            {
+            for (std::size_t k = 0u; k < 2; ++k) {
                 covariances[j][k].assign(covariances_[i][j][k], covariances_[i][j][k] + 2);
             }
         }
@@ -1049,20 +846,15 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples()
         LOG_DEBUG("covariances = " << core::CContainerPrinter::print(covariances));
 
         TDoubleVecVec samples;
-        for (std::size_t j = 0u; j < w.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < w.size(); ++j) {
             TDoubleVecVec samples_;
-            rng.generateMultivariateNormalSamples(means[j],
-                                                  covariances[j],
-                                                  static_cast<std::size_t>(w[j] * 1000.0),
-                                                  samples_);
+            rng.generateMultivariateNormalSamples(means[j], covariances[j], static_cast<std::size_t>(w[j] * 1000.0), samples_);
             samples.insert(samples.end(), samples_.begin(), samples_.end());
         }
         rng.random_shuffle(samples.begin(), samples.end());
 
         CMultivariateMultimodalPriorForTest<2> filter(makePrior<2>(maths_t::E_ContinuousData));
-        for (std::size_t k = 0u; k < samples.size(); ++k)
-        {
+        for (std::size_t k = 0u; k < samples.size(); ++k) {
             filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), SINGLE_UNIT_WEIGHT_2);
         }
         LOG_DEBUG("# modes = " << filter.numberModes());
@@ -1070,32 +862,25 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples()
 
         TDoubleVec p;
         empiricalProbabilityOfLessLikelySamples(w, means, covariances, p);
 
-        for (std::size_t j = 0u; j < means.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < means.size(); ++j) {
             TMeanAccumulator meanAbsError;
             TMeanAccumulator meanRelError;
 
-            for (std::size_t k = 0u; k < boost::size(offsets); ++k)
-            {
+            for (std::size_t k = 0u; k < boost::size(offsets); ++k) {
                 TVector2 x = TVector2(means[j]) + TVector2(offsets[k]);
 
                 double ll = logLikelihood(w, means, covariances, x.toVector());
-                double px = static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin())
-                           / static_cast<double>(p.size());
+                double px = static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) / static_cast<double>(p.size());
 
                 double lb, ub;
                 maths::CMultivariatePrior::TTail10Vec tail;
-                filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                      COUNT_WEIGHT,
-                                                      TDouble10Vec1Vec(1, x.toVector()),
-                                                      SINGLE_UNIT_WEIGHT_2,
-                                                      lb, ub, tail);
+                filter.probabilityOfLessLikelySamples(
+                    maths_t::E_TwoSided, COUNT_WEIGHT, TDouble10Vec1Vec(1, x.toVector()), SINGLE_UNIT_WEIGHT_2, lb, ub, tail);
                 double pa = (lb + ub) / 2.0;
 
                 LOG_DEBUG(" p(" << x << "), actual = " << pa << ", expected = " << px);
                 meanAbsError.add(std::fabs(px - pa));
-                if (px < 1.0 && px > 0.0)
-                {
+                if (px < 1.0 && px > 0.0) {
                     meanRelError.add(std::fabs(std::log(px) - std::log(pa)) / std::fabs(std::log(px)));
                 }
             }
@@ -1109,8 +894,7 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples()
     }
 }
 
-void CMultivariateMultimodalPriorTest::testIntegerData()
-{
+void CMultivariateMultimodalPriorTest::testIntegerData() {
     LOG_DEBUG("+-----------------------------------------------------+");
     LOG_DEBUG("| CMultivariateMultimodalPriorTest::testIntegerData |");
     LOG_DEBUG("+-----------------------------------------------------+");
@@ -1118,8 +902,7 @@ void CMultivariateMultimodalPriorTest::testIntegerData()
 
     // TODO
 }
 
-void CMultivariateMultimodalPriorTest::testLowVariationData()
-{
+void CMultivariateMultimodalPriorTest::testLowVariationData() {
     LOG_DEBUG("+----------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateMultimodalPriorTest::testLowVariationData |");
    LOG_DEBUG("+----------------------------------------------------------+");
@@ -1127,8 +910,7 @@ void CMultivariateMultimodalPriorTest::testLowVariationData()
 
     // TODO
 }
 
-void CMultivariateMultimodalPriorTest::testLatLongData()
-{
+void CMultivariateMultimodalPriorTest::testLatLongData() {
     LOG_DEBUG("+-----------------------------------------------------+");
     LOG_DEBUG("| CMultivariateMultimodalPriorTest::testLatLongData |");
     LOG_DEBUG("+-----------------------------------------------------+");
@@ -1137,29 +919,25 @@ void CMultivariateMultimodalPriorTest::testLatLongData()
 
     using TTimeDoubleVecPrVec = std::vector<TTimeDoubleVecPr>;
TTimeDoubleVecPrVec timeseries;
-    CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/lat_lng.csv",
-                                                    timeseries,
-                                                    test::CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX));
+    CPPUNIT_ASSERT(
+        test::CTimeSeriesTestData::parse("testfiles/lat_lng.csv", timeseries, test::CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX));
     CPPUNIT_ASSERT(!timeseries.empty());
 
-    LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(),
-                                                                timeseries.begin() + 10)
-              << " ...");
+    LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ...");
 
     maths_t::EDataType dataType = maths_t::E_ContinuousData;
-    boost::shared_ptr<maths::CMultivariatePrior> modePrior =
-            maths::CMultivariateNormalConjugateFactory::nonInformative(2, dataType, 0.001);
+    boost::shared_ptr<maths::CMultivariatePrior> modePrior = maths::CMultivariateNormalConjugateFactory::nonInformative(2, dataType, 0.001);
     boost::shared_ptr<maths::CMultivariatePrior> filter =
-            maths::CMultivariateMultimodalPriorFactory::nonInformative(2, // dimension
-                                                                       dataType, 0.0005,
-                                                                       maths_t::E_ClustersFractionWeight,
-                                                                       0.02, // minimumClusterFraction
-                                                                       4, // minimumClusterCount
-                                                                       0.8, // minimumCategoryCount
-                                                                       *modePrior);
-
-    for (std::size_t i = 0u; i < timeseries.size(); ++i)
-    {
+        maths::CMultivariateMultimodalPriorFactory::nonInformative(2, // dimension
+                                                                   dataType,
+                                                                   0.0005,
+                                                                   maths_t::E_ClustersFractionWeight,
+                                                                   0.02, // minimumClusterFraction
+                                                                   4,    // minimumClusterCount
+                                                                   0.8,  // minimumCategoryCount
+                                                                   *modePrior);
+
+    for (std::size_t i = 0u; i < timeseries.size(); ++i) {
         filter->addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, timeseries[i].second), SINGLE_UNIT_WEIGHT_2);
         filter->propagateForwardsByTime(1.0);
     }
@@ -1186,8 +964,7 @@ void CMultivariateMultimodalPriorTest::testLatLongData()
 
     //f << prior->printMarginalLikelihoodFunction(0, 1);
 }
 
-void CMultivariateMultimodalPriorTest::testPersist()
-{
+void CMultivariateMultimodalPriorTest::testPersist() {
     LOG_DEBUG("+-------------------------------------------------+");
     LOG_DEBUG("| CMultivariateMultimodalPriorTest::testPersist |");
     LOG_DEBUG("+-------------------------------------------------+");
@@ -1196,17 +973,9 @@ void CMultivariateMultimodalPriorTest::testPersist()
 
     maths::CSampling::seed();
 
-    std::size_t n[] = { 100, 100 };
-    const double means[][2] =
-        {
-            { 10.0, 20.0 },
-            { 100.0, 30.0 }
-        };
-    const double covariances[][3] =
-        {
-            { 3.0, 1.0, 2.0 },
-            { 60.0, 20.0, 70.0 }
-        };
+    std::size_t n[] = {100, 100};
+    const double means[][2] = {{10.0, 20.0}, {100.0, 30.0}};
+    const double covariances[][3] = {{3.0, 1.0, 2.0}, {60.0, 20.0, 70.0}};
 
     test::CRandomNumbers rng;
 
@@ -1219,11 +988,8 @@ void CMultivariateMultimodalPriorTest::testPersist()
 
     maths::CMultivariateMultimodalPrior<2> origFilter(makePrior<2>(dataType));
 
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
-        origFilter.addSamples(COUNT_WEIGHT,
-                              TDouble10Vec1Vec(1, samples[i]),
-                              SINGLE_UNIT_WEIGHT_2);
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
+        origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2);
     }
     uint64_t checksum = origFilter.checksum();
@@ -1241,14 +1007,14 @@ void CMultivariateMultimodalPriorTest::testPersist()
 
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
 
-    maths::SDistributionRestoreParams params(dataType, decayRate + 0.1,
+    maths::SDistributionRestoreParams params(dataType,
+                                             decayRate + 0.1,
                                              maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
                                              maths::MINIMUM_CLUSTER_SPLIT_COUNT,
                                              maths::MINIMUM_CATEGORY_COUNT);
maths::CMultivariateMultimodalPrior<2> restoredFilter(params, traverser);
 
-    LOG_DEBUG("orig checksum = " << checksum
-              << " restored checksum = " << restoredFilter.checksum());
+    LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
     CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());
 
     // The XML representation of the new filter should be the same as the original
@@ -1261,52 +1027,38 @@ void CMultivariateMultimodalPriorTest::testPersist()
 
     CPPUNIT_ASSERT_EQUAL(origXml, newXml);
 }
 
-CppUnit::Test *CMultivariateMultimodalPriorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMultivariateMultimodalPriorTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testMultipleUpdate",
-                               &CMultivariateMultimodalPriorTest::testMultipleUpdate) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testPropagation",
-                               &CMultivariateMultimodalPriorTest::testPropagation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testSingleMode",
-                               &CMultivariateMultimodalPriorTest::testSingleMode) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testMultipleModes",
-                               &CMultivariateMultimodalPriorTest::testMultipleModes) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testSplitAndMerge",
-                               &CMultivariateMultimodalPriorTest::testSplitAndMerge) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testMarginalLikelihood",
-                               &CMultivariateMultimodalPriorTest::testMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean",
-                               &CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode",
-                               &CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood",
-                               &CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples",
-                               &CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testIntegerData",
-                               &CMultivariateMultimodalPriorTest::testIntegerData) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testLowVariationData",
-                               &CMultivariateMultimodalPriorTest::testLowVariationData) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testLatLongData",
-                               &CMultivariateMultimodalPriorTest::testLatLongData) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
-                               "CMultivariateMultimodalPriorTest::testPersist",
-                               &CMultivariateMultimodalPriorTest::testPersist) );
+CppUnit::Test* CMultivariateMultimodalPriorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultivariateMultimodalPriorTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testMultipleUpdate",
+                                                                                    &CMultivariateMultimodalPriorTest::testMultipleUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testPropagation",
+                                                                                    &CMultivariateMultimodalPriorTest::testPropagation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testSingleMode",
+                                                                                    &CMultivariateMultimodalPriorTest::testSingleMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testMultipleModes",
+                                                                                    &CMultivariateMultimodalPriorTest::testMultipleModes));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testSplitAndMerge",
+                                                                                    &CMultivariateMultimodalPriorTest::testSplitAndMerge));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+        "CMultivariateMultimodalPriorTest::testMarginalLikelihood", &CMultivariateMultimodalPriorTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+        "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean", &CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+        "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode", &CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+        "CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood", &CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples",
+                                                                  &CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testIntegerData",
+                                                                                    &CMultivariateMultimodalPriorTest::testIntegerData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+        "CMultivariateMultimodalPriorTest::testLowVariationData", &CMultivariateMultimodalPriorTest::testLowVariationData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testLatLongData",
+                                                                                    &CMultivariateMultimodalPriorTest::testLatLongData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testPersist",
+                                                                                    &CMultivariateMultimodalPriorTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CMultivariateMultimodalPriorTest.h b/lib/maths/unittest/CMultivariateMultimodalPriorTest.h
index aa257cf9fd..ce96eaccb1 100644
--- a/lib/maths/unittest/CMultivariateMultimodalPriorTest.h
+++ b/lib/maths/unittest/CMultivariateMultimodalPriorTest.h
@@ -9,25 +9,24 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CMultivariateMultimodalPriorTest : public CppUnit::TestFixture
-{
-    public:
-        void testMultipleUpdate();
-        void testPropagation();
-        void testSingleMode();
-        void testMultipleModes();
-        void testSplitAndMerge();
-        void testMarginalLikelihood();
-        void testMarginalLikelihoodMean();
-        void testMarginalLikelihoodMode();
-        void testSampleMarginalLikelihood();
-        void testProbabilityOfLessLikelySamples();
-        void testIntegerData();
-        void testLowVariationData();
-        void testLatLongData();
-        void testPersist();
+class CMultivariateMultimodalPriorTest : public CppUnit::TestFixture {
+public:
+    void testMultipleUpdate();
+    void testPropagation();
+    void testSingleMode();
+    void testMultipleModes();
+    void testSplitAndMerge();
+    void testMarginalLikelihood();
+    void testMarginalLikelihoodMean();
+    void testMarginalLikelihoodMode();
+    void testSampleMarginalLikelihood();
+    void testProbabilityOfLessLikelySamples();
+    void testIntegerData();
+    void testLowVariationData();
+    void testLatLongData();
+    void testPersist();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif //
INCLUDED_CMultivariateMultimodalPriorTest_h
diff --git a/lib/maths/unittest/CMultivariateNormalConjugateTest.cc b/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
index 604765ba81..0e3a23dc04 100644
--- a/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
+++ b/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
@@ -28,82 +28,66 @@
 using TDoubleDoublePr = std::pair<double, double>;
 using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
 using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 
-namespace
-{
+namespace {
 const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight);
 const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight);
 const TDouble10Vec4Vec UNIT_WEIGHT_2(1, TDouble10Vec(2, 1.0));
 const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0)));
 
-void empiricalProbabilityOfLessLikelySamples(const TDoubleVec &mean,
-                                             const TDoubleVecVec &covariance,
-                                             TDoubleVec &result)
-{
+void empiricalProbabilityOfLessLikelySamples(const TDoubleVec& mean, const TDoubleVecVec& covariance, TDoubleVec& result) {
     test::CRandomNumbers rng;
 
     TDoubleVecVec samples;
     rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);
 
     result.resize(samples.size());
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
-        maths::gaussianLogLikelihood(TMatrix2(covariance),
-                                     TVector2(samples[i]) - TVector2(mean),
-                                     result[i]);
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
+        maths::gaussianLogLikelihood(TMatrix2(covariance), TVector2(samples[i]) - TVector2(mean), result[i]);
     }
     std::sort(result.begin(), result.end());
 }
 
-std::string print(maths_t::EDataType dataType)
-{
-    switch (dataType)
-    {
-    case maths_t::E_DiscreteData: return "Discrete";
-    case maths_t::E_IntegerData: return "Integer";
-    case maths_t::E_ContinuousData: return "Continuous";
-    case maths_t::E_MixedData: return "Mixed";
+std::string print(maths_t::EDataType dataType) {
+    switch (dataType) {
+    case maths_t::E_DiscreteData:
+        return "Discrete";
+    case maths_t::E_IntegerData:
+        return "Integer";
+    case maths_t::E_ContinuousData:
+        return "Continuous";
+    case maths_t::E_MixedData:
+        return "Mixed";
     }
     return "";
 }
 
-void gaussianSamples(test::CRandomNumbers &rng,
+void gaussianSamples(test::CRandomNumbers& rng,
                     std::size_t n,
                     const double (&means)[2],
                     const double (&covariances)[3],
-                    TDouble10Vec1Vec &samples)
-{
+                    TDouble10Vec1Vec& samples) {
     TVector2 mean(means, means + 2);
     TMatrix2 covariance(covariances, covariances + 3);
     TDoubleVecVec samples_;
-    rng.generateMultivariateNormalSamples(mean.toVector(),
-                                          covariance.toVectors(),
-                                          n,
-                                          samples_);
+    rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n, samples_);
     samples.reserve(samples.size() + samples_.size());
-    for (std::size_t j = 0u; j < samples_.size(); ++j)
-    {
+    for (std::size_t j = 0u; j < samples_.size(); ++j) {
        samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end()));
    }
    LOG_DEBUG("# samples = " << samples.size());
 }
-
 }
 
-void CMultivariateNormalConjugateTest::testMultipleUpdate()
-{
+void CMultivariateNormalConjugateTest::testMultipleUpdate() {
    LOG_DEBUG("+--------------------------------------------------------+");
    LOG_DEBUG("| CMultivariateNormalConjugateTest::testMultipleUpdate |");
    LOG_DEBUG("+--------------------------------------------------------+");
 
    maths::CSampling::seed();
 
-    const maths_t::EDataType dataTypes[] =
-        {
-            maths_t::E_IntegerData,
-            maths_t::E_ContinuousData
-        };
+    const maths_t::EDataType dataTypes[] =
{maths_t::E_IntegerData, maths_t::E_ContinuousData}; - const double mean[] = { 10.0, 20.0 }; - const double covariance[] = { 3.0, 1.0, 2.0 }; + const double mean[] = {10.0, 20.0}; + const double covariance[] = {3.0, 1.0, 2.0}; test::CRandomNumbers rng; @@ -114,16 +98,13 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() gaussianSamples(rng, 100, mean, covariance, samples); LOG_DEBUG("****** Test vanilla ******"); - for (std::size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (std::size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG("*** data type = " << print(dataTypes[i]) << " ***"); - maths::CMultivariateNormalConjugate<2> filter1( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); + maths::CMultivariateNormalConjugate<2> filter1(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); maths::CMultivariateNormalConjugate<2> filter2(filter1); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); } TDouble10Vec4Vec1Vec weights(samples.size(), UNIT_WEIGHT_2); @@ -133,20 +114,17 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() } LOG_DEBUG("****** Test with variance scale ******"); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG("*** data type = " << print(dataTypes[i]) << " ***"); - maths::CMultivariateNormalConjugate<2> filter1( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); + maths::CMultivariateNormalConjugate<2> filter1(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); maths::CMultivariateNormalConjugate<2> filter2(filter1); TDouble10Vec4Vec1Vec weights; weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5))); weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0))); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { TDouble10Vec1Vec sample(1, samples[j]); TDouble10Vec4Vec1Vec weight(1, weights[j]); filter1.addSamples(VARIANCE_WEIGHT, sample, weight); @@ -159,18 +137,15 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() // Test the count weight is equivalent to adding repeated samples. 
LOG_DEBUG("****** Test count weight ******"); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG("*** data type = " << print(dataTypes[i]) << " ***"); - maths::CMultivariateNormalConjugate<2> filter1( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); + maths::CMultivariateNormalConjugate<2> filter1(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); maths::CMultivariateNormalConjugate<2> filter2(filter1); double x = 3.0; std::size_t count = 10; - for (std::size_t j = 0u; j < count; ++j) - { + for (std::size_t j = 0u; j < count; ++j) { filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, x)), SINGLE_UNIT_WEIGHT_2); } TDouble10Vec1Vec sample(1, TDouble10Vec(2, x)); @@ -181,8 +156,7 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() } } -void CMultivariateNormalConjugateTest::testPropagation() -{ +void CMultivariateNormalConjugateTest::testPropagation() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testPropagation |"); LOG_DEBUG("+-----------------------------------------------------+"); @@ -192,28 +166,22 @@ void CMultivariateNormalConjugateTest::testPropagation() maths::CSampling::seed(); - const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; const double eps = 1e-12; - const double mean[] = { 10.0, 20.0 }; - const double covariance[] = { 3.0, 1.0, 2.0 }; + const double mean[] = {10.0, 20.0}; + const double covariance[] = {3.0, 1.0, 2.0}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; gaussianSamples(rng, 100, mean, covariance, samples); - for (std::size_t i = 0u; i < boost::size(dataTypes); ++i) - { + for (std::size_t i = 0u; i < boost::size(dataTypes); ++i) { LOG_DEBUG("*** data type = " << print(dataTypes[i]) << " ***"); - maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i], 0.1)); + maths::CMultivariateNormalConjugate<2> filter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i], 0.1)); TDouble10Vec4Vec1Vec weights(samples.size(), UNIT_WEIGHT_2); filter.addSamples(COUNT_WEIGHT, samples, weights); @@ -236,8 +204,7 @@ void CMultivariateNormalConjugateTest::testPropagation() } } -void CMultivariateNormalConjugateTest::testMeanVectorEstimation() -{ +void CMultivariateNormalConjugateTest::testMeanVectorEstimation() { LOG_DEBUG("+--------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testMeanVectorEstimation |"); LOG_DEBUG("+--------------------------------------------------------------+"); @@ -249,45 +216,37 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() maths::CSampling::seed(); - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nt = 500u; - const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 }; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; test::CRandomNumbers rng; - for (std::size_t i = 0; i < boost::size(decayRates); ++i) - { + for (std::size_t i = 0; i < boost::size(decayRates); ++i) { LOG_DEBUG("decay rate = " << decayRates[i]); - unsigned int errors[][8] = - { - { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }, - { 0u, 0u, 0u, 0u, 
0u, 0u, 0u, 0u } - }; + unsigned int errors[][8] = {{0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}, {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}}; - for (unsigned int t = 0; t < nt; ++t) - { - if ((t % 50) == 0) - { + for (unsigned int t = 0; t < nt; ++t) { + if ((t % 50) == 0) { LOG_DEBUG("test = " << t << " / " << nt); } // Generate the samples. - double mean_[] = { 0.5 * (t + 1.0), t + 1.0 }; - double covariances_[] = { 40.0, 12.0, 20.0 }; + double mean_[] = {0.5 * (t + 1.0), t + 1.0}; + double covariances_[] = {40.0, 12.0, 20.0}; TDoubleVec mean(mean_, mean_ + 2); TDoubleVecVec covariances; - covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); + covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); covariances.push_back(TDoubleVec(covariances_ + 1, covariances_ + 3)); TDoubleVecVec samples; rng.generateMultivariateNormalSamples(mean, covariances, 500, samples); // Create the posterior. maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, decayRates[i])); + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); filter.propagateForwardsByTime(1.0); } @@ -297,49 +256,37 @@ TVector2Vec meanSamples; filter.randomSampleMeanPrior(n, meanSamples); TDoubleVecVec componentSamples(2); - for (std::size_t j = 0; j < meanSamples.size(); ++j) - { + for (std::size_t j = 0; j < meanSamples.size(); ++j) { componentSamples[0].push_back(meanSamples[j](0)); componentSamples[1].push_back(meanSamples[j](1)); } std::sort(componentSamples[0].begin(), componentSamples[0].end()); std::sort(componentSamples[1].begin(), componentSamples[1].end()); - for (std::size_t j = 0; j < boost::size(testIntervals); ++j) - { - std::size_t l = static_cast<std::size_t>( static_cast<double>(n) - * (0.5 - testIntervals[j] / 200.0)); - std::size_t u = static_cast<std::size_t>( static_cast<double>(n) - * (0.5 + testIntervals[j] / 200.0)); - for (std::size_t k = 0u; k < 2; ++k) - { + for (std::size_t j = 0; j < boost::size(testIntervals); ++j) { + std::size_t l = static_cast<std::size_t>(static_cast<double>(n) * (0.5 - testIntervals[j] / 200.0)); + std::size_t u = static_cast<std::size_t>(static_cast<double>(n) * (0.5 + testIntervals[j] / 200.0)); + for (std::size_t k = 0u; k < 2; ++k) { double a = componentSamples[k][l]; double b = componentSamples[k][u]; - if (mean_[k] < a || mean_[k] > b) - { + if (mean_[k] < a || mean_[k] > b) { ++errors[k][j]; } } } } - for (std::size_t j = 0; j < boost::size(testIntervals); ++j) - { - for (std::size_t k = 0u; k < 2; ++k) - { + for (std::size_t j = 0; j < boost::size(testIntervals); ++j) { + for (std::size_t k = 0u; k < 2; ++k) { double interval = 100.0 * errors[k][j] / static_cast<double>(nt); - LOG_DEBUG("interval = " << interval - << ", expectedInterval = " << (100.0 - testIntervals[j])); + LOG_DEBUG("interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j])); // If the decay rate is zero the intervals should be accurate. // Otherwise, they should be an upper bound.
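For readers following the logic of this hunk: l and u index the empirical central interval of the sorted posterior samples, and the error counters tally how often the true mean falls outside it. The same calculation in isolation, as a minimal sketch (function and variable names here are illustrative, not part of the library):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // True if `truth` lies outside the central `interval`% of `samples`.
    bool outsideCentralInterval(std::vector<double> samples, double truth, double interval) {
        std::sort(samples.begin(), samples.end());
        double n = static_cast<double>(samples.size());
        std::size_t l = static_cast<std::size_t>(n * (0.5 - interval / 200.0));
        std::size_t u = static_cast<std::size_t>(n * (0.5 + interval / 200.0));
        return truth < samples[l] || truth > samples[u];
    }

Counting this event over the nt trials and dividing by nt should give roughly (100 - interval) / 100 for a calibrated posterior, which is what the assertions that follow verify.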
- if (decayRates[i] == 0.0) - { + if (decayRates[i] == 0.0) { CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[j]), 5.0); - } - else - { + } else { CPPUNIT_ASSERT(interval <= (100.0 - testIntervals[j]) + 4.0); } } @@ -347,8 +294,7 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() } } -void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() -{ +void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() { LOG_DEBUG("+-------------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation |"); LOG_DEBUG("+-------------------------------------------------------------------+"); @@ -360,57 +306,43 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() maths::CSampling::seed(); - const double decayRates[] = { 0.0, 0.004, 0.04 }; + const double decayRates[] = {0.0, 0.004, 0.04}; const unsigned int nt = 500u; - const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 }; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; test::CRandomNumbers rng; - for (std::size_t i = 0; i < boost::size(decayRates); ++i) - { + for (std::size_t i = 0; i < boost::size(decayRates); ++i) { LOG_DEBUG("decay rate = " << decayRates[i]); - unsigned int errors[][8] = - { - { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }, - { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }, - { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u } - }; - std::size_t ij[][2] = { {0,0}, {0,1}, {1,1} }; + unsigned int errors[][8] = {{0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}, {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}, {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}}; + std::size_t ij[][2] = {{0, 0}, {0, 1}, {1, 1}}; TDoubleVec covariancesii; rng.generateUniformSamples(10.0, 20.0, 2 * nt, covariancesii); TDoubleVec covariancesij; - rng.generateUniformSamples(-5.0, 5.0, 1 * nt, covariancesij); + rng.generateUniformSamples(-5.0, 5.0, 1 * nt, covariancesij); - for (unsigned int t = 0; t < nt; ++t) - { - if ((t % 50) == 0) - { + for (unsigned int t = 0; t < nt; ++t) { + if ((t % 50) == 0) { LOG_DEBUG("test = " << t << " / " << nt); } // Generate the samples. - double mean_[] = { 10.0, 10.0 }; - double covariances_[] = - { - covariancesii[2 * t], - covariancesij[t], - covariancesii[2 * t + 1] - }; + double mean_[] = {10.0, 10.0}; + double covariances_[] = {covariancesii[2 * t], covariancesij[t], covariancesii[2 * t + 1]}; TDoubleVec mean(mean_, mean_ + 2); TDoubleVecVec covariances; - covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); + covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); covariances.push_back(TDoubleVec(covariances_ + 1, covariances_ + 3)); TDoubleVecVec samples; rng.generateMultivariateNormalSamples(mean, covariances, 500, samples); // Create the posterior. 
maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, decayRates[i])); + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); filter.propagateForwardsByTime(1.0); } @@ -420,8 +352,7 @@ TMatrix2Vec precisionSamples; filter.randomSamplePrecisionMatrixPrior(n, precisionSamples); TDouble10Vec4Vec elementSamples(3); - for (std::size_t j = 0; j < precisionSamples.size(); ++j) - { + for (std::size_t j = 0; j < precisionSamples.size(); ++j) { elementSamples[0].push_back(precisionSamples[j](0, 0)); elementSamples[1].push_back(precisionSamples[j](1, 0)); elementSamples[2].push_back(precisionSamples[j](1, 1)); @@ -433,42 +364,30 @@ TMatrix2 covarianceMatrix(covariances_, covariances_ + 3); TMatrix2 precisionMatrix(maths::fromDenseMatrix(maths::toDenseMatrix(covarianceMatrix).inverse())); - for (std::size_t j = 0; j < boost::size(testIntervals); ++j) - { - std::size_t l = static_cast<std::size_t>( static_cast<double>(n) - * (0.5 - testIntervals[j] / 200.0)); - std::size_t u = static_cast<std::size_t>( static_cast<double>(n) - * (0.5 + testIntervals[j] / 200.0)); - for (std::size_t k = 0u; k < elementSamples.size(); ++k) - { + for (std::size_t j = 0; j < boost::size(testIntervals); ++j) { + std::size_t l = static_cast<std::size_t>(static_cast<double>(n) * (0.5 - testIntervals[j] / 200.0)); + std::size_t u = static_cast<std::size_t>(static_cast<double>(n) * (0.5 + testIntervals[j] / 200.0)); + for (std::size_t k = 0u; k < elementSamples.size(); ++k) { double a = elementSamples[k][l]; double b = elementSamples[k][u]; - if ( precisionMatrix(ij[k][0], ij[k][1]) < a - || precisionMatrix(ij[k][0], ij[k][1]) > b) - { + if (precisionMatrix(ij[k][0], ij[k][1]) < a || precisionMatrix(ij[k][0], ij[k][1]) > b) { ++errors[k][j]; } } } } - for (std::size_t j = 0; j < boost::size(testIntervals); ++j) - { - for (std::size_t k = 0u; k < boost::size(errors); ++k) - { + for (std::size_t j = 0; j < boost::size(testIntervals); ++j) { + for (std::size_t k = 0u; k < boost::size(errors); ++k) { double interval = 100.0 * errors[k][j] / static_cast<double>(nt); - LOG_DEBUG("interval = " << interval - << ", expectedInterval = " << (100.0 - testIntervals[j])); + LOG_DEBUG("interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j])); // If the decay rate is zero the intervals should be accurate. // Otherwise, they should be an upper bound.
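The upper-bound assertion for nonzero decay rates has a simple explanation: propagating the prior forwards exponentially down-weights old observations, so the posterior behaves as if conditioned on fewer samples and its credible intervals widen. A rough sketch of the effect, under the simplifying assumption that each unit of time multiplies the accumulated count by exp(-decayRate) (illustrative only; the library's bookkeeping differs in detail):

    #include <cmath>
    #include <cstddef>

    // Effective sample count after n unit-time updates with exponential
    // down-weighting.
    double effectiveSampleCount(std::size_t n, double decayRate) {
        double f = std::exp(-decayRate);
        // Geometric series 1 + f + f^2 + ... + f^(n - 1).
        return f == 1.0 ? static_cast<double>(n)
                        : (1.0 - std::pow(f, static_cast<double>(n))) / (1.0 - f);
    }

For decayRate = 0.04 and 500 samples this is roughly 25 effective samples, so the sampled precision matrices scatter more widely and the observed coverage error can only undershoot the nominal rate.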
- if (decayRates[i] == 0.0) - { + if (decayRates[i] == 0.0) { CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[j]), 4.0); - } - else - { + } else { CPPUNIT_ASSERT(interval <= (100.0 - testIntervals[j])); } } @@ -476,8 +395,7 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() } } -void CMultivariateNormalConjugateTest::testMarginalLikelihood() -{ +void CMultivariateNormalConjugateTest::testMarginalLikelihood() { LOG_DEBUG("+------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testMarginalLikelihood |"); LOG_DEBUG("+------------------------------------------------------------+"); @@ -495,67 +413,55 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihood() test::CRandomNumbers rng; TDoubleVec meani; - rng.generateUniformSamples( 0.0, 50.0, 2 * nt, meani); + rng.generateUniformSamples(0.0, 50.0, 2 * nt, meani); TDoubleVec covariancesii; - rng.generateUniformSamples( 20.0, 500.0, 2 * nt, covariancesii); + rng.generateUniformSamples(20.0, 500.0, 2 * nt, covariancesii); TDoubleVec covariancesij; - rng.generateUniformSamples(-10.0, 10.0, 1 * nt, covariancesij); + rng.generateUniformSamples(-10.0, 10.0, 1 * nt, covariancesij); - for (std::size_t t = 0u; t < nt; ++t) - { - LOG_DEBUG("*** Test " << t+1 << " ***"); + for (std::size_t t = 0u; t < nt; ++t) { + LOG_DEBUG("*** Test " << t + 1 << " ***"); // Generate the samples. - double mean_[] = { meani[2 * t], meani[2 * t + 1] }; - double covariances_[] = - { - covariancesii[2 * t], - covariancesij[t], - covariancesii[2 * t + 1] - }; + double mean_[] = {meani[2 * t], meani[2 * t + 1]}; + double covariances_[] = {covariancesii[2 * t], covariancesij[t], covariancesii[2 * t + 1]}; TDoubleVec mean(mean_, mean_ + 2); TDoubleVecVec covariances; - covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); + covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); covariances.push_back(TDoubleVec(covariances_ + 1, covariances_ + 3)); TDoubleVecVec samples; maths::CSampling::multivariateNormalSample(mean, covariances, 20, samples); maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); TMeanAccumulator meanMeanError; TMeanAccumulator meanCovarianceError; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); - if (!filter.isNonInformative()) - { + if (!filter.isNonInformative()) { TDouble10Vec m = filter.marginalLikelihoodMean(); TDouble10Vec10Vec v = filter.marginalLikelihoodCovariance(); LOG_DEBUG("m = " << core::CContainerPrinter::print(m)); LOG_DEBUG("v = " << core::CContainerPrinter::print(v)); double trace = 0.0; - for (std::size_t j = 0u; j < v.size(); ++j) - { + for (std::size_t j = 0u; j < v.size(); ++j) { trace += v[j][j]; } - double intervals[][2] = - { - { m[0] - 3.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace) }, - { m[0] - 3.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace) }, - { m[0] - 3.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace) }, - { m[0] - 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace) }, - { m[0] - 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace) }, - { m[0] - 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace) }, - { m[0] + 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace) }, 
- { m[0] + 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace) }, - { m[0] + 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace) } - }; + double intervals[][2] = {{m[0] - 3.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] - 3.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] - 3.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}}; TVector2 expectedMean(m.begin(), m.end()); - double elements[] = { v[0][0], v[0][1], v[1][1] }; + double elements[] = {v[0][0], v[0][1], v[1][1]}; TMatrix2 expectedCovariance(elements, elements + 3); CUnitKernel<2> likelihoodKernel(filter); @@ -565,22 +471,21 @@ double z = 0.0; TVector2 actualMean(0.0); TMatrix2 actualCovariance(0.0); - for (std::size_t j = 0u; j < boost::size(intervals); ++j) - { + for (std::size_t j = 0u; j < boost::size(intervals); ++j) { TDoubleVec a(boost::begin(intervals[j]), boost::end(intervals[j])); TDoubleVec b(a); b[0] += 2.0 * std::sqrt(trace); b[1] += 2.0 * std::sqrt(trace); double zj; - maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderFive, 2>(likelihoodKernel, a, b, zj); + maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderFive, 2>( + likelihoodKernel, a, b, zj); TVector2 mj; - maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderFive, 2>(meanKernel, a, b, mj); + maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderFive, 2>( + meanKernel, a, b, mj); TMatrix2 cj; - maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderFive, 2>(covarianceKernel, a, b, cj); + maths::CIntegration::sparseGaussLegendre<maths::CIntegration::OrderFive, 2>( + covarianceKernel, a, b, cj); z += zj; actualMean += mj; @@ -608,8 +513,7 @@ } } -void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() -{ +void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { LOG_DEBUG("+----------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testMarginalLikelihoodMode |"); LOG_DEBUG("+----------------------------------------------------------------+"); @@ -617,18 +521,16 @@ // Test that the marginal likelihood mode is at a stationary maximum // of the likelihood function.
- const double mean[] = { 10.0, 20.0 }; - const double covariance[] = { 3.0, 1.0, 2.0 }; + const double mean[] = {10.0, 20.0}; + const double covariance[] = {3.0, 1.0, 2.0}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; gaussianSamples(rng, 100, mean, covariance, samples); - maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + maths::CMultivariateNormalConjugate<2> filter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); } LOG_DEBUG("prior = " << filter.print()); @@ -637,16 +539,14 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() TDoubleVec epsilons; rng.generateUniformSamples(-0.01, 0.01, 10, epsilons); - for (std::size_t i = 0u; i < epsilons.size(); i += 2) - { + for (std::size_t i = 0u; i < epsilons.size(); i += 2) { TDouble10Vec1Vec modeMinusEps(1, TDouble10Vec(2)); TDouble10Vec1Vec modePlusEps(1, TDouble10Vec(2)); double norm = 0.0; - for (std::size_t j = 0u; j < 2; ++j) - { + for (std::size_t j = 0u; j < 2; ++j) { double eps = epsilons[i + j]; modeMinusEps[0][j] = mode[j] - eps; - modePlusEps[0][j] = mode[j] + eps; + modePlusEps[0][j] = mode[j] + eps; norm += eps * eps; } LOG_DEBUG("mode - eps = " << core::CContainerPrinter::print(modeMinusEps)); @@ -664,8 +564,7 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() } } -void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() -{ +void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG("+------------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testSampleMarginalLikelihood |"); LOG_DEBUG("+------------------------------------------------------------------+"); @@ -678,8 +577,8 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() test::CRandomNumbers rng; - const double mean_[] = { 50.0, 20.0 }; - const double covariance_[] = { 8.0, 3.0, 5.0 }; + const double mean_[] = {50.0, 20.0}; + const double covariance_[] = {8.0, 3.0, 5.0}; TVector2 mean(mean_); TMatrix2 covariance(covariance_, covariance_ + 3); @@ -687,25 +586,19 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() TDouble10Vec1Vec samples; gaussianSamples(rng, 50, mean_, covariance_, samples); - maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); + maths::CMultivariateNormalConjugate<2> filter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); std::size_t i = 0u; - for (/**/; i < samples.size(); ++i) - { - if (!filter.isNonInformative()) - { + for (/**/; i < samples.size(); ++i) { + if (!filter.isNonInformative()) { break; } TDouble10Vec1Vec resamples; filter.sampleMarginalLikelihood(40, resamples); - if (filter.numberSamples() == 0) - { + if (filter.numberSamples() == 0) { CPPUNIT_ASSERT(resamples.empty()); - } - else - { + } else { CPPUNIT_ASSERT(resamples.size() == 1); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()), core::CContainerPrinter::print(resamples[0])); @@ -715,15 +608,12 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() } TDoubleVec p; - 
empiricalProbabilityOfLessLikelySamples(mean.toVector(), - covariance.toVectors(), - p); + empiricalProbabilityOfLessLikelySamples(mean.toVector(), covariance.toVectors(), p); TMeanAccumulator pAbsError; TMeanAccumulator pRelError; - for (/**/; i < samples.size(); ++i) - { + for (/**/; i < samples.size(); ++i) { maths::CBasicStatistics::SSampleCovariances<double, 2> covariances; TVector2 likelihoodMean(filter.marginalLikelihoodMean()); @@ -731,13 +621,12 @@ TDouble10Vec1Vec resamples; filter.sampleMarginalLikelihood(40, resamples); - for (std::size_t j = 0u; j < resamples.size(); ++j) - { + for (std::size_t j = 0u; j < resamples.size(); ++j) { covariances.add(TVector2(resamples[j])); } TVector2 sampleMean = maths::CBasicStatistics::mean(covariances); - TMatrix2 sampleCov = maths::CBasicStatistics::covariances(covariances); + TMatrix2 sampleCov = maths::CBasicStatistics::covariances(covariances); LOG_DEBUG("likelihood mean = " << likelihoodMean); LOG_DEBUG("sample mean = " << sampleMean); @@ -748,25 +637,17 @@ CPPUNIT_ASSERT((sampleCov - likelihoodCov).frobenius() / likelihoodCov.frobenius() < 0.01); TDoubleVec sampleProbabilities; - for (std::size_t j = 0u; j < resamples.size(); ++j) - { + for (std::size_t j = 0u; j < resamples.size(); ++j) { double ll; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, - TDouble10Vec1Vec(1, resamples[j]), - SINGLE_UNIT_WEIGHT_2, - ll); - sampleProbabilities.push_back( static_cast<double>(std::lower_bound(p.begin(), - p.end(), - ll) - p.begin()) - / static_cast<double>(p.size())); + filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, resamples[j]), SINGLE_UNIT_WEIGHT_2, ll); + sampleProbabilities.push_back(static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) / + static_cast<double>(p.size())); } std::sort(sampleProbabilities.begin(), sampleProbabilities.end()); LOG_DEBUG("sample p = " << core::CContainerPrinter::print(sampleProbabilities)); - for (std::size_t j = 0u; j < sampleProbabilities.size(); ++j) - { - double expectedProbability = static_cast<double>(j + 1) - / static_cast<double>(sampleProbabilities.size()); + for (std::size_t j = 0u; j < sampleProbabilities.size(); ++j) { + double expectedProbability = static_cast<double>(j + 1) / static_cast<double>(sampleProbabilities.size()); double error = std::fabs(sampleProbabilities[j] - expectedProbability); pAbsError.add(error); pRelError.add(error / expectedProbability); @@ -780,8 +661,7 @@ CPPUNIT_ASSERT(maths::CBasicStatistics::mean(pRelError) < 0.3); } -void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() -{ +void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() { LOG_DEBUG("+------------------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples |"); LOG_DEBUG("+------------------------------------------------------------------------+"); maths::CSampling::seed(); - const double means[][2] = - { - { -10.0, -100.0 }, - { 0.0, 0.0 }, - { 100.0, 50.0 }, - }; - const double covariances[][3] = - { - { 10.0, 0.0, 10.0 }, - { 10.0, 9.0, 10.0 }, - { 10.0, -9.0, 10.0 } - }; - const double offsets[][2] = - { - { 0.0, 0.0 }, - { 0.0, 6.0 }, - { 4.0, 0.0 }, - { 6.0, 6.0 }, - { 6.0, -6.0 }, - { -8.0, 8.0 }, - { -8.0,
-8.0 } - }; + const double means[][2] = { + {-10.0, -100.0}, + {0.0, 0.0}, + {100.0, 50.0}, + }; + const double covariances[][3] = {{10.0, 0.0, 10.0}, {10.0, 9.0, 10.0}, {10.0, -9.0, 10.0}}; + const double offsets[][2] = {{0.0, 0.0}, {0.0, 6.0}, {4.0, 0.0}, {6.0, 6.0}, {6.0, -6.0}, {-8.0, 8.0}, {-8.0, -8.0}}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { TDoubleVec mean(means[i], means[i] + 2); LOG_DEBUG("mean = " << core::CContainerPrinter::print(mean)); - for (std::size_t j = 0u; j < boost::size(covariances); ++j) - { + for (std::size_t j = 0u; j < boost::size(covariances); ++j) { TDoubleVecVec covariance; - covariance.push_back(TDoubleVec(covariances[j], covariances[j] + 2)); + covariance.push_back(TDoubleVec(covariances[j], covariances[j] + 2)); covariance.push_back(TDoubleVec(covariances[j] + 1, covariances[j] + 3)); LOG_DEBUG("covariances = " << core::CContainerPrinter::print(covariance)); @@ -832,9 +695,8 @@ rng.generateMultivariateNormalSamples(mean, covariance, 500, samples); maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); - for (std::size_t k = 0u; k < samples.size(); ++k) - { + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), SINGLE_UNIT_WEIGHT_2); } @@ -844,29 +706,22 @@ TMeanAccumulator meanAbsError; TMeanAccumulator meanRelError; - for (std::size_t k = 0u; k < boost::size(offsets); ++k) - { + for (std::size_t k = 0u; k < boost::size(offsets); ++k) { TVector2 x = TVector2(mean) + TVector2(offsets[k]); double ll; maths::gaussianLogLikelihood(TMatrix2(covariance), TVector2(offsets[k]), ll); - double px = static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) - / static_cast<double>(p.size()); - + double px = static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) / static_cast<double>(p.size()); double lb, ub; maths::CMultivariatePrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - COUNT_WEIGHT, - TDouble10Vec1Vec(1, x.toVector()), - SINGLE_UNIT_WEIGHT_2, - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, COUNT_WEIGHT, TDouble10Vec1Vec(1, x.toVector()), SINGLE_UNIT_WEIGHT_2, lb, ub, tail); double pa = (lb + ub) / 2.0; LOG_DEBUG(" p(" << x << "), actual = " << pa << ", expected = " << px); meanAbsError.add(std::fabs(px - pa)); - if (px < 1.0 && px > 0.0) - { + if (px < 1.0 && px > 0.0) { meanRelError.add(std::fabs(std::log(px) - std::log(pa)) / std::fabs(std::log(px))); } } @@ -880,8 +735,7 @@ -void CMultivariateNormalConjugateTest::testIntegerData() -{ +void CMultivariateNormalConjugateTest::testIntegerData() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testIntegerData |"); LOG_DEBUG("+-----------------------------------------------------+"); // The idea of this test is to check that the inferred model agrees in the // limit (large n) with the model inferred from such data.
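The integer/continuous comparison set up below hinges on a continuity-correction convention: an integer observation x is modelled as occupying the interval [x, x + 1), which the test mirrors by adding U[0, 1) jitter to the values it feeds the continuous prior; that is what the uniform samples generated in the next hunk appear to be for (a reading of partially elided code, so treat it as an assumption). Schematically:

    #include <cmath>

    // Sketch of the continuity correction: an integer datum x fed to the
    // integer prior should match floor(x) + u, u ~ U[0, 1), fed to the
    // continuous prior, in the large-n limit. (Illustrative helper, not
    // library code.)
    double continuityCorrect(double x, double u01) {
        return std::floor(x) + u01;
    }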
- const double means[][2] = - { - { -10.0, -100.0 }, - { 0.0, 0.0 }, - { 100.0, 50.0 }, - }; - const double covariances[][3] = - { - { 10.0, 0.0, 10.0 }, - { 10.0, 9.0, 10.0 } - }; + const double means[][2] = { + {-10.0, -100.0}, + {0.0, 0.0}, + {100.0, 50.0}, + }; + const double covariances[][3] = {{10.0, 0.0, 10.0}, {10.0, 9.0, 10.0}}; const std::size_t n = 10000u; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { TVector2 mean(means[i], means[i] + 2); - for (std::size_t j = 0u; j < boost::size(covariances); ++j) - { + for (std::size_t j = 0u; j < boost::size(covariances); ++j) { TMatrix2 covariance(covariances[j], covariances[j] + 3); TDoubleVecVec samples; - rng.generateMultivariateNormalSamples(mean.toVector(), - covariance.toVectors(), - n, - samples); + rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n, samples); TDoubleVecVec uniform; TDoubleVec uniform_; rng.generateUniformSamples(0.0, 1.0, 2 * n, uniform_); - for (std::size_t k = 0u; k < uniform_.size(); k += 2) - { + for (std::size_t k = 0u; k < uniform_.size(); k += 2) { uniform.push_back(TDoubleVec(&uniform_[k], &uniform_[k] + 2)); } maths::CMultivariateNormalConjugate<2> filter1( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData)); + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData)); maths::CMultivariateNormalConjugate<2> filter2( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); - for (std::size_t k = 0u; k < n; ++k) - { + for (std::size_t k = 0u; k < n; ++k) { TVector2 x(samples[k]); TDouble10Vec1Vec sample(1, x.toVector()); filter1.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); @@ -941,15 +783,12 @@ void CMultivariateNormalConjugateTest::testIntegerData() filter2.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); } - CPPUNIT_ASSERT(filter1.equalTolerance(filter2, - maths::CToleranceTypes::E_RelativeTolerance - | maths::CToleranceTypes::E_AbsoluteTolerance, - 0.005)); + CPPUNIT_ASSERT(filter1.equalTolerance( + filter2, maths::CToleranceTypes::E_RelativeTolerance | maths::CToleranceTypes::E_AbsoluteTolerance, 0.005)); TMeanAccumulator meanLogLikelihood1; TMeanAccumulator meanLogLikelihood2; - for (std::size_t k = 0u; k < n; ++k) - { + for (std::size_t k = 0u; k < n; ++k) { TVector2 x(samples[k]); TDouble10Vec1Vec sample(1, x.toVector()); @@ -964,65 +803,53 @@ void CMultivariateNormalConjugateTest::testIntegerData() } LOG_DEBUG("meanLogLikelihood1 = " << maths::CBasicStatistics::mean(meanLogLikelihood1) - << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); + << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanLogLikelihood1), - maths::CBasicStatistics::mean(meanLogLikelihood2), - 0.03); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(meanLogLikelihood1), maths::CBasicStatistics::mean(meanLogLikelihood2), 0.03); } } } -void CMultivariateNormalConjugateTest::testLowVariationData() -{ +void CMultivariateNormalConjugateTest::testLowVariationData() { LOG_DEBUG("+----------------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testLowVariationData |"); 
LOG_DEBUG("+----------------------------------------------------------+"); { - maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData)); - for (std::size_t i = 0u; i < 100; ++i) - { - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), - SINGLE_UNIT_WEIGHT_2); + maths::CMultivariateNormalConjugate<2> filter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData)); + for (std::size_t i = 0u; i < 100; ++i) { + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), SINGLE_UNIT_WEIGHT_2); } TDouble10Vec10Vec covariances = filter.marginalLikelihoodCovariance(); - LOG_DEBUG("covariance matrix " - << core::CContainerPrinter::print(covariances)); + LOG_DEBUG("covariance matrix " << core::CContainerPrinter::print(covariances)); CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, 2.0 / (covariances[0][0] + covariances[1][1]), 0.3); } { maths::CMultivariateNormalConjugate<2> filter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); - for (std::size_t i = 0u; i < 100; ++i) - { - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), - SINGLE_UNIT_WEIGHT_2); + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); + for (std::size_t i = 0u; i < 100; ++i) { + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), SINGLE_UNIT_WEIGHT_2); } TDouble10Vec10Vec covariances = filter.marginalLikelihoodCovariance(); - LOG_DEBUG("covariance matrix " - << core::CContainerPrinter::print(covariances)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / maths::MINIMUM_COEFFICIENT_OF_VARIATION - / std::sqrt(2.0) / 430.5, - std::sqrt(2.0 / (covariances[0][0] + covariances[1][1])), 0.4); + LOG_DEBUG("covariance matrix " << core::CContainerPrinter::print(covariances)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / maths::MINIMUM_COEFFICIENT_OF_VARIATION / std::sqrt(2.0) / 430.5, + std::sqrt(2.0 / (covariances[0][0] + covariances[1][1])), + 0.4); } } -void CMultivariateNormalConjugateTest::testPersist() -{ +void CMultivariateNormalConjugateTest::testPersist() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::testPersist |"); LOG_DEBUG("+-------------------------------------------------+"); // Check that persist/restore is idempotent. 
- const double mean[] = { 10.0, 20.0 }; - const double covariance[] = { 3.0, 1.0, 2.0 }; + const double mean[] = {10.0, 20.0}; + const double covariance[] = {3.0, 1.0, 2.0}; test::CRandomNumbers rng; @@ -1031,10 +858,8 @@ maths_t::EDataType dataType = maths_t::E_ContinuousData; - maths::CMultivariateNormalConjugate<2> origFilter( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataType)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + maths::CMultivariateNormalConjugate<2> origFilter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataType)); + for (std::size_t i = 0u; i < samples.size(); ++i) { origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); } double decayRate = origFilter.decayRate(); @@ -1054,14 +879,14 @@ CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - maths::SDistributionRestoreParams params(dataType, decayRate + 0.1, + maths::SDistributionRestoreParams params(dataType, + decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); maths::CMultivariateNormalConjugate<2> restoredFilter(params, traverser); - LOG_DEBUG("orig checksum = " << checksum - << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1074,8 +899,7 @@ CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CMultivariateNormalConjugateTest::calibrationExperiment() -{ +void CMultivariateNormalConjugateTest::calibrationExperiment() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::calibrationExperiment |"); LOG_DEBUG("+-----------------------------------------------------------+"); @@ -1083,63 +907,37 @@ using TVector10 = maths::CVectorNx1<double, 10>; using TMatrix10 = maths::CSymmetricMatrixNxN<double, 10>; - double means[] = { 10.0, 10.0, 20.0, 20.0, 30.0, 20.0, 10.0, 40.0, 30.0, 20.0 }; - double covariances[] = { 10.0, - 9.0, 10.0, - -5.0, 1.0, 6.0, - -8.0, 9.0, 4.0, 20.0, - 8.0, 3.0, 1.0, 12.0, 12.0, - -4.0, 2.0, 1.0, 1.0, 4.0, 4.0, - 5.0, 1.0, 3.0, 8.0, 10.0, 3.0, 10.0, - 9.0, 9.0, 5.0, 19.0, 11.0, 3.0, 9.0, 25.0, - 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; + double means[] = {10.0, 10.0, 20.0, 20.0, 30.0, 20.0, 10.0, 40.0, 30.0, 20.0}; + double covariances[] = {10.0, 9.0, 10.0, -5.0, 1.0, 6.0, -8.0, 9.0, 4.0, 20.0, 8.0, 3.0, 1.0, 12.0, 12.0, -4.0, 2.0, 1.0, 1.0, + 4.0, 4.0, 5.0, 1.0, 3.0, 8.0, 10.0, 3.0, 10.0, 9.0, 9.0, 5.0, 19.0, 11.0, 3.0, 9.0, 25.0, 5.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; TVector10 mean(means, means + boost::size(means)); TMatrix10 covariance(covariances, covariances + boost::size(covariances)); test::CRandomNumbers rng; TDoubleVecVec samples_; - rng.generateMultivariateNormalSamples(mean.toVector(), - covariance.toVectors(), - 2000, - samples_); + rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), 2000, samples_); TDouble10Vec1Vec samples;
samples.reserve(samples.size() + samples_.size()); - for (std::size_t j = 0u; j < samples_.size(); ++j) - { + for (std::size_t j = 0u; j < samples_.size(); ++j) { samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end())); } - maths::CMultivariateNormalConjugate<2> filters[] = - { - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData) - }; - std::size_t indices[][2] = - { - { 0, 1 }, - { 0, 2 }, - { 0, 3 }, - { 0, 4 }, - { 0, 5 }, - { 0, 6 }, - { 0, 7 }, - { 0, 8 }, - { 0, 9 } - }; - - for (std::size_t i = 0u; i < 200; ++i) - { - for (std::size_t j = 0u; j < boost::size(filters); ++j) - { + maths::CMultivariateNormalConjugate<2> filters[] = { + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData), + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)}; + std::size_t indices[][2] = {{0, 1}, {0, 2}, {0, 3}, {0, 4}, {0, 5}, {0, 6}, {0, 7}, {0, 8}, {0, 9}}; + + for (std::size_t i = 0u; i < 200; ++i) { + for (std::size_t j = 0u; j < boost::size(filters); ++j) { TDouble10Vec1Vec sample(1, TDouble10Vec(2)); sample[0][0] = samples[i][indices[j][0]]; sample[0][1] = samples[i][indices[j][1]]; @@ -1150,22 +948,16 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() TDoubleVecVec p(boost::size(filters)); TDoubleVec mp; TDoubleVec ep; - for (std::size_t i = 200u; i < 2000; ++i) - { + for (std::size_t i = 200u; i < 2000; ++i) { double mpi = 1.0; maths::CProbabilityOfExtremeSample epi; - for (std::size_t j = 0u; j < boost::size(filters); ++j) - { + for (std::size_t j = 0u; j < boost::size(filters); ++j) { TDouble10Vec1Vec sample(1, TDouble10Vec(2)); sample[0][0] = samples[i][indices[j][0]]; sample[0][1] = samples[i][indices[j][1]]; double lb, ub; maths::CMultivariatePrior::TTail10Vec tail; - filters[j].probabilityOfLessLikelySamples(maths_t::E_TwoSided, - COUNT_WEIGHT, - sample, - SINGLE_UNIT_WEIGHT_2, - lb, ub, tail); + filters[j].probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, lb, ub, tail); p[j].push_back((lb + ub) / 2.0); mpi = std::min(mpi, (lb + ub) / 2.0); epi.add((lb + ub) / 2.0, 0.5); @@ -1176,75 
+968,54 @@ ep.push_back(pi); } - for (std::size_t i = 0u; i < p.size(); ++i) - { + for (std::size_t i = 0u; i < p.size(); ++i) { std::sort(p[i].begin(), p[i].end()); } std::sort(mp.begin(), mp.end()); std::sort(ep.begin(), ep.end()); - double test[] = { 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99 }; - for (std::size_t i = 0u; i < boost::size(test); ++i) - { - for (std::size_t j = 0u; j < p.size(); ++j) - { + double test[] = {0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99}; + for (std::size_t i = 0u; i < boost::size(test); ++i) { + for (std::size_t j = 0u; j < p.size(); ++j) { LOG_DEBUG(j << ") " << test[i] << " " - << static_cast<double>(std::lower_bound(p[j].begin(), p[j].end(), test[i]) - p[j].begin()) - / static_cast<double>(p[j].size())); + << static_cast<double>(std::lower_bound(p[j].begin(), p[j].end(), test[i]) - p[j].begin()) / + static_cast<double>(p[j].size())); } - LOG_DEBUG("min " << test[i] << " " - << static_cast<double>(std::lower_bound(mp.begin(), mp.end(), test[i]) - mp.begin()) - / static_cast<double>(mp.size())); - LOG_DEBUG("corrected min " << test[i] << " " - << static_cast<double>(std::lower_bound(ep.begin(), ep.end(), test[i]) - ep.begin()) - / static_cast<double>(ep.size())); + LOG_DEBUG("min " + << test[i] << " " + << static_cast<double>(std::lower_bound(mp.begin(), mp.end(), test[i]) - mp.begin()) / static_cast<double>(mp.size())); + LOG_DEBUG("corrected min " + << test[i] << " " + << static_cast<double>(std::lower_bound(ep.begin(), ep.end(), test[i]) - ep.begin()) / static_cast<double>(ep.size())); } } -void CMultivariateNormalConjugateTest::dataGenerator() -{ +void CMultivariateNormalConjugateTest::dataGenerator() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CMultivariateNormalConjugateTest::dataGenerator |"); LOG_DEBUG("+---------------------------------------------------+"); - const double means[][2] = - { - { 10.0, 20.0 }, - { 30.0, 25.0 }, - { 50.0, 5.0 }, - { 100.0, 50.0 } - }; - const double covariances[][3] = - { - { 3.0, 2.0, 2.0 }, - { 6.0, -4.0, 5.0 }, - { 4.0, 1.0, 3.0 }, - { 20.0, -12.0, 12.0 } - }; - - double anomalies[][4] = - { - { 7000.0, 0.0, 2.8, -2.8 }, - { 7001.0, 0.0, 2.8, -2.8 }, - { 7002.0, 0.0, 2.8, -2.8 }, - { 7003.0, 0.0, 2.8, -2.8 }, - { 8000.0, 3.0, 3.5, 4.9 }, - { 8001.0, 3.0, 3.5, 4.9 }, - { 8002.0, 3.0, 3.5, 4.9 }, - { 8003.0, 3.0, 3.5, 4.9 }, - { 8004.0, 3.0, 3.5, 4.9 }, - { 8005.0, 3.0, 3.5, 4.9 } - }; + const double means[][2] = {{10.0, 20.0}, {30.0, 25.0}, {50.0, 5.0}, {100.0, 50.0}}; + const double covariances[][3] = {{3.0, 2.0, 2.0}, {6.0, -4.0, 5.0}, {4.0, 1.0, 3.0}, {20.0, -12.0, 12.0}}; + + double anomalies[][4] = {{7000.0, 0.0, 2.8, -2.8}, + {7001.0, 0.0, 2.8, -2.8}, + {7002.0, 0.0, 2.8, -2.8}, + {7003.0, 0.0, 2.8, -2.8}, + {8000.0, 3.0, 3.5, 4.9}, + {8001.0, 3.0, 3.5, 4.9}, + {8002.0, 3.0, 3.5, 4.9}, + {8003.0, 3.0, 3.5, 4.9}, + {8004.0, 3.0, 3.5, 4.9}, + {8005.0, 3.0, 3.5, 4.9}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples[4]; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { gaussianSamples(rng, 10000, means[i], covariances[i], samples[i]); } - for (std::size_t i = 0u; i < boost::size(anomalies); ++i) - { + for (std::size_t i = 0u; i < boost::size(anomalies); ++i) { std::size_t j = static_cast<std::size_t>(anomalies[i][1]); std::size_t k = static_cast<std::size_t>(anomalies[i][0]); samples[j][k][0] += anomalies[i][2]; @@ -1254,54 +1025,42 @@ std::ofstream f; f.open("four_2d_gaussian.csv"); core_t::TTime
time = 1451606400; - for (std::size_t i = 0u; i < 10000; ++i, time += 30) - { - for (std::size_t j = 0u; j < boost::size(samples); ++j) - { - f << time << ",x" << 2*j << "," << samples[j][i][0] << "\n"; - f << time << ",x" << 2*j+1 << "," << samples[j][i][1] << "\n"; + for (std::size_t i = 0u; i < 10000; ++i, time += 30) { + for (std::size_t j = 0u; j < boost::size(samples); ++j) { + f << time << ",x" << 2 * j << "," << samples[j][i][0] << "\n"; + f << time << ",x" << 2 * j + 1 << "," << samples[j][i][1] << "\n"; } } f.close(); } -CppUnit::Test *CMultivariateNormalConjugateTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMultivariateNormalConjugateTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testMultipleUpdate", - &CMultivariateNormalConjugateTest::testMultipleUpdate) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testPropagation", - &CMultivariateNormalConjugateTest::testPropagation) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testMeanVectorEstimation", - &CMultivariateNormalConjugateTest::testMeanVectorEstimation) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation", - &CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testMarginalLikelihood", - &CMultivariateNormalConjugateTest::testMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testMarginalLikelihoodMode", - &CMultivariateNormalConjugateTest::testMarginalLikelihoodMode) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testSampleMarginalLikelihood", - &CMultivariateNormalConjugateTest::testSampleMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples", - &CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testIntegerData", - &CMultivariateNormalConjugateTest::testIntegerData) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testLowVariationData", - &CMultivariateNormalConjugateTest::testLowVariationData) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( - "CMultivariateNormalConjugateTest::testPersist", - &CMultivariateNormalConjugateTest::testPersist) ); +CppUnit::Test* CMultivariateNormalConjugateTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultivariateNormalConjugateTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testMultipleUpdate", + &CMultivariateNormalConjugateTest::testMultipleUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testPropagation", + &CMultivariateNormalConjugateTest::testPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>( + "CMultivariateNormalConjugateTest::testMeanVectorEstimation", &CMultivariateNormalConjugateTest::testMeanVectorEstimation)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation", + &CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
"CMultivariateNormalConjugateTest::testMarginalLikelihood", &CMultivariateNormalConjugateTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultivariateNormalConjugateTest::testMarginalLikelihoodMode", &CMultivariateNormalConjugateTest::testMarginalLikelihoodMode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultivariateNormalConjugateTest::testSampleMarginalLikelihood", &CMultivariateNormalConjugateTest::testSampleMarginalLikelihood)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples", + &CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateNormalConjugateTest::testIntegerData", + &CMultivariateNormalConjugateTest::testIntegerData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultivariateNormalConjugateTest::testLowVariationData", &CMultivariateNormalConjugateTest::testLowVariationData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateNormalConjugateTest::testPersist", + &CMultivariateNormalConjugateTest::testPersist)); //suiteOfTests->addTest( new CppUnit::TestCaller( // "CMultivariateNormalConjugateTest::calibrationExperiment", // &CMultivariateNormalConjugateTest::calibrationExperiment) ); diff --git a/lib/maths/unittest/CMultivariateNormalConjugateTest.h b/lib/maths/unittest/CMultivariateNormalConjugateTest.h index d19e811f2a..7c96e80064 100644 --- a/lib/maths/unittest/CMultivariateNormalConjugateTest.h +++ b/lib/maths/unittest/CMultivariateNormalConjugateTest.h @@ -9,24 +9,23 @@ #include -class CMultivariateNormalConjugateTest : public CppUnit::TestFixture -{ - public: - void testMultipleUpdate(); - void testPropagation(); - void testMeanVectorEstimation(); - void testPrecisionMatrixEstimation(); - void testMarginalLikelihood(); - void testMarginalLikelihoodMode(); - void testSampleMarginalLikelihood(); - void testProbabilityOfLessLikelySamples(); - void testIntegerData(); - void testLowVariationData(); - void testPersist(); - void calibrationExperiment(); - void dataGenerator(); +class CMultivariateNormalConjugateTest : public CppUnit::TestFixture { +public: + void testMultipleUpdate(); + void testPropagation(); + void testMeanVectorEstimation(); + void testPrecisionMatrixEstimation(); + void testMarginalLikelihood(); + void testMarginalLikelihoodMode(); + void testSampleMarginalLikelihood(); + void testProbabilityOfLessLikelySamples(); + void testIntegerData(); + void testLowVariationData(); + void testPersist(); + void calibrationExperiment(); + void dataGenerator(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CMultivariateNormalConjugateTest_h diff --git a/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc b/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc index 2aa1e93110..5a81a02d42 100644 --- a/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc +++ b/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc @@ -30,8 +30,7 @@ using namespace ml; using namespace handy_typedefs; -namespace -{ +namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; @@ -44,101 +43,78 @@ const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVariance const TDouble10Vec4Vec UNIT_WEIGHT_2(1, TDouble10Vec(2, 1.0)); const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, UNIT_WEIGHT_2); -class CMinusLogLikelihood : public maths::CGradientDescent::CFunction -{ - public: - CMinusLogLikelihood(const 
maths::CMultivariateOneOfNPrior &prior) : - m_Prior(&prior) - {} - - bool operator()(const maths::CGradientDescent::TVector &x, double &result) const - { - if (m_Prior->jointLogMarginalLikelihood( - COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(x.toVector())), - SINGLE_UNIT_WEIGHT_2, - result) == maths_t::E_FpNoErrors) - { - result = -result; - return true; - } - return false; +class CMinusLogLikelihood : public maths::CGradientDescent::CFunction { +public: + CMinusLogLikelihood(const maths::CMultivariateOneOfNPrior& prior) : m_Prior(&prior) {} + + bool operator()(const maths::CGradientDescent::TVector& x, double& result) const { + if (m_Prior->jointLogMarginalLikelihood( + COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(x.toVector())), SINGLE_UNIT_WEIGHT_2, result) == + maths_t::E_FpNoErrors) { + result = -result; + return true; } + return false; + } - private: - const maths::CMultivariateOneOfNPrior *m_Prior; +private: + const maths::CMultivariateOneOfNPrior* m_Prior; }; template<std::size_t N> -maths::CMultivariateMultimodalPrior<N> makeMultimodal(maths_t::EDataType dataType, - double decayRate = 0.0) -{ - maths::CXMeansOnline<maths::CFloatStorage, N> clusterer(dataType, - maths_t::E_ClustersFractionWeight, - decayRate); +maths::CMultivariateMultimodalPrior<N> makeMultimodal(maths_t::EDataType dataType, double decayRate = 0.0) { + maths::CXMeansOnline<maths::CFloatStorage, N> clusterer(dataType, maths_t::E_ClustersFractionWeight, decayRate); return maths::CMultivariateMultimodalPrior<N>( - dataType, - clusterer, - maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate), - decayRate); + dataType, clusterer, maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate), decayRate); } template<std::size_t N> -maths::CMultivariateOneOfNPrior makeOneOfN(maths_t::EDataType dataType, - double decayRate = 0.0) -{ +maths::CMultivariateOneOfNPrior makeOneOfN(maths_t::EDataType dataType, double decayRate = 0.0) { TPriorPtrVec priors; priors.push_back(TPriorPtr(maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate).clone())); priors.push_back(TPriorPtr(makeMultimodal<N>(dataType, decayRate).clone())); return maths::CMultivariateOneOfNPrior(N, priors, dataType, decayRate); } -void gaussianSamples(test::CRandomNumbers &rng, +void gaussianSamples(test::CRandomNumbers& rng, std::size_t modes, - const std::size_t *n, + const std::size_t* n, const double (*means)[2], const double (*covariances)[3], - TDouble10Vec1Vec &samples) -{ - for (std::size_t i = 0u; i < modes; ++i) - { + TDouble10Vec1Vec& samples) { + for (std::size_t i = 0u; i < modes; ++i) { TVector2 mean(means[i], means[i] + 2); TMatrix2 covariance(covariances[i], covariances[i] + 3); TDoubleVecVec samples_; - rng.generateMultivariateNormalSamples(mean.toVector(), - covariance.toVectors(), - n[i], - samples_); + rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n[i], samples_); samples.reserve(samples.size() + samples_.size()); - for (std::size_t j = 0u; j < samples_.size(); ++j) - { + for (std::size_t j = 0u; j < samples_.size(); ++j) { samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end())); } } LOG_DEBUG("# samples = " << samples.size()); } -double sum(const TDoubleVec &x) -{ +double sum(const TDoubleVec& x) { return std::accumulate(x.begin(), x.end(), 0.0); } -std::string print(maths_t::EDataType dataType) -{ - switch (dataType) - { - case maths_t::E_DiscreteData: return "Discrete"; - case maths_t::E_IntegerData: return "Integer"; - case maths_t::E_ContinuousData: return "Continuous"; - case maths_t::E_MixedData: return
"Mixed"; +std::string print(maths_t::EDataType dataType) { + switch (dataType) { + case maths_t::E_DiscreteData: + return "Discrete"; + case maths_t::E_IntegerData: + return "Integer"; + case maths_t::E_ContinuousData: + return "Continuous"; + case maths_t::E_MixedData: + return "Mixed"; } return ""; } - } -void CMultivariateOneOfNPriorTest::testMultipleUpdate() -{ +void CMultivariateOneOfNPriorTest::testMultipleUpdate() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testMultipleUpdate |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -148,14 +124,10 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() maths::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; - const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; - const double mean_[] = { 10.0, 20.0 }; - const double covariance_[] = { 20.0, 1.0, 16.0 }; + const double mean_[] = {10.0, 20.0}; + const double covariance_[] = {20.0, 1.0, 16.0}; TVector2 mean(mean_); TMatrix2 covariance(covariance_, covariance_ + 3); @@ -166,39 +138,31 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() TDouble10Vec1Vec samples; { TDoubleVecVec samples_; - rng.generateMultivariateNormalSamples(mean.toVector(), - covariance.toVectors(), - 100, - samples_); + rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), 100, samples_); seedSamples.reserve(10); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { seedSamples.push_back(TDouble10Vec(samples_[i].begin(), samples_[i].end())); } samples.reserve(samples_.size() - 10); - for (std::size_t i = 10u; i < samples_.size(); ++i) - { + for (std::size_t i = 10u; i < samples_.size(); ++i) { samples.push_back(TDouble10Vec(samples_[i].begin(), samples_[i].end())); } } LOG_DEBUG("****** Test vanilla ******"); - for (std::size_t i = 0u; i < boost::size(dataTypes); ++i) - { + for (std::size_t i = 0u; i < boost::size(dataTypes); ++i) { LOG_DEBUG("*** data type = " << print(dataTypes[i]) << " ***"); maths::CMultivariateOneOfNPrior filter1(makeOneOfN<2>(dataTypes[i])); maths::CMultivariateOneOfNPrior filter2(filter1); - for (std::size_t j = 0u; j < seedSamples.size(); ++j) - { + for (std::size_t j = 0u; j < seedSamples.size(); ++j) { TDouble10Vec1Vec sample(1, seedSamples[j]); TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); filter1.addSamples(COUNT_WEIGHT, sample, weight); filter2.addSamples(COUNT_WEIGHT, sample, weight); } - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { TDouble10Vec1Vec sample(1, samples[j]); TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); filter1.addSamples(COUNT_WEIGHT, sample, weight); @@ -212,15 +176,13 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() } LOG_DEBUG("****** Test with variance scale ******"); - for (size_t i = 0; i < boost::size(dataTypes); ++i) - { + for (size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG("*** data type = " << print(dataTypes[i]) << " ***"); maths::CMultivariateOneOfNPrior filter1(makeOneOfN<2>(dataTypes[i])); maths::CMultivariateOneOfNPrior filter2(filter1); - for (std::size_t j = 0u; j < seedSamples.size(); ++j) - { + for (std::size_t j = 0u; j < seedSamples.size(); ++j) { TDouble10Vec1Vec sample(1, seedSamples[j]); 
TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); filter1.addSamples(COUNT_WEIGHT, sample, weight); @@ -229,8 +191,7 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() TDouble10Vec4Vec1Vec weights; weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5))); weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0))); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { TDouble10Vec1Vec sample(1, samples[j]); TDouble10Vec4Vec1Vec weight(1, weights[j]); filter1.addSamples(VARIANCE_WEIGHT, sample, weight); @@ -243,8 +204,7 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() } } -void CMultivariateOneOfNPriorTest::testPropagation() -{ +void CMultivariateOneOfNPriorTest::testPropagation() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testPropagation |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -257,17 +217,9 @@ void CMultivariateOneOfNPriorTest::testPropagation() const double eps = 2e-3; - const std::size_t n[] = { 400, 600 }; - const double means[][2] = - { - { 10.0, 10.0 }, - { 20.0, 20.0 } - }; - const double covariances[][3] = - { - { 8.0, 1.0, 8.0 }, - { 20.0, -4.0, 10.0 } - }; + const std::size_t n[] = {400, 600}; + const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; + const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; @@ -279,11 +231,8 @@ void CMultivariateOneOfNPriorTest::testPropagation() const double decayRate = 0.1; maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData, decayRate)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, samples[i]), - TDouble10Vec4Vec1Vec(1, UNIT_WEIGHT_2)); + for (std::size_t i = 0u; i < samples.size(); ++i) { + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), TDouble10Vec4Vec1Vec(1, UNIT_WEIGHT_2)); } double numberSamples = filter.numberSamples(); @@ -311,27 +260,23 @@ void CMultivariateOneOfNPriorTest::testPropagation() CPPUNIT_ASSERT((TVector2(propagatedMean) - TVector2(mean)).euclidean() < eps * TVector2(mean).euclidean()); Eigen::MatrixXd c(2, 2); Eigen::MatrixXd cp(2, 2); - for (std::size_t i = 0u; i < 2; ++i) - { - for (std::size_t j = 0u; j < 2; ++j) - { - c(i,j) = covariance[i][j]; - cp(i,j) = propagatedCovariance[i][j]; + for (std::size_t i = 0u; i < 2; ++i) { + for (std::size_t j = 0u; j < 2; ++j) { + c(i, j) = covariance[i][j]; + cp(i, j) = propagatedCovariance[i][j]; } } - Eigen::VectorXd sv = c.jacobiSvd().singularValues(); + Eigen::VectorXd sv = c.jacobiSvd().singularValues(); Eigen::VectorXd svp = cp.jacobiSvd().singularValues(); LOG_DEBUG("singular values = " << sv.transpose()); LOG_DEBUG("propagated singular values = " << svp.transpose()); - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { CPPUNIT_ASSERT(svp(i) > sv(i)); } CPPUNIT_ASSERT(propagatedLogWeightRatio < logWeightRatio); } -void CMultivariateOneOfNPriorTest::testWeightUpdate() -{ +void CMultivariateOneOfNPriorTest::testWeightUpdate() { LOG_DEBUG("+--------------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testWeightUpdate |"); LOG_DEBUG("+--------------------------------------------------+"); @@ -343,22 +288,20 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() test::CRandomNumbers rng; { - const std::size_t n[] = { 100 }; - const 
double mean[][2] = { { 10.0, 20.0 } }; - const double covariance[][3] = { { 3.0, 1.0, 2.0 } }; + const std::size_t n[] = {100}; + const double mean[][2] = {{10.0, 20.0}}; + const double covariance[][3] = {{3.0, 1.0, 2.0}}; TDouble10Vec1Vec samples; gaussianSamples(rng, boost::size(n), n, mean, covariance, samples); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-10); - const double decayRates[] = { 0.0, 0.004, 0.04 }; + const double decayRates[] = {0.0, 0.004, 0.04}; - for (std::size_t i = 0; i < boost::size(decayRates); ++i) - { + for (std::size_t i = 0; i < boost::size(decayRates); ++i) { maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, sum(filter.weights()), 1e-6); filter.propagateForwardsByTime(1.0); @@ -370,32 +313,22 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { // Test that non-zero decay rate behaves as expected. - const std::size_t n[] = { 4000, 6000 }; - const double means[][2] = - { - { 10.0, 10.0 }, - { 20.0, 20.0 } - }; - const double covariances[][3] = - { - { 8.0, 1.0, 8.0 }, - { 20.0, -4.0, 10.0 } - }; + const std::size_t n[] = {4000, 6000}; + const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; + const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; TDouble10Vec1Vec samples; gaussianSamples(rng, boost::size(n), n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); - const double decayRates[] = { 0.0008, 0.004, 0.02 }; + const double decayRates[] = {0.0008, 0.004, 0.02}; double previousLogWeightRatio = -6700; - for (std::size_t i = 0u; i < boost::size(decayRates); ++i) - { + for (std::size_t i = 0u; i < boost::size(decayRates); ++i) { maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); filter.propagateForwardsByTime(1.0); } @@ -412,25 +345,16 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() } } -void CMultivariateOneOfNPriorTest::testModelUpdate() -{ +void CMultivariateOneOfNPriorTest::testModelUpdate() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testModelUpdate |"); LOG_DEBUG("+-------------------------------------------------+"); maths::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; - const std::size_t n[] = { 400, 600 }; - const double means[][2] = - { - { 10.0, 10.0 }, - { 20.0, 20.0 } - }; - const double covariances[][3] = - { - { 8.0, 1.0, 8.0 }, - { 20.0, -4.0, 10.0 } - }; + const std::size_t n[] = {400, 600}; + const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; + const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; @@ -438,16 +362,10 @@ void CMultivariateOneOfNPriorTest::testModelUpdate() gaussianSamples(rng, boost::size(n), n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); - const maths_t::EDataType dataTypes[] = - { - maths_t::E_IntegerData, - maths_t::E_ContinuousData - }; + const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; - 
for (std::size_t i = 0u; i < boost::size(dataTypes); ++i) - { - maths::CMultivariateNormalConjugate<2> normal = - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i]); + for (std::size_t i = 0u; i < boost::size(dataTypes); ++i) { + maths::CMultivariateNormalConjugate<2> normal = maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i]); maths::CMultivariateMultimodalPrior<2> multimodal = makeMultimodal<2>(dataTypes[i]); maths::CMultivariateOneOfNPrior oneOfN(makeOneOfN<2>(dataTypes[i])); @@ -460,8 +378,7 @@ void CMultivariateOneOfNPriorTest::testModelUpdate() } } -void CMultivariateOneOfNPriorTest::testModelSelection() -{ +void CMultivariateOneOfNPriorTest::testModelSelection() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testModelSelection |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -469,8 +386,7 @@ void CMultivariateOneOfNPriorTest::testModelSelection() // TODO When copula models are available. } -void CMultivariateOneOfNPriorTest::testMarginalLikelihood() -{ +void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testMarginalLikelihood |"); LOG_DEBUG("+--------------------------------------------------------+"); @@ -492,25 +408,21 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() LOG_DEBUG("*** Normal ***"); TDoubleVec meani; - rng.generateUniformSamples( 0.0, 50.0, 2 * nt, meani); + rng.generateUniformSamples(0.0, 50.0, 2 * nt, meani); TDoubleVec covariancesii; - rng.generateUniformSamples( 20.0, 500.0, 2 * nt, covariancesii); + rng.generateUniformSamples(20.0, 500.0, 2 * nt, covariancesii); TDoubleVec covariancesij; - rng.generateUniformSamples(-10.0, 10.0, 1 * nt, covariancesij); + rng.generateUniformSamples(-10.0, 10.0, 1 * nt, covariancesij); - for (std::size_t t = 0u; t < nt; ++t) - { - LOG_DEBUG("*** Test " << t+1 << " ***"); + for (std::size_t t = 0u; t < nt; ++t) { + LOG_DEBUG("*** Test " << t + 1 << " ***"); // Generate the samples. 
- double mean_[] = { meani[2 * t], meani[2 * t + 1] }; - double covariances_[] = - { - covariancesii[2 * t], covariancesij[t], covariancesii[2 * t + 1] - }; + double mean_[] = {meani[2 * t], meani[2 * t + 1]}; + double covariances_[] = {covariancesii[2 * t], covariancesij[t], covariancesii[2 * t + 1]}; TDoubleVec mean(mean_, mean_ + 2); TDoubleVecVec covariances; - covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); + covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); covariances.push_back(TDoubleVec(covariances_ + 1, covariances_ + 3)); TDoubleVecVec samples; maths::CSampling::multivariateNormalSample(mean, covariances, 20, samples); @@ -520,36 +432,30 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() TMeanAccumulator meanMeanError; TMeanAccumulator meanCovarianceError; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); - if (!filter.isNonInformative()) - { + if (!filter.isNonInformative()) { TDouble10Vec m = filter.marginalLikelihoodMean(); TDouble10Vec10Vec v = filter.marginalLikelihoodCovariance(); LOG_DEBUG("m = " << core::CContainerPrinter::print(m)); LOG_DEBUG("v = " << core::CContainerPrinter::print(v)); double trace = 0.0; - for (std::size_t j = 0u; j < v.size(); ++j) - { + for (std::size_t j = 0u; j < v.size(); ++j) { trace += v[j][j]; } - double intervals[][2] = - { - { m[0] - 3.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace) }, - { m[0] - 3.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace) }, - { m[0] - 3.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace) }, - { m[0] - 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace) }, - { m[0] - 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace) }, - { m[0] - 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace) }, - { m[0] + 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace) }, - { m[0] + 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace) }, - { m[0] + 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace) } - }; + double intervals[][2] = {{m[0] - 3.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] - 3.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] - 3.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}}; TVector2 expectedMean(m.begin(), m.end()); - double elements[] = { v[0][0], v[0][1], v[1][1] }; + double elements[] = {v[0][0], v[0][1], v[1][1]}; TMatrix2 expectedCovariance(elements, elements + 3); CUnitKernel<2> likelihoodKernel(filter); @@ -559,22 +465,21 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() double z = 0.0; TVector2 actualMean(0.0); TMatrix2 actualCovariance(0.0); - for (std::size_t j = 0u; j < boost::size(intervals); ++j) - { + for (std::size_t j = 0u; j < boost::size(intervals); ++j) { TDoubleVec a(boost::begin(intervals[j]), boost::end(intervals[j])); TDoubleVec b(a); b[0] += 2.0 * std::sqrt(trace); b[1] += 2.0 * std::sqrt(trace); double zj; - maths::CIntegration::sparseGaussLegendre(likelihoodKernel, a, b, zj); + maths::CIntegration::sparseGaussLegendre( + likelihoodKernel, a, b, zj); 
TVector2 mj; - maths::CIntegration::sparseGaussLegendre(meanKernel, a, b, mj); + maths::CIntegration::sparseGaussLegendre( + meanKernel, a, b, mj); TMatrix2 cj; - maths::CIntegration::sparseGaussLegendre(covarianceKernel, a, b, cj); + maths::CIntegration::sparseGaussLegendre( + covarianceKernel, a, b, cj); z += zj; actualMean += mj; @@ -604,38 +509,33 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { LOG_DEBUG("*** Multimodal ***"); - std::size_t sizes_[] = { 200, 150, 300 }; + std::size_t sizes_[] = {200, 150, 300}; TSizeVec sizes(boost::begin(sizes_), boost::end(sizes_)); TMeanAccumulator meanZ; TMeanAccumulator meanMeanError; TMeanAccumulator meanCovarianceError; - for (std::size_t t = 0u; t < nt; ++t) - { - LOG_DEBUG("*** Test " << t+1 << " ***"); + for (std::size_t t = 0u; t < nt; ++t) { + LOG_DEBUG("*** Test " << t + 1 << " ***"); TVector2Vec means; TMatrix2Vec covariances; TVector2VecVec samples_; rng.generateRandomMultivariateNormals(sizes, means, covariances, samples_); TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < means.size(); ++i) - { + for (std::size_t i = 0u; i < means.size(); ++i) { means[i] += TVector2(20.0); } - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - for (std::size_t j = 0u; j < samples_[i].size(); ++j) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { + for (std::size_t j = 0u; j < samples_[i].size(); ++j) { samples.push_back((TVector2(20.0) + samples_[i][j]).toVector()); } } rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); } @@ -643,50 +543,45 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() TDouble10Vec10Vec v = filter.marginalLikelihoodCovariance(); TVector2 expectedMean(m.begin(), m.end()); - double elements[] = { v[0][0], v[0][1], v[1][1] }; + double elements[] = {v[0][0], v[0][1], v[1][1]}; TMatrix2 expectedCovariance(elements, elements + 3); double z = 0.0; TVector2 actualMean(0.0); TMatrix2 actualCovariance(0.0); - for (std::size_t i = 0u; i < means.size(); ++i) - { + for (std::size_t i = 0u; i < means.size(); ++i) { double trace = covariances[i].trace(); LOG_DEBUG("m = " << means[i]); LOG_DEBUG("v = " << trace); - double intervals[][2] = - { - { means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace) }, - { means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace) }, - { means[i](0) - 3.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace) }, - { means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace) }, - { means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace) }, - { means[i](0) - 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace) }, - { means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace) }, - { means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace) }, - { means[i](0) + 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace) } - }; + double intervals[][2] = {{means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, + {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}, + {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, + 
{means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}}; CUnitKernel<2> likelihoodKernel(filter); CMeanKernel<2> meanKernel(filter); CCovarianceKernel<2> covarianceKernel(filter, expectedMean); - for (std::size_t j = 0u; j < boost::size(intervals); ++j) - { + for (std::size_t j = 0u; j < boost::size(intervals); ++j) { TDoubleVec a(boost::begin(intervals[j]), boost::end(intervals[j])); TDoubleVec b(a); b[0] += 2.0 * std::sqrt(trace); b[1] += 2.0 * std::sqrt(trace); double zj; - maths::CIntegration::sparseGaussLegendre(likelihoodKernel, a, b, zj); + maths::CIntegration::sparseGaussLegendre( + likelihoodKernel, a, b, zj); TVector2 mj; - maths::CIntegration::sparseGaussLegendre(meanKernel, a, b, mj); + maths::CIntegration::sparseGaussLegendre( + meanKernel, a, b, mj); TMatrix2 cj; - maths::CIntegration::sparseGaussLegendre(covarianceKernel, a, b, cj); + maths::CIntegration::sparseGaussLegendre( + covarianceKernel, a, b, cj); z += zj; actualMean += mj; @@ -723,8 +618,7 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() } } -void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() -{ +void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() { LOG_DEBUG("+------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean |"); LOG_DEBUG("+------------------------------------------------------------+"); @@ -748,10 +642,9 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() test::CRandomNumbers rng; - double expectedMeanErrors[] = { 1e-6, 0.05 }; + double expectedMeanErrors[] = {1e-6, 0.05}; - for (std::size_t i = 0u; i < sizes.size(); ++i) - { + for (std::size_t i = 0u; i < sizes.size(); ++i) { LOG_DEBUG("# modes = " << sizes[i].size()); TVector2Vec means; @@ -759,10 +652,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() TVector2VecVec samples_; rng.generateRandomMultivariateNormals(sizes[i], means, covariances, samples_); TDouble10Vec1Vec samples; - for (std::size_t j = 0u; j < samples_.size(); ++j) - { - for (std::size_t k = 0u; k < samples_[j].size(); ++k) - { + for (std::size_t j = 0u; j < samples_.size(); ++j) { + for (std::size_t k = 0u; k < samples_[j].size(); ++k) { samples.push_back(samples_[j][k].toVector()); } } @@ -773,20 +664,17 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() TMeanAccumulator meanError; TMean2Accumulator expectedMean; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); expectedMean.add(TVector2(samples[j])); - if (!filter.isNonInformative()) - { - if (j % 10 == 0) - { + if (!filter.isNonInformative()) { + if (j % 10 == 0) { LOG_DEBUG("expected = " << maths::CBasicStatistics::mean(expectedMean) - << " actual = " << core::CContainerPrinter::print(filter.marginalLikelihoodMean())); + << " actual = " << core::CContainerPrinter::print(filter.marginalLikelihoodMean())); } - double error = (TVector2(filter.marginalLikelihoodMean()) - maths::CBasicStatistics::mean(expectedMean)).euclidean() - / maths::CBasicStatistics::mean(expectedMean).euclidean(); + 
double error = (TVector2(filter.marginalLikelihoodMean()) - maths::CBasicStatistics::mean(expectedMean)).euclidean() /
+                               maths::CBasicStatistics::mean(expectedMean).euclidean();
                 meanError.add(error);
                 CPPUNIT_ASSERT(error < 0.2);
             }
@@ -797,8 +685,7 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean()
     }
 }
 
-void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode()
-{
+void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() {
     LOG_DEBUG("+------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode  |");
     LOG_DEBUG("+------------------------------------------------------------+");
@@ -813,37 +700,29 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode()
     {
         LOG_DEBUG("****** Normal ******");
 
-        const double means[][2] =
-            {
-                { 10.0, 10.0 },
-                { 50.0, 50.0 },
-            };
-        const double covariances[][3] =
-            {
-                {  1.0, 0.0,  1.0 },
-                { 10.0, 0.0, 10.0 },
-            };
-        double learnRates[] = { 0.1, 0.3 };
-
-        for (std::size_t i = 0u; i < boost::size(means); ++i)
-        {
-            for (std::size_t j = 0u; j < boost::size(covariances); ++j)
-            {
-                std::size_t n[] = { 100 };
-                const double mean[][2] = { { means[i][0], means[i][1] } };
-                const double covariance[][3] =
-                    {
-                        { covariances[i][0], covariances[i][1], covariances[i][2] }
-                    };
-                LOG_DEBUG("*** mean = " << core::CContainerPrinter::print(mean[0], mean[0] + 2)
-                          << ", variance = " << covariance[0][0] << " ***");
+        const double means[][2] = {
+            {10.0, 10.0},
+            {50.0, 50.0},
+        };
+        const double covariances[][3] = {
+            {1.0, 0.0, 1.0},
+            {10.0, 0.0, 10.0},
+        };
+        double learnRates[] = {0.1, 0.3};
+
+        for (std::size_t i = 0u; i < boost::size(means); ++i) {
+            for (std::size_t j = 0u; j < boost::size(covariances); ++j) {
+                std::size_t n[] = {100};
+                const double mean[][2] = {{means[i][0], means[i][1]}};
+                const double covariance[][3] = {{covariances[i][0], covariances[i][1], covariances[i][2]}};
+                LOG_DEBUG("*** mean = " << core::CContainerPrinter::print(mean[0], mean[0] + 2) << ", variance = " << covariance[0][0]
+                          << " ***");
 
                 TDouble10Vec1Vec samples;
                 gaussianSamples(rng, 1, n, mean, covariance, samples);
 
                 maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData));
-                for (std::size_t k = 0u; k < samples.size(); ++k)
-                {
+                for (std::size_t k = 0u; k < samples.size(); ++k) {
                     filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), SINGLE_UNIT_WEIGHT_2);
                 }
 
@@ -856,15 +735,14 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode()
                                              maths::CVector<double>(mean[0], mean[0] + 2),
                                              likelihood,
                                              gradientOfLikelihood,
-                                             expectedMode, likelihoods);
+                                             expectedMode,
+                                             likelihoods);
 
                 TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2);
 
-                LOG_DEBUG("marginalLikelihoodMode = " << core::CContainerPrinter::print(mode)
-                          << ", expectedMode = " << expectedMode);
+                LOG_DEBUG("marginalLikelihoodMode = " << core::CContainerPrinter::print(mode) << ", expectedMode = " << expectedMode);
 
-                for (std::size_t k = 0u; k < 2; ++k)
-                {
+                for (std::size_t k = 0u; k < 2; ++k) {
                     CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode(k), mode[k], 0.01 * expectedMode(k));
                 }
             }
         }
     }
@@ -874,24 +752,21 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode()
     {
         LOG_DEBUG("****** Multimodal ******");
 
-        const std::size_t n[] = { 100, 100 };
-        const double means[][2] =
-            {
-                { 10.0, 10.0 },
-                { 16.0, 18.0 },
-            };
-        const double covariances[][3] =
-            {
-                {  4.0, 0.0,  4.0 },
-                { 10.0, 0.0, 10.0 },
-            };
+        const std::size_t n[] = {100, 100};
+        const double means[][2] = {
+            {10.0, 10.0},
+            {16.0, 18.0},
+        };
+        const double covariances[][3] = {
+            {4.0, 0.0, 4.0},
+            {10.0, 0.0, 10.0},
+        };
 
         TDouble10Vec1Vec samples;
         gaussianSamples(rng, boost::size(n), n, means, covariances, samples);
 
         maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData));
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2);
         }
 
@@ -904,22 +779,20 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode()
                                      maths::CVector<double>(means[0], means[0] + 2),
                                      likelihood,
                                      gradientOfLikelihood,
-                                     expectedMode, likelihoods);
+                                     expectedMode,
+                                     likelihoods);
 
     TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2);
 
-    LOG_DEBUG("marginalLikelihoodMode = " << core::CContainerPrinter::print(mode)
-              << ", expectedMode = " << expectedMode);
+    LOG_DEBUG("marginalLikelihoodMode = " << core::CContainerPrinter::print(mode) << ", expectedMode = " << expectedMode);
 
-    for (std::size_t i = 0u; i < 2; ++i)
-    {
+    for (std::size_t i = 0u; i < 2; ++i) {
         CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode(i), mode[i], 0.2 * expectedMode(i));
     }
     }
 }
 
-void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood()
-{
+void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood() {
     LOG_DEBUG("+--------------------------------------------------------------+");
     LOG_DEBUG("| CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood  |");
     LOG_DEBUG("+--------------------------------------------------------------+");
@@ -930,17 +803,15 @@ void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood()
 
     test::CRandomNumbers rng;
 
-    const std::size_t n[] = { 50, 50 };
-    const double means[][2] =
-        {
-            { 10.0, 10.0 },
-            { 25.0, 25.0 },
-        };
-    const double covariances[][3] =
-        {
-            {  4.0, 0.0,  4.0 },
-            { 10.0, 0.0, 10.0 },
-        };
+    const std::size_t n[] = {50, 50};
+    const double means[][2] = {
+        {10.0, 10.0},
+        {25.0, 25.0},
+    };
+    const double covariances[][3] = {
+        {4.0, 0.0, 4.0},
+        {10.0, 0.0, 10.0},
+    };
 
     TDouble10Vec1Vec samples;
     gaussianSamples(rng, boost::size(n), n, means, covariances, samples);
 
     maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData));
 
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
         filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2);
 
-        if (!filter.isNonInformative())
-        {
+        if (!filter.isNonInformative()) {
             TDoubleVec weights = filter.weights();
             LOG_DEBUG("weights = " << core::CContainerPrinter::print(weights));
 
             posteriorModels[1]->sampleMarginalLikelihood(counts[1], multimodalSamples);
 
             TDouble10Vec1Vec expectedSampled(normalSamples);
-            expectedSampled.insert(expectedSampled.end(),
-                                   multimodalSamples.begin(),
-                                   multimodalSamples.end());
+            expectedSampled.insert(expectedSampled.end(), multimodalSamples.begin(), multimodalSamples.end());
             std::sort(expectedSampled.begin(), expectedSampled.end());
 
             LOG_DEBUG("expected samples = " << core::CContainerPrinter::print(expectedSampled));
             LOG_DEBUG("samples = " << core::CContainerPrinter::print(sampled));
 
-            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled),
-                                 core::CContainerPrinter::print(sampled));
+            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), 
core::CContainerPrinter::print(sampled)); } } } -void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() -{ +void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { LOG_DEBUG("+--------------------------------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples |"); LOG_DEBUG("+--------------------------------------------------------------------+"); @@ -998,17 +863,15 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() test::CRandomNumbers rng; - const std::size_t n[] = { 100, 100 }; - const double means[][2] = - { - { 10.0, 10.0 }, - { 16.0, 18.0 }, - }; - const double covariances[][3] = - { - { 4.0, 0.0, 4.0 }, - { 10.0, 0.0, 10.0 }, - }; + const std::size_t n[] = {100, 100}; + const double means[][2] = { + {10.0, 10.0}, + {16.0, 18.0}, + }; + const double covariances[][3] = { + {4.0, 0.0, 4.0}, + {10.0, 0.0, 10.0}, + }; TDouble10Vec1Vec samples; gaussianSamples(rng, boost::size(n), n, means, covariances, samples); @@ -1018,20 +881,14 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() TMeanAccumulator error; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { TDouble10Vec1Vec sample(1, samples[i]); filter.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); double lowerBound, upperBound; maths::CMultivariatePrior::TTail10Vec tail; - CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - COUNT_WEIGHT, - sample, - SINGLE_UNIT_WEIGHT_2, - lowerBound, - upperBound, - tail)); + CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; @@ -1040,29 +897,19 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() TDoubleVec weights(filter.weights()); maths::CMultivariateOneOfNPrior::TPriorCPtr3Vec models(filter.models()); - for (std::size_t j = 0u; j < weights.size(); ++j) - { + for (std::size_t j = 0u; j < weights.size(); ++j) { double modelLowerBound, modelUpperBound; double weight = weights[j]; CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - COUNT_WEIGHT, - sample, - SINGLE_UNIT_WEIGHT_2, - modelLowerBound, - modelUpperBound, - tail)); + maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, modelLowerBound, modelUpperBound, tail)); CPPUNIT_ASSERT_EQUAL(modelLowerBound, modelUpperBound); double modelProbability = (modelLowerBound + modelUpperBound) / 2.0; expectedProbability += weight * modelProbability; } - LOG_DEBUG("weights = " << core::CContainerPrinter::print(weights) - << ", expectedProbability = " << expectedProbability - << ", probability = " << probability); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, - probability, - 0.3 * std::max(expectedProbability, probability)); + LOG_DEBUG("weights = " << core::CContainerPrinter::print(weights) << ", expectedProbability = " << expectedProbability + << ", probability = " << probability); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 0.3 * std::max(expectedProbability, probability)); error.add(std::fabs(probability - expectedProbability)); } @@ -1070,17 +917,16 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.01); } -void 
CMultivariateOneOfNPriorTest::testPersist() -{ +void CMultivariateOneOfNPriorTest::testPersist() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CMultivariateOneOfNPriorTest::testPersist |"); LOG_DEBUG("+---------------------------------------------+"); // Check that persist/restore is idempotent. - const std::size_t n[] = { 100 }; - const double mean[][2] = { { 10.0, 20.0 } }; - const double covariance[][3] = { { 3.0, 1.0, 2.0 } }; + const std::size_t n[] = {100}; + const double mean[][2] = {{10.0, 20.0}}; + const double covariance[][3] = {{3.0, 1.0, 2.0}}; test::CRandomNumbers rng; @@ -1090,8 +936,7 @@ void CMultivariateOneOfNPriorTest::testPersist() maths_t::EDataType dataType = maths_t::E_ContinuousData; maths::CMultivariateOneOfNPrior origFilter(makeOneOfN<2>(dataType)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); } std::size_t dimension = origFilter.dimension(); @@ -1112,14 +957,14 @@ void CMultivariateOneOfNPriorTest::testPersist() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - maths::SDistributionRestoreParams params(dataType, decayRate + 0.1, + maths::SDistributionRestoreParams params(dataType, + decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); maths::CMultivariateOneOfNPrior restoredFilter(dimension, params, traverser); - LOG_DEBUG("orig checksum = " << checksum - << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1132,43 +977,32 @@ void CMultivariateOneOfNPriorTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test *CMultivariateOneOfNPriorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMultivariateOneOfNPriorTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testMultipleUpdate", - &CMultivariateOneOfNPriorTest::testMultipleUpdate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testPropagation", - &CMultivariateOneOfNPriorTest::testPropagation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testWeightUpdate", - &CMultivariateOneOfNPriorTest::testWeightUpdate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testModelUpdate", - &CMultivariateOneOfNPriorTest::testModelUpdate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testModelSelection", - &CMultivariateOneOfNPriorTest::testModelSelection) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testMarginalLikelihood", - &CMultivariateOneOfNPriorTest::testMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean", - &CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode", - &CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - 
"CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood", - &CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples", - &CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMultivariateOneOfNPriorTest::testPersist", - &CMultivariateOneOfNPriorTest::testPersist) ); +CppUnit::Test* CMultivariateOneOfNPriorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultivariateOneOfNPriorTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testMultipleUpdate", + &CMultivariateOneOfNPriorTest::testMultipleUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testPropagation", + &CMultivariateOneOfNPriorTest::testPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testWeightUpdate", + &CMultivariateOneOfNPriorTest::testWeightUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testModelUpdate", + &CMultivariateOneOfNPriorTest::testModelUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testModelSelection", + &CMultivariateOneOfNPriorTest::testModelSelection)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testMarginalLikelihood", + &CMultivariateOneOfNPriorTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean", + &CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode", + &CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood", &CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples", + &CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateOneOfNPriorTest::testPersist", + &CMultivariateOneOfNPriorTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CMultivariateOneOfNPriorTest.h b/lib/maths/unittest/CMultivariateOneOfNPriorTest.h index 9d0623a32d..06d210c395 100644 --- a/lib/maths/unittest/CMultivariateOneOfNPriorTest.h +++ b/lib/maths/unittest/CMultivariateOneOfNPriorTest.h @@ -9,22 +9,21 @@ #include -class CMultivariateOneOfNPriorTest : public CppUnit::TestFixture -{ - public: - void testMultipleUpdate(); - void testPropagation(); - void testWeightUpdate(); - void testModelUpdate(); - void testModelSelection(); - void testMarginalLikelihood(); - void testMarginalLikelihoodMean(); - void testMarginalLikelihoodMode(); - void testSampleMarginalLikelihood(); - void testProbabilityOfLessLikelySamples(); - void testPersist(); +class CMultivariateOneOfNPriorTest : public CppUnit::TestFixture { +public: + void testMultipleUpdate(); + void testPropagation(); + void testWeightUpdate(); + void testModelUpdate(); + void testModelSelection(); + void testMarginalLikelihood(); + void testMarginalLikelihoodMean(); + void testMarginalLikelihoodMode(); + void testSampleMarginalLikelihood(); + void testProbabilityOfLessLikelySamples(); + void 
testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CMultivariateOneOfNPriorTest_h diff --git a/lib/maths/unittest/CNaiveBayesTest.cc b/lib/maths/unittest/CNaiveBayesTest.cc index 7791ad1872..18a43c8ed3 100644 --- a/lib/maths/unittest/CNaiveBayesTest.cc +++ b/lib/maths/unittest/CNaiveBayesTest.cc @@ -34,8 +34,7 @@ using TDoubleSizePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; -void CNaiveBayesTest::testClassification() -{ +void CNaiveBayesTest::testClassification() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CNaiveBayesTest::testClassification |"); LOG_DEBUG("+---------------------------------------+"); @@ -55,30 +54,25 @@ void CNaiveBayesTest::testClassification() test::CRandomNumbers rng; TDoubleVec trainingData[4]; - rng.generateNormalSamples( 0.0, 12.0, 100, trainingData[0]); + rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]); rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]); - rng.generateNormalSamples( 3.0, 14.0, 200, trainingData[2]); + rng.generateNormalSamples(3.0, 14.0, 200, trainingData[2]); rng.generateNormalSamples(-5.0, 24.0, 200, trainingData[3]); TMeanAccumulator meanMeanError; - for (auto initialCount : {0.0, 100.0}) - { - maths::CNormalMeanPrecConjugate normal{ - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; + for (auto initialCount : {0.0, 100.0}) { + maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; maths::CNaiveBayes nb{maths::CNaiveBayesFeatureDensityFromPrior(normal)}; - if (initialCount > 0) - { + if (initialCount > 0) { nb.initialClassCounts({{initialCount, 1}, {initialCount, 2}}); } - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { nb.addTrainingDataPoint(1, {{trainingData[0][i]}, {trainingData[1][i]}}); } - for (std::size_t i = 0u; i < 200; ++i) - { + for (std::size_t i = 0u; i < 200; ++i) { nb.addTrainingDataPoint(2, {{trainingData[2][i]}, {trainingData[3][i]}}); } @@ -109,62 +103,50 @@ void CNaiveBayesTest::testClassification() // ratios for those feature values. 
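For reference, the expectation encoded by the assertions in this hunk is just Bayes' rule: with class priors P(1) = 1/3 and P(2) = 2/3 (100 versus 200 training points) and independent per-feature Gaussians fitted to the training data, the classifier's reported class probabilities should equal the normalised weighted likelihoods. A minimal standalone sketch of that quantity (illustrative only, not part of the patch; the helper name is hypothetical, and boost::math supplies the densities):

    #include <boost/math/distributions/normal.hpp>

    // Hypothetical helper: the normalised posterior P(class 1 | x) for two
    // classes with priors p1 and p2 and two independent Gaussian features
    // per class; this is what the test's lambda compares against
    // nb.highestClassProbabilities(...).
    double posteriorClass1(double p1, double p2,
                           const boost::math::normal& c1f1, const boost::math::normal& c1f2,
                           const boost::math::normal& c2f1, const boost::math::normal& c2f2,
                           double x1, double x2) {
        double l1 = p1 * boost::math::pdf(c1f1, x1) * boost::math::pdf(c1f2, x2);
        double l2 = p2 * boost::math::pdf(c2f1, x1) * boost::math::pdf(c2f2, x2);
        return l1 / (l1 + l2); // the two class posteriors sum to one
    }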
boost::math::normal class1[]{ - boost::math::normal{maths::CBasicStatistics::mean(moments[0]), - std::sqrt(maths::CBasicStatistics::variance(moments[0]))}, - boost::math::normal{maths::CBasicStatistics::mean(moments[1]), - std::sqrt(maths::CBasicStatistics::variance(moments[1]))}}; + boost::math::normal{maths::CBasicStatistics::mean(moments[0]), std::sqrt(maths::CBasicStatistics::variance(moments[0]))}, + boost::math::normal{maths::CBasicStatistics::mean(moments[1]), std::sqrt(maths::CBasicStatistics::variance(moments[1]))}}; boost::math::normal class2[]{ - boost::math::normal{maths::CBasicStatistics::mean(moments[2]), - std::sqrt(maths::CBasicStatistics::variance(moments[2]))}, - boost::math::normal{maths::CBasicStatistics::mean(moments[3]), - std::sqrt(maths::CBasicStatistics::variance(moments[3]))}}; + boost::math::normal{maths::CBasicStatistics::mean(moments[2]), std::sqrt(maths::CBasicStatistics::variance(moments[2]))}, + boost::math::normal{maths::CBasicStatistics::mean(moments[3]), std::sqrt(maths::CBasicStatistics::variance(moments[3]))}}; TDoubleVec xtest; rng.generateNormalSamples(0.0, 64.0, 40, xtest); TMeanAccumulator meanErrors[3]; - for (std::size_t i = 0u; i < xtest.size(); i += 2) - { - auto test = [i](double p1, double p2, const TDoubleSizePrVec &p, TMeanAccumulator &meanError) - { - double Z{p1 + p2}; - p1 /= Z; - p2 /= Z; - double p1_{p[0].second == 1 ? p[0].first : p[1].first}; - double p2_{p[0].second == 1 ? p[1].first : p[0].first}; - - if (i % 10 == 0) - { - LOG_DEBUG(i << ") expected P(1) = " << p1 << ", P(2) = " << p2 - << " got P(1) = " << p1_ << ", P(2) = " << p2_); - } - - CPPUNIT_ASSERT_EQUAL(std::size_t(2), p.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p1_, 0.03); - CPPUNIT_ASSERT_DOUBLES_EQUAL(p2, p2_, 0.03); - if (p1 > 0.001) - { - meanError.add(std::fabs((p1 - p1_) / p1)); - } - if (p2 > 0.001) - { - meanError.add(std::fabs((p2 - p2_) / p2)); - } - }; + for (std::size_t i = 0u; i < xtest.size(); i += 2) { + auto test = [i](double p1, double p2, const TDoubleSizePrVec& p, TMeanAccumulator& meanError) { + double Z{p1 + p2}; + p1 /= Z; + p2 /= Z; + double p1_{p[0].second == 1 ? p[0].first : p[1].first}; + double p2_{p[0].second == 1 ? p[1].first : p[0].first}; + + if (i % 10 == 0) { + LOG_DEBUG(i << ") expected P(1) = " << p1 << ", P(2) = " << p2 << " got P(1) = " << p1_ << ", P(2) = " << p2_); + } + + CPPUNIT_ASSERT_EQUAL(std::size_t(2), p.size()); + CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p1_, 0.03); + CPPUNIT_ASSERT_DOUBLES_EQUAL(p2, p2_, 0.03); + if (p1 > 0.001) { + meanError.add(std::fabs((p1 - p1_) / p1)); + } + if (p2 > 0.001) { + meanError.add(std::fabs((p2 - p2_) / p2)); + } + }; // Supply both feature values. - double p1{P1 * maths::CTools::safePdf(class1[0], xtest[i]) - * maths::CTools::safePdf(class1[1], xtest[i+1])}; - double p2{P2 * maths::CTools::safePdf(class2[0], xtest[i]) - * maths::CTools::safePdf(class2[1], xtest[i+1])}; - probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {xtest[i+1]}}); + double p1{P1 * maths::CTools::safePdf(class1[0], xtest[i]) * maths::CTools::safePdf(class1[1], xtest[i + 1])}; + double p2{P2 * maths::CTools::safePdf(class2[0], xtest[i]) * maths::CTools::safePdf(class2[1], xtest[i + 1])}; + probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {xtest[i + 1]}}); test(p1, p2, probabilities, meanErrors[0]); // Miss out the first feature value. 
- p1 = P1 * maths::CTools::safePdf(class1[1], xtest[i+1]); - p2 = P2 * maths::CTools::safePdf(class2[1], xtest[i+1]); - probabilities = nb.highestClassProbabilities(2, {{}, {xtest[i+1]}}); + p1 = P1 * maths::CTools::safePdf(class1[1], xtest[i + 1]); + p2 = P2 * maths::CTools::safePdf(class2[1], xtest[i + 1]); + probabilities = nb.highestClassProbabilities(2, {{}, {xtest[i + 1]}}); test(p1, p2, probabilities, meanErrors[1]); // Miss out the second feature value. @@ -174,18 +156,15 @@ void CNaiveBayesTest::testClassification() test(p1, p2, probabilities, meanErrors[2]); } - for (std::size_t i = 0u; i < 3; ++i) - { - LOG_DEBUG("Mean relative error = " - << maths::CBasicStatistics::mean(meanErrors[i])); + for (std::size_t i = 0u; i < 3; ++i) { + LOG_DEBUG("Mean relative error = " << maths::CBasicStatistics::mean(meanErrors[i])); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanErrors[i]) < 0.05); meanMeanError += meanErrors[i]; } } } -void CNaiveBayesTest::testPropagationByTime() -{ +void CNaiveBayesTest::testPropagationByTime() { LOG_DEBUG("+------------------------------------------+"); LOG_DEBUG("| CNaiveBayesTest::testPropagationByTime |"); LOG_DEBUG("+------------------------------------------+"); @@ -195,18 +174,15 @@ void CNaiveBayesTest::testPropagationByTime() test::CRandomNumbers rng; - maths::CNormalMeanPrecConjugate normal{ - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.05)}; - maths::CNaiveBayes nb[]{ - maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}, - maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}}; + maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.05)}; + maths::CNaiveBayes nb[]{maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}, + maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}}; TDoubleVec trainingData[4]; - for (std::size_t i = 0u; i < 1000; ++i) - { + for (std::size_t i = 0u; i < 1000; ++i) { double x{static_cast(i)}; - rng.generateNormalSamples( 0.02 * x - 14.0, 16.0, 1, trainingData[0]); - rng.generateNormalSamples( 0.02 * x - 14.0, 16.0, 1, trainingData[1]); + rng.generateNormalSamples(0.02 * x - 14.0, 16.0, 1, trainingData[0]); + rng.generateNormalSamples(0.02 * x - 14.0, 16.0, 1, trainingData[1]); rng.generateNormalSamples(-0.02 * x + 14.0, 16.0, 1, trainingData[2]); rng.generateNormalSamples(-0.02 * x + 14.0, 16.0, 1, trainingData[3]); @@ -216,7 +192,7 @@ void CNaiveBayesTest::testPropagationByTime() nb[1].addTrainingDataPoint(1, {{trainingData[0][0]}, {trainingData[1][0]}}); nb[1].addTrainingDataPoint(2, {{trainingData[2][0]}, {trainingData[3][0]}}); -} + } // Check that the value: // - (-10,-10) gets assigned to class 2 @@ -224,26 +200,20 @@ void CNaiveBayesTest::testPropagationByTime() // for the aged classifier and vice versa. 
{ - TDoubleSizePrVec probabilities[]{ - nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}), - nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}})}; - LOG_DEBUG("Aged class probabilities = " - << core::CContainerPrinter::print(probabilities[0])); - LOG_DEBUG("Class probabilities = " - << core::CContainerPrinter::print(probabilities[1])); + TDoubleSizePrVec probabilities[]{nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}), + nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}})}; + LOG_DEBUG("Aged class probabilities = " << core::CContainerPrinter::print(probabilities[0])); + LOG_DEBUG("Class probabilities = " << core::CContainerPrinter::print(probabilities[1])); CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities[0][0].second); CPPUNIT_ASSERT(probabilities[0][0].first > 0.99); CPPUNIT_ASSERT_EQUAL(std::size_t(1), probabilities[1][0].second); CPPUNIT_ASSERT(probabilities[1][0].first > 0.95); } { - TDoubleSizePrVec probabilities[]{ - nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}), - nb[1].highestClassProbabilities(2, {{10.0}, {10.0}})}; - LOG_DEBUG("Aged class probabilities = " - << core::CContainerPrinter::print(probabilities[0])); - LOG_DEBUG("Class probabilities = " - << core::CContainerPrinter::print(probabilities[1])); + TDoubleSizePrVec probabilities[]{nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}), + nb[1].highestClassProbabilities(2, {{10.0}, {10.0}})}; + LOG_DEBUG("Aged class probabilities = " << core::CContainerPrinter::print(probabilities[0])); + LOG_DEBUG("Class probabilities = " << core::CContainerPrinter::print(probabilities[1])); CPPUNIT_ASSERT_EQUAL(std::size_t(1), probabilities[0][0].second); CPPUNIT_ASSERT(probabilities[0][0].first > 0.99); CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities[1][0].second); @@ -251,8 +221,7 @@ void CNaiveBayesTest::testPropagationByTime() } } -void CNaiveBayesTest::testMemoryUsage() -{ +void CNaiveBayesTest::testMemoryUsage() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CNaiveBayesTest::testMemoryUsage |"); LOG_DEBUG("+------------------------------------+"); @@ -260,28 +229,25 @@ void CNaiveBayesTest::testMemoryUsage() // Check invariants. 
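For reference, the invariant this test checks is that the memory accounting for a pointer-held classifier satisfies dynamicSize(pointer) == memoryUsage() + sizeof(object). A minimal sketch of that identity (illustrative only; the helper is hypothetical and assumes core::CMemory::dynamicSize understands the smart pointer type):

    #include <core/CMemory.h>

    // Hypothetical helper: the accounting identity exercised by the
    // assertions below for a heap-allocated T held by a smart pointer.
    // dynamicSize of a pointer counts the pointee's own footprint plus
    // whatever it allocates, which its memoryUsage() method reports.
    template<typename T, typename PTR>
    bool memoryAccountingHolds(const PTR& p) {
        return ml::core::CMemory::dynamicSize(p) == p->memoryUsage() + sizeof(T);
    }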
using TMemoryUsagePtr = boost::scoped_ptr<core::CMemoryUsage>;
-    using TNaiveBayesPtr = boost::shared_ptr<maths::CNaiveBayes>;
+    using TNaiveBayesPtr = boost::shared_ptr<maths::CNaiveBayes>;
 
     test::CRandomNumbers rng;
 
     TDoubleVec trainingData[4];
-    rng.generateNormalSamples( 0.0, 12.0, 100, trainingData[0]);
+    rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]);
     rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]);
-    rng.generateNormalSamples( 3.0, 14.0, 200, trainingData[2]);
+    rng.generateNormalSamples(3.0, 14.0, 200, trainingData[2]);
     rng.generateNormalSamples(-5.0, 24.0, 200, trainingData[3]);
 
     TMeanAccumulator meanMeanError;
 
-    maths::CNormalMeanPrecConjugate normal{
-            maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1)};
+    maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1)};
     TNaiveBayesPtr nb{new maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.1}};
 
-    for (std::size_t i = 0u; i < 100; ++i)
-    {
+    for (std::size_t i = 0u; i < 100; ++i) {
         nb->addTrainingDataPoint(1, {{trainingData[0][i]}, {trainingData[1][i]}});
     }
-    for (std::size_t i = 0u; i < 200; ++i)
-    {
+    for (std::size_t i = 0u; i < 200; ++i) {
         nb->addTrainingDataPoint(2, {{trainingData[2][i]}, {trainingData[3][i]}});
     }
 
@@ -293,12 +259,10 @@ void CNaiveBayesTest::testMemoryUsage()
     CPPUNIT_ASSERT_EQUAL(memoryUsage, mem->usage());
 
     LOG_DEBUG("Memory = " << core::CMemory::dynamicSize(nb));
-    CPPUNIT_ASSERT_EQUAL(memoryUsage + sizeof(maths::CNaiveBayes),
-                         core::CMemory::dynamicSize(nb));
+    CPPUNIT_ASSERT_EQUAL(memoryUsage + sizeof(maths::CNaiveBayes), core::CMemory::dynamicSize(nb));
 }
 
-void CNaiveBayesTest::testPersist()
-{
+void CNaiveBayesTest::testPersist() {
     LOG_DEBUG("+--------------------------------+");
     LOG_DEBUG("| CNaiveBayesTest::testPersist  |");
     LOG_DEBUG("+--------------------------------+");
 
     test::CRandomNumbers rng;
 
     TDoubleVec trainingData[4];
-    rng.generateNormalSamples( 0.0, 12.0, 100, trainingData[0]);
+    rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]);
     rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]);
-    rng.generateNormalSamples( 3.0, 14.0, 200, trainingData[2]);
+    rng.generateNormalSamples(3.0, 14.0, 200, trainingData[2]);
     rng.generateNormalSamples(-5.0, 24.0, 200, trainingData[3]);
 
     TMeanAccumulator meanMeanError;
 
-    maths::CNormalMeanPrecConjugate normal{
-            maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1)};
+    maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1)};
     maths::CNaiveBayes origNb{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.1};
 
-    for (std::size_t i = 0u; i < 100; ++i)
-    {
+    for (std::size_t i = 0u; i < 100; ++i) {
         origNb.addTrainingDataPoint(1, {{trainingData[0][i]}, {trainingData[1][i]}});
     }
-    for (std::size_t i = 0u; i < 200; ++i)
-    {
+    for (std::size_t i = 0u; i < 200; ++i) {
         origNb.addTrainingDataPoint(2, {{trainingData[2][i]}, {trainingData[3][i]}});
     }
 
@@ -353,22 +314,15 @@ void CNaiveBayesTest::testPersist()
     CPPUNIT_ASSERT_EQUAL(origXml, restoredXml);
 }
 
-CppUnit::Test *CNaiveBayesTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CNaiveBayesTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNaiveBayesTest>(
-                                   "CNaiveBayesTest::testClassification",
-                                   &CNaiveBayesTest::testClassification) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNaiveBayesTest>(
-                                   "CNaiveBayesTest::testPropagationByTime",
-                                   &CNaiveBayesTest::testPropagationByTime) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNaiveBayesTest>(
-                                   "CNaiveBayesTest::testMemoryUsage",
-                                   &CNaiveBayesTest::testMemoryUsage) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNaiveBayesTest>(
-                                   "CNaiveBayesTest::testPersist",
-                                   &CNaiveBayesTest::testPersist) );
+CppUnit::Test* CNaiveBayesTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CNaiveBayesTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CNaiveBayesTest>("CNaiveBayesTest::testClassification", &CNaiveBayesTest::testClassification));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CNaiveBayesTest>("CNaiveBayesTest::testPropagationByTime", &CNaiveBayesTest::testPropagationByTime));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaiveBayesTest>("CNaiveBayesTest::testMemoryUsage", &CNaiveBayesTest::testMemoryUsage));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaiveBayesTest>("CNaiveBayesTest::testPersist", &CNaiveBayesTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CNaiveBayesTest.h b/lib/maths/unittest/CNaiveBayesTest.h
index 2efcf1daa0..ac4ed35cd7 100644
--- a/lib/maths/unittest/CNaiveBayesTest.h
+++ b/lib/maths/unittest/CNaiveBayesTest.h
@@ -9,15 +9,14 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CNaiveBayesTest : public CppUnit::TestFixture
-{
-    public:
-        void testClassification();
-        void testPropagationByTime();
-        void testMemoryUsage();
-        void testPersist();
+class CNaiveBayesTest : public CppUnit::TestFixture {
+public:
+    void testClassification();
+    void testPropagationByTime();
+    void testMemoryUsage();
+    void testPersist();
 
-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CNaiveBayesTest_h
diff --git a/lib/maths/unittest/CNaturalBreaksClassifierTest.cc b/lib/maths/unittest/CNaturalBreaksClassifierTest.cc
index 53d5d295f8..4f4e8d0d91 100644
--- a/lib/maths/unittest/CNaturalBreaksClassifierTest.cc
+++ b/lib/maths/unittest/CNaturalBreaksClassifierTest.cc
@@ -23,8 +23,7 @@ using namespace ml;
 using namespace maths;
 
-namespace
-{
+namespace {
 using TDoubleVec = std::vector<double>;
 using TTuple = CNaturalBreaksClassifier::TTuple;
@@ -32,10 +31,7 @@ using TTupleVec = std::vector<TTuple>;
 
 //! Computes the deviation of a category.
-bool computeDeviation(const TTuple &category,
-                      std::size_t p,
-                      double &result)
-{
+bool computeDeviation(const TTuple& category, std::size_t p, double& result) {
     double count = CBasicStatistics::count(category);
     double variance = CBasicStatistics::variance(category);
     result = std::sqrt((count - 1.0) * variance);
 }
 
 //! Branch and bound exhaustive search for the optimum split.
-bool naturalBreaksBranchAndBound(const TTupleVec &categories,
-                                 std::size_t n,
-                                 std::size_t p,
-                                 TTupleVec &result)
-{
+bool naturalBreaksBranchAndBound(const TTupleVec& categories, std::size_t n, std::size_t p, TTupleVec& result) {
     using TSizeVec = std::vector<std::size_t>;
 
     // Find the minimum variance partition.
@@ -66,8 +58,7 @@ bool naturalBreaksBranchAndBound(const TTupleVec &categories, TSizeVec split; split.reserve(m + 1); - for (std::size_t i = 1u; i < n; ++i) - { + for (std::size_t i = 1u; i < n; ++i) { split.push_back(i); } split.push_back(N); @@ -75,8 +66,7 @@ bool naturalBreaksBranchAndBound(const TTupleVec &categories, TSizeVec end; end.reserve(m + 1); - for (std::size_t i = N - m; i < N; ++i) - { + for (std::size_t i = N - m; i < N; ++i) { end.push_back(i); } end.push_back(N); @@ -84,65 +74,50 @@ bool naturalBreaksBranchAndBound(const TTupleVec &categories, TSizeVec bestSplit; double deviationMin = INF; - for (;;) - { + for (;;) { LOG_TRACE("split = " << core::CContainerPrinter::print(split)); double deviation = 0.0; - for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) - { + for (std::size_t i = 0u, j = 0u; i < split.size(); ++i) { TTuple category; - for (/**/; j < split[i]; ++j) - { + for (/**/; j < split[i]; ++j) { category += categories[j]; } double categoryDeviation; - if (!computeDeviation(category, p, categoryDeviation) - || (deviation >= deviationMin && i < m - 1)) - { + if (!computeDeviation(category, p, categoryDeviation) || (deviation >= deviationMin && i < m - 1)) { // We can prune all possible solutions which have // sub-split (split[0], ... split[i]) since their // deviation is necessarily larger than the minimum // we've found so far. We do this by incrementing // split[j], for j > i, such that we'll increment // split[i] below. - for (++i; i < m; ++i) - { + for (++i; i < m; ++i) { split[i] = N - (m - i); } deviation = INF; - LOG_TRACE("Pruning solutions variation = " << deviation - << ", deviationMin = " << deviationMin - << ", split = " << core::CContainerPrinter::print(split)); - } - else - { + LOG_TRACE("Pruning solutions variation = " << deviation << ", deviationMin = " << deviationMin + << ", split = " << core::CContainerPrinter::print(split)); + } else { deviation += categoryDeviation; } } - if (deviation < deviationMin) - { + if (deviation < deviationMin) { bestSplit = split; deviationMin = deviation; - LOG_TRACE("splitMin = " << core::CContainerPrinter::print(result) - << ", deviationMin = " << deviationMin); + LOG_TRACE("splitMin = " << core::CContainerPrinter::print(result) << ", deviationMin = " << deviationMin); } - if (split == end) - { + if (split == end) { break; } - for (std::size_t i = 1; i <= m; ++i) - { - if (split[m - i] < N - i) - { + for (std::size_t i = 1; i <= m; ++i) { + if (split[m - i] < N - i) { ++split[m - i]; - for (--i; i > 0; --i) - { + for (--i; i > 0; --i) { split[m - i] = split[m - i - 1] + 1; } break; @@ -150,17 +125,14 @@ bool naturalBreaksBranchAndBound(const TTupleVec &categories, } } - if (deviationMin == INF) - { + if (deviationMin == INF) { return false; } result.reserve(n); - for (std::size_t i = 0u, j = 0u; i < bestSplit.size(); ++i) - { + for (std::size_t i = 0u, j = 0u; i < bestSplit.size(); ++i) { TTuple category; - for (/**/; j < bestSplit[i]; ++j) - { + for (/**/; j < bestSplit[i]; ++j) { category += categories[j]; } result.push_back(category); @@ -168,11 +140,9 @@ bool naturalBreaksBranchAndBound(const TTupleVec &categories, return true; } - } -void CNaturalBreaksClassifierTest::testCategories() -{ +void CNaturalBreaksClassifierTest::testCategories() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CNaturalBreaksClassifierTest::testCategories |"); LOG_DEBUG("+------------------------------------------------+"); @@ -186,13 +156,10 @@ void CNaturalBreaksClassifierTest::testCategories() 
TDoubleVec samples; rng.generateUniformSamples(0.0, 15.0, 5000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { classifier.add(samples[i]); - if (i > 0 && i % 50 == 0) - { - for (std::size_t j = 3u; j < 7; ++j) - { + if (i > 0 && i % 50 == 0) { + for (std::size_t j = 3u; j < 7; ++j) { LOG_DEBUG("# samples = " << i << ", # splits = " << j); TTupleVec split; @@ -206,8 +173,7 @@ void CNaturalBreaksClassifierTest::testCategories() LOG_DEBUG("expected = " << core::CContainerPrinter::print(expectedSplit)); LOG_DEBUG("actual = " << core::CContainerPrinter::print(split)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSplit), - core::CContainerPrinter::print(split)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSplit), core::CContainerPrinter::print(split)); } } } @@ -223,21 +189,15 @@ void CNaturalBreaksClassifierTest::testCategories() TDoubleVec samples; rng.generateUniformSamples(0.0, 15.0, 500, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { classifier.add(samples[i]); - if (i > 0 && i % 10 == 0) - { - for (std::size_t j = 3u; j < 7; ++j) - { + if (i > 0 && i % 10 == 0) { + for (std::size_t j = 3u; j < 7; ++j) { std::size_t k = 1u; - do - { + do { k *= 2; - LOG_DEBUG("# samples = " << i - << ", # splits = " << j - << ", minimum cluster size = " << k); + LOG_DEBUG("# samples = " << i << ", # splits = " << j << ", minimum cluster size = " << k); TTupleVec split; bool haveSplit = classifier.categories(j, k, split); @@ -249,16 +209,13 @@ void CNaturalBreaksClassifierTest::testCategories() CPPUNIT_ASSERT_EQUAL(expectSplit, haveSplit); - if (expectSplit && haveSplit) - { + if (expectSplit && haveSplit) { LOG_DEBUG("expected = " << core::CContainerPrinter::print(expectedSplit)); LOG_DEBUG("actual = " << core::CContainerPrinter::print(split)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSplit), - core::CContainerPrinter::print(split)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSplit), core::CContainerPrinter::print(split)); } - } - while (k < i / j); + } while (k < i / j); } } } @@ -281,8 +238,7 @@ void CNaturalBreaksClassifierTest::testCategories() TDoubleVec samples; rng.generateNormalSamples(10.0, 3.0, 5000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { classifier.add(samples[i]); } @@ -293,12 +249,10 @@ void CNaturalBreaksClassifierTest::testCategories() // overlap significantly. 
            double c1 = CBasicStatistics::count(twoSplit[0]);
            double c2 = CBasicStatistics::count(twoSplit[1]);
-            LOG_DEBUG("count ratio = " << c1/c2);
-            CPPUNIT_ASSERT(std::fabs(c1/c2 - 1.0) < 0.8);
-            double separation = std::fabs(CBasicStatistics::mean(twoSplit[0])
-                                          - CBasicStatistics::mean(twoSplit[1]))
-                                / (std::sqrt(CBasicStatistics::variance(twoSplit[0]))
-                                   + std::sqrt(CBasicStatistics::variance(twoSplit[1])));
+            LOG_DEBUG("count ratio = " << c1 / c2);
+            CPPUNIT_ASSERT(std::fabs(c1 / c2 - 1.0) < 0.8);
+            double separation = std::fabs(CBasicStatistics::mean(twoSplit[0]) - CBasicStatistics::mean(twoSplit[1])) /
+                                (std::sqrt(CBasicStatistics::variance(twoSplit[0])) + std::sqrt(CBasicStatistics::variance(twoSplit[1])));
            LOG_DEBUG("separation = " << separation);
            CPPUNIT_ASSERT(std::fabs(separation - 1.0) < 0.4);
        }
@@ -323,29 +277,23 @@ void CNaturalBreaksClassifierTest::testCategories()
        {
            CNaturalBreaksClassifier classifier(6);

-            for (std::size_t i = 0u; i < samples.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < samples.size(); ++i) {
                classifier.add(samples[i]);
            }
            {
                TTupleVec twoSplit;
                classifier.categories(2u, 0, twoSplit);
-                LOG_DEBUG("split 1 = " << CBasicStatistics::print(twoSplit[0])
-                          << ", split 2 = " << CBasicStatistics::print(twoSplit[1])
-                          << ", (mean1,var1) = (" << mean1 << "," << var1 << ")"
-                          << ", (mean2,var2) = (" << mean2 << "," << var2 << ")");
+                LOG_DEBUG("split 1 = " << CBasicStatistics::print(twoSplit[0]) << ", split 2 = " << CBasicStatistics::print(twoSplit[1])
+                                       << ", (mean1,var1) = (" << mean1 << "," << var1 << ")"
+                                       << ", (mean2,var2) = (" << mean2 << "," << var2 << ")");
                CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1) < 0.5);
                CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1) < 0.6);
-                CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0])
-                                         - static_cast<double>(n1))
-                               / static_cast<double>(n1) < 0.33);
+                CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1) < 0.33);
                CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2) < 0.4);
                CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2) < 0.63);
-                CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1])
-                                         - static_cast<double>(n2))
-                               / static_cast<double>(n2) < 0.11);
+                CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2) < 0.11);
            }
        }
@@ -357,40 +305,33 @@ void CNaturalBreaksClassifierTest::testCategories()
        double totalVarError2 = 0.0;
        double totalCountError2 = 0.0;

-        for (int i = 0; i < 500; ++i)
-        {
+        for (int i = 0; i < 500; ++i) {
            rng.random_shuffle(samples.begin(), samples.end());

            CNaturalBreaksClassifier classifier(12);
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
                classifier.add(samples[j]);
            }

            TTupleVec twoSplit;
            classifier.categories(2u, 0, twoSplit);
-            LOG_DEBUG("split 1 = " << CBasicStatistics::print(twoSplit[0])
-                      << ", split 2 = " << CBasicStatistics::print(twoSplit[1]));
+            LOG_DEBUG("split 1 = " << CBasicStatistics::print(twoSplit[0]) << ", split 2 = " << CBasicStatistics::print(twoSplit[1]));

            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1) < 0.7);
            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1) < 0.4);
-            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0])
-                                     - static_cast<double>(n1)) / static_cast<double>(n1) < 0.7);
+            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1) < 0.7);
            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2) < 0.6);
            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2) < 1.0);
-            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1])
-                                     - static_cast<double>(n2)) / static_cast<double>(n2) < 0.3);
+            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2) < 0.3);

            totalMeanError1 += std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1);
            totalVarError1 += std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1);
-            totalCountError1 += std::fabs(CBasicStatistics::count(twoSplit[0])
-                                          - static_cast<double>(n1)) / static_cast<double>(n1);
+            totalCountError1 += std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1);
            totalMeanError2 += std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2);
            totalVarError2 += std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2);
-            totalCountError2 += std::fabs(CBasicStatistics::count(twoSplit[1])
-                                          - static_cast<double>(n2)) / static_cast<double>(n2);
+            totalCountError2 += std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2);
        }

        totalMeanError1 /= 500.0;
@@ -400,16 +341,14 @@ void CNaturalBreaksClassifierTest::testCategories()
        totalVarError2 /= 500.0;
        totalCountError2 /= 500.0;

-        LOG_DEBUG("mean mean error 1 = " << totalMeanError1
-                  << ", mean variance error 1 = " << totalVarError1
-                  << ", mean count error 1 = " << totalCountError1);
+        LOG_DEBUG("mean mean error 1 = " << totalMeanError1 << ", mean variance error 1 = " << totalVarError1
+                                         << ", mean count error 1 = " << totalCountError1);
        CPPUNIT_ASSERT(totalMeanError1 < 0.21);
        CPPUNIT_ASSERT(totalVarError1 < 0.2);
        CPPUNIT_ASSERT(totalCountError1 < 0.3);

-        LOG_DEBUG("mean mean error 2 = " << totalMeanError2
-                  << ", mean variance error 2 = " << totalVarError2
-                  << ", mean count error 2 = " << totalCountError2);
+        LOG_DEBUG("mean mean error 2 = " << totalMeanError2 << ", mean variance error 2 = " << totalVarError2
+                                         << ", mean count error 2 = " << totalCountError2);
        CPPUNIT_ASSERT(totalMeanError2 < 0.3);
        CPPUNIT_ASSERT(totalVarError2 < 0.56);
        CPPUNIT_ASSERT(totalCountError2 < 0.1);
    }
@@ -452,41 +391,34 @@ void CNaturalBreaksClassifierTest::testCategories()
        double totalVarError2 = 0.0;
        double totalCountError2 = 0.0;

-        for (int i = 0; i < 500; ++i)
-        {
+        for (int i = 0; i < 500; ++i) {
            rng.random_shuffle(samples.begin(), samples.end());

            CNaturalBreaksClassifier classifier(12);
-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
                classifier.add(samples[j]);
            }

            TTupleVec twoSplit;
            classifier.categories(3u, 0, twoSplit);
-            LOG_DEBUG("split 1 = " << CBasicStatistics::print(twoSplit[0])
-                      << ", split 2 = " << CBasicStatistics::print(twoSplit[1])
-                      << ", split 3 = " << CBasicStatistics::print(twoSplit[2]));
+            LOG_DEBUG("split 1 = " << CBasicStatistics::print(twoSplit[0]) << ", split 2 = " << CBasicStatistics::print(twoSplit[1])
+                                   << ", split 3 = " << CBasicStatistics::print(twoSplit[2]));

            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1) < 0.15);
            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1) < 0.4);
-            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0])
-                                     - static_cast<double>(n1)) / static_cast<double>(n1) < 0.05);
+            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1) < 0.05);

            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2) < 0.5);
            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2) < 2.5);
-            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1])
-                                     - static_cast<double>(n2)) / static_cast<double>(n2) < 0.15);
+            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2) < 0.15);

            totalMeanError1 += std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1);
            totalVarError1 += std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1);
-            totalCountError1 += std::fabs(CBasicStatistics::count(twoSplit[0])
-                                          - static_cast<double>(n1)) / static_cast<double>(n1);
+            totalCountError1 += std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1);
            totalMeanError2 += std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2);
            totalVarError2 += std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2);
-            totalCountError2 += std::fabs(CBasicStatistics::count(twoSplit[1])
-                                          - static_cast<double>(n2)) / static_cast<double>(n2);
+            totalCountError2 += std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2);
        }

        totalMeanError1 /= 500.0;
@@ -496,24 +428,21 @@ void CNaturalBreaksClassifierTest::testCategories()
        totalVarError2 /= 500.0;
        totalCountError2 /= 500.0;

-        LOG_DEBUG("mean mean error 1 = " << totalMeanError1
-                  << ", mean variance error 1 = " << totalVarError1
-                  << ", mean count error 1 = " << totalCountError1);
+        LOG_DEBUG("mean mean error 1 = " << totalMeanError1 << ", mean variance error 1 = " << totalVarError1
+                                         << ", mean count error 1 = " << totalCountError1);
        CPPUNIT_ASSERT(totalMeanError1 < 0.05);
        CPPUNIT_ASSERT(totalVarError1 < 0.1);
        CPPUNIT_ASSERT(totalCountError1 < 0.01);

-        LOG_DEBUG("mean mean error 2 = " << totalMeanError2
-                  << ", mean variance error 2 = " << totalVarError2
-                  << ", mean count error 2 = " << totalCountError2);
+        LOG_DEBUG("mean mean error 2 = " << totalMeanError2 << ", mean variance error 2 = " << totalVarError2
+                                         << ", mean count error 2 = " << totalCountError2);
        CPPUNIT_ASSERT(totalMeanError2 < 0.15);
        CPPUNIT_ASSERT(totalVarError2 < 1.0);
        CPPUNIT_ASSERT(totalCountError2 < 0.1);
    }
}

-void CNaturalBreaksClassifierTest::testPropagateForwardsByTime()
-{
+void CNaturalBreaksClassifierTest::testPropagateForwardsByTime() {
    LOG_DEBUG("+-------------------------------------------------------------+");
    LOG_DEBUG("| CNaturalBreaksClassifierTest::testPropagateForwardsByTime |");
    LOG_DEBUG("+-------------------------------------------------------------+");
@@ -534,8 +463,7 @@ void CNaturalBreaksClassifierTest::testPropagateForwardsByTime()

    CNaturalBreaksClassifier classifier(4, 0.1);

-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
        classifier.add(samples[i]);
    }

@@ -552,14 +480,12 @@ void CNaturalBreaksClassifierTest::testPropagateForwardsByTime()

    // We expect the category with count of 1 to have been pruned out.
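A plausible sketch of the ageing this relies on, assuming the usual convention that category counts decay exponentially with elapsed time (the helper below is illustrative only; see CNaturalBreaksClassifier::propagateForwardsByTime for the actual rule):

    #include <cmath>

    // A category holding a single outlier decays below any pruning
    // threshold first, e.g. agedCount(1.0, 0.1, 100.0) ~= 4.5e-5.
    double agedCount(double count, double decayRate, double time) {
        return count * std::exp(-decayRate * time);
    }
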
    CPPUNIT_ASSERT_EQUAL(std::size_t(3), categories.size());
-    for (std::size_t i = 0u; i < categories.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < categories.size(); ++i) {
        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(categories[i]) != 100.0);
    }
}

-void CNaturalBreaksClassifierTest::testSample()
-{
+void CNaturalBreaksClassifierTest::testSample() {
    LOG_DEBUG("+--------------------------------------------+");
    LOG_DEBUG("| CNaturalBreaksClassifierTest::testSample |");
    LOG_DEBUG("+--------------------------------------------+");
@@ -592,8 +518,7 @@ void CNaturalBreaksClassifierTest::testSample()
    CNaturalBreaksClassifier classifier(8, 0.1);

    TDoubleVec expectedSampled;
-    for (std::size_t i = 0u; i < 5; ++i)
-    {
+    for (std::size_t i = 0u; i < 5; ++i) {
        classifier.add(samples[i]);
        expectedSampled.push_back(samples[i]);
    }
@@ -608,15 +533,13 @@ void CNaturalBreaksClassifierTest::testSample()
    LOG_DEBUG("sampled = " << core::CContainerPrinter::print(sampled));

    double error = 0.0;
-    for (std::size_t i = 0u; i < expectedSampled.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < expectedSampled.size(); ++i) {
        error += std::fabs(expectedSampled[i] - sampled[i]);
    }

    CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, error, 2e-6);

-    for (std::size_t i = 5u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 5u; i < samples.size(); ++i) {
        classifier.add(samples[i]);
    }

@@ -626,14 +549,10 @@ void CNaturalBreaksClassifierTest::testSample()

    TMeanVarAccumulator meanVar1;
    TMeanVarAccumulator meanVar2;
-    for (std::size_t i = 0u; i < sampled.size(); ++i)
-    {
-        if (sampled[i] < 15.0)
-        {
+    for (std::size_t i = 0u; i < sampled.size(); ++i) {
+        if (sampled[i] < 15.0) {
            meanVar1.add(sampled[i]);
-        }
-        else
-        {
+        } else {
            meanVar2.add(sampled[i]);
        }
    }
@@ -642,22 +561,13 @@ void CNaturalBreaksClassifierTest::testSample()
    LOG_DEBUG("mean, variance 1 = " << meanVar1);
    LOG_DEBUG("expected mean, variance 2 = " << expectedMeanVar2);
    LOG_DEBUG("mean, variance 2 = " << meanVar2);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::mean(expectedMeanVar1),
-                                 CBasicStatistics::mean(meanVar1),
-                                 0.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::variance(expectedMeanVar1),
-                                 CBasicStatistics::variance(meanVar1),
-                                 0.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::mean(expectedMeanVar2),
-                                 CBasicStatistics::mean(meanVar2),
-                                 0.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::variance(expectedMeanVar2),
-                                 CBasicStatistics::variance(meanVar2),
-                                 0.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::mean(expectedMeanVar1), CBasicStatistics::mean(meanVar1), 0.01);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::variance(expectedMeanVar1), CBasicStatistics::variance(meanVar1), 0.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::mean(expectedMeanVar2), CBasicStatistics::mean(meanVar2), 0.01);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::variance(expectedMeanVar2), CBasicStatistics::variance(meanVar2), 0.1);
}

-void CNaturalBreaksClassifierTest::testPersist()
-{
+void CNaturalBreaksClassifierTest::testPersist() {
    LOG_DEBUG("+--------------------------------------------+");
    LOG_DEBUG("| CNaturalBreaksClassifierTest::testPersist |");
    LOG_DEBUG("+--------------------------------------------+");
@@ -676,8 +586,7 @@ void CNaturalBreaksClassifierTest::testPersist()

    CNaturalBreaksClassifier origClassifier(8, 0.1);

-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
        origClassifier.add(samples[i]);
    }

@@ -698,15 +607,15 @@ void CNaturalBreaksClassifierTest::testPersist()

    // Restore the XML
into a new classifier.
    CNaturalBreaksClassifier restoredClassifier(8);
-    maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, 0.2,
+    maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
+                                             0.2,
                                             maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
                                             maths::MINIMUM_CLUSTER_SPLIT_COUNT,
                                             maths::MINIMUM_CATEGORY_COUNT);
-    CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CNaturalBreaksClassifier::acceptRestoreTraverser,
-                                                          &restoredClassifier, boost::cref(params), _1)));
+    CPPUNIT_ASSERT(traverser.traverseSubLevel(
+        boost::bind(&CNaturalBreaksClassifier::acceptRestoreTraverser, &restoredClassifier, boost::cref(params), _1)));

-    LOG_DEBUG("orig checksum = " << checksum
-              << " restored checksum = " << restoredClassifier.checksum());
+    LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredClassifier.checksum());
    CPPUNIT_ASSERT_EQUAL(checksum, restoredClassifier.checksum());

    // The XML representation of the new filter should be the same
@@ -720,22 +629,17 @@ void CNaturalBreaksClassifierTest::testPersist()
    CPPUNIT_ASSERT_EQUAL(origXml, newXml);
}

-CppUnit::Test *CNaturalBreaksClassifierTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CNaturalBreaksClassifierTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
-                                   "CNaturalBreaksClassifierTest::testCategories",
-                                   &CNaturalBreaksClassifierTest::testCategories) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
-                                   "CNaturalBreaksClassifierTest::testPropagateForwardsByTime",
-                                   &CNaturalBreaksClassifierTest::testPropagateForwardsByTime) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
-                                   "CNaturalBreaksClassifierTest::testSample",
-                                   &CNaturalBreaksClassifierTest::testSample) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
-                                   "CNaturalBreaksClassifierTest::testPersist",
-                                   &CNaturalBreaksClassifierTest::testPersist) );
+CppUnit::Test* CNaturalBreaksClassifierTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CNaturalBreaksClassifierTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>("CNaturalBreaksClassifierTest::testCategories",
+                                                                                &CNaturalBreaksClassifierTest::testCategories));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
+        "CNaturalBreaksClassifierTest::testPropagateForwardsByTime", &CNaturalBreaksClassifierTest::testPropagateForwardsByTime));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>("CNaturalBreaksClassifierTest::testSample",
+                                                                                &CNaturalBreaksClassifierTest::testSample));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>("CNaturalBreaksClassifierTest::testPersist",
+                                                                                &CNaturalBreaksClassifierTest::testPersist));

    return suiteOfTests;
}
diff --git a/lib/maths/unittest/CNaturalBreaksClassifierTest.h b/lib/maths/unittest/CNaturalBreaksClassifierTest.h
index 805ab06f60..5fc0fd0af1 100644
--- a/lib/maths/unittest/CNaturalBreaksClassifierTest.h
+++ b/lib/maths/unittest/CNaturalBreaksClassifierTest.h
@@ -9,15 +9,14 @@

#include <cppunit/extensions/HelperMacros.h>

-class CNaturalBreaksClassifierTest : public CppUnit::TestFixture
-{
-    public:
-        void testCategories();
-        void testPropagateForwardsByTime();
-        void testSample();
-        void testPersist();
+class CNaturalBreaksClassifierTest : public CppUnit::TestFixture {
+public:
+    void testCategories();
+    void testPropagateForwardsByTime();
+    void testSample();
+    void testPersist();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
};

#endif // INCLUDED_CNaturalBreaksClassifierTest_h
diff --git a/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc b/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc
index 2d70e2b1b7..813079fcf1 100644
--- a/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc
+++ b/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc
@@ -34,8 +34,7 @@ using namespace ml;
using namespace handy_typedefs;

-namespace
-{
+namespace {

using TDoubleVec = std::vector<double>;
using TDoubleDoublePr = std::pair<double, double>;
@@ -44,16 +43,12 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumula
using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin<maths::CNormalMeanPrecConjugate>;

-CNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData,
-                                   const double &decayRate = 0.0)
-{
+CNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& decayRate = 0.0) {
    return CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate);
}
-
}

-void CNormalMeanPrecConjugateTest::testMultipleUpdate()
-{
+void CNormalMeanPrecConjugateTest::testMultipleUpdate() {
    LOG_DEBUG("+----------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testMultipleUpdate |");
    LOG_DEBUG("+----------------------------------------------------+");
@@ -63,11 +58,7 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate()

    using TEqual = maths::CEqualWithTolerance<double>;

-    const maths_t::EDataType dataTypes[] =
-        {
-            maths_t::E_IntegerData,
-            maths_t::E_ContinuousData
-        };
+    const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData};

    const double mean = 10.0;
    const double variance = 3.0;
@@ -77,13 +68,11 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate()
    TDoubleVec samples;
    rng.generateNormalSamples(mean, variance, 100, samples);

-    for (std::size_t i = 0; i < boost::size(dataTypes); ++i)
-    {
+    for (std::size_t i = 0; i < boost::size(dataTypes); ++i) {
        CNormalMeanPrecConjugate filter1(makePrior(dataTypes[i]));
        CNormalMeanPrecConjugate filter2(filter1);

-        for (std::size_t j = 0u; j < samples.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < samples.size(); ++j) {
            filter1.addSamples(TDouble1Vec(1, samples[j]));
        }
        filter2.addSamples(samples);
@@ -97,21 +86,15 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate()

    // Test with variance scale.

-    for (size_t i = 0; i < boost::size(dataTypes); ++i)
-    {
+    for (size_t i = 0; i < boost::size(dataTypes); ++i) {
        CNormalMeanPrecConjugate filter1(makePrior(dataTypes[i]));
        CNormalMeanPrecConjugate filter2(filter1);

        maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);
-        for (std::size_t j = 0u; j < samples.size(); ++j)
-        {
-            filter1.addSamples(weightStyle,
-                               TDouble1Vec(1, samples[j]),
-                               TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0)));
+        for (std::size_t j = 0u; j < samples.size(); ++j) {
+            filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]), TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0)));
        }
-        filter2.addSamples(weightStyle,
-                           samples,
-                           TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0)));
+        filter2.addSamples(weightStyle, samples, TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0)));

        LOG_DEBUG(filter1.print());
        LOG_DEBUG("vs");
@@ -122,15 +105,13 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate()

    // Test the count weight is equivalent to adding repeated samples.

-    for (size_t i = 0; i < boost::size(dataTypes); ++i)
-    {
+    for (size_t i = 0; i < boost::size(dataTypes); ++i) {
        CNormalMeanPrecConjugate filter1(makePrior(dataTypes[i]));
        CNormalMeanPrecConjugate filter2(filter1);

        double x = 3.0;
        std::size_t count = 10;
-        for (std::size_t j = 0u; j < count; ++j)
-        {
+        for (std::size_t j = 0u; j < count; ++j) {
            filter1.addSamples(TDouble1Vec(1, x));
        }
        filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
@@ -142,8 +123,7 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate()
    }
}

-void CNormalMeanPrecConjugateTest::testPropagation()
-{
+void CNormalMeanPrecConjugateTest::testPropagation() {
    LOG_DEBUG("+-------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testPropagation |");
    LOG_DEBUG("+-------------------------------------------------+");
@@ -160,8 +140,7 @@ void CNormalMeanPrecConjugateTest::testPropagation()

    CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.1));

-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
        filter.addSamples(TDouble1Vec(1, static_cast<double>(samples[i])));
    }

@@ -173,17 +152,14 @@ void CNormalMeanPrecConjugateTest::testPropagation()
    double propagatedMean = filter.mean();
    double propagatedPrecision = filter.precision();

-    LOG_DEBUG("mean = " << mean
-              << ", precision = " << precision
-              << ", propagatedMean = " << propagatedMean
-              << ", propagatedPrecision = " << propagatedPrecision);
+    LOG_DEBUG("mean = " << mean << ", precision = " << precision << ", propagatedMean = " << propagatedMean
+                        << ", propagatedPrecision = " << propagatedPrecision);

    CPPUNIT_ASSERT_DOUBLES_EQUAL(mean, propagatedMean, eps);
    CPPUNIT_ASSERT_DOUBLES_EQUAL(precision, propagatedPrecision, eps);
}

-void CNormalMeanPrecConjugateTest::testMeanEstimation()
-{
+void CNormalMeanPrecConjugateTest::testMeanEstimation() {
    LOG_DEBUG("+----------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testMeanEstimation |");
    LOG_DEBUG("+----------------------------------------------------+");
@@ -193,19 +169,17 @@ void CNormalMeanPrecConjugateTest::testMeanEstimation()
    // mean of a Gaussian process lies in various confidence intervals
    // the correct percentage of the times.

-    const double decayRates[] = { 0.0, 0.001, 0.01 };
+    const double decayRates[] = {0.0, 0.001, 0.01};

    const unsigned int nTests = 500u;
-    const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 };
+    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0};

-    for (std::size_t i = 0; i < boost::size(decayRates); ++i)
-    {
+    for (std::size_t i = 0; i < boost::size(decayRates); ++i) {
        test::CRandomNumbers rng;

-        unsigned int errors[] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u };
+        unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u};

-        for (unsigned int test = 0; test < nTests; ++test)
-        {
+        for (unsigned int test = 0; test < nTests; ++test) {
            double mean = 0.5 * (test + 1);
            double variance = 4.0;

@@ -214,47 +188,37 @@ void CNormalMeanPrecConjugateTest::testMeanEstimation()

            CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[i]));

-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
                filter.addSamples(TDouble1Vec(1, samples[j]));
                filter.propagateForwardsByTime(1.0);
            }

-            for (std::size_t j = 0; j < boost::size(testIntervals); ++j)
-            {
-                TDoubleDoublePr confidenceInterval =
-                        filter.confidenceIntervalMean(testIntervals[j]);
+            for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
+                TDoubleDoublePr confidenceInterval = filter.confidenceIntervalMean(testIntervals[j]);

-                if (mean < confidenceInterval.first || mean > confidenceInterval.second)
-                {
+                if (mean < confidenceInterval.first || mean > confidenceInterval.second) {
                    ++errors[j];
                }
            }
        }

-        for (std::size_t j = 0; j < boost::size(testIntervals); ++j)
-        {
+        for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
            double interval = 100.0 * errors[j] / static_cast<double>(nTests);

-            LOG_DEBUG("interval = " << interval
-                      << ", expectedInterval = " << (100.0 - testIntervals[j]));
+            LOG_DEBUG("interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j]));

            // If the decay rate is zero the intervals should be accurate.
            // Otherwise, they should be an upper bound.
-            if (decayRates[i] == 0.0)
-            {
+            if (decayRates[i] == 0.0) {
                CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[j]), 4.0);
-            }
-            else
-            {
+            } else {
                CPPUNIT_ASSERT(interval <= (100.0 - testIntervals[j]));
            }
        }
    }
}

-void CNormalMeanPrecConjugateTest::testPrecisionEstimation()
-{
+void CNormalMeanPrecConjugateTest::testPrecisionEstimation() {
    LOG_DEBUG("+---------------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testPrecisionEstimation |");
    LOG_DEBUG("+---------------------------------------------------------+");
@@ -264,19 +228,17 @@ void CNormalMeanPrecConjugateTest::testPrecisionEstimation()
    // true precision of a Gaussian process lies in various confidence
    // intervals the correct percentage of the times.

-    const double decayRates[] = { 0.0, 0.001, 0.01 };
+    const double decayRates[] = {0.0, 0.001, 0.01};

    const unsigned int nTests = 1000u;
-    const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 };
+    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0};

-    for (std::size_t i = 0; i < boost::size(decayRates); ++i)
-    {
+    for (std::size_t i = 0; i < boost::size(decayRates); ++i) {
        test::CRandomNumbers rng;

-        unsigned int errors[] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u };
+        unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u};

-        for (unsigned int test = 0; test < nTests; ++test)
-        {
+        for (unsigned int test = 0; test < nTests; ++test) {
            double mean = 0.5 * (test + 1);
            double variance = 2.0 + 0.01 * test;
            double precision = 1 / variance;

@@ -286,60 +248,44 @@ void CNormalMeanPrecConjugateTest::testPrecisionEstimation()

            CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[i]));

-            for (std::size_t j = 0u; j < samples.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < samples.size(); ++j) {
                filter.addSamples(TDouble1Vec(1, samples[j]));
                filter.propagateForwardsByTime(1.0);
            }

-            for (std::size_t j = 0; j < boost::size(testIntervals); ++j)
-            {
-                TDoubleDoublePr confidenceInterval =
-                        filter.confidenceIntervalPrecision(testIntervals[j]);
+            for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
+                TDoubleDoublePr confidenceInterval = filter.confidenceIntervalPrecision(testIntervals[j]);

-                if (precision < confidenceInterval.first ||
-                    precision > confidenceInterval.second)
-                {
+                if (precision < confidenceInterval.first || precision > confidenceInterval.second) {
                    ++errors[j];
                }
            }
        }

-        for (std::size_t j = 0; j < boost::size(testIntervals); ++j)
-        {
+        for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
            double interval = 100.0 * errors[j] / static_cast<double>(nTests);

-            LOG_DEBUG("interval = " << interval
-                      << ", expectedInterval = " << (100.0 - testIntervals[j]));
+            LOG_DEBUG("interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j]));

            // If the decay rate is zero the intervals should be accurate.
            // Otherwise, they should be an upper bound.
-            if (decayRates[i] == 0.0)
-            {
+            if (decayRates[i] == 0.0) {
                CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[j]), 3.0);
-            }
-            else
-            {
+            } else {
                CPPUNIT_ASSERT(interval <= (100.0 - testIntervals[j]));
            }
        }
    }
}

-void CNormalMeanPrecConjugateTest::testMarginalLikelihood()
-{
+void CNormalMeanPrecConjugateTest::testMarginalLikelihood() {
    LOG_DEBUG("+--------------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testMarginalLikelihood |");
    LOG_DEBUG("+--------------------------------------------------------+");

    // Check that the c.d.f. <= 1 at extreme.
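An aside on why these checks work in minus-log space: far in a tail the c.d.f. underflows double precision long before its negative log does, so an interface like minusLogJointCdf can presumably report extreme probabilities that a plain c.d.f. could not. Illustrative standalone sketch, assuming only boost.math:

    #include <boost/math/distributions/normal.hpp>
    #include <iostream>

    int main() {
        boost::math::normal_distribution<> normal(0.0, 1.0);
        // F(-40) ~ 1e-350 underflows to 0 in a double, while
        // -log(F(-40)) ~ 800 is comfortably representable.
        std::cout << boost::math::cdf(normal, -40.0) << '\n'; // prints 0
        return 0;
    }
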
- maths_t::EDataType dataTypes[] = - { - maths_t::E_ContinuousData, - maths_t::E_IntegerData - }; - for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) - { + maths_t::EDataType dataTypes[] = {maths_t::E_ContinuousData, maths_t::E_IntegerData}; + for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { CNormalMeanPrecConjugate filter(makePrior(dataTypes[t])); const double mean = 1.0; @@ -351,23 +297,18 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() rng.generateNormalSamples(mean, variance, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = - { - maths_t::E_SampleCountWeight, - maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight - }; - double weights[] = { 0.1, 1.0, 10.0 }; + maths_t::ESampleWeightStyle weightStyles[] = { + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight}; + double weights[] = {0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) - { - for (std::size_t j = 0u; j < boost::size(weights); ++j) - { + for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 1000.0), TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), - lb, ub); + lb, + ub); LOG_DEBUG("-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -378,26 +319,23 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() // Check that the marginal likelihood and c.d.f. agree for some // test data and that the c.d.f. <= 1. - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; const double mean = 5.0; const double variance = 1.0; test::CRandomNumbers rng; - unsigned int numberSamples[] = { 2u, 10u, 500u }; + unsigned int numberSamples[] = {2u, 10u, 500u}; const double tolerance = 1e-3; - for (std::size_t i = 0; i < boost::size(numberSamples); ++i) - { + for (std::size_t i = 0; i < boost::size(numberSamples); ++i) { TDoubleVec samples; rng.generateNormalSamples(mean, variance, numberSamples[i], samples); - for (std::size_t j = 0; j < boost::size(decayRates); ++j) - { + for (std::size_t j = 0; j < boost::size(decayRates); ++j) { CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[j])); - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); filter.propagateForwardsByTime(1.0); } @@ -406,22 +344,16 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() // c.d.f. at a range of deltas from the true mean. 
            const double eps = 1e-4;

-            double deltas[] =
-                {
-                    -5.0, -4.0, -3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0
-                };
+            double deltas[] = {-5.0, -4.0, -3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0};

-            for (std::size_t k = 0; k < boost::size(deltas); ++k)
-            {
+            for (std::size_t k = 0; k < boost::size(deltas); ++k) {
                double x = mean + deltas[k] * std::sqrt(variance);
                TDouble1Vec sample(1, x);

-                LOG_DEBUG("number = " << numberSamples[i]
-                          << ", sample = " << sample[0]);
+                LOG_DEBUG("number = " << numberSamples[i] << ", sample = " << sample[0]);

                double logLikelihood = 0.0;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter.jointLogMarginalLikelihood(sample, logLikelihood));
+                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
                double pdf = std::exp(logLikelihood);

                double lowerBound = 0.0, upperBound = 0.0;
@@ -467,54 +399,44 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood()
        TDoubleVec samples;
        rng.generateNormalSamples(mean, variance, 100000, samples);

-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
            TDouble1Vec sample(1, samples[i]);
            filter.addSamples(sample);
            double logLikelihood = 0.0;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter.jointLogMarginalLikelihood(sample, logLikelihood));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
            differentialEntropy -= logLikelihood;
        }

        differentialEntropy /= static_cast<double>(samples.size());

-        LOG_DEBUG("differentialEntropy = " << differentialEntropy
-                  << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
+        LOG_DEBUG("differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);

        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 2e-3);
    }

    {
        boost::math::normal_distribution<> normal(mean, std::sqrt(variance));
-        const double varianceScales[] =
-            {
-                0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0
-            };
+        const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0};

        CNormalMeanPrecConjugate filter(makePrior());
        TDoubleVec samples;
        rng.generateNormalSamples(mean, variance, 1000, samples);
        filter.addSamples(samples);

-        const double percentages[] =
-            {
-                5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0
-            };
+        const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0};

        {
            // Test that marginal likelihood confidence intervals are
            // what we'd expect for various variance scales.
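The checks below compare marginalLikelihoodConfidenceInterval(p) against the central interval obtained directly from quantiles, i.e. [q((100-p)/2), q((100+p)/2)]. A standalone sketch of that reference computation (illustrative only, using boost.math as the tests do):

    #include <boost/math/distributions/normal.hpp>
    #include <utility>

    std::pair<double, double> centralInterval(const boost::math::normal_distribution<>& d,
                                              double percentage) {
        // Symmetric interval holding "percentage" percent of the mass.
        double lower = boost::math::quantile(d, (50.0 - percentage / 2.0) / 100.0);
        double upper = boost::math::quantile(d, (50.0 + percentage / 2.0) / 100.0);
        return std::make_pair(lower, upper);
    }
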
TMeanAccumulator error; - for (std::size_t i = 0u; i < boost::size(percentages); ++i) - { + for (std::size_t i = 0u; i < boost::size(percentages); ++i) { double q1, q2; filter.marginalLikelihoodQuantileForTest(50.0 - percentages[i] / 2.0, 1e-3, q1); filter.marginalLikelihoodQuantileForTest(50.0 + percentages[i] / 2.0, 1e-3, q2); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[i]); LOG_DEBUG("[q1, q2] = [" << q1 << ", " << q2 << "]" - << ", interval = " << core::CContainerPrinter::print(interval)); + << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.005); CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 0.005); error.add(std::fabs(interval.first - q1)); @@ -525,22 +447,18 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() } { TMeanAccumulator totalError; - for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) - { + for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { TMeanAccumulator error; double vs = varianceScales[i]; boost::math::normal_distribution<> scaledNormal(mean, std::sqrt(vs * variance)); LOG_DEBUG("*** vs = " << vs << " ***"); - for (std::size_t j = 0u; j < boost::size(percentages); ++j) - { + for (std::size_t j = 0u; j < boost::size(percentages); ++j) { double q1 = boost::math::quantile(scaledNormal, (50.0 - percentages[j] / 2.0) / 100.0); double q2 = boost::math::quantile(scaledNormal, (50.0 + percentages[j] / 2.0) / 100.0); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble4Vec(1, vs)); + percentages[j], maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), TDouble4Vec(1, vs)); LOG_DEBUG("[q1, q2] = [" << q1 << ", " << q2 << "]" - << ", interval = " << core::CContainerPrinter::print(interval)); + << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.3); CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 0.3); error.add(std::fabs(interval.first - q1)); @@ -556,8 +474,7 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() } } -void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() -{ +void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { LOG_DEBUG("+------------------------------------------------------------+"); LOG_DEBUG("| CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean |"); LOG_DEBUG("+------------------------------------------------------------+"); @@ -565,17 +482,14 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() // Test that the expectation of the marginal likelihood matches // the expected mean of the marginal likelihood. 
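The reference value marginalLikelihoodMeanForTest is described in these tests as a numerical integration of x p(x). A minimal sketch of that idea, using a midpoint rule over a truncated support (illustrative, not the production code; the function name and tolerances are hypothetical):

    #include <boost/math/distributions/normal.hpp>
    #include <cstddef>

    double meanByIntegration(const boost::math::normal_distribution<>& d) {
        double a = boost::math::quantile(d, 1e-6);
        double b = boost::math::quantile(d, 1.0 - 1e-6);
        const std::size_t n = 100000;
        double h = (b - a) / static_cast<double>(n);
        double result = 0.0;
        for (std::size_t i = 0; i < n; ++i) {
            double x = a + (static_cast<double>(i) + 0.5) * h;
            result += x * boost::math::pdf(d, x) * h; // E[X] = integral of x p(x) dx
        }
        return result;
    }
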
-    const double means[] = { 1.0, 5.0, 100.0 };
-    const double variances[] = { 2.0, 5.0, 20.0 };
+    const double means[] = {1.0, 5.0, 100.0};
+    const double variances[] = {2.0, 5.0, 20.0};

    test::CRandomNumbers rng;

-    for (std::size_t i = 0u; i < boost::size(means); ++i)
-    {
-        for (std::size_t j = 0u; j < boost::size(variances); ++j)
-        {
-            LOG_DEBUG("*** mean = " << means[i]
-                      << ", variance = " << variances[j] << " ***");
+    for (std::size_t i = 0u; i < boost::size(means); ++i) {
+        for (std::size_t j = 0u; j < boost::size(variances); ++j) {
+            LOG_DEBUG("*** mean = " << means[i] << ", variance = " << variances[j] << " ***");

            CNormalMeanPrecConjugate filter(makePrior());

@@ -587,26 +501,20 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean()
            rng.generateNormalSamples(means[i], variances[j], 100, samples);

            TMeanAccumulator relativeError;
-            for (std::size_t k = 0u; k < samples.size(); ++k)
-            {
+            for (std::size_t k = 0u; k < samples.size(); ++k) {
                filter.addSamples(TDouble1Vec(1, samples[k]));

                double expectedMean;
                CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean));

-                if (k % 10 == 0)
-                {
-                    LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean()
-                              << ", expectedMean = " << expectedMean);
+                if (k % 10 == 0) {
+                    LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean);
                }

                // The error is at the precision of the numerical integration.
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean,
-                                             filter.marginalLikelihoodMean(),
-                                             0.01);
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 0.01);

-                relativeError.add(std::fabs(expectedMean - filter.marginalLikelihoodMean())
-                                  / expectedMean);
+                relativeError.add(std::fabs(expectedMean - filter.marginalLikelihoodMean()) / expectedMean);
            }

            LOG_DEBUG("relativeError = " << maths::CBasicStatistics::mean(relativeError));
@@ -615,8 +523,7 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean()
        }
    }
}

-void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode()
-{
+void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() {
    LOG_DEBUG("+-----------------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode |");
    LOG_DEBUG("+-----------------------------------------------------------+");

    // Test that the marginal likelihood mode is what we'd expect
    // with various variance scales.

-    const double means[] = { 1.0, 5.0, 100.0 };
-    const double variances[] = { 2.0, 5.0, 20.0 };
-    const double varianceScales[] =
-        {
-            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0
-        };
+    const double means[] = {1.0, 5.0, 100.0};
+    const double variances[] = {2.0, 5.0, 20.0};
+    const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0};

    test::CRandomNumbers rng;

-    for (std::size_t i = 0u; i < boost::size(means); ++i)
-    {
-        for (std::size_t j = 0u; j < boost::size(variances); ++j)
-        {
-            LOG_DEBUG("*** mean = " << means[i]
-                      << ", variance = " << variances[j] << " ***");
+    for (std::size_t i = 0u; i < boost::size(means); ++i) {
+        for (std::size_t j = 0u; j < boost::size(variances); ++j) {
+            LOG_DEBUG("*** mean = " << means[i] << ", variance = " << variances[j] << " ***");

            CNormalMeanPrecConjugate filter(makePrior());

@@ -649,24 +550,21 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode()
            maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);
            TDouble4Vec weight(1, 1.0);
-            for (std::size_t k = 0u; k < boost::size(varianceScales); ++k)
-            {
+            for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) {
                double vs = varianceScales[k];
                weight[0] = vs;
                boost::math::normal_distribution<> scaledNormal(means[i], std::sqrt(vs * variances[j]));
                double expectedMode = boost::math::mode(scaledNormal);
                LOG_DEBUG("marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight)
-                          << ", expectedMode = " << expectedMode);
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode,
-                                             filter.marginalLikelihoodMode(weightStyle, weight),
-                                             0.12 * std::sqrt(variances[j]));
+                                                      << ", expectedMode = " << expectedMode);
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                    expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 0.12 * std::sqrt(variances[j]));
            }
        }
    }
}

-void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance()
-{
+void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() {
    LOG_DEBUG("+----------------------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance |");
    LOG_DEBUG("+----------------------------------------------------------------+");
@@ -675,17 +573,14 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance()
    // Test that the expectation of the variance of
    // the marginal likelihood matches the expected variance of the
    // marginal likelihood.
- const double means[] = { 1.0, 5.0, 100.0 }; - const double variances[] = { 2.0, 5.0, 20.0 }; + const double means[] = {1.0, 5.0, 100.0}; + const double variances[] = {2.0, 5.0, 20.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { - for (std::size_t j = 0u; j < boost::size(variances); ++j) - { - LOG_DEBUG("*** mean = " << means[i] - << ", variance = " << variances[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(means); ++i) { + for (std::size_t j = 0u; j < boost::size(variances); ++j) { + LOG_DEBUG("*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); CNormalMeanPrecConjugate filter(makePrior()); @@ -697,25 +592,19 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() rng.generateNormalSamples(means[i], variances[j], 100, samples); TMeanAccumulator relativeError; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedVariance; CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); - if (k % 10 == 0) - { + if (k % 10 == 0) { LOG_DEBUG("marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() - << ", expectedVariance = " << expectedVariance); + << ", expectedVariance = " << expectedVariance); } - // The error is at the precision of the numerical integration. - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, - filter.marginalLikelihoodVariance(), - 0.2); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.2); - relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) - / expectedVariance); + relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance); } LOG_DEBUG("relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -724,8 +613,7 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() } } -void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() -{ +void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG("+--------------------------------------------------------------+"); LOG_DEBUG("| CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood |"); LOG_DEBUG("+--------------------------------------------------------------+"); @@ -753,8 +641,7 @@ void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() TDouble1Vec sampled; - for (std::size_t i = 0u; i < 1u; ++i) - { + for (std::size_t i = 0u; i < 1u; ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); sampled.clear(); @@ -767,8 +654,7 @@ void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() TMeanAccumulator meanVarError; std::size_t numberSampled = 20u; - for (std::size_t i = 1u; i < samples.size(); ++i) - { + for (std::size_t i = 1u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); sampled.clear(); @@ -779,32 +665,26 @@ void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() sampledMoments = std::for_each(sampled.begin(), sampled.end(), sampledMoments); LOG_DEBUG("expectedMean = " << filter.marginalLikelihoodMean() - << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); + << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); LOG_DEBUG("expectedVariance = " << filter.marginalLikelihoodVariance() - << ", sampledVariance = " << maths::CBasicStatistics::variance(sampledMoments)); + << ", sampledVariance = " << 
maths::CBasicStatistics::variance(sampledMoments));

-        CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(),
-                                     maths::CBasicStatistics::mean(sampledMoments),
-                                     1e-8);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMoments), 1e-8);
        CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(),
                                     maths::CBasicStatistics::variance(sampledMoments),
                                     0.2 * filter.marginalLikelihoodVariance());

-        meanVarError.add(std::fabs( filter.marginalLikelihoodVariance()
-                                   - maths::CBasicStatistics::variance(sampledMoments))
-                         / filter.marginalLikelihoodVariance());
+        meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - maths::CBasicStatistics::variance(sampledMoments)) /
+                         filter.marginalLikelihoodVariance());

        std::sort(sampled.begin(), sampled.end());
-        for (std::size_t j = 1u; j < sampled.size(); ++j)
-        {
-            double q = 100.0 * static_cast<double>(j)
-                             / static_cast<double>(numberSampled);
+        for (std::size_t j = 1u; j < sampled.size(); ++j) {
+            double q = 100.0 * static_cast<double>(j) / static_cast<double>(numberSampled);

            double expectedQuantile;
            CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile));

-            LOG_DEBUG("quantile = " << q
-                      << ", x_quantile = " << expectedQuantile
-                      << ", quantile range = [" << sampled[j - 1] << "," << sampled[j] << "]");
+            LOG_DEBUG("quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[j - 1] << ","
+                                    << sampled[j] << "]");

            CPPUNIT_ASSERT(expectedQuantile >= sampled[j - 1]);
            CPPUNIT_ASSERT(expectedQuantile <= sampled[j]);
@@ -815,8 +695,7 @@ void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood()
    CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanVarError) < 0.04);
}

-void CNormalMeanPrecConjugateTest::testCdf()
-{
+void CNormalMeanPrecConjugateTest::testCdf() {
    LOG_DEBUG("+-----------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testCdf |");
    LOG_DEBUG("+-----------------------------------------+");
@@ -828,14 +707,13 @@ void CNormalMeanPrecConjugateTest::testCdf()
    const double mean = 20.0;
    const double variance = 5.0;
-    const std::size_t n[] = { 20u, 80u };
+    const std::size_t n[] = {20u, 80u};

    test::CRandomNumbers rng;

    CNormalMeanPrecConjugate filter(makePrior());

-    for (std::size_t i = 0u; i < boost::size(n); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(n); ++i) {
        TDoubleVec samples;
        rng.generateNormalSamples(mean, variance, n[i], samples);

        filter.addSamples(samples);

        double lowerBound;
        double upperBound;
        CPPUNIT_ASSERT(!filter.minusLogJointCdf(TDouble1Vec(), lowerBound, upperBound));
        CPPUNIT_ASSERT(!filter.minusLogJointCdfComplement(TDouble1Vec(), lowerBound, upperBound));

-        for (std::size_t j = 1u; j < 500; ++j)
-        {
+        for (std::size_t j = 1u; j < 500; ++j) {
            double x = static_cast<double>(j) / 2.0;

            CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lowerBound, upperBound));
            double f = (lowerBound + upperBound) / 2.0;
            CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lowerBound, upperBound));
            double fComplement = (lowerBound + upperBound) / 2.0;

-            LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f)
-                      << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement));
+            LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement));
            CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10);
        }
    }
}

-void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples()
-{
+void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() {
    LOG_DEBUG("+--------------------------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples |");
    LOG_DEBUG("+--------------------------------------------------------------------+");
@@ -875,20 +750,17 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples()
    // We also check that the tail calculation attributes samples to
    // the appropriate tail of the distribution.

-    const double means[] = { 0.1, 1.5, 3.0 };
-    const double variances[] = { 0.2, 0.4, 1.5 };
-    const double vs[] = { 0.5, 1.0, 2.0 };
+    const double means[] = {0.1, 1.5, 3.0};
+    const double variances[] = {0.2, 0.4, 1.5};
+    const double vs[] = {0.5, 1.0, 2.0};

    test::CRandomNumbers rng;

    TMeanAccumulator meanError;

-    for (size_t i = 0; i < boost::size(means); ++i)
-    {
-        for (size_t j = 0; j < boost::size(variances); ++j)
-        {
-            LOG_DEBUG("means = " << means[i]
-                      << ", variance = " << variances[j]);
+    for (size_t i = 0; i < boost::size(means); ++i) {
+        for (size_t j = 0; j < boost::size(variances); ++j) {
+            LOG_DEBUG("means = " << means[i] << ", variance = " << variances[j]);

            TDoubleVec samples;
            rng.generateNormalSamples(means[i], variances[j], 1000, samples);

            CNormalMeanPrecConjugate filter(makePrior());
            filter.addSamples(samples);

            double mean = filter.mean();
-            double sd = std::sqrt(1.0 / filter.precision());
+            double sd = std::sqrt(1.0 / filter.precision());

            TDoubleVec likelihoods;
-            for (std::size_t k = 0u; k < samples.size(); ++k)
-            {
+            for (std::size_t k = 0u; k < samples.size(); ++k) {
                double likelihood;
                filter.jointLogMarginalLikelihood(TDouble1Vec(1, samples[k]), likelihood);
                likelihoods.push_back(likelihood);
            }
            std::sort(likelihoods.begin(), likelihoods.end());

            boost::math::normal_distribution<> normal(mean, sd);
-            for (std::size_t k = 1u; k < 10; ++k)
-            {
+            for (std::size_t k = 1u; k < 10; ++k) {
                double x = boost::math::quantile(normal, static_cast<double>(k) / 10.0);

                TDouble1Vec sample(1, x);
                double fx;
                filter.jointLogMarginalLikelihood(sample, fx);

-                double px = static_cast<double>(std::lower_bound(likelihoods.begin(),
-                                                                 likelihoods.end(), fx)
-                                                - likelihoods.begin())
-                            / static_cast<double>(likelihoods.size());
+                double px = static_cast<double>(std::lower_bound(likelihoods.begin(), likelihoods.end(), fx) - likelihoods.begin()) /
+                            static_cast<double>(likelihoods.size());

                double lb, ub;
                filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb, ub);

                double ssd = std::sqrt(px * (1.0 - px) / static_cast<double>(samples.size()));

-                LOG_DEBUG("expected P(x) = " << px
-                          << ", actual P(x) = " << (lb + ub) / 2.0
-                          << " sample sd = " << ssd);
+                LOG_DEBUG("expected P(x) = " << px << ", actual P(x) = " << (lb + ub) / 2.0 << " sample sd = " << ssd);

                CPPUNIT_ASSERT_DOUBLES_EQUAL(px, (lb + ub) / 2.0, 3.0 * ssd);

@@ -938,11 +804,9 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples()

            maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);

-            for (std::size_t k = 0u; k < boost::size(vs); ++k)
-            {
-                double mode = filter.marginalLikelihoodMode(weightStyle,
-                                                            TDouble4Vec(1, vs[k]));
-                double ss[] = { 0.9 * mode, 1.1 * mode };
+            for (std::size_t k = 0u; k < boost::size(vs); ++k) {
+                double mode = filter.marginalLikelihoodMode(weightStyle, TDouble4Vec(1, vs[k]));
+                double ss[] = {0.9 * mode, 1.1 * mode};

                LOG_DEBUG("vs = " << vs[k] << ", mode = " << mode);

                double lb, ub;
                maths_t::ETail tail;

                {
-                    filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                          weightStyle,
-                                                          TDouble1Vec(1, ss[0]),
-                                                          TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])),
-                                                          lb, ub, tail);
+                    filter.probabilityOfLessLikelySamples(
+                        maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
                    CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
-                    if (mode > 0.0)
-                    {
+                    if (mode > 0.0) {
                        filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
                                                              weightStyle,
                                                              TDouble1Vec(ss, ss + 2),
                                                              TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                              lb, ub, tail);
+                                                              lb,
+                                                              ub,
+                                                              tail);
                        CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
                        filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow,
                                                              weightStyle,
                                                              TDouble1Vec(ss, ss + 2),
                                                              TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                              lb, ub, tail);
+                                                              lb,
+                                                              ub,
+                                                              tail);
                        CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
                        filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove,
                                                              weightStyle,
                                                              TDouble1Vec(ss, ss + 2),
                                                              TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                              lb, ub, tail);
+                                                              lb,
+                                                              ub,
+                                                              tail);
                        CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
                    }
                }
-                if (mode > 0.0)
-                {
-                    filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                          weightStyle,
-                                                          TDouble1Vec(1, ss[1]),
-                                                          TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])),
-                                                          lb, ub, tail);
+                if (mode > 0.0) {
+                    filter.probabilityOfLessLikelySamples(
+                        maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
                    CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
-                    filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                          weightStyle,
-                                                          TDouble1Vec(ss, ss + 2),
-                                                          TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                          lb, ub, tail);
+                    filter.probabilityOfLessLikelySamples(
+                        maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                    CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
                    filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow,
                                                          weightStyle,
                                                          TDouble1Vec(ss, ss + 2),
                                                          TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                          lb, ub, tail);
+                                                          lb,
+                                                          ub,
+                                                          tail);
                    CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
                    filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove,
                                                          weightStyle,
                                                          TDouble1Vec(ss, ss + 2),
                                                          TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                          lb, ub, tail);
+                                                          lb,
+                                                          ub,
+                                                          tail);
                    CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
                }
            }
@@ -1013,8 +876,7 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples()

    CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.01);
}

-void CNormalMeanPrecConjugateTest::testAnomalyScore()
-{
+void CNormalMeanPrecConjugateTest::testAnomalyScore() {
    LOG_DEBUG("+--------------------------------------------------+");
    LOG_DEBUG("| CNormalMeanPrecConjugateTest::testAnomalyScore |");
    LOG_DEBUG("+--------------------------------------------------+");
@@ -1028,15 +890,15 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore()

    using TUIntVec = std::vector<unsigned int>;

-    const double decayRates[] = { 0.0, 0.001, 0.01 };
+    const double decayRates[] = {0.0, 0.001, 0.01};

-    const double means[] = { 3.0, 15.0, 200.0 };
-    const double variances[] = { 2.0, 5.0, 50.0 };
+    const double means[] = {3.0, 15.0, 200.0};
+    const double variances[] = {2.0, 5.0, 50.0};

    const double threshold = 0.01;

-    const unsigned int anomalyTimes[] = { 30u, 120u, 300u, 420u };
-    const double anomalies[] = { 4.0, 5.0, 10.0, 15.0, 0.0 };
+    const unsigned int anomalyTimes[] = {30u, 120u, 300u, 420u};
+    const double anomalies[] = {4.0, 5.0, 10.0, 15.0, 0.0};

    test::CRandomNumbers rng;

@@ -1046,12 +908,10 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore()
    file.open("results.m");

    double totalFalsePositiveRate = 0.0;
-    std::size_t totalPositives[] = { 0u, 0u, 0u };
+    std::size_t totalPositives[] = {0u, 0u, 0u};

-    for (std::size_t i = 0; i < boost::size(means); ++i)
-    {
-        for (std::size_t j = 0; j < boost::size(variances); ++j)
-        {
+    for (std::size_t i = 0; i < boost::size(means); ++i) {
+        for (std::size_t j = 0; j < boost::size(variances); ++j) {
            LOG_DEBUG("mean = " << means[i] << ", variance = " << variances[j]);

            boost::math::normal_distribution<> normal(means[i], std::sqrt(variances[j]));

            TDoubleVec samples;
            rng.generateNormalSamples(means[i], variances[j], 500, samples);

-            for (std::size_t k = 0; k < boost::size(decayRates); ++k)
-            {
+            for (std::size_t k = 0; k < boost::size(decayRates); ++k) {
                CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[k]));

                ++test;
@@ -1071,21 +930,18 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore()
                scores << "score" << test << " = [";

                TUIntVec candidateAnomalies;
-                for (unsigned int time = 0; time < samples.size(); ++time)
-                {
-                    double sample = samples[time]
-                                    + (anomalies[std::find(boost::begin(anomalyTimes),
-                                                           boost::end(anomalyTimes), time)
-                                                 - boost::begin(anomalyTimes)]
-                                       * boost::math::standard_deviation(normal));
+                for (unsigned int time = 0; time < samples.size(); ++time) {
+                    double sample =
+                        samples[time] +
+                        (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - boost::begin(anomalyTimes)] *
+                         boost::math::standard_deviation(normal));

                    TDouble1Vec sampleVec(1, sample);
                    filter.addSamples(sampleVec);

                    double score;
                    filter.anomalyScore(maths_t::E_TwoSided, sampleVec, score);
-                    if (score > threshold)
-                    {
+                    if (score > threshold) {
                        candidateAnomalies.push_back(time);
                    }

@@ -1097,8 +953,7 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore()
                x << "];\n";
                scores << "];\n";
-                file << x.str() << scores.str()
-                     << "plot(x" << test << ", score" << test << ");\n"
+                file << x.str() << scores.str() << "plot(x" << test << ", score" << test << ");\n"
                     << "input(\"Hit any key for next test\");\n\n";

                TUIntVec falsePositives;
                std::set_difference(candidateAnomalies.begin(),
                                    candidateAnomalies.end(),
                                    boost::begin(anomalyTimes),
                                    boost::end(anomalyTimes),
                                    std::back_inserter(falsePositives));

-                double falsePositiveRate =
-                        static_cast<double>(falsePositives.size())
-                        / static_cast<double>(samples.size());
+                double falsePositiveRate = static_cast<double>(falsePositives.size()) / static_cast<double>(samples.size());

                totalFalsePositiveRate += falsePositiveRate;

                TUIntVec positives;
                std::set_intersection(candidateAnomalies.begin(),
                                      candidateAnomalies.end(),
                                      boost::begin(anomalyTimes),
                                      boost::end(anomalyTimes),
                                      std::back_inserter(positives));

-                LOG_DEBUG("falsePositiveRate = " << falsePositiveRate
-                          << ", positives = " << positives.size());
+                LOG_DEBUG("falsePositiveRate = " << falsePositiveRate << ", positives = " << positives.size());

                // False alarm rate should be less than 0.6%.
CPPUNIT_ASSERT(falsePositiveRate <= 0.006); @@ -1139,8 +991,7 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore() LOG_DEBUG("totalFalsePositiveRate = " << totalFalsePositiveRate); - for (std::size_t i = 0; i < boost::size(totalPositives); ++i) - { + for (std::size_t i = 0; i < boost::size(totalPositives); ++i) { LOG_DEBUG("positives = " << totalPositives[i]); // Should detect all but one anomaly. @@ -1151,8 +1002,7 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore() CPPUNIT_ASSERT(totalFalsePositiveRate < 0.003); } -void CNormalMeanPrecConjugateTest::testIntegerData() -{ +void CNormalMeanPrecConjugateTest::testIntegerData() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CNormalMeanPrecConjugateTest::testIntegerData |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -1178,8 +1028,7 @@ void CNormalMeanPrecConjugateTest::testIntegerData() CNormalMeanPrecConjugate filter1(makePrior(maths_t::E_IntegerData)); CNormalMeanPrecConjugate filter2(makePrior(maths_t::E_ContinuousData)); - for (std::size_t i = 0; i < nSamples; ++i) - { + for (std::size_t i = 0; i < nSamples; ++i) { double x = floor(samples[i]); TDouble1Vec sample(1, x); @@ -1195,8 +1044,7 @@ void CNormalMeanPrecConjugateTest::testIntegerData() TMeanAccumulator meanLogLikelihood1; TMeanAccumulator meanLogLikelihood2; - for (std::size_t j = 0u; j < nSamples; ++j) - { + for (std::size_t j = 0u; j < nSamples; ++j) { double x = std::floor(samples[j]); TDouble1Vec sample(1, x); @@ -1211,11 +1059,10 @@ void CNormalMeanPrecConjugateTest::testIntegerData() } LOG_DEBUG("meanLogLikelihood1 = " << maths::CBasicStatistics::mean(meanLogLikelihood1) - << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); + << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanLogLikelihood1), - maths::CBasicStatistics::mean(meanLogLikelihood2), - 0.02); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(meanLogLikelihood1), maths::CBasicStatistics::mean(meanLogLikelihood2), 0.02); } { @@ -1230,8 +1077,7 @@ void CNormalMeanPrecConjugateTest::testIntegerData() TMeanAccumulator meanProbability1; TMeanAccumulator meanProbability2; - for (std::size_t i = 0; i < nSamples; ++i) - { + for (std::size_t i = 0; i < nSamples; ++i) { double x = std::floor(samples[i]); TDouble1Vec sample(1, x); @@ -1258,47 +1104,36 @@ void CNormalMeanPrecConjugateTest::testIntegerData() } } -void CNormalMeanPrecConjugateTest::testLowVariationData() -{ +void CNormalMeanPrecConjugateTest::testLowVariationData() { LOG_DEBUG("+------------------------------------------------------+"); LOG_DEBUG("| CNormalMeanPrecConjugateTest::testLowVariationData |"); LOG_DEBUG("+------------------------------------------------------+"); { CNormalMeanPrecConjugate filter(makePrior(maths_t::E_IntegerData)); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { filter.addSamples(TDouble1Vec(1, 430.0)); } TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 2.0; - LOG_DEBUG("68% confidence interval " - << core::CContainerPrinter::print(interval) - << ", approximate variance = " << sigma * sigma); + LOG_DEBUG("68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate variance = " << sigma * sigma); CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, 1.0 / (sigma * sigma), 0.15); } { 
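The continuous-data case that follows repeats the constant-data check against the library's minimum coefficient of variation: with every sample equal to 430.0 the posterior spread cannot collapse to zero, but is floored near MINIMUM_COEFFICIENT_OF_VARIATION times the sample value. That is my reading of the assertion below; a sketch of the implied floor:

#include <cmath>

// Sketch: smallest standard deviation the prior should report for
// essentially constant data at value x, given a minimum coefficient
// of variation cov (cf. maths::MINIMUM_COEFFICIENT_OF_VARIATION).
double sigmaFloor(double cov, double x) {
    return cov * std::fabs(x);
}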
CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData)); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { filter.addSamples(TDouble1Vec(1, 430.0)); } TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 2.0; - LOG_DEBUG("68% confidence interval " - << core::CContainerPrinter::print(interval) - << ", approximate s.t.d. = " << sigma); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 - / maths::MINIMUM_COEFFICIENT_OF_VARIATION - / 430.5, - 1.0 / sigma, 7.0); + LOG_DEBUG("68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate s.t.d. = " << sigma); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / maths::MINIMUM_COEFFICIENT_OF_VARIATION / 430.5, 1.0 / sigma, 7.0); } } -void CNormalMeanPrecConjugateTest::testPersist() -{ +void CNormalMeanPrecConjugateTest::testPersist() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CNormalMeanPrecConjugateTest::testPersist |"); LOG_DEBUG("+---------------------------------------------+"); @@ -1314,11 +1149,9 @@ void CNormalMeanPrecConjugateTest::testPersist() rng.generateNormalSamples(mean, variance, 100, samples); maths::CNormalMeanPrecConjugate origFilter(makePrior()); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + for (std::size_t i = 0u; i < samples.size(); ++i) { + origFilter.addSamples( + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1344,8 +1177,7 @@ void CNormalMeanPrecConjugateTest::testPersist() maths::MINIMUM_CATEGORY_COUNT); maths::CNormalMeanPrecConjugate restoredFilter(params, traverser); - LOG_DEBUG("orig checksum = " << checksum - << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1358,8 +1190,7 @@ void CNormalMeanPrecConjugateTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() -{ +void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CNormalMeanPrecConjugateTest::testSeasonalVarianceScale |"); LOG_DEBUG("+-----------------------------------------------------------+"); @@ -1374,19 +1205,17 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() // expected. // 7) Updating with scaled samples behaves as expected. 
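All of these checks rest on one identity: a seasonal variance scale vs leaves the mean m unchanged and multiplies the variance by vs, so a scaled observation x is interchangeable with the unscaled point m + (x - m) / sqrt(vs). That is exactly the substitution the loop below applies before comparing probabilities; a sketch of both directions of the mapping:

#include <cmath>

// Sketch: equivalence between scaled and unscaled coordinates for a
// marginal with mean m under a seasonal variance scale vs. The first
// form appears verbatim in the test body below.
double toUnscaled(double m, double x, double vs) {
    return m + (x - m) / std::sqrt(vs);
}

double toScaled(double m, double x, double vs) {
    return m + (x - m) * std::sqrt(vs);
}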
- const double means[] = { 0.2, 1.0, 20.0 }; - const double variances[] = { 0.2, 1.0, 20.0 }; + const double means[] = {0.2, 1.0, 20.0}; + const double variances[] = {0.2, 1.0, 20.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { - for (std::size_t j = 0u; j < boost::size(variances); ++j) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { + for (std::size_t j = 0u; j < boost::size(variances); ++j) { TDoubleVec samples; rng.generateNormalSamples(means[i], variances[j], 100, samples); - double varianceScales[] = { 0.2, 0.5, 1.0, 2.0, 5.0 }; + double varianceScales[] = {0.2, 0.5, 1.0, 2.0, 5.0}; maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); TDouble4Vec weight(1, 1.0); TDouble4Vec1Vec weights(1, weight); @@ -1403,16 +1232,13 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() double s = std::sqrt(v); LOG_DEBUG("m = " << m << ", v = " << v); - double points[] = { m - 3.0 * s, m - s, m, m + s, m + 3.0 * s }; + double points[] = {m - 3.0 * s, m - s, m, m + s, m + 3.0 * s}; double unscaledExpectationVariance; - filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), - 100, - unscaledExpectationVariance); + filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 100, unscaledExpectationVariance); LOG_DEBUG("unscaledExpectationVariance = " << unscaledExpectationVariance); - for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) - { + for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; weight[0] = vs; weights[0][0] = vs; @@ -1425,15 +1251,10 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() LOG_DEBUG("sv = " << filter.marginalLikelihoodVariance(weightStyle, weight)); double expectationVariance; - filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), - 100, - expectationVariance, - weightStyle, - weight); + filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 100, expectationVariance, weightStyle, weight); LOG_DEBUG("expectationVariance = " << expectationVariance); - CPPUNIT_ASSERT_DOUBLES_EQUAL(vs * unscaledExpectationVariance, - expectationVariance, - 0.01 * vs * unscaledExpectationVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + vs * unscaledExpectationVariance, expectationVariance, 0.01 * vs * unscaledExpectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(weightStyle, weight), expectationVariance, 0.01 * filter.marginalLikelihoodVariance(weightStyle, weight)); @@ -1444,15 +1265,12 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode - 1e-3), weights, fmMinusEps); filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode), weights, fm); filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode + 1e-3), weights, fmPlusEps); - LOG_DEBUG("log(f(mode)) = " << fm - << ", log(f(mode - eps)) = " << fmMinusEps - << ", log(f(mode + eps)) = " << fmPlusEps); + LOG_DEBUG("log(f(mode)) = " << fm << ", log(f(mode - eps)) = " << fmMinusEps << ", log(f(mode + eps)) = " << fmPlusEps); CPPUNIT_ASSERT(fm > fmMinusEps); CPPUNIT_ASSERT(fm > fmPlusEps); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, (std::exp(fmPlusEps) - std::exp(fmMinusEps)) / 2e-3, 1e-6); TDouble1Vec sample(1, 0.0); - for (std::size_t l = 0u; l < boost::size(points); ++l) - { + for (std::size_t l = 0u; l < boost::size(points); ++l) { TDouble1Vec x(1, points[l]); double fx; 
filter.jointLogMarginalLikelihood(weightStyle, x, weights, fx); @@ -1463,11 +1281,8 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() double FxPlusEps = std::exp(-(lb + ub) / 2.0); filter.minusLogJointCdf(weightStyle, xMinusEps, weights, lb, ub); double FxMinusEps = std::exp(-(lb + ub) / 2.0); - LOG_DEBUG("x = " << points[l] - << ", log(f(x)) = " << fx - << ", F(x - eps) = " << FxMinusEps - << ", F(x + eps) = " << FxPlusEps - << ", log(dF/dx)) = " << std::log((FxPlusEps - FxMinusEps) / 2e-3)); + LOG_DEBUG("x = " << points[l] << ", log(f(x)) = " << fx << ", F(x - eps) = " << FxMinusEps << ", F(x + eps) = " + << FxPlusEps << ", log(dF/dx)) = " << std::log((FxPlusEps - FxMinusEps) / 2e-3)); CPPUNIT_ASSERT_DOUBLES_EQUAL(fx, std::log((FxPlusEps - FxMinusEps) / 2e-3), 0.05 * std::fabs(fx)); sample[0] = m + (points[l] - m) / std::sqrt(vs); @@ -1475,22 +1290,16 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() double expectedLowerBound; double expectedUpperBound; maths_t::ETail expectedTail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - sample, - weights, - expectedLowerBound, expectedUpperBound, expectedTail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, sample, weights, expectedLowerBound, expectedUpperBound, expectedTail); sample[0] = points[l]; weights[0][0] = vs; double lowerBound; double upperBound; maths_t::ETail tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - sample, - weights, - lowerBound, upperBound, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, sample, weights, lowerBound, upperBound, tail); LOG_DEBUG("expectedLowerBound = " << expectedLowerBound); LOG_DEBUG("lowerBound = " << lowerBound); @@ -1499,44 +1308,33 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() LOG_DEBUG("expectedTail = " << expectedTail); LOG_DEBUG("tail = " << tail); - if ((expectedLowerBound + expectedUpperBound) < 0.02) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(expectedLowerBound), - std::log(lowerBound), - 0.1 * std::fabs(std::log(expectedLowerBound))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(expectedUpperBound), - std::log(upperBound), - 0.1 * std::fabs(std::log(expectedUpperBound))); - } - else - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLowerBound, - lowerBound, - 0.01 * expectedLowerBound); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedUpperBound, - upperBound, - 0.01 * expectedUpperBound); + if ((expectedLowerBound + expectedUpperBound) < 0.02) { + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::log(expectedLowerBound), std::log(lowerBound), 0.1 * std::fabs(std::log(expectedLowerBound))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::log(expectedUpperBound), std::log(upperBound), 0.1 * std::fabs(std::log(expectedUpperBound))); + } else { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLowerBound, lowerBound, 0.01 * expectedLowerBound); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedUpperBound, upperBound, 0.01 * expectedUpperBound); } CPPUNIT_ASSERT_EQUAL(expectedTail, tail); } } } - for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) - { + for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; rng.random_shuffle(samples.begin(), samples.end()); CNormalMeanPrecConjugate filter(makePrior()); weights[0][0] = vs; - for (std::size_t l = 0u; l < samples.size(); ++l) - { + for (std::size_t l = 0u; l < samples.size(); ++l) { filter.addSamples(weightStyle, TDouble1Vec(1, samples[l]), weights); } double sm = 
filter.marginalLikelihoodMean(); double sv = filter.marginalLikelihoodVariance(); - LOG_DEBUG("m = " << m << ", v = " << v); + LOG_DEBUG("m = " << m << ", v = " << v); LOG_DEBUG("sm = " << sm << ", sv = " << sv); CPPUNIT_ASSERT_DOUBLES_EQUAL(m, sm, std::fabs(0.25 * m)); @@ -1546,8 +1344,7 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() } } -void CNormalMeanPrecConjugateTest::testCountVarianceScale() -{ +void CNormalMeanPrecConjugateTest::testCountVarianceScale() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CNormalMeanPrecConjugateTest::testCountVarianceScale |"); LOG_DEBUG("+--------------------------------------------------------+"); @@ -1573,27 +1370,20 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() const double mean = 12.0; const double variance = 3.0; - const double varianceScales[] = - { - 0.20, 0.50, 0.75, 1.50, 2.00, 5.00 - }; + const double varianceScales[] = {0.20, 0.50, 0.75, 1.50, 2.00, 5.00}; LOG_DEBUG(""); LOG_DEBUG("****** probabilityOfLessLikelySamples ******"); - const double percentiles[] = - { - 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0 - }; - const std::size_t nSamples[] = { 30u, 1000u }; + const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0}; + const std::size_t nSamples[] = {30u, 1000u}; const std::size_t nScaledSamples = 10000u; - double percentileErrorTolerances[] = { 0.15, 0.03 }; - double totalErrorTolerances[] = { 0.25, 0.13 }; + double percentileErrorTolerances[] = {0.15, 0.03}; + double totalErrorTolerances[] = {0.25, 0.13}; double totalTotalError = 0.0; - for (std::size_t i = 0; i < boost::size(nSamples); ++i) - { + for (std::size_t i = 0; i < boost::size(nSamples); ++i) { LOG_DEBUG("**** nSamples = " << nSamples[i] << " ****"); test::CRandomNumbers rng; @@ -1612,33 +1402,26 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() TDoubleVec probabilities; probabilities.reserve(nScaledSamples); - for (std::size_t j = 0; j < unscaledSamples.size(); ++j) - { + for (std::size_t j = 0; j < unscaledSamples.size(); ++j) { TDouble1Vec sample(1, unscaledSamples[j]); double lowerBound, upperBound; - CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - sample, - lowerBound, - upperBound)); + CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); } std::sort(probabilities.begin(), probabilities.end()); - for (std::size_t j = 0; j < boost::size(percentiles); ++j) - { - std::size_t index = static_cast( - static_cast(nScaledSamples) * percentiles[j]/100.0); - double error = std::fabs(probabilities[index] - percentiles[j]/100.0); + for (std::size_t j = 0; j < boost::size(percentiles); ++j) { + std::size_t index = static_cast(static_cast(nScaledSamples) * percentiles[j] / 100.0); + double error = std::fabs(probabilities[index] - percentiles[j] / 100.0); expectedPercentileErrors.push_back(error); expectedTotalError += error; } } - for (std::size_t j = 0; j < boost::size(varianceScales); ++j) - { + for (std::size_t j = 0; j < boost::size(varianceScales); ++j) { LOG_DEBUG("**** variance scale = " << varianceScales[j] << " ****"); TDoubleVec scaledSamples; @@ -1646,19 +1429,17 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() TDoubleVec probabilities; probabilities.reserve(nScaledSamples); - for (std::size_t k = 0; k < 
scaledSamples.size(); ++k) - { + for (std::size_t k = 0; k < scaledSamples.size(); ++k) { double lowerBound, upperBound; maths_t::ETail tail; - CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, scaledSamples[k]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), - lowerBound, - upperBound, - tail)); + CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), + TDouble1Vec(1, scaledSamples[k]), + TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), + lowerBound, + upperBound, + tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); @@ -1666,26 +1447,21 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() std::sort(probabilities.begin(), probabilities.end()); double totalError = 0.0; - for (std::size_t k = 0; k < boost::size(percentiles); ++k) - { - std::size_t index = static_cast( - static_cast(nScaledSamples) * percentiles[k]/100.0); - double error = fabs(probabilities[index] - percentiles[k]/100.0); + for (std::size_t k = 0; k < boost::size(percentiles); ++k) { + std::size_t index = static_cast(static_cast(nScaledSamples) * percentiles[k] / 100.0); + double error = fabs(probabilities[index] - percentiles[k] / 100.0); totalError += error; double errorThreshold = percentileErrorTolerances[i] + expectedPercentileErrors[k]; - LOG_DEBUG("percentile = " << percentiles[k] - << ", probability = " << probabilities[index] - << ", error = " << error - << ", error threshold = " << errorThreshold); + LOG_DEBUG("percentile = " << percentiles[k] << ", probability = " << probabilities[index] << ", error = " << error + << ", error threshold = " << errorThreshold); CPPUNIT_ASSERT(error < errorThreshold); } double totalErrorThreshold = totalErrorTolerances[i] + expectedTotalError; - LOG_DEBUG("totalError = " << totalError - << ", totalError threshold = " << totalErrorThreshold); + LOG_DEBUG("totalError = " << totalError << ", totalError threshold = " << totalErrorThreshold); CPPUNIT_ASSERT(totalError < totalErrorThreshold); totalTotalError += totalError; @@ -1700,8 +1476,7 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() test::CRandomNumbers rng; - for (std::size_t i = 0; i < boost::size(varianceScales); ++i) - { + for (std::size_t i = 0; i < boost::size(varianceScales); ++i) { LOG_DEBUG("**** variance scale = " << varianceScales[i] << " ****"); boost::math::normal_distribution<> normal(mean, std::sqrt(varianceScales[i] * variance)); @@ -1717,22 +1492,19 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() TDoubleVec scaledSamples; rng.generateNormalSamples(mean, varianceScales[i] * variance, 10000, scaledSamples); - for (std::size_t j = 0u; j < scaledSamples.size(); ++j) - { + for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { double logLikelihood = 0.0; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), - logLikelihood)); + filter.jointLogMarginalLikelihood(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), + TDouble1Vec(1, scaledSamples[j]), + TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), + logLikelihood)); differentialEntropy -= 
 logLikelihood;
             }
 
             differentialEntropy /= static_cast<double>(scaledSamples.size());
 
-            LOG_DEBUG("differentialEntropy = " << differentialEntropy
-                      << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
+            LOG_DEBUG("differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
 
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.03);
         }
@@ -1745,101 +1517,76 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale()
         // the variance is correctly estimated if we compensate using a
         // variance scale.
 
-        const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 };
-        unsigned int errors[] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u };
+        const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0};
+        unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u};
 
         maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);
-        double variances[] = { 1.0, 5.0 };
+        double variances[] = {1.0, 5.0};
         double precision = 1 / variances[0];
 
-        for (std::size_t t = 0; t < 1000; ++t)
-        {
+        for (std::size_t t = 0; t < 1000; ++t) {
             CNormalMeanPrecConjugate filter(makePrior());
 
-            for (std::size_t i = 0u; i < boost::size(variances); ++i)
-            {
+            for (std::size_t i = 0u; i < boost::size(variances); ++i) {
                 TDoubleVec samples;
                 rng.generateNormalSamples(0.0, variances[i], 1000, samples);
                 TDouble4Vec1Vec weights(samples.size(), TDouble4Vec(1, variances[i]));
                 filter.addSamples(weightStyle, samples, weights);
             }
 
-            for (std::size_t i = 0; i < boost::size(testIntervals); ++i)
-            {
-                TDoubleDoublePr confidenceInterval =
-                        filter.confidenceIntervalPrecision(testIntervals[i]);
-                if (precision < confidenceInterval.first ||
-                    precision > confidenceInterval.second)
-                {
+            for (std::size_t i = 0; i < boost::size(testIntervals); ++i) {
+                TDoubleDoublePr confidenceInterval = filter.confidenceIntervalPrecision(testIntervals[i]);
+                if (precision < confidenceInterval.first || precision > confidenceInterval.second) {
                     ++errors[i];
                 }
             }
         }
 
-        for (std::size_t i = 0; i < boost::size(testIntervals); ++i)
-        {
+        for (std::size_t i = 0; i < boost::size(testIntervals); ++i) {
             double interval = 100.0 * errors[i] / 1000.0;
-            LOG_DEBUG("interval = " << interval
-                      << ", expectedInterval = " << (100.0 - testIntervals[i]));
+            LOG_DEBUG("interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[i]));
             CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[i]), 4.0);
         }
     }
 }
 
-CppUnit::Test* CNormalMeanPrecConjugateTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CNormalMeanPrecConjugateTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testMultipleUpdate",
-                                   &CNormalMeanPrecConjugateTest::testMultipleUpdate) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testPropagation",
-                                   &CNormalMeanPrecConjugateTest::testPropagation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testMeanEstimation",
-                                   &CNormalMeanPrecConjugateTest::testMeanEstimation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testPrecisionEstimation",
-                                   &CNormalMeanPrecConjugateTest::testPrecisionEstimation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testMarginalLikelihood",
-                                   &CNormalMeanPrecConjugateTest::testMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean",
-                                   &CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode",
-                                   &CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance",
-                                   &CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood",
-                                   &CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testCdf",
-                                   &CNormalMeanPrecConjugateTest::testCdf) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples",
-                                   &CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testAnomalyScore",
-                                   &CNormalMeanPrecConjugateTest::testAnomalyScore) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testIntegerData",
-                                   &CNormalMeanPrecConjugateTest::testIntegerData) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testLowVariationData",
-                                   &CNormalMeanPrecConjugateTest::testLowVariationData) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testPersist",
-                                   &CNormalMeanPrecConjugateTest::testPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testSeasonalVarianceScale",
-                                   &CNormalMeanPrecConjugateTest::testSeasonalVarianceScale) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
-                                   "CNormalMeanPrecConjugateTest::testCountVarianceScale",
-                                   &CNormalMeanPrecConjugateTest::testCountVarianceScale) );
+CppUnit::Test* CNormalMeanPrecConjugateTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CNormalMeanPrecConjugateTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMultipleUpdate",
+                                                                                &CNormalMeanPrecConjugateTest::testMultipleUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testPropagation",
+                                                                                &CNormalMeanPrecConjugateTest::testPropagation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMeanEstimation",
+                                                                                &CNormalMeanPrecConjugateTest::testMeanEstimation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testPrecisionEstimation",
+                                                                                &CNormalMeanPrecConjugateTest::testPrecisionEstimation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMarginalLikelihood",
+                                                                                &CNormalMeanPrecConjugateTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean",
+                                                                                &CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode",
+                                                                                &CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
+        "CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance", &CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>(
+        "CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood",
+        &CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testCdf",
+                                                                                &CNormalMeanPrecConjugateTest::testCdf));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples",
+                                                              &CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testAnomalyScore",
+                                                                                &CNormalMeanPrecConjugateTest::testAnomalyScore));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testIntegerData",
+                                                                                &CNormalMeanPrecConjugateTest::testIntegerData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testLowVariationData",
+                                                                                &CNormalMeanPrecConjugateTest::testLowVariationData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testPersist",
+                                                                                &CNormalMeanPrecConjugateTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testSeasonalVarianceScale",
+                                                                                &CNormalMeanPrecConjugateTest::testSeasonalVarianceScale));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testCountVarianceScale",
+                                                                                &CNormalMeanPrecConjugateTest::testCountVarianceScale));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CNormalMeanPrecConjugateTest.h b/lib/maths/unittest/CNormalMeanPrecConjugateTest.h
index 9dc2e914d4..5fccc740c3 100644
--- a/lib/maths/unittest/CNormalMeanPrecConjugateTest.h
+++ b/lib/maths/unittest/CNormalMeanPrecConjugateTest.h
@@ -9,29 +9,27 @@
 
 #include
 
+class CNormalMeanPrecConjugateTest : public CppUnit::TestFixture {
+public:
+    void testMultipleUpdate();
+    void testPropagation();
+    void testMeanEstimation();
+    void testPrecisionEstimation();
+    void testMarginalLikelihood();
+    void testMarginalLikelihoodMean();
+    void testMarginalLikelihoodMode();
+    void testMarginalLikelihoodVariance();
+    void testSampleMarginalLikelihood();
+    void testCdf();
+    void testProbabilityOfLessLikelySamples();
+    void testAnomalyScore();
+    void testIntegerData();
+    void testLowVariationData();
+    void testPersist();
+    void testSeasonalVarianceScale();
+    void testCountVarianceScale();
+
-class CNormalMeanPrecConjugateTest : public CppUnit::TestFixture
-{
-    public:
-        void testMultipleUpdate();
-        void testPropagation();
-        void testMeanEstimation();
-        void testPrecisionEstimation();
-        void testMarginalLikelihood();
-        void testMarginalLikelihoodMean();
-        void testMarginalLikelihoodMode();
-        void testMarginalLikelihoodVariance();
-        void testSampleMarginalLikelihood();
-        void testCdf();
-        void testProbabilityOfLessLikelySamples();
-        void testAnomalyScore();
-        void testIntegerData();
-        void testLowVariationData();
-        void testPersist();
-        void testSeasonalVarianceScale();
-        void testCountVarianceScale();
-
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CNormalMeanPrecConjugateTest_h
diff --git a/lib/maths/unittest/COneOfNPriorTest.cc b/lib/maths/unittest/COneOfNPriorTest.cc
index a9a291bb67..b283d749eb 100644
--- a/lib/maths/unittest/COneOfNPriorTest.cc
+++ b/lib/maths/unittest/COneOfNPriorTest.cc
@@ -28,9 +28,9 @@
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
@@ -41,8 +41,7 @@
 using namespace ml;
 using namespace handy_typedefs;
 
-namespace
-{
+namespace {
 
 using TUIntVec = std::vector<unsigned int>;
 using TDoubleVec = std::vector<double>;
@@ -59,42 +58,33 @@ using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin<maths::CNormalMeanPrecConjugate>;
using CPoissonMeanConjugate = CPriorTestInterfaceMixin; -COneOfNPrior::TPriorPtrVec clone(const TPriorPtrVec &models, - const TOptionalDouble &decayRate = TOptionalDouble()) -{ +COneOfNPrior::TPriorPtrVec clone(const TPriorPtrVec& models, const TOptionalDouble& decayRate = TOptionalDouble()) { COneOfNPrior::TPriorPtrVec result; result.reserve(models.size()); - for (std::size_t i = 0u; i < models.size(); ++i) - { + for (std::size_t i = 0u; i < models.size(); ++i) { result.push_back(COneOfNPrior::TPriorPtr(models[i]->clone())); - if (decayRate) - { + if (decayRate) { result.back()->decayRate(*decayRate); } } return result; } -void truncateUpTo(const double &value, TDoubleVec &samples) -{ - for (std::size_t i = 0u; i < samples.size(); ++i) - { +void truncateUpTo(const double& value, TDoubleVec& samples) { + for (std::size_t i = 0u; i < samples.size(); ++i) { samples[i] = std::max(samples[i], value); } } -double sum(const TDoubleVec &values) -{ +double sum(const TDoubleVec& values) { return std::accumulate(values.begin(), values.end(), 0.0); } using maths_t::E_ContinuousData; using maths_t::E_IntegerData; - } -void COneOfNPriorTest::testFilter() -{ +void COneOfNPriorTest::testFilter() { LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testFilter |"); LOG_DEBUG("+--------------------------------+"); @@ -115,8 +105,7 @@ void COneOfNPriorTest::testFilter() // Make sure we don't have negative values. truncateUpTo(0.0, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); } @@ -124,8 +113,7 @@ void COneOfNPriorTest::testFilter() CPPUNIT_ASSERT_EQUAL(std::size_t(4), filter.models().size()); - filter.removeModels(maths::CPrior::CModelFilter().remove(maths::CPrior::E_Poisson) - .remove(maths::CPrior::E_Gamma)); + filter.removeModels(maths::CPrior::CModelFilter().remove(maths::CPrior::E_Poisson).remove(maths::CPrior::E_Gamma)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), filter.models().size()); CPPUNIT_ASSERT_EQUAL(maths::CPrior::E_LogNormal, filter.models()[0]->type()); @@ -134,8 +122,7 @@ void COneOfNPriorTest::testFilter() CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::accumulate(weights.begin(), weights.end(), 0.0), 1e-6); } -void COneOfNPriorTest::testMultipleUpdate() -{ +void COneOfNPriorTest::testMultipleUpdate() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testMultipleUpdate |"); LOG_DEBUG("+----------------------------------------+"); @@ -160,8 +147,7 @@ void COneOfNPriorTest::testMultipleUpdate() // Deal with improper prior pathology. TDoubleVec seedSamples; rng.generateNormalSamples(mean, variance, 2, seedSamples); - for (std::size_t i = 0u; i < seedSamples.size(); ++i) - { + for (std::size_t i = 0u; i < seedSamples.size(); ++i) { TDouble1Vec sample(1, seedSamples[i]); filter1.addSamples(sample); filter2.addSamples(sample); @@ -173,8 +159,7 @@ void COneOfNPriorTest::testMultipleUpdate() // Make sure we don't have negative values. 
 truncateUpTo(0.0, samples);
 
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
         filter1.addSamples(TDouble1Vec(1, samples[i]));
     }
     filter2.addSamples(samples);
@@ -189,36 +174,28 @@ void COneOfNPriorTest::testMultipleUpdate()
     LOG_DEBUG("weight1 = " << core::CContainerPrinter::print(weights1));
     LOG_DEBUG("weight2 = " << core::CContainerPrinter::print(weights2));
     CPPUNIT_ASSERT(weights1.size() == weights2.size());
-    CPPUNIT_ASSERT(std::equal(weights1.begin(), weights1.end(),
-                              weights2.begin(),
-                              equal));
+    CPPUNIT_ASSERT(std::equal(weights1.begin(), weights1.end(), weights2.begin(), equal));
 
     COneOfNPrior::TPriorCPtrVec models1 = filter1.models();
     COneOfNPrior::TPriorCPtrVec models2 = filter2.models();
     CPPUNIT_ASSERT(models1.size() == models2.size());
 
-    const maths::CPoissonMeanConjugate *poisson1 =
-            dynamic_cast<const maths::CPoissonMeanConjugate*>(models1[0]);
-    const maths::CPoissonMeanConjugate *poisson2 =
-            dynamic_cast<const maths::CPoissonMeanConjugate*>(models2[0]);
+    const maths::CPoissonMeanConjugate* poisson1 = dynamic_cast<const maths::CPoissonMeanConjugate*>(models1[0]);
+    const maths::CPoissonMeanConjugate* poisson2 = dynamic_cast<const maths::CPoissonMeanConjugate*>(models2[0]);
     CPPUNIT_ASSERT(poisson1 && poisson2);
     CPPUNIT_ASSERT(poisson1->equalTolerance(*poisson2, equal));
 
-    const maths::CNormalMeanPrecConjugate *normal1 =
-            dynamic_cast<const maths::CNormalMeanPrecConjugate*>(models1[1]);
-    const maths::CNormalMeanPrecConjugate *normal2 =
-            dynamic_cast<const maths::CNormalMeanPrecConjugate*>(models2[1]);
+    const maths::CNormalMeanPrecConjugate* normal1 = dynamic_cast<const maths::CNormalMeanPrecConjugate*>(models1[1]);
+    const maths::CNormalMeanPrecConjugate* normal2 = dynamic_cast<const maths::CNormalMeanPrecConjugate*>(models2[1]);
     CPPUNIT_ASSERT(normal1 && normal2);
     CPPUNIT_ASSERT(normal1->equalTolerance(*normal2, equal));
 
-
     // Test the count weight is equivalent to adding repeated samples.
 
     double x = 3.0;
     std::size_t count = 10;
-    for (std::size_t j = 0u; j < count; ++j)
-    {
+    for (std::size_t j = 0u; j < count; ++j) {
         filter1.addSamples(TDouble1Vec(1, x));
     }
     filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
@@ -228,8 +205,7 @@ void COneOfNPriorTest::testMultipleUpdate()
     CPPUNIT_ASSERT_EQUAL(filter1.checksum(), filter2.checksum());
 }
 
-void COneOfNPriorTest::testWeights()
-{
+void COneOfNPriorTest::testWeights() {
     LOG_DEBUG("+---------------------------------+");
     LOG_DEBUG("| COneOfNPriorTest::testWeights |");
     LOG_DEBUG("+---------------------------------+");
@@ -243,14 +219,11 @@ void COneOfNPriorTest::testWeights()
     using TEqual = maths::CEqualWithTolerance<double>;
     TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-10);
 
-    const double decayRates[] = { 0.0, 0.001, 0.01 };
+    const double decayRates[] = {0.0, 0.001, 0.01};
 
-    for (std::size_t rate = 0; rate < boost::size(decayRates); ++rate)
-    {
+    for (std::size_t rate = 0; rate < boost::size(decayRates); ++rate) {
         // Test that the filter weights stay normalized.
-        COneOfNPrior filter(maths::COneOfNPrior(clone(models, decayRates[rate]),
-                                                E_ContinuousData,
-                                                decayRates[rate]));
+        COneOfNPrior filter(maths::COneOfNPrior(clone(models, decayRates[rate]), E_ContinuousData, decayRates[rate]));
 
         const double mean = 20.0;
         const double variance = 3.0;
@@ -261,8 +234,7 @@ void COneOfNPriorTest::testWeights()
         // Make sure we don't have negative values.
         truncateUpTo(0.0, samples);
 
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             filter.addSamples(TDouble1Vec(1, samples[i]));
             CPPUNIT_ASSERT(equal(sum(filter.weights()), 1.0));
             filter.propagateForwardsByTime(1.0);
@@ -274,26 +246,22 @@ void COneOfNPriorTest::testWeights()
     {
        // Test that non-zero decay rate behaves as expected.
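The normalization invariant asserted above, the model weights summing to one after every update and propagation, amounts to keeping the per-model log-weights shifted by their log-sum-exp. A standalone sketch of that bookkeeping, illustrative only and not the library's implementation:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Sketch: one-of-n model weights held as normalized log-weights. Each
// update adds a model's log-likelihood of the sample; renormalizing
// keeps sum(exp(logWeight)) == 1, which is what the assertions check.
void updateLogWeights(std::vector<double>& logWeights,
                      const std::vector<double>& logLikelihoods) {
    for (std::size_t i = 0; i < logWeights.size(); ++i) {
        logWeights[i] += logLikelihoods[i];
    }
    // Log-sum-exp normalization, stabilized by the maximum log-weight.
    double zmax = *std::max_element(logWeights.begin(), logWeights.end());
    double sum = 0.0;
    for (double z : logWeights) {
        sum += std::exp(z - zmax);
    }
    double logZ = zmax + std::log(sum);
    for (double& z : logWeights) {
        z -= logZ;
    }
}

The non-zero decay-rate check that follows then verifies that, under this scheme, faster decay lets the better model dominate more slowly, so the log-weight ratio shrinks in magnitude as the decay rate grows.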
- const double decayRates[] = { 0.0002, 0.001, 0.005 }; + const double decayRates[] = {0.0002, 0.001, 0.005}; const double rate = 5.0; double previousLogWeightRatio = -500; - for (std::size_t decayRate = 0; decayRate < boost::size(decayRates); ++decayRate) - { + for (std::size_t decayRate = 0; decayRate < boost::size(decayRates); ++decayRate) { TPriorPtrVec models; models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_IntegerData).clone())); - COneOfNPrior filter(maths::COneOfNPrior(clone(models, decayRates[decayRate]), - E_IntegerData, - decayRates[decayRate])); + COneOfNPrior filter(maths::COneOfNPrior(clone(models, decayRates[decayRate]), E_IntegerData, decayRates[decayRate])); TUIntVec samples; rng.generatePoissonSamples(rate, 10000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, static_cast(samples[i]))); filter.propagateForwardsByTime(1.0); } @@ -311,8 +279,7 @@ void COneOfNPriorTest::testWeights() } } -void COneOfNPriorTest::testModels() -{ +void COneOfNPriorTest::testModels() { LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testModels |"); LOG_DEBUG("+--------------------------------+"); @@ -341,24 +308,18 @@ void COneOfNPriorTest::testModels() TUIntVec samples; rng.generatePoissonSamples(rate, 3000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, static_cast(samples[i]))); } COneOfNPrior::TPriorCPtrVec posteriorModels = filter.models(); - const maths::CPoissonMeanConjugate *poissonModel = - dynamic_cast(posteriorModels[0]); - const maths::CNormalMeanPrecConjugate *normalModel = - dynamic_cast(posteriorModels[1]); + const maths::CPoissonMeanConjugate* poissonModel = dynamic_cast(posteriorModels[0]); + const maths::CNormalMeanPrecConjugate* normalModel = dynamic_cast(posteriorModels[1]); CPPUNIT_ASSERT(poissonModel && normalModel); - LOG_DEBUG("Poisson mean = " << poissonModel->priorMean() - << ", expectedMean = " << rate); - LOG_DEBUG("Normal mean = " << normalModel->mean() - << ", expectedMean = " << mean - << ", precision = " << normalModel->precision() - << ", expectedPrecision " << (1.0 / variance)); + LOG_DEBUG("Poisson mean = " << poissonModel->priorMean() << ", expectedMean = " << rate); + LOG_DEBUG("Normal mean = " << normalModel->mean() << ", expectedMean = " << mean << ", precision = " << normalModel->precision() + << ", expectedPrecision " << (1.0 / variance)); CPPUNIT_ASSERT(std::fabs(poissonModel->priorMean() - rate) / rate < 0.01); CPPUNIT_ASSERT(std::fabs(normalModel->mean() - mean) / mean < 0.01); @@ -381,24 +342,18 @@ void COneOfNPriorTest::testModels() TDoubleVec samples; rng.generateNormalSamples(mean, variance, 1000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); } COneOfNPrior::TPriorCPtrVec posteriorModels = filter.models(); - const maths::CPoissonMeanConjugate *poissonModel = - dynamic_cast(posteriorModels[0]); - const maths::CNormalMeanPrecConjugate *normalModel = - dynamic_cast(posteriorModels[1]); + const maths::CPoissonMeanConjugate* poissonModel = dynamic_cast(posteriorModels[0]); + const maths::CNormalMeanPrecConjugate* normalModel = dynamic_cast(posteriorModels[1]); CPPUNIT_ASSERT(poissonModel 
&& normalModel); - LOG_DEBUG("Poisson mean = " << poissonModel->priorMean() - << ", expectedMean = " << rate); - LOG_DEBUG("Normal mean = " << normalModel->mean() - << ", expectedMean = " << mean - << ", precision = " << normalModel->precision() - << ", expectedPrecision " << (1.0 / variance)); + LOG_DEBUG("Poisson mean = " << poissonModel->priorMean() << ", expectedMean = " << rate); + LOG_DEBUG("Normal mean = " << normalModel->mean() << ", expectedMean = " << mean << ", precision = " << normalModel->precision() + << ", expectedPrecision " << (1.0 / variance)); CPPUNIT_ASSERT(std::fabs(poissonModel->priorMean() - rate) / rate < 0.01); CPPUNIT_ASSERT(std::fabs(normalModel->mean() - mean) / mean < 0.01); @@ -406,8 +361,7 @@ void COneOfNPriorTest::testModels() } } -void COneOfNPriorTest::testModelSelection() -{ +void COneOfNPriorTest::testModelSelection() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testModelSelection |"); LOG_DEBUG("+----------------------------------------+"); @@ -437,30 +391,25 @@ void COneOfNPriorTest::testModelSelection() boost::math::normal_distribution<> normal(mean, std::sqrt(variance)); double poissonExpectedLogWeight = -maths::CTools::differentialEntropy(poisson); - double normalExpectedLogWeight = -maths::CTools::differentialEntropy(normal); + double normalExpectedLogWeight = -maths::CTools::differentialEntropy(normal); COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData)); TUIntVec samples; rng.generatePoissonSamples(rate, nSamples, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, static_cast(samples[i]))); } - double expectedLogWeightRatio = - (normalExpectedLogWeight - poissonExpectedLogWeight) - * static_cast(nSamples); + double expectedLogWeightRatio = (normalExpectedLogWeight - poissonExpectedLogWeight) * static_cast(nSamples); TDoubleVec logWeights = filter.logWeights(); double logWeightRatio = logWeights[1] - logWeights[0]; - LOG_DEBUG("expectedLogWeightRatio = " << expectedLogWeightRatio - << ", logWeightRatio = " << logWeightRatio); + LOG_DEBUG("expectedLogWeightRatio = " << expectedLogWeightRatio << ", logWeightRatio = " << logWeightRatio); - CPPUNIT_ASSERT(std::fabs(logWeightRatio - expectedLogWeightRatio) - / std::fabs(expectedLogWeightRatio) < 0.05); + CPPUNIT_ASSERT(std::fabs(logWeightRatio - expectedLogWeightRatio) / std::fabs(expectedLogWeightRatio) < 0.05); } { @@ -484,7 +433,7 @@ void COneOfNPriorTest::testModelSelection() models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - const unsigned int nSamples[] = { 1000u, 2000u, 3000u }; + const unsigned int nSamples[] = {1000u, 2000u, 3000u}; const double mean = 100.0; const double variance = 5.0; @@ -492,10 +441,9 @@ void COneOfNPriorTest::testModelSelection() boost::math::normal_distribution<> normal(mean, std::sqrt(variance)); double poissonExpectedLogWeight = -maths::CTools::differentialEntropy(poissonApprox); - double normalExpectedLogWeight = -maths::CTools::differentialEntropy(normal); + double normalExpectedLogWeight = -maths::CTools::differentialEntropy(normal); - for (size_t n = 0; n < boost::size(nSamples); ++n) - { + for (size_t n = 0; n < boost::size(nSamples); ++n) { COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData)); TDoubleVec samples; @@ -504,23 +452,18 @@ void 
COneOfNPriorTest::testModelSelection() // Make sure we don't have negative values. truncateUpTo(0.0, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); } - double expectedLogWeightRatio = - (poissonExpectedLogWeight - normalExpectedLogWeight) - * static_cast(nSamples[n]); + double expectedLogWeightRatio = (poissonExpectedLogWeight - normalExpectedLogWeight) * static_cast(nSamples[n]); TDoubleVec logWeights = filter.logWeights(); double logWeightRatio = logWeights[0] - logWeights[1]; - LOG_DEBUG("expectedLogWeightRatio = " << expectedLogWeightRatio - << ", logWeightRatio = " << logWeightRatio); + LOG_DEBUG("expectedLogWeightRatio = " << expectedLogWeightRatio << ", logWeightRatio = " << logWeightRatio); - CPPUNIT_ASSERT(std::fabs(logWeightRatio - expectedLogWeightRatio) - / std::fabs(expectedLogWeightRatio) < 0.35); + CPPUNIT_ASSERT(std::fabs(logWeightRatio - expectedLogWeightRatio) / std::fabs(expectedLogWeightRatio) < 0.35); } } { @@ -537,20 +480,16 @@ void COneOfNPriorTest::testModelSelection() TPriorPtrVec models; models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight); - maths::CNormalMeanPrecConjugate normal = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight); + maths::CNormalMeanPrecConjugate normal = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::COneOfNPrior::TPriorPtrVec mode; mode.push_back(COneOfNPrior::TPriorPtr(normal.clone())); - models.push_back(TPriorPtr(new maths::CMultimodalPrior(maths_t::E_ContinuousData, - clusterer, - maths::COneOfNPrior(mode, maths_t::E_ContinuousData)))); + models.push_back(TPriorPtr( + new maths::CMultimodalPrior(maths_t::E_ContinuousData, clusterer, maths::COneOfNPrior(mode, maths_t::E_ContinuousData)))); COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); } @@ -559,20 +498,18 @@ void COneOfNPriorTest::testModelSelection() LOG_DEBUG("logWeightRatio = " << logWeightRatio); CPPUNIT_ASSERT(std::exp(logWeightRatio) < 1e-6); - } + } } -void COneOfNPriorTest::testMarginalLikelihood() -{ +void COneOfNPriorTest::testMarginalLikelihood() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testMarginalLikelihood |"); LOG_DEBUG("+--------------------------------------------+"); // Check that the c.d.f. <= 1 at extreme. 
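Note the convention being exercised here: minusLogJointCdf returns lower and upper bounds on -log F(x), so F(x) <= 1 is equivalent to both bounds being non-negative, and a point estimate of the c.d.f. is recovered by exponentiating the negated midpoint, exactly as the surrounding tests do with std::exp(-(lb + ub) / 2.0). In sketch form:

#include <cmath>

// Sketch: converting minus-log-c.d.f. bounds back to a c.d.f. estimate.
// lb >= 0 and ub >= 0 together guarantee the recovered F(x) <= 1.
double cdfFromMinusLogBounds(double lb, double ub) {
    return std::exp(-(lb + ub) / 2.0);
}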
- maths_t::EDataType dataTypes[] = { E_ContinuousData, E_IntegerData }; + maths_t::EDataType dataTypes[] = {E_ContinuousData, E_IntegerData}; - for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) - { + for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { TPriorPtrVec models; models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(dataTypes[t]).clone())); @@ -589,23 +526,18 @@ void COneOfNPriorTest::testMarginalLikelihood() rng.generateLogNormalSamples(location, squareScale, 10, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = - { - maths_t::E_SampleCountWeight, - maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight - }; - double weights[] = { 0.1, 1.0, 10.0 }; - - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) - { - for (std::size_t j = 0u; j < boost::size(weights); ++j) - { + maths_t::ESampleWeightStyle weightStyles[] = { + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight}; + double weights[] = {0.1, 1.0, 10.0}; + + for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 10000.0), TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), - lb, ub); + lb, + ub); LOG_DEBUG("-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -629,12 +561,10 @@ void COneOfNPriorTest::testMarginalLikelihood() TDoubleVec samples; rng.generateLogNormalSamples(1.0, 1.0, 99, samples); - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); } - for (std::size_t i = 2u; i < samples.size(); ++i) - { + for (std::size_t i = 2u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(99.0); @@ -642,11 +572,9 @@ void COneOfNPriorTest::testMarginalLikelihood() double x = interval.first; double dx = (interval.second - interval.first) / 20.0; - for (std::size_t j = 0u; j < 20; ++j, x += dx) - { + for (std::size_t j = 0u; j < 20; ++j, x += dx) { double fx; - CPPUNIT_ASSERT(filter.jointLogMarginalLikelihood(TDouble1Vec(1, x), fx) - == maths_t::E_FpNoErrors); + CPPUNIT_ASSERT(filter.jointLogMarginalLikelihood(TDouble1Vec(1, x), fx) == maths_t::E_FpNoErrors); fx = std::exp(fx); double lb; @@ -662,7 +590,6 @@ void COneOfNPriorTest::testMarginalLikelihood() LOG_DEBUG("x = " << x << ", f(x) = " << fx << ", dF(x)/dx = " << dFdx); CPPUNIT_ASSERT_DOUBLES_EQUAL(fx, dFdx, std::max(1e-5, 1e-3 * FxPlusEps)); - CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lb, ub)); double Fx = std::exp(-(lb + ub) / 2.0); @@ -675,8 +602,7 @@ void COneOfNPriorTest::testMarginalLikelihood() } } -void COneOfNPriorTest::testMarginalLikelihoodMean() -{ +void COneOfNPriorTest::testMarginalLikelihoodMean() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testMarginalLikelihoodMean |"); LOG_DEBUG("+------------------------------------------------+"); @@ -689,13 +615,11 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() { LOG_DEBUG("****** Normal ******"); - const double means[] = { 10.0, 50.0 }; - const double variances[] = { 1.0, 10.0 }; + const double means[] = {10.0, 50.0}; + const double variances[] = {1.0, 10.0}; - 
for (std::size_t i = 0u; i < boost::size(means); ++i) - { - for (std::size_t j = 0u; j < boost::size(variances); ++j) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { + for (std::size_t j = 0u; j < boost::size(variances); ++j) { LOG_DEBUG("*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); TPriorPtrVec models; @@ -711,23 +635,18 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() TDoubleVec samples; rng.generateNormalSamples(means[i], variances[j], 100, samples); - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedMean; CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); - if (k % 10 == 0) - { - LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() - << ", expectedMean = " << expectedMean); + if (k % 10 == 0) { + LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); } // The error is at the precision of the numerical integration. - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, - filter.marginalLikelihoodMean(), - 0.01 * expectedMean); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 0.01 * expectedMean); } } } @@ -736,15 +655,12 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() { LOG_DEBUG("****** Log Normal ******"); - const double locations[] = { 0.1, 1.0 }; - const double squareScales[] = { 0.1, 1.0 }; + const double locations[] = {0.1, 1.0}; + const double squareScales[] = {0.1, 1.0}; - for (std::size_t i = 0u; i < boost::size(locations); ++i) - { - for (std::size_t j = 0u; j < boost::size(squareScales); ++j) - { - LOG_DEBUG("*** location = " << locations[i] - << ", squareScale = " << squareScales[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(locations); ++i) { + for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { + LOG_DEBUG("*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); TPriorPtrVec models; models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); @@ -761,22 +677,17 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() TMeanAccumulator relativeError; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedMean; CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); - if (k % 10 == 0) - { - LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() - << ", expectedMean = " << expectedMean); + if (k % 10 == 0) { + LOG_DEBUG("marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, - filter.marginalLikelihoodMean(), - 0.2 * expectedMean); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 0.2 * expectedMean); relativeError.add(std::fabs(filter.marginalLikelihoodMean() - expectedMean) / expectedMean); } @@ -788,8 +699,7 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() } } -void COneOfNPriorTest::testMarginalLikelihoodMode() -{ +void COneOfNPriorTest::testMarginalLikelihoodMode() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testMarginalLikelihoodMode |"); LOG_DEBUG("+------------------------------------------------+"); @@ -802,13 +712,11 @@ void 
COneOfNPriorTest::testMarginalLikelihoodMode() { LOG_DEBUG("****** Normal ******"); - const double means[] = { 10.0, 50.0 }; - const double variances[] = { 1.0, 10.0 }; + const double means[] = {10.0, 50.0}; + const double variances[] = {1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { - for (std::size_t j = 0u; j < boost::size(variances); ++j) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { + for (std::size_t j = 0u; j < boost::size(variances); ++j) { LOG_DEBUG("*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); TPriorPtrVec models; @@ -832,8 +740,7 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() double b = means[i] + 2.0 * std::sqrt(variances[j]); maths::CSolvers::maximize(a, b, likelihood(a), likelihood(b), likelihood, 0.0, iterations, mode, fmode); - LOG_DEBUG("marginalLikelihoodMode = " << filter.marginalLikelihoodMode() - << ", expectedMode = " << mode); + LOG_DEBUG("marginalLikelihoodMode = " << filter.marginalLikelihoodMode() << ", expectedMode = " << mode); CPPUNIT_ASSERT_DOUBLES_EQUAL(mode, filter.marginalLikelihoodMode(), 0.01 * mode); } @@ -843,15 +750,12 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() { LOG_DEBUG("****** Log Normal ******"); - const double locations[] = { 0.1, 1.0 }; - const double squareScales[] = { 0.1, 2.0 }; + const double locations[] = {0.1, 1.0}; + const double squareScales[] = {0.1, 2.0}; - for (std::size_t i = 0u; i < boost::size(locations); ++i) - { - for (std::size_t j = 0u; j < boost::size(squareScales); ++j) - { - LOG_DEBUG("*** location = " << locations[i] - << ", squareScale = " << squareScales[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(locations); ++i) { + for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { + LOG_DEBUG("*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); TPriorPtrVec models; models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); @@ -875,8 +779,7 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() double b = boost::math::mode(logNormal) + 1.0 * boost::math::standard_deviation(logNormal); maths::CSolvers::maximize(a, b, likelihood(a), likelihood(b), likelihood, 0.0, iterations, mode, fmode); - LOG_DEBUG("marginalLikelihoodMode = " << filter.marginalLikelihoodMode() - << ", expectedMode = " << mode); + LOG_DEBUG("marginalLikelihoodMode = " << filter.marginalLikelihoodMode() << ", expectedMode = " << mode); CPPUNIT_ASSERT_DOUBLES_EQUAL(mode, filter.marginalLikelihoodMode(), 0.05 * mode); } @@ -884,8 +787,7 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() } } -void COneOfNPriorTest::testMarginalLikelihoodVariance() -{ +void COneOfNPriorTest::testMarginalLikelihoodVariance() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testMarginalLikelihoodVariance |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -899,15 +801,12 @@ void COneOfNPriorTest::testMarginalLikelihoodVariance() { LOG_DEBUG("****** Normal ******"); - double means[] = { 10.0, 100.0 }; - double variances[] = { 1.0, 10.0 }; + double means[] = {10.0, 100.0}; + double variances[] = {1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { - for (std::size_t j = 0u; j < boost::size(variances); ++j) - { - LOG_DEBUG("*** mean = " << means[i] - << ", variance = " << variances[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(means); ++i) { + for (std::size_t j = 0u; j < 
boost::size(variances); ++j) { + LOG_DEBUG("*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); TPriorPtrVec models; models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); @@ -923,24 +822,19 @@ void COneOfNPriorTest::testMarginalLikelihoodVariance() rng.generateNormalSamples(means[i], variances[j], 100, samples); TMeanAccumulator relativeError; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedVariance; CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); - if (k % 10 == 0) - { + if (k % 10 == 0) { LOG_DEBUG("marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() - << ", expectedVariance = " << expectedVariance); + << ", expectedVariance = " << expectedVariance); } // The error is at the precision of the numerical integration. - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, - filter.marginalLikelihoodVariance(), - 0.02 * expectedVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.02 * expectedVariance); - relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) - / expectedVariance); + relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance); } LOG_DEBUG("relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -952,15 +846,12 @@ void COneOfNPriorTest::testMarginalLikelihoodVariance() { LOG_DEBUG("****** Gamma ******"); - const double shapes[] = { 5.0, 20.0, 40.0 }; - const double scales[] = { 1.0, 10.0, 20.0 }; + const double shapes[] = {5.0, 20.0, 40.0}; + const double scales[] = {1.0, 10.0, 20.0}; - for (std::size_t i = 0u; i < boost::size(shapes); ++i) - { - for (std::size_t j = 0u; j < boost::size(scales); ++j) - { - LOG_DEBUG("*** shape = " << shapes[i] - << ", scale = " << scales[j] << " ***"); + for (std::size_t i = 0u; i < boost::size(shapes); ++i) { + for (std::size_t j = 0u; j < boost::size(scales); ++j) { + LOG_DEBUG("*** shape = " << shapes[i] << ", scale = " << scales[j] << " ***"); TPriorPtrVec models; models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); @@ -977,27 +868,22 @@ void COneOfNPriorTest::testMarginalLikelihoodVariance() TMeanAccumulator relativeError; - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); double expectedVariance; CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); - if (k % 10 == 0) - { + if (k % 10 == 0) { LOG_DEBUG("marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() - << ", expectedVariance = " << expectedVariance); + << ", expectedVariance = " << expectedVariance); } // The error is mainly due to the truncation in the // integration range used to compute the expected mean. 
- CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, - filter.marginalLikelihoodVariance(), - 0.01 * expectedVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.01 * expectedVariance); - relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) - / expectedVariance); + relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance); } LOG_DEBUG("relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -1007,8 +893,7 @@ void COneOfNPriorTest::testMarginalLikelihoodVariance() } } -void COneOfNPriorTest::testSampleMarginalLikelihood() -{ +void COneOfNPriorTest::testSampleMarginalLikelihood() { LOG_DEBUG("+--------------------------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testSampleMarginalLikelihood |"); LOG_DEBUG("+--------------------------------------------------+"); @@ -1029,8 +914,7 @@ void COneOfNPriorTest::testSampleMarginalLikelihood() TDoubleVec samples; rng.generateNormalSamples(mean, variance, 20, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); } @@ -1048,20 +932,16 @@ void COneOfNPriorTest::testSampleMarginalLikelihood() posteriorModels[1]->sampleMarginalLikelihood(5, logNormalSamples); TDoubleVec expectedSampled(normalSamples); - expectedSampled.insert(expectedSampled.end(), - logNormalSamples.begin(), - logNormalSamples.end()); + expectedSampled.insert(expectedSampled.end(), logNormalSamples.begin(), logNormalSamples.end()); LOG_DEBUG("expected samples = " << core::CContainerPrinter::print(expectedSampled) - << ", samples = " << core::CContainerPrinter::print(sampled)); + << ", samples = " << core::CContainerPrinter::print(sampled)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), - core::CContainerPrinter::print(sampled)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), core::CContainerPrinter::print(sampled)); rng.generateNormalSamples(mean, variance, 80, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, samples[i])); } @@ -1076,19 +956,15 @@ void COneOfNPriorTest::testSampleMarginalLikelihood() posteriorModels[1]->sampleMarginalLikelihood(0, logNormalSamples); expectedSampled = normalSamples; - expectedSampled.insert(expectedSampled.end(), - logNormalSamples.begin(), - logNormalSamples.end()); + expectedSampled.insert(expectedSampled.end(), logNormalSamples.begin(), logNormalSamples.end()); LOG_DEBUG("expected samples = " << core::CContainerPrinter::print(expectedSampled) - << ", samples = " << core::CContainerPrinter::print(sampled)); + << ", samples = " << core::CContainerPrinter::print(sampled)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), - core::CContainerPrinter::print(sampled)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), core::CContainerPrinter::print(sampled)); } -void COneOfNPriorTest::testCdf() -{ +void COneOfNPriorTest::testCdf() { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testCdf |"); LOG_DEBUG("+-----------------------------+"); @@ -1097,7 +973,7 @@ void COneOfNPriorTest::testCdf() const double mean = 20.0; const double variance = 5.0; - const std::size_t n[] = { 20u, 80u }; + const std::size_t n[] = {20u, 80u}; test::CRandomNumbers rng; @@ -1107,13 +983,11 @@ void 
COneOfNPriorTest::testCdf() models.push_back(TPriorPtr(CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone())); COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData)); - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TDoubleVec samples; rng.generateNormalSamples(mean, variance, n[i], samples); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); } @@ -1121,8 +995,7 @@ void COneOfNPriorTest::testCdf() CPPUNIT_ASSERT(!filter.minusLogJointCdf(TDouble1Vec(), lb, ub)); CPPUNIT_ASSERT(!filter.minusLogJointCdfComplement(TDouble1Vec(), lb, ub)); - for (std::size_t j = 1u; j < 500; ++j) - { + for (std::size_t j = 1u; j < 500; ++j) { double x = static_cast<double>(j) / 2.0; CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lb, ub)); @@ -1130,15 +1003,13 @@ void COneOfNPriorTest::testCdf() CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lb, ub)); double fComplement = (lb + ub) / 2.0; - LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) - << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); + LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10); } } } -void COneOfNPriorTest::testProbabilityOfLessLikelySamples() -{ +void COneOfNPriorTest::testProbabilityOfLessLikelySamples() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testProbabilityOfLessLikelySamples |"); LOG_DEBUG("+--------------------------------------------------------+"); @@ -1148,7 +1019,7 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() const double location = 0.7; const double squareScale = 1.3; - const double vs[] = { 0.5, 1.0, 2.0 }; + const double vs[] = {0.5, 1.0, 2.0}; TPriorPtrVec initialModels; initialModels.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); @@ -1161,8 +1032,7 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 200, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { TDouble1Vec sample(1, samples[i]); filter.addSamples(sample); @@ -1178,99 +1048,87 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() TDoubleVec weights(filter.weights()); COneOfNPrior::TPriorCPtrVec models(filter.models()); - for (std::size_t j = 0u; j < weights.size(); ++j) - { + for (std::size_t j = 0u; j < weights.size(); ++j) { double weight = weights[j]; - CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, sample[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), - lb, ub, tail)); + CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples(maths_t::E_TwoSided, + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), + TDouble1Vec(1, sample[0]), + TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), + lb, + ub, + tail)); CPPUNIT_ASSERT_EQUAL(lb, ub); double modelProbability = (lb + ub) / 2.0; expectedProbability += weight * modelProbability; } - LOG_DEBUG("weights = " << core::CContainerPrinter::print(weights) - << ", expectedProbability = " << expectedProbability - << ", probability = " << 
probability); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, - probability, - 1e-3 * std::max(expectedProbability, probability)); + LOG_DEBUG("weights = " << core::CContainerPrinter::print(weights) << ", expectedProbability = " << expectedProbability + << ", probability = " << probability); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 1e-3 * std::max(expectedProbability, probability)); maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; ((i+1) % 11 == 0) && k < boost::size(vs); ++k) - { + for (std::size_t k = 0u; ((i + 1) % 11 == 0) && k < boost::size(vs); ++k) { double mode = filter.marginalLikelihoodMode(weightStyle, TDouble4Vec(1, vs[k])); - double ss[] = { 0.9 * mode, 1.1 * mode }; + double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG("vs = " << vs[k] << ", mode = " << mode); - if (mode > 0.0) - { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), - lb, ub, tail); + if (mode > 0.0) { + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - if (mode > 0.0) - { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + if (mode > 0.0) { + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } - if (mode > 0.0) - { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), - lb, ub, tail); + if (mode > 0.0) { + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } } } -void COneOfNPriorTest::testPersist() -{ +void COneOfNPriorTest::testPersist() { 
LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| COneOfNPriorTest::testPersist |"); LOG_DEBUG("+---------------------------------+"); @@ -1278,10 +1136,8 @@ void COneOfNPriorTest::testPersist() // Check that persist/restore is idempotent. TPriorPtrVec models; - models.push_back(TPriorPtr( - CPoissonMeanConjugate::nonInformativePrior().clone())); - models.push_back(TPriorPtr( - CNormalMeanPrecConjugate::nonInformativePrior(E_IntegerData).clone())); + models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); + models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_IntegerData).clone())); const double mean = 10.0; const double variance = 3.0; @@ -1295,11 +1151,9 @@ void COneOfNPriorTest::testPersist() truncateUpTo(0.0, samples); maths::COneOfNPrior origFilter(clone(models), E_IntegerData); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + for (std::size_t i = 0u; i < samples.size(); ++i) { + origFilter.addSamples( + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1317,7 +1171,7 @@ void COneOfNPriorTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - + maths::SDistributionRestoreParams params(E_IntegerData, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, @@ -1325,8 +1179,7 @@ void COneOfNPriorTest::testPersist() maths::MINIMUM_CATEGORY_COUNT); maths::COneOfNPrior restoredFilter(params, traverser); - LOG_DEBUG("orig checksum = " << checksum - << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1339,52 +1192,30 @@ void COneOfNPriorTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test *COneOfNPriorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("COneOfNPriorTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testFilter", - &COneOfNPriorTest::testFilter) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testMultipleUpdate", - &COneOfNPriorTest::testMultipleUpdate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testWeights", - &COneOfNPriorTest::testWeights) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testModels", - &COneOfNPriorTest::testModels) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testModelSelection", - &COneOfNPriorTest::testModelSelection) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testMarginalLikelihood", - &COneOfNPriorTest::testMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testSampleMarginalLikelihood", - &COneOfNPriorTest::testSampleMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testMarginalLikelihoodMean", - &COneOfNPriorTest::testMarginalLikelihoodMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - 
"COneOfNPriorTest::testMarginalLikelihoodMode", - &COneOfNPriorTest::testMarginalLikelihoodMode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testMarginalLikelihoodVariance", - &COneOfNPriorTest::testMarginalLikelihoodVariance) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testCdf", - &COneOfNPriorTest::testCdf) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testProbabilityOfLessLikelySamples", - &COneOfNPriorTest::testProbabilityOfLessLikelySamples) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COneOfNPriorTest::testPersist", - &COneOfNPriorTest::testPersist) ); +CppUnit::Test* COneOfNPriorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COneOfNPriorTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testFilter", &COneOfNPriorTest::testFilter)); + suiteOfTests->addTest( + new CppUnit::TestCaller("COneOfNPriorTest::testMultipleUpdate", &COneOfNPriorTest::testMultipleUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testWeights", &COneOfNPriorTest::testWeights)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testModels", &COneOfNPriorTest::testModels)); + suiteOfTests->addTest( + new CppUnit::TestCaller("COneOfNPriorTest::testModelSelection", &COneOfNPriorTest::testModelSelection)); + suiteOfTests->addTest( + new CppUnit::TestCaller("COneOfNPriorTest::testMarginalLikelihood", &COneOfNPriorTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testSampleMarginalLikelihood", + &COneOfNPriorTest::testSampleMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testMarginalLikelihoodMean", + &COneOfNPriorTest::testMarginalLikelihoodMean)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testMarginalLikelihoodMode", + &COneOfNPriorTest::testMarginalLikelihoodMode)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testMarginalLikelihoodVariance", + &COneOfNPriorTest::testMarginalLikelihoodVariance)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testCdf", &COneOfNPriorTest::testCdf)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testProbabilityOfLessLikelySamples", + &COneOfNPriorTest::testProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller("COneOfNPriorTest::testPersist", &COneOfNPriorTest::testPersist)); return suiteOfTests; } - - - diff --git a/lib/maths/unittest/COneOfNPriorTest.h b/lib/maths/unittest/COneOfNPriorTest.h index 92ad45db3e..931760fcf1 100644 --- a/lib/maths/unittest/COneOfNPriorTest.h +++ b/lib/maths/unittest/COneOfNPriorTest.h @@ -9,25 +9,23 @@ #include +class COneOfNPriorTest : public CppUnit::TestFixture { +public: + void testFilter(); + void testMultipleUpdate(); + void testWeights(); + void testModels(); + void testModelSelection(); + void testMarginalLikelihood(); + void testMarginalLikelihoodMean(); + void testMarginalLikelihoodMode(); + void testMarginalLikelihoodVariance(); + void testSampleMarginalLikelihood(); + void testCdf(); + void testProbabilityOfLessLikelySamples(); + void testPersist(); -class COneOfNPriorTest : public CppUnit::TestFixture -{ - public: - void testFilter(); - void testMultipleUpdate(); - void testWeights(); - void testModels(); - void testModelSelection(); - void testMarginalLikelihood(); - void testMarginalLikelihoodMean(); - void testMarginalLikelihoodMode(); - void 
testMarginalLikelihoodVariance(); - void testSampleMarginalLikelihood(); - void testCdf(); - void testProbabilityOfLessLikelySamples(); - void testPersist(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_COneOfNPriorTest_h diff --git a/lib/maths/unittest/COrderingsTest.cc b/lib/maths/unittest/COrderingsTest.cc index 8e9c8a8152..f41220d79c 100644 --- a/lib/maths/unittest/COrderingsTest.cc +++ b/lib/maths/unittest/COrderingsTest.cc @@ -23,52 +23,40 @@ using namespace ml; -namespace -{ +namespace { using TOptionalDouble = boost::optional<double>; -class CDictionary -{ - public: - using TStrVec = std::vector<std::string>; - static std::size_t ms_Copies; +class CDictionary { +public: + using TStrVec = std::vector<std::string>; + static std::size_t ms_Copies; - public: - CDictionary(const TStrVec &words) : m_Words(words) {} +public: + CDictionary(const TStrVec& words) : m_Words(words) {} - CDictionary &operator=(const CDictionary &other) - { - ++ms_Copies; - m_Words = other.m_Words; - return *this; - } + CDictionary& operator=(const CDictionary& other) { + ++ms_Copies; + m_Words = other.m_Words; + return *this; + } - void swap(CDictionary &other) - { - m_Words.swap(other.m_Words); - } + void swap(CDictionary& other) { m_Words.swap(other.m_Words); } - std::string print() const - { - return core::CContainerPrinter::print(m_Words); - } + std::string print() const { return core::CContainerPrinter::print(m_Words); } - private: - TStrVec m_Words; +private: + TStrVec m_Words; }; std::size_t CDictionary::ms_Copies(0u); -void swap(CDictionary &lhs, CDictionary &rhs) -{ +void swap(CDictionary& lhs, CDictionary& rhs) { lhs.swap(rhs); } - } -void COrderingsTest::testOptionalOrdering() -{ +void COrderingsTest::testOptionalOrdering() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| COrderingsTest::testOptionalOrdering |"); LOG_DEBUG("+----------------------------------------+"); @@ -80,76 +68,74 @@ void COrderingsTest::testOptionalOrdering() maths::COrderings::SOptionalLess less; - CPPUNIT_ASSERT( less(big, null)); + CPPUNIT_ASSERT(less(big, null)); CPPUNIT_ASSERT(!less(null, big)); CPPUNIT_ASSERT(!less(null, null)); - CPPUNIT_ASSERT( less(100.0, null)); + CPPUNIT_ASSERT(less(100.0, null)); CPPUNIT_ASSERT(!less(null, 100.0)); - CPPUNIT_ASSERT( less(one, two)); - CPPUNIT_ASSERT( less(one, 2.0)); + CPPUNIT_ASSERT(less(one, two)); + CPPUNIT_ASSERT(less(one, 2.0)); CPPUNIT_ASSERT(!less(two, one)); CPPUNIT_ASSERT(!less(2.0, one)); CPPUNIT_ASSERT(!less(one, one)); - CPPUNIT_ASSERT( less(one, big)); + CPPUNIT_ASSERT(less(one, big)); maths::COrderings::SOptionalGreater greater; CPPUNIT_ASSERT(!greater(big, null)); - CPPUNIT_ASSERT( greater(null, big)); + CPPUNIT_ASSERT(greater(null, big)); CPPUNIT_ASSERT(!greater(null, null)); CPPUNIT_ASSERT(!greater(100.0, null)); - CPPUNIT_ASSERT( greater(null, 100.0)); + CPPUNIT_ASSERT(greater(null, 100.0)); CPPUNIT_ASSERT(!greater(one, two)); CPPUNIT_ASSERT(!greater(one, 2.0)); - CPPUNIT_ASSERT( greater(two, one)); - CPPUNIT_ASSERT( greater(2.0, one)); + CPPUNIT_ASSERT(greater(two, one)); + CPPUNIT_ASSERT(greater(2.0, one)); CPPUNIT_ASSERT(!greater(one, one)); CPPUNIT_ASSERT(!greater(one, big)); } -void COrderingsTest::testPtrOrdering() -{ +void COrderingsTest::testPtrOrdering() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| COrderingsTest::testPtrOrdering |"); LOG_DEBUG("+-----------------------------------+"); - const double *null = 0; + const double* null = 0; double one_(1.0); double two_(2.0); double hundred_(100.0); 
double big_(std::numeric_limits<double>::max()); - const double *one(&one_); - const double *two(&two_); - const double *hundred(&hundred_); - const double *big(&big_); + const double* one(&one_); + const double* two(&two_); + const double* hundred(&hundred_); + const double* big(&big_); maths::COrderings::SPtrLess less; - CPPUNIT_ASSERT( less(big, null)); + CPPUNIT_ASSERT(less(big, null)); CPPUNIT_ASSERT(!less(null, big)); CPPUNIT_ASSERT(!less(null, null)); - CPPUNIT_ASSERT( less(hundred, null)); + CPPUNIT_ASSERT(less(hundred, null)); CPPUNIT_ASSERT(!less(null, hundred)); - CPPUNIT_ASSERT( less(one, two)); + CPPUNIT_ASSERT(less(one, two)); CPPUNIT_ASSERT(!less(two, one)); CPPUNIT_ASSERT(!less(one, one)); - CPPUNIT_ASSERT( less(one, big)); + CPPUNIT_ASSERT(less(one, big)); maths::COrderings::SPtrGreater greater; CPPUNIT_ASSERT(!greater(big, null)); - CPPUNIT_ASSERT( greater(null, big)); + CPPUNIT_ASSERT(greater(null, big)); CPPUNIT_ASSERT(!greater(null, null)); CPPUNIT_ASSERT(!greater(hundred, null)); - CPPUNIT_ASSERT( greater(null, hundred)); + CPPUNIT_ASSERT(greater(null, hundred)); CPPUNIT_ASSERT(!greater(one, two)); - CPPUNIT_ASSERT( greater(two, one)); + CPPUNIT_ASSERT(greater(two, one)); CPPUNIT_ASSERT(!greater(one, one)); CPPUNIT_ASSERT(!greater(one, big)); } -void COrderingsTest::testLess() -{ +void COrderingsTest::testLess() { LOG_DEBUG("+----------------------------+"); LOG_DEBUG("| COrderingsTest::testLess |"); LOG_DEBUG("+----------------------------+"); @@ -162,34 +148,34 @@ void COrderingsTest::testLess() TOptionalDouble two(2.0); TOptionalDouble big(std::numeric_limits<double>::max()); - CPPUNIT_ASSERT( less(big, null)); + CPPUNIT_ASSERT(less(big, null)); CPPUNIT_ASSERT(!less(null, big)); CPPUNIT_ASSERT(!less(null, null)); - CPPUNIT_ASSERT( less(one, two)); + CPPUNIT_ASSERT(less(one, two)); CPPUNIT_ASSERT(!less(two, one)); CPPUNIT_ASSERT(!less(one, one)); - CPPUNIT_ASSERT( less(one, big)); + CPPUNIT_ASSERT(less(one, big)); } { - const double *null = 0; + const double* null = 0; double one_(1.0); double two_(2.0); double hundred_(100.0); double big_(std::numeric_limits<double>::max()); - const double *one(&one_); - const double *two(&two_); - const double *hundred(&hundred_); - const double *big(&big_); + const double* one(&one_); + const double* two(&two_); + const double* hundred(&hundred_); + const double* big(&big_); - CPPUNIT_ASSERT( less(big, null)); + CPPUNIT_ASSERT(less(big, null)); CPPUNIT_ASSERT(!less(null, big)); CPPUNIT_ASSERT(!less(null, null)); - CPPUNIT_ASSERT( less(hundred, null)); + CPPUNIT_ASSERT(less(hundred, null)); CPPUNIT_ASSERT(!less(null, hundred)); - CPPUNIT_ASSERT( less(one, two)); + CPPUNIT_ASSERT(less(one, two)); CPPUNIT_ASSERT(!less(two, one)); CPPUNIT_ASSERT(!less(one, one)); - CPPUNIT_ASSERT( less(one, big)); + CPPUNIT_ASSERT(less(one, big)); } double one(1.0); @@ -197,50 +183,35 @@ void COrderingsTest::testLess() double three(3.0); { - CPPUNIT_ASSERT( less(std::make_pair(std::make_pair(one, three), three), - std::make_pair(std::make_pair(two, two), two))); - CPPUNIT_ASSERT( less(std::make_pair(std::make_pair(one, two), three), - std::make_pair(std::make_pair(one, three), two))); - CPPUNIT_ASSERT( less(std::make_pair(std::make_pair(one, two), two), - std::make_pair(std::make_pair(one, two), three))); - CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), - std::make_pair(std::make_pair(one, two), three))); - CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(two, two), two), - std::make_pair(std::make_pair(one, three), three))); - 
CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, three), two), - std::make_pair(std::make_pair(one, two), three))); - CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), - std::make_pair(std::make_pair(one, two), two))); + CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, three), three), std::make_pair(std::make_pair(two, two), two))); + CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, two), three), std::make_pair(std::make_pair(one, three), two))); + CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, two), two), std::make_pair(std::make_pair(one, two), three))); + CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), std::make_pair(std::make_pair(one, two), three))); + CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(two, two), two), std::make_pair(std::make_pair(one, three), three))); + CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, three), two), std::make_pair(std::make_pair(one, two), three))); + CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), std::make_pair(std::make_pair(one, two), two))); } { - CPPUNIT_ASSERT( less(std::make_pair(std::make_pair(&one, three), three), - std::make_pair(std::make_pair(&two, two), two))); - CPPUNIT_ASSERT( less(std::make_pair(std::make_pair(&one, two), three), - std::make_pair(std::make_pair(&one, three), two))); - CPPUNIT_ASSERT( less(std::make_pair(std::make_pair(one, &two), two), - std::make_pair(std::make_pair(one, &two), three))); - CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, &two), three), - std::make_pair(std::make_pair(one, &two), three))); - CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(two, two), &two), - std::make_pair(std::make_pair(one, three), &three))); - CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(&one, &three), &two), - std::make_pair(std::make_pair(&one, &two), &three))); - CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), - std::make_pair(std::make_pair(one, two), two))); + CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(&one, three), three), std::make_pair(std::make_pair(&two, two), two))); + CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(&one, two), three), std::make_pair(std::make_pair(&one, three), two))); + CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, &two), two), std::make_pair(std::make_pair(one, &two), three))); + CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, &two), three), std::make_pair(std::make_pair(one, &two), three))); + CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(two, two), &two), std::make_pair(std::make_pair(one, three), &three))); + CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(&one, &three), &two), std::make_pair(std::make_pair(&one, &two), &three))); + CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), std::make_pair(std::make_pair(one, two), two))); } } -void COrderingsTest::testFirstLess() -{ +void COrderingsTest::testFirstLess() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| COrderingsTest::testFirstLess |"); LOG_DEBUG("+---------------------------------+"); maths::COrderings::SFirstLess less; - CPPUNIT_ASSERT( less(std::make_pair(1.0, 1.0), std::make_pair(2.0, 1.0))); - CPPUNIT_ASSERT( less(1.0, std::make_pair(2.0, 1.0))); - CPPUNIT_ASSERT( less(std::make_pair(1.0, 2.0), 2.0)); + CPPUNIT_ASSERT(less(std::make_pair(1.0, 1.0), std::make_pair(2.0, 1.0))); + CPPUNIT_ASSERT(less(1.0, std::make_pair(2.0, 1.0))); + CPPUNIT_ASSERT(less(std::make_pair(1.0, 2.0), 2.0)); CPPUNIT_ASSERT(!less(std::make_pair(1.0, 1.0), 
std::make_pair(1.0, 2.0))); CPPUNIT_ASSERT(!less(1.0, std::make_pair(1.0, 2.0))); CPPUNIT_ASSERT(!less(std::make_pair(1.0, 1.0), 1.0)); @@ -248,7 +219,7 @@ void COrderingsTest::testFirstLess() CPPUNIT_ASSERT(!less(2.0, std::make_pair(1.0, 1.0))); CPPUNIT_ASSERT(!less(std::make_pair(2.0, 2.0), 1.0)); - CPPUNIT_ASSERT( less(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 2.0), 1.0))); + CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 2.0), 1.0))); CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0))); CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(1.0, 2.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0))); @@ -256,9 +227,9 @@ void COrderingsTest::testFirstLess() double two(2.0); double three(3.0); - CPPUNIT_ASSERT( less(std::make_pair(&one, &one), std::make_pair(&two, &one))); - CPPUNIT_ASSERT( less(&one, std::make_pair(&two, &one))); - CPPUNIT_ASSERT( less(std::make_pair(&one, &two), &two)); + CPPUNIT_ASSERT(less(std::make_pair(&one, &one), std::make_pair(&two, &one))); + CPPUNIT_ASSERT(less(&one, std::make_pair(&two, &one))); + CPPUNIT_ASSERT(less(std::make_pair(&one, &two), &two)); CPPUNIT_ASSERT(!less(std::make_pair(&one, &one), std::make_pair(&one, &two))); CPPUNIT_ASSERT(!less(&one, std::make_pair(&one, &two))); CPPUNIT_ASSERT(!less(std::make_pair(&one, &one), &one)); @@ -267,8 +238,7 @@ void COrderingsTest::testFirstLess() CPPUNIT_ASSERT(!less(std::make_pair(&two, &two), &one)); } -void COrderingsTest::testFirstGreater() -{ +void COrderingsTest::testFirstGreater() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| COrderingsTest::testFirstGreater |"); LOG_DEBUG("+------------------------------------+"); @@ -281,13 +251,13 @@ void COrderingsTest::testFirstGreater() CPPUNIT_ASSERT(!greater(std::make_pair(1.0, 2.0), std::make_pair(1.0, 1.0))); CPPUNIT_ASSERT(!greater(2.0, std::make_pair(2.0, 1.0))); CPPUNIT_ASSERT(!greater(std::make_pair(1.0, 2.0), 1.0)); - CPPUNIT_ASSERT( greater(std::make_pair(2.0, 2.0), std::make_pair(1.0, 3.0))); - CPPUNIT_ASSERT( greater(2.0, std::make_pair(1.0, 1.0))); - CPPUNIT_ASSERT( greater(std::make_pair(2.0, 2.0), 1.0)); + CPPUNIT_ASSERT(greater(std::make_pair(2.0, 2.0), std::make_pair(1.0, 3.0))); + CPPUNIT_ASSERT(greater(2.0, std::make_pair(1.0, 1.0))); + CPPUNIT_ASSERT(greater(std::make_pair(2.0, 2.0), 1.0)); CPPUNIT_ASSERT(!greater(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 2.0), 1.0))); CPPUNIT_ASSERT(!greater(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0))); - CPPUNIT_ASSERT( greater(std::make_pair(std::make_pair(1.0, 2.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0))); + CPPUNIT_ASSERT(greater(std::make_pair(std::make_pair(1.0, 2.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0))); double one(1.0); double two(2.0); @@ -299,13 +269,12 @@ void COrderingsTest::testFirstGreater() CPPUNIT_ASSERT(!greater(std::make_pair(&one, &two), std::make_pair(&one, &one))); CPPUNIT_ASSERT(!greater(&two, std::make_pair(&two, &one))); CPPUNIT_ASSERT(!greater(std::make_pair(&one, &two), &one)); - CPPUNIT_ASSERT( greater(std::make_pair(&two, &two), std::make_pair(&one, &three))); - CPPUNIT_ASSERT( greater(&two, std::make_pair(&one, &two))); - CPPUNIT_ASSERT( greater(std::make_pair(&two, &one), &one)); + CPPUNIT_ASSERT(greater(std::make_pair(&two, &two), std::make_pair(&one, &three))); + 
CPPUNIT_ASSERT(greater(&two, std::make_pair(&one, &two))); + CPPUNIT_ASSERT(greater(std::make_pair(&two, &one), &one)); } -void COrderingsTest::testSecondLess() -{ +void COrderingsTest::testSecondLess() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| COrderingsTest::testSecondLess |"); LOG_DEBUG("+----------------------------------+"); @@ -318,15 +287,15 @@ void COrderingsTest::testSecondLess() CPPUNIT_ASSERT(!less(std::make_pair(1.0, 1.0), std::make_pair(2.0, 1.0))); CPPUNIT_ASSERT(!less(1.0, std::make_pair(2.0, 1.0))); CPPUNIT_ASSERT(!less(std::make_pair(1.0, 2.0), 2.0)); - CPPUNIT_ASSERT( less(std::make_pair(2.0, 2.0), std::make_pair(1.0, 3.0))); - CPPUNIT_ASSERT( less(2.0, std::make_pair(1.0, 3.0))); - CPPUNIT_ASSERT( less(std::make_pair(2.0, 1.0), 2.0)); + CPPUNIT_ASSERT(less(std::make_pair(2.0, 2.0), std::make_pair(1.0, 3.0))); + CPPUNIT_ASSERT(less(2.0, std::make_pair(1.0, 3.0))); + CPPUNIT_ASSERT(less(std::make_pair(2.0, 1.0), 2.0)); - CPPUNIT_ASSERT( less(std::make_pair(1.0, std::make_pair(1.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 1.0)))); + CPPUNIT_ASSERT(less(std::make_pair(1.0, std::make_pair(1.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 1.0)))); CPPUNIT_ASSERT(!less(std::make_pair(1.0, std::make_pair(1.0, 2.0)), std::make_pair(2.0, std::make_pair(1.0, 2.0)))); CPPUNIT_ASSERT(!less(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 1.0)))); - CPPUNIT_ASSERT( less(std::make_pair(1.0, 1.0), std::make_pair(3.0, std::make_pair(1.0, 2.0)))); - CPPUNIT_ASSERT( less(std::make_pair(1.0, std::make_pair(3.0, 1.0)), std::make_pair(3.0, 2.0))); + CPPUNIT_ASSERT(less(std::make_pair(1.0, 1.0), std::make_pair(3.0, std::make_pair(1.0, 2.0)))); + CPPUNIT_ASSERT(less(std::make_pair(1.0, std::make_pair(3.0, 1.0)), std::make_pair(3.0, 2.0))); double one(1.0); double two(2.0); @@ -338,22 +307,21 @@ void COrderingsTest::testSecondLess() CPPUNIT_ASSERT(!less(std::make_pair(&one, &one), std::make_pair(&two, &one))); CPPUNIT_ASSERT(!less(&one, std::make_pair(&two, &one))); CPPUNIT_ASSERT(!less(std::make_pair(&one, &two), &two)); - CPPUNIT_ASSERT( less(std::make_pair(&two, &two), std::make_pair(&one, &three))); - CPPUNIT_ASSERT( less(&two, std::make_pair(&one, &three))); - CPPUNIT_ASSERT( less(std::make_pair(&two, &one), &two)); + CPPUNIT_ASSERT(less(std::make_pair(&two, &two), std::make_pair(&one, &three))); + CPPUNIT_ASSERT(less(&two, std::make_pair(&one, &three))); + CPPUNIT_ASSERT(less(std::make_pair(&two, &one), &two)); } -void COrderingsTest::testSecondGreater() -{ +void COrderingsTest::testSecondGreater() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| COrderingsTest::testSecondGreater |"); LOG_DEBUG("+-------------------------------------+"); maths::COrderings::SSecondGreater greater; - CPPUNIT_ASSERT( greater(std::make_pair(1.0, 2.0), std::make_pair(2.0, 1.0))); - CPPUNIT_ASSERT( greater(2.0, std::make_pair(3.0, 1.0))); - CPPUNIT_ASSERT( greater(std::make_pair(1.0, 3.0), 2.0)); + CPPUNIT_ASSERT(greater(std::make_pair(1.0, 2.0), std::make_pair(2.0, 1.0))); + CPPUNIT_ASSERT(greater(2.0, std::make_pair(3.0, 1.0))); + CPPUNIT_ASSERT(greater(std::make_pair(1.0, 3.0), 2.0)); CPPUNIT_ASSERT(!greater(std::make_pair(1.0, 1.0), std::make_pair(2.0, 1.0))); CPPUNIT_ASSERT(!greater(1.0, std::make_pair(2.0, 1.0))); CPPUNIT_ASSERT(!greater(std::make_pair(1.0, 2.0), 2.0)); @@ -361,19 +329,19 @@ void COrderingsTest::testSecondGreater() CPPUNIT_ASSERT(!greater(2.0, std::make_pair(1.0, 3.0))); 
CPPUNIT_ASSERT(!greater(std::make_pair(2.0, 1.0), 2.0)); - CPPUNIT_ASSERT( greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 1.0)))); + CPPUNIT_ASSERT(greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 1.0)))); CPPUNIT_ASSERT(!greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 2.0)))); CPPUNIT_ASSERT(!greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 3.0)))); - CPPUNIT_ASSERT( greater(std::make_pair(2.0, 2.0), std::make_pair(3.0, std::make_pair(1.0, 2.0)))); - CPPUNIT_ASSERT( greater(std::make_pair(1.0, std::make_pair(3.0, 3.0)), std::make_pair(3.0, 2.0))); + CPPUNIT_ASSERT(greater(std::make_pair(2.0, 2.0), std::make_pair(3.0, std::make_pair(1.0, 2.0)))); + CPPUNIT_ASSERT(greater(std::make_pair(1.0, std::make_pair(3.0, 3.0)), std::make_pair(3.0, 2.0))); double one(1.0); double two(2.0); double three(3.0); - CPPUNIT_ASSERT( greater(std::make_pair(&one, &two), std::make_pair(&two, &one))); - CPPUNIT_ASSERT( greater(&two, std::make_pair(&three, &one))); - CPPUNIT_ASSERT( greater(std::make_pair(&one, &three), &two)); + CPPUNIT_ASSERT(greater(std::make_pair(&one, &two), std::make_pair(&two, &one))); + CPPUNIT_ASSERT(greater(&two, std::make_pair(&three, &one))); + CPPUNIT_ASSERT(greater(std::make_pair(&one, &three), &two)); CPPUNIT_ASSERT(!greater(std::make_pair(&one, &one), std::make_pair(&two, &one))); CPPUNIT_ASSERT(!greater(&one, std::make_pair(&two, &one))); CPPUNIT_ASSERT(!greater(std::make_pair(&one, &two), &two)); @@ -382,8 +350,7 @@ void COrderingsTest::testDereference() CPPUNIT_ASSERT(!greater(std::make_pair(&two, &two), &two)); } -void COrderingsTest::testDereference() -{ +void COrderingsTest::testDereference() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| COrderingsTest::testDereference |"); LOG_DEBUG("+-----------------------------------+"); @@ -392,26 +359,22 @@ void COrderingsTest::testDereference() using TDoubleVecCItr = std::vector<double>::const_iterator; using TDoubleVecCItrVec = std::vector<TDoubleVecCItr>; - double values_[] = { 10.0, 1.0, 5.0, 3.0, 1.0 }; + double values_[] = {10.0, 1.0, 5.0, 3.0, 1.0}; TDoubleVec values(boost::begin(values_), boost::end(values_)); TDoubleVecCItrVec iterators; - for (TDoubleVecCItr i = values.begin(); i != values.end(); ++i) - { + for (TDoubleVecCItr i = values.begin(); i != values.end(); ++i) { iterators.push_back(i); } - std::sort(iterators.begin(), iterators.end(), - core::CFunctional::SDereference<maths::COrderings::SLess>()); + std::sort(iterators.begin(), iterators.end(), core::CFunctional::SDereference<maths::COrderings::SLess>()); std::sort(boost::begin(values_), boost::end(values_)); - for (std::size_t i = 0u; i < boost::size(values); ++i) - { + for (std::size_t i = 0u; i < boost::size(values); ++i) { LOG_DEBUG("expected " << values_[i] << ", got " << *iterators[i]); CPPUNIT_ASSERT_EQUAL(values_[i], *iterators[i]); } } -void COrderingsTest::testLexicographicalCompare() -{ +void COrderingsTest::testLexicographicalCompare() { LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| COrderingsTest::testLexicographicalCompare |"); LOG_DEBUG("+----------------------------------------------+"); @@ -431,8 +394,8 @@ void COrderingsTest::testLexicographicalCompare() TDoubleDoublePr p2(1.2, 1.5); TDoubleVec v1, v2; - double a1[] = { 1.2, 1.3, 2.1 }; - double a2[] = { 1.2, 2.2, 2.0 }; + double a1[] = {1.2, 1.3, 2.1}; + double a2[] = {1.2, 2.2, 2.0}; v1.assign(boost::begin(a1), boost::end(a1)); 
v2.assign(boost::begin(a2), boost::end(a2)); @@ -445,27 +408,27 @@ void COrderingsTest::testLexicographicalCompare() CPPUNIT_ASSERT(v1 < v2); CPPUNIT_ASSERT(s1 < s2); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, i2)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, i2)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, i1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, i1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, i2, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, i1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i2, i1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i2, i1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, i2, p1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, i1, p2)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, i2, p1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, i1, p2)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, i1, p1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, p1, i1, p1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p2, i1, p1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, i2, p1, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, i1, p2, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, i1, p1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i2, p1, i1, p1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p2, i1, p1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i2, p1, i1, p1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p2, i1, p1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, i2, p1, d1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, i1, p2, d1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, i1, p1, d2)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, i2, p1, d1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, i1, p2, d1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, i1, p1, d2)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, i1, p1, d1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, p1, d1, i1, p1, d1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p2, d1, i1, p1, d1)); @@ -474,14 +437,14 @@ void COrderingsTest::testLexicographicalCompare() CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, i1, p2, d1, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, i1, p1, d2, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, i1, p1, d1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i2, p1, d1, i1, p1, d1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p2, d1, i1, p1, d1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d2, i1, p1, d1, greater)); - - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i2, p1, d1, v1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p2, d1, v1)); - CPPUNIT_ASSERT( 
maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d2, v1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v2)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i2, p1, d1, i1, p1, d1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p2, d1, i1, p1, d1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d2, i1, p1, d1, greater)); + + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i2, p1, d1, v1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p2, d1, v1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d2, v1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v2)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, p1, d1, v1, i1, p1, d1, v1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p2, d1, v1, i1, p1, d1, v1)); @@ -492,16 +455,16 @@ void COrderingsTest::testLexicographicalCompare() CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d2, v1, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v2, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i2, p1, d1, v1, i1, p1, d1, v1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p2, d1, v1, i1, p1, d1, v1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d2, v1, i1, p1, d1, v1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v2, i1, p1, d1, v1, greater)); - - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i2, p1, d1, v1, s1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p2, d1, v1, s1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d2, v1, s1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v2, s1)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s2)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i2, p1, d1, v1, i1, p1, d1, v1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p2, d1, v1, i1, p1, d1, v1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d2, v1, i1, p1, d1, v1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v2, i1, p1, d1, v1, greater)); + + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i2, p1, d1, v1, s1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p2, d1, v1, s1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d2, v1, s1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v2, s1)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s2)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s1)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, p1, d1, v1, s1, i1, p1, d1, v1, s1)); 
CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p2, d1, v1, s1, i1, p1, d1, v1, s1)); @@ -514,15 +477,14 @@ void COrderingsTest::testLexicographicalCompare() CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v2, s1, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s2, greater)); CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i2, p1, d1, v1, s1, i1, p1, d1, v1, s1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p2, d1, v1, s1, i1, p1, d1, v1, s1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d2, v1, s1, i1, p1, d1, v1, s1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v2, s1, i1, p1, d1, v1, s1, greater)); - CPPUNIT_ASSERT( maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s2, i1, p1, d1, v1, s1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i2, p1, d1, v1, s1, i1, p1, d1, v1, s1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p2, d1, v1, s1, i1, p1, d1, v1, s1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d2, v1, s1, i1, p1, d1, v1, s1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v2, s1, i1, p1, d1, v1, s1, greater)); + CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s2, i1, p1, d1, v1, s1, greater)); } -void COrderingsTest::testSimultaneousSort() -{ +void COrderingsTest::testSimultaneousSort() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| COrderingsTest::testSimultaneousSort |"); LOG_DEBUG("+----------------------------------------+"); @@ -538,13 +500,9 @@ void COrderingsTest::testSimultaneousSort() using TDoubleDoubleTupleMap = std::map<double, TDoubleTuple>; { - TDoubleVec keys{ 0.0, 1.0, 0.2, 1.1, 0.7, 5.0 }; - TStrVec values{std::string(1, 'c'), - std::string(1, 'q'), - std::string(1, '!'), - std::string(1, 'a'), - std::string(1, 'z'), - std::string(1, 'p')}; + TDoubleVec keys{0.0, 1.0, 0.2, 1.1, 0.7, 5.0}; + TStrVec values{ + std::string(1, 'c'), std::string(1, 'q'), std::string(1, '!'), std::string(1, 'a'), std::string(1, 'z'), std::string(1, 'p')}; std::string expectedKeys("[0, 0.2, 0.7, 1, 1.1, 5]"); std::string expectedValues("[c, !, z, q, a, p]"); @@ -560,19 +518,10 @@ void COrderingsTest::testSimultaneousSort() CPPUNIT_ASSERT_EQUAL(expectedValues, core::CContainerPrinter::print(values)); } { - TDouble1Vec keys{ 7.0, 1.0, 0.2, 1.1, 0.7, 5.0 }; - TStrVec values1{std::string(1, 'w'), - std::string(1, 'q'), - std::string(1, '~'), - std::string(1, 'e'), - std::string(1, ';'), - std::string(1, 'y')}; - TDoubleDoublePrVec values2{{2.0, 1.0}, - {2.1, 1.1}, - {1.3, 1.9}, - {3.2, 12.9}, - {1.2, 10.1}, - {1.3, 6.2}}; + TDouble1Vec keys{7.0, 1.0, 0.2, 1.1, 0.7, 5.0}; + TStrVec values1{ + std::string(1, 'w'), std::string(1, 'q'), std::string(1, '~'), std::string(1, 'e'), std::string(1, ';'), std::string(1, 'y')}; + TDoubleDoublePrVec values2{{2.0, 1.0}, {2.1, 1.1}, {1.3, 1.9}, {3.2, 12.9}, {1.2, 10.1}, {1.3, 6.2}}; std::string expectedKeys("[0.2, 0.7, 1, 1.1, 5, 7]"); std::string expectedValues1("[~, ;, q, e, y, w]"); @@ -588,7 +537,7 @@ void COrderingsTest::testSimultaneousSort() } test::CRandomNumbers rng; { - TDoubleVec keys{ 7.1, 
0.1, 0.9, 1.4, 0.7, 5.1, 80.0, 4.0}; TStrVec values1{std::string("a1"), std::string("23"), std::string("~1"), @@ -597,19 +546,11 @@ void COrderingsTest::testSimultaneousSort() std::string("zz"), std::string("sss"), std::string("pq")}; - TDoubleDoublePrVec values2{{1.0, 1.0}, - {4.1, 1.1}, - {5.3, 3.9}, - {7.2, 22.9}, - {2.2, 1.1}, - {0.3, 16.2}, - {21.2, 11.1}, - {10.3, 13.2}}; + TDoubleDoublePrVec values2{{1.0, 1.0}, {4.1, 1.1}, {5.3, 3.9}, {7.2, 22.9}, {2.2, 1.1}, {0.3, 16.2}, {21.2, 11.1}, {10.3, 13.2}}; TStrVec rawWords; rng.generateWords(5, keys.size() * 5, rawWords); TDictionaryVec values3; - for (std::size_t i = 0u; i < rawWords.size(); i += 5) - { + for (std::size_t i = 0u; i < rawWords.size(); i += 5) { TStrVec words(rawWords.begin() + i, rawWords.begin() + i + 5); values3.push_back(CDictionary(words)); } @@ -626,8 +567,7 @@ void COrderingsTest::testSimultaneousSort() CPPUNIT_ASSERT_EQUAL(expectedKeys, core::CContainerPrinter::print(keys)); CPPUNIT_ASSERT_EQUAL(expectedValues1, core::CContainerPrinter::print(values1)); CPPUNIT_ASSERT_EQUAL(expectedValues2, core::CContainerPrinter::print(values2)); - for (const auto &value : values3) - { + for (const auto& value : values3) { CPPUNIT_ASSERT_EQUAL(std::size_t(0), value.ms_Copies); } @@ -639,16 +579,15 @@ void COrderingsTest::testSimultaneousSort() CPPUNIT_ASSERT_EQUAL(expectedKeys, core::CContainerPrinter::print(keys)); CPPUNIT_ASSERT_EQUAL(expectedValues1, core::CContainerPrinter::print(values1)); CPPUNIT_ASSERT_EQUAL(expectedValues2, core::CContainerPrinter::print(values2)); - for (const auto &value : values3) - { + for (const auto& value : values3) { CPPUNIT_ASSERT_EQUAL(std::size_t(0), value.ms_Copies); } } { - TDoubleVec values1{ 5.0, 4.0, 3.0, 2.0, 1.0 }; - TDoubleVec values2{ 1.0, 3.0, 2.0, 5.0, 4.0 }; - TDoubleVec values3{ 4.0, 2.0, 3.0, 3.0, 5.0 }; - TDoubleVec values4{ 2.0, 1.0, 5.0, 4.0, 1.0 }; + TDoubleVec values1{5.0, 4.0, 3.0, 2.0, 1.0}; + TDoubleVec values2{1.0, 3.0, 2.0, 5.0, 4.0}; + TDoubleVec values3{4.0, 2.0, 3.0, 3.0, 5.0}; + TDoubleVec values4{2.0, 1.0, 5.0, 4.0, 1.0}; TDoubleRangeVec range1{values1, 1, 4}; TDoubleRangeVec range2{values2, 1, 4}; TDoubleRangeVec range3{values3, 1, 4}; @@ -682,8 +621,7 @@ void COrderingsTest::testSimultaneousSort() CPPUNIT_ASSERT_EQUAL(std::string("[2, 1, 4, 5, 1]"), core::CContainerPrinter::print(values4)); } { - for (std::size_t i = 0u; i < 50; ++i) - { + for (std::size_t i = 0u; i < 50; ++i) { TDoubleVec raw; rng.generateUniformSamples(0.0, 10.0, 50, raw); @@ -694,8 +632,7 @@ void COrderingsTest::testSimultaneousSort() TDoubleVec values4(raw.begin() + 40, raw.begin() + 50); TDoubleDoubleTupleMap expected; - for (std::size_t j = 0u; j < 10; ++j) - { + for (std::size_t j = 0u; j < 10; ++j) { expected[keys[j]] = TDoubleTuple(values1[j], values2[j], values3[j], values4[j]); } @@ -703,8 +640,7 @@ void COrderingsTest::testSimultaneousSort() LOG_DEBUG("keys = " << core::CContainerPrinter::print(keys)); auto itr = expected.begin(); - for (std::size_t j = 0u; j < keys.size(); ++j, ++itr) - { + for (std::size_t j = 0u; j < keys.size(); ++j, ++itr) { CPPUNIT_ASSERT_EQUAL(itr->first, keys[j]); CPPUNIT_ASSERT_EQUAL(itr->second.get<0>(), values1[j]); CPPUNIT_ASSERT_EQUAL(itr->second.get<1>(), values2[j]); @@ -715,40 +651,22 @@ void COrderingsTest::testSimultaneousSort() } } -CppUnit::Test *COrderingsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("COrderingsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<COrderingsTest>( "COrderingsTest::testOptionalOrdering", 
&COrderingsTest::testOptionalOrdering) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testPtrOrdering", - &COrderingsTest::testPtrOrdering) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testLess", - &COrderingsTest::testLess) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testFirstLess", - &COrderingsTest::testFirstLess) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testFirstGreater", - &COrderingsTest::testFirstGreater) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testSecondLess", - &COrderingsTest::testSecondLess) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testSecondGreater", - &COrderingsTest::testSecondGreater) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testDereference", - &COrderingsTest::testDereference) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testLexicographicalCompare", - &COrderingsTest::testLexicographicalCompare) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "COrderingsTest::testSimultaneousSort", - &COrderingsTest::testSimultaneousSort) ); +CppUnit::Test* COrderingsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COrderingsTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("COrderingsTest::testOptionalOrdering", &COrderingsTest::testOptionalOrdering)); + suiteOfTests->addTest(new CppUnit::TestCaller("COrderingsTest::testPtrOrdering", &COrderingsTest::testPtrOrdering)); + suiteOfTests->addTest(new CppUnit::TestCaller("COrderingsTest::testLess", &COrderingsTest::testLess)); + suiteOfTests->addTest(new CppUnit::TestCaller("COrderingsTest::testFirstLess", &COrderingsTest::testFirstLess)); + suiteOfTests->addTest(new CppUnit::TestCaller("COrderingsTest::testFirstGreater", &COrderingsTest::testFirstGreater)); + suiteOfTests->addTest(new CppUnit::TestCaller("COrderingsTest::testSecondLess", &COrderingsTest::testSecondLess)); + suiteOfTests->addTest(new CppUnit::TestCaller("COrderingsTest::testSecondGreater", &COrderingsTest::testSecondGreater)); + suiteOfTests->addTest(new CppUnit::TestCaller("COrderingsTest::testDereference", &COrderingsTest::testDereference)); + suiteOfTests->addTest( + new CppUnit::TestCaller("COrderingsTest::testLexicographicalCompare", &COrderingsTest::testLexicographicalCompare)); + suiteOfTests->addTest( + new CppUnit::TestCaller("COrderingsTest::testSimultaneousSort", &COrderingsTest::testSimultaneousSort)); return suiteOfTests; } diff --git a/lib/maths/unittest/COrderingsTest.h b/lib/maths/unittest/COrderingsTest.h index ae697a5e68..5e5e39afd7 100644 --- a/lib/maths/unittest/COrderingsTest.h +++ b/lib/maths/unittest/COrderingsTest.h @@ -9,21 +9,20 @@ #include -class COrderingsTest : public CppUnit::TestFixture -{ - public: - void testOptionalOrdering(); - void testPtrOrdering(); - void testLess(); - void testFirstLess(); - void testFirstGreater(); - void testSecondLess(); - void testSecondGreater(); - void testDereference(); - void testLexicographicalCompare(); - void testSimultaneousSort(); +class COrderingsTest : public CppUnit::TestFixture { +public: + void testOptionalOrdering(); + void testPtrOrdering(); + void testLess(); + void testFirstLess(); + void testFirstGreater(); + void testSecondLess(); + void testSecondGreater(); + void testDereference(); + void testLexicographicalCompare(); + void testSimultaneousSort(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif 
diff --git a/lib/maths/unittest/COrdinalTest.cc b/lib/maths/unittest/COrdinalTest.cc
index d32e9ca5b5..b82e74a184 100644
--- a/lib/maths/unittest/COrdinalTest.cc
+++ b/lib/maths/unittest/COrdinalTest.cc
@@ -22,23 +22,19 @@

 using namespace ml;

-namespace
-{
+namespace {
 using TDoubleVec = std::vector<double>;

 template<typename T>
-std::string precisePrint(T x)
-{
+std::string precisePrint(T x) {
     std::ostringstream o;
     o << std::setprecision(18) << x;
     return o.str();
 }
-
 }

-void COrdinalTest::testEqual()
-{
+void COrdinalTest::testEqual() {
     LOG_DEBUG("");
     LOG_DEBUG("+---------------------------+");
     LOG_DEBUG("|  COrdinalTest::testEqual  |");
@@ -46,8 +42,7 @@ void COrdinalTest::testEqual()

     test::CRandomNumbers rng;

-    for (std::size_t i = 0u; i < 1000; ++i)
-    {
+    for (std::size_t i = 0u; i < 1000; ++i) {
         TDoubleVec sample;
         rng.generateUniformSamples(-10000.0, 10000.0, 1, sample);
         bool equal = maths::COrdinal(static_cast<int64_t>(sample[0])) == maths::COrdinal(static_cast<int64_t>(sample[0]));
@@ -56,8 +51,7 @@ void COrdinalTest::testEqual()
         CPPUNIT_ASSERT_EQUAL(true, equal);
         equal = maths::COrdinal(sample[0]) == maths::COrdinal(sample[0]);
         CPPUNIT_ASSERT_EQUAL(true, equal);
-        if (sample[0] >= 0.0)
-        {
+        if (sample[0] >= 0.0) {
             equal = maths::COrdinal(static_cast<uint64_t>(sample[0])) == maths::COrdinal(std::floor(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
             equal = maths::COrdinal(std::floor(sample[0])) == maths::COrdinal(static_cast<uint64_t>(sample[0]));
@@ -66,9 +60,7 @@ void COrdinalTest::testEqual()
             CPPUNIT_ASSERT_EQUAL(true, equal);
             equal = maths::COrdinal(std::floor(sample[0])) == maths::COrdinal(static_cast<int64_t>(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
-        }
-        else
-        {
+        } else {
             equal = maths::COrdinal(static_cast<int64_t>(sample[0])) == maths::COrdinal(std::ceil(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
             equal = maths::COrdinal(std::ceil(sample[0])) == maths::COrdinal(static_cast<int64_t>(sample[0]));
@@ -78,36 +70,26 @@ void COrdinalTest::testEqual()

     // Test doubles outside the integer range.
     double small = -1e37;
-    double large  = 1e23;
+    double large = 1e23;
     CPPUNIT_ASSERT(maths::COrdinal(small) != maths::COrdinal(boost::numeric::bounds<int64_t>::lowest()));
     CPPUNIT_ASSERT(maths::COrdinal(large) != maths::COrdinal(boost::numeric::bounds<uint64_t>::highest()));
     CPPUNIT_ASSERT(maths::COrdinal(boost::numeric::bounds<int64_t>::lowest()) != maths::COrdinal(small));
     CPPUNIT_ASSERT(maths::COrdinal(boost::numeric::bounds<uint64_t>::highest()) != maths::COrdinal(large));

     // Check some integer values which can't be represented as doubles.
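     // Note: an IEEE-754 double carries a 53-bit significand, so above 2^53
     // not every integer is representable. The values used below lie in
     // [2^58, 2^59), where adjacent doubles are 64 apart: both
     // 569817345679111267 and 569817345679111268 round to the same double,
     // 569817345679111296.0, so they must compare equal after conversion to
     // double but unequal as COrdinal integers.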
-    maths::COrdinal s1[] =
-        {
-            maths::COrdinal(int64_t(-179809067369808278)),
-            maths::COrdinal(int64_t(-179809067369808277)),
-            maths::COrdinal(int64_t( 569817345679111267)),
-            maths::COrdinal(int64_t( 569817345679111268))
-        };
-    maths::COrdinal s2[] =
-        {
-            maths::COrdinal(uint64_t( 569817345679111267)),
-            maths::COrdinal(uint64_t( 569817345679111268))
-        };
-    for (std::size_t i = 0u; i < boost::size(s1); ++i)
-    {
+    maths::COrdinal s1[] = {maths::COrdinal(int64_t(-179809067369808278)),
+                            maths::COrdinal(int64_t(-179809067369808277)),
+                            maths::COrdinal(int64_t(569817345679111267)),
+                            maths::COrdinal(int64_t(569817345679111268))};
+    maths::COrdinal s2[] = {maths::COrdinal(uint64_t(569817345679111267)), maths::COrdinal(uint64_t(569817345679111268))};
+    for (std::size_t i = 0u; i < boost::size(s1); ++i) {
         LOG_DEBUG(s1[i] << " (as double " << precisePrint(s1[i].asDouble()) << ")");
-        for (std::size_t j = 0u; j < i; ++j)
-        {
+        for (std::size_t j = 0u; j < i; ++j) {
             CPPUNIT_ASSERT(s1[i] != s1[j]);
         }
         CPPUNIT_ASSERT(s1[i] == s1[i]);
         CPPUNIT_ASSERT(s1[i] != maths::COrdinal(s1[i].asDouble()));
-        for (std::size_t j = i+1; j < boost::size(s1); ++j)
-        {
+        for (std::size_t j = i + 1; j < boost::size(s1); ++j) {
             CPPUNIT_ASSERT(s1[i] != s1[j]);
         }
     }
@@ -117,8 +99,7 @@ void COrdinalTest::testEqual()
     CPPUNIT_ASSERT(s2[1] != s1[0]);
 }

-void COrdinalTest::testLess()
-{
+void COrdinalTest::testLess() {
     LOG_DEBUG("");
     LOG_DEBUG("+--------------------------+");
     LOG_DEBUG("|  COrdinalTest::testLess  |");
@@ -128,41 +109,35 @@ void COrdinalTest::testLess()

     test::CRandomNumbers rng;

     // Test some random orderings on integer types which don't overflow.

-    for (std::size_t i = 0u; i < 1000; ++i)
-    {
+    for (std::size_t i = 0u; i < 1000; ++i) {
         TDoubleVec samples;
         rng.generateUniformSamples(-10000.0, 10000.0, 2, samples);
-        bool less        = static_cast<int64_t>(samples[0]) < static_cast<int64_t>(samples[1]);
+        bool less = static_cast<int64_t>(samples[0]) < static_cast<int64_t>(samples[1]);
         bool ordinalLess = maths::COrdinal(static_cast<int64_t>(samples[0])) < maths::COrdinal(static_cast<int64_t>(samples[1]));
         CPPUNIT_ASSERT_EQUAL(less, ordinalLess);
-        if (samples[0] >= 0.0)
-        {
-            less        = static_cast<uint64_t>(samples[0]) < static_cast<int64_t>(samples[1]);
+        if (samples[0] >= 0.0) {
+            less = static_cast<uint64_t>(samples[0]) < static_cast<int64_t>(samples[1]);
             ordinalLess = maths::COrdinal(static_cast<uint64_t>(samples[0])) < maths::COrdinal(static_cast<int64_t>(samples[1]));
         }
-        if (samples[1] >= 0.0)
-        {
-            less        = static_cast<int64_t>(samples[0]) < static_cast<uint64_t>(samples[1]);
+        if (samples[1] >= 0.0) {
+            less = static_cast<int64_t>(samples[0]) < static_cast<uint64_t>(samples[1]);
             ordinalLess = maths::COrdinal(static_cast<int64_t>(samples[0])) < maths::COrdinal(static_cast<uint64_t>(samples[1]));
         }
-        if (samples[0] >= 0.0 && samples[1] >= 0.0)
-        {
-            less        = static_cast<uint64_t>(samples[0]) < static_cast<uint64_t>(samples[1]);
+        if (samples[0] >= 0.0 && samples[1] >= 0.0) {
+            less = static_cast<uint64_t>(samples[0]) < static_cast<uint64_t>(samples[1]);
             ordinalLess = maths::COrdinal(static_cast<uint64_t>(samples[0])) < maths::COrdinal(static_cast<uint64_t>(samples[1]));
         }
-        less        = static_cast<double>(static_cast<int64_t>(samples[0])) < samples[1];
+        less = static_cast<double>(static_cast<int64_t>(samples[0])) < samples[1];
         ordinalLess = maths::COrdinal(static_cast<int64_t>(samples[0])) < maths::COrdinal(samples[1]);
-        less        = samples[0] < static_cast<double>(static_cast<int64_t>(samples[1]));
+        less = samples[0] < static_cast<double>(static_cast<int64_t>(samples[1]));
         ordinalLess = maths::COrdinal(samples[0]) < maths::COrdinal(static_cast<int64_t>(samples[1]));
-        less        = samples[0] < samples[1];
-        if (samples[0] >= 0.0)
-        {
-            less = static_cast<double>(static_cast<uint64_t>(samples[0])) < samples[1];
+        less = samples[0] < samples[1];
+        if (samples[0] >= 0.0) {
+            less = static_cast<double>(static_cast<uint64_t>(samples[0])) < samples[1];
             ordinalLess = maths::COrdinal(static_cast<uint64_t>(samples[0])) < maths::COrdinal(samples[1]);
         }
-        if (samples[1] >= 0.0)
-        {
-            less = samples[0] < static_cast<double>(static_cast<uint64_t>(samples[1]));
+        if (samples[1] >= 0.0) {
+            less = samples[0] < static_cast<double>(static_cast<uint64_t>(samples[1]));
             ordinalLess = maths::COrdinal(samples[0]) < maths::COrdinal(static_cast<uint64_t>(samples[1]));
         }
         ordinalLess = maths::COrdinal(samples[0]) < maths::COrdinal(samples[1]);
@@ -171,47 +146,36 @@ void COrdinalTest::testLess()

     // Test doubles outside the integer range.
     double small = -1e37;
-    double large  = 1e23;
-    CPPUNIT_ASSERT(  maths::COrdinal(small) < maths::COrdinal(boost::numeric::bounds<int64_t>::lowest()));
+    double large = 1e23;
+    CPPUNIT_ASSERT(maths::COrdinal(small) < maths::COrdinal(boost::numeric::bounds<int64_t>::lowest()));
     CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds<int64_t>::lowest()) < maths::COrdinal(small)));
-    CPPUNIT_ASSERT(  maths::COrdinal(large) > maths::COrdinal(boost::numeric::bounds<int64_t>::highest()));
+    CPPUNIT_ASSERT(maths::COrdinal(large) > maths::COrdinal(boost::numeric::bounds<int64_t>::highest()));
     CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds<int64_t>::highest()) > maths::COrdinal(large)));
-    CPPUNIT_ASSERT(  maths::COrdinal(large) > maths::COrdinal(boost::numeric::bounds<uint64_t>::highest()));
+    CPPUNIT_ASSERT(maths::COrdinal(large) > maths::COrdinal(boost::numeric::bounds<uint64_t>::highest()));
     CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds<uint64_t>::highest()) > maths::COrdinal(large)));

     // Check some integer values which can't be represented as doubles.
-    maths::COrdinal s1[] =
-        {
-            maths::COrdinal(int64_t(-179809067369808278)),
-            maths::COrdinal(int64_t(-179809067369808277)),
-            maths::COrdinal(int64_t( 569817345679111267)),
-            maths::COrdinal(int64_t( 569817345679111268))
-        };
-    maths::COrdinal s2[] =
-        {
-            maths::COrdinal(uint64_t( 569817345679111267)),
-            maths::COrdinal(uint64_t( 569817345679111268))
-        };
-    for (std::size_t i = 0u; i < boost::size(s1); ++i)
-    {
+    maths::COrdinal s1[] = {maths::COrdinal(int64_t(-179809067369808278)),
+                            maths::COrdinal(int64_t(-179809067369808277)),
+                            maths::COrdinal(int64_t(569817345679111267)),
+                            maths::COrdinal(int64_t(569817345679111268))};
+    maths::COrdinal s2[] = {maths::COrdinal(uint64_t(569817345679111267)), maths::COrdinal(uint64_t(569817345679111268))};
+    for (std::size_t i = 0u; i < boost::size(s1); ++i) {
         LOG_DEBUG(s1[i] << " (as double " << precisePrint(s1[i].asDouble()) << ")");
-        for (std::size_t j = 0u; j < i; ++j)
-        {
+        for (std::size_t j = 0u; j < i; ++j) {
             CPPUNIT_ASSERT(!(s1[i] < s1[j]));
         }
-        for (std::size_t j = i+1; j < boost::size(s1); ++j)
-        {
+        for (std::size_t j = i + 1; j < boost::size(s1); ++j) {
             CPPUNIT_ASSERT(s1[i] < s1[j]);
         }
     }
-    CPPUNIT_ASSERT(  s1[2] < s2[1]);
+    CPPUNIT_ASSERT(s1[2] < s2[1]);
     CPPUNIT_ASSERT(!(s1[3] < s2[0]));
-    CPPUNIT_ASSERT(  s2[0] < s1[3]);
+    CPPUNIT_ASSERT(s2[0] < s1[3]);
     CPPUNIT_ASSERT(!(s2[1] < s1[0]));
 }

-void COrdinalTest::testIsNan()
-{
+void COrdinalTest::testIsNan() {
     LOG_DEBUG("");
     LOG_DEBUG("+---------------------------+");
     LOG_DEBUG("|  COrdinalTest::testIsNan  |");
@@ -256,8 +220,7 @@ void COrdinalTest::testIsNan()
     }
 }

-void COrdinalTest::testAsDouble()
-{
+void COrdinalTest::testAsDouble() {
     LOG_DEBUG("");
     LOG_DEBUG("+------------------------------+");
     LOG_DEBUG("|  COrdinalTest::testAsDouble  |");
@@ -267,8 +230,7 @@ void COrdinalTest::testAsDouble()

     test::CRandomNumbers rng;

-    for (std::size_t i = 0u; i < 100; ++i)
-    {
+    for (std::size_t i = 0u; i < 100; ++i) {
         TDoubleVec sample;
         rng.generateUniformSamples(-20000.0, 0.0, 1, sample);

         maths::COrdinal signedOrdinal(static_cast<int64_t>(sample[0]));
@@ -285,24 +247,16 @@ void COrdinalTest::testAsDouble()

     // Check some integer values which can't be represented as doubles.
-    int64_t s[] =
-        {
-            -179809067369808278,
-            -179809067369808277,
-             569817345679111267,
-             569817345679111268
-        };
+    int64_t s[] = {-179809067369808278, -179809067369808277, 569817345679111267, 569817345679111268};

-    for (std::size_t i = 0u; i < boost::size(s); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(s); ++i) {
         maths::COrdinal o(s[i]);
         LOG_DEBUG(o << " (as double " << precisePrint(o.asDouble()) << ")");
         CPPUNIT_ASSERT_EQUAL(static_cast<double>(s[i]), o.asDouble());
     }
 }

-void COrdinalTest::testHash()
-{
+void COrdinalTest::testHash() {
     LOG_DEBUG("");
     LOG_DEBUG("+--------------------------+");
     LOG_DEBUG("|  COrdinalTest::testHash  |");
@@ -318,8 +272,7 @@ void COrdinalTest::testHash()
     TSizeUSet unsignedHashes;
     TSizeUSet doubleHashes;

-    for (std::size_t i = 0u; i < 100; ++i)
-    {
+    for (std::size_t i = 0u; i < 100; ++i) {
         TDoubleVec sample;
         rng.generateUniformSamples(-20000.0, 0.0, 1, sample);

         maths::COrdinal signedOrdinal(static_cast<int64_t>(sample[0]));
@@ -342,28 +295,15 @@ void COrdinalTest::testHash()
     CPPUNIT_ASSERT_EQUAL(std::size_t(100), doubleHashes.size());
 }

-CppUnit::Test *COrdinalTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("COrdinalTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<COrdinalTest>(
-                                   "COrdinalTest::testEqual",
-                                   &COrdinalTest::testEqual) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<COrdinalTest>(
-                                   "COrdinalTest::testLess",
-                                   &COrdinalTest::testLess) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<COrdinalTest>(
-                                   "COrdinalTest::testLess",
-                                   &COrdinalTest::testLess) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<COrdinalTest>(
-                                   "COrdinalTest::testIsNan",
-                                   &COrdinalTest::testIsNan) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<COrdinalTest>(
-                                   "COrdinalTest::testAsDouble",
-                                   &COrdinalTest::testAsDouble) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<COrdinalTest>(
-                                   "COrdinalTest::testHash",
-                                   &COrdinalTest::testHash) );
+CppUnit::Test* COrdinalTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COrdinalTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testEqual", &COrdinalTest::testEqual));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testLess", &COrdinalTest::testLess));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testIsNan", &COrdinalTest::testIsNan));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testAsDouble", &COrdinalTest::testAsDouble));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testHash", &COrdinalTest::testHash));

    return suiteOfTests;
}
diff --git a/lib/maths/unittest/COrdinalTest.h b/lib/maths/unittest/COrdinalTest.h
index e390c2da4e..11f5bfe2ec 100644
--- a/lib/maths/unittest/COrdinalTest.h
+++ b/lib/maths/unittest/COrdinalTest.h
@@ -9,16 +9,15 @@

 #include <cppunit/extensions/HelperMacros.h>

-class COrdinalTest : public CppUnit::TestFixture
-{
-    public:
-        void testEqual();
-        void testLess();
-        void testIsNan();
-        void testAsDouble();
-        void testHash();
+class COrdinalTest : public CppUnit::TestFixture {
+public:
+    void testEqual();
+    void testLess();
+    void testIsNan();
+    void testAsDouble();
+    void testHash();

-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_COrdinalTest_h
diff --git a/lib/maths/unittest/CPRNGTest.cc b/lib/maths/unittest/CPRNGTest.cc
index 755630f193..351007fd4f 100644
--- a/lib/maths/unittest/CPRNGTest.cc
+++ b/lib/maths/unittest/CPRNGTest.cc
@@ -19,8 +19,7 @@

 using namespace ml;

-void CPRNGTest::testSplitMix64()
-{
+void CPRNGTest::testSplitMix64() {
     LOG_DEBUG("+-----------------------------+");
     LOG_DEBUG("|  CPRNGTest::testSplitMix64  |");
     LOG_DEBUG("+-----------------------------+");
@@ -33,24 +32,22 @@ void CPRNGTest::testSplitMix64()

     // Test min and max.
     maths::CBasicStatistics::COrderStatisticsStack<uint64_t, 1> min;
     maths::CBasicStatistics::COrderStatisticsStack<uint64_t, 1, std::greater<uint64_t>> max;
-    for (std::size_t i = 0u; i < 10000; ++i)
-    {
+    for (std::size_t i = 0u; i < 10000; ++i) {
         uint64_t x = rng1();
         min.add(x);
         max.add(x);
     }
     LOG_DEBUG("min = " << min[0] << ", max = " << max[0]);
-    CPPUNIT_ASSERT(min[0] <  (maths::CPRNG::CSplitMix64::max() - maths::CPRNG::CSplitMix64::min()) / 2000);
-    CPPUNIT_ASSERT(max[0] >   maths::CPRNG::CSplitMix64::max()
-                           - (maths::CPRNG::CSplitMix64::max() - maths::CPRNG::CSplitMix64::min()) / 2000);
+    CPPUNIT_ASSERT(min[0] < (maths::CPRNG::CSplitMix64::max() - maths::CPRNG::CSplitMix64::min()) / 2000);
+    CPPUNIT_ASSERT(max[0] >
+                   maths::CPRNG::CSplitMix64::max() - (maths::CPRNG::CSplitMix64::max() - maths::CPRNG::CSplitMix64::min()) / 2000);

     // Test generate.
     maths::CPRNG::CSplitMix64 rng2 = rng1;
-    uint64_t samples1[50] = { 0u };
+    uint64_t samples1[50] = {0u};
     rng1.generate(&samples1[0], &samples1[50]);
-    uint64_t samples2[50] = { 0u };
-    for (std::size_t i = 0u; i < 50; ++i)
-    {
+    uint64_t samples2[50] = {0u};
+    for (std::size_t i = 0u; i < 50; ++i) {
         samples2[i] = rng2();
     }
     CPPUNIT_ASSERT(std::equal(&samples1[0], &samples1[50], &samples2[0]));
@@ -58,16 +55,14 @@ void CPRNGTest::testSplitMix64()

     // Test distribution.
     {
         boost::random::mt19937_64 mt;
-        double p1[50] = { 0.0 };
-        double p2[50] = { 0.0 };
+        double p1[50] = {0.0};
+        double p2[50] = {0.0};
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m1;
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m2;
-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
             maths::CStatisticalTests::CCramerVonMises cvm1(50);
             maths::CStatisticalTests::CCramerVonMises cvm2(50);
-            for (std::size_t i = 0u; i < 5000; ++i)
-            {
+            for (std::size_t i = 0u; i < 5000; ++i) {
                 cvm1.addF(u01(rng1));
                 cvm2.addF(u01(mt));
             }
@@ -85,16 +80,14 @@ void CPRNGTest::testSplitMix64()
     {
         boost::random::mt11213b mt;
         boost::math::normal_distribution<> n410(4.0, 10.0);
-        double p1[50] = { 0.0 };
-        double p2[50] = { 0.0 };
+        double p1[50] = {0.0};
+        double p2[50] = {0.0};
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m1;
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m2;
-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
             maths::CStatisticalTests::CCramerVonMises cvm1(50);
             maths::CStatisticalTests::CCramerVonMises cvm2(50);
-            for (std::size_t i = 0u; i < 5000; ++i)
-            {
+            for (std::size_t i = 0u; i < 5000; ++i) {
                 cvm1.addF(boost::math::cdf(n410, norm(rng1)));
                 cvm2.addF(boost::math::cdf(n410, norm(mt)));
             }
@@ -113,12 +106,10 @@ void CPRNGTest::testSplitMix64()

     // Test discard.
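     // Note: discard(n) is required to leave the generator in exactly the
     // state reached by calling it n times and ignoring the results, so
     // rng1 after discard(10) and the copy rng3 after ten manual calls must
     // produce identical streams from here on.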
     maths::CPRNG::CSplitMix64 rng3 = rng1;
     rng1.discard(10);
-    for (std::size_t i = 0u; i < 10; ++i)
-    {
+    for (std::size_t i = 0u; i < 10; ++i) {
         rng3();
     }
-    for (std::size_t t = 0u; t < 500; ++t)
-    {
+    for (std::size_t t = 0u; t < 500; ++t) {
         CPPUNIT_ASSERT_EQUAL(rng1(), rng3());
     }
@@ -127,14 +118,12 @@ void CPRNGTest::testSplitMix64()
     LOG_DEBUG("state = " << state);
     maths::CPRNG::CSplitMix64 rng4;
     CPPUNIT_ASSERT(rng4.fromString(state));
-    for (std::size_t t = 0u; t < 500; ++t)
-    {
+    for (std::size_t t = 0u; t < 500; ++t) {
         CPPUNIT_ASSERT_EQUAL(rng1(), rng4());
     }
 }

-void CPRNGTest::testXorOShiro128Plus()
-{
+void CPRNGTest::testXorOShiro128Plus() {
     LOG_DEBUG("+-----------------------------------+");
     LOG_DEBUG("|  CPRNGTest::testXorOShiro128Plus  |");
     LOG_DEBUG("+-----------------------------------+");
@@ -147,24 +136,22 @@ void CPRNGTest::testXorOShiro128Plus()

     // Test min and max.
     maths::CBasicStatistics::COrderStatisticsStack<uint64_t, 1> min;
     maths::CBasicStatistics::COrderStatisticsStack<uint64_t, 1, std::greater<uint64_t>> max;
-    for (std::size_t i = 0u; i < 10000; ++i)
-    {
+    for (std::size_t i = 0u; i < 10000; ++i) {
         uint64_t x = rng1();
         min.add(x);
         max.add(x);
     }
     LOG_DEBUG("min = " << min[0] << ", max = " << max[0]);
-    CPPUNIT_ASSERT(min[0] <  (maths::CPRNG::CXorOShiro128Plus::max() - maths::CPRNG::CXorOShiro128Plus::min()) / 2000);
-    CPPUNIT_ASSERT(max[0] >   maths::CPRNG::CXorOShiro128Plus::max()
-                           - (maths::CPRNG::CXorOShiro128Plus::max() - maths::CPRNG::CXorOShiro128Plus::min()) / 2000);
+    CPPUNIT_ASSERT(min[0] < (maths::CPRNG::CXorOShiro128Plus::max() - maths::CPRNG::CXorOShiro128Plus::min()) / 2000);
+    CPPUNIT_ASSERT(max[0] > maths::CPRNG::CXorOShiro128Plus::max() -
+                       (maths::CPRNG::CXorOShiro128Plus::max() - maths::CPRNG::CXorOShiro128Plus::min()) / 2000);

     // Test generate.
     maths::CPRNG::CXorOShiro128Plus rng2 = rng1;
-    uint64_t samples1[50] = { 0u };
+    uint64_t samples1[50] = {0u};
     rng1.generate(&samples1[0], &samples1[50]);
-    uint64_t samples2[50] = { 0u };
-    for (std::size_t i = 0u; i < 50; ++i)
-    {
+    uint64_t samples2[50] = {0u};
+    for (std::size_t i = 0u; i < 50; ++i) {
         samples2[i] = rng2();
     }
     CPPUNIT_ASSERT(std::equal(&samples1[0], &samples1[50], &samples2[0]));
@@ -172,16 +159,14 @@ void CPRNGTest::testXorOShiro128Plus()

     // Test distribution.
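     // Note: each draw is mapped through a known c.d.f., so for a sound
     // generator the mapped values are uniform on [0, 1]. The Cramer-von
     // Mises p-values collected over 50 repeats are averaged (m1, m2) so
     // that the result can be compared against the same statistic for a
     // reference Mersenne Twister, i.e. the check is "no worse than
     // boost.random" rather than an absolute goodness-of-fit threshold.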
     {
         boost::random::mt11213b mt;
-        double p1[50] = { 0.0 };
-        double p2[50] = { 0.0 };
+        double p1[50] = {0.0};
+        double p2[50] = {0.0};
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m1;
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m2;
-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
             maths::CStatisticalTests::CCramerVonMises cvm1(50);
             maths::CStatisticalTests::CCramerVonMises cvm2(50);
-            for (std::size_t i = 0u; i < 5000; ++i)
-            {
+            for (std::size_t i = 0u; i < 5000; ++i) {
                 cvm1.addF(u01(rng1));
                 cvm2.addF(u01(mt));
             }
@@ -199,16 +184,14 @@ void CPRNGTest::testXorOShiro128Plus()
     {
         boost::random::mt19937_64 mt;
         boost::math::normal_distribution<> nm44(-4.0, 4.0);
-        double p1[50] = { 0.0 };
-        double p2[50] = { 0.0 };
+        double p1[50] = {0.0};
+        double p2[50] = {0.0};
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m1;
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m2;
-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
             maths::CStatisticalTests::CCramerVonMises cvm1(50);
             maths::CStatisticalTests::CCramerVonMises cvm2(50);
-            for (std::size_t i = 0u; i < 5000; ++i)
-            {
+            for (std::size_t i = 0u; i < 5000; ++i) {
                 cvm1.addF(boost::math::cdf(nm44, norm(rng1)));
                 cvm2.addF(boost::math::cdf(nm44, norm(mt)));
             }
@@ -227,12 +210,10 @@ void CPRNGTest::testXorOShiro128Plus()
     // Test discard.
     maths::CPRNG::CXorOShiro128Plus rng3 = rng1;
     rng1.discard(10);
-    for (std::size_t i = 0u; i < 10; ++i)
-    {
+    for (std::size_t i = 0u; i < 10; ++i) {
         rng3();
     }
-    for (std::size_t t = 0u; t < 500; ++t)
-    {
+    for (std::size_t t = 0u; t < 500; ++t) {
         CPPUNIT_ASSERT_EQUAL(rng1(), rng3());
     }
@@ -242,15 +223,13 @@ void CPRNGTest::testXorOShiro128Plus()
     // offset, i.e. rng + n + jump == rng + jump + n.
     maths::CPRNG::CXorOShiro128Plus rng4(rng1);
     maths::CPRNG::CXorOShiro128Plus rng5(rng1);
-    std::size_t steps[] = { 10, 3, 19 };
-    for (std::size_t s = 0u; s < boost::size(steps); ++s)
-    {
+    std::size_t steps[] = {10, 3, 19};
+    for (std::size_t s = 0u; s < boost::size(steps); ++s) {
         rng4.jump();
         rng4.discard(steps[s]);
         rng5.discard(steps[s]);
         rng5.jump();
-        for (std::size_t t = 0u; t < 20; ++t)
-        {
+        for (std::size_t t = 0u; t < 20; ++t) {
             CPPUNIT_ASSERT_EQUAL(rng4(), rng5());
         }
     }
@@ -259,14 +238,12 @@ void CPRNGTest::testXorOShiro128Plus()
     std::string state = rng1.toString();
     LOG_DEBUG("state = " << state);
     CPPUNIT_ASSERT(rng4.fromString(state));
-    for (std::size_t t = 0u; t < 500; ++t)
-    {
+    for (std::size_t t = 0u; t < 500; ++t) {
         CPPUNIT_ASSERT_EQUAL(rng1(), rng4());
     }
 }

-void CPRNGTest::testXorShift1024Mult()
-{
+void CPRNGTest::testXorShift1024Mult() {
     LOG_DEBUG("+-----------------------------------+");
     LOG_DEBUG("|  CPRNGTest::testXorShift1024Mult  |");
     LOG_DEBUG("+-----------------------------------+");
@@ -279,24 +256,22 @@ void CPRNGTest::testXorShift1024Mult()

     // Test min and max.
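     // Note: with 10000 draws from a uniform range, the chance the sample
     // minimum stays above min() + range/2000 is (1 - 1/2000)^10000, roughly
     // e^-5 or 0.7%, and symmetrically for the maximum, so a healthy
     // generator fails these bounds only rarely.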
     maths::CBasicStatistics::COrderStatisticsStack<uint64_t, 1> min;
     maths::CBasicStatistics::COrderStatisticsStack<uint64_t, 1, std::greater<uint64_t>> max;
-    for (std::size_t i = 0u; i < 10000; ++i)
-    {
+    for (std::size_t i = 0u; i < 10000; ++i) {
         uint64_t x = rng1();
         min.add(x);
         max.add(x);
     }
     LOG_DEBUG("min = " << min[0] << ", max = " << max[0]);
-    CPPUNIT_ASSERT(min[0] <  (maths::CPRNG::CXorShift1024Mult::max() - maths::CPRNG::CXorShift1024Mult::min()) / 2000);
-    CPPUNIT_ASSERT(max[0] >   maths::CPRNG::CXorShift1024Mult::max()
-                           - (maths::CPRNG::CXorShift1024Mult::max() - maths::CPRNG::CXorShift1024Mult::min()) / 2000);
+    CPPUNIT_ASSERT(min[0] < (maths::CPRNG::CXorShift1024Mult::max() - maths::CPRNG::CXorShift1024Mult::min()) / 2000);
+    CPPUNIT_ASSERT(max[0] > maths::CPRNG::CXorShift1024Mult::max() -
+                       (maths::CPRNG::CXorShift1024Mult::max() - maths::CPRNG::CXorShift1024Mult::min()) / 2000);

     // Test generate.
     maths::CPRNG::CXorShift1024Mult rng2 = rng1;
-    uint64_t samples1[50] = { 0u };
+    uint64_t samples1[50] = {0u};
     rng1.generate(&samples1[0], &samples1[50]);
-    uint64_t samples2[50] = { 0u };
-    for (std::size_t i = 0u; i < 50; ++i)
-    {
+    uint64_t samples2[50] = {0u};
+    for (std::size_t i = 0u; i < 50; ++i) {
         samples2[i] = rng2();
     }
     CPPUNIT_ASSERT(std::equal(&samples1[0], &samples1[50], &samples2[0]));
@@ -304,16 +279,14 @@ void CPRNGTest::testXorShift1024Mult()

     // Test distribution.
     {
         boost::random::mt19937_64 mt;
-        double p1[50] = { 0.0 };
-        double p2[50] = { 0.0 };
+        double p1[50] = {0.0};
+        double p2[50] = {0.0};
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m1;
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m2;
-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
             maths::CStatisticalTests::CCramerVonMises cvm1(50);
             maths::CStatisticalTests::CCramerVonMises cvm2(50);
-            for (std::size_t i = 0u; i < 5000; ++i)
-            {
+            for (std::size_t i = 0u; i < 5000; ++i) {
                 cvm1.addF(u01(rng1));
                 cvm2.addF(u01(mt));
             }
@@ -331,16 +304,14 @@ void CPRNGTest::testXorShift1024Mult()
     {
         boost::random::mt11213b mt;
         boost::math::normal_distribution<> n1008000(100.0, 8000.0);
-        double p1[50] = { 0.0 };
-        double p2[50] = { 0.0 };
+        double p1[50] = {0.0};
+        double p2[50] = {0.0};
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m1;
         maths::CBasicStatistics::SSampleMean<double>::TAccumulator m2;
-        for (std::size_t t = 0u; t < 50; ++t)
-        {
+        for (std::size_t t = 0u; t < 50; ++t) {
             maths::CStatisticalTests::CCramerVonMises cvm1(50);
             maths::CStatisticalTests::CCramerVonMises cvm2(50);
-            for (std::size_t i = 0u; i < 5000; ++i)
-            {
+            for (std::size_t i = 0u; i < 5000; ++i) {
                 cvm1.addF(boost::math::cdf(n1008000, norm(rng1)));
                 cvm2.addF(boost::math::cdf(n1008000, norm(mt)));
             }
@@ -359,12 +330,10 @@ void CPRNGTest::testXorShift1024Mult()

     // Test discard.
     maths::CPRNG::CXorShift1024Mult rng3 = rng1;
     rng1.discard(10);
-    for (std::size_t i = 0u; i < 10; ++i)
-    {
+    for (std::size_t i = 0u; i < 10; ++i) {
         rng3();
     }
-    for (std::size_t t = 0u; t < 500; ++t)
-    {
+    for (std::size_t t = 0u; t < 500; ++t) {
         CPPUNIT_ASSERT_EQUAL(rng1(), rng3());
     }
@@ -374,15 +343,13 @@ void CPRNGTest::testXorShift1024Mult()
     // offset, i.e. rng + n + jump == rng + jump + n.
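     // Note: jump() advances xorshift1024* by a fixed 2^512 steps, so it is
     // just a constant offset into the stream and must commute with
     // discard(n); the loop below verifies this for several values of n.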
     maths::CPRNG::CXorShift1024Mult rng4(rng1);
     maths::CPRNG::CXorShift1024Mult rng5(rng1);
-    std::size_t steps[] = { 10, 3, 19 };
-    for (std::size_t s = 0u; s < boost::size(steps); ++s)
-    {
+    std::size_t steps[] = {10, 3, 19};
+    for (std::size_t s = 0u; s < boost::size(steps); ++s) {
         rng4.jump();
         rng4.discard(steps[s]);
         rng5.discard(steps[s]);
         rng5.jump();
-        for (std::size_t t = 0u; t < 20; ++t)
-        {
+        for (std::size_t t = 0u; t < 20; ++t) {
             CPPUNIT_ASSERT_EQUAL(rng4(), rng5());
         }
     }
@@ -392,25 +359,17 @@ void CPRNGTest::testXorShift1024Mult()
     std::string state = rng1.toString();
     LOG_DEBUG("state = " << state);
     CPPUNIT_ASSERT(rng4.fromString(state));
-    for (std::size_t t = 0u; t < 500; ++t)
-    {
+    for (std::size_t t = 0u; t < 500; ++t) {
         CPPUNIT_ASSERT_EQUAL(rng1(), rng4());
     }
 }

-CppUnit::Test *CPRNGTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CPRNGTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPRNGTest>(
-                                   "CPRNGTest::testSplitMix64",
-                                   &CPRNGTest::testSplitMix64) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPRNGTest>(
-                                   "CPRNGTest::testXorOShiro128Plus",
-                                   &CPRNGTest::testXorOShiro128Plus) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPRNGTest>(
-                                   "CPRNGTest::testXorShift1024Mult",
-                                   &CPRNGTest::testXorShift1024Mult) );
+CppUnit::Test* CPRNGTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPRNGTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>("CPRNGTest::testSplitMix64", &CPRNGTest::testSplitMix64));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>("CPRNGTest::testXorOShiro128Plus", &CPRNGTest::testXorOShiro128Plus));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>("CPRNGTest::testXorShift1024Mult", &CPRNGTest::testXorShift1024Mult));

    return suiteOfTests;
}
diff --git a/lib/maths/unittest/CPRNGTest.h b/lib/maths/unittest/CPRNGTest.h
index 695bfe8494..15fee8fd0d 100644
--- a/lib/maths/unittest/CPRNGTest.h
+++ b/lib/maths/unittest/CPRNGTest.h
@@ -9,14 +9,13 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CPRNGTest : public CppUnit::TestFixture
-{
-    public:
-        void testSplitMix64();
-        void testXorOShiro128Plus();
-        void testXorShift1024Mult();
+class CPRNGTest : public CppUnit::TestFixture {
+public:
+    void testSplitMix64();
+    void testXorOShiro128Plus();
+    void testXorShift1024Mult();

-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CPRNGTest_h
diff --git a/lib/maths/unittest/CPackedBitVectorTest.cc b/lib/maths/unittest/CPackedBitVectorTest.cc
index d07acd2df4..bfd1cfb62f 100644
--- a/lib/maths/unittest/CPackedBitVectorTest.cc
+++ b/lib/maths/unittest/CPackedBitVectorTest.cc
@@ -25,30 +25,24 @@
 using TBoolVec = std::vector<bool>;
 using TSizeVec = std::vector<std::size_t>;
 using TPackedBitVectorVec = std::vector<maths::CPackedBitVector>;

-namespace
-{
+namespace {

-std::string print(const maths::CPackedBitVector &v)
-{
+std::string print(const maths::CPackedBitVector& v) {
     std::ostringstream result;
     result << v;
     return result.str();
 }

-std::string toBitString(const TBoolVec &v)
-{
+std::string toBitString(const TBoolVec& v) {
     std::ostringstream result;
-    for (std::size_t i = 0u; i < v.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < v.size(); ++i) {
         result << static_cast<int>(v[i]);
     }
     return result.str();
 }
-
 }

-void CPackedBitVectorTest::testCreation()
-{
+void CPackedBitVectorTest::testCreation() {
     LOG_DEBUG("+--------------------------------------+");
     LOG_DEBUG("|  CPackedBitVectorTest::testCreation  |");
     LOG_DEBUG("+--------------------------------------+");

     maths::CPackedBitVector test1(3, true);
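     // Note: CPackedBitVector run-length encodes the bits rather than
     // storing them individually, so a constant vector like test1 costs a
     // handful of bytes regardless of dimension; the 255- and 279-dimension
     // cases below sit either side of the point where a run no longer fits
     // in a single byte, and toBitVector() expands back to the dense form
     // for comparison.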
LOG_DEBUG("test1 = " << test1); CPPUNIT_ASSERT_EQUAL(std::size_t(3), test1.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(3, true)), - core::CContainerPrinter::print(test1.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(3, true)), core::CContainerPrinter::print(test1.toBitVector())); maths::CPackedBitVector test2(5, false); LOG_DEBUG("test2 = " << test2); CPPUNIT_ASSERT_EQUAL(std::size_t(5), test2.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(5, false)), - core::CContainerPrinter::print(test2.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(5, false)), core::CContainerPrinter::print(test2.toBitVector())); maths::CPackedBitVector test3(255, true); LOG_DEBUG("test3 = " << test3); CPPUNIT_ASSERT_EQUAL(std::size_t(255), test3.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(255, true)), - core::CContainerPrinter::print(test3.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(255, true)), core::CContainerPrinter::print(test3.toBitVector())); maths::CPackedBitVector test4(279, true); LOG_DEBUG("test4 = " << test4); CPPUNIT_ASSERT_EQUAL(std::size_t(279), test4.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(279, true)), - core::CContainerPrinter::print(test4.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(279, true)), core::CContainerPrinter::print(test4.toBitVector())); maths::CPackedBitVector test5(512, false); LOG_DEBUG("test5 = " << test5); CPPUNIT_ASSERT_EQUAL(std::size_t(512), test5.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(512, false)), - core::CContainerPrinter::print(test5.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(512, false)), core::CContainerPrinter::print(test5.toBitVector())); maths::CPackedBitVector test6((TBoolVec())); LOG_DEBUG("test6 = " << test6); CPPUNIT_ASSERT_EQUAL(std::size_t(0), test6.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print((TBoolVec())), - core::CContainerPrinter::print(test6.toBitVector())); - - bool bits1_[] = - { - true, - true - }; + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print((TBoolVec())), core::CContainerPrinter::print(test6.toBitVector())); + + bool bits1_[] = {true, true}; TBoolVec bits1(boost::begin(bits1_), boost::end(bits1_)); maths::CPackedBitVector test7(bits1); LOG_DEBUG("test7 = " << test7); CPPUNIT_ASSERT_EQUAL(bits1.size(), test7.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), - core::CContainerPrinter::print(test7.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test7.toBitVector())); - bool bits2_[] = - { - true, false, false, true, true, false, false, false, false, true, true, true, true, false - }; + bool bits2_[] = {true, false, false, true, true, false, false, false, false, true, true, true, true, false}; TBoolVec bits2(boost::begin(bits2_), boost::end(bits2_)); maths::CPackedBitVector test8(bits2); LOG_DEBUG("test8 = " << test8); CPPUNIT_ASSERT_EQUAL(bits2.size(), test8.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), - core::CContainerPrinter::print(test8.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), core::CContainerPrinter::print(test8.toBitVector())); test::CRandomNumbers rng; TSizeVec components; - for (std::size_t t = 0u; t < 100; ++t) - { + for 
(std::size_t t = 0u; t < 100; ++t) { rng.generateUniformSamples(0, 2, 30, components); TBoolVec bits3(components.begin(), components.end()); maths::CPackedBitVector test9(bits3); - if ((t + 1) % 10 == 0) - { + if ((t + 1) % 10 == 0) { LOG_DEBUG("test9 = " << test9); } CPPUNIT_ASSERT_EQUAL(bits3.size(), test9.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits3), - core::CContainerPrinter::print(test9.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits3), core::CContainerPrinter::print(test9.toBitVector())); } } -void CPackedBitVectorTest::testExtend() -{ +void CPackedBitVectorTest::testExtend() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CPackedBitVectorTest::testExtend |"); LOG_DEBUG("+------------------------------------+"); @@ -167,21 +142,18 @@ void CPackedBitVectorTest::testExtend() LOG_DEBUG("test2 = " << test2); CPPUNIT_ASSERT_EQUAL(std::size_t(255), test2.dimension()); TBoolVec bits1(255, true); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), - core::CContainerPrinter::print(test2.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test2.toBitVector())); test2.extend(false); bits1.push_back(false); LOG_DEBUG("test2 = " << test2); CPPUNIT_ASSERT_EQUAL(std::size_t(256), test2.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), - core::CContainerPrinter::print(test2.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test2.toBitVector())); maths::CPackedBitVector test3(255, true); test3.extend(false); LOG_DEBUG("test3 = " << test2); CPPUNIT_ASSERT_EQUAL(std::size_t(256), test3.dimension()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), - core::CContainerPrinter::print(test3.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test3.toBitVector())); test::CRandomNumbers rng; @@ -191,17 +163,14 @@ void CPackedBitVectorTest::testExtend() TBoolVec bits2; maths::CPackedBitVector test4; - for (std::size_t i = 0u; i < components.size(); ++i) - { + for (std::size_t i = 0u; i < components.size(); ++i) { bits2.push_back(components[i] > 0); test4.extend(components[i] > 0); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), - core::CContainerPrinter::print(test4.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), core::CContainerPrinter::print(test4.toBitVector())); } } -void CPackedBitVectorTest::testContract() -{ +void CPackedBitVectorTest::testContract() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CPackedBitVectorTest::testContract |"); LOG_DEBUG("+--------------------------------------+"); @@ -211,15 +180,8 @@ void CPackedBitVectorTest::testContract() test1.extend(true); test1.extend(false); test1.extend(true); - std::string expected[] = - { - "[1 1 0 1]", - "[1 0 1]", - "[0 1]", - "[1]" - }; - for (const std::string *e = expected; test1.dimension() > 0; ++e) - { + std::string expected[] = {"[1 1 0 1]", "[1 0 1]", "[0 1]", "[1]"}; + for (const std::string* e = expected; test1.dimension() > 0; ++e) { LOG_DEBUG("test1 = " << test1); CPPUNIT_ASSERT_EQUAL(*e, print(test1)); test1.contract(); @@ -230,31 +192,26 @@ void CPackedBitVectorTest::testContract() bits1.push_back(true); bits1.push_back(false); maths::CPackedBitVector test2(bits1); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { 
bits1.erase(bits1.begin()); test2.contract(); LOG_DEBUG("test2 = " << test2); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), - core::CContainerPrinter::print(test2.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test2.toBitVector())); } TBoolVec bits2(1024, true); bits2.push_back(false); maths::CPackedBitVector test3(1024, true); test3.extend(false); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { bits2.erase(bits2.begin()); test3.contract(); LOG_DEBUG("test3 = " << test3); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), - core::CContainerPrinter::print(test3.toBitVector())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), core::CContainerPrinter::print(test3.toBitVector())); } } -void CPackedBitVectorTest::testOperators() -{ +void CPackedBitVectorTest::testOperators() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CPackedBitVectorTest::testOperators |"); LOG_DEBUG("+---------------------------------------+"); @@ -264,24 +221,20 @@ void CPackedBitVectorTest::testOperators() TPackedBitVectorVec test; TSizeVec components; - for (std::size_t t = 0u; t < 20; ++t) - { + for (std::size_t t = 0u; t < 20; ++t) { rng.generateUniformSamples(0, 2, 20, components); TBoolVec bits(components.begin(), components.end()); test.push_back(maths::CPackedBitVector(bits)); } - for (std::size_t i = 0u; i < test.size(); ++i) - { - for (std::size_t j = i; j < test.size(); ++j) - { + for (std::size_t i = 0u; i < test.size(); ++i) { + for (std::size_t j = i; j < test.size(); ++j) { CPPUNIT_ASSERT(test[i] < test[j] || test[i] > test[j] || test[i] == test[j]); } } } -void CPackedBitVectorTest::testInner() -{ +void CPackedBitVectorTest::testInner() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CPackedBitVectorTest::testInner |"); LOG_DEBUG("+-----------------------------------+"); @@ -291,29 +244,21 @@ void CPackedBitVectorTest::testInner() maths::CPackedBitVector test1(10, true); maths::CPackedBitVector test2(10, false); - bool bits1[] = - { - true, true, false, false, true, false, false, false, true, true - }; - maths::CPackedBitVector test3(TBoolVec(boost::begin(bits1), - boost::end(bits1))); - bool bits2[] = - { - false, false, true, false, true, false, false, false, false, false - }; - maths::CPackedBitVector test4(TBoolVec(boost::begin(bits2), - boost::end(bits2))); + bool bits1[] = {true, true, false, false, true, false, false, false, true, true}; + maths::CPackedBitVector test3(TBoolVec(boost::begin(bits1), boost::end(bits1))); + bool bits2[] = {false, false, true, false, true, false, false, false, false, false}; + maths::CPackedBitVector test4(TBoolVec(boost::begin(bits2), boost::end(bits2))); CPPUNIT_ASSERT_EQUAL(10.0, test1.inner(test1)); - CPPUNIT_ASSERT_EQUAL(0.0, test1.inner(test2)); - CPPUNIT_ASSERT_EQUAL(5.0, test1.inner(test3)); - CPPUNIT_ASSERT_EQUAL(2.0, test1.inner(test4)); - CPPUNIT_ASSERT_EQUAL(0.0, test2.inner(test2)); - CPPUNIT_ASSERT_EQUAL(0.0, test2.inner(test3)); - CPPUNIT_ASSERT_EQUAL(0.0, test2.inner(test4)); - CPPUNIT_ASSERT_EQUAL(5.0, test3.inner(test3)); - CPPUNIT_ASSERT_EQUAL(1.0, test3.inner(test4)); - CPPUNIT_ASSERT_EQUAL(2.0, test4.inner(test4)); + CPPUNIT_ASSERT_EQUAL(0.0, test1.inner(test2)); + CPPUNIT_ASSERT_EQUAL(5.0, test1.inner(test3)); + CPPUNIT_ASSERT_EQUAL(2.0, test1.inner(test4)); + CPPUNIT_ASSERT_EQUAL(0.0, test2.inner(test2)); + CPPUNIT_ASSERT_EQUAL(0.0, test2.inner(test3)); + 
CPPUNIT_ASSERT_EQUAL(0.0, test2.inner(test4)); + CPPUNIT_ASSERT_EQUAL(5.0, test3.inner(test3)); + CPPUNIT_ASSERT_EQUAL(1.0, test3.inner(test4)); + CPPUNIT_ASSERT_EQUAL(2.0, test4.inner(test4)); maths::CPackedBitVector test5(570, true); test5.extend(false); @@ -337,27 +282,22 @@ void CPackedBitVectorTest::testInner() TVectorVec comparison; TSizeVec components; - for (std::size_t t = 0u; t < 50; ++t) - { + for (std::size_t t = 0u; t < 50; ++t) { rng.generateUniformSamples(0, 2, 50, components); TBoolVec bits3(components.begin(), components.end()); test7.push_back(maths::CPackedBitVector(bits3)); comparison.push_back(TVector(bits3.begin(), bits3.end())); } - for (std::size_t i = 0u; i < test7.size(); ++i) - { + for (std::size_t i = 0u; i < test7.size(); ++i) { LOG_DEBUG("Testing " << test7[i]); - for (std::size_t j = 0u; j < test7.size(); ++j) - { - CPPUNIT_ASSERT_EQUAL(comparison[i].inner(comparison[j]), - test7[i].inner(test7[j])); + for (std::size_t j = 0u; j < test7.size(); ++j) { + CPPUNIT_ASSERT_EQUAL(comparison[i].inner(comparison[j]), test7[i].inner(test7[j])); } } } -void CPackedBitVectorTest::testBitwiseOr() -{ +void CPackedBitVectorTest::testBitwiseOr() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CPackedBitVectorTest::testBitwiseOr |"); LOG_DEBUG("+---------------------------------------+"); @@ -370,66 +310,51 @@ void CPackedBitVectorTest::testBitwiseOr() TBitSetVec comparison; TSizeVec components; - for (std::size_t t = 0u; t < 50; ++t) - { + for (std::size_t t = 0u; t < 50; ++t) { rng.generateUniformSamples(0, 2, 50, components); TBoolVec bits(components.begin(), components.end()); test.push_back(maths::CPackedBitVector(bits)); comparison.push_back(std::bitset<50>(toBitString(bits))); } - for (std::size_t i = 0u; i < test.size(); ++i) - { + for (std::size_t i = 0u; i < test.size(); ++i) { LOG_DEBUG("Testing " << test[i]); - for (std::size_t j = 0u; j < test.size(); ++j) - { + for (std::size_t j = 0u; j < test.size(); ++j) { { double expected = 0.0; std::bitset<50> bitwiseOr = comparison[i] | comparison[j]; - for (std::size_t k = 0u; k < bitwiseOr.size(); ++k) - { + for (std::size_t k = 0u; k < bitwiseOr.size(); ++k) { expected += bitwiseOr[k] ? 1.0 : 0.0; } - if (j % 10 == 0) - { + if (j % 10 == 0) { LOG_DEBUG("or = " << expected); } - CPPUNIT_ASSERT_EQUAL(expected, test[i].inner(test[j], - maths::CPackedBitVector::E_OR)); + CPPUNIT_ASSERT_EQUAL(expected, test[i].inner(test[j], maths::CPackedBitVector::E_OR)); } { double expected = 0.0; std::bitset<50> bitwiseXor = comparison[i] ^ comparison[j]; - for (std::size_t k = 0u; k < bitwiseXor.size(); ++k) - { + for (std::size_t k = 0u; k < bitwiseXor.size(); ++k) { expected += bitwiseXor[k] ? 
                    1.0 : 0.0;
                }
-                if (j % 10 == 0)
-                {
+                if (j % 10 == 0) {
                     LOG_DEBUG("xor = " << expected);
                 }
-                CPPUNIT_ASSERT_EQUAL(expected, test[i].inner(test[j],
-                                                             maths::CPackedBitVector::E_XOR));
+                CPPUNIT_ASSERT_EQUAL(expected, test[i].inner(test[j], maths::CPackedBitVector::E_XOR));
             }
         }
     }
 }

-void CPackedBitVectorTest::testPersist()
-{
+void CPackedBitVectorTest::testPersist() {
     LOG_DEBUG("+-------------------------------------+");
     LOG_DEBUG("|  CPackedBitVectorTest::testPersist  |");
     LOG_DEBUG("+-------------------------------------+");

-    bool bits[] =
-        {
-            true, true, false, false, true, false, false, false, true, true
-        };
+    bool bits[] = {true, true, false, false, true, false, false, false, true, true};

-    for (std::size_t t = 0u; t < boost::size(bits); ++t)
-    {
-        maths::CPackedBitVector origVector(TBoolVec(boost::begin(bits),
-                                                    boost::begin(bits) + t));
+    for (std::size_t t = 0u; t < boost::size(bits); ++t) {
+        maths::CPackedBitVector origVector(TBoolVec(boost::begin(bits), boost::begin(bits) + t));

         std::string origXml = origVector.toDelimited();
         LOG_DEBUG("xml = " << origXml);
@@ -441,31 +366,23 @@ void CPackedBitVectorTest::testPersist()
     }
 }

-CppUnit::Test *CPackedBitVectorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CPackedBitVectorTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPackedBitVectorTest>(
-                                   "CPackedBitVectorTest::testCreation",
-                                   &CPackedBitVectorTest::testCreation) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPackedBitVectorTest>(
-                                   "CPackedBitVectorTest::testExtend",
-                                   &CPackedBitVectorTest::testExtend) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPackedBitVectorTest>(
-                                   "CPackedBitVectorTest::testContract",
-                                   &CPackedBitVectorTest::testContract) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPackedBitVectorTest>(
-                                   "CPackedBitVectorTest::testOperators",
-                                   &CPackedBitVectorTest::testOperators) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPackedBitVectorTest>(
-                                   "CPackedBitVectorTest::testInner",
-                                   &CPackedBitVectorTest::testInner) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPackedBitVectorTest>(
-                                   "CPackedBitVectorTest::testBitwiseOr",
-                                   &CPackedBitVectorTest::testBitwiseOr) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CPackedBitVectorTest>(
-                                   "CPackedBitVectorTest::testPersist",
-                                   &CPackedBitVectorTest::testPersist) );
+CppUnit::Test* CPackedBitVectorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPackedBitVectorTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testCreation", &CPackedBitVectorTest::testCreation));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testExtend", &CPackedBitVectorTest::testExtend));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testContract", &CPackedBitVectorTest::testContract));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testOperators", &CPackedBitVectorTest::testOperators));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testInner", &CPackedBitVectorTest::testInner));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testBitwiseOr", &CPackedBitVectorTest::testBitwiseOr));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testPersist", &CPackedBitVectorTest::testPersist));

    return suiteOfTests;
}
diff --git a/lib/maths/unittest/CPackedBitVectorTest.h b/lib/maths/unittest/CPackedBitVectorTest.h
index 3f2682dcae..250ba0951a 100644
--- a/lib/maths/unittest/CPackedBitVectorTest.h
+++ b/lib/maths/unittest/CPackedBitVectorTest.h
@@ -9,18 +9,17 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CPackedBitVectorTest : public
CppUnit::TestFixture -{ - public: - void testCreation(); - void testExtend(); - void testContract(); - void testOperators(); - void testInner(); - void testBitwiseOr(); - void testPersist(); +class CPackedBitVectorTest : public CppUnit::TestFixture { +public: + void testCreation(); + void testExtend(); + void testContract(); + void testOperators(); + void testInner(); + void testBitwiseOr(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CPackedBitVectorTest_h diff --git a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc index ae19050fb1..191121ed42 100644 --- a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc +++ b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc @@ -13,8 +13,8 @@ #include #include -#include #include +#include #include "TestUtils.h" @@ -26,8 +26,7 @@ using namespace ml; using namespace handy_typedefs; -namespace -{ +namespace { using TDoubleVec = std::vector; using TSizeVec = std::vector; using TTimeVec = std::vector; @@ -42,8 +41,7 @@ const core_t::TTime DAY{core::constants::DAY}; const core_t::TTime WEEK{core::constants::WEEK}; } -void CPeriodicityHypothesisTestsTest::testNonPeriodic() -{ +void CPeriodicityHypothesisTestsTest::testNonPeriodic() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CPeriodicityHypothesisTestsTest::testNonPeriodic |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -63,38 +61,35 @@ void CPeriodicityHypothesisTestsTest::testNonPeriodic() double FP{0.0}; double TN{0.0}; - for (std::size_t test = 0u; test < 50; ++test) - { - if (test % 10 == 0) - { + for (std::size_t test = 0u; test < 50; ++test) { + if (test % 10 == 0) { LOG_DEBUG("test " << test << " / 50"); } - for (auto window : windows) - { - for (auto bucketLength : bucketLengths) - { - switch (test % 3) - { - case 0: rng.generateNormalSamples(0.0, 0.4, window / bucketLength, noise); break; - case 1: rng.generateGammaSamples(1.0, 5.0, window / bucketLength, noise); break; - case 2: rng.generateLogNormalSamples(0.2, 0.3, window / bucketLength, noise); break; + for (auto window : windows) { + for (auto bucketLength : bucketLengths) { + switch (test % 3) { + case 0: + rng.generateNormalSamples(0.0, 0.4, window / bucketLength, noise); + break; + case 1: + rng.generateGammaSamples(1.0, 5.0, window / bucketLength, noise); + break; + case 2: + rng.generateLogNormalSamples(0.2, 0.3, window / bucketLength, noise); + break; } rng.generateUniformSamples(0, generators.size(), 1, index); rng.generateUniformSamples(3, 20, 1, repeats); maths::CPeriodicityHypothesisTests hypotheses; - hypotheses.initialize(bucketLength, window, - window / static_cast(repeats[0])); + hypotheses.initialize(bucketLength, window, window / static_cast(repeats[0])); - for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) - { - hypotheses.add(time, generators[index[0]](time) - + noise[(time - 10000) / bucketLength]); + for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) { + hypotheses.add(time, generators[index[0]](time) + noise[(time - 10000) / bucketLength]); } maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; - if (result.periodic()) - { + if (result.periodic()) { LOG_DEBUG("result = " << result.print()); } FP += result.periodic() ? 
1.0 : 0.0; @@ -107,8 +102,7 @@ void CPeriodicityHypothesisTestsTest::testNonPeriodic() CPPUNIT_ASSERT(TN / (FP + TN) > 0.995); } -void CPeriodicityHypothesisTestsTest::testDiurnal() -{ +void CPeriodicityHypothesisTestsTest::testDiurnal() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CPeriodicityHypothesisTestsTest::testDiurnal |"); LOG_DEBUG("+------------------------------------------------+"); @@ -122,14 +116,11 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() TTimeVec bucketLengths{TEN_MINS, HALF_HOUR}; TSizeVec permittedGenerators{2, 4, 4, 5}; TGeneratorVec generators{smoothDaily, spikeyDaily, smoothWeekly, weekends, spikeyWeekly}; - TStrVec expected - { - "{ 'daily' }", - "{ 'daily' }", - "{ 'weekly' }", - "{ 'weekend daily' 'weekday daily' 'weekend weekly' 'weekday weekly' }", - "{ 'daily' 'weekly' }" - }; + TStrVec expected{"{ 'daily' }", + "{ 'daily' }", + "{ 'weekly' }", + "{ 'weekend daily' 'weekday daily' 'weekend weekly' 'weekday weekly' }", + "{ 'daily' 'weekly' }"}; test::CRandomNumbers rng; @@ -140,42 +131,38 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() double TP{0.0}; double FN{0.0}; - for (std::size_t test = 0u; test < 100; ++test) - { - if (test % 10 == 0) - { + for (std::size_t test = 0u; test < 100; ++test) { + if (test % 10 == 0) { LOG_DEBUG("test " << test << " / 100"); } - for (std::size_t i = 0u; i < windows.size(); ++i) - { + for (std::size_t i = 0u; i < windows.size(); ++i) { core_t::TTime window{windows[i]}; - for (auto bucketLength : bucketLengths) - { - switch (test % 3) - { - case 0: rng.generateNormalSamples(0.0, 1.0, window / bucketLength, noise); break; - case 1: rng.generateGammaSamples(1.0, 1.0, window / bucketLength, noise); break; - case 2: rng.generateLogNormalSamples(0.2, 0.3, window / bucketLength, noise); break; + for (auto bucketLength : bucketLengths) { + switch (test % 3) { + case 0: + rng.generateNormalSamples(0.0, 1.0, window / bucketLength, noise); + break; + case 1: + rng.generateGammaSamples(1.0, 1.0, window / bucketLength, noise); + break; + case 2: + rng.generateLogNormalSamples(0.2, 0.3, window / bucketLength, noise); + break; } rng.generateUniformSamples(0, permittedGenerators[i], 1, index); rng.generateUniformSamples(3, 20, 1, repeats); maths::CPeriodicityHypothesisTests hypotheses; - hypotheses.initialize(bucketLength, window, - window / static_cast(repeats[0])); + hypotheses.initialize(bucketLength, window, window / static_cast(repeats[0])); - for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) - { - hypotheses.add(time, 20.0 * generators[index[0]](time) - + noise[(time - 10000) / bucketLength]); + for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) { + hypotheses.add(time, 20.0 * generators[index[0]](time) + noise[(time - 10000) / bucketLength]); } maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; - if (result.print() != expected[index[0]]) - { - LOG_DEBUG("result = " << result.print() - << " expected " << expected[index[0]]); + if (result.print() != expected[index[0]]) { + LOG_DEBUG("result = " << result.print() << " expected " << expected[index[0]]); } TP += result.print() == expected[index[0]] ? 1.0 : 0.0; FN += result.print() == expected[index[0]] ? 
0.0 : 1.0; @@ -193,16 +180,11 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; - CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/spikey_data.csv", - timeseries, - startTime, - endTime, - test::CTimeSeriesTestData::CSV_UNIX_REGEX)); + CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse( + "testfiles/spikey_data.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_UNIX_REGEX)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); TTimeVec lastTests{timeseries[0].first, timeseries[0].first}; TTimeVec windows{4 * DAY, 14 * DAY}; @@ -211,13 +193,10 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() hypotheses[0].initialize(HOUR, windows[0], DAY); hypotheses[1].initialize(HOUR, windows[1], DAY); - for (std::size_t i = 0u; i < timeseries.size(); ++i) - { + for (std::size_t i = 0u; i < timeseries.size(); ++i) { core_t::TTime time{timeseries[i].first}; - for (std::size_t j = 0u; j < 2; ++j) - { - if (time > lastTests[j] + windows[j]) - { + for (std::size_t j = 0u; j < 2; ++j) { + if (time > lastTests[j] + windows[j]) { maths::CPeriodicityHypothesisTestsResult result{hypotheses[j].test()}; CPPUNIT_ASSERT_EQUAL(std::string("{ 'daily' }"), result.print()); hypotheses[j] = maths::CPeriodicityHypothesisTests(); @@ -235,16 +214,11 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; - CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/diurnal.csv", - timeseries, - startTime, - endTime, - test::CTimeSeriesTestData::CSV_UNIX_REGEX)); + CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse( + "testfiles/diurnal.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_UNIX_REGEX)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); core_t::TTime lastTest{timeseries[0].first}; core_t::TTime window{14 * DAY}; @@ -252,11 +226,9 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() maths::CPeriodicityHypothesisTests hypotheses; hypotheses.initialize(HOUR, window, DAY); - for (std::size_t i = 0u; i < timeseries.size(); ++i) - { + for (std::size_t i = 0u; i < timeseries.size(); ++i) { core_t::TTime time{timeseries[i].first}; - if (time > lastTest + window) - { + if (time > lastTest + window) { maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; CPPUNIT_ASSERT_EQUAL(std::string("{ 'weekend daily' 'weekday daily' }"), result.print()); hypotheses = maths::CPeriodicityHypothesisTests(); @@ -281,9 +253,7 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); core_t::TTime lastTest{timeseries[0].first}; core_t::TTime window{14 * DAY}; @@ -291,11 +261,9 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() 
maths::CPeriodicityHypothesisTests hypotheses; hypotheses.initialize(HOUR, window, DAY); - for (std::size_t i = 0u; i < timeseries.size(); ++i) - { + for (std::size_t i = 0u; i < timeseries.size(); ++i) { core_t::TTime time{timeseries[i].first}; - if (time > lastTest + window) - { + if (time > lastTest + window) { maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; CPPUNIT_ASSERT_EQUAL(std::string("{ }"), result.print()); hypotheses = maths::CPeriodicityHypothesisTests(); @@ -320,9 +288,7 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); core_t::TTime lastTest{timeseries[0].first}; core_t::TTime window{14 * DAY}; @@ -330,14 +296,12 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() maths::CPeriodicityHypothesisTests hypotheses; hypotheses.initialize(HOUR, window, DAY); - for (std::size_t i = 0u; i < timeseries.size(); ++i) - { + for (std::size_t i = 0u; i < timeseries.size(); ++i) { core_t::TTime time{timeseries[i].first}; - if (time > lastTest + window) - { + if (time > lastTest + window) { maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; - CPPUNIT_ASSERT( result.print() == "{ 'weekend daily' 'weekday daily' }" - || result.print() == "{ 'weekend daily' 'weekday daily' 'weekend weekly' 'weekday weekly' }"); + CPPUNIT_ASSERT(result.print() == "{ 'weekend daily' 'weekday daily' }" || + result.print() == "{ 'weekend daily' 'weekday daily' 'weekend weekly' 'weekday weekly' }"); hypotheses = maths::CPeriodicityHypothesisTests(); hypotheses.initialize(HOUR, window, DAY); lastTest += window; @@ -347,8 +311,7 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() } } -void CPeriodicityHypothesisTestsTest::testNonDiurnal() -{ +void CPeriodicityHypothesisTestsTest::testNonDiurnal() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CPeriodicityHypothesisTestsTest::testNonDiurnal |"); LOG_DEBUG("+---------------------------------------------------+"); @@ -369,40 +332,39 @@ void CPeriodicityHypothesisTestsTest::testNonDiurnal() double TP{0.0}; double FN{0.0}; - for (std::size_t test = 0u; test < 100; ++test) - { - if (test % 10 == 0) - { + for (std::size_t test = 0u; test < 100; ++test) { + if (test % 10 == 0) { LOG_DEBUG("test " << test << " / 100"); } - for (std::size_t i = 0u; i < windows.size(); ++i) - { + for (std::size_t i = 0u; i < windows.size(); ++i) { core_t::TTime window{windows[i]}; TDoubleVec scaling_; rng.generateUniformSamples(1.0, 5.0, 1, scaling_); double scaling{test % 2 == 0 ? 
scaling_[0] : 1.0 / scaling_[0]}; - for (std::size_t j = 0u; j < bucketLengths.size(); ++j) - { + for (std::size_t j = 0u; j < bucketLengths.size(); ++j) { core_t::TTime bucketLength{bucketLengths[j]}; - core_t::TTime period{maths::CIntegerTools::floor( - static_cast(static_cast(DAY) / scaling), - bucketLength)}; + core_t::TTime period{ + maths::CIntegerTools::floor(static_cast(static_cast(DAY) / scaling), bucketLength)}; scaling = static_cast(DAY) / static_cast(period); - if (scaling == 1.0 || window < 3 * period) - { + if (scaling == 1.0 || window < 3 * period) { continue; } maths::CPeriodicityHypothesisTestsResult expected; expected.add(core::CStringUtils::typeToString(period), false, 0, period, {0, period}); - switch (test % 3) - { - case 0: rng.generateNormalSamples(0.0, 1.0, window / bucketLength, noise); break; - case 1: rng.generateGammaSamples(1.0, 1.0, window / bucketLength, noise); break; - case 2: rng.generateLogNormalSamples(0.2, 0.3, window / bucketLength, noise); break; + switch (test % 3) { + case 0: + rng.generateNormalSamples(0.0, 1.0, window / bucketLength, noise); + break; + case 1: + rng.generateGammaSamples(1.0, 1.0, window / bucketLength, noise); + break; + case 2: + rng.generateLogNormalSamples(0.2, 0.3, window / bucketLength, noise); + break; } rng.generateUniformSamples(0, permittedGenerators[j], 1, index); rng.generateUniformSamples(3, 20, 1, repeats); @@ -410,17 +372,13 @@ void CPeriodicityHypothesisTestsTest::testNonDiurnal() maths::CPeriodicityHypothesisTests hypotheses; hypotheses.initialize(bucketLength, window, period); - for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) - { - hypotheses.add(time, 20.0 * scale(scaling, time, generators[index[0]]) - + noise[(time - 10000) / bucketLength]); + for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) { + hypotheses.add(time, 20.0 * scale(scaling, time, generators[index[0]]) + noise[(time - 10000) / bucketLength]); } maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; - if (result.print() != expected.print()) - { - LOG_DEBUG("result = " << result.print() - << " expected " << expected.print()); + if (result.print() != expected.print()) { + LOG_DEBUG("result = " << result.print() << " expected " << expected.print()); } TP += result.print() == expected.print() ? 1.0 : 0.0; FN += result.print() == expected.print() ? 
0.0 : 1.0; @@ -432,35 +390,28 @@ void CPeriodicityHypothesisTestsTest::testNonDiurnal() CPPUNIT_ASSERT(TP / (TP + FN) > 0.99); } -void CPeriodicityHypothesisTestsTest::testWithSparseData() -{ +void CPeriodicityHypothesisTestsTest::testWithSparseData() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CPeriodicityHypothesisTestsTest::testTestWithSparseData |"); LOG_DEBUG("+-----------------------------------------------------------+"); test::CRandomNumbers rng; - LOG_DEBUG("Daily Periodic") - { + LOG_DEBUG("Daily Periodic") { maths::CPeriodicityHypothesisTests hypotheses; hypotheses.initialize(HALF_HOUR, WEEK, DAY); core_t::TTime time = 0; - for (std::size_t t = 0u; t < 7; ++t) - { - for (auto value : { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) - { - if (value > 0.0) - { + for (std::size_t t = 0u; t < 7; ++t) { + for (auto value : {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, + 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) { + if (value > 0.0) { hypotheses.add(time, value); } time += HALF_HOUR; } - if (t > 3) - { + if (t > 3) { maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; LOG_DEBUG("result = " << result.print()); CPPUNIT_ASSERT_EQUAL(std::string("{ 'daily' }"), result.print()); @@ -468,21 +419,16 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() } } - LOG_DEBUG("Daily Not Periodic") - { + LOG_DEBUG("Daily Not Periodic") { maths::CPeriodicityHypothesisTests hypotheses; hypotheses.initialize(HALF_HOUR, WEEK, DAY); core_t::TTime time = 0; - for (std::size_t t = 0u; t < 7; ++t) - { - for (auto value : { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) - { - if (value > 0.0) - { + for (std::size_t t = 0u; t < 7; ++t) { + for (auto value : {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, + 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) { + if (value > 0.0) { TDoubleVec rand; rng.generateUniformSamples(-1.0, 1.0, 1, rand); hypotheses.add(time, rand[0]); @@ -496,38 +442,28 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() } } - LOG_DEBUG("Weekly") - { + LOG_DEBUG("Weekly") { maths::CPeriodicityHypothesisTests hypotheses; hypotheses.initialize(HOUR, 2 * WEEK, WEEK); core_t::TTime time = 0; - for (std::size_t t = 0u; t < 4; ++t) - { - for (auto value : { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 
9.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) - { - if (value > 0.0) - { + for (std::size_t t = 0u; t < 4; ++t) { + for (auto value : + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, + 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, + 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, + 8.0, 9.0, 9.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, + 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, + 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) { + if (value > 0.0) { hypotheses.add(time, value); } time += HOUR; } - if (t >= 2) - { + if (t >= 2) { maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; LOG_DEBUG("result = " << result.print()); CPPUNIT_ASSERT_EQUAL(std::string("{ 'daily' 'weekly' }"), result.print()); @@ -535,31 +471,22 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() } } - LOG_DEBUG("Weekly Not Periodic") - { + LOG_DEBUG("Weekly Not Periodic") { maths::CPeriodicityHypothesisTests hypotheses; hypotheses.initialize(HOUR, 4 * WEEK, WEEK); core_t::TTime time = 0; - for (std::size_t t = 0u; t < 4; ++t) - { - for (auto value : { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) - { - if (value > 0.0) - { + for (std::size_t t = 0u; t < 4; ++t) { + for (auto value : + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, + 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, + 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, + 8.0, 9.0, 9.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 
1.0, + 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, + 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) { + if (value > 0.0) { TDoubleVec rand; rng.generateUniformSamples(-1.0, 1.0, 1, rand); hypotheses.add(time, rand[0]); @@ -574,8 +501,7 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() } } -void CPeriodicityHypothesisTestsTest::testTestForPeriods() -{ +void CPeriodicityHypothesisTestsTest::testTestForPeriods() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CPeriodicityHypothesisTestsTest::testTestForPeriods |"); LOG_DEBUG("+-------------------------------------------------------+"); @@ -598,40 +524,39 @@ void CPeriodicityHypothesisTestsTest::testTestForPeriods() TDoubleVec TP{0.0, 0.0, 0.0}; TDoubleVec FN{0.0, 0.0, 0.0}; - for (std::size_t test = 0u; test < 100; ++test) - { - if (test % 10 == 0) - { + for (std::size_t test = 0u; test < 100; ++test) { + if (test % 10 == 0) { LOG_DEBUG("test " << test << " / 100"); } - for (std::size_t i = 0u; i < windows.size(); ++i) - { + for (std::size_t i = 0u; i < windows.size(); ++i) { core_t::TTime window{windows[i]}; TDoubleVec scaling_; rng.generateUniformSamples(1.0, 5.0, 1, scaling_); double scaling{test % 2 == 0 ? scaling_[0] : 1.0 / scaling_[0]}; - for (std::size_t j = 0u; j < bucketLengths.size(); ++j) - { + for (std::size_t j = 0u; j < bucketLengths.size(); ++j) { core_t::TTime bucketLength{bucketLengths[j]}; - core_t::TTime period{maths::CIntegerTools::floor( - static_cast(static_cast(DAY) / scaling), - bucketLength)}; + core_t::TTime period{ + maths::CIntegerTools::floor(static_cast(static_cast(DAY) / scaling), bucketLength)}; scaling = static_cast(DAY) / static_cast(period); - if (scaling == 1.0 || window < 3 * period) - { + if (scaling == 1.0 || window < 3 * period) { continue; } maths::CPeriodicityHypothesisTestsResult expected; expected.add(core::CStringUtils::typeToString(period), false, 0, period, {0, period}); - switch (test % 3) - { - case 0: rng.generateNormalSamples(0.0, 1.0, window / bucketLength, noise); break; - case 1: rng.generateGammaSamples(1.0, 1.0, window / bucketLength, noise); break; - case 2: rng.generateLogNormalSamples(0.2, 0.3, window / bucketLength, noise); break; + switch (test % 3) { + case 0: + rng.generateNormalSamples(0.0, 1.0, window / bucketLength, noise); + break; + case 1: + rng.generateGammaSamples(1.0, 1.0, window / bucketLength, noise); + break; + case 2: + rng.generateLogNormalSamples(0.2, 0.3, window / bucketLength, noise); + break; } rng.generateUniformSamples(0, permittedGenerators[j], 1, index); rng.generateUniformSamples(3, 20, 1, repeats); @@ -640,36 +565,28 @@ void CPeriodicityHypothesisTestsTest::testTestForPeriods() hypotheses.initialize(bucketLength, window, period); maths::TFloatMeanAccumulatorVec values(window / bucketLength); - for (core_t::TTime time = startTime; time < startTime + window; time += bucketLength) - { + for (core_t::TTime time = startTime; time < startTime + window; time += bucketLength) { std::size_t bucket((time - startTime) / bucketLength); double value{20.0 * scale(scaling, time, generators[index[0]]) + noise[bucket]}; values[bucket].add(value); } maths::CPeriodicityHypothesisTestsConfig config; - maths::CPeriodicityHypothesisTestsResult result{ - maths::testForPeriods(config, startTime, bucketLength, values)}; - if (result.print() != expected.print()) - { - LOG_DEBUG("result = " << result.print() - 
<< " expected " << expected.print()); + maths::CPeriodicityHypothesisTestsResult result{maths::testForPeriods(config, startTime, bucketLength, values)}; + if (result.print() != expected.print()) { + LOG_DEBUG("result = " << result.print() << " expected " << expected.print()); } TP[0] += result.print() == expected.print() ? 1.0 : 0.0; FN[0] += result.print() == expected.print() ? 0.0 : 1.0; - if (result.components().size() == 1) - { + if (result.components().size() == 1) { core_t::TTime modp{result.components()[0].s_Period % period}; - double error{ static_cast(std::min(modp, std::abs(period - modp))) - / static_cast(period)}; + double error{static_cast(std::min(modp, std::abs(period - modp))) / static_cast(period)}; TP[1] += error < 0.01 ? 1.0 : 0.0; FN[1] += error < 0.01 ? 0.0 : 1.0; TP[2] += error < 0.05 ? 1.0 : 0.0; FN[2] += error < 0.05 ? 0.0 : 1.0; - } - else - { + } else { FN[0] += 1.0; FN[1] += 1.0; FN[2] += 1.0; @@ -686,26 +603,19 @@ void CPeriodicityHypothesisTestsTest::testTestForPeriods() CPPUNIT_ASSERT(TP[2] / (TP[2] + FN[2]) > 0.99); } -CppUnit::Test *CPeriodicityHypothesisTestsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CPeriodicityHypothesisTestsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPeriodicityHypothesisTestsTest::testNonPeriodic", - &CPeriodicityHypothesisTestsTest::testNonPeriodic) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPeriodicityHypothesisTestsTest::testDiurnal", - &CPeriodicityHypothesisTestsTest::testDiurnal) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPeriodicityHypothesisTestsTest::testNonDiurnal", - &CPeriodicityHypothesisTestsTest::testNonDiurnal) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPeriodicityHypothesisTestsTest::testWithSparseData", - &CPeriodicityHypothesisTestsTest::testWithSparseData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPeriodicityHypothesisTestsTest::testTestForPeriods", - &CPeriodicityHypothesisTestsTest::testTestForPeriods) ); +CppUnit::Test* CPeriodicityHypothesisTestsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPeriodicityHypothesisTestsTest"); - return suiteOfTests; + suiteOfTests->addTest(new CppUnit::TestCaller("CPeriodicityHypothesisTestsTest::testNonPeriodic", + &CPeriodicityHypothesisTestsTest::testNonPeriodic)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPeriodicityHypothesisTestsTest::testDiurnal", + &CPeriodicityHypothesisTestsTest::testDiurnal)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPeriodicityHypothesisTestsTest::testNonDiurnal", + &CPeriodicityHypothesisTestsTest::testNonDiurnal)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPeriodicityHypothesisTestsTest::testWithSparseData", + &CPeriodicityHypothesisTestsTest::testWithSparseData)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPeriodicityHypothesisTestsTest::testTestForPeriods", + &CPeriodicityHypothesisTestsTest::testTestForPeriods)); + return suiteOfTests; } diff --git a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.h b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.h index 1115515c60..b1a2215fc5 100644 --- a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.h +++ b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.h @@ -9,16 +9,15 @@ #include -class CPeriodicityHypothesisTestsTest : public CppUnit::TestFixture -{ - public: - void testNonPeriodic(); - void testDiurnal(); - void testNonDiurnal(); - void testWithSparseData(); - void testTestForPeriods(); +class 
CPeriodicityHypothesisTestsTest : public CppUnit::TestFixture { +public: + void testNonPeriodic(); + void testDiurnal(); + void testNonDiurnal(); + void testWithSparseData(); + void testTestForPeriods(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CPeriodicityHypothesisTestsTest_h diff --git a/lib/maths/unittest/CPoissonMeanConjugateTest.cc b/lib/maths/unittest/CPoissonMeanConjugateTest.cc index 925f4b207a..03013940dd 100644 --- a/lib/maths/unittest/CPoissonMeanConjugateTest.cc +++ b/lib/maths/unittest/CPoissonMeanConjugateTest.cc @@ -33,8 +33,7 @@ using namespace ml; using namespace handy_typedefs; -namespace -{ +namespace { using TUIntVec = std::vector; using TDoubleVec = std::vector; @@ -43,11 +42,9 @@ using TDoubleDoublePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CPoissonMeanConjugate = CPriorTestInterfaceMixin; - } -void CPoissonMeanConjugateTest::testMultipleUpdate() -{ +void CPoissonMeanConjugateTest::testMultipleUpdate() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testMultipleUpdate |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -63,8 +60,7 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() TUIntVec samples_; rng.generatePoissonSamples(rate, 100, samples_); TDoubleVec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { samples.push_back(static_cast(samples_[i])); } @@ -72,8 +68,7 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() CPoissonMeanConjugate filter1(CPoissonMeanConjugate::nonInformativePrior()); CPoissonMeanConjugate filter2(filter1); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter1.addSamples(TDouble1Vec(1, samples[i])); } filter2.addSamples(samples); @@ -89,15 +84,10 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() CPoissonMeanConjugate filter2(filter1); maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t j = 0u; j < samples.size(); ++j) - { - filter1.addSamples(weightStyle, - TDouble1Vec(1, samples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + for (std::size_t j = 0u; j < samples.size(); ++j) { + filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]), TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); } - filter2.addSamples(weightStyle, - samples, - TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(weightStyle, samples, TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0))); LOG_DEBUG(filter1.print()); LOG_DEBUG("vs"); @@ -114,13 +104,11 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() double x = 3.0; std::size_t count = 10; - for (std::size_t j = 0u; j < count; ++j) - { + for (std::size_t j = 0u; j < count; ++j) { filter1.addSamples(TDouble1Vec(1, x)); } - filter2.addSamples(maths::CConstantWeights::COUNT, - TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples( + maths::CConstantWeights::COUNT, TDouble1Vec(1, x), TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); LOG_DEBUG(filter1.print()); LOG_DEBUG("vs"); @@ -130,8 +118,7 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() } } -void CPoissonMeanConjugateTest::testPropagation() -{ +void CPoissonMeanConjugateTest::testPropagation() { 
LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testPropagation |"); LOG_DEBUG("+----------------------------------------------+"); @@ -146,11 +133,9 @@ void CPoissonMeanConjugateTest::testPropagation() TUIntVec samples; rng.generatePoissonSamples(1.0, 500, samples); - CPoissonMeanConjugate filter( - CPoissonMeanConjugate::nonInformativePrior(0.0, 0.1)); + CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior(0.0, 0.1)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(TDouble1Vec(1, static_cast(samples[i]))); } @@ -160,14 +145,12 @@ void CPoissonMeanConjugateTest::testPropagation() double propagatedMean = filter.marginalLikelihoodMean(); - LOG_DEBUG("mean = " << mean - << ", propagatedMean = " << propagatedMean); + LOG_DEBUG("mean = " << mean << ", propagatedMean = " << propagatedMean); CPPUNIT_ASSERT_DOUBLES_EQUAL(mean, propagatedMean, eps); } -void CPoissonMeanConjugateTest::testMeanEstimation() -{ +void CPoissonMeanConjugateTest::testMeanEstimation() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testMeanEstimation |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -177,68 +160,54 @@ void CPoissonMeanConjugateTest::testMeanEstimation() // mean of a Poisson process lies in various confidence intervals // the correct percentage of the times. - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nTests = 500u; - const double testIntervals[] = { 50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0 }; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; - for (std::size_t i = 0; i < boost::size(decayRates); ++i) - { + for (std::size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; - double errors[] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + double errors[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; - for (unsigned int test = 0; test < nTests; ++test) - { + for (unsigned int test = 0; test < nTests; ++test) { double rate = test + 1; TUIntVec samples; rng.generatePoissonSamples(rate, 500, samples); - CPoissonMeanConjugate filter( - CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[i])); + CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, static_cast(samples[j]))); filter.propagateForwardsByTime(1.0); } - for (std::size_t j = 0; j < boost::size(testIntervals); ++j) - { - TDoubleDoublePr confidenceInterval = - filter.meanConfidenceInterval(testIntervals[j]); + for (std::size_t j = 0; j < boost::size(testIntervals); ++j) { + TDoubleDoublePr confidenceInterval = filter.meanConfidenceInterval(testIntervals[j]); - if (rate < confidenceInterval.first || - rate > confidenceInterval.second) - { + if (rate < confidenceInterval.first || rate > confidenceInterval.second) { errors[j] += 1.0; } } } - for (std::size_t j = 0; j < boost::size(testIntervals); ++j) - { + for (std::size_t j = 0; j < boost::size(testIntervals); ++j) { double interval = 100.0 * errors[j] / static_cast(nTests); - LOG_DEBUG("interval = " << interval - << ", expectedInterval = " << (100.0 - testIntervals[j])); + LOG_DEBUG("interval = " << interval << ", expectedInterval = " << (100.0 - 
testIntervals[j])); // If the decay rate is zero the intervals should be accurate. // Otherwise, they should be an upper bound. - if (decayRates[i] == 0.0) - { + if (decayRates[i] == 0.0) { CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[j]), 4.0); - } - else - { + } else { CPPUNIT_ASSERT(interval <= (100.0 - testIntervals[j])); } } } } -void CPoissonMeanConjugateTest::testMarginalLikelihood() -{ +void CPoissonMeanConjugateTest::testMarginalLikelihood() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testMarginalLikelihood |"); LOG_DEBUG("+-----------------------------------------------------+"); @@ -254,26 +223,21 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() const double epsilon = 1e-9; - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double decayRates[] = {0.0, 0.001, 0.01}; - for (std::size_t i = 0u; i < boost::size(decayRates); ++i) - { - CPoissonMeanConjugate filter( - CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[i])); + for (std::size_t i = 0u; i < boost::size(decayRates); ++i) { + CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[i])); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, static_cast(samples[j]))); filter.propagateForwardsByTime(1.0); } double cdf = 0.0; - for (unsigned int x = 0; x < 20; ++x) - { + for (unsigned int x = 0; x < 20; ++x) { double logLikelihood = 0.0; TDouble1Vec sample(1, static_cast(x)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); cdf += std::exp(logLikelihood); double lb, ub; @@ -281,9 +245,7 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() CPPUNIT_ASSERT_EQUAL(lb, ub); double minusLogCdf = (lb + ub) / 2.0; - LOG_DEBUG("sample = " << x - << ", -log(cdf) = " << (-std::log(cdf)) - << ", minusLogCdf = " << minusLogCdf); + LOG_DEBUG("sample = " << x << ", -log(cdf) = " << (-std::log(cdf)) << ", minusLogCdf = " << minusLogCdf); CPPUNIT_ASSERT_DOUBLES_EQUAL(minusLogCdf, -std::log(cdf), epsilon); CPPUNIT_ASSERT(minusLogCdf >= 0.0); @@ -294,19 +256,17 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() { // Now test a range of priors. - const double shapes[] = { 25.0, 80.0, 600.0, 1200.0 }; - const double rates[] = { 5.0, 4.0, 10.0, 3.0 }; + const double shapes[] = {25.0, 80.0, 600.0, 1200.0}; + const double rates[] = {5.0, 4.0, 10.0, 3.0}; CPPUNIT_ASSERT(boost::size(shapes) == boost::size(rates)); // We'll sample the c.d.f. at mean -2, -1, 0, 1 and 2 s.t.d. 
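// The invariant these hunks exercise can be shown in isolation: for a discrete
// distribution the c.d.f. is the running sum of the p.m.f., so accumulating
// probabilities up to x must reproduce cdf(x). A minimal standalone sketch,
// assuming only Boost.Math; the rate 5.0 is an arbitrary example, not part of
// the patch itself:

#include <boost/math/distributions/poisson.hpp>
#include <cassert>
#include <cmath>

int main() {
    boost::math::poisson_distribution<> poisson(5.0);
    double cdf = 0.0;
    for (unsigned int x = 0; x <= 20; ++x) {
        // Accumulating the p.m.f. must reproduce the c.d.f. at each point.
        cdf += boost::math::pdf(poisson, x);
        assert(std::fabs(cdf - boost::math::cdf(poisson, x)) < 1e-9);
    }
    return 0;
}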
-        const double sampleStds[] = { -2.0, -1.0, 0.0, 1.0, 2.0 };
+        const double sampleStds[] = {-2.0, -1.0, 0.0, 1.0, 2.0};

-        for (std::size_t i = 0; i < boost::size(shapes); ++i)
-        {
+        for (std::size_t i = 0; i < boost::size(shapes); ++i) {
             CPoissonMeanConjugate filter(maths::CPoissonMeanConjugate(0.0, shapes[i], rates[i]));

-            for (std::size_t j = 0; j < boost::size(sampleStds); ++j)
-            {
+            for (std::size_t j = 0; j < boost::size(sampleStds); ++j) {
                 double mean = filter.marginalLikelihoodMean();
                 unsigned int sample = static_cast<unsigned int>(mean + sampleStds[j] * std::sqrt(mean));
@@ -317,18 +277,15 @@
                 CPPUNIT_ASSERT(minusLogCdf >= 0.0);

                 double cdf = 0.0;
-                for (unsigned int x = 0; x <= sample; ++x)
-                {
+                for (unsigned int x = 0; x <= sample; ++x) {
                     double logLikelihood = 0.0;
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                         filter.jointLogMarginalLikelihood(TDouble1Vec(1, static_cast<double>(x)),
-                                                                           logLikelihood));
+                                         filter.jointLogMarginalLikelihood(TDouble1Vec(1, static_cast<double>(x)), logLikelihood));
                     cdf += std::exp(logLikelihood);
                     cdf = std::min(cdf, 1.0);
                 }

-                LOG_DEBUG("-log(cdf) = " << -std::log(cdf)
-                          << ", minusLogCdf = " << minusLogCdf);
+                LOG_DEBUG("-log(cdf) = " << -std::log(cdf) << ", minusLogCdf = " << minusLogCdf);

                 // We'll tolerate a 5% error in the -log(c.d.f.) since
                 // we're approximating for large mean.
@@ -356,34 +313,29 @@
        TUIntVec seedSamples;
        rng.generatePoissonSamples(rate, 100, seedSamples);
-       for (std::size_t i = 0u; i < seedSamples.size(); ++i)
-       {
+       for (std::size_t i = 0u; i < seedSamples.size(); ++i) {
            filter.addSamples(TDouble1Vec(1, static_cast<double>(seedSamples[i])));
        }

        TUIntVec samples;
        rng.generatePoissonSamples(rate, 5000, samples);
-       for (std::size_t i = 0u; i < samples.size(); ++i)
-       {
+       for (std::size_t i = 0u; i < samples.size(); ++i) {
            TDouble1Vec sample(1, static_cast<double>(samples[i]));
            filter.addSamples(sample);

            double logLikelihood = 0.0;
-           CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                filter.jointLogMarginalLikelihood(sample, logLikelihood));
+           CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
            differentialEntropy -= logLikelihood;
        }

        differentialEntropy /= static_cast<double>(samples.size());

-       LOG_DEBUG("differentialEntropy = " << differentialEntropy
-                 << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
+       LOG_DEBUG("differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);

        CPPUNIT_ASSERT(std::fabs(differentialEntropy - expectedDifferentialEntropy) < 0.01);
    }
}

-void CPoissonMeanConjugateTest::testMarginalLikelihoodMode()
-{
+void CPoissonMeanConjugateTest::testMarginalLikelihoodMode() {
    LOG_DEBUG("+---------------------------------------------------------+");
    LOG_DEBUG("| CPoissonMeanConjugateTest::testMarginalLikelihoodMode |");
    LOG_DEBUG("+---------------------------------------------------------+");
@@ -391,16 +343,12 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodMode()
    // Test that the marginal likelihood mode is what we'd expect
    // with various variance scales.
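// The expectation asserted in this test can be checked independently of the
// filter: boost::math::mode for a Poisson(rate) returns floor(rate), which is
// the value the filter's marginalLikelihoodMode should approach once it has
// seen enough samples. A small sketch under that assumption (the rates are
// arbitrary examples):

#include <boost/math/distributions/poisson.hpp>
#include <cassert>
#include <cmath>

int main() {
    for (double rate : {0.1, 5.0, 100.0}) {
        boost::math::poisson_distribution<> poisson(rate);
        // The mode of a Poisson is the integer part of its rate.
        assert(boost::math::mode(poisson) == std::floor(rate));
    }
    return 0;
}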
- const double rates[] = { 0.1, 5.0, 100.0 }; - const double varianceScales[] = - { - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0 - }; + const double rates[] = {0.1, 5.0, 100.0}; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(rates); ++i) - { + for (std::size_t i = 0u; i < boost::size(rates); ++i) { LOG_DEBUG("*** rate = " << rates[i] << " ***"); boost::math::poisson_distribution<> poisson(rates[i]); @@ -408,47 +356,40 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodMode() CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior()); TUIntVec samples; rng.generatePoissonSamples(rates[i], 1000, samples); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, static_cast(samples[j]))); } maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); TDouble4Vec weight(1, 1.0); - for (std::size_t j = 0u; j < boost::size(varianceScales); ++j) - { + for (std::size_t j = 0u; j < boost::size(varianceScales); ++j) { double vs = varianceScales[j]; weight[0] = vs; double expectedMode = boost::math::mode(poisson); LOG_DEBUG("marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight) - << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, - filter.marginalLikelihoodMode(weightStyle, weight), - 1.0); + << ", expectedMode = " << expectedMode); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0); } } } -void CPoissonMeanConjugateTest::testMarginalLikelihoodVariance() -{ +void CPoissonMeanConjugateTest::testMarginalLikelihoodVariance() { LOG_DEBUG("+-------------------------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testMarginalLikelihoodVariance |"); LOG_DEBUG("+-------------------------------------------------------------+"); - const double rates[] = { 0.1, 5.0, 100.0 }; + const double rates[] = {0.1, 5.0, 100.0}; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(rates); ++i) - { + for (std::size_t i = 0u; i < boost::size(rates); ++i) { LOG_DEBUG("*** rate = " << rates[i] << " ***"); CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior()); TUIntVec seedSamples; rng.generatePoissonSamples(rates[i], 5, seedSamples); - for (std::size_t j = 0u; j < seedSamples.size(); ++j) - { + for (std::size_t j = 0u; j < seedSamples.size(); ++j) { filter.addSamples(TDouble1Vec(1, static_cast(seedSamples[j]))); } @@ -456,24 +397,19 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodVariance() rng.generatePoissonSamples(rates[i], 100, samples); TMeanAccumulator relativeError; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, static_cast(samples[j]))); double expectedVariance; CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); - if (j % 10 == 0) - { + if (j % 10 == 0) { LOG_DEBUG("marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() - << ", expectedVariance = " << expectedVariance); + << ", expectedVariance = " << expectedVariance); } // The error is at the precision of the numerical integration. 
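// To see why only a loose 30% tolerance is used below, note that the reference
// value comes from numerical integration. The same effect can be reproduced
// with a plain Poisson, whose variance equals its rate: a truncated sum over
// the p.m.f. approaches the analytic variance only up to the truncation and
// integration error. The rate and cut-off here are arbitrary choices:

#include <boost/math/distributions/poisson.hpp>
#include <cassert>
#include <cmath>

int main() {
    const double rate = 5.0;
    boost::math::poisson_distribution<> poisson(rate);
    double variance = 0.0;
    for (unsigned int k = 0; k <= 100; ++k) {
        double residual = static_cast<double>(k) - rate;
        variance += residual * residual * boost::math::pdf(poisson, k);
    }
    // Close to, but not exactly, the analytic variance (which equals the rate).
    assert(std::fabs(variance - rate) < 1e-6);
    return 0;
}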
- CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, - filter.marginalLikelihoodVariance(), - 0.3 * expectedVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.3 * expectedVariance); - relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) - / expectedVariance); + relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance); } LOG_DEBUG("relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -481,8 +417,7 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodVariance() } } -void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() -{ +void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testSampleMarginalLikelihood |"); LOG_DEBUG("+-----------------------------------------------------------+"); @@ -496,12 +431,11 @@ void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() // jointLogMarginalLikelihood and minusLogJointCdf so use these // to compute the mean and percentiles. - const double rates[] = { 5.0, 200.0 }; + const double rates[] = {5.0, 200.0}; const double eps = 1e-3; - for (std::size_t i = 0; i < boost::size(rates); ++i) - { + for (std::size_t i = 0; i < boost::size(rates); ++i) { test::CRandomNumbers rng; TUIntVec samples; @@ -514,8 +448,7 @@ void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() TMeanAccumulator meanVarError; std::size_t numberSampled = 20u; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); sampled.clear(); @@ -526,31 +459,26 @@ void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() sampledMomemts = std::for_each(sampled.begin(), sampled.end(), sampledMomemts); LOG_DEBUG("expectedMean = " << filter.marginalLikelihoodMean() - << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMomemts)); + << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMomemts)); LOG_DEBUG("expectedMean = " << filter.marginalLikelihoodVariance() - << ", sampledVariance = " << maths::CBasicStatistics::variance(sampledMomemts)); + << ", sampledVariance = " << maths::CBasicStatistics::variance(sampledMomemts)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), - maths::CBasicStatistics::mean(sampledMomemts), - 1e-8); + CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMomemts), 1e-8); CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(), maths::CBasicStatistics::variance(sampledMomemts), 0.15 * filter.marginalLikelihoodVariance()); - meanVarError.add( std::fabs( filter.marginalLikelihoodVariance() - - maths::CBasicStatistics::variance(sampledMomemts)) - / filter.marginalLikelihoodVariance()); + meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - maths::CBasicStatistics::variance(sampledMomemts)) / + filter.marginalLikelihoodVariance()); std::sort(sampled.begin(), sampled.end()); - for (std::size_t k = 3u; k < sampled.size(); ++k) - { + for (std::size_t k = 3u; k < sampled.size(); ++k) { double q = 100.0 * static_cast(k) / static_cast(numberSampled); double expectedQuantile; CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile)); - LOG_DEBUG("quantile = " << q - << ", x_quantile = " << expectedQuantile - << ", quantile range = [" << sampled[k - 3] << "," << sampled[k] << 
"]"); + LOG_DEBUG("quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[k - 3] << "," + << sampled[k] << "]"); // Because the c.d.f. function for discrete R.V.s includes // the value of the p.d.f. the interval that contains the @@ -570,8 +498,7 @@ void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() } } -void CPoissonMeanConjugateTest::testCdf() -{ +void CPoissonMeanConjugateTest::testCdf() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testCdf |"); LOG_DEBUG("+--------------------------------------+"); @@ -584,19 +511,17 @@ void CPoissonMeanConjugateTest::testCdf() // cdf complement x for x < 0 = 0 const double rate = 5.0; - const std::size_t n[] = { 20u, 80u }; + const std::size_t n[] = {20u, 80u}; test::CRandomNumbers rng; CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior()); - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TUIntVec samples; rng.generatePoissonSamples(rate, n[i], samples); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); } @@ -608,13 +533,11 @@ void CPoissonMeanConjugateTest::testCdf() double f = (lb + ub) / 2.0; CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), lb, ub)); double fComplement = (lb + ub) / 2.0; - LOG_DEBUG("log(F(x)) = " << -f - << ", log(1 - F(x)) = " << fComplement); + LOG_DEBUG("log(F(x)) = " << -f << ", log(1 - F(x)) = " << fComplement); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(std::numeric_limits::min()), -f, 1e-10); CPPUNIT_ASSERT_EQUAL(1.0, std::exp(-fComplement)); - for (std::size_t j = 1u; j < 500; ++j) - { + for (std::size_t j = 1u; j < 500; ++j) { double x = static_cast(j) / 2.0; CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lb, ub)); @@ -622,15 +545,13 @@ void CPoissonMeanConjugateTest::testCdf() CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lb, ub)); fComplement = (lb + ub) / 2.0; - LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) - << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); + LOG_DEBUG("log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10); } } } -void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() -{ +void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() { LOG_DEBUG("+-----------------------------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples |"); LOG_DEBUG("+-----------------------------------------------------------------+"); @@ -642,31 +563,28 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() // We also check that the tail calculation attributes samples to // the appropriate tail of the distribution. 
- const double rates[] = { 0.1, 10.0, 50.0 }; - const double vs[] = { 0.5, 1.0, 2.0 }; + const double rates[] = {0.1, 10.0, 50.0}; + const double vs[] = {0.5, 1.0, 2.0}; test::CRandomNumbers rng; TMeanAccumulator meanError; - for (size_t i = 0; i < boost::size(rates); ++i) - { + for (size_t i = 0; i < boost::size(rates); ++i) { LOG_DEBUG("rate = " << rates[i]); TUIntVec samples; rng.generatePoissonSamples(rates[i], 1000, samples); CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior()); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, static_cast(samples[j]))); } double mean = filter.priorMean(); TDoubleVec likelihoods; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { double likelihood; filter.jointLogMarginalLikelihood(TDouble1Vec(1, samples[j]), likelihood); likelihoods.push_back(likelihood); @@ -674,28 +592,22 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() std::sort(likelihoods.begin(), likelihoods.end()); boost::math::poisson_distribution<> poisson(mean); - for (std::size_t k = 1u; k < 10; ++k) - { + for (std::size_t k = 1u; k < 10; ++k) { double x = boost::math::quantile(poisson, static_cast(k) / 10.0); TDouble1Vec sample(1, x); double fx; filter.jointLogMarginalLikelihood(sample, fx); - double px = static_cast(std::upper_bound(likelihoods.begin(), - likelihoods.end(), fx) - - likelihoods.begin()) - / static_cast(likelihoods.size()); + double px = static_cast(std::upper_bound(likelihoods.begin(), likelihoods.end(), fx) - likelihoods.begin()) / + static_cast(likelihoods.size()); double lb, ub; filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb, ub); double ssd = std::sqrt(px * (1.0 - px) / static_cast(samples.size())); - LOG_DEBUG("x = " << x - << ", expected P(x) = " << px - << ", actual P(x) = " << (lb + ub) / 2.0 - << " sample sd = " << ssd); + LOG_DEBUG("x = " << x << ", expected P(x) = " << px << ", actual P(x) = " << (lb + ub) / 2.0 << " sample sd = " << ssd); CPPUNIT_ASSERT_DOUBLES_EQUAL(px, (lb + ub) / 2.0, 8.0 * ssd); @@ -704,71 +616,63 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) - { + for (std::size_t k = 0u; k < boost::size(vs); ++k) { double mode = filter.marginalLikelihoodMode(weightStyle, TDouble4Vec(1, vs[k])); - double ss[] = { 0.9 * mode, 1.1 * mode }; + double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG("vs = " << vs[k] << ", mode = " << mode); double lb, ub; maths_t::ETail tail; - if (mode > 0.0) - { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), - lb, ub, tail); + if (mode > 0.0) { + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - if (mode > 0.0) - { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + if (mode > 0.0) { + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); 
CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } - if (mode > 0.0) - { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), - lb, ub, tail); + if (mode > 0.0) { + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, ub, tail); + lb, + ub, + tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -778,8 +682,7 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.02); } -void CPoissonMeanConjugateTest::testAnomalyScore() -{ +void CPoissonMeanConjugateTest::testAnomalyScore() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testAnomalyScore |"); LOG_DEBUG("+-----------------------------------------------+"); @@ -791,14 +694,14 @@ void CPoissonMeanConjugateTest::testAnomalyScore() // 1) high probability of detecting the anomalies, and // 2) a very low rate of false positives. 
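// Goals 1) and 2) reduce to set arithmetic on the flagged time indices:
// anything flagged but not injected is a false positive, and anything both
// flagged and injected is a detection. A minimal sketch with made-up times,
// mirroring the bookkeeping the test performs further down:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

int main() {
    std::vector<unsigned int> flagged{30, 57, 120};   // scores above threshold
    std::vector<unsigned int> injected{30, 120, 300}; // known anomaly times
    std::vector<unsigned int> falsePositives;
    std::vector<unsigned int> detections;
    // Both inputs must be sorted for the set algorithms.
    std::set_difference(flagged.begin(), flagged.end(),
                        injected.begin(), injected.end(),
                        std::back_inserter(falsePositives));
    std::set_intersection(flagged.begin(), flagged.end(),
                          injected.begin(), injected.end(),
                          std::back_inserter(detections));
    assert(falsePositives.size() == 1 && falsePositives[0] == 57);
    assert(detections.size() == 2);
    return 0;
}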
- const double decayRates[] = { 0.0, 0.001, 0.1 }; + const double decayRates[] = {0.0, 0.001, 0.1}; - const double processRates[] = { 3.0, 15.0, 200.0 }; + const double processRates[] = {3.0, 15.0, 200.0}; const double threshold = 0.02; - const unsigned int anomalyTimes[] = { 30u, 120u, 300u, 420u }; - const double anomalies[] = { 4.0, 5.0, 10.0, 15.0, 0.0 }; + const unsigned int anomalyTimes[] = {30u, 120u, 300u, 420u}; + const double anomalies[] = {4.0, 5.0, 10.0, 15.0, 0.0}; test::CRandomNumbers rng; @@ -808,10 +711,9 @@ void CPoissonMeanConjugateTest::testAnomalyScore() file.open("results.m"); double totalFalsePositiveRate = 0.0; - std::size_t totalPositives[] = { 0u, 0u, 0u }; + std::size_t totalPositives[] = {0u, 0u, 0u}; - for (std::size_t i = 0; i < boost::size(processRates); ++i) - { + for (std::size_t i = 0; i < boost::size(processRates); ++i) { LOG_DEBUG("processRate = " << processRates[i]); boost::math::poisson_distribution<> poisson(processRates[i]); @@ -819,10 +721,8 @@ void CPoissonMeanConjugateTest::testAnomalyScore() TUIntVec samples; rng.generatePoissonSamples(processRates[i], 500, samples); - for (std::size_t j = 0; j < boost::size(decayRates); ++j) - { - CPoissonMeanConjugate filter( - CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[j])); + for (std::size_t j = 0; j < boost::size(decayRates); ++j) { + CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[j])); ++test; @@ -832,21 +732,18 @@ void CPoissonMeanConjugateTest::testAnomalyScore() scores << "score" << test << " = ["; TUIntVec candidateAnomalies; - for (unsigned int time = 0; time < samples.size(); ++time) - { - double sample = samples[time] - + (anomalies[std::find(boost::begin(anomalyTimes), - boost::end(anomalyTimes), time) - - boost::begin(anomalyTimes)] - * boost::math::standard_deviation(poisson)); + for (unsigned int time = 0; time < samples.size(); ++time) { + double sample = + samples[time] + + (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - boost::begin(anomalyTimes)] * + boost::math::standard_deviation(poisson)); TDouble1Vec sampleVec(1, sample); filter.addSamples(sampleVec); double score; filter.anomalyScore(maths_t::E_TwoSided, sampleVec, score); - if (score > threshold) - { + if (score > threshold) { candidateAnomalies.push_back(time); } @@ -858,8 +755,7 @@ void CPoissonMeanConjugateTest::testAnomalyScore() x << "];\n"; scores << "];\n"; - file << x.str() << scores.str() - << "plot(x" << test << ", score" << test << ");\n" + file << x.str() << scores.str() << "plot(x" << test << ", score" << test << ");\n" << "input(\"Hit any key for next test\");\n\n"; TUIntVec falsePositives; @@ -869,9 +765,7 @@ void CPoissonMeanConjugateTest::testAnomalyScore() boost::end(anomalyTimes), std::back_inserter(falsePositives)); - double falsePositiveRate = - static_cast(falsePositives.size()) - / static_cast(samples.size()); + double falsePositiveRate = static_cast(falsePositives.size()) / static_cast(samples.size()); totalFalsePositiveRate += falsePositiveRate; @@ -882,8 +776,7 @@ void CPoissonMeanConjugateTest::testAnomalyScore() boost::end(anomalyTimes), std::back_inserter(positives)); - LOG_DEBUG("falsePositiveRate = " << falsePositiveRate - << ", positives = " << positives.size()); + LOG_DEBUG("falsePositiveRate = " << falsePositiveRate << ", positives = " << positives.size()); // False alarm rate should be less than 0.4%. 
CPPUNIT_ASSERT(falsePositiveRate <= 0.02); @@ -898,8 +791,7 @@ void CPoissonMeanConjugateTest::testAnomalyScore() totalFalsePositiveRate /= static_cast(test); LOG_DEBUG("totalFalsePositiveRate = " << totalFalsePositiveRate); - for (std::size_t i = 0; i < boost::size(totalPositives); ++i) - { + for (std::size_t i = 0; i < boost::size(totalPositives); ++i) { LOG_DEBUG("positives = " << totalPositives[i]); // Should detect all but one anomaly. @@ -910,8 +802,7 @@ void CPoissonMeanConjugateTest::testAnomalyScore() CPPUNIT_ASSERT(totalFalsePositiveRate <= 0.004); } -void CPoissonMeanConjugateTest::testOffset() -{ +void CPoissonMeanConjugateTest::testOffset() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testOffset |"); LOG_DEBUG("+-----------------------------------------+"); @@ -919,8 +810,8 @@ void CPoissonMeanConjugateTest::testOffset() // The idea of this test is to check that the offset correctly cancels // out a translation applied to a log-normally distributed data set. - const double offsets[] = { -0.5, 0.5 }; - const double decayRates[] = { 0.0, 0.001, 0.01 }; + const double offsets[] = {-0.5, 0.5}; + const double decayRates[] = {0.0, 0.001, 0.01}; const double rate = 4.0; @@ -931,17 +822,12 @@ void CPoissonMeanConjugateTest::testOffset() TUIntVec samples; rng.generatePoissonSamples(rate, 100, samples); - for (std::size_t i = 0; i < boost::size(offsets); ++i) - { - for (std::size_t j = 0; j < boost::size(decayRates); ++j) - { - CPoissonMeanConjugate filter1( - CPoissonMeanConjugate::nonInformativePrior(offsets[i], decayRates[j])); - CPoissonMeanConjugate filter2( - CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[j])); - - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t i = 0; i < boost::size(offsets); ++i) { + for (std::size_t j = 0; j < boost::size(decayRates); ++j) { + CPoissonMeanConjugate filter1(CPoissonMeanConjugate::nonInformativePrior(offsets[i], decayRates[j])); + CPoissonMeanConjugate filter2(CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[j])); + + for (std::size_t k = 0u; k < samples.size(); ++k) { TDouble1Vec offsetSample(1, samples[k] - offsets[i]); filter1.addSamples(offsetSample); filter1.propagateForwardsByTime(1.0); @@ -976,8 +862,7 @@ void CPoissonMeanConjugateTest::testOffset() } } -void CPoissonMeanConjugateTest::testPersist() -{ +void CPoissonMeanConjugateTest::testPersist() { LOG_DEBUG("+------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testPersist |"); LOG_DEBUG("+------------------------------------------+"); @@ -990,11 +875,9 @@ void CPoissonMeanConjugateTest::testPersist() rng.generatePoissonSamples(rate, 100, samples); maths::CPoissonMeanConjugate origFilter(CPoissonMeanConjugate::nonInformativePrior()); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + for (std::size_t i = 0u; i < samples.size(); ++i) { + origFilter.addSamples( + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1020,8 +903,7 @@ void CPoissonMeanConjugateTest::testPersist() maths::MINIMUM_CATEGORY_COUNT); maths::CPoissonMeanConjugate restoredFilter(params, traverser); - LOG_DEBUG("orig checksum = " << checksum - << " restored 
checksum = " << restoredFilter.checksum()); + LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same @@ -1035,8 +917,7 @@ void CPoissonMeanConjugateTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CPoissonMeanConjugateTest::testNegativeSample() -{ +void CPoissonMeanConjugateTest::testNegativeSample() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CPoissonMeanConjugateTest::testNegativeSample |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -1055,8 +936,7 @@ void CPoissonMeanConjugateTest::testNegativeSample() rng.generatePoissonSamples(rate, 100, samples_); TDoubleVec samples; samples.reserve(samples_.size()); - for (std::size_t i = 0u; i < samples_.size(); ++i) - { + for (std::size_t i = 0u; i < samples_.size(); ++i) { samples.push_back(static_cast(samples_[i])); } @@ -1077,50 +957,35 @@ void CPoissonMeanConjugateTest::testNegativeSample() CPPUNIT_ASSERT(filter1.equalTolerance(filter2, equal)); } -CppUnit::Test *CPoissonMeanConjugateTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CPoissonMeanConjugateTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testMultipleUpdate", - &CPoissonMeanConjugateTest::testMultipleUpdate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testPropagation", - &CPoissonMeanConjugateTest::testPropagation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testMeanEstimation", - &CPoissonMeanConjugateTest::testMeanEstimation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testMarginalLikelihood", - &CPoissonMeanConjugateTest::testMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testMarginalLikelihoodMode", - &CPoissonMeanConjugateTest::testMarginalLikelihoodMode) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testMarginalLikelihoodVariance", - &CPoissonMeanConjugateTest::testMarginalLikelihoodVariance) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testSampleMarginalLikelihood", - &CPoissonMeanConjugateTest::testSampleMarginalLikelihood) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testCdf", - &CPoissonMeanConjugateTest::testCdf) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples", - &CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testAnomalyScore", - &CPoissonMeanConjugateTest::testAnomalyScore) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testOffset", - &CPoissonMeanConjugateTest::testOffset) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testPersist", - &CPoissonMeanConjugateTest::testPersist) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CPoissonMeanConjugateTest::testNegativeSample", - &CPoissonMeanConjugateTest::testNegativeSample) ); +CppUnit::Test* CPoissonMeanConjugateTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPoissonMeanConjugateTest"); + + suiteOfTests->addTest(new 
CppUnit::TestCaller("CPoissonMeanConjugateTest::testMultipleUpdate", + &CPoissonMeanConjugateTest::testMultipleUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testPropagation", + &CPoissonMeanConjugateTest::testPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testMeanEstimation", + &CPoissonMeanConjugateTest::testMeanEstimation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testMarginalLikelihood", + &CPoissonMeanConjugateTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testMarginalLikelihoodMode", + &CPoissonMeanConjugateTest::testMarginalLikelihoodMode)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testMarginalLikelihoodVariance", + &CPoissonMeanConjugateTest::testMarginalLikelihoodVariance)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testSampleMarginalLikelihood", + &CPoissonMeanConjugateTest::testSampleMarginalLikelihood)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CPoissonMeanConjugateTest::testCdf", &CPoissonMeanConjugateTest::testCdf)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples", &CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testAnomalyScore", + &CPoissonMeanConjugateTest::testAnomalyScore)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testOffset", + &CPoissonMeanConjugateTest::testOffset)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testPersist", + &CPoissonMeanConjugateTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller("CPoissonMeanConjugateTest::testNegativeSample", + &CPoissonMeanConjugateTest::testNegativeSample)); return suiteOfTests; } - diff --git a/lib/maths/unittest/CPoissonMeanConjugateTest.h b/lib/maths/unittest/CPoissonMeanConjugateTest.h index c85584f790..8a302249f9 100644 --- a/lib/maths/unittest/CPoissonMeanConjugateTest.h +++ b/lib/maths/unittest/CPoissonMeanConjugateTest.h @@ -9,25 +9,23 @@ #include +class CPoissonMeanConjugateTest : public CppUnit::TestFixture { +public: + void testMultipleUpdate(); + void testPropagation(); + void testMeanEstimation(); + void testMarginalLikelihood(); + void testMarginalLikelihoodMode(); + void testMarginalLikelihoodVariance(); + void testSampleMarginalLikelihood(); + void testCdf(); + void testProbabilityOfLessLikelySamples(); + void testAnomalyScore(); + void testOffset(); + void testPersist(); + void testNegativeSample(); -class CPoissonMeanConjugateTest : public CppUnit::TestFixture -{ - public: - void testMultipleUpdate(); - void testPropagation(); - void testMeanEstimation(); - void testMarginalLikelihood(); - void testMarginalLikelihoodMode(); - void testMarginalLikelihoodVariance(); - void testSampleMarginalLikelihood(); - void testCdf(); - void testProbabilityOfLessLikelySamples(); - void testAnomalyScore(); - void testOffset(); - void testPersist(); - void testNegativeSample(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CPoissonMeanConjugateTest_h diff --git a/lib/maths/unittest/CPriorTest.cc b/lib/maths/unittest/CPriorTest.cc index 15c0631053..7646a87e9a 100644 --- a/lib/maths/unittest/CPriorTest.cc +++ b/lib/maths/unittest/CPriorTest.cc @@ -10,11 +10,11 @@ #include #include -#include 
-#include -#include #include +#include #include +#include +#include #include #include @@ -26,72 +26,55 @@ using namespace ml; using namespace handy_typedefs; -namespace -{ +namespace { using TDoubleVec = std::vector<double>; -class CX -{ - public: - bool operator()(const double &x, double &result) const - { - result = x; - return true; - } +class CX { +public: + bool operator()(const double& x, double& result) const { + result = x; + return true; + } }; -class CVariance -{ - public: - CVariance(const double mean) : m_Mean(mean) {} +class CVariance { +public: + CVariance(const double mean) : m_Mean(mean) {} - bool operator()(const double &x, double &result) const - { - result = (x - m_Mean) * (x - m_Mean); - return true; - } + bool operator()(const double& x, double& result) const { + result = (x - m_Mean) * (x - m_Mean); + return true; + } - private: - double m_Mean; +private: + double m_Mean; }; -class CMinusLogLikelihood -{ - public: - using TDoubleVecVec = std::vector<TDoubleVec>; - - public: - CMinusLogLikelihood(const maths::CPrior &prior) : - m_Prior(&prior), - m_WeightStyle(1, maths_t::E_SampleCountWeight), - m_X(1, 0.0), - m_Weight(1, TDoubleVec(1, 1.0)) - {} - - bool operator()(const double &x, double &result) const - { - m_X[0] = x; - maths_t::EFloatingPointErrorStatus status = - m_Prior->jointLogMarginalLikelihood(m_WeightStyle, - m_X, - m_Weight, - result); - result = -result; - return !(status & maths_t::E_FpFailed); - } - - private: - const maths::CPrior *m_Prior; - maths_t::TWeightStyleVec m_WeightStyle; - mutable TDoubleVec m_X; - TDoubleVecVec m_Weight; -}; +class CMinusLogLikelihood { +public: + using TDoubleVecVec = std::vector<TDoubleVec>; + +public: + CMinusLogLikelihood(const maths::CPrior& prior) + : m_Prior(&prior), m_WeightStyle(1, maths_t::E_SampleCountWeight), m_X(1, 0.0), m_Weight(1, TDoubleVec(1, 1.0)) {} + bool operator()(const double& x, double& result) const { + m_X[0] = x; + maths_t::EFloatingPointErrorStatus status = m_Prior->jointLogMarginalLikelihood(m_WeightStyle, m_X, m_Weight, result); + result = -result; + return !(status & maths_t::E_FpFailed); + } + +private: + const maths::CPrior* m_Prior; + maths_t::TWeightStyleVec m_WeightStyle; + mutable TDoubleVec m_X; + TDoubleVecVec m_Weight; +}; } -void CPriorTest::testExpectation() -{ +void CPriorTest::testExpectation() { LOG_DEBUG("+-------------------------------+"); LOG_DEBUG("| CPriorTest::testExpectation |"); LOG_DEBUG("+-------------------------------+"); @@ -101,8 +84,7 @@ void CPriorTest::testExpectation() test::CRandomNumbers rng; - CNormalMeanPrecConjugate prior( - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + CNormalMeanPrecConjugate prior(maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)); TDoubleVec samples; rng.generateNormalSamples(1.0, 1.5, 10000u, samples); @@ -113,57 +95,39 @@ void CPriorTest::testExpectation() double trueMean = maths::CBasicStatistics::mean(moments); LOG_DEBUG("true mean = " << trueMean); - for (std::size_t n = 1; n < 10; ++n) - { + for (std::size_t n = 1; n < 10; ++n) { double mean; CPPUNIT_ASSERT(prior.expectation(CX(), n, mean)); - LOG_DEBUG("n = " << n - << ", mean = " << mean - << ", error = " << std::fabs(mean - trueMean)); + LOG_DEBUG("n = " << n << ", mean = " << mean << ", error = " << std::fabs(mean - trueMean)); CPPUNIT_ASSERT_DOUBLES_EQUAL(trueMean, mean, 1e-10); } - double varianceErrors[] = - { - 1.4, 0.1, 0.05, 0.01, 0.005, 0.0008, 0.0008, 0.0007, 0.0005 - }; + double varianceErrors[] = {1.4, 0.1, 0.05, 0.01, 0.005, 
0.0008, 0.0008, 0.0007, 0.0005}; double trueVariance = maths::CBasicStatistics::variance(moments); LOG_DEBUG("true variance = " << trueVariance); - for (std::size_t n = 1; n < 10; ++n) - { + for (std::size_t n = 1; n < 10; ++n) { double variance; CPPUNIT_ASSERT(prior.expectation(CVariance(prior.mean()), n, variance)); - LOG_DEBUG("n = " << n - << ", variance = " << variance - << ", error = " << std::fabs(variance - trueVariance)); + LOG_DEBUG("n = " << n << ", variance = " << variance << ", error = " << std::fabs(variance - trueVariance)); CPPUNIT_ASSERT_DOUBLES_EQUAL(trueVariance, variance, varianceErrors[n - 1]); } - double entropyErrors[] = - { - 0.5, 0.05, 0.01, 0.005, 0.001, 0.0003, 0.0003, 0.0002, 0.0002 - }; + double entropyErrors[] = {0.5, 0.05, 0.01, 0.005, 0.001, 0.0003, 0.0003, 0.0002, 0.0002}; boost::math::normal_distribution<> normal(trueMean, std::sqrt(trueVariance)); double trueEntropy = maths::CTools::differentialEntropy(normal); LOG_DEBUG("true differential entropy = " << trueEntropy); - for (std::size_t n = 1; n < 10; ++n) - { + for (std::size_t n = 1; n < 10; ++n) { double entropy; CPPUNIT_ASSERT(prior.expectation(CMinusLogLikelihood(prior), n, entropy)); - LOG_DEBUG("n = " << n - << ", differential entropy = " << entropy - << ", error = " << std::fabs(entropy - trueEntropy)); + LOG_DEBUG("n = " << n << ", differential entropy = " << entropy << ", error = " << std::fabs(entropy - trueEntropy)); CPPUNIT_ASSERT_DOUBLES_EQUAL(trueEntropy, entropy, entropyErrors[n - 1]); } } -CppUnit::Test* CPriorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CPriorTest"); +CppUnit::Test* CPriorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPriorTest"); - suiteOfTests->addTest( new CppUnit::TestCaller<CPriorTest>( - "CPriorTest::testExpectation", - &CPriorTest::testExpectation) ); + suiteOfTests->addTest(new CppUnit::TestCaller<CPriorTest>("CPriorTest::testExpectation", &CPriorTest::testExpectation)); return suiteOfTests; } diff --git a/lib/maths/unittest/CPriorTest.h b/lib/maths/unittest/CPriorTest.h index 0a51f47c53..d0cdf88d6f 100644 --- a/lib/maths/unittest/CPriorTest.h +++ b/lib/maths/unittest/CPriorTest.h @@ -9,12 +9,11 @@ #include -class CPriorTest : public CppUnit::TestFixture -{ - public: - void testExpectation(); +class CPriorTest : public CppUnit::TestFixture { +public: + void testExpectation(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CPriorTest_h diff --git a/lib/maths/unittest/CProbabilityAggregatorsTest.cc b/lib/maths/unittest/CProbabilityAggregatorsTest.cc index 6ba48e26f0..efa5f524c1 100644 --- a/lib/maths/unittest/CProbabilityAggregatorsTest.cc +++ b/lib/maths/unittest/CProbabilityAggregatorsTest.cc @@ -6,8 +6,8 @@ #include "CProbabilityAggregatorsTest.h" -#include #include +#include #include #include @@ -23,34 +23,26 @@ using namespace ml; using namespace maths; using namespace test; -namespace -{ +namespace { using TDoubleVec = std::vector<double>; -class CGammaKernel -{ - public: - CGammaKernel(const double &s, const double &x) : - m_S(s), m_X(x) - { - } +class CGammaKernel { +public: + CGammaKernel(const double& s, const double& x) : m_S(s), m_X(x) {} - bool operator()(const double &u, double &result) const - { - result = std::pow(m_X - std::log(1.0 - u/m_S), m_S - 1.0); - return true; - } + bool operator()(const double& u, double& result) const { + result = std::pow(m_X - std::log(1.0 - u / m_S), m_S - 1.0); + return true; + } - private: - double m_S; - double m_X; +private: + double m_S; + 
double m_X; }; -double logUpperIncompleteGamma(double s, double x) -{ - if (s <= 1.0) - { +double logUpperIncompleteGamma(double s, double x) { + if (s <= 1.0) { // We want to evaluate: // Int_u=x,inf{ u^(s-1) * exp(-u) }du // // CGammaKernel kernel(s, x); int n = 40; - for (int i = 0; i < n; ++i) - { + for (int i = 0; i < n; ++i) { double a = s * static_cast<double>(i) / static_cast<double>(n); double b = s * (static_cast<double>(i) + 1.0) / static_cast<double>(n); double partialRemainder; @@ -88,81 +79,61 @@ return normalizer + std::log(std::exp(t1 - normalizer) + std::exp(t2 - normalizer)); } -class CExpectedLogProbabilityOfMFromNExtremeSamples -{ - public: - using TMinValueAccumulator = CBasicStatistics::COrderStatisticsHeap<double>; - - class CLogIntegrand - { - public: - CLogIntegrand(const TDoubleVec &limits, - std::size_t n, - std::size_t m, - std::size_t i) : - m_Limits(limits), m_N(n), m_M(m), m_I(i) - { - } - - bool operator()(double x, double &result) const - { - result = this->evaluate(x); - return true; - } - - private: - double evaluate(double x) const - { - if (m_I == m_M) - { - return static_cast<double>(m_N - m_M) * std::log(1.0 - x); - } - double result; - CLogIntegrand f(m_Limits, m_N, m_M, m_I + 1u); - CIntegration::logGaussLegendre(f, x, m_Limits[m_I], result); - return result; - } - - TDoubleVec m_Limits; - std::size_t m_N; - std::size_t m_M; - std::size_t m_I; - }; +class CExpectedLogProbabilityOfMFromNExtremeSamples { +public: + using TMinValueAccumulator = CBasicStatistics::COrderStatisticsHeap<double>; + class CLogIntegrand { public: - CExpectedLogProbabilityOfMFromNExtremeSamples(std::size_t m) : - m_P(m), - m_N(0u) - { - } + CLogIntegrand(const TDoubleVec& limits, std::size_t n, std::size_t m, std::size_t i) : m_Limits(limits), m_N(n), m_M(m), m_I(i) {} - void add(const double &probability) - { - m_P.add(probability); - ++m_N; + bool operator()(double x, double& result) const { + result = this->evaluate(x); + return true; } - double calculate() - { + private: + double evaluate(double x) const { + if (m_I == m_M) { + return static_cast<double>(m_N - m_M) * std::log(1.0 - x); + } double result; - m_P.sort(); - TDoubleVec p(m_P.begin(), m_P.end()); - CLogIntegrand f(p, m_N, p.size(), 1u); - CIntegration::logGaussLegendre(f, 0, p[0], result); - result += boost::math::lgamma(static_cast<double>(m_N) + 1.0) - - boost::math::lgamma(static_cast<double>(m_N - p.size()) + 1.0); + CLogIntegrand f(m_Limits, m_N, m_M, m_I + 1u); + CIntegration::logGaussLegendre(f, x, m_Limits[m_I], result); return result; } - private: - TMinValueAccumulator m_P; + TDoubleVec m_Limits; std::size_t m_N; -}; + std::size_t m_M; + std::size_t m_I; + }; + +public: + CExpectedLogProbabilityOfMFromNExtremeSamples(std::size_t m) : m_P(m), m_N(0u) {} + + void add(const double& probability) { + m_P.add(probability); + ++m_N; + } + double calculate() { + double result; + m_P.sort(); + TDoubleVec p(m_P.begin(), m_P.end()); + CLogIntegrand f(p, m_N, p.size(), 1u); + CIntegration::logGaussLegendre(f, 0, p[0], result); + result += boost::math::lgamma(static_cast<double>(m_N) + 1.0) - boost::math::lgamma(static_cast<double>(m_N - p.size()) + 1.0); + return result; + } + +private: + TMinValueAccumulator m_P; + std::size_t m_N; +}; } -void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() -{ +void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { LOG_DEBUG("+------------------------------------------------------------------------+");
LOG_DEBUG("| CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples |"); LOG_DEBUG("+------------------------------------------------------------------------+"); @@ -189,7 +160,7 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { const unsigned int numberSamples = 20000u; - const double percentiles[] = { 0.02, 0.1, 0.3, 0.5 }; + const double percentiles[] = {0.02, 0.1, 0.3, 0.5}; TDoubleVec samples1; rng.generateNormalSamples(1.0, 3.0, numberSamples, samples1); @@ -206,26 +177,16 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() double totalExpectedCount = 0.0; double totalCount = 0.0; - for (size_t i = 0; i < boost::size(percentiles); ++i) - { - for (size_t j = 0; j < boost::size(percentiles); ++j) - { - for (size_t k = 0; k < boost::size(percentiles); ++k) - { - LOG_DEBUG("percentile1 = " << percentiles[i] - << ", percentile2 = " << percentiles[j] - << ", percentile3 = " << percentiles[k]); - - double probabilities[] = - { - 2.0 * percentiles[i], - 2.0 * percentiles[j], - 2.0 * percentiles[k] - }; + for (size_t i = 0; i < boost::size(percentiles); ++i) { + for (size_t j = 0; j < boost::size(percentiles); ++j) { + for (size_t k = 0; k < boost::size(percentiles); ++k) { + LOG_DEBUG("percentile1 = " << percentiles[i] << ", percentile2 = " << percentiles[j] + << ", percentile3 = " << percentiles[k]); + + double probabilities[] = {2.0 * percentiles[i], 2.0 * percentiles[j], 2.0 * percentiles[k]}; CJointProbabilityOfLessLikelySamples jointProbability; - for (size_t l = 0; l < boost::size(probabilities); ++l) - { + for (size_t l = 0; l < boost::size(probabilities); ++l) { LOG_DEBUG("probability = " << probabilities[l]); jointProbability.add(probabilities[l]); } @@ -239,17 +200,13 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() double quantile1 = boost::math::quantile(normal1, percentiles[i]); double quantile2 = boost::math::quantile(normal2, percentiles[j]); double quantile3 = boost::math::quantile(normal3, percentiles[k]); - double likelihood = CTools::safePdf(normal1, quantile1) - * CTools::safePdf(normal2, quantile2) - * CTools::safePdf(normal3, quantile3); - - for (unsigned int sample = 0; sample < numberSamples; ++sample) - { - double sampleLikelihood = CTools::safePdf(normal1, samples1[sample]) - * CTools::safePdf(normal2, samples2[sample]) - * CTools::safePdf(normal3, samples3[sample]); - if (sampleLikelihood < likelihood) - { + double likelihood = + CTools::safePdf(normal1, quantile1) * CTools::safePdf(normal2, quantile2) * CTools::safePdf(normal3, quantile3); + + for (unsigned int sample = 0; sample < numberSamples; ++sample) { + double sampleLikelihood = CTools::safePdf(normal1, samples1[sample]) * CTools::safePdf(normal2, samples2[sample]) * + CTools::safePdf(normal3, samples3[sample]); + if (sampleLikelihood < likelihood) { count += 1.0; } } @@ -265,8 +222,7 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() } } - double totalError = std::fabs(totalCount - totalExpectedCount) - / std::max(totalCount, totalExpectedCount); + double totalError = std::fabs(totalCount - totalExpectedCount) / std::max(totalCount, totalExpectedCount); LOG_DEBUG("totalError = " << totalError); CPPUNIT_ASSERT(totalError < 0.01); } @@ -276,8 +232,7 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() rng.generateUniformSamples(0.0, 1.0, 100u, probabilities); std::fill_n(std::back_inserter(probabilities), 5u, 1e-4); CJointProbabilityOfLessLikelySamples 
expectedJointProbability; - for (std::size_t i = 0u; i < probabilities.size(); ++i) - { + for (std::size_t i = 0u; i < probabilities.size(); ++i) { expectedJointProbability.add(probabilities[i]); double p; @@ -292,18 +247,14 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() double probability; CPPUNIT_ASSERT(jointProbability.calculate(probability)); - LOG_DEBUG("probability = " << probability - << ", expectedProbability = " << expectedProbability); + LOG_DEBUG("probability = " << probability << ", expectedProbability = " << expectedProbability); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, - probability, - 1e-5 * expectedProbability); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 1e-5 * expectedProbability); } } } -void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() -{ +void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() { LOG_DEBUG("+---------------------------------------------------------------------------+"); LOG_DEBUG("| CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples |"); LOG_DEBUG("+---------------------------------------------------------------------------+"); @@ -317,8 +268,7 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() CLogJointProbabilityOfLessLikelySamples logJointProbability; std::string line; - while (std::getline(ifs, line)) - { + while (std::getline(ifs, line)) { double probability; CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(line, probability)); logJointProbability.add(probability); @@ -334,8 +284,7 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() double lowerBound, upperBound; CPPUNIT_ASSERT(logJointProbability.calculateLowerBound(lowerBound)); CPPUNIT_ASSERT(logJointProbability.calculateUpperBound(upperBound)); - LOG_DEBUG("log(pu) - log(p) = " << upperBound - logP - << ", log(p) - log(pl) " << logP - lowerBound); + LOG_DEBUG("log(pu) - log(p) = " << upperBound - logP << ", log(p) - log(pl) " << logP - lowerBound); CPPUNIT_ASSERT(logP < upperBound); CPPUNIT_ASSERT(logP > lowerBound); @@ -346,11 +295,10 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() // Now test the quality of bounds near underflow. 
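For reference, the quantity whose bounds are exercised below is the normalised upper incomplete gamma function Q(s, x) = Gamma(s, x) / Gamma(s) at s = numberSamples / 2 and x = distance / 2, i.e. a chi-squared upper tail. The following is a minimal sketch of the direct evaluation, assuming boost::math::gamma_q; the helper name is illustrative and not part of the library:

#include <boost/math/special_functions/gamma.hpp>
#include <cmath>

// Direct evaluation of log Q(s, x). This underflows exactly where the test
// below operates (x >> s), which is why the lower/upper bound calculators
// are needed in that regime.
double naiveLogJointProbability(double numberSamples, double distance) {
    return std::log(boost::math::gamma_q(numberSamples / 2.0, distance / 2.0));
}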
{ - const double p[] = { 1e-1, 1e-2, 1e-3, 1e-4 }; - const double expectedErrors[] = { 7.7e-4, 2.6e-4, 2e-4, 1.7e-4 }; + const double p[] = {1e-1, 1e-2, 1e-3, 1e-4}; + const double expectedErrors[] = {7.7e-4, 2.6e-4, 2e-4, 1.7e-4}; - for (size_t i = 0; i < boost::size(p); ++i) - { + for (size_t i = 0; i < boost::size(p); ++i) { LOG_DEBUG("p = " << p[i]); CJointProbabilityOfLessLikelySamples jointProbability; @@ -359,10 +307,8 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() double error = 0.0; int count = 0; - for (;;) - { - if (count >= 20) - { + for (;;) { + if (count >= 20) { break; } jointProbability.add(p[i]); @@ -370,8 +316,7 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() double probability; CPPUNIT_ASSERT(jointProbability.calculate(probability)); - if (probability < 10.0 * std::numeric_limits::min()) - { + if (probability < 10.0 * std::numeric_limits::min()) { ++count; double s = jointProbability.numberSamples() / 2.0; @@ -384,8 +329,7 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() double lowerBound, upperBound; CPPUNIT_ASSERT(logJointProbability.calculateLowerBound(lowerBound)); CPPUNIT_ASSERT(logJointProbability.calculateUpperBound(upperBound)); - LOG_DEBUG("log(pu) - log(p) = " << upperBound - logP - << ", log(p) - log(pl) " << logP - lowerBound); + LOG_DEBUG("log(pu) - log(p) = " << upperBound - logP << ", log(p) - log(pl) " << logP - lowerBound); CPPUNIT_ASSERT(logP < upperBound); CPPUNIT_ASSERT(logP > lowerBound); @@ -393,9 +337,7 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() CPPUNIT_ASSERT_DOUBLES_EQUAL(upperBound, lowerBound, std::fabs(8e-4 * upperBound)); error += (upperBound - lowerBound) / std::fabs(upperBound); - } - else if (jointProbability.numberSamples() > 1.0) - { + } else if (jointProbability.numberSamples() > 1.0) { double s = jointProbability.numberSamples() / 2.0; double x = jointProbability.distance() / 2.0; @@ -420,8 +362,7 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() } } -void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() -{ +void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() { LOG_DEBUG("+-------------------------------------------------------------+"); LOG_DEBUG("| CProbabilityAggregatorsTest::testProbabilityExtremeSample |"); LOG_DEBUG("+-------------------------------------------------------------+"); @@ -429,34 +370,19 @@ void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() // The idea of this test is to check that the extreme sample // probability is correctly predicted. - std::size_t sampleSizes[] = - { - 2u, - 20u, - 1500u - }; - - double probabilities[] = - { - 0.1, - 0.05, - 0.01, - 0.001, - 0.000001 - }; + std::size_t sampleSizes[] = {2u, 20u, 1500u}; + + double probabilities[] = {0.1, 0.05, 0.01, 0.001, 0.000001}; CRandomNumbers rng; double totalError = 0.0; double totalProbability = 0.0; - for (size_t i = 0; i < boost::size(sampleSizes); ++i) - { - for (size_t j = 0; j < boost::size(probabilities); ++j) - { + for (size_t i = 0; i < boost::size(sampleSizes); ++i) { + for (size_t j = 0; j < boost::size(probabilities); ++j) { CProbabilityOfExtremeSample probabilityCalculator; - for (std::size_t k = 0u; k < sampleSizes[i]; ++k) - { + for (std::size_t k = 0u; k < sampleSizes[i]; ++k) { // Add on a small positive number to make sure we are // sampling the minimum probability. 
double noise = static_cast(k % 20) / 50.0; @@ -466,15 +392,13 @@ void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() double probability; CPPUNIT_ASSERT(probabilityCalculator.calculate(probability)); - LOG_DEBUG("sample size = " << sampleSizes[i] - << ", extreme sample probability = " << probabilities[j] - << ", probability = " << probability); + LOG_DEBUG("sample size = " << sampleSizes[i] << ", extreme sample probability = " << probabilities[j] + << ", probability = " << probability); unsigned int nTrials = 10000u; unsigned int count = 0; - for (unsigned int k = 0; k < nTrials; ++k) - { + for (unsigned int k = 0; k < nTrials; ++k) { TDoubleVec samples; rng.generateNormalSamples(0.0, 1.0, sampleSizes[i], samples); boost::math::normal_distribution<> normal(0.0, std::sqrt(1.0)); @@ -482,23 +406,19 @@ void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() using TMinValue = CBasicStatistics::COrderStatisticsStack; TMinValue minValue; - for (std::size_t l = 0u; l < samples.size(); ++l) - { + for (std::size_t l = 0u; l < samples.size(); ++l) { double p = 2.0 * boost::math::cdf(normal, -std::fabs(samples[l])); minValue.add(p); } - if (minValue[0] < probabilities[j]) - { + if (minValue[0] < probabilities[j]) { ++count; } } - double expectedProbability = static_cast(count) - / static_cast(nTrials); - LOG_DEBUG("count = " << count - << ", expectedProbability = " << expectedProbability - << ", error = " << std::fabs(probability - expectedProbability)); + double expectedProbability = static_cast(count) / static_cast(nTrials); + LOG_DEBUG("count = " << count << ", expectedProbability = " << expectedProbability + << ", error = " << std::fabs(probability - expectedProbability)); CPPUNIT_ASSERT_DOUBLES_EQUAL(probability, expectedProbability, 0.012); @@ -507,13 +427,11 @@ void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() } } - LOG_DEBUG("totalError = " << totalError - << ", totalProbability = " << totalProbability); + LOG_DEBUG("totalError = " << totalError << ", totalProbability = " << totalProbability); CPPUNIT_ASSERT(totalError / totalProbability < 0.01); } -void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() -{ +void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { LOG_DEBUG("+----------------------------------------------------------------------+"); LOG_DEBUG("| CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples |"); LOG_DEBUG("+----------------------------------------------------------------------+"); @@ -533,18 +451,13 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() // 10) Underflow of numerical integration. 
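The extreme-sample probability checked above has a simple closed form for n independent p-values, P(min_i p_i <= p) = 1 - (1 - p)^n, which the Monte Carlo count approximates; the cases that follow generalise this to the joint distribution of the M smallest order statistics. A minimal sketch of a numerically stable evaluation of the closed form (the function name is illustrative only):

#include <cmath>
#include <cstddef>

// P(min of n independent p-values <= p) = 1 - (1 - p)^n. Evaluating via
// log1p/expm1 avoids catastrophic cancellation when p is tiny and n is large.
double minPValueProbability(double p, std::size_t n) {
    return -std::expm1(static_cast<double>(n) * std::log1p(-p));
}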
{ - double probabilities[] = - { - 0.5, 0.5, 0.4, 0.02, 0.7, 0.9, 0.4, 0.2, 0.03, 0.5, 0.6 - }; + double probabilities[] = {0.5, 0.5, 0.4, 0.02, 0.7, 0.9, 0.4, 0.2, 0.03, 0.5, 0.6}; - for (std::size_t i = 1u; i < 6u; ++i) - { + for (std::size_t i = 1u; i < 6u; ++i) { CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(i); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(i); - for (std::size_t j = 0; j < boost::size(probabilities); ++j) - { + for (std::size_t j = 0; j < boost::size(probabilities); ++j) { expectedProbabilityCalculator.add(probabilities[j]); probabilityCalculator.add(probabilities[j]); } @@ -553,18 +466,14 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() double p2; CPPUNIT_ASSERT(probabilityCalculator.calculate(p2)); - LOG_DEBUG("log(probability) = " << p2 - << ", expected log(probability) = " << p1); + LOG_DEBUG("log(probability) = " << p2 << ", expected log(probability) = " << p1); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-8 * std::fabs(std::max(p1, p2))); } } { - double probabilities[] = - { - 0.0001, 0.005, 0.01, 0.1, 0.2 - }; + double probabilities[] = {0.0001, 0.005, 0.01, 0.1, 0.2}; std::size_t numberProbabilities = boost::size(probabilities); @@ -572,44 +481,36 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() CRandomNumbers rng; - for (std::size_t i = 2; i < 4; ++i) - { + for (std::size_t i = 2; i < 4; ++i) { CPPUNIT_ASSERT(i <= numberProbabilities); using TSizeVec = std::vector; TSizeVec index(i, 0); - for (std::size_t j = 1; j < i; ++j) - { + for (std::size_t j = 1; j < i; ++j) { index[j] = j; } TSizeVec lastIndex(i, 0); - for (std::size_t j = 0; j < i; ++j) - { + for (std::size_t j = 0; j < i; ++j) { lastIndex[j] = numberProbabilities - i + j; } double totalError = 0.0; double totalProbability = 0.0; - for (;;) - { + for (;;) { TDoubleVec extremeSampleProbabilities; - for (std::size_t j = 0u; j < index.size(); ++j) - { + for (std::size_t j = 0u; j < index.size(); ++j) { extremeSampleProbabilities.push_back(probabilities[index[j]]); } - LOG_DEBUG("extreme samples probabilities = " - << core::CContainerPrinter::print(extremeSampleProbabilities)); + LOG_DEBUG("extreme samples probabilities = " << core::CContainerPrinter::print(extremeSampleProbabilities)); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(i); - for (std::size_t j = 0u; j < index.size(); ++j) - { + for (std::size_t j = 0u; j < index.size(); ++j) { probabilityCalculator.add(probabilities[index[j]]); } - for (std::size_t j = 0u; j < numberSamples - index.size(); ++j) - { + for (std::size_t j = 0u; j < numberSamples - index.size(); ++j) { probabilityCalculator.add(0.3); } @@ -620,8 +521,7 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() unsigned int nTrials = 50000u; unsigned int count = 0; - for (unsigned int j = 0; j < nTrials; ++j) - { + for (unsigned int j = 0; j < nTrials; ++j) { TDoubleVec samples; rng.generateNormalSamples(0.0, 1.0, numberSamples, samples); boost::math::normal_distribution<> normal(0.0, std::sqrt(1.0)); @@ -629,8 +529,7 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() using TMinValues = CBasicStatistics::COrderStatisticsHeap; TMinValues minValues(i); - for (std::size_t k = 0u; k < samples.size(); ++k) - { + for (std::size_t k = 0u; k < samples.size(); ++k) { double p1 = 2.0 * boost::math::cdf(normal, -std::fabs(samples[k])); minValues.add(p1); } @@ -638,45 +537,36 @@ void 
CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() ++count; minValues.sort(); - for (size_t k = 0; k < i; ++k) - { - if (minValues[k] > probabilities[index[k]]) - { + for (size_t k = 0; k < i; ++k) { + if (minValues[k] > probabilities[index[k]]) { --count; break; } } } - double expectedProbability = static_cast(count) - / static_cast(nTrials); + double expectedProbability = static_cast(count) / static_cast(nTrials); double error = std::fabs(p - expectedProbability); double relativeError = error / std::max(p, expectedProbability); - LOG_DEBUG("probability = " << p - << ", expectedProbability = " << expectedProbability - << ", error = " << error - << ", relative error = " << relativeError); + LOG_DEBUG("probability = " << p << ", expectedProbability = " << expectedProbability << ", error = " << error + << ", relative error = " << relativeError); CPPUNIT_ASSERT(relativeError < 0.33); totalError += error; totalProbability += std::max(p, expectedProbability); - if (index >= lastIndex) - { + if (index >= lastIndex) { break; } - for (std::size_t j = i; j > 0; --j) - { - if (index[j - 1] < numberProbabilities + j - i - 1) - { + for (std::size_t j = i; j > 0; --j) { + if (index[j - 1] < numberProbabilities + j - i - 1) { std::size_t next = ++index[j - 1]; - for (++j, ++next; j < i + 1; ++j, ++next) - { + for (++j, ++next; j < i + 1; ++j, ++next) { index[j - 1] = std::min(next, numberProbabilities - 1); } break; @@ -684,37 +574,45 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() } } - LOG_DEBUG("totalError = " << totalError - << ", totalRelativeError = " << (totalError / totalProbability)); + LOG_DEBUG("totalError = " << totalError << ", totalRelativeError = " << (totalError / totalProbability)); CPPUNIT_ASSERT(totalError < 0.01 * totalProbability); } } { - double probabilities[] = - { - 1.90005e-6, 2.09343e-5, 2.36102e-5, 2.36102e-4, 3.21197e-4, - 0.104481, 0.311476, 0.46037, 0.958691, 0.144973, - 0.345924, 0.111316, 0.346185, 0.993074, 0.0902145, - 0.0902145, 0.673371, 0.346075, 0.346025 - }; + double probabilities[] = {1.90005e-6, + 2.09343e-5, + 2.36102e-5, + 2.36102e-4, + 3.21197e-4, + 0.104481, + 0.311476, + 0.46037, + 0.958691, + 0.144973, + 0.345924, + 0.111316, + 0.346185, + 0.993074, + 0.0902145, + 0.0902145, + 0.673371, + 0.346075, + 0.346025}; std::size_t n = boost::size(probabilities); - std::size_t numberSamples[] = { n, 10*n, 1000*n }; + std::size_t numberSamples[] = {n, 10 * n, 1000 * n}; - for (std::size_t i = 1u; i < 6; ++i) - { + for (std::size_t i = 1u; i < 6; ++i) { LOG_DEBUG("M = " << i); - for (std::size_t j = 0; j < boost::size(numberSamples); ++j) - { + for (std::size_t j = 0; j < boost::size(numberSamples); ++j) { LOG_DEBUG("N = " << numberSamples[j]); CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(i); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(i); - for (std::size_t k = 0; k < numberSamples[j]; ++k) - { + for (std::size_t k = 0; k < numberSamples[j]; ++k) { expectedProbabilityCalculator.add(probabilities[k % n]); probabilityCalculator.add(probabilities[k % n]); } @@ -723,44 +621,32 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() double p2; CPPUNIT_ASSERT(probabilityCalculator.calculate(p2)); - LOG_DEBUG("log(probability) = " << p2 - << ", expected log(probability) = " << p1); + LOG_DEBUG("log(probability) = " << p2 << ", expected log(probability) = " << p1); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-4 * std::fabs(std::max(p1, p2))); } } } { - double 
probabilities[] = - { - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012 - }; + double probabilities[] = { + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012}; 
CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); - for (std::size_t i = 0u; i < boost::size(probabilities); ++i) - { + for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { probabilityCalculator.add(probabilities[i]); } @@ -772,8 +658,7 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { probabilityCalculator.add(1.0 - 1e-10); } @@ -786,8 +671,7 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(3); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(3); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { expectedProbabilityCalculator.add(maths::CTools::smallestProbability()); probabilityCalculator.add(maths::CTools::smallestProbability()); } @@ -806,21 +690,12 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(5); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); - double pmin[] = - { - 0.004703117, - 0.05059556, - 1.0 - std::numeric_limits::epsilon(), - 1.0, - 1.0 - }; - for (std::size_t i = 0; i < boost::size(pmin); ++i) - { + double pmin[] = {0.004703117, 0.05059556, 1.0 - std::numeric_limits::epsilon(), 1.0, 1.0}; + for (std::size_t i = 0; i < boost::size(pmin); ++i) { probabilityCalculator.add(pmin[i]); expectedProbabilityCalculator.add(pmin[i]); } - for (std::size_t i = boost::size(pmin); i < 22; ++i) - { + for (std::size_t i = boost::size(pmin); i < 22; ++i) { probabilityCalculator.add(1.0); expectedProbabilityCalculator.add(1.0); } @@ -833,7 +708,7 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() } { - double p[] = { 0.000234811, 1-2e-16, 1-1.5e-16 }; + double p[] = {0.000234811, 1 - 2e-16, 1 - 1.5e-16}; CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(3); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(3); @@ -845,8 +720,7 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() probabilityCalculator.add(p[1]); probabilityCalculator.add(p[2]); - for (std::size_t i = 0u; i < 19; ++i) - { + for (std::size_t i = 0u; i < 19; ++i) { expectedProbabilityCalculator.add(1.0); probabilityCalculator.add(1.0); } @@ -859,21 +733,16 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() } { - double probabilities[] = - { - 0.08528782661735056, 0.3246988524001009, 0.5428693993904167, 0.9999999999999999, 0.9999999999999999 - }; + double probabilities[] = {0.08528782661735056, 0.3246988524001009, 0.5428693993904167, 0.9999999999999999, 0.9999999999999999}; CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(5); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); - for (std::size_t i = 0u; i < boost::size(probabilities); ++i) - { + for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { expectedProbabilityCalculator.add(probabilities[i]); probabilityCalculator.add(probabilities[i]); } - for (std::size_t i = 0u; i < 19; ++i) - { + for (std::size_t i = 0u; i < 19; ++i) { expectedProbabilityCalculator.add(1.0); probabilityCalculator.add(1.0); } @@ -886,18 +755,13 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() } { - double probabilities[] = - { - 
3.622684004911715e-76, 3.622684004911715e-76, 0.1534837115755979, 0.1608058997234747, 0.5143979767475618 - }; + double probabilities[] = {3.622684004911715e-76, 3.622684004911715e-76, 0.1534837115755979, 0.1608058997234747, 0.5143979767475618}; CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); - for (std::size_t i = 0; i < 21402; ++i) - { + for (std::size_t i = 0; i < 21402; ++i) { probabilityCalculator.add(1.0); } - for (std::size_t i = 0; i < 5; ++i) - { + for (std::size_t i = 0; i < 5; ++i) { probabilityCalculator.add(probabilities[i]); } double p1 = -306.072; } } -CppUnit::Test *CProbabilityAggregatorsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CProbabilityAggregatorsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAggregatorsTest>( - "CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples", - &CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAggregatorsTest>( - "CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples", - &CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAggregatorsTest>( - "CProbabilityAggregatorsTest::testProbabilityOfExtremeSample", - &CProbabilityAggregatorsTest::testProbabilityOfExtremeSample) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAggregatorsTest>( - "CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples", - &CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples) ); +CppUnit::Test* CProbabilityAggregatorsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProbabilityAggregatorsTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller<CProbabilityAggregatorsTest>("CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples", + &CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CProbabilityAggregatorsTest>("CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples", + &CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAggregatorsTest>( + "CProbabilityAggregatorsTest::testProbabilityOfExtremeSample", &CProbabilityAggregatorsTest::testProbabilityOfExtremeSample)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CProbabilityAggregatorsTest>("CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples", + &CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples)); return suiteOfTests; } diff --git a/lib/maths/unittest/CProbabilityAggregatorsTest.h b/lib/maths/unittest/CProbabilityAggregatorsTest.h index 4f87bdc377..3acd499141 100644 --- a/lib/maths/unittest/CProbabilityAggregatorsTest.h +++ b/lib/maths/unittest/CProbabilityAggregatorsTest.h @@ -9,15 +9,14 @@ #include -class CProbabilityAggregatorsTest : public CppUnit::TestFixture -{ - public: - void testJointProbabilityOfLessLikelySamples(); - void testLogJointProbabilityOfLessLikelySamples(); - void testProbabilityOfExtremeSample(); - void testProbabilityOfMFromNExtremeSamples(); +class CProbabilityAggregatorsTest : public CppUnit::TestFixture { +public: + void testJointProbabilityOfLessLikelySamples(); + void testLogJointProbabilityOfLessLikelySamples(); + void testProbabilityOfExtremeSample(); + void testProbabilityOfMFromNExtremeSamples(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CProbabilityAggregatorsTest_h diff --git 
a/lib/maths/unittest/CProbabilityCalibratorTest.cc b/lib/maths/unittest/CProbabilityCalibratorTest.cc index a057655a77..336fb0b667 100644 --- a/lib/maths/unittest/CProbabilityCalibratorTest.cc +++ b/lib/maths/unittest/CProbabilityCalibratorTest.cc @@ -23,8 +23,7 @@ using namespace ml; -void CProbabilityCalibratorTest::testCalibration() -{ +void CProbabilityCalibratorTest::testCalibration() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CProbabilityCalibratorTest::testCalibration |"); LOG_DEBUG("+-----------------------------------------------+"); @@ -37,11 +36,8 @@ void CProbabilityCalibratorTest::testCalibration() // probabilities, i.e. fitting a normal a log-normal and multi- // modal distributions. - maths::CProbabilityCalibrator::EStyle styles[] = - { - maths::CProbabilityCalibrator::E_PartialCalibration, - maths::CProbabilityCalibrator::E_FullCalibration - }; + maths::CProbabilityCalibrator::EStyle styles[] = {maths::CProbabilityCalibrator::E_PartialCalibration, + maths::CProbabilityCalibrator::E_FullCalibration}; test::CRandomNumbers rng; @@ -50,25 +46,21 @@ void CProbabilityCalibratorTest::testCalibration() TDoubleVec samples; rng.generateLogNormalSamples(2.0, 0.9, 5000u, samples); - double improvements[] = { 0.03, 0.07 }; - double maxImprovements[] = { 0.01, 0.9 }; + double improvements[] = {0.03, 0.07}; + double maxImprovements[] = {0.01, 0.9}; - for (std::size_t i = 0u; i < boost::size(styles); ++i) - { + for (std::size_t i = 0u; i < boost::size(styles); ++i) { maths::CProbabilityCalibrator calibrator(styles[i], 0.99); - CNormalMeanPrecConjugate normal = - CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); - CLogNormalMeanPrecConjugate lognormal = - CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CNormalMeanPrecConjugate normal = CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CLogNormalMeanPrecConjugate lognormal = CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); double rawError = 0.0; double calibratedError = 0.0; double maxRawError = 0.0; double maxCalibratedError = 0.0; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { TDoubleVec sample(1u, samples[j]); normal.addSamples(sample); lognormal.addSamples(sample); @@ -77,11 +69,7 @@ void CProbabilityCalibratorTest::testCalibration() double upperBound; double rawProbability = 1.0; - if (normal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - sample, - lowerBound, - upperBound)) - { + if (normal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)) { rawProbability = (lowerBound + upperBound) / 2.0; } @@ -89,11 +77,7 @@ void CProbabilityCalibratorTest::testCalibration() double calibratedProbability = calibrator.calibrate(rawProbability); double trueProbability = 1.0; - if (lognormal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - sample, - lowerBound, - upperBound)) - { + if (lognormal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)) { trueProbability = (lowerBound + upperBound) / 2.0; } @@ -106,10 +90,8 @@ void CProbabilityCalibratorTest::testCalibration() maxCalibratedError = std::max(maxCalibratedError, calibrated); } - LOG_DEBUG("totalRawError = " << rawError - << ", maxRawError = " << maxRawError); - LOG_DEBUG("totalCalibratedError = " << calibratedError - << ", maxCalibratedError = " << maxCalibratedError); + LOG_DEBUG("totalRawError = " << rawError << 
", maxRawError = " << maxRawError); + LOG_DEBUG("totalCalibratedError = " << calibratedError << ", maxCalibratedError = " << maxCalibratedError); CPPUNIT_ASSERT((rawError - calibratedError) / rawError > improvements[i]); CPPUNIT_ASSERT((maxRawError - maxCalibratedError) / maxRawError > maxImprovements[i]); } @@ -127,41 +109,32 @@ void CProbabilityCalibratorTest::testCalibration() rng.random_shuffle(samples.begin(), samples.end()); - double improvements[] = { 0.18, 0.19 }; - double maxImprovements[] = { 0.0, -0.04 }; + double improvements[] = {0.18, 0.19}; + double maxImprovements[] = {0.0, -0.04}; - for (std::size_t i = 0u; i < boost::size(styles); ++i) - { + for (std::size_t i = 0u; i < boost::size(styles); ++i) { maths::CProbabilityCalibrator calibrator(styles[i], 0.99); - CNormalMeanPrecConjugate normal = - CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); - CNormalMeanPrecConjugate normal1 = - CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); - CNormalMeanPrecConjugate normal2 = - CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CNormalMeanPrecConjugate normal = CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CNormalMeanPrecConjugate normal1 = CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CNormalMeanPrecConjugate normal2 = CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); double rawError = 0.0; double calibratedError = 0.0; double maxRawError = 0.0; double maxCalibratedError = 0.0; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { TDoubleVec sample(1u, samples[j]); normal.addSamples(sample); - CNormalMeanPrecConjugate &mode = samples[j] < 10.0 ? normal1 : normal2; + CNormalMeanPrecConjugate& mode = samples[j] < 10.0 ? 
normal1 : normal2; mode.addSamples(sample); double lowerBound; double upperBound; double rawProbability = 1.0; - if (normal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - sample, - lowerBound, - upperBound)) - { + if (normal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)) { rawProbability = (lowerBound + upperBound) / 2.0; } @@ -169,11 +142,7 @@ void CProbabilityCalibratorTest::testCalibration() double calibratedProbability = calibrator.calibrate(rawProbability); double trueProbability = 1.0; - if (mode.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - sample, - lowerBound, - upperBound)) - { + if (mode.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)) { trueProbability = (lowerBound + upperBound) / 2.0; } @@ -186,23 +155,19 @@ void CProbabilityCalibratorTest::testCalibration() maxCalibratedError = std::max(maxCalibratedError, calibrated); } - LOG_DEBUG("totalRawError = " << rawError - << ", maxRawError = " << maxRawError); - LOG_DEBUG("totalCalibratedError = " << calibratedError - << ", maxCalibratedError = " << maxCalibratedError); + LOG_DEBUG("totalRawError = " << rawError << ", maxRawError = " << maxRawError); + LOG_DEBUG("totalCalibratedError = " << calibratedError << ", maxCalibratedError = " << maxCalibratedError); CPPUNIT_ASSERT((rawError - calibratedError) / rawError >= improvements[i]); CPPUNIT_ASSERT((maxRawError - maxCalibratedError) / maxRawError >= maxImprovements[i]); } } } -CppUnit::Test *CProbabilityCalibratorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CProbabilityCalibratorTest"); +CppUnit::Test* CProbabilityCalibratorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProbabilityCalibratorTest"); - suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityCalibratorTest>( - "CProbabilityCalibratorTest::testCalibration", - &CProbabilityCalibratorTest::testCalibration) ); + suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityCalibratorTest>("CProbabilityCalibratorTest::testCalibration", + &CProbabilityCalibratorTest::testCalibration)); return suiteOfTests; } diff --git a/lib/maths/unittest/CProbabilityCalibratorTest.h b/lib/maths/unittest/CProbabilityCalibratorTest.h index 930ba4c5cf..60f40e70a5 100644 --- a/lib/maths/unittest/CProbabilityCalibratorTest.h +++ b/lib/maths/unittest/CProbabilityCalibratorTest.h @@ -9,13 +9,11 @@ #include +class CProbabilityCalibratorTest : public CppUnit::TestFixture { +public: + void testCalibration(); -class CProbabilityCalibratorTest : public CppUnit::TestFixture -{ - public: - void testCalibration(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CProbabilityCalibratorTest_h diff --git a/lib/maths/unittest/CQDigestTest.cc b/lib/maths/unittest/CQDigestTest.cc index 5a6358d533..4a58187903 100644 --- a/lib/maths/unittest/CQDigestTest.cc +++ b/lib/maths/unittest/CQDigestTest.cc @@ -30,8 +30,7 @@ using TDoubleVec = std::vector<double>; using TUInt32UInt64Pr = std::pair<uint32_t, uint64_t>; using TUInt32UInt64PrVec = std::vector<TUInt32UInt64Pr>; -void CQDigestTest::testAdd() -{ +void CQDigestTest::testAdd() { LOG_DEBUG("+-------------------------+"); LOG_DEBUG("| CQDigestTest::testAdd |"); LOG_DEBUG("+-------------------------+"); @@ -43,8 +42,7 @@ void CQDigestTest::testAdd() // All one value. CQDigest qDigest(10u); - for (std::size_t i = 0u; i < 50u; ++i) - { + for (std::size_t i = 0u; i < 50u; ++i) { qDigest.add(5); } @@ -59,17 +57,15 @@ void CQDigestTest::testAdd() // Less than or equal k unique values. 
CQDigest qDigest(5u); - std::string expectedDigests[] = - { - std::string("1 | 5 | { \"[0,0],1,1\" \"[0,1],0,1\" }"), - std::string("2 | 5 | { \"[0,0],1,1\" \"[1,1],1,1\" \"[0,1],0,2\" }"), - std::string("3 | 5 | { \"[0,0],1,1\" \"[1,1],1,1\" \"[2,2],1,1\" \"[0,3],0,3\" }"), - std::string("4 | 5 | { \"[0,0],1,1\" \"[1,1],1,1\" \"[2,2],1,1\" \"[3,3],1,1\" \"[0,3],0,4\" }"), - std::string("5 | 5 | { \"[0,0],1,1\" \"[1,1],1,1\" \"[2,2],1,1\" \"[3,3],1,1\" \"[4,4],1,1\" \"[0,7],0,5\" }"), - }; - - for (std::size_t i = 0u; i < 5u; ++i) - { + std::string expectedDigests[] = { + std::string("1 | 5 | { \"[0,0],1,1\" \"[0,1],0,1\" }"), + std::string("2 | 5 | { \"[0,0],1,1\" \"[1,1],1,1\" \"[0,1],0,2\" }"), + std::string("3 | 5 | { \"[0,0],1,1\" \"[1,1],1,1\" \"[2,2],1,1\" \"[0,3],0,3\" }"), + std::string("4 | 5 | { \"[0,0],1,1\" \"[1,1],1,1\" \"[2,2],1,1\" \"[3,3],1,1\" \"[0,3],0,4\" }"), + std::string("5 | 5 | { \"[0,0],1,1\" \"[1,1],1,1\" \"[2,2],1,1\" \"[3,3],1,1\" \"[4,4],1,1\" \"[0,7],0,5\" }"), + }; + + for (std::size_t i = 0u; i < 5u; ++i) { qDigest.add(static_cast(i)); LOG_DEBUG(qDigest.print()); @@ -84,7 +80,7 @@ void CQDigestTest::testAdd() { using TUInt64Set = std::multiset; - const double expectedMaxErrors[] = { 0.007, 0.01, 0.12, 0.011, 0.016, 0.018, 0.023, 0.025, 0.02 }; + const double expectedMaxErrors[] = {0.007, 0.01, 0.12, 0.011, 0.016, 0.018, 0.023, 0.025, 0.02}; CRandomNumbers generator; @@ -93,11 +89,10 @@ void CQDigestTest::testAdd() TDoubleVec samples; generator.generateUniformSamples(0.0, 5000.0, 10000u, samples); - double totalErrors[] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + double totalErrors[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; TUInt64Set orderedSamples; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { uint32_t sample = static_cast(std::floor(samples[i])); qDigest.add(sample); @@ -107,27 +102,20 @@ void CQDigestTest::testAdd() double n = static_cast(i + 1u); - if (i > 99) - { - for (unsigned int j = 1; j < 10; ++j) - { + if (i > 99) { + for (unsigned int j = 1; j < 10; ++j) { double q = static_cast(j) / 10.0; uint32_t quantile; qDigest.quantile(q, quantile); - std::size_t rank = std::distance(orderedSamples.begin(), - orderedSamples.lower_bound(quantile)); + std::size_t rank = std::distance(orderedSamples.begin(), orderedSamples.lower_bound(quantile)); double error = (static_cast(rank) - q * n) / n; - if ((i + 1) % 1000 == 0) - { - LOG_DEBUG("q = " << q - << ", quantile = " << quantile - << ", rank = " << rank - << ", n = " << n - << ", error " << error); + if ((i + 1) % 1000 == 0) { + LOG_DEBUG("q = " << q << ", quantile = " << quantile << ", rank = " << rank << ", n = " << n << ", error " + << error); } CPPUNIT_ASSERT(std::fabs(error) < 0.06); @@ -137,27 +125,23 @@ void CQDigestTest::testAdd() } } - for (size_t i = 0; i < boost::size(totalErrors); ++i) - { + for (size_t i = 0; i < boost::size(totalErrors); ++i) { totalErrors[i] /= static_cast(samples.size()); } LOG_DEBUG("total errors = " << core::CContainerPrinter::print(totalErrors)); - for (size_t i = 0; i < boost::size(totalErrors); ++i) - { + for (size_t i = 0; i < boost::size(totalErrors); ++i) { CPPUNIT_ASSERT(totalErrors[i] < expectedMaxErrors[i]); } } } -void CQDigestTest::testMerge() -{ +void CQDigestTest::testMerge() { // TODO } -void CQDigestTest::testCdf() -{ +void CQDigestTest::testCdf() { LOG_DEBUG("+-------------------------+"); LOG_DEBUG("| CQDigestTest::testCdf |"); LOG_DEBUG("+-------------------------+"); @@ 
-175,8 +159,7 @@ void CQDigestTest::testCdf() generator.generateUniformSamples(0.0, 500.0, nSamples, samples); std::size_t s = 0u; - for (/**/; s < std::min(k, samples.size()); ++s) - { + for (/**/; s < std::min(k, samples.size()); ++s) { uint32_t sample = static_cast(std::floor(samples[s])); qDigest.add(sample); } @@ -185,23 +168,19 @@ void CQDigestTest::testCdf() qDigest.summary(summary); LOG_DEBUG("summary = " << core::CContainerPrinter::print(summary)); - for (std::size_t i = 0u; i < summary.size(); ++i) - { + for (std::size_t i = 0u; i < summary.size(); ++i) { double lowerBound; double upperBound; qDigest.cdf(summary[i].first, 0.0, lowerBound, upperBound); - LOG_DEBUG("x = " << summary[i].first - << ", F(x) >= " << lowerBound - << ", F(x) <= " << upperBound); + LOG_DEBUG("x = " << summary[i].first << ", F(x) >= " << lowerBound << ", F(x) <= " << upperBound); double fx = static_cast(summary[i].second) / 100.0; CPPUNIT_ASSERT(fx >= lowerBound && fx <= upperBound); } - for (/**/; s < samples.size(); ++s) - { + for (/**/; s < samples.size(); ++s) { uint32_t sample = static_cast(std::floor(samples[s])); qDigest.add(sample); } @@ -209,31 +188,25 @@ void CQDigestTest::testCdf() qDigest.summary(summary); LOG_DEBUG("summary = " << core::CContainerPrinter::print(summary)); - for (std::size_t i = 0u; i < summary.size(); ++i) - { + for (std::size_t i = 0u; i < summary.size(); ++i) { double lowerBound; double upperBound; qDigest.cdf(summary[i].first, 0.0, lowerBound, upperBound); // The expected lower bound. - double fx = static_cast(summary[i].second) - / static_cast(nSamples); + double fx = static_cast(summary[i].second) / static_cast(nSamples); // Get the true c.d.f. value. double ft = std::min(static_cast(summary[i].first) / 500.0, 1.0); - LOG_DEBUG("x = " << summary[i].first - << ", F(x) = " << ft - << ", F(x) >= " << lowerBound - << ", F(x) <= " << upperBound); + LOG_DEBUG("x = " << summary[i].first << ", F(x) = " << ft << ", F(x) >= " << lowerBound << ", F(x) <= " << upperBound); CPPUNIT_ASSERT(fx >= lowerBound && fx <= upperBound); CPPUNIT_ASSERT(ft >= lowerBound - 0.01 && ft <= upperBound + 0.01); } } -void CQDigestTest::testSummary() -{ +void CQDigestTest::testSummary() { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| CQDigestTest::testSummary |"); LOG_DEBUG("+-----------------------------+"); @@ -246,8 +219,7 @@ void CQDigestTest::testSummary() CRandomNumbers generator; generator.generateUniformSamples(0.0, 500.0, 100u, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { uint32_t sample = static_cast(std::floor(samples[i])); qDigest.add(sample); } @@ -258,16 +230,13 @@ void CQDigestTest::testSummary() qDigest.summary(summary); LOG_DEBUG("summary = " << core::CContainerPrinter::print(summary)); - for (std::size_t i = 0u; i < summary.size(); ++i) - { + for (std::size_t i = 0u; i < summary.size(); ++i) { double q = static_cast(summary[i].second) / 100.0; uint32_t xq; qDigest.quantile(q, xq); - LOG_DEBUG("q = " << q - << ", x(q) = " << summary[i].first - << ", expected x(q) = " << xq); + LOG_DEBUG("q = " << q << ", x(q) = " << summary[i].first << ", expected x(q) = " << xq); CPPUNIT_ASSERT_EQUAL(xq, summary[i].first); } @@ -282,8 +251,7 @@ void CQDigestTest::testSummary() TUInt32UInt64PrVec summary; qDigest.summary(summary); - CPPUNIT_ASSERT_EQUAL(std::string("[(3, 1)]"), - core::CContainerPrinter::print(summary)); + CPPUNIT_ASSERT_EQUAL(std::string("[(3, 1)]"), core::CContainerPrinter::print(summary)); } // 
Edge case: non-zero count at the root. @@ -298,13 +266,11 @@ void CQDigestTest::testSummary() TUInt32UInt64PrVec summary; qDigest.summary(summary); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (7, 4)]"), - core::CContainerPrinter::print(summary)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (7, 4)]"), core::CContainerPrinter::print(summary)); } } -void CQDigestTest::testPropagateForwardByTime() -{ +void CQDigestTest::testPropagateForwardByTime() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| CQDigestTest::testPropagateForwardByTime |"); LOG_DEBUG("+--------------------------------------------+"); @@ -316,8 +282,7 @@ void CQDigestTest::testPropagateForwardByTime() CQDigest qDigest(10u, 1.0); - for (std::size_t i = 0; i < 10; ++i) - { + for (std::size_t i = 0; i < 10; ++i) { qDigest.add(0); qDigest.add(3); qDigest.add(2); @@ -351,12 +316,11 @@ void CQDigestTest::testPropagateForwardByTime() CRandomNumbers rng; double mean = 10000.0; - double std = 100.0; + double std = 100.0; TDoubleVec samples; rng.generateNormalSamples(mean, std * std, 200000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { qDigest.add(static_cast(samples[i] + 0.5)); } @@ -368,8 +332,7 @@ void CQDigestTest::testPropagateForwardByTime() TMeanAccumlator error; boost::math::normal_distribution<> normal(mean, std); - for (double x = mean - 5.0 * std; x <= mean + 5 * std; x += 5.0) - { + for (double x = mean - 5.0 * std; x <= mean + 5 * std; x += 5.0) { double lb, ub; CPPUNIT_ASSERT(qDigest.cdf(static_cast(x), 0.0, lb, ub)); cdfLower.push_back(lb); @@ -386,12 +349,11 @@ void CQDigestTest::testPropagateForwardByTime() uint64_t nAged = qDigest.n(); LOG_DEBUG("nAged = " << nAged); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.001, double(n - nAged)/double(n), 5e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.001, double(n - nAged) / double(n), 5e-4); TDoubleVec cdfLowerAged; TDoubleVec cdfUpperAged; - for (double x = mean - 5.0 * std; x <= mean + 5 * std; x += 5.0) - { + for (double x = mean - 5.0 * std; x <= mean + 5 * std; x += 5.0) { double lb, ub; CPPUNIT_ASSERT(qDigest.cdf(static_cast(x), 0.0, lb, ub)); cdfLowerAged.push_back(lb); @@ -399,14 +361,9 @@ void CQDigestTest::testPropagateForwardByTime() } TMeanAccumlator diff; - for (std::size_t i = 0; i < cdfLower.size(); ++i) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(cdfLower[i], - cdfLowerAged[i], - std::min(5e-5, 2e-3 * cdfLower[i])); - CPPUNIT_ASSERT_DOUBLES_EQUAL(cdfUpper[i], - cdfUpperAged[i], - std::min(5e-5, 2e-3 * cdfUpper[i])); + for (std::size_t i = 0; i < cdfLower.size(); ++i) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(cdfLower[i], cdfLowerAged[i], std::min(5e-5, 2e-3 * cdfLower[i])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(cdfUpper[i], cdfUpperAged[i], std::min(5e-5, 2e-3 * cdfUpper[i])); diff.add(std::fabs(cdfLower[i] - cdfLowerAged[i])); diff.add(std::fabs(cdfUpper[i] - cdfUpperAged[i])); } @@ -423,18 +380,15 @@ void CQDigestTest::testPropagateForwardByTime() CRandomNumbers rng; double mean = 10000.0; - double std = 100.0; + double std = 100.0; TDoubleVec samples; - for (std::size_t i = 0u; i < 500; ++i) - { + for (std::size_t i = 0u; i < 500; ++i) { rng.generateNormalSamples(mean, std * std, 2000, samples); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { qDigest.add(static_cast(samples[j] + 0.5)); } - if (i % 10 == 0) - { + if (i % 10 == 0) { LOG_DEBUG("iteration = " << i); } qDigest.propagateForwardsByTime(1.0); @@ -443,8 +397,7 @@ void 
CQDigestTest::testPropagateForwardByTime() TMeanAccumlator error; boost::math::normal_distribution<> normal(mean, std); - for (double x = mean - 5.0 * std; x <= mean + 5 * std; x += 5.0) - { + for (double x = mean - 5.0 * std; x <= mean + 5 * std; x += 5.0) { double lb, ub; CPPUNIT_ASSERT(qDigest.cdf(static_cast(x), 0.0, lb, ub)); double f = boost::math::cdf(normal, x); @@ -459,8 +412,7 @@ void CQDigestTest::testPropagateForwardByTime() } } -void CQDigestTest::testScale() -{ +void CQDigestTest::testScale() { LOG_DEBUG("+---------------------------+"); LOG_DEBUG("| CQDigestTest::testScale |"); LOG_DEBUG("+---------------------------+"); @@ -468,8 +420,7 @@ void CQDigestTest::testScale() { CQDigest qDigest(10u, 1.0); - for (std::size_t i = 0; i < 10; ++i) - { + for (std::size_t i = 0; i < 10; ++i) { qDigest.add(0); qDigest.add(3); qDigest.add(2); @@ -493,8 +444,7 @@ void CQDigestTest::testScale() qDigest.print()); // Test that adding more values after scaling works - for (std::size_t i = 0; i < 10; ++i) - { + for (std::size_t i = 0; i < 10; ++i) { qDigest.add(0); qDigest.add(7); qDigest.add(5); @@ -518,25 +468,23 @@ void CQDigestTest::testScale() } { - const double scales[] = { 1.5, 1.7, 2.2, 3.1, 4.0, 5.0 }; - const double maxMaxType1[] = { 0.17, 0.19, 0.32, 0.31, 0.38, 0.33 }; - const double maxTotalType1[] = { 2.0, 2.5, 9.6, 6.8, 8.7, 12.8 }; - const double maxMaxType2[] = { 0.11, 0.1, 0.15, 0.18, 0.19, 0.22 }; - const double maxTotalType2[] = { 1.9, 1.1, 1.1, 3.3, 2.9, 10.1 }; + const double scales[] = {1.5, 1.7, 2.2, 3.1, 4.0, 5.0}; + const double maxMaxType1[] = {0.17, 0.19, 0.32, 0.31, 0.38, 0.33}; + const double maxTotalType1[] = {2.0, 2.5, 9.6, 6.8, 8.7, 12.8}; + const double maxMaxType2[] = {0.11, 0.1, 0.15, 0.18, 0.19, 0.22}; + const double maxTotalType2[] = {1.9, 1.1, 1.1, 3.3, 2.9, 10.1}; TDoubleVec samples; CRandomNumbers generator; generator.generateNormalSamples(50.0, 5.0, 500u, samples); - for (std::size_t i = 0u; i < boost::size(scales); ++i) - { + for (std::size_t i = 0u; i < boost::size(scales); ++i) { LOG_DEBUG("*** Testing scale = " << scales[i] << " ***"); CQDigest qDigest(20u); CQDigest qDigestScaled(20u); - for (std::size_t j = 0; j < samples.size(); ++j) - { + for (std::size_t j = 0; j < samples.size(); ++j) { qDigest.add(static_cast(samples[j])); qDigestScaled.add(static_cast(scales[i] * samples[j])); } @@ -549,10 +497,8 @@ void CQDigestTest::testScale() double maxType2 = 0.0; double totalType2 = 0.0; - uint32_t end = static_cast( - scales[i] * *std::max_element(samples.begin(), samples.end())) + 1; - for (uint32_t j = 0; j < end; ++j) - { + uint32_t end = static_cast(scales[i] * *std::max_element(samples.begin(), samples.end())) + 1; + for (uint32_t j = 0; j < end; ++j) { double expectedLowerBound; double expectedUpperBound; qDigestScaled.cdf(j, 0.0, expectedLowerBound, expectedUpperBound); @@ -560,10 +506,8 @@ void CQDigestTest::testScale() double lowerBound; double upperBound; qDigest.cdf(j, 0.0, lowerBound, upperBound); - double type1 = std::fabs(expectedLowerBound - lowerBound) - + std::fabs(expectedUpperBound - upperBound); - double type2 = std::max(lowerBound - expectedLowerBound, 0.0) - + std::max(expectedUpperBound - upperBound, 0.0); + double type1 = std::fabs(expectedLowerBound - lowerBound) + std::fabs(expectedUpperBound - upperBound); + double type2 = std::max(lowerBound - expectedLowerBound, 0.0) + std::max(expectedUpperBound - upperBound, 0.0); maxType1 = std::max(maxType1, type1); totalType1 += type1; maxType2 = std::max(maxType2, type2); @@ -581,8 
+525,7 @@ void CQDigestTest::testScale() } } -void CQDigestTest::testPersist() -{ +void CQDigestTest::testPersist() { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| CQDigestTest::testPersist |"); LOG_DEBUG("+-----------------------------+"); @@ -595,8 +538,7 @@ void CQDigestTest::testPersist() TDoubleVec samples; generator.generateUniformSamples(0.0, 5000.0, 1000u, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { uint32_t sample = static_cast(std::floor(samples[i])); origQDigest.add(sample); @@ -617,9 +559,7 @@ void CQDigestTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CQDigest::acceptRestoreTraverser, - &restoredQDigest, - _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CQDigest::acceptRestoreTraverser, &restoredQDigest, _1))); } CPPUNIT_ASSERT(restoredQDigest.checkInvariants()); @@ -635,31 +575,17 @@ void CQDigestTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test *CQDigestTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CQDigestTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQDigestTest::testAdd", - &CQDigestTest::testAdd) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQDigestTest::testMerge", - &CQDigestTest::testMerge) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQDigestTest::testCdf", - &CQDigestTest::testCdf) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQDigestTest::testSummary", - &CQDigestTest::testSummary) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQDigestTest::testPropagateForwardByTime", - &CQDigestTest::testPropagateForwardByTime)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQDigestTest::testScale", - &CQDigestTest::testScale)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQDigestTest::testPersist", - &CQDigestTest::testPersist) ); +CppUnit::Test* CQDigestTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CQDigestTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CQDigestTest::testAdd", &CQDigestTest::testAdd)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQDigestTest::testMerge", &CQDigestTest::testMerge)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQDigestTest::testCdf", &CQDigestTest::testCdf)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQDigestTest::testSummary", &CQDigestTest::testSummary)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CQDigestTest::testPropagateForwardByTime", &CQDigestTest::testPropagateForwardByTime)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQDigestTest::testScale", &CQDigestTest::testScale)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQDigestTest::testPersist", &CQDigestTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CQDigestTest.h b/lib/maths/unittest/CQDigestTest.h index 7fee4f80b4..1c1ca4b781 100644 --- a/lib/maths/unittest/CQDigestTest.h +++ b/lib/maths/unittest/CQDigestTest.h @@ -9,19 +9,17 @@ #include +class CQDigestTest : public CppUnit::TestFixture { +public: + void testAdd(); + void testMerge(); + void testCdf(); + void testSummary(); + void testPropagateForwardByTime(); + void testScale(); + void testPersist(); -class CQDigestTest : public CppUnit::TestFixture -{ - public: - void testAdd(); - void testMerge(); - void testCdf(); - void 
testSummary(); - void testPropagateForwardByTime(); - void testScale(); - void testPersist(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CQDigestTest_h diff --git a/lib/maths/unittest/CQuantileSketchTest.cc b/lib/maths/unittest/CQuantileSketchTest.cc index c2a270f53b..ca657cc412 100644 --- a/lib/maths/unittest/CQuantileSketchTest.cc +++ b/lib/maths/unittest/CQuantileSketchTest.cc @@ -23,20 +23,18 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; void testSketch(maths::CQuantileSketch::EInterpolation interpolation, std::size_t n, - TDoubleVec &samples, + TDoubleVec& samples, double maxBias, double maxError, - TMeanAccumulator &meanBias, - TMeanAccumulator &meanError) -{ + TMeanAccumulator& meanBias, + TMeanAccumulator& meanError) { maths::CQuantileSketch sketch(interpolation, n); sketch = std::for_each(samples.begin(), samples.end(), sketch); @@ -47,8 +45,7 @@ void testSketch(maths::CQuantileSketch::EInterpolation interpolation, TMeanAccumulator bias; TMeanAccumulator error; - for (std::size_t i = 1u; i < 20; ++i) - { + for (std::size_t i = 1u; i < 20; ++i) { double q = static_cast(i) / 20.0; double xq = samples[static_cast(static_cast(N) * q)]; double sq; @@ -62,21 +59,16 @@ void testSketch(maths::CQuantileSketch::EInterpolation interpolation, sketch.quantile(100.0, max); double scale = max - min; - LOG_DEBUG("bias = " << maths::CBasicStatistics::mean(bias) - << ", error " << maths::CBasicStatistics::mean(error)); + LOG_DEBUG("bias = " << maths::CBasicStatistics::mean(bias) << ", error " << maths::CBasicStatistics::mean(error)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(bias)) < maxBias); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < maxError); - meanBias += maths::CBasicStatistics::accumulator(maths::CBasicStatistics::count(bias), - maths::CBasicStatistics::mean(bias) / scale); - meanError += maths::CBasicStatistics::accumulator(maths::CBasicStatistics::count(error), - maths::CBasicStatistics::mean(error) / scale); + meanBias += maths::CBasicStatistics::accumulator(maths::CBasicStatistics::count(bias), maths::CBasicStatistics::mean(bias) / scale); + meanError += maths::CBasicStatistics::accumulator(maths::CBasicStatistics::count(error), maths::CBasicStatistics::mean(error) / scale); } - } -void CQuantileSketchTest::testAdd() -{ +void CQuantileSketchTest::testAdd() { LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| CQuantileSketchTest::testAdd |"); LOG_DEBUG("+--------------------------------+"); @@ -92,18 +84,16 @@ void CQuantileSketchTest::testAdd() CPPUNIT_ASSERT(sketch.checkInvariants()); // Test add via operator(). 
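// The "sketch = std::for_each(x, x + 2, sketch)" idiom used throughout these
// tests deserves a note: the sketch is a function object and std::for_each
// returns (a copy of) the functor it was passed, so assigning the result back
// captures everything that was added. A minimal, self-contained sketch of the
// same pattern, using a hypothetical CCountingAccumulator rather than the real
// maths::CQuantileSketch:
#include <algorithm>
#include <iostream>
#include <vector>

struct CCountingAccumulator {
    double count = 0.0;
    void operator()(double /*value*/) { count += 1.0; }
};

int main() {
    std::vector<double> values{1.8, 2.1};
    CCountingAccumulator acc;
    // for_each takes the functor by value; assign the returned copy back to
    // keep the accumulated state.
    acc = std::for_each(values.begin(), values.end(), acc);
    std::cout << "count = " << acc.count << '\n'; // count = 2
    return 0;
}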
-    double x[] = { 1.8, 2.1 };
+    double x[] = {1.8, 2.1};
     sketch = std::for_each(x, x + 2, sketch);
     CPPUNIT_ASSERT(sketch.checkInvariants());
 
     LOG_DEBUG("sketch = " << core::CContainerPrinter::print(sketch.knots()));
     CPPUNIT_ASSERT_EQUAL(6.0, sketch.count());
-    CPPUNIT_ASSERT_EQUAL(std::string("[(1.2, 1), (0.9, 3), (1.8, 1), (2.1, 1)]"),
-                         core::CContainerPrinter::print(sketch.knots()));
+    CPPUNIT_ASSERT_EQUAL(std::string("[(1.2, 1), (0.9, 3), (1.8, 1), (2.1, 1)]"), core::CContainerPrinter::print(sketch.knots()));
 }
 
-void CQuantileSketchTest::testReduce()
-{
+void CQuantileSketchTest::testReduce() {
     LOG_DEBUG("+-----------------------------------+");
     LOG_DEBUG("| CQuantileSketchTest::testReduce |");
     LOG_DEBUG("+-----------------------------------+");
@@ -114,25 +104,14 @@
         // Test duplicate points.
 
-        double points[][2] =
-        {
-            { 5.0, 1.0 },
-            { 0.4, 2.0 },
-            { 0.4, 1.0 },
-            { 1.0, 1.0 },
-            { 1.2, 2.0 },
-            { 1.2, 1.5 },
-            { 5.0, 1.0 }
-        };
-        for (std::size_t i = 0u; i < boost::size(points); ++i)
-        {
+        double points[][2] = {{5.0, 1.0}, {0.4, 2.0}, {0.4, 1.0}, {1.0, 1.0}, {1.2, 2.0}, {1.2, 1.5}, {5.0, 1.0}};
+        for (std::size_t i = 0u; i < boost::size(points); ++i) {
             sketch.add(points[i][0], points[i][1]);
             CPPUNIT_ASSERT(sketch.checkInvariants());
         }
 
         LOG_DEBUG("sketch = " << core::CContainerPrinter::print(sketch.knots()));
-        CPPUNIT_ASSERT_EQUAL(std::string("[(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"),
-                             core::CContainerPrinter::print(sketch.knots()));
+        CPPUNIT_ASSERT_EQUAL(std::string("[(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"), core::CContainerPrinter::print(sketch.knots()));
 
         // Regular compress (merging two points).
 
@@ -141,15 +120,14 @@
         sketch.add(0.0);
         LOG_DEBUG("sketch = " << core::CContainerPrinter::print(sketch.knots()));
         CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (0.15, 2), (0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"),
-                             core::CContainerPrinter::print(sketch.knots()));
+                              core::CContainerPrinter::print(sketch.knots()));
     }
 
     {
         // Multiple points compressed at once.
 
         maths::CQuantileSketch sketch(maths::CQuantileSketch::E_Linear, 30);
 
-        for (std::size_t i = 0u; i <= 30; ++i)
-        {
+        for (std::size_t i = 0u; i <= 30; ++i) {
             sketch.add(static_cast<double>(i));
             CPPUNIT_ASSERT(sketch.checkInvariants());
         }
@@ -165,32 +143,22 @@
     {
         // Test the quantiles are reasonable at a compression ratio of 2:1.
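// Before the 2:1 compression test below, it is worth pinning down the
// duplicate-point expectation checked above: knots with equal values simply
// have their counts summed and the knots stay sorted by value, which is how
// [(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)] arises. A minimal sketch of that
// reduction (a hypothetical helper, not the CQuantileSketch implementation):
#include <iostream>
#include <map>
#include <utility>
#include <vector>

using TKnot = std::pair<double, double>; // (value, count)

std::vector<TKnot> mergeDuplicates(const std::vector<TKnot>& points) {
    std::map<double, double> merged; // keyed, and therefore sorted, by value
    for (const auto& point : points) {
        merged[point.first] += point.second;
    }
    return {merged.begin(), merged.end()};
}

int main() {
    std::vector<TKnot> points{{5.0, 1.0}, {0.4, 2.0}, {0.4, 1.0}, {1.0, 1.0},
                              {1.2, 2.0}, {1.2, 1.5}, {5.0, 1.0}};
    for (const auto& knot : mergeDuplicates(points)) {
        std::cout << "(" << knot.first << ", " << knot.second << ") ";
    }
    std::cout << '\n'; // (0.4, 3) (1, 1) (1.2, 3.5) (5, 2)
    return 0;
}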
-        double points[] =
-        {
-            1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0,
-            19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0
-        };
-        double cdf[] =
-        {
-            5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0,
-            55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0
-        };
+        double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0};
+        double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0,
+                        55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0};
 
         maths::CQuantileSketch sketch(maths::CQuantileSketch::E_Linear, 10);
-        for (std::size_t i = 0u; i < boost::size(points); ++i)
-        {
+        for (std::size_t i = 0u; i < boost::size(points); ++i) {
             sketch.add(points[i]);
             CPPUNIT_ASSERT(sketch.checkInvariants());
-            if ((i+1) % 5 == 0)
-            {
+            if ((i + 1) % 5 == 0) {
                 LOG_DEBUG("sketch = " << core::CContainerPrinter::print(sketch.knots()));
             }
         }
 
         std::sort(boost::begin(points), boost::end(points));
         TMeanAccumulator error;
-        for (std::size_t i = 0u; i < boost::size(cdf); ++i)
-        {
+        for (std::size_t i = 0u; i < boost::size(cdf); ++i) {
             double x;
             CPPUNIT_ASSERT(sketch.quantile(cdf[i], x));
             LOG_DEBUG("expected quantile = " << points[i] << ", actual quantile = " << x);
@@ -207,25 +175,14 @@
         // Test duplicate points.
 
-        double points[][2] =
-        {
-            { 5.0, 1.0 },
-            { 0.4, 2.0 },
-            { 0.4, 1.0 },
-            { 1.0, 1.0 },
-            { 1.2, 2.0 },
-            { 1.2, 1.5 },
-            { 5.0, 1.0 }
-        };
-        for (std::size_t i = 0u; i < boost::size(points); ++i)
-        {
+        double points[][2] = {{5.0, 1.0}, {0.4, 2.0}, {0.4, 1.0}, {1.0, 1.0}, {1.2, 2.0}, {1.2, 1.5}, {5.0, 1.0}};
+        for (std::size_t i = 0u; i < boost::size(points); ++i) {
             sketch.add(points[i][0], points[i][1]);
             CPPUNIT_ASSERT(sketch.checkInvariants());
         }
 
         LOG_DEBUG("sketch = " << core::CContainerPrinter::print(sketch.knots()));
-        CPPUNIT_ASSERT_EQUAL(std::string("[(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"),
-                             core::CContainerPrinter::print(sketch.knots()));
+        CPPUNIT_ASSERT_EQUAL(std::string("[(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"), core::CContainerPrinter::print(sketch.knots()));
 
         // Regular compress (merging two points).
 
@@ -241,8 +198,7 @@
         maths::CQuantileSketch sketch(maths::CQuantileSketch::E_PiecewiseConstant, 30);
 
-        for (std::size_t i = 0u; i <= 30; ++i)
-        {
+        for (std::size_t i = 0u; i <= 30; ++i) {
             sketch.add(static_cast<double>(i));
             CPPUNIT_ASSERT(sketch.checkInvariants());
         }
@@ -258,32 +214,22 @@
     {
         // Test the quantiles are reasonable at a compression ratio of 2:1.
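// The 2:1 compression tests above and below share the same bookkeeping: with
// 20 samples, the i-th smallest point is treated as the exact
// 100 * (i + 1) / 20 percent quantile, which is exactly how the cdf[] array
// lines up with the sorted points[] array. A quick standalone check of that
// pairing:
#include <algorithm>
#include <iostream>
#include <iterator>

int main() {
    double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0,
                       19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0};
    std::sort(std::begin(points), std::end(points));
    for (int i = 0; i < 20; ++i) {
        // cdf[i] = 5 * (i + 1) percent maps to the i-th order statistic.
        std::cout << 5.0 * (i + 1) << "% -> " << points[i] << '\n';
    }
    return 0;
}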
- double points[] = - { - 1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0, - 19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0 - }; - double cdf[] = - { - 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, - 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0 - }; + double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0}; + double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, + 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0}; maths::CQuantileSketch sketch(maths::CQuantileSketch::E_PiecewiseConstant, 10); - for (std::size_t i = 0u; i < boost::size(points); ++i) - { + for (std::size_t i = 0u; i < boost::size(points); ++i) { sketch.add(points[i]); CPPUNIT_ASSERT(sketch.checkInvariants()); - if ((i+1) % 5 == 0) - { + if ((i + 1) % 5 == 0) { LOG_DEBUG("sketch = " << core::CContainerPrinter::print(sketch.knots())); } } std::sort(boost::begin(points), boost::end(points)); TMeanAccumulator error; - for (std::size_t i = 0u; i < boost::size(cdf); ++i) - { + for (std::size_t i = 0u; i < boost::size(cdf); ++i) { double x; CPPUNIT_ASSERT(sketch.quantile(cdf[i], x)); LOG_DEBUG("expected quantile = " << points[i] << ", actual quantile = " << x); @@ -295,8 +241,7 @@ void CQuantileSketchTest::testReduce() } } -void CQuantileSketchTest::testMerge() -{ +void CQuantileSketchTest::testMerge() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CQuantileSketchTest::testMerge |"); LOG_DEBUG("+----------------------------------+"); @@ -326,21 +271,13 @@ void CQuantileSketchTest::testMerge() { // Test the quantiles are reasonable at a compression ratio of 2:1. - double points[] = - { - 1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0, - 19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0 - }; - double cdf[] = - { - 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, - 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0 - }; + double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0}; + double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, + 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0}; maths::CQuantileSketch sketch1(maths::CQuantileSketch::E_Linear, 10); maths::CQuantileSketch sketch2(maths::CQuantileSketch::E_Linear, 10); - for (std::size_t i = 0u; i < boost::size(points); i += 2) - { + for (std::size_t i = 0u; i < boost::size(points); i += 2) { sketch1.add(points[i]); sketch2.add(points[i + 1]); } @@ -352,8 +289,7 @@ void CQuantileSketchTest::testMerge() std::sort(boost::begin(points), boost::end(points)); TMeanAccumulator error; - for (std::size_t i = 0u; i < boost::size(cdf); ++i) - { + for (std::size_t i = 0u; i < boost::size(cdf); ++i) { double x; CPPUNIT_ASSERT(sketch3.quantile(cdf[i], x)); LOG_DEBUG("expected quantile = " << points[i] << ", actual quantile = " << x); @@ -365,8 +301,7 @@ void CQuantileSketchTest::testMerge() } } -void CQuantileSketchTest::testMedian() -{ +void CQuantileSketchTest::testMedian() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CQuantileSketchTest::testMedian |"); LOG_DEBUG("+-----------------------------------+"); @@ -409,8 +344,7 @@ void CQuantileSketchTest::testMedian() TMeanAccumulator bias; TMeanAccumulator error; - for (std::size_t t = 0u; t < 500; ++t) - { + for (std::size_t t = 0u; t < 500; ++t) { TDoubleVec samples; rng.generateUniformSamples(0.0, 100.0, 501, 
samples); maths::CQuantileSketch sketch(maths::CQuantileSketch::E_PiecewiseConstant, 20); @@ -430,8 +364,7 @@ void CQuantileSketchTest::testMedian() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 1.6); } -void CQuantileSketchTest::testPropagateForwardByTime() -{ +void CQuantileSketchTest::testPropagateForwardByTime() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CQuantileSketchTest::testPropagateForwardByTime |"); LOG_DEBUG("+---------------------------------------------------+"); @@ -451,8 +384,7 @@ void CQuantileSketchTest::testPropagateForwardByTime() CPPUNIT_ASSERT(sketch.checkInvariants()); } -void CQuantileSketchTest::testQuantileAccuracy() -{ +void CQuantileSketchTest::testQuantileAccuracy() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CQuantileSketchTest::testQuantileAccuracy |"); LOG_DEBUG("+---------------------------------------------+"); @@ -466,14 +398,13 @@ void CQuantileSketchTest::testQuantileAccuracy() { TMeanAccumulator meanBias; TMeanAccumulator meanError; - for (std::size_t t = 0u; t < 5; ++t) - { + for (std::size_t t = 0u; t < 5; ++t) { TDoubleVec samples; rng.generateUniformSamples(0.0, 20.0 * static_cast(t + 1), 1000, samples); testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.15, 0.3, meanBias, meanError); } - LOG_DEBUG("mean bias = " << std::fabs(maths::CBasicStatistics::mean(meanBias)) - << ", mean error " << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG("mean bias = " << std::fabs(maths::CBasicStatistics::mean(meanBias)) << ", mean error " + << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBias)) < 0.0007); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.003); } @@ -482,16 +413,12 @@ void CQuantileSketchTest::testQuantileAccuracy() { TMeanAccumulator meanBias; TMeanAccumulator meanError; - for (std::size_t t = 0u; t < 5; ++t) - { + for (std::size_t t = 0u; t < 5; ++t) { TDoubleVec samples; - rng.generateNormalSamples(20.0 * static_cast(t), - 20.0 * static_cast(t + 1), - 1000, samples); + rng.generateNormalSamples(20.0 * static_cast(t), 20.0 * static_cast(t + 1), 1000, samples); testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.16, 0.2, meanBias, meanError); } - LOG_DEBUG("mean bias = " << maths::CBasicStatistics::mean(meanBias) - << ", mean error " << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG("mean bias = " << maths::CBasicStatistics::mean(meanBias) << ", mean error " << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBias)) < 0.002); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.003); } @@ -500,16 +427,12 @@ void CQuantileSketchTest::testQuantileAccuracy() { TMeanAccumulator meanBias; TMeanAccumulator meanError; - for (std::size_t t = 0u; t < 5; ++t) - { + for (std::size_t t = 0u; t < 5; ++t) { TDoubleVec samples; - rng.generateLogNormalSamples(0.1 * static_cast(t), - 0.4 * static_cast(t + 1), - 1000, samples); + rng.generateLogNormalSamples(0.1 * static_cast(t), 0.4 * static_cast(t + 1), 1000, samples); testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.11, 0.12, meanBias, meanError); } - LOG_DEBUG("mean bias = " << maths::CBasicStatistics::mean(meanBias) - << ", mean error " << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG("mean bias = " << maths::CBasicStatistics::mean(meanBias) << ", mean error " << maths::CBasicStatistics::mean(meanError)); 
CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBias)) < 0.0006); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.0009); } @@ -519,45 +442,32 @@ void CQuantileSketchTest::testQuantileAccuracy() TMeanAccumulator meanErrorLinear; TMeanAccumulator meanBiasPiecewise; TMeanAccumulator meanErrorPiecewise; - for (std::size_t t = 0u; t < 5; ++t) - { + for (std::size_t t = 0u; t < 5; ++t) { TDoubleVec samples_[4] = {}; - rng.generateNormalSamples(10.0 * static_cast(t), - 20.0 * static_cast(t + 1), - 400, samples_[0]); - rng.generateNormalSamples(20.0 * static_cast(t), - 20.0 * static_cast(t + 1), - 600, samples_[1]); - rng.generateNormalSamples(100.0 * static_cast(t), - 40.0 * static_cast(t + 1), - 400, samples_[2]); - rng.generateUniformSamples(500.0 * static_cast(t), - 550.0 * static_cast(t + 1), - 600, samples_[3]); + rng.generateNormalSamples(10.0 * static_cast(t), 20.0 * static_cast(t + 1), 400, samples_[0]); + rng.generateNormalSamples(20.0 * static_cast(t), 20.0 * static_cast(t + 1), 600, samples_[1]); + rng.generateNormalSamples(100.0 * static_cast(t), 40.0 * static_cast(t + 1), 400, samples_[2]); + rng.generateUniformSamples(500.0 * static_cast(t), 550.0 * static_cast(t + 1), 600, samples_[3]); TDoubleVec samples; - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { samples.insert(samples.end(), samples_[i].begin(), samples_[i].end()); } rng.random_shuffle(samples.begin(), samples.end()); - testSketch(maths::CQuantileSketch::E_Linear, 40, samples, 49, 50, - meanBiasLinear, meanErrorLinear); - testSketch(maths::CQuantileSketch::E_PiecewiseConstant, 40, samples, 55, 56, - meanBiasPiecewise, meanErrorPiecewise); + testSketch(maths::CQuantileSketch::E_Linear, 40, samples, 49, 50, meanBiasLinear, meanErrorLinear); + testSketch(maths::CQuantileSketch::E_PiecewiseConstant, 40, samples, 55, 56, meanBiasPiecewise, meanErrorPiecewise); } - LOG_DEBUG("linear mean bias = " << maths::CBasicStatistics::mean(meanBiasLinear) - << ", mean error " << maths::CBasicStatistics::mean(meanErrorLinear)); + LOG_DEBUG("linear mean bias = " << maths::CBasicStatistics::mean(meanBiasLinear) << ", mean error " + << maths::CBasicStatistics::mean(meanErrorLinear)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBiasLinear)) < 0.012); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanErrorLinear) < 0.013); - LOG_DEBUG("piecewise mean bias = " << maths::CBasicStatistics::mean(meanBiasPiecewise) - << ", mean error " << maths::CBasicStatistics::mean(meanErrorPiecewise)); + LOG_DEBUG("piecewise mean bias = " << maths::CBasicStatistics::mean(meanBiasPiecewise) << ", mean error " + << maths::CBasicStatistics::mean(meanErrorPiecewise)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBiasPiecewise)) < 0.015); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanErrorPiecewise) < 0.015); } } -void CQuantileSketchTest::testCdf() -{ +void CQuantileSketchTest::testCdf() { LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| CQuantileSketchTest::testCdf |"); LOG_DEBUG("+--------------------------------+"); @@ -568,35 +478,29 @@ void CQuantileSketchTest::testCdf() LOG_DEBUG("*** Exact ***"); { - double values[] = { 1.3, 5.2, 0.3, 0.7, 6.9, 10.3, 0.1, -2.9, 9.3, 0.0 }; + double values[] = {1.3, 5.2, 0.3, 0.7, 6.9, 10.3, 0.1, -2.9, 9.3, 0.0}; { maths::CQuantileSketch sketch(maths::CQuantileSketch::E_PiecewiseConstant, 10); sketch = std::for_each(boost::begin(values), boost::end(values), sketch); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t 
i = 0u; i < 10; ++i) { double x; sketch.quantile(10.0 * static_cast(i) + 5.0, x); double f; sketch.cdf(x, f); - LOG_DEBUG("x = " << x - << ", f(exact) = " << static_cast(i) / 10.0 + 0.05 - << ", f(actual) = " << f); + LOG_DEBUG("x = " << x << ", f(exact) = " << static_cast(i) / 10.0 + 0.05 << ", f(actual) = " << f); CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast(i) / 10.0 + 0.05, f, 1e-6); } } { maths::CQuantileSketch sketch(maths::CQuantileSketch::E_Linear, 10); sketch = std::for_each(boost::begin(values), boost::end(values), sketch); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { double x; sketch.quantile(10.0 * static_cast(i) + 5.0, x); double f; sketch.cdf(x, f); - LOG_DEBUG("x = " << x - << ", f(exact) = " << static_cast(i) / 10.0 + 0.05 - << ", f(actual) = " << f); + LOG_DEBUG("x = " << x << ", f(exact) = " << static_cast(i) / 10.0 + 0.05 << ", f(actual) = " << f); CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast(i) / 10.0 + 0.05, f, 1e-6); } @@ -613,25 +517,20 @@ void CQuantileSketchTest::testCdf() { TMeanAccumulator meanBias; TMeanAccumulator meanError; - for (std::size_t t = 0u; t < 5; ++t) - { - LOG_DEBUG("test " << t+1); + for (std::size_t t = 0u; t < 5; ++t) { + LOG_DEBUG("test " << t + 1); TDoubleVec samples; rng.generateUniformSamples(0.0, 20.0 * static_cast(t + 1), 1000, samples); { maths::CQuantileSketch sketch(maths::CQuantileSketch::E_Linear, 20); sketch = std::for_each(samples.begin(), samples.end(), sketch); - for (std::size_t i = 0u; i <= 100; ++i) - { + for (std::size_t i = 0u; i <= 100; ++i) { double x; sketch.quantile(static_cast(i), x); double f; sketch.cdf(x, f); - if (i % 10 == 0) - { - LOG_DEBUG(" x = " << x - << ", f(exact) = " << static_cast(i) / 100.0 - << ", f(actual) = " << f); + if (i % 10 == 0) { + LOG_DEBUG(" x = " << x << ", f(exact) = " << static_cast(i) / 100.0 << ", f(actual) = " << f); } CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast(i) / 100.0, f, 1e-6); } @@ -640,8 +539,7 @@ void CQuantileSketchTest::testCdf() } } -void CQuantileSketchTest::testPersist() -{ +void CQuantileSketchTest::testPersist() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CQuantileSketchTest::testPersist |"); LOG_DEBUG("+------------------------------------+"); @@ -651,8 +549,7 @@ void CQuantileSketchTest::testPersist() generator.generateUniformSamples(0.0, 5000.0, 500u, samples); maths::CQuantileSketch origSketch(maths::CQuantileSketch::E_Linear, 100u); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { origSketch.add(samples[i]); } @@ -670,9 +567,7 @@ void CQuantileSketchTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CQuantileSketch::acceptRestoreTraverser, - &restoredSketch, - _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CQuantileSketch::acceptRestoreTraverser, &restoredSketch, _1))); } // Checksums should agree. 
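// Every persist test in this patch follows the same round trip: serialize the
// original object, restore it into a fresh instance, serialize again, and
// require byte-identical state. A schematic version of that contract, with a
// hypothetical Persistable type standing in for the real XML-based
// persistence:
#include <cassert>
#include <sstream>
#include <string>

struct Persistable {
    double value = 0.0;
    std::string persist() const {
        std::ostringstream state;
        state << value;
        return state.str();
    }
    bool restore(const std::string& state) {
        std::istringstream parser(state);
        return static_cast<bool>(parser >> value);
    }
};

int main() {
    Persistable orig;
    orig.value = 42.5;
    std::string origState = orig.persist();

    Persistable restored;
    assert(restored.restore(origState));

    // The restored object must persist to exactly the original state.
    assert(restored.persist() == origState);
    return 0;
}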
@@ -688,34 +583,22 @@ void CQuantileSketchTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test* CQuantileSketchTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CQuantileSketchTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQuantileSketchTest::testAdd", - &CQuantileSketchTest::testAdd) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQuantileSketchTest::testReduce", - &CQuantileSketchTest::testReduce) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQuantileSketchTest::testMerge", - &CQuantileSketchTest::testMerge) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQuantileSketchTest::testMedian", - &CQuantileSketchTest::testMedian) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQuantileSketchTest::testPropagateForwardByTime", - &CQuantileSketchTest::testPropagateForwardByTime)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQuantileSketchTest::testQuantileAccuracy", - &CQuantileSketchTest::testQuantileAccuracy)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQuantileSketchTest::testCdf", - &CQuantileSketchTest::testCdf)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CQuantileSketchTest::testPersist", - &CQuantileSketchTest::testPersist) ); +CppUnit::Test* CQuantileSketchTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CQuantileSketchTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testAdd", &CQuantileSketchTest::testAdd)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CQuantileSketchTest::testReduce", &CQuantileSketchTest::testReduce)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testMerge", &CQuantileSketchTest::testMerge)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CQuantileSketchTest::testMedian", &CQuantileSketchTest::testMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testPropagateForwardByTime", + &CQuantileSketchTest::testPropagateForwardByTime)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testQuantileAccuracy", + &CQuantileSketchTest::testQuantileAccuracy)); + suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testCdf", &CQuantileSketchTest::testCdf)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CQuantileSketchTest::testPersist", &CQuantileSketchTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CQuantileSketchTest.h b/lib/maths/unittest/CQuantileSketchTest.h index f98acf164b..a3e28a9aff 100644 --- a/lib/maths/unittest/CQuantileSketchTest.h +++ b/lib/maths/unittest/CQuantileSketchTest.h @@ -9,19 +9,18 @@ #include -class CQuantileSketchTest : public CppUnit::TestFixture -{ - public: - void testAdd(); - void testReduce(); - void testMerge(); - void testMedian(); - void testPropagateForwardByTime(); - void testQuantileAccuracy(); - void testCdf(); - void testPersist(); +class CQuantileSketchTest : public CppUnit::TestFixture { +public: + void testAdd(); + void testReduce(); + void testMerge(); + void testMedian(); + void testPropagateForwardByTime(); + void testQuantileAccuracy(); + void testCdf(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CQuantileSketchTest_h diff --git a/lib/maths/unittest/CRadialBasisFunctionTest.cc b/lib/maths/unittest/CRadialBasisFunctionTest.cc index b29b08852a..cb48523d47 100644 --- a/lib/maths/unittest/CRadialBasisFunctionTest.cc +++ 
b/lib/maths/unittest/CRadialBasisFunctionTest.cc
@@ -16,92 +16,68 @@
 using namespace ml;
 
-namespace
-{
-
-class CValueAdaptor
-{
-    public:
-        using result_type = double;
-
-    public:
-        CValueAdaptor(const maths::CRadialBasisFunction &function,
-                      double centre,
-                      double scale) :
-            m_Function(&function),
-            m_Centre(centre),
-            m_Scale(scale)
-        {
-        }
+namespace {
 
-        bool operator()(double x, double &result) const
-        {
-            result = m_Function->value(x, m_Centre, m_Scale);
-            return true;
-        }
+class CValueAdaptor {
+public:
+    using result_type = double;
+
+public:
+    CValueAdaptor(const maths::CRadialBasisFunction& function, double centre, double scale)
+        : m_Function(&function), m_Centre(centre), m_Scale(scale) {}
+
+    bool operator()(double x, double& result) const {
+        result = m_Function->value(x, m_Centre, m_Scale);
+        return true;
+    }
 
-    private:
-        const maths::CRadialBasisFunction *m_Function;
-        double m_Centre;
-        double m_Scale;
+private:
+    const maths::CRadialBasisFunction* m_Function;
+    double m_Centre;
+    double m_Scale;
 };
 
-class CSquareDerivativeAdaptor
-{
-    public:
-        CSquareDerivativeAdaptor(const maths::CRadialBasisFunction &function,
-                                 double centre,
-                                 double scale) :
-            m_Function(&function),
-            m_Centre(centre),
-            m_Scale(scale)
-        {
-        }
+class CSquareDerivativeAdaptor {
+public:
+    CSquareDerivativeAdaptor(const maths::CRadialBasisFunction& function, double centre, double scale)
+        : m_Function(&function), m_Centre(centre), m_Scale(scale) {}
 
-        bool operator()(double x, double &result) const
-        {
-            double d = m_Function->derivative(x, m_Centre, m_Scale);
-            result = d * d;
-            return true;
-        }
+    bool operator()(double x, double& result) const {
+        double d = m_Function->derivative(x, m_Centre, m_Scale);
+        result = d * d;
+        return true;
+    }
 
-    private:
-        const maths::CRadialBasisFunction *m_Function;
-        double m_Centre;
-        double m_Scale;
+private:
+    const maths::CRadialBasisFunction* m_Function;
+    double m_Centre;
+    double m_Scale;
 };
-
 }
 
-void CRadialBasisFunctionTest::testDerivative()
-{
+void CRadialBasisFunctionTest::testDerivative() {
     LOG_DEBUG("+--------------------------------------------+");
     LOG_DEBUG("| CRadialBasisFunctionTest::testDerivative |");
    LOG_DEBUG("+--------------------------------------------+");
 
     const double a = 0.0;
     const double b = 10.0;
 
-    const double centres[] = { 0.0, 5.0, 10.0 };
-    const double scales[] = { 5.0, 1.0, 0.1 };
+    const double centres[] = {0.0, 5.0, 10.0};
+    const double scales[] = {5.0, 1.0, 0.1};
 
     const double eps = 1e-3;
 
     LOG_DEBUG("*** Gaussian ***");
 
-    for (std::size_t i = 0u; i < boost::size(centres); ++i)
-    {
-        for (std::size_t j = 0u; j < boost::size(scales); ++j)
-        {
+    for (std::size_t i = 0u; i < boost::size(centres); ++i) {
+        for (std::size_t j = 0u; j < boost::size(scales); ++j) {
             LOG_DEBUG("centre = " << centres[i] << ", scale = " << scales[j]);
 
             maths::CGaussianBasisFunction gaussian;
-            for (std::size_t k = 0u; k < 10; ++k)
-            {
+            for (std::size_t k = 0u; k < 10; ++k) {
                 double x = a + static_cast<double>(k) / 10.0 * (b - a);
 
                 double d = gaussian.derivative(x, centres[i], scales[j]);
-                double e = (gaussian.value(x + eps, centres[i], scales[j])
-                            - gaussian.value(x - eps, centres[i], scales[j]))
-                           / 2.0 / eps;
+                double e = (gaussian.value(x + eps, centres[i], scales[j]) - gaussian.value(x - eps, centres[i], scales[j])) / 2.0 / eps;
 
                 // Centred difference numerical derivative should
                 // be accurate to o(eps^2).
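// The comment above is the crux of these derivative tests: the centred
// difference (f(x + eps) - f(x - eps)) / (2 * eps) has truncation error of
// order eps^2, so with eps = 1e-3 it can be checked against the analytic
// derivative using a tight tolerance. A quick demonstration on a
// Gaussian-shaped test function with a known derivative (an illustrative
// stand-in, not the library's basis function classes):
#include <cmath>
#include <iostream>

int main() {
    const double eps = 1e-3;
    double x = 0.7;
    // d/dx exp(-x^2) = -2 * x * exp(-x^2).
    double exact = -2.0 * x * std::exp(-x * x);
    double numeric = (std::exp(-(x + eps) * (x + eps)) - std::exp(-(x - eps) * (x - eps))) / (2.0 * eps);
    std::cout << "exact = " << exact << ", centred difference = " << numeric
              << ", |error| = " << std::fabs(exact - numeric) << '\n'; // |error| ~ eps^2
    return 0;
}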
@@ -112,20 +88,17 @@ void CRadialBasisFunctionTest::testDerivative()
 
     LOG_DEBUG("*** Inverse Quadratic ***");
 
-    for (std::size_t i = 0u; i < boost::size(centres); ++i)
-    {
-        for (std::size_t j = 0u; j < boost::size(scales); ++j)
-        {
+    for (std::size_t i = 0u; i < boost::size(centres); ++i) {
+        for (std::size_t j = 0u; j < boost::size(scales); ++j) {
             LOG_DEBUG("centre = " << centres[i] << ", scale = " << scales[j]);
 
             maths::CInverseQuadraticBasisFunction inverseQuadratic;
-            for (std::size_t k = 0u; k < 10; ++k)
-            {
+            for (std::size_t k = 0u; k < 10; ++k) {
                 double x = a + static_cast<double>(k) / 10.0 * (b - a);
 
                 double d = inverseQuadratic.derivative(x, centres[i], scales[j]);
-                double e = (inverseQuadratic.value(x + eps, centres[i], scales[j])
-                            - inverseQuadratic.value(x - eps, centres[i], scales[j]))
-                           / 2.0 / eps;
+                double e =
+                    (inverseQuadratic.value(x + eps, centres[i], scales[j]) - inverseQuadratic.value(x - eps, centres[i], scales[j])) /
+                    2.0 / eps;
 
                 // Centred difference numerical derivative should
                 // be accurate to o(eps^2).
@@ -133,35 +106,30 @@
             }
         }
     }
-
 }
 
-void CRadialBasisFunctionTest::testMean()
-{
+void CRadialBasisFunctionTest::testMean() {
     LOG_DEBUG("+--------------------------------------+");
     LOG_DEBUG("| CRadialBasisFunctionTest::testMean |");
     LOG_DEBUG("+--------------------------------------+");
 
     const double a = 0.0;
     const double b = 10.0;
 
-    const double centres[] = { 0.0, 5.0, 10.0 };
-    const double scales[] = { 5.0, 1.0, 0.1 };
+    const double centres[] = {0.0, 5.0, 10.0};
+    const double scales[] = {5.0, 1.0, 0.1};
 
     const double eps = 1e-3;
 
     LOG_DEBUG("*** Gaussian ***");
 
-    for (std::size_t i = 0u; i < boost::size(centres); ++i)
-    {
-        for (std::size_t j = 0u; j < boost::size(scales); ++j)
-        {
+    for (std::size_t i = 0u; i < boost::size(centres); ++i) {
+        for (std::size_t j = 0u; j < boost::size(scales); ++j) {
             LOG_DEBUG("centre = " << centres[i] << ", scale = " << scales[j]);
 
             maths::CGaussianBasisFunction gaussian;
             CValueAdaptor f(gaussian, centres[i], scales[j]);
 
             double expectedMean = 0.0;
-            for (std::size_t k = 0u; k < 20; ++k)
-            {
+            for (std::size_t k = 0u; k < 20; ++k) {
                 double aa = a + static_cast<double>(k) / 20.0 * (b - a);
                 double bb = a + static_cast<double>(k + 1) / 20.0 * (b - a);
                 double interval;
@@ -171,25 +139,21 @@ void CRadialBasisFunctionTest::testMean()
             expectedMean /= (b - a);
 
             double mean = gaussian.mean(a, b, centres[i], scales[j]);
-            LOG_DEBUG("expectedMean = " << expectedMean
-                      << ", mean = " << mean);
+            LOG_DEBUG("expectedMean = " << expectedMean << ", mean = " << mean);
 
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, mean, eps * mean);
         }
     }
 
     LOG_DEBUG("*** Inverse Quadratic ***");
 
-    for (std::size_t i = 0u; i < boost::size(centres); ++i)
-    {
-        for (std::size_t j = 0u; j < boost::size(scales); ++j)
-        {
+    for (std::size_t i = 0u; i < boost::size(centres); ++i) {
+        for (std::size_t j = 0u; j < boost::size(scales); ++j) {
             LOG_DEBUG("centre = " << centres[i] << ", scale = " << scales[j]);
 
             maths::CInverseQuadraticBasisFunction inverseQuadratic;
             CValueAdaptor f(inverseQuadratic, centres[i], scales[j]);
 
             double expectedMean = 0.0;
-            for (std::size_t k = 0u; k < 20; ++k)
-            {
+            for (std::size_t k = 0u; k < 20; ++k) {
                 double aa = a + static_cast<double>(k) / 20.0 * (b - a);
                 double bb = a + static_cast<double>(k + 1) / 20.0 * (b - a);
                 double interval;
@@ -199,39 +163,34 @@ void CRadialBasisFunctionTest::testMean()
             expectedMean /= (b - a);
 
             double mean = inverseQuadratic.mean(a, b, centres[i], scales[j]);
-            LOG_DEBUG("expectedMean = " << expectedMean
-                      << ", mean = 
" << mean); + LOG_DEBUG("expectedMean = " << expectedMean << ", mean = " << mean); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, mean, eps * mean); } } } -void CRadialBasisFunctionTest::testMeanSquareDerivative() -{ +void CRadialBasisFunctionTest::testMeanSquareDerivative() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CRadialBasisFunctionTest::testMean |"); LOG_DEBUG("+--------------------------------------+"); const double a = 0.0; const double b = 10.0; - const double centres[] = { 0.0, 5.0, 10.0 }; - const double scales[] = { 5.0, 1.0, 0.1 }; + const double centres[] = {0.0, 5.0, 10.0}; + const double scales[] = {5.0, 1.0, 0.1}; const double eps = 1e-3; LOG_DEBUG("*** Gaussian ***"); - for (std::size_t i = 0u; i < boost::size(centres); ++i) - { - for (std::size_t j = 0u; j < boost::size(scales); ++j) - { + for (std::size_t i = 0u; i < boost::size(centres); ++i) { + for (std::size_t j = 0u; j < boost::size(scales); ++j) { LOG_DEBUG("centre = " << centres[i] << ", scale = " << scales[j]); maths::CGaussianBasisFunction gaussian; CSquareDerivativeAdaptor f(gaussian, centres[i], scales[j]); double expectedMean = 0.0; - for (std::size_t k = 0u; k < 50; ++k) - { + for (std::size_t k = 0u; k < 50; ++k) { double aa = a + static_cast(k) / 50.0 * (b - a); double bb = a + static_cast(k + 1) / 50.0 * (b - a); double interval; @@ -241,25 +200,21 @@ void CRadialBasisFunctionTest::testMeanSquareDerivative() expectedMean /= (b - a); double mean = gaussian.meanSquareDerivative(a, b, centres[i], scales[j]); - LOG_DEBUG("expectedMean = " << expectedMean - << ", mean = " << mean); + LOG_DEBUG("expectedMean = " << expectedMean << ", mean = " << mean); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, mean, eps * mean); } } LOG_DEBUG("*** Inverse Quadratic ***"); - for (std::size_t i = 0u; i < boost::size(centres); ++i) - { - for (std::size_t j = 0u; j < boost::size(scales); ++j) - { + for (std::size_t i = 0u; i < boost::size(centres); ++i) { + for (std::size_t j = 0u; j < boost::size(scales); ++j) { LOG_DEBUG("centre = " << centres[i] << ", scale = " << scales[j]); maths::CInverseQuadraticBasisFunction inverseQuadratic; CSquareDerivativeAdaptor f(inverseQuadratic, centres[i], scales[j]); double expectedMean = 0.0; - for (std::size_t k = 0u; k < 50; ++k) - { + for (std::size_t k = 0u; k < 50; ++k) { double aa = a + static_cast(k) / 50.0 * (b - a); double bb = a + static_cast(k + 1) / 50.0 * (b - a); double interval; @@ -269,49 +224,39 @@ void CRadialBasisFunctionTest::testMeanSquareDerivative() expectedMean /= (b - a); double mean = inverseQuadratic.meanSquareDerivative(a, b, centres[i], scales[j]); - LOG_DEBUG("expectedMean = " << expectedMean - << ", mean = " << mean); + LOG_DEBUG("expectedMean = " << expectedMean << ", mean = " << mean); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, mean, eps * mean); } } } -void CRadialBasisFunctionTest::testProduct() -{ +void CRadialBasisFunctionTest::testProduct() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CRadialBasisFunctionTest::testProduct |"); LOG_DEBUG("+-----------------------------------------+"); const double a = 0.0; const double b = 10.0; - const double centres[] = { 0.0, 5.0, 10.0 }; - const double scales[] = { 5.0, 1.0, 0.1 }; + const double centres[] = {0.0, 5.0, 10.0}; + const double scales[] = {5.0, 1.0, 0.1}; const double eps = 1e-3; LOG_DEBUG("*** Gaussian ***"); - for (std::size_t i = 0u; i < boost::size(centres); ++i) - { - for (std::size_t j = 0u; j < boost::size(centres); ++j) - { - for (std::size_t k = 0u; 
k < boost::size(scales); ++k) - { - for (std::size_t l = 0u; l < boost::size(scales); ++l) - { - LOG_DEBUG("centre1 = " << centres[i] - << ", centre2 = " << centres[j] - << ", scale1 = " << scales[k] - << ", scale2 = " << scales[l]); + for (std::size_t i = 0u; i < boost::size(centres); ++i) { + for (std::size_t j = 0u; j < boost::size(centres); ++j) { + for (std::size_t k = 0u; k < boost::size(scales); ++k) { + for (std::size_t l = 0u; l < boost::size(scales); ++l) { + LOG_DEBUG("centre1 = " << centres[i] << ", centre2 = " << centres[j] << ", scale1 = " << scales[k] + << ", scale2 = " << scales[l]); maths::CGaussianBasisFunction gaussian; CValueAdaptor f1(gaussian, centres[i], scales[k]); CValueAdaptor f2(gaussian, centres[j], scales[l]); - maths::CCompositeFunctions::CProduct f(f1, f2); + maths::CCompositeFunctions::CProduct f(f1, f2); double expectedProduct = 0.0; - for (std::size_t m = 0u; m < 50; ++m) - { + for (std::size_t m = 0u; m < 50; ++m) { double aa = a + static_cast(m) / 50.0 * (b - a); double bb = a + static_cast(m + 1) / 50.0 * (b - a); double interval; @@ -320,13 +265,8 @@ void CRadialBasisFunctionTest::testProduct() } expectedProduct /= (b - a); - double product = gaussian.product(a, b, - centres[i], - centres[j], - scales[k], - scales[l]); - LOG_DEBUG("expectedMean = " << expectedProduct - << ", mean = " << product); + double product = gaussian.product(a, b, centres[i], centres[j], scales[k], scales[l]); + LOG_DEBUG("expectedMean = " << expectedProduct << ", mean = " << product); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProduct, product, eps * product); } } @@ -335,27 +275,19 @@ void CRadialBasisFunctionTest::testProduct() LOG_DEBUG("*** Inverse Quadratic ***"); - for (std::size_t i = 0u; i < boost::size(centres); ++i) - { - for (std::size_t j = 0u; j < boost::size(centres); ++j) - { - for (std::size_t k = 0u; k < boost::size(scales); ++k) - { - for (std::size_t l = 0u; l < boost::size(scales); ++l) - { - LOG_DEBUG("centre1 = " << centres[i] - << ", centre2 = " << centres[j] - << ", scale1 = " << scales[k] - << ", scale2 = " << scales[l]); + for (std::size_t i = 0u; i < boost::size(centres); ++i) { + for (std::size_t j = 0u; j < boost::size(centres); ++j) { + for (std::size_t k = 0u; k < boost::size(scales); ++k) { + for (std::size_t l = 0u; l < boost::size(scales); ++l) { + LOG_DEBUG("centre1 = " << centres[i] << ", centre2 = " << centres[j] << ", scale1 = " << scales[k] + << ", scale2 = " << scales[l]); maths::CInverseQuadraticBasisFunction inverseQuadratic; CValueAdaptor f1(inverseQuadratic, centres[i], scales[k]); CValueAdaptor f2(inverseQuadratic, centres[j], scales[l]); double expectedProduct = 0.0; - maths::CCompositeFunctions::CProduct f(f1, f2); - for (std::size_t m = 0u; m < 50; ++m) - { + maths::CCompositeFunctions::CProduct f(f1, f2); + for (std::size_t m = 0u; m < 50; ++m) { double aa = a + static_cast(m) / 50.0 * (b - a); double bb = a + static_cast(m + 1) / 50.0 * (b - a); double interval; @@ -364,13 +296,8 @@ void CRadialBasisFunctionTest::testProduct() } expectedProduct /= (b - a); - double product = inverseQuadratic.product(a, b, - centres[i], - centres[j], - scales[k], - scales[l]); - LOG_DEBUG("expectedProduct = " << expectedProduct - << ", product = " << product); + double product = inverseQuadratic.product(a, b, centres[i], centres[j], scales[k], scales[l]); + LOG_DEBUG("expectedProduct = " << expectedProduct << ", product = " << product); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProduct, product, eps * product); } } @@ -378,22 +305,17 @@ void 
CRadialBasisFunctionTest::testProduct() } } -CppUnit::Test *CRadialBasisFunctionTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRadialBasisFunctionTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRadialBasisFunctionTest::testDerivative", - &CRadialBasisFunctionTest::testDerivative) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRadialBasisFunctionTest::testMean", - &CRadialBasisFunctionTest::testMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRadialBasisFunctionTest::testMeanSquareDerivative", - &CRadialBasisFunctionTest::testMeanSquareDerivative) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRadialBasisFunctionTest::testProduct", - &CRadialBasisFunctionTest::testProduct) ); +CppUnit::Test* CRadialBasisFunctionTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRadialBasisFunctionTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CRadialBasisFunctionTest::testDerivative", + &CRadialBasisFunctionTest::testDerivative)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CRadialBasisFunctionTest::testMean", &CRadialBasisFunctionTest::testMean)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRadialBasisFunctionTest::testMeanSquareDerivative", + &CRadialBasisFunctionTest::testMeanSquareDerivative)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CRadialBasisFunctionTest::testProduct", &CRadialBasisFunctionTest::testProduct)); return suiteOfTests; } diff --git a/lib/maths/unittest/CRadialBasisFunctionTest.h b/lib/maths/unittest/CRadialBasisFunctionTest.h index ff4bcfbc21..129fcb770e 100644 --- a/lib/maths/unittest/CRadialBasisFunctionTest.h +++ b/lib/maths/unittest/CRadialBasisFunctionTest.h @@ -9,16 +9,14 @@ #include +class CRadialBasisFunctionTest : public CppUnit::TestFixture { +public: + void testDerivative(); + void testMean(); + void testMeanSquareDerivative(); + void testProduct(); -class CRadialBasisFunctionTest : public CppUnit::TestFixture -{ - public: - void testDerivative(); - void testMean(); - void testMeanSquareDerivative(); - void testProduct(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CRadialBasisFunctionTest_h diff --git a/lib/maths/unittest/CRandomProjectionClustererTest.cc b/lib/maths/unittest/CRandomProjectionClustererTest.cc index 99e8ffd6ae..c0ad89752c 100644 --- a/lib/maths/unittest/CRandomProjectionClustererTest.cc +++ b/lib/maths/unittest/CRandomProjectionClustererTest.cc @@ -14,8 +14,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; @@ -25,75 +24,48 @@ using TVector = maths::CVector; using TVector5 = maths::CVectorNx1; using TCovariances = maths::CBasicStatistics::SSampleCovariances; -struct SFirstLess -{ - bool operator()(const TSizeVec &lhs, const TSizeVec &rhs) const - { - return lhs[0] < rhs[0]; - } +struct SFirstLess { + bool operator()(const TSizeVec& lhs, const TSizeVec& rhs) const { return lhs[0] < rhs[0]; } }; template -class CRandomProjectionClustererForTest : public maths::CRandomProjectionClustererBatch -{ - public: - using TVectorArrayVec = typename maths::CRandomProjectionClustererBatch::TVectorArrayVec; - using TDoubleVecVec = typename maths::CRandomProjectionClustererBatch::TDoubleVecVec; - using TVectorNx1VecVec = typename maths::CRandomProjectionClustererBatch::TVectorNx1VecVec; - using TSvdNxNVecVec = typename maths::CRandomProjectionClustererBatch::TSvdNxNVecVec; - using TSizeUSet = typename 
maths::CRandomProjectionClustererBatch::TSizeUSet; - using TMeanAccumulatorVecVec = typename maths::CRandomProjectionClustererBatch::TMeanAccumulatorVecVec; - - public: - CRandomProjectionClustererForTest(double compression = 1.0) : - maths::CRandomProjectionClustererBatch(compression) - {} - - const TVectorArrayVec &projections() const - { - return this->maths::CRandomProjectionClustererBatch::projections(); - } +class CRandomProjectionClustererForTest : public maths::CRandomProjectionClustererBatch { +public: + using TVectorArrayVec = typename maths::CRandomProjectionClustererBatch::TVectorArrayVec; + using TDoubleVecVec = typename maths::CRandomProjectionClustererBatch::TDoubleVecVec; + using TVectorNx1VecVec = typename maths::CRandomProjectionClustererBatch::TVectorNx1VecVec; + using TSvdNxNVecVec = typename maths::CRandomProjectionClustererBatch::TSvdNxNVecVec; + using TSizeUSet = typename maths::CRandomProjectionClustererBatch::TSizeUSet; + using TMeanAccumulatorVecVec = typename maths::CRandomProjectionClustererBatch::TMeanAccumulatorVecVec; + +public: + CRandomProjectionClustererForTest(double compression = 1.0) : maths::CRandomProjectionClustererBatch(compression) {} + + const TVectorArrayVec& projections() const { return this->maths::CRandomProjectionClustererBatch::projections(); } + + template + void clusterProjections(CLUSTERER clusterer, TDoubleVecVec& W, TVectorNx1VecVec& M, TSvdNxNVecVec& C, TSizeUSet& I) const { + std::size_t b = this->projectedData().size(); + W.resize(b); + M.resize(b); + C.resize(b); + this->maths::CRandomProjectionClustererBatch::clusterProjections(clusterer, W, M, C, I); + } - template - void clusterProjections(CLUSTERER clusterer, - TDoubleVecVec &W, - TVectorNx1VecVec &M, - TSvdNxNVecVec &C, - TSizeUSet &I) const - { - std::size_t b = this->projectedData().size(); - W.resize(b); - M.resize(b); - C.resize(b); - this->maths::CRandomProjectionClustererBatch::clusterProjections(clusterer, W, M, C, I); - } + void neighbourhoods(const TSizeUSet& I, TSizeVecVec& H) const { this->maths::CRandomProjectionClustererBatch::neighbourhoods(I, H); } - void neighbourhoods(const TSizeUSet &I, TSizeVecVec &H) const - { - this->maths::CRandomProjectionClustererBatch::neighbourhoods(I, H); - } - - void similarities(const TDoubleVecVec &W, - const TVectorNx1VecVec &M, - const TSvdNxNVecVec &C, - const TSizeVecVec &H, - TDoubleVecVec &S) const - { - this->maths::CRandomProjectionClustererBatch::similarities(W, M, C, H, S); - } + void + similarities(const TDoubleVecVec& W, const TVectorNx1VecVec& M, const TSvdNxNVecVec& C, const TSizeVecVec& H, TDoubleVecVec& S) const { + this->maths::CRandomProjectionClustererBatch::similarities(W, M, C, H, S); + } - void clusterNeighbourhoods(TDoubleVecVec &S, - const TSizeVecVec &H, - TSizeVecVec &result) const - { - this->maths::CRandomProjectionClustererBatch::clusterNeighbourhoods(S, H, result); - } + void clusterNeighbourhoods(TDoubleVecVec& S, const TSizeVecVec& H, TSizeVecVec& result) const { + this->maths::CRandomProjectionClustererBatch::clusterNeighbourhoods(S, H, result); + } }; - } -void CRandomProjectionClustererTest::testGenerateProjections() -{ +void CRandomProjectionClustererTest::testGenerateProjections() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CRandomProjectionClustererTest::testGenerateProjections |"); LOG_DEBUG("+-----------------------------------------------------------+"); @@ -110,11 +82,10 @@ void CRandomProjectionClustererTest::testGenerateProjections() 
CRandomProjectionClustererForTest<5> clusterer; CPPUNIT_ASSERT(clusterer.initialise(5, 3)); - const TVectorArrayVec &projections = clusterer.projections(); + const TVectorArrayVec& projections = clusterer.projections(); LOG_DEBUG("projections = " << core::CContainerPrinter::print(projections)); - CPPUNIT_ASSERT_EQUAL(std::string("[[[1 0 0], [0 1 0], [0 0 1], [0 0 0], [0 0 0]]]"), - core::CContainerPrinter::print(projections)); + CPPUNIT_ASSERT_EQUAL(std::string("[[[1 0 0], [0 1 0], [0 0 1], [0 0 0], [0 0 0]]]"), core::CContainerPrinter::print(projections)); } // Test that the projections are mutually orthonormal and @@ -122,25 +93,21 @@ void CRandomProjectionClustererTest::testGenerateProjections() TMeanAccumulator error; - for (std::size_t t = 10; t < 50; ++t) - { + for (std::size_t t = 10; t < 50; ++t) { LOG_DEBUG("*** trial = " << t << " ***"); CRandomProjectionClustererForTest<5> clusterer; CPPUNIT_ASSERT(clusterer.initialise(6, t)); - const TVectorArrayVec &projections = clusterer.projections(); + const TVectorArrayVec& projections = clusterer.projections(); CPPUNIT_ASSERT_EQUAL(std::size_t(6), projections.size()); - for (std::size_t i = 0u; i < projections.size(); ++i) - { - for (std::size_t j = 0u; j < 5; ++j) - { + for (std::size_t i = 0u; i < projections.size(); ++i) { + for (std::size_t j = 0u; j < 5; ++j) { CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, projections[i][j].inner(projections[i][j]), 1e-10); - for (std::size_t k = j+1; k < 5; ++k) - { + for (std::size_t k = j + 1; k < 5; ++k) { CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, projections[i][j].inner(projections[i][k]), 1e-10); } } @@ -148,14 +115,10 @@ void CRandomProjectionClustererTest::testGenerateProjections() TMeanVarAccumulator moments; - for (std::size_t i = 0u; i < projections.size(); ++i) - { - for (std::size_t j = i+1; j < projections.size(); ++j) - { - for (std::size_t k = 0u; k < 5; ++k) - { - for (std::size_t l = 0u; l < 5; ++l) - { + for (std::size_t i = 0u; i < projections.size(); ++i) { + for (std::size_t j = i + 1; j < projections.size(); ++j) { + for (std::size_t k = 0u; k < 5; ++k) { + for (std::size_t l = 0u; l < 5; ++l) { moments.add(projections[i][k].inner(projections[j][l])); } } @@ -167,9 +130,8 @@ void CRandomProjectionClustererTest::testGenerateProjections() CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, maths::CBasicStatistics::mean(moments), 1.0 / static_cast(t)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / static_cast(t), - maths::CBasicStatistics::variance(moments), - 0.2 / static_cast(t)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0 / static_cast(t), maths::CBasicStatistics::variance(moments), 0.2 / static_cast(t)); error.add(static_cast(t) * std::fabs(maths::CBasicStatistics::variance(moments) - 1.0 / static_cast(t))); } @@ -178,8 +140,7 @@ void CRandomProjectionClustererTest::testGenerateProjections() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.06); } -void CRandomProjectionClustererTest::testClusterProjections() -{ +void CRandomProjectionClustererTest::testClusterProjections() { LOG_DEBUG("+----------------------------------------------------------+"); LOG_DEBUG("| CRandomProjectionClustererTest::testClusterProjections |"); LOG_DEBUG("+----------------------------------------------------------+"); @@ -195,8 +156,7 @@ void CRandomProjectionClustererTest::testClusterProjections() TDoubleVec mean1(dimension, 0.0); TDoubleVec mean2(dimension, 10.0); TDoubleVecVec covariance(dimension, TDoubleVec(dimension, 0.0)); - for (std::size_t i = 0u; i < 30; ++i) - { + for (std::size_t i = 0u; i < 30; ++i) { covariance[i][i] = 1.0; } @@ 
-207,33 +167,26 @@ void CRandomProjectionClustererTest::testClusterProjections() CRandomProjectionClustererForTest<5> clusterer; clusterer.initialise(4, dimension); - for (std::size_t i = 0u; i < samples1.size(); ++i) - { + for (std::size_t i = 0u; i < samples1.size(); ++i) { clusterer.add(TVector(samples1[i].begin(), samples1[i].end())); } - for (std::size_t i = 0u; i < samples2.size(); ++i) - { + for (std::size_t i = 0u; i < samples2.size(); ++i) { clusterer.add(TVector(samples2[i].begin(), samples2[i].end())); } TDoubleVec expectedWeights; CRandomProjectionClustererForTest<5>::TVectorNx1VecVec expectedMeans; - expectedWeights.push_back( static_cast(samples1.size()) - / static_cast(samples1.size() + samples2.size())); - expectedWeights.push_back( static_cast(samples2.size()) - / static_cast(samples1.size() + samples2.size())); + expectedWeights.push_back(static_cast(samples1.size()) / static_cast(samples1.size() + samples2.size())); + expectedWeights.push_back(static_cast(samples2.size()) / static_cast(samples1.size() + samples2.size())); std::sort(expectedWeights.begin(), expectedWeights.end()); - for (std::size_t i = 0u; i < clusterer.projections().size(); ++i) - { + for (std::size_t i = 0u; i < clusterer.projections().size(); ++i) { CRandomProjectionClustererForTest<5>::TVectorNx1Vec means; { TCovariances covariances; - for (std::size_t j = 0u; j < samples1.size(); ++j) - { + for (std::size_t j = 0u; j < samples1.size(); ++j) { TVector x(samples1[j].begin(), samples1[j].end()); TVector5 projection; - for (std::size_t k = 0u; k < 5; ++k) - { + for (std::size_t k = 0u; k < 5; ++k) { projection(k) = clusterer.projections()[i][k].inner(x); } covariances.add(projection); @@ -242,12 +195,10 @@ void CRandomProjectionClustererTest::testClusterProjections() } { TCovariances covariances; - for (std::size_t j = 0u; j < samples2.size(); ++j) - { + for (std::size_t j = 0u; j < samples2.size(); ++j) { TVector x(samples2[j].begin(), samples2[j].end()); TVector5 projection; - for (std::size_t k = 0u; k < 5; ++k) - { + for (std::size_t k = 0u; k < 5; ++k) { projection(k) = clusterer.projections()[i][k].inner(x); } covariances.add(projection); @@ -264,8 +215,8 @@ void CRandomProjectionClustererTest::testClusterProjections() CRandomProjectionClustererForTest<5>::TVectorNx1VecVec means; CRandomProjectionClustererForTest<5>::TSvdNxNVecVec covariances; CRandomProjectionClustererForTest<5>::TSizeUSet samples; - clusterer.clusterProjections(maths::forRandomProjectionClusterer(maths::CKMeansFast(), 2, 5), - weights_, means, covariances, samples); + clusterer.clusterProjections( + maths::forRandomProjectionClusterer(maths::CKMeansFast(), 2, 5), weights_, means, covariances, samples); CPPUNIT_ASSERT_EQUAL(std::size_t(4), weights_.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(4), means.size()); @@ -273,8 +224,7 @@ void CRandomProjectionClustererTest::testClusterProjections() CPPUNIT_ASSERT(samples.size() >= std::size_t(8)); TDoubleVec weights(2, 0.0); - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { std::sort(weights_[i].begin(), weights_[i].end()); weights[0] += weights_[i][0] / 4.0; weights[1] += weights_[i][1] / 4.0; @@ -283,14 +233,11 @@ void CRandomProjectionClustererTest::testClusterProjections() LOG_DEBUG("weights = " << core::CContainerPrinter::print(weights)); LOG_DEBUG("means = " << core::CContainerPrinter::print(means)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedWeights), - core::CContainerPrinter::print(weights)); - 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), - core::CContainerPrinter::print(means)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedWeights), core::CContainerPrinter::print(weights)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), core::CContainerPrinter::print(means)); } -void CRandomProjectionClustererTest::testNeighbourhoods() -{ +void CRandomProjectionClustererTest::testNeighbourhoods() { LOG_DEBUG("+------------------------------------------------------+"); LOG_DEBUG("| CRandomProjectionClustererTest::testNeighbourhoods |"); LOG_DEBUG("+------------------------------------------------------+"); @@ -306,23 +253,17 @@ void CRandomProjectionClustererTest::testNeighbourhoods() test::CRandomNumbers rng; std::size_t dimension = 30u; - std::size_t n[] = { 30, 50, 40 }; + std::size_t n[] = {30, 50, 40}; TDoubleVec means[3] = {}; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { rng.generateUniformSamples(0.0, 10.0, dimension, means[i]); LOG_DEBUG("mean = " << core::CContainerPrinter::print(means[i])); } - TDoubleVecVec covariances[] = - { - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)) - }; - for (std::size_t i = 0u; i < boost::size(covariances); ++i) - { - for (std::size_t j = 0u; j < 30; ++j) - { + TDoubleVecVec covariances[] = {TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0))}; + for (std::size_t i = 0u; i < boost::size(covariances); ++i) { + for (std::size_t j = 0u; j < 30; ++j) { covariances[i][j][j] = 1.0 + static_cast(i); } } @@ -331,12 +272,10 @@ void CRandomProjectionClustererTest::testNeighbourhoods() CRandomProjectionClustererForTest<5> clusterer; clusterer.initialise(4, dimension); - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TDoubleVecVec samples; rng.generateMultivariateNormalSamples(means[i], covariances[i], n[i], samples); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { clusterer.add(TVector(samples[j])); sampleVectors.push_back(TVector(samples[j])); } @@ -346,8 +285,8 @@ void CRandomProjectionClustererTest::testNeighbourhoods() CRandomProjectionClustererForTest<5>::TVectorNx1VecVec clusterMeans; CRandomProjectionClustererForTest<5>::TSvdNxNVecVec clusterCovariances; CRandomProjectionClustererForTest<5>::TSizeUSet examples; - clusterer.clusterProjections(maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), - weights, clusterMeans, clusterCovariances, examples); + clusterer.clusterProjections( + maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), weights, clusterMeans, clusterCovariances, examples); LOG_DEBUG("examples = " << core::CContainerPrinter::print(examples)); TSizeVecVec neighbourhoods(examples.size()); @@ -356,20 +295,16 @@ void CRandomProjectionClustererTest::testNeighbourhoods() TSizeVecVec expectedNeighbourhoods(examples.size()); TVectorVec exampleVectors; - for (auto i = examples.begin(); i != examples.end(); ++i) - { + for (auto i = examples.begin(); i != examples.end(); ++i) { LOG_DEBUG("example = " << sampleVectors[*i]); exampleVectors.push_back(sampleVectors[*i]); } - for (std::size_t i = 0u; i < sampleVectors.size(); ++i) - { + for (std::size_t i = 
0u; i < sampleVectors.size(); ++i) { std::size_t closest = 0u; double distance = (sampleVectors[i] - exampleVectors[0]).euclidean(); - for (std::size_t j = 1u; j < exampleVectors.size(); ++j) - { + for (std::size_t j = 1u; j < exampleVectors.size(); ++j) { double dj = (sampleVectors[i] - exampleVectors[j]).euclidean(); - if (dj < distance) - { + if (dj < distance) { closest = j; distance = dj; } @@ -381,12 +316,9 @@ void CRandomProjectionClustererTest::testNeighbourhoods() LOG_DEBUG("expected neighbours = " << core::CContainerPrinter::print(expectedNeighbourhoods)); maths::CBasicStatistics::SSampleMean::TAccumulator meanJaccard; - for (std::size_t i = 0u; i < neighbourhoods.size(); ++i) - { - double jaccard = maths::CSetTools::jaccard(neighbourhoods[i].begin(), - neighbourhoods[i].end(), - expectedNeighbourhoods[i].begin(), - expectedNeighbourhoods[i].end()); + for (std::size_t i = 0u; i < neighbourhoods.size(); ++i) { + double jaccard = maths::CSetTools::jaccard( + neighbourhoods[i].begin(), neighbourhoods[i].end(), expectedNeighbourhoods[i].begin(), expectedNeighbourhoods[i].end()); LOG_DEBUG("jaccard = " << jaccard); meanJaccard.add(jaccard, static_cast(expectedNeighbourhoods[i].size())); CPPUNIT_ASSERT(jaccard > 0.1); @@ -396,8 +328,7 @@ void CRandomProjectionClustererTest::testNeighbourhoods() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanJaccard) > 0.35); } -void CRandomProjectionClustererTest::testSimilarities() -{ +void CRandomProjectionClustererTest::testSimilarities() { LOG_DEBUG("+----------------------------------------------------+"); LOG_DEBUG("| CRandomProjectionClustererTest::testSimilarities |"); LOG_DEBUG("+----------------------------------------------------+"); @@ -405,23 +336,17 @@ void CRandomProjectionClustererTest::testSimilarities() test::CRandomNumbers rng; std::size_t dimension = 30u; - std::size_t n[] = { 30, 50, 40 }; + std::size_t n[] = {30, 50, 40}; TDoubleVec means[3] = {}; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { rng.generateUniformSamples(0.0, 10.0, dimension, means[i]); LOG_DEBUG("mean = " << core::CContainerPrinter::print(means[i])); } - TDoubleVecVec covariances[] = - { - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)) - }; - for (std::size_t i = 0u; i < boost::size(covariances); ++i) - { - for (std::size_t j = 0u; j < 30; ++j) - { + TDoubleVecVec covariances[] = {TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0))}; + for (std::size_t i = 0u; i < boost::size(covariances); ++i) { + for (std::size_t j = 0u; j < 30; ++j) { covariances[i][j][j] = 1.0 + static_cast(i); } } @@ -430,12 +355,10 @@ void CRandomProjectionClustererTest::testSimilarities() CRandomProjectionClustererForTest<5> clusterer(1.5); clusterer.initialise(4, dimension); - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TDoubleVecVec samples; rng.generateMultivariateNormalSamples(means[i], covariances[i], n[i], samples); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { clusterer.add(TVector(samples[j])); clusters.push_back(i); } @@ -445,23 +368,19 @@ void CRandomProjectionClustererTest::testSimilarities() CRandomProjectionClustererForTest<5>::TVectorNx1VecVec 
clusterMeans; CRandomProjectionClustererForTest<5>::TSvdNxNVecVec clusterCovariances; CRandomProjectionClustererForTest<5>::TSizeUSet examples; - clusterer.clusterProjections(maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), - weights, clusterMeans, clusterCovariances, examples); + clusterer.clusterProjections( + maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), weights, clusterMeans, clusterCovariances, examples); LOG_DEBUG("examples = " << core::CContainerPrinter::print(examples)); TSizeVecVec expectedConnectivity(examples.size(), TSizeVec(examples.size())); TSizeVec examples_(examples.begin(), examples.end()); - for (std::size_t i = 0u; i < examples_.size(); ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { - expectedConnectivity[i][j] = expectedConnectivity[j][i] = - clusters[examples_[i]] == clusters[examples_[j]] ? 1 : 0; + for (std::size_t i = 0u; i < examples_.size(); ++i) { + for (std::size_t j = 0u; j <= i; ++j) { + expectedConnectivity[i][j] = expectedConnectivity[j][i] = clusters[examples_[i]] == clusters[examples_[j]] ? 1 : 0; } } LOG_DEBUG("expected connectivity ="); - for (std::size_t i = 0u; i < expectedConnectivity.size(); ++i) - { + for (std::size_t i = 0u; i < expectedConnectivity.size(); ++i) { LOG_DEBUG(" " << core::CContainerPrinter::print(expectedConnectivity[i])); } @@ -472,33 +391,27 @@ void CRandomProjectionClustererTest::testSimilarities() clusterer.similarities(weights, clusterMeans, clusterCovariances, neighbourhoods, similarities); TSizeVecVec connectivity(examples.size(), TSizeVec(examples.size())); - for (std::size_t i = 0u; i < similarities.size(); ++i) - { + for (std::size_t i = 0u; i < similarities.size(); ++i) { TDoubleVec s; - for (std::size_t j = 0u; j <= i; ++j) - { + for (std::size_t j = 0u; j <= i; ++j) { s.push_back(similarities[i][j]); connectivity[i][j] = connectivity[j][i] = similarities[i][j] < 10.0 ? 
1 : 0; } LOG_DEBUG(core::CContainerPrinter::print(s)); } LOG_DEBUG("connectivity ="); - for (std::size_t i = 0u; i < connectivity.size(); ++i) - { + for (std::size_t i = 0u; i < connectivity.size(); ++i) { LOG_DEBUG(" " << core::CContainerPrinter::print(connectivity[i])); } - for (std::size_t i = 0u; i < expectedConnectivity.size(); ++i) - { - for (std::size_t j = 0u; j <= i; ++j) - { + for (std::size_t i = 0u; i < expectedConnectivity.size(); ++i) { + for (std::size_t j = 0u; j <= i; ++j) { CPPUNIT_ASSERT_EQUAL(expectedConnectivity[i][j], connectivity[i][j]); } } } -void CRandomProjectionClustererTest::testClusterNeighbourhoods() -{ +void CRandomProjectionClustererTest::testClusterNeighbourhoods() { LOG_DEBUG("+-------------------------------------------------------------+"); LOG_DEBUG("| CRandomProjectionClustererTest::testClusterNeighbourhoods |"); LOG_DEBUG("+-------------------------------------------------------------+"); @@ -508,23 +421,17 @@ void CRandomProjectionClustererTest::testClusterNeighbourhoods() test::CRandomNumbers rng; std::size_t dimension = 30u; - std::size_t n[] = { 30, 50, 40 }; + std::size_t n[] = {30, 50, 40}; TDoubleVec means[3] = {}; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { rng.generateUniformSamples(0.0, 10.0, dimension, means[i]); LOG_DEBUG("mean = " << core::CContainerPrinter::print(means[i])); } - TDoubleVecVec covariances[] = - { - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)) - }; - for (std::size_t i = 0u; i < boost::size(covariances); ++i) - { - for (std::size_t j = 0u; j < 30; ++j) - { + TDoubleVecVec covariances[] = {TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0))}; + for (std::size_t i = 0u; i < boost::size(covariances); ++i) { + for (std::size_t j = 0u; j < 30; ++j) { covariances[i][j][j] = 1.0 + static_cast(i); } } @@ -533,12 +440,10 @@ void CRandomProjectionClustererTest::testClusterNeighbourhoods() CRandomProjectionClustererForTest<5> clusterer(1.5); clusterer.initialise(4, dimension); - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TDoubleVecVec samples; rng.generateMultivariateNormalSamples(means[i], covariances[i], n[i], samples); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { clusterer.add(TVector(samples[j])); clusters.push_back(i); } @@ -548,8 +453,8 @@ void CRandomProjectionClustererTest::testClusterNeighbourhoods() CRandomProjectionClustererForTest<5>::TVectorNx1VecVec clusterMeans; CRandomProjectionClustererForTest<5>::TSvdNxNVecVec clusterCovariances; CRandomProjectionClustererForTest<5>::TSizeUSet examples; - clusterer.clusterProjections(maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), - weights, clusterMeans, clusterCovariances, examples); + clusterer.clusterProjections( + maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), weights, clusterMeans, clusterCovariances, examples); LOG_DEBUG("examples = " << core::CContainerPrinter::print(examples)); TSizeVecVec neighbourhoods(examples.size()); @@ -560,10 +465,8 @@ void CRandomProjectionClustererTest::testClusterNeighbourhoods() TSizeVecVec expectedClustering(boost::size(n)); LOG_DEBUG("expected clustering ="); - for (std::size_t i 
= 0u, j = 0u; i < boost::size(n); ++i)
-    {
-        for (std::size_t ni = j + n[i]; j < ni; ++j)
-        {
+    for (std::size_t i = 0u, j = 0u; i < boost::size(n); ++i) {
+        for (std::size_t ni = j + n[i]; j < ni; ++j) {
             expectedClustering[i].push_back(j);
         }
         LOG_DEBUG("  " << core::CContainerPrinter::print(expectedClustering[i]));
@@ -572,54 +475,42 @@ void CRandomProjectionClustererTest::testClusterNeighbourhoods()
     TSizeVecVec clustering;
     clusterer.clusterNeighbourhoods(similarities, neighbourhoods, clustering);
 
-    for (std::size_t i = 0u; i < clustering.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < clustering.size(); ++i) {
         std::sort(clustering[i].begin(), clustering[i].end());
     }
     std::sort(clustering.begin(), clustering.end(), SFirstLess());
 
     LOG_DEBUG("clustering =");
-    for (std::size_t i = 0u; i < clustering.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < clustering.size(); ++i) {
         LOG_DEBUG("  " << core::CContainerPrinter::print(clustering[i]));
     }
 
-    for (std::size_t i = 0u; i < expectedClustering.size(); ++i)
-    {
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedClustering[i]),
-                             core::CContainerPrinter::print(clustering[i]));
+    for (std::size_t i = 0u; i < expectedClustering.size(); ++i) {
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedClustering[i]), core::CContainerPrinter::print(clustering[i]));
     }
 }
 
-void CRandomProjectionClustererTest::testAccuracy()
-{
+void CRandomProjectionClustererTest::testAccuracy() {
     LOG_DEBUG("+------------------------------------------------+");
     LOG_DEBUG("|  CRandomProjectionClustererTest::testAccuracy  |");
     LOG_DEBUG("+------------------------------------------------+");
 }
 
-CppUnit::Test *CRandomProjectionClustererTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRandomProjectionClustererTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRandomProjectionClustererTest>(
-                                   "CRandomProjectionClustererTest::testGenerateProjections",
-                                   &CRandomProjectionClustererTest::testGenerateProjections) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRandomProjectionClustererTest>(
-                                   "CRandomProjectionClustererTest::testClusterProjections",
-                                   &CRandomProjectionClustererTest::testClusterProjections) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRandomProjectionClustererTest>(
-                                   "CRandomProjectionClustererTest::testNeighbourhoods",
-                                   &CRandomProjectionClustererTest::testNeighbourhoods) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRandomProjectionClustererTest>(
-                                   "CRandomProjectionClustererTest::testSimilarities",
-                                   &CRandomProjectionClustererTest::testSimilarities) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRandomProjectionClustererTest>(
-                                   "CRandomProjectionClustererTest::testClusterNeighbourhoods",
-                                   &CRandomProjectionClustererTest::testClusterNeighbourhoods) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRandomProjectionClustererTest>(
-                                   "CRandomProjectionClustererTest::testAccuracy",
-                                   &CRandomProjectionClustererTest::testAccuracy) );
+CppUnit::Test* CRandomProjectionClustererTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRandomProjectionClustererTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRandomProjectionClustererTest>(
+        "CRandomProjectionClustererTest::testGenerateProjections", &CRandomProjectionClustererTest::testGenerateProjections));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRandomProjectionClustererTest>("CRandomProjectionClustererTest::testClusterProjections",
+                                                                                  &CRandomProjectionClustererTest::testClusterProjections));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRandomProjectionClustererTest>("CRandomProjectionClustererTest::testNeighbourhoods",
+                                                                                  &CRandomProjectionClustererTest::testNeighbourhoods));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRandomProjectionClustererTest>("CRandomProjectionClustererTest::testSimilarities",
+                                                                                  
&CRandomProjectionClustererTest::testSimilarities)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRandomProjectionClustererTest::testClusterNeighbourhoods", &CRandomProjectionClustererTest::testClusterNeighbourhoods)); + suiteOfTests->addTest(new CppUnit::TestCaller("CRandomProjectionClustererTest::testAccuracy", + &CRandomProjectionClustererTest::testAccuracy)); return suiteOfTests; } diff --git a/lib/maths/unittest/CRandomProjectionClustererTest.h b/lib/maths/unittest/CRandomProjectionClustererTest.h index 847f2de5fd..78c6a9f2ef 100644 --- a/lib/maths/unittest/CRandomProjectionClustererTest.h +++ b/lib/maths/unittest/CRandomProjectionClustererTest.h @@ -9,17 +9,16 @@ #include -class CRandomProjectionClustererTest : public CppUnit::TestFixture -{ - public: - void testGenerateProjections(); - void testClusterProjections(); - void testNeighbourhoods(); - void testSimilarities(); - void testClusterNeighbourhoods(); - void testAccuracy(); +class CRandomProjectionClustererTest : public CppUnit::TestFixture { +public: + void testGenerateProjections(); + void testClusterProjections(); + void testNeighbourhoods(); + void testSimilarities(); + void testClusterNeighbourhoods(); + void testAccuracy(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CRandomProjectionClustererTest_h diff --git a/lib/maths/unittest/CRegressionTest.cc b/lib/maths/unittest/CRegressionTest.cc index 254995c953..20cafe3442 100644 --- a/lib/maths/unittest/CRegressionTest.cc +++ b/lib/maths/unittest/CRegressionTest.cc @@ -25,32 +25,24 @@ using namespace ml; using TDoubleVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; -namespace -{ +namespace { template -T sum(const T ¶ms, const T &delta) -{ +T sum(const T& params, const T& delta) { T result; - for (std::size_t i = 0u; i < params.size(); ++i) - { + for (std::size_t i = 0u; i < params.size(); ++i) { result[i] = params[i] + delta[i]; } return result; } template -double squareResidual(const T ¶ms, - const TDoubleVec &x, - const TDoubleVec &y) -{ +double squareResidual(const T& params, const TDoubleVec& x, const TDoubleVec& y) { double result = 0.0; - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { double yi = 0.0; double xi = 1.0; - for (std::size_t j = 0u; j < params.size(); ++j, xi *= x[i]) - { + for (std::size_t j = 0u; j < params.size(); ++j, xi *= x[i]) { yi += params[j] * xi; } result += (y[i] - yi) * (y[i] - yi); @@ -61,11 +53,9 @@ double squareResidual(const T ¶ms, using TDoubleArray2 = boost::array; using TDoubleArray3 = boost::array; using TDoubleArray4 = boost::array; - } -void CRegressionTest::testInvariants() -{ +void CRegressionTest::testInvariants() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CRegressionTest::testInvariants |"); LOG_DEBUG("+----------------------------------+"); @@ -80,8 +70,7 @@ void CRegressionTest::testInvariants() double slope = 2.0; double curvature = 0.2; - for (std::size_t t = 0u; t < 100; ++t) - { + for (std::size_t t = 0u; t < 100; ++t) { maths::CRegression::CLeastSquaresOnline<2, double> ls; TDoubleVec increments; @@ -92,8 +81,7 @@ void CRegressionTest::testInvariants() TDoubleVec xs; TDoubleVec ys; double x = 0.0; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { x += increments[i]; double y = curvature * x * x + slope * x + intercept + errors[i]; ls.add(x, y); @@ -106,25 +94,22 @@ void CRegressionTest::testInvariants() double residual = 
squareResidual(params, xs, ys); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("params = " << core::CContainerPrinter::print(params)); LOG_DEBUG("residual = " << residual); } TDoubleVec delta; rng.generateUniformSamples(-1e-4, 1e-4, 15, delta); - for (std::size_t j = 0u; j < delta.size(); j += 3) - { + for (std::size_t j = 0u; j < delta.size(); j += 3) { TDoubleArray3 deltaj; deltaj[0] = delta[j]; - deltaj[1] = delta[j+1]; - deltaj[2] = delta[j+2]; + deltaj[1] = delta[j + 1]; + deltaj[2] = delta[j + 2]; double residualj = squareResidual(sum(params, deltaj), xs, ys); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG(" delta residual " << residualj); } @@ -133,8 +118,7 @@ void CRegressionTest::testInvariants() } } -void CRegressionTest::testFit() -{ +void CRegressionTest::testFit() { LOG_DEBUG("+----------------------------+"); LOG_DEBUG("| CRegressionTest::testFit |"); LOG_DEBUG("+----------------------------+"); @@ -150,8 +134,7 @@ void CRegressionTest::testFit() TMeanAccumulator interceptError; TMeanAccumulator slopeError; - for (std::size_t t = 0u; t < 100; ++t) - { + for (std::size_t t = 0u; t < 100; ++t) { maths::CRegression::CLeastSquaresOnline<1> ls; TDoubleVec increments; @@ -160,8 +143,7 @@ void CRegressionTest::testFit() rng.generateNormalSamples(0.0, 2.0, n, errors); double x = 0.0; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { double y = slope * x + intercept + errors[i]; ls.add(x, y); x += increments[i]; @@ -170,8 +152,7 @@ void CRegressionTest::testFit() TDoubleArray2 params; CPPUNIT_ASSERT(ls.parameters(params)); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("params = " << core::CContainerPrinter::print(params)); } @@ -190,8 +171,7 @@ void CRegressionTest::testFit() // Test a variety of the randomly generated polynomial fits. 
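
Aside from the formatting churn, the two properties exercised above are worth stating plainly: a least squares fit minimises the sum of squared residuals, so testInvariants perturbs the fitted parameters and checks the residual cannot decrease, while testFit checks the parameters converge on the generating slope and intercept. A self-contained sketch of the same idea, using a closed-form simple linear regression rather than the library's CLeastSquaresOnline (helper names are illustrative); the polynomial-fit block below then repeats the convergence check for quadratics:

```cpp
#include <cstddef>
#include <vector>

// Ordinary least squares fit of y = a + b*x via the normal equations.
// Assumes at least two distinct x values, so the denominator is non-zero.
void fitLine(const std::vector<double>& x, const std::vector<double>& y, double& a, double& b) {
    double sx = 0.0, sy = 0.0, sxx = 0.0, sxy = 0.0;
    double n = static_cast<double>(x.size());
    for (std::size_t i = 0; i < x.size(); ++i) {
        sx += x[i];
        sy += y[i];
        sxx += x[i] * x[i];
        sxy += x[i] * y[i];
    }
    b = (n * sxy - sx * sy) / (n * sxx - sx * sx);
    a = (sy - b * sx) / n;
}

// Sum of squared residuals for the line y = a + b*x.
double sse(const std::vector<double>& x, const std::vector<double>& y, double a, double b) {
    double r = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i) {
        double e = y[i] - (a + b * x[i]);
        r += e * e;
    }
    return r;
}

// The minimiser property checked by testInvariants, in miniature:
//   double a, b;
//   fitLine(x, y, a, b);
//   assert(sse(x, y, a + 1e-4, b) >= sse(x, y, a, b));
```
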
{ - for (std::size_t t = 0u; t < 10; ++t) - { + for (std::size_t t = 0u; t < 10; ++t) { maths::CRegression::CLeastSquaresOnline<2, double> ls; TDoubleVec curve; @@ -201,8 +181,7 @@ void CRegressionTest::testFit() rng.generateUniformSamples(1.0, 2.0, n, increments); double x = 0.0; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { double y = curve[2] * x * x + curve[1] * x + curve[0]; ls.add(x, y); x += increments[i]; @@ -213,16 +192,14 @@ void CRegressionTest::testFit() LOG_DEBUG("curve = " << core::CContainerPrinter::print(curve)); LOG_DEBUG("params = " << core::CContainerPrinter::print(params)); - for (std::size_t i = 0u; i < curve.size(); ++i) - { + for (std::size_t i = 0u; i < curve.size(); ++i) { CPPUNIT_ASSERT_DOUBLES_EQUAL(curve[i], params[i], 0.03 * curve[i]); } } } } -void CRegressionTest::testShiftAbscissa() -{ +void CRegressionTest::testShiftAbscissa() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CRegressionTest::testShiftAbscissa |"); LOG_DEBUG("+--------------------------------------+"); @@ -237,8 +214,7 @@ void CRegressionTest::testShiftAbscissa() maths::CRegression::CLeastSquaresOnline<1> ls; maths::CRegression::CLeastSquaresOnline<1> lss; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { double x = static_cast(i); ls.add(x, slope * x + intercept); lss.add((x - 50.0), slope * x + intercept); @@ -272,8 +248,7 @@ void CRegressionTest::testShiftAbscissa() maths::CRegression::CLeastSquaresOnline<2, double> ls; maths::CRegression::CLeastSquaresOnline<2, double> lss; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { double x = static_cast(i); ls.add(x, curvature * x * x + slope * x + intercept); lss.add(x - 50.0, curvature * x * x + slope * x + intercept); @@ -304,8 +279,7 @@ void CRegressionTest::testShiftAbscissa() } } -void CRegressionTest::testShiftOrdinate() -{ +void CRegressionTest::testShiftOrdinate() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CRegressionTest::testShiftOrdinate |"); LOG_DEBUG("+--------------------------------------+"); @@ -315,8 +289,7 @@ void CRegressionTest::testShiftOrdinate() // of the derivatives. maths::CRegression::CLeastSquaresOnline<3, double> regression; - for (double x = 0.0; x < 100.0; x += 1.0) - { + for (double x = 0.0; x < 100.0; x += 1.0) { regression.add(x, 0.01 * x * x * x - 0.2 * x * x + 1.0 * x + 10.0); } @@ -332,13 +305,12 @@ void CRegressionTest::testShiftOrdinate() LOG_DEBUG("parameters 2 = " << core::CContainerPrinter::print(params2)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1000.0 + params1[0], params2[0], 1e-6 * std::fabs(params1[0])); - CPPUNIT_ASSERT_DOUBLES_EQUAL( params1[1], params2[1], 1e-6 * std::fabs(params1[1])); - CPPUNIT_ASSERT_DOUBLES_EQUAL( params1[2], params2[2], 1e-6 * std::fabs(params1[2])); - CPPUNIT_ASSERT_DOUBLES_EQUAL( params1[3], params2[3], 1e-6 * std::fabs(params1[3])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[1], params2[1], 1e-6 * std::fabs(params1[1])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[2], params2[2], 1e-6 * std::fabs(params1[2])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[3], params2[3], 1e-6 * std::fabs(params1[3])); } -void CRegressionTest::testShiftGradient() -{ +void CRegressionTest::testShiftGradient() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CRegressionTest::testShiftGradient |"); LOG_DEBUG("+--------------------------------------+"); @@ -348,8 +320,7 @@ void CRegressionTest::testShiftGradient() // of the derivatives. 
maths::CRegression::CLeastSquaresOnline<3, double> regression; - for (double x = 0.0; x < 100.0; x += 1.0) - { + for (double x = 0.0; x < 100.0; x += 1.0) { regression.add(x, 0.01 * x * x * x - 0.2 * x * x + 1.0 * x + 10.0); } @@ -364,14 +335,13 @@ void CRegressionTest::testShiftGradient() LOG_DEBUG("parameters 1 = " << core::CContainerPrinter::print(params1)); LOG_DEBUG("parameters 2 = " << core::CContainerPrinter::print(params2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL( params1[0], params2[0], 1e-6 * std::fabs(params1[0])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[0], params2[0], 1e-6 * std::fabs(params1[0])); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0 + params1[1], params2[1], 1e-6 * std::fabs(params1[1])); - CPPUNIT_ASSERT_DOUBLES_EQUAL( params1[2], params2[2], 1e-6 * std::fabs(params1[2])); - CPPUNIT_ASSERT_DOUBLES_EQUAL( params1[3], params2[3], 1e-6 * std::fabs(params1[3])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[2], params2[2], 1e-6 * std::fabs(params1[2])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[3], params2[3], 1e-6 * std::fabs(params1[3])); } -void CRegressionTest::testLinearScale() -{ +void CRegressionTest::testLinearScale() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CRegressionTest::testLinearScale |"); LOG_DEBUG("+------------------------------------+"); @@ -380,8 +350,7 @@ void CRegressionTest::testLinearScale() // scales all the parameters. maths::CRegression::CLeastSquaresOnline<3, double> regression; - for (double x = 0.0; x < 100.0; x += 1.0) - { + for (double x = 0.0; x < 100.0; x += 1.0) { regression.add(x, 0.01 * x * x * x - 0.2 * x * x + 1.0 * x + 10.0); } @@ -396,8 +365,7 @@ void CRegressionTest::testLinearScale() LOG_DEBUG("parameters 1 = " << core::CContainerPrinter::print(params1)); LOG_DEBUG("parameters 2 = " << core::CContainerPrinter::print(params2)); - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { CPPUNIT_ASSERT_DOUBLES_EQUAL(0.1 * params1[i], params2[i], 1e-6); } @@ -408,14 +376,12 @@ void CRegressionTest::testLinearScale() LOG_DEBUG("parameters 1 = " << core::CContainerPrinter::print(params1)); LOG_DEBUG("parameters 2 = " << core::CContainerPrinter::print(params2)); - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0 * params1[i], params2[i], 1e-6); } } -void CRegressionTest::testAge() -{ +void CRegressionTest::testAge() { LOG_DEBUG("+----------------------------+"); LOG_DEBUG("| CRegressionTest::testAge |"); LOG_DEBUG("+----------------------------+"); @@ -429,8 +395,7 @@ void CRegressionTest::testAge() { maths::CRegression::CLeastSquaresOnline<1> ls; - for (std::size_t i = 0u; i <= 100; ++i) - { + for (std::size_t i = 0u; i <= 100; ++i) { double x = static_cast(i); ls.add(x, slope * x + intercept, 5.0); } @@ -481,8 +446,7 @@ void CRegressionTest::testAge() { maths::CRegression::CLeastSquaresOnline<2, double> ls; - for (std::size_t i = 0u; i <= 100; ++i) - { + for (std::size_t i = 0u; i <= 100; ++i) { double x = static_cast(i); ls.add(x, curvature * x * x + slope * x + intercept, 5.0); } @@ -539,8 +503,7 @@ void CRegressionTest::testAge() } } -void CRegressionTest::testPrediction() -{ +void CRegressionTest::testPrediction() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CRegressionTest::testPrediction |"); LOG_DEBUG("+-----------------------------------+"); @@ -562,8 +525,7 @@ void CRegressionTest::testPrediction() TMeanAccumulator e4; double x0 = 0.0; - for (std::size_t i = 0u; i <= 400; ++i) - { + for (std::size_t i = 0u; i <= 
400; ++i) { double x = 0.005 * pi * static_cast(i); double y = std::sin(x); @@ -579,8 +541,7 @@ void CRegressionTest::testPrediction() ls3.add(x - x0, y); ls3.age(0.95); - if (x > x0 + 2.0) - { + if (x > x0 + 2.0) { ls1.shiftAbscissa(-2.0); ls2.shiftAbscissa(-2.0); ls3.shiftAbscissa(-2.0); @@ -593,24 +554,14 @@ void CRegressionTest::testPrediction() TDoubleArray3 params3; ls2.parameters(params3); - double y3 = params3[2] * (x - x0) * (x - x0) - + params3[1] * (x - x0) - + params3[0]; + double y3 = params3[2] * (x - x0) * (x - x0) + params3[1] * (x - x0) + params3[0]; TDoubleArray4 params4; ls3.parameters(params4); - double y4 = params4[3] * (x - x0) * (x - x0) * (x - x0) - + params4[2] * (x - x0) * (x - x0) - + params4[1] * (x - x0) - + params4[0]; - - if (i % 10 == 0) - { - LOG_DEBUG("y = " << y - << ", m = " << maths::CBasicStatistics::mean(m) - << ", y2 = " << y2 - << ", y3 = " << y3 - << ", y4 = " << y4); + double y4 = params4[3] * (x - x0) * (x - x0) * (x - x0) + params4[2] * (x - x0) * (x - x0) + params4[1] * (x - x0) + params4[0]; + + if (i % 10 == 0) { + LOG_DEBUG("y = " << y << ", m = " << maths::CBasicStatistics::mean(m) << ", y2 = " << y2 << ", y3 = " << y3 << ", y4 = " << y4); } em.add((y - maths::CBasicStatistics::mean(m)) * (y - maths::CBasicStatistics::mean(m))); @@ -619,17 +570,14 @@ void CRegressionTest::testPrediction() e4.add((y - y4) * (y - y4)); } - LOG_DEBUG("em = " << maths::CBasicStatistics::mean(em) - << ", e2 = " << maths::CBasicStatistics::mean(e2) - << ", e3 = " << maths::CBasicStatistics::mean(e3) - << ", e4 = " << maths::CBasicStatistics::mean(e4)); + LOG_DEBUG("em = " << maths::CBasicStatistics::mean(em) << ", e2 = " << maths::CBasicStatistics::mean(e2) + << ", e3 = " << maths::CBasicStatistics::mean(e3) << ", e4 = " << maths::CBasicStatistics::mean(e4)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e2) < 0.27 * maths::CBasicStatistics::mean(em)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e3) < 0.08 * maths::CBasicStatistics::mean(em)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e4) < 0.025 * maths::CBasicStatistics::mean(em)); } -void CRegressionTest::testCombination() -{ +void CRegressionTest::testCombination() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CRegressionTest::testCombination |"); LOG_DEBUG("+------------------------------------+"); @@ -653,15 +601,13 @@ void CRegressionTest::testCombination() maths::CRegression::CLeastSquaresOnline<2> lsB; maths::CRegression::CLeastSquaresOnline<2> ls; - for (std::size_t i = 0u; i < (2 * n) / 3; ++i) - { + for (std::size_t i = 0u; i < (2 * n) / 3; ++i) { double x = static_cast(i); double y = curvature * x * x + slope * x + intercept + errors[i]; lsA.add(x, y); ls.add(x, y); } - for (std::size_t i = (2 * n) / 3; i < n; ++i) - { + for (std::size_t i = (2 * n) / 3; i < n; ++i) { double x = static_cast(i); double y = curvature * x * x + slope * x + intercept + errors[i]; lsB.add(x, y); @@ -679,19 +625,17 @@ void CRegressionTest::testCombination() TDoubleArray3 paramsAPlusB; lsAPlusB.parameters(paramsAPlusB); - LOG_DEBUG("params A = " < regression; - for (std::size_t i = 0u; i < 20; ++i) - { + for (std::size_t i = 0u; i < 20; ++i) { double x = static_cast(i); regression.add(x, 5.0 + 0.3 * x); } @@ -838,42 +780,35 @@ void CRegressionTest::testScale() LOG_DEBUG("statistic = " << regression2.statistic()); TDoubleArray2 params2; regression2.parameters(params2); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params1), - core::CContainerPrinter::print(params2)); + 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params1), core::CContainerPrinter::print(params2)); CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::count(regression2.statistic()), 10.0); maths::CRegression::CLeastSquaresOnline<1, double> regression3 = regression2.scaled(0.5); LOG_DEBUG("statistic = " << regression3.statistic()); TDoubleArray2 params3; regression3.parameters(params3); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params1), - core::CContainerPrinter::print(params3)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params1), core::CContainerPrinter::print(params3)); CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::count(regression3.statistic()), 5.0); } template -class CRegressionPrediction -{ - public: - using result_type = double; - - public: - CRegressionPrediction(const maths::CRegression::CLeastSquaresOnline ®ression) : - m_Regression(regression) - {} - - bool operator()(double x, double &result) const - { - result = m_Regression.predict(x); - return true; - } +class CRegressionPrediction { +public: + using result_type = double; + +public: + CRegressionPrediction(const maths::CRegression::CLeastSquaresOnline& regression) : m_Regression(regression) {} + + bool operator()(double x, double& result) const { + result = m_Regression.predict(x); + return true; + } - private: - maths::CRegression::CLeastSquaresOnline m_Regression; +private: + maths::CRegression::CLeastSquaresOnline m_Regression; }; -void CRegressionTest::testMean() -{ +void CRegressionTest::testMean() { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| CRegressionTest::testMean |"); LOG_DEBUG("+-----------------------------+"); @@ -882,22 +817,16 @@ void CRegressionTest::testMean() // of the regression. test::CRandomNumbers rng; - for (std::size_t i = 0; i < 5; ++i) - { + for (std::size_t i = 0; i < 5; ++i) { TDoubleVec coeffs; rng.generateUniformSamples(-1.0, 1.0, 4, coeffs); maths::CRegression::CLeastSquaresOnline<3, double> regression; - for (double x = 0.0; x < 10.0; x += 1.0) - { - regression.add(x, 0.2 * coeffs[0] * x * x * x - + 0.4 * coeffs[1] * x * x - + coeffs[2] * x - + 2.0 * coeffs[3]); + for (double x = 0.0; x < 10.0; x += 1.0) { + regression.add(x, 0.2 * coeffs[0] * x * x * x + 0.4 * coeffs[1] * x * x + coeffs[2] * x + 2.0 * coeffs[3]); } double expected; - maths::CIntegration::gaussLegendre(CRegressionPrediction<3>(regression), - 10.0, 15.0, expected); + maths::CIntegration::gaussLegendre(CRegressionPrediction<3>(regression), 10.0, 15.0, expected); expected /= 5.0; double actual = regression.mean(10.0, 15.0); LOG_DEBUG("expected = " << expected); @@ -905,8 +834,7 @@ void CRegressionTest::testMean() CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-6); // Test interval spanning 0.0. - maths::CIntegration::gaussLegendre(CRegressionPrediction<3>(regression), - -3.0, 0.0, expected); + maths::CIntegration::gaussLegendre(CRegressionPrediction<3>(regression), -3.0, 0.0, expected); expected /= 3.0; actual = regression.mean(-3.0, 0.0); LOG_DEBUG("expected = " << expected); @@ -914,8 +842,8 @@ void CRegressionTest::testMean() CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-6); // Test zero length interval. 
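
testMean cross-checks the regression's analytic interval mean against numerical integration. The same check can be reproduced with a hand-rolled three-point Gauss-Legendre rule, which is exact for polynomials up to degree five and therefore for the cubic regressions used here; this is a sketch of the quadrature idea, not the signature of maths::CIntegration. The zero-length-interval case below guards the degenerate limit of the same computation:

```cpp
#include <cmath>
#include <functional>

// Mean of f over [a, b] via 3-point Gauss-Legendre quadrature.
// Nodes/weights are for the reference interval [-1, 1], mapped affinely to [a, b].
double gaussLegendreMean(const std::function<double(double)>& f, double a, double b) {
    const double nodes[] = {-std::sqrt(0.6), 0.0, std::sqrt(0.6)};
    const double weights[] = {5.0 / 9.0, 8.0 / 9.0, 5.0 / 9.0};
    double integral = 0.0;
    for (int i = 0; i < 3; ++i) {
        double x = 0.5 * (a + b) + 0.5 * (b - a) * nodes[i];
        integral += weights[i] * f(x);
    }
    integral *= 0.5 * (b - a);
    return integral / (b - a); // interval mean, the quantity regression.mean(a, b) returns
}
```
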
- maths::CIntegration::gaussLegendre(CRegressionPrediction<3>(regression), - -3.0, -3.0 + 1e-7, expected); + maths::CIntegration::gaussLegendre( + CRegressionPrediction<3>(regression), -3.0, -3.0 + 1e-7, expected); expected /= 1e-7; actual = regression.mean(-3.0, -3.0); LOG_DEBUG("expected = " << expected); @@ -924,8 +852,7 @@ void CRegressionTest::testMean() } } -void CRegressionTest::testCovariances() -{ +void CRegressionTest::testCovariances() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CRegressionTest::testCovariances |"); LOG_DEBUG("+------------------------------------+"); @@ -947,13 +874,11 @@ void CRegressionTest::testCovariances() double variance = 16.0; maths::CBasicStatistics::SSampleCovariances covariances; - for (std::size_t i = 0u; i < 500; ++i) - { + for (std::size_t i = 0u; i < 500; ++i) { TDoubleVec noise; rng.generateNormalSamples(0.0, variance, static_cast(n), noise); maths::CRegression::CLeastSquaresOnline<1, double> regression; - for (double x = 0.0; x < n; x += 1.0) - { + for (double x = 0.0; x < n; x += 1.0) { regression.add(x, 1.5 * x + noise[static_cast(x)]); } TDoubleArray2 params; @@ -963,8 +888,7 @@ void CRegressionTest::testCovariances() TMatrix2 expected = maths::CBasicStatistics::covariances(covariances); maths::CRegression::CLeastSquaresOnline<1, double> regression; - for (double x = 0.0; x < n; x += 1.0) - { + for (double x = 0.0; x < n; x += 1.0) { regression.add(x, 1.5 * x); } TMatrix2 actual; @@ -981,13 +905,11 @@ void CRegressionTest::testCovariances() double variance = 16.0; maths::CBasicStatistics::SSampleCovariances covariances; - for (std::size_t i = 0u; i < 500; ++i) - { + for (std::size_t i = 0u; i < 500; ++i) { TDoubleVec noise; rng.generateNormalSamples(0.0, variance, static_cast(n), noise); maths::CRegression::CLeastSquaresOnline<2, double> regression; - for (double x = 0.0; x < n; x += 1.0) - { + for (double x = 0.0; x < n; x += 1.0) { regression.add(x, 0.25 * x * x + 1.5 * x + noise[static_cast(x)]); } TDoubleArray3 params; @@ -997,8 +919,7 @@ void CRegressionTest::testCovariances() TMatrix3 expected = maths::CBasicStatistics::covariances(covariances); maths::CRegression::CLeastSquaresOnline<2, double> regression; - for (double x = 0.0; x < n; x += 1.0) - { + for (double x = 0.0; x < n; x += 1.0) { regression.add(x, 0.25 * x * x + 1.5 * x); } TMatrix3 actual; @@ -1010,22 +931,19 @@ void CRegressionTest::testCovariances() } } -void CRegressionTest::testParameters() -{ +void CRegressionTest::testParameters() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CRegressionTest::testParameters |"); LOG_DEBUG("+-----------------------------------+"); maths::CRegression::CLeastSquaresOnline<3, double> regression; - for (std::size_t i = 0u; i < 20; ++i) - { + for (std::size_t i = 0u; i < 20; ++i) { double x = static_cast(i); regression.add(x, 5.0 + 0.3 * x + 0.5 * x * x - 0.03 * x * x * x); } - for (std::size_t i = 20u; i < 25; ++i) - { + for (std::size_t i = 20u; i < 25; ++i) { TDoubleArray4 params1 = regression.parameters(static_cast(i - 19)); maths::CRegression::CLeastSquaresOnline<3, double> regression2(regression); @@ -1035,13 +953,11 @@ void CRegressionTest::testParameters() LOG_DEBUG("params 1 = " << core::CContainerPrinter::print(params1)); LOG_DEBUG("params 2 = " << core::CContainerPrinter::print(params2)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params2), - core::CContainerPrinter::print(params1)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params2), 
core::CContainerPrinter::print(params1)); } } -void CRegressionTest::testPersist() -{ +void CRegressionTest::testPersist() { LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| CRegressionTest::testPersist |"); LOG_DEBUG("+--------------------------------+"); @@ -1050,8 +966,7 @@ void CRegressionTest::testPersist() maths::CRegression::CLeastSquaresOnline<2, double> origRegression; - for (std::size_t i = 0u; i < 20; ++i) - { + for (std::size_t i = 0u; i < 20; ++i) { double x = static_cast(i); origRegression.add(x, 5.0 + 0.3 * x + 0.5 * x * x); } @@ -1071,12 +986,10 @@ void CRegressionTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CRegression::CLeastSquaresOnline<2, double> restoredRegression; - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( - &maths::CRegression::CLeastSquaresOnline<2, double>::acceptRestoreTraverser, - &restoredRegression, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(&maths::CRegression::CLeastSquaresOnline<2, double>::acceptRestoreTraverser, &restoredRegression, _1))); - CPPUNIT_ASSERT_EQUAL(origRegression.checksum(), - restoredRegression.checksum()); + CPPUNIT_ASSERT_EQUAL(origRegression.checksum(), restoredRegression.checksum()); std::string restoredXml; { @@ -1087,8 +1000,7 @@ void CRegressionTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, restoredXml); } -void CRegressionTest::testParameterProcess() -{ +void CRegressionTest::testParameterProcess() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CRegressionTest::testParameterProcess |"); LOG_DEBUG("+-----------------------------------------+"); @@ -1108,24 +1020,20 @@ void CRegressionTest::testParameterProcess() test::CRandomNumbers rng; - double variances[] = { 1.0, 0.5, 0.1, 5.0, 10.0 }; - double intervals[] = { 0.4, 0.4, 0.8, 0.6, 0.7, 0.5, 0.6, 1.3, 0.3, 1.7, - 0.3, 0.5, 1.0, 0.2, 0.3, 0.1, 0.5, 1.4, 0.7, 0.9, - 0.1, 0.4, 0.8, 1.0, 0.6, 0.5, 0.8, 1.3, 0.3, 1.7, - 0.3, 1.2, 0.3, 1.2, 0.3, 0.1, 0.5, 0.4, 0.7, 0.9, - 0.8, 0.6, 0.8, 1.1, 0.6, 0.5, 0.5, 1.3, 0.3, 0.7 }; + double variances[] = {1.0, 0.5, 0.1, 5.0, 10.0}; + double intervals[] = {0.4, 0.4, 0.8, 0.6, 0.7, 0.5, 0.6, 1.3, 0.3, 1.7, 0.3, 0.5, 1.0, 0.2, 0.3, 0.1, 0.5, + 1.4, 0.7, 0.9, 0.1, 0.4, 0.8, 1.0, 0.6, 0.5, 0.8, 1.3, 0.3, 1.7, 0.3, 1.2, 0.3, 1.2, + 0.3, 0.1, 0.5, 0.4, 0.7, 0.9, 0.8, 0.6, 0.8, 1.1, 0.6, 0.5, 0.5, 1.3, 0.3, 0.7}; TMeanAccumulator error; - for (std::size_t test = 0u; test < boost::size(variances); ++test) - { + for (std::size_t test = 0u; test < boost::size(variances); ++test) { LOG_DEBUG("variance = " << variances[test]); TMeanAccumulator actual; TMeanAccumulator estimate; - for (std::size_t run = 0u; run < 25; ++run) - { + for (std::size_t run = 0u; run < 25; ++run) { maths::CRegression::CLeastSquaresOnline<3, double> regression; maths::CRegression::CLeastSquaresOnlineParameterProcess<4, double> parameterProcess; @@ -1133,14 +1041,11 @@ void CRegressionTest::testParameterProcess() double x = 0.0; double v = 5.0; double a = 1.0; - for (std::size_t i = 0u; i < boost::size(intervals); t += intervals[i], ++i) - { + for (std::size_t i = 0u; i < boost::size(intervals); t += intervals[i], ++i) { double dt = intervals[i]; TDoubleVec da; - rng.generateNormalSamples(0.0, variances[test], - static_cast(dt / 0.05), da); - for (auto da_ : da) - { + rng.generateNormalSamples(0.0, variances[test], static_cast(dt / 0.05), da); + for (auto da_ : da) { x += (v + 0.5 * a * 0.05) * 0.05; v += a * 0.05; a += da_; @@ -1150,27 +1055,22 @@ void 
CRegressionTest::testParameterProcess() TVector paramsDrift(regression.parameters(t + dt)); regression.add(t + dt, x); paramsDrift -= TVector(regression.parameters(t + dt)); - if (sufficientHistoryBeforeUpdate && regression.range() >= 1.0) - { + if (sufficientHistoryBeforeUpdate && regression.range() >= 1.0) { parameterProcess.add(t + dt, paramsDrift, TVector(dt)); } parameterProcess.age(std::exp(-0.05 * intervals[i])); } TMeanVarAccumulator moments; - for (std::size_t trial = 0u; trial < 500; ++trial) - { + for (std::size_t trial = 0u; trial < 500; ++trial) { double xt = 0.0; double vt = 0.0; double at = 0.0; - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { double dt = intervals[i]; TDoubleVec da; - rng.generateNormalSamples(0.0, variances[test], - static_cast(dt / 0.05), da); - for (auto da_ : da) - { + rng.generateNormalSamples(0.0, variances[test], static_cast(dt / 0.05), da); + for (auto da_ : da) { xt += (vt + 0.5 * at * 0.05) * 0.05; vt += at * 0.05; at += da_; @@ -1180,10 +1080,8 @@ void CRegressionTest::testParameterProcess() } double interval = std::accumulate(intervals, intervals + 5, 0.0); - if (run % 5 == 0) - { - LOG_DEBUG(" " << maths::CBasicStatistics::variance(moments) - << " vs " << parameterProcess.predictionVariance(interval)); + if (run % 5 == 0) { + LOG_DEBUG(" " << maths::CBasicStatistics::variance(moments) << " vs " << parameterProcess.predictionVariance(interval)); } actual.add(maths::CBasicStatistics::variance(moments)); estimate.add(parameterProcess.predictionVariance(interval)); @@ -1191,70 +1089,40 @@ void CRegressionTest::testParameterProcess() LOG_DEBUG("actual = " << maths::CBasicStatistics::mean(actual)); LOG_DEBUG("estimate = " << maths::CBasicStatistics::mean(estimate)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(actual), - maths::CBasicStatistics::mean(estimate), - 0.25 * maths::CBasicStatistics::mean(actual)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(actual), maths::CBasicStatistics::mean(estimate), 0.25 * maths::CBasicStatistics::mean(actual)); - error.add(( maths::CBasicStatistics::mean(actual) - - maths::CBasicStatistics::mean(estimate)) / maths::CBasicStatistics::mean(actual)); + error.add((maths::CBasicStatistics::mean(actual) - maths::CBasicStatistics::mean(estimate)) / + maths::CBasicStatistics::mean(actual)); } LOG_DEBUG("error = " << maths::CBasicStatistics::mean(error)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(error)) < 0.08); } -CppUnit::Test *CRegressionTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRegressionTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRegressionTest::testInvariants", - &CRegressionTest::testInvariants) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRegressionTest::testFit", - &CRegressionTest::testFit) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRegressionTest::testShiftAbscissa", - &CRegressionTest::testShiftAbscissa) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRegressionTest::testShiftOrdinate", - &CRegressionTest::testShiftOrdinate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRegressionTest::testShiftGradient", - &CRegressionTest::testShiftGradient) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRegressionTest::testLinearScale", - &CRegressionTest::testLinearScale) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CRegressionTest::testAge", - &CRegressionTest::testAge) ); - suiteOfTests->addTest( new 
CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testPrediction",
-                                   &CRegressionTest::testPrediction) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testCombination",
-                                   &CRegressionTest::testCombination) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testSingular",
-                                   &CRegressionTest::testSingular) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testScale",
-                                   &CRegressionTest::testScale) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testMean",
-                                   &CRegressionTest::testMean) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testCovariances",
-                                   &CRegressionTest::testCovariances) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testParameters",
-                                   &CRegressionTest::testParameters) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testPersist",
-                                   &CRegressionTest::testPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CRegressionTest>(
-                                   "CRegressionTest::testParameterProcess",
-                                   &CRegressionTest::testParameterProcess) );
+CppUnit::Test* CRegressionTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRegressionTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testInvariants", &CRegressionTest::testInvariants));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testFit", &CRegressionTest::testFit));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testShiftAbscissa", &CRegressionTest::testShiftAbscissa));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testShiftOrdinate", &CRegressionTest::testShiftOrdinate));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testShiftGradient", &CRegressionTest::testShiftGradient));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testLinearScale", &CRegressionTest::testLinearScale));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testAge", &CRegressionTest::testAge));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testPrediction", &CRegressionTest::testPrediction));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testCombination", &CRegressionTest::testCombination));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testSingular", &CRegressionTest::testSingular));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testScale", &CRegressionTest::testScale));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testMean", &CRegressionTest::testMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testCovariances", &CRegressionTest::testCovariances));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testParameters", &CRegressionTest::testParameters));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testPersist", &CRegressionTest::testPersist));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CRegressionTest>("CRegressionTest::testParameterProcess", &CRegressionTest::testParameterProcess));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CRegressionTest.h b/lib/maths/unittest/CRegressionTest.h
index b84f1dd6ad..2e64708dcd 100644
--- a/lib/maths/unittest/CRegressionTest.h
+++ b/lib/maths/unittest/CRegressionTest.h
@@ -9,27 +9,26 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CRegressionTest : public CppUnit::TestFixture
-{
-    public:
-        void testInvariants();
-        void testFit();
-        void testShiftAbscissa();
-        void testShiftOrdinate();
-        void testShiftGradient();
-        void testLinearScale();
-        void testAge();
-        void testPrediction();
-        void testCombination();
-        void testSingular();
-        void testScale();
-        void testMean();
-        void testCovariances();
-        void testParameters();
-        void testPersist();
-        void testParameterProcess();
+class CRegressionTest : public CppUnit::TestFixture {
+public:
+    void testInvariants();
+    void testFit();
+    void testShiftAbscissa();
+    void testShiftOrdinate();
+    void testShiftGradient();
+    void testLinearScale();
+    void testAge();
+    void testPrediction();
+    void testCombination();
+    void testSingular();
+    void testScale();
+    void testMean();
+    void testCovariances();
+    void testParameters();
+    void testPersist();
+    void testParameterProcess();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CRegressionTest_h
diff --git a/lib/maths/unittest/CSamplingTest.cc b/lib/maths/unittest/CSamplingTest.cc
index cfe4d102aa..6bcdb1b343 100644
--- a/lib/maths/unittest/CSamplingTest.cc
+++ b/lib/maths/unittest/CSamplingTest.cc
@@ -22,52 +22,39 @@ using TSizeVec = std::vector<std::size_t>;
 
 using namespace ml;
 
-namespace
-{
+namespace {
 using TDoubleVecVec = std::vector<TDoubleVec>;
 
-double multinomialProbability(const TDoubleVec &probabilities,
-                              const TSizeVec &counts)
-{
+double multinomialProbability(const TDoubleVec& probabilities, const TSizeVec& counts) {
     std::size_t n = std::accumulate(counts.begin(), counts.end(), std::size_t(0));
 
     double logP = boost::math::lgamma(static_cast<double>(n + 1));
-    for (std::size_t i = 0u; i < counts.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < counts.size(); ++i) {
         double ni = static_cast<double>(counts[i]);
-        if (ni > 0.0)
-        {
+        if (ni > 0.0) {
             logP += ni * std::log(probabilities[i]) - boost::math::lgamma(ni + 1.0);
         }
     }
     return std::exp(logP);
 }
 
-namespace test_detail
-{
+namespace test_detail {
 
 //! Subtract two vectors.
-TDoubleVec minus(const TDoubleVec &v1,
-                 const TDoubleVec &v2)
-{
+TDoubleVec minus(const TDoubleVec& v1, const TDoubleVec& v2) {
     TDoubleVec result;
-    for (std::size_t i = 0u; i < v1.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < v1.size(); ++i) {
         result.push_back(v1[i] - v2[i]);
     }
     return result;
 }
 
 //! Subtract two matrices.
-TDoubleVecVec minus(const TDoubleVecVec &m1,
-                    const TDoubleVecVec &m2)
-{
+TDoubleVecVec minus(const TDoubleVecVec& m1, const TDoubleVecVec& m2) {
    TDoubleVecVec result;
-    for (std::size_t i = 0u; i < m1.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m1.size(); ++i) {
         result.push_back(TDoubleVec());
-        for (std::size_t j = 0u; j < m1[i].size(); ++j)
-        {
+        for (std::size_t j = 0u; j < m1[i].size(); ++j) {
             result.back().push_back(m1[i][j] - m2[i][j]);
         }
     }
@@ -75,15 +62,11 @@ TDoubleVecVec minus(const TDoubleVecVec &m1,
 }
 
 //! Compute the outer product of two vectors.
-TDoubleVecVec outer(const TDoubleVec &v1,
-                    const TDoubleVec &v2)
-{
+TDoubleVecVec outer(const TDoubleVec& v1, const TDoubleVec& v2) {
     TDoubleVecVec result;
-    for (std::size_t i = 0u; i < v1.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < v1.size(); ++i) {
         result.push_back(TDoubleVec());
-        for (std::size_t j = 0u; j < v2.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < v2.size(); ++j) {
             result.back().push_back(v1[i] * v2[j]);
         }
     }
@@ -91,60 +74,46 @@ TDoubleVecVec outer(const TDoubleVec &v1,
 }
 
 //! Add two matrices.
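
Stepping back from the diff for a moment: the multinomialProbability helper above computes P(n1, ..., nk) = n! / (n1! ... nk!) * prod(pi^ni) in log space, so the factorials never overflow and only the final result is exponentiated. The same pattern works with std::lgamma when Boost.Math is not available; a sketch of the identical formula:

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// log P for a multinomial outcome:
//   lgamma(n + 1) - sum_i lgamma(ni + 1) + sum_i ni * log(pi).
double multinomialLogProbability(const std::vector<double>& p, const std::vector<std::size_t>& counts) {
    std::size_t n = 0;
    for (auto c : counts) {
        n += c;
    }
    double logP = std::lgamma(static_cast<double>(n + 1));
    for (std::size_t i = 0; i < counts.size(); ++i) {
        double ni = static_cast<double>(counts[i]);
        if (ni > 0.0) {
            logP += ni * std::log(p[i]) - std::lgamma(ni + 1.0);
        }
    }
    return logP; // exponentiate only at the end, when it is safe to do so
}
```
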
-void add(const TDoubleVecVec &m1, TDoubleVecVec &m2) -{ - for (std::size_t i = 0u; i < m1.size(); ++i) - { - for (std::size_t j = 0u; j < m1[i].size(); ++j) - { +void add(const TDoubleVecVec& m1, TDoubleVecVec& m2) { + for (std::size_t i = 0u; i < m1.size(); ++i) { + for (std::size_t j = 0u; j < m1[i].size(); ++j) { m2[i][j] += m1[i][j]; } } } //! Divide a matrix by a constant. -void divide(TDoubleVecVec &m, double c) -{ - for (std::size_t i = 0u; i < m.size(); ++i) - { - for (std::size_t j = 0u; j < m[i].size(); ++j) - { +void divide(TDoubleVecVec& m, double c) { + for (std::size_t i = 0u; i < m.size(); ++i) { + for (std::size_t j = 0u; j < m[i].size(); ++j) { m[i][j] /= c; } } } //! Euclidean norm of a vector. -double euclidean(const TDoubleVec &v) -{ +double euclidean(const TDoubleVec& v) { double result = 0.0; - for (std::size_t i = 0u; i < v.size(); ++i) - { + for (std::size_t i = 0u; i < v.size(); ++i) { result += v[i] * v[i]; } return std::sqrt(result); } //! Frobenius norm of a matrix. -double frobenius(const TDoubleVecVec &m) -{ +double frobenius(const TDoubleVecVec& m) { double result = 0.0; - for (std::size_t i = 0u; i < m.size(); ++i) - { - for (std::size_t j = 0u; j < m.size(); ++j) - { + for (std::size_t i = 0u; i < m.size(); ++i) { + for (std::size_t j = 0u; j < m.size(); ++j) { result += m[i][j] * m[i][j]; } } return std::sqrt(result); } - } - } -void CSamplingTest::testMultinomialSample() -{ +void CSamplingTest::testMultinomialSample() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CSamplingTest::testMultinomialSample |"); LOG_DEBUG("+----------------------------------------+"); @@ -154,32 +123,25 @@ void CSamplingTest::testMultinomialSample() maths::CSampling::seed(); - double probabilities_[] = { 0.4, 0.25, 0.2, 0.15 }; + double probabilities_[] = {0.4, 0.25, 0.2, 0.15}; - TDoubleVec probabilities(boost::begin(probabilities_), - boost::end(probabilities_)); + TDoubleVec probabilities(boost::begin(probabilities_), boost::end(probabilities_)); TSizeVecDoubleMap empiricalProbabilities; std::size_t n = 1000000u; TSizeVec sample; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { maths::CSampling::multinomialSampleFast(probabilities, 20, sample); empiricalProbabilities[sample] += 1.0 / static_cast(n); } double error = 0.0; double pTotal = 0.0; - for (TSizeVecDoubleMapCItr pItr = empiricalProbabilities.begin(); - pItr != empiricalProbabilities.end(); - ++pItr) - { + for (TSizeVecDoubleMapCItr pItr = empiricalProbabilities.begin(); pItr != empiricalProbabilities.end(); ++pItr) { LOG_DEBUG("counts = " << core::CContainerPrinter::print(pItr->first)); - CPPUNIT_ASSERT_EQUAL(size_t(20), std::accumulate(pItr->first.begin(), - pItr->first.end(), - size_t(0))); + CPPUNIT_ASSERT_EQUAL(size_t(20), std::accumulate(pItr->first.begin(), pItr->first.end(), size_t(0))); double p = multinomialProbability(probabilities, pItr->first); double pe = pItr->second; @@ -193,8 +155,7 @@ void CSamplingTest::testMultinomialSample() CPPUNIT_ASSERT(error < 0.02 * pTotal); } -void CSamplingTest::testMultivariateNormalSample() -{ +void CSamplingTest::testMultivariateNormalSample() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CSamplingTest::testMultivariateNormalSample |"); LOG_DEBUG("+-----------------------------------------------+"); @@ -204,14 +165,9 @@ void CSamplingTest::testMultivariateNormalSample() maths::CSampling::seed(); { - double m[] = { 1.0, 3.0, 5.0 }; + double m[] = {1.0, 3.0, 5.0}; TDoubleVec 
m_(boost::begin(m), boost::end(m)); - double C[][3] = - { - { 3.0, 1.0, 0.1 }, - { 1.0, 2.0, -0.3 }, - { 0.1, -0.3, 1.0 } - }; + double C[][3] = {{3.0, 1.0, 0.1}, {1.0, 2.0, -0.3}, {0.1, -0.3, 1.0}}; TDoubleVecVec C_; C_.push_back(TDoubleVec(boost::begin(C[0]), boost::end(C[0]))); C_.push_back(TDoubleVec(boost::begin(C[1]), boost::end(C[1]))); @@ -221,16 +177,14 @@ void CSamplingTest::testMultivariateNormalSample() maths::CSampling::multivariateNormalSample(m_, C_, 1000, samples); TMeanAccumulator mean[3]; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { mean[0].add(samples[i][0]); mean[1].add(samples[i][1]); mean[2].add(samples[i][2]); } TDoubleVec mean_; - for (std::size_t i = 0u; i < 3; ++i) - { + for (std::size_t i = 0u; i < 3; ++i) { mean_.push_back(maths::CBasicStatistics::mean(mean[i])); } LOG_DEBUG("actual mean = " << core::CContainerPrinter::print(m_)); @@ -244,11 +198,8 @@ void CSamplingTest::testMultivariateNormalSample() // Get the sample covariance matrix. TDoubleVecVec covariance(3, TDoubleVec(3, 0.0)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - test_detail::add(test_detail::outer(test_detail::minus(samples[i], mean_), - test_detail::minus(samples[i], mean_)), - covariance); + for (std::size_t i = 0u; i < samples.size(); ++i) { + test_detail::add(test_detail::outer(test_detail::minus(samples[i], mean_), test_detail::minus(samples[i], mean_)), covariance); } test_detail::divide(covariance, static_cast(samples.size() - 1)); LOG_DEBUG("actual covariance = " << core::CContainerPrinter::print(covariance)); @@ -265,17 +216,13 @@ void CSamplingTest::testMultivariateNormalSample() } } -CppUnit::Test *CSamplingTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSamplingTest"); +CppUnit::Test* CSamplingTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSamplingTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSamplingTest::testMultinomialSample", - &CSamplingTest::testMultinomialSample) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSamplingTest::testMultivariateNormalSample", - &CSamplingTest::testMultivariateNormalSample) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSamplingTest::testMultinomialSample", &CSamplingTest::testMultinomialSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSamplingTest::testMultivariateNormalSample", + &CSamplingTest::testMultivariateNormalSample)); return suiteOfTests; } - diff --git a/lib/maths/unittest/CSamplingTest.h b/lib/maths/unittest/CSamplingTest.h index a83273078d..5b21108f7f 100644 --- a/lib/maths/unittest/CSamplingTest.h +++ b/lib/maths/unittest/CSamplingTest.h @@ -9,13 +9,12 @@ #include -class CSamplingTest : public CppUnit::TestFixture -{ - public: - void testMultinomialSample(); - void testMultivariateNormalSample(); +class CSamplingTest : public CppUnit::TestFixture { +public: + void testMultinomialSample(); + void testMultivariateNormalSample(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CSamplingTest_h diff --git a/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc b/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc index 5982d46213..28010e6c63 100644 --- a/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc +++ b/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc @@ -26,8 +26,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TFloatVec = 
std::vector<maths::CFloatStorage>; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator; @@ -36,8 +35,7 @@ using TMinAccumulator = maths::CBasicStatistics::SMin<double>::TAccumulator; using TMaxAccumulator = maths::CBasicStatistics::SMax<double>::TAccumulator; } -void CSeasonalComponentAdaptiveBucketingTest::testInitialize() -{ +void CSeasonalComponentAdaptiveBucketingTest::testInitialize() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testInitialize |"); LOG_DEBUG("+-----------------------------------------------------------+"); @@ -52,11 +50,10 @@ void CSeasonalComponentAdaptiveBucketingTest::testInitialize() const std::string expectedValues("[50, 5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 50]"); CPPUNIT_ASSERT(bucketing.initialize(10)); - const TFloatVec &endpoints = bucketing.endpoints(); + const TFloatVec& endpoints = bucketing.endpoints(); CPPUNIT_ASSERT_EQUAL(expectedEndpoints, core::CContainerPrinter::print(endpoints)); - for (core_t::TTime t = 5; t < 100; t += 10) - { + for (core_t::TTime t = 5; t < 100; t += 10) { double v = static_cast<double>(t); bucketing.add(t, v, v); } @@ -68,8 +65,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testInitialize() CPPUNIT_ASSERT_EQUAL(expectedValues, core::CContainerPrinter::print(values)); } -void CSeasonalComponentAdaptiveBucketingTest::testSwap() -{ +void CSeasonalComponentAdaptiveBucketingTest::testSwap() { LOG_DEBUG("+-----------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testSwap |"); LOG_DEBUG("+-----------------------------------------------------+"); @@ -80,16 +76,13 @@ void CSeasonalComponentAdaptiveBucketingTest::testSwap() test::CRandomNumbers rng; bucketing1.initialize(10); - for (std::size_t p = 0; p < 50; ++p) - { + for (std::size_t p = 0; p < 50; ++p) { TDoubleVec noise; rng.generateNormalSamples(0.0, 2.0, 100, noise); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime x = static_cast<core_t::TTime>(100 * p + i); - double y = 0.02 * (static_cast<double>(i) - 50.0) - * (static_cast<double>(i) - 50.0); + double y = 0.02 * (static_cast<double>(i) - 50.0) * (static_cast<double>(i) - 50.0); bucketing1.add(x, y + noise[i], y); } bucketing1.refine(static_cast<core_t::TTime>(100 * (p + 1))); @@ -111,8 +104,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testSwap() CPPUNIT_ASSERT_EQUAL(checksum2, bucketing1.checksum()); } -void CSeasonalComponentAdaptiveBucketingTest::testRefine() -{ +void CSeasonalComponentAdaptiveBucketingTest::testRefine() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testRefine |"); LOG_DEBUG("+-------------------------------------------------------+"); @@ -122,11 +114,8 @@ { // Test that refine reduces the function approximation error.
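// [Editor's note] The refine tests below repeatedly evaluate a piecewise-linear target
// function by locating the bracketing knots with std::lower_bound and interpolating
// between them. A minimal, self-contained sketch of that evaluation pattern follows for
// reference; the helper name linearInterpolate and the raw-array interface are
// illustrative assumptions, not part of this patch.
#include <algorithm>
#include <cstddef>
// Evaluate the piecewise-linear function through (times[i], values[i]) at t.
// Precondition: times is sorted and times[0] <= t <= times[n - 1].
double linearInterpolate(const double* times, const double* values, std::size_t n, double t) {
    std::ptrdiff_t i = std::lower_bound(times, times + n, t) - times;
    if (i == 0) {
        return values[0]; // t coincides with (or precedes) the first knot
    }
    double x0 = times[i - 1];
    double x1 = times[i];
    // Same formula the tests use: y = y0 + (y1 - y0) * (t - x0) / (x1 - x0).
    return values[i - 1] + (values[i] - values[i - 1]) * (t - x0) / (x1 - x0);
}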
- core_t::TTime times[] = - { - -1, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400 - }; - double function[] = { 10, 10, 10, 10, 100, 90, 80, 90, 100, 20, 10, 10, 10, 10 }; + core_t::TTime times[] = {-1, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400}; + double function[] = {10, 10, 10, 10, 100, 90, 80, 90, 100, 20, 10, 10, 10, 10}; maths::CDiurnalTime time(0, 0, 86400, 86400); maths::CSeasonalComponentAdaptiveBucketing bucketing1(time); @@ -135,25 +124,21 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() bucketing1.initialize(12); bucketing2.initialize(12); - for (std::size_t p = 0; p < 20; ++p) - { + for (std::size_t p = 0; p < 20; ++p) { TDoubleVec noise; rng.generateNormalSamples(0.0, 4.0, 48, noise); core_t::TTime start = static_cast(86400 * p); - for (core_t::TTime t = 0; t < 86400; t += 1800) - { + for (core_t::TTime t = 0; t < 86400; t += 1800) { core_t::TTime x = start + t; - ptrdiff_t i = std::lower_bound(boost::begin(times), - boost::end(times), - t) - boost::begin(times); + ptrdiff_t i = std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times); double x0 = static_cast(times[i - 1]); double x1 = static_cast(times[i]); double y0 = function[i - 1]; double y1 = function[i]; - double y = y0 + (y1 - y0) * (static_cast(t) - x0) / (x1 - x0); + double y = y0 + (y1 - y0) * (static_cast(t) - x0) / (x1 - x0); bucketing1.add(x, y + noise[t / 1800], y); bucketing2.add(x, y + noise[t / 1800], y); @@ -163,40 +148,32 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() TMeanAccumulator meanError1; TMaxAccumulator maxError1; - const TFloatVec &endpoints1 = bucketing1.endpoints(); + const TFloatVec& endpoints1 = bucketing1.endpoints(); TDoubleVec values1 = bucketing1.values(20 * 86400); - for (std::size_t i = 1; i < endpoints1.size(); ++i) - { - core_t::TTime t = static_cast( - 0.5 * (endpoints1[i] + endpoints1[i-1] + 1.0)); - ptrdiff_t j = std::lower_bound(boost::begin(times), - boost::end(times), - t) - boost::begin(times); + for (std::size_t i = 1; i < endpoints1.size(); ++i) { + core_t::TTime t = static_cast(0.5 * (endpoints1[i] + endpoints1[i - 1] + 1.0)); + ptrdiff_t j = std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times); double x0 = static_cast(times[j - 1]); double x1 = static_cast(times[j]); double y0 = function[j - 1]; double y1 = function[j]; - double y = y0 + (y1 - y0) * (static_cast(t) - x0) / (x1 - x0); + double y = y0 + (y1 - y0) * (static_cast(t) - x0) / (x1 - x0); meanError1.add(std::fabs(values1[i - 1] - y)); maxError1.add(std::fabs(values1[i - 1] - y)); } TMeanAccumulator meanError2; TMaxAccumulator maxError2; - const TFloatVec &endpoints2 = bucketing2.endpoints(); + const TFloatVec& endpoints2 = bucketing2.endpoints(); TDoubleVec values2 = bucketing2.values(20 * 86400); - for (std::size_t i = 1; i < endpoints1.size(); ++i) - { - core_t::TTime t = static_cast( - 0.5 * (endpoints2[i] + endpoints2[i-1] + 1.0)); - ptrdiff_t j = std::lower_bound(boost::begin(times), - boost::end(times), - t) - boost::begin(times); + for (std::size_t i = 1; i < endpoints1.size(); ++i) { + core_t::TTime t = static_cast(0.5 * (endpoints2[i] + endpoints2[i - 1] + 1.0)); + ptrdiff_t j = std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times); double x0 = static_cast(times[j - 1]); double x1 = static_cast(times[j]); double y0 = function[j - 1]; double y1 = function[j]; - double y = y0 + (y1 - y0) * (static_cast(t) - x0) / 
(x1 - x0); + double y = y0 + (y1 - y0) * (static_cast(t) - x0) / (x1 - x0); meanError2.add(std::fabs(values2[i - 1] - y)); maxError2.add(std::fabs(values2[i - 1] - y)); } @@ -217,23 +194,20 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() maths::CSeasonalComponentAdaptiveBucketing bucketing(time, 0.05); bucketing.initialize(10); - for (std::size_t p = 0; p < 50; ++p) - { + for (std::size_t p = 0; p < 50; ++p) { TDoubleVec noise; rng.generateNormalSamples(0.0, 9.0, 100, noise); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime x = static_cast(100 * p + i); - double y = 0.02 * (static_cast(i) - 50.0) - * (static_cast(i) - 50.0); + double y = 0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0); bucketing.add(x, y + noise[i], y); } bucketing.refine(static_cast(100 * (p + 1))); bucketing.propagateForwardsByTime(1.0); } - const TFloatVec &endpoints = bucketing.endpoints(); + const TFloatVec& endpoints = bucketing.endpoints(); TDoubleVec values = bucketing.values(5100); TDoubleVec variances = bucketing.variances(); LOG_DEBUG("endpoints = " << core::CContainerPrinter::print(endpoints)); @@ -244,86 +218,63 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() TMeanAccumulator varianceError; TMeanVarAccumulator avgError; - for (std::size_t i = 1u; i < endpoints.size(); ++i) - { - double a = endpoints[i-1]; + for (std::size_t i = 1u; i < endpoints.size(); ++i) { + double a = endpoints[i - 1]; double b = endpoints[i]; LOG_DEBUG("bucket = [" << a << "," << b << "]"); a -= 50.0; b -= 50.0; - double m = values[i-1]; - double v = variances[i-1]; + double m = values[i - 1]; + double v = variances[i - 1]; // Function mean and variance. - double m_ = std::fabs(a) < std::fabs(b) ? - 0.02 / 3.0 * std::pow(b, 3.0) - * (1.0 - std::pow(a/b, 3.0)) / (b-a) : - 0.02 / 3.0 * std::pow(a, 3.0) - * (std::pow(b/a, 3.0) - 1.0) / (b-a); + double m_ = std::fabs(a) < std::fabs(b) ? 0.02 / 3.0 * std::pow(b, 3.0) * (1.0 - std::pow(a / b, 3.0)) / (b - a) + : 0.02 / 3.0 * std::pow(a, 3.0) * (std::pow(b / a, 3.0) - 1.0) / (b - a); double v_ = 9.0; - LOG_DEBUG("m = " << m - << ", m_ = " << m_ - << ", absolute error = " << std::fabs(m - m_)); - LOG_DEBUG("v = " << v - << ", v_ = " << v_ - << ", relative error = " << std::fabs(v - v_) / v_); + LOG_DEBUG("m = " << m << ", m_ = " << m_ << ", absolute error = " << std::fabs(m - m_)); + LOG_DEBUG("v = " << v << ", v_ = " << v_ << ", relative error = " << std::fabs(v - v_) / v_); CPPUNIT_ASSERT_DOUBLES_EQUAL(m_, m, 0.7); CPPUNIT_ASSERT_DOUBLES_EQUAL(v_, v, 0.4 * v_); meanError.add(std::fabs(m_ - m) / m_); varianceError.add(std::fabs(v_ - v) / v_); - if (i == 1 || i == endpoints.size() - 1) - { + if (i == 1 || i == endpoints.size() - 1) { continue; } - if ((b * b / 50.0 - m) * (a * a / 50.0 - m) < 0.0) - { + if ((b * b / 50.0 - m) * (a * a / 50.0 - m) < 0.0) { // Root. double c = b < 0.0 ? -std::sqrt(50.0 * m) : +::sqrt(50.0 * m); // Left and right partial averaging errors. - double l = std::fabs(c) < std::fabs(a) ? - 0.02 / 3.0 * a * a * a - * ((c/a) * (c/a) * (c/a) - 1.0) - m * (c-a) : - 0.02 / 3.0 * c * c * c - * (1.0 - (a/c) * (a/c) * (a/c)) - m * (c-a); - double r = std::fabs(c) < std::fabs(b) ? - 0.02 / 3.0 * b * b * b - * (1.0 - (c/b) * (c/b) * (c/b)) - m * (b-c) : - 0.02 / 3.0 * c * c * c - * ((b/c) * (b/c) * (b/c) - 1.0) - m * (b-c); - LOG_DEBUG("c = " << c - << ", l = " << l << " r = " << r - << ", error = " << std::fabs(l) + std::fabs(r)); + double l = std::fabs(c) < std::fabs(a) ? 
0.02 / 3.0 * a * a * a * ((c / a) * (c / a) * (c / a) - 1.0) - m * (c - a) + : 0.02 / 3.0 * c * c * c * (1.0 - (a / c) * (a / c) * (a / c)) - m * (c - a); + double r = std::fabs(c) < std::fabs(b) ? 0.02 / 3.0 * b * b * b * (1.0 - (c / b) * (c / b) * (c / b)) - m * (b - c) + : 0.02 / 3.0 * c * c * c * ((b / c) * (b / c) * (b / c) - 1.0) - m * (b - c); + LOG_DEBUG("c = " << c << ", l = " << l << " r = " << r << ", error = " << std::fabs(l) + std::fabs(r)); avgError.add(std::fabs(l) + std::fabs(r)); - } - else - { + } else { avgError.add(std::fabs((m_ - m) * (b - a))); } } double meanError_ = maths::CBasicStatistics::mean(meanError); double varianceError_ = maths::CBasicStatistics::mean(varianceError); - LOG_DEBUG("meanError = " << meanError_ - << ", varianceError = " << varianceError_); + LOG_DEBUG("meanError = " << meanError_ << ", varianceError = " << varianceError_); CPPUNIT_ASSERT(meanError_ < 0.09); CPPUNIT_ASSERT(varianceError_ < 0.21); double avgErrorMean = maths::CBasicStatistics::mean(avgError); double avgErrorStd = std::sqrt(maths::CBasicStatistics::variance(avgError)); - LOG_DEBUG("avgErrorMean = " << avgErrorMean - << ", avgErrorStd = " << avgErrorStd); + LOG_DEBUG("avgErrorMean = " << avgErrorMean << ", avgErrorStd = " << avgErrorStd); CPPUNIT_ASSERT(avgErrorStd / avgErrorMean < 0.5); } } -void CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime() -{ +void CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime() { LOG_DEBUG("+------------------------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime |"); LOG_DEBUG("+------------------------------------------------------------------------+"); @@ -336,13 +287,10 @@ void CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime() maths::CSeasonalComponentAdaptiveBucketing bucketing(time, 0.2); bucketing.initialize(10); - for (std::size_t p = 0; p < 10; ++p) - { - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t p = 0; p < 10; ++p) { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime x = static_cast(100 * p + i); - double y = 0.02 * (static_cast(i) - 50.0) - * (static_cast(i) - 50.0); + double y = 0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0); bucketing.add(x, y, y); } bucketing.refine(static_cast(100 * (p + 1))); @@ -350,43 +298,36 @@ void CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime() } double lastCount = bucketing.count(); - for (std::size_t i = 0u; i < 20; ++i) - { + for (std::size_t i = 0u; i < 20; ++i) { bucketing.propagateForwardsByTime(1.0); double count = bucketing.count(); - LOG_DEBUG("count = " << count - << ", lastCount = " << lastCount - << " count/lastCount = " << count/lastCount); + LOG_DEBUG("count = " << count << ", lastCount = " << lastCount << " count/lastCount = " << count / lastCount); CPPUNIT_ASSERT(count < lastCount); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.81873, count/lastCount, 5e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.81873, count / lastCount, 5e-6); lastCount = count; } } -void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() -{ +void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() { LOG_DEBUG("+--------------------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength |"); LOG_DEBUG("+--------------------------------------------------------------------+"); const double bucketLength = 3600.0; - const double function[] = { 0.0, 
0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0 }; + const double function[] = {0.0, 0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0}; std::size_t n = boost::size(function); test::CRandomNumbers rng; - core_t::TTime period = static_cast(n) - * static_cast(bucketLength); + core_t::TTime period = static_cast(n) * static_cast(bucketLength); maths::CDiurnalTime time(0, 0, period, period); maths::CSeasonalComponentAdaptiveBucketing bucketing1(time, 0.0, 0.0); maths::CSeasonalComponentAdaptiveBucketing bucketing2(time, 0.0, 3000.0); bucketing1.initialize(n); bucketing2.initialize(n); - for (std::size_t i = 0u; i < 20; ++i) - { - for (std::size_t j = 0u; j < n; ++j) - { + for (std::size_t i = 0u; i < 20; ++i) { + for (std::size_t j = 0u; j < n; ++j) { TDoubleVec values; rng.generateNormalSamples(function[j], 1.0, 5, values); @@ -394,11 +335,9 @@ void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() rng.generateUniformSamples(0.0, bucketLength, values.size(), times); std::sort(times.begin(), times.end()); - for (std::size_t k = 0u; k < times.size(); ++k) - { - core_t::TTime t = static_cast(i) * period - + static_cast(static_cast(j) * bucketLength) - + static_cast(times[k]); + for (std::size_t k = 0u; k < times.size(); ++k) { + core_t::TTime t = static_cast(i) * period + + static_cast(static_cast(j) * bucketLength) + static_cast(times[k]); bucketing1.add(t, values[k], function[j]); bucketing2.add(t, values[k], function[j]); } @@ -406,18 +345,17 @@ void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() bucketing1.refine(static_cast(i) * period); bucketing2.refine(static_cast(i) * period); - const TFloatVec &endpoints1 = bucketing1.endpoints(); - const TFloatVec &endpoints2 = bucketing2.endpoints(); + const TFloatVec& endpoints1 = bucketing1.endpoints(); + const TFloatVec& endpoints2 = bucketing2.endpoints(); CPPUNIT_ASSERT_EQUAL(endpoints1.size(), endpoints2.size()); TMinAccumulator minimumBucketLength1; TMinAccumulator minimumBucketLength2; double minimumTotalError = 0.0; - for (std::size_t j = 1u; j < endpoints1.size(); ++j) - { - minimumBucketLength1.add(endpoints1[j] - endpoints1[j-1]); - minimumBucketLength2.add(endpoints2[j] - endpoints2[j-1]); - double minimumShift = std::max(3000.0 - (endpoints1[j] - endpoints1[j-1]), 0.0) / 2.0; + for (std::size_t j = 1u; j < endpoints1.size(); ++j) { + minimumBucketLength1.add(endpoints1[j] - endpoints1[j - 1]); + minimumBucketLength2.add(endpoints2[j] - endpoints2[j - 1]); + double minimumShift = std::max(3000.0 - (endpoints1[j] - endpoints1[j - 1]), 0.0) / 2.0; minimumTotalError += minimumShift; } LOG_DEBUG("minimumBucketLength1 = " << minimumBucketLength1); @@ -425,8 +363,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() CPPUNIT_ASSERT(minimumBucketLength2[0] >= 3000.0); double totalError = 0.0; - for (std::size_t j = 1u; j+1 < endpoints1.size(); ++j) - { + for (std::size_t j = 1u; j + 1 < endpoints1.size(); ++j) { totalError += std::fabs(endpoints2[j] - endpoints1[j]); } LOG_DEBUG("minimumTotalError = " << minimumTotalError); @@ -435,8 +372,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() } } -void CSeasonalComponentAdaptiveBucketingTest::testUnintialized() -{ +void CSeasonalComponentAdaptiveBucketingTest::testUnintialized() { LOG_DEBUG("+-------------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testUnintialized |"); 
LOG_DEBUG("+-------------------------------------------------------------+"); @@ -467,8 +403,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testUnintialized() bucketing.initialize(10); CPPUNIT_ASSERT(bucketing.initialized()); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { core_t::TTime t = static_cast(i); double v = static_cast(t * t); bucketing.add(t, v, v); @@ -485,9 +420,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testUnintialized() CPPUNIT_ASSERT(bucketing.variances().empty()); } - -void CSeasonalComponentAdaptiveBucketingTest::testKnots() -{ +void CSeasonalComponentAdaptiveBucketingTest::testKnots() { LOG_DEBUG("+------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testKnots |"); LOG_DEBUG("+------------------------------------------------------+"); @@ -502,16 +435,13 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() maths::CSeasonalComponentAdaptiveBucketing bucketing(time, 0.1, 864.0); bucketing.initialize(20); - for (std::size_t p = 0; p < 5; ++p) - { + for (std::size_t p = 0; p < 5; ++p) { TDoubleVec noise; rng.generateNormalSamples(0.0, 4.0, 100, noise); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime x = static_cast(p * 86400 + 864 * i); - double y = 0.02 * (static_cast(i) - 50.0) - * (static_cast(i) - 50.0); + double y = 0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0); bucketing.add(x, y + noise[i], y); } bucketing.refine(static_cast(86400 * (p + 1))); @@ -519,29 +449,24 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() TDoubleVec knots; TDoubleVec values; TDoubleVec variances; - bucketing.knots(static_cast(86400 * (p + 1)), - maths::CSplineTypes::E_Periodic, - knots, values, variances); + bucketing.knots(static_cast(86400 * (p + 1)), maths::CSplineTypes::E_Periodic, knots, values, variances); LOG_DEBUG("knots = " << core::CContainerPrinter::print(knots)); LOG_DEBUG("values = " << core::CContainerPrinter::print(values)); TMeanAccumulator meanError; TMeanAccumulator meanValue; - for (std::size_t i = 0u; i < knots.size(); ++i) - { + for (std::size_t i = 0u; i < knots.size(); ++i) { double x = knots[i] / 864.0; double expectedValue = 0.02 * (x - 50.0) * (x - 50.0); - LOG_DEBUG("expected = " << expectedValue - << ", value = " << values[i]); + LOG_DEBUG("expected = " << expectedValue << ", value = " << values[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue, values[i], 15.0); meanError.add(std::fabs(values[i] - expectedValue)); meanValue.add(std::fabs(expectedValue)); } LOG_DEBUG("meanError = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG("meanValue = " << maths::CBasicStatistics::mean(meanValue)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanError) - / maths::CBasicStatistics::mean(meanValue) - < 0.1 / std::sqrt(static_cast(p+1))); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanValue) < + 0.1 / std::sqrt(static_cast(p + 1))); } } LOG_DEBUG("*** Variances ***"); @@ -550,56 +475,44 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() maths::CSeasonalComponentAdaptiveBucketing bucketing(time, 0.1, 864.0); bucketing.initialize(20); - for (std::size_t p = 0; p < 50; ++p) - { + for (std::size_t p = 0; p < 50; ++p) { TDoubleVec noise; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime x = static_cast(p * 86400 + 864 * i); - double v = 0.01 * (static_cast(i) - 50.0) - * 
(static_cast(i) - 50.0); + double v = 0.01 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0); rng.generateNormalSamples(0.0, v, 1, noise); bucketing.add(x, noise[0], 0.0); } bucketing.refine(static_cast(86400 * (p + 1))); - if ((p+1) % 10 == 0) - { + if ((p + 1) % 10 == 0) { TDoubleVec knots; TDoubleVec values; TDoubleVec variances; - bucketing.knots(static_cast(86400 * (p + 1)), - maths::CSplineTypes::E_Periodic, - knots, values, variances); + bucketing.knots(static_cast(86400 * (p + 1)), maths::CSplineTypes::E_Periodic, knots, values, variances); LOG_DEBUG("knots = " << core::CContainerPrinter::print(knots)); LOG_DEBUG("variances = " << core::CContainerPrinter::print(variances)); TMeanAccumulator meanError; TMeanAccumulator meanVariance; - for (std::size_t i = 0u; i < knots.size(); ++i) - { + for (std::size_t i = 0u; i < knots.size(); ++i) { double x = knots[i] / 864.0; double expectedVariance = 0.01 * (x - 50.0) * (x - 50.0); - LOG_DEBUG("expected = " << expectedVariance - << ", variance = " << variances[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, - variances[i], - 15.0); + LOG_DEBUG("expected = " << expectedVariance << ", variance = " << variances[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, variances[i], 15.0); meanError.add(std::fabs(variances[i] - expectedVariance)); meanVariance.add(std::fabs(expectedVariance)); } LOG_DEBUG("meanError = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG("meanVariance = " << maths::CBasicStatistics::mean(meanVariance)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanError) - / maths::CBasicStatistics::mean(meanVariance) < 0.2); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanVariance) < 0.2); } } } } -void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() -{ +void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() { LOG_DEBUG("+-------------------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots |"); LOG_DEBUG("+-------------------------------------------------------------------+"); @@ -615,42 +528,35 @@ void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() bucketing.initialize(20); bucketing.initialValues(0, 0, empty); - for (std::size_t p = 0; p < 100 ; ++p) - { + for (std::size_t p = 0; p < 100; ++p) { TDoubleVec noise; rng.generateNormalSamples(0.0, 100.0, 144, noise); - for (std::size_t i = 0u; i < 144; ++i) - { + for (std::size_t i = 0u; i < 144; ++i) { double x = static_cast(i) / 144.0; - double y = 10.0 * ( std::min(static_cast(p+1) + x, 50.0) - - std::max(static_cast(p+1) + x - 50.0, 0.0) - + 10.0 * std::sin(boost::math::double_constants::two_pi * x)); + double y = 10.0 * (std::min(static_cast(p + 1) + x, 50.0) - std::max(static_cast(p + 1) + x - 50.0, 0.0) + + 10.0 * std::sin(boost::math::double_constants::two_pi * x)); bucketing.add(static_cast(86400 * p + 600 * i), y + noise[i], y); } bucketing.refine(static_cast(86400 * (p + 1))); bucketing.propagateForwardsByTime(1.0); - if (p > 14 && (p + 1) % 5 == 0) - { + if (p > 14 && (p + 1) % 5 == 0) { TDoubleVec knots; TDoubleVec values; TDoubleVec variances; - bucketing.knots(static_cast(86400 * (p+1)), - maths::CSplineTypes::E_Periodic, - knots, values, variances); + bucketing.knots(static_cast(86400 * (p + 1)), maths::CSplineTypes::E_Periodic, knots, values, variances); LOG_DEBUG("knots = " << core::CContainerPrinter::print(knots)); LOG_DEBUG("values = " << 
core::CContainerPrinter::print(values)); LOG_DEBUG("variances = " << core::CContainerPrinter::print(variances)); TMeanAccumulator meanError; TMeanAccumulator meanValue; - for (std::size_t i = 0u; i < knots.size(); ++i) - { + for (std::size_t i = 0u; i < knots.size(); ++i) { double x = knots[i] / 86400.0; - double expectedValue = 10.0 * ( std::min(static_cast(p+1) + x, 50.0) - - std::max(static_cast(p+1) + x - 50.0, 0.0) - + 10.0 * std::sin(boost::math::double_constants::two_pi * x)); + double expectedValue = + 10.0 * (std::min(static_cast(p + 1) + x, 50.0) - std::max(static_cast(p + 1) + x - 50.0, 0.0) + + 10.0 * std::sin(boost::math::double_constants::two_pi * x)); LOG_DEBUG("expected = " << expectedValue << ", value = " << values[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue, values[i], 70.0); meanError.add(std::fabs(values[i] - expectedValue)); @@ -658,14 +564,12 @@ void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() } LOG_DEBUG("meanError = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG("meanValue = " << maths::CBasicStatistics::mean(meanValue)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanError) - / maths::CBasicStatistics::mean(meanValue) < 0.15); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanValue) < 0.15); } } } -void CSeasonalComponentAdaptiveBucketingTest::testShiftValue() -{ +void CSeasonalComponentAdaptiveBucketingTest::testShiftValue() { LOG_DEBUG("+-----------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testShiftValue |"); LOG_DEBUG("+-----------------------------------------------------------+"); @@ -681,13 +585,11 @@ void CSeasonalComponentAdaptiveBucketingTest::testShiftValue() bucketing.initialValues(0, 0, empty); core_t::TTime t = 0; - for (/**/; t < 40 * 86400; t += 600) - { + for (/**/; t < 40 * 86400; t += 600) { double x = static_cast(t) / 86400.0; double y = x + 20.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * x); bucketing.add(t, y, y); - if (t % 86400 == 0) - { + if (t % 86400 == 0) { bucketing.refine(t); bucketing.propagateForwardsByTime(1.0); } @@ -696,27 +598,25 @@ void CSeasonalComponentAdaptiveBucketingTest::testShiftValue() TDoubleVec knots1; TDoubleVec values1; TDoubleVec variances1; - bucketing.knots(t + 7*86400, maths::CSplineTypes::E_Natural, knots1, values1, variances1); + bucketing.knots(t + 7 * 86400, maths::CSplineTypes::E_Natural, knots1, values1, variances1); bucketing.shiftLevel(20.0); TDoubleVec knots2; TDoubleVec values2; TDoubleVec variances2; - bucketing.knots(t + 7*86400, maths::CSplineTypes::E_Natural, knots2, values2, variances2); + bucketing.knots(t + 7 * 86400, maths::CSplineTypes::E_Natural, knots2, values2, variances2); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(knots1), core::CContainerPrinter::print(knots2)); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(variances1), core::CContainerPrinter::print(variances2)); - for (std::size_t i = 0u; i < values1.size(); ++i) - { + for (std::size_t i = 0u; i < values1.size(); ++i) { LOG_DEBUG("values = " << values1[i] << " vs " << values2[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(20.0 + values1[i], values2[i], 1e-6 * values1[i]); } } -void CSeasonalComponentAdaptiveBucketingTest::testSlope() -{ +void CSeasonalComponentAdaptiveBucketingTest::testSlope() { LOG_DEBUG("+------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testSlope |"); 
LOG_DEBUG("+------------------------------------------------------+"); @@ -731,13 +631,11 @@ void CSeasonalComponentAdaptiveBucketingTest::testSlope() bucketing.initialValues(0, 0, empty); core_t::TTime t = 0; - for (/**/; t < 60 * 86400; t += 600) - { + for (/**/; t < 60 * 86400; t += 600) { double x = static_cast(t) / 86400.0; double y = x + 20.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * x); bucketing.add(t, y, y); - if (t % 86400 == 0) - { + if (t % 86400 == 0) { bucketing.refine(t); bucketing.propagateForwardsByTime(1.0); } @@ -756,8 +654,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testSlope() CPPUNIT_ASSERT_DOUBLES_EQUAL(slopeBefore + 10.0, slopeAfter, 1e-4); } -void CSeasonalComponentAdaptiveBucketingTest::testPersist() -{ +void CSeasonalComponentAdaptiveBucketingTest::testPersist() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testPersist |"); LOG_DEBUG("+--------------------------------------------------------+"); @@ -771,13 +668,10 @@ void CSeasonalComponentAdaptiveBucketingTest::testPersist() maths::CSeasonalComponentAdaptiveBucketing origBucketing(time, decayRate, minimumBucketLength); origBucketing.initialize(10); - for (std::size_t p = 0; p < 10; ++p) - { - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t p = 0; p < 10; ++p) { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime x = static_cast(p * 86400 + 864 * i); - double y = 0.02 * (static_cast(i) - 50.0) - * (static_cast(i) - 50.0); + double y = 0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0); origBucketing.add(x, y, y); } origBucketing.refine(static_cast(86400 * (p + 1))); @@ -799,12 +693,9 @@ void CSeasonalComponentAdaptiveBucketingTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); // Restore the XML into a new bucketing. 
- maths::CSeasonalComponentAdaptiveBucketing restoredBucketing(decayRate + 0.1, - minimumBucketLength, - traverser); + maths::CSeasonalComponentAdaptiveBucketing restoredBucketing(decayRate + 0.1, minimumBucketLength, traverser); - LOG_DEBUG("orig checksum = " << checksum - << " restored checksum = " << restoredBucketing.checksum()); + LOG_DEBUG("orig checksum = " << checksum << " restored checksum = " << restoredBucketing.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredBucketing.checksum()); // The XML representation of the new bucketing should be the @@ -818,8 +709,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() -{ +void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentAdaptiveBucketingTest::testUpgrade |"); LOG_DEBUG("+--------------------------------------------------------+"); @@ -833,13 +723,10 @@ void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() maths::CSeasonalComponentAdaptiveBucketing expectedBucketing(time, decayRate, minimumBucketLength); expectedBucketing.initialize(10); - for (std::size_t p = 0; p < 10; ++p) - { - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t p = 0; p < 10; ++p) { + for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime x = static_cast(p * 86400 + 864 * i); - double y = 0.02 * (static_cast(i) - 50.0) - * (static_cast(i) - 50.0); + double y = 0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0); expectedBucketing.add(x, y, y); } expectedBucketing.refine(static_cast(86400 * (p + 1))); @@ -857,9 +744,7 @@ void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() core::CRapidXmlStateRestoreTraverser traverser(parser); // Restore the XML into a new bucketing. - maths::CSeasonalComponentAdaptiveBucketing restoredBucketing(decayRate + 0.1, - minimumBucketLength, - traverser); + maths::CSeasonalComponentAdaptiveBucketing restoredBucketing(decayRate + 0.1, minimumBucketLength, traverser); // Check that the knots points we get back are very nearly // those we expect. 
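// [Editor's note] In both restore paths above, the restored bucketing is deliberately
// constructed with a perturbed decay rate (decayRate + 0.1); the later
// CPPUNIT_ASSERT_EQUAL on decayRate() only passes if restoring overwrites
// constructor-supplied parameters, which is how the test proves the value really came
// from the persisted XML rather than from the constructor. Note also that in the knot
// comparison below both the "expected" and the "restored" vectors appear to be filled
// by calling expectedBucketing.knots(...); if that is not a transcription artifact,
// those assertions compare the object with itself and the decayRate() check carries
// most of the weight.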
@@ -867,73 +752,58 @@ void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() TDoubleVec expectedKnots; TDoubleVec expectedValues; TDoubleVec expectedVariances; - expectedBucketing.knots(863136, maths::CSplineTypes::E_Periodic, - expectedKnots, expectedValues, expectedVariances); + expectedBucketing.knots(863136, maths::CSplineTypes::E_Periodic, expectedKnots, expectedValues, expectedVariances); TDoubleVec restoredKnots; TDoubleVec restoredValues; TDoubleVec restoredVariances; - expectedBucketing.knots(863136, maths::CSplineTypes::E_Periodic, - restoredKnots, restoredValues, restoredVariances); + expectedBucketing.knots(863136, maths::CSplineTypes::E_Periodic, restoredKnots, restoredValues, restoredVariances); CPPUNIT_ASSERT_EQUAL(expectedBucketing.decayRate(), restoredBucketing.decayRate()); LOG_DEBUG("expected knots = " << core::CContainerPrinter::print(expectedKnots)); LOG_DEBUG("restored knots = " << core::CContainerPrinter::print(restoredKnots)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedKnots), - core::CContainerPrinter::print(restoredKnots)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedKnots), core::CContainerPrinter::print(restoredKnots)); LOG_DEBUG("expected values = " << core::CContainerPrinter::print(expectedValues)); LOG_DEBUG("restored values = " << core::CContainerPrinter::print(restoredValues)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedValues), - core::CContainerPrinter::print(restoredValues)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedValues), core::CContainerPrinter::print(restoredValues)); LOG_DEBUG("expected variances = " << core::CContainerPrinter::print(expectedVariances)); LOG_DEBUG("restored variances = " << core::CContainerPrinter::print(restoredVariances)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedVariances), - core::CContainerPrinter::print(restoredVariances)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedVariances), core::CContainerPrinter::print(restoredVariances)); } -CppUnit::Test *CSeasonalComponentAdaptiveBucketingTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSeasonalComponentAdaptiveBucketingTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testInitialize", - &CSeasonalComponentAdaptiveBucketingTest::testInitialize) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testSwap", - &CSeasonalComponentAdaptiveBucketingTest::testSwap) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testRefine", - &CSeasonalComponentAdaptiveBucketingTest::testRefine) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime", - &CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength", - &CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testUnintialized", - &CSeasonalComponentAdaptiveBucketingTest::testUnintialized) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testKnots", - &CSeasonalComponentAdaptiveBucketingTest::testKnots) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( -
"CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots", - &CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testShiftValue", - &CSeasonalComponentAdaptiveBucketingTest::testShiftValue) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testSlope", - &CSeasonalComponentAdaptiveBucketingTest::testSlope) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testPersist", - &CSeasonalComponentAdaptiveBucketingTest::testPersist) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( - "CSeasonalComponentAdaptiveBucketingTest::testUpgrade", - &CSeasonalComponentAdaptiveBucketingTest::testUpgrade) ); +CppUnit::Test* CSeasonalComponentAdaptiveBucketingTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSeasonalComponentAdaptiveBucketingTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testInitialize", &CSeasonalComponentAdaptiveBucketingTest::testInitialize)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testSwap", &CSeasonalComponentAdaptiveBucketingTest::testSwap)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testRefine", &CSeasonalComponentAdaptiveBucketingTest::testRefine)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime", + &CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength", + &CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testUnintialized", &CSeasonalComponentAdaptiveBucketingTest::testUnintialized)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testKnots", &CSeasonalComponentAdaptiveBucketingTest::testKnots)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>("CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots", + &CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testShiftValue", &CSeasonalComponentAdaptiveBucketingTest::testShiftValue)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testSlope", &CSeasonalComponentAdaptiveBucketingTest::testSlope)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testPersist", &CSeasonalComponentAdaptiveBucketingTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>( + "CSeasonalComponentAdaptiveBucketingTest::testUpgrade", &CSeasonalComponentAdaptiveBucketingTest::testUpgrade)); return suiteOfTests; } diff --git a/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.h b/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.h index 2371fef1be..2d59a463a6 100644 --- a/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.h +++ b/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.h @@ -9,23 +9,22 @@ #include <cppunit/extensions/HelperMacros.h> -class CSeasonalComponentAdaptiveBucketingTest : public CppUnit::TestFixture -{ - public: - void testInitialize(); - void testSwap(); - void testRefine();
- void testPropagateForwardsByTime(); - void testMinimumBucketLength(); - void testUnintialized(); - void testKnots(); - void testLongTermTrendKnots(); - void testShiftValue(); - void testSlope(); - void testPersist(); - void testUpgrade(); +class CSeasonalComponentAdaptiveBucketingTest : public CppUnit::TestFixture { +public: + void testInitialize(); + void testSwap(); + void testRefine(); + void testPropagateForwardsByTime(); + void testMinimumBucketLength(); + void testUnintialized(); + void testKnots(); + void testLongTermTrendKnots(); + void testShiftValue(); + void testSlope(); + void testPersist(); + void testUpgrade(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CSeasonalComponentAdaptiveBucketingTest_h diff --git a/lib/maths/unittest/CSeasonalComponentTest.cc b/lib/maths/unittest/CSeasonalComponentTest.cc index 639336dc88..05301037de 100644 --- a/lib/maths/unittest/CSeasonalComponentTest.cc +++ b/lib/maths/unittest/CSeasonalComponentTest.cc @@ -7,10 +7,10 @@ #include "CSeasonalComponentTest.h" #include -#include #include #include #include +#include #include #include @@ -27,8 +27,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleDoublePr = std::pair; using TDoubleVec = std::vector; @@ -36,62 +35,54 @@ using TTimeVec = std::vector; using TTimeDoublePr = std::pair; using TTimeDoublePrVec = std::vector; -class CTestSeasonalComponent : public maths::CSeasonalComponent -{ - public: - // Bring base class method hidden by the signature above into scope - using maths::CSeasonalComponent::initialize; - - public: - CTestSeasonalComponent(core_t::TTime startTime, - core_t::TTime window, - core_t::TTime period, - std::size_t space, - double decayRate = 0.0, - double minimumBucketLength = 0.0, - maths::CSplineTypes::EBoundaryCondition boundaryCondition = maths::CSplineTypes::E_Periodic, - maths::CSplineTypes::EType valueInterpolationType = maths::CSplineTypes::E_Cubic, - maths::CSplineTypes::EType varianceInterpolationType = maths::CSplineTypes::E_Linear) : - maths::CSeasonalComponent(maths::CDiurnalTime(0, 0, window, period), - space, - decayRate, - minimumBucketLength, - boundaryCondition, - valueInterpolationType, - varianceInterpolationType), - m_StartTime(startTime) - {} - - void addPoint(core_t::TTime time, - double value, - double weight = 1.0) - { - core_t::TTime period = this->time().period(); - if (time > m_StartTime + period) - { - this->updateStartOfCurrentPeriodAndInterpolate(time); - } - this->maths::CSeasonalComponent::add(time, value, weight); +class CTestSeasonalComponent : public maths::CSeasonalComponent { +public: + // Bring base class method hidden by the signature above into scope + using maths::CSeasonalComponent::initialize; + +public: + CTestSeasonalComponent(core_t::TTime startTime, + core_t::TTime window, + core_t::TTime period, + std::size_t space, + double decayRate = 0.0, + double minimumBucketLength = 0.0, + maths::CSplineTypes::EBoundaryCondition boundaryCondition = maths::CSplineTypes::E_Periodic, + maths::CSplineTypes::EType valueInterpolationType = maths::CSplineTypes::E_Cubic, + maths::CSplineTypes::EType varianceInterpolationType = maths::CSplineTypes::E_Linear) + : maths::CSeasonalComponent(maths::CDiurnalTime(0, 0, window, period), + space, + decayRate, + minimumBucketLength, + boundaryCondition, + valueInterpolationType, + varianceInterpolationType), + m_StartTime(startTime) {} + + void addPoint(core_t::TTime time, double value, double weight = 1.0) { + core_t::TTime period = 
this->time().period(); + if (time > m_StartTime + period) { + this->updateStartOfCurrentPeriodAndInterpolate(time); } + this->maths::CSeasonalComponent::add(time, value, weight); + } - void updateStartOfCurrentPeriodAndInterpolate(core_t::TTime time) - { - core_t::TTime period = this->time().period(); - this->interpolate(maths::CIntegerTools::floor(time, period)); - m_StartTime = maths::CIntegerTools::floor(time, period); - } + void updateStartOfCurrentPeriodAndInterpolate(core_t::TTime time) { + core_t::TTime period = this->time().period(); + this->interpolate(maths::CIntegerTools::floor(time, period)); + m_StartTime = maths::CIntegerTools::floor(time, period); + } - private: - core_t::TTime m_StartTime; +private: + core_t::TTime m_StartTime; }; -void generateSeasonalValues(test::CRandomNumbers &rng, - const TTimeDoublePrVec &function, +void generateSeasonalValues(test::CRandomNumbers& rng, + const TTimeDoublePrVec& function, core_t::TTime startTime, core_t::TTime endTime, std::size_t numberSamples, - TTimeDoublePrVec &samples) -{ + TTimeDoublePrVec& samples) { using TSizeVec = std::vector; // Generate time uniformly at random in the interval @@ -100,37 +91,24 @@ void generateSeasonalValues(test::CRandomNumbers &rng, core_t::TTime period = function[function.size() - 1].first; TSizeVec times; - rng.generateUniformSamples(static_cast(startTime), - static_cast(endTime), - numberSamples, - times); + rng.generateUniformSamples(static_cast(startTime), static_cast(endTime), numberSamples, times); std::sort(times.begin(), times.end()); - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { core_t::TTime offset = static_cast(times[i] % period); - std::size_t b = std::lower_bound(function.begin(), - function.end(), - offset, - maths::COrderings::SFirstLess()) - function.begin(); + std::size_t b = std::lower_bound(function.begin(), function.end(), offset, maths::COrderings::SFirstLess()) - function.begin(); b = maths::CTools::truncate(b, std::size_t(1), std::size_t(function.size() - 1)); std::size_t a = b - 1; - double m = (function[b].second - function[a].second) - / static_cast(function[b].first - function[a].first); - samples.push_back(TTimeDoublePr( - times[i], function[a].second - + m * static_cast(offset - function[a].first))); + double m = (function[b].second - function[a].second) / static_cast(function[b].first - function[a].first); + samples.push_back(TTimeDoublePr(times[i], function[a].second + m * static_cast(offset - function[a].first))); } } -double mean(const TDoubleDoublePr &x) -{ +double mean(const TDoubleDoublePr& x) { return (x.first + x.second) / 2.0; } - } -void CSeasonalComponentTest::testNoPeriodicity() -{ +void CSeasonalComponentTest::testNoPeriodicity() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentTest::testNoPeriodicity |"); LOG_DEBUG("+---------------------------------------------+"); @@ -138,8 +116,7 @@ void CSeasonalComponentTest::testNoPeriodicity() const core_t::TTime startTime = 1354492800; TTimeDoublePrVec function; - for (std::size_t i = 0; i < 25; ++i) - { + for (std::size_t i = 0; i < 25; ++i) { function.push_back(TTimeDoublePr((i * core::constants::DAY) / 24, 0.0)); } @@ -150,11 +127,7 @@ void CSeasonalComponentTest::testNoPeriodicity() std::size_t n = 5000u; TTimeDoublePrVec samples; - generateSeasonalValues(rng, - function, - startTime, - startTime + 31 * core::constants::DAY, - n, samples); + generateSeasonalValues(rng, function, startTime, startTime + 31 * 
core::constants::DAY, n, samples); TDoubleVec residuals; rng.generateGammaSamples(10.0, 1.2, n, residuals); @@ -170,12 +143,10 @@ void CSeasonalComponentTest::testNoPeriodicity() double totalError1 = 0.0; double totalError2 = 0.0; core_t::TTime time = startTime; - for (std::size_t i = 0u, d = 0u; i < n; ++i) - { + for (std::size_t i = 0u, d = 0u; i < n; ++i) { seasonal.addPoint(samples[i].first, samples[i].second + residuals[i]); - if (samples[i].first >= time + core::constants::DAY) - { + if (samples[i].first >= time + core::constants::DAY) { LOG_DEBUG("Processing day = " << ++d); time += core::constants::DAY; @@ -186,8 +157,7 @@ void CSeasonalComponentTest::testNoPeriodicity() //ft << "ft = ["; double error1 = 0.0; double error2 = 0.0; - for (std::size_t j = 0u; j < function.size(); ++j) - { + for (std::size_t j = 0u; j < function.size(); ++j) { //t << time + function[j].first << " "; //ft << function[j].second << " "; TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); @@ -200,12 +170,11 @@ void CSeasonalComponentTest::testNoPeriodicity() //t << "];\n"; //ft << "];\n"; - if (d > 1) - { + if (d > 1) { LOG_DEBUG("f(0) = " << mean(seasonal.value(time, 0.0)) - << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); } error1 /= static_cast(function.size()); error2 /= static_cast(function.size()); @@ -232,8 +201,7 @@ void CSeasonalComponentTest::testNoPeriodicity() CPPUNIT_ASSERT(totalError2 < 0.15); } -void CSeasonalComponentTest::testConstantPeriodic() -{ +void CSeasonalComponentTest::testConstantPeriodic() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentTest::testConstantPeriodic |"); LOG_DEBUG("+------------------------------------------------+"); @@ -247,22 +215,16 @@ void CSeasonalComponentTest::testConstantPeriodic() LOG_DEBUG("*** sin(2 * pi * t / 24 hrs) ***"); TTimeDoublePrVec function; - for (core_t::TTime i = 0u; i < 49; ++i) - { + for (core_t::TTime i = 0u; i < 49; ++i) { core_t::TTime t = (i * core::constants::DAY) / 48; - double ft = 100.0 + 40.0 * std::sin(boost::math::double_constants::two_pi - * static_cast(i) / 48.0); + double ft = 100.0 + 40.0 * std::sin(boost::math::double_constants::two_pi * static_cast(i) / 48.0); function.push_back(TTimeDoublePr(t, ft)); } std::size_t n = 5000u; TTimeDoublePrVec samples; - generateSeasonalValues(rng, - function, - startTime, - startTime + 31 * core::constants::DAY, - n, samples); + generateSeasonalValues(rng, function, startTime, startTime + 31 * core::constants::DAY, n, samples); TDoubleVec residuals; rng.generateGammaSamples(10.0, 1.2, n, residuals); @@ -278,12 +240,10 @@ void CSeasonalComponentTest::testConstantPeriodic() double totalError1 = 0.0; double totalError2 = 0.0; core_t::TTime time = startTime; - for (std::size_t i = 0u, d = 0u; i < n; ++i) - { + for (std::size_t i = 0u, d = 0u; i < n; ++i) { seasonal.addPoint(samples[i].first, samples[i].second + residuals[i]); - if (samples[i].first >= time + core::constants::DAY) - { + if (samples[i].first >= time + core::constants::DAY) { LOG_DEBUG("Processing day = " << ++d); time += core::constants::DAY; @@ -294,8 +254,7 @@ void 
CSeasonalComponentTest::testConstantPeriodic() //ft << "ft = ["; double error1 = 0.0; double error2 = 0.0; - for (std::size_t j = 0u; j < function.size(); ++j) - { + for (std::size_t j = 0u; j < function.size(); ++j) { //t << time + function[j].first << " "; //ft << function[j].second << " "; TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); @@ -307,12 +266,11 @@ void CSeasonalComponentTest::testConstantPeriodic() //t << "];\n"; //ft << "];\n"; - if (d > 1) - { + if (d > 1) { LOG_DEBUG("f(0) = " << mean(seasonal.value(time, 0.0)) - << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); } error1 /= static_cast(function.size()); @@ -346,69 +304,27 @@ void CSeasonalComponentTest::testConstantPeriodic() { LOG_DEBUG("*** piecewise linear ***"); - TTimeDoublePr knotPoints[] = - { - TTimeDoublePr(0, 1.0), - TTimeDoublePr(1800, 1.0), - TTimeDoublePr(3600, 2.0), - TTimeDoublePr(5400, 3.0), - TTimeDoublePr(7200, 5.0), - TTimeDoublePr(9000, 5.0), - TTimeDoublePr(10800, 10.0), - TTimeDoublePr(12600, 10.0), - TTimeDoublePr(14400, 12.0), - TTimeDoublePr(16200, 12.0), - TTimeDoublePr(18000, 14.0), - TTimeDoublePr(19800, 12.0), - TTimeDoublePr(21600, 10.0), - TTimeDoublePr(23400, 14.0), - TTimeDoublePr(25200, 16.0), - TTimeDoublePr(27000, 50.0), - TTimeDoublePr(28800, 300.0), - TTimeDoublePr(30600, 330.0), - TTimeDoublePr(32400, 310.0), - TTimeDoublePr(34200, 290.0), - TTimeDoublePr(36000, 280.0), - TTimeDoublePr(37800, 260.0), - TTimeDoublePr(39600, 250.0), - TTimeDoublePr(41400, 230.0), - TTimeDoublePr(43200, 230.0), - TTimeDoublePr(45000, 220.0), - TTimeDoublePr(46800, 240.0), - TTimeDoublePr(48600, 220.0), - TTimeDoublePr(50400, 260.0), - TTimeDoublePr(52200, 250.0), - TTimeDoublePr(54000, 260.0), - TTimeDoublePr(55800, 270.0), - TTimeDoublePr(57600, 280.0), - TTimeDoublePr(59400, 290.0), - TTimeDoublePr(61200, 290.0), - TTimeDoublePr(63000, 60.0), - TTimeDoublePr(64800, 20.0), - TTimeDoublePr(66600, 18.0), - TTimeDoublePr(68400, 19.0), - TTimeDoublePr(70200, 10.0), - TTimeDoublePr(72000, 10.0), - TTimeDoublePr(73800, 5.0), - TTimeDoublePr(75600, 5.0), - TTimeDoublePr(77400, 10.0), - TTimeDoublePr(79200, 5.0), - TTimeDoublePr(81000, 3.0), - TTimeDoublePr(82800, 1.0), - TTimeDoublePr(84600, 1.0), - TTimeDoublePr(86400, 1.0) - }; + TTimeDoublePr knotPoints[] = { + TTimeDoublePr(0, 1.0), TTimeDoublePr(1800, 1.0), TTimeDoublePr(3600, 2.0), TTimeDoublePr(5400, 3.0), + TTimeDoublePr(7200, 5.0), TTimeDoublePr(9000, 5.0), TTimeDoublePr(10800, 10.0), TTimeDoublePr(12600, 10.0), + TTimeDoublePr(14400, 12.0), TTimeDoublePr(16200, 12.0), TTimeDoublePr(18000, 14.0), TTimeDoublePr(19800, 12.0), + TTimeDoublePr(21600, 10.0), TTimeDoublePr(23400, 14.0), TTimeDoublePr(25200, 16.0), TTimeDoublePr(27000, 50.0), + TTimeDoublePr(28800, 300.0), TTimeDoublePr(30600, 330.0), TTimeDoublePr(32400, 310.0), TTimeDoublePr(34200, 290.0), + TTimeDoublePr(36000, 280.0), TTimeDoublePr(37800, 260.0), TTimeDoublePr(39600, 250.0), TTimeDoublePr(41400, 230.0), + TTimeDoublePr(43200, 230.0), TTimeDoublePr(45000, 220.0), TTimeDoublePr(46800, 240.0), TTimeDoublePr(48600, 220.0), + TTimeDoublePr(50400, 260.0), TTimeDoublePr(52200, 250.0), 
TTimeDoublePr(54000, 260.0), TTimeDoublePr(55800, 270.0), + TTimeDoublePr(57600, 280.0), TTimeDoublePr(59400, 290.0), TTimeDoublePr(61200, 290.0), TTimeDoublePr(63000, 60.0), + TTimeDoublePr(64800, 20.0), TTimeDoublePr(66600, 18.0), TTimeDoublePr(68400, 19.0), TTimeDoublePr(70200, 10.0), + TTimeDoublePr(72000, 10.0), TTimeDoublePr(73800, 5.0), TTimeDoublePr(75600, 5.0), TTimeDoublePr(77400, 10.0), + TTimeDoublePr(79200, 5.0), TTimeDoublePr(81000, 3.0), TTimeDoublePr(82800, 1.0), TTimeDoublePr(84600, 1.0), + TTimeDoublePr(86400, 1.0)}; TTimeDoublePrVec function(boost::begin(knotPoints), boost::end(knotPoints)); std::size_t n = 6000u; TTimeDoublePrVec samples; - generateSeasonalValues(rng, - function, - startTime, - startTime + 41 * core::constants::DAY, - n, samples); + generateSeasonalValues(rng, function, startTime, startTime + 41 * core::constants::DAY, n, samples); TDoubleVec residuals; rng.generateGammaSamples(10.0, 1.2, n, residuals); @@ -424,12 +340,10 @@ void CSeasonalComponentTest::testConstantPeriodic() double totalError1 = 0.0; double totalError2 = 0.0; core_t::TTime time = startTime; - for (std::size_t i = 0u, d = 0u; i < n; ++i) - { + for (std::size_t i = 0u, d = 0u; i < n; ++i) { seasonal.addPoint(samples[i].first, samples[i].second + residuals[i]); - if (samples[i].first >= time + core::constants::DAY) - { + if (samples[i].first >= time + core::constants::DAY) { LOG_DEBUG("Processing day = " << ++d); time += core::constants::DAY; @@ -440,8 +354,7 @@ void CSeasonalComponentTest::testConstantPeriodic() //ft << "ft = ["; double error1 = 0.0; double error2 = 0.0; - for (std::size_t j = 0u; j < function.size(); ++j) - { + for (std::size_t j = 0u; j < function.size(); ++j) { //t << time + function[j].first << " "; //ft << function[j].second << " "; TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); @@ -454,12 +367,11 @@ void CSeasonalComponentTest::testConstantPeriodic() //t << "];\n"; //ft << "];\n"; - if (d > 1) - { + if (d > 1) { LOG_DEBUG("f(0) = " << mean(seasonal.value(time, 0.0)) - << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); } error1 /= static_cast(function.size()); @@ -490,8 +402,7 @@ void CSeasonalComponentTest::testConstantPeriodic() } } -void CSeasonalComponentTest::testTimeVaryingPeriodic() -{ +void CSeasonalComponentTest::testTimeVaryingPeriodic() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentTest::testTimeVaryingPeriodic |"); LOG_DEBUG("+---------------------------------------------------+"); @@ -501,58 +412,20 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() core_t::TTime startTime = 0; - TTimeDoublePr knotPoints[] = - { - TTimeDoublePr(0, 1.0), - TTimeDoublePr(1800, 1.0), - TTimeDoublePr(3600, 2.0), - TTimeDoublePr(5400, 3.0), - TTimeDoublePr(7200, 5.0), - TTimeDoublePr(9000, 5.0), - TTimeDoublePr(10800, 10.0), - TTimeDoublePr(12600, 10.0), - TTimeDoublePr(14400, 12.0), - TTimeDoublePr(16200, 12.0), - TTimeDoublePr(18000, 14.0), - TTimeDoublePr(19800, 12.0), - TTimeDoublePr(21600, 10.0), - TTimeDoublePr(23400, 14.0), - TTimeDoublePr(25200, 16.0), - TTimeDoublePr(27000, 50.0), - TTimeDoublePr(28800, 300.0), 
- TTimeDoublePr(30600, 330.0), - TTimeDoublePr(32400, 310.0), - TTimeDoublePr(34200, 290.0), - TTimeDoublePr(36000, 280.0), - TTimeDoublePr(37800, 260.0), - TTimeDoublePr(39600, 250.0), - TTimeDoublePr(41400, 230.0), - TTimeDoublePr(43200, 230.0), - TTimeDoublePr(45000, 220.0), - TTimeDoublePr(46800, 240.0), - TTimeDoublePr(48600, 220.0), - TTimeDoublePr(50400, 260.0), - TTimeDoublePr(52200, 250.0), - TTimeDoublePr(54000, 260.0), - TTimeDoublePr(55800, 270.0), - TTimeDoublePr(57600, 280.0), - TTimeDoublePr(59400, 290.0), - TTimeDoublePr(61200, 290.0), - TTimeDoublePr(63000, 60.0), - TTimeDoublePr(64800, 20.0), - TTimeDoublePr(66600, 18.0), - TTimeDoublePr(68400, 19.0), - TTimeDoublePr(70200, 10.0), - TTimeDoublePr(72000, 10.0), - TTimeDoublePr(73800, 5.0), - TTimeDoublePr(75600, 5.0), - TTimeDoublePr(77400, 10.0), - TTimeDoublePr(79200, 5.0), - TTimeDoublePr(81000, 3.0), - TTimeDoublePr(82800, 1.0), - TTimeDoublePr(84600, 1.0), - TTimeDoublePr(86400, 1.0) - }; + TTimeDoublePr knotPoints[] = { + TTimeDoublePr(0, 1.0), TTimeDoublePr(1800, 1.0), TTimeDoublePr(3600, 2.0), TTimeDoublePr(5400, 3.0), + TTimeDoublePr(7200, 5.0), TTimeDoublePr(9000, 5.0), TTimeDoublePr(10800, 10.0), TTimeDoublePr(12600, 10.0), + TTimeDoublePr(14400, 12.0), TTimeDoublePr(16200, 12.0), TTimeDoublePr(18000, 14.0), TTimeDoublePr(19800, 12.0), + TTimeDoublePr(21600, 10.0), TTimeDoublePr(23400, 14.0), TTimeDoublePr(25200, 16.0), TTimeDoublePr(27000, 50.0), + TTimeDoublePr(28800, 300.0), TTimeDoublePr(30600, 330.0), TTimeDoublePr(32400, 310.0), TTimeDoublePr(34200, 290.0), + TTimeDoublePr(36000, 280.0), TTimeDoublePr(37800, 260.0), TTimeDoublePr(39600, 250.0), TTimeDoublePr(41400, 230.0), + TTimeDoublePr(43200, 230.0), TTimeDoublePr(45000, 220.0), TTimeDoublePr(46800, 240.0), TTimeDoublePr(48600, 220.0), + TTimeDoublePr(50400, 260.0), TTimeDoublePr(52200, 250.0), TTimeDoublePr(54000, 260.0), TTimeDoublePr(55800, 270.0), + TTimeDoublePr(57600, 280.0), TTimeDoublePr(59400, 290.0), TTimeDoublePr(61200, 290.0), TTimeDoublePr(63000, 60.0), + TTimeDoublePr(64800, 20.0), TTimeDoublePr(66600, 18.0), TTimeDoublePr(68400, 19.0), TTimeDoublePr(70200, 10.0), + TTimeDoublePr(72000, 10.0), TTimeDoublePr(73800, 5.0), TTimeDoublePr(75600, 5.0), TTimeDoublePr(77400, 10.0), + TTimeDoublePr(79200, 5.0), TTimeDoublePr(81000, 3.0), TTimeDoublePr(82800, 1.0), TTimeDoublePr(84600, 1.0), + TTimeDoublePr(86400, 1.0)}; TTimeDoublePrVec function(boost::begin(knotPoints), boost::end(knotPoints)); @@ -563,27 +436,21 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() core_t::TTime time = startTime; - double totalError1 = 0.0; - double totalError2 = 0.0; + double totalError1 = 0.0; + double totalError2 = 0.0; double numberErrors = 0.0; - for (std::size_t d = 0u; d < 365; ++d) - { + for (std::size_t d = 0u; d < 365; ++d) { double scale = 2.0 + 2.0 * std::sin(3.14159265358979 * static_cast(d) / 365.0); TTimeDoublePrVec samples; - generateSeasonalValues(rng, - function, - time, - time + core::constants::DAY, - 100, samples); + generateSeasonalValues(rng, function, time, time + core::constants::DAY, 100, samples); TDoubleVec residuals; rng.generateGammaSamples(10.0, 1.2, 100, residuals); double residualMean = maths::CBasicStatistics::mean(residuals); - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { seasonal.addPoint(samples[i].first, scale * samples[i].second + residuals[i]); } @@ -593,16 +460,14 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() seasonal.updateStartOfCurrentPeriodAndInterpolate(time); - 
if (seasonal.initialized()) - { + if (seasonal.initialized()) { //std::ostringstream t; //std::ostringstream ft; //t << "t = ["; //ft << "ft = ["; double error1 = 0.0; double error2 = 0.0; - for (std::size_t j = 0u; j < function.size(); ++j) - { + for (std::size_t j = 0u; j < function.size(); ++j) { //t << time + function[j].first << " "; //ft << function[j].second << " "; TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); @@ -615,12 +480,11 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() //t << "];\n"; //ft << "];\n"; - if (d > 1) - { + if (d > 1) { LOG_DEBUG("f(0) = " << mean(seasonal.value(time, 0.0)) - << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); } error1 /= static_cast(function.size()); @@ -649,8 +513,7 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() CPPUNIT_ASSERT(totalError2 / numberErrors < 14.0); } -void CSeasonalComponentTest::testVeryLowVariation() -{ +void CSeasonalComponentTest::testVeryLowVariation() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CSeasonalComponentTest::testVeryLowVariation |"); LOG_DEBUG("+------------------------------------------------+"); @@ -660,8 +523,7 @@ void CSeasonalComponentTest::testVeryLowVariation() const core_t::TTime startTime = 1354492800; TTimeDoublePrVec function; - for (std::size_t i = 0u; i < 25; ++i) - { + for (std::size_t i = 0u; i < 25; ++i) { function.push_back(TTimeDoublePr((i * core::constants::DAY) / 24, 50.0)); } @@ -670,11 +532,7 @@ void CSeasonalComponentTest::testVeryLowVariation() std::size_t n = 5000u; TTimeDoublePrVec samples; - generateSeasonalValues(rng, - function, - startTime, - startTime + 31 * core::constants::DAY, - n, samples); + generateSeasonalValues(rng, function, startTime, startTime + 31 * core::constants::DAY, n, samples); TDoubleVec residuals; rng.generateNormalSamples(0.0, 1e-3, n, residuals); @@ -692,12 +550,10 @@ void CSeasonalComponentTest::testVeryLowVariation() double totalError1 = 0.0; double totalError2 = 0.0; core_t::TTime time = startTime; - for (std::size_t i = 0u, d = 0u; i < n; ++i) - { + for (std::size_t i = 0u, d = 0u; i < n; ++i) { seasonal.addPoint(samples[i].first, samples[i].second + residuals[i]); - if (samples[i].first >= time + core::constants::DAY) - { + if (samples[i].first >= time + core::constants::DAY) { LOG_DEBUG("Processing day = " << ++d); time += core::constants::DAY; @@ -708,8 +564,7 @@ void CSeasonalComponentTest::testVeryLowVariation() //ft << "ft = ["; double error1 = 0.0; double error2 = 0.0; - for (std::size_t j = 0u; j < function.size(); ++j) - { + for (std::size_t j = 0u; j < function.size(); ++j) { //t << time + function[j].first << " "; //ft << function[j].second << " "; TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); @@ -722,12 +577,11 @@ void CSeasonalComponentTest::testVeryLowVariation() //t << "];\n"; //ft << "];\n"; - if (d > 1) - { + if (d > 1) { LOG_DEBUG("f(0) = " << mean(seasonal.value(time, 0.0)) - << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + 
core::constants::DAY - 1, 0.0)), 0.1); + << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); } error1 /= static_cast(function.size()); error2 /= static_cast(function.size()); @@ -754,8 +608,7 @@ void CSeasonalComponentTest::testVeryLowVariation() CPPUNIT_ASSERT_DOUBLES_EQUAL(totalError2, 0.0, 0.04 * deviation); } -void CSeasonalComponentTest::testVariance() -{ +void CSeasonalComponentTest::testVariance() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CSeasonalComponentTest::testVariance |"); LOG_DEBUG("+----------------------------------------+"); @@ -767,15 +620,12 @@ void CSeasonalComponentTest::testVariance() test::CRandomNumbers rng; TTimeDoublePrVec function; - for (core_t::TTime i = 0u; i < 481; ++i) - { + for (core_t::TTime i = 0u; i < 481; ++i) { core_t::TTime t = (i * core::constants::DAY) / 48; - double vt = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi - * static_cast(i % 48) / 48.0); + double vt = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast(i % 48) / 48.0); TDoubleVec sample; rng.generateNormalSamples(0.0, vt, 10, sample); - for (std::size_t j = 0u; j < sample.size(); ++j) - { + for (std::size_t j = 0u; j < sample.size(); ++j) { function.push_back(TTimeDoublePr(t, sample[j])); } } @@ -783,22 +633,17 @@ void CSeasonalComponentTest::testVariance() CTestSeasonalComponent seasonal(0, core::constants::DAY, core::constants::DAY, 24); seasonal.initialize(0); - for (std::size_t i = 0u; i < function.size(); ++i) - { + for (std::size_t i = 0u; i < function.size(); ++i) { seasonal.addPoint(function[i].first, function[i].second); } TMeanAccumulator error; - for (core_t::TTime i = 0u; i < 48; ++i) - { + for (core_t::TTime i = 0u; i < 48; ++i) { core_t::TTime t = (i * core::constants::DAY) / 48; - double v_ = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi - * static_cast(i) / 48.0); + double v_ = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast(i) / 48.0); TDoubleDoublePr vv = seasonal.variance(t, 98.0); double v = (vv.first + vv.second) / 2.0; - LOG_DEBUG("v_ = " << v_ - << ", v = " << core::CContainerPrinter::print(vv) - << ", relative error = " << std::fabs(v - v_) / v_); + LOG_DEBUG("v_ = " << v_ << ", v = " << core::CContainerPrinter::print(vv) << ", relative error = " << std::fabs(v - v_) / v_); CPPUNIT_ASSERT_DOUBLES_EQUAL(v_, v, 0.4 * v_); CPPUNIT_ASSERT(v_ > vv.first && v_ < vv.second); @@ -809,8 +654,7 @@ void CSeasonalComponentTest::testVariance() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.11); } -void CSeasonalComponentTest::testPersist() -{ +void CSeasonalComponentTest::testPersist() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CSeasonalComponentTest::testPersist |"); LOG_DEBUG("+---------------------------------------+"); @@ -825,22 +669,16 @@ void CSeasonalComponentTest::testPersist() test::CRandomNumbers rng; TTimeDoublePrVec function; - for (core_t::TTime i = 0u; i < 49; ++i) - { + for (core_t::TTime i = 0u; i < 49; ++i) { core_t::TTime t = (i * core::constants::DAY) / 48; - double ft = 100.0 + 40.0 * std::sin(boost::math::double_constants::two_pi - * static_cast(i) / 48.0); + double ft = 100.0 + 40.0 * std::sin(boost::math::double_constants::two_pi * static_cast(i) / 48.0); function.push_back(TTimeDoublePr(t, ft)); } std::size_t n = 3300u; TTimeDoublePrVec samples; - 
generateSeasonalValues(rng, - function, - startTime, - startTime + 31 * core::constants::DAY, - n, samples); + generateSeasonalValues(rng, function, startTime, startTime + 31 * core::constants::DAY, n, samples); TDoubleVec residuals; rng.generateGammaSamples(10.0, 1.2, n, residuals); @@ -848,10 +686,8 @@ void CSeasonalComponentTest::testPersist() CTestSeasonalComponent origSeasonal(startTime, core::constants::DAY, core::constants::DAY, 24, decayRate); origSeasonal.initialize(startTime); - for (std::size_t i = 0u; i < n; ++i) - { - origSeasonal.addPoint(samples[i].first, - samples[i].second + residuals[i]); + for (std::size_t i = 0u; i < n; ++i) { + origSeasonal.addPoint(samples[i].first, samples[i].second + residuals[i]); } std::string origXml; @@ -880,52 +716,39 @@ void CSeasonalComponentTest::testPersist() // Test that the values and variances of the original and // restored components are similar. - for (core_t::TTime time = 0; time < core::constants::DAY; time += minute) - { + for (core_t::TTime time = 0; time < core::constants::DAY; time += minute) { TDoubleDoublePr xo = origSeasonal.value(time, 80.0); TDoubleDoublePr xn = restoredSeasonal.value(time, 80.0); - if (time % (15 * minute) == 0) - { - LOG_DEBUG("xo = " << core::CContainerPrinter::print(xo) - << ", xn = " << core::CContainerPrinter::print(xn)); - + if (time % (15 * minute) == 0) { + LOG_DEBUG("xo = " << core::CContainerPrinter::print(xo) << ", xn = " << core::CContainerPrinter::print(xn)); } CPPUNIT_ASSERT_DOUBLES_EQUAL(xo.first, xn.first, 0.3); CPPUNIT_ASSERT_DOUBLES_EQUAL(xo.second, xn.second, 0.3); TDoubleDoublePr vo = origSeasonal.variance(time, 80.0); TDoubleDoublePr vn = origSeasonal.variance(time, 80.0); - if (time % (15 * minute) == 0) - { - LOG_DEBUG("vo = " << core::CContainerPrinter::print(vo) - << ", vn = " << core::CContainerPrinter::print(vn)); + if (time % (15 * minute) == 0) { + LOG_DEBUG("vo = " << core::CContainerPrinter::print(vo) << ", vn = " << core::CContainerPrinter::print(vn)); } CPPUNIT_ASSERT_DOUBLES_EQUAL(vo.first, vn.first, 1e-3); CPPUNIT_ASSERT_DOUBLES_EQUAL(vo.second, vn.second, 1e-3); } } -CppUnit::Test *CSeasonalComponentTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSeasonalComponentTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSeasonalComponentTest::testNoPeriodicity", - &CSeasonalComponentTest::testNoPeriodicity) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSeasonalComponentTest::testConstantPeriodic", - &CSeasonalComponentTest::testConstantPeriodic) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSeasonalComponentTest::testTimeVaryingPeriodic", - &CSeasonalComponentTest::testTimeVaryingPeriodic) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSeasonalComponentTest::testVeryLowVariation", - &CSeasonalComponentTest::testVeryLowVariation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSeasonalComponentTest::testVariance", - &CSeasonalComponentTest::testVariance) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSeasonalComponentTest::testPersist", - &CSeasonalComponentTest::testPersist) ); +CppUnit::Test* CSeasonalComponentTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSeasonalComponentTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSeasonalComponentTest::testNoPeriodicity", + &CSeasonalComponentTest::testNoPeriodicity)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSeasonalComponentTest::testConstantPeriodic", + 
&CSeasonalComponentTest::testConstantPeriodic));
    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>("CSeasonalComponentTest::testTimeVaryingPeriodic",
                                                                          &CSeasonalComponentTest::testTimeVaryingPeriodic));
    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>("CSeasonalComponentTest::testVeryLowVariation",
                                                                          &CSeasonalComponentTest::testVeryLowVariation));
    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>(
        "CSeasonalComponentTest::testVariance", &CSeasonalComponentTest::testVariance));
    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>(
        "CSeasonalComponentTest::testPersist", &CSeasonalComponentTest::testPersist));

     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CSeasonalComponentTest.h b/lib/maths/unittest/CSeasonalComponentTest.h
index 4279f9ed8d..1835f85c97 100644
--- a/lib/maths/unittest/CSeasonalComponentTest.h
+++ b/lib/maths/unittest/CSeasonalComponentTest.h
@@ -9,17 +9,16 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CSeasonalComponentTest : public CppUnit::TestFixture
-{
-    public:
-        void testNoPeriodicity();
-        void testConstantPeriodic();
-        void testTimeVaryingPeriodic();
-        void testVeryLowVariation();
-        void testVariance();
-        void testPersist();
+class CSeasonalComponentTest : public CppUnit::TestFixture {
+public:
+    void testNoPeriodicity();
+    void testConstantPeriodic();
+    void testTimeVaryingPeriodic();
+    void testVeryLowVariation();
+    void testVariance();
+    void testPersist();

-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CSeasonalComponentTest_h
diff --git a/lib/maths/unittest/CSetToolsTest.cc b/lib/maths/unittest/CSetToolsTest.cc
index d85c011ac3..f05b2189d7 100644
--- a/lib/maths/unittest/CSetToolsTest.cc
+++ b/lib/maths/unittest/CSetToolsTest.cc
@@ -24,8 +24,7 @@ using namespace ml;

 using TDoubleVec = std::vector<double>;
 using TSizeVec = std::vector<std::size_t>;

-void CSetToolsTest::testInplaceSetDifference()
-{
+void CSetToolsTest::testInplaceSetDifference() {
     LOG_DEBUG("+-------------------------------------------+");
     LOG_DEBUG("| CSetToolsTest::testInplaceSetDifference |");
     LOG_DEBUG("+-------------------------------------------+");
@@ -34,44 +33,33 @@ void CSetToolsTest::testInplaceSetDifference()
     {
         LOG_DEBUG("Edge cases");

-        double a[] = { 1.0, 1.1, 1.2, 3.4, 7.8 };
+        double a[] = {1.0, 1.1, 1.2, 3.4, 7.8};
         TDoubleVec A(boost::begin(a), boost::end(a));
-        for (std::size_t i = 0u; i < boost::size(a); ++i)
-        {
+        for (std::size_t i = 0u; i < boost::size(a); ++i) {
             TDoubleVec left;
-            for (std::size_t j = 0; j < i; ++j)
-            {
+            for (std::size_t j = 0; j < i; ++j) {
                 left.push_back(a[j]);
             }

             TDoubleVec expected;
-            std::set_difference(A.begin(), A.end(),
-                                left.begin(), left.end(),
-                                std::back_inserter(expected));
+            std::set_difference(A.begin(), A.end(), left.begin(), left.end(), std::back_inserter(expected));

             TDoubleVec test = A;
             maths::CSetTools::inplace_set_difference(test, left.begin(), left.end());
-            LOG_DEBUG("A = " << core::CContainerPrinter::print(A)
-                      << ", B = " << core::CContainerPrinter::print(left)
-                      << ", A - B = " << core::CContainerPrinter::print(test));
-            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected),
-                                 core::CContainerPrinter::print(test));
+            LOG_DEBUG("A = " << core::CContainerPrinter::print(A) << ", B = " << core::CContainerPrinter::print(left)
+                             << ", A - B = " << core::CContainerPrinter::print(test));
+            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(test));

             TDoubleVec right;
-            for (std::size_t j = i; j < boost::size(a); ++j)
-            {
+            for (std::size_t j = i; j < boost::size(a); ++j) {
                 right.push_back(a[j]);
} expected.clear(); - std::set_difference(A.begin(), A.end(), - right.begin(), right.end(), - std::back_inserter(expected)); + std::set_difference(A.begin(), A.end(), right.begin(), right.end(), std::back_inserter(expected)); test = A; maths::CSetTools::inplace_set_difference(test, right.begin(), right.end()); - LOG_DEBUG("A = " << core::CContainerPrinter::print(A) - << ", B = " << core::CContainerPrinter::print(right) - << ", A - B = " << core::CContainerPrinter::print(test)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), - core::CContainerPrinter::print(test)); + LOG_DEBUG("A = " << core::CContainerPrinter::print(A) << ", B = " << core::CContainerPrinter::print(right) + << ", A - B = " << core::CContainerPrinter::print(test)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(test)); } } @@ -79,8 +67,7 @@ void CSetToolsTest::testInplaceSetDifference() test::CRandomNumbers rng; - for (std::size_t t = 0u; t < 100; ++t) - { + for (std::size_t t = 0u; t < 100; ++t) { TDoubleVec A; rng.generateUniformSamples(0.0, 100.0, t, A); std::sort(A.begin(), A.end()); @@ -88,39 +75,31 @@ void CSetToolsTest::testInplaceSetDifference() TDoubleVec B; TDoubleVec mask; rng.generateUniformSamples(0.0, 1.0, t, mask); - for (std::size_t i = 0u; i < mask.size(); ++i) - { - if (mask[i] < 0.2) - { + for (std::size_t i = 0u; i < mask.size(); ++i) { + if (mask[i] < 0.2) { B.push_back(A[i]); } } TDoubleVec expected; - std::set_difference(A.begin(), A.end(), - B.begin(), B.end(), - std::back_inserter(expected)); + std::set_difference(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(expected)); - if ((t + 1) % 10 == 0) - { + if ((t + 1) % 10 == 0) { LOG_DEBUG("A = " << core::CContainerPrinter::print(A)); LOG_DEBUG("B = " << core::CContainerPrinter::print(B)); } maths::CSetTools::inplace_set_difference(A, B.begin(), B.end()); - if ((t + 1) % 10 == 0) - { + if ((t + 1) % 10 == 0) { LOG_DEBUG("A - B = " << core::CContainerPrinter::print(A)); } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), - core::CContainerPrinter::print(A)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(A)); } } -void CSetToolsTest::testSetSizes() -{ +void CSetToolsTest::testSetSizes() { LOG_DEBUG("+-------------------------------+"); LOG_DEBUG("| CSetToolsTest::testSetSizes |"); LOG_DEBUG("+-------------------------------+"); @@ -128,52 +107,37 @@ void CSetToolsTest::testSetSizes() { LOG_DEBUG("Edge cases"); - double a[] = { 1.0, 1.1, 1.2, 3.4, 7.8 }; + double a[] = {1.0, 1.1, 1.2, 3.4, 7.8}; TDoubleVec A(boost::begin(a), boost::end(a)); - for (std::size_t i = 0u; i < boost::size(a); ++i) - { + for (std::size_t i = 0u; i < boost::size(a); ++i) { TDoubleVec left; - for (std::size_t j = 0; j < i; ++j) - { + for (std::size_t j = 0; j < i; ++j) { left.push_back(a[j]); } TDoubleVec expected; - std::set_intersection(A.begin(), A.end(), - left.begin(), left.end(), - std::back_inserter(expected)); - std::size_t test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), - left.begin(), left.end()); - LOG_DEBUG("A = " << core::CContainerPrinter::print(A) - << ", B = " << core::CContainerPrinter::print(left) - << ", |A ^ B| = " << test); + std::set_intersection(A.begin(), A.end(), left.begin(), left.end(), std::back_inserter(expected)); + std::size_t test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), left.begin(), left.end()); + LOG_DEBUG("A = " << core::CContainerPrinter::print(A) << ", B = " << 
core::CContainerPrinter::print(left) + << ", |A ^ B| = " << test); CPPUNIT_ASSERT_EQUAL(expected.size(), test); TDoubleVec right; - for (std::size_t j = i; j < boost::size(a); ++j) - { + for (std::size_t j = i; j < boost::size(a); ++j) { right.push_back(a[j]); } expected.clear(); - std::set_intersection(A.begin(), A.end(), - right.begin(), right.end(), - std::back_inserter(expected)); - test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), - right.begin(), right.end()); - LOG_DEBUG("A = " << core::CContainerPrinter::print(A) - << ", B = " << core::CContainerPrinter::print(right) - << ", |A ^ B| = " << test); + std::set_intersection(A.begin(), A.end(), right.begin(), right.end(), std::back_inserter(expected)); + test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), right.begin(), right.end()); + LOG_DEBUG("A = " << core::CContainerPrinter::print(A) << ", B = " << core::CContainerPrinter::print(right) + << ", |A ^ B| = " << test); CPPUNIT_ASSERT_EQUAL(expected.size(), test); expected.clear(); - std::set_union(left.begin(), left.end(), - right.begin(), right.end(), - std::back_inserter(expected)); - test = maths::CSetTools::setUnionSize(left.begin(), left.end(), - right.begin(), right.end()); - LOG_DEBUG("A = " << core::CContainerPrinter::print(left) - << ", B = " << core::CContainerPrinter::print(right) - << ", |A U B| = " << test); + std::set_union(left.begin(), left.end(), right.begin(), right.end(), std::back_inserter(expected)); + test = maths::CSetTools::setUnionSize(left.begin(), left.end(), right.begin(), right.end()); + LOG_DEBUG("A = " << core::CContainerPrinter::print(left) << ", B = " << core::CContainerPrinter::print(right) + << ", |A U B| = " << test); CPPUNIT_ASSERT_EQUAL(expected.size(), test); } } @@ -182,8 +146,7 @@ void CSetToolsTest::testSetSizes() test::CRandomNumbers rng; - for (std::size_t t = 0u; t < 100; ++t) - { + for (std::size_t t = 0u; t < 100; ++t) { TDoubleVec A; rng.generateUniformSamples(0.0, 100.0, t, A); std::sort(A.begin(), A.end()); @@ -191,44 +154,34 @@ void CSetToolsTest::testSetSizes() TDoubleVec B; TDoubleVec mask; rng.generateUniformSamples(0.0, 1.0, t, mask); - for (std::size_t i = 0u; i < mask.size(); ++i) - { - if (mask[i] < 0.2) - { + for (std::size_t i = 0u; i < mask.size(); ++i) { + if (mask[i] < 0.2) { B.push_back(A[i]); } } TDoubleVec expected; - std::set_intersection(A.begin(), A.end(), - B.begin(), B.end(), - std::back_inserter(expected)); + std::set_intersection(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(expected)); - if ((t + 1) % 10 == 0) - { + if ((t + 1) % 10 == 0) { LOG_DEBUG("A = " << core::CContainerPrinter::print(A)); LOG_DEBUG("B = " << core::CContainerPrinter::print(B)); } - std::size_t test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), - B.begin(), B.end()); + std::size_t test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), B.begin(), B.end()); - if ((t + 1) % 10 == 0) - { + if ((t + 1) % 10 == 0) { LOG_DEBUG("|A ^ B| = " << test); } CPPUNIT_ASSERT_EQUAL(expected.size(), test); expected.clear(); - std::set_union(A.begin(), A.end(), - B.begin(), B.end(), - std::back_inserter(expected)); + std::set_union(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(expected)); test = maths::CSetTools::setUnionSize(A.begin(), A.end(), B.begin(), B.end()); - if ((t + 1) % 10 == 0) - { + if ((t + 1) % 10 == 0) { LOG_DEBUG("|A U B| = " << test); } @@ -236,8 +189,7 @@ void CSetToolsTest::testSetSizes() } } -void CSetToolsTest::testJaccard() -{ +void CSetToolsTest::testJaccard() { 
LOG_DEBUG("+------------------------------+"); LOG_DEBUG("| CSetToolsTest::testJaccard |"); LOG_DEBUG("+------------------------------+"); @@ -245,21 +197,20 @@ void CSetToolsTest::testJaccard() { LOG_DEBUG("Edge cases"); - double A[] = { 0.0, 1.2, 3.2 }; - double B[] = { 0.0, 1.2, 3.2, 5.1 }; + double A[] = {0.0, 1.2, 3.2}; + double B[] = {0.0, 1.2, 3.2, 5.1}; - CPPUNIT_ASSERT_EQUAL(0.0, maths::CSetTools::jaccard(A, A, B, B)); - CPPUNIT_ASSERT_EQUAL(1.0, maths::CSetTools::jaccard(A, A + 3, B, B + 3)); + CPPUNIT_ASSERT_EQUAL(0.0, maths::CSetTools::jaccard(A, A, B, B)); + CPPUNIT_ASSERT_EQUAL(1.0, maths::CSetTools::jaccard(A, A + 3, B, B + 3)); CPPUNIT_ASSERT_EQUAL(0.75, maths::CSetTools::jaccard(A, A + 3, B, B + 4)); - CPPUNIT_ASSERT_EQUAL(0.0, maths::CSetTools::jaccard(A, A + 3, B + 3, B + 4)); + CPPUNIT_ASSERT_EQUAL(0.0, maths::CSetTools::jaccard(A, A + 3, B + 3, B + 4)); } LOG_DEBUG("Random"); test::CRandomNumbers rng; - for (std::size_t t = 0u; t < 500; ++t) - { + for (std::size_t t = 0u; t < 500; ++t) { TSizeVec sizes; rng.generateUniformSamples(t / 2 + 1, (3 * t) / 2 + 2, 2, sizes); @@ -274,22 +225,15 @@ void CSetToolsTest::testJaccard() B.erase(std::unique(B.begin(), B.end()), B.end()); TSizeVec AIntersectB; - std::set_intersection(A.begin(), A.end(), - B.begin(), B.end(), - std::back_inserter(AIntersectB)); + std::set_intersection(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(AIntersectB)); TSizeVec AUnionB; - std::set_union(A.begin(), A.end(), - B.begin(), B.end(), - std::back_inserter(AUnionB)); + std::set_union(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(AUnionB)); - double expected = static_cast(AIntersectB.size()) - / static_cast(AUnionB.size()); - double actual = maths::CSetTools::jaccard(A.begin(), A.end(), - B.begin(), B.end()); + double expected = static_cast(AIntersectB.size()) / static_cast(AUnionB.size()); + double actual = maths::CSetTools::jaccard(A.begin(), A.end(), B.begin(), B.end()); - if ((t + 1) % 10 == 0) - { + if ((t + 1) % 10 == 0) { LOG_DEBUG("Jaccard expected = " << expected); LOG_DEBUG("Jaccard actual = " << actual); } @@ -297,8 +241,7 @@ void CSetToolsTest::testJaccard() } } -void CSetToolsTest::testOverlap() -{ +void CSetToolsTest::testOverlap() { LOG_DEBUG("+------------------------------+"); LOG_DEBUG("| CSetToolsTest::testOverlap |"); LOG_DEBUG("+------------------------------+"); @@ -306,8 +249,8 @@ void CSetToolsTest::testOverlap() { LOG_DEBUG("Edge cases"); - double A[] = { 0.0, 1.2, 3.2 }; - double B[] = { 0.0, 1.2, 3.2, 5.1 }; + double A[] = {0.0, 1.2, 3.2}; + double B[] = {0.0, 1.2, 3.2, 5.1}; CPPUNIT_ASSERT_EQUAL(0.0, maths::CSetTools::overlap(A, A, B, B)); CPPUNIT_ASSERT_EQUAL(1.0, maths::CSetTools::overlap(A, A + 3, B, B + 3)); @@ -319,8 +262,7 @@ void CSetToolsTest::testOverlap() test::CRandomNumbers rng; - for (std::size_t t = 0u; t < 500; ++t) - { + for (std::size_t t = 0u; t < 500; ++t) { TSizeVec sizes; rng.generateUniformSamples(t / 2 + 1, (3 * t) / 2 + 2, 2, sizes); @@ -335,19 +277,14 @@ void CSetToolsTest::testOverlap() B.erase(std::unique(B.begin(), B.end()), B.end()); TSizeVec AIntersectB; - std::set_intersection(A.begin(), A.end(), - B.begin(), B.end(), - std::back_inserter(AIntersectB)); + std::set_intersection(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(AIntersectB)); std::size_t min = std::min(A.size(), B.size()); - double expected = static_cast(AIntersectB.size()) - / static_cast(min); - double actual = maths::CSetTools::overlap(A.begin(), A.end(), - B.begin(), B.end()); + double 
expected = static_cast(AIntersectB.size()) / static_cast(min); + double actual = maths::CSetTools::overlap(A.begin(), A.end(), B.begin(), B.end()); - if ((t + 1) % 10 == 0) - { + if ((t + 1) % 10 == 0) { LOG_DEBUG("Overlap expected = " << expected); LOG_DEBUG("Overlap actual = " << actual); } @@ -355,22 +292,14 @@ void CSetToolsTest::testOverlap() } } -CppUnit::Test* CSetToolsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSetToolsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSetToolsTest::testInplaceSetDifference", - &CSetToolsTest::testInplaceSetDifference) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSetToolsTest::testSetSizes", - &CSetToolsTest::testSetSizes) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSetToolsTest::testJaccard", - &CSetToolsTest::testJaccard) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSetToolsTest::testOverlap", - &CSetToolsTest::testOverlap) ); +CppUnit::Test* CSetToolsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSetToolsTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CSetToolsTest::testInplaceSetDifference", &CSetToolsTest::testInplaceSetDifference)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSetToolsTest::testSetSizes", &CSetToolsTest::testSetSizes)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSetToolsTest::testJaccard", &CSetToolsTest::testJaccard)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSetToolsTest::testOverlap", &CSetToolsTest::testOverlap)); return suiteOfTests; } diff --git a/lib/maths/unittest/CSetToolsTest.h b/lib/maths/unittest/CSetToolsTest.h index 3d7682ec60..27dab23abe 100644 --- a/lib/maths/unittest/CSetToolsTest.h +++ b/lib/maths/unittest/CSetToolsTest.h @@ -9,15 +9,14 @@ #include -class CSetToolsTest : public CppUnit::TestFixture -{ - public: - void testInplaceSetDifference(); - void testSetSizes(); - void testJaccard(); - void testOverlap(); +class CSetToolsTest : public CppUnit::TestFixture { +public: + void testInplaceSetDifference(); + void testSetSizes(); + void testJaccard(); + void testOverlap(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CSetToolsTest_h diff --git a/lib/maths/unittest/CSignalTest.cc b/lib/maths/unittest/CSignalTest.cc index 44994b74d5..2330f2532b 100644 --- a/lib/maths/unittest/CSignalTest.cc +++ b/lib/maths/unittest/CSignalTest.cc @@ -17,115 +17,89 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TSizeVec = std::vector; -std::string print(const maths::CSignal::TComplexVec &f) -{ +std::string print(const maths::CSignal::TComplexVec& f) { std::ostringstream result; - for (std::size_t i = 0u; i < f.size(); ++i) - { + for (std::size_t i = 0u; i < f.size(); ++i) { LOG_DEBUG(f[i].real() << " + " << f[i].imag() << 'i'); } return result.str(); } -void bruteForceDft(maths::CSignal::TComplexVec &f, double sign) -{ +void bruteForceDft(maths::CSignal::TComplexVec& f, double sign) { maths::CSignal::TComplexVec result(f.size(), maths::CSignal::TComplex(0.0, 0.0)); - for (std::size_t k = 0u; k < f.size(); ++k) - { - for (std::size_t n = 0u; n < f.size(); ++n) - { - double t = -sign * boost::math::double_constants::two_pi * static_cast(k * n) - / static_cast(f.size()); + for (std::size_t k = 0u; k < f.size(); ++k) { + for (std::size_t n = 0u; n < f.size(); ++n) { + double t = -sign * boost::math::double_constants::two_pi * static_cast(k * n) / static_cast(f.size()); result[k] += 
maths::CSignal::TComplex(std::cos(t), std::sin(t)) * f[n]; } - if (sign < 0.0) - { + if (sign < 0.0) { result[k] /= static_cast(f.size()); } } f.swap(result); } - } -void CSignalTest::testFFTVersusOctave() -{ +void CSignalTest::testFFTVersusOctave() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CSignalTest::testFFTVersusOctave |"); LOG_DEBUG("+------------------------------------+"); // Test versus values calculated using octave fft. - double x[][20] = - { - { - 2555.33, 1451.79, 465.60, 4394.83, -1553.24, - -2772.07, -3977.73, 2249.31, -2006.04, 3540.84, - 4271.63, 4648.81, -727.90, 2285.24, 3129.56, - -3596.79, -1968.66, 3795.18, 1627.84, 228.40 - }, - { - 4473.77, -4815.63, -818.38, -1953.72, -2323.39, - -3007.25, 4444.24, 435.21, 3613.32, 3471.37, - -1735.72, 2560.82, -2383.29, -2370.23, -4921.04, - -541.25, 1516.69, -2028.42, 3981.02, 3156.88 - } - }; + double x[][20] = {{2555.33, 1451.79, 465.60, 4394.83, -1553.24, -2772.07, -3977.73, 2249.31, -2006.04, 3540.84, + 4271.63, 4648.81, -727.90, 2285.24, 3129.56, -3596.79, -1968.66, 3795.18, 1627.84, 228.40}, + {4473.77, -4815.63, -818.38, -1953.72, -2323.39, -3007.25, 4444.24, 435.21, 3613.32, 3471.37, + -1735.72, 2560.82, -2383.29, -2370.23, -4921.04, -541.25, 1516.69, -2028.42, 3981.02, 3156.88}}; maths::CSignal::TComplexVec fx; - for (std::size_t i = 0u; i < 20; ++i) - { + for (std::size_t i = 0u; i < 20; ++i) { fx.push_back(maths::CSignal::TComplex(x[0][i], x[1][i])); } LOG_DEBUG("*** Power of 2 Length ***"); { - double expected[][2] = - { - // length 2 - { 4007.1, -341.9 }, - { 1103.5, 9289.4 }, - // length 4 - { 8867.5, -3114.0 }, - { -772.18, 8235.2 }, - { -2825.7, 10425.0 }, - { 4951.6, 2349.1}, - // length 8 - { 2813.8, -3565.1 }, - { -2652.4, -1739.5 }, - { -1790.1, 6488.9 }, - { 4933.6, 6326.1 }, - { -7833.9, 15118.0 }, - { 344.29, 6447.2 }, - { 10819.0, -9439.9 }, - { 13809.0, 16155.0 }, - // length 16 - { 14359.0, -5871.2 }, - { 1176.5, -3143.9 }, - { 636.25, -1666.2 }, - { -2819.0, 8259.4 }, - { -12844.0, 9601.7 }, - { -2292.3, -5598.5 }, - { 11737.0, 4809.3 }, - { -2499.2, -143.95 }, - { -10045.0, 6570.2 }, - { 27277.0, 10002.0 }, - { 870.01, 16083.0 }, - { 21695.0, 19192.0 }, - { 1601.9, 3220.9 }, - { -7675.7, 5483.5 }, - { -1921.5, 31949.0 }, - { 1629.0, -27167.0 } - }; - - for (std::size_t i = 0u, l = 2u; l < fx.size(); i += l, l <<= 1) - { + double expected[][2] = {// length 2 + {4007.1, -341.9}, + {1103.5, 9289.4}, + // length 4 + {8867.5, -3114.0}, + {-772.18, 8235.2}, + {-2825.7, 10425.0}, + {4951.6, 2349.1}, + // length 8 + {2813.8, -3565.1}, + {-2652.4, -1739.5}, + {-1790.1, 6488.9}, + {4933.6, 6326.1}, + {-7833.9, 15118.0}, + {344.29, 6447.2}, + {10819.0, -9439.9}, + {13809.0, 16155.0}, + // length 16 + {14359.0, -5871.2}, + {1176.5, -3143.9}, + {636.25, -1666.2}, + {-2819.0, 8259.4}, + {-12844.0, 9601.7}, + {-2292.3, -5598.5}, + {11737.0, 4809.3}, + {-2499.2, -143.95}, + {-10045.0, 6570.2}, + {27277.0, 10002.0}, + {870.01, 16083.0}, + {21695.0, 19192.0}, + {1601.9, 3220.9}, + {-7675.7, 5483.5}, + {-1921.5, 31949.0}, + {1629.0, -27167.0}}; + + for (std::size_t i = 0u, l = 2u; l < fx.size(); i += l, l <<= 1) { LOG_DEBUG("Testing length " << l); maths::CSignal::TComplexVec actual(fx.begin(), fx.begin() + l); @@ -133,10 +107,8 @@ void CSignalTest::testFFTVersusOctave() LOG_DEBUG(print(actual)); double error = 0.0; - for (std::size_t j = 0u; j < l; ++j) - { - error += std::abs(actual[j] - maths::CSignal::TComplex(expected[i+j][0], - expected[i+j][1])); + for (std::size_t j = 0u; j < l; ++j) { + 
error += std::abs(actual[j] - maths::CSignal::TComplex(expected[i + j][0], expected[i + j][1]));
             }
             error /= static_cast<double>(l);
             LOG_DEBUG("error = " << error);
@@ -146,37 +118,16 @@
     LOG_DEBUG("*** Arbitrary Length ***");
     {
-        double expected[][2] =
-            {
-                { 18042.0, 755.0 },
-                { 961.0, 5635.6 },
-                { -5261.8, 7542.2 },
-                { -12814.0, 2250.2 },
-                { -8248.5, 6620.5 },
-                { -21626.0, 3570.6 },
-                { 6551.5, -12732.0 },
-                { 6009.5, 10622.0 },
-                { 9954.0, -1224.2 },
-                { -2871.5, 7073.6 },
-                { -14409.0, 10939.0 },
-                { 13682.0, 25304.0 },
-                { -10468.0, -6338.5 },
-                { 6506.0, 6283.3 },
-                { 32665.0, 5127.7 },
-                { 3190.7, 4323.4 },
-                { -6988.7, -3865.0 },
-                { -3881.4, 4360.8 },
-                { 46434.0, 20556.0 },
-                { -6319.6, -7329.0 }
-            };
+        double expected[][2] = {{18042.0, 755.0},    {961.0, 5635.6},    {-5261.8, 7542.2},   {-12814.0, 2250.2}, {-8248.5, 6620.5},
+                                {-21626.0, 3570.6},  {6551.5, -12732.0}, {6009.5, 10622.0},   {9954.0, -1224.2},  {-2871.5, 7073.6},
+                                {-14409.0, 10939.0}, {13682.0, 25304.0}, {-10468.0, -6338.5}, {6506.0, 6283.3},   {32665.0, 5127.7},
+                                {3190.7, 4323.4},    {-6988.7, -3865.0}, {-3881.4, 4360.8},   {46434.0, 20556.0}, {-6319.6, -7329.0}};

         maths::CSignal::TComplexVec actual(fx.begin(), fx.end());
         maths::CSignal::fft(actual);

         double error = 0.0;
-        for (std::size_t j = 0u; j < actual.size(); ++j)
-        {
-            error += std::abs(actual[j] - maths::CSignal::TComplex(expected[j][0],
-                                                                   expected[j][1]));
+        for (std::size_t j = 0u; j < actual.size(); ++j) {
+            error += std::abs(actual[j] - maths::CSignal::TComplex(expected[j][0], expected[j][1]));
         }
         error /= static_cast<double>(actual.size());
         LOG_DEBUG("error = " << error);
@@ -184,78 +135,61 @@
     }
 }

-void CSignalTest::testIFFTVersusOctave()
-{
+void CSignalTest::testIFFTVersusOctave() {
     LOG_DEBUG("+-------------------------------------+");
     LOG_DEBUG("| CSignalTest::testIFFTVersusOctave |");
     LOG_DEBUG("+-------------------------------------+");

     // Test versus values calculated using octave ifft.
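    // For example (assuming Octave's default conventions), the length-4 block of
    // expected values below can be regenerated with:
    //
    //   x = [2555.33+4473.77i, 1451.79-4815.63i, 465.60-818.38i, 4394.83-1953.72i];
    //   ifft(x)
    //
    // Octave's ifft applies the 1/N normalisation, matching bruteForceDft above
    // with sign < 0; e.g. the first output is mean(x) = 2216.89 - 778.49i.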
- double x[][20] = - { - { - 2555.33, 1451.79, 465.60, 4394.83, -1553.24, - -2772.07, -3977.73, 2249.31, -2006.04, 3540.84, - 4271.63, 4648.81, -727.90, 2285.24, 3129.56, - -3596.79, -1968.66, 3795.18, 1627.84, 228.40 - }, - { - 4473.77, -4815.63, -818.38, -1953.72, -2323.39, - -3007.25, 4444.24, 435.21, 3613.32, 3471.37, - -1735.72, 2560.82, -2383.29, -2370.23, -4921.04, - -541.25, 1516.69, -2028.42, 3981.02, 3156.88 - } - }; + double x[][20] = {{2555.33, 1451.79, 465.60, 4394.83, -1553.24, -2772.07, -3977.73, 2249.31, -2006.04, 3540.84, + 4271.63, 4648.81, -727.90, 2285.24, 3129.56, -3596.79, -1968.66, 3795.18, 1627.84, 228.40}, + {4473.77, -4815.63, -818.38, -1953.72, -2323.39, -3007.25, 4444.24, 435.21, 3613.32, 3471.37, + -1735.72, 2560.82, -2383.29, -2370.23, -4921.04, -541.25, 1516.69, -2028.42, 3981.02, 3156.88}}; maths::CSignal::TComplexVec fx; - for (std::size_t i = 0u; i < 20; ++i) - { + for (std::size_t i = 0u; i < 20; ++i) { fx.push_back(maths::CSignal::TComplex(x[0][i], x[1][i])); } LOG_DEBUG("*** Powers of 2 Length ***"); { - double expected[][2] = - { - // length 2 - { 2003.56, -170.93 }, - { 551.77, 4644.70 }, - // length 4 - { 2216.89, -778.49 }, - { 1237.91, 587.28 }, - { -706.42, 2606.19 }, - { -193.04, 2058.80 }, - { 351.73, -445.64 }, - // length 8 - { 1726.09, 2019.35 }, - { 1352.32, -1179.99 }, - { 43.04, 805.89 }, - { -979.24, 1889.70 }, - { 616.70, 790.77 }, - { -223.77, 811.12 }, - { -331.55, -217.44 }, - { 897.45, -366.95 }, - // length 16 - { 101.81, -1697.92 }, - { -120.10, 1996.81 }, - { -479.73, 342.72 }, - { 100.12, 201.31 }, - { 1355.94, 1199.49 }, - { 54.38, 1005.18 }, - { 1704.78, 625.13 }, - { -627.80, 410.64 }, - { -156.20, -9.00 }, - { 733.56, 300.58 }, - { -143.27, -349.91 }, - { -802.73, 600.10 }, - { -176.19, 516.21 }, - { 39.77, -104.14 }, - { 73.53, -196.49 } - }; - - for (std::size_t i = 0u, l = 2u; l < fx.size(); i += l, l <<= 1) - { + double expected[][2] = {// length 2 + {2003.56, -170.93}, + {551.77, 4644.70}, + // length 4 + {2216.89, -778.49}, + {1237.91, 587.28}, + {-706.42, 2606.19}, + {-193.04, 2058.80}, + {351.73, -445.64}, + // length 8 + {1726.09, 2019.35}, + {1352.32, -1179.99}, + {43.04, 805.89}, + {-979.24, 1889.70}, + {616.70, 790.77}, + {-223.77, 811.12}, + {-331.55, -217.44}, + {897.45, -366.95}, + // length 16 + {101.81, -1697.92}, + {-120.10, 1996.81}, + {-479.73, 342.72}, + {100.12, 201.31}, + {1355.94, 1199.49}, + {54.38, 1005.18}, + {1704.78, 625.13}, + {-627.80, 410.64}, + {-156.20, -9.00}, + {733.56, 300.58}, + {-143.27, -349.91}, + {-802.73, 600.10}, + {-176.19, 516.21}, + {39.77, -104.14}, + {73.53, -196.49}}; + + for (std::size_t i = 0u, l = 2u; l < fx.size(); i += l, l <<= 1) { LOG_DEBUG("Testing length " << l); maths::CSignal::TComplexVec actual(fx.begin(), fx.begin() + l); @@ -263,10 +197,8 @@ void CSignalTest::testIFFTVersusOctave() LOG_DEBUG(print(actual)); double error = 0.0; - for (std::size_t j = 0u; j < l; ++j) - { - error += std::abs(actual[j] - maths::CSignal::TComplex(expected[i+j][0], - expected[i+j][1])); + for (std::size_t j = 0u; j < l; ++j) { + error += std::abs(actual[j] - maths::CSignal::TComplex(expected[i + j][0], expected[i + j][1])); } error /= static_cast(l); LOG_DEBUG("error = " << error); @@ -275,8 +207,7 @@ void CSignalTest::testIFFTVersusOctave() } } -void CSignalTest::testFFTRandomized() -{ +void CSignalTest::testFFTRandomized() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CSignalTest::testFFTRandomized |"); LOG_DEBUG("+----------------------------------+"); @@ -291,15 
+222,10 @@ void CSignalTest::testFFTRandomized() TSizeVec lengths; rng.generateUniformSamples(2, 100, 1000, lengths); - for (std::size_t i = 0u, j = 0u; - i < lengths.size() && j + 2 * lengths[i] < components.size(); - ++i, j += 2*lengths[i]) - { + for (std::size_t i = 0u, j = 0u; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) { maths::CSignal::TComplexVec expected; - for (std::size_t k = 0u; k < lengths[i]; ++k) - { - expected.push_back(maths::CSignal::TComplex(components[j + 2*k], - components[j + 2*k + 1])); + for (std::size_t k = 0u; k < lengths[i]; ++k) { + expected.push_back(maths::CSignal::TComplex(components[j + 2 * k], components[j + 2 * k + 1])); } maths::CSignal::TComplexVec actual(expected); @@ -307,21 +233,18 @@ void CSignalTest::testFFTRandomized() maths::CSignal::fft(actual); double error = 0.0; - for (std::size_t k = 0u; k < actual.size(); ++k) - { + for (std::size_t k = 0u; k < actual.size(); ++k) { error += std::abs(actual[k] - expected[k]); } - if (i % 5 == 0 || error >= 1e-5) - { + if (i % 5 == 0 || error >= 1e-5) { LOG_DEBUG("length = " << lengths[i] << ", error = " << error); } CPPUNIT_ASSERT(error < 1e-5); } } -void CSignalTest::testIFFTRandomized() -{ +void CSignalTest::testIFFTRandomized() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CSignalTest::testIFFTRandomized |"); LOG_DEBUG("+-----------------------------------+"); @@ -336,15 +259,10 @@ void CSignalTest::testIFFTRandomized() TSizeVec lengths; rng.generateUniformSamples(2, 100, 1000, lengths); - for (std::size_t i = 0u, j = 0u; - i < lengths.size() && j + 2 * lengths[i] < components.size(); - ++i, j += 2*lengths[i]) - { + for (std::size_t i = 0u, j = 0u; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) { maths::CSignal::TComplexVec expected; - for (std::size_t k = 0u; k < lengths[i]; ++k) - { - expected.push_back(maths::CSignal::TComplex(components[j + 2*k], - components[j + 2*k + 1])); + for (std::size_t k = 0u; k < lengths[i]; ++k) { + expected.push_back(maths::CSignal::TComplex(components[j + 2 * k], components[j + 2 * k + 1])); } maths::CSignal::TComplexVec actual(expected); @@ -352,21 +270,18 @@ void CSignalTest::testIFFTRandomized() maths::CSignal::ifft(actual); double error = 0.0; - for (std::size_t k = 0u; k < actual.size(); ++k) - { + for (std::size_t k = 0u; k < actual.size(); ++k) { error += std::abs(actual[k] - expected[k]); } - if (i % 5 == 0 || error >= 1e-5) - { + if (i % 5 == 0 || error >= 1e-5) { LOG_DEBUG("length = " << lengths[i] << ", error = " << error); } CPPUNIT_ASSERT(error < 1e-5); } } -void CSignalTest::testFFTIFFTIdempotency() -{ +void CSignalTest::testFFTIFFTIdempotency() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CSignalTest::testFFTIFFTIdempotency |"); LOG_DEBUG("+---------------------------------------+"); @@ -381,15 +296,10 @@ void CSignalTest::testFFTIFFTIdempotency() TSizeVec lengths; rng.generateUniformSamples(2, 100, 1000, lengths); - for (std::size_t i = 0u, j = 0u; - i < lengths.size() && j + 2 * lengths[i] < components.size(); - ++i, j += 2*lengths[i]) - { + for (std::size_t i = 0u, j = 0u; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) { maths::CSignal::TComplexVec expected; - for (std::size_t k = 0u; k < lengths[i]; ++k) - { - expected.push_back(maths::CSignal::TComplex(components[j + 2*k], - components[j + 2*k + 1])); + for (std::size_t k = 0u; k < lengths[i]; ++k) { + 
expected.push_back(maths::CSignal::TComplex(components[j + 2 * k], components[j + 2 * k + 1])); } maths::CSignal::TComplexVec actual(expected); @@ -397,21 +307,18 @@ void CSignalTest::testFFTIFFTIdempotency() maths::CSignal::ifft(actual); double error = 0.0; - for (std::size_t k = 0u; k < actual.size(); ++k) - { + for (std::size_t k = 0u; k < actual.size(); ++k) { error += std::abs(actual[k] - expected[k]); } - if (i % 5 == 0 || error >= 1e-5) - { + if (i % 5 == 0 || error >= 1e-5) { LOG_DEBUG("length = " << lengths[i] << ", error = " << error); } CPPUNIT_ASSERT(error < 1e-5); } } -void CSignalTest::testAutocorrelations() -{ +void CSignalTest::testAutocorrelations() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CSignalTest::testAutocorrelations |"); LOG_DEBUG("+-------------------------------------+"); @@ -421,58 +328,41 @@ void CSignalTest::testAutocorrelations() TSizeVec sizes; rng.generateUniformSamples(10, 30, 100, sizes); - for (std::size_t t = 0u; t < sizes.size(); ++t) - { + for (std::size_t t = 0u; t < sizes.size(); ++t) { TDoubleVec values_; rng.generateUniformSamples(-10.0, 10.0, sizes[t], values_); maths::CSignal::TFloatMeanAccumulatorVec values(sizes[t]); - for (std::size_t i = 0u; i < values_.size(); ++i) - { + for (std::size_t i = 0u; i < values_.size(); ++i) { values[i].add(values_[i]); } TDoubleVec expected; - for (std::size_t offset = 1; offset < values.size(); ++offset) - { + for (std::size_t offset = 1; offset < values.size(); ++offset) { expected.push_back(maths::CSignal::autocorrelation(offset, values)); } TDoubleVec actual; maths::CSignal::autocorrelations(values, actual); - if (t % 10 == 0) - { + if (t % 10 == 0) { LOG_DEBUG("expected = " << core::CContainerPrinter::print(expected)); LOG_DEBUG("actual = " << core::CContainerPrinter::print(actual)); } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), - core::CContainerPrinter::print(actual)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(actual)); } } -CppUnit::Test *CSignalTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSignalTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSignalTest::testFFTVersusOctave", - &CSignalTest::testFFTVersusOctave) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSignalTest::testIFFTVersusOctave", - &CSignalTest::testIFFTVersusOctave) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSignalTest::testFFTRandomized", - &CSignalTest::testFFTRandomized) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSignalTest::testIFFTRandomized", - &CSignalTest::testIFFTRandomized) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSignalTest::testFFTIFFTIdempotency", - &CSignalTest::testFFTIFFTIdempotency) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSignalTest::testAutocorrelations", - &CSignalTest::testAutocorrelations) ); +CppUnit::Test* CSignalTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSignalTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSignalTest::testFFTVersusOctave", &CSignalTest::testFFTVersusOctave)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSignalTest::testIFFTVersusOctave", &CSignalTest::testIFFTVersusOctave)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSignalTest::testFFTRandomized", &CSignalTest::testFFTRandomized)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSignalTest::testIFFTRandomized", &CSignalTest::testIFFTRandomized)); + 
suiteOfTests->addTest(
        new CppUnit::TestCaller<CSignalTest>("CSignalTest::testFFTIFFTIdempotency", &CSignalTest::testFFTIFFTIdempotency));
    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>("CSignalTest::testAutocorrelations", &CSignalTest::testAutocorrelations));

     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CSignalTest.h b/lib/maths/unittest/CSignalTest.h
index d43a50d6aa..05d2e07477 100644
--- a/lib/maths/unittest/CSignalTest.h
+++ b/lib/maths/unittest/CSignalTest.h
@@ -9,17 +9,16 @@

 #include <cppunit/extensions/HelperMacros.h>

-class CSignalTest : public CppUnit::TestFixture
-{
-    public:
-        void testFFTVersusOctave();
-        void testIFFTVersusOctave();
-        void testFFTRandomized();
-        void testIFFTRandomized();
-        void testFFTIFFTIdempotency();
-        void testAutocorrelations();
+class CSignalTest : public CppUnit::TestFixture {
+public:
+    void testFFTVersusOctave();
+    void testIFFTVersusOctave();
+    void testFFTRandomized();
+    void testIFFTRandomized();
+    void testFFTIFFTIdempotency();
+    void testAutocorrelations();

-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CSignalTest_h
diff --git a/lib/maths/unittest/CSolversTest.cc b/lib/maths/unittest/CSolversTest.cc
index 3de3133aa1..daaeb6047b 100644
--- a/lib/maths/unittest/CSolversTest.cc
+++ b/lib/maths/unittest/CSolversTest.cc
@@ -19,74 +19,57 @@ using namespace ml;
 using namespace maths;

-namespace
-{
+namespace {
 //! Root at 5.
-double f1(const double &x)
-{
+double f1(const double& x) {
     return x - 5.0;
 }

 //! Roots at 1 and 2.
-double f2(const double &x)
-{
-    return x*x - 3.0*x + 2.0;
+double f2(const double& x) {
+    return x * x - 3.0 * x + 2.0;
 }

 //! Note that this is a contractive map (i.e. df/dx < 1)
 //! so we can find the root by an iterative scheme.
 //! There is a root in the interval:\n
 //! [0.739085133215160, 0.739085133215161]
-double f3(const double &x)
-{
+double f3(const double& x) {
     return std::cos(x) - x;
 }

 //! Root at x = 2/3.
-double f4(const double &x)
-{
-    return x <= 2.0/3.0 ?
-           std::pow(std::fabs(x - 2.0/3.0), 0.2) :
-          -std::pow(std::fabs(x - 2.0/3.0), 0.2);
+double f4(const double& x) {
+    return x <= 2.0 / 3.0 ? std::pow(std::fabs(x - 2.0 / 3.0), 0.2) : -std::pow(std::fabs(x - 2.0 / 3.0), 0.2);
 }

 //! This has local maxima at 4 and 10.
-double f5(const double &x)
-{
-    return 1.1 * std::exp(-(x-4.0) * (x-4.0))
-           + 0.4 * std::exp(-(x-10.0) * (x-10.0) / 4.0);
+double f5(const double& x) {
+    return 1.1 * std::exp(-(x - 4.0) * (x - 4.0)) + 0.4 * std::exp(-(x - 10.0) * (x - 10.0) / 4.0);
 }

 //! This has local maxima at 4, 6 and 10.
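 //! (The bumps are well separated: for example, at x = 4 the second and third
 //! terms contribute only about 0.1 * exp(-4) and 0.4 * exp(-18) respectively,
 //! so the three local maxima sit very close to 4, 6 and 10.)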
-double f6(const double &x) -{ - return 1.1 * std::exp(-2.0 * (x-4.0) * (x-4.0)) - + 0.1 * std::exp(-(x-6.0) * (x-6.0)) - + 0.4 * std::exp(-(x-10.0) * (x-10.0) / 2.0); +double f6(const double& x) { + return 1.1 * std::exp(-2.0 * (x - 4.0) * (x - 4.0)) + 0.1 * std::exp(-(x - 6.0) * (x - 6.0)) + + 0.4 * std::exp(-(x - 10.0) * (x - 10.0) / 2.0); } -class CLog -{ - public: - using result_type = double; - - public: - double operator()(const double &x) const - { - if (x <= 0.0) - { - throw std::range_error("Bad value to log " - + core::CStringUtils::typeToString(x)); - } - return std::log(x); +class CLog { +public: + using result_type = double; + +public: + double operator()(const double& x) const { + if (x <= 0.0) { + throw std::range_error("Bad value to log " + core::CStringUtils::typeToString(x)); } + return std::log(x); + } }; - } -void CSolversTest::testBracket() -{ +void CSolversTest::testBracket() { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| CSolversTest::testBracket |"); LOG_DEBUG("+-----------------------------+"); @@ -97,11 +80,7 @@ void CSolversTest::testBracket() double a = 0.5, b = 0.5; double fa = f(a), fb = f(b); CPPUNIT_ASSERT(CSolvers::rightBracket(a, b, fa, fb, f, maxIterations)); - LOG_DEBUG("a = " << a - << ", b = " << b - << ", f(a) = " << fa - << ", f(b) = " << fb - << ", maxIterations = " << maxIterations); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << fa << ", f(b) = " << fb << ", maxIterations = " << maxIterations); CPPUNIT_ASSERT_EQUAL(f(a), fa); CPPUNIT_ASSERT_EQUAL(f(b), fb); CPPUNIT_ASSERT(fa * fb <= 0.0); @@ -113,11 +92,7 @@ void CSolversTest::testBracket() double a = 0.5, b = 0.6; double fa = f(a), fb = f(b); CPPUNIT_ASSERT(CSolvers::rightBracket(a, b, fa, fb, f, maxIterations)); - LOG_DEBUG("a = " << a - << ", b = " << b - << ", f(a) = " << fa - << ", f(b) = " << fb - << ", maxIterations = " << maxIterations); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << fa << ", f(b) = " << fb << ", maxIterations = " << maxIterations); CPPUNIT_ASSERT_EQUAL(f(a), fa); CPPUNIT_ASSERT_EQUAL(f(b), fb); CPPUNIT_ASSERT(fa * fb <= 0.0); @@ -129,11 +104,7 @@ void CSolversTest::testBracket() double a = 0.5, b = 5.0; double fa = f(a), fb = f(b); CPPUNIT_ASSERT(CSolvers::rightBracket(a, b, fa, fb, f, maxIterations)); - LOG_DEBUG("a = " << a - << ", b = " << b - << ", f(a) = " << fa - << ", f(b) = " << fb - << ", maxIterations = " << maxIterations); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << fa << ", f(b) = " << fb << ", maxIterations = " << maxIterations); CPPUNIT_ASSERT_EQUAL(f(a), fa); CPPUNIT_ASSERT_EQUAL(f(b), fb); CPPUNIT_ASSERT(fa * fb <= 0.0); @@ -144,21 +115,15 @@ void CSolversTest::testBracket() std::size_t maxIterations = 10u; double a = 100.0, b = 100.0; double fa = f(a), fb = f(b); - CPPUNIT_ASSERT(CSolvers::leftBracket(a, b, fa, fb, f, maxIterations, - std::numeric_limits::min())); - LOG_DEBUG("a = " << a - << ", b = " << b - << ", f(a) = " << fa - << ", f(b) = " << fb - << ", maxIterations = " << maxIterations); + CPPUNIT_ASSERT(CSolvers::leftBracket(a, b, fa, fb, f, maxIterations, std::numeric_limits::min())); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << fa << ", f(b) = " << fb << ", maxIterations = " << maxIterations); CPPUNIT_ASSERT_EQUAL(f(a), fa); CPPUNIT_ASSERT_EQUAL(f(b), fb); CPPUNIT_ASSERT(fa * fb <= 0.0); } } -void CSolversTest::testBisection() -{ +void CSolversTest::testBisection() { LOG_DEBUG("+-------------------------------+"); LOG_DEBUG("| CSolversTest::testBisection |"); 
LOG_DEBUG("+-------------------------------+"); @@ -197,10 +162,8 @@ void CSolversTest::testBisection() iterations = 10; CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, 0.1); CPPUNIT_ASSERT(CSolvers::bisection(a, b, -5.0, 5.0, &f1, iterations, equal, bestGuess)); - LOG_DEBUG("a = " << a << ", b = " << b - << ", f(a) = " << f1(a) << ", f(b) = " << f1(b) - << ", iterations = " << iterations - << ", bestGuess = " << bestGuess); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << f1(a) << ", f(b) = " << f1(b) << ", iterations = " << iterations + << ", bestGuess = " << bestGuess); CPPUNIT_ASSERT_EQUAL(5.0, bestGuess); } @@ -209,8 +172,7 @@ void CSolversTest::testBisection() LOG_DEBUG("-"); LOG_DEBUG("*** f(x) = cos(x) - x ***"); double lastError = 0.7390851332151607; - for (std::size_t i = 3; i < 20; ++i) - { + for (std::size_t i = 3; i < 20; ++i) { a = -10.0; b = 10.0; iterations = i; @@ -220,25 +182,21 @@ void CSolversTest::testBisection() LOG_DEBUG("iterations = " << iterations); CPPUNIT_ASSERT_EQUAL(i, iterations); - LOG_DEBUG("a = " << a << ", b = " << b - << ", f(a) = " << f3(a) << ", f(b) = " << f3(b)); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << f3(a) << ", f(b) = " << f3(b)); CPPUNIT_ASSERT(f3(a) * f3(b) <= 0.0); double error = std::fabs(bestGuess - 0.7390851332151607); - LOG_DEBUG("bestGuess = " << bestGuess - << ", f(bestGuess) = " << f3(bestGuess) - << ", error = " << error); - CPPUNIT_ASSERT(error < std::fabs((a + b)/2.0 - 0.7390851332151607)); + LOG_DEBUG("bestGuess = " << bestGuess << ", f(bestGuess) = " << f3(bestGuess) << ", error = " << error); + CPPUNIT_ASSERT(error < std::fabs((a + b) / 2.0 - 0.7390851332151607)); double convergenceFactor = error / lastError; lastError = error; - if (i != 3) - { + if (i != 3) { LOG_DEBUG("convergenceFactor = " << convergenceFactor); } LOG_DEBUG("-") } - double meanConvergenceFactor = std::pow(lastError / 0.7390851332151607, 1.0/20.0); + double meanConvergenceFactor = std::pow(lastError / 0.7390851332151607, 1.0 / 20.0); LOG_DEBUG("mean convergence factor = " << meanConvergenceFactor); CPPUNIT_ASSERT(meanConvergenceFactor < 0.4); } @@ -254,12 +212,11 @@ void CSolversTest::testBisection() LOG_DEBUG("*** f(x) = { |x - 2.0/3.0|^0.2 x <= 2.0/3.0 ***"); LOG_DEBUG(" { -|x - 2.0/3.0|^0.2 otherwise"); double lastInterval = 20.0; - double lastError = 2.0/3.0; + double lastError = 2.0 / 3.0; double convergenceFactor = 1.0; - for (std::size_t i = 3u; i < 40; ++i) - { + for (std::size_t i = 3u; i < 40; ++i) { a = -10.0; - b = 10.0; + b = 10.0; iterations = i; CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, 0.0); CSolvers::bisection(a, b, &f4, iterations, equal, bestGuess); @@ -267,26 +224,21 @@ void CSolversTest::testBisection() LOG_DEBUG("iterations = " << iterations); CPPUNIT_ASSERT_EQUAL(i, iterations); - LOG_DEBUG("a = " << a << ", b = " << b - << ", f(a) = " << f4(a) << ", f(b) = " << f4(b)); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << f4(a) << ", f(b) = " << f4(b)); CPPUNIT_ASSERT(f4(a) * f4(b) <= 0.0); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.5 * lastInterval, b - a, 1e-5); lastInterval = b - a; - double error = std::fabs(bestGuess - 2.0/3.0); - LOG_DEBUG("bestGuess = " << bestGuess - << ", f(bestGuess) = " << f4(bestGuess) - << ", error = " << error); - CPPUNIT_ASSERT(error < std::fabs((a + b)/2.0 - 2.0/3.0)); + double error = std::fabs(bestGuess - 2.0 / 3.0); + LOG_DEBUG("bestGuess = " << bestGuess << ", f(bestGuess) = " << f4(bestGuess) << ", error = " << error); + CPPUNIT_ASSERT(error 
< std::fabs((a + b) / 2.0 - 2.0 / 3.0)); convergenceFactor *= (error / lastError); lastError = error; - if ((i - 2) % 4 == 0) - { + if ((i - 2) % 4 == 0) { convergenceFactor = std::pow(convergenceFactor, 0.25); LOG_DEBUG("convergence factor = " << convergenceFactor); - if (i - 2 != 4) - { + if (i - 2 != 4) { CPPUNIT_ASSERT_DOUBLES_EQUAL(0.5, convergenceFactor, 1e-5); } convergenceFactor = 1.0; @@ -294,14 +246,13 @@ void CSolversTest::testBisection() LOG_DEBUG("-") } - double meanConvergenceFactor = std::pow(lastError / (2.0/3.0), 1.0/40.0); + double meanConvergenceFactor = std::pow(lastError / (2.0 / 3.0), 1.0 / 40.0); LOG_DEBUG("mean convergence factor = " << meanConvergenceFactor); CPPUNIT_ASSERT(meanConvergenceFactor < 0.56); } } -void CSolversTest::testBrent() -{ +void CSolversTest::testBrent() { LOG_DEBUG("+---------------------------+"); LOG_DEBUG("| CSolversTest::testBrent |"); LOG_DEBUG("+---------------------------+"); @@ -349,8 +300,7 @@ void CSolversTest::testBrent() { LOG_DEBUG("*** f(x) = cos(x) - x ***"); double lastError = 0.7390851332151607; - for (std::size_t i = 3; i < 8; ++i) - { + for (std::size_t i = 3; i < 8; ++i) { a = -10.0; b = 10.0; iterations = i; @@ -361,19 +311,15 @@ void CSolversTest::testBrent() LOG_DEBUG("iterations = " << iterations); CPPUNIT_ASSERT_EQUAL(i, iterations); - LOG_DEBUG("a = " << a << ", b = " << b - << ", f(a) = " << f3(a) << ", f(b) = " << f3(b)); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << f3(a) << ", f(b) = " << f3(b)); CPPUNIT_ASSERT(f3(a) * f3(b) <= 0.0); double error = std::fabs(bestGuess - 0.7390851332151607); - LOG_DEBUG("bestGuess = " << bestGuess - << ", f(bestGuess) = " << f3(bestGuess) - << ", error = " << error); - CPPUNIT_ASSERT(error < std::fabs((a + b)/2.0 - 0.7390851332151607)); + LOG_DEBUG("bestGuess = " << bestGuess << ", f(bestGuess) = " << f3(bestGuess) << ", error = " << error); + CPPUNIT_ASSERT(error < std::fabs((a + b) / 2.0 - 0.7390851332151607)); double convergenceFactor = error / lastError; lastError = error; - if (i != 3) - { + if (i != 3) { LOG_DEBUG("convergenceFactor = " << convergenceFactor); CPPUNIT_ASSERT(convergenceFactor < 0.75); } @@ -388,9 +334,8 @@ void CSolversTest::testBrent() LOG_DEBUG("-") LOG_DEBUG("*** f(x) = { |x - 2.0/3.0|^0.2 x <= 2.0/3.0 ***"); LOG_DEBUG(" { -|x - 2.0/3.0|^0.2 otherwise"); - double lastError = 2.0/3.0; - for (std::size_t i = 3; i < 40; ++i) - { + double lastError = 2.0 / 3.0; + for (std::size_t i = 3; i < 40; ++i) { a = -10.0; b = 10.0; iterations = i; @@ -401,28 +346,24 @@ void CSolversTest::testBrent() LOG_DEBUG("iterations = " << iterations); CPPUNIT_ASSERT_EQUAL(i, iterations); - LOG_DEBUG("a = " << a << ", b = " << b - << ", f(a) = " << f4(a) << ", f(b) = " << f4(b)); + LOG_DEBUG("a = " << a << ", b = " << b << ", f(a) = " << f4(a) << ", f(b) = " << f4(b)); CPPUNIT_ASSERT(f4(a) * f4(b) <= 0.0); - double error = std::fabs(bestGuess - 2.0/3.0); - LOG_DEBUG("bestGuess = " << bestGuess - << ", f(bestGuess) = " << f4(bestGuess) - << ", error = " << error); - CPPUNIT_ASSERT(error < std::fabs((a + b)/2.0 - 2.0/3.0)); + double error = std::fabs(bestGuess - 2.0 / 3.0); + LOG_DEBUG("bestGuess = " << bestGuess << ", f(bestGuess) = " << f4(bestGuess) << ", error = " << error); + CPPUNIT_ASSERT(error < std::fabs((a + b) / 2.0 - 2.0 / 3.0)); double convergenceFactor = error / lastError; lastError = error; LOG_DEBUG("convergence factor = " << convergenceFactor); } - double meanConvergenceFactor = std::pow(lastError / (2.0/3.0), 1.0/40.0); + double meanConvergenceFactor = 
std::pow(lastError / (2.0 / 3.0), 1.0 / 40.0); LOG_DEBUG("mean convergence factor = " << meanConvergenceFactor); CPPUNIT_ASSERT(meanConvergenceFactor < 0.505); } } -void CSolversTest::testSublevelSet() -{ +void CSolversTest::testSublevelSet() { using TDoubleDoublePr = std::pair; // Should converge immediately to minimum of quadratic. @@ -433,65 +374,46 @@ void CSolversTest::testSublevelSet() LOG_DEBUG("*** f(x) = 1.1 * exp(-(x-4)^2) + 0.4 * exp(-(x-10)^2/4) ***"); double fmax = 0.9 * f5(10.0); - for (std::size_t i = 0u; i < 30u; ++i, fmax *= 0.9) - { + for (std::size_t i = 0u; i < 30u; ++i, fmax *= 0.9) { LOG_DEBUG("fmax = " << fmax); - if (CSolvers::sublevelSet(4.0, 10.0, f5(4.0), f5(10.0), &f5, fmax, 10, sublevelSet)) - { + if (CSolvers::sublevelSet(4.0, 10.0, f5(4.0), f5(10.0), &f5, fmax, 10, sublevelSet)) { CPPUNIT_ASSERT_DOUBLES_EQUAL(fmax, f5(sublevelSet.first), 1e-5); CPPUNIT_ASSERT_DOUBLES_EQUAL(fmax, f5(sublevelSet.second), 1e-5); - } - else - { + } else { CPPUNIT_ASSERT(sublevelSet.second - sublevelSet.first < 1e-4); } LOG_DEBUG("sublevelSet = " << core::CContainerPrinter::print(sublevelSet)); - LOG_DEBUG("f(a) = " << f5(sublevelSet.first) - << ", f(b) = " << f5(sublevelSet.second)); + LOG_DEBUG("f(a) = " << f5(sublevelSet.first) << ", f(b) = " << f5(sublevelSet.second)); } LOG_DEBUG("*** f(x) = 1.1 * exp(-2.0*(x-4)^2) + 0.1 * exp(-(x-6)^2) + 0.4 * exp(-(x-10)^2/2) ***"); fmax = 0.9 * f6(10.0); - for (std::size_t i = 0u; i < 15u; ++i, fmax *= 0.9) - { + for (std::size_t i = 0u; i < 15u; ++i, fmax *= 0.9) { LOG_DEBUG("fmax = " << fmax); bool found = CSolvers::sublevelSet(4.0, 10.0, f6(4.0), f6(10.0), &f6, fmax, 15, sublevelSet); LOG_DEBUG("sublevelSet = " << core::CContainerPrinter::print(sublevelSet)); - LOG_DEBUG("f(a) = " << f6(sublevelSet.first) - << ", f(b) = " << f6(sublevelSet.second)); + LOG_DEBUG("f(a) = " << f6(sublevelSet.first) << ", f(b) = " << f6(sublevelSet.second)); - if (found) - { + if (found) { CPPUNIT_ASSERT_DOUBLES_EQUAL(fmax, f6(sublevelSet.first), 1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(fmax, f6(sublevelSet.second), 1e-4); - } - else - { + } else { CPPUNIT_ASSERT(sublevelSet.second - sublevelSet.first < 1e-4); } } } -CppUnit::Test *CSolversTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSolversTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSolversTest::testBracket", - &CSolversTest::testBracket) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSolversTest::testBisection", - &CSolversTest::testBisection) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSolversTest::testBrent", - &CSolversTest::testBrent) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSolversTest::testSublevelSet", - &CSolversTest::testSublevelSet) ); +CppUnit::Test* CSolversTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSolversTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSolversTest::testBracket", &CSolversTest::testBracket)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSolversTest::testBisection", &CSolversTest::testBisection)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSolversTest::testBrent", &CSolversTest::testBrent)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSolversTest::testSublevelSet", &CSolversTest::testSublevelSet)); return suiteOfTests; } diff --git a/lib/maths/unittest/CSolversTest.h b/lib/maths/unittest/CSolversTest.h index ec3a78fb20..1cfa8280b2 100644 --- a/lib/maths/unittest/CSolversTest.h +++ b/lib/maths/unittest/CSolversTest.h @@ -9,15 
+9,14 @@ #include -class CSolversTest : public CppUnit::TestFixture -{ - public: - void testBracket(); - void testBisection(); - void testBrent(); - void testSublevelSet(); +class CSolversTest : public CppUnit::TestFixture { +public: + void testBracket(); + void testBisection(); + void testBrent(); + void testSublevelSet(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CSolversTest_h diff --git a/lib/maths/unittest/CSplineTest.cc b/lib/maths/unittest/CSplineTest.cc index f3a1ea908e..7f46c5b438 100644 --- a/lib/maths/unittest/CSplineTest.cc +++ b/lib/maths/unittest/CSplineTest.cc @@ -9,8 +9,8 @@ #include #include -#include #include +#include #include #include @@ -19,44 +19,36 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector<double>; using TSizeVec = std::vector<std::size_t>; -class CSplineFunctor -{ - public: - CSplineFunctor(const maths::CSpline<> &spline) : - m_Spline(&spline) - { - } +class CSplineFunctor { +public: + CSplineFunctor(const maths::CSpline<>& spline) : m_Spline(&spline) {} - bool operator()(double x, double &fx) const - { - fx = m_Spline->value(x); - return true; - } + bool operator()(double x, double& fx) const { + fx = m_Spline->value(x); + return true; + } - private: - const maths::CSpline<> *m_Spline; +private: + const maths::CSpline<>* m_Spline; }; -std::string print(maths::CSplineTypes::EType type) -{ - switch (type) - { - case maths::CSplineTypes::E_Linear: return "linear"; - case maths::CSplineTypes::E_Cubic: return "cubic"; +std::string print(maths::CSplineTypes::EType type) { + switch (type) { + case maths::CSplineTypes::E_Linear: + return "linear"; + case maths::CSplineTypes::E_Cubic: + return "cubic"; } return std::string(); } - } -void CSplineTest::testNatural() -{ +void CSplineTest::testNatural() { LOG_DEBUG("+----------------------------+"); LOG_DEBUG("| CSplineTest::testNatural |"); LOG_DEBUG("+----------------------------+"); @@ -66,8 +58,8 @@ void CSplineTest::testNatural() // end points.
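The assertions in the hunk below rest on the defining property of a natural cubic spline: its second derivative vanishes at both boundary knots, which is why the test inspects the first and last entries of curvatures(). The following sketch is not part of the patch; it assumes only some interpolant type with a value(double) method, and endCurvature and looksNaturalAtEnds are hypothetical names introduced for illustration.

// Illustration only, not from the patch: estimate s''(x) at an end knot with
// the second-order one-sided stencil (2 f(x) - 5 f(x+h) + 4 f(x+2h) - f(x+3h)) / h^2,
// which is exact for cubic polynomials. Step with h < (x1 - x0) / 3 from the
// left end, and with a negative h from the right end, so the stencil stays on
// a single cubic piece; for a natural spline the result should be ~0.
#include <cmath>

template<typename SPLINE>
double endCurvature(const SPLINE& s, double x, double h) {
    return (2.0 * s.value(x) - 5.0 * s.value(x + h) + 4.0 * s.value(x + 2.0 * h) - s.value(x + 3.0 * h)) / (h * h);
}

template<typename SPLINE>
bool looksNaturalAtEnds(const SPLINE& s, double x0, double xn, double h, double tol = 1e-6) {
    return std::fabs(endCurvature(s, x0, h)) < tol && std::fabs(endCurvature(s, xn, -h)) < tol;
}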
{ - double x_[] = { 0.0, 20.0, 21.0, 30.0, 56.0, 100.0, 102.0 }; - double y_[] = { 1.0, 5.0, 4.0, 13.0, 20.0, 12.0, 17.0 }; + double x_[] = {0.0, 20.0, 21.0, 30.0, 56.0, 100.0, 102.0}; + double y_[] = {1.0, 5.0, 4.0, 13.0, 20.0, 12.0, 17.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); TDoubleVec y(boost::begin(y_), boost::end(y_)); @@ -75,8 +67,7 @@ void CSplineTest::testNatural() maths::CSpline<> spline(maths::CSplineTypes::E_Cubic); spline.interpolate(x, y, maths::CSplineTypes::E_Natural); - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { double yy = spline.value(x[i]); LOG_DEBUG("f(x[" << i << "]) = " << yy); CPPUNIT_ASSERT_EQUAL(y[i], yy); @@ -90,22 +81,20 @@ void CSplineTest::testNatural() CPPUNIT_ASSERT(std::fabs(yp - yy) < 1e-2); } - const TDoubleVec &curvatures = spline.curvatures(); + const TDoubleVec& curvatures = spline.curvatures(); std::size_t n = curvatures.size(); - LOG_DEBUG("curvatures[0] = " << curvatures[0] - << ", curvatures[n] = " << curvatures[n-1]); + LOG_DEBUG("curvatures[0] = " << curvatures[0] << ", curvatures[n] = " << curvatures[n - 1]); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[0], 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[n-1], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[n - 1], 1e-10); } { - double x_[] = { 0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0 }; + double x_[] = {0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); TDoubleVec y; y.reserve(x.size()); - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { x[i] *= boost::math::double_constants::two_pi; y.push_back(std::sin(x[i])); } @@ -113,34 +102,29 @@ void CSplineTest::testNatural() maths::CSpline<> spline(maths::CSplineTypes::E_Cubic); spline.interpolate(x, y, maths::CSplineTypes::E_Natural); - for (std::size_t i = 0u; i < 21; ++i) - { - double xx = boost::math::double_constants::two_pi - * static_cast(i) / 20.0; + for (std::size_t i = 0u; i < 21; ++i) { + double xx = boost::math::double_constants::two_pi * static_cast(i) / 20.0; double yy = spline.value(xx); - LOG_DEBUG("spline(" << xx << ") = " << yy - << ", f(" << xx << ") = " << std::sin(xx)); + LOG_DEBUG("spline(" << xx << ") = " << yy << ", f(" << xx << ") = " << std::sin(xx)); CPPUNIT_ASSERT(std::fabs(std::sin(xx) - yy) < 0.02); } - const TDoubleVec &curvatures = spline.curvatures(); + const TDoubleVec& curvatures = spline.curvatures(); std::size_t n = curvatures.size(); - LOG_DEBUG("curvatures[0] = " << curvatures[0] - << ", curvatures[n] = " << curvatures[n-1]); + LOG_DEBUG("curvatures[0] = " << curvatures[0] << ", curvatures[n] = " << curvatures[n - 1]); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[0], 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[n-1], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[n - 1], 1e-10); } } -void CSplineTest::testParabolicRunout() -{ +void CSplineTest::testParabolicRunout() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CSplineTest::testParabolicRunout |"); LOG_DEBUG("+------------------------------------+"); { - double x_[] = { 0.0, 20.0, 21.0, 30.0, 56.0, 100.0, 102.0 }; - double y_[] = { 1.0, 5.0, 4.0, 13.0, 20.0, 12.0, 17.0 }; + double x_[] = {0.0, 20.0, 21.0, 30.0, 56.0, 100.0, 102.0}; + double y_[] = {1.0, 5.0, 4.0, 13.0, 20.0, 12.0, 17.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); TDoubleVec y(boost::begin(y_), boost::end(y_)); @@ -148,8 +132,7 @@ void CSplineTest::testParabolicRunout() maths::CSpline<> 
spline(maths::CSplineTypes::E_Cubic); spline.interpolate(x, y, maths::CSplineTypes::E_ParabolicRunout); - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { double yy = spline.value(x[i]); LOG_DEBUG("f(x[" << i << "]) = " << yy); CPPUNIT_ASSERT_EQUAL(y[i], yy); @@ -163,71 +146,56 @@ void CSplineTest::testParabolicRunout() CPPUNIT_ASSERT(std::fabs(yp - yy) < 1e-2); } - const TDoubleVec &curvatures = spline.curvatures(); + const TDoubleVec& curvatures = spline.curvatures(); std::size_t n = curvatures.size(); - LOG_DEBUG("curvatures[0] = " << curvatures[0] - << ", curvatures[1] = " << curvatures[1]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[0], - curvatures[1], 1e-10); - LOG_DEBUG("curvatures[n-1] = " << curvatures[n-2] - << ", curvatures[n] = " << curvatures[n-1]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[n-2], - curvatures[n-1], 1e-10); + LOG_DEBUG("curvatures[0] = " << curvatures[0] << ", curvatures[1] = " << curvatures[1]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[0], curvatures[1], 1e-10); + LOG_DEBUG("curvatures[n-1] = " << curvatures[n - 2] << ", curvatures[n] = " << curvatures[n - 1]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[n - 2], curvatures[n - 1], 1e-10); } { - double x_[] = { 0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0 }; + double x_[] = {0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); TDoubleVec y; y.reserve(x.size()); - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { x[i] *= boost::math::double_constants::two_pi; y.push_back(std::sin(x[i])); } - maths::CSpline<> spline(maths::CSplineTypes::E_Cubic); spline.interpolate(x, y, maths::CSplineTypes::E_ParabolicRunout); - for (std::size_t i = 0u; i < 21; ++i) - { - double xx = boost::math::double_constants::two_pi - * static_cast(i) / 20.0; + for (std::size_t i = 0u; i < 21; ++i) { + double xx = boost::math::double_constants::two_pi * static_cast(i) / 20.0; double yy = spline.value(xx); - LOG_DEBUG("spline(" << xx << ") = " << yy - << ", f(" << xx << ") = " << std::sin(xx)); + LOG_DEBUG("spline(" << xx << ") = " << yy << ", f(" << xx << ") = " << std::sin(xx)); CPPUNIT_ASSERT(std::fabs(std::sin(xx) - yy) < 0.04); } - const TDoubleVec &curvatures = spline.curvatures(); + const TDoubleVec& curvatures = spline.curvatures(); std::size_t n = curvatures.size(); - LOG_DEBUG("curvatures[0] = " << curvatures[0] - << ", curvatures[1] = " << curvatures[1]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[0], - curvatures[1], 1e-10); - LOG_DEBUG("curvatures[n-1] = " << curvatures[n-2] - << ", curvatures[n] = " << curvatures[n-1]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[n-2], - curvatures[n-1], 1e-10); + LOG_DEBUG("curvatures[0] = " << curvatures[0] << ", curvatures[1] = " << curvatures[1]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[0], curvatures[1], 1e-10); + LOG_DEBUG("curvatures[n-1] = " << curvatures[n - 2] << ", curvatures[n] = " << curvatures[n - 1]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[n - 2], curvatures[n - 1], 1e-10); } } -void CSplineTest::testPeriodic() -{ +void CSplineTest::testPeriodic() { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| CSplineTest::testPeriodic |"); LOG_DEBUG("+-----------------------------+"); { - double x_[] = { 0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0 }; + double x_[] = {0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); TDoubleVec y; y.reserve(x.size()); - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t 
i = 0u; i < x.size(); ++i) { x[i] *= boost::math::double_constants::two_pi; y.push_back(std::cos(x[i])); } @@ -235,30 +203,20 @@ void CSplineTest::testPeriodic() maths::CSpline<> spline(maths::CSplineTypes::E_Cubic); spline.interpolate(x, y, maths::CSplineTypes::E_Periodic); - for (std::size_t i = 0u; i < 21; ++i) - { - double xx = boost::math::double_constants::two_pi - * static_cast<double>(i) / 20.0; + for (std::size_t i = 0u; i < 21; ++i) { + double xx = boost::math::double_constants::two_pi * static_cast<double>(i) / 20.0; double yy = spline.value(xx); - LOG_DEBUG("spline(" << xx << ") = " << yy - << ", f(" << xx << ") = " << std::cos(xx)); + LOG_DEBUG("spline(" << xx << ") = " << yy << ", f(" << xx << ") = " << std::cos(xx)); CPPUNIT_ASSERT(std::fabs(std::cos(xx) - yy) < 0.02); } } { TDoubleVec x; - for (std::size_t i = 0u; i < 40; ++i) - { + for (std::size_t i = 0u; i < 40; ++i) { x.push_back(static_cast<double>(i) * 5.0); } - double y_[] = - { - 10.0, 7.0, 5.0, 3.0, 1.5, - 3.5, 7.5, 15.5, 15.6, 15.5, - 15.0, 14.0, 13.0, 12.0, 10.0, - 8.0, 4.0, 4.1, 10.0, 10.0 - }; + double y_[] = {10.0, 7.0, 5.0, 3.0, 1.5, 3.5, 7.5, 15.5, 15.6, 15.5, 15.0, 14.0, 13.0, 12.0, 10.0, 8.0, 4.0, 4.1, 10.0, 10.0}; TDoubleVec y(boost::begin(y_), boost::end(y_)); y.insert(y.end(), boost::begin(y_), boost::end(y_)); @@ -268,12 +226,10 @@ void CSplineTest::testPeriodic() maths::CSpline<> spline(maths::CSplineTypes::E_Cubic); spline.interpolate(x, y, maths::CSplineTypes::E_Periodic); - for (std::size_t i = 0; i < 200; ++i) - { + for (std::size_t i = 0; i < 200; ++i) { double xx = static_cast<double>(i); double yy = spline.value(static_cast<double>(i)); - if (i % 5 == 0) - { + if (i % 5 == 0) { LOG_DEBUG("t = " << xx << ", y = " << y[i / 5] << ", ySpline= " << yy); CPPUNIT_ASSERT_DOUBLES_EQUAL(y[i / 5], yy, 1e-6); } @@ -281,8 +237,7 @@ void CSplineTest::testPeriodic() } } -void CSplineTest::testMean() -{ +void CSplineTest::testMean() { LOG_DEBUG("+-------------------------+"); LOG_DEBUG("| CSplineTest::testMean |"); LOG_DEBUG("+-------------------------+"); @@ -291,21 +246,16 @@ void CSplineTest::testMean() // (numerical) integral and the expected mean of the cosine // over a whole number of periods.
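The comparison this test performs can be summarised in a few lines: the mean of the spline over [x.front(), x.back()] is the sum of the per-interval integrals divided by the range, and a two-point Gauss-Legendre rule is exact on each cubic piece. Below is a minimal sketch under those assumptions, not part of the patch; meanOf is a hypothetical stand-in for the CSplineFunctor plus maths::CIntegration::gaussLegendre combination the test actually uses.

#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical helper: integrate a callable over each knot interval with a
// two-point Gauss-Legendre rule (exact for polynomials of degree <= 3, hence
// for cubic spline pieces up to rounding) and divide by the total range.
template<typename F>
double meanOf(const F& f, const std::vector<double>& knots) {
    const double u = 1.0 / std::sqrt(3.0); // nodes at +/- 1/sqrt(3) on [-1, 1]
    double integral = 0.0;
    for (std::size_t i = 1; i < knots.size(); ++i) {
        double c = 0.5 * (knots[i] + knots[i - 1]); // interval midpoint
        double h = 0.5 * (knots[i] - knots[i - 1]); // interval half-width
        integral += h * (f(c - h * u) + f(c + h * u)); // both weights equal 1
    }
    return integral / (knots.back() - knots.front());
}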
- maths::CSplineTypes::EType types[] = - { - maths::CSplineTypes::E_Linear, - maths::CSplineTypes::E_Cubic - }; + maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear, maths::CSplineTypes::E_Cubic}; { - double x_[] = { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; + double x_[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); std::size_t n = x.size() - 1; - double y_[] = { 0.0, 3.0, 4.0, 1.0, 6.0, 6.0, 5.0, 2.0, 2.5, 3.0, 5.0 }; + double y_[] = {0.0, 3.0, 4.0, 1.0, 6.0, 6.0, 5.0, 2.0, 2.5, 3.0, 5.0}; TDoubleVec y(boost::begin(y_), boost::end(y_)); - for (std::size_t t = 0u; t < boost::size(types); ++t) - { + for (std::size_t t = 0u; t < boost::size(types); ++t) { LOG_DEBUG("*** Interpolation '" << print(types[t]) << "' ***"); maths::CSpline<> spline(types[t]); @@ -313,9 +263,8 @@ void CSplineTest::testMean() double expectedMean = 0.0; CSplineFunctor f(spline); - for (std::size_t i = 1; i < x.size(); ++i) - { - double a = x[i-1]; + for (std::size_t i = 1; i < x.size(); ++i) { + double a = x[i - 1]; double b = x[i]; double integral; maths::CIntegration::gaussLegendre(f, a, b, integral); @@ -323,18 +272,14 @@ void CSplineTest::testMean() } expectedMean /= (x[n] - x[0]); - LOG_DEBUG("expectedMean = " << expectedMean - << ", mean = " << spline.mean()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, - spline.mean(), - std::numeric_limits::epsilon() * expectedMean); + LOG_DEBUG("expectedMean = " << expectedMean << ", mean = " << spline.mean()); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, spline.mean(), std::numeric_limits::epsilon() * expectedMean); } } { test::CRandomNumbers rng; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { TSizeVec n; rng.generateUniformSamples(10, 20, 1, n); TDoubleVec x; @@ -346,8 +291,7 @@ void CSplineTest::testMean() LOG_DEBUG("n = " << n[0]); LOG_DEBUG("x = " << core::CContainerPrinter::print(x)); LOG_DEBUG("y = " << core::CContainerPrinter::print(y)); - for (std::size_t t = 0; t < boost::size(types); ++t) - { + for (std::size_t t = 0; t < boost::size(types); ++t) { LOG_DEBUG("*** Interpolation '" << print(types[t]) << "' ***"); maths::CSpline<> spline(types[t]); @@ -355,9 +299,8 @@ void CSplineTest::testMean() double expectedMean = 0.0; CSplineFunctor f(spline); - for (std::size_t j = 1; j < x.size(); ++j) - { - double a = x[j-1]; + for (std::size_t j = 1; j < x.size(); ++j) { + double a = x[j - 1]; double b = x[j]; double integral; maths::CIntegration::gaussLegendre(f, a, b, integral); @@ -365,8 +308,7 @@ void CSplineTest::testMean() } expectedMean /= (x[n[0] - 1] - x[0]); - LOG_DEBUG("expectedMean = " << expectedMean - << ", mean = " << spline.mean()); + LOG_DEBUG("expectedMean = " << expectedMean << ", mean = " << spline.mean()); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, spline.mean(), 1e-4); } @@ -376,15 +318,12 @@ void CSplineTest::testMean() { TDoubleVec x; TDoubleVec y; - for (std::size_t i = 0u; i < 21; ++i) - { + for (std::size_t i = 0u; i < 21; ++i) { x.push_back(static_cast(20 * i)); - y.push_back(std::cos(boost::math::double_constants::two_pi - * static_cast(i) / 10.0)); + y.push_back(std::cos(boost::math::double_constants::two_pi * static_cast(i) / 10.0)); } - for (std::size_t t = 0u; t < boost::size(types); ++t) - { + for (std::size_t t = 0u; t < boost::size(types); ++t) { LOG_DEBUG("*** Interpolation '" << print(types[t]) << "' ***"); maths::CSpline<> spline(types[t]); @@ -396,27 +335,21 @@ void CSplineTest::testMean() } } -void 
CSplineTest::testIllposed() -{ +void CSplineTest::testIllposed() { LOG_DEBUG("+-----------------------------+"); LOG_DEBUG("| CSplineTest::testIllposed |"); LOG_DEBUG("+-----------------------------+"); // Test a case where some of the knot points are colocated. - double x_[] = { 0.0, 0.0, 10.0, 10.0, 15.0, 15.5, 20.0, 20.0, 20.0, 28.0, 30.0, 30.0 }; + double x_[] = {0.0, 0.0, 10.0, 10.0, 15.0, 15.5, 20.0, 20.0, 20.0, 28.0, 30.0, 30.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); - double y_[] = { 0.0, 0.0, 1.9, 2.1, 3.0, 3.1, 4.0, 4.0, 4.0, 5.6, 5.9, 6.1 }; + double y_[] = {0.0, 0.0, 1.9, 2.1, 3.0, 3.1, 4.0, 4.0, 4.0, 5.6, 5.9, 6.1}; TDoubleVec y(boost::begin(y_), boost::end(y_)); - maths::CSplineTypes::EType types[] = - { - maths::CSplineTypes::E_Linear, - maths::CSplineTypes::E_Cubic - }; + maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear, maths::CSplineTypes::E_Cubic}; - for (std::size_t t = 0u; t < boost::size(types); ++t) - { + for (std::size_t t = 0u; t < boost::size(types); ++t) { LOG_DEBUG("*** Interpolation '" << print(types[t]) << "' ***"); maths::CSpline<> spline(types[t]); @@ -426,26 +359,19 @@ void CSplineTest::testIllposed() // be zero (to working precision). TDoubleVec curvatures = spline.curvatures(); - LOG_DEBUG("curvatures = " - << core::CContainerPrinter::print(curvatures)); - for (std::size_t i = 0u; i < curvatures.size(); ++i) - { + LOG_DEBUG("curvatures = " << core::CContainerPrinter::print(curvatures)); + for (std::size_t i = 0u; i < curvatures.size(); ++i) { CPPUNIT_ASSERT(std::fabs(curvatures[i]) < 2e-7); } - for (std::size_t i = 0u; i <= 30; ++i) - { - LOG_DEBUG("expected = " << 0.2 * static_cast<double>(i) - << ", actual = " << spline.value(static_cast<double>(i))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.2 * static_cast<double>(i), - spline.value(static_cast<double>(i)), - 5e-7); + for (std::size_t i = 0u; i <= 30; ++i) { + LOG_DEBUG("expected = " << 0.2 * static_cast<double>(i) << ", actual = " << spline.value(static_cast<double>(i))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.2 * static_cast<double>(i), spline.value(static_cast<double>(i)), 5e-7); } } } -void CSplineTest::testSlope() -{ +void CSplineTest::testSlope() { LOG_DEBUG("+--------------------------+"); LOG_DEBUG("| CSplineTest::testSlope |"); LOG_DEBUG("+--------------------------+"); // Test that the slope and absolute slope agree with the // numerical derivatives of the value.
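For reference, the numerical slope used throughout the assertions that follow is a second-order central difference: for a thrice-differentiable f, (f(x + eps) - f(x - eps)) / (2 eps) differs from f'(x) by roughly eps^2 * f'''(x) / 6, which is why the tolerances in the hunks below are small multiples of eps^2 away from the knots. A minimal sketch under that assumption, not part of the patch; centralSlope is a hypothetical name.

// Hypothetical helper: second-order central-difference estimate of f'(x).
// The truncation error is (eps^2 / 6) * f'''(xi) for smooth f, so with
// eps = 1e-4 agreement to a few multiples of eps^2 is expected between knots.
template<typename F>
double centralSlope(const F& f, double x, double eps = 1e-4) {
    return (f(x + eps) - f(x - eps)) / (2.0 * eps);
}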
- maths::CSplineTypes::EType types[] = - { - maths::CSplineTypes::E_Linear, - maths::CSplineTypes::E_Cubic - }; + maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear, maths::CSplineTypes::E_Cubic}; double eps = 1e-4; { - double x_[] = { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; + double x_[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); std::size_t n = x.size() - 1; - double y_[] = { 0.0, 3.0, 4.0, 1.0, 6.0, 6.0, 5.0, 2.0, 2.5, 3.0, 5.0 }; + double y_[] = {0.0, 3.0, 4.0, 1.0, 6.0, 6.0, 5.0, 2.0, 2.5, 3.0, 5.0}; TDoubleVec y(boost::begin(y_), boost::end(y_)); double range = x[n] - x[0]; - for (std::size_t t = 0u; t < boost::size(types); ++t) - { + for (std::size_t t = 0u; t < boost::size(types); ++t) { LOG_DEBUG("*** Interpolation '" << print(types[t]) << "' ***"); maths::CSpline<> spline(types[t]); spline.interpolate(x, y, maths::CSplineTypes::E_Natural); CSplineFunctor f(spline); - for (std::size_t i = 1u; i < 20; ++i) - { + for (std::size_t i = 1u; i < 20; ++i) { double xi = x[0] + range * static_cast(i) / 20.0 + eps; - double xiPlusEps = xi + eps; + double xiPlusEps = xi + eps; double xiMinusEps = xi - eps; double slope = spline.slope(xi); - double numericalSlope = ( spline.value(xiPlusEps) - - spline.value(xiMinusEps) ) / (2 * eps); - LOG_DEBUG("x = " << xi - << ", slope = " << slope - << ", numerical slope = " << numericalSlope); + double numericalSlope = (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); + LOG_DEBUG("x = " << xi << ", slope = " << slope << ", numerical slope = " << numericalSlope); CPPUNIT_ASSERT_DOUBLES_EQUAL(numericalSlope, slope, 6.0 * eps * eps); } } @@ -494,8 +411,7 @@ void CSplineTest::testSlope() { test::CRandomNumbers rng; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { TSizeVec n; rng.generateUniformSamples(10, 20, 1, n); TDoubleVec x; @@ -507,10 +423,8 @@ void CSplineTest::testSlope() LOG_DEBUG("n = " << n[0]); LOG_DEBUG("x = " << core::CContainerPrinter::print(x)); LOG_DEBUG("y = " << core::CContainerPrinter::print(y)); - for (std::size_t t = 0; t < boost::size(types); ++t) - { - if (i % 10 == 0) - { + for (std::size_t t = 0; t < boost::size(types); ++t) { + if (i % 10 == 0) { LOG_DEBUG("*** Interpolation '" << print(types[t]) << "' ***"); } @@ -518,23 +432,16 @@ void CSplineTest::testSlope() spline.interpolate(x, y, maths::CSplineTypes::E_Natural); CSplineFunctor f(spline); - for (std::size_t j = 1; j < n[0]; ++j) - { - double xj = (x[j] + x[j-1]) / 2.0; - double xiPlusEps = xj + eps; + for (std::size_t j = 1; j < n[0]; ++j) { + double xj = (x[j] + x[j - 1]) / 2.0; + double xiPlusEps = xj + eps; double xiMinusEps = xj - eps; double slope = spline.slope(xj); - double numericalSlope = ( spline.value(xiPlusEps) - - spline.value(xiMinusEps) ) / (2 * eps); - if (i % 10 == 0) - { - LOG_DEBUG("x = " << xj - << ", slope = " << slope - << ", numerical slope = " << numericalSlope); + double numericalSlope = (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); + if (i % 10 == 0) { + LOG_DEBUG("x = " << xj << ", slope = " << slope << ", numerical slope = " << numericalSlope); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(numericalSlope, - slope, - 1e-3 * std::fabs(numericalSlope)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(numericalSlope, slope, 1e-3 * std::fabs(numericalSlope)); } } } @@ -543,40 +450,32 @@ void CSplineTest::testSlope() { TDoubleVec x; TDoubleVec y; - for (std::size_t i = 0u; i < 21; ++i) - { + for (std::size_t i = 
0u; i < 21; ++i) { x.push_back(static_cast(20 * i)); - y.push_back(std::cos(boost::math::double_constants::two_pi - * static_cast(i) / 10.0)); + y.push_back(std::cos(boost::math::double_constants::two_pi * static_cast(i) / 10.0)); } - double range = x[x.size()-1] - x[0]; + double range = x[x.size() - 1] - x[0]; - for (std::size_t t = 0u; t < boost::size(types); ++t) - { + for (std::size_t t = 0u; t < boost::size(types); ++t) { LOG_DEBUG("*** Interpolation '" << print(types[t]) << "' ***"); maths::CSpline<> spline(types[t]); spline.interpolate(x, y, maths::CSplineTypes::E_Periodic); - for (std::size_t i = 1u; i < 20; ++i) - { + for (std::size_t i = 1u; i < 20; ++i) { double xi = x[0] + range * static_cast(i) / 20.0 + eps; - double xiPlusEps = xi + eps; + double xiPlusEps = xi + eps; double xiMinusEps = xi - eps; double slope = spline.slope(xi); - double numericalSlope = ( spline.value(xiPlusEps) - - spline.value(xiMinusEps) ) / (2 * eps); - LOG_DEBUG("x = " << xi - << ", slope = " << slope - << ", numerical slope = " << numericalSlope); + double numericalSlope = (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); + LOG_DEBUG("x = " << xi << ", slope = " << slope << ", numerical slope = " << numericalSlope); CPPUNIT_ASSERT_DOUBLES_EQUAL(numericalSlope, slope, eps * eps); } } } } -void CSplineTest::testSplineReference() -{ +void CSplineTest::testSplineReference() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CSplineTest::testSplineReference |"); LOG_DEBUG("+------------------------------------+"); @@ -586,13 +485,12 @@ void CSplineTest::testSplineReference() using TDoubleVecRef = boost::reference_wrapper; using TSplineRef = maths::CSpline; - double x_[] = { 0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0 }; + double x_[] = {0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0}; TDoubleVec x(boost::begin(x_), boost::end(x_)); TDoubleVec y; y.reserve(x.size()); - for (std::size_t i = 0u; i < x.size(); ++i) - { + for (std::size_t i = 0u; i < x.size(); ++i) { x[i] *= boost::math::double_constants::two_pi; y.push_back(std::sin(x[i])); } @@ -603,70 +501,43 @@ void CSplineTest::testSplineReference() TFloatVec knotsStorage; TFloatVec valuesStorage; TDoubleVec curvaturesStorage; - TSplineRef splineRef(maths::CSplineTypes::E_Cubic, - boost::ref(knotsStorage), - boost::ref(valuesStorage), - boost::ref(curvaturesStorage)); + TSplineRef splineRef(maths::CSplineTypes::E_Cubic, boost::ref(knotsStorage), boost::ref(valuesStorage), boost::ref(curvaturesStorage)); splineRef.interpolate(x, y, maths::CSplineTypes::E_Natural); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.knots()), - core::CContainerPrinter::print(splineRef.knots())); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.values()), - core::CContainerPrinter::print(splineRef.values())); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.curvatures()), - core::CContainerPrinter::print(splineRef.curvatures())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.knots()), core::CContainerPrinter::print(splineRef.knots())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.values()), core::CContainerPrinter::print(splineRef.values())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.curvatures()), core::CContainerPrinter::print(splineRef.curvatures())); - for (std::size_t i = 0u; i < 21; ++i) - { - double xx = boost::math::double_constants::two_pi - * static_cast(i) / 20.0; + for (std::size_t i = 0u; i < 21; ++i) { + double xx = boost::math::double_constants::two_pi * 
static_cast(i) / 20.0; - LOG_DEBUG("spline.value(" << xx << ") = " << spline.value(xx) - << ", splineRef.value(" << xx << ") = " << splineRef.value(xx)); + LOG_DEBUG("spline.value(" << xx << ") = " << spline.value(xx) << ", splineRef.value(" << xx << ") = " << splineRef.value(xx)); CPPUNIT_ASSERT_EQUAL(spline.value(xx), splineRef.value(xx)); - LOG_DEBUG("spline.slope(" << xx << ") = " << spline.slope(xx) - << ", splineRef.slope(" << xx << ") = " << splineRef.slope(xx)); + LOG_DEBUG("spline.slope(" << xx << ") = " << spline.slope(xx) << ", splineRef.slope(" << xx << ") = " << splineRef.slope(xx)); CPPUNIT_ASSERT_EQUAL(spline.slope(xx), splineRef.slope(xx)); } - LOG_DEBUG("spline.mean() = " << spline.mean() - << ", splineRef.mean() = " << splineRef.mean()); + LOG_DEBUG("spline.mean() = " << spline.mean() << ", splineRef.mean() = " << splineRef.mean()); CPPUNIT_ASSERT_EQUAL(spline.mean(), splineRef.mean()); - LOG_DEBUG("spline.absSlope() = " << spline.absSlope() - << ", splineRef.absSlope() = " << splineRef.absSlope()); + LOG_DEBUG("spline.absSlope() = " << spline.absSlope() << ", splineRef.absSlope() = " << splineRef.absSlope()); CPPUNIT_ASSERT_EQUAL(spline.absSlope(), splineRef.absSlope()); LOG_DEBUG("splineRef.memoryUsage = " << splineRef.memoryUsage()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), splineRef.memoryUsage()); } -CppUnit::Test *CSplineTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSplineTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSplineTest::testNatural", - &CSplineTest::testNatural) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSplineTest::testParabolicRunout", - &CSplineTest::testParabolicRunout) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSplineTest::testPeriodic", - &CSplineTest::testPeriodic) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSplineTest::testMean", - &CSplineTest::testMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSplineTest::testIllposed", - &CSplineTest::testIllposed) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSplineTest::testSlope", - &CSplineTest::testSlope) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSplineTest::testSplineReference", - &CSplineTest::testSplineReference) ); +CppUnit::Test* CSplineTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSplineTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testNatural", &CSplineTest::testNatural)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testParabolicRunout", &CSplineTest::testParabolicRunout)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testPeriodic", &CSplineTest::testPeriodic)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testMean", &CSplineTest::testMean)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testIllposed", &CSplineTest::testIllposed)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testSlope", &CSplineTest::testSlope)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testSplineReference", &CSplineTest::testSplineReference)); return suiteOfTests; } diff --git a/lib/maths/unittest/CSplineTest.h b/lib/maths/unittest/CSplineTest.h index 43026bef67..a34a40c729 100644 --- a/lib/maths/unittest/CSplineTest.h +++ b/lib/maths/unittest/CSplineTest.h @@ -9,18 +9,17 @@ #include -class CSplineTest : public CppUnit::TestFixture -{ - public: - void testNatural(); - void testParabolicRunout(); - void testPeriodic(); - void testMean(); - void 
testIllposed(); - void testSlope(); - void testSplineReference(); +class CSplineTest : public CppUnit::TestFixture { +public: + void testNatural(); + void testParabolicRunout(); + void testPeriodic(); + void testMean(); + void testIllposed(); + void testSlope(); + void testSplineReference(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CSpline_h diff --git a/lib/maths/unittest/CStatisticalTestsTest.cc b/lib/maths/unittest/CStatisticalTestsTest.cc index 9d1ff90029..1f10afa574 100644 --- a/lib/maths/unittest/CStatisticalTestsTest.cc +++ b/lib/maths/unittest/CStatisticalTestsTest.cc @@ -6,11 +6,11 @@ #include "CStatisticalTestsTest.h" -#include #include #include #include #include +#include #include #include @@ -31,8 +31,7 @@ using namespace ml; using TDoubleVec = std::vector; -void CStatisticalTestsTest::testCramerVonMises() -{ +void CStatisticalTestsTest::testCramerVonMises() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CStatisticalTestsTest::testCramerVonMises |"); LOG_DEBUG("+---------------------------------------------+"); @@ -41,14 +40,13 @@ void CStatisticalTestsTest::testCramerVonMises() // are correct if the random variable and the distribution // function are perfectly matched. - const std::size_t n[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40, 50, 100, 200, 500 }; + const std::size_t n[] = {2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40, 50, 100, 200, 500}; test::CRandomNumbers rng; double averageMeanError = 0.0; - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { LOG_DEBUG("*** n = " << n[i] << " ***"); { LOG_DEBUG("N(" << 5.0 << "," << std::sqrt(2.0) << ")"); @@ -57,11 +55,9 @@ void CStatisticalTestsTest::testCramerVonMises() boost::math::normal_distribution<> normal(5.0, std::sqrt(2.0)); TDoubleVec p; - for (std::size_t j = 0u; j < samples.size()/n[i]; ++j) - { - maths::CStatisticalTests::CCramerVonMises cvm(n[i]-1); - for (std::size_t k = n[i]*j; k < n[i]*(j+1); ++k) - { + for (std::size_t j = 0u; j < samples.size() / n[i]; ++j) { + maths::CStatisticalTests::CCramerVonMises cvm(n[i] - 1); + for (std::size_t k = n[i] * j; k < n[i] * (j + 1); ++k) { cvm.addF(boost::math::cdf(normal, samples[k])); } p.push_back(cvm.pValue()); @@ -69,17 +65,11 @@ void CStatisticalTestsTest::testCramerVonMises() std::sort(p.begin(), p.end()); double meanError = 0.0; - for (std::size_t j = 0; j < 21; ++j) - { + for (std::size_t j = 0; j < 21; ++j) { double percentile = static_cast(j) / 20.0; - double pp = static_cast(std::lower_bound(p.begin(), - p.end(), - percentile) - - p.begin()) - / static_cast(p.size()); - LOG_DEBUG("percentile = " << percentile - << ", p value percentile = " << pp - << ", error = " << std::fabs(pp - percentile)); + double pp = + static_cast(std::lower_bound(p.begin(), p.end(), percentile) - p.begin()) / static_cast(p.size()); + LOG_DEBUG("percentile = " << percentile << ", p value percentile = " << pp << ", error = " << std::fabs(pp - percentile)); meanError += std::fabs(pp - percentile); CPPUNIT_ASSERT(std::fabs(pp - percentile) < 0.055); } @@ -95,11 +85,9 @@ void CStatisticalTestsTest::testCramerVonMises() boost::math::lognormal_distribution<> lognormal(2.0, 1.0); TDoubleVec p; - for (std::size_t j = 0u; j < samples.size()/n[i]; ++j) - { - maths::CStatisticalTests::CCramerVonMises cvm(n[i]-1); - for (std::size_t k = n[i]*j; k < n[i]*(j+1); ++k) - { + for (std::size_t j = 0u; j < samples.size() / n[i]; ++j) { + 
maths::CStatisticalTests::CCramerVonMises cvm(n[i] - 1); + for (std::size_t k = n[i] * j; k < n[i] * (j + 1); ++k) { cvm.addF(boost::math::cdf(lognormal, samples[k])); } p.push_back(cvm.pValue()); @@ -107,17 +95,11 @@ void CStatisticalTestsTest::testCramerVonMises() std::sort(p.begin(), p.end()); double meanError = 0.0; - for (std::size_t j = 0; j < 21; ++j) - { + for (std::size_t j = 0; j < 21; ++j) { double percentile = static_cast(j) / 20.0; - double pp = static_cast(std::lower_bound(p.begin(), - p.end(), - percentile) - - p.begin()) - / static_cast(p.size()); - LOG_DEBUG("percentile = " << percentile - << ", p value percentile = " << pp - << ", error = " << std::fabs(pp - percentile)); + double pp = + static_cast(std::lower_bound(p.begin(), p.end(), percentile) - p.begin()) / static_cast(p.size()); + LOG_DEBUG("percentile = " << percentile << ", p value percentile = " << pp << ", error = " << std::fabs(pp - percentile)); meanError += std::fabs(pp - percentile); CPPUNIT_ASSERT(std::fabs(pp - percentile) < 0.055); } @@ -133,8 +115,7 @@ void CStatisticalTestsTest::testCramerVonMises() CPPUNIT_ASSERT(averageMeanError < 0.011); } -void CStatisticalTestsTest::testPersist() -{ +void CStatisticalTestsTest::testPersist() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CStatisticalTestsTest::testPersist |"); LOG_DEBUG("+--------------------------------------+"); @@ -150,8 +131,7 @@ void CStatisticalTestsTest::testPersist() maths::CStatisticalTests::CCramerVonMises origCvm(9); TDoubleVec p; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { origCvm.addF(boost::math::cdf(normal, samples[i])); } @@ -170,8 +150,7 @@ void CStatisticalTestsTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CStatisticalTests::CCramerVonMises restoredCvm(traverser); - CPPUNIT_ASSERT_EQUAL(origCvm.checksum(), - restoredCvm.checksum()); + CPPUNIT_ASSERT_EQUAL(origCvm.checksum(), restoredCvm.checksum()); std::string newXml; { @@ -183,16 +162,13 @@ void CStatisticalTestsTest::testPersist() } } -CppUnit::Test *CStatisticalTestsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStatisticalTestsTest"); +CppUnit::Test* CStatisticalTestsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStatisticalTestsTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStatisticalTestsTest::testCramerVonMises", - &CStatisticalTestsTest::testCramerVonMises) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CStatisticalTestsTest::testPersist", - &CStatisticalTestsTest::testPersist) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CStatisticalTestsTest::testCramerVonMises", + &CStatisticalTestsTest::testCramerVonMises)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CStatisticalTestsTest::testPersist", &CStatisticalTestsTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CStatisticalTestsTest.h b/lib/maths/unittest/CStatisticalTestsTest.h index f09d4c5f61..59cd83eb5d 100644 --- a/lib/maths/unittest/CStatisticalTestsTest.h +++ b/lib/maths/unittest/CStatisticalTestsTest.h @@ -9,13 +9,12 @@ #include -class CStatisticalTestsTest : public CppUnit::TestFixture -{ - public: - void testCramerVonMises(); - void testPersist(); +class CStatisticalTestsTest : public CppUnit::TestFixture { +public: + void testCramerVonMises(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // 
INCLUDED_CStatisticalTestsTest_h diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc index 93c852be9b..9bc3b4441e 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc @@ -7,10 +7,10 @@ #include "CTimeSeriesChangeDetectorTest.h" #include -#include #include #include #include +#include #include #include @@ -34,8 +34,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TDouble2Vec = core::CSmallVector; @@ -48,14 +47,11 @@ using TPriorPtrVec = std::vector; core_t::TTime BUCKET_LENGTH{1800}; const double DECAY_RATE{0.0002}; -TPriorPtr makeResidualModel() -{ - maths::CGammaRateConjugate gamma{ - maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1, DECAY_RATE)}; +TPriorPtr makeResidualModel() { + maths::CGammaRateConjugate gamma{maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1, DECAY_RATE)}; maths::CLogNormalMeanPrecConjugate lognormal{ - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.0, DECAY_RATE)}; - maths::CNormalMeanPrecConjugate normal{ - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE)}; + maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.0, DECAY_RATE)}; + maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE)}; TPriorPtrVec mode; mode.reserve(3u); @@ -63,10 +59,8 @@ TPriorPtr makeResidualModel() mode.emplace_back(lognormal.clone()); mode.emplace_back(normal.clone()); maths::COneOfNPrior modePrior{mode, maths_t::E_ContinuousData, DECAY_RATE}; - maths::CXMeansOnline1d clusterer{maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - DECAY_RATE, 0.05, 12.0, 1.0}; + maths::CXMeansOnline1d clusterer{ + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, DECAY_RATE, 0.05, 12.0, 1.0}; maths::CMultimodalPrior multimodal{maths_t::E_ContinuousData, clusterer, modePrior, DECAY_RATE}; TPriorPtrVec models; @@ -77,11 +71,9 @@ TPriorPtr makeResidualModel() return TPriorPtr{maths::COneOfNPrior{mode, maths_t::E_ContinuousData, DECAY_RATE}.clone()}; } - } -void CTimeSeriesChangeDetectorTest::testNoChange() -{ +void CTimeSeriesChangeDetectorTest::testNoChange() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testNoChange |"); LOG_DEBUG("+-----------------------------------------------+"); @@ -92,47 +84,45 @@ void CTimeSeriesChangeDetectorTest::testNoChange() TDoubleVec scales{0.1, 1.0, 2.0, 3.0, 5.0, 8.0}; TDoubleVec samples; - for (std::size_t t = 0u; t < 100; ++t) - { - if (t % 10 == 0) - { + for (std::size_t t = 0u; t < 100; ++t) { + if (t % 10 == 0) { LOG_DEBUG(t << "%"); } - switch (t % 3) - { - case 0: rng.generateNormalSamples(10.0, variances[(t/3) % variances.size()], 1000, samples); break; - case 1: rng.generateLogNormalSamples(1.0, scales[(t/3) % scales.size()], 1000, samples); break; - case 2: rng.generateGammaSamples(10.0, 10.0 * scales[(t/3) % scales.size()], 1000, samples); break; + switch (t % 3) { + case 0: + rng.generateNormalSamples(10.0, variances[(t / 3) % variances.size()], 1000, samples); + break; + case 1: + rng.generateLogNormalSamples(1.0, scales[(t / 3) % scales.size()], 1000, samples); + break; + case 
2: + rng.generateGammaSamples(10.0, 10.0 * scales[(t / 3) % scales.size()], 1000, samples); + break; } TDecompositionPtr trendModel(new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH}); TPriorPtr residualModel(makeResidualModel()); - auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) - { - trendModel->addPoint(time, x); - double detrended{trendModel->detrend(time, x, 0.0)}; - residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{1.0}}); - residualModel->propagateForwardsByTime(1.0); - }; + auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) { + trendModel->addPoint(time, x); + double detrended{trendModel->detrend(time, x, 0.0)}; + residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{1.0}}); + residualModel->propagateForwardsByTime(1.0); + }; core_t::TTime time{0}; - for (std::size_t i = 0u; i < 950; ++i) - { + for (std::size_t i = 0u; i < 950; ++i) { addSampleToModel(time, samples[i]); time += BUCKET_LENGTH; } - maths::CUnivariateTimeSeriesChangeDetector detector{trendModel, residualModel, - 6 * core::constants::HOUR, - 24 * core::constants::HOUR, 14.0}; - for (std::size_t i = 950u; i < samples.size(); ++i) - { + maths::CUnivariateTimeSeriesChangeDetector detector{ + trendModel, residualModel, 6 * core::constants::HOUR, 24 * core::constants::HOUR, 14.0}; + for (std::size_t i = 950u; i < samples.size(); ++i) { addSampleToModel(time, samples[i]); detector.addSamples(maths::CConstantWeights::COUNT, {{time, samples[i]}}, {{1.0}}); - if (detector.stopTesting()) - { + if (detector.stopTesting()) { break; } @@ -143,24 +133,18 @@ void CTimeSeriesChangeDetectorTest::testNoChange() } } -void CTimeSeriesChangeDetectorTest::testLevelShift() -{ +void CTimeSeriesChangeDetectorTest::testLevelShift() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testLevelShift |"); LOG_DEBUG("+-------------------------------------------------+"); TGeneratorVec trends{constant, ramp, smoothDaily, weekends, spikeyDaily}; - this->testChange(trends, - maths::SChangeDescription::E_LevelShift, - [](TGenerator trend, core_t::TTime time) - { - return trend(time) + 0.5; - }, 5.0, 15.0); + this->testChange( + trends, maths::SChangeDescription::E_LevelShift, [](TGenerator trend, core_t::TTime time) { return trend(time) + 0.5; }, 5.0, 15.0); } -void CTimeSeriesChangeDetectorTest::testLinearScale() -{ +void CTimeSeriesChangeDetectorTest::testLinearScale() { LOG_DEBUG("+--------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testLinearScale |"); LOG_DEBUG("+--------------------------------------------------+"); @@ -169,14 +153,12 @@ void CTimeSeriesChangeDetectorTest::testLinearScale() this->testChange(trends, maths::SChangeDescription::E_LinearScale, - [](TGenerator trend, core_t::TTime time) - { - return 3.0 * trend(time); - }, 3.0, 15.0); + [](TGenerator trend, core_t::TTime time) { return 3.0 * trend(time); }, + 3.0, + 15.0); } -void CTimeSeriesChangeDetectorTest::testTimeShift() -{ +void CTimeSeriesChangeDetectorTest::testTimeShift() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testTimeShift |"); LOG_DEBUG("+------------------------------------------------+"); @@ -185,21 +167,18 @@ void CTimeSeriesChangeDetectorTest::testTimeShift() this->testChange(trends, maths::SChangeDescription::E_TimeShift, - [](TGenerator trend, core_t::TTime time) - { - 
return trend(time - core::constants::HOUR); - }, -static_cast(core::constants::HOUR), 24.0); + [](TGenerator trend, core_t::TTime time) { return trend(time - core::constants::HOUR); }, + -static_cast(core::constants::HOUR), + 24.0); this->testChange(trends, maths::SChangeDescription::E_TimeShift, - [](TGenerator trend, core_t::TTime time) - { - return trend(time + core::constants::HOUR); - }, +static_cast(core::constants::HOUR), 24.0); + [](TGenerator trend, core_t::TTime time) { return trend(time + core::constants::HOUR); }, + +static_cast(core::constants::HOUR), + 24.0); } -void CTimeSeriesChangeDetectorTest::testPersist() -{ +void CTimeSeriesChangeDetectorTest::testPersist() { LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testPersist |"); LOG_DEBUG("+----------------------------------------------+"); @@ -212,36 +191,28 @@ void CTimeSeriesChangeDetectorTest::testPersist() TDecompositionPtr trendModel(new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH}); TPriorPtr residualModel(makeResidualModel()); - auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) - { - trendModel->addPoint(time, x); - double detrended{trendModel->detrend(time, x, 0.0)}; - residualModel->addSamples(maths::CConstantWeights::COUNT, - {detrended}, - maths::CConstantWeights::SINGLE_UNIT); - residualModel->propagateForwardsByTime(1.0); - }; + auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) { + trendModel->addPoint(time, x); + double detrended{trendModel->detrend(time, x, 0.0)}; + residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, maths::CConstantWeights::SINGLE_UNIT); + residualModel->propagateForwardsByTime(1.0); + }; core_t::TTime time{0}; - for (std::size_t i = 0u; i < 990; ++i) - { + for (std::size_t i = 0u; i < 990; ++i) { addSampleToModel(time, samples[i]); time += BUCKET_LENGTH; } - maths::CUnivariateTimeSeriesChangeDetector origDetector{trendModel, residualModel, - 6 * core::constants::HOUR, - 24 * core::constants::HOUR, 12.0}; + maths::CUnivariateTimeSeriesChangeDetector origDetector{ + trendModel, residualModel, 6 * core::constants::HOUR, 24 * core::constants::HOUR, 12.0}; - maths::CModelParams modelParams{BUCKET_LENGTH, 1.0, 0.0, 1.0, - 6 * core::constants::HOUR, - 24 * core::constants::HOUR}; + maths::CModelParams modelParams{BUCKET_LENGTH, 1.0, 0.0, 1.0, 6 * core::constants::HOUR, 24 * core::constants::HOUR}; maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE}; maths::STimeSeriesDecompositionRestoreParams decompositionParams{DECAY_RATE, BUCKET_LENGTH, distributionParams}; maths::SModelRestoreParams params{modelParams, decompositionParams, distributionParams}; - for (std::size_t i = 990u; i < samples.size(); ++i) - { + for (std::size_t i = 990u; i < samples.size(); ++i) { addSampleToModel(time, samples[i]); std::string origXml; { @@ -250,51 +221,41 @@ void CTimeSeriesChangeDetectorTest::testPersist() inserter.toXml(origXml); } - maths::CUnivariateTimeSeriesChangeDetector restoredDetector{trendModel, residualModel, - 6 * core::constants::HOUR, - 24 * core::constants::HOUR, 12.0}; + maths::CUnivariateTimeSeriesChangeDetector restoredDetector{ + trendModel, residualModel, 6 * core::constants::HOUR, 24 * core::constants::HOUR, 12.0}; core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - traverser.traverseSubLevel(boost::bind( - 
&maths::CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, - &restoredDetector, boost::cref(params), _1)); + traverser.traverseSubLevel( + boost::bind(&maths::CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, &restoredDetector, boost::cref(params), _1)); - LOG_DEBUG("expected " << origDetector.checksum() - << " got " << restoredDetector.checksum()); + LOG_DEBUG("expected " << origDetector.checksum() << " got " << restoredDetector.checksum()); CPPUNIT_ASSERT_EQUAL(origDetector.checksum(), restoredDetector.checksum()); } } -CppUnit::Test *CTimeSeriesChangeDetectorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTimeSeriesChangeDetectorTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTimeSeriesChangeDetectorTest::testNoChange", - &CTimeSeriesChangeDetectorTest::testNoChange) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTimeSeriesChangeDetectorTest::testLevelShift", - &CTimeSeriesChangeDetectorTest::testLevelShift) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTimeSeriesChangeDetectorTest::testLinearScale", - &CTimeSeriesChangeDetectorTest::testLinearScale) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTimeSeriesChangeDetectorTest::testTimeShift", - &CTimeSeriesChangeDetectorTest::testTimeShift) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTimeSeriesChangeDetectorTest::testPersist", - &CTimeSeriesChangeDetectorTest::testPersist) ); +CppUnit::Test* CTimeSeriesChangeDetectorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeSeriesChangeDetectorTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CTimeSeriesChangeDetectorTest::testNoChange", + &CTimeSeriesChangeDetectorTest::testNoChange)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTimeSeriesChangeDetectorTest::testLevelShift", + &CTimeSeriesChangeDetectorTest::testLevelShift)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTimeSeriesChangeDetectorTest::testLinearScale", + &CTimeSeriesChangeDetectorTest::testLinearScale)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTimeSeriesChangeDetectorTest::testTimeShift", + &CTimeSeriesChangeDetectorTest::testTimeShift)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTimeSeriesChangeDetectorTest::testPersist", + &CTimeSeriesChangeDetectorTest::testPersist)); return suiteOfTests; } -void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec &trends, +void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends, maths::SChangeDescription::EDescription description, TChange applyChange, double expectedChange, - double expectedMeanBucketsToDetectChange) -{ + double expectedMeanBucketsToDetectChange) { using TOptionalSize = boost::optional; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; @@ -303,10 +264,8 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec &trends, TMeanAccumulator meanBucketsToDetect; TDoubleVec samples; - for (std::size_t t = 0u; t < 100; ++t) - { - if (t % 10 == 0) - { + for (std::size_t t = 0u; t < 100; ++t) { + if (t % 10 == 0) { LOG_DEBUG(t << "%"); } @@ -315,49 +274,40 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec &trends, TDecompositionPtr trendModel(new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH}); TPriorPtr residualModel(makeResidualModel()); - auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x, double weight) - { - trendModel->addPoint(time, x, maths::CConstantWeights::COUNT, 
{weight}); - double detrended{trendModel->detrend(time, x, 0.0)}; - residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{weight}}); - residualModel->propagateForwardsByTime(1.0); - }; + auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x, double weight) { + trendModel->addPoint(time, x, maths::CConstantWeights::COUNT, {weight}); + double detrended{trendModel->detrend(time, x, 0.0)}; + residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{weight}}); + residualModel->propagateForwardsByTime(1.0); + }; core_t::TTime time{0}; - for (std::size_t i = 0u; i < 950; ++i) - { + for (std::size_t i = 0u; i < 950; ++i) { double x{10.0 * trends[t % trends.size()](time) + samples[i]}; addSampleToModel(time, x, 1.0); time += BUCKET_LENGTH; } - maths::CUnivariateTimeSeriesChangeDetector detector{trendModel, residualModel, - 6 * core::constants::HOUR, - 24 * core::constants::HOUR, 14.0}; + maths::CUnivariateTimeSeriesChangeDetector detector{ + trendModel, residualModel, 6 * core::constants::HOUR, 24 * core::constants::HOUR, 14.0}; TOptionalSize bucketsToDetect; - for (std::size_t i = 950u; i < samples.size(); ++i) - { + for (std::size_t i = 950u; i < samples.size(); ++i) { double x{10.0 * applyChange(trends[t % trends.size()], time) + samples[i]}; addSampleToModel(time, x, 0.5); detector.addSamples(maths::CConstantWeights::COUNT, {{time, x}}, {{1.0}}); auto change = detector.change(); - if (change) - { - if (!bucketsToDetect) - { + if (change) { + if (!bucketsToDetect) { bucketsToDetect.reset(i - 949); } CPPUNIT_ASSERT_EQUAL(change->s_Description, description); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedChange, - change->s_Value[0], - 0.5 * std::fabs(expectedChange)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedChange, change->s_Value[0], 0.5 * std::fabs(expectedChange)); break; } - if (detector.stopTesting()) - { + if (detector.stopTesting()) { break; } diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h index c3fc9acb02..350cf927cc 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h @@ -13,28 +13,27 @@ #include -class CTimeSeriesChangeDetectorTest : public CppUnit::TestFixture -{ - public: - void testNoChange(); - void testLevelShift(); - void testLinearScale(); - void testTimeShift(); - void testPersist(); - - static CppUnit::Test *suite(); - - private: - using TGenerator = std::function; - using TGeneratorVec = std::vector; - using TChange = std::function; - - private: - void testChange(const TGeneratorVec &trends, - ml::maths::SChangeDescription::EDescription description, - TChange applyChange, - double expectedChange, - double expectedMeanBucketsToDetectChange); +class CTimeSeriesChangeDetectorTest : public CppUnit::TestFixture { +public: + void testNoChange(); + void testLevelShift(); + void testLinearScale(); + void testTimeShift(); + void testPersist(); + + static CppUnit::Test* suite(); + +private: + using TGenerator = std::function; + using TGeneratorVec = std::vector; + using TChange = std::function; + +private: + void testChange(const TGeneratorVec& trends, + ml::maths::SChangeDescription::EDescription description, + TChange applyChange, + double expectedChange, + double expectedMeanBucketsToDetectChange); }; #endif // INCLUDED_CTimeSeriesChangeDetectorTest_h diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc index cebf548e7b..2ea3048046 
100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc @@ -8,18 +8,18 @@ #include #include -#include #include #include #include +#include #include #include #include -#include #include #include #include +#include #include #include @@ -32,8 +32,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleDoublePr = std::pair; using TDoubleVec = std::vector; @@ -44,37 +43,30 @@ using TTimeDoublePrVec = std::vector; using TSeasonalComponentVec = maths_t::TSeasonalComponentVec; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; -double mean(const TDoubleDoublePr &x) -{ +double mean(const TDoubleDoublePr& x) { return (x.first + x.second) / 2.0; } const core_t::TTime FIVE_MINS = 300; -const core_t::TTime TEN_MINS = 600; +const core_t::TTime TEN_MINS = 600; const core_t::TTime HALF_HOUR = core::constants::HOUR / 2; -const core_t::TTime HOUR = core::constants::HOUR; -const core_t::TTime DAY = core::constants::DAY; -const core_t::TTime WEEK = core::constants::WEEK; -const core_t::TTime YEAR = core::constants::YEAR; - +const core_t::TTime HOUR = core::constants::HOUR; +const core_t::TTime DAY = core::constants::DAY; +const core_t::TTime WEEK = core::constants::WEEK; +const core_t::TTime YEAR = core::constants::YEAR; } -void CTimeSeriesDecompositionTest::testSuperpositionOfSines() -{ +void CTimeSeriesDecompositionTest::testSuperpositionOfSines() { LOG_DEBUG("+----------------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testSuperpositionOfSines |"); LOG_DEBUG("+----------------------------------------------------------+"); TTimeVec times; TDoubleVec trend; - for (core_t::TTime time = 0; time < 100 * WEEK + 1; time += HALF_HOUR) - { - double weekly = 1200.0 + 1000.0 * std::sin(boost::math::double_constants::two_pi - * static_cast(time) - / static_cast(WEEK)); - double daily = 5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi - * static_cast(time) - / static_cast(DAY)); + for (core_t::TTime time = 0; time < 100 * WEEK + 1; time += HALF_HOUR) { + double weekly = + 1200.0 + 1000.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(WEEK)); + double daily = 5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(DAY)); times.push_back(time); trend.push_back(weekly * daily); } @@ -101,15 +93,13 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() //TDoubleVec f; //TDoubleVec r; - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { core_t::TTime time = times[i]; double value = trend[i] + noise[i]; decomposition.addPoint(time, value); - if (time >= lastWeek + WEEK) - { + if (time >= lastWeek + WEEK) { LOG_DEBUG("Processing week"); double sumResidual = 0.0; @@ -118,16 +108,15 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() double maxValue = 0.0; double percentileError = 0.0; - for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) - { + for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { TDoubleDoublePr prediction = decomposition.value(t, 70.0); double residual = std::fabs(trend[t / HALF_HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[t / HALF_HOUR]); maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR])); - percentileError += std::max(std::max(prediction.first - trend[t / HALF_HOUR], - trend[t / 
HALF_HOUR] - prediction.second), 0.0); + percentileError += + std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0); //f.push_back(mean(value)); //r.push_back(mean(value) - trend[t / HALF_HOUR]); } @@ -136,8 +125,7 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() LOG_DEBUG("'max residual' / 'max value' = " << maxResidual / maxValue); LOG_DEBUG("70% error = " << percentileError / sumValue); - if (time >= 2 * WEEK) - { + if (time >= 2 * WEEK) { CPPUNIT_ASSERT(sumResidual < 0.04 * sumValue); CPPUNIT_ASSERT(maxResidual < 0.04 * maxValue); CPPUNIT_ASSERT(percentileError < 0.02 * sumValue); @@ -166,8 +154,7 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() CPPUNIT_ASSERT(totalPercentileError < 0.01 * totalSumValue); } -void CTimeSeriesDecompositionTest::testDistortedPeriodic() -{ +void CTimeSeriesDecompositionTest::testDistortedPeriodic() { LOG_DEBUG("+-------------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testDistortedPeriodic |"); LOG_DEBUG("+-------------------------------------------------------+"); @@ -175,80 +162,59 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() const core_t::TTime bucketLength = HOUR; const core_t::TTime startTime = 0; const TDoubleVec timeseries{ - 323444, 960510, 880176, 844190, 823993, 814251, 857187, 856791, 862060, 919632, - 1083704, 2904437, 4601750, 5447896, 5827498, 5924161, 5851895, 5768661, 5927840, 5326236, - 4037245, 1958521, 1360753, 1005194, 901930, 856605, 838370, 810396, 776815, 751163, - 793055, 823974, 820458, 840647, 878594, 1192154, 2321550, 2646460, 2760957, 2838611, - 2784696, 2798327, 2643123, 2028970, 1331199, 1098105, 930971, 907562, 903603, 873554, - 879375, 852853, 828554, 819726, 872418, 856365, 860880, 867119, 873912, 885405, - 1053530, 1487664, 1555301, 1637137, 1672030, 1659346, 1514673, 1228543, 1011740, 928749, - 809702, 838931, 847904, 829188, 822558, 798517, 767446, 750486, 783165, 815612, - 825365, 873486, 1165250, 2977382, 4868975, 6050263, 6470794, 6271899, 6449326, 6352992, - 6162712, 6257295, 4570133, 1781374, 1182546, 665858, 522585, 481588, 395139, 380770, - 379182, 356068, 353498, 347707, 350931, 417253, 989129, 2884728, 4640841, 5423474, - 6246182, 6432793, 6338419, 6312346, 6294323, 6102676, 4505021, 2168289, 1411233, 1055797, - 954338, 918498, 904236, 870193, 843259, 682538, 895407, 883550, 897026, 918838, - 1262303, 3208919, 5193013, 5787263, 6255837, 6337684, 6335017, 6278740, 6191046, 6183259, - 4455055, 2004058, 1425910, 1069949, 942839, 899157, 895133, 858268, 837338, 820983, - 870863, 871873, 881182, 918795, 1237336, 3069272, 4708229, 5672066, 6291124, 6407806, - 6479889, 6533138, 3473382, 6534838, 4800911, 2668073, 1644350, 1282450, 1131734, 1009042, - 891099, 857339, 842849, 816513, 879200, 848292, 858014, 906642, 1208147, 2964568, - 5215885, 5777105, 6332104, 6130733, 6284960, 6157055, 6165520, 5771121, 4309930, 2150044, - 1475275, 1065030, 967267, 890413, 887174, 835741, 814749, 817443, 853085, 851040, - 866029, 867612, 917833, 1225383, 2326451, 2837337, 2975288, 3034415, 3056379, 3181951, - 2938511, 2400202, 1444952, 1058781, 845703, 810419, 805781, 789438, 799674, 775703, - 756145, 727587, 756489, 789886, 784948, 788247, 802013, 832272, 845033, 873396, - 1018788, 1013089, 1095001, 1022910, 798183, 519186, 320507, 247320, 139372, 129477, - 145576, 122348, 120286, 89370, 95583, 88985, 89009, 97425, 103628, 153229, - 675828, 2807240, 4652249, 5170466, 5642965, 5608709, 
5697374, 5546758, 5368913, 5161602, - 3793675, 1375703, 593920, 340764, 197075, 174981, 158274, 130148, 125235, 122526, - 113896, 116249, 126881, 213814, 816723, 2690434, 4827493, 5723621, 6219650, 6492638, - 6570160, 6493706, 6495303, 6301872, 4300612, 1543551, 785562, 390012, 234939, 202190, - 142855, 135218, 124238, 111981, 104807, 107687, 129438, 190294, 779698, 2864053, - 5079395, 5912629, 6481437, 6284107, 6451007, 6177724, 5993932, 6075918, 4140658, 1481179, - 682711, 328387, 233915, 182721, 170860, 139540, 137613, 121669, 116906, 121780, - 127887, 199762, 783099, 2890355, 4658524, 5535842, 6117719, 6322938, 6570422, 6396874, - 6586615, 6332100, 4715160, 2604366, 1525620, 906137, 499019, 358856, 225543, 171388, - 153826, 149910, 141092, 136459, 161202, 240704, 766755, 3011958, 5024254, 5901640, - 6244757, 6257553, 6380236, 6394732, 6385424, 5876960, 4182127, 1868461, 883771, 377159, - 264435, 196674, 181845, 138307, 136055, 133143, 129791, 133694, 127502, 136351, - 212305, 777873, 2219051, 2732315, 2965287, 2895288, 2829988, 2818268, 2513817, 1866217, - 985099, 561287, 205195, 173997, 166428, 165294, 130072, 113917, 113282, 112466, - 103406, 115687, 159863, 158310, 225454, 516925, 1268760, 1523357, 1607510, 1560200, - 1483823, 1401526, 999236, 495292, 299905, 286900, 209697, 169881, 157560, 139030, - 132342, 187941, 126162, 106587, 108759, 109495, 116386, 208504, 676794, 1549362, - 2080332, 2488707, 2699237, 2862970, 2602994, 2554047, 2364456, 1997686, 1192434, 891293, - 697769, 391385, 234311, 231839, 160520, 155870, 142220, 139360, 142885, 141589, - 166792, 443202, 2019645, 4558828, 5982111, 6408009, 6514598, 6567566, 6686935, 6532886, - 6473927, 5475257, 2889913, 1524673, 938262, 557410, 325965, 186484, 174831, 211765, - 145477, 148318, 130425, 136431, 182002, 442272, 2078908, 4628945, 5767034, 6212302, - 6566196, 6527687, 6365204, 6226173, 6401203, 5629733, 3004625, 1555528, 1025549, 492910, - 347948, 298725, 272955, 238279, 209290, 188551, 175447, 173960, 190875, 468340, - 1885268, 4133457, 5350137, 5885807, 6331254, 6420279, 6589448, 6483637, 6557769, 5543938, - 3482732, 2010293, 1278681, 735111, 406042, 283694, 181213, 160207, 136347, 113484, - 118521, 127725, 151408, 396552, 1900747, 4400918, 5546984, 6213423, 6464686, 6442904, - 6385002, 6248314, 5880523, 4816342, 2597450, 1374071, 751391, 362615, 215644, 175158, - 116896, 127935, 110407, 113054, 105841, 113717, 177240, 206515, 616005, 1718878, - 2391747, 2450915, 2653897, 2922320, 2808467, 2490078, 1829760, 1219997, 643936, 400743, - 208976, 119623, 110170, 99338, 93661, 100187, 90803, 83980, 75950, 78805, - 95664, 108467, 128293, 294080, 720811, 965705, 1048021, 1125912, 1194746, 1114704, - 799721, 512542, 353694, 291046, 229723, 206109, 183482, 192225, 191906, 176942, - 148163, 145405, 145728, 159016, 181991, 436297, 1983374, 4688246, 5853284, 6243628, - 6730707, 6660743, 6476024, 6422004, 6335113, 5386230, 2761698, 1230646, 763506, 359071, - 223956, 189020, 158090, 145730, 135338, 114941, 108313, 120023, 167161, 440103, - 1781778, 4428615, 5701824, 6296598, 6541586, 6809286, 6716690, 6488941, 6567385, 5633685, - 2760255, 1316495, 732572, 316496, 225013, 202664, 171295, 143195, 123555, 125327, - 123357, 135419, 194933, 428197, 2181096, 4672692, 5854393, 6553263, 6653127, 6772664, - 6899086, 6794041, 6900871, 6087645, 2814928, 1393906, 894417, 413459, 280839, 237468, - 184947, 214658, 180059, 145215, 134793, 133423, 191388, 417885, 2081899, 4836758, - 5803495, 6451696, 7270708, 7628500, 7208066, 7403079, 7548585, 6323024, 
3763029, 2197174, - 1359687, 857604, 471729, 338888, 177156, 150619, 145775, 132845, 110888, 121863, - 141321, 440528, 2020529, 4615833, 5772372, 6318037, 6481658, 6454979, 6489447, 6558612, - 6114653, 5009113, 2541519, 1329520, 663124, 311088, 200332, 141768, 120845, 120603, - 114688, 111340, 95757, 91444, 103287, 130905, 551108, 1988083, 2885196, 2962413, - 3070689, 3061746, 2999362, 2993871, 2287683, 1539262, 763592, 393769, 193094, 126535, - 131721, 125761, 105550, 89077, 90295, 93853, 84496, 77731, 89389, 101269, - 153379, 443022, 1114121, 1556021, 1607693, 1589743, 1746231, 1432261, 1022052}; + 323444, 960510, 880176, 844190, 823993, 814251, 857187, 856791, 862060, 919632, 1083704, 2904437, 4601750, 5447896, + 5827498, 5924161, 5851895, 5768661, 5927840, 5326236, 4037245, 1958521, 1360753, 1005194, 901930, 856605, 838370, 810396, + 776815, 751163, 793055, 823974, 820458, 840647, 878594, 1192154, 2321550, 2646460, 2760957, 2838611, 2784696, 2798327, + 2643123, 2028970, 1331199, 1098105, 930971, 907562, 903603, 873554, 879375, 852853, 828554, 819726, 872418, 856365, + 860880, 867119, 873912, 885405, 1053530, 1487664, 1555301, 1637137, 1672030, 1659346, 1514673, 1228543, 1011740, 928749, + 809702, 838931, 847904, 829188, 822558, 798517, 767446, 750486, 783165, 815612, 825365, 873486, 1165250, 2977382, + 4868975, 6050263, 6470794, 6271899, 6449326, 6352992, 6162712, 6257295, 4570133, 1781374, 1182546, 665858, 522585, 481588, + 395139, 380770, 379182, 356068, 353498, 347707, 350931, 417253, 989129, 2884728, 4640841, 5423474, 6246182, 6432793, + 6338419, 6312346, 6294323, 6102676, 4505021, 2168289, 1411233, 1055797, 954338, 918498, 904236, 870193, 843259, 682538, + 895407, 883550, 897026, 918838, 1262303, 3208919, 5193013, 5787263, 6255837, 6337684, 6335017, 6278740, 6191046, 6183259, + 4455055, 2004058, 1425910, 1069949, 942839, 899157, 895133, 858268, 837338, 820983, 870863, 871873, 881182, 918795, + 1237336, 3069272, 4708229, 5672066, 6291124, 6407806, 6479889, 6533138, 3473382, 6534838, 4800911, 2668073, 1644350, 1282450, + 1131734, 1009042, 891099, 857339, 842849, 816513, 879200, 848292, 858014, 906642, 1208147, 2964568, 5215885, 5777105, + 6332104, 6130733, 6284960, 6157055, 6165520, 5771121, 4309930, 2150044, 1475275, 1065030, 967267, 890413, 887174, 835741, + 814749, 817443, 853085, 851040, 866029, 867612, 917833, 1225383, 2326451, 2837337, 2975288, 3034415, 3056379, 3181951, + 2938511, 2400202, 1444952, 1058781, 845703, 810419, 805781, 789438, 799674, 775703, 756145, 727587, 756489, 789886, + 784948, 788247, 802013, 832272, 845033, 873396, 1018788, 1013089, 1095001, 1022910, 798183, 519186, 320507, 247320, + 139372, 129477, 145576, 122348, 120286, 89370, 95583, 88985, 89009, 97425, 103628, 153229, 675828, 2807240, + 4652249, 5170466, 5642965, 5608709, 5697374, 5546758, 5368913, 5161602, 3793675, 1375703, 593920, 340764, 197075, 174981, + 158274, 130148, 125235, 122526, 113896, 116249, 126881, 213814, 816723, 2690434, 4827493, 5723621, 6219650, 6492638, + 6570160, 6493706, 6495303, 6301872, 4300612, 1543551, 785562, 390012, 234939, 202190, 142855, 135218, 124238, 111981, + 104807, 107687, 129438, 190294, 779698, 2864053, 5079395, 5912629, 6481437, 6284107, 6451007, 6177724, 5993932, 6075918, + 4140658, 1481179, 682711, 328387, 233915, 182721, 170860, 139540, 137613, 121669, 116906, 121780, 127887, 199762, + 783099, 2890355, 4658524, 5535842, 6117719, 6322938, 6570422, 6396874, 6586615, 6332100, 4715160, 2604366, 1525620, 906137, + 499019, 358856, 225543, 171388, 153826, 149910, 
141092, 136459, 161202, 240704, 766755, 3011958, 5024254, 5901640, + 6244757, 6257553, 6380236, 6394732, 6385424, 5876960, 4182127, 1868461, 883771, 377159, 264435, 196674, 181845, 138307, + 136055, 133143, 129791, 133694, 127502, 136351, 212305, 777873, 2219051, 2732315, 2965287, 2895288, 2829988, 2818268, + 2513817, 1866217, 985099, 561287, 205195, 173997, 166428, 165294, 130072, 113917, 113282, 112466, 103406, 115687, + 159863, 158310, 225454, 516925, 1268760, 1523357, 1607510, 1560200, 1483823, 1401526, 999236, 495292, 299905, 286900, + 209697, 169881, 157560, 139030, 132342, 187941, 126162, 106587, 108759, 109495, 116386, 208504, 676794, 1549362, + 2080332, 2488707, 2699237, 2862970, 2602994, 2554047, 2364456, 1997686, 1192434, 891293, 697769, 391385, 234311, 231839, + 160520, 155870, 142220, 139360, 142885, 141589, 166792, 443202, 2019645, 4558828, 5982111, 6408009, 6514598, 6567566, + 6686935, 6532886, 6473927, 5475257, 2889913, 1524673, 938262, 557410, 325965, 186484, 174831, 211765, 145477, 148318, + 130425, 136431, 182002, 442272, 2078908, 4628945, 5767034, 6212302, 6566196, 6527687, 6365204, 6226173, 6401203, 5629733, + 3004625, 1555528, 1025549, 492910, 347948, 298725, 272955, 238279, 209290, 188551, 175447, 173960, 190875, 468340, + 1885268, 4133457, 5350137, 5885807, 6331254, 6420279, 6589448, 6483637, 6557769, 5543938, 3482732, 2010293, 1278681, 735111, + 406042, 283694, 181213, 160207, 136347, 113484, 118521, 127725, 151408, 396552, 1900747, 4400918, 5546984, 6213423, + 6464686, 6442904, 6385002, 6248314, 5880523, 4816342, 2597450, 1374071, 751391, 362615, 215644, 175158, 116896, 127935, + 110407, 113054, 105841, 113717, 177240, 206515, 616005, 1718878, 2391747, 2450915, 2653897, 2922320, 2808467, 2490078, + 1829760, 1219997, 643936, 400743, 208976, 119623, 110170, 99338, 93661, 100187, 90803, 83980, 75950, 78805, + 95664, 108467, 128293, 294080, 720811, 965705, 1048021, 1125912, 1194746, 1114704, 799721, 512542, 353694, 291046, + 229723, 206109, 183482, 192225, 191906, 176942, 148163, 145405, 145728, 159016, 181991, 436297, 1983374, 4688246, + 5853284, 6243628, 6730707, 6660743, 6476024, 6422004, 6335113, 5386230, 2761698, 1230646, 763506, 359071, 223956, 189020, + 158090, 145730, 135338, 114941, 108313, 120023, 167161, 440103, 1781778, 4428615, 5701824, 6296598, 6541586, 6809286, + 6716690, 6488941, 6567385, 5633685, 2760255, 1316495, 732572, 316496, 225013, 202664, 171295, 143195, 123555, 125327, + 123357, 135419, 194933, 428197, 2181096, 4672692, 5854393, 6553263, 6653127, 6772664, 6899086, 6794041, 6900871, 6087645, + 2814928, 1393906, 894417, 413459, 280839, 237468, 184947, 214658, 180059, 145215, 134793, 133423, 191388, 417885, + 2081899, 4836758, 5803495, 6451696, 7270708, 7628500, 7208066, 7403079, 7548585, 6323024, 3763029, 2197174, 1359687, 857604, + 471729, 338888, 177156, 150619, 145775, 132845, 110888, 121863, 141321, 440528, 2020529, 4615833, 5772372, 6318037, + 6481658, 6454979, 6489447, 6558612, 6114653, 5009113, 2541519, 1329520, 663124, 311088, 200332, 141768, 120845, 120603, + 114688, 111340, 95757, 91444, 103287, 130905, 551108, 1988083, 2885196, 2962413, 3070689, 3061746, 2999362, 2993871, + 2287683, 1539262, 763592, 393769, 193094, 126535, 131721, 125761, 105550, 89077, 90295, 93853, 84496, 77731, + 89389, 101269, 153379, 443022, 1114121, 1556021, 1607693, 1589743, 1746231, 1432261, 1022052}; core_t::TTime time = startTime; core_t::TTime lastWeek = startTime; @@ -264,12 +230,10 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() double 
totalMaxValue = 0.0;
     double totalPercentileError = 0.0;
 
-    for (std::size_t i = 0u; i < timeseries.size(); ++i, time += bucketLength)
-    {
+    for (std::size_t i = 0u; i < timeseries.size(); ++i, time += bucketLength) {
         decomposition.addPoint(time, timeseries[i]);
 
-        if (time >= lastWeek + WEEK || i == boost::size(timeseries) - 1)
-        {
+        if (time >= lastWeek + WEEK || i == boost::size(timeseries) - 1) {
             LOG_DEBUG("Processing week");
 
             //TDoubleVec t;
@@ -282,19 +246,16 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic()
             double maxValue = 0.0;
             double percentileError = 0.0;
 
-            for (core_t::TTime tt = lastWeek;
-                 tt < lastWeek + WEEK &&
-                 static_cast<std::size_t>(tt / HOUR) < boost::size(timeseries);
-                 tt += HOUR)
-            {
+            for (core_t::TTime tt = lastWeek; tt < lastWeek + WEEK && static_cast<std::size_t>(tt / HOUR) < boost::size(timeseries);
+                 tt += HOUR) {
                 TDoubleDoublePr prediction = decomposition.value(tt, 70.0);
                 double residual = std::fabs(timeseries[tt / HOUR] - mean(prediction));
                 sumResidual += residual;
                 maxResidual = std::max(maxResidual, residual);
                 sumValue += std::fabs(timeseries[tt / HOUR]);
                 maxValue = std::max(maxValue, std::fabs(timeseries[tt / HOUR]));
-                percentileError += std::max(std::max(prediction.first - timeseries[tt / HOUR],
-                                                     timeseries[tt / HOUR] - prediction.second), 0.0);
+                percentileError +=
+                    std::max(std::max(prediction.first - timeseries[tt / HOUR], timeseries[tt / HOUR] - prediction.second), 0.0);
                 //t.push_back(tt);
                 //f.push_back(timeseries[tt / HOUR]);
                 //fe.push_back(mean(value));
@@ -304,8 +265,7 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic()
             LOG_DEBUG("'max residual' / 'max value' = " << maxResidual / maxValue);
             LOG_DEBUG("70% error = " << percentileError / sumValue);
 
-            if (time >= 2 * WEEK)
-            {
+            if (time >= 2 * WEEK) {
                 CPPUNIT_ASSERT(sumResidual < 0.30 * sumValue);
                 CPPUNIT_ASSERT(maxResidual < 0.56 * maxValue);
                 CPPUNIT_ASSERT(percentileError < 0.21 * sumValue);
@@ -336,22 +296,18 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic()
     CPPUNIT_ASSERT(totalPercentileError < 0.03 * totalSumValue);
 }
 
-void CTimeSeriesDecompositionTest::testMinimizeLongComponents()
-{
+void CTimeSeriesDecompositionTest::testMinimizeLongComponents() {
     LOG_DEBUG("+------------------------------------------------------------+");
     LOG_DEBUG("| CTimeSeriesDecompositionTest::testMinimizeLongComponents |");
     LOG_DEBUG("+------------------------------------------------------------+");
 
-    double weights[] = { 1.0, 0.1, 1.0, 1.0, 0.1, 1.0, 1.0 };
+    double weights[] = {1.0, 0.1, 1.0, 1.0, 0.1, 1.0, 1.0};
 
     TTimeVec times;
     TDoubleVec trend;
-    for (core_t::TTime time = 0; time < 100 * WEEK; time += HALF_HOUR)
-    {
+    for (core_t::TTime time = 0; time < 100 * WEEK; time += HALF_HOUR) {
         double weight = weights[(time / DAY) % 7];
-        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi
-                                        * static_cast<double>(time)
-                                        / static_cast<double>(DAY));
+        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
         times.push_back(time);
         trend.push_back(weight * daily);
     }
@@ -380,15 +336,13 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents()
     double refinements = 0.0;
 
     core_t::TTime lastWeek = 0;
-    for (std::size_t i = 0u; i < times.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < times.size(); ++i) {
         core_t::TTime time = times[i];
         double value = trend[i] + noise[i];
 
         decomposition.addPoint(time, value);
 
-        if (time >= lastWeek + WEEK)
-        {
+        if (time >= lastWeek + WEEK) {
             LOG_DEBUG("Processing week");
 
             double sumResidual = 0.0;
@@ -397,16 +351,15 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents()
             double maxValue = 0.0;
             double percentileError = 0.0;
 
-            for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR)
-            {
+            for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) {
                 TDoubleDoublePr prediction = decomposition.value(t, 70.0);
                 double residual = std::fabs(trend[t / HALF_HOUR] - mean(prediction));
                 sumResidual += residual;
                 maxResidual = std::max(maxResidual, residual);
                 sumValue += std::fabs(trend[t / HALF_HOUR]);
                 maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
-                percentileError += std::max(std::max(prediction.first - trend[t / HALF_HOUR],
-                                                     trend[t / HALF_HOUR] - prediction.second), 0.0);
+                percentileError +=
+                    std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0);
                 //f.push_back(mean(value));
                 //r.push_back(residual);
             }
@@ -415,8 +368,7 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents()
             LOG_DEBUG("'max residual' / 'max value' = " << maxResidual / maxValue);
             LOG_DEBUG("70% error = " << percentileError / sumValue);
 
-            if (time >= 2 * WEEK)
-            {
+            if (time >= 2 * WEEK) {
                 CPPUNIT_ASSERT(sumResidual < 0.16 * sumValue);
                 CPPUNIT_ASSERT(maxResidual < 0.35 * maxValue);
                 CPPUNIT_ASSERT(percentileError < 0.05 * sumValue);
@@ -427,10 +379,8 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents()
             totalMaxValue += maxValue;
             totalPercentileError += percentileError;
 
-            for (const auto &component : decomposition.seasonalComponents())
-            {
-                if (component.initialized() && component.time().period() == WEEK)
-                {
+            for (const auto& component : decomposition.seasonalComponents()) {
+                if (component.initialized() && component.time().period() == WEEK) {
                     double slope = component.valueSpline().absSlope();
                     meanSlope += slope;
                     LOG_DEBUG("weekly |slope| = " << slope);
@@ -463,22 +413,18 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents()
     CPPUNIT_ASSERT(meanSlope < 0.0015);
 }
 
-void CTimeSeriesDecompositionTest::testWeekend()
-{
+void CTimeSeriesDecompositionTest::testWeekend() {
     LOG_DEBUG("+---------------------------------------------+");
     LOG_DEBUG("| CTimeSeriesDecompositionTest::testWeekend |");
     LOG_DEBUG("+---------------------------------------------+");
 
-    double weights[] = { 0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0 };
+    double weights[] = {0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0};
 
     TTimeVec times;
     TDoubleVec trend;
-    for (core_t::TTime time = 0; time < 100 * WEEK; time += HALF_HOUR)
-    {
+    for (core_t::TTime time = 0; time < 100 * WEEK; time += HALF_HOUR) {
         double weight = weights[(time / DAY) % 7];
-        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi
-                                        * static_cast<double>(time)
-                                        / static_cast<double>(DAY));
+        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
         times.push_back(time);
         trend.push_back(weight * daily);
     }
@@ -505,15 +451,13 @@ void CTimeSeriesDecompositionTest::testWeekend()
     double totalPercentileError = 0.0;
 
     core_t::TTime lastWeek = 0;
-    for (std::size_t i = 0u; i < times.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < times.size(); ++i) {
         core_t::TTime time = times[i];
         double value = trend[i] + noise[i];
 
         decomposition.addPoint(time, value);
 
-        if (time >= lastWeek + WEEK)
-        {
+        if (time >= lastWeek + WEEK) {
             LOG_DEBUG("Processing week");
 
             double sumResidual = 0.0;
@@ -522,16 +466,15 @@ void CTimeSeriesDecompositionTest::testWeekend()
             double maxValue = 0.0;
             double percentileError = 0.0;
 
-            for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR)
-            {
+            for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) {
                 TDoubleDoublePr prediction = decomposition.value(t, 70.0);
                 double residual = std::fabs(trend[t / HALF_HOUR] - mean(prediction));
                 sumResidual += residual;
                 maxResidual = std::max(maxResidual, residual);
                 sumValue += std::fabs(trend[t / HALF_HOUR]);
                 maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
-                percentileError += std::max(std::max(prediction.first - trend[t / HALF_HOUR],
-                                                     trend[t / HALF_HOUR] - prediction.second), 0.0);
+                percentileError +=
+                    std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0);
                 //f.push_back(mean(value));
                 //r.push_back(residual);
             }
@@ -540,8 +483,7 @@ void CTimeSeriesDecompositionTest::testWeekend()
             LOG_DEBUG("'max residual' / 'max value' = " << maxResidual / maxValue);
             LOG_DEBUG("70% error = " << percentileError / sumValue);
 
-            if (time >= 3 * WEEK)
-            {
+            if (time >= 3 * WEEK) {
                 CPPUNIT_ASSERT(sumResidual < 0.07 * sumValue);
                 CPPUNIT_ASSERT(maxResidual < 0.15 * maxValue);
                 CPPUNIT_ASSERT(percentileError < 0.03 * sumValue);
@@ -571,19 +513,16 @@ void CTimeSeriesDecompositionTest::testWeekend()
     CPPUNIT_ASSERT(totalPercentileError < 0.012 * totalSumValue);
 }
 
-void CTimeSeriesDecompositionTest::testSinglePeriodicity()
-{
+void CTimeSeriesDecompositionTest::testSinglePeriodicity() {
     LOG_DEBUG("+-------------------------------------------------------+");
     LOG_DEBUG("| CTimeSeriesDecompositionTest::testSinglePeriodicity |");
     LOG_DEBUG("+-------------------------------------------------------+");
 
     TTimeVec times;
     TDoubleVec trend;
-    for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR)
-    {
-        double daily = 100.0 + 100.0 * std::sin(boost::math::double_constants::two_pi
-                                                * static_cast<double>(time)
-                                                / static_cast<double>(DAY));
+    for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) {
+        double daily =
+            100.0 + 100.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
         times.push_back(time);
         trend.push_back(daily);
     }
@@ -612,15 +551,13 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity()
     double totalPercentileError = 0.0;
 
     core_t::TTime lastWeek = 0;
-    for (std::size_t i = 0u; i < times.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < times.size(); ++i) {
        core_t::TTime time = times[i];
        double value = trend[i] + noise[i];
 
        decomposition.addPoint(time, value);
 
-        if (time >= lastWeek + WEEK)
-        {
+        if (time >= lastWeek + WEEK) {
            LOG_DEBUG("Processing week");
 
            double sumResidual = 0.0;
@@ -629,18 +566,16 @@ void
CTimeSeriesDecompositionTest::testSinglePeriodicity() LOG_DEBUG("'max residual' / 'max value' = " << maxResidual / maxValue); LOG_DEBUG("70% error = " << percentileError / sumValue); - if (time >= 1 * WEEK) - { + if (time >= 1 * WEEK) { CPPUNIT_ASSERT(sumResidual < 0.06 * sumValue); CPPUNIT_ASSERT(maxResidual < 0.08 * maxValue); CPPUNIT_ASSERT(percentileError < 0.02 * sumValue); @@ -662,7 +596,7 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() totalPercentileError += percentileError; // Check that only the daily component has been initialized. - const TSeasonalComponentVec &components = decomposition.seasonalComponents(); + const TSeasonalComponentVec& components = decomposition.seasonalComponents(); CPPUNIT_ASSERT_EQUAL(std::size_t(1), components.size()); CPPUNIT_ASSERT_EQUAL(DAY, components[0].time().period()); CPPUNIT_ASSERT(components[0].initialized()); @@ -680,7 +614,7 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() CPPUNIT_ASSERT(totalPercentileError < 0.01 * totalSumValue); // Check that only the daily component has been initialized. - const TSeasonalComponentVec &components = decomposition.seasonalComponents(); + const TSeasonalComponentVec& components = decomposition.seasonalComponents(); CPPUNIT_ASSERT_EQUAL(std::size_t(1), components.size()); CPPUNIT_ASSERT_EQUAL(DAY, components[0].time().period()); CPPUNIT_ASSERT(components[0].initialized()); @@ -691,31 +625,20 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() //file << "plot(t(1:length(r)), r, 'k');\n"; } -void CTimeSeriesDecompositionTest::testSeasonalOnset() -{ +void CTimeSeriesDecompositionTest::testSeasonalOnset() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testSeasonalOnset |"); LOG_DEBUG("+---------------------------------------------------+"); - const double daily[] = - { - 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, - 5.0, 40.0, 40.0, 40.0, 30.0, 30.0, - 35.0, 35.0, 40.0, 50.0, 60.0, 80.0, - 80.0, 10.0, 5.0, 0.0, 0.0, 0.0 - }; - const double weekly[] = - { - 0.1, 0.1, 1.2, 1.0, 1.0, 0.9, 1.5 - }; + const double daily[] = {0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 40.0, 40.0, 40.0, 30.0, 30.0, + 35.0, 35.0, 40.0, 50.0, 60.0, 80.0, 80.0, 10.0, 5.0, 0.0, 0.0, 0.0}; + const double weekly[] = {0.1, 0.1, 1.2, 1.0, 1.0, 0.9, 1.5}; TTimeVec times; TDoubleVec trend; - for (core_t::TTime time = 0; time < 150 * WEEK + 1; time += HOUR) - { + for (core_t::TTime time = 0; time < 150 * WEEK + 1; time += HOUR) { double value = 0.0; - if (time > 10 * WEEK) - { + if (time > 10 * WEEK) { value += daily[(time % DAY) / HOUR]; value *= weekly[(time % WEEK) / DAY]; } @@ -745,15 +668,13 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() double totalPercentileError = 0.0; core_t::TTime lastWeek = 0; - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { core_t::TTime time = times[i]; double value = trend[i] + noise[i]; decomposition.addPoint(time, value); - if (time >= lastWeek + WEEK) - { + if (time >= lastWeek + WEEK) { LOG_DEBUG("Processing week"); double sumResidual = 0.0; @@ -761,24 +682,20 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() double sumValue = 0.0; double maxValue = 0.0; double percentileError = 0.0; - for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HOUR) - { + for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HOUR) { TDoubleDoublePr prediction = decomposition.value(t, 70.0); double residual = std::fabs(trend[t / HOUR] - mean(prediction)); sumResidual += 
residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[t / HOUR]); maxValue = std::max(maxValue, std::fabs(trend[t / HOUR])); - percentileError += std::max(std::max(prediction.first - trend[t / HOUR], - trend[t / HOUR] - prediction.second), 0.0); + percentileError += std::max(std::max(prediction.first - trend[t / HOUR], trend[t / HOUR] - prediction.second), 0.0); //f.push_back(mean(value)); //r.push_back(residual); } - LOG_DEBUG("'sum residual' / 'sum value' = " - << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG("'max residual' / 'max value' = " - << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG("'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG("'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); LOG_DEBUG("70% error = " << (percentileError == 0.0 ? 0.0 : percentileError / sumValue)); totalSumResidual += sumResidual; @@ -787,23 +704,18 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() totalMaxValue += maxValue; totalPercentileError += percentileError; - const TSeasonalComponentVec &components = decomposition.seasonalComponents(); - if (time > 11 * WEEK) - { + const TSeasonalComponentVec& components = decomposition.seasonalComponents(); + if (time > 11 * WEEK) { // Check that both components have been initialized. CPPUNIT_ASSERT(components.size() > 2); CPPUNIT_ASSERT(components[0].initialized()); CPPUNIT_ASSERT(components[1].initialized()); CPPUNIT_ASSERT(components[2].initialized()); - } - else if (time > 10 * WEEK) - { + } else if (time > 10 * WEEK) { // Check that both components have been initialized. CPPUNIT_ASSERT_EQUAL(std::size_t(1), components.size()); CPPUNIT_ASSERT(components[0].initialized()); - } - else - { + } else { // Check that neither component has been initialized. 
CPPUNIT_ASSERT(components.empty());
             }
@@ -824,8 +736,7 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset()
     CPPUNIT_ASSERT(totalPercentileError < 0.03 * totalSumValue);
 }
 
-void CTimeSeriesDecompositionTest::testVarianceScale()
-{
+void CTimeSeriesDecompositionTest::testVarianceScale() {
     LOG_DEBUG("+---------------------------------------------------+");
     LOG_DEBUG("| CTimeSeriesDecompositionTest::testVarianceScale |");
     LOG_DEBUG("+---------------------------------------------------+");
@@ -839,14 +750,11 @@
         core_t::TTime time = 0;
         maths::CTimeSeriesDecomposition decomposition(0.01, TEN_MINS);
-        for (std::size_t i = 0u; i < 50; ++i)
-        {
-            for (core_t::TTime t = 0; t < DAY; t += TEN_MINS)
-            {
+        for (std::size_t i = 0u; i < 50; ++i) {
+            for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
                 double value = 1.0;
                 double variance = 1.0;
-                if (t >= 3600 && t < 7200)
-                {
+                if (t >= 3600 && t < 7200) {
                     value = 5.0;
                     variance = 10.0;
                 }
@@ -862,23 +770,18 @@
         TMeanAccumulator error;
         TMeanAccumulator percentileError;
         TMeanAccumulator meanScale;
-        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS)
-        {
+        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
             double variance = 1.0;
-            if (t >= 3600 && t < 7200)
-            {
+            if (t >= 3600 && t < 7200) {
                 variance = 10.0;
             }
             double expectedScale = variance / meanVariance;
             TDoubleDoublePr interval = decomposition.scale(time + t, meanVariance, 70.0);
-            LOG_DEBUG("time = " << t
-                      << ", expectedScale = " << expectedScale
-                      << ", scale = " << core::CContainerPrinter::print(interval));
+            LOG_DEBUG("time = " << t << ", expectedScale = " << expectedScale << ", scale = " << core::CContainerPrinter::print(interval));
             double scale = (interval.first + interval.second) / 2.0;
             error.add(std::fabs(scale - expectedScale));
             meanScale.add(scale);
-            percentileError.add(std::max(std::max(interval.first - expectedScale,
-                                                  expectedScale - interval.second), 0.0));
+            percentileError.add(std::max(std::max(interval.first - expectedScale, expectedScale - interval.second), 0.0));
         }
 
         LOG_DEBUG("mean error = " << maths::CBasicStatistics::mean(error));
@@ -893,16 +796,11 @@
         core_t::TTime time = 0;
         maths::CTimeSeriesDecomposition decomposition(0.01, TEN_MINS);
-        for (std::size_t i = 0u; i < 50; ++i)
-        {
-            for (core_t::TTime t = 0; t < DAY; t += TEN_MINS)
-            {
-                double value = 5.0 * std::sin(boost::math::double_constants::two_pi
-                                              * static_cast<double>(t)
-                                              / static_cast<double>(DAY));
+        for (std::size_t i = 0u; i < 50; ++i) {
+            for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
+                double value = 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / static_cast<double>(DAY));
                 double variance = 1.0;
-                if (t >= 3600 && t < 7200)
-                {
+                if (t >= 3600 && t < 7200) {
                     variance = 10.0;
                 }
                 TDoubleVec noise;
@@ -917,23 +815,18 @@
         TMeanAccumulator error;
         TMeanAccumulator percentileError;
         TMeanAccumulator meanScale;
-        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS)
-        {
+        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
             double variance = 1.0;
-            if (t >= 3600 && t < 7200)
-            {
+            if (t >= 3600 && t < 7200) {
                 variance = 10.0;
             }
             double expectedScale = variance / meanVariance;
             TDoubleDoublePr interval = decomposition.scale(time + t, meanVariance, 70.0);
-            LOG_DEBUG("time = " << t
-                      << ", expectedScale = " << expectedScale
-                      << ", scale = " << core::CContainerPrinter::print(interval));
+            LOG_DEBUG("time = " << t << ", expectedScale = " << expectedScale << ", scale = " << core::CContainerPrinter::print(interval));
             double scale = (interval.first + interval.second) / 2.0;
             error.add(std::fabs(scale - expectedScale));
             meanScale.add(scale);
-            percentileError.add(std::max(std::max(interval.first - expectedScale,
-                                                  expectedScale - interval.second), 0.0));
+            percentileError.add(std::max(std::max(interval.first - expectedScale, expectedScale - interval.second), 0.0));
         }
 
         LOG_DEBUG("mean error = " << maths::CBasicStatistics::mean(error));
@@ -949,33 +842,28 @@
         TTimeVec times;
         TDoubleVec trend;
-        for (core_t::TTime time = 0; time < length; time += HALF_HOUR)
-        {
+        for (core_t::TTime time = 0; time < length; time += HALF_HOUR) {
             times.push_back(time);
             double x = static_cast<double>(time);
-            trend.push_back(150.0 + 100.0 * std::sin( boost::math::double_constants::two_pi * x
-                                                      / static_cast<double>(240 * DAY)
-                                                      / (1.0 - x / static_cast<double>(2 * length)))
-                            + 10.0 * std::sin( boost::math::double_constants::two_pi * x
-                                               / static_cast<double>(DAY)));
+            trend.push_back(150.0 +
+                            100.0 * std::sin(boost::math::double_constants::two_pi * x / static_cast<double>(240 * DAY) /
+                                             (1.0 - x / static_cast<double>(2 * length))) +
+                            10.0 * std::sin(boost::math::double_constants::two_pi * x / static_cast<double>(DAY)));
         }
 
         TDoubleVec noise;
         rng.generateNormalSamples(0.0, 4.0, times.size(), noise);
 
         maths::CTimeSeriesDecomposition decomposition(0.024, HALF_HOUR);
-        for (std::size_t i = 0u; i < times.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < times.size(); ++i) {
             decomposition.addPoint(times[i], trend[i] + 0.3 * noise[i]);
         }
 
         TMeanAccumulator meanScale;
         double meanVariance = decomposition.meanVariance();
-        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS)
-        {
+        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
             TDoubleDoublePr interval = decomposition.scale(times.back() + t, meanVariance, 70.0);
-            LOG_DEBUG("time = " << t
-                      << ", scale = " << core::CContainerPrinter::print(interval));
+            LOG_DEBUG("time = " << t << ", scale = " << core::CContainerPrinter::print(interval));
             double scale = (interval.first + interval.second) / 2.0;
             meanScale.add(scale);
         }
@@ -985,8 +873,7 @@
     }
 }
 
-void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase()
-{
+void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() {
     LOG_DEBUG("+-----------------------------------------------------------+");
     LOG_DEBUG("| CTimeSeriesDecompositionTest::testSpikeyDataProblemCase |");
     LOG_DEBUG("+-----------------------------------------------------------+");
@@ -994,16 +881,10 @@
     TTimeDoublePrVec timeseries;
     core_t::TTime startTime;
     core_t::TTime endTime;
-    CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/spikey_data.csv",
-                                                    timeseries,
-                                                    startTime,
-                                                    endTime,
-                                                    "^([0-9]+),([0-9\\.]+)"));
+    CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/spikey_data.csv", timeseries, startTime, endTime, "^([0-9]+),([0-9\\.]+)"));
     CPPUNIT_ASSERT(!timeseries.empty());
 
-    LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(),
-                                                                timeseries.begin() + 10)
-              << " ...");
+    LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ...");
 
     double totalSumResidual = 0.0;
     double totalMaxResidual = 0.0;
@@ -1012,18 +893,15 @@
     double totalPercentileError = 0.0;
 
     maths::CTimeSeriesDecomposition decomposition(0.01,
FIVE_MINS); - maths::CNormalMeanPrecConjugate model = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01); + maths::CNormalMeanPrecConjugate model = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01); core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK; TTimeDoublePrVec lastWeekTimeseries; - for (std::size_t i = 0u; i < timeseries.size(); ++i) - { + for (std::size_t i = 0u; i < timeseries.size(); ++i) { core_t::TTime time = timeseries[i].first; double value = timeseries[i].second; - if (time > lastWeek + WEEK) - { + if (time > lastWeek + WEEK) { LOG_DEBUG("Processing week"); double sumResidual = 0.0; @@ -1032,26 +910,22 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() double maxValue = 0.0; double percentileError = 0.0; - for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) - { + for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max(std::max(prediction.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - prediction.second), 0.0); + percentileError += std::max( + std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); } - LOG_DEBUG("'sum residual' / 'sum value' = " - << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG("'max residual' / 'max value' = " - << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG("'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG("'max residual' / 'max value' = " << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue)); LOG_DEBUG("70% error = " << percentileError / sumValue); - if (time >= startTime + WEEK) - { + if (time >= startTime + WEEK) { totalSumResidual += sumResidual; totalMaxResidual += maxResidual; totalSumValue += sumValue; @@ -1062,13 +936,11 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() lastWeekTimeseries.clear(); lastWeek += WEEK; } - if (time > lastWeek) - { + if (time > lastWeek) { lastWeekTimeseries.push_back(timeseries[i]); } - if (decomposition.addPoint(time, value)) - { + if (decomposition.addPoint(time, value)) { model.setToNonInformative(0.0, 0.01); } model.addSamples(maths_t::TWeightStyleVec{maths_t::E_SampleCountWeight}, @@ -1094,20 +966,20 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() double pMinScaled = 1.0; double pMinUnscaled = 1.0; - for (std::size_t i = 0u; timeseries[i].first < startTime + DAY; ++i) - { + for (std::size_t i = 0u; timeseries[i].first < startTime + DAY; ++i) { core_t::TTime time = timeseries[i].first; double value = timeseries[i].second; double variance = model.marginalLikelihoodVariance(); double lb, ub; maths_t::ETail tail; - model.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec{maths_t::E_SampleSeasonalVarianceScaleWeight}, - TDoubleVec{decomposition.detrend(time, value, 70.0)}, - TDoubleVecVec{TDoubleVec{std::max(decomposition.scale(time, variance, 70.0).second, 0.25)}}, - lb, ub, tail); + model.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + maths_t::TWeightStyleVec{maths_t::E_SampleSeasonalVarianceScaleWeight}, + TDoubleVec{decomposition.detrend(time, value, 70.0)}, + TDoubleVecVec{TDoubleVec{std::max(decomposition.scale(time, variance, 70.0).second, 0.25)}}, + lb, + ub, + tail); double pScaled = (lb + ub) / 2.0; pMinScaled = std::min(pMinScaled, pScaled); @@ -1117,12 +989,13 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() //scales.push_back(mean(decomposition.scale(time, variance, 70.0))); //probs.push_back(-std::log(pScaled)); - model.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleSeasonalVarianceScaleWeight), - TDoubleVec(1, decomposition.detrend(time, value, 70.0)), - TDoubleVecVec(1, TDoubleVec(1, 1.0)), - lb, ub, tail); + model.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + maths_t::TWeightStyleVec(1, maths_t::E_SampleSeasonalVarianceScaleWeight), + TDoubleVec(1, decomposition.detrend(time, value, 70.0)), + TDoubleVecVec(1, TDoubleVec(1, 1.0)), + lb, + ub, + tail); double pUnscaled = (lb + ub) / 2.0; pMinUnscaled = std::min(pMinUnscaled, pUnscaled); } @@ -1142,8 +1015,7 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() CPPUNIT_ASSERT(pMinScaled > 1e11 * pMinUnscaled); } -void CTimeSeriesDecompositionTest::testDiurnalProblemCase() -{ +void CTimeSeriesDecompositionTest::testDiurnalProblemCase() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testDiurnalProblemCase |"); LOG_DEBUG("+--------------------------------------------------------+"); @@ -1151,16 +1023,10 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; - CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/diurnal.csv", - timeseries, - startTime, - endTime, - "^([0-9]+),([0-9\\.]+)")); + CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/diurnal.csv", timeseries, startTime, endTime, "^([0-9]+),([0-9\\.]+)")); 
CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); //std::ofstream file; //file.open("results.m"); @@ -1179,13 +1045,11 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK; TTimeDoublePrVec lastWeekTimeseries; - for (std::size_t i = 0u; i < timeseries.size(); ++i) - { + for (std::size_t i = 0u; i < timeseries.size(); ++i) { core_t::TTime time = timeseries[i].first; double value = timeseries[i].second; - if (time > lastWeek + WEEK) - { + if (time > lastWeek + WEEK) { LOG_DEBUG("Processing week"); double sumResidual = 0.0; @@ -1194,16 +1058,15 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() double maxValue = 0.0; double percentileError = 0.0; - for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) - { + for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max(std::max(prediction.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - prediction.second), 0.0); + percentileError += std::max( + std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); //times.push_back(lastWeekTimeseries[j].first); //values.push_back(lastWeekTimeseries[j].second); //f.push_back(mean(value)); @@ -1214,8 +1077,7 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() LOG_DEBUG("'max residual' / 'max value' = " << maxResidual / maxValue); LOG_DEBUG("70% error = " << percentileError / sumValue); - if (time >= startTime + 2 * WEEK) - { + if (time >= startTime + 2 * WEEK) { totalSumResidual += sumResidual; totalMaxResidual += maxResidual; totalSumValue += sumValue; @@ -1226,18 +1088,15 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() lastWeekTimeseries.clear(); lastWeek += WEEK; } - if (time > lastWeek) - { + if (time > lastWeek) { lastWeekTimeseries.push_back(timeseries[i]); } decomposition.addPoint(time, value); } - LOG_DEBUG("total 'sum residual' / 'sum value' = " - << totalSumResidual / totalSumValue); - LOG_DEBUG("total 'max residual' / 'max value' = " - << totalMaxResidual / totalMaxValue); + LOG_DEBUG("total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); + LOG_DEBUG("total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG("total 70% error = " << totalPercentileError / totalSumValue); CPPUNIT_ASSERT(totalSumResidual < 0.27 * totalSumValue); @@ -1256,8 +1115,7 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() TMeanAccumulator scale; double variance = decomposition.meanVariance(); core_t::TTime time = maths::CIntegerTools::floor(endTime, DAY); - for (core_t::TTime t = time; t < time + WEEK; t += TEN_MINS) - { + for (core_t::TTime t = time; t < time + WEEK; t += TEN_MINS) { scale.add(mean(decomposition.scale(t, variance, 70.0))); } @@ -1265,8 +1123,7 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, 
maths::CBasicStatistics::mean(scale), 0.07); } -void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() -{ +void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() { LOG_DEBUG("+---------------------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase |"); LOG_DEBUG("+---------------------------------------------------------------+"); @@ -1282,9 +1139,7 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); //std::ofstream file; //file.open("results.m"); @@ -1303,13 +1158,11 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK; TTimeDoublePrVec lastWeekTimeseries; - for (std::size_t i = 0u; i < timeseries.size(); ++i) - { + for (std::size_t i = 0u; i < timeseries.size(); ++i) { core_t::TTime time = timeseries[i].first; double value = timeseries[i].second; - if (time > lastWeek + WEEK) - { + if (time > lastWeek + WEEK) { LOG_DEBUG("Processing week"); double sumResidual = 0.0; @@ -1318,30 +1171,26 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() double maxValue = 0.0; double percentileError = 0.0; - for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) - { + for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max(std::max(prediction.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - prediction.second), 0.0); + percentileError += std::max( + std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); //times.push_back(lastWeekTimeseries[j].first); //values.push_back(lastWeekTimeseries[j].second); //f.push_back(mean(value)); //r.push_back(residual); } - LOG_DEBUG("'sum residual' / 'sum value' = " - << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG("'max residual' / 'max value' = " - << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG("'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG("'max residual' / 'max value' = " << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue)); LOG_DEBUG("70% error = " << percentileError / sumValue); - if (time >= startTime + 2 * WEEK) - { + if (time >= startTime + 2 * WEEK) { totalSumResidual += sumResidual; totalMaxResidual += maxResidual; totalSumValue += sumValue; @@ -1352,8 +1201,7 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() lastWeekTimeseries.clear(); lastWeek += WEEK; } - if (time > lastWeek) - { + if (time > lastWeek) { lastWeekTimeseries.push_back(timeseries[i]); } @@ -1378,16 +1226,14 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() //file << "plot(t(1:length(r)), r, 'k');\n"; } -void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() -{ +void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() { LOG_DEBUG("+-------------------------------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues |"); LOG_DEBUG("+-------------------------------------------------------------------------+"); test::CRandomNumbers rng; - LOG_DEBUG("Daily Periodic") - { + LOG_DEBUG("Daily Periodic") { //std::ofstream file; //file.open("results.m"); //TDoubleVec times; @@ -1397,24 +1243,17 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() TMeanAccumulator error; maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); core_t::TTime time = 0; - for (std::size_t t = 0u; t < 50; ++t) - { - for (auto value : { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) - { - if (value > 0.0) - { + for (std::size_t t = 0u; t < 50; ++t) { + for (auto value : {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, + 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) { + if (value > 0.0) { TDoubleVec noise; rng.generateNormalSamples(10.0, 2.0, 1, noise); decomposition.addPoint(time, value + noise[0]); - if (decomposition.initialized()) - { - error.add(std::fabs( - ( value + noise[0] - - maths::CBasicStatistics::mean(decomposition.value(time, 0.0)))) - / std::fabs(value + noise[0])); + if (decomposition.initialized()) { + error.add(std::fabs((value + noise[0] - maths::CBasicStatistics::mean(decomposition.value(time, 0.0)))) / + std::fabs(value + noise[0])); } //times.push_back(time); //values.push_back(value + noise[0]); @@ -1435,8 +1274,7 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() //file << "plot(t(1:length(fe)), fe);\n"; } - LOG_DEBUG("Weekly") - { + LOG_DEBUG("Weekly") { //std::ofstream file; //file.open("results.m"); //TDoubleVec times; @@ -1446,34 +1284,23 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() TMeanAccumulator error; maths::CTimeSeriesDecomposition decomposition(0.01, HOUR); core_t::TTime time = 0; - for (std::size_t t = 0u; t < 10; ++t) - { - for (auto value : { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 
6.0, 8.0, 9.0, 9.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, - 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) - { - if (value > 0.0) - { + for (std::size_t t = 0u; t < 10; ++t) { + for (auto value : + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, + 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, + 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, + 8.0, 9.0, 9.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, + 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, + 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) { + if (value > 0.0) { TDoubleVec noise; rng.generateNormalSamples(10.0, 2.0, 1, noise); decomposition.addPoint(time, value + noise[0]); - if (decomposition.initialized()) - { - error.add(std::fabs( - ( value + noise[0] - - maths::CBasicStatistics::mean(decomposition.value(time, 0.0)))) - / std::fabs(value + noise[0])); + if (decomposition.initialized()) { + error.add(std::fabs((value + noise[0] - maths::CBasicStatistics::mean(decomposition.value(time, 0.0)))) / + std::fabs(value + noise[0])); } //times.push_back(time); //values.push_back(value + noise[0]); @@ -1495,8 +1322,7 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() } } -void CTimeSeriesDecompositionTest::testLongTermTrend() -{ +void CTimeSeriesDecompositionTest::testLongTermTrend() { LOG_DEBUG("+---------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testLongTermTrend |"); LOG_DEBUG("+---------------------------------------------------+"); @@ -1515,10 +1341,8 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() //TDoubleVec f; //TDoubleVec values; - LOG_DEBUG("Linear Ramp") - { - for (core_t::TTime time = 0; time < length; time += HALF_HOUR) - { + LOG_DEBUG("Linear Ramp") { + for (core_t::TTime time = 0; time < length; time += HALF_HOUR) { times.push_back(time); trend.push_back(5.0 + static_cast(time) / static_cast(DAY)); } @@ -1531,23 +1355,19 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() double totalMaxValue = 0.0; core_t::TTime lastDay = times[0]; - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + noise[i]); - if (times[i] > lastDay + DAY) - { + if (times[i] > lastDay + DAY) { LOG_DEBUG("Processing day " << times[i] / DAY); - if (decomposition.initialized()) - { + if (decomposition.initialized()) { double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; - for (std::size_t j = i - 48; j < i; ++j) - { + for (std::size_t j = i - 48; j 
< i; ++j) { TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); double residual = std::fabs(trend[j] - mean(prediction)); sumResidual += residual; @@ -1556,10 +1376,8 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() maxValue = std::max(maxValue, std::fabs(trend[j])); } - LOG_DEBUG("'sum residual' / 'sum value' = " - << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG("'max residual' / 'max value' = " - << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG("'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG("'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -1591,23 +1409,16 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() LOG_DEBUG("Saw Tooth Not Periodic"); { - core_t::TTime drops[] = - { - 0, 30 * DAY, 50 * DAY, 60 * DAY, 85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY - }; + core_t::TTime drops[] = {0, 30 * DAY, 50 * DAY, 60 * DAY, 85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY}; times.clear(); trend.clear(); { std::size_t i = 0u; - for (core_t::TTime time = 0; - time < length; - time += HALF_HOUR, (time > drops[i] ? ++i : i)) - { + for (core_t::TTime time = 0; time < length; time += HALF_HOUR, (time > drops[i] ? ++i : i)) { times.push_back(time); - trend.push_back(25.0 * static_cast(time - drops[i-1]) - / static_cast(drops[i] - drops[i-1] + 1)); + trend.push_back(25.0 * static_cast(time - drops[i - 1]) / static_cast(drops[i] - drops[i - 1] + 1)); } } @@ -1619,23 +1430,19 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() double totalMaxValue = 0.0; core_t::TTime lastDay = times[0]; - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + 0.3 * noise[i]); - if (times[i] > lastDay + DAY) - { + if (times[i] > lastDay + DAY) { LOG_DEBUG("Processing day " << times[i] / DAY); - if (decomposition.initialized()) - { + if (decomposition.initialized()) { double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; - for (std::size_t j = i - 48; j < i; ++j) - { + for (std::size_t j = i - 48; j < i; ++j) { TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); double residual = std::fabs(trend[j] - mean(prediction)); sumResidual += residual; @@ -1644,10 +1451,8 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() maxValue = std::max(maxValue, std::fabs(trend[j])); } - LOG_DEBUG("'sum residual' / 'sum value' = " - << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG("'max residual' / 'max value' = " - << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG("'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG("'max residual' / 'max value' = " << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -1675,27 +1480,23 @@ } } -void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() -{ +void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() { LOG_DEBUG("+-----------------------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity |"); LOG_DEBUG("+-----------------------------------------------------------------+"); // Test long term mean reverting component plus daily periodic component. - TTimeVec times; TDoubleVec trend; const core_t::TTime length = 120 * DAY; - for (core_t::TTime time = 0; time < length; time += HALF_HOUR) - { + for (core_t::TTime time = 0; time < length; time += HALF_HOUR) { times.push_back(time); double x = static_cast<double>(time); - trend.push_back(150.0 + 100.0 * std::sin( boost::math::double_constants::two_pi * x - / static_cast<double>(240 * DAY) - / (1.0 - x / static_cast<double>(2 * length))) - + 10.0 * std::sin( boost::math::double_constants::two_pi * x - / static_cast<double>(DAY))); + trend.push_back(150.0 + + 100.0 * std::sin(boost::math::double_constants::two_pi * x / static_cast<double>(240 * DAY) / + (1.0 - x / static_cast<double>(2 * length))) + + 10.0 * std::sin(boost::math::double_constants::two_pi * x / static_cast<double>(DAY))); } test::CRandomNumbers rng; @@ -1715,23 +1516,19 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() double totalMaxValue = 0.0; core_t::TTime lastDay = times[0]; - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + 0.3 * noise[i]); - if (times[i] > lastDay + DAY) - { + if (times[i] > lastDay + DAY) { LOG_DEBUG("Processing day " << times[i] / DAY); - if (decomposition.initialized()) - { + if (decomposition.initialized()) { double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; - for (std::size_t j = i - 48; j < i; ++j) - { + for (std::size_t j = i - 48; j < i; ++j) { TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); double residual = std::fabs(trend[j] - mean(prediction)); sumResidual += residual; @@ -1740,10 +1537,8 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() maxValue = std::max(maxValue, std::fabs(trend[j])); } - LOG_DEBUG("'sum residual' / 'sum value' = " - << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG("'max residual' / 'max value' = " - << (maxResidual == 0.0 ?
0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -1772,24 +1567,21 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() CPPUNIT_ASSERT(totalMaxResidual / totalMaxValue < 0.05); } -void CTimeSeriesDecompositionTest::testNonDiurnal() -{ +void CTimeSeriesDecompositionTest::testNonDiurnal() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testNonDiurnal |"); LOG_DEBUG("+------------------------------------------------+"); test::CRandomNumbers rng; - LOG_DEBUG("Hourly") - { + LOG_DEBUG("Hourly") { const core_t::TTime length = 21 * DAY; - double periodic[]{ 10.0, 1.0, 0.5, 0.5, 1.0, 5.0, 2.0, 1.0, 0.5, 0.5, 1.0, 3.0 }; + double periodic[]{10.0, 1.0, 0.5, 0.5, 1.0, 5.0, 2.0, 1.0, 0.5, 0.5, 1.0, 3.0}; TTimeVec times; - TDoubleVec trends[2]{ TDoubleVec(), TDoubleVec(8 * DAY / FIVE_MINS) }; - for (core_t::TTime time = 0; time < length; time += FIVE_MINS) - { + TDoubleVec trends[2]{TDoubleVec(), TDoubleVec(8 * DAY / FIVE_MINS)}; + for (core_t::TTime time = 0; time < length; time += FIVE_MINS) { times.push_back(time); trends[0].push_back(periodic[(time / FIVE_MINS) % 12]); trends[1].push_back(periodic[(time / FIVE_MINS) % 12]); @@ -1799,10 +1591,9 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() rng.generateNormalSamples(0.0, 1.0, trends[1].size(), noise); core_t::TTime startTesting[]{3 * HOUR, 16 * DAY}; - TDoubleVec thresholds[]{ TDoubleVec{0.07, 0.06}, TDoubleVec{0.18, 0.13} }; + TDoubleVec thresholds[]{TDoubleVec{0.07, 0.06}, TDoubleVec{0.18, 0.13}}; - for (std::size_t t = 0u; t < 2; ++t) - { + for (std::size_t t = 0u; t < 2; ++t) { //std::ofstream file; //file.open("results.m"); //TDoubleVec f; @@ -1816,23 +1607,19 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() double totalMaxValue = 0.0; core_t::TTime lastHour = times[0] + 3 * DAY; - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trends[t][i] + noise[i]); - if (times[i] > lastHour + HOUR) - { + if (times[i] > lastHour + HOUR) { LOG_DEBUG("Processing hour " << times[i] / HOUR); - if (times[i] > startTesting[t]) - { + if (times[i] > startTesting[t]) { double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; - for (std::size_t j = i - 12; j < i; ++j) - { + for (std::size_t j = i - 12; j < i; ++j) { TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); double residual = std::fabs(trends[t][j] - mean(prediction)); sumResidual += residual; @@ -1841,10 +1628,8 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() maxValue = std::max(maxValue, std::fabs(trends[t][j])); } - LOG_DEBUG("'sum residual' / 'sum value' = " - << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG("'max residual' / 'max value' = " - << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG("'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG("'max residual' / 'max value' = " << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -1878,12 +1663,11 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { const core_t::TTime length = 20 * DAY; - double periodic[] = { 10.0, 8.0, 5.5, 2.5, 2.0, 5.0, 2.0, 1.0, 1.5, 3.5, 4.0, 7.0 }; + double periodic[] = {10.0, 8.0, 5.5, 2.5, 2.0, 5.0, 2.0, 1.0, 1.5, 3.5, 4.0, 7.0}; TTimeVec times; TDoubleVec trend; - for (core_t::TTime time = 0; time < length; time += TEN_MINS) - { + for (core_t::TTime time = 0; time < length; time += TEN_MINS) { times.push_back(time); trend.push_back(periodic[(time / 4 / HOUR) % 12]); } @@ -1905,23 +1689,19 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() double totalMaxValue = 0.0; core_t::TTime lastTwoDay = times[0] + 3 * DAY; - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + noise[i]); - if (times[i] > lastTwoDay + 2 * DAY) - { + if (times[i] > lastTwoDay + 2 * DAY) { LOG_DEBUG("Processing two days " << times[i] / 2 * DAY); - if (times[i] > startTesting) - { + if (times[i] > startTesting) { double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; - for (std::size_t j = i - 288; j < i; ++j) - { + for (std::size_t j = i - 288; j < i; ++j) { TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); double residual = std::fabs(trend[j] - mean(prediction)); sumResidual += residual; @@ -1930,10 +1710,8 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() maxValue = std::max(maxValue, std::fabs(trend[j])); } - LOG_DEBUG("'sum residual' / 'sum value' = " - << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG("'max residual' / 'max value' = " - << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG("'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG("'max residual' / 'max value' = " << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -1963,8 +1741,7 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() } } -void CTimeSeriesDecompositionTest::testYearly() -{ +void CTimeSeriesDecompositionTest::testYearly() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testYearly |"); LOG_DEBUG("+--------------------------------------------+"); @@ -1975,26 +1752,19 @@ maths::CTimeSeriesDecomposition decomposition(0.012, 4 * HOUR); maths::CDecayRateController controller( - maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease, 1); + maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1); TDoubleVec noise; core_t::TTime time = 0; - for (/**/; time < 4 * YEAR; time += 4 * HOUR) - { - double trend = 15.0 * (2.0 + std::sin( boost::math::double_constants::two_pi - * static_cast<double>(time) - / static_cast<double>(YEAR))) - + 7.5 * std::sin( boost::math::double_constants::two_pi - * static_cast<double>(time) - / static_cast<double>(DAY)); + for (/**/; time < 4 * YEAR; time += 4 * HOUR) { + double trend = + 15.0 * (2.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(YEAR))) + + 7.5 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); rng.generateNormalSamples(0.0, 1.0, 1, noise); decomposition.addPoint(time, trend + noise[0]); - if (decomposition.initialized()) - { + if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{decomposition.detrend(time, trend, 0.0)}; - double multiplier{controller.multiplier( - prediction, {predictionError}, 4 * HOUR, 1.0, 0.0005)}; + double multiplier{controller.multiplier(prediction, {predictionError}, 4 * HOUR, 1.0, 0.0005)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } } @@ -2007,22 +1777,17 @@ // Predict over one year and check we get reasonable accuracy.
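// The training loop above wires the decomposition to its decay-rate controller:
// once the decomposition is initialized, each 4-hour bucket the controller sees
// the current prediction and the detrended error, and the returned multiplier
// rescales the decomposition's decay rate. A minimal sketch of that feedback
// step, using only the calls already exercised in this test (an illustrative
// recap, not part of the patch):
TDouble1Vec prediction{decomposition.meanValue(time)};
TDouble1Vec predictionError{decomposition.detrend(time, trend, 0.0)};
double multiplier{controller.multiplier(prediction, {predictionError}, 4 * HOUR, 1.0, 0.0005)};
decomposition.decayRate(multiplier * decomposition.decayRate());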
TMeanAccumulator meanError; - for (/**/; time < 5 * YEAR; time += 4 * HOUR) - { - double trend = 15.0 * (2.0 + std::sin( boost::math::double_constants::two_pi - * static_cast<double>(time) - / static_cast<double>(YEAR))) - + 7.5 * std::sin( boost::math::double_constants::two_pi - * static_cast<double>(time) - / static_cast<double>(DAY)); + for (/**/; time < 5 * YEAR; time += 4 * HOUR) { + double trend = + 15.0 * (2.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(YEAR))) + + 7.5 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); double prediction = maths::CBasicStatistics::mean(decomposition.value(time, 0.0)); double error = std::fabs((prediction - trend) / trend); meanError.add(error); //times.push_back(time); //values.push_back(trend); //f.push_back(prediction); - if (time / HOUR % 40 == 0) - { + if (time / HOUR % 40 == 0) { LOG_DEBUG("error = " << error); } CPPUNIT_ASSERT(error < 0.1); @@ -2038,8 +1803,7 @@ CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.02); } -void CTimeSeriesDecompositionTest::testCalendar() -{ +void CTimeSeriesDecompositionTest::testCalendar() { LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testCalendar |"); LOG_DEBUG("+----------------------------------------------+"); @@ -2047,30 +1811,25 @@ // Test that we significantly reduce the error on the last Friday of each // month after estimating the appropriate component. - TTimeVec months{ 2505600, // Fri 30th Jan - 4924800, // Fri 27th Feb - 7344000, // Fri 27th Mar - 9763200, // Fri 24th Apr - 12787200, // Fri 29th May - 15206400, // Fri 26th Jun - 18230400, // Fri 31st Jul - 18316800 }; + TTimeVec months{2505600, // Fri 30th Jan + 4924800, // Fri 27th Feb + 7344000, // Fri 27th Mar + 9763200, // Fri 24th Apr + 12787200, // Fri 29th May + 15206400, // Fri 26th Jun + 18230400, // Fri 31st Jul + 18316800}; core_t::TTime end = months.back(); - TDoubleVec errors{ 5.0, 15.0, 35.0, 32.0, 25.0, 36.0, 22.0, 12.0, 3.0 }; - - auto trend = [&months, &errors](core_t::TTime t) - { - double result = 20.0 + 10.0 * std::sin( boost::math::double_constants::two_pi - * static_cast<double>(t) - / static_cast<double>(DAY)); - auto i = std::lower_bound(months.begin(), months.end(), t - DAY); - if ( t >= *i + 7200 - && t < *i + 7200 + static_cast<core_t::TTime>(errors.size()) * HALF_HOUR) - { - result += errors[(t - (*i + 7200)) / HALF_HOUR]; - } - return result; - }; + TDoubleVec errors{5.0, 15.0, 35.0, 32.0, 25.0, 36.0, 22.0, 12.0, 3.0}; + + auto trend = [&months, &errors](core_t::TTime t) { + double result = 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / static_cast<double>(DAY)); + auto i = std::lower_bound(months.begin(), months.end(), t - DAY); + if (t >= *i + 7200 && t < *i + 7200 + static_cast<core_t::TTime>(errors.size()) * HALF_HOUR) { + result += errors[(t - (*i + 7200)) / HALF_HOUR]; + } + return result; + }; test::CRandomNumbers rng; @@ -2083,25 +1842,21 @@ //TDoubleVec values; TDoubleVec noise; - for (core_t::TTime time = 0, count = 0; time < end; time += HALF_HOUR) - { + for (core_t::TTime time = 0, count = 0; time < end; time += HALF_HOUR) { rng.generateNormalSamples(0.0, 4.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); - if (time - DAY == *std::lower_bound(months.begin(), months.end(), time - DAY)) - { + if (time - DAY == *std::lower_bound(months.begin(),
months.end(), time - DAY)) { LOG_DEBUG("*** time = " << time << " ***"); std::size_t largeErrorCount = 0u; - for (core_t::TTime time_ = time - DAY; time_ < time; time_ += TEN_MINS) - { + for (core_t::TTime time_ = time - DAY; time_ < time; time_ += TEN_MINS) { double prediction = maths::CBasicStatistics::mean(decomposition.value(time_)); - double variance = 4.0 * maths::CBasicStatistics::mean(decomposition.scale(time_, 4.0, 0.0)); - double actual = trend(time_); - if (std::fabs(prediction - actual) / std::sqrt(variance) > 3.0) - { + double variance = 4.0 * maths::CBasicStatistics::mean(decomposition.scale(time_, 4.0, 0.0)); + double actual = trend(time_); + if (std::fabs(prediction - actual) / std::sqrt(variance) > 3.0) { LOG_DEBUG(" prediction = " << prediction); LOG_DEBUG(" variance = " << variance); LOG_DEBUG(" trend = " << trend(time_)); @@ -2111,7 +1866,7 @@ LOG_DEBUG("large error count = " << largeErrorCount); CPPUNIT_ASSERT(++count > 4 || largeErrorCount > 15); - CPPUNIT_ASSERT( count < 5 || largeErrorCount <= 5); + CPPUNIT_ASSERT(count < 5 || largeErrorCount <= 5); } //times.push_back(time); @@ -2126,16 +1881,12 @@ //file << "plot(t, fe);\n"; } -void CTimeSeriesDecompositionTest::testConditionOfTrend() -{ +void CTimeSeriesDecompositionTest::testConditionOfTrend() { LOG_DEBUG("+------------------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testConditionOfTrend |"); LOG_DEBUG("+------------------------------------------------------+"); - auto trend = [](core_t::TTime time) - { - return std::pow(static_cast<double>(time) / static_cast<double>(WEEK), 2.0); - }; + auto trend = [](core_t::TTime time) { return std::pow(static_cast<double>(time) / static_cast<double>(WEEK), 2.0); }; const core_t::TTime bucketLength = 6 * HOUR; maths::CTimeSeriesDecomposition decomposition(0.0005, bucketLength); TDoubleVec noise; - for (core_t::TTime time = 0; time < 9 * YEAR; time += 6 * HOUR) - { + for (core_t::TTime time = 0; time < 9 * YEAR; time += 6 * HOUR) { rng.generateNormalSamples(0.0, 4.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); - if (time > 10 * WEEK) - { + if (time > 10 * WEEK) { CPPUNIT_ASSERT(std::fabs(decomposition.detrend(time, trend(time), 0.0)) < 3.0); } } } -void CTimeSeriesDecompositionTest::testSwap() -{ +void CTimeSeriesDecompositionTest::testSwap() { LOG_DEBUG("+------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testSwap |"); LOG_DEBUG("+------------------------------------------+"); @@ -2166,11 +1914,8 @@ TTimeVec times; TDoubleVec trend1; TDoubleVec trend2; - for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) - { - double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi - * static_cast<double>(time) - / static_cast<double>(DAY)); + for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) { + double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); times.push_back(time); trend1.push_back(daily); trend2.push_back(2.0 * daily); @@ -2183,10 +1928,9 @@ maths::CTimeSeriesDecomposition decomposition1(decayRate, bucketLength); maths::CTimeSeriesDecomposition decomposition2(2.0 * decayRate, 2 * bucketLength); - for (std::size_t i = 0u; i < times.size(); i += 2) - { + for (std::size_t i = 0u; i < times.size(); i += 2) { decomposition1.addPoint(times[i], trend1[i] + noise[i]); - decomposition2.addPoint(times[i], trend2[i] + noise[i+1]); + decomposition2.addPoint(times[i], trend2[i] + noise[i + 1]); } uint64_t checksum1 = decomposition1.checksum(); @@ -2200,8 +1944,7 @@ CPPUNIT_ASSERT_EQUAL(checksum2, decomposition1.checksum()); } -void CTimeSeriesDecompositionTest::testPersist() -{ +void CTimeSeriesDecompositionTest::testPersist() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testPersist |"); LOG_DEBUG("+---------------------------------------------+"); @@ -2212,11 +1955,8 @@ TTimeVec times; TDoubleVec trend; - for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) - { - double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi - * static_cast<double>(time) - / static_cast<double>(DAY)); + for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) { + double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); times.push_back(time); trend.push_back(daily); } @@ -2227,8 +1967,7 @@ maths::CTimeSeriesDecomposition origDecomposition(decayRate, bucketLength); - for (std::size_t i = 0u; i < times.size(); ++i) - { + for (std::size_t i = 0u; i < times.size(); ++i) { origDecomposition.addPoint(times[i], trend[i] + noise[i]); } @@ -2246,8 +1985,7 @@ CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); maths::STimeSeriesDecompositionRestoreParams params{ - decayRate + 0.1, bucketLength, - maths::SDistributionRestoreParams{maths_t::E_ContinuousData, decayRate + 0.1}}; + decayRate + 0.1, bucketLength, maths::SDistributionRestoreParams{maths_t::E_ContinuousData, decayRate + 0.1}}; maths::CTimeSeriesDecomposition restoredDecomposition(params, traverser); @@ -2260,8 +1998,7 @@ CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CTimeSeriesDecompositionTest::testUpgrade() -{ +void CTimeSeriesDecompositionTest::testUpgrade() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CTimeSeriesDecompositionTest::testUpgrade |"); LOG_DEBUG("+---------------------------------------------+"); // Check we can validly upgrade existing state.
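// testUpgrade below drives the restore path directly: state captured from an
// older version is parsed into a traverser and handed to the decomposition's
// restoring constructor together with explicit restore parameters. A minimal
// sketch of that wiring, using the calls visible in these tests (the parser
// type is assumed from the CRapidXml state-restore API above, and the XML
// string name is illustrative):
maths::STimeSeriesDecompositionRestoreParams params{
    0.1, HALF_HOUR, maths::SDistributionRestoreParams{maths_t::E_ContinuousData, 0.1}};
core::CRapidXmlParser parser;
CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(oldStateXml));
core::CRapidXmlStateRestoreTraverser traverser(parser);
maths::CTimeSeriesDecomposition restored(params, traverser);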
using TStrVec = std::vector<std::string>; - auto load = [](const std::string &name, std::string &result) - { - std::ifstream file; - file.open(name); - std::stringbuf buf; - file >> &buf; - result = buf.str(); - }; - auto stringToPair = [](const std::string &str) - { - double first; - double second; - std::size_t n{str.find(",")}; - CPPUNIT_ASSERT(n != std::string::npos); - core::CStringUtils::stringToType(str.substr(0, n), first); - core::CStringUtils::stringToType(str.substr(n + 1), second); - return TDoubleDoublePr{first, second}; - }; - - maths::STimeSeriesDecompositionRestoreParams params{ - 0.1, HALF_HOUR, maths::SDistributionRestoreParams{maths_t::E_ContinuousData, 0.1}}; + auto load = [](const std::string& name, std::string& result) { + std::ifstream file; + file.open(name); + std::stringbuf buf; + file >> &buf; + result = buf.str(); + }; + auto stringToPair = [](const std::string& str) { + double first; + double second; + std::size_t n{str.find(",")}; + CPPUNIT_ASSERT(n != std::string::npos); + core::CStringUtils::stringToType(str.substr(0, n), first); + core::CStringUtils::stringToType(str.substr(n + 1), second); + return TDoubleDoublePr{first, second}; + }; + + maths::STimeSeriesDecompositionRestoreParams params{0.1, HALF_HOUR, maths::SDistributionRestoreParams{maths_t::E_ContinuousData, 0.1}}; std::string empty; LOG_DEBUG("*** Seasonal and Calendar Components ***"); @@ -2330,26 +2064,15 @@ CPPUNIT_ASSERT_DOUBLES_EQUAL(5994.36, meanValue, 0.005); CPPUNIT_ASSERT_DOUBLES_EQUAL(286374.0, meanVariance, 0.5); - for (core_t::TTime time = 60480000, i = 0; - i < static_cast<core_t::TTime>(expectedValues.size()); - time += HALF_HOUR, ++i) - { + for (core_t::TTime time = 60480000, i = 0; i < static_cast<core_t::TTime>(expectedValues.size()); time += HALF_HOUR, ++i) { TDoubleDoublePr expectedValue{stringToPair(expectedValues[i])}; TDoubleDoublePr expectedScale{stringToPair(expectedScales[i])}; TDoubleDoublePr value{decomposition.value(time, 10.0)}; TDoubleDoublePr scale{decomposition.scale(time, 286374.0, 10.0)}; - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, - value.first, - 0.005 * std::fabs(expectedValue.first)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.second, - value.second, - 0.005 * std::fabs(expectedValue.second)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.first, - scale.first, - 0.005 * expectedScale.first); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.second, - scale.second, - 0.005 * std::max(expectedScale.second, 0.4)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, value.first, 0.005 * std::fabs(expectedValue.first)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.second, value.second, 0.005 * std::fabs(expectedValue.second)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.first, scale.first, 0.005 * expectedScale.first); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.second, scale.second, 0.005 * std::max(expectedScale.second, 0.4)); } } @@ -2395,34 +2118,19 @@ TMeanAccumulator meanValueError; TMeanAccumulator meanScaleError; - for (core_t::TTime time = 10366200, i = 0; - i < static_cast<core_t::TTime>(expectedValues.size()); - time += HALF_HOUR, ++i) - { + for (core_t::TTime time = 10366200, i = 0; i < static_cast<core_t::TTime>(expectedValues.size()); time += HALF_HOUR, ++i) { TDoubleDoublePr expectedValue{stringToPair(expectedValues[i])}; TDoubleDoublePr expectedScale{stringToPair(expectedScales[i])}; TDoubleDoublePr value{decomposition.value(time, 10.0)}; TDoubleDoublePr scale{decomposition.scale(time, 96.1654, 10.0)}; -
CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, - value.first, - 0.1 * std::fabs(expectedValue.first)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.second, - value.second, - 0.1 * std::fabs(expectedValue.second)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.first, - scale.first, - 0.3 * expectedScale.first); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.second, - scale.second, - 0.3 * expectedScale.second); - meanValueError.add( std::fabs(expectedValue.first - value.first) - / std::fabs(expectedValue.first)); - meanValueError.add( std::fabs(expectedValue.second - value.second) - / std::fabs(expectedValue.second)); - meanScaleError.add( std::fabs(expectedScale.first - scale.first) - / expectedScale.first); - meanScaleError.add( std::fabs(expectedScale.second - scale.second) - / expectedScale.second); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, value.first, 0.1 * std::fabs(expectedValue.first)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.second, value.second, 0.1 * std::fabs(expectedValue.second)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.first, scale.first, 0.3 * expectedScale.first); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.second, scale.second, 0.3 * expectedScale.second); + meanValueError.add(std::fabs(expectedValue.first - value.first) / std::fabs(expectedValue.first)); + meanValueError.add(std::fabs(expectedValue.second - value.second) / std::fabs(expectedValue.second)); + meanScaleError.add(std::fabs(expectedScale.first - scale.first) / expectedScale.first); + meanScaleError.add(std::fabs(expectedScale.second - scale.second) / expectedScale.second); } LOG_DEBUG("Mean value error = " << maths::CBasicStatistics::mean(meanValueError)); @@ -2432,71 +2140,50 @@ } } - -CppUnit::Test *CTimeSeriesDecompositionTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTimeSeriesDecompositionTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testSuperpositionOfSines", - &CTimeSeriesDecompositionTest::testSuperpositionOfSines) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testDistortedPeriodic", - &CTimeSeriesDecompositionTest::testDistortedPeriodic) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testMinimizeLongComponents", - &CTimeSeriesDecompositionTest::testMinimizeLongComponents) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testWeekend", - &CTimeSeriesDecompositionTest::testWeekend) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testSinglePeriodicity", - &CTimeSeriesDecompositionTest::testSinglePeriodicity) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testSeasonalOnset", - &CTimeSeriesDecompositionTest::testSeasonalOnset) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testVarianceScale", - &CTimeSeriesDecompositionTest::testVarianceScale) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testSpikeyDataProblemCase", - &CTimeSeriesDecompositionTest::testSpikeyDataProblemCase) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testDiurnalProblemCase", - &CTimeSeriesDecompositionTest::testDiurnalProblemCase) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase", - &CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues", - &CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testLongTermTrend", - &CTimeSeriesDecompositionTest::testLongTermTrend) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity", - &CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testNonDiurnal", - &CTimeSeriesDecompositionTest::testNonDiurnal) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testYearly", - &CTimeSeriesDecompositionTest::testYearly) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testCalendar", - &CTimeSeriesDecompositionTest::testCalendar) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testConditionOfTrend", - &CTimeSeriesDecompositionTest::testConditionOfTrend) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testSwap", - &CTimeSeriesDecompositionTest::testSwap) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testPersist", - &CTimeSeriesDecompositionTest::testPersist) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( - "CTimeSeriesDecompositionTest::testUpgrade", - &CTimeSeriesDecompositionTest::testUpgrade) ); +CppUnit::Test* CTimeSeriesDecompositionTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeSeriesDecompositionTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSuperpositionOfSines", + &CTimeSeriesDecompositionTest::testSuperpositionOfSines)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testDistortedPeriodic", + &CTimeSeriesDecompositionTest::testDistortedPeriodic)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testMinimizeLongComponents", + &CTimeSeriesDecompositionTest::testMinimizeLongComponents)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testWeekend", + &CTimeSeriesDecompositionTest::testWeekend)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSinglePeriodicity", + &CTimeSeriesDecompositionTest::testSinglePeriodicity)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSeasonalOnset", + &CTimeSeriesDecompositionTest::testSeasonalOnset)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testVarianceScale", + &CTimeSeriesDecompositionTest::testVarianceScale)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSpikeyDataProblemCase", + &CTimeSeriesDecompositionTest::testSpikeyDataProblemCase)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testDiurnalProblemCase", + &CTimeSeriesDecompositionTest::testDiurnalProblemCase)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( + "CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase", &CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues", + &CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testLongTermTrend", + &CTimeSeriesDecompositionTest::testLongTermTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>( + "CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity", &CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testNonDiurnal", + &CTimeSeriesDecompositionTest::testNonDiurnal)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testYearly", + &CTimeSeriesDecompositionTest::testYearly)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testCalendar", + &CTimeSeriesDecompositionTest::testCalendar)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testConditionOfTrend", + &CTimeSeriesDecompositionTest::testConditionOfTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSwap", + &CTimeSeriesDecompositionTest::testSwap)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testPersist", + &CTimeSeriesDecompositionTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testUpgrade", + &CTimeSeriesDecompositionTest::testUpgrade)); return suiteOfTests; } diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.h b/lib/maths/unittest/CTimeSeriesDecompositionTest.h index 64b2e0f152..56594c5051 100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.h +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.h @@ -9,31 +9,30 @@ #include -class CTimeSeriesDecompositionTest : public CppUnit::TestFixture -{ - public: - void testSuperpositionOfSines(); - void testDistortedPeriodic(); - void testMinimizeLongComponents(); - void testWeekend(); - void testSinglePeriodicity(); - void testSeasonalOnset(); - void testVarianceScale(); - void testSpikeyDataProblemCase(); - void testDiurnalProblemCase(); - void testComplexDiurnalProblemCase(); - void testDiurnalPeriodicityWithMissingValues(); - void testLongTermTrend(); - void testLongTermTrendAndPeriodicity(); - void testNonDiurnal(); - void testYearly(); - void testCalendar(); - void testConditionOfTrend(); - void testSwap(); - void testPersist(); - void testUpgrade(); +class CTimeSeriesDecompositionTest : public CppUnit::TestFixture { +public: + void testSuperpositionOfSines(); + void testDistortedPeriodic(); + void testMinimizeLongComponents(); + void testWeekend(); + void testSinglePeriodicity(); + void testSeasonalOnset(); + void testVarianceScale(); + void testSpikeyDataProblemCase(); + void testDiurnalProblemCase(); + void testComplexDiurnalProblemCase(); + void testDiurnalPeriodicityWithMissingValues(); + void testLongTermTrend(); + void testLongTermTrendAndPeriodicity(); + void testNonDiurnal(); + void testYearly(); + void testCalendar(); + void testConditionOfTrend(); + void testSwap(); + void testPersist(); + void testUpgrade(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTimeSeriesDecompositionTest_h diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index e06f6ae399..45376c9511 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -14,16 +14,16 @@ #include #include #include -#include #include +#include #include #include -#include
#include #include #include #include #include +#include #include #include @@ -36,8 +36,7 @@ using namespace ml; -namespace -{ +namespace { using namespace handy_typedefs; using TBool2Vec = core::CSmallVector; using TDoubleVec = std::vector; @@ -64,73 +63,50 @@ const double DECAY_RATE{0.0005}; const std::size_t TAG{0u}; //! \brief Implements the allocator for new correlate priors. -class CTimeSeriesCorrelateModelAllocator : public maths::CTimeSeriesCorrelateModelAllocator -{ - public: - //! Check if we can still allocate any correlations. - virtual bool areAllocationsAllowed() const - { - return true; - } +class CTimeSeriesCorrelateModelAllocator : public maths::CTimeSeriesCorrelateModelAllocator { +public: + //! Check if we can still allocate any correlations. + virtual bool areAllocationsAllowed() const { return true; } - //! Check if \p correlations exceeds the memory limit. - virtual bool exceedsLimit(std::size_t /*correlations*/) const - { - return false; - } + //! Check if \p correlations exceeds the memory limit. + virtual bool exceedsLimit(std::size_t /*correlations*/) const { return false; } - //! Get the maximum number of correlations we should model. - virtual std::size_t maxNumberCorrelations() const - { - return 5000; - } + //! Get the maximum number of correlations we should model. + virtual std::size_t maxNumberCorrelations() const { return 5000; } - //! Get the chunk size in which to allocate correlations. - virtual std::size_t chunkSize() const - { - return 500; - } + //! Get the chunk size in which to allocate correlations. + virtual std::size_t chunkSize() const { return 500; } - //! Create a new prior for a correlation model. - virtual TMultivariatePriorPtr newPrior() const - { - return TMultivariatePriorPtr( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, - DECAY_RATE).clone()); - } + //! Create a new prior for a correlation model. 
+ virtual TMultivariatePriorPtr newPrior() const { + return TMultivariatePriorPtr( + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE).clone()); + } }; -maths::CModelParams modelParams(core_t::TTime bucketLength) -{ +maths::CModelParams modelParams(core_t::TTime bucketLength) { using TTimeDoubleMap = std::map<core_t::TTime, double>; static TTimeDoubleMap learnRates; learnRates[bucketLength] = static_cast<double>(bucketLength) / 1800.0; double minimumSeasonalVarianceScale{MINIMUM_SEASONAL_SCALE}; - return maths::CModelParams{bucketLength, - learnRates[bucketLength], DECAY_RATE, - minimumSeasonalVarianceScale, - 12 * core::constants::HOUR, core::constants::DAY}; + return maths::CModelParams{ + bucketLength, learnRates[bucketLength], DECAY_RATE, minimumSeasonalVarianceScale, 12 * core::constants::HOUR, core::constants::DAY}; } -maths::CModelAddSamplesParams addSampleParams(double interval, - const maths_t::TWeightStyleVec &weightStyles, - const TDouble2Vec4VecVec &weights) +maths::CModelAddSamplesParams +addSampleParams(double interval, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4VecVec& weights) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(interval) - .weightStyles(weightStyles) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(interval).weightStyles(weightStyles).trendWeights(weights).priorWeights(weights); return params; } -maths::CModelAddSamplesParams addSampleParams(const TDouble2Vec4VecVec &weights) +maths::CModelAddSamplesParams addSampleParams(const TDouble2Vec4VecVec& weights) { return addSampleParams(1.0, maths::CConstantWeights::COUNT, weights); } -maths::CModelProbabilityParams computeProbabilityParams(const TDouble2Vec4Vec &weight) -{ +maths::CModelProbabilityParams computeProbabilityParams(const TDouble2Vec4Vec& weight) { maths::CModelProbabilityParams params; params.addCalculation(maths_t::E_TwoSided) - .seasonalConfidenceInterval(50.0) - .addBucketEmpty({false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .seasonalConfidenceInterval(50.0) + .addBucketEmpty({false}) + .weightStyles(maths::CConstantWeights::COUNT) + .addWeights(weight); return params; } -maths::CNormalMeanPrecConjugate univariateNormal(double decayRate = DECAY_RATE) -{ +maths::CNormalMeanPrecConjugate univariateNormal(double decayRate = DECAY_RATE) { return maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate); } -maths::CLogNormalMeanPrecConjugate univariateLogNormal(double decayRate = DECAY_RATE) -{ +maths::CLogNormalMeanPrecConjugate univariateLogNormal(double decayRate = DECAY_RATE) { return maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, decayRate); } @@ -165,10 +138,8 @@ maths::CMultimodalPrior univariateMultimodal(double decayRate = DECAY_RATE) { - maths::CXMeansOnline1d clusterer{maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - decayRate}; + maths::CXMeansOnline1d clusterer{ + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, decayRate}; return maths::CMultimodalPrior{maths_t::E_ContinuousData, clusterer, univariateNormal(), decayRate}; } @@ -187,49 +158,38 @@ maths::CMultivariateMultimodalPrior<3> multivariateMultimodal(double decayRate = DECAY_RATE) { - maths::CXMeansOnline clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight, - decayRate); + maths::CXMeansOnline clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, decayRate); return maths::CMultivariateMultimodalPrior<3>( - maths_t::E_ContinuousData, - clusterer, - maths::CMultivariateNormalConjugate<3>::nonInformativePrior(maths_t::E_ContinuousData, decayRate), - decayRate); + maths_t::E_ContinuousData, + clusterer, + maths::CMultivariateNormalConjugate<3>::nonInformativePrior(maths_t::E_ContinuousData, decayRate), + decayRate); } -maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary decayRateControllers(std::size_t dimension) -{ - return {{maths::CDecayRateController( maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease, - dimension), - maths::CDecayRateController( maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease - | maths::CDecayRateController::E_PredictionErrorDecrease, +maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary decayRateControllers(std::size_t dimension) { + return {{maths::CDecayRateController( + maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, dimension), + maths::CDecayRateController(maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease | + maths::CDecayRateController::E_PredictionErrorDecrease, dimension)}}; } void reinitializePrior(double learnRate, - const maths::CMultivariateTimeSeriesModel &model, - TDecompositionPtr10Vec &trends, - maths::CMultivariatePrior &prior, - TDecayRateController2Ary *controllers = 0) -{ + const maths::CMultivariateTimeSeriesModel& model, + TDecompositionPtr10Vec& trends, + maths::CMultivariatePrior& prior, + TDecayRateController2Ary* controllers = 0) { prior.setToNonInformative(0.0, prior.decayRate()); TDouble10Vec1Vec detrended_{TDouble10Vec(3)}; - for (const auto &value : model.slidingWindow()) - { - for (std::size_t i = 0u; i < value.second.size(); ++i) - { + for (const auto& value : model.slidingWindow()) { + for (std::size_t i = 0u; i < value.second.size(); ++i) { detrended_[0][i] = trends[i]->detrend(value.first, value.second[i], 0.0); } - prior.addSamples(maths::CConstantWeights::COUNT, - detrended_, - {{TDouble10Vec(value.second.size(), learnRate)}}); + prior.addSamples(maths::CConstantWeights::COUNT, detrended_, {{TDouble10Vec(value.second.size(), learnRate)}}); } - if (controllers) - { - for (auto &trend : trends) - { + if (controllers) { + for (auto& trend : trends) { trend->decayRate(trend->decayRate() / (*controllers)[0].multiplier()); } prior.decayRate(prior.decayRate() / (*controllers)[1].multiplier()); @@ -239,8 +199,7 @@ void reinitializePrior(double learnRate, } } -void CTimeSeriesModelTest::testClone() -{ +void CTimeSeriesModelTest::testClone() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CTimeSeriesModelTest::testClone |"); LOG_DEBUG("+-----------------------------------+"); @@ -255,19 +214,15 @@ void
CTimeSeriesModelTest::testClone() maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(1); maths::CTimeSeriesCorrelations correlations{MINIMUM_SIGNIFICANT_CORRELATION, DECAY_RATE}; - maths::CUnivariateTimeSeriesModel model(modelParams(bucketLength), 1, - trend, univariateNormal(), - &controllers); + maths::CUnivariateTimeSeriesModel model(modelParams(bucketLength), 1, trend, univariateNormal(), &controllers); model.modelCorrelations(correlations); TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); TDouble2Vec4VecVec weights{{{1.0}}}; core_t::TTime time{0}; - for (auto sample : samples) - { - model.addSamples(addSampleParams(weights), - {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + for (auto sample : samples) { + model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } @@ -281,8 +236,7 @@ void CTimeSeriesModelTest::testClone() { maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(3); - maths::CMultivariateTimeSeriesModel model(modelParams(bucketLength), - trend, multivariateNormal(), &controllers); + maths::CMultivariateTimeSeriesModel model(modelParams(bucketLength), trend, multivariateNormal(), &controllers); TDoubleVec mean{13.0, 9.0, 10.0}; TDoubleVecVec covariance{{3.5, 2.9, 0.5}, {2.9, 3.6, 0.1}, {0.5, 0.1, 2.1}}; @@ -291,10 +245,8 @@ void CTimeSeriesModelTest::testClone() TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; core_t::TTime time{0}; - for (const auto &sample : samples) - { - model.addSamples(addSampleParams(weights), - {core::make_triple(time, TDouble2Vec(sample), TAG)}); + for (const auto& sample : samples) { + model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } @@ -309,8 +261,7 @@ void CTimeSeriesModelTest::testClone() } } -void CTimeSeriesModelTest::testMode() -{ +void CTimeSeriesModelTest::testMode() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CTimeSeriesModelTest::testMode |"); LOG_DEBUG("+----------------------------------+"); @@ -332,8 +283,7 @@ void CTimeSeriesModelTest::testMode() maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; core_t::TTime time{0}; - for (auto sample : samples) - { + for (auto sample : samples) { trend.addPoint(time, sample); TDouble1Vec sample_{trend.detrend(time, sample, 0.0)}; prior.addSamples(maths::CConstantWeights::COUNT, sample_, maths::CConstantWeights::SINGLE_UNIT); @@ -344,14 +294,11 @@ void CTimeSeriesModelTest::testMode() TDouble2Vec4Vec weight{{1.0}}; TDouble2Vec4VecVec weights{weight}; time = 0; - for (auto sample : samples) - { - model.addSamples(addSampleParams(weights), - {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + for (auto sample : samples) { + model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } - double expectedMode{ maths::CBasicStatistics::mean(trend.value(time)) - + prior.marginalLikelihoodMode()}; + double expectedMode{maths::CBasicStatistics::mean(trend.value(time)) + prior.marginalLikelihoodMode()}; TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight)); LOG_DEBUG("expected mode = " << expectedMode); @@ -371,14 +318,12 @@ void CTimeSeriesModelTest::testMode() maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; core_t::TTime time{0}; - for (auto &sample : 
samples) - { - sample += 20.0 + 10.0 * std::sin( boost::math::double_constants::two_pi + for (auto& sample : samples) { + sample += 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 86400.0); time += bucketLength; } @@ -386,18 +331,12 @@ void CTimeSeriesModelTest::testMode() TDouble2Vec4Vec weight{{1.0}}; TDouble2Vec4VecVec weights{weight}; time = 0; - for (auto sample : samples) - { - model.addSamples(addSampleParams(weights), - {core::make_triple(time, TDouble2Vec{sample}, TAG)}); - if (trend.addPoint(time, sample)) - { + for (auto sample : samples) { + model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + if (trend.addPoint(time, sample)) { prior.setToNonInformative(0.0, DECAY_RATE); - for (const auto &value : model.slidingWindow()) - { - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(value.first, value.second, 0.0)}, - {{learnRate}}); + for (const auto& value : model.slidingWindow()) { + prior.addSamples(maths::CConstantWeights::COUNT, {trend.detrend(value.first, value.second, 0.0)}, {{learnRate}}); } } TDouble1Vec sample_{trend.detrend(time, sample, 0.0)}; @@ -406,8 +345,7 @@ void CTimeSeriesModelTest::testMode() time += bucketLength; } - double expectedMode{ maths::CBasicStatistics::mean(trend.value(time)) - + prior.marginalLikelihoodMode()}; + double expectedMode{maths::CBasicStatistics::mean(trend.value(time)) + prior.marginalLikelihoodMode()}; TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight)); LOG_DEBUG("expected mode = " << expectedMode); @@ -430,37 +368,28 @@ void CTimeSeriesModelTest::testMode() maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior}; core_t::TTime time{0}; - for (const auto &sample : samples) - { + for (const auto& sample : samples) { TDouble10Vec1Vec detrended{TDouble10Vec(3)}; - for (std::size_t i = 0u; i < sample.size(); ++i) - { + for (std::size_t i = 0u; i < sample.size(); ++i) { trends[i]->addPoint(time, sample[i]); detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); } - prior.addSamples(maths::CConstantWeights::COUNT, - detrended, - maths::CConstantWeights::singleUnit(3)); + prior.addSamples(maths::CConstantWeights::COUNT, detrended, maths::CConstantWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); } TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; time = 0; - for (const auto &sample : samples) - { - model.addSamples(addSampleParams(weights), - {core::make_triple(time, TDouble2Vec(sample), TAG)}); + for (const auto& sample : samples) { + model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } - TDouble2Vec expectedMode(prior.marginalLikelihoodMode(maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3))); + TDouble2Vec expectedMode( + prior.marginalLikelihoodMode(maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3))); - for (std::size_t i = 0u; i < trends.size(); ++i) - { + for (std::size_t i = 0u; i < trends.size(); ++i) { expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->value(time)); } - TDouble2Vec mode(model.mode(time, - maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3))); + TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT,
maths::CConstantWeights::unit(3))); LOG_DEBUG("expected mode = " << expectedMode); LOG_DEBUG("mode = " << mode); @@ -485,17 +414,14 @@ void CTimeSeriesModelTest::testMode() maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior}; core_t::TTime time{0}; - for (auto &sample : samples) - { + for (auto& sample : samples) { double amplitude{10.0}; - for (std::size_t i = 0u; i < sample.size(); ++i) - { - sample[i] += 30.0 + amplitude * std::sin( boost::math::double_constants::two_pi + for (std::size_t i = 0u; i < sample.size(); ++i) { + sample[i] += 30.0 + amplitude * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 86400.0); amplitude += 4.0; } @@ -504,38 +430,29 @@ void CTimeSeriesModelTest::testMode() TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; time = 0; - for (const auto &sample : samples) - { - model.addSamples(addSampleParams(weights), - {core::make_triple(time, TDouble2Vec(sample), TAG)}); + for (const auto& sample : samples) { + model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)}); bool reinitialize{false}; TDouble10Vec1Vec detrended{TDouble10Vec(3)}; - for (std::size_t i = 0u; i < sample.size(); ++i) - { - reinitialize |= trends[i]->addPoint(time, sample[i]); + for (std::size_t i = 0u; i < sample.size(); ++i) { + reinitialize |= trends[i]->addPoint(time, sample[i]); detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); } - if (reinitialize) - { + if (reinitialize) { reinitializePrior(learnRate, model, trends, prior); } - prior.addSamples(maths::CConstantWeights::COUNT, - detrended, - maths::CConstantWeights::singleUnit(3)); + prior.addSamples(maths::CConstantWeights::COUNT, detrended, maths::CConstantWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); time += bucketLength; } - TDouble2Vec expectedMode(prior.marginalLikelihoodMode(maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3))); + TDouble2Vec expectedMode( + prior.marginalLikelihoodMode(maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3))); - for (std::size_t i = 0u; i < trends.size(); ++i) - { + for (std::size_t i = 0u; i < trends.size(); ++i) { expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->value(time)); } - TDouble2Vec mode(model.mode(time, - maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3))); + TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3))); LOG_DEBUG("expected mode = " << expectedMode); LOG_DEBUG("mode = " << mode); @@ -546,8 +463,7 @@ void CTimeSeriesModelTest::testMode() } } -void CTimeSeriesModelTest::testAddBucketValue() -{ +void CTimeSeriesModelTest::testAddBucketValue() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| CTimeSeriesModelTest::testAddBucketValue |"); LOG_DEBUG("+--------------------------------------------+"); @@ -560,21 +476,19 @@ void CTimeSeriesModelTest::testAddBucketValue() maths::CLogNormalMeanPrecConjugate prior{univariateLogNormal()}; maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior}; - TTimeDouble2VecSizeTrVec samples{core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG), - core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG), - core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG), -
core::make_triple(core_t::TTime{12}, TDouble2Vec{1.2}, TAG),}; + TTimeDouble2VecSizeTrVec samples{ + core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG), + core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG), + core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG), + core::make_triple(core_t::TTime{12}, TDouble2Vec{1.2}, TAG), + }; TDouble2Vec4VecVec weights{{{1.0}}, {{1.5}}, {{0.9}}, {{1.9}}}; - for (std::size_t i = 0u; i < samples.size(); ++i) - { - prior.addSamples(maths::CConstantWeights::COUNT, - {samples[i].second[0]}, - {{weights[i][0][0]}}); + for (std::size_t i = 0u; i < samples.size(); ++i) { + prior.addSamples(maths::CConstantWeights::COUNT, {samples[i].second[0]}, {{weights[i][0][0]}}); } prior.propagateForwardsByTime(1.0); - prior.adjustOffset(maths::CConstantWeights::COUNT, - {-1.0}, maths::CConstantWeights::SINGLE_UNIT); + prior.adjustOffset(maths::CConstantWeights::COUNT, {-1.0}, maths::CConstantWeights::SINGLE_UNIT); model.addSamples(addSampleParams(weights), samples); model.addBucketValue({core::make_triple(core_t::TTime{20}, TDouble2Vec{-1.0}, TAG)}); @@ -582,8 +496,7 @@ void CTimeSeriesModelTest::testAddBucketValue() CPPUNIT_ASSERT_EQUAL(prior.checksum(), model.residualModel().checksum()); } -void CTimeSeriesModelTest::testAddSamples() -{ +void CTimeSeriesModelTest::testAddSamples() { LOG_DEBUG("+----------------------------------------+"); LOG_DEBUG("| CTimeSeriesModelTest::testAddSamples |"); LOG_DEBUG("+----------------------------------------+"); @@ -642,22 +555,17 @@ void CTimeSeriesModelTest::testAddSamples() model.addSamples(addSampleParams(weights), samples); - for (std::size_t i = 0u; i < trends.size(); ++i) - { - trends[i]->addPoint(samples[1].first, samples[1].second[i], - maths::CConstantWeights::COUNT, TDouble4Vec{weights[1][0][i]}); - trends[i]->addPoint(samples[2].first, samples[2].second[i], - maths::CConstantWeights::COUNT, TDouble4Vec{weights[2][0][i]}); - trends[i]->addPoint(samples[0].first, samples[0].second[i], - maths::CConstantWeights::COUNT, TDouble4Vec{weights[0][0][i]}); + for (std::size_t i = 0u; i < trends.size(); ++i) { + trends[i]->addPoint(samples[1].first, samples[1].second[i], maths::CConstantWeights::COUNT, TDouble4Vec{weights[1][0][i]}); + trends[i]->addPoint(samples[2].first, samples[2].second[i], maths::CConstantWeights::COUNT, TDouble4Vec{weights[2][0][i]}); + trends[i]->addPoint(samples[0].first, samples[0].second[i], maths::CConstantWeights::COUNT, TDouble4Vec{weights[0][0][i]}); } TDouble10Vec1Vec samples_{samples[2].second, samples[0].second, samples[1].second}; TDouble10Vec4Vec1Vec weights_{{weights[2][0]}, {weights[0][0]}, {weights[1][0]}}; prior.addSamples(maths::CConstantWeights::COUNT, samples_, weights_); prior.propagateForwardsByTime(1.0); - for (std::size_t i = 0u; i < trends.size(); ++i) - { + for (std::size_t i = 0u; i < trends.size(); ++i) { uint64_t checksum1{trends[i]->checksum()}; uint64_t checksum2{model.trendModel()[i]->checksum()}; LOG_DEBUG("checksum1 = " << checksum1 << " checksum2 = " << checksum2); @@ -669,9 +577,8 @@ void CTimeSeriesModelTest::testAddSamples() CPPUNIT_ASSERT_EQUAL(checksum1, checksum2); } - maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight, - maths_t::E_SampleCountVarianceScaleWeight}; + maths_t::TWeightStyleVec weightStyles{ + maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight, maths_t::E_SampleCountVarianceScaleWeight}; LOG_DEBUG("Propagation interval univariate"); { @@ -684,8 +591,7 @@ void 
        TDouble2Vec4VecVec weights{{{0.9}, {1.5}, {1.1}}};

        core_t::TTime time{0};
-        for (std::size_t i = 0u; i < 3; ++i)
-        {
+        for (std::size_t i = 0u; i < 3; ++i) {
            TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)};
            model.addSamples(addSampleParams(interval[i], weightStyles, weights), sample);
@@ -711,25 +617,18 @@ void CTimeSeriesModelTest::testAddSamples()
        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior};

        double interval[]{1.0, 1.1, 0.4};
-        TDouble2Vec samples[]{{13.5, 13.4, 13.3},
-                              {13.9, 13.8, 13.7},
-                              {20.1, 20.0, 10.9}};
+        TDouble2Vec samples[]{{13.5, 13.4, 13.3}, {13.9, 13.8, 13.7}, {20.1, 20.0, 10.9}};
        TDouble2Vec4VecVec weights{{{0.1, 0.1, 0.2}, {1.0, 1.1, 1.2}, {2.0, 2.1, 2.2}},
                                   {{0.5, 0.6, 0.7}, {2.0, 2.1, 2.2}, {1.0, 1.1, 1.2}},
                                   {{0.9, 1.0, 1.0}, {0.9, 1.0, 1.0}, {1.9, 2.0, 2.0}}};

        core_t::TTime time{0};
-        for (std::size_t i = 0u; i < 3; ++i)
-        {
+        for (std::size_t i = 0u; i < 3; ++i) {
            TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)};
            model.addSamples(addSampleParams(interval[i], weightStyles, weights), sample);

-            TDouble10Vec4Vec weight{TDouble10Vec(weights[0][0]),
-                                    TDouble10Vec(weights[0][1]),
-                                    TDouble10Vec(weights[0][2])};
-            prior.addSamples(weightStyles,
-                             {TDouble10Vec(samples[i])},
-                             {weight});
+            TDouble10Vec4Vec weight{TDouble10Vec(weights[0][0]), TDouble10Vec(weights[0][1]), TDouble10Vec(weights[0][2])};
+            prior.addSamples(weightStyles, {TDouble10Vec(samples[i])}, {weight});
            prior.propagateForwardsByTime(interval[i]);

            uint64_t checksum1{prior.checksum()};
@@ -756,28 +655,24 @@ void CTimeSeriesModelTest::testAddSamples()
        TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)};

        core_t::TTime time{0};
-        for (auto noise : samples)
-        {
-            double sample{20.0 + 4.0 * std::sin( boost::math::double_constants::two_pi
-                                                 * static_cast<double>(time) / 86400.0)
-                               + (time / bucketLength > 1800 ? 10.0 : 0.0) + noise};
+        for (auto noise : samples) {
+            double sample{20.0 +
+                          4.0 * std::sin(boost::math::double_constants::two_pi
+                                         * static_cast<double>(time) / 86400.0) +
+                          (time / bucketLength > 1800 ? 10.0 : 0.0) + noise};

            TTimeDouble2VecSizeTrVec sample_{core::make_triple(time, TDouble2Vec{sample}, TAG)};
            model.addSamples(addSampleParams(weights), sample_);

-            if (trend.addPoint(time, sample))
-            {
+            if (trend.addPoint(time, sample)) {
                trend.decayRate(trend.decayRate() / controllers[0].multiplier());
                prior.setToNonInformative(0.0, prior.decayRate());
-                for (const auto &value : model.slidingWindow())
-                {
-                    prior.addSamples(maths::CConstantWeights::COUNT,
-                                     {trend.detrend(value.first, value.second, 0.0)},
-                                     {{learnRate}});
+                for (const auto& value : model.slidingWindow()) {
+                    prior.addSamples(maths::CConstantWeights::COUNT, {trend.detrend(value.first, value.second, 0.0)}, {{learnRate}});
                }
                prior.decayRate(prior.decayRate() / controllers[1].multiplier());
                controllers[0].reset();
@@ -787,17 +682,12 @@ void CTimeSeriesModelTest::testAddSamples()
            prior.addSamples(maths::CConstantWeights::COUNT, {detrended}, weight);
            prior.propagateForwardsByTime(1.0);

-            if (trend.initialized())
-            {
-                double multiplier{controllers[0].multiplier({trend.meanValue(time)},
-                                                            {{detrended}},
-                                                            bucketLength,
-                                                            model.params().learnRate(),
-                                                            DECAY_RATE)};
+            if (trend.initialized()) {
+                double multiplier{controllers[0].multiplier(
+                    {trend.meanValue(time)}, {{detrended}}, bucketLength, model.params().learnRate(), DECAY_RATE)};
                trend.decayRate(multiplier * trend.decayRate());
            }
-            if (prior.numberSamples() > 20.0)
-            {
+            if (prior.numberSamples() > 20.0) {
                double multiplier{controllers[1].multiplier({prior.marginalLikelihoodMean()},
                                                            {{detrended - prior.marginalLikelihoodMean()}},
                                                            bucketLength,
@@ -838,23 +728,22 @@ void CTimeSeriesModelTest::testAddSamples()
        TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}};

        core_t::TTime time{0};
-        for (auto &sample : samples)
-        {
+        for (auto& sample : samples) {
            bool reinitialize{false};
            bool hasTrend{false};

            TDouble10Vec1Vec detrended{TDouble10Vec(3)};
            TDouble1Vec mean(3);

            double amplitude{10.0};
-            for (std::size_t i = 0u; i < sample.size(); ++i)
-            {
-                sample[i] = 30.0 + amplitude * std::sin( boost::math::double_constants::two_pi
-                                                         * static_cast<double>(time) / 86400.0)
-                                 + (time / bucketLength > 1800 ? 10.0 : 0.0) + sample[i];
+            for (std::size_t i = 0u; i < sample.size(); ++i) {
+                sample[i] = 30.0 +
+                            amplitude * std::sin(boost::math::double_constants::two_pi
+                                                 * static_cast<double>(time) / 86400.0) +
+                            (time / bucketLength > 1800 ? 10.0 : 0.0) + sample[i];
                reinitialize |= trends[i]->addPoint(time, sample[i]);
                detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0);
                mean[i] = trends[i]->meanValue(time);
@@ -866,42 +755,30 @@ void CTimeSeriesModelTest::testAddSamples()
            model.addSamples(addSampleParams(weights), sample_);

-            if (reinitialize)
-            {
+            if (reinitialize) {
                reinitializePrior(learnRate, model, trends, prior, &controllers);
            }
            prior.addSamples(maths::CConstantWeights::COUNT, detrended, weight);
            prior.propagateForwardsByTime(1.0);

-            if (hasTrend)
-            {
-                double multiplier{controllers[0].multiplier(mean, {detrended[0]},
-                                                            bucketLength,
-                                                            model.params().learnRate(),
-                                                            DECAY_RATE)};
-                for (const auto &trend : trends)
-                {
+            if (hasTrend) {
+                double multiplier{controllers[0].multiplier(mean, {detrended[0]}, bucketLength, model.params().learnRate(), DECAY_RATE)};
+                for (const auto& trend : trends) {
                    trend->decayRate(multiplier * trend->decayRate());
                }
            }
-            if (prior.numberSamples() > 20.0)
-            {
+            if (prior.numberSamples() > 20.0) {
                TDouble1Vec prediction(prior.marginalLikelihoodMean());
                TDouble1Vec predictionError(3);
-                for (std::size_t d = 0u; d < 3; ++d)
-                {
+                for (std::size_t d = 0u; d < 3; ++d) {
                    predictionError[d] = detrended[0][d] - prediction[d];
                }
-                double multiplier{controllers[1].multiplier(prediction,
-                                                            {predictionError},
-                                                            bucketLength,
-                                                            model.params().learnRate(),
-                                                            DECAY_RATE)};
+                double multiplier{
+                    controllers[1].multiplier(prediction, {predictionError}, bucketLength, model.params().learnRate(), DECAY_RATE)};
                prior.decayRate(multiplier * prior.decayRate());
            }

-            for (std::size_t i = 0u; i < trends.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < trends.size(); ++i) {
                uint64_t checksum1{trends[i]->checksum()};
                uint64_t checksum2{model.trendModel()[i]->checksum()};
                CPPUNIT_ASSERT_EQUAL(checksum1, checksum2);
@@ -915,8 +792,7 @@ void CTimeSeriesModelTest::testAddSamples()
    }
}

-void CTimeSeriesModelTest::testPredict()
-{
+void CTimeSeriesModelTest::testPredict() {
    LOG_DEBUG("+-------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testPredict |");
    LOG_DEBUG("+-------------------------------------+");
@@ -939,52 +815,41 @@ void CTimeSeriesModelTest::testPredict()
        rng.generateNormalSamples(0.0, 4.0, 1008, samples);

        TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)};
        core_t::TTime time{0};
-        for (auto sample : samples)
-        {
-            sample += 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                             * static_cast<double>(time) / 86400.0);
+        for (auto sample : samples) {
+            sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
+                                            * static_cast<double>(time) / 86400.0);

-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});

-            if (trend.addPoint(time, sample))
-            {
+            if (trend.addPoint(time, sample)) {
                prior.setToNonInformative(0.0, DECAY_RATE);
-                for (const auto &value : model.slidingWindow())
-                {
-                    prior.addSamples(maths::CConstantWeights::COUNT,
-                                     {trend.detrend(value.first, value.second, 0.0)},
-                                     {{learnRate}});
+                for (const auto& value : model.slidingWindow()) {
+                    prior.addSamples(maths::CConstantWeights::COUNT, {trend.detrend(value.first, value.second, 0.0)}, {{learnRate}});
                }
            }
-            prior.addSamples(maths::CConstantWeights::COUNT,
-                             {trend.detrend(time, sample, 0.0)},
-                             maths::CConstantWeights::SINGLE_UNIT);
+            prior.addSamples(maths::CConstantWeights::COUNT, {trend.detrend(time, sample, 0.0)}, maths::CConstantWeights::SINGLE_UNIT);
            prior.propagateForwardsByTime(1.0);
            time += bucketLength;
        }

        TMeanAccumulator meanError;
-        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600)
-        {
-            double trend_{10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                                 * static_cast<double>(time_) / 86400.0)};
-            double expected{  maths::CBasicStatistics::mean(trend.value(time_))
-                            + maths::CBasicStatistics::mean(prior.marginalLikelihoodConfidenceInterval(0.0))};
+        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
+            double trend_{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
+                                                * static_cast<double>(time_) / 86400.0)};
+            double expected{maths::CBasicStatistics::mean(trend.value(time_))
+                            + maths::CBasicStatistics::mean(prior.marginalLikelihoodConfidenceInterval(0.0))};
            double predicted{model.predict(time_)[0]};
-            LOG_DEBUG("expected = " << expected
-                      << " predicted = " << predicted
-                      << " (trend = " << trend_ << ")");
+            LOG_DEBUG("expected = " << expected << " predicted = " << predicted << " (trend = " << trend_ << ")");
            CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, predicted, 1e-3 * expected);
            CPPUNIT_ASSERT(std::fabs(trend_ - predicted) / trend_ < 0.3);
            meanError.add(std::fabs(trend_ - predicted) / trend_);
@@ -1002,7 +867,7 @@ void CTimeSeriesModelTest::testPredict()
        TMeanAccumulator modes[2];
        TDoubleVec samples, samples_;
-        rng.generateNormalSamples( 0.0, 4.0, 500, samples);
+        rng.generateNormalSamples(0.0, 4.0, 500, samples);
        rng.generateNormalSamples(10.0, 4.0, 500, samples_);
        modes[0].add(samples);
        modes[1].add(samples_);
@@ -1011,27 +876,18 @@ void CTimeSeriesModelTest::testPredict()
        TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)};
        core_t::TTime time{0};
-        for (auto sample : samples)
-        {
-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+        for (auto sample : samples) {
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

        maths::CModel::TSizeDoublePr1Vec empty;
-        double predicted[]{model.predict(time, empty, {-2.0})[0],
-                           model.predict(time, empty, {12.0})[0]};
-
-        LOG_DEBUG("expected(0) = " << maths::CBasicStatistics::mean(modes[0])
-                  << " actual(0) = " << predicted[0]);
-        LOG_DEBUG("expected(1) = " << maths::CBasicStatistics::mean(modes[1])
-                  << " actual(1) = " << predicted[1]);
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(modes[0]),
-                                     predicted[0],
-                                     0.1 * maths::CBasicStatistics::mean(modes[0]));
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(modes[1]),
-                                     predicted[1],
-                                     0.01 * maths::CBasicStatistics::mean(modes[1]));
+        double predicted[]{model.predict(time, empty, {-2.0})[0], model.predict(time, empty, {12.0})[0]};
+
+        LOG_DEBUG("expected(0) = " << maths::CBasicStatistics::mean(modes[0]) << " actual(0) = " << predicted[0]);
+        LOG_DEBUG("expected(1) = " << maths::CBasicStatistics::mean(modes[1]) << " actual(1) = " << predicted[1]);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(modes[0]), predicted[0], 0.1 * maths::CBasicStatistics::mean(modes[0]));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(modes[1]), predicted[1], 0.01 * maths::CBasicStatistics::mean(modes[1]));
    }

    LOG_DEBUG("Multivariate Seasonal")
    {
@@ -1045,67 +901,53 @@ void CTimeSeriesModelTest::testPredict()
        TDoubleVecVec samples;
        TDoubleVec mean{0.0, 2.0, 1.0};
-        rng.generateMultivariateNormalSamples(mean,
-                                              {{3.0, 2.9, 0.5},
-                                               {2.9, 2.6, 0.1},
-                                               {0.5, 0.1, 2.0}},
-                                              1000, samples);
+        rng.generateMultivariateNormalSamples(mean, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);

        TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
        core_t::TTime time{0};
-        for (auto &sample : samples)
-        {
-            for (auto &coordinate : sample)
-            {
-                coordinate += 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                                     * static_cast<double>(time) / 86400.0);
+        for (auto& sample : samples) {
+            for (auto& coordinate : sample) {
+                coordinate += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
+                                                    * static_cast<double>(time) / 86400.0);
            }

            bool reinitialize{false};
            TDouble10Vec detrended;
-            for (std::size_t i = 0u; i < sample.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < sample.size(); ++i) {
                reinitialize |= trends[i]->addPoint(time, sample[i]);
                detrended.push_back(trends[i]->detrend(time, sample[i], 0.0));
            }
-            if (reinitialize)
-            {
+            if (reinitialize) {
                reinitializePrior(learnRate, model, trends, prior);
            }
-            prior.addSamples(maths::CConstantWeights::COUNT,
-                             {detrended},
-                             maths::CConstantWeights::singleUnit(3));
+            prior.addSamples(maths::CConstantWeights::COUNT, {detrended}, maths::CConstantWeights::singleUnit(3));
            prior.propagateForwardsByTime(1.0);

-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

-        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600)
-        {
+        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
            maths::CMultivariatePrior::TSize10Vec marginalize{1, 2};
            maths::CMultivariatePrior::TSizeDoublePr10Vec condition;
-            for (std::size_t i = 0u; i < mean.size(); ++i)
-            {
-                double trend_{mean[i] + 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                                               * static_cast<double>(time_) / 86400.0)};
+            for (std::size_t i = 0u; i < mean.size(); ++i) {
+                double trend_{mean[i] + 10.0 +
+                              5.0 * std::sin(boost::math::double_constants::two_pi
+                                             * static_cast<double>(time_) / 86400.0)};
                maths::CMultivariatePrior::TUnivariatePriorPtr margin{prior.univariate(marginalize, condition).first};
-                double expected{  maths::CBasicStatistics::mean(trends[i]->value(time_))
-                                + maths::CBasicStatistics::mean(margin->marginalLikelihoodConfidenceInterval(0.0))};
+                double expected{maths::CBasicStatistics::mean(trends[i]->value(time_)) +
+                                maths::CBasicStatistics::mean(margin->marginalLikelihoodConfidenceInterval(0.0))};
                double predicted{model.predict(time_)[i]};
                --marginalize[std::min(i, marginalize.size() - 1)];
-                LOG_DEBUG("expected = " << expected
-                          << " predicted = " << predicted
-                          << " (trend = " << trend_ << ")");
+                LOG_DEBUG("expected = " << expected << " predicted = " << predicted << " (trend = " << trend_ << ")");
                CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, predicted, 1e-3 * expected);
                CPPUNIT_ASSERT(std::fabs(trend_ - predicted) / trend_ < 0.3);
            }
@@ -1128,17 +970,13 @@ void CTimeSeriesModelTest::testPredict()
        rng.generateMultivariateNormalSamples(means[0], covariance, 500, samples);
        TDoubleVecVec samples_;
        rng.generateMultivariateNormalSamples(means[1], covariance, 500, samples_);
-        for (const auto &sample : samples)
-        {
-            for (std::size_t i = 0u; i < 3; ++i)
-            {
+        for (const auto& sample : samples) {
+            for (std::size_t i = 0u; i < 3; ++i) {
                modes[0][i].add(sample[i]);
            }
        }
-        for (const auto &sample : samples_)
-        {
-            for (std::size_t i = 0u; i < 3; ++i)
-            {
+        for (const auto& sample : samples_) {
+            for (std::size_t i = 0u; i < 3; ++i) {
                modes[1][i].add(sample[i]);
            }
        }
@@ -1148,20 +986,15 @@ void CTimeSeriesModelTest::testPredict()
        TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
        core_t::TTime time{0};
-        for (const auto &sample : samples)
-        {
-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
+        for (const auto& sample : samples) {
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

        maths::CModel::TSizeDoublePr1Vec empty;
-        TDouble2Vec expected[]{maths::CBasicStatistics::mean(modes[0]),
-                               maths::CBasicStatistics::mean(modes[1])};
-        TDouble2Vec predicted[]{model.predict(time, empty, { 0.0,  0.0,  0.0}),
-                                model.predict(time, empty, {10.0, 10.0, 10.0})};
-        for (std::size_t i = 0u; i < 3; ++i)
-        {
+        TDouble2Vec expected[]{maths::CBasicStatistics::mean(modes[0]), maths::CBasicStatistics::mean(modes[1])};
+        TDouble2Vec predicted[]{model.predict(time, empty, {0.0, 0.0, 0.0}), model.predict(time, empty, {10.0, 10.0, 10.0})};
+        for (std::size_t i = 0u; i < 3; ++i) {
            LOG_DEBUG("expected(0) = " << expected[0][i] << " actual(0) = " << predicted[0][i]);
            LOG_DEBUG("expected(1) = " << expected[1][i] << " actual(1) = " << predicted[1][i]);
            CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[0][i], predicted[0][i], std::fabs(0.2 * expected[0][i]));
@@ -1170,8 +1003,7 @@ void CTimeSeriesModelTest::testPredict()
    }
}

-void CTimeSeriesModelTest::testProbability()
-{
+void CTimeSeriesModelTest::testProbability() {
    LOG_DEBUG("+-----------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testProbability |");
    LOG_DEBUG("+-----------------------------------------+");
@@ -1192,37 +1024,34 @@ void CTimeSeriesModelTest::testProbability()
    LOG_DEBUG("Univariate");
    {
        maths::CUnivariateTimeSeriesModel models[]{
-            maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 1,
-                                              maths::CTimeSeriesDecompositionStub{},
-                                              univariateNormal(), 0, false},
-            maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 1,
-                                              maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
-                                              univariateNormal(), 0, false}};
+            maths::CUnivariateTimeSeriesModel{
+                modelParams(bucketLength), 1, maths::CTimeSeriesDecompositionStub{}, univariateNormal(), 0, false},
+            maths::CUnivariateTimeSeriesModel{modelParams(bucketLength),
+                                              1,
+                                              maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
+                                              univariateNormal(),
+                                              0,
+                                              false}};

        TDoubleVec samples;
        rng.generateNormalSamples(10.0, 4.0, 1000, samples);

        core_t::TTime time{0};
        const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(1)};
-        for (auto sample : samples)
-        {
-            double trend{5.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                               * static_cast<double>(time) / 86400.0)};
-            models[0].addSamples(addSampleParams(weight),
-                                 {core::make_triple(time, TDouble2Vec{sample}, TAG)});
-            models[1].addSamples(addSampleParams(weight),
-                                 {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
+        for (auto sample : samples) {
+            double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 86400.0)};
+            models[0].addSamples(addSampleParams(weight), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            models[1].addSamples(addSampleParams(weight), {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
@@ -1234,45 +1063,39 @@ void CTimeSeriesModelTest::testProbability()
        TTime2Vec1Vec time_{{time}};
        TDouble2Vec sample{15.0};

-        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided,
-                                                        maths_t::E_OneSidedAbove};
+        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided, maths_t::E_OneSidedAbove};
        double confidences[]{0.0, 20.0, 50.0};
        bool empties[]{true, false};
        maths_t::TWeightStyleVec weightStyles[]{{maths_t::E_SampleCountVarianceScaleWeight},
-                                                {maths_t::E_SampleCountVarianceScaleWeight,
-                                                 maths_t::E_SampleSeasonalVarianceScaleWeight}};
+                                                {maths_t::E_SampleCountVarianceScaleWeight, maths_t::E_SampleSeasonalVarianceScaleWeight}};
        TDouble2Vec4Vec weights[]{{{0.9}}, {{1.1}, {1.8}}};

-        for (auto calculation : calculations)
-        {
+        for (auto calculation : calculations) {
            LOG_DEBUG("calculation = " << calculation);
-            for (auto confidence : confidences)
-            {
+            for (auto confidence : confidences) {
                LOG_DEBUG(" confidence = " << confidence);
-                for (auto empty : empties)
-                {
+                for (auto empty : empties) {
                    LOG_DEBUG("  empty = " << empty);
-                    for (std::size_t i = 0u; i < boost::size(weightStyles); ++i)
-                    {
+                    for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
                        LOG_DEBUG("   weights = " << core::CContainerPrinter::print(weights[i]));

                        double expectedProbability[2];
                        maths_t::ETail expectedTail[2];
                        {
                            TDouble4Vec weights_;
-                            for (const auto &weight_ : weights[i])
-                            {
+                            for (const auto& weight_ : weights[i]) {
                                weights_.push_back(weight_[0]);
                            }
                            double lb[2], ub[2];
                            models[0].residualModel().probabilityOfLessLikelySamples(
-                                calculation, weightStyles[i],
-                                sample, {weights_},
-                                lb[0], ub[0], expectedTail[0]);
+                                calculation, weightStyles[i], sample, {weights_}, lb[0], ub[0], expectedTail[0]);
                            models[1].residualModel().probabilityOfLessLikelySamples(
-                                calculation, weightStyles[i],
-                                {models[1].trendModel().detrend(time, sample[0], confidence)},
-                                {weights_},
-                                lb[1], ub[1], expectedTail[1]);
+                                calculation,
+                                weightStyles[i],
+                                {models[1].trendModel().detrend(time, sample[0], confidence)},
+                                {weights_},
+                                lb[1],
+                                ub[1],
+                                expectedTail[1]);
                            expectedProbability[0] = (lb[0] + ub[0]) / 2.0;
                            expectedProbability[1] = (lb[1] + ub[1]) / 2.0;
                        }
@@ -1282,18 +1105,14 @@ void CTimeSeriesModelTest::testProbability()
                        {
                            maths::CModelProbabilityParams params;
                            params.addCalculation(calculation)
-                                  .seasonalConfidenceInterval(confidence)
-                                  .addBucketEmpty({empty})
-                                  .weightStyles(weightStyles[i])
-                                  .addWeights(weights[i]);
+                                .seasonalConfidenceInterval(confidence)
+                                .addBucketEmpty({empty})
+                                .weightStyles(weightStyles[i])
+                                .addWeights(weights[i]);
                            bool conditional;
                            TSize1Vec mostAnomalousCorrelate;
-                            models[0].probability(params, time_, {sample},
-                                                  probability[0], tail[0],
-                                                  conditional, mostAnomalousCorrelate);
-                            models[1].probability(params, time_, {sample},
-                                                  probability[1], tail[1],
-                                                  conditional, mostAnomalousCorrelate);
+                            models[0].probability(params, time_, {sample}, probability[0], tail[0], conditional, mostAnomalousCorrelate);
+                            models[1].probability(params, time_, {sample}, probability[1], tail[1], conditional, mostAnomalousCorrelate);
                        }

                        CPPUNIT_ASSERT_EQUAL(expectedProbability[0], probability[0]);
@@ -1309,45 +1128,39 @@ void CTimeSeriesModelTest::testProbability()
    LOG_DEBUG("Multivariate");
    {
        maths::CMultivariateTimeSeriesModel models[]{
-            maths::CMultivariateTimeSeriesModel{modelParams(bucketLength),
-                                                maths::CTimeSeriesDecompositionStub{},
-                                                multivariateNormal(), 0, false},
-            maths::CMultivariateTimeSeriesModel{modelParams(bucketLength),
-                                                maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
-                                                multivariateNormal(), 0, false}};
+            maths::CMultivariateTimeSeriesModel{
+                modelParams(bucketLength), maths::CTimeSeriesDecompositionStub{}, multivariateNormal(), 0, false},
+            maths::CMultivariateTimeSeriesModel{modelParams(bucketLength),
+                                                maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
+                                                multivariateNormal(),
+                                                0,
+                                                false}};

        TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({10.0, 15.0, 11.0},
-                                              {{3.0, 2.9, 0.5},
-                                               {2.9, 2.6, 0.1},
-                                               {0.5, 0.1, 2.0}},
-                                              1000, samples);
+        rng.generateMultivariateNormalSamples({10.0, 15.0, 11.0}, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);

        core_t::TTime time{0};
        const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(3)};
-        for (const auto &sample : samples)
-        {
+        for (auto& sample : samples) {
            TDouble2Vec sample_(sample);
-            models[0].addSamples(addSampleParams(weight),
-                                 {core::make_triple(time, sample_, TAG)});
+            models[0].addSamples(addSampleParams(weight), {core::make_triple(time, sample_, TAG)});

-            double trend{5.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                               * static_cast<double>(time) / 86400.0)};
+            double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
+                                              * static_cast<double>(time) / 86400.0)};
-            for (auto &component : sample_)
-            {
+            for (auto& component : sample_) {
                component += trend;
            }
-            models[1].addSamples(addSampleParams(weight),
-                                 {core::make_triple(time, sample_, TAG)});
+            models[1].addSamples(addSampleParams(weight), {core::make_triple(time, sample_, TAG)});
            time += bucketLength;
        }
@@ -1355,49 +1168,37 @@ void CTimeSeriesModelTest::testProbability()
        TTime2Vec1Vec time_{{time}};
        TDouble2Vec sample{15.0, 14.0, 16.0};

-        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided,
-                                                        maths_t::E_OneSidedAbove};
+        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided, maths_t::E_OneSidedAbove};
        double confidences[]{0.0, 20.0, 50.0};
        bool empties[]{true, false};
        maths_t::TWeightStyleVec weightStyles[]{{maths_t::E_SampleCountVarianceScaleWeight},
-                                                {maths_t::E_SampleCountVarianceScaleWeight,
-                                                 maths_t::E_SampleSeasonalVarianceScaleWeight}};
+                                                {maths_t::E_SampleCountVarianceScaleWeight, maths_t::E_SampleSeasonalVarianceScaleWeight}};
        TDouble2Vec4Vec weights[]{{{0.9, 0.9, 0.8}}, {{1.1, 1.0, 1.2}, {1.8, 1.7, 1.6}}};

-        for (auto calculation : calculations)
-        {
+        for (auto calculation : calculations) {
            LOG_DEBUG("calculation = " << calculation);
-            for (auto confidence : confidences)
-            {
+            for (auto confidence : confidences) {
                LOG_DEBUG(" confidence = " << confidence);
-                for (auto empty : empties)
-                {
+                for (auto empty : empties) {
                    LOG_DEBUG("  empty = " << empty);
-                    for (std::size_t i = 0u; i < boost::size(weightStyles); ++i)
-                    {
+                    for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
                        LOG_DEBUG("   weights = " << core::CContainerPrinter::print(weights[i]));

                        double expectedProbability[2];
                        TTail10Vec expectedTail[2];
                        {
                            TDouble10Vec4Vec weights_;
-                            for (const auto &weight_ : weights[i])
-                            {
+                            for (const auto& weight_ : weights[i]) {
                                weights_.push_back(weight_);
                            }
                            double lb[2], ub[2];
                            models[0].residualModel().probabilityOfLessLikelySamples(
-                                calculation, weightStyles[i],
-                                {TDouble10Vec(sample)}, {weights_},
-                                lb[0], ub[0], expectedTail[0]);
+                                calculation, weightStyles[i], {TDouble10Vec(sample)}, {weights_}, lb[0], ub[0], expectedTail[0]);
                            TDouble10Vec detrended;
-                            for (std::size_t j = 0u; j < sample.size(); ++j)
-                            {
+                            for (std::size_t j = 0u; j < sample.size(); ++j) {
                                detrended.push_back(models[1].trendModel()[j]->detrend(time, sample[j], confidence));
                            }
                            models[1].residualModel().probabilityOfLessLikelySamples(
-                                calculation, weightStyles[i],
-                                {detrended}, {weights_},
-                                lb[1], ub[1], expectedTail[1]);
+                                calculation, weightStyles[i], {detrended}, {weights_}, lb[1], ub[1], expectedTail[1]);
                            expectedProbability[0] = (lb[0] + ub[0]) / 2.0;
                            expectedProbability[1] = (lb[1] + ub[1]) / 2.0;
                        }
@@ -1407,24 +1208,19 @@ void CTimeSeriesModelTest::testProbability()
                        {
                            maths::CModelProbabilityParams params;
                            params.addCalculation(calculation)
-                                  .seasonalConfidenceInterval(confidence)
-                                  .addBucketEmpty({empty})
-                                  .weightStyles(weightStyles[i])
-                                  .addWeights(weights[i]);
+                                .seasonalConfidenceInterval(confidence)
+                                .addBucketEmpty({empty})
+                                .weightStyles(weightStyles[i])
+                                .addWeights(weights[i]);
                            bool conditional;
                            TSize1Vec mostAnomalousCorrelate;
-                            models[0].probability(params, time_, {sample},
-                                                  probability[0], tail[0],
-                                                  conditional, mostAnomalousCorrelate);
-                            models[1].probability(params, time_, {sample},
-                                                  probability[1], tail[1],
-                                                  conditional, mostAnomalousCorrelate);
+                            models[0].probability(params, time_, {sample}, probability[0], tail[0], conditional, mostAnomalousCorrelate);
+                            models[1].probability(params, time_, {sample}, probability[1], tail[1], conditional, mostAnomalousCorrelate);
                        }

                        CPPUNIT_ASSERT_EQUAL(expectedProbability[0], probability[0]);
                        CPPUNIT_ASSERT_EQUAL(expectedProbability[1], probability[1]);
-                        for (std::size_t j = 0u; j < 3; ++j)
-                        {
+                        for (std::size_t j = 0u; j < 3; ++j) {
                            CPPUNIT_ASSERT_EQUAL(expectedTail[0][j], tail[0][j]);
                            CPPUNIT_ASSERT_EQUAL(expectedTail[1][j], tail[1][j]);
                        }
@@ -1454,42 +1250,37 @@ void CTimeSeriesModelTest::testProbability()
-        for (auto &sample : samples)
+        for (auto& sample : samples)
        {
-            if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++))
-            {
+            if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++)) {
                sample += 10.0;
            }
-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});

            TTail2Vec tail;
            double probability;
            bool conditional;
            TSize1Vec mostAnomalousCorrelate;
-            model.probability(computeProbabilityParams(weight), {{time}}, {{sample}},
-                              probability, tail, conditional, mostAnomalousCorrelate);
+            model.probability(
+                computeProbabilityParams(weight), {{time}}, {{sample}}, probability, tail, conditional, mostAnomalousCorrelate);

            smallest.add({probability, bucket - 1});
            time += bucketLength;
        }

        TSizeVec anomalies_;
-        std::transform(smallest.begin(), smallest.end(),
-                       std::back_inserter(anomalies_),
-                       [](const TDoubleSizePr &value) { return value.second; });
+        std::transform(
+            smallest.begin(), smallest.end(), std::back_inserter(anomalies_), [](const TDoubleSizePr& value) { return value.second; });
        std::sort(anomalies_.begin(), anomalies_.end());
        LOG_DEBUG("expected anomalies = " << core::CContainerPrinter::print(anomalies));
        LOG_DEBUG("actual anomalies = " << core::CContainerPrinter::print(anomalies_));
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(anomalies),
-                             core::CContainerPrinter::print(anomalies_));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(anomalies), core::CContainerPrinter::print(anomalies_));
    }
}

-void CTimeSeriesModelTest::testWeights()
-{
+void CTimeSeriesModelTest::testWeights() {
    LOG_DEBUG("+-------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testWeights |");
    LOG_DEBUG("+-------------------------------------+");
@@ -1515,38 +1306,34 @@ void CTimeSeriesModelTest::testWeights()
        rng.generateNormalSamples(0.0, 4.0, 1008, samples);

        TDouble2Vec4VecVec weights{{{1.0}}};
        core_t::TTime time{0};
-        for (auto sample : samples)
-        {
-            double scale{10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                                * static_cast<double>(time) / 86400.0)};
+        for (auto sample : samples) {
+            double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
+                                               * static_cast<double>(time) / 86400.0)};

            sample = scale * (1.0 + 0.1 * sample);
-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

        LOG_DEBUG("Seasonal");
        TMeanAccumulator error;
-        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600)
-        {
-            double dataScale{std::pow(1.0 + 0.5 * std::sin( boost::math::double_constants::two_pi
-                                                            * static_cast<double>(time_) / 86400.0), 2.0)};
+        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
+            double dataScale{std::pow(1.0 + 0.5 * std::sin(boost::math::double_constants::two_pi
+                                                           * static_cast<double>(time_) / 86400.0),
+                                      2.0)};

-            double expectedScale{model.trendModel().scale(
-                    time_, model.residualModel().marginalLikelihoodVariance(), 0.0).second};
+            double expectedScale{model.trendModel().scale(time_, model.residualModel().marginalLikelihoodVariance(), 0.0).second};
            double scale{model.seasonalWeight(0.0, time_)[0]};
-            LOG_DEBUG("expected weight = " << expectedScale
-                      << ", weight = " << scale
-                      << " (data weight = " << dataScale << ")");
+            LOG_DEBUG("expected weight = " << expectedScale << ", weight = " << scale << " (data weight = " << dataScale << ")");
            CPPUNIT_ASSERT_EQUAL(std::max(expectedScale, MINIMUM_SEASONAL_SCALE), scale);

            error.add(std::fabs(scale - dataScale) / dataScale);
@@ -1557,8 +1344,7 @@ void CTimeSeriesModelTest::testWeights()
        LOG_DEBUG("Winsorisation");
        TDouble2Vec prediction(model.predict(time));
        double lastWeight = 1.0;
-        for (std::size_t i = 0u; i < 10; ++i)
-        {
+        for (std::size_t i = 0u; i < 10; ++i) {
            double weight_{model.winsorisationWeight(0.0, time, prediction)[0]};
            LOG_DEBUG("weight = " << weight_);
            CPPUNIT_ASSERT(weight_ <= lastWeight);
@@ -1574,27 +1360,21 @@ void CTimeSeriesModelTest::testWeights()
        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), trend, prior};

        TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({10.0, 15.0, 11.0},
-                                              {{3.0, 2.9, 0.5},
-                                               {2.9, 2.6, 0.1},
-                                               {0.5, 0.1, 2.0}},
-                                              1008, samples);
+        rng.generateMultivariateNormalSamples({10.0, 15.0, 11.0}, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1008, samples);

        TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}};
        core_t::TTime time{0};
-        for (auto &sample : samples)
-        {
-            double scale{10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                                * static_cast<double>(time) / 86400.0)};
-            for (auto &component : sample)
+        for (auto& sample : samples) {
+            double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
+                                               * static_cast<double>(time) / 86400.0)};

            bool reinitialize{false};
            TDouble10Vec1Vec detrended{TDouble10Vec(3)};
-            for (std::size_t i = 0u; i < sample.size(); ++i)
-            {
+            for (std::size_t i = 0u; i < sample.size(); ++i) {
                sample[i] = scale * (1.0 + 0.1 * sample[i]);
                reinitialize |= trends[i]->addPoint(time, sample[i]);
                detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0);
@@ -1604,30 +1384,27 @@ void CTimeSeriesModelTest::testWeights()
            {
                component = scale * (1.0 + 0.1 * component);
            }
-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

        LOG_DEBUG("Seasonal");
        TMeanAccumulator error;
-        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600)
-        {
-            double dataScale{std::pow(1.0 + 0.5 * std::sin( boost::math::double_constants::two_pi
-                                                            * static_cast<double>(time_) / 86400.0), 2.0)};
+        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
+            double dataScale{std::pow(1.0 + 0.5 * std::sin(boost::math::double_constants::two_pi
+                                                           * static_cast<double>(time_) / 86400.0),
+                                      2.0)};

-            for (std::size_t i = 0u; i < 3; ++i)
-            {
-                double expectedScale{model.trendModel()[i]->scale(
-                        time_, model.residualModel().marginalLikelihoodVariances()[i], 0.0).second};
+            for (std::size_t i = 0u; i < 3; ++i) {
+                double expectedScale{
+                    model.trendModel()[i]->scale(time_, model.residualModel().marginalLikelihoodVariances()[i], 0.0).second};
                double scale{model.seasonalWeight(0.0, time_)[i]};
-                LOG_DEBUG("expected weight = " << expectedScale
-                          << ", weight = " << scale
-                          << " (data weight = " << dataScale << ")");
+                LOG_DEBUG("expected weight = " << expectedScale << ", weight = " << scale << " (data weight = " << dataScale << ")");
                CPPUNIT_ASSERT_EQUAL(std::max(expectedScale, MINIMUM_SEASONAL_SCALE), scale);
                error.add(std::fabs(scale - dataScale) / dataScale);
            }
@@ -1638,8 +1415,7 @@ void CTimeSeriesModelTest::testWeights()
        LOG_DEBUG("Winsorisation");
        TDouble2Vec prediction(model.predict(time));
        double lastWeight = 1.0;
-        for (std::size_t i = 0u; i < 10; ++i)
-        {
+        for (std::size_t i = 0u; i < 10; ++i) {
            double weight_{model.winsorisationWeight(0.0, time, prediction)[0]};
            LOG_DEBUG("weight = " << weight_);
            CPPUNIT_ASSERT(weight_ <= lastWeight);
@@ -1649,8 +1425,7 @@ void CTimeSeriesModelTest::testWeights()
    }
}

-void CTimeSeriesModelTest::testMemoryUsage()
-{
+void CTimeSeriesModelTest::testMemoryUsage() {
    LOG_DEBUG("+-----------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testMemoryUsage |");
    LOG_DEBUG("+-----------------------------------------+");
@@ -1666,40 +1441,31 @@ void CTimeSeriesModelTest::testMemoryUsage()
        maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(1);
        boost::scoped_ptr<maths::CModel> model{
-            new maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 0,
-                                                  trend, univariateNormal(),
-                                                  &controllers}};
+            new maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 0, trend, univariateNormal(), &controllers}};

        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

        TDouble2Vec4VecVec weights{{{1.0}}};
        core_t::TTime time{0};
-        for (auto sample : samples)
-        {
-            sample += 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                             * static_cast<double>(time) / 86400.0);
+        for (auto sample : samples) {
+            sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 86400.0);
            trend.addPoint(time, sample);
-            model->addSamples(addSampleParams(weights),
-                              {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model->addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

-        std::size_t expectedSize{  sizeof(maths::CTimeSeriesDecomposition)
-                                 + trend.memoryUsage()
-                                 + sizeof(maths::CNormalMeanPrecConjugate)
-                                 + sizeof(maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary)
-                                 + 2 * controllers[0].memoryUsage()};
+        std::size_t expectedSize{sizeof(maths::CTimeSeriesDecomposition) + trend.memoryUsage() + sizeof(maths::CNormalMeanPrecConjugate) +
+                                 sizeof(maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary) + 2 * controllers[0].memoryUsage()};
        std::size_t size = model->memoryUsage();
        LOG_DEBUG("size " << size << " expected " << expectedSize);
        CPPUNIT_ASSERT(size < 1.1 * expectedSize);
@@ -1720,37 +1486,29 @@ void CTimeSeriesModelTest::testMemoryUsage()
        TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
        core_t::TTime time{0};
-        for (auto &sample : samples)
-        {
-            for (auto &coordinate : sample)
-            {
-                coordinate += 10.0 + 5.0 * std::sin( boost::math::double_constants::two_pi
-                                                     * static_cast<double>(time) / 86400.0);
+        for (auto& sample : samples) {
+            for (auto& coordinate : sample) {
+                coordinate += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 86400.0);
            }
            trend.addPoint(time, sample[0]);
-            model->addSamples(addSampleParams(weights),
-                              {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model->addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

-        std::size_t expectedSize{  3 * sizeof(maths::CTimeSeriesDecomposition)
-                                 + 3 * trend.memoryUsage()
-                                 + sizeof(maths::CMultivariateNormalConjugate<3>)
-                                 + sizeof(maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary)
-                                 + 2 * controllers[0].memoryUsage()};
+        std::size_t expectedSize{3 * sizeof(maths::CTimeSeriesDecomposition) + 3 * trend.memoryUsage() +
+                                 sizeof(maths::CMultivariateNormalConjugate<3>) +
+                                 sizeof(maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary) + 2 * controllers[0].memoryUsage()};
        std::size_t size = model->memoryUsage();
        LOG_DEBUG("size " << size << " expected " << expectedSize);
        CPPUNIT_ASSERT(size < 1.1 * expectedSize);
@@ -1759,8 +1517,7 @@ void CTimeSeriesModelTest::testMemoryUsage()
    // TODO LOG_DEBUG("Correlates");
}

-void CTimeSeriesModelTest::testPersist()
-{
+void CTimeSeriesModelTest::testPersist() {
    LOG_DEBUG("+-------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testPersist |");
    LOG_DEBUG("+-------------------------------------+");
@@ -1776,18 +1533,14 @@ void CTimeSeriesModelTest::testPersist()
    {
        maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(1);
-        maths::CUnivariateTimeSeriesModel origModel{params, 1,
-                                                    trend, univariateNormal(),
-                                                    &controllers};
+        maths::CUnivariateTimeSeriesModel origModel{params, 1, trend, univariateNormal(), &controllers};

        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

        TDouble2Vec4VecVec weights{{{1.0}}};
        core_t::TTime time{0};
-        for (auto sample : samples)
-        {
-            origModel.addSamples(addSampleParams(weights),
-                                 {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+        for (auto sample : samples) {
+            origModel.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }
@@ -1807,9 +1560,7 @@ void CTimeSeriesModelTest::testPersist()
        core::CRapidXmlStateRestoreTraverser traverser(parser);

        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE,
-                                                                         bucketLength,
-                                                                         distributionParams};
+        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, distributionParams};
        maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};
        maths::CUnivariateTimeSeriesModel restoredModel{restoreParams, traverser};
@@ -1819,24 +1570,17 @@ void CTimeSeriesModelTest::testPersist()
    LOG_DEBUG("Multivariate")
    {
        TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({11.0, 10.0, 12.0},
-                                              {{4.0, 2.9, 0.5},
-                                               {2.9, 2.6, 0.1},
-                                               {0.5, 0.1, 2.0}},
-                                              1000, samples);
+        rng.generateMultivariateNormalSamples({11.0, 10.0, 12.0}, {{4.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);

        maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        auto controllers = decayRateControllers(3);
-        maths::CMultivariateTimeSeriesModel origModel{modelParams(bucketLength),
-                                                      trend, prior, &controllers};
+        maths::CMultivariateTimeSeriesModel origModel{modelParams(bucketLength), trend, prior, &controllers};

        TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
        core_t::TTime time{0};
-        for (const auto &sample : samples)
-        {
-            origModel.addSamples(addSampleParams(weights),
-                                 {core::make_triple(time, TDouble2Vec(sample), TAG)});
+        for (const auto& sample : samples) {
+            origModel.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }
@@ -1856,9 +1600,7 @@ void CTimeSeriesModelTest::testPersist()
        core::CRapidXmlStateRestoreTraverser traverser(parser);

        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE,
-                                                                         bucketLength,
-                                                                         distributionParams};
+        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, distributionParams};
        maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};
        maths::CMultivariateTimeSeriesModel restoredModel{restoreParams, traverser};
@@ -1868,8 +1610,7 @@ void CTimeSeriesModelTest::testPersist()
    // TODO LOG_DEBUG("Correlates");
}

-void CTimeSeriesModelTest::testUpgrade()
-{
+void CTimeSeriesModelTest::testUpgrade() {
    LOG_DEBUG("+-------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testUpgrade |");
    LOG_DEBUG("+-------------------------------------+");
@@ -1881,14 +1622,13 @@ void CTimeSeriesModelTest::testUpgrade()
    // restoring either.

    using TStrVec = std::vector<std::string>;
-    auto load = [](const std::string &name, std::string &result)
-        {
-            std::ifstream file;
-            file.open(name);
-            std::stringbuf buf;
-            file >> &buf;
-            result = buf.str();
-        };
+    auto load = [](const std::string& name, std::string& result) {
+        std::ifstream file;
+        file.open(name);
+        std::stringbuf buf;
+        file >> &buf;
+        result = buf.str();
+    };

    core_t::TTime bucketLength{600};
    core_t::TTime halfHour{1800};
@@ -1912,26 +1652,19 @@ void CTimeSeriesModelTest::testUpgrade()
        core::CRapidXmlStateRestoreTraverser traverser(parser);

        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE,
-                                                                         bucketLength,
-                                                                         distributionParams};
+        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, distributionParams};
        maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};
        maths::CUnivariateTimeSeriesModel restoredModel{restoreParams, traverser};

        TStrVec expectedInterval;
        TStrVec interval;
-        for (core_t::TTime time = 600000, i = 0;
-             i < static_cast<core_t::TTime>(expectedIntervals.size());
-             time += halfHour, ++i)
-        {
+        for (core_t::TTime time = 600000, i = 0; i < static_cast<core_t::TTime>(expectedIntervals.size()); time += halfHour, ++i) {
            expectedInterval.clear();
            interval.clear();

            core::CStringUtils::tokenise(",", expectedIntervals[i], expectedInterval, empty);
-            std::string interval_{core::CContainerPrinter::print(
-                    restoredModel.confidenceInterval(time, 90.0,
-                                                     maths::CConstantWeights::COUNT,
-                                                     maths::CConstantWeights::unit(1)))};
+            std::string interval_{core::CContainerPrinter::print(restoredModel.confidenceInterval(
+                time, 90.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(1)))};
            core::CStringUtils::replace("[", "", interval_);
            core::CStringUtils::replace("]", "", interval_);
            core::CStringUtils::replace(" ", "", interval_);
@@ -1939,11 +1672,9 @@ void CTimeSeriesModelTest::testUpgrade()
            core::CStringUtils::tokenise(",", interval_, interval, empty);

            CPPUNIT_ASSERT_EQUAL(expectedInterval.size(), interval.size());
-            for (std::size_t j = 0u; j < expectedInterval.size(); ++j)
-            {
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(boost::lexical_cast<double>(expectedInterval[j]),
-                                             boost::lexical_cast<double>(interval[j]),
-                                             0.0001);
+            for (std::size_t j = 0u; j < expectedInterval.size(); ++j) {
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                    boost::lexical_cast<double>(expectedInterval[j]), boost::lexical_cast<double>(interval[j]), 0.0001);
            }
        }
    }
@@ -1965,26 +1696,19 @@ void CTimeSeriesModelTest::testUpgrade()
        core::CRapidXmlStateRestoreTraverser traverser(parser);

        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE,
-                                                                         bucketLength,
-                                                                         distributionParams};
+        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, distributionParams};
        maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};
        maths::CMultivariateTimeSeriesModel restoredModel{restoreParams, traverser};

        TStrVec expectedInterval;
        TStrVec interval;
-        for (core_t::TTime time = 600000, i = 0;
-             i < static_cast<core_t::TTime>(expectedIntervals.size());
-             time += halfHour, ++i)
-        {
+        for (core_t::TTime time = 600000, i = 0; i < static_cast<core_t::TTime>(expectedIntervals.size()); time += halfHour, ++i) {
            expectedInterval.clear();
            interval.clear();

            core::CStringUtils::tokenise(",", expectedIntervals[i], expectedInterval, empty);
-            std::string interval_{core::CContainerPrinter::print(
-                    restoredModel.confidenceInterval(time, 90.0,
-                                                     maths::CConstantWeights::COUNT,
-                                                     maths::CConstantWeights::unit(3)))};
+            std::string interval_{core::CContainerPrinter::print(restoredModel.confidenceInterval(
+                time, 90.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3)))};
            core::CStringUtils::replace("[", "", interval_);
            core::CStringUtils::replace("]", "", interval_);
            core::CStringUtils::replace(" ", "", interval_);
@@ -1992,18 +1716,15 @@ void CTimeSeriesModelTest::testUpgrade()
            core::CStringUtils::tokenise(",", interval_, interval, empty);

            CPPUNIT_ASSERT_EQUAL(expectedInterval.size(), interval.size());
-            for (std::size_t j = 0u; j < expectedInterval.size(); ++j)
-            {
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(boost::lexical_cast<double>(expectedInterval[j]),
-                                             boost::lexical_cast<double>(interval[j]),
-                                             0.0001);
+            for (std::size_t j = 0u; j < expectedInterval.size(); ++j) {
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                    boost::lexical_cast<double>(expectedInterval[j]), boost::lexical_cast<double>(interval[j]), 0.0001);
            }
        }
    }
}

-void CTimeSeriesModelTest::testAddSamplesWithCorrelations()
-{
+void CTimeSeriesModelTest::testAddSamplesWithCorrelations() {
    LOG_DEBUG("+--------------------------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testAddSamplesWithCorrelations |");
    LOG_DEBUG("+--------------------------------------------------------+");
@@ -2029,13 +1750,10 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations()
        TDouble2Vec4VecVec weights{{{1.0}}};
        core_t::TTime time{0};
-        for (auto sample : samples)
-        {
+        for (auto sample : samples) {
            correlations.refresh(allocator);
-            models[0].addSamples(addSampleParams(weights),
-                                 {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)});
-            models[1].addSamples(addSampleParams(weights),
-                                 {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)});
+            models[0].addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)});
+            models[1].addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)});
            correlations.processSamples(maths::CConstantWeights::COUNT);
            time += bucketLength;
        }
@@ -2045,15 +1763,13 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations()
    // TODO LOG_DEBUG("Correlations with tags (for population)");
}

-void CTimeSeriesModelTest::testProbabilityWithCorrelations()
-{
+void CTimeSeriesModelTest::testProbabilityWithCorrelations() {
    LOG_DEBUG("+---------------------------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testProbabilityWithCorrelations |");
    LOG_DEBUG("+---------------------------------------------------------+");
}

-void CTimeSeriesModelTest::testAnomalyModel()
-{
+void CTimeSeriesModelTest::testAnomalyModel() {
    LOG_DEBUG("+------------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testAnomalyModel |");
    LOG_DEBUG("+------------------------------------------+");
@@ -2067,8 +1783,7 @@ void CTimeSeriesModelTest::testAnomalyModel()
    std::size_t length = 2000;

-    LOG_DEBUG("Univariate")
-    {
+    LOG_DEBUG("Univariate") {
        TSizeVec anomalies;
        rng.generateUniformSamples(0, length, 30, anomalies);
        std::sort(anomalies.begin(), anomalies.end());
@@ -2089,25 +1804,21 @@ void CTimeSeriesModelTest::testAnomalyModel()
        TDouble2Vec4VecVec weights{weight};
        std::size_t bucket{0};
        core_t::TTime time{0};
-        for (auto &sample : samples)
-        {
-            if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++))
-            {
+        for (auto& sample : samples) {
+            if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++)) {
                sample += 12.0;
            }
-            if (bucket >= length - 100 && bucket < length - 92)
-            {
+            if (bucket >= length - 100 && bucket < length - 92) {
                sample += 8.0;
            }
-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});

            TTail2Vec tail;
            double probability;
            bool conditional;
            TSize1Vec mostAnomalousCorrelate;
-            model.probability(computeProbabilityParams(weight), {{time}}, {{sample}},
-                              probability, tail, conditional, mostAnomalousCorrelate);
+            model.probability(
+                computeProbabilityParams(weight), {{time}}, {{sample}}, probability, tail, conditional, mostAnomalousCorrelate);
            mostAnomalous.add({std::log(probability), bucket});

            //scores.push_back(maths::CTools::deviation(probability));
@@ -2118,19 +1829,15 @@ void CTimeSeriesModelTest::testAnomalyModel()
        TSizeVec anomalyBuckets;
        TDoubleVec anomalyProbabilities;
-        for (const auto &anomaly : mostAnomalous)
-        {
+        for (const auto& anomaly : mostAnomalous) {
            anomalyBuckets.push_back(anomaly.second);
            anomalyProbabilities.push_back(std::exp(anomaly.first));
        }
        LOG_DEBUG("anomalies = " << core::CContainerPrinter::print(anomalyBuckets));
        LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(anomalyProbabilities));
-        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(),
-                                 anomalyBuckets.end(), 1905) != anomalyBuckets.end());
-        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(),
-                                 anomalyBuckets.end(), 1906) != anomalyBuckets.end());
-        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(),
-                                 anomalyBuckets.end(), 1907) != anomalyBuckets.end());
+        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(), 1905) != anomalyBuckets.end());
+        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(), 1906) != anomalyBuckets.end());
+        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(), 1907) != anomalyBuckets.end());

        //file << "v = " << core::CContainerPrinter::print(samples) << ";\n";
        //file << "s = " << core::CContainerPrinter::print(scores) << ";\n";
@@ -2141,18 +1848,13 @@ void CTimeSeriesModelTest::testAnomalyModel()
        //file << "plot([1:length(s)], s, 'r');\n";
    }

-    LOG_DEBUG("Multivariate")
-    {
+    LOG_DEBUG("Multivariate") {
        TSizeVec anomalies;
        rng.generateUniformSamples(0, length, 30, anomalies);
        std::sort(anomalies.begin(), anomalies.end());
        core_t::TTime bucketLength{600};
        TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({10.0, 10.0, 10.0},
-                                              {{4.0, 0.9, 0.5},
-                                               {0.9, 2.6, 0.1},
-                                               {0.5, 0.1, 3.0}},
-                                              length, samples);
+        rng.generateMultivariateNormalSamples({10.0, 10.0, 10.0}, {{4.0, 0.9, 0.5}, {0.9, 2.6, 0.1}, {0.5, 0.1, 3.0}}, length, samples);

        maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
@@ -2167,29 +1869,24 @@ void CTimeSeriesModelTest::testAnomalyModel()
        TDouble2Vec4VecVec weights{weight};
        core_t::TTime time{0};
        std::size_t bucket{0};
-        for (auto &sample : samples)
-        {
-            for (auto &coordinate : sample)
-            {
-                if (std::binary_search(anomalies.begin(), anomalies.end(), bucket))
-                {
+        for (auto& sample : samples) {
+            for (auto& coordinate : sample) {
+                if (std::binary_search(anomalies.begin(), anomalies.end(), bucket)) {
                    coordinate += 12.0;
                }
-                if (bucket >= length - 100 && bucket < length - 92)
-                {
+                if (bucket >= length - 100 && bucket < length - 92) {
                    coordinate += 8.0;
                }
            }
            ++bucket;
-            model.addSamples(addSampleParams(weights),
-                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});

            TTail2Vec tail;
            double probability;
            bool conditional;
            TSize1Vec mostAnomalousCorrelate;
-            model.probability(computeProbabilityParams(weight), {{time}}, {(sample)},
-                              probability, tail, conditional, mostAnomalousCorrelate);
+            model.probability(
+                computeProbabilityParams(weight), {{time}}, {(sample)}, probability, tail, conditional, mostAnomalousCorrelate);
            mostAnomalous.add({std::log(probability), bucket});

            //scores.push_back(maths::CTools::deviation(probability));
@@ -2200,15 +1897,13 @@ void CTimeSeriesModelTest::testAnomalyModel()
        TSizeVec anomalyBuckets;
        TDoubleVec anomalyProbabilities;
-        for (const auto &anomaly : mostAnomalous)
-        {
+        for (const auto& anomaly : mostAnomalous) {
            anomalyBuckets.push_back(anomaly.second);
            anomalyProbabilities.push_back(std::exp(anomaly.first));
        }
        LOG_DEBUG("anomalies = " << core::CContainerPrinter::print(anomalyBuckets));
        LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(anomalyProbabilities));
-        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(),
-                                 anomalyBuckets.end(), 1908) != anomalyBuckets.end());
+        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(), 1908) != anomalyBuckets.end());

        //file << "v = [";
        //for (const auto &sample : samples)
@@ -2229,8 +1924,7 @@ void CTimeSeriesModelTest::testAnomalyModel()
    }
}

-void CTimeSeriesModelTest::testStepChangeDiscontinuities()
-{
+void CTimeSeriesModelTest::testStepChangeDiscontinuities() {
    LOG_DEBUG("+-------------------------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testStepChangeDiscontinuities |");
LOG_DEBUG("+-------------------------------------------------------+"); @@ -2243,12 +1937,11 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() using TDouble3VecVec = std::vector; TDouble2Vec4VecVec weight{{{1.0}}}; - auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel &model) - { - weight[0][0] = model.winsorisationWeight(0.0, time, {value}); - model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), - {core::make_triple(time, TDouble2Vec{value}, TAG)}); - }; + auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { + weight[0][0] = model.winsorisationWeight(0.0, time, {value}); + model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), + {core::make_triple(time, TDouble2Vec{value}, TAG)}); + }; //std::ostringstream actual, modelBounds; //actual << "r = ["; @@ -2271,21 +1964,17 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() core_t::TTime bucketLength{600}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(1); - maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, - trend, univariateNormal(DECAY_RATE / 3.0), - &controllers}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers}; // Add some data to the model. core_t::TTime time{0}; TDoubleVec samples; double level{20.0}; - for (auto dl : {10.0, 20.0, 15.0, 50.0, 30.0, 40.0, 15.0, 40.0, 25.0}) - { + for (auto dl : {10.0, 20.0, 15.0, 50.0, 30.0, 40.0, 15.0, 40.0, 25.0}) { level += dl; rng.generateNormalSamples(level, 2.0, 300 + static_cast(2.0 * dl), samples); - for (auto sample : samples) - { + for (auto sample : samples) { updateModel(time, sample, model); //updateTestDebug(time, sample, model); time += bucketLength; @@ -2293,8 +1982,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() } level += 30.0; rng.generateNormalSamples(level, 2.0, 100, samples); - for (auto sample : samples) - { + for (auto sample : samples) { updateModel(time, sample, model); //updateTestDebug(time, sample, model); time += bucketLength; @@ -2304,8 +1992,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() TDoubleVec expected; rng.generateNormalSamples(level, 2.0, 260, expected); - for (auto dl : {25.0, 40.0}) - { + for (auto dl : {25.0, 40.0}) { level += dl; rng.generateNormalSamples(level, 2.0, 300 + static_cast(2.0 * dl), samples); expected.insert(expected.end(), samples.begin(), samples.end()); @@ -2319,15 +2006,12 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() //file << modelBounds.str() << "];"; //file << "y = ["; TDouble3VecVec forecast; - auto pushErrorBar = [&](const maths::SErrorBar &errorBar) - { - forecast.push_back({errorBar.s_LowerBound, - errorBar.s_Predicted, - errorBar.s_UpperBound}); - //file << errorBar.s_LowerBound << "," - // << errorBar.s_Predicted << "," - // << errorBar.s_UpperBound << std::endl; - }; + auto pushErrorBar = [&](const maths::SErrorBar& errorBar) { + forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted, errorBar.s_UpperBound}); + //file << errorBar.s_LowerBound << "," + // << errorBar.s_Predicted << "," + // << errorBar.s_UpperBound << std::endl; + }; std::string m; model.forecast(time, time + 800 * bucketLength, 90.0, {-1000.0}, {1000.0}, pushErrorBar, m); @@ -2335,11 +2019,9 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() //file << "];"; double 
outOfBounds{0.0}; - for (std::size_t i = 0u; i < forecast.size(); ++i) - { + for (std::size_t i = 0u; i < forecast.size(); ++i) { CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[i], forecast[i][1], 0.1 * expected[i]); - outOfBounds += static_cast( expected[i] < forecast[i][0] - || expected[i] > forecast[i][2]); + outOfBounds += static_cast(expected[i] < forecast[i][0] || expected[i] > forecast[i][2]); } double percentageOutOfBounds{100.0 * outOfBounds / static_cast(forecast.size())}; LOG_DEBUG("% out-of-bounds = " << percentageOutOfBounds); @@ -2351,19 +2033,16 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() core_t::TTime bucketLength{1800}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(1); - maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, - trend, univariateNormal(), &controllers}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(), &controllers}; // Add some data to the model. core_t::TTime time{0}; double value{10.0}; TDoubleVec noise; - for (auto slope : {0.08, 0.056, 0.028, 0.044, 0.06, 0.03}) - { + for (auto slope : {0.08, 0.056, 0.028, 0.044, 0.06, 0.03}) { value = 5.0; - while (value < 95.0) - { + while (value < 95.0) { rng.generateNormalSamples(0.0, 2.0, 1, noise); updateModel(time, value + noise[0], model); //updateTestDebug(time, value + noise[0], model); @@ -2371,11 +2050,9 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() value += slope; } } - for (auto slope : {0.042}) - { + for (auto slope : {0.042}) { value = 5.0; - for (std::size_t i = 0u; i < 1500; ++i) - { + for (std::size_t i = 0u; i < 1500; ++i) { rng.generateNormalSamples(0.0, 2.0, 1, noise); updateModel(time, value + noise[0], model); //updateTestDebug(time, value + noise[0], model); @@ -2387,10 +2064,8 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() // Generate expected values from the same process. 
        TDoubleVec expected;
-        for (auto slope : {0.05, 0.04})
-        {
-            while (expected.size() < 2000 && value < 95.0)
-            {
+        for (auto slope : {0.05, 0.04}) {
+            while (expected.size() < 2000 && value < 95.0) {
                 rng.generateNormalSamples(0.0, 2.0, 1, noise);
                 expected.push_back(value + noise[0]);
                 //actual << value + noise[0] << std::endl;
@@ -2407,15 +2082,12 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities()
         //file << modelBounds.str() << "];";
         //file << "y = [";
         TDouble3VecVec forecast;
-        auto pushErrorBar = [&](const maths::SErrorBar &errorBar)
-            {
-                forecast.push_back({errorBar.s_LowerBound,
-                                    errorBar.s_Predicted,
-                                    errorBar.s_UpperBound});
-                //file << errorBar.s_LowerBound << ","
-                //     << errorBar.s_Predicted << ","
-                //     << errorBar.s_UpperBound << std::endl;
-            };
+        auto pushErrorBar = [&](const maths::SErrorBar& errorBar) {
+            forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted, errorBar.s_UpperBound});
+            //file << errorBar.s_LowerBound << ","
+            //     << errorBar.s_Predicted << ","
+            //     << errorBar.s_UpperBound << std::endl;
+        };

         std::string m;
         model.forecast(time, time + 2000 * bucketLength, 90.0, {-1000.0}, {1000.0}, pushErrorBar, m);

         //file << "x = [";
         //file << "];";

         double outOfBounds{0.0};
-        for (std::size_t i = 0u; i < forecast.size(); ++i)
-        {
-            outOfBounds += static_cast<double>( expected[i] < forecast[i][0]
-                                               || expected[i] > forecast[i][2]);
+        for (std::size_t i = 0u; i < forecast.size(); ++i) {
+            outOfBounds += static_cast<double>(expected[i] < forecast[i][0] || expected[i] > forecast[i][2]);
         }
         double percentageOutOfBounds{100.0 * outOfBounds / static_cast<double>(forecast.size())};
         LOG_DEBUG("% out-of-bounds = " << percentageOutOfBounds);
@@ -2434,8 +2104,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities()
     }
 }

-void CTimeSeriesModelTest::testLinearScaling()
-{
+void CTimeSeriesModelTest::testLinearScaling() {
    LOG_DEBUG("+-------------------------------------------+");
    LOG_DEBUG("| CTimeSeriesModelTest::testLinearScaling |");
    LOG_DEBUG("+-------------------------------------------+");
@@ -2446,12 +2115,11 @@ void CTimeSeriesModelTest::testLinearScaling()
    // 2) linearly scale up the same periodic pattern.
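For orientation, the signal that testLinearScaling feeds the model can be sketched as follows; smoothDaily is the test helper used throughout this file and the phases mirror the test body below (an illustrative reconstruction for review, not code from the patch):

    // Phase 1: the unscaled daily pattern plus N(0, noiseVariance) noise.
    double phase1 = 12.0 + 10.0 * smoothDaily(time) + noise;
    // Phase 2: everything, including the noise, is scaled down by 0.3.
    double phase2 = 0.3 * (12.0 + 10.0 * smoothDaily(time) + noise);
    // Phase 3: the pattern is scaled up by 2.0 but the noise is not.
    double phase3 = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + noise;

After each scaling the test asserts that the model's 90% confidence interval recovers, i.e. that the median tracks the rescaled samples to within a small multiple of sqrt(noiseVariance).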
TDouble2Vec4VecVec weight{{{1.0}}}; - auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel &model) - { - weight[0][0] = model.winsorisationWeight(0.0, time, {value}); - model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), - {core::make_triple(time, TDouble2Vec{value}, TAG)}); - }; + auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { + weight[0][0] = model.winsorisationWeight(0.0, time, {value}); + model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), + {core::make_triple(time, TDouble2Vec{value}, TAG)}); + }; //std::ostringstream actual, modelBounds; //actual << "r = ["; @@ -2474,15 +2142,12 @@ void CTimeSeriesModelTest::testLinearScaling() core_t::TTime bucketLength{600}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(1); - maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, - trend, univariateNormal(DECAY_RATE / 3.0), - &controllers}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers}; core_t::TTime time{0}; TDoubleVec samples; rng.generateNormalSamples(0.0, noiseVariance, 1000, samples); - for (auto sample : samples) - { + for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time); updateModel(time, sample, model); //updateTestDebug(time, sample, model); @@ -2492,21 +2157,19 @@ void CTimeSeriesModelTest::testLinearScaling() // Scale by 0.3 rng.generateNormalSamples(0.0, noiseVariance, 200, samples); - for (auto sample : samples) - { + for (auto sample : samples) { sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample); updateModel(time, sample, model); //updateTestDebug(time, sample, model); time += bucketLength; } rng.generateNormalSamples(0.0, noiseVariance, 1500, samples); - for (auto sample : samples) - { + for (auto sample : samples) { sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample); updateModel(time, sample, model); //updateTestDebug(time, sample, model); auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); - CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 1.2 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 1.2 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); time += bucketLength; } @@ -2514,21 +2177,19 @@ void CTimeSeriesModelTest::testLinearScaling() // Scale by 2 / 0.3 rng.generateNormalSamples(0.0, noiseVariance, 200, samples); - for (auto sample : samples) - { + for (auto sample : samples) { sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample; updateModel(time, sample, model); //updateTestDebug(time, sample, model); time += bucketLength; } rng.generateNormalSamples(0.0, noiseVariance, 400, samples); - for (auto sample : samples) - { + for (auto sample : samples) { sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample; updateModel(time, sample, model); //updateTestDebug(time, sample, model); auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); - CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.1 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.1 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); time += bucketLength; } @@ -2539,19 +2200,17 @@ void CTimeSeriesModelTest::testLinearScaling() //file << modelBounds.str() 
<< "];"; } -void CTimeSeriesModelTest::testDaylightSaving() -{ +void CTimeSeriesModelTest::testDaylightSaving() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| CTimeSeriesModelTest::testDaylightSaving |"); LOG_DEBUG("+--------------------------------------------+"); TDouble2Vec4VecVec weight{{{1.0}}}; - auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel &model) - { - weight[0][0] = model.winsorisationWeight(0.0, time, {value}); - model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), - {core::make_triple(time, TDouble2Vec{value}, TAG)}); - }; + auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { + weight[0][0] = model.winsorisationWeight(0.0, time, {value}); + model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), + {core::make_triple(time, TDouble2Vec{value}, TAG)}); + }; //std::ostringstream actual, modelBounds; //actual << "r = ["; @@ -2575,15 +2234,12 @@ void CTimeSeriesModelTest::testDaylightSaving() core_t::TTime bucketLength{600}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; auto controllers = decayRateControllers(1); - maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, - trend, univariateNormal(DECAY_RATE / 3.0), - &controllers}; + maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers}; core_t::TTime time{0}; TDoubleVec samples; rng.generateNormalSamples(0.0, noiseVariance, 1000, samples); - for (auto sample : samples) - { + for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time); updateModel(time, sample, model); //updateTestDebug(time, sample, model); @@ -2593,22 +2249,20 @@ void CTimeSeriesModelTest::testDaylightSaving() // Shift by +1 hr. rng.generateNormalSamples(0.0, noiseVariance, 200, samples); - for (auto sample : samples) - { + for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time + hour); updateModel(time, sample, model); //updateTestDebug(time, sample, model); time += bucketLength; } rng.generateNormalSamples(0.0, noiseVariance, 1500, samples); - for (auto sample : samples) - { + for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time + hour); updateModel(time, sample, model); //updateTestDebug(time, sample, model); CPPUNIT_ASSERT_EQUAL(hour, model.trendModel().timeShift()); auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); - CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.6 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.6 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.6 * std::sqrt(noiseVariance)); time += bucketLength; } @@ -2616,22 +2270,20 @@ void CTimeSeriesModelTest::testDaylightSaving() // Shift by -1 hr. 
        rng.generateNormalSamples(0.0, noiseVariance, 200, samples);
-        for (auto sample : samples)
-        {
+        for (auto sample : samples) {
             sample += 12.0 + 10.0 * smoothDaily(time);
             updateModel(time, sample, model);
             //updateTestDebug(time, sample, model);
             time += bucketLength;
         }
         rng.generateNormalSamples(0.0, noiseVariance, 400, samples);
-        for (auto sample : samples)
-        {
+        for (auto sample : samples) {
             sample += 12.0 + 10.0 * smoothDaily(time);
             updateModel(time, sample, model);
             //updateTestDebug(time, sample, model);
             CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), model.trendModel().timeShift());
             auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}});
-            CPPUNIT_ASSERT(::fabs(sample - x[1][0]) <  4.1 * std::sqrt(noiseVariance));
+            CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 4.1 * std::sqrt(noiseVariance));
             CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.8 * std::sqrt(noiseVariance));
             time += bucketLength;
         }
@@ -2642,58 +2294,40 @@ void CTimeSeriesModelTest::testDaylightSaving()
     //file << modelBounds.str() << "];";
 }

-CppUnit::Test *CTimeSeriesModelTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTimeSeriesModelTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testClone",
-                                   &CTimeSeriesModelTest::testClone) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testMode",
-                                   &CTimeSeriesModelTest::testMode) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testAddBucketValue",
-                                   &CTimeSeriesModelTest::testAddBucketValue) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testAddSamples",
-                                   &CTimeSeriesModelTest::testAddSamples) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testPredict",
-                                   &CTimeSeriesModelTest::testPredict) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testProbability",
-                                   &CTimeSeriesModelTest::testProbability) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testWeights",
-                                   &CTimeSeriesModelTest::testWeights) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testMemoryUsage",
-                                   &CTimeSeriesModelTest::testMemoryUsage) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testPersist",
-                                   &CTimeSeriesModelTest::testPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testUpgrade",
-                                   &CTimeSeriesModelTest::testUpgrade) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testAddSamplesWithCorrelations",
-                                   &CTimeSeriesModelTest::testAddSamplesWithCorrelations) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testProbabilityWithCorrelations",
-                                   &CTimeSeriesModelTest::testProbabilityWithCorrelations) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testAnomalyModel",
-                                   &CTimeSeriesModelTest::testAnomalyModel) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testStepChangeDiscontinuities",
-                                   &CTimeSeriesModelTest::testStepChangeDiscontinuities) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testLinearScaling",
-                                   &CTimeSeriesModelTest::testLinearScaling) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CTimeSeriesModelTest>(
-                                   "CTimeSeriesModelTest::testDaylightSaving",
-                                   &CTimeSeriesModelTest::testDaylightSaving) );
+CppUnit::Test* CTimeSeriesModelTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeSeriesModelTest");
+
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testClone", &CTimeSeriesModelTest::testClone));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testMode", &CTimeSeriesModelTest::testMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testAddBucketValue",
+                                                                        &CTimeSeriesModelTest::testAddBucketValue));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testAddSamples", &CTimeSeriesModelTest::testAddSamples));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testPredict", &CTimeSeriesModelTest::testPredict));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testProbability", &CTimeSeriesModelTest::testProbability));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testWeights", &CTimeSeriesModelTest::testWeights));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testMemoryUsage", &CTimeSeriesModelTest::testMemoryUsage));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testPersist", &CTimeSeriesModelTest::testPersist));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testUpgrade", &CTimeSeriesModelTest::testUpgrade));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testAddSamplesWithCorrelations",
+                                                                        &CTimeSeriesModelTest::testAddSamplesWithCorrelations));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testProbabilityWithCorrelations",
+                                                                        &CTimeSeriesModelTest::testProbabilityWithCorrelations));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testAnomalyModel", &CTimeSeriesModelTest::testAnomalyModel));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testStepChangeDiscontinuities",
+                                                                        &CTimeSeriesModelTest::testStepChangeDiscontinuities));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testLinearScaling", &CTimeSeriesModelTest::testLinearScaling));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testDaylightSaving",
+                                                                        &CTimeSeriesModelTest::testDaylightSaving));

     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CTimeSeriesModelTest.h b/lib/maths/unittest/CTimeSeriesModelTest.h
index 3727957fa4..0e2355a777 100644
--- a/lib/maths/unittest/CTimeSeriesModelTest.h
+++ b/lib/maths/unittest/CTimeSeriesModelTest.h
@@ -9,27 +9,26 @@
 #include <cppunit/extensions/HelperMacros.h>

-class CTimeSeriesModelTest : public CppUnit::TestFixture
-{
-    public:
-        void testClone();
-        void testMode();
-        void testAddBucketValue();
-        void testAddSamples();
-        void testPredict();
-        void testProbability();
-        void testWeights();
-        void testMemoryUsage();
-        void testPersist();
-        void testUpgrade();
-        void testAddSamplesWithCorrelations();
-        void testProbabilityWithCorrelations();
-        void testAnomalyModel();
-        void testStepChangeDiscontinuities();
-        void testLinearScaling();
-        void testDaylightSaving();
+class CTimeSeriesModelTest : public CppUnit::TestFixture {
+public:
+    void testClone();
+    void testMode();
+    void testAddBucketValue();
+    void testAddSamples();
+    void testPredict();
+    void testProbability();
+    void testWeights();
+    void testMemoryUsage();
+    void testPersist();
+    void testUpgrade();
+    void testAddSamplesWithCorrelations();
+    void testProbabilityWithCorrelations();
+    void testAnomalyModel();
+    void testStepChangeDiscontinuities();
+    void testLinearScaling();
+    void testDaylightSaving();

-        static CppUnit::Test
*suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTimeSeriesModelTest_h diff --git a/lib/maths/unittest/CToolsTest.cc b/lib/maths/unittest/CToolsTest.cc index 86ddb1c332..8ec7507426 100644 --- a/lib/maths/unittest/CToolsTest.cc +++ b/lib/maths/unittest/CToolsTest.cc @@ -6,8 +6,8 @@ #include "CToolsTest.h" -#include #include +#include #include #include @@ -19,179 +19,141 @@ #include -#include -#include #include #include #include #include +#include +#include using namespace ml; using namespace maths; using namespace test; -namespace -{ +namespace { using TDoubleDoublePr = std::pair; using TDoubleBoolPr = std::pair; using TDoubleVec = std::vector; -namespace adapters -{ +namespace adapters { template -bool isDiscrete(const DISTRIBUTION&) -{ +bool isDiscrete(const DISTRIBUTION&) { return false; } -bool isDiscrete(const boost::math::negative_binomial_distribution<>&) -{ +bool isDiscrete(const boost::math::negative_binomial_distribution<>&) { return true; } template -TDoubleDoublePr support(const DISTRIBUTION &distribution) -{ +TDoubleDoublePr support(const DISTRIBUTION& distribution) { return boost::math::support(distribution); } -TDoubleDoublePr support(const CLogTDistribution &logt) -{ +TDoubleDoublePr support(const CLogTDistribution& logt) { CLogTDistribution::TOptionalDouble minimum = localMinimum(logt); - return TDoubleDoublePr(minimum ? *minimum : 0.0, - boost::math::tools::max_value()); + return TDoubleDoublePr(minimum ? *minimum : 0.0, boost::math::tools::max_value()); } template -TDoubleBoolPr stationaryPoint(const DISTRIBUTION &distribution) -{ +TDoubleBoolPr stationaryPoint(const DISTRIBUTION& distribution) { return TDoubleBoolPr(boost::math::mode(distribution), true); } -TDoubleBoolPr stationaryPoint(const CLogTDistribution &logt) -{ +TDoubleBoolPr stationaryPoint(const CLogTDistribution& logt) { return TDoubleBoolPr(ml::maths::mode(logt), true); } -TDoubleBoolPr stationaryPoint(const boost::math::beta_distribution<> &beta) -{ - if (beta.alpha() < 1.0 && beta.beta() < 1.0) - { - return TDoubleBoolPr((beta.alpha() - 1.0) - / (beta.alpha() + beta.beta() - 2.0), false); +TDoubleBoolPr stationaryPoint(const boost::math::beta_distribution<>& beta) { + if (beta.alpha() < 1.0 && beta.beta() < 1.0) { + return TDoubleBoolPr((beta.alpha() - 1.0) / (beta.alpha() + beta.beta() - 2.0), false); } return TDoubleBoolPr(boost::math::mode(beta), true); } template -double pdf(const DISTRIBUTION &distribution, const double &x) -{ +double pdf(const DISTRIBUTION& distribution, const double& x) { return CTools::safePdf(distribution, x); } -double pdf(const CLogTDistribution &logt, const double &x) -{ +double pdf(const CLogTDistribution& logt, const double& x) { return ml::maths::pdf(logt, x); } template -double cdf(const DISTRIBUTION &distribution, const double &x) -{ +double cdf(const DISTRIBUTION& distribution, const double& x) { return CTools::safeCdf(distribution, x); } -double cdf(const CLogTDistribution &logt, const double &x) -{ +double cdf(const CLogTDistribution& logt, const double& x) { return ml::maths::cdf(logt, x); } template -double cdfComplement(const DISTRIBUTION &distribution, const double &x) -{ +double cdfComplement(const DISTRIBUTION& distribution, const double& x) { return CTools::safeCdfComplement(distribution, x); } -double cdfComplement(const CLogTDistribution &logt, const double &x) -{ +double cdfComplement(const CLogTDistribution& logt, const double& x) { return ml::maths::cdfComplement(logt, x); } } // adapters:: template -double 
numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION &distribution, double x) -{ +double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION& distribution, double x) { TDoubleBoolPr stationaryPoint = adapters::stationaryPoint(distribution); double eps = 1e-8; double pdf = adapters::pdf(distribution, x); - LOG_TRACE("x = " << x - << ", f(x) = " << pdf - << ", stationaryPoint = " << stationaryPoint.first); + LOG_TRACE("x = " << x << ", f(x) = " << pdf << ", stationaryPoint = " << stationaryPoint.first); double x1 = stationaryPoint.first; - if (x > stationaryPoint.first) - { + if (x > stationaryPoint.first) { // Search for lower bound. double minX = adapters::support(distribution).first + eps; - for (double increment = std::max(x1 / 2.0, 1.0); - x1 > minX && - ((stationaryPoint.second && adapters::pdf(distribution, x1) > pdf) - || (!stationaryPoint.second && adapters::pdf(distribution, x1) < pdf)); - x1 = std::max(x1 - increment, minX), increment *= 2.0) - { + for (double increment = std::max(x1 / 2.0, 1.0); x1 > minX && ((stationaryPoint.second && adapters::pdf(distribution, x1) > pdf) || + (!stationaryPoint.second && adapters::pdf(distribution, x1) < pdf)); + x1 = std::max(x1 - increment, minX), increment *= 2.0) { // Empty. } } double x2 = stationaryPoint.first; - if (x < stationaryPoint.first) - { + if (x < stationaryPoint.first) { // Search for upper bound. double maxX = adapters::support(distribution).second - eps; - for (double increment = std::max(x2 / 2.0, 1.0); - x2 < maxX && - ((stationaryPoint.second && adapters::pdf(distribution, x2) > pdf) - || (!stationaryPoint.second && adapters::pdf(distribution, x2) < pdf)); - x2 = std::min(x2 + increment, maxX), increment *= 2.0) - { + for (double increment = std::max(x2 / 2.0, 1.0); x2 < maxX && ((stationaryPoint.second && adapters::pdf(distribution, x2) > pdf) || + (!stationaryPoint.second && adapters::pdf(distribution, x2) < pdf)); + x2 = std::min(x2 + increment, maxX), increment *= 2.0) { // Empty. } } LOG_TRACE("1) x1 = " << x1 << ", x2 = " << x2); - if (adapters::pdf(distribution, x1) > adapters::pdf(distribution, x2)) - { + if (adapters::pdf(distribution, x1) > adapters::pdf(distribution, x2)) { std::swap(x1, x2); } LOG_TRACE("2) x1 = " << x1 << ", x2 = " << x2); // Binary search. - while (std::fabs(x1 - x2) > eps) - { + while (std::fabs(x1 - x2) > eps) { double x3 = (x1 + x2) / 2.0; - if (adapters::pdf(distribution, x3) > pdf) - { + if (adapters::pdf(distribution, x3) > pdf) { x2 = x3; - } - else - { + } else { x1 = x3; } } LOG_TRACE("3) x1 = " << x1 << ", x2 = " << x2); double y = (x > (x1 + x2) / 2.0) ? std::min(x1, x2) : std::max(x1, x2); - if (x > y) - { + if (x > y) { std::swap(x, y); } - LOG_TRACE("x = " << x - << ", y = " << y - << ", f(x) = " << adapters::pdf(distribution, x) - << ", f(y) = " << adapters::pdf(distribution, y)); + LOG_TRACE("x = " << x << ", y = " << y << ", f(x) = " << adapters::pdf(distribution, x) + << ", f(y) = " << adapters::pdf(distribution, y)); - if (stationaryPoint.second) - { - double cdfy = adapters::cdfComplement(distribution, y) - + (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0); + if (stationaryPoint.second) { + double cdfy = + adapters::cdfComplement(distribution, y) + (adapters::isDiscrete(distribution) ? 
adapters::pdf(distribution, y) : 0.0); double cdfx = adapters::cdf(distribution, x); LOG_TRACE("F(x) = " << cdfx << ", 1 - F(y) = " << cdfy); @@ -199,8 +161,7 @@ double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION &distributi return cdfx + cdfy; } - double cdfy = adapters::cdf(distribution, y) - + (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0); + double cdfy = adapters::cdf(distribution, y) + (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0); double cdfx = adapters::cdf(distribution, x); LOG_TRACE("F(x) = " << cdfx << ", F(y) = " << cdfy); @@ -209,60 +170,49 @@ double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION &distributi } template -double numericalProbabilityOfLessLikelySample(const DISTRIBUTION &distribution, double x) -{ +double numericalProbabilityOfLessLikelySample(const DISTRIBUTION& distribution, double x) { return numericalProbabilityOfLessLikelySampleImpl(distribution, x); } -double numericalProbabilityOfLessLikelySample( - const boost::math::negative_binomial_distribution<> &negativeBinomial, double x) -{ +double numericalProbabilityOfLessLikelySample(const boost::math::negative_binomial_distribution<>& negativeBinomial, double x) { double fx = CTools::safePdf(negativeBinomial, x); double m = boost::math::mode(negativeBinomial); double fm = CTools::safePdf(negativeBinomial, m); - if (fx >= fm) - { + if (fx >= fm) { return 1.0; } double f0 = CTools::safePdf(negativeBinomial, 0.0); - if (x > m && fx < f0) - { - return CTools::safeCdfComplement(negativeBinomial, x) - + CTools::safePdf(negativeBinomial, x); + if (x > m && fx < f0) { + return CTools::safeCdfComplement(negativeBinomial, x) + CTools::safePdf(negativeBinomial, x); } return numericalProbabilityOfLessLikelySampleImpl(negativeBinomial, x); } -double numericalProbabilityOfLessLikelySample(const CLogTDistribution &logt, double x) -{ +double numericalProbabilityOfLessLikelySample(const CLogTDistribution& logt, double x) { // We need special handling for the case that the p.d.f. is // single sided and if it is greater at x than the local "mode". double m = mode(logt); - if (m == 0.0) - { + if (m == 0.0) { return cdfComplement(logt, x); } double fx = pdf(logt, x); double fm = pdf(logt, m); - if (fx > fm) - { + if (fx > fm) { return cdfComplement(logt, x); } CLogTDistribution::TOptionalDouble xmin = localMinimum(logt); - if (xmin && (pdf(logt, *xmin) > fx || *xmin == m)) - { + if (xmin && (pdf(logt, *xmin) > fx || *xmin == m)) { return cdfComplement(logt, x); } return numericalProbabilityOfLessLikelySampleImpl(logt, x); } -double numericalProbabilityOfLessLikelySample(const boost::math::beta_distribution<> &beta, double x) -{ +double numericalProbabilityOfLessLikelySample(const boost::math::beta_distribution<>& beta, double x) { // We need special handling of the case that the equal p.d.f. // point is very close to 0 or 1. 
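A short derivation for the near-zero branch in the next hunk (a review sketch; lgamma below denotes boost::math::lgamma): for alpha a >= 1 the beta density behaves like f(t) = t^(a-1) (1-t)^(b-1) / B(a, b) ~ t^(a-1) / B(a, b) as t -> 0, since (1 - t)^(b-1) -> 1. The mass below xmin is therefore approximately

    xmin^a / (a * B(a, b)) = exp(a * log(xmin) - log(a) + lgamma(a + b) - lgamma(a) - lgamma(b)),

using log B(a, b) = lgamma(a) + lgamma(b) - lgamma(a + b). Adding CTools::safeCdfComplement(beta, x) accounts for the right tail, which is exactly what the reformatted return statement computes.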
@@ -272,16 +222,13 @@ double numericalProbabilityOfLessLikelySample(const boost::math::beta_distributi
     double b = beta.beta();
     double xmin = 1000.0 * std::numeric_limits<double>::min();
-    if (a >= 1.0 && fx < CTools::safePdf(beta, xmin))
-    {
-        return std::exp( a * std::log(xmin) -std::log(a)
-                        + boost::math::lgamma(a + b) - boost::math::lgamma(a) - boost::math::lgamma(b))
-               + CTools::safeCdfComplement(beta, x);
+    if (a >= 1.0 && fx < CTools::safePdf(beta, xmin)) {
+        return std::exp(a * std::log(xmin) - std::log(a) + boost::math::lgamma(a + b) - boost::math::lgamma(a) - boost::math::lgamma(b)) +
+               CTools::safeCdfComplement(beta, x);
     }
     double xmax = 1.0 - std::numeric_limits<double>::epsilon();
-    if (b >= 1.0 && fx < CTools::safePdf(beta, xmax))
-    {
+    if (b >= 1.0 && fx < CTools::safePdf(beta, xmax)) {
         double y = std::exp(std::log(boost::math::beta(a, b) * fx) / b);
         return std::pow(y, b) / b / boost::math::beta(a, b) + CTools::safeCdf(beta, x);
     }
@@ -290,55 +237,44 @@ double numericalProbabilityOfLessLikelySample(const boost::math::beta_distributi
 }

 template<typename DISTRIBUTION>
-class CPdf
-{
-    public:
-        using result_type = double;
-
-    public:
-        CPdf(const DISTRIBUTION &distribution) :
-            m_Distribution(distribution)
-        {
-        }
+class CPdf {
+public:
+    using result_type = double;

-        bool operator()(double x, double &result) const
-        {
-            result = boost::math::pdf(m_Distribution, x);
-            return true;
-        }
+public:
+    CPdf(const DISTRIBUTION& distribution) : m_Distribution(distribution) {}
+
+    bool operator()(double x, double& result) const {
+        result = boost::math::pdf(m_Distribution, x);
+        return true;
+    }

-    private:
-        DISTRIBUTION m_Distribution;
+private:
+    DISTRIBUTION m_Distribution;
 };

-class CIdentity
-{
-    public:
-        using result_type = double;
+class CIdentity {
+public:
+    using result_type = double;

-    public:
-        bool operator()(double x, double &result) const
-        {
-            result = x;
-            return true;
-        }
+public:
+    bool operator()(double x, double& result) const {
+        result = x;
+        return true;
+    }
 };

 template<typename DISTRIBUTION>
-double numericalIntervalExpectation(const DISTRIBUTION &distribution,
-                                    double a,
-                                    double b)
-{
+double numericalIntervalExpectation(const DISTRIBUTION& distribution, double a, double b) {
     double numerator = 0.0;
     double denominator = 0.0;
     CPdf<DISTRIBUTION> fx(distribution);
     maths::CCompositeFunctions::CProduct<CPdf<DISTRIBUTION>, CIdentity> xfx(fx);
     double dx = (b - a) / 10.0;
-    for (std::size_t i = 0u; i < 10; ++i, a += dx)
-    {
+    for (std::size_t i = 0u; i < 10; ++i, a += dx) {
         double fxi;
-        CPPUNIT_ASSERT(maths::CIntegration::gaussLegendre<maths::CIntegration::OrderThree>(fx, a, a + dx, fxi));
+        CPPUNIT_ASSERT(maths::CIntegration::gaussLegendre<maths::CIntegration::OrderThree>(fx, a, a + dx, fxi));
         double xfxi;
         CPPUNIT_ASSERT(maths::CIntegration::gaussLegendre<maths::CIntegration::OrderThree>(xfx, a, a + dx, xfxi));
         numerator += xfxi;
@@ -349,57 +285,41 @@ double numericalIntervalExpectation(const DISTRIBUTION &distribution,
 }

 template<typename T>
-class CTruncatedPdf
-{
-    public:
-        CTruncatedPdf(const maths::CMixtureDistribution<T> &mixture,
-                      double cutoff) :
-            m_Mixture(mixture),
-            m_Cutoff(cutoff)
-        {}
-
-        bool operator()(double x, double &fx) const
-        {
-            fx = maths::pdf(m_Mixture, x);
-            if (fx > m_Cutoff)
-            {
-                fx = 0.0;
-            }
-            return true;
+class CTruncatedPdf {
+public:
+    CTruncatedPdf(const maths::CMixtureDistribution<T>& mixture, double cutoff) : m_Mixture(mixture), m_Cutoff(cutoff) {}
+
+    bool operator()(double x, double& fx) const {
+        fx = maths::pdf(m_Mixture, x);
+        if (fx > m_Cutoff) {
+            fx = 0.0;
         }
+        return true;
+    }

-    private:
-        const maths::CMixtureDistribution<T> &m_Mixture;
-        double m_Cutoff;
+private:
+    const maths::CMixtureDistribution<T>& m_Mixture;
+    double
m_Cutoff; }; template -class CLogPdf -{ - public: - CLogPdf(const maths::CMixtureDistribution &mixture) : - m_Mixture(mixture) - {} - - double operator()(double x) const - { - return std::log(maths::pdf(m_Mixture, x)); - } +class CLogPdf { +public: + CLogPdf(const maths::CMixtureDistribution& mixture) : m_Mixture(mixture) {} - bool operator()(double x, double &fx) const - { - fx = std::log(maths::pdf(m_Mixture, x)); - return true; - } + double operator()(double x) const { return std::log(maths::pdf(m_Mixture, x)); } - private: - const maths::CMixtureDistribution &m_Mixture; -}; + bool operator()(double x, double& fx) const { + fx = std::log(maths::pdf(m_Mixture, x)); + return true; + } +private: + const maths::CMixtureDistribution& m_Mixture; +}; } -void CToolsTest::testProbabilityOfLessLikelySample() -{ +void CToolsTest::testProbabilityOfLessLikelySample() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CToolsTest::testProbabilityOfLessLikelySample |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -423,7 +343,6 @@ void CToolsTest::testProbabilityOfLessLikelySample() maths_t::ETail tail = maths_t::E_UndeterminedTail; double m; - LOG_DEBUG("******** normal ********"); boost::math::normal_distribution<> normal(3.0, 5.0); @@ -449,7 +368,6 @@ void CToolsTest::testProbabilityOfLessLikelySample() CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 0.01 * std::max(p1, p2)); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - LOG_DEBUG("******** student's t ********"); boost::math::students_t_distribution<> students(2.0); @@ -476,33 +394,25 @@ void CToolsTest::testProbabilityOfLessLikelySample() CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 0.01 * std::max(p1, p2)); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - LOG_DEBUG("******** negative binomial ********"); { - double successFraction[] = { 0.5, 1.0, 10.0, 100.0, 1000.0 }; - double successProbability[] = { 1e-3, 0.25, 0.5, 0.75, 1.0 - 1e-3 }; + double successFraction[] = {0.5, 1.0, 10.0, 100.0, 1000.0}; + double successProbability[] = {1e-3, 0.25, 0.5, 0.75, 1.0 - 1e-3}; - for (size_t i = 0; i < boost::size(successFraction); ++i) - { - for (size_t j = 0; j < boost::size(successProbability); ++j) - { - LOG_DEBUG("**** r = " << successFraction[i] - << ", p = " << successProbability[j] << " ****"); + for (size_t i = 0; i < boost::size(successFraction); ++i) { + for (size_t j = 0; j < boost::size(successProbability); ++j) { + LOG_DEBUG("**** r = " << successFraction[i] << ", p = " << successProbability[j] << " ****"); - boost::math::negative_binomial_distribution<> negativeBinomial(successFraction[i], - successProbability[j]); + boost::math::negative_binomial_distribution<> negativeBinomial(successFraction[i], successProbability[j]); - if (successFraction[i] <= 1.0) - { + if (successFraction[i] <= 1.0) { // Monotone decreasing. 
double x = std::fabs(std::log(successProbability[j])); - for (int l = 0; l < 10; ++l) - { + for (int l = 0; l < 10; ++l) { tail = maths_t::E_UndeterminedTail; x = std::floor(2.0 * x + 0.5); - p1 = CTools::safeCdfComplement(negativeBinomial, x) - + CTools::safePdf(negativeBinomial, x); + p1 = CTools::safeCdfComplement(negativeBinomial, x) + CTools::safePdf(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-3 * std::max(p1, p2)); @@ -517,21 +427,21 @@ void CToolsTest::testProbabilityOfLessLikelySample() CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); double offset = m1; - for (int l = 0; l < 5; ++l) - { + for (int l = 0; l < 5; ++l) { offset /= 2.0; double x = std::floor(m1 - offset); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); - LOG_DEBUG("x = " << x - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.02 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.02 * std::fabs(std::min(std::log(p1), std::log(p2)))); - if (offset > 0.0) CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - if (offset == 0.0) CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); + LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.02 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.02 * std::fabs(std::min(std::log(p1), std::log(p2)))); + if (offset > 0.0) + CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); + if (offset == 0.0) + CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); x = std::ceil(m1 + offset); tail = maths_t::E_UndeterminedTail; @@ -539,38 +449,41 @@ void CToolsTest::testProbabilityOfLessLikelySample() p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 0.02 * std::max(p1, p2)); - if (offset > 0.0) CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - if (offset == 0.0) CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); + if (offset > 0.0) + CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); + if (offset == 0.0) + CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } double factor = 1.0; - for (int l = 0; l < 5; ++l) - { + for (int l = 0; l < 5; ++l) { factor *= 2.0; double x = std::floor(m1 / factor); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); - LOG_DEBUG("x = " << x - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))); - if (x != m1) CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - if (x == m1) CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); + LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * 
std::fabs(std::min(std::log(p1), std::log(p2)))); + if (x != m1) + CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); + if (x == m1) + CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); x = std::ceil(m1 * factor); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); - LOG_DEBUG("x = " << x - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))); - if (x != m1) CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - if (x == m1) CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); + LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))); + if (x != m1) + CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); + if (x == m1) + CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } } } @@ -605,36 +518,26 @@ void CToolsTest::testProbabilityOfLessLikelySample() CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 0.01 * std::max(p1, p2)); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - LOG_DEBUG("******** log t ********"); { - double degreesFreedom[] = { 1.0, 5.0, 50.0 }; - double locations[] = { -0.5, 0.1, 1.0, 5.0 }; - double scales[] = { 0.1, 1.0, 5.0 }; - - for (size_t i = 0; i < boost::size(degreesFreedom); ++i) - { - for (size_t j = 0; j < boost::size(locations); ++j) - { - for (size_t k = 0; k < boost::size(scales); ++k) - { - LOG_DEBUG("**** v = " << degreesFreedom[i] - << ", l = " << locations[j] - << ", s = " << scales[k] << " ****"); - - CLogTDistribution logt(degreesFreedom[i], - locations[j], - scales[k]); + double degreesFreedom[] = {1.0, 5.0, 50.0}; + double locations[] = {-0.5, 0.1, 1.0, 5.0}; + double scales[] = {0.1, 1.0, 5.0}; + + for (size_t i = 0; i < boost::size(degreesFreedom); ++i) { + for (size_t j = 0; j < boost::size(locations); ++j) { + for (size_t k = 0; k < boost::size(scales); ++k) { + LOG_DEBUG("**** v = " << degreesFreedom[i] << ", l = " << locations[j] << ", s = " << scales[k] << " ****"); + + CLogTDistribution logt(degreesFreedom[i], locations[j], scales[k]); double m1 = mode(logt); - if (m1 == 0.0) - { + if (m1 == 0.0) { // Monotone decreasing. 
double x = std::exp(locations[j]) / 32.0; tail = maths_t::E_UndeterminedTail; - for (int l = 0; l < 10; ++l) - { + for (int l = 0; l < 10; ++l) { x *= 2.0; p1 = cdfComplement(logt, x); p2 = probabilityOfLessLikelySample(logt, x, tail); @@ -646,8 +549,7 @@ void CToolsTest::testProbabilityOfLessLikelySample() } double offset = m1; - for (int l = 0; l < 5; ++l) - { + for (int l = 0; l < 5; ++l) { offset /= 2.0; double x = m1 - offset; @@ -668,60 +570,51 @@ void CToolsTest::testProbabilityOfLessLikelySample() } double factor = 1.0; - for (int l = 0; l < 5; ++l) - { + for (int l = 0; l < 5; ++l) { factor *= 2.0; double x = m1 / factor; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(logt, x); p2 = probabilityOfLessLikelySample(logt, x, tail); - LOG_DEBUG("x = " << x - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))); + LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); x = m1 * factor; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(logt, x); p2 = probabilityOfLessLikelySample(logt, x, tail); - LOG_DEBUG("x = " << x - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); + LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - } + } } } } } - LOG_DEBUG("******** gamma ********"); { - double shapes[] = { 0.1, 1.0, 1.1, 100.0, 10000.0 }; - double scales[] = { 0.0001, 0.01, 1.0, 10.0 }; + double shapes[] = {0.1, 1.0, 1.1, 100.0, 10000.0}; + double scales[] = {0.0001, 0.01, 1.0, 10.0}; - for (size_t i = 0; i < boost::size(shapes); ++i) - { - for (size_t j = 0; j < boost::size(scales); ++j) - { - LOG_DEBUG("***** shape = " << shapes[i] - << ", scale = " << scales[j] << " *****"); + for (size_t i = 0; i < boost::size(shapes); ++i) { + for (size_t j = 0; j < boost::size(scales); ++j) { + LOG_DEBUG("***** shape = " << shapes[i] << ", scale = " << scales[j] << " *****"); boost::math::gamma_distribution<> gamma(shapes[i], scales[j]); - if (shapes[i] <= 1.0) - { + if (shapes[i] <= 1.0) { double x = boost::math::mean(gamma); tail = maths_t::E_UndeterminedTail; - for (int k = 0; k < 10; ++k) - { + for (int k = 0; k < 10; ++k) { x *= 2.0; p1 = CTools::safeCdfComplement(gamma, x); p2 = probabilityOfLessLikelySample(gamma, x, tail); @@ -736,150 +629,124 @@ void CToolsTest::testProbabilityOfLessLikelySample() LOG_DEBUG("mode = " << m1); double offset = 1.0; - for (int k = 0; k < 5; ++k) - { + for (int k = 0; k < 5; ++k) { offset /= 2.0; double x = (1.0 - offset) * m1; tail = maths_t::E_UndeterminedTail; p1 = 
numericalProbabilityOfLessLikelySample(gamma, x); p2 = probabilityOfLessLikelySample(gamma, x, tail); - LOG_DEBUG("x = " << x - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))); + LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); double y = (1.0 + offset) * m1; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(gamma, y); p2 = probabilityOfLessLikelySample(gamma, y, tail); - LOG_DEBUG("y = " << y - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))); + LOG_DEBUG("y = " << y << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } double factor = 1.0; - for (int k = 0; k < 5; ++k) - { + for (int k = 0; k < 5; ++k) { factor *= 2.0; double x = m1 / factor; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(gamma, x); p2 = probabilityOfLessLikelySample(gamma, x, tail); - LOG_DEBUG("x = " << x - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))); + LOG_DEBUG("x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); double y = factor * m1; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(gamma, y); p2 = probabilityOfLessLikelySample(gamma, y, tail); - LOG_DEBUG("y = " << y - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))); + LOG_DEBUG("y = " << y << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } } } - LOG_DEBUG("******** beta ********"); { - double alphas[] = { 0.01, 0.98, 1.0, 1.01, 1000.0 }; - double betas[] = { 0.01, 0.98, 1.0, 1.01, 1000.0 }; + double alphas[] = {0.01, 0.98, 1.0, 1.01, 1000.0}; + double betas[] = 
{0.01, 0.98, 1.0, 1.01, 1000.0}; - for (size_t i = 0; i < boost::size(alphas); ++i) - { - for (size_t j = 0; j < boost::size(betas); ++j) - { - LOG_DEBUG("**** alpha = " << alphas[i] - << ", beta = " << betas[j] << " ****"); + for (size_t i = 0; i < boost::size(alphas); ++i) { + for (size_t j = 0; j < boost::size(betas); ++j) { + LOG_DEBUG("**** alpha = " << alphas[i] << ", beta = " << betas[j] << " ****"); boost::math::beta_distribution<> beta(alphas[i], betas[j]); - if (alphas[i] == 1.0 && betas[j] == 1.0) - { + if (alphas[i] == 1.0 && betas[j] == 1.0) { // Constant. - for (int k = 0; k < 6; ++k) - { + for (int k = 0; k < 6; ++k) { double x = static_cast(k) / 5.0; tail = maths_t::E_UndeterminedTail; p1 = 1.0; p2 = probabilityOfLessLikelySample(beta, x, tail); - LOG_DEBUG("x = " << x - << ", f(x) = " << CTools::safePdf(beta, x) - << ", p1 = " << p1 << ", p2 = " << p2); + LOG_DEBUG("x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_EQUAL(p1, p2); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } - } - else if (alphas[i] <= 1.0 && betas[j] >= 1.0) - { + } else if (alphas[i] <= 1.0 && betas[j] >= 1.0) { // Monotone decreasing. - for (int k = 0; k < 6; ++k) - { + for (int k = 0; k < 6; ++k) { double x = static_cast(k) / 5.0; tail = maths_t::E_UndeterminedTail; p1 = CTools::safeCdfComplement(beta, x); p2 = probabilityOfLessLikelySample(beta, x, tail); - LOG_DEBUG("x = " << x - << ", f(x) = " << CTools::safePdf(beta, x) - << ", p1 = " << p1 << ", p2 = " << p2); + LOG_DEBUG("x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-3 * std::max(p1, p2)); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } - } - else if (alphas[i] >= 1.0 && betas[j] <= 1.0) - { + } else if (alphas[i] >= 1.0 && betas[j] <= 1.0) { // Monotone increasing. 
- for (int k = 0; k < 6; ++k) - { + for (int k = 0; k < 6; ++k) { double x = static_cast(k) / 5.0; tail = maths_t::E_UndeterminedTail; p1 = CTools::safeCdf(beta, x); p2 = probabilityOfLessLikelySample(beta, x, tail); - LOG_DEBUG("x = " << x - << ", f(x) = " << CTools::safePdf(beta, x) - << ", p1 = " << p1 << ", p2 = " << p2); + LOG_DEBUG("x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-3 * std::max(p1, p2)); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); } - } - else - { + } else { double stationaryPoint = adapters::stationaryPoint(beta).first; bool maximum = adapters::stationaryPoint(beta).second; LOG_DEBUG("stationary point = " << stationaryPoint); double epsMinus = stationaryPoint; double epsPlus = 1.0 - stationaryPoint; - for (int k = 0; k < 5; ++k) - { + for (int k = 0; k < 5; ++k) { epsMinus /= 2.0; double xMinus = stationaryPoint - epsMinus; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(beta, xMinus); p2 = probabilityOfLessLikelySample(beta, xMinus, tail); - LOG_DEBUG("x- = " << xMinus - << ", p1 = " << p1 << ", p2 = " << p2 - << ", log(p1) = " << log(p1) << ", log(p2) = " << std::log(p2)); - CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.05 * std::max(p1, p2) - || std::fabs(std::log(p1) - std::log(p2)) < 0.25 * std::fabs(std::min(std::log(p1), std::log(p2)))); - if (maximum) CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - if (!maximum) CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); + LOG_DEBUG("x- = " << xMinus << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << log(p1) + << ", log(p2) = " << std::log(p2)); + CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.05 * std::max(p1, p2) || + std::fabs(std::log(p1) - std::log(p2)) < 0.25 * std::fabs(std::min(std::log(p1), std::log(p2)))); + if (maximum) + CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); + if (!maximum) + CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); epsPlus /= 2.0; double xPlus = stationaryPoint + epsPlus; @@ -888,8 +755,10 @@ void CToolsTest::testProbabilityOfLessLikelySample() p2 = probabilityOfLessLikelySample(beta, xPlus, tail); LOG_DEBUG("x+ = " << xPlus << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 0.05 * std::max(p1, p2)); - if (maximum) CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); - if (!maximum) CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); + if (maximum) + CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); + if (!maximum) + CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } } } @@ -920,8 +789,7 @@ void CToolsTest::testProbabilityOfLessLikelySample() } } -void CToolsTest::testIntervalExpectation() -{ +void CToolsTest::testIntervalExpectation() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CToolsTest::testIntervalExpectation |"); LOG_DEBUG("+---------------------------------------+"); @@ -940,22 +808,22 @@ void CToolsTest::testIntervalExpectation() { boost::math::normal_distribution<> normal(10.0, 5.0); expected = numericalIntervalExpectation(normal, 0.0, 12.0); - actual = expectation(normal, 0.0, 12.0); + actual = expectation(normal, 0.0, 12.0); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-5 * expected); expected = numericalIntervalExpectation(normal, -40.0, 13.0); - actual = expectation(normal, boost::numeric::bounds::lowest(), 13.0); + actual = expectation(normal, boost::numeric::bounds::lowest(), 13.0); LOG_DEBUG("expected = 
" << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-5 * expected); expected = 7.0; - actual = expectation(normal, 7.0, 7.0); + actual = expectation(normal, 7.0, 7.0); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected); expected = 8.1; - actual = expectation(normal, 8.1, 8.1 * (1.0 + std::numeric_limits::epsilon())); + actual = expectation(normal, 8.1, 8.1 * (1.0 + std::numeric_limits::epsilon())); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected); } @@ -964,22 +832,22 @@ void CToolsTest::testIntervalExpectation() { boost::math::lognormal_distribution<> logNormal(1.5, 0.8); expected = numericalIntervalExpectation(logNormal, 0.5, 7.0); - actual = expectation(logNormal, 0.5, 7.0); + actual = expectation(logNormal, 0.5, 7.0); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-5 * expected); expected = numericalIntervalExpectation(logNormal, 0.0, 9.0); - actual = expectation(logNormal, boost::numeric::bounds::lowest(), 9.0); + actual = expectation(logNormal, boost::numeric::bounds::lowest(), 9.0); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-5 * expected); expected = 6.0; - actual = expectation(logNormal, 6.0, 6.0); + actual = expectation(logNormal, 6.0, 6.0); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected); expected = 8.1; - actual = expectation(logNormal, 8.1, 8.1 * (1.0 + std::numeric_limits::epsilon())); + actual = expectation(logNormal, 8.1, 8.1 * (1.0 + std::numeric_limits::epsilon())); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected); } @@ -988,29 +856,28 @@ void CToolsTest::testIntervalExpectation() { boost::math::gamma_distribution<> gamma(5.0, 3.0); expected = numericalIntervalExpectation(gamma, 0.5, 4.0); - actual = expectation(gamma, 0.5, 4.0); + actual = expectation(gamma, 0.5, 4.0); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-5 * expected); expected = numericalIntervalExpectation(gamma, 0.0, 5.0); - actual = expectation(gamma, boost::numeric::bounds::lowest(), 5.0); + actual = expectation(gamma, boost::numeric::bounds::lowest(), 5.0); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-5 * expected); expected = 6.0; - actual = expectation(gamma, 6.0, 6.0); + actual = expectation(gamma, 6.0, 6.0); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected); expected = 8.1; - actual = expectation(gamma, 8.1, 8.1 * (1.0 + std::numeric_limits::epsilon())); + actual = expectation(gamma, 8.1, 8.1 * (1.0 + std::numeric_limits::epsilon())); LOG_DEBUG("expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected); } } -void CToolsTest::testMixtureProbabilityOfLessLikelySample() -{ +void CToolsTest::testMixtureProbabilityOfLessLikelySample() { LOG_DEBUG("+--------------------------------------------------------+"); LOG_DEBUG("| CToolsTest::testMixtureProbabilityOfLessLikelySample |"); 
LOG_DEBUG("+--------------------------------------------------------+"); @@ -1030,20 +897,17 @@ void CToolsTest::testMixtureProbabilityOfLessLikelySample() TDoubleVec weights; rng.generateUniformSamples(1.0, 10.0, n, weights); - for (std::size_t i = 4u; i <= 20; i += 4) - { + for (std::size_t i = 4u; i <= 20; i += 4) { LOG_DEBUG("*** modes = " << i << " ***"); TMeanAccumulator meanError; TMeanAccumulator meanLogError; - for (std::size_t j = 0u; j < n - i; j += i) - { + for (std::size_t j = 0u; j < n - i; j += i) { TDoubleVec modeWeights; std::vector modes; - double a = std::numeric_limits::max(); + double a = std::numeric_limits::max(); double b = -std::numeric_limits::max(); - for (std::size_t k = 0u; k < i; ++k) - { + for (std::size_t k = 0u; k < i; ++k) { modeWeights.push_back(weights[j + k]); modes.push_back(boost::math::normal(means[j + k], sd[j + k])); a = std::min(a, means[j + k]); @@ -1051,24 +915,17 @@ void CToolsTest::testMixtureProbabilityOfLessLikelySample() } maths::CMixtureDistribution mixture(modeWeights, modes); - for (std::size_t k = 0u; k < x.size(); ++k) - { + for (std::size_t k = 0u; k < x.size(); ++k) { double logFx = maths::pdf(mixture, x[k]); - if (logFx == 0.0) - { + if (logFx == 0.0) { logFx = 10.0 * core::constants::LOG_MIN_DOUBLE; - } - else - { + } else { logFx = std::log(logFx); } maths::CTools::CMixtureProbabilityOfLessLikelySample calculator(i, x[k], logFx, a, b); - for (std::size_t l = 0u; l < modeWeights.size(); ++l) - { - calculator.addMode((mixture.weights())[l], - boost::math::mean(modes[l]), - boost::math::standard_deviation(modes[l])); + for (std::size_t l = 0u; l < modeWeights.size(); ++l) { + calculator.addMode((mixture.weights())[l], boost::math::mean(modes[l]), boost::math::standard_deviation(modes[l])); } double pTails = 0.0; @@ -1086,40 +943,29 @@ void CToolsTest::testMixtureProbabilityOfLessLikelySample() double pExpected = pTails; CTruncatedPdf pdf(mixture, std::exp(logFx)); - for (double xi = a, l = 0, step = 0.5 * (b - a) / std::floor(b - a); - l < 2 * static_cast(b - a); - xi += step, ++l) - { + for (double xi = a, l = 0, step = 0.5 * (b - a) / std::floor(b - a); l < 2 * static_cast(b - a); + xi += step, ++l) { double pi; maths::CIntegration::gaussLegendre(pdf, xi, xi + step, pi); pExpected += pi; } - if (j % 50 == 0) - { + if (j % 50 == 0) { LOG_DEBUG("pTails = " << pTails); - LOG_DEBUG("x = " << x[k] - << ", log(f(x)) = " << logFx - << ", P(x) = " << p - << ", expected P(x) = " << pExpected); + LOG_DEBUG("x = " << x[k] << ", log(f(x)) = " << logFx << ", P(x) = " << p << ", expected P(x) = " << pExpected); } CPPUNIT_ASSERT(pExpected > 0.0); - if (pExpected > 0.1) - { + if (pExpected > 0.1) { CPPUNIT_ASSERT_DOUBLES_EQUAL(pExpected, p, 0.12); - } - else if (pExpected > 1e-10) - { + } else if (pExpected > 1e-10) { CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(pExpected), std::log(p), 0.15 * std::fabs(std::log(pExpected))); - } - else - { + } else { CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(pExpected), std::log(p), 0.015 * std::fabs(std::log(pExpected))); } meanError.add(std::fabs(p - pExpected)); - meanLogError.add(std::fabs(std::log(p) - std::log(pExpected)) - / std::max(std::fabs(std::log(pExpected)), std::fabs(std::log(p)))); + meanLogError.add(std::fabs(std::log(p) - std::log(pExpected)) / + std::max(std::fabs(std::log(pExpected)), std::fabs(std::log(p)))); } } @@ -1130,8 +976,7 @@ void CToolsTest::testMixtureProbabilityOfLessLikelySample() } } -void CToolsTest::testAnomalyScore() -{ +void CToolsTest::testAnomalyScore() { 
LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| CToolsTest::testAnomalyScore |"); LOG_DEBUG("+--------------------------------+"); @@ -1139,28 +984,21 @@ void CToolsTest::testAnomalyScore() // Test p = inverseDeviation(deviation(p)) double p = 0.04; - for (std::size_t i = 0u; i < 305; ++i, p *= 0.1) - { + for (std::size_t i = 0u; i < 305; ++i, p *= 0.1) { double anomalyScore = CTools::anomalyScore(p); LOG_DEBUG("p = " << p << ", anomalyScore = " << anomalyScore); CPPUNIT_ASSERT_DOUBLES_EQUAL(p, CTools::inverseAnomalyScore(anomalyScore), 1e-3 * p); } } -void CToolsTest::testSpread() -{ +void CToolsTest::testSpread() { LOG_DEBUG("+--------------------------+"); LOG_DEBUG("| CToolsTest::testSpread |"); LOG_DEBUG("+--------------------------+"); double period = 86400.0; { - double raw[] = - { - 15.0, 120.0, 4500.0, - 9000.0, 25700.0, 43100.0, - 73000.0, 74000.0, 84300.0 - }; + double raw[] = {15.0, 120.0, 4500.0, 9000.0, 25700.0, 43100.0, 73000.0, 74000.0, 84300.0}; double separation = 20.0; TDoubleVec points(boost::begin(raw), boost::end(raw)); std::string expected = core::CContainerPrinter::print(points); @@ -1173,12 +1011,7 @@ void CToolsTest::testSpread() CPPUNIT_ASSERT_EQUAL(expected, core::CContainerPrinter::print(points)); } { - double raw[] = - { - 150.0, 170.0, 4500.0, 4650.0, - 4700.0, 4800.0, 73000.0, 73150.0, - 73500.0, 73600.0, 73800.0, 74000.0 - }; + double raw[] = {150.0, 170.0, 4500.0, 4650.0, 4700.0, 4800.0, 73000.0, 73150.0, 73500.0, 73600.0, 73800.0, 74000.0}; double separation = 126.0; std::string expected = "[97, 223, 4473.5, 4599.5, 4725.5, 4851.5, 73000, 73150, 73487, 73613, 73800, 74000]"; TDoubleVec points(boost::begin(raw), boost::end(raw)); @@ -1188,8 +1021,7 @@ void CToolsTest::testSpread() } { CRandomNumbers rng; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { TDoubleVec origSamples; rng.generateUniformSamples(1000.0, static_cast(period) - 1000.0, 100, origSamples); TDoubleVec samples(origSamples); @@ -1197,17 +1029,12 @@ void CToolsTest::testSpread() std::sort(origSamples.begin(), origSamples.end()); double eps = 1e-3; - double dcost = (samples[0] + eps - origSamples[0]) - * (samples[0] + eps - origSamples[0]) - - (samples[0] - eps - origSamples[0]) - * (samples[0] - eps - origSamples[0]); - for (std::size_t j = 1u; j < samples.size(); ++j) - { + double dcost = (samples[0] + eps - origSamples[0]) * (samples[0] + eps - origSamples[0]) - + (samples[0] - eps - origSamples[0]) * (samples[0] - eps - origSamples[0]); + for (std::size_t j = 1u; j < samples.size(); ++j) { CPPUNIT_ASSERT(samples[j] - samples[j - 1] >= 150.0 - eps); - dcost += (samples[j] + eps - origSamples[j ]) - * (samples[j] + eps - origSamples[j]) - - (samples[j] - eps - origSamples[j]) - * (samples[j] - eps - origSamples[j]); + dcost += (samples[j] + eps - origSamples[j]) * (samples[j] + eps - origSamples[j]) - + (samples[j] - eps - origSamples[j]) * (samples[j] - eps - origSamples[j]); } dcost /= 2.0 * eps; LOG_DEBUG("d(cost)/dx = " << dcost); @@ -1216,8 +1043,7 @@ void CToolsTest::testSpread() } } -void CToolsTest::testFastLog() -{ +void CToolsTest::testFastLog() { LOG_DEBUG("+---------------------------+"); LOG_DEBUG("| CToolsTest::testFastLog |"); LOG_DEBUG("+---------------------------+"); @@ -1228,81 +1054,60 @@ void CToolsTest::testFastLog() { TDoubleVec x; rng.generateUniformSamples(-100.0, 0.0, 10000, x); - for (std::size_t i = 0u; i < x.size(); ++i) - { - if (i % 100 == 0) - { - LOG_DEBUG("x = " << std::exp(x[i]) - << ", log(x) = " << 
x[i] - << ", fast log(x) = " << maths::CTools::fastLog(std::exp(x[i]))); + for (std::size_t i = 0u; i < x.size(); ++i) { + if (i % 100 == 0) { + LOG_DEBUG("x = " << std::exp(x[i]) << ", log(x) = " << x[i] + << ", fast log(x) = " << maths::CTools::fastLog(std::exp(x[i]))); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(x[i], - maths::CTools::fastLog(std::exp(x[i])), - 5e-5); + CPPUNIT_ASSERT_DOUBLES_EQUAL(x[i], maths::CTools::fastLog(std::exp(x[i])), 5e-5); } } // Mid { TDoubleVec x; rng.generateUniformSamples(1.0, 1e6, 10000, x); - for (std::size_t i = 0u; i < x.size(); ++i) - { - if (i % 100 == 0) - { - LOG_DEBUG("x = " << x[i] - << ", log(x) = " << std::log(x[i]) - << ", fast log(x) = " << maths::CTools::fastLog(x[i])); + for (std::size_t i = 0u; i < x.size(); ++i) { + if (i % 100 == 0) { + LOG_DEBUG("x = " << x[i] << ", log(x) = " << std::log(x[i]) << ", fast log(x) = " << maths::CTools::fastLog(x[i])); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(x[i]), - maths::CTools::fastLog(x[i]), - 5e-5); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(x[i]), maths::CTools::fastLog(x[i]), 5e-5); } } // Large { TDoubleVec x; rng.generateUniformSamples(20.0, 80.0, 10000, x); - for (std::size_t i = 0u; i < x.size(); ++i) - { - if (i % 100 == 0) - { - LOG_DEBUG("x = " << std::exp(x[i]) - << ", log(x) = " << x[i] - << ", fast log(x) = " << maths::CTools::fastLog(std::exp(x[i]))); + for (std::size_t i = 0u; i < x.size(); ++i) { + if (i % 100 == 0) { + LOG_DEBUG("x = " << std::exp(x[i]) << ", log(x) = " << x[i] + << ", fast log(x) = " << maths::CTools::fastLog(std::exp(x[i]))); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(x[i], - maths::CTools::fastLog(std::exp(x[i])), - 5e-5); + CPPUNIT_ASSERT_DOUBLES_EQUAL(x[i], maths::CTools::fastLog(std::exp(x[i])), 5e-5); } } } -void CToolsTest::testMiscellaneous() -{ +void CToolsTest::testMiscellaneous() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CToolsTest::testMiscellaneous |"); LOG_DEBUG("+---------------------------------+"); - double x_[] = { 0.0, 3.2, 2.1, -1.8, 4.5 }; + double x_[] = {0.0, 3.2, 2.1, -1.8, 4.5}; maths::CVectorNx1 x(x_, x_ + 5); maths::CVectorNx1 a(-2.0); - maths::CVectorNx1 b( 5.0); - - double expected[][5] = - { - { 0.0, 3.2, 2.1, -1.8, 4.5 }, - { 0.0, 3.2, 2.1, -1.5, 4.5 }, - { 0.0, 3.2, 2.1, -1.0, 4.0 }, - { 0.0, 3.2, 2.1, -0.5, 3.5 }, - { 0.0, 3.0, 2.1, 0.0, 3.0 }, - { 0.5, 2.5, 2.1, 0.5, 2.5 }, - { 1.0, 2.0, 2.0, 1.0, 2.0 }, - { 1.5, 1.5, 1.5, 1.5, 1.5 } - }; - - for (std::size_t i = 0u; a <= b; ++i) - { + maths::CVectorNx1 b(5.0); + + double expected[][5] = {{0.0, 3.2, 2.1, -1.8, 4.5}, + {0.0, 3.2, 2.1, -1.5, 4.5}, + {0.0, 3.2, 2.1, -1.0, 4.0}, + {0.0, 3.2, 2.1, -0.5, 3.5}, + {0.0, 3.0, 2.1, 0.0, 3.0}, + {0.5, 2.5, 2.1, 0.5, 2.5}, + {1.0, 2.0, 2.0, 1.0, 2.0}, + {1.5, 1.5, 1.5, 1.5, 1.5}}; + + for (std::size_t i = 0u; a <= b; ++i) { maths::CVectorNx1 expect(expected[i]); maths::CVectorNx1 actual = maths::CTools::truncate(x, a, b); @@ -1314,33 +1119,18 @@ void CToolsTest::testMiscellaneous() } } -CppUnit::Test *CToolsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CToolsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testProbabilityOfLessLikelySample", - &CToolsTest::testProbabilityOfLessLikelySample) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testIntervalExpectation", - &CToolsTest::testIntervalExpectation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testMixtureProbabilityOfLessLikelySample", - &CToolsTest::testMixtureProbabilityOfLessLikelySample) 
); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testAnomalyScore", - &CToolsTest::testAnomalyScore) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testSpread", - &CToolsTest::testSpread) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testFastLog", - &CToolsTest::testFastLog) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testMiscellaneous", - &CToolsTest::testMiscellaneous) ); +CppUnit::Test* CToolsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CToolsTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testProbabilityOfLessLikelySample", + &CToolsTest::testProbabilityOfLessLikelySample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testIntervalExpectation", &CToolsTest::testIntervalExpectation)); + suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testMixtureProbabilityOfLessLikelySample", + &CToolsTest::testMixtureProbabilityOfLessLikelySample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testAnomalyScore", &CToolsTest::testAnomalyScore)); + suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testSpread", &CToolsTest::testSpread)); + suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testFastLog", &CToolsTest::testFastLog)); + suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testMiscellaneous", &CToolsTest::testMiscellaneous)); return suiteOfTests; } - - diff --git a/lib/maths/unittest/CToolsTest.h b/lib/maths/unittest/CToolsTest.h index 599986316d..8f01243a1a 100644 --- a/lib/maths/unittest/CToolsTest.h +++ b/lib/maths/unittest/CToolsTest.h @@ -9,18 +9,17 @@ #include -class CToolsTest : public CppUnit::TestFixture -{ - public: - void testProbabilityOfLessLikelySample(); - void testIntervalExpectation(); - void testMixtureProbabilityOfLessLikelySample(); - void testAnomalyScore(); - void testSpread(); - void testFastLog(); - void testMiscellaneous(); +class CToolsTest : public CppUnit::TestFixture { +public: + void testProbabilityOfLessLikelySample(); + void testIntervalExpectation(); + void testMixtureProbabilityOfLessLikelySample(); + void testAnomalyScore(); + void testSpread(); + void testFastLog(); + void testMiscellaneous(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CToolsTest_h diff --git a/lib/maths/unittest/CTrendComponentTest.cc b/lib/maths/unittest/CTrendComponentTest.cc index 50572d87e0..a73b0a9f48 100644 --- a/lib/maths/unittest/CTrendComponentTest.cc +++ b/lib/maths/unittest/CTrendComponentTest.cc @@ -7,9 +7,9 @@ #include "CTrendComponentTest.h" #include -#include #include #include +#include #include #include @@ -25,24 +25,19 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; using TDouble1Vec = core::CSmallVector; using TDouble3Vec = core::CSmallVector; using TDouble3VecVec = std::vector; -using TGenerator = TDoubleVec (*)(test::CRandomNumbers &, - core_t::TTime, core_t::TTime, core_t::TTime); +using TGenerator = TDoubleVec (*)(test::CRandomNumbers&, core_t::TTime, core_t::TTime, core_t::TTime); using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using TRegression = maths::CRegression::CLeastSquaresOnline<2, double>; -TDoubleVec multiscaleRandomWalk(test::CRandomNumbers &rng, - core_t::TTime bucketLength, - core_t::TTime start, core_t::TTime end) -{ +TDoubleVec 
multiscaleRandomWalk(test::CRandomNumbers& rng, core_t::TTime bucketLength, core_t::TTime start, core_t::TTime end) { TDoubleVecVec noise(4); core_t::TTime buckets{(end - start) / bucketLength + 1}; @@ -50,19 +45,17 @@ TDoubleVec multiscaleRandomWalk(test::CRandomNumbers &rng, rng.generateNormalSamples(0.0, 0.5, buckets, noise[1]); rng.generateNormalSamples(0.0, 1.0, buckets, noise[2]); rng.generateNormalSamples(0.0, 5.0, buckets, noise[3]); - for (core_t::TTime i = 1; i < buckets; ++i) - { - noise[0][i] = 0.998 * noise[0][i-1] + 0.002 * noise[0][i]; - noise[1][i] = 0.99 * noise[1][i-1] + 0.01 * noise[1][i]; - noise[2][i] = 0.9 * noise[2][i-1] + 0.1 * noise[2][i]; + for (core_t::TTime i = 1; i < buckets; ++i) { + noise[0][i] = 0.998 * noise[0][i - 1] + 0.002 * noise[0][i]; + noise[1][i] = 0.99 * noise[1][i - 1] + 0.01 * noise[1][i]; + noise[2][i] = 0.9 * noise[2][i - 1] + 0.1 * noise[2][i]; } TDoubleVec result; result.reserve(buckets); TDoubleVec rw{0.0, 0.0, 0.0}; - for (core_t::TTime i = 0; i < buckets; ++i) - { + for (core_t::TTime i = 0; i < buckets; ++i) { rw[0] = rw[0] + noise[0][i]; rw[1] = rw[1] + noise[1][i]; rw[2] = rw[2] + noise[2][i]; @@ -73,10 +66,7 @@ TDoubleVec multiscaleRandomWalk(test::CRandomNumbers &rng, return result; } -TDoubleVec piecewiseLinear(test::CRandomNumbers &rng, - core_t::TTime bucketLength, - core_t::TTime start, core_t::TTime end) -{ +TDoubleVec piecewiseLinear(test::CRandomNumbers& rng, core_t::TTime bucketLength, core_t::TTime start, core_t::TTime end) { core_t::TTime buckets{(end - start) / bucketLength + 1}; TDoubleVec knots; @@ -92,12 +82,10 @@ TDoubleVec piecewiseLinear(test::CRandomNumbers &rng, double value{0.0}; - auto knot = knots.begin(); + auto knot = knots.begin(); auto slope = slopes.begin(); - for (core_t::TTime time = start; time < end; time += bucketLength) - { - if (time > start + static_cast(bucketLength * *knot)) - { + for (core_t::TTime time = start; time < end; time += bucketLength) { + if (time > start + static_cast(bucketLength * *knot)) { ++knot; ++slope; } @@ -108,10 +96,7 @@ TDoubleVec piecewiseLinear(test::CRandomNumbers &rng, return result; } -TDoubleVec staircase(test::CRandomNumbers &rng, - core_t::TTime bucketLength, - core_t::TTime start, core_t::TTime end) -{ +TDoubleVec staircase(test::CRandomNumbers& rng, core_t::TTime bucketLength, core_t::TTime start, core_t::TTime end) { core_t::TTime buckets{(end - start) / bucketLength + 1}; TDoubleVec knots; @@ -129,10 +114,8 @@ TDoubleVec staircase(test::CRandomNumbers &rng, auto knot = knots.begin(); auto step = steps.begin(); - for (core_t::TTime time = start; time < end; time += bucketLength) - { - if (time > start + static_cast(bucketLength * *knot)) - { + for (core_t::TTime time = start; time < end; time += bucketLength) { + if (time > start + static_cast(bucketLength * *knot)) { value += *step; ++knot; ++step; @@ -143,10 +126,7 @@ TDoubleVec staircase(test::CRandomNumbers &rng, return result; } -TDoubleVec switching(test::CRandomNumbers &rng, - core_t::TTime bucketLength, - core_t::TTime start, core_t::TTime end) -{ +TDoubleVec switching(test::CRandomNumbers& rng, core_t::TTime bucketLength, core_t::TTime start, core_t::TTime end) { core_t::TTime buckets{(end - start) / bucketLength + 1}; TDoubleVec knots; @@ -164,10 +144,8 @@ TDoubleVec switching(test::CRandomNumbers &rng, auto knot = knots.begin(); auto step = steps.begin(); - for (core_t::TTime time = start; time < end; time += bucketLength) - { - if (time > start + static_cast(bucketLength * *knot)) - { + for 
(core_t::TTime time = start; time < end; time += bucketLength) { + if (time > start + static_cast(bucketLength * *knot)) { value += *step; ++knot; ++step; @@ -177,11 +155,9 @@ TDoubleVec switching(test::CRandomNumbers &rng, return result; } - } -void CTrendComponentTest::testValueAndVariance() -{ +void CTrendComponentTest::testValueAndVariance() { LOG_DEBUG("+---------------------------------------------+"); LOG_DEBUG("| CTrendComponentTest::testValueAndVariance |"); LOG_DEBUG("+---------------------------------------------+"); @@ -199,25 +175,21 @@ void CTrendComponentTest::testValueAndVariance() TDoubleVec values(multiscaleRandomWalk(rng, bucketLength, start, end)); maths::CTrendComponent component{0.012}; - maths::CDecayRateController controller( maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease, 1); + maths::CDecayRateController controller( + maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1); TMeanVarAccumulator normalisedResiduals; - for (core_t::TTime time = start; time < end; time += bucketLength) - { + for (core_t::TTime time = start; time < end; time += bucketLength) { double value{values[(time - start) / bucketLength]}; double prediction{maths::CBasicStatistics::mean(component.value(time, 0.0))}; - if (time > start + bucketLength) - { + if (time > start + bucketLength) { double variance{maths::CBasicStatistics::mean(component.variance(0.0))}; normalisedResiduals.add((value - prediction) / std::sqrt(variance)); } component.add(time, value); - controller.multiplier({prediction}, - {{values[(time - start) / bucketLength] - prediction}}, - bucketLength, 1.0, 0.012); + controller.multiplier({prediction}, {{values[(time - start) / bucketLength] - prediction}}, bucketLength, 1.0, 0.012); component.decayRate(0.012 * controller.multiplier()); component.propagateForwardsByTime(bucketLength); } @@ -227,8 +199,7 @@ void CTrendComponentTest::testValueAndVariance() CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::variance(normalisedResiduals) - 1.0) < 0.2); } -void CTrendComponentTest::testDecayRate() -{ +void CTrendComponentTest::testDecayRate() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CTrendComponentTest::testDecayRate |"); LOG_DEBUG("+--------------------------------------+"); @@ -251,13 +222,12 @@ void CTrendComponentTest::testDecayRate() maths::CTrendComponent component{0.012}; TRegression regression; - maths::CDecayRateController controller( maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease, 1); + maths::CDecayRateController controller( + maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1); TMeanAccumulator error; TMeanAccumulator level; - for (core_t::TTime time = start; time < end; time += bucketLength) - { + for (core_t::TTime time = start; time < end; time += bucketLength) { double value{values[(time - start) / bucketLength]}; component.add(time, value); regression.add(time / 604800.0, value); @@ -267,9 +237,7 @@ void CTrendComponentTest::testDecayRate() error.add(std::fabs(prediction - expectedPrediction)); level.add(value); - controller.multiplier({prediction}, - {{values[(time - start) / bucketLength] - prediction}}, - bucketLength, 1.0, 0.012); + controller.multiplier({prediction}, {{values[(time - start) / bucketLength] - prediction}}, bucketLength, 1.0, 0.012); component.decayRate(0.012 * controller.multiplier()); 
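// Editor's note: propagateForwardsByTime amounts to exponential forgetting,
// compare the regression.age(std::exp(-rate * dt)) call just below. Counts are
// scaled by exp(-decayRate * dt), so the effective memory is roughly 1/decayRate
// time units and the controller shortens it when predictions are biased. A toy
// exponentially weighted mean with the same aging rule (illustrative only):
#include <cmath>

class CToyExpWeightedMean {
public:
    void add(double x) {
        m_Count += 1.0;
        m_Mean += (x - m_Mean) / m_Count;
    }
    void propagateForwardsByTime(double decayRate, double dt) {
        m_Count *= std::exp(-decayRate * dt); // old data fades geometrically
    }
    double mean() const { return m_Mean; }

private:
    double m_Count = 0.0;
    double m_Mean = 0.0;
};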
component.propagateForwardsByTime(bucketLength); regression.age(std::exp(-0.012 * controller.multiplier() * 600.0 / 86400.0)); @@ -278,8 +246,7 @@ void CTrendComponentTest::testDecayRate() //expectedPredictions.push_back(expectedPrediction); } - double relativeError{ maths::CBasicStatistics::mean(error) - / std::fabs(maths::CBasicStatistics::mean(level))}; + double relativeError{maths::CBasicStatistics::mean(error) / std::fabs(maths::CBasicStatistics::mean(level))}; LOG_DEBUG("relative error = " << relativeError); //file << "f = " << core::CContainerPrinter::print(values) << ";" << std::endl; @@ -287,8 +254,7 @@ void CTrendComponentTest::testDecayRate() //file << "pe = " << core::CContainerPrinter::print(expectedPredictions) << ";" << std::endl; } -void CTrendComponentTest::testForecast() -{ +void CTrendComponentTest::testForecast() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CTrendComponentTest::testForecast |"); LOG_DEBUG("+-------------------------------------+"); @@ -297,75 +263,65 @@ void CTrendComponentTest::testForecast() test::CRandomNumbers rng; - auto testForecast = [&rng](TGenerator generate, - core_t::TTime start, - core_t::TTime end) - { - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec predictions; - //TDoubleVec forecastPredictions; - //TDoubleVec forecastLower; - //TDoubleVec forecastUpper; - - core_t::TTime bucketLength{600}; - TDoubleVec values(generate(rng, bucketLength, start, end + 1000 * bucketLength)); - - maths::CTrendComponent component{0.012}; - maths::CDecayRateController controller( maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease, 1); - - core_t::TTime time{0}; - for (/**/; time < end; time += bucketLength) - { - component.add(time, values[time / bucketLength]); - component.propagateForwardsByTime(bucketLength); - - double prediction{maths::CBasicStatistics::mean(component.value(time, 0.0))}; - controller.multiplier({prediction}, - {{values[time / bucketLength] - prediction}}, - bucketLength, 0.3, 0.012); - component.decayRate(0.012 * controller.multiplier()); - //predictions.push_back(prediction); - } - - component.shiftOrigin(time); - - TDouble3VecVec forecast; - component.forecast(time, time + 1000 * bucketLength, 3600, 95.0, - [](core_t::TTime) { return TDouble3Vec(3, 0.0); }, - [&forecast](core_t::TTime, const TDouble3Vec &value) - { - forecast.push_back(value); - }); - - TMeanAccumulator meanError; - TMeanAccumulator meanErrorAt95; - for (auto &errorbar : forecast) - { - core_t::TTime bucket{(time - start) / bucketLength}; - meanError.add( std::fabs((values[bucket] - errorbar[1]) - / std::fabs(values[bucket]))); - meanErrorAt95.add( std::max(std::max(values[bucket] - errorbar[2], - errorbar[0] - values[bucket]), 0.0) - / std::fabs(values[bucket])); - //forecastLower.push_back(errorbar[0]); - //forecastPredictions.push_back(errorbar[1]); - //forecastUpper.push_back(errorbar[2]); - } - - //file << "f = " << core::CContainerPrinter::print(values) << ";" << std::endl; - //file << "p = " << core::CContainerPrinter::print(predictions) << ";" << std::endl; - //file << "fl = " << core::CContainerPrinter::print(forecastLower) << ";" << std::endl; - //file << "fm = " << core::CContainerPrinter::print(forecastPredictions) << ";" << std::endl; - //file << "fu = " << core::CContainerPrinter::print(forecastUpper) << ";" << std::endl; - - LOG_DEBUG("error = " << maths::CBasicStatistics::mean(meanError)); - LOG_DEBUG("error @ 95% = " << 
maths::CBasicStatistics::mean(meanErrorAt95)); - - return std::make_pair(maths::CBasicStatistics::mean(meanError), - maths::CBasicStatistics::mean(meanErrorAt95)); - }; + auto testForecast = [&rng](TGenerator generate, core_t::TTime start, core_t::TTime end) { + //std::ofstream file; + //file.open("results.m"); + //TDoubleVec predictions; + //TDoubleVec forecastPredictions; + //TDoubleVec forecastLower; + //TDoubleVec forecastUpper; + + core_t::TTime bucketLength{600}; + TDoubleVec values(generate(rng, bucketLength, start, end + 1000 * bucketLength)); + + maths::CTrendComponent component{0.012}; + maths::CDecayRateController controller( + maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1); + + core_t::TTime time{0}; + for (/**/; time < end; time += bucketLength) { + component.add(time, values[time / bucketLength]); + component.propagateForwardsByTime(bucketLength); + + double prediction{maths::CBasicStatistics::mean(component.value(time, 0.0))}; + controller.multiplier({prediction}, {{values[time / bucketLength] - prediction}}, bucketLength, 0.3, 0.012); + component.decayRate(0.012 * controller.multiplier()); + //predictions.push_back(prediction); + } + + component.shiftOrigin(time); + + TDouble3VecVec forecast; + component.forecast(time, + time + 1000 * bucketLength, + 3600, + 95.0, + [](core_t::TTime) { return TDouble3Vec(3, 0.0); }, + [&forecast](core_t::TTime, const TDouble3Vec& value) { forecast.push_back(value); }); + + TMeanAccumulator meanError; + TMeanAccumulator meanErrorAt95; + for (auto& errorbar : forecast) { + core_t::TTime bucket{(time - start) / bucketLength}; + meanError.add(std::fabs((values[bucket] - errorbar[1]) / std::fabs(values[bucket]))); + meanErrorAt95.add(std::max(std::max(values[bucket] - errorbar[2], errorbar[0] - values[bucket]), 0.0) / + std::fabs(values[bucket])); + //forecastLower.push_back(errorbar[0]); + //forecastPredictions.push_back(errorbar[1]); + //forecastUpper.push_back(errorbar[2]); + } + + //file << "f = " << core::CContainerPrinter::print(values) << ";" << std::endl; + //file << "p = " << core::CContainerPrinter::print(predictions) << ";" << std::endl; + //file << "fl = " << core::CContainerPrinter::print(forecastLower) << ";" << std::endl; + //file << "fm = " << core::CContainerPrinter::print(forecastPredictions) << ";" << std::endl; + //file << "fu = " << core::CContainerPrinter::print(forecastUpper) << ";" << std::endl; + + LOG_DEBUG("error = " << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG("error @ 95% = " << maths::CBasicStatistics::mean(meanErrorAt95)); + + return std::make_pair(maths::CBasicStatistics::mean(meanError), maths::CBasicStatistics::mean(meanErrorAt95)); + }; double error; double errorAt95; @@ -399,8 +355,7 @@ void CTrendComponentTest::testForecast() } } -void CTrendComponentTest::testPersist() -{ +void CTrendComponentTest::testPersist() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CTrendComponentTest::testPersist |"); LOG_DEBUG("+------------------------------------+"); @@ -417,8 +372,7 @@ void CTrendComponentTest::testPersist() maths::CTrendComponent origComponent{0.012}; - for (core_t::TTime time = start; time < end; time += bucketLength) - { + for (core_t::TTime time = start; time < end; time += bucketLength) { double value{values[(time - start) / bucketLength]}; origComponent.add(time, value); origComponent.propagateForwardsByTime(bucketLength); @@ -440,8 +394,7 @@ void CTrendComponentTest::testPersist() 
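// Editor's note: the persistence tests in this patch all follow one pattern:
// serialise the original object to XML with a state inserter, parse the XML
// back, restore into a freshly constructed object via its accept-restore
// traverser, then require identical checksums and an identical re-serialisation.
// Schematically, in terms of the surrounding test's names (pseudocode):
//
//   std::string origXml = persist(origComponent);            // inserter + toXml
//   restore(origXml, params, restoredComponent);             // parser + traverser
//   CPPUNIT_ASSERT_EQUAL(origComponent.checksum(), restoredComponent.checksum());
//   CPPUNIT_ASSERT_EQUAL(origXml, persist(restoredComponent));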
maths::SDistributionRestoreParams params{maths_t::E_ContinuousData, 0.1}; maths::CTrendComponent restoredComponent{0.1}; - traverser.traverseSubLevel(boost::bind(&maths::CTrendComponent::acceptRestoreTraverser, - &restoredComponent, boost::cref(params), _1)); + traverser.traverseSubLevel(boost::bind(&maths::CTrendComponent::acceptRestoreTraverser, &restoredComponent, boost::cref(params), _1)); CPPUNIT_ASSERT_EQUAL(origComponent.checksum(), restoredComponent.checksum()); @@ -454,22 +407,17 @@ void CTrendComponentTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test *CTrendComponentTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTrendComponentTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTrendComponentTest::testValueAndVariance", - &CTrendComponentTest::testValueAndVariance) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTrendComponentTest::testDecayRate", - &CTrendComponentTest::testDecayRate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTrendComponentTest::testForecast", - &CTrendComponentTest::testForecast) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTrendComponentTest::testPersist", - &CTrendComponentTest::testPersist) ); +CppUnit::Test* CTrendComponentTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTrendComponentTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CTrendComponentTest::testValueAndVariance", + &CTrendComponentTest::testValueAndVariance)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CTrendComponentTest::testDecayRate", &CTrendComponentTest::testDecayRate)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CTrendComponentTest::testForecast", &CTrendComponentTest::testForecast)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CTrendComponentTest::testPersist", &CTrendComponentTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CTrendComponentTest.h b/lib/maths/unittest/CTrendComponentTest.h index d8778bb4c6..cf92c50402 100644 --- a/lib/maths/unittest/CTrendComponentTest.h +++ b/lib/maths/unittest/CTrendComponentTest.h @@ -9,15 +9,14 @@ #include -class CTrendComponentTest : public CppUnit::TestFixture -{ - public: - void testValueAndVariance(); - void testDecayRate(); - void testForecast(); - void testPersist(); +class CTrendComponentTest : public CppUnit::TestFixture { +public: + void testValueAndVariance(); + void testDecayRate(); + void testForecast(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTrendComponentTest_h diff --git a/lib/maths/unittest/CTrendTestsTest.cc b/lib/maths/unittest/CTrendTestsTest.cc index ad0fe3582c..c36d84856f 100644 --- a/lib/maths/unittest/CTrendTestsTest.cc +++ b/lib/maths/unittest/CTrendTestsTest.cc @@ -6,13 +6,13 @@ #include "CTrendTestsTest.h" -#include #include -#include #include #include #include #include +#include +#include #include #include @@ -30,20 +30,18 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TTimeVec = std::vector; using TTimeDoublePr = std::pair; using TTimeDoublePrVec = std::vector; const core_t::TTime HALF_HOUR = core::constants::HOUR / 2; -const core_t::TTime DAY = core::constants::DAY; -const core_t::TTime WEEK = core::constants::WEEK; +const core_t::TTime DAY = core::constants::DAY; +const core_t::TTime WEEK = core::constants::WEEK; } -void CTrendTestsTest::testRandomizedPeriodicity() -{ +void 
CTrendTestsTest::testRandomizedPeriodicity() { LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CTrendTestsTest::testRandomizedPeriodicity |"); LOG_DEBUG("+----------------------------------------------+"); @@ -57,8 +55,7 @@ void CTrendTestsTest::testRandomizedPeriodicity() TMeanAccumulator typeI; TMeanAccumulator typeII; - for (std::size_t t = 0u; t < 5; ++t) - { + for (std::size_t t = 0u; t < 5; ++t) { LOG_DEBUG("*** test = " << t << " ***"); core_t::TTime time = 0; @@ -70,44 +67,26 @@ void CTrendTestsTest::testRandomizedPeriodicity() maths::CRandomizedPeriodicityTest::reset(); maths::CRandomizedPeriodicityTest rtests[8]; - double falsePositives[3] = { 0.0, 0.0, 0.0 }; - double trueNegatives[3] = { 0.0, 0.0, 0.0 }; - double truePositives[5] = { 0.0, 0.0, 0.0, 0.0, 0.0 }; - double falseNegatives[5] = { 0.0, 0.0, 0.0, 0.0, 0.0 }; + double falsePositives[3] = {0.0, 0.0, 0.0}; + double trueNegatives[3] = {0.0, 0.0, 0.0}; + double truePositives[5] = {0.0, 0.0, 0.0, 0.0, 0.0}; + double falseNegatives[5] = {0.0, 0.0, 0.0, 0.0, 0.0}; TMeanVarAccumulator timeToDetectionMoments[5]; TMaxAccumulator timeToDetectionMax[5]; - core_t::TTime lastTruePositive[5] = { time, time, time, time, time }; - TFunction functions[] = - { - &constant, - &ramp, - &markov, - &smoothDaily, - &smoothWeekly, - &spikeyDaily, - &spikeyWeekly, - &weekends - }; - - for (std::size_t i = 0u; i < samples.size(); ++i) - { - for (std::size_t j = 0u; j < boost::size(functions); ++j) - { + core_t::TTime lastTruePositive[5] = {time, time, time, time, time}; + TFunction functions[] = {&constant, &ramp, &markov, &smoothDaily, &smoothWeekly, &spikeyDaily, &spikeyWeekly, &weekends}; + + for (std::size_t i = 0u; i < samples.size(); ++i) { + for (std::size_t j = 0u; j < boost::size(functions); ++j) { rtests[j].add(time, 600.0 * (functions[j])(time) + samples[i]); } - if (time >= day + DAY) - { - for (std::size_t j = 0u; j < boost::size(rtests); ++j) - { - if (j < 3) - { + if (time >= day + DAY) { + for (std::size_t j = 0u; j < boost::size(rtests); ++j) { + if (j < 3) { (rtests[j].test() ? falsePositives[j] : trueNegatives[j]) += 1.0; - } - else - { + } else { (rtests[j].test() ? 
truePositives[j - 3] : falseNegatives[j - 3]) += 1.0; - if (rtests[j].test()) - { + if (rtests[j].test()) { timeToDetectionMoments[j - 3].add(time - lastTruePositive[j - 3]); timeToDetectionMax[j - 3].add(static_cast(time - lastTruePositive[j - 3])); lastTruePositive[j - 3] = time; @@ -121,21 +100,18 @@ void CTrendTestsTest::testRandomizedPeriodicity() LOG_DEBUG("falsePositives = " << core::CContainerPrinter::print(falsePositives)); LOG_DEBUG("trueNegatives = " << core::CContainerPrinter::print(trueNegatives)); - for (std::size_t i = 0u; i < boost::size(falsePositives); ++i) - { + for (std::size_t i = 0u; i < boost::size(falsePositives); ++i) { CPPUNIT_ASSERT(falsePositives[i] / trueNegatives[i] < 0.1); typeI.add(falsePositives[i] / trueNegatives[i]); } LOG_DEBUG("truePositives = " << core::CContainerPrinter::print(truePositives)); LOG_DEBUG("falseNegatives = " << core::CContainerPrinter::print(falseNegatives)); - for (std::size_t i = 0u; i < boost::size(falsePositives); ++i) - { + for (std::size_t i = 0u; i < boost::size(falsePositives); ++i) { CPPUNIT_ASSERT(falseNegatives[i] / truePositives[i] < 0.2); typeII.add(falseNegatives[i] / truePositives[i]); } - for (std::size_t i = 0u; i < boost::size(timeToDetectionMoments); ++i) - { + for (std::size_t i = 0u; i < boost::size(timeToDetectionMoments); ++i) { LOG_DEBUG("time to detect moments = " << timeToDetectionMoments[i]); LOG_DEBUG("maximum time to detect = " << timeToDetectionMax[i][0]); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(timeToDetectionMoments[i]) < 1.5 * DAY); @@ -149,8 +125,7 @@ void CTrendTestsTest::testRandomizedPeriodicity() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(typeII) < 0.05); } -void CTrendTestsTest::testCalendarCyclic() -{ +void CTrendTestsTest::testCalendarCyclic() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CTrendTestsTest::testCalendarCyclic |"); LOG_DEBUG("+---------------------------------------+"); @@ -165,39 +140,32 @@ void CTrendTestsTest::testCalendarCyclic() { // Repeated error on the second day of the month. 
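// Editor's note: the hard-coded timestamps below are seconds since the epoch,
// chosen so each falls on the 2nd of a month in 1970, e.g. 86400 s is
// 1970-01-02T00:00:00Z and 2764800 s is 1970-02-02. A quick standalone way to
// sanity-check such offsets (helper name is illustrative):
#include <cstdio>
#include <ctime>

void printUtcDate(std::time_t t) {
    std::tm tm = *std::gmtime(&t); // UTC breakdown; tm_mon is zero-based
    std::printf("%04d-%02d-%02d\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
}
// printUtcDate(86400) prints 1970-01-02 and printUtcDate(5184000) prints 1970-03-02.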
- core_t::TTime months[] = - { - 86400, // 2nd Jan - 2764800, // 2nd Feb - 5184000, // 2nd Mar - 7862400, // 2nd Apr - 10454400 // 2nd May - }; + core_t::TTime months[] = { + 86400, // 2nd Jan + 2764800, // 2nd Feb + 5184000, // 2nd Mar + 7862400, // 2nd Apr + 10454400 // 2nd May + }; core_t::TTime end = months[boost::size(months) - 1] + 86400; maths::CCalendarCyclicTest cyclic(HALF_HOUR); TDoubleVec error; - for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) - { - ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), - boost::end(months), - time) - boost::begin(months), + for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) { + ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), boost::end(months), time) - boost::begin(months), ptrdiff_t(1), ptrdiff_t(boost::size(months))); rng.generateNormalSamples(0.0, 10.0, 1, error); - if (time >= months[i - 1] + 30000 && time < months[i - 1] + 50000) - { + if (time >= months[i - 1] + 30000 && time < months[i - 1] + 50000) { error[0] *= 5.0; } cyclic.add(time, error[0]); - if (time > 121 * DAY && time % DAY == 0) - { + if (time > 121 * DAY && time % DAY == 0) { TOptionalFeature feature = cyclic.test(); - CPPUNIT_ASSERT_EQUAL(std::string("2nd day of month"), - core::CContainerPrinter::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("2nd day of month"), core::CContainerPrinter::print(feature)); } } } @@ -206,39 +174,32 @@ void CTrendTestsTest::testCalendarCyclic() { // Repeated error on the last day of the month. - core_t::TTime months[] = - { - 2592000, // 31st Jan - 5011200, // 28th Feb - 7689600, // 31st Mar - 10281600, // 30th Apr - 12960000 // 31st May - }; + core_t::TTime months[] = { + 2592000, // 31st Jan + 5011200, // 28th Feb + 7689600, // 31st Mar + 10281600, // 30th Apr + 12960000 // 31st May + }; core_t::TTime end = months[boost::size(months) - 1] + 86400; maths::CCalendarCyclicTest cyclic(HALF_HOUR); TDoubleVec error; - for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) - { - ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), - boost::end(months), - time) - boost::begin(months), + for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) { + ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), boost::end(months), time) - boost::begin(months), ptrdiff_t(1), ptrdiff_t(boost::size(months))); rng.generateNormalSamples(0.0, 10.0, 1, error); - if (time >= months[i - 1] + 10000 && time < months[i - 1] + 20000) - { + if (time >= months[i - 1] + 10000 && time < months[i - 1] + 20000) { error[0] += 12.0; } cyclic.add(time, error[0]); - if (time > 121 * DAY && time % DAY == 0) - { + if (time > 121 * DAY && time % DAY == 0) { TOptionalFeature feature = cyclic.test(); - CPPUNIT_ASSERT_EQUAL(std::string("0 days before end of month"), - core::CContainerPrinter::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("0 days before end of month"), core::CContainerPrinter::print(feature)); } } } @@ -247,39 +208,32 @@ void CTrendTestsTest::testCalendarCyclic() { // Repeated error on first Monday of each month. 
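// Editor's note: "1st Monday of month" style calendar features reduce to
// day-of-week plus day-of-month arithmetic. 1970-01-01 was a Thursday, so
// 345600 s (four days after the epoch) lands on Monday 1970-01-05. A standalone
// predicate for the feature the test below expects (illustrative helper):
#include <ctime>

bool isFirstMondayOfMonth(std::time_t t) {
    std::tm tm = *std::gmtime(&t);
    // Monday has tm_wday == 1; the first Monday is the only one in days 1-7.
    return tm.tm_wday == 1 && tm.tm_mday <= 7;
}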
- core_t::TTime months[] = - { - 345600, // Mon 5th Jan - 2764800, // Mon 2nd Feb - 5184000, // Mon 2nd Mar - 8208000, // Mon 6th Apr - 10627200 // Mon 4th May - }; + core_t::TTime months[] = { + 345600, // Mon 5th Jan + 2764800, // Mon 2nd Feb + 5184000, // Mon 2nd Mar + 8208000, // Mon 6th Apr + 10627200 // Mon 4th May + }; core_t::TTime end = months[boost::size(months) - 1] + 86400; maths::CCalendarCyclicTest cyclic(HALF_HOUR); TDoubleVec error; - for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) - { - ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), - boost::end(months), - time) - boost::begin(months), + for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) { + ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), boost::end(months), time) - boost::begin(months), ptrdiff_t(1), ptrdiff_t(boost::size(months))); rng.generateNormalSamples(0.0, 10.0, 1, error); - if (time >= months[i - 1] + 45000 && time < months[i - 1] + 60000) - { + if (time >= months[i - 1] + 45000 && time < months[i - 1] + 60000) { error[0] += 12.0; } cyclic.add(time, error[0]); - if (time > 121 * DAY && time % DAY == 0) - { + if (time > 121 * DAY && time % DAY == 0) { TOptionalFeature feature = cyclic.test(); - CPPUNIT_ASSERT_EQUAL(std::string("1st Monday of month"), - core::CContainerPrinter::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("1st Monday of month"), core::CContainerPrinter::print(feature)); } } } @@ -287,46 +241,38 @@ void CTrendTestsTest::testCalendarCyclic() LOG_DEBUG("Day of week weeks before end of month"); { // Repeated error on last Friday of each month. - core_t::TTime months[] = - { - 2505600, // Fri 30th Jan - 4924800, // Fri 27th Feb - 7344000, // Fri 27th Mar - 9763200, // Fri 24th Apr - 12787200 // Fri 29th May - }; + core_t::TTime months[] = { + 2505600, // Fri 30th Jan + 4924800, // Fri 27th Feb + 7344000, // Fri 27th Mar + 9763200, // Fri 24th Apr + 12787200 // Fri 29th May + }; core_t::TTime end = months[boost::size(months) - 1] + 86400; maths::CCalendarCyclicTest cyclic(HALF_HOUR); TDoubleVec error; - for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) - { - ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), - boost::end(months), - time) - boost::begin(months), + for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) { + ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), boost::end(months), time) - boost::begin(months), ptrdiff_t(1), ptrdiff_t(boost::size(months))); rng.generateNormalSamples(0.0, 10.0, 1, error); - if (time >= months[i - 1] + 45000 && time < months[i - 1] + 60000) - { + if (time >= months[i - 1] + 45000 && time < months[i - 1] + 60000) { error[0] += 12.0; } cyclic.add(time, error[0]); - if (time > 121 * DAY && time % DAY == 0) - { + if (time > 121 * DAY && time % DAY == 0) { TOptionalFeature feature = cyclic.test(); - CPPUNIT_ASSERT_EQUAL(std::string("0 Fridays before end of month"), - core::CContainerPrinter::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("0 Fridays before end of month"), core::CContainerPrinter::print(feature)); } } } } -void CTrendTestsTest::testPersist() -{ +void CTrendTestsTest::testPersist() { LOG_DEBUG("+--------------------------------+"); LOG_DEBUG("| CTrendTestsTest::testPersist |"); LOG_DEBUG("+--------------------------------+"); @@ -336,8 +282,7 @@ void CTrendTestsTest::testPersist() LOG_DEBUG("Test CRandomizedPeriodicityTest"); { maths::CRandomizedPeriodicityTest test; - for (core_t::TTime t = 
1400000000; t < 1400050000; t += 5000) - { + for (core_t::TTime t = 1400000000; t < 1400050000; t += 5000) { test.add(t, 0.2); } @@ -366,8 +311,7 @@ void CTrendTestsTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( - &maths::CRandomizedPeriodicityTest::acceptRestoreTraverser, &test2, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CRandomizedPeriodicityTest::acceptRestoreTraverser, &test2, _1))); } std::string newXml; { @@ -381,8 +325,7 @@ void CTrendTestsTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origStaticsXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel( - &maths::CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser)); + CPPUNIT_ASSERT(traverser.traverseSubLevel(&maths::CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser)); } std::string newStaticsXml; { @@ -403,8 +346,7 @@ void CTrendTestsTest::testPersist() maths::CCalendarCyclicTest orig(HALF_HOUR); TDoubleVec error; - for (core_t::TTime time = 0; time <= 12787200; time += HALF_HOUR) - { + for (core_t::TTime time = 0; time <= 12787200; time += HALF_HOUR) { rng.generateNormalSamples(0.0, 10.0, 1, error); orig.add(time, error[0]); } @@ -423,8 +365,7 @@ void CTrendTestsTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( - &maths::CCalendarCyclicTest::acceptRestoreTraverser, &restored, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CCalendarCyclicTest::acceptRestoreTraverser, &restored, _1))); } CPPUNIT_ASSERT_EQUAL(orig.checksum(), restored.checksum()); @@ -438,19 +379,14 @@ void CTrendTestsTest::testPersist() } } -CppUnit::Test *CTrendTestsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CTrendTestsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTrendTestsTest::testRandomizedPeriodicity", - &CTrendTestsTest::testRandomizedPeriodicity) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTrendTestsTest::testCalendarCyclic", - &CTrendTestsTest::testCalendarCyclic) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CTrendTestsTest::testPersist", - &CTrendTestsTest::testPersist) ); +CppUnit::Test* CTrendTestsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTrendTestsTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CTrendTestsTest::testRandomizedPeriodicity", + &CTrendTestsTest::testRandomizedPeriodicity)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CTrendTestsTest::testCalendarCyclic", &CTrendTestsTest::testCalendarCyclic)); + suiteOfTests->addTest(new CppUnit::TestCaller("CTrendTestsTest::testPersist", &CTrendTestsTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CTrendTestsTest.h b/lib/maths/unittest/CTrendTestsTest.h index 0b97c6ceb9..813d2da3d2 100644 --- a/lib/maths/unittest/CTrendTestsTest.h +++ b/lib/maths/unittest/CTrendTestsTest.h @@ -9,14 +9,13 @@ #include -class CTrendTestsTest : public CppUnit::TestFixture -{ - public: - void testRandomizedPeriodicity(); - void testCalendarCyclic(); - void testPersist(); +class CTrendTestsTest : public CppUnit::TestFixture { +public: + void testRandomizedPeriodicity(); + 
void testCalendarCyclic(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CTrendTestsTest_h diff --git a/lib/maths/unittest/CXMeansOnline1dTest.cc b/lib/maths/unittest/CXMeansOnline1dTest.cc index 6b3b14be2b..762b9e6fa2 100644 --- a/lib/maths/unittest/CXMeansOnline1dTest.cc +++ b/lib/maths/unittest/CXMeansOnline1dTest.cc @@ -8,11 +8,11 @@ #include #include -#include -#include #include #include #include +#include +#include #include #include @@ -26,60 +26,49 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TClusterVec = maths::CXMeansOnline1d::TClusterVec; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; -bool restore(const maths::SDistributionRestoreParams ¶ms, - core::CRapidXmlStateRestoreTraverser &traverser, - maths::CXMeansOnline1d::CCluster &result) -{ - return traverser.traverseSubLevel(boost::bind(&maths::CXMeansOnline1d::CCluster::acceptRestoreTraverser, - &result, boost::cref(params), _1)); +bool restore(const maths::SDistributionRestoreParams& params, + core::CRapidXmlStateRestoreTraverser& traverser, + maths::CXMeansOnline1d::CCluster& result) { + return traverser.traverseSubLevel( + boost::bind(&maths::CXMeansOnline1d::CCluster::acceptRestoreTraverser, &result, boost::cref(params), _1)); } -void debug(const TClusterVec &clusters) -{ +void debug(const TClusterVec& clusters) { std::ostringstream c; c << "["; - for (std::size_t j = 0u; j < clusters.size(); ++j) - { - c << " (" << clusters[j].weight(maths_t::E_ClustersFractionWeight) - << ", " << clusters[j].centre() - << ", " << clusters[j].spread() << ")"; + for (std::size_t j = 0u; j < clusters.size(); ++j) { + c << " (" << clusters[j].weight(maths_t::E_ClustersFractionWeight) << ", " << clusters[j].centre() << ", " << clusters[j].spread() + << ")"; } c << " ]"; LOG_DEBUG("clusters = " << c.str()); } - } -void CXMeansOnline1dTest::testCluster() -{ +void CXMeansOnline1dTest::testCluster() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testCluster |"); LOG_DEBUG("+------------------------------------+"); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - 0.1); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.1); maths::CXMeansOnline1d::CCluster cluster(clusterer); - double x1[] = { 1.1, 2.3, 1.5, 0.9, 4.7, 3.2, 2.8, 2.3, 1.9, 2.6 }; - double c1[] = { 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0 }; + double x1[] = {1.1, 2.3, 1.5, 0.9, 4.7, 3.2, 2.8, 2.3, 1.9, 2.6}; + double c1[] = {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0}; TDoubleVec values; maths::CBasicStatistics::SSampleMeanVar::TAccumulator moments; - for (std::size_t i = 0u; i < boost::size(x1); ++i) - { + for (std::size_t i = 0u; i < boost::size(x1); ++i) { cluster.add(x1[i], c1[i]); moments.add(x1[i], c1[i]); - for (std::size_t j = 0u; j < static_cast(c1[i]); ++j) - { + for (std::size_t j = 0u; j < static_cast(c1[i]); ++j) { values.push_back(x1[i]); } } @@ -88,7 +77,7 @@ void CXMeansOnline1dTest::testCluster() LOG_DEBUG("spread = " << cluster.spread()); LOG_DEBUG("weight = " << cluster.weight(maths_t::E_ClustersFractionWeight)); - double expectedCount = maths::CBasicStatistics::count(moments); + double expectedCount = maths::CBasicStatistics::count(moments); double expectedCentre = 
maths::CBasicStatistics::mean(moments); double expectedSpread = std::sqrt(maths::CBasicStatistics::variance(moments)); LOG_DEBUG("expected count = " << expectedCount); @@ -113,8 +102,7 @@ void CXMeansOnline1dTest::testCluster() maths::CBasicStatistics::SSampleMeanVar::TAccumulator percentileError; std::sort(values.begin(), values.end()); - for (std::size_t i = 0u; i < 10; ++i) - { + for (std::size_t i = 0u; i < 10; ++i) { double p = static_cast(10 * i) + 5.0; double expectedPercentile = values[static_cast(p / 100.0 * static_cast(values.size()) + 0.5)]; LOG_DEBUG(p << " percentile = " << cluster.percentile(p)); @@ -131,8 +119,7 @@ void CXMeansOnline1dTest::testCluster() LOG_DEBUG("samples = " << core::CContainerPrinter::print(samples)); maths::CBasicStatistics::SSampleMeanVar::TAccumulator sampleMoments; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { sampleMoments.add(samples[i]); } double sampleCentre = maths::CBasicStatistics::mean(sampleMoments); @@ -143,8 +130,8 @@ void CXMeansOnline1dTest::testCluster() CPPUNIT_ASSERT_DOUBLES_EQUAL(cluster.spread(), sampleSpread, 0.2); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(cluster.count()), - - cluster.logLikelihoodFromCluster(maths_t::E_ClustersEqualWeight, 1.5) - + cluster.logLikelihoodFromCluster(maths_t::E_ClustersFractionWeight, 1.5), + -cluster.logLikelihoodFromCluster(maths_t::E_ClustersEqualWeight, 1.5) + + cluster.logLikelihoodFromCluster(maths_t::E_ClustersFractionWeight, 1.5), 1e-10); uint64_t origChecksum = cluster.checksum(0); @@ -163,7 +150,8 @@ void CXMeansOnline1dTest::testCluster() core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CXMeansOnline1d::CCluster restoredCluster(clusterer); - maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, 0.1, + maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); @@ -171,29 +159,25 @@ void CXMeansOnline1dTest::testCluster() uint64_t restoredChecksum = restoredCluster.checksum(0); CPPUNIT_ASSERT_EQUAL(origChecksum, restoredChecksum); - double x2[] = { 10.3, 10.6, 10.7, 9.8, 11.2, 11.0 }; - double c2[] = { 2.0, 1.0, 1.0, 2.0, 2.0, 1.0 }; - for (std::size_t i = 0u; i < boost::size(x2); ++i) - { + double x2[] = {10.3, 10.6, 10.7, 9.8, 11.2, 11.0}; + double c2[] = {2.0, 1.0, 1.0, 2.0, 2.0, 1.0}; + for (std::size_t i = 0u; i < boost::size(x2); ++i) { cluster.add(x2[i], c2[i]); - } maths::CXMeansOnline1d::TOptionalClusterClusterPr split = - cluster.split(maths::CAvailableModeDistributions::ALL, - 5.0, 0.0, std::make_pair(0.0, 15.0), clusterer.indexGenerator()); + cluster.split(maths::CAvailableModeDistributions::ALL, 5.0, 0.0, std::make_pair(0.0, 15.0), clusterer.indexGenerator()); CPPUNIT_ASSERT(split); LOG_DEBUG("left centre = " << split->first.centre()); LOG_DEBUG("left spread = " << split->first.spread()); LOG_DEBUG("right centre = " << split->second.centre()); LOG_DEBUG("right spread = " << split->second.spread()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.4, split->first.centre(), 0.05); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.1, split->first.spread(), 0.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(2.4, split->first.centre(), 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.1, split->first.spread(), 0.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.5, split->second.centre(), 0.05); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.6, split->second.spread(), 0.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.6, split->second.spread(), 0.1); } -void 
CXMeansOnline1dTest::testMixtureOfGaussians() -{ +void CXMeansOnline1dTest::testMixtureOfGaussians() { LOG_DEBUG("+-----------------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testMixtureOfGaussians |"); LOG_DEBUG("+-----------------------------------------------+"); @@ -215,12 +199,7 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() TDoubleVec mode3; rng.generateNormalSamples(35.0, 2.25, 150u, mode3); - TMeanVarAccumulator expectedClusters[] = - { - TMeanVarAccumulator(), - TMeanVarAccumulator(), - TMeanVarAccumulator() - }; + TMeanVarAccumulator expectedClusters[] = {TMeanVarAccumulator(), TMeanVarAccumulator(), TMeanVarAccumulator()}; expectedClusters[0].add(mode1); expectedClusters[1].add(mode2); expectedClusters[2].add(mode3); @@ -234,25 +213,20 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() double meanError = 0.0; double spreadError = 0.0; - for (unsigned int i = 0u; i < 50u; ++i) - { + for (unsigned int i = 0u; i < 50u; ++i) { // Randomize the input order. rng.random_shuffle(samples.begin(), samples.end()); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - 0.001); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001); //std::ostringstream name; //name << "results.m." << i; //std::ofstream file; //file.open(name.str().c_str()); - for (std::size_t j = 0u; j < samples.size(); ++j) - { - if (j % 50 == 0) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { + if (j % 50 == 0) { LOG_DEBUG("time = " << j); } @@ -275,7 +249,7 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() // Check we've got three clusters and their position // and spread is what we'd expect. 
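// Editor's note: the split decisions behind these assertions can be reproduced
// in miniature with a penalised-likelihood comparison: fit one Gaussian versus
// two (here via a fixed threshold split) and compare BIC = k * ln(n) - 2 * ln(L).
// This is only a sketch of the idea; the clusterer additionally scores other
// candidate mode distributions (see CAvailableModeDistributions::ALL above).
#include <cmath>
#include <vector>

namespace {
double mean(const std::vector<double>& x) {
    double s = 0.0;
    for (double xi : x) { s += xi; }
    return s / static_cast<double>(x.size());
}
double variance(const std::vector<double>& x, double mu) {
    double v = 0.0;
    for (double xi : x) { v += (xi - mu) * (xi - mu); }
    return v / static_cast<double>(x.size());
}
double logLikelihood(const std::vector<double>& x, double mu, double var) {
    double c = std::log(2.0 * std::acos(-1.0) * var);
    double ll = 0.0;
    for (double xi : x) { ll += -0.5 * (c + (xi - mu) * (xi - mu) / var); }
    return ll;
}
}

// True if modelling x as two hard-assigned Gaussians split at `threshold` gives
// lower BIC than a single Gaussian. Assumes both sides are non-empty and
// non-degenerate.
bool splitImprovesBic(const std::vector<double>& x, double threshold) {
    std::vector<double> left, right;
    for (double xi : x) { (xi < threshold ? left : right).push_back(xi); }
    double n = static_cast<double>(x.size());
    double mu = mean(x);
    double bic1 = 2.0 * std::log(n) - 2.0 * logLikelihood(x, mu, variance(x, mu));
    double w = static_cast<double>(left.size()) / n;
    double ll2 = logLikelihood(left, mean(left), variance(left, mean(left))) +
                 logLikelihood(right, mean(right), variance(right, mean(right))) +
                 static_cast<double>(left.size()) * std::log(w) +
                 static_cast<double>(right.size()) * std::log(1.0 - w);
    double bic2 = 5.0 * std::log(n) - 2.0 * ll2; // two means, two variances, one weight
    return bic2 < bic1;
}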
- const TClusterVec &clusters = clusterer.clusters(); + const TClusterVec& clusters = clusterer.clusters(); debug(clusters); LOG_DEBUG("expected = " << core::CContainerPrinter::print(expectedClusters)) @@ -283,18 +257,11 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() LOG_DEBUG("# clusters = " << clusters.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(3), clusters.size()); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), - clusters[j].centre(), - 0.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), - clusters[j].spread(), - 0.4); - meanError += std::fabs(clusters[j].centre() - - maths::CBasicStatistics::mean(expectedClusters[j])); - spreadError += std::fabs(clusters[j].spread() - - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); + for (std::size_t j = 0u; j < clusters.size(); ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(), 0.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread(), 0.4); + meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j])); + spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); } } @@ -325,15 +292,11 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() std::copy(mode1.begin(), mode1.end(), std::back_inserter(samples)); std::copy(mode2.begin(), mode2.end(), std::back_inserter(samples)); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - 0.001); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001); - for (std::size_t j = 0u; j < samples.size(); ++j) - { - if (j % 50 == 0) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { + if (j % 50 == 0) { LOG_DEBUG("time = " << j); } @@ -344,18 +307,14 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() // Check we've got one cluster (the small cluster should // have been deleted) and its mean and spread is what we'd // expect. 
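// Editor's note: the disappearance of the ten-point mode in this test comes down
// to pruning: a cluster whose effective sample count stays below a minimum is
// removed and its points are re-absorbed by the survivors. Schematic version
// (the struct and threshold are illustrative, not the library's types):
#include <algorithm>
#include <vector>

struct SCluster {
    double count;
    double centre;
    double spread;
};

void pruneSmallClusters(std::vector<SCluster>& clusters, double minimumClusterCount) {
    clusters.erase(std::remove_if(clusters.begin(),
                                  clusters.end(),
                                  [minimumClusterCount](const SCluster& cluster) {
                                      return cluster.count < minimumClusterCount;
                                  }),
                   clusters.end());
}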
- const TClusterVec &clusters = clusterer.clusters(); + const TClusterVec& clusters = clusterer.clusters(); debug(clusters); LOG_DEBUG("expected = " << expectedClusters); CPPUNIT_ASSERT_EQUAL(std::size_t(1), clusters.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters), - clusters[0].centre(), - 0.05); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters)), - clusters[0].spread(), - 0.3); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters), clusters[0].centre(), 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters)), clusters[0].spread(), 0.3); } // Test 3: @@ -369,11 +328,7 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() TDoubleVec mode2; rng.generateNormalSamples(11.0, 1.0, 200u, mode2); - TMeanVarAccumulator expectedClusters[] = - { - TMeanVarAccumulator(), - TMeanVarAccumulator() - }; + TMeanVarAccumulator expectedClusters[] = {TMeanVarAccumulator(), TMeanVarAccumulator()}; expectedClusters[0].add(mode1); expectedClusters[1].add(mode2); @@ -385,20 +340,15 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() double meanError = 0.0; double spreadError = 0.0; - for (unsigned int i = 0u; i < 50u; ++i) - { + for (unsigned int i = 0u; i < 50u; ++i) { // Randomize the input order. rng.random_shuffle(samples.begin(), samples.end()); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - 0.001); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001); - for (std::size_t j = 0u; j < samples.size(); ++j) - { - if (j % 50 == 0) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { + if (j % 50 == 0) { LOG_DEBUG("time = " << j); } @@ -408,23 +358,16 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() // Check we've got one cluster and its position // and spread is what we'd expect. 
- const TClusterVec &clusters = clusterer.clusters(); + const TClusterVec& clusters = clusterer.clusters(); debug(clusters); CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size()); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), - clusters[j].centre(), - 0.4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), - clusters[j].spread(), - 0.3); - meanError += std::fabs(clusters[j].centre() - - maths::CBasicStatistics::mean(expectedClusters[j])); - spreadError += std::fabs(clusters[j].spread() - - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); + for (std::size_t j = 0u; j < clusters.size(); ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(), 0.4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread(), 0.3); + meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j])); + spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); } } @@ -437,8 +380,7 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() } } -void CXMeansOnline1dTest::testMixtureOfUniforms() -{ +void CXMeansOnline1dTest::testMixtureOfUniforms() { LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testMixtureOfUniforms |"); LOG_DEBUG("+----------------------------------------------+"); @@ -452,11 +394,7 @@ void CXMeansOnline1dTest::testMixtureOfUniforms() TDoubleVec mode2; rng.generateUniformSamples(25.0, 30.0, 200u, mode2); - TMeanVarAccumulator expectedClusters[] = - { - TMeanVarAccumulator(), - TMeanVarAccumulator() - }; + TMeanVarAccumulator expectedClusters[] = {TMeanVarAccumulator(), TMeanVarAccumulator()}; expectedClusters[0].add(mode1); expectedClusters[1].add(mode2); @@ -469,20 +407,15 @@ void CXMeansOnline1dTest::testMixtureOfUniforms() double spreadError = 0.0; maths::CXMeansOnline1d::TSizeDoublePr2Vec dummy; - for (unsigned int i = 0u; i < 50u; ++i) - { + for (unsigned int i = 0u; i < 50u; ++i) { // Randomize the input order. rng.random_shuffle(samples.begin(), samples.end()); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - 0.001); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001); - for (std::size_t j = 0u; j < samples.size(); ++j) - { - if (j % 50 == 0) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { + if (j % 50 == 0) { LOG_DEBUG("time = " << j); } @@ -491,25 +424,18 @@ void CXMeansOnline1dTest::testMixtureOfUniforms() // Check we've got two clusters and their position // and spread is what we'd expect. 
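// --- Math note (background, not patch content): a uniform mode U(a, b) has
// mean (a + b) / 2 and standard deviation (b - a) / sqrt(12), so the
// U(25.0, 30.0) mode generated above should centre near 27.5 with spread near
// 1.44, which is what the sample-moment comparison below effectively checks.
double uniformSd(double a, double b) {
    return (b - a) / std::sqrt(12.0); // needs <cmath>
}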
- const TClusterVec &clusters = clusterer.clusters(); + const TClusterVec& clusters = clusterer.clusters(); debug(clusters); LOG_DEBUG("expected = " << core::CContainerPrinter::print(expectedClusters)); LOG_DEBUG("# clusters = " << clusters.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size()); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), - clusters[j].centre(), - 0.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), - clusters[j].spread(), - 0.02); - meanError += std::fabs(clusters[j].centre() - - maths::CBasicStatistics::mean(expectedClusters[j])); - spreadError += std::fabs(clusters[j].spread() - - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); + for (std::size_t j = 0u; j < clusters.size(); ++j) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(), 0.01); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread(), 0.02); + meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j])); + spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); } } @@ -521,8 +447,7 @@ void CXMeansOnline1dTest::testMixtureOfUniforms() CPPUNIT_ASSERT(spreadError < 0.01); } -void CXMeansOnline1dTest::testMixtureOfLogNormals() -{ +void CXMeansOnline1dTest::testMixtureOfLogNormals() { LOG_DEBUG("+------------------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testMixtureOfLogNormals |"); LOG_DEBUG("+------------------------------------------------+"); @@ -536,11 +461,7 @@ void CXMeansOnline1dTest::testMixtureOfLogNormals() TDoubleVec mode2; rng.generateLogNormalSamples(4.0, 0.01, 100u, mode2); - TMeanVarAccumulator expectedClusters[] = - { - TMeanVarAccumulator(), - TMeanVarAccumulator() - }; + TMeanVarAccumulator expectedClusters[] = {TMeanVarAccumulator(), TMeanVarAccumulator()}; expectedClusters[0].add(mode1); expectedClusters[1].add(mode2); @@ -553,25 +474,20 @@ void CXMeansOnline1dTest::testMixtureOfLogNormals() double spreadError = 0.0; maths::CXMeansOnline1d::TSizeDoublePr2Vec dummy; - for (unsigned int i = 0u; i < 50u; ++i) - { + for (unsigned int i = 0u; i < 50u; ++i) { // Randomize the input order. rng.random_shuffle(samples.begin(), samples.end()); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - 0.001); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001); //std::ostringstream name; //name << "results.m." << i; //std::ofstream file; //file.open(name.str().c_str()); - for (std::size_t j = 0u; j < samples.size(); ++j) - { - if (j % 50 == 0) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { + if (j % 50 == 0) { LOG_DEBUG("time = " << j); } @@ -593,27 +509,23 @@ void CXMeansOnline1dTest::testMixtureOfLogNormals() // Check we've got two clusters and their position // and spread is what we'd expect. 
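// --- Math note (background, not patch content): a log-normal with location m
// and squared scale s2 has mean exp(m + s2 / 2) and variance
// (exp(s2) - 1) * exp(2 * m + s2). Assuming the (location, squared scale)
// argument order used by generateLogNormalSamples above, the (4.0, 0.01) mode
// should centre near exp(4.005), about 54.9. Because the two modes live on very
// different scales, the assertions below use relative rather than absolute
// tolerances: tolerance = fraction * max(expected, actual).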
- const TClusterVec &clusters = clusterer.clusters(); + const TClusterVec& clusters = clusterer.clusters(); debug(clusters); LOG_DEBUG("expected = " << core::CContainerPrinter::print(expectedClusters)); LOG_DEBUG("# clusters = " << clusters.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size()); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { + for (std::size_t j = 0u; j < clusters.size(); ++j) { CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(), - 0.03 * std::max(maths::CBasicStatistics::mean(expectedClusters[j]), - clusters[j].centre())); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), - clusters[j].spread(), - 0.5 * std::max(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), - clusters[j].spread())); - meanError += std::fabs(clusters[j].centre() - - maths::CBasicStatistics::mean(expectedClusters[j])); - spreadError += std::fabs(clusters[j].spread() - - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); + 0.03 * std::max(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre())); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), + clusters[j].spread(), + 0.5 * std::max(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread())); + meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j])); + spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); } } @@ -625,8 +537,7 @@ void CXMeansOnline1dTest::testMixtureOfLogNormals() CPPUNIT_ASSERT(spreadError < 0.14); } -void CXMeansOnline1dTest::testOutliers() -{ +void CXMeansOnline1dTest::testOutliers() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testOutliers |"); LOG_DEBUG("+-------------------------------------+"); @@ -639,11 +550,7 @@ void CXMeansOnline1dTest::testOutliers() rng.generateNormalSamples(18.0, 1.0, 50u, mode2); TDoubleVec outliers(7u, 2000.0); - TMeanVarAccumulator expectedClusters[] = - { - TMeanVarAccumulator(), - TMeanVarAccumulator() - }; + TMeanVarAccumulator expectedClusters[] = {TMeanVarAccumulator(), TMeanVarAccumulator()}; expectedClusters[0].add(mode1); expectedClusters[1].add(mode2); expectedClusters[1].add(outliers); @@ -658,8 +565,7 @@ void CXMeansOnline1dTest::testOutliers() double n = 0.0; maths::CXMeansOnline1d::TSizeDoublePr2Vec dummy; - for (unsigned int i = 0u; i < 50u; ++i) - { + for (unsigned int i = 0u; i < 50u; ++i) { // Randomize the input order. rng.random_shuffle(samples.begin(), samples.end()); @@ -669,59 +575,51 @@ void CXMeansOnline1dTest::testOutliers() 0.001, // decay rate 0.01); // mode fraction - for (std::size_t j = 0u; j < outliers.size(); ++j) - { + for (std::size_t j = 0u; j < outliers.size(); ++j) { clusterer.add(outliers[j], dummy); } - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { clusterer.add(samples[j], dummy); } // Check we've got two clusters and their position // and spread is what we'd expect. 
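// --- Sketch (not part of the patch): the construction above written out, with
// the two trailing arguments that make this test robust to the seven 2000.0
// outliers. Per the "mode fraction" comment in this file, the last parameter is
// the minimum share of the data a cluster must capture to survive, so a handful
// of outliers cannot hold a permanent spurious cluster.
maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
                                 maths::CAvailableModeDistributions::ALL,
                                 maths_t::E_ClustersFractionWeight,
                                 0.001,  // decay rate
                                 0.01);  // mode fraction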
- const TClusterVec &clusters = clusterer.clusters(); + const TClusterVec& clusters = clusterer.clusters(); debug(clusters); LOG_DEBUG("expected = " << core::CContainerPrinter::print(expectedClusters)); LOG_DEBUG("# clusters = " << clusters.size()); - if (clusters.size() != 2) continue; + if (clusters.size() != 2) + continue; n += 1.0; CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size()); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { + for (std::size_t j = 0u; j < clusters.size(); ++j) { CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(), - 0.01 * std::max(maths::CBasicStatistics::mean(expectedClusters[j]), - clusters[j].centre())); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), - clusters[j].spread(), - 0.03 * std::max(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), - clusters[j].spread())); - meanError += std::fabs(clusters[j].centre() - - maths::CBasicStatistics::mean(expectedClusters[j])); - spreadError += std::fabs(clusters[j].spread() - - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); + 0.01 * std::max(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre())); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), + clusters[j].spread(), + 0.03 * std::max(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread())); + meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j])); + spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); } } meanError /= n; spreadError /= n; - LOG_DEBUG("meanError = " << meanError - << ", spreadError = " << spreadError - << ", n = " << n); + LOG_DEBUG("meanError = " << meanError << ", spreadError = " << spreadError << ", n = " << n); CPPUNIT_ASSERT(meanError < 0.15); CPPUNIT_ASSERT(spreadError < 1.0); } -void CXMeansOnline1dTest::testManyClusters() -{ +void CXMeansOnline1dTest::testManyClusters() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testManyClusters |"); LOG_DEBUG("+-----------------------------------------+"); @@ -732,16 +630,11 @@ void CXMeansOnline1dTest::testManyClusters() TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; - CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/times.csv", - timeseries, - startTime, - endTime, - test::CTimeSeriesTestData::CSV_UNIX_REGEX)); + CPPUNIT_ASSERT( + test::CTimeSeriesTestData::parse("testfiles/times.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_UNIX_REGEX)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); maths::CXMeansOnline1d clusterer(maths_t::E_IntegerData, maths::CAvailableModeDistributions::ALL, @@ -751,42 +644,36 @@ void CXMeansOnline1dTest::testManyClusters() 2); // mode count maths::CXMeansOnline1d::TSizeDoublePr2Vec dummy; - for (std::size_t i = 0u; i < timeseries.size(); ++i) - { + for (std::size_t i = 0u; i < timeseries.size(); ++i) { core_t::TTime tow = timeseries[i].first % core::constants::WEEK; clusterer.add(static_cast(tow), dummy); } // Check we've got ten clusters. 
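// --- Sketch (not part of the patch): the add() call above written out in full.
// Timestamps are folded onto a weekly cycle before clustering, so recurring
// daily/weekly events form clusters in [0, WEEK); the cast target is double,
// matching the clusterer's double-valued add().
core_t::TTime tow = timeseries[i].first % core::constants::WEEK;
clusterer.add(static_cast<double>(tow), dummy);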
- const TClusterVec &clusters = clusterer.clusters(); + const TClusterVec& clusters = clusterer.clusters(); debug(clusters); CPPUNIT_ASSERT_EQUAL(std::size_t(10), clusters.size()); } -void CXMeansOnline1dTest::testLowVariation() -{ +void CXMeansOnline1dTest::testLowVariation() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testLowVariation |"); LOG_DEBUG("+-----------------------------------------+"); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight); + maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight); maths::CXMeansOnline1d::TSizeDoublePr2Vec dummy; - for (std::size_t i = 0u; i < 200; ++i) - { + for (std::size_t i = 0u; i < 200; ++i) { clusterer.add(static_cast(i % 2), dummy); } - const TClusterVec &clusters = clusterer.clusters(); + const TClusterVec& clusters = clusterer.clusters(); debug(clusters); CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size()); } -void CXMeansOnline1dTest::testAdaption() -{ +void CXMeansOnline1dTest::testAdaption() { LOG_DEBUG("+-------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testAdaption |"); LOG_DEBUG("+-------------------------------------+"); @@ -798,8 +685,7 @@ void CXMeansOnline1dTest::testAdaption() // TODO } -void CXMeansOnline1dTest::testLargeHistory() -{ +void CXMeansOnline1dTest::testLargeHistory() { LOG_DEBUG("+-----------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testLargeHistory |"); LOG_DEBUG("+-----------------------------------------+"); @@ -832,8 +718,7 @@ void CXMeansOnline1dTest::testLargeHistory() samples.insert(samples.end(), samples2.begin(), samples2.end()); rng.random_shuffle(samples.begin() + 5000, samples.end()); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { reference.add(samples[i]); clusterer.add(samples[i]); reference.propagateForwardsByTime(1.0); @@ -844,8 +729,7 @@ void CXMeansOnline1dTest::testLargeHistory() CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusterer.clusters().size()); } -void CXMeansOnline1dTest::testPersist() -{ +void CXMeansOnline1dTest::testPersist() { LOG_DEBUG("+------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testPersist |"); LOG_DEBUG("+------------------------------------+"); @@ -867,14 +751,11 @@ void CXMeansOnline1dTest::testPersist() std::copy(mode2.begin(), mode2.end(), std::back_inserter(samples)); std::copy(mode3.begin(), mode3.end(), std::back_inserter(samples)); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersEqualWeight, - 0.05); + maths::CXMeansOnline1d clusterer( + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersEqualWeight, 0.05); maths::CXMeansOnline1d::TSizeDoublePr2Vec dummy; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { clusterer.add(samples[j], dummy); clusterer.propagateForwardsByTime(1.0); } @@ -889,7 +770,8 @@ void CXMeansOnline1dTest::testPersist() LOG_DEBUG("Clusterer XML representation:\n" << origXml); // Restore the XML into a new clusterer. 
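// --- Sketch (not part of the patch) of the persist half of the round trip
// verified here; the inserter class and method names follow this codebase's
// persistence tests, so treat the exact calls as an assumption. The restored
// clusterer must then re-serialize to byte-identical XML.
std::string origXml;
{
    core::CRapidXmlStatePersistInserter inserter("root");
    clusterer.acceptPersistInserter(inserter); // assumed persist entry point
    inserter.toXml(origXml);
}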
- maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, 0.15, + maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, + 0.15, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); @@ -909,16 +791,12 @@ void CXMeansOnline1dTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CXMeansOnline1dTest::testPruneEmptyCluster() -{ +void CXMeansOnline1dTest::testPruneEmptyCluster() { LOG_DEBUG("+----------------------------------------------+"); LOG_DEBUG("| CXMeansOnline1dTest::testPruneEmptyCluster |"); LOG_DEBUG("+----------------------------------------------+"); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight); - + maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight); maths::CXMeansOnline1d::CCluster cluster1(clusterer); cluster1.add(1.0, 12.0); @@ -930,7 +808,6 @@ void CXMeansOnline1dTest::testPruneEmptyCluster() cluster1.add(1.6, 3.0); clusterer.m_Clusters.push_back(cluster1); - maths::CXMeansOnline1d::CCluster cluster2(clusterer); cluster2.add(4.4, 15.0); cluster2.add(4.5, 2.0); @@ -949,44 +826,31 @@ void CXMeansOnline1dTest::testPruneEmptyCluster() CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusterer.clusters().size()); } -CppUnit::Test *CXMeansOnline1dTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CXMeansOnline1dTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testCluster", - &CXMeansOnline1dTest::testCluster) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testMixtureOfGaussians", - &CXMeansOnline1dTest::testMixtureOfGaussians) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testMixtureOfUniforms", - &CXMeansOnline1dTest::testMixtureOfUniforms) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testMixtureOfLogNormals", - &CXMeansOnline1dTest::testMixtureOfLogNormals) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testOutliers", - &CXMeansOnline1dTest::testOutliers) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testManyClusters", - &CXMeansOnline1dTest::testManyClusters) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testLowVariation", - &CXMeansOnline1dTest::testLowVariation) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testAdaption", - &CXMeansOnline1dTest::testAdaption) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testLargeHistory", - &CXMeansOnline1dTest::testLargeHistory) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testPersist", - &CXMeansOnline1dTest::testPersist) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CXMeansOnline1dTest::testPruneEmptyCluster", - &CXMeansOnline1dTest::testPruneEmptyCluster) ); +CppUnit::Test* CXMeansOnline1dTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXMeansOnline1dTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CXMeansOnline1dTest::testCluster", &CXMeansOnline1dTest::testCluster)); + suiteOfTests->addTest(new CppUnit::TestCaller("CXMeansOnline1dTest::testMixtureOfGaussians", + &CXMeansOnline1dTest::testMixtureOfGaussians)); + suiteOfTests->addTest(new 
CppUnit::TestCaller("CXMeansOnline1dTest::testMixtureOfUniforms", + &CXMeansOnline1dTest::testMixtureOfUniforms)); + suiteOfTests->addTest(new CppUnit::TestCaller("CXMeansOnline1dTest::testMixtureOfLogNormals", + &CXMeansOnline1dTest::testMixtureOfLogNormals)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CXMeansOnline1dTest::testOutliers", &CXMeansOnline1dTest::testOutliers)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CXMeansOnline1dTest::testManyClusters", &CXMeansOnline1dTest::testManyClusters)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CXMeansOnline1dTest::testLowVariation", &CXMeansOnline1dTest::testLowVariation)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CXMeansOnline1dTest::testAdaption", &CXMeansOnline1dTest::testAdaption)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CXMeansOnline1dTest::testLargeHistory", &CXMeansOnline1dTest::testLargeHistory)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CXMeansOnline1dTest::testPersist", &CXMeansOnline1dTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller("CXMeansOnline1dTest::testPruneEmptyCluster", + &CXMeansOnline1dTest::testPruneEmptyCluster)); return suiteOfTests; } - diff --git a/lib/maths/unittest/CXMeansOnline1dTest.h b/lib/maths/unittest/CXMeansOnline1dTest.h index bf1b86a1d4..37ae25ff7b 100644 --- a/lib/maths/unittest/CXMeansOnline1dTest.h +++ b/lib/maths/unittest/CXMeansOnline1dTest.h @@ -9,22 +9,21 @@ #include -class CXMeansOnline1dTest : public CppUnit::TestFixture -{ - public: - void testCluster(); - void testMixtureOfGaussians(); - void testMixtureOfUniforms(); - void testMixtureOfLogNormals(); - void testOutliers(); - void testManyClusters(); - void testLowVariation(); - void testAdaption(); - void testLargeHistory(); - void testPersist(); - void testPruneEmptyCluster(); +class CXMeansOnline1dTest : public CppUnit::TestFixture { +public: + void testCluster(); + void testMixtureOfGaussians(); + void testMixtureOfUniforms(); + void testMixtureOfLogNormals(); + void testOutliers(); + void testManyClusters(); + void testLowVariation(); + void testAdaption(); + void testLargeHistory(); + void testPersist(); + void testPruneEmptyCluster(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CXMeansOnline1dTest_h diff --git a/lib/maths/unittest/CXMeansOnlineTest.cc b/lib/maths/unittest/CXMeansOnlineTest.cc index cfc3de6216..09c96fd52e 100644 --- a/lib/maths/unittest/CXMeansOnlineTest.cc +++ b/lib/maths/unittest/CXMeansOnlineTest.cc @@ -21,8 +21,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; @@ -37,82 +36,64 @@ using TMatrix = TXMeans2::TMatrixPrecise; using TMatrixVec = std::vector; template -class CXMeansOnlineForTest : public maths::CXMeansOnline -{ - public: - using TSizeDoublePr2Vec = typename maths::CXMeansOnline::TSizeDoublePr2Vec; - using TPoint = typename maths::CXMeansOnline::TPointPrecise; - using TClusterVec = typename maths::CXMeansOnline::TClusterVec; - using maths::CXMeansOnline::add; - - public: - CXMeansOnlineForTest(maths_t::EDataType dataType, - maths_t::EClusterWeightCalc weightCalc, - double decayRate = 0.0, - double minimumClusterFraction = 0.0) : - maths::CXMeansOnline(dataType, weightCalc, decayRate, minimumClusterFraction) - {} - - void add(const TPoint &x, double count = 1.0) - { - TSizeDoublePr2Vec dummy; - this->maths::CXMeansOnline::add(x, dummy, count); - } +class CXMeansOnlineForTest : public maths::CXMeansOnline { +public: + 
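    // Usage sketch for this wrapper (not part of the patch): it re-exports the
    // protected maths::CXMeansOnline interface so tests can drive the clusterer
    // directly, e.g.
    //
    //   TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight);
    //   clusterer.add(point);                    // forwards with a dummy reassignment vector
    //   const auto& clusters = clusterer.clusters();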
using TSizeDoublePr2Vec = typename maths::CXMeansOnline::TSizeDoublePr2Vec; + using TPoint = typename maths::CXMeansOnline::TPointPrecise; + using TClusterVec = typename maths::CXMeansOnline::TClusterVec; + using maths::CXMeansOnline::add; + +public: + CXMeansOnlineForTest(maths_t::EDataType dataType, + maths_t::EClusterWeightCalc weightCalc, + double decayRate = 0.0, + double minimumClusterFraction = 0.0) + : maths::CXMeansOnline(dataType, weightCalc, decayRate, minimumClusterFraction) {} + + void add(const TPoint& x, double count = 1.0) { + TSizeDoublePr2Vec dummy; + this->maths::CXMeansOnline::add(x, dummy, count); + } - const TClusterVec &clusters() const - { - return this->maths::CXMeansOnline::clusters(); - } + const TClusterVec& clusters() const { return this->maths::CXMeansOnline::clusters(); } }; using TXMeans2ForTest = CXMeansOnlineForTest; using TXMeans2FloatForTest = CXMeansOnlineForTest; -bool restore(const maths::SDistributionRestoreParams ¶ms, - core::CRapidXmlStateRestoreTraverser &traverser, - TXMeans2::CCluster &result) -{ - return traverser.traverseSubLevel(boost::bind(&TXMeans2::CCluster::acceptRestoreTraverser, - &result, boost::cref(params), _1)); +bool restore(const maths::SDistributionRestoreParams& params, core::CRapidXmlStateRestoreTraverser& traverser, TXMeans2::CCluster& result) { + return traverser.traverseSubLevel(boost::bind(&TXMeans2::CCluster::acceptRestoreTraverser, &result, boost::cref(params), _1)); } - } -void CXMeansOnlineTest::testCluster() -{ +void CXMeansOnlineTest::testCluster() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CXMeansOnlineTest::testCluster |"); LOG_DEBUG("+----------------------------------+"); // Test the core functionality of cluster. - TXMeans2 clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight, - 0.1); + TXMeans2 clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.1); TXMeans2::CCluster cluster(clusterer); - double x1[][2] = - { - { 1.1, 2.0 }, - { 2.3, 2.1 }, - { 1.5, 1.4 }, - { 0.9, 0.8 }, - { 4.7, 3.9 }, - { 3.2, 3.2 }, - { 2.8, 2.7 }, - { 2.3, 1.5 }, - { 1.9, 1.6 }, - { 2.6, 2.1 }, - { 2.0, 2.2 }, - { 1.7, 1.9 }, - { 1.8, 1.7 }, - { 2.1, 1.9 } - }; - double c1[] = { 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; + double x1[][2] = {{1.1, 2.0}, + {2.3, 2.1}, + {1.5, 1.4}, + {0.9, 0.8}, + {4.7, 3.9}, + {3.2, 3.2}, + {2.8, 2.7}, + {2.3, 1.5}, + {1.9, 1.6}, + {2.6, 2.1}, + {2.0, 2.2}, + {1.7, 1.9}, + {1.8, 1.7}, + {2.1, 1.9}}; + double c1[] = {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; TCovariances2 moments; - for (std::size_t i = 0u; i < boost::size(x1); ++i) - { + for (std::size_t i = 0u; i < boost::size(x1); ++i) { cluster.add(TPoint(x1[i]), c1[i]); moments.add(TPoint(x1[i]), TPoint(c1[i])); } @@ -121,7 +102,7 @@ void CXMeansOnlineTest::testCluster() LOG_DEBUG("spread = " << cluster.spread()); LOG_DEBUG("weight = " << cluster.weight(maths_t::E_ClustersFractionWeight)); - double expectedCount = maths::CBasicStatistics::count(moments); + double expectedCount = maths::CBasicStatistics::count(moments); TPoint expectedCentre = maths::CBasicStatistics::mean(moments); double expectedSpread = std::sqrt(maths::CBasicStatistics::maximumLikelihoodCovariances(moments).trace() / 2.0); LOG_DEBUG("expected count = " << expectedCount); @@ -149,8 +130,7 @@ void CXMeansOnlineTest::testCluster() LOG_DEBUG("samples = " << core::CContainerPrinter::print(samples)); TCovariances2 sampleMoments; - for (std::size_t i = 0u; i < 
samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { sampleMoments.add(samples[i]); } TPoint sampleCentre = maths::CBasicStatistics::mean(sampleMoments); @@ -161,8 +141,8 @@ void CXMeansOnlineTest::testCluster() CPPUNIT_ASSERT_DOUBLES_EQUAL(cluster.spread(), sampleSpread, 0.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(cluster.count()), - - cluster.logLikelihoodFromCluster(maths_t::E_ClustersEqualWeight, TPoint(1.5)) - + cluster.logLikelihoodFromCluster(maths_t::E_ClustersFractionWeight, TPoint(1.5)), + -cluster.logLikelihoodFromCluster(maths_t::E_ClustersEqualWeight, TPoint(1.5)) + + cluster.logLikelihoodFromCluster(maths_t::E_ClustersFractionWeight, TPoint(1.5)), 1e-10); uint64_t origChecksum = cluster.checksum(0); @@ -181,7 +161,8 @@ void CXMeansOnlineTest::testCluster() core::CRapidXmlStateRestoreTraverser traverser(parser); TXMeans2::CCluster restoredCluster(clusterer); - maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, 0.1, + maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); @@ -189,24 +170,13 @@ void CXMeansOnlineTest::testCluster() uint64_t restoredChecksum = restoredCluster.checksum(0); CPPUNIT_ASSERT_EQUAL(origChecksum, restoredChecksum); - double x2[][2] = - { - { 10.3, 10.4 }, - { 10.6, 10.5 }, - { 10.7, 11.0 }, - { 9.8, 10.2 }, - { 11.2, 11.4 }, - { 11.0, 10.7 }, - { 11.5, 11.3 } - }; - double c2[] = { 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0 }; - for (std::size_t i = 0u; i < boost::size(x2); ++i) - { + double x2[][2] = {{10.3, 10.4}, {10.6, 10.5}, {10.7, 11.0}, {9.8, 10.2}, {11.2, 11.4}, {11.0, 10.7}, {11.5, 11.3}}; + double c2[] = {2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0}; + for (std::size_t i = 0u; i < boost::size(x2); ++i) { cluster.add(TPoint(x2[i]), c2[i]); } maths::CPRNG::CXorOShiro128Plus rng; - TXMeans2::TOptionalClusterClusterPr split = - cluster.split(rng, 5.0, clusterer.indexGenerator()); + TXMeans2::TOptionalClusterClusterPr split = cluster.split(rng, 5.0, clusterer.indexGenerator()); CPPUNIT_ASSERT(split); TPointVec centres; centres.push_back(split->first.centre()); @@ -217,11 +187,7 @@ void CXMeansOnlineTest::testCluster() maths::COrderings::simultaneousSort(centres, spreads); LOG_DEBUG("centres = " << core::CContainerPrinter::print(centres)); LOG_DEBUG("spreads = " << core::CContainerPrinter::print(spreads)); - double expectedCentres[][2] = - { - { 2.25, 2.1125 }, - { 10.64, 10.75 } - }; + double expectedCentres[][2] = {{2.25, 2.1125}, {10.64, 10.75}}; CPPUNIT_ASSERT((centres[0] - TPoint(expectedCentres[0])).euclidean() < 1e-5); CPPUNIT_ASSERT((centres[1] - TPoint(expectedCentres[1])).euclidean() < 1e-5); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.796035, spreads[0], 1e-5); @@ -232,25 +198,18 @@ void CXMeansOnlineTest::testCluster() CPPUNIT_ASSERT_EQUAL(false, split->first.shouldMerge(split->second)); CPPUNIT_ASSERT_EQUAL(false, split->second.shouldMerge(split->first)); - if (split->first.centre() < split->second.centre()) - { - for (std::size_t i = 0u; i < boost::size(x1); ++i) - { + if (split->first.centre() < split->second.centre()) { + for (std::size_t i = 0u; i < boost::size(x1); ++i) { split->second.add(TPoint(x1[i]), c1[i]); } - for (std::size_t i = 0u; i < boost::size(x2); ++i) - { + for (std::size_t i = 0u; i < boost::size(x2); ++i) { split->first.add(TPoint(x2[i]), c2[i]); } - } - else - { - for (std::size_t i = 0u; i < boost::size(x1); ++i) - { + } else { + for (std::size_t i = 0u; i < boost::size(x1); 
++i) { split->first.add(TPoint(x1[i]), c1[i]); } - for (std::size_t i = 0u; i < boost::size(x2); ++i) - { + for (std::size_t i = 0u; i < boost::size(x2); ++i) { split->second.add(TPoint(x2[i]), c2[i]); } } @@ -259,8 +218,7 @@ void CXMeansOnlineTest::testCluster() CPPUNIT_ASSERT_EQUAL(true, split->second.shouldMerge(split->first)); } -void CXMeansOnlineTest::testClusteringVanilla() -{ +void CXMeansOnlineTest::testClusteringVanilla() { LOG_DEBUG("+--------------------------------------------+"); LOG_DEBUG("| CXMeansOnlineTest::testClusteringVanilla |"); LOG_DEBUG("+--------------------------------------------+"); @@ -273,43 +231,28 @@ void CXMeansOnlineTest::testClusteringVanilla() test::CRandomNumbers rng; - double means[][2] = - { - { 10, 15 }, - { 40, 10 }, - { 12, 35 } - }; - double covariances[][2][2] = - { - { {10, 2}, { 2, 15} }, - { {30, 8}, { 8, 15} }, - { {20, -11}, {-11, 25} } - }; - - for (std::size_t t = 0u; t < 10; ++t) - { + double means[][2] = {{10, 15}, {40, 10}, {12, 35}}; + double covariances[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{20, -11}, {-11, 25}}}; + + for (std::size_t t = 0u; t < 10; ++t) { LOG_DEBUG("*** test " << t << " ***"); TDoubleVec mean(&means[0][0], &means[0][2]); TDoubleVecVec covariance; - for (std::size_t i = 0u; i < 2; ++i) - { - covariance.push_back(TDoubleVec(&covariances[0][i][0], - &covariances[0][i][2])); + for (std::size_t i = 0u; i < 2; ++i) { + covariance.push_back(TDoubleVec(&covariances[0][i][0], &covariances[0][i][2])); } TDoubleVecVec samples; rng.generateMultivariateNormalSamples(mean, covariance, 200, samples); - TXMeans2ForTest clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight); + TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight); std::size_t n = 0u; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { clusterer.add(TPoint(samples[i])); n += clusterer.numberClusters(); } double s = static_cast(samples.size()); - double c = static_cast(n) / s; + double c = static_cast(n) / s; LOG_DEBUG("# clusters = " << c); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, c, 1.0 / s); } @@ -320,58 +263,50 @@ void CXMeansOnlineTest::testClusteringVanilla() maths::CBasicStatistics::SSampleMean::TAccumulator meanMeanError; maths::CBasicStatistics::SSampleMean::TAccumulator meanCovError; - for (std::size_t t = 0u; t < 10; ++t) - { + for (std::size_t t = 0u; t < 10; ++t) { LOG_DEBUG("*** test " << t << " ***"); TDoubleVecVec samples; TPointVec centres; TCovariances2Vec expectedMoments(boost::size(means)); - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { TDoubleVec mean(&means[i][0], &means[i][2]); TDoubleVecVec covariance; - for (std::size_t j = 0u; j < 2; ++j) - { - covariance.push_back(TDoubleVec(&covariances[i][j][0], - &covariances[i][j][2])); + for (std::size_t j = 0u; j < 2; ++j) { + covariance.push_back(TDoubleVec(&covariances[i][j][0], &covariances[i][j][2])); } TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariance, 200, samples_); samples.insert(samples.end(), samples_.begin(), samples_.end()); - for (std::size_t j = 0u; j < samples_.size(); ++j) - { + for (std::size_t j = 0u; j < samples_.size(); ++j) { expectedMoments[i].add(TPoint(samples_[j])); } } rng.random_shuffle(samples.begin(), samples.end()); - TXMeans2ForTest clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight); - for (std::size_t i = 0u; i < samples.size(); ++i) 
- { + TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight); + for (std::size_t i = 0u; i < samples.size(); ++i) { clusterer.add(TPoint(samples[i])); } - const TXMeans2ForTest::TClusterVec &clusters = clusterer.clusters(); + const TXMeans2ForTest::TClusterVec& clusters = clusterer.clusters(); LOG_DEBUG("# clusters = " << clusters.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(3), clusters.size()); - for (std::size_t i = 0u; i < clusters.size(); ++i) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { LOG_DEBUG("moments = " << maths::CBasicStatistics::print(clusters[i].covariances())); maths::CBasicStatistics::COrderStatisticsStack meanError; maths::CBasicStatistics::COrderStatisticsStack covError; - for (std::size_t j = 0u; j < expectedMoments.size(); ++j) - { - meanError.add( ( maths::CBasicStatistics::mean(clusters[i].covariances()) - - maths::CBasicStatistics::mean(expectedMoments[j])).euclidean() - / maths::CBasicStatistics::mean(expectedMoments[j]).euclidean()); - covError.add( ( maths::CBasicStatistics::covariances(clusters[i].covariances()) - - maths::CBasicStatistics::covariances(expectedMoments[j])).frobenius() - / maths::CBasicStatistics::covariances(expectedMoments[j]).frobenius()); + for (std::size_t j = 0u; j < expectedMoments.size(); ++j) { + meanError.add((maths::CBasicStatistics::mean(clusters[i].covariances()) - maths::CBasicStatistics::mean(expectedMoments[j])) + .euclidean() / + maths::CBasicStatistics::mean(expectedMoments[j]).euclidean()); + covError.add((maths::CBasicStatistics::covariances(clusters[i].covariances()) - + maths::CBasicStatistics::covariances(expectedMoments[j])) + .frobenius() / + maths::CBasicStatistics::covariances(expectedMoments[j]).frobenius()); } LOG_DEBUG("mean error = " << meanError[0]); LOG_DEBUG("covariance error = " << covError[0]); @@ -388,8 +323,7 @@ void CXMeansOnlineTest::testClusteringVanilla() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanCovError) < 0.06); } -void CXMeansOnlineTest::testClusteringWithOutliers() -{ +void CXMeansOnlineTest::testClusteringWithOutliers() { LOG_DEBUG("+-------------------------------------------------+"); LOG_DEBUG("| CXMeansOnlineTest::testClusteringWithOutliers |"); LOG_DEBUG("+-------------------------------------------------+"); @@ -401,30 +335,13 @@ void CXMeansOnlineTest::testClusteringWithOutliers() test::CRandomNumbers rng; - double means[][2] = - { - { 10, 15 }, - { 40, 10 } - }; - double covariances[][2][2] = - { - { {10, 2}, { 2, 15} }, - { {30, 8}, { 8, 15} } - }; - - double outliers_[][2] = - { - { 600, 10 }, - { 650, 11 }, - { 610, 12 }, - { 700, 16 }, - { 690, 14 } - }; + double means[][2] = {{10, 15}, {40, 10}}; + double covariances[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}}; + + double outliers_[][2] = {{600, 10}, {650, 11}, {610, 12}, {700, 16}, {690, 14}}; TDoubleVecVec outliers; - for (std::size_t i = 0u; i < boost::size(outliers_); ++i) - { - outliers.push_back(TDoubleVec(boost::begin(outliers_[i]), - boost::end(outliers_[i]))); + for (std::size_t i = 0u; i < boost::size(outliers_); ++i) { + outliers.push_back(TDoubleVec(boost::begin(outliers_[i]), boost::end(outliers_[i]))); } // We use the cluster moments to indirectly measure the purity @@ -433,69 +350,57 @@ void CXMeansOnlineTest::testClusteringWithOutliers() maths::CBasicStatistics::SSampleMean::TAccumulator meanMeanError; maths::CBasicStatistics::SSampleMean::TAccumulator meanCovError; - for (std::size_t t = 0u; t < 10; ++t) - { + for (std::size_t t = 0u; t < 10; ++t) { 
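        // --- Background sketch (not part of the patch): an order-statistics
        // stack of length one retains only the smallest value added to it, so
        // the meanError[0] / covError[0] reads below score each recovered
        // cluster against its best-matching generating mode rather than a
        // fixed pairing.
        maths::CBasicStatistics::COrderStatisticsStack<double, 1> best;
        best.add(3.0);
        best.add(1.0);
        best.add(2.0); // best[0] == 1.0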
LOG_DEBUG("*** test " << t << " ***"); TDoubleVecVec samples; TPointVec centres; TCovariances2Vec expectedMoments(boost::size(means)); - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { TDoubleVec mean(&means[i][0], &means[i][2]); TDoubleVecVec covariance; - for (std::size_t j = 0u; j < 2; ++j) - { - covariance.push_back(TDoubleVec(&covariances[i][j][0], - &covariances[i][j][2])); + for (std::size_t j = 0u; j < 2; ++j) { + covariance.push_back(TDoubleVec(&covariances[i][j][0], &covariances[i][j][2])); } TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariance, 200, samples_); samples.insert(samples.end(), samples_.begin(), samples_.end()); - for (std::size_t j = 0u; j < samples_.size(); ++j) - { + for (std::size_t j = 0u; j < samples_.size(); ++j) { expectedMoments[i].add(TPoint(samples_[j])); } } - for (std::size_t i = 0u; i < outliers.size(); ++i) - { + for (std::size_t i = 0u; i < outliers.size(); ++i) { expectedMoments[1].add(TPoint(outliers[i])); } rng.random_shuffle(samples.begin(), samples.end()); - TXMeans2ForTest clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight, - 0.0, - 0.01); + TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.0, 0.01); - for (std::size_t i = 0u; i < outliers.size(); ++i) - { + for (std::size_t i = 0u; i < outliers.size(); ++i) { clusterer.add(TPoint(outliers[i])); } - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { clusterer.add(TPoint(samples[i])); } - const TXMeans2ForTest::TClusterVec &clusters = clusterer.clusters(); + const TXMeans2ForTest::TClusterVec& clusters = clusterer.clusters(); LOG_DEBUG("# clusters = " << clusters.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size()); - for (std::size_t i = 0u; i < clusters.size(); ++i) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { LOG_DEBUG("moments = " << maths::CBasicStatistics::print(clusters[i].covariances())); maths::CBasicStatistics::COrderStatisticsStack meanError; maths::CBasicStatistics::COrderStatisticsStack covError; - for (std::size_t j = 0u; j < expectedMoments.size(); ++j) - { - meanError.add( ( maths::CBasicStatistics::mean(clusters[i].covariances()) - - maths::CBasicStatistics::mean(expectedMoments[j])).euclidean() - / maths::CBasicStatistics::mean(expectedMoments[j]).euclidean()); - covError.add( ( maths::CBasicStatistics::covariances(clusters[i].covariances()) - - maths::CBasicStatistics::covariances(expectedMoments[j])).frobenius() - / maths::CBasicStatistics::covariances(expectedMoments[j]).frobenius()); + for (std::size_t j = 0u; j < expectedMoments.size(); ++j) { + meanError.add((maths::CBasicStatistics::mean(clusters[i].covariances()) - maths::CBasicStatistics::mean(expectedMoments[j])) + .euclidean() / + maths::CBasicStatistics::mean(expectedMoments[j]).euclidean()); + covError.add((maths::CBasicStatistics::covariances(clusters[i].covariances()) - + maths::CBasicStatistics::covariances(expectedMoments[j])) + .frobenius() / + maths::CBasicStatistics::covariances(expectedMoments[j]).frobenius()); } LOG_DEBUG("meanError = " << meanError[0]); @@ -513,8 +418,7 @@ void CXMeansOnlineTest::testClusteringWithOutliers() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanCovError) < 0.07); } -void CXMeansOnlineTest::testManyClusters() -{ +void CXMeansOnlineTest::testManyClusters() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| 
CXMeansOnlineTest::testManyClusters |");
     LOG_DEBUG("+---------------------------------------+");

@@ -529,13 +433,7 @@ void CXMeansOnlineTest::testManyClusters()
     // close on the order of the data's differential entropy given the
     // generating distribution.

-    const std::size_t sizes_[] =
-        {
-            1800, 800, 1100, 400, 600,
-            400, 600, 1300, 400, 900,
-            500, 700, 400, 800, 1500,
-            1200, 500, 300, 1200, 800
-        };
+    const std::size_t sizes_[] = {1800, 800, 1100, 400, 600, 400, 600, 1300, 400, 900, 500, 700, 400, 800, 1500, 1200, 500, 300, 1200, 800};
     TSizeVec sizes(boost::begin(sizes_), boost::end(sizes_));
     double Z = static_cast<double>(std::accumulate(sizes.begin(), sizes.end(), 0));
@@ -547,22 +445,18 @@ void CXMeansOnlineTest::testManyClusters()
     TPointVecVec samples_;
     rng.generateRandomMultivariateNormals(sizes, means, covariances, samples_);
     TPointVec samples;
-    for (std::size_t i = 0u; i < samples_.size(); ++i)
-    {
-        for (std::size_t j = 0u; j < samples_[i].size(); ++j)
-        {
+    for (std::size_t i = 0u; i < samples_.size(); ++i) {
+        for (std::size_t j = 0u; j < samples_[i].size(); ++j) {
             samples.push_back(samples_[i][j]);
         }
     }

     TDoubleVec lgenerating(samples.size());
     TMeanAccumulator differentialEntropy;
-    for (std::size_t i = 0u; i < samples.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
         lgenerating[i] = 0.0;
-        for (std::size_t j = 0u; j < means.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < means.size(); ++j) {
             double lj;
             maths::gaussianLogLikelihood(covariances[j], samples[i] - means[j], lj);
             lgenerating[i] += static_cast<double>(sizes[j]) * std::exp(lj);
@@ -572,32 +465,27 @@ void CXMeansOnlineTest::testManyClusters()
     }
     LOG_DEBUG("differentialEntropy = " << maths::CBasicStatistics::mean(differentialEntropy));

-    for (std::size_t t = 0u; t < 5; ++t)
-    {
+    for (std::size_t t = 0u; t < 5; ++t) {
         LOG_DEBUG("*** test " << t << " ***");

         rng.random_shuffle(samples.begin(), samples.end());

-        TXMeans2ForTest clusterer(maths_t::E_ContinuousData,
-                                  maths_t::E_ClustersFractionWeight);
+        TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight);

-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             clusterer.add(samples[i]);
         }

-        const TXMeans2ForTest::TClusterVec &clusters = clusterer.clusters();
+        const TXMeans2ForTest::TClusterVec& clusters = clusterer.clusters();
         LOG_DEBUG("# clusters = " << clusters.size());

         TMeanAccumulator loss;
-        for (std::size_t i = 0u; i < samples.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < samples.size(); ++i) {
             double l = 0.0;
-            for (std::size_t j = 0u; j < clusters.size(); ++j)
-            {
+            for (std::size_t j = 0u; j < clusters.size(); ++j) {
                 double n = maths::CBasicStatistics::count(clusters[j].covariances());
-                const TPoint &mean = maths::CBasicStatistics::mean(clusters[j].covariances());
-                const TMatrix &covariance = maths::CBasicStatistics::maximumLikelihoodCovariances(clusters[j].covariances());
+                const TPoint& mean = maths::CBasicStatistics::mean(clusters[j].covariances());
+                const TMatrix& covariance = maths::CBasicStatistics::maximumLikelihoodCovariances(clusters[j].covariances());
                 double lj;
                 maths::gaussianLogLikelihood(covariance, samples[i] - mean, lj);
                 l += n * std::exp(lj);
@@ -610,8 +498,7 @@ void CXMeansOnlineTest::testManyClusters()
     }
 }

-void CXMeansOnlineTest::testAdaption()
-{
+void CXMeansOnlineTest::testAdaption() {
     LOG_DEBUG("+-----------------------------------+");
     LOG_DEBUG("| CXMeansOnlineTest::testAdaption |");
     LOG_DEBUG("+-----------------------------------+");
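// --- Math note for the loss bound in testManyClusters above (background, not
// patch content): with mixture weights w_j = n_j / Z, the generating density is
//   f(x) = sum_j w_j * N(x; mu_j, Sigma_j)
// and the empirical differential entropy is H = -(1/n) * sum_i log f(x_i).
// The per-run "loss" recomputes the same quantity with the fitted clusters in
// place of the generating modes; a good clustering keeps its mean within a
// small margin of H.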
@@ -626,60 +513,35 @@ void CXMeansOnlineTest::testAdaption() test::CRandomNumbers rng; - double means_[][2] = - { - { 10, 15 }, - { 30, 10 }, - { 10, 15 }, - { 30, 10 } - }; - double covariances_[][2][2] = - { - { { 10, 2}, {2, 15} }, - { { 30, 8}, {8, 15} }, - { {100, 2}, {2, 15} }, - { {100, 2}, {2, 15} } - }; + double means_[][2] = {{10, 15}, {30, 10}, {10, 15}, {30, 10}}; + double covariances_[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{100, 2}, {2, 15}}, {{100, 2}, {2, 15}}}; TDoubleVecVec means(boost::size(means_)); TDoubleVecVecVec covariances(boost::size(means_)); - for (std::size_t i = 0u; i < boost::size(means_); ++i) - { + for (std::size_t i = 0u; i < boost::size(means_); ++i) { means[i].assign(&means_[i][0], &means_[i][2]); - for (std::size_t j = 0u; j < 2; ++j) - { - covariances[i].push_back(TDoubleVec(&covariances_[i][j][0], - &covariances_[i][j][2])); + for (std::size_t j = 0u; j < 2; ++j) { + covariances[i].push_back(TDoubleVec(&covariances_[i][j][0], &covariances_[i][j][2])); } } - LOG_DEBUG("Clusters Split and Merge") - { - std::size_t n[][4] = - { - { 200, 0, 0, 0 }, - { 100, 100, 0, 0 }, - { 0, 0, 300, 300 } - }; + LOG_DEBUG("Clusters Split and Merge") { + std::size_t n[][4] = {{200, 0, 0, 0}, {100, 100, 0, 0}, {0, 0, 300, 300}}; TCovariances2 totalCovariances; TCovariances2 modeCovariances[4]; - TXMeans2ForTest clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight); + TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight); maths::CBasicStatistics::SSampleMean::TAccumulator meanMeanError; maths::CBasicStatistics::SSampleMean::TAccumulator meanCovError; - for (std::size_t i = 0u; i < boost::size(n); ++i) - { + for (std::size_t i = 0u; i < boost::size(n); ++i) { TDoubleVecVec samples; - for (std::size_t j = 0u; j < boost::size(n[i]); ++j) - { + for (std::size_t j = 0u; j < boost::size(n[i]); ++j) { TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(means[j], covariances[j], n[i][j], samples_); - for (std::size_t k = 0u; k < samples_.size(); ++k) - { + for (std::size_t k = 0u; k < samples_.size(); ++k) { modeCovariances[j].add(TPoint(samples_[k])); totalCovariances.add(TPoint(samples_[k])); } @@ -688,37 +550,35 @@ void CXMeansOnlineTest::testAdaption() rng.random_shuffle(samples.begin(), samples.end()); LOG_DEBUG("# samples = " << samples.size()); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { clusterer.add(TPoint(samples[j])); } - const TXMeans2ForTest::TClusterVec &clusters = clusterer.clusters(); + const TXMeans2ForTest::TClusterVec& clusters = clusterer.clusters(); LOG_DEBUG("# clusters = " << clusters.size()); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { + for (std::size_t j = 0u; j < clusters.size(); ++j) { maths::CBasicStatistics::COrderStatisticsStack meanError; maths::CBasicStatistics::COrderStatisticsStack covError; - if (clusters.size() == 1) - { - meanError.add(( maths::CBasicStatistics::mean(clusters[j].covariances()) - - maths::CBasicStatistics::mean(totalCovariances)).euclidean()); - covError.add( ( maths::CBasicStatistics::covariances(clusters[j].covariances()) - - maths::CBasicStatistics::covariances(totalCovariances)).frobenius() - / maths::CBasicStatistics::covariances(totalCovariances).frobenius()); - } - else - { - for (std::size_t k = 0u; k < boost::size(modeCovariances); ++k) - { - meanError.add( ( maths::CBasicStatistics::mean(clusters[j].covariances()) - - 
maths::CBasicStatistics::mean(modeCovariances[k])).euclidean() - / maths::CBasicStatistics::mean(modeCovariances[k]).euclidean()); - covError.add( ( maths::CBasicStatistics::covariances(clusters[j].covariances()) - - maths::CBasicStatistics::covariances(modeCovariances[k])).frobenius() - / maths::CBasicStatistics::covariances(modeCovariances[k]).frobenius()); + if (clusters.size() == 1) { + meanError.add( + (maths::CBasicStatistics::mean(clusters[j].covariances()) - maths::CBasicStatistics::mean(totalCovariances)) + .euclidean()); + covError.add((maths::CBasicStatistics::covariances(clusters[j].covariances()) - + maths::CBasicStatistics::covariances(totalCovariances)) + .frobenius() / + maths::CBasicStatistics::covariances(totalCovariances).frobenius()); + } else { + for (std::size_t k = 0u; k < boost::size(modeCovariances); ++k) { + meanError.add( + (maths::CBasicStatistics::mean(clusters[j].covariances()) - maths::CBasicStatistics::mean(modeCovariances[k])) + .euclidean() / + maths::CBasicStatistics::mean(modeCovariances[k]).euclidean()); + covError.add((maths::CBasicStatistics::covariances(clusters[j].covariances()) - + maths::CBasicStatistics::covariances(modeCovariances[k])) + .frobenius() / + maths::CBasicStatistics::covariances(modeCovariances[k]).frobenius()); } } @@ -739,8 +599,7 @@ void CXMeansOnlineTest::testAdaption() } } -void CXMeansOnlineTest::testLargeHistory() -{ +void CXMeansOnlineTest::testLargeHistory() { LOG_DEBUG("+---------------------------------------+"); LOG_DEBUG("| CXMeansOnlineTest::testLargeHistory |"); LOG_DEBUG("+---------------------------------------+"); @@ -767,18 +626,15 @@ void CXMeansOnlineTest::testLargeHistory() rng.generateNormalSamples(15.0, 1.0, 200, samples2); TPointVec samples; - for (std::size_t i = 0u; i < samples1.size(); i += 2) - { - samples.push_back(TPoint(TDoubleVec(&samples1[i], &samples1[i+2]))); + for (std::size_t i = 0u; i < samples1.size(); i += 2) { + samples.push_back(TPoint(TDoubleVec(&samples1[i], &samples1[i + 2]))); } - for (std::size_t i = 0u; i < samples2.size(); i += 2) - { - samples.push_back(TPoint(TDoubleVec(&samples2[i], &samples2[i+2]))); + for (std::size_t i = 0u; i < samples2.size(); i += 2) { + samples.push_back(TPoint(TDoubleVec(&samples2[i], &samples2[i + 2]))); } rng.random_shuffle(samples.begin() + 5000, samples.end()); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { reference.add(samples[i]); clusterer.add(samples[i]); reference.propagateForwardsByTime(1.0); @@ -789,8 +645,7 @@ void CXMeansOnlineTest::testLargeHistory() CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusterer.clusters().size()); } -void CXMeansOnlineTest::testLatLongData() -{ +void CXMeansOnlineTest::testLatLongData() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CXMeansOnlineTest::testLatLongData |"); LOG_DEBUG("+--------------------------------------+"); @@ -800,24 +655,18 @@ void CXMeansOnlineTest::testLatLongData() using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; TTimeDoubleVecPrVec timeseries; - CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/lat_lng.csv", - timeseries, - test::CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX)); + CPPUNIT_ASSERT( + test::CTimeSeriesTestData::parse("testfiles/lat_lng.csv", timeseries, test::CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), - timeseries.begin() + 10) - << " ..."); + 
LOG_DEBUG("timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); std::size_t n = timeseries.size(); TCovariances2 reference; - TXMeans2FloatForTest clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight, - 0.0005); + TXMeans2FloatForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.0005); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { TPoint x(timeseries[i].second); reference.add(x); clusterer.add(x); @@ -827,8 +676,7 @@ void CXMeansOnlineTest::testLatLongData() TMeanAccumulator LLR; TMeanAccumulator LLC; - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { TPoint x(timeseries[i].second); { @@ -841,17 +689,16 @@ void CXMeansOnlineTest::testLatLongData() { double ll = 0.0; - double Z = 0.0; - const TXMeans2FloatForTest::TClusterVec &clusters = clusterer.clusters(); - for (std::size_t j = 0u; j < clusters.size(); ++j) - { + double Z = 0.0; + const TXMeans2FloatForTest::TClusterVec& clusters = clusterer.clusters(); + for (std::size_t j = 0u; j < clusters.size(); ++j) { double w = maths::CBasicStatistics::count(clusters[j].covariances()); TPoint mean = maths::CBasicStatistics::mean(clusters[j].covariances()); TMatrix covariance = maths::CBasicStatistics::covariances(clusters[j].covariances()); double llj; maths::gaussianLogLikelihood(covariance, x - mean, llj); ll += w * std::exp(llj); - Z += w; + Z += w; } ll /= Z; LLC.add(std::log(ll)); @@ -863,8 +710,7 @@ void CXMeansOnlineTest::testLatLongData() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(LLC) < 0.6 * maths::CBasicStatistics::mean(LLR)); } -void CXMeansOnlineTest::testPersist() -{ +void CXMeansOnlineTest::testPersist() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CXMeansOnlineTest::testPersist |"); LOG_DEBUG("+----------------------------------+"); @@ -873,30 +719,17 @@ void CXMeansOnlineTest::testPersist() test::CRandomNumbers rng; - double means[][2] = - { - { 10, 15 }, - { 40, 10 }, - { 12, 35 } - }; - double covariances[][2][2] = - { - { {10, 2}, { 2, 15} }, - { {30, 8}, { 8, 15} }, - { {20, -11}, {-11, 25} } - }; + double means[][2] = {{10, 15}, {40, 10}, {12, 35}}; + double covariances[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{20, -11}, {-11, 25}}}; TDoubleVecVec samples; TPointVec centres; - for (std::size_t i = 0u; i < boost::size(means); ++i) - { + for (std::size_t i = 0u; i < boost::size(means); ++i) { TDoubleVec mean(&means[i][0], &means[i][2]); TDoubleVecVec covariance; - for (std::size_t j = 0u; j < 2; ++j) - { - covariance.push_back(TDoubleVec(&covariances[i][j][0], - &covariances[i][j][2])); + for (std::size_t j = 0u; j < 2; ++j) { + covariance.push_back(TDoubleVec(&covariances[i][j][0], &covariances[i][j][2])); } TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariance, 200, samples_); @@ -904,10 +737,8 @@ void CXMeansOnlineTest::testPersist() } rng.random_shuffle(samples.begin(), samples.end()); - TXMeans2ForTest clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight); + for (std::size_t i = 0u; i < samples.size(); ++i) { clusterer.add(TPoint(samples[i])); } @@ -921,7 +752,8 @@ void CXMeansOnlineTest::testPersist() LOG_DEBUG("Clusterer XML representation:\n" << origXml); // Restore the XML into a new clusterer. 
-    maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, 0.15,
+    maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
+                                             0.15,
                                              maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
                                              maths::MINIMUM_CLUSTER_SPLIT_COUNT,
                                              maths::MINIMUM_CATEGORY_COUNT);
@@ -939,34 +771,22 @@ void CXMeansOnlineTest::testPersist()
     CPPUNIT_ASSERT_EQUAL(origXml, newXml);
 }
 
-CppUnit::Test *CXMeansOnlineTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CXMeansOnlineTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansOnlineTest>(
-                                   "CXMeansOnlineTest::testCluster",
-                                   &CXMeansOnlineTest::testCluster) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansOnlineTest>(
-                                   "CXMeansOnlineTest::testClusteringVanilla",
-                                   &CXMeansOnlineTest::testClusteringVanilla) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansOnlineTest>(
-                                   "CXMeansOnlineTest::testClusteringWithOutliers",
-                                   &CXMeansOnlineTest::testClusteringWithOutliers) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansOnlineTest>(
-                                   "CXMeansOnlineTest::testManyClusters",
-                                   &CXMeansOnlineTest::testManyClusters) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansOnlineTest>(
-                                   "CXMeansOnlineTest::testAdaption",
-                                   &CXMeansOnlineTest::testAdaption) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansOnlineTest>(
-                                   "CXMeansOnlineTest::testLargeHistory",
-                                   &CXMeansOnlineTest::testLargeHistory) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansOnlineTest>(
-                                   "CXMeansOnlineTest::testLatLongData",
-                                   &CXMeansOnlineTest::testLatLongData) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansOnlineTest>(
-                                   "CXMeansOnlineTest::testPersist",
-                                   &CXMeansOnlineTest::testPersist) );
+CppUnit::Test* CXMeansOnlineTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXMeansOnlineTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnlineTest>("CXMeansOnlineTest::testCluster", &CXMeansOnlineTest::testCluster));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CXMeansOnlineTest>("CXMeansOnlineTest::testClusteringVanilla", &CXMeansOnlineTest::testClusteringVanilla));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnlineTest>("CXMeansOnlineTest::testClusteringWithOutliers",
+                                                                     &CXMeansOnlineTest::testClusteringWithOutliers));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CXMeansOnlineTest>("CXMeansOnlineTest::testManyClusters", &CXMeansOnlineTest::testManyClusters));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnlineTest>("CXMeansOnlineTest::testAdaption", &CXMeansOnlineTest::testAdaption));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CXMeansOnlineTest>("CXMeansOnlineTest::testLargeHistory", &CXMeansOnlineTest::testLargeHistory));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CXMeansOnlineTest>("CXMeansOnlineTest::testLatLongData", &CXMeansOnlineTest::testLatLongData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnlineTest>("CXMeansOnlineTest::testPersist", &CXMeansOnlineTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CXMeansOnlineTest.h b/lib/maths/unittest/CXMeansOnlineTest.h
index 3311a26f22..34674c7ece 100644
--- a/lib/maths/unittest/CXMeansOnlineTest.h
+++ b/lib/maths/unittest/CXMeansOnlineTest.h
@@ -9,19 +9,18 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CXMeansOnlineTest : public CppUnit::TestFixture
-{
-    public:
-        void testCluster();
-        void testClusteringVanilla();
-        void testClusteringWithOutliers();
-        void testManyClusters();
-        void testAdaption();
-        void testLargeHistory();
-        void testLatLongData();
-        void testPersist();
+class CXMeansOnlineTest : public CppUnit::TestFixture {
+public:
+    void testCluster();
+    void testClusteringVanilla();
+    void testClusteringWithOutliers();
+    void testManyClusters();
+    void testAdaption();
+    void testLargeHistory();
+    void testLatLongData();
+    void testPersist();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CXMeansOnlineTest_h
diff --git a/lib/maths/unittest/CXMeansTest.cc b/lib/maths/unittest/CXMeansTest.cc
index 61f039c749..de92602882 100644
--- a/lib/maths/unittest/CXMeansTest.cc
+++ b/lib/maths/unittest/CXMeansTest.cc
@@ -23,8 +23,7 @@
 
 using namespace ml;
 
-namespace
-{
+namespace {
 using TDoubleVec = std::vector<double>;
 using TSizeVec = std::vector<std::size_t>;
 
@@ -46,103 +45,72 @@ using TMatrix4 = maths::CSymmetricMatrixNxN<double, 4>;
 using TMatrix4Vec = std::vector<TMatrix4>;
 
 //! \brief Expose internals of x-means for testing.
-template<typename POINT, typename COST = maths::CSphericalGaussianInfoCriterion<POINT, maths::E_BIC>>
-class CXMeansForTest : public maths::CXMeans<POINT, COST>
-{
-    public:
-        using TUInt64USet = typename maths::CXMeans<POINT, COST>::TUInt64USet;
-
-    public:
-        CXMeansForTest(std::size_t kmax) :
-            maths::CXMeans<POINT, COST>(kmax)
-        {}
-
-        void improveParams(std::size_t kmeansIterations)
-        {
-            this->maths::CXMeans<POINT, COST>::improveParams(kmeansIterations);
-        }
+template<typename POINT, typename COST = maths::CSphericalGaussianInfoCriterion<POINT, maths::E_BIC>>
+class CXMeansForTest : public maths::CXMeans<POINT, COST> {
+public:
+    using TUInt64USet = typename maths::CXMeans<POINT, COST>::TUInt64USet;
 
-        bool improveStructure(std::size_t clusterSeeds,
-                              std::size_t kmeansIterations)
-        {
-            return this->maths::CXMeans<POINT, COST>::improveStructure(clusterSeeds, kmeansIterations);
-        }
+public:
+    CXMeansForTest(std::size_t kmax) : maths::CXMeans<POINT, COST>(kmax) {}
 
-        const TUInt64USet &inactive() const
-        {
-            return this->maths::CXMeans<POINT, COST>::inactive();
-        }
+    void improveParams(std::size_t kmeansIterations) { this->maths::CXMeans<POINT, COST>::improveParams(kmeansIterations); }
+
+    bool improveStructure(std::size_t clusterSeeds, std::size_t kmeansIterations) {
+        return this->maths::CXMeans<POINT, COST>::improveStructure(clusterSeeds, kmeansIterations);
+    }
+
+    const TUInt64USet& inactive() const { return this->maths::CXMeans<POINT, COST>::inactive(); }
 };
 
 template<typename POINT>
-double logfSphericalGaussian(const POINT &mean,
-                             double variance,
-                             const POINT &x)
-{
+double logfSphericalGaussian(const POINT& mean, double variance, const POINT& x) {
     double d = static_cast<double>(x.dimension());
     double r = (x - mean).euclidean();
-    return -0.5 * d * std::log(boost::math::double_constants::two_pi * variance)
-           -0.5 * r * r / variance;
+    return -0.5 * d * std::log(boost::math::double_constants::two_pi * variance) - 0.5 * r * r / variance;
 }
 
-class CEmpiricalKullbackLeibler
-{
-    public:
-        double value() const
-        {
-            return  maths::CBasicStatistics::mean(m_Divergence)
-                  - std::log(maths::CBasicStatistics::count(m_Divergence));
-        }
+class CEmpiricalKullbackLeibler {
+public:
+    double value() const { return maths::CBasicStatistics::mean(m_Divergence) - std::log(maths::CBasicStatistics::count(m_Divergence)); }
 
-        template<typename POINT>
-        void add(const std::vector<POINT> &points)
-        {
-            typename maths::CBasicStatistics::SSampleMeanVar<POINT>::TAccumulator moments;
-            moments.add(points);
-            POINT mean = maths::CBasicStatistics::mean(moments);
-            POINT variances = maths::CBasicStatistics::variance(moments);
-
-            maths::CBasicStatistics::SSampleMean<double>::TAccumulator variance_;
-            for (std::size_t i = 0u; i < variances.dimension(); ++i)
-            {
-                variance_.add(variances(i));
-            }
-            double variance = maths::CBasicStatistics::mean(variance_);
-            for (std::size_t i = 0u; i < points.size(); ++i)
-            {
-                m_Divergence.add(-logfSphericalGaussian(mean, variance, points[i]));
-            }
+    template<typename POINT>
+    void add(const std::vector<POINT>& points) {
+        typename maths::CBasicStatistics::SSampleMeanVar<POINT>::TAccumulator moments;
+        moments.add(points);
+        POINT mean = maths::CBasicStatistics::mean(moments);
+        POINT variances = maths::CBasicStatistics::variance(moments);
+
+        maths::CBasicStatistics::SSampleMean<double>::TAccumulator variance_;
+        for
(std::size_t i = 0u; i < variances.dimension(); ++i) {
+            variance_.add(variances(i));
+        }
+        double variance = maths::CBasicStatistics::mean(variance_);
+        for (std::size_t i = 0u; i < points.size(); ++i) {
+            m_Divergence.add(-logfSphericalGaussian(mean, variance, points[i]));
         }
+    }
 
-    private:
-        maths::CBasicStatistics::SSampleMean<double>::TAccumulator m_Divergence;
+private:
+    maths::CBasicStatistics::SSampleMean<double>::TAccumulator m_Divergence;
 };
 
-void computePurities(const TSizeVecVec &clusters,
-                     TDoubleVec &purities)
-{
+void computePurities(const TSizeVecVec& clusters, TDoubleVec& purities) {
     purities.clear();
     purities.resize(clusters.size());
 
     TSizeVec counts;
-    for (std::size_t i = 0u; i < clusters.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < clusters.size(); ++i) {
         counts.clear();
-        for (std::size_t j = 0u; j < clusters[i].size(); ++j)
-        {
+        for (std::size_t j = 0u; j < clusters[i].size(); ++j) {
             counts.resize(std::max(counts.size(), clusters[i][j] + 1));
             ++counts[clusters[i][j]];
         }
-        purities[i] =  static_cast<double>(*std::max_element(counts.begin(), counts.end()))
-                     / static_cast<double>(clusters[i].size());
+        purities[i] = static_cast<double>(*std::max_element(counts.begin(), counts.end())) / static_cast<double>(clusters[i].size());
     }
 }
-
 }
 
-void CXMeansTest::testCluster()
-{
+void CXMeansTest::testCluster() {
    LOG_DEBUG("+----------------------------+");
    LOG_DEBUG("|  CXMeansTest::testCluster  |");
    LOG_DEBUG("+----------------------------+");
@@ -158,8 +126,7 @@ void CXMeansTest::testCluster()
     TDoubleVec samples;
     rng.generateUniformSamples(-100.0, 400.0, 800, samples);
 
-    for (std::size_t t = 0u; t < 100; ++t)
-    {
+    for (std::size_t t = 0u; t < 100; ++t) {
         LOG_DEBUG("Test " << t);
         {
             maths::CXMeans<TVector2>::CCluster cluster1;
@@ -169,8 +136,7 @@ void CXMeansTest::testCluster()
             CPPUNIT_ASSERT_EQUAL(std::size_t(0), cluster2.size());
 
             TVector2Vec points;
-            for (std::size_t i = 0u; i < samples.size(); i += 2)
-            {
+            for (std::size_t i = 0u; i < samples.size(); i += 2) {
                 points.push_back(TVector2(&samples[i], &samples[i + 2]));
             }
             TVector2Vec pointsCopy(points);
@@ -193,8 +159,7 @@ void CXMeansTest::testCluster()
             CPPUNIT_ASSERT_EQUAL(std::size_t(0), cluster2.size());
 
             TVector4Vec points;
-            for (std::size_t i = 0u; i < samples.size(); i += 4)
-            {
+            for (std::size_t i = 0u; i < samples.size(); i += 4) {
                 points.push_back(TVector4(&samples[i], &samples[i + 4]));
             }
             TVector4Vec pointsCopy(points);
@@ -212,8 +177,7 @@ void CXMeansTest::testCluster()
     }
 }
 
-void CXMeansTest::testImproveStructure()
-{
+void CXMeansTest::testImproveStructure() {
    LOG_DEBUG("+-------------------------------------+");
    LOG_DEBUG("|  CXMeansTest::testImproveStructure  |");
    LOG_DEBUG("+-------------------------------------+");
 
    maths::CSampling::seed();
 
-    double means[][2] = { { 10.0, 20.0 }, { 50.0, 30.0 } };
-    double covariances[][3] = { { 10.0, -3.0, 15.0 }, { 20.0, 2.0, 5.0 } };
+    double means[][2] = {{10.0, 20.0}, {50.0, 30.0}};
+    double covariances[][3] = {{10.0, -3.0, 15.0}, {20.0, 2.0, 5.0}};
 
     TMeanAccumulator meanError;
 
-    for (std::size_t t = 0u; t < 10; ++t)
-    {
+    for (std::size_t t = 0u; t < 10; ++t) {
         LOG_DEBUG("Test " << t);
 
         TVector2Vec points;
-        for (std::size_t i = 0u; i < 2; ++i)
-        {
+        for (std::size_t i = 0u; i < 2; ++i) {
             TVector2 mean(&means[i][0], &means[i][2]);
             TMatrix2 covariance(&covariances[i][0], &covariances[i][3]);
             TVector2Vec cluster;
@@ -249,8 +211,7 @@ void CXMeansTest::testImproveStructure()
 
         TVector2Vec clusters;
         TUInt64Vec oldChecksums;
-        for (std::size_t i = 0u; i <
xmeans.clusters().size(); ++i) - { + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { clusters.push_back(xmeans.clusters()[i].centre()); oldChecksums.push_back(xmeans.clusters()[i].checksum()); } @@ -258,8 +219,7 @@ void CXMeansTest::testImproveStructure() std::sort(oldChecksums.begin(), oldChecksums.end()); LOG_DEBUG("centres = " << core::CContainerPrinter::print(clusters)); - for (std::size_t i = 0u; i < clusters.size(); ++i) - { + for (std::size_t i = 0u; i < clusters.size(); ++i) { TVector2 mean(&means[i][0], &means[i][2]); double error = (clusters[i] - mean).euclidean(); CPPUNIT_ASSERT(error < 0.75); @@ -270,20 +230,17 @@ void CXMeansTest::testImproveStructure() // as inactive. xmeans.improveStructure(2, 5); TUInt64Vec newChecksums; - for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) - { + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { clusters.push_back(xmeans.clusters()[i].centre()); newChecksums.push_back(xmeans.clusters()[i].checksum()); } std::sort(newChecksums.begin(), newChecksums.end()); TUInt64Vec inactive; - std::set_intersection(oldChecksums.begin(), oldChecksums.end(), - newChecksums.begin(), newChecksums.end(), - std::back_inserter(inactive)); + std::set_intersection( + oldChecksums.begin(), oldChecksums.end(), newChecksums.begin(), newChecksums.end(), std::back_inserter(inactive)); LOG_DEBUG("inactive = " << core::CContainerPrinter::print(inactive)); - for (std::size_t i = 0u; i < inactive.size(); ++i) - { + for (std::size_t i = 0u; i < inactive.size(); ++i) { CPPUNIT_ASSERT(xmeans.inactive().count(inactive[i]) > 0); } } @@ -292,8 +249,7 @@ void CXMeansTest::testImproveStructure() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.25); } -void CXMeansTest::testImproveParams() -{ +void CXMeansTest::testImproveParams() { LOG_DEBUG("+----------------------------------+"); LOG_DEBUG("| CXMeansTest::testImproveParams |"); LOG_DEBUG("+----------------------------------+"); @@ -303,16 +259,14 @@ void CXMeansTest::testImproveParams() maths::CSampling::seed(); - double means[][2] = { { 10.0, 20.0 }, { 30.0, 30.0 } }; - double covariances[][3] = { { 10.0, -3.0, 15.0 }, { 20.0, 2.0, 5.0 } }; + double means[][2] = {{10.0, 20.0}, {30.0, 30.0}}; + double covariances[][3] = {{10.0, -3.0, 15.0}, {20.0, 2.0, 5.0}}; - for (std::size_t t = 0u; t < 10; ++t) - { + for (std::size_t t = 0u; t < 10; ++t) { LOG_DEBUG("Test " << t); TVector2Vec points; - for (std::size_t i = 0u; i < 2; ++i) - { + for (std::size_t i = 0u; i < 2; ++i) { TVector2 mean(&means[i][0], &means[i][2]); TMatrix2 covariance(&covariances[i][0], &covariances[i][3]); TVector2Vec cluster; @@ -328,8 +282,7 @@ void CXMeansTest::testImproveParams() xmeans.improveStructure(2, 1); TVector2Vec seedCentres; - for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) - { + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { seedCentres.push_back(xmeans.clusters()[i].centre()); } std::sort(seedCentres.begin(), seedCentres.end()); @@ -344,21 +297,18 @@ void CXMeansTest::testImproveParams() std::sort(expectedCentres.begin(), expectedCentres.end()); TVector2Vec centres; - for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) - { + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { centres.push_back(xmeans.clusters()[i].centre()); } std::sort(centres.begin(), centres.end()); LOG_DEBUG("expected centres = " << core::CContainerPrinter::print(expectedCentres)); LOG_DEBUG("centres = " << core::CContainerPrinter::print(centres)); - 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentres), - core::CContainerPrinter::print(centres)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentres), core::CContainerPrinter::print(centres)); } } -void CXMeansTest::testOneCluster() -{ +void CXMeansTest::testOneCluster() { LOG_DEBUG("+-------------------------------+"); LOG_DEBUG("| CXMeansTest::testOneCluster |"); LOG_DEBUG("+-------------------------------+"); @@ -375,13 +325,11 @@ void CXMeansTest::testOneCluster() TMeanAccumulator meanNumberClusters; - TVector2Vec means; TMatrix2Vec covariances; TVector2VecVec points; - for (std::size_t t = 0; t < 50; ++t) - { + for (std::size_t t = 0; t < 50; ++t) { LOG_DEBUG("*** test = " << t << " ***"); TSizeVec sizes(1, size); @@ -397,8 +345,7 @@ void CXMeansTest::testOneCluster() xmeans.run(3, 3, 5); CEmpiricalKullbackLeibler klc; - for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) - { + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { klc.add(xmeans.clusters()[i].points()); } @@ -407,19 +354,16 @@ void CXMeansTest::testOneCluster() LOG_DEBUG(" clusters empirical KL = " << klc.value()); meanNumberClusters.add(static_cast(xmeans.clusters().size())); - if (xmeans.clusters().size() > 1) - { + if (xmeans.clusters().size() > 1) { CPPUNIT_ASSERT(kl.value() - klc.value() > 0.7); } } - LOG_DEBUG("mean number clusters = " - << maths::CBasicStatistics::mean(meanNumberClusters)); + LOG_DEBUG("mean number clusters = " << maths::CBasicStatistics::mean(meanNumberClusters)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanNumberClusters) < 1.15); } -void CXMeansTest::testFiveClusters() -{ +void CXMeansTest::testFiveClusters() { LOG_DEBUG("+---------------------------------+"); LOG_DEBUG("| CXMeansTest::testFiveClusters |"); LOG_DEBUG("+---------------------------------+"); @@ -428,7 +372,7 @@ void CXMeansTest::testFiveClusters() maths::CSampling::seed(); - const std::size_t sizes_[] = { 500, 800, 100, 400, 600 }; + const std::size_t sizes_[] = {500, 800, 100, 400, 600}; TSizeVec sizes(boost::begin(sizes_), boost::end(sizes_)); test::CRandomNumbers rng; @@ -445,9 +389,8 @@ void CXMeansTest::testFiveClusters() //std::ofstream file; //file.open("results.m"); - for (std::size_t t = 0; t < 50; ++t) - { - LOG_DEBUG("*** test = " << t+1 << " ***"); + for (std::size_t t = 0; t < 50; ++t) { + LOG_DEBUG("*** test = " << t + 1 << " ***"); rng.generateRandomMultivariateNormals(sizes, means, covariances, points); @@ -456,12 +399,9 @@ void CXMeansTest::testFiveClusters() flatPoints.clear(); CEmpiricalKullbackLeibler kl; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { kl.add(points[i]); - flatPoints.insert(flatPoints.end(), - points[i].begin(), - points[i].end()); + flatPoints.insert(flatPoints.end(), points[i].begin(), points[i].end()); std::sort(points[i].begin(), points[i].end()); } @@ -475,32 +415,23 @@ void CXMeansTest::testFiveClusters() TSizeVecVec trueClusters(xmeans.clusters().size()); std::size_t n = 0u; - for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) - { - const TVector2Vec &clusterPoints = xmeans.clusters()[i].points(); + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { + const TVector2Vec& clusterPoints = xmeans.clusters()[i].points(); klc.add(clusterPoints); n += clusterPoints.size(); //file << "y" << t+1 << i+1 << " = ["; - for (std::size_t j = 0u; j < clusterPoints.size(); ++j) - { + for (std::size_t j = 0u; j < clusterPoints.size(); ++j) { //file << 
clusterPoints[j](0) << "," << clusterPoints[j](1) << "\n"; std::size_t k = 0u; - for (/**/; k < points.size(); ++k) - { - for (TVector2VecCItr itr = std::lower_bound(points[k].begin(), - points[k].end(), - clusterPoints[j]), - end = std::upper_bound(points[k].begin(), - points[k].end(), - clusterPoints[j]); + for (/**/; k < points.size(); ++k) { + for (TVector2VecCItr itr = std::lower_bound(points[k].begin(), points[k].end(), clusterPoints[j]), + end = std::upper_bound(points[k].begin(), points[k].end(), clusterPoints[j]); itr != end; - ++itr) - { - if (clusterPoints[j] == *itr) - { + ++itr) { + if (clusterPoints[j] == *itr) { goto FoundPoint; } } @@ -509,7 +440,7 @@ void CXMeansTest::testFiveClusters() LOG_ERROR("Didn't find " << clusterPoints[j]); CPPUNIT_ASSERT(false); - FoundPoint: + FoundPoint: trueClusters[i].push_back(k); } //file << "];\n"; @@ -522,11 +453,9 @@ void CXMeansTest::testFiveClusters() double minPurity = 1.0; TMeanAccumulator totalPurity; - for (std::size_t i = 0u; i < purities.size(); ++i) - { + for (std::size_t i = 0u; i < purities.size(); ++i) { minPurity = std::min(minPurity, purities[i]); - totalPurity.add(purities[i], - static_cast(xmeans.clusters()[i].size())); + totalPurity.add(purities[i], static_cast(xmeans.clusters()[i].size())); } LOG_DEBUG(" centres = " << core::CContainerPrinter::print(xmeans.centres())); @@ -535,7 +464,7 @@ void CXMeansTest::testFiveClusters() LOG_DEBUG(" clusters empirical KL = " << klc.value()); LOG_DEBUG(" minPurity = " << minPurity); LOG_DEBUG(" totalPurity = " << maths::CBasicStatistics::mean(totalPurity)); - CPPUNIT_ASSERT(minPurity > 0.39); + CPPUNIT_ASSERT(minPurity > 0.39); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(totalPurity) > 0.54); meanNumberClusters.add(static_cast(xmeans.clusters().size())); @@ -543,10 +472,8 @@ void CXMeansTest::testFiveClusters() meanTotalPurity.add(maths::CBasicStatistics::mean(totalPurity)); } - LOG_DEBUG("mean number clusters = " - << maths::CBasicStatistics::mean(meanNumberClusters)); - LOG_DEBUG("sd number clusters = " - << std::sqrt(maths::CBasicStatistics::variance(meanNumberClusters))); + LOG_DEBUG("mean number clusters = " << maths::CBasicStatistics::mean(meanNumberClusters)); + LOG_DEBUG("sd number clusters = " << std::sqrt(maths::CBasicStatistics::variance(meanNumberClusters))); LOG_DEBUG("KL gain = " << maths::CBasicStatistics::mean(klgain)); LOG_DEBUG("mean total purity = " << maths::CBasicStatistics::mean(meanTotalPurity)); @@ -556,8 +483,7 @@ void CXMeansTest::testFiveClusters() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanTotalPurity) > 0.93); } -void CXMeansTest::testTwentyClusters() -{ +void CXMeansTest::testTwentyClusters() { LOG_DEBUG("+-----------------------------------+"); LOG_DEBUG("| CXMeansTest::testTwentyClusters |"); LOG_DEBUG("+-----------------------------------+"); @@ -566,13 +492,7 @@ void CXMeansTest::testTwentyClusters() maths::CSampling::seed(); - const std::size_t sizes_[] = - { - 1800, 800, 1100, 400, 600, - 400, 600, 1300, 400, 900, - 500, 700, 400, 800, 1500, - 1200, 500, 300, 1200, 800 - }; + const std::size_t sizes_[] = {1800, 800, 1100, 400, 600, 400, 600, 1300, 400, 900, 500, 700, 400, 800, 1500, 1200, 500, 300, 1200, 800}; TSizeVec sizes(boost::begin(sizes_), boost::end(sizes_)); test::CRandomNumbers rng; @@ -586,12 +506,9 @@ void CXMeansTest::testTwentyClusters() CEmpiricalKullbackLeibler kl; TVector2Vec flatPoints; - for (std::size_t i = 0u; i < points.size(); ++i) - { + for (std::size_t i = 0u; i < points.size(); ++i) { kl.add(points[i]); - 
flatPoints.insert(flatPoints.end(), - points[i].begin(), - points[i].end()); + flatPoints.insert(flatPoints.end(), points[i].begin(), points[i].end()); std::sort(points[i].begin(), points[i].end()); } @@ -610,32 +527,23 @@ void CXMeansTest::testTwentyClusters() TSizeVecVec trueClusters(xmeans.clusters().size()); std::size_t n = 0u; - for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) - { - const TVector2Vec &clusterPoints = xmeans.clusters()[i].points(); + for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { + const TVector2Vec& clusterPoints = xmeans.clusters()[i].points(); klc.add(clusterPoints); n += clusterPoints.size(); //file << "y" << i+1 << " = ["; - for (std::size_t j = 0u; j < clusterPoints.size(); ++j) - { + for (std::size_t j = 0u; j < clusterPoints.size(); ++j) { //file << clusterPoints[j](0) << "," << clusterPoints[j](1) << "\n"; std::size_t k = 0u; - for (/**/; k < points.size(); ++k) - { - for (TVector2VecCItr itr = std::lower_bound(points[k].begin(), - points[k].end(), - clusterPoints[j]), - end = std::upper_bound(points[k].begin(), - points[k].end(), - clusterPoints[j]); + for (/**/; k < points.size(); ++k) { + for (TVector2VecCItr itr = std::lower_bound(points[k].begin(), points[k].end(), clusterPoints[j]), + end = std::upper_bound(points[k].begin(), points[k].end(), clusterPoints[j]); itr != end; - ++itr) - { - if (clusterPoints[j] == *itr) - { + ++itr) { + if (clusterPoints[j] == *itr) { goto FoundPoint; } } @@ -644,7 +552,7 @@ void CXMeansTest::testTwentyClusters() LOG_ERROR("Didn't find " << clusterPoints[j]); CPPUNIT_ASSERT(false); - FoundPoint: + FoundPoint: trueClusters[i].push_back(k); } //file << "];\n"; @@ -657,11 +565,9 @@ void CXMeansTest::testTwentyClusters() double minPurity = 1.0; TMeanAccumulator totalPurity; - for (std::size_t i = 0u; i < purities.size(); ++i) - { + for (std::size_t i = 0u; i < purities.size(); ++i) { minPurity = std::min(minPurity, purities[i]); - totalPurity.add(purities[i], - static_cast(xmeans.clusters()[i].size())); + totalPurity.add(purities[i], static_cast(xmeans.clusters()[i].size())); } LOG_DEBUG("purities = " << core::CContainerPrinter::print(purities)); @@ -676,8 +582,7 @@ void CXMeansTest::testTwentyClusters() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(totalPurity) > 0.8); } -void CXMeansTest::testPoorlyConditioned() -{ +void CXMeansTest::testPoorlyConditioned() { LOG_DEBUG("+--------------------------------------+"); LOG_DEBUG("| CXMeansTest::testPoorlyConditioned |"); LOG_DEBUG("+--------------------------------------+"); @@ -686,71 +591,34 @@ void CXMeansTest::testPoorlyConditioned() maths::CSampling::seed(); - double points_[][2] = - { - { 0.0, 0.0 }, - { 1.0, 0.5 }, - { 2.0, 1.0 }, - { 3.0, 1.5 }, - { 4.0, 2.0 }, - { 5.0, 2.5 }, - { 6.0, 3.0 }, - { 7.0, 3.5 }, - { 8.0, 4.0 }, - { 9.0, 4.5 }, - { 101.0, 21.9 }, - { 102.0, 21.2 }, - { 101.5, 22.0 }, - { 104.0, 23.0 }, - { 102.6, 21.4 }, - { 101.3, 22.0 }, - { 101.2, 21.0 }, - { 101.1, 22.1 }, - { 101.7, 23.0 }, - { 101.0, 24.0 }, - { 50.0, 50.0 }, - { 51.0, 51.0 }, - { 50.0, 51.0 }, - { 54.0, 53.0 }, - { 52.0, 51.0 }, - { 51.0, 52.0 }, - { 51.0, 52.0 }, - { 53.0, 53.0 }, - { 53.0, 52.0 }, - { 52.0, 54.0 }, - { 52.0, 52.0 }, - { 52.0, 52.0 }, - { 53.0, 52.0 }, - { 51.0, 52.0 } - }; + double points_[][2] = {{0.0, 0.0}, {1.0, 0.5}, {2.0, 1.0}, {3.0, 1.5}, {4.0, 2.0}, {5.0, 2.5}, {6.0, 3.0}, + {7.0, 3.5}, {8.0, 4.0}, {9.0, 4.5}, {101.0, 21.9}, {102.0, 21.2}, {101.5, 22.0}, {104.0, 23.0}, + {102.6, 21.4}, {101.3, 22.0}, {101.2, 21.0}, {101.1, 22.1}, {101.7, 
23.0}, {101.0, 24.0}, {50.0, 50.0},
+                           {51.0, 51.0},  {50.0, 51.0},  {54.0, 53.0}, {52.0, 51.0},  {51.0, 52.0}, {51.0, 52.0},  {53.0, 53.0},
+                           {53.0, 52.0},  {52.0, 54.0},  {52.0, 52.0}, {52.0, 52.0},  {53.0, 52.0}, {51.0, 52.0}};
 
     TVector2Vec cluster1;
-    for (std::size_t i = 0u; i < 10; ++i)
-    {
+    for (std::size_t i = 0u; i < 10; ++i) {
         cluster1.push_back(TVector2(&points_[i][0], &points_[i][2]));
     }
     std::sort(cluster1.begin(), cluster1.end());
     TVector2Vec cluster2;
-    for (std::size_t i = 10u; i < 20; ++i)
-    {
+    for (std::size_t i = 10u; i < 20; ++i) {
         cluster2.push_back(TVector2(&points_[i][0], &points_[i][2]));
     }
     std::sort(cluster2.begin(), cluster2.end());
     TVector2Vec cluster3;
-    for (std::size_t i = 20u; i < boost::size(points_); ++i)
-    {
+    for (std::size_t i = 20u; i < boost::size(points_); ++i) {
         cluster3.push_back(TVector2(&points_[i][0], &points_[i][2]));
     }
     std::sort(cluster3.begin(), cluster3.end());
 
     maths::CXMeans<TVector2, maths::CGaussianInfoCriterion<TVector2, maths::E_BIC>> xmeans(5);
-    for (std::size_t t = 0u; t < 10; ++t)
-    {
+    for (std::size_t t = 0u; t < 10; ++t) {
         LOG_DEBUG("*** test = " << t << " ***");
 
         TVector2Vec points;
-        for (std::size_t i = 0u; i < boost::size(points_); ++i)
-        {
+        for (std::size_t i = 0u; i < boost::size(points_); ++i) {
             points.push_back(TVector2(&points_[i][0], &points_[i][2]));
         }
 
@@ -758,43 +626,25 @@ void CXMeansTest::testPoorlyConditioned()
         xmeans.run(4, 4, 5);
 
         LOG_DEBUG("# clusters = " << xmeans.clusters().size());
-        for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i)
-        {
+        for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) {
             TVector2Vec clusterPoints = xmeans.clusters()[i].points();
             std::sort(clusterPoints.begin(), clusterPoints.end());
             LOG_DEBUG("points = " << core::CContainerPrinter::print(clusterPoints));
-            CPPUNIT_ASSERT(   clusterPoints == cluster1
-                           || clusterPoints == cluster2
-                           || clusterPoints == cluster3);
+            CPPUNIT_ASSERT(clusterPoints == cluster1 || clusterPoints == cluster2 || clusterPoints == cluster3);
         }
     }
 }
 
-CppUnit::Test *CXMeansTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CXMeansTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansTest>(
-                                   "CXMeansTest::testCluster",
-                                   &CXMeansTest::testCluster) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansTest>(
-                                   "CXMeansTest::testImproveStructure",
-                                   &CXMeansTest::testImproveStructure) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansTest>(
-                                   "CXMeansTest::testImproveParams",
-                                   &CXMeansTest::testImproveParams) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansTest>(
-                                   "CXMeansTest::testOneCluster",
-                                   &CXMeansTest::testOneCluster) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansTest>(
-                                   "CXMeansTest::testFiveClusters",
-                                   &CXMeansTest::testFiveClusters) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansTest>(
-                                   "CXMeansTest::testTwentyClusters",
-                                   &CXMeansTest::testTwentyClusters) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CXMeansTest>(
-                                   "CXMeansTest::testPoorlyConditioned",
-                                   &CXMeansTest::testPoorlyConditioned) );
+CppUnit::Test* CXMeansTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXMeansTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testCluster", &CXMeansTest::testCluster));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testImproveStructure", &CXMeansTest::testImproveStructure));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testImproveParams", &CXMeansTest::testImproveParams));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testOneCluster", &CXMeansTest::testOneCluster));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testFiveClusters", &CXMeansTest::testFiveClusters));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testTwentyClusters", &CXMeansTest::testTwentyClusters));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testPoorlyConditioned", &CXMeansTest::testPoorlyConditioned));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CXMeansTest.h b/lib/maths/unittest/CXMeansTest.h
index b8d115241f..2b8947165a 100644
--- a/lib/maths/unittest/CXMeansTest.h
+++ b/lib/maths/unittest/CXMeansTest.h
@@ -9,18 +9,17 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CXMeansTest : public CppUnit::TestFixture
-{
-    public:
-        void testCluster();
-        void testImproveParams();
-        void testImproveStructure();
-        void testOneCluster();
-        void testFiveClusters();
-        void testTwentyClusters();
-        void testPoorlyConditioned();
+class CXMeansTest : public CppUnit::TestFixture {
+public:
+    void testCluster();
+    void testImproveParams();
+    void testImproveStructure();
+    void testOneCluster();
+    void testFiveClusters();
+    void testTwentyClusters();
+    void testPoorlyConditioned();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CXMeansTest_h
diff --git a/lib/maths/unittest/Main.cc b/lib/maths/unittest/Main.cc
index 25fe8c5365..e8bcc74fb2 100644
--- a/lib/maths/unittest/Main.cc
+++ b/lib/maths/unittest/Main.cc
@@ -12,25 +12,25 @@
 #include "CBjkstUniqueValuesTest.h"
 #include "CBootstrapClustererTest.h"
 #include "CBoundingBoxTest.h"
-#include "CCalendarFeatureTest.h"
 #include "CCalendarComponentAdaptiveBucketingTest.h"
+#include "CCalendarFeatureTest.h"
 #include "CCategoricalToolsTest.h"
 #include "CChecksumTest.h"
 #include "CClustererTest.h"
 #include "CCountMinSketchTest.h"
 #include "CDecayRateControllerTest.h"
-#include "CEqualWithToleranceTest.h"
 #include "CEntropySketchTest.h"
+#include "CEqualWithToleranceTest.h"
 #include "CForecastTest.h"
 #include "CGammaRateConjugateTest.h"
 #include "CGramSchmidtTest.h"
 #include "CInformationCriteriaTest.h"
 #include "CIntegerToolsTest.h"
 #include "CIntegrationTest.h"
-#include "CKdTreeTest.h"
 #include "CKMeansFastTest.h"
 #include "CKMeansOnlineTest.h"
 #include "CKMostCorrelatedTest.h"
+#include "CKdTreeTest.h"
 #include "CLassoLogisticRegressionTest.h"
 #include "CLinearAlgebraTest.h"
 #include "CLogNormalMeanPrecConjugateTest.h"
@@ -51,11 +51,11 @@
 #include "COneOfNPriorTest.h"
 #include "COrderingsTest.h"
 #include "COrdinalTest.h"
+#include "CPRNGTest.h"
 #include "CPackedBitVectorTest.h"
 #include "CPeriodicityHypothesisTestsTest.h"
 #include "CPoissonMeanConjugateTest.h"
 #include "CPriorTest.h"
-#include "CPRNGTest.h"
 #include "CProbabilityAggregatorsTest.h"
 #include "CProbabilityCalibratorTest.h"
 #include "CQDigestTest.h"
@@ -64,8 +64,8 @@
 #include "CRandomProjectionClustererTest.h"
 #include "CRegressionTest.h"
 #include "CSamplingTest.h"
-#include "CSeasonalComponentTest.h"
 #include "CSeasonalComponentAdaptiveBucketingTest.h"
+#include "CSeasonalComponentTest.h"
 #include "CSetToolsTest.h"
 #include "CSignalTest.h"
 #include "CSolversTest.h"
@@ -77,89 +77,87 @@
 #include "CToolsTest.h"
 #include "CTrendComponentTest.h"
 #include "CTrendTestsTest.h"
-#include "CXMeansTest.h"
-#include "CXMeansOnlineTest.h"
 #include "CXMeansOnline1dTest.h"
+#include "CXMeansOnlineTest.h"
+#include "CXMeansTest.h"
 
-int main(int argc, const char **argv)
-{
-    ml::test::CTestRunner runner(argc, argv);
+int main(int argc, const char** argv) {
+    ml::test::CTestRunner runner(argc, argv);
 
-    runner.addTest( CAgglomerativeClustererTest::suite() );
-
runner.addTest( CAssignmentTest::suite() ); - runner.addTest( CBasicStatisticsTest::suite() ); - runner.addTest( CBjkstUniqueValuesTest::suite() ); - runner.addTest( CBootstrapClustererTest::suite() ); - runner.addTest( CBoundingBoxTest::suite() ); - runner.addTest( CCategoricalToolsTest::suite() ); - runner.addTest( CCalendarFeatureTest::suite() ); - runner.addTest( CCalendarComponentAdaptiveBucketingTest::suite() ); - runner.addTest( CChecksumTest::suite() ); - runner.addTest( CClustererTest::suite() ); - runner.addTest( CCountMinSketchTest::suite() ); - runner.addTest( CDecayRateControllerTest::suite() ); - runner.addTest( CEqualWithToleranceTest::suite() ); - runner.addTest( CEntropySketchTest::suite() ); - runner.addTest( CForecastTest::suite() ); - runner.addTest( CGammaRateConjugateTest::suite() ); - runner.addTest( CGramSchmidtTest::suite() ); - runner.addTest( CInformationCriteriaTest::suite() ); - runner.addTest( CIntegerToolsTest::suite() ); - runner.addTest( CIntegrationTest::suite() ); - runner.addTest( CKdTreeTest::suite() ); - runner.addTest( CKMeansFastTest::suite() ); - runner.addTest( CKMeansOnlineTest::suite() ); - runner.addTest( CKMostCorrelatedTest::suite() ); - runner.addTest( CLassoLogisticRegressionTest::suite() ); - runner.addTest( CLinearAlgebraTest::suite() ); - runner.addTest( CLogNormalMeanPrecConjugateTest::suite() ); - runner.addTest( CLogTDistributionTest::suite() ); - runner.addTest( CMathsFuncsTest::suite() ); - runner.addTest( CMathsMemoryTest::suite() ); - runner.addTest( CMixtureDistributionTest::suite() ); - runner.addTest( CModelTest::suite() ); - runner.addTest( CMultimodalPriorTest::suite() ); - runner.addTest( CMultinomialConjugateTest::suite() ); - runner.addTest( CMultivariateConstantPriorTest::suite() ); - runner.addTest( CMultivariateMultimodalPriorTest::suite() ); - runner.addTest( CMultivariateNormalConjugateTest::suite() ); - runner.addTest( CMultivariateOneOfNPriorTest::suite() ); - runner.addTest( CNaiveBayesTest::suite() ); - runner.addTest( CNaturalBreaksClassifierTest::suite() ); - runner.addTest( CNormalMeanPrecConjugateTest::suite() ); - runner.addTest( COneOfNPriorTest::suite() ); - runner.addTest( COrderingsTest::suite() ); - runner.addTest( COrdinalTest::suite() ); - runner.addTest( CPackedBitVectorTest::suite() ); - runner.addTest( CPeriodicityHypothesisTestsTest::suite() ); - runner.addTest( CPoissonMeanConjugateTest::suite() ); - runner.addTest( CPriorTest::suite() ); - runner.addTest( CPRNGTest::suite() ); - runner.addTest( CProbabilityAggregatorsTest::suite() ); - runner.addTest( CProbabilityCalibratorTest::suite() ); - runner.addTest( CQDigestTest::suite() ); - runner.addTest( CQuantileSketchTest::suite() ); - runner.addTest( CRadialBasisFunctionTest::suite() ); - runner.addTest( CRandomProjectionClustererTest::suite() ); - runner.addTest( CRegressionTest::suite() ); - runner.addTest( CSamplingTest::suite() ); - runner.addTest( CSeasonalComponentTest::suite() ); - runner.addTest( CSeasonalComponentAdaptiveBucketingTest::suite() ); - runner.addTest( CSetToolsTest::suite() ); - runner.addTest( CSignalTest::suite() ); - runner.addTest( CSolversTest::suite() ); - runner.addTest( CSplineTest::suite() ); - runner.addTest( CStatisticalTestsTest::suite() ); - runner.addTest( CTimeSeriesChangeDetectorTest::suite() ); - runner.addTest( CTimeSeriesDecompositionTest::suite() ); - runner.addTest( CTimeSeriesModelTest::suite() ); - runner.addTest( CToolsTest::suite() ); - runner.addTest( CTrendComponentTest::suite() ); - runner.addTest( 
CTrendTestsTest::suite() ); - runner.addTest( CXMeansTest::suite() ); - runner.addTest( CXMeansOnlineTest::suite() ); - runner.addTest( CXMeansOnline1dTest::suite() ); + runner.addTest(CAgglomerativeClustererTest::suite()); + runner.addTest(CAssignmentTest::suite()); + runner.addTest(CBasicStatisticsTest::suite()); + runner.addTest(CBjkstUniqueValuesTest::suite()); + runner.addTest(CBootstrapClustererTest::suite()); + runner.addTest(CBoundingBoxTest::suite()); + runner.addTest(CCategoricalToolsTest::suite()); + runner.addTest(CCalendarFeatureTest::suite()); + runner.addTest(CCalendarComponentAdaptiveBucketingTest::suite()); + runner.addTest(CChecksumTest::suite()); + runner.addTest(CClustererTest::suite()); + runner.addTest(CCountMinSketchTest::suite()); + runner.addTest(CDecayRateControllerTest::suite()); + runner.addTest(CEqualWithToleranceTest::suite()); + runner.addTest(CEntropySketchTest::suite()); + runner.addTest(CForecastTest::suite()); + runner.addTest(CGammaRateConjugateTest::suite()); + runner.addTest(CGramSchmidtTest::suite()); + runner.addTest(CInformationCriteriaTest::suite()); + runner.addTest(CIntegerToolsTest::suite()); + runner.addTest(CIntegrationTest::suite()); + runner.addTest(CKdTreeTest::suite()); + runner.addTest(CKMeansFastTest::suite()); + runner.addTest(CKMeansOnlineTest::suite()); + runner.addTest(CKMostCorrelatedTest::suite()); + runner.addTest(CLassoLogisticRegressionTest::suite()); + runner.addTest(CLinearAlgebraTest::suite()); + runner.addTest(CLogNormalMeanPrecConjugateTest::suite()); + runner.addTest(CLogTDistributionTest::suite()); + runner.addTest(CMathsFuncsTest::suite()); + runner.addTest(CMathsMemoryTest::suite()); + runner.addTest(CMixtureDistributionTest::suite()); + runner.addTest(CModelTest::suite()); + runner.addTest(CMultimodalPriorTest::suite()); + runner.addTest(CMultinomialConjugateTest::suite()); + runner.addTest(CMultivariateConstantPriorTest::suite()); + runner.addTest(CMultivariateMultimodalPriorTest::suite()); + runner.addTest(CMultivariateNormalConjugateTest::suite()); + runner.addTest(CMultivariateOneOfNPriorTest::suite()); + runner.addTest(CNaiveBayesTest::suite()); + runner.addTest(CNaturalBreaksClassifierTest::suite()); + runner.addTest(CNormalMeanPrecConjugateTest::suite()); + runner.addTest(COneOfNPriorTest::suite()); + runner.addTest(COrderingsTest::suite()); + runner.addTest(COrdinalTest::suite()); + runner.addTest(CPackedBitVectorTest::suite()); + runner.addTest(CPeriodicityHypothesisTestsTest::suite()); + runner.addTest(CPoissonMeanConjugateTest::suite()); + runner.addTest(CPriorTest::suite()); + runner.addTest(CPRNGTest::suite()); + runner.addTest(CProbabilityAggregatorsTest::suite()); + runner.addTest(CProbabilityCalibratorTest::suite()); + runner.addTest(CQDigestTest::suite()); + runner.addTest(CQuantileSketchTest::suite()); + runner.addTest(CRadialBasisFunctionTest::suite()); + runner.addTest(CRandomProjectionClustererTest::suite()); + runner.addTest(CRegressionTest::suite()); + runner.addTest(CSamplingTest::suite()); + runner.addTest(CSeasonalComponentTest::suite()); + runner.addTest(CSeasonalComponentAdaptiveBucketingTest::suite()); + runner.addTest(CSetToolsTest::suite()); + runner.addTest(CSignalTest::suite()); + runner.addTest(CSolversTest::suite()); + runner.addTest(CSplineTest::suite()); + runner.addTest(CStatisticalTestsTest::suite()); + runner.addTest(CTimeSeriesChangeDetectorTest::suite()); + runner.addTest(CTimeSeriesDecompositionTest::suite()); + runner.addTest(CTimeSeriesModelTest::suite()); + 
runner.addTest(CToolsTest::suite());
+    runner.addTest(CTrendComponentTest::suite());
+    runner.addTest(CTrendTestsTest::suite());
+    runner.addTest(CXMeansTest::suite());
+    runner.addTest(CXMeansOnlineTest::suite());
+    runner.addTest(CXMeansOnline1dTest::suite());
 
     return !runner.runTests();
 }
-
diff --git a/lib/maths/unittest/TestUtils.cc b/lib/maths/unittest/TestUtils.cc
index 3cd3b76b74..499f3a1a1f 100644
--- a/lib/maths/unittest/TestUtils.cc
+++ b/lib/maths/unittest/TestUtils.cc
@@ -14,160 +14,114 @@
 
 #include
 
-namespace ml
-{
+namespace ml {
 using namespace maths;
 using namespace handy_typedefs;
 
-namespace
-{
+namespace {
 const core_t::TTime HALF_HOUR{core::constants::HOUR / 2};
 const core_t::TTime DAY{core::constants::DAY};
 const core_t::TTime WEEK{core::constants::WEEK};
 
 //! \brief Computes the c.d.f. of the prior minus the target supplied
 //! to its constructor at specific locations.
-class CCdf : public std::unary_function<double, double>
-{
-    public:
-        enum EStyle
-        {
-            E_Lower,
-            E_Upper,
-            E_GeometricMean
-        };
-
-    public:
-        CCdf(EStyle style, const CPrior &prior, double target) :
-            m_Style(style),
-            m_Prior(&prior),
-            m_Target(target),
-            m_X(1u)
-        {}
-
-        double operator()(double x) const
-        {
-            double lowerBound, upperBound;
-
-            m_X[0] = x;
-            if (!m_Prior->minusLogJointCdf(CConstantWeights::COUNT_VARIANCE,
-                                           m_X,
-                                           CConstantWeights::SINGLE_UNIT,
-                                           lowerBound, upperBound))
-            {
-                // We have no choice but to throw because this is
-                // invoked inside a boost root finding function.
-
-                LOG_ERROR("Failed to evaluate c.d.f. at " << x);
-                throw std::runtime_error("Failed to evaluate c.d.f.");
-            }
+class CCdf : public std::unary_function<double, double> {
+public:
+    enum EStyle { E_Lower, E_Upper, E_GeometricMean };
 
+public:
+    CCdf(EStyle style, const CPrior& prior, double target) : m_Style(style), m_Prior(&prior), m_Target(target), m_X(1u) {}
+
+    double operator()(double x) const {
+        double lowerBound, upperBound;
+
+        m_X[0] = x;
+        if (!m_Prior->minusLogJointCdf(CConstantWeights::COUNT_VARIANCE, m_X, CConstantWeights::SINGLE_UNIT, lowerBound, upperBound)) {
+            // We have no choice but to throw because this is
+            // invoked inside a boost root finding function.
+
+            LOG_ERROR("Failed to evaluate c.d.f. at " << x);
+            throw std::runtime_error("Failed to evaluate c.d.f.");
+        }
 
-            switch (m_Style)
-            {
-            case E_Lower:         return std::exp(-lowerBound) - m_Target;
-            case E_Upper:         return std::exp(-upperBound) - m_Target;
-            case E_GeometricMean: return std::exp(-(lowerBound + upperBound) / 2.0) - m_Target;
-            }
+        switch (m_Style) {
+        case E_Lower:
+            return std::exp(-lowerBound) - m_Target;
+        case E_Upper:
+            return std::exp(-upperBound) - m_Target;
+        case E_GeometricMean:
+            return std::exp(-(lowerBound + upperBound) / 2.0) - m_Target;
         }
+        return std::exp(-(lowerBound + upperBound) / 2.0) - m_Target;
+    }
 
-    private:
-        EStyle m_Style;
-        const CPrior *m_Prior;
-        double m_Target;
-        mutable TDouble1Vec m_X;
+private:
+    EStyle m_Style;
+    const CPrior* m_Prior;
+    double m_Target;
+    mutable TDouble1Vec m_X;
 };
 
 //! Set \p result to \p x.
-bool identity(double x, double &result)
-{
+bool identity(double x, double& result) {
     result = x;
     return true;
 }
 
 //! Computes the residual from a specified mean.
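CCdf wraps the prior's c.d.f. as "c.d.f. minus target" precisely so that a bracketing root finder can invert it: the zero of cdf(x) - target is the requested quantile. A minimal, self-contained sketch of the same inversion idea against a known closed-form c.d.f. (the names and the plain bisection solver below are illustrative only; the library itself solves with its own CSolvers utilities and the functor above):

#include <cmath>
#include <functional>
#include <iostream>

// Standard normal c.d.f. expressed with the error function.
double normalCdf(double x) {
    return 0.5 * std::erfc(-x / std::sqrt(2.0));
}

// Invert a monotone increasing c.d.f. by bisection: find q with
// cdf(q) ~= target, given a bracket [a, b] containing the root.
double invertCdf(const std::function<double(double)>& cdf, double target, double a, double b) {
    auto f = [&](double x) { return cdf(x) - target; };
    for (int i = 0; i < 200 && b - a > 1e-12; ++i) {
        double m = 0.5 * (a + b);
        if (f(m) < 0.0) {
            a = m;
        } else {
            b = m;
        }
    }
    return 0.5 * (a + b);
}

int main() {
    // 97.5% quantile of the standard normal; prints roughly 1.95996.
    std::cout << invertCdf(normalCdf, 0.975, -10.0, 10.0) << '\n';
}

The E_Lower/E_Upper styles matter because the prior only reports bounds on the true c.d.f.; E_GeometricMean splits the difference between them.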
-class CResidual -{ - public: - using result_type = double; - - public: - CResidual(double mean) : m_Mean(mean) {} - - bool operator()(double x, double &result) const - { - result = (x - m_Mean) * (x - m_Mean); - return true; - } +class CResidual { +public: + using result_type = double; - private: - double m_Mean; +public: + CResidual(double mean) : m_Mean(mean) {} + + bool operator()(double x, double& result) const { + result = (x - m_Mean) * (x - m_Mean); + return true; + } + +private: + double m_Mean; }; } -CPriorTestInterface::CPriorTestInterface(CPrior &prior) : - m_Prior(&prior) -{ +CPriorTestInterface::CPriorTestInterface(CPrior& prior) : m_Prior(&prior) { } -void CPriorTestInterface::addSamples(const TDouble1Vec &samples) -{ +void CPriorTestInterface::addSamples(const TDouble1Vec& samples) { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); m_Prior->addSamples(TWeights::COUNT, samples, weights); } -maths_t::EFloatingPointErrorStatus -CPriorTestInterface::jointLogMarginalLikelihood(const TDouble1Vec &samples, - double &result) const -{ +maths_t::EFloatingPointErrorStatus CPriorTestInterface::jointLogMarginalLikelihood(const TDouble1Vec& samples, double& result) const { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); return m_Prior->jointLogMarginalLikelihood(TWeights::COUNT, samples, weights, result); } -bool CPriorTestInterface::minusLogJointCdf(const TDouble1Vec &samples, - double &lowerBound, - double &upperBound) const -{ +bool CPriorTestInterface::minusLogJointCdf(const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->minusLogJointCdf(TWeights::COUNT, - samples, - weights, - lowerBound, upperBound); + return m_Prior->minusLogJointCdf(TWeights::COUNT, samples, weights, lowerBound, upperBound); } -bool CPriorTestInterface::minusLogJointCdfComplement(const TDouble1Vec &samples, - double &lowerBound, - double &upperBound) const -{ +bool CPriorTestInterface::minusLogJointCdfComplement(const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->minusLogJointCdfComplement(TWeights::COUNT, - samples, - weights, - lowerBound, upperBound); + return m_Prior->minusLogJointCdfComplement(TWeights::COUNT, samples, weights, lowerBound, upperBound); } bool CPriorTestInterface::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TDouble1Vec &samples, - double &lowerBound, - double &upperBound) const -{ + const TDouble1Vec& samples, + double& lowerBound, + double& upperBound) const { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); maths_t::ETail tail; - return m_Prior->probabilityOfLessLikelySamples(calculation, - TWeights::COUNT, - samples, - weights, - lowerBound, upperBound, tail); + return m_Prior->probabilityOfLessLikelySamples(calculation, TWeights::COUNT, samples, weights, lowerBound, upperBound, tail); } -bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, - const TDouble1Vec &samples, - double &result) const -{ +bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, const TDouble1Vec& samples, double& result) const { TDoubleDoublePr1Vec weightedSamples; weightedSamples.reserve(samples.size()); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { weightedSamples.push_back(std::make_pair(samples[i], 1.0)); } return 
this->anomalyScore(calculation, maths_t::E_SampleCountWeight, weightedSamples, result); @@ -175,30 +129,21 @@ bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculat bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, maths_t::ESampleWeightStyle weightStyle, - const TDoubleDoublePr1Vec &samples, - double &result) const -{ + const TDoubleDoublePr1Vec& samples, + double& result) const { result = 0.0; TWeightStyleVec weightStyles(1, weightStyle); TDouble1Vec samples_(samples.size()); TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { samples_[i] = samples[i].first; weights[i][0] = samples[i].second; } double lowerBound, upperBound; maths_t::ETail tail; - if (!m_Prior->probabilityOfLessLikelySamples(calculation, - weightStyles, - samples_, - weights, - lowerBound, - upperBound, - tail)) - { + if (!m_Prior->probabilityOfLessLikelySamples(calculation, weightStyles, samples_, weights, lowerBound, upperBound, tail)) { LOG_ERROR("Failed computing probability of less likely samples"); return false; } @@ -208,34 +153,24 @@ bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculat return true; } -bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, - double eps, - double &result) const -{ +bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, double eps, double& result) const { result = 0.0; percentage /= 100.0; double step = 1.0; TDoubleDoublePr bracket(0.0, step); - try - { + try { CCdf cdf(percentage < 0.5 ? CCdf::E_Lower : CCdf::E_Upper, *m_Prior, percentage); TDoubleDoublePr fBracket(cdf(bracket.first), cdf(bracket.second)); std::size_t maxIterations = 100u; - for (/**/; - fBracket.first * fBracket.second > 0.0 && maxIterations > 0; - --maxIterations) - { + for (/**/; fBracket.first * fBracket.second > 0.0 && maxIterations > 0; --maxIterations) { step *= 2.0; - if (fBracket.first > 0.0) - { + if (fBracket.first > 0.0) { bracket.first -= step; fBracket.first = cdf(bracket.first); - } - else if (fBracket.second < 0.0) - { + } else if (fBracket.second < 0.0) { bracket.second += step; fBracket.second = cdf(bracket.second); } @@ -243,25 +178,18 @@ bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, 2.0 * eps); - CSolvers::solve(bracket.first, bracket.second, - fBracket.first, fBracket.second, - cdf, maxIterations, equal, result); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to compute quantile: " << e.what() - << ", quantile = " << percentage); + CSolvers::solve(bracket.first, bracket.second, fBracket.first, fBracket.second, cdf, maxIterations, equal, result); + } catch (const std::exception& e) { + LOG_ERROR("Failed to compute quantile: " << e.what() << ", quantile = " << percentage); return false; } return true; } -bool CPriorTestInterface::marginalLikelihoodMeanForTest(double &result) const -{ +bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { using TMarginalLikelihood = CCompositeFunctions::CExp; - using TFunctionTimesMarginalLikelihood = - CCompositeFunctions::CProduct; + using TFunctionTimesMarginalLikelihood = CCompositeFunctions::CProduct; const double eps = 1e-3; unsigned int steps = 100u; @@ -269,15 +197,12 @@ bool CPriorTestInterface::marginalLikelihoodMeanForTest(double &result) const result = 0.0; double a, b; 
- if ( !this->marginalLikelihoodQuantileForTest(0.001, eps, a) - || !this->marginalLikelihoodQuantileForTest(99.999, eps, b)) - { + if (!this->marginalLikelihoodQuantileForTest(0.001, eps, a) || !this->marginalLikelihoodQuantileForTest(99.999, eps, b)) { LOG_ERROR("Unable to compute mean likelihood"); return false; } - if (m_Prior->dataType() == maths_t::E_IntegerData) - { + if (m_Prior->dataType() == maths_t::E_IntegerData) { b = std::ceil(b); a = std::floor(a); steps = static_cast(b - a) + 1; @@ -289,11 +214,9 @@ bool CPriorTestInterface::marginalLikelihoodMeanForTest(double &result) const double x = a; double step = (b - a) / static_cast(steps); - for (unsigned int i = 0; i < steps; ++i, x += step) - { + for (unsigned int i = 0; i < steps; ++i, x += step) { double integral; - if (!CIntegration::gaussLegendre(xTimesLikelihood, x, x + step, integral)) - { + if (!CIntegration::gaussLegendre(xTimesLikelihood, x, x + step, integral)) { return false; } result += integral; @@ -302,8 +225,7 @@ bool CPriorTestInterface::marginalLikelihoodMeanForTest(double &result) const return true; } -bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double &result) const -{ +bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double& result) const { using TMarginalLikelihood = CCompositeFunctions::CExp; using TResidualTimesMarginalLikelihood = CCompositeFunctions::CProduct; @@ -313,15 +235,12 @@ bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double &result) cons result = 0.0; double a, b; - if ( !this->marginalLikelihoodQuantileForTest(0.001, eps, a) - || !this->marginalLikelihoodQuantileForTest(99.999, eps, b)) - { + if (!this->marginalLikelihoodQuantileForTest(0.001, eps, a) || !this->marginalLikelihoodQuantileForTest(99.999, eps, b)) { LOG_ERROR("Unable to compute mean likelihood"); return false; } - if (m_Prior->dataType() == maths_t::E_IntegerData) - { + if (m_Prior->dataType() == maths_t::E_IntegerData) { b = std::ceil(b); a = std::floor(a); steps = static_cast(b - a) + 1; @@ -333,11 +252,9 @@ bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double &result) cons double x = a; double step = (b - a) / static_cast(steps); - for (unsigned int i = 0; i < steps; ++i, x += step) - { + for (unsigned int i = 0; i < steps; ++i, x += step) { double integral; - if (!CIntegration::gaussLegendre(residualTimesLikelihood, x, x + step, integral)) - { + if (!CIntegration::gaussLegendre(residualTimesLikelihood, x, x + step, integral)) { return false; } result += integral; @@ -346,103 +263,63 @@ bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double &result) cons return true; } -double constant(core_t::TTime /*time*/) -{ +double constant(core_t::TTime /*time*/) { return 4.0; } -double ramp(core_t::TTime time) -{ +double ramp(core_t::TTime time) { return 0.1 * static_cast(time) / static_cast(WEEK); } -double markov(core_t::TTime time) -{ +double markov(core_t::TTime time) { static double state{0.2}; - if (time % WEEK == 0) - { + if (time % WEEK == 0) { core::CHashing::CMurmurHash2BT hasher; - state = 2.0 * static_cast(hasher(time)) - / static_cast(std::numeric_limits::max()); + state = 2.0 * static_cast(hasher(time)) / static_cast(std::numeric_limits::max()); } return state; } -double smoothDaily(core_t::TTime time) -{ - return std::sin( boost::math::double_constants::two_pi - * static_cast(time) - / static_cast(DAY)); +double smoothDaily(core_t::TTime time) { + return std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(DAY)); } -double 
smoothWeekly(core_t::TTime time)
-{
-    return std::sin(  boost::math::double_constants::two_pi
-                    * static_cast<double>(time)
-                    / static_cast<double>(WEEK));
+double smoothWeekly(core_t::TTime time) {
+    return std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(WEEK));
 }
 
-double spikeyDaily(core_t::TTime time)
-{
-    double pattern[]
-    {
-        1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1
-    };
+double spikeyDaily(core_t::TTime time) {
+    double pattern[]{1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+                     0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1,
+                     0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1};
     return pattern[(time % DAY) / HALF_HOUR];
 }
 
-double spikeyWeekly(core_t::TTime time)
-{
-    double pattern[]
-    {
-        1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
-        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1
-    };
+double spikeyWeekly(core_t::TTime time) {
+    double pattern[]{
+        1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1,
+        0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1,
+        0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+        1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1,
+        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1,
+        0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1,
+        0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+        0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+        1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1,
+        0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1,
+        0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2,
+        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1};
     return pattern[(time % WEEK) / HALF_HOUR];
 }
 
-double weekends(core_t::TTime time)
-{
-    double amplitude[] = { 1.0, 0.9, 0.8, 0.9, 1.1, 0.2, 0.05 };
-    return amplitude[(time % WEEK) / DAY]
-           * std::sin(  boost::math::double_constants::two_pi
-                      * static_cast<double>(time)
-                      / static_cast<double>(DAY));
+double weekends(core_t::TTime time) {
+    double amplitude[] = {1.0, 0.9, 0.8, 0.9, 1.1, 0.2, 0.05};
+    return amplitude[(time % WEEK) / DAY] *
+           std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
 }
 
-double scale(double scale, core_t::TTime time, TGenerator generator)
-{
+double scale(double scale, core_t::TTime time, TGenerator generator) {
     return generator(static_cast<core_t::TTime>(scale * static_cast<double>(time)));
 }
-
 }
-
diff --git a/lib/maths/unittest/TestUtils.h b/lib/maths/unittest/TestUtils.h
index dd9000072a..9a77f2bd91 100644
--- a/lib/maths/unittest/TestUtils.h
+++ b/lib/maths/unittest/TestUtils.h
@@ -7,21 +7,19 @@
 #ifndef INCLUDED_ml_TestUtils_h
 #define INCLUDED_ml_TestUtils_h
 
-#include
 #include
+#include
 
 #include
 #include
-#include
 #include
+#include
 
-#include
 #include
+#include
 
-namespace ml
-{
-namespace handy_typedefs
-{
+namespace ml {
+namespace handy_typedefs {
 using TDouble1Vec = core::CSmallVector<double, 1>;
 using TDouble4Vec = core::CSmallVector<double, 4>;
 using TDouble10Vec = core::CSmallVector<double, 10>;
@@ -45,99 +43,88 @@ using TGeneratorVec = std::vector<TGenerator>;
 //!
 //! DESCRIPTION:\n
 //! This is a mix in interface for use within the testing framework.
-class CPriorTestInterface
-{
-    public:
-        using TDoubleDoublePr = std::pair<double, double>;
-        using TDoubleDoublePr1Vec = core::CSmallVector<TDoubleDoublePr, 1>;
-        using TWeightStyleVec = maths_t::TWeightStyleVec;
-        using TWeights = maths::CConstantWeights;
-
-    public:
-        explicit CPriorTestInterface(maths::CPrior &prior);
-
-        //! Wrapper which takes care of weights.
-        void addSamples(const handy_typedefs::TDouble1Vec &samples);
-
-        //! Wrapper which takes care of weights.
-        maths_t::EFloatingPointErrorStatus
-        jointLogMarginalLikelihood(const handy_typedefs::TDouble1Vec &samples,
-                                   double &result) const;
-
-        //! Wrapper which takes care of weights.
-        bool minusLogJointCdf(const handy_typedefs::TDouble1Vec &samples,
-                              double &lowerBound,
-                              double &upperBound) const;
-
-        //! Wrapper which takes care of weights.
-        bool minusLogJointCdfComplement(const handy_typedefs::TDouble1Vec &samples,
-                                        double &lowerBound,
-                                        double &upperBound) const;
-
-        //! Wrapper which takes care of weights.
- bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const handy_typedefs::TDouble1Vec &samples, - double &lowerBound, - double &upperBound) const; - - //! A wrapper around weighted compute anomaly scores which uses unit - //! weights for all samples. - bool anomalyScore(maths_t::EProbabilityCalculation calculation, - const handy_typedefs::TDouble1Vec &samples, - double &result) const; - - //! Calculate an anomaly score for a collection of independent samples - //! from the variable. - //! - //! \param[in] calculation The style of the probability calculation - //! (see maths_t::EProbabilityCalculation for details). - //! \param[in] weightStyle Controls the interpretation of the weight that - //! is associated with each sample. See maths_t::ESampleWeightStyle for - //! more details. - //! \param[in] samples A collection of samples of the variable. - //! Each pair is the sample and weight, i.e. \f$(x_i, \gamma_i)\f$ where - //! \f$x_i\f$ is \f$i^{th}\f$ sample and \f$\gamma_i\f$ is the weight of - //! that sample. - //! \param[out] result Filled in with the total anomaly score of \p samples. - bool anomalyScore(maths_t::EProbabilityCalculation calculation, - maths_t::ESampleWeightStyle weightStyle, - const TDoubleDoublePr1Vec &samples, - double &result) const; - - //! This is a slow method that uses numerical root finding to compute - //! the quantile so ***only*** use this for testing. - //! - //! \param[in] percentage The desired quantile expressed as a percentage. - //! \param[in] eps The tolerated error in the quantile: if it could be - //! calculated, \p result will be no further than \p eps away from - //! the exact quantile. - //! \param[out] result Filled in with the quantile if it could be found. - //! \note Since this is for testing purposes only it is not especially - //! robust. For example, it won't handle a normal with mean of \f$10^8\f$ - //! and standard deviation of \f$10^{-8}\f$ particularly well. - bool marginalLikelihoodQuantileForTest(double percentage, - double eps, - double &result) const; - - //! This is a slow method that uses numerical integration to compute - //! the mean so ***only*** use this for testing. - //! - //! \param[out] result Filled in with the mean if it could be found. - //! \note This makes use of marginalLikelihoodQuantile and suffers - //! the same limitations. - bool marginalLikelihoodMeanForTest(double &result) const; - - //! This is a slow method that uses numerical integration to compute - //! the variance so ***only*** use this for testing. - //! - //! \param[out] result Filled in with the variance if it could be - //! found. - //! \note This makes use of marginalLikelihoodQuantile and suffers - //! the same limitations. - bool marginalLikelihoodVarianceForTest(double &result) const; - - protected: - maths::CPrior *m_Prior; +class CPriorTestInterface { +public: + using TDoubleDoublePr = std::pair<double, double>; + using TDoubleDoublePr1Vec = core::CSmallVector<TDoubleDoublePr, 1>; + using TWeightStyleVec = maths_t::TWeightStyleVec; + using TWeights = maths::CConstantWeights; + +public: + explicit CPriorTestInterface(maths::CPrior& prior); + + //! Wrapper which takes care of weights. + void addSamples(const handy_typedefs::TDouble1Vec& samples); + + //! Wrapper which takes care of weights. + maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const handy_typedefs::TDouble1Vec& samples, double& result) const; + + //! Wrapper which takes care of weights.
+ bool minusLogJointCdf(const handy_typedefs::TDouble1Vec& samples, double& lowerBound, double& upperBound) const; + + //! Wrapper which takes care of weights. + bool minusLogJointCdfComplement(const handy_typedefs::TDouble1Vec& samples, double& lowerBound, double& upperBound) const; + + //! Wrapper which takes care of weights. + bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, + const handy_typedefs::TDouble1Vec& samples, + double& lowerBound, + double& upperBound) const; + + //! A wrapper around weighted compute anomaly scores which uses unit + //! weights for all samples. + bool anomalyScore(maths_t::EProbabilityCalculation calculation, const handy_typedefs::TDouble1Vec& samples, double& result) const; + + //! Calculate an anomaly score for a collection of independent samples + //! from the variable. + //! + //! \param[in] calculation The style of the probability calculation + //! (see maths_t::EProbabilityCalculation for details). + //! \param[in] weightStyle Controls the interpretation of the weight that + //! is associated with each sample. See maths_t::ESampleWeightStyle for + //! more details. + //! \param[in] samples A collection of samples of the variable. + //! Each pair is the sample and weight, i.e. \f$(x_i, \gamma_i)\f$ where + //! \f$x_i\f$ is \f$i^{th}\f$ sample and \f$\gamma_i\f$ is the weight of + //! that sample. + //! \param[out] result Filled in with the total anomaly score of \p samples. + bool anomalyScore(maths_t::EProbabilityCalculation calculation, + maths_t::ESampleWeightStyle weightStyle, + const TDoubleDoublePr1Vec& samples, + double& result) const; + + //! This is a slow method that uses numerical root finding to compute + //! the quantile so ***only*** use this for testing. + //! + //! \param[in] percentage The desired quantile expressed as a percentage. + //! \param[in] eps The tolerated error in the quantile: if it could be + //! calculated, \p result will be no further than \p eps away from + //! the exact quantile. + //! \param[out] result Filled in with the quantile if it could be found. + //! \note Since this is for testing purposes only it is not especially + //! robust. For example, it won't handle a normal with mean of \f$10^8\f$ + //! and standard deviation of \f$10^{-8}\f$ particularly well. + bool marginalLikelihoodQuantileForTest(double percentage, double eps, double& result) const; + + //! This is a slow method that uses numerical integration to compute + //! the mean so ***only*** use this for testing. + //! + //! \param[out] result Filled in with the mean if it could be found. + //! \note This makes use of marginalLikelihoodQuantile and suffers + //! the same limitations. + bool marginalLikelihoodMeanForTest(double& result) const; + + //! This is a slow method that uses numerical integration to compute + //! the variance so ***only*** use this for testing. + //! + //! \param[out] result Filled in with the variance if it could be + //! found. + //! \note This makes use of marginalLikelihoodQuantile and suffers + //! the same limitations. + bool marginalLikelihoodVarianceForTest(double& result) const; + +protected: + maths::CPrior* m_Prior; }; //! \brief A mix in of test interface which brings the necessary functions @@ -151,169 +138,137 @@ class CPriorTestInterface //! standards, because it's the cleanest way to implement this functionality. //! DON'T use this elsewhere. 
template<typename PRIOR> -class CPriorTestInterfaceMixin : public PRIOR, public CPriorTestInterface -{ - public: - using PRIOR::addSamples; - using PRIOR::jointLogMarginalLikelihood; - using PRIOR::minusLogJointCdf; - using PRIOR::minusLogJointCdfComplement; - using PRIOR::probabilityOfLessLikelySamples; - using CPriorTestInterface::addSamples; - using CPriorTestInterface::jointLogMarginalLikelihood; - using CPriorTestInterface::minusLogJointCdf; - using CPriorTestInterface::minusLogJointCdfComplement; - using CPriorTestInterface::probabilityOfLessLikelySamples; - - public: - CPriorTestInterfaceMixin(const PRIOR &prior) : - PRIOR(prior), - CPriorTestInterface(static_cast<PRIOR&>(*this)) - {} - - CPriorTestInterfaceMixin(const CPriorTestInterfaceMixin &other) : - PRIOR(static_cast<const PRIOR&>(other)), - CPriorTestInterface(static_cast<PRIOR&>(*this)) - {} - - virtual ~CPriorTestInterfaceMixin() {} - - //! Swap the contents efficiently. - void swap(CPriorTestInterfaceMixin &other) - { - this->PRIOR::swap(other); - } - - //! Clone the object. - virtual CPriorTestInterfaceMixin *clone() const - { - return new CPriorTestInterfaceMixin(*this); - } +class CPriorTestInterfaceMixin : public PRIOR, public CPriorTestInterface { +public: + using CPriorTestInterface::addSamples; + using CPriorTestInterface::jointLogMarginalLikelihood; + using CPriorTestInterface::minusLogJointCdf; + using CPriorTestInterface::minusLogJointCdfComplement; + using CPriorTestInterface::probabilityOfLessLikelySamples; + using PRIOR::addSamples; + using PRIOR::jointLogMarginalLikelihood; + using PRIOR::minusLogJointCdf; + using PRIOR::minusLogJointCdfComplement; + using PRIOR::probabilityOfLessLikelySamples; + +public: + CPriorTestInterfaceMixin(const PRIOR& prior) : PRIOR(prior), CPriorTestInterface(static_cast<PRIOR&>(*this)) {} + + CPriorTestInterfaceMixin(const CPriorTestInterfaceMixin& other) + : PRIOR(static_cast<const PRIOR&>(other)), CPriorTestInterface(static_cast<PRIOR&>(*this)) {} + + virtual ~CPriorTestInterfaceMixin() {} + + //! Swap the contents efficiently. + void swap(CPriorTestInterfaceMixin& other) { this->PRIOR::swap(other); } + + //! Clone the object. + virtual CPriorTestInterfaceMixin* clone() const { return new CPriorTestInterfaceMixin(*this); } }; - //! \brief Kernel for checking normalization with CPrior::expectation. -class C1dUnitKernel -{ - public: - bool operator()(double /*x*/, double &result) const - { - result = 1.0; - return true; - } +class C1dUnitKernel { +public: + bool operator()(double /*x*/, double& result) const { + result = 1.0; + return true; + } }; //! \brief Kernel for computing the variance with CPrior::expectation. -class CVarianceKernel -{ - public: - CVarianceKernel(double mean) : m_Mean(mean) {} - - bool operator()(double x, double &result) const - { - result = (x - m_Mean) * (x - m_Mean); - return true; - } - - private: - double m_Mean; +class CVarianceKernel { +public: + CVarianceKernel(double mean) : m_Mean(mean) {} + + bool operator()(double x, double& result) const { + result = (x - m_Mean) * (x - m_Mean); + return true; + } + +private: + double m_Mean; }; //! \brief A constant unit kernel.
template<std::size_t N> -class CUnitKernel -{ - public: - CUnitKernel(const maths::CMultivariatePrior &prior) : - m_Prior(&prior), - m_X(1) - {} - - bool operator()(const maths::CVectorNx1<double, N> &x, double &result) const - { - m_X[0].assign(x.begin(), x.end()); - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, result); - result = std::exp(result); - return true; - } - - private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; - - private: - const maths::CMultivariatePrior *m_Prior; - mutable handy_typedefs::TDouble10Vec1Vec m_X; +class CUnitKernel { +public: + CUnitKernel(const maths::CMultivariatePrior& prior) : m_Prior(&prior), m_X(1) {} + + bool operator()(const maths::CVectorNx1<double, N>& x, double& result) const { + m_X[0].assign(x.begin(), x.end()); + m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, result); + result = std::exp(result); + return true; + } + +private: + static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + +private: + const maths::CMultivariatePrior* m_Prior; + mutable handy_typedefs::TDouble10Vec1Vec m_X; }; template<std::size_t N> -handy_typedefs::TDouble10Vec4Vec1Vec CUnitKernel<N>::SINGLE_UNIT(1, handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +handy_typedefs::TDouble10Vec4Vec1Vec CUnitKernel<N>::SINGLE_UNIT(1, + handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); //! \brief The kernel for computing the mean of a multivariate prior. template<std::size_t N> -class CMeanKernel -{ - public: - CMeanKernel(const maths::CMultivariatePrior &prior) : - m_Prior(&prior), - m_X(1) - {} - - bool operator()(const maths::CVectorNx1<double, N> &x, - maths::CVectorNx1<double, N> &result) const - { - m_X[0].assign(x.begin(), x.end()); - double likelihood; - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, likelihood); - likelihood = std::exp(likelihood); - result = x * likelihood; - return true; - } - - private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; - - private: - const maths::CMultivariatePrior *m_Prior; - mutable handy_typedefs::TDouble10Vec1Vec m_X; +class CMeanKernel { +public: + CMeanKernel(const maths::CMultivariatePrior& prior) : m_Prior(&prior), m_X(1) {} + + bool operator()(const maths::CVectorNx1<double, N>& x, maths::CVectorNx1<double, N>& result) const { + m_X[0].assign(x.begin(), x.end()); + double likelihood; + m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, likelihood); + likelihood = std::exp(likelihood); + result = x * likelihood; + return true; + } + +private: + static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + +private: + const maths::CMultivariatePrior* m_Prior; + mutable handy_typedefs::TDouble10Vec1Vec m_X; }; template<std::size_t N> -handy_typedefs::TDouble10Vec4Vec1Vec CMeanKernel<N>::SINGLE_UNIT(1, handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +handy_typedefs::TDouble10Vec4Vec1Vec CMeanKernel<N>::SINGLE_UNIT(1, + handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); //! \brief The kernel for computing the variance of a multivariate prior.
template<std::size_t N> -class CCovarianceKernel -{ - public: - CCovarianceKernel(const maths::CMultivariatePrior &prior, - const maths::CVectorNx1<double, N> &mean) : - m_Prior(&prior), - m_Mean(mean), - m_X(1) - {} - - bool operator()(const maths::CVectorNx1<double, N> &x, - maths::CSymmetricMatrixNxN<double, N> &result) const - { - m_X[0].assign(x.begin(), x.end()); - double likelihood; - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, likelihood); - likelihood = std::exp(likelihood); - result = (x - m_Mean).outer() * likelihood; - return true; - } - - private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; - - private: - const maths::CMultivariatePrior *m_Prior; - maths::CVectorNx1<double, N> m_Mean; - mutable handy_typedefs::TDouble10Vec1Vec m_X; +class CCovarianceKernel { +public: + CCovarianceKernel(const maths::CMultivariatePrior& prior, const maths::CVectorNx1<double, N>& mean) + : m_Prior(&prior), m_Mean(mean), m_X(1) {} + + bool operator()(const maths::CVectorNx1<double, N>& x, maths::CSymmetricMatrixNxN<double, N>& result) const { + m_X[0].assign(x.begin(), x.end()); + double likelihood; + m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, likelihood); + likelihood = std::exp(likelihood); + result = (x - m_Mean).outer() * likelihood; + return true; + } + +private: + static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + +private: + const maths::CMultivariatePrior* m_Prior; + maths::CVectorNx1<double, N> m_Mean; + mutable handy_typedefs::TDouble10Vec1Vec m_X; }; template<std::size_t N> -handy_typedefs::TDouble10Vec4Vec1Vec CCovarianceKernel<N>::SINGLE_UNIT(1, handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +handy_typedefs::TDouble10Vec4Vec1Vec + CCovarianceKernel<N>::SINGLE_UNIT(1, handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); //! A constant function. double constant(core_t::TTime time); @@ -341,7 +296,6 @@ double weekends(core_t::TTime time); //! Scales time input to \p generator.
double scale(double scale, core_t::TTime time, handy_typedefs::TGenerator generator); - } #endif // INCLUDED_ml_TestUtils_h diff --git a/lib/model/CAnnotatedProbability.cc b/lib/model/CAnnotatedProbability.cc index 668f6a9bac..578f730cfe 100644 --- a/lib/model/CAnnotatedProbability.cc +++ b/lib/model/CAnnotatedProbability.cc @@ -13,12 +13,9 @@ #include -namespace ml -{ -namespace model -{ -namespace -{ +namespace ml { +namespace model { +namespace { const std::string PROBABILITY_TAG("a"); const std::string ATTRIBUTE_PROBABILITIES_TAG("b"); const std::string INFLUENCE_NAME_TAG("c"); @@ -35,31 +32,27 @@ const std::string ANOMALY_TYPE_TAG("m"); const std::string CORRELATED_ATTRIBUTE_TAG("n"); } -SAttributeProbability::SAttributeProbability() : - s_Cid(0), - s_Probability(1.0), - s_Type(model_t::CResultType::E_Unconditional), - s_Feature(model_t::E_IndividualCountByBucketAndPerson) -{} +SAttributeProbability::SAttributeProbability() + : s_Cid(0), s_Probability(1.0), s_Type(model_t::CResultType::E_Unconditional), s_Feature(model_t::E_IndividualCountByBucketAndPerson) { +} SAttributeProbability::SAttributeProbability(std::size_t cid, - const core::CStoredStringPtr &attribute, + const core::CStoredStringPtr& attribute, double probability, model_t::CResultType type, model_t::EFeature feature, - const TStoredStringPtr1Vec &correlatedAttributes, - const TSizeDoublePr1Vec &correlated) : - s_Cid(cid), - s_Attribute(attribute), - s_Probability(probability), - s_Type(type), - s_Feature(feature), - s_CorrelatedAttributes(correlatedAttributes), - s_Correlated(correlated) -{} + const TStoredStringPtr1Vec& correlatedAttributes, + const TSizeDoublePr1Vec& correlated) + : s_Cid(cid), + s_Attribute(attribute), + s_Probability(probability), + s_Type(type), + s_Feature(feature), + s_CorrelatedAttributes(correlatedAttributes), + s_Correlated(correlated) { +} -bool SAttributeProbability::operator<(const SAttributeProbability &other) const -{ +bool SAttributeProbability::operator<(const SAttributeProbability& other) const { return maths::COrderings::lexicographical_compare(s_Probability, *s_Attribute, s_Feature, @@ -72,13 +65,11 @@ bool SAttributeProbability::operator<(const SAttributeProbability &other) const other.s_Correlated); } -void SAttributeProbability::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void SAttributeProbability::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(ATTRIBUTE_TAG, *s_Attribute); // We don't persist s_Cid because it isn't used in restored results. inserter.insertValue(ANOMALY_TYPE_TAG, s_Type.asUint()); - for (const auto &attribute : s_CorrelatedAttributes) - { + for (const auto& attribute : s_CorrelatedAttributes) { inserter.insertValue(CORRELATED_ATTRIBUTE_TAG, *attribute); } // We don't persist s_Correlated because it isn't used in restored results. 
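Aside on the idiom in the hunk above: each insertValue(TAG, ...) call in acceptPersistInserter is paired with a branch on traverser.name() in the matching acceptRestoreTraverser that follows. A minimal standalone sketch of that round trip, using only the calls visible in this patch (insertValue, name, value, next, CStringUtils::stringToType); SExample, EXAMPLE_TAG and the include paths are illustrative assumptions, not part of the patch:

#include <core/CStatePersistInserter.h>
#include <core/CStateRestoreTraverser.h>
#include <core/CStringUtils.h>

#include <string>

namespace {
// Hypothetical one-letter tag, mirroring the short state tags used above.
const std::string EXAMPLE_TAG("a");
}

struct SExample {
    double s_Value = 0.0;

    // Persist: write each member under its tag.
    void acceptPersistInserter(ml::core::CStatePersistInserter& inserter) const {
        inserter.insertValue(EXAMPLE_TAG, s_Value);
    }

    // Restore: walk the stored (name, value) pairs and parse the ones we own.
    bool acceptRestoreTraverser(ml::core::CStateRestoreTraverser& traverser) {
        do {
            if (traverser.name() == EXAMPLE_TAG &&
                ml::core::CStringUtils::stringToType(traverser.value(), s_Value) == false) {
                return false;
            }
        } while (traverser.next());
        return true;
    }
};

The CPersistUtils::persist/restore helpers used for the more structured members appear to layer collection handling on top of this same tag scheme.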
@@ -89,105 +80,73 @@ void SAttributeProbability::acceptPersistInserter(core::CStatePersistInserter &i core::CPersistUtils::persist(BASELINE_BUCKET_MEAN_TAG, s_BaselineBucketMean, inserter); } -bool SAttributeProbability::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == ATTRIBUTE_TAG) - { +bool SAttributeProbability::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == ATTRIBUTE_TAG) { s_Attribute = CStringStore::names().get(traverser.value()); - } - else if (name == ANOMALY_TYPE_TAG) - { + } else if (name == ANOMALY_TYPE_TAG) { unsigned int type; - if (!core::CStringUtils::stringToType(traverser.value(), type)) - { + if (!core::CStringUtils::stringToType(traverser.value(), type)) { LOG_ERROR("Failed to restore " << traverser.name() << " / " << traverser.value()); return false; } s_Type = model_t::CResultType(type); - } - else if (name == CORRELATED_ATTRIBUTE_TAG) - { + } else if (name == CORRELATED_ATTRIBUTE_TAG) { s_CorrelatedAttributes.push_back(CStringStore::names().get(traverser.value())); - } - else if (name == PROBABILITY_TAG) - { - if (!core::CPersistUtils::restore(PROBABILITY_TAG, s_Probability, traverser)) - { + } else if (name == PROBABILITY_TAG) { + if (!core::CPersistUtils::restore(PROBABILITY_TAG, s_Probability, traverser)) { LOG_ERROR("Failed to restore " << traverser.name() << " / " << traverser.value()); return false; } - } - else if (name == FEATURE_TAG) - { + } else if (name == FEATURE_TAG) { std::size_t feature; - if (!core::CPersistUtils::restore(FEATURE_TAG, feature, traverser)) - { + if (!core::CPersistUtils::restore(FEATURE_TAG, feature, traverser)) { LOG_ERROR("Failed to restore " << traverser.name() << " / " << traverser.value()); return false; } - s_Feature = model_t::EFeature(feature); - } - else if (name == DESCRIPTIVE_DATA_TAG) - { + s_Feature = model_t::EFeature(feature); } else if (name == DESCRIPTIVE_DATA_TAG) { using TSizeDoublePrVec = std::vector<TSizeDoublePr>; TSizeDoublePrVec data; - if (!core::CPersistUtils::restore(DESCRIPTIVE_DATA_TAG, data, traverser)) - { + if (!core::CPersistUtils::restore(DESCRIPTIVE_DATA_TAG, data, traverser)) { LOG_ERROR("Failed to restore " << traverser.name() << " / " << traverser.value()); return false; } s_DescriptiveData.reserve(data.size()); - for (const auto &data_ : data) - { + for (const auto& data_ : data) { s_DescriptiveData.emplace_back(annotated_probability::EDescriptiveData(data_.first), data_.second); } - } - else if (name == CURRENT_BUCKET_VALUE_TAG) - { - if (!core::CPersistUtils::restore(CURRENT_BUCKET_VALUE_TAG, s_CurrentBucketValue, traverser)) - { + } else if (name == CURRENT_BUCKET_VALUE_TAG) { + if (!core::CPersistUtils::restore(CURRENT_BUCKET_VALUE_TAG, s_CurrentBucketValue, traverser)) { LOG_ERROR("Failed to restore " << traverser.name() << " / " << traverser.value()); return false; } - } - else if (name == BASELINE_BUCKET_MEAN_TAG) - { - if (!core::CPersistUtils::restore(BASELINE_BUCKET_MEAN_TAG, s_BaselineBucketMean, traverser)) - { + } else if (name == BASELINE_BUCKET_MEAN_TAG) { + if (!core::CPersistUtils::restore(BASELINE_BUCKET_MEAN_TAG, s_BaselineBucketMean, traverser)) { LOG_ERROR("Failed to restore " << traverser.name() << " / " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void SAttributeProbability::addDescriptiveData(annotated_probability::EDescriptiveData key, double
value) -{ +void SAttributeProbability::addDescriptiveData(annotated_probability::EDescriptiveData key, double value) { s_DescriptiveData.emplace_back(key, value); } -SAnnotatedProbability::SAnnotatedProbability() : - s_Probability(1.0), - s_ResultType(model_t::CResultType::E_Final) -{} +SAnnotatedProbability::SAnnotatedProbability() : s_Probability(1.0), s_ResultType(model_t::CResultType::E_Final) { +} -SAnnotatedProbability::SAnnotatedProbability(double p) : - s_Probability(p), - s_ResultType(model_t::CResultType::E_Final) -{} +SAnnotatedProbability::SAnnotatedProbability(double p) : s_Probability(p), s_ResultType(model_t::CResultType::E_Final) { +} -void SAnnotatedProbability::addDescriptiveData(annotated_probability::EDescriptiveData key, double value) -{ +void SAnnotatedProbability::addDescriptiveData(annotated_probability::EDescriptiveData key, double value) { s_DescriptiveData.emplace_back(key, value); } -void SAnnotatedProbability::swap(SAnnotatedProbability &other) -{ +void SAnnotatedProbability::swap(SAnnotatedProbability& other) { std::swap(s_Probability, other.s_Probability); s_AttributeProbabilities.swap(other.s_AttributeProbabilities); s_Influences.swap(other.s_Influences); @@ -196,101 +155,72 @@ void SAnnotatedProbability::swap(SAnnotatedProbability &other) std::swap(s_BaselineBucketCount, other.s_BaselineBucketCount); } -void SAnnotatedProbability::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void SAnnotatedProbability::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CPersistUtils::persist(PROBABILITY_TAG, s_Probability, inserter); core::CPersistUtils::persist(ATTRIBUTE_PROBABILITIES_TAG, s_AttributeProbabilities, inserter); - for (const auto &influence : s_Influences) - { + for (const auto& influence : s_Influences) { inserter.insertValue(INFLUENCE_NAME_TAG, *influence.first.first); inserter.insertValue(INFLUENCE_VALUE_TAG, *influence.first.second); inserter.insertValue(INFLUENCE_TAG, influence.second); } - if (s_CurrentBucketCount) - { + if (s_CurrentBucketCount) { core::CPersistUtils::persist(CURRENT_BUCKET_COUNT_TAG, *s_CurrentBucketCount, inserter); } - if (s_BaselineBucketCount) - { + if (s_BaselineBucketCount) { core::CPersistUtils::persist(BASELINE_BUCKET_COUNT_TAG, *s_BaselineBucketCount, inserter); } } -bool SAnnotatedProbability::isInterim() const -{ +bool SAnnotatedProbability::isInterim() const { return s_ResultType.isInterim(); } -bool SAnnotatedProbability::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool SAnnotatedProbability::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); core::CStoredStringPtr influencerName; core::CStoredStringPtr influencerValue; double d; - if (name == PROBABILITY_TAG) - { - if (!core::CPersistUtils::restore(PROBABILITY_TAG, s_Probability, traverser)) - { + if (name == PROBABILITY_TAG) { + if (!core::CPersistUtils::restore(PROBABILITY_TAG, s_Probability, traverser)) { LOG_ERROR("Restore error for " << traverser.name() << " / " << traverser.value()); return false; } - } - else if (name == ATTRIBUTE_PROBABILITIES_TAG) - { - if (!core::CPersistUtils::restore(ATTRIBUTE_PROBABILITIES_TAG, - s_AttributeProbabilities, - traverser)) - { + } else if (name == ATTRIBUTE_PROBABILITIES_TAG) { + if (!core::CPersistUtils::restore(ATTRIBUTE_PROBABILITIES_TAG, s_AttributeProbabilities, traverser)) { LOG_ERROR("Restore error for " << traverser.name() << " 
/ " << traverser.value()); return false; } - } - else if (name == INFLUENCE_NAME_TAG) - { + } else if (name == INFLUENCE_NAME_TAG) { influencerName = CStringStore::influencers().get(traverser.value()); - } - else if (name == INFLUENCE_VALUE_TAG) - { + } else if (name == INFLUENCE_VALUE_TAG) { influencerValue = CStringStore::influencers().get(traverser.value()); - } - else if (name == INFLUENCE_TAG) - { - if (!core::CStringUtils::stringToType(traverser.value(), d)) - { + } else if (name == INFLUENCE_TAG) { + if (!core::CStringUtils::stringToType(traverser.value(), d)) { LOG_ERROR("Restore error for " << traverser.name() << " / " << traverser.value()); return false; } s_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue), d); - } - else if (name == CURRENT_BUCKET_COUNT_TAG) - { + } else if (name == CURRENT_BUCKET_COUNT_TAG) { uint64_t i; - if (!core::CPersistUtils::restore(CURRENT_BUCKET_COUNT_TAG, i, traverser)) - { + if (!core::CPersistUtils::restore(CURRENT_BUCKET_COUNT_TAG, i, traverser)) { LOG_ERROR("Restore error for " << traverser.name() << " / " << traverser.value()); return false; } s_CurrentBucketCount.reset(i); - } - else if (name == BASELINE_BUCKET_COUNT_TAG) - { - if (!core::CPersistUtils::restore(BASELINE_BUCKET_COUNT_TAG, d, traverser)) - { + } else if (name == BASELINE_BUCKET_COUNT_TAG) { + if (!core::CPersistUtils::restore(BASELINE_BUCKET_COUNT_TAG, d, traverser)) { LOG_ERROR("Restore error for " << traverser.name() << " / " << traverser.value()); return false; } s_BaselineBucketCount.reset(d); } - } - while (traverser.next()); + } while (traverser.next()); return true; } - } } diff --git a/lib/model/CAnnotatedProbabilityBuilder.cc b/lib/model/CAnnotatedProbabilityBuilder.cc index e5921a75ab..44515ff1fc 100644 --- a/lib/model/CAnnotatedProbabilityBuilder.cc +++ b/lib/model/CAnnotatedProbabilityBuilder.cc @@ -11,99 +11,83 @@ #include -namespace ml -{ -namespace model -{ - -CAnnotatedProbabilityBuilder::CAnnotatedProbabilityBuilder(SAnnotatedProbability &annotatedProbability) - : m_Result(annotatedProbability), - m_NumberAttributeProbabilities(1), - m_NumberOfPeople(0), - m_AttributeProbabilityPrior(0), - m_PersonAttributeProbabilityPrior(0), - m_MinAttributeProbabilities(1), - m_DistinctTotalAttributes(0), - m_DistinctRareAttributes(0), - m_RareAttributes(0), - m_IsPopulation(false), - m_IsRare(false), - m_IsFreqRare(false) -{ +namespace ml { +namespace model { + +CAnnotatedProbabilityBuilder::CAnnotatedProbabilityBuilder(SAnnotatedProbability& annotatedProbability) + : m_Result(annotatedProbability), + m_NumberAttributeProbabilities(1), + m_NumberOfPeople(0), + m_AttributeProbabilityPrior(0), + m_PersonAttributeProbabilityPrior(0), + m_MinAttributeProbabilities(1), + m_DistinctTotalAttributes(0), + m_DistinctRareAttributes(0), + m_RareAttributes(0), + m_IsPopulation(false), + m_IsRare(false), + m_IsFreqRare(false) { m_Result.s_AttributeProbabilities.clear(); m_Result.s_Influences.clear(); } -CAnnotatedProbabilityBuilder::CAnnotatedProbabilityBuilder(SAnnotatedProbability &annotatedProbability, +CAnnotatedProbabilityBuilder::CAnnotatedProbabilityBuilder(SAnnotatedProbability& annotatedProbability, std::size_t numberAttributeProbabilities, function_t::EFunction function, std::size_t numberOfPeople) - : m_Result(annotatedProbability), - m_NumberAttributeProbabilities(numberAttributeProbabilities), - m_NumberOfPeople(numberOfPeople), - m_AttributeProbabilityPrior(0), - m_PersonAttributeProbabilityPrior(0), - 
m_MinAttributeProbabilities(numberAttributeProbabilities), - m_DistinctTotalAttributes(0), - m_DistinctRareAttributes(0), - m_RareAttributes(0), - m_IsPopulation(function_t::isPopulation(function)), - m_IsRare(false), - m_IsFreqRare(false) -{ + : m_Result(annotatedProbability), + m_NumberAttributeProbabilities(numberAttributeProbabilities), + m_NumberOfPeople(numberOfPeople), + m_AttributeProbabilityPrior(0), + m_PersonAttributeProbabilityPrior(0), + m_MinAttributeProbabilities(numberAttributeProbabilities), + m_DistinctTotalAttributes(0), + m_DistinctRareAttributes(0), + m_RareAttributes(0), + m_IsPopulation(function_t::isPopulation(function)), + m_IsRare(false), + m_IsFreqRare(false) { m_Result.s_AttributeProbabilities.clear(); m_Result.s_Influences.clear(); - if (function == function_t::E_IndividualRare || function == function_t::E_PopulationRare) - { + if (function == function_t::E_IndividualRare || function == function_t::E_PopulationRare) { m_IsRare = true; - } - else if (function == function_t::E_PopulationFreqRare) - { + } else if (function == function_t::E_PopulationFreqRare) { m_IsFreqRare = true; } } -void CAnnotatedProbabilityBuilder::personFrequency(double frequency, bool everSeenBefore) -{ - if (m_IsRare && m_IsPopulation == false) - { - if (everSeenBefore) - { +void CAnnotatedProbabilityBuilder::personFrequency(double frequency, bool everSeenBefore) { + if (m_IsRare && m_IsPopulation == false) { + if (everSeenBefore) { double period = (frequency == 0.0) ? 0.0 : 1 / frequency; m_Result.addDescriptiveData(annotated_probability::E_PERSON_PERIOD, period); - } - else - { + } else { m_Result.addDescriptiveData(annotated_probability::E_PERSON_NEVER_SEEN_BEFORE, 1.0); } } } -void CAnnotatedProbabilityBuilder::attributeProbabilityPrior(const maths::CMultinomialConjugate *prior) -{ +void CAnnotatedProbabilityBuilder::attributeProbabilityPrior(const maths::CMultinomialConjugate* prior) { m_AttributeProbabilityPrior = prior; } -void CAnnotatedProbabilityBuilder::personAttributeProbabilityPrior(const maths::CMultinomialConjugate *prior) -{ +void CAnnotatedProbabilityBuilder::personAttributeProbabilityPrior(const maths::CMultinomialConjugate* prior) { m_PersonAttributeProbabilityPrior = prior; } -void CAnnotatedProbabilityBuilder::probability(double p) -{ +void CAnnotatedProbabilityBuilder::probability(double p) { m_Result.s_Probability = p; } void CAnnotatedProbabilityBuilder::addAttributeProbability(std::size_t cid, - const core::CStoredStringPtr &attribute, + const core::CStoredStringPtr& attribute, double pAttribute, double pGivenAttribute_, model_t::CResultType type, model_t::EFeature feature, - const TStoredStringPtr1Vec &correlatedAttributes, - const TSizeDoublePr1Vec &correlated) -{ + const TStoredStringPtr1Vec& correlatedAttributes, + const TSizeDoublePr1Vec& correlated) { type.set(m_Result.s_ResultType.asInterimOrFinal()); SAttributeProbability pGivenAttribute(cid, attribute, pGivenAttribute_, type, feature, correlatedAttributes, correlated); this->addAttributeDescriptiveData(cid, pAttribute, pGivenAttribute); @@ -113,10 +97,8 @@ void CAnnotatedProbabilityBuilder::addAttributeProbability(std::size_t cid, void CAnnotatedProbabilityBuilder::addAttributeDescriptiveData(std::size_t cid, double pAttribute, - SAttributeProbability &attributeProbability) -{ - if (m_IsPopulation && (m_IsRare || m_IsFreqRare)) - { + SAttributeProbability& attributeProbability) { + if (m_IsPopulation && (m_IsRare || m_IsFreqRare)) { double concentration; 
m_AttributeProbabilityPrior->concentration(static_cast<double>(cid), concentration); attributeProbability.addDescriptiveData(annotated_probability::E_ATTRIBUTE_CONCENTRATION, concentration); @@ -125,55 +107,41 @@ void CAnnotatedProbabilityBuilder::addAttributeDescriptiveData(std::size_t cid, m_PersonAttributeProbabilityPrior->concentration(static_cast<double>(cid), activityConcentration); attributeProbability.addDescriptiveData(annotated_probability::E_ACTIVITY_CONCENTRATION, activityConcentration); - if (pAttribute < maths::LARGEST_SIGNIFICANT_PROBABILITY) - { + if (pAttribute < maths::LARGEST_SIGNIFICANT_PROBABILITY) { m_DistinctRareAttributes++; m_RareAttributes += activityConcentration; } } } -void CAnnotatedProbabilityBuilder::build() -{ +void CAnnotatedProbabilityBuilder::build() { this->addDescriptiveData(); - if (m_NumberAttributeProbabilities > 0 && m_MinAttributeProbabilities.count() > 0) - { + if (m_NumberAttributeProbabilities > 0 && m_MinAttributeProbabilities.count() > 0) { m_MinAttributeProbabilities.sort(); m_Result.s_AttributeProbabilities.reserve(m_MinAttributeProbabilities.count()); - double cutoff = std::max(1.1 * m_MinAttributeProbabilities[0].s_Probability, - maths::LARGEST_SIGNIFICANT_PROBABILITY); + double cutoff = std::max(1.1 * m_MinAttributeProbabilities[0].s_Probability, maths::LARGEST_SIGNIFICANT_PROBABILITY); - for (std::size_t i = 0u; - i < m_MinAttributeProbabilities.count() && m_MinAttributeProbabilities[i].s_Probability <= cutoff; - ++i) - { + for (std::size_t i = 0u; i < m_MinAttributeProbabilities.count() && m_MinAttributeProbabilities[i].s_Probability <= cutoff; ++i) { m_Result.s_AttributeProbabilities.push_back(m_MinAttributeProbabilities[i]); } } } -void CAnnotatedProbabilityBuilder::addDescriptiveData() -{ - if (m_IsPopulation && (m_IsRare || m_IsFreqRare)) - { - m_Result.addDescriptiveData(annotated_probability::E_PERSON_COUNT, - static_cast<double>(m_NumberOfPeople)); - if (m_IsRare) - { +void CAnnotatedProbabilityBuilder::addDescriptiveData() { + if (m_IsPopulation && (m_IsRare || m_IsFreqRare)) { + m_Result.addDescriptiveData(annotated_probability::E_PERSON_COUNT, static_cast<double>(m_NumberOfPeople)); + if (m_IsRare) { m_Result.addDescriptiveData(annotated_probability::E_DISTINCT_RARE_ATTRIBUTES_COUNT, static_cast<double>(m_DistinctRareAttributes)); m_Result.addDescriptiveData(annotated_probability::E_DISTINCT_TOTAL_ATTRIBUTES_COUNT, static_cast<double>(m_DistinctTotalAttributes)); - } - else if (m_IsFreqRare) - { + } else if (m_IsFreqRare) { double totalConcentration = m_PersonAttributeProbabilityPrior->totalConcentration(); m_Result.addDescriptiveData(annotated_probability::E_RARE_ATTRIBUTES_COUNT, m_RareAttributes); m_Result.addDescriptiveData(annotated_probability::E_TOTAL_ATTRIBUTES_COUNT, totalConcentration); } } } - } } diff --git a/lib/model/CAnomalyDetector.cc b/lib/model/CAnomalyDetector.cc index ce1edd3258..02cab913d3 100644 --- a/lib/model/CAnomalyDetector.cc +++ b/lib/model/CAnomalyDetector.cc @@ -31,15 +31,11 @@ #include #include - -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { // We use short field names to reduce the state size -namespace -{ +namespace { using TModelDetailsViewPtr = CAnomalyDetectorModel::CModelDetailsViewPtr; @@ -63,21 +59,17 @@ const std::string DATA_GATHERER_TAG("a"); const std::string MODELS_TAG("b"); const std::string MODEL_TAG("d"); -CAnomalyDetector::TDataGathererPtr makeDataGatherer(const CAnomalyDetector::TModelFactoryCPtr &factory, - core_t::TTime startTime, - const std::string &partitionFieldValue) -{
+CAnomalyDetector::TDataGathererPtr +makeDataGatherer(const CAnomalyDetector::TModelFactoryCPtr& factory, core_t::TTime startTime, const std::string& partitionFieldValue) { CModelFactory::SGathererInitializationData initData(startTime, partitionFieldValue); return CAnomalyDetector::TDataGathererPtr(factory->makeDataGatherer(initData)); } -CAnomalyDetector::TModelPtr makeModel(const CAnomalyDetector::TModelFactoryCPtr &factory, - const CAnomalyDetector::TDataGathererPtr &dataGatherer) -{ +CAnomalyDetector::TModelPtr makeModel(const CAnomalyDetector::TModelFactoryCPtr& factory, + const CAnomalyDetector::TDataGathererPtr& dataGatherer) { CModelFactory::SModelInitializationData initData(dataGatherer); return CAnomalyDetector::TModelPtr(factory->makeModel(initData)); } - } // Increment this every time a change to the state is made that requires @@ -98,41 +90,33 @@ const std::string CAnomalyDetector::SUM_NAME("sum"); const std::string CAnomalyDetector::LAT_LONG_NAME("lat_long"); const std::string CAnomalyDetector::EMPTY_STRING; - CAnomalyDetector::CAnomalyDetector(int detectorIndex, - CLimits &limits, - const CAnomalyDetectorModelConfig &modelConfig, - const std::string &partitionFieldValue, + CLimits& limits, + const CAnomalyDetectorModelConfig& modelConfig, + const std::string& partitionFieldValue, core_t::TTime firstTime, - const TModelFactoryCPtr &modelFactory) + const TModelFactoryCPtr& modelFactory) : m_Limits(limits), m_DetectorIndex(detectorIndex), m_ModelConfig(modelConfig), - m_LastBucketEndTime(maths::CIntegerTools::ceil(firstTime, - modelConfig.bucketLength())), + m_LastBucketEndTime(maths::CIntegerTools::ceil(firstTime, modelConfig.bucketLength())), m_DataGatherer(makeDataGatherer(modelFactory, m_LastBucketEndTime, partitionFieldValue)), m_ModelFactory(modelFactory), m_Model(makeModel(modelFactory, m_DataGatherer)), - m_IsForPersistence(false) -{ - if (m_DataGatherer == 0) - { + m_IsForPersistence(false) { + if (m_DataGatherer == 0) { LOG_ABORT("Failed to construct data gatherer for detector: " << this->description()); } - if (m_Model == 0) - { + if (m_Model == 0) { LOG_ABORT("Failed to construct model for detector: " << this->description()); } limits.resourceMonitor().registerComponent(*this); - LOG_DEBUG("CAnomalyDetector(): " << this->description() - << " for '" << m_DataGatherer->partitionFieldValue() << "'" - << ", first time = " << firstTime - << ", bucketLength = " << modelConfig.bucketLength() - << ", m_LastBucketEndTime = " << m_LastBucketEndTime); + LOG_DEBUG("CAnomalyDetector(): " << this->description() << " for '" << m_DataGatherer->partitionFieldValue() << "'" + << ", first time = " << firstTime << ", bucketLength = " << modelConfig.bucketLength() + << ", m_LastBucketEndTime = " << m_LastBucketEndTime); } -CAnomalyDetector::CAnomalyDetector(bool isForPersistence, - const CAnomalyDetector &other) +CAnomalyDetector::CAnomalyDetector(bool isForPersistence, const CAnomalyDetector& other) : m_Limits(other.m_Limits), m_DetectorIndex(other.m_DetectorIndex), m_ModelConfig(other.m_ModelConfig), @@ -143,64 +127,50 @@ CAnomalyDetector::CAnomalyDetector(bool isForPersistence, m_ModelFactory(other.m_ModelFactory), // Shallow copy of model factory is OK m_Model(other.m_Model->cloneForPersistence()), // Empty message propagation function is fine in this case - m_IsForPersistence(isForPersistence) -{ - if (!isForPersistence) - { + m_IsForPersistence(isForPersistence) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } 
-CAnomalyDetector::~CAnomalyDetector() -{ - if (!m_IsForPersistence) - { +CAnomalyDetector::~CAnomalyDetector() { + if (!m_IsForPersistence) { m_Limits.resourceMonitor().unRegisterComponent(*this); } } -size_t CAnomalyDetector::numberActivePeople() const -{ +size_t CAnomalyDetector::numberActivePeople() const { return m_DataGatherer->numberActivePeople(); } -size_t CAnomalyDetector::numberActiveAttributes() const -{ +size_t CAnomalyDetector::numberActiveAttributes() const { return m_DataGatherer->numberActiveAttributes(); } -size_t CAnomalyDetector::maxDimension() const -{ +size_t CAnomalyDetector::maxDimension() const { return m_DataGatherer->maxDimension(); } -void CAnomalyDetector::zeroModelsToTime(core_t::TTime time) -{ +void CAnomalyDetector::zeroModelsToTime(core_t::TTime time) { // If there has been a big gap in the times, we might need to sample // many buckets; if there has been no gap, the loop may legitimately // have no iterations. core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - while (time >= (m_LastBucketEndTime + bucketLength)) - { + while (time >= (m_LastBucketEndTime + bucketLength)) { core_t::TTime bucketStartTime = m_LastBucketEndTime; m_LastBucketEndTime += bucketLength; - LOG_TRACE("sample: m_DetectorKey = '" << this->description() - << "', bucketStartTime = " << bucketStartTime - << ", m_LastBucketEndTime = " << m_LastBucketEndTime); + LOG_TRACE("sample: m_DetectorKey = '" << this->description() << "', bucketStartTime = " << bucketStartTime + << ", m_LastBucketEndTime = " << m_LastBucketEndTime); // Update the statistical models. - m_Model->sample(bucketStartTime, - m_LastBucketEndTime, - m_Limits.resourceMonitor()); + m_Model->sample(bucketStartTime, m_LastBucketEndTime, m_Limits.resourceMonitor()); } } -bool CAnomalyDetector::acceptRestoreTraverser(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser) -{ +bool CAnomalyDetector::acceptRestoreTraverser(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) { // As the model pointer will change during restore, we unregister // the detector from the resource monitor. We can register it // again at the end of restore. @@ -212,233 +182,160 @@ bool CAnomalyDetector::acceptRestoreTraverser(const std::string &partitionFieldV // We expect tags immediately below the root storing the first time the // models were created and the models IN THAT ORDER. 
- do - { - const std::string &name = traverser.name(); - if (name == MODEL_AND_GATHERER_TAG) - { - if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::legacyModelEnsembleAcceptRestoreTraverser, - this, - boost::cref(partitionFieldValue), - _1)) == false) - { + do { + const std::string& name = traverser.name(); + if (name == MODEL_AND_GATHERER_TAG) { + if (traverser.traverseSubLevel(boost::bind( + &CAnomalyDetector::legacyModelEnsembleAcceptRestoreTraverser, this, boost::cref(partitionFieldValue), _1)) == false) { LOG_ERROR("Invalid model ensemble section in " << traverser.value()); return false; } - } - else if (name == SIMPLE_COUNT_STATICS) - { - if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::staticsAcceptRestoreTraverser, - this, - _1)) == false) - { + } else if (name == SIMPLE_COUNT_STATICS) { + if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::staticsAcceptRestoreTraverser, this, _1)) == false) { LOG_ERROR("Invalid simple count statics in " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); m_Limits.resourceMonitor().registerComponent(*this); return true; } -bool CAnomalyDetector::legacyModelEnsembleAcceptRestoreTraverser(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == DATA_GATHERER_TAG) - { +bool CAnomalyDetector::legacyModelEnsembleAcceptRestoreTraverser(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == DATA_GATHERER_TAG) { m_DataGatherer.reset(m_ModelFactory->makeDataGatherer(partitionFieldValue, traverser)); - if (!m_DataGatherer) - { + if (!m_DataGatherer) { LOG_ERROR("Failed to restore the data gatherer from " << traverser.value()); return false; } - } - else if (name == MODELS_TAG) - { - if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::legacyModelsAcceptRestoreTraverser, - this, - _1)) == false) - { + } else if (name == MODELS_TAG) { + if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::legacyModelsAcceptRestoreTraverser, this, _1)) == false) { LOG_ERROR("Failed to restore live models from " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -bool CAnomalyDetector::legacyModelsAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == MODEL_TAG) - { +bool CAnomalyDetector::legacyModelsAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == MODEL_TAG) { CModelFactory::SModelInitializationData initData(m_DataGatherer); m_Model.reset(m_ModelFactory->makeModel(initData, traverser)); - if (!m_Model) - { + if (!m_Model) { LOG_ERROR("Failed to extract model from " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -bool CAnomalyDetector::staticsAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == RANDOMIZED_PERIODIC_TAG) - { - if (traverser.traverseSubLevel( - &maths::CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser) == false) - { +bool CAnomalyDetector::staticsAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == 
RANDOMIZED_PERIODIC_TAG) { + if (traverser.traverseSubLevel(&maths::CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser) == false) { LOG_ERROR("Failed to restore randomized periodic test state"); return false; } - } - else if (name == STATISTICS_TAG) - { - if (traverser.traverseSubLevel( - &core::CStatistics::staticsAcceptRestoreTraverser) == false) - { + } else if (name == STATISTICS_TAG) { + if (traverser.traverseSubLevel(&core::CStatistics::staticsAcceptRestoreTraverser) == false) { LOG_ERROR("Failed to restore statistics"); return false; } - } - else if (name == SAMPLING_TAG) - { - if (traverser.traverseSubLevel( - &maths::CSampling::staticsAcceptRestoreTraverser) == false) - { + } else if (name == SAMPLING_TAG) { + if (traverser.traverseSubLevel(&maths::CSampling::staticsAcceptRestoreTraverser) == false) { LOG_ERROR("Failed to restore sampling state"); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } - -bool CAnomalyDetector::partitionFieldAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser, - std::string &partitionFieldValue) -{ - do - { - const std::string &name = traverser.name(); - if (name == PARTITION_FIELD_VALUE_TAG) - { +bool CAnomalyDetector::partitionFieldAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::string& partitionFieldValue) { + do { + const std::string& name = traverser.name(); + if (name == PARTITION_FIELD_VALUE_TAG) { partitionFieldValue = traverser.value(); return true; } - } - while (traverser.next()); + } while (traverser.next()); return false; } -bool CAnomalyDetector::keyAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser, - CSearchKey &key) -{ - do - { - const std::string &name = traverser.name(); - if (name == KEY_TAG) - { +bool CAnomalyDetector::keyAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, CSearchKey& key) { + do { + const std::string& name = traverser.name(); + if (name == KEY_TAG) { bool successful(true); key = CSearchKey(traverser, successful); - if (successful == false) - { + if (successful == false) { LOG_ERROR("Invalid key in " << traverser.value()); return false; } return true; } - } - while (traverser.next()); + } while (traverser.next()); return false; } -void CAnomalyDetector::keyAcceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(KEY_TAG, - boost::bind(&CSearchKey::acceptPersistInserter, - &m_DataGatherer->searchKey(), - _1)); +void CAnomalyDetector::keyAcceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(KEY_TAG, boost::bind(&CSearchKey::acceptPersistInserter, &m_DataGatherer->searchKey(), _1)); } -void CAnomalyDetector::partitionFieldAcceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CAnomalyDetector::partitionFieldAcceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(PARTITION_FIELD_VALUE_TAG, m_DataGatherer->partitionFieldValue()); } -void CAnomalyDetector::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CAnomalyDetector::acceptPersistInserter(core::CStatePersistInserter& inserter) const { // Persist static members only once within the simple count detector // and do this first so that other model components can use // static strings - if (this->isSimpleCount()) - { - inserter.insertLevel(SIMPLE_COUNT_STATICS, - boost::bind(&CAnomalyDetector::staticsAcceptPersistInserter, - this, - _1)); + if (this->isSimpleCount()) { + inserter.insertLevel(SIMPLE_COUNT_STATICS, 
boost::bind(&CAnomalyDetector::staticsAcceptPersistInserter, this, _1)); } // Persist what used to belong in model ensemble at a separate level to ensure BWC - inserter.insertLevel(MODEL_AND_GATHERER_TAG, - boost::bind(&CAnomalyDetector::legacyModelEnsembleAcceptPersistInserter, - this, - _1)); + inserter.insertLevel(MODEL_AND_GATHERER_TAG, boost::bind(&CAnomalyDetector::legacyModelEnsembleAcceptPersistInserter, this, _1)); } -void CAnomalyDetector::staticsAcceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(RANDOMIZED_PERIODIC_TAG, - &maths::CRandomizedPeriodicityTest::staticsAcceptPersistInserter); +void CAnomalyDetector::staticsAcceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(RANDOMIZED_PERIODIC_TAG, &maths::CRandomizedPeriodicityTest::staticsAcceptPersistInserter); inserter.insertLevel(STATISTICS_TAG, &core::CStatistics::staticsAcceptPersistInserter); inserter.insertLevel(SAMPLING_TAG, &maths::CSampling::staticsAcceptPersistInserter); } -void CAnomalyDetector::legacyModelEnsembleAcceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(DATA_GATHERER_TAG, boost::bind(&CDataGatherer::acceptPersistInserter, - boost::cref(*m_DataGatherer), _1)); +void CAnomalyDetector::legacyModelEnsembleAcceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(DATA_GATHERER_TAG, boost::bind(&CDataGatherer::acceptPersistInserter, boost::cref(*m_DataGatherer), _1)); // This level seems redundant but it is simulating state as it was when CModelEnsemble // was around. inserter.insertLevel(MODELS_TAG, boost::bind(&CAnomalyDetector::legacyModelsAcceptPersistInserter, this, _1)); } -void CAnomalyDetector::legacyModelsAcceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CAnomalyDetector::legacyModelsAcceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel(MODEL_TAG, boost::bind(&CAnomalyDetectorModel::acceptPersistInserter, m_Model.get(), _1)); } -const CAnomalyDetector::TStrVec &CAnomalyDetector::fieldsOfInterest() const -{ +const CAnomalyDetector::TStrVec& CAnomalyDetector::fieldsOfInterest() const { return m_DataGatherer->fieldsOfInterest(); } -void CAnomalyDetector::addRecord(core_t::TTime time, - const TStrCPtrVec &fieldValues) -{ - const TStrCPtrVec &processedFieldValues = this->preprocessFieldValues(fieldValues); +void CAnomalyDetector::addRecord(core_t::TTime time, const TStrCPtrVec& fieldValues) { + const TStrCPtrVec& processedFieldValues = this->preprocessFieldValues(fieldValues); CEventData eventData; eventData.time(time); @@ -446,25 +343,18 @@ void CAnomalyDetector::addRecord(core_t::TTime time, m_DataGatherer->addArrival(processedFieldValues, eventData, m_Limits.resourceMonitor()); } -const CAnomalyDetector::TStrCPtrVec & -CAnomalyDetector::preprocessFieldValues(const TStrCPtrVec &fieldValues) -{ +const CAnomalyDetector::TStrCPtrVec& CAnomalyDetector::preprocessFieldValues(const TStrCPtrVec& fieldValues) { return fieldValues; } -void CAnomalyDetector::buildResults(core_t::TTime bucketStartTime, - core_t::TTime bucketEndTime, - CHierarchicalResults &results) -{ +void CAnomalyDetector::buildResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results) { core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - if (m_ModelConfig.bucketResultsDelay()) - { + if (m_ModelConfig.bucketResultsDelay()) { bucketLength /= 2; } bucketStartTime = 
maths::CIntegerTools::floor(bucketStartTime, bucketLength); bucketEndTime = maths::CIntegerTools::floor(bucketEndTime, bucketLength); - if (bucketEndTime <= m_LastBucketEndTime) - { + if (bucketEndTime <= m_LastBucketEndTime) { return; } @@ -472,23 +362,13 @@ void CAnomalyDetector::buildResults(core_t::TTime bucketStartTime, this->buildResultsHelper(bucketStartTime, bucketEndTime, - boost::bind(&CAnomalyDetector::sample, - this, - _1, - _2, - boost::ref(m_Limits.resourceMonitor())), - boost::bind(&CAnomalyDetector::updateLastSampledBucket, - this, - _1), + boost::bind(&CAnomalyDetector::sample, this, _1, _2, boost::ref(m_Limits.resourceMonitor())), + boost::bind(&CAnomalyDetector::updateLastSampledBucket, this, _1), results); } -void CAnomalyDetector::sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - if (endTime <= startTime) - { +void CAnomalyDetector::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + if (endTime <= startTime) { // Nothing to sample. return; } @@ -496,54 +376,39 @@ void CAnomalyDetector::sample(core_t::TTime startTime, core_t::TTime bucketLength = m_ModelConfig.bucketLength(); bool isEndOfBucketSample = endTime % bucketLength == 0; - if (isEndOfBucketSample) - { + if (isEndOfBucketSample) { LOG_TRACE("Going to do end-of-bucket sample"); - } - else - { + } else { LOG_TRACE("Going to do out-of-phase sampleBucketStatistics"); } - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { - if (isEndOfBucketSample) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { + if (isEndOfBucketSample) { m_Model->sample(time, time + bucketLength, resourceMonitor); - } - else - { + } else { m_Model->sampleBucketStatistics(time, time + bucketLength, resourceMonitor); } } - if ((endTime / bucketLength) % 10 == 0) - { + if ((endTime / bucketLength) % 10 == 0) { // Even if memory limiting is disabled, force a refresh every 10 buckets // so the user has some idea what's going on with memory. (Note: the // 10 bucket interval is inexact as sampling may not take place for // every bucket. However, it's probably good enough.) resourceMonitor.forceRefresh(*this); - } - else - { + } else { resourceMonitor.refresh(*this); } } -void CAnomalyDetector::sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - if (endTime <= startTime) - { +void CAnomalyDetector::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + if (endTime <= startTime) { // Nothing to sample. 
return; } core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { m_Model->sampleBucketStatistics(time, time + bucketLength, resourceMonitor); } resourceMonitor.refresh(*this); @@ -552,78 +417,64 @@ void CAnomalyDetector::sampleBucketStatistics(core_t::TTime startTime, void CAnomalyDetector::generateModelPlot(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, double boundsPercentile, - const TStrSet &terms, - TModelPlotDataVec &modelPlots) const -{ + const TStrSet& terms, + TModelPlotDataVec& modelPlots) const { if (bucketEndTime <= bucketStartTime) { return; } - if ( terms.empty() - || m_DataGatherer->partitionFieldValue().empty() - || terms.find(m_DataGatherer->partitionFieldValue()) != terms.end()) - { - const CSearchKey &key = m_DataGatherer->searchKey(); + if (terms.empty() || m_DataGatherer->partitionFieldValue().empty() || + terms.find(m_DataGatherer->partitionFieldValue()) != terms.end()) { + const CSearchKey& key = m_DataGatherer->searchKey(); TModelDetailsViewPtr view = m_Model.get()->details(); - if (view.get()) - { + if (view.get()) { core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - for (core_t::TTime time = bucketStartTime; time < bucketEndTime; time += bucketLength) - { + for (core_t::TTime time = bucketStartTime; time < bucketEndTime; time += bucketLength) { modelPlots.emplace_back(time, - key.partitionFieldName(), - m_DataGatherer->partitionFieldValue(), - key.overFieldName(), - key.byFieldName(), - bucketLength, - m_DetectorIndex); + key.partitionFieldName(), + m_DataGatherer->partitionFieldValue(), + key.overFieldName(), + key.byFieldName(), + bucketLength, + m_DetectorIndex); view->modelPlot(time, boundsPercentile, terms, modelPlots.back()); } } } } -CForecastDataSink::SForecastModelPrerequisites CAnomalyDetector::getForecastPrerequisites() const -{ +CForecastDataSink::SForecastModelPrerequisites CAnomalyDetector::getForecastPrerequisites() const { CForecastDataSink::SForecastModelPrerequisites prerequisites{0, 0, 0, true, false}; TModelDetailsViewPtr view = m_Model->details(); // The view can be empty, e.g. for the counting model. - if (view.get() == nullptr) - { + if (view.get() == nullptr) { return prerequisites; } prerequisites.s_IsPopulation = m_DataGatherer->isPopulation(); - if (prerequisites.s_IsPopulation) - { + if (prerequisites.s_IsPopulation) { return prerequisites; } - const CSearchKey &key = m_DataGatherer->searchKey(); + const CSearchKey& key = m_DataGatherer->searchKey(); prerequisites.s_IsSupportedFunction = function_t::isForecastSupported(key.function()); - if (prerequisites.s_IsSupportedFunction == false) - { + if (prerequisites.s_IsSupportedFunction == false) { return prerequisites; } - for (std::size_t pid = 0u, maxPid = m_DataGatherer->numberPeople(); pid < maxPid; ++pid) - { + for (std::size_t pid = 0u, maxPid = m_DataGatherer->numberPeople(); pid < maxPid; ++pid) { // todo: Add terms filtering here - if (m_DataGatherer->isPersonActive(pid)) - { - for (auto feature : view->features()) - { - const maths::CModel *model = view->model(feature, pid); + if (m_DataGatherer->isPersonActive(pid)) { + for (auto feature : view->features()) { + const maths::CModel* model = view->model(feature, pid); // The model might not exist, e.g. for categorical features. 
- if (model != nullptr) - { + if (model != nullptr) { ++prerequisites.s_NumberOfModels; - if (model->isForecastPossible()) - { + if (model->isForecastPossible()) { ++prerequisites.s_NumberOfForecastableModels; } prerequisites.s_MemoryUsageForDetector += model->memoryUsage(); @@ -635,43 +486,34 @@ CForecastDataSink::SForecastModelPrerequisites CAnomalyDetector::getForecastPrer return prerequisites; } -CForecastDataSink::SForecastResultSeries CAnomalyDetector::getForecastModels() const -{ +CForecastDataSink::SForecastResultSeries CAnomalyDetector::getForecastModels() const { CForecastDataSink::SForecastResultSeries series; - if (m_DataGatherer->isPopulation()) - { + if (m_DataGatherer->isPopulation()) { return series; } TModelDetailsViewPtr view = m_Model.get()->details(); // The view can be empty, e.g. for the counting model. - if (view.get() == nullptr) - { + if (view.get() == nullptr) { return series; } - const CSearchKey &key = m_DataGatherer->searchKey(); + const CSearchKey& key = m_DataGatherer->searchKey(); series.s_ByFieldName = key.byFieldName(); series.s_DetectorIndex = m_DetectorIndex; series.s_PartitionFieldName = key.partitionFieldName(); series.s_PartitionFieldValue = m_DataGatherer->partitionFieldValue(); - for (std::size_t pid = 0u, maxPid = m_DataGatherer->numberPeople(); pid < maxPid; ++pid) - { + for (std::size_t pid = 0u, maxPid = m_DataGatherer->numberPeople(); pid < maxPid; ++pid) { // todo: Add terms filtering here - if (m_DataGatherer->isPersonActive(pid)) - { - for (auto feature : view->features()) - { - const maths::CModel *model = view->model(feature, pid); - if (model != nullptr && model->isForecastPossible()) - { + if (m_DataGatherer->isPersonActive(pid)) { + for (auto feature : view->features()) { + const maths::CModel* model = view->model(feature, pid); + if (model != nullptr && model->isForecastPossible()) { series.s_ToForecast.emplace_back( - feature, - CForecastDataSink::TMathsModelPtr(model->cloneForForecast()), - m_DataGatherer->personName(pid)); + feature, CForecastDataSink::TMathsModelPtr(model->cloneForForecast()), m_DataGatherer->personName(pid)); } } } @@ -680,117 +522,88 @@ CForecastDataSink::SForecastResultSeries CAnomalyDetector::getForecastModels() c return series; } -void CAnomalyDetector::buildInterimResults(core_t::TTime bucketStartTime, - core_t::TTime bucketEndTime, - CHierarchicalResults &results) -{ +void CAnomalyDetector::buildInterimResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results) { this->buildResultsHelper(bucketStartTime, bucketEndTime, - boost::bind(&CAnomalyDetector::sampleBucketStatistics, - this, - _1, - _2, - boost::ref(m_Limits.resourceMonitor())), - boost::bind(&CAnomalyDetector::noUpdateLastSampledBucket, - this, - _1), + boost::bind(&CAnomalyDetector::sampleBucketStatistics, this, _1, _2, boost::ref(m_Limits.resourceMonitor())), + boost::bind(&CAnomalyDetector::noUpdateLastSampledBucket, this, _1), results); } -void CAnomalyDetector::pruneModels() -{ +void CAnomalyDetector::pruneModels() { // Purge out any ancient models which are effectively dead. 
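
The prune call just below delegates to defaultPruneWindow(), which, as the CAnomalyDetectorModel.cc hunk later in this patch shows, is min(scale / decayRate, 1M buckets), falling back to the 1M-bucket cap when the decay rate is zero. A worked sketch of that formula; the constant mirrors the patch but the free-function wrapper is illustrative:

#include <algorithm>
#include <cstddef>

const std::size_t MAXIMUM_PERMITTED_AGE = 1000000; // 1M buckets.

std::size_t pruneWindow(double decayRate, double pruneWindowScale) {
    // With no decay, models never age out, so use the hard cap.
    if (decayRate == 0.0) {
        return MAXIMUM_PERMITTED_AGE;
    }
    return std::min(static_cast<std::size_t>(pruneWindowScale / decayRate), MAXIMUM_PERMITTED_AGE);
}

// For example, decayRate = 0.0005 with scale = 4.0 gives an 8000 bucket
// window; decayRate = 1e-9 saturates at the 1M bucket cap.
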
m_Model->prune(m_Model->defaultPruneWindow()); } -void CAnomalyDetector::resetBucket(core_t::TTime bucketStart) -{ +void CAnomalyDetector::resetBucket(core_t::TTime bucketStart) { m_DataGatherer->resetBucket(bucketStart); } -void CAnomalyDetector::releaseMemory(core_t::TTime samplingCutoffTime) -{ +void CAnomalyDetector::releaseMemory(core_t::TTime samplingCutoffTime) { m_DataGatherer->releaseMemory(samplingCutoffTime); } -void CAnomalyDetector::showMemoryUsage(std::ostream &stream) const -{ +void CAnomalyDetector::showMemoryUsage(std::ostream& stream) const { core::CMemoryUsage mem; this->debugMemoryUsage(mem.addChild()); mem.compress(); mem.print(stream); - if (mem.usage() != this->memoryUsage()) - { - LOG_ERROR("Discrepancy in memory report: " << mem.usage() - << " from debug, but " << this->memoryUsage() - << " from normal"); + if (mem.usage() != this->memoryUsage()) { + LOG_ERROR("Discrepancy in memory report: " << mem.usage() << " from debug, but " << this->memoryUsage() << " from normal"); } } -void CAnomalyDetector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CAnomalyDetector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("Anomaly Detector Memory Usage"); core::CMemoryDebug::dynamicSize("m_Model", m_Model, mem); } -std::size_t CAnomalyDetector::memoryUsage() const -{ +std::size_t CAnomalyDetector::memoryUsage() const { // We only account for the model in CResourceMonitor, // so we just include that here. std::size_t mem = core::CMemory::dynamicSize(m_Model); return mem; } -const core_t::TTime &CAnomalyDetector::lastBucketEndTime() const -{ +const core_t::TTime& CAnomalyDetector::lastBucketEndTime() const { return m_LastBucketEndTime; } -core_t::TTime &CAnomalyDetector::lastBucketEndTime() -{ +core_t::TTime& CAnomalyDetector::lastBucketEndTime() { return m_LastBucketEndTime; } -core_t::TTime CAnomalyDetector::modelBucketLength() const -{ +core_t::TTime CAnomalyDetector::modelBucketLength() const { return m_ModelConfig.bucketLength(); } -std::string CAnomalyDetector::description() const -{ +std::string CAnomalyDetector::description() const { auto beginInfluencers = m_DataGatherer->beginInfluencers(); auto endInfluencers = m_DataGatherer->endInfluencers(); - return m_DataGatherer->description() - + (m_DataGatherer->partitionFieldValue().empty() ? "" : "/") - + m_DataGatherer->partitionFieldValue() + - (beginInfluencers != endInfluencers ? (" " + - core::CContainerPrinter::print(beginInfluencers, endInfluencers)) : ""); + return m_DataGatherer->description() + (m_DataGatherer->partitionFieldValue().empty() ? "" : "/") + + m_DataGatherer->partitionFieldValue() + + (beginInfluencers != endInfluencers ? 
(" " + core::CContainerPrinter::print(beginInfluencers, endInfluencers)) : ""); } -void CAnomalyDetector::timeNow(core_t::TTime time) -{ +void CAnomalyDetector::timeNow(core_t::TTime time) { m_DataGatherer->timeNow(time); } -void CAnomalyDetector::skipSampling(core_t::TTime endTime) -{ +void CAnomalyDetector::skipSampling(core_t::TTime endTime) { m_Model->skipSampling(endTime); m_LastBucketEndTime = endTime; } -template +template void CAnomalyDetector::buildResultsHelper(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, SAMPLE_FUNC sampleFunc, LAST_SAMPLED_BUCKET_UPDATE_FUNC lastSampledBucketUpdateFunc, - CHierarchicalResults &results) -{ + CHierarchicalResults& results) { core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - LOG_TRACE("sample: m_DetectorKey = '" << this->description() - << "', bucketStartTime = " << bucketStartTime - << ", bucketEndTime = " << bucketEndTime); + LOG_TRACE("sample: m_DetectorKey = '" << this->description() << "', bucketStartTime = " << bucketStartTime + << ", bucketEndTime = " << bucketEndTime); // Update the statistical models. sampleFunc(bucketStartTime, bucketEndTime); @@ -804,64 +617,49 @@ void CAnomalyDetector::buildResultsHelper(core_t::TTime bucketStartTime, bucketStartTime, bucketEndTime, 10, // TODO max number of attributes - results)) - { - if (bucketEndTime % bucketLength == 0) - { + results)) { + if (bucketEndTime % bucketLength == 0) { lastSampledBucketUpdateFunc(bucketEndTime); } } } -void CAnomalyDetector::updateLastSampledBucket(core_t::TTime bucketEndTime) -{ +void CAnomalyDetector::updateLastSampledBucket(core_t::TTime bucketEndTime) { m_LastBucketEndTime = std::max(m_LastBucketEndTime, bucketEndTime); } -void CAnomalyDetector::noUpdateLastSampledBucket(core_t::TTime /*bucketEndTime*/) const -{ +void CAnomalyDetector::noUpdateLastSampledBucket(core_t::TTime /*bucketEndTime*/) const { // Do nothing } -std::string CAnomalyDetector::toCue() const -{ - return m_DataGatherer->searchKey().toCue() + m_DataGatherer->searchKey().CUE_DELIMITER - + m_DataGatherer->partitionFieldValue(); +std::string CAnomalyDetector::toCue() const { + return m_DataGatherer->searchKey().toCue() + m_DataGatherer->searchKey().CUE_DELIMITER + m_DataGatherer->partitionFieldValue(); } -std::string CAnomalyDetector::debug() const -{ +std::string CAnomalyDetector::debug() const { return m_DataGatherer->searchKey().debug() + '/' + m_DataGatherer->partitionFieldValue(); } -bool CAnomalyDetector::isSimpleCount() const -{ +bool CAnomalyDetector::isSimpleCount() const { return false; } -void CAnomalyDetector::initSimpleCounting() -{ +void CAnomalyDetector::initSimpleCounting() { bool addedPerson = false; m_DataGatherer->addPerson(COUNT_NAME, m_Limits.resourceMonitor(), addedPerson); } -const CAnomalyDetector::TModelPtr &CAnomalyDetector::model() const -{ +const CAnomalyDetector::TModelPtr& CAnomalyDetector::model() const { return m_Model; } -CAnomalyDetector::TModelPtr &CAnomalyDetector::model() -{ +CAnomalyDetector::TModelPtr& CAnomalyDetector::model() { return m_Model; } -std::ostream &operator<<(std::ostream &strm, const CAnomalyDetector &detector) -{ - strm << detector.m_DataGatherer->searchKey() - << '/' - << detector.m_DataGatherer->partitionFieldValue(); +std::ostream& operator<<(std::ostream& strm, const CAnomalyDetector& detector) { + strm << detector.m_DataGatherer->searchKey() << '/' << detector.m_DataGatherer->partitionFieldValue(); return strm; } - } } diff --git a/lib/model/CAnomalyDetectorModel.cc b/lib/model/CAnomalyDetectorModel.cc index 
e0cfd06137..30725bed39 100644 --- a/lib/model/CAnomalyDetectorModel.cc +++ b/lib/model/CAnomalyDetectorModel.cc @@ -34,12 +34,9 @@ #include -namespace ml -{ -namespace model -{ -namespace -{ +namespace ml { +namespace model { +namespace { const std::string MODEL_TAG{"a"}; const std::string EMPTY; @@ -47,123 +44,96 @@ const model_t::CResultType SKIP_SAMPLING_RESULT_TYPE; const CAnomalyDetectorModel::TStr1Vec EMPTY_STRING_LIST; -bool checkRules(const SModelParams::TDetectionRuleVec &detectionRules, - const CAnomalyDetectorModel &model, +bool checkRules(const SModelParams::TDetectionRuleVec& detectionRules, + const CAnomalyDetectorModel& model, model_t::EFeature feature, CDetectionRule::ERuleAction action, - const model_t::CResultType &resultType, + const model_t::CResultType& resultType, std::size_t pid, std::size_t cid, - core_t::TTime time) -{ + core_t::TTime time) { bool isIgnored{false}; - for (auto &rule : detectionRules) - { - isIgnored = isIgnored || rule.apply(action, - model, - feature, - resultType, - pid, cid, time); + for (auto& rule : detectionRules) { + isIgnored = isIgnored || rule.apply(action, model, feature, resultType, pid, cid, time); } return isIgnored; } -bool checkScheduledEvents(const SModelParams::TStrDetectionRulePrVec &scheduledEvents, - const CAnomalyDetectorModel &model, +bool checkScheduledEvents(const SModelParams::TStrDetectionRulePrVec& scheduledEvents, + const CAnomalyDetectorModel& model, model_t::EFeature feature, CDetectionRule::ERuleAction action, - const model_t::CResultType &resultType, + const model_t::CResultType& resultType, std::size_t pid, std::size_t cid, - core_t::TTime time) -{ + core_t::TTime time) { bool isIgnored{false}; - for (auto &event : scheduledEvents) - { - isIgnored = isIgnored || event.second.apply(action, - model, - feature, - resultType, - pid, cid, time); + for (auto& event : scheduledEvents) { + isIgnored = isIgnored || event.second.apply(action, model, feature, resultType, pid, cid, time); } return isIgnored; } - } -CAnomalyDetectorModel::CAnomalyDetectorModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators) : - m_Params(params), - m_DataGatherer(dataGatherer), - m_BucketCount(0.0), - m_InfluenceCalculators(influenceCalculators), - m_InterimBucketCorrector(new CInterimBucketCorrector(dataGatherer->bucketLength())) -{ - if (!m_DataGatherer) - { +CAnomalyDetectorModel::CAnomalyDetectorModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) + : m_Params(params), + m_DataGatherer(dataGatherer), + m_BucketCount(0.0), + m_InfluenceCalculators(influenceCalculators), + m_InterimBucketCorrector(new CInterimBucketCorrector(dataGatherer->bucketLength())) { + if (!m_DataGatherer) { LOG_ABORT("Must provide a data gatherer"); } - for (auto &calculators : m_InfluenceCalculators) - { + for (auto& calculators : m_InfluenceCalculators) { std::sort(calculators.begin(), calculators.end(), maths::COrderings::SFirstLess()); } } -CAnomalyDetectorModel::CAnomalyDetectorModel(bool isForPersistence, const CAnomalyDetectorModel &other) : - // The copy of m_DataGatherer is a shallow copy. This would be unacceptable - // if we were going to persist the data gatherer from within this class. - // We don't, so that's OK, but the next issue is that another thread will be - // modifying the data gatherer m_DataGatherer points to whilst this object - // is being persisted. 
Therefore, persistence must only call methods on the - // data gatherer that are invariant. - m_Params(other.m_Params), - m_DataGatherer(other.m_DataGatherer), - m_PersonBucketCounts(other.m_PersonBucketCounts), - m_BucketCount(other.m_BucketCount), - m_InfluenceCalculators(), - m_InterimBucketCorrector(new CInterimBucketCorrector(*other.m_InterimBucketCorrector)) -{ - if (!isForPersistence) - { +CAnomalyDetectorModel::CAnomalyDetectorModel(bool isForPersistence, const CAnomalyDetectorModel& other) + : // The copy of m_DataGatherer is a shallow copy. This would be unacceptable + // if we were going to persist the data gatherer from within this class. + // We don't, so that's OK, but the next issue is that another thread will be + // modifying the data gatherer m_DataGatherer points to whilst this object + // is being persisted. Therefore, persistence must only call methods on the + // data gatherer that are invariant. + m_Params(other.m_Params), + m_DataGatherer(other.m_DataGatherer), + m_PersonBucketCounts(other.m_PersonBucketCounts), + m_BucketCount(other.m_BucketCount), + m_InfluenceCalculators(), + m_InterimBucketCorrector(new CInterimBucketCorrector(*other.m_InterimBucketCorrector)) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } -std::string CAnomalyDetectorModel::description() const -{ +std::string CAnomalyDetectorModel::description() const { return m_DataGatherer->description(); } -const std::string &CAnomalyDetectorModel::personName(std::size_t pid) const -{ +const std::string& CAnomalyDetectorModel::personName(std::size_t pid) const { return m_DataGatherer->personName(pid, core::CStringUtils::typeToString(pid)); } -const std::string &CAnomalyDetectorModel::personName(std::size_t pid, - const std::string &fallback) const -{ +const std::string& CAnomalyDetectorModel::personName(std::size_t pid, const std::string& fallback) const { return m_DataGatherer->personName(pid, fallback); } -std::string CAnomalyDetectorModel::printPeople(const TSizeVec &pids, std::size_t limit) const -{ - if (pids.empty()) - { +std::string CAnomalyDetectorModel::printPeople(const TSizeVec& pids, std::size_t limit) const { + if (pids.empty()) { return std::string(); } - if (limit == 0) - { + if (limit == 0) { return core::CStringUtils::typeToString(pids.size()) + " in total"; } std::string result{this->personName(pids[0])}; - for (std::size_t i = 1u; i < std::min(limit, pids.size()); ++i) - { + for (std::size_t i = 1u; i < std::min(limit, pids.size()); ++i) { result += ' '; result += this->personName(pids[i]); } - if (limit < pids.size()) - { + if (limit < pids.size()) { result += " and "; result += core::CStringUtils::typeToString(pids.size() - limit); result += " others"; @@ -171,40 +141,31 @@ std::string CAnomalyDetectorModel::printPeople(const TSizeVec &pids, std::size_t return result; } -std::size_t CAnomalyDetectorModel::numberOfPeople() const -{ +std::size_t CAnomalyDetectorModel::numberOfPeople() const { return m_DataGatherer->numberActivePeople(); } -const std::string &CAnomalyDetectorModel::attributeName(std::size_t cid) const -{ +const std::string& CAnomalyDetectorModel::attributeName(std::size_t cid) const { return m_DataGatherer->attributeName(cid, core::CStringUtils::typeToString(cid)); } -const std::string &CAnomalyDetectorModel::attributeName(std::size_t cid, - const std::string &fallback) const -{ +const std::string& CAnomalyDetectorModel::attributeName(std::size_t cid, const std::string& fallback) const { return 
m_DataGatherer->attributeName(cid, fallback); } -std::string CAnomalyDetectorModel::printAttributes(const TSizeVec &cids, std::size_t limit) const -{ - if (cids.empty()) - { +std::string CAnomalyDetectorModel::printAttributes(const TSizeVec& cids, std::size_t limit) const { + if (cids.empty()) { return std::string(); } - if (limit == 0) - { + if (limit == 0) { return core::CStringUtils::typeToString(cids.size()) + " in total"; } std::string result{this->attributeName(cids[0])}; - for (std::size_t i = 1u; i < std::min(limit, cids.size()); ++i) - { + for (std::size_t i = 1u; i < std::min(limit, cids.size()); ++i) { result += ' '; result += this->attributeName(cids[i]); } - if (limit < cids.size()) - { + if (limit < cids.size()) { result += " and "; result += core::CStringUtils::typeToString(cids.size() - limit); result += " others"; @@ -212,44 +173,33 @@ std::string CAnomalyDetectorModel::printAttributes(const TSizeVec &cids, std::si return result; } -void CAnomalyDetectorModel::sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &/*resourceMonitor*/) -{ - const CDataGatherer &gatherer{this->dataGatherer()}; +void CAnomalyDetectorModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& /*resourceMonitor*/) { + const CDataGatherer& gatherer{this->dataGatherer()}; core_t::TTime bucketLength{this->bucketLength()}; - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { - const auto &counts = gatherer.bucketCounts(time); + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { + const auto& counts = gatherer.bucketCounts(time); std::size_t totalBucketCount{0u}; - for (const auto &count : counts) - { + for (const auto& count : counts) { totalBucketCount += CDataGatherer::extractData(count); } this->currentBucketTotalCount(totalBucketCount); } } -void CAnomalyDetectorModel::sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &/*resourceMonitor*/) -{ +void CAnomalyDetectorModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& /*resourceMonitor*/) { using TSizeUSet = boost::unordered_set<std::size_t>; - const CDataGatherer &gatherer{this->dataGatherer()}; + const CDataGatherer& gatherer{this->dataGatherer()}; core_t::TTime bucketLength{this->bucketLength()}; - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { - const auto &counts = gatherer.bucketCounts(time); + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { + const auto& counts = gatherer.bucketCounts(time); std::size_t totalBucketCount{0u}; TSizeUSet uniquePeople; - for (const auto &count : counts) - { + for (const auto& count : counts) { std::size_t pid{CDataGatherer::extractPersonId(count)}; - if (uniquePeople.insert(pid).second) - { + if (uniquePeople.insert(pid).second) { m_PersonBucketCounts[pid] += 1.0; } totalBucketCount += CDataGatherer::extractData(count); @@ -260,21 +210,18 @@ void CAnomalyDetectorModel::sample(core_t::TTime startTime, m_BucketCount += 1.0; double alpha{std::exp(-this->params().s_DecayRate)}; - for (std::size_t pid = 0u; pid < m_PersonBucketCounts.size(); ++pid) - { + for (std::size_t pid = 0u; pid < m_PersonBucketCounts.size(); ++pid) { m_PersonBucketCounts[pid] *= alpha; } m_BucketCount *= alpha; } } -void CAnomalyDetectorModel::skipSampling(core_t::TTime endTime) -{ - CDataGatherer &gatherer{this->dataGatherer()}; +void CAnomalyDetectorModel::skipSampling(core_t::TTime endTime) { + CDataGatherer& 
gatherer{this->dataGatherer()}; core_t::TTime startTime{gatherer.earliestBucketStartTime()}; - if (!gatherer.validateSampleTimes(startTime, endTime)) - { + if (!gatherer.validateSampleTimes(startTime, endTime)) { return; } @@ -284,14 +231,12 @@ void CAnomalyDetectorModel::skipSampling(core_t::TTime endTime) } bool CAnomalyDetectorModel::addResults(int detector, - core_t::TTime startTime, - core_t::TTime endTime, - std::size_t numberAttributeProbabilities, - CHierarchicalResults &results) const -{ + core_t::TTime startTime, + core_t::TTime endTime, + std::size_t numberAttributeProbabilities, + CHierarchicalResults& results) const { TSizeVec personIds; - if (!this->bucketStatsAvailable(startTime)) - { + if (!this->bucketStatsAvailable(startTime)) { LOG_TRACE("No stats available for time " << startTime); return false; } @@ -301,28 +246,20 @@ bool CAnomalyDetectorModel::addResults(int detector, CPartitioningFields partitioningFields(m_DataGatherer->partitionFieldName(), m_DataGatherer->partitionFieldValue()); partitioningFields.add(m_DataGatherer->personFieldName(), EMPTY); - for (auto pid : personIds) - { - if (this->category() == model_t::E_Counting) - { + for (auto pid : personIds) { + if (this->category() == model_t::E_Counting) { SAnnotatedProbability annotatedProbability; - this->computeProbability(pid, startTime, endTime, partitioningFields, - numberAttributeProbabilities, annotatedProbability); + this->computeProbability(pid, startTime, endTime, partitioningFields, numberAttributeProbabilities, annotatedProbability); results.addSimpleCountResult(annotatedProbability, this, startTime); - } - else - { + } else { LOG_TRACE("AddResult, for time [" << startTime << "," << endTime << ")"); partitioningFields.back().second = boost::cref(this->personName(pid)); - std::for_each(m_DataGatherer->beginInfluencers(), - m_DataGatherer->endInfluencers(), - [&results](const std::string &influencer) - { results.addInfluencer(influencer); }); + std::for_each(m_DataGatherer->beginInfluencers(), m_DataGatherer->endInfluencers(), [&results](const std::string& influencer) { + results.addInfluencer(influencer); + }); SAnnotatedProbability annotatedProbability; annotatedProbability.s_ResultType = results.resultType(); - if (this->computeProbability(pid, startTime, endTime, partitioningFields, - numberAttributeProbabilities, annotatedProbability)) - { + if (this->computeProbability(pid, startTime, endTime, partitioningFields, numberAttributeProbabilities, annotatedProbability)) { function_t::EFunction function{m_DataGatherer->function()}; results.addModelResult(detector, this->isPopulation(), @@ -343,42 +280,32 @@ bool CAnomalyDetectorModel::addResults(int detector, return true; } -std::size_t CAnomalyDetectorModel::defaultPruneWindow() const -{ +std::size_t CAnomalyDetectorModel::defaultPruneWindow() const { // The longest we'll consider keeping priors for is 1M buckets. double decayRate{this->params().s_DecayRate}; double factor{this->params().s_PruneWindowScaleMaximum}; - return (decayRate == 0.0) ? - MAXIMUM_PERMITTED_AGE : - std::min(static_cast<std::size_t>(factor / decayRate), MAXIMUM_PERMITTED_AGE); + return (decayRate == 0.0) ? MAXIMUM_PERMITTED_AGE : std::min(static_cast<std::size_t>(factor / decayRate), MAXIMUM_PERMITTED_AGE); } -std::size_t CAnomalyDetectorModel::minimumPruneWindow() const -{ +std::size_t CAnomalyDetectorModel::minimumPruneWindow() const { double decayRate{this->params().s_DecayRate}; double factor{this->params().s_PruneWindowScaleMinimum}; - return (decayRate == 0.0) ? 
- MAXIMUM_PERMITTED_AGE : - std::min(static_cast<std::size_t>(factor / decayRate), MAXIMUM_PERMITTED_AGE); + return (decayRate == 0.0) ? MAXIMUM_PERMITTED_AGE : std::min(static_cast<std::size_t>(factor / decayRate), MAXIMUM_PERMITTED_AGE); } -void CAnomalyDetectorModel::prune() -{ +void CAnomalyDetectorModel::prune() { this->prune(this->defaultPruneWindow()); } -uint64_t CAnomalyDetectorModel::checksum(bool /*includeCurrentBucketStats*/) const -{ +uint64_t CAnomalyDetectorModel::checksum(bool /*includeCurrentBucketStats*/) const { using TStrCRefUInt64Map = std::map; uint64_t seed{m_DataGatherer->checksum()}; seed = maths::CChecksum::calculate(seed, m_Params); seed = maths::CChecksum::calculate(seed, m_BucketCount); TStrCRefUInt64Map hashes; - for (std::size_t pid = 0u; pid < m_PersonBucketCounts.size(); ++pid) - { - if (m_DataGatherer->isPersonActive(pid)) - { - uint64_t &hash{hashes[boost::cref(m_DataGatherer->personName(pid))]}; + for (std::size_t pid = 0u; pid < m_PersonBucketCounts.size(); ++pid) { + if (m_DataGatherer->isPersonActive(pid)) { + uint64_t& hash{hashes[boost::cref(m_DataGatherer->personName(pid))]}; hash = maths::CChecksum::calculate(hash, m_PersonBucketCounts[pid]); } } @@ -387,8 +314,7 @@ uint64_t CAnomalyDetectorModel::checksum(bool /*includeCurrentBucketStats*/) con return maths::CChecksum::calculate(seed, hashes); } -void CAnomalyDetectorModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CAnomalyDetectorModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CAnomalyDetectorModel"); core::CMemoryDebug::dynamicSize("m_DataGatherer", m_DataGatherer, mem); core::CMemoryDebug::dynamicSize("m_Params", m_Params, mem); @@ -397,8 +323,7 @@ void CAnomalyDetectorModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr core::CMemoryDebug::dynamicSize("m_InterimBucketCorrector", m_InterimBucketCorrector, mem); } -std::size_t CAnomalyDetectorModel::memoryUsage() const -{ +std::size_t CAnomalyDetectorModel::memoryUsage() const { std::size_t mem{core::CMemory::dynamicSize(m_Params)}; mem += core::CMemory::dynamicSize(m_DataGatherer); mem += core::CMemory::dynamicSize(m_PersonBucketCounts); @@ -407,345 +332,291 @@ std::size_t CAnomalyDetectorModel::memoryUsage() const return mem; } -CAnomalyDetectorModel::TOptionalSize CAnomalyDetectorModel::estimateMemoryUsage(std::size_t numberPeople, - std::size_t numberAttributes, - std::size_t numberCorrelations) const -{ +CAnomalyDetectorModel::TOptionalSize +CAnomalyDetectorModel::estimateMemoryUsage(std::size_t numberPeople, std::size_t numberAttributes, std::size_t numberCorrelations) const { CMemoryUsageEstimator::TSizeArray predictors; - predictors[CMemoryUsageEstimator::E_People] = numberPeople; - predictors[CMemoryUsageEstimator::E_Attributes] = numberAttributes; + predictors[CMemoryUsageEstimator::E_People] = numberPeople; + predictors[CMemoryUsageEstimator::E_Attributes] = numberAttributes; predictors[CMemoryUsageEstimator::E_Correlations] = numberCorrelations; return this->memoryUsageEstimator()->estimate(predictors); } std::size_t CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate(std::size_t numberPeople, - std::size_t numberAttributes, - std::size_t numberCorrelations) -{ - TOptionalSize estimate{this->estimateMemoryUsage(numberPeople, - numberAttributes, - numberCorrelations)}; - if (estimate) - { + std::size_t numberAttributes, + std::size_t numberCorrelations) { + TOptionalSize estimate{this->estimateMemoryUsage(numberPeople, numberAttributes, numberCorrelations)}; + if (estimate) 
{ return estimate.get(); } std::size_t computed{this->computeMemoryUsage()}; CMemoryUsageEstimator::TSizeArray predictors; - predictors[CMemoryUsageEstimator::E_People] = numberPeople; - predictors[CMemoryUsageEstimator::E_Attributes] = numberAttributes; + predictors[CMemoryUsageEstimator::E_People] = numberPeople; + predictors[CMemoryUsageEstimator::E_Attributes] = numberAttributes; predictors[CMemoryUsageEstimator::E_Correlations] = numberCorrelations; this->memoryUsageEstimator()->addValue(predictors, computed); return computed; } -const CDataGatherer &CAnomalyDetectorModel::dataGatherer() const -{ +const CDataGatherer& CAnomalyDetectorModel::dataGatherer() const { return *m_DataGatherer; } -CDataGatherer &CAnomalyDetectorModel::dataGatherer() -{ +CDataGatherer& CAnomalyDetectorModel::dataGatherer() { return *m_DataGatherer; } -core_t::TTime CAnomalyDetectorModel::bucketLength() const -{ +core_t::TTime CAnomalyDetectorModel::bucketLength() const { return m_DataGatherer->bucketLength(); } -double CAnomalyDetectorModel::personFrequency(std::size_t pid) const -{ +double CAnomalyDetectorModel::personFrequency(std::size_t pid) const { return m_BucketCount <= 0.0 ? 0.5 : m_PersonBucketCounts[pid] / m_BucketCount; } -bool CAnomalyDetectorModel::isTimeUnset(core_t::TTime time) -{ +bool CAnomalyDetectorModel::isTimeUnset(core_t::TTime time) { return time == TIME_UNSET; } -CPersonFrequencyGreaterThan CAnomalyDetectorModel::personFilter() const -{ +CPersonFrequencyGreaterThan CAnomalyDetectorModel::personFilter() const { return CPersonFrequencyGreaterThan(*this, m_Params.get().s_ExcludePersonFrequency); } -CAttributeFrequencyGreaterThan CAnomalyDetectorModel::attributeFilter() const -{ +CAttributeFrequencyGreaterThan CAnomalyDetectorModel::attributeFilter() const { return CAttributeFrequencyGreaterThan(*this, m_Params.get().s_ExcludeAttributeFrequency); } -const SModelParams &CAnomalyDetectorModel::params() const -{ +const SModelParams& CAnomalyDetectorModel::params() const { return m_Params; } -double CAnomalyDetectorModel::learnRate(model_t::EFeature feature) const -{ +double CAnomalyDetectorModel::learnRate(model_t::EFeature feature) const { return model_t::learnRate(feature, m_Params); } -const CInfluenceCalculator *CAnomalyDetectorModel::influenceCalculator(model_t::EFeature feature, - std::size_t iid) const -{ - if (iid >= m_InfluenceCalculators.size()) - { +const CInfluenceCalculator* CAnomalyDetectorModel::influenceCalculator(model_t::EFeature feature, std::size_t iid) const { + if (iid >= m_InfluenceCalculators.size()) { LOG_ERROR("Influencer identifier " << iid << " out of range"); return 0; } - const TFeatureInfluenceCalculatorCPtrPrVec &calculators{m_InfluenceCalculators[iid]}; - auto result = std::lower_bound(calculators.begin(), - calculators.end(), - feature, - maths::COrderings::SFirstLess()); + const TFeatureInfluenceCalculatorCPtrPrVec& calculators{m_InfluenceCalculators[iid]}; + auto result = std::lower_bound(calculators.begin(), calculators.end(), feature, maths::COrderings::SFirstLess()); return result != calculators.end() && result->first == feature ? 
result->second.get() : 0; } -const CAnomalyDetectorModel::TDoubleVec &CAnomalyDetectorModel::personBucketCounts() const -{ +const CAnomalyDetectorModel::TDoubleVec& CAnomalyDetectorModel::personBucketCounts() const { return m_PersonBucketCounts; } -CAnomalyDetectorModel::TDoubleVec &CAnomalyDetectorModel::personBucketCounts() -{ +CAnomalyDetectorModel::TDoubleVec& CAnomalyDetectorModel::personBucketCounts() { return m_PersonBucketCounts; } -void CAnomalyDetectorModel::windowBucketCount(double windowBucketCount) -{ +void CAnomalyDetectorModel::windowBucketCount(double windowBucketCount) { m_BucketCount = windowBucketCount; } -double CAnomalyDetectorModel::windowBucketCount() const -{ +double CAnomalyDetectorModel::windowBucketCount() const { return m_BucketCount; } -void CAnomalyDetectorModel::createNewModels(std::size_t n, std::size_t /*m*/) -{ - if (n > 0) - { +void CAnomalyDetectorModel::createNewModels(std::size_t n, std::size_t /*m*/) { + if (n > 0) { n += m_PersonBucketCounts.size(); core::CAllocationStrategy::resize(m_PersonBucketCounts, n, 0.0); } } -void CAnomalyDetectorModel::updateRecycledModels() -{ - TSizeVec &people{m_DataGatherer->recycledPersonIds()}; - for (auto pid : people) - { +void CAnomalyDetectorModel::updateRecycledModels() { + TSizeVec& people{m_DataGatherer->recycledPersonIds()}; + for (auto pid : people) { m_PersonBucketCounts[pid] = 0.0; } people.clear(); } -const CInterimBucketCorrector &CAnomalyDetectorModel::interimValueCorrector() const -{ +const CInterimBucketCorrector& CAnomalyDetectorModel::interimValueCorrector() const { return *m_InterimBucketCorrector; } bool CAnomalyDetectorModel::shouldIgnoreResult(model_t::EFeature feature, - const model_t::CResultType &resultType, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const -{ - bool shouldIgnore = checkScheduledEvents(this->params().s_ScheduledEvents.get(), boost::cref(*this), feature, - CDetectionRule::E_FilterResults, resultType, pid, cid, time) || - checkRules(this->params().s_DetectionRules.get(), boost::cref(*this), feature, - CDetectionRule::E_FilterResults, resultType, pid, cid, time); + const model_t::CResultType& resultType, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const { + bool shouldIgnore = checkScheduledEvents(this->params().s_ScheduledEvents.get(), + boost::cref(*this), + feature, + CDetectionRule::E_FilterResults, + resultType, + pid, + cid, + time) || + checkRules(this->params().s_DetectionRules.get(), + boost::cref(*this), + feature, + CDetectionRule::E_FilterResults, + resultType, + pid, + cid, + time); return shouldIgnore; } -bool CAnomalyDetectorModel::shouldIgnoreSample(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const -{ - bool shouldIgnore = checkScheduledEvents(this->params().s_ScheduledEvents.get(), boost::cref(*this), feature, - CDetectionRule::E_SkipSampling, SKIP_SAMPLING_RESULT_TYPE, pid, cid, time) || - checkRules(this->params().s_DetectionRules.get(), boost::cref(*this), feature, - CDetectionRule::E_SkipSampling, SKIP_SAMPLING_RESULT_TYPE, pid, cid, time); +bool CAnomalyDetectorModel::shouldIgnoreSample(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { + bool shouldIgnore = checkScheduledEvents(this->params().s_ScheduledEvents.get(), + boost::cref(*this), + feature, + CDetectionRule::E_SkipSampling, + SKIP_SAMPLING_RESULT_TYPE, + pid, + cid, + time) || + checkRules(this->params().s_DetectionRules.get(), + boost::cref(*this), + feature, + 
CDetectionRule::E_SkipSampling, + SKIP_SAMPLING_RESULT_TYPE, + pid, + cid, + time); return shouldIgnore; } -bool CAnomalyDetectorModel::interimBucketCorrectorAcceptRestoreTraverser( - core::CStateRestoreTraverser &traverser) -{ - if (traverser.traverseSubLevel(boost::bind(&CInterimBucketCorrector::acceptRestoreTraverser, - m_InterimBucketCorrector.get(), _1)) == false) - { +bool CAnomalyDetectorModel::interimBucketCorrectorAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + if (traverser.traverseSubLevel(boost::bind(&CInterimBucketCorrector::acceptRestoreTraverser, m_InterimBucketCorrector.get(), _1)) == + false) { LOG_ERROR("Invalid interim bucket corrector"); return false; } return true; } -void CAnomalyDetectorModel::interimBucketCorrectorAcceptPersistInserter(const std::string &tag, - core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(tag, boost::bind(&CInterimBucketCorrector::acceptPersistInserter, - m_InterimBucketCorrector.get(), _1)); +void CAnomalyDetectorModel::interimBucketCorrectorAcceptPersistInserter(const std::string& tag, + core::CStatePersistInserter& inserter) const { + inserter.insertLevel(tag, boost::bind(&CInterimBucketCorrector::acceptPersistInserter, m_InterimBucketCorrector.get(), _1)); } -const CAnomalyDetectorModel::TStr1Vec &CAnomalyDetectorModel::scheduledEventDescriptions(core_t::TTime /*time*/) const -{ +const CAnomalyDetectorModel::TStr1Vec& CAnomalyDetectorModel::scheduledEventDescriptions(core_t::TTime /*time*/) const { return EMPTY_STRING_LIST; } -maths::CModel *CAnomalyDetectorModel::tinyModel() -{ +maths::CModel* CAnomalyDetectorModel::tinyModel() { return new maths::CModelStub; } -const std::size_t CAnomalyDetectorModel::MAXIMUM_PERMITTED_AGE(1000000); +const std::size_t CAnomalyDetectorModel::MAXIMUM_PERMITTED_AGE(1000000); const core_t::TTime CAnomalyDetectorModel::TIME_UNSET(-1); -const std::string CAnomalyDetectorModel::EMPTY_STRING; - +const std::string CAnomalyDetectorModel::EMPTY_STRING; -CAnomalyDetectorModel::SFeatureModels::SFeatureModels(model_t::EFeature feature, TMathsModelPtr newModel) : - s_Feature(feature), s_NewModel(newModel) -{} +CAnomalyDetectorModel::SFeatureModels::SFeatureModels(model_t::EFeature feature, TMathsModelPtr newModel) + : s_Feature(feature), s_NewModel(newModel) { +} -bool CAnomalyDetectorModel::SFeatureModels::acceptRestoreTraverser(const SModelParams ¶ms_, - core::CStateRestoreTraverser &traverser) -{ +bool CAnomalyDetectorModel::SFeatureModels::acceptRestoreTraverser(const SModelParams& params_, core::CStateRestoreTraverser& traverser) { maths_t::EDataType dataType{s_NewModel->dataType()}; - maths::SModelRestoreParams params{s_NewModel->params(), - params_.decompositionRestoreParams(dataType), - params_.distributionRestoreParams(dataType)}; - do - { - if (traverser.name() == MODEL_TAG) - { + maths::SModelRestoreParams params{ + s_NewModel->params(), params_.decompositionRestoreParams(dataType), params_.distributionRestoreParams(dataType)}; + do { + if (traverser.name() == MODEL_TAG) { TMathsModelPtr prior; - if (!traverser.traverseSubLevel(boost::bind(maths::CModelStateSerialiser(), - boost::cref(params), boost::ref(prior), _1))) - { + if (!traverser.traverseSubLevel( + boost::bind(maths::CModelStateSerialiser(), boost::cref(params), boost::ref(prior), _1))) { return false; } s_Models.push_back(prior); } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CAnomalyDetectorModel::SFeatureModels::acceptPersistInserter(core::CStatePersistInserter 
&inserter) const -{ - for (const auto &model : s_Models) - { - inserter.insertLevel(MODEL_TAG, boost::bind(maths::CModelStateSerialiser(), - boost::cref(*model), _1)); +void CAnomalyDetectorModel::SFeatureModels::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + for (const auto& model : s_Models) { + inserter.insertLevel(MODEL_TAG, boost::bind(maths::CModelStateSerialiser(), boost::cref(*model), _1)); } } -void CAnomalyDetectorModel::SFeatureModels::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CAnomalyDetectorModel::SFeatureModels::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SFeatureModels"); core::CMemoryDebug::dynamicSize("s_NewModel", s_NewModel, mem); core::CMemoryDebug::dynamicSize("s_Models", s_Models, mem); } -std::size_t CAnomalyDetectorModel::SFeatureModels::memoryUsage() const -{ +std::size_t CAnomalyDetectorModel::SFeatureModels::memoryUsage() const { return core::CMemory::dynamicSize(s_NewModel) + core::CMemory::dynamicSize(s_Models); } - CAnomalyDetectorModel::SFeatureCorrelateModels::SFeatureCorrelateModels(model_t::EFeature feature, - TMultivariatePriorPtr modelPrior, - TCorrelationsPtr model) : - s_Feature(feature), - s_ModelPrior(modelPrior), - s_Models(model->clone()) -{} - -bool CAnomalyDetectorModel::SFeatureCorrelateModels::acceptRestoreTraverser(const SModelParams ¶ms_, - core::CStateRestoreTraverser &traverser) -{ + TMultivariatePriorPtr modelPrior, + TCorrelationsPtr model) + : s_Feature(feature), s_ModelPrior(modelPrior), s_Models(model->clone()) { +} + +bool CAnomalyDetectorModel::SFeatureCorrelateModels::acceptRestoreTraverser(const SModelParams& params_, + core::CStateRestoreTraverser& traverser) { maths_t::EDataType dataType{s_ModelPrior->dataType()}; maths::SDistributionRestoreParams params{params_.distributionRestoreParams(dataType)}; std::size_t count{0u}; - do - { - if (traverser.name() == MODEL_TAG) - { - if ( !traverser.traverseSubLevel(boost::bind(&maths::CTimeSeriesCorrelations::acceptRestoreTraverser, - s_Models.get(), boost::cref(params), _1)) - || count++ > 0) - { + do { + if (traverser.name() == MODEL_TAG) { + if (!traverser.traverseSubLevel( + boost::bind(&maths::CTimeSeriesCorrelations::acceptRestoreTraverser, s_Models.get(), boost::cref(params), _1)) || + count++ > 0) { return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CAnomalyDetectorModel::SFeatureCorrelateModels::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(MODEL_TAG, boost::bind(&maths::CTimeSeriesCorrelations::acceptPersistInserter, - s_Models.get(), _1)); +void CAnomalyDetectorModel::SFeatureCorrelateModels::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(MODEL_TAG, boost::bind(&maths::CTimeSeriesCorrelations::acceptPersistInserter, s_Models.get(), _1)); } -void CAnomalyDetectorModel::SFeatureCorrelateModels::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CAnomalyDetectorModel::SFeatureCorrelateModels::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SFeatureCorrelateModels"); core::CMemoryDebug::dynamicSize("s_ModelPrior", s_ModelPrior, mem); core::CMemoryDebug::dynamicSize("s_Models", s_Models, mem); } -std::size_t CAnomalyDetectorModel::SFeatureCorrelateModels::memoryUsage() const -{ +std::size_t CAnomalyDetectorModel::SFeatureCorrelateModels::memoryUsage() const { return 
core::CMemory::dynamicSize(s_ModelPrior) + core::CMemory::dynamicSize(s_Models); } +CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::CTimeSeriesCorrelateModelAllocator(CResourceMonitor& resourceMonitor, + TMemoryUsage memoryUsage, + std::size_t resourceLimit, + std::size_t maxNumberCorrelations) + : m_ResourceMonitor(&resourceMonitor), + m_MemoryUsage(memoryUsage), + m_ResourceLimit(resourceLimit), + m_MaxNumberCorrelations(maxNumberCorrelations) { +} -CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::CTimeSeriesCorrelateModelAllocator( - CResourceMonitor &resourceMonitor, - TMemoryUsage memoryUsage, - std::size_t resourceLimit, - std::size_t maxNumberCorrelations) : - m_ResourceMonitor(&resourceMonitor), - m_MemoryUsage(memoryUsage), - m_ResourceLimit(resourceLimit), - m_MaxNumberCorrelations(maxNumberCorrelations) -{} - -bool CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::areAllocationsAllowed() const -{ +bool CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::areAllocationsAllowed() const { return m_ResourceMonitor->areAllocationsAllowed(); } -bool CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::exceedsLimit(std::size_t correlations) const -{ - return !m_ResourceMonitor->haveNoLimit() - && m_MemoryUsage(correlations) >= m_ResourceLimit; +bool CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::exceedsLimit(std::size_t correlations) const { + return !m_ResourceMonitor->haveNoLimit() && m_MemoryUsage(correlations) >= m_ResourceLimit; } -std::size_t CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::maxNumberCorrelations() const -{ +std::size_t CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::maxNumberCorrelations() const { return m_MaxNumberCorrelations; } -std::size_t CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::chunkSize() const -{ +std::size_t CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::chunkSize() const { return 500; } -CAnomalyDetectorModel::TMultivariatePriorPtr -CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::newPrior() const -{ +CAnomalyDetectorModel::TMultivariatePriorPtr CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::newPrior() const { return TMultivariatePriorPtr(m_PrototypePrior->clone()); } -void CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::prototypePrior(const TMultivariatePriorPtr &prior) -{ +void CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::prototypePrior(const TMultivariatePriorPtr& prior) { m_PrototypePrior = prior; } - } } diff --git a/lib/model/CAnomalyDetectorModelConfig.cc b/lib/model/CAnomalyDetectorModelConfig.cc index ae52f2c34f..700267ceb6 100644 --- a/lib/model/CAnomalyDetectorModelConfig.cc +++ b/lib/model/CAnomalyDetectorModelConfig.cc @@ -7,12 +7,13 @@ #include #include -#include #include +#include -#include #include +#include +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include #include @@ -30,30 +30,23 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { const CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMap EMPTY_RULES_MAP; const CAnomalyDetectorModelConfig::TStrDetectionRulePrVec EMPTY_EVENTS; -namespace detail -{ +namespace detail { -core_t::TTime validateBucketLength(core_t::TTime length) -{ +core_t::TTime validateBucketLength(core_t::TTime length) { // A zero or negative length is used by the individual commands to request // the default length - this avoids the need for the commands to know 
the // default length return length <= 0 ? CAnomalyDetectorModelConfig::DEFAULT_BUCKET_LENGTH : length; } - } - } const std::string CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER(","); @@ -81,39 +74,33 @@ const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM(0.2 const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM(4.0); const double CAnomalyDetectorModelConfig::DEFAULT_CORRELATION_MODELS_OVERHEAD(3.0); const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION(0.3); -const double CAnomalyDetectorModelConfig::DEFAULT_AGGREGATION_STYLE_PARAMS[][model_t::NUMBER_AGGREGATION_PARAMS] = - { - { 0.0, 1.0, 1.0, 1.0 }, - { 0.5, 0.5, 1.0, 5.0 }, - { 0.5, 0.5, 1.0, 1.0 } - }; +const double CAnomalyDetectorModelConfig::DEFAULT_AGGREGATION_STYLE_PARAMS[][model_t::NUMBER_AGGREGATION_PARAMS] = {{0.0, 1.0, 1.0, 1.0}, + {0.5, 0.5, 1.0, 5.0}, + {0.5, 0.5, 1.0, 1.0}}; // The default for maximumanomalousprobability now matches the default // for unusualprobabilitythreshold in mllimits.conf - this avoids // inconsistencies in output const double CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY(0.035); const double CAnomalyDetectorModelConfig::DEFAULT_NOISE_PERCENTILE(50.0); const double CAnomalyDetectorModelConfig::DEFAULT_NOISE_MULTIPLIER(1.0); -const CAnomalyDetectorModelConfig::TDoubleDoublePr CAnomalyDetectorModelConfig::DEFAULT_NORMALIZED_SCORE_KNOT_POINTS[9] = - { - CAnomalyDetectorModelConfig::TDoubleDoublePr(0.0, 0.0), - CAnomalyDetectorModelConfig::TDoubleDoublePr(70.0, 1.0), - CAnomalyDetectorModelConfig::TDoubleDoublePr(85.0, 1.2), - CAnomalyDetectorModelConfig::TDoubleDoublePr(90.0, 1.5), - CAnomalyDetectorModelConfig::TDoubleDoublePr(95.0, 3.0), - CAnomalyDetectorModelConfig::TDoubleDoublePr(97.0, 20.0), - CAnomalyDetectorModelConfig::TDoubleDoublePr(99.0, 50.0), - CAnomalyDetectorModelConfig::TDoubleDoublePr(99.9, 90.0), - CAnomalyDetectorModelConfig::TDoubleDoublePr(100.0, 100.0) - }; +const CAnomalyDetectorModelConfig::TDoubleDoublePr CAnomalyDetectorModelConfig::DEFAULT_NORMALIZED_SCORE_KNOT_POINTS[9] = { + CAnomalyDetectorModelConfig::TDoubleDoublePr(0.0, 0.0), + CAnomalyDetectorModelConfig::TDoubleDoublePr(70.0, 1.0), + CAnomalyDetectorModelConfig::TDoubleDoublePr(85.0, 1.2), + CAnomalyDetectorModelConfig::TDoubleDoublePr(90.0, 1.5), + CAnomalyDetectorModelConfig::TDoubleDoublePr(95.0, 3.0), + CAnomalyDetectorModelConfig::TDoubleDoublePr(97.0, 20.0), + CAnomalyDetectorModelConfig::TDoubleDoublePr(99.0, 50.0), + CAnomalyDetectorModelConfig::TDoubleDoublePr(99.9, 90.0), + CAnomalyDetectorModelConfig::TDoubleDoublePr(100.0, 100.0)}; CAnomalyDetectorModelConfig CAnomalyDetectorModelConfig::defaultConfig(core_t::TTime bucketLength, - model_t::ESummaryMode summaryMode, - const std::string &summaryCountFieldName, - core_t::TTime latency, - std::size_t bucketResultsDelay, - bool multivariateByFields, - const std::string &multipleBucketLengths) -{ + model_t::ESummaryMode summaryMode, + const std::string& summaryCountFieldName, + core_t::TTime latency, + std::size_t bucketResultsDelay, + bool multivariateByFields, + const std::string& multipleBucketLengths) { bucketLength = detail::validateBucketLength(bucketLength); double learnRate = DEFAULT_LEARN_RATE * bucketNormalizationFactor(bucketLength); @@ -129,18 +116,13 @@ CAnomalyDetectorModelConfig CAnomalyDetectorModelConfig::defaultConfig(core_t::T TFactoryTypeFactoryPtrMap factories; params.s_MinimumModeFraction = 
DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION; - factories[E_EventRateFactory].reset( - new CEventRateModelFactory(params, summaryMode, summaryCountFieldName)); - factories[E_MetricFactory].reset( - new CMetricModelFactory(params, summaryMode, summaryCountFieldName)); - factories[E_EventRatePopulationFactory].reset( - new CEventRatePopulationModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_EventRateFactory].reset(new CEventRateModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_MetricFactory].reset(new CMetricModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_EventRatePopulationFactory].reset(new CEventRatePopulationModelFactory(params, summaryMode, summaryCountFieldName)); params.s_MinimumModeFraction = DEFAULT_POPULATION_MINIMUM_MODE_FRACTION; - factories[E_MetricPopulationFactory].reset( - new CMetricPopulationModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_MetricPopulationFactory].reset(new CMetricPopulationModelFactory(params, summaryMode, summaryCountFieldName)); params.s_MinimumModeFraction = 1.0; - factories[E_CountingFactory].reset( - new CCountingModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_CountingFactory].reset(new CCountingModelFactory(params, summaryMode, summaryCountFieldName)); CAnomalyDetectorModelConfig result; result.bucketLength(bucketLength); @@ -152,74 +134,58 @@ CAnomalyDetectorModelConfig CAnomalyDetectorModelConfig::defaultConfig(core_t::T // De-rates the decay and learn rate to account for differences from the // standard bucket length. -double CAnomalyDetectorModelConfig::bucketNormalizationFactor(core_t::TTime bucketLength) -{ - return std::min(1.0, static_cast<double>(bucketLength) - / static_cast<double>(STANDARD_BUCKET_LENGTH)); +double CAnomalyDetectorModelConfig::bucketNormalizationFactor(core_t::TTime bucketLength) { + return std::min(1.0, static_cast<double>(bucketLength) / static_cast<double>(STANDARD_BUCKET_LENGTH)); } // Standard decay rate for time series decompositions given the specified // model decay rate and bucket length. 
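
A worked example of the two derating helpers here, bucketNormalizationFactor above and trendDecayRate in the hunk that follows. STANDARD_BUCKET_LENGTH is assumed to be 1800 seconds for the arithmetic; treat that value and the free-function wrapper as illustrative rather than as the class's real definition:

#include <algorithm>
#include <cstdint>

using TTime = std::int64_t;

const TTime STANDARD_BUCKET_LENGTH = 1800; // Assumed value, for illustration only.

// Shorter-than-standard buckets slow learning and decay proportionally;
// longer-than-standard buckets are capped at the standard rate.
double bucketNormalizationFactor(TTime bucketLength) {
    return std::min(1.0, static_cast<double>(bucketLength) / static_cast<double>(STANDARD_BUCKET_LENGTH));
}

// For a 300s bucket the factor is 300/1800 = 1/6, so a default learn rate is
// applied at roughly one sixth strength per bucket; a 3600s bucket is capped
// at a factor of 1. trendDecayRate (defined next) then derives the slower
// decomposition decay rate from this factor, capping the result at 0.1.
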
-double CAnomalyDetectorModelConfig::trendDecayRate(double modelDecayRate, core_t::TTime bucketLength) -{ +double CAnomalyDetectorModelConfig::trendDecayRate(double modelDecayRate, core_t::TTime bucketLength) { double scale = static_cast<double>(bucketLength / 24 / STANDARD_BUCKET_LENGTH); - return std::min(24.0 * modelDecayRate - / bucketNormalizationFactor(bucketLength) - / std::max(scale, 1.0), 0.1); -} - -CAnomalyDetectorModelConfig::CAnomalyDetectorModelConfig() : - m_BucketLength(STANDARD_BUCKET_LENGTH), - m_BucketResultsDelay(DEFAULT_BUCKET_RESULTS_DELAY), - m_MultivariateByFields(false), - m_ModelPlotBoundsPercentile(-1.0), - m_MaximumAnomalousProbability(DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY), - m_NoisePercentile(DEFAULT_NOISE_PERCENTILE), - m_NoiseMultiplier(DEFAULT_NOISE_MULTIPLIER), - m_NormalizedScoreKnotPoints(boost::begin(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS), - boost::end(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS)), - m_PerPartitionNormalisation(false), - m_DetectionRules(EMPTY_RULES_MAP), - m_ScheduledEvents(EMPTY_EVENTS) -{ - for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) - { - for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) - { + return std::min(24.0 * modelDecayRate / bucketNormalizationFactor(bucketLength) / std::max(scale, 1.0), 0.1); +} + +CAnomalyDetectorModelConfig::CAnomalyDetectorModelConfig() + : m_BucketLength(STANDARD_BUCKET_LENGTH), + m_BucketResultsDelay(DEFAULT_BUCKET_RESULTS_DELAY), + m_MultivariateByFields(false), + m_ModelPlotBoundsPercentile(-1.0), + m_MaximumAnomalousProbability(DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY), + m_NoisePercentile(DEFAULT_NOISE_PERCENTILE), + m_NoiseMultiplier(DEFAULT_NOISE_MULTIPLIER), + m_NormalizedScoreKnotPoints(boost::begin(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS), boost::end(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS)), + m_PerPartitionNormalisation(false), + m_DetectionRules(EMPTY_RULES_MAP), + m_ScheduledEvents(EMPTY_EVENTS) { + for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) { + for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) { m_AggregationStyleParams[i][j] = DEFAULT_AGGREGATION_STYLE_PARAMS[i][j]; } } } -void CAnomalyDetectorModelConfig::bucketLength(core_t::TTime length) -{ +void CAnomalyDetectorModelConfig::bucketLength(core_t::TTime length) { m_BucketLength = length; - for (auto &factory : m_Factories) - { + for (auto& factory : m_Factories) { factory.second->updateBucketLength(length); } } -void CAnomalyDetectorModelConfig::bucketResultsDelay(std::size_t delay) -{ +void CAnomalyDetectorModelConfig::bucketResultsDelay(std::size_t delay) { m_BucketResultsDelay = delay; } CAnomalyDetectorModelConfig::TTimeVec CAnomalyDetectorModelConfig::multipleBucketLengths(core_t::TTime bucketLength, - const std::string &multipleBucketLengths) -{ + const std::string& multipleBucketLengths) { TStrVec multiBucketTokens; core::CRegex regex; regex.init(","); regex.split(multipleBucketLengths, multiBucketTokens); TTimeVec multiBuckets; - for (TStrVecCItr itr = multiBucketTokens.begin(); itr != multiBucketTokens.end(); ++itr) - { + for (TStrVecCItr itr = multiBucketTokens.begin(); itr != multiBucketTokens.end(); ++itr) { core_t::TTime t = 0; - if (core::CStringUtils::stringToType(*itr, t)) - { - if ((t <= bucketLength) || (t % bucketLength != 0)) - { + if (core::CStringUtils::stringToType(*itr, t)) { + if ((t <= bucketLength) || (t % bucketLength != 0)) { LOG_ERROR("MultipleBucketLength " << t << " must be a multiple of " << bucketLength); return TTimeVec(); } @@ -230,77 
+196,62 @@ CAnomalyDetectorModelConfig::TTimeVec CAnomalyDetectorModelConfig::multipleBucke return multiBuckets; } -void CAnomalyDetectorModelConfig::multivariateByFields(bool enabled) -{ +void CAnomalyDetectorModelConfig::multivariateByFields(bool enabled) { m_MultivariateByFields = enabled; } -void CAnomalyDetectorModelConfig::factories(const TFactoryTypeFactoryPtrMap &factories) -{ +void CAnomalyDetectorModelConfig::factories(const TFactoryTypeFactoryPtrMap& factories) { m_Factories = factories; } -bool CAnomalyDetectorModelConfig::aggregationStyleParams(model_t::EAggregationStyle style, - model_t::EAggregationParam param, - double value) -{ - switch (param) - { +bool CAnomalyDetectorModelConfig::aggregationStyleParams(model_t::EAggregationStyle style, model_t::EAggregationParam param, double value) { + switch (param) { case model_t::E_JointProbabilityWeight: - if (value < 0.0 || value > 1.0) - { + if (value < 0.0 || value > 1.0) { LOG_ERROR("joint probability weight " << value << " out of in range [0,1]"); return false; } m_AggregationStyleParams[style][model_t::E_JointProbabilityWeight] = value; break; case model_t::E_ExtremeProbabilityWeight: - if (value < 0.0 || value > 1.0) - { + if (value < 0.0 || value > 1.0) { LOG_ERROR("extreme probability weight " << value << " out of in range [0,1]"); return false; } m_AggregationStyleParams[style][model_t::E_ExtremeProbabilityWeight] = value; break; case model_t::E_MinExtremeSamples: - if (value < 1.0 || value > 10.0) - { + if (value < 1.0 || value > 10.0) { LOG_ERROR("min extreme samples " << value << " out of in range [0,10]"); return false; } m_AggregationStyleParams[style][model_t::E_MinExtremeSamples] = value; m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples] = - std::max(value, m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples]); + std::max(value, m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples]); break; case model_t::E_MaxExtremeSamples: - if (value < 1.0 || value > 10.0) - { + if (value < 1.0 || value > 10.0) { LOG_ERROR("max extreme samples " << value << " out of in range [0,10]"); return false; } m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples] = value; m_AggregationStyleParams[style][model_t::E_MinExtremeSamples] = - std::min(value, m_AggregationStyleParams[style][model_t::E_MinExtremeSamples]); + std::min(value, m_AggregationStyleParams[style][model_t::E_MinExtremeSamples]); break; } return true; } -void CAnomalyDetectorModelConfig::maximumAnomalousProbability(double probability) -{ +void CAnomalyDetectorModelConfig::maximumAnomalousProbability(double probability) { double minimum = 100 * maths::MINUSCULE_PROBABILITY; - if (probability < minimum || probability > 1.0) - { - LOG_INFO("Maximum anomalous probability " << probability - << " out of range [" << minimum << "," << 1.0 << "] truncating"); + if (probability < minimum || probability > 1.0) { + LOG_INFO("Maximum anomalous probability " << probability << " out of range [" << minimum << "," << 1.0 << "] truncating"); } m_MaximumAnomalousProbability = maths::CTools::truncate(probability, minimum, 1.0); } -bool CAnomalyDetectorModelConfig::noisePercentile(double percentile) -{ - if (percentile < 0.0 || percentile > 100.0) - { +bool CAnomalyDetectorModelConfig::noisePercentile(double percentile) { + if (percentile < 0.0 || percentile > 100.0) { LOG_ERROR("Noise percentile " << percentile << " out of range [0, 100]"); return false; } @@ -308,10 +259,8 @@ bool CAnomalyDetectorModelConfig::noisePercentile(double percentile) return 
 }
 
-bool CAnomalyDetectorModelConfig::noiseMultiplier(double multiplier)
-{
-    if (multiplier <= 0.0)
-    {
+bool CAnomalyDetectorModelConfig::noiseMultiplier(double multiplier) {
+    if (multiplier <= 0.0) {
         LOG_ERROR("Noise multiplier must be positive");
         return false;
     }
@@ -319,91 +268,67 @@ bool CAnomalyDetectorModelConfig::noiseMultiplier(double multiplier)
     return true;
 }
 
-bool CAnomalyDetectorModelConfig::normalizedScoreKnotPoints(const TDoubleDoublePrVec &points)
-{
-    if (points.empty())
-    {
+bool CAnomalyDetectorModelConfig::normalizedScoreKnotPoints(const TDoubleDoublePrVec& points) {
+    if (points.empty()) {
         LOG_ERROR("Must provide at least two know points");
         return false;
     }
-    if (points[0].first != 0.0 && points[0].second != 0.0)
-    {
+    if (points[0].first != 0.0 && points[0].second != 0.0) {
         LOG_ERROR("First knot point must be (0,0)");
         return false;
     }
-    if (points.back().first != 100.0 && points.back().second != 100.0)
-    {
+    if (points.back().first != 100.0 && points.back().second != 100.0) {
         LOG_ERROR("Last knot point must be (100,100)");
         return false;
    }
-    for (std::size_t i = 0u; i < points.size(); i += 2)
-    {
-        if (points[i].first < 0.0 || points[i].first > 100.0)
-        {
+    for (std::size_t i = 0u; i < points.size(); i += 2) {
+        if (points[i].first < 0.0 || points[i].first > 100.0) {
             LOG_ERROR("Unexpected value " << points[i].first << " for percentile");
             return false;
         }
-        if (points[i].second < 0.0 || points[i].second > 100.0)
-        {
+        if (points[i].second < 0.0 || points[i].second > 100.0) {
             LOG_ERROR("Unexpected value " << points[i].second << " for score");
             return false;
         }
     }
-    if (!boost::algorithm::is_sorted(points.begin(),
-                                     points.end(),
-                                     maths::COrderings::SFirstLess()))
-    {
-        LOG_ERROR("Percentiles must be monotonic increasing "
-                  << core::CContainerPrinter::print(points));
+    if (!boost::algorithm::is_sorted(points.begin(), points.end(), maths::COrderings::SFirstLess())) {
+        LOG_ERROR("Percentiles must be monotonic increasing " << core::CContainerPrinter::print(points));
         return false;
     }
-    if (!boost::algorithm::is_sorted(points.begin(),
-                                     points.end(),
-                                     maths::COrderings::SSecondLess()))
-    {
-        LOG_ERROR("Scores must be monotonic increasing "
-                  << core::CContainerPrinter::print(points));
+    if (!boost::algorithm::is_sorted(points.begin(), points.end(), maths::COrderings::SSecondLess())) {
+        LOG_ERROR("Scores must be monotonic increasing " << core::CContainerPrinter::print(points));
         return false;
     }
 
     m_NormalizedScoreKnotPoints = points;
-    m_NormalizedScoreKnotPoints.erase(std::unique(m_NormalizedScoreKnotPoints.begin(),
-                                                  m_NormalizedScoreKnotPoints.end()),
+    m_NormalizedScoreKnotPoints.erase(std::unique(m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end()),
                                       m_NormalizedScoreKnotPoints.end());
     return true;
 }
 
-bool CAnomalyDetectorModelConfig::init(const std::string &configFile)
-{
+bool CAnomalyDetectorModelConfig::init(const std::string& configFile) {
     boost::property_tree::ptree propTree;
     return this->init(configFile, propTree);
 }
 
-bool CAnomalyDetectorModelConfig::init(const std::string &configFile,
-                                       boost::property_tree::ptree &propTree)
-{
+bool CAnomalyDetectorModelConfig::init(const std::string& configFile, boost::property_tree::ptree& propTree) {
     LOG_DEBUG("Reading config file " << configFile);
 
-    try
-    {
+    try {
         std::ifstream strm(configFile.c_str());
-        if (!strm.is_open())
-        {
+        if (!strm.is_open()) {
             LOG_ERROR("Error opening config file " << configFile);
             return false;
         }
         CLimits::skipUtf8Bom(strm);
 
         boost::property_tree::ini_parser::read_ini(strm, propTree);
-    }
-    catch (boost::property_tree::ptree_error &e)
-    {
+    } catch (boost::property_tree::ptree_error& e) {
         LOG_ERROR("Error reading config file " << configFile << " : " << e.what());
         return false;
     }
 
-    if (this->init(propTree) == false)
-    {
+    if (this->init(propTree) == false) {
         LOG_ERROR("Error reading config file " << configFile);
         return false;
     }
@@ -411,38 +336,27 @@ bool CAnomalyDetectorModelConfig::init(const std::string &configFile,
     return true;
 }
 
-bool CAnomalyDetectorModelConfig::init(const boost::property_tree::ptree &propTree)
-{
+bool CAnomalyDetectorModelConfig::init(const boost::property_tree::ptree& propTree) {
     static const std::string MODEL_STANZA("model");
     static const std::string ANOMALY_SCORE_STANZA("anomalyscore");
 
     bool result = true;
 
-    for (boost::property_tree::ptree::const_iterator i = propTree.begin();
-         i != propTree.end();
-         ++i)
-    {
-        const std::string &stanzaName = i->first;
-        const boost::property_tree::ptree &propertyTree = i->second;
-
-        if (stanzaName == MODEL_STANZA)
-        {
-            if (this->processStanza(propertyTree) == false)
-            {
+    for (boost::property_tree::ptree::const_iterator i = propTree.begin(); i != propTree.end(); ++i) {
+        const std::string& stanzaName = i->first;
+        const boost::property_tree::ptree& propertyTree = i->second;
+
+        if (stanzaName == MODEL_STANZA) {
+            if (this->processStanza(propertyTree) == false) {
                 LOG_ERROR("Error reading model config stanza: " << MODEL_STANZA);
                 result = false;
             }
-        }
-        else if (stanzaName == ANOMALY_SCORE_STANZA)
-        {
-            if (this->processStanza(propertyTree) == false)
-            {
+        } else if (stanzaName == ANOMALY_SCORE_STANZA) {
+            if (this->processStanza(propertyTree) == false) {
                 LOG_ERROR("Error reading model config stanza: " << ANOMALY_SCORE_STANZA);
                 result = false;
             }
-        }
-        else
-        {
+        } else {
             LOG_WARN("Ignoring unknown model config stanza: " << stanzaName);
         }
     }
@@ -450,31 +364,25 @@ bool CAnomalyDetectorModelConfig::init(const boost::property_tree::ptree &propTr
     return result;
 }
 
-bool CAnomalyDetectorModelConfig::configureModelPlot(const std::string &modelPlotConfigFile)
-{
+bool CAnomalyDetectorModelConfig::configureModelPlot(const std::string& modelPlotConfigFile) {
     LOG_DEBUG("Reading model plot config file " << modelPlotConfigFile);
 
     boost::property_tree::ptree propTree;
-    try
-    {
+    try {
         std::ifstream strm(modelPlotConfigFile.c_str());
-        if (!strm.is_open())
-        {
+        if (!strm.is_open()) {
             LOG_ERROR("Error opening model plot config file " << modelPlotConfigFile);
             return false;
         }
         CLimits::skipUtf8Bom(strm);
 
         boost::property_tree::ini_parser::read_ini(strm, propTree);
-    }
-    catch (boost::property_tree::ptree_error &e)
-    {
+    } catch (boost::property_tree::ptree_error& e) {
         LOG_ERROR("Error reading model plot config file " << modelPlotConfigFile << " : " << e.what());
         return false;
     }
 
-    if (this->configureModelPlot(propTree) == false)
-    {
+    if (this->configureModelPlot(propTree) == false) {
        LOG_ERROR("Error reading model plot config file " << modelPlotConfigFile);
         return false;
     }
@@ -482,111 +390,90 @@ bool CAnomalyDetectorModelConfig::configureModelPlot(const std::string &modelPlo
     return true;
 }
 
-namespace
-{
+namespace {
 // Model debug config properties
 const std::string BOUNDS_PERCENTILE_PROPERTY("boundspercentile");
 const std::string TERMS_PROPERTY("terms");
 }
 
-bool CAnomalyDetectorModelConfig::configureModelPlot(const boost::property_tree::ptree &propTree)
-{
-    try
-    {
+bool CAnomalyDetectorModelConfig::configureModelPlot(const boost::property_tree::ptree& propTree) {
+    try {
         std::string valueStr(propTree.get<std::string>(BOUNDS_PERCENTILE_PROPERTY));
-        if (core::CStringUtils::stringToType(valueStr, m_ModelPlotBoundsPercentile) == false)
-        {
+        if (core::CStringUtils::stringToType(valueStr, m_ModelPlotBoundsPercentile) == false) {
             LOG_ERROR("Cannot parse as double: " << valueStr);
             return false;
         }
-    }
-    catch (boost::property_tree::ptree_error &)
-    {
-        LOG_ERROR("Error reading model debug config. Property '"
-                  << BOUNDS_PERCENTILE_PROPERTY << "' is missing");
+    } catch (boost::property_tree::ptree_error&) {
+        LOG_ERROR("Error reading model debug config. Property '" << BOUNDS_PERCENTILE_PROPERTY << "' is missing");
         return false;
     }
 
     m_ModelPlotTerms.clear();
-    try
-    {
+    try {
         std::string valueStr(propTree.get<std::string>(TERMS_PROPERTY));
 
         using TStrVec = core::CStringUtils::TStrVec;
         TStrVec tokens;
         std::string remainder;
         core::CStringUtils::tokenise(",", valueStr, tokens, remainder);
-        if (!remainder.empty())
-        {
+        if (!remainder.empty()) {
             tokens.push_back(remainder);
         }
-        for (std::size_t i = 0; i < tokens.size(); ++i)
-        {
+        for (std::size_t i = 0; i < tokens.size(); ++i) {
             m_ModelPlotTerms.insert(tokens[i]);
         }
-    }
-    catch (boost::property_tree::ptree_error &)
-    {
-        LOG_ERROR("Error reading model debug config. Property '"
-                  << TERMS_PROPERTY << "' is missing");
+    } catch (boost::property_tree::ptree_error&) {
+        LOG_ERROR("Error reading model debug config. Property '" << TERMS_PROPERTY << "' is missing");
         return false;
     }
 
     return true;
 }
 
-CAnomalyDetectorModelConfig::TModelFactoryCPtr
-CAnomalyDetectorModelConfig::factory(const CSearchKey &key) const
-{
+CAnomalyDetectorModelConfig::TModelFactoryCPtr CAnomalyDetectorModelConfig::factory(const CSearchKey& key) const {
     TModelFactoryCPtr result = m_FactoryCache[key];
-    if (!result)
-    {
-        result = key.isSimpleCount() ?
-                 this->factory(key.identifier(),
-                               key.function(),
-                               true,
-                               key.excludeFrequent(),
-                               key.partitionFieldName(),
-                               key.overFieldName(),
-                               key.byFieldName(),
-                               key.fieldName(),
-                               key.influenceFieldNames()) :
-                 this->factory(key.identifier(),
-                               key.function(),
-                               key.useNull(),
-                               key.excludeFrequent(),
-                               key.partitionFieldName(),
-                               key.overFieldName(),
-                               key.byFieldName(),
-                               key.fieldName(),
-                               key.influenceFieldNames());
+    if (!result) {
+        result = key.isSimpleCount() ? this->factory(key.identifier(),
+                                                     key.function(),
+                                                     true,
+                                                     key.excludeFrequent(),
+                                                     key.partitionFieldName(),
+                                                     key.overFieldName(),
+                                                     key.byFieldName(),
+                                                     key.fieldName(),
+                                                     key.influenceFieldNames())
+                                     : this->factory(key.identifier(),
+                                                     key.function(),
+                                                     key.useNull(),
+                                                     key.excludeFrequent(),
+                                                     key.partitionFieldName(),
+                                                     key.overFieldName(),
+                                                     key.byFieldName(),
+                                                     key.fieldName(),
+                                                     key.influenceFieldNames());
     }
     return result;
 }
 
 CAnomalyDetectorModelConfig::TModelFactoryCPtr
 CAnomalyDetectorModelConfig::factory(int identifier,
-                                     function_t::EFunction function,
-                                     bool useNull,
-                                     model_t::EExcludeFrequent excludeFrequent,
-                                     const std::string &partitionFieldName,
-                                     const std::string &overFieldName,
-                                     const std::string &byFieldName,
-                                     const std::string &valueFieldName,
-                                     const CSearchKey::TStoredStringPtrVec &influenceFieldNames) const
-{
-    const TFeatureVec &features = function_t::features(function);
+                                     function_t::EFunction function,
+                                     bool useNull,
+                                     model_t::EExcludeFrequent excludeFrequent,
+                                     const std::string& partitionFieldName,
+                                     const std::string& overFieldName,
+                                     const std::string& byFieldName,
+                                     const std::string& valueFieldName,
+                                     const CSearchKey::TStoredStringPtrVec& influenceFieldNames) const {
+    const TFeatureVec& features = function_t::features(function);
 
     // Simple state machine to deduce the factory type from
     // a collection of features.
     EFactoryType factory = E_UnknownFactory;
-    for (std::size_t i = 0u; i < features.size(); ++i)
-    {
-        switch (factory)
-        {
+    for (std::size_t i = 0u; i < features.size(); ++i) {
+        switch (factory) {
         case E_EventRateFactory:
-            switch (model_t::analysisCategory(features[i]))
-            {
+            switch (model_t::analysisCategory(features[i])) {
             case model_t::E_EventRate:
                 break;
             case model_t::E_Metric:
@@ -602,8 +489,7 @@ CAnomalyDetectorModelConfig::factory(int identifier,
             break;
 
         case E_MetricFactory:
-            switch (model_t::analysisCategory(features[i]))
-            {
+            switch (model_t::analysisCategory(features[i])) {
             case model_t::E_EventRate:
             case model_t::E_Metric:
                 break;
@@ -617,8 +503,7 @@ CAnomalyDetectorModelConfig::factory(int identifier,
             break;
 
         case E_EventRatePopulationFactory:
-            switch (model_t::analysisCategory(features[i]))
-            {
+            switch (model_t::analysisCategory(features[i])) {
             case model_t::E_EventRate:
             case model_t::E_Metric:
                 factory = E_BadFactory;
@@ -634,8 +519,7 @@ CAnomalyDetectorModelConfig::factory(int identifier,
             break;
 
         case E_MetricPopulationFactory:
-            switch (model_t::analysisCategory(features[i]))
-            {
+            switch (model_t::analysisCategory(features[i])) {
             case model_t::E_EventRate:
             case model_t::E_Metric:
             case model_t::E_PopulationEventRate:
@@ -652,8 +536,7 @@ CAnomalyDetectorModelConfig::factory(int identifier,
             break;
 
         case E_EventRatePeersFactory:
-            switch (model_t::analysisCategory(features[i]))
-            {
+            switch (model_t::analysisCategory(features[i])) {
             case model_t::E_EventRate:
             case model_t::E_Metric:
             case model_t::E_PopulationEventRate:
@@ -669,8 +552,7 @@ CAnomalyDetectorModelConfig::factory(int identifier,
             break;
 
         case E_CountingFactory:
-            switch (model_t::analysisCategory(features[i]))
-            {
+            switch (model_t::analysisCategory(features[i])) {
             case model_t::E_EventRate:
             case model_t::E_Metric:
             case model_t::E_PopulationEventRate:
@@ -683,12 +565,9 @@ CAnomalyDetectorModelConfig::factory(int identifier,
             break;
 
         case E_UnknownFactory:
-            switch (model_t::analysisCategory(features[i]))
-            {
+            switch (model_t::analysisCategory(features[i])) {
             case model_t::E_EventRate:
-                factory = CSearchKey::isSimpleCount(function, byFieldName) ?
-                          E_CountingFactory :
-                          E_EventRateFactory;
+                factory = CSearchKey::isSimpleCount(function, byFieldName) ? E_CountingFactory : E_EventRateFactory;
                 break;
             case model_t::E_Metric:
                 factory = E_MetricFactory;
                 break;
@@ -715,33 +594,25 @@ CAnomalyDetectorModelConfig::factory(int identifier,
     }
 
     TFactoryTypeFactoryPtrMapCItr prototype = m_Factories.find(factory);
-    if (prototype == m_Factories.end())
-    {
-        LOG_ABORT("No factory for features = "
-                  << core::CContainerPrinter::print(features));
+    if (prototype == m_Factories.end()) {
+        LOG_ABORT("No factory for features = " << core::CContainerPrinter::print(features));
     }
 
     TModelFactoryPtr result(prototype->second->clone());
     result->identifier(identifier);
     TStrVec influences;
     influences.reserve(influenceFieldNames.size());
-    for (const auto &influenceFieldName : influenceFieldNames)
-    {
+    for (const auto& influenceFieldName : influenceFieldNames) {
         influences.push_back(*influenceFieldName);
     }
-    result->fieldNames(partitionFieldName,
-                       overFieldName,
-                       byFieldName,
-                       valueFieldName,
-                       influences);
+    result->fieldNames(partitionFieldName, overFieldName, byFieldName, valueFieldName, influences);
     result->useNull(useNull);
     result->excludeFrequent(excludeFrequent);
     result->features(features);
     result->bucketResultsDelay(m_BucketResultsDelay);
     result->multivariateByFields(m_MultivariateByFields);
     TIntDetectionRuleVecUMapCItr rulesItr = m_DetectionRules.get().find(identifier);
-    if (rulesItr != m_DetectionRules.get().end())
-    {
+    if (rulesItr != m_DetectionRules.get().end()) {
         result->detectionRules(TDetectionRuleVecCRef(rulesItr->second));
     }
     result->scheduledEvents(m_ScheduledEvents);
@@ -749,123 +620,97 @@ CAnomalyDetectorModelConfig::factory(int identifier,
 
     return result;
 }
 
-void CAnomalyDetectorModelConfig::decayRate(double value)
-{
-    for (auto &factory : m_Factories)
-    {
+void CAnomalyDetectorModelConfig::decayRate(double value) {
+    for (auto& factory : m_Factories) {
         factory.second->decayRate(value);
     }
 }
 
-double CAnomalyDetectorModelConfig::decayRate() const
-{
+double CAnomalyDetectorModelConfig::decayRate() const {
     return m_Factories.begin()->second->modelParams().s_DecayRate;
 }
 
-core_t::TTime CAnomalyDetectorModelConfig::bucketLength() const
-{
+core_t::TTime CAnomalyDetectorModelConfig::bucketLength() const {
     return m_BucketLength;
 }
 
-core_t::TTime CAnomalyDetectorModelConfig::latency() const
-{
+core_t::TTime CAnomalyDetectorModelConfig::latency() const {
     return m_BucketLength * m_Factories.begin()->second->modelParams().s_LatencyBuckets;
 }
 
-std::size_t CAnomalyDetectorModelConfig::latencyBuckets() const
-{
+std::size_t CAnomalyDetectorModelConfig::latencyBuckets() const {
     return m_Factories.begin()->second->modelParams().s_LatencyBuckets;
 }
 
-std::size_t CAnomalyDetectorModelConfig::bucketResultsDelay() const
-{
+std::size_t CAnomalyDetectorModelConfig::bucketResultsDelay() const {
     return m_BucketResultsDelay;
 }
 
-bool CAnomalyDetectorModelConfig::multivariateByFields() const
-{
+bool CAnomalyDetectorModelConfig::multivariateByFields() const {
     return m_MultivariateByFields;
 }
 
-void CAnomalyDetectorModelConfig::modelPlotBoundsPercentile(double percentile)
-{
-    if (percentile < 0.0 || percentile >= 100.0)
-    {
+void CAnomalyDetectorModelConfig::modelPlotBoundsPercentile(double percentile) {
+    if (percentile < 0.0 || percentile >= 100.0) {
        LOG_ERROR("Bad confidence interval");
        return;
     }
     m_ModelPlotBoundsPercentile = percentile;
 }
 
-double CAnomalyDetectorModelConfig::modelPlotBoundsPercentile() const
-{
+double CAnomalyDetectorModelConfig::modelPlotBoundsPercentile() const {
     return m_ModelPlotBoundsPercentile;
 }
 
-void CAnomalyDetectorModelConfig::modelPlotTerms(TStrSet terms)
-{
+void CAnomalyDetectorModelConfig::modelPlotTerms(TStrSet terms) {
     m_ModelPlotTerms.swap(terms);
 }
 
-const CAnomalyDetectorModelConfig::TStrSet &CAnomalyDetectorModelConfig::modelPlotTerms() const
-{
+const CAnomalyDetectorModelConfig::TStrSet& CAnomalyDetectorModelConfig::modelPlotTerms() const {
     return m_ModelPlotTerms;
 }
 
-double CAnomalyDetectorModelConfig::aggregationStyleParam(model_t::EAggregationStyle style,
-                                                          model_t::EAggregationParam param) const
-{
+double CAnomalyDetectorModelConfig::aggregationStyleParam(model_t::EAggregationStyle style, model_t::EAggregationParam param) const {
     return m_AggregationStyleParams[style][param];
 }
 
-double CAnomalyDetectorModelConfig::maximumAnomalousProbability() const
-{
+double CAnomalyDetectorModelConfig::maximumAnomalousProbability() const {
     return m_MaximumAnomalousProbability;
 }
 
-double CAnomalyDetectorModelConfig::noisePercentile() const
-{
+double CAnomalyDetectorModelConfig::noisePercentile() const {
     return m_NoisePercentile;
 }
 
-double CAnomalyDetectorModelConfig::noiseMultiplier() const
-{
+double CAnomalyDetectorModelConfig::noiseMultiplier() const {
     return m_NoiseMultiplier;
 }
 
-const CAnomalyDetectorModelConfig::TDoubleDoublePrVec &
-CAnomalyDetectorModelConfig::normalizedScoreKnotPoints() const
-{
+const CAnomalyDetectorModelConfig::TDoubleDoublePrVec& CAnomalyDetectorModelConfig::normalizedScoreKnotPoints() const {
     return m_NormalizedScoreKnotPoints;
 }
 
-bool CAnomalyDetectorModelConfig::perPartitionNormalization() const
-{
+bool CAnomalyDetectorModelConfig::perPartitionNormalization() const {
     return m_PerPartitionNormalisation;
 }
 
-void CAnomalyDetectorModelConfig::perPartitionNormalization(bool value)
-{
+void CAnomalyDetectorModelConfig::perPartitionNormalization(bool value) {
     m_PerPartitionNormalisation = value;
 }
 
-void CAnomalyDetectorModelConfig::detectionRules(TIntDetectionRuleVecUMapCRef detectionRules)
-{
+void CAnomalyDetectorModelConfig::detectionRules(TIntDetectionRuleVecUMapCRef detectionRules) {
     m_DetectionRules = detectionRules;
 }
 
-void CAnomalyDetectorModelConfig::scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents)
-{
+void CAnomalyDetectorModelConfig::scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents) {
     m_ScheduledEvents = scheduledEvents;
 }
 
-core_t::TTime CAnomalyDetectorModelConfig::samplingAgeCutoff() const
-{
+core_t::TTime CAnomalyDetectorModelConfig::samplingAgeCutoff() const {
     return m_Factories.begin()->second->modelParams().s_SamplingAgeCutoff;
 }
 
-namespace
-{
+namespace {
 const std::string ONLINE_LEARN_RATE_PROPERTY("learnrate");
 const std::string DECAY_RATE_PROPERTY("decayrate");
 const std::string INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY("initialdecayratemultiplier");
@@ -885,283 +730,204 @@
 const std::string NORMALIZED_SCORE_KNOT_POINTS("normalizedscoreknotpoints");
 const std::string PER_PARTITION_NORMALIZATION_PROPERTY("perPartitionNormalization");
 }
 
-bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptree &propertyTree)
-{
+bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptree& propertyTree) {
     using TStrVec = std::vector<std::string>;
 
     bool result = true;
 
-    for (const auto &property : propertyTree)
-    {
+    for (const auto& property : propertyTree) {
         std::string propName = property.first;
         std::string propValue = property.second.data();
         core::CStringUtils::trimWhitespace(propValue);
 
-        if (propName == ONLINE_LEARN_RATE_PROPERTY)
-        {
+        if (propName == ONLINE_LEARN_RATE_PROPERTY) {
             double learnRate = DEFAULT_LEARN_RATE;
-            if (core::CStringUtils::stringToType(propValue, learnRate) == false || learnRate <= 0.0)
-            {
+            if (core::CStringUtils::stringToType(propValue, learnRate) == false || learnRate <= 0.0) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
             learnRate *= bucketNormalizationFactor(this->bucketLength());
-            for (auto &factory : m_Factories)
-            {
+            for (auto& factory : m_Factories) {
                 factory.second->learnRate(learnRate);
             }
-        }
-        else if (propName == DECAY_RATE_PROPERTY)
-        {
+        } else if (propName == DECAY_RATE_PROPERTY) {
             double decayRate = DEFAULT_DECAY_RATE;
-            if (core::CStringUtils::stringToType(propValue, decayRate) == false || decayRate <= 0.0)
-            {
+            if (core::CStringUtils::stringToType(propValue, decayRate) == false || decayRate <= 0.0) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
             decayRate *= bucketNormalizationFactor(this->bucketLength());
-            for (auto &factory : m_Factories)
-            {
+            for (auto& factory : m_Factories) {
                 factory.second->decayRate(decayRate);
             }
-        }
-        else if (propName == INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY)
-        {
+        } else if (propName == INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY) {
             double multiplier = DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER;
-            if (core::CStringUtils::stringToType(propValue, multiplier) == false || multiplier < 1.0)
-            {
+            if (core::CStringUtils::stringToType(propValue, multiplier) == false || multiplier < 1.0) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
-            for (auto &factory : m_Factories)
-            {
+            for (auto& factory : m_Factories) {
                 factory.second->initialDecayRateMultiplier(multiplier);
             }
-        }
-        else if (propName == MAXIMUM_UPDATES_PER_BUCKET_PROPERTY)
-        {
+        } else if (propName == MAXIMUM_UPDATES_PER_BUCKET_PROPERTY) {
             double maximumUpdatesPerBucket;
-            if ( core::CStringUtils::stringToType(propValue, maximumUpdatesPerBucket) == false
-                || maximumUpdatesPerBucket < 0.0)
-            {
+            if (core::CStringUtils::stringToType(propValue, maximumUpdatesPerBucket) == false || maximumUpdatesPerBucket < 0.0) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
-            for (auto &factory : m_Factories)
-            {
+            for (auto& factory : m_Factories) {
                 factory.second->maximumUpdatesPerBucket(maximumUpdatesPerBucket);
             }
-        }
-        else if (propName == INDIVIDUAL_MODE_FRACTION_PROPERTY)
-        {
+        } else if (propName == INDIVIDUAL_MODE_FRACTION_PROPERTY) {
             double fraction;
-            if ( core::CStringUtils::stringToType(propValue, fraction) == false
-                || fraction < 0.0
-                || fraction > 1.0)
-            {
+            if (core::CStringUtils::stringToType(propValue, fraction) == false || fraction < 0.0 || fraction > 1.0) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
-            if (m_Factories.count(E_EventRateFactory) > 0)
-            {
+            if (m_Factories.count(E_EventRateFactory) > 0) {
                 m_Factories[E_EventRateFactory]->minimumModeFraction(fraction);
             }
-            if (m_Factories.count(E_MetricFactory) > 0)
-            {
+            if (m_Factories.count(E_MetricFactory) > 0) {
                 m_Factories[E_MetricFactory]->minimumModeFraction(fraction);
             }
-        }
-        else if (propName == POPULATION_MODE_FRACTION_PROPERTY)
-        {
+        } else if (propName == POPULATION_MODE_FRACTION_PROPERTY) {
             double fraction;
-            if ( core::CStringUtils::stringToType(propValue, fraction) == false
-                || fraction < 0.0
-                || fraction > 1.0)
-            {
+            if (core::CStringUtils::stringToType(propValue, fraction) == false || fraction < 0.0 || fraction > 1.0) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
-            if (m_Factories.count(E_EventRatePopulationFactory) > 0)
-            {
+            if (m_Factories.count(E_EventRatePopulationFactory) > 0) {
                 m_Factories[E_EventRatePopulationFactory]->minimumModeFraction(fraction);
             }
-            if (m_Factories.count(E_MetricPopulationFactory) > 0)
-            {
+            if (m_Factories.count(E_MetricPopulationFactory) > 0) {
                 m_Factories[E_MetricPopulationFactory]->minimumModeFraction(fraction);
             }
-        }
-        else if (propName == PEERS_MODE_FRACTION_PROPERTY)
-        {
+        } else if (propName == PEERS_MODE_FRACTION_PROPERTY) {
             double fraction;
-            if ( core::CStringUtils::stringToType(propValue, fraction) == false
-                || fraction < 0.0
-                || fraction > 1.0)
-            {
+            if (core::CStringUtils::stringToType(propValue, fraction) == false || fraction < 0.0 || fraction > 1.0) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
-            if (m_Factories.count(E_EventRatePeersFactory) > 0)
-            {
+            if (m_Factories.count(E_EventRatePeersFactory) > 0) {
                 m_Factories[E_EventRatePeersFactory]->minimumModeFraction(fraction);
             }
-        }
-        else if (propName == COMPONENT_SIZE_PROPERTY)
-        {
+        } else if (propName == COMPONENT_SIZE_PROPERTY) {
             int componentSize;
-            if ( core::CStringUtils::stringToType(propValue, componentSize) == false
-                || componentSize < 0)
-            {
+            if (core::CStringUtils::stringToType(propValue, componentSize) == false || componentSize < 0) {
                 LOG_ERROR("Invalid value of property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
-            for (auto &factory : m_Factories)
-            {
+            for (auto& factory : m_Factories) {
                 factory.second->componentSize(componentSize);
             }
-        }
-        else if (propName == SAMPLE_COUNT_FACTOR_PROPERTY)
-        {
+        } else if (propName == SAMPLE_COUNT_FACTOR_PROPERTY) {
             int factor;
-            if (core::CStringUtils::stringToType(propValue, factor) == false || factor < 0)
-            {
+            if (core::CStringUtils::stringToType(propValue, factor) == false || factor < 0) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
-            for (auto &factory : m_Factories)
-            {
+            for (auto& factory : m_Factories) {
                 factory.second->sampleCountFactor(factor);
             }
-        }
-        else if (propName == PRUNE_WINDOW_SCALE_MINIMUM)
-        {
+        } else if (propName == PRUNE_WINDOW_SCALE_MINIMUM) {
             double factor;
-            if (core::CStringUtils::stringToType(propValue, factor) == false)
-            {
+            if (core::CStringUtils::stringToType(propValue, factor) == false) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
            }
 
-            for (auto &factory : m_Factories)
-            {
+            for (auto& factory : m_Factories) {
                 factory.second->pruneWindowScaleMinimum(factor);
             }
-        }
-        else if (propName == PRUNE_WINDOW_SCALE_MAXIMUM)
-        {
+        } else if (propName == PRUNE_WINDOW_SCALE_MAXIMUM) {
             double factor;
-            if (core::CStringUtils::stringToType(propValue, factor) == false)
-            {
+            if (core::CStringUtils::stringToType(propValue, factor) == false) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
-            for (auto &factory : m_Factories)
-            {
+            for (auto& factory : m_Factories) {
                 factory.second->pruneWindowScaleMaximum(factor);
             }
-        }
-        else if (propName == AGGREGATION_STYLE_PARAMS)
-        {
+        } else if (propName == AGGREGATION_STYLE_PARAMS) {
             core::CStringUtils::trimWhitespace(propValue);
             propValue = core::CStringUtils::normaliseWhitespace(propValue);
 
             TStrVec strings;
             std::string remainder;
             core::CStringUtils::tokenise(" ", propValue, strings, remainder);
-            if (!remainder.empty())
-            {
+            if (!remainder.empty()) {
                 strings.push_back(remainder);
             }
             std::size_t n = model_t::NUMBER_AGGREGATION_STYLES * model_t::NUMBER_AGGREGATION_PARAMS;
-            if (strings.size() != n)
-            {
+            if (strings.size() != n) {
                 LOG_ERROR("Expected " << n << " values for " << propName);
                 result = false;
                 continue;
             }
 
-            for (std::size_t j = 0u, l = 0u; j < model_t::NUMBER_AGGREGATION_STYLES; ++j)
-            {
-                for (std::size_t k = 0u; k < model_t::NUMBER_AGGREGATION_PARAMS; ++k, ++l)
-                {
+            for (std::size_t j = 0u, l = 0u; j < model_t::NUMBER_AGGREGATION_STYLES; ++j) {
+                for (std::size_t k = 0u; k < model_t::NUMBER_AGGREGATION_PARAMS; ++k, ++l) {
                     double value;
-                    if (core::CStringUtils::stringToType(strings[l], value) == false)
-                    {
+                    if (core::CStringUtils::stringToType(strings[l], value) == false) {
                         LOG_ERROR("Unexpected value " << strings[l] << " in property " << propName);
                         result = false;
                         continue;
                    }
-                    this->aggregationStyleParams(static_cast<model_t::EAggregationStyle>(j),
-                                                 static_cast<model_t::EAggregationParam>(k),
-                                                 value);
+                    this->aggregationStyleParams(
+                        static_cast<model_t::EAggregationStyle>(j), static_cast<model_t::EAggregationParam>(k), value);
                 }
             }
-        }
-        else if (propName == MAXIMUM_ANOMALOUS_PROBABILITY_PROPERTY)
-        {
+        } else if (propName == MAXIMUM_ANOMALOUS_PROBABILITY_PROPERTY) {
             double probability;
-            if (core::CStringUtils::stringToType(propValue, probability) == false)
-            {
+            if (core::CStringUtils::stringToType(propValue, probability) == false) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
 
             this->maximumAnomalousProbability(probability);
-        }
-        else if (propName == NOISE_PERCENTILE_PROPERTY)
-        {
+        } else if (propName == NOISE_PERCENTILE_PROPERTY) {
             double percentile;
-            if ( core::CStringUtils::stringToType(propValue, percentile) == false
-                || this->noisePercentile(percentile) == false)
-            {
+            if (core::CStringUtils::stringToType(propValue, percentile) == false || this->noisePercentile(percentile) == false) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
-        }
-        else if (propName == NOISE_MULTIPLIER_PROPERTY)
-        {
+        } else if (propName == NOISE_MULTIPLIER_PROPERTY) {
             double multiplier;
-            if ( core::CStringUtils::stringToType(propValue, multiplier) == false
-                || this->noiseMultiplier(multiplier) == false)
-            {
+            if (core::CStringUtils::stringToType(propValue, multiplier) == false || this->noiseMultiplier(multiplier) == false) {
                 LOG_ERROR("Invalid value for property " << propName << " : " << propValue);
                 result = false;
                 continue;
             }
-        }
-        else if (propName == NORMALIZED_SCORE_KNOT_POINTS)
-        {
+        } else if (propName == NORMALIZED_SCORE_KNOT_POINTS) {
             core::CStringUtils::trimWhitespace(propValue);
             propValue = core::CStringUtils::normaliseWhitespace(propValue);
 
             TStrVec strings;
             std::string remainder;
             core::CStringUtils::tokenise(" ", propValue, strings, remainder);
-            if (!remainder.empty())
-            {
+            if (!remainder.empty()) {
                strings.push_back(remainder);
             }
-            if (strings.empty() || (strings.size() % 2) != 0)
-            {
-                LOG_ERROR("Expected even number of values for property " << propName
-                          << " " << core::CContainerPrinter::print(strings));
+            if (strings.empty() || (strings.size() % 2) != 0) {
+                LOG_ERROR("Expected even number of values for property " << propName << " " << core::CContainerPrinter::print(strings));
                 result = false;
                 continue;
             }
@@ -1169,21 +935,16 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre
 
             TDoubleDoublePrVec points;
             points.reserve(strings.size() / 2 + 2);
             points.emplace_back(0.0, 0.0);
-            for (std::size_t j = 0u; j < strings.size(); j += 2)
-            {
+            for (std::size_t j = 0u; j < strings.size(); j += 2) {
                 double rate;
                 double score;
-                if (core::CStringUtils::stringToType(strings[j], rate) == false)
-                {
-                    LOG_ERROR("Unexpected value " << strings[j]
-                              << " for rate in property " << propName);
+                if (core::CStringUtils::stringToType(strings[j], rate) == false) {
+                    LOG_ERROR("Unexpected value " << strings[j] << " for rate in property " << propName);
                     result = false;
                     continue;
                 }
-                if (core::CStringUtils::stringToType(strings[j+1], score) == false)
-                {
-                    LOG_ERROR("Unexpected value " << strings[j+1]
-                              << " for score in property " << propName);
+                if (core::CStringUtils::stringToType(strings[j + 1], score) == false) {
+                    LOG_ERROR("Unexpected value " << strings[j + 1] << " for score in property " << propName);
                     result = false;
                     continue;
                 }
@@ -1191,13 +952,9 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre
             }
             points.emplace_back(100.0, 100.0);
             this->normalizedScoreKnotPoints(points);
-        }
-        else if (propName == PER_PARTITION_NORMALIZATION_PROPERTY)
-        {
+        } else if (propName == PER_PARTITION_NORMALIZATION_PROPERTY) {
 
-        }
-        else
-        {
+        } else {
             LOG_WARN("Ignoring unknown property " << propName);
         }
     }
@@ -1205,10 +962,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre
 
     return result;
 }
 
-double CAnomalyDetectorModelConfig::bucketNormalizationFactor() const
-{
+double CAnomalyDetectorModelConfig::bucketNormalizationFactor() const {
     return bucketNormalizationFactor(m_BucketLength);
 }
-
 }
 }
diff --git a/lib/model/CAnomalyScore.cc b/lib/model/CAnomalyScore.cc
index 007af128b5..b173e4286a 100644
--- a/lib/model/CAnomalyScore.cc
+++ b/lib/model/CAnomalyScore.cc
@@ -7,16 +7,16 @@
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
@@ -30,32 +30,23 @@
 #include
 #include
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
-namespace
-{
+namespace {
 
 using TDoubleVec = std::vector<double>;
 
 //! Add valid \p probabilities to \p aggregator and return the
 //! number of valid probabilities.
 template<typename AGGREGATOR>
-std::size_t addProbabilities(const TDoubleVec &probabilities,
-                             AGGREGATOR &aggregator)
-{
+std::size_t addProbabilities(const TDoubleVec& probabilities, AGGREGATOR& aggregator) {
     std::size_t n = 0u;
-    for (std::size_t i = 0u; i < probabilities.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < probabilities.size(); ++i) {
         double p = probabilities[i];
-        if (!(p >= 0.0 && p <= 1.0))
-        {
+        if (!(p >= 0.0 && p <= 1.0)) {
             LOG_ERROR("Invalid probability " << p);
-        }
-        else
-        {
+        } else {
             ++n;
             aggregator.add(p);
         }
@@ -64,14 +55,12 @@ std::size_t addProbabilities(const TDoubleVec &probabilities,
 }
 
 //! The function to convert probabilities to *raw* scores.
-double probabilityToScore(double probability)
-{
+double probabilityToScore(double probability) {
     return maths::CTools::anomalyScore(probability);
 }
 
 //! The function to convert *raw* scores to probabilities.
-double scoreToProbability(double score)
-{
+double scoreToProbability(double score) {
     return maths::CTools::inverseAnomalyScore(score);
 }
 
@@ -88,7 +77,6 @@ const std::string EMPTY_STRING;
 
 // This is the version to assume for the format that doesn't contain a version
 // attribute - NEVER CHANGE THIS
 const std::string MISSING_VERSION_FORMAT_VERSION("1");
-
 }
 
 const std::string CAnomalyScore::MLCUE_ATTRIBUTE("mlcue");
@@ -101,7 +89,6 @@ const std::string CAnomalyScore::TIME_ATTRIBUTE("time");
 // existing state to be discarded
 const std::string CAnomalyScore::CURRENT_FORMAT_VERSION("3");
 
-
 const std::string CAnomalyScore::WARNING_SEVERITY("warning");
 const std::string CAnomalyScore::MINOR_SEVERITY("minor");
 const std::string CAnomalyScore::MAJOR_SEVERITY("major");
@@ -112,15 +99,13 @@ bool CAnomalyScore::compute(double jointProbabilityWeight,
                             std::size_t minExtremeSamples,
                             std::size_t maxExtremeSamples,
                             double maximumAnomalousProbability,
-                            const TDoubleVec &probabilities,
-                            double &overallAnomalyScore,
-                            double &overallProbability)
-{
+                            const TDoubleVec& probabilities,
+                            double& overallAnomalyScore,
+                            double& overallProbability) {
     overallAnomalyScore = 0.0;
     overallProbability = 1.0;
 
-    if (probabilities.empty())
-    {
+    if (probabilities.empty()) {
         // Nothing to do.
         return true;
     }
@@ -131,53 +116,43 @@ bool CAnomalyScore::compute(double jointProbabilityWeight,
     // Note the upper bound is significantly tighter, so we just
     // use that in the following calculation.
     double logPJoint;
-    if (!logPJointCalculator.calculateUpperBound(logPJoint))
-    {
+    if (!logPJointCalculator.calculateUpperBound(logPJoint)) {
         LOG_ERROR("Unable to calculate anomaly score"
                   << ", probabilities = " << core::CContainerPrinter::print(probabilities));
         return false;
     }
 
     // Sanity check the probability not greater than 1.0.
-    if (logPJoint > 0.0)
-    {
-        LOG_ERROR("Invalid log joint probability " << logPJoint
-                  << ", probabilities = " << core::CContainerPrinter::print(probabilities));
+    if (logPJoint > 0.0) {
+        LOG_ERROR("Invalid log joint probability " << logPJoint << ", probabilities = " << core::CContainerPrinter::print(probabilities));
         return false;
     }
 
     double logPExtreme = 0.0;
-    for (std::size_t m = 1u, i = maths::CTools::truncate(minExtremeSamples, m, n); i <= n; ++i)
-    {
+    for (std::size_t m = 1u, i = maths::CTools::truncate(minExtremeSamples, m, n); i <= n; ++i) {
         maths::CLogProbabilityOfMFromNExtremeSamples logPExtremeCalculator(i);
         addProbabilities(probabilities, logPExtremeCalculator);
         double logPi;
-        if (!logPExtremeCalculator.calibrated(logPi))
-        {
+        if (!logPExtremeCalculator.calibrated(logPi)) {
             LOG_ERROR("Unable to calculate anomaly score"
                       << ", probabilities = " << core::CContainerPrinter::print(probabilities));
             return false;
         }
-        if (logPi < logPExtreme)
-        {
+        if (logPi < logPExtreme) {
             logPExtreme = logPi;
        }
     }
 
     // Sanity check the probability in the range [0, 1].
-    if (logPExtreme > 0.0)
-    {
+    if (logPExtreme > 0.0) {
         LOG_ERROR("Invalid log extreme probability " << logPExtreme
-                  << ", probabilities = " << core::CContainerPrinter::print(probabilities));
+                                                     << ", probabilities = " << core::CContainerPrinter::print(probabilities));
         return false;
     }
 
     double logMaximumAnomalousProbability = std::log(maximumAnomalousProbability);
-    if ( logPJoint > logMaximumAnomalousProbability
-        && logPExtreme > logMaximumAnomalousProbability)
-    {
-        overallProbability = std::exp(jointProbabilityWeight * logPJoint)
-                             * std::exp(extremeProbabilityWeight * logPExtreme);
+    if (logPJoint > logMaximumAnomalousProbability && logPExtreme > logMaximumAnomalousProbability) {
+        overallProbability = std::exp(jointProbabilityWeight * logPJoint) * std::exp(extremeProbabilityWeight * logPExtreme);
         return true;
     }
 
@@ -187,59 +162,38 @@ bool CAnomalyScore::compute(double jointProbabilityWeight,
     static const double NORMAL_RANGE_SCORE_FRACTION = 0.8;
     static const double LOG_SMALLEST_PROBABILITY = std::log(maths::CTools::smallestProbability());
     static const double SMALLEST_PROBABILITY_DEVIATION = probabilityToScore(maths::CTools::smallestProbability());
-    static const double SMALLEST_LOG_JOINT_PROBABILTY  = -100000.0;
+    static const double SMALLEST_LOG_JOINT_PROBABILTY = -100000.0;
     static const double SMALLEST_LOG_EXTREME_PROBABILTY = -1500.0;
 
-    if (logPJoint < LOG_SMALLEST_PROBABILITY)
-    {
+    if (logPJoint < LOG_SMALLEST_PROBABILITY) {
         double interpolate =
-            std::min( (logPJoint - LOG_SMALLEST_PROBABILITY)
-                     / (SMALLEST_LOG_JOINT_PROBABILTY - LOG_SMALLEST_PROBABILITY), 1.0);
-        overallAnomalyScore = ( NORMAL_RANGE_SCORE_FRACTION
-                               + (1.0 - NORMAL_RANGE_SCORE_FRACTION) * interpolate)
-                              * jointProbabilityWeight
-                              * SMALLEST_PROBABILITY_DEVIATION;
-    }
-    else
-    {
-        overallAnomalyScore = NORMAL_RANGE_SCORE_FRACTION
-                              * jointProbabilityWeight
-                              * probabilityToScore(std::exp(logPJoint));
+            std::min((logPJoint - LOG_SMALLEST_PROBABILITY) / (SMALLEST_LOG_JOINT_PROBABILTY - LOG_SMALLEST_PROBABILITY), 1.0);
+        overallAnomalyScore = (NORMAL_RANGE_SCORE_FRACTION + (1.0 - NORMAL_RANGE_SCORE_FRACTION) * interpolate) * jointProbabilityWeight *
+                              SMALLEST_PROBABILITY_DEVIATION;
+    } else {
+        overallAnomalyScore = NORMAL_RANGE_SCORE_FRACTION * jointProbabilityWeight * probabilityToScore(std::exp(logPJoint));
     }
 
-    if (logPExtreme < LOG_SMALLEST_PROBABILITY)
-    {
+    if (logPExtreme < LOG_SMALLEST_PROBABILITY) {
         double interpolate =
-            std::min( (logPExtreme - LOG_SMALLEST_PROBABILITY)
-                     / (SMALLEST_LOG_EXTREME_PROBABILTY - LOG_SMALLEST_PROBABILITY), 1.0);
-        overallAnomalyScore += ( NORMAL_RANGE_SCORE_FRACTION
-                                + (1.0 - NORMAL_RANGE_SCORE_FRACTION) * interpolate)
-                               * extremeProbabilityWeight
-                               * SMALLEST_PROBABILITY_DEVIATION;
-    }
-    else
-    {
-        overallAnomalyScore += NORMAL_RANGE_SCORE_FRACTION
-                               * extremeProbabilityWeight
-                               * probabilityToScore(std::exp(logPExtreme));
+            std::min((logPExtreme - LOG_SMALLEST_PROBABILITY) / (SMALLEST_LOG_EXTREME_PROBABILTY - LOG_SMALLEST_PROBABILITY), 1.0);
+        overallAnomalyScore += (NORMAL_RANGE_SCORE_FRACTION + (1.0 - NORMAL_RANGE_SCORE_FRACTION) * interpolate) *
+                               extremeProbabilityWeight * SMALLEST_PROBABILITY_DEVIATION;
+    } else {
+        overallAnomalyScore += NORMAL_RANGE_SCORE_FRACTION * extremeProbabilityWeight * probabilityToScore(std::exp(logPExtreme));
     }
 
     // Invert the deviation in the region it is 1-to-1 otherwise
     // use the weighted harmonic mean.
-    overallProbability = overallAnomalyScore > 0.0 ?
-                         scoreToProbability(std::min( overallAnomalyScore
-                                                     / NORMAL_RANGE_SCORE_FRACTION,
-                                                     SMALLEST_PROBABILITY_DEVIATION)) :
-                         std::exp(jointProbabilityWeight * logPJoint)
-                       * std::exp(extremeProbabilityWeight * logPExtreme);
-
-    LOG_TRACE("logJointProbability = " << logPJoint
-              << ", jointProbabilityWeight = " << jointProbabilityWeight
-              << ", logExtremeProbability = " << logPExtreme
-              << ", extremeProbabilityWeight = " << extremeProbabilityWeight
-              << ", overallProbability = " << overallProbability
-              << ", overallAnomalyScore = " << overallAnomalyScore
-              << ", # probabilities = " << probabilities.size()
+    overallProbability =
+        overallAnomalyScore > 0.0
+            ? scoreToProbability(std::min(overallAnomalyScore / NORMAL_RANGE_SCORE_FRACTION, SMALLEST_PROBABILITY_DEVIATION))
+            : std::exp(jointProbabilityWeight * logPJoint) * std::exp(extremeProbabilityWeight * logPExtreme);
+
+    LOG_TRACE("logJointProbability = "
+              << logPJoint << ", jointProbabilityWeight = " << jointProbabilityWeight << ", logExtremeProbability = " << logPExtreme
+              << ", extremeProbabilityWeight = " << extremeProbabilityWeight << ", overallProbability = " << overallProbability
+              << ", overallAnomalyScore = " << overallAnomalyScore << ", # probabilities = " << probabilities.size()
               << ", probabilities = " << core::CContainerPrinter::print(probabilities));
 
     return true;
@@ -249,19 +203,15 @@ CAnomalyScore::CComputer::CComputer(double jointProbabilityWeight,
                                     double extremeProbabilityWeight,
                                     std::size_t minExtremeSamples,
                                     std::size_t maxExtremeSamples,
-                                    double maximumAnomalousProbability) :
-        m_JointProbabilityWeight(jointProbabilityWeight),
-        m_ExtremeProbabilityWeight(extremeProbabilityWeight),
-        m_MinExtremeSamples(std::min(minExtremeSamples, maxExtremeSamples)),
-        m_MaxExtremeSamples(maxExtremeSamples),
-        m_MaximumAnomalousProbability(maximumAnomalousProbability)
-{
+                                    double maximumAnomalousProbability)
+    : m_JointProbabilityWeight(jointProbabilityWeight),
+      m_ExtremeProbabilityWeight(extremeProbabilityWeight),
+      m_MinExtremeSamples(std::min(minExtremeSamples, maxExtremeSamples)),
+      m_MaxExtremeSamples(maxExtremeSamples),
+      m_MaximumAnomalousProbability(maximumAnomalousProbability) {
 }
 
-bool CAnomalyScore::CComputer::operator()(const TDoubleVec &probabilities,
-                                          double &overallAnomalyScore,
-                                          double &overallProbability) const
-{
+bool CAnomalyScore::CComputer::operator()(const TDoubleVec& probabilities, double& overallAnomalyScore, double& overallProbability) const {
     return CAnomalyScore::compute(m_JointProbabilityWeight,
                                   m_ExtremeProbabilityWeight,
                                   m_MinExtremeSamples,
@@ -272,66 +222,55 @@ bool CAnomalyScore::CComputer::operator()(const TDoubleVec &probabilities,
                                   overallProbability);
 }
 
-CAnomalyScore::CNormalizer::CNormalizer(const CAnomalyDetectorModelConfig &config) :
-        m_NoisePercentile(config.noisePercentile()),
-        m_NoiseMultiplier(config.noiseMultiplier()),
-        m_NormalizedScoreKnotPoints(config.normalizedScoreKnotPoints()),
-        m_MaximumNormalizedScore(100.0),
-        m_HighPercentileScore(std::numeric_limits<uint32_t>::max()),
-        m_HighPercentileCount(0ull),
-        m_BucketNormalizationFactor(config.bucketNormalizationFactor()),
-        m_RawScoreQuantileSummary(201, config.decayRate()),
-        m_RawScoreHighQuantileSummary(201, config.decayRate()),
-        m_DecayRate(config.decayRate()
-                    * std::max( static_cast<double>(config.bucketLength())
-                               / static_cast<double>(CAnomalyDetectorModelConfig::STANDARD_BUCKET_LENGTH), 1.0)),
-        m_TimeToQuantileDecay(QUANTILE_DECAY_TIME)
-{
+CAnomalyScore::CNormalizer::CNormalizer(const CAnomalyDetectorModelConfig& config)
+    : m_NoisePercentile(config.noisePercentile()),
+      m_NoiseMultiplier(config.noiseMultiplier()),
+      m_NormalizedScoreKnotPoints(config.normalizedScoreKnotPoints()),
+      m_MaximumNormalizedScore(100.0),
+      m_HighPercentileScore(std::numeric_limits<uint32_t>::max()),
+      m_HighPercentileCount(0ull),
+      m_BucketNormalizationFactor(config.bucketNormalizationFactor()),
+      m_RawScoreQuantileSummary(201, config.decayRate()),
+      m_RawScoreHighQuantileSummary(201, config.decayRate()),
+      m_DecayRate(config.decayRate() * std::max(static_cast<double>(config.bucketLength()) /
+                                                    static_cast<double>(CAnomalyDetectorModelConfig::STANDARD_BUCKET_LENGTH),
+                                                1.0)),
+      m_TimeToQuantileDecay(QUANTILE_DECAY_TIME) {
 }
 
-bool CAnomalyScore::CNormalizer::canNormalize() const
-{
+bool CAnomalyScore::CNormalizer::canNormalize() const {
     return m_RawScoreQuantileSummary.n() > 0;
 }
 
-bool CAnomalyScore::CNormalizer::normalize(TDoubleVec &scores) const
-{
+bool CAnomalyScore::CNormalizer::normalize(TDoubleVec& scores) const {
     double origScore(std::accumulate(scores.begin(), scores.end(), 0.0));
     double normalizedScore(origScore);
-    if (this->normalize(normalizedScore) == false)
-    {
+    if (this->normalize(normalizedScore) == false) {
         // Error will have been logged by the called method
         return false;
     }
 
-    if (normalizedScore == origScore)
-    {
+    if (normalizedScore == origScore) {
         // Nothing to do.
         return true;
     }
 
     // Normalize the individual scores.
-    for (TDoubleVecItr scoreItr = scores.begin();
-         scoreItr != scores.end();
-         ++scoreItr)
-    {
+    for (TDoubleVecItr scoreItr = scores.begin(); scoreItr != scores.end(); ++scoreItr) {
         *scoreItr *= normalizedScore / origScore;
     }
 
     return true;
 }
 
-bool CAnomalyScore::CNormalizer::normalize(double &score) const
-{
-    if (score == 0.0)
-    {
+bool CAnomalyScore::CNormalizer::normalize(double& score) const {
+    if (score == 0.0) {
         // Nothing to do.
         return true;
     }
 
-    if (m_RawScoreQuantileSummary.n() == 0)
-    {
+    if (m_RawScoreQuantileSummary.n() == 0) {
         LOG_ERROR("No scores have been added to the quantile summary");
         return false;
     }
@@ -340,13 +279,7 @@ bool CAnomalyScore::CNormalizer::normalize(double &score) const
 
     static const double CONFIDENCE_INTERVAL = 70.0;
 
-    double normalizedScores[] =
-        {
-            m_MaximumNormalizedScore,
-            m_MaximumNormalizedScore,
-            m_MaximumNormalizedScore,
-            m_MaximumNormalizedScore
-        };
+    double normalizedScores[] = {m_MaximumNormalizedScore, m_MaximumNormalizedScore, m_MaximumNormalizedScore, m_MaximumNormalizedScore};
 
     uint32_t discreteScore = this->discreteScore(score);
@@ -390,26 +323,17 @@ bool CAnomalyScore::CNormalizer::normalize(double &score) const
     uint32_t noiseScore;
     m_RawScoreQuantileSummary.quantile(m_NoisePercentile / 100.0, noiseScore);
     TDoubleDoublePrVecCItr knotPoint =
-        std::lower_bound(m_NormalizedScoreKnotPoints.begin(),
-                         m_NormalizedScoreKnotPoints.end(),
-                         TDoubleDoublePr(m_NoisePercentile, 0.0));
-    double signalStrength = m_NoiseMultiplier
-                            * 10.0 / DISCRETIZATION_FACTOR
-                            * (static_cast<double>(discreteScore) - static_cast<double>(noiseScore));
+        std::lower_bound(m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end(), TDoubleDoublePr(m_NoisePercentile, 0.0));
+    double signalStrength =
+        m_NoiseMultiplier * 10.0 / DISCRETIZATION_FACTOR * (static_cast<double>(discreteScore) - static_cast<double>(noiseScore));
 
     double l0;
     double u0;
     m_RawScoreQuantileSummary.cdf(0, 0.0, l0, u0);
-    normalizedScores[0] = knotPoint->second * std::max(1.0 + signalStrength, 0.0)
-                          + m_MaximumNormalizedScore
-                            * std::max(2.0 * std::min(50.0 * (l0 + u0)
-                                                      / m_NoisePercentile, 1.0) - 1.0, 0.0);
-    LOG_TRACE("normalizedScores[0] = " << normalizedScores[0]
-              << ", knotPoint = " << knotPoint->second
-              << ", discreteScore = " << discreteScore
-              << ", noiseScore = " << noiseScore
-              << ", l(0) = " << l0
-              << ", u(0) = " << u0
-              << ", signalStrength = " << signalStrength);
+    normalizedScores[0] = knotPoint->second * std::max(1.0 + signalStrength, 0.0) +
+                          m_MaximumNormalizedScore * std::max(2.0 * std::min(50.0 * (l0 + u0) / m_NoisePercentile, 1.0) - 1.0, 0.0);
+    LOG_TRACE("normalizedScores[0] = " << normalizedScores[0] << ", knotPoint = " << knotPoint->second
+                                       << ", discreteScore = " << discreteScore << ", noiseScore = " << noiseScore << ", l(0) = " << l0
+                                       << ", u(0) = " << u0 << ", signalStrength = " << signalStrength);
 
     // Compute the raw normalized score. Note we compute the probability
     // of seeing a lower score on the normal bucket length and convert
@@ -421,98 +345,64 @@ bool CAnomalyScore::CNormalizer::normalize(double &score) const
     this->quantile(score, CONFIDENCE_INTERVAL, lowerBound, upperBound);
     double lowerPercentile = 100.0 * std::pow(lowerBound, 1.0 / m_BucketNormalizationFactor);
     double upperPercentile = 100.0 * std::pow(upperBound, 1.0 / m_BucketNormalizationFactor);
-    if (lowerPercentile > upperPercentile)
-    {
+    if (lowerPercentile > upperPercentile) {
         std::swap(lowerPercentile, upperPercentile);
     }
     lowerPercentile = maths::CTools::truncate(lowerPercentile, 0.0, 100.0);
     upperPercentile = maths::CTools::truncate(upperPercentile, 0.0, 100.0);
 
-    std::size_t lowerKnotPoint =
-        std::max(std::lower_bound(m_NormalizedScoreKnotPoints.begin(),
-                                  m_NormalizedScoreKnotPoints.end(),
-                                  lowerPercentile,
-                                  maths::COrderings::SFirstLess())
-                 - m_NormalizedScoreKnotPoints.begin(), ptrdiff_t(1));
-    std::size_t upperKnotPoint =
-        std::max(std::lower_bound(m_NormalizedScoreKnotPoints.begin(),
-                                  m_NormalizedScoreKnotPoints.end(),
-                                  upperPercentile,
-                                  maths::COrderings::SFirstLess())
-                 - m_NormalizedScoreKnotPoints.begin(), ptrdiff_t(1));
-    if (lowerKnotPoint < m_NormalizedScoreKnotPoints.size())
-    {
-        const TDoubleDoublePr &left = m_NormalizedScoreKnotPoints[lowerKnotPoint - 1];
-        const TDoubleDoublePr &right = m_NormalizedScoreKnotPoints[lowerKnotPoint];
+    std::size_t lowerKnotPoint = std::max(
+        std::lower_bound(
+            m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end(), lowerPercentile, maths::COrderings::SFirstLess()) -
+            m_NormalizedScoreKnotPoints.begin(),
+        ptrdiff_t(1));
+    std::size_t upperKnotPoint = std::max(
+        std::lower_bound(
+            m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end(), upperPercentile, maths::COrderings::SFirstLess()) -
+            m_NormalizedScoreKnotPoints.begin(),
+        ptrdiff_t(1));
+    if (lowerKnotPoint < m_NormalizedScoreKnotPoints.size()) {
+        const TDoubleDoublePr& left = m_NormalizedScoreKnotPoints[lowerKnotPoint - 1];
+        const TDoubleDoublePr& right = m_NormalizedScoreKnotPoints[lowerKnotPoint];
 
         // Linearly interpolate between the two knot points.
-        normalizedScores[1] = left.second
-                              + (right.second - left.second)
-                                * (lowerPercentile - left.first)
-                                / (right.first - left.first);
-    }
-    else
-    {
+        normalizedScores[1] = left.second + (right.second - left.second) * (lowerPercentile - left.first) / (right.first - left.first);
+    } else {
         normalizedScores[1] = m_MaximumNormalizedScore;
     }
-    if (upperKnotPoint < m_NormalizedScoreKnotPoints.size())
-    {
-        const TDoubleDoublePr &left = m_NormalizedScoreKnotPoints[upperKnotPoint - 1];
-        const TDoubleDoublePr &right = m_NormalizedScoreKnotPoints[upperKnotPoint];
+    if (upperKnotPoint < m_NormalizedScoreKnotPoints.size()) {
+        const TDoubleDoublePr& left = m_NormalizedScoreKnotPoints[upperKnotPoint - 1];
+        const TDoubleDoublePr& right = m_NormalizedScoreKnotPoints[upperKnotPoint];
 
         // Linearly interpolate between the two knot points.
-        normalizedScores[1] = ( normalizedScores[1]
-                               + left.second
-                               + (right.second - left.second)
-                                 * (upperPercentile - left.first)
-                                 / (right.first - left.first)) / 2.0;
+        normalizedScores[1] = (normalizedScores[1] + left.second +
+                               (right.second - left.second) * (upperPercentile - left.first) / (right.first - left.first)) /
+                              2.0;
+    } else {
+        normalizedScores[1] = (normalizedScores[1] + m_MaximumNormalizedScore) / 2.0;
     }
-    else
-    {
-        normalizedScores[1] = (normalizedScores[1] + m_MaximumNormalizedScore ) / 2.0;
-    }
-    LOG_TRACE("normalizedScores[1] = " << normalizedScores[1]
-              << ", lowerBound = " << lowerBound
-              << ", upperBound = " << upperBound
-              << ", lowerPercentile = " << lowerPercentile
-              << ", upperPercentile = " << upperPercentile);
+    LOG_TRACE("normalizedScores[1] = " << normalizedScores[1] << ", lowerBound = " << lowerBound << ", upperBound = " << upperBound
+              << ", lowerPercentile = " << lowerPercentile << ", upperPercentile = " << upperPercentile);
 
     // Compute the maximum score ceiling.
     double ratio = score / m_MaxScore[0];
-    double curves[] =
-        {
-            0.0 + 1.5 * ratio,
-            0.5 + 0.5 * ratio
-        };
+    double curves[] = {0.0 + 1.5 * ratio, 0.5 + 0.5 * ratio};
     normalizedScores[2] = m_MaximumNormalizedScore * (*std::min_element(curves, curves + 2));
-    LOG_TRACE("normalizedScores[2] = " << normalizedScores[2]
-              << ", score = " << score
-              << ", maxScore = " << m_MaxScore[0]);
+    LOG_TRACE("normalizedScores[2] = " << normalizedScores[2] << ", score = " << score << ", maxScore = " << m_MaxScore[0]);
 
     // Logarithmically interpolate the maximum score between the
     // largest significant and small probability.
-    static const double M = ( probabilityToScore(maths::SMALL_PROBABILITY)
-                             - probabilityToScore(maths::LARGEST_SIGNIFICANT_PROBABILITY))
-                            / ( std::log(maths::SMALL_PROBABILITY)
-                               - std::log(maths::LARGEST_SIGNIFICANT_PROBABILITY));
+    static const double M = (probabilityToScore(maths::SMALL_PROBABILITY) - probabilityToScore(maths::LARGEST_SIGNIFICANT_PROBABILITY)) /
+                            (std::log(maths::SMALL_PROBABILITY) - std::log(maths::LARGEST_SIGNIFICANT_PROBABILITY));
     static const double C = std::log(maths::LARGEST_SIGNIFICANT_PROBABILITY);
-    normalizedScores[3] = m_MaximumNormalizedScore
-                          * (0.95 * M * (std::log(scoreToProbability(score)) - C) + 0.05);
-    LOG_TRACE("normalizedScores[3] = " << normalizedScores[3]
-              << ", score = " << score
-              << ", probability = " << scoreToProbability(score));
-
-    score = std::min(*std::min_element(boost::begin(normalizedScores),
-                                       boost::end(normalizedScores)),
-                     m_MaximumNormalizedScore);
+    normalizedScores[3] = m_MaximumNormalizedScore * (0.95 * M * (std::log(scoreToProbability(score)) - C) + 0.05);
+    LOG_TRACE("normalizedScores[3] = " << normalizedScores[3] << ", score = " << score << ", probability = " << scoreToProbability(score));
+
+    score = std::min(*std::min_element(boost::begin(normalizedScores), boost::end(normalizedScores)), m_MaximumNormalizedScore);
     LOG_TRACE("normalizedScore = " << score);
 
     return true;
 }
 
-void CAnomalyScore::CNormalizer::quantile(double score,
-                                          double confidence,
-                                          double &lowerBound,
-                                          double &upperBound) const
-{
+void CAnomalyScore::CNormalizer::quantile(double score, double confidence, double& lowerBound, double& upperBound) const {
     uint32_t discreteScore = this->discreteScore(score);
     double n = static_cast<double>(m_RawScoreQuantileSummary.n());
     double lowerQuantile = (100.0 - confidence) / 200.0;
@@ -520,15 +410,13 @@ void CAnomalyScore::CNormalizer::quantile(double score,
     double h = static_cast<double>(m_HighPercentileCount);
     double f = h / n;
 
-    if (!(f >= 0.0 && f <= 1.0))
-    {
+    if (!(f >= 0.0 && f <= 1.0)) {
         LOG_ERROR("h = " << h << ", n = " << n);
     }
 
     double fl = maths::CQDigest::cdfQuantile(n, f, lowerQuantile);
     double fu = maths::CQDigest::cdfQuantile(n, f, upperQuantile);
 
-    if (discreteScore <= m_HighPercentileScore || m_RawScoreHighQuantileSummary.n() == 0)
-    {
+    if (discreteScore <= m_HighPercentileScore || m_RawScoreHighQuantileSummary.n() == 0) {
         m_RawScoreQuantileSummary.cdf(discreteScore, 0.0, lowerBound, upperBound);
 
         double pdfLowerBound;
@@ -536,19 +424,15 @@ void CAnomalyScore::CNormalizer::quantile(double score,
         m_RawScoreQuantileSummary.pdf(discreteScore, 0.0, pdfLowerBound, pdfUpperBound);
         lowerBound = maths::CTools::truncate(lowerBound - pdfUpperBound, 0.0, fl);
         upperBound = maths::CTools::truncate(upperBound - pdfLowerBound, 0.0, fu);
-        if ( !(lowerBound >= 0.0 && lowerBound <= 1.0)
-            || !(upperBound >= 0.0 && upperBound <= 1.0))
-        {
-            LOG_ERROR("score = " << score
-                      << ", cdf = [" << lowerBound << "," << upperBound << "]"
-                      << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]");
+        if (!(lowerBound >= 0.0 && lowerBound <= 1.0) || !(upperBound >= 0.0 && upperBound <= 1.0)) {
+            LOG_ERROR("score = " << score << ", cdf = [" << lowerBound << "," << upperBound << "]"
+                                 << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]");
         }
 
         lowerBound = maths::CQDigest::cdfQuantile(n, lowerBound, lowerQuantile);
         upperBound = maths::CQDigest::cdfQuantile(n, upperBound, upperQuantile);
 
-        LOG_TRACE("score = " << score
-                  << ", cdf = [" << lowerBound << "," << upperBound << "]"
-                  << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]");
+        LOG_TRACE("score = " << score << ", cdf = [" << lowerBound << "," << upperBound << "]"
+                             << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]");
         return;
     }
 
@@ -563,66 +447,47 @@ void CAnomalyScore::CNormalizer::quantile(double score,
     double cutoffCdfLowerBound;
     double cutoffCdfUpperBound;
-    m_RawScoreHighQuantileSummary.cdf(m_HighPercentileScore,
-                                      0.0,
-                                      cutoffCdfLowerBound,
-                                      cutoffCdfUpperBound);
+    m_RawScoreHighQuantileSummary.cdf(m_HighPercentileScore, 0.0, cutoffCdfLowerBound, cutoffCdfUpperBound);
 
     double pdfLowerBound;
     double pdfUpperBound;
     m_RawScoreHighQuantileSummary.pdf(discreteScore, 0.0, pdfLowerBound, pdfUpperBound);
-    lowerBound = fl + (1.0 - fl) * std::max(lowerBound
-                                            - cutoffCdfUpperBound
-                                            - pdfUpperBound, 0.0)
-                                   / std::max(1.0 - cutoffCdfUpperBound,
-                                              std::numeric_limits<double>::epsilon());
-    upperBound = fu + (1.0 - fu) * std::max(upperBound
-                                            - cutoffCdfLowerBound
-                                            - pdfLowerBound, 0.0)
-                                   / std::max(1.0 - cutoffCdfLowerBound,
-                                              std::numeric_limits<double>::epsilon());
-    if ( !(lowerBound >= 0.0 && lowerBound <= 1.0)
-        || !(upperBound >= 0.0 && upperBound <= 1.0))
-    {
-        LOG_ERROR("score = " << score
-                  << ", cdf = [" << lowerBound << "," << upperBound << "]"
-                  << ", cutoff = [" << cutoffCdfLowerBound << "," << cutoffCdfUpperBound << "]"
-                  << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]"
-                  << ", f = " << f);
+    lowerBound = fl + (1.0 - fl) * std::max(lowerBound - cutoffCdfUpperBound - pdfUpperBound, 0.0) /
+                          std::max(1.0 - cutoffCdfUpperBound, std::numeric_limits<double>::epsilon());
+    upperBound = fu + (1.0 - fu) * std::max(upperBound - cutoffCdfLowerBound - pdfLowerBound, 0.0) /
+                          std::max(1.0 - cutoffCdfLowerBound, std::numeric_limits<double>::epsilon());
+    if (!(lowerBound >= 0.0 && lowerBound <= 1.0) || !(upperBound >= 0.0 && upperBound <= 1.0)) {
+        LOG_ERROR("score = " << score << ", cdf = [" << lowerBound << "," << upperBound << "]"
cutoffCdfLowerBound << "," << cutoffCdfUpperBound << "]"
+                            << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]"
+                            << ", f = " << f);
    }

    lowerBound = maths::CQDigest::cdfQuantile(n, lowerBound, lowerQuantile);
    upperBound = maths::CQDigest::cdfQuantile(n, upperBound, upperQuantile);

-    LOG_TRACE("score = " << score
-              << ", cdf = [" << lowerBound << "," << upperBound << "]"
-              << ", cutoff = [" << cutoffCdfLowerBound << "," << cutoffCdfUpperBound << "]"
-              << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]"
-              << ", f = " << f);
+    LOG_TRACE("score = " << score << ", cdf = [" << lowerBound << "," << upperBound << "]"
+              << ", cutoff = [" << cutoffCdfLowerBound << "," << cutoffCdfUpperBound << "]"
+              << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]"
+              << ", f = " << f);
}

-bool CAnomalyScore::CNormalizer::updateQuantiles(const TDoubleVec &scores)
-{
+bool CAnomalyScore::CNormalizer::updateQuantiles(const TDoubleVec& scores) {
    return this->updateQuantiles(std::accumulate(scores.begin(), scores.end(), 0.0));
}

-bool CAnomalyScore::CNormalizer::updateQuantiles(double score)
-{
+bool CAnomalyScore::CNormalizer::updateQuantiles(double score) {
    using TUInt32UInt64Pr = std::pair<uint32_t, uint64_t>;
    using TUInt32UInt64PrVec = std::vector<TUInt32UInt64Pr>;

    bool bigChange(false);
    double oldMaxScore(m_MaxScore.count() == 0 ? 0.0 : m_MaxScore[0]);
    m_MaxScore.add(score);
-    if (m_MaxScore[0] > BIG_CHANGE_FACTOR * oldMaxScore)
-    {
+    if (m_MaxScore[0] > BIG_CHANGE_FACTOR * oldMaxScore) {
        bigChange = true;
-        LOG_DEBUG("Big change in normalizer - max score updated from "
-                  << oldMaxScore << " to " << m_MaxScore[0]);
+        LOG_DEBUG("Big change in normalizer - max score updated from " << oldMaxScore << " to " << m_MaxScore[0]);
    }
    uint32_t discreteScore = this->discreteScore(score);
-    LOG_TRACE("score = " << score
-              << ", discreteScore = " << discreteScore
-              << ", maxScore = " << m_MaxScore[0]);
+    LOG_TRACE("score = " << score << ", discreteScore = " << discreteScore << ", maxScore = " << m_MaxScore[0]);

    uint64_t n = m_RawScoreQuantileSummary.n();
    uint64_t k = m_RawScoreQuantileSummary.k();
@@ -632,52 +497,39 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score)
    // the unique values we have seen so far. So we extract the values
    // greater than the HIGH_PERCENTILE percentile at this point to
    // initialize the fine grain high quantile summary.
-    if ((n + 1) == k)
-    {
+    if ((n + 1) == k) {
        LOG_TRACE("Initializing H");

        TUInt32UInt64PrVec L;
        m_RawScoreQuantileSummary.summary(L);
-        if (L.empty())
-        {
-            LOG_ERROR("High quantile summary is empty: "
-                      << m_RawScoreQuantileSummary.print());
-        }
-        else
-        {
-            uint64_t highPercentileCount =
-                    static_cast<uint64_t>((HIGH_PERCENTILE / 100.0)
-                                          * static_cast<double>(n) + 0.5);
+        if (L.empty()) {
+            LOG_ERROR("High quantile summary is empty: " << m_RawScoreQuantileSummary.print());
+        } else {
+            uint64_t highPercentileCount = static_cast<uint64_t>((HIGH_PERCENTILE / 100.0) * static_cast<double>(n) + 0.5);

            // Estimate the high percentile score and update the count.
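            // For example, if HIGH_PERCENTILE were, say, 99.0 and n were
            // 1000, the target count would be
            // static_cast<uint64_t>(0.99 * 1000.0 + 0.5) == 990: the scan
            // below stops at the first knot whose cumulative count exceeds
            // the target and the preceding knot becomes s(H), so roughly
            // the largest 1% of distinct scores seed the high summary.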
            std::size_t i = 1u;
            m_HighPercentileScore = L[0].first;
            m_HighPercentileCount = L[0].second;
-            for (/**/; i < L.size(); ++i)
-            {
-                if (L[i].second > highPercentileCount)
-                {
+            for (/**/; i < L.size(); ++i) {
+                if (L[i].second > highPercentileCount) {
                    m_HighPercentileScore = L[i - 1].first;
                    m_HighPercentileCount = L[i - 1].second;
                    break;
                }
            }
-            if (m_HighPercentileCount > n)
-            {
+            if (m_HighPercentileCount > n) {
                LOG_ERROR("Invalid c(H) " << m_HighPercentileCount);
                LOG_ERROR("target " << highPercentileCount);
                LOG_ERROR("L " << core::CContainerPrinter::print(L));
                m_HighPercentileCount = n;
            }
-            LOG_TRACE("s(H) = " << m_HighPercentileScore
-                      << ", c(H) = " << m_HighPercentileCount
-                      << ", percentile = " << 100.0 * static_cast<double>(m_HighPercentileCount)
-                                              / static_cast<double>(n) << "%"
-                      << ", desired c(H) = " << highPercentileCount);
+            LOG_TRACE("s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount
+                      << ", percentile = " << 100.0 * static_cast<double>(m_HighPercentileCount) / static_cast<double>(n) << "%"
+                      << ", desired c(H) = " << highPercentileCount);

            // Populate the high quantile summary.
-            for (/**/; i < L.size(); ++i)
-            {
+            for (/**/; i < L.size(); ++i) {
                uint32_t x = L[i].first;
                uint64_t m = L[i].second - L[i - 1].second;
@@ -689,65 +541,45 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score)

    m_RawScoreQuantileSummary.add(discreteScore);
-    if (discreteScore <= m_HighPercentileScore)
-    {
+    if (discreteScore <= m_HighPercentileScore) {
        ++m_HighPercentileCount;
-    }
-    else
-    {
+    } else {
        m_RawScoreHighQuantileSummary.add(discreteScore);
    }
-    LOG_TRACE("percentile = " << static_cast<double>(m_HighPercentileCount)
-                                 / static_cast<double>(n + 1));
+    LOG_TRACE("percentile = " << static_cast<double>(m_HighPercentileCount) / static_cast<double>(n + 1));

    // Periodically refresh the high percentile score.
-    if ((n + 1) > k && (n + 1) % k == 0)
-    {
+    if ((n + 1) > k && (n + 1) % k == 0) {
        LOG_TRACE("Refreshing high quantile summary");

-        uint64_t highPercentileCount =
-                static_cast<uint64_t>((HIGH_PERCENTILE / 100.0)
-                                      * static_cast<double>(n + 1) + 0.5);
+        uint64_t highPercentileCount = static_cast<uint64_t>((HIGH_PERCENTILE / 100.0) * static_cast<double>(n + 1) + 0.5);

-        LOG_TRACE("s(H) = " << m_HighPercentileScore
-                  << ", c(H) = " << m_HighPercentileCount
-                  << ", desired c(H) = " << highPercentileCount);
+        LOG_TRACE("s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount << ", desired c(H) = " << highPercentileCount);

-        if (m_HighPercentileCount > highPercentileCount)
-        {
+        if (m_HighPercentileCount > highPercentileCount) {
            TUInt32UInt64PrVec L;
            m_RawScoreQuantileSummary.summary(L);
            TUInt32UInt64PrVec H;
            m_RawScoreHighQuantileSummary.summary(H);
-            std::size_t i0 = std::min(static_cast<std::size_t>(
-                                          std::lower_bound(L.begin(), L.end(),
-                                                           highPercentileCount,
-                                                           maths::COrderings::SSecondLess())
-                                          - L.begin()), L.size() - 1);
-            std::size_t j = std::min(static_cast<std::size_t>(
-                                         std::upper_bound(H.begin(), H.end(),
-                                                          L[i0],
-                                                          maths::COrderings::SFirstLess())
-                                         - H.begin()), H.size() - 1);
+            std::size_t i0 =
+                std::min(static_cast<std::size_t>(
+                             std::lower_bound(L.begin(), L.end(), highPercentileCount, maths::COrderings::SSecondLess()) - L.begin()),
+                         L.size() - 1);
+            std::size_t j =
+                std::min(static_cast<std::size_t>(std::upper_bound(H.begin(), H.end(), L[i0], maths::COrderings::SFirstLess()) - H.begin()),
+                         H.size() - 1);

            uint64_t r = L[i0].second;
-            for (std::size_t i = i0 + 1;
-                 i < L.size() && L[i0].second + m_RawScoreHighQuantileSummary.n() < n+1;
-                 ++i)
-            {
-                for (/**/; j < H.size() && H[j].first <= L[i].first; ++j)
-                {
-                    r += (H[j].second - (j == 0 ?
- static_cast(0) : - H[j - 1].second)); + for (std::size_t i = i0 + 1; i < L.size() && L[i0].second + m_RawScoreHighQuantileSummary.n() < n + 1; ++i) { + for (/**/; j < H.size() && H[j].first <= L[i].first; ++j) { + r += (H[j].second - (j == 0 ? static_cast(0) : H[j - 1].second)); } uint32_t x = L[i].first; uint64_t m = r < L[i].second ? L[i].second - r : static_cast(0); r += m; - if (m > 0) - { + if (m > 0) { LOG_TRACE("Adding (" << x << ',' << m << ") to H"); m_RawScoreHighQuantileSummary.add(x, m); } @@ -755,44 +587,31 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score) m_HighPercentileScore = L[i0].first; m_HighPercentileCount = L[i0].second; - if (m_HighPercentileCount > n+1) - { + if (m_HighPercentileCount > n + 1) { LOG_ERROR("Invalid c(H) " << m_HighPercentileCount); LOG_ERROR("target " << highPercentileCount); LOG_ERROR("L " << core::CContainerPrinter::print(L)); m_HighPercentileCount = n; } - LOG_TRACE("s(H) = " << m_HighPercentileScore - << ", c(H) = " << m_HighPercentileCount - << ", percentile = " << 100.0 * static_cast(m_HighPercentileCount) - / static_cast(n + 1) << "%"); - } - else - { - m_RawScoreQuantileSummary.quantile(HIGH_PERCENTILE / 100.0, - m_HighPercentileScore); + LOG_TRACE("s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount << ", percentile = " + << 100.0 * static_cast(m_HighPercentileCount) / static_cast(n + 1) << "%"); + } else { + m_RawScoreQuantileSummary.quantile(HIGH_PERCENTILE / 100.0, m_HighPercentileScore); double lowerBound, upperBound; - m_RawScoreQuantileSummary.cdf(m_HighPercentileScore, - 0.0, - lowerBound, - upperBound); - m_HighPercentileCount = static_cast(static_cast(n + 1) - * lowerBound + 0.5); - - LOG_TRACE("s(H) = " << m_HighPercentileScore - << ", c(H) = " << m_HighPercentileCount - << ", percentile = " << 100.0 * lowerBound << "%"); + m_RawScoreQuantileSummary.cdf(m_HighPercentileScore, 0.0, lowerBound, upperBound); + m_HighPercentileCount = static_cast(static_cast(n + 1) * lowerBound + 0.5); + + LOG_TRACE("s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount << ", percentile = " << 100.0 * lowerBound + << "%"); } } return bigChange; } -void CAnomalyScore::CNormalizer::propagateForwardByTime(double time) -{ - if (time < 0.0) - { +void CAnomalyScore::CNormalizer::propagateForwardByTime(double time) { + if (time < 0.0) { LOG_ERROR("Can't propagate normalizer backwards in time"); return; } @@ -804,91 +623,64 @@ void CAnomalyScore::CNormalizer::propagateForwardByTime(double time) // we can accurately estimate high quantiles. We achieve this by only // aging them a certain fraction of the time. 
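    //
    // For example, with QUANTILE_DECAY_TIME equal to 20.0, twenty calls of
    // propagateForwardByTime(1.0) accumulate before the two summaries are
    // aged, and the code below then ages them by the whole number of unit
    // time steps that have elapsed in one go.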
    m_TimeToQuantileDecay -= time;
-    if (m_TimeToQuantileDecay <= 0.0)
-    {
-        time = std::floor(  (QUANTILE_DECAY_TIME - m_TimeToQuantileDecay)
-                          / QUANTILE_DECAY_TIME);
+    if (m_TimeToQuantileDecay <= 0.0) {
+        time = std::floor((QUANTILE_DECAY_TIME - m_TimeToQuantileDecay) / QUANTILE_DECAY_TIME);

        uint64_t n = m_RawScoreQuantileSummary.n();
        m_RawScoreQuantileSummary.propagateForwardsByTime(time);
        m_RawScoreHighQuantileSummary.propagateForwardsByTime(time);
-        if (n > 0)
-        {
-            m_HighPercentileCount = static_cast<uint64_t>(
-                    static_cast<double>(m_RawScoreQuantileSummary.n())
-                    / static_cast<double>(n)
-                    * static_cast<double>(m_HighPercentileCount) + 0.5);
+        if (n > 0) {
+            m_HighPercentileCount = static_cast<uint64_t>(static_cast<double>(m_RawScoreQuantileSummary.n()) / static_cast<double>(n) *
                                                              static_cast<double>(m_HighPercentileCount) +
+                                                          0.5);
        }
-        m_TimeToQuantileDecay += QUANTILE_DECAY_TIME
-                                 + std::floor(-m_TimeToQuantileDecay / QUANTILE_DECAY_TIME);
+        m_TimeToQuantileDecay += QUANTILE_DECAY_TIME + std::floor(-m_TimeToQuantileDecay / QUANTILE_DECAY_TIME);
    }
}

-bool CAnomalyScore::CNormalizer::isUpgradable(const std::string &fromVersion,
-                                              const std::string &toVersion)
-{
+bool CAnomalyScore::CNormalizer::isUpgradable(const std::string& fromVersion, const std::string& toVersion) {
    // Any changes to this method need to be reflected in the upgrade() method
    // below to prevent an inconsistency where this method says an upgrade is
    // possible but the upgrade() method can't do it.
-    return (fromVersion == "1" && toVersion == "2")
-        || (fromVersion == "1" && toVersion == "3")
-        || (fromVersion == "2" && toVersion == "3");
+    return (fromVersion == "1" && toVersion == "2") || (fromVersion == "1" && toVersion == "3") || (fromVersion == "2" && toVersion == "3");
}

-bool CAnomalyScore::CNormalizer::upgrade(const std::string &loadedVersion,
-                                         const std::string &currentVersion)
-{
-    if (loadedVersion == currentVersion)
-    {
+bool CAnomalyScore::CNormalizer::upgrade(const std::string& loadedVersion, const std::string& currentVersion) {
+    if (loadedVersion == currentVersion) {
        // No upgrade required.
        return true;
    }

    // We know how to upgrade between versions 1, 2 and 3.
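    //
    // The matrices below are indexed [loadedVersion - 1][currentVersion - 1],
    // so, for example, upgrading version 1 state to version 3 scales the
    // historical maximum score by 0.3 and the Q digest values by 30.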
- static const double HIGH_SCORE_UPGRADE_FACTOR[][3] = - { - { 1.0, 0.3, 0.3 }, - { 1.0 / 0.3, 1.0, 1.0 }, - { 1.0 / 0.3, 1.0, 1.0 }, - }; - static const double Q_DIGEST_UPGRADE_FACTOR[][3] = - { - { 1.0, 3.0, 30.0 }, - { 1.0 / 3.0, 1.0, 10.0 }, - { 1.0 / 30.0, 1.0 / 10.0, 1.0 } - }; + static const double HIGH_SCORE_UPGRADE_FACTOR[][3] = { + {1.0, 0.3, 0.3}, + {1.0 / 0.3, 1.0, 1.0}, + {1.0 / 0.3, 1.0, 1.0}, + }; + static const double Q_DIGEST_UPGRADE_FACTOR[][3] = {{1.0, 3.0, 30.0}, {1.0 / 3.0, 1.0, 10.0}, {1.0 / 30.0, 1.0 / 10.0, 1.0}}; std::size_t i, j; - if ( !core::CStringUtils::stringToType(loadedVersion, i) - || !core::CStringUtils::stringToType(currentVersion, j) - || i-1 >= boost::size(HIGH_SCORE_UPGRADE_FACTOR) - || j-1 >= boost::size(HIGH_SCORE_UPGRADE_FACTOR[0])) - { - LOG_ERROR("Don't know how to upgrade quantiles from version " << - loadedVersion << " to version " << currentVersion); + if (!core::CStringUtils::stringToType(loadedVersion, i) || !core::CStringUtils::stringToType(currentVersion, j) || + i - 1 >= boost::size(HIGH_SCORE_UPGRADE_FACTOR) || j - 1 >= boost::size(HIGH_SCORE_UPGRADE_FACTOR[0])) { + LOG_ERROR("Don't know how to upgrade quantiles from version " << loadedVersion << " to version " << currentVersion); return false; } + double highScoreUpgradeFactor = HIGH_SCORE_UPGRADE_FACTOR[i - 1][j - 1]; + double qDigestUpgradeFactor = Q_DIGEST_UPGRADE_FACTOR[i - 1][j - 1]; - double highScoreUpgradeFactor = HIGH_SCORE_UPGRADE_FACTOR[i-1][j-1]; - double qDigestUpgradeFactor = Q_DIGEST_UPGRADE_FACTOR[i-1][j-1]; - - LOG_INFO("Upgrading quantiles from version " << loadedVersion << - " to version " << currentVersion << - " - will scale highest score by " << highScoreUpgradeFactor << - " and Q digest min/max values by " << qDigestUpgradeFactor); + LOG_INFO("Upgrading quantiles from version " << loadedVersion << " to version " << currentVersion << " - will scale highest score by " + << highScoreUpgradeFactor << " and Q digest min/max values by " << qDigestUpgradeFactor); // For the maximum score aging is equivalent to scaling. 
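    //
    // That is, the age() call below just multiplies the stored maximum by
    // highScoreUpgradeFactor (0.3 when moving from version 1 to version 3),
    // as if every raw score had been rescaled before being added.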
m_MaxScore.age(highScoreUpgradeFactor); - if (m_RawScoreQuantileSummary.scale(qDigestUpgradeFactor) == false) - { + if (m_RawScoreQuantileSummary.scale(qDigestUpgradeFactor) == false) { LOG_ERROR("Failed to scale raw score quantiles"); return false; } - if (m_RawScoreHighQuantileSummary.scale(qDigestUpgradeFactor) == false) - { + if (m_RawScoreHighQuantileSummary.scale(qDigestUpgradeFactor) == false) { LOG_ERROR("Failed to scale raw score high quantiles"); return false; } @@ -896,8 +688,7 @@ bool CAnomalyScore::CNormalizer::upgrade(const std::string &loadedVersion, return true; } -void CAnomalyScore::CNormalizer::clear() -{ +void CAnomalyScore::CNormalizer::clear() { m_HighPercentileScore = std::numeric_limits::max(); m_HighPercentileCount = 0ull; m_MaxScore.clear(); @@ -906,85 +697,59 @@ void CAnomalyScore::CNormalizer::clear() m_TimeToQuantileDecay = QUANTILE_DECAY_TIME; } -void CAnomalyScore::CNormalizer::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CAnomalyScore::CNormalizer::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(HIGH_PERCENTILE_SCORE_TAG, m_HighPercentileScore); inserter.insertValue(HIGH_PERCENTILE_COUNT_TAG, m_HighPercentileCount); inserter.insertValue(MAX_SCORE_TAG, m_MaxScore.toDelimited()); - inserter.insertLevel(RAW_SCORE_QUANTILE_SUMMARY, - boost::bind(&maths::CQDigest::acceptPersistInserter, - &m_RawScoreQuantileSummary, _1)); + inserter.insertLevel(RAW_SCORE_QUANTILE_SUMMARY, boost::bind(&maths::CQDigest::acceptPersistInserter, &m_RawScoreQuantileSummary, _1)); inserter.insertLevel(RAW_SCORE_HIGH_QUANTILE_SUMMARY, - boost::bind(&maths::CQDigest::acceptPersistInserter, - &m_RawScoreHighQuantileSummary, _1)); + boost::bind(&maths::CQDigest::acceptPersistInserter, &m_RawScoreHighQuantileSummary, _1)); inserter.insertValue(TIME_TO_QUANTILE_DECAY_TAG, m_TimeToQuantileDecay); } -bool CAnomalyScore::CNormalizer::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); +bool CAnomalyScore::CNormalizer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); - if (name == HIGH_PERCENTILE_SCORE_TAG) - { + if (name == HIGH_PERCENTILE_SCORE_TAG) { // This used to be 64 bit but is now 32 bit, so may need adjusting // on restoration uint64_t highPercentileScore64(0); - if (core::CStringUtils::stringToType(traverser.value(), - highPercentileScore64) == false) - { + if (core::CStringUtils::stringToType(traverser.value(), highPercentileScore64) == false) { LOG_ERROR("Invalid high percentile score in " << traverser.value()); return false; } - m_HighPercentileScore = static_cast(std::min(highPercentileScore64, - static_cast(std::numeric_limits::max()))); - } - else if (name == HIGH_PERCENTILE_COUNT_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), - m_HighPercentileCount) == false) - { + m_HighPercentileScore = + static_cast(std::min(highPercentileScore64, static_cast(std::numeric_limits::max()))); + } else if (name == HIGH_PERCENTILE_COUNT_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), m_HighPercentileCount) == false) { LOG_ERROR("Invalid high percentile count in " << traverser.value()); return false; } - } - else if (name == MAX_SCORE_TAG) - { - if (m_MaxScore.fromDelimited(traverser.value()) == false) - { + } else if (name == MAX_SCORE_TAG) { + if (m_MaxScore.fromDelimited(traverser.value()) == false) { LOG_ERROR("Invalid max score in " << 
traverser.value()); return false; } - } - else if (name == RAW_SCORE_QUANTILE_SUMMARY) - { - if (traverser.traverseSubLevel(boost::bind(&maths::CQDigest::acceptRestoreTraverser, - &m_RawScoreQuantileSummary, _1)) == false) - { - LOG_ERROR("Invalid raw score quantile summary in " << - traverser.value()); + } else if (name == RAW_SCORE_QUANTILE_SUMMARY) { + if (traverser.traverseSubLevel(boost::bind(&maths::CQDigest::acceptRestoreTraverser, &m_RawScoreQuantileSummary, _1)) == + false) { + LOG_ERROR("Invalid raw score quantile summary in " << traverser.value()); return false; } - } - else if (name == RAW_SCORE_HIGH_QUANTILE_SUMMARY) - { - if (traverser.traverseSubLevel(boost::bind(&maths::CQDigest::acceptRestoreTraverser, - &m_RawScoreHighQuantileSummary, _1)) == false) - { - LOG_ERROR("Invalid raw score high quantile summary in " << - traverser.value()); + } else if (name == RAW_SCORE_HIGH_QUANTILE_SUMMARY) { + if (traverser.traverseSubLevel(boost::bind(&maths::CQDigest::acceptRestoreTraverser, &m_RawScoreHighQuantileSummary, _1)) == + false) { + LOG_ERROR("Invalid raw score high quantile summary in " << traverser.value()); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -uint64_t CAnomalyScore::CNormalizer::checksum() const -{ +uint64_t CAnomalyScore::CNormalizer::checksum() const { uint64_t seed = static_cast(m_NoisePercentile); seed = maths::CChecksum::calculate(seed, m_NoiseMultiplier); seed = maths::CChecksum::calculate(seed, m_NormalizedScoreKnotPoints); @@ -999,13 +764,11 @@ uint64_t CAnomalyScore::CNormalizer::checksum() const return maths::CChecksum::calculate(seed, m_TimeToQuantileDecay); } -uint32_t CAnomalyScore::CNormalizer::discreteScore(double rawScore) const -{ +uint32_t CAnomalyScore::CNormalizer::discreteScore(double rawScore) const { return static_cast(DISCRETIZATION_FACTOR * rawScore + 0.5); } -double CAnomalyScore::CNormalizer::rawScore(uint32_t discreteScore) const -{ +double CAnomalyScore::CNormalizer::rawScore(uint32_t discreteScore) const { return static_cast(discreteScore) / DISCRETIZATION_FACTOR; } @@ -1015,97 +778,70 @@ const double CAnomalyScore::CNormalizer::QUANTILE_DECAY_TIME = 20.0; const double CAnomalyScore::CNormalizer::BIG_CHANGE_FACTOR = 1.1; // We use short field names to reduce the state size -namespace -{ +namespace { const std::string NORMALIZER_TAG("a"); } -const std::string &CAnomalyScore::normalizedScoreToSeverity(double normalizedScore) -{ - if (normalizedScore < 25.0) - { +const std::string& CAnomalyScore::normalizedScoreToSeverity(double normalizedScore) { + if (normalizedScore < 25.0) { return WARNING_SEVERITY; } - if (normalizedScore < 50.0) - { + if (normalizedScore < 50.0) { return MINOR_SEVERITY; } - if (normalizedScore < 75.0) - { + if (normalizedScore < 75.0) { return MAJOR_SEVERITY; } return CRITICAL_SEVERITY; } -bool CAnomalyScore::normalizerFromJson(const std::string &json, - CNormalizer &normalizer) -{ +bool CAnomalyScore::normalizerFromJson(const std::string& json, CNormalizer& normalizer) { std::istringstream iss(json); core::CJsonStateRestoreTraverser traverser(iss); return normalizerFromJson(traverser, normalizer); } -bool CAnomalyScore::normalizerFromJson(core::CStateRestoreTraverser &traverser, - CNormalizer &normalizer) -{ +bool CAnomalyScore::normalizerFromJson(core::CStateRestoreTraverser& traverser, CNormalizer& normalizer) { bool restoredNormalizer(false); std::string restoredVersion(MISSING_VERSION_FORMAT_VERSION); - while (traverser.next()) - { - const std::string &name = 
traverser.name(); + while (traverser.next()) { + const std::string& name = traverser.name(); - if (name == MLVERSION_ATTRIBUTE) - { + if (name == MLVERSION_ATTRIBUTE) { restoredVersion = traverser.value(); - if (restoredVersion != CURRENT_FORMAT_VERSION) - { - if (normalizer.isUpgradable(restoredVersion, CURRENT_FORMAT_VERSION)) - { - LOG_DEBUG("Restored quantiles JSON version is " << restoredVersion << - "; current JSON version is " << CURRENT_FORMAT_VERSION << - " - will upgrade quantiles"); - } - else - { + if (restoredVersion != CURRENT_FORMAT_VERSION) { + if (normalizer.isUpgradable(restoredVersion, CURRENT_FORMAT_VERSION)) { + LOG_DEBUG("Restored quantiles JSON version is " << restoredVersion << "; current JSON version is " + << CURRENT_FORMAT_VERSION << " - will upgrade quantiles"); + } else { // If the version has changed and the format is too different to // even upgrade then start again from scratch - this counts as a // successful load - LOG_INFO("Restored quantiles JSON version is " << restoredVersion << - "; current JSON version is " << CURRENT_FORMAT_VERSION << - " - will restart quantiles from scratch"); + LOG_INFO("Restored quantiles JSON version is " << restoredVersion << "; current JSON version is " + << CURRENT_FORMAT_VERSION << " - will restart quantiles from scratch"); return true; } } - } - else if (name == NORMALIZER_TAG) - { - restoredNormalizer = traverser.traverseSubLevel(boost::bind( - &CAnomalyScore::CNormalizer::acceptRestoreTraverser, - &normalizer, - _1)); - if (!restoredNormalizer) - { + } else if (name == NORMALIZER_TAG) { + restoredNormalizer = + traverser.traverseSubLevel(boost::bind(&CAnomalyScore::CNormalizer::acceptRestoreTraverser, &normalizer, _1)); + if (!restoredNormalizer) { LOG_ERROR("Unable to restore quantiles to the normaliser"); } } } - if (restoredNormalizer && - restoredVersion != CURRENT_FORMAT_VERSION) - { - LOG_INFO("Restored quantiles JSON version is " << restoredVersion << - "; current JSON version is " << CURRENT_FORMAT_VERSION << - " - will attempt upgrade"); - - if (normalizer.upgrade(restoredVersion, CURRENT_FORMAT_VERSION) == false) - { - LOG_ERROR("Failed to upgrade quantiles from version " << restoredVersion << - " to version " << CURRENT_FORMAT_VERSION); + if (restoredNormalizer && restoredVersion != CURRENT_FORMAT_VERSION) { + LOG_INFO("Restored quantiles JSON version is " << restoredVersion << "; current JSON version is " << CURRENT_FORMAT_VERSION + << " - will attempt upgrade"); + + if (normalizer.upgrade(restoredVersion, CURRENT_FORMAT_VERSION) == false) { + LOG_ERROR("Failed to upgrade quantiles from version " << restoredVersion << " to version " << CURRENT_FORMAT_VERSION); return false; } } @@ -1113,13 +849,12 @@ bool CAnomalyScore::normalizerFromJson(core::CStateRestoreTraverser &traverser, return restoredNormalizer; } -void CAnomalyScore::normalizerToJson(const CNormalizer &normalizer, - const std::string &searchKey, - const std::string &cue, - const std::string &description, +void CAnomalyScore::normalizerToJson(const CNormalizer& normalizer, + const std::string& searchKey, + const std::string& cue, + const std::string& description, core_t::TTime time, - std::string &json) -{ + std::string& json) { std::ostringstream ss; // The JSON inserter will only close the object when it is destroyed @@ -1135,16 +870,10 @@ void CAnomalyScore::normalizerToJson(const CNormalizer &normalizer, inserter.insertValue(MLVERSION_ATTRIBUTE, CURRENT_FORMAT_VERSION); inserter.insertValue(TIME_ATTRIBUTE, 
core::CStringUtils::typeToString(time));
-        inserter.insertLevel(NORMALIZER_TAG,
-                             boost::bind(&CNormalizer::acceptPersistInserter,
-                                         &normalizer,
-                                         _1));
+        inserter.insertLevel(NORMALIZER_TAG, boost::bind(&CNormalizer::acceptPersistInserter, &normalizer, _1));
    }

    json = ss.str();
}
-
-
}
}
-
diff --git a/lib/model/CBucketGatherer.cc b/lib/model/CBucketGatherer.cc
index b55bfeae4c..ad42376f02 100644
--- a/lib/model/CBucketGatherer.cc
+++ b/lib/model/CBucketGatherer.cc
@@ -25,13 +25,10 @@

 #include 

-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

-namespace
-{
+namespace {

// We use short field names to reduce the state size
const std::string BUCKET_START_TAG("b");
@@ -39,8 +36,7 @@ const std::string BUCKET_COUNT_TAG("k");
const std::string INFLUENCERS_COUNT_TAG("l");
const std::string BUCKET_EXPLICIT_NULLS_TAG("m");

-namespace detail
-{
+namespace detail {

using TSizeSizePr = std::pair<std::size_t, std::size_t>;
using TSizeSizePrUInt64Pr = std::pair<TSizeSizePr, uint64_t>;

@@ -56,49 +52,37 @@ const std::string INFLUENCE_ITEM_TAG("a");
const std::string INFLUENCE_COUNT_TAG("b");

//! Persist a person, attribute and count tuple.
-void insertPersonAttributeCounts(const TSizeSizePrUInt64Pr &tuple,
-                                 core::CStatePersistInserter &inserter)
-{
+void insertPersonAttributeCounts(const TSizeSizePrUInt64Pr& tuple, core::CStatePersistInserter& inserter) {
    inserter.insertValue(PERSON_UID_TAG, CDataGatherer::extractPersonId(tuple));
    inserter.insertValue(ATTRIBUTE_UID_TAG, CDataGatherer::extractAttributeId(tuple));
    inserter.insertValue(COUNT_TAG, CDataGatherer::extractData(tuple));
}

//! Restore a person, attribute and count.
-bool restorePersonAttributeCounts(core::CStateRestoreTraverser &traverser,
-                                  TSizeSizePr &key,
-                                  uint64_t &count)
-{
-    do
-    {
-        const std::string &name = traverser.name();
+bool restorePersonAttributeCounts(core::CStateRestoreTraverser& traverser, TSizeSizePr& key, uint64_t& count) {
+    do {
+        const std::string& name = traverser.name();
        RESTORE_BUILT_IN(PERSON_UID_TAG, key.first)
        RESTORE_BUILT_IN(ATTRIBUTE_UID_TAG, key.second)
        RESTORE_BUILT_IN(COUNT_TAG, count)
-    }
-    while (traverser.next());
+    } while (traverser.next());

    return true;
}

//! Persist a collection of influencer person and attribute counts.
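//!
//! The map is unordered, so its entries are first sorted by person and
//! attribute id, then influencer name and finally count; this makes the
//! persisted representation independent of hash iteration order and hence
//! stable for identical model states.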
-void insertInfluencerPersonAttributeCounts(const TSizeSizePrStoredStringPtrPrUInt64UMap &map, - core::CStatePersistInserter &inserter) -{ +void insertInfluencerPersonAttributeCounts(const TSizeSizePrStoredStringPtrPrUInt64UMap& map, core::CStatePersistInserter& inserter) { std::vector ordered; ordered.reserve(map.size()); - for (auto i = map.begin(); i != map.end(); ++i) - { + for (auto i = map.begin(); i != map.end(); ++i) { ordered.push_back(i); } - std::sort(ordered.begin(), ordered.end(), - [](TSizeSizePrStoredStringPtrPrUInt64UMapCItr lhs, TSizeSizePrStoredStringPtrPrUInt64UMapCItr rhs) - { - return maths::COrderings::lexicographical_compare(lhs->first.first, *lhs->first.second, lhs->second, - rhs->first.first, *rhs->first.second, rhs->second); - }); - - for (std::size_t i = 0u; i < ordered.size(); ++i) - { + std::sort( + ordered.begin(), ordered.end(), [](TSizeSizePrStoredStringPtrPrUInt64UMapCItr lhs, TSizeSizePrStoredStringPtrPrUInt64UMapCItr rhs) { + return maths::COrderings::lexicographical_compare( + lhs->first.first, *lhs->first.second, lhs->second, rhs->first.first, *rhs->first.second, rhs->second); + }); + + for (std::size_t i = 0u; i < ordered.size(); ++i) { inserter.insertValue(PERSON_UID_TAG, CDataGatherer::extractPersonId(ordered[i]->first)); inserter.insertValue(ATTRIBUTE_UID_TAG, CDataGatherer::extractAttributeId(ordered[i]->first)); inserter.insertValue(INFLUENCER_TAG, *CDataGatherer::extractData(ordered[i]->first)); @@ -107,104 +91,81 @@ void insertInfluencerPersonAttributeCounts(const TSizeSizePrStoredStringPtrPrUIn } //! Restore a collection of influencer person and attribute counts. -bool restoreInfluencerPersonAttributeCounts(core::CStateRestoreTraverser &traverser, - TSizeSizePrStoredStringPtrPrUInt64UMap &map) -{ +bool restoreInfluencerPersonAttributeCounts(core::CStateRestoreTraverser& traverser, TSizeSizePrStoredStringPtrPrUInt64UMap& map) { std::size_t person = 0; std::size_t attribute = 0; std::string influence = ""; uint64_t count = 0; - do - { + do { const std::string name = traverser.name(); RESTORE_BUILT_IN(PERSON_UID_TAG, person) RESTORE_BUILT_IN(ATTRIBUTE_UID_TAG, attribute) RESTORE_NO_ERROR(INFLUENCER_TAG, influence = traverser.value()) - if (name == COUNT_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), count) == false) - { + if (name == COUNT_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), count) == false) { LOG_ERROR("Failed to restore COUNT_TAG, got " << traverser.value()); return false; } map[{{person, attribute}, CStringStore::influencers().get(influence)}] = count; } - } - while (traverser.next()); + } while (traverser.next()); return true; } //! \brief Manages persistence of bucket counts. 
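//!
//! The two operator() overloads are intended to be bound and handed to the
//! persistence framework. Schematically (the tag name below is made up for
//! illustration):
//! \code
//! inserter.insertLevel(COUNTS_TAG,
//!                      boost::bind<void>(SBucketCountsPersister(),
//!                                        boost::cref(bucketCounts), _1));
//! \endcode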
-struct SBucketCountsPersister -{ +struct SBucketCountsPersister { using TSizeSizePrUInt64UMap = CBucketGatherer::TSizeSizePrUInt64UMap; - void operator()(const TSizeSizePrUInt64UMap &bucketCounts, core::CStatePersistInserter &inserter) - { + void operator()(const TSizeSizePrUInt64UMap& bucketCounts, core::CStatePersistInserter& inserter) { CBucketGatherer::TSizeSizePrUInt64PrVec personAttributeCounts; personAttributeCounts.reserve(bucketCounts.size()); personAttributeCounts.assign(bucketCounts.begin(), bucketCounts.end()); std::sort(personAttributeCounts.begin(), personAttributeCounts.end()); - for (std::size_t i = 0; i < personAttributeCounts.size(); ++i) - { + for (std::size_t i = 0; i < personAttributeCounts.size(); ++i) { inserter.insertLevel(PERSON_ATTRIBUTE_COUNT_TAG, - boost::bind(&insertPersonAttributeCounts, - boost::cref(personAttributeCounts[i]), _1)); + boost::bind(&insertPersonAttributeCounts, boost::cref(personAttributeCounts[i]), _1)); } } - bool operator()(TSizeSizePrUInt64UMap &bucketCounts, core::CStateRestoreTraverser &traverser) - { - do - { + bool operator()(TSizeSizePrUInt64UMap& bucketCounts, core::CStateRestoreTraverser& traverser) { + do { TSizeSizePr key; uint64_t count; - if (!traverser.hasSubLevel()) - { + if (!traverser.hasSubLevel()) { continue; } - if (traverser.traverseSubLevel(boost::bind(&restorePersonAttributeCounts, - _1, boost::ref(key), boost::ref(count))) == false) - { + if (traverser.traverseSubLevel(boost::bind(&restorePersonAttributeCounts, _1, boost::ref(key), boost::ref(count))) == false) { LOG_ERROR("Invalid person attribute count"); continue; } bucketCounts[key] = count; - } - while (traverser.next()); + } while (traverser.next()); return true; } }; //! \brief Manages persistence influencer bucket counts. 
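//!
//! Each map in the vector is written behind an INFLUENCE_COUNT_TAG holding
//! its index, which is why the restore path can lazily size the vector with
//! data.resize(std::max(data.size(), i + 1)) before reading each
//! INFLUENCE_ITEM_TAG level.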
-struct SInfluencerCountsPersister -{ +struct SInfluencerCountsPersister { using TSizeSizePrStoredStringPtrPrUInt64UMapVec = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec; - void operator()(const TSizeSizePrStoredStringPtrPrUInt64UMapVec &data, core::CStatePersistInserter &inserter) - { - for (std::size_t i = 0; i < data.size(); ++i) - { + void operator()(const TSizeSizePrStoredStringPtrPrUInt64UMapVec& data, core::CStatePersistInserter& inserter) { + for (std::size_t i = 0; i < data.size(); ++i) { inserter.insertValue(INFLUENCE_COUNT_TAG, i); - inserter.insertLevel(INFLUENCE_ITEM_TAG, - boost::bind(&insertInfluencerPersonAttributeCounts, boost::cref(data[i]), _1)); + inserter.insertLevel(INFLUENCE_ITEM_TAG, boost::bind(&insertInfluencerPersonAttributeCounts, boost::cref(data[i]), _1)); } } - bool operator()(TSizeSizePrStoredStringPtrPrUInt64UMapVec &data, core::CStateRestoreTraverser &traverser) const - { + bool operator()(TSizeSizePrStoredStringPtrPrUInt64UMapVec& data, core::CStateRestoreTraverser& traverser) const { std::size_t i = 0; - do - { + do { const std::string name = traverser.name(); RESTORE_BUILT_IN(INFLUENCE_COUNT_TAG, i) - RESTORE_SETUP_TEARDOWN(INFLUENCE_ITEM_TAG, - data.resize(std::max(data.size(), i + 1)), - traverser.traverseSubLevel(boost::bind(&restoreInfluencerPersonAttributeCounts, - _1, boost::ref(data[i]))), - /**/) - } - while (traverser.next()); + RESTORE_SETUP_TEARDOWN( + INFLUENCE_ITEM_TAG, + data.resize(std::max(data.size(), i + 1)), + traverser.traverseSubLevel(boost::bind(&restoreInfluencerPersonAttributeCounts, _1, boost::ref(data[i]))), + /**/) + } while (traverser.next()); return true; } }; @@ -215,48 +176,40 @@ struct SInfluencerCountsPersister const std::string CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG("a"); const std::string CBucketGatherer::METRIC_BUCKET_GATHERER_TAG("b"); -CBucketGatherer::CBucketGatherer(CDataGatherer &dataGatherer, - core_t::TTime startTime) : - m_DataGatherer(dataGatherer), - m_EarliestTime(startTime), - m_BucketStart(startTime), - m_PersonAttributeCounts(dataGatherer.params().s_LatencyBuckets, - dataGatherer.params().s_BucketLength, - startTime, - TSizeSizePrUInt64UMap(1)), - m_PersonAttributeExplicitNulls(dataGatherer.params().s_LatencyBuckets, - dataGatherer.params().s_BucketLength, - startTime, - TSizeSizePrUSet(1)), - m_InfluencerCounts(dataGatherer.params().s_LatencyBuckets + 3, - dataGatherer.params().s_BucketLength, - startTime) -{} - -CBucketGatherer::CBucketGatherer(bool isForPersistence, - const CBucketGatherer &other) : - m_DataGatherer(other.m_DataGatherer), - m_EarliestTime(other.m_EarliestTime), - m_BucketStart(other.m_BucketStart), - m_PersonAttributeCounts(other.m_PersonAttributeCounts), - m_MultiBucketPersonAttributeCounts(other.m_MultiBucketPersonAttributeCounts), - m_PersonAttributeExplicitNulls(other.m_PersonAttributeExplicitNulls), - m_MultiBucketPersonAttributeExplicitNulls(other.m_MultiBucketPersonAttributeExplicitNulls), - m_InfluencerCounts(other.m_InfluencerCounts), - m_MultiBucketInfluencerCounts(other.m_MultiBucketInfluencerCounts) -{ - if (!isForPersistence) - { +CBucketGatherer::CBucketGatherer(CDataGatherer& dataGatherer, core_t::TTime startTime) + : m_DataGatherer(dataGatherer), + m_EarliestTime(startTime), + m_BucketStart(startTime), + m_PersonAttributeCounts(dataGatherer.params().s_LatencyBuckets, + dataGatherer.params().s_BucketLength, + startTime, + TSizeSizePrUInt64UMap(1)), + m_PersonAttributeExplicitNulls(dataGatherer.params().s_LatencyBuckets, + 
dataGatherer.params().s_BucketLength, + startTime, + TSizeSizePrUSet(1)), + m_InfluencerCounts(dataGatherer.params().s_LatencyBuckets + 3, dataGatherer.params().s_BucketLength, startTime) { +} + +CBucketGatherer::CBucketGatherer(bool isForPersistence, const CBucketGatherer& other) + : m_DataGatherer(other.m_DataGatherer), + m_EarliestTime(other.m_EarliestTime), + m_BucketStart(other.m_BucketStart), + m_PersonAttributeCounts(other.m_PersonAttributeCounts), + m_MultiBucketPersonAttributeCounts(other.m_MultiBucketPersonAttributeCounts), + m_PersonAttributeExplicitNulls(other.m_PersonAttributeExplicitNulls), + m_MultiBucketPersonAttributeExplicitNulls(other.m_MultiBucketPersonAttributeExplicitNulls), + m_InfluencerCounts(other.m_InfluencerCounts), + m_MultiBucketInfluencerCounts(other.m_MultiBucketInfluencerCounts) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } -bool CBucketGatherer::addEventData(CEventData &data) -{ +bool CBucketGatherer::addEventData(CEventData& data) { core_t::TTime time = data.time(); - if (time < this->earliestBucketStartTime()) - { + if (time < this->earliestBucketStartTime()) { // Ignore records that are out of the latency window // Records in an incomplete first bucket will end up here LOG_TRACE("Ignored = " << time); @@ -265,26 +218,21 @@ bool CBucketGatherer::addEventData(CEventData &data) this->timeNow(time); - if (!data.personId() || !data.attributeId() || !data.count()) - { + if (!data.personId() || !data.attributeId() || !data.count()) { // The record was incomplete. return false; } - std::size_t pid = *data.personId(); - std::size_t cid = *data.attributeId(); + std::size_t pid = *data.personId(); + std::size_t cid = *data.attributeId(); std::size_t count = *data.count(); - if ( (pid != CDynamicStringIdRegistry::INVALID_ID) - && (cid != CDynamicStringIdRegistry::INVALID_ID)) - { + if ((pid != CDynamicStringIdRegistry::INVALID_ID) && (cid != CDynamicStringIdRegistry::INVALID_ID)) { // Has the person/attribute been deleted from the gatherer? - if (!m_DataGatherer.isPersonActive(pid)) - { + if (!m_DataGatherer.isPersonActive(pid)) { LOG_DEBUG("Not adding value for deleted person " << pid); return false; } - if (m_DataGatherer.isPopulation() && !m_DataGatherer.isAttributeActive(cid)) - { + if (m_DataGatherer.isPopulation() && !m_DataGatherer.isAttributeActive(cid)) { LOG_DEBUG("Not adding value for deleted attribute " << cid); return false; } @@ -293,36 +241,31 @@ bool CBucketGatherer::addEventData(CEventData &data) // If record is explicit null just note that a null record has been seen // for the given (pid, cid) pair. 
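    //
    // This is what later lets hasExplicitNullsOnly() distinguish a person
    // and attribute pair which reported an explicit null in a bucket from
    // one which simply sent no data at all.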
- if (data.isExplicitNull()) - { - TSizeSizePrUSet &bucketExplicitNulls = m_PersonAttributeExplicitNulls.get(time); + if (data.isExplicitNull()) { + TSizeSizePrUSet& bucketExplicitNulls = m_PersonAttributeExplicitNulls.get(time); bucketExplicitNulls.insert(pidCid); return true; } - TSizeSizePrUInt64UMap &bucketCounts = m_PersonAttributeCounts.get(time); - if (count > 0) - { + TSizeSizePrUInt64UMap& bucketCounts = m_PersonAttributeCounts.get(time); + if (count > 0) { bucketCounts[pidCid] += count; } const CEventData::TOptionalStrVec influences = data.influences(); - TSizeSizePrStoredStringPtrPrUInt64UMapVec &influencerCounts = m_InfluencerCounts.get(time); + TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencerCounts = m_InfluencerCounts.get(time); influencerCounts.resize(influences.size()); TStoredStringPtrVec canonicalInfluences(influences.size()); - for (std::size_t i = 0u; i < influences.size(); ++i) - { - const CEventData::TOptionalStr &influence = influences[i]; - if (influence) - { - const core::CStoredStringPtr &inf = CStringStore::influencers().get(*influence); + for (std::size_t i = 0u; i < influences.size(); ++i) { + const CEventData::TOptionalStr& influence = influences[i]; + if (influence) { + const core::CStoredStringPtr& inf = CStringStore::influencers().get(*influence); canonicalInfluences[i] = inf; - if (count > 0) - { - influencerCounts[i].emplace(boost::unordered::piecewise_construct, - boost::make_tuple(pidCid, inf), - boost::make_tuple(uint64_t(0))).first->second += count; + if (count > 0) { + influencerCounts[i] + .emplace(boost::unordered::piecewise_construct, boost::make_tuple(pidCid, inf), boost::make_tuple(uint64_t(0))) + .first->second += count; } } } @@ -332,23 +275,19 @@ bool CBucketGatherer::addEventData(CEventData &data) return true; } -void CBucketGatherer::timeNow(core_t::TTime time) -{ +void CBucketGatherer::timeNow(core_t::TTime time) { this->hiddenTimeNow(time, false); } -void CBucketGatherer::hiddenTimeNow(core_t::TTime time, bool skipUpdates) -{ +void CBucketGatherer::hiddenTimeNow(core_t::TTime time, bool skipUpdates) { m_EarliestTime = std::min(m_EarliestTime, time); core_t::TTime n = (time - m_BucketStart) / this->bucketLength(); - if (n <= 0) - { + if (n <= 0) { return; } core_t::TTime newBucketStart = m_BucketStart; - for (core_t::TTime i = 0; i < n; ++i) - { + for (core_t::TTime i = 0; i < n; ++i) { newBucketStart += this->bucketLength(); // The order here is important. 
While starting new buckets @@ -359,10 +298,8 @@ void CBucketGatherer::hiddenTimeNow(core_t::TTime time, bool skipUpdates) m_PersonAttributeCounts.push(TSizeSizePrUInt64UMap(1), newBucketStart); m_PersonAttributeExplicitNulls.push(TSizeSizePrUSet(1), newBucketStart); m_InfluencerCounts.push(TSizeSizePrStoredStringPtrPrUInt64UMapVec(), newBucketStart); - for (auto bucketLength : m_DataGatherer.params().s_MultipleBucketLengths) - { - if (newBucketStart % bucketLength == 0) - { + for (auto bucketLength : m_DataGatherer.params().s_MultipleBucketLengths) { + if (newBucketStart % bucketLength == 0) { m_MultiBucketPersonAttributeCounts[bucketLength].clear(); m_MultiBucketPersonAttributeExplicitNulls[bucketLength].clear(); m_MultiBucketInfluencerCounts[bucketLength].clear(); @@ -372,92 +309,73 @@ void CBucketGatherer::hiddenTimeNow(core_t::TTime time, bool skipUpdates) } } -void CBucketGatherer::sampleNow(core_t::TTime sampleBucketStart) -{ - core_t::TTime timeNow = sampleBucketStart - + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; +void CBucketGatherer::sampleNow(core_t::TTime sampleBucketStart) { + core_t::TTime timeNow = sampleBucketStart + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; this->timeNow(timeNow); this->sample(sampleBucketStart); } -void CBucketGatherer::skipSampleNow(core_t::TTime sampleBucketStart) -{ - core_t::TTime timeNow = sampleBucketStart - + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; +void CBucketGatherer::skipSampleNow(core_t::TTime sampleBucketStart) { + core_t::TTime timeNow = sampleBucketStart + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; this->hiddenTimeNow(timeNow, true); } -void CBucketGatherer::sample(core_t::TTime time) -{ +void CBucketGatherer::sample(core_t::TTime time) { // Merge the current bucket's statistics into multiple bucket statistics. 
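    //
    // These maps accumulate every bucket sampled since the last multiple of
    // the corresponding bucket length; hiddenTimeNow() clears them again
    // whenever a new bucket start is aligned to that length.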
- for (auto bucketLength : m_DataGatherer.params().s_MultipleBucketLengths) - { - auto &multipleBucketPersonAttributeCounts = m_MultiBucketPersonAttributeCounts[bucketLength]; - for (const auto &count : m_PersonAttributeCounts.get(time)) - { + for (auto bucketLength : m_DataGatherer.params().s_MultipleBucketLengths) { + auto& multipleBucketPersonAttributeCounts = m_MultiBucketPersonAttributeCounts[bucketLength]; + for (const auto& count : m_PersonAttributeCounts.get(time)) { multipleBucketPersonAttributeCounts[count.first] += count.second; } - auto &multipleBucketPersonAttributeExplicitNulls = m_MultiBucketPersonAttributeExplicitNulls[bucketLength]; - for (const auto &nulls : m_PersonAttributeExplicitNulls.get(time)) - { + auto& multipleBucketPersonAttributeExplicitNulls = m_MultiBucketPersonAttributeExplicitNulls[bucketLength]; + for (const auto& nulls : m_PersonAttributeExplicitNulls.get(time)) { multipleBucketPersonAttributeExplicitNulls.insert(nulls); } - const TSizeSizePrStoredStringPtrPrUInt64UMapVec &influencerCounts = m_InfluencerCounts.get(time); - auto &multiBucketInfluencerCounts = m_MultiBucketInfluencerCounts[bucketLength]; + const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencerCounts = m_InfluencerCounts.get(time); + auto& multiBucketInfluencerCounts = m_MultiBucketInfluencerCounts[bucketLength]; multiBucketInfluencerCounts.resize(influencerCounts.size()); - for (std::size_t i = 0u; i < influencerCounts.size(); ++i) - { - for (const auto &count : influencerCounts[i]) - { + for (std::size_t i = 0u; i < influencerCounts.size(); ++i) { + for (const auto& count : influencerCounts[i]) { multiBucketInfluencerCounts[i][count.first] += count.second; } } } } -void CBucketGatherer::personNonZeroCounts(core_t::TTime time, TSizeUInt64PrVec &result) const -{ +void CBucketGatherer::personNonZeroCounts(core_t::TTime time, TSizeUInt64PrVec& result) const { using TSizeUInt64Map = std::map; result.clear(); - if (!this->dataAvailable(time)) - { - LOG_ERROR("No statistics at " << time - << ", current bucket = " << this->printCurrentBucket()); + if (!this->dataAvailable(time)) { + LOG_ERROR("No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); return; } TSizeUInt64Map personCounts; - for (const auto &count : this->bucketCounts(time)) - { + for (const auto& count : this->bucketCounts(time)) { personCounts[CDataGatherer::extractPersonId(count)] += CDataGatherer::extractData(count); } result.reserve(personCounts.size()); result.assign(personCounts.begin(), personCounts.end()); } -void CBucketGatherer::recyclePeople(const TSizeVec &peopleToRemove) -{ - if (!peopleToRemove.empty()) - { +void CBucketGatherer::recyclePeople(const TSizeVec& peopleToRemove) { + if (!peopleToRemove.empty()) { remove(peopleToRemove, CDataGatherer::SExtractPersonId(), m_PersonAttributeCounts); remove(peopleToRemove, CDataGatherer::SExtractPersonId(), m_PersonAttributeExplicitNulls); remove(peopleToRemove, CDataGatherer::SExtractPersonId(), m_InfluencerCounts); } } -void CBucketGatherer::removePeople(std::size_t lowestPersonToRemove) -{ - if (lowestPersonToRemove < m_DataGatherer.numberPeople()) - { +void CBucketGatherer::removePeople(std::size_t lowestPersonToRemove) { + if (lowestPersonToRemove < m_DataGatherer.numberPeople()) { TSizeVec peopleToRemove; std::size_t maxPersonId = m_DataGatherer.numberPeople(); peopleToRemove.reserve(maxPersonId - lowestPersonToRemove); - for (std::size_t pid = lowestPersonToRemove; pid < maxPersonId; ++pid) - { + for (std::size_t pid = 
lowestPersonToRemove; pid < maxPersonId; ++pid) { peopleToRemove.push_back(pid); } remove(peopleToRemove, CDataGatherer::SExtractPersonId(), m_PersonAttributeCounts); @@ -466,25 +384,20 @@ void CBucketGatherer::removePeople(std::size_t lowestPersonToRemove) } } -void CBucketGatherer::recycleAttributes(const TSizeVec &attributesToRemove) -{ - if (!attributesToRemove.empty()) - { +void CBucketGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { + if (!attributesToRemove.empty()) { remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_PersonAttributeCounts); remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_PersonAttributeExplicitNulls); remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_InfluencerCounts); } } -void CBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) -{ - if (lowestAttributeToRemove < m_DataGatherer.numberAttributes()) - { +void CBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { + if (lowestAttributeToRemove < m_DataGatherer.numberAttributes()) { TSizeVec attributesToRemove; const std::size_t numAttributes = m_DataGatherer.numberAttributes(); attributesToRemove.reserve(numAttributes - lowestAttributeToRemove); - for (std::size_t cid = lowestAttributeToRemove; cid < numAttributes; ++cid) - { + for (std::size_t cid = lowestAttributeToRemove; cid < numAttributes; ++cid) { attributesToRemove.push_back(cid); } remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_PersonAttributeCounts); @@ -493,35 +406,27 @@ void CBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) } } -core_t::TTime CBucketGatherer::currentBucketStartTime() const -{ +core_t::TTime CBucketGatherer::currentBucketStartTime() const { return m_BucketStart; } -void CBucketGatherer::currentBucketStartTime(core_t::TTime time) -{ +void CBucketGatherer::currentBucketStartTime(core_t::TTime time) { m_BucketStart = time; } -core_t::TTime CBucketGatherer::earliestBucketStartTime() const -{ - return this->currentBucketStartTime() - - (m_DataGatherer.params().s_LatencyBuckets * this->bucketLength()); +core_t::TTime CBucketGatherer::earliestBucketStartTime() const { + return this->currentBucketStartTime() - (m_DataGatherer.params().s_LatencyBuckets * this->bucketLength()); } -core_t::TTime CBucketGatherer::bucketLength() const -{ +core_t::TTime CBucketGatherer::bucketLength() const { return m_DataGatherer.params().s_BucketLength; } -bool CBucketGatherer::dataAvailable(core_t::TTime time) const -{ +bool CBucketGatherer::dataAvailable(core_t::TTime time) const { return time >= m_EarliestTime && time >= this->earliestBucketStartTime(); } -bool CBucketGatherer::validateSampleTimes(core_t::TTime &startTime, - core_t::TTime endTime) const -{ +bool CBucketGatherer::validateSampleTimes(core_t::TTime& startTime, core_t::TTime endTime) const { // Sanity checks: // 1) The start and end times are aligned to bucket boundaries. 
// 2) The end time is greater than the start time, @@ -530,28 +435,22 @@ bool CBucketGatherer::validateSampleTimes(core_t::TTime &startTime, // 4) The start time is greater than or equal to the start time // of the last sampled bucket - if (!maths::CIntegerTools::aligned(startTime, this->bucketLength())) - { + if (!maths::CIntegerTools::aligned(startTime, this->bucketLength())) { LOG_ERROR("Sample start time " << startTime << " is not bucket aligned"); LOG_ERROR("However, my bucketStart time is " << m_BucketStart); return false; } - if (!maths::CIntegerTools::aligned(endTime, this->bucketLength())) - { + if (!maths::CIntegerTools::aligned(endTime, this->bucketLength())) { LOG_ERROR("Sample end time " << endTime << " is not bucket aligned"); return false; } - if (endTime <= startTime) - { + if (endTime <= startTime) { LOG_ERROR("End time " << endTime << " is not greater than the start time " << startTime); return false; } - for (/**/; startTime < endTime; startTime += this->bucketLength()) - { - if (!this->dataAvailable(startTime)) - { - LOG_ERROR("No counts available at " << startTime - << ", current bucket = " << this->printCurrentBucket()); + for (/**/; startTime < endTime; startTime += this->bucketLength()) { + if (!this->dataAvailable(startTime)) { + LOG_ERROR("No counts available at " << startTime << ", current bucket = " << this->printCurrentBucket()); continue; } return true; @@ -560,44 +459,35 @@ bool CBucketGatherer::validateSampleTimes(core_t::TTime &startTime, return false; } -const CDataGatherer &CBucketGatherer::dataGatherer() const -{ +const CDataGatherer& CBucketGatherer::dataGatherer() const { return m_DataGatherer; } -std::string CBucketGatherer::printCurrentBucket() const -{ +std::string CBucketGatherer::printCurrentBucket() const { std::ostringstream result; result << "[" << m_BucketStart << "," << m_BucketStart + this->bucketLength() << ")"; return result.str(); } -const CBucketGatherer::TSizeSizePrUInt64UMap &CBucketGatherer::bucketCounts(core_t::TTime time) const -{ +const CBucketGatherer::TSizeSizePrUInt64UMap& CBucketGatherer::bucketCounts(core_t::TTime time) const { return m_PersonAttributeCounts.get(time); } -const CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec & - CBucketGatherer::influencerCounts(core_t::TTime time) const -{ +const CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec& CBucketGatherer::influencerCounts(core_t::TTime time) const { return m_InfluencerCounts.get(time); } -bool CBucketGatherer::hasExplicitNullsOnly(core_t::TTime time, std::size_t pid, std::size_t cid) const -{ - const TSizeSizePrUSet &bucketExplicitNulls = m_PersonAttributeExplicitNulls.get(time); - if (bucketExplicitNulls.empty()) - { +bool CBucketGatherer::hasExplicitNullsOnly(core_t::TTime time, std::size_t pid, std::size_t cid) const { + const TSizeSizePrUSet& bucketExplicitNulls = m_PersonAttributeExplicitNulls.get(time); + if (bucketExplicitNulls.empty()) { return false; } - const TSizeSizePrUInt64UMap &bucketCounts = m_PersonAttributeCounts.get(time); + const TSizeSizePrUInt64UMap& bucketCounts = m_PersonAttributeCounts.get(time); TSizeSizePr pidCid = std::make_pair(pid, cid); - return bucketExplicitNulls.find(pidCid) != bucketExplicitNulls.end() - && bucketCounts.find(pidCid) == bucketCounts.end(); + return bucketExplicitNulls.find(pidCid) != bucketExplicitNulls.end() && bucketCounts.find(pidCid) == bucketCounts.end(); } -uint64_t CBucketGatherer::checksum() const -{ +uint64_t CBucketGatherer::checksum() const { using TStrCRef = boost::reference_wrapper; 
using TStrCRefStrCRefPr = std::pair; using TStrCRefStrCRefPrVec = std::vector; @@ -607,40 +497,30 @@ uint64_t CBucketGatherer::checksum() const uint64_t result = maths::CChecksum::calculate(0, m_BucketStart); result = maths::CChecksum::calculate(result, m_PersonAttributeCounts.latestBucketEnd()); - for (const auto &bucketCounts : m_PersonAttributeCounts) - { + for (const auto& bucketCounts : m_PersonAttributeCounts) { TStrCRefStrCRefPrUInt64PrVec personAttributeCounts; personAttributeCounts.reserve(bucketCounts.size()); - for (const auto &count : bucketCounts) - { + for (const auto& count : bucketCounts) { std::size_t pid = CDataGatherer::extractPersonId(count); std::size_t cid = CDataGatherer::extractAttributeId(count); - TStrCRefStrCRefPr key(TStrCRef(m_DataGatherer.personName(pid)), - TStrCRef(m_DataGatherer.attributeName(cid))); + TStrCRefStrCRefPr key(TStrCRef(m_DataGatherer.personName(pid)), TStrCRef(m_DataGatherer.attributeName(cid))); personAttributeCounts.emplace_back(key, CDataGatherer::extractData(count)); } - std::sort(personAttributeCounts.begin(), - personAttributeCounts.end(), - maths::COrderings::SLexicographicalCompare()); + std::sort(personAttributeCounts.begin(), personAttributeCounts.end(), maths::COrderings::SLexicographicalCompare()); result = maths::CChecksum::calculate(result, personAttributeCounts); } result = maths::CChecksum::calculate(result, m_PersonAttributeExplicitNulls.latestBucketEnd()); - for (const auto &bucketExplicitNulls : m_PersonAttributeExplicitNulls) - { + for (const auto& bucketExplicitNulls : m_PersonAttributeExplicitNulls) { TStrCRefStrCRefPrVec personAttributeExplicitNulls; personAttributeExplicitNulls.reserve(bucketExplicitNulls.size()); - for (const auto &nulls : bucketExplicitNulls) - { + for (const auto& nulls : bucketExplicitNulls) { std::size_t pid = CDataGatherer::extractPersonId(nulls); std::size_t cid = CDataGatherer::extractAttributeId(nulls); - TStrCRefStrCRefPr key(TStrCRef(m_DataGatherer.personName(pid)), - TStrCRef(m_DataGatherer.attributeName(cid))); + TStrCRefStrCRefPr key(TStrCRef(m_DataGatherer.personName(pid)), TStrCRef(m_DataGatherer.attributeName(cid))); personAttributeExplicitNulls.push_back(key); } - std::sort(personAttributeExplicitNulls.begin(), - personAttributeExplicitNulls.end(), - maths::COrderings::SLexicographicalCompare()); + std::sort(personAttributeExplicitNulls.begin(), personAttributeExplicitNulls.end(), maths::COrderings::SLexicographicalCompare()); result = maths::CChecksum::calculate(result, personAttributeExplicitNulls); } @@ -649,42 +529,34 @@ uint64_t CBucketGatherer::checksum() const return result; } -void CBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CBucketGatherer"); core::CMemoryDebug::dynamicSize("m_PersonAttributeCounts", m_PersonAttributeCounts, mem); core::CMemoryDebug::dynamicSize("m_PersonAttributeExplicitNulls", m_PersonAttributeExplicitNulls, mem); core::CMemoryDebug::dynamicSize("m_Influencers", m_InfluencerCounts, mem); } -std::size_t CBucketGatherer::memoryUsage() const -{ +std::size_t CBucketGatherer::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_PersonAttributeCounts); mem += core::CMemory::dynamicSize(m_PersonAttributeExplicitNulls); mem += core::CMemory::dynamicSize(m_InfluencerCounts); return mem; } -void CBucketGatherer::clear() -{ +void CBucketGatherer::clear() { m_PersonAttributeCounts.clear(TSizeSizePrUInt64UMap(1)); 
m_PersonAttributeExplicitNulls.clear(TSizeSizePrUSet(1)); m_InfluencerCounts.clear(); } -bool CBucketGatherer::resetBucket(core_t::TTime bucketStart) -{ - if (!maths::CIntegerTools::aligned(bucketStart, this->bucketLength())) - { +bool CBucketGatherer::resetBucket(core_t::TTime bucketStart) { + if (!maths::CIntegerTools::aligned(bucketStart, this->bucketLength())) { LOG_ERROR("Bucket start time " << bucketStart << " is not bucket aligned"); return false; } - if ( !this->dataAvailable(bucketStart) - || bucketStart >= this->currentBucketStartTime() + this->bucketLength()) - { - LOG_WARN("No data available at " << bucketStart - << ", current bucket = " << this->printCurrentBucket()); + if (!this->dataAvailable(bucketStart) || bucketStart >= this->currentBucketStartTime() + this->bucketLength()) { + LOG_WARN("No data available at " << bucketStart << ", current bucket = " << this->printCurrentBucket()); return false; } @@ -695,56 +567,48 @@ bool CBucketGatherer::resetBucket(core_t::TTime bucketStart) return true; } -void CBucketGatherer::baseAcceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CBucketGatherer::baseAcceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(BUCKET_START_TAG, m_BucketStart); - inserter.insertLevel(BUCKET_COUNT_TAG, boost::bind( - TSizeSizePrUInt64UMapQueue::CSerializer(), - boost::cref(m_PersonAttributeCounts), _1)); - inserter.insertLevel(INFLUENCERS_COUNT_TAG, boost::bind( - TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue::CSerializer(), - boost::cref(m_InfluencerCounts), _1)); + inserter.insertLevel(BUCKET_COUNT_TAG, + boost::bind(TSizeSizePrUInt64UMapQueue::CSerializer(), + boost::cref(m_PersonAttributeCounts), + _1)); + inserter.insertLevel( + INFLUENCERS_COUNT_TAG, + boost::bind(TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue::CSerializer(), + boost::cref(m_InfluencerCounts), + _1)); core::CPersistUtils::persist(BUCKET_EXPLICIT_NULLS_TAG, m_PersonAttributeExplicitNulls, inserter); } -bool CBucketGatherer::baseAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CBucketGatherer::baseAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { this->clear(); - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_BUILT_IN(BUCKET_START_TAG, m_BucketStart) - RESTORE_SETUP_TEARDOWN( - BUCKET_COUNT_TAG, - m_PersonAttributeCounts = TSizeSizePrUInt64UMapQueue(m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), - m_BucketStart, - TSizeSizePrUInt64UMap(1)), - traverser.traverseSubLevel(boost::bind( - TSizeSizePrUInt64UMapQueue::CSerializer(TSizeSizePrUInt64UMap(1)), - boost::ref(m_PersonAttributeCounts), _1)), - /**/) - RESTORE_SETUP_TEARDOWN( - INFLUENCERS_COUNT_TAG, - m_InfluencerCounts = TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue(m_DataGatherer.params().s_LatencyBuckets + 3, - this->bucketLength(), - m_BucketStart), - traverser.traverseSubLevel(boost::bind( - TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue::CSerializer(), - boost::ref(m_InfluencerCounts), _1)), - /**/) - RESTORE_SETUP_TEARDOWN( - BUCKET_EXPLICIT_NULLS_TAG, - m_PersonAttributeExplicitNulls = TSizeSizePrUSetQueue(m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), - m_BucketStart, - TSizeSizePrUSet(1)), - core::CPersistUtils::restore(BUCKET_EXPLICIT_NULLS_TAG, m_PersonAttributeExplicitNulls, traverser), - /**/) - } - while (traverser.next()); + RESTORE_SETUP_TEARDOWN(BUCKET_COUNT_TAG, + m_PersonAttributeCounts = 
TSizeSizePrUInt64UMapQueue(
+                               m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), m_BucketStart, TSizeSizePrUInt64UMap(1)),
+                           traverser.traverseSubLevel(boost::bind(
+                               TSizeSizePrUInt64UMapQueue::CSerializer(TSizeSizePrUInt64UMap(1)),
+                               boost::ref(m_PersonAttributeCounts),
+                               _1)),
+                           /**/)
+        RESTORE_SETUP_TEARDOWN(INFLUENCERS_COUNT_TAG,
+                               m_InfluencerCounts = TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue(
+                                   m_DataGatherer.params().s_LatencyBuckets + 3, this->bucketLength(), m_BucketStart),
+                               traverser.traverseSubLevel(boost::bind(
+                                   TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue::CSerializer(),
+                                   boost::ref(m_InfluencerCounts),
+                                   _1)),
+                               /**/)
+        RESTORE_SETUP_TEARDOWN(BUCKET_EXPLICIT_NULLS_TAG,
+                               m_PersonAttributeExplicitNulls = TSizeSizePrUSetQueue(
+                                   m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), m_BucketStart, TSizeSizePrUSet(1)),
+                               core::CPersistUtils::restore(BUCKET_EXPLICIT_NULLS_TAG, m_PersonAttributeExplicitNulls, traverser),
+                               /**/)
+    } while (traverser.next());

     return true;
 }
-
 }
 }
diff --git a/lib/model/CCountingModel.cc b/lib/model/CCountingModel.cc
index 3de36ce090..95d79e5817 100644
--- a/lib/model/CCountingModel.cc
+++ b/lib/model/CCountingModel.cc
@@ -14,12 +14,9 @@
 #include
 #include

-namespace ml
-{
-namespace model
-{
-namespace
-{
+namespace ml {
+namespace model {
+namespace {
 const std::string WINDOW_BUCKET_COUNT_TAG("a");
 const std::string PERSON_BUCKET_COUNT_TAG("b");
 const std::string MEAN_COUNT_TAG("c");
@@ -33,202 +30,145 @@ const CCountingModel::TStr1Vec EMPTY_STRING_LIST;
 const std::string INTERIM_BUCKET_CORRECTOR_TAG("e");
 }

-CCountingModel::CCountingModel(const SModelParams &params,
-                               const TDataGathererPtr &dataGatherer) :
-        CAnomalyDetectorModel(params, dataGatherer, TFeatureInfluenceCalculatorCPtrPrVecVec()),
-        m_StartTime(CAnomalyDetectorModel::TIME_UNSET)
-{}
-
-CCountingModel::CCountingModel(const SModelParams &params,
-                               const TDataGathererPtr &dataGatherer,
-                               core::CStateRestoreTraverser &traverser) :
-        CAnomalyDetectorModel(params, dataGatherer, TFeatureInfluenceCalculatorCPtrPrVecVec()),
-        m_StartTime(CAnomalyDetectorModel::TIME_UNSET)
-{
-    traverser.traverseSubLevel(boost::bind(&CCountingModel::acceptRestoreTraverser,
-                                           this, _1));
+CCountingModel::CCountingModel(const SModelParams& params, const TDataGathererPtr& dataGatherer)
+    : CAnomalyDetectorModel(params, dataGatherer, TFeatureInfluenceCalculatorCPtrPrVecVec()),
+      m_StartTime(CAnomalyDetectorModel::TIME_UNSET) {
 }

+CCountingModel::CCountingModel(const SModelParams& params, const TDataGathererPtr& dataGatherer, core::CStateRestoreTraverser& traverser)
+    : CAnomalyDetectorModel(params, dataGatherer, TFeatureInfluenceCalculatorCPtrPrVecVec()),
+      m_StartTime(CAnomalyDetectorModel::TIME_UNSET) {
+    traverser.traverseSubLevel(boost::bind(&CCountingModel::acceptRestoreTraverser, this, _1));
+}

-CCountingModel::CCountingModel(bool isForPersistence, const CCountingModel &other) :
-        CAnomalyDetectorModel(isForPersistence, other),
-        m_StartTime(0),
-        m_MeanCounts(other.m_MeanCounts)
-{
-    if (!isForPersistence)
-    {
+CCountingModel::CCountingModel(bool isForPersistence, const CCountingModel& other)
+    : CAnomalyDetectorModel(isForPersistence, other), m_StartTime(0), m_MeanCounts(other.m_MeanCounts) {
+    if (!isForPersistence) {
         LOG_ABORT("This constructor only creates clones for persistence");
     }
 }

-void CCountingModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void CCountingModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
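acceptPersistInserter, whose body follows, and acceptRestoreTraverser below it both follow the tag-based state serialisation pattern used throughout these files: each field is written under a short string tag, and restore dispatches on tag names so unrecognised tags can simply be skipped. A much-simplified sketch of that shape, with a flat map standing in for core::CStatePersistInserter and core::CStateRestoreTraverser (hypothetical types and tags):

    #include <map>
    #include <sstream>
    #include <string>

    const std::string START_TIME_TAG("a");
    const std::string MEAN_COUNT_TAG("c");

    using TStateMap = std::map<std::string, std::string>; // toy persisted state

    void persist(TStateMap& state, long startTime, double meanCount) {
        state[START_TIME_TAG] = std::to_string(startTime);
        state[MEAN_COUNT_TAG] = std::to_string(meanCount);
    }

    bool restore(const TStateMap& state, long& startTime, double& meanCount) {
        for (const auto& field : state) {
            std::istringstream value(field.second);
            if (field.first == START_TIME_TAG) {
                if (!(value >> startTime)) { return false; } // bad value: fail the restore
            } else if (field.first == MEAN_COUNT_TAG) {
                if (!(value >> meanCount)) { return false; }
            }
            // Unrecognised tags are skipped, mirroring the do/while traverser
            // loops in this file.
        }
        return true;
    }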
inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(), core::CIEEE754::E_SinglePrecision); core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, this->personBucketCounts(), inserter); core::CPersistUtils::persist(MEAN_COUNT_TAG, m_MeanCounts, inserter); this->interimBucketCorrectorAcceptPersistInserter(INTERIM_BUCKET_CORRECTOR_TAG, inserter); } -bool CCountingModel::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == WINDOW_BUCKET_COUNT_TAG) - { +bool CCountingModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == WINDOW_BUCKET_COUNT_TAG) { double count; - if (core::CStringUtils::stringToType(traverser.value(), count) == false) - { + if (core::CStringUtils::stringToType(traverser.value(), count) == false) { LOG_ERROR("Invalid bucket count in " << traverser.value()); return false; } this->windowBucketCount(count); - } - else if (name == PERSON_BUCKET_COUNT_TAG) - { - if (core::CPersistUtils::restore(name, this->personBucketCounts(), traverser) == false) - { + } else if (name == PERSON_BUCKET_COUNT_TAG) { + if (core::CPersistUtils::restore(name, this->personBucketCounts(), traverser) == false) { LOG_ERROR("Invalid bucket counts in " << traverser.value()); return false; } - } - else if (name == MEAN_COUNT_TAG) - { - if (core::CPersistUtils::restore(name, m_MeanCounts, traverser) == false) - { + } else if (name == MEAN_COUNT_TAG) { + if (core::CPersistUtils::restore(name, m_MeanCounts, traverser) == false) { LOG_ERROR("Invalid mean counts"); return false; } - } - else if (name == INTERIM_BUCKET_CORRECTOR_TAG) - { - if (this->interimBucketCorrectorAcceptRestoreTraverser(traverser) == false) - { + } else if (name == INTERIM_BUCKET_CORRECTOR_TAG) { + if (this->interimBucketCorrectorAcceptRestoreTraverser(traverser) == false) { return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -CAnomalyDetectorModel *CCountingModel::cloneForPersistence() const -{ +CAnomalyDetectorModel* CCountingModel::cloneForPersistence() const { return new CCountingModel(true, *this); } -model_t::EModelType CCountingModel::category() const -{ +model_t::EModelType CCountingModel::category() const { return model_t::E_Counting; } -bool CCountingModel::isPopulation() const -{ +bool CCountingModel::isPopulation() const { return false; } -bool CCountingModel::isEventRate() const -{ +bool CCountingModel::isEventRate() const { return false; } -bool CCountingModel::isMetric() const -{ +bool CCountingModel::isMetric() const { return false; } -CCountingModel::TOptionalUInt64 - CCountingModel::currentBucketCount(std::size_t pid, core_t::TTime time) const -{ - if (!this->bucketStatsAvailable(time)) - { - LOG_ERROR("No statistics at " << time - << ", current bucket = " << this->printCurrentBucket()); +CCountingModel::TOptionalUInt64 CCountingModel::currentBucketCount(std::size_t pid, core_t::TTime time) const { + if (!this->bucketStatsAvailable(time)) { + LOG_ERROR("No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); return TOptionalUInt64(); } - auto result = std::lower_bound(m_Counts.begin(), m_Counts.end(), - pid, maths::COrderings::SFirstLess()); + auto result = std::lower_bound(m_Counts.begin(), m_Counts.end(), pid, maths::COrderings::SFirstLess()); - return result != m_Counts.end() && result->first == pid ? 
-           result->second : static_cast<uint64_t>(0);
+    return result != m_Counts.end() && result->first == pid ? result->second : static_cast<uint64_t>(0);
 }

-CCountingModel::TOptionalDouble
-    CCountingModel::baselineBucketCount(std::size_t pid) const
-{
-    return pid < m_MeanCounts.size() ?
-           maths::CBasicStatistics::mean(m_MeanCounts[pid]) : 0.0;
+CCountingModel::TOptionalDouble CCountingModel::baselineBucketCount(std::size_t pid) const {
+    return pid < m_MeanCounts.size() ? maths::CBasicStatistics::mean(m_MeanCounts[pid]) : 0.0;
 }

 CCountingModel::TDouble1Vec
-    CCountingModel::currentBucketValue(model_t::EFeature /*feature*/,
-                                       std::size_t pid,
-                                       std::size_t /*cid*/,
-                                       core_t::TTime time) const
-{
+CCountingModel::currentBucketValue(model_t::EFeature /*feature*/, std::size_t pid, std::size_t /*cid*/, core_t::TTime time) const {
     TOptionalUInt64 count = this->currentBucketCount(pid, time);
     return count ? TDouble1Vec(1, static_cast<double>(*count)) : TDouble1Vec();
 }

-CCountingModel::TDouble1Vec
-    CCountingModel::baselineBucketMean(model_t::EFeature /*feature*/,
-                                       std::size_t pid,
-                                       std::size_t /*cid*/,
-                                       model_t::CResultType /*type*/,
-                                       const TSizeDoublePr1Vec &/*correlated*/,
-                                       core_t::TTime /*time*/) const
-{
+CCountingModel::TDouble1Vec CCountingModel::baselineBucketMean(model_t::EFeature /*feature*/,
+                                                               std::size_t pid,
+                                                               std::size_t /*cid*/,
+                                                               model_t::CResultType /*type*/,
+                                                               const TSizeDoublePr1Vec& /*correlated*/,
+                                                               core_t::TTime /*time*/) const {
     TOptionalDouble count = this->baselineBucketCount(pid);
     return count ? TDouble1Vec(1, *count) : TDouble1Vec();
 }

-void CCountingModel::currentBucketPersonIds(core_t::TTime time, TSizeVec &result) const
-{
+void CCountingModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const {
     using TSizeUSet = boost::unordered_set<std::size_t>;

     result.clear();

-    if (!this->bucketStatsAvailable(time))
-    {
-        LOG_ERROR("No statistics at " << time
-                  << ", current bucket = " << this->printCurrentBucket());
+    if (!this->bucketStatsAvailable(time)) {
+        LOG_ERROR("No statistics at " << time << ", current bucket = " << this->printCurrentBucket());
         return;
     }

     TSizeUSet people;
-    for (const auto &count : m_Counts)
-    {
+    for (const auto& count : m_Counts) {
         people.insert(count.first);
     }
     result.reserve(people.size());
     result.assign(people.begin(), people.end());
 }

-void CCountingModel::sampleOutOfPhase(core_t::TTime startTime,
-                                      core_t::TTime endTime,
-                                      CResourceMonitor &resourceMonitor)
-{
+void CCountingModel::sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
     this->sampleBucketStatistics(startTime, endTime, resourceMonitor);
 }

-void CCountingModel::sampleBucketStatistics(core_t::TTime startTime,
-                                            core_t::TTime endTime,
-                                            CResourceMonitor &resourceMonitor)
-{
-    CDataGatherer &gatherer = this->dataGatherer();
+void CCountingModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+    CDataGatherer& gatherer = this->dataGatherer();
     m_ScheduledEventDescriptions.clear();

-    if (!gatherer.dataAvailable(startTime))
-    {
+    if (!gatherer.dataAvailable(startTime)) {
         return;
     }

     core_t::TTime bucketLength = gatherer.bucketLength();
-    for (core_t::TTime time = startTime; time < endTime; time += bucketLength)
-    {
+    for (core_t::TTime time = startTime; time < endTime; time += bucketLength) {
         this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor);

         gatherer.timeNow(time);
         this->updateCurrentBucketsStats(time);
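currentBucketCount above searches m_Counts with std::lower_bound and maths::COrderings::SFirstLess, i.e. it treats the vector of (person id, count) pairs as a flat map kept sorted on its first element. A self-contained sketch of that lookup pattern (the container and names here are illustrative only):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    using TSizeUInt64Pr = std::pair<std::size_t, std::uint64_t>;
    using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>;

    // Binary search for a pid in a vector sorted by first element; zero if absent.
    std::uint64_t countFor(const TSizeUInt64PrVec& counts, std::size_t pid) {
        auto pos = std::lower_bound(counts.begin(), counts.end(), pid,
                                    [](const TSizeUInt64Pr& lhs, std::size_t rhs) { return lhs.first < rhs; });
        return pos != counts.end() && pos->first == pid ? pos->second : 0;
    }

A flat sorted vector trades O(log n) lookup for much better memory locality than a node-based map, which suits a container rebuilt once per bucket.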
@@ -239,29 +179,23 @@ void CCountingModel::sampleBucketStatistics(core_t::TTime startTime,
     }
 }

-void CCountingModel::sample(core_t::TTime startTime,
-                            core_t::TTime endTime,
-                            CResourceMonitor &resourceMonitor)
-{
-    CDataGatherer &gatherer = this->dataGatherer();
+void CCountingModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+    CDataGatherer& gatherer = this->dataGatherer();
     m_ScheduledEventDescriptions.clear();

-    if (!gatherer.validateSampleTimes(startTime, endTime))
-    {
+    if (!gatherer.validateSampleTimes(startTime, endTime)) {
         return;
     }

     this->createUpdateNewModels(startTime, resourceMonitor);

     core_t::TTime bucketLength = gatherer.bucketLength();
-    for (core_t::TTime time = startTime; time < endTime; time += bucketLength)
-    {
+    for (core_t::TTime time = startTime; time < endTime; time += bucketLength) {
         gatherer.sampleNow(time);
         this->CAnomalyDetectorModel::sample(time, time + bucketLength, resourceMonitor);

         this->updateCurrentBucketsStats(time);
-        for (const auto &count : m_Counts)
-        {
+        for (const auto& count : m_Counts) {
             m_MeanCounts[count.first].add(static_cast<double>(count.second));
         }
@@ -271,29 +205,23 @@ void CCountingModel::sample(core_t::TTime startTime,
     }
 }

-void CCountingModel::setMatchedEventsDescriptions(core_t::TTime sampleTime, core_t::TTime bucketStartTime)
-{
+void CCountingModel::setMatchedEventsDescriptions(core_t::TTime sampleTime, core_t::TTime bucketStartTime) {
     SModelParams::TStrDetectionRulePrVec matchedEvents = this->checkScheduledEvents(sampleTime);

-    if (matchedEvents.empty() == false)
-    {
+    if (matchedEvents.empty() == false) {
         TStr1Vec descriptions;
-        for (auto &event : matchedEvents)
-        {
+        for (auto& event : matchedEvents) {
             descriptions.push_back(event.first);
         }
         m_ScheduledEventDescriptions[bucketStartTime] = descriptions;
     }
 }

-SModelParams::TStrDetectionRulePrVec
-CCountingModel::checkScheduledEvents(core_t::TTime sampleTime) const
-{
-    const SModelParams::TStrDetectionRulePrVec &events = this->params().s_ScheduledEvents.get();
+SModelParams::TStrDetectionRulePrVec CCountingModel::checkScheduledEvents(core_t::TTime sampleTime) const {
+    const SModelParams::TStrDetectionRulePrVec& events = this->params().s_ScheduledEvents.get();
     SModelParams::TStrDetectionRulePrVec matchedEvents;

-    for (auto &event : events)
-    {
+    for (auto& event : events) {
         // Note that as the counting model is not aware of partitions
         // scheduled events cannot support partitions as the code stands.
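The loop this comment sits in collects every configured (description, rule) pair whose rule applies at the sample time; setMatchedEventsDescriptions above then files the matched descriptions under their bucket start time. A stripped-down sketch of that select-and-record shape, using a plain predicate as a hypothetical stand-in for CDetectionRule:

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    using TTime = std::int64_t;
    using TRule = std::function<bool(TTime)>; // hypothetical detection rule
    using TStrRulePrVec = std::vector<std::pair<std::string, TRule>>;

    std::vector<std::string> matchedDescriptions(const TStrRulePrVec& events, TTime sampleTime) {
        std::vector<std::string> descriptions;
        for (const auto& event : events) {
            if (event.second(sampleTime)) { // rule fires at this sample time
                descriptions.push_back(event.first);
            }
        }
        return descriptions;
    }

    // Usage: only non-empty match lists are recorded against their bucket.
    void recordMatches(std::map<TTime, std::vector<std::string>>& byBucket,
                       const TStrRulePrVec& events, TTime sampleTime, TTime bucketStart) {
        std::vector<std::string> matched = matchedDescriptions(events, sampleTime);
        if (!matched.empty()) {
            byBucket[bucketStart] = matched;
        }
    }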
if (event.second.apply(CDetectionRule::E_SkipSampling, @@ -302,8 +230,7 @@ CCountingModel::checkScheduledEvents(core_t::TTime sampleTime) const model_t::CResultType(), model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, - sampleTime)) - { + sampleTime)) { matchedEvents.push_back(event); } } @@ -311,138 +238,112 @@ CCountingModel::checkScheduledEvents(core_t::TTime sampleTime) const return matchedEvents; } -void CCountingModel::currentBucketTotalCount(uint64_t /*totalCount*/) -{ +void CCountingModel::currentBucketTotalCount(uint64_t /*totalCount*/) { // Do nothing } -void CCountingModel::doSkipSampling(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/) -{ +void CCountingModel::doSkipSampling(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/) { // Do nothing } -void CCountingModel::prune(std::size_t /*maximumAge*/) -{ +void CCountingModel::prune(std::size_t /*maximumAge*/) { } bool CCountingModel::computeProbability(std::size_t pid, core_t::TTime startTime, core_t::TTime endTime, - CPartitioningFields &/*partitioningFields*/, + CPartitioningFields& /*partitioningFields*/, std::size_t /*numberAttributeProbabilities*/, - SAnnotatedProbability &result) const -{ + SAnnotatedProbability& result) const { result = SAnnotatedProbability(1.0); - result.s_CurrentBucketCount = this->currentBucketCount(pid, (startTime + endTime) / 2 - 1); + result.s_CurrentBucketCount = this->currentBucketCount(pid, (startTime + endTime) / 2 - 1); result.s_BaselineBucketCount = this->baselineBucketCount(pid); return true; } -bool CCountingModel::computeTotalProbability(const std::string &/*person*/, +bool CCountingModel::computeTotalProbability(const std::string& /*person*/, std::size_t /*numberAttributeProbabilities*/, - TOptionalDouble &probability, - TAttributeProbability1Vec &attributeProbabilities) const -{ + TOptionalDouble& probability, + TAttributeProbability1Vec& attributeProbabilities) const { probability.reset(1.0); attributeProbabilities.clear(); return true; } -uint64_t CCountingModel::checksum(bool includeCurrentBucketStats) const -{ +uint64_t CCountingModel::checksum(bool includeCurrentBucketStats) const { uint64_t result = this->CAnomalyDetectorModel::checksum(includeCurrentBucketStats); result = maths::CChecksum::calculate(result, m_MeanCounts); - if (includeCurrentBucketStats) - { + if (includeCurrentBucketStats) { result = maths::CChecksum::calculate(result, m_StartTime); result = maths::CChecksum::calculate(result, m_Counts); } return result; } -void CCountingModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CCountingModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCountingModel"); this->CAnomalyDetectorModel::debugMemoryUsage(mem->addChild()); core::CMemoryDebug::dynamicSize("m_Counts", m_Counts, mem); core::CMemoryDebug::dynamicSize("m_MeanCounts", m_MeanCounts, mem); } -std::size_t CCountingModel::memoryUsage() const -{ - return this->CAnomalyDetectorModel::memoryUsage() - + core::CMemory::dynamicSize(m_Counts) - + core::CMemory::dynamicSize(m_MeanCounts); +std::size_t CCountingModel::memoryUsage() const { + return this->CAnomalyDetectorModel::memoryUsage() + core::CMemory::dynamicSize(m_Counts) + core::CMemory::dynamicSize(m_MeanCounts); } -std::size_t CCountingModel::computeMemoryUsage() const -{ +std::size_t CCountingModel::computeMemoryUsage() const { return this->memoryUsage(); } -std::size_t CCountingModel::staticSize() const -{ +std::size_t CCountingModel::staticSize() const { 
return sizeof(*this); } -CCountingModel::CModelDetailsViewPtr CCountingModel::details() const -{ +CCountingModel::CModelDetailsViewPtr CCountingModel::details() const { return CModelDetailsViewPtr(); } -core_t::TTime CCountingModel::currentBucketStartTime() const -{ +core_t::TTime CCountingModel::currentBucketStartTime() const { return m_StartTime; } -void CCountingModel::currentBucketStartTime(core_t::TTime time) -{ +void CCountingModel::currentBucketStartTime(core_t::TTime time) { m_StartTime = time; } -const CCountingModel::TStr1Vec &CCountingModel::scheduledEventDescriptions(core_t::TTime time) const -{ +const CCountingModel::TStr1Vec& CCountingModel::scheduledEventDescriptions(core_t::TTime time) const { auto it = m_ScheduledEventDescriptions.find(time); - if (it == m_ScheduledEventDescriptions.end()) - { + if (it == m_ScheduledEventDescriptions.end()) { return EMPTY_STRING_LIST; } return it->second; } -double CCountingModel::attributeFrequency(std::size_t /*cid*/) const -{ +double CCountingModel::attributeFrequency(std::size_t /*cid*/) const { return 1.0; } -void CCountingModel::createUpdateNewModels(core_t::TTime /*time*/, - CResourceMonitor &/*resourceMonitor*/) -{ +void CCountingModel::createUpdateNewModels(core_t::TTime /*time*/, CResourceMonitor& /*resourceMonitor*/) { this->updateRecycledModels(); - CDataGatherer &gatherer = this->dataGatherer(); + CDataGatherer& gatherer = this->dataGatherer(); std::size_t numberNewPeople = gatherer.numberPeople(); std::size_t numberExistingPeople = m_MeanCounts.size(); - numberNewPeople = numberNewPeople > numberExistingPeople ? - numberNewPeople - numberExistingPeople : 0; - if (numberNewPeople > 0) - { + numberNewPeople = numberNewPeople > numberExistingPeople ? numberNewPeople - numberExistingPeople : 0; + if (numberNewPeople > 0) { LOG_TRACE("Creating " << numberNewPeople << " new people"); this->createNewModels(numberNewPeople, 0); } } -void CCountingModel::createNewModels(std::size_t n, std::size_t m) -{ - if (n > 0) - { +void CCountingModel::createNewModels(std::size_t n, std::size_t m) { + if (n > 0) { core::CAllocationStrategy::resize(m_MeanCounts, m_MeanCounts.size() + n); } this->CAnomalyDetectorModel::createNewModels(n, m); } -void CCountingModel::updateCurrentBucketsStats(core_t::TTime time) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CCountingModel::updateCurrentBucketsStats(core_t::TTime time) { + CDataGatherer& gatherer = this->dataGatherer(); // Currently, we only remember one bucket. m_StartTime = time; @@ -451,45 +352,34 @@ void CCountingModel::updateCurrentBucketsStats(core_t::TTime time) // Results are only output if currentBucketPersonIds is // not empty. Therefore, we need to explicitly set the // count to 0 so that we output results. 
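The comment above, together with the guard that follows it, implements a small but important trick: downstream result writers iterate the ids returned by currentBucketPersonIds, so a bucket with no data at all would otherwise produce no simple-count row. A toy illustration of why the explicit (0, 0) placeholder matters (hypothetical reporting loop, not the real output path):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    using TSizeUInt64PrVec = std::vector<std::pair<std::size_t, std::uint64_t>>;

    // Result writers iterate person ids; no ids means no output row at all.
    void report(const TSizeUInt64PrVec& counts) {
        for (const auto& count : counts) {
            std::cout << "person " << count.first << " count " << count.second << '\n';
        }
    }

    int main() {
        TSizeUInt64PrVec counts; // an empty bucket
        if (counts.empty()) {
            counts.emplace_back(0, 0); // placeholder keeps the zero-count row visible
        }
        report(counts); // prints "person 0 count 0" instead of nothing
    }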
-    if (m_Counts.empty())
-    {
+    if (m_Counts.empty()) {
         m_Counts.emplace_back(0, 0);
     }
 }

-void CCountingModel::updateRecycledModels()
-{
-    for (auto person : this->dataGatherer().recycledPersonIds())
-    {
+void CCountingModel::updateRecycledModels() {
+    for (auto person : this->dataGatherer().recycledPersonIds()) {
         m_MeanCounts[person] = TMeanAccumulator();
     }
     this->CAnomalyDetectorModel::updateRecycledModels();
 }

-void CCountingModel::clearPrunedResources(const TSizeVec &/*people*/, const TSizeVec &/*attributes*/)
-{
+void CCountingModel::clearPrunedResources(const TSizeVec& /*people*/, const TSizeVec& /*attributes*/) {
     // Nothing to prune
 }

-bool CCountingModel::bucketStatsAvailable(core_t::TTime time) const
-{
+bool CCountingModel::bucketStatsAvailable(core_t::TTime time) const {
     return time >= m_StartTime && time < m_StartTime + this->bucketLength();
 }

-std::string CCountingModel::printCurrentBucket() const
-{
+std::string CCountingModel::printCurrentBucket() const {
     std::ostringstream result;
     result << "[" << m_StartTime << "," << m_StartTime + this->bucketLength() << ")";
     return result.str();
 }

-CMemoryUsageEstimator *CCountingModel::memoryUsageEstimator() const
-{
+CMemoryUsageEstimator* CCountingModel::memoryUsageEstimator() const {
     return 0;
 }
-
-
 }
 }
-
-
diff --git a/lib/model/CCountingModelFactory.cc b/lib/model/CCountingModelFactory.cc
index e8d4905b2c..ea81faeefb 100644
--- a/lib/model/CCountingModelFactory.cc
+++ b/lib/model/CCountingModelFactory.cc
@@ -11,58 +11,50 @@
 #include
 #include

-#include
 #include
+#include
 #include
 #include

-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

-CCountingModelFactory::CCountingModelFactory(const SModelParams &params,
+CCountingModelFactory::CCountingModelFactory(const SModelParams& params,
                                              model_t::ESummaryMode summaryMode,
-                                             const std::string &summaryCountFieldName) :
-        CModelFactory(params),
-        m_Identifier(),
-        m_SummaryMode(summaryMode),
-        m_SummaryCountFieldName(summaryCountFieldName),
-        m_UseNull(false),
-        m_BucketResultsDelay(0)
-{}
-
-CCountingModelFactory *CCountingModelFactory::clone() const
-{
+                                             const std::string& summaryCountFieldName)
+    : CModelFactory(params),
+      m_Identifier(),
+      m_SummaryMode(summaryMode),
+      m_SummaryCountFieldName(summaryCountFieldName),
+      m_UseNull(false),
+      m_BucketResultsDelay(0) {
+}
+
+CCountingModelFactory* CCountingModelFactory::clone() const {
     return new CCountingModelFactory(*this);
 }

-CAnomalyDetectorModel *CCountingModelFactory::makeModel(const SModelInitializationData &initData) const
-{
+CAnomalyDetectorModel* CCountingModelFactory::makeModel(const SModelInitializationData& initData) const {
     TDataGathererPtr dataGatherer = initData.s_DataGatherer;
-    if (!dataGatherer)
-    {
+    if (!dataGatherer) {
         LOG_ERROR("NULL data gatherer");
         return 0;
     }
     return new CCountingModel(this->modelParams(), dataGatherer);
 }

-CAnomalyDetectorModel *CCountingModelFactory::makeModel(const SModelInitializationData &initData,
-                                                        core::CStateRestoreTraverser &traverser) const
-{
+CAnomalyDetectorModel* CCountingModelFactory::makeModel(const SModelInitializationData& initData,
+                                                        core::CStateRestoreTraverser& traverser) const {
     TDataGathererPtr dataGatherer = initData.s_DataGatherer;
-    if (!dataGatherer)
-    {
+    if (!dataGatherer) {
         LOG_ERROR("NULL data gatherer");
         return 0;
     }
     return new CCountingModel(this->modelParams(), dataGatherer, traverser);
 }

-CDataGatherer *CCountingModelFactory::makeDataGatherer(const SGathererInitializationData &initData) const
-{
+CDataGatherer*
CCountingModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(), @@ -80,9 +72,8 @@ CDataGatherer *CCountingModelFactory::makeDataGatherer(const SGathererInitializa 0); } -CDataGatherer *CCountingModelFactory::makeDataGatherer(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser) const -{ +CDataGatherer* CCountingModelFactory::makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const { return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(), @@ -98,31 +89,22 @@ CDataGatherer *CCountingModelFactory::makeDataGatherer(const std::string &partit traverser); } -CCountingModelFactory::TPriorPtr - CCountingModelFactory::defaultPrior(model_t::EFeature /*feature*/, - const SModelParams &/*params*/) const -{ +CCountingModelFactory::TPriorPtr CCountingModelFactory::defaultPrior(model_t::EFeature /*feature*/, const SModelParams& /*params*/) const { return boost::make_shared(); } -CCountingModelFactory::TMultivariatePriorPtr - CCountingModelFactory::defaultMultivariatePrior(model_t::EFeature feature, - const SModelParams &/*params*/) const -{ +CCountingModelFactory::TMultivariatePriorPtr CCountingModelFactory::defaultMultivariatePrior(model_t::EFeature feature, + const SModelParams& /*params*/) const { return boost::make_shared(model_t::dimension(feature)); } -CCountingModelFactory::TMultivariatePriorPtr - CCountingModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, - const SModelParams &/*params*/) const -{ +CCountingModelFactory::TMultivariatePriorPtr CCountingModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, + const SModelParams& /*params*/) const { return boost::make_shared(2); } -const CSearchKey &CCountingModelFactory::searchKey() const -{ - if (!m_SearchKeyCache) - { +const CSearchKey& CCountingModelFactory::searchKey() const { + if (!m_SearchKeyCache) { m_SearchKeyCache.reset(CSearchKey(m_Identifier, function_t::function(m_Features), m_UseNull, @@ -135,72 +117,57 @@ const CSearchKey &CCountingModelFactory::searchKey() const return *m_SearchKeyCache; } -bool CCountingModelFactory::isSimpleCount() const -{ - return CSearchKey::isSimpleCount(function_t::function(m_Features), - m_PersonFieldName); +bool CCountingModelFactory::isSimpleCount() const { + return CSearchKey::isSimpleCount(function_t::function(m_Features), m_PersonFieldName); } -model_t::ESummaryMode CCountingModelFactory::summaryMode() const -{ +model_t::ESummaryMode CCountingModelFactory::summaryMode() const { return m_SummaryMode; } -maths_t::EDataType CCountingModelFactory::dataType() const -{ +maths_t::EDataType CCountingModelFactory::dataType() const { return maths_t::E_IntegerData; } -void CCountingModelFactory::identifier(int identifier) -{ +void CCountingModelFactory::identifier(int identifier) { m_Identifier = identifier; m_SearchKeyCache.reset(); } -void CCountingModelFactory::fieldNames(const std::string &partitionFieldName, - const std::string &/*overFieldName*/, - const std::string &byFieldName, - const std::string &/*valueFieldName*/, - const TStrVec &/*influenceFieldNames*/) -{ +void CCountingModelFactory::fieldNames(const std::string& partitionFieldName, + const std::string& /*overFieldName*/, + const std::string& byFieldName, + const std::string& /*valueFieldName*/, + const TStrVec& /*influenceFieldNames*/) { m_PartitionFieldName = partitionFieldName; m_PersonFieldName = 
byFieldName;
     m_SearchKeyCache.reset();
 }

-void CCountingModelFactory::useNull(bool useNull)
-{
+void CCountingModelFactory::useNull(bool useNull) {
     m_UseNull = useNull;
     m_SearchKeyCache.reset();
 }

-void CCountingModelFactory::features(const TFeatureVec &features)
-{
+void CCountingModelFactory::features(const TFeatureVec& features) {
     m_Features = features;
     m_SearchKeyCache.reset();
 }

-void CCountingModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay)
-{
+void CCountingModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) {
     m_BucketResultsDelay = bucketResultsDelay;
 }

-CCountingModelFactory::TStrCRefVec
-    CCountingModelFactory::partitioningFields() const
-{
+CCountingModelFactory::TStrCRefVec CCountingModelFactory::partitioningFields() const {
     TStrCRefVec result;
     result.reserve(2);
-    if (!m_PartitionFieldName.empty())
-    {
+    if (!m_PartitionFieldName.empty()) {
         result.emplace_back(m_PartitionFieldName);
     }
-    if (!m_PersonFieldName.empty())
-    {
+    if (!m_PersonFieldName.empty()) {
         result.emplace_back(m_PersonFieldName);
     }
     return result;
 }
-
 }
 }
-
diff --git a/lib/model/CDataClassifier.cc b/lib/model/CDataClassifier.cc
index 805d0291f7..05129282c2 100644
--- a/lib/model/CDataClassifier.cc
+++ b/lib/model/CDataClassifier.cc
@@ -16,70 +16,51 @@

 #include

-namespace ml
-{
-namespace model
-{
-namespace
-{
+namespace ml {
+namespace model {
+namespace {
 const double EPS{10.0 * std::numeric_limits<double>::epsilon()};
 const std::string IS_INTEGER_TAG{"a"};
 const std::string IS_NON_NEGATIVE_TAG{"b"};
 std::string EMPTY_STRING;
 }

-void CDataClassifier::add(model_t::EFeature feature,
-                          double value,
-                          unsigned int count)
-{
+void CDataClassifier::add(model_t::EFeature feature, double value, unsigned int count) {
     m_IsNonNegative = m_IsNonNegative && value >= 0.0;
-    if (m_IsInteger)
-    {
-        if (model_t::isMeanFeature(feature))
-        {
+    if (m_IsInteger) {
+        if (model_t::isMeanFeature(feature)) {
             value *= count;
         }
         m_IsInteger = maths::CIntegerTools::isInteger(value, EPS * value);
     }
 }

-void CDataClassifier::add(model_t::EFeature feature,
-                          const TDouble1Vec &values,
-                          unsigned int count)
-{
-    for (const auto &value : values)
-    {
+void CDataClassifier::add(model_t::EFeature feature, const TDouble1Vec& values, unsigned int count) {
+    for (const auto& value : values) {
         this->add(feature, value, count);
     }
 }

-bool CDataClassifier::isInteger() const
-{
+bool CDataClassifier::isInteger() const {
     return m_IsInteger;
 }

-bool CDataClassifier::isNonNegative() const
-{
+bool CDataClassifier::isNonNegative() const {
     return m_IsNonNegative;
 }

-void CDataClassifier::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void CDataClassifier::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     inserter.insertValue(IS_INTEGER_TAG, static_cast<int>(m_IsInteger));
     inserter.insertValue(IS_NON_NEGATIVE_TAG, static_cast<int>(m_IsNonNegative));
 }

-bool CDataClassifier::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
+bool CDataClassifier::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
         RESTORE_BOOL(IS_INTEGER_TAG, m_IsInteger)
         RESTORE_BOOL(IS_NON_NEGATIVE_TAG, m_IsNonNegative)
-    }
-    while (traverser.next());
+    } while (traverser.next());

     return true;
 }
-
 }
 }
diff --git a/lib/model/CDataGatherer.cc b/lib/model/CDataGatherer.cc
index e0d60ec925..758ef01c93 100644
--- a/lib/model/CDataGatherer.cc
+++ b/lib/model/CDataGatherer.cc
@@ -28,13 +28,10 @@

 #include
-namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { const std::string FEATURE_TAG("a"); const std::string PEOPLE_REGISTRY_TAG("b"); @@ -49,88 +46,81 @@ const std::string ATTRIBUTE("attribute"); const std::string EMPTY_STRING; -namespace detail -{ +namespace detail { //! Make sure \p features only includes supported features, doesn't //! contain any duplicates, etc. -const CDataGatherer::TFeatureVec &sanitize(CDataGatherer::TFeatureVec &features, - model_t::EAnalysisCategory gathererType) -{ +const CDataGatherer::TFeatureVec& sanitize(CDataGatherer::TFeatureVec& features, model_t::EAnalysisCategory gathererType) { std::size_t j = 0u; - for (std::size_t i = 0u; i < features.size(); ++i) - { - switch (gathererType) - { - case model_t::E_EventRate: - case model_t::E_PopulationEventRate: - case model_t::E_PeersEventRate: - switch (features[i]) - { - CASE_INDIVIDUAL_COUNT: - features[j] = features[i]; - ++j; - break; - - CASE_INDIVIDUAL_METRIC: - LOG_ERROR("Unexpected feature = " << model_t::print(features[i])); - break; - - CASE_POPULATION_COUNT: - features[j] = features[i]; - ++j; - break; - - CASE_POPULATION_METRIC: - LOG_ERROR("Unexpected feature = " << model_t::print(features[i])); - break; - - CASE_PEERS_COUNT: - features[j] = features[i]; - ++j; - break; - - CASE_PEERS_METRIC: - LOG_ERROR("Unexpected feature = " << model_t::print(features[i])); - break; - } + for (std::size_t i = 0u; i < features.size(); ++i) { + switch (gathererType) { + case model_t::E_EventRate: + case model_t::E_PopulationEventRate: + case model_t::E_PeersEventRate: + switch (features[i]) { + CASE_INDIVIDUAL_COUNT: + features[j] = features[i]; + ++j; + break; + + CASE_INDIVIDUAL_METRIC: + LOG_ERROR("Unexpected feature = " << model_t::print(features[i])); + break; + + CASE_POPULATION_COUNT: + features[j] = features[i]; + ++j; + break; + + CASE_POPULATION_METRIC: + LOG_ERROR("Unexpected feature = " << model_t::print(features[i])); + break; + + CASE_PEERS_COUNT: + features[j] = features[i]; + ++j; break; - case model_t::E_Metric: - case model_t::E_PopulationMetric: - case model_t::E_PeersMetric: - - switch (features[i]) - { - CASE_INDIVIDUAL_COUNT: - LOG_ERROR("Unexpected feature = " << model_t::print(features[i])) - break; - - CASE_INDIVIDUAL_METRIC: - features[j] = features[i]; - ++j; - break; - - CASE_POPULATION_COUNT: - LOG_ERROR("Unexpected feature = " << model_t::print(features[i])) - break; - - CASE_POPULATION_METRIC: - features[j] = features[i]; - ++j; - break; - - CASE_PEERS_COUNT: - LOG_ERROR("Unexpected feature = " << model_t::print(features[i])) - break; - - CASE_PEERS_METRIC: - features[j] = features[i]; - ++j; - break; - } + CASE_PEERS_METRIC: + LOG_ERROR("Unexpected feature = " << model_t::print(features[i])); break; + } + break; + + case model_t::E_Metric: + case model_t::E_PopulationMetric: + case model_t::E_PeersMetric: + + switch (features[i]) { + CASE_INDIVIDUAL_COUNT: + LOG_ERROR("Unexpected feature = " << model_t::print(features[i])) + break; + + CASE_INDIVIDUAL_METRIC: + features[j] = features[i]; + ++j; + break; + + CASE_POPULATION_COUNT: + LOG_ERROR("Unexpected feature = " << model_t::print(features[i])) + break; + + CASE_POPULATION_METRIC: + features[j] = features[i]; + ++j; + break; + + CASE_PEERS_COUNT: + LOG_ERROR("Unexpected feature = " << model_t::print(features[i])) + break; + + CASE_PEERS_METRIC: + features[j] = features[i]; + ++j; + break; + } + break; } } @@ -142,27 +132,23 @@ const CDataGatherer::TFeatureVec 
&sanitize(CDataGatherer::TFeatureVec &features,
 }

 //! Wrapper which copies \p features.
-CDataGatherer::TFeatureVec sanitize(const CDataGatherer::TFeatureVec &features,
-                                    model_t::EAnalysisCategory gathererType)
-{
+CDataGatherer::TFeatureVec sanitize(const CDataGatherer::TFeatureVec& features, model_t::EAnalysisCategory gathererType) {
     CDataGatherer::TFeatureVec result(features);
     return sanitize(result, gathererType);
 }

 //! Check if the gatherer is for population modelling.
-bool isPopulation(model_t::EAnalysisCategory gathererType)
-{
-    switch (gathererType)
-    {
-    case model_t::E_EventRate:
-    case model_t::E_Metric:
-        return false;
+bool isPopulation(model_t::EAnalysisCategory gathererType) {
+    switch (gathererType) {
+    case model_t::E_EventRate:
+    case model_t::E_Metric:
+        return false;

-    case model_t::E_PopulationEventRate:
-    case model_t::E_PeersEventRate:
-    case model_t::E_PopulationMetric:
-    case model_t::E_PeersMetric:
-        return true;
+    case model_t::E_PopulationEventRate:
+    case model_t::E_PeersEventRate:
+    case model_t::E_PopulationMetric:
+    case model_t::E_PeersMetric:
+        return true;
     }
     return false;
 }
@@ -170,7 +156,6 @@ bool isPopulation(model_t::EAnalysisCategory gathererType)
 } // detail::
 } // unnamed::

-
 const std::string CDataGatherer::EXPLICIT_NULL("null");
 const std::size_t CDataGatherer::EXPLICIT_NULL_SUMMARY_COUNT(std::numeric_limits<std::size_t>::max());
 const std::size_t CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD(10000);
@@ -178,37 +163,33 @@ const std::size_t CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD(1000);

 CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType,
                              model_t::ESummaryMode summaryMode,
-                             const SModelParams &modelParams,
-                             const std::string &summaryCountFieldName,
-                             const std::string &partitionFieldName,
-                             const std::string &partitionFieldValue,
-                             const std::string &personFieldName,
-                             const std::string &attributeFieldName,
-                             const std::string &valueFieldName,
-                             const TStrVec &influenceFieldNames,
+                             const SModelParams& modelParams,
+                             const std::string& summaryCountFieldName,
+                             const std::string& partitionFieldName,
+                             const std::string& partitionFieldValue,
+                             const std::string& personFieldName,
+                             const std::string& attributeFieldName,
+                             const std::string& valueFieldName,
+                             const TStrVec& influenceFieldNames,
                              bool useNull,
-                             const CSearchKey &key,
-                             const TFeatureVec &features,
+                             const CSearchKey& key,
+                             const TFeatureVec& features,
                              core_t::TTime startTime,
-                             int sampleCountOverride) :
-        m_GathererType(gathererType),
-        m_Features(detail::sanitize(features, gathererType)),
-        m_SummaryMode(summaryMode),
-        m_Params(modelParams),
-        m_PartitionFieldName(partitionFieldName),
-        m_PartitionFieldValue(CStringStore::names().get(partitionFieldValue)),
-        m_SearchKey(key),
-        m_PeopleRegistry(PERSON,
-                         stat_t::E_NumberNewPeople,
-                         stat_t::E_NumberNewPeopleNotAllowed,
-                         stat_t::E_NumberNewPeopleRecycled),
-        m_AttributesRegistry(ATTRIBUTE,
-                             stat_t::E_NumberNewAttributes,
-                             stat_t::E_NumberNewAttributesNotAllowed,
-                             stat_t::E_NumberNewAttributesRecycled),
-        m_Population(detail::isPopulation(gathererType)),
-        m_UseNull(useNull)
-{
+                             int sampleCountOverride)
+    : m_GathererType(gathererType),
+      m_Features(detail::sanitize(features, gathererType)),
+      m_SummaryMode(summaryMode),
+      m_Params(modelParams),
+      m_PartitionFieldName(partitionFieldName),
+      m_PartitionFieldValue(CStringStore::names().get(partitionFieldValue)),
+      m_SearchKey(key),
+      m_PeopleRegistry(PERSON, stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled),
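The feature-vector sanitize() routine earlier in this hunk filters in place with an explicit write index j: supported features are copied down over rejected ones and the stale tail is then dropped. A generic sketch of that compaction idiom (the final erase of the tail is an assumption, since it falls outside the quoted hunk):

    #include <cstddef>
    #include <vector>

    // Keep elements satisfying `keep`, preserving order; equivalent in effect
    // to the erase/remove_if idiom but with an explicit write index.
    template<typename T, typename PREDICATE>
    void compactInPlace(std::vector<T>& values, PREDICATE keep) {
        std::size_t j = 0;
        for (std::size_t i = 0; i < values.size(); ++i) {
            if (keep(values[i])) {
                values[j] = values[i]; // shift kept element down over rejected ones
                ++j;
            }
        }
        values.erase(values.begin() + j, values.end()); // drop the stale tail
    }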
m_AttributesRegistry(ATTRIBUTE, + stat_t::E_NumberNewAttributes, + stat_t::E_NumberNewAttributesNotAllowed, + stat_t::E_NumberNewAttributesRecycled), + m_Population(detail::isPopulation(gathererType)), + m_UseNull(useNull) { // Constructor needs to create 1 bucket gatherer at the startTime // and possibly 1 bucket gatherer at (startTime + bucketLength / 2). @@ -224,8 +205,7 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, startTime, sampleCountOverride); - if (modelParams.s_BucketResultsDelay > 0) - { + if (modelParams.s_BucketResultsDelay > 0) { this->createBucketGatherer(gathererType, summaryCountFieldName, personFieldName, @@ -239,34 +219,30 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, model_t::ESummaryMode summaryMode, - const SModelParams &modelParams, - const std::string &summaryCountFieldName, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, + const SModelParams& modelParams, + const std::string& summaryCountFieldName, + const std::string& partitionFieldName, + const std::string& partitionFieldValue, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, bool useNull, - const CSearchKey &key, - core::CStateRestoreTraverser &traverser) : - m_GathererType(gathererType), - m_SummaryMode(summaryMode), - m_Params(modelParams), - m_PartitionFieldName(partitionFieldName), - m_PartitionFieldValue(CStringStore::names().get(partitionFieldValue)), - m_SearchKey(key), - m_PeopleRegistry(PERSON, - stat_t::E_NumberNewPeople, - stat_t::E_NumberNewPeopleNotAllowed, - stat_t::E_NumberNewPeopleRecycled), - m_AttributesRegistry(ATTRIBUTE, - stat_t::E_NumberNewAttributes, - stat_t::E_NumberNewAttributesNotAllowed, - stat_t::E_NumberNewAttributesRecycled), - m_Population(detail::isPopulation(gathererType)), - m_UseNull(useNull) -{ + const CSearchKey& key, + core::CStateRestoreTraverser& traverser) + : m_GathererType(gathererType), + m_SummaryMode(summaryMode), + m_Params(modelParams), + m_PartitionFieldName(partitionFieldName), + m_PartitionFieldValue(CStringStore::names().get(partitionFieldValue)), + m_SearchKey(key), + m_PeopleRegistry(PERSON, stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled), + m_AttributesRegistry(ATTRIBUTE, + stat_t::E_NumberNewAttributes, + stat_t::E_NumberNewAttributesNotAllowed, + stat_t::E_NumberNewAttributesRecycled), + m_Population(detail::isPopulation(gathererType)), + m_UseNull(useNull) { if (traverser.traverseSubLevel(boost::bind(&CDataGatherer::acceptRestoreTraverser, this, boost::cref(summaryCountFieldName), @@ -274,151 +250,120 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, boost::cref(attributeFieldName), boost::cref(valueFieldName), boost::cref(influenceFieldNames), - _1)) == false) - { + _1)) == false) { LOG_ERROR("Failed to correctly restore data gatherer"); } } -CDataGatherer::CDataGatherer(bool isForPersistence, const CDataGatherer &other) : - m_GathererType(other.m_GathererType), - m_Features(other.m_Features), - m_SummaryMode(other.m_SummaryMode), - m_Params(other.m_Params), - m_PartitionFieldName(other.m_PartitionFieldName), - m_PartitionFieldValue(other.m_PartitionFieldValue), - 
m_SearchKey(other.m_SearchKey), - m_PeopleRegistry(isForPersistence, other.m_PeopleRegistry), - m_AttributesRegistry(isForPersistence, other.m_AttributesRegistry), - m_Population(other.m_Population), - m_UseNull(other.m_UseNull) -{ - if (!isForPersistence) - { +CDataGatherer::CDataGatherer(bool isForPersistence, const CDataGatherer& other) + : m_GathererType(other.m_GathererType), + m_Features(other.m_Features), + m_SummaryMode(other.m_SummaryMode), + m_Params(other.m_Params), + m_PartitionFieldName(other.m_PartitionFieldName), + m_PartitionFieldValue(other.m_PartitionFieldValue), + m_SearchKey(other.m_SearchKey), + m_PeopleRegistry(isForPersistence, other.m_PeopleRegistry), + m_AttributesRegistry(isForPersistence, other.m_AttributesRegistry), + m_Population(other.m_Population), + m_UseNull(other.m_UseNull) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } - for (TBucketGathererPVecCItr i = other.m_Gatherers.begin(); i != other.m_Gatherers.end(); ++i) - { + for (TBucketGathererPVecCItr i = other.m_Gatherers.begin(); i != other.m_Gatherers.end(); ++i) { m_Gatherers.push_back((*i)->cloneForPersistence()); } - if (other.m_SampleCounts) - { + if (other.m_SampleCounts) { m_SampleCounts.reset(other.m_SampleCounts->cloneForPersistence()); } } -CDataGatherer::~CDataGatherer() -{ - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) - { +CDataGatherer::~CDataGatherer() { + for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { delete *i; } } -CDataGatherer *CDataGatherer::cloneForPersistence() const -{ +CDataGatherer* CDataGatherer::cloneForPersistence() const { return new CDataGatherer(true, *this); } -model_t::ESummaryMode CDataGatherer::summaryMode() const -{ +model_t::ESummaryMode CDataGatherer::summaryMode() const { return m_SummaryMode; } -model::function_t::EFunction CDataGatherer::function() const -{ +model::function_t::EFunction CDataGatherer::function() const { return function_t::function(this->features()); } -bool CDataGatherer::isPopulation() const -{ +bool CDataGatherer::isPopulation() const { return m_Population; } -std::string CDataGatherer::description() const -{ +std::string CDataGatherer::description() const { return m_Gatherers.front()->description(); } -std::size_t CDataGatherer::maxDimension() const -{ +std::size_t CDataGatherer::maxDimension() const { return std::max(this->numberPeople(), this->numberAttributes()); } -const std::string &CDataGatherer::partitionFieldName() const -{ +const std::string& CDataGatherer::partitionFieldName() const { return m_PartitionFieldName; } -const std::string &CDataGatherer::partitionFieldValue() const -{ +const std::string& CDataGatherer::partitionFieldValue() const { return *m_PartitionFieldValue; } -const CSearchKey &CDataGatherer::searchKey() const -{ +const CSearchKey& CDataGatherer::searchKey() const { return m_SearchKey; } -CDataGatherer::TStrVecCItr CDataGatherer::beginInfluencers() const -{ +CDataGatherer::TStrVecCItr CDataGatherer::beginInfluencers() const { return m_Gatherers.front()->beginInfluencers(); } -CDataGatherer::TStrVecCItr CDataGatherer::endInfluencers() const -{ +CDataGatherer::TStrVecCItr CDataGatherer::endInfluencers() const { return m_Gatherers.front()->endInfluencers(); } -const std::string &CDataGatherer::personFieldName() const -{ +const std::string& CDataGatherer::personFieldName() const { return m_Gatherers.front()->personFieldName(); } -const std::string &CDataGatherer::attributeFieldName() const -{ +const 
std::string& CDataGatherer::attributeFieldName() const { return m_Gatherers.front()->attributeFieldName(); } -const std::string &CDataGatherer::valueFieldName() const -{ +const std::string& CDataGatherer::valueFieldName() const { return m_Gatherers.front()->valueFieldName(); } -const CDataGatherer::TStrVec &CDataGatherer::fieldsOfInterest() const -{ +const CDataGatherer::TStrVec& CDataGatherer::fieldsOfInterest() const { return m_Gatherers.front()->fieldsOfInterest(); } -std::size_t CDataGatherer::numberByFieldValues() const -{ +std::size_t CDataGatherer::numberByFieldValues() const { return this->isPopulation() ? this->numberActiveAttributes() : this->numberActivePeople(); } -std::size_t CDataGatherer::numberOverFieldValues() const -{ +std::size_t CDataGatherer::numberOverFieldValues() const { return this->isPopulation() ? this->numberActivePeople() : 0; } -bool CDataGatherer::processFields(const TStrCPtrVec &fieldValues, - CEventData &result, - CResourceMonitor &resourceMonitor) -{ +bool CDataGatherer::processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) { return m_Gatherers.front()->processFields(fieldValues, result, resourceMonitor); } -bool CDataGatherer::addArrival(const TStrCPtrVec &fieldValues, - CEventData &data, - CResourceMonitor &resourceMonitor) -{ +bool CDataGatherer::addArrival(const TStrCPtrVec& fieldValues, CEventData& data, CResourceMonitor& resourceMonitor) { // We process fields even if we are in the first partial bucket so that // we add enough extra memory to the resource monitor in order to control // the number of partitions created. m_Gatherers.front()->processFields(fieldValues, data, resourceMonitor); core_t::TTime time = data.time(); - if (time < m_Gatherers.front()->earliestBucketStartTime()) - { + if (time < m_Gatherers.front()->earliestBucketStartTime()) { // Ignore records that are out of the latency window. // Records in an incomplete first bucket will end up here, // but we don't want to model these. 
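addArrival above drops any record whose timestamp falls before the earliest bucket start the gatherers still maintain, i.e. outside the latency window within which out-of-order data is accepted. A small sketch of that admission check; the exact window arithmetic here is an assumption for illustration, not taken from the patch:

    #include <cstddef>
    #include <cstdint>

    using TTime = std::int64_t;

    // Assumed relationship: the gatherer keeps latencyBuckets whole buckets
    // behind the current one, and anything older than that is ignored.
    bool withinLatencyWindow(TTime recordTime, TTime currentBucketStart,
                             TTime bucketLength, std::size_t latencyBuckets) {
        TTime earliestBucketStart = currentBucketStart - static_cast<TTime>(latencyBuckets) * bucketLength;
        return recordTime >= earliestBucketStart;
    }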
@@ -426,102 +371,80 @@ bool CDataGatherer::addArrival(const TStrCPtrVec &fieldValues, } bool result = true; - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) - { + for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { result &= (*i)->addEventData(data); } return result; } -void CDataGatherer::sampleNow(core_t::TTime sampleBucketStart) -{ +void CDataGatherer::sampleNow(core_t::TTime sampleBucketStart) { this->chooseBucketGatherer(sampleBucketStart).sampleNow(sampleBucketStart); } -void CDataGatherer::skipSampleNow(core_t::TTime sampleBucketStart) -{ - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) - { +void CDataGatherer::skipSampleNow(core_t::TTime sampleBucketStart) { + for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { (*i)->skipSampleNow(sampleBucketStart); } } -std::size_t CDataGatherer::numberFeatures() const -{ +std::size_t CDataGatherer::numberFeatures() const { return m_Features.size(); } -bool CDataGatherer::hasFeature(model_t::EFeature feature) const -{ +bool CDataGatherer::hasFeature(model_t::EFeature feature) const { return std::binary_search(m_Features.begin(), m_Features.end(), feature); } -model_t::EFeature CDataGatherer::feature(std::size_t i) const -{ +model_t::EFeature CDataGatherer::feature(std::size_t i) const { return m_Features[i]; } -const CDataGatherer::TFeatureVec &CDataGatherer::features() const -{ +const CDataGatherer::TFeatureVec& CDataGatherer::features() const { return m_Features; } -std::size_t CDataGatherer::numberActivePeople() const -{ +std::size_t CDataGatherer::numberActivePeople() const { return m_PeopleRegistry.numberActiveNames(); } -std::size_t CDataGatherer::numberPeople() const -{ +std::size_t CDataGatherer::numberPeople() const { return m_PeopleRegistry.numberNames(); } -bool CDataGatherer::personId(const std::string &person, std::size_t &result) const -{ +bool CDataGatherer::personId(const std::string& person, std::size_t& result) const { return m_PeopleRegistry.id(person, result); } -bool CDataGatherer::anyPersonId(std::size_t &result) const -{ +bool CDataGatherer::anyPersonId(std::size_t& result) const { return m_PeopleRegistry.anyId(result); } -const std::string &CDataGatherer::personName(std::size_t pid) const -{ +const std::string& CDataGatherer::personName(std::size_t pid) const { return this->personName(pid, DEFAULT_PERSON_NAME); } -const core::CStoredStringPtr &CDataGatherer::personNamePtr(std::size_t pid) const -{ +const core::CStoredStringPtr& CDataGatherer::personNamePtr(std::size_t pid) const { return m_PeopleRegistry.namePtr(pid); } -const std::string &CDataGatherer::personName(std::size_t pid, - const std::string &fallback) const -{ +const std::string& CDataGatherer::personName(std::size_t pid, const std::string& fallback) const { return m_PeopleRegistry.name(pid, fallback); } -void CDataGatherer::personNonZeroCounts(core_t::TTime time, - TSizeUInt64PrVec &result) const -{ +void CDataGatherer::personNonZeroCounts(core_t::TTime time, TSizeUInt64PrVec& result) const { return this->chooseBucketGatherer(time).personNonZeroCounts(time, result); } -void CDataGatherer::recyclePeople(const TSizeVec &peopleToRemove) -{ - if (peopleToRemove.empty()) - { +void CDataGatherer::recyclePeople(const TSizeVec& peopleToRemove) { + if (peopleToRemove.empty()) { return; } - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) - { + for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != 
m_Gatherers.end(); ++i) { (*i)->recyclePeople(peopleToRemove); } - if (!this->isPopulation() && m_SampleCounts) - { + if (!this->isPopulation() && m_SampleCounts) { m_SampleCounts->recycle(peopleToRemove); } @@ -529,91 +452,68 @@ void CDataGatherer::recyclePeople(const TSizeVec &peopleToRemove) core::CStatistics::instance().stat(stat_t::E_NumberPrunedItems).increment(peopleToRemove.size()); } -void CDataGatherer::removePeople(std::size_t lowestPersonToRemove) -{ - if (lowestPersonToRemove >= this->numberPeople()) - { +void CDataGatherer::removePeople(std::size_t lowestPersonToRemove) { + if (lowestPersonToRemove >= this->numberPeople()) { return; } - if (!this->isPopulation() && m_SampleCounts) - { + if (!this->isPopulation() && m_SampleCounts) { m_SampleCounts->remove(lowestPersonToRemove); } - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) - { + for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { (*i)->removePeople(lowestPersonToRemove); } m_PeopleRegistry.removeNames(lowestPersonToRemove); } -CDataGatherer::TSizeVec &CDataGatherer::recycledPersonIds() -{ +CDataGatherer::TSizeVec& CDataGatherer::recycledPersonIds() { return m_PeopleRegistry.recycledIds(); } -bool CDataGatherer::isPersonActive(std::size_t pid) const -{ +bool CDataGatherer::isPersonActive(std::size_t pid) const { return m_PeopleRegistry.isIdActive(pid); } -std::size_t CDataGatherer::addPerson(const std::string &person, - CResourceMonitor &resourceMonitor, - bool &addedPerson) -{ - return m_PeopleRegistry.addName(person, - this->chooseBucketGatherer(0).currentBucketStartTime(), - resourceMonitor, - addedPerson); +std::size_t CDataGatherer::addPerson(const std::string& person, CResourceMonitor& resourceMonitor, bool& addedPerson) { + return m_PeopleRegistry.addName(person, this->chooseBucketGatherer(0).currentBucketStartTime(), resourceMonitor, addedPerson); } -std::size_t CDataGatherer::numberActiveAttributes() const -{ +std::size_t CDataGatherer::numberActiveAttributes() const { return m_AttributesRegistry.numberActiveNames(); } -std::size_t CDataGatherer::numberAttributes() const -{ +std::size_t CDataGatherer::numberAttributes() const { return m_AttributesRegistry.numberNames(); } -bool CDataGatherer::attributeId(const std::string &attribute, std::size_t &result) const -{ +bool CDataGatherer::attributeId(const std::string& attribute, std::size_t& result) const { return m_AttributesRegistry.id(attribute, result); } -const std::string &CDataGatherer::attributeName(std::size_t cid) const -{ +const std::string& CDataGatherer::attributeName(std::size_t cid) const { return this->attributeName(cid, DEFAULT_ATTRIBUTE_NAME); } -const std::string &CDataGatherer::attributeName(std::size_t cid, - const std::string &fallback) const -{ +const std::string& CDataGatherer::attributeName(std::size_t cid, const std::string& fallback) const { return m_AttributesRegistry.name(cid, fallback); } -const core::CStoredStringPtr &CDataGatherer::attributeNamePtr(std::size_t cid) const -{ +const core::CStoredStringPtr& CDataGatherer::attributeNamePtr(std::size_t cid) const { return m_AttributesRegistry.namePtr(cid); } -void CDataGatherer::recycleAttributes(const TSizeVec &attributesToRemove) -{ - if (attributesToRemove.empty()) - { +void CDataGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { + if (attributesToRemove.empty()) { return; } - if (this->isPopulation() && m_SampleCounts) - { + if (this->isPopulation() && m_SampleCounts) { 
m_SampleCounts->recycle(attributesToRemove);
     }

-    for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i)
-    {
+    for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) {
         (*i)->recycleAttributes(attributesToRemove);
     }
@@ -621,154 +521,114 @@ void CDataGatherer::recycleAttributes(const TSizeVec &attributesToRemove)
     core::CStatistics::instance().stat(stat_t::E_NumberPrunedItems).increment(attributesToRemove.size());
 }

-void CDataGatherer::removeAttributes(std::size_t lowestAttributeToRemove)
-{
-    if (lowestAttributeToRemove >= this->numberAttributes())
-    {
+void CDataGatherer::removeAttributes(std::size_t lowestAttributeToRemove) {
+    if (lowestAttributeToRemove >= this->numberAttributes()) {
         return;
     }

-    if (this->isPopulation() && m_SampleCounts)
-    {
+    if (this->isPopulation() && m_SampleCounts) {
         m_SampleCounts->remove(lowestAttributeToRemove);
     }
-    for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i)
-    {
+    for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) {
         (*i)->removeAttributes(lowestAttributeToRemove);
     }
     m_AttributesRegistry.removeNames(lowestAttributeToRemove);
 }

-CDataGatherer::TSizeVec &CDataGatherer::recycledAttributeIds()
-{
+CDataGatherer::TSizeVec& CDataGatherer::recycledAttributeIds() {
     return m_AttributesRegistry.recycledIds();
 }

-bool CDataGatherer::isAttributeActive(std::size_t cid) const
-{
+bool CDataGatherer::isAttributeActive(std::size_t cid) const {
     return m_AttributesRegistry.isIdActive(cid);
 }

-std::size_t CDataGatherer::addAttribute(const std::string &attribute,
-                                        CResourceMonitor &resourceMonitor,
-                                        bool &addedAttribute)
-{
-    return m_AttributesRegistry.addName(attribute,
-                                        this->chooseBucketGatherer(0).currentBucketStartTime(),
-                                        resourceMonitor,
-                                        addedAttribute);
+std::size_t CDataGatherer::addAttribute(const std::string& attribute, CResourceMonitor& resourceMonitor, bool& addedAttribute) {
+    return m_AttributesRegistry.addName(attribute, this->chooseBucketGatherer(0).currentBucketStartTime(), resourceMonitor, addedAttribute);
 }

-double CDataGatherer::sampleCount(std::size_t id) const
-{
-    if (m_SampleCounts)
-    {
+double CDataGatherer::sampleCount(std::size_t id) const {
+    if (m_SampleCounts) {
         return static_cast<double>(m_SampleCounts->count(id));
-    }
-    else
-    {
+    } else {
         LOG_ERROR("Sample count for non-metric gatherer");
         return 0.0;
     }
 }

-double CDataGatherer::effectiveSampleCount(std::size_t id) const
-{
-    if (m_SampleCounts)
-    {
+double CDataGatherer::effectiveSampleCount(std::size_t id) const {
+    if (m_SampleCounts) {
         return m_SampleCounts->effectiveSampleCount(id);
-    }
-    else
-    {
+    } else {
         LOG_ERROR("Effective sample count for non-metric gatherer");
         return 0.0;
     }
 }

-void CDataGatherer::resetSampleCount(std::size_t id)
-{
-    if (m_SampleCounts)
-    {
+void CDataGatherer::resetSampleCount(std::size_t id) {
+    if (m_SampleCounts) {
         m_SampleCounts->resetSampleCount(*this, id);
     }
 }

-CDataGatherer::TSampleCountsPtr CDataGatherer::sampleCounts() const
-{
+CDataGatherer::TSampleCountsPtr CDataGatherer::sampleCounts() const {
     return m_SampleCounts;
 }

 // Be careful here!
-core_t::TTime CDataGatherer::currentBucketStartTime() const
-{
+core_t::TTime CDataGatherer::currentBucketStartTime() const {
     return m_Gatherers.front()->currentBucketStartTime();
 }

 // Be careful here!
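The "Be careful here!" setter that follows keeps the optional second, out-of-phase gatherer half a bucket behind the primary one; the constructor comment earlier in this patch likewise notes the pair is created at startTime and startTime + bucketLength / 2. A minimal sketch of that staggering arithmetic (names and the driver loop are hypothetical):

    #include <cstdint>
    #include <iostream>

    using TTime = std::int64_t;

    // The out-of-phase gatherer is offset by half a bucket so results can
    // also be produced mid-way through each primary bucket.
    TTime outOfPhaseStart(TTime primaryStart, TTime bucketLength) {
        return primaryStart - bucketLength / 2;
    }

    int main() {
        TTime bucketLength = 600; // e.g. 10 minute buckets
        for (TTime start = 0; start <= 1200; start += bucketLength) {
            std::cout << "primary [" << start << "," << start + bucketLength << ")"
                      << " out-of-phase [" << outOfPhaseStart(start, bucketLength) << ","
                      << outOfPhaseStart(start, bucketLength) + bucketLength << ")\n";
        }
    }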
-void CDataGatherer::currentBucketStartTime(core_t::TTime bucketStart) -{ +void CDataGatherer::currentBucketStartTime(core_t::TTime bucketStart) { m_Gatherers[0]->currentBucketStartTime(bucketStart); - if (m_Gatherers.size() > 1) - { + if (m_Gatherers.size() > 1) { m_Gatherers[1]->currentBucketStartTime(bucketStart - (m_Gatherers[1]->bucketLength() / 2)); } } -core_t::TTime CDataGatherer::bucketLength() const -{ +core_t::TTime CDataGatherer::bucketLength() const { return m_Gatherers.front()->bucketLength(); } -bool CDataGatherer::dataAvailable(core_t::TTime time) const -{ +bool CDataGatherer::dataAvailable(core_t::TTime time) const { return this->chooseBucketGatherer(time).dataAvailable(time); } -bool CDataGatherer::validateSampleTimes(core_t::TTime &startTime, - core_t::TTime endTime) const -{ +bool CDataGatherer::validateSampleTimes(core_t::TTime& startTime, core_t::TTime endTime) const { return this->chooseBucketGatherer(startTime).validateSampleTimes(startTime, endTime); } -void CDataGatherer::timeNow(core_t::TTime time) -{ - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) - { +void CDataGatherer::timeNow(core_t::TTime time) { + for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { (*i)->timeNow(time); } } -std::string CDataGatherer::printCurrentBucket(core_t::TTime time) const -{ +std::string CDataGatherer::printCurrentBucket(core_t::TTime time) const { return this->chooseBucketGatherer(time).printCurrentBucket(); } -const CDataGatherer::TSizeSizePrUInt64UMap & - CDataGatherer::bucketCounts(core_t::TTime time) const -{ +const CDataGatherer::TSizeSizePrUInt64UMap& CDataGatherer::bucketCounts(core_t::TTime time) const { return this->chooseBucketGatherer(time).bucketCounts(time); } -const CDataGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec & - CDataGatherer::influencerCounts(core_t::TTime time) const -{ +const CDataGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec& CDataGatherer::influencerCounts(core_t::TTime time) const { return this->chooseBucketGatherer(time).influencerCounts(time); } -uint64_t CDataGatherer::checksum() const -{ +uint64_t CDataGatherer::checksum() const { uint64_t result = m_PeopleRegistry.checksum(); result = maths::CChecksum::calculate(result, m_AttributesRegistry); result = maths::CChecksum::calculate(result, m_SummaryMode); result = maths::CChecksum::calculate(result, m_Features); - if (m_SampleCounts) - { + if (m_SampleCounts) { result = maths::CChecksum::calculate(result, m_SampleCounts->checksum(*this)); } - for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) - { + for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { result = maths::CChecksum::calculate(result, (*i)->checksum()); } @@ -777,126 +637,93 @@ uint64_t CDataGatherer::checksum() const return result; } -void CDataGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CDataGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CDataGatherer"); core::CMemoryDebug::dynamicSize("m_Features", m_Features, mem); core::CMemoryDebug::dynamicSize("m_PeopleRegistry", m_PeopleRegistry, mem); core::CMemoryDebug::dynamicSize("m_AttributesRegistry", m_AttributesRegistry, mem); core::CMemoryDebug::dynamicSize("m_PartitionFieldName", m_PartitionFieldName, mem); core::CMemoryDebug::dynamicSize("m_SampleCounts", m_SampleCounts, mem); - for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) - { + for 
         core::CMemoryDebug::dynamicSize("BucketGatherer", *(*i), mem);
     }
 }
 
-std::size_t CDataGatherer::memoryUsage() const
-{
-    std::size_t mem = core::CMemory::dynamicSize(m_Features);
+std::size_t CDataGatherer::memoryUsage() const {
+    std::size_t mem = core::CMemory::dynamicSize(m_Features);
     mem += core::CMemory::dynamicSize(m_PeopleRegistry);
     mem += core::CMemory::dynamicSize(m_AttributesRegistry);
     mem += core::CMemory::dynamicSize(m_PartitionFieldName);
     mem += core::CMemory::dynamicSize(m_SampleCounts);
-    for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i)
-    {
+    for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) {
         mem += core::CMemory::dynamicSize(*(*i));
     }
     return mem;
 }
 
-bool CDataGatherer::useNull() const
-{
+bool CDataGatherer::useNull() const {
     return m_UseNull;
 }
 
-void CDataGatherer::clear()
-{
+void CDataGatherer::clear() {
     m_PeopleRegistry.clear();
     m_AttributesRegistry.clear();
-    if (m_SampleCounts)
-    {
+    if (m_SampleCounts) {
         m_SampleCounts->clear();
     }
-    for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i)
-    {
+    for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) {
         (*i)->clear();
     }
 }
 
-bool CDataGatherer::resetBucket(core_t::TTime bucketStart)
-{
+bool CDataGatherer::resetBucket(core_t::TTime bucketStart) {
     bool result = true;
-    for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i)
-    {
+    for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) {
         result &= (*i)->resetBucket(bucketStart);
     }
     return result;
 }
 
-void CDataGatherer::releaseMemory(core_t::TTime samplingCutoffTime)
-{
-    if (this->isPopulation())
-    {
-        for (auto &gatherer : m_Gatherers)
-        {
+void CDataGatherer::releaseMemory(core_t::TTime samplingCutoffTime) {
+    if (this->isPopulation()) {
+        for (auto& gatherer : m_Gatherers) {
            gatherer->releaseMemory(samplingCutoffTime);
        }
    }
}
 
-const SModelParams &CDataGatherer::params() const
-{
+const SModelParams& CDataGatherer::params() const {
     return m_Params;
 }
 
-void CDataGatherer::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
-    for (std::size_t i = 0u; i < m_Features.size(); ++i)
-    {
+void CDataGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    for (std::size_t i = 0u; i < m_Features.size(); ++i) {
         inserter.insertValue(FEATURE_TAG, static_cast<int>(m_Features[i]));
     }
 
-    inserter.insertLevel(PEOPLE_REGISTRY_TAG,
-                         boost::bind(&CDynamicStringIdRegistry::acceptPersistInserter,
-                                     m_PeopleRegistry, _1));
-    inserter.insertLevel(ATTRIBUTES_REGISTRY_TAG,
-                         boost::bind(&CDynamicStringIdRegistry::acceptPersistInserter,
-                                     m_AttributesRegistry, _1));
-
-    if (m_SampleCounts)
-    {
-        inserter.insertLevel(SAMPLE_COUNTS_TAG,
-                             boost::bind(&CSampleCounts::acceptPersistInserter,
-                                         m_SampleCounts.get(),
-                                         _1));
+    inserter.insertLevel(PEOPLE_REGISTRY_TAG, boost::bind(&CDynamicStringIdRegistry::acceptPersistInserter, m_PeopleRegistry, _1));
+    inserter.insertLevel(ATTRIBUTES_REGISTRY_TAG, boost::bind(&CDynamicStringIdRegistry::acceptPersistInserter, m_AttributesRegistry, _1));
+
+    if (m_SampleCounts) {
+        inserter.insertLevel(SAMPLE_COUNTS_TAG, boost::bind(&CSampleCounts::acceptPersistInserter, m_SampleCounts.get(), _1));
     }
-    inserter.insertLevel(BUCKET_GATHERER_TAG,
-                         boost::bind(&CDataGatherer::persistBucketGatherers,
-                                     this,
-                                     _1));
+    inserter.insertLevel(BUCKET_GATHERER_TAG, boost::bind(&CDataGatherer::persistBucketGatherers, this, _1));
 }
 
-bool CDataGatherer::determineMetricCategory(TMetricCategoryVec &fieldMetricCategories) const
-{
-    if (m_Features.empty())
-    {
+bool CDataGatherer::determineMetricCategory(TMetricCategoryVec& fieldMetricCategories) const {
+    if (m_Features.empty()) {
         LOG_WARN("No features to determine metric category from");
         return false;
     }
-    if (m_Features.size() > 1)
-    {
+    if (m_Features.size() > 1) {
         LOG_WARN(m_Features.size() << " features to determine metric category "
-                 "from - only the first will be used");
+                                      "from - only the first will be used");
     }
 
     model_t::EMetricCategory result;
-    if (model_t::metricCategory(m_Features.front(), result) == false)
-    {
-        LOG_ERROR("Unable to map feature "
-                  << model_t::print(m_Features.front())
-                  << " to a metric category");
+    if (model_t::metricCategory(m_Features.front(), result) == false) {
+        LOG_ERROR("Unable to map feature " << model_t::print(m_Features.front()) << " to a metric category");
         return false;
     }
 
@@ -905,12 +732,8 @@ bool CDataGatherer::determineMetricCategory(TMetricCategoryVec &fieldMetricCateg
     return true;
 }
 
-bool CDataGatherer::extractCountFromField(const std::string &fieldName,
-                                          const std::string *fieldValue,
-                                          std::size_t &count) const
-{
-    if (fieldValue == 0)
-    {
+bool CDataGatherer::extractCountFromField(const std::string& fieldName, const std::string* fieldValue, std::size_t& count) const {
+    if (fieldValue == 0) {
         // Treat not present as explicit null
         count = EXPLICIT_NULL_SUMMARY_COUNT;
         return true;
@@ -918,15 +741,13 @@ bool CDataGatherer::extractCountFromField(const std::string &fieldName,
     }
 
     std::string fieldValueCopy(*fieldValue);
     core::CStringUtils::trimWhitespace(fieldValueCopy);
-    if (fieldValueCopy.empty() || fieldValueCopy == EXPLICIT_NULL)
-    {
+    if (fieldValueCopy.empty() || fieldValueCopy == EXPLICIT_NULL) {
         count = EXPLICIT_NULL_SUMMARY_COUNT;
         return true;
     }
 
     double count_;
-    if (core::CStringUtils::stringToType(fieldValueCopy, count_) == false || count_ < 0.0)
-    {
+    if (core::CStringUtils::stringToType(fieldValueCopy, count_) == false || count_ < 0.0) {
         LOG_ERROR("Unable to extract count " << fieldName << " from " << fieldValueCopy);
         return false;
     }
@@ -936,60 +757,48 @@ bool CDataGatherer::extractCountFromField(const std::string &fieldName,
     return count > 0;
 }
 
-bool CDataGatherer::extractMetricFromField(const std::string &fieldName,
-                                           std::string fieldValue,
-                                           TDouble1Vec &result) const
-{
+bool CDataGatherer::extractMetricFromField(const std::string& fieldName, std::string fieldValue, TDouble1Vec& result) const {
     result.clear();
 
     core::CStringUtils::trimWhitespace(fieldValue);
-    if (fieldValue.empty())
-    {
+    if (fieldValue.empty()) {
         LOG_WARN("Configured metric " << fieldName << " not present in event");
         return false;
     }
 
-    const std::string &delimiter = m_Params.get().s_MultivariateComponentDelimiter;
+    const std::string& delimiter = m_Params.get().s_MultivariateComponentDelimiter;
 
     // Split the string up by the delimiter and parse each token separately.
     std::size_t first = 0u;
-    do
-    {
+    do {
         std::size_t last = fieldValue.find(delimiter, first);
         double value;
         // Avoid a string duplication in the (common) case of only one value
-        bool convertedOk = (first == 0 && last == std::string::npos) ?
-                           core::CStringUtils::stringToType(fieldValue,
-                                                            value) :
-                           core::CStringUtils::stringToType(fieldValue.substr(first, last - first),
-                                                            value);
-        if (!convertedOk)
-        {
+        bool convertedOk = (first == 0 && last == std::string::npos)
+                               ? core::CStringUtils::stringToType(fieldValue, value)
+                               : core::CStringUtils::stringToType(fieldValue.substr(first, last - first), value);
+        if (!convertedOk) {
             LOG_ERROR("Unable to extract " << fieldName << " from " << fieldValue);
             result.clear();
             return false;
         }
-        if (maths::CMathsFuncs::isFinite(value) == false)
-        {
+        if (maths::CMathsFuncs::isFinite(value) == false) {
             LOG_ERROR("Bad value for " << fieldName << " from " << fieldValue);
             result.clear();
             return false;
         }
         result.push_back(value);
         first = last + (last != std::string::npos ? delimiter.length() : 0);
-    }
-    while (first != std::string::npos);
+    } while (first != std::string::npos);
 
     return true;
 }
 
-core_t::TTime CDataGatherer::earliestBucketStartTime() const
-{
+core_t::TTime CDataGatherer::earliestBucketStartTime() const {
     return m_Gatherers.front()->earliestBucketStartTime();
 }
 
-bool CDataGatherer::checkInvariants() const
-{
+bool CDataGatherer::checkInvariants() const {
     LOG_DEBUG("Checking invariants for people registry");
     bool result = m_PeopleRegistry.checkInvariants();
     LOG_DEBUG("Checking invariants for attributes registry");
@@ -997,42 +806,33 @@ bool CDataGatherer::checkInvariants() const
     return result;
 }
 
-const CBucketGatherer &CDataGatherer::chooseBucketGatherer(core_t::TTime time) const
-{
+const CBucketGatherer& CDataGatherer::chooseBucketGatherer(core_t::TTime time) const {
     return const_cast<CDataGatherer*>(this)->chooseBucketGatherer(time);
 }
 
-CBucketGatherer &CDataGatherer::chooseBucketGatherer(core_t::TTime time)
-{
+CBucketGatherer& CDataGatherer::chooseBucketGatherer(core_t::TTime time) {
     core_t::TTime bucketLength = m_Gatherers.front()->bucketLength();
-    if ((m_Gatherers.size() > 1) && (time % bucketLength != 0))
-    {
+    if ((m_Gatherers.size() > 1) && (time % bucketLength != 0)) {
         return *m_Gatherers[1];
-    }
-    else
-    {
+    } else {
         return *m_Gatherers[0];
     }
 }
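
The selection rule above is worth seeing in isolation: queries at bucket-aligned times go to the primary gatherer, and anything else goes to the gatherer whose buckets are offset by half a bucket length. A minimal standalone sketch of the same arithmetic (chooseIndex is a hypothetical stand-in for chooseBucketGatherer, not part of this codebase):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    using TTime = int64_t;

    // Pick gatherer 0 for bucket-aligned times and gatherer 1 (the
    // half-bucket-offset one) otherwise, mirroring chooseBucketGatherer.
    std::size_t chooseIndex(TTime time, TTime bucketLength, std::size_t gathererCount) {
        return (gathererCount > 1 && time % bucketLength != 0) ? 1 : 0;
    }

    int main() {
        // With a 100s bucket an aligned time maps to the primary gatherer
        // and a mid-bucket time maps to the offset one.
        assert(chooseIndex(200, 100, 2) == 0);
        assert(chooseIndex(250, 100, 2) == 1);
        assert(chooseIndex(250, 100, 1) == 0); // single gatherer: always 0
        return 0;
    }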
 
-bool CDataGatherer::acceptRestoreTraverser(const std::string &summaryCountFieldName,
-                                           const std::string &personFieldName,
-                                           const std::string &attributeFieldName,
-                                           const std::string &valueFieldName,
-                                           const TStrVec &influenceFieldNames,
-                                           core::CStateRestoreTraverser &traverser)
-{
+bool CDataGatherer::acceptRestoreTraverser(const std::string& summaryCountFieldName,
+                                           const std::string& personFieldName,
+                                           const std::string& attributeFieldName,
+                                           const std::string& valueFieldName,
+                                           const TStrVec& influenceFieldNames,
+                                           core::CStateRestoreTraverser& traverser) {
     this->clear();
     m_Features.clear();
 
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == FEATURE_TAG)
-        {
+    do {
+        const std::string& name = traverser.name();
+        if (name == FEATURE_TAG) {
             int feature(-1);
-            if (core::CStringUtils::stringToType(traverser.value(), feature) == false || feature < 0)
-            {
+            if (core::CStringUtils::stringToType(traverser.value(), feature) == false || feature < 0) {
                 LOG_ERROR("Invalid feature in " << traverser.value());
                 return false;
             }
@@ -1040,16 +840,12 @@ bool CDataGatherer::acceptRestoreTraverser(const std::string &summaryCountFieldN
             continue;
         }
         RESTORE(PEOPLE_REGISTRY_TAG,
-                traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser,
-                                                       &m_PeopleRegistry, _1)))
+                traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, &m_PeopleRegistry, _1)))
         RESTORE(ATTRIBUTES_REGISTRY_TAG,
-                traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser,
-                                                       &m_AttributesRegistry, _1)))
+                traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, &m_AttributesRegistry, _1)))
         RESTORE_SETUP_TEARDOWN(SAMPLE_COUNTS_TAG,
                                m_SampleCounts.reset(new CSampleCounts(0)),
-                               traverser.traverseSubLevel(boost::bind(&CSampleCounts::acceptRestoreTraverser,
-                                                                      m_SampleCounts.get(),
-                                                                      _1)),
+                               traverser.traverseSubLevel(boost::bind(&CSampleCounts::acceptRestoreTraverser, m_SampleCounts.get(), _1)),
                                /**/)
         RESTORE(BUCKET_GATHERER_TAG,
                 traverser.traverseSubLevel(boost::bind(&CDataGatherer::restoreBucketGatherer,
                                                        this,
                                                        boost::cref(summaryCountFieldName),
                                                        boost::cref(personFieldName),
                                                        boost::cref(attributeFieldName),
@@ -1060,119 +856,79 @@ bool CDataGatherer::acceptRestoreTraverser(const std::string &summaryCountFieldN
                                                        boost::cref(valueFieldName),
                                                        boost::cref(influenceFieldNames),
                                                        _1)))
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
 
-bool CDataGatherer::restoreBucketGatherer(const std::string &summaryCountFieldName,
-                                          const std::string &personFieldName,
-                                          const std::string &attributeFieldName,
-                                          const std::string &valueFieldName,
-                                          const TStrVec &influenceFieldNames,
-                                          core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG)
-        {
-            CEventRateBucketGatherer *gatherer = new CEventRateBucketGatherer(*this,
-                                                                              summaryCountFieldName,
-                                                                              personFieldName,
-                                                                              attributeFieldName,
-                                                                              valueFieldName,
-                                                                              influenceFieldNames,
-                                                                              traverser);
-
-            if (gatherer == 0)
-            {
+bool CDataGatherer::restoreBucketGatherer(const std::string& summaryCountFieldName,
+                                          const std::string& personFieldName,
+                                          const std::string& attributeFieldName,
+                                          const std::string& valueFieldName,
+                                          const TStrVec& influenceFieldNames,
+                                          core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
+        if (name == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG) {
+            CEventRateBucketGatherer* gatherer = new CEventRateBucketGatherer(
+                *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, traverser);
+
+            if (gatherer == 0) {
                 LOG_ERROR("Failed to create gatherer");
                 return false;
             }
             m_Gatherers.push_back(gatherer);
-        }
-        else if (name == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG)
-        {
-            CMetricBucketGatherer *gatherer = new CMetricBucketGatherer(*this,
-                                                                        summaryCountFieldName,
-                                                                        personFieldName,
-                                                                        attributeFieldName,
-                                                                        valueFieldName,
-                                                                        influenceFieldNames,
-                                                                        traverser);
-            if (gatherer == 0)
-            {
+        } else if (name == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG) {
+            CMetricBucketGatherer* gatherer = new CMetricBucketGatherer(
+                *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, traverser);
+            if (gatherer == 0) {
                 LOG_ERROR("Failed to create gatherer");
                 return false;
            }
            m_Gatherers.push_back(gatherer);
        }
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
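
restoreBucketGatherer is a tag-dispatched factory: each record's tag selects which concrete gatherer type to construct, and unrecognised tags are simply skipped. A self-contained sketch of the pattern with hypothetical types (the real code constructs CEventRateBucketGatherer or CMetricBucketGatherer from a traverser):

    #include <functional>
    #include <map>
    #include <memory>
    #include <string>
    #include <vector>

    struct Gatherer { virtual ~Gatherer() = default; };
    struct EventRateGatherer : Gatherer {};
    struct MetricGatherer : Gatherer {};

    // Rebuild a polymorphic collection from persisted tags; unknown tags
    // are ignored, as in restoreBucketGatherer above.
    std::vector<std::unique_ptr<Gatherer>> restore(const std::vector<std::string>& tags) {
        std::map<std::string, std::function<std::unique_ptr<Gatherer>()>> factories{
            {"eventrate", [] { return std::make_unique<EventRateGatherer>(); }},
            {"metric", [] { return std::make_unique<MetricGatherer>(); }}};
        std::vector<std::unique_ptr<Gatherer>> result;
        for (const auto& tag : tags) {
            auto itr = factories.find(tag);
            if (itr != factories.end()) {
                result.push_back(itr->second());
            }
        }
        return result;
    }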
 
-void CDataGatherer::persistBucketGatherers(core::CStatePersistInserter &inserter) const
-{
-    for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i)
-    {
-        const std::string &tag = (*i)->persistenceTag();
+void CDataGatherer::persistBucketGatherers(core::CStatePersistInserter& inserter) const {
+    for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) {
+        const std::string& tag = (*i)->persistenceTag();
 
-        if (tag == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG)
-        {
-            CEventRateBucketGatherer *const gatherer = dynamic_cast<CEventRateBucketGatherer*>(*i);
-            inserter.insertLevel(tag, boost::bind(&CEventRateBucketGatherer::acceptPersistInserter,
-                                                  boost::cref(gatherer), _1));
-        }
-        else if (tag == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG)
-        {
-            CMetricBucketGatherer *const gatherer = dynamic_cast<CMetricBucketGatherer*>(*i);
-            inserter.insertLevel(tag, boost::bind(&CMetricBucketGatherer::acceptPersistInserter,
-                                                  boost::cref(gatherer), _1));
+        if (tag == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG) {
+            CEventRateBucketGatherer* const gatherer = dynamic_cast<CEventRateBucketGatherer*>(*i);
+            inserter.insertLevel(tag, boost::bind(&CEventRateBucketGatherer::acceptPersistInserter, boost::cref(gatherer), _1));
+        } else if (tag == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG) {
+            CMetricBucketGatherer* const gatherer = dynamic_cast<CMetricBucketGatherer*>(*i);
+            inserter.insertLevel(tag, boost::bind(&CMetricBucketGatherer::acceptPersistInserter, boost::cref(gatherer), _1));
         }
     }
 }
 
 void CDataGatherer::createBucketGatherer(model_t::EAnalysisCategory gathererType,
-                                         const std::string &summaryCountFieldName,
-                                         const std::string &personFieldName,
-                                         const std::string &attributeFieldName,
-                                         const std::string &valueFieldName,
-                                         const TStrVec &influenceFieldNames,
+                                         const std::string& summaryCountFieldName,
+                                         const std::string& personFieldName,
+                                         const std::string& attributeFieldName,
+                                         const std::string& valueFieldName,
+                                         const TStrVec& influenceFieldNames,
                                          core_t::TTime startTime,
-                                         unsigned int sampleCountOverride)
-{
-    switch (gathererType)
-    {
-    case model_t::E_EventRate:
-    case model_t::E_PopulationEventRate:
-    case model_t::E_PeersEventRate:
-        m_Gatherers.push_back(new CEventRateBucketGatherer(*this,
-                                                           summaryCountFieldName,
-                                                           personFieldName,
-                                                           attributeFieldName,
-                                                           valueFieldName,
-                                                           influenceFieldNames,
-                                                           startTime));
-        break;
-    case model_t::E_Metric:
-    case model_t::E_PopulationMetric:
-    case model_t::E_PeersMetric:
-        m_SampleCounts.reset(new CSampleCounts(sampleCountOverride));
-        m_Gatherers.push_back(new CMetricBucketGatherer(*this,
-                                                        summaryCountFieldName,
-                                                        personFieldName,
-                                                        attributeFieldName,
-                                                        valueFieldName,
-                                                        influenceFieldNames,
-                                                        startTime));
-        break;
+                                         unsigned int sampleCountOverride) {
+    switch (gathererType) {
+    case model_t::E_EventRate:
+    case model_t::E_PopulationEventRate:
+    case model_t::E_PeersEventRate:
+        m_Gatherers.push_back(new CEventRateBucketGatherer(
+            *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, startTime));
+        break;
+    case model_t::E_Metric:
+    case model_t::E_PopulationMetric:
+    case model_t::E_PeersMetric:
+        m_SampleCounts.reset(new CSampleCounts(sampleCountOverride));
+        m_Gatherers.push_back(new CMetricBucketGatherer(
+            *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, startTime));
+        break;
     }
 }
-
-
 }
 }
diff --git a/lib/model/CDetectionRule.cc b/lib/model/CDetectionRule.cc
index eb0e223158..6e5eb20d59 100644
--- a/lib/model/CDetectionRule.cc
+++ b/lib/model/CDetectionRule.cc
@@ -9,142 +9,105 @@
 #include 
 #include 
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
 CDetectionRule::CDetectionRule()
-    : m_Action(E_FilterResults),
-      m_Conditions(),
-      m_ConditionsConnective(E_Or),
-      m_TargetFieldName(),
-      m_TargetFieldValue()
-{
+    : m_Action(E_FilterResults), m_Conditions(), m_ConditionsConnective(E_Or), m_TargetFieldName(), m_TargetFieldValue() {
     m_Conditions.reserve(1);
 }
 
-void CDetectionRule::action(int action)
-{
+void CDetectionRule::action(int action) {
     m_Action = action;
 }
 
-void CDetectionRule::conditionsConnective(EConditionsConnective connective)
-{
+void CDetectionRule::conditionsConnective(EConditionsConnective connective) {
     m_ConditionsConnective = connective;
 }
 
-void CDetectionRule::addCondition(const CRuleCondition &condition)
-{
+void CDetectionRule::addCondition(const CRuleCondition& condition) {
     m_Conditions.push_back(condition);
 }
 
-void CDetectionRule::targetFieldName(const std::string &targetFieldName)
-{
+void CDetectionRule::targetFieldName(const std::string& targetFieldName) {
     m_TargetFieldName = targetFieldName;
 }
 
-void CDetectionRule::targetFieldValue(const std::string &targetFieldValue)
-{
+void CDetectionRule::targetFieldValue(const std::string& targetFieldValue) {
     m_TargetFieldValue = targetFieldValue;
 }
 
 bool CDetectionRule::apply(ERuleAction action,
-                           const CAnomalyDetectorModel &model,
+                           const CAnomalyDetectorModel& model,
                            model_t::EFeature feature,
-                           const model_t::CResultType &resultType,
+                           const model_t::CResultType& resultType,
                            std::size_t pid,
                            std::size_t cid,
-                           core_t::TTime time) const
-{
-    if (!(m_Action & action))
-    {
+                           core_t::TTime time) const {
+    if (!(m_Action & action)) {
         return false;
     }
 
-    if (this->isInScope(model, pid, cid) == false)
-    {
+    if (this->isInScope(model, pid, cid) == false) {
         return false;
     }
 
-    for (std::size_t i = 0; i < m_Conditions.size(); ++i)
-    {
-        bool conditionResult = m_Conditions[i].test(
-                model, feature, resultType, !m_TargetFieldName.empty(), pid, cid, time);
-        switch (m_ConditionsConnective)
-        {
-        case E_Or:
-            if (conditionResult == true)
-            {
-                return true;
-            }
-            break;
-        case E_And:
-            if (conditionResult == false)
-            {
-                return false;
-            }
-            break;
+    for (std::size_t i = 0; i < m_Conditions.size(); ++i) {
+        bool conditionResult = m_Conditions[i].test(model, feature, resultType, !m_TargetFieldName.empty(), pid, cid, time);
+        switch (m_ConditionsConnective) {
+        case E_Or:
+            if (conditionResult == true) {
+                return true;
+            }
+            break;
+        case E_And:
+            if (conditionResult == false) {
+                return false;
+            }
+            break;
         }
     }
 
-    switch (m_ConditionsConnective)
-    {
-    case E_Or:
-        return false;
-    case E_And:
-        return true;
+    switch (m_ConditionsConnective) {
+    case E_Or:
+        return false;
+    case E_And:
+        return true;
     }
     return false;
 }
 
-bool CDetectionRule::isInScope(const CAnomalyDetectorModel &model,
-                               std::size_t pid,
-                               std::size_t cid) const
-{
-    if (m_TargetFieldName.empty() || m_TargetFieldValue.empty())
-    {
+bool CDetectionRule::isInScope(const CAnomalyDetectorModel& model, std::size_t pid, std::size_t cid) const {
+    if (m_TargetFieldName.empty() || m_TargetFieldValue.empty()) {
         return true;
     }
 
-    const CDataGatherer &gatherer = model.dataGatherer();
-    if (m_TargetFieldName == gatherer.partitionFieldName())
-    {
+    const CDataGatherer& gatherer = model.dataGatherer();
+    if (m_TargetFieldName == gatherer.partitionFieldName()) {
         return m_TargetFieldValue == gatherer.partitionFieldValue();
-    }
-    else if (m_TargetFieldName == gatherer.personFieldName())
-    {
+    } else if (m_TargetFieldName == gatherer.personFieldName()) {
        return m_TargetFieldValue == gatherer.personName(pid);
-    }
-    else if (m_TargetFieldName == gatherer.attributeFieldName())
-    {
+    } else if (m_TargetFieldName == gatherer.attributeFieldName()) {
        return m_TargetFieldValue == gatherer.attributeName(cid);
-    }
-    else
-    {
+    } else {
        LOG_ERROR("Unexpected targetFieldName = " << m_TargetFieldName);
    }
 
    return false;
}
 
-std::string CDetectionRule::print() const
-{
+std::string CDetectionRule::print() const {
     std::string result = this->printAction();
-    if (m_TargetFieldName.empty() == false)
-    {
+    if (m_TargetFieldName.empty() == false) {
         result += " (" + m_TargetFieldName;
-        if (m_TargetFieldValue.empty() == false)
-        {
+        if (m_TargetFieldValue.empty() == false) {
             result += ":" + m_TargetFieldValue;
         }
         result += ")";
     }
     result += " IF ";
-    for (std::size_t i = 0; i < m_Conditions.size(); ++i)
-    {
+    for (std::size_t i = 0; i < m_Conditions.size(); ++i) {
         result += m_Conditions[i].print();
-        if (i < m_Conditions.size() - 1)
-        {
+        if (i < m_Conditions.size() - 1) {
             result += " ";
             result += this->printConditionsConnective();
             result += " ";
@@ -153,17 +116,13 @@ std::string CDetectionRule::print() const
     return result;
 }
 
-std::string CDetectionRule::printAction() const
-{
+std::string CDetectionRule::printAction() const {
     std::string result;
-    if (E_FilterResults & m_Action)
-    {
+    if (E_FilterResults & m_Action) {
         result += "FILTER_RESULTS";
     }
-    if (E_SkipSampling & m_Action)
-    {
-        if (result.empty() == false)
-        {
+    if (E_SkipSampling & m_Action) {
+        if (result.empty() == false) {
             result += " AND ";
         }
         result += "SKIP_SAMPLING";
@@ -171,17 +130,14 @@ std::string CDetectionRule::printAction() const
     return result;
 }
 
-std::string CDetectionRule::printConditionsConnective() const
-{
-    switch (m_ConditionsConnective)
-    {
-    case E_And:
-        return "AND";
-    case E_Or:
-        return "OR";
+std::string CDetectionRule::printConditionsConnective() const {
+    switch (m_ConditionsConnective) {
+    case E_And:
+        return "AND";
+    case E_Or:
+        return "OR";
     }
     return std::string();
 }
-
 }
 }
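
The two switches in CDetectionRule::apply implement short-circuit OR/AND over the condition list, with the empty list defaulting to false for OR and true for AND. The same semantics in a standalone, testable form (names hypothetical):

    #include <cassert>
    #include <vector>

    enum EConnective { E_Or, E_And };

    // OR short-circuits on the first true result, AND on the first false
    // one; with no results, OR is false and AND is true, matching the
    // trailing switch in CDetectionRule::apply.
    bool applyConnective(EConnective connective, const std::vector<bool>& results) {
        for (bool result : results) {
            if (connective == E_Or && result) {
                return true;
            }
            if (connective == E_And && !result) {
                return false;
            }
        }
        return connective == E_And;
    }

    int main() {
        assert(applyConnective(E_Or, {false, true}) == true);
        assert(applyConnective(E_And, {true, false}) == false);
        assert(applyConnective(E_Or, {}) == false);
        assert(applyConnective(E_And, {}) == true);
        return 0;
    }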
diff --git a/lib/model/CDetectorEqualizer.cc b/lib/model/CDetectorEqualizer.cc
index 5272e4f40e..77406794ec 100644
--- a/lib/model/CDetectorEqualizer.cc
+++ b/lib/model/CDetectorEqualizer.cc
@@ -11,58 +11,44 @@
 #include 
 #include 
 
-#include 
 #include 
+#include 
 #include 
 #include 
 
-namespace ml
-{
-namespace model
-{
-namespace
-{
+namespace ml {
+namespace model {
+namespace {
 const std::string DETECTOR_TAG("a");
 const std::string SKETCH_TAG("b");
 const std::string EMPTY_TAG("c");
 }
 
-void CDetectorEqualizer::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
-    if (m_Sketches.empty())
-    {
+void CDetectorEqualizer::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    if (m_Sketches.empty()) {
         inserter.insertValue(EMPTY_TAG, " ");
     }
-    for (const auto &sketch : m_Sketches)
-    {
+    for (const auto& sketch : m_Sketches) {
         inserter.insertValue(DETECTOR_TAG, sketch.first);
-        inserter.insertLevel(SKETCH_TAG, boost::bind(&maths::CQuantileSketch::acceptPersistInserter,
-                                                     boost::cref(sketch.second), _1));
+        inserter.insertLevel(SKETCH_TAG, boost::bind(&maths::CQuantileSketch::acceptPersistInserter, boost::cref(sketch.second), _1));
     }
 }
 
-bool CDetectorEqualizer::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
+bool CDetectorEqualizer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
     boost::optional<int> detector;
-    do
-    {
-        const std::string &name = traverser.name();
-        RESTORE_SETUP_TEARDOWN(DETECTOR_TAG,
-                               detector.reset(0),
-                               core::CStringUtils::stringToType(traverser.value(), *detector),
+    do {
+        const std::string& name = traverser.name();
+        RESTORE_SETUP_TEARDOWN(DETECTOR_TAG, detector.reset(0), core::CStringUtils::stringToType(traverser.value(), *detector),
                                /**/)
-        if (name == SKETCH_TAG)
-        {
-            if (!detector)
-            {
+        if (name == SKETCH_TAG) {
+            if (!detector) {
                 LOG_ERROR("Expected the detector label first");
                 return false;
             }
             m_Sketches.emplace_back(*detector, maths::CQuantileSketch(SKETCH_INTERPOLATION, SKETCH_SIZE));
-            if (traverser.traverseSubLevel(boost::bind(&maths::CQuantileSketch::acceptRestoreTraverser,
-                                                       boost::ref(m_Sketches.back().second), _1)) == false)
-            {
+            if (traverser.traverseSubLevel(
+                    boost::bind(&maths::CQuantileSketch::acceptRestoreTraverser, boost::ref(m_Sketches.back().second), _1)) == false) {
                 LOG_ERROR("Failed to restore SKETCH_TAG, got " << traverser.value());
                 m_Sketches.pop_back();
                 return false;
@@ -70,31 +56,25 @@ bool CDetectorEqualizer::acceptRestoreTraverser(core::CStateRestoreTraverser &tr
             detector.reset();
             continue;
         }
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
 
-void CDetectorEqualizer::add(int detector, double probability)
-{
+void CDetectorEqualizer::add(int detector, double probability) {
     double logp = -maths::CTools::fastLog(probability);
     this->sketch(detector).add(logp);
 }
 
-double CDetectorEqualizer::correct(int detector, double probability)
-{
+double CDetectorEqualizer::correct(int detector, double probability) {
     LOG_TRACE("# detectors = " << m_Sketches.size());
-    if (m_Sketches.size() == 1)
-    {
+    if (m_Sketches.size() == 1) {
         return probability;
     }
 
-    const maths::CQuantileSketch &sketch = this->sketch(detector);
+    const maths::CQuantileSketch& sketch = this->sketch(detector);
 
-    for (const auto &sketch_ : m_Sketches)
-    {
-        if (sketch_.second.count() < MINIMUM_COUNT_FOR_CORRECTION)
-        {
+    for (const auto& sketch_ : m_Sketches) {
+        if (sketch_.second.count() < MINIMUM_COUNT_FOR_CORRECTION) {
             return probability;
         }
    }
@@ -105,18 +85,15 @@ double CDetectorEqualizer::correct(int detector, double probability)
 
     double logp = -maths::CTools::fastLog(probability);
     double percentage;
-    if (sketch.cdf(logp, percentage))
-    {
+    if (sketch.cdf(logp, percentage)) {
         percentage *= 100.0;
         LOG_TRACE("log(p) = " << logp << ", c.d.f. = " << percentage);
 
         std::vector<double> logps;
         logps.reserve(m_Sketches.size());
-        for (const auto &sketch_ : m_Sketches)
-        {
+        for (const auto& sketch_ : m_Sketches) {
             double logpi;
-            if (sketch_.second.quantile(percentage, logpi))
-            {
+            if (sketch_.second.quantile(percentage, logpi)) {
                 logps.push_back(logpi);
             }
         }
@@ -124,7 +101,7 @@ double CDetectorEqualizer::correct(int detector, double probability)
         LOG_TRACE("quantiles = " << core::CContainerPrinter::print(logps));
 
         std::size_t n = logps.size();
-        double logpc = n % 2 == 0 ? (logps[n/2-1] + logps[n/2]) / 2.0 : logps[n/2];
+        double logpc = n % 2 == 0 ? (logps[n / 2 - 1] + logps[n / 2]) / 2.0 : logps[n / 2];
 
         double alpha = maths::CTools::truncate((logp - A) / (B - A), 0.0, 1.0);
         LOG_TRACE("Corrected log(p) = " << -alpha * logpc - (1.0 - alpha) * logp);
@@ -134,44 +111,34 @@ double CDetectorEqualizer::correct(int detector, double probability)
     return probability;
 }
 
-void CDetectorEqualizer::clear()
-{
+void CDetectorEqualizer::clear() {
     m_Sketches.clear();
 }
 
-void CDetectorEqualizer::age(double factor)
-{
-    for (auto &sketch : m_Sketches)
-    {
+void CDetectorEqualizer::age(double factor) {
+    for (auto& sketch : m_Sketches) {
         sketch.second.age(factor);
     }
 }
 
-uint64_t CDetectorEqualizer::checksum() const
-{
+uint64_t CDetectorEqualizer::checksum() const {
     return maths::CChecksum::calculate(0, m_Sketches);
 }
 
-double CDetectorEqualizer::largestProbabilityToCorrect()
-{
+double CDetectorEqualizer::largestProbabilityToCorrect() {
     return maths::LARGEST_SIGNIFICANT_PROBABILITY;
 }
 
-maths::CQuantileSketch &CDetectorEqualizer::sketch(int detector)
-{
-    auto i = std::lower_bound(m_Sketches.begin(), m_Sketches.end(),
-                              detector, maths::COrderings::SFirstLess());
-    if (i == m_Sketches.end() || i->first != detector)
-    {
+maths::CQuantileSketch& CDetectorEqualizer::sketch(int detector) {
+    auto i = std::lower_bound(m_Sketches.begin(), m_Sketches.end(), detector, maths::COrderings::SFirstLess());
+    if (i == m_Sketches.end() || i->first != detector) {
         i = m_Sketches.insert(i, {detector, maths::CQuantileSketch(SKETCH_INTERPOLATION, SKETCH_SIZE)});
     }
     return i->second;
 }
 
-const maths::CQuantileSketch::EInterpolation
-    CDetectorEqualizer::SKETCH_INTERPOLATION(maths::CQuantileSketch::E_Linear);
+const maths::CQuantileSketch::EInterpolation CDetectorEqualizer::SKETCH_INTERPOLATION(maths::CQuantileSketch::E_Linear);
 const std::size_t CDetectorEqualizer::SKETCH_SIZE(100);
 const double CDetectorEqualizer::MINIMUM_COUNT_FOR_CORRECTION(1.5);
-
 }
 }
diff --git a/lib/model/CDynamicStringIdRegistry.cc b/lib/model/CDynamicStringIdRegistry.cc
index 2e48507887..0e9ab5855e 100644
--- a/lib/model/CDynamicStringIdRegistry.cc
+++ b/lib/model/CDynamicStringIdRegistry.cc
@@ -17,62 +17,47 @@
 
 #include 
 
-namespace ml
-{
-namespace model
-{
-namespace
-{
+namespace ml {
+namespace model {
+namespace {
 const std::string NAMES_TAG("a");
 const std::string FREE_NAMES_TAG("b");
 const std::string RECYCLED_NAMES_TAG("c");
 }
 
-CDynamicStringIdRegistry::CDynamicStringIdRegistry(const std::string &nameType,
+CDynamicStringIdRegistry::CDynamicStringIdRegistry(const std::string& nameType,
                                                    stat_t::EStatTypes addedStat,
                                                    stat_t::EStatTypes addNotAllowedStat,
-                                                   stat_t::EStatTypes recycledStat) :
-        m_NameType(nameType),
-        m_AddedStat(addedStat),
-        m_AddNotAllowedStat(addNotAllowedStat),
-        m_RecycledStat(recycledStat),
-        m_Uids(1)
-{
+                                                   stat_t::EStatTypes recycledStat)
+    : m_NameType(nameType), m_AddedStat(addedStat), m_AddNotAllowedStat(addNotAllowedStat), m_RecycledStat(recycledStat), m_Uids(1) {
 }
 
-CDynamicStringIdRegistry::CDynamicStringIdRegistry(bool isForPersistence,
-                                                   const CDynamicStringIdRegistry &other) :
-        m_NameType(other.m_NameType),
-        m_AddedStat(other.m_AddedStat),
-        m_AddNotAllowedStat(other.m_AddNotAllowedStat),
-        m_RecycledStat(other.m_RecycledStat),
-        m_Dictionary(other.m_Dictionary),
-        m_Uids(other.m_Uids),
-        m_Names(other.m_Names),
-        m_FreeUids(other.m_FreeUids),
-        m_RecycledUids(other.m_RecycledUids)
-{
-    if (!isForPersistence)
-    {
+CDynamicStringIdRegistry::CDynamicStringIdRegistry(bool isForPersistence, const CDynamicStringIdRegistry& other)
+    : m_NameType(other.m_NameType),
+      m_AddedStat(other.m_AddedStat),
+      m_AddNotAllowedStat(other.m_AddNotAllowedStat),
+      m_RecycledStat(other.m_RecycledStat),
+      m_Dictionary(other.m_Dictionary),
+      m_Uids(other.m_Uids),
+      m_Names(other.m_Names),
+      m_FreeUids(other.m_FreeUids),
+      m_RecycledUids(other.m_RecycledUids) {
+    if (!isForPersistence) {
         LOG_ABORT("This constructor only creates clones for persistence");
     }
 }
 
-const std::string &CDynamicStringIdRegistry::name(std::size_t id, const std::string &fallback) const
-{
+const std::string& CDynamicStringIdRegistry::name(std::size_t id, const std::string& fallback) const {
     return id >= m_Names.size() ? fallback : *m_Names[id];
 }
 
-const core::CStoredStringPtr &CDynamicStringIdRegistry::namePtr(std::size_t id) const
-{
+const core::CStoredStringPtr& CDynamicStringIdRegistry::namePtr(std::size_t id) const {
     return m_Names[id];
 }
 
-bool CDynamicStringIdRegistry::id(const std::string &name, std::size_t &result) const
-{
+bool CDynamicStringIdRegistry::id(const std::string& name, std::size_t& result) const {
     TWordSizeUMapCItr itr = m_Uids.find(m_Dictionary.word(name));
-    if (itr == m_Uids.end())
-    {
+    if (itr == m_Uids.end()) {
         result = INVALID_ID;
         return false;
     }
@@ -80,11 +65,9 @@ bool CDynamicStringIdRegistry::id(const std::string &name, std::size_t &result)
     return true;
 }
 
-bool CDynamicStringIdRegistry::anyId(std::size_t &result) const
-{
+bool CDynamicStringIdRegistry::anyId(std::size_t& result) const {
     TWordSizeUMapCItr itr = m_Uids.begin();
-    if (itr == m_Uids.end())
-    {
+    if (itr == m_Uids.end()) {
         result = INVALID_ID;
         return false;
     }
@@ -92,50 +75,35 @@ bool CDynamicStringIdRegistry::anyId(std::size_t &result) const
     return true;
 }
 
-std::size_t CDynamicStringIdRegistry::numberActiveNames() const
-{
+std::size_t CDynamicStringIdRegistry::numberActiveNames() const {
     return m_Uids.size();
 }
 
-std::size_t CDynamicStringIdRegistry::numberNames() const
-{
+std::size_t CDynamicStringIdRegistry::numberNames() const {
     return m_Names.size();
 }
 
-bool CDynamicStringIdRegistry::isIdActive(std::size_t id) const
-{
-    return id < m_Names.size() && !std::binary_search(m_FreeUids.begin(),
-                                                      m_FreeUids.end(),
-                                                      id,
-                                                      std::greater<std::size_t>());
+bool CDynamicStringIdRegistry::isIdActive(std::size_t id) const {
+    return id < m_Names.size() && !std::binary_search(m_FreeUids.begin(), m_FreeUids.end(), id, std::greater<std::size_t>());
 }
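
The free-ID list is held in descending order, so membership is tested with std::binary_search using std::greater, and back() yields the smallest recyclable ID. A sketch of the same active-ID test on plain vectors (isActive is a hypothetical stand-in for isIdActive):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <functional>
    #include <vector>

    // An ID is active when it is in range and not on the (descending-sorted)
    // free list, mirroring isIdActive above.
    bool isActive(std::size_t id, std::size_t names, const std::vector<std::size_t>& freeIds) {
        return id < names && !std::binary_search(freeIds.begin(), freeIds.end(), id, std::greater<std::size_t>());
    }

    int main() {
        std::vector<std::size_t> freeIds{7, 4, 2}; // descending: back() == 2 is reused first
        assert(!isActive(4, 10, freeIds));         // free, so inactive
        assert(isActive(3, 10, freeIds));          // allocated and in range
        assert(!isActive(12, 10, freeIds));        // out of range
        return 0;
    }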
 
-std::size_t CDynamicStringIdRegistry::addName(const std::string &name,
-                                              core_t::TTime time,
-                                              CResourceMonitor &resourceMonitor,
-                                              bool &addedPerson)
-{
+std::size_t
+CDynamicStringIdRegistry::addName(const std::string& name, core_t::TTime time, CResourceMonitor& resourceMonitor, bool& addedPerson) {
     // Get the identifier or create one if this is the
     // first time we've seen them. (Use emplace to avoid copying
     // the string if it is already in the collection.)
-    std::size_t newId = m_FreeUids.empty() ?
-                        m_Names.size() :
-                        m_FreeUids.back();
+    std::size_t newId = m_FreeUids.empty() ? m_Names.size() : m_FreeUids.back();
     std::size_t id = 0;
 
     // Is there any space in the system for us to expand the models?
-    if (resourceMonitor.areAllocationsAllowed())
-    {
+    if (resourceMonitor.areAllocationsAllowed()) {
         id = m_Uids.emplace(m_Dictionary.word(name), newId).first->second;
-    }
-    else
-    {
+    } else {
         // In this case we can only deal with existing people
         TWordSizeUMapCItr itr = m_Uids.find(m_Dictionary.word(name));
-        if (itr == m_Uids.end())
-        {
+        if (itr == m_Uids.end()) {
             LOG_TRACE("Can't add new " << m_NameType << " - allocations not allowed");
             resourceMonitor.acceptAllocationFailureResult(time);
             core::CStatistics::stat(m_AddNotAllowedStat).increment();
@@ -144,22 +112,16 @@ std::size_t CDynamicStringIdRegistry::addName(const std::string &name,
         id = itr->second;
     }
 
-    if (id >= m_Names.size())
-    {
+    if (id >= m_Names.size()) {
         m_Names.push_back(CStringStore::names().get(name));
         addedPerson = true;
         core::CStatistics::stat(m_AddedStat).increment();
-    }
-    else if (id == newId)
-    {
+    } else if (id == newId) {
         LOG_TRACE("Recycling " << id << " for " << m_NameType << " " << name);
         m_Names[id] = CStringStore::names().get(name);
-        if (m_FreeUids.empty())
-        {
+        if (m_FreeUids.empty()) {
             LOG_ERROR("Unexpectedly missing free " << m_NameType << " entry for " << id);
-        }
-        else
-        {
+        } else {
             m_FreeUids.pop_back();
         }
         m_RecycledUids.push_back(id);
@@ -169,29 +131,22 @@ std::size_t CDynamicStringIdRegistry::addName(const std::string &name,
     return id;
 }
 
-void CDynamicStringIdRegistry::removeNames(std::size_t lowestNameToRemove)
-{
+void CDynamicStringIdRegistry::removeNames(std::size_t lowestNameToRemove) {
     std::size_t numberNames = this->numberNames();
-    if (lowestNameToRemove >= numberNames)
-    {
+    if (lowestNameToRemove >= numberNames) {
         return;
     }
 
-    for (std::size_t id = lowestNameToRemove; id < numberNames; ++id)
-    {
+    for (std::size_t id = lowestNameToRemove; id < numberNames; ++id) {
         m_Uids.erase(m_Dictionary.word(*m_Names[id]));
     }
 
     m_Names.erase(m_Names.begin() + lowestNameToRemove, m_Names.end());
 }
 
-void CDynamicStringIdRegistry::recycleNames(const TSizeVec &namesToRemove,
-                                            const std::string &defaultName)
-{
-    for (std::size_t i = 0u; i < namesToRemove.size(); ++i)
-    {
+void CDynamicStringIdRegistry::recycleNames(const TSizeVec& namesToRemove, const std::string& defaultName) {
+    for (std::size_t i = 0u; i < namesToRemove.size(); ++i) {
         std::size_t id = namesToRemove[i];
-        if (id >= m_Names.size())
-        {
+        if (id >= m_Names.size()) {
             LOG_ERROR("Unexpected " << m_NameType << " identifier " << id);
             continue;
         }
@@ -204,33 +159,26 @@ void CDynamicStringIdRegistry::recycleNames(const TSizeVec &namesToRemove,
 
     m_FreeUids.erase(std::unique(m_FreeUids.begin(), m_FreeUids.end()), m_FreeUids.end());
 }
 
-CDynamicStringIdRegistry::TSizeVec &CDynamicStringIdRegistry::recycledIds()
-{
+CDynamicStringIdRegistry::TSizeVec& CDynamicStringIdRegistry::recycledIds() {
     return m_RecycledUids;
 }
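
The hunk above keeps m_FreeUids duplicate-free via std::unique; the preceding sort is elided by the diff context, but it is presumably descending, since isIdActive searches with std::greater and addName reuses back(). A sketch of re-establishing that invariant under that assumption (normalizeFreeIds is hypothetical):

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <vector>

    // Restore the descending-sorted, duplicate-free invariant after
    // appending newly freed IDs; only the unique/erase step is visible in
    // the hunk, the descending sort is an assumption.
    void normalizeFreeIds(std::vector<std::size_t>& freeIds) {
        std::sort(freeIds.begin(), freeIds.end(), std::greater<std::size_t>());
        freeIds.erase(std::unique(freeIds.begin(), freeIds.end()), freeIds.end());
    }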
 
-bool CDynamicStringIdRegistry::checkInvariants() const
-{
+bool CDynamicStringIdRegistry::checkInvariants() const {
     using TSizeUSet = boost::unordered_set<std::size_t>;
 
     bool result = true;
-    if (m_Uids.size() > m_Names.size())
-    {
-        LOG_ERROR("Unexpected extra "
-                  << (m_Uids.size() - m_Names.size()) << " " << m_NameType << " uids");
+    if (m_Uids.size() > m_Names.size()) {
+        LOG_ERROR("Unexpected extra " << (m_Uids.size() - m_Names.size()) << " " << m_NameType << " uids");
         result = false;
     }
 
     TSizeUSet uniqueIds;
-    for (TWordSizeUMapCItr i = m_Uids.begin(); i != m_Uids.end(); ++i)
-    {
-        if (!uniqueIds.insert(i->second).second)
-        {
+    for (TWordSizeUMapCItr i = m_Uids.begin(); i != m_Uids.end(); ++i) {
+        if (!uniqueIds.insert(i->second).second) {
            LOG_ERROR("Duplicate id " << i->second);
            result = false;
        }
-        if (i->second > m_Names.size())
-        {
+        if (i->second > m_Names.size()) {
            LOG_ERROR(m_NameType << " id " << i->second << " out of range [0, " << m_Names.size() << ")");
            result = false;
        }
@@ -239,25 +187,21 @@ bool CDynamicStringIdRegistry::checkInvariants() const
     return result;
 }
 
-void CDynamicStringIdRegistry::clear()
-{
+void CDynamicStringIdRegistry::clear() {
     m_Uids.clear();
     m_Names.clear();
     m_FreeUids.clear();
     m_RecycledUids.clear();
 }
 
-uint64_t CDynamicStringIdRegistry::checksum() const
-{
+uint64_t CDynamicStringIdRegistry::checksum() const {
     using TStrCRef = boost::reference_wrapper<const std::string>;
     using TStrCRefVec = std::vector<TStrCRef>;
 
     TStrCRefVec people;
     people.reserve(m_Names.size());
-    for (std::size_t pid = 0u; pid < m_Names.size(); ++pid)
-    {
-        if (this->isIdActive(pid))
-        {
+    for (std::size_t pid = 0u; pid < m_Names.size(); ++pid) {
+        if (this->isIdActive(pid)) {
             people.emplace_back(*m_Names[pid]);
         }
     }
@@ -265,8 +209,7 @@ uint64_t CDynamicStringIdRegistry::checksum() const
     return maths::CChecksum::calculate(0, people);
 }
 
-void CDynamicStringIdRegistry::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CDynamicStringIdRegistry::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CDynamicStringIdRegistry");
     core::CMemoryDebug::dynamicSize("m_NameType", m_NameType, mem);
     core::CMemoryDebug::dynamicSize("m_PersonUids", m_Uids, mem);
@@ -275,9 +218,8 @@ void CDynamicStringIdRegistry::debugMemoryUsage(core::CMemoryUsage::TMemoryUsage
     core::CMemoryDebug::dynamicSize("m_RecycledPersonUids", m_RecycledUids, mem);
 }
 
-std::size_t CDynamicStringIdRegistry::memoryUsage() const
-{
-    std::size_t mem = core::CMemory::dynamicSize(m_Uids);
+std::size_t CDynamicStringIdRegistry::memoryUsage() const {
+    std::size_t mem = core::CMemory::dynamicSize(m_Uids);
     mem += core::CMemory::dynamicSize(m_NameType);
     mem += core::CMemory::dynamicSize(m_Uids);
     mem += core::CMemory::dynamicSize(m_Names);
@@ -286,59 +228,39 @@ std::size_t CDynamicStringIdRegistry::memoryUsage() const
     return mem;
 }
 
-void CDynamicStringIdRegistry::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void CDynamicStringIdRegistry::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     // Explicitly save all shared strings, on the understanding that any other
     // owners will also save their copies
-    for (std::size_t i = 0; i < m_Names.size(); i++)
-    {
+    for (std::size_t i = 0; i < m_Names.size(); i++) {
         inserter.insertValue(NAMES_TAG, *m_Names[i]);
     }
     core::CPersistUtils::persist(FREE_NAMES_TAG, m_FreeUids, inserter);
     core::CPersistUtils::persist(RECYCLED_NAMES_TAG, m_RecycledUids, inserter);
 }
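
Only the name list and the free/recycled ID lists are persisted; the word-to-ID map is rebuilt on restore by skipping IDs on the free list, as the loop at the end of acceptRestoreTraverser below does. A simplified sketch using plain strings as keys (the real code hashes names through a dictionary first; rebuildUids is hypothetical):

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Rebuild the name-to-ID map from the restored name list, ignoring IDs
    // that sit on the descending-sorted free list.
    std::unordered_map<std::string, std::size_t>
    rebuildUids(const std::vector<std::string>& names, const std::vector<std::size_t>& freeIds) {
        std::unordered_map<std::string, std::size_t> uids;
        for (std::size_t id = 0; id < names.size(); ++id) {
            if (!std::binary_search(freeIds.begin(), freeIds.end(), id, std::greater<std::size_t>())) {
                uids[names[id]] = id;
            }
        }
        return uids;
    }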
 
-bool CDynamicStringIdRegistry::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == NAMES_TAG)
-        {
+bool CDynamicStringIdRegistry::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
+        if (name == NAMES_TAG) {
             m_Names.push_back(CStringStore::names().get(traverser.value()));
-        }
-        else if (name == FREE_NAMES_TAG)
-        {
-            if (!core::CPersistUtils::restore(FREE_NAMES_TAG, m_FreeUids, traverser))
-            {
+        } else if (name == FREE_NAMES_TAG) {
+            if (!core::CPersistUtils::restore(FREE_NAMES_TAG, m_FreeUids, traverser)) {
                 return false;
             }
-        }
-        else if (name == RECYCLED_NAMES_TAG)
-        {
-            if (!core::CPersistUtils::restore(RECYCLED_NAMES_TAG, m_RecycledUids, traverser))
-            {
+        } else if (name == RECYCLED_NAMES_TAG) {
+            if (!core::CPersistUtils::restore(RECYCLED_NAMES_TAG, m_RecycledUids, traverser)) {
                 return false;
             }
         }
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     // Some of the entries in m_Names may relate to IDs that are free to
     // reuse. We mustn't add these to the ID maps.
-    for (std::size_t id = 0; id < m_Names.size(); ++id)
-    {
-        if (std::binary_search(m_FreeUids.begin(),
-                               m_FreeUids.end(),
-                               id,
-                               std::greater<std::size_t>()))
-        {
-            LOG_TRACE("Restore ignoring free " << m_NameType << " name "
-                      << *m_Names[id] << " = id " << id);
-        }
-        else
-        {
+    for (std::size_t id = 0; id < m_Names.size(); ++id) {
+        if (std::binary_search(m_FreeUids.begin(), m_FreeUids.end(), id, std::greater<std::size_t>())) {
+            LOG_TRACE("Restore ignoring free " << m_NameType << " name " << *m_Names[id] << " = id " << id);
+        } else {
            m_Uids[m_Dictionary.word(*m_Names[id])] = id;
        }
    }
@@ -347,6 +269,5 @@ bool CDynamicStringIdRegistry::acceptRestoreTraverser(core::CStateRestoreTravers
 }
 
 const std::size_t CDynamicStringIdRegistry::INVALID_ID(std::numeric_limits<std::size_t>::max());
-
 }
 }
diff --git a/lib/model/CEventData.cc b/lib/model/CEventData.cc
index 00bc08d16b..8a4bc552ca 100644
--- a/lib/model/CEventData.cc
+++ b/lib/model/CEventData.cc
@@ -10,30 +10,19 @@
 #include 
 #include 
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
-namespace
-{
+namespace {
 const CEventData::TDouble1VecArray DUMMY_ARRAY = CEventData::TDouble1VecArray();
 const std::string DASH("-");
-
 }
 
-CEventData::CEventData() :
-        m_Time(0),
-        m_Pid(),
-        m_Cids(),
-        m_Values(),
-        m_IsExplicitNull(false)
-{
+CEventData::CEventData() : m_Time(0), m_Pid(), m_Cids(), m_Values(), m_IsExplicitNull(false) {
 }
 
-void CEventData::swap(CEventData &other)
-{
+void CEventData::swap(CEventData& other) {
     std::swap(m_Time, other.m_Time);
     std::swap(m_Pid, other.m_Pid);
     m_Cids.swap(other.m_Cids);
@@ -43,8 +32,7 @@ void CEventData::swap(CEventData &other)
     std::swap(m_IsExplicitNull, other.m_IsExplicitNull);
 }
 
-void CEventData::clear()
-{
+void CEventData::clear() {
     m_Time = 0;
     m_Pid = boost::none;
     m_Cids.clear();
@@ -54,153 +42,118 @@ void CEventData::clear()
     m_IsExplicitNull = false;
 }
 
-void CEventData::time(core_t::TTime time)
-{
+void CEventData::time(core_t::TTime time) {
     m_Time = time;
 }
 
-bool CEventData::person(std::size_t pid)
-{
-    if (!m_Pid)
-    {
+bool CEventData::person(std::size_t pid) {
+    if (!m_Pid) {
         m_Pid.reset(pid);
-    }
-    else if (pid != m_Pid)
-    {
-        LOG_ERROR("Ignoring subsequent person " << pid
-                  << ", current person " << *m_Pid);
+    } else if (pid != m_Pid) {
+        LOG_ERROR("Ignoring subsequent person " << pid << ", current person " << *m_Pid);
         return false;
     }
     return true;
 }
 
-void CEventData::addAttribute(TOptionalSize cid)
-{
+void CEventData::addAttribute(TOptionalSize cid) {
     m_Cids.push_back(cid);
 }
 
-void CEventData::addValue(const TDouble1Vec &value)
-{
+void CEventData::addValue(const TDouble1Vec& value) {
     m_Values.push_back(TOptionalDouble1VecArraySizePr());
-    if (!value.empty())
-    {
+    if (!value.empty()) {
         m_Values.back().reset(TDouble1VecArraySizePr(TDouble1VecArray(), 1));
         m_Values.back()->first.fill(value);
         m_Values.back()->second = 1;
    }
 }
 
-void CEventData::stringValue(const std::string &value)
-{
+void CEventData::stringValue(const std::string& value) {
     m_StringValue.reset(value);
 }
 
-void CEventData::addInfluence(const TOptionalStr &influence)
-{
+void CEventData::addInfluence(const TOptionalStr& influence) {
     m_Influences.push_back(influence);
 }
 
-void CEventData::addCountStatistic(std::size_t count)
-{
+void CEventData::addCountStatistic(std::size_t count) {
     TDouble1VecArraySizePr values;
     values.first.fill(TDouble1Vec(1, 0.0));
     values.second = count;
     m_Values.push_back(values);
 }
 
-void CEventData::addStatistics(const TDouble1VecArraySizePr &values)
-{
+void CEventData::addStatistics(const TDouble1VecArraySizePr& values) {
     m_Values.push_back(values);
 }
 
-core_t::TTime CEventData::time() const
-{
+core_t::TTime CEventData::time() const {
     return m_Time;
 }
 
-CEventData::TOptionalSize CEventData::personId() const
-{
+CEventData::TOptionalSize CEventData::personId() const {
     return m_Pid;
 }
 
-CEventData::TOptionalSize CEventData::attributeId() const
-{
-    if (m_Cids.size() != 1)
-    {
-        LOG_ERROR("Call to attribute identifier ambiguous: "
-                  << core::CContainerPrinter::print(m_Cids));
+CEventData::TOptionalSize CEventData::attributeId() const {
+    if (m_Cids.size() != 1) {
+        LOG_ERROR("Call to attribute identifier ambiguous: " << core::CContainerPrinter::print(m_Cids));
         return TOptionalSize();
     }
     return m_Cids[0];
 }
 
-const CEventData::TDouble1VecArray &CEventData::values() const
-{
-    if (m_Values.size() != 1)
-    {
-        LOG_ERROR("Call to value ambiguous: "
-                  << core::CContainerPrinter::print(m_Values));
+const CEventData::TDouble1VecArray& CEventData::values() const {
+    if (m_Values.size() != 1) {
+        LOG_ERROR("Call to value ambiguous: " << core::CContainerPrinter::print(m_Values));
         return DUMMY_ARRAY;
     }
     return m_Values[0] ? m_Values[0]->first : DUMMY_ARRAY;
 }
 
-const CEventData::TOptionalStr &CEventData::stringValue() const
-{
+const CEventData::TOptionalStr& CEventData::stringValue() const {
     return m_StringValue;
 }
 
-const CEventData::TOptionalStrVec &CEventData::influences() const
-{
+const CEventData::TOptionalStrVec& CEventData::influences() const {
     return m_Influences;
 }
 
-CEventData::TOptionalSize CEventData::count() const
-{
-    if (m_Values.size() != 1)
-    {
-        LOG_ERROR("Call to count ambiguous: "
-                  << core::CContainerPrinter::print(m_Values));
+CEventData::TOptionalSize CEventData::count() const {
+    if (m_Values.size() != 1) {
+        LOG_ERROR("Call to count ambiguous: " << core::CContainerPrinter::print(m_Values));
         return TOptionalSize();
     }
     return m_Values[0] ? m_Values[0]->second : TOptionalSize();
 }
 
-std::string CEventData::print() const
-{
-    return core::CStringUtils::typeToString(m_Time)
-           + ' ' + (m_Pid ? core::CStringUtils::typeToString(*m_Pid) : DASH)
-           + ' ' + core::CContainerPrinter::print(m_Cids)
-           + ' ' + core::CContainerPrinter::print(m_Values);
+std::string CEventData::print() const {
+    return core::CStringUtils::typeToString(m_Time) + ' ' + (m_Pid ? core::CStringUtils::typeToString(*m_Pid) : DASH) + ' ' +
+           core::CContainerPrinter::print(m_Cids) + ' ' + core::CContainerPrinter::print(m_Values);
 }
 
-CEventData::TOptionalSize CEventData::attributeId(std::size_t i) const
-{
+CEventData::TOptionalSize CEventData::attributeId(std::size_t i) const {
     return i < m_Cids.size() ? m_Cids[i] : TOptionalSize();
 }
 
-const CEventData::TDouble1VecArray &CEventData::values(std::size_t i) const
-{
+const CEventData::TDouble1VecArray& CEventData::values(std::size_t i) const {
     return i < m_Values.size() && m_Values[i] ? m_Values[i]->first : DUMMY_ARRAY;
 }
 
-CEventData::TOptionalSize CEventData::count(std::size_t i) const
-{
+CEventData::TOptionalSize CEventData::count(std::size_t i) const {
     return i < m_Values.size() && m_Values[i] ? m_Values[i]->second : TOptionalSize();
 }
 
-void CEventData::setExplicitNull()
-{
+void CEventData::setExplicitNull() {
     // Set count to 0 to avoid checks of count being unset
     this->addCountStatistic(0);
     m_IsExplicitNull = true;
 }
 
-bool CEventData::isExplicitNull() const
-{
+bool CEventData::isExplicitNull() const {
     return m_IsExplicitNull;
 }
-
 }
 }
diff --git a/lib/model/CEventRateBucketGatherer.cc b/lib/model/CEventRateBucketGatherer.cc
index 245b1f7d76..dcb1acae00 100644
--- a/lib/model/CEventRateBucketGatherer.cc
+++ b/lib/model/CEventRateBucketGatherer.cc
@@ -8,10 +8,10 @@
 #include 
 #include 
 
-#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -27,8 +27,8 @@
 #include 
 #include 
 
-#include 
 #include 
+#include 
 #include 
 #include 
@@ -37,13 +37,10 @@
 #include 
 #include 
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
-namespace
-{
+namespace {
 
 using TSizeVec = std::vector<std::size_t>;
 using TStrVec = std::vector<std::string>;
@@ -81,257 +78,192 @@
 const std::string INFLUENCER_UNIQUE_STRINGS_TAG("a");
 const std::string UNIQUE_STRINGS_TAG("b");
 
 //! \brief Manages persistence of time-of-day feature data maps.
-struct STimesBucketSerializer
-{
-    void operator()(const TSizeSizePrMeanAccumulatorUMap &times,
-                    core::CStatePersistInserter &inserter)
-    {
+struct STimesBucketSerializer {
+    void operator()(const TSizeSizePrMeanAccumulatorUMap& times, core::CStatePersistInserter& inserter) {
         std::vector<TSizeSizePrMeanAccumulatorUMap::const_iterator> ordered;
         ordered.reserve(times.size());
-        for (auto i = times.begin(); i != times.end(); ++i)
-        {
+        for (auto i = times.begin(); i != times.end(); ++i) {
             ordered.push_back(i);
         }
-        std::sort(ordered.begin(), ordered.end(),
-                  core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
-        for (std::size_t i = 0u; i < ordered.size(); ++i)
-        {
+        std::sort(ordered.begin(), ordered.end(), core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
+        for (std::size_t i = 0u; i < ordered.size(); ++i) {
             inserter.insertValue(PERSON_TAG, CDataGatherer::extractPersonId(*ordered[i]));
             inserter.insertValue(ATTRIBUTE_TAG, CDataGatherer::extractAttributeId(*ordered[i]));
             inserter.insertValue(MEAN_TIMES_TAG, CDataGatherer::extractData(*ordered[i]).toDelimited());
         }
     }
-    bool operator()(TSizeSizePrMeanAccumulatorUMap &times,
-                    core::CStateRestoreTraverser &traverser) const
-    {
+    bool operator()(TSizeSizePrMeanAccumulatorUMap& times, core::CStateRestoreTraverser& traverser) const {
         std::size_t pid = 0;
         std::size_t cid = 0;
-        do
-        {
-            const std::string &name = traverser.name();
+        do {
+            const std::string& name = traverser.name();
             RESTORE_BUILT_IN(PERSON_TAG, pid)
             RESTORE_BUILT_IN(ATTRIBUTE_TAG, cid)
             RESTORE(MEAN_TIMES_TAG, times[TSizeSizePr(pid, cid)].fromDelimited(traverser.value()))
-        }
-        while (traverser.next());
+        } while (traverser.next());
         return true;
     }
 };
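
Both serializers in this file sort map iterators before writing because hash-map iteration order is unspecified, while persisted state must be stable and comparable. The same idea on a plain std::unordered_map (printf stands in for the inserter calls):

    #include <algorithm>
    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    // Collect iterators, sort them by key, then emit in deterministic order.
    void persistOrdered(const std::unordered_map<int, double>& values) {
        using TCItr = std::unordered_map<int, double>::const_iterator;
        std::vector<TCItr> ordered;
        ordered.reserve(values.size());
        for (auto i = values.begin(); i != values.end(); ++i) {
            ordered.push_back(i);
        }
        std::sort(ordered.begin(), ordered.end(),
                  [](TCItr lhs, TCItr rhs) { return lhs->first < rhs->first; });
        for (TCItr i : ordered) {
            std::printf("%d -> %g\n", i->first, i->second);
        }
    }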
 
 //! \brief Manages persistence of unique string feature data maps.
-struct SStrDataBucketSerializer
-{
-    void operator()(const TSizeSizePrStrDataUMap &strings,
-                    core::CStatePersistInserter &inserter)
-    {
+struct SStrDataBucketSerializer {
+    void operator()(const TSizeSizePrStrDataUMap& strings, core::CStatePersistInserter& inserter) {
         std::vector<TSizeSizePrStrDataUMap::const_iterator> ordered;
         ordered.reserve(strings.size());
-        for (auto i = strings.begin(); i != strings.end(); ++i)
-        {
+        for (auto i = strings.begin(); i != strings.end(); ++i) {
             ordered.push_back(i);
         }
-        std::sort(ordered.begin(), ordered.end(),
-                  core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
-        for (std::size_t i = 0u; i != ordered.size(); ++i)
-        {
-            inserter.insertValue(PERSON_TAG,
-                                 CDataGatherer::extractPersonId(*ordered[i]));
-            inserter.insertValue(ATTRIBUTE_TAG,
-                                 CDataGatherer::extractAttributeId(*ordered[i]));
-            inserter.insertLevel(STRING_ITEM_TAG, boost::bind(
-                                     &CUniqueStringFeatureData::acceptPersistInserter,
-                                     boost::cref(CDataGatherer::extractData(*ordered[i])), _1));
-        }
-    }
-    bool operator()(TSizeSizePrStrDataUMap &map,
-                    core::CStateRestoreTraverser &traverser) const
-    {
+        std::sort(ordered.begin(), ordered.end(), core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
+        for (std::size_t i = 0u; i != ordered.size(); ++i) {
+            inserter.insertValue(PERSON_TAG, CDataGatherer::extractPersonId(*ordered[i]));
+            inserter.insertValue(ATTRIBUTE_TAG, CDataGatherer::extractAttributeId(*ordered[i]));
+            inserter.insertLevel(
+                STRING_ITEM_TAG,
+                boost::bind(&CUniqueStringFeatureData::acceptPersistInserter, boost::cref(CDataGatherer::extractData(*ordered[i])), _1));
+        }
+    }
+    bool operator()(TSizeSizePrStrDataUMap& map, core::CStateRestoreTraverser& traverser) const {
         std::size_t pid = 0;
         std::size_t cid = 0;
-        do
-        {
-            const std::string &name = traverser.name();
+        do {
+            const std::string& name = traverser.name();
             RESTORE_BUILT_IN(PERSON_TAG, pid)
             RESTORE_BUILT_IN(ATTRIBUTE_TAG, cid)
             RESTORE(STRING_ITEM_TAG,
-                    traverser.traverseSubLevel(boost::bind(
-                        &CUniqueStringFeatureData::acceptRestoreTraverser,
-                        boost::ref(map[TSizeSizePr(pid, cid)]), _1)))
-        }
-        while (traverser.next());
+                    traverser.traverseSubLevel(
+                        boost::bind(&CUniqueStringFeatureData::acceptRestoreTraverser, boost::ref(map[TSizeSizePr(pid, cid)]), _1)))
+        } while (traverser.next());
         return true;
     }
 };
 
 //! Serialize \p data.
-void persistAttributePeopleData(const TSizeUSetVec &data,
-                                core::CStatePersistInserter &inserter)
-{
+void persistAttributePeopleData(const TSizeUSetVec& data, core::CStatePersistInserter& inserter) {
     // Persist the vector in reverse order, because it means we'll
     // find out the correct size more efficiently on restore.
     std::size_t index = data.size();
-    while (index > 0)
-    {
+    while (index > 0) {
         --index;
         inserter.insertValue(ATTRIBUTE_TAG, index);
-        const TSizeUSet &people = data[index];
+        const TSizeUSet& people = data[index];
         // Persist the person identifiers in sorted order to make
         // it easier to compare state records.
         TSizeVec orderedPeople(people.begin(), people.end());
         std::sort(orderedPeople.begin(), orderedPeople.end());
-        for (std::size_t i = 0u; i < orderedPeople.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < orderedPeople.size(); ++i) {
             inserter.insertValue(PERSON_TAG, orderedPeople[i]);
         }
     }
 }
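
persistAttributePeopleData walks the vector from the highest index down so a reader learns the final size from the first record and resizes once. A sketch of the matching restore-side behaviour under that assumption (restoreInOrder and the int payload are hypothetical):

    #include <cstddef>
    #include <vector>

    // Indices arrive largest-first, so the vector grows exactly once, on
    // the first record; later indices always fit.
    void restoreInOrder(const std::vector<std::size_t>& indicesInFileOrder,
                        std::vector<int>& data) {
        for (std::size_t index : indicesInFileOrder) {
            if (index >= data.size()) {
                data.resize(index + 1);
            }
            data[index] = 1; // placeholder for the real per-index payload
        }
    }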
 
 //! Serialize \p featureData.
-void persistFeatureData(const TCategoryAnyMap &featureData,
-                        core::CStatePersistInserter &inserter)
-{
-    for (const auto &data_ : featureData)
-    {
+void persistFeatureData(const TCategoryAnyMap& featureData, core::CStatePersistInserter& inserter) {
+    for (const auto& data_ : featureData) {
         model_t::EEventRateCategory category = data_.first;
-        const boost::any &data = data_.second;
-        try
-        {
-            switch (category)
-            {
+        const boost::any& data = data_.second;
+        try {
+            switch (category) {
             case model_t::E_DiurnalTimes:
-                inserter.insertLevel(TIMES_OF_DAY_TAG, boost::bind(
-                                         TSizeSizePrMeanAccumulatorUMapQueue::CSerializer<STimesBucketSerializer>(),
-                                         boost::cref(boost::any_cast<const TSizeSizePrMeanAccumulatorUMapQueue&>(data)), _1));
+                inserter.insertLevel(TIMES_OF_DAY_TAG,
+                                     boost::bind(TSizeSizePrMeanAccumulatorUMapQueue::CSerializer<STimesBucketSerializer>(),
+                                                 boost::cref(boost::any_cast<const TSizeSizePrMeanAccumulatorUMapQueue&>(data)),
+                                                 _1));
                 break;
             case model_t::E_MeanArrivalTimes:
                 // TODO
                 break;
             case model_t::E_AttributePeople:
-                inserter.insertLevel(ATTRIBUTE_PEOPLE_TAG, boost::bind(
-                                         &persistAttributePeopleData,
-                                         boost::cref(boost::any_cast<const TSizeUSetVec&>(data)), _1));
+                inserter.insertLevel(ATTRIBUTE_PEOPLE_TAG,
+                                     boost::bind(&persistAttributePeopleData, boost::cref(boost::any_cast<const TSizeUSetVec&>(data)), _1));
                 break;
             case model_t::E_UniqueValues:
-                inserter.insertLevel(UNIQUE_VALUES_TAG, boost::bind(
-                                         TSizeSizePrStrDataUMapQueue::CSerializer<SStrDataBucketSerializer>(),
-                                         boost::cref(boost::any_cast<const TSizeSizePrStrDataUMapQueue&>(data)), _1));
+                inserter.insertLevel(UNIQUE_VALUES_TAG,
+                                     boost::bind(TSizeSizePrStrDataUMapQueue::CSerializer<SStrDataBucketSerializer>(),
+                                                 boost::cref(boost::any_cast<const TSizeSizePrStrDataUMapQueue&>(data)),
+                                                 _1));
                 break;
             }
-        }
-        catch (const std::exception &e)
-        {
-            LOG_ERROR("Failed to serialize data for " << category
-                      << ": " << e.what());
-        }
+        } catch (const std::exception& e) { LOG_ERROR("Failed to serialize data for " << category << ": " << e.what()); }
     }
 }
 
 //! Extract \p data from a state document.
-bool restoreAttributePeopleData(core::CStateRestoreTraverser &traverser,
-                                TSizeUSetVec &data)
-{
+bool restoreAttributePeopleData(core::CStateRestoreTraverser& traverser, TSizeUSetVec& data) {
     size_t lastCid = 0;
     bool seenCid = false;
 
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == ATTRIBUTE_TAG)
-        {
-            if (core::CStringUtils::stringToType(traverser.value(), lastCid) == false)
-            {
+    do {
+        const std::string& name = traverser.name();
+        if (name == ATTRIBUTE_TAG) {
+            if (core::CStringUtils::stringToType(traverser.value(), lastCid) == false) {
                 LOG_ERROR("Invalid attribute ID in " << traverser.value());
                 return false;
             }
             seenCid = true;
-            if (lastCid >= data.size())
-            {
+            if (lastCid >= data.size()) {
                 data.resize(lastCid + 1);
             }
-        }
-        else if (name == PERSON_TAG)
-        {
-            if (!seenCid)
-            {
-                LOG_ERROR("Incorrect format - person ID before attribute ID in "
-                          << traverser.value());
+        } else if (name == PERSON_TAG) {
+            if (!seenCid) {
+                LOG_ERROR("Incorrect format - person ID before attribute ID in " << traverser.value());
                 return false;
             }
 
             std::size_t pid = 0;
-            if (core::CStringUtils::stringToType(traverser.value(), pid) == false)
-            {
+            if (core::CStringUtils::stringToType(traverser.value(), pid) == false) {
                 LOG_ERROR("Invalid person ID in " << traverser.value());
                 return false;
            }
            data[lastCid].insert(pid);
        }
-    }
-    while (traverser.next());
+    } while (traverser.next());
     return true;
 }
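
persistFeatureData dispatches over a map whose values are type-erased boost::any payloads: the enum key tells the reader which concrete type to any_cast to. A minimal sketch of the same pattern (categories and payload types hypothetical):

    #include <boost/any.hpp>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    enum ECategory { E_Counts, E_Names };

    // One map holds differently-typed payloads; readers switch on the key
    // and any_cast to the concrete type, as persistFeatureData does above.
    void print(const std::map<ECategory, boost::any>& featureData) {
        for (const auto& entry : featureData) {
            switch (entry.first) {
            case E_Counts:
                std::cout << boost::any_cast<const std::vector<int>&>(entry.second).size() << " counts\n";
                break;
            case E_Names:
                std::cout << boost::any_cast<const std::vector<std::string>&>(entry.second).size() << " names\n";
                break;
            }
        }
    }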
 
 //! Extract \p featureData from a state document.
-bool restoreFeatureData(core::CStateRestoreTraverser &traverser,
-                        TCategoryAnyMap &featureData,
+bool restoreFeatureData(core::CStateRestoreTraverser& traverser,
+                        TCategoryAnyMap& featureData,
                         std::size_t latencyBuckets,
                         core_t::TTime bucketLength,
-                        core_t::TTime currentBucketStartTime)
-{
-    const std::string &name = traverser.name();
-    if (name == ATTRIBUTE_PEOPLE_TAG)
-    {
-        TSizeUSetVec *data{boost::unsafe_any_cast<TSizeUSetVec>(
-                               &featureData.emplace(model_t::E_AttributePeople,
-                                                    TSizeUSetVec()).first->second)};
-        if (traverser.traverseSubLevel(boost::bind(&restoreAttributePeopleData,
-                                                   _1, boost::ref(*data))) == false)
-        {
+                        core_t::TTime currentBucketStartTime) {
+    const std::string& name = traverser.name();
+    if (name == ATTRIBUTE_PEOPLE_TAG) {
+        TSizeUSetVec* data{
+            boost::unsafe_any_cast<TSizeUSetVec>(&featureData.emplace(model_t::E_AttributePeople, TSizeUSetVec()).first->second)};
+        if (traverser.traverseSubLevel(boost::bind(&restoreAttributePeopleData, _1, boost::ref(*data))) == false) {
             LOG_ERROR("Invalid attribute/people mapping in " << traverser.value());
             return false;
         }
-    }
-    else if (name == UNIQUE_VALUES_TAG)
-    {
-        if (featureData.count(model_t::E_UniqueValues) != 0)
-        {
+    } else if (name == UNIQUE_VALUES_TAG) {
+        if (featureData.count(model_t::E_UniqueValues) != 0) {
             featureData.erase(model_t::E_UniqueValues);
         }
-        TSizeSizePrStrDataUMapQueue *data{boost::unsafe_any_cast<TSizeSizePrStrDataUMapQueue>(
-                                              &featureData.emplace(model_t::E_UniqueValues,
-                                                                   TSizeSizePrStrDataUMapQueue(
-                                                                       latencyBuckets,
-                                                                       bucketLength,
-                                                                       currentBucketStartTime,
-                                                                       TSizeSizePrStrDataUMap(1))).first->second)};
+        TSizeSizePrStrDataUMapQueue* data{boost::unsafe_any_cast<TSizeSizePrStrDataUMapQueue>(
+            &featureData
+                 .emplace(model_t::E_UniqueValues,
+                          TSizeSizePrStrDataUMapQueue(latencyBuckets, bucketLength, currentBucketStartTime, TSizeSizePrStrDataUMap(1)))
+                 .first->second)};
         if (traverser.traverseSubLevel(boost::bind(
-                TSizeSizePrStrDataUMapQueue::CSerializer<SStrDataBucketSerializer>(TSizeSizePrStrDataUMap(1)),
-                boost::ref(*data), _1)) == false)
-        {
+                TSizeSizePrStrDataUMapQueue::CSerializer<SStrDataBucketSerializer>(TSizeSizePrStrDataUMap(1)), boost::ref(*data), _1)) ==
+            false) {
             LOG_ERROR("Invalid unique value mapping in " << traverser.value());
             return false;
         }
-    }
-    else if (name == TIMES_OF_DAY_TAG)
-    {
-        if (featureData.count(model_t::E_DiurnalTimes) == 0)
-        {
+    } else if (name == TIMES_OF_DAY_TAG) {
+        if (featureData.count(model_t::E_DiurnalTimes) == 0) {
             featureData.erase(model_t::E_DiurnalTimes);
         }
-        TSizeSizePrMeanAccumulatorUMapQueue *data{boost::unsafe_any_cast<TSizeSizePrMeanAccumulatorUMapQueue>(
-                                                      &featureData.emplace(model_t::E_DiurnalTimes,
-                                                                           TSizeSizePrMeanAccumulatorUMapQueue(
-                                                                               latencyBuckets,
-                                                                               bucketLength,
-                                                                               currentBucketStartTime)).first->second)};
+        TSizeSizePrMeanAccumulatorUMapQueue* data{boost::unsafe_any_cast<TSizeSizePrMeanAccumulatorUMapQueue>(
+            &featureData
+                 .emplace(model_t::E_DiurnalTimes,
+                          TSizeSizePrMeanAccumulatorUMapQueue(latencyBuckets, bucketLength, currentBucketStartTime))
+                 .first->second)};
         if (traverser.traverseSubLevel(boost::bind(
-                TSizeSizePrMeanAccumulatorUMapQueue::CSerializer<STimesBucketSerializer>(),
-                boost::ref(*data), _1)) == false)
-        {
+                TSizeSizePrMeanAccumulatorUMapQueue::CSerializer<STimesBucketSerializer>(), boost::ref(*data), _1)) == false) {
             LOG_ERROR("Invalid times mapping in " << traverser.value());
             return false;
         }
@@ -340,44 +272,42 @@ bool restoreFeatureData(core::CStateRestoreTraverser &traverser,
     }
 }
 
 //! Get the by field name.
-const std::string &byField(bool population, const TStrVec &fieldNames)
-{
+const std::string& byField(bool population, const TStrVec& fieldNames) {
     return population ? fieldNames[1] : fieldNames[0];
 }
//! Get the over field name.
-const std::string &overField(bool population, const TStrVec &fieldNames)
-{
+const std::string& overField(bool population, const TStrVec& fieldNames) {
     return population ? fieldNames[0] : EMPTY_STRING;
 }

-template<typename T, typename ITR> struct SMaybeConst {};
-template<typename T> struct SMaybeConst<T, TCategoryAnyMap::iterator> { using TRef = T &; };
-template<typename T> struct SMaybeConst<T, TCategoryAnyMap::const_iterator> { using TRef = const T &; };
+template<typename T, typename ITR>
+struct SMaybeConst {};
+template<typename T>
+struct SMaybeConst<T, TCategoryAnyMap::iterator> {
+    using TRef = T&;
+};
+template<typename T>
+struct SMaybeConst<T, TCategoryAnyMap::const_iterator> {
+    using TRef = const T&;
+};

 //! Apply a function \p f to all the data held in [\p begin, \p end).
 template<typename ITR, typename F>
-void apply(ITR begin, ITR end, const F &f)
-{
-    for (ITR itr = begin; itr != end; ++itr)
-    {
+void apply(ITR begin, ITR end, const F& f) {
+    for (ITR itr = begin; itr != end; ++itr) {
         model_t::EEventRateCategory category = itr->first;
-        try
-        {
-            switch (category)
-            {
-                case model_t::E_DiurnalTimes:
-                {
+        try {
+            switch (category) {
+            case model_t::E_DiurnalTimes: {
                 f(boost::any_cast<typename SMaybeConst<TSizeSizePrMeanAccumulatorUMapQueue, ITR>::TRef>(itr->second));
                 break;
             }
-                case model_t::E_MeanArrivalTimes:
-                {
+            case model_t::E_MeanArrivalTimes: {
                 // TODO
                 break;
             }
-                case model_t::E_AttributePeople:
-                {
+            case model_t::E_AttributePeople: {
                 f(boost::any_cast<typename SMaybeConst<TSizeUSetVec, ITR>::TRef>(itr->second));
                 break;
             }
@@ -385,246 +315,154 @@ void apply(ITR begin, ITR end, const F &f)
                 f(boost::any_cast<typename SMaybeConst<TSizeSizePrStrDataUMapQueue, ITR>::TRef>(itr->second));
                 break;
             }
-        }
-        catch (const std::exception &e)
-        {
-            LOG_ERROR("Apply failed for " << category << ": " << e.what());
-        }
+        } catch (const std::exception& e) { LOG_ERROR("Apply failed for " << category << ": " << e.what()); }
     }
 }

 //! Apply a function \p f to all the data held in \p featureData.
 template<typename T, typename F>
-void apply(T &featureData, const F &f)
-{
+void apply(T& featureData, const F& f) {
     apply(featureData.begin(), featureData.end(), f);
 }
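The SMaybeConst trait above exists so that a single apply() template can visit the map through either a mutable iterator or a const_iterator. A standalone sketch of the trick; TAnyMap and the plain int payload are illustrative stand-ins for the gatherer's map and feature containers:

    #include <boost/any.hpp>
    #include <map>

    using TAnyMap = std::map<int, boost::any>;

    // Primary template is empty; the specialisations pick T& for a mutable
    // iterator and const T& for a const_iterator.
    template<typename T, typename ITR> struct SMaybeConst {};
    template<typename T> struct SMaybeConst<T, TAnyMap::iterator> { using TRef = T&; };
    template<typename T> struct SMaybeConst<T, TAnyMap::const_iterator> { using TRef = const T&; };

    template<typename ITR, typename F>
    void apply(ITR begin, ITR end, const F& f) {
        for (ITR itr = begin; itr != end; ++itr) {
            // The trait makes this cast const-correct for either iterator flavour.
            f(boost::any_cast<typename SMaybeConst<int, ITR>::TRef>(itr->second));
        }
    }

    int main() {
        TAnyMap m;
        m.emplace(0, 1);
        m.emplace(1, 2);
        apply(m.begin(), m.end(), [](int& x) { ++x; });             // mutating pass
        const TAnyMap& cm = m;
        apply(cm.begin(), cm.end(), [](const int& x) { (void)x; }); // read-only pass
        return 0;
    }

//! \brief Removes people from the feature data.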
-struct SRemovePeople -{ - void operator()(TSizeUSetVec &attributePeople, - std::size_t lowestPersonToRemove, - std::size_t endPeople) const - { - for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid) - { - for (std::size_t pid = lowestPersonToRemove; pid < endPeople; ++pid) - { +struct SRemovePeople { + void operator()(TSizeUSetVec& attributePeople, std::size_t lowestPersonToRemove, std::size_t endPeople) const { + for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid) { + for (std::size_t pid = lowestPersonToRemove; pid < endPeople; ++pid) { attributePeople[cid].erase(pid); } } } - void operator()(TSizeUSetVec &attributePeople, - const TSizeVec &peopleToRemove) const - { - for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid) - { - for (std::size_t i = 0u; i < peopleToRemove.size(); ++i) - { + void operator()(TSizeUSetVec& attributePeople, const TSizeVec& peopleToRemove) const { + for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid) { + for (std::size_t i = 0u; i < peopleToRemove.size(); ++i) { attributePeople[cid].erase(peopleToRemove[i]); } } } - void operator()(TSizeSizePrStrDataUMapQueue &peopleAttributeUniqueValues, - std::size_t lowestPersonToRemove, - std::size_t endPeople) const - { - for (auto &bucket : peopleAttributeUniqueValues) - { - for (auto i = bucket.begin(); i != bucket.end(); /**/) - { - if (CDataGatherer::extractPersonId(*i) >= lowestPersonToRemove && - CDataGatherer::extractPersonId(*i) < endPeople) - { + void + operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, std::size_t lowestPersonToRemove, std::size_t endPeople) const { + for (auto& bucket : peopleAttributeUniqueValues) { + for (auto i = bucket.begin(); i != bucket.end(); /**/) { + if (CDataGatherer::extractPersonId(*i) >= lowestPersonToRemove && CDataGatherer::extractPersonId(*i) < endPeople) { i = bucket.erase(i); - } - else - { + } else { ++i; } } } } - void operator()(TSizeSizePrStrDataUMapQueue &peopleAttributeUniqueValues, - const TSizeVec &peopleToRemove) const - { - CBucketGatherer::remove(peopleToRemove, - CDataGatherer::SExtractPersonId(), - peopleAttributeUniqueValues); - } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue &arrivalTimes, - std::size_t lowestPersonToRemove, - std::size_t endPeople) const - { - for (auto &bucket : arrivalTimes) - { - for (auto i = bucket.begin(); i != bucket.end(); /**/) - { - if (CDataGatherer::extractPersonId(*i) >= lowestPersonToRemove && - CDataGatherer::extractPersonId(*i) < endPeople) - { + void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, const TSizeVec& peopleToRemove) const { + CBucketGatherer::remove(peopleToRemove, CDataGatherer::SExtractPersonId(), peopleAttributeUniqueValues); + } + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, std::size_t lowestPersonToRemove, std::size_t endPeople) const { + for (auto& bucket : arrivalTimes) { + for (auto i = bucket.begin(); i != bucket.end(); /**/) { + if (CDataGatherer::extractPersonId(*i) >= lowestPersonToRemove && CDataGatherer::extractPersonId(*i) < endPeople) { i = bucket.erase(i); - } - else - { + } else { ++i; } } } } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue &arrivalTimes, - const TSizeVec &peopleToRemove) const - { - CBucketGatherer::remove(peopleToRemove, - CDataGatherer::SExtractPersonId(), - arrivalTimes); + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, const TSizeVec& peopleToRemove) const { + CBucketGatherer::remove(peopleToRemove, CDataGatherer::SExtractPersonId(), 
arrivalTimes); } }; //! \brief Removes attributes from the feature data. -struct SRemoveAttributes -{ - void operator()(TSizeUSetVec &attributePeople, - std::size_t lowestAttributeToRemove) const - { - if (lowestAttributeToRemove < attributePeople.size()) - { - attributePeople.erase(attributePeople.begin() + lowestAttributeToRemove, - attributePeople.end()); - } - } - void operator()(TSizeUSetVec &attributePeople, - const TSizeVec &attributesToRemove) const - { - for (std::size_t i = 0u; i < attributesToRemove.size(); ++i) - { +struct SRemoveAttributes { + void operator()(TSizeUSetVec& attributePeople, std::size_t lowestAttributeToRemove) const { + if (lowestAttributeToRemove < attributePeople.size()) { + attributePeople.erase(attributePeople.begin() + lowestAttributeToRemove, attributePeople.end()); + } + } + void operator()(TSizeUSetVec& attributePeople, const TSizeVec& attributesToRemove) const { + for (std::size_t i = 0u; i < attributesToRemove.size(); ++i) { attributePeople[attributesToRemove[i]].clear(); } } - void operator()(TSizeSizePrStrDataUMapQueue &peopleAttributeUniqueValues, - std::size_t lowestAttributeToRemove) const - { - for (auto &bucket : peopleAttributeUniqueValues) - { - for (auto i = bucket.begin(); i != bucket.end(); /**/) - { - if (CDataGatherer::extractAttributeId(*i) >= lowestAttributeToRemove) - { + void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, std::size_t lowestAttributeToRemove) const { + for (auto& bucket : peopleAttributeUniqueValues) { + for (auto i = bucket.begin(); i != bucket.end(); /**/) { + if (CDataGatherer::extractAttributeId(*i) >= lowestAttributeToRemove) { i = bucket.erase(i); - } - else - { + } else { ++i; } } } } - void operator()(TSizeSizePrStrDataUMapQueue &peopleAttributeUniqueValues, - const TSizeVec &attributesToRemove) const - { - CBucketGatherer::remove(attributesToRemove, - CDataGatherer::SExtractAttributeId(), - peopleAttributeUniqueValues); - } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue &arrivalTimes, - std::size_t lowestAttributeToRemove) const - { - for (auto &bucket : arrivalTimes) - { - for (auto i = bucket.begin(); i != bucket.end(); /**/) - { - if (CDataGatherer::extractAttributeId(*i) >= lowestAttributeToRemove) - { + void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, const TSizeVec& attributesToRemove) const { + CBucketGatherer::remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), peopleAttributeUniqueValues); + } + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, std::size_t lowestAttributeToRemove) const { + for (auto& bucket : arrivalTimes) { + for (auto i = bucket.begin(); i != bucket.end(); /**/) { + if (CDataGatherer::extractAttributeId(*i) >= lowestAttributeToRemove) { i = bucket.erase(i); - } - else - { + } else { ++i; } } } } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue &arrivalTimes, - const TSizeVec &attributesToRemove) const - { - CBucketGatherer::remove(attributesToRemove, - CDataGatherer::SExtractAttributeId(), - arrivalTimes); + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, const TSizeVec& attributesToRemove) const { + CBucketGatherer::remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), arrivalTimes); } }; //! \brief Computes a checksum for the feature data. 
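Before the checksum functor, a note on the removal functors above: they all erase from an unordered container mid-iteration by assigning the iterator that erase() returns. A minimal sketch of that idiom with a (person, attribute) keyed map; the keys and bounds are illustrative:

    #include <boost/unordered_map.hpp>
    #include <cstddef>
    #include <utility>

    int main() {
        using TSizeSizePr = std::pair<std::size_t, std::size_t>;
        boost::unordered_map<TSizeSizePr, int> bucket;
        bucket[{0, 0}] = 1;
        bucket[{5, 0}] = 2;
        bucket[{9, 1}] = 3;
        std::size_t lowestPersonToRemove = 5;
        std::size_t endPeople = 10;
        for (auto i = bucket.begin(); i != bucket.end(); /**/) {
            std::size_t pid = i->first.first; // person id is the first pair member
            if (pid >= lowestPersonToRemove && pid < endPeople) {
                i = bucket.erase(i); // erase returns the next valid iterator
            } else {
                ++i;
            }
        }
        return 0; // only the {0, 0} entry survives
    }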
-struct SChecksum
-{
-    void operator()(const TSizeUSetVec &attributePeople,
-                    const CDataGatherer &gatherer,
-                    TStrUInt64Map &hashes) const
-    {
+struct SChecksum {
+    void operator()(const TSizeUSetVec& attributePeople, const CDataGatherer& gatherer, TStrUInt64Map& hashes) const {
         using TStrCRef = boost::reference_wrapper<const std::string>;
         using TStrCRefVec = std::vector<TStrCRef>;

-        for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid)
-        {
-            if (gatherer.isAttributeActive(cid))
-            {
+        for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid) {
+            if (gatherer.isAttributeActive(cid)) {
                 TStrCRefVec people;
                 people.reserve(attributePeople[cid].size());
-                for (const auto &person : attributePeople[cid])
-                {
-                    if (gatherer.isPersonActive(person))
-                    {
+                for (const auto& person : attributePeople[cid]) {
+                    if (gatherer.isPersonActive(person)) {
                         people.emplace_back(gatherer.personName(person));
                     }
                 }
-                std::sort(people.begin(), people.end(),
-                          maths::COrderings::SReferenceLess());
-                uint64_t &hash = hashes[gatherer.attributeName(cid)];
+                std::sort(people.begin(), people.end(), maths::COrderings::SReferenceLess());
+                uint64_t& hash = hashes[gatherer.attributeName(cid)];
                 hash = maths::CChecksum::calculate(hash, people);
             }
         }
     }
-    void operator()(const TSizeSizePrStrDataUMapQueue &peopleAttributeUniqueValues,
-                    const CDataGatherer &gatherer,
-                    TStrUInt64Map &hashes) const
-    {
-        for (const auto &uniques : peopleAttributeUniqueValues)
-        {
+    void
+    operator()(const TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, const CDataGatherer& gatherer, TStrUInt64Map& hashes) const {
+        for (const auto& uniques : peopleAttributeUniqueValues) {
             this->checksum(uniques, gatherer, hashes);
         }
     }
-    void operator()(const TSizeSizePrMeanAccumulatorUMapQueue &arrivalTimes,
-                    const CDataGatherer &gatherer,
-                    TStrUInt64Map &hashes) const
-    {
-        for (const auto &time : arrivalTimes)
-        {
+    void operator()(const TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, const CDataGatherer& gatherer, TStrUInt64Map& hashes) const {
+        for (const auto& time : arrivalTimes) {
             this->checksum(time, gatherer, hashes);
         }
     }

     template<typename T>
-    void checksum(const boost::unordered_map<TSizeSizePr, T> &bucket,
-                  const CDataGatherer &gatherer,
-                  TStrUInt64Map &hashes) const
-    {
+    void checksum(const boost::unordered_map<TSizeSizePr, T>& bucket, const CDataGatherer& gatherer, TStrUInt64Map& hashes) const {
         using TSizeUInt64VecUMap = boost::unordered_map<std::size_t, std::vector<uint64_t>>;

         TSizeUInt64VecUMap attributeHashes;
-        for (const auto &value : bucket)
-        {
+        for (const auto& value : bucket) {
             std::size_t pid = CDataGatherer::extractPersonId(value);
             std::size_t cid = CDataGatherer::extractAttributeId(value);
-            if (gatherer.isPersonActive(pid) && gatherer.isAttributeActive(cid))
-            {
+            if (gatherer.isPersonActive(pid) && gatherer.isAttributeActive(cid)) {
                 attributeHashes[cid].push_back(maths::CChecksum::calculate(0, value.second));
             }
         }
-        for (auto &hash_ : attributeHashes)
-        {
+        for (auto& hash_ : attributeHashes) {
             std::sort(hash_.second.begin(), hash_.second.end());
-            uint64_t &hash = hashes[gatherer.attributeName(hash_.first)];
+            uint64_t& hash = hashes[gatherer.attributeName(hash_.first)];
             hash = maths::CChecksum::calculate(hash, hash_.second);
         }
     }
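SChecksum makes its hashes independent of unordered-container iteration order by collecting per-element hashes, sorting them, and only then combining. A sketch of that order-normalisation step; std::hash and the multiply-and-add combine are stand-ins for maths::CChecksum and core::CHashing, whose internals are not shown here:

    #include <algorithm>
    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> values{"b", "a", "c"}; // pretend unordered traversal
        std::vector<std::uint64_t> hashes;
        hashes.reserve(values.size());
        for (const auto& v : values) {
            hashes.push_back(std::hash<std::string>{}(v));
        }
        std::sort(hashes.begin(), hashes.end()); // normalise away traversal order
        std::uint64_t combined = 0;
        for (auto h : hashes) {
            combined = combined * 1000003ULL + h; // simple stand-in for hashCombine
        }
        std::cout << combined << '\n'; // same output however the map iterates
        return 0;
    }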
@@ -632,116 +470,83 @@ struct SChecksum

 //! \brief Resize the feature data to accommodate a specified
 //! person and attribute identifier.
-struct SResize
-{
-    void operator()(TSizeUSetVec &attributePeople,
-                    std::size_t /*pid*/,
-                    std::size_t cid) const
-    {
-        if (cid >= attributePeople.size())
-        {
+struct SResize {
+    void operator()(TSizeUSetVec& attributePeople, std::size_t /*pid*/, std::size_t cid) const {
+        if (cid >= attributePeople.size()) {
             attributePeople.resize(cid + 1);
         }
     }
-    void operator()(TSizeSizePrStrDataUMapQueue &/*data*/,
-                    std::size_t /*pid*/,
-                    std::size_t /*cid*/) const
-    {
+    void operator()(TSizeSizePrStrDataUMapQueue& /*data*/, std::size_t /*pid*/, std::size_t /*cid*/) const {
         // Not needed
     }
-    void operator()(const TSizeSizePrMeanAccumulatorUMapQueue &/*arrivalTimes*/,
-                    std::size_t /*pid*/,
-                    std::size_t /*cid*/) const
-    {
+    void operator()(const TSizeSizePrMeanAccumulatorUMapQueue& /*arrivalTimes*/, std::size_t /*pid*/, std::size_t /*cid*/) const {
         // Not needed
     }
 };

 //! \brief Updates the feature data with some aggregated records.
-struct SAddValue
-{
-    void operator()(TSizeUSetVec &attributePeople,
+struct SAddValue {
+    void operator()(TSizeUSetVec& attributePeople,
                     std::size_t pid,
                     std::size_t cid,
                     core_t::TTime /*time*/,
                     std::size_t /*count*/,
-                    const CEventData::TDouble1VecArray &/*values*/,
-                    const CEventData::TOptionalStr &/*uniqueStrings*/,
-                    const TStoredStringPtrVec &/*influences*/) const
-    {
+                    const CEventData::TDouble1VecArray& /*values*/,
+                    const CEventData::TOptionalStr& /*uniqueStrings*/,
+                    const TStoredStringPtrVec& /*influences*/) const {
         attributePeople[cid].insert(pid);
     }
-    void operator()(TSizeSizePrStrDataUMapQueue &personAttributeUniqueCounts,
+    void operator()(TSizeSizePrStrDataUMapQueue& personAttributeUniqueCounts,
                     std::size_t pid,
                     std::size_t cid,
                     core_t::TTime time,
                     std::size_t /*count*/,
-                    const CEventData::TDouble1VecArray &/*values*/,
-                    const CEventData::TOptionalStr &uniqueString,
-                    const TStoredStringPtrVec &influences) const
-    {
-        if (!uniqueString)
-        {
+                    const CEventData::TDouble1VecArray& /*values*/,
+                    const CEventData::TOptionalStr& uniqueString,
+                    const TStoredStringPtrVec& influences) const {
+        if (!uniqueString) {
             return;
         }
-        if (time > personAttributeUniqueCounts.latestBucketEnd())
-        {
+        if (time > personAttributeUniqueCounts.latestBucketEnd()) {
             LOG_ERROR("No queue item for time " << time);
             personAttributeUniqueCounts.push(TSizeSizePrStrDataUMap(1), time);
         }
-        TSizeSizePrStrDataUMap &counts = personAttributeUniqueCounts.get(time);
+        TSizeSizePrStrDataUMap& counts = personAttributeUniqueCounts.get(time);
         counts[{pid, cid}].insert(*uniqueString, influences);
     }
-    void operator()(TSizeSizePrMeanAccumulatorUMapQueue &arrivalTimes,
+    void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes,
                     std::size_t pid,
                     std::size_t cid,
                     core_t::TTime time,
                     std::size_t count,
-                    const CEventData::TDouble1VecArray &values,
-                    const CEventData::TOptionalStr &/*uniqueStrings*/,
-                    const TStoredStringPtrVec &/*influences*/) const
-    {
-        if (time > arrivalTimes.latestBucketEnd())
-        {
+                    const CEventData::TDouble1VecArray& values,
+                    const CEventData::TOptionalStr& /*uniqueStrings*/,
+                    const TStoredStringPtrVec& /*influences*/) const {
+        if (time > arrivalTimes.latestBucketEnd()) {
             LOG_ERROR("No queue item for time " << time);
             arrivalTimes.push(TSizeSizePrMeanAccumulatorUMap(1), time);
         }
-        TSizeSizePrMeanAccumulatorUMap &times = arrivalTimes.get(time);
-        for (std::size_t i = 0; i < count; i++)
-        {
+        TSizeSizePrMeanAccumulatorUMap& times = arrivalTimes.get(time);
+        for (std::size_t i = 0; i < count; i++) {
             times[{pid, cid}].add(values[i][0]);
         }
    }
};

//!
\brief Updates the feature data for the start of a new bucket. -struct SNewBucket -{ - void operator()(TSizeUSetVec &/*attributePeople*/, - core_t::TTime /*time*/) const - { - } - void operator()(TSizeSizePrStrDataUMapQueue &personAttributeUniqueCounts, - core_t::TTime time) const - { - if (time > personAttributeUniqueCounts.latestBucketEnd()) - { +struct SNewBucket { + void operator()(TSizeUSetVec& /*attributePeople*/, core_t::TTime /*time*/) const {} + void operator()(TSizeSizePrStrDataUMapQueue& personAttributeUniqueCounts, core_t::TTime time) const { + if (time > personAttributeUniqueCounts.latestBucketEnd()) { personAttributeUniqueCounts.push(TSizeSizePrStrDataUMap(1), time); - } - else - { + } else { personAttributeUniqueCounts.get(time).clear(); } } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue &arrivalTimes, - core_t::TTime time) const - { - if (time > arrivalTimes.latestBucketEnd()) - { + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, core_t::TTime time) const { + if (time > arrivalTimes.latestBucketEnd()) { arrivalTimes.push(TSizeSizePrMeanAccumulatorUMap(1), time); - } - else - { + } else { arrivalTimes.get(time).clear(); } } @@ -752,24 +557,19 @@ const std::string DICTIONARY_WORD_TAG("a"); const std::string UNIQUE_WORD_TAG("b"); //! Persist a collection of unique strings. -void persistUniqueStrings(const CUniqueStringFeatureData::TWordStringUMap &map, - core::CStatePersistInserter &inserter) -{ +void persistUniqueStrings(const CUniqueStringFeatureData::TWordStringUMap& map, core::CStatePersistInserter& inserter) { using TWordVec = std::vector; - if (!map.empty()) - { + if (!map.empty()) { // Order the map keys to ensure consistent persistence TWordVec keys; keys.reserve(map.size()); - for (const auto &value : map) - { + for (const auto& value : map) { keys.push_back(value.first); } std::sort(keys.begin(), keys.end()); - for (const auto &key : keys) - { + for (const auto& key : keys) { inserter.insertValue(DICTIONARY_WORD_TAG, key.toDelimited()); inserter.insertValue(UNIQUE_WORD_TAG, map.at(key)); } @@ -777,42 +577,33 @@ void persistUniqueStrings(const CUniqueStringFeatureData::TWordStringUMap &map, } //! Restore a collection of unique strings. -bool restoreUniqueStrings(core::CStateRestoreTraverser &traverser, - CUniqueStringFeatureData::TWordStringUMap &map) -{ +bool restoreUniqueStrings(core::CStateRestoreTraverser& traverser, CUniqueStringFeatureData::TWordStringUMap& map) { CUniqueStringFeatureData::TWord word; - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE(DICTIONARY_WORD_TAG, word.fromDelimited(traverser.value())) RESTORE_NO_ERROR(UNIQUE_WORD_TAG, map[word] = traverser.value()) - } - while (traverser.next()); + } while (traverser.next()); return true; } //! Persist influencer collections of unique strings. 
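persistUniqueStrings above (and persistInfluencerUniqueStrings below) make persistence deterministic by copying the keys of an unordered map into a vector, sorting them, and only then writing entries. The same idea in a standalone sketch; the string keys and values are illustrative, and std::cout stands in for inserter.insertValue:

    #include <algorithm>
    #include <boost/unordered_map.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        boost::unordered_map<std::string, std::string> map{{"b", "2"}, {"a", "1"}};
        std::vector<std::string> keys;
        keys.reserve(map.size());
        for (const auto& value : map) {
            keys.push_back(value.first);
        }
        std::sort(keys.begin(), keys.end()); // stable output order across runs
        for (const auto& key : keys) {
            std::cout << key << '=' << map.at(key) << '\n';
        }
        return 0;
    }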
-void persistInfluencerUniqueStrings(const CUniqueStringFeatureData::TStoredStringPtrWordSetUMap &map,
-                                    core::CStatePersistInserter &inserter)
-{
+void persistInfluencerUniqueStrings(const CUniqueStringFeatureData::TStoredStringPtrWordSetUMap& map,
+                                    core::CStatePersistInserter& inserter) {
     using TStoredStringPtrVec = std::vector<core::CStoredStringPtr>;

-    if (!map.empty())
-    {
+    if (!map.empty()) {
         // Order the map keys to ensure consistent persistence
         TStoredStringPtrVec keys;
         keys.reserve(map.size());
-        for (const auto &influence : map)
-        {
+        for (const auto& influence : map) {
             keys.push_back(influence.first);
         }
         std::sort(keys.begin(), keys.end(), maths::COrderings::SLess());

-        for (const auto &key : keys)
-        {
+        for (const auto& key : keys) {
             inserter.insertValue(DICTIONARY_WORD_TAG, *key);
-            for (const auto &word : map.at(key))
-            {
+            for (const auto& word : map.at(key)) {
                 inserter.insertValue(UNIQUE_WORD_TAG, word.toDelimited());
             }
         }
@@ -820,41 +611,30 @@ void persistInfluencerUniqueStrings(const CUniqueStringFeatureData::TStoredStrin
 }

 //! Restore influencer collections of unique strings.
-bool restoreInfluencerUniqueStrings(core::CStateRestoreTraverser &traverser,
-                                    CUniqueStringFeatureData::TStoredStringPtrWordSetUMap &data)
-{
+bool restoreInfluencerUniqueStrings(core::CStateRestoreTraverser& traverser, CUniqueStringFeatureData::TStoredStringPtrWordSetUMap& data) {
     std::string key;
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == DICTIONARY_WORD_TAG)
-        {
+    do {
+        const std::string& name = traverser.name();
+        if (name == DICTIONARY_WORD_TAG) {
             key = traverser.value();
-        }
-        else if (name == UNIQUE_WORD_TAG)
-        {
+        } else if (name == UNIQUE_WORD_TAG) {
             CUniqueStringFeatureData::TWord value;
-            if (value.fromDelimited(traverser.value()) == false)
-            {
+            if (value.fromDelimited(traverser.value()) == false) {
                 LOG_ERROR("Failed to restore word " << traverser.value());
                 return false;
             }
             auto i = data.begin();
-            for (/**/; i != data.end(); ++i)
-            {
-                if (*i->first == key)
-                {
+            for (/**/; i != data.end(); ++i) {
+                if (*i->first == key) {
                     i->second.insert(value);
                     break;
                 }
             }
-            if (i == data.end())
-            {
+            if (i == data.end()) {
                 data[CStringStore::influencers().get(key)].insert(value);
             }
         }
-    }
-    while (traverser.next());
+    } while (traverser.next());
     return true;
 }
@@ -862,19 +642,16 @@ bool restoreInfluencerUniqueStrings(core::CStateRestoreTraverser &traverser,
 //! Register the callbacks for computing the size of feature data gatherers
 //! with \p visitor.
 template<typename VISITOR>
-void registerMemoryCallbacks(VISITOR &visitor)
-{
+void registerMemoryCallbacks(VISITOR& visitor) {
     visitor.template registerCallback<TSizeUSetVec>();
     visitor.template registerCallback<TSizeSizePrStrDataUMapQueue>();
     visitor.template registerCallback<TSizeSizePrMeanAccumulatorUMapQueue>();
 }
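The registration helper that follows guards its one-time work with std::atomic_flag: only the first caller to test_and_set() observes false and runs the body. A standalone sketch of that once-only idiom; registerOnce and the message are illustrative (note it shares the original's property that a second caller can return before the first finishes the work):

    #include <atomic>
    #include <iostream>

    void registerOnce() {
        static std::atomic_flag once = ATOMIC_FLAG_INIT;
        if (once.test_and_set() == false) {
            std::cout << "registering callbacks\n"; // runs exactly once
        }
    }

    int main() {
        registerOnce();
        registerOnce(); // second call is a no-op
        return 0;
    }

//! Register the callbacks for computing the size of feature data gatherers.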
-void registerMemoryCallbacks() -{ +void registerMemoryCallbacks() { static std::atomic_flag once = ATOMIC_FLAG_INIT; - if (once.test_and_set() == false) - { + if (once.test_and_set() == false) { registerMemoryCallbacks(core::CMemory::anyVisitor()); registerMemoryCallbacks(core::CMemoryDebug::anyVisitor()); } @@ -882,163 +659,117 @@ void registerMemoryCallbacks() } // unnamed:: - -CEventRateBucketGatherer::CEventRateBucketGatherer(CDataGatherer &dataGatherer, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core_t::TTime startTime) : - CBucketGatherer(dataGatherer, startTime), - m_BeginInfluencingFields(0), - m_BeginValueField(0), - m_BeginSummaryFields(0) -{ - this->initializeFieldNames(personFieldName, - attributeFieldName, - valueFieldName, - summaryCountFieldName, - influenceFieldNames); +CEventRateBucketGatherer::CEventRateBucketGatherer(CDataGatherer& dataGatherer, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core_t::TTime startTime) + : CBucketGatherer(dataGatherer, startTime), m_BeginInfluencingFields(0), m_BeginValueField(0), m_BeginSummaryFields(0) { + this->initializeFieldNames(personFieldName, attributeFieldName, valueFieldName, summaryCountFieldName, influenceFieldNames); this->initializeFeatureData(); } -CEventRateBucketGatherer::CEventRateBucketGatherer(CDataGatherer &dataGatherer, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core::CStateRestoreTraverser &traverser) : - CBucketGatherer(dataGatherer, 0), - m_BeginInfluencingFields(0), - m_BeginValueField(0), - m_BeginSummaryFields(0) -{ - this->initializeFieldNames(personFieldName, - attributeFieldName, - valueFieldName, - summaryCountFieldName, - influenceFieldNames); +CEventRateBucketGatherer::CEventRateBucketGatherer(CDataGatherer& dataGatherer, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core::CStateRestoreTraverser& traverser) + : CBucketGatherer(dataGatherer, 0), m_BeginInfluencingFields(0), m_BeginValueField(0), m_BeginSummaryFields(0) { + this->initializeFieldNames(personFieldName, attributeFieldName, valueFieldName, summaryCountFieldName, influenceFieldNames); traverser.traverseSubLevel(boost::bind(&CEventRateBucketGatherer::acceptRestoreTraverser, this, _1)); } -CEventRateBucketGatherer::CEventRateBucketGatherer(bool isForPersistence, - const CEventRateBucketGatherer &other) : - CBucketGatherer(isForPersistence, other), - m_FieldNames(other.m_FieldNames), - m_BeginInfluencingFields(other.m_BeginInfluencingFields), - m_BeginValueField(other.m_BeginValueField), - m_BeginSummaryFields(other.m_BeginSummaryFields), - m_FeatureData(other.m_FeatureData) -{ - if (!isForPersistence) - { +CEventRateBucketGatherer::CEventRateBucketGatherer(bool isForPersistence, const CEventRateBucketGatherer& other) + : CBucketGatherer(isForPersistence, other), + m_FieldNames(other.m_FieldNames), + m_BeginInfluencingFields(other.m_BeginInfluencingFields), + m_BeginValueField(other.m_BeginValueField), + 
m_BeginSummaryFields(other.m_BeginSummaryFields), + m_FeatureData(other.m_FeatureData) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } -bool CEventRateBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CEventRateBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { this->clear(); - do - { - const std::string &name = traverser.name(); - RESTORE(BASE_TAG, traverser.traverseSubLevel(boost::bind( - &CBucketGatherer::baseAcceptRestoreTraverser, this, _1))) - if (restoreFeatureData(traverser, m_FeatureData, - m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), this->currentBucketStartTime()) == false) - { + do { + const std::string& name = traverser.name(); + RESTORE(BASE_TAG, traverser.traverseSubLevel(boost::bind(&CBucketGatherer::baseAcceptRestoreTraverser, this, _1))) + if (restoreFeatureData( + traverser, m_FeatureData, m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime()) == + false) { LOG_ERROR("Invalid feature data in " << traverser.value()); return false; } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CEventRateBucketGatherer::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CEventRateBucketGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel(BASE_TAG, boost::bind(&CBucketGatherer::baseAcceptPersistInserter, this, _1)); persistFeatureData(m_FeatureData, inserter); } -CBucketGatherer *CEventRateBucketGatherer::cloneForPersistence() const -{ +CBucketGatherer* CEventRateBucketGatherer::cloneForPersistence() const { return new CEventRateBucketGatherer(true, *this); } -const std::string &CEventRateBucketGatherer::persistenceTag() const -{ +const std::string& CEventRateBucketGatherer::persistenceTag() const { return CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG; } -const std::string &CEventRateBucketGatherer::personFieldName() const -{ +const std::string& CEventRateBucketGatherer::personFieldName() const { return m_FieldNames[0]; } -const std::string &CEventRateBucketGatherer::attributeFieldName() const -{ +const std::string& CEventRateBucketGatherer::attributeFieldName() const { return m_DataGatherer.isPopulation() ? m_FieldNames[1] : EMPTY_STRING; } -const std::string &CEventRateBucketGatherer::valueFieldName() const -{ - return m_BeginValueField != m_BeginSummaryFields ? - m_FieldNames[m_BeginValueField] : EMPTY_STRING; +const std::string& CEventRateBucketGatherer::valueFieldName() const { + return m_BeginValueField != m_BeginSummaryFields ? 
m_FieldNames[m_BeginValueField] : EMPTY_STRING; } -CEventRateBucketGatherer::TStrVecCItr CEventRateBucketGatherer::beginInfluencers() const -{ +CEventRateBucketGatherer::TStrVecCItr CEventRateBucketGatherer::beginInfluencers() const { return m_FieldNames.begin() + m_BeginInfluencingFields; } -CEventRateBucketGatherer::TStrVecCItr CEventRateBucketGatherer::endInfluencers() const -{ +CEventRateBucketGatherer::TStrVecCItr CEventRateBucketGatherer::endInfluencers() const { return m_FieldNames.begin() + m_BeginValueField; } -const CEventRateBucketGatherer::TStrVec &CEventRateBucketGatherer::fieldsOfInterest() const -{ +const CEventRateBucketGatherer::TStrVec& CEventRateBucketGatherer::fieldsOfInterest() const { return m_FieldNames; } -std::string CEventRateBucketGatherer::description() const -{ - return function_t::name(function_t::function(m_DataGatherer.features())) - + (m_BeginValueField == m_BeginSummaryFields ? "" : (" " + m_FieldNames[m_BeginValueField])) - + (byField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " by ") - + byField(m_DataGatherer.isPopulation(), m_FieldNames) - + (overField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " over ") - + overField(m_DataGatherer.isPopulation(), m_FieldNames) - + (m_DataGatherer.partitionFieldName().empty() ? "" : " partition=") - + m_DataGatherer.partitionFieldName(); +std::string CEventRateBucketGatherer::description() const { + return function_t::name(function_t::function(m_DataGatherer.features())) + + (m_BeginValueField == m_BeginSummaryFields ? "" : (" " + m_FieldNames[m_BeginValueField])) + + (byField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " by ") + + byField(m_DataGatherer.isPopulation(), m_FieldNames) + + (overField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " over ") + + overField(m_DataGatherer.isPopulation(), m_FieldNames) + (m_DataGatherer.partitionFieldName().empty() ? "" : " partition=") + + m_DataGatherer.partitionFieldName(); } -bool CEventRateBucketGatherer::processFields(const TStrCPtrVec &fieldValues, - CEventData &result, - CResourceMonitor &resourceMonitor) -{ +bool CEventRateBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) { using TOptionalSize = boost::optional; using TOptionalStr = boost::optional; - if (fieldValues.size() != m_FieldNames.size()) - { - LOG_ERROR("Unexpected field values: " - << core::CContainerPrinter::print(fieldValues) << - ", for field names: " - << core::CContainerPrinter::print(m_FieldNames)); + if (fieldValues.size() != m_FieldNames.size()) { + LOG_ERROR("Unexpected field values: " << core::CContainerPrinter::print(fieldValues) + << ", for field names: " << core::CContainerPrinter::print(m_FieldNames)); return false; } - const std::string *person = (fieldValues[0] == 0 && m_DataGatherer.useNull()) ? - &EMPTY_STRING : - fieldValues[0]; - if (person == 0) - { + const std::string* person = (fieldValues[0] == 0 && m_DataGatherer.useNull()) ? &EMPTY_STRING : fieldValues[0]; + if (person == 0) { // Just ignore: the "person" field wasn't present in the // record. Note that we don't warn here since we'll permit // a small fraction of records to having missing field @@ -1046,98 +777,71 @@ bool CEventRateBucketGatherer::processFields(const TStrCPtrVec &fieldValues, return false; } - for (std::size_t i = m_DataGatherer.isPopulation() + 1; i < m_BeginValueField; ++i) - { - result.addInfluence(fieldValues[i] ? 
- TOptionalStr(*fieldValues[i]) : - TOptionalStr()); + for (std::size_t i = m_DataGatherer.isPopulation() + 1; i < m_BeginValueField; ++i) { + result.addInfluence(fieldValues[i] ? TOptionalStr(*fieldValues[i]) : TOptionalStr()); } - if (m_BeginValueField != m_BeginSummaryFields) - { - if (const std::string *value = fieldValues[m_BeginValueField]) - { + if (m_BeginValueField != m_BeginSummaryFields) { + if (const std::string* value = fieldValues[m_BeginValueField]) { result.stringValue(*value); } } std::size_t count = 1; - if (m_DataGatherer.summaryMode() != model_t::E_None) - { - if (m_DataGatherer.extractCountFromField(m_FieldNames[m_BeginSummaryFields], - fieldValues[m_BeginSummaryFields], - count) == false) - { + if (m_DataGatherer.summaryMode() != model_t::E_None) { + if (m_DataGatherer.extractCountFromField(m_FieldNames[m_BeginSummaryFields], fieldValues[m_BeginSummaryFields], count) == false) { result.addValue(); return true; } } - if (count == CDataGatherer::EXPLICIT_NULL_SUMMARY_COUNT) - { + if (count == CDataGatherer::EXPLICIT_NULL_SUMMARY_COUNT) { result.setExplicitNull(); - } - else - { + } else { model_t::EFeature feature = m_DataGatherer.feature(0); if ((feature == model_t::E_IndividualTimeOfDayByBucketAndPerson) || - (feature == model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute)) - { + (feature == model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute)) { double t = static_cast(result.time() % core::constants::DAY); result.addValue(TDouble1Vec(1, t)); - } - else if ((feature == model_t::E_IndividualTimeOfWeekByBucketAndPerson) || - (feature == model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute)) - { + } else if ((feature == model_t::E_IndividualTimeOfWeekByBucketAndPerson) || + (feature == model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute)) { double t = static_cast(result.time() % core::constants::WEEK); result.addValue(TDouble1Vec(1, t)); - } - else - { + } else { result.addCountStatistic(count); } } bool addedPerson = false; std::size_t personId = CDynamicStringIdRegistry::INVALID_ID; - if (result.isExplicitNull()) - { + if (result.isExplicitNull()) { m_DataGatherer.personId(*person, personId); - } - else - { + } else { personId = m_DataGatherer.addPerson(*person, resourceMonitor, addedPerson); } - if (personId == CDynamicStringIdRegistry::INVALID_ID) - { - if (!result.isExplicitNull()) - { + if (personId == CDynamicStringIdRegistry::INVALID_ID) { + if (!result.isExplicitNull()) { LOG_TRACE("Couldn't create a person, over memory limit"); } return false; } - if (addedPerson) - { - resourceMonitor.addExtraMemory(m_DataGatherer.isPopulation() ? - CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD : CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); - (m_DataGatherer.isPopulation() ? core::CStatistics::stat(stat_t::E_NumberOverFields) : - core::CStatistics::stat(stat_t::E_NumberByFields)).increment(); + if (addedPerson) { + resourceMonitor.addExtraMemory(m_DataGatherer.isPopulation() ? CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD + : CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); + (m_DataGatherer.isPopulation() ? core::CStatistics::stat(stat_t::E_NumberOverFields) + : core::CStatistics::stat(stat_t::E_NumberByFields)) + .increment(); } - if (!result.person(personId)) - { + if (!result.person(personId)) { LOG_ERROR("Bad by field value: " << *person); return false; } - if (m_DataGatherer.isPopulation()) - { - const std::string *attribute = (fieldValues[1] == 0 && m_DataGatherer.useNull()) ? 
- &EMPTY_STRING : - fieldValues[1]; + if (m_DataGatherer.isPopulation()) { + const std::string* attribute = (fieldValues[1] == 0 && m_DataGatherer.useNull()) ? &EMPTY_STRING : fieldValues[1]; - if (attribute == 0) - { + if (attribute == 0) { // Just ignore: the "by" field wasn't present in the // record. This doesn't necessarily stop us processing // the record by other models so we don't return false. @@ -1150,88 +854,66 @@ bool CEventRateBucketGatherer::processFields(const TStrCPtrVec &fieldValues, bool addedAttribute = false; std::size_t newAttribute = CDynamicStringIdRegistry::INVALID_ID; - if (result.isExplicitNull()) - { + if (result.isExplicitNull()) { m_DataGatherer.attributeId(*attribute, newAttribute); - } - else - { + } else { newAttribute = m_DataGatherer.addAttribute(*attribute, resourceMonitor, addedAttribute); } result.addAttribute(TOptionalSize(newAttribute)); - if (addedAttribute) - { + if (addedAttribute) { resourceMonitor.addExtraMemory(CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); core::CStatistics::stat(stat_t::E_NumberByFields).increment(); } - } - else - { + } else { result.addAttribute(std::size_t(0)); } return true; } -void CEventRateBucketGatherer::recyclePeople(const TSizeVec &peopleToRemove) -{ - if (peopleToRemove.empty()) - { +void CEventRateBucketGatherer::recyclePeople(const TSizeVec& peopleToRemove) { + if (peopleToRemove.empty()) { return; } - apply(m_FeatureData, boost::bind(SRemovePeople(), _1, - boost::cref(peopleToRemove))); + apply(m_FeatureData, boost::bind(SRemovePeople(), _1, boost::cref(peopleToRemove))); this->CBucketGatherer::recyclePeople(peopleToRemove); } -void CEventRateBucketGatherer::removePeople(std::size_t lowestPersonToRemove) -{ - apply(m_FeatureData, boost::bind(SRemovePeople(), _1, - lowestPersonToRemove, - m_DataGatherer.numberPeople())); +void CEventRateBucketGatherer::removePeople(std::size_t lowestPersonToRemove) { + apply(m_FeatureData, boost::bind(SRemovePeople(), _1, lowestPersonToRemove, m_DataGatherer.numberPeople())); this->CBucketGatherer::removePeople(lowestPersonToRemove); } -void CEventRateBucketGatherer::recycleAttributes(const TSizeVec &attributesToRemove) -{ - if (attributesToRemove.empty()) - { +void CEventRateBucketGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { + if (attributesToRemove.empty()) { return; } - apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, - boost::cref(attributesToRemove))); + apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, boost::cref(attributesToRemove))); this->CBucketGatherer::recycleAttributes(attributesToRemove); } -void CEventRateBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) -{ - apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, - lowestAttributeToRemove)); +void CEventRateBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { + apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, lowestAttributeToRemove)); this->CBucketGatherer::removeAttributes(lowestAttributeToRemove); } -uint64_t CEventRateBucketGatherer::checksum() const -{ +uint64_t CEventRateBucketGatherer::checksum() const { uint64_t seed = this->CBucketGatherer::checksum(); TStrUInt64Map hashes; - apply(m_FeatureData, boost::bind(SChecksum(), _1, - boost::cref(m_DataGatherer), - boost::ref(hashes))); + apply(m_FeatureData, boost::bind(SChecksum(), _1, boost::cref(m_DataGatherer), boost::ref(hashes))); LOG_TRACE("seed = " << seed); LOG_TRACE("hashes = " << core::CContainerPrinter::print(hashes)); core::CHashing::CSafeMurmurHash2String64 
hasher; - return core::CHashing::hashCombine( - seed, hasher(core::CContainerPrinter::print(hashes))); + return core::CHashing::hashCombine(seed, hasher(core::CContainerPrinter::print(hashes))); } -void CEventRateBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CEventRateBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { registerMemoryCallbacks(); mem->setName("CPopulationEventRateDataGatherer"); CBucketGatherer::debugMemoryUsage(mem->addChild()); @@ -1239,8 +921,7 @@ void CEventRateBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsage core::CMemoryDebug::dynamicSize("m_FeatureData", m_FeatureData, mem); } -std::size_t CEventRateBucketGatherer::memoryUsage() const -{ +std::size_t CEventRateBucketGatherer::memoryUsage() const { registerMemoryCallbacks(); std::size_t mem = CBucketGatherer::memoryUsage(); mem += core::CMemory::dynamicSize(m_FieldNames); @@ -1248,53 +929,41 @@ std::size_t CEventRateBucketGatherer::memoryUsage() const return mem; } -std::size_t CEventRateBucketGatherer::staticSize() const -{ +std::size_t CEventRateBucketGatherer::staticSize() const { return sizeof(*this); } -void CEventRateBucketGatherer::clear() -{ +void CEventRateBucketGatherer::clear() { this->CBucketGatherer::clear(); m_FeatureData.clear(); this->initializeFeatureData(); } -bool CEventRateBucketGatherer::resetBucket(core_t::TTime bucketStart) -{ +bool CEventRateBucketGatherer::resetBucket(core_t::TTime bucketStart) { return this->CBucketGatherer::resetBucket(bucketStart); } -void CEventRateBucketGatherer::releaseMemory(core_t::TTime /*samplingCutoffTime*/) -{ +void CEventRateBucketGatherer::releaseMemory(core_t::TTime /*samplingCutoffTime*/) { // Nothing to release } -void CEventRateBucketGatherer::sample(core_t::TTime time) -{ +void CEventRateBucketGatherer::sample(core_t::TTime time) { // Merge smallest bucket into longer buckets, if they exist this->CBucketGatherer::sample(time); } -void CEventRateBucketGatherer::featureData(core_t::TTime time, core_t::TTime /*bucketLength*/, - TFeatureAnyPrVec &result) const -{ +void CEventRateBucketGatherer::featureData(core_t::TTime time, core_t::TTime /*bucketLength*/, TFeatureAnyPrVec& result) const { result.clear(); - if ( !this->dataAvailable(time) - || time >= this->currentBucketStartTime() + this->bucketLength()) - { - LOG_DEBUG("No data available at " << time - << ", current bucket = " << this->printCurrentBucket()); + if (!this->dataAvailable(time) || time >= this->currentBucketStartTime() + this->bucketLength()) { + LOG_DEBUG("No data available at " << time << ", current bucket = " << this->printCurrentBucket()); return; } - for (std::size_t i = 0u, n = m_DataGatherer.numberFeatures(); i < n; ++i) - { + for (std::size_t i = 0u, n = m_DataGatherer.numberFeatures(); i < n; ++i) { const model_t::EFeature feature = m_DataGatherer.feature(i); - switch (feature) - { + switch (feature) { case model_t::E_IndividualCountByBucketAndPerson: this->personCounts(feature, time, result); break; @@ -1400,71 +1069,54 @@ void CEventRateBucketGatherer::featureData(core_t::TTime time, core_t::TTime /*b } } -void CEventRateBucketGatherer::personCounts(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ - if (m_DataGatherer.isPopulation()) - { +void CEventRateBucketGatherer::personCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { + if (m_DataGatherer.isPopulation()) { LOG_ERROR("Function does not support population analysis."); 
return; } result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); result.reserve(m_DataGatherer.numberActivePeople()); - for (std::size_t pid = 0u, n = m_DataGatherer.numberPeople(); pid < n; ++pid) - { - if ( !m_DataGatherer.isPersonActive(pid) - || this->hasExplicitNullsOnly(time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)) - { + for (std::size_t pid = 0u, n = m_DataGatherer.numberPeople(); pid < n; ++pid) { + if (!m_DataGatherer.isPersonActive(pid) || this->hasExplicitNullsOnly(time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)) { continue; } result.emplace_back(pid, 0); } - for (const auto &count_ : this->bucketCounts(time)) - { - uint64_t &count = std::lower_bound(result.begin(), result.end(), - CDataGatherer::extractPersonId(count_), - maths::COrderings::SFirstLess())->second.s_Count; + for (const auto& count_ : this->bucketCounts(time)) { + uint64_t& count = + std::lower_bound(result.begin(), result.end(), CDataGatherer::extractPersonId(count_), maths::COrderings::SFirstLess()) + ->second.s_Count; count += CDataGatherer::extractData(count_); } this->addInfluencerCounts(time, result); } -void CEventRateBucketGatherer::nonZeroPersonCounts(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ +void CEventRateBucketGatherer::nonZeroPersonCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); - const TSizeSizePrUInt64UMap &personAttributeCounts = this->bucketCounts(time); + const TSizeSizePrUInt64UMap& personAttributeCounts = this->bucketCounts(time); result.reserve(personAttributeCounts.size()); - for (const auto &count : personAttributeCounts) - { - result.emplace_back(CDataGatherer::extractPersonId(count), - CDataGatherer::extractData(count)); + for (const auto& count : personAttributeCounts) { + result.emplace_back(CDataGatherer::extractPersonId(count), CDataGatherer::extractData(count)); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); this->addInfluencerCounts(time, result); } -void CEventRateBucketGatherer::personIndicator(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ +void CEventRateBucketGatherer::personIndicator(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); - const TSizeSizePrUInt64UMap &personAttributeCounts = this->bucketCounts(time); + const TSizeSizePrUInt64UMap& personAttributeCounts = this->bucketCounts(time); result.reserve(personAttributeCounts.size()); - for (const auto &count : personAttributeCounts) - { + for (const auto& count : personAttributeCounts) { result.emplace_back(CDataGatherer::extractPersonId(count), 1); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); @@ -1472,27 +1124,19 @@ void CEventRateBucketGatherer::personIndicator(model_t::EFeature feature, this->addInfluencerCounts(time, result); } -void CEventRateBucketGatherer::personArrivalTimes(model_t::EFeature feature, - core_t::TTime /*time*/, - TFeatureAnyPrVec &result_) const -{ +void 
CEventRateBucketGatherer::personArrivalTimes(model_t::EFeature feature, core_t::TTime /*time*/, TFeatureAnyPrVec& result_) const { // TODO result_.emplace_back(feature, TSizeFeatureDataPrVec()); } -void CEventRateBucketGatherer::nonZeroAttributeCounts(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ +void CEventRateBucketGatherer::nonZeroAttributeCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); - const TSizeSizePrUInt64UMap &personAttributeCounts = this->bucketCounts(time); + const TSizeSizePrUInt64UMap& personAttributeCounts = this->bucketCounts(time); result.reserve(personAttributeCounts.size()); - for (const auto &count : personAttributeCounts) - { - if (CDataGatherer::extractData(count) > 0) - { + for (const auto& count : personAttributeCounts) { + if (CDataGatherer::extractData(count) > 0) { result.emplace_back(count.first, CDataGatherer::extractData(count)); } } @@ -1501,226 +1145,161 @@ void CEventRateBucketGatherer::nonZeroAttributeCounts(model_t::EFeature feature, this->addInfluencerCounts(time, result); } -void CEventRateBucketGatherer::peoplePerAttribute(model_t::EFeature feature, - TFeatureAnyPrVec &result_) const -{ +void CEventRateBucketGatherer::peoplePerAttribute(model_t::EFeature feature, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_AttributePeople); - if (i == m_FeatureData.end()) - { + if (i == m_FeatureData.end()) { return; } - try - { - const TSizeUSetVec &attributePeople = boost::any_cast(i->second); + try { + const TSizeUSetVec& attributePeople = boost::any_cast(i->second); result.reserve(attributePeople.size()); - for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid) - { - if (m_DataGatherer.isAttributeActive(cid)) - { + for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid) { + if (m_DataGatherer.isAttributeActive(cid)) { result.emplace_back(TSizeSizePr(0, cid), attributePeople[cid].size()); } } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract " - << model_t::print(model_t::E_PopulationUniquePersonCountByAttribute) - << ": " << e.what()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to extract " << model_t::print(model_t::E_PopulationUniquePersonCountByAttribute) << ": " << e.what()); } } -void CEventRateBucketGatherer::attributeIndicator(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ +void CEventRateBucketGatherer::attributeIndicator(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); - const TSizeSizePrUInt64UMap &counts = this->bucketCounts(time); + const TSizeSizePrUInt64UMap& counts = this->bucketCounts(time); result.reserve(counts.size()); - for (const auto &count : counts) - { - if (CDataGatherer::extractData(count) > 0) - { + for (const auto& count : counts) { + if (CDataGatherer::extractData(count) > 0) { result.emplace_back(count.first, 1); } } 
std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); this->addInfluencerCounts(time, result); - for (std::size_t i = 0u; i < result.size(); ++i) - { - SEventRateFeatureData &data = result[i].second; - for (std::size_t j = 0u; j < data.s_InfluenceValues.size(); ++j) - { - for (std::size_t k = 0u; k < data.s_InfluenceValues[j].size(); ++k) - { + for (std::size_t i = 0u; i < result.size(); ++i) { + SEventRateFeatureData& data = result[i].second; + for (std::size_t j = 0u; j < data.s_InfluenceValues.size(); ++j) { + for (std::size_t k = 0u; k < data.s_InfluenceValues[j].size(); ++k) { data.s_InfluenceValues[j][k].second.first = TDoubleVec{1.0}; } } } } -void CEventRateBucketGatherer::bucketUniqueValuesPerPerson(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ +void CEventRateBucketGatherer::bucketUniqueValuesPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_UniqueValues); - if (i == m_FeatureData.end()) - { + if (i == m_FeatureData.end()) { return; } - try - { - const auto &personAttributeUniqueValues = - boost::any_cast(i->second).get(time); + try { + const auto& personAttributeUniqueValues = boost::any_cast(i->second).get(time); result.reserve(personAttributeUniqueValues.size()); - for (const auto &uniques : personAttributeUniqueValues) - { + for (const auto& uniques : personAttributeUniqueValues) { result.emplace_back(CDataGatherer::extractPersonId(uniques), 0); CDataGatherer::extractData(uniques).populateDistinctCountFeatureData(result.back().second); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract " - << model_t::print(model_t::E_IndividualUniqueCountByBucketAndPerson) - << ": " << e.what()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to extract " << model_t::print(model_t::E_IndividualUniqueCountByBucketAndPerson) << ": " << e.what()); } } void CEventRateBucketGatherer::bucketUniqueValuesPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_UniqueValues); - if (i == m_FeatureData.end()) - { + if (i == m_FeatureData.end()) { return; } - try - { - const auto &personAttributeUniqueValues = - boost::any_cast(i->second).get(time); + try { + const auto& personAttributeUniqueValues = boost::any_cast(i->second).get(time); result.reserve(personAttributeUniqueValues.size()); - for (const auto &uniques : personAttributeUniqueValues) - { + for (const auto& uniques : personAttributeUniqueValues) { result.emplace_back(uniques.first, 0); CDataGatherer::extractData(uniques).populateDistinctCountFeatureData(result.back().second); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract " - << model_t::print(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute) - << ": " << e.what()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to extract " << 
model_t::print(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute) << ": " << e.what()); } } void CEventRateBucketGatherer::bucketCompressedLengthPerPerson(model_t::EFeature feature, core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_UniqueValues); - if (i == m_FeatureData.end()) - { + if (i == m_FeatureData.end()) { return; } - try - { - const auto &personAttributeUniqueValues = - boost::any_cast(i->second).get(time); + try { + const auto& personAttributeUniqueValues = boost::any_cast(i->second).get(time); result.reserve(personAttributeUniqueValues.size()); - for (const auto &uniques : personAttributeUniqueValues) - { + for (const auto& uniques : personAttributeUniqueValues) { result.emplace_back(CDataGatherer::extractPersonId(uniques), 0); CDataGatherer::extractData(uniques).populateInfoContentFeatureData(result.back().second); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract " - << model_t::print(model_t::E_IndividualInfoContentByBucketAndPerson) - << ": " << e.what()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to extract " << model_t::print(model_t::E_IndividualInfoContentByBucketAndPerson) << ": " << e.what()); } } - void CEventRateBucketGatherer::bucketCompressedLengthPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_UniqueValues); - if (i == m_FeatureData.end()) - { + if (i == m_FeatureData.end()) { return; } - try - { - const auto &personAttributeUniqueValues = - boost::any_cast(i->second).get(time); + try { + const auto& personAttributeUniqueValues = boost::any_cast(i->second).get(time); result.reserve(personAttributeUniqueValues.size()); - for (const auto &uniques : personAttributeUniqueValues) - { + for (const auto& uniques : personAttributeUniqueValues) { result.emplace_back(uniques.first, 0); CDataGatherer::extractData(uniques).populateInfoContentFeatureData(result.back().second); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract " - << model_t::print(model_t::E_PopulationInfoContentByBucketPersonAndAttribute) - << ": " << e.what()); + } catch (const std::exception& e) { + LOG_ERROR("Failed to extract " << model_t::print(model_t::E_PopulationInfoContentByBucketPersonAndAttribute) << ": " << e.what()); } } -void CEventRateBucketGatherer::bucketMeanTimesPerPerson(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ +void CEventRateBucketGatherer::bucketMeanTimesPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_DiurnalTimes); - if (i == m_FeatureData.end()) - { + if (i == 
m_FeatureData.end()) { return; } - try - { - const auto &arrivalTimes = - boost::any_cast(i->second).get(time); + try { + const auto& arrivalTimes = boost::any_cast(i->second).get(time); result.reserve(arrivalTimes.size()); - for (const auto &time_ : arrivalTimes) - { + for (const auto& time_ : arrivalTimes) { result.emplace_back(CDataGatherer::extractPersonId(time_), - static_cast(maths::CBasicStatistics::mean( - CDataGatherer::extractData(time_)))); + static_cast(maths::CBasicStatistics::mean(CDataGatherer::extractData(time_)))); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); @@ -1728,50 +1307,33 @@ void CEventRateBucketGatherer::bucketMeanTimesPerPerson(model_t::EFeature featur // so the best we can do is use the person and attribute // bucket mean. this->addInfluencerCounts(time, result); - for (std::size_t j = 0u; j < result.size(); ++j) - { - SEventRateFeatureData &data = result[j].second; - for (std::size_t k = 0u; k < data.s_InfluenceValues.size(); ++k) - { - for (std::size_t l = 0u; l < data.s_InfluenceValues[k].size(); ++l) - { - data.s_InfluenceValues[k][l].second.first = - TDouble1Vec{static_cast(data.s_Count)}; + for (std::size_t j = 0u; j < result.size(); ++j) { + SEventRateFeatureData& data = result[j].second; + for (std::size_t k = 0u; k < data.s_InfluenceValues.size(); ++k) { + for (std::size_t l = 0u; l < data.s_InfluenceValues[k].size(); ++l) { + data.s_InfluenceValues[k][l].second.first = TDouble1Vec{static_cast(data.s_Count)}; } } } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract " - << model_t::print(model_t::E_DiurnalTimes) - << ": " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to extract " << model_t::print(model_t::E_DiurnalTimes) << ": " << e.what()); } } void CEventRateBucketGatherer::bucketMeanTimesPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, - TFeatureAnyPrVec &result_) const -{ + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto &result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_DiurnalTimes); - if (i == m_FeatureData.end()) - { + if (i == m_FeatureData.end()) { return; } - try - { - const auto &arrivalTimes = - boost::any_cast(i->second).get(time); + try { + const auto& arrivalTimes = boost::any_cast(i->second).get(time); result.reserve(arrivalTimes.size()); - for (const auto &time_ : arrivalTimes) - { - result.emplace_back(time_.first, - static_cast(maths::CBasicStatistics::mean( - CDataGatherer::extractData(time_)))); + for (const auto& time_ : arrivalTimes) { + result.emplace_back(time_.first, static_cast(maths::CBasicStatistics::mean(CDataGatherer::extractData(time_)))); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); @@ -1779,80 +1341,59 @@ void CEventRateBucketGatherer::bucketMeanTimesPerPersonAttribute(model_t::EFeatu // so the best we can do is use the person and attribute // bucket mean. 
this->addInfluencerCounts(time, result); - for (std::size_t j = 0u; j < result.size(); ++j) - { - SEventRateFeatureData &data = result[j].second; - for (std::size_t k = 0u; k < data.s_InfluenceValues.size(); ++k) - { - for (std::size_t l = 0u; l < data.s_InfluenceValues[k].size(); ++l) - { - data.s_InfluenceValues[k][l].second.first = - TDouble1Vec{static_cast<double>(data.s_Count)}; + for (std::size_t j = 0u; j < result.size(); ++j) { + SEventRateFeatureData& data = result[j].second; + for (std::size_t k = 0u; k < data.s_InfluenceValues.size(); ++k) { + for (std::size_t l = 0u; l < data.s_InfluenceValues[k].size(); ++l) { + data.s_InfluenceValues[k][l].second.first = TDouble1Vec{static_cast<double>(data.s_Count)}; } } } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to extract " - << model_t::print(model_t::E_DiurnalTimes) - << ": " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to extract " << model_t::print(model_t::E_DiurnalTimes) << ": " << e.what()); } } -void CEventRateBucketGatherer::resize(std::size_t pid, std::size_t cid) -{ +void CEventRateBucketGatherer::resize(std::size_t pid, std::size_t cid) { apply(m_FeatureData, boost::bind(SResize(), _1, pid, cid)); } void CEventRateBucketGatherer::addValue(std::size_t pid, std::size_t cid, core_t::TTime time, - const CEventData::TDouble1VecArray &values, + const CEventData::TDouble1VecArray& values, std::size_t count, - const CEventData::TOptionalStr &stringValue, - const TStoredStringPtrVec &influences) -{ + const CEventData::TOptionalStr& stringValue, + const TStoredStringPtrVec& influences) { // Check that we are correctly sized - a person/attribute might have been added this->resize(pid, cid); - apply(m_FeatureData, boost::bind(SAddValue(), _1, pid, cid, - time, count, - boost::cref(values), - boost::cref(stringValue), - boost::cref(influences))); + apply( + m_FeatureData, + boost::bind(SAddValue(), _1, pid, cid, time, count, boost::cref(values), boost::cref(stringValue), boost::cref(influences))); } -void CEventRateBucketGatherer::startNewBucket(core_t::TTime time, bool /*skipUpdates*/) -{ +void CEventRateBucketGatherer::startNewBucket(core_t::TTime time, bool /*skipUpdates*/) { apply(m_FeatureData, boost::bind(SNewBucket(), _1, time)); } -void CEventRateBucketGatherer::initializeFieldNames(const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const std::string &summaryCountFieldName, - const TStrVec &influenceFieldNames) -{ +void CEventRateBucketGatherer::initializeFieldNames(const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const std::string& summaryCountFieldName, + const TStrVec& influenceFieldNames) { m_FieldNames.push_back(personFieldName); - if (m_DataGatherer.isPopulation()) - { + if (m_DataGatherer.isPopulation()) { m_FieldNames.push_back(attributeFieldName); } m_BeginInfluencingFields = m_FieldNames.size(); - m_FieldNames.insert(m_FieldNames.end(), - influenceFieldNames.begin(), - influenceFieldNames.end()); + m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), influenceFieldNames.end()); m_BeginValueField = m_FieldNames.size(); - if (!valueFieldName.empty()) - { + if (!valueFieldName.empty()) { m_FieldNames.push_back(valueFieldName); } m_BeginSummaryFields = m_FieldNames.size(); - switch (m_DataGatherer.summaryMode()) - { + switch (m_DataGatherer.summaryMode()) { case model_t::E_None: break; case model_t::E_Manual: @@ -1864,12 +1405,9 @@ void 
CEventRateBucketGatherer::initializeFieldNames(const std::string &personFie TStrVec(m_FieldNames).swap(m_FieldNames); } -void CEventRateBucketGatherer::initializeFeatureData() -{ - for (std::size_t i = 0u, n = m_DataGatherer.numberFeatures(); i < n; ++i) - { - switch (m_DataGatherer.feature(i)) - { +void CEventRateBucketGatherer::initializeFeatureData() { + for (std::size_t i = 0u, n = m_DataGatherer.numberFeatures(); i < n; ++i) { + switch (m_DataGatherer.feature(i)) { case model_t::E_IndividualCountByBucketAndPerson: case model_t::E_IndividualNonZeroCountByBucketAndPerson: case model_t::E_IndividualTotalBucketCountByPerson: @@ -1885,10 +1423,8 @@ void CEventRateBucketGatherer::initializeFeatureData() break; case model_t::E_IndividualTimeOfDayByBucketAndPerson: case model_t::E_IndividualTimeOfWeekByBucketAndPerson: - m_FeatureData[model_t::E_DiurnalTimes] = - TSizeSizePrMeanAccumulatorUMapQueue(m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), - this->currentBucketStartTime()); + m_FeatureData[model_t::E_DiurnalTimes] = TSizeSizePrMeanAccumulatorUMapQueue( + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime()); break; case model_t::E_IndividualLowNonZeroCountByBucketAndPerson: @@ -1901,11 +1437,8 @@ void CEventRateBucketGatherer::initializeFeatureData() case model_t::E_IndividualInfoContentByBucketAndPerson: case model_t::E_IndividualHighInfoContentByBucketAndPerson: case model_t::E_IndividualLowInfoContentByBucketAndPerson: - m_FeatureData[model_t::E_UniqueValues] = - TSizeSizePrStrDataUMapQueue(m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), - this->currentBucketStartTime(), - TSizeSizePrStrDataUMap(1)); + m_FeatureData[model_t::E_UniqueValues] = TSizeSizePrStrDataUMapQueue( + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); break; case model_t::E_PopulationAttributeTotalCountByPerson: @@ -1924,18 +1457,13 @@ void CEventRateBucketGatherer::initializeFeatureData() case model_t::E_PopulationInfoContentByBucketPersonAndAttribute: case model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute: case model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute: - m_FeatureData[model_t::E_UniqueValues] = - TSizeSizePrStrDataUMapQueue(m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), - this->currentBucketStartTime(), - TSizeSizePrStrDataUMap(1)); + m_FeatureData[model_t::E_UniqueValues] = TSizeSizePrStrDataUMapQueue( + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); break; case model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute: case model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute: - m_FeatureData[model_t::E_DiurnalTimes] = - TSizeSizePrMeanAccumulatorUMapQueue(m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), - this->currentBucketStartTime()); + m_FeatureData[model_t::E_DiurnalTimes] = TSizeSizePrMeanAccumulatorUMapQueue( + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime()); break; case model_t::E_PeersAttributeTotalCountByPerson: @@ -1950,18 +1478,13 @@ void CEventRateBucketGatherer::initializeFeatureData() case model_t::E_PeersInfoContentByBucketPersonAndAttribute: case model_t::E_PeersLowInfoContentByBucketPersonAndAttribute: case model_t::E_PeersHighInfoContentByBucketPersonAndAttribute: - m_FeatureData[model_t::E_UniqueValues] = - 
TSizeSizePrStrDataUMapQueue(m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), - this->currentBucketStartTime(), - TSizeSizePrStrDataUMap(1)); + m_FeatureData[model_t::E_UniqueValues] = TSizeSizePrStrDataUMapQueue( + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); break; case model_t::E_PeersTimeOfDayByBucketPersonAndAttribute: case model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute: - m_FeatureData[model_t::E_DiurnalTimes] = - TSizeSizePrMeanAccumulatorUMapQueue(m_DataGatherer.params().s_LatencyBuckets, - this->bucketLength(), - this->currentBucketStartTime()); + m_FeatureData[model_t::E_DiurnalTimes] = TSizeSizePrMeanAccumulatorUMapQueue( + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime()); break; CASE_INDIVIDUAL_METRIC: @@ -1973,233 +1496,174 @@ } } -void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, - TSizeFeatureDataPrVec &result) const -{ - const TSizeSizePrStoredStringPtrPrUInt64UMapVec &influencers = this->influencerCounts(time); - if (influencers.empty()) - { +void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, TSizeFeatureDataPrVec& result) const { + const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencers = this->influencerCounts(time); + if (influencers.empty()) { return; } - for (std::size_t i = 0u; i < result.size(); ++i) - { + for (std::size_t i = 0u; i < result.size(); ++i) { result[i].second.s_InfluenceValues.resize(influencers.size()); } - for (std::size_t i = 0u; i < influencers.size(); ++i) - { - for (const auto &influence : influencers[i]) - { + for (std::size_t i = 0u; i < influencers.size(); ++i) { + for (const auto& influence : influencers[i]) { std::size_t pid = CDataGatherer::extractPersonId(influence.first); - auto k = std::lower_bound(result.begin(), result.end(), - pid, maths::COrderings::SFirstLess()); - if (k == result.end() || k->first != pid) - { + auto k = std::lower_bound(result.begin(), result.end(), pid, maths::COrderings::SFirstLess()); + if (k == result.end() || k->first != pid) { LOG_ERROR("Missing feature data for person " << m_DataGatherer.personName(pid)); continue; } - k->second.s_InfluenceValues[i].emplace_back( - TStrCRef(*CDataGatherer::extractData(influence.first)), - TDouble1VecDoublePr(TDouble1Vec{static_cast<double>(influence.second)}, 1.0)); + k->second.s_InfluenceValues[i].emplace_back(TStrCRef(*CDataGatherer::extractData(influence.first)), + TDouble1VecDoublePr(TDouble1Vec{static_cast<double>(influence.second)}, 1.0)); } } } -void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, - TSizeSizePrFeatureDataPrVec &result) const -{ - const TSizeSizePrStoredStringPtrPrUInt64UMapVec &influencers = this->influencerCounts(time); - if (influencers.empty()) - { +void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, TSizeSizePrFeatureDataPrVec& result) const { + const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencers = this->influencerCounts(time); + if (influencers.empty()) { return; } - for (std::size_t i = 0u; i < result.size(); ++i) - { + for (std::size_t i = 0u; i < result.size(); ++i) { result[i].second.s_InfluenceValues.resize(influencers.size()); } - for (std::size_t i = 0u; i < influencers.size(); ++i) - { - for (const auto &influence : influencers[i]) - { - auto k = std::lower_bound(result.begin(), result.end(), - influence.first.first, - maths::COrderings::SFirstLess()); - if (k == 
result.end() || k->first != influence.first.first) - { + for (std::size_t i = 0u; i < influencers.size(); ++i) { + for (const auto& influence : influencers[i]) { + auto k = std::lower_bound(result.begin(), result.end(), influence.first.first, maths::COrderings::SFirstLess()); + if (k == result.end() || k->first != influence.first.first) { std::size_t pid = CDataGatherer::extractPersonId(influence.first); std::size_t cid = CDataGatherer::extractAttributeId(influence.first); - LOG_ERROR("Missing feature data for person " << m_DataGatherer.personName(pid) - << " and attribute " << m_DataGatherer.attributeName(cid)); + LOG_ERROR("Missing feature data for person " << m_DataGatherer.personName(pid) << " and attribute " + << m_DataGatherer.attributeName(cid)); continue; } - k->second.s_InfluenceValues[i].emplace_back( - TStrCRef(*CDataGatherer::extractData(influence.first)), - TDouble1VecDoublePr(TDouble1Vec{static_cast<double>(influence.second)}, 1.0)); + k->second.s_InfluenceValues[i].emplace_back(TStrCRef(*CDataGatherer::extractData(influence.first)), + TDouble1VecDoublePr(TDouble1Vec{static_cast<double>(influence.second)}, 1.0)); } } } ////// CUniqueStringFeatureData ////// -void CUniqueStringFeatureData::insert(const std::string &value, const TStoredStringPtrVec &influences) -{ +void CUniqueStringFeatureData::insert(const std::string& value, const TStoredStringPtrVec& influences) { TWord valueHash = m_Dictionary1.word(value); m_UniqueStrings.emplace(valueHash, value); - if (influences.size() > m_InfluencerUniqueStrings.size()) - { + if (influences.size() > m_InfluencerUniqueStrings.size()) { m_InfluencerUniqueStrings.resize(influences.size()); } - for (std::size_t i = 0; i < influences.size(); ++i) - { + for (std::size_t i = 0; i < influences.size(); ++i) { // The influence strings are optional. 
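// [Illustrative aside, not part of the original patch.] CUniqueStringFeatureData
// keeps one set of hashed words per influencer, so an influencer's distinct count
// is simply the size of its set. A rough standalone equivalent, with hypothetical
// names (TWordHash, CDistinctCounter) and std::hash standing in for the dictionary:
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>
#include <unordered_set>
namespace sketch {
using TWordHash = std::uint64_t;
class CDistinctCounter {
public:
    void insert(const std::string& value, const std::string& influencer) {
        // Stands in for m_Dictionary1.word(value): dedupe by hash, not by string.
        TWordHash hash{std::hash<std::string>{}(value)};
        m_UniqueValues[influencer].insert(hash);
    }
    std::size_t distinctCount(const std::string& influencer) const {
        auto i = m_UniqueValues.find(influencer);
        return i != m_UniqueValues.end() ? i->second.size() : 0;
    }
private:
    std::unordered_map<std::string, std::unordered_set<TWordHash>> m_UniqueValues;
};
}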
- if (influences[i]) - { + if (influences[i]) { m_InfluencerUniqueStrings[i][influences[i]].insert(valueHash); } } } -void CUniqueStringFeatureData::populateDistinctCountFeatureData(SEventRateFeatureData &featureData) const -{ +void CUniqueStringFeatureData::populateDistinctCountFeatureData(SEventRateFeatureData& featureData) const { featureData.s_Count = m_UniqueStrings.size(); featureData.s_InfluenceValues.clear(); featureData.s_InfluenceValues.resize(m_InfluencerUniqueStrings.size()); - for (std::size_t i = 0u; i < m_InfluencerUniqueStrings.size(); ++i) - { - TStrCRefDouble1VecDoublePrPrVec &data = featureData.s_InfluenceValues[i]; + for (std::size_t i = 0u; i < m_InfluencerUniqueStrings.size(); ++i) { + TStrCRefDouble1VecDoublePrPrVec& data = featureData.s_InfluenceValues[i]; data.reserve(m_InfluencerUniqueStrings[i].size()); - for (const auto &influence : m_InfluencerUniqueStrings[i]) - { + for (const auto& influence : m_InfluencerUniqueStrings[i]) { data.emplace_back(TStrCRef(*influence.first), - TDouble1VecDoublePr( - TDouble1Vec{static_cast<double>(influence.second.size())}, 1.0)); + TDouble1VecDoublePr(TDouble1Vec{static_cast<double>(influence.second.size())}, 1.0)); } } } -void CUniqueStringFeatureData::populateInfoContentFeatureData(SEventRateFeatureData &featureData) const -{ +void CUniqueStringFeatureData::populateInfoContentFeatureData(SEventRateFeatureData& featureData) const { using TStrCRefVec = std::vector<TStrCRef>; featureData.s_InfluenceValues.clear(); core::CCompressUtils compressor(true); - try - { + try { TStrCRefVec strings; strings.reserve(m_UniqueStrings.size()); - for (const auto &string : m_UniqueStrings) - { + for (const auto& string : m_UniqueStrings) { strings.emplace_back(string.second); } std::sort(strings.begin(), strings.end(), maths::COrderings::SLess()); - std::for_each(strings.begin(), strings.end(), - [&compressor](const std::string &string) { compressor.addString(string); }); + std::for_each(strings.begin(), strings.end(), [&compressor](const std::string& string) { compressor.addString(string); }); std::size_t length = 0u; - if (compressor.compressedLength(true, length) == false) - { + if (compressor.compressedLength(true, length) == false) { LOG_ERROR("Failed to get compressed length"); compressor.reset(); } featureData.s_Count = length; featureData.s_InfluenceValues.reserve(m_InfluencerUniqueStrings.size()); - for (std::size_t i = 0u; i < m_InfluencerUniqueStrings.size(); ++i) - { + for (std::size_t i = 0u; i < m_InfluencerUniqueStrings.size(); ++i) { featureData.s_InfluenceValues.push_back(TStrCRefDouble1VecDoublePrPrVec()); - TStrCRefDouble1VecDoublePrPrVec &data = featureData.s_InfluenceValues.back(); + TStrCRefDouble1VecDoublePrPrVec& data = featureData.s_InfluenceValues.back(); - for (const auto &influence : m_InfluencerUniqueStrings[i]) - { + for (const auto& influence : m_InfluencerUniqueStrings[i]) { strings.clear(); strings.reserve(influence.second.size()); - for (const auto &word : influence.second) - { + for (const auto& word : influence.second) { strings.emplace_back(m_UniqueStrings.at(word)); } std::sort(strings.begin(), strings.end(), maths::COrderings::SLess()); - std::for_each(strings.begin(), strings.end(), - [&compressor](const std::string &string) { compressor.addString(string); }); + std::for_each(strings.begin(), strings.end(), [&compressor](const std::string& string) { compressor.addString(string); }); length = 0u; - if (compressor.compressedLength(true, length) == false) - { 
LOG_ERROR("Failed to get compressed length"); compressor.reset(); } - data.emplace_back(TStrCRef(*influence.first), - TDouble1VecDoublePr( - TDouble1Vec{static_cast(length)}, 1.0)); + data.emplace_back(TStrCRef(*influence.first), TDouble1VecDoublePr(TDouble1Vec{static_cast(length)}, 1.0)); } } - } - catch (const std::exception &e) - { - LOG_ERROR("Failed to get info content: " << e.what()); - } + } catch (const std::exception& e) { LOG_ERROR("Failed to get info content: " << e.what()); } } -void CUniqueStringFeatureData::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(UNIQUE_STRINGS_TAG, - boost::bind(&persistUniqueStrings, - boost::cref(m_UniqueStrings), _1)); - for (std::size_t i = 0u; i < m_InfluencerUniqueStrings.size(); ++i) - { +void CUniqueStringFeatureData::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(UNIQUE_STRINGS_TAG, boost::bind(&persistUniqueStrings, boost::cref(m_UniqueStrings), _1)); + for (std::size_t i = 0u; i < m_InfluencerUniqueStrings.size(); ++i) { inserter.insertLevel(INFLUENCER_UNIQUE_STRINGS_TAG, - boost::bind(&persistInfluencerUniqueStrings, - boost::cref(m_InfluencerUniqueStrings[i]), _1)); + boost::bind(&persistInfluencerUniqueStrings, boost::cref(m_InfluencerUniqueStrings[i]), _1)); } } -bool CUniqueStringFeatureData::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - RESTORE(UNIQUE_STRINGS_TAG, - traverser.traverseSubLevel(boost::bind(&restoreUniqueStrings, - _1, boost::ref(m_UniqueStrings)))) - RESTORE_SETUP_TEARDOWN(INFLUENCER_UNIQUE_STRINGS_TAG, - m_InfluencerUniqueStrings.push_back(TStoredStringPtrWordSetUMap()), - traverser.traverseSubLevel(boost::bind(&restoreInfluencerUniqueStrings, - _1, boost::ref(m_InfluencerUniqueStrings.back()))), - /**/) - } - while (traverser.next()); +bool CUniqueStringFeatureData::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE(UNIQUE_STRINGS_TAG, traverser.traverseSubLevel(boost::bind(&restoreUniqueStrings, _1, boost::ref(m_UniqueStrings)))) + RESTORE_SETUP_TEARDOWN( + INFLUENCER_UNIQUE_STRINGS_TAG, + m_InfluencerUniqueStrings.push_back(TStoredStringPtrWordSetUMap()), + traverser.traverseSubLevel(boost::bind(&restoreInfluencerUniqueStrings, _1, boost::ref(m_InfluencerUniqueStrings.back()))), + /**/) + } while (traverser.next()); return true; } -uint64_t CUniqueStringFeatureData::checksum() const -{ +uint64_t CUniqueStringFeatureData::checksum() const { uint64_t seed = maths::CChecksum::calculate(0, m_UniqueStrings); return maths::CChecksum::calculate(seed, m_InfluencerUniqueStrings); } -void CUniqueStringFeatureData::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CUniqueStringFeatureData::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CUniqueStringFeatureData", sizeof(*this)); core::CMemoryDebug::dynamicSize("s_NoInfluenceUniqueStrings", m_UniqueStrings, mem); core::CMemoryDebug::dynamicSize("s_InfluenceUniqueStrings", m_InfluencerUniqueStrings, mem); } -std::size_t CUniqueStringFeatureData::memoryUsage() const -{ +std::size_t CUniqueStringFeatureData::memoryUsage() const { std::size_t mem = sizeof(*this); mem += core::CMemory::dynamicSize(m_UniqueStrings); mem += core::CMemory::dynamicSize(m_InfluencerUniqueStrings); return mem; } -std::string CUniqueStringFeatureData::print() const -{ - return "(" + 
core::CContainerPrinter::print(m_UniqueStrings) + ", " + - core::CContainerPrinter::print(m_InfluencerUniqueStrings) + ")"; +std::string CUniqueStringFeatureData::print() const { + return "(" + core::CContainerPrinter::print(m_UniqueStrings) + ", " + core::CContainerPrinter::print(m_InfluencerUniqueStrings) + ")"; } - } } diff --git a/lib/model/CEventRateModel.cc b/lib/model/CEventRateModel.cc index 00b5353e5b..cb11969726 100644 --- a/lib/model/CEventRateModel.cc +++ b/lib/model/CEventRateModel.cc @@ -9,10 +9,10 @@ #include #include #include -#include #include #include #include +#include #include #include @@ -27,9 +27,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -43,13 +43,10 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { using TDouble2Vec = core::CSmallVector<double, 2>; using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>; @@ -61,128 +58,103 @@ using TTime2Vec = core::CSmallVector<core_t::TTime, 2>; const std::string INDIVIDUAL_STATE_TAG("a"); const std::string PROBABILITY_PRIOR_TAG("b"); -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight, - maths_t::E_SampleWinsorisationWeight}; +const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight); - } -CEventRateModel::CEventRateModel(const SModelParams &params, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const maths::CMultinomialConjugate &probabilityPrior, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators) : - CIndividualModel(params, dataGatherer, - newFeatureModels, - newFeatureCorrelateModelPriors, - featureCorrelatesModels, - influenceCalculators), - m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET), - m_ProbabilityPrior(probabilityPrior) -{} - -CEventRateModel::CEventRateModel(const SModelParams &params, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators, - core::CStateRestoreTraverser &traverser) : - CIndividualModel(params, dataGatherer, - newFeatureModels, - newFeatureCorrelateModelPriors, - featureCorrelatesModels, - influenceCalculators), - m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET) -{ +CEventRateModel::CEventRateModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const maths::CMultinomialConjugate& probabilityPrior, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) + : CIndividualModel(params, + dataGatherer, + newFeatureModels, + newFeatureCorrelateModelPriors, + featureCorrelatesModels, + influenceCalculators), + m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET), + m_ProbabilityPrior(probabilityPrior) { +} + +CEventRateModel::CEventRateModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + 
const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + core::CStateRestoreTraverser& traverser) + : CIndividualModel(params, + dataGatherer, + newFeatureModels, + newFeatureCorrelateModelPriors, + featureCorrelatesModels, + influenceCalculators), + m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET) { traverser.traverseSubLevel(boost::bind(&CEventRateModel::acceptRestoreTraverser, this, _1)); } -CEventRateModel::CEventRateModel(bool isForPersistence, const CEventRateModel &other) : - CIndividualModel(isForPersistence, other), - m_CurrentBucketStats(0), // Not needed for persistence so minimally constructed - m_ProbabilityPrior(other.m_ProbabilityPrior) -{ - if (!isForPersistence) - { +CEventRateModel::CEventRateModel(bool isForPersistence, const CEventRateModel& other) + : CIndividualModel(isForPersistence, other), + m_CurrentBucketStats(0), // Not needed for persistence so minimally constructed + m_ProbabilityPrior(other.m_ProbabilityPrior) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } -void CEventRateModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CEventRateModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel(INDIVIDUAL_STATE_TAG, boost::bind(&CEventRateModel::doAcceptPersistInserter, this, _1)); - inserter.insertLevel(PROBABILITY_PRIOR_TAG, - boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, - &m_ProbabilityPrior, _1)); -} - -bool CEventRateModel::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == INDIVIDUAL_STATE_TAG) - { - if (traverser.traverseSubLevel(boost::bind(&CEventRateModel::doAcceptRestoreTraverser, - this, _1)) == false) - { + inserter.insertLevel(PROBABILITY_PRIOR_TAG, boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, &m_ProbabilityPrior, _1)); +} + +bool CEventRateModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == INDIVIDUAL_STATE_TAG) { + if (traverser.traverseSubLevel(boost::bind(&CEventRateModel::doAcceptRestoreTraverser, this, _1)) == false) { // Logging handled already. 
return false; } - } - else if (name == PROBABILITY_PRIOR_TAG) - { - maths::CMultinomialConjugate prior( - this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser); + } else if (name == PROBABILITY_PRIOR_TAG) { + maths::CMultinomialConjugate prior(this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser); m_ProbabilityPrior.swap(prior); } - } - while (traverser.next()); + } while (traverser.next()); return true; } -CAnomalyDetectorModel *CEventRateModel::cloneForPersistence() const -{ +CAnomalyDetectorModel* CEventRateModel::cloneForPersistence() const { return new CEventRateModel(true, *this); } -model_t::EModelType CEventRateModel::category() const -{ +model_t::EModelType CEventRateModel::category() const { return model_t::E_EventRateOnline; } -bool CEventRateModel::isEventRate() const -{ +bool CEventRateModel::isEventRate() const { return true; } -bool CEventRateModel::isMetric() const -{ +bool CEventRateModel::isMetric() const { return false; } -void CEventRateModel::currentBucketPersonIds(core_t::TTime time, TSizeVec &result) const -{ +void CEventRateModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const { this->CIndividualModel::currentBucketPersonIds(time, m_CurrentBucketStats.s_FeatureData, result); } -CEventRateModel::TOptionalDouble CEventRateModel::baselineBucketCount(std::size_t /*pid*/) const -{ +CEventRateModel::TOptionalDouble CEventRateModel::baselineBucketCount(std::size_t /*pid*/) const { return TOptionalDouble(); } -CEventRateModel::TDouble1Vec CEventRateModel::currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t /*cid*/, - core_t::TTime time) const -{ - const TFeatureData *data = this->featureData(feature, pid, time); - if (data) - { +CEventRateModel::TDouble1Vec +CEventRateModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t /*cid*/, core_t::TTime time) const { + const TFeatureData* data = this->featureData(feature, pid, time); + if (data) { return TDouble1Vec(1, static_cast<double>(data->s_Count)); } return TDouble1Vec(); @@ -192,79 +164,61 @@ CEventRateModel::TDouble1Vec CEventRateModel::baselineBucketMean(model_t::EFeatu std::size_t pid, std::size_t cid, model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const -{ - const maths::CModel *model{this->model(feature, pid)}; - if (!model) - { + const TSizeDoublePr1Vec& correlated, + core_t::TTime time) const { + const maths::CModel* model{this->model(feature, pid)}; + if (!model) { return TDouble1Vec(); } static const TSizeDoublePr1Vec NO_CORRELATED; TDouble2Vec hint; - if (model_t::isDiurnal(feature)) - { + if (model_t::isDiurnal(feature)) { hint = this->currentBucketValue(feature, pid, cid, time); } TDouble1Vec result(model->predict(time, type.isUnconditional() ? 
NO_CORRELATED : correlated, hint)); double probability = 1.0; - if (model_t::isConstant(feature) && !m_Probabilities.lookup(pid, probability)) - { + if (model_t::isConstant(feature) && !m_Probabilities.lookup(pid, probability)) { probability = 1.0; } - for (auto &coord : result) - { + for (auto& coord : result) { coord = probability * model_t::inverseOffsetCountToZero(feature, coord); } - this->correctBaselineForInterim(feature, pid, type, correlated, - this->currentBucketInterimCorrections(), result); + this->correctBaselineForInterim(feature, pid, type, correlated, this->currentBucketInterimCorrections(), result); TDouble1VecDouble1VecPr support{model_t::support(feature)}; return maths::CTools::truncate(result, support.first, support.second); } -void CEventRateModel::sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ +void CEventRateModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { this->createUpdateNewModels(startTime, resourceMonitor); this->currentBucketInterimCorrections().clear(); - this->CIndividualModel::sampleBucketStatistics(startTime, endTime, - this->personFilter(), - m_CurrentBucketStats.s_FeatureData, - resourceMonitor); + this->CIndividualModel::sampleBucketStatistics( + startTime, endTime, this->personFilter(), m_CurrentBucketStats.s_FeatureData, resourceMonitor); } -void CEventRateModel::sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CEventRateModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (!gatherer.validateSampleTimes(startTime, endTime)) - { + if (!gatherer.validateSampleTimes(startTime, endTime)) { return; } this->createUpdateNewModels(startTime, resourceMonitor); this->currentBucketInterimCorrections().clear(); - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { LOG_TRACE("Sampling [" << time << "," << time + bucketLength << ")"); gatherer.sampleNow(time); gatherer.featureData(time, bucketLength, m_CurrentBucketStats.s_FeatureData); - const CIndividualModel::TTimeVec &preSampleLastBucketTimes = this->lastBucketTimes(); + const CIndividualModel::TTimeVec& preSampleLastBucketTimes = this->lastBucketTimes(); CIndividualModel::TSizeTimeUMap lastBucketTimesMap; - for (const auto &featureData : m_CurrentBucketStats.s_FeatureData) - { - for (const auto &data : featureData.second) - { + for (const auto& featureData : m_CurrentBucketStats.s_FeatureData) { + for (const auto& data : featureData.second) { std::size_t pid = data.first; lastBucketTimesMap[pid] = preSampleLastBucketTimes[pid]; } @@ -276,59 +230,49 @@ void CEventRateModel::sample(core_t::TTime startTime, maths::CModel::TTimeDouble2VecSizeTrVec values; maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights(1); - for (auto &featureData : m_CurrentBucketStats.s_FeatureData) - { + for (auto& featureData : m_CurrentBucketStats.s_FeatureData) { model_t::EFeature feature = featureData.first; - TSizeFeatureDataPrVec &data = featureData.second; + TSizeFeatureDataPrVec& data = featureData.second; std::size_t dimension = model_t::dimension(feature); LOG_TRACE(model_t::print(feature) << ": " << core::CContainerPrinter::print(data)); 
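// [Illustrative aside, not part of the original patch.] The branch below treats
// each person id as a category of the multinomial prior m_ProbabilityPrior and
// ages the prior once per bucket, so people seen rarely keep a small relative
// frequency. The decayed-count idea in toy form, with the hypothetical name
// CDecayedFrequencies:
#include <cstddef>
#include <unordered_map>
namespace sketch {
class CDecayedFrequencies {
public:
    explicit CDecayedFrequencies(double decayFactor) : m_DecayFactor{decayFactor} {}
    void add(std::size_t category) { m_Counts[category] += 1.0; }
    // Called once per bucket, analogous to propagateForwardsByTime(1.0).
    void age() {
        for (auto& count : m_Counts) {
            count.second *= m_DecayFactor;
        }
    }
    double frequency(std::size_t category) const {
        double total{0.0};
        for (const auto& count : m_Counts) {
            total += count.second;
        }
        auto i = m_Counts.find(category);
        return total > 0.0 && i != m_Counts.end() ? i->second / total : 0.0;
    }
private:
    double m_DecayFactor;
    std::unordered_map<std::size_t, double> m_Counts;
};
}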
- if (feature == model_t::E_IndividualTotalBucketCountByPerson) - { - for (const auto &data_ : data) - { - if (data_.second.s_Count > 0) - { + if (feature == model_t::E_IndividualTotalBucketCountByPerson) { + for (const auto& data_ : data) { + if (data_.second.s_Count > 0) { LOG_TRACE("person = " << this->personName(data_.first)); m_ProbabilityPrior.addSamples(maths::CConstantWeights::COUNT, TDouble1Vec{static_cast<double>(data_.first)}, maths::CConstantWeights::SINGLE_UNIT); } } - if (!data.empty()) - { + if (!data.empty()) { m_ProbabilityPrior.propagateForwardsByTime(1.0); } continue; } - if (model_t::isCategorical(feature)) - { + if (model_t::isCategorical(feature)) { continue; } this->applyFilter(model_t::E_XF_By, true, this->personFilter(), data); - for (const auto &data_ : data) - { + for (const auto& data_ : data) { std::size_t pid = data_.first; - maths::CModel *model = this->model(feature, pid); - if (!model) - { + maths::CModel* model = this->model(feature, pid); + if (!model) { LOG_ERROR("Missing model for " << this->personName(pid)); continue; } core_t::TTime sampleTime = model_t::sampleTime(feature, time, bucketLength); - if (this->shouldIgnoreSample(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, sampleTime)) - { + if (this->shouldIgnoreSample(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, sampleTime)) { model->skipTime(sampleTime - lastBucketTimesMap[pid]); continue; } double emptyBucketWeight = this->emptyBucketWeight(feature, pid, time); - if (emptyBucketWeight == 0.0) - { + if (emptyBucketWeight == 0.0) { continue; } @@ -336,13 +280,9 @@ void CEventRateModel::sample(core_t::TTime startTime, double derate = this->derate(pid, sampleTime); double interval = (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) * emptyBucketWeight; - LOG_TRACE("Bucket = " << this->printCurrentBucket() - << ", feature = " << model_t::print(feature) - << ", count = " << count - << ", person = " << this->personName(pid) - << ", empty bucket weight = " << emptyBucketWeight - << ", derate = " << derate - << ", interval = " << interval); + LOG_TRACE("Bucket = " << this->printCurrentBucket() << ", feature = " << model_t::print(feature) << ", count = " << count + << ", person = " << this->personName(pid) << ", empty bucket weight = " << emptyBucketWeight + << ", derate = " << derate << ", interval = " << interval); model->params().probabilityBucketEmpty(this->probabilityBucketEmpty(feature, pid)); @@ -354,14 +294,13 @@ void CEventRateModel::sample(core_t::TTime startTime, maths::CModelAddSamplesParams params; params.integer(true) - .nonNegative(true) - .propagationInterval(interval) - .weightStyles(SAMPLE_WEIGHT_STYLES) - .trendWeights(weights) - .priorWeights(weights); - - if (model->addSamples(params, values) == maths::CModel::E_Reset) - { + .nonNegative(true) + .propagationInterval(interval) + .weightStyles(SAMPLE_WEIGHT_STYLES) + .trendWeights(weights) + .priorWeights(weights); + + if (model->addSamples(params, values) == maths::CModel::E_Reset) { gatherer.resetSampleCount(pid); } } @@ -375,21 +314,18 @@ bool CEventRateModel::computeProbability(std::size_t pid, core_t::TTime startTime, core_t::TTime endTime, - CPartitioningFields &partitioningFields, + CPartitioningFields& partitioningFields, std::size_t /*numberAttributeProbabilities*/, - SAnnotatedProbability &result) const -{ - const CDataGatherer &gatherer = this->dataGatherer(); + SAnnotatedProbability& result) const { + const CDataGatherer& gatherer = 
this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (endTime != startTime + bucketLength) - { + if (endTime != startTime + bucketLength) { LOG_ERROR("Can only compute probability for single bucket"); return false; } - if (pid >= this->firstBucketTimes().size()) - { + if (pid >= this->firstBucketTimes().size()) { // This is not necessarily an error: the person might have been added // only in an out of phase bucket so far LOG_TRACE("No first time for person = " << gatherer.personName(pid)); @@ -410,23 +346,20 @@ bool CEventRateModel::computeProbability(std::size_t pid, bool addPersonProbability = false; - for (std::size_t i = 0u, n = gatherer.numberFeatures(); i < n; ++i) - { + for (std::size_t i = 0u, n = gatherer.numberFeatures(); i < n; ++i) { model_t::EFeature feature = gatherer.feature(i); - if (model_t::isCategorical(feature)) - { + if (model_t::isCategorical(feature)) { continue; } - const TFeatureData *data = this->featureData(feature, pid, startTime); - if (!data) - { + const TFeatureData* data = this->featureData(feature, pid, startTime); + if (!data) { continue; } if (this->shouldIgnoreResult(feature, result.s_ResultType, - pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, - model_t::sampleTime(feature, startTime, bucketLength))) - { + pid, + model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, + model_t::sampleTime(feature, startTime, bucketLength))) { continue; } @@ -434,15 +367,12 @@ bool CEventRateModel::computeProbability(std::size_t pid, LOG_TRACE("Compute probability for " << data->print()); - if (this->correlates(feature, pid, startTime)) - { + if (this->correlates(feature, pid, startTime)) { CProbabilityAndInfluenceCalculator::SCorrelateParams params(partitioningFields); TStrCRefDouble1VecDouble1VecPrPrVecVecVec influenceValues; this->fill(feature, pid, startTime, result.isInterim(), params, influenceValues); this->addProbabilityAndInfluences(pid, params, influenceValues, pFeatures, resultBuilder); - } - else - { + } else { CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); this->fill(feature, pid, startTime, result.isInterim(), params); this->addProbabilityAndInfluences(pid, params, data->s_InfluenceValues, pFeatures, resultBuilder); @@ -452,25 +382,21 @@ bool CEventRateModel::computeProbability(std::size_t pid, TOptionalUInt64 count = this->currentBucketCount(pid, startTime); pJoint.add(pFeatures); - if (addPersonProbability && count && *count != 0) - { + if (addPersonProbability && count && *count != 0) { double p; - if (m_Probabilities.lookup(pid, p)) - { + if (m_Probabilities.lookup(pid, p)) { LOG_TRACE("P(" << gatherer.personName(pid) << ") = " << p); pJoint.addProbability(p); } } - if (pJoint.empty()) - { + if (pJoint.empty()) { LOG_TRACE("No samples in [" << startTime << "," << endTime << ")"); return false; } double p; - if (!pJoint.calculate(p, result.s_Influences)) - { + if (!pJoint.calculate(p, result.s_Influences)) { LOG_ERROR("Failed to compute probability"); return false; } @@ -484,27 +410,22 @@ bool CEventRateModel::computeProbability(std::size_t pid, return true; } -uint64_t CEventRateModel::checksum(bool includeCurrentBucketStats) const -{ +uint64_t CEventRateModel::checksum(bool includeCurrentBucketStats) const { using TStrCRefUInt64Map = std::map<TStrCRef, uint64_t, maths::COrderings::SLess>; uint64_t seed = this->CIndividualModel::checksum(includeCurrentBucketStats); TStrCRefUInt64Map hashes; - const TDoubleVec &categories = m_ProbabilityPrior.categories(); - const TDoubleVec &concentrations = m_ProbabilityPrior.concentrations(); - for (std::size_t i = 0u; i 
< categories.size(); ++i) - { - uint64_t &hash = hashes[boost::cref(this->personName(static_cast<std::size_t>(categories[i])))]; + const TDoubleVec& categories = m_ProbabilityPrior.categories(); + const TDoubleVec& concentrations = m_ProbabilityPrior.concentrations(); + for (std::size_t i = 0u; i < categories.size(); ++i) { + uint64_t& hash = hashes[boost::cref(this->personName(static_cast<std::size_t>(categories[i])))]; hash = maths::CChecksum::calculate(hash, concentrations[i]); } - if (includeCurrentBucketStats) - { - for (const auto &featureData_ : m_CurrentBucketStats.s_FeatureData) - { - for (const auto &data : featureData_.second) - { - uint64_t &hash = hashes[boost::cref(this->personName(data.first))]; + if (includeCurrentBucketStats) { + for (const auto& featureData_ : m_CurrentBucketStats.s_FeatureData) { + for (const auto& data : featureData_.second) { + uint64_t& hash = hashes[boost::cref(this->personName(data.first))]; hash = maths::CChecksum::calculate(hash, data.second.s_Count); } } @@ -516,32 +437,25 @@ uint64_t CEventRateModel::checksum(bool includeCurrentBucketStats) const return maths::CChecksum::calculate(seed, hashes); } -void CEventRateModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CEventRateModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CEventRateModel"); this->CIndividualModel::debugMemoryUsage(mem->addChild()); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", - m_CurrentBucketStats.s_PersonCounts, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", - m_CurrentBucketStats.s_FeatureData, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", - m_CurrentBucketStats.s_InterimCorrections, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", m_CurrentBucketStats.s_PersonCounts, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", m_CurrentBucketStats.s_FeatureData, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", m_CurrentBucketStats.s_InterimCorrections, mem); core::CMemoryDebug::dynamicSize("s_Probabilities", m_Probabilities, mem); core::CMemoryDebug::dynamicSize("m_ProbabilityPrior", m_ProbabilityPrior, mem); } -std::size_t CEventRateModel::memoryUsage() const -{ +std::size_t CEventRateModel::memoryUsage() const { return this->CIndividualModel::memoryUsage(); } -std::size_t CEventRateModel::staticSize() const -{ +std::size_t CEventRateModel::staticSize() const { return sizeof(*this); } -std::size_t CEventRateModel::computeMemoryUsage() const -{ +std::size_t CEventRateModel::computeMemoryUsage() const { std::size_t mem = this->CIndividualModel::computeMemoryUsage(); mem += core::CMemory::dynamicSize(m_CurrentBucketStats.s_PersonCounts); mem += core::CMemory::dynamicSize(m_CurrentBucketStats.s_FeatureData); @@ -551,82 +465,62 @@ std::size_t CEventRateModel::computeMemoryUsage() const return mem; } -CEventRateModel::CModelDetailsViewPtr CEventRateModel::details() const -{ +CEventRateModel::CModelDetailsViewPtr CEventRateModel::details() const { return CModelDetailsViewPtr(new CEventRateModelDetailsView(*this)); } -const CEventRateModel::TFeatureData *CEventRateModel::featureData(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) const -{ +const CEventRateModel::TFeatureData* CEventRateModel::featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const { return this->CIndividualModel::featureData(feature, pid, time, 
m_CurrentBucketStats.s_FeatureData); } -core_t::TTime CEventRateModel::currentBucketStartTime() const -{ +core_t::TTime CEventRateModel::currentBucketStartTime() const { return m_CurrentBucketStats.s_StartTime; } -void CEventRateModel::currentBucketStartTime(core_t::TTime time) -{ +void CEventRateModel::currentBucketStartTime(core_t::TTime time) { m_CurrentBucketStats.s_StartTime = time; } -const CEventRateModel::TSizeUInt64PrVec &CEventRateModel::currentBucketPersonCounts() const -{ +const CEventRateModel::TSizeUInt64PrVec& CEventRateModel::currentBucketPersonCounts() const { return m_CurrentBucketStats.s_PersonCounts; } -CEventRateModel::TSizeUInt64PrVec &CEventRateModel::currentBucketPersonCounts() -{ +CEventRateModel::TSizeUInt64PrVec& CEventRateModel::currentBucketPersonCounts() { return m_CurrentBucketStats.s_PersonCounts; } -void CEventRateModel::currentBucketTotalCount(uint64_t totalCount) -{ +void CEventRateModel::currentBucketTotalCount(uint64_t totalCount) { m_CurrentBucketStats.s_TotalCount = totalCount; } -uint64_t CEventRateModel::currentBucketTotalCount() const -{ +uint64_t CEventRateModel::currentBucketTotalCount() const { return m_CurrentBucketStats.s_TotalCount; } -CIndividualModel::TFeatureSizeSizeTripleDouble1VecUMap & - CEventRateModel::currentBucketInterimCorrections() const -{ +CIndividualModel::TFeatureSizeSizeTripleDouble1VecUMap& CEventRateModel::currentBucketInterimCorrections() const { return m_CurrentBucketStats.s_InterimCorrections; } -void CEventRateModel::createNewModels(std::size_t n, std::size_t m) -{ +void CEventRateModel::createNewModels(std::size_t n, std::size_t m) { this->CIndividualModel::createNewModels(n, m); } -void CEventRateModel::updateRecycledModels() -{ +void CEventRateModel::updateRecycledModels() { this->CIndividualModel::updateRecycledModels(); } -void CEventRateModel::clearPrunedResources(const TSizeVec &people, - const TSizeVec &attributes) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CEventRateModel::clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes) { + CDataGatherer& gatherer = this->dataGatherer(); // Stop collecting for these people and add them to the free list. 
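// [Illustrative aside, not part of the original patch.] Pruning also has to touch
// the probability prior: each recycled person id is a category of m_ProbabilityPrior,
// so the code below collects those ids and drops them via removeCategories(). In
// terms of the toy CDecayedFrequencies sketch above, the equivalent step would be
// a hypothetical
//
//     void removeCategory(std::size_t category) { m_Counts.erase(category); }
//
// which stops the pruned person contributing to every later frequency() call.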
gatherer.recyclePeople(people); - if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) - { - gatherer.featureData(m_CurrentBucketStats.s_StartTime, - gatherer.bucketLength(), - m_CurrentBucketStats.s_FeatureData); + if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { + gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), m_CurrentBucketStats.s_FeatureData); } TDoubleVec categoriesToRemove; categoriesToRemove.reserve(people.size()); - for (std::size_t i = 0u; i < people.size(); ++i) - { + for (std::size_t i = 0u; i < people.size(); ++i) { categoriesToRemove.push_back(static_cast<double>(people[i])); } m_ProbabilityPrior.removeCategories(categoriesToRemove); @@ -635,20 +529,14 @@ void CEventRateModel::clearPrunedResources(const TSizeVec &people, this->CIndividualModel::clearPrunedResources(people, attributes); } -bool CEventRateModel::correlates(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) const -{ - if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields) - { +bool CEventRateModel::correlates(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const { + if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields) { return false; } - const maths::CModel *model{this->model(feature, pid)}; - for (const auto &correlate : model->correlates()) - { - if (this->featureData(feature, pid == correlate[0] ? correlate[1] : correlate[0], time)) - { + const maths::CModel* model{this->model(feature, pid)}; + for (const auto& correlate : model->correlates()) { + if (this->featureData(feature, pid == correlate[0] ? correlate[1] : correlate[0], time)) { return true; } } @@ -659,10 +547,9 @@ void CEventRateModel::fill(model_t::EFeature feature, std::size_t pid, core_t::TTime bucketTime, bool interim, - CProbabilityAndInfluenceCalculator::SParams &params) const -{ - const TFeatureData *data{this->featureData(feature, pid, bucketTime)}; - const maths::CModel *model{this->model(feature, pid)}; + CProbabilityAndInfluenceCalculator::SParams& params) const { + const TFeatureData* data{this->featureData(feature, pid, bucketTime)}; + const maths::CModel* model{this->model(feature, pid)}; core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength())}; TOptionalUInt64 count{this->currentBucketCount(pid, bucketTime)}; double value{model_t::offsetCountToZero(feature, static_cast<double>(data->s_Count))}; @@ -673,34 +560,31 @@ void CEventRateModel::fill(model_t::EFeature feature, params.s_ElapsedTime = bucketTime - this->firstBucketTimes()[pid]; params.s_Time.assign(1, TTime2Vec{time}); params.s_Value.assign(1, TDouble2Vec{value}); - if (interim && model_t::requiresInterimResultAdjustment(feature)) - { + if (interim && model_t::requiresInterimResultAdjustment(feature)) { double mode{params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weight)[0]}; - TDouble2Vec correction{this->interimValueCorrector().corrections( - time, this->currentBucketTotalCount(), mode, value)}; + TDouble2Vec correction{this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), mode, value)}; params.s_Value[0] += correction; this->currentBucketInterimCorrections().emplace(core::make_triple(feature, pid, pid), correction); } params.s_Count = 1.0; params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES) - .addBucketEmpty(TBool2Vec(1, !count || *count == 0)) - .addWeights(weight); + .weightStyles(PROBABILITY_WEIGHT_STYLES) + 
.addBucketEmpty(TBool2Vec(1, !count || *count == 0)) + .addWeights(weight); } void CEventRateModel::fill(model_t::EFeature feature, std::size_t pid, core_t::TTime bucketTime, bool interim, - CProbabilityAndInfluenceCalculator::SCorrelateParams &params, - TStrCRefDouble1VecDouble1VecPrPrVecVecVec &influenceValues) const -{ + CProbabilityAndInfluenceCalculator::SCorrelateParams& params, + TStrCRefDouble1VecDouble1VecPrPrVecVecVec& influenceValues) const { using TStrCRefDouble1VecDoublePrPr = std::pair<TStrCRef, TDouble1VecDoublePr>; - const CDataGatherer &gatherer{this->dataGatherer()}; - const maths::CModel *model{this->model(feature, pid)}; - const TSize2Vec1Vec &correlates{model->correlates()}; - const TTimeVec &firstBucketTimes{this->firstBucketTimes()}; + const CDataGatherer& gatherer{this->dataGatherer()}; + const maths::CModel* model{this->model(feature, pid)}; + const TSize2Vec1Vec& correlates{model->correlates()}; + const TTimeVec& firstBucketTimes{this->firstBucketTimes()}; core_t::TTime time{model_t::sampleTime(feature, bucketTime, gatherer.bucketLength())}; params.s_Feature = feature; @@ -712,8 +596,7 @@ void CEventRateModel::fill(model_t::EFeature feature, params.s_Variables.resize(correlates.size()); params.s_CorrelatedLabels.resize(correlates.size()); params.s_Correlated.resize(correlates.size()); - params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES); + params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)).weightStyles(PROBABILITY_WEIGHT_STYLES); // These are indexed as follows: // influenceValues["influencer name"]["correlate"]["influence value"] @@ -725,50 +608,43 @@ void CEventRateModel::fill(model_t::EFeature feature, // Declared outside the loop to minimize the number of times it is created. TDouble1VecDouble1VecPr value; - for (std::size_t i = 0u; i < correlates.size(); ++i) - { + for (std::size_t i = 0u; i < correlates.size(); ++i) { TSize2Vec variables = pid == correlates[i][0] ? 
TSize2Vec{0, 1} : TSize2Vec{1, 0}; params.s_CorrelatedLabels[i] = gatherer.personNamePtr(correlates[i][variables[1]]); params.s_Correlated[i] = correlates[i][variables[1]]; params.s_Variables[i] = variables; - const maths::CModel *models[]{model, this->model(feature, correlates[i][variables[1]])}; + const maths::CModel* models[]{model, this->model(feature, correlates[i][variables[1]])}; TDouble2Vec4Vec weight(1, TDouble2Vec(2)); weight[0][variables[0]] = models[0]->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0]; weight[0][variables[1]] = models[1]->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0]; TOptionalUInt64 count[2]; count[0] = this->currentBucketCount(correlates[i][0], bucketTime); count[1] = this->currentBucketCount(correlates[i][1], bucketTime); - params.s_ComputeProbabilityParams.addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, - !count[1] || *count[1] == 0}) - .addWeights(weight); + params.s_ComputeProbabilityParams.addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, !count[1] || *count[1] == 0}) + .addWeights(weight); - const TFeatureData *data[2]; + const TFeatureData* data[2]; data[0] = this->featureData(feature, correlates[i][0], bucketTime); data[1] = this->featureData(feature, correlates[i][1], bucketTime); - if (data[0] && data[1]) - { + if (data[0] && data[1]) { params.s_ElapsedTime = std::min(params.s_ElapsedTime, bucketTime - firstBucketTimes[correlates[i][0]]); params.s_ElapsedTime = std::min(params.s_ElapsedTime, bucketTime - firstBucketTimes[correlates[i][1]]); params.s_Times[i] = TTime2Vec(2, time); params.s_Values[i] = TDouble2Vec{model_t::offsetCountToZero(feature, static_cast<double>(data[0]->s_Count)), model_t::offsetCountToZero(feature, static_cast<double>(data[1]->s_Count))}; - for (std::size_t j = 0u; j < data[0]->s_InfluenceValues.size(); ++j) - { - for (const auto &influenceValue : data[0]->s_InfluenceValues[j]) - { + for (std::size_t j = 0u; j < data[0]->s_InfluenceValues.size(); ++j) { + for (const auto& influenceValue : data[0]->s_InfluenceValues[j]) { TStrCRef influence = influenceValue.first; - std::size_t match = static_cast<std::size_t>( - std::find_if(data[1]->s_InfluenceValues[j].begin(), - data[1]->s_InfluenceValues[j].end(), - [influence](const TStrCRefDouble1VecDoublePrPr &value_) - { - return value_.first.get() == influence.get(); - }) - data[1]->s_InfluenceValues[j].begin()); - if (match < data[1]->s_InfluenceValues[j].size()) - { - const TDouble1VecDoublePr &value0 = influenceValue.second; - const TDouble1VecDoublePr &value1 = data[1]->s_InfluenceValues[j][match].second; - value.first = TDouble1Vec{value0.first[0], value1.first[0]}; + std::size_t match = static_cast<std::size_t>(std::find_if(data[1]->s_InfluenceValues[j].begin(), + data[1]->s_InfluenceValues[j].end(), + [influence](const TStrCRefDouble1VecDoublePrPr& value_) { + return value_.first.get() == influence.get(); + }) - + data[1]->s_InfluenceValues[j].begin()); + if (match < data[1]->s_InfluenceValues[j].size()) { + const TDouble1VecDoublePr& value0 = influenceValue.second; + const TDouble1VecDoublePr& value1 = data[1]->s_InfluenceValues[j][match].second; + value.first = TDouble1Vec{value0.first[0], value1.first[0]}; value.second = TDouble1Vec{value0.second, value1.second}; influenceValues[j][i].emplace_back(influence, value); } @@ -776,21 +652,16 @@ void CEventRateModel::fill(model_t::EFeature feature, } } } - if (interim && model_t::requiresInterimResultAdjustment(feature)) - { - TDouble2Vec1Vec modes = params.s_Model->correlateModes(time, PROBABILITY_WEIGHT_STYLES, - 
params.s_ComputeProbabilityParams.weights()); - for (std::size_t i = 0u; i < modes.size(); ++i) - { - TDouble2Vec &value_ = params.s_Values[i]; - if (!value_.empty()) - { - TDouble2Vec correction(this->interimValueCorrector().corrections( - time, this->currentBucketTotalCount(), modes[i], value_)); + if (interim && model_t::requiresInterimResultAdjustment(feature)) { + TDouble2Vec1Vec modes = + params.s_Model->correlateModes(time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights()); + for (std::size_t i = 0u; i < modes.size(); ++i) { + TDouble2Vec& value_ = params.s_Values[i]; + if (!value_.empty()) { + TDouble2Vec correction(this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), modes[i], value_)); value_ += correction; - this->currentBucketInterimCorrections().emplace( - core::make_triple(feature, pid, params.s_Correlated[i]), - TDouble1Vec{correction[params.s_Variables[i][0]]}); + this->currentBucketInterimCorrections().emplace(core::make_triple(feature, pid, params.s_Correlated[i]), + TDouble1Vec{correction[params.s_Variables[i][0]]}); } } } @@ -798,11 +669,7 @@ void CEventRateModel::fill(model_t::EFeature feature, ////////// CEventRateModel::SBucketStats Implementation ////////// -CEventRateModel::SBucketStats::SBucketStats(core_t::TTime startTime) : - s_StartTime(startTime), - s_TotalCount(0), - s_InterimCorrections(1) -{} - +CEventRateModel::SBucketStats::SBucketStats(core_t::TTime startTime) : s_StartTime(startTime), s_TotalCount(0), s_InterimCorrections(1) { +} } } diff --git a/lib/model/CEventRateModelFactory.cc b/lib/model/CEventRateModelFactory.cc index fc100bddc7..e622b2d052 100644 --- a/lib/model/CEventRateModelFactory.cc +++ b/lib/model/CEventRateModelFactory.cc @@ -25,41 +25,35 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -CEventRateModelFactory::CEventRateModelFactory(const SModelParams &params, +CEventRateModelFactory::CEventRateModelFactory(const SModelParams& params, model_t::ESummaryMode summaryMode, - const std::string &summaryCountFieldName) : - CModelFactory(params), - m_Identifier(), - m_SummaryMode(summaryMode), - m_SummaryCountFieldName(summaryCountFieldName), - m_UseNull(false), - m_BucketResultsDelay(0) -{} - -CEventRateModelFactory *CEventRateModelFactory::clone() const -{ + const std::string& summaryCountFieldName) + : CModelFactory(params), + m_Identifier(), + m_SummaryMode(summaryMode), + m_SummaryCountFieldName(summaryCountFieldName), + m_UseNull(false), + m_BucketResultsDelay(0) { +} + +CEventRateModelFactory* CEventRateModelFactory::clone() const { return new CEventRateModelFactory(*this); } -CAnomalyDetectorModel *CEventRateModelFactory::makeModel(const SModelInitializationData &initData) const -{ +CAnomalyDetectorModel* CEventRateModelFactory::makeModel(const SModelInitializationData& initData) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; - if (!dataGatherer) - { + if (!dataGatherer) { LOG_ERROR("NULL data gatherer"); return 0; } - const TFeatureVec &features = dataGatherer->features(); + const TFeatureVec& features = dataGatherer->features(); TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; influenceCalculators.reserve(m_InfluenceFieldNames.size()); - for (const auto &name : m_InfluenceFieldNames) - { + for (const auto& name : m_InfluenceFieldNames) { influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } @@ -72,21 +66,18 @@ CAnomalyDetectorModel *CEventRateModelFactory::makeModel(const SModelInitializat 
influenceCalculators);
}
-CAnomalyDetectorModel *CEventRateModelFactory::makeModel(const SModelInitializationData &initData,
-                                                         core::CStateRestoreTraverser &traverser) const
-{
+CAnomalyDetectorModel* CEventRateModelFactory::makeModel(const SModelInitializationData& initData,
+                                                         core::CStateRestoreTraverser& traverser) const {
    TDataGathererPtr dataGatherer = initData.s_DataGatherer;
-    if (!dataGatherer)
-    {
+    if (!dataGatherer) {
        LOG_ERROR("NULL data gatherer");
        return 0;
    }
-    const TFeatureVec &features = dataGatherer->features();
+    const TFeatureVec& features = dataGatherer->features();
    TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators;
    influenceCalculators.reserve(m_InfluenceFieldNames.size());
-    for (const auto &name : m_InfluenceFieldNames)
-    {
+    for (const auto& name : m_InfluenceFieldNames) {
        influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features));
    }
@@ -95,11 +86,11 @@ CAnomalyDetectorModel *CEventRateModelFactory::makeModel(const SModelInitializat
                              this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true),
                              this->defaultCorrelatePriors(features),
                              this->defaultCorrelates(features),
-                              influenceCalculators, traverser);
+                              influenceCalculators,
+                              traverser);
}
-CDataGatherer *CEventRateModelFactory::makeDataGatherer(const SGathererInitializationData &initData) const
-{
+CDataGatherer* CEventRateModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const {
    return new CDataGatherer(model_t::E_EventRate,
                             m_SummaryMode,
                             this->modelParams(),
@@ -117,9 +108,8 @@ CDataGatherer *CEventRateModelFactory::makeDataGatherer(const SGathererInitializ
                             initData.s_SampleOverrideCount);
}
-CDataGatherer *CEventRateModelFactory::makeDataGatherer(const std::string &partitionFieldValue,
-                                                        core::CStateRestoreTraverser &traverser) const
-{
+CDataGatherer* CEventRateModelFactory::makeDataGatherer(const std::string& partitionFieldValue,
+                                                        core::CStateRestoreTraverser& traverser) const {
    return new CDataGatherer(model_t::E_EventRate,
                             m_SummaryMode,
                             this->modelParams(),
@@ -135,26 +125,21 @@ CDataGatherer *CEventRateModelFactory::makeDataGatherer(const std::string &parti
                             traverser);
}
-CEventRateModelFactory::TPriorPtr CEventRateModelFactory::defaultPrior(model_t::EFeature feature,
-                                                                       const SModelParams &params) const
-{
+CEventRateModelFactory::TPriorPtr CEventRateModelFactory::defaultPrior(model_t::EFeature feature, const SModelParams& params) const {
    // Categorical data all use the multinomial prior. The creation
    // of these priors is managed by defaultCategoricalPrior.
-    if (model_t::isCategorical(feature))
-    {
+    if (model_t::isCategorical(feature)) {
        return TPriorPtr();
    }
    // If the feature data only ever takes a single value we use a
    // special lightweight prior.
-    if (model_t::isConstant(feature))
-    {
+    if (model_t::isConstant(feature)) {
        return boost::make_shared<maths::CConstantPrior>();
    }
    // Gaussian mixture for modeling time-of-day and time-of-week.
-    if (model_t::isDiurnal(feature))
-    {
+    if (model_t::isDiurnal(feature)) {
        return this->timeOfDayPrior(params);
    }
@@ -169,17 +154,14 @@ CEventRateModelFactory::TPriorPtr CEventRateModelFactory::defaultPrior(model_t::
    maths_t::EDataType dataType = this->dataType();
-    maths::CGammaRateConjugate gammaPrior =
-            maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
+    maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
    maths::CLogNormalMeanPrecConjugate logNormalPrior =
-            maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
+        maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
-    maths::CNormalMeanPrecConjugate normalPrior =
-            maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate);
+    maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate);
-    maths::CPoissonMeanConjugate poissonPrior =
-            maths::CPoissonMeanConjugate::nonInformativePrior(0.0, params.s_DecayRate);
+    maths::CPoissonMeanConjugate poissonPrior = maths::CPoissonMeanConjugate::nonInformativePrior(0.0, params.s_DecayRate);
    // Create the component priors.
    TPriorPtrVec priors;
@@ -188,8 +170,7 @@ CEventRateModelFactory::TPriorPtr CEventRateModelFactory::defaultPrior(model_t::
    priors.emplace_back(logNormalPrior.clone());
    priors.emplace_back(normalPrior.clone());
    priors.emplace_back(poissonPrior.clone());
-    if (params.s_MinimumModeFraction <= 0.5)
-    {
+    if (params.s_MinimumModeFraction <= 0.5) {
        // Create the multimode prior.
        TPriorPtrVec modePriors;
        modePriors.reserve(3u);
@@ -211,43 +192,35 @@ CEventRateModelFactory::TPriorPtr CEventRateModelFactory::defaultPrior(model_t::
    return boost::make_shared<maths::COneOfNPrior>(priors, dataType, params.s_DecayRate);
}
-CEventRateModelFactory::TMultivariatePriorPtr
-CEventRateModelFactory::defaultMultivariatePrior(model_t::EFeature feature,
-                                                 const SModelParams &params) const
-{
+CEventRateModelFactory::TMultivariatePriorPtr CEventRateModelFactory::defaultMultivariatePrior(model_t::EFeature feature,
+                                                                                               const SModelParams& params) const {
    std::size_t dimension = model_t::dimension(feature);
    TMultivariatePriorPtrVec priors;
    priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u);
    TMultivariatePriorPtr multivariateNormal(this->multivariateNormalPrior(dimension, params));
    priors.push_back(multivariateNormal);
-    if (params.s_MinimumModeFraction <= 0.5)
-    {
+    if (params.s_MinimumModeFraction <= 0.5) {
        priors.push_back(this->multivariateMultimodalPrior(dimension, params, *multivariateNormal));
    }
    return this->multivariateOneOfNPrior(dimension, params, priors);
}
-CEventRateModelFactory::TMultivariatePriorPtr
-CEventRateModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/,
-                                              const SModelParams &params) const
-{
+CEventRateModelFactory::TMultivariatePriorPtr CEventRateModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/,
+                                                                                            const SModelParams& params) const {
    TMultivariatePriorPtrVec priors;
    priors.reserve(params.s_MinimumModeFraction <= 0.5 ?
2u : 1u); TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(2, params); priors.push_back(multivariateNormal); - if (params.s_MinimumModeFraction <= 0.5) - { + if (params.s_MinimumModeFraction <= 0.5) { priors.push_back(this->multivariateMultimodalPrior(2, params, *multivariateNormal)); } return this->multivariateOneOfNPrior(2, params, priors); } -const CSearchKey &CEventRateModelFactory::searchKey() const -{ - if (!m_SearchKeyCache) - { +const CSearchKey& CEventRateModelFactory::searchKey() const { + if (!m_SearchKeyCache) { m_SearchKeyCache.reset(CSearchKey(m_Identifier, function_t::function(m_Features), m_UseNull, @@ -261,33 +234,28 @@ const CSearchKey &CEventRateModelFactory::searchKey() const return *m_SearchKeyCache; } -bool CEventRateModelFactory::isSimpleCount() const -{ +bool CEventRateModelFactory::isSimpleCount() const { return CSearchKey::isSimpleCount(function_t::function(m_Features), m_PersonFieldName); } -model_t::ESummaryMode CEventRateModelFactory::summaryMode() const -{ +model_t::ESummaryMode CEventRateModelFactory::summaryMode() const { return m_SummaryMode; } -maths_t::EDataType CEventRateModelFactory::dataType() const -{ +maths_t::EDataType CEventRateModelFactory::dataType() const { return maths_t::E_IntegerData; } -void CEventRateModelFactory::identifier(int identifier) -{ +void CEventRateModelFactory::identifier(int identifier) { m_Identifier = identifier; m_SearchKeyCache.reset(); } -void CEventRateModelFactory::fieldNames(const std::string &partitionFieldName, - const std::string &/*overFieldName*/, - const std::string &byFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames) -{ +void CEventRateModelFactory::fieldNames(const std::string& partitionFieldName, + const std::string& /*overFieldName*/, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames) { m_PartitionFieldName = partitionFieldName; m_PersonFieldName = byFieldName; m_ValueFieldName = valueFieldName; @@ -295,37 +263,30 @@ void CEventRateModelFactory::fieldNames(const std::string &partitionFieldName, m_SearchKeyCache.reset(); } -void CEventRateModelFactory::useNull(bool useNull) -{ +void CEventRateModelFactory::useNull(bool useNull) { m_UseNull = useNull; m_SearchKeyCache.reset(); } -void CEventRateModelFactory::features(const TFeatureVec &features) -{ +void CEventRateModelFactory::features(const TFeatureVec& features) { m_Features = features; m_SearchKeyCache.reset(); } -void CEventRateModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) -{ +void CEventRateModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) { m_BucketResultsDelay = bucketResultsDelay; } -CEventRateModelFactory::TStrCRefVec CEventRateModelFactory::partitioningFields() const -{ +CEventRateModelFactory::TStrCRefVec CEventRateModelFactory::partitioningFields() const { TStrCRefVec result; result.reserve(2); - if (!m_PartitionFieldName.empty()) - { + if (!m_PartitionFieldName.empty()) { result.emplace_back(m_PartitionFieldName); } - if (!m_PersonFieldName.empty()) - { + if (!m_PersonFieldName.empty()) { result.emplace_back(m_PersonFieldName); } return result; } - } } diff --git a/lib/model/CEventRatePopulationModel.cc b/lib/model/CEventRatePopulationModel.cc index 459c903394..f1b6156bd4 100644 --- a/lib/model/CEventRatePopulationModel.cc +++ b/lib/model/CEventRatePopulationModel.cc @@ -35,13 +35,10 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { using 
TDouble2Vec = core::CSmallVector<double, 2>;
using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>;
@@ -55,8 +52,7 @@ using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector<TFeatureSizeSizePrFeatureDataPrVecPr>;
 //! \brief The values and weights for an attribute.
-struct SValuesAndWeights
-{
+struct SValuesAndWeights {
    maths::CModel::TTimeDouble2VecSizeTrVec s_Values;
    maths::CModelAddSamplesParams::TDouble2Vec4VecVec s_Weights;
};
@@ -71,173 +67,135 @@ const std::string FEATURE_CORRELATE_MODELS_TAG("e");
 const std::string MEMORY_ESTIMATOR_TAG("f");
 const std::string EMPTY_STRING("");
-const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight,
-                                                    maths_t::E_SampleWinsorisationWeight};
+const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight};
 const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight);
-
}
-CEventRatePopulationModel::CEventRatePopulationModel(const SModelParams &params,
-                                                     const TDataGathererPtr &dataGatherer,
-                                                     const TFeatureMathsModelPtrPrVec &newFeatureModels,
-                                                     const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
-                                                     const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
-                                                     const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators) :
-        CPopulationModel(params, dataGatherer, influenceCalculators),
-        m_CurrentBucketStats( dataGatherer->currentBucketStartTime()
-                             - dataGatherer->bucketLength()),
-        m_NewAttributeProbabilityPrior(
-                maths::CMultinomialConjugate::nonInformativePrior(
-                        boost::numeric::bounds<int>::highest(), params.s_DecayRate)),
-        m_AttributeProbabilityPrior(
-                maths::CMultinomialConjugate::nonInformativePrior(
-                        boost::numeric::bounds<int>::highest(), params.s_DecayRate)),
-        m_Probabilities(0.05)
-{
+CEventRatePopulationModel::CEventRatePopulationModel(const SModelParams& params,
+                                                     const TDataGathererPtr& dataGatherer,
+                                                     const TFeatureMathsModelPtrPrVec& newFeatureModels,
+                                                     const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+                                                     const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels,
+                                                     const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators)
+    : CPopulationModel(params, dataGatherer, influenceCalculators),
+      m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - dataGatherer->bucketLength()),
+      m_NewAttributeProbabilityPrior(
+          maths::CMultinomialConjugate::nonInformativePrior(boost::numeric::bounds<int>::highest(), params.s_DecayRate)),
+      m_AttributeProbabilityPrior(
+          maths::CMultinomialConjugate::nonInformativePrior(boost::numeric::bounds<int>::highest(), params.s_DecayRate)),
+      m_Probabilities(0.05) {
    this->initialize(newFeatureModels, newFeatureCorrelateModelPriors, featureCorrelatesModels);
}
-CEventRatePopulationModel::CEventRatePopulationModel(const SModelParams &params,
-                                                     const TDataGathererPtr &dataGatherer,
-                                                     const TFeatureMathsModelPtrPrVec &newFeatureModels,
-                                                     const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
-                                                     const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
-                                                     const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators,
-                                                     core::CStateRestoreTraverser &traverser) :
-        CPopulationModel(params, dataGatherer, influenceCalculators),
-        m_CurrentBucketStats( dataGatherer->currentBucketStartTime()
-                             - dataGatherer->bucketLength()),
-        m_Probabilities(0.05)
-{
+CEventRatePopulationModel::CEventRatePopulationModel(const SModelParams& params,
+                                                     const TDataGathererPtr& dataGatherer,
+                                                     const TFeatureMathsModelPtrPrVec& newFeatureModels,
+                                                     const TFeatureMultivariatePriorPtrPrVec& 
newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + core::CStateRestoreTraverser& traverser) + : CPopulationModel(params, dataGatherer, influenceCalculators), + m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - dataGatherer->bucketLength()), + m_Probabilities(0.05) { this->initialize(newFeatureModels, newFeatureCorrelateModelPriors, featureCorrelatesModels); - traverser.traverseSubLevel(boost::bind(&CEventRatePopulationModel::acceptRestoreTraverser, - this, _1)); + traverser.traverseSubLevel(boost::bind(&CEventRatePopulationModel::acceptRestoreTraverser, this, _1)); } -void CEventRatePopulationModel::initialize(const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels) -{ +void CEventRatePopulationModel::initialize(const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels) { m_FeatureModels.reserve(newFeatureModels.size()); - for (const auto &model : newFeatureModels) - { + for (const auto& model : newFeatureModels) { m_FeatureModels.emplace_back(model.first, model.second); } - std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), - [](const SFeatureModels &lhs, - const SFeatureModels &rhs) - { return lhs.s_Feature < rhs.s_Feature; } ); + std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), [](const SFeatureModels& lhs, const SFeatureModels& rhs) { + return lhs.s_Feature < rhs.s_Feature; + }); - if (this->params().s_MultivariateByFields) - { + if (this->params().s_MultivariateByFields) { m_FeatureCorrelatesModels.reserve(featureCorrelatesModels.size()); - for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i) - { - m_FeatureCorrelatesModels.emplace_back(featureCorrelatesModels[i].first, - newFeatureCorrelateModelPriors[i].second, - featureCorrelatesModels[i].second); + for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i) { + m_FeatureCorrelatesModels.emplace_back( + featureCorrelatesModels[i].first, newFeatureCorrelateModelPriors[i].second, featureCorrelatesModels[i].second); } - std::sort(m_FeatureCorrelatesModels.begin(), m_FeatureCorrelatesModels.end(), - [](const SFeatureCorrelateModels &lhs, - const SFeatureCorrelateModels &rhs) - { return lhs.s_Feature < rhs.s_Feature; }); + std::sort(m_FeatureCorrelatesModels.begin(), + m_FeatureCorrelatesModels.end(), + [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) { return lhs.s_Feature < rhs.s_Feature; }); } } -CEventRatePopulationModel::CEventRatePopulationModel(bool isForPersistence, - const CEventRatePopulationModel &other) : - CPopulationModel(isForPersistence, other), - m_CurrentBucketStats(0), // Not needed for persistence so minimally constructed - m_NewAttributeProbabilityPrior(other.m_NewAttributeProbabilityPrior), - m_AttributeProbabilityPrior(other.m_AttributeProbabilityPrior), - m_Probabilities(0.05), // Not needed for persistence so minimally construct - m_MemoryEstimator(other.m_MemoryEstimator) -{ - if (!isForPersistence) - { +CEventRatePopulationModel::CEventRatePopulationModel(bool isForPersistence, const CEventRatePopulationModel& other) + : CPopulationModel(isForPersistence, other), + m_CurrentBucketStats(0), // Not needed for persistence so minimally constructed + 
m_NewAttributeProbabilityPrior(other.m_NewAttributeProbabilityPrior), + m_AttributeProbabilityPrior(other.m_AttributeProbabilityPrior), + m_Probabilities(0.05), // Not needed for persistence so minimally construct + m_MemoryEstimator(other.m_MemoryEstimator) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } m_FeatureModels.reserve(m_FeatureModels.size()); - for (const auto &feature : other.m_FeatureModels) - { + for (const auto& feature : other.m_FeatureModels) { m_FeatureModels.emplace_back(feature.s_Feature, feature.s_NewModel); m_FeatureModels.back().s_Models.reserve(feature.s_Models.size()); - for (const auto &model : feature.s_Models) - { + for (const auto& model : feature.s_Models) { m_FeatureModels.back().s_Models.emplace_back(model->cloneForPersistence()); } } m_FeatureCorrelatesModels.reserve(other.m_FeatureCorrelatesModels.size()); - for (const auto &feature : other.m_FeatureCorrelatesModels) - { - m_FeatureCorrelatesModels.emplace_back(feature.s_Feature, feature.s_ModelPrior, - TCorrelationsPtr(feature.s_Models->cloneForPersistence())); + for (const auto& feature : other.m_FeatureCorrelatesModels) { + m_FeatureCorrelatesModels.emplace_back( + feature.s_Feature, feature.s_ModelPrior, TCorrelationsPtr(feature.s_Models->cloneForPersistence())); } } -void CEventRatePopulationModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(POPULATION_STATE_TAG, - boost::bind(&CEventRatePopulationModel::doAcceptPersistInserter, this, _1)); +void CEventRatePopulationModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(POPULATION_STATE_TAG, boost::bind(&CEventRatePopulationModel::doAcceptPersistInserter, this, _1)); inserter.insertLevel(NEW_ATTRIBUTE_PROBABILITY_PRIOR_TAG, - boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, - &m_NewAttributeProbabilityPrior, _1)); + boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, &m_NewAttributeProbabilityPrior, _1)); inserter.insertLevel(ATTRIBUTE_PROBABILITY_PRIOR_TAG, - boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, - &m_AttributeProbabilityPrior, _1)); - for (const auto &feature : m_FeatureModels) - { - inserter.insertLevel(FEATURE_MODELS_TAG, - boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1)); + boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, &m_AttributeProbabilityPrior, _1)); + for (const auto& feature : m_FeatureModels) { + inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1)); } - for (const auto &feature : m_FeatureCorrelatesModels) - { - inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, - boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1)); + for (const auto& feature : m_FeatureCorrelatesModels) { + inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1)); } core::CPersistUtils::persist(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, inserter); } -bool CEventRatePopulationModel::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CEventRatePopulationModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { std::size_t i = 0u, j = 0u; - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE(POPULATION_STATE_TAG, - 
traverser.traverseSubLevel(boost::bind(&CEventRatePopulationModel::doAcceptRestoreTraverser, - this, _1))) - RESTORE_NO_ERROR(NEW_ATTRIBUTE_PROBABILITY_PRIOR_TAG, - maths::CMultinomialConjugate restored( - this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser); - m_NewAttributeProbabilityPrior.swap(restored)) - RESTORE_NO_ERROR(ATTRIBUTE_PROBABILITY_PRIOR_TAG, - maths::CMultinomialConjugate restored( - this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser); - m_AttributeProbabilityPrior.swap(restored)) + traverser.traverseSubLevel(boost::bind(&CEventRatePopulationModel::doAcceptRestoreTraverser, this, _1))) + RESTORE_NO_ERROR( + NEW_ATTRIBUTE_PROBABILITY_PRIOR_TAG, + maths::CMultinomialConjugate restored(this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser); + m_NewAttributeProbabilityPrior.swap(restored)) + RESTORE_NO_ERROR( + ATTRIBUTE_PROBABILITY_PRIOR_TAG, + maths::CMultinomialConjugate restored(this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser); + m_AttributeProbabilityPrior.swap(restored)) RESTORE(FEATURE_MODELS_TAG, - i == m_FeatureModels.size() - || traverser.traverseSubLevel(boost::bind(&SFeatureModels::acceptRestoreTraverser, - &m_FeatureModels[i++], boost::cref(this->params()), _1))) - RESTORE(FEATURE_CORRELATE_MODELS_TAG, - j == m_FeatureCorrelatesModels.size() - || traverser.traverseSubLevel(boost::bind(&SFeatureCorrelateModels::acceptRestoreTraverser, - &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) - RESTORE(MEMORY_ESTIMATOR_TAG, - core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) - } - while (traverser.next()); - - for (auto &feature : m_FeatureModels) - { - for (auto &model : feature.s_Models) - { - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + i == m_FeatureModels.size() || + traverser.traverseSubLevel( + boost::bind(&SFeatureModels::acceptRestoreTraverser, &m_FeatureModels[i++], boost::cref(this->params()), _1))) + RESTORE( + FEATURE_CORRELATE_MODELS_TAG, + j == m_FeatureCorrelatesModels.size() || + traverser.traverseSubLevel(boost::bind( + &SFeatureCorrelateModels::acceptRestoreTraverser, &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) + RESTORE(MEMORY_ESTIMATOR_TAG, core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) + } while (traverser.next()); + + for (auto& feature : m_FeatureModels) { + for (auto& model : feature.s_Models) { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { model->modelCorrelations(*correlates.s_Models); } } @@ -247,111 +205,88 @@ bool CEventRatePopulationModel::acceptRestoreTraverser(core::CStateRestoreTraver return true; } -CAnomalyDetectorModel *CEventRatePopulationModel::cloneForPersistence() const -{ +CAnomalyDetectorModel* CEventRatePopulationModel::cloneForPersistence() const { return new CEventRatePopulationModel(true, *this); } -model_t::EModelType CEventRatePopulationModel::category() const -{ +model_t::EModelType CEventRatePopulationModel::category() const { return model_t::E_EventRateOnline; } -bool CEventRatePopulationModel::isEventRate() const -{ +bool CEventRatePopulationModel::isEventRate() const { return true; } -bool CEventRatePopulationModel::isMetric() const -{ +bool CEventRatePopulationModel::isMetric() const { return false; } CEventRatePopulationModel::TDouble1Vec - 
CEventRatePopulationModel::currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const -{ - const TSizeSizePrFeatureDataPrVec &featureData = this->featureData(feature, time); +CEventRatePopulationModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { + const TSizeSizePrFeatureDataPrVec& featureData = this->featureData(feature, time); auto i = find(featureData, pid, cid); return i != featureData.end() ? extractValue(feature, *i) : TDouble1Vec(1, 0.0); } -CEventRatePopulationModel::TDouble1Vec - CEventRatePopulationModel::baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const -{ - const maths::CModel *model{this->model(feature, cid)}; - if (!model) - { +CEventRatePopulationModel::TDouble1Vec CEventRatePopulationModel::baselineBucketMean(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + model_t::CResultType type, + const TSizeDoublePr1Vec& correlated, + core_t::TTime time) const { + const maths::CModel* model{this->model(feature, cid)}; + if (!model) { return TDouble1Vec(); } static const TSizeDoublePr1Vec NO_CORRELATED; TDouble2Vec hint; - if (model_t::isDiurnal(feature)) - { + if (model_t::isDiurnal(feature)) { hint = this->currentBucketValue(feature, pid, cid, time); } TDouble1Vec result(model->predict(time, type.isUnconditional() ? NO_CORRELATED : correlated, hint)); double probability = 1.0; - if (model_t::isConstant(feature) && !m_AttributeProbabilities.lookup(pid, probability)) - { + if (model_t::isConstant(feature) && !m_AttributeProbabilities.lookup(pid, probability)) { probability = 1.0; } - for (auto &coord : result) - { + for (auto& coord : result) { coord = probability * model_t::inverseOffsetCountToZero(feature, coord); } - this->correctBaselineForInterim(feature, pid, cid, type, correlated, - this->currentBucketInterimCorrections(), result); + this->correctBaselineForInterim(feature, pid, cid, type, correlated, this->currentBucketInterimCorrections(), result); TDouble1VecDouble1VecPr support{model_t::support(feature)}; return maths::CTools::truncate(result, support.first, support.second); } -bool CEventRatePopulationModel::bucketStatsAvailable(core_t::TTime time) const -{ - return time >= m_CurrentBucketStats.s_StartTime - && time < m_CurrentBucketStats.s_StartTime + this->bucketLength(); +bool CEventRatePopulationModel::bucketStatsAvailable(core_t::TTime time) const { + return time >= m_CurrentBucketStats.s_StartTime && time < m_CurrentBucketStats.s_StartTime + this->bucketLength(); } -void CEventRatePopulationModel::sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CEventRatePopulationModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (!gatherer.dataAvailable(startTime)) - { + if (!gatherer.dataAvailable(startTime)) { return; } this->createUpdateNewModels(startTime, resourceMonitor); this->currentBucketInterimCorrections().clear(); - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { 
this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor); // Currently, we only remember one bucket. m_CurrentBucketStats.s_StartTime = time; - TSizeUInt64PrVec &personCounts = m_CurrentBucketStats.s_PersonCounts; + TSizeUInt64PrVec& personCounts = m_CurrentBucketStats.s_PersonCounts; gatherer.personNonZeroCounts(time, personCounts); this->applyFilter(model_t::E_XF_Over, false, this->personFilter(), personCounts); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); - for (auto &featureData_ : featureData) - { + for (auto& featureData_ : featureData) { model_t::EFeature feature = featureData_.first; - TSizeSizePrFeatureDataPrVec &data = m_CurrentBucketStats.s_FeatureData[feature]; + TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature]; data.swap(featureData_.second); LOG_TRACE(model_t::print(feature) << ": " << core::CContainerPrinter::print(data)); this->applyFilters(false, this->personFilter(), this->attributeFilter(), data); @@ -359,22 +294,17 @@ void CEventRatePopulationModel::sampleBucketStatistics(core_t::TTime startTime, } } -void CEventRatePopulationModel::sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CEventRatePopulationModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (!gatherer.validateSampleTimes(startTime, endTime)) - { + if (!gatherer.validateSampleTimes(startTime, endTime)) { return; } this->createUpdateNewModels(startTime, resourceMonitor); this->currentBucketInterimCorrections().clear(); - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { LOG_TRACE("Sampling [" << time << "," << time + bucketLength << ")"); gatherer.sampleNow(time); @@ -382,13 +312,11 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, gatherer.featureData(time, bucketLength, featureData); this->CPopulationModel::sample(time, time + bucketLength, resourceMonitor); - const TTimeVec &preSampleAttributeLastBucketTimes = this->attributeLastBucketTimes(); + const TTimeVec& preSampleAttributeLastBucketTimes = this->attributeLastBucketTimes(); TSizeTimeUMap attributeLastBucketTimesMap; - for (const auto &featureData_ : featureData) - { - TSizeSizePrFeatureDataPrVec &data = m_CurrentBucketStats.s_FeatureData[featureData_.first]; - for (const auto &data_ : data) - { + for (const auto& featureData_ : featureData) { + TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[featureData_.first]; + for (const auto& data_ : data) { std::size_t cid = CDataGatherer::extractAttributeId(data_); attributeLastBucketTimesMap[cid] = preSampleAttributeLastBucketTimes[cid]; } @@ -396,37 +324,30 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, // Currently, we only remember one bucket. 
m_CurrentBucketStats.s_StartTime = time;
-        TSizeUInt64PrVec &personCounts = m_CurrentBucketStats.s_PersonCounts;
+        TSizeUInt64PrVec& personCounts = m_CurrentBucketStats.s_PersonCounts;
        gatherer.personNonZeroCounts(time, personCounts);
        this->applyFilter(model_t::E_XF_Over, true, this->personFilter(), personCounts);
-
-        for (auto &featureData_ : featureData)
-        {
+        for (auto& featureData_ : featureData) {
            model_t::EFeature feature = featureData_.first;
-            TSizeSizePrFeatureDataPrVec &data = m_CurrentBucketStats.s_FeatureData[feature];
+            TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature];
            data.swap(featureData_.second);
            LOG_TRACE(model_t::print(feature) << ": " << core::CContainerPrinter::print(data));
-            if (feature == model_t::E_PopulationUniquePersonCountByAttribute)
-            {
+            if (feature == model_t::E_PopulationUniquePersonCountByAttribute) {
                TDoubleVec categories;
                TDoubleVec concentrations;
                categories.reserve(data.size());
                concentrations.reserve(data.size());
-                for (const auto &tuple : data)
-                {
+                for (const auto& tuple : data) {
                    categories.push_back(static_cast<double>(CDataGatherer::extractAttributeId(tuple)));
                    concentrations.push_back(static_cast<double>(CDataGatherer::extractData(tuple).s_Count));
                }
-                maths::CMultinomialConjugate prior(boost::numeric::bounds<int>::highest(),
-                                                   categories,
-                                                   concentrations);
+                maths::CMultinomialConjugate prior(boost::numeric::bounds<int>::highest(), categories, concentrations);
                m_AttributeProbabilityPrior.swap(prior);
                continue;
            }
-            if (model_t::isCategorical(feature))
-            {
+            if (model_t::isCategorical(feature)) {
                continue;
            }
@@ -438,40 +359,32 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime,
            // Set up fuzzy de-duplication.
            TSizeFuzzyDeduplicateUMap fuzzy;
-            if (data.size() >= this->params().s_MinimumToDeduplicate)
-            {
-                for (const auto &data_ : data)
-                {
+            if (data.size() >= this->params().s_MinimumToDeduplicate) {
+                for (const auto& data_ : data) {
                    std::size_t cid = CDataGatherer::extractAttributeId(data_);
                    uint64_t count = CDataGatherer::extractData(data_).s_Count;
                    fuzzy[cid].add({static_cast<double>(count)});
                }
-                for (auto &fuzzy_ : fuzzy)
-                {
+                for (auto& fuzzy_ : fuzzy) {
-                    fuzzy_.second.computeEpsilons(bucketLength,
-                                                  this->params().s_MinimumToDeduplicate);
+                    fuzzy_.second.computeEpsilons(bucketLength, this->params().s_MinimumToDeduplicate);
                }
            }
-            for (const auto &data_ : data)
-            {
+            for (const auto& data_ : data) {
                std::size_t pid = CDataGatherer::extractPersonId(data_);
                std::size_t cid = CDataGatherer::extractAttributeId(data_);
                uint64_t count = CDataGatherer::extractData(data_).s_Count;
                double value = model_t::offsetCountToZero(feature, static_cast<double>(count));
-                maths::CModel *model{this->model(feature, cid)};
-                if (!model)
-                {
+                maths::CModel* model{this->model(feature, cid)};
+                if (!model) {
                    LOG_ERROR("Missing model for " << this->attributeName(cid));
                    continue;
                }
-                if (this->shouldIgnoreSample(feature, pid, cid, sampleTime))
-                {
+                if (this->shouldIgnoreSample(feature, pid, cid, sampleTime)) {
                    core_t::TTime skipTime = sampleTime - attributeLastBucketTimesMap[cid];
-                    if (skipTime > 0)
-                    {
+                    if (skipTime > 0) {
                        model->skipTime(skipTime);
                        // Update the last time so we don't advance the same model
                        // multiple times (once per person)
@@ -481,50 +394,39 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime,
                    continue;
                }
-                LOG_TRACE("Adding " << value
-                          << " for person = " << gatherer.personName(pid)
-                          << " and attribute = " << gatherer.attributeName(cid));
+                LOG_TRACE("Adding " << value << " for person = " << gatherer.personName(pid) 
+ << " and attribute = " << gatherer.attributeName(cid)); - SValuesAndWeights &attribute = attributes[cid]; - std::size_t duplicate = data.size() >= this->params().s_MinimumToDeduplicate ? - fuzzy[cid].duplicate(sampleTime, {value}) : - attribute.s_Values.size(); + SValuesAndWeights& attribute = attributes[cid]; + std::size_t duplicate = data.size() >= this->params().s_MinimumToDeduplicate ? fuzzy[cid].duplicate(sampleTime, {value}) + : attribute.s_Values.size(); - if (duplicate < attribute.s_Values.size()) - { - attribute.s_Weights[duplicate][0][0] += this->sampleRateWeight(pid, cid) - * this->learnRate(feature); - } - else - { + if (duplicate < attribute.s_Values.size()) { + attribute.s_Weights[duplicate][0][0] += this->sampleRateWeight(pid, cid) * this->learnRate(feature); + } else { attribute.s_Values.emplace_back(sampleTime, TDouble2Vec{value}, pid); - attribute.s_Weights.emplace_back( - TDouble2Vec4Vec{{ this->sampleRateWeight(pid, cid) - * this->learnRate(feature)}, - model->winsorisationWeight(1.0, sampleTime, {value})}); + attribute.s_Weights.emplace_back(TDouble2Vec4Vec{{this->sampleRateWeight(pid, cid) * this->learnRate(feature)}, + model->winsorisationWeight(1.0, sampleTime, {value})}); } } - for (auto &attribute : attributes) - { + for (auto& attribute : attributes) { std::size_t cid = attribute.first; maths::CModelAddSamplesParams params; params.integer(true) - .nonNegative(true) - .propagationInterval(this->propagationTime(cid, sampleTime)) - .weightStyles(SAMPLE_WEIGHT_STYLES) - .trendWeights(attribute.second.s_Weights) - .priorWeights(attribute.second.s_Weights); - maths::CModel *model{this->model(feature, cid)}; - if (model->addSamples(params, attribute.second.s_Values) == maths::CModel::E_Reset) - { + .nonNegative(true) + .propagationInterval(this->propagationTime(cid, sampleTime)) + .weightStyles(SAMPLE_WEIGHT_STYLES) + .trendWeights(attribute.second.s_Weights) + .priorWeights(attribute.second.s_Weights); + maths::CModel* model{this->model(feature, cid)}; + if (model->addSamples(params, attribute.second.s_Values) == maths::CModel::E_Reset) { gatherer.resetSampleCount(cid); } } } - for (const auto &feature : m_FeatureCorrelatesModels) - { + for (const auto& feature : m_FeatureCorrelatesModels) { feature.s_Models->processSamples(SAMPLE_WEIGHT_STYLES); } @@ -533,19 +435,14 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, } } -void CEventRatePopulationModel::prune(std::size_t maximumAge) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CEventRatePopulationModel::prune(std::size_t maximumAge) { + CDataGatherer& gatherer = this->dataGatherer(); TSizeVec peopleToRemove; TSizeVec attributesToRemove; - this->peopleAndAttributesToRemove(m_CurrentBucketStats.s_StartTime, - maximumAge, - peopleToRemove, - attributesToRemove); + this->peopleAndAttributesToRemove(m_CurrentBucketStats.s_StartTime, maximumAge, peopleToRemove, attributesToRemove); - if (peopleToRemove.empty() && attributesToRemove.empty()) - { + if (peopleToRemove.empty() && attributesToRemove.empty()) { return; } @@ -559,20 +456,17 @@ void CEventRatePopulationModel::prune(std::size_t maximumAge) gatherer.recyclePeople(peopleToRemove); gatherer.recycleAttributes(attributesToRemove); - if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) - { + if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), featureData); - for (auto &feature : 
featureData)
-        {
+        for (auto& feature : featureData) {
            m_CurrentBucketStats.s_FeatureData[feature.first].swap(feature.second);
        }
    }
    TDoubleVec categoriesToRemove;
    categoriesToRemove.reserve(attributesToRemove.size());
-    for (auto attribute : attributesToRemove)
-    {
+    for (auto attribute : attributesToRemove) {
        categoriesToRemove.push_back(static_cast<double>(attribute));
    }
    std::sort(categoriesToRemove.begin(), categoriesToRemove.end());
@@ -586,20 +480,17 @@ void CEventRatePopulationModel::prune(std::size_t maximumAge)
 bool CEventRatePopulationModel::computeProbability(std::size_t pid,
                                                    core_t::TTime startTime,
                                                    core_t::TTime endTime,
-                                                   CPartitioningFields &partitioningFields,
+                                                   CPartitioningFields& partitioningFields,
                                                    std::size_t numberAttributeProbabilities,
-                                                   SAnnotatedProbability &result) const
-{
-    const CDataGatherer &gatherer = this->dataGatherer();
+                                                   SAnnotatedProbability& result) const {
+    const CDataGatherer& gatherer = this->dataGatherer();
    core_t::TTime bucketLength = gatherer.bucketLength();
-    if (endTime != startTime + bucketLength)
-    {
+    if (endTime != startTime + bucketLength) {
        LOG_ERROR("Can only compute probability for single bucket");
        return false;
    }
-    if (pid > gatherer.numberPeople())
-    {
+    if (pid > gatherer.numberPeople()) {
        LOG_TRACE("No person for pid = " << pid);
        return false;
    }
@@ -620,8 +511,7 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
    CProbabilityAndInfluenceCalculator pConditionalTemplate(this->params().s_InfluenceCutoff);
    pConditionalTemplate.addAggregator(maths::CJointProbabilityOfLessLikelySamples());
    pConditionalTemplate.addAggregator(maths::CProbabilityOfExtremeSample());
-    if (this->params().s_CacheProbabilities)
-    {
+    if (this->params().s_CacheProbabilities) {
        pConditionalTemplate.addCache(m_Probabilities);
    }
    TSizeProbabilityAndInfluenceUMap pConditional;
@@ -637,83 +527,69 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
    resultBuilder.attributeProbabilityPrior(&m_AttributeProbabilityPrior);
    resultBuilder.personAttributeProbabilityPrior(&personAttributeProbabilityPrior);
-    for (std::size_t i = 0u; i < gatherer.numberFeatures(); ++i)
-    {
+    for (std::size_t i = 0u; i < gatherer.numberFeatures(); ++i) {
        model_t::EFeature feature = gatherer.feature(i);
        LOG_TRACE("feature = " << model_t::print(feature));
-        if (feature == model_t::E_PopulationAttributeTotalCountByPerson)
-        {
-            const TSizeSizePrFeatureDataPrVec &data = this->featureData(feature, startTime);
+        if (feature == model_t::E_PopulationAttributeTotalCountByPerson) {
+            const TSizeSizePrFeatureDataPrVec& data = this->featureData(feature, startTime);
            TSizeSizePr range = personRange(data, pid);
-            for (std::size_t j = range.first; j < range.second; ++j)
-            {
+            for (std::size_t j = range.first; j < range.second; ++j) {
                TDouble1Vec category(1, static_cast<double>(CDataGatherer::extractAttributeId(data[j])));
                TDouble4Vec1Vec weights(1, TDouble4Vec(1, static_cast<double>(CDataGatherer::extractData(data[j]).s_Count)));
                personAttributeProbabilityPrior.addSamples(maths::CConstantWeights::COUNT, category, weights);
            }
            continue;
        }
-        if (model_t::isCategorical(feature))
-        {
+        if (model_t::isCategorical(feature)) {
            continue;
        }
-        const TSizeSizePrFeatureDataPrVec &featureData = this->featureData(feature, startTime);
+        const TSizeSizePrFeatureDataPrVec& featureData = this->featureData(feature, startTime);
        TSizeSizePr range = personRange(featureData, pid);
-        for (std::size_t j = range.first; j < range.second; ++j)
-        {
+        for (std::size_t j = range.first; j < range.second; ++j) {
            std::size_t cid = 
CDataGatherer::extractAttributeId(featureData[j]); - if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, cid, - model_t::sampleTime(feature, startTime, bucketLength))) - { + if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, cid, model_t::sampleTime(feature, startTime, bucketLength))) { continue; } partitioningFields.back().second = TStrCRef(gatherer.attributeName(cid)); - if (this->correlates(feature, pid, cid, startTime)) - { + if (this->correlates(feature, pid, cid, startTime)) { // TODO - } - else - { + } else { CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); this->fill(feature, pid, cid, startTime, result.isInterim(), params); model_t::CResultType type; TSize1Vec mostAnomalousCorrelate; if (pConditional.emplace(cid, pConditionalTemplate) - .first->second.addProbability(feature, cid, *params.s_Model, + .first->second.addProbability(feature, + cid, + *params.s_Model, params.s_ElapsedTime, params.s_ComputeProbabilityParams, - params.s_Time, params.s_Value, - params.s_Probability, params.s_Tail, - type, mostAnomalousCorrelate)) - { - LOG_TRACE("P(" << params.describe() - << ", attribute = " << gatherer.attributeName(cid) - << ", person = " << gatherer.personName(pid) << ") = " - << params.s_Probability); - CProbabilityAndInfluenceCalculator &calculator = - pConditional.emplace(cid, pConditionalTemplate).first->second; - const auto &influenceValues = CDataGatherer::extractData(featureData[j]).s_InfluenceValues; - for (std::size_t k = 0u; k < influenceValues.size(); ++k) - { - if (const CInfluenceCalculator *influenceCalculator = this->influenceCalculator(feature, k)) - { + params.s_Time, + params.s_Value, + params.s_Probability, + params.s_Tail, + type, + mostAnomalousCorrelate)) { + LOG_TRACE("P(" << params.describe() << ", attribute = " << gatherer.attributeName(cid) + << ", person = " << gatherer.personName(pid) << ") = " << params.s_Probability); + CProbabilityAndInfluenceCalculator& calculator = pConditional.emplace(cid, pConditionalTemplate).first->second; + const auto& influenceValues = CDataGatherer::extractData(featureData[j]).s_InfluenceValues; + for (std::size_t k = 0u; k < influenceValues.size(); ++k) { + if (const CInfluenceCalculator* influenceCalculator = this->influenceCalculator(feature, k)) { calculator.plugin(*influenceCalculator); calculator.addInfluences(*(gatherer.beginInfluencers() + k), influenceValues[k], params); } } minimumProbabilityFeatures[cid].add({params.s_Probability, feature}); - } - else - { - LOG_ERROR("Unable to compute P(" << params.describe() - << ", attribute = " << gatherer.attributeName(cid) - << ", person = " << gatherer.personName(pid) << ")"); + } else { + LOG_ERROR("Unable to compute P(" << params.describe() << ", attribute = " << gatherer.attributeName(cid) + << ", person = " << gatherer.personName(pid) << ")"); } } } @@ -722,15 +598,13 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid, CProbabilityAndInfluenceCalculator pJoint(this->params().s_InfluenceCutoff); pJoint.addAggregator(maths::CJointProbabilityOfLessLikelySamples()); - for (const auto &pConditional_ : pConditional) - { + for (const auto& pConditional_ : pConditional) { std::size_t cid = pConditional_.first; CProbabilityAndInfluenceCalculator pPersonAndAttribute(this->params().s_InfluenceCutoff); pPersonAndAttribute.addAggregator(maths::CJointProbabilityOfLessLikelySamples()); pPersonAndAttribute.add(pConditional_.second); double pAttribute; - if (m_AttributeProbabilities.lookup(cid, pAttribute)) - { + if 
(m_AttributeProbabilities.lookup(cid, pAttribute)) {
            pPersonAndAttribute.addProbability(pAttribute);
        }
        LOG_TRACE("P(" << gatherer.attributeName(cid) << ") = " << pAttribute);
@@ -742,42 +616,37 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
        // multinomial distribution.
        double w = 1.0;
        double pAttributeGivenPerson;
-        if (personAttributeProbabilityPrior.probability(static_cast<double>(cid),
-                                                        pAttributeGivenPerson))
-        {
-            w = maths::CCategoricalTools::probabilityOfCategory(pConditional.size(),
-                                                                pAttributeGivenPerson);
+        if (personAttributeProbabilityPrior.probability(static_cast<double>(cid), pAttributeGivenPerson)) {
+            w = maths::CCategoricalTools::probabilityOfCategory(pConditional.size(), pAttributeGivenPerson);
        }
        LOG_TRACE("w = " << w);
        pJoint.add(pPersonAndAttribute, w);
        auto feature = minimumProbabilityFeatures.find(cid);
-        if (feature == minimumProbabilityFeatures.end())
-        {
+        if (feature == minimumProbabilityFeatures.end()) {
            LOG_ERROR("No feature for " << gatherer.attributeName(cid));
-        }
-        else
-        {
+        } else {
            double p;
            pPersonAndAttribute.calculate(p);
-            resultBuilder.addAttributeProbability(cid, gatherer.attributeNamePtr(cid),
-                                                  pAttribute, p,
+            resultBuilder.addAttributeProbability(cid,
+                                                  gatherer.attributeNamePtr(cid),
+                                                  pAttribute,
+                                                  p,
                                                   model_t::CResultType::E_Unconditional,
                                                   (feature->second)[0].second,
-                                                  NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
+                                                  NO_CORRELATED_ATTRIBUTES,
+                                                  NO_CORRELATES);
        }
    }
-    if (pJoint.empty())
-    {
+    if (pJoint.empty()) {
        LOG_TRACE("No samples in [" << startTime << "," << endTime << ")");
        return false;
    }
    double p;
-    if (!pJoint.calculate(p, result.s_Influences))
-    {
+    if (!pJoint.calculate(p, result.s_Influences)) {
        LOG_ERROR("Failed to compute probability of " << this->personName(pid));
        return false;
    }
@@ -788,86 +657,66 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
    return true;
}
-bool CEventRatePopulationModel::computeTotalProbability(const std::string &/*person*/,
+bool CEventRatePopulationModel::computeTotalProbability(const std::string& /*person*/,
                                                         std::size_t /*numberAttributeProbabilities*/,
-                                                        TOptionalDouble &probability,
-                                                        TAttributeProbability1Vec &attributeProbabilities) const
-{
+                                                        TOptionalDouble& probability,
+                                                        TAttributeProbability1Vec& attributeProbabilities) const {
    probability = TOptionalDouble();
    attributeProbabilities.clear();
    return true;
}
-uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) const
-{
+uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) const {
    uint64_t seed = this->CPopulationModel::checksum(includeCurrentBucketStats);
    seed = maths::CChecksum::calculate(seed, m_NewAttributeProbabilityPrior);
-    if (includeCurrentBucketStats)
-    {
+    if (includeCurrentBucketStats) {
        seed = maths::CChecksum::calculate(seed, m_CurrentBucketStats.s_StartTime);
    }
    using TStrCRefStrCRefPr = std::pair<TStrCRef, TStrCRef>;
-    using TStrCRefStrCRefPrUInt64Map =
-            std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;
+    using TStrCRefStrCRefPrUInt64Map = std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;
-    const CDataGatherer &gatherer = this->dataGatherer();
+    const CDataGatherer& gatherer = this->dataGatherer();
    TStrCRefStrCRefPrUInt64Map hashes;
-    const TDoubleVec &categories = m_AttributeProbabilityPrior.categories();
-    const TDoubleVec &concentrations = m_AttributeProbabilityPrior.concentrations();
+    const TDoubleVec& categories = m_AttributeProbabilityPrior.categories();
+    const TDoubleVec& concentrations = m_AttributeProbabilityPrior.concentrations();
-    for (std::size_t i = 0u; i < categories.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < categories.size(); ++i) 
{
        std::size_t cid = static_cast<std::size_t>(categories[i]);
-        uint64_t &hash = hashes[{boost::cref(EMPTY_STRING),
-                                 boost::cref(this->attributeName(cid))}];
+        uint64_t& hash = hashes[{boost::cref(EMPTY_STRING), boost::cref(this->attributeName(cid))}];
        hash = maths::CChecksum::calculate(hash, concentrations[i]);
    }
-    for (const auto &feature : m_FeatureModels)
-    {
-        for (std::size_t cid = 0u; cid < feature.s_Models.size(); ++cid)
-        {
-            if (gatherer.isAttributeActive(cid))
-            {
-                uint64_t &hash = hashes[{boost::cref(EMPTY_STRING),
-                                         boost::cref(gatherer.attributeName(cid))}];
+    for (const auto& feature : m_FeatureModels) {
+        for (std::size_t cid = 0u; cid < feature.s_Models.size(); ++cid) {
+            if (gatherer.isAttributeActive(cid)) {
+                uint64_t& hash = hashes[{boost::cref(EMPTY_STRING), boost::cref(gatherer.attributeName(cid))}];
                hash = maths::CChecksum::calculate(hash, feature.s_Models[cid]);
            }
        }
    }
-    for (const auto &feature : m_FeatureCorrelatesModels)
-    {
-        for (const auto &model : feature.s_Models->correlationModels())
-        {
+    for (const auto& feature : m_FeatureCorrelatesModels) {
+        for (const auto& model : feature.s_Models->correlationModels()) {
            std::size_t cids[]{model.first.first, model.first.second};
-            if (gatherer.isAttributeActive(cids[0]) && gatherer.isAttributeActive(cids[1]))
-            {
-                uint64_t &hash = hashes[{boost::cref(gatherer.attributeName(cids[0])),
-                                         boost::cref(gatherer.attributeName(cids[1]))}];
+            if (gatherer.isAttributeActive(cids[0]) && gatherer.isAttributeActive(cids[1])) {
+                uint64_t& hash = hashes[{boost::cref(gatherer.attributeName(cids[0])), boost::cref(gatherer.attributeName(cids[1]))}];
                hash = maths::CChecksum::calculate(hash, model.second);
            }
        }
    }
-    if (includeCurrentBucketStats)
-    {
-        for (const auto &personCount : this->personCounts())
-        {
-            uint64_t &hash = hashes[{boost::cref(gatherer.personName(personCount.first)),
-                                     boost::cref(EMPTY_STRING)}];
+    if (includeCurrentBucketStats) {
+        for (const auto& personCount : this->personCounts()) {
+            uint64_t& hash = hashes[{boost::cref(gatherer.personName(personCount.first)), boost::cref(EMPTY_STRING)}];
            hash = maths::CChecksum::calculate(hash, personCount.second);
        }
-        for (const auto &feature : m_CurrentBucketStats.s_FeatureData)
-        {
-            for (const auto &data : feature.second)
-            {
+        for (const auto& feature : m_CurrentBucketStats.s_FeatureData) {
+            for (const auto& data : feature.second) {
                std::size_t pid = CDataGatherer::extractPersonId(data);
                std::size_t cid = CDataGatherer::extractAttributeId(data);
-                uint64_t &hash = hashes[{boost::cref(this->personName(pid)),
-                                         boost::cref(this->attributeName(cid))}];
+                uint64_t& hash = hashes[{boost::cref(this->personName(pid)), boost::cref(this->attributeName(cid))}];
                hash = maths::CChecksum::calculate(hash, CDataGatherer::extractData(data).s_Count);
            }
        }
@@ -879,38 +728,29 @@ uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) con
    return maths::CChecksum::calculate(seed, hashes);
}
-void CEventRatePopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CEventRatePopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
    mem->setName("CEventRatePopulationModel");
    this->CPopulationModel::debugMemoryUsage(mem->addChild());
-    core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts",
-                                    m_CurrentBucketStats.s_PersonCounts, mem);
-    core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData",
-                                    m_CurrentBucketStats.s_FeatureData, mem);
-
core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", - m_CurrentBucketStats.s_InterimCorrections, mem); - core::CMemoryDebug::dynamicSize("m_AttributeProbabilities", - m_AttributeProbabilities, mem); - core::CMemoryDebug::dynamicSize("m_NewPersonAttributePrior", - m_NewAttributeProbabilityPrior, mem); - core::CMemoryDebug::dynamicSize("m_AttributeProbabilityPrior", - m_AttributeProbabilityPrior, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", m_CurrentBucketStats.s_PersonCounts, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", m_CurrentBucketStats.s_FeatureData, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", m_CurrentBucketStats.s_InterimCorrections, mem); + core::CMemoryDebug::dynamicSize("m_AttributeProbabilities", m_AttributeProbabilities, mem); + core::CMemoryDebug::dynamicSize("m_NewPersonAttributePrior", m_NewAttributeProbabilityPrior, mem); + core::CMemoryDebug::dynamicSize("m_AttributeProbabilityPrior", m_AttributeProbabilityPrior, mem); core::CMemoryDebug::dynamicSize("m_FeatureModels", m_FeatureModels, mem); core::CMemoryDebug::dynamicSize("m_FeatureCorrelatesModels", m_FeatureCorrelatesModels, mem); core::CMemoryDebug::dynamicSize("m_MemoryEstimator", m_MemoryEstimator, mem); } -std::size_t CEventRatePopulationModel::memoryUsage() const -{ - const CDataGatherer &gatherer = this->dataGatherer(); +std::size_t CEventRatePopulationModel::memoryUsage() const { + const CDataGatherer& gatherer = this->dataGatherer(); TOptionalSize estimate = this->estimateMemoryUsage(gatherer.numberActivePeople(), gatherer.numberActiveAttributes(), 0); // # correlations return estimate ? estimate.get() : this->computeMemoryUsage(); } -std::size_t CEventRatePopulationModel::computeMemoryUsage() const -{ +std::size_t CEventRatePopulationModel::computeMemoryUsage() const { std::size_t mem = this->CPopulationModel::memoryUsage(); mem += core::CMemory::dynamicSize(m_CurrentBucketStats.s_PersonCounts); mem += core::CMemory::dynamicSize(m_CurrentBucketStats.s_FeatureData); @@ -924,82 +764,63 @@ std::size_t CEventRatePopulationModel::computeMemoryUsage() const return mem; } -CMemoryUsageEstimator *CEventRatePopulationModel::memoryUsageEstimator() const -{ +CMemoryUsageEstimator* CEventRatePopulationModel::memoryUsageEstimator() const { return &m_MemoryEstimator; } -std::size_t CEventRatePopulationModel::staticSize() const -{ +std::size_t CEventRatePopulationModel::staticSize() const { return sizeof(*this); } -CEventRatePopulationModel::CModelDetailsViewPtr CEventRatePopulationModel::details() const -{ +CEventRatePopulationModel::CModelDetailsViewPtr CEventRatePopulationModel::details() const { return CModelDetailsViewPtr(new CEventRatePopulationModelDetailsView(*this)); } -const CEventRatePopulationModel::TSizeSizePrFeatureDataPrVec & - CEventRatePopulationModel::featureData(model_t::EFeature feature, core_t::TTime time) const -{ +const CEventRatePopulationModel::TSizeSizePrFeatureDataPrVec& CEventRatePopulationModel::featureData(model_t::EFeature feature, + core_t::TTime time) const { static const TSizeSizePrFeatureDataPrVec EMPTY; - if (!this->bucketStatsAvailable(time)) - { - LOG_ERROR("No statistics at " << time - << ", current bucket = [" << m_CurrentBucketStats.s_StartTime - << "," << m_CurrentBucketStats.s_StartTime + this->bucketLength() << ")"); + if (!this->bucketStatsAvailable(time)) { + LOG_ERROR("No statistics at " << time << ", current bucket = [" << 
m_CurrentBucketStats.s_StartTime << "," + << m_CurrentBucketStats.s_StartTime + this->bucketLength() << ")"); return EMPTY; } auto result = m_CurrentBucketStats.s_FeatureData.find(feature); return result == m_CurrentBucketStats.s_FeatureData.end() ? EMPTY : result->second; } -core_t::TTime CEventRatePopulationModel::currentBucketStartTime() const -{ +core_t::TTime CEventRatePopulationModel::currentBucketStartTime() const { return m_CurrentBucketStats.s_StartTime; } -void CEventRatePopulationModel::currentBucketStartTime(core_t::TTime startTime) -{ +void CEventRatePopulationModel::currentBucketStartTime(core_t::TTime startTime) { m_CurrentBucketStats.s_StartTime = startTime; } -void CEventRatePopulationModel::currentBucketTotalCount(uint64_t totalCount) -{ +void CEventRatePopulationModel::currentBucketTotalCount(uint64_t totalCount) { m_CurrentBucketStats.s_TotalCount = totalCount; } -uint64_t CEventRatePopulationModel::currentBucketTotalCount() const -{ +uint64_t CEventRatePopulationModel::currentBucketTotalCount() const { return m_CurrentBucketStats.s_TotalCount; } -const CEventRatePopulationModel::TSizeUInt64PrVec &CEventRatePopulationModel::personCounts() const -{ +const CEventRatePopulationModel::TSizeUInt64PrVec& CEventRatePopulationModel::personCounts() const { return m_CurrentBucketStats.s_PersonCounts; } -CEventRatePopulationModel::TCorrectionKeyDouble1VecUMap & - CEventRatePopulationModel::currentBucketInterimCorrections() const -{ +CEventRatePopulationModel::TCorrectionKeyDouble1VecUMap& CEventRatePopulationModel::currentBucketInterimCorrections() const { return m_CurrentBucketStats.s_InterimCorrections; } -void CEventRatePopulationModel::createNewModels(std::size_t n, std::size_t m) -{ - if (m > 0) - { - for (auto &feature : m_FeatureModels) - { +void CEventRatePopulationModel::createNewModels(std::size_t n, std::size_t m) { + if (m > 0) { + for (auto& feature : m_FeatureModels) { std::size_t newM = feature.s_Models.size() + m; core::CAllocationStrategy::reserve(feature.s_Models, newM); - for (std::size_t cid = feature.s_Models.size(); cid < newM; ++cid) - { + for (std::size_t cid = feature.s_Models.size(); cid < newM; ++cid) { feature.s_Models.emplace_back(feature.s_NewModel->clone(cid)); - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { feature.s_Models.back()->modelCorrelations(*correlates.s_Models); } } @@ -1009,18 +830,13 @@ void CEventRatePopulationModel::createNewModels(std::size_t n, std::size_t m) this->CPopulationModel::createNewModels(n, m); } -void CEventRatePopulationModel::updateRecycledModels() -{ - CDataGatherer &gatherer = this->dataGatherer(); - for (auto cid : gatherer.recycledAttributeIds()) - { - for (auto &feature : m_FeatureModels) - { +void CEventRatePopulationModel::updateRecycledModels() { + CDataGatherer& gatherer = this->dataGatherer(); + for (auto cid : gatherer.recycledAttributeIds()) { + for (auto& feature : m_FeatureModels) { feature.s_Models[cid].reset(feature.s_NewModel->clone(cid)); - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { feature.s_Models.back()->modelCorrelations(*correlates.s_Models); } } @@ -1029,83 +845,59 @@ void CEventRatePopulationModel::updateRecycledModels() 
     this->CPopulationModel::updateRecycledModels();
 }
 
-void CEventRatePopulationModel::refreshCorrelationModels(std::size_t resourceLimit,
-                                                         CResourceMonitor &resourceMonitor)
-{
+void CEventRatePopulationModel::refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor) {
     std::size_t n = this->numberOfPeople();
     double maxNumberCorrelations = this->params().s_CorrelationModelsOverhead * static_cast<double>(n);
     auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate, this, n, 0, _1);
-    CTimeSeriesCorrelateModelAllocator allocator(resourceMonitor, memoryUsage, resourceLimit,
-                                                 static_cast<std::size_t>(maxNumberCorrelations + 0.5));
-    for (auto &feature : m_FeatureCorrelatesModels)
-    {
+    CTimeSeriesCorrelateModelAllocator allocator(
+        resourceMonitor, memoryUsage, resourceLimit, static_cast<std::size_t>(maxNumberCorrelations + 0.5));
+    for (auto& feature : m_FeatureCorrelatesModels) {
         allocator.prototypePrior(feature.s_ModelPrior);
         feature.s_Models->refresh(allocator);
     }
 }
 
-void CEventRatePopulationModel::clearPrunedResources(const TSizeVec &/*people*/,
-                                                     const TSizeVec &attributes)
-{
-    for (auto cid : attributes)
-    {
-        for (auto &feature : m_FeatureModels)
-        {
+void CEventRatePopulationModel::clearPrunedResources(const TSizeVec& /*people*/, const TSizeVec& attributes) {
+    for (auto cid : attributes) {
+        for (auto& feature : m_FeatureModels) {
             feature.s_Models[cid].reset(this->tinyModel());
         }
     }
 }
 
-void CEventRatePopulationModel::doSkipSampling(core_t::TTime startTime, core_t::TTime endTime)
-{
+void CEventRatePopulationModel::doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) {
     core_t::TTime gap = endTime - startTime;
-    for (auto &feature : m_FeatureModels)
-    {
-        for (auto &model : feature.s_Models)
-        {
+    for (auto& feature : m_FeatureModels) {
+        for (auto& model : feature.s_Models) {
             model->skipTime(gap);
         }
     }
     this->CPopulationModel::doSkipSampling(startTime, endTime);
 }
 
-const maths::CModel *CEventRatePopulationModel::model(model_t::EFeature feature, std::size_t cid) const
-{
+const maths::CModel* CEventRatePopulationModel::model(model_t::EFeature feature, std::size_t cid) const {
     return const_cast<CEventRatePopulationModel*>(this)->model(feature, cid);
 }
 
-maths::CModel *CEventRatePopulationModel::model(model_t::EFeature feature, std::size_t cid)
-{
-    auto i = std::find_if(m_FeatureModels.begin(), m_FeatureModels.end(),
-                          [feature](const SFeatureModels &model)
-                          {
-                              return model.s_Feature == feature;
-                          });
+maths::CModel* CEventRatePopulationModel::model(model_t::EFeature feature, std::size_t cid) {
+    auto i = std::find_if(
+        m_FeatureModels.begin(), m_FeatureModels.end(), [feature](const SFeatureModels& model) { return model.s_Feature == feature; });
     return i != m_FeatureModels.end() && cid < i->s_Models.size() ?
            i->s_Models[cid].get() : 0;
 }
 
-bool CEventRatePopulationModel::correlates(model_t::EFeature feature,
-                                           std::size_t pid,
-                                           std::size_t cid,
-                                           core_t::TTime time) const
-{
-    if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields)
-    {
+bool CEventRatePopulationModel::correlates(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const {
+    if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields) {
         return false;
     }
 
-    const maths::CModel *model{this->model(feature, cid)};
-    const TSizeSizePrFeatureDataPrVec &data = this->featureData(feature, time);
+    const maths::CModel* model{this->model(feature, cid)};
+    const TSizeSizePrFeatureDataPrVec& data = this->featureData(feature, time);
     TSizeSizePr range = personRange(data, pid);
 
-    for (std::size_t j = range.first; j < range.second; ++j)
-    {
+    for (std::size_t j = range.first; j < range.second; ++j) {
         std::size_t cids[]{cid, CDataGatherer::extractAttributeId(data[j])};
-        for (const auto &correlate : model->correlates())
-        {
-            if (   (cids[0] == correlate[0] && cids[1] == correlate[1])
-                || (cids[1] == correlate[0] && cids[0] == correlate[1]))
-            {
+        for (const auto& correlate : model->correlates()) {
+            if ((cids[0] == correlate[0] && cids[1] == correlate[1]) || (cids[1] == correlate[0] && cids[0] == correlate[1])) {
                 return true;
             }
         }
@@ -1118,43 +910,36 @@ void CEventRatePopulationModel::fill(model_t::EFeature feature,
                                      std::size_t cid,
                                      core_t::TTime bucketTime,
                                      bool interim,
-                                     CProbabilityAndInfluenceCalculator::SParams &params) const
-{
+                                     CProbabilityAndInfluenceCalculator::SParams& params) const {
     auto data = find(this->featureData(feature, bucketTime), pid, cid);
-    const maths::CModel *model{this->model(feature, cid)};
+    const maths::CModel* model{this->model(feature, cid)};
     core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength())};
     TDouble2Vec4Vec weight{model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)};
-    double value{model_t::offsetCountToZero(
-                     feature, static_cast<double>(CDataGatherer::extractData(*data).s_Count))};
+    double value{model_t::offsetCountToZero(feature, static_cast<double>(CDataGatherer::extractData(*data).s_Count))};
 
     params.s_Feature = feature;
     params.s_Model = model;
     params.s_ElapsedTime = bucketTime - this->attributeFirstBucketTimes()[cid];
     params.s_Time.assign(1, TTime2Vec{time});
     params.s_Value.assign(1, TDouble2Vec{value});
-    if (interim && model_t::requiresInterimResultAdjustment(feature))
-    {
+    if (interim && model_t::requiresInterimResultAdjustment(feature)) {
         double mode{params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weight)[0]};
-        TDouble2Vec correction{this->interimValueCorrector().corrections(
-                                   time, this->currentBucketTotalCount(), mode, value)};
+        TDouble2Vec correction{this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), mode, value)};
         params.s_Value[0] += correction;
         this->currentBucketInterimCorrections().emplace(CCorrectionKey(feature, pid, cid), correction);
     }
     params.s_Count = 1.0;
     params.s_ComputeProbabilityParams.tag(pid)
-                                     .addCalculation(model_t::probabilityCalculation(feature))
-                                     .weightStyles(PROBABILITY_WEIGHT_STYLES)
-                                     .addBucketEmpty(TBool2Vec(1, false))
-                                     .addWeights(weight);
+        .addCalculation(model_t::probabilityCalculation(feature))
+        .weightStyles(PROBABILITY_WEIGHT_STYLES)
+        .addBucketEmpty(TBool2Vec(1, false))
+        .addWeights(weight);
 }
 
 ////////// CEventRatePopulationModel::SBucketStats Implementation //////////
-CEventRatePopulationModel::SBucketStats::SBucketStats(core_t::TTime startTime) :
-    s_StartTime(startTime),
-    s_TotalCount(0),
-    s_InterimCorrections(1)
-{}
-
+CEventRatePopulationModel::SBucketStats::SBucketStats(core_t::TTime startTime)
+    : s_StartTime(startTime), s_TotalCount(0), s_InterimCorrections(1) {
+}
 }
 }
diff --git a/lib/model/CEventRatePopulationModelFactory.cc b/lib/model/CEventRatePopulationModelFactory.cc
index 1fa158b0cf..9710cf2f12 100644
--- a/lib/model/CEventRatePopulationModelFactory.cc
+++ b/lib/model/CEventRatePopulationModelFactory.cc
@@ -11,8 +11,8 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
 #include
@@ -25,41 +25,35 @@
 #include
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
-CEventRatePopulationModelFactory::CEventRatePopulationModelFactory(const SModelParams &params,
+CEventRatePopulationModelFactory::CEventRatePopulationModelFactory(const SModelParams& params,
                                                                    model_t::ESummaryMode summaryMode,
-                                                                   const std::string &summaryCountFieldName) :
-    CModelFactory(params),
-    m_Identifier(),
-    m_SummaryMode(summaryMode),
-    m_SummaryCountFieldName(summaryCountFieldName),
-    m_UseNull(false),
-    m_BucketResultsDelay(0)
-{}
-
-CEventRatePopulationModelFactory *CEventRatePopulationModelFactory::clone() const
-{
+                                                                   const std::string& summaryCountFieldName)
+    : CModelFactory(params),
+      m_Identifier(),
+      m_SummaryMode(summaryMode),
+      m_SummaryCountFieldName(summaryCountFieldName),
+      m_UseNull(false),
+      m_BucketResultsDelay(0) {
+}
+
+CEventRatePopulationModelFactory* CEventRatePopulationModelFactory::clone() const {
     return new CEventRatePopulationModelFactory(*this);
 }
 
-CAnomalyDetectorModel *CEventRatePopulationModelFactory::makeModel(const SModelInitializationData &initData) const
-{
+CAnomalyDetectorModel* CEventRatePopulationModelFactory::makeModel(const SModelInitializationData& initData) const {
     TDataGathererPtr dataGatherer = initData.s_DataGatherer;
-    if (!dataGatherer)
-    {
+    if (!dataGatherer) {
         LOG_ERROR("NULL data gatherer");
         return 0;
     }
-    const TFeatureVec &features = dataGatherer->features();
+    const TFeatureVec& features = dataGatherer->features();
     TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators;
     influenceCalculators.reserve(m_InfluenceFieldNames.size());
-    for (const auto &name : m_InfluenceFieldNames)
-    {
+    for (const auto& name : m_InfluenceFieldNames) {
         influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features));
     }
@@ -71,21 +65,18 @@ CAnomalyDetectorModel *CEventRatePopulationModelFactory::makeModel(const SModelI
                              influenceCalculators);
 }
 
-CAnomalyDetectorModel *CEventRatePopulationModelFactory::makeModel(const SModelInitializationData &initData,
-                                                                   core::CStateRestoreTraverser &traverser) const
-{
+CAnomalyDetectorModel* CEventRatePopulationModelFactory::makeModel(const SModelInitializationData& initData,
+                                                                   core::CStateRestoreTraverser& traverser) const {
     TDataGathererPtr dataGatherer = initData.s_DataGatherer;
-    if (!dataGatherer)
-    {
+    if (!dataGatherer) {
         LOG_ERROR("NULL data gatherer");
         return 0;
     }
-    const TFeatureVec &features = dataGatherer->features();
+    const TFeatureVec& features = dataGatherer->features();
     TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators;
     influenceCalculators.reserve(m_InfluenceFieldNames.size());
-    for (const auto &name : m_InfluenceFieldNames)
-    {
+    for (const auto& name : m_InfluenceFieldNames) {
         influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features));
     }
@@ -98,8 +89,7 @@ CAnomalyDetectorModel *CEventRatePopulationModelFactory::makeModel(const SModelI
                              traverser);
 }
 
-CDataGatherer *CEventRatePopulationModelFactory::makeDataGatherer(const SGathererInitializationData &initData) const
-{
+CDataGatherer* CEventRatePopulationModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const {
     return new CDataGatherer(model_t::E_PopulationEventRate,
                              m_SummaryMode,
                              this->modelParams(),
@@ -117,9 +107,8 @@ CDataGatherer *CEventRatePopulationModelFactory::makeDataGatherer(const SGathere
                              0);
 }
 
-CDataGatherer *CEventRatePopulationModelFactory::makeDataGatherer(const std::string &partitionFieldValue,
-                                                                  core::CStateRestoreTraverser &traverser) const
-{
+CDataGatherer* CEventRatePopulationModelFactory::makeDataGatherer(const std::string& partitionFieldValue,
+                                                                  core::CStateRestoreTraverser& traverser) const {
     return new CDataGatherer(model_t::E_PopulationEventRate,
                              m_SummaryMode,
                              this->modelParams(),
@@ -135,26 +124,21 @@ CDataGatherer *CEventRatePopulationModelFactory::makeDataGatherer(const std::str
                              traverser);
 }
 
-CEventRatePopulationModelFactory::TPriorPtr
-    CEventRatePopulationModelFactory::defaultPrior(model_t::EFeature feature,
-                                                   const SModelParams &params) const
-{
+CEventRatePopulationModelFactory::TPriorPtr CEventRatePopulationModelFactory::defaultPrior(model_t::EFeature feature,
+                                                                                           const SModelParams& params) const {
     // Categorical data all use the multinomial prior. The creation
     // of these priors is managed by defaultCategoricalPrior.
-    if (model_t::isCategorical(feature))
-    {
+    if (model_t::isCategorical(feature)) {
         return TPriorPtr();
     }
 
     // If the feature data only ever takes a single value we use a
     // special lightweight prior.
-    if (model_t::isConstant(feature))
-    {
+    if (model_t::isConstant(feature)) {
         return boost::make_shared<maths::CConstantPrior>();
     }
 
-    if (model_t::isDiurnal(feature))
-    {
+    if (model_t::isDiurnal(feature)) {
         return this->timeOfDayPrior(params);
     }
 
@@ -169,17 +153,14 @@ CEventRatePopulationModelFactory::TPriorPtr
     maths_t::EDataType dataType = this->dataType();
 
-    maths::CGammaRateConjugate gammaPrior =
-        maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
+    maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
 
     maths::CLogNormalMeanPrecConjugate logNormalPrior =
-            maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
+        maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
 
-    maths::CNormalMeanPrecConjugate normalPrior =
-        maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate);
+    maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate);
 
-    maths::CPoissonMeanConjugate poissonPrior =
-        maths::CPoissonMeanConjugate::nonInformativePrior(0.0, params.s_DecayRate);
+    maths::CPoissonMeanConjugate poissonPrior = maths::CPoissonMeanConjugate::nonInformativePrior(0.0, params.s_DecayRate);
 
     // Create the component priors.
     TPriorPtrVec priors;
@@ -188,8 +169,7 @@ CEventRatePopulationModelFactory::TPriorPtr
     priors.emplace_back(logNormalPrior.clone());
     priors.emplace_back(normalPrior.clone());
     priors.emplace_back(poissonPrior.clone());
-    if (params.s_MinimumModeFraction <= 0.5)
-    {
+    if (params.s_MinimumModeFraction <= 0.5) {
         // Create the multimode prior.
         TPriorPtrVec modePriors;
         modePriors.reserve(3u);
@@ -212,17 +192,14 @@ CEventRatePopulationModelFactory::TPriorPtr
 }
 
 CEventRatePopulationModelFactory::TMultivariatePriorPtr
-    CEventRatePopulationModelFactory::defaultMultivariatePrior(model_t::EFeature feature,
-                                                               const SModelParams &params) const
-{
+CEventRatePopulationModelFactory::defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const {
     std::size_t dimension = model_t::dimension(feature);
 
     TMultivariatePriorPtrVec priors;
     priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u);
     TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(dimension, params);
     priors.push_back(multivariateNormal);
-    if (params.s_MinimumModeFraction <= 0.5)
-    {
+    if (params.s_MinimumModeFraction <= 0.5) {
         priors.push_back(this->multivariateMultimodalPrior(dimension, params, *multivariateNormal));
     }
 
@@ -230,24 +207,19 @@ CEventRatePopulationModelFactory::TMultivariatePriorPtr
 }
 
 CEventRatePopulationModelFactory::TMultivariatePriorPtr
-    CEventRatePopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/,
-                                                            const SModelParams &params) const
-{
+CEventRatePopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, const SModelParams& params) const {
     TMultivariatePriorPtrVec priors;
     priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u);
     TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(2, params);
     priors.push_back(multivariateNormal);
-    if (params.s_MinimumModeFraction <= 0.5)
-    {
+    if (params.s_MinimumModeFraction <= 0.5) {
         priors.push_back(this->multivariateMultimodalPrior(2, params, *multivariateNormal));
     }
 
     return this->multivariateOneOfNPrior(2, params, priors);
 }
 
-const CSearchKey &CEventRatePopulationModelFactory::searchKey() const
-{
-    if (!m_SearchKeyCache)
-    {
+const CSearchKey& CEventRatePopulationModelFactory::searchKey() const {
+    if (!m_SearchKeyCache) {
         m_SearchKeyCache.reset(CSearchKey(m_Identifier,
                                           function_t::function(m_Features),
                                           m_UseNull,
@@ -261,33 +233,28 @@ const CSearchKey &CEventRatePopulationModelFactory::searchKey() const
     return *m_SearchKeyCache;
 }
 
-bool CEventRatePopulationModelFactory::isSimpleCount() const
-{
+bool CEventRatePopulationModelFactory::isSimpleCount() const {
     return false;
 }
 
-model_t::ESummaryMode CEventRatePopulationModelFactory::summaryMode() const
-{
+model_t::ESummaryMode CEventRatePopulationModelFactory::summaryMode() const {
     return m_SummaryMode;
 }
 
-maths_t::EDataType CEventRatePopulationModelFactory::dataType() const
-{
+maths_t::EDataType CEventRatePopulationModelFactory::dataType() const {
     return maths_t::E_IntegerData;
 }
 
-void CEventRatePopulationModelFactory::identifier(int identifier)
-{
+void CEventRatePopulationModelFactory::identifier(int identifier) {
     m_Identifier = identifier;
     m_SearchKeyCache.reset();
 }
 
-void CEventRatePopulationModelFactory::fieldNames(const std::string &partitionFieldName,
-                                                  const std::string &overFieldName,
-                                                  const std::string &byFieldName,
-                                                  const std::string &valueFieldName,
-                                                  const TStrVec &influenceFieldNames)
-{
+void CEventRatePopulationModelFactory::fieldNames(const std::string& partitionFieldName,
+                                                  const std::string& overFieldName,
+                                                  const std::string& byFieldName,
+                                                  const std::string& valueFieldName,
+                                                  const TStrVec& influenceFieldNames) {
     m_PartitionFieldName = partitionFieldName;
     m_PersonFieldName = overFieldName;
     m_AttributeFieldName = byFieldName;
@@ -296,42 +263,33 @@ void CEventRatePopulationModelFactory::fieldNames(const std::string &partitionFi
     m_SearchKeyCache.reset();
 }
 
-void CEventRatePopulationModelFactory::useNull(bool useNull)
-{
+void CEventRatePopulationModelFactory::useNull(bool useNull) {
     m_UseNull = useNull;
     m_SearchKeyCache.reset();
 }
 
-void CEventRatePopulationModelFactory::features(const TFeatureVec &features)
-{
+void CEventRatePopulationModelFactory::features(const TFeatureVec& features) {
     m_Features = features;
     m_SearchKeyCache.reset();
 }
 
-void CEventRatePopulationModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay)
-{
+void CEventRatePopulationModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) {
     m_BucketResultsDelay = bucketResultsDelay;
 }
 
-CEventRatePopulationModelFactory::TStrCRefVec
-    CEventRatePopulationModelFactory::partitioningFields() const
-{
+CEventRatePopulationModelFactory::TStrCRefVec CEventRatePopulationModelFactory::partitioningFields() const {
     TStrCRefVec result;
     result.reserve(3);
-    if (!m_PartitionFieldName.empty())
-    {
+    if (!m_PartitionFieldName.empty()) {
         result.emplace_back(m_PartitionFieldName);
     }
-    if (!m_PersonFieldName.empty())
-    {
+    if (!m_PersonFieldName.empty()) {
         result.emplace_back(m_PersonFieldName);
     }
-    if (!m_AttributeFieldName.empty())
-    {
+    if (!m_AttributeFieldName.empty()) {
         result.emplace_back(m_AttributeFieldName);
     }
     return result;
 }
-
 }
 }
diff --git a/lib/model/CFeatureData.cc b/lib/model/CFeatureData.cc
index 095a8a1372..998cf4e464 100644
--- a/lib/model/CFeatureData.cc
+++ b/lib/model/CFeatureData.cc
@@ -15,33 +15,25 @@
 
 #include
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
-namespace
-{
+namespace {
 
 using TSizeVec = std::vector<std::size_t>;
 
 //! Get the sequence [0, N).
 template<std::size_t N>
-const TSizeVec &sequence()
-{
-    static const TSizeVec result(boost::counting_iterator<std::size_t>(0),
-                                 boost::counting_iterator<std::size_t>(N));
+const TSizeVec& sequence() {
+    static const TSizeVec result(boost::counting_iterator<std::size_t>(0), boost::counting_iterator<std::size_t>(N));
     return result;
 }
-
 }
 
 ////// CFeatureDataIndexing //////
 
-const TSizeVec &CFeatureDataIndexing::valueIndices(std::size_t dimension)
-{
-    switch (dimension)
-    {
+const TSizeVec& CFeatureDataIndexing::valueIndices(std::size_t dimension) {
+    switch (dimension) {
     case 1:
         return sequence<1>();
     case 2:
@@ -70,36 +62,30 @@ const TSizeVec &CFeatureDataIndexing::valueIndices(std::size_t dimension)
 
 ////// SEventRateFeatureData //////
 
-SEventRateFeatureData::SEventRateFeatureData(uint64_t count) :
-    s_Count(count)
-{}
+SEventRateFeatureData::SEventRateFeatureData(uint64_t count) : s_Count(count) {
+}
 
-void SEventRateFeatureData::swap(SEventRateFeatureData &other)
-{
+void SEventRateFeatureData::swap(SEventRateFeatureData& other) {
     std::swap(s_Count, other.s_Count);
     s_InfluenceValues.swap(other.s_InfluenceValues);
 }
 
-std::string SEventRateFeatureData::print() const
-{
+std::string SEventRateFeatureData::print() const {
     std::ostringstream result;
     result << s_Count;
-    if (!s_InfluenceValues.empty())
-    {
+    if (!s_InfluenceValues.empty()) {
         result << ", " << core::CContainerPrinter::print(s_InfluenceValues);
     }
     return result.str();
 }
 
-std::size_t SEventRateFeatureData::memoryUsage() const
-{
+std::size_t SEventRateFeatureData::memoryUsage() const {
     std::size_t mem = sizeof(*this);
     mem += core::CMemory::dynamicSize(s_InfluenceValues);
     return mem;
 }
 
-void SEventRateFeatureData::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void SEventRateFeatureData::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("SMetricFeatureData", sizeof(*this));
     core::CMemoryDebug::dynamicSize("s_InfluenceValues",
                                     s_InfluenceValues, mem);
 }
@@ -107,57 +93,43 @@ void SEventRateFeatureData::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr
 
 ////// SMetricFeatureData //////
 
 SMetricFeatureData::SMetricFeatureData(core_t::TTime bucketTime,
-                                       const TDouble1Vec &bucketValue,
+                                       const TDouble1Vec& bucketValue,
                                        double bucketVarianceScale,
                                        double bucketCount,
-                                       TStrCRefDouble1VecDoublePrPrVecVec &influenceValues,
+                                       TStrCRefDouble1VecDoublePrPrVecVec& influenceValues,
                                        bool isInteger,
                                        bool isNonNegative,
-                                       const TSampleVec &samples) :
-    s_BucketValue(boost::in_place(bucketTime,
-                                  bucketValue,
-                                  bucketVarianceScale,
-                                  bucketCount)),
-    s_IsInteger(isInteger),
-    s_IsNonNegative(isNonNegative),
-    s_Samples(samples)
-{
+                                       const TSampleVec& samples)
+    : s_BucketValue(boost::in_place(bucketTime, bucketValue, bucketVarianceScale, bucketCount)),
+      s_IsInteger(isInteger),
+      s_IsNonNegative(isNonNegative),
+      s_Samples(samples) {
     s_InfluenceValues.swap(influenceValues);
 }
 
-SMetricFeatureData::SMetricFeatureData(bool isInteger,
-                                       bool isNonNegative,
-                                       const TSampleVec &samples) :
-    s_IsInteger(isInteger),
-    s_IsNonNegative(isNonNegative),
-    s_Samples(samples)
-{}
-
-std::string SMetricFeatureData::print() const
-{
+SMetricFeatureData::SMetricFeatureData(bool isInteger, bool isNonNegative, const TSampleVec& samples)
+    : s_IsInteger(isInteger), s_IsNonNegative(isNonNegative), s_Samples(samples) {
+}
+
+std::string SMetricFeatureData::print() const {
     std::ostringstream result;
-    result << "value = " << core::CContainerPrinter::print(s_BucketValue)
-           << ", is integer " << s_IsInteger
-           << ", is non-negative " << s_IsNonNegative
-           << ", samples = " << core::CContainerPrinter::print(s_Samples);
+    result << "value = " << core::CContainerPrinter::print(s_BucketValue) << ", is integer " << s_IsInteger << ", is non-negative "
+           << s_IsNonNegative << ", samples = " << core::CContainerPrinter::print(s_Samples);
     return result.str();
 }
 
-std::size_t SMetricFeatureData::memoryUsage() const
-{
+std::size_t SMetricFeatureData::memoryUsage() const {
     std::size_t mem = core::CMemory::dynamicSize(s_BucketValue);
     mem += core::CMemory::dynamicSize(s_InfluenceValues);
     mem += core::CMemory::dynamicSize(s_Samples);
     return mem;
 }
 
-void SMetricFeatureData::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void SMetricFeatureData::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("SMetricFeatureData");
     core::CMemoryDebug::dynamicSize("s_BucketValue", s_BucketValue, mem);
     core::CMemoryDebug::dynamicSize("s_InfluenceValues", s_InfluenceValues, mem);
     core::CMemoryDebug::dynamicSize("s_Samples", s_Samples, mem);
 }
-
 }
 }
diff --git a/lib/model/CForecastDataSink.cc b/lib/model/CForecastDataSink.cc
index fa7e7edb81..7a7673749a 100644
--- a/lib/model/CForecastDataSink.cc
+++ b/lib/model/CForecastDataSink.cc
@@ -10,13 +10,10 @@
 
 #include
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
-namespace
-{
+namespace {
 
 using TStrVec = std::vector<std::string>;
 
 // static strings
@@ -56,47 +53,36 @@ const std::string CForecastDataSink::PROGRESS("forecast_progress");
 const std::string CForecastDataSink::STATUS("forecast_status");
 
 CForecastDataSink::SForecastModelWrapper::SForecastModelWrapper(model_t::EFeature feature,
-                                                                TMathsModelPtr &&forecastModel,
-                                                                const std::string &byFieldValue)
-    :s_Feature(feature),
-     s_ForecastModel(std::move(forecastModel)),
-     s_ByFieldValue(byFieldValue)
-{
+                                                                TMathsModelPtr&& forecastModel,
+                                                                const std::string& byFieldValue)
+    : s_Feature(feature), s_ForecastModel(std::move(forecastModel)), s_ByFieldValue(byFieldValue) {
 }
 
-CForecastDataSink::SForecastModelWrapper::SForecastModelWrapper(SForecastModelWrapper &&other)
-    :s_Feature(other.s_Feature),
-     s_ForecastModel(std::move(other.s_ForecastModel)),
-     s_ByFieldValue(std::move(other.s_ByFieldValue))
-{
+CForecastDataSink::SForecastModelWrapper::SForecastModelWrapper(SForecastModelWrapper&& other)
+    : s_Feature(other.s_Feature), s_ForecastModel(std::move(other.s_ForecastModel)), s_ByFieldValue(std::move(other.s_ByFieldValue)) {
 }
 
 CForecastDataSink::SForecastResultSeries::SForecastResultSeries()
-    :s_DetectorIndex(),
-     s_ToForecast(),
-     s_PartitionFieldValue(),
-     s_ByFieldName()
-{
+    : s_DetectorIndex(), s_ToForecast(), s_PartitionFieldValue(), s_ByFieldName() {
 }
 
-CForecastDataSink::SForecastResultSeries::SForecastResultSeries(SForecastResultSeries &&other)
-    :s_DetectorIndex(other.s_DetectorIndex),
-     s_ToForecast(std::move(other.s_ToForecast)),
-     s_PartitionFieldName(std::move(other.s_PartitionFieldName)),
-     s_PartitionFieldValue(std::move(other.s_PartitionFieldValue)),
-     s_ByFieldName(std::move(other.s_ByFieldName))
-{
+CForecastDataSink::SForecastResultSeries::SForecastResultSeries(SForecastResultSeries&& other)
+    : s_DetectorIndex(other.s_DetectorIndex),
+      s_ToForecast(std::move(other.s_ToForecast)),
+      s_PartitionFieldName(std::move(other.s_PartitionFieldName)),
+      s_PartitionFieldValue(std::move(other.s_PartitionFieldValue)),
+      s_ByFieldName(std::move(other.s_ByFieldName)) {
 }
 
-CForecastDataSink::CForecastDataSink(const std::string &jobId,
-                                     const std::string &forecastId,
-                                     const std::string &forecastAlias,
+CForecastDataSink::CForecastDataSink(const std::string& jobId,
+                                     const std::string& forecastId,
+                                     const std::string& forecastAlias,
                                      core_t::TTime createTime,
                                      core_t::TTime startTime,
                                      core_t::TTime endTime,
                                      core_t::TTime expiryTime,
                                      size_t memoryUsage,
-                                     core::CJsonOutputStreamWrapper &outStream)
+                                     core::CJsonOutputStreamWrapper& outStream)
     : m_JobId(jobId),
       m_ForecastId(forecastId),
       m_ForecastAlias(forecastAlias),
@@ -106,12 +92,10 @@ CForecastDataSink::CForecastDataSink(const std::string &jobId,
       m_StartTime(startTime),
       m_EndTime(endTime),
       m_ExpiryTime(expiryTime),
-      m_MemoryUsage(memoryUsage)
-{
+      m_MemoryUsage(memoryUsage) {
 }
 
-void CForecastDataSink::writeStats(const double progress, uint64_t runtime, const TStrUMap &messages, bool successful)
-{
+void CForecastDataSink::writeStats(const double progress, uint64_t runtime, const TStrUMap& messages, bool successful) {
     rapidjson::Document doc = m_Writer.makeDoc();
 
     this->writeCommonStatsFields(doc);
@@ -122,18 +106,12 @@ void CForecastDataSink::writeStats(const double progress, uint64_t runtime, cons
     m_Writer.addUIntFieldToObj(PROCESSING_TIME_MS, runtime, doc);
     m_Writer.addStringArrayFieldToObj(MESSAGES, messages, doc);
 
-    if (progress < 1.0)
-    {
+    if (progress < 1.0) {
         m_Writer.addStringFieldReferenceToObj(STATUS, STATUS_STARTED, doc);
-    }
-    else
-    {
-        if (successful)
-        {
+    } else {
+        if (successful) {
             m_Writer.addStringFieldReferenceToObj(STATUS, STATUS_FINISHED, doc);
-        }
-        else
-        {
+        } else {
             m_Writer.addStringFieldReferenceToObj(STATUS, STATUS_FAILED, doc);
         }
     }
@@ -142,40 +120,35 @@ void CForecastDataSink::writeStats(const double progress, uint64_t runtime, cons
 
     this->push(progress == 1.0, doc);
 }
 
-void CForecastDataSink::writeScheduledMessage()
-{
+void CForecastDataSink::writeScheduledMessage() {
     rapidjson::Value doc(rapidjson::kObjectType);
     this->writeCommonStatsFields(doc);
     m_Writer.addStringFieldReferenceToObj(STATUS,
                                           STATUS_SCHEDULED, doc);
-    this->push(true/*important, therefore flush*/, doc);
+    this->push(true /*important, therefore flush*/, doc);
 }
 
-void CForecastDataSink::writeErrorMessage(const std::string &message)
-{
+void CForecastDataSink::writeErrorMessage(const std::string& message) {
     rapidjson::Document doc = m_Writer.makeDoc();
     this->writeCommonStatsFields(doc);
     TStrVec messages{message};
     m_Writer.addStringArrayFieldToObj(MESSAGES, messages, doc);
     m_Writer.addStringFieldReferenceToObj(STATUS, STATUS_FAILED, doc);
-    this->push(true/*important, therefore flush*/, doc);
+    this->push(true /*important, therefore flush*/, doc);
 }
 
-void CForecastDataSink::writeFinalMessage(const std::string &message)
-{
+void CForecastDataSink::writeFinalMessage(const std::string& message) {
     rapidjson::Document doc = m_Writer.makeDoc();
     this->writeCommonStatsFields(doc);
     TStrVec messages{message};
     m_Writer.addStringArrayFieldToObj(MESSAGES, messages, doc);
     m_Writer.addStringFieldReferenceToObj(STATUS, STATUS_FINISHED, doc);
-    this->push(true/*important, therefore flush*/, doc);
+    this->push(true /*important, therefore flush*/, doc);
 }
 
-void CForecastDataSink::writeCommonStatsFields(rapidjson::Value &doc)
-{
+void CForecastDataSink::writeCommonStatsFields(rapidjson::Value& doc) {
     m_Writer.addStringFieldReferenceToObj(JOB_ID, m_JobId, doc);
     m_Writer.addStringFieldReferenceToObj(FORECAST_ID, m_ForecastId, doc);
-    if (m_ForecastAlias.empty() == false)
-    {
+    if (m_ForecastAlias.empty() == false) {
         m_Writer.addStringFieldReferenceToObj(FORECAST_ALIAS, m_ForecastAlias, doc);
     }
     m_Writer.addTimeFieldToObj(CREATE_TIME, m_CreateTime, doc);
@@ -183,60 +156,51 @@ void CForecastDataSink::writeCommonStatsFields(rapidjson::Value &doc)
     m_Writer.addTimeFieldToObj(START_TIME, m_StartTime, doc);
     m_Writer.addTimeFieldToObj(END_TIME, m_EndTime, doc);
 
-    if (m_ExpiryTime != m_CreateTime)
-    {
+    if (m_ExpiryTime != m_CreateTime) {
         m_Writer.addTimeFieldToObj(EXPIRY_TIME, m_ExpiryTime, doc);
     }
 }
 
-void CForecastDataSink::push(bool flush, rapidjson::Value &doc)
-{
+void CForecastDataSink::push(bool flush, rapidjson::Value& doc) {
     rapidjson::Document wrapper = m_Writer.makeDoc();
     m_Writer.addMember(MODEL_FORECAST_STATS, doc, wrapper);
     m_Writer.write(wrapper);
-    if (flush)
-    {
+    if (flush) {
         m_Writer.flush();
     }
-
 }
 
-uint64_t CForecastDataSink::numRecordsWritten() const
-{
+uint64_t CForecastDataSink::numRecordsWritten() const {
     return m_NumRecordsWritten;
 }
 
 void CForecastDataSink::push(const maths::SErrorBar errorBar,
-                             const std::string &feature,
-                             const std::string &partitionFieldName,
-                             const std::string &partitionFieldValue,
-                             const std::string &byFieldName,
-                             const std::string &byFieldValue,
-                             int detectorIndex)
-{
+                             const std::string& feature,
+                             const std::string& partitionFieldName,
+                             const std::string& partitionFieldValue,
+                             const std::string& byFieldName,
+                             const std::string& byFieldValue,
+                             int detectorIndex) {
     ++m_NumRecordsWritten;
     rapidjson::Document doc = m_Writer.makeDoc();
 
     m_Writer.addStringFieldReferenceToObj(JOB_ID, m_JobId, doc);
     m_Writer.addIntFieldToObj(DETECTOR_INDEX, detectorIndex, doc);
     m_Writer.addStringFieldReferenceToObj(FORECAST_ID, m_ForecastId, doc);
-    if (m_ForecastAlias.empty() == false)
-    {
+    if (m_ForecastAlias.empty() == false) {
         m_Writer.addStringFieldReferenceToObj(FORECAST_ALIAS, m_ForecastAlias, doc);
     }
     m_Writer.addStringFieldCopyToObj(FEATURE, feature, doc, true);
     // time is in Java format - milliseconds since the epoch
     m_Writer.addTimeFieldToObj(TIMESTAMP, errorBar.s_Time, doc);
     m_Writer.addIntFieldToObj(BUCKET_SPAN, errorBar.s_BucketLength, doc);
-    if (!partitionFieldName.empty())
-    {
+    if (!partitionFieldName.empty()) {
         m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, partitionFieldName, doc);
         m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, partitionFieldValue, doc, true);
     }
-    if (!byFieldName.empty())
-    {
+    if (!byFieldName.empty()) {
         m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, byFieldName, doc);
         m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, byFieldValue, doc, true);
     }
@@ -248,7 +212,6 @@ void CForecastDataSink::push(const maths::SErrorBar errorBar,
     rapidjson::Document wrapper = m_Writer.makeDoc();
     m_Writer.addMember(MODEL_FORECAST, doc, wrapper);
     m_Writer.write(wrapper);
-
 }
 
 } /* namespace model */
diff --git a/lib/model/CGathererTools.cc b/lib/model/CGathererTools.cc
index 7d5982b3c8..28fec3c515 100644
--- a/lib/model/CGathererTools.cc
+++ b/lib/model/CGathererTools.cc
@@ -18,20 +18,17 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
-namespace
-{
+namespace {
 
 const std::string CLASSIFIER_TAG("a");
 const std::string LAST_TIME_TAG("b");
@@ -44,22 +41,15 @@ const std::string SUM_MAP_KEY_TAG("b");
 const std::string SUM_MAP_VALUE_TAG("c");
 
 //! \brief Manages persistence of bucket sums.
-struct SSumSerializer
-{
+struct SSumSerializer {
     using TSampleVec = std::vector<CSample>;
 
-    void operator()(const TSampleVec &sample, core::CStatePersistInserter &inserter) const
-    {
-        inserter.insertValue(SUM_SAMPLE_TAG,
-                             core::CPersistUtils::toString(sample, CSample::SToString()));
+    void operator()(const TSampleVec& sample, core::CStatePersistInserter& inserter) const {
+        inserter.insertValue(SUM_SAMPLE_TAG, core::CPersistUtils::toString(sample, CSample::SToString()));
     }
-    bool operator()(TSampleVec &sample, core::CStateRestoreTraverser &traverser) const
-    {
-        if (   traverser.name() != SUM_SAMPLE_TAG
-            || core::CPersistUtils::fromString(traverser.value(),
-                                               CSample::SFromString(),
-                                               sample) == false)
-        {
+    bool operator()(TSampleVec& sample, core::CStateRestoreTraverser& traverser) const {
+        if (traverser.name() != SUM_SAMPLE_TAG ||
+            core::CPersistUtils::fromString(traverser.value(), CSample::SFromString(), sample) == false) {
             LOG_ERROR("Invalid sample in: " << traverser.value())
             return false;
         }
@@ -68,115 +58,86 @@ struct SSumSerializer
 };
 
 //! \brief Manages persistence of influence bucket sums.
-struct SInfluencerSumSerializer
-{
+struct SInfluencerSumSerializer {
     using TStoredStringPtrDoubleUMap = boost::unordered_map<core::CStoredStringPtr, double>;
     using TStoredStringPtrDoubleUMapCItr = TStoredStringPtrDoubleUMap::const_iterator;
     using TStrCRef = boost::reference_wrapper<const std::string>;
     using TStrCRefDoublePr = std::pair<TStrCRef, double>;
     using TStrCRefDoublePrVec = std::vector<TStrCRefDoublePr>;
 
-    void operator()(const TStoredStringPtrDoubleUMap &map, core::CStatePersistInserter &inserter) const
-    {
+    void operator()(const TStoredStringPtrDoubleUMap& map, core::CStatePersistInserter& inserter) const {
         TStrCRefDoublePrVec ordered;
         ordered.reserve(map.size());
-        for (TStoredStringPtrDoubleUMapCItr i = map.begin(); i != map.end(); ++i)
-        {
+        for (TStoredStringPtrDoubleUMapCItr i = map.begin(); i != map.end(); ++i) {
            ordered.emplace_back(TStrCRef(*i->first), i->second);
         }
         std::sort(ordered.begin(), ordered.end(), maths::COrderings::SFirstLess());
-        for (std::size_t i = 0u; i < ordered.size(); ++i)
-        {
+        for (std::size_t i = 0u; i < ordered.size(); ++i) {
             inserter.insertValue(SUM_MAP_KEY_TAG, ordered[i].first);
-            inserter.insertValue(SUM_MAP_VALUE_TAG,
-                                 ordered[i].second,
-                                 core::CIEEE754::E_SinglePrecision);
+            inserter.insertValue(SUM_MAP_VALUE_TAG, ordered[i].second, core::CIEEE754::E_SinglePrecision);
         }
     }
 
-    bool operator()(TStoredStringPtrDoubleUMap &map, core::CStateRestoreTraverser &traverser) const
-    {
+    bool operator()(TStoredStringPtrDoubleUMap& map, core::CStateRestoreTraverser& traverser) const {
         std::string key;
-        do
-        {
-            const std::string &name = traverser.name();
-            if (name == SUM_MAP_KEY_TAG)
-            {
+        do {
+            const std::string& name = traverser.name();
+            if (name == SUM_MAP_KEY_TAG) {
                 key = traverser.value();
-            }
-            else if (name == SUM_MAP_VALUE_TAG)
-            {
-                if (core::CStringUtils::stringToType(traverser.value(),
-                                                     map[CStringStore::influencers().get(key)]) == false)
-                {
+            } else if (name == SUM_MAP_VALUE_TAG) {
+                if (core::CStringUtils::stringToType(traverser.value(), map[CStringStore::influencers().get(key)]) == false) {
                    LOG_ERROR("Invalid sum in " << traverser.value());
                     return false;
                 }
            }
-        }
-        while (traverser.next());
+        } while (traverser.next());
         return true;
     }
 };
 } // unnamed::
 
-CGathererTools::CArrivalTimeGatherer::CArrivalTimeGatherer() : m_LastTime(FIRST_TIME)
-{
+CGathererTools::CArrivalTimeGatherer::CArrivalTimeGatherer() : m_LastTime(FIRST_TIME) {
 }
 
-CGathererTools::TOptionalDouble CGathererTools::CArrivalTimeGatherer::featureData() const
-{
-    return maths::CBasicStatistics::count(m_Value) > 0.0 ?
-           TOptionalDouble(maths::CBasicStatistics::mean(m_Value)) : TOptionalDouble();
+CGathererTools::TOptionalDouble CGathererTools::CArrivalTimeGatherer::featureData() const {
+    return maths::CBasicStatistics::count(m_Value) > 0.0 ? TOptionalDouble(maths::CBasicStatistics::mean(m_Value)) : TOptionalDouble();
 }
 
-void CGathererTools::CArrivalTimeGatherer::startNewBucket()
-{
+void CGathererTools::CArrivalTimeGatherer::startNewBucket() {
     m_Value = TAccumulator();
 }
 
-void CGathererTools::CArrivalTimeGatherer::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void CGathererTools::CArrivalTimeGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     // Because we always serialize immediately after processing a bucket
     // we will have already used the bucket value and samples so these
     // don't need to be serialized.
     inserter.insertValue(LAST_TIME_TAG, m_LastTime);
 }
 
-bool CGathererTools::CArrivalTimeGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == LAST_TIME_TAG)
-        {
-            if (core::CStringUtils::stringToType(traverser.value(), m_LastTime) == false)
-            {
+bool CGathererTools::CArrivalTimeGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
+        if (name == LAST_TIME_TAG) {
+            if (core::CStringUtils::stringToType(traverser.value(), m_LastTime) == false) {
                 LOG_ERROR("Invalid last time in " << traverser.value());
                 continue;
             }
         }
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
 
-uint64_t CGathererTools::CArrivalTimeGatherer::checksum() const
-{
+uint64_t CGathererTools::CArrivalTimeGatherer::checksum() const {
     return maths::CChecksum::calculate(static_cast<uint64_t>(m_LastTime), m_Value);
 }
 
-std::string CGathererTools::CArrivalTimeGatherer::print() const
-{
+std::string CGathererTools::CArrivalTimeGatherer::print() const {
     std::ostringstream o;
-    if (maths::CBasicStatistics::count(m_Value) > 0.0)
-    {
+    if (maths::CBasicStatistics::count(m_Value) > 0.0) {
         o << maths::CBasicStatistics::mean(m_Value);
-    }
-    else
-    {
+    } else {
         o << "-";
     }
     o << " (" << m_LastTime << ")";
@@ -185,111 +146,86 @@ std::string CGathererTools::CArrivalTimeGatherer::print() const
 
 const core_t::TTime CGathererTools::CArrivalTimeGatherer::FIRST_TIME(std::numeric_limits<core_t::TTime>::min());
 
-CGathererTools::CSumGatherer::CSumGatherer(const SModelParams &params,
+CGathererTools::CSumGatherer::CSumGatherer(const SModelParams& params,
                                            std::size_t /*dimension*/,
                                            core_t::TTime startTime,
                                           core_t::TTime bucketLength,
                                            TStrVecCItr beginInfluencers,
-                                           TStrVecCItr endInfluencers) :
-    m_Classifier(),
-    m_BucketSums(params.s_LatencyBuckets, bucketLength, startTime),
-    m_InfluencerBucketSums(std::distance(beginInfluencers, endInfluencers),
-                           TStoredStringPtrDoubleUMapQueue(params.s_LatencyBuckets + 3,
-                                                           bucketLength,
-                                                           startTime,
-                                                           TStoredStringPtrDoubleUMap(1)))
-{
+                                           TStrVecCItr endInfluencers)
+    : m_Classifier(),
+      m_BucketSums(params.s_LatencyBuckets, bucketLength, startTime),
+      m_InfluencerBucketSums(
+          std::distance(beginInfluencers, endInfluencers),
+          TStoredStringPtrDoubleUMapQueue(params.s_LatencyBuckets + 3, bucketLength, startTime, TStoredStringPtrDoubleUMap(1))) {
 }
 
-std::size_t CGathererTools::CSumGatherer::dimension() const
-{
+std::size_t CGathererTools::CSumGatherer::dimension() const {
     return 1;
 }
 
-SMetricFeatureData CGathererTools::CSumGatherer::featureData(core_t::TTime time, core_t::TTime /*bucketLength*/,
-                                                             const TSampleVec &emptySample) const
-{
+SMetricFeatureData
+CGathererTools::CSumGatherer::featureData(core_t::TTime time, core_t::TTime /*bucketLength*/, const TSampleVec& emptySample) const {
     using TStrCRef = boost::reference_wrapper<const std::string>;
     using TDouble1VecDoublePr = std::pair<TDouble1Vec, double>;
     using TStrCRefDouble1VecDoublePrPr = std::pair<TStrCRef, TDouble1VecDoublePr>;
     using TStrCRefDouble1VecDoublePrPrVec = std::vector<TStrCRefDouble1VecDoublePrPr>;
     using TStrCRefDouble1VecDoublePrPrVecVec = std::vector<TStrCRefDouble1VecDoublePrPrVec>;
 
-    const TSampleVec *sum = &m_BucketSums.get(time);
-    if (sum->empty())
-    {
+    const TSampleVec* sum = &m_BucketSums.get(time);
+    if (sum->empty()) {
         sum = &emptySample;
     }
     TStrCRefDouble1VecDoublePrPrVecVec influenceValues(m_InfluencerBucketSums.size());
-    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i)
-    {
-        const TStoredStringPtrDoubleUMap &influencerStats = m_InfluencerBucketSums[i].get(time);
+    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i) {
+        const TStoredStringPtrDoubleUMap& influencerStats = m_InfluencerBucketSums[i].get(time);
         influenceValues[i].reserve(influencerStats.size());
-        for (const auto &stat : influencerStats)
-        {
-            influenceValues[i].emplace_back(TStrCRef(*stat.first),
-                                            TDouble1VecDoublePr(TDouble1Vec{stat.second}, 1.0));
-
+        for (const auto& stat : influencerStats) {
+            influenceValues[i].emplace_back(TStrCRef(*stat.first), TDouble1VecDoublePr(TDouble1Vec{stat.second}, 1.0));
         }
     }
 
-    if (!sum->empty())
-    {
+    if (!sum->empty()) {
         return {(*sum)[0].time(),
                 (*sum)[0].value(),
                 (*sum)[0].varianceScale(),
                 (*sum)[0].count(),
                 influenceValues,
-                m_Classifier.isInteger()
-                && maths::CIntegerTools::isInteger(((*sum)[0].value())[0]),
-                m_Classifier.isNonNegative(), *sum};
+                m_Classifier.isInteger() && maths::CIntegerTools::isInteger(((*sum)[0].value())[0]),
+                m_Classifier.isNonNegative(),
+                *sum};
     }
     return {m_Classifier.isInteger(), m_Classifier.isNonNegative(), *sum};
 }
 
-bool CGathererTools::CSumGatherer::sample(core_t::TTime /*time*/, unsigned int /*sampleCount*/)
-{
+bool CGathererTools::CSumGatherer::sample(core_t::TTime /*time*/, unsigned int /*sampleCount*/) {
     return false;
 }
 
-void CGathererTools::CSumGatherer::startNewBucket(core_t::TTime time)
-{
-    TSampleVec &sum = m_BucketSums.earliest();
-    if (!sum.empty())
-    {
+void CGathererTools::CSumGatherer::startNewBucket(core_t::TTime time) {
+    TSampleVec& sum = m_BucketSums.earliest();
+    if (!sum.empty()) {
        m_Classifier.add(model_t::E_IndividualSumByBucketAndPerson, sum[0].value(), 1);
     }
     m_BucketSums.push(TSampleVec(), time);
-    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i) {
         m_InfluencerBucketSums[i].push(TStoredStringPtrDoubleUMap(1), time);
     }
 }
 
-void CGathererTools::CSumGatherer::resetBucket(core_t::TTime bucketStart)
-{
+void CGathererTools::CSumGatherer::resetBucket(core_t::TTime bucketStart) {
     m_BucketSums.get(bucketStart).clear();
-    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i) {
         m_InfluencerBucketSums[i].get(bucketStart).clear();
     }
 }
 
-void CGathererTools::CSumGatherer::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
-    inserter.insertLevel(CLASSIFIER_TAG,
-                         boost::bind(&CDataClassifier::acceptPersistInserter,
-                                     &m_Classifier,
-                                     _1));
-    if (m_BucketSums.size() > 0)
-    {
+void CGathererTools::CSumGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    inserter.insertLevel(CLASSIFIER_TAG, boost::bind(&CDataClassifier::acceptPersistInserter, &m_Classifier, _1));
+    if (m_BucketSums.size() > 0) {
         inserter.insertLevel(BUCKET_SUM_QUEUE_TAG,
-                             boost::bind(TSampleVecQueue::CSerializer<SSumSerializer>(),
-                                         boost::cref(m_BucketSums),
-                                         _1));
+                             boost::bind(TSampleVecQueue::CSerializer<SSumSerializer>(), boost::cref(m_BucketSums), _1));
     }
-    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i) {
         inserter.insertLevel(INFLUENCER_BUCKET_SUM_QUEUE_TAG,
                              boost::bind(TStoredStringPtrDoubleUMapQueue::CSerializer<SInfluencerSumSerializer>(),
                                          boost::cref(m_InfluencerBucketSums[i]),
@@ -297,92 +233,66 @@ void CGathererTools::CSumGatherer::acceptPersistInserter(core::CStatePersistInse
     }
 }
 
-bool CGathererTools::CSumGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
+bool CGathererTools::CSumGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
     std::size_t i = 0u;
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == CLASSIFIER_TAG)
-        {
-            if (traverser.traverseSubLevel(boost::bind(&CDataClassifier::acceptRestoreTraverser,
-                                                       &m_Classifier,
-                                                       _1)) == false)
-            {
+    do {
+        const std::string& name = traverser.name();
+        if (name == CLASSIFIER_TAG) {
+            if (traverser.traverseSubLevel(boost::bind(&CDataClassifier::acceptRestoreTraverser, &m_Classifier, _1)) == false) {
                 LOG_ERROR("Invalid classifier in " << traverser.value());
                 continue;
             }
-        }
-        else if (name == BUCKET_SUM_QUEUE_TAG)
-        {
+        } else if (name == BUCKET_SUM_QUEUE_TAG) {
             if (traverser.traverseSubLevel(
-                    boost::bind(TSampleVecQueue::CSerializer<SSumSerializer>(),
-                                boost::ref(m_BucketSums),
-                                _1)) == false)
-            {
+                    boost::bind(TSampleVecQueue::CSerializer<SSumSerializer>(), boost::ref(m_BucketSums), _1)) == false) {
                 LOG_ERROR("Invalid bucket queue in " << traverser.value());
                 return false;
             }
-        }
-        else if (name == INFLUENCER_BUCKET_SUM_QUEUE_TAG)
-        {
-            if (   i < m_InfluencerBucketSums.size()
-                && traverser.traverseSubLevel(
-                       boost::bind(TStoredStringPtrDoubleUMapQueue::CSerializer<SInfluencerSumSerializer>(TStoredStringPtrDoubleUMap(1)),
-                                   boost::ref(m_InfluencerBucketSums[i++]),
-                                   _1)) == false)
-            {
+        } else if (name == INFLUENCER_BUCKET_SUM_QUEUE_TAG) {
+            if (i < m_InfluencerBucketSums.size() &&
+                traverser.traverseSubLevel(
+                    boost::bind(TStoredStringPtrDoubleUMapQueue::CSerializer<SInfluencerSumSerializer>(TStoredStringPtrDoubleUMap(1)),
+                                boost::ref(m_InfluencerBucketSums[i++]),
+                                _1)) == false) {
                 LOG_ERROR("Invalid bucket queue in " << traverser.value());
                 return false;
             }
         }
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
 
-uint64_t CGathererTools::CSumGatherer::checksum() const
-{
+uint64_t CGathererTools::CSumGatherer::checksum() const {
     uint64_t seed = static_cast<uint64_t>(m_Classifier.isInteger());
     seed = maths::CChecksum::calculate(seed, m_Classifier.isNonNegative());
     seed = maths::CChecksum::calculate(seed, m_BucketSums);
     return maths::CChecksum::calculate(seed, m_InfluencerBucketSums);
 }
 
-void CGathererTools::CSumGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CGathererTools::CSumGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CSumGatherer");
     core::CMemoryDebug::dynamicSize("m_BucketSums", m_BucketSums, mem);
     core::CMemoryDebug::dynamicSize("m_InfluencerBucketSums", m_InfluencerBucketSums, mem);
 }
 
-std::size_t CGathererTools::CSumGatherer::memoryUsage() const
-{
-    return core::CMemory::dynamicSize(m_BucketSums)
-           + core::CMemory::dynamicSize(m_InfluencerBucketSums);
+std::size_t CGathererTools::CSumGatherer::memoryUsage() const {
    return core::CMemory::dynamicSize(m_BucketSums) + core::CMemory::dynamicSize(m_InfluencerBucketSums);
 }
 
-std::string CGathererTools::CSumGatherer::print() const
-{
+std::string CGathererTools::CSumGatherer::print() const {
     std::ostringstream result;
-    result << m_Classifier.isInteger()
-           << ' ' << m_BucketSums.print()
-           << ' ' << core::CContainerPrinter::print(m_InfluencerBucketSums);
+    result << m_Classifier.isInteger() << ' ' << m_BucketSums.print() << ' ' << core::CContainerPrinter::print(m_InfluencerBucketSums);
     return result.str();
 }
 
-bool CGathererTools::CSumGatherer::isRedundant(core_t::TTime /*samplingCutoffTime*/) const
-{
-    for (const auto &bucket : m_BucketSums)
-    {
-        if (bucket.empty() == false)
-        {
+bool CGathererTools::CSumGatherer::isRedundant(core_t::TTime /*samplingCutoffTime*/) const {
+    for (const auto& bucket : m_BucketSums) {
+        if (bucket.empty() == false) {
            return false;
        }
    }
    return true;
 }
-
 }
 }
diff --git a/lib/model/CHierarchicalResults.cc b/lib/model/CHierarchicalResults.cc
index cf9329b5d5..3b51ae46ca 100644
--- a/lib/model/CHierarchicalResults.cc
+++ b/lib/model/CHierarchicalResults.cc
@@ -23,17 +23,12 @@
 #include
 #include
 
+namespace ml {
+namespace model {
 
-namespace ml
-{
-namespace model
-{
+namespace hierarchical_results_detail {
 
-namespace hierarchical_results_detail
-{
-
-namespace
-{
+namespace {
 
 using TNodeCPtr = SNode::TNodeCPtr;
 
@@ -84,43 +79,34 @@ const std::string COUNT("count");
 
 core::CStoredStringPtr UNSET_STRING(core::CStoredStringPtr::makeStoredString(std::string()));
 
 //! Check if a string reference is unset.
-bool unset(core::CStoredStringPtr value)
-{
+bool unset(core::CStoredStringPtr value) {
     return value.get() == UNSET_STRING.get();
 }
 
 //! True if the node is a leaf.
-bool isLeaf(const SNode &node)
-{
+bool isLeaf(const SNode& node) {
     return node.s_Children.empty();
 }
 
 //! True if the node is aggregate.
-bool isAggregate(const SNode &node)
-{
+bool isAggregate(const SNode& node) {
     return node.s_Children.size() > 0;
 }
 
 //! Check if the underlying strings are equal.
-bool equal(const core::CStoredStringPtr &lhs, const core::CStoredStringPtr &rhs)
-{
+bool equal(const core::CStoredStringPtr& lhs, const core::CStoredStringPtr& rhs) {
     return unset(lhs) == unset(rhs) && *lhs == *rhs;
 }
 
 //! Check if both underlying strings are equal.
-bool equal(const TStoredStringPtrStoredStringPtrPr &lhs, const TStoredStringPtrStoredStringPtrPr &rhs)
-{
-    return unset(lhs.first) == unset(rhs.first)
-           && *lhs.first == *rhs.first
-           && unset(lhs.second) == unset(rhs.second)
-           && *lhs.second == *rhs.second;
+bool equal(const TStoredStringPtrStoredStringPtrPr& lhs, const TStoredStringPtrStoredStringPtrPr& rhs) {
+    return unset(lhs.first) == unset(rhs.first) && *lhs.first == *rhs.first && unset(lhs.second) == unset(rhs.second) &&
+           *lhs.second == *rhs.second;
 }
 
 //! Orders nodes by the value of their person field.
-struct SPersonValueLess
-{
-    bool operator()(const TNodeCPtr &lhs, const TNodeCPtr &rhs) const
-    {
+struct SPersonValueLess {
+    bool operator()(const TNodeCPtr& lhs, const TNodeCPtr& rhs) const {
         return maths::COrderings::lexicographical_compare(*lhs->s_Spec.s_PartitionFieldName,
                                                           *lhs->s_Spec.s_PartitionFieldValue,
                                                           *lhs->s_Spec.s_PersonFieldName,
@@ -135,10 +121,8 @@ struct SPersonValueLess
 };
 
 //! Orders nodes by the name of their person field.
-struct SPersonNameLess
-{
-    bool operator()(const TNodeCPtr &lhs, const TNodeCPtr &rhs) const
-    {
+struct SPersonNameLess {
+    bool operator()(const TNodeCPtr& lhs, const TNodeCPtr& rhs) const {
         return maths::COrderings::lexicographical_compare(*lhs->s_Spec.s_PartitionFieldName,
                                                           *lhs->s_Spec.s_PartitionFieldValue,
                                                           *lhs->s_Spec.s_PersonFieldName,
@@ -149,10 +133,8 @@ struct SPersonNameLess
 };
 
 //! Orders nodes by the value of their partition field.
-struct SPartitionValueLess
-{
-    bool operator()(const TNodeCPtr &lhs, const TNodeCPtr &rhs) const
-    {
+struct SPartitionValueLess {
+    bool operator()(const TNodeCPtr& lhs, const TNodeCPtr& rhs) const {
         return maths::COrderings::lexicographical_compare(*lhs->s_Spec.s_PartitionFieldName,
                                                           *lhs->s_Spec.s_PartitionFieldValue,
                                                           *rhs->s_Spec.s_PartitionFieldName,
@@ -161,57 +143,44 @@ struct SPartitionValueLess
 };
 
 //! Orders nodes by the name of their partition field.
-struct SPartitionNameLess
-{
-    bool operator()(const TNodeCPtr &lhs, const TNodeCPtr &rhs) const
-    {
+struct SPartitionNameLess {
+    bool operator()(const TNodeCPtr& lhs, const TNodeCPtr& rhs) const {
         return *lhs->s_Spec.s_PartitionFieldName < *rhs->s_Spec.s_PartitionFieldName;
     }
 };
 
 //! Return the node pointer.
-SNode *address(SNode *ptr)
-{
+SNode* address(SNode* ptr) {
     return ptr;
 }
 
 //! Get the address of a node value.
-SNode *address(SNode &value)
-{
+SNode* address(SNode& value) {
     return &value;
 }
 
 //! Aggregate the nodes in a layer.
 template<typename ITR, typename FACTORY>
-void aggregateLayer(ITR beginLayer,
-                    ITR endLayer,
-                    CHierarchicalResults &results,
-                    FACTORY newNode,
-                    std::vector<SNode*> &newLayer)
-{
+void aggregateLayer(ITR beginLayer, ITR endLayer, CHierarchicalResults& results, FACTORY newNode, std::vector<SNode*>& newLayer) {
     using TNodePtrVec = std::vector<SNode*>;
     using TNodeCPtrNodePtrVecMap = std::map<TNodeCPtr, TNodePtrVec>;
 
     newLayer.clear();
 
     TNodeCPtrNodePtrVecMap aggregation;
-    for (ITR i = beginLayer; i != endLayer; ++i)
-    {
+    for (ITR i = beginLayer; i != endLayer; ++i) {
        aggregation[address(*i)].push_back(address(*i));
     }
 
     newLayer.reserve(aggregation.size());
-    for (const auto &children : aggregation)
-    {
+    for (const auto& children : aggregation) {
         LOG_TRACE("aggregating = " << core::CContainerPrinter::print(children.second));
-        if (children.second.size() > 1)
-        {
-            SNode &aggregate = (results.*newNode)();
+        if (children.second.size() > 1) {
+            SNode& aggregate = (results.*newNode)();
             bool population = false;
             aggregate.s_Children.reserve(children.second.size());
-            for (const auto &child : children.second)
-            {
+            for (const auto& child : children.second) {
                 aggregate.s_Children.push_back(child);
                 child->s_Parent = &aggregate;
                 population |= child->s_Spec.s_IsPopulation;
@@ -219,9 +188,7 @@ void aggregateLayer(ITR beginLayer,
             aggregate.s_Spec.s_IsPopulation = population;
             aggregate.propagateFields();
             newLayer.push_back(&aggregate);
-        }
-        else
-        {
+        } else {
             newLayer.push_back(children.second[0]);
         }
     }
@@ -235,189 +202,141 @@ void aggregateLayer(ITR beginLayer,
 
 //! of a collection of hierarchical results. It propagates each
 //! influencing field value to the highest node in the tree such
 //! that it is either the person or partition field of that node.
-class CCommonInfluencePropagator : public CHierarchicalResultsVisitor
-{
-    public:
-        virtual void visit(const CHierarchicalResults &/*results*/,
-                           const TNode &node,
-                           bool /*pivot*/)
-        {
-            if (this->isLeaf(node))
-            {
-                std::sort(node.s_AnnotatedProbability.s_Influences.begin(),
-                          node.s_AnnotatedProbability.s_Influences.end(),
-                          maths::COrderings::SFirstLess());
-            }
-            else
-            {
-                for (const auto &child : node.s_Children)
-                {
-                    for (const auto &influence : child->s_AnnotatedProbability.s_Influences)
-                    {
-                        if (   equal({node.s_Spec.s_PartitionFieldName,
-                                      node.s_Spec.s_PartitionFieldValue}, influence.first)
-                            || equal({node.s_Spec.s_PersonFieldName,
-                                      node.s_Spec.s_PersonFieldValue}, influence.first))
-                        {
-                            auto i = std::lower_bound(node.s_AnnotatedProbability.s_Influences.begin(),
-                                                      node.s_AnnotatedProbability.s_Influences.end(),
-                                                      influence.first,
-                                                      maths::COrderings::SFirstLess());
-                            if (i == node.s_AnnotatedProbability.s_Influences.end())
-                            {
-                                node.s_AnnotatedProbability.s_Influences.push_back(influence);
-                            }
-                            else if (!equal(i->first, influence.first))
-                            {
-                                node.s_AnnotatedProbability.s_Influences.insert(i, influence);
-                            }
+class CCommonInfluencePropagator : public CHierarchicalResultsVisitor {
+public:
+    virtual void visit(const CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+        if (this->isLeaf(node)) {
+            std::sort(node.s_AnnotatedProbability.s_Influences.begin(),
+                      node.s_AnnotatedProbability.s_Influences.end(),
+                      maths::COrderings::SFirstLess());
+        } else {
+            for (const auto& child : node.s_Children) {
+                for (const auto& influence : child->s_AnnotatedProbability.s_Influences) {
+                    if (equal({node.s_Spec.s_PartitionFieldName, node.s_Spec.s_PartitionFieldValue}, influence.first) ||
+                        equal({node.s_Spec.s_PersonFieldName, node.s_Spec.s_PersonFieldValue}, influence.first)) {
+                        auto i = std::lower_bound(node.s_AnnotatedProbability.s_Influences.begin(),
+                                                  node.s_AnnotatedProbability.s_Influences.end(),
+                                                  influence.first,
+                                                  maths::COrderings::SFirstLess());
+                        if (i == node.s_AnnotatedProbability.s_Influences.end()) {
+                            node.s_AnnotatedProbability.s_Influences.push_back(influence);
+                        } else if (!equal(i->first, influence.first)) {
+                            node.s_AnnotatedProbability.s_Influences.insert(i, influence);
                         }
                     }
                }
            }
        }
+    }
 };
 } // unnamed::
 
-SResultSpec::SResultSpec() :
-    s_Detector(0),
-    s_IsSimpleCount(false),
-    s_IsPopulation(false),
-    s_UseNull(false),
-    s_PartitionFieldName(UNSET_STRING),
-    s_PartitionFieldValue(UNSET_STRING),
-    s_PersonFieldName(UNSET_STRING),
-    s_PersonFieldValue(UNSET_STRING),
-    s_ValueFieldName(UNSET_STRING),
-    s_FunctionName(UNSET_STRING),
-    s_ByFieldName(UNSET_STRING),
-    s_Function(function_t::E_IndividualCount)
-{
+SResultSpec::SResultSpec()
+    : s_Detector(0),
+      s_IsSimpleCount(false),
+      s_IsPopulation(false),
+      s_UseNull(false),
+      s_PartitionFieldName(UNSET_STRING),
+      s_PartitionFieldValue(UNSET_STRING),
+      s_PersonFieldName(UNSET_STRING),
+      s_PersonFieldValue(UNSET_STRING),
+      s_ValueFieldName(UNSET_STRING),
+      s_FunctionName(UNSET_STRING),
+      s_ByFieldName(UNSET_STRING),
+      s_Function(function_t::E_IndividualCount) {
 }
 
-std::string SResultSpec::print() const
-{
-    return '\'' + core::CStringUtils::typeToStringPretty(s_IsSimpleCount)
-           + '/' + core::CStringUtils::typeToStringPretty(s_IsPopulation)
-           + '/' + *s_FunctionName
-           + '/' + *s_PartitionFieldName
-           + '/' + *s_PartitionFieldValue
-           + '/' + *s_PersonFieldName
-           + '/' + *s_PersonFieldValue
-           + '/' + *s_ValueFieldName + '\'';
+std::string SResultSpec::print() const {
    return '\'' + core::CStringUtils::typeToStringPretty(s_IsSimpleCount) + '/' + core::CStringUtils::typeToStringPretty(s_IsPopulation) +
           '/' + *s_FunctionName + '/' + *s_PartitionFieldName + '/' + *s_PartitionFieldValue + '/' + *s_PersonFieldName + '/' +
           *s_PersonFieldValue + '/' + *s_ValueFieldName + '\'';
 }
 
-void SResultSpec::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void SResultSpec::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     inserter.insertValue(DETECTOR_ID_TAG, s_Detector);
     inserter.insertValue(SIMPLE_COUNT_TAG, s_IsSimpleCount);
     inserter.insertValue(POPULATION_TAG, s_IsPopulation);
     inserter.insertValue(USE_NULL_TAG, s_UseNull);
     core::CPersistUtils::persist(FUNCTION_TAG, s_Function, inserter);
-    if (!unset(s_PartitionFieldName))
-    {
+    if (!unset(s_PartitionFieldName)) {
         inserter.insertValue(PARTITION_FIELD_NAME_TAG, *s_PartitionFieldName);
     }
-    if (!unset(s_PartitionFieldValue))
-    {
+    if (!unset(s_PartitionFieldValue)) {
         inserter.insertValue(PARTITION_FIELD_VALUE_TAG, *s_PartitionFieldValue);
     }
-    if (!unset(s_PersonFieldName))
-    {
+    if (!unset(s_PersonFieldName)) {
         inserter.insertValue(PERSON_FIELD_NAME_TAG, *s_PersonFieldName);
     }
-    if (!unset(s_PersonFieldValue))
-    {
+    if (!unset(s_PersonFieldValue)) {
         inserter.insertValue(PERSON_FIELD_VALUE_TAG, *s_PersonFieldValue);
     }
-    if (!unset(s_ValueFieldName))
-    {
+    if (!unset(s_ValueFieldName)) {
         inserter.insertValue(VALUE_FIELD_NAME_TAG, *s_ValueFieldName);
     }
-    if (!unset(s_FunctionName))
-    {
+    if (!unset(s_FunctionName)) {
         inserter.insertValue(FUNCTION_NAME_TAG, *s_FunctionName);
     }
-    if (!unset(s_ByFieldName))
-    {
+    if (!unset(s_ByFieldName)) {
         inserter.insertValue(BY_FIELD_NAME_TAG, *s_ByFieldName);
     }
 }
 
-bool SResultSpec::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
+bool SResultSpec::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
         RESTORE_BUILT_IN(DETECTOR_ID_TAG, s_Detector)
         RESTORE_BUILT_IN(SIMPLE_COUNT_TAG, s_IsSimpleCount)
         RESTORE_BUILT_IN(POPULATION_TAG, s_IsPopulation)
         RESTORE_BUILT_IN(USE_NULL_TAG, s_UseNull)
-        RESTORE_SETUP_TEARDOWN(FUNCTION_TAG,
-                               int f = 0,
-                               core::CPersistUtils::restore(FUNCTION_TAG, f, traverser),
-                               s_Function = function_t::EFunction(f))
-        RESTORE_NO_ERROR(PARTITION_FIELD_NAME_TAG,
-                         s_PartitionFieldName = CStringStore::names().get(traverser.value()))
-        RESTORE_NO_ERROR(PARTITION_FIELD_VALUE_TAG,
-                         s_PartitionFieldValue = CStringStore::names().get(traverser.value()))
-        RESTORE_NO_ERROR(PERSON_FIELD_NAME_TAG,
-                         s_PersonFieldName = CStringStore::names().get(traverser.value()))
-        RESTORE_NO_ERROR(PERSON_FIELD_VALUE_TAG,
-                         s_PersonFieldValue = CStringStore::names().get(traverser.value()))
-        RESTORE_NO_ERROR(VALUE_FIELD_NAME_TAG,
-                         s_ValueFieldName = CStringStore::names().get(traverser.value()))
-        RESTORE_NO_ERROR(FUNCTION_NAME_TAG,
-                         s_FunctionName = CStringStore::names().get(traverser.value()))
-        RESTORE_NO_ERROR(BY_FIELD_NAME_TAG,
-                         s_ByFieldName = CStringStore::names().get(traverser.value()))
-    }
-    while (traverser.next());
+        RESTORE_SETUP_TEARDOWN(
+            FUNCTION_TAG, int f = 0, core::CPersistUtils::restore(FUNCTION_TAG, f, traverser), s_Function = function_t::EFunction(f))
+        RESTORE_NO_ERROR(PARTITION_FIELD_NAME_TAG, s_PartitionFieldName = CStringStore::names().get(traverser.value()))
+        RESTORE_NO_ERROR(PARTITION_FIELD_VALUE_TAG, s_PartitionFieldValue = CStringStore::names().get(traverser.value()))
+        RESTORE_NO_ERROR(PERSON_FIELD_NAME_TAG, s_PersonFieldName = CStringStore::names().get(traverser.value()))
+        RESTORE_NO_ERROR(PERSON_FIELD_VALUE_TAG, s_PersonFieldValue = CStringStore::names().get(traverser.value()))
+        RESTORE_NO_ERROR(VALUE_FIELD_NAME_TAG, s_ValueFieldName = CStringStore::names().get(traverser.value()))
+        RESTORE_NO_ERROR(FUNCTION_NAME_TAG, s_FunctionName = CStringStore::names().get(traverser.value()))
+        RESTORE_NO_ERROR(BY_FIELD_NAME_TAG, s_ByFieldName = CStringStore::names().get(traverser.value()))
+    } while (traverser.next());
     return true;
 }
 
-SNode::SNode() :
-    s_Parent(0),
-    s_AnnotatedProbability(1.0),
-    s_Detector(-3),
-    s_AggregationStyle(-1),
-    s_SmallestChildProbability(1.0),
-    s_SmallestDescendantProbability(1.0),
-    s_RawAnomalyScore(0.0),
-    s_NormalizedAnomalyScore(0.0),
-    s_Model(0),
-    s_BucketStartTime(0),
-    s_BucketLength(0)
-{}
-
-SNode::SNode(const SResultSpec &simpleSearch, SAnnotatedProbability &annotatedProbability) :
-    s_Parent(0),
-    s_Spec(simpleSearch),
-    s_Detector(simpleSearch.s_Detector),
-    s_AggregationStyle(-1),
-    s_SmallestChildProbability(annotatedProbability.s_Probability),
-    s_SmallestDescendantProbability(1.0),
-    s_RawAnomalyScore(0.0),
-    s_NormalizedAnomalyScore(0.0),
-    s_Model(0),
-    s_BucketStartTime(0),
-    s_BucketLength(0)
-{
+SNode::SNode()
+    : s_Parent(0),
+      s_AnnotatedProbability(1.0),
+      s_Detector(-3),
+      s_AggregationStyle(-1),
+      s_SmallestChildProbability(1.0),
+      s_SmallestDescendantProbability(1.0),
+      s_RawAnomalyScore(0.0),
+      s_NormalizedAnomalyScore(0.0),
+      s_Model(0),
+      s_BucketStartTime(0),
+      s_BucketLength(0) {
+}
+
+SNode::SNode(const SResultSpec& simpleSearch, SAnnotatedProbability& annotatedProbability)
+    : s_Parent(0),
+      s_Spec(simpleSearch),
+      s_Detector(simpleSearch.s_Detector),
+      s_AggregationStyle(-1),
+      s_SmallestChildProbability(annotatedProbability.s_Probability),
+      s_SmallestDescendantProbability(1.0),
+      s_RawAnomalyScore(0.0),
+      s_NormalizedAnomalyScore(0.0),
+      s_Model(0),
+      s_BucketStartTime(0),
+      s_BucketLength(0) {
     s_AnnotatedProbability.swap(annotatedProbability);
 }
 
-double SNode::probability() const
-{
+double SNode::probability() const {
     return s_AnnotatedProbability.s_Probability;
 }
 
-void SNode::propagateFields()
-{
-    if (s_Children.empty())
-    {
+void SNode::propagateFields() {
+    if (s_Children.empty()) {
         return;
     }
 
@@ -426,51 +345,34 @@ void SNode::propagateFields()
     s_Spec.s_PersonFieldName = s_Children[0]->s_Spec.s_PersonFieldName;
     s_Spec.s_PersonFieldValue = s_Children[0]->s_Spec.s_PersonFieldValue;
     s_BucketStartTime = s_Children[0]->s_BucketStartTime;
-    for (std::size_t i = 1u; i < s_Children.size(); ++i)
-    {
-        if (   !unset(s_Spec.s_PartitionFieldName)
-            && !equal(s_Spec.s_PartitionFieldName,
-                      s_Children[i]->s_Spec.s_PartitionFieldName))
-        {
+    for (std::size_t i = 1u; i < s_Children.size(); ++i) {
+        if (!unset(s_Spec.s_PartitionFieldName) && !equal(s_Spec.s_PartitionFieldName, s_Children[i]->s_Spec.s_PartitionFieldName)) {
             s_Spec.s_PartitionFieldName = UNSET_STRING;
             s_Spec.s_PartitionFieldValue = UNSET_STRING;
             s_Spec.s_PersonFieldName = UNSET_STRING;
             s_Spec.s_PersonFieldValue = UNSET_STRING;
         }
-        if (   !unset(s_Spec.s_PartitionFieldValue)
-            && !equal(s_Spec.s_PartitionFieldValue,
-                      s_Children[i]->s_Spec.s_PartitionFieldValue))
-        {
+        if (!unset(s_Spec.s_PartitionFieldValue) && !equal(s_Spec.s_PartitionFieldValue, s_Children[i]->s_Spec.s_PartitionFieldValue)) {
             s_Spec.s_PartitionFieldValue = UNSET_STRING;
             s_Spec.s_PersonFieldName = UNSET_STRING;
             s_Spec.s_PersonFieldValue = UNSET_STRING;
         }
-        if (   !unset(s_Spec.s_PersonFieldName)
-            && !equal(s_Spec.s_PersonFieldName,
-                      s_Children[i]->s_Spec.s_PersonFieldName))
-        {
+        if (!unset(s_Spec.s_PersonFieldName) && !equal(s_Spec.s_PersonFieldName, s_Children[i]->s_Spec.s_PersonFieldName)) {
             s_Spec.s_PersonFieldName = UNSET_STRING;
         }
-        if (   !unset(s_Spec.s_PersonFieldValue)
-            && !equal(s_Spec.s_PersonFieldValue,
-                      s_Children[i]->s_Spec.s_PersonFieldValue))
-        {
+        if (!unset(s_Spec.s_PersonFieldValue) && !equal(s_Spec.s_PersonFieldValue, s_Children[i]->s_Spec.s_PersonFieldValue)) {
             s_Spec.s_PersonFieldValue = UNSET_STRING;
         }
     }
 }
 
-std::string SNode::print() const
-{
-    return s_Spec.print()
-           + ": " + core::CStringUtils::typeToStringPretty(this->probability())
-           + ", " + core::CStringUtils::typeToStringPretty(s_RawAnomalyScore)
-           + (s_AnnotatedProbability.s_Influences.empty() ? "" :
-              ", " + core::CContainerPrinter::print(s_AnnotatedProbability.s_Influences));
+std::string SNode::print() const {
    return s_Spec.print() + ": " + core::CStringUtils::typeToStringPretty(this->probability()) + ", " +
           core::CStringUtils::typeToStringPretty(s_RawAnomalyScore) +
           (s_AnnotatedProbability.s_Influences.empty() ? "" : ", " + core::CContainerPrinter::print(s_AnnotatedProbability.s_Influences));
 }
 
-void SNode::swap(SNode &other)
-{
+void SNode::swap(SNode& other) {
     std::swap(s_Parent, other.s_Parent);
     s_Children.swap(other.s_Children);
     std::swap(s_Spec, other.s_Spec);
@@ -486,9 +388,7 @@ void SNode::swap(SNode &other)
     std::swap(s_BucketLength, other.s_BucketLength);
 }
 
-void SNode::acceptPersistInserter1(core::CStatePersistInserter &inserter,
-                                   TNodePtrSizeUMap &nodePointers) const
-{
+void SNode::acceptPersistInserter1(core::CStatePersistInserter& inserter, TNodePtrSizeUMap& nodePointers) const {
     std::size_t index = nodePointers.emplace(this, nodePointers.size()).first->second;
     inserter.insertValue(SELF_TAG, index);
     core::CPersistUtils::persist(SPEC_TAG, s_Spec, inserter);
@@ -503,25 +403,19 @@ void SNode::acceptPersistInserter1(core::CStatePersistInserter &inserter,
     inserter.insertValue(BUCKET_LENGTH_TAG, s_BucketLength);
 }
 
-void SNode::acceptPersistInserter2(core::CStatePersistInserter &inserter,
-                                   const TNodePtrSizeUMap &nodePointers) const
-{
-    if (s_Parent != 0)
-    {
+void SNode::acceptPersistInserter2(core::CStatePersistInserter& inserter, const TNodePtrSizeUMap& nodePointers) const {
+    if (s_Parent != 0) {
         auto found = nodePointers.find(s_Parent);
-        if (found == nodePointers.end())
-        {
+        if (found == nodePointers.end()) {
             LOG_ERROR("Parent not in persistence hierarchy!");
             return;
         }
         core::CPersistUtils::persist(PARENT_TAG, found->second, inserter);
     }
-    for (const auto &child : s_Children)
-    {
+    for (const auto& child : s_Children) {
         auto found = nodePointers.find(child);
-        if (found == nodePointers.end())
-        {
+        if (found == nodePointers.end()) {
             LOG_ERROR("Child not in persistence hierarchy!");
             return;
         }
@@ -529,19 +423,15 @@ void SNode::acceptPersistInserter2(core::CStatePersistInserter &inserter,
     }
 }
 
-bool SNode::acceptRestoreTraverser1(core::CStateRestoreTraverser &traverser,
-                                    TSizeNodePtrUMap &nodePointers)
-{
-    do
-    {
-        const std::string &name = traverser.name();
+bool SNode::acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser, TSizeNodePtrUMap& nodePointers) {
+    do {
+        const std::string& name = traverser.name();
         RESTORE_SETUP_TEARDOWN(SELF_TAG,
                                std::size_t index = 0,
                                core::CStringUtils::stringToType(traverser.value(), index),
                                nodePointers.insert(std::make_pair(index, this)))
         RESTORE(SPEC_TAG, core::CPersistUtils::restore(SPEC_TAG,
s_Spec, traverser)) - RESTORE(ANNOTATED_PROBABILITY_TAG, - core::CPersistUtils::restore(ANNOTATED_PROBABILITY_TAG, s_AnnotatedProbability, traverser)) + RESTORE(ANNOTATED_PROBABILITY_TAG, core::CPersistUtils::restore(ANNOTATED_PROBABILITY_TAG, s_AnnotatedProbability, traverser)) RESTORE_BUILT_IN(DETECTOR_TAG, s_Detector); RESTORE_BUILT_IN(AGGREGATION_STYLE_TAG, s_AggregationStyle); RESTORE_BUILT_IN(SMALLEST_CHILD_TAG, s_SmallestChildProbability) @@ -550,55 +440,42 @@ bool SNode::acceptRestoreTraverser1(core::CStateRestoreTraverser &traverser, RESTORE_BUILT_IN(NORMALIZED_ANOMALY_SCORE_TAG, s_NormalizedAnomalyScore) RESTORE_BUILT_IN(BUCKET_START_TAG, s_BucketStartTime) RESTORE_BUILT_IN(BUCKET_LENGTH_TAG, s_BucketLength) - } - while (traverser.next()); + } while (traverser.next()); return true; } -bool SNode::acceptRestoreTraverser2(core::CStateRestoreTraverser &traverser, - const TSizeNodePtrUMap &nodePointers) -{ - do - { - const std::string &name = traverser.name(); +bool SNode::acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser, const TSizeNodePtrUMap& nodePointers) { + do { + const std::string& name = traverser.name(); std::size_t index = 0; - if (name == PARENT_TAG) - { - if (!core::CPersistUtils::restore(PARENT_TAG, index, traverser)) - { + if (name == PARENT_TAG) { + if (!core::CPersistUtils::restore(PARENT_TAG, index, traverser)) { LOG_ERROR("Restore error for " << traverser.name() << " / " << traverser.value()); return false; } auto found = nodePointers.find(index); - if (found == nodePointers.end()) - { + if (found == nodePointers.end()) { LOG_ERROR("Parent not in persistence hierarchy!"); return false; } s_Parent = found->second; - } - else if (name == CHILD_TAG) - { - if (!core::CPersistUtils::restore(CHILD_TAG, index, traverser)) - { + } else if (name == CHILD_TAG) { + if (!core::CPersistUtils::restore(CHILD_TAG, index, traverser)) { LOG_ERROR("Restore error for " << traverser.name() << " / " << traverser.value()); return false; } auto found = nodePointers.find(index); - if (found == nodePointers.end()) - { + if (found == nodePointers.end()) { LOG_ERROR("Parent not in persistence hierarchy!"); return false; } s_Children.push_back(found->second); } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void swap(SNode &node1, SNode &node2) -{ +void swap(SNode& node1, SNode& node2) { node1.swap(node2); } @@ -606,15 +483,12 @@ void swap(SNode &node1, SNode &node2) using namespace hierarchical_results_detail; +CHierarchicalResults::CHierarchicalResults() : m_ResultType(model_t::CResultType::E_Final) { +} -CHierarchicalResults::CHierarchicalResults() : - m_ResultType(model_t::CResultType::E_Final) -{} - -void CHierarchicalResults::addSimpleCountResult(SAnnotatedProbability &annotatedProbability, - const CAnomalyDetectorModel *model, - core_t::TTime bucketStartTime) -{ +void CHierarchicalResults::addSimpleCountResult(SAnnotatedProbability& annotatedProbability, + const CAnomalyDetectorModel* model, + core_t::TTime bucketStartTime) { TResultSpec search; search.s_IsSimpleCount = true; search.s_IsPopulation = false; @@ -628,7 +502,7 @@ void CHierarchicalResults::addSimpleCountResult(SAnnotatedProbability &annotated // For simple counts we set all the anomaly scores to 0 // and all the probabilities to 100%. - TNode &leaf = this->newLeaf(search, annotatedProbability); + TNode& leaf = this->newLeaf(search, annotatedProbability); leaf.s_Model = model; leaf.s_BucketStartTime = bucketStartTime; leaf.s_BucketLength = (model ? 
model->bucketLength() : 0); @@ -636,17 +510,16 @@ void CHierarchicalResults::addSimpleCountResult(SAnnotatedProbability &annotated void CHierarchicalResults::addModelResult(int detector, bool isPopulation, - const std::string &functionName, + const std::string& functionName, function_t::EFunction function, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &personFieldName, - const std::string &personFieldValue, - const std::string &valueFieldName, - SAnnotatedProbability &annotatedProbability, - const CAnomalyDetectorModel *model, - core_t::TTime bucketStartTime) -{ + const std::string& partitionFieldName, + const std::string& partitionFieldValue, + const std::string& personFieldName, + const std::string& personFieldValue, + const std::string& valueFieldName, + SAnnotatedProbability& annotatedProbability, + const CAnomalyDetectorModel* model, + core_t::TTime bucketStartTime) { TResultSpec spec; spec.s_Detector = detector; spec.s_IsSimpleCount = false; @@ -660,19 +533,17 @@ void CHierarchicalResults::addModelResult(int detector, spec.s_PersonFieldValue = CStringStore::names().get(personFieldValue); spec.s_ValueFieldName = CStringStore::names().get(valueFieldName); spec.s_ByFieldName = (model ? CStringStore::names().get(model->dataGatherer().searchKey().byFieldName()) : UNSET_STRING); - TNode &leaf = this->newLeaf(spec, annotatedProbability); + TNode& leaf = this->newLeaf(spec, annotatedProbability); leaf.s_Model = model; leaf.s_BucketStartTime = bucketStartTime; leaf.s_BucketLength = (model ? model->bucketLength() : 0); } -void CHierarchicalResults::addInfluencer(const std::string &name) -{ +void CHierarchicalResults::addInfluencer(const std::string& name) { this->newPivotRoot(CStringStore::influencers().get(name)); } -void CHierarchicalResults::buildHierarchy() -{ +void CHierarchicalResults::buildHierarchy() { using TNodePtrVec = std::vector; m_Nodes.erase(std::remove_if(m_Nodes.begin(), m_Nodes.end(), isAggregate), m_Nodes.end()); @@ -680,18 +551,14 @@ void CHierarchicalResults::buildHierarchy() // To make life easier for downstream code, bring a simple count node // to the front of the deque (if there is one). 
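In isolation, the move-to-front step below is just a stable rotation: the flagged element comes first and every other element keeps its relative order. A minimal standalone sketch with hypothetical data, using std::rotate as the standard-library equivalent of the pairwise iter_swap loop in the patch:

#include <algorithm>
#include <deque>
#include <iostream>

int main() {
    std::deque<int> nodes{3, 7, 42, 9}; // pretend 42 marks the simple count result
    auto simpleCount = std::find(nodes.begin(), nodes.end(), 42);
    if (simpleCount != nodes.end()) {
        // Rotate [begin, simpleCount + 1) so the found element lands at the
        // front; the elements it hops over keep their relative order.
        std::rotate(nodes.begin(), simpleCount, simpleCount + 1);
    }
    for (int n : nodes) {
        std::cout << n << ' '; // prints: 42 3 7 9
    }
    std::cout << '\n';
}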
auto simpleCountItr = m_Nodes.end(); - for (auto i = m_Nodes.begin(); i != m_Nodes.end(); ++i) - { + for (auto i = m_Nodes.begin(); i != m_Nodes.end(); ++i) { i->s_Parent = 0; - if (i->s_Spec.s_IsSimpleCount) - { + if (i->s_Spec.s_IsSimpleCount) { simpleCountItr = i; } } - if (simpleCountItr != m_Nodes.end()) - { - while (simpleCountItr != m_Nodes.begin()) - { + if (simpleCountItr != m_Nodes.end()) { + while (simpleCountItr != m_Nodes.begin()) { auto next = simpleCountItr; std::iter_swap(--simpleCountItr, next); } @@ -702,18 +569,14 @@ void CHierarchicalResults::buildHierarchy() LOG_TRACE("Distinct values of the person field"); { - aggregateLayer(m_Nodes.begin(), m_Nodes.end(), - *this, &CHierarchicalResults::newNode, - layer); + aggregateLayer(m_Nodes.begin(), m_Nodes.end(), *this, &CHierarchicalResults::newNode, layer); LOG_TRACE("layer = " << core::CContainerPrinter::print(layer)); } LOG_TRACE("Distinct person field names"); { newLayer.reserve(layer.size()); - aggregateLayer(layer.begin(), layer.end(), - *this, &CHierarchicalResults::newNode, - newLayer); + aggregateLayer(layer.begin(), layer.end(), *this, &CHierarchicalResults::newNode, newLayer); newLayer.swap(layer); LOG_TRACE("layer = " << core::CContainerPrinter::print(layer)); } @@ -721,9 +584,7 @@ void CHierarchicalResults::buildHierarchy() LOG_TRACE("Distinct partition field values"); { newLayer.reserve(layer.size()); - aggregateLayer(layer.begin(), layer.end(), - *this, &CHierarchicalResults::newNode, - newLayer); + aggregateLayer(layer.begin(), layer.end(), *this, &CHierarchicalResults::newNode, newLayer); newLayer.swap(layer); LOG_TRACE("layer = " << core::CContainerPrinter::print(layer)); } @@ -731,19 +592,15 @@ void CHierarchicalResults::buildHierarchy() LOG_TRACE("Distinct partition field names"); { newLayer.reserve(layer.size()); - aggregateLayer(layer.begin(), layer.end(), - *this, &CHierarchicalResults::newNode, - newLayer); + aggregateLayer(layer.begin(), layer.end(), *this, &CHierarchicalResults::newNode, newLayer); newLayer.swap(layer); LOG_TRACE("layer = " << core::CContainerPrinter::print(layer)); } - if (layer.size() > 1) - { - TNode &root = this->newNode(); + if (layer.size() > 1) { + TNode& root = this->newNode(); bool population = false; - for (std::size_t i = 0u; i < layer.size(); ++i) - { + for (std::size_t i = 0u; i < layer.size(); ++i) { root.s_Children.push_back(layer[i]); layer[i]->s_Parent = &root; population |= layer[i]->s_Spec.s_IsPopulation; @@ -758,137 +615,106 @@ void CHierarchicalResults::buildHierarchy() this->bottomUpBreadthFirst(influencePropagator); } -void CHierarchicalResults::createPivots() -{ +void CHierarchicalResults::createPivots() { LOG_TRACE("Creating pivots"); - for (const auto &node : m_Nodes) - { - const auto &parentInfluences = node.s_Parent->s_AnnotatedProbability.s_Influences; - for (const auto &influence : node.s_AnnotatedProbability.s_Influences) - { - if (node.s_Parent && std::binary_search(parentInfluences.begin(), - parentInfluences.end(), - influence, - maths::COrderings::SFirstLess())) - { + for (const auto& node : m_Nodes) { + const auto& parentInfluences = node.s_Parent->s_AnnotatedProbability.s_Influences; + for (const auto& influence : node.s_AnnotatedProbability.s_Influences) { + if (node.s_Parent && + std::binary_search(parentInfluences.begin(), parentInfluences.end(), influence, maths::COrderings::SFirstLess())) { continue; } this->newPivot(influence.first).s_Children.push_back(&node); } } - for (auto &pivot : m_PivotNodes) - { - TNode &root = 
this->newPivotRoot(pivot.second.s_Spec.s_PersonFieldName); + for (auto& pivot : m_PivotNodes) { + TNode& root = this->newPivotRoot(pivot.second.s_Spec.s_PersonFieldName); root.s_Children.push_back(&pivot.second); pivot.second.s_Parent = &root; } } -const CHierarchicalResults::TNode *CHierarchicalResults::root() const -{ - if (m_Nodes.empty()) - { +const CHierarchicalResults::TNode* CHierarchicalResults::root() const { + if (m_Nodes.empty()) { return 0; } - if (m_Nodes.size() == 1) - { + if (m_Nodes.size() == 1) { return &m_Nodes.front(); } - const TNode &result = m_Nodes.back(); - if (isLeaf(result)) - { + const TNode& result = m_Nodes.back(); + if (isLeaf(result)) { return 0; } return &result; } -const CHierarchicalResults::TNode *CHierarchicalResults::influencer(const TStoredStringPtr &influencerName, - const TStoredStringPtr &influencerValue) const -{ +const CHierarchicalResults::TNode* CHierarchicalResults::influencer(const TStoredStringPtr& influencerName, + const TStoredStringPtr& influencerValue) const { auto i = m_PivotNodes.find({influencerName, influencerValue}); return i != m_PivotNodes.end() ? &i->second : 0; } -void CHierarchicalResults::bottomUpBreadthFirst(CHierarchicalResultsVisitor &visitor) const -{ - for (const auto &node : m_Nodes) - { +void CHierarchicalResults::bottomUpBreadthFirst(CHierarchicalResultsVisitor& visitor) const { + for (const auto& node : m_Nodes) { visitor.visit(*this, node, /*pivot =*/false); } } -void CHierarchicalResults::topDownBreadthFirst(CHierarchicalResultsVisitor &visitor) const -{ - for (auto i = m_Nodes.rbegin(); i != m_Nodes.rend(); ++i) - { +void CHierarchicalResults::topDownBreadthFirst(CHierarchicalResultsVisitor& visitor) const { + for (auto i = m_Nodes.rbegin(); i != m_Nodes.rend(); ++i) { visitor.visit(*this, *i, /*pivot =*/false); } } -void CHierarchicalResults::postorderDepthFirst(CHierarchicalResultsVisitor &visitor) const -{ - if (const TNode *root = this->root()) - { +void CHierarchicalResults::postorderDepthFirst(CHierarchicalResultsVisitor& visitor) const { + if (const TNode* root = this->root()) { this->postorderDepthFirst(root, visitor); } } -void CHierarchicalResults::pivotsBottomUpBreadthFirst(CHierarchicalResultsVisitor &visitor) const -{ - for (const auto &pivot : m_PivotNodes) - { +void CHierarchicalResults::pivotsBottomUpBreadthFirst(CHierarchicalResultsVisitor& visitor) const { + for (const auto& pivot : m_PivotNodes) { visitor.visit(*this, pivot.second, /*pivot =*/true); } - for (const auto &root : m_PivotRootNodes) - { + for (const auto& root : m_PivotRootNodes) { visitor.visit(*this, root.second, /*pivot =*/true); } } -void CHierarchicalResults::pivotsTopDownBreadthFirst(CHierarchicalResultsVisitor &visitor) const -{ - for (const auto &root : m_PivotRootNodes) - { +void CHierarchicalResults::pivotsTopDownBreadthFirst(CHierarchicalResultsVisitor& visitor) const { + for (const auto& root : m_PivotRootNodes) { visitor.visit(*this, root.second, /*pivot =*/true); } - for (const auto &pivot : m_PivotNodes) - { + for (const auto& pivot : m_PivotNodes) { visitor.visit(*this, pivot.second, /*pivot =*/true); } } -bool CHierarchicalResults::empty() const -{ +bool CHierarchicalResults::empty() const { return m_Nodes.empty(); } -std::size_t CHierarchicalResults::resultCount() const -{ +std::size_t CHierarchicalResults::resultCount() const { std::size_t result = 0u; - for (const auto &node : m_Nodes) - { - if (isLeaf(node) && !node.s_Spec.s_IsSimpleCount) - { + for (const auto& node : m_Nodes) { + if (isLeaf(node) && 
!node.s_Spec.s_IsSimpleCount) { ++result; } } return result; } -void CHierarchicalResults::setInterim() -{ +void CHierarchicalResults::setInterim() { m_ResultType.set(model_t::CResultType::E_Interim); } -model_t::CResultType CHierarchicalResults::resultType() const -{ +model_t::CResultType CHierarchicalResults::resultType() const { return m_ResultType; } -void CHierarchicalResults::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CHierarchicalResults::acceptPersistInserter(core::CStatePersistInserter& inserter) const { using TStoredStringPtrNodeMapCItr = TStoredStringPtrNodeMap::const_iterator; using TStoredStringPtrNodeMapCItrVec = std::vector; using TStoredStringPtrStoredStringPtrPrNodeMapCItr = TStoredStringPtrStoredStringPtrPrNodeMap::const_iterator; @@ -896,99 +722,74 @@ void CHierarchicalResults::acceptPersistInserter(core::CStatePersistInserter &in TNodePtrSizeUMap nodePointers; - for (const auto &node : m_Nodes) - { - inserter.insertLevel(NODES_1_TAG, boost::bind(&SNode::acceptPersistInserter1, - boost::cref(node), - _1, boost::ref(nodePointers))); + for (const auto& node : m_Nodes) { + inserter.insertLevel(NODES_1_TAG, boost::bind(&SNode::acceptPersistInserter1, boost::cref(node), _1, boost::ref(nodePointers))); } // Sort the keys by *value* order to ensure consistent persist state. TStoredStringPtrStoredStringPtrPrNodeMapCItrVec pivotIterators; pivotIterators.reserve(m_PivotNodes.size()); - for (auto i = m_PivotNodes.begin(); i != m_PivotNodes.end(); ++i) - { + for (auto i = m_PivotNodes.begin(); i != m_PivotNodes.end(); ++i) { pivotIterators.push_back(i); } - std::sort(pivotIterators.begin(), pivotIterators.end(), - core::CFunctional::SDereference()); - for (auto i : pivotIterators) - { + std::sort(pivotIterators.begin(), pivotIterators.end(), core::CFunctional::SDereference()); + for (auto i : pivotIterators) { core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first.first, inserter); core::CPersistUtils::persist(PIVOT_VALUE_TAG, *i->first.second, inserter); - inserter.insertLevel(PIVOT_NODES_1_TAG, boost::bind(&SNode::acceptPersistInserter1, - boost::cref(i->second), - _1, boost::ref(nodePointers))); + inserter.insertLevel(PIVOT_NODES_1_TAG, + boost::bind(&SNode::acceptPersistInserter1, boost::cref(i->second), _1, boost::ref(nodePointers))); } // Sort the keys by *value* order to ensure consistent persist state. 
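Because the pivot nodes live in unordered maps, iteration order is nondeterministic; collecting iterators and sorting them by the dereferenced key is what makes the persisted state reproducible. A self-contained sketch of the same trick with illustrative types, where the lambda plays the role of core::CFunctional::SDereference in the patch:

#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
    std::unordered_map<std::string, int> nodes{{"b", 2}, {"a", 1}, {"c", 3}};
    // Iteration order of an unordered_map is unspecified, so gather iterators...
    std::vector<std::unordered_map<std::string, int>::const_iterator> order;
    for (auto i = nodes.begin(); i != nodes.end(); ++i) {
        order.push_back(i);
    }
    // ...and impose a deterministic order by comparing the dereferenced keys.
    std::sort(order.begin(), order.end(),
              [](auto lhs, auto rhs) { return lhs->first < rhs->first; });
    for (auto i : order) {
        std::cout << i->first << " -> " << i->second << '\n'; // a, b, c
    }
}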
TStoredStringPtrNodeMapCItrVec pivotRootIterators; pivotRootIterators.reserve(m_PivotRootNodes.size()); - for (auto i = m_PivotRootNodes.begin(); i != m_PivotRootNodes.end(); ++i) - { + for (auto i = m_PivotRootNodes.begin(); i != m_PivotRootNodes.end(); ++i) { pivotRootIterators.push_back(i); } - std::sort(pivotRootIterators.begin(), pivotRootIterators.end(), - core::CFunctional::SDereference()); - for (auto i : pivotRootIterators) - { + std::sort(pivotRootIterators.begin(), pivotRootIterators.end(), core::CFunctional::SDereference()); + for (auto i : pivotRootIterators) { core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first, inserter); - inserter.insertLevel(PIVOT_ROOT_NODES_1_TAG, boost::bind(&SNode::acceptPersistInserter1, - boost::cref(i->second), - _1, boost::ref(nodePointers))); + inserter.insertLevel(PIVOT_ROOT_NODES_1_TAG, + boost::bind(&SNode::acceptPersistInserter1, boost::cref(i->second), _1, boost::ref(nodePointers))); } - for (const auto &node : m_Nodes) - { - inserter.insertLevel(NODES_2_TAG, boost::bind(&SNode::acceptPersistInserter2, - boost::cref(node), - _1, boost::cref(nodePointers))); + for (const auto& node : m_Nodes) { + inserter.insertLevel(NODES_2_TAG, boost::bind(&SNode::acceptPersistInserter2, boost::cref(node), _1, boost::cref(nodePointers))); } - for (auto i : pivotIterators) - { + for (auto i : pivotIterators) { core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first.first, inserter); core::CPersistUtils::persist(PIVOT_VALUE_TAG, *i->first.second, inserter); - inserter.insertLevel(PIVOT_NODES_2_TAG, boost::bind(&SNode::acceptPersistInserter2, - boost::cref(i->second), - _1, boost::cref(nodePointers))); + inserter.insertLevel(PIVOT_NODES_2_TAG, + boost::bind(&SNode::acceptPersistInserter2, boost::cref(i->second), _1, boost::cref(nodePointers))); } - for (auto i : pivotRootIterators) - { + for (auto i : pivotRootIterators) { core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first, inserter); - inserter.insertLevel(PIVOT_ROOT_NODES_2_TAG, boost::bind(&SNode::acceptPersistInserter2, - boost::cref(i->second), - _1, boost::cref(nodePointers))); + inserter.insertLevel(PIVOT_ROOT_NODES_2_TAG, + boost::bind(&SNode::acceptPersistInserter2, boost::cref(i->second), _1, boost::cref(nodePointers))); } } -bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { TSizeNodePtrUMap nodePointers; core::CStoredStringPtr influencerName; core::CStoredStringPtr influencerValue; std::size_t nodesFullyRestored = 0; - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_SETUP_TEARDOWN(NODES_1_TAG, m_Nodes.push_back(SNode()), - traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1, - boost::ref(m_Nodes.back()), - _1, boost::ref(nodePointers))), + traverser.traverseSubLevel( + boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(m_Nodes.back()), _1, boost::ref(nodePointers))), /**/) - if (name == NODES_2_TAG) - { - if (nodesFullyRestored > m_Nodes.size()) - { + if (name == NODES_2_TAG) { + if (nodesFullyRestored > m_Nodes.size()) { LOG_ERROR("Invalid restore index for node: " << nodesFullyRestored); } - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser2, - boost::ref(m_Nodes[nodesFullyRestored]), - _1, boost::cref(nodePointers))) == false) - { + if (traverser.traverseSubLevel(boost::bind( + &SNode::acceptRestoreTraverser2, 
boost::ref(m_Nodes[nodesFullyRestored]), _1, boost::cref(nodePointers))) == false) { LOG_ERROR("Failed to restore node"); return false; } @@ -997,37 +798,28 @@ bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser & } RESTORE_NO_ERROR(PIVOT_NAME_TAG, influencerName = CStringStore::influencers().get(traverser.value())) RESTORE_NO_ERROR(PIVOT_VALUE_TAG, influencerValue = CStringStore::influencers().get(traverser.value())) - if (name == PIVOT_NODES_1_TAG) - { - if (!influencerName || !influencerValue) - { + if (name == PIVOT_NODES_1_TAG) { + if (!influencerName || !influencerValue) { LOG_ERROR("Invalid influencers for node"); return false; } - SNode &node = m_PivotNodes[TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue)]; - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1, - boost::ref(node), - _1, boost::ref(nodePointers))) == false) - { + SNode& node = m_PivotNodes[TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue)]; + if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(node), _1, boost::ref(nodePointers))) == + false) { LOG_ERROR("Failed to restore pivot node"); return false; } influencerName = core::CStoredStringPtr(); influencerValue = core::CStoredStringPtr(); continue; - } - else if (name == PIVOT_NODES_2_TAG) - { - if (!influencerName || !influencerValue) - { + } else if (name == PIVOT_NODES_2_TAG) { + if (!influencerName || !influencerValue) { LOG_ERROR("Invalid influencers for node"); return false; } - SNode &node = m_PivotNodes[TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue)]; - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser2, - boost::ref(node), - _1, boost::cref(nodePointers))) == false) - { + SNode& node = m_PivotNodes[TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue)]; + if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser2, boost::ref(node), _1, boost::cref(nodePointers))) == + false) { LOG_ERROR("Failed to restore pivot node"); return false; } @@ -1035,187 +827,142 @@ bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser & influencerValue = core::CStoredStringPtr(); continue; } - if (name == PIVOT_ROOT_NODES_1_TAG) - { - if (!influencerName) - { + if (name == PIVOT_ROOT_NODES_1_TAG) { + if (!influencerName) { LOG_ERROR("Invalid influencer for node"); return false; } - SNode &node = m_PivotRootNodes[influencerName]; - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1, - boost::ref(node), - _1, boost::ref(nodePointers))) == false) - { + SNode& node = m_PivotRootNodes[influencerName]; + if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(node), _1, boost::ref(nodePointers))) == + false) { LOG_ERROR("Failed to restore pivot node"); return false; } influencerName = core::CStoredStringPtr(); continue; } - if (name == PIVOT_ROOT_NODES_2_TAG) - { - if (!influencerName) - { + if (name == PIVOT_ROOT_NODES_2_TAG) { + if (!influencerName) { LOG_ERROR("Invalid influencer for node"); return false; } - SNode &node = m_PivotRootNodes[influencerName]; - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser2, - boost::ref(node), - _1, boost::cref(nodePointers))) == false) - { + SNode& node = m_PivotRootNodes[influencerName]; + if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser2, boost::ref(node), _1, boost::cref(nodePointers))) == + false) { LOG_ERROR("Failed to restore 
pivot node"); return false; } influencerName = core::CStoredStringPtr(); continue; } - } - while (traverser.next()); + } while (traverser.next()); return true; } -std::string CHierarchicalResults::print() const -{ +std::string CHierarchicalResults::print() const { std::ostringstream ss; - for (const auto &node : m_Nodes) - { + for (const auto& node : m_Nodes) { ss << "\t" << node.print() << core_t::LINE_ENDING; } return ss.str(); } -CHierarchicalResults::TNode &CHierarchicalResults::newNode() -{ +CHierarchicalResults::TNode& CHierarchicalResults::newNode() { m_Nodes.push_back(TNode()); return m_Nodes.back(); } -CHierarchicalResults::TNode & - CHierarchicalResults::newLeaf(const TResultSpec &simpleSearch, - SAnnotatedProbability &annotatedProbability) -{ +CHierarchicalResults::TNode& CHierarchicalResults::newLeaf(const TResultSpec& simpleSearch, SAnnotatedProbability& annotatedProbability) { m_Nodes.emplace_back(simpleSearch, annotatedProbability); return m_Nodes.back(); } -CHierarchicalResults::TNode &CHierarchicalResults::newPivot(TStoredStringPtrStoredStringPtrPr key) -{ - TNode &result = m_PivotNodes[key]; +CHierarchicalResults::TNode& CHierarchicalResults::newPivot(TStoredStringPtrStoredStringPtrPr key) { + TNode& result = m_PivotNodes[key]; result.s_Spec.s_PersonFieldName = key.first; result.s_Spec.s_PersonFieldValue = key.second; return result; } -CHierarchicalResults::TNode &CHierarchicalResults::newPivotRoot(const TStoredStringPtr &key) -{ - TNode &result = m_PivotRootNodes[key]; +CHierarchicalResults::TNode& CHierarchicalResults::newPivotRoot(const TStoredStringPtr& key) { + TNode& result = m_PivotRootNodes[key]; result.s_Spec.s_PersonFieldName = key; result.s_Spec.s_PersonFieldValue = UNSET_STRING; return result; } -void CHierarchicalResults::postorderDepthFirst(const TNode *node, - CHierarchicalResultsVisitor &visitor) const -{ - for (const auto &child : node->s_Children) - { +void CHierarchicalResults::postorderDepthFirst(const TNode* node, CHierarchicalResultsVisitor& visitor) const { + for (const auto& child : node->s_Children) { this->postorderDepthFirst(child, visitor); } visitor.visit(*this, *node, /*pivot =*/false); } -CHierarchicalResultsVisitor::~CHierarchicalResultsVisitor() -{ +CHierarchicalResultsVisitor::~CHierarchicalResultsVisitor() { } -bool CHierarchicalResultsVisitor::isRoot(const TNode &node) -{ +bool CHierarchicalResultsVisitor::isRoot(const TNode& node) { return !node.s_Parent; } -bool CHierarchicalResultsVisitor::isLeaf(const TNode &node) -{ +bool CHierarchicalResultsVisitor::isLeaf(const TNode& node) { return node.s_Children.empty(); } -bool CHierarchicalResultsVisitor::isPartitioned(const TNode &node) -{ - return !((*node.s_Spec.s_PartitionFieldName).empty()) - && unset(node.s_Spec.s_PartitionFieldValue); +bool CHierarchicalResultsVisitor::isPartitioned(const TNode& node) { + return !((*node.s_Spec.s_PartitionFieldName).empty()) && unset(node.s_Spec.s_PartitionFieldValue); } -bool CHierarchicalResultsVisitor::isPartition(const TNode &node) -{ - return !((*node.s_Spec.s_PartitionFieldName).empty()) - && !unset(node.s_Spec.s_PartitionFieldValue) - && ( CHierarchicalResultsVisitor::isRoot(node) - || unset(node.s_Parent->s_Spec.s_PartitionFieldValue)); +bool CHierarchicalResultsVisitor::isPartition(const TNode& node) { + return !((*node.s_Spec.s_PartitionFieldName).empty()) && !unset(node.s_Spec.s_PartitionFieldValue) && + (CHierarchicalResultsVisitor::isRoot(node) || unset(node.s_Parent->s_Spec.s_PartitionFieldValue)); } -bool 
CHierarchicalResultsVisitor::isPerson(const TNode &node) -{ - if ((*node.s_Spec.s_PersonFieldName).empty() || isPartitioned(node)) - { +bool CHierarchicalResultsVisitor::isPerson(const TNode& node) { + if ((*node.s_Spec.s_PersonFieldName).empty() || isPartitioned(node)) { return false; } - if (!isPopulation(node)) - { - return unset(node.s_Spec.s_PersonFieldValue) - || CHierarchicalResultsVisitor::isRoot(node) - || unset(node.s_Parent->s_Spec.s_PersonFieldName); + if (!isPopulation(node)) { + return unset(node.s_Spec.s_PersonFieldValue) || CHierarchicalResultsVisitor::isRoot(node) || + unset(node.s_Parent->s_Spec.s_PersonFieldName); } - return !unset(node.s_Spec.s_PersonFieldValue) - && ( CHierarchicalResultsVisitor::isRoot(node) - || (unset(node.s_Parent->s_Spec.s_PersonFieldValue))); + return !unset(node.s_Spec.s_PersonFieldValue) && + (CHierarchicalResultsVisitor::isRoot(node) || (unset(node.s_Parent->s_Spec.s_PersonFieldValue))); } -bool CHierarchicalResultsVisitor::isAttribute(const TNode &node) -{ - if (!isLeaf(node) || isPartition(node) || isRoot(node)) - { +bool CHierarchicalResultsVisitor::isAttribute(const TNode& node) { + if (!isLeaf(node) || isPartition(node) || isRoot(node)) { return false; } - if (isPerson(*node.s_Parent)) - { + if (isPerson(*node.s_Parent)) { return true; } return !isPopulation(node); } -bool CHierarchicalResultsVisitor::isSimpleCount(const TNode &node) -{ +bool CHierarchicalResultsVisitor::isSimpleCount(const TNode& node) { return node.s_Spec.s_IsSimpleCount; } -bool CHierarchicalResultsVisitor::isPopulation(const TNode &node) -{ +bool CHierarchicalResultsVisitor::isPopulation(const TNode& node) { return node.s_Spec.s_IsPopulation; } -const CHierarchicalResultsVisitor::TNode * - CHierarchicalResultsVisitor::nearestAncestorForWhichWeWriteResults(const TNode &node) -{ - const TNode *result = &node; - for (result = result->s_Parent; - result && !isTypeForWhichWeWriteResults(*result, false); - result = result->s_Parent) - { +const CHierarchicalResultsVisitor::TNode* CHierarchicalResultsVisitor::nearestAncestorForWhichWeWriteResults(const TNode& node) { + const TNode* result = &node; + for (result = result->s_Parent; result && !isTypeForWhichWeWriteResults(*result, false); result = result->s_Parent) { } return result; } -bool CHierarchicalResultsVisitor::isTypeForWhichWeWriteResults(const TNode &node, bool pivot) -{ +bool CHierarchicalResultsVisitor::isTypeForWhichWeWriteResults(const TNode& node, bool pivot) { return pivot || isLeaf(node) || isRoot(node) || isPartition(node); } -bool CHierarchicalResultsVisitor::shouldWriteResult(const CLimits &limits, - const CHierarchicalResults &results, - const TNode &node, - bool pivot) -{ +bool CHierarchicalResultsVisitor::shouldWriteResult(const CLimits& limits, + const CHierarchicalResults& results, + const TNode& node, + bool pivot) { double p = std::min(node.probability(), node.s_SmallestDescendantProbability); // This test ensures that we output results at aggregated levels in the @@ -1223,8 +970,7 @@ bool CHierarchicalResultsVisitor::shouldWriteResult(const CLimits &limits, // condition the UI can be very confusing, as it's not necessarily possible // to find anything when searching upwards from lowest level anomalies to // the aggregated levels above. 
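Ignoring the root-node exclusion and the influencer clause handled further down, the gate described in the comments above reduces to two rules: write a node that is anomalous in its own right, or one that is within tolerance of the most anomalous descendant of an ancestor that is itself written. A simplified sketch with a hypothetical node type, where 1.2 mirrors OUTPUT_TOLERANCE below:

#include <algorithm>
#include <iostream>

struct Node {
    const Node* parent = nullptr;
    double probability = 1.0;
    double smallestDescendantProbability = 1.0;
};

bool shouldWrite(const Node& node, double threshold) {
    double p = std::min(node.probability, node.smallestDescendantProbability);
    if (p < threshold) {
        return true; // anomalous in its own right
    }
    if (node.parent == nullptr) {
        return false;
    }
    // Close to the ancestor's most anomalous descendant, and the ancestor is
    // written: write this node too so drill-down from it finds something.
    return p <= 1.2 * node.parent->smallestDescendantProbability &&
           shouldWrite(*node.parent, threshold);
}

int main() {
    Node root;
    root.probability = 5e-7;
    root.smallestDescendantProbability = 1e-5;
    Node leaf;
    leaf.parent = &root;
    leaf.probability = 1.1e-5; // not past the threshold on its own
    std::cout << shouldWrite(leaf, 1e-6) << '\n'; // prints 1
}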
- if (p < limits.unusualProbabilityThreshold() && isTypeForWhichWeWriteResults(node, pivot)) - { + if (p < limits.unusualProbabilityThreshold() && isTypeForWhichWeWriteResults(node, pivot)) { return true; } @@ -1233,8 +979,7 @@ bool CHierarchicalResultsVisitor::shouldWriteResult(const CLimits &limits, // (However, if this is removed in the future another test must be added to // prevent the root node being allowed to permeate to the last test in this // method.) - if (CHierarchicalResultsVisitor::isRoot(node)) - { + if (CHierarchicalResultsVisitor::isRoot(node)) { return false; } @@ -1247,11 +992,9 @@ bool CHierarchicalResultsVisitor::shouldWriteResult(const CLimits &limits, // in the UI where a user drills down from an aggregated result and sees // nothing. static const double OUTPUT_TOLERANCE(1.2); - const TNode *ancestor = nearestAncestorForWhichWeWriteResults(node); - if ( ancestor - && p <= OUTPUT_TOLERANCE * ancestor->s_SmallestDescendantProbability - && shouldWriteResult(limits, results, *ancestor, pivot)) - { + const TNode* ancestor = nearestAncestorForWhichWeWriteResults(node); + if (ancestor && p <= OUTPUT_TOLERANCE * ancestor->s_SmallestDescendantProbability && + shouldWriteResult(limits, results, *ancestor, pivot)) { return true; } @@ -1260,20 +1003,15 @@ bool CHierarchicalResultsVisitor::shouldWriteResult(const CLimits &limits, // the test above nodes written as a result of this test must either have // a low probability themselves or be in branch of the results tree which // contains low probability results. - for (const auto &influence : node.s_AnnotatedProbability.s_Influences) - { - const TNode *influencer = results.influencer(influence.first.first, - influence.first.second); - if ( influencer - && p <= OUTPUT_TOLERANCE * influencer->s_SmallestDescendantProbability - && shouldWriteResult(limits, results, *influencer, /*pivot = */ true)) - { + for (const auto& influence : node.s_AnnotatedProbability.s_Influences) { + const TNode* influencer = results.influencer(influence.first.first, influence.first.second); + if (influencer && p <= OUTPUT_TOLERANCE * influencer->s_SmallestDescendantProbability && + shouldWriteResult(limits, results, *influencer, /*pivot = */ true)) { return true; } } return false; } - } } diff --git a/lib/model/CHierarchicalResultsAggregator.cc b/lib/model/CHierarchicalResultsAggregator.cc index fbb58bf17c..7d9585aef3 100644 --- a/lib/model/CHierarchicalResultsAggregator.cc +++ b/lib/model/CHierarchicalResultsAggregator.cc @@ -12,9 +12,9 @@ #include #include -#include #include #include +#include #include #include @@ -34,12 +34,9 @@ #include #include -namespace ml -{ -namespace model -{ -namespace -{ +namespace ml { +namespace model { +namespace { using TStoredStringPtr = CHierarchicalResults::TStoredStringPtr; using TStoredStringPtrStoredStringPtrPr = CHierarchicalResults::TStoredStringPtrStoredStringPtrPr; @@ -47,54 +44,36 @@ using TStoredStringPtrStoredStringPtrPrDoublePr = CHierarchicalResults::TStoredS using TStoredStringPtrStoredStringPtrPrDoublePrVec = CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePrVec; //! \brief Creates new detector equalizers. 
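The influenceProbability helper defined a little further on attributes a share of a result's probability to an influencer by raising p to the influence weight w in (0, 1], evaluated in log space as exp(w * log p). A standalone sketch, with std::log standing in for the maths::CTools::fastLog approximation used in the patch:

#include <cmath>
#include <iostream>

// p^w computed in log space; w == 1.0 short-circuits to p exactly, as the
// patch does, avoiding any rounding from the log/exp round trip.
double influenceWeightedProbability(double p, double w) {
    return w == 1.0 ? p : std::exp(w * std::log(p));
}

int main() {
    std::cout << influenceWeightedProbability(1e-6, 0.5) << '\n'; // 0.001
    std::cout << influenceWeightedProbability(1e-6, 1.0) << '\n'; // 1e-06
}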
-class CDetectorEqualizerFactory -{ - public: - CDetectorEqualizer make(const std::string &/*name1*/, - const std::string &/*name2*/, - const std::string &/*name3*/, - const std::string &/*name4*/) const - { - return CDetectorEqualizer(); - } +class CDetectorEqualizerFactory { +public: + CDetectorEqualizer + make(const std::string& /*name1*/, const std::string& /*name2*/, const std::string& /*name3*/, const std::string& /*name4*/) const { + return CDetectorEqualizer(); + } - CDetectorEqualizer make(const std::string &/*name1*/, - const std::string &/*name2*/) const - { - return CDetectorEqualizer(); - } + CDetectorEqualizer make(const std::string& /*name1*/, const std::string& /*name2*/) const { return CDetectorEqualizer(); } - CDetectorEqualizer make(const std::string &/*name*/) const - { - return CDetectorEqualizer(); - } + CDetectorEqualizer make(const std::string& /*name*/) const { return CDetectorEqualizer(); } }; //! Check if the underlying strings are equal. -bool equal(const TStoredStringPtrStoredStringPtrPr &lhs, const TStoredStringPtrStoredStringPtrPr &rhs) -{ +bool equal(const TStoredStringPtrStoredStringPtrPr& lhs, const TStoredStringPtrStoredStringPtrPr& rhs) { return *lhs.first == *rhs.first && *lhs.second == *rhs.second; } //! Compute the probability of \p influence. -bool influenceProbability(const TStoredStringPtrStoredStringPtrPrDoublePrVec &influences, - const TStoredStringPtr &influencerName, - const TStoredStringPtr &influencerValue, - double p, double &result) -{ +bool influenceProbability(const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences, + const TStoredStringPtr& influencerName, + const TStoredStringPtr& influencerValue, + double p, + double& result) { TStoredStringPtrStoredStringPtrPr influence(influencerName, influencerValue); std::size_t k{static_cast( - std::lower_bound(influences.begin(), - influences.end(), - influence, - maths::COrderings::SFirstLess()) - - influences.begin())}; - - if (k < influences.size() && equal(influences[k].first, influence)) - { - result = influences[k].second == 1.0 ? - p : std::exp(influences[k].second * maths::CTools::fastLog(p)); + std::lower_bound(influences.begin(), influences.end(), influence, maths::COrderings::SFirstLess()) - influences.begin())}; + + if (k < influences.size() && equal(influences[k].first, influence)) { + result = influences[k].second == 1.0 ? 
p : std::exp(influences[k].second * maths::CTools::fastLog(p));
         return true;
     }
 
@@ -111,55 +90,43 @@ const std::string LEAF_TAG("f");
 
 } // unnamed::
 
-CHierarchicalResultsAggregator::CHierarchicalResultsAggregator(const CAnomalyDetectorModelConfig &modelConfig) :
-        TBase(TDetectorEqualizer()),
-        m_Job(E_NoOp),
-        m_DecayRate(modelConfig.decayRate()),
-        m_MaximumAnomalousProbability(modelConfig.maximumAnomalousProbability())
-{
+CHierarchicalResultsAggregator::CHierarchicalResultsAggregator(const CAnomalyDetectorModelConfig& modelConfig)
+    : TBase(TDetectorEqualizer()),
+      m_Job(E_NoOp),
+      m_DecayRate(modelConfig.decayRate()),
+      m_MaximumAnomalousProbability(modelConfig.maximumAnomalousProbability()) {
     this->refresh(modelConfig);
 }
 
-void CHierarchicalResultsAggregator::setJob(EJob job)
-{
+void CHierarchicalResultsAggregator::setJob(EJob job) {
     m_Job = job;
 }
 
-void CHierarchicalResultsAggregator::refresh(const CAnomalyDetectorModelConfig &modelConfig)
-{
+void CHierarchicalResultsAggregator::refresh(const CAnomalyDetectorModelConfig& modelConfig) {
     m_DecayRate = modelConfig.decayRate();
     m_MaximumAnomalousProbability = modelConfig.maximumAnomalousProbability();
-    for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i)
-    {
-        for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j)
-        {
-            m_Parameters[i][j] = modelConfig.aggregationStyleParam(static_cast<model_t::EAggregationStyle>(i),
-                                                                   static_cast<model_t::EAggregationParam>(j));
+    for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) {
+        for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) {
+            m_Parameters[i][j] =
+                modelConfig.aggregationStyleParam(static_cast<model_t::EAggregationStyle>(i), static_cast<model_t::EAggregationParam>(j));
         }
     }
 }
 
-void CHierarchicalResultsAggregator::clear()
-{
+void CHierarchicalResultsAggregator::clear() {
     this->TBase::clear();
 }
 
-void CHierarchicalResultsAggregator::visit(const CHierarchicalResults &/*results*/, const TNode &node, bool pivot)
-{
-    if (isLeaf(node))
-    {
+void CHierarchicalResultsAggregator::visit(const CHierarchicalResults& /*results*/, const TNode& node, bool pivot) {
+    if (isLeaf(node)) {
         this->aggregateLeaf(node);
-    }
-    else
-    {
+    } else {
         this->aggregateNode(node, pivot);
     }
 }
 
-void CHierarchicalResultsAggregator::propagateForwardByTime(double time)
-{
-    if (time < 0.0)
-    {
+void CHierarchicalResultsAggregator::propagateForwardByTime(double time) {
+    if (time < 0.0) {
         LOG_ERROR("Can't propagate normalizer backwards in time");
         return;
     }
@@ -167,10 +134,8 @@ void CHierarchicalResultsAggregator::propagateForwardByTime(double time)
     this->age(boost::bind(&TDetectorEqualizer::age, _1, factor));
 }
 
-void CHierarchicalResultsAggregator::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
-    inserter.insertLevel(BUCKET_TAG, boost::bind(&TDetectorEqualizer::acceptPersistInserter,
-                                                 boost::cref(this->bucketElement()), _1));
+void CHierarchicalResultsAggregator::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    inserter.insertLevel(BUCKET_TAG, boost::bind(&TDetectorEqualizer::acceptPersistInserter, boost::cref(this->bucketElement()), _1));
     core::CPersistUtils::persist(INFLUENCER_BUCKET_TAG, this->influencerBucketSet(), inserter);
     core::CPersistUtils::persist(INFLUENCER_TAG, this->influencerSet(), inserter);
     core::CPersistUtils::persist(PARTITION_TAG, this->partitionSet(), inserter);
@@ -178,50 +143,40 @@ void CHierarchicalResultsAggregator::acceptPersistIn
     core::CPersistUtils::persist(LEAF_TAG, this->leafSet(), inserter);
 }
 
-bool CHierarchicalResultsAggregator::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
-        RESTORE(BUCKET_TAG, traverser.traverseSubLevel(boost::bind(&TDetectorEqualizer::acceptRestoreTraverser,
-                                                                   boost::ref(this->bucketElement()), _1)))
-        RESTORE(INFLUENCER_BUCKET_TAG, core::CPersistUtils::restore(INFLUENCER_BUCKET_TAG,
-                                                                    this->influencerBucketSet(),
-                                                                    traverser));
+bool CHierarchicalResultsAggregator::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
+        RESTORE(BUCKET_TAG,
+                traverser.traverseSubLevel(boost::bind(&TDetectorEqualizer::acceptRestoreTraverser, boost::ref(this->bucketElement()), _1)))
+        RESTORE(INFLUENCER_BUCKET_TAG, core::CPersistUtils::restore(INFLUENCER_BUCKET_TAG, this->influencerBucketSet(), traverser));
         RESTORE(INFLUENCER_TAG, core::CPersistUtils::restore(INFLUENCER_TAG, this->influencerSet(), traverser));
         RESTORE(PARTITION_TAG, core::CPersistUtils::restore(PARTITION_TAG, this->partitionSet(), traverser));
         RESTORE(PERSON_TAG, core::CPersistUtils::restore(PERSON_TAG, this->personSet(), traverser));
         RESTORE(LEAF_TAG, core::CPersistUtils::restore(LEAF_TAG, this->leafSet(), traverser));
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
 
-uint64_t CHierarchicalResultsAggregator::checksum() const
-{
+uint64_t CHierarchicalResultsAggregator::checksum() const {
     uint64_t seed = static_cast<uint64_t>(m_DecayRate);
     seed = maths::CChecksum::calculate(seed, m_Parameters);
     seed = maths::CChecksum::calculate(seed, m_MaximumAnomalousProbability);
     return this->TBase::checksum(seed);
 }
 
-void CHierarchicalResultsAggregator::aggregateLeaf(const TNode &node)
-{
-    if (isSimpleCount(node))
-    {
+void CHierarchicalResultsAggregator::aggregateLeaf(const TNode& node) {
+    if (isSimpleCount(node)) {
        return;
    }

     int detector{node.s_Detector};
     double probability{node.probability()};
-    if (!maths::CMathsFuncs::isFinite(probability))
-    {
+    if (!maths::CMathsFuncs::isFinite(probability)) {
         probability = 1.0;
     }
     probability = maths::CTools::truncate(probability, maths::CTools::smallestProbability(), 1.0);
     this->correctProbability(node, false, detector, probability);
-    model_t::EAggregationStyle style{
-        isAttribute(node) ? model_t::E_AggregateAttributes : model_t::E_AggregatePeople};
+    model_t::EAggregationStyle style{isAttribute(node) ? model_t::E_AggregateAttributes : model_t::E_AggregatePeople};
 
     node.s_AnnotatedProbability.s_Probability = probability;
     node.s_AggregationStyle = style;
@@ -230,14 +185,12 @@ void CHierarchicalResultsAggregator::aggregateLeaf(const TNode &node)
     node.s_RawAnomalyScore = maths::CTools::anomalyScore(probability);
 }
 
-void CHierarchicalResultsAggregator::aggregateNode(const TNode &node, bool pivot)
-{
+void CHierarchicalResultsAggregator::aggregateNode(const TNode& node, bool pivot) {
     LOG_TRACE("node = " << node.print() << ", pivot = " << pivot);
 
     std::size_t numberDetectors;
     TIntSizePrDouble1VecUMap partition[N];
-    if (!this->partitionChildProbabilities(node, pivot, numberDetectors, partition))
-    {
+    if (!this->partitionChildProbabilities(node, pivot, numberDetectors, partition)) {
         return;
     }
     LOG_TRACE("partition = " << core::CContainerPrinter::print(partition));
@@ -245,34 +198,31 @@ void CHierarchicalResultsAggregator::aggregateNode(const TNode &node, bool pivot
     int detector;
     int aggregation;
     TDouble1Vec detectorProbabilities;
-    this->detectorProbabilities(node, pivot, numberDetectors, partition,
-                                detector, aggregation, detectorProbabilities);
-    LOG_TRACE("detector = " << detector
-              << ", aggregation = " << aggregation
-              << ", detector probabilities = " << detectorProbabilities);
+    this->detectorProbabilities(node, pivot, numberDetectors, partition, detector, aggregation, detectorProbabilities);
+    LOG_TRACE("detector = " << detector << ", aggregation = " << aggregation << ", detector probabilities = " << detectorProbabilities);
 
-    const double *params{m_Parameters[model_t::E_AggregateDetectors]};
+    const double* params{m_Parameters[model_t::E_AggregateDetectors]};
     CAnomalyScore::compute(params[model_t::E_JointProbabilityWeight],
                            params[model_t::E_ExtremeProbabilityWeight],
                            static_cast<std::size_t>(params[model_t::E_MinExtremeSamples]),
                            static_cast<std::size_t>(params[model_t::E_MaxExtremeSamples]),
                            m_MaximumAnomalousProbability,
                            detectorProbabilities,
-                           node.s_RawAnomalyScore, node.s_AnnotatedProbability.s_Probability);
+                           node.s_RawAnomalyScore,
+                           node.s_AnnotatedProbability.s_Probability);
     node.s_Detector = detector;
     node.s_AggregationStyle = aggregation;
     LOG_TRACE("probability = " << node.probability());
 }
 
-bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode &node, bool pivot,
-                                                                 std::size_t &numberDetectors,
-                                                                 TIntSizePrDouble1VecUMap (&partition)[N])
-{
+bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode& node,
+                                                                 bool pivot,
+                                                                 std::size_t& numberDetectors,
+                                                                 TIntSizePrDouble1VecUMap (&partition)[N]) {
     using TSizeFSet = boost::container::flat_set<std::size_t>;
     using TMinAccumulator = maths::CBasicStatistics::SMin<double>::TAccumulator;
 
-    for (std::size_t i = 0u; i < N; ++i)
-    {
+    for (std::size_t i = 0u; i < N; ++i) {
         partition[i].reserve(node.s_Children.size());
     }
 
@@ -281,42 +231,34 @@ bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode &no
     TMinAccumulator pMinChild;
     TMinAccumulator pMinDescendent;
 
-    for (const auto &child : node.s_Children)
-    {
-        if (isSimpleCount(*child))
-        {
+    for (const auto& child : node.s_Children) {
+        if (isSimpleCount(*child)) {
             continue;
         }
 
         double probability{child->probability()};
         std::size_t key{0};
-        if (   pivot
-            && !isRoot(node)
-            && !influenceProbability(child->s_AnnotatedProbability.s_Influences,
-                                     node.s_Spec.s_PersonFieldName,
-                                     node.s_Spec.s_PersonFieldValue,
-                                     probability, probability))
-        {
+        if (pivot && !isRoot(node) &&
+            !influenceProbability(child->s_AnnotatedProbability.s_Influences,
+                                  node.s_Spec.s_PersonFieldName,
+                                  node.s_Spec.s_PersonFieldValue,
+                                  probability,
+                                  probability)) {
             LOG_ERROR("Couldn't find influence for " << child->print());
             continue;
-        }
-        else
-        {
+        } else {
             key = this->hash(*child);
         }
 
         haveResult = true;
         pMinChild.add(probability);
-        if (isTypeForWhichWeWriteResults(*child, pivot))
-        {
+        if (isTypeForWhichWeWriteResults(*child, pivot)) {
             pMinDescendent.add(probability);
         }
         pMinDescendent.add(child->s_SmallestDescendantProbability);
 
-        model_t::EAggregationStyle style{
-                static_cast<model_t::EAggregationStyle>(child->s_AggregationStyle)};
-        switch (style)
-        {
+        model_t::EAggregationStyle style{static_cast<model_t::EAggregationStyle>(child->s_AggregationStyle)};
+        switch (style) {
         case model_t::E_AggregatePeople:
         case model_t::E_AggregateAttributes:
             detectors.insert(child->s_Detector);
@@ -328,12 +270,9 @@ bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode &no
         }
     }
 
-    if (haveResult)
-    {
-        node.s_SmallestChildProbability =
-            maths::CTools::truncate(pMinChild[0], maths::CTools::smallestProbability(), 1.0);
-        node.s_SmallestDescendantProbability =
-            maths::CTools::truncate(pMinDescendent[0], maths::CTools::smallestProbability(), 1.0);
+    if (haveResult) {
+        node.s_SmallestChildProbability = maths::CTools::truncate(pMinChild[0], maths::CTools::smallestProbability(), 1.0);
+        node.s_SmallestDescendantProbability = maths::CTools::truncate(pMinDescendent[0], maths::CTools::smallestProbability(), 1.0);
     }
     numberDetectors = detectors.size();
     LOG_TRACE("detector = " << core::CContainerPrinter::print(detectors));
@@ -341,72 +280,72 @@ bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode &no
     return haveResult;
 }
 
-void CHierarchicalResultsAggregator::detectorProbabilities(const TNode &node, bool pivot,
+void CHierarchicalResultsAggregator::detectorProbabilities(const TNode& node,
+                                                           bool pivot,
                                                            std::size_t numberDetectors,
                                                            const TIntSizePrDouble1VecUMap (&partition)[N],
-                                                           int &detector, int &aggregation,
-                                                           TDouble1Vec &probabilities)
-{
+                                                           int& detector,
+                                                           int& aggregation,
+                                                           TDouble1Vec& probabilities) {
     using TDouble1Vec = core::CSmallVector<double, 1>;
     using TIntDouble1VecFMap = boost::container::flat_map<int, TDouble1Vec>;
 
     int fallback{static_cast<int>(model_t::E_AggregatePeople)};
     detector = -3;
-    aggregation = (   pivot
-                   || isPartition(node)
-                   || (isPopulation(node) && isPerson(node))) ? fallback : -1;
+    aggregation = (pivot || isPartition(node) || (isPopulation(node) && isPerson(node))) ? fallback : -1;
 
     TIntDouble1VecFMap detectorProbabilities;
     detectorProbabilities.reserve(numberDetectors);
-    for (int i = 0u; i < static_cast<int>(N); ++i)
-    {
-        const double *params{m_Parameters[i]};
-        for (const auto &subset : partition[i])
-        {
+    for (int i = 0u; i < static_cast<int>(N); ++i) {
+        const double* params{m_Parameters[i]};
+        for (const auto& subset : partition[i]) {
             int detector_{subset.first.first};
             double probability;
-            if (subset.second.size() == 1)
-            {
+            if (subset.second.size() == 1) {
                 probability = subset.second[0];
-            }
-            else
-            {
+            } else {
                 double rawAnomalyScore;
                 CAnomalyScore::compute(params[model_t::E_JointProbabilityWeight],
                                        params[model_t::E_ExtremeProbabilityWeight],
                                        static_cast<std::size_t>(params[model_t::E_MinExtremeSamples]),
                                        static_cast<std::size_t>(params[model_t::E_MaxExtremeSamples]),
                                        m_MaximumAnomalousProbability,
-                                       subset.second, rawAnomalyScore, probability);
+                                       subset.second,
+                                       rawAnomalyScore,
+                                       probability);
             }
-            if (!maths::CMathsFuncs::isFinite(probability))
-            {
+            if (!maths::CMathsFuncs::isFinite(probability)) {
                 probability = 1.0;
             }
             detectorProbabilities[detector_].push_back(probability);
-            switch (detector)
-            {
-            case -3: detector = detector_; /*first value we've seen*/ break;
-            case -2: /*we have a mix of detectors*/ break;
-            default: detector = (detector != detector_ ? -2 : detector_); break;
+            switch (detector) {
+            case -3:
+                detector = detector_; /*first value we've seen*/
+                break;
+            case -2: /*we have a mix of detectors*/
+                break;
+            default:
+                detector = (detector != detector_ ? -2 : detector_);
+                break;
             }
-            switch (aggregation)
-            {
-            case -1: aggregation = i; /*first value we've seen*/ break;
-            default: aggregation = (aggregation != i ? fallback : i); break;
+            switch (aggregation) {
+            case -1:
+                aggregation = i; /*first value we've seen*/
+                break;
+            default:
+                aggregation = (aggregation != i ?
fallback : i); + break; } } } probabilities.reserve(numberDetectors); - for (const auto &dp : detectorProbabilities) - { + for (const auto& dp : detectorProbabilities) { double probability{dp.second[0]}; - if (dp.second.size() > 1) - { - const double *params{m_Parameters[model_t::E_AggregatePeople]}; + if (dp.second.size() > 1) { + const double* params{m_Parameters[model_t::E_AggregatePeople]}; double rawAnomalyScore; CAnomalyScore::compute(params[model_t::E_JointProbabilityWeight], params[model_t::E_ExtremeProbabilityWeight], @@ -414,37 +353,31 @@ void CHierarchicalResultsAggregator::detectorProbabilities(const TNode &node, bo static_cast(params[model_t::E_MaxExtremeSamples]), m_MaximumAnomalousProbability, dp.second, - rawAnomalyScore, probability); + rawAnomalyScore, + probability); } probabilities.push_back(this->correctProbability(node, pivot, dp.first, probability)); } } -std::size_t CHierarchicalResultsAggregator::hash(const TNode &node) const -{ +std::size_t CHierarchicalResultsAggregator::hash(const TNode& node) const { std::size_t result{HASHER(node.s_Spec.s_PartitionFieldValue)}; - if (node.s_Spec.s_IsPopulation) - { + if (node.s_Spec.s_IsPopulation) { boost::hash_combine(result, HASHER(node.s_Spec.s_PersonFieldValue)); } return result; } -double CHierarchicalResultsAggregator::correctProbability(const TNode &node, bool pivot, - int detector, double probability) -{ +double CHierarchicalResultsAggregator::correctProbability(const TNode& node, bool pivot, int detector, double probability) { using TMaxAccumulator = maths::CBasicStatistics::SMax::TAccumulator; - if (probability < CDetectorEqualizer::largestProbabilityToCorrect()) - { + if (probability < CDetectorEqualizer::largestProbabilityToCorrect()) { CDetectorEqualizerFactory factory; TDetectorEqualizerPtrVec equalizers; this->elements(node, pivot, factory, equalizers); TMaxAccumulator corrected; - for (auto &equalizer : equalizers) - { - switch (m_Job) - { + for (auto& equalizer : equalizers) { + switch (m_Job) { case E_UpdateAndCorrect: equalizer->add(detector, probability); corrected.add(equalizer->correct(detector, probability)); @@ -456,14 +389,12 @@ double CHierarchicalResultsAggregator::correctProbability(const TNode &node, boo break; } } - if (corrected.count() > 0) - { + if (corrected.count() > 0) { probability = corrected[0]; } } return probability; } - } } diff --git a/lib/model/CHierarchicalResultsNormalizer.cc b/lib/model/CHierarchicalResultsNormalizer.cc index 99ffc3317c..fea023379a 100644 --- a/lib/model/CHierarchicalResultsNormalizer.cc +++ b/lib/model/CHierarchicalResultsNormalizer.cc @@ -22,43 +22,31 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { //! \brief Creates new normalizer instances. 
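The factory below hands out one normalizer per hierarchy level, identified by a space-joined description of the field names involved; conceptually the lookup is just a map keyed by that description. An illustrative sketch with hypothetical, simplified types:

#include <iostream>
#include <map>
#include <string>

struct Normalizer {
    std::string description; // stands in for the real quantile state
};

int main() {
    std::map<std::string, Normalizer> normalizers;
    auto get = [&normalizers](const std::string& description) -> Normalizer& {
        // One normalizer per distinct description, created on first use.
        return normalizers.try_emplace(description, Normalizer{description}).first->second;
    };
    get("airline responsetime"); // e.g. a person-level normalizer
    get("airline");              // e.g. a partition-level normalizer
    get("airline");              // reuses the existing one
    std::cout << normalizers.size() << '\n'; // prints 2
}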
-class CNormalizerFactory -{ - public: - using TNormalizer = CHierarchicalResultsNormalizer::TNormalizer; - - CNormalizerFactory(const CAnomalyDetectorModelConfig &modelConfig) : m_ModelConfig(modelConfig) {} - - TNormalizer make(const std::string &name1, - const std::string &name2, - const std::string &name3, - const std::string &name4) const - { - return make(name1 + ' ' + name2 + ' ' + name3 + ' ' + name4); - } +class CNormalizerFactory { +public: + using TNormalizer = CHierarchicalResultsNormalizer::TNormalizer; - TNormalizer make(const std::string &name1, const std::string &name2) const - { - return make(name1 + ' ' + name2); - } + CNormalizerFactory(const CAnomalyDetectorModelConfig& modelConfig) : m_ModelConfig(modelConfig) {} - TNormalizer make(const std::string &name) const - { - return TNormalizer(name, boost::make_shared(m_ModelConfig)); - } + TNormalizer make(const std::string& name1, const std::string& name2, const std::string& name3, const std::string& name4) const { + return make(name1 + ' ' + name2 + ' ' + name3 + ' ' + name4); + } + + TNormalizer make(const std::string& name1, const std::string& name2) const { return make(name1 + ' ' + name2); } - private: - //! The model configuration file. - const CAnomalyDetectorModelConfig &m_ModelConfig; + TNormalizer make(const std::string& name) const { + return TNormalizer(name, boost::make_shared(m_ModelConfig)); + } + +private: + //! The model configuration file. + const CAnomalyDetectorModelConfig& m_ModelConfig; }; // The bucket cue is "sysChange" for historical reasons. Do NOT tidy this up @@ -71,92 +59,71 @@ const std::string PARTITION_CUE_PREFIX("part"); const std::string PERSON_CUE_PREFIX("per"); const std::string LEAF_CUE_PREFIX("leaf"); const std::string EMPTY_STRING; - } -namespace hierarchical_results_normalizer_detail -{ +namespace hierarchical_results_normalizer_detail { -SNormalizer::SNormalizer(const std::string &description, const TNormalizerPtr &normalizer) : - s_Description(description), - s_Normalizer(normalizer) -{ +SNormalizer::SNormalizer(const std::string& description, const TNormalizerPtr& normalizer) + : s_Description(description), s_Normalizer(normalizer) { } -void SNormalizer::clear() -{ +void SNormalizer::clear() { s_Normalizer->clear(); } -void SNormalizer::propagateForwardByTime(double time) -{ +void SNormalizer::propagateForwardByTime(double time) { s_Normalizer->propagateForwardByTime(time); } -uint64_t SNormalizer::checksum() const -{ +uint64_t SNormalizer::checksum() const { uint64_t seed = maths::CChecksum::calculate(0, s_Description); return maths::CChecksum::calculate(seed, s_Normalizer); } - } -CHierarchicalResultsNormalizer::CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig &modelConfig) : - TBase(TNormalizer(std::string(), boost::make_shared(modelConfig))), - m_Job(E_NoOp), - m_ModelConfig(modelConfig), - m_HasLastUpdateCausedBigChange(false) -{ +CHierarchicalResultsNormalizer::CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig& modelConfig) + : TBase(TNormalizer(std::string(), boost::make_shared(modelConfig))), + m_Job(E_NoOp), + m_ModelConfig(modelConfig), + m_HasLastUpdateCausedBigChange(false) { } -void CHierarchicalResultsNormalizer::setJob(EJob job) -{ +void CHierarchicalResultsNormalizer::setJob(EJob job) { m_Job = job; } -void CHierarchicalResultsNormalizer::clear() -{ +void CHierarchicalResultsNormalizer::clear() { this->TBase::clear(); m_HasLastUpdateCausedBigChange = false; } -void CHierarchicalResultsNormalizer::resetBigChange() -{ +void 
CHierarchicalResultsNormalizer::resetBigChange() { m_HasLastUpdateCausedBigChange = false; } -void CHierarchicalResultsNormalizer::visit(const CHierarchicalResults &/*results*/, - const TNode &node, - bool pivot) -{ +void CHierarchicalResultsNormalizer::visit(const CHierarchicalResults& /*results*/, const TNode& node, bool pivot) { CNormalizerFactory factory(m_ModelConfig); TNormalizerPtrVec normalizers; this->elements(node, pivot, factory, normalizers, m_ModelConfig.perPartitionNormalization()); - if (normalizers.empty()) - { + if (normalizers.empty()) { return; } // This has to use the deviation of the probability rather than // the anomaly score stored on the bucket because the later is // scaled so that it sums to the bucket anomaly score. - double score = node.probability() > m_ModelConfig.maximumAnomalousProbability() ? - 0.0 : - maths::CTools::anomalyScore(node.probability()); + double score = node.probability() > m_ModelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(node.probability()); - switch (m_Job) - { + switch (m_Job) { case E_Update: - for (std::size_t i = 0u; i < normalizers.size(); ++i) - { + for (std::size_t i = 0u; i < normalizers.size(); ++i) { m_HasLastUpdateCausedBigChange |= normalizers[i]->s_Normalizer->updateQuantiles(score); } break; case E_Normalize: // Normalize with the lowest suitable normalizer. - if (!normalizers[0]->s_Normalizer->normalize(score)) - { + if (!normalizers[0]->s_Normalizer->normalize(score)) { LOG_ERROR("Failed to normalize " << score << " for " << node.s_Spec.print()); } node.s_NormalizedAnomalyScore = score; @@ -167,127 +134,79 @@ void CHierarchicalResultsNormalizer::visit(const CHierarchicalResults &/*results } } -void CHierarchicalResultsNormalizer::propagateForwardByTime(double time) -{ - if (time < 0.0) - { +void CHierarchicalResultsNormalizer::propagateForwardByTime(double time) { + if (time < 0.0) { LOG_ERROR("Can't propagate normalizer backwards in time"); return; } this->age(boost::bind(&TNormalizer::propagateForwardByTime, _1, time)); } -bool CHierarchicalResultsNormalizer::hasLastUpdateCausedBigChange() const -{ +bool CHierarchicalResultsNormalizer::hasLastUpdateCausedBigChange() const { return m_HasLastUpdateCausedBigChange; } -void CHierarchicalResultsNormalizer::toJson(core_t::TTime time, - const std::string &key, - std::string &json, - bool makeArray) const -{ - TStrVec jsonVec( 1 // m_RootNormalizer - + this->influencerBucketSet().size() - + this->influencerSet().size() - + this->partitionSet().size() - + this->personSet().size() - + this->leafSet().size()); +void CHierarchicalResultsNormalizer::toJson(core_t::TTime time, const std::string& key, std::string& json, bool makeArray) const { + TStrVec jsonVec(1 // m_RootNormalizer + + this->influencerBucketSet().size() + this->influencerSet().size() + this->partitionSet().size() + + this->personSet().size() + this->leafSet().size()); std::size_t index = 0; - for (std::size_t i = 0; i < this->leafSet().size(); ++i) - { - const TWord &word = this->leafSet()[i].first; - const TNormalizer &normalizer = this->leafSet()[i].second; - CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, - key, - leafCue(word), - normalizer.s_Description, - time, - jsonVec[index++]); + for (std::size_t i = 0; i < this->leafSet().size(); ++i) { + const TWord& word = this->leafSet()[i].first; + const TNormalizer& normalizer = this->leafSet()[i].second; + CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, leafCue(word), normalizer.s_Description, time, 
jsonVec[index++]); } - for (std::size_t i = 0; i < this->personSet().size(); ++i) - { - const TWord &word = this->personSet()[i].first; - const TNormalizer &normalizer = this->personSet()[i].second; - CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, - key, - personCue(word), - normalizer.s_Description, - time, - jsonVec[index++]); + for (std::size_t i = 0; i < this->personSet().size(); ++i) { + const TWord& word = this->personSet()[i].first; + const TNormalizer& normalizer = this->personSet()[i].second; + CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, personCue(word), normalizer.s_Description, time, jsonVec[index++]); } - for (std::size_t i = 0; i < this->partitionSet().size(); ++i) - { - const TWord &word = this->partitionSet()[i].first; - const TNormalizer &normalizer = this->partitionSet()[i].second; - CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, - key, - partitionCue(word), - normalizer.s_Description, - time, - jsonVec[index++]); + for (std::size_t i = 0; i < this->partitionSet().size(); ++i) { + const TWord& word = this->partitionSet()[i].first; + const TNormalizer& normalizer = this->partitionSet()[i].second; + CAnomalyScore::normalizerToJson( + *normalizer.s_Normalizer, key, partitionCue(word), normalizer.s_Description, time, jsonVec[index++]); } - for (std::size_t i = 0; i < this->influencerSet().size(); ++i) - { - const TWord &word = this->influencerSet()[i].first; - const TNormalizer &normalizer = this->influencerSet()[i].second; - CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, - key, - influencerCue(word), - normalizer.s_Description, - time, - jsonVec[index++]); + for (std::size_t i = 0; i < this->influencerSet().size(); ++i) { + const TWord& word = this->influencerSet()[i].first; + const TNormalizer& normalizer = this->influencerSet()[i].second; + CAnomalyScore::normalizerToJson( + *normalizer.s_Normalizer, key, influencerCue(word), normalizer.s_Description, time, jsonVec[index++]); } - for (std::size_t i = 0; i < this->influencerBucketSet().size(); ++i) - { - const TWord &word = this->influencerBucketSet()[i].first; - const TNormalizer &normalizer = this->influencerBucketSet()[i].second; - CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, - key, - influencerBucketCue(word), - normalizer.s_Description, - time, - jsonVec[index++]); + for (std::size_t i = 0; i < this->influencerBucketSet().size(); ++i) { + const TWord& word = this->influencerBucketSet()[i].first; + const TNormalizer& normalizer = this->influencerBucketSet()[i].second; + CAnomalyScore::normalizerToJson( + *normalizer.s_Normalizer, key, influencerBucketCue(word), normalizer.s_Description, time, jsonVec[index++]); } // Put the bucket normalizer last so that incomplete restorations can be // detected by checking whether the bucket normalizer is restored - CAnomalyScore::normalizerToJson(*this->bucketElement().s_Normalizer, - key, - bucketCue(), - "root", - time, - jsonVec[index++]); + CAnomalyScore::normalizerToJson(*this->bucketElement().s_Normalizer, key, bucketCue(), "root", time, jsonVec[index++]); json = core::CStringUtils::join(jsonVec, ","); - if (makeArray) - { + if (makeArray) { json.insert(size_t(0), 1, '['); json += ']'; } } -CHierarchicalResultsNormalizer::ERestoreOutcome - CHierarchicalResultsNormalizer::fromJsonStream(std::istream &inputStream) -{ +CHierarchicalResultsNormalizer::ERestoreOutcome CHierarchicalResultsNormalizer::fromJsonStream(std::istream& inputStream) { bool isBucketNormalizerRestored = false; this->TBase::clear(); 
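     // For orientation (inferred from the checks in the loop below, not a
     // guarantee of the wire format): the stream is expected to hold a sequence
     // of JSON documents, each beginning with MLCUE_ATTRIBUTE. The cue either
     // names the root bucket normalizer (BUCKET_CUE) or is decoded by parseCue()
     // into one of the influencer/partition/person/leaf normalizer sets, in
     // which case MLKEY_ATTRIBUTE and MLQUANTILESDESCRIPTION_ATTRIBUTE are read
     // before the normalizer state itself. Because toJson() above writes the
     // bucket normalizer last, reaching the end of the stream without seeing
     // BUCKET_CUE is reported as E_Incomplete.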
     core::CJsonStateRestoreTraverser traverser(inputStream);

-    do
-    {
+    do {
         // Call name() to prime the traverser if it hasn't started
-        if (traverser.name() == EMPTY_STRING)
-        {
-            if (traverser.isEof())
-            {
+        if (traverser.name() == EMPTY_STRING) {
+            if (traverser.isEof()) {
                 LOG_DEBUG("No normalizer state to restore");
                 // this is not an error
                 return E_Ok;
@@ -295,218 +214,166 @@ CHierarchicalResultsNormalizer::ERestoreOutcome
             }
         }

         // The MLCUE_ATTRIBUTE should always be the first field
-        if (traverser.name() != CAnomalyScore::MLCUE_ATTRIBUTE)
-        {
-            if (!traverser.next())
-            {
+        if (traverser.name() != CAnomalyScore::MLCUE_ATTRIBUTE) {
+            if (!traverser.next()) {
                 LOG_INFO("No normalizer state to restore");
                 return E_Ok;
             }
-            LOG_ERROR("Expected " << CAnomalyScore::MLCUE_ATTRIBUTE <<
-                      " field in quantiles JSON got " << traverser.name() <<
-                      " = " << traverser.value());
+            LOG_ERROR("Expected " << CAnomalyScore::MLCUE_ATTRIBUTE << " field in quantiles JSON got " << traverser.name() << " = "
+                                  << traverser.value());
             return E_Corrupt;
         }

         const std::string cue(traverser.value());

-        if (cue == BUCKET_CUE)
-        {
-            if (CAnomalyScore::normalizerFromJson(traverser,
-                                                  *this->bucketElement().s_Normalizer) == false)
-            {
+        if (cue == BUCKET_CUE) {
+            if (CAnomalyScore::normalizerFromJson(traverser, *this->bucketElement().s_Normalizer) == false) {
                 LOG_ERROR("Unable to restore bucket normalizer");
                 return E_Corrupt;
             }
             isBucketNormalizerRestored = true;
-        }
-        else
-        {
-            TWordNormalizerPrVec *normalizerVec = 0;
+        } else {
+            TWordNormalizerPrVec* normalizerVec = 0;
             TDictionary::TUInt64Array hashArray;
-            if (!this->parseCue(cue, normalizerVec, hashArray))
-            {
+            if (!this->parseCue(cue, normalizerVec, hashArray)) {
                 return E_Corrupt;
             }
-            if (normalizerVec != 0)
-            {
-                if (!traverser.next())
-                {
-                    LOG_ERROR("Cannot restore hierarchical normalizer - end of object reached when " <<
-                              CAnomalyScore::MLKEY_ATTRIBUTE << " was expected");
+            if (normalizerVec != 0) {
+                if (!traverser.next()) {
+                    LOG_ERROR("Cannot restore hierarchical normalizer - end of object reached when " << CAnomalyScore::MLKEY_ATTRIBUTE
+                                                                                                     << " was expected");
                     return E_Corrupt;
                 }
-                if (!traverser.next())
-                {
-                    LOG_ERROR("Cannot restore hierarchical normalizer - end of object reached when " <<
-                              CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE << " was expected");
+                if (!traverser.next()) {
+                    LOG_ERROR("Cannot restore hierarchical normalizer - end of object reached when "
+                              << CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE << " was expected");
                     return E_Corrupt;
                 }
-                if (traverser.name() != CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE)
-                {
-                    LOG_ERROR("Cannot restore hierarchical normalizer - " <<
-                              CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE <<
-                              " element expected but found " << traverser.name() << '=' << traverser.value());
+                if (traverser.name() != CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE) {
+                    LOG_ERROR("Cannot restore hierarchical normalizer - " << CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE
+                                                                          << " element expected but found " << traverser.name() << '='
+                                                                          << traverser.value());
                     return E_Corrupt;
                 }
                 std::string quantileDesc(traverser.value());

-                boost::shared_ptr<CAnomalyScore::CNormalizer> normalizer =
-                        boost::make_shared<CAnomalyScore::CNormalizer>(m_ModelConfig);
+                boost::shared_ptr<CAnomalyScore::CNormalizer> normalizer = boost::make_shared<CAnomalyScore::CNormalizer>(m_ModelConfig);
                 normalizerVec->emplace_back(TWord(hashArray), TNormalizer(quantileDesc, normalizer));
-                if (CAnomalyScore::normalizerFromJson(traverser, *normalizer) == false)
-                {
+                if (CAnomalyScore::normalizerFromJson(traverser, *normalizer) == false) {
                     LOG_ERROR("Unable to restore normalizer with cue " << cue);
                     return E_Corrupt;
                 }
             }
         }
-    }
-    while (traverser.nextObject());
+    } while (traverser.nextObject());

     this->sort();

-    LOG_DEBUG(this->influencerBucketSet().size() << " influencer bucket normalizers, " <<
-              this->influencerSet().size() << " influencer normalizers, " <<
-              this->partitionSet().size() << " partition normalizers, " <<
-              this->personSet().size() << " person normalizers and " <<
-              this->leafSet().size() << " leaf normalizers restored from JSON stream");
+    LOG_DEBUG(this->influencerBucketSet().size()
+              << " influencer bucket normalizers, " << this->influencerSet().size() << " influencer normalizers, "
+              << this->partitionSet().size() << " partition normalizers, " << this->personSet().size() << " person normalizers and "
+              << this->leafSet().size() << " leaf normalizers restored from JSON stream");

     return isBucketNormalizerRestored ? E_Ok : E_Incomplete;
 }

-const CAnomalyScore::CNormalizer &CHierarchicalResultsNormalizer::bucketNormalizer() const
-{
+const CAnomalyScore::CNormalizer& CHierarchicalResultsNormalizer::bucketNormalizer() const {
     return *this->bucketElement().s_Normalizer;
 }

-const CAnomalyScore::CNormalizer *
-    CHierarchicalResultsNormalizer::influencerBucketNormalizer(const std::string &influencerFieldName) const
-{
-    const TNormalizer *normalizer = this->influencerBucketElement(influencerFieldName);
+const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::influencerBucketNormalizer(const std::string& influencerFieldName) const {
+    const TNormalizer* normalizer = this->influencerBucketElement(influencerFieldName);
     return normalizer ? normalizer->s_Normalizer.get() : 0;
 }

-const CAnomalyScore::CNormalizer *
-    CHierarchicalResultsNormalizer::influencerNormalizer(const std::string &influencerFieldName) const
-{
-    const TNormalizer *normalizer = this->influencerElement(influencerFieldName);
+const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::influencerNormalizer(const std::string& influencerFieldName) const {
+    const TNormalizer* normalizer = this->influencerElement(influencerFieldName);
     return normalizer ? normalizer->s_Normalizer.get() : 0;
 }

-const CAnomalyScore::CNormalizer *
-    CHierarchicalResultsNormalizer::partitionNormalizer(const std::string &partitionFieldName) const
-{
-    const TNormalizer *normalizer = this->partitionElement(partitionFieldName);
+const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::partitionNormalizer(const std::string& partitionFieldName) const {
    const TNormalizer* normalizer = this->partitionElement(partitionFieldName);
     return normalizer ? normalizer->s_Normalizer.get() : 0;
 }

-const CAnomalyScore::CNormalizer *
-    CHierarchicalResultsNormalizer::personNormalizer(const std::string &partitionFieldName,
-                                                     const std::string &personFieldName) const
-{
-    const TNormalizer *normalizer = this->personElement(partitionFieldName, personFieldName);
+const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::personNormalizer(const std::string& partitionFieldName,
+                                                                                   const std::string& personFieldName) const {
+    const TNormalizer* normalizer = this->personElement(partitionFieldName, personFieldName);
     return normalizer ? normalizer->s_Normalizer.get() : 0;
 }

-const CAnomalyScore::CNormalizer *
-    CHierarchicalResultsNormalizer::leafNormalizer(const std::string &partitionFieldName,
-                                                   const std::string &personFieldName,
-                                                   const std::string &functionName,
-                                                   const std::string &valueFieldName) const
-{
-    const TNormalizer *normalizer = this->leafElement(partitionFieldName,
-                                                      personFieldName,
-                                                      functionName,
-                                                      valueFieldName);
+const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::leafNormalizer(const std::string& partitionFieldName,
+                                                                                 const std::string& personFieldName,
+                                                                                 const std::string& functionName,
+                                                                                 const std::string& valueFieldName) const {
+    const TNormalizer* normalizer = this->leafElement(partitionFieldName, personFieldName, functionName, valueFieldName);
     return normalizer ? normalizer->s_Normalizer.get() : 0;
 }

-bool CHierarchicalResultsNormalizer::parseCue(const std::string &cue,
-                                              TWordNormalizerPrVec *&normalizers,
-                                              TDictionary::TUInt64Array &hashArray)
-{
+bool CHierarchicalResultsNormalizer::parseCue(const std::string& cue,
+                                              TWordNormalizerPrVec*& normalizers,
+                                              TDictionary::TUInt64Array& hashArray) {
     normalizers = 0;
     std::size_t hashStartPos = 0;

-    if (cue.compare(0, INFLUENCER_BUCKET_CUE_PREFIX.length(), INFLUENCER_BUCKET_CUE_PREFIX) == 0)
-    {
+    if (cue.compare(0, INFLUENCER_BUCKET_CUE_PREFIX.length(), INFLUENCER_BUCKET_CUE_PREFIX) == 0) {
         normalizers = &this->influencerBucketSet();
         hashStartPos = INFLUENCER_BUCKET_CUE_PREFIX.length();
-    }
-    else if (cue.compare(0, INFLUENCER_CUE_PREFIX.length(), INFLUENCER_CUE_PREFIX) == 0)
-    {
+    } else if (cue.compare(0, INFLUENCER_CUE_PREFIX.length(), INFLUENCER_CUE_PREFIX) == 0) {
         normalizers = &this->influencerSet();
         hashStartPos = INFLUENCER_CUE_PREFIX.length();
-    }
-    else if (cue.compare(0, PARTITION_CUE_PREFIX.length(), PARTITION_CUE_PREFIX) == 0)
-    {
+    } else if (cue.compare(0, PARTITION_CUE_PREFIX.length(), PARTITION_CUE_PREFIX) == 0) {
         normalizers = &this->partitionSet();
         hashStartPos = PARTITION_CUE_PREFIX.length();
-    }
-    else if (cue.compare(0, PERSON_CUE_PREFIX.length(), PERSON_CUE_PREFIX) == 0)
-    {
+    } else if (cue.compare(0, PERSON_CUE_PREFIX.length(), PERSON_CUE_PREFIX) == 0) {
         normalizers = &this->personSet();
         hashStartPos = PERSON_CUE_PREFIX.length();
-    }
-    else if (cue.compare(0, LEAF_CUE_PREFIX.length(), LEAF_CUE_PREFIX) == 0)
-    {
+    } else if (cue.compare(0, LEAF_CUE_PREFIX.length(), LEAF_CUE_PREFIX) == 0) {
         normalizers = &this->leafSet();
         hashStartPos = LEAF_CUE_PREFIX.length();
-    }
-    else
-    {
+    } else {
         LOG_WARN("Did not understand normalizer cue " << cue);
         return true;
     }

     LOG_TRACE("cue = " << cue << ", hash = " << cue.substr(hashStartPos));

-    if (core::CStringUtils::stringToType(cue.substr(hashStartPos), hashArray[0]) == false)
-    {
-        LOG_ERROR("Unable to parse normalizer hash from cue " << cue
-                  << " starting at position " << hashStartPos);
+    if (core::CStringUtils::stringToType(cue.substr(hashStartPos), hashArray[0]) == false) {
+        LOG_ERROR("Unable to parse normalizer hash from cue " << cue << " starting at position " << hashStartPos);
         return false;
     }

     return true;
 }

-const std::string &CHierarchicalResultsNormalizer::bucketCue()
-{
+const std::string& CHierarchicalResultsNormalizer::bucketCue() {
     return BUCKET_CUE;
 }

-std::string CHierarchicalResultsNormalizer::influencerBucketCue(const TWord &word)
-{
+std::string CHierarchicalResultsNormalizer::influencerBucketCue(const TWord& word) {
     return INFLUENCER_BUCKET_CUE_PREFIX + core::CStringUtils::typeToString(word.hash64());
 }

-std::string CHierarchicalResultsNormalizer::influencerCue(const TWord &word)
-{
+std::string CHierarchicalResultsNormalizer::influencerCue(const TWord& word) {
     return INFLUENCER_CUE_PREFIX + core::CStringUtils::typeToString(word.hash64());
 }

-std::string CHierarchicalResultsNormalizer::partitionCue(const TWord &word)
-{
+std::string CHierarchicalResultsNormalizer::partitionCue(const TWord& word) {
     return PARTITION_CUE_PREFIX + core::CStringUtils::typeToString(word.hash64());
 }

-std::string CHierarchicalResultsNormalizer::personCue(const TWord &word)
-{
+std::string CHierarchicalResultsNormalizer::personCue(const TWord& word) {
     return PERSON_CUE_PREFIX + core::CStringUtils::typeToString(word.hash64());
 }

-std::string CHierarchicalResultsNormalizer::leafCue(const TWord &word)
-{
+std::string CHierarchicalResultsNormalizer::leafCue(const TWord& word) {
     return LEAF_CUE_PREFIX + core::CStringUtils::typeToString(word.hash64());
 }
-
 }
 }
diff --git a/lib/model/CHierarchicalResultsPopulator.cc b/lib/model/CHierarchicalResultsPopulator.cc
index d1212993b5..eb9efd290d 100644
--- a/lib/model/CHierarchicalResultsPopulator.cc
+++ b/lib/model/CHierarchicalResultsPopulator.cc
@@ -12,57 +12,45 @@
 #include
 #include

-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

-CHierarchicalResultsPopulator::CHierarchicalResultsPopulator(const CLimits &limits) :
-        m_Limits(limits)
-{
+CHierarchicalResultsPopulator::CHierarchicalResultsPopulator(const CLimits& limits) : m_Limits(limits) {
 }

-void CHierarchicalResultsPopulator::visit(const CHierarchicalResults &results,
-                                          const TNode &node,
-                                          bool pivot)
-{
-    if (!this->isLeaf(node) || !this->shouldWriteResult(m_Limits, results, node, pivot))
-    {
+void CHierarchicalResultsPopulator::visit(const CHierarchicalResults& results, const TNode& node, bool pivot) {
+    if (!this->isLeaf(node) || !this->shouldWriteResult(m_Limits, results, node, pivot)) {
         return;
     }

-    if (!node.s_Model)
-    {
+    if (!node.s_Model) {
         LOG_ERROR("No model for " << node.s_Spec.print());
         return;
     }

-    const CDataGatherer &gatherer = node.s_Model->dataGatherer();
+    const CDataGatherer& gatherer = node.s_Model->dataGatherer();

     std::size_t pid;
-    if (!gatherer.personId(*node.s_Spec.s_PersonFieldValue, pid))
-    {
+    if (!gatherer.personId(*node.s_Spec.s_PersonFieldValue, pid)) {
         LOG_ERROR("No identifier for '" << *node.s_Spec.s_PersonFieldValue << "'");
         return;
     }

-    SAnnotatedProbability &probability = node.s_AnnotatedProbability;
-    for (std::size_t i = 0; i < probability.s_AttributeProbabilities.size(); ++i)
-    {
-        const SAttributeProbability &attribute = probability.s_AttributeProbabilities[i];
-        attribute.s_CurrentBucketValue = node.s_Model->currentBucketValue(attribute.s_Feature,
-                                                                          pid, attribute.s_Cid,
-                                                                          node.s_BucketStartTime + node.s_BucketLength / 2);
+    SAnnotatedProbability& probability = node.s_AnnotatedProbability;
+    for (std::size_t i = 0; i < probability.s_AttributeProbabilities.size(); ++i) {
+        const SAttributeProbability& attribute = probability.s_AttributeProbabilities[i];
+        attribute.s_CurrentBucketValue =
+            node.s_Model->currentBucketValue(attribute.s_Feature, pid, attribute.s_Cid, node.s_BucketStartTime + node.s_BucketLength / 2);
         attribute.s_BaselineBucketMean = node.s_Model->baselineBucketMean(attribute.s_Feature,
-                                                                          pid, attribute.s_Cid,
+                                                                          pid,
+                                                                          attribute.s_Cid,
                                                                           attribute.s_Type,
                                                                           attribute.s_Correlated,
                                                                           node.s_BucketStartTime + node.s_BucketLength / 2);
     }

-    probability.s_CurrentBucketCount = node.s_Model->currentBucketCount(pid, node.s_BucketStartTime);
+    probability.s_CurrentBucketCount = node.s_Model->currentBucketCount(pid, node.s_BucketStartTime);
     probability.s_BaselineBucketCount = node.s_Model->baselineBucketCount(pid);
 }
-
 }
 }
diff --git a/lib/model/CHierarchicalResultsProbabilityFinalizer.cc b/lib/model/CHierarchicalResultsProbabilityFinalizer.cc
index 540e9fe292..2223d536e3 100644
--- a/lib/model/CHierarchicalResultsProbabilityFinalizer.cc
+++ b/lib/model/CHierarchicalResultsProbabilityFinalizer.cc
@@ -8,20 +8,13 @@

 #include

-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

-void CHierarchicalResultsProbabilityFinalizer::visit(const CHierarchicalResults &/*results*/,
-                                                     const TNode &node,
-                                                     bool /*pivot*/)
-{
-    if (node.s_RawAnomalyScore > 0.0)
-    {
+void CHierarchicalResultsProbabilityFinalizer::visit(const CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+    if (node.s_RawAnomalyScore > 0.0) {
         node.s_AnnotatedProbability.s_Probability = maths::CTools::inverseAnomalyScore(node.s_RawAnomalyScore);
     }
 }
-
 }
 }
diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc
index 9b12973c78..4fe518324c 100644
--- a/lib/model/CIndividualModel.cc
+++ b/lib/model/CIndividualModel.cc
@@ -10,8 +10,8 @@
 #include
 #include
 #include
-#include
 #include
+#include

 #include
 #include
@@ -23,20 +23,17 @@
 #include
 #include
 #include
-#include
 #include
+#include

 #include
 #include

 #include

-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {

-namespace
-{
+namespace {

 using TStrCRef = boost::reference_wrapper<const std::string>;
 using TStrCRefUInt64Map = std::map<TStrCRef, uint64_t, maths::COrderings::SLess>;

@@ -45,15 +42,10 @@ using TStrCRefStrCRefPrUInt64Map = std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;

 template<typename T>
-void hashActive(const CDataGatherer &gatherer,
-                const std::vector<T> &values,
-                TStrCRefUInt64Map &hashes)
-{
-    for (std::size_t pid = 0u; pid < values.size(); ++pid)
-    {
-        if (gatherer.isPersonActive(pid))
-        {
-            uint64_t &hash = hashes[boost::cref(gatherer.personName(pid))];
+void hashActive(const CDataGatherer& gatherer, const std::vector<T>& values, TStrCRefUInt64Map& hashes) {
+    for (std::size_t pid = 0u; pid < values.size(); ++pid) {
+        if (gatherer.isPersonActive(pid)) {
+            uint64_t& hash = hashes[boost::cref(gatherer.personName(pid))];
             hash = maths::CChecksum::calculate(hash, values[pid]);
         }
     }
@@ -75,166 +67,122 @@ const std::string FEATURE_CORRELATE_MODELS_TAG("f");
 const std::string INTERIM_BUCKET_CORRECTOR_TAG("h");
 const std::string MEMORY_ESTIMATOR_TAG("i");
-
 }

-CIndividualModel::CIndividualModel(const SModelParams &params,
-                                   const TDataGathererPtr &dataGatherer,
-                                   const TFeatureMathsModelPtrPrVec &newFeatureModels,
-                                   const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors,
-                                   const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels,
-                                   const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators) :
-        CAnomalyDetectorModel(params, dataGatherer, influenceCalculators)
-{
+CIndividualModel::CIndividualModel(const SModelParams& params,
+                                   const TDataGathererPtr& dataGatherer,
+                                   const TFeatureMathsModelPtrPrVec& newFeatureModels,
+                                   const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+                                   const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels,
+                                   const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators)
+    : CAnomalyDetectorModel(params, dataGatherer, influenceCalculators) {
     m_FeatureModels.reserve(newFeatureModels.size());
-    for (const auto &model : newFeatureModels)
-    {
+    for (const auto& model : newFeatureModels) {
         m_FeatureModels.emplace_back(model.first, model.second);
     }
-    std::sort(m_FeatureModels.begin(), m_FeatureModels.end(),
-              [](const SFeatureModels &lhs,
-                 const SFeatureModels &rhs)
-              { return lhs.s_Feature < rhs.s_Feature; } );
+    std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), [](const SFeatureModels& lhs, const SFeatureModels& rhs) {
+        return lhs.s_Feature < rhs.s_Feature;
+    });

-    if (this->params().s_MultivariateByFields)
-    {
+    if (this->params().s_MultivariateByFields) {
         m_FeatureCorrelatesModels.reserve(featureCorrelatesModels.size());
-        for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i)
-        {
-            m_FeatureCorrelatesModels.emplace_back(featureCorrelatesModels[i].first,
-                                                   newFeatureCorrelateModelPriors[i].second,
-                                                   featureCorrelatesModels[i].second);
+        for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i) {
+            m_FeatureCorrelatesModels.emplace_back(
+                featureCorrelatesModels[i].first, newFeatureCorrelateModelPriors[i].second, featureCorrelatesModels[i].second);
         }
-        std::sort(m_FeatureCorrelatesModels.begin(), m_FeatureCorrelatesModels.end(),
-                  [](const SFeatureCorrelateModels &lhs,
-                     const SFeatureCorrelateModels &rhs)
-                  { return lhs.s_Feature < rhs.s_Feature; });
+        std::sort(m_FeatureCorrelatesModels.begin(),
+                  m_FeatureCorrelatesModels.end(),
+                  [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) { return lhs.s_Feature < rhs.s_Feature; });
     }
 }

-CIndividualModel::CIndividualModel(bool isForPersistence, const CIndividualModel &other) :
-        CAnomalyDetectorModel(isForPersistence, other),
-        m_FirstBucketTimes(other.m_FirstBucketTimes),
-        m_LastBucketTimes(other.m_LastBucketTimes),
-        m_MemoryEstimator(other.m_MemoryEstimator)
-{
-    if (!isForPersistence)
-    {
+CIndividualModel::CIndividualModel(bool isForPersistence, const CIndividualModel& other)
+    : CAnomalyDetectorModel(isForPersistence, other),
+      m_FirstBucketTimes(other.m_FirstBucketTimes),
+      m_LastBucketTimes(other.m_LastBucketTimes),
+      m_MemoryEstimator(other.m_MemoryEstimator) {
+    if (!isForPersistence) {
         LOG_ABORT("This constructor only creates clones for persistence");
     }

     m_FeatureModels.reserve(m_FeatureModels.size());
-    for (const auto &feature : other.m_FeatureModels)
-    {
+    for (const auto& feature : other.m_FeatureModels) {
         m_FeatureModels.emplace_back(feature.s_Feature, feature.s_NewModel);
         m_FeatureModels.back().s_Models.reserve(feature.s_Models.size());
-        for (const auto &model : feature.s_Models)
-        {
+        for (const auto& model : feature.s_Models) {
             m_FeatureModels.back().s_Models.emplace_back(model->cloneForPersistence());
         }
     }

     m_FeatureCorrelatesModels.reserve(other.m_FeatureCorrelatesModels.size());
-    for (const auto &feature : other.m_FeatureCorrelatesModels)
-    {
-        m_FeatureCorrelatesModels.emplace_back(feature.s_Feature, feature.s_ModelPrior,
-                                               TCorrelationsPtr(feature.s_Models->cloneForPersistence()));
+    for (const auto& feature : other.m_FeatureCorrelatesModels) {
+        m_FeatureCorrelatesModels.emplace_back(
+            feature.s_Feature, feature.s_ModelPrior, TCorrelationsPtr(feature.s_Models->cloneForPersistence()));
     }
 }

-bool CIndividualModel::isPopulation() const
-{
+bool CIndividualModel::isPopulation() const {
     return false;
 }

-CIndividualModel::TOptionalUInt64
-    CIndividualModel::currentBucketCount(std::size_t pid, core_t::TTime time) const
-{
-    if (!this->bucketStatsAvailable(time))
-    {
-        LOG_ERROR("No statistics at " << time
-                  << ", current bucket = " << this->printCurrentBucket());
+CIndividualModel::TOptionalUInt64 CIndividualModel::currentBucketCount(std::size_t pid, core_t::TTime time) const {
+    if (!this->bucketStatsAvailable(time)) {
+        LOG_ERROR("No statistics at " << time << ", current bucket = " << this->printCurrentBucket());
         return TOptionalUInt64();
     }

-    auto result = std::lower_bound(this->currentBucketPersonCounts().begin(),
-                                   this->currentBucketPersonCounts().end(),
-                                   pid, maths::COrderings::SFirstLess());
+    auto result = std::lower_bound(
+        this->currentBucketPersonCounts().begin(), this->currentBucketPersonCounts().end(), pid, maths::COrderings::SFirstLess());

-    return result != this->currentBucketPersonCounts().end()
-           && result->first == pid ? result->second : static_cast<uint64_t>(0);
+    return result != this->currentBucketPersonCounts().end() && result->first == pid ? result->second : static_cast<uint64_t>(0);
 }

-bool CIndividualModel::bucketStatsAvailable(core_t::TTime time) const
-{
-    return time >= this->currentBucketStartTime()
-           && time < this->currentBucketStartTime() + this->bucketLength();
+bool CIndividualModel::bucketStatsAvailable(core_t::TTime time) const {
+    return time >= this->currentBucketStartTime() && time < this->currentBucketStartTime() + this->bucketLength();
 }

-void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime,
-                                              core_t::TTime endTime,
-                                              CResourceMonitor &resourceMonitor)
-{
-    CDataGatherer &gatherer = this->dataGatherer();
+void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+    CDataGatherer& gatherer = this->dataGatherer();

-    if (!gatherer.dataAvailable(startTime))
-    {
+    if (!gatherer.dataAvailable(startTime)) {
         return;
     }

-    for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength();
-         time < endTime;
-         time += bucketLength)
-    {
+    for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) {
         this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor);

         // Currently, we only remember one bucket.
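         // Note: since only the current bucket is remembered, advancing the
         // bucket start time below discards the statistics of earlier buckets;
         // bucketStatsAvailable() above only spans the current bucket, so this
         // assumes buckets are visited in increasing time order.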
this->currentBucketStartTime(time); - TSizeUInt64PrVec &personCounts = this->currentBucketPersonCounts(); + TSizeUInt64PrVec& personCounts = this->currentBucketPersonCounts(); gatherer.personNonZeroCounts(time, personCounts); this->applyFilter(model_t::E_XF_By, false, this->personFilter(), personCounts); } } -void CIndividualModel::sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); - if (!gatherer.dataAvailable(startTime)) - { +void CIndividualModel::sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); + if (!gatherer.dataAvailable(startTime)) { return; } - for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); - time < endTime; - time += bucketLength) - { + for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) { gatherer.sampleNow(time); this->sampleBucketStatistics(time, time + bucketLength, resourceMonitor); } } -void CIndividualModel::sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - const CDataGatherer &gatherer = this->dataGatherer(); +void CIndividualModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + const CDataGatherer& gatherer = this->dataGatherer(); - for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); - time < endTime; - time += bucketLength) - { + for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) { this->CAnomalyDetectorModel::sample(time, time + bucketLength, resourceMonitor); this->currentBucketStartTime(time); - TSizeUInt64PrVec &personCounts = this->currentBucketPersonCounts(); + TSizeUInt64PrVec& personCounts = this->currentBucketPersonCounts(); gatherer.personNonZeroCounts(time, personCounts); - for (const auto &count : personCounts) - { + for (const auto& count : personCounts) { std::size_t pid = count.first; - if (CAnomalyDetectorModel::isTimeUnset(m_FirstBucketTimes[pid])) - { + if (CAnomalyDetectorModel::isTimeUnset(m_FirstBucketTimes[pid])) { m_FirstBucketTimes[pid] = time; } m_LastBucketTimes[pid] = time; @@ -243,36 +191,28 @@ void CIndividualModel::sample(core_t::TTime startTime, } } -void CIndividualModel::prune(std::size_t maximumAge) -{ +void CIndividualModel::prune(std::size_t maximumAge) { core_t::TTime time = this->currentBucketStartTime(); - if (time <= 0) - { + if (time <= 0) { return; } - CDataGatherer &gatherer = this->dataGatherer(); + CDataGatherer& gatherer = this->dataGatherer(); TSizeVec peopleToRemove; - for (std::size_t pid = 0u; pid < m_LastBucketTimes.size(); ++pid) - { - if (gatherer.isPersonActive(pid) && !CAnomalyDetectorModel::isTimeUnset(m_LastBucketTimes[pid])) - { - std::size_t bucketsSinceLastEvent = static_cast( - (time - m_LastBucketTimes[pid]) / gatherer.bucketLength()); - if (bucketsSinceLastEvent > maximumAge) - { + for (std::size_t pid = 0u; pid < m_LastBucketTimes.size(); ++pid) { + if (gatherer.isPersonActive(pid) && !CAnomalyDetectorModel::isTimeUnset(m_LastBucketTimes[pid])) { + std::size_t bucketsSinceLastEvent = static_cast((time - m_LastBucketTimes[pid]) / gatherer.bucketLength()); + if (bucketsSinceLastEvent > maximumAge) { LOG_TRACE(gatherer.personName(pid) - << ", bucketsSinceLastEvent = " << bucketsSinceLastEvent - << ", maximumAge = " << maximumAge); + 
<< ", bucketsSinceLastEvent = " << bucketsSinceLastEvent << ", maximumAge = " << maximumAge); peopleToRemove.push_back(pid); } } } - if (peopleToRemove.empty()) - { + if (peopleToRemove.empty()) { return; } @@ -284,53 +224,44 @@ void CIndividualModel::prune(std::size_t maximumAge) this->clearPrunedResources(peopleToRemove, TSizeVec()); } -bool CIndividualModel::computeTotalProbability(const std::string &/*person*/, +bool CIndividualModel::computeTotalProbability(const std::string& /*person*/, std::size_t /*numberAttributeProbabilities*/, - TOptionalDouble &probability, - TAttributeProbability1Vec &attributeProbabilities) const -{ + TOptionalDouble& probability, + TAttributeProbability1Vec& attributeProbabilities) const { probability = TOptionalDouble(); attributeProbabilities.clear(); return true; } -uint64_t CIndividualModel::checksum(bool includeCurrentBucketStats) const -{ +uint64_t CIndividualModel::checksum(bool includeCurrentBucketStats) const { uint64_t seed = this->CAnomalyDetectorModel::checksum(includeCurrentBucketStats); TStrCRefUInt64Map hashes1; - const CDataGatherer &gatherer = this->dataGatherer(); + const CDataGatherer& gatherer = this->dataGatherer(); hashActive(gatherer, m_FirstBucketTimes, hashes1); hashActive(gatherer, m_LastBucketTimes, hashes1); - for (const auto &feature : m_FeatureModels) - { + for (const auto& feature : m_FeatureModels) { hashActive(gatherer, feature.s_Models, hashes1); } TStrCRefStrCRefPrUInt64Map hashes2; - for (const auto &feature : m_FeatureCorrelatesModels) - { - for (const auto &model : feature.s_Models->correlationModels()) - { + for (const auto& feature : m_FeatureCorrelatesModels) { + for (const auto& model : feature.s_Models->correlationModels()) { std::size_t pids[]{model.first.first, model.first.second}; - if (gatherer.isPersonActive(pids[0]) && gatherer.isPersonActive(pids[1])) - { - uint64_t &hash = hashes2[{boost::cref(this->personName(pids[0])), - boost::cref(this->personName(pids[1]))}]; + if (gatherer.isPersonActive(pids[0]) && gatherer.isPersonActive(pids[1])) { + uint64_t& hash = hashes2[{boost::cref(this->personName(pids[0])), boost::cref(this->personName(pids[1]))}]; hash = maths::CChecksum::calculate(hash, model.second); } } } - if (includeCurrentBucketStats) - { + if (includeCurrentBucketStats) { seed = maths::CChecksum::calculate(seed, this->currentBucketStartTime()); - const TSizeUInt64PrVec &personCounts = this->currentBucketPersonCounts(); - for (const auto &count : personCounts) - { - uint64_t &hash = hashes1[boost::cref(this->personName(count.first))]; + const TSizeUInt64PrVec& personCounts = this->currentBucketPersonCounts(); + for (const auto& count : personCounts) { + uint64_t& hash = hashes1[boost::cref(this->personName(count.first))]; hash = maths::CChecksum::calculate(hash, count.second); } } @@ -343,8 +274,7 @@ uint64_t CIndividualModel::checksum(bool includeCurrentBucketStats) const return maths::CChecksum::calculate(seed, hashes2); } -void CIndividualModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CIndividualModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CIndividualModel"); this->CAnomalyDetectorModel::debugMemoryUsage(mem->addChild()); core::CMemoryDebug::dynamicSize("m_FirstBucketTimes", m_FirstBucketTimes, mem); @@ -354,17 +284,14 @@ void CIndividualModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) core::CMemoryDebug::dynamicSize("m_MemoryEstimator", m_MemoryEstimator, mem); } -std::size_t CIndividualModel::memoryUsage() 
const -{ - const CDataGatherer &gatherer = this->dataGatherer(); - TOptionalSize estimate = this->estimateMemoryUsage(gatherer.numberActivePeople(), - gatherer.numberActiveAttributes(), - this->numberCorrelations()); +std::size_t CIndividualModel::memoryUsage() const { + const CDataGatherer& gatherer = this->dataGatherer(); + TOptionalSize estimate = + this->estimateMemoryUsage(gatherer.numberActivePeople(), gatherer.numberActiveAttributes(), this->numberCorrelations()); return estimate ? estimate.get() : this->computeMemoryUsage(); } -std::size_t CIndividualModel::computeMemoryUsage() const -{ +std::size_t CIndividualModel::computeMemoryUsage() const { std::size_t mem = this->CAnomalyDetectorModel::memoryUsage(); mem += core::CMemory::dynamicSize(m_FirstBucketTimes); mem += core::CMemory::dynamicSize(m_LastBucketTimes); @@ -374,44 +301,33 @@ std::size_t CIndividualModel::computeMemoryUsage() const return mem; } -CMemoryUsageEstimator *CIndividualModel::memoryUsageEstimator() const -{ +CMemoryUsageEstimator* CIndividualModel::memoryUsageEstimator() const { return &m_MemoryEstimator; } -std::size_t CIndividualModel::staticSize() const -{ +std::size_t CIndividualModel::staticSize() const { return sizeof(*this); } -void CIndividualModel::doAcceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, - this->windowBucketCount(), - core::CIEEE754::E_SinglePrecision); +void CIndividualModel::doAcceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(), core::CIEEE754::E_SinglePrecision); core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, this->personBucketCounts(), inserter); core::CPersistUtils::persist(FIRST_BUCKET_TIME_TAG, m_FirstBucketTimes, inserter); core::CPersistUtils::persist(LAST_BUCKET_TIME_TAG, m_LastBucketTimes, inserter); - for (const auto &feature : m_FeatureModels) - { - inserter.insertLevel(FEATURE_MODELS_TAG, - boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1)); + for (const auto& feature : m_FeatureModels) { + inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1)); } - for (const auto &feature : m_FeatureCorrelatesModels) - { - inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, - boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1)); + for (const auto& feature : m_FeatureCorrelatesModels) { + inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1)); } this->interimBucketCorrectorAcceptPersistInserter(INTERIM_BUCKET_CORRECTOR_TAG, inserter); core::CPersistUtils::persist(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, inserter); } -bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { std::size_t i = 0u, j = 0u; - do - { - const std::string &name = traverser.name(); + do { + const std::string& name = traverser.name(); RESTORE_SETUP_TEARDOWN(WINDOW_BUCKET_COUNT_TAG, double count, core::CStringUtils::stringToType(traverser.value(), count), @@ -420,26 +336,22 @@ bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser &tr RESTORE(FIRST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_FirstBucketTimes, traverser)) RESTORE(LAST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_LastBucketTimes, traverser)) RESTORE(FEATURE_MODELS_TAG, - 
i == m_FeatureModels.size() - || traverser.traverseSubLevel(boost::bind(&SFeatureModels::acceptRestoreTraverser, - &m_FeatureModels[i++], boost::cref(this->params()), _1))) - RESTORE(FEATURE_CORRELATE_MODELS_TAG, - j == m_FeatureCorrelatesModels.size() - || traverser.traverseSubLevel(boost::bind(&SFeatureCorrelateModels::acceptRestoreTraverser, - &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) + i == m_FeatureModels.size() || + traverser.traverseSubLevel( + boost::bind(&SFeatureModels::acceptRestoreTraverser, &m_FeatureModels[i++], boost::cref(this->params()), _1))) + RESTORE( + FEATURE_CORRELATE_MODELS_TAG, + j == m_FeatureCorrelatesModels.size() || + traverser.traverseSubLevel(boost::bind( + &SFeatureCorrelateModels::acceptRestoreTraverser, &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) RESTORE(INTERIM_BUCKET_CORRECTOR_TAG, this->interimBucketCorrectorAcceptRestoreTraverser(traverser)) RESTORE(MEMORY_ESTIMATOR_TAG, core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) - } - while (traverser.next()); + } while (traverser.next()); - for (auto &feature : m_FeatureModels) - { - for (auto &model : feature.s_Models) - { - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + for (auto& feature : m_FeatureModels) { + for (auto& model : feature.s_Models) { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { model->modelCorrelations(*correlates.s_Models); } } @@ -449,49 +361,40 @@ bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser &tr return true; } -void CIndividualModel::createUpdateNewModels(core_t::TTime time, CResourceMonitor &resourceMonitor) -{ +void CIndividualModel::createUpdateNewModels(core_t::TTime time, CResourceMonitor& resourceMonitor) { this->updateRecycledModels(); - CDataGatherer &gatherer = this->dataGatherer(); + CDataGatherer& gatherer = this->dataGatherer(); std::size_t numberExistingPeople = m_FirstBucketTimes.size(); std::size_t numberCorrelations = this->numberCorrelations(); - TOptionalSize usageEstimate = this->estimateMemoryUsage(std::min(numberExistingPeople, - gatherer.numberActivePeople()), + TOptionalSize usageEstimate = this->estimateMemoryUsage(std::min(numberExistingPeople, gatherer.numberActivePeople()), 0, // # attributes numberCorrelations); std::size_t ourUsage = usageEstimate ? usageEstimate.get() : this->computeMemoryUsage(); std::size_t resourceLimit = ourUsage + resourceMonitor.allocationLimit(); std::size_t numberNewPeople = gatherer.numberPeople(); - numberNewPeople = numberNewPeople > numberExistingPeople ? - numberNewPeople - numberExistingPeople : 0; + numberNewPeople = numberNewPeople > numberExistingPeople ? numberNewPeople - numberExistingPeople : 0; - while ( numberNewPeople > 0 - && resourceMonitor.areAllocationsAllowed() - && (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) - { + while (numberNewPeople > 0 && resourceMonitor.areAllocationsAllowed() && (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) { // We batch people in CHUNK_SIZE (500) and create models in chunks // and test usage after each chunk. std::size_t numberToCreate = std::min(numberNewPeople, CHUNK_SIZE); - LOG_TRACE("Creating batch of " << numberToCreate << " people of remaining " << numberNewPeople << ". 
" - << resourceLimit - ourUsage << " free bytes remaining"); + LOG_TRACE("Creating batch of " << numberToCreate << " people of remaining " << numberNewPeople << ". " << resourceLimit - ourUsage + << " free bytes remaining"); this->createNewModels(numberToCreate, 0); numberExistingPeople += numberToCreate; numberNewPeople -= numberToCreate; - if (numberNewPeople > 0 && resourceMonitor.haveNoLimit() == false) - { + if (numberNewPeople > 0 && resourceMonitor.haveNoLimit() == false) { ourUsage = this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, 0, numberCorrelations); } } - if (numberNewPeople > 0) - { + if (numberNewPeople > 0) { resourceMonitor.acceptAllocationFailureResult(time); LOG_DEBUG("Not enough memory to create models"); - core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures). - increment(numberNewPeople); + core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures).increment(numberNewPeople); std::size_t toRemove = gatherer.numberPeople() - numberNewPeople; gatherer.removePeople(toRemove); } @@ -499,23 +402,17 @@ void CIndividualModel::createUpdateNewModels(core_t::TTime time, CResourceMonito this->refreshCorrelationModels(resourceLimit, resourceMonitor); } -void CIndividualModel::createNewModels(std::size_t n, std::size_t m) -{ - if (n > 0) - { +void CIndividualModel::createNewModels(std::size_t n, std::size_t m) { + if (n > 0) { std::size_t newN = m_FirstBucketTimes.size() + n; core::CAllocationStrategy::resize(m_FirstBucketTimes, newN, CAnomalyDetectorModel::TIME_UNSET); core::CAllocationStrategy::resize(m_LastBucketTimes, newN, CAnomalyDetectorModel::TIME_UNSET); - for (auto &feature : m_FeatureModels) - { + for (auto& feature : m_FeatureModels) { core::CAllocationStrategy::reserve(feature.s_Models, newN); - for (std::size_t pid = feature.s_Models.size(); pid < newN; ++pid) - { + for (std::size_t pid = feature.s_Models.size(); pid < newN; ++pid) { feature.s_Models.emplace_back(feature.s_NewModel->clone(pid)); - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { feature.s_Models.back()->modelCorrelations(*correlates.s_Models); } } @@ -525,19 +422,14 @@ void CIndividualModel::createNewModels(std::size_t n, std::size_t m) this->CAnomalyDetectorModel::createNewModels(n, m); } -void CIndividualModel::updateRecycledModels() -{ - for (auto pid : this->dataGatherer().recycledPersonIds()) - { +void CIndividualModel::updateRecycledModels() { + for (auto pid : this->dataGatherer().recycledPersonIds()) { m_FirstBucketTimes[pid] = CAnomalyDetectorModel::TIME_UNSET; - m_LastBucketTimes[pid] = CAnomalyDetectorModel::TIME_UNSET; - for (auto &feature : m_FeatureModels) - { + m_LastBucketTimes[pid] = CAnomalyDetectorModel::TIME_UNSET; + for (auto& feature : m_FeatureModels) { feature.s_Models[pid].reset(feature.s_NewModel->clone(pid)); - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { feature.s_Models.back()->modelCorrelations(*correlates.s_Models); } } @@ -546,83 +438,60 @@ void CIndividualModel::updateRecycledModels() this->CAnomalyDetectorModel::updateRecycledModels(); } -void CIndividualModel::refreshCorrelationModels(std::size_t resourceLimit, - CResourceMonitor 
&resourceMonitor) -{ +void CIndividualModel::refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor) { std::size_t n = this->numberOfPeople(); double maxNumberCorrelations = this->params().s_CorrelationModelsOverhead * static_cast(n); auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate, this, n, 0, _1); - CTimeSeriesCorrelateModelAllocator allocator(resourceMonitor, memoryUsage, resourceLimit, - static_cast(maxNumberCorrelations)); - for (auto &feature : m_FeatureCorrelatesModels) - { + CTimeSeriesCorrelateModelAllocator allocator( + resourceMonitor, memoryUsage, resourceLimit, static_cast(maxNumberCorrelations)); + for (auto& feature : m_FeatureCorrelatesModels) { allocator.prototypePrior(feature.s_ModelPrior); feature.s_Models->refresh(allocator); } } -void CIndividualModel::clearPrunedResources(const TSizeVec &people, const TSizeVec &/*attributes*/) -{ - for (auto pid : people) - { - for (auto &feature : m_FeatureModels) - { +void CIndividualModel::clearPrunedResources(const TSizeVec& people, const TSizeVec& /*attributes*/) { + for (auto pid : people) { + for (auto& feature : m_FeatureModels) { feature.s_Models[pid].reset(this->tinyModel()); } } } -double CIndividualModel::emptyBucketWeight(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) const -{ +double CIndividualModel::emptyBucketWeight(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const { double result = 1.0; - if (model_t::countsEmptyBuckets(feature)) - { + if (model_t::countsEmptyBuckets(feature)) { TOptionalUInt64 count = this->currentBucketCount(pid, time); - if (!count || *count == 0) - { + if (!count || *count == 0) { double frequency = this->personFrequency(pid); - result = model_t::emptyBucketCountWeight(feature, frequency, - this->params().s_CutoffToModelEmptyBuckets); + result = model_t::emptyBucketCountWeight(feature, frequency, this->params().s_CutoffToModelEmptyBuckets); } } return result; } -double CIndividualModel::probabilityBucketEmpty(model_t::EFeature feature, std::size_t pid) const -{ +double CIndividualModel::probabilityBucketEmpty(model_t::EFeature feature, std::size_t pid) const { double result = 0.0; - if (model_t::countsEmptyBuckets(feature)) - { + if (model_t::countsEmptyBuckets(feature)) { double frequency = this->personFrequency(pid); - double emptyBucketWeight = model_t::emptyBucketCountWeight( - feature, frequency, - this->params().s_CutoffToModelEmptyBuckets); + double emptyBucketWeight = model_t::emptyBucketCountWeight(feature, frequency, this->params().s_CutoffToModelEmptyBuckets); result = (1.0 - frequency) * (1.0 - emptyBucketWeight); } return result; } -const maths::CModel *CIndividualModel::model(model_t::EFeature feature, std::size_t pid) const -{ +const maths::CModel* CIndividualModel::model(model_t::EFeature feature, std::size_t pid) const { return const_cast(this)->model(feature, pid); } -maths::CModel *CIndividualModel::model(model_t::EFeature feature, std::size_t pid) -{ - auto i = std::find_if(m_FeatureModels.begin(), m_FeatureModels.end(), - [feature](const SFeatureModels &model) - { - return model.s_Feature == feature; - }); +maths::CModel* CIndividualModel::model(model_t::EFeature feature, std::size_t pid) { + auto i = std::find_if( + m_FeatureModels.begin(), m_FeatureModels.end(), [feature](const SFeatureModels& model) { return model.s_Feature == feature; }); return i != m_FeatureModels.end() && pid < i->s_Models.size() ? 
i->s_Models[pid].get() : 0; } -void CIndividualModel::sampleCorrelateModels(const maths_t::TWeightStyleVec &weightStyles) -{ - for (const auto &feature : m_FeatureCorrelatesModels) - { +void CIndividualModel::sampleCorrelateModels(const maths_t::TWeightStyleVec& weightStyles) { + for (const auto& feature : m_FeatureCorrelatesModels) { feature.s_Models->processSamples(weightStyles); } } @@ -630,91 +499,71 @@ void CIndividualModel::sampleCorrelateModels(const maths_t::TWeightStyleVec &wei void CIndividualModel::correctBaselineForInterim(model_t::EFeature feature, std::size_t pid, model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - const TFeatureSizeSizeTripleDouble1VecUMap &corrections, - TDouble1Vec &result) const -{ - if (type.isInterim() && model_t::requiresInterimResultAdjustment(feature)) - { + const TSizeDoublePr1Vec& correlated, + const TFeatureSizeSizeTripleDouble1VecUMap& corrections, + TDouble1Vec& result) const { + if (type.isInterim() && model_t::requiresInterimResultAdjustment(feature)) { TFeatureSizeSizeTriple key(feature, pid, pid); - switch (type.asConditionalOrUnconditional()) - { + switch (type.asConditionalOrUnconditional()) { case model_t::CResultType::E_Unconditional: break; case model_t::CResultType::E_Conditional: - if (!correlated.empty()) - { + if (!correlated.empty()) { key.third = correlated[0].first; } break; } auto correction = corrections.find(key); - if (correction != corrections.end()) - { + if (correction != corrections.end()) { result -= correction->second; } } } -const CIndividualModel::TTimeVec &CIndividualModel::firstBucketTimes() const -{ +const CIndividualModel::TTimeVec& CIndividualModel::firstBucketTimes() const { return m_FirstBucketTimes; } -const CIndividualModel::TTimeVec &CIndividualModel::lastBucketTimes() const -{ +const CIndividualModel::TTimeVec& CIndividualModel::lastBucketTimes() const { return m_LastBucketTimes; } -double CIndividualModel::derate(std::size_t pid, core_t::TTime time) const -{ - return std::max(1.0 - static_cast(time - m_FirstBucketTimes[pid]) - / static_cast(3 * core::constants::WEEK), 0.0); +double CIndividualModel::derate(std::size_t pid, core_t::TTime time) const { + return std::max(1.0 - static_cast(time - m_FirstBucketTimes[pid]) / static_cast(3 * core::constants::WEEK), 0.0); } -std::string CIndividualModel::printCurrentBucket() const -{ +std::string CIndividualModel::printCurrentBucket() const { std::ostringstream result; - result << "[" << this->currentBucketStartTime() << "," - << this->currentBucketStartTime() + this->bucketLength() << ")"; + result << "[" << this->currentBucketStartTime() << "," << this->currentBucketStartTime() + this->bucketLength() << ")"; return result.str(); } -std::size_t CIndividualModel::numberCorrelations() const -{ +std::size_t CIndividualModel::numberCorrelations() const { std::size_t result = 0u; - for (const auto &feature : m_FeatureCorrelatesModels) - { + for (const auto& feature : m_FeatureCorrelatesModels) { result += feature.s_Models->correlationModels().size(); } return result; } -double CIndividualModel::attributeFrequency(std::size_t /*cid*/) const -{ +double CIndividualModel::attributeFrequency(std::size_t /*cid*/) const { return 1.0; } -void CIndividualModel::doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) -{ +void CIndividualModel::doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) { core_t::TTime gap = endTime - startTime; - for (auto &time : m_LastBucketTimes) - { - if (!CAnomalyDetectorModel::isTimeUnset(time)) - { + for (auto& 
time : m_LastBucketTimes) { + if (!CAnomalyDetectorModel::isTimeUnset(time)) { time = time + gap; } } - for (auto &feature : m_FeatureModels) - { - for (auto &model : feature.s_Models) - { + for (auto& feature : m_FeatureModels) { + for (auto& model : feature.s_Models) { model->skipTime(gap); } } } - } } diff --git a/lib/model/CInterimBucketCorrector.cc b/lib/model/CInterimBucketCorrector.cc index 5e379e4fac..afa4702905 100644 --- a/lib/model/CInterimBucketCorrector.cc +++ b/lib/model/CInterimBucketCorrector.cc @@ -18,46 +18,35 @@ #include -namespace ml -{ -namespace model -{ -namespace -{ +namespace ml { +namespace model { +namespace { const std::size_t COMPONENT_SIZE(24); const std::string COUNT_TREND_TAG("a"); const std::string COUNT_MEAN_TAG("b"); -double decayRate(core_t::TTime bucketLength) -{ - return CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE - * CAnomalyDetectorModelConfig::bucketNormalizationFactor(bucketLength); +double decayRate(core_t::TTime bucketLength) { + return CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE * CAnomalyDetectorModelConfig::bucketNormalizationFactor(bucketLength); } -double trendDecayRate(core_t::TTime bucketLength) -{ +double trendDecayRate(core_t::TTime bucketLength) { return CAnomalyDetectorModelConfig::trendDecayRate(decayRate(bucketLength), bucketLength); } } CInterimBucketCorrector::CInterimBucketCorrector(core_t::TTime bucketLength) - : m_BucketLength(bucketLength), - m_CountTrend(trendDecayRate(bucketLength), bucketLength, COMPONENT_SIZE) -{} - -CInterimBucketCorrector::CInterimBucketCorrector(const CInterimBucketCorrector &other) - : m_BucketLength(other.m_BucketLength), - m_CountTrend(other.m_CountTrend), - m_CountMean(other.m_CountMean) -{} - -core_t::TTime CInterimBucketCorrector::calcBucketMidPoint(core_t::TTime time) const -{ + : m_BucketLength(bucketLength), m_CountTrend(trendDecayRate(bucketLength), bucketLength, COMPONENT_SIZE) { +} + +CInterimBucketCorrector::CInterimBucketCorrector(const CInterimBucketCorrector& other) + : m_BucketLength(other.m_BucketLength), m_CountTrend(other.m_CountTrend), m_CountMean(other.m_CountMean) { +} + +core_t::TTime CInterimBucketCorrector::calcBucketMidPoint(core_t::TTime time) const { return maths::CIntegerTools::floor(time, m_BucketLength) + m_BucketLength / 2; } -void CInterimBucketCorrector::update(core_t::TTime time, std::size_t bucketCount) -{ +void CInterimBucketCorrector::update(core_t::TTime time, std::size_t bucketCount) { core_t::TTime bucketMidPoint = this->calcBucketMidPoint(time); m_CountTrend.addPoint(bucketMidPoint, static_cast<double>(bucketCount)); @@ -67,87 +56,60 @@ void CInterimBucketCorrector::update(core_t::TTime time, std::size_t bucketCount m_CountMean.add(bucketCount); } -double CInterimBucketCorrector::estimateBucketCompleteness(core_t::TTime time, - std::size_t currentCount) const -{ +double CInterimBucketCorrector::estimateBucketCompleteness(core_t::TTime time, std::size_t currentCount) const { core_t::TTime bucketMidPoint = this->calcBucketMidPoint(time); - double bucketCount = m_CountTrend.initialized() ? - maths::CBasicStatistics::mean(m_CountTrend.value(bucketMidPoint)) : - maths::CBasicStatistics::mean(m_CountMean); - return bucketCount > 0.0 ? - maths::CTools::truncate( static_cast<double>(currentCount) - / bucketCount, 0.0, 1.0) : 1.0; -} - -double CInterimBucketCorrector::corrections(core_t::TTime time, - std::size_t currentCount, - double mode, - double value) const -{ + double bucketCount = m_CountTrend.initialized() ?
maths::CBasicStatistics::mean(m_CountTrend.value(bucketMidPoint)) + : maths::CBasicStatistics::mean(m_CountMean); + return bucketCount > 0.0 ? maths::CTools::truncate(static_cast<double>(currentCount) / bucketCount, 0.0, 1.0) : 1.0; +} + +double CInterimBucketCorrector::corrections(core_t::TTime time, std::size_t currentCount, double mode, double value) const { double correction = (1.0 - this->estimateBucketCompleteness(time, currentCount)) * mode; - return maths::CTools::truncate(mode - value, - std::min(0.0, correction), - std::max(0.0, correction)); + return maths::CTools::truncate(mode - value, std::min(0.0, correction), std::max(0.0, correction)); } -CInterimBucketCorrector::TDouble10Vec - CInterimBucketCorrector::corrections(core_t::TTime time, - std::size_t currentCount, - const TDouble10Vec &modes, - const TDouble10Vec &values) const -{ +CInterimBucketCorrector::TDouble10Vec CInterimBucketCorrector::corrections(core_t::TTime time, + std::size_t currentCount, + const TDouble10Vec& modes, + const TDouble10Vec& values) const { TDouble10Vec corrections(values.size(), 0.0); double incompleteBucketFraction = 1.0 - this->estimateBucketCompleteness(time, currentCount); double correction = 0.0; - for (std::size_t i = 0; i < corrections.size(); ++i) - { + for (std::size_t i = 0; i < corrections.size(); ++i) { correction = incompleteBucketFraction * modes[i]; - corrections[i] = maths::CTools::truncate(modes[i] - values[i], - std::min(0.0, correction), - std::max(0.0, correction)); + corrections[i] = maths::CTools::truncate(modes[i] - values[i], std::min(0.0, correction), std::max(0.0, correction)); } return corrections; } -void CInterimBucketCorrector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CInterimBucketCorrector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CInterimBucketCorrector"); core::CMemoryDebug::dynamicSize("m_CountTrend", m_CountTrend, mem); } -std::size_t CInterimBucketCorrector::memoryUsage() const -{ +std::size_t CInterimBucketCorrector::memoryUsage() const { return core::CMemory::dynamicSize(m_CountTrend); } -void CInterimBucketCorrector::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CInterimBucketCorrector::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(COUNT_MEAN_TAG, m_CountMean.toDelimited()); core::CPersistUtils::persist(COUNT_TREND_TAG, m_CountTrend, inserter); } -bool CInterimBucketCorrector::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == COUNT_TREND_TAG) - { - maths::SDistributionRestoreParams changeModelParams{maths_t::E_ContinuousData, - decayRate(m_BucketLength)}; - maths::STimeSeriesDecompositionRestoreParams params{trendDecayRate(m_BucketLength), - m_BucketLength, - COMPONENT_SIZE, - changeModelParams}; +bool CInterimBucketCorrector::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == COUNT_TREND_TAG) { + maths::SDistributionRestoreParams changeModelParams{maths_t::E_ContinuousData, decayRate(m_BucketLength)}; + maths::STimeSeriesDecompositionRestoreParams params{ + trendDecayRate(m_BucketLength), m_BucketLength, COMPONENT_SIZE, changeModelParams}; maths::CTimeSeriesDecomposition restored(params, traverser); m_CountTrend.swap(restored); continue; } RESTORE(COUNT_MEAN_TAG, m_CountMean.fromDelimited(traverser.value())) - } - while (traverser.next()); + } while
(traverser.next()); return true; } - } } diff --git a/lib/model/CLimits.cc b/lib/model/CLimits.cc index 711204954c..cf3cc0fbd9 100644 --- a/lib/model/CLimits.cc +++ b/lib/model/CLimits.cc @@ -10,12 +10,8 @@ #include - -namespace ml -{ -namespace model -{ - +namespace ml { +namespace model { // Initialise statics const size_t CLimits::DEFAULT_AUTOCONFIG_EVENTS(10000); @@ -25,63 +21,42 @@ const size_t CLimits::DEFAULT_RESULTS_MAX_EXAMPLES(4); // The probability threshold is stored as a percentage in the config file const double CLimits::DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD(3.5); - -CLimits::CLimits() : m_AutoConfigEvents(DEFAULT_AUTOCONFIG_EVENTS), - m_AnomalyMaxTimeBuckets(DEFAULT_ANOMALY_MAX_TIME_BUCKETS), - m_MaxExamples(DEFAULT_RESULTS_MAX_EXAMPLES), - m_UnusualProbabilityThreshold(DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD), - m_MemoryLimitMB(CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB), - m_ResourceMonitor() -{ +CLimits::CLimits() + : m_AutoConfigEvents(DEFAULT_AUTOCONFIG_EVENTS), + m_AnomalyMaxTimeBuckets(DEFAULT_ANOMALY_MAX_TIME_BUCKETS), + m_MaxExamples(DEFAULT_RESULTS_MAX_EXAMPLES), + m_UnusualProbabilityThreshold(DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD), + m_MemoryLimitMB(CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB), + m_ResourceMonitor() { } -CLimits::~CLimits() -{ +CLimits::~CLimits() { } -bool CLimits::init(const std::string &configFile) -{ +bool CLimits::init(const std::string& configFile) { boost::property_tree::ptree propTree; - try - { + try { std::ifstream strm(configFile.c_str()); - if (!strm.is_open()) - { + if (!strm.is_open()) { LOG_ERROR("Error opening config file " << configFile); return false; } this->skipUtf8Bom(strm); boost::property_tree::ini_parser::read_ini(strm, propTree); - } - catch (boost::property_tree::ptree_error &e) - { - LOG_ERROR("Error reading config file " << configFile << - " : " << e.what()); + } catch (boost::property_tree::ptree_error& e) { + LOG_ERROR("Error reading config file " << configFile << " : " << e.what()); return false; } - if (this->processSetting(propTree, - "autoconfig.events", - DEFAULT_AUTOCONFIG_EVENTS, - m_AutoConfigEvents) == false || - this->processSetting(propTree, - "anomaly.maxtimebuckets", - DEFAULT_ANOMALY_MAX_TIME_BUCKETS, - m_AnomalyMaxTimeBuckets) == false || - this->processSetting(propTree, - "results.maxexamples", - DEFAULT_RESULTS_MAX_EXAMPLES, - m_MaxExamples) == false || + if (this->processSetting(propTree, "autoconfig.events", DEFAULT_AUTOCONFIG_EVENTS, m_AutoConfigEvents) == false || + this->processSetting(propTree, "anomaly.maxtimebuckets", DEFAULT_ANOMALY_MAX_TIME_BUCKETS, m_AnomalyMaxTimeBuckets) == false || + this->processSetting(propTree, "results.maxexamples", DEFAULT_RESULTS_MAX_EXAMPLES, m_MaxExamples) == false || this->processSetting(propTree, "results.unusualprobabilitythreshold", DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD, m_UnusualProbabilityThreshold) == false || - this->processSetting(propTree, - "memory.modelmemorylimit", - CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB, - m_MemoryLimitMB) == false) - { + this->processSetting(propTree, "memory.modelmemorylimit", CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB, m_MemoryLimitMB) == false) { LOG_ERROR("Error processing config file " << configFile); return false; } @@ -91,50 +66,39 @@ bool CLimits::init(const std::string &configFile) return true; } -size_t CLimits::autoConfigEvents() const -{ +size_t CLimits::autoConfigEvents() const { return m_AutoConfigEvents; } -size_t CLimits::anomalyMaxTimeBuckets() const -{ +size_t 
CLimits::anomalyMaxTimeBuckets() const { return m_AnomalyMaxTimeBuckets; } -size_t CLimits::maxExamples() const -{ +size_t CLimits::maxExamples() const { return m_MaxExamples; } -double CLimits::unusualProbabilityThreshold() const -{ +double CLimits::unusualProbabilityThreshold() const { return m_UnusualProbabilityThreshold / 100.0; } -size_t CLimits::memoryLimitMB() const -{ +size_t CLimits::memoryLimitMB() const { return m_MemoryLimitMB; } -CResourceMonitor &CLimits::resourceMonitor() -{ +CResourceMonitor& CLimits::resourceMonitor() { return m_ResourceMonitor; } -void CLimits::skipUtf8Bom(std::ifstream &strm) -{ - if (strm.tellg() != std::streampos(0)) - { +void CLimits::skipUtf8Bom(std::ifstream& strm) { + if (strm.tellg() != std::streampos(0)) { return; } std::ios_base::iostate origState(strm.rdstate()); // The 3 bytes 0xEF, 0xBB, 0xBF form a UTF-8 byte order marker (BOM) - if (strm.get() == 0xEF) - { - if (strm.get() == 0xBB) - { - if (strm.get() == 0xBF) - { + if (strm.get() == 0xEF) { + if (strm.get() == 0xBB) { + if (strm.get() == 0xBF) { LOG_DEBUG("Skipping UTF-8 BOM"); return; } @@ -146,8 +110,5 @@ void CLimits::skipUtf8Bom(std::ifstream &strm) // There was no BOM, so seek back to the beginning of the file strm.seekg(0); } - - } } - diff --git a/lib/model/CMemoryUsageEstimator.cc b/lib/model/CMemoryUsageEstimator.cc index 4766a55a19..f218297348 100644 --- a/lib/model/CMemoryUsageEstimator.cc +++ b/lib/model/CMemoryUsageEstimator.cc @@ -19,13 +19,10 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { const std::size_t MAXIMUM_ESTIMATES_BEFORE_NEW_VALUE(10); const std::string VALUES_TAG("a"); } @@ -33,46 +30,37 @@ const std::string VALUES_TAG("a"); CMemoryUsageEstimator::CMemoryUsageEstimator() : m_Values(2 * E_NumberPredictors), // Initialise this so that the first estimate triggers a calculation - m_NumEstimatesSinceValue(MAXIMUM_ESTIMATES_BEFORE_NEW_VALUE - 1) -{ + m_NumEstimatesSinceValue(MAXIMUM_ESTIMATES_BEFORE_NEW_VALUE - 1) { } -CMemoryUsageEstimator::TOptionalSize -CMemoryUsageEstimator::estimate(const TSizeArray &predictors) -{ +CMemoryUsageEstimator::TOptionalSize CMemoryUsageEstimator::estimate(const TSizeArray& predictors) { using TDoubleArray = boost::array<double, E_NumberPredictors>; - if (m_Values.size() < E_NumberPredictors) - { + if (m_Values.size() < E_NumberPredictors) { return TOptionalSize(); } - if (m_NumEstimatesSinceValue >= MAXIMUM_ESTIMATES_BEFORE_NEW_VALUE) - { + if (m_NumEstimatesSinceValue >= MAXIMUM_ESTIMATES_BEFORE_NEW_VALUE) { return TOptionalSize(); } std::size_t last = m_Values.size() - 1; TDoubleArray x0; - for (std::size_t i = 0u; i < m_Values[last].first.size(); ++i) - { + for (std::size_t i = 0u; i < m_Values[last].first.size(); ++i) { x0[i] = static_cast<double>(m_Values[last].first[i]); } double c0 = static_cast<double>(m_Values[last].second); bool origin = true; - for (std::size_t i = 0u; i < predictors.size(); ++i) - { + for (std::size_t i = 0u; i < predictors.size(); ++i) { origin &= (predictors[i] == 0); - if (predictors[i] - static_cast<std::size_t>(x0[i]) > this->maximumExtrapolation(static_cast<EComponent>(i))) - { - LOG_TRACE("Sample too big for variance of predictor(" << i << "): " - << predictors[i] << " > " << this->maximumExtrapolation(static_cast<EComponent>(i))); + if (predictors[i] - static_cast<std::size_t>(x0[i]) > this->maximumExtrapolation(static_cast<EComponent>(i))) { + LOG_TRACE("Sample too big for variance of predictor(" << i << "): " << predictors[i] << " > " + << this->maximumExtrapolation(static_cast<EComponent>(i))); return TOptionalSize(); } } - if (origin) - { + if
(origin) { return TOptionalSize(); } @@ -81,19 +69,16 @@ CMemoryUsageEstimator::estimate(const TSizeArray &predictors) Eigen::MatrixXd X(m_Values.size(), static_cast(E_NumberPredictors)); Eigen::VectorXd y(m_Values.size()); - for (std::size_t i = 0u; i < m_Values.size(); i++) - { - for (std::size_t j = 0u; j < E_NumberPredictors; ++j) - { - X(i,j) = static_cast<double>(m_Values[i].first[j]) - x0[j]; + for (std::size_t i = 0u; i < m_Values.size(); i++) { + for (std::size_t j = 0u; j < E_NumberPredictors; ++j) { + X(i, j) = static_cast<double>(m_Values[i].first[j]) - x0[j]; } y(i) = static_cast<double>(m_Values[i].second) - c0; } Eigen::MatrixXd theta = X.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(y); double predicted = c0; - for (std::size_t i = 0u; i < E_NumberPredictors; ++i) - { + for (std::size_t i = 0u; i < E_NumberPredictors; ++i) { predicted += std::max(theta(i), 0.0) * (static_cast<double>(predictors[i]) - x0[i]); } std::size_t mem = static_cast<std::size_t>(predicted + 0.5); @@ -103,27 +88,21 @@ return TOptionalSize(mem); } -void CMemoryUsageEstimator::addValue(const TSizeArray &predictors, std::size_t memory) -{ +void CMemoryUsageEstimator::addValue(const TSizeArray& predictors, std::size_t memory) { LOG_TRACE("Add Value for " << core::CContainerPrinter::print(predictors) << ": " << memory); m_NumEstimatesSinceValue = 0; - if (m_Values.size() == m_Values.capacity()) - { + if (m_Values.size() == m_Values.capacity()) { // Replace closest. std::size_t closest = 0u; std::size_t closestDistance = boost::numeric::bounds<std::size_t>::highest(); - for (std::size_t i = 0u; closestDistance > 0 && i < m_Values.size(); ++i) - { + for (std::size_t i = 0u; closestDistance > 0 && i < m_Values.size(); ++i) { std::size_t distance = 0u; - for (std::size_t j = 0u; j < predictors.size(); ++j) - { - distance += std::max(m_Values[i].first[j], predictors[j]) - - std::min(m_Values[i].first[j], predictors[j]); + for (std::size_t j = 0u; j < predictors.size(); ++j) { + distance += std::max(m_Values[i].first[j], predictors[j]) - std::min(m_Values[i].first[j], predictors[j]); } - if (distance < closestDistance) - { + if (distance < closestDistance) { closest = i; closestDistance = distance; } @@ -134,47 +113,37 @@ void CMemoryUsageEstimator::addValue(const TSizeArray &predictors, std::size_t m core::CStatistics::stat(stat_t::E_NumberMemoryUsageChecks).increment(); } -void CMemoryUsageEstimator::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CMemoryUsageEstimator::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMemoryUsageEstimator"); core::CMemoryDebug::dynamicSize("m_Values", m_Values, mem); } -std::size_t CMemoryUsageEstimator::memoryUsage() const -{ +std::size_t CMemoryUsageEstimator::memoryUsage() const { return core::CMemory::dynamicSize(m_Values); } -void CMemoryUsageEstimator::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CMemoryUsageEstimator::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CPersistUtils::persist(VALUES_TAG, m_Values, inserter); } -bool CMemoryUsageEstimator::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == VALUES_TAG) - { - if (!core::CPersistUtils::restore(VALUES_TAG, m_Values, traverser)) - { +bool CMemoryUsageEstimator::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name ==
VALUES_TAG) { + if (!core::CPersistUtils::restore(VALUES_TAG, m_Values, traverser)) { LOG_ERROR("Failed to restore values"); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -std::size_t CMemoryUsageEstimator::maximumExtrapolation(EComponent component) const -{ +std::size_t CMemoryUsageEstimator::maximumExtrapolation(EComponent component) const { std::size_t min = boost::numeric::bounds<std::size_t>::highest(); std::size_t max = boost::numeric::bounds<std::size_t>::lowest(); - for (std::size_t i = 0u; i < m_Values.size(); ++i) - { + for (std::size_t i = 0u; i < m_Values.size(); ++i) { min = std::min(min, m_Values[i].first[component]); max = std::max(max, m_Values[i].first[component]); } diff --git a/lib/model/CMetricBucketGatherer.cc b/lib/model/CMetricBucketGatherer.cc index 67ba993fc4..3f844799e6 100644 --- a/lib/model/CMetricBucketGatherer.cc +++ b/lib/model/CMetricBucketGatherer.cc @@ -32,13 +32,10 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { using TSizeSizePr = std::pair<std::size_t, std::size_t>; using TDoubleVec = std::vector<double>; @@ -104,38 +101,69 @@ const std::string ATTRIBUTE_TAG("a"); const std::string DATA_TAG("b"); const std::string PERSON_TAG("c"); - //! Get the by field name. -const std::string &byField(bool population, const TStrVec &fieldNames) -{ +const std::string& byField(bool population, const TStrVec& fieldNames) { return population ? fieldNames[1] : fieldNames[0]; } //! Get the over field name. -const std::string &overField(bool population, const TStrVec &fieldNames) -{ +const std::string& overField(bool population, const TStrVec& fieldNames) { return population ? fieldNames[0] : EMPTY_STRING; } -template<model_t::EMetricCategory CATEGORY> struct SDataType {}; -template<> struct SDataType<model_t::E_Mean> { using Type = TSizeSizeMeanGathererUMapUMap; }; -template<> struct SDataType<model_t::E_Median> { using Type = TSizeSizeMedianGathererUMapUMap; }; -template<> struct SDataType<model_t::E_Min> { using Type = TSizeSizeMinGathererUMapUMap; }; -template<> struct SDataType<model_t::E_Max> { using Type = TSizeSizeMaxGathererUMapUMap; }; -template<> struct SDataType<model_t::E_Sum> { using Type = TSizeSizeSumGathererUMapUMap; }; -template<> struct SDataType<model_t::E_Variance> { using Type = TSizeSizeVarianceGathererUMapUMap; }; -template<> struct SDataType<model_t::E_MultivariateMean> { using Type = TSizeSizeMultivariateMeanGathererUMapUMap; }; -template<> struct SDataType<model_t::E_MultivariateMin> { using Type = TSizeSizeMultivariateMinGathererUMapUMap; }; -template<> struct SDataType<model_t::E_MultivariateMax> { using Type = TSizeSizeMultivariateMaxGathererUMapUMap; }; -template struct SMaybeConst {}; -template struct SMaybeConst { using Type = T;}; -template struct SMaybeConst { using Type = const T; }; +template<model_t::EMetricCategory CATEGORY> +struct SDataType {}; +template<> +struct SDataType<model_t::E_Mean> { + using Type = TSizeSizeMeanGathererUMapUMap; +}; +template<> +struct SDataType<model_t::E_Median> { + using Type = TSizeSizeMedianGathererUMapUMap; +}; +template<> +struct SDataType<model_t::E_Min> { + using Type = TSizeSizeMinGathererUMapUMap; +}; +template<> +struct SDataType<model_t::E_Max> { + using Type = TSizeSizeMaxGathererUMapUMap; +}; +template<> +struct SDataType<model_t::E_Sum> { + using Type = TSizeSizeSumGathererUMapUMap; +}; +template<> +struct SDataType<model_t::E_Variance> { + using Type = TSizeSizeVarianceGathererUMapUMap; +}; +template<> +struct SDataType<model_t::E_MultivariateMean> { + using Type = TSizeSizeMultivariateMeanGathererUMapUMap; +}; +template<> +struct SDataType<model_t::E_MultivariateMin> { + using Type = TSizeSizeMultivariateMinGathererUMapUMap; +}; +template<> +struct SDataType<model_t::E_MultivariateMax> { + using Type = TSizeSizeMultivariateMaxGathererUMapUMap; +}; +template +struct SMaybeConst {}; +template +struct SMaybeConst { + using Type = T; +}; +template +struct SMaybeConst { + using
Type = const T; +}; //! Register the callbacks for computing the size of feature data gatherers //! with \p visitor. template -void registerMemoryCallbacks(VISITOR &visitor) -{ +void registerMemoryCallbacks(VISITOR& visitor) { visitor.template registerCallback(); visitor.template registerCallback(); visitor.template registerCallback(); @@ -148,11 +176,9 @@ void registerMemoryCallbacks(VISITOR &visitor) } //! Register the callbacks for computing the size of feature data gatherers. -void registerMemoryCallbacks() -{ +void registerMemoryCallbacks() { static std::atomic_flag once = ATOMIC_FLAG_INIT; - if (once.test_and_set() == false) - { + if (once.test_and_set() == false) { registerMemoryCallbacks(core::CMemory::anyVisitor()); registerMemoryCallbacks(core::CMemoryDebug::anyVisitor()); } @@ -161,36 +187,47 @@ void registerMemoryCallbacks() //! Apply a function \p f to a gatherer held as a value by map entry \p i //! of an explicit metric category template -void apply(ITR i, const F &f) -{ +void apply(ITR i, const F& f) { using TDataType = typename SDataType::Type; f(i->first, boost::any_cast::Type&>(i->second)); } //! Apply a function \p f to all the gatherers held in [\p begin, \p end). template -bool apply(ITR begin, ITR end, const F &f) -{ - for (ITR i = begin; i != end; ++i) - { +bool apply(ITR begin, ITR end, const F& f) { + for (ITR i = begin; i != end; ++i) { model_t::EMetricCategory category = i->first.first; - try - { - switch (category) - { - case model_t::E_Mean: apply(i, f); break; - case model_t::E_Median: apply(i, f); break; - case model_t::E_Min: apply(i, f); break; - case model_t::E_Max: apply(i, f); break; - case model_t::E_Variance: apply(i, f); break; - case model_t::E_Sum: apply(i, f); break; - case model_t::E_MultivariateMean: apply(i, f); break; - case model_t::E_MultivariateMin: apply(i, f); break; - case model_t::E_MultivariateMax: apply(i, f); break; + try { + switch (category) { + case model_t::E_Mean: + apply(i, f); + break; + case model_t::E_Median: + apply(i, f); + break; + case model_t::E_Min: + apply(i, f); + break; + case model_t::E_Max: + apply(i, f); + break; + case model_t::E_Variance: + apply(i, f); + break; + case model_t::E_Sum: + apply(i, f); + break; + case model_t::E_MultivariateMean: + apply(i, f); + break; + case model_t::E_MultivariateMin: + apply(i, f); + break; + case model_t::E_MultivariateMax: + apply(i, f); + break; } - } - catch (const std::exception &e) - { + } catch (const std::exception& e) { LOG_ERROR("Apply failed for " << category << ": " << e.what()); return false; } @@ -201,979 +238,756 @@ bool apply(ITR begin, ITR end, const F &f) //! Apply a function \p f to all the gatherers held in \p data. template -bool apply(T &data, const F &f) -{ +bool apply(T& data, const F& f) { return apply(data.begin(), data.end(), f); } //! Initialize feature data for a specific category template -void initializeFeatureDataInstance(std::size_t dimension, - TCategorySizePrAnyMap &featureData) -{ +void initializeFeatureDataInstance(std::size_t dimension, TCategorySizePrAnyMap& featureData) { using Type = typename SDataType::Type; featureData[{CATEGORY, dimension}] = Type(); } //! Persists the data gatherers (for individual metric categories). 
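The apply helpers above recover the concrete gatherer map type for a runtime model_t::EMetricCategory through the SDataType specializations and then cast the type-erased boost::any back to that type. A minimal, self-contained sketch of the same dispatch pattern, using std::any in place of boost::any and simplified stand-in types (E_Mean/E_Min and the map aliases here are illustrative, not the real gatherer types):

    #include <any>
    #include <iostream>
    #include <map>

    enum EMetricCategory { E_Mean, E_Min };

    // Map each enum value onto its concrete storage type at compile time.
    template<EMetricCategory>
    struct SDataType {};
    template<>
    struct SDataType<E_Mean> {
        using Type = std::map<int, double>;
    };
    template<>
    struct SDataType<E_Min> {
        using Type = std::map<int, float>;
    };

    // Recover the concrete type for CATEGORY from the type-erased value
    // and apply f to it; std::any_cast throws if the types disagree.
    template<EMetricCategory CATEGORY, typename F>
    void apply(std::any& data, const F& f) {
        f(std::any_cast<typename SDataType<CATEGORY>::Type&>(data));
    }

    int main() {
        std::any data = SDataType<E_Mean>::Type{{1, 2.0}};
        apply<E_Mean>(data, [](auto& gatherers) { std::cout << gatherers.size() << '\n'; });
        return 0;
    }

The design choice this illustrates: one heterogeneous container can hold every gatherer category, while each access point pays only a single switch over the category to get back full static typing.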
-class CPersistFeatureData -{ - public: - template - void operator()(const TCategorySizePr &category, - const TSizeSizeTUMapUMap &data, - core::CStatePersistInserter &inserter) const - { - if (data.empty()) - { - inserter.insertValue(this->tagName(category), EMPTY_STRING); - return; - } - - inserter.insertLevel(this->tagName(category), - boost::bind(SDoPersist(), boost::cref(data), _1)); +class CPersistFeatureData { +public: + template + void operator()(const TCategorySizePr& category, const TSizeSizeTUMapUMap& data, core::CStatePersistInserter& inserter) const { + if (data.empty()) { + inserter.insertValue(this->tagName(category), EMPTY_STRING); + return; } - private: - std::string tagName(const TCategorySizePr &category) const - { - switch (category.first) - { - case model_t::E_Mean: return MEAN_TAG; - case model_t::E_Median: return MEDIAN_TAG; - case model_t::E_Min: return MIN_TAG; - case model_t::E_Max: return MAX_TAG; - case model_t::E_Variance: return VARIANCE_TAG; - case model_t::E_Sum: return SUM_TAG; - case model_t::E_MultivariateMean: return MULTIVARIATE_MEAN_TAG - + core::CStringUtils::typeToString(category.second); - case model_t::E_MultivariateMin: return MULTIVARIATE_MIN_TAG - + core::CStringUtils::typeToString(category.second); - case model_t::E_MultivariateMax: return MULTIVARIATE_MAX_TAG - + core::CStringUtils::typeToString(category.second); - } - return EMPTY_STRING; - } + inserter.insertLevel(this->tagName(category), boost::bind(SDoPersist(), boost::cref(data), _1)); + } - struct SDoPersist - { - template - void operator()(const TSizeSizeTUMapUMap &data, core::CStatePersistInserter &inserter) const - { - using TSizeSizeTUMapUMapCItr = typename TSizeSizeTUMapUMap::const_iterator; - std::vector dataItrs; - dataItrs.reserve(data.size()); - for (auto i = data.cbegin(); i != data.cend(); ++i) - { - dataItrs.push_back(i); - } - std::sort(dataItrs.begin(), dataItrs.end(), - [](TSizeSizeTUMapUMapCItr lhs, TSizeSizeTUMapUMapCItr rhs) { return lhs->first < rhs->first; }); +private: + std::string tagName(const TCategorySizePr& category) const { + switch (category.first) { + case model_t::E_Mean: + return MEAN_TAG; + case model_t::E_Median: + return MEDIAN_TAG; + case model_t::E_Min: + return MIN_TAG; + case model_t::E_Max: + return MAX_TAG; + case model_t::E_Variance: + return VARIANCE_TAG; + case model_t::E_Sum: + return SUM_TAG; + case model_t::E_MultivariateMean: + return MULTIVARIATE_MEAN_TAG + core::CStringUtils::typeToString(category.second); + case model_t::E_MultivariateMin: + return MULTIVARIATE_MIN_TAG + core::CStringUtils::typeToString(category.second); + case model_t::E_MultivariateMax: + return MULTIVARIATE_MAX_TAG + core::CStringUtils::typeToString(category.second); + } + return EMPTY_STRING; + } - for (auto itr : dataItrs) - { - inserter.insertLevel(ATTRIBUTE_TAG, boost::bind(SDoPersist(), itr->first, boost::cref(itr->second), _1)); - } + struct SDoPersist { + template + void operator()(const TSizeSizeTUMapUMap& data, core::CStatePersistInserter& inserter) const { + using TSizeSizeTUMapUMapCItr = typename TSizeSizeTUMapUMap::const_iterator; + std::vector dataItrs; + dataItrs.reserve(data.size()); + for (auto i = data.cbegin(); i != data.cend(); ++i) { + dataItrs.push_back(i); } + std::sort(dataItrs.begin(), dataItrs.end(), [](TSizeSizeTUMapUMapCItr lhs, TSizeSizeTUMapUMapCItr rhs) { + return lhs->first < rhs->first; + }); - template - void operator()(std::size_t cid, - const TSizeTUMap &pidMap, - core::CStatePersistInserter &inserter) const - { - 
inserter.insertValue(ATTRIBUTE_TAG, cid); - - using TSizeTUMapCItr = typename TSizeTUMap::const_iterator; - std::vector pidItrs; - pidItrs.reserve(pidMap.size()); - for (auto i = pidMap.cbegin(); i != pidMap.cend(); ++i) - { - pidItrs.push_back(i); - } - std::sort(pidItrs.begin(), pidItrs.end(), - [](TSizeTUMapCItr lhs, TSizeTUMapCItr rhs) { return lhs->first < rhs->first; }); + for (auto itr : dataItrs) { + inserter.insertLevel(ATTRIBUTE_TAG, boost::bind(SDoPersist(), itr->first, boost::cref(itr->second), _1)); + } + } - for (auto itr : pidItrs) - { - inserter.insertLevel(PERSON_TAG, boost::bind(SDoPersist(), itr->first, boost::cref(itr->second), _1)); - } + template + void operator()(std::size_t cid, const TSizeTUMap& pidMap, core::CStatePersistInserter& inserter) const { + inserter.insertValue(ATTRIBUTE_TAG, cid); + + using TSizeTUMapCItr = typename TSizeTUMap::const_iterator; + std::vector pidItrs; + pidItrs.reserve(pidMap.size()); + for (auto i = pidMap.cbegin(); i != pidMap.cend(); ++i) { + pidItrs.push_back(i); } + std::sort(pidItrs.begin(), pidItrs.end(), [](TSizeTUMapCItr lhs, TSizeTUMapCItr rhs) { return lhs->first < rhs->first; }); - template - void operator()(std::size_t pid, - const T &data, - core::CStatePersistInserter &inserter) const - { - inserter.insertValue(PERSON_TAG, pid); - inserter.insertLevel(DATA_TAG, boost::bind(&T::acceptPersistInserter, &data, _1)); + for (auto itr : pidItrs) { + inserter.insertLevel(PERSON_TAG, boost::bind(SDoPersist(), itr->first, boost::cref(itr->second), _1)); } - }; + } + + template + void operator()(std::size_t pid, const T& data, core::CStatePersistInserter& inserter) const { + inserter.insertValue(PERSON_TAG, pid); + inserter.insertLevel(DATA_TAG, boost::bind(&T::acceptPersistInserter, &data, _1)); + } + }; }; //! Restores the data gatherers (for individual metric categories). template -class CRestoreFeatureData -{ - public: - bool operator()(core::CStateRestoreTraverser &traverser, - std::size_t dimension, - bool isNewVersion, - const CMetricBucketGatherer &gatherer, - TCategorySizePrAnyMap &result) const - { - boost::any &data = result[{CATEGORY, dimension}]; - return this->restore(traverser, dimension, isNewVersion, gatherer, data); - } - - private: - //! Add a restored data gatherer to \p result. - bool restore(core::CStateRestoreTraverser &traverser, - std::size_t dimension, - bool isNewVersion, - const CMetricBucketGatherer &gatherer, - boost::any &result) const - { - using Type = typename SDataType::Type; - if (result.empty()) - { - result = Type(); - } - Type &data = *boost::unsafe_any_cast(&result); +class CRestoreFeatureData { +public: + bool operator()(core::CStateRestoreTraverser& traverser, + std::size_t dimension, + bool isNewVersion, + const CMetricBucketGatherer& gatherer, + TCategorySizePrAnyMap& result) const { + boost::any& data = result[{CATEGORY, dimension}]; + return this->restore(traverser, dimension, isNewVersion, gatherer, data); + } - // An empty sub-level implies a person with 100% invalid data. - if (!traverser.hasSubLevel()) - { - return true; - } +private: + //! Add a restored data gatherer to \p result. 
+ bool restore(core::CStateRestoreTraverser& traverser, + std::size_t dimension, + bool isNewVersion, + const CMetricBucketGatherer& gatherer, + boost::any& result) const { + using Type = typename SDataType::Type; + if (result.empty()) { + result = Type(); + } + Type& data = *boost::unsafe_any_cast(&result); - if (isNewVersion) - { - return traverser.traverseSubLevel(boost::bind( - CDoNewRestore(dimension), _1, boost::cref(gatherer), boost::ref(data))); - } - else - { - return traverser.traverseSubLevel(boost::bind( - CDoOldRestore(dimension), _1, boost::cref(gatherer), boost::ref(data))); - } + // An empty sub-level implies a person with 100% invalid data. + if (!traverser.hasSubLevel()) { + return true; } - //! \brief Responsible for restoring individual gatherers. - class CDoNewRestore - { - public: - CDoNewRestore(std::size_t dimension) : m_Dimension(dimension) {} - - template - bool operator()(core::CStateRestoreTraverser &traverser, - const CMetricBucketGatherer &gatherer, - TSizeSizeTUMapUMap &result) const - { - do - { - const std::string &name = traverser.name(); - if (name == ATTRIBUTE_TAG) - { - if (traverser.traverseSubLevel(boost::bind( - &CDoNewRestore::restoreAttributes, this, _1, boost::cref(gatherer), boost::ref(result))) == false) - { - LOG_ERROR("Invalid data in " << traverser.value()); - return false; - } - } - } - while (traverser.next()); + if (isNewVersion) { + return traverser.traverseSubLevel(boost::bind(CDoNewRestore(dimension), _1, boost::cref(gatherer), boost::ref(data))); + } else { + return traverser.traverseSubLevel(boost::bind(CDoOldRestore(dimension), _1, boost::cref(gatherer), boost::ref(data))); + } + } - return true; - } + //! \brief Responsible for restoring individual gatherers. + class CDoNewRestore { + public: + CDoNewRestore(std::size_t dimension) : m_Dimension(dimension) {} - template - bool restoreAttributes(core::CStateRestoreTraverser &traverser, - const CMetricBucketGatherer &gatherer, - TSizeSizeTUMapUMap &result) const - { - std::size_t lastCid(0); - bool seenCid(false); - - do - { - const std::string &name = traverser.name(); - if (name == ATTRIBUTE_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), lastCid) == false) - { - LOG_ERROR("Invalid attribute ID in " << traverser.value()); - return false; - } - seenCid = true; - result[lastCid] = TSizeTUMap(1); - } - else if (name == PERSON_TAG) - { - if (!seenCid) - { - LOG_ERROR("Incorrect format - person before attribute ID in " << - traverser.value()); - return false; - } - if (traverser.traverseSubLevel(boost::bind( - &CDoNewRestore::restorePeople, this, _1, boost::cref(gatherer), boost::ref(result[lastCid]))) == false) - { - LOG_ERROR("Invalid data in " << traverser.value()); - return false; - } - } + template + bool + operator()(core::CStateRestoreTraverser& traverser, const CMetricBucketGatherer& gatherer, TSizeSizeTUMapUMap& result) const { + do { + const std::string& name = traverser.name(); + if (name == ATTRIBUTE_TAG) { + if (traverser.traverseSubLevel(boost::bind( + &CDoNewRestore::restoreAttributes, this, _1, boost::cref(gatherer), boost::ref(result))) == false) { + LOG_ERROR("Invalid data in " << traverser.value()); + return false; } - while (traverser.next()); - - return true; } + } while (traverser.next()); - template - bool restorePeople(core::CStateRestoreTraverser &traverser, - const CMetricBucketGatherer &gatherer, - TSizeTUMap &result) const - { - std::size_t lastPid(0); - bool seenPid(false); - - do - { - const std::string &name = traverser.name(); - if (name 
== PERSON_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), lastPid) == false) - { - LOG_ERROR("Invalid person ID in " << traverser.value()); - return false; - } - seenPid = true; - } - else if (name == DATA_TAG) - { - if (!seenPid) - { - LOG_ERROR("Incorrect format - data before person ID in " << traverser.value()); - return false; - } - T initial(gatherer.dataGatherer().params(), - m_Dimension, - gatherer.currentBucketStartTime(), - gatherer.bucketLength(), - gatherer.beginInfluencers(), - gatherer.endInfluencers()); - if (traverser.traverseSubLevel(boost::bind(&T::acceptRestoreTraverser, - &initial, - _1)) == false) - { - LOG_ERROR("Invalid data in " << traverser.value()); - return false; - } - result.emplace(lastPid, initial); - } - } - while (traverser.next()); + return true; + } - return true; + template + bool restoreAttributes(core::CStateRestoreTraverser& traverser, + const CMetricBucketGatherer& gatherer, + TSizeSizeTUMapUMap& result) const { + std::size_t lastCid(0); + bool seenCid(false); + + do { + const std::string& name = traverser.name(); + if (name == ATTRIBUTE_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), lastCid) == false) { + LOG_ERROR("Invalid attribute ID in " << traverser.value()); + return false; + } + seenCid = true; + result[lastCid] = TSizeTUMap(1); + } else if (name == PERSON_TAG) { + if (!seenCid) { + LOG_ERROR("Incorrect format - person before attribute ID in " << traverser.value()); + return false; + } + if (traverser.traverseSubLevel(boost::bind( + &CDoNewRestore::restorePeople, this, _1, boost::cref(gatherer), boost::ref(result[lastCid]))) == false) { + LOG_ERROR("Invalid data in " << traverser.value()); + return false; + } } + } while (traverser.next()); + + return true; + } - private: - std::size_t m_Dimension; - }; - - //! \brief Responsible for restoring individual gatherers. 
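The restore paths above all rely on the same convention: an identifier tag must be seen before the data tag it scopes, which the seenCid/seenPid flags enforce. A self-contained sketch of that guard, with a flattened tag/value list standing in for core::CStateRestoreTraverser (the tag names and stand-in types are illustrative):

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    using TTagValuePr = std::pair<std::string, std::string>;
    using TTagValuePrVec = std::vector<TTagValuePr>;

    // Refuse data that arrives before the ID which scopes it, exactly as
    // the restoreAttributes/restorePeople guards do.
    bool restore(const TTagValuePrVec& level) {
        std::size_t lastCid = 0;
        bool seenCid = false;
        for (const auto& tagValue : level) {
            if (tagValue.first == "a") { // attribute ID tag
                lastCid = std::stoul(tagValue.second);
                seenCid = true;
            } else if (tagValue.first == "c") { // person data tag
                if (!seenCid) {
                    std::cerr << "person before attribute ID\n";
                    return false;
                }
                // ... restore the person's gatherer under attribute lastCid ...
            }
        }
        return true;
    }

    int main() {
        std::cout << restore({{"a", "3"}, {"c", "..."}}) << ' '
                  << restore({{"c", "..."}}) << '\n'; // prints "1 0"
        return 0;
    }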
- class CDoOldRestore - { - public: - CDoOldRestore(std::size_t dimension) : m_Dimension(dimension) {} - - template - bool operator()(core::CStateRestoreTraverser &traverser, - const CMetricBucketGatherer &gatherer, - TSizeSizeTUMapUMap &result) const - { - bool isPopulation = gatherer.dataGatherer().isPopulation(); - if (isPopulation) - { - this->restorePopulation(traverser, gatherer, result); + template + bool restorePeople(core::CStateRestoreTraverser& traverser, const CMetricBucketGatherer& gatherer, TSizeTUMap& result) const { + std::size_t lastPid(0); + bool seenPid(false); + + do { + const std::string& name = traverser.name(); + if (name == PERSON_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), lastPid) == false) { + LOG_ERROR("Invalid person ID in " << traverser.value()); + return false; } - else - { - this->restoreIndividual(traverser, gatherer, result); + seenPid = true; + } else if (name == DATA_TAG) { + if (!seenPid) { + LOG_ERROR("Incorrect format - data before person ID in " << traverser.value()); + return false; } - return true; + T initial(gatherer.dataGatherer().params(), + m_Dimension, + gatherer.currentBucketStartTime(), + gatherer.bucketLength(), + gatherer.beginInfluencers(), + gatherer.endInfluencers()); + if (traverser.traverseSubLevel(boost::bind(&T::acceptRestoreTraverser, &initial, _1)) == false) { + LOG_ERROR("Invalid data in " << traverser.value()); + return false; + } + result.emplace(lastPid, initial); } + } while (traverser.next()); - template - bool restoreIndividual(core::CStateRestoreTraverser &traverser, - const CMetricBucketGatherer &gatherer, - TSizeSizeTUMapUMap &result) const - { - std::size_t pid(0); - do - { - const std::string &name = traverser.name(); - if (name == DATA_TAG) - { - T initial(gatherer.dataGatherer().params(), - m_Dimension, - gatherer.currentBucketStartTime(), - gatherer.bucketLength(), - gatherer.beginInfluencers(), - gatherer.endInfluencers()); - if (traverser.traverseSubLevel(boost::bind( - &T::acceptRestoreTraverser, &initial, _1)) == false) - { - LOG_ERROR("Invalid data in " << traverser.value()); - return false; - } - result[model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID].emplace(pid, initial); - pid++; - } - } - while (traverser.next()); + return true; + } - return true; + private: + std::size_t m_Dimension; + }; + + //! \brief Responsible for restoring individual gatherers. 
+ class CDoOldRestore { + public: + CDoOldRestore(std::size_t dimension) : m_Dimension(dimension) {} + + template + bool + operator()(core::CStateRestoreTraverser& traverser, const CMetricBucketGatherer& gatherer, TSizeSizeTUMapUMap& result) const { + bool isPopulation = gatherer.dataGatherer().isPopulation(); + if (isPopulation) { + this->restorePopulation(traverser, gatherer, result); + } else { + this->restoreIndividual(traverser, gatherer, result); + } + return true; + } + + template + bool restoreIndividual(core::CStateRestoreTraverser& traverser, + const CMetricBucketGatherer& gatherer, + TSizeSizeTUMapUMap& result) const { + std::size_t pid(0); + do { + const std::string& name = traverser.name(); + if (name == DATA_TAG) { + T initial(gatherer.dataGatherer().params(), + m_Dimension, + gatherer.currentBucketStartTime(), + gatherer.bucketLength(), + gatherer.beginInfluencers(), + gatherer.endInfluencers()); + if (traverser.traverseSubLevel(boost::bind(&T::acceptRestoreTraverser, &initial, _1)) == false) { + LOG_ERROR("Invalid data in " << traverser.value()); + return false; + } + result[model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID].emplace(pid, initial); + pid++; } + } while (traverser.next()); + + return true; + } + + template + bool restorePopulation(core::CStateRestoreTraverser& traverser, + const CMetricBucketGatherer& gatherer, + TSizeSizeTUMapUMap& result) const { + std::size_t pid; + + std::size_t lastCid(0); + bool seenCid(false); + + do { + const std::string& name = traverser.name(); + if (name == ATTRIBUTE_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), lastCid) == false) { + LOG_ERROR("Invalid attribute ID in " << traverser.value()); + return false; + } + seenCid = true; + } else if (name == DATA_TAG) { + if (!seenCid) { + LOG_ERROR("Incorrect format - data before attribute ID in " << traverser.value()); + return false; + } - template - bool restorePopulation(core::CStateRestoreTraverser &traverser, - const CMetricBucketGatherer &gatherer, - TSizeSizeTUMapUMap &result) const - { - std::size_t pid; - - std::size_t lastCid(0); - bool seenCid(false); - - do - { - const std::string &name = traverser.name(); - if (name == ATTRIBUTE_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), lastCid) == false) - { - LOG_ERROR("Invalid attribute ID in " << traverser.value()); - return false; - } - seenCid = true; - } - else if (name == DATA_TAG) - { - if (!seenCid) - { - LOG_ERROR("Incorrect format - data before attribute ID in " << - traverser.value()); - return false; - } - - T initial(gatherer.dataGatherer().params(), - m_Dimension, - gatherer.currentBucketStartTime(), - gatherer.bucketLength(), - gatherer.beginInfluencers(), - gatherer.endInfluencers()); - if (traverser.traverseSubLevel(boost::bind(&T::acceptRestoreTraverser, - &initial, - _1)) == false) - { - LOG_ERROR("Invalid data in " << traverser.value()); - return false; - } - - auto &pidMap = result[lastCid]; - pid = pidMap.size(); - pidMap.emplace(pid, initial); - } + T initial(gatherer.dataGatherer().params(), + m_Dimension, + gatherer.currentBucketStartTime(), + gatherer.bucketLength(), + gatherer.beginInfluencers(), + gatherer.endInfluencers()); + if (traverser.traverseSubLevel(boost::bind(&T::acceptRestoreTraverser, &initial, _1)) == false) { + LOG_ERROR("Invalid data in " << traverser.value()); + return false; } - while (traverser.next()); - return true; + auto& pidMap = result[lastCid]; + pid = pidMap.size(); + pidMap.emplace(pid, initial); } + } while (traverser.next()); + + return true; + } 
- private: - std::size_t m_Dimension; - }; + private: + std::size_t m_Dimension; + }; }; //! Removes the people from the data gatherers. -struct SRemovePeople -{ - public: - template - void operator()(const TCategorySizePr &/*category*/, - TSizeSizeTUMapUMap &data, - std::size_t begin, - std::size_t end) const - { - for (auto &cidEntry : data) - { - for (std::size_t pid = begin; pid < end; ++pid) - { - cidEntry.second.erase(pid); - } +struct SRemovePeople { +public: + template + void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, std::size_t begin, std::size_t end) const { + for (auto& cidEntry : data) { + for (std::size_t pid = begin; pid < end; ++pid) { + cidEntry.second.erase(pid); } } + } - template - void operator()(const TCategorySizePr &/*category*/, - TSizeSizeTUMapUMap &data, - const TSizeVec &peopleToRemove) const - { - for (auto &cidEntry : data) - { - for (auto pid : peopleToRemove) - { - cidEntry.second.erase(pid); - } + template + void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, const TSizeVec& peopleToRemove) const { + for (auto& cidEntry : data) { + for (auto pid : peopleToRemove) { + cidEntry.second.erase(pid); } } + } }; //! Removes attributes from the data gatherers. -struct SRemoveAttributes -{ +struct SRemoveAttributes { template - void operator()(const TCategorySizePr &/*category*/, - TSizeSizeTUMapUMap &data, - const TSizeVec &attributesToRemove) const - { - for (auto cid : attributesToRemove) - { + void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, const TSizeVec& attributesToRemove) const { + for (auto cid : attributesToRemove) { data.erase(cid); } } template - void operator()(const TCategorySizePr &/*category*/, - TSizeSizeTUMapUMap &data, - std::size_t begin, - std::size_t end) const - { - for (std::size_t cid = begin; cid < end; ++cid) - { + void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, std::size_t begin, std::size_t end) const { + for (std::size_t cid = begin; cid < end; ++cid) { data.erase(cid); } } }; //! Sample the metric statistics. -struct SDoSample -{ - public: - template - void operator()(const TCategorySizePr &/*category*/, - TSizeSizeTUMapUMap &data, - core_t::TTime time, - const CMetricBucketGatherer &gatherer, - CDataGatherer::TSampleCountsPtr sampleCounts) const - { - for (const auto &count : gatherer.bucketCounts(time)) - { - std::size_t pid = CDataGatherer::extractPersonId(count); - std::size_t cid = CDataGatherer::extractAttributeId(count); - std::size_t activeId = gatherer.dataGatherer().isPopulation() ? 
cid : pid; - auto cidEntry = data.find(cid); - if (cidEntry == data.end()) - { - LOG_ERROR("No gatherer for attribute " << gatherer.dataGatherer().attributeName(cid) - << " of person " << gatherer.dataGatherer().personName(pid)); - } - else - { - auto pidEntry = cidEntry->second.find(pid); - if (pidEntry == cidEntry->second.end()) - { - LOG_ERROR("No gatherer for attribute " << gatherer.dataGatherer().attributeName(cid) - << " of person " << gatherer.dataGatherer().personName(pid)); - } - else if (pidEntry->second.sample(time, sampleCounts->count(activeId))) - { - sampleCounts->updateSampleVariance(activeId); - } +struct SDoSample { +public: + template + void operator()(const TCategorySizePr& /*category*/, + TSizeSizeTUMapUMap& data, + core_t::TTime time, + const CMetricBucketGatherer& gatherer, + CDataGatherer::TSampleCountsPtr sampleCounts) const { + for (const auto& count : gatherer.bucketCounts(time)) { + std::size_t pid = CDataGatherer::extractPersonId(count); + std::size_t cid = CDataGatherer::extractAttributeId(count); + std::size_t activeId = gatherer.dataGatherer().isPopulation() ? cid : pid; + auto cidEntry = data.find(cid); + if (cidEntry == data.end()) { + LOG_ERROR("No gatherer for attribute " << gatherer.dataGatherer().attributeName(cid) << " of person " + << gatherer.dataGatherer().personName(pid)); + } else { + auto pidEntry = cidEntry->second.find(pid); + if (pidEntry == cidEntry->second.end()) { + LOG_ERROR("No gatherer for attribute " << gatherer.dataGatherer().attributeName(cid) << " of person " + << gatherer.dataGatherer().personName(pid)); + } else if (pidEntry->second.sample(time, sampleCounts->count(activeId))) { + sampleCounts->updateSampleVariance(activeId); } } } + } }; //! Stably hashes the collection of data gatherers. -struct SHash -{ - public: - template - void operator()(const TCategorySizePr &/*category*/, - const TSizeSizeTUMapUMap &data, - const CMetricBucketGatherer &gatherer, - TStrCRefStrCRefPrUInt64Map &hashes) const - { - for (const auto &cidEntry : data) - { - std::size_t cid = cidEntry.first; - if (gatherer.dataGatherer().isAttributeActive(cid)) - { - TStrCRef cidName = TStrCRef(gatherer.dataGatherer().attributeName(cid)); - for (const auto &pidEntry : cidEntry.second) - { - std::size_t pid = pidEntry.first; - if (gatherer.dataGatherer().isPersonActive(pid)) - { - TStrCRef pidName = TStrCRef(gatherer.dataGatherer().personName(pid)); - hashes.emplace(std::piecewise_construct, - std::forward_as_tuple(cidName, pidName), - std::forward_as_tuple(pidEntry.second.checksum())); - } +struct SHash { +public: + template + void operator()(const TCategorySizePr& /*category*/, + const TSizeSizeTUMapUMap& data, + const CMetricBucketGatherer& gatherer, + TStrCRefStrCRefPrUInt64Map& hashes) const { + for (const auto& cidEntry : data) { + std::size_t cid = cidEntry.first; + if (gatherer.dataGatherer().isAttributeActive(cid)) { + TStrCRef cidName = TStrCRef(gatherer.dataGatherer().attributeName(cid)); + for (const auto& pidEntry : cidEntry.second) { + std::size_t pid = pidEntry.first; + if (gatherer.dataGatherer().isPersonActive(pid)) { + TStrCRef pidName = TStrCRef(gatherer.dataGatherer().personName(pid)); + hashes.emplace(std::piecewise_construct, + std::forward_as_tuple(cidName, pidName), + std::forward_as_tuple(pidEntry.second.checksum())); } } } } + } }; //! Extracts feature data from a collection of gatherers. 
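SHash above makes the checksum reproducible by re-keying the unordered gatherer maps into an ordered map of (attribute, person) names before the per-gatherer checksums are consumed. A toy sketch of why that re-keying matters, with a simplified key and combine step (the real code stores per-gatherer checksums keyed by name pairs):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <unordered_map>

    int main() {
        // Unordered iteration order is implementation defined, so hashing
        // entries as they come would not be stable across runs or builds.
        std::unordered_map<std::string, std::uint64_t> checksums{{"personB", 17}, {"personA", 42}};

        // An ordered map iterates lexicographically by key, so the
        // combined value is the same however checksums was populated.
        std::map<std::string, std::uint64_t> ordered(checksums.begin(), checksums.end());
        std::uint64_t combined = 0;
        for (const auto& entry : ordered) {
            combined = 31 * combined + entry.second; // toy combine step
        }
        std::cout << combined << '\n';
        return 0;
    }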
-struct SExtractFeatureData -{ - public: - using TFeatureAnyPr = std::pair; - using TFeatureAnyPrVec = std::vector; +struct SExtractFeatureData { +public: + using TFeatureAnyPr = std::pair; + using TFeatureAnyPrVec = std::vector; - public: - template - void operator()(const TCategorySizePr &/*category*/, - const TSizeSizeTUMapUMap &data, - const CMetricBucketGatherer &gatherer, - model_t::EFeature feature, - core_t::TTime time, - core_t::TTime bucketLength, - TFeatureAnyPrVec &result) const - { - if (gatherer.dataGatherer().isPopulation()) - { - result.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - this->featureData(data, gatherer, time, bucketLength, this->isSum(feature), - *boost::unsafe_any_cast(&result.back().second)); - } - else - { - result.emplace_back(feature, TSizeFeatureDataPrVec()); - this->featureData(data, gatherer, time, bucketLength, this->isSum(feature), - *boost::unsafe_any_cast(&result.back().second)); - } +public: + template + void operator()(const TCategorySizePr& /*category*/, + const TSizeSizeTUMapUMap& data, + const CMetricBucketGatherer& gatherer, + model_t::EFeature feature, + core_t::TTime time, + core_t::TTime bucketLength, + TFeatureAnyPrVec& result) const { + if (gatherer.dataGatherer().isPopulation()) { + result.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); + this->featureData(data, + gatherer, + time, + bucketLength, + this->isSum(feature), + *boost::unsafe_any_cast(&result.back().second)); + } else { + result.emplace_back(feature, TSizeFeatureDataPrVec()); + this->featureData(data, + gatherer, + time, + bucketLength, + this->isSum(feature), + *boost::unsafe_any_cast(&result.back().second)); } + } - private: - static const TSampleVec ZERO_SAMPLE; +private: + static const TSampleVec ZERO_SAMPLE; - private: - bool isSum(model_t::EFeature feature) const - { - return feature == model_t::E_IndividualSumByBucketAndPerson - || feature == model_t::E_IndividualLowSumByBucketAndPerson - || feature == model_t::E_IndividualHighSumByBucketAndPerson; - } +private: + bool isSum(model_t::EFeature feature) const { + return feature == model_t::E_IndividualSumByBucketAndPerson || feature == model_t::E_IndividualLowSumByBucketAndPerson || + feature == model_t::E_IndividualHighSumByBucketAndPerson; + } - template - void featureData(const TSizeSizeTUMapUMap &data, - const CMetricBucketGatherer &gatherer, - core_t::TTime time, - core_t::TTime bucketLength, - bool isSum, - U &result) const - { - result.clear(); - if (isSum) - { - if (data.empty() == false) - { - auto &pidMap = data.begin()->second; - result.reserve(pidMap.size()); - for (auto &pidEntry : pidMap) - { - std::size_t pid = pidEntry.first; - if (gatherer.hasExplicitNullsOnly(time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID) == false) - { - this->featureData(pidEntry.second, - gatherer, - pid, - model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, - time, - bucketLength, - result); - } + template + void featureData(const TSizeSizeTUMapUMap& data, + const CMetricBucketGatherer& gatherer, + core_t::TTime time, + core_t::TTime bucketLength, + bool isSum, + U& result) const { + result.clear(); + if (isSum) { + if (data.empty() == false) { + auto& pidMap = data.begin()->second; + result.reserve(pidMap.size()); + for (auto& pidEntry : pidMap) { + std::size_t pid = pidEntry.first; + if (gatherer.hasExplicitNullsOnly(time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID) == false) { + this->featureData( + pidEntry.second, gatherer, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, time, bucketLength, result); } } } - else 
- { - const TSizeSizePrUInt64UMap &counts = gatherer.bucketCounts(time); - result.reserve(counts.size()); - for (const auto &count : counts) - { - std::size_t cid = CDataGatherer::extractAttributeId(count); - auto cidEntry = data.find(cid); - if (cidEntry == data.end()) - { - LOG_ERROR("No gatherers for attribute " << gatherer.dataGatherer().attributeName(cid)); - continue; - } - std::size_t pid = CDataGatherer::extractPersonId(count); - auto pidEntry = cidEntry->second.find(pid); - if (pidEntry == cidEntry->second.end()) - { - LOG_ERROR("No gatherers for person " << gatherer.dataGatherer().personName(pid)); - continue; - } - - this->featureData(pidEntry->second, gatherer, pid, cid, time, bucketLength, result); + } else { + const TSizeSizePrUInt64UMap& counts = gatherer.bucketCounts(time); + result.reserve(counts.size()); + for (const auto& count : counts) { + std::size_t cid = CDataGatherer::extractAttributeId(count); + auto cidEntry = data.find(cid); + if (cidEntry == data.end()) { + LOG_ERROR("No gatherers for attribute " << gatherer.dataGatherer().attributeName(cid)); + continue; } + std::size_t pid = CDataGatherer::extractPersonId(count); + auto pidEntry = cidEntry->second.find(pid); + if (pidEntry == cidEntry->second.end()) { + LOG_ERROR("No gatherers for person " << gatherer.dataGatherer().personName(pid)); + continue; + } + + this->featureData(pidEntry->second, gatherer, pid, cid, time, bucketLength, result); } - std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); } + std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); + } - //! Individual model specialization - template - void featureData(const T &data, - const CMetricBucketGatherer &gatherer, - std::size_t pid, - std::size_t /*cid*/, - core_t::TTime time, - core_t::TTime bucketLength, - TSizeFeatureDataPrVec &result) const - { - result.emplace_back(pid, this->featureData(data, time, bucketLength, - gatherer.dataGatherer().effectiveSampleCount(pid))); - } + //! Individual model specialization + template + void featureData(const T& data, + const CMetricBucketGatherer& gatherer, + std::size_t pid, + std::size_t /*cid*/, + core_t::TTime time, + core_t::TTime bucketLength, + TSizeFeatureDataPrVec& result) const { + result.emplace_back(pid, this->featureData(data, time, bucketLength, gatherer.dataGatherer().effectiveSampleCount(pid))); + } - //! Population model specialization - template - void featureData(const T &data, - const CMetricBucketGatherer &gatherer, - std::size_t pid, - std::size_t cid, - core_t::TTime time, - core_t::TTime bucketLength, - TSizeSizePrFeatureDataPrVec &result) const - { - result.emplace_back(TSizeSizePr(pid, cid), - this->featureData(data, time, bucketLength, - gatherer.dataGatherer().effectiveSampleCount(cid))); - } + //! 
Population model specialization + template + void featureData(const T& data, + const CMetricBucketGatherer& gatherer, + std::size_t pid, + std::size_t cid, + core_t::TTime time, + core_t::TTime bucketLength, + TSizeSizePrFeatureDataPrVec& result) const { + result.emplace_back(TSizeSizePr(pid, cid), + this->featureData(data, time, bucketLength, gatherer.dataGatherer().effectiveSampleCount(cid))); + } - SMetricFeatureData featureData(const CGathererTools::CSumGatherer &data, - core_t::TTime time, - core_t::TTime bucketLength, - double /*effectiveSampleCount*/) const - { - return data.featureData(time, bucketLength, ZERO_SAMPLE); - } + SMetricFeatureData featureData(const CGathererTools::CSumGatherer& data, + core_t::TTime time, + core_t::TTime bucketLength, + double /*effectiveSampleCount*/) const { + return data.featureData(time, bucketLength, ZERO_SAMPLE); + } - template - inline SMetricFeatureData featureData(const T &data, - core_t::TTime time, - core_t::TTime bucketLength, - double effectiveSampleCount) const - { - return data.featureData(time, bucketLength, effectiveSampleCount); - } + template + inline SMetricFeatureData + featureData(const T& data, core_t::TTime time, core_t::TTime bucketLength, double effectiveSampleCount) const { + return data.featureData(time, bucketLength, effectiveSampleCount); + } }; const TSampleVec SExtractFeatureData::ZERO_SAMPLE(1, CSample(0, TDoubleVec(1, 0.0), 1.0, 1.0)); //! Adds a value to the specified data gatherers. -struct SAddValue -{ - struct SStatistic - { +struct SAddValue { + struct SStatistic { core_t::TTime s_Time; - const CEventData::TDouble1VecArray *s_Values; + const CEventData::TDouble1VecArray* s_Values; unsigned int s_Count; unsigned int s_SampleCount; - const TStoredStringPtrVec *s_Influences; + const TStoredStringPtrVec* s_Influences; }; template - inline void operator()(const TCategorySizePr &category, - TSizeSizeTUMapUMap &data, + inline void operator()(const TCategorySizePr& category, + TSizeSizeTUMapUMap& data, std::size_t pid, std::size_t cid, - const CMetricBucketGatherer &gatherer, - const SStatistic &stat) const - { - auto &entry = data[cid].emplace(boost::unordered::piecewise_construct, - boost::make_tuple(pid), - boost::make_tuple(boost::cref(gatherer.dataGatherer().params()), - category.second, - gatherer.currentBucketStartTime(), - gatherer.bucketLength(), - gatherer.beginInfluencers(), - gatherer.endInfluencers())).first->second; - entry.add(stat.s_Time, - (*stat.s_Values)[category.first], - stat.s_Count, - stat.s_SampleCount, - *stat.s_Influences); + const CMetricBucketGatherer& gatherer, + const SStatistic& stat) const { + auto& entry = data[cid] + .emplace(boost::unordered::piecewise_construct, + boost::make_tuple(pid), + boost::make_tuple(boost::cref(gatherer.dataGatherer().params()), + category.second, + gatherer.currentBucketStartTime(), + gatherer.bucketLength(), + gatherer.beginInfluencers(), + gatherer.endInfluencers())) + .first->second; + entry.add(stat.s_Time, (*stat.s_Values)[category.first], stat.s_Count, stat.s_SampleCount, *stat.s_Influences); } }; //! Updates gatherers with the start of a new bucket. 
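SAddValue above creates the per-(attribute, person) gatherer lazily: emplace with piecewise construction builds the mapped value in place from its constructor arguments on first sight of the pair and is a no-op afterwards, so no default constructor or temporary gatherer is needed. The same idiom in miniature, using the standard library directly (SGatherer and its constructor arguments are simplified stand-ins):

    #include <iostream>
    #include <string>
    #include <tuple>
    #include <unordered_map>
    #include <utility>

    struct SGatherer {
        SGatherer(std::string name, int bucketLength)
            : s_Name(std::move(name)), s_BucketLength(bucketLength) {}
        std::string s_Name;
        int s_BucketLength;
    };

    int main() {
        std::unordered_map<std::size_t, SGatherer> data;
        std::size_t pid = 7;
        // Construct the key from (pid) and the value from its two
        // constructor arguments; if pid is already present, emplace
        // does nothing and the existing entry is returned.
        auto& entry = data.emplace(std::piecewise_construct,
                                   std::forward_as_tuple(pid),
                                   std::forward_as_tuple("count", 300))
                          .first->second;
        std::cout << entry.s_Name << ' ' << entry.s_BucketLength << '\n';
        return 0;
    }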
-struct SStartNewBucket -{ - public: - template - void operator()(const TCategorySizePr &/*category*/, - TSizeSizeTUMapUMap &data, - core_t::TTime time) const - { - for (auto &cidEntry : data) - { - for (auto &pidEntry : cidEntry.second) - { - pidEntry.second.startNewBucket(time); - } +struct SStartNewBucket { +public: + template + void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, core_t::TTime time) const { + for (auto& cidEntry : data) { + for (auto& pidEntry : cidEntry.second) { + pidEntry.second.startNewBucket(time); } } + } }; //! Resets data stored for buckets containing a specified time. -struct SResetBucket -{ - public: - template - void operator()(const TCategorySizePr &/*category*/, - TSizeSizeTUMapUMap &data, - core_t::TTime bucketStart) const - { - for (auto &cidEntry : data) - { - for (auto &pidEntry : cidEntry.second) - { - pidEntry.second.resetBucket(bucketStart); - } +struct SResetBucket { +public: + template + void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, core_t::TTime bucketStart) const { + for (auto& cidEntry : data) { + for (auto& pidEntry : cidEntry.second) { + pidEntry.second.resetBucket(bucketStart); } } + } }; //! Releases memory that is no longer needed. -struct SReleaseMemory -{ - public: - template - void operator()(const TCategorySizePr &/*category*/, - TSizeSizeTUMapUMap &data, - core_t::TTime samplingCutoffTime) const - { - for (auto &cidEntry : data) - { - auto &pidMap = cidEntry.second; - for (auto i = pidMap.begin(); i != pidMap.end(); /**/) - { - if (i->second.isRedundant(samplingCutoffTime)) - { - i = pidMap.erase(i); - } - else - { - ++i; - } +struct SReleaseMemory { +public: + template + void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, core_t::TTime samplingCutoffTime) const { + for (auto& cidEntry : data) { + auto& pidMap = cidEntry.second; + for (auto i = pidMap.begin(); i != pidMap.end(); /**/) { + if (i->second.isRedundant(samplingCutoffTime)) { + i = pidMap.erase(i); + } else { + ++i; } - } } + } }; } // unnamed:: -CMetricBucketGatherer::CMetricBucketGatherer(CDataGatherer &dataGatherer, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core_t::TTime startTime) : - CBucketGatherer(dataGatherer, startTime), - m_ValueFieldName(valueFieldName), - m_BeginInfluencingFields(0), - m_BeginValueFields(0) -{ +CMetricBucketGatherer::CMetricBucketGatherer(CDataGatherer& dataGatherer, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core_t::TTime startTime) + : CBucketGatherer(dataGatherer, startTime), m_ValueFieldName(valueFieldName), m_BeginInfluencingFields(0), m_BeginValueFields(0) { this->initializeFieldNamesPart1(personFieldName, attributeFieldName, influenceFieldNames); this->initializeFieldNamesPart2(valueFieldName, summaryCountFieldName); this->initializeFeatureData(); } -CMetricBucketGatherer::CMetricBucketGatherer(CDataGatherer &dataGatherer, - const std::string &summaryCountFieldName, - const std::string &personFieldName, - const std::string &attributeFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames, - core::CStateRestoreTraverser &traverser) : - CBucketGatherer(dataGatherer, 0), - m_ValueFieldName(valueFieldName), - 
m_BeginValueFields(0) -{ +CMetricBucketGatherer::CMetricBucketGatherer(CDataGatherer& dataGatherer, + const std::string& summaryCountFieldName, + const std::string& personFieldName, + const std::string& attributeFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames, + core::CStateRestoreTraverser& traverser) + : CBucketGatherer(dataGatherer, 0), m_ValueFieldName(valueFieldName), m_BeginValueFields(0) { this->initializeFieldNamesPart1(personFieldName, attributeFieldName, influenceFieldNames); traverser.traverseSubLevel(boost::bind(&CMetricBucketGatherer::acceptRestoreTraverser, this, _1)); this->initializeFieldNamesPart2(valueFieldName, summaryCountFieldName); } -CMetricBucketGatherer::CMetricBucketGatherer(bool isForPersistence, - const CMetricBucketGatherer &other) : - CBucketGatherer(isForPersistence, other), - m_ValueFieldName(other.m_ValueFieldName), - m_FieldNames(other.m_FieldNames), - m_BeginInfluencingFields(0), - m_BeginValueFields(0), - m_FeatureData(other.m_FeatureData) -{ - if (!isForPersistence) - { +CMetricBucketGatherer::CMetricBucketGatherer(bool isForPersistence, const CMetricBucketGatherer& other) + : CBucketGatherer(isForPersistence, other), + m_ValueFieldName(other.m_ValueFieldName), + m_FieldNames(other.m_FieldNames), + m_BeginInfluencingFields(0), + m_BeginValueFields(0), + m_FeatureData(other.m_FeatureData) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } -void CMetricBucketGatherer::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CMetricBucketGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel(BASE_TAG, boost::bind(&CBucketGatherer::baseAcceptPersistInserter, this, _1)); inserter.insertValue(VERSION_TAG, CURRENT_VERSION); apply(m_FeatureData, boost::bind(CPersistFeatureData(), _1, _2, boost::ref(inserter))); } -bool CMetricBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CMetricBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { std::string version; bool isCurrentVersion(false); - do - { - const std::string &name = traverser.name(); - if (name == BASE_TAG) - { - if (traverser.traverseSubLevel(boost::bind(&CBucketGatherer::baseAcceptRestoreTraverser, - this, _1)) == false) - { + do { + const std::string& name = traverser.name(); + if (name == BASE_TAG) { + if (traverser.traverseSubLevel(boost::bind(&CBucketGatherer::baseAcceptRestoreTraverser, this, _1)) == false) { LOG_ERROR("Invalid data gatherer in " << traverser.value()); return false; } - } - else if (name == VERSION_TAG) - { - if (core::CStringUtils::stringToType(traverser.value(), version) == false) - { + } else if (name == VERSION_TAG) { + if (core::CStringUtils::stringToType(traverser.value(), version) == false) { LOG_ERROR("Invalid version in " << traverser.value()); return false; } isCurrentVersion = (version == CURRENT_VERSION); - } - else if (this->acceptRestoreTraverserInternal(traverser, isCurrentVersion) == false) - { + } else if (this->acceptRestoreTraverserInternal(traverser, isCurrentVersion) == false) { // Soldier on or we'll get a core dump later. 
} - } - while (traverser.next()); + } while (traverser.next()); return true; } -bool CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTraverser &traverser, - bool isCurrentVersion) -{ - const std::string &name = traverser.name(); - if (name == MEAN_TAG) - { +bool CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTraverser& traverser, bool isCurrentVersion) { + const std::string& name = traverser.name(); + if (name == MEAN_TAG) { CRestoreFeatureData restore; - if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid mean data in " << traverser.value()); return false; } - } - else if (name == MIN_TAG) - { + } else if (name == MIN_TAG) { CRestoreFeatureData restore; - if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid min data in " << traverser.value()); return false; } - } - else if (name == MAX_TAG) - { + } else if (name == MAX_TAG) { CRestoreFeatureData restore; - if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid max data in " << traverser.value()); return false; } - } - else if (name == SUM_TAG) - { + } else if (name == SUM_TAG) { CRestoreFeatureData restore; - if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid sum data in " << traverser.value()); return false; } - } - else if (name == MEDIAN_TAG) - { + } else if (name == MEDIAN_TAG) { CRestoreFeatureData restore; - if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid median data in " << traverser.value()); return false; } - } - else if (name == VARIANCE_TAG) - { + } else if (name == VARIANCE_TAG) { CRestoreFeatureData restore; - if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, 1, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid variance data in " << traverser.value()); return false; } - } - else if (name.find(MULTIVARIATE_MEAN_TAG) != std::string::npos) - { + } else if (name.find(MULTIVARIATE_MEAN_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MEAN_TAG.length()), - dimension) == false) - { + if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MEAN_TAG.length()), dimension) == false) { LOG_ERROR("Invalid dimension in " << name); return false; } CRestoreFeatureData restore; - if (restore(traverser, dimension, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, dimension, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid multivariate mean data in " << traverser.value()); return false; } - } - else if (name.find(MULTIVARIATE_MIN_TAG) != std::string::npos) - { + } else if (name.find(MULTIVARIATE_MIN_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MIN_TAG.length()), - dimension) == false) - { + if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MIN_TAG.length()), dimension) == false) { 
LOG_ERROR("Invalid dimension in " << name); return false; } CRestoreFeatureData restore; - if (restore(traverser, dimension, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, dimension, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid multivariate min data in " << traverser.value()); return false; } - } - else if (name.find(MULTIVARIATE_MAX_TAG) != std::string::npos) - { + } else if (name.find(MULTIVARIATE_MAX_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MAX_TAG.length()), - dimension) == false) - { + if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MAX_TAG.length()), dimension) == false) { LOG_ERROR("Invalid dimension in " << name); return false; } CRestoreFeatureData restore; - if (restore(traverser, dimension, isCurrentVersion, *this, m_FeatureData) == false) - { + if (restore(traverser, dimension, isCurrentVersion, *this, m_FeatureData) == false) { LOG_ERROR("Invalid multivariate max data in " << traverser.value()); return false; } @@ -1182,78 +996,58 @@ bool CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTr return true; } -CBucketGatherer *CMetricBucketGatherer::cloneForPersistence() const -{ +CBucketGatherer* CMetricBucketGatherer::cloneForPersistence() const { return new CMetricBucketGatherer(true, *this); } -const std::string &CMetricBucketGatherer::persistenceTag() const -{ +const std::string& CMetricBucketGatherer::persistenceTag() const { return CBucketGatherer::METRIC_BUCKET_GATHERER_TAG; } -const std::string &CMetricBucketGatherer::personFieldName() const -{ +const std::string& CMetricBucketGatherer::personFieldName() const { return m_FieldNames[0]; } -const std::string &CMetricBucketGatherer::attributeFieldName() const -{ +const std::string& CMetricBucketGatherer::attributeFieldName() const { return m_DataGatherer.isPopulation() ? m_FieldNames[1] : EMPTY_STRING; } -const std::string &CMetricBucketGatherer::valueFieldName() const -{ +const std::string& CMetricBucketGatherer::valueFieldName() const { return m_ValueFieldName; } -CMetricBucketGatherer::TStrVecCItr CMetricBucketGatherer::beginInfluencers() const -{ +CMetricBucketGatherer::TStrVecCItr CMetricBucketGatherer::beginInfluencers() const { return m_FieldNames.begin() + m_BeginInfluencingFields; } -CMetricBucketGatherer::TStrVecCItr CMetricBucketGatherer::endInfluencers() const -{ +CMetricBucketGatherer::TStrVecCItr CMetricBucketGatherer::endInfluencers() const { return m_FieldNames.begin() + m_BeginValueFields; } -const TStrVec &CMetricBucketGatherer::fieldsOfInterest() const -{ +const TStrVec& CMetricBucketGatherer::fieldsOfInterest() const { return m_FieldNames; } -std::string CMetricBucketGatherer::description() const -{ - return function_t::name(function_t::function(m_DataGatherer.features())) - + (m_ValueFieldName.empty() ? "" : " ") + m_ValueFieldName + - + (byField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " by ") - + byField(m_DataGatherer.isPopulation(), m_FieldNames) - + (overField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " over ") - + overField(m_DataGatherer.isPopulation(), m_FieldNames) - + (m_DataGatherer.partitionFieldName().empty() ? "" : " partition=") - + m_DataGatherer.partitionFieldName(); +std::string CMetricBucketGatherer::description() const { + return function_t::name(function_t::function(m_DataGatherer.features())) + (m_ValueFieldName.empty() ? 
"" : " ") + m_ValueFieldName + + +(byField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " by ") + + byField(m_DataGatherer.isPopulation(), m_FieldNames) + + (overField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " over ") + + overField(m_DataGatherer.isPopulation(), m_FieldNames) + (m_DataGatherer.partitionFieldName().empty() ? "" : " partition=") + + m_DataGatherer.partitionFieldName(); } -bool CMetricBucketGatherer::processFields(const TStrCPtrVec &fieldValues, - CEventData &result, - CResourceMonitor &resourceMonitor) -{ +bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) { using TOptionalStr = boost::optional; - if (fieldValues.size() != m_FieldNames.size()) - { - LOG_ERROR("Unexpected field values: " - << core::CContainerPrinter::print(fieldValues) - << ", for field names: " - << core::CContainerPrinter::print(m_FieldNames)); + if (fieldValues.size() != m_FieldNames.size()) { + LOG_ERROR("Unexpected field values: " << core::CContainerPrinter::print(fieldValues) + << ", for field names: " << core::CContainerPrinter::print(m_FieldNames)); return false; } - const std::string *person = (fieldValues[0] == 0 && m_DataGatherer.useNull()) ? - &EMPTY_STRING : - fieldValues[0]; - if (person == 0) - { + const std::string* person = (fieldValues[0] == 0 && m_DataGatherer.useNull()) ? &EMPTY_STRING : fieldValues[0]; + if (person == 0) { // Just ignore: the "person" field wasn't present in the // record. Since all models in an aggregate share this // field we can't process this record further. Note that @@ -1267,118 +1061,85 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec &fieldValues, // models so we don't return false. std::size_t i = m_BeginInfluencingFields; - for (/**/; i < m_BeginValueFields; ++i) - { + for (/**/; i < m_BeginValueFields; ++i) { result.addInfluence(fieldValues[i] ? 
TOptionalStr(*fieldValues[i]) : TOptionalStr()); } - if (m_DataGatherer.summaryMode() != model_t::E_None) - { + if (m_DataGatherer.summaryMode() != model_t::E_None) { CEventData::TDouble1VecArraySizePr statistics; statistics.first.fill(TDouble1Vec(1, 0.0)); - if (m_DataGatherer.extractCountFromField(m_FieldNames[i], - fieldValues[i], - statistics.second) == false) - { + if (m_DataGatherer.extractCountFromField(m_FieldNames[i], fieldValues[i], statistics.second) == false) { result.addValue(); return true; } ++i; bool allOk = true; - if (m_FieldNames.size() > statistics.first.size() + i) - { - LOG_ERROR("Inconsistency - more statistic field names than allowed " - << m_FieldNames.size() - i << " > " << statistics.first.size()); + if (m_FieldNames.size() > statistics.first.size() + i) { + LOG_ERROR("Inconsistency - more statistic field names than allowed " << m_FieldNames.size() - i << " > " + << statistics.first.size()); allOk = false; } - if (m_FieldNames.size() > m_FieldMetricCategories.size() + i) - { - LOG_ERROR("Inconsistency - more statistic field names than metric categories " - << m_FieldNames.size() - i << " > " << m_FieldMetricCategories.size()); + if (m_FieldNames.size() > m_FieldMetricCategories.size() + i) { + LOG_ERROR("Inconsistency - more statistic field names than metric categories " << m_FieldNames.size() - i << " > " + << m_FieldMetricCategories.size()); allOk = false; } - for (std::size_t j = 0u; allOk && i < m_FieldNames.size(); ++i, ++j) - { + for (std::size_t j = 0u; allOk && i < m_FieldNames.size(); ++i, ++j) { model_t::EMetricCategory category = m_FieldMetricCategories[j]; - if ( fieldValues[i] == 0 - || m_DataGatherer.extractMetricFromField(m_FieldNames[i], - *fieldValues[i], - statistics.first[category]) == false) - { + if (fieldValues[i] == 0 || + m_DataGatherer.extractMetricFromField(m_FieldNames[i], *fieldValues[i], statistics.first[category]) == false) { allOk = false; } } - if (allOk) - { - if (statistics.second == CDataGatherer::EXPLICIT_NULL_SUMMARY_COUNT) - { + if (allOk) { + if (statistics.second == CDataGatherer::EXPLICIT_NULL_SUMMARY_COUNT) { result.setExplicitNull(); - } - else - { + } else { result.addStatistics(statistics); } - } - else - { + } else { result.addValue(); } - } - else - { + } else { TDouble1Vec value; - if ( fieldValues[i] != 0 - && m_DataGatherer.extractMetricFromField(m_FieldNames[i], *fieldValues[i], value) == true) - { + if (fieldValues[i] != 0 && m_DataGatherer.extractMetricFromField(m_FieldNames[i], *fieldValues[i], value) == true) { result.addValue(value); - } - else - { + } else { result.addValue(); } } bool addedPerson = false; std::size_t pid = CDynamicStringIdRegistry::INVALID_ID; - if (result.isExplicitNull()) - { + if (result.isExplicitNull()) { m_DataGatherer.personId(*person, pid); - } - else - { + } else { pid = m_DataGatherer.addPerson(*person, resourceMonitor, addedPerson); } - if (pid == CDynamicStringIdRegistry::INVALID_ID) - { - if (!result.isExplicitNull()) - { + if (pid == CDynamicStringIdRegistry::INVALID_ID) { + if (!result.isExplicitNull()) { LOG_TRACE("Couldn't create a person, over memory limit"); } return false; } - if (addedPerson) - { - resourceMonitor.addExtraMemory(m_DataGatherer.isPopulation() ? - CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD : CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); - (m_DataGatherer.isPopulation() ? 
core::CStatistics::stat(stat_t::E_NumberOverFields) : - core::CStatistics::stat(stat_t::E_NumberByFields)).increment(); + if (addedPerson) { + resourceMonitor.addExtraMemory(m_DataGatherer.isPopulation() ? CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD + : CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); + (m_DataGatherer.isPopulation() ? core::CStatistics::stat(stat_t::E_NumberOverFields) + : core::CStatistics::stat(stat_t::E_NumberByFields)) + .increment(); } - if (!result.person(pid)) - { + if (!result.person(pid)) { LOG_ERROR("Bad by field value: " << *person); return false; } - const std::string *attribute = (fieldValues[1] == 0 && m_DataGatherer.useNull()) ? - &EMPTY_STRING : - fieldValues[1]; + const std::string* attribute = (fieldValues[1] == 0 && m_DataGatherer.useNull()) ? &EMPTY_STRING : fieldValues[1]; - if (m_DataGatherer.isPopulation()) - { - if (attribute == 0) - { + if (m_DataGatherer.isPopulation()) { + if (attribute == 0) { // Just ignore: the "by" field wasn't present in the // record. This doesn't necessarily stop us processing // the record by other models so we don't return false. @@ -1392,24 +1153,18 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec &fieldValues, bool addedAttribute = false; std::size_t cid = CDynamicStringIdRegistry::INVALID_ID; - if (result.isExplicitNull()) - { + if (result.isExplicitNull()) { m_DataGatherer.attributeId(*attribute, cid); - } - else - { + } else { cid = m_DataGatherer.addAttribute(*attribute, resourceMonitor, addedAttribute); } result.addAttribute(cid); - if (addedAttribute) - { + if (addedAttribute) { resourceMonitor.addExtraMemory(CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); core::CStatistics::stat(stat_t::E_NumberByFields).increment(); } - } - else - { + } else { // Add the unique attribute. 
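            // (Individual models have no attribute field -- attributeFieldName()
            // above returns EMPTY_STRING for them -- so every record can safely
            // share this one placeholder attribute id.)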
result.addAttribute(std::size_t(0)); } @@ -1417,71 +1172,53 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec &fieldValues, return true; } -void CMetricBucketGatherer::recyclePeople(const TSizeVec &peopleToRemove) -{ - if (peopleToRemove.empty()) - { +void CMetricBucketGatherer::recyclePeople(const TSizeVec& peopleToRemove) { + if (peopleToRemove.empty()) { return; } - apply(m_FeatureData, boost::bind(SRemovePeople(), _1, _2, - boost::cref(peopleToRemove))); + apply(m_FeatureData, boost::bind(SRemovePeople(), _1, _2, boost::cref(peopleToRemove))); this->CBucketGatherer::recyclePeople(peopleToRemove); } -void CMetricBucketGatherer::removePeople(std::size_t lowestPersonToRemove) -{ - apply(m_FeatureData, boost::bind(SRemovePeople(), _1, _2, - lowestPersonToRemove, - m_DataGatherer.numberPeople())); +void CMetricBucketGatherer::removePeople(std::size_t lowestPersonToRemove) { + apply(m_FeatureData, boost::bind(SRemovePeople(), _1, _2, lowestPersonToRemove, m_DataGatherer.numberPeople())); this->CBucketGatherer::removePeople(lowestPersonToRemove); } -void CMetricBucketGatherer::recycleAttributes(const TSizeVec &attributesToRemove) -{ - if (attributesToRemove.empty()) - { +void CMetricBucketGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { + if (attributesToRemove.empty()) { return; } - if (m_DataGatherer.isPopulation()) - { - apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, _2, - boost::cref(attributesToRemove))); + if (m_DataGatherer.isPopulation()) { + apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, _2, boost::cref(attributesToRemove))); } this->CBucketGatherer::recycleAttributes(attributesToRemove); } -void CMetricBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) -{ - if (m_DataGatherer.isPopulation()) - { - apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, _2, - lowestAttributeToRemove, - m_DataGatherer.numberAttributes())); +void CMetricBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { + if (m_DataGatherer.isPopulation()) { + apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, _2, lowestAttributeToRemove, m_DataGatherer.numberAttributes())); } this->CBucketGatherer::removeAttributes(lowestAttributeToRemove); } -uint64_t CMetricBucketGatherer::checksum() const -{ +uint64_t CMetricBucketGatherer::checksum() const { uint64_t seed = this->CBucketGatherer::checksum(); seed = maths::CChecksum::calculate(seed, m_DataGatherer.params().s_DecayRate); TStrCRefStrCRefPrUInt64Map hashes; - apply(m_FeatureData, boost::bind(SHash(), _1, _2, - boost::cref(*this), - boost::ref(hashes))); + apply(m_FeatureData, boost::bind(SHash(), _1, _2, boost::cref(*this), boost::ref(hashes))); LOG_TRACE("seed = " << seed); LOG_TRACE("hashes = " << core::CContainerPrinter::print(hashes)); return maths::CChecksum::calculate(seed, hashes); } -void CMetricBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CMetricBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { registerMemoryCallbacks(); mem->setName("CMetricBucketGatherer"); this->CBucketGatherer::debugMemoryUsage(mem->addChild()); @@ -1491,8 +1228,7 @@ void CMetricBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr core::CMemoryDebug::dynamicSize("m_FeatureData", m_FeatureData, mem); } -std::size_t CMetricBucketGatherer::memoryUsage() const -{ +std::size_t CMetricBucketGatherer::memoryUsage() const { registerMemoryCallbacks(); std::size_t mem = this->CBucketGatherer::memoryUsage(); 
mem += core::CMemory::dynamicSize(m_ValueFieldName); @@ -1502,94 +1238,70 @@ std::size_t CMetricBucketGatherer::memoryUsage() const return mem; } -std::size_t CMetricBucketGatherer::staticSize() const -{ +std::size_t CMetricBucketGatherer::staticSize() const { return sizeof(*this); } -void CMetricBucketGatherer::clear() -{ +void CMetricBucketGatherer::clear() { this->CBucketGatherer::clear(); m_FeatureData.clear(); this->initializeFeatureData(); } -bool CMetricBucketGatherer::resetBucket(core_t::TTime bucketStart) -{ - if (this->CBucketGatherer::resetBucket(bucketStart) == false) - { +bool CMetricBucketGatherer::resetBucket(core_t::TTime bucketStart) { + if (this->CBucketGatherer::resetBucket(bucketStart) == false) { return false; } apply(m_FeatureData, boost::bind(SResetBucket(), _1, _2, bucketStart)); return true; } -void CMetricBucketGatherer::releaseMemory(core_t::TTime samplingCutoffTime) -{ +void CMetricBucketGatherer::releaseMemory(core_t::TTime samplingCutoffTime) { apply(m_FeatureData, boost::bind(SReleaseMemory(), _1, _2, samplingCutoffTime)); } -void CMetricBucketGatherer::sample(core_t::TTime time) -{ - if (m_DataGatherer.sampleCounts()) - { - apply(m_FeatureData, boost::bind(SDoSample(), _1, _2, - time, boost::cref(*this), - m_DataGatherer.sampleCounts())); +void CMetricBucketGatherer::sample(core_t::TTime time) { + if (m_DataGatherer.sampleCounts()) { + apply(m_FeatureData, boost::bind(SDoSample(), _1, _2, time, boost::cref(*this), m_DataGatherer.sampleCounts())); } // Merge smallest bucket into longer buckets, if they exist this->CBucketGatherer::sample(time); } -void CMetricBucketGatherer::featureData(core_t::TTime time, core_t::TTime bucketLength, - TFeatureAnyPrVec &result) const -{ +void CMetricBucketGatherer::featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec& result) const { result.clear(); - if ( !this->dataAvailable(time) - || time >= this->currentBucketStartTime() + this->bucketLength()) - { + if (!this->dataAvailable(time) || time >= this->currentBucketStartTime() + this->bucketLength()) { LOG_DEBUG("No data available at " << time); return; } - for (std::size_t i = 0u, n = m_DataGatherer.numberFeatures(); i < n; ++i) - { + for (std::size_t i = 0u, n = m_DataGatherer.numberFeatures(); i < n; ++i) { model_t::EFeature feature = m_DataGatherer.feature(i); model_t::EMetricCategory category; - if (model_t::metricCategory(feature, category)) - { + if (model_t::metricCategory(feature, category)) { std::size_t dimension = model_t::dimension(feature); auto begin = m_FeatureData.find({category, dimension}); - if (begin != m_FeatureData.end()) - { + if (begin != m_FeatureData.end()) { auto end = begin; ++end; - apply(begin, end, boost::bind(SExtractFeatureData(), _1, _2, - boost::cref(*this), - feature, time, bucketLength, - boost::ref(result))); - } - else - { + apply( + begin, + end, + boost::bind(SExtractFeatureData(), _1, _2, boost::cref(*this), feature, time, bucketLength, boost::ref(result))); + } else { LOG_ERROR("No data for category " << model_t::print(category)); } - } - else - { + } else { LOG_ERROR("Unexpected feature " << model_t::print(feature)); } } } -void CMetricBucketGatherer::resize(std::size_t pid, std::size_t cid) -{ - if (m_DataGatherer.sampleCounts()) - { +void CMetricBucketGatherer::resize(std::size_t pid, std::size_t cid) { + if (m_DataGatherer.sampleCounts()) { m_DataGatherer.sampleCounts()->resize(m_DataGatherer.isPopulation() ? 
cid : pid); - } - else - { + } else { LOG_ERROR("Invalid sample counts for gatherer"); } } @@ -1597,11 +1309,10 @@ void CMetricBucketGatherer::resize(std::size_t pid, std::size_t cid) void CMetricBucketGatherer::addValue(std::size_t pid, std::size_t cid, core_t::TTime time, - const CEventData::TDouble1VecArray &values, + const CEventData::TDouble1VecArray& values, std::size_t count, - const CEventData::TOptionalStr &/*stringValue*/, - const TStoredStringPtrVec &influences) -{ + const CEventData::TOptionalStr& /*stringValue*/, + const TStoredStringPtrVec& influences) { // Check that we are correctly sized - a person/attribute might have been added this->resize(pid, cid); @@ -1609,24 +1320,18 @@ void CMetricBucketGatherer::addValue(std::size_t pid, stat.s_Time = time; stat.s_Values = &values; stat.s_Count = static_cast(count); - if (m_DataGatherer.sampleCounts()) - { - stat.s_SampleCount = m_DataGatherer.sampleCounts()->count( - m_DataGatherer.isPopulation() ? cid : pid); - } - else - { + if (m_DataGatherer.sampleCounts()) { + stat.s_SampleCount = m_DataGatherer.sampleCounts()->count(m_DataGatherer.isPopulation() ? cid : pid); + } else { LOG_ERROR("Invalid sample counts for gatherer"); stat.s_SampleCount = 0.0; } stat.s_Influences = &influences; - apply(m_FeatureData, boost::bind(SAddValue(), _1, _2, pid, cid, - boost::cref(*this), boost::ref(stat))); + apply(m_FeatureData, boost::bind(SAddValue(), _1, _2, pid, cid, boost::cref(*this), boost::ref(stat))); } -void CMetricBucketGatherer::startNewBucket(core_t::TTime time, bool skipUpdates) -{ +void CMetricBucketGatherer::startNewBucket(core_t::TTime time, bool skipUpdates) { LOG_TRACE("StartNewBucket, " << time << " @ " << this); using TUInt64Vec = std::vector; using TSizeUInt64VecUMap = boost::unordered_map; @@ -1634,34 +1339,26 @@ void CMetricBucketGatherer::startNewBucket(core_t::TTime time, bool skipUpdates) // Only update the sampleCounts if we are the primary bucket gatherer. // This is the only place where the bucket gatherer needs to know about its // status within the celestial plain, which is a bit ugly... - if (!skipUpdates && time % this->bucketLength() == 0) - { + if (!skipUpdates && time % this->bucketLength() == 0) { core_t::TTime earliestAvailableBucketStartTime = this->earliestBucketStartTime(); - if (this->dataAvailable(earliestAvailableBucketStartTime)) - { + if (this->dataAvailable(earliestAvailableBucketStartTime)) { TSizeUInt64VecUMap counts; - const TSizeSizePrUInt64UMap &counts_ = this->bucketCounts(earliestAvailableBucketStartTime); - for (const auto &count : counts_) - { - if (m_DataGatherer.isPopulation()) - { + const TSizeSizePrUInt64UMap& counts_ = this->bucketCounts(earliestAvailableBucketStartTime); + for (const auto& count : counts_) { + if (m_DataGatherer.isPopulation()) { counts[CDataGatherer::extractAttributeId(count)].push_back(CDataGatherer::extractData(count)); - } - else - { - counts.emplace(CDataGatherer::extractPersonId(count), - TUInt64Vec{0}).first->second[0] += CDataGatherer::extractData(count); + } else { + counts.emplace(CDataGatherer::extractPersonId(count), TUInt64Vec{0}).first->second[0] += + CDataGatherer::extractData(count); } } double alpha = std::exp(-m_DataGatherer.params().s_DecayRate); - for (auto &count : counts) - { + for (auto& count : counts) { std::sort(count.second.begin(), count.second.end()); std::size_t n = count.second.size() / 2; - double median = count.second.size() % 2 == 0 ? 
- static_cast(count.second[n - 1] + count.second[n]) / 2.0 : - static_cast(count.second[n]); + double median = count.second.size() % 2 == 0 ? static_cast(count.second[n - 1] + count.second[n]) / 2.0 + : static_cast(count.second[n]); m_DataGatherer.sampleCounts()->updateMeanNonZeroBucketCount(count.first, median, alpha); } m_DataGatherer.sampleCounts()->refresh(m_DataGatherer); @@ -1670,44 +1367,33 @@ void CMetricBucketGatherer::startNewBucket(core_t::TTime time, bool skipUpdates) apply(m_FeatureData, boost::bind(SStartNewBucket(), _1, _2, time)); } -void CMetricBucketGatherer::initializeFieldNamesPart1(const std::string &personFieldName, - const std::string &attributeFieldName, - const TStrVec &influenceFieldNames) -{ - switch (m_DataGatherer.summaryMode()) - { +void CMetricBucketGatherer::initializeFieldNamesPart1(const std::string& personFieldName, + const std::string& attributeFieldName, + const TStrVec& influenceFieldNames) { + switch (m_DataGatherer.summaryMode()) { case model_t::E_None: - m_FieldNames.reserve( 2 - + static_cast(m_DataGatherer.isPopulation()) - + influenceFieldNames.size()); + m_FieldNames.reserve(2 + static_cast(m_DataGatherer.isPopulation()) + influenceFieldNames.size()); m_FieldNames.push_back(personFieldName); - if (m_DataGatherer.isPopulation()) m_FieldNames.push_back(attributeFieldName); + if (m_DataGatherer.isPopulation()) + m_FieldNames.push_back(attributeFieldName); m_BeginInfluencingFields = m_FieldNames.size(); - m_FieldNames.insert(m_FieldNames.end(), - influenceFieldNames.begin(), - influenceFieldNames.end()); + m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), influenceFieldNames.end()); m_BeginValueFields = m_FieldNames.size(); break; case model_t::E_Manual: - m_FieldNames.reserve( 3 - + static_cast(m_DataGatherer.isPopulation()) - + influenceFieldNames.size()); + m_FieldNames.reserve(3 + static_cast(m_DataGatherer.isPopulation()) + influenceFieldNames.size()); m_FieldNames.push_back(personFieldName); - if (m_DataGatherer.isPopulation()) m_FieldNames.push_back(attributeFieldName); + if (m_DataGatherer.isPopulation()) + m_FieldNames.push_back(attributeFieldName); m_BeginInfluencingFields = m_FieldNames.size(); - m_FieldNames.insert(m_FieldNames.end(), - influenceFieldNames.begin(), - influenceFieldNames.end()); + m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), influenceFieldNames.end()); m_BeginValueFields = m_FieldNames.size(); break; }; } -void CMetricBucketGatherer::initializeFieldNamesPart2(const std::string &valueFieldName, - const std::string &summaryCountFieldName) -{ - switch (m_DataGatherer.summaryMode()) - { +void CMetricBucketGatherer::initializeFieldNamesPart2(const std::string& valueFieldName, const std::string& summaryCountFieldName) { + switch (m_DataGatherer.summaryMode()) { case model_t::E_None: m_FieldNames.push_back(valueFieldName); break; @@ -1719,34 +1405,45 @@ void CMetricBucketGatherer::initializeFieldNamesPart2(const std::string &valueFi }; } -void CMetricBucketGatherer::initializeFeatureData() -{ - for (std::size_t i = 0u, n = m_DataGatherer.numberFeatures(); i < n; ++i) - { +void CMetricBucketGatherer::initializeFeatureData() { + for (std::size_t i = 0u, n = m_DataGatherer.numberFeatures(); i < n; ++i) { const model_t::EFeature feature = m_DataGatherer.feature(i); model_t::EMetricCategory category; - if (model_t::metricCategory(feature, category)) - { + if (model_t::metricCategory(feature, category)) { std::size_t dimension = model_t::dimension(feature); - switch (category) - { - case 
model_t::E_Mean: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_Median: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_Min: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_Max: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_Variance: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_Sum: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_MultivariateMean: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_MultivariateMin: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_MultivariateMax: initializeFeatureDataInstance(dimension, m_FeatureData); break; + switch (category) { + case model_t::E_Mean: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; + case model_t::E_Median: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; + case model_t::E_Min: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; + case model_t::E_Max: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; + case model_t::E_Variance: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; + case model_t::E_Sum: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; + case model_t::E_MultivariateMean: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; + case model_t::E_MultivariateMin: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; + case model_t::E_MultivariateMax: + initializeFeatureDataInstance(dimension, m_FeatureData); + break; } - } - else - { + } else { LOG_ERROR("Unexpected feature = " << model_t::print(m_DataGatherer.feature(i))); } } } - } } diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc index f8579ac38d..4546a67c09 100644 --- a/lib/model/CMetricModel.cc +++ b/lib/model/CMetricModel.cc @@ -29,8 +29,8 @@ #include #include #include -#include #include +#include #include #include @@ -42,13 +42,10 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { using TTime1Vec = core::CSmallVector; using TDouble1Vec = core::CSmallVector; @@ -67,117 +64,96 @@ const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight maths_t::E_SampleCountVarianceScaleWeight}; const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight}; - } -CMetricModel::CMetricModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators) : - CIndividualModel(params, dataGatherer, - newFeatureModels, - newFeatureCorrelateModelPriors, - featureCorrelatesModels, - influenceCalculators), - m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET) -{} - -CMetricModel::CMetricModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators, - core::CStateRestoreTraverser 
&traverser) : - CIndividualModel(params, dataGatherer, - newFeatureModels, - newFeatureCorrelateModelPriors, - featureCorrelatesModels, - influenceCalculators), - m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET) -{ - traverser.traverseSubLevel(boost::bind(&CMetricModel::acceptRestoreTraverser, - this, _1)); -} - -CMetricModel::CMetricModel(bool isForPersistence, const CMetricModel &other) : - CIndividualModel(isForPersistence, other), - m_CurrentBucketStats(0) // Not needed for persistence so minimally constructed -{ - if (!isForPersistence) - { +CMetricModel::CMetricModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) + : CIndividualModel(params, + dataGatherer, + newFeatureModels, + newFeatureCorrelateModelPriors, + featureCorrelatesModels, + influenceCalculators), + m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET) { +} + +CMetricModel::CMetricModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + core::CStateRestoreTraverser& traverser) + : CIndividualModel(params, + dataGatherer, + newFeatureModels, + newFeatureCorrelateModelPriors, + featureCorrelatesModels, + influenceCalculators), + m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET) { + traverser.traverseSubLevel(boost::bind(&CMetricModel::acceptRestoreTraverser, this, _1)); +} + +CMetricModel::CMetricModel(bool isForPersistence, const CMetricModel& other) + : CIndividualModel(isForPersistence, other), + m_CurrentBucketStats(0) // Not needed for persistence so minimally constructed +{ + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } -void CMetricModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CMetricModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel(INDIVIDUAL_STATE_TAG, boost::bind(&CMetricModel::doAcceptPersistInserter, this, _1)); } -bool CMetricModel::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == INDIVIDUAL_STATE_TAG) - { - if (traverser.traverseSubLevel(boost::bind(&CMetricModel::doAcceptRestoreTraverser, - this, - _1)) == false) - { +bool CMetricModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == INDIVIDUAL_STATE_TAG) { + if (traverser.traverseSubLevel(boost::bind(&CMetricModel::doAcceptRestoreTraverser, this, _1)) == false) { // Logging handled already. 
return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -CAnomalyDetectorModel *CMetricModel::cloneForPersistence() const -{ +CAnomalyDetectorModel* CMetricModel::cloneForPersistence() const { return new CMetricModel(true, *this); } -model_t::EModelType CMetricModel::category() const -{ +model_t::EModelType CMetricModel::category() const { return model_t::E_MetricOnline; } -bool CMetricModel::isEventRate() const -{ +bool CMetricModel::isEventRate() const { return false; } -bool CMetricModel::isMetric() const -{ +bool CMetricModel::isMetric() const { return true; } -void CMetricModel::currentBucketPersonIds(core_t::TTime time, TSizeVec &result) const -{ +void CMetricModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const { this->CIndividualModel::currentBucketPersonIds(time, m_CurrentBucketStats.s_FeatureData, result); } -CMetricModel::TOptionalDouble - CMetricModel::baselineBucketCount(const std::size_t /*pid*/) const -{ +CMetricModel::TOptionalDouble CMetricModel::baselineBucketCount(const std::size_t /*pid*/) const { return TOptionalDouble(); } -CMetricModel::TDouble1Vec CMetricModel::currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t /*cid*/, - core_t::TTime time) const -{ - const TFeatureData *data = this->featureData(feature, pid, time); - if (data) - { - const TOptionalSample &value = data->s_BucketValue; +CMetricModel::TDouble1Vec +CMetricModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t /*cid*/, core_t::TTime time) const { + const TFeatureData* data = this->featureData(feature, pid, time); + if (data) { + const TOptionalSample& value = data->s_BucketValue; return value ? value->value(model_t::dimension(feature)) : TDouble1Vec(); } return TDouble1Vec(); @@ -187,62 +163,47 @@ CMetricModel::TDouble1Vec CMetricModel::baselineBucketMean(model_t::EFeature fea std::size_t pid, std::size_t /*cid*/, model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const -{ - const maths::CModel *model{this->model(feature, pid)}; - if (!model) - { + const TSizeDoublePr1Vec& correlated, + core_t::TTime time) const { + const maths::CModel* model{this->model(feature, pid)}; + if (!model) { return TDouble1Vec(); } static const TSizeDoublePr1Vec NO_CORRELATED; TDouble1Vec result(model->predict(time, type.isUnconditional() ? 
NO_CORRELATED : correlated)); - this->correctBaselineForInterim(feature, pid, type, correlated, - this->currentBucketInterimCorrections(), result); + this->correctBaselineForInterim(feature, pid, type, correlated, this->currentBucketInterimCorrections(), result); TDouble1VecDouble1VecPr support = model_t::support(feature); return maths::CTools::truncate(result, support.first, support.second); } -void CMetricModel::sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ +void CMetricModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { this->createUpdateNewModels(startTime, resourceMonitor); m_CurrentBucketStats.s_InterimCorrections.clear(); - this->CIndividualModel::sampleBucketStatistics(startTime, endTime, - this->personFilter(), - m_CurrentBucketStats.s_FeatureData, - resourceMonitor); + this->CIndividualModel::sampleBucketStatistics( + startTime, endTime, this->personFilter(), m_CurrentBucketStats.s_FeatureData, resourceMonitor); } -void CMetricModel::sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CMetricModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (!gatherer.validateSampleTimes(startTime, endTime)) - { + if (!gatherer.validateSampleTimes(startTime, endTime)) { return; } this->createUpdateNewModels(startTime, resourceMonitor); m_CurrentBucketStats.s_InterimCorrections.clear(); - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { LOG_TRACE("Sampling [" << time << "," << time + bucketLength << ")"); gatherer.sampleNow(time); gatherer.featureData(time, bucketLength, m_CurrentBucketStats.s_FeatureData); - const CIndividualModel::TTimeVec &preSampleLastBucketTimes = this->lastBucketTimes(); + const CIndividualModel::TTimeVec& preSampleLastBucketTimes = this->lastBucketTimes(); CIndividualModel::TSizeTimeUMap lastBucketTimesMap; - for (const auto &featureData : m_CurrentBucketStats.s_FeatureData) - { - for (const auto &data : featureData.second) - { + for (const auto& featureData : m_CurrentBucketStats.s_FeatureData) { + for (const auto& data : featureData.second) { std::size_t pid = data.first; lastBucketTimesMap[pid] = preSampleLastBucketTimes[pid]; } @@ -255,74 +216,64 @@ void CMetricModel::sample(core_t::TTime startTime, maths::CModelAddSamplesParams::TDouble2Vec4VecVec trendWeights; maths::CModelAddSamplesParams::TDouble2Vec4VecVec priorWeights; - for (auto &featureData : m_CurrentBucketStats.s_FeatureData) - { + for (auto& featureData : m_CurrentBucketStats.s_FeatureData) { model_t::EFeature feature = featureData.first; - TSizeFeatureDataPrVec &data = featureData.second; + TSizeFeatureDataPrVec& data = featureData.second; std::size_t dimension = model_t::dimension(feature); LOG_TRACE(model_t::print(feature) << " data = " << core::CContainerPrinter::print(data)); this->applyFilter(model_t::E_XF_By, true, this->personFilter(), data); - for (const auto &data_ : data) - { + for (const auto& data_ : data) { std::size_t pid = data_.first; - const CGathererTools::TSampleVec &samples = data_.second.s_Samples; + const CGathererTools::TSampleVec& samples = data_.second.s_Samples; - maths::CModel *model = 
this->model(feature, pid); - if (!model) - { + maths::CModel* model = this->model(feature, pid); + if (!model) { LOG_ERROR("Missing model for " << this->personName(pid)); continue; } core_t::TTime sampleTime = model_t::sampleTime(feature, time, bucketLength); - if (this->shouldIgnoreSample(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, sampleTime)) - { + if (this->shouldIgnoreSample(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, sampleTime)) { model->skipTime(time - lastBucketTimesMap[pid]); continue; } - const TOptionalSample &bucket = data_.second.s_BucketValue; - if (model_t::isSampled(feature) && bucket) - { - values.assign(1, core::make_triple(bucket->time(), - TDouble2Vec(bucket->value(dimension)), - model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)); + const TOptionalSample& bucket = data_.second.s_BucketValue; + if (model_t::isSampled(feature) && bucket) { + values.assign(1, + core::make_triple( + bucket->time(), TDouble2Vec(bucket->value(dimension)), model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)); model->addBucketValue(values); } double emptyBucketWeight = this->emptyBucketWeight(feature, pid, time); - if (emptyBucketWeight == 0.0) - { + if (emptyBucketWeight == 0.0) { continue; } double derate = this->derate(pid, sampleTime); double interval = (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) * emptyBucketWeight; - double count = this->params().s_MaximumUpdatesPerBucket > 0.0 && samples.size() > 0 ? - this->params().s_MaximumUpdatesPerBucket / static_cast(samples.size()) : 1.0; - - LOG_TRACE("Bucket = " << gatherer.printCurrentBucket(time) - << ", feature = " << model_t::print(feature) - << ", samples = " << core::CContainerPrinter::print(samples) - << ", isInteger = " << data_.second.s_IsInteger - << ", person = " << this->personName(pid) - << ", count weight = " << count - << ", dimension = " << dimension - << ", empty bucket weight = " << emptyBucketWeight); + double count = this->params().s_MaximumUpdatesPerBucket > 0.0 && samples.size() > 0 + ? 
this->params().s_MaximumUpdatesPerBucket / static_cast(samples.size()) + : 1.0; + + LOG_TRACE("Bucket = " << gatherer.printCurrentBucket(time) << ", feature = " << model_t::print(feature) << ", samples = " + << core::CContainerPrinter::print(samples) << ", isInteger = " << data_.second.s_IsInteger + << ", person = " << this->personName(pid) << ", count weight = " << count + << ", dimension = " << dimension << ", empty bucket weight = " << emptyBucketWeight); model->params().probabilityBucketEmpty(this->probabilityBucketEmpty(feature, pid)); values.resize(samples.size()); trendWeights.resize(samples.size(), TDouble2Vec4Vec(3)); priorWeights.resize(samples.size(), TDouble2Vec4Vec(3)); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { core_t::TTime ti = samples[i].time(); TDouble2Vec vi(samples[i].value(dimension)); double vs = samples[i].varianceScale(); - values[i] = core::make_triple(model_t::sampleTime(feature, time, bucketLength, ti), - vi, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID); + values[i] = core::make_triple( + model_t::sampleTime(feature, time, bucketLength, ti), vi, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID); trendWeights[i][0].assign(dimension, emptyBucketWeight * count * this->learnRate(feature) / vs); trendWeights[i][1] = model->winsorisationWeight(derate, ti, vi); trendWeights[i][2].assign(dimension, vs); @@ -333,14 +284,13 @@ void CMetricModel::sample(core_t::TTime startTime, maths::CModelAddSamplesParams params; params.integer(data_.second.s_IsInteger) - .nonNegative(data_.second.s_IsNonNegative) - .propagationInterval(interval) - .weightStyles(SAMPLE_WEIGHT_STYLES) - .trendWeights(trendWeights) - .priorWeights(priorWeights); - - if (model->addSamples(params, values) == maths::CModel::E_Reset) - { + .nonNegative(data_.second.s_IsNonNegative) + .propagationInterval(interval) + .weightStyles(SAMPLE_WEIGHT_STYLES) + .trendWeights(trendWeights) + .priorWeights(priorWeights); + + if (model->addSamples(params, values) == maths::CModel::E_Reset) { gatherer.resetSampleCount(pid); } } @@ -353,23 +303,20 @@ void CMetricModel::sample(core_t::TTime startTime, bool CMetricModel::computeProbability(const std::size_t pid, core_t::TTime startTime, core_t::TTime endTime, - CPartitioningFields &partitioningFields, + CPartitioningFields& partitioningFields, const std::size_t /*numberAttributeProbabilities*/, - SAnnotatedProbability &result) const -{ + SAnnotatedProbability& result) const { CAnnotatedProbabilityBuilder resultBuilder(result); - const CDataGatherer &gatherer = this->dataGatherer(); + const CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (endTime != startTime + bucketLength) - { + if (endTime != startTime + bucketLength) { LOG_ERROR("Can only compute probability for single bucket"); return false; } - if (pid >= this->firstBucketTimes().size()) - { + if (pid >= this->firstBucketTimes().size()) { // This is not necessarily an error: the person might have been added // only in an out of phase bucket so far LOG_TRACE("No first time for person = " << gatherer.personName(pid)); @@ -380,54 +327,45 @@ bool CMetricModel::computeProbability(const std::size_t pid, pJoint.addAggregator(maths::CJointProbabilityOfLessLikelySamples()); pJoint.addAggregator(maths::CProbabilityOfExtremeSample()); - for (std::size_t i = 0u, n = gatherer.numberFeatures(); i < n; ++i) - { + for (std::size_t i = 0u, n = gatherer.numberFeatures(); i < n; ++i) { model_t::EFeature feature = 
gatherer.feature(i); - if (model_t::isCategorical(feature)) - { + if (model_t::isCategorical(feature)) { continue; } - const TFeatureData *data = this->featureData(feature, pid, startTime); - if (!data || !data->s_BucketValue) - { + const TFeatureData* data = this->featureData(feature, pid, startTime); + if (!data || !data->s_BucketValue) { continue; } - const TOptionalSample &bucket = data->s_BucketValue; + const TOptionalSample& bucket = data->s_BucketValue; if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, - model_t::sampleTime(feature, startTime, bucketLength, bucket->time()))) - { + model_t::sampleTime(feature, startTime, bucketLength, bucket->time()))) { continue; } LOG_TRACE("Compute probability for " << data->print()); - if (this->correlates(feature, pid, startTime)) - { + if (this->correlates(feature, pid, startTime)) { CProbabilityAndInfluenceCalculator::SCorrelateParams params(partitioningFields); TStrCRefDouble1VecDouble1VecPrPrVecVecVec influenceValues; this->fill(feature, pid, startTime, result.isInterim(), params, influenceValues); this->addProbabilityAndInfluences(pid, params, influenceValues, pJoint, resultBuilder); - } - else - { + } else { CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); this->fill(feature, pid, startTime, result.isInterim(), params); this->addProbabilityAndInfluences(pid, params, data->s_InfluenceValues, pJoint, resultBuilder); } } - if (pJoint.empty()) - { + if (pJoint.empty()) { LOG_TRACE("No samples in [" << startTime << "," << endTime << ")"); return false; } double p; - if (!pJoint.calculate(p, result.s_Influences)) - { + if (!pJoint.calculate(p, result.s_Influences)) { LOG_ERROR("Failed to compute probability"); return false; } @@ -439,8 +377,7 @@ bool CMetricModel::computeProbability(const std::size_t pid, return true; } -uint64_t CMetricModel::checksum(bool includeCurrentBucketStats) const -{ +uint64_t CMetricModel::checksum(bool includeCurrentBucketStats) const { using TStrCRefUInt64Map = std::map; uint64_t seed = this->CIndividualModel::checksum(includeCurrentBucketStats); @@ -448,15 +385,12 @@ uint64_t CMetricModel::checksum(bool includeCurrentBucketStats) const #define KEY(pid) boost::cref(this->personName(pid)) TStrCRefUInt64Map hashes; - if (includeCurrentBucketStats) - { - const TFeatureSizeFeatureDataPrVecPrVec &featureData = m_CurrentBucketStats.s_FeatureData; - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - for (std::size_t j = 0u; j < featureData[i].second.size(); ++j) - { - uint64_t &hash = hashes[KEY(featureData[i].second[j].first)]; - const TFeatureData &data = featureData[i].second[j].second; + if (includeCurrentBucketStats) { + const TFeatureSizeFeatureDataPrVecPrVec& featureData = m_CurrentBucketStats.s_FeatureData; + for (std::size_t i = 0u; i < featureData.size(); ++i) { + for (std::size_t j = 0u; j < featureData[i].second.size(); ++j) { + uint64_t& hash = hashes[KEY(featureData[i].second[j].first)]; + const TFeatureData& data = featureData[i].second[j].second; hash = maths::CChecksum::calculate(hash, data.s_BucketValue); hash = core::CHashing::hashCombine(hash, static_cast(data.s_IsInteger)); hash = maths::CChecksum::calculate(hash, data.s_Samples); @@ -472,25 +406,19 @@ uint64_t CMetricModel::checksum(bool includeCurrentBucketStats) const return maths::CChecksum::calculate(seed, hashes); } -void CMetricModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void 
CMetricModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMetricModel"); this->CIndividualModel::debugMemoryUsage(mem->addChild()); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", - m_CurrentBucketStats.s_PersonCounts, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", - m_CurrentBucketStats.s_FeatureData, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", - m_CurrentBucketStats.s_InterimCorrections, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", m_CurrentBucketStats.s_PersonCounts, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", m_CurrentBucketStats.s_FeatureData, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", m_CurrentBucketStats.s_InterimCorrections, mem); } -std::size_t CMetricModel::memoryUsage() const -{ +std::size_t CMetricModel::memoryUsage() const { return this->CIndividualModel::memoryUsage(); } -std::size_t CMetricModel::computeMemoryUsage() const -{ +std::size_t CMetricModel::computeMemoryUsage() const { std::size_t mem = this->CIndividualModel::computeMemoryUsage(); mem += core::CMemory::dynamicSize(m_CurrentBucketStats.s_PersonCounts); mem += core::CMemory::dynamicSize(m_CurrentBucketStats.s_FeatureData); @@ -498,97 +426,74 @@ std::size_t CMetricModel::computeMemoryUsage() const return mem; } -std::size_t CMetricModel::staticSize() const -{ +std::size_t CMetricModel::staticSize() const { return sizeof(*this); } -CMetricModel::CModelDetailsViewPtr CMetricModel::details() const -{ +CMetricModel::CModelDetailsViewPtr CMetricModel::details() const { return CModelDetailsViewPtr(new CMetricModelDetailsView(*this)); } -const CMetricModel::TFeatureData *CMetricModel::featureData(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) const -{ +const CMetricModel::TFeatureData* CMetricModel::featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const { return this->CIndividualModel::featureData(feature, pid, time, m_CurrentBucketStats.s_FeatureData); } -void CMetricModel::createNewModels(std::size_t n, std::size_t m) -{ +void CMetricModel::createNewModels(std::size_t n, std::size_t m) { this->CIndividualModel::createNewModels(n, m); } -void CMetricModel::updateRecycledModels() -{ +void CMetricModel::updateRecycledModels() { this->CIndividualModel::updateRecycledModels(); } -void CMetricModel::clearPrunedResources(const TSizeVec &people, - const TSizeVec &attributes) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CMetricModel::clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes) { + CDataGatherer& gatherer = this->dataGatherer(); // Stop collecting for these people and add them to the free list. 
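    // (Recycled person ids sit on a free list and are reissued to new people,
    // so the current bucket's cached feature data is rebuilt immediately below
    // to drop entries owned by the recycled ids.)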
    gatherer.recyclePeople(people);
-    if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime))
-    {
-        gatherer.featureData(m_CurrentBucketStats.s_StartTime,
-                             gatherer.bucketLength(),
-                             m_CurrentBucketStats.s_FeatureData);
+    if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) {
+        gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), m_CurrentBucketStats.s_FeatureData);
     }
     this->CIndividualModel::clearPrunedResources(people, attributes);
 }
-core_t::TTime CMetricModel::currentBucketStartTime() const
-{
+core_t::TTime CMetricModel::currentBucketStartTime() const {
     return m_CurrentBucketStats.s_StartTime;
 }
-void CMetricModel::currentBucketStartTime(core_t::TTime time)
-{
+void CMetricModel::currentBucketStartTime(core_t::TTime time) {
     m_CurrentBucketStats.s_StartTime = time;
 }
-uint64_t CMetricModel::currentBucketTotalCount() const
-{
+uint64_t CMetricModel::currentBucketTotalCount() const {
     return m_CurrentBucketStats.s_TotalCount;
 }
-CIndividualModel::TFeatureSizeSizeTripleDouble1VecUMap &CMetricModel::currentBucketInterimCorrections() const
-{
+CIndividualModel::TFeatureSizeSizeTripleDouble1VecUMap& CMetricModel::currentBucketInterimCorrections() const {
     return m_CurrentBucketStats.s_InterimCorrections;
 }
-const CMetricModel::TSizeUInt64PrVec &CMetricModel::currentBucketPersonCounts() const
-{
+const CMetricModel::TSizeUInt64PrVec& CMetricModel::currentBucketPersonCounts() const {
     return m_CurrentBucketStats.s_PersonCounts;
 }
-CMetricModel::TSizeUInt64PrVec &CMetricModel::currentBucketPersonCounts()
-{
+CMetricModel::TSizeUInt64PrVec& CMetricModel::currentBucketPersonCounts() {
     return m_CurrentBucketStats.s_PersonCounts;
 }
-void CMetricModel::currentBucketTotalCount(uint64_t totalCount)
-{
+void CMetricModel::currentBucketTotalCount(uint64_t totalCount) {
     m_CurrentBucketStats.s_TotalCount = totalCount;
 }
-bool CMetricModel::correlates(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const
-{
-    if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields)
-    {
+bool CMetricModel::correlates(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const {
+    if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields) {
         return false;
     }
-    const maths::CModel *model{this->model(feature, pid)};
-    for (const auto &correlate : model->correlates())
-    {
-        if (this->featureData(feature, pid == correlate[0] ? correlate[1] : correlate[0], time))
-        {
+    const maths::CModel* model{this->model(feature, pid)};
+    for (const auto& correlate : model->correlates()) {
+        if (this->featureData(feature, pid == correlate[0] ?
correlate[1] : correlate[0], time)) { return true; } } @@ -599,12 +504,11 @@ void CMetricModel::fill(model_t::EFeature feature, std::size_t pid, core_t::TTime bucketTime, bool interim, - CProbabilityAndInfluenceCalculator::SParams ¶ms) const -{ + CProbabilityAndInfluenceCalculator::SParams& params) const { std::size_t dimension{model_t::dimension(feature)}; - const TFeatureData *data{this->featureData(feature, pid, bucketTime)}; - const TOptionalSample &bucket{data->s_BucketValue}; - const maths::CModel *model{this->model(feature, pid)}; + const TFeatureData* data{this->featureData(feature, pid, bucketTime)}; + const TOptionalSample& bucket{data->s_BucketValue}; + const maths::CModel* model{this->model(feature, pid)}; core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength(), bucket->time())}; TDouble2Vec4Vec weights(2); weights[0] = model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time); @@ -616,35 +520,32 @@ void CMetricModel::fill(model_t::EFeature feature, params.s_ElapsedTime = bucketTime - this->firstBucketTimes()[pid]; params.s_Time.assign(1, TTime2Vec{time}); params.s_Value.assign(1, bucket->value()); - if (interim && model_t::requiresInterimResultAdjustment(feature)) - { + if (interim && model_t::requiresInterimResultAdjustment(feature)) { TDouble2Vec mode(params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weights)); - TDouble2Vec correction(this->interimValueCorrector().corrections( - time, this->currentBucketTotalCount(), - mode, bucket->value(dimension))); + TDouble2Vec correction( + this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), mode, bucket->value(dimension))); params.s_Value[0] += correction; this->currentBucketInterimCorrections().emplace(core::make_triple(feature, pid, pid), correction); } params.s_Count = bucket->count(); params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES) - .addBucketEmpty(TBool2Vec(1, !count || *count == 0)) - .addWeights(weights); + .weightStyles(PROBABILITY_WEIGHT_STYLES) + .addBucketEmpty(TBool2Vec(1, !count || *count == 0)) + .addWeights(weights); } void CMetricModel::fill(model_t::EFeature feature, std::size_t pid, core_t::TTime bucketTime, bool interim, - CProbabilityAndInfluenceCalculator::SCorrelateParams ¶ms, - TStrCRefDouble1VecDouble1VecPrPrVecVecVec &influenceValues) const -{ + CProbabilityAndInfluenceCalculator::SCorrelateParams& params, + TStrCRefDouble1VecDouble1VecPrPrVecVecVec& influenceValues) const { using TStrCRefDouble1VecDoublePrPr = std::pair; - const CDataGatherer &gatherer{this->dataGatherer()}; - const maths::CModel *model{this->model(feature, pid)}; - const TSize2Vec1Vec &correlates{model->correlates()}; - const TTimeVec &firstBucketTimes{this->firstBucketTimes()}; + const CDataGatherer& gatherer{this->dataGatherer()}; + const maths::CModel* model{this->model(feature, pid)}; + const TSize2Vec1Vec& correlates{model->correlates()}; + const TTimeVec& firstBucketTimes{this->firstBucketTimes()}; core_t::TTime bucketLength{gatherer.bucketLength()}; params.s_Feature = feature; @@ -656,8 +557,7 @@ void CMetricModel::fill(model_t::EFeature feature, params.s_Variables.resize(correlates.size()); params.s_CorrelatedLabels.resize(correlates.size()); params.s_Correlated.resize(correlates.size()); - params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES); + 
params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)).weightStyles(PROBABILITY_WEIGHT_STYLES); // These are indexed as follows: // influenceValues["influencer name"]["correlate"]["influence value"] @@ -669,61 +569,50 @@ void CMetricModel::fill(model_t::EFeature feature, // Declared outside the loop to minimize the number of times it is created. TDouble1VecDouble1VecPr value; - for (std::size_t i = 0u; i < correlates.size(); ++i) - { + for (std::size_t i = 0u; i < correlates.size(); ++i) { TSize2Vec variables(pid == correlates[i][0] ? TSize2Vec{0, 1} : TSize2Vec{1, 0}); params.s_CorrelatedLabels[i] = gatherer.personNamePtr(correlates[i][variables[1]]); params.s_Correlated[i] = correlates[i][variables[1]]; params.s_Variables[i] = variables; - const maths::CModel *models[]{model, this->model(feature, correlates[i][variables[1]])}; + const maths::CModel* models[]{model, this->model(feature, correlates[i][variables[1]])}; TDouble2Vec4Vec weight(2, TDouble2Vec(2, 1.0)); weight[0][variables[0]] = models[0]->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0]; weight[0][variables[1]] = models[1]->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0]; - const TFeatureData *data[2]; + const TFeatureData* data[2]; data[0] = this->featureData(feature, correlates[i][0], bucketTime); data[1] = this->featureData(feature, correlates[i][1], bucketTime); - if (data[0] && data[1] && data[0]->s_BucketValue && data[1]->s_BucketValue) - { - const TOptionalSample &bucket0{data[0]->s_BucketValue}; - const TOptionalSample &bucket1{data[1]->s_BucketValue}; - core_t::TTime times[] = - { - model_t::sampleTime(feature, bucketTime, bucketLength, bucket0->time()), - model_t::sampleTime(feature, bucketTime, bucketLength, bucket1->time()) - }; + if (data[0] && data[1] && data[0]->s_BucketValue && data[1]->s_BucketValue) { + const TOptionalSample& bucket0{data[0]->s_BucketValue}; + const TOptionalSample& bucket1{data[1]->s_BucketValue}; + core_t::TTime times[] = {model_t::sampleTime(feature, bucketTime, bucketLength, bucket0->time()), + model_t::sampleTime(feature, bucketTime, bucketLength, bucket1->time())}; params.s_ElapsedTime = std::min(params.s_ElapsedTime, times[0] - firstBucketTimes[correlates[i][0]]); params.s_ElapsedTime = std::min(params.s_ElapsedTime, times[1] - firstBucketTimes[correlates[i][1]]); params.s_Times[i] = TTime2Vec{times[0], times[1]}; params.s_Values[i].resize(2 * bucket0->value().size()); - for (std::size_t j = 0u; j < bucket0->value().size(); ++j) - { - params.s_Values[i][2*j+0] = bucket0->value()[j]; - params.s_Values[i][2*j+1] = bucket1->value()[j]; + for (std::size_t j = 0u; j < bucket0->value().size(); ++j) { + params.s_Values[i][2 * j + 0] = bucket0->value()[j]; + params.s_Values[i][2 * j + 1] = bucket1->value()[j]; } weight[1][variables[0]] = bucket0->varianceScale(); weight[1][variables[1]] = bucket1->varianceScale(); - for (std::size_t j = 0u; j < data[0]->s_InfluenceValues.size(); ++j) - { - for (const auto &influenceValue : data[0]->s_InfluenceValues[j]) - { + for (std::size_t j = 0u; j < data[0]->s_InfluenceValues.size(); ++j) { + for (const auto& influenceValue : data[0]->s_InfluenceValues[j]) { TStrCRef influence = influenceValue.first; - std::size_t match = static_cast( - std::find_if(data[1]->s_InfluenceValues[j].begin(), - data[1]->s_InfluenceValues[j].end(), - [influence](const TStrCRefDouble1VecDoublePrPr &value_) - { - return value_.first.get() == influence.get(); - }) - 
data[1]->s_InfluenceValues[j].begin()); - if (match < data[1]->s_InfluenceValues[j].size()) - { - const TDouble1VecDoublePr &value0 = influenceValue.second; - const TDouble1VecDoublePr &value1 = data[1]->s_InfluenceValues[j][match].second; + std::size_t match = static_cast(std::find_if(data[1]->s_InfluenceValues[j].begin(), + data[1]->s_InfluenceValues[j].end(), + [influence](const TStrCRefDouble1VecDoublePrPr& value_) { + return value_.first.get() == influence.get(); + }) - + data[1]->s_InfluenceValues[j].begin()); + if (match < data[1]->s_InfluenceValues[j].size()) { + const TDouble1VecDoublePr& value0 = influenceValue.second; + const TDouble1VecDoublePr& value1 = data[1]->s_InfluenceValues[j][match].second; value.first.resize(2 * value0.first.size()); - for (std::size_t k = 0u; k < value0.first.size(); ++k) - { - value.first[2*k+0] = value0.first[k]; - value.first[2*k+1] = value1.first[k]; + for (std::size_t k = 0u; k < value0.first.size(); ++k) { + value.first[2 * k + 0] = value0.first[k]; + value.first[2 * k + 1] = value1.first[k]; } value.second = TDouble1Vec{value0.second, value1.second}; influenceValues[j][i].emplace_back(influence, value); @@ -734,24 +623,17 @@ void CMetricModel::fill(model_t::EFeature feature, TOptionalUInt64 count[2]; count[0] = this->currentBucketCount(correlates[i][0], bucketTime); count[1] = this->currentBucketCount(correlates[i][1], bucketTime); - params.s_ComputeProbabilityParams.addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, - !count[1] || *count[1] == 0}) - .addWeights(weight); + params.s_ComputeProbabilityParams.addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, !count[1] || *count[1] == 0}) + .addWeights(weight); } - if (interim && model_t::requiresInterimResultAdjustment(feature)) - { + if (interim && model_t::requiresInterimResultAdjustment(feature)) { core_t::TTime time{bucketTime + bucketLength / 2}; - TDouble2Vec1Vec modes(params.s_Model->correlateModes(time, PROBABILITY_WEIGHT_STYLES, - params.s_ComputeProbabilityParams.weights())); - for (std::size_t i = 0u; i < modes.size(); ++i) - { - if (!params.s_Values.empty()) - { + TDouble2Vec1Vec modes(params.s_Model->correlateModes(time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights())); + for (std::size_t i = 0u; i < modes.size(); ++i) { + if (!params.s_Values.empty()) { TDouble2Vec value_{params.s_Values[i][0], params.s_Values[i][1]}; - TDouble2Vec correction(this->interimValueCorrector().corrections( - time, this->currentBucketTotalCount(), modes[i], value_)); - for (std::size_t j = 0u; j < 2; ++j) - { + TDouble2Vec correction(this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), modes[i], value_)); + for (std::size_t j = 0u; j < 2; ++j) { params.s_Values[i][j] += correction[j]; } this->currentBucketInterimCorrections().emplace(core::make_triple(feature, pid, params.s_Correlated[i]), @@ -763,13 +645,8 @@ void CMetricModel::fill(model_t::EFeature feature, ////////// CMetricModel::SBucketStats Implementation ////////// -CMetricModel::SBucketStats::SBucketStats(core_t::TTime startTime) : - s_StartTime(startTime), - s_PersonCounts(), - s_TotalCount(0), - s_FeatureData(), - s_InterimCorrections(1) -{} - +CMetricModel::SBucketStats::SBucketStats(core_t::TTime startTime) + : s_StartTime(startTime), s_PersonCounts(), s_TotalCount(0), s_FeatureData(), s_InterimCorrections(1) { +} } } diff --git a/lib/model/CMetricModelFactory.cc b/lib/model/CMetricModelFactory.cc index c082cfd1c7..6f99dcddab 100644 --- a/lib/model/CMetricModelFactory.cc +++ 
b/lib/model/CMetricModelFactory.cc @@ -23,42 +23,36 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -CMetricModelFactory::CMetricModelFactory(const SModelParams ¶ms, +CMetricModelFactory::CMetricModelFactory(const SModelParams& params, model_t::ESummaryMode summaryMode, - const std::string &summaryCountFieldName) : - CModelFactory(params), - m_Identifier(), - m_SummaryMode(summaryMode), - m_SummaryCountFieldName(summaryCountFieldName), - m_UseNull(false), - m_BucketLength(CAnomalyDetectorModelConfig::DEFAULT_BUCKET_LENGTH), - m_BucketResultsDelay(0) -{} - -CMetricModelFactory *CMetricModelFactory::clone() const -{ + const std::string& summaryCountFieldName) + : CModelFactory(params), + m_Identifier(), + m_SummaryMode(summaryMode), + m_SummaryCountFieldName(summaryCountFieldName), + m_UseNull(false), + m_BucketLength(CAnomalyDetectorModelConfig::DEFAULT_BUCKET_LENGTH), + m_BucketResultsDelay(0) { +} + +CMetricModelFactory* CMetricModelFactory::clone() const { return new CMetricModelFactory(*this); } -CAnomalyDetectorModel *CMetricModelFactory::makeModel(const SModelInitializationData &initData) const -{ +CAnomalyDetectorModel* CMetricModelFactory::makeModel(const SModelInitializationData& initData) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; - if (!dataGatherer) - { + if (!dataGatherer) { LOG_ERROR("NULL data gatherer"); return 0; } - const TFeatureVec &features = dataGatherer->features(); + const TFeatureVec& features = dataGatherer->features(); TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; influenceCalculators.reserve(m_InfluenceFieldNames.size()); - for (const auto &name : m_InfluenceFieldNames) - { + for (const auto& name : m_InfluenceFieldNames) { influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } @@ -70,21 +64,18 @@ CAnomalyDetectorModel *CMetricModelFactory::makeModel(const SModelInitialization influenceCalculators); } -CAnomalyDetectorModel *CMetricModelFactory::makeModel(const SModelInitializationData &initData, - core::CStateRestoreTraverser &traverser) const -{ +CAnomalyDetectorModel* CMetricModelFactory::makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; - if (!dataGatherer) - { + if (!dataGatherer) { LOG_ERROR("NULL data gatherer"); return 0; } - const TFeatureVec &features = dataGatherer->features(); + const TFeatureVec& features = dataGatherer->features(); TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; influenceCalculators.reserve(m_InfluenceFieldNames.size()); - for (const auto &name : m_InfluenceFieldNames) - { + for (const auto& name : m_InfluenceFieldNames) { influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } @@ -97,8 +88,7 @@ CAnomalyDetectorModel *CMetricModelFactory::makeModel(const SModelInitialization traverser); } -CDataGatherer *CMetricModelFactory::makeDataGatherer(const SGathererInitializationData &initData) const -{ +CDataGatherer* CMetricModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { return new CDataGatherer(model_t::E_Metric, m_SummaryMode, this->modelParams(), @@ -116,9 +106,8 @@ CDataGatherer *CMetricModelFactory::makeDataGatherer(const SGathererInitializati initData.s_SampleOverrideCount); } -CDataGatherer *CMetricModelFactory::makeDataGatherer(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser) const -{ +CDataGatherer* 
CMetricModelFactory::makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const { return new CDataGatherer(model_t::E_Metric, m_SummaryMode, this->modelParams(), @@ -134,21 +123,16 @@ CDataGatherer *CMetricModelFactory::makeDataGatherer(const std::string &partitio traverser); } -CMetricModelFactory::TPriorPtr - CMetricModelFactory::defaultPrior(model_t::EFeature feature, - const SModelParams ¶ms) const -{ +CMetricModelFactory::TPriorPtr CMetricModelFactory::defaultPrior(model_t::EFeature feature, const SModelParams& params) const { // Categorical data all use the multinomial prior. The creation // of these priors is managed by defaultCategoricalPrior. - if (model_t::isCategorical(feature)) - { + if (model_t::isCategorical(feature)) { return TPriorPtr(); } // If the feature data only ever takes a single value we use a // special lightweight prior. - if (model_t::isConstant(feature)) - { + if (model_t::isConstant(feature)) { return boost::make_shared(); } @@ -165,14 +149,12 @@ CMetricModelFactory::TPriorPtr maths_t::EDataType dataType = this->dataType(); - maths::CGammaRateConjugate gammaPrior = - maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); maths::CLogNormalMeanPrecConjugate logNormalPrior = - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); - maths::CNormalMeanPrecConjugate normalPrior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); + maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); // Create the component priors. TPriorPtrVec priors; @@ -180,8 +162,7 @@ CMetricModelFactory::TPriorPtr priors.emplace_back(gammaPrior.clone()); priors.emplace_back(logNormalPrior.clone()); priors.emplace_back(normalPrior.clone()); - if (params.s_MinimumModeFraction <= 0.5) - { + if (params.s_MinimumModeFraction <= 0.5) { // Create the multimode prior. TPriorPtrVec modePriors; modePriors.reserve(3u); @@ -203,15 +184,12 @@ CMetricModelFactory::TPriorPtr return boost::make_shared(priors, dataType, params.s_DecayRate); } -CMetricModelFactory::TMultivariatePriorPtr - CMetricModelFactory::defaultMultivariatePrior(model_t::EFeature feature, - const SModelParams ¶ms) const -{ +CMetricModelFactory::TMultivariatePriorPtr CMetricModelFactory::defaultMultivariatePrior(model_t::EFeature feature, + const SModelParams& params) const { std::size_t dimension = model_t::dimension(feature); // Gaussian mixture for modeling (latitude, longitude). - if (model_t::isLatLong(feature)) - { + if (model_t::isLatLong(feature)) { return this->latLongPrior(params); } @@ -219,33 +197,27 @@ CMetricModelFactory::TMultivariatePriorPtr priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 
2u : 1u); TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(dimension, params); priors.push_back(multivariateNormal); - if (params.s_MinimumModeFraction <= 0.5) - { + if (params.s_MinimumModeFraction <= 0.5) { priors.push_back(this->multivariateMultimodalPrior(dimension, params, *multivariateNormal)); } return this->multivariateOneOfNPrior(dimension, params, priors); } -CMetricModelFactory::TMultivariatePriorPtr - CMetricModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, - const SModelParams ¶ms) const -{ +CMetricModelFactory::TMultivariatePriorPtr CMetricModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, + const SModelParams& params) const { TMultivariatePriorPtrVec priors; priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u); TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(2, params); priors.push_back(multivariateNormal); - if (params.s_MinimumModeFraction <= 0.5) - { + if (params.s_MinimumModeFraction <= 0.5) { priors.push_back(this->multivariateMultimodalPrior(2, params, *multivariateNormal)); } return this->multivariateOneOfNPrior(2, params, priors); } -const CSearchKey &CMetricModelFactory::searchKey() const -{ - if (!m_SearchKeyCache) - { +const CSearchKey& CMetricModelFactory::searchKey() const { + if (!m_SearchKeyCache) { m_SearchKeyCache.reset(CSearchKey(m_Identifier, function_t::function(m_Features), m_UseNull, @@ -259,33 +231,28 @@ const CSearchKey &CMetricModelFactory::searchKey() const return *m_SearchKeyCache; } -bool CMetricModelFactory::isSimpleCount() const -{ +bool CMetricModelFactory::isSimpleCount() const { return false; } -model_t::ESummaryMode CMetricModelFactory::summaryMode() const -{ +model_t::ESummaryMode CMetricModelFactory::summaryMode() const { return m_SummaryMode; } -maths_t::EDataType CMetricModelFactory::dataType() const -{ +maths_t::EDataType CMetricModelFactory::dataType() const { return maths_t::E_ContinuousData; } -void CMetricModelFactory::identifier(int identifier) -{ +void CMetricModelFactory::identifier(int identifier) { m_Identifier = identifier; m_SearchKeyCache.reset(); } -void CMetricModelFactory::fieldNames(const std::string &partitionFieldName, - const std::string &/*overFieldName*/, - const std::string &byFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames) -{ +void CMetricModelFactory::fieldNames(const std::string& partitionFieldName, + const std::string& /*overFieldName*/, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames) { m_PartitionFieldName = partitionFieldName; m_PersonFieldName = byFieldName; m_ValueFieldName = valueFieldName; @@ -293,42 +260,34 @@ void CMetricModelFactory::fieldNames(const std::string &partitionFieldName, m_SearchKeyCache.reset(); } -void CMetricModelFactory::useNull(bool useNull) -{ +void CMetricModelFactory::useNull(bool useNull) { m_UseNull = useNull; m_SearchKeyCache.reset(); } -void CMetricModelFactory::features(const TFeatureVec &features) -{ +void CMetricModelFactory::features(const TFeatureVec& features) { m_Features = features; m_SearchKeyCache.reset(); } -void CMetricModelFactory::bucketLength(core_t::TTime bucketLength) -{ +void CMetricModelFactory::bucketLength(core_t::TTime bucketLength) { m_BucketLength = bucketLength; } -void CMetricModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) -{ +void CMetricModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) { m_BucketResultsDelay = bucketResultsDelay; } 
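// The defaultPrior, defaultMultivariatePrior and defaultCorrelatePrior
// implementations above assemble one-of-n priors from candidate
// distributions (gamma, log-normal, normal and, when s_MinimumModeFraction
// <= 0.5, a multimodal prior). Below is a minimal sketch of the model
// averaging idea behind such an ensemble, assuming each candidate exposes a
// log predictive likelihood: better-fitting candidates accumulate weight and
// come to dominate. SCandidateExample and addSampleExample are hypothetical
// names, not the maths::COneOfNPrior implementation.
#include <algorithm>
#include <functional>
#include <limits>
#include <vector>

struct SCandidateExample {
    double s_LogWeight = 0.0;
    std::function<double(double)> s_LogLikelihood;
};

//! Update the ensemble weights with one sample's predictive likelihoods.
void addSampleExample(std::vector<SCandidateExample>& candidates, double x) {
    double maxLogWeight = std::numeric_limits<double>::lowest();
    for (auto& candidate : candidates) {
        candidate.s_LogWeight += candidate.s_LogLikelihood(x);
        maxLogWeight = std::max(maxLogWeight, candidate.s_LogWeight);
    }
    // Shift in log space so the largest weight is zero, avoiding underflow.
    for (auto& candidate : candidates) {
        candidate.s_LogWeight -= maxLogWeight;
    }
}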
-CMetricModelFactory::TStrCRefVec CMetricModelFactory::partitioningFields() const -{ +CMetricModelFactory::TStrCRefVec CMetricModelFactory::partitioningFields() const { TStrCRefVec result; result.reserve(2); - if (!m_PartitionFieldName.empty()) - { + if (!m_PartitionFieldName.empty()) { result.emplace_back(m_PartitionFieldName); } - if (!m_PersonFieldName.empty()) - { + if (!m_PersonFieldName.empty()) { result.emplace_back(m_PersonFieldName); } return result; } - } } diff --git a/lib/model/CMetricPopulationModel.cc b/lib/model/CMetricPopulationModel.cc index aae4a9193f..4375d800ba 100644 --- a/lib/model/CMetricPopulationModel.cc +++ b/lib/model/CMetricPopulationModel.cc @@ -13,8 +13,8 @@ #include #include -#include #include +#include #include #include #include @@ -39,13 +39,10 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { using TDouble2Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; @@ -60,8 +57,7 @@ using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector; //! \brief The values and weights for an attribute. -struct SValuesAndWeights -{ +struct SValuesAndWeights { SValuesAndWeights() : s_IsInteger(false), s_IsNonNegative(false) {} bool s_IsInteger, s_IsNonNegative; maths::CModel::TTimeDouble2VecSizeTrVec s_BucketValues; @@ -85,145 +81,112 @@ const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{maths_t::E_SampleSeason } // unnamed:: -CMetricPopulationModel::CMetricPopulationModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators) : - CPopulationModel(params, dataGatherer, influenceCalculators), - m_CurrentBucketStats( dataGatherer->currentBucketStartTime() - - dataGatherer->bucketLength()), - m_Probabilities(0.05) -{ +CMetricPopulationModel::CMetricPopulationModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) + : CPopulationModel(params, dataGatherer, influenceCalculators), + m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - dataGatherer->bucketLength()), + m_Probabilities(0.05) { this->initialize(newFeatureModels, newFeatureCorrelateModelPriors, featureCorrelatesModels); } -CMetricPopulationModel::CMetricPopulationModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators, - core::CStateRestoreTraverser &traverser) : - CPopulationModel(params, dataGatherer, influenceCalculators), - m_CurrentBucketStats( dataGatherer->currentBucketStartTime() - - dataGatherer->bucketLength()), - m_Probabilities(0.05) -{ +CMetricPopulationModel::CMetricPopulationModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const 
TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + core::CStateRestoreTraverser& traverser) + : CPopulationModel(params, dataGatherer, influenceCalculators), + m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - dataGatherer->bucketLength()), + m_Probabilities(0.05) { this->initialize(newFeatureModels, newFeatureCorrelateModelPriors, featureCorrelatesModels); - traverser.traverseSubLevel(boost::bind(&CMetricPopulationModel::acceptRestoreTraverser, - this, _1)); + traverser.traverseSubLevel(boost::bind(&CMetricPopulationModel::acceptRestoreTraverser, this, _1)); } -void CMetricPopulationModel::initialize(const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec &newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec &featureCorrelatesModels) -{ +void CMetricPopulationModel::initialize(const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels) { m_FeatureModels.reserve(newFeatureModels.size()); - for (const auto &model : newFeatureModels) - { + for (const auto& model : newFeatureModels) { m_FeatureModels.emplace_back(model.first, model.second); } - std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), - [](const SFeatureModels &lhs, - const SFeatureModels &rhs) - { return lhs.s_Feature < rhs.s_Feature; } ); + std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), [](const SFeatureModels& lhs, const SFeatureModels& rhs) { + return lhs.s_Feature < rhs.s_Feature; + }); - if (this->params().s_MultivariateByFields) - { + if (this->params().s_MultivariateByFields) { m_FeatureCorrelatesModels.reserve(featureCorrelatesModels.size()); - for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i) - { - m_FeatureCorrelatesModels.emplace_back(featureCorrelatesModels[i].first, - newFeatureCorrelateModelPriors[i].second, - featureCorrelatesModels[i].second); + for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i) { + m_FeatureCorrelatesModels.emplace_back( + featureCorrelatesModels[i].first, newFeatureCorrelateModelPriors[i].second, featureCorrelatesModels[i].second); } - std::sort(m_FeatureCorrelatesModels.begin(), m_FeatureCorrelatesModels.end(), - [](const SFeatureCorrelateModels &lhs, - const SFeatureCorrelateModels &rhs) - { return lhs.s_Feature < rhs.s_Feature; }); + std::sort(m_FeatureCorrelatesModels.begin(), + m_FeatureCorrelatesModels.end(), + [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) { return lhs.s_Feature < rhs.s_Feature; }); } } -CMetricPopulationModel::CMetricPopulationModel(bool isForPersistence, - const CMetricPopulationModel &other) : - CPopulationModel(isForPersistence, other), - m_CurrentBucketStats(0), // Not needed for persistence so minimally constructed - m_Probabilities(0.05), // Not needed for persistence so minimally construct - m_MemoryEstimator(other.m_MemoryEstimator) -{ - if (!isForPersistence) - { +CMetricPopulationModel::CMetricPopulationModel(bool isForPersistence, const CMetricPopulationModel& other) + : CPopulationModel(isForPersistence, other), + m_CurrentBucketStats(0), // Not needed for persistence so minimally constructed + m_Probabilities(0.05), // Not needed for persistence so minimally construct + m_MemoryEstimator(other.m_MemoryEstimator) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } 
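// A minimal sketch, under assumed intent, of the clone-for-persistence
// pattern this constructor implements: the flag-guarded "copy" takes a deep
// snapshot of the models, so the snapshot can be serialized independently
// while the live object keeps updating. CSnapshotExample and its members are
// hypothetical names for illustration only.
#include <memory>
#include <stdexcept>
#include <vector>

class CSnapshotExample {
public:
    CSnapshotExample() = default;

    //! Deep copy, permitted only when taking a snapshot for persistence.
    CSnapshotExample(bool isForPersistence, const CSnapshotExample& other) {
        if (!isForPersistence) {
            throw std::logic_error("This constructor only creates clones for persistence");
        }
        m_Models.reserve(other.m_Models.size());
        for (const auto& model : other.m_Models) {
            // Clone each model so the snapshot owns independent state.
            m_Models.push_back(std::make_unique<double>(*model));
        }
    }

private:
    std::vector<std::unique_ptr<double>> m_Models; // stand-in for real models
};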
m_FeatureModels.reserve(m_FeatureModels.size()); - for (const auto &feature : other.m_FeatureModels) - { + for (const auto& feature : other.m_FeatureModels) { m_FeatureModels.emplace_back(feature.s_Feature, feature.s_NewModel); m_FeatureModels.back().s_Models.reserve(feature.s_Models.size()); - for (const auto &model : feature.s_Models) - { + for (const auto& model : feature.s_Models) { m_FeatureModels.back().s_Models.emplace_back(model->cloneForPersistence()); } } m_FeatureCorrelatesModels.reserve(other.m_FeatureCorrelatesModels.size()); - for (const auto &feature : other.m_FeatureCorrelatesModels) - { - m_FeatureCorrelatesModels.emplace_back(feature.s_Feature, feature.s_ModelPrior, - TCorrelationsPtr(feature.s_Models->cloneForPersistence())); + for (const auto& feature : other.m_FeatureCorrelatesModels) { + m_FeatureCorrelatesModels.emplace_back( + feature.s_Feature, feature.s_ModelPrior, TCorrelationsPtr(feature.s_Models->cloneForPersistence())); } } -void CMetricPopulationModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ - inserter.insertLevel(POPULATION_STATE_TAG, - boost::bind(&CMetricPopulationModel::doAcceptPersistInserter, this, _1)); - for (const auto &feature : m_FeatureModels) - { - inserter.insertLevel(FEATURE_MODELS_TAG, - boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1)); +void CMetricPopulationModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + inserter.insertLevel(POPULATION_STATE_TAG, boost::bind(&CMetricPopulationModel::doAcceptPersistInserter, this, _1)); + for (const auto& feature : m_FeatureModels) { + inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1)); } - for (const auto &feature : m_FeatureCorrelatesModels) - { - inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, - boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1)); + for (const auto& feature : m_FeatureCorrelatesModels) { + inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1)); } core::CPersistUtils::persist(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, inserter); } -bool CMetricPopulationModel::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ +bool CMetricPopulationModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { std::size_t i = 0u, j = 0u; - do - { - const std::string &name = traverser.name(); - RESTORE(POPULATION_STATE_TAG, - traverser.traverseSubLevel(boost::bind(&CMetricPopulationModel::doAcceptRestoreTraverser, - this, _1))) + do { + const std::string& name = traverser.name(); + RESTORE(POPULATION_STATE_TAG, traverser.traverseSubLevel(boost::bind(&CMetricPopulationModel::doAcceptRestoreTraverser, this, _1))) RESTORE(FEATURE_MODELS_TAG, - i == m_FeatureModels.size() - || traverser.traverseSubLevel(boost::bind(&SFeatureModels::acceptRestoreTraverser, - &m_FeatureModels[i++], boost::cref(this->params()), _1))) - RESTORE(FEATURE_CORRELATE_MODELS_TAG, - j == m_FeatureCorrelatesModels.size() - || traverser.traverseSubLevel(boost::bind(&SFeatureCorrelateModels::acceptRestoreTraverser, - &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) - RESTORE(MEMORY_ESTIMATOR_TAG, - core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) - } - while (traverser.next()); - - for (auto &feature : m_FeatureModels) - { - for (auto &model : feature.s_Models) - { - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if 
(feature.s_Feature == correlates.s_Feature) - { + i == m_FeatureModels.size() || + traverser.traverseSubLevel( + boost::bind(&SFeatureModels::acceptRestoreTraverser, &m_FeatureModels[i++], boost::cref(this->params()), _1))) + RESTORE( + FEATURE_CORRELATE_MODELS_TAG, + j == m_FeatureCorrelatesModels.size() || + traverser.traverseSubLevel(boost::bind( + &SFeatureCorrelateModels::acceptRestoreTraverser, &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) + RESTORE(MEMORY_ESTIMATOR_TAG, core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) + } while (traverser.next()); + + for (auto& feature : m_FeatureModels) { + for (auto& model : feature.s_Models) { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { model->modelCorrelations(*correlates.s_Models); } } @@ -233,94 +196,74 @@ bool CMetricPopulationModel::acceptRestoreTraverser(core::CStateRestoreTraverser return true; } -CAnomalyDetectorModel *CMetricPopulationModel::cloneForPersistence() const -{ +CAnomalyDetectorModel* CMetricPopulationModel::cloneForPersistence() const { return new CMetricPopulationModel(true, *this); } -model_t::EModelType CMetricPopulationModel::category() const -{ +model_t::EModelType CMetricPopulationModel::category() const { return model_t::E_MetricOnline; } -bool CMetricPopulationModel::isEventRate() const -{ +bool CMetricPopulationModel::isEventRate() const { return false; } -bool CMetricPopulationModel::isMetric() const -{ +bool CMetricPopulationModel::isMetric() const { return true; } CMetricPopulationModel::TDouble1Vec - CMetricPopulationModel::currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const -{ - const TSizeSizePrFeatureDataPrVec &featureData = this->featureData(feature, time); +CMetricPopulationModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { + const TSizeSizePrFeatureDataPrVec& featureData = this->featureData(feature, time); auto i = find(featureData, pid, cid); return i != featureData.end() ? extractValue(feature, *i) : TDouble1Vec(); } -CMetricPopulationModel::TDouble1Vec - CMetricPopulationModel::baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const -{ - const maths::CModel *model{this->model(feature, cid)}; - if (!model) - { +CMetricPopulationModel::TDouble1Vec CMetricPopulationModel::baselineBucketMean(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + model_t::CResultType type, + const TSizeDoublePr1Vec& correlated, + core_t::TTime time) const { + const maths::CModel* model{this->model(feature, cid)}; + if (!model) { return TDouble1Vec(); } static const TSizeDoublePr1Vec NO_CORRELATED; TDouble1Vec result(model->predict(time, type.isUnconditional() ? 
NO_CORRELATED : correlated)); - this->correctBaselineForInterim(feature, pid, cid, type, correlated, - this->currentBucketInterimCorrections(), result); + this->correctBaselineForInterim(feature, pid, cid, type, correlated, this->currentBucketInterimCorrections(), result); TDouble1VecDouble1VecPr support = model_t::support(feature); return maths::CTools::truncate(result, support.first, support.second); } -bool CMetricPopulationModel::bucketStatsAvailable(core_t::TTime time) const -{ - return time >= m_CurrentBucketStats.s_StartTime - && time < m_CurrentBucketStats.s_StartTime + this->bucketLength(); +bool CMetricPopulationModel::bucketStatsAvailable(core_t::TTime time) const { + return time >= m_CurrentBucketStats.s_StartTime && time < m_CurrentBucketStats.s_StartTime + this->bucketLength(); } -void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (!gatherer.dataAvailable(startTime)) - { + if (!gatherer.dataAvailable(startTime)) { return; } this->createUpdateNewModels(startTime, resourceMonitor); this->currentBucketInterimCorrections().clear(); - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor); // Currently, we only remember one bucket. m_CurrentBucketStats.s_StartTime = time; - TSizeUInt64PrVec &personCounts = m_CurrentBucketStats.s_PersonCounts; + TSizeUInt64PrVec& personCounts = m_CurrentBucketStats.s_PersonCounts; gatherer.personNonZeroCounts(time, personCounts); this->applyFilter(model_t::E_XF_Over, false, this->personFilter(), personCounts); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); - for (auto &featureData_ : featureData) - { + for (auto& featureData_ : featureData) { model_t::EFeature feature = featureData_.first; - TSizeSizePrFeatureDataPrVec &data = m_CurrentBucketStats.s_FeatureData[feature]; + TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature]; data.swap(featureData_.second); LOG_TRACE(model_t::print(feature) << ": " << core::CContainerPrinter::print(data)); this->applyFilters(false, this->personFilter(), this->attributeFilter(), data); @@ -328,22 +271,17 @@ void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, } } -void CMetricPopulationModel::sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (!gatherer.validateSampleTimes(startTime, endTime)) - { + if (!gatherer.validateSampleTimes(startTime, endTime)) { return; } this->createUpdateNewModels(startTime, resourceMonitor); this->currentBucketInterimCorrections().clear(); - for (core_t::TTime time = startTime; time < endTime; time += bucketLength) - { + for (core_t::TTime time = startTime; time < endTime; time += 
bucketLength) { LOG_TRACE("Sampling [" << time << "," << time + bucketLength << ")"); gatherer.sampleNow(time); @@ -351,13 +289,11 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); - const TTimeVec &preSampleAttributeLastBucketTimes = this->attributeLastBucketTimes(); + const TTimeVec& preSampleAttributeLastBucketTimes = this->attributeLastBucketTimes(); TSizeTimeUMap attributeLastBucketTimesMap; - for (const auto &featureData_ : featureData) - { - TSizeSizePrFeatureDataPrVec &data = m_CurrentBucketStats.s_FeatureData[featureData_.first]; - for (const auto &data_ : data) - { + for (const auto& featureData_ : featureData) { + TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[featureData_.first]; + for (const auto& data_ : data) { std::size_t cid = CDataGatherer::extractAttributeId(data_); attributeLastBucketTimesMap[cid] = preSampleAttributeLastBucketTimes[cid]; } @@ -367,17 +303,16 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, // Currently, we only remember one bucket. m_CurrentBucketStats.s_StartTime = time; - TSizeUInt64PrVec &personCounts = m_CurrentBucketStats.s_PersonCounts; + TSizeUInt64PrVec& personCounts = m_CurrentBucketStats.s_PersonCounts; gatherer.personNonZeroCounts(time, personCounts); this->applyFilter(model_t::E_XF_Over, true, this->personFilter(), personCounts); - const TTimeVec &attributeLastBucketTimes = this->attributeLastBucketTimes(); + const TTimeVec& attributeLastBucketTimes = this->attributeLastBucketTimes(); - for (auto &featureData_ : featureData) - { + for (auto& featureData_ : featureData) { model_t::EFeature feature = featureData_.first; std::size_t dimension = model_t::dimension(feature); - TSizeSizePrFeatureDataPrVec &data = m_CurrentBucketStats.s_FeatureData[feature]; + TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature]; data.swap(featureData_.second); LOG_TRACE(model_t::print(feature) << ": " << core::CContainerPrinter::print(data)); this->applyFilters(true, this->personFilter(), this->attributeFilter(), data); @@ -386,47 +321,38 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, TSizeFuzzyDeduplicateUMap fuzzy; // Set up fuzzy de-duplication. 
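// A minimal sketch of the fuzzy de-duplication set up below (hypothetical
// CFuzzyDeduplicateExample, not the TSizeFuzzyDeduplicateUMap
// implementation): values are snapped to an epsilon grid, and a sample that
// lands in an occupied cell is treated as a duplicate of the first sample
// seen there, so it adds weight to that sample instead of creating a new one.
#include <cmath>
#include <cstddef>
#include <unordered_map>

class CFuzzyDeduplicateExample {
public:
    explicit CFuzzyDeduplicateExample(double eps) : m_Eps(eps) {}

    //! Returns the index of a near-duplicate of \p value, or \p next if new.
    std::size_t duplicate(double value, std::size_t next) {
        // Quantize to the epsilon grid; nearby values share a cell.
        long long cell = std::llround(value / m_Eps);
        auto result = m_Cells.emplace(cell, next);
        return result.first->second;
    }

private:
    double m_Eps;
    std::unordered_map<long long, std::size_t> m_Cells;
};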
- if (data.size() >= this->params().s_MinimumToDeduplicate) - { - for (const auto &data_ : data) - { + if (data.size() >= this->params().s_MinimumToDeduplicate) { + for (const auto& data_ : data) { std::size_t cid = CDataGatherer::extractAttributeId(data_); - const CGathererTools::TSampleVec &samples = CDataGatherer::extractData(data_).s_Samples; - for (const auto &sample : samples) - { + const CGathererTools::TSampleVec& samples = CDataGatherer::extractData(data_).s_Samples; + for (const auto& sample : samples) { fuzzy[cid].add(TDouble2Vec(sample.value(dimension))); } } - for (auto &fuzzy_ : fuzzy) - { - fuzzy_.second.computeEpsilons(bucketLength, - this->params().s_MinimumToDeduplicate); + for (auto& fuzzy_ : fuzzy) { + fuzzy_.second.computeEpsilons(bucketLength, this->params().s_MinimumToDeduplicate); } } - for (const auto &data_ : data) - { + for (const auto& data_ : data) { std::size_t pid = CDataGatherer::extractPersonId(data_); std::size_t cid = CDataGatherer::extractAttributeId(data_); - const TOptionalSample &bucket = CDataGatherer::extractData(data_).s_BucketValue; - const CGathererTools::TSampleVec &samples = CDataGatherer::extractData(data_).s_Samples; + const TOptionalSample& bucket = CDataGatherer::extractData(data_).s_BucketValue; + const CGathererTools::TSampleVec& samples = CDataGatherer::extractData(data_).s_Samples; bool isInteger = CDataGatherer::extractData(data_).s_IsInteger; bool isNonNegative = CDataGatherer::extractData(data_).s_IsNonNegative; core_t::TTime cutoff = attributeLastBucketTimes[cid] - this->params().s_SamplingAgeCutoff; - maths::CModel *model{this->model(feature, cid)}; - if (!model) - { + maths::CModel* model{this->model(feature, cid)}; + if (!model) { LOG_ERROR("Missing model for " << this->attributeName(cid)); continue; } core_t::TTime sampleTime = model_t::sampleTime(feature, time, bucketLength); - if (this->shouldIgnoreSample(feature, pid, cid, sampleTime)) - { + if (this->shouldIgnoreSample(feature, pid, cid, sampleTime)) { core_t::TTime skipTime = sampleTime - attributeLastBucketTimesMap[cid]; - if (skipTime > 0) - { + if (skipTime > 0) { model->skipTime(skipTime); // Update the last time so we don't advance the same model // multiple times (once per person) @@ -435,94 +361,77 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, continue; } - LOG_TRACE("Adding " << CDataGatherer::extractData(data_) - << " for person = " << gatherer.personName(pid) - << " and attribute = " << gatherer.attributeName(cid)); + LOG_TRACE("Adding " << CDataGatherer::extractData(data_) << " for person = " << gatherer.personName(pid) + << " and attribute = " << gatherer.attributeName(cid)); - SValuesAndWeights &attribute = attributes[cid]; + SValuesAndWeights& attribute = attributes[cid]; attribute.s_IsInteger &= isInteger; attribute.s_IsNonNegative &= isNonNegative; - if (model_t::isSampled(feature) && bucket) - { - attribute.s_BucketValues.emplace_back( - bucket->time(), TDouble2Vec(bucket->value(dimension)), pid); + if (model_t::isSampled(feature) && bucket) { + attribute.s_BucketValues.emplace_back(bucket->time(), TDouble2Vec(bucket->value(dimension)), pid); } - std::size_t n = std::count_if(samples.begin(), samples.end(), - [cutoff](const CSample &sample) - { return sample.time() >= cutoff; }); + std::size_t n = + std::count_if(samples.begin(), samples.end(), [cutoff](const CSample& sample) { return sample.time() >= cutoff; }); double updatesPerBucket = this->params().s_MaximumUpdatesPerBucket; - double countWeight = this->sampleRateWeight(pid, cid) - * 
this->learnRate(feature) - * (updatesPerBucket > 0.0 && n > 0 ? - updatesPerBucket / static_cast(n) : 1.0); + double countWeight = this->sampleRateWeight(pid, cid) * this->learnRate(feature) * + (updatesPerBucket > 0.0 && n > 0 ? updatesPerBucket / static_cast(n) : 1.0); LOG_TRACE("countWeight = " << countWeight); - for (const auto &sample : samples) - { - if (sample.time() < cutoff) - { + for (const auto& sample : samples) { + if (sample.time() < cutoff) { continue; } double vs = sample.varianceScale(); TDouble2Vec value(sample.value(dimension)); - std::size_t duplicate = data.size() >= this->params().s_MinimumToDeduplicate ? - fuzzy[cid].duplicate(sample.time(), value) : - attribute.s_Values.size(); + std::size_t duplicate = data.size() >= this->params().s_MinimumToDeduplicate + ? fuzzy[cid].duplicate(sample.time(), value) + : attribute.s_Values.size(); - if (duplicate < attribute.s_Values.size()) - { + if (duplicate < attribute.s_Values.size()) { std::for_each(attribute.s_TrendWeights[duplicate][0].begin(), attribute.s_TrendWeights[duplicate][0].end(), - [countWeight, vs](double &weight) { weight += countWeight / vs; }); + [countWeight, vs](double& weight) { weight += countWeight / vs; }); std::for_each(attribute.s_PriorWeights[duplicate][0].begin(), attribute.s_PriorWeights[duplicate][0].end(), - [countWeight](double &weight) { weight += countWeight; }); - } - else - { + [countWeight](double& weight) { weight += countWeight; }); + } else { attribute.s_Values.emplace_back(sample.time(), value, pid); - attribute.s_TrendWeights.push_back( - {TDouble2Vec(dimension, countWeight / vs), - model->winsorisationWeight(1.0, sample.time(), value), - TDouble2Vec(dimension, vs)}); - attribute.s_PriorWeights.push_back( - {TDouble2Vec(dimension, countWeight), - model->winsorisationWeight(1.0, sample.time(), value), - TDouble2Vec(dimension, vs)}); + attribute.s_TrendWeights.push_back({TDouble2Vec(dimension, countWeight / vs), + model->winsorisationWeight(1.0, sample.time(), value), + TDouble2Vec(dimension, vs)}); + attribute.s_PriorWeights.push_back({TDouble2Vec(dimension, countWeight), + model->winsorisationWeight(1.0, sample.time(), value), + TDouble2Vec(dimension, vs)}); } } } - for (auto &attribute : attributes) - { + for (auto& attribute : attributes) { std::size_t cid = attribute.first; core_t::TTime latest = boost::numeric::bounds::lowest(); - for (const auto &value : attribute.second.s_Values) - { + for (const auto& value : attribute.second.s_Values) { latest = std::max(latest, value.first); } maths::CModelAddSamplesParams params; params.integer(attribute.second.s_IsInteger) - .nonNegative(attribute.second.s_IsNonNegative) - .propagationInterval(this->propagationTime(cid, latest)) - .weightStyles(SAMPLE_WEIGHT_STYLES) - .trendWeights(attribute.second.s_TrendWeights) - .priorWeights(attribute.second.s_PriorWeights); - - maths::CModel *model{this->model(feature, cid)}; - if (model->addSamples(params, attribute.second.s_Values) == maths::CModel::E_Reset) - { + .nonNegative(attribute.second.s_IsNonNegative) + .propagationInterval(this->propagationTime(cid, latest)) + .weightStyles(SAMPLE_WEIGHT_STYLES) + .trendWeights(attribute.second.s_TrendWeights) + .priorWeights(attribute.second.s_PriorWeights); + + maths::CModel* model{this->model(feature, cid)}; + if (model->addSamples(params, attribute.second.s_Values) == maths::CModel::E_Reset) { gatherer.resetSampleCount(cid); } } } - for (const auto &feature : m_FeatureCorrelatesModels) - { + for (const auto& feature : m_FeatureCorrelatesModels) { 
feature.s_Models->processSamples(SAMPLE_WEIGHT_STYLES); } @@ -530,19 +439,14 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, } } -void CMetricPopulationModel::prune(std::size_t maximumAge) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CMetricPopulationModel::prune(std::size_t maximumAge) { + CDataGatherer& gatherer = this->dataGatherer(); TSizeVec peopleToRemove; TSizeVec attributesToRemove; - this->peopleAndAttributesToRemove(m_CurrentBucketStats.s_StartTime, - maximumAge, - peopleToRemove, - attributesToRemove); + this->peopleAndAttributesToRemove(m_CurrentBucketStats.s_StartTime, maximumAge, peopleToRemove, attributesToRemove); - if (peopleToRemove.empty() && attributesToRemove.empty()) - { + if (peopleToRemove.empty() && attributesToRemove.empty()) { return; } std::sort(peopleToRemove.begin(), peopleToRemove.end()); @@ -556,12 +460,10 @@ void CMetricPopulationModel::prune(std::size_t maximumAge) gatherer.recyclePeople(peopleToRemove); gatherer.recycleAttributes(attributesToRemove); - if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) - { + if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), featureData); - for (auto &feature : featureData) - { + for (auto& feature : featureData) { m_CurrentBucketStats.s_FeatureData[feature.first].swap(feature.second); } } @@ -573,20 +475,17 @@ void CMetricPopulationModel::prune(std::size_t maximumAge) bool CMetricPopulationModel::computeProbability(std::size_t pid, core_t::TTime startTime, core_t::TTime endTime, - CPartitioningFields &partitioningFields, + CPartitioningFields& partitioningFields, std::size_t numberAttributeProbabilities, - SAnnotatedProbability &result) const -{ - const CDataGatherer &gatherer = this->dataGatherer(); + SAnnotatedProbability& result) const { + const CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); - if (endTime != startTime + bucketLength) - { + if (endTime != startTime + bucketLength) { LOG_ERROR("Can only compute probability for single bucket"); return false; } - if (pid > gatherer.numberPeople()) - { + if (pid > gatherer.numberPeople()) { LOG_TRACE("No person for pid = " << pid); return false; } @@ -608,97 +507,87 @@ bool CMetricPopulationModel::computeProbability(std::size_t pid, CProbabilityAndInfluenceCalculator pJoint(this->params().s_InfluenceCutoff); pJoint.addAggregator(maths::CJointProbabilityOfLessLikelySamples()); pJoint.addAggregator(maths::CProbabilityOfExtremeSample()); - if (this->params().s_CacheProbabilities) - { + if (this->params().s_CacheProbabilities) { pJoint.addCache(m_Probabilities); } - for (std::size_t i = 0u; i < gatherer.numberFeatures(); ++i) - { + for (std::size_t i = 0u; i < gatherer.numberFeatures(); ++i) { model_t::EFeature feature = gatherer.feature(i); - if (model_t::isCategorical(feature)) - { + if (model_t::isCategorical(feature)) { continue; } LOG_TRACE("feature = " << model_t::print(feature)); - const TSizeSizePrFeatureDataPrVec &featureData = this->featureData(feature, startTime); + const TSizeSizePrFeatureDataPrVec& featureData = this->featureData(feature, startTime); TSizeSizePr range = personRange(featureData, pid); - for (std::size_t j = range.first; j < range.second; ++j) - { + for (std::size_t j = range.first; j < range.second; ++j) { std::size_t cid = CDataGatherer::extractAttributeId(featureData[j]); partitioningFields.back().second = 
TStrCRef(gatherer.attributeName(cid)); - const TOptionalSample &bucket = CDataGatherer::extractData(featureData[j]).s_BucketValue; - if (!bucket) - { - LOG_ERROR("Expected a value for feature = " << model_t::print(feature) - << ", person = " << gatherer.personName(pid) - << ", attribute = " << gatherer.attributeName(cid)); + const TOptionalSample& bucket = CDataGatherer::extractData(featureData[j]).s_BucketValue; + if (!bucket) { + LOG_ERROR("Expected a value for feature = " << model_t::print(feature) << ", person = " << gatherer.personName(pid) + << ", attribute = " << gatherer.attributeName(cid)); continue; } - if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, cid, - model_t::sampleTime(feature, startTime, bucketLength, bucket->time()))) - { + if (this->shouldIgnoreResult( + feature, result.s_ResultType, pid, cid, model_t::sampleTime(feature, startTime, bucketLength, bucket->time()))) { continue; } - if (this->correlates(feature, pid, cid, startTime)) - { + if (this->correlates(feature, pid, cid, startTime)) { // TODO - } - else - { + } else { CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); this->fill(feature, pid, cid, startTime, result.isInterim(), params); model_t::CResultType type; TSize1Vec mostAnomalousCorrelate; - if (pJoint.addProbability(feature, cid, *params.s_Model, + if (pJoint.addProbability(feature, + cid, + *params.s_Model, params.s_ElapsedTime, params.s_ComputeProbabilityParams, - params.s_Time, params.s_Value, - params.s_Probability, params.s_Tail, - type, mostAnomalousCorrelate)) - { - LOG_TRACE("P(" << params.describe() - << ", attribute = "<< gatherer.attributeName(cid) - << ", person = " << this->personName(pid) << ") = " << params.s_Probability); - const auto &influenceValues = CDataGatherer::extractData(featureData[j]).s_InfluenceValues; - for (std::size_t k = 0u; k < influenceValues.size(); ++k) - { - if (const CInfluenceCalculator *influenceCalculator = this->influenceCalculator(feature, k)) - { + params.s_Time, + params.s_Value, + params.s_Probability, + params.s_Tail, + type, + mostAnomalousCorrelate)) { + LOG_TRACE("P(" << params.describe() << ", attribute = " << gatherer.attributeName(cid) + << ", person = " << this->personName(pid) << ") = " << params.s_Probability); + const auto& influenceValues = CDataGatherer::extractData(featureData[j]).s_InfluenceValues; + for (std::size_t k = 0u; k < influenceValues.size(); ++k) { + if (const CInfluenceCalculator* influenceCalculator = this->influenceCalculator(feature, k)) { pJoint.plugin(*influenceCalculator); pJoint.addInfluences(*(gatherer.beginInfluencers() + k), influenceValues[k], params); } } - resultBuilder.addAttributeProbability(cid, gatherer.attributeNamePtr(cid), - 1.0, params.s_Probability, + resultBuilder.addAttributeProbability(cid, + gatherer.attributeNamePtr(cid), + 1.0, + params.s_Probability, model_t::CResultType::E_Unconditional, - feature, NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); - } - else - { - LOG_ERROR("Failed to compute P(" << params.describe() - << ", attribute = "<< gatherer.attributeName(cid) - << ", person = " << this->personName(pid) << ")"); + feature, + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); + } else { + LOG_ERROR("Failed to compute P(" << params.describe() << ", attribute = " << gatherer.attributeName(cid) + << ", person = " << this->personName(pid) << ")"); } } } } - if (pJoint.empty()) - { + if (pJoint.empty()) { LOG_TRACE("No samples in [" << startTime << "," << endTime << ")"); return false; } double p; - if (!pJoint.calculate(p, 
result.s_Influences)) - { + if (!pJoint.calculate(p, result.s_Influences)) { LOG_ERROR("Failed to compute probability of " << this->personName(pid)); return false; } @@ -709,76 +598,58 @@ bool CMetricPopulationModel::computeProbability(std::size_t pid, return true; } -bool CMetricPopulationModel::computeTotalProbability(const std::string &/*person*/, +bool CMetricPopulationModel::computeTotalProbability(const std::string& /*person*/, std::size_t /*numberAttributeProbabilities*/, - TOptionalDouble &probability, - TAttributeProbability1Vec &attributeProbabilities) const -{ + TOptionalDouble& probability, + TAttributeProbability1Vec& attributeProbabilities) const { probability = TOptionalDouble(); attributeProbabilities.clear(); return true; } -uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const -{ +uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const { uint64_t seed = this->CPopulationModel::checksum(includeCurrentBucketStats); - if (includeCurrentBucketStats) - { + if (includeCurrentBucketStats) { seed = maths::CChecksum::calculate(seed, m_CurrentBucketStats.s_StartTime); } using TStrCRefStrCRefPr = std::pair<TStrCRef, TStrCRef>; - using TStrCRefStrCRefPrUInt64Map = - std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>; + using TStrCRefStrCRefPrUInt64Map = std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>; - const CDataGatherer &gatherer = this->dataGatherer(); + const CDataGatherer& gatherer = this->dataGatherer(); TStrCRefStrCRefPrUInt64Map hashes; - for (const auto &feature : m_FeatureModels) - { - for (std::size_t cid = 0u; cid < feature.s_Models.size(); ++cid) - { - if (gatherer.isAttributeActive(cid)) - { - uint64_t &hash = hashes[{boost::cref(EMPTY_STRING), - boost::cref(gatherer.attributeName(cid))}]; + for (const auto& feature : m_FeatureModels) { + for (std::size_t cid = 0u; cid < feature.s_Models.size(); ++cid) { + if (gatherer.isAttributeActive(cid)) { + uint64_t& hash = hashes[{boost::cref(EMPTY_STRING), boost::cref(gatherer.attributeName(cid))}]; hash = maths::CChecksum::calculate(hash, feature.s_Models[cid]); } } } - for (const auto &feature : m_FeatureCorrelatesModels) - { - for (const auto &model : feature.s_Models->correlationModels()) - { + for (const auto& feature : m_FeatureCorrelatesModels) { + for (const auto& model : feature.s_Models->correlationModels()) { std::size_t cids[]{model.first.first, model.first.second}; - if (gatherer.isAttributeActive(cids[0]) && gatherer.isAttributeActive(cids[1])) - { - uint64_t &hash = hashes[{boost::cref(gatherer.attributeName(cids[0])), - boost::cref(gatherer.attributeName(cids[1]))}]; + if (gatherer.isAttributeActive(cids[0]) && gatherer.isAttributeActive(cids[1])) { + uint64_t& hash = hashes[{boost::cref(gatherer.attributeName(cids[0])), boost::cref(gatherer.attributeName(cids[1]))}]; hash = maths::CChecksum::calculate(hash, model.second); } } } - if (includeCurrentBucketStats) - { - for (const auto &personCount : this->personCounts()) - { - uint64_t &hash = hashes[{boost::cref(gatherer.personName(personCount.first)), - boost::cref(EMPTY_STRING)}]; + if (includeCurrentBucketStats) { + for (const auto& personCount : this->personCounts()) { + uint64_t& hash = hashes[{boost::cref(gatherer.personName(personCount.first)), boost::cref(EMPTY_STRING)}]; hash = maths::CChecksum::calculate(hash, personCount.second); } - for (const auto &feature : m_CurrentBucketStats.s_FeatureData) - { - for (const auto &data_ : feature.second) - { + for (const auto& feature : m_CurrentBucketStats.s_FeatureData) { + for (const auto& data_ : feature.second) { std::size_t pid = 
CDataGatherer::extractPersonId(data_); std::size_t cid = CDataGatherer::extractAttributeId(data_); - const TFeatureData &data = CDataGatherer::extractData(data_); - uint64_t &hash = hashes[{boost::cref(this->personName(pid)), - boost::cref(this->attributeName(cid))}]; + const TFeatureData& data = CDataGatherer::extractData(data_); + uint64_t& hash = hashes[{boost::cref(this->personName(pid)), boost::cref(this->attributeName(cid))}]; hash = maths::CChecksum::calculate(hash, data.s_BucketValue); hash = maths::CChecksum::calculate(hash, data.s_IsInteger); hash = maths::CChecksum::calculate(hash, data.s_Samples); @@ -792,32 +663,26 @@ uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const return maths::CChecksum::calculate(seed, hashes); } -void CMetricPopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CMetricPopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMetricPopulationModel"); this->CPopulationModel::debugMemoryUsage(mem->addChild()); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", - m_CurrentBucketStats.s_PersonCounts, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", - m_CurrentBucketStats.s_FeatureData, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", - m_CurrentBucketStats.s_InterimCorrections, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", m_CurrentBucketStats.s_PersonCounts, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", m_CurrentBucketStats.s_FeatureData, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", m_CurrentBucketStats.s_InterimCorrections, mem); core::CMemoryDebug::dynamicSize("m_FeatureModels", m_FeatureModels, mem); core::CMemoryDebug::dynamicSize("m_FeatureCorrelatesModels", m_FeatureCorrelatesModels, mem); core::CMemoryDebug::dynamicSize("m_MemoryEstimator", m_MemoryEstimator, mem); } -std::size_t CMetricPopulationModel::memoryUsage() const -{ - const CDataGatherer &gatherer = this->dataGatherer(); +std::size_t CMetricPopulationModel::memoryUsage() const { + const CDataGatherer& gatherer = this->dataGatherer(); TOptionalSize estimate = this->estimateMemoryUsage(gatherer.numberActivePeople(), gatherer.numberActiveAttributes(), 0); // # correlations return estimate ? 
estimate.get() : this->computeMemoryUsage(); } -std::size_t CMetricPopulationModel::computeMemoryUsage() const -{ +std::size_t CMetricPopulationModel::computeMemoryUsage() const { std::size_t mem = this->CPopulationModel::memoryUsage(); mem += core::CMemory::dynamicSize(m_CurrentBucketStats.s_PersonCounts); mem += core::CMemory::dynamicSize(m_CurrentBucketStats.s_FeatureData); @@ -828,82 +693,62 @@ std::size_t CMetricPopulationModel::computeMemoryUsage() const return mem; } -CMemoryUsageEstimator *CMetricPopulationModel::memoryUsageEstimator() const -{ +CMemoryUsageEstimator* CMetricPopulationModel::memoryUsageEstimator() const { return &m_MemoryEstimator; } -std::size_t CMetricPopulationModel::staticSize() const -{ +std::size_t CMetricPopulationModel::staticSize() const { return sizeof(*this); } -CMetricPopulationModel::CModelDetailsViewPtr CMetricPopulationModel::details() const -{ +CMetricPopulationModel::CModelDetailsViewPtr CMetricPopulationModel::details() const { return CModelDetailsViewPtr(new CMetricPopulationModelDetailsView(*this)); } -const TSizeSizePrFeatureDataPrVec & - CMetricPopulationModel::featureData(model_t::EFeature feature, core_t::TTime time) const -{ +const TSizeSizePrFeatureDataPrVec& CMetricPopulationModel::featureData(model_t::EFeature feature, core_t::TTime time) const { static const TSizeSizePrFeatureDataPrVec EMPTY; - if (!this->bucketStatsAvailable(time)) - { - LOG_ERROR("No statistics at " << time - << ", current bucket = [" << m_CurrentBucketStats.s_StartTime - << "," << m_CurrentBucketStats.s_StartTime + this->bucketLength() << ")"); + if (!this->bucketStatsAvailable(time)) { + LOG_ERROR("No statistics at " << time << ", current bucket = [" << m_CurrentBucketStats.s_StartTime << "," + << m_CurrentBucketStats.s_StartTime + this->bucketLength() << ")"); return EMPTY; } auto result = m_CurrentBucketStats.s_FeatureData.find(feature); return result == m_CurrentBucketStats.s_FeatureData.end() ? 
EMPTY : result->second; } -core_t::TTime CMetricPopulationModel::currentBucketStartTime() const -{ +core_t::TTime CMetricPopulationModel::currentBucketStartTime() const { return m_CurrentBucketStats.s_StartTime; } -void CMetricPopulationModel::currentBucketStartTime(core_t::TTime startTime) -{ +void CMetricPopulationModel::currentBucketStartTime(core_t::TTime startTime) { m_CurrentBucketStats.s_StartTime = startTime; } -uint64_t CMetricPopulationModel::currentBucketTotalCount() const -{ +uint64_t CMetricPopulationModel::currentBucketTotalCount() const { return m_CurrentBucketStats.s_TotalCount; } -void CMetricPopulationModel::currentBucketTotalCount(uint64_t totalCount) -{ +void CMetricPopulationModel::currentBucketTotalCount(uint64_t totalCount) { m_CurrentBucketStats.s_TotalCount = totalCount; } -const CMetricPopulationModel::TSizeUInt64PrVec &CMetricPopulationModel::personCounts() const -{ +const CMetricPopulationModel::TSizeUInt64PrVec& CMetricPopulationModel::personCounts() const { return m_CurrentBucketStats.s_PersonCounts; } -CPopulationModel::TCorrectionKeyDouble1VecUMap & - CMetricPopulationModel::currentBucketInterimCorrections() const -{ +CPopulationModel::TCorrectionKeyDouble1VecUMap& CMetricPopulationModel::currentBucketInterimCorrections() const { return m_CurrentBucketStats.s_InterimCorrections; } -void CMetricPopulationModel::createNewModels(std::size_t n, std::size_t m) -{ - if (m > 0) - { - for (auto &feature : m_FeatureModels) - { +void CMetricPopulationModel::createNewModels(std::size_t n, std::size_t m) { + if (m > 0) { + for (auto& feature : m_FeatureModels) { std::size_t newM = feature.s_Models.size() + m; core::CAllocationStrategy::reserve(feature.s_Models, newM); - for (std::size_t cid = feature.s_Models.size(); cid < newM; ++cid) - { + for (std::size_t cid = feature.s_Models.size(); cid < newM; ++cid) { feature.s_Models.emplace_back(feature.s_NewModel->clone(cid)); - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { feature.s_Models.back()->modelCorrelations(*correlates.s_Models); } } @@ -913,18 +758,13 @@ void CMetricPopulationModel::createNewModels(std::size_t n, std::size_t m) this->CPopulationModel::createNewModels(n, m); } -void CMetricPopulationModel::updateRecycledModels() -{ - CDataGatherer &gatherer = this->dataGatherer(); - for (auto cid : gatherer.recycledAttributeIds()) - { - for (auto &feature : m_FeatureModels) - { +void CMetricPopulationModel::updateRecycledModels() { + CDataGatherer& gatherer = this->dataGatherer(); + for (auto cid : gatherer.recycledAttributeIds()) { + for (auto& feature : m_FeatureModels) { feature.s_Models[cid].reset(feature.s_NewModel->clone(cid)); - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { feature.s_Models.back()->modelCorrelations(*correlates.s_Models); } } @@ -933,34 +773,25 @@ void CMetricPopulationModel::updateRecycledModels() this->CPopulationModel::updateRecycledModels(); } -void CMetricPopulationModel::refreshCorrelationModels(std::size_t resourceLimit, - CResourceMonitor &resourceMonitor) -{ +void CMetricPopulationModel::refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor) { std::size_t n = this->numberOfPeople(); double 
maxNumberCorrelations = this->params().s_CorrelationModelsOverhead * static_cast<double>(n); auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate, this, n, 0, _1); - CTimeSeriesCorrelateModelAllocator allocator(resourceMonitor, memoryUsage, resourceLimit, - static_cast<std::size_t>(maxNumberCorrelations + 0.5)); - for (auto &feature : m_FeatureCorrelatesModels) - { + CTimeSeriesCorrelateModelAllocator allocator( + resourceMonitor, memoryUsage, resourceLimit, static_cast<std::size_t>(maxNumberCorrelations + 0.5)); + for (auto& feature : m_FeatureCorrelatesModels) { allocator.prototypePrior(feature.s_ModelPrior); feature.s_Models->refresh(allocator); } } -void CMetricPopulationModel::clearPrunedResources(const TSizeVec &/*people*/, - const TSizeVec &/*attributes*/) -{ - CDataGatherer &gatherer = this->dataGatherer(); - for (auto cid : gatherer.recycledAttributeIds()) - { - for (auto &feature : m_FeatureModels) - { +void CMetricPopulationModel::clearPrunedResources(const TSizeVec& /*people*/, const TSizeVec& /*attributes*/) { + CDataGatherer& gatherer = this->dataGatherer(); + for (auto cid : gatherer.recycledAttributeIds()) { + for (auto& feature : m_FeatureModels) { feature.s_Models[cid].reset(feature.s_NewModel->clone(cid)); - for (const auto &correlates : m_FeatureCorrelatesModels) - { - if (feature.s_Feature == correlates.s_Feature) - { + for (const auto& correlates : m_FeatureCorrelatesModels) { + if (feature.s_Feature == correlates.s_Feature) { feature.s_Models.back()->modelCorrelations(*correlates.s_Models); } } @@ -968,56 +799,39 @@ void CMetricPopulationModel::clearPrunedResources(const TSizeVec &/*people*/, } } -void CMetricPopulationModel::doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) -{ +void CMetricPopulationModel::doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) { core_t::TTime gap = endTime - startTime; - for (auto &feature : m_FeatureModels) - { - for (auto &model : feature.s_Models) - { + for (auto& feature : m_FeatureModels) { + for (auto& model : feature.s_Models) { model->skipTime(gap); } } this->CPopulationModel::doSkipSampling(startTime, endTime); } -const maths::CModel *CMetricPopulationModel::model(model_t::EFeature feature, std::size_t cid) const -{ +const maths::CModel* CMetricPopulationModel::model(model_t::EFeature feature, std::size_t cid) const { return const_cast<CMetricPopulationModel*>(this)->model(feature, cid); } -maths::CModel *CMetricPopulationModel::model(model_t::EFeature feature, std::size_t cid) -{ - auto i = std::find_if(m_FeatureModels.begin(), m_FeatureModels.end(), - [feature](const SFeatureModels &model) - { - return model.s_Feature == feature; - }); +maths::CModel* CMetricPopulationModel::model(model_t::EFeature feature, std::size_t cid) { + auto i = std::find_if( + m_FeatureModels.begin(), m_FeatureModels.end(), [feature](const SFeatureModels& model) { return model.s_Feature == feature; }); return i != m_FeatureModels.end() && cid < i->s_Models.size() ? 
i->s_Models[cid].get() : 0; } -bool CMetricPopulationModel::correlates(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const -{ - if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields) - { +bool CMetricPopulationModel::correlates(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { + if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields) { return false; } - const maths::CModel *model{this->model(feature, cid)}; - const TSizeSizePrFeatureDataPrVec &data = this->featureData(feature, time); + const maths::CModel* model{this->model(feature, cid)}; + const TSizeSizePrFeatureDataPrVec& data = this->featureData(feature, time); TSizeSizePr range = personRange(data, pid); - for (std::size_t j = range.first; j < range.second; ++j) - { + for (std::size_t j = range.first; j < range.second; ++j) { std::size_t cids[]{cid, CDataGatherer::extractAttributeId(data[j])}; - for (const auto &correlate : model->correlates()) - { - if ( (cids[0] == correlate[0] && cids[1] == correlate[1]) - || (cids[1] == correlate[0] && cids[0] == correlate[1])) - { + for (const auto& correlate : model->correlates()) { + if ((cids[0] == correlate[0] && cids[1] == correlate[1]) || (cids[1] == correlate[0] && cids[0] == correlate[1])) { return true; } } @@ -1030,12 +844,11 @@ void CMetricPopulationModel::fill(model_t::EFeature feature, std::size_t cid, core_t::TTime bucketTime, bool interim, - CProbabilityAndInfluenceCalculator::SParams &params) const -{ + CProbabilityAndInfluenceCalculator::SParams& params) const { std::size_t dimension{model_t::dimension(feature)}; auto data = find(this->featureData(feature, bucketTime), pid, cid); - const maths::CModel *model{this->model(feature, cid)}; - const TOptionalSample &bucket{CDataGatherer::extractData(*data).s_BucketValue}; + const maths::CModel* model{this->model(feature, cid)}; + const TOptionalSample& bucket{CDataGatherer::extractData(*data).s_BucketValue}; core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength(), bucket->time())}; TDouble2Vec4Vec weights{model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), TDouble2Vec(dimension, bucket->varianceScale())}; @@ -1045,30 +858,25 @@ void CMetricPopulationModel::fill(model_t::EFeature feature, params.s_ElapsedTime = time - this->attributeFirstBucketTimes()[cid]; params.s_Time.assign(1, TTime2Vec{time}); params.s_Value.assign(1, bucket->value()); - if (interim && model_t::requiresInterimResultAdjustment(feature)) - { + if (interim && model_t::requiresInterimResultAdjustment(feature)) { TDouble2Vec mode(params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weights)); - TDouble2Vec correction(this->interimValueCorrector().corrections( - time, this->currentBucketTotalCount(), - mode, bucket->value(dimension))); + TDouble2Vec correction( + this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), mode, bucket->value(dimension))); params.s_Value[0] += correction; this->currentBucketInterimCorrections().emplace(CCorrectionKey(feature, pid, cid), correction); } params.s_Count = 1.0; params.s_ComputeProbabilityParams.tag(pid) - .addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES) - .addBucketEmpty(TBool2Vec(1, false)) - .addWeights(weights); + .addCalculation(model_t::probabilityCalculation(feature)) + .weightStyles(PROBABILITY_WEIGHT_STYLES) + .addBucketEmpty(TBool2Vec(1, false)) + .addWeights(weights); } 
////////// CMetricPopulationModel::SBucketStats Implementation ////////// -CMetricPopulationModel::SBucketStats::SBucketStats(core_t::TTime startTime) : - s_StartTime(startTime), - s_TotalCount(0), - s_InterimCorrections(1) -{} - +CMetricPopulationModel::SBucketStats::SBucketStats(core_t::TTime startTime) + : s_StartTime(startTime), s_TotalCount(0), s_InterimCorrections(1) { +} } } diff --git a/lib/model/CMetricPopulationModelFactory.cc b/lib/model/CMetricPopulationModelFactory.cc index d39d081a8b..0596f244fb 100644 --- a/lib/model/CMetricPopulationModelFactory.cc +++ b/lib/model/CMetricPopulationModelFactory.cc @@ -23,41 +23,35 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -CMetricPopulationModelFactory::CMetricPopulationModelFactory(const SModelParams &params, +CMetricPopulationModelFactory::CMetricPopulationModelFactory(const SModelParams& params, model_t::ESummaryMode summaryMode, - const std::string &summaryCountFieldName) : - CModelFactory(params), - m_Identifier(), - m_SummaryMode(summaryMode), - m_SummaryCountFieldName(summaryCountFieldName), - m_UseNull(false), - m_BucketResultsDelay(0) -{} - -CMetricPopulationModelFactory *CMetricPopulationModelFactory::clone() const -{ + const std::string& summaryCountFieldName) + : CModelFactory(params), + m_Identifier(), + m_SummaryMode(summaryMode), + m_SummaryCountFieldName(summaryCountFieldName), + m_UseNull(false), + m_BucketResultsDelay(0) { +} + +CMetricPopulationModelFactory* CMetricPopulationModelFactory::clone() const { return new CMetricPopulationModelFactory(*this); } -CAnomalyDetectorModel *CMetricPopulationModelFactory::makeModel(const SModelInitializationData &initData) const -{ +CAnomalyDetectorModel* CMetricPopulationModelFactory::makeModel(const SModelInitializationData& initData) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; - if (!dataGatherer) - { + if (!dataGatherer) { LOG_ERROR("NULL data gatherer"); return 0; } - const TFeatureVec &features = dataGatherer->features(); + const TFeatureVec& features = dataGatherer->features(); TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; influenceCalculators.reserve(m_InfluenceFieldNames.size()); - for (const auto &name : m_InfluenceFieldNames) - { + for (const auto& name : m_InfluenceFieldNames) { influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } @@ -69,21 +63,18 @@ CAnomalyDetectorModel *CMetricPopulationModelFactory::makeModel(const SModelInit influenceCalculators); } -CAnomalyDetectorModel *CMetricPopulationModelFactory::makeModel(const SModelInitializationData &initData, - core::CStateRestoreTraverser &traverser) const -{ +CAnomalyDetectorModel* CMetricPopulationModelFactory::makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; - if (!dataGatherer) - { + if (!dataGatherer) { LOG_ERROR("NULL data gatherer"); return 0; } - const TFeatureVec &features = dataGatherer->features(); + const TFeatureVec& features = dataGatherer->features(); TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; influenceCalculators.reserve(m_InfluenceFieldNames.size()); - for (const auto &name : m_InfluenceFieldNames) - { + for (const auto& name : m_InfluenceFieldNames) { influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } @@ -96,8 +87,7 @@ CAnomalyDetectorModel *CMetricPopulationModelFactory::makeModel(const SModelInit traverser); } -CDataGatherer 
*CMetricPopulationModelFactory::makeDataGatherer(const SGathererInitializationData &initData) const -{ +CDataGatherer* CMetricPopulationModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { return new CDataGatherer(model_t::E_PopulationMetric, m_SummaryMode, this->modelParams(), @@ -115,9 +105,8 @@ CDataGatherer *CMetricPopulationModelFactory::makeDataGatherer(const SGathererIn initData.s_SampleOverrideCount); } -CDataGatherer *CMetricPopulationModelFactory::makeDataGatherer(const std::string &partitionFieldValue, - core::CStateRestoreTraverser &traverser) const -{ +CDataGatherer* CMetricPopulationModelFactory::makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const { return new CDataGatherer(model_t::E_PopulationMetric, m_SummaryMode, this->modelParams(), @@ -133,21 +122,17 @@ CDataGatherer *CMetricPopulationModelFactory::makeDataGatherer(const std::string traverser); } -CMetricPopulationModelFactory::TPriorPtr - CMetricPopulationModelFactory::defaultPrior(model_t::EFeature feature, - const SModelParams &params) const -{ +CMetricPopulationModelFactory::TPriorPtr CMetricPopulationModelFactory::defaultPrior(model_t::EFeature feature, + const SModelParams& params) const { // Categorical data all use the multinomial prior. The creation // of these priors is managed by defaultCategoricalPrior. - if (model_t::isCategorical(feature)) - { + if (model_t::isCategorical(feature)) { return TPriorPtr(); } // If the feature data only ever takes a single value we use a // special lightweight prior. - if (model_t::isConstant(feature)) - { + if (model_t::isConstant(feature)) { return boost::make_shared<maths::CConstantPrior>(); } @@ -164,14 +149,12 @@ CMetricPopulationModelFactory::TPriorPtr maths_t::EDataType dataType = this->dataType(); - maths::CGammaRateConjugate gammaPrior = - maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); maths::CLogNormalMeanPrecConjugate logNormalPrior = - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); - maths::CNormalMeanPrecConjugate normalPrior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); + maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); // Create the component priors. TPriorPtrVec priors; @@ -179,8 +162,7 @@ CMetricPopulationModelFactory::TPriorPtr priors.emplace_back(gammaPrior.clone()); priors.emplace_back(logNormalPrior.clone()); priors.emplace_back(normalPrior.clone()); - if (params.s_MinimumModeFraction <= 0.5) - { + if (params.s_MinimumModeFraction <= 0.5) { // Create the multimode prior. TPriorPtrVec modePriors; modePriors.reserve(3u); @@ -203,14 +185,11 @@ CMetricPopulationModelFactory::TPriorPtr } CMetricPopulationModelFactory::TMultivariatePriorPtr - CMetricPopulationModelFactory::defaultMultivariatePrior(model_t::EFeature feature, - const SModelParams &params) const -{ +CMetricPopulationModelFactory::defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const { std::size_t dimension = model_t::dimension(feature); // Gaussian mixture for modeling (latitude, longitude). 
- if (model_t::isLatLong(feature)) - { + if (model_t::isLatLong(feature)) { return this->latLongPrior(params); } @@ -218,8 +197,7 @@ CMetricPopulationModelFactory::TMultivariatePriorPtr priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u); TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(dimension, params); priors.push_back(multivariateNormal); - if (params.s_MinimumModeFraction <= 0.5) - { + if (params.s_MinimumModeFraction <= 0.5) { priors.push_back(this->multivariateMultimodalPrior(dimension, params, *multivariateNormal)); } @@ -227,24 +205,19 @@ CMetricPopulationModelFactory::TMultivariatePriorPtr } CMetricPopulationModelFactory::TMultivariatePriorPtr - CMetricPopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, - const SModelParams &params) const -{ +CMetricPopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, const SModelParams& params) const { TMultivariatePriorPtrVec priors; priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u); TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(2, params); priors.push_back(multivariateNormal); - if (params.s_MinimumModeFraction <= 0.5) - { + if (params.s_MinimumModeFraction <= 0.5) { priors.push_back(this->multivariateMultimodalPrior(2, params, *multivariateNormal)); } return this->multivariateOneOfNPrior(2, params, priors); } -const CSearchKey &CMetricPopulationModelFactory::searchKey() const -{ - if (!m_SearchKeyCache) - { +const CSearchKey& CMetricPopulationModelFactory::searchKey() const { + if (!m_SearchKeyCache) { m_SearchKeyCache.reset(CSearchKey(m_Identifier, function_t::function(m_Features), m_UseNull, @@ -258,33 +231,28 @@ const CSearchKey &CMetricPopulationModelFactory::searchKey() const return *m_SearchKeyCache; } -bool CMetricPopulationModelFactory::isSimpleCount() const -{ +bool CMetricPopulationModelFactory::isSimpleCount() const { return false; } -model_t::ESummaryMode CMetricPopulationModelFactory::summaryMode() const -{ +model_t::ESummaryMode CMetricPopulationModelFactory::summaryMode() const { return m_SummaryMode; } -maths_t::EDataType CMetricPopulationModelFactory::dataType() const -{ +maths_t::EDataType CMetricPopulationModelFactory::dataType() const { return maths_t::E_ContinuousData; } -void CMetricPopulationModelFactory::identifier(int identifier) -{ +void CMetricPopulationModelFactory::identifier(int identifier) { m_Identifier = identifier; m_SearchKeyCache.reset(); } -void CMetricPopulationModelFactory::fieldNames(const std::string &partitionFieldName, - const std::string &overFieldName, - const std::string &byFieldName, - const std::string &valueFieldName, - const TStrVec &influenceFieldNames) -{ +void CMetricPopulationModelFactory::fieldNames(const std::string& partitionFieldName, + const std::string& overFieldName, + const std::string& byFieldName, + const std::string& valueFieldName, + const TStrVec& influenceFieldNames) { m_PartitionFieldName = partitionFieldName; m_PersonFieldName = overFieldName; m_AttributeFieldName = byFieldName; @@ -293,42 +261,33 @@ void CMetricPopulationModelFactory::fieldNames(const std::string &partitionField m_SearchKeyCache.reset(); } -void CMetricPopulationModelFactory::useNull(bool useNull) -{ +void CMetricPopulationModelFactory::useNull(bool useNull) { m_UseNull = useNull; m_SearchKeyCache.reset(); } -void CMetricPopulationModelFactory::features(const TFeatureVec &features) -{ +void CMetricPopulationModelFactory::features(const TFeatureVec& features) { m_Features = features; 
m_SearchKeyCache.reset(); } -void CMetricPopulationModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) -{ +void CMetricPopulationModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) { m_BucketResultsDelay = bucketResultsDelay; } -CMetricPopulationModelFactory::TStrCRefVec - CMetricPopulationModelFactory::partitioningFields() const -{ +CMetricPopulationModelFactory::TStrCRefVec CMetricPopulationModelFactory::partitioningFields() const { TStrCRefVec result; result.reserve(3); - if (!m_PartitionFieldName.empty()) - { + if (!m_PartitionFieldName.empty()) { result.emplace_back(m_PartitionFieldName); } - if (!m_PersonFieldName.empty()) - { + if (!m_PersonFieldName.empty()) { result.emplace_back(m_PersonFieldName); } - if (!m_AttributeFieldName.empty()) - { + if (!m_AttributeFieldName.empty()) { result.emplace_back(m_AttributeFieldName); } return result; } - } } diff --git a/lib/model/CModelDetailsView.cc b/lib/model/CModelDetailsView.cc index bbd96eeb9a..447e482f73 100644 --- a/lib/model/CModelDetailsView.cc +++ b/lib/model/CModelDetailsView.cc @@ -17,14 +17,10 @@ #include #include -namespace ml -{ -namespace model -{ -namespace -{ -const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight}; +namespace ml { +namespace model { +namespace { +const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight}; const std::string EMPTY_STRING(""); } @@ -34,46 +30,29 @@ using TDoubleDoublePr = std::pair<double, double>; ////////// CModelDetailsView Implementation ////////// -bool CModelDetailsView::personId(const std::string &name, - std::size_t &result) const -{ +bool CModelDetailsView::personId(const std::string& name, std::size_t& result) const { return this->base().dataGatherer().personId(name, result); } -bool CModelDetailsView::categoryId(const std::string &attribute, - std::size_t &result) const -{ +bool CModelDetailsView::categoryId(const std::string& attribute, std::size_t& result) const { return this->base().dataGatherer().attributeId(attribute, result); } -const CModelDetailsView::TFeatureVec &CModelDetailsView::features() const -{ +const CModelDetailsView::TFeatureVec& CModelDetailsView::features() const { return this->base().dataGatherer().features(); } -void CModelDetailsView::modelPlot(core_t::TTime time, - double boundsPercentile, - const TStrSet &terms, - CModelPlotData &modelPlotData) const -{ - for (auto feature : this->features()) - { - if (!model_t::isConstant(feature) && !model_t::isCategorical(feature)) - { - if (terms.empty() || !this->hasByField()) - { - for (std::size_t byFieldId = 0; byFieldId < this->maxByFieldId(); ++byFieldId) - { +void CModelDetailsView::modelPlot(core_t::TTime time, double boundsPercentile, const TStrSet& terms, CModelPlotData& modelPlotData) const { + for (auto feature : this->features()) { + if (!model_t::isConstant(feature) && !model_t::isCategorical(feature)) { + if (terms.empty() || !this->hasByField()) { + for (std::size_t byFieldId = 0; byFieldId < this->maxByFieldId(); ++byFieldId) { this->modelPlotForByFieldId(time, boundsPercentile, feature, byFieldId, modelPlotData); } - } - else - { - for (const auto &term : terms) - { + } else { + for (const auto& term : terms) { std::size_t byFieldId(0); - if (this->byFieldId(term, byFieldId)) - { + if (this->byFieldId(term, byFieldId)) { this->modelPlotForByFieldId(time, boundsPercentile, feature, byFieldId, modelPlotData); } } @@ -87,18 +66,15 @@ void 
CModelDetailsView::modelPlotForByFieldId(core_t::TTime time, double boundsPercentile, model_t::EFeature feature, std::size_t byFieldId, - CModelPlotData &modelPlotData) const -{ + CModelPlotData& modelPlotData) const { using TDouble1VecDouble1VecPr = std::pair<TDouble1Vec, TDouble1Vec>; using TDouble2Vec = core::CSmallVector<double, 2>; using TDouble2Vec3Vec = core::CSmallVector<TDouble2Vec, 3>; using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>; - if (this->isByFieldIdActive(byFieldId)) - { - const maths::CModel *model = this->model(feature, byFieldId); - if (!model) - { + if (this->isByFieldIdActive(byFieldId)) { + const maths::CModel* model = this->model(feature, byFieldId); + if (!model) { return; } @@ -114,75 +90,55 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time, TDouble2Vec3Vec interval(model->confidenceInterval(time, boundsPercentile, WEIGHT_STYLES, weights)); - if (interval.size() == 3) - { - TDouble2Vec lower = maths::CTools::truncate(interval[0], supportLower, supportUpper); - TDouble2Vec upper = maths::CTools::truncate(interval[2], lower, supportUpper); + if (interval.size() == 3) { + TDouble2Vec lower = maths::CTools::truncate(interval[0], supportLower, supportUpper); + TDouble2Vec upper = maths::CTools::truncate(interval[2], lower, supportUpper); TDouble2Vec median = maths::CTools::truncate(interval[1], lower, upper); // TODO This data structure should support multivariate features. - modelPlotData.get(feature, this->byFieldValue(byFieldId)) = - CModelPlotData::SByFieldData(lower[0], upper[0], median[0]); + modelPlotData.get(feature, this->byFieldValue(byFieldId)) = CModelPlotData::SByFieldData(lower[0], upper[0], median[0]); } } } void CModelDetailsView::addCurrentBucketValues(core_t::TTime time, model_t::EFeature feature, - const TStrSet &terms, - CModelPlotData &modelPlotData) const -{ - const CDataGatherer &gatherer = this->base().dataGatherer(); - if (!gatherer.dataAvailable(time)) - { + const TStrSet& terms, + CModelPlotData& modelPlotData) const { + const CDataGatherer& gatherer = this->base().dataGatherer(); + if (!gatherer.dataAvailable(time)) { return; } bool isPopulation{gatherer.isPopulation()}; - auto addCurrentBucketValue = [&](std::size_t pid, std::size_t cid) - { - const std::string &byFieldValue{this->byFieldValue(pid, cid)}; - if (this->contains(terms, byFieldValue)) - { - TDouble1Vec value(this->base().currentBucketValue(feature, pid, cid, time)); - if (!value.empty()) - { - const std::string &overFieldValue{isPopulation ? - this->base().personName(pid) : - EMPTY_STRING}; - modelPlotData.get(feature, byFieldValue).addValue(overFieldValue, value[0]); - } + auto addCurrentBucketValue = [&](std::size_t pid, std::size_t cid) { + const std::string& byFieldValue{this->byFieldValue(pid, cid)}; + if (this->contains(terms, byFieldValue)) { + TDouble1Vec value(this->base().currentBucketValue(feature, pid, cid, time)); + if (!value.empty()) { + const std::string& overFieldValue{isPopulation ? 
this->base().personName(pid) : EMPTY_STRING}; + modelPlotData.get(feature, byFieldValue).addValue(overFieldValue, value[0]); } - }; - - if (model_t::countsEmptyBuckets(feature)) - { - for (std::size_t pid = 0u; pid < gatherer.numberPeople(); ++pid) - { - if (gatherer.isPersonActive(pid)) - { - if (isPopulation) - { - for (std::size_t cid = 0u; cid < gatherer.numberAttributes(); ++cid) - { - if (gatherer.isAttributeActive(cid)) - { + } + }; + + if (model_t::countsEmptyBuckets(feature)) { + for (std::size_t pid = 0u; pid < gatherer.numberPeople(); ++pid) { + if (gatherer.isPersonActive(pid)) { + if (isPopulation) { + for (std::size_t cid = 0u; cid < gatherer.numberAttributes(); ++cid) { + if (gatherer.isAttributeActive(cid)) { addCurrentBucketValue(pid, cid); } } - } - else - { + } else { addCurrentBucketValue(pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID); } } } - } - else - { - for (const auto &count : gatherer.bucketCounts(time)) - { + } else { + for (const auto& count : gatherer.bucketCounts(time)) { std::size_t pid{gatherer.extractPersonId(count)}; std::size_t cid{gatherer.extractAttributeId(count)}; addCurrentBucketValue(pid, cid); @@ -190,164 +146,114 @@ void CModelDetailsView::addCurrentBucketValues(core_t::TTime time, } } -bool CModelDetailsView::contains(const TStrSet &terms, const std::string &key) -{ +bool CModelDetailsView::contains(const TStrSet& terms, const std::string& key) { return terms.empty() || key.empty() || terms.find(key) != terms.end(); } -bool CModelDetailsView::hasByField() const -{ - return (this->base().isPopulation() ? - this->base().dataGatherer().attributeFieldName() : - this->base().dataGatherer().personFieldName()).empty(); +bool CModelDetailsView::hasByField() const { + return (this->base().isPopulation() ? this->base().dataGatherer().attributeFieldName() : this->base().dataGatherer().personFieldName()) + .empty(); } -std::size_t CModelDetailsView::maxByFieldId() const -{ - return this->base().isPopulation() ? - this->base().dataGatherer().numberAttributes() : - this->base().dataGatherer().numberPeople(); +std::size_t CModelDetailsView::maxByFieldId() const { + return this->base().isPopulation() ? this->base().dataGatherer().numberAttributes() : this->base().dataGatherer().numberPeople(); } -bool CModelDetailsView::byFieldId(const std::string &byFieldValue, - std::size_t &result) const -{ - return this->base().isPopulation() ? - this->base().dataGatherer().attributeId(byFieldValue, result) : - this->base().dataGatherer().personId(byFieldValue, result); +bool CModelDetailsView::byFieldId(const std::string& byFieldValue, std::size_t& result) const { + return this->base().isPopulation() ? this->base().dataGatherer().attributeId(byFieldValue, result) + : this->base().dataGatherer().personId(byFieldValue, result); } -const std::string &CModelDetailsView::byFieldValue(std::size_t byFieldId) const -{ - return this->base().isPopulation() ? - this->base().attributeName(byFieldId) : - this->base().personName(byFieldId); +const std::string& CModelDetailsView::byFieldValue(std::size_t byFieldId) const { + return this->base().isPopulation() ? this->base().attributeName(byFieldId) : this->base().personName(byFieldId); } -const std::string &CModelDetailsView::byFieldValue(std::size_t pid, std::size_t cid) const -{ - return this->base().isPopulation() ? - this->base().attributeName(cid) : this->base().personName(pid); +const std::string& CModelDetailsView::byFieldValue(std::size_t pid, std::size_t cid) const { + return this->base().isPopulation() ? 
this->base().attributeName(cid) : this->base().personName(pid); } -bool CModelDetailsView::isByFieldIdActive(std::size_t byFieldId) const -{ - return this->base().isPopulation() ? - this->base().dataGatherer().isAttributeActive(byFieldId) : - this->base().dataGatherer().isPersonActive(byFieldId); +bool CModelDetailsView::isByFieldIdActive(std::size_t byFieldId) const { + return this->base().isPopulation() ? this->base().dataGatherer().isAttributeActive(byFieldId) + : this->base().dataGatherer().isPersonActive(byFieldId); } ////////// CEventRateModelDetailsView Implementation ////////// -CEventRateModelDetailsView::CEventRateModelDetailsView(const CEventRateModel &model) : - m_Model(&model) -{} +CEventRateModelDetailsView::CEventRateModelDetailsView(const CEventRateModel& model) : m_Model(&model) { +} -const maths::CModel *CEventRateModelDetailsView::model(model_t::EFeature feature, - std::size_t byFieldId) const -{ +const maths::CModel* CEventRateModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const { return m_Model->model(feature, byFieldId); } -const CAnomalyDetectorModel &CEventRateModelDetailsView::base() const -{ +const CAnomalyDetectorModel& CEventRateModelDetailsView::base() const { return *m_Model; } -double CEventRateModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, - std::size_t /*byFieldId*/, - core_t::TTime /*time*/) const -{ +double +CEventRateModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, std::size_t /*byFieldId*/, core_t::TTime /*time*/) const { return 1.0; } ////////// CEventRatePopulationModelDetailsView Implementation ////////// -CEventRatePopulationModelDetailsView::CEventRatePopulationModelDetailsView(const CEventRatePopulationModel &model) : - m_Model(&model) -{ +CEventRatePopulationModelDetailsView::CEventRatePopulationModelDetailsView(const CEventRatePopulationModel& model) : m_Model(&model) { } -const maths::CModel *CEventRatePopulationModelDetailsView::model(model_t::EFeature feature, - std::size_t byFieldId) const -{ +const maths::CModel* CEventRatePopulationModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const { return m_Model->model(feature, byFieldId); } -const CAnomalyDetectorModel &CEventRatePopulationModelDetailsView::base() const -{ +const CAnomalyDetectorModel& CEventRatePopulationModelDetailsView::base() const { return *m_Model; } double CEventRatePopulationModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, std::size_t /*byFieldId*/, - core_t::TTime /*time*/) const -{ + core_t::TTime /*time*/) const { return 1.0; } ////////// CMetricModelDetailsView Implementation ////////// -CMetricModelDetailsView::CMetricModelDetailsView(const CMetricModel &model) : - m_Model(&model) -{} +CMetricModelDetailsView::CMetricModelDetailsView(const CMetricModel& model) : m_Model(&model) { +} -const maths::CModel *CMetricModelDetailsView::model(model_t::EFeature feature, - std::size_t byFieldId) const -{ +const maths::CModel* CMetricModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const { return m_Model->model(feature, byFieldId); } -const CAnomalyDetectorModel &CMetricModelDetailsView::base() const -{ +const CAnomalyDetectorModel& CMetricModelDetailsView::base() const { return *m_Model; } -double CMetricModelDetailsView::countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const -{ +double CMetricModelDetailsView::countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) 
const { TOptionalUInt64 count = m_Model->currentBucketCount(byFieldId, time); - if (!count) - { + if (!count) { return 1.0; } - return model_t::varianceScale(feature, - m_Model->dataGatherer().effectiveSampleCount(byFieldId), - static_cast<double>(*count)); + return model_t::varianceScale(feature, m_Model->dataGatherer().effectiveSampleCount(byFieldId), static_cast<double>(*count)); } ////////// CMetricPopulationModelDetailsView Implementation ////////// -CMetricPopulationModelDetailsView::CMetricPopulationModelDetailsView(const CMetricPopulationModel &model) : - m_Model(&model) -{ +CMetricPopulationModelDetailsView::CMetricPopulationModelDetailsView(const CMetricPopulationModel& model) : m_Model(&model) { } -const maths::CModel *CMetricPopulationModelDetailsView::model(model_t::EFeature feature, - std::size_t byFieldId) const -{ +const maths::CModel* CMetricPopulationModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const { return m_Model->model(feature, byFieldId); } -const CAnomalyDetectorModel &CMetricPopulationModelDetailsView::base() const -{ +const CAnomalyDetectorModel& CMetricPopulationModelDetailsView::base() const { return *m_Model; } -double CMetricPopulationModelDetailsView::countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const -{ +double CMetricPopulationModelDetailsView::countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const { TOptionalUInt64 count = m_Model->currentBucketCount(byFieldId, time); - if (!count) - { + if (!count) { return 1.0; } - return model_t::varianceScale(feature, - m_Model->dataGatherer().effectiveSampleCount(byFieldId), - static_cast<double>(*count)); + return model_t::varianceScale(feature, m_Model->dataGatherer().effectiveSampleCount(byFieldId), static_cast<double>(*count)); } - } } diff --git a/lib/model/CModelFactory.cc b/lib/model/CModelFactory.cc index c01725859b..1c5220bfb0 100644 --- 
-CModelFactory::TMathsModelPtr - CModelFactory::defaultFeatureModel(model_t::EFeature feature, - core_t::TTime bucketLength, - double minimumSeasonalVarianceScale, - bool modelAnomalies) const -{ - if (model_t::isCategorical(feature)) - { +CModelFactory::TMathsModelPtr CModelFactory::defaultFeatureModel(model_t::EFeature feature, + core_t::TTime bucketLength, + double minimumSeasonalVarianceScale, + bool modelAnomalies) const { + if (model_t::isCategorical(feature)) { return TMathsModelPtr(); } @@ -89,43 +78,36 @@ CModelFactory::TMathsModelPtr std::size_t dimension{model_t::dimension(feature)}; bool controlDecayRate{m_ModelParams.s_ControlDecayRate && !model_t::isConstant(feature)}; - TDecayRateController2Ary controllers{{maths::CDecayRateController{ maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease, - dimension}, - maths::CDecayRateController{ maths::CDecayRateController::E_PredictionBias - | maths::CDecayRateController::E_PredictionErrorIncrease - | maths::CDecayRateController::E_PredictionErrorDecrease, - dimension}}}; + TDecayRateController2Ary controllers{ + {maths::CDecayRateController{maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, + dimension}, + maths::CDecayRateController{maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease | + maths::CDecayRateController::E_PredictionErrorDecrease, + dimension}}}; TDecompositionCPtr trend{this->defaultDecomposition(feature, bucketLength)}; - if (dimension == 1) - { + if (dimension == 1) { TPriorPtr prior{this->defaultPrior(feature)}; - return boost::make_shared( - params, 0, // identifier (unused). - *trend, *prior, - controlDecayRate ? &controllers : 0, - modelAnomalies && !model_t::isConstant(feature)); + return boost::make_shared(params, + 0, // identifier (unused). + *trend, + *prior, + controlDecayRate ? &controllers : 0, + modelAnomalies && !model_t::isConstant(feature)); } TMultivariatePriorPtr prior{this->defaultMultivariatePrior(feature)}; return boost::make_shared( - params, *trend, *prior, - controlDecayRate ? &controllers : 0, - modelAnomalies && !model_t::isConstant(feature)); + params, *trend, *prior, controlDecayRate ? 
&controllers : 0, modelAnomalies && !model_t::isConstant(feature)); } -const CModelFactory::TFeatureMultivariatePriorPtrPrVec & - CModelFactory::defaultCorrelatePriors(const TFeatureVec &features) const -{ +const CModelFactory::TFeatureMultivariatePriorPtrPrVec& CModelFactory::defaultCorrelatePriors(const TFeatureVec& features) const { auto result = m_CorrelatePriorCache.insert({features, TFeatureMultivariatePriorPtrPrVec()}); - if (result.second) - { + if (result.second) { result.first->second.reserve(features.size()); - for (auto feature : features) - { - if (model_t::isCategorical(feature) || model_t::dimension(feature) > 1) - { + for (auto feature : features) { + if (model_t::isCategorical(feature) || model_t::dimension(feature) > 1) { continue; } result.first->second.emplace_back(feature, this->defaultCorrelatePrior(feature)); @@ -134,100 +116,65 @@ const CModelFactory::TFeatureMultivariatePriorPtrPrVec & return result.first->second; } -const CModelFactory::TFeatureCorrelationsPtrPrVec & - CModelFactory::defaultCorrelates(const TFeatureVec &features) const -{ +const CModelFactory::TFeatureCorrelationsPtrPrVec& CModelFactory::defaultCorrelates(const TFeatureVec& features) const { auto result = m_CorrelationsCache.insert({features, TFeatureCorrelationsPtrPrVec()}); - if (result.second) - { + if (result.second) { result.first->second.reserve(features.size()); - for (auto feature : features) - { - if (!model_t::isCategorical(feature) && model_t::dimension(feature) == 1) - { - result.first->second.emplace_back( - feature, TCorrelationsPtr(new maths::CTimeSeriesCorrelations( - m_ModelParams.s_MinimumSignificantCorrelation, - m_ModelParams.s_DecayRate))); + for (auto feature : features) { + if (!model_t::isCategorical(feature) && model_t::dimension(feature) == 1) { + result.first->second.emplace_back(feature, + TCorrelationsPtr(new maths::CTimeSeriesCorrelations( + m_ModelParams.s_MinimumSignificantCorrelation, m_ModelParams.s_DecayRate))); } } } return result.first->second; } -CModelFactory::TPriorPtr - CModelFactory::defaultPrior(model_t::EFeature feature) const -{ +CModelFactory::TPriorPtr CModelFactory::defaultPrior(model_t::EFeature feature) const { return this->defaultPrior(feature, m_ModelParams); } -CModelFactory::TMultivariatePriorPtr - CModelFactory::defaultMultivariatePrior(model_t::EFeature feature) const -{ +CModelFactory::TMultivariatePriorPtr CModelFactory::defaultMultivariatePrior(model_t::EFeature feature) const { return this->defaultMultivariatePrior(feature, m_ModelParams); } -CModelFactory::TMultivariatePriorPtr - CModelFactory::defaultCorrelatePrior(model_t::EFeature feature) const -{ +CModelFactory::TMultivariatePriorPtr CModelFactory::defaultCorrelatePrior(model_t::EFeature feature) const { return this->defaultCorrelatePrior(feature, m_ModelParams); } -maths::CMultinomialConjugate CModelFactory::defaultCategoricalPrior() const -{ - return maths::CMultinomialConjugate::nonInformativePrior( - boost::numeric::bounds<int>::highest(), - m_ModelParams.s_DecayRate); +maths::CMultinomialConjugate CModelFactory::defaultCategoricalPrior() const { + return maths::CMultinomialConjugate::nonInformativePrior(boost::numeric::bounds<int>::highest(), m_ModelParams.s_DecayRate); } -CModelFactory::TDecompositionCPtr - CModelFactory::defaultDecomposition(model_t::EFeature feature, - core_t::TTime bucketLength) const -{ - if (model_t::isCategorical(feature)) - { +CModelFactory::TDecompositionCPtr CModelFactory::defaultDecomposition(model_t::EFeature feature, core_t::TTime bucketLength) const { + if 
(model_t::isCategorical(feature)) { return TDecompositionCPtr(); - } - else if (model_t::isDiurnal(feature) || model_t::isConstant(feature)) - { + } else if (model_t::isDiurnal(feature) || model_t::isConstant(feature)) { return boost::make_shared<maths::CTimeSeriesDecompositionStub>(); } double decayRate = CAnomalyDetectorModelConfig::trendDecayRate(m_ModelParams.s_DecayRate, bucketLength); - return boost::make_shared<maths::CTimeSeriesDecomposition>(decayRate, bucketLength, - m_ModelParams.s_ComponentSize); + return boost::make_shared<maths::CTimeSeriesDecomposition>(decayRate, bucketLength, m_ModelParams.s_ComponentSize); } -const CModelFactory::TFeatureInfluenceCalculatorCPtrPrVec & - CModelFactory::defaultInfluenceCalculators(const std::string &influencerName, - const TFeatureVec &features) const -{ - TFeatureInfluenceCalculatorCPtrPrVec &result = - m_InfluenceCalculatorCache[TStrFeatureVecPr(influencerName, features)]; +const CModelFactory::TFeatureInfluenceCalculatorCPtrPrVec& CModelFactory::defaultInfluenceCalculators(const std::string& influencerName, + const TFeatureVec& features) const { + TFeatureInfluenceCalculatorCPtrPrVec& result = m_InfluenceCalculatorCache[TStrFeatureVecPr(influencerName, features)]; - if (result.empty()) - { + if (result.empty()) { result.reserve(features.size()); TStrCRefVec partitioningFields = this->partitioningFields(); - std::sort(partitioningFields.begin(), - partitioningFields.end(), - maths::COrderings::SReferenceLess()); - - for (auto feature : features) - { - if (model_t::isCategorical(feature)) - { + std::sort(partitioningFields.begin(), partitioningFields.end(), maths::COrderings::SReferenceLess()); + + for (auto feature : features) { + if (model_t::isCategorical(feature)) { continue; } - if (std::binary_search(partitioningFields.begin(), - partitioningFields.end(), - influencerName, - maths::COrderings::SReferenceLess())) - { + if (std::binary_search( + partitioningFields.begin(), partitioningFields.end(), influencerName, maths::COrderings::SReferenceLess())) { result.emplace_back(feature, boost::make_shared<CIndicatorInfluenceCalculator>()); - } - else - { + } else { result.emplace_back(feature, model_t::influenceCalculator(feature)); } } @@ -236,114 +183,91 @@ const CModelFactory::TFeatureInfluenceCalculatorCPtrPrVec & return result; } -void CModelFactory::sampleCountFactor(std::size_t sampleCountFactor) -{ +void CModelFactory::sampleCountFactor(std::size_t sampleCountFactor) { m_ModelParams.s_SampleCountFactor = sampleCountFactor; } -void CModelFactory::excludeFrequent(model_t::EExcludeFrequent excludeFrequent) -{ +void CModelFactory::excludeFrequent(model_t::EExcludeFrequent excludeFrequent) { m_ModelParams.s_ExcludeFrequent = excludeFrequent; } -void CModelFactory::detectionRules(TDetectionRuleVecCRef detectionRules) -{ +void CModelFactory::detectionRules(TDetectionRuleVecCRef detectionRules) { m_ModelParams.s_DetectionRules = detectionRules; } -void CModelFactory::scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents) -{ +void CModelFactory::scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents) { m_ModelParams.s_ScheduledEvents = scheduledEvents; } -void CModelFactory::learnRate(double learnRate) -{ +void CModelFactory::learnRate(double learnRate) { m_ModelParams.s_LearnRate = learnRate; } -void CModelFactory::decayRate(double decayRate) -{ +void CModelFactory::decayRate(double decayRate) { m_ModelParams.s_DecayRate = decayRate; } -void CModelFactory::initialDecayRateMultiplier(double multiplier) -{ +void CModelFactory::initialDecayRateMultiplier(double multiplier) { m_ModelParams.s_InitialDecayRateMultiplier = multiplier; } -void 
CModelFactory::maximumUpdatesPerBucket(double maximumUpdatesPerBucket) -{ +void CModelFactory::maximumUpdatesPerBucket(double maximumUpdatesPerBucket) { m_ModelParams.s_MaximumUpdatesPerBucket = maximumUpdatesPerBucket; } -void CModelFactory::pruneWindowScaleMinimum(double factor) -{ +void CModelFactory::pruneWindowScaleMinimum(double factor) { m_ModelParams.s_PruneWindowScaleMinimum = factor; } -void CModelFactory::pruneWindowScaleMaximum(double factor) -{ +void CModelFactory::pruneWindowScaleMaximum(double factor) { m_ModelParams.s_PruneWindowScaleMaximum = factor; } -void CModelFactory::multivariateByFields(bool enabled) -{ +void CModelFactory::multivariateByFields(bool enabled) { m_ModelParams.s_MultivariateByFields = enabled; } -void CModelFactory::minimumModeFraction(double minimumModeFraction) -{ +void CModelFactory::minimumModeFraction(double minimumModeFraction) { m_ModelParams.s_MinimumModeFraction = minimumModeFraction; } -void CModelFactory::minimumModeCount(double minimumModeCount) -{ +void CModelFactory::minimumModeCount(double minimumModeCount) { m_ModelParams.s_MinimumModeCount = minimumModeCount; } -void CModelFactory::componentSize(std::size_t componentSize) -{ +void CModelFactory::componentSize(std::size_t componentSize) { m_ModelParams.s_ComponentSize = componentSize; } -double CModelFactory::minimumModeFraction() const -{ +double CModelFactory::minimumModeFraction() const { return m_ModelParams.s_MinimumModeFraction; } -double CModelFactory::minimumModeCount() const -{ +double CModelFactory::minimumModeCount() const { return m_ModelParams.s_MinimumModeCount; } -std::size_t CModelFactory::componentSize() const -{ +std::size_t CModelFactory::componentSize() const { return m_ModelParams.s_ComponentSize; } -void CModelFactory::updateBucketLength(core_t::TTime length) -{ +void CModelFactory::updateBucketLength(core_t::TTime length) { m_ModelParams.s_BucketLength = length; } -void CModelFactory::swap(CModelFactory &other) -{ +void CModelFactory::swap(CModelFactory& other) { std::swap(m_ModelParams, other.m_ModelParams); m_MathsModelCache.swap(other.m_MathsModelCache); m_InfluenceCalculatorCache.swap(other.m_InfluenceCalculatorCache); } -CModelFactory::TMultivariatePriorPtr - CModelFactory::multivariateNormalPrior(std::size_t dimension, const SModelParams &params) const -{ +CModelFactory::TMultivariatePriorPtr CModelFactory::multivariateNormalPrior(std::size_t dimension, const SModelParams& params) const { return maths::CMultivariateNormalConjugateFactory::nonInformative(dimension, this->dataType(), params.s_DecayRate); } -CModelFactory::TMultivariatePriorPtr - CModelFactory::multivariateMultimodalPrior(std::size_t dimension, - const SModelParams &params, - const maths::CMultivariatePrior &modePrior) const -{ +CModelFactory::TMultivariatePriorPtr CModelFactory::multivariateMultimodalPrior(std::size_t dimension, + const SModelParams& params, + const maths::CMultivariatePrior& modePrior) const { return maths::CMultivariateMultimodalPriorFactory::nonInformative(dimension, this->dataType(), params.s_DecayRate, @@ -355,23 +279,15 @@ CModelFactory::TMultivariatePriorPtr } CModelFactory::TMultivariatePriorPtr - CModelFactory::multivariateOneOfNPrior(std::size_t dimension, - const SModelParams &params, - const TMultivariatePriorPtrVec &models) const -{ - return maths::CMultivariateOneOfNPriorFactory::nonInformative(dimension, - this->dataType(), - params.s_DecayRate, - models); -} - -CModelFactory::TPriorPtr CModelFactory::timeOfDayPrior(const SModelParams &params) const -{ 
+CModelFactory::multivariateOneOfNPrior(std::size_t dimension, const SModelParams& params, const TMultivariatePriorPtrVec& models) const { + return maths::CMultivariateOneOfNPriorFactory::nonInformative(dimension, this->dataType(), params.s_DecayRate, models); +} + +CModelFactory::TPriorPtr CModelFactory::timeOfDayPrior(const SModelParams& params) const { using TPriorPtrVec = std::vector; maths_t::EDataType dataType = this->dataType(); - maths::CNormalMeanPrecConjugate normalPrior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); + maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); // Create a multimodal prior with purely normal distributions // - don't bother with long-tail distributions @@ -391,43 +307,34 @@ CModelFactory::TPriorPtr CModelFactory::timeOfDayPrior(const SModelParams ¶m return boost::make_shared(dataType, clusterer, modePrior, params.s_DecayRate); } -CModelFactory::TMultivariatePriorPtr CModelFactory::latLongPrior(const SModelParams ¶ms) const -{ +CModelFactory::TMultivariatePriorPtr CModelFactory::latLongPrior(const SModelParams& params) const { maths_t::EDataType dataType = this->dataType(); - TMultivariatePriorPtr modePrior = - maths::CMultivariateNormalConjugateFactory::nonInformative(2, dataType, params.s_DecayRate); - return maths::CMultivariateMultimodalPriorFactory::nonInformative( - 2, // dimension - dataType, params.s_DecayRate, - maths_t::E_ClustersFractionWeight, - 0.03, // minimumClusterFraction - 4, // minimumClusterCount - CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION, - *modePrior); -} - -const SModelParams &CModelFactory::modelParams() const -{ + TMultivariatePriorPtr modePrior = maths::CMultivariateNormalConjugateFactory::nonInformative(2, dataType, params.s_DecayRate); + return maths::CMultivariateMultimodalPriorFactory::nonInformative(2, // dimension + dataType, + params.s_DecayRate, + maths_t::E_ClustersFractionWeight, + 0.03, // minimumClusterFraction + 4, // minimumClusterCount + CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION, + *modePrior); +} + +const SModelParams& CModelFactory::modelParams() const { return m_ModelParams; } -CModelFactory::SModelInitializationData::SModelInitializationData(const TDataGathererPtr &dataGatherer) : - s_DataGatherer(dataGatherer) -{} +CModelFactory::SModelInitializationData::SModelInitializationData(const TDataGathererPtr& dataGatherer) : s_DataGatherer(dataGatherer) { +} CModelFactory::SGathererInitializationData::SGathererInitializationData(core_t::TTime startTime, - const std::string &partitionFieldValue, - unsigned int sampleOverrideCount) : - s_StartTime(startTime), - s_PartitionFieldValue(partitionFieldValue), - s_SampleOverrideCount(sampleOverrideCount) -{} - -CModelFactory::SGathererInitializationData::SGathererInitializationData(core_t::TTime startTime) : - s_StartTime(startTime), - s_PartitionFieldValue(EMPTY_STRING), - s_SampleOverrideCount(0u) -{} + const std::string& partitionFieldValue, + unsigned int sampleOverrideCount) + : s_StartTime(startTime), s_PartitionFieldValue(partitionFieldValue), s_SampleOverrideCount(sampleOverrideCount) { +} +CModelFactory::SGathererInitializationData::SGathererInitializationData(core_t::TTime startTime) + : s_StartTime(startTime), s_PartitionFieldValue(EMPTY_STRING), s_SampleOverrideCount(0u) { +} } } diff --git a/lib/model/CModelParams.cc b/lib/model/CModelParams.cc index c05c046671..2998bc3318 100644 --- 
a/lib/model/CModelParams.cc +++ b/lib/model/CModelParams.cc @@ -16,84 +16,74 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { const SModelParams::TDetectionRuleVec EMPTY_RULES; const SModelParams::TStrDetectionRulePrVec EMPTY_SCHEDULED_EVENTS; const core_t::TTime SAMPLING_AGE_CUTOFF_DEFAULT(2 * core::constants::DAY); } -SModelParams::SModelParams(core_t::TTime bucketLength) : - s_BucketLength(bucketLength), - s_MultivariateComponentDelimiter(CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER), - s_LearnRate(1.0), - s_DecayRate(0.0), - s_InitialDecayRateMultiplier(CAnomalyDetectorModelConfig::DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER), - s_ControlDecayRate(true), - s_MinimumModeFraction(0.0), - s_MinimumModeCount(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT), - s_CutoffToModelEmptyBuckets(CAnomalyDetectorModelConfig::DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS), - s_ComponentSize(CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE), - s_MinimumTimeToDetectChange(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE), - s_MaximumTimeToTestForChange(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE), - s_ExcludeFrequent(model_t::E_XF_None), - s_ExcludePersonFrequency(0.1), - s_ExcludeAttributeFrequency(0.1), - s_MaximumUpdatesPerBucket(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET), - s_InfluenceCutoff(CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF), - s_LatencyBuckets(CAnomalyDetectorModelConfig::DEFAULT_LATENCY_BUCKETS), - s_SampleCountFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY), - s_SampleQueueGrowthFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR), - s_PruneWindowScaleMinimum(CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM), - s_PruneWindowScaleMaximum(CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM), - s_CorrelationModelsOverhead(CAnomalyDetectorModelConfig::DEFAULT_CORRELATION_MODELS_OVERHEAD), - s_MultivariateByFields(false), - s_MinimumSignificantCorrelation(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION), - s_DetectionRules(EMPTY_RULES), - s_ScheduledEvents(EMPTY_SCHEDULED_EVENTS), - s_BucketResultsDelay(0), - s_MinimumToDeduplicate(10000), - s_CacheProbabilities(true), - s_SamplingAgeCutoff(SAMPLING_AGE_CUTOFF_DEFAULT) -{} +SModelParams::SModelParams(core_t::TTime bucketLength) + : s_BucketLength(bucketLength), + s_MultivariateComponentDelimiter(CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER), + s_LearnRate(1.0), + s_DecayRate(0.0), + s_InitialDecayRateMultiplier(CAnomalyDetectorModelConfig::DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER), + s_ControlDecayRate(true), + s_MinimumModeFraction(0.0), + s_MinimumModeCount(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT), + s_CutoffToModelEmptyBuckets(CAnomalyDetectorModelConfig::DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS), + s_ComponentSize(CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE), + s_MinimumTimeToDetectChange(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE), + s_MaximumTimeToTestForChange(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE), + s_ExcludeFrequent(model_t::E_XF_None), + s_ExcludePersonFrequency(0.1), + s_ExcludeAttributeFrequency(0.1), + s_MaximumUpdatesPerBucket(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET), + 
s_InfluenceCutoff(CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF), + s_LatencyBuckets(CAnomalyDetectorModelConfig::DEFAULT_LATENCY_BUCKETS), + s_SampleCountFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY), + s_SampleQueueGrowthFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR), + s_PruneWindowScaleMinimum(CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM), + s_PruneWindowScaleMaximum(CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM), + s_CorrelationModelsOverhead(CAnomalyDetectorModelConfig::DEFAULT_CORRELATION_MODELS_OVERHEAD), + s_MultivariateByFields(false), + s_MinimumSignificantCorrelation(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION), + s_DetectionRules(EMPTY_RULES), + s_ScheduledEvents(EMPTY_SCHEDULED_EVENTS), + s_BucketResultsDelay(0), + s_MinimumToDeduplicate(10000), + s_CacheProbabilities(true), + s_SamplingAgeCutoff(SAMPLING_AGE_CUTOFF_DEFAULT) { +} -void SModelParams::configureLatency(core_t::TTime latency, core_t::TTime bucketLength) -{ +void SModelParams::configureLatency(core_t::TTime latency, core_t::TTime bucketLength) { s_LatencyBuckets = (latency + bucketLength - 1) / bucketLength; - if (s_LatencyBuckets > 0) - { + if (s_LatencyBuckets > 0) { s_SampleCountFactor = CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY; - if (s_LatencyBuckets > 50) - { + if (s_LatencyBuckets > 50) { LOG_WARN("There are a large number of buckets in the latency window. " "Please ensure sufficient resources are available for this job."); } } } -double SModelParams::minimumCategoryCount() const -{ +double SModelParams::minimumCategoryCount() const { return s_LearnRate * CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION; } -maths::STimeSeriesDecompositionRestoreParams SModelParams::decompositionRestoreParams(maths_t::EDataType dataType) const -{ +maths::STimeSeriesDecompositionRestoreParams SModelParams::decompositionRestoreParams(maths_t::EDataType dataType) const { double decayRate{CAnomalyDetectorModelConfig::trendDecayRate(s_DecayRate, s_BucketLength)}; return {decayRate, s_BucketLength, s_ComponentSize, this->distributionRestoreParams(dataType)}; } -maths::SDistributionRestoreParams SModelParams::distributionRestoreParams(maths_t::EDataType dataType) const -{ +maths::SDistributionRestoreParams SModelParams::distributionRestoreParams(maths_t::EDataType dataType) const { return {dataType, s_DecayRate, s_MinimumModeFraction, s_MinimumModeCount, this->minimumCategoryCount()}; } -uint64_t SModelParams::checksum(uint64_t seed) const -{ +uint64_t SModelParams::checksum(uint64_t seed) const { seed = maths::CChecksum::calculate(seed, s_LearnRate); seed = maths::CChecksum::calculate(seed, s_DecayRate); seed = maths::CChecksum::calculate(seed, s_InitialDecayRateMultiplier); @@ -121,6 +111,5 @@ uint64_t SModelParams::checksum(uint64_t seed) const seed = maths::CChecksum::calculate(seed, s_MinimumToDeduplicate); return maths::CChecksum::calculate(seed, s_SamplingAgeCutoff); } - } } diff --git a/lib/model/CModelPlotData.cc b/lib/model/CModelPlotData.cc index 46bec1b32c..bba33315bd 100644 --- a/lib/model/CModelPlotData.cc +++ b/lib/model/CModelPlotData.cc @@ -11,12 +11,9 @@ #include -namespace ml -{ -namespace model -{ -namespace -{ +namespace ml { +namespace model { +namespace { const std::string DATA_PER_FEATURE_TAG("a"); const std::string TIME_TAG("b"); @@ -29,95 +26,67 @@ const std::string LOWER_BOUND_TAG("a"); const std::string UPPER_BOUND_TAG("b"); const 
std::string MEDIAN_TAG("c"); const std::string VALUES_PER_OVERFIELD_TAG("d"); - } -CModelPlotData::CModelPlotData() : m_Time(0) -{ +CModelPlotData::CModelPlotData() : m_Time(0) { } CModelPlotData::CModelPlotData(core_t::TTime time, - const std::string &partitionFieldName, - const std::string &partitionFieldValue, - const std::string &overFieldName, - const std::string &byFieldName, - core_t::TTime bucketSpan, - int detectorIndex) : - m_Time(time), - m_PartitionFieldName(partitionFieldName), - m_PartitionFieldValue(partitionFieldValue), - m_OverFieldName(overFieldName), - m_ByFieldName(byFieldName), - m_BucketSpan(bucketSpan), - m_DetectorIndex(detectorIndex) -{ -} - -CModelPlotData::SByFieldData::SByFieldData() - : s_LowerBound(0.0), - s_UpperBound(0.0), - s_Median(0.0), - s_ValuesPerOverField() -{ + const std::string& partitionFieldName, + const std::string& partitionFieldValue, + const std::string& overFieldName, + const std::string& byFieldName, + core_t::TTime bucketSpan, + int detectorIndex) + : m_Time(time), + m_PartitionFieldName(partitionFieldName), + m_PartitionFieldValue(partitionFieldValue), + m_OverFieldName(overFieldName), + m_ByFieldName(byFieldName), + m_BucketSpan(bucketSpan), + m_DetectorIndex(detectorIndex) { +} + +CModelPlotData::SByFieldData::SByFieldData() : s_LowerBound(0.0), s_UpperBound(0.0), s_Median(0.0), s_ValuesPerOverField() { } CModelPlotData::SByFieldData::SByFieldData(double lowerBound, double upperBound, double median) - : s_LowerBound(lowerBound), - s_UpperBound(upperBound), - s_Median(median), - s_ValuesPerOverField() -{ + : s_LowerBound(lowerBound), s_UpperBound(upperBound), s_Median(median), s_ValuesPerOverField() { } -void CModelPlotData::SByFieldData::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CModelPlotData::SByFieldData::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CPersistUtils::persist(LOWER_BOUND_TAG, s_LowerBound, inserter); core::CPersistUtils::persist(UPPER_BOUND_TAG, s_UpperBound, inserter); core::CPersistUtils::persist(MEDIAN_TAG, s_Median, inserter); core::CPersistUtils::persist(VALUES_PER_OVERFIELD_TAG, s_ValuesPerOverField, inserter); } -bool CModelPlotData::SByFieldData::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == LOWER_BOUND_TAG) - { - if (!core::CPersistUtils::restore(LOWER_BOUND_TAG, s_LowerBound, traverser)) - { +bool CModelPlotData::SByFieldData::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == LOWER_BOUND_TAG) { + if (!core::CPersistUtils::restore(LOWER_BOUND_TAG, s_LowerBound, traverser)) { return false; } - } - else if (name == UPPER_BOUND_TAG) - { - if (!core::CPersistUtils::restore(UPPER_BOUND_TAG, s_UpperBound, traverser)) - { + } else if (name == UPPER_BOUND_TAG) { + if (!core::CPersistUtils::restore(UPPER_BOUND_TAG, s_UpperBound, traverser)) { return false; } - } - else if (name == MEDIAN_TAG) - { - if (!core::CPersistUtils::restore(MEDIAN_TAG, s_Median, traverser)) - { + } else if (name == MEDIAN_TAG) { + if (!core::CPersistUtils::restore(MEDIAN_TAG, s_Median, traverser)) { return false; } - } - else if (name == VALUES_PER_OVERFIELD_TAG) - { - if (!core::CPersistUtils::restore(VALUES_PER_OVERFIELD_TAG, s_ValuesPerOverField, traverser)) - { + } else if (name == VALUES_PER_OVERFIELD_TAG) { + if (!core::CPersistUtils::restore(VALUES_PER_OVERFIELD_TAG, s_ValuesPerOverField, traverser)) { 
return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -void CModelPlotData::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CModelPlotData::acceptPersistInserter(core::CStatePersistInserter& inserter) const { TIntStrByFieldDataUMapUMap data(m_DataPerFeature.begin(), m_DataPerFeature.end()); core::CPersistUtils::persist(DATA_PER_FEATURE_TAG, data, inserter); core::CPersistUtils::persist(TIME_TAG, m_Time, inserter); @@ -127,129 +96,93 @@ void CModelPlotData::acceptPersistInserter(core::CStatePersistInserter &inserter core::CPersistUtils::persist(BY_FIELD_NAME_TAG, m_ByFieldName, inserter); } -bool CModelPlotData::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == DATA_PER_FEATURE_TAG) - { +bool CModelPlotData::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == DATA_PER_FEATURE_TAG) { TIntStrByFieldDataUMapUMap data; - if (!core::CPersistUtils::restore(DATA_PER_FEATURE_TAG, data, traverser)) - { + if (!core::CPersistUtils::restore(DATA_PER_FEATURE_TAG, data, traverser)) { return false; } m_DataPerFeature.clear(); - for (TIntStrByFieldDataUMapUMap::const_iterator i = data.begin(); - i != data.end(); ++i) - { + for (TIntStrByFieldDataUMapUMap::const_iterator i = data.begin(); i != data.end(); ++i) { m_DataPerFeature.insert(TFeatureStrByFieldDataUMapPr(model_t::EFeature(i->first), i->second)); } - } - else if (name == TIME_TAG) - { - if (!core::CPersistUtils::restore(TIME_TAG, m_Time, traverser)) - { + } else if (name == TIME_TAG) { + if (!core::CPersistUtils::restore(TIME_TAG, m_Time, traverser)) { return false; } - } - else if (name == PARTITION_FIELD_NAME_TAG) - { - if (!core::CPersistUtils::restore(PARTITION_FIELD_NAME_TAG, m_PartitionFieldName, traverser)) - { + } else if (name == PARTITION_FIELD_NAME_TAG) { + if (!core::CPersistUtils::restore(PARTITION_FIELD_NAME_TAG, m_PartitionFieldName, traverser)) { return false; } - } - else if (name == PARTITION_FIELD_VALUE_TAG) - { - if (!core::CPersistUtils::restore(PARTITION_FIELD_VALUE_TAG, m_PartitionFieldValue, traverser)) - { + } else if (name == PARTITION_FIELD_VALUE_TAG) { + if (!core::CPersistUtils::restore(PARTITION_FIELD_VALUE_TAG, m_PartitionFieldValue, traverser)) { return false; } - } - else if (name == OVER_FIELD_NAME_TAG) - { - if (!core::CPersistUtils::restore(OVER_FIELD_NAME_TAG, m_OverFieldName, traverser)) - { + } else if (name == OVER_FIELD_NAME_TAG) { + if (!core::CPersistUtils::restore(OVER_FIELD_NAME_TAG, m_OverFieldName, traverser)) { return false; } - } - else if (name == BY_FIELD_NAME_TAG) - { - if (!core::CPersistUtils::restore(BY_FIELD_NAME_TAG, m_ByFieldName, traverser)) - { + } else if (name == BY_FIELD_NAME_TAG) { + if (!core::CPersistUtils::restore(BY_FIELD_NAME_TAG, m_ByFieldName, traverser)) { return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -const std::string &CModelPlotData::partitionFieldName() const -{ +const std::string& CModelPlotData::partitionFieldName() const { return m_PartitionFieldName; } -const std::string &CModelPlotData::partitionFieldValue() const -{ +const std::string& CModelPlotData::partitionFieldValue() const { return m_PartitionFieldValue; } -const std::string &CModelPlotData::overFieldName() const -{ +const std::string& CModelPlotData::overFieldName() const { return m_OverFieldName; } -const std::string 
&CModelPlotData::byFieldName() const -{ +const std::string& CModelPlotData::byFieldName() const { return m_ByFieldName; } -core_t::TTime CModelPlotData::time() const -{ +core_t::TTime CModelPlotData::time() const { return m_Time; } -core_t::TTime CModelPlotData::bucketSpan() const -{ +core_t::TTime CModelPlotData::bucketSpan() const { return m_BucketSpan; } -int CModelPlotData::detectorIndex() const -{ +int CModelPlotData::detectorIndex() const { return m_DetectorIndex; } -void CModelPlotData::SByFieldData::addValue(const std::string &personName, double value) -{ +void CModelPlotData::SByFieldData::addValue(const std::string& personName, double value) { s_ValuesPerOverField.emplace_back(personName, value); } -CModelPlotData::TFeatureStrByFieldDataUMapUMapCItr CModelPlotData::begin() const -{ +CModelPlotData::TFeatureStrByFieldDataUMapUMapCItr CModelPlotData::begin() const { return m_DataPerFeature.begin(); } -CModelPlotData::TFeatureStrByFieldDataUMapUMapCItr CModelPlotData::end() const -{ +CModelPlotData::TFeatureStrByFieldDataUMapUMapCItr CModelPlotData::end() const { return m_DataPerFeature.end(); } -CModelPlotData::SByFieldData & -CModelPlotData::get(const model_t::EFeature &feature, const std::string &byFieldValue) -{ +CModelPlotData::SByFieldData& CModelPlotData::get(const model_t::EFeature& feature, const std::string& byFieldValue) { // note: This creates/inserts! elements and returns a reference for writing // data insert happens here return m_DataPerFeature[feature][byFieldValue]; } -std::string CModelPlotData::print() const -{ +std::string CModelPlotData::print() const { return "nothing"; } - } } diff --git a/lib/model/CModelTools.cc b/lib/model/CModelTools.cc index 51544d080c..960b250187 100644 --- a/lib/model/CModelTools.cc +++ b/lib/model/CModelTools.cc @@ -21,46 +21,31 @@ #include #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; //! \brief Visitor to add a probability to variant of possible //! aggregation styles. -struct SAddProbability : public boost::static_visitor -{ - void operator()(double probability, - double weight, - maths::CJointProbabilityOfLessLikelySamples &aggregator) const - { +struct SAddProbability : public boost::static_visitor { + void operator()(double probability, double weight, maths::CJointProbabilityOfLessLikelySamples& aggregator) const { aggregator.add(probability, weight); } - void operator()(double probability, - double /*weight*/, - maths::CProbabilityOfExtremeSample &aggregator) const - { + void operator()(double probability, double /*weight*/, maths::CProbabilityOfExtremeSample& aggregator) const { aggregator.add(probability); } }; //! \brief Visitor to read aggregate probability from a variant //! of possible aggregation styles. 
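// A minimal, self-contained sketch of the visitor dispatch used by the
// aggregation visitors here: std::variant/std::visit stand in for the patch's
// boost::variant/boost::apply_visitor, and the two aggregator structs are toy
// stand-ins, not the real maths classes.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <variant>

struct SJointAggregatorSketch { // stand-in for CJointProbabilityOfLessLikelySamples
    double logP = 0.0;
    void add(double p, double w) { logP += w * std::log(p); }
};
struct SExtremeAggregatorSketch { // stand-in for CProbabilityOfExtremeSample
    double minP = 1.0;
    void add(double p) { minP = std::min(minP, p); }
};
using TAggregatorSketch = std::variant<SJointAggregatorSketch, SExtremeAggregatorSketch>;

// Adds a probability to whichever aggregation style the variant holds; styles
// which don't use the weight simply ignore it, as SAddProbability does.
struct SAddProbabilitySketch {
    double p, w;
    void operator()(SJointAggregatorSketch& a) const { a.add(p, w); }
    void operator()(SExtremeAggregatorSketch& a) const { a.add(p); }
};

int main() {
    TAggregatorSketch agg = SExtremeAggregatorSketch{};
    std::visit(SAddProbabilitySketch{0.05, 1.0}, agg);
    std::visit(SAddProbabilitySketch{0.20, 1.0}, agg);
    std::cout << std::get<SExtremeAggregatorSketch>(agg).minP << '\n'; // prints 0.05
}
// The visitors in the patch follow the same shape, bound with boost::bind and
// applied to each held aggregator in turn.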
-struct SReadProbability : public boost::static_visitor -{ +struct SReadProbability : public boost::static_visitor { template - bool operator()(double weight, - double &result, - const T &aggregator) const - { + bool operator()(double weight, double& result, const T& aggregator) const { double probability; - if (!aggregator.calculate(probability)) - { + if (!aggregator.calculate(probability)) { LOG_ERROR("Failed to compute probability"); return false; } @@ -68,11 +53,9 @@ struct SReadProbability : public boost::static_visitor return true; } template - bool operator()(TMinAccumulator &result, const T &aggregator) const - { + bool operator()(TMinAccumulator& result, const T& aggregator) const { double probability; - if (!aggregator.calculate(probability)) - { + if (!aggregator.calculate(probability)) { LOG_ERROR("Failed to compute probability"); return false; } @@ -80,11 +63,9 @@ struct SReadProbability : public boost::static_visitor return true; } }; - } -void CModelTools::CFuzzyDeduplicate::add(TDouble2Vec value) -{ +void CModelTools::CFuzzyDeduplicate::add(TDouble2Vec value) { // We need a very fast way to compute an approximate percentiles // for a large collection of samples. It is good enough to simply // take a small random sample and compute percentiles on this. We @@ -127,100 +108,75 @@ void CModelTools::CFuzzyDeduplicate::add(TDouble2Vec value) // 98.5 / N < P(f N) <= 100 / N ++m_Count; - if (m_RandomSample.size() < 100) - { + if (m_RandomSample.size() < 100) { m_RandomSample.push_back(std::move(value)); - } - else if (maths::CSampling::uniformSample(m_Rng, 0.0, 1.0) < 100.0 / static_cast(m_Count)) - { + } else if (maths::CSampling::uniformSample(m_Rng, 0.0, 1.0) < 100.0 / static_cast(m_Count)) { std::size_t evict{maths::CSampling::uniformSample(m_Rng, 0, m_RandomSample.size())}; m_RandomSample[evict].swap(value); } } -void CModelTools::CFuzzyDeduplicate::computeEpsilons(core_t::TTime bucketLength, - std::size_t desiredNumberSamples) -{ +void CModelTools::CFuzzyDeduplicate::computeEpsilons(core_t::TTime bucketLength, std::size_t desiredNumberSamples) { m_Quantize = m_Count > 0; - if (m_Quantize) - { + if (m_Quantize) { m_QuantizedValues.reserve(std::min(m_Count, desiredNumberSamples)); m_TimeEps = std::max(bucketLength / 60, core_t::TTime(1)); m_ValueEps.assign(m_RandomSample[0].size(), 0.0); - if (m_RandomSample.size() > 1) - { + if (m_RandomSample.size() > 1) { TDoubleVec values(m_RandomSample.size()); - for (std::size_t i = 0u; i < m_ValueEps.size(); ++i) - { - for (std::size_t j = 0u; j < m_RandomSample.size(); ++j) - { + for (std::size_t i = 0u; i < m_ValueEps.size(); ++i) { + for (std::size_t j = 0u; j < m_RandomSample.size(); ++j) { values[j] = m_RandomSample[j][i]; } - std::size_t p10{ values.size() / 10}; + std::size_t p10{values.size() / 10}; std::size_t p90{(9 * values.size()) / 10}; - std::nth_element(values.begin() , values.begin() + p10, values.end()); + std::nth_element(values.begin(), values.begin() + p10, values.end()); std::nth_element(values.begin() + p10 + 1, values.begin() + p90, values.end()); - m_ValueEps[i] = (values[p90] - values[p10]) - / static_cast(desiredNumberSamples); + m_ValueEps[i] = (values[p90] - values[p10]) / static_cast(desiredNumberSamples); } } m_Count = 0; } } -std::size_t CModelTools::CFuzzyDeduplicate::duplicate(core_t::TTime time, TDouble2Vec value) -{ - return !m_Quantize ? 
- m_Count++ : - m_QuantizedValues.emplace(boost::unordered::piecewise_construct, - std::forward_as_tuple(this->quantize(time), - this->quantize(value)), - std::forward_as_tuple(m_QuantizedValues.size())).first->second; +std::size_t CModelTools::CFuzzyDeduplicate::duplicate(core_t::TTime time, TDouble2Vec value) { + return !m_Quantize ? m_Count++ + : m_QuantizedValues + .emplace(boost::unordered::piecewise_construct, + std::forward_as_tuple(this->quantize(time), this->quantize(value)), + std::forward_as_tuple(m_QuantizedValues.size())) + .first->second; } -CModelTools::TDouble2Vec CModelTools::CFuzzyDeduplicate::quantize(TDouble2Vec value) const -{ - for (std::size_t i = 0u; i < value.size(); ++i) - { - value[i] = m_ValueEps[i] > 0.0 ? - m_ValueEps[i] * std::floor(value[i] / m_ValueEps[i]) : value[i]; +CModelTools::TDouble2Vec CModelTools::CFuzzyDeduplicate::quantize(TDouble2Vec value) const { + for (std::size_t i = 0u; i < value.size(); ++i) { + value[i] = m_ValueEps[i] > 0.0 ? m_ValueEps[i] * std::floor(value[i] / m_ValueEps[i]) : value[i]; } return value; } -core_t::TTime CModelTools::CFuzzyDeduplicate::quantize(core_t::TTime time) const -{ +core_t::TTime CModelTools::CFuzzyDeduplicate::quantize(core_t::TTime time) const { return maths::CIntegerTools::floor(time, m_TimeEps); } -std::size_t CModelTools::CFuzzyDeduplicate::SDuplicateValueHash::operator()(const TTimeDouble2VecPr &value) const -{ - return static_cast<std::size_t>(std::accumulate( - value.second.begin(), value.second.end(), - static_cast<uint64_t>(value.first), - [](uint64_t seed, double v) - { - return core::CHashing::hashCombine(seed, static_cast<uint64_t>(v)); - })); +std::size_t CModelTools::CFuzzyDeduplicate::SDuplicateValueHash::operator()(const TTimeDouble2VecPr& value) const { + return static_cast<std::size_t>( + std::accumulate(value.second.begin(), value.second.end(), static_cast<uint64_t>(value.first), [](uint64_t seed, double v) { + return core::CHashing::hashCombine(seed, static_cast<uint64_t>(v)); + })); } +CModelTools::CProbabilityAggregator::CProbabilityAggregator(EStyle style) : m_Style(style), m_TotalWeight(0.0) { +} -CModelTools::CProbabilityAggregator::CProbabilityAggregator(EStyle style) : - m_Style(style), m_TotalWeight(0.0) -{} -bool CModelTools::CProbabilityAggregator::empty() const -{ +bool CModelTools::CProbabilityAggregator::empty() const { return m_TotalWeight == 0.0; } -void CModelTools::CProbabilityAggregator::add(const TAggregator &aggregator, double weight) -{ - switch (m_Style) - { +void CModelTools::CProbabilityAggregator::add(const TAggregator& aggregator, double weight) { + switch (m_Style) { case E_Sum: - if (weight > 0.0) - { + if (weight > 0.0) { m_Aggregators.emplace_back(aggregator, weight); } break; @@ -231,75 +187,56 @@ void CModelTools::CProbabilityAggregator::add(const TAggregator &aggregator, dou } } -void CModelTools::CProbabilityAggregator::add(double probability, double weight) -{ +void CModelTools::CProbabilityAggregator::add(double probability, double weight) { m_TotalWeight += weight; - for (auto &aggregator : m_Aggregators) - { - boost::apply_visitor(boost::bind( - SAddProbability(), probability, weight, _1), aggregator.first); + for (auto& aggregator : m_Aggregators) { + boost::apply_visitor(boost::bind(SAddProbability(), probability, weight, _1), aggregator.first); } } -bool CModelTools::CProbabilityAggregator::calculate(double &result) const -{ +bool CModelTools::CProbabilityAggregator::calculate(double& result) const { result = 1.0; - if (m_TotalWeight == 0.0) - { + if (m_TotalWeight == 0.0) { LOG_TRACE("No samples"); return true; }
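// The comment in CFuzzyDeduplicate::add above describes classic fixed-size
// reservoir sampling: keep the first k values, then on seeing the n-th value
// replace a uniformly chosen element with probability k / n, so every value
// lands in the sample with probability k / n. A minimal sketch, with k
// generalising the hard-coded 100 (names are illustrative, not the real API):
#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

class CReservoirSketch {
public:
    explicit CReservoirSketch(std::size_t k) : m_K(k) {}
    void add(double value) {
        ++m_Count;
        if (m_Sample.size() < m_K) {
            m_Sample.push_back(value);
        } else if (std::uniform_real_distribution<>(0.0, 1.0)(m_Rng) <
                   static_cast<double>(m_K) / static_cast<double>(m_Count)) {
            // Evict a uniformly chosen element, as add() does above.
            std::size_t evict{std::uniform_int_distribution<std::size_t>(0, m_K - 1)(m_Rng)};
            m_Sample[evict] = value;
        }
    }
    const std::vector<double>& sample() const { return m_Sample; }

private:
    std::size_t m_K;
    std::size_t m_Count = 0;
    std::mt19937 m_Rng;
    std::vector<double> m_Sample;
};

// computeEpsilons then derives the grid width from the sample's p10 to p90
// spread divided by the desired number of samples, and duplicate()/quantize()
// snap values onto that grid so near-equal values collapse to one key:
double quantizeSketch(double value, double eps) {
    return eps > 0.0 ? eps * std::floor(value / eps) : value;
}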
- if (m_Aggregators.empty()) - { + if (m_Aggregators.empty()) { LOG_ERROR("No probability aggregators specified"); return false; } double p{1.0}; - switch (m_Style) - { - case E_Sum: - { + switch (m_Style) { + case E_Sum: { double n{0.0}; - for (const auto &aggregator : m_Aggregators) - { + for (const auto& aggregator : m_Aggregators) { n += aggregator.second; } - for (const auto &aggregator : m_Aggregators) - { - if (!boost::apply_visitor(boost::bind( - SReadProbability(), aggregator.second / n, boost::ref(p), _1), - aggregator.first)) - { + for (const auto& aggregator : m_Aggregators) { + if (!boost::apply_visitor(boost::bind(SReadProbability(), aggregator.second / n, boost::ref(p), _1), aggregator.first)) { return false; } } break; } - case E_Min: - { + case E_Min: { TMinAccumulator p_; - for (const auto &aggregator : m_Aggregators) - { - if (!boost::apply_visitor(boost::bind( - SReadProbability(), boost::ref(p_), _1), aggregator.first)) - { + for (const auto& aggregator : m_Aggregators) { + if (!boost::apply_visitor(boost::bind(SReadProbability(), boost::ref(p_), _1), aggregator.first)) { return false; } } - if (p_.count() > 0) - { + if (p_.count() > 0) { p = p_[0]; } break; } } - if (p < 0.0 || p > 1.001) - { + if (p < 0.0 || p > 1.001) { LOG_ERROR("Unexpected probability = " << p); } result = maths::CTools::truncate(p, maths::CTools::smallestProbability(), 1.0); @@ -307,25 +244,20 @@ bool CModelTools::CProbabilityAggregator::calculate(double &result) const return true; } +CModelTools::CCategoryProbabilityCache::CCategoryProbabilityCache() : m_Prior(0), m_SmallestProbability(1.0) { +} -CModelTools::CCategoryProbabilityCache::CCategoryProbabilityCache() : - m_Prior(0), m_SmallestProbability(1.0) -{} - -CModelTools::CCategoryProbabilityCache::CCategoryProbabilityCache(const maths::CMultinomialConjugate &prior) : - m_Prior(&prior), m_SmallestProbability(1.0) -{} +CModelTools::CCategoryProbabilityCache::CCategoryProbabilityCache(const maths::CMultinomialConjugate& prior) + : m_Prior(&prior), m_SmallestProbability(1.0) { +} -bool CModelTools::CCategoryProbabilityCache::lookup(std::size_t attribute, double &result) const -{ +bool CModelTools::CCategoryProbabilityCache::lookup(std::size_t attribute, double& result) const { result = 1.0; - if (!m_Prior || m_Prior->isNonInformative()) - { + if (!m_Prior || m_Prior->isNonInformative()) { return false; } - if (m_Cache.empty()) - { + if (m_Cache.empty()) { TDoubleVec lb; TDoubleVec ub; m_Prior->probabilitiesOfLessLikelyCategories(maths_t::E_TwoSided, lb, ub); @@ -333,63 +265,47 @@ bool CModelTools::CCategoryProbabilityCache::lookup(std::size_t attribute, doubl LOG_TRACE("P({c}) <= " << core::CContainerPrinter::print(ub)); m_Cache.swap(lb); m_SmallestProbability = 1.0; - for (std::size_t i = 0u; i < ub.size(); ++i) - { + for (std::size_t i = 0u; i < ub.size(); ++i) { m_Cache[i] = (m_Cache[i] + ub[i]) / 2.0; m_SmallestProbability = std::min(m_SmallestProbability, m_Cache[i]); } } std::size_t index; - result = (!m_Prior->index(static_cast(attribute), index) - || index >= m_Cache.size()) ? - m_SmallestProbability : m_Cache[index]; + result = (!m_Prior->index(static_cast(attribute), index) || index >= m_Cache.size()) ? 
m_SmallestProbability : m_Cache[index]; return true; } -void CModelTools::CCategoryProbabilityCache::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CModelTools::CCategoryProbabilityCache::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CTools::CLessLikelyProbability"); core::CMemoryDebug::dynamicSize("m_Cache", m_Cache, mem->addChild()); - if (m_Prior) - { + if (m_Prior) { m_Prior->debugMemoryUsage(mem->addChild()); } } -std::size_t CModelTools::CCategoryProbabilityCache::memoryUsage() const -{ +std::size_t CModelTools::CCategoryProbabilityCache::memoryUsage() const { std::size_t mem{core::CMemory::dynamicSize(m_Cache)}; - if (m_Prior) - { + if (m_Prior) { mem += m_Prior->memoryUsage(); } return mem; } +CModelTools::CProbabilityCache::CProbabilityCache(double maximumError) : m_MaximumError(maximumError) { +} -CModelTools::CProbabilityCache::CProbabilityCache(double maximumError) : - m_MaximumError(maximumError) -{} - -void CModelTools::CProbabilityCache::clear() -{ +void CModelTools::CProbabilityCache::clear() { m_Caches.clear(); } -void CModelTools::CProbabilityCache::addModes(model_t::EFeature feature, - std::size_t id, - const maths::CModel &model) -{ - if (model_t::dimension(feature) == 1) - { - TDouble1Vec &modes{m_Caches[{feature, id}].s_Modes}; - if (modes.empty()) - { - TDouble2Vec1Vec modes_(model.residualModes(maths::CConstantWeights::COUNT_VARIANCE, - maths::CConstantWeights::unit(1))); - for (const auto &mode : modes_) - { +void CModelTools::CProbabilityCache::addModes(model_t::EFeature feature, std::size_t id, const maths::CModel& model) { + if (model_t::dimension(feature) == 1) { + TDouble1Vec& modes{m_Caches[{feature, id}].s_Modes}; + if (modes.empty()) { + TDouble2Vec1Vec modes_( + model.residualModes(maths::CConstantWeights::COUNT_VARIANCE, maths::CConstantWeights::unit(1))); + for (const auto& mode : modes_) { modes.push_back(mode[0]); } std::sort(modes.begin(), modes.end()); @@ -397,23 +313,25 @@ void CModelTools::CProbabilityCache::addModes(model_t::EFeature feature, } } -void CModelTools::CProbabilityCache::addProbability(model_t::EFeature feature, std::size_t id, - const TDouble2Vec1Vec &value, - double probability, const TTail2Vec &tail, - bool conditional, const TSize1Vec &mostAnomalousCorrelate) -{ - if (m_MaximumError > 0.0 && value.size() == 1 && value[0].size() == 1) - { - m_Caches[{feature, id}].s_Probabilities.emplace( - value[0][0], SProbability{probability, tail, conditional, mostAnomalousCorrelate}); +void CModelTools::CProbabilityCache::addProbability(model_t::EFeature feature, + std::size_t id, + const TDouble2Vec1Vec& value, + double probability, + const TTail2Vec& tail, + bool conditional, + const TSize1Vec& mostAnomalousCorrelate) { + if (m_MaximumError > 0.0 && value.size() == 1 && value[0].size() == 1) { + m_Caches[{feature, id}].s_Probabilities.emplace(value[0][0], SProbability{probability, tail, conditional, mostAnomalousCorrelate}); } } -bool CModelTools::CProbabilityCache::lookup(model_t::EFeature feature, std::size_t id, - const TDouble2Vec1Vec &value, - double &probability, TTail2Vec &tail, - bool &conditional, TSize1Vec &mostAnomalousCorrelate) const -{ +bool CModelTools::CProbabilityCache::lookup(model_t::EFeature feature, + std::size_t id, + const TDouble2Vec1Vec& value, + double& probability, + TTail2Vec& tail, + bool& conditional, + TSize1Vec& mostAnomalousCorrelate) const { // The idea of this cache is to: // 1. 
Check that the requested value x is in a region where the // probability as a function of value is monotonic @@ -426,47 +344,36 @@ bool CModelTools::CProbabilityCache::lookup(model_t::EFeature feature, std::size // [a, b] if we can verify it doesn't contain more than one stationary // points and the gradients satisfy P'(a) * P'(b) > 0. - if (m_MaximumError > 0.0 && value.size() == 1 && value[0].size() == 1) - { + if (m_MaximumError > 0.0 && value.size() == 1 && value[0].size() == 1) { auto pos = m_Caches.find({feature, id}); - if (pos != m_Caches.end()) - { + if (pos != m_Caches.end()) { double x{value[0][0]}; - const TDouble1Vec &modes{pos->second.s_Modes}; - const TDoubleProbabilityFMap &probabilities{pos->second.s_Probabilities}; + const TDouble1Vec& modes{pos->second.s_Modes}; + const TDoubleProbabilityFMap& probabilities{pos->second.s_Probabilities}; auto right = probabilities.lower_bound(x); - if (right != probabilities.end() && right->first == x) - { + if (right != probabilities.end() && right->first == x) { probability = right->second.s_Probability; tail = right->second.s_Tail; conditional = right->second.s_Conditional; mostAnomalousCorrelate = right->second.s_MostAnomalousCorrelate; return true; - } - else if ( right != probabilities.end() - && right + 1 != probabilities.end() - && right != probabilities.begin() - && right - 1 != probabilities.begin() - && right - 2 != probabilities.begin()) - { + } else if (right != probabilities.end() && right + 1 != probabilities.end() && right != probabilities.begin() && + right - 1 != probabilities.begin() && right - 2 != probabilities.begin()) { auto left = right - 1; double v[]{(left - 1)->first, left->first, right->first, (right + 1)->first}; auto beginModes = std::lower_bound(modes.begin(), modes.end(), v[0]); - auto endModes = std::lower_bound(modes.begin(), modes.end(), v[3]); + auto endModes = std::lower_bound(modes.begin(), modes.end(), v[3]); LOG_TRACE("v = " << core::CContainerPrinter::print(v)); - if (beginModes == endModes && left->second.s_Tail == right->second.s_Tail) - { - double p[]{(left - 1)->second.s_Probability, - (left )->second.s_Probability, - (right )->second.s_Probability, + if (beginModes == endModes && left->second.s_Tail == right->second.s_Tail) { + double p[]{(left - 1)->second.s_Probability, + (left)->second.s_Probability, + (right)->second.s_Probability, (right + 1)->second.s_Probability}; LOG_TRACE("p(v) = " << core::CContainerPrinter::print(p)); - if ( std::is_sorted(p, p + 4, std::less()) - || std::is_sorted(p, p + 4, std::greater())) - { + if (std::is_sorted(p, p + 4, std::less()) || std::is_sorted(p, p + 4, std::greater())) { auto nearest = x - v[1] < v[2] - x ? 
left : right; probability = (p[2] * (x - v[1]) + p[1] * (v[2] - x)) / (v[2] - v[1]); tail = nearest->second.s_Tail; @@ -481,6 +388,5 @@ bool CModelTools::CProbabilityCache::lookup(model_t::EFeature feature, std::size return false; } - } } diff --git a/lib/model/CPartitioningFields.cc b/lib/model/CPartitioningFields.cc index 3be9ed72b0..7c552f7fd9 100644 --- a/lib/model/CPartitioningFields.cc +++ b/lib/model/CPartitioningFields.cc @@ -6,52 +6,40 @@ #include -namespace ml -{ -namespace model -{ - -CPartitioningFields::CPartitioningFields(const std::string &partitionFieldName, - const std::string &partitionFieldValue) -{ +namespace ml { +namespace model { + +CPartitioningFields::CPartitioningFields(const std::string& partitionFieldName, const std::string& partitionFieldValue) { m_PartitioningFields.reserve(3); this->add(partitionFieldName, partitionFieldValue); } -void CPartitioningFields::add(const std::string &fieldName, const std::string &fieldValue) -{ +void CPartitioningFields::add(const std::string& fieldName, const std::string& fieldValue) { m_PartitioningFields.emplace_back(TStrCRef(fieldName), TStrCRef(fieldValue)); } -std::size_t CPartitioningFields::size() const -{ +std::size_t CPartitioningFields::size() const { return m_PartitioningFields.size(); } -const CPartitioningFields::TStrCRefStrCRefPr &CPartitioningFields::operator[](std::size_t i) const -{ +const CPartitioningFields::TStrCRefStrCRefPr& CPartitioningFields::operator[](std::size_t i) const { return m_PartitioningFields[i]; } -CPartitioningFields::TStrCRefStrCRefPr &CPartitioningFields::operator[](std::size_t i) -{ +CPartitioningFields::TStrCRefStrCRefPr& CPartitioningFields::operator[](std::size_t i) { return m_PartitioningFields[i]; } -const CPartitioningFields::TStrCRefStrCRefPr &CPartitioningFields::back() const -{ +const CPartitioningFields::TStrCRefStrCRefPr& CPartitioningFields::back() const { return m_PartitioningFields.back(); } -CPartitioningFields::TStrCRefStrCRefPr &CPartitioningFields::back() -{ +CPartitioningFields::TStrCRefStrCRefPr& CPartitioningFields::back() { return m_PartitioningFields.back(); } -const std::string &CPartitioningFields::partitionFieldValue() const -{ +const std::string& CPartitioningFields::partitionFieldValue() const { return m_PartitioningFields[0].second.get(); } - } } diff --git a/lib/model/CPopulationModel.cc b/lib/model/CPopulationModel.cc index 611ce9c97d..069de16f6d 100644 --- a/lib/model/CPopulationModel.cc +++ b/lib/model/CPopulationModel.cc @@ -8,9 +8,9 @@ #include #include +#include #include #include -#include #include #include @@ -29,59 +29,46 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { using TStrCRef = boost::reference_wrapper; using TStrCRefUInt64Map = std::map; -enum EEntity -{ - E_Person, - E_Attribute -}; +enum EEntity { E_Person, E_Attribute }; const std::string EMPTY; //! Check if \p entity is active. -bool isActive(EEntity entity, const CDataGatherer &gatherer, std::size_t id) -{ - switch (entity) - { - case E_Person: return gatherer.isPersonActive(id); - case E_Attribute: return gatherer.isAttributeActive(id); +bool isActive(EEntity entity, const CDataGatherer& gatherer, std::size_t id) { + switch (entity) { + case E_Person: + return gatherer.isPersonActive(id); + case E_Attribute: + return gatherer.isAttributeActive(id); } return false; } //! Get \p entity's name. 
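// The cache hit path in CProbabilityCache::lookup above only fires when the
// four cached points bracketing x carry monotonic probabilities, share the
// same tail and enclose no residual mode; the probability at x is then read
// off the chord between the two nearest cached neighbours. The interpolation,
// extracted for clarity (v1 < x < v2 with cached probabilities p1, p2):
double interpolateProbabilitySketch(double x, double v1, double p1, double v2, double p2) {
    // Exact at the end points; the monotonicity and mode checks bound the
    // error in between by the cache's configured maximum error.
    return (p2 * (x - v1) + p1 * (v2 - x)) / (v2 - v1);
}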
-const std::string &name(EEntity entity, const CDataGatherer &gatherer, std::size_t id) -{ - switch (entity) - { - case E_Person: return gatherer.personName(id); - case E_Attribute: return gatherer.attributeName(id); +const std::string& name(EEntity entity, const CDataGatherer& gatherer, std::size_t id) { + switch (entity) { + case E_Person: + return gatherer.personName(id); + case E_Attribute: + return gatherer.attributeName(id); } return EMPTY; } //! Update \p hashes with the hash of the active entities in \p values. template -void hashActive(EEntity entity, - const CDataGatherer &gatherer, - const std::vector &values, - TStrCRefUInt64Map &hashes) -{ - for (std::size_t id = 0u; id < values.size(); ++id) - { - if (isActive(entity, gatherer, id)) - { - uint64_t &hash = hashes[boost::cref(name(entity, gatherer, id))]; +void hashActive(EEntity entity, const CDataGatherer& gatherer, const std::vector& values, TStrCRefUInt64Map& hashes) { + for (std::size_t id = 0u; id < values.size(); ++id) { + if (isActive(entity, gatherer, id)) { + uint64_t& hash = hashes[boost::cref(name(entity, gatherer, id))]; hash = maths::CChecksum::calculate(hash, values[id]); } } @@ -90,12 +77,10 @@ void hashActive(EEntity entity, //! Update \p hashes with the hash of the active entities in \p values. template void hashActive(EEntity entity, - const CDataGatherer &gatherer, - const std::vector> > &values, - TStrCRefUInt64Map &hashes) -{ - for (const auto &value : values) - { + const CDataGatherer& gatherer, + const std::vector>>& values, + TStrCRefUInt64Map& hashes) { + for (const auto& value : values) { hashActive(entity, gatherer, value.second, hashes); } } @@ -120,148 +105,111 @@ const std::string DISTINCT_PERSON_COUNT_TAG("g"); // const std::string EXTRA_DATA_TAG("h"); const std::string INTERIM_BUCKET_CORRECTOR_TAG("i"); - } -CPopulationModel::CPopulationModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators) : - CAnomalyDetectorModel(params, dataGatherer, influenceCalculators), - m_NewDistinctPersonCounts(BJKST_HASHES, BJKST_MAX_SIZE) -{ - const model_t::TFeatureVec &features = dataGatherer->features(); - for (std::size_t i = 0u; i < features.size(); ++i) - { - if (!model_t::isCategorical(features[i]) && !model_t::isConstant(features[i])) - { - m_NewPersonBucketCounts.reset(maths::CCountMinSketch(COUNT_MIN_SKETCH_ROWS, - COUNT_MIN_SKETCH_COLUMNS)); +CPopulationModel::CPopulationModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) + : CAnomalyDetectorModel(params, dataGatherer, influenceCalculators), m_NewDistinctPersonCounts(BJKST_HASHES, BJKST_MAX_SIZE) { + const model_t::TFeatureVec& features = dataGatherer->features(); + for (std::size_t i = 0u; i < features.size(); ++i) { + if (!model_t::isCategorical(features[i]) && !model_t::isConstant(features[i])) { + m_NewPersonBucketCounts.reset(maths::CCountMinSketch(COUNT_MIN_SKETCH_ROWS, COUNT_MIN_SKETCH_COLUMNS)); break; } } } -CPopulationModel::CPopulationModel(bool isForPersistence, const CPopulationModel &other) : - CAnomalyDetectorModel(isForPersistence, other), - m_PersonLastBucketTimes(other.m_PersonLastBucketTimes), - m_AttributeFirstBucketTimes(other.m_AttributeFirstBucketTimes), - m_AttributeLastBucketTimes(other.m_AttributeLastBucketTimes), - m_NewDistinctPersonCounts(BJKST_HASHES, BJKST_MAX_SIZE), - m_DistinctPersonCounts(other.m_DistinctPersonCounts), - 
m_PersonAttributeBucketCounts(other.m_PersonAttributeBucketCounts) -{ - if (!isForPersistence) - { +CPopulationModel::CPopulationModel(bool isForPersistence, const CPopulationModel& other) + : CAnomalyDetectorModel(isForPersistence, other), + m_PersonLastBucketTimes(other.m_PersonLastBucketTimes), + m_AttributeFirstBucketTimes(other.m_AttributeFirstBucketTimes), + m_AttributeLastBucketTimes(other.m_AttributeLastBucketTimes), + m_NewDistinctPersonCounts(BJKST_HASHES, BJKST_MAX_SIZE), + m_DistinctPersonCounts(other.m_DistinctPersonCounts), + m_PersonAttributeBucketCounts(other.m_PersonAttributeBucketCounts) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } -bool CPopulationModel::isPopulation() const -{ +bool CPopulationModel::isPopulation() const { return true; } -CPopulationModel::TOptionalUInt64 - CPopulationModel::currentBucketCount(std::size_t pid, - core_t::TTime time) const -{ - if (!this->bucketStatsAvailable(time)) - { +CPopulationModel::TOptionalUInt64 CPopulationModel::currentBucketCount(std::size_t pid, core_t::TTime time) const { + if (!this->bucketStatsAvailable(time)) { LOG_ERROR("No statistics at " << time); return TOptionalUInt64(); } - const TSizeUInt64PrVec &personCounts = this->personCounts(); - auto i = std::lower_bound(personCounts.begin(), - personCounts.end(), - pid, maths::COrderings::SFirstLess()); - return (i != personCounts.end() && i->first == pid) ? - TOptionalUInt64(i->second) : TOptionalUInt64(); + const TSizeUInt64PrVec& personCounts = this->personCounts(); + auto i = std::lower_bound(personCounts.begin(), personCounts.end(), pid, maths::COrderings::SFirstLess()); + return (i != personCounts.end() && i->first == pid) ? TOptionalUInt64(i->second) : TOptionalUInt64(); } -CPopulationModel::TOptionalDouble - CPopulationModel::baselineBucketCount(std::size_t /*pid*/) const -{ +CPopulationModel::TOptionalDouble CPopulationModel::baselineBucketCount(std::size_t /*pid*/) const { return TOptionalDouble(); } -void CPopulationModel::currentBucketPersonIds(core_t::TTime time, - TSizeVec &result) const -{ +void CPopulationModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const { result.clear(); - if (!this->bucketStatsAvailable(time)) - { + if (!this->bucketStatsAvailable(time)) { LOG_ERROR("No statistics at " << time); return; } - const TSizeUInt64PrVec &personCounts = this->personCounts(); + const TSizeUInt64PrVec& personCounts = this->personCounts(); result.reserve(personCounts.size()); - for (const auto &count : personCounts) - { + for (const auto& count : personCounts) { result.push_back(count.first); } } -void CPopulationModel::sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor) -{ - CDataGatherer &gatherer = this->dataGatherer(); +void CPopulationModel::sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { + CDataGatherer& gatherer = this->dataGatherer(); - if (!gatherer.dataAvailable(startTime)) - { + if (!gatherer.dataAvailable(startTime)) { return; } - for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); - time < endTime; - time += bucketLength) - { + for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) { gatherer.sampleNow(time); this->sampleBucketStatistics(time, time + bucketLength, resourceMonitor); } } -void CPopulationModel::sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor 
&resourceMonitor) -{ +void CPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { this->CAnomalyDetectorModel::sample(startTime, endTime, resourceMonitor); - const CDataGatherer &gatherer = this->dataGatherer(); - const CDataGatherer::TSizeSizePrUInt64UMap &counts = gatherer.bucketCounts(startTime); - for (const auto &count : counts) - { + const CDataGatherer& gatherer = this->dataGatherer(); + const CDataGatherer::TSizeSizePrUInt64UMap& counts = gatherer.bucketCounts(startTime); + for (const auto& count : counts) { std::size_t pid = CDataGatherer::extractPersonId(count); std::size_t cid = CDataGatherer::extractAttributeId(count); m_PersonLastBucketTimes[pid] = startTime; - if (CAnomalyDetectorModel::isTimeUnset(m_AttributeFirstBucketTimes[cid])) - { + if (CAnomalyDetectorModel::isTimeUnset(m_AttributeFirstBucketTimes[cid])) { m_AttributeFirstBucketTimes[cid] = startTime; } m_AttributeLastBucketTimes[cid] = startTime; m_DistinctPersonCounts[cid].add(static_cast(pid)); - if (cid < m_PersonAttributeBucketCounts.size()) - { + if (cid < m_PersonAttributeBucketCounts.size()) { m_PersonAttributeBucketCounts[cid].add(static_cast(pid), 1.0); } } double alpha = std::exp(-this->params().s_DecayRate * 1.0); - for (std::size_t cid = 0u; cid < m_PersonAttributeBucketCounts.size(); ++cid) - { + for (std::size_t cid = 0u; cid < m_PersonAttributeBucketCounts.size(); ++cid) { m_PersonAttributeBucketCounts[cid].age(alpha); } } -uint64_t CPopulationModel::checksum(bool includeCurrentBucketStats) const -{ +uint64_t CPopulationModel::checksum(bool includeCurrentBucketStats) const { uint64_t seed = this->CAnomalyDetectorModel::checksum(includeCurrentBucketStats); - const CDataGatherer &gatherer = this->dataGatherer(); + const CDataGatherer& gatherer = this->dataGatherer(); TStrCRefUInt64Map hashes; - hashActive(E_Person, gatherer, m_PersonLastBucketTimes, hashes); + hashActive(E_Person, gatherer, m_PersonLastBucketTimes, hashes); hashActive(E_Attribute, gatherer, m_AttributeFirstBucketTimes, hashes); hashActive(E_Attribute, gatherer, m_AttributeLastBucketTimes, hashes); @@ -271,8 +219,7 @@ uint64_t CPopulationModel::checksum(bool includeCurrentBucketStats) const return maths::CChecksum::calculate(seed, hashes); } -void CPopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void CPopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CPopulationModel"); this->CAnomalyDetectorModel::debugMemoryUsage(mem->addChild()); core::CMemoryDebug::dynamicSize("m_PersonLastBucketTimes", m_PersonLastBucketTimes, mem); @@ -284,8 +231,7 @@ void CPopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) core::CMemoryDebug::dynamicSize("m_PersonAttributeBucketCounts", m_PersonAttributeBucketCounts, mem); } -std::size_t CPopulationModel::memoryUsage() const -{ +std::size_t CPopulationModel::memoryUsage() const { std::size_t mem = this->CAnomalyDetectorModel::memoryUsage(); mem += core::CMemory::dynamicSize(m_PersonLastBucketTimes); mem += core::CMemory::dynamicSize(m_AttributeFirstBucketTimes); @@ -297,179 +243,137 @@ std::size_t CPopulationModel::memoryUsage() const return mem; } -double CPopulationModel::attributeFrequency(std::size_t cid) const -{ +double CPopulationModel::attributeFrequency(std::size_t cid) const { std::size_t active = this->dataGatherer().numberActivePeople(); - return active == 0 ? 
-                       0.5 : static_cast<double>(m_DistinctPersonCounts[cid].number())
-                             / static_cast<double>(active);
+    return active == 0 ? 0.5 : static_cast<double>(m_DistinctPersonCounts[cid].number()) / static_cast<double>(active);
 }
 
-double CPopulationModel::sampleRateWeight(std::size_t pid, std::size_t cid) const
-{
-    if (   cid >= m_PersonAttributeBucketCounts.size()
-        || cid >= m_DistinctPersonCounts.size())
-    {
+double CPopulationModel::sampleRateWeight(std::size_t pid, std::size_t cid) const {
+    if (cid >= m_PersonAttributeBucketCounts.size() || cid >= m_DistinctPersonCounts.size()) {
         return 1.0;
     }
 
-    const maths::CCountMinSketch &counts = m_PersonAttributeBucketCounts[cid];
-    const maths::CBjkstUniqueValues &distinctPeople = m_DistinctPersonCounts[cid];
+    const maths::CCountMinSketch& counts = m_PersonAttributeBucketCounts[cid];
+    const maths::CBjkstUniqueValues& distinctPeople = m_DistinctPersonCounts[cid];
 
-    double personCount = counts.count(static_cast<uint32_t>(pid))
-                         - counts.oneMinusDeltaError();
-    if (personCount <= 0.0)
-    {
+    double personCount = counts.count(static_cast<uint32_t>(pid)) - counts.oneMinusDeltaError();
+    if (personCount <= 0.0) {
         return 1.0;
     }
     LOG_TRACE("personCount = " << personCount);
 
     double totalCount = counts.totalCount();
     double distinctPeopleCount =
-        std::min(static_cast<double>(distinctPeople.number()),
-                 static_cast<double>(this->dataGatherer().numberActivePeople()));
+        std::min(static_cast<double>(distinctPeople.number()), static_cast<double>(this->dataGatherer().numberActivePeople()));
     double meanPersonCount = totalCount / distinctPeopleCount;
     LOG_TRACE("meanPersonCount = " << meanPersonCount);
 
     return std::min(meanPersonCount / personCount, 1.0);
 }
 
-void CPopulationModel::doAcceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
-    inserter.insertValue(WINDOW_BUCKET_COUNT_TAG,
-                         this->windowBucketCount(),
-                         core::CIEEE754::E_SinglePrecision);
+void CPopulationModel::doAcceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(), core::CIEEE754::E_SinglePrecision);
     core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, this->personBucketCounts(), inserter);
     core::CPersistUtils::persist(PERSON_LAST_BUCKET_TIME_TAG, m_PersonLastBucketTimes, inserter);
     core::CPersistUtils::persist(ATTRIBUTE_FIRST_BUCKET_TIME_TAG, m_AttributeFirstBucketTimes, inserter);
     core::CPersistUtils::persist(ATTRIBUTE_LAST_BUCKET_TIME_TAG, m_AttributeLastBucketTimes, inserter);
-    for (std::size_t cid = 0; cid < m_PersonAttributeBucketCounts.size(); ++cid)
-    {
+    for (std::size_t cid = 0; cid < m_PersonAttributeBucketCounts.size(); ++cid) {
         inserter.insertLevel(PERSON_ATTRIBUTE_BUCKET_COUNT_TAG,
-                             boost::bind(&maths::CCountMinSketch::acceptPersistInserter,
-                                         &m_PersonAttributeBucketCounts[cid],
-                                         _1));
+                             boost::bind(&maths::CCountMinSketch::acceptPersistInserter, &m_PersonAttributeBucketCounts[cid], _1));
     }
-    for (std::size_t cid = 0; cid < m_DistinctPersonCounts.size(); ++cid)
-    {
+    for (std::size_t cid = 0; cid < m_DistinctPersonCounts.size(); ++cid) {
         inserter.insertLevel(DISTINCT_PERSON_COUNT_TAG,
-                             boost::bind(&maths::CBjkstUniqueValues::acceptPersistInserter,
-                                         &m_DistinctPersonCounts[cid],
-                                         _1));
+                             boost::bind(&maths::CBjkstUniqueValues::acceptPersistInserter, &m_DistinctPersonCounts[cid], _1));
     }
     this->interimBucketCorrectorAcceptPersistInserter(INTERIM_BUCKET_CORRECTOR_TAG, inserter);
 }
 
-bool CPopulationModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
+bool CPopulationModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
         RESTORE_SETUP_TEARDOWN(WINDOW_BUCKET_COUNT_TAG,
                                double count,
                                core::CStringUtils::stringToType(traverser.value(), count),
                                this->windowBucketCount(count));
-        RESTORE(PERSON_BUCKET_COUNT_TAG,
-                core::CPersistUtils::restore(name, this->personBucketCounts(), traverser))
-        RESTORE(PERSON_LAST_BUCKET_TIME_TAG,
-                core::CPersistUtils::restore(name, m_PersonLastBucketTimes, traverser))
-        RESTORE(ATTRIBUTE_FIRST_BUCKET_TIME_TAG,
-                core::CPersistUtils::restore(name, m_AttributeFirstBucketTimes, traverser))
-        RESTORE(ATTRIBUTE_LAST_BUCKET_TIME_TAG,
-                core::CPersistUtils::restore(name, m_AttributeLastBucketTimes, traverser))
-        if (name == PERSON_ATTRIBUTE_BUCKET_COUNT_TAG)
-        {
+        RESTORE(PERSON_BUCKET_COUNT_TAG, core::CPersistUtils::restore(name, this->personBucketCounts(), traverser))
+        RESTORE(PERSON_LAST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_PersonLastBucketTimes, traverser))
+        RESTORE(ATTRIBUTE_FIRST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_AttributeFirstBucketTimes, traverser))
+        RESTORE(ATTRIBUTE_LAST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_AttributeLastBucketTimes, traverser))
+        if (name == PERSON_ATTRIBUTE_BUCKET_COUNT_TAG) {
            maths::CCountMinSketch sketch(traverser);
            m_PersonAttributeBucketCounts.push_back(maths::CCountMinSketch(0, 0));
            m_PersonAttributeBucketCounts.back().swap(sketch);
            continue;
        }
-        if (name == DISTINCT_PERSON_COUNT_TAG)
-        {
+        if (name == DISTINCT_PERSON_COUNT_TAG) {
            maths::CBjkstUniqueValues sketch(traverser);
            m_DistinctPersonCounts.push_back(maths::CBjkstUniqueValues(0, 0));
            m_DistinctPersonCounts.back().swap(sketch);
            continue;
        }
         RESTORE(INTERIM_BUCKET_CORRECTOR_TAG, this->interimBucketCorrectorAcceptRestoreTraverser(traverser))
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
 
-void CPopulationModel::createUpdateNewModels(core_t::TTime time, CResourceMonitor &resourceMonitor)
-{
+void CPopulationModel::createUpdateNewModels(core_t::TTime time, CResourceMonitor& resourceMonitor) {
     this->updateRecycledModels();
 
-    CDataGatherer &gatherer = this->dataGatherer();
+    CDataGatherer& gatherer = this->dataGatherer();
 
     std::size_t numberExistingPeople = m_PersonLastBucketTimes.size();
     std::size_t numberExistingAttributes = m_AttributeLastBucketTimes.size();
-    TOptionalSize usageEstimate = this->estimateMemoryUsage(std::min(numberExistingPeople,
-                                                                     gatherer.numberActivePeople()),
-                                                            std::min(numberExistingAttributes,
-                                                                     gatherer.numberActiveAttributes()),
+    TOptionalSize usageEstimate = this->estimateMemoryUsage(std::min(numberExistingPeople, gatherer.numberActivePeople()),
+                                                            std::min(numberExistingAttributes, gatherer.numberActiveAttributes()),
                                                             0); // # correlations
     std::size_t ourUsage = usageEstimate ? usageEstimate.get() : this->computeMemoryUsage();
     std::size_t resourceLimit = ourUsage + resourceMonitor.allocationLimit();
     std::size_t numberNewPeople = gatherer.numberPeople();
-    numberNewPeople = numberNewPeople > numberExistingPeople ?
-                      numberNewPeople - numberExistingPeople : 0;
+    numberNewPeople = numberNewPeople > numberExistingPeople ? numberNewPeople - numberExistingPeople : 0;
     std::size_t numberNewAttributes = gatherer.numberAttributes();
-    numberNewAttributes = numberNewAttributes > numberExistingAttributes ?
-                          numberNewAttributes - numberExistingAttributes : 0;
+    numberNewAttributes = numberNewAttributes > numberExistingAttributes ? numberNewAttributes - numberExistingAttributes : 0;
 
-    while (   numberNewPeople > 0
-           && resourceMonitor.areAllocationsAllowed()
-           && (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit))
-    {
+    while (numberNewPeople > 0 && resourceMonitor.areAllocationsAllowed() && (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) {
         // We batch people in CHUNK_SIZE (500) and create models in chunks
         // and test usage after each chunk.
         std::size_t numberToCreate = std::min(numberNewPeople, CHUNK_SIZE);
-        LOG_TRACE("Creating batch of " << numberToCreate << " people of remaining " << numberNewPeople << ". "
-                  << resourceLimit - ourUsage << " free bytes remaining");
+        LOG_TRACE("Creating batch of " << numberToCreate << " people of remaining " << numberNewPeople << ". " << resourceLimit - ourUsage
+                                       << " free bytes remaining");
         this->createNewModels(numberToCreate, 0);
         numberExistingPeople += numberToCreate;
         numberNewPeople -= numberToCreate;
-        if ((numberNewPeople > 0 || numberNewAttributes > 0) && resourceMonitor.haveNoLimit() == false)
-        {
+        if ((numberNewPeople > 0 || numberNewAttributes > 0) && resourceMonitor.haveNoLimit() == false) {
             ourUsage = this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, numberExistingAttributes, 0);
         }
     }
 
-    while (   numberNewAttributes > 0
-           && resourceMonitor.areAllocationsAllowed()
-           && (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit))
-    {
+    while (numberNewAttributes > 0 && resourceMonitor.areAllocationsAllowed() &&
+           (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) {
         // We batch attributes in CHUNK_SIZE (500) and create models in chunks
         // and test usage after each chunk.
         std::size_t numberToCreate = std::min(numberNewAttributes, CHUNK_SIZE);
         LOG_TRACE("Creating batch of " << numberToCreate << " attributes of remaining " << numberNewAttributes << ". "
-                  << resourceLimit - ourUsage << " free bytes remaining");
+                                       << resourceLimit - ourUsage << " free bytes remaining");
         this->createNewModels(0, numberToCreate);
         numberExistingAttributes += numberToCreate;
         numberNewAttributes -= numberToCreate;
-        if (numberNewAttributes > 0 && resourceMonitor.haveNoLimit() == false)
-        {
+        if (numberNewAttributes > 0 && resourceMonitor.haveNoLimit() == false) {
             ourUsage = this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, numberExistingAttributes, 0);
         }
     }
 
-    if (numberNewPeople > 0)
-    {
+    if (numberNewPeople > 0) {
         resourceMonitor.acceptAllocationFailureResult(time);
         LOG_DEBUG("Not enough memory to create person models");
-        core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures).
-            increment(numberNewPeople);
+        core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures).increment(numberNewPeople);
         std::size_t toRemove = gatherer.numberPeople() - numberNewPeople;
         gatherer.removePeople(toRemove);
     }
-    if (numberNewAttributes > 0)
-    {
+    if (numberNewAttributes > 0) {
         resourceMonitor.acceptAllocationFailureResult(time);
         LOG_DEBUG("Not enough memory to create attribute models");
-        core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures).
-            increment(numberNewAttributes);
+        core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures).increment(numberNewAttributes);
         std::size_t toRemove = gatherer.numberAttributes() - numberNewAttributes;
         gatherer.removeAttributes(toRemove);
     }
@@ -477,23 +381,17 @@ void CPopulationModel::createUpdateNewModels(core_t::TTime time, CResourceMonito
     this->refreshCorrelationModels(resourceLimit, resourceMonitor);
 }
 
-void CPopulationModel::createNewModels(std::size_t n, std::size_t m)
-{
-    if (n > 0)
-    {
-        core::CAllocationStrategy::resize(m_PersonLastBucketTimes,
-                                          n + m_PersonLastBucketTimes.size(),
-                                          CAnomalyDetectorModel::TIME_UNSET);
+void CPopulationModel::createNewModels(std::size_t n, std::size_t m) {
+    if (n > 0) {
+        core::CAllocationStrategy::resize(m_PersonLastBucketTimes, n + m_PersonLastBucketTimes.size(), CAnomalyDetectorModel::TIME_UNSET);
     }
-    if (m > 0)
-    {
+    if (m > 0) {
         std::size_t newM = m + m_AttributeFirstBucketTimes.size();
         core::CAllocationStrategy::resize(m_AttributeFirstBucketTimes, newM, CAnomalyDetectorModel::TIME_UNSET);
         core::CAllocationStrategy::resize(m_AttributeLastBucketTimes, newM, CAnomalyDetectorModel::TIME_UNSET);
         core::CAllocationStrategy::resize(m_DistinctPersonCounts, newM, m_NewDistinctPersonCounts);
-        if (m_NewPersonBucketCounts)
-        {
+        if (m_NewPersonBucketCounts) {
             core::CAllocationStrategy::resize(m_PersonAttributeBucketCounts, newM, *m_NewPersonBucketCounts);
         }
     }
@@ -501,22 +399,18 @@ void CPopulationModel::createNewModels(std::size_t n, std::size_t m)
     this->CAnomalyDetectorModel::createNewModels(n, m);
 }
 
-void CPopulationModel::updateRecycledModels()
-{
-    CDataGatherer &gatherer = this->dataGatherer();
-    for (auto pid : gatherer.recycledPersonIds())
-    {
+void CPopulationModel::updateRecycledModels() {
+    CDataGatherer& gatherer = this->dataGatherer();
+    for (auto pid : gatherer.recycledPersonIds()) {
         m_PersonLastBucketTimes[pid] = 0;
     }
 
-    TSizeVec &attributes = gatherer.recycledAttributeIds();
-    for (auto cid : attributes)
-    {
+    TSizeVec& attributes = gatherer.recycledAttributeIds();
+    for (auto cid : attributes) {
         m_AttributeFirstBucketTimes[cid] = CAnomalyDetectorModel::TIME_UNSET;
         m_AttributeLastBucketTimes[cid] = CAnomalyDetectorModel::TIME_UNSET;
         m_DistinctPersonCounts[cid] = m_NewDistinctPersonCounts;
-        if (m_NewPersonBucketCounts)
-        {
+        if (m_NewPersonBucketCounts) {
             m_PersonAttributeBucketCounts[cid] = *m_NewPersonBucketCounts;
         }
     }
@@ -529,157 +423,118 @@ void CPopulationModel::correctBaselineForInterim(model_t::EFeature feature,
                                                  std::size_t pid,
                                                  std::size_t cid,
                                                  model_t::CResultType type,
-                                                 const TSizeDoublePr1Vec &correlated,
-                                                 const TCorrectionKeyDouble1VecUMap &corrections,
-                                                 TDouble1Vec &result) const
-{
-    if (type.isInterim() && model_t::requiresInterimResultAdjustment(feature))
-    {
+                                                 const TSizeDoublePr1Vec& correlated,
+                                                 const TCorrectionKeyDouble1VecUMap& corrections,
+                                                 TDouble1Vec& result) const {
+    if (type.isInterim() && model_t::requiresInterimResultAdjustment(feature)) {
         std::size_t correlated_ = 0u;
-        switch (type.asConditionalOrUnconditional())
-        {
+        switch (type.asConditionalOrUnconditional()) {
         case model_t::CResultType::E_Unconditional:
             break;
         case model_t::CResultType::E_Conditional:
-            if (!correlated.empty())
-            {
+            if (!correlated.empty()) {
                 correlated_ = correlated[0].first;
             }
             break;
         }
 
         auto correction = corrections.find(CCorrectionKey(feature, pid, cid, correlated_));
-        if (correction != corrections.end())
-        {
+        if (correction != corrections.end()) {
             result -= (*correction).second;
         }
     }
 }
 
-double CPopulationModel::propagationTime(std::size_t cid, core_t::TTime time) const
-{
-    return 1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0)
-                 * maths::CTools::truncate(1.0 - static_cast<double>(time - m_AttributeFirstBucketTimes[cid])
-                                                 / static_cast<double>(3 * core::constants::WEEK), 0.0, 1.0);
+double CPopulationModel::propagationTime(std::size_t cid, core_t::TTime time) const {
+    return 1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) *
+                     maths::CTools::truncate(1.0 - static_cast<double>(time - m_AttributeFirstBucketTimes[cid]) /
+                                                       static_cast<double>(3 * core::constants::WEEK),
+                                             0.0,
+                                             1.0);
 }
 
-const CPopulationModel::TTimeVec &CPopulationModel::attributeFirstBucketTimes() const
-{
+const CPopulationModel::TTimeVec& CPopulationModel::attributeFirstBucketTimes() const {
     return m_AttributeFirstBucketTimes;
 }
 
-const CPopulationModel::TTimeVec &CPopulationModel::attributeLastBucketTimes() const
-{
+const CPopulationModel::TTimeVec& CPopulationModel::attributeLastBucketTimes() const {
     return m_AttributeLastBucketTimes;
 }
 
 void CPopulationModel::peopleAndAttributesToRemove(core_t::TTime time,
                                                    std::size_t maximumAge,
-                                                   TSizeVec &peopleToRemove,
-                                                   TSizeVec &attributesToRemove) const
-{
-    if (time <= 0)
-    {
+                                                   TSizeVec& peopleToRemove,
+                                                   TSizeVec& attributesToRemove) const {
+    if (time <= 0) {
         return;
     }
 
-    const CDataGatherer &gatherer = this->dataGatherer();
+    const CDataGatherer& gatherer = this->dataGatherer();
 
-    for (std::size_t pid = 0u; pid < m_PersonLastBucketTimes.size(); ++pid)
-    {
-        if ((gatherer.isPersonActive(pid)) &&
-            (!CAnomalyDetectorModel::isTimeUnset(m_PersonLastBucketTimes[pid])))
-        {
-            std::size_t bucketsSinceLastEvent =
-                    static_cast<std::size_t>((time - m_PersonLastBucketTimes[pid])
-                                             / gatherer.bucketLength());
-            if (bucketsSinceLastEvent > maximumAge)
-            {
+    for (std::size_t pid = 0u; pid < m_PersonLastBucketTimes.size(); ++pid) {
+        if ((gatherer.isPersonActive(pid)) && (!CAnomalyDetectorModel::isTimeUnset(m_PersonLastBucketTimes[pid]))) {
+            std::size_t bucketsSinceLastEvent = static_cast<std::size_t>((time - m_PersonLastBucketTimes[pid]) / gatherer.bucketLength());
+            if (bucketsSinceLastEvent > maximumAge) {
                 LOG_TRACE(gatherer.personName(pid)
-                          << ", bucketsSinceLastEvent = " << bucketsSinceLastEvent
-                          << ", maximumAge = " << maximumAge);
+                          << ", bucketsSinceLastEvent = " << bucketsSinceLastEvent << ", maximumAge = " << maximumAge);
                 peopleToRemove.push_back(pid);
             }
         }
     }
 
-    for (std::size_t cid = 0u; cid < m_AttributeLastBucketTimes.size(); ++cid)
-    {
-        if ((gatherer.isAttributeActive(cid)) &&
-            (!CAnomalyDetectorModel::isTimeUnset(m_AttributeLastBucketTimes[cid])))
-        {
+    for (std::size_t cid = 0u; cid < m_AttributeLastBucketTimes.size(); ++cid) {
+        if ((gatherer.isAttributeActive(cid)) && (!CAnomalyDetectorModel::isTimeUnset(m_AttributeLastBucketTimes[cid]))) {
             std::size_t bucketsSinceLastEvent =
-                    static_cast<std::size_t>((time - m_AttributeLastBucketTimes[cid])
-                                             / gatherer.bucketLength());
-            if (bucketsSinceLastEvent > maximumAge)
-            {
+                static_cast<std::size_t>((time - m_AttributeLastBucketTimes[cid]) / gatherer.bucketLength());
+            if (bucketsSinceLastEvent > maximumAge) {
                 LOG_TRACE(gatherer.attributeName(cid)
-                          << ", bucketsSinceLastEvent = " << bucketsSinceLastEvent
-                          << ", maximumAge = " << maximumAge);
+                          << ", bucketsSinceLastEvent = " << bucketsSinceLastEvent << ", maximumAge = " << maximumAge);
                 attributesToRemove.push_back(cid);
             }
         }
     }
 }
 
-void CPopulationModel::removePeople(const TSizeVec &peopleToRemove)
-{
-    for (std::size_t i = 0u; i < peopleToRemove.size(); ++i)
-    {
+void CPopulationModel::removePeople(const TSizeVec& peopleToRemove) {
+    for (std::size_t i = 0u; i < peopleToRemove.size(); ++i) {
         uint32_t pid = static_cast<uint32_t>(peopleToRemove[i]);
-        for (std::size_t cid = 0u; cid < m_PersonAttributeBucketCounts.size(); ++cid)
-        {
+        for (std::size_t cid = 0u; cid < m_PersonAttributeBucketCounts.size(); ++cid) {
             m_PersonAttributeBucketCounts[cid].removeFromMap(pid);
         }
-        for (std::size_t cid = 0u; cid < m_DistinctPersonCounts.size(); ++cid)
-        {
+        for (std::size_t cid = 0u; cid < m_DistinctPersonCounts.size(); ++cid) {
             m_DistinctPersonCounts[cid].remove(pid);
         }
     }
 }
 
-void CPopulationModel::doSkipSampling(core_t::TTime startTime, core_t::TTime endTime)
-{
-    const CDataGatherer &gatherer = this->dataGatherer();
+void CPopulationModel::doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) {
+    const CDataGatherer& gatherer = this->dataGatherer();
     core_t::TTime gapDuration = endTime - startTime;
-    for (std::size_t pid = 0u; pid < m_PersonLastBucketTimes.size(); ++pid)
-    {
-        if (gatherer.isPersonActive(pid) && !CAnomalyDetectorModel::isTimeUnset(m_PersonLastBucketTimes[pid]))
-        {
+    for (std::size_t pid = 0u; pid < m_PersonLastBucketTimes.size(); ++pid) {
+        if (gatherer.isPersonActive(pid) && !CAnomalyDetectorModel::isTimeUnset(m_PersonLastBucketTimes[pid])) {
            m_PersonLastBucketTimes[pid] = m_PersonLastBucketTimes[pid] + gapDuration;
        }
    }
-    for (std::size_t cid = 0u; cid < m_AttributeLastBucketTimes.size(); ++cid)
-    {
-        if (gatherer.isAttributeActive(cid) && !CAnomalyDetectorModel::isTimeUnset(m_AttributeLastBucketTimes[cid]))
-        {
+    for (std::size_t cid = 0u; cid < m_AttributeLastBucketTimes.size(); ++cid) {
+        if (gatherer.isAttributeActive(cid) && !CAnomalyDetectorModel::isTimeUnset(m_AttributeLastBucketTimes[cid])) {
            m_AttributeLastBucketTimes[cid] = m_AttributeLastBucketTimes[cid] + gapDuration;
        }
    }
}
 
-CPopulationModel::CCorrectionKey::CCorrectionKey(model_t::EFeature feature,
-                                                 std::size_t pid,
-                                                 std::size_t cid,
-                                                 std::size_t correlated) :
-        m_Feature(feature), m_Pid(pid), m_Cid(cid), m_Correlate(correlated)
-{}
+CPopulationModel::CCorrectionKey::CCorrectionKey(model_t::EFeature feature, std::size_t pid, std::size_t cid, std::size_t correlated)
+    : m_Feature(feature), m_Pid(pid), m_Cid(cid), m_Correlate(correlated) {
+}
 
-bool CPopulationModel::CCorrectionKey::operator==(const CCorrectionKey &rhs) const
-{
-    return m_Feature == rhs.m_Feature
-           && m_Pid == rhs.m_Pid
-           && m_Cid == rhs.m_Cid
-           && m_Correlate == rhs.m_Correlate;
+bool CPopulationModel::CCorrectionKey::operator==(const CCorrectionKey& rhs) const {
+    return m_Feature == rhs.m_Feature && m_Pid == rhs.m_Pid && m_Cid == rhs.m_Cid && m_Correlate == rhs.m_Correlate;
 }
 
-std::size_t CPopulationModel::CCorrectionKey::hash() const
-{
+std::size_t CPopulationModel::CCorrectionKey::hash() const {
     uint64_t seed = core::CHashing::hashCombine(static_cast<uint64_t>(m_Feature), m_Pid);
     seed = core::CHashing::hashCombine(seed, m_Cid);
     return static_cast<std::size_t>(core::CHashing::hashCombine(seed, m_Correlate));
 }
-
 }
 }
diff --git a/lib/model/CProbabilityAndInfluenceCalculator.cc b/lib/model/CProbabilityAndInfluenceCalculator.cc
index 3277089370..0dabbc6a42 100644
--- a/lib/model/CProbabilityAndInfluenceCalculator.cc
+++ b/lib/model/CProbabilityAndInfluenceCalculator.cc
@@ -19,12 +19,9 @@
 #include
 #include
 
-namespace ml
-{
-namespace model
-{
-namespace
-{
+namespace ml {
+namespace model {
+namespace {
 
 using TSize1Vec = CProbabilityAndInfluenceCalculator::TSize1Vec;
 using TSize2Vec = CProbabilityAndInfluenceCalculator::TSize2Vec;
@@ -52,285 +49,243 @@ using TSizeDoublePr = std::pair<std::size_t, double>;
 using TSizeDoublePr1Vec = core::CSmallVector<TSizeDoublePr, 1>;
 
 //! Get the canonical influence string pointer.
-core::CStoredStringPtr canonical(const std::string &influence)
-{
+core::CStoredStringPtr canonical(const std::string& influence) {
     return CStringStore::influencers().get(influence);
 }
 
 //! \brief Orders two value influences by decreasing influence.
-class CDecreasingValueInfluence
-{
-    public:
-        CDecreasingValueInfluence(maths_t::ETail tail) : m_Tail(tail) {}
-
-        bool operator()(const TStrCRefDouble1VecDoublePrPr &lhs,
-                        const TStrCRefDouble1VecDoublePrPr &rhs) const
-        {
-            return m_Tail == maths_t::E_LeftTail ?
-                   lhs.second.first < rhs.second.first :
-                   lhs.second.first > rhs.second.first;
-        }
+class CDecreasingValueInfluence {
+public:
+    CDecreasingValueInfluence(maths_t::ETail tail) : m_Tail(tail) {}
+
+    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs, const TStrCRefDouble1VecDoublePrPr& rhs) const {
+        return m_Tail == maths_t::E_LeftTail ? lhs.second.first < rhs.second.first : lhs.second.first > rhs.second.first;
+    }
 
-    private:
-        maths_t::ETail m_Tail;
+private:
+    maths_t::ETail m_Tail;
 };
 
 //! \brief Orders two mean influences by decreasing influence.
-class CDecreasingMeanInfluence
-{
-    public:
-        using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
-
-    public:
-        CDecreasingMeanInfluence(maths_t::ETail tail, const TDouble2Vec &value, double count) :
-                m_Tail(tail),
-                m_Mean(maths::CBasicStatistics::accumulator(count, value[0]))
-        {}
-
-        bool operator()(const TStrCRefDouble1VecDoublePrPr &lhs,
-                        const TStrCRefDouble1VecDoublePrPr &rhs) const
-        {
-            TMeanAccumulator l = m_Mean - maths::CBasicStatistics::accumulator(lhs.second.second,
-                                                                               lhs.second.first[0]);
-            TMeanAccumulator r = m_Mean - maths::CBasicStatistics::accumulator(rhs.second.second,
-                                                                               rhs.second.first[0]);
-            double ml = maths::CBasicStatistics::mean(l);
-            double nl = maths::CBasicStatistics::count(l);
-            double mr = maths::CBasicStatistics::mean(r);
-            double nr = maths::CBasicStatistics::count(r);
-            return m_Tail == maths_t::E_LeftTail ?
-                   maths::COrderings::lexicographical_compare(mr, nl, ml, nr) :
-                   maths::COrderings::lexicographical_compare(ml, nl, mr, nr);
-        }
+class CDecreasingMeanInfluence {
+public:
+    using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
+
+public:
+    CDecreasingMeanInfluence(maths_t::ETail tail, const TDouble2Vec& value, double count)
+        : m_Tail(tail), m_Mean(maths::CBasicStatistics::accumulator(count, value[0])) {}
+
+    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs, const TStrCRefDouble1VecDoublePrPr& rhs) const {
+        TMeanAccumulator l = m_Mean - maths::CBasicStatistics::accumulator(lhs.second.second, lhs.second.first[0]);
+        TMeanAccumulator r = m_Mean - maths::CBasicStatistics::accumulator(rhs.second.second, rhs.second.first[0]);
+        double ml = maths::CBasicStatistics::mean(l);
+        double nl = maths::CBasicStatistics::count(l);
+        double mr = maths::CBasicStatistics::mean(r);
+        double nr = maths::CBasicStatistics::count(r);
+        return m_Tail == maths_t::E_LeftTail ? maths::COrderings::lexicographical_compare(mr, nl, ml, nr)
+                                             : maths::COrderings::lexicographical_compare(ml, nl, mr, nr);
+    }
 
-    private:
-        maths_t::ETail m_Tail;
-        TMeanAccumulator m_Mean;
+private:
+    maths_t::ETail m_Tail;
+    TMeanAccumulator m_Mean;
 };
 
 //! \brief Orders two variance influences by decreasing influence.
-class CDecreasingVarianceInfluence
-{
-    public:
-        using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
-
-    public:
-        CDecreasingVarianceInfluence(maths_t::ETail tail, const TDouble2Vec &value, double count) :
-                m_Tail(tail),
-                m_Variance(maths::CBasicStatistics::accumulator(count, value[1], value[0]))
-        {}
-
-        bool operator()(const TStrCRefDouble1VecDoublePrPr &lhs,
-                        const TStrCRefDouble1VecDoublePrPr &rhs) const
-        {
-            TMeanVarAccumulator l = m_Variance - maths::CBasicStatistics::accumulator(lhs.second.second,
-                                                                                      lhs.second.first[1],
-                                                                                      lhs.second.first[0]);
-            TMeanVarAccumulator r = m_Variance - maths::CBasicStatistics::accumulator(rhs.second.second,
-                                                                                      rhs.second.first[1],
-                                                                                      rhs.second.first[0]);
-            double vl = maths::CBasicStatistics::maximumLikelihoodVariance(l);
-            double nl = maths::CBasicStatistics::count(l);
-            double vr = maths::CBasicStatistics::maximumLikelihoodVariance(r);
-            double nr = maths::CBasicStatistics::count(r);
-            return m_Tail == maths_t::E_LeftTail ?
-                   maths::COrderings::lexicographical_compare(vr, nl, vl, nr) :
-                   maths::COrderings::lexicographical_compare(vl, nl, vr, nr);
-        }
+class CDecreasingVarianceInfluence {
+public:
+    using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
+
+public:
+    CDecreasingVarianceInfluence(maths_t::ETail tail, const TDouble2Vec& value, double count)
+        : m_Tail(tail), m_Variance(maths::CBasicStatistics::accumulator(count, value[1], value[0])) {}
+
+    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs, const TStrCRefDouble1VecDoublePrPr& rhs) const {
+        TMeanVarAccumulator l =
+            m_Variance - maths::CBasicStatistics::accumulator(lhs.second.second, lhs.second.first[1], lhs.second.first[0]);
+        TMeanVarAccumulator r =
+            m_Variance - maths::CBasicStatistics::accumulator(rhs.second.second, rhs.second.first[1], rhs.second.first[0]);
+        double vl = maths::CBasicStatistics::maximumLikelihoodVariance(l);
+        double nl = maths::CBasicStatistics::count(l);
+        double vr = maths::CBasicStatistics::maximumLikelihoodVariance(r);
+        double nr = maths::CBasicStatistics::count(r);
+        return m_Tail == maths_t::E_LeftTail ? maths::COrderings::lexicographical_compare(vr, nl, vl, nr)
+                                             : maths::COrderings::lexicographical_compare(vl, nl, vr, nr);
+    }
 
-    private:
-        maths_t::ETail m_Tail;
-        TMeanVarAccumulator m_Variance;
+private:
+    maths_t::ETail m_Tail;
+    TMeanVarAccumulator m_Variance;
 };
 
 //! A safe ratio function \p numerator / \p denominator dealing
 //! with the case \p numerator and/or \p denominator are zero.
-double ratio(double numerator, double denominator, double zeroDividedByZero)
-{
-    if (denominator == 0.0)
-    {
-        if (numerator == 0.0)
-        {
+double ratio(double numerator, double denominator, double zeroDividedByZero) {
+    if (denominator == 0.0) {
+        if (numerator == 0.0) {
             return zeroDividedByZero;
         }
-        return numerator < 0.0 ? -std::numeric_limits<double>::max() :
-                                  std::numeric_limits<double>::max();
+        return numerator < 0.0 ? -std::numeric_limits<double>::max() : std::numeric_limits<double>::max();
     }
     return numerator / denominator;
 }
 
 //! \brief Computes the value of summed statistics on the set difference.
-class CValueDifference
-{
-    public:
-        //! Features.
-        void operator()(const TDouble2Vec &v, double n,
-                        const TDouble1Vec &vi, double ni,
-                        maths::CModelProbabilityParams &params,
-                        TDouble2Vec &difference) const
-        {
-            params.addBucketEmpty(TBool2Vec{n == ni});
-            for (std::size_t i = 0u; i < v.size(); ++i)
-            {
-                difference[i] = v[i] - vi[i];
-            }
-        }
+class CValueDifference {
+public:
+    //! Features.
+    void operator()(const TDouble2Vec& v,
+                    double n,
+                    const TDouble1Vec& vi,
+                    double ni,
+                    maths::CModelProbabilityParams& params,
+                    TDouble2Vec& difference) const {
+        params.addBucketEmpty(TBool2Vec{n == ni});
+        for (std::size_t i = 0u; i < v.size(); ++i) {
+            difference[i] = v[i] - vi[i];
+        }
+    }
 
-        //! Correlates.
-        void operator()(const TDouble2Vec &v, const TDouble2Vec &n,
-                        const TDouble1Vec &vi, const TDouble1Vec &ni,
-                        maths::CModelProbabilityParams &params,
-                        TDouble2Vec &difference) const
-        {
-            TBool2Vec bucketEmpty(2);
-            for (std::size_t i = 0u; i < v.size(); ++i)
-            {
-                bucketEmpty[i] = ((n[i] - ni[i]) == 0);
-                difference[i] = v[i] - vi[i];
-
-            }
-            params.addBucketEmpty(bucketEmpty);
+    //! Correlates.
+    void operator()(const TDouble2Vec& v,
+                    const TDouble2Vec& n,
+                    const TDouble1Vec& vi,
+                    const TDouble1Vec& ni,
+                    maths::CModelProbabilityParams& params,
+                    TDouble2Vec& difference) const {
+        TBool2Vec bucketEmpty(2);
+        for (std::size_t i = 0u; i < v.size(); ++i) {
+            bucketEmpty[i] = ((n[i] - ni[i]) == 0);
+            difference[i] = v[i] - vi[i];
         }
+        params.addBucketEmpty(bucketEmpty);
+    }
 };
 
 //! \brief Computes the value of min, max, dc, etc on the set intersection.
-class CValueIntersection
-{
-    public:
-        //! Features.
-        void operator()(const TDouble2Vec &/*v*/, double /*n*/,
-                        const TDouble1Vec &vi, double ni,
-                        maths::CModelProbabilityParams &params,
-                        TDouble2Vec &intersection) const
-        {
-            params.addBucketEmpty(TBool2Vec{ni == 0});
-            for (std::size_t i = 0u; i < vi.size(); ++i)
-            {
-                intersection[i] = vi[i];
-            }
-        }
+class CValueIntersection {
+public:
+    //! Features.
+    void operator()(const TDouble2Vec& /*v*/,
+                    double /*n*/,
+                    const TDouble1Vec& vi,
+                    double ni,
+                    maths::CModelProbabilityParams& params,
+                    TDouble2Vec& intersection) const {
+        params.addBucketEmpty(TBool2Vec{ni == 0});
+        for (std::size_t i = 0u; i < vi.size(); ++i) {
+            intersection[i] = vi[i];
+        }
+    }
 
-        //! Correlates.
-        void operator()(const TDouble2Vec &/*v*/, const TDouble2Vec &/*n*/,
-                        const TDouble1Vec &vi, const TDouble1Vec &ni,
-                        maths::CModelProbabilityParams &params,
-                        TDouble2Vec &intersection) const
-        {
-            TBool2Vec bucketEmpty(2);
-            for (std::size_t i = 0u; i < vi.size(); ++i)
-            {
-                bucketEmpty[i] = (ni[i] == 0);
-                intersection[i] = vi[i];
-            }
-            params.addBucketEmpty(bucketEmpty);
-        }
+    //! Correlates.
+    void operator()(const TDouble2Vec& /*v*/,
+                    const TDouble2Vec& /*n*/,
+                    const TDouble1Vec& vi,
+                    const TDouble1Vec& ni,
+                    maths::CModelProbabilityParams& params,
+                    TDouble2Vec& intersection) const {
+        TBool2Vec bucketEmpty(2);
+        for (std::size_t i = 0u; i < vi.size(); ++i) {
+            bucketEmpty[i] = (ni[i] == 0);
+            intersection[i] = vi[i];
+        }
+        params.addBucketEmpty(bucketEmpty);
+    }
 };
 
 //! \brief Computes the value of the mean statistic on a set difference.
-class CMeanDifference
-{
-    public:
-        //! Features.
-        void operator()(const TDouble2Vec &v, double n,
-                        const TDouble1Vec &vi, double ni,
-                        maths::CModelProbabilityParams &params,
-                        TDouble2Vec &difference) const
-        {
-            params.addBucketEmpty(TBool2Vec{n == ni});
-            for (std::size_t d = 0u; d < v.size(); ++d)
-            {
-                for (std::size_t i = 0u; i < params.weightStyles().size(); ++i)
-                {
-                    if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight)
-                    {
-                        params.weights()[0][i][d] *= n / (n - ni);
-                        break;
-                    }
-                }
-                difference[d] = maths::CBasicStatistics::mean(
-                                    maths::CBasicStatistics::accumulator( n, v[d])
-                                  - maths::CBasicStatistics::accumulator(ni, vi[d]));
-            }
-        }
+class CMeanDifference {
+public:
+    //! Features.
+    void operator()(const TDouble2Vec& v,
+                    double n,
+                    const TDouble1Vec& vi,
+                    double ni,
+                    maths::CModelProbabilityParams& params,
+                    TDouble2Vec& difference) const {
+        params.addBucketEmpty(TBool2Vec{n == ni});
+        for (std::size_t d = 0u; d < v.size(); ++d) {
+            for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) {
+                if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) {
+                    params.weights()[0][i][d] *= n / (n - ni);
+                    break;
+                }
+            }
+            difference[d] = maths::CBasicStatistics::mean(maths::CBasicStatistics::accumulator(n, v[d]) -
+                                                          maths::CBasicStatistics::accumulator(ni, vi[d]));
+        }
+    }
 
-        //! Correlates.
-        void operator()(const TDouble2Vec &v, const TDouble2Vec &n,
-                        const TDouble1Vec &vi, const TDouble1Vec &ni,
-                        maths::CModelProbabilityParams &params,
-                        TDouble2Vec &difference) const
-        {
-            TBool2Vec bucketEmpty(2);
-            for (std::size_t d = 0u; d < 2; ++d)
-            {
-                bucketEmpty[d] = ((n[d] - ni[d]) == 0);
-                for (std::size_t i = 0u; i < params.weightStyles().size(); ++i)
-                {
-                    if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight)
-                    {
-                        params.weights()[0][i][d] *= n[d] / (n[d] - ni[d]);
-                        break;
-                    }
-                }
-                difference[d] = maths::CBasicStatistics::mean(
-                                    maths::CBasicStatistics::accumulator( n[d], v[d])
-                                  - maths::CBasicStatistics::accumulator(ni[d], vi[d]));
-            }
-            params.addBucketEmpty(bucketEmpty);
+    //! Correlates.
+    void operator()(const TDouble2Vec& v,
+                    const TDouble2Vec& n,
+                    const TDouble1Vec& vi,
+                    const TDouble1Vec& ni,
+                    maths::CModelProbabilityParams& params,
+                    TDouble2Vec& difference) const {
+        TBool2Vec bucketEmpty(2);
+        for (std::size_t d = 0u; d < 2; ++d) {
+            bucketEmpty[d] = ((n[d] - ni[d]) == 0);
+            for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) {
+                if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) {
+                    params.weights()[0][i][d] *= n[d] / (n[d] - ni[d]);
+                    break;
+                }
+            }
+            difference[d] = maths::CBasicStatistics::mean(maths::CBasicStatistics::accumulator(n[d], v[d]) -
+                                                          maths::CBasicStatistics::accumulator(ni[d], vi[d]));
         }
+        params.addBucketEmpty(bucketEmpty);
+    }
 };
 
 //! \brief Computes the value of the variance statistic on a set difference.
-class CVarianceDifference
-{
-    public:
-        //! Features.
-        void operator()(const TDouble1Vec &v, double n,
-                        const TDouble1Vec &vi, double ni,
-                        maths::CModelProbabilityParams &params,
-                        TDouble2Vec &difference) const
-        {
-            std::size_t dimension = v.size() / 2;
-            params.addBucketEmpty(TBool2Vec{n == ni});
-            for (std::size_t d = 0u; d < dimension; ++d)
-            {
-                for (std::size_t i = 0u; i < params.weightStyles().size(); ++i)
-                {
-                    if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight)
-                    {
-                        params.weights()[0][i][d] *= n / (n - ni);
-                        break;
-                    }
-                }
-                difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance(
-                                    maths::CBasicStatistics::accumulator( n, v[dimension + d], v[d])
-                                  - maths::CBasicStatistics::accumulator(ni, vi[dimension + d], vi[d]));
-            }
-        }
+class CVarianceDifference {
+public:
+    //! Features.
+    void operator()(const TDouble1Vec& v,
+                    double n,
+                    const TDouble1Vec& vi,
+                    double ni,
+                    maths::CModelProbabilityParams& params,
+                    TDouble2Vec& difference) const {
+        std::size_t dimension = v.size() / 2;
+        params.addBucketEmpty(TBool2Vec{n == ni});
+        for (std::size_t d = 0u; d < dimension; ++d) {
+            for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) {
+                if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) {
+                    params.weights()[0][i][d] *= n / (n - ni);
+                    break;
+                }
+            }
+            difference[d] =
+                maths::CBasicStatistics::maximumLikelihoodVariance(maths::CBasicStatistics::accumulator(n, v[dimension + d], v[d]) -
+                                                                   maths::CBasicStatistics::accumulator(ni, vi[dimension + d], vi[d]));
+        }
+    }
 
-        //! Correlates.
-        void operator()(const TDouble2Vec &v, const TDouble2Vec &n,
-                        const TDouble1Vec &vi, const TDouble1Vec &ni,
-                        maths::CModelProbabilityParams &params,
-                        TDouble2Vec &difference) const
-        {
-            TBool2Vec bucketEmpty(2);
-            for (std::size_t d = 0u; d < 2; ++d)
-            {
-                bucketEmpty[d] = ((n[d] - ni[d]) == 0);
-                for (std::size_t i = 0u; i < params.weightStyles().size(); ++i)
-                {
-                    if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight)
-                    {
-                        params.weights()[0][i][d] *= n[d] / (n[d] - ni[d]);
-                        break;
-                    }
-                }
-                difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance(
-                                    maths::CBasicStatistics::accumulator( n[d], v[2 + d], v[d])
-                                  - maths::CBasicStatistics::accumulator(ni[d], vi[2 + d], vi[d]));
-            }
-            params.addBucketEmpty(bucketEmpty);
+    //! Correlates.
+    void operator()(const TDouble2Vec& v,
+                    const TDouble2Vec& n,
+                    const TDouble1Vec& vi,
+                    const TDouble1Vec& ni,
+                    maths::CModelProbabilityParams& params,
+                    TDouble2Vec& difference) const {
+        TBool2Vec bucketEmpty(2);
+        for (std::size_t d = 0u; d < 2; ++d) {
+            bucketEmpty[d] = ((n[d] - ni[d]) == 0);
+            for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) {
+                if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) {
+                    params.weights()[0][i][d] *= n[d] / (n[d] - ni[d]);
+                    break;
+                }
+            }
+            difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance(
+                maths::CBasicStatistics::accumulator(n[d], v[2 + d], v[d]) - maths::CBasicStatistics::accumulator(ni[d], vi[2 + d], vi[d]));
         }
+        params.addBucketEmpty(bucketEmpty);
+    }
 };
 
 //! Sets all influences to one.
@@ -340,15 +295,12 @@ class CVarianceDifference
 //! of the records in \p value with distinct values of \p influenceName.
 //! \param[out] result Filled in with the influences of \p value.
 template<typename INFLUENCER_VALUES>
-void doComputeIndicatorInfluences(const core::CStoredStringPtr &influencerName,
-                                  const INFLUENCER_VALUES &influencerValues,
-                                  TStoredStringPtrStoredStringPtrPrDoublePrVec &result)
-{
+void doComputeIndicatorInfluences(const core::CStoredStringPtr& influencerName,
+                                  const INFLUENCER_VALUES& influencerValues,
+                                  TStoredStringPtrStoredStringPtrPrDoublePrVec& result) {
     result.reserve(influencerValues.size());
-    for (const auto &influencerValue : influencerValues)
-    {
-        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName,
-                                                              canonical(influencerValue.first)), 1.0);
+    for (const auto& influencerValue : influencerValues) {
+        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(influencerValue.first)), 1.0);
     }
 }
 
@@ -377,27 +329,23 @@ template
 void doComputeInfluences(model_t::EFeature feature,
                          COMPUTE_INFLUENCED_VALUE computeInfluencedValue,
                          COMPUTE_INFLUENCE computeInfluence,
-                         const maths::CModel &model,
+                         const maths::CModel& model,
                          core_t::TTime elapsedTime,
-                         maths::CModelProbabilityParams &params,
-                         const TTime2Vec1Vec &time,
-                         const TDouble2Vec &value,
+                         maths::CModelProbabilityParams& params,
+                         const TTime2Vec1Vec& time,
+                         const TDouble2Vec& value,
                          double count,
                          double probability,
-                         const core::CStoredStringPtr &influencerName,
-                         const TStrCRefDouble1VecDoublePrPrVec &influencerValues,
+                         const core::CStoredStringPtr& influencerName,
+                         const TStrCRefDouble1VecDoublePrPrVec& influencerValues,
                          double cutoff,
                          bool includeCutoff,
-                         TStoredStringPtrStoredStringPtrPrDoublePrVec &result)
-{
-    if (influencerValues.size() == 1)
-    {
-        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName,
-                                                              canonical(influencerValues[0].first)), 1.0);
+                         TStoredStringPtrStoredStringPtrPrDoublePrVec& result) {
+    if (influencerValues.size() == 1) {
+        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(influencerValues[0].first)), 1.0);
        return;
    }
-    if (probability == 1.0)
-    {
+    if (probability == 1.0) {
        doComputeIndicatorInfluences(influencerName, influencerValues, result);
        return;
    }
@@ -412,23 +360,15 @@ void doComputeInfluences(model_t::EFeature feature,
     double logp = maths::CTools::fastLog(probability);
 
     TDouble2Vec4Vec1Vec weights(params.weights());
-    for (auto i = influencerValues.begin(); i != influencerValues.end(); ++i)
-    {
-        params.weights(weights)
-              .updateAnomalyModel(false);
+    for (auto i = influencerValues.begin(); i != influencerValues.end(); ++i) {
+        params.weights(weights).updateAnomalyModel(false);
 
-        computeInfluencedValue(value, count,
-                               i->second.first,
-                               i->second.second,
-                               params, influencedValue[0]);
+        computeInfluencedValue(value, count, i->second.first, i->second.second, params, influencedValue[0]);
 
         double pi;
         bool conditional;
-        if (!model.probability(params, time, influencedValue,
-                               pi, tail, conditional, mostAnomalousCorrelate))
-        {
-            LOG_ERROR("Failed to compute P(" << influencedValue[0]
-                      << " | influencer = " << core::CContainerPrinter::print(*i) << ")");
+        if (!model.probability(params, time, influencedValue, pi, tail, conditional, mostAnomalousCorrelate)) {
+            LOG_ERROR("Failed to compute P(" << influencedValue[0] << " | influencer = " << core::CContainerPrinter::print(*i) << ")");
             continue;
         }
         pi = maths::CTools::truncate(pi, maths::CTools::smallestProbability(), 1.0);
@@ -436,42 +376,25 @@ void doComputeInfluences(model_t::EFeature feature,
 
         double influence = computeInfluence(logp, maths::CTools::fastLog(pi));
 
-        LOG_TRACE("log(p) = " << logp
-                  << ", tail = " << core::CContainerPrinter::print(tail)
-                  << ", v(i) = " << core::CContainerPrinter::print(influencedValue)
-                  << ", log(p(i)) = " << std::log(pi)
-                  << ", weight = " << core::CContainerPrinter::print(params.weights())
-                  << ", influence = " << influence
-                  << ", influencer field value = " << i->first.get());
-
-        if (dimension == 1 && influence >= cutoff)
-        {
-            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)),
-                                influence);
-        }
-        else if (dimension == 1)
-        {
-            if (includeCutoff)
-            {
-                result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)),
-                                    influence);
-                for (++i; i != influencerValues.end(); ++i)
-                {
-                    result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)),
-                                        0.5 * influence);
+        LOG_TRACE("log(p) = " << logp << ", tail = " << core::CContainerPrinter::print(tail)
+                              << ", v(i) = " << core::CContainerPrinter::print(influencedValue) << ", log(p(i)) = " << std::log(pi)
+                              << ", weight = " << core::CContainerPrinter::print(params.weights()) << ", influence = " << influence
+                              << ", influencer field value = " << i->first.get());
+
+        if (dimension == 1 && influence >= cutoff) {
+            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), influence);
+        } else if (dimension == 1) {
+            if (includeCutoff) {
+                result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), influence);
+                for (++i; i != influencerValues.end(); ++i) {
+                    result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), 0.5 * influence);
                 }
             }
             break;
-        }
-        else if (influence >= cutoff)
-        {
-            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)),
-                                influence);
-        }
-        else if (includeCutoff)
-        {
-            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)),
-                                0.5 * influence);
+        } else if (influence >= cutoff) {
+            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), influence);
+        } else if (includeCutoff) {
+            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), 0.5 * influence);
        }
    }
}
@@ -484,27 +407,23 @@ template
 void doComputeCorrelateInfluences(model_t::EFeature feature,
                                   COMPUTE_INFLUENCED_VALUE computeInfluencedValue,
                                   COMPUTE_INFLUENCE computeInfluence,
-                                  const maths::CModel &model,
+                                  const maths::CModel& model,
                                   core_t::TTime elapsedTime,
-                                  maths::CModelProbabilityParams &params,
-                                  const TTime2Vec &time,
-                                  const TDouble2Vec &value,
-                                  const TDouble2Vec &count,
+                                  maths::CModelProbabilityParams& params,
+                                  const TTime2Vec& time,
+                                  const TDouble2Vec& value,
+                                  const TDouble2Vec& count,
                                   double probability,
-                                  const core::CStoredStringPtr &influencerName,
-                                  const TStrCRefDouble1VecDouble1VecPrPrVec &influencerValues,
+                                  const core::CStoredStringPtr& influencerName,
+                                  const TStrCRefDouble1VecDouble1VecPrPrVec& influencerValues,
                                   double cutoff,
                                   bool includeCutoff,
-                                  TStoredStringPtrStoredStringPtrPrDoublePrVec &result)
-{
-    if (influencerValues.size() == 1)
-    {
-        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName,
-                                                              canonical(influencerValues[0].first)), 1.0);
+                                  TStoredStringPtrStoredStringPtrPrDoublePrVec& result) {
+    if (influencerValues.size() == 1) {
+        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(influencerValues[0].first)), 1.0);
        return;
    }
-    if (probability == 1.0)
-    {
+    if (probability == 1.0) {
        doComputeIndicatorInfluences(influencerName, influencerValues, result);
        return;
    }
@@ -518,23 +437,16 @@ void doComputeCorrelateInfluences(model_t::EFeature feature,
     double logp = std::log(probability);
 
     TDouble2Vec4Vec1Vec weights(params.weights());
-    for (const auto &influence_ : influencerValues)
-    {
-        params.weights(weights)
-              .updateAnomalyModel(false);
+    for (const auto& influence_ : influencerValues) {
+        params.weights(weights).updateAnomalyModel(false);
 
-        computeInfluencedValue(value, count,
-                               influence_.second.first,
-                               influence_.second.second,
-                               params, influencedValue[0]);
+        computeInfluencedValue(value, count, influence_.second.first, influence_.second.second, params, influencedValue[0]);
 
         double pi;
         bool conditional;
-        if (!model.probability(params, TTime2Vec1Vec{time}, influencedValue,
-                               pi, tail, conditional, mostAnomalousCorrelate))
-        {
+        if (!model.probability(params, TTime2Vec1Vec{time}, influencedValue, pi, tail, conditional, mostAnomalousCorrelate)) {
             LOG_ERROR("Failed to compute P(" << core::CContainerPrinter::print(influencedValue)
-                      << " | influencer = " << core::CContainerPrinter::print(influence_) << ")");
+                                             << " | influencer = " << core::CContainerPrinter::print(influence_) << ")");
             continue;
         }
         pi = maths::CTools::truncate(pi, maths::CTools::smallestProbability(), 1.0);
@@ -542,145 +454,133 @@ void doComputeCorrelateInfluences(model_t::EFeature feature,
 
         double influence = computeInfluence(logp, std::log(pi));
 
-        LOG_TRACE("log(p) = " << logp
-                  << ", v(i) = " << core::CContainerPrinter::print(influencedValue)
-                  << ", log(p(i)) = " << std::log(pi)
-                  << ", weight(i) = " << core::CContainerPrinter::print(params.weights())
-                  << ", influence = " << influence
-                  << ", influencer field value = " << influence_.first.get());
-
-        if (includeCutoff || influence >= cutoff)
-        {
-            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName,
-                                                                  canonical(influence_.first)), influence);
+        LOG_TRACE("log(p) = " << logp << ", v(i) = " << core::CContainerPrinter::print(influencedValue) << ", log(p(i)) = " << std::log(pi)
+                              << ", weight(i) = " << core::CContainerPrinter::print(params.weights()) << ", influence = " << influence
+                              << ", influencer field value = " << influence_.first.get());
+
+        if (includeCutoff || influence >= cutoff) {
+            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(influence_.first)), influence);
         }
     }
 }
-
 }
 
-CProbabilityAndInfluenceCalculator::CProbabilityAndInfluenceCalculator(double cutoff) :
-        m_Cutoff(cutoff),
-        m_InfluenceCalculator(0),
-        m_ProbabilityTemplate(CModelTools::CProbabilityAggregator::E_Min),
-        m_Probability(CModelTools::CProbabilityAggregator::E_Min),
-        m_ProbabilityCache(0)
-{}
+CProbabilityAndInfluenceCalculator::CProbabilityAndInfluenceCalculator(double cutoff)
+    : m_Cutoff(cutoff),
+      m_InfluenceCalculator(0),
+      m_ProbabilityTemplate(CModelTools::CProbabilityAggregator::E_Min),
+      m_Probability(CModelTools::CProbabilityAggregator::E_Min),
+      m_ProbabilityCache(0) {
+}
 
-bool CProbabilityAndInfluenceCalculator::empty() const
-{
+bool CProbabilityAndInfluenceCalculator::empty() const {
     return m_Probability.empty();
 }
 
-double CProbabilityAndInfluenceCalculator::cutoff() const
-{
+double CProbabilityAndInfluenceCalculator::cutoff() const {
     return m_Cutoff;
 }
 
-void CProbabilityAndInfluenceCalculator::plugin(const CInfluenceCalculator &influenceCalculator)
-{
+void CProbabilityAndInfluenceCalculator::plugin(const CInfluenceCalculator& influenceCalculator) {
     m_InfluenceCalculator = &influenceCalculator;
 }
 
-void CProbabilityAndInfluenceCalculator::addAggregator(const maths::CJointProbabilityOfLessLikelySamples &aggregator)
-{
+void CProbabilityAndInfluenceCalculator::addAggregator(const maths::CJointProbabilityOfLessLikelySamples& aggregator) {
     m_ProbabilityTemplate.add(aggregator);
     m_Probability.add(aggregator);
 }
 
-void CProbabilityAndInfluenceCalculator::addAggregator(const maths::CProbabilityOfExtremeSample &aggregator)
-{
+void CProbabilityAndInfluenceCalculator::addAggregator(const maths::CProbabilityOfExtremeSample& aggregator) {
     m_ProbabilityTemplate.add(aggregator);
     m_Probability.add(aggregator);
 }
 
-void CProbabilityAndInfluenceCalculator::addCache(CModelTools::CProbabilityCache &cache)
-{
+void CProbabilityAndInfluenceCalculator::addCache(CModelTools::CProbabilityCache& cache) {
     m_ProbabilityCache = &cache;
 }
 
-void CProbabilityAndInfluenceCalculator::add(const CProbabilityAndInfluenceCalculator &other, double weight)
-{
+void CProbabilityAndInfluenceCalculator::add(const CProbabilityAndInfluenceCalculator& other, double weight) {
     double p = 0.0;
-    if (!other.m_Probability.calculate(p))
-    {
+    if (!other.m_Probability.calculate(p)) {
         return;
     }
-    if (!other.m_Probability.empty())
-    {
+    if (!other.m_Probability.empty()) {
         m_Probability.add(p, weight);
     }
-    for (const auto &aggregator : other.m_InfluencerProbabilities)
-    {
-        if (aggregator.second.calculate(p))
-        {
-            auto &aggregator_ = m_InfluencerProbabilities.emplace(aggregator.first,
-                                                                  other.m_ProbabilityTemplate).first->second;
-            if (!aggregator.second.empty())
-            {
+    for (const auto& aggregator : other.m_InfluencerProbabilities) {
+        if (aggregator.second.calculate(p)) {
+            auto& aggregator_ = m_InfluencerProbabilities.emplace(aggregator.first, other.m_ProbabilityTemplate).first->second;
+            if (!aggregator.second.empty()) {
                 aggregator_.add(p, weight);
             }
        }
    }
}
 
-bool CProbabilityAndInfluenceCalculator::addAttributeProbability(const core::CStoredStringPtr &attribute,
+bool CProbabilityAndInfluenceCalculator::addAttributeProbability(const core::CStoredStringPtr& attribute,
                                                                  std::size_t cid,
                                                                  double pAttribute,
-                                                                 SParams &params,
-                                                                 CAnnotatedProbabilityBuilder &builder,
-                                                                 double weight)
-{
+                                                                 SParams& params,
+                                                                 CAnnotatedProbabilityBuilder& builder,
+                                                                 double weight) {
     model_t::CResultType type;
     TSize1Vec mostAnomalousCorrelate;
-    if (this->addProbability(params.s_Feature, cid, *params.s_Model,
+    if (this->addProbability(params.s_Feature,
+                             cid,
+                             *params.s_Model,
                              params.s_ElapsedTime,
                              params.s_ComputeProbabilityParams,
-                             params.s_Time, params.s_Value,
-                             params.s_Probability, params.s_Tail,
-                             type, mostAnomalousCorrelate, weight))
-    {
+                             params.s_Time,
+                             params.s_Value,
+                             params.s_Probability,
+                             params.s_Tail,
+                             type,
+                             mostAnomalousCorrelate,
+                             weight)) {
         static const TStoredStringPtr1Vec NO_CORRELATED_ATTRIBUTES;
         static const TSizeDoublePr1Vec NO_CORRELATES;
-        builder.addAttributeProbability(cid, attribute,
-                                        pAttribute, params.s_Probability,
+        builder.addAttributeProbability(cid,
+                                        attribute,
+                                        pAttribute,
+                                        params.s_Probability,
                                         model_t::CResultType::E_Unconditional,
                                         params.s_Feature,
-                                        NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
+                                        NO_CORRELATED_ATTRIBUTES,
+                                        NO_CORRELATES);
         return true;
     }
     return false;
 }
 
-bool CProbabilityAndInfluenceCalculator::addAttributeProbability(const core::CStoredStringPtr &attribute,
+bool CProbabilityAndInfluenceCalculator::addAttributeProbability(const core::CStoredStringPtr& attribute,
                                                                  std::size_t cid,
                                                                  double pAttribute,
-                                                                 SCorrelateParams &params,
-                                                                 CAnnotatedProbabilityBuilder &builder,
-                                                                 double weight)
-{
+                                                                 SCorrelateParams& params,
+                                                                 CAnnotatedProbabilityBuilder& builder,
+                                                                 double weight) {
     model_t::CResultType type;
     params.s_MostAnomalousCorrelate.clear();
-    if (this->addProbability(params.s_Feature, cid, *params.s_Model,
+    if (this->addProbability(params.s_Feature,
+                             cid,
+                             *params.s_Model,
                              params.s_ElapsedTime,
                              params.s_ComputeProbabilityParams,
-                             params.s_Times, params.s_Values,
-                             params.s_Probability, params.s_Tail,
-                             type, params.s_MostAnomalousCorrelate, weight))
-    {
+                             params.s_Times,
+                             params.s_Values,
+                             params.s_Probability,
+                             params.s_Tail,
+                             type,
+                             params.s_MostAnomalousCorrelate,
+                             weight)) {
         TStoredStringPtr1Vec correlatedLabels_;
         TSizeDoublePr1Vec correlated_;
-        if (!params.s_MostAnomalousCorrelate.empty())
-        {
+        if (!params.s_MostAnomalousCorrelate.empty()) {
             std::size_t i = params.s_MostAnomalousCorrelate[0];
             correlatedLabels_.push_back(params.s_CorrelatedLabels[i]);
-            correlated_.emplace_back(params.s_Correlated[i],
-                                     params.s_Values[i][params.s_Variables[i][1]]);
+            correlated_.emplace_back(params.s_Correlated[i], params.s_Values[i][params.s_Variables[i][1]]);
         }
-        builder.addAttributeProbability(cid, attribute,
-                                        pAttribute, params.s_Probability,
-                                        type, params.s_Feature,
-                                        correlatedLabels_, correlated_);
+        builder.addAttributeProbability(
+            cid, attribute, pAttribute, params.s_Probability, type, params.s_Feature, correlatedLabels_, correlated_);
         return true;
     }
     return false;
@@ -688,34 +588,28 @@ bool CProbabilityAndInfluenceCalculator::addAttributeProbability(const core::CSt
 
 bool CProbabilityAndInfluenceCalculator::addProbability(model_t::EFeature feature,
                                                         std::size_t id,
-                                                        const maths::CModel &model,
+                                                        const maths::CModel& model,
                                                         core_t::TTime elapsedTime,
-                                                        const maths::CModelProbabilityParams &params,
-                                                        const TTime2Vec1Vec &time,
-                                                        const TDouble2Vec1Vec &values_,
-                                                        double &probability,
-                                                        TTail2Vec &tail,
-                                                        model_t::CResultType &type,
-                                                        TSize1Vec &mostAnomalousCorrelate,
-                                                        double weight)
-{
-    if (values_.empty())
-    {
+                                                        const maths::CModelProbabilityParams& params,
+                                                        const TTime2Vec1Vec& time,
+                                                        const TDouble2Vec1Vec& values_,
+                                                        double& probability,
+                                                        TTail2Vec& tail,
+                                                        model_t::CResultType& type,
+                                                        TSize1Vec& mostAnomalousCorrelate,
+                                                        double weight) {
+    if (values_.empty()) {
         return false;
     }
 
     // Maybe check the cache.
-    if (!model_t::isConstant(feature) && m_ProbabilityCache)
-    {
+    if (!model_t::isConstant(feature) && m_ProbabilityCache) {
         TDouble2Vec1Vec values(model_t::stripExtraStatistics(feature, values_));
         model.detrend(time, params.seasonalConfidenceInterval(), values);
         bool conditional;
-        if (m_ProbabilityCache->lookup(feature, id, values, probability, tail,
-                                       conditional, mostAnomalousCorrelate))
-        {
+        if (m_ProbabilityCache->lookup(feature, id, values, probability, tail, conditional, mostAnomalousCorrelate)) {
             m_Probability.add(probability, weight);
-            type.set(conditional ? model_t::CResultType::E_Conditional :
-                                   model_t::CResultType::E_Unconditional);
+            type.set(conditional ? model_t::CResultType::E_Conditional : model_t::CResultType::E_Unconditional);
             return true;
         }
     }
@@ -724,25 +618,16 @@ bool CProbabilityAndInfluenceCalculator::addProbability(model_t::EFeature featur
     // to calculating.
     TDouble2Vec1Vec values(model_t::stripExtraStatistics(feature, values_));
     bool conditional;
-    if (model.probability(params, time, values,
-                          probability, tail,
-                          conditional, mostAnomalousCorrelate))
-    {
-        if (!model_t::isConstant(feature))
-        {
+    if (model.probability(params, time, values, probability, tail, conditional, mostAnomalousCorrelate)) {
+        if (!model_t::isConstant(feature)) {
             probability = model_t::adjustProbability(feature, elapsedTime, probability);
             m_Probability.add(probability, weight);
-            type.set(conditional ? model_t::CResultType::E_Conditional :
-                                   model_t::CResultType::E_Unconditional);
-            if (m_ProbabilityCache)
-            {
+            type.set(conditional ? model_t::CResultType::E_Conditional : model_t::CResultType::E_Unconditional);
+            if (m_ProbabilityCache) {
                 m_ProbabilityCache->addModes(feature, id, model);
-                m_ProbabilityCache->addProbability(feature, id, values, probability, tail,
-                                                   conditional, mostAnomalousCorrelate);
+                m_ProbabilityCache->addProbability(feature, id, values, probability, tail, conditional, mostAnomalousCorrelate);
             }
-        }
-        else
-        {
+        } else {
             type.set(model_t::CResultType::E_Unconditional);
             mostAnomalousCorrelate.clear();
         }
@@ -752,115 +637,93 @@ bool CProbabilityAndInfluenceCalculator::addProbability(model_t::EFeature featur
 
     return false;
 }
 
-void CProbabilityAndInfluenceCalculator::addProbability(double probability, double weight)
-{
+void CProbabilityAndInfluenceCalculator::addProbability(double probability, double weight) {
     m_Probability.add(probability, weight);
-    for (auto &aggregator : m_InfluencerProbabilities)
-    {
+    for (auto& aggregator : m_InfluencerProbabilities) {
         aggregator.second.add(probability, weight);
     }
 }
 
-void CProbabilityAndInfluenceCalculator::addInfluences(const std::string &influencerName,
-                                                       const TStrCRefDouble1VecDoublePrPrVec &influencerValues,
-                                                       SParams &params,
-                                                       double weight)
-{
-    if (!m_InfluenceCalculator)
-    {
+void CProbabilityAndInfluenceCalculator::addInfluences(const std::string& influencerName,
+                                                       const TStrCRefDouble1VecDoublePrPrVec& influencerValues,
+                                                       SParams& params,
+                                                       double weight) {
+    if (!m_InfluenceCalculator) {
         LOG_ERROR("No influence calculator plug-in: can't compute influence");
         return;
     }
 
-    const std::string *influencerValue = 0;
-    if (influencerValues.empty())
-    {
-        for (std::size_t i = 0u; i < params.s_PartitioningFields.size(); ++i)
-        {
-            if (params.s_PartitioningFields[i].first.get() == influencerName)
-            {
+    const std::string* influencerValue = 0;
+    if (influencerValues.empty()) {
+        for (std::size_t i = 0u; i < params.s_PartitioningFields.size(); ++i) {
+            if (params.s_PartitioningFields[i].first.get() == influencerName) {
                 influencerValue = params.s_PartitioningFields[i].second.get_pointer();
                 break;
             }
        }
-        if (!influencerValue)
-        {
+        if (!influencerValue) {
            return;
        }
    }
 
     double logp = std::log(std::max(params.s_Probability, maths::CTools::smallestProbability()));
 
-    params.s_InfluencerName   = canonical(influencerName);
+    params.s_InfluencerName = canonical(influencerName);
     params.s_InfluencerValues = influencerValues;
     params.s_Cutoff = 0.5 / std::max(-logp, 1.0);
     params.s_IncludeCutoff = true;
 
     m_InfluenceCalculator->computeInfluences(params);
     m_Influences.swap(params.s_Influences);
-    if (m_Influences.empty() && influencerValue)
-    {
-        m_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(params.s_InfluencerName,
-                                                                    canonical(*influencerValue)), 1.0);
+    if (m_Influences.empty() && influencerValue) {
+        m_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(params.s_InfluencerName, canonical(*influencerValue)), 1.0);
     }
 
     this->commitInfluences(params.s_Feature, logp, weight);
 }
 
-void CProbabilityAndInfluenceCalculator::addInfluences(const std::string &influencerName,
-                                                       const TStrCRefDouble1VecDouble1VecPrPrVecVec &influencerValues,
-                                                       SCorrelateParams &params,
-                                                       double weight)
-{
-    if (!m_InfluenceCalculator)
-    {
+void CProbabilityAndInfluenceCalculator::addInfluences(const std::string& influencerName,
                                                       const TStrCRefDouble1VecDouble1VecPrPrVecVec& influencerValues,
                                                       SCorrelateParams& params,
                                                       double weight) {
+    if (!m_InfluenceCalculator) {
         LOG_ERROR("No influence calculator plug-in: can't compute influence");
         return;
     }
 
-    const std::string *influencerValue = 0;
-    if (influencerValues.empty())
-    {
-        for (std::size_t i = 0u; i < params.s_PartitioningFields.size(); ++i)
-        {
-            if (params.s_PartitioningFields[i].first.get() == influencerName)
-            {
+    const std::string* influencerValue = 0;
+    if (influencerValues.empty()) {
+        for (std::size_t i = 0u; i < params.s_PartitioningFields.size(); ++i) {
+            if (params.s_PartitioningFields[i].first.get() == influencerName) {
                 influencerValue = params.s_PartitioningFields[i].second.get_pointer();
                 break;
             }
        }
-        if (!influencerValue)
-        {
+        if (!influencerValue) {
            return;
        }
    }
 
     double logp = std::log(std::max(params.s_Probability, maths::CTools::smallestProbability()));
 
-    params.s_InfluencerName   = canonical(influencerName);
+    params.s_InfluencerName = canonical(influencerName);
     params.s_InfluencerValues = influencerValues[params.s_MostAnomalousCorrelate[0]];
     params.s_Cutoff = 0.5 / std::max(-logp, 1.0);
     params.s_IncludeCutoff = true;
 
     m_InfluenceCalculator->computeInfluences(params);
     m_Influences.swap(params.s_Influences);
-    if (m_Influences.empty() && influencerValue)
-    {
-        m_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(params.s_InfluencerName,
-                                                                    canonical(*influencerValue)), 1.0);
+    if (m_Influences.empty() && influencerValue) {
+        m_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(params.s_InfluencerName, canonical(*influencerValue)), 1.0);
     }
 
     this->commitInfluences(params.s_Feature, logp, weight);
 }
 
-bool CProbabilityAndInfluenceCalculator::calculate(double &probability) const
-{
+bool CProbabilityAndInfluenceCalculator::calculate(double& probability) const {
     return m_Probability.calculate(probability);
 }
 
-bool CProbabilityAndInfluenceCalculator::calculate(double &probability,
-                                                   TStoredStringPtrStoredStringPtrPrDoublePrVec &influences) const
-{
-    if (!m_Probability.calculate(probability))
-    {
+bool CProbabilityAndInfluenceCalculator::calculate(double& probability, TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) const {
+    if (!m_Probability.calculate(probability)) {
         return false;
     }
 
@@ -869,18 +732,14 @@ bool CProbabilityAndInfluenceCalculator::calculate(double &probability,
     double logp = std::log(probability);
 
     influences.reserve(m_InfluencerProbabilities.size());
-    for (const auto &aggregator : m_InfluencerProbabilities)
-    {
+    for (const auto& aggregator : m_InfluencerProbabilities) {
         double probability_;
-        if (!aggregator.second.calculate(probability_))
-        {
-            LOG_ERROR("Couldn't calculate probability for influencer "
-                      << core::CContainerPrinter::print(aggregator.first));
+        if (!aggregator.second.calculate(probability_)) {
+            LOG_ERROR("Couldn't calculate probability for influencer " << core::CContainerPrinter::print(aggregator.first));
         }
         LOG_TRACE("influence probability = " << probability_);
         double influence = CInfluenceCalculator::intersectionInfluence(logp, std::log(probability_));
-        if (influence >= m_Cutoff)
-        {
+        if (influence >= m_Cutoff) {
influences.emplace_back(aggregator.first, influence); } } @@ -889,19 +748,13 @@ bool CProbabilityAndInfluenceCalculator::calculate(double &probability, return true; } -void CProbabilityAndInfluenceCalculator::commitInfluences(model_t::EFeature feature, - double logp, - double weight) -{ +void CProbabilityAndInfluenceCalculator::commitInfluences(model_t::EFeature feature, double logp, double weight) { LOG_TRACE("influences = " << core::CContainerPrinter::print(m_Influences)); - for (const auto &influence : m_Influences) - { - CModelTools::CProbabilityAggregator &aggregator = - m_InfluencerProbabilities.emplace(influence.first, - m_ProbabilityTemplate).first->second; - if (!model_t::isConstant(feature)) - { + for (const auto& influence : m_Influences) { + CModelTools::CProbabilityAggregator& aggregator = + m_InfluencerProbabilities.emplace(influence.first, m_ProbabilityTemplate).first->second; + if (!model_t::isConstant(feature)) { double probability = std::exp(influence.second * logp); LOG_TRACE("Adding = " << influence.first.second.get() << " " << probability); aggregator.add(probability, weight); @@ -909,170 +762,156 @@ void CProbabilityAndInfluenceCalculator::commitInfluences(model_t::EFeature feat } } -CProbabilityAndInfluenceCalculator::SParams::SParams(const CPartitioningFields &partitioningFields) : - s_Feature(), - s_Model(0), - s_ElapsedTime(0), - s_Count(0.0), - s_Probability(1.0), - s_PartitioningFields(partitioningFields), - s_Cutoff(1.0), - s_IncludeCutoff(false) -{} - -std::string CProbabilityAndInfluenceCalculator::SParams::describe() const -{ - return core::CContainerPrinter::print(s_Value) - + " | feature = " + model_t::print(s_Feature) - + ", @ " + core::CContainerPrinter::print(s_Time) - + ", elapsedTime = " + core::CStringUtils::typeToString(s_ElapsedTime); +CProbabilityAndInfluenceCalculator::SParams::SParams(const CPartitioningFields& partitioningFields) + : s_Feature(), + s_Model(0), + s_ElapsedTime(0), + s_Count(0.0), + s_Probability(1.0), + s_PartitioningFields(partitioningFields), + s_Cutoff(1.0), + s_IncludeCutoff(false) { } -CProbabilityAndInfluenceCalculator::SCorrelateParams::SCorrelateParams(const CPartitioningFields &partitioningFields) : - s_Feature(), - s_Model(0), - s_ElapsedTime(0), - s_Probability(1.0), - s_PartitioningFields(partitioningFields), - s_Cutoff(1.0), - s_IncludeCutoff(false) -{} - -std::string CProbabilityAndInfluenceCalculator::SCorrelateParams::describe() const -{ - return core::CContainerPrinter::print(s_Values) - + " | feature = " + model_t::print(s_Feature) - + ", @ " + core::CContainerPrinter::print(s_Times) - + ", elapsedTime = " + core::CStringUtils::typeToString(s_ElapsedTime); +std::string CProbabilityAndInfluenceCalculator::SParams::describe() const { + return core::CContainerPrinter::print(s_Value) + " | feature = " + model_t::print(s_Feature) + ", @ " + + core::CContainerPrinter::print(s_Time) + ", elapsedTime = " + core::CStringUtils::typeToString(s_ElapsedTime); } +CProbabilityAndInfluenceCalculator::SCorrelateParams::SCorrelateParams(const CPartitioningFields& partitioningFields) + : s_Feature(), + s_Model(0), + s_ElapsedTime(0), + s_Probability(1.0), + s_PartitioningFields(partitioningFields), + s_Cutoff(1.0), + s_IncludeCutoff(false) { +} + +std::string CProbabilityAndInfluenceCalculator::SCorrelateParams::describe() const { + return core::CContainerPrinter::print(s_Values) + " | feature = " + model_t::print(s_Feature) + ", @ " + + core::CContainerPrinter::print(s_Times) + ", elapsedTime = " + 
core::CStringUtils::typeToString(s_ElapsedTime); +} ////// CInfluenceCalculator ////// -CInfluenceCalculator::~CInfluenceCalculator() {} +CInfluenceCalculator::~CInfluenceCalculator() { +} -double CInfluenceCalculator::intersectionInfluence(double logp, double logpi) -{ +double CInfluenceCalculator::intersectionInfluence(double logp, double logpi) { return maths::CTools::truncate(ratio(logpi, logp, 1.0), 0.0, 1.0); } -double CInfluenceCalculator::complementInfluence(double logp, double logpi) -{ +double CInfluenceCalculator::complementInfluence(double logp, double logpi) { return maths::CTools::truncate(1.0 - ratio(logpi, logp, 0.0), 0.0, 1.0); } ////// CInfluenceUnavailableCalculator ////// -void CInfluenceUnavailableCalculator::computeInfluences(TParams &params) const -{ +void CInfluenceUnavailableCalculator::computeInfluences(TParams& params) const { params.s_Influences.clear(); } -void CInfluenceUnavailableCalculator::computeInfluences(TCorrelateParams &params) const -{ +void CInfluenceUnavailableCalculator::computeInfluences(TCorrelateParams& params) const { params.s_Influences.clear(); } ////// CIndicatorInfluenceCalculator ////// -void CIndicatorInfluenceCalculator::computeInfluences(TParams &params) const -{ +void CIndicatorInfluenceCalculator::computeInfluences(TParams& params) const { params.s_Influences.clear(); - doComputeIndicatorInfluences(params.s_InfluencerName, - params.s_InfluencerValues, - params.s_Influences); + doComputeIndicatorInfluences(params.s_InfluencerName, params.s_InfluencerValues, params.s_Influences); } -void CIndicatorInfluenceCalculator::computeInfluences(TCorrelateParams &params) const -{ +void CIndicatorInfluenceCalculator::computeInfluences(TCorrelateParams& params) const { params.s_Influences.clear(); - doComputeIndicatorInfluences(params.s_InfluencerName, - params.s_InfluencerValues, - params.s_Influences); + doComputeIndicatorInfluences(params.s_InfluencerName, params.s_InfluencerValues, params.s_Influences); } ////// CLogProbabilityComplementInfluenceCalculator ////// -void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TParams &params) const -{ +void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TParams& params) const { params.s_Influences.clear(); maths::CModelProbabilityParams params_; - for (std::size_t i = 0u; i < params.s_Tail.size(); ++i) - { - if (params.s_Tail[i] == maths_t::E_RightTail) - { - params_.addCalculation(maths_t::E_OneSidedAbove) - .addCoordinate(i); + for (std::size_t i = 0u; i < params.s_Tail.size(); ++i) { + if (params.s_Tail[i] == maths_t::E_RightTail) { + params_.addCalculation(maths_t::E_OneSidedAbove).addCoordinate(i); } } - if (params_.calculations() > 0) - { + if (params_.calculations() > 0) { params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) - .addWeights(params.s_ComputeProbabilityParams.weights()[0]); - - TStrCRefDouble1VecDoublePrPrVec &influencerValues = params.s_InfluencerValues; - if (model_t::dimension(params.s_Feature) == 1) - { - std::sort(influencerValues.begin(), influencerValues.end(), - CDecreasingValueInfluence(params.s_Tail[0])); + .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) + .addWeights(params.s_ComputeProbabilityParams.weights()[0]); + + TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues; + if (model_t::dimension(params.s_Feature) == 1) { + std::sort(influencerValues.begin(), influencerValues.end(),
CDecreasingValueInfluence(params.s_Tail[0])); } LOG_TRACE("influencerValues = " << core::CContainerPrinter::print(influencerValues)); - doComputeInfluences(params.s_Feature, CValueDifference(), complementInfluence, - *params.s_Model, params.s_ElapsedTime, params_, - params.s_Time, params.s_Value[0], params.s_Count, - params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, - params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); + doComputeInfluences(params.s_Feature, + CValueDifference(), + complementInfluence, + *params.s_Model, + params.s_ElapsedTime, + params_, + params.s_Time, + params.s_Value[0], + params.s_Count, + params.s_Probability, + params.s_InfluencerName, + params.s_InfluencerValues, + params.s_Cutoff, + params.s_IncludeCutoff, + params.s_Influences); } } -void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TCorrelateParams &params) const -{ +void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TCorrelateParams& params) const { params.s_Influences.clear(); - if (params.s_Tail[0] == maths_t::E_RightTail) - { + if (params.s_Tail[0] == maths_t::E_RightTail) { std::size_t correlate = params.s_MostAnomalousCorrelate[0]; maths::CModelProbabilityParams params_; params_.addCalculation(maths_t::E_OneSidedAbove) - .seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) - .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) - .mostAnomalousCorrelate(correlate); + .seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) + .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) + .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) + .mostAnomalousCorrelate(correlate); LOG_TRACE("influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues)); - doComputeCorrelateInfluences(params.s_Feature, CValueDifference(), complementInfluence, - *params.s_Model, params.s_ElapsedTime, params_, - params.s_Times[correlate], params.s_Values[correlate], params.s_Counts[correlate], - params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, - params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); + doComputeCorrelateInfluences(params.s_Feature, + CValueDifference(), + complementInfluence, + *params.s_Model, + params.s_ElapsedTime, + params_, + params.s_Times[correlate], + params.s_Values[correlate], + params.s_Counts[correlate], + params.s_Probability, + params.s_InfluencerName, + params.s_InfluencerValues, + params.s_Cutoff, + params.s_IncludeCutoff, + params.s_Influences); } } ////// CLogProbabilityInfluenceCalculator ////// -namespace -{ +namespace { //! Maybe add \p coordinate and the appropriate calculation to \p params.
-void addCoordinate(maths_t::ETail tail, - std::size_t coordinate, - maths::CModelProbabilityParams &params) -{ - switch (tail) - { - case maths_t::E_LeftTail: - { - params.addCalculation(maths_t::E_OneSidedBelow) - .addCoordinate(coordinate); +void addCoordinate(maths_t::ETail tail, std::size_t coordinate, maths::CModelProbabilityParams& params) { + switch (tail) { + case maths_t::E_LeftTail: { + params.addCalculation(maths_t::E_OneSidedBelow).addCoordinate(coordinate); break; } - case maths_t::E_RightTail: - { - params.addCalculation(maths_t::E_OneSidedAbove) - .addCoordinate(coordinate); + case maths_t::E_RightTail: { + params.addCalculation(maths_t::E_OneSidedAbove).addCoordinate(coordinate); break; } case maths_t::E_MixedOrNeitherTail: @@ -1080,176 +919,217 @@ void addCoordinate(maths_t::ETail tail, break; } } - } -void CLogProbabilityInfluenceCalculator::computeInfluences(TParams &params) const -{ +void CLogProbabilityInfluenceCalculator::computeInfluences(TParams& params) const { params.s_Influences.clear(); maths::CModelProbabilityParams params_; - for (std::size_t i = 0u; i < params.s_Tail.size(); ++i) - { + for (std::size_t i = 0u; i < params.s_Tail.size(); ++i) { addCoordinate(params.s_Tail[i], i, params_); } - if (params_.calculations() > 0) - { + if (params_.calculations() > 0) { params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) - .addWeights(params.s_ComputeProbabilityParams.weights()[0]); - - TStrCRefDouble1VecDoublePrPrVec &influencerValues = params.s_InfluencerValues; - if (model_t::dimension(params.s_Feature) == 1) - { - std::sort(influencerValues.begin(), influencerValues.end(), - CDecreasingValueInfluence(params.s_Tail[0])); + .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) + .addWeights(params.s_ComputeProbabilityParams.weights()[0]); + + TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues; + if (model_t::dimension(params.s_Feature) == 1) { + std::sort(influencerValues.begin(), influencerValues.end(), CDecreasingValueInfluence(params.s_Tail[0])); } LOG_TRACE("influencerValues = " << core::CContainerPrinter::print(influencerValues)); - doComputeInfluences(params.s_Feature, CValueIntersection(), intersectionInfluence, - *params.s_Model, params.s_ElapsedTime, params_, - params.s_Time, params.s_Value[0], params.s_Count, - params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, - params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); + doComputeInfluences(params.s_Feature, + CValueIntersection(), + intersectionInfluence, + *params.s_Model, + params.s_ElapsedTime, + params_, + params.s_Time, + params.s_Value[0], + params.s_Count, + params.s_Probability, + params.s_InfluencerName, + params.s_InfluencerValues, + params.s_Cutoff, + params.s_IncludeCutoff, + params.s_Influences); } } -void CLogProbabilityInfluenceCalculator::computeInfluences(TCorrelateParams &params) const -{ +void CLogProbabilityInfluenceCalculator::computeInfluences(TCorrelateParams& params) const { params.s_Influences.clear(); maths::CModelProbabilityParams params_; addCoordinate(params.s_Tail[0], 0, params_); - if (params_.calculations() > 0) - { + if (params_.calculations() > 0) { std::size_t correlate = params.s_MostAnomalousCorrelate[0]; params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) -
.addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) - .mostAnomalousCorrelate(correlate); + .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) + .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) + .mostAnomalousCorrelate(correlate); LOG_TRACE("influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues)); - doComputeCorrelateInfluences(params.s_Feature, CValueDifference(), intersectionInfluence, - *params.s_Model, params.s_ElapsedTime, params_, - params.s_Times[correlate], params.s_Values[correlate], params.s_Counts[correlate], - params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, - params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); + doComputeCorrelateInfluences(params.s_Feature, + CValueDifference(), + intersectionInfluence, + *params.s_Model, + params.s_ElapsedTime, + params_, + params.s_Times[correlate], + params.s_Values[correlate], + params.s_Counts[correlate], + params.s_Probability, + params.s_InfluencerName, + params.s_InfluencerValues, + params.s_Cutoff, + params.s_IncludeCutoff, + params.s_Influences); } } ////// CMeanInfluenceCalculator ////// -void CMeanInfluenceCalculator::computeInfluences(TParams &params) const -{ +void CMeanInfluenceCalculator::computeInfluences(TParams& params) const { params.s_Influences.clear(); maths::CModelProbabilityParams params_; - for (std::size_t i = 0u; i < params.s_Tail.size(); ++i) - { + for (std::size_t i = 0u; i < params.s_Tail.size(); ++i) { addCoordinate(params.s_Tail[i], i, params_); } - if (params_.calculations() > 0) - { + if (params_.calculations() > 0) { params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) - .addWeights(params.s_ComputeProbabilityParams.weights()[0]); + .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) + .addWeights(params.s_ComputeProbabilityParams.weights()[0]); - TStrCRefDouble1VecDoublePrPrVec &influencerValues = params.s_InfluencerValues; - if (model_t::dimension(params.s_Feature) == 1) - { - std::sort(influencerValues.begin(), influencerValues.end(), + TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues; + if (model_t::dimension(params.s_Feature) == 1) { + std::sort(influencerValues.begin(), + influencerValues.end(), CDecreasingMeanInfluence(params.s_Tail[0], params.s_Value[0], params.s_Count)); } LOG_TRACE("influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues)); - doComputeInfluences(params.s_Feature, CMeanDifference(), complementInfluence, - *params.s_Model, params.s_ElapsedTime, params_, - params.s_Time, params.s_Value[0], params.s_Count, - params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, - params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); + doComputeInfluences(params.s_Feature, + CMeanDifference(), + complementInfluence, + *params.s_Model, + params.s_ElapsedTime, + params_, + params.s_Time, + params.s_Value[0], + params.s_Count, + params.s_Probability, + params.s_InfluencerName, + params.s_InfluencerValues, + params.s_Cutoff, + params.s_IncludeCutoff, + params.s_Influences); } } -void CMeanInfluenceCalculator::computeInfluences(TCorrelateParams &params) const -{ +void CMeanInfluenceCalculator::computeInfluences(TCorrelateParams& params) const { params.s_Influences.clear(); maths::CModelProbabilityParams params_; addCoordinate(params.s_Tail[0], 0, params_); - if (params_.calculations() > 0) - { + if
(params_.calculations() > 0) { std::size_t correlate = params.s_MostAnomalousCorrelate[0]; params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) - .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) - .mostAnomalousCorrelate(correlate); + .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) + .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) + .mostAnomalousCorrelate(correlate); LOG_TRACE("influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues)); - doComputeCorrelateInfluences(params.s_Feature, CMeanDifference(), complementInfluence, - *params.s_Model, params.s_ElapsedTime, params_, - params.s_Times[correlate], params.s_Values[correlate], params.s_Counts[correlate], - params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, - params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); + doComputeCorrelateInfluences(params.s_Feature, + CMeanDifference(), + complementInfluence, + *params.s_Model, + params.s_ElapsedTime, + params_, + params.s_Times[correlate], + params.s_Values[correlate], + params.s_Counts[correlate], + params.s_Probability, + params.s_InfluencerName, + params.s_InfluencerValues, + params.s_Cutoff, + params.s_IncludeCutoff, + params.s_Influences); } } ////// CVarianceInfluenceCalculator ////// -void CVarianceInfluenceCalculator::computeInfluences(TParams &params) const -{ +void CVarianceInfluenceCalculator::computeInfluences(TParams& params) const { params.s_Influences.clear(); maths::CModelProbabilityParams params_; - for (std::size_t i = 0u; i < params.s_Tail.size(); ++i) - { + for (std::size_t i = 0u; i < params.s_Tail.size(); ++i) { addCoordinate(params.s_Tail[i], i, params_); } - if (params_.calculations() > 0) - { + if (params_.calculations() > 0) { params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) - .addWeights(params.s_ComputeProbabilityParams.weights()[0]); + .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) + .addWeights(params.s_ComputeProbabilityParams.weights()[0]); - TStrCRefDouble1VecDoublePrPrVec &influencerValues = params.s_InfluencerValues; - if (model_t::dimension(params.s_Feature) == 1) - { - std::sort(influencerValues.begin(), influencerValues.end(), + TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues; + if (model_t::dimension(params.s_Feature) == 1) { + std::sort(influencerValues.begin(), + influencerValues.end(), CDecreasingVarianceInfluence(params.s_Tail[0], params.s_Value[0], params.s_Count)); } LOG_TRACE("influencerValues = " << core::CContainerPrinter::print(influencerValues)); - doComputeInfluences(params.s_Feature, CVarianceDifference(), complementInfluence, - *params.s_Model, params.s_ElapsedTime, params_, - params.s_Time, params.s_Value[0], params.s_Count, - params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, - params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); + doComputeInfluences(params.s_Feature, + CVarianceDifference(), + complementInfluence, + *params.s_Model, + params.s_ElapsedTime, + params_, + params.s_Time, + params.s_Value[0], + params.s_Count, + params.s_Probability, + params.s_InfluencerName, + params.s_InfluencerValues, + params.s_Cutoff, + params.s_IncludeCutoff, + params.s_Influences); } } -void
CVarianceInfluenceCalculator::computeInfluences(TCorrelateParams &params) const -{ +void CVarianceInfluenceCalculator::computeInfluences(TCorrelateParams& params) const { params.s_Influences.clear(); maths::CModelProbabilityParams params_; addCoordinate(params.s_Tail[0], 0, params_); - if (params_.calculations() > 0) - { + if (params_.calculations() > 0) { std::size_t correlate = params.s_MostAnomalousCorrelate[0]; params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) - .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) - .mostAnomalousCorrelate(correlate); + .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) + .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) + .mostAnomalousCorrelate(correlate); LOG_TRACE("influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues)); - doComputeCorrelateInfluences(params.s_Feature, CVarianceDifference(), complementInfluence, - *params.s_Model, params.s_ElapsedTime, params_, - params.s_Times[correlate], params.s_Values[correlate], params.s_Counts[correlate], - params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, - params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); + doComputeCorrelateInfluences(params.s_Feature, + CVarianceDifference(), + complementInfluence, + *params.s_Model, + params.s_ElapsedTime, + params_, + params.s_Times[correlate], + params.s_Values[correlate], + params.s_Counts[correlate], + params.s_Probability, + params.s_InfluencerName, + params.s_InfluencerValues, + params.s_Cutoff, + params.s_IncludeCutoff, + params.s_Influences); } } - } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 74d2e62521..bc17f9392a 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -6,8 +6,8 @@ #include -#include #include +#include #include #include @@ -16,47 +16,47 @@ #include #include +namespace ml { -namespace ml -{ - -namespace model -{ +namespace model { // Only prune once per hour const core_t::TTime CResourceMonitor::MINIMUM_PRUNE_FREQUENCY(60 * 60); const std::size_t CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB(4096); -CResourceMonitor::CResourceMonitor() : m_AllowAllocations(true), - m_ByteLimitHigh(0), m_ByteLimitLow(0), m_CurrentAnomalyDetectorMemory(0), - m_ExtraMemory(0), m_PreviousTotal(this->totalMemory()), m_Peak(m_PreviousTotal), - m_LastAllocationFailureReport(0), m_MemoryStatus(model_t::E_MemoryStatusOk), - m_HasPruningStarted(false), m_PruneThreshold(0), m_LastPruneTime(0), - m_PruneWindow(std::numeric_limits::max()), - m_PruneWindowMaximum(std::numeric_limits::max()), - m_PruneWindowMinimum(std::numeric_limits::max()), - m_NoLimit(false) -{ +CResourceMonitor::CResourceMonitor() + : m_AllowAllocations(true), + m_ByteLimitHigh(0), + m_ByteLimitLow(0), + m_CurrentAnomalyDetectorMemory(0), + m_ExtraMemory(0), + m_PreviousTotal(this->totalMemory()), + m_Peak(m_PreviousTotal), + m_LastAllocationFailureReport(0), + m_MemoryStatus(model_t::E_MemoryStatusOk), + m_HasPruningStarted(false), + m_PruneThreshold(0), + m_LastPruneTime(0), + m_PruneWindow(std::numeric_limits::max()), + m_PruneWindowMaximum(std::numeric_limits::max()), + m_PruneWindowMinimum(std::numeric_limits::max()), + m_NoLimit(false) { this->updateMemoryLimitsAndPruneThreshold(DEFAULT_MEMORY_LIMIT_MB); } -void CResourceMonitor::memoryUsageReporter(const TMemoryUsageReporterFunc &reporter) -{ +void
CResourceMonitor::memoryUsageReporter(const TMemoryUsageReporterFunc& reporter) { m_MemoryUsageReporter = reporter; } -void CResourceMonitor::registerComponent(CAnomalyDetector &detector) -{ +void CResourceMonitor::registerComponent(CAnomalyDetector& detector) { LOG_TRACE("Registering component: " << detector.model()); m_Models.insert({detector.model().get(), std::size_t(0)}); } -void CResourceMonitor::unRegisterComponent(CAnomalyDetector &detector) -{ +void CResourceMonitor::unRegisterComponent(CAnomalyDetector& detector) { auto iter = m_Models.find(detector.model().get()); - if (iter == m_Models.end()) - { + if (iter == m_Models.end()) { LOG_ERROR("Inconsistency - component has not been registered: " << detector.model()); return; } @@ -65,35 +65,27 @@ void CResourceMonitor::unRegisterComponent(CAnomalyDetector &detector) m_Models.erase(iter); } -void CResourceMonitor::memoryLimit(std::size_t limitMBs) -{ +void CResourceMonitor::memoryLimit(std::size_t limitMBs) { this->updateMemoryLimitsAndPruneThreshold(limitMBs); - if (m_NoLimit) - { + if (m_NoLimit) { LOG_INFO("Setting no model memory limit"); - } - else - { + } else { LOG_INFO("Setting model memory limit to " << limitMBs << " MB"); } } -void CResourceMonitor::updateMemoryLimitsAndPruneThreshold(std::size_t limitMBs) -{ +void CResourceMonitor::updateMemoryLimitsAndPruneThreshold(std::size_t limitMBs) { // The threshold for no limit is set such that any negative limit cast to // a size_t (which is unsigned) will be taken to mean no limit - if (limitMBs > std::numeric_limits::max() / 2) - { + if (limitMBs > std::numeric_limits::max() / 2) { m_NoLimit = true; // The high limit is set to around half what it could potentially be. // The reason is that other code will do "what if" calculations on this // number, such as "what would total memory usage be if we allocated 10 // more models?", and it causes problems if these calculations overflow. m_ByteLimitHigh = std::numeric_limits::max() / 2 + 1; - } - else - { + } else { // Background persist causes the memory size to double due to copying // the models. On top of that, after the persist is done we may not // be able to retrieve that memory back. Thus, we halve the requested @@ -109,52 +101,40 @@ void CResourceMonitor::updateMemoryLimitsAndPruneThreshold(std::size_t limitMBs) m_PruneThreshold = static_cast(m_ByteLimitHigh / 5 * 3); } -model_t::EMemoryStatus CResourceMonitor::getMemoryStatus() -{ +model_t::EMemoryStatus CResourceMonitor::getMemoryStatus() { return m_MemoryStatus; } -void CResourceMonitor::refresh(CAnomalyDetector &detector) -{ - if (m_NoLimit) - { +void CResourceMonitor::refresh(CAnomalyDetector& detector) { + if (m_NoLimit) { return; } this->forceRefresh(detector); } -void CResourceMonitor::forceRefresh(CAnomalyDetector &detector) -{ +void CResourceMonitor::forceRefresh(CAnomalyDetector& detector) { this->memUsage(detector.model().get()); core::CStatistics::stat(stat_t::E_MemoryUsage).set(this->totalMemory()); LOG_TRACE("Checking allocations: currently at " << this->totalMemory()); this->updateAllowAllocations(); } -void CResourceMonitor::updateAllowAllocations() -{ +void CResourceMonitor::updateAllowAllocations() { std::size_t total{this->totalMemory()}; - if (m_AllowAllocations) - { - if (total > m_ByteLimitHigh) - { - LOG_INFO("Over allocation limit. " << total << - " bytes used, the limit is " << m_ByteLimitHigh); + if (m_AllowAllocations) { + if (total > m_ByteLimitHigh) { + LOG_INFO("Over allocation limit. 
" << total << " bytes used, the limit is " << m_ByteLimitHigh); m_AllowAllocations = false; } - } - else - { - if (total < m_ByteLimitLow) - { + } else { + if (total < m_ByteLimitLow) { LOG_INFO("Below allocation limit, used " << total); m_AllowAllocations = true; } } } -bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) -{ +bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) { // The basic idea here is that as the memory usage goes up, we // prune models to bring it down again. If usage declines, we // relax the pruning window to let it go back up again. @@ -162,29 +142,24 @@ bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) std::size_t total{this->totalMemory()}; bool aboveThreshold = total > m_PruneThreshold; - if (m_HasPruningStarted == false && !aboveThreshold) - { + if (m_HasPruningStarted == false && !aboveThreshold) { LOG_TRACE("No pruning required. " << total << " / " << m_PruneThreshold); return false; } - if (endTime < m_LastPruneTime + MINIMUM_PRUNE_FREQUENCY) - { + if (endTime < m_LastPruneTime + MINIMUM_PRUNE_FREQUENCY) { LOG_TRACE("Too soon since last prune to prune again"); return false; } - if (m_Models.empty()) - { + if (m_Models.empty()) { return false; } - if (m_HasPruningStarted == false) - { + if (m_HasPruningStarted == false) { // The longest we'll consider keeping priors for is 1M buckets. - CAnomalyDetectorModel *model = m_Models.begin()->first; - if (model == 0) - { + CAnomalyDetectorModel* model = m_Models.begin()->first; + if (model == 0) { return false; } m_PruneWindowMaximum = model->defaultPruneWindow(); @@ -195,38 +170,30 @@ bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) LOG_DEBUG("Pruning started. Window (buckets): " << m_PruneWindow); } - if (aboveThreshold) - { + if (aboveThreshold) { // Do a prune and see how much we got back // These are the expensive operations std::size_t usageAfter = 0; - for (auto &model : m_Models) - { + for (auto& model : m_Models) { model.first->prune(m_PruneWindow); model.second = model.first->memoryUsage(); - usageAfter += model.second; + usageAfter += model.second; } m_CurrentAnomalyDetectorMemory = usageAfter; total = this->totalMemory(); this->updateAllowAllocations(); } - LOG_TRACE("Pruning models. Usage: " << - total << ". Current window: " << m_PruneWindow << " buckets"); + LOG_TRACE("Pruning models. Usage: " << total << ". 
Current window: " << m_PruneWindow << " buckets"); - if (total < m_PruneThreshold) - { + if (total < m_PruneThreshold) { // Expand the window - m_PruneWindow = std::min(m_PruneWindow + std::size_t( - (endTime - m_LastPruneTime) / m_Models.begin()->first->bucketLength()), - m_PruneWindowMaximum); + m_PruneWindow = std::min(m_PruneWindow + std::size_t((endTime - m_LastPruneTime) / m_Models.begin()->first->bucketLength()), + m_PruneWindowMaximum); LOG_TRACE("Expanding window, to " << m_PruneWindow); - } - else - { + } else { // Shrink the window - m_PruneWindow = std::max(static_cast(m_PruneWindow * 99 / 100), - m_PruneWindowMinimum); + m_PruneWindow = std::max(static_cast(m_PruneWindow * 99 / 100), m_PruneWindowMinimum); LOG_TRACE("Shrinking window, to " << m_PruneWindow); } @@ -234,30 +201,24 @@ bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) return aboveThreshold; } -bool CResourceMonitor::areAllocationsAllowed() const -{ +bool CResourceMonitor::areAllocationsAllowed() const { return m_AllowAllocations; } -bool CResourceMonitor::areAllocationsAllowed(std::size_t size) const -{ - if (m_AllowAllocations) - { +bool CResourceMonitor::areAllocationsAllowed(std::size_t size) const { + if (m_AllowAllocations) { return this->totalMemory() + size < m_ByteLimitHigh; } return false; } -std::size_t CResourceMonitor::allocationLimit() const -{ +std::size_t CResourceMonitor::allocationLimit() const { return m_ByteLimitHigh - std::min(m_ByteLimitHigh, this->totalMemory()); } -void CResourceMonitor::memUsage(CAnomalyDetectorModel *model) -{ +void CResourceMonitor::memUsage(CAnomalyDetectorModel* model) { auto iter = m_Models.find(model); - if (iter == m_Models.end()) - { + if (iter == m_Models.end()) { LOG_ERROR("Inconsistency - component has not been registered: " << model); return; } @@ -267,51 +228,41 @@ void CResourceMonitor::memUsage(CAnomalyDetectorModel *model) m_CurrentAnomalyDetectorMemory += (modelCurrentUsage - modelPreviousUsage); } -void CResourceMonitor::sendMemoryUsageReportIfSignificantlyChanged(core_t::TTime bucketStartTime) -{ - if (this->needToSendReport()) - { +void CResourceMonitor::sendMemoryUsageReportIfSignificantlyChanged(core_t::TTime bucketStartTime) { + if (this->needToSendReport()) { this->sendMemoryUsageReport(bucketStartTime); } } -bool CResourceMonitor::needToSendReport() -{ +bool CResourceMonitor::needToSendReport() { // Has the usage changed by more than 1% ? 
std::size_t total{this->totalMemory()}; - if ((std::max(total, m_PreviousTotal) - std::min(total, m_PreviousTotal)) > m_PreviousTotal / 100) - { + if ((std::max(total, m_PreviousTotal) - std::min(total, m_PreviousTotal)) > m_PreviousTotal / 100) { return true; } - if (!m_AllocationFailures.empty()) - { + if (!m_AllocationFailures.empty()) { core_t::TTime lastestAllocationError = (--m_AllocationFailures.end())->first; - if (lastestAllocationError > m_LastAllocationFailureReport) - { + if (lastestAllocationError > m_LastAllocationFailureReport) { return true; } } return false; } -void CResourceMonitor::sendMemoryUsageReport(core_t::TTime bucketStartTime) -{ +void CResourceMonitor::sendMemoryUsageReport(core_t::TTime bucketStartTime) { std::size_t total{this->totalMemory()}; m_Peak = std::max(m_Peak, total); - if (m_MemoryUsageReporter) - { + if (m_MemoryUsageReporter) { m_MemoryUsageReporter(this->createMemoryUsageReport(bucketStartTime)); - if (!m_AllocationFailures.empty()) - { + if (!m_AllocationFailures.empty()) { m_LastAllocationFailureReport = m_AllocationFailures.rbegin()->first; } } m_PreviousTotal = total; } -CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) -{ +CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SResults res; res.s_ByFields = 0; res.s_OverFields = 0; @@ -320,55 +271,44 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi res.s_AllocationFailures = 0; res.s_MemoryStatus = m_MemoryStatus; res.s_BucketStartTime = bucketStartTime; - for (const auto &model : m_Models) - { + for (const auto& model : m_Models) { ++res.s_PartitionFields; res.s_OverFields += model.first->dataGatherer().numberOverFieldValues(); - res.s_ByFields += model.first->dataGatherer().numberByFieldValues(); + res.s_ByFields += model.first->dataGatherer().numberByFieldValues(); } res.s_AllocationFailures += m_AllocationFailures.size(); return res; } -void CResourceMonitor::acceptAllocationFailureResult(core_t::TTime time) -{ +void CResourceMonitor::acceptAllocationFailureResult(core_t::TTime time) { m_MemoryStatus = model_t::E_MemoryStatusHardLimit; ++m_AllocationFailures[time]; } -void CResourceMonitor::acceptPruningResult() -{ - if (m_MemoryStatus == model_t::E_MemoryStatusOk) - { +void CResourceMonitor::acceptPruningResult() { + if (m_MemoryStatus == model_t::E_MemoryStatusOk) { m_MemoryStatus = model_t::E_MemoryStatusSoftLimit; } } -bool CResourceMonitor::haveNoLimit() const -{ +bool CResourceMonitor::haveNoLimit() const { return m_NoLimit; } -void CResourceMonitor::addExtraMemory(std::size_t mem) -{ +void CResourceMonitor::addExtraMemory(std::size_t mem) { m_ExtraMemory += mem; this->updateAllowAllocations(); } -void CResourceMonitor::clearExtraMemory() -{ - if (m_ExtraMemory != 0) - { +void CResourceMonitor::clearExtraMemory() { + if (m_ExtraMemory != 0) { m_ExtraMemory = 0; this->updateAllowAllocations(); } } -std::size_t CResourceMonitor::totalMemory() const -{ - return m_CurrentAnomalyDetectorMemory + m_ExtraMemory + - CStringStore::names().memoryUsage() + - CStringStore::influencers().memoryUsage(); +std::size_t CResourceMonitor::totalMemory() const { + return m_CurrentAnomalyDetectorMemory + m_ExtraMemory + CStringStore::names().memoryUsage() + CStringStore::influencers().memoryUsage(); } } // model diff --git a/lib/model/CResultsQueue.cc b/lib/model/CResultsQueue.cc index 32a1e96ad4..4c082e9610 100644 --- a/lib/model/CResultsQueue.cc +++ 
b/lib/model/CResultsQueue.cc @@ -10,130 +10,99 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { const std::string RESULTS_TAG("a"); const std::string LAST_RESULTS_INDEX_TAG("b"); const std::string INITIALISATION_TIME_TAG("c"); } -CResultsQueue::CResultsQueue(std::size_t delayBuckets, - core_t::TTime bucketLength) : - m_Results(delayBuckets, bucketLength, 0), - m_LastResultsIndex(2) -{ +CResultsQueue::CResultsQueue(std::size_t delayBuckets, core_t::TTime bucketLength) + : m_Results(delayBuckets, bucketLength, 0), m_LastResultsIndex(2) { } -void CResultsQueue::push(const CHierarchicalResults &result, core_t::TTime time) -{ - if (m_Results.latestBucketEnd() + 1 - m_Results.bucketLength() == 0) - { +void CResultsQueue::push(const CHierarchicalResults& result, core_t::TTime time) { + if (m_Results.latestBucketEnd() + 1 - m_Results.bucketLength() == 0) { m_Results.reset(time - m_Results.bucketLength()); LOG_TRACE("Resetting results queue. Queue's latestBucketEnd is " << m_Results.latestBucketEnd()); } m_Results.push(result, time); } -void CResultsQueue::push(const CHierarchicalResults &result) -{ +void CResultsQueue::push(const CHierarchicalResults& result) { m_Results.push(result); } -const CHierarchicalResults &CResultsQueue::get(core_t::TTime time) const -{ +const CHierarchicalResults& CResultsQueue::get(core_t::TTime time) const { return m_Results.get(time); } -CHierarchicalResults &CResultsQueue::get(core_t::TTime time) -{ +CHierarchicalResults& CResultsQueue::get(core_t::TTime time) { return m_Results.get(time); } -CHierarchicalResults &CResultsQueue::latest() -{ +CHierarchicalResults& CResultsQueue::latest() { return m_Results.latest(); } -core_t::TTime CResultsQueue::latestBucketEnd() const -{ +core_t::TTime CResultsQueue::latestBucketEnd() const { return m_Results.latestBucketEnd(); } -std::size_t CResultsQueue::size() const -{ +std::size_t CResultsQueue::size() const { return m_Results.size(); } -void CResultsQueue::reset(core_t::TTime time) -{ +void CResultsQueue::reset(core_t::TTime time) { m_Results.reset(time); m_LastResultsIndex = m_Results.size() - 1; } -bool CResultsQueue::hasInterimResults() const -{ +bool CResultsQueue::hasInterimResults() const { return m_Results.size() > 2 && m_LastResultsIndex == 0; } - -core_t::TTime CResultsQueue::chooseResultTime(core_t::TTime bucketStartTime, - core_t::TTime bucketLength, - model::CHierarchicalResults &results) -{ - if (m_Results.size() == 1) - { +core_t::TTime +CResultsQueue::chooseResultTime(core_t::TTime bucketStartTime, core_t::TTime bucketLength, model::CHierarchicalResults& results) { + if (m_Results.size() == 1) { return bucketStartTime; } // Select the correct bucket to use - LOG_TRACE("Asking for queue items at " << (bucketStartTime - bucketLength) << " and " << - (bucketStartTime - (bucketLength / 2))); + LOG_TRACE("Asking for queue items at " << (bucketStartTime - bucketLength) << " and " << (bucketStartTime - (bucketLength / 2))); core_t::TTime resultsTime = 0; - const model::CHierarchicalResults::TNode *node = m_Results.get(bucketStartTime - bucketLength).root(); + const model::CHierarchicalResults::TNode* node = m_Results.get(bucketStartTime - bucketLength).root(); double r1 = 0.0; - if (node) - { + if (node) { r1 = node->s_NormalizedAnomalyScore; } node = m_Results.get(bucketStartTime - (bucketLength / 2)).root(); double r2 = 0.0; - if (node) - { + if (node) { r2 = node->s_NormalizedAnomalyScore; } double r3 = 0.0; - if (results.root()) - { + if 
(results.root()) { r3 = results.root()->s_NormalizedAnomalyScore; } LOG_TRACE("Testing results " << r1 << ", " << r2 << ", " << r3); - if (m_LastResultsIndex == 0) - { + if (m_LastResultsIndex == 0) { // With 3 clear buckets to look at, start choosing - if ((r3 > r2) && (r3 > r1)) - { + if ((r3 > r2) && (r3 > r1)) { // We want this guy, so choose r1 so that he can be selected next time resultsTime = bucketStartTime - bucketLength; m_LastResultsIndex = 2; - } - else - { + } else { // Pick the bigger of 1 / 2 - if (r2 > r1) - { + if (r2 > r1) { resultsTime = bucketStartTime - (bucketLength / 2); m_LastResultsIndex = 3; - } - else - { + } else { resultsTime = bucketStartTime - bucketLength; m_LastResultsIndex = 2; } @@ -143,51 +112,34 @@ core_t::TTime CResultsQueue::chooseResultTime(core_t::TTime bucketStartTime, return resultsTime; } -void CResultsQueue::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CResultsQueue::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core_t::TTime initialisationTime = m_Results.latestBucketEnd() + 1 - m_Results.bucketLength(); core::CPersistUtils::persist(INITIALISATION_TIME_TAG, initialisationTime, inserter); core::CPersistUtils::persist(RESULTS_TAG, m_Results, inserter); core::CPersistUtils::persist(LAST_RESULTS_INDEX_TAG, m_LastResultsIndex, inserter); } -bool CResultsQueue::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == RESULTS_TAG) - { - if (!core::CPersistUtils::restore(RESULTS_TAG, m_Results, traverser)) - { +bool CResultsQueue::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == RESULTS_TAG) { + if (!core::CPersistUtils::restore(RESULTS_TAG, m_Results, traverser)) { return false; } - } - else if (name == LAST_RESULTS_INDEX_TAG) - { - if (!core::CPersistUtils::restore(LAST_RESULTS_INDEX_TAG, - m_LastResultsIndex, - traverser)) - { + } else if (name == LAST_RESULTS_INDEX_TAG) { + if (!core::CPersistUtils::restore(LAST_RESULTS_INDEX_TAG, m_LastResultsIndex, traverser)) { return false; } - } - else if (name == INITIALISATION_TIME_TAG) - { + } else if (name == INITIALISATION_TIME_TAG) { core_t::TTime initialisationTime = 0; - if (!core::CPersistUtils::restore(INITIALISATION_TIME_TAG, - initialisationTime, - traverser)) - { + if (!core::CPersistUtils::restore(INITIALISATION_TIME_TAG, initialisationTime, traverser)) { return false; } m_Results.reset(initialisationTime); } - } - while (traverser.next()); + } while (traverser.next()); return true; } - } // model } // ml diff --git a/lib/model/CRuleCondition.cc b/lib/model/CRuleCondition.cc index d07b459cda..b31997a5de 100644 --- a/lib/model/CRuleCondition.cc +++ b/lib/model/CRuleCondition.cc @@ -12,144 +12,107 @@ #include #include +namespace ml { +namespace model { -namespace ml -{ -namespace model -{ - -namespace -{ +namespace { const CAnomalyDetectorModel::TSizeDoublePr1Vec EMPTY_CORRELATED; const core::CPatternSet EMPTY_FILTER; } using TDouble1Vec = CAnomalyDetectorModel::TDouble1Vec; -CRuleCondition::SCondition::SCondition(EConditionOperator op, double threshold) - : s_Op(op), - s_Threshold(threshold) -{ +CRuleCondition::SCondition::SCondition(EConditionOperator op, double threshold) : s_Op(op), s_Threshold(threshold) { } -bool CRuleCondition::SCondition::test(double value) const -{ - switch (s_Op) - { - case E_LT: - return value < s_Threshold; - case E_LTE: - return value <= s_Threshold; - 
case E_GT: - return value > s_Threshold; - case E_GTE: - return value >= s_Threshold; +bool CRuleCondition::SCondition::test(double value) const { + switch (s_Op) { + case E_LT: + return value < s_Threshold; + case E_LTE: + return value <= s_Threshold; + case E_GT: + return value > s_Threshold; + case E_GTE: + return value >= s_Threshold; } return false; } CRuleCondition::CRuleCondition() - : m_Type(E_NumericalActual), - m_Condition(E_LT, 0.0), - m_FieldName(), - m_FieldValue(), - m_ValueFilter(EMPTY_FILTER) -{ + : m_Type(E_NumericalActual), m_Condition(E_LT, 0.0), m_FieldName(), m_FieldValue(), m_ValueFilter(EMPTY_FILTER) { } -void CRuleCondition::type(ERuleConditionType ruleType) -{ +void CRuleCondition::type(ERuleConditionType ruleType) { m_Type = ruleType; } -void CRuleCondition::fieldName(const std::string &fieldName) -{ +void CRuleCondition::fieldName(const std::string& fieldName) { m_FieldName = fieldName; } -void CRuleCondition::fieldValue(const std::string &fieldValue) -{ +void CRuleCondition::fieldValue(const std::string& fieldValue) { m_FieldValue = fieldValue; } -CRuleCondition::SCondition &CRuleCondition::condition() -{ +CRuleCondition::SCondition& CRuleCondition::condition() { return m_Condition; } -void CRuleCondition::valueFilter(const core::CPatternSet &valueFilter) -{ +void CRuleCondition::valueFilter(const core::CPatternSet& valueFilter) { m_ValueFilter = TPatternSetCRef(valueFilter); } -bool CRuleCondition::isCategorical() const -{ +bool CRuleCondition::isCategorical() const { return m_Type == E_Categorical; } -bool CRuleCondition::isNumerical() const -{ +bool CRuleCondition::isNumerical() const { return !this->isCategorical(); } -bool CRuleCondition::test(const CAnomalyDetectorModel &model, +bool CRuleCondition::test(const CAnomalyDetectorModel& model, model_t::EFeature feature, - const model_t::CResultType &resultType, + const model_t::CResultType& resultType, bool isScoped, std::size_t pid, std::size_t cid, - core_t::TTime time) const -{ - const CDataGatherer &gatherer = model.dataGatherer(); + core_t::TTime time) const { + const CDataGatherer& gatherer = model.dataGatherer(); - if (this->isCategorical()) - { - if (m_FieldName == gatherer.partitionFieldName()) - { + if (this->isCategorical()) { + if (m_FieldName == gatherer.partitionFieldName()) { return m_ValueFilter.get().contains(gatherer.partitionFieldValue()); - } - else if (m_FieldName == gatherer.personFieldName()) - { + } else if (m_FieldName == gatherer.personFieldName()) { return m_ValueFilter.get().contains(gatherer.personName(pid)); - } - else if (m_FieldName == gatherer.attributeFieldName()) - { + } else if (m_FieldName == gatherer.attributeFieldName()) { return m_ValueFilter.get().contains(gatherer.attributeName(cid)); - } - else - { + } else { LOG_ERROR("Unexpected fieldName = " << m_FieldName); return false; } - } - else - { - if (m_FieldValue.empty() == false) - { - if (isScoped) - { + } else { + if (m_FieldValue.empty() == false) { + if (isScoped) { // When scoped we are checking if the rule condition applies to the entity // identified by m_FieldName/m_FieldValue, and we do this for all time // series which have resolved to check this condition. // Thus we ignore the supplied pid/cid and instead look up // the time series identifier that matches the condition's m_FieldValue. - bool successfullyResolvedId = model.isPopulation() ? 
- gatherer.attributeId(m_FieldValue, cid) : gatherer.personId(m_FieldValue, pid); - if (successfullyResolvedId == false) - { + bool successfullyResolvedId = + model.isPopulation() ? gatherer.attributeId(m_FieldValue, cid) : gatherer.personId(m_FieldValue, pid); + if (successfullyResolvedId == false) { return false; } - } - else - { + } else { // For numerical rules the field name may be: // - empty // - the person field name if the detector has only an over field or only a by field // - the attribute field name if the detector has both over and by fields - const std::string &fieldValue = model.isPopulation() && m_FieldName == gatherer.attributeFieldName() ? - gatherer.attributeName(cid) : gatherer.personName(pid); - if (m_FieldValue != fieldValue) - { + const std::string& fieldValue = model.isPopulation() && m_FieldName == gatherer.attributeFieldName() + ? gatherer.attributeName(cid) + : gatherer.personName(pid); + if (m_FieldValue != fieldValue) { return false; } } @@ -158,71 +121,57 @@ bool CRuleCondition::test(const CAnomalyDetectorModel &model, } } -bool CRuleCondition::checkCondition(const CAnomalyDetectorModel &model, +bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model, model_t::EFeature feature, model_t::CResultType resultType, std::size_t pid, std::size_t cid, - core_t::TTime time) const -{ + core_t::TTime time) const { TDouble1Vec value; - switch (m_Type) - { - case E_Categorical: - { - LOG_ERROR("Should never check numerical condition for categorical rule condition"); + switch (m_Type) { + case E_Categorical: { + LOG_ERROR("Should never check numerical condition for categorical rule condition"); + return false; + } + case E_NumericalActual: { + value = model.currentBucketValue(feature, pid, cid, time); + break; + } + case E_NumericalTypical: { + value = model.baselineBucketMean(feature, pid, cid, resultType, EMPTY_CORRELATED, time); + if (value.empty()) { + // Means prior is non-informative return false; } - case E_NumericalActual: - { - value = model.currentBucketValue(feature, pid, cid, time); - break; - } - case E_NumericalTypical: - { - value = model.baselineBucketMean(feature, pid, cid, resultType, EMPTY_CORRELATED, time); - if (value.empty()) - { - // Means prior is non-informative - return false; - } - break; + break; + } + case E_NumericalDiffAbs: { + value = model.currentBucketValue(feature, pid, cid, time); + TDouble1Vec typical = model.baselineBucketMean(feature, pid, cid, resultType, EMPTY_CORRELATED, time); + if (typical.empty()) { + // Means prior is non-informative + return false; } - case E_NumericalDiffAbs: - { - value = model.currentBucketValue(feature, pid, cid, time); - TDouble1Vec typical = model.baselineBucketMean( - feature, pid, cid, resultType, EMPTY_CORRELATED, time); - if (typical.empty()) - { - // Means prior is non-informative - return false; - } - if (value.size() != typical.size()) - { - LOG_ERROR("Cannot apply rule condition: cannot calculate difference between " << - "actual and typical values due to different dimensions."); - return false; - } - for (std::size_t i = 0; i < value.size(); ++i) - { - value[i] = std::fabs(value[i] - typical[i]); - } - break; + if (value.size() != typical.size()) { + LOG_ERROR("Cannot apply rule condition: cannot calculate difference between " + << "actual and typical values due to different dimensions."); + return false; } - case E_Time: - { - value.push_back(time); - break; + for (std::size_t i = 0; i < value.size(); ++i) { + value[i] = std::fabs(value[i] - typical[i]); } + break; + } + case 
E_Time: { + value.push_back(time); + break; + } } - if (value.empty()) - { + if (value.empty()) { LOG_ERROR("Value for rule comparison could not be calculated"); return false; } - if (value.size() > 1) - { + if (value.size() > 1) { LOG_ERROR("Numerical rules do not support multivariate analysis"); return false; } @@ -230,64 +179,52 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel &model, return m_Condition.test(value[0]); } -std::string CRuleCondition::print() const -{ +std::string CRuleCondition::print() const { std::string result = this->print(m_Type); - if (m_FieldName.empty() == false) - { + if (m_FieldName.empty() == false) { result += "(" + m_FieldName; - if (m_FieldValue.empty() == false) - { + if (m_FieldValue.empty() == false) { result += ":" + m_FieldValue; } result += ")"; } result += " "; - if (this->isCategorical()) - { + if (this->isCategorical()) { result += "IN FILTER"; - } - else - { - result += this->print(m_Condition.s_Op) + " " - + core::CStringUtils::typeToString(m_Condition.s_Threshold); + } else { + result += this->print(m_Condition.s_Op) + " " + core::CStringUtils::typeToString(m_Condition.s_Threshold); } return result; } -std::string CRuleCondition::print(ERuleConditionType type) const -{ - switch (type) - { - case E_Categorical: - return ""; - case E_NumericalActual: - return "ACTUAL"; - case E_NumericalTypical: - return "TYPICAL"; - case E_NumericalDiffAbs: - return "DIFF_ABS"; - case E_Time: - return "TIME"; +std::string CRuleCondition::print(ERuleConditionType type) const { + switch (type) { + case E_Categorical: + return ""; + case E_NumericalActual: + return "ACTUAL"; + case E_NumericalTypical: + return "TYPICAL"; + case E_NumericalDiffAbs: + return "DIFF_ABS"; + case E_Time: + return "TIME"; } return std::string(); } -std::string CRuleCondition::print(EConditionOperator op) const -{ - switch (op) - { - case E_LT: - return "<"; - case E_LTE: - return "<="; - case E_GT: - return ">"; - case E_GTE: - return ">="; +std::string CRuleCondition::print(EConditionOperator op) const { + switch (op) { + case E_LT: + return "<"; + case E_LTE: + return "<="; + case E_GT: + return ">"; + case E_GTE: + return ">="; } return std::string(); } - } } diff --git a/lib/model/CSample.cc b/lib/model/CSample.cc index 3b928795ae..ee2c2006d1 100644 --- a/lib/model/CSample.cc +++ b/lib/model/CSample.cc @@ -16,55 +16,37 @@ #include #include -namespace ml -{ -namespace model -{ - -std::string CSample::SToString::operator()(const CSample &sample) const -{ - std::string result = core::CStringUtils::typeToString(sample.m_Time) - + core::CPersistUtils::PAIR_DELIMITER - + core::CStringUtils::typeToStringPrecise(sample.m_VarianceScale, - core::CIEEE754::E_SinglePrecision) - + core::CPersistUtils::PAIR_DELIMITER - + core::CStringUtils::typeToStringPrecise(sample.m_Count, - core::CIEEE754::E_SinglePrecision); - for (std::size_t i = 0u; i < sample.m_Value.size(); ++i) - { - result += core::CPersistUtils::PAIR_DELIMITER - + core::CStringUtils::typeToStringPrecise(sample.m_Value[i], - core::CIEEE754::E_SinglePrecision); - +namespace ml { +namespace model { + +std::string CSample::SToString::operator()(const CSample& sample) const { + std::string result = core::CStringUtils::typeToString(sample.m_Time) + core::CPersistUtils::PAIR_DELIMITER + + core::CStringUtils::typeToStringPrecise(sample.m_VarianceScale, core::CIEEE754::E_SinglePrecision) + + core::CPersistUtils::PAIR_DELIMITER + + core::CStringUtils::typeToStringPrecise(sample.m_Count, core::CIEEE754::E_SinglePrecision); + for 
(std::size_t i = 0u; i < sample.m_Value.size(); ++i) { + result += core::CPersistUtils::PAIR_DELIMITER + + core::CStringUtils::typeToStringPrecise(sample.m_Value[i], core::CIEEE754::E_SinglePrecision); } return result; } -bool CSample::SFromString::operator()(const std::string &token, CSample &value) const -{ +bool CSample::SFromString::operator()(const std::string& token, CSample& value) const { core::CStringUtils::TStrVec tokens; std::string remainder; - core::CStringUtils::tokenise(std::string(1, core::CPersistUtils::PAIR_DELIMITER), - token, - tokens, - remainder); - if (!remainder.empty()) - { + core::CStringUtils::tokenise(std::string(1, core::CPersistUtils::PAIR_DELIMITER), token, tokens, remainder); + if (!remainder.empty()) { tokens.push_back(remainder); } - if ( !core::CStringUtils::stringToType(tokens[0], value.m_Time) - || !core::CStringUtils::stringToType(tokens[1], value.m_VarianceScale) - || !core::CStringUtils::stringToType(tokens[2], value.m_Count)) - { + if (!core::CStringUtils::stringToType(tokens[0], value.m_Time) || !core::CStringUtils::stringToType(tokens[1], value.m_VarianceScale) || + !core::CStringUtils::stringToType(tokens[2], value.m_Count)) { LOG_ERROR("Cannot parse as sample: " << token); return false; } - for (std::size_t i = 3u; i < tokens.size(); ++i) - { + for (std::size_t i = 3u; i < tokens.size(); ++i) { double vi; - if (!core::CStringUtils::stringToType(tokens[i], vi)) - { + if (!core::CStringUtils::stringToType(tokens[i], vi)) { LOG_ERROR("Cannot parse as sample: " << token); return false; } @@ -73,67 +55,45 @@ bool CSample::SFromString::operator()(const std::string &token, CSample &value) return true; } -CSample::CSample() : - m_Time(0), - m_Value(), - m_VarianceScale(0.0), - m_Count(0) -{ +CSample::CSample() : m_Time(0), m_Value(), m_VarianceScale(0.0), m_Count(0) { } -CSample::CSample(core_t::TTime time, - const TDouble1Vec &value, - double varianceScale, - double count) : - m_Time(time), - m_Value(value), - m_VarianceScale(varianceScale), - m_Count(count) -{ +CSample::CSample(core_t::TTime time, const TDouble1Vec& value, double varianceScale, double count) + : m_Time(time), m_Value(value), m_VarianceScale(varianceScale), m_Count(count) { } -CSample::TDouble1Vec CSample::value(std::size_t dimension) const -{ +CSample::TDouble1Vec CSample::value(std::size_t dimension) const { using TSizeVec = std::vector; TDouble1Vec result; - const TSizeVec &indices = CFeatureDataIndexing::valueIndices(dimension); + const TSizeVec& indices = CFeatureDataIndexing::valueIndices(dimension); result.reserve(indices.size()); - for (std::size_t i = 0u; i < indices.size(); ++i) - { + for (std::size_t i = 0u; i < indices.size(); ++i) { result.push_back(m_Value[indices[i]]); } return result; } -uint64_t CSample::checksum() const -{ +uint64_t CSample::checksum() const { uint64_t seed = static_cast(m_Time); seed = maths::CChecksum::calculate(seed, m_Value); seed = maths::CChecksum::calculate(seed, m_VarianceScale); return maths::CChecksum::calculate(seed, m_Count); } -std::string CSample::print() const -{ +std::string CSample::print() const { std::ostringstream result; - result << '(' << m_Time - << ' ' << core::CContainerPrinter::print(m_Value) - << ' ' << m_VarianceScale - << ' ' << m_Count << ')'; + result << '(' << m_Time << ' ' << core::CContainerPrinter::print(m_Value) << ' ' << m_VarianceScale << ' ' << m_Count << ')'; return result.str(); } -void CSample::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ +void 
CSample::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CSample"); core::CMemoryDebug::dynamicSize("m_Value", m_Value, mem); } -std::size_t CSample::memoryUsage() const -{ +std::size_t CSample::memoryUsage() const { return core::CMemory::dynamicSize(m_Value); } - } } diff --git a/lib/model/CSampleCounts.cc b/lib/model/CSampleCounts.cc index 31762364da..62a04e7afc 100644 --- a/lib/model/CSampleCounts.cc +++ b/lib/model/CSampleCounts.cc @@ -11,8 +11,8 @@ #include #include -#include #include +#include #include @@ -20,13 +20,10 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -namespace -{ +namespace { const std::string SAMPLE_COUNT_TAG("b"); const std::string MEAN_NON_ZERO_BUCKET_COUNT_TAG("c"); const std::string EFFECTIVE_SAMPLE_VARIANCE_TAG("d"); @@ -38,30 +35,24 @@ using TStrCRef = boost::reference_wrapper; using TStrCRefUInt64Map = std::map; } -CSampleCounts::CSampleCounts(unsigned int sampleCountOverride) : - m_SampleCountOverride(sampleCountOverride) -{} - -CSampleCounts::CSampleCounts(bool isForPersistence, - const CSampleCounts &other) : - m_SampleCountOverride(other.m_SampleCountOverride), - m_SampleCounts(other.m_SampleCounts), - m_MeanNonZeroBucketCounts(other.m_MeanNonZeroBucketCounts), - m_EffectiveSampleVariances(other.m_EffectiveSampleVariances) -{ - if (!isForPersistence) - { +CSampleCounts::CSampleCounts(unsigned int sampleCountOverride) : m_SampleCountOverride(sampleCountOverride) { +} + +CSampleCounts::CSampleCounts(bool isForPersistence, const CSampleCounts& other) + : m_SampleCountOverride(other.m_SampleCountOverride), + m_SampleCounts(other.m_SampleCounts), + m_MeanNonZeroBucketCounts(other.m_MeanNonZeroBucketCounts), + m_EffectiveSampleVariances(other.m_EffectiveSampleVariances) { + if (!isForPersistence) { LOG_ABORT("This constructor only creates clones for persistence"); } } -CSampleCounts *CSampleCounts::cloneForPersistence() const -{ +CSampleCounts* CSampleCounts::cloneForPersistence() const { return new CSampleCounts(true, *this); } -void CSampleCounts::acceptPersistInserter(core::CStatePersistInserter &inserter) const -{ +void CSampleCounts::acceptPersistInserter(core::CStatePersistInserter& inserter) const { // Note m_SampleCountOverride is only for unit tests at present, // hence not persisted or restored. 
@@ -70,169 +61,117 @@ void CSampleCounts::acceptPersistInserter(core::CStatePersistInserter &inserter) core::CPersistUtils::persist(EFFECTIVE_SAMPLE_VARIANCE_TAG, m_EffectiveSampleVariances, inserter); } -bool CSampleCounts::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser) -{ - do - { - const std::string &name = traverser.name(); - if (name == SAMPLE_COUNT_TAG) - { - if (core::CPersistUtils::restore(name, m_SampleCounts, traverser) == false) - { +bool CSampleCounts::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + if (name == SAMPLE_COUNT_TAG) { + if (core::CPersistUtils::restore(name, m_SampleCounts, traverser) == false) { LOG_ERROR("Invalid sample counts"); return false; } - } - else if (name == MEAN_NON_ZERO_BUCKET_COUNT_TAG) - { - if (core::CPersistUtils::restore(name, m_MeanNonZeroBucketCounts, traverser) == false) - { + } else if (name == MEAN_NON_ZERO_BUCKET_COUNT_TAG) { + if (core::CPersistUtils::restore(name, m_MeanNonZeroBucketCounts, traverser) == false) { LOG_ERROR("Invalid non-zero bucket count means"); return false; } - } - else if (name == EFFECTIVE_SAMPLE_VARIANCE_TAG) - { - if (core::CPersistUtils::restore(name, m_EffectiveSampleVariances, traverser) == false) - { + } else if (name == EFFECTIVE_SAMPLE_VARIANCE_TAG) { + if (core::CPersistUtils::restore(name, m_EffectiveSampleVariances, traverser) == false) { LOG_ERROR("Invalid effective sample variances"); return false; } } - } - while (traverser.next()); + } while (traverser.next()); return true; } -unsigned int CSampleCounts::count(std::size_t id) const -{ - return m_SampleCountOverride > 0 ? - m_SampleCountOverride : - id < m_SampleCounts.size() ? m_SampleCounts[id] : 0; +unsigned int CSampleCounts::count(std::size_t id) const { + return m_SampleCountOverride > 0 ? m_SampleCountOverride : id < m_SampleCounts.size() ? m_SampleCounts[id] : 0; } -double CSampleCounts::effectiveSampleCount(std::size_t id) const -{ - if (id < m_EffectiveSampleVariances.size()) - { +double CSampleCounts::effectiveSampleCount(std::size_t id) const { + if (id < m_EffectiveSampleVariances.size()) { // This uses the fact that variance ~ 1 / count. double count = maths::CBasicStatistics::count(m_EffectiveSampleVariances[id]); - double mean = maths::CBasicStatistics::mean(m_EffectiveSampleVariances[id]); + double mean = maths::CBasicStatistics::mean(m_EffectiveSampleVariances[id]); return count > 0.0 ? 
               1.0 / mean : this->count(id);
     }
     return 0.0;
 }
 
-void CSampleCounts::resetSampleCount(const CDataGatherer &gatherer,
-                                     std::size_t id)
-{
-    if (m_SampleCountOverride > 0)
-    {
+void CSampleCounts::resetSampleCount(const CDataGatherer& gatherer, std::size_t id) {
+    if (m_SampleCountOverride > 0) {
         return;
     }
-    if (id >= m_MeanNonZeroBucketCounts.size())
-    {
+    if (id >= m_MeanNonZeroBucketCounts.size()) {
         LOG_ERROR("Bad identifier " << id);
         return;
     }
-    const TMeanAccumulator &count_ = m_MeanNonZeroBucketCounts[id];
-    if (maths::CBasicStatistics::count(count_)
-            >= NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT)
-    {
+    const TMeanAccumulator& count_ = m_MeanNonZeroBucketCounts[id];
+    if (maths::CBasicStatistics::count(count_) >= NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT) {
         unsigned sampleCountThreshold = 0;
-        const CDataGatherer::TFeatureVec &features = gatherer.features();
-        for (CDataGatherer::TFeatureVecCItr i = features.begin(); i != features.end(); ++i)
-        {
-            sampleCountThreshold = std::max(sampleCountThreshold,
-                                            model_t::minimumSampleCount(*i));
+        const CDataGatherer::TFeatureVec& features = gatherer.features();
+        for (CDataGatherer::TFeatureVecCItr i = features.begin(); i != features.end(); ++i) {
+            sampleCountThreshold = std::max(sampleCountThreshold, model_t::minimumSampleCount(*i));
         }
         double count = maths::CBasicStatistics::mean(count_);
-        m_SampleCounts[id] = std::max(sampleCountThreshold,
-                                      static_cast<unsigned int>(count + 0.5));
-        LOG_DEBUG("Setting sample count to " << m_SampleCounts[id]
-                  << " for " << this->name(gatherer, id));
+        m_SampleCounts[id] = std::max(sampleCountThreshold, static_cast<unsigned int>(count + 0.5));
+        LOG_DEBUG("Setting sample count to " << m_SampleCounts[id] << " for " << this->name(gatherer, id));
     }
 }
 
-void CSampleCounts::refresh(const CDataGatherer &gatherer)
-{
-    if (m_SampleCountOverride > 0)
-    {
+void CSampleCounts::refresh(const CDataGatherer& gatherer) {
+    if (m_SampleCountOverride > 0) {
         return;
     }
     unsigned sampleCountThreshold = 0;
-    const CDataGatherer::TFeatureVec &features = gatherer.features();
-    for (CDataGatherer::TFeatureVecCItr i = features.begin(); i != features.end(); ++i)
-    {
-        sampleCountThreshold = std::max(sampleCountThreshold,
-                                        model_t::minimumSampleCount(*i));
+    const CDataGatherer::TFeatureVec& features = gatherer.features();
+    for (CDataGatherer::TFeatureVecCItr i = features.begin(); i != features.end(); ++i) {
+        sampleCountThreshold = std::max(sampleCountThreshold, model_t::minimumSampleCount(*i));
     }
-    for (std::size_t id = 0u; id < m_MeanNonZeroBucketCounts.size(); ++id)
-    {
-        const TMeanAccumulator &count_ = m_MeanNonZeroBucketCounts[id];
-        if (m_SampleCounts[id] > 0)
-        {
-            if (maths::CBasicStatistics::count(count_)
-                    >= NUMBER_BUCKETS_TO_REFRESH_SAMPLE_COUNT)
-            {
+    for (std::size_t id = 0u; id < m_MeanNonZeroBucketCounts.size(); ++id) {
+        const TMeanAccumulator& count_ = m_MeanNonZeroBucketCounts[id];
+        if (m_SampleCounts[id] > 0) {
+            if (maths::CBasicStatistics::count(count_) >= NUMBER_BUCKETS_TO_REFRESH_SAMPLE_COUNT) {
                 double count = maths::CBasicStatistics::mean(count_);
                 double scale = count / static_cast<double>(m_SampleCounts[id]);
-                if ( scale < maths::MINIMUM_ACCURATE_VARIANCE_SCALE
-                    || scale > maths::MAXIMUM_ACCURATE_VARIANCE_SCALE)
-                {
+                if (scale < maths::MINIMUM_ACCURATE_VARIANCE_SCALE || scale > maths::MAXIMUM_ACCURATE_VARIANCE_SCALE) {
                     unsigned int oldCount = m_SampleCounts[id];
-                    unsigned int newCount = std::max(sampleCountThreshold,
-                                                     static_cast<unsigned int>(count + 0.5));
-                    LOG_TRACE("Sample count " << oldCount
-                              << " is too far from the bucket mean " << count
-                              << " count, resetting to " << newCount
-                              << ". This may cause temporary instability"
-                              << " for " << this->name(gatherer, id) << " (" << id
-                              << "). (Mean count " << count_ << ")");
+                    unsigned int newCount = std::max(sampleCountThreshold, static_cast<unsigned int>(count + 0.5));
+                    LOG_TRACE("Sample count " << oldCount << " is too far from the bucket mean " << count << " count, resetting to "
+                              << newCount << ". This may cause temporary instability"
+                              << " for " << this->name(gatherer, id) << " (" << id << "). (Mean count " << count_ << ")");
                     m_SampleCounts[id] = newCount;
                     // Avoid compiler warning in the case of LOG_TRACE being compiled out
                     static_cast<void>(oldCount);
                 }
             }
-        }
-        else if (maths::CBasicStatistics::count(count_)
-                     >= NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT)
-        {
+        } else if (maths::CBasicStatistics::count(count_) >= NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT) {
             double count = maths::CBasicStatistics::mean(count_);
-            m_SampleCounts[id] = std::max(sampleCountThreshold,
-                                          static_cast<unsigned int>(count + 0.5));
-            LOG_TRACE("Setting sample count to " << m_SampleCounts[id]
-                      << " for " << this->name(gatherer, id) << " (" << id
-                      << "). (Mean count " << count_ << ")");
+            m_SampleCounts[id] = std::max(sampleCountThreshold, static_cast<unsigned int>(count + 0.5));
+            LOG_TRACE("Setting sample count to " << m_SampleCounts[id] << " for " << this->name(gatherer, id) << " (" << id
+                      << "). (Mean count " << count_ << ")");
         }
     }
 }
 
-void CSampleCounts::updateSampleVariance(std::size_t id)
-{
+void CSampleCounts::updateSampleVariance(std::size_t id) {
    m_EffectiveSampleVariances[id].add(1.0 / static_cast<double>(this->count(id)));
 }
 
-void CSampleCounts::updateMeanNonZeroBucketCount(std::size_t id,
-                                                 double count,
-                                                 double alpha)
-{
+void CSampleCounts::updateMeanNonZeroBucketCount(std::size_t id, double count, double alpha) {
     m_MeanNonZeroBucketCounts[id].add(count);
     m_MeanNonZeroBucketCounts[id].age(alpha);
     m_EffectiveSampleVariances[id].age(alpha);
 }
 
-void CSampleCounts::recycle(const TSizeVec &idsToRemove)
-{
-    for (std::size_t i = 0u; i < idsToRemove.size(); ++i)
-    {
+void CSampleCounts::recycle(const TSizeVec& idsToRemove) {
+    for (std::size_t i = 0u; i < idsToRemove.size(); ++i) {
         std::size_t id = idsToRemove[i];
-        if (id >= m_SampleCounts.size())
-        {
+        if (id >= m_SampleCounts.size()) {
             continue;
         }
         m_SampleCounts[id] = 0;
@@ -240,52 +179,34 @@ void CSampleCounts::recycle(const TSizeVec &idsToRemove)
         m_EffectiveSampleVariances[id] = TMeanAccumulator();
     }
     LOG_TRACE("m_SampleCounts = " << core::CContainerPrinter::print(m_SampleCounts));
-    LOG_TRACE("m_MeanNonZeroBucketCounts = "
-              << core::CContainerPrinter::print(m_MeanNonZeroBucketCounts));
-    LOG_TRACE("m_EffectiveSampleVariances = "
-              << core::CContainerPrinter::print(m_EffectiveSampleVariances));
+    LOG_TRACE("m_MeanNonZeroBucketCounts = " << core::CContainerPrinter::print(m_MeanNonZeroBucketCounts));
+    LOG_TRACE("m_EffectiveSampleVariances = " << core::CContainerPrinter::print(m_EffectiveSampleVariances));
 }
 
-void CSampleCounts::remove(std::size_t lowestIdToRemove)
-{
-    if (lowestIdToRemove < m_SampleCounts.size())
-    {
-        m_SampleCounts.erase(m_SampleCounts.begin() + lowestIdToRemove,
-                             m_SampleCounts.end());
-        m_MeanNonZeroBucketCounts.erase( m_MeanNonZeroBucketCounts.begin()
-                                        + lowestIdToRemove,
-                                        m_MeanNonZeroBucketCounts.end());
-        m_EffectiveSampleVariances.erase( m_EffectiveSampleVariances.begin()
-                                         + lowestIdToRemove,
-                                         m_EffectiveSampleVariances.end());
+void CSampleCounts::remove(std::size_t lowestIdToRemove) {
+    if (lowestIdToRemove < m_SampleCounts.size()) {
+        m_SampleCounts.erase(m_SampleCounts.begin() + lowestIdToRemove, m_SampleCounts.end());
+        m_MeanNonZeroBucketCounts.erase(m_MeanNonZeroBucketCounts.begin() + lowestIdToRemove, m_MeanNonZeroBucketCounts.end());
+        m_EffectiveSampleVariances.erase(m_EffectiveSampleVariances.begin() + lowestIdToRemove, m_EffectiveSampleVariances.end());
         LOG_TRACE("m_SampleCounts = " << core::CContainerPrinter::print(m_SampleCounts));
-        LOG_TRACE("m_MeanNonZeroBucketCounts = "
-                  << core::CContainerPrinter::print(m_MeanNonZeroBucketCounts));
-        LOG_TRACE("m_EffectiveSampleVariances = "
-                  << core::CContainerPrinter::print(m_EffectiveSampleVariances));
+        LOG_TRACE("m_MeanNonZeroBucketCounts = " << core::CContainerPrinter::print(m_MeanNonZeroBucketCounts));
+        LOG_TRACE("m_EffectiveSampleVariances = " << core::CContainerPrinter::print(m_EffectiveSampleVariances));
     }
 }
 
-void CSampleCounts::resize(std::size_t id)
-{
-    if (id >= m_SampleCounts.size())
-    {
+void CSampleCounts::resize(std::size_t id) {
+    if (id >= m_SampleCounts.size()) {
         m_SampleCounts.resize(id + 1);
         m_MeanNonZeroBucketCounts.resize(id + 1);
         m_EffectiveSampleVariances.resize(id + 1);
     }
 }
 
-uint64_t CSampleCounts::checksum(const CDataGatherer &gatherer) const
-{
+uint64_t CSampleCounts::checksum(const CDataGatherer& gatherer) const {
     TStrCRefUInt64Map hashes;
-    for (std::size_t id = 0u; id < m_SampleCounts.size(); ++id)
-    {
-        if (gatherer.isPopulation() ?
-            gatherer.isAttributeActive(id) :
-            gatherer.isPersonActive(id))
-        {
-            uint64_t &hash = hashes[TStrCRef(this->name(gatherer, id))];
+    for (std::size_t id = 0u; id < m_SampleCounts.size(); ++id) {
+        if (gatherer.isPopulation() ? gatherer.isAttributeActive(id) : gatherer.isPersonActive(id)) {
+            uint64_t& hash = hashes[TStrCRef(this->name(gatherer, id))];
             hash = maths::CChecksum::calculate(hash, m_SampleCounts[id]);
             hash = maths::CChecksum::calculate(hash, m_MeanNonZeroBucketCounts[id]);
             hash = maths::CChecksum::calculate(hash, m_EffectiveSampleVariances[id]);
@@ -295,37 +216,28 @@ uint64_t CSampleCounts::checksum(const CDataGatherer &gatherer) const
     return maths::CChecksum::calculate(0, hashes);
 }
 
-void CSampleCounts::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
-{
+void CSampleCounts::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CSampleCounts");
     core::CMemoryDebug::dynamicSize("m_SampleCounts", m_SampleCounts, mem);
-    core::CMemoryDebug::dynamicSize("m_MeanNonZeroBucketCounts",
-                                    m_MeanNonZeroBucketCounts, mem);
-    core::CMemoryDebug::dynamicSize("m_EffectiveSampleVariances",
-                                    m_EffectiveSampleVariances, mem);
+    core::CMemoryDebug::dynamicSize("m_MeanNonZeroBucketCounts", m_MeanNonZeroBucketCounts, mem);
+    core::CMemoryDebug::dynamicSize("m_EffectiveSampleVariances", m_EffectiveSampleVariances, mem);
 }
 
-std::size_t CSampleCounts::memoryUsage() const
-{
+std::size_t CSampleCounts::memoryUsage() const {
     std::size_t mem = core::CMemory::dynamicSize(m_SampleCounts);
     mem += core::CMemory::dynamicSize(m_MeanNonZeroBucketCounts);
     mem += core::CMemory::dynamicSize(m_EffectiveSampleVariances);
     return mem;
 }
 
-void CSampleCounts::clear()
-{
+void CSampleCounts::clear() {
     m_SampleCounts.clear();
     m_MeanNonZeroBucketCounts.clear();
     m_EffectiveSampleVariances.clear();
 }
 
-const std::string &CSampleCounts::name(const CDataGatherer &gatherer,
-                                       std::size_t id) const
-{
-    return gatherer.isPopulation() ?
-           gatherer.attributeName(id) :
-           gatherer.personName(id);
+const std::string& CSampleCounts::name(const CDataGatherer& gatherer, std::size_t id) const {
+    return gatherer.isPopulation() ? gatherer.attributeName(id) : gatherer.personName(id);
 }
 
 }
 // model
diff --git a/lib/model/CSearchKey.cc b/lib/model/CSearchKey.cc
index f50a7cbce0..29c4ba74e3 100644
--- a/lib/model/CSearchKey.cc
+++ b/lib/model/CSearchKey.cc
@@ -25,14 +25,10 @@
 #include
 #include
 
+namespace ml {
+namespace model {
 
-namespace ml
-{
-namespace model
-{
-
-namespace
-{
+namespace {
 // CSearchKey
 const std::string FUNCTION_NAME_TAG("a");
@@ -49,13 +45,11 @@ const std::string IDENTIFIER_TAG("i");
 const std::string KEY_TAG("a");
 const std::string EMPTY_STRING;
-
 }
-
 // Initialise statics
 const std::string CSearchKey::COUNT_NAME("count");
-const char CSearchKey::CUE_DELIMITER('/');
+const char CSearchKey::CUE_DELIMITER('/');
 const std::string CSearchKey::EMPTY_STRING;
 
 CSearchKey::CSearchKey(int identifier,
@@ -66,113 +60,68 @@ CSearchKey::CSearchKey(int identifier,
                        std::string byFieldName,
                        std::string overFieldName,
                        std::string partitionFieldName,
-                       const TStrVec &influenceFieldNames)
-    : m_Identifier(identifier),
-      m_Function(function),
-      m_UseNull(useNull),
-      m_ExcludeFrequent(excludeFrequent),
-      m_Hash(0)
-{
+                       const TStrVec& influenceFieldNames)
+    : m_Identifier(identifier), m_Function(function), m_UseNull(useNull), m_ExcludeFrequent(excludeFrequent), m_Hash(0) {
     m_FieldName = CStringStore::names().get(fieldName);
     m_ByFieldName = CStringStore::names().get(byFieldName);
     m_OverFieldName = CStringStore::names().get(overFieldName);
     m_PartitionFieldName = CStringStore::names().get(partitionFieldName);
-    for (TStrVec::const_iterator i = influenceFieldNames.begin(); i != influenceFieldNames.end(); ++i)
-    {
+    for (TStrVec::const_iterator i = influenceFieldNames.begin(); i != influenceFieldNames.end(); ++i) {
         m_InfluenceFieldNames.push_back(CStringStore::influencers().get(*i));
     }
 }
 
-CSearchKey::CSearchKey(core::CStateRestoreTraverser &traverser,
-                       bool &successful)
-    : m_Identifier(0),
-      m_Function(function_t::E_IndividualCount),
-      m_UseNull(false),
-      m_ExcludeFrequent(model_t::E_XF_None),
-      m_Hash(0)
-{
-    successful = traverser.traverseSubLevel(boost::bind(&CSearchKey::acceptRestoreTraverser,
-                                                        this,
-                                                        _1));
+CSearchKey::CSearchKey(core::CStateRestoreTraverser& traverser, bool& successful)
+    : m_Identifier(0), m_Function(function_t::E_IndividualCount), m_UseNull(false), m_ExcludeFrequent(model_t::E_XF_None), m_Hash(0) {
+    successful = traverser.traverseSubLevel(boost::bind(&CSearchKey::acceptRestoreTraverser, this, _1));
 }
 
-bool CSearchKey::acceptRestoreTraverser(core::CStateRestoreTraverser &traverser)
-{
-    do
-    {
-        const std::string &name = traverser.name();
-        if (name == IDENTIFIER_TAG)
-        {
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 m_Identifier) == false)
-            {
+bool CSearchKey::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+    do {
+        const std::string& name = traverser.name();
+        if (name == IDENTIFIER_TAG) {
+            if (core::CStringUtils::stringToType(traverser.value(), m_Identifier) == false) {
                 LOG_ERROR("Invalid identifier in " << traverser.value());
                 return false;
             }
-        }
-        else if (name == FUNCTION_NAME_TAG)
-        {
+        } else if (name == FUNCTION_NAME_TAG) {
             int function(-1);
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 function) == false
-                || function < 0)
-            {
+            if (core::CStringUtils::stringToType(traverser.value(), function) == false || function < 0) {
                 LOG_ERROR("Invalid function in " << traverser.value());
                 return false;
             }
             m_Function = static_cast<function_t::EFunction>(function);
-        }
-        else if (name == USE_NULL_TAG)
-        {
+        } else if (name == USE_NULL_TAG) {
             int useNull(-1);
-            if (core::CStringUtils::stringToType(traverser.value(),
-                                                 useNull) == false)
-            {
+            if (core::CStringUtils::stringToType(traverser.value(), useNull) == false) {
                 LOG_ERROR("Invalid use null flag in " << traverser.value());
                 return false;
             }
             m_UseNull = (useNull != 0);
-        }
-        else if (name == EXCLUDE_FREQUENT_TAG)
-        {
+        } else if (name == EXCLUDE_FREQUENT_TAG) {
             int excludeFrequent(-1);
-            if ((core::CStringUtils::stringToType(traverser.value(),
-                                                  excludeFrequent) == false) ||
-                (excludeFrequent < 0))
-            {
+            if ((core::CStringUtils::stringToType(traverser.value(), excludeFrequent) == false) || (excludeFrequent < 0)) {
                 LOG_ERROR("Invalid excludeFrequent flag in " << traverser.value());
                 return false;
             }
             m_ExcludeFrequent = static_cast<model_t::EExcludeFrequent>(excludeFrequent);
-        }
-        else if (name == FIELD_NAME_TAG)
-        {
+        } else if (name == FIELD_NAME_TAG) {
             m_FieldName = CStringStore::names().get(traverser.value());
-        }
-        else if (name == BY_FIELD_NAME_TAG)
-        {
+        } else if (name == BY_FIELD_NAME_TAG) {
             m_ByFieldName = CStringStore::names().get(traverser.value());
-        }
-        else if (name == OVER_FIELD_NAME_TAG)
-        {
+        } else if (name == OVER_FIELD_NAME_TAG) {
             m_OverFieldName = CStringStore::names().get(traverser.value());
-        }
-        else if (name == PARTITION_FIELD_NAME_TAG)
-        {
+        } else if (name == PARTITION_FIELD_NAME_TAG) {
             m_PartitionFieldName = CStringStore::names().get(traverser.value());
-        }
-        else if (name == INFLUENCE_FIELD_NAME_TAG)
-        {
+        } else if (name == INFLUENCE_FIELD_NAME_TAG) {
             m_InfluenceFieldNames.push_back(CStringStore::influencers().get(traverser.value()));
         }
-    }
-    while (traverser.next());
+    } while (traverser.next());
 
     return true;
 }
 
-void CSearchKey::acceptPersistInserter(core::CStatePersistInserter &inserter) const
-{
+void CSearchKey::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     inserter.insertValue(IDENTIFIER_TAG, m_Identifier);
     inserter.insertValue(FUNCTION_NAME_TAG, static_cast<int>(m_Function));
     inserter.insertValue(USE_NULL_TAG, static_cast<int>(m_UseNull));
@@ -181,15 +130,12 @@ void CSearchKey::acceptPersistInserter(core::CStatePersistInserter &inserter) co
     inserter.insertValue(FIELD_NAME_TAG, *m_FieldName);
     inserter.insertValue(BY_FIELD_NAME_TAG, *m_ByFieldName);
     inserter.insertValue(OVER_FIELD_NAME_TAG, *m_OverFieldName);
     inserter.insertValue(PARTITION_FIELD_NAME_TAG, *m_PartitionFieldName);
-    for (std::size_t i = 0u; i < m_InfluenceFieldNames.size(); ++i)
-    {
-        inserter.insertValue(INFLUENCE_FIELD_NAME_TAG,
-                             *m_InfluenceFieldNames[i]);
+    for (std::size_t i = 0u; i < m_InfluenceFieldNames.size(); ++i) {
+        inserter.insertValue(INFLUENCE_FIELD_NAME_TAG, *m_InfluenceFieldNames[i]);
     }
 }
 
-void CSearchKey::swap(CSearchKey &other)
-{
+void CSearchKey::swap(CSearchKey& other) {
     std::swap(m_Identifier, other.m_Identifier);
     std::swap(m_Function, other.m_Function);
     std::swap(m_UseNull, other.m_UseNull);
@@ -202,20 +148,13 @@ void CSearchKey::swap(CSearchKey &other)
     std::swap(m_Hash, other.m_Hash);
 }
 
-bool CSearchKey::operator==(const CSearchKey &rhs) const
-{
+bool CSearchKey::operator==(const CSearchKey& rhs) const {
     using TStrEqualTo = std::equal_to<std::string>;
 
-    return this->hash() == rhs.hash()
-           && m_Identifier == rhs.m_Identifier
-           && m_Function == rhs.m_Function
-           && m_UseNull == rhs.m_UseNull
-           && m_ExcludeFrequent == rhs.m_ExcludeFrequent
-           && m_FieldName == rhs.m_FieldName
-           && m_ByFieldName == rhs.m_ByFieldName
-           && m_OverFieldName == rhs.m_OverFieldName
-           && m_PartitionFieldName == rhs.m_PartitionFieldName
-           && m_InfluenceFieldNames.size() == rhs.m_InfluenceFieldNames.size()
+    return this->hash() == rhs.hash() && m_Identifier == rhs.m_Identifier && m_Function == rhs.m_Function && m_UseNull == rhs.m_UseNull &&
+           m_ExcludeFrequent == rhs.m_ExcludeFrequent && m_FieldName == rhs.m_FieldName && m_ByFieldName == rhs.m_ByFieldName &&
+           m_OverFieldName == rhs.m_OverFieldName && m_PartitionFieldName == rhs.m_PartitionFieldName &&
+           m_InfluenceFieldNames.size() == rhs.m_InfluenceFieldNames.size()
            // Compare dereferenced strings rather than pointers as there's a
            // (small) possibility that the string store will not always return
           // the same pointer for the same string
@@ -225,50 +164,37 @@ bool CSearchKey::operator==(const CSearchKey &rhs) const
                       core::CFunctional::SDereference<TStrEqualTo>());
 }
 
-bool CSearchKey::operator<(const CSearchKey &rhs) const
-{
-    if (this->hash() == rhs.hash())
-    {
-        if (m_Identifier == rhs.m_Identifier)
-        {
-            if (m_Function == rhs.m_Function)
-            {
-                if (m_UseNull == rhs.m_UseNull)
-                {
-                    if (m_ExcludeFrequent == rhs.m_ExcludeFrequent)
-                    {
+bool CSearchKey::operator<(const CSearchKey& rhs) const {
+    if (this->hash() == rhs.hash()) {
+        if (m_Identifier == rhs.m_Identifier) {
+            if (m_Function == rhs.m_Function) {
+                if (m_UseNull == rhs.m_UseNull) {
+                    if (m_ExcludeFrequent == rhs.m_ExcludeFrequent) {
                         // Use compare() to calculate equality and less than in one call
                         int comp(m_FieldName->compare(*rhs.m_FieldName));
-                        if (comp != 0)
-                        {
+                        if (comp != 0) {
                             return comp < 0;
                         }
 
                         comp = m_ByFieldName->compare(*rhs.m_ByFieldName);
-                        if (comp != 0)
-                        {
+                        if (comp != 0) {
                             return comp < 0;
                         }
 
                         comp = m_OverFieldName->compare(*rhs.m_OverFieldName);
-                        if (comp != 0)
-                        {
+                        if (comp != 0) {
                             return comp < 0;
                         }
 
-                        if (m_InfluenceFieldNames.size() < rhs.m_InfluenceFieldNames.size())
-                        {
+                        if (m_InfluenceFieldNames.size() < rhs.m_InfluenceFieldNames.size()) {
                             return true;
                         }
-                        if (m_InfluenceFieldNames.size() > rhs.m_InfluenceFieldNames.size())
-                        {
+                        if (m_InfluenceFieldNames.size() > rhs.m_InfluenceFieldNames.size()) {
                             return false;
                         }
-                        for (std::size_t i = 0u; i < m_InfluenceFieldNames.size(); ++i)
-                        {
+                        for (std::size_t i = 0u; i < m_InfluenceFieldNames.size(); ++i) {
                             comp = m_InfluenceFieldNames[i]->compare(*rhs.m_InfluenceFieldNames[i]);
-                            if (comp != 0)
-                            {
+                            if (comp != 0) {
                                 return comp < 0;
                             }
                         }
@@ -291,8 +217,7 @@ bool CSearchKey::operator<(const CSearchKey &rhs) const
     return this->hash() < rhs.hash();
 }
 
-namespace
-{
+namespace {
 // This is keyed on a 'by' field name of 'count', which isn't allowed
 // in a real field config, as it doesn't make sense.
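A note on the operator== body above: as its comment says, the key compares the dereferenced strings rather than the stored pointers, because the string store does not guarantee returning the same pointer for the same text (the CStringStore.cc hunks later in this patch deliberately leak unshared copies under read/write contention). The sketch below shows the same comparison in isolation; it uses std::shared_ptr as a stand-in for core::CStoredStringPtr, and equalDereferenced is a hypothetical helper name.

    // Dereferenced comparison of stored-string sequences: equal when the
    // pointed-to strings match, even if the pointers themselves differ.
    #include <algorithm>
    #include <cassert>
    #include <memory>
    #include <string>
    #include <vector>

    using TStrPtrVec = std::vector<std::shared_ptr<const std::string>>;

    bool equalDereferenced(const TStrPtrVec& lhs, const TStrPtrVec& rhs) {
        return lhs.size() == rhs.size() &&
               std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                          [](const auto& a, const auto& b) { return *a == *b; });
    }

    int main() {
        // Two distinct allocations holding equal text: pointer equality
        // fails, dereferenced equality succeeds.
        TStrPtrVec a{std::make_shared<const std::string>("airline")};
        TStrPtrVec b{std::make_shared<const std::string>("airline")};
        assert(a.front() != b.front());
        assert(equalDereferenced(a, b));
        return 0;
    }

Pointer comparison would be cheaper, but it is only correct with guaranteed interning; comparing values stays correct even when the store hands out distinct allocations for the same text.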
@@ -302,44 +227,32 @@ const CSearchKey SIMPLE_COUNT_KEY(0, // identifier model_t::E_XF_None, EMPTY_STRING, CSearchKey::COUNT_NAME); - } -const CSearchKey &CSearchKey::simpleCountKey() -{ +const CSearchKey& CSearchKey::simpleCountKey() { return SIMPLE_COUNT_KEY; } -bool CSearchKey::isSimpleCount() const -{ +bool CSearchKey::isSimpleCount() const { return isSimpleCount(m_Function, *m_ByFieldName); } -bool CSearchKey::isSimpleCount(function_t::EFunction function, - const std::string &byFieldName) -{ - return function == function_t::E_IndividualCount && - byFieldName == COUNT_NAME; +bool CSearchKey::isSimpleCount(function_t::EFunction function, const std::string& byFieldName) { + return function == function_t::E_IndividualCount && byFieldName == COUNT_NAME; } -bool CSearchKey::isMetric() const -{ +bool CSearchKey::isMetric() const { return function_t::isMetric(m_Function); } -bool CSearchKey::isPopulation() const -{ +bool CSearchKey::isPopulation() const { return function_t::isPopulation(m_Function); } -std::string CSearchKey::toCue() const -{ +std::string CSearchKey::toCue() const { std::string cue; cue.reserve(64 + // hopefully covers function description and slashes - m_FieldName->length() + - m_ByFieldName->length() + - m_OverFieldName->length() + - m_PartitionFieldName->length()); + m_FieldName->length() + m_ByFieldName->length() + m_OverFieldName->length() + m_PartitionFieldName->length()); cue += function_t::print(m_Function); cue += CUE_DELIMITER; cue += m_UseNull ? '1' : '0'; @@ -349,12 +262,10 @@ std::string CSearchKey::toCue() const cue += *m_FieldName; cue += CUE_DELIMITER; cue += *m_ByFieldName; - if (!m_OverFieldName->empty() || !m_PartitionFieldName->empty()) - { + if (!m_OverFieldName->empty() || !m_PartitionFieldName->empty()) { cue += CUE_DELIMITER; cue += *m_OverFieldName; - if (!m_PartitionFieldName->empty()) - { + if (!m_PartitionFieldName->empty()) { cue += CUE_DELIMITER; cue += *m_PartitionFieldName; } @@ -362,70 +273,54 @@ std::string CSearchKey::toCue() const return cue; } -std::string CSearchKey::debug() const -{ +std::string CSearchKey::debug() const { std::ostringstream strm; strm << *this; return strm.str(); } -int CSearchKey::identifier() const -{ +int CSearchKey::identifier() const { return m_Identifier; } -function_t::EFunction CSearchKey::function() const -{ +function_t::EFunction CSearchKey::function() const { return m_Function; } -bool CSearchKey::useNull() const -{ +bool CSearchKey::useNull() const { return m_UseNull; } -model_t::EExcludeFrequent CSearchKey::excludeFrequent() const -{ +model_t::EExcludeFrequent CSearchKey::excludeFrequent() const { return m_ExcludeFrequent; } -bool CSearchKey::hasField(const std::string &name) const -{ - return *m_PartitionFieldName == name - || *m_OverFieldName == name - || *m_ByFieldName == name - || *m_FieldName == name; +bool CSearchKey::hasField(const std::string& name) const { + return *m_PartitionFieldName == name || *m_OverFieldName == name || *m_ByFieldName == name || *m_FieldName == name; } -const std::string &CSearchKey::fieldName() const -{ +const std::string& CSearchKey::fieldName() const { return *m_FieldName; } -const std::string &CSearchKey::byFieldName() const -{ +const std::string& CSearchKey::byFieldName() const { return *m_ByFieldName; } -const std::string &CSearchKey::overFieldName() const -{ +const std::string& CSearchKey::overFieldName() const { return *m_OverFieldName; } -const std::string &CSearchKey::partitionFieldName() const -{ +const std::string& CSearchKey::partitionFieldName() const { return 
*m_PartitionFieldName;
 }
 
-const CSearchKey::TStoredStringPtrVec &CSearchKey::influenceFieldNames() const
-{
+const CSearchKey::TStoredStringPtrVec& CSearchKey::influenceFieldNames() const {
     return m_InfluenceFieldNames;
 }
 
-uint64_t CSearchKey::hash() const
-{
-    if (m_Hash != 0)
-    {
+uint64_t CSearchKey::hash() const {
+    if (m_Hash != 0) {
         return m_Hash;
     }
     m_Hash = m_UseNull ? 1 : 0;
@@ -441,32 +336,16 @@ uint64_t CSearchKey::hash() const
     return m_Hash;
 }
 
-std::ostream &operator<<(std::ostream &strm, const CSearchKey &key)
-{
+std::ostream& operator<<(std::ostream& strm, const CSearchKey& key) {
     // The format for this is very similar to the format used by toCue() at the
     // time of writing. However, do NOT combine the code because the intention
     // is to simplify toCue() in the future.
-    strm << key.m_Identifier
-         << "=="
-         << function_t::print(key.m_Function)
-         << '/'
-         << (key.m_UseNull ? '1' : '0')
-         << '/'
-         << static_cast<int>(key.m_ExcludeFrequent)
-         << '/'
-         << *key.m_FieldName
-         << '/'
-         << *key.m_ByFieldName
-         << '/'
-         << *key.m_OverFieldName
-         << '/'
-         << *key.m_PartitionFieldName
-         << '/';
-
-    for (size_t i = 0; i < key.m_InfluenceFieldNames.size(); ++i)
-    {
-        if (i > 0)
-        {
+    strm << key.m_Identifier << "==" << function_t::print(key.m_Function) << '/' << (key.m_UseNull ? '1' : '0') << '/'
+         << static_cast<int>(key.m_ExcludeFrequent) << '/' << *key.m_FieldName << '/' << *key.m_ByFieldName << '/' << *key.m_OverFieldName
+         << '/' << *key.m_PartitionFieldName << '/';
+
+    for (size_t i = 0; i < key.m_InfluenceFieldNames.size(); ++i) {
+        if (i > 0) {
             strm << ',';
         }
         strm << *key.m_InfluenceFieldNames[i];
@@ -474,8 +353,5 @@ std::ostream &operator<<(std::ostream &strm, const CSearchKey &key)
     return strm;
 }
-
-
 }
 }
-
diff --git a/lib/model/CSimpleCountDetector.cc b/lib/model/CSimpleCountDetector.cc
index 45aaa430be..8d35f7b7c0 100644
--- a/lib/model/CSimpleCountDetector.cc
+++ b/lib/model/CSimpleCountDetector.cc
@@ -9,27 +9,18 @@
 #include
 
-
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
 CSimpleCountDetector::CSimpleCountDetector(int detectorIndex,
                                            model_t::ESummaryMode summaryMode,
-                                           const CAnomalyDetectorModelConfig &modelConfig,
-                                           CLimits &limits,
-                                           const std::string &partitionFieldValue,
+                                           const CAnomalyDetectorModelConfig& modelConfig,
+                                           CLimits& limits,
+                                           const std::string& partitionFieldValue,
                                            core_t::TTime firstTime,
-                                           const TModelFactoryCPtr &modelFactory)
-    : CAnomalyDetector(detectorIndex,
-                       limits,
-                       modelConfig,
-                       partitionFieldValue,
-                       firstTime,
-                       modelFactory),
-      m_FieldValues(summaryMode == model_t::E_None ? 1 : 2)
-{
+                                           const TModelFactoryCPtr& modelFactory)
+    : CAnomalyDetector(detectorIndex, limits, modelConfig, partitionFieldValue, firstTime, modelFactory),
+      m_FieldValues(summaryMode == model_t::E_None ? 1 : 2) {
     // We use a single event rate detector to maintain the counts, and for the
     // special case of the simple count detector, we'll create it before we've
     // seen any events, so that in the extreme case of no events in a search
@@ -37,36 +28,26 @@ CSimpleCountDetector::CSimpleCountDetector(int detectorIndex,
     this->initSimpleCounting();
 }
 
-CSimpleCountDetector::CSimpleCountDetector(bool isForPersistence,
-                                           const CAnomalyDetector &other)
-    : CAnomalyDetector(isForPersistence, other)
-{
+CSimpleCountDetector::CSimpleCountDetector(bool isForPersistence, const CAnomalyDetector& other)
+    : CAnomalyDetector(isForPersistence, other) {
 }
 
-bool CSimpleCountDetector::isSimpleCount() const
-{
+bool CSimpleCountDetector::isSimpleCount() const {
     return true;
 }
 
-void CSimpleCountDetector::pruneModels()
-{
+void CSimpleCountDetector::pruneModels() {
     return;
 }
 
-const CAnomalyDetector::TStrCPtrVec &
-CSimpleCountDetector::preprocessFieldValues(const TStrCPtrVec &fieldValues)
-{
+const CAnomalyDetector::TStrCPtrVec& CSimpleCountDetector::preprocessFieldValues(const TStrCPtrVec& fieldValues) {
     // The first field value is always the magic word "count", but for
     // summarised input we need to pass on the true value of the second field
-    if (m_FieldValues.size() > 1)
-    {
+    if (m_FieldValues.size() > 1) {
         m_FieldValues[1] = (fieldValues.size() > 1) ? fieldValues[1] : &EMPTY_STRING;
     }
 
     return m_FieldValues;
 }
-
-
 }
 }
-
diff --git a/lib/model/CStringStore.cc b/lib/model/CStringStore.cc
index 754e4e0325..9edd640deb 100644
--- a/lib/model/CStringStore.cc
+++ b/lib/model/CStringStore.cc
@@ -6,75 +6,60 @@
 #include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 
-namespace
-{
+namespace {
 //! \brief Helper class to hash a std::string.
-struct SStrHash
-{
-    std::size_t operator()(const std::string &key) const
-    {
+struct SStrHash {
+    std::size_t operator()(const std::string& key) const {
         boost::hash<std::string> hasher;
         return hasher(key);
     }
 } STR_HASH;
 
 //! \brief Helper class to compare a std::string and a CStoredStringPtr.
-struct SStrStoredStringPtrEqual
-{
-    bool operator()(const std::string &lhs, const core::CStoredStringPtr &rhs) const
-    {
-        return lhs == *rhs;
-    }
+struct SStrStoredStringPtrEqual {
+    bool operator()(const std::string& lhs, const core::CStoredStringPtr& rhs) const { return lhs == *rhs; }
 } STR_EQUAL;
 
 // To ensure the singletons are constructed before multiple threads may
 // require them call instance() during the static initialisation phase
 // of the program. Of course, the instance may already be constructed
 // before this if another static object has used it.
-const CStringStore &DO_NOT_USE_THIS_VARIABLE = CStringStore::names(); -const CStringStore &DO_NOT_USE_THIS_VARIABLE_EITHER = CStringStore::influencers(); - +const CStringStore& DO_NOT_USE_THIS_VARIABLE = CStringStore::names(); +const CStringStore& DO_NOT_USE_THIS_VARIABLE_EITHER = CStringStore::influencers(); } -void CStringStore::tidyUpNotThreadSafe() -{ +void CStringStore::tidyUpNotThreadSafe() { names().pruneRemovedNotThreadSafe(); influencers().pruneNotThreadSafe(); } -CStringStore &CStringStore::names() -{ +CStringStore& CStringStore::names() { static CStringStore namesInstance; return namesInstance; } -CStringStore &CStringStore::influencers() -{ +CStringStore& CStringStore::influencers() { static CStringStore influencersInstance; return influencersInstance; } -const core::CStoredStringPtr &CStringStore::getEmpty() const -{ +const core::CStoredStringPtr& CStringStore::getEmpty() const { return m_EmptyString; } -core::CStoredStringPtr CStringStore::get(const std::string &value) -{ +core::CStoredStringPtr CStringStore::get(const std::string& value) { // This section is expected to be performed frequently. // // We ensure either: @@ -86,42 +71,33 @@ core::CStoredStringPtr CStringStore::get(const std::string &value) // We "leak" strings if there is contention between reading and writing, // which is expected to be rare because inserts are expected to be rare. - if (value.empty()) - { + if (value.empty()) { return m_EmptyString; } core::CStoredStringPtr result; m_Reading.fetch_add(1, std::memory_order_release); - if (m_Writing.load(std::memory_order_consume) == 0) - { + if (m_Writing.load(std::memory_order_consume) == 0) { auto i = m_Strings.find(value, STR_HASH, STR_EQUAL); - if (i != m_Strings.end()) - { + if (i != m_Strings.end()) { result = *i; m_Reading.fetch_sub(1, std::memory_order_release); - } - else - { + } else { m_Writing.fetch_add(1, std::memory_order_acq_rel); // NB: fetch_sub() returns the OLD value, and we know we added 1 in // this thread, hence the test for 1 rather than 0 - if (m_Reading.fetch_sub(1, std::memory_order_release) == 1) - { + if (m_Reading.fetch_sub(1, std::memory_order_release) == 1) { // This section is expected to occur infrequently so inserts // are synchronized with a mutex. core::CScopedFastLock lock(m_Mutex); auto ret = m_Strings.insert(core::CStoredStringPtr::makeStoredString(value)); result = *ret.first; - if (ret.second) - { + if (ret.second) { m_StoredStringsMemUse += result.actualMemoryUsage(); } m_Writing.fetch_sub(1, std::memory_order_release); - } - else - { + } else { m_Writing.fetch_sub(1, std::memory_order_relaxed); // This is leaked in the sense that it will never be shared and // won't count towards our reported memory usage. But it is not @@ -130,9 +106,7 @@ core::CStoredStringPtr CStringStore::get(const std::string &value) result = core::CStoredStringPtr::makeStoredString(value); } } - } - else - { + } else { m_Reading.fetch_sub(1, std::memory_order_relaxed); // This is leaked in the sense that it will never be shared and won't // count towards our reported memory usage. 
But it is not leaked @@ -144,20 +118,16 @@ core::CStoredStringPtr CStringStore::get(const std::string &value) return result; } -void CStringStore::remove(const std::string &value) -{ +void CStringStore::remove(const std::string& value) { core::CScopedFastLock lock(m_Mutex); m_Removed.push_back(value); } -void CStringStore::pruneRemovedNotThreadSafe() -{ +void CStringStore::pruneRemovedNotThreadSafe() { core::CScopedFastLock lock(m_Mutex); - for (const auto &removed : m_Removed) - { + for (const auto& removed : m_Removed) { auto i = m_Strings.find(removed, STR_HASH, STR_EQUAL); - if (i != m_Strings.end() && i->isUnique()) - { + if (i != m_Strings.end() && i->isUnique()) { m_StoredStringsMemUse -= i->actualMemoryUsage(); m_Strings.erase(i); } @@ -165,30 +135,22 @@ void CStringStore::pruneRemovedNotThreadSafe() m_Removed.clear(); } -void CStringStore::pruneNotThreadSafe() -{ +void CStringStore::pruneNotThreadSafe() { core::CScopedFastLock lock(m_Mutex); - for (auto i = m_Strings.begin(); i != m_Strings.end(); /**/) - { - if (i->isUnique()) - { + for (auto i = m_Strings.begin(); i != m_Strings.end(); /**/) { + if (i->isUnique()) { m_StoredStringsMemUse -= i->actualMemoryUsage(); i = m_Strings.erase(i); - } - else - { + } else { ++i; } } } -void CStringStore::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const -{ - mem->setName( - this == &CStringStore::names() ? "names StringStore" : - (this == &CStringStore::influencers() ? "influencers StringStore" : - "unknown StringStore") - ); +void CStringStore::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName(this == &CStringStore::names() + ? "names StringStore" + : (this == &CStringStore::influencers() ? "influencers StringStore" : "unknown StringStore")); mem->addItem("empty string ptr", m_EmptyString.actualMemoryUsage()); core::CScopedFastLock lock(m_Mutex); core::CMemoryDebug::dynamicSize("stored strings", m_Strings, mem); @@ -196,8 +158,7 @@ void CStringStore::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) con mem->addItem("stored string ptr memory", m_StoredStringsMemUse); } -std::size_t CStringStore::memoryUsage() const -{ +std::size_t CStringStore::memoryUsage() const { std::size_t mem = m_EmptyString.actualMemoryUsage(); core::CScopedFastLock lock(m_Mutex); // The assumption here is that the existence of @@ -215,15 +176,10 @@ std::size_t CStringStore::memoryUsage() const } CStringStore::CStringStore() - : m_Reading(0), - m_Writing(0), - m_EmptyString(core::CStoredStringPtr::makeStoredString(std::string())), - m_StoredStringsMemUse(0) -{ + : m_Reading(0), m_Writing(0), m_EmptyString(core::CStoredStringPtr::makeStoredString(std::string())), m_StoredStringsMemUse(0) { } -void CStringStore::clearEverythingTestOnly() -{ +void CStringStore::clearEverythingTestOnly() { // For tests that assert on memory usage it's important that these // containers get returned to the state of a default constructed container TStoredStringPtrUSet emptySet; @@ -235,4 +191,3 @@ void CStringStore::clearEverythingTestOnly() } // model } // ml - diff --git a/lib/model/FrequencyPredicates.cc b/lib/model/FrequencyPredicates.cc index 05e5cef8cb..de665a7056 100644 --- a/lib/model/FrequencyPredicates.cc +++ b/lib/model/FrequencyPredicates.cc @@ -6,24 +6,15 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -CPersonFrequencyGreaterThan::CPersonFrequencyGreaterThan(const CAnomalyDetectorModel &model, - double threshold) : - m_Model(&model), - m_Threshold(threshold) -{ 
+CPersonFrequencyGreaterThan::CPersonFrequencyGreaterThan(const CAnomalyDetectorModel& model, double threshold) + : m_Model(&model), m_Threshold(threshold) { } -CAttributeFrequencyGreaterThan::CAttributeFrequencyGreaterThan(const CAnomalyDetectorModel &model, - double threshold) : - m_Model(&model), - m_Threshold(threshold) -{ +CAttributeFrequencyGreaterThan::CAttributeFrequencyGreaterThan(const CAnomalyDetectorModel& model, double threshold) + : m_Model(&model), m_Threshold(threshold) { } - } } diff --git a/lib/model/FunctionTypes.cc b/lib/model/FunctionTypes.cc index dcca4fbfd8..53505d6a43 100644 --- a/lib/model/FunctionTypes.cc +++ b/lib/model/FunctionTypes.cc @@ -13,900 +13,627 @@ #include - -namespace ml -{ -namespace model -{ -namespace function_t -{ +namespace ml { +namespace model { +namespace function_t { using TFeatureVec = model_t::TFeatureVec; -bool isIndividual(EFunction function) -{ - switch (function) - { - case E_IndividualCount: - case E_IndividualNonZeroCount: - case E_IndividualRareCount: - case E_IndividualRareNonZeroCount: - case E_IndividualRare: - case E_IndividualLowCounts: - case E_IndividualHighCounts: - case E_IndividualLowNonZeroCount: - case E_IndividualHighNonZeroCount: - case E_IndividualDistinctCount: - case E_IndividualLowDistinctCount: - case E_IndividualHighDistinctCount: - case E_IndividualInfoContent: - case E_IndividualHighInfoContent: - case E_IndividualLowInfoContent: - case E_IndividualTimeOfDay: - case E_IndividualTimeOfWeek: - case E_IndividualMetric: - case E_IndividualMetricMean: - case E_IndividualMetricLowMean: - case E_IndividualMetricHighMean: - case E_IndividualMetricMedian: - case E_IndividualMetricLowMedian: - case E_IndividualMetricHighMedian: - case E_IndividualMetricMin: - case E_IndividualMetricMax: - case E_IndividualMetricVariance: - case E_IndividualMetricLowVariance: - case E_IndividualMetricHighVariance: - case E_IndividualMetricSum: - case E_IndividualMetricLowSum: - case E_IndividualMetricHighSum: - case E_IndividualMetricNonNullSum: - case E_IndividualMetricLowNonNullSum: - case E_IndividualMetricHighNonNullSum: - case E_IndividualLatLong: - case E_IndividualMaxVelocity: - case E_IndividualMinVelocity: - case E_IndividualMeanVelocity: - case E_IndividualSumVelocity: - return true; - - case E_PopulationCount: - case E_PopulationDistinctCount: - case E_PopulationLowDistinctCount: - case E_PopulationHighDistinctCount: - case E_PopulationRare: - case E_PopulationRareCount: - case E_PopulationFreqRare: - case E_PopulationFreqRareCount: - case E_PopulationLowCounts: - case E_PopulationHighCounts: - case E_PopulationInfoContent: - case E_PopulationLowInfoContent: - case E_PopulationHighInfoContent: - case E_PopulationTimeOfDay: - case E_PopulationTimeOfWeek: - case E_PopulationMetric: - case E_PopulationMetricMean: - case E_PopulationMetricLowMean: - case E_PopulationMetricHighMean: - case E_PopulationMetricMedian: - case E_PopulationMetricLowMedian: - case E_PopulationMetricHighMedian: - case E_PopulationMetricMin: - case E_PopulationMetricMax: - case E_PopulationMetricVariance: - case E_PopulationMetricLowVariance: - case E_PopulationMetricHighVariance: - case E_PopulationMetricSum: - case E_PopulationMetricLowSum: - case E_PopulationMetricHighSum: - case E_PopulationLatLong: - case E_PopulationMaxVelocity: - case E_PopulationMinVelocity: - case E_PopulationMeanVelocity: - case E_PopulationSumVelocity: - return false; - - case E_PeersCount: - case E_PeersLowCounts: - case E_PeersHighCounts: - case E_PeersDistinctCount: - case 
E_PeersLowDistinctCount: - case E_PeersHighDistinctCount: - case E_PeersInfoContent: - case E_PeersLowInfoContent: - case E_PeersHighInfoContent: - case E_PeersTimeOfDay: - case E_PeersTimeOfWeek: - return false; +bool isIndividual(EFunction function) { + switch (function) { + case E_IndividualCount: + case E_IndividualNonZeroCount: + case E_IndividualRareCount: + case E_IndividualRareNonZeroCount: + case E_IndividualRare: + case E_IndividualLowCounts: + case E_IndividualHighCounts: + case E_IndividualLowNonZeroCount: + case E_IndividualHighNonZeroCount: + case E_IndividualDistinctCount: + case E_IndividualLowDistinctCount: + case E_IndividualHighDistinctCount: + case E_IndividualInfoContent: + case E_IndividualHighInfoContent: + case E_IndividualLowInfoContent: + case E_IndividualTimeOfDay: + case E_IndividualTimeOfWeek: + case E_IndividualMetric: + case E_IndividualMetricMean: + case E_IndividualMetricLowMean: + case E_IndividualMetricHighMean: + case E_IndividualMetricMedian: + case E_IndividualMetricLowMedian: + case E_IndividualMetricHighMedian: + case E_IndividualMetricMin: + case E_IndividualMetricMax: + case E_IndividualMetricVariance: + case E_IndividualMetricLowVariance: + case E_IndividualMetricHighVariance: + case E_IndividualMetricSum: + case E_IndividualMetricLowSum: + case E_IndividualMetricHighSum: + case E_IndividualMetricNonNullSum: + case E_IndividualMetricLowNonNullSum: + case E_IndividualMetricHighNonNullSum: + case E_IndividualLatLong: + case E_IndividualMaxVelocity: + case E_IndividualMinVelocity: + case E_IndividualMeanVelocity: + case E_IndividualSumVelocity: + return true; + + case E_PopulationCount: + case E_PopulationDistinctCount: + case E_PopulationLowDistinctCount: + case E_PopulationHighDistinctCount: + case E_PopulationRare: + case E_PopulationRareCount: + case E_PopulationFreqRare: + case E_PopulationFreqRareCount: + case E_PopulationLowCounts: + case E_PopulationHighCounts: + case E_PopulationInfoContent: + case E_PopulationLowInfoContent: + case E_PopulationHighInfoContent: + case E_PopulationTimeOfDay: + case E_PopulationTimeOfWeek: + case E_PopulationMetric: + case E_PopulationMetricMean: + case E_PopulationMetricLowMean: + case E_PopulationMetricHighMean: + case E_PopulationMetricMedian: + case E_PopulationMetricLowMedian: + case E_PopulationMetricHighMedian: + case E_PopulationMetricMin: + case E_PopulationMetricMax: + case E_PopulationMetricVariance: + case E_PopulationMetricLowVariance: + case E_PopulationMetricHighVariance: + case E_PopulationMetricSum: + case E_PopulationMetricLowSum: + case E_PopulationMetricHighSum: + case E_PopulationLatLong: + case E_PopulationMaxVelocity: + case E_PopulationMinVelocity: + case E_PopulationMeanVelocity: + case E_PopulationSumVelocity: + return false; + + case E_PeersCount: + case E_PeersLowCounts: + case E_PeersHighCounts: + case E_PeersDistinctCount: + case E_PeersLowDistinctCount: + case E_PeersHighDistinctCount: + case E_PeersInfoContent: + case E_PeersLowInfoContent: + case E_PeersHighInfoContent: + case E_PeersTimeOfDay: + case E_PeersTimeOfWeek: + return false; } LOG_ERROR("Unexpected function = " << static_cast(function)); return false; } -bool isPopulation(EFunction function) -{ - switch (function) - { - case E_IndividualCount: - case E_IndividualNonZeroCount: - case E_IndividualRareCount: - case E_IndividualRareNonZeroCount: - case E_IndividualRare: - case E_IndividualLowCounts: - case E_IndividualHighCounts: - case E_IndividualLowNonZeroCount: - case E_IndividualHighNonZeroCount: - case 
E_IndividualDistinctCount: - case E_IndividualLowDistinctCount: - case E_IndividualHighDistinctCount: - case E_IndividualInfoContent: - case E_IndividualHighInfoContent: - case E_IndividualLowInfoContent: - case E_IndividualTimeOfDay: - case E_IndividualTimeOfWeek: - case E_IndividualMetric: - case E_IndividualMetricMean: - case E_IndividualMetricLowMean: - case E_IndividualMetricHighMean: - case E_IndividualMetricMedian: - case E_IndividualMetricLowMedian: - case E_IndividualMetricHighMedian: - case E_IndividualMetricMin: - case E_IndividualMetricMax: - case E_IndividualMetricVariance: - case E_IndividualMetricLowVariance: - case E_IndividualMetricHighVariance: - case E_IndividualMetricSum: - case E_IndividualMetricLowSum: - case E_IndividualMetricHighSum: - case E_IndividualMetricNonNullSum: - case E_IndividualMetricLowNonNullSum: - case E_IndividualMetricHighNonNullSum: - case E_IndividualLatLong: - case E_IndividualMaxVelocity: - case E_IndividualMinVelocity: - case E_IndividualMeanVelocity: - case E_IndividualSumVelocity: - return false; - - case E_PopulationCount: - case E_PopulationDistinctCount: - case E_PopulationLowDistinctCount: - case E_PopulationHighDistinctCount: - case E_PopulationRare: - case E_PopulationRareCount: - case E_PopulationFreqRare: - case E_PopulationFreqRareCount: - case E_PopulationLowCounts: - case E_PopulationHighCounts: - case E_PopulationInfoContent: - case E_PopulationLowInfoContent: - case E_PopulationHighInfoContent: - case E_PopulationTimeOfDay: - case E_PopulationTimeOfWeek: - case E_PopulationMetric: - case E_PopulationMetricMean: - case E_PopulationMetricLowMean: - case E_PopulationMetricHighMean: - case E_PopulationMetricMedian: - case E_PopulationMetricLowMedian: - case E_PopulationMetricHighMedian: - case E_PopulationMetricMin: - case E_PopulationMetricMax: - case E_PopulationMetricVariance: - case E_PopulationMetricLowVariance: - case E_PopulationMetricHighVariance: - case E_PopulationMetricSum: - case E_PopulationMetricLowSum: - case E_PopulationMetricHighSum: - case E_PopulationLatLong: - case E_PopulationMaxVelocity: - case E_PopulationMinVelocity: - case E_PopulationMeanVelocity: - case E_PopulationSumVelocity: - return true; - - case E_PeersCount: - case E_PeersLowCounts: - case E_PeersHighCounts: - case E_PeersDistinctCount: - case E_PeersLowDistinctCount: - case E_PeersHighDistinctCount: - case E_PeersInfoContent: - case E_PeersLowInfoContent: - case E_PeersHighInfoContent: - case E_PeersTimeOfDay: - case E_PeersTimeOfWeek: - return false; +bool isPopulation(EFunction function) { + switch (function) { + case E_IndividualCount: + case E_IndividualNonZeroCount: + case E_IndividualRareCount: + case E_IndividualRareNonZeroCount: + case E_IndividualRare: + case E_IndividualLowCounts: + case E_IndividualHighCounts: + case E_IndividualLowNonZeroCount: + case E_IndividualHighNonZeroCount: + case E_IndividualDistinctCount: + case E_IndividualLowDistinctCount: + case E_IndividualHighDistinctCount: + case E_IndividualInfoContent: + case E_IndividualHighInfoContent: + case E_IndividualLowInfoContent: + case E_IndividualTimeOfDay: + case E_IndividualTimeOfWeek: + case E_IndividualMetric: + case E_IndividualMetricMean: + case E_IndividualMetricLowMean: + case E_IndividualMetricHighMean: + case E_IndividualMetricMedian: + case E_IndividualMetricLowMedian: + case E_IndividualMetricHighMedian: + case E_IndividualMetricMin: + case E_IndividualMetricMax: + case E_IndividualMetricVariance: + case E_IndividualMetricLowVariance: + case 
E_IndividualMetricHighVariance: + case E_IndividualMetricSum: + case E_IndividualMetricLowSum: + case E_IndividualMetricHighSum: + case E_IndividualMetricNonNullSum: + case E_IndividualMetricLowNonNullSum: + case E_IndividualMetricHighNonNullSum: + case E_IndividualLatLong: + case E_IndividualMaxVelocity: + case E_IndividualMinVelocity: + case E_IndividualMeanVelocity: + case E_IndividualSumVelocity: + return false; + + case E_PopulationCount: + case E_PopulationDistinctCount: + case E_PopulationLowDistinctCount: + case E_PopulationHighDistinctCount: + case E_PopulationRare: + case E_PopulationRareCount: + case E_PopulationFreqRare: + case E_PopulationFreqRareCount: + case E_PopulationLowCounts: + case E_PopulationHighCounts: + case E_PopulationInfoContent: + case E_PopulationLowInfoContent: + case E_PopulationHighInfoContent: + case E_PopulationTimeOfDay: + case E_PopulationTimeOfWeek: + case E_PopulationMetric: + case E_PopulationMetricMean: + case E_PopulationMetricLowMean: + case E_PopulationMetricHighMean: + case E_PopulationMetricMedian: + case E_PopulationMetricLowMedian: + case E_PopulationMetricHighMedian: + case E_PopulationMetricMin: + case E_PopulationMetricMax: + case E_PopulationMetricVariance: + case E_PopulationMetricLowVariance: + case E_PopulationMetricHighVariance: + case E_PopulationMetricSum: + case E_PopulationMetricLowSum: + case E_PopulationMetricHighSum: + case E_PopulationLatLong: + case E_PopulationMaxVelocity: + case E_PopulationMinVelocity: + case E_PopulationMeanVelocity: + case E_PopulationSumVelocity: + return true; + + case E_PeersCount: + case E_PeersLowCounts: + case E_PeersHighCounts: + case E_PeersDistinctCount: + case E_PeersLowDistinctCount: + case E_PeersHighDistinctCount: + case E_PeersInfoContent: + case E_PeersLowInfoContent: + case E_PeersHighInfoContent: + case E_PeersTimeOfDay: + case E_PeersTimeOfWeek: + return false; } LOG_ERROR("Unexpected function = " << static_cast(function)); return false; } -bool isPeers(EFunction function) -{ - switch (function) - { - case E_IndividualCount: - case E_IndividualNonZeroCount: - case E_IndividualRareCount: - case E_IndividualRareNonZeroCount: - case E_IndividualRare: - case E_IndividualLowCounts: - case E_IndividualHighCounts: - case E_IndividualLowNonZeroCount: - case E_IndividualHighNonZeroCount: - case E_IndividualDistinctCount: - case E_IndividualLowDistinctCount: - case E_IndividualHighDistinctCount: - case E_IndividualInfoContent: - case E_IndividualHighInfoContent: - case E_IndividualLowInfoContent: - case E_IndividualTimeOfDay: - case E_IndividualTimeOfWeek: - case E_IndividualMetric: - case E_IndividualMetricMean: - case E_IndividualMetricLowMean: - case E_IndividualMetricHighMean: - case E_IndividualMetricMedian: - case E_IndividualMetricLowMedian: - case E_IndividualMetricHighMedian: - case E_IndividualMetricMin: - case E_IndividualMetricMax: - case E_IndividualMetricVariance: - case E_IndividualMetricLowVariance: - case E_IndividualMetricHighVariance: - case E_IndividualMetricSum: - case E_IndividualMetricLowSum: - case E_IndividualMetricHighSum: - case E_IndividualMetricNonNullSum: - case E_IndividualMetricLowNonNullSum: - case E_IndividualMetricHighNonNullSum: - case E_IndividualLatLong: - case E_IndividualMaxVelocity: - case E_IndividualMinVelocity: - case E_IndividualMeanVelocity: - case E_IndividualSumVelocity: - case E_PopulationCount: - case E_PopulationDistinctCount: - case E_PopulationLowDistinctCount: - case E_PopulationHighDistinctCount: - case E_PopulationRare: - case 
E_PopulationRareCount: - case E_PopulationFreqRare: - case E_PopulationFreqRareCount: - case E_PopulationLowCounts: - case E_PopulationHighCounts: - case E_PopulationInfoContent: - case E_PopulationLowInfoContent: - case E_PopulationHighInfoContent: - case E_PopulationTimeOfDay: - case E_PopulationTimeOfWeek: - case E_PopulationMetric: - case E_PopulationMetricMean: - case E_PopulationMetricLowMean: - case E_PopulationMetricHighMean: - case E_PopulationMetricMedian: - case E_PopulationMetricLowMedian: - case E_PopulationMetricHighMedian: - case E_PopulationMetricMin: - case E_PopulationMetricMax: - case E_PopulationMetricVariance: - case E_PopulationMetricLowVariance: - case E_PopulationMetricHighVariance: - case E_PopulationMetricSum: - case E_PopulationMetricLowSum: - case E_PopulationMetricHighSum: - case E_PopulationLatLong: - case E_PopulationMaxVelocity: - case E_PopulationMinVelocity: - case E_PopulationMeanVelocity: - case E_PopulationSumVelocity: - return false; - - case E_PeersCount: - case E_PeersLowCounts: - case E_PeersHighCounts: - case E_PeersDistinctCount: - case E_PeersLowDistinctCount: - case E_PeersHighDistinctCount: - case E_PeersInfoContent: - case E_PeersLowInfoContent: - case E_PeersHighInfoContent: - case E_PeersTimeOfDay: - case E_PeersTimeOfWeek: - return true; +bool isPeers(EFunction function) { + switch (function) { + case E_IndividualCount: + case E_IndividualNonZeroCount: + case E_IndividualRareCount: + case E_IndividualRareNonZeroCount: + case E_IndividualRare: + case E_IndividualLowCounts: + case E_IndividualHighCounts: + case E_IndividualLowNonZeroCount: + case E_IndividualHighNonZeroCount: + case E_IndividualDistinctCount: + case E_IndividualLowDistinctCount: + case E_IndividualHighDistinctCount: + case E_IndividualInfoContent: + case E_IndividualHighInfoContent: + case E_IndividualLowInfoContent: + case E_IndividualTimeOfDay: + case E_IndividualTimeOfWeek: + case E_IndividualMetric: + case E_IndividualMetricMean: + case E_IndividualMetricLowMean: + case E_IndividualMetricHighMean: + case E_IndividualMetricMedian: + case E_IndividualMetricLowMedian: + case E_IndividualMetricHighMedian: + case E_IndividualMetricMin: + case E_IndividualMetricMax: + case E_IndividualMetricVariance: + case E_IndividualMetricLowVariance: + case E_IndividualMetricHighVariance: + case E_IndividualMetricSum: + case E_IndividualMetricLowSum: + case E_IndividualMetricHighSum: + case E_IndividualMetricNonNullSum: + case E_IndividualMetricLowNonNullSum: + case E_IndividualMetricHighNonNullSum: + case E_IndividualLatLong: + case E_IndividualMaxVelocity: + case E_IndividualMinVelocity: + case E_IndividualMeanVelocity: + case E_IndividualSumVelocity: + case E_PopulationCount: + case E_PopulationDistinctCount: + case E_PopulationLowDistinctCount: + case E_PopulationHighDistinctCount: + case E_PopulationRare: + case E_PopulationRareCount: + case E_PopulationFreqRare: + case E_PopulationFreqRareCount: + case E_PopulationLowCounts: + case E_PopulationHighCounts: + case E_PopulationInfoContent: + case E_PopulationLowInfoContent: + case E_PopulationHighInfoContent: + case E_PopulationTimeOfDay: + case E_PopulationTimeOfWeek: + case E_PopulationMetric: + case E_PopulationMetricMean: + case E_PopulationMetricLowMean: + case E_PopulationMetricHighMean: + case E_PopulationMetricMedian: + case E_PopulationMetricLowMedian: + case E_PopulationMetricHighMedian: + case E_PopulationMetricMin: + case E_PopulationMetricMax: + case E_PopulationMetricVariance: + case E_PopulationMetricLowVariance: + case 
E_PopulationMetricHighVariance: + case E_PopulationMetricSum: + case E_PopulationMetricLowSum: + case E_PopulationMetricHighSum: + case E_PopulationLatLong: + case E_PopulationMaxVelocity: + case E_PopulationMinVelocity: + case E_PopulationMeanVelocity: + case E_PopulationSumVelocity: + return false; + + case E_PeersCount: + case E_PeersLowCounts: + case E_PeersHighCounts: + case E_PeersDistinctCount: + case E_PeersLowDistinctCount: + case E_PeersHighDistinctCount: + case E_PeersInfoContent: + case E_PeersLowInfoContent: + case E_PeersHighInfoContent: + case E_PeersTimeOfDay: + case E_PeersTimeOfWeek: + return true; } LOG_ERROR("Unexpected function = " << static_cast(function)); return false; } -bool isMetric(EFunction function) -{ - switch (function) - { - case E_IndividualCount: - case E_IndividualNonZeroCount: - case E_IndividualRareCount: - case E_IndividualRareNonZeroCount: - case E_IndividualRare: - case E_IndividualLowCounts: - case E_IndividualHighCounts: - case E_IndividualLowNonZeroCount: - case E_IndividualHighNonZeroCount: - case E_IndividualDistinctCount: - case E_IndividualLowDistinctCount: - case E_IndividualHighDistinctCount: - case E_IndividualInfoContent: - case E_IndividualHighInfoContent: - case E_IndividualLowInfoContent: - case E_IndividualTimeOfDay: - case E_IndividualTimeOfWeek: - return false; - - case E_IndividualMetric: - case E_IndividualMetricMean: - case E_IndividualMetricLowMean: - case E_IndividualMetricHighMean: - case E_IndividualMetricMedian: - case E_IndividualMetricLowMedian: - case E_IndividualMetricHighMedian: - case E_IndividualMetricMin: - case E_IndividualMetricMax: - case E_IndividualMetricVariance: - case E_IndividualMetricLowVariance: - case E_IndividualMetricHighVariance: - case E_IndividualMetricSum: - case E_IndividualMetricLowSum: - case E_IndividualMetricHighSum: - case E_IndividualMetricNonNullSum: - case E_IndividualMetricLowNonNullSum: - case E_IndividualMetricHighNonNullSum: - case E_IndividualLatLong: - case E_IndividualMaxVelocity: - case E_IndividualMinVelocity: - case E_IndividualMeanVelocity: - case E_IndividualSumVelocity: - return true; - - case E_PopulationCount: - case E_PopulationDistinctCount: - case E_PopulationLowDistinctCount: - case E_PopulationHighDistinctCount: - case E_PopulationRare: - case E_PopulationRareCount: - case E_PopulationFreqRare: - case E_PopulationFreqRareCount: - case E_PopulationLowCounts: - case E_PopulationHighCounts: - case E_PopulationInfoContent: - case E_PopulationLowInfoContent: - case E_PopulationHighInfoContent: - case E_PopulationTimeOfDay: - case E_PopulationTimeOfWeek: - return false; - - case E_PopulationMetric: - case E_PopulationMetricMean: - case E_PopulationMetricLowMean: - case E_PopulationMetricHighMean: - case E_PopulationMetricMedian: - case E_PopulationMetricLowMedian: - case E_PopulationMetricHighMedian: - case E_PopulationMetricMin: - case E_PopulationMetricMax: - case E_PopulationMetricVariance: - case E_PopulationMetricLowVariance: - case E_PopulationMetricHighVariance: - case E_PopulationMetricSum: - case E_PopulationMetricLowSum: - case E_PopulationMetricHighSum: - case E_PopulationLatLong: - case E_PopulationMaxVelocity: - case E_PopulationMinVelocity: - case E_PopulationMeanVelocity: - case E_PopulationSumVelocity: - return true; - - case E_PeersCount: - case E_PeersLowCounts: - case E_PeersHighCounts: - case E_PeersDistinctCount: - case E_PeersLowDistinctCount: - case E_PeersHighDistinctCount: - case E_PeersInfoContent: - case E_PeersLowInfoContent: - case 
E_PeersHighInfoContent: - case E_PeersTimeOfDay: - case E_PeersTimeOfWeek: - return false; +bool isMetric(EFunction function) { + switch (function) { + case E_IndividualCount: + case E_IndividualNonZeroCount: + case E_IndividualRareCount: + case E_IndividualRareNonZeroCount: + case E_IndividualRare: + case E_IndividualLowCounts: + case E_IndividualHighCounts: + case E_IndividualLowNonZeroCount: + case E_IndividualHighNonZeroCount: + case E_IndividualDistinctCount: + case E_IndividualLowDistinctCount: + case E_IndividualHighDistinctCount: + case E_IndividualInfoContent: + case E_IndividualHighInfoContent: + case E_IndividualLowInfoContent: + case E_IndividualTimeOfDay: + case E_IndividualTimeOfWeek: + return false; + + case E_IndividualMetric: + case E_IndividualMetricMean: + case E_IndividualMetricLowMean: + case E_IndividualMetricHighMean: + case E_IndividualMetricMedian: + case E_IndividualMetricLowMedian: + case E_IndividualMetricHighMedian: + case E_IndividualMetricMin: + case E_IndividualMetricMax: + case E_IndividualMetricVariance: + case E_IndividualMetricLowVariance: + case E_IndividualMetricHighVariance: + case E_IndividualMetricSum: + case E_IndividualMetricLowSum: + case E_IndividualMetricHighSum: + case E_IndividualMetricNonNullSum: + case E_IndividualMetricLowNonNullSum: + case E_IndividualMetricHighNonNullSum: + case E_IndividualLatLong: + case E_IndividualMaxVelocity: + case E_IndividualMinVelocity: + case E_IndividualMeanVelocity: + case E_IndividualSumVelocity: + return true; + + case E_PopulationCount: + case E_PopulationDistinctCount: + case E_PopulationLowDistinctCount: + case E_PopulationHighDistinctCount: + case E_PopulationRare: + case E_PopulationRareCount: + case E_PopulationFreqRare: + case E_PopulationFreqRareCount: + case E_PopulationLowCounts: + case E_PopulationHighCounts: + case E_PopulationInfoContent: + case E_PopulationLowInfoContent: + case E_PopulationHighInfoContent: + case E_PopulationTimeOfDay: + case E_PopulationTimeOfWeek: + return false; + + case E_PopulationMetric: + case E_PopulationMetricMean: + case E_PopulationMetricLowMean: + case E_PopulationMetricHighMean: + case E_PopulationMetricMedian: + case E_PopulationMetricLowMedian: + case E_PopulationMetricHighMedian: + case E_PopulationMetricMin: + case E_PopulationMetricMax: + case E_PopulationMetricVariance: + case E_PopulationMetricLowVariance: + case E_PopulationMetricHighVariance: + case E_PopulationMetricSum: + case E_PopulationMetricLowSum: + case E_PopulationMetricHighSum: + case E_PopulationLatLong: + case E_PopulationMaxVelocity: + case E_PopulationMinVelocity: + case E_PopulationMeanVelocity: + case E_PopulationSumVelocity: + return true; + + case E_PeersCount: + case E_PeersLowCounts: + case E_PeersHighCounts: + case E_PeersDistinctCount: + case E_PeersLowDistinctCount: + case E_PeersHighDistinctCount: + case E_PeersInfoContent: + case E_PeersLowInfoContent: + case E_PeersHighInfoContent: + case E_PeersTimeOfDay: + case E_PeersTimeOfWeek: + return false; } LOG_ERROR("Unexpected function = " << static_cast<int>(function)); return false; } -bool isForecastSupported(EFunction function) -{ - switch (function) - { - case E_IndividualCount: - case E_IndividualNonZeroCount: - case E_IndividualRareCount: - case E_IndividualRareNonZeroCount: - return true; - case E_IndividualRare: - return false; - case E_IndividualLowCounts: - case E_IndividualHighCounts: - case E_IndividualLowNonZeroCount: - case E_IndividualHighNonZeroCount: - case E_IndividualDistinctCount: - case E_IndividualLowDistinctCount: - 
case E_IndividualHighDistinctCount: - return true; - case E_IndividualInfoContent: - case E_IndividualHighInfoContent: - case E_IndividualLowInfoContent: - case E_IndividualTimeOfDay: - case E_IndividualTimeOfWeek: - return false; - - case E_IndividualMetric: - case E_IndividualMetricMean: - case E_IndividualMetricLowMean: - case E_IndividualMetricHighMean: - case E_IndividualMetricMedian: - case E_IndividualMetricLowMedian: - case E_IndividualMetricHighMedian: - case E_IndividualMetricMin: - case E_IndividualMetricMax: - case E_IndividualMetricVariance: - case E_IndividualMetricLowVariance: - case E_IndividualMetricHighVariance: - case E_IndividualMetricSum: - case E_IndividualMetricLowSum: - case E_IndividualMetricHighSum: - case E_IndividualMetricNonNullSum: - case E_IndividualMetricLowNonNullSum: - case E_IndividualMetricHighNonNullSum: - return true; - case E_IndividualLatLong: - return false; - case E_IndividualMaxVelocity: - case E_IndividualMinVelocity: - case E_IndividualMeanVelocity: - case E_IndividualSumVelocity: - return true; - - case E_PopulationCount: - case E_PopulationDistinctCount: - case E_PopulationLowDistinctCount: - case E_PopulationHighDistinctCount: - case E_PopulationRare: - case E_PopulationRareCount: - case E_PopulationFreqRare: - case E_PopulationFreqRareCount: - case E_PopulationLowCounts: - case E_PopulationHighCounts: - case E_PopulationInfoContent: - case E_PopulationLowInfoContent: - case E_PopulationHighInfoContent: - case E_PopulationTimeOfDay: - case E_PopulationTimeOfWeek: - return false; - - case E_PopulationMetric: - case E_PopulationMetricMean: - case E_PopulationMetricLowMean: - case E_PopulationMetricHighMean: - case E_PopulationMetricMedian: - case E_PopulationMetricLowMedian: - case E_PopulationMetricHighMedian: - case E_PopulationMetricMin: - case E_PopulationMetricMax: - case E_PopulationMetricVariance: - case E_PopulationMetricLowVariance: - case E_PopulationMetricHighVariance: - case E_PopulationMetricSum: - case E_PopulationMetricLowSum: - case E_PopulationMetricHighSum: - case E_PopulationLatLong: - case E_PopulationMaxVelocity: - case E_PopulationMinVelocity: - case E_PopulationMeanVelocity: - case E_PopulationSumVelocity: - return false; - - case E_PeersCount: - case E_PeersLowCounts: - case E_PeersHighCounts: - case E_PeersDistinctCount: - case E_PeersLowDistinctCount: - case E_PeersHighDistinctCount: - case E_PeersInfoContent: - case E_PeersLowInfoContent: - case E_PeersHighInfoContent: - case E_PeersTimeOfDay: - case E_PeersTimeOfWeek: - return false; +bool isForecastSupported(EFunction function) { + switch (function) { + case E_IndividualCount: + case E_IndividualNonZeroCount: + case E_IndividualRareCount: + case E_IndividualRareNonZeroCount: + return true; + case E_IndividualRare: + return false; + case E_IndividualLowCounts: + case E_IndividualHighCounts: + case E_IndividualLowNonZeroCount: + case E_IndividualHighNonZeroCount: + case E_IndividualDistinctCount: + case E_IndividualLowDistinctCount: + case E_IndividualHighDistinctCount: + return true; + case E_IndividualInfoContent: + case E_IndividualHighInfoContent: + case E_IndividualLowInfoContent: + case E_IndividualTimeOfDay: + case E_IndividualTimeOfWeek: + return false; + + case E_IndividualMetric: + case E_IndividualMetricMean: + case E_IndividualMetricLowMean: + case E_IndividualMetricHighMean: + case E_IndividualMetricMedian: + case E_IndividualMetricLowMedian: + case E_IndividualMetricHighMedian: + case E_IndividualMetricMin: + case E_IndividualMetricMax: + case 
E_IndividualMetricVariance: + case E_IndividualMetricLowVariance: + case E_IndividualMetricHighVariance: + case E_IndividualMetricSum: + case E_IndividualMetricLowSum: + case E_IndividualMetricHighSum: + case E_IndividualMetricNonNullSum: + case E_IndividualMetricLowNonNullSum: + case E_IndividualMetricHighNonNullSum: + return true; + case E_IndividualLatLong: + return false; + case E_IndividualMaxVelocity: + case E_IndividualMinVelocity: + case E_IndividualMeanVelocity: + case E_IndividualSumVelocity: + return true; + + case E_PopulationCount: + case E_PopulationDistinctCount: + case E_PopulationLowDistinctCount: + case E_PopulationHighDistinctCount: + case E_PopulationRare: + case E_PopulationRareCount: + case E_PopulationFreqRare: + case E_PopulationFreqRareCount: + case E_PopulationLowCounts: + case E_PopulationHighCounts: + case E_PopulationInfoContent: + case E_PopulationLowInfoContent: + case E_PopulationHighInfoContent: + case E_PopulationTimeOfDay: + case E_PopulationTimeOfWeek: + return false; + + case E_PopulationMetric: + case E_PopulationMetricMean: + case E_PopulationMetricLowMean: + case E_PopulationMetricHighMean: + case E_PopulationMetricMedian: + case E_PopulationMetricLowMedian: + case E_PopulationMetricHighMedian: + case E_PopulationMetricMin: + case E_PopulationMetricMax: + case E_PopulationMetricVariance: + case E_PopulationMetricLowVariance: + case E_PopulationMetricHighVariance: + case E_PopulationMetricSum: + case E_PopulationMetricLowSum: + case E_PopulationMetricHighSum: + case E_PopulationLatLong: + case E_PopulationMaxVelocity: + case E_PopulationMinVelocity: + case E_PopulationMeanVelocity: + case E_PopulationSumVelocity: + return false; + + case E_PeersCount: + case E_PeersLowCounts: + case E_PeersHighCounts: + case E_PeersDistinctCount: + case E_PeersLowDistinctCount: + case E_PeersHighDistinctCount: + case E_PeersInfoContent: + case E_PeersLowInfoContent: + case E_PeersHighInfoContent: + case E_PeersTimeOfDay: + case E_PeersTimeOfWeek: + return false; } LOG_ERROR("Unexpected function = " << static_cast<int>(function)); return false; } - -namespace -{ +namespace { using TFeatureFunctionVecMap = std::map<model_t::EFeature, TFunctionVec>; using TFeatureFunctionVecMapItr = TFeatureFunctionVecMap::iterator; using TFeatureFunctionVecMapCItr = TFeatureFunctionVecMap::const_iterator; -namespace detail -{ - -const model_t::EFeature INDIVIDUAL_COUNT_FEATURES[] = - { - model_t::E_IndividualCountByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_NON_ZERO_COUNT_FEATURES[] = - { - model_t::E_IndividualNonZeroCountByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_RARE_COUNT_FEATURES[] = - { - model_t::E_IndividualCountByBucketAndPerson, - model_t::E_IndividualTotalBucketCountByPerson, - }; -const model_t::EFeature INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES[] = - { - model_t::E_IndividualNonZeroCountByBucketAndPerson, - model_t::E_IndividualTotalBucketCountByPerson - }; -const model_t::EFeature INDIVIDUAL_RARE_FEATURES[] = - { - model_t::E_IndividualTotalBucketCountByPerson, - model_t::E_IndividualIndicatorOfBucketPerson - }; -const model_t::EFeature INDIVIDUAL_LOW_COUNTS_FEATURES[] = - { - model_t::E_IndividualLowCountsByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_HIGH_COUNTS_FEATURES[] = - { - model_t::E_IndividualHighCountsByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES[] = - { - model_t::E_IndividualLowNonZeroCountByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES[] = - { - 
model_t::E_IndividualHighNonZeroCountByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_IndividualUniqueCountByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_IndividualLowUniqueCountByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_IndividualHighUniqueCountByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_INFO_CONTENT_FEATURES[] = - { - model_t::E_IndividualInfoContentByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES[] = - { - model_t::E_IndividualHighInfoContentByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_LOW_INFO_CONTENT_FEATURES[] = - { - model_t::E_IndividualLowInfoContentByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_TIME_OF_DAY_FEATURES[] = - { - model_t::E_IndividualTimeOfDayByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_TIME_OF_WEEK_FEATURES[] = - { - model_t::E_IndividualTimeOfWeekByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_FEATURES[] = - { - model_t::E_IndividualMeanByPerson, - model_t::E_IndividualMinByPerson, - model_t::E_IndividualMaxByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_MEAN_FEATURES[] = - { - model_t::E_IndividualMeanByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_MEAN_FEATURES[] = - { - model_t::E_IndividualLowMeanByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES[] = - { - model_t::E_IndividualHighMeanByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_MEDIAN_FEATURES[] = - { - model_t::E_IndividualMedianByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES[] = - { - model_t::E_IndividualLowMedianByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES[] = - { - model_t::E_IndividualHighMedianByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_MIN_FEATURES[] = - { - model_t::E_IndividualMinByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_MAX_FEATURES[] = - { - model_t::E_IndividualMaxByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_VARIANCE_FEATURES[] = - { - model_t::E_IndividualVarianceByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES[] = - { - model_t::E_IndividualLowVarianceByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES[] = - { - model_t::E_IndividualHighVarianceByPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_SUM_FEATURES[] = - { - model_t::E_IndividualSumByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_SUM_FEATURES[] = - { - model_t::E_IndividualLowSumByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_SUM_FEATURES[] = - { - model_t::E_IndividualHighSumByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES[] = - { - model_t::E_IndividualNonNullSumByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES[] = - { - model_t::E_IndividualLowNonNullSumByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES[] = - { - model_t::E_IndividualHighNonNullSumByBucketAndPerson - }; -const model_t::EFeature INDIVIDUAL_LAT_LONG_FEATURES[] = - { - model_t::E_IndividualMeanLatLongByPerson - }; -const model_t::EFeature INDIVIDUAL_MAX_VELOCITY_FEATURES[] = - { - model_t::E_IndividualMaxVelocityByPerson - }; -const model_t::EFeature INDIVIDUAL_MIN_VELOCITY_FEATURES[] = - { - 
model_t::E_IndividualMinVelocityByPerson - }; -const model_t::EFeature INDIVIDUAL_MEAN_VELOCITY_FEATURES[] = - { - model_t::E_IndividualMeanVelocityByPerson - }; -const model_t::EFeature INDIVIDUAL_SUM_VELOCITY_FEATURES[] = - { - model_t::E_IndividualSumVelocityByPerson - }; -const model_t::EFeature POPULATION_COUNT_FEATURES[] = - { - model_t::E_PopulationCountByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_PopulationUniqueCountByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_LOW_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_HIGH_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_RARE_FEATURES[] = - { - model_t::E_PopulationIndicatorOfBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute - }; -const model_t::EFeature POPULATION_RARE_COUNT_FEATURES[] = - { - model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute - }; -const model_t::EFeature POPULATION_FREQ_RARE_FEATURES[] = - { - model_t::E_PopulationAttributeTotalCountByPerson, - model_t::E_PopulationIndicatorOfBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute - }; -const model_t::EFeature POPULATION_FREQ_RARE_COUNT_FEATURES[] = - { - model_t::E_PopulationAttributeTotalCountByPerson, - model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute - }; -const model_t::EFeature POPULATION_LOW_COUNTS_FEATURES[] = - { - model_t::E_PopulationLowCountsByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_HIGH_COUNTS_FEATURES[] = - { - model_t::E_PopulationHighCountsByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_INFO_CONTENT_FEATURES[] = - { - model_t::E_PopulationInfoContentByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_LOW_INFO_CONTENT_FEATURES[] = - { - model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_HIGH_INFO_CONTENT_FEATURES[] = - { - model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_TIME_OF_DAY_FEATURES[] = - { - model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_TIME_OF_WEEK_FEATURES[] = - { - model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_FEATURES[] = - { - model_t::E_PopulationMeanByPersonAndAttribute, - model_t::E_PopulationMinByPersonAndAttribute, - model_t::E_PopulationMaxByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_MEAN_FEATURES[] = - { - model_t::E_PopulationMeanByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_LOW_MEAN_FEATURES[] = - { - model_t::E_PopulationLowMeanByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_HIGH_MEAN_FEATURES[] = - { - model_t::E_PopulationHighMeanByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_MEDIAN_FEATURES[] = - { - model_t::E_PopulationMedianByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_LOW_MEDIAN_FEATURES[] = - { - model_t::E_PopulationLowMedianByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_HIGH_MEDIAN_FEATURES[] = - { - model_t::E_PopulationHighMedianByPersonAndAttribute - }; -const model_t::EFeature 
POPULATION_METRIC_MIN_FEATURES[] = - { - model_t::E_PopulationMinByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_MAX_FEATURES[] = - { - model_t::E_PopulationMaxByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_VARIANCE_FEATURES[] = - { - model_t::E_PopulationVarianceByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_LOW_VARIANCE_FEATURES[] = - { - model_t::E_PopulationLowVarianceByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_HIGH_VARIANCE_FEATURES[] = - { - model_t::E_PopulationHighVarianceByPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_SUM_FEATURES[] = - { - model_t::E_PopulationSumByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_LOW_SUM_FEATURES[] = - { - model_t::E_PopulationLowSumByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_METRIC_HIGH_SUM_FEATURES[] = - { - model_t::E_PopulationHighSumByBucketPersonAndAttribute - }; -const model_t::EFeature POPULATION_LAT_LONG_FEATURES[] = - { - model_t::E_PopulationMeanLatLongByPersonAndAttribute - }; -const model_t::EFeature POPULATION_MAX_VELOCITY_FEATURES[] = - { - model_t::E_PopulationMaxVelocityByPersonAndAttribute - }; -const model_t::EFeature POPULATION_MIN_VELOCITY_FEATURES[] = - { - model_t::E_PopulationMinVelocityByPersonAndAttribute - }; -const model_t::EFeature POPULATION_MEAN_VELOCITY_FEATURES[] = - { - model_t::E_PopulationMeanVelocityByPersonAndAttribute - }; -const model_t::EFeature POPULATION_SUM_VELOCITY_FEATURES[] = - { - model_t::E_PopulationSumVelocityByPersonAndAttribute - }; -const model_t::EFeature PEERS_COUNT_FEATURES[] = - { - model_t::E_PeersCountByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_PeersUniqueCountByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_LOW_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_PeersLowUniqueCountByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_HIGH_DISTINCT_COUNT_FEATURES[] = - { - model_t::E_PeersHighUniqueCountByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_LOW_COUNTS_FEATURES[] = - { - model_t::E_PeersLowCountsByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_HIGH_COUNTS_FEATURES[] = - { - model_t::E_PeersHighCountsByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_INFO_CONTENT_FEATURES[] = - { - model_t::E_PeersInfoContentByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_LOW_INFO_CONTENT_FEATURES[] = - { - model_t::E_PeersLowInfoContentByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_HIGH_INFO_CONTENT_FEATURES[] = - { - model_t::E_PeersHighInfoContentByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_TIME_OF_DAY_FEATURES[] = - { - model_t::E_PeersTimeOfDayByBucketPersonAndAttribute - }; -const model_t::EFeature PEERS_TIME_OF_WEEK_FEATURES[] = - { - model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute - }; +namespace detail { + +const model_t::EFeature INDIVIDUAL_COUNT_FEATURES[] = {model_t::E_IndividualCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_NON_ZERO_COUNT_FEATURES[] = {model_t::E_IndividualNonZeroCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_RARE_COUNT_FEATURES[] = { + model_t::E_IndividualCountByBucketAndPerson, + model_t::E_IndividualTotalBucketCountByPerson, +}; +const model_t::EFeature INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES[] = {model_t::E_IndividualNonZeroCountByBucketAndPerson, + model_t::E_IndividualTotalBucketCountByPerson}; +const 
model_t::EFeature INDIVIDUAL_RARE_FEATURES[] = {model_t::E_IndividualTotalBucketCountByPerson, + model_t::E_IndividualIndicatorOfBucketPerson}; +const model_t::EFeature INDIVIDUAL_LOW_COUNTS_FEATURES[] = {model_t::E_IndividualLowCountsByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_HIGH_COUNTS_FEATURES[] = {model_t::E_IndividualHighCountsByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES[] = {model_t::E_IndividualLowNonZeroCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES[] = {model_t::E_IndividualHighNonZeroCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_DISTINCT_COUNT_FEATURES[] = {model_t::E_IndividualUniqueCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES[] = {model_t::E_IndividualLowUniqueCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES[] = {model_t::E_IndividualHighUniqueCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_INFO_CONTENT_FEATURES[] = {model_t::E_IndividualInfoContentByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES[] = {model_t::E_IndividualHighInfoContentByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_LOW_INFO_CONTENT_FEATURES[] = {model_t::E_IndividualLowInfoContentByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_TIME_OF_DAY_FEATURES[] = {model_t::E_IndividualTimeOfDayByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_TIME_OF_WEEK_FEATURES[] = {model_t::E_IndividualTimeOfWeekByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_FEATURES[] = {model_t::E_IndividualMeanByPerson, + model_t::E_IndividualMinByPerson, + model_t::E_IndividualMaxByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_MEAN_FEATURES[] = {model_t::E_IndividualMeanByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_MEAN_FEATURES[] = {model_t::E_IndividualLowMeanByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES[] = {model_t::E_IndividualHighMeanByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_MEDIAN_FEATURES[] = {model_t::E_IndividualMedianByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES[] = {model_t::E_IndividualLowMedianByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES[] = {model_t::E_IndividualHighMedianByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_MIN_FEATURES[] = {model_t::E_IndividualMinByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_MAX_FEATURES[] = {model_t::E_IndividualMaxByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_VARIANCE_FEATURES[] = {model_t::E_IndividualVarianceByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES[] = {model_t::E_IndividualLowVarianceByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES[] = {model_t::E_IndividualHighVarianceByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_SUM_FEATURES[] = {model_t::E_IndividualSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_SUM_FEATURES[] = {model_t::E_IndividualLowSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_SUM_FEATURES[] = {model_t::E_IndividualHighSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES[] = {model_t::E_IndividualNonNullSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES[] = {model_t::E_IndividualLowNonNullSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES[] = 
{model_t::E_IndividualHighNonNullSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_LAT_LONG_FEATURES[] = {model_t::E_IndividualMeanLatLongByPerson}; +const model_t::EFeature INDIVIDUAL_MAX_VELOCITY_FEATURES[] = {model_t::E_IndividualMaxVelocityByPerson}; +const model_t::EFeature INDIVIDUAL_MIN_VELOCITY_FEATURES[] = {model_t::E_IndividualMinVelocityByPerson}; +const model_t::EFeature INDIVIDUAL_MEAN_VELOCITY_FEATURES[] = {model_t::E_IndividualMeanVelocityByPerson}; +const model_t::EFeature INDIVIDUAL_SUM_VELOCITY_FEATURES[] = {model_t::E_IndividualSumVelocityByPerson}; +const model_t::EFeature POPULATION_COUNT_FEATURES[] = {model_t::E_PopulationCountByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_DISTINCT_COUNT_FEATURES[] = {model_t::E_PopulationUniqueCountByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_LOW_DISTINCT_COUNT_FEATURES[] = {model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_HIGH_DISTINCT_COUNT_FEATURES[] = {model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_RARE_FEATURES[] = {model_t::E_PopulationIndicatorOfBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; +const model_t::EFeature POPULATION_RARE_COUNT_FEATURES[] = {model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; +const model_t::EFeature POPULATION_FREQ_RARE_FEATURES[] = {model_t::E_PopulationAttributeTotalCountByPerson, + model_t::E_PopulationIndicatorOfBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; +const model_t::EFeature POPULATION_FREQ_RARE_COUNT_FEATURES[] = {model_t::E_PopulationAttributeTotalCountByPerson, + model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; +const model_t::EFeature POPULATION_LOW_COUNTS_FEATURES[] = {model_t::E_PopulationLowCountsByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_HIGH_COUNTS_FEATURES[] = {model_t::E_PopulationHighCountsByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_INFO_CONTENT_FEATURES[] = {model_t::E_PopulationInfoContentByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_LOW_INFO_CONTENT_FEATURES[] = {model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_HIGH_INFO_CONTENT_FEATURES[] = {model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_TIME_OF_DAY_FEATURES[] = {model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_TIME_OF_WEEK_FEATURES[] = {model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_FEATURES[] = {model_t::E_PopulationMeanByPersonAndAttribute, + model_t::E_PopulationMinByPersonAndAttribute, + model_t::E_PopulationMaxByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_MEAN_FEATURES[] = {model_t::E_PopulationMeanByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_LOW_MEAN_FEATURES[] = {model_t::E_PopulationLowMeanByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_HIGH_MEAN_FEATURES[] = {model_t::E_PopulationHighMeanByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_MEDIAN_FEATURES[] = {model_t::E_PopulationMedianByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_LOW_MEDIAN_FEATURES[] = {model_t::E_PopulationLowMedianByPersonAndAttribute}; +const 
model_t::EFeature POPULATION_METRIC_HIGH_MEDIAN_FEATURES[] = {model_t::E_PopulationHighMedianByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_MIN_FEATURES[] = {model_t::E_PopulationMinByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_MAX_FEATURES[] = {model_t::E_PopulationMaxByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_VARIANCE_FEATURES[] = {model_t::E_PopulationVarianceByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_LOW_VARIANCE_FEATURES[] = {model_t::E_PopulationLowVarianceByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_HIGH_VARIANCE_FEATURES[] = {model_t::E_PopulationHighVarianceByPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_SUM_FEATURES[] = {model_t::E_PopulationSumByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_LOW_SUM_FEATURES[] = {model_t::E_PopulationLowSumByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_METRIC_HIGH_SUM_FEATURES[] = {model_t::E_PopulationHighSumByBucketPersonAndAttribute}; +const model_t::EFeature POPULATION_LAT_LONG_FEATURES[] = {model_t::E_PopulationMeanLatLongByPersonAndAttribute}; +const model_t::EFeature POPULATION_MAX_VELOCITY_FEATURES[] = {model_t::E_PopulationMaxVelocityByPersonAndAttribute}; +const model_t::EFeature POPULATION_MIN_VELOCITY_FEATURES[] = {model_t::E_PopulationMinVelocityByPersonAndAttribute}; +const model_t::EFeature POPULATION_MEAN_VELOCITY_FEATURES[] = {model_t::E_PopulationMeanVelocityByPersonAndAttribute}; +const model_t::EFeature POPULATION_SUM_VELOCITY_FEATURES[] = {model_t::E_PopulationSumVelocityByPersonAndAttribute}; +const model_t::EFeature PEERS_COUNT_FEATURES[] = {model_t::E_PeersCountByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_DISTINCT_COUNT_FEATURES[] = {model_t::E_PeersUniqueCountByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_LOW_DISTINCT_COUNT_FEATURES[] = {model_t::E_PeersLowUniqueCountByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_HIGH_DISTINCT_COUNT_FEATURES[] = {model_t::E_PeersHighUniqueCountByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_LOW_COUNTS_FEATURES[] = {model_t::E_PeersLowCountsByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_HIGH_COUNTS_FEATURES[] = {model_t::E_PeersHighCountsByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_INFO_CONTENT_FEATURES[] = {model_t::E_PeersInfoContentByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_LOW_INFO_CONTENT_FEATURES[] = {model_t::E_PeersLowInfoContentByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_HIGH_INFO_CONTENT_FEATURES[] = {model_t::E_PeersHighInfoContentByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_TIME_OF_DAY_FEATURES[] = {model_t::E_PeersTimeOfDayByBucketPersonAndAttribute}; +const model_t::EFeature PEERS_TIME_OF_WEEK_FEATURES[] = {model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute}; // Function names const std::string COUNT("count"); @@ -952,15 +679,13 @@ const std::string MIN_VELOCITY("min_velocity"); const std::string MEAN_VELOCITY("mean_velocity"); const std::string SUM_VELOCITY("sum_velocity"); const std::string UNEXPECTED_FUNCTION("-"); - } #define BEGIN(x) x -#define END(x) x + sizeof(x)/sizeof(x[0]) +#define END(x) x + sizeof(x) / sizeof(x[0]) //! The features for the count by function. 
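The BEGIN/END helper macros reformatted just above are the pre-C++11 idiom for treating a fixed-size C array as an iterator range: sizeof(x) / sizeof(x[0]) is the element count, so END(x) points one past the last element, and each TFeatureVec below is copy-constructed from the [BEGIN, END) range. A minimal, self-contained sketch of the same construction, with a hypothetical enum and array name not taken from this patch:

    #include <vector>

    enum EFeature { E_FeatureA, E_FeatureB, E_FeatureC };
    using TFeatureVec = std::vector<EFeature>;

    // Hypothetical feature list, standing in for the arrays in namespace detail.
    const EFeature EXAMPLE_FEATURES[] = {E_FeatureA, E_FeatureC};

    // Equivalent to TFeatureVec(std::begin(EXAMPLE_FEATURES), std::end(EXAMPLE_FEATURES))
    // in C++11 and later: copy the whole array into the vector.
    const TFeatureVec EXAMPLE(EXAMPLE_FEATURES,
                              EXAMPLE_FEATURES + sizeof(EXAMPLE_FEATURES) / sizeof(EXAMPLE_FEATURES[0]));
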
-const TFeatureVec INDIVIDUAL_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_COUNT_FEATURES), - END(detail::INDIVIDUAL_COUNT_FEATURES)); +const TFeatureVec INDIVIDUAL_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_COUNT_FEATURES), END(detail::INDIVIDUAL_COUNT_FEATURES)); //! The features for the non-zero count by function. const TFeatureVec INDIVIDUAL_NON_ZERO_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_NON_ZERO_COUNT_FEATURES), @@ -975,8 +700,7 @@ const TFeatureVec INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES(BEGIN(detail::INDIVIDU END(detail::INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES)); //! The features for the rare in time by function. -const TFeatureVec INDIVIDUAL_RARE_FEATURES(BEGIN(detail::INDIVIDUAL_RARE_FEATURES), - END(detail::INDIVIDUAL_RARE_FEATURES)); +const TFeatureVec INDIVIDUAL_RARE_FEATURES(BEGIN(detail::INDIVIDUAL_RARE_FEATURES), END(detail::INDIVIDUAL_RARE_FEATURES)); //! The features for the low count by function. const TFeatureVec INDIVIDUAL_LOW_COUNTS_FEATURES(BEGIN(detail::INDIVIDUAL_LOW_COUNTS_FEATURES), @@ -1027,8 +751,7 @@ const TFeatureVec INDIVIDUAL_TIME_OF_WEEK_FEATURES(BEGIN(detail::INDIVIDUAL_TIME END(detail::INDIVIDUAL_TIME_OF_WEEK_FEATURES)); //! The features for the metric by function. -const TFeatureVec INDIVIDUAL_METRIC_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_FEATURES), - END(detail::INDIVIDUAL_METRIC_FEATURES)); +const TFeatureVec INDIVIDUAL_METRIC_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_FEATURES), END(detail::INDIVIDUAL_METRIC_FEATURES)); //! The features for the metric mean by function. const TFeatureVec INDIVIDUAL_METRIC_MEAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MEAN_FEATURES), @@ -1044,15 +767,15 @@ const TFeatureVec INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES(BEGIN(detail::INDIVIDUAL_ //! The features for the metric median by function. const TFeatureVec INDIVIDUAL_METRIC_MEDIAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MEDIAN_FEATURES), - END(detail::INDIVIDUAL_METRIC_MEDIAN_FEATURES)); + END(detail::INDIVIDUAL_METRIC_MEDIAN_FEATURES)); //! The features for the metric low median by function. const TFeatureVec INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES), - END(detail::INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES)); + END(detail::INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES)); //! The features for the metric high median by function. const TFeatureVec INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES), - END(detail::INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES)); + END(detail::INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES)); //! The features for the metric min by function. const TFeatureVec INDIVIDUAL_METRIC_MIN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MIN_FEATURES), @@ -1099,8 +822,7 @@ const TFeatureVec INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES(BEGIN(detail::IND END(detail::INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES)); //! The features for the metric latitude and longitude by function. -const TFeatureVec INDIVIDUAL_LAT_LONG_FEATURES(BEGIN(detail::INDIVIDUAL_LAT_LONG_FEATURES), - END(detail::INDIVIDUAL_LAT_LONG_FEATURES)); +const TFeatureVec INDIVIDUAL_LAT_LONG_FEATURES(BEGIN(detail::INDIVIDUAL_LAT_LONG_FEATURES), END(detail::INDIVIDUAL_LAT_LONG_FEATURES)); //! The features for the metric max velocity by function. const TFeatureVec INDIVIDUAL_MAX_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_MAX_VELOCITY_FEATURES), @@ -1119,8 +841,7 @@ const TFeatureVec INDIVIDUAL_SUM_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_SUM_ END(detail::INDIVIDUAL_SUM_VELOCITY_FEATURES)); //! The features for the count over function. 
-const TFeatureVec POPULATION_COUNT_FEATURES(BEGIN(detail::POPULATION_COUNT_FEATURES), - END(detail::POPULATION_COUNT_FEATURES)); +const TFeatureVec POPULATION_COUNT_FEATURES(BEGIN(detail::POPULATION_COUNT_FEATURES), END(detail::POPULATION_COUNT_FEATURES)); //! The features for the distinct count over function. const TFeatureVec POPULATION_DISTINCT_COUNT_FEATURES(BEGIN(detail::POPULATION_DISTINCT_COUNT_FEATURES), @@ -1135,16 +856,14 @@ const TFeatureVec POPULATION_HIGH_DISTINCT_COUNT_FEATURES(BEGIN(detail::POPULATI END(detail::POPULATION_HIGH_DISTINCT_COUNT_FEATURES)); //! The features for the rare over function. -const TFeatureVec POPULATION_RARE_FEATURES(BEGIN(detail::POPULATION_RARE_FEATURES), - END(detail::POPULATION_RARE_FEATURES)); +const TFeatureVec POPULATION_RARE_FEATURES(BEGIN(detail::POPULATION_RARE_FEATURES), END(detail::POPULATION_RARE_FEATURES)); //! The features for the rare count over function. const TFeatureVec POPULATION_RARE_COUNT_FEATURES(BEGIN(detail::POPULATION_RARE_COUNT_FEATURES), END(detail::POPULATION_RARE_COUNT_FEATURES)); //! The features for the rare in population over function. -const TFeatureVec POPULATION_FREQ_RARE_FEATURES(BEGIN(detail::POPULATION_FREQ_RARE_FEATURES), - END(detail::POPULATION_FREQ_RARE_FEATURES)); +const TFeatureVec POPULATION_FREQ_RARE_FEATURES(BEGIN(detail::POPULATION_FREQ_RARE_FEATURES), END(detail::POPULATION_FREQ_RARE_FEATURES)); //! The features for the frequent rare count over function. const TFeatureVec POPULATION_FREQ_RARE_COUNT_FEATURES(BEGIN(detail::POPULATION_FREQ_RARE_COUNT_FEATURES), @@ -1179,8 +898,7 @@ const TFeatureVec POPULATION_TIME_OF_WEEK_FEATURES(BEGIN(detail::POPULATION_TIME END(detail::POPULATION_TIME_OF_WEEK_FEATURES)); //! The features for the metric over function. -const TFeatureVec POPULATION_METRIC_FEATURES(BEGIN(detail::POPULATION_METRIC_FEATURES), - END(detail::POPULATION_METRIC_FEATURES)); +const TFeatureVec POPULATION_METRIC_FEATURES(BEGIN(detail::POPULATION_METRIC_FEATURES), END(detail::POPULATION_METRIC_FEATURES)); //! The features for the metric mean over function. const TFeatureVec POPULATION_METRIC_MEAN_FEATURES(BEGIN(detail::POPULATION_METRIC_MEAN_FEATURES), @@ -1196,15 +914,15 @@ const TFeatureVec POPULATION_METRIC_HIGH_MEAN_FEATURES(BEGIN(detail::POPULATION_ //! The features for the metric median over function. const TFeatureVec POPULATION_METRIC_MEDIAN_FEATURES(BEGIN(detail::POPULATION_METRIC_MEDIAN_FEATURES), - END(detail::POPULATION_METRIC_MEDIAN_FEATURES)); + END(detail::POPULATION_METRIC_MEDIAN_FEATURES)); //! The features for the metric low median over function. const TFeatureVec POPULATION_METRIC_LOW_MEDIAN_FEATURES(BEGIN(detail::POPULATION_METRIC_LOW_MEDIAN_FEATURES), - END(detail::POPULATION_METRIC_LOW_MEDIAN_FEATURES)); + END(detail::POPULATION_METRIC_LOW_MEDIAN_FEATURES)); //! The features for the metric high median over function. const TFeatureVec POPULATION_METRIC_HIGH_MEDIAN_FEATURES(BEGIN(detail::POPULATION_METRIC_HIGH_MEDIAN_FEATURES), - END(detail::POPULATION_METRIC_HIGH_MEDIAN_FEATURES)); + END(detail::POPULATION_METRIC_HIGH_MEDIAN_FEATURES)); //! The features for the metric min over function. const TFeatureVec POPULATION_METRIC_MIN_FEATURES(BEGIN(detail::POPULATION_METRIC_MIN_FEATURES), @@ -1238,8 +956,7 @@ const TFeatureVec POPULATION_METRIC_HIGH_SUM_FEATURES(BEGIN(detail::POPULATION_M END(detail::POPULATION_METRIC_HIGH_SUM_FEATURES)); //! The features for the metric lat/long over function. 
-const TFeatureVec POPULATION_LAT_LONG_FEATURES(BEGIN(detail::POPULATION_LAT_LONG_FEATURES), - END(detail::POPULATION_LAT_LONG_FEATURES)); +const TFeatureVec POPULATION_LAT_LONG_FEATURES(BEGIN(detail::POPULATION_LAT_LONG_FEATURES), END(detail::POPULATION_LAT_LONG_FEATURES)); //! The features for the metric max velocity over function. const TFeatureVec POPULATION_MAX_VELOCITY_FEATURES(BEGIN(detail::POPULATION_MAX_VELOCITY_FEATURES), @@ -1258,20 +975,16 @@ const TFeatureVec POPULATION_SUM_VELOCITY_FEATURES(BEGIN(detail::POPULATION_SUM_ END(detail::POPULATION_SUM_VELOCITY_FEATURES)); //! The features for the count over function. -const TFeatureVec PEERS_COUNT_FEATURES(BEGIN(detail::PEERS_COUNT_FEATURES), - END(detail::PEERS_COUNT_FEATURES)); +const TFeatureVec PEERS_COUNT_FEATURES(BEGIN(detail::PEERS_COUNT_FEATURES), END(detail::PEERS_COUNT_FEATURES)); //! The features for the low count over function. -const TFeatureVec PEERS_LOW_COUNTS_FEATURES(BEGIN(detail::PEERS_LOW_COUNTS_FEATURES), - END(detail::PEERS_LOW_COUNTS_FEATURES)); +const TFeatureVec PEERS_LOW_COUNTS_FEATURES(BEGIN(detail::PEERS_LOW_COUNTS_FEATURES), END(detail::PEERS_LOW_COUNTS_FEATURES)); //! The features for the high count over function. -const TFeatureVec PEERS_HIGH_COUNTS_FEATURES(BEGIN(detail::PEERS_HIGH_COUNTS_FEATURES), - END(detail::PEERS_HIGH_COUNTS_FEATURES)); +const TFeatureVec PEERS_HIGH_COUNTS_FEATURES(BEGIN(detail::PEERS_HIGH_COUNTS_FEATURES), END(detail::PEERS_HIGH_COUNTS_FEATURES)); //! The features for the distinct count over function. -const TFeatureVec PEERS_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_DISTINCT_COUNT_FEATURES), - END(detail::PEERS_DISTINCT_COUNT_FEATURES)); +const TFeatureVec PEERS_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_DISTINCT_COUNT_FEATURES), END(detail::PEERS_DISTINCT_COUNT_FEATURES)); //! The features for the low distinct count over function. const TFeatureVec PEERS_LOW_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_LOW_DISTINCT_COUNT_FEATURES), @@ -1282,8 +995,7 @@ const TFeatureVec PEERS_HIGH_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_HIGH_DI END(detail::PEERS_HIGH_DISTINCT_COUNT_FEATURES)); //! The features for the information content over function. -const TFeatureVec PEERS_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_INFO_CONTENT_FEATURES), - END(detail::PEERS_INFO_CONTENT_FEATURES)); +const TFeatureVec PEERS_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_INFO_CONTENT_FEATURES), END(detail::PEERS_INFO_CONTENT_FEATURES)); //! The features for the low information content over function. const TFeatureVec PEERS_LOW_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_LOW_INFO_CONTENT_FEATURES), @@ -1294,12 +1006,10 @@ const TFeatureVec PEERS_HIGH_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_HIGH_INFO END(detail::PEERS_HIGH_INFO_CONTENT_FEATURES)); //! The features for the time_of_week over function. -const TFeatureVec PEERS_TIME_OF_DAY_FEATURES(BEGIN(detail::PEERS_TIME_OF_DAY_FEATURES), - END(detail::PEERS_TIME_OF_DAY_FEATURES)); +const TFeatureVec PEERS_TIME_OF_DAY_FEATURES(BEGIN(detail::PEERS_TIME_OF_DAY_FEATURES), END(detail::PEERS_TIME_OF_DAY_FEATURES)); //! The features for the time_of_week over function. 
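Note that several of the vectors in this run of hunks bundle more than one feature per function; the rare-count functions, for example, pair a per-bucket count feature with a total bucket count feature. A hedged sketch of how a caller can walk the features for one function through function_t::features(), which is defined at the end of this file (process() is a hypothetical consumer, and the file's own headers are assumed):

    void process(model_t::EFeature feature); // hypothetical consumer

    void collectFeatures(function_t::EFunction function) {
        // features() returns the TFeatureVec for the function, e.g. two
        // entries for E_IndividualRareCount.
        const TFeatureVec& features = function_t::features(function);
        for (std::size_t i = 0u; i < features.size(); ++i) {
            process(features[i]);
        }
    }
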
-const TFeatureVec PEERS_TIME_OF_WEEK_FEATURES(BEGIN(detail::PEERS_TIME_OF_WEEK_FEATURES), - END(detail::PEERS_TIME_OF_WEEK_FEATURES)); +const TFeatureVec PEERS_TIME_OF_WEEK_FEATURES(BEGIN(detail::PEERS_TIME_OF_WEEK_FEATURES), END(detail::PEERS_TIME_OF_WEEK_FEATURES)); const TFeatureVec EMPTY_FEATURES; const TFunctionVec EMPTY_FUNCTIONS; @@ -1308,291 +1018,285 @@ const TFunctionVec EMPTY_FUNCTIONS; #undef END //! Add the features corresponding to \p function to \p map. -void addFeatures(EFunction function, - TFeatureFunctionVecMap &map) -{ - const TFeatureVec &features = function_t::features(function); - for (std::size_t i = 0u; i < features.size(); ++i) - { +void addFeatures(EFunction function, TFeatureFunctionVecMap& map) { + const TFeatureVec& features = function_t::features(function); + for (std::size_t i = 0u; i < features.size(); ++i) { map[features[i]].push_back(function); } } //! Build a map from features to the functions which include them. -TFeatureFunctionVecMap buildFeatureFunctionMap() -{ +TFeatureFunctionVecMap buildFeatureFunctionMap() { TFeatureFunctionVecMap result; // This is written like this to generate a compiler warning // when a new function is added. This map must include every // function so add a case if you add a new function. - switch (E_IndividualCount) - { - // The fall-through is intentional in this switch: the switched-on value - // selects the first case and then all the calls to addFeatures() are - // made - case E_IndividualCount: - addFeatures(E_IndividualCount, result); - BOOST_FALLTHROUGH; - case E_IndividualNonZeroCount: - addFeatures(E_IndividualNonZeroCount, result); - BOOST_FALLTHROUGH; - case E_IndividualRareCount: - addFeatures(E_IndividualRareCount, result); - BOOST_FALLTHROUGH; - case E_IndividualRareNonZeroCount: - addFeatures(E_IndividualRareNonZeroCount, result); - BOOST_FALLTHROUGH; - case E_IndividualRare: - addFeatures(E_IndividualRare, result); - BOOST_FALLTHROUGH; - case E_IndividualLowCounts: - addFeatures(E_IndividualLowCounts, result); - BOOST_FALLTHROUGH; - case E_IndividualHighCounts: - addFeatures(E_IndividualHighCounts, result); - BOOST_FALLTHROUGH; - case E_IndividualLowNonZeroCount: - addFeatures(E_IndividualLowNonZeroCount, result); - BOOST_FALLTHROUGH; - case E_IndividualHighNonZeroCount: - addFeatures(E_IndividualHighNonZeroCount, result); - BOOST_FALLTHROUGH; - case E_IndividualDistinctCount: - addFeatures(E_IndividualDistinctCount, result); - BOOST_FALLTHROUGH; - case E_IndividualLowDistinctCount: - addFeatures(E_IndividualLowDistinctCount, result); - BOOST_FALLTHROUGH; - case E_IndividualHighDistinctCount: - addFeatures(E_IndividualHighDistinctCount, result); - BOOST_FALLTHROUGH; - case E_IndividualInfoContent: - addFeatures(E_IndividualInfoContent, result); - BOOST_FALLTHROUGH; - case E_IndividualHighInfoContent: - addFeatures(E_IndividualHighInfoContent, result); - BOOST_FALLTHROUGH; - case E_IndividualLowInfoContent: - addFeatures(E_IndividualLowInfoContent, result); - BOOST_FALLTHROUGH; - case E_IndividualTimeOfDay: - addFeatures(E_IndividualTimeOfDay, result); - BOOST_FALLTHROUGH; - case E_IndividualTimeOfWeek: - addFeatures(E_IndividualTimeOfWeek, result); - BOOST_FALLTHROUGH; - case E_IndividualMetric: - addFeatures(E_IndividualMetric, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricMean: - addFeatures(E_IndividualMetricMean, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricLowMean: - addFeatures(E_IndividualMetricLowMean, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricHighMean: - 
addFeatures(E_IndividualMetricHighMean, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricMedian: - addFeatures(E_IndividualMetricMedian, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricLowMedian: - addFeatures(E_IndividualMetricLowMedian, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricHighMedian: - addFeatures(E_IndividualMetricHighMedian, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricMin: - addFeatures(E_IndividualMetricMin, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricMax: - addFeatures(E_IndividualMetricMax, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricSum: - addFeatures(E_IndividualMetricSum, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricVariance: - addFeatures(E_IndividualMetricVariance, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricLowVariance: - addFeatures(E_IndividualMetricLowVariance, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricHighVariance: - addFeatures(E_IndividualMetricHighVariance, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricLowSum: - addFeatures(E_IndividualMetricLowSum, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricHighSum: - addFeatures(E_IndividualMetricHighSum, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricNonNullSum: - addFeatures(E_IndividualMetricNonNullSum, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricLowNonNullSum: - addFeatures(E_IndividualMetricLowNonNullSum, result); - BOOST_FALLTHROUGH; - case E_IndividualMetricHighNonNullSum: - addFeatures(E_IndividualMetricHighNonNullSum, result); - BOOST_FALLTHROUGH; - case E_IndividualLatLong: - addFeatures(E_IndividualLatLong, result); - BOOST_FALLTHROUGH; - case E_IndividualMaxVelocity: - addFeatures(E_IndividualMaxVelocity, result); - BOOST_FALLTHROUGH; - case E_IndividualMinVelocity: - addFeatures(E_IndividualMinVelocity, result); - BOOST_FALLTHROUGH; - case E_IndividualMeanVelocity: - addFeatures(E_IndividualMeanVelocity, result); - BOOST_FALLTHROUGH; - case E_IndividualSumVelocity: - addFeatures(E_IndividualSumVelocity, result); - BOOST_FALLTHROUGH; - case E_PopulationCount: - addFeatures(E_PopulationCount, result); - BOOST_FALLTHROUGH; - case E_PopulationDistinctCount: - addFeatures(E_PopulationDistinctCount, result); - BOOST_FALLTHROUGH; - case E_PopulationLowDistinctCount: - addFeatures(E_PopulationLowDistinctCount, result); - BOOST_FALLTHROUGH; - case E_PopulationHighDistinctCount: - addFeatures(E_PopulationHighDistinctCount, result); - BOOST_FALLTHROUGH; - case E_PopulationRare: - addFeatures(E_PopulationRare, result); - BOOST_FALLTHROUGH; - case E_PopulationRareCount: - addFeatures(E_PopulationRareCount, result); - BOOST_FALLTHROUGH; - case E_PopulationFreqRare: - addFeatures(E_PopulationFreqRare, result); - BOOST_FALLTHROUGH; - case E_PopulationFreqRareCount: - addFeatures(E_PopulationFreqRareCount, result); - BOOST_FALLTHROUGH; - case E_PopulationLowCounts: - addFeatures(E_PopulationLowCounts, result); - BOOST_FALLTHROUGH; - case E_PopulationHighCounts: - addFeatures(E_PopulationHighCounts, result); - BOOST_FALLTHROUGH; - case E_PopulationInfoContent: - addFeatures(E_PopulationInfoContent, result); - BOOST_FALLTHROUGH; - case E_PopulationLowInfoContent: - addFeatures(E_PopulationLowInfoContent, result); - BOOST_FALLTHROUGH; - case E_PopulationHighInfoContent: - addFeatures(E_PopulationHighInfoContent, result); - BOOST_FALLTHROUGH; - case E_PopulationTimeOfDay: - addFeatures(E_PopulationTimeOfDay, result); - BOOST_FALLTHROUGH; - case E_PopulationTimeOfWeek: - addFeatures(E_PopulationTimeOfWeek, 
result); - BOOST_FALLTHROUGH; - case E_PopulationMetric: - addFeatures(E_PopulationMetric, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricMean: - addFeatures(E_PopulationMetricMean, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricLowMean: - addFeatures(E_PopulationMetricLowMean, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricHighMean: - addFeatures(E_PopulationMetricHighMean, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricMedian: - addFeatures(E_PopulationMetricMedian, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricLowMedian: - addFeatures(E_PopulationMetricLowMedian, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricHighMedian: - addFeatures(E_PopulationMetricHighMedian, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricMin: - addFeatures(E_PopulationMetricMin, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricMax: - addFeatures(E_PopulationMetricMax, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricVariance: - addFeatures(E_PopulationMetricVariance, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricLowVariance: - addFeatures(E_PopulationMetricLowVariance, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricHighVariance: - addFeatures(E_PopulationMetricHighVariance, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricSum: - addFeatures(E_PopulationMetricSum, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricLowSum: - addFeatures(E_PopulationMetricLowSum, result); - BOOST_FALLTHROUGH; - case E_PopulationMetricHighSum: - addFeatures(E_PopulationMetricHighSum, result); - BOOST_FALLTHROUGH; - case E_PopulationLatLong: - addFeatures(E_PopulationLatLong, result); - BOOST_FALLTHROUGH; - case E_PopulationMaxVelocity: - addFeatures(E_PopulationMaxVelocity, result); - BOOST_FALLTHROUGH; - case E_PopulationMinVelocity: - addFeatures(E_PopulationMinVelocity, result); - BOOST_FALLTHROUGH; - case E_PopulationMeanVelocity: - addFeatures(E_PopulationMeanVelocity, result); - BOOST_FALLTHROUGH; - case E_PopulationSumVelocity: - addFeatures(E_PopulationSumVelocity, result); - BOOST_FALLTHROUGH; - case E_PeersCount: - addFeatures(E_PeersCount, result); - BOOST_FALLTHROUGH; - case E_PeersLowCounts: - addFeatures(E_PeersLowCounts, result); - BOOST_FALLTHROUGH; - case E_PeersHighCounts: - addFeatures(E_PeersHighCounts, result); - BOOST_FALLTHROUGH; - case E_PeersDistinctCount: - addFeatures(E_PeersDistinctCount, result); - BOOST_FALLTHROUGH; - case E_PeersLowDistinctCount: - addFeatures(E_PeersLowDistinctCount, result); - BOOST_FALLTHROUGH; - case E_PeersHighDistinctCount: - addFeatures(E_PeersHighDistinctCount, result); - BOOST_FALLTHROUGH; - case E_PeersInfoContent: - addFeatures(E_PeersInfoContent, result); - BOOST_FALLTHROUGH; - case E_PeersLowInfoContent: - addFeatures(E_PeersLowInfoContent, result); - BOOST_FALLTHROUGH; - case E_PeersHighInfoContent: - addFeatures(E_PeersHighInfoContent, result); - BOOST_FALLTHROUGH; - case E_PeersTimeOfDay: - addFeatures(E_PeersTimeOfDay, result); - BOOST_FALLTHROUGH; - case E_PeersTimeOfWeek: - addFeatures(E_PeersTimeOfWeek, result); + switch (E_IndividualCount) { + // The fall-through is intentional in this switch: the switched-on value + // selects the first case and then all the calls to addFeatures() are + // made + case E_IndividualCount: + addFeatures(E_IndividualCount, result); + BOOST_FALLTHROUGH; + case E_IndividualNonZeroCount: + addFeatures(E_IndividualNonZeroCount, result); + BOOST_FALLTHROUGH; + case E_IndividualRareCount: + addFeatures(E_IndividualRareCount, result); + 
+        BOOST_FALLTHROUGH;
+    case E_IndividualRareNonZeroCount:
+        addFeatures(E_IndividualRareNonZeroCount, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualRare:
+        addFeatures(E_IndividualRare, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualLowCounts:
+        addFeatures(E_IndividualLowCounts, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualHighCounts:
+        addFeatures(E_IndividualHighCounts, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualLowNonZeroCount:
+        addFeatures(E_IndividualLowNonZeroCount, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualHighNonZeroCount:
+        addFeatures(E_IndividualHighNonZeroCount, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualDistinctCount:
+        addFeatures(E_IndividualDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualLowDistinctCount:
+        addFeatures(E_IndividualLowDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualHighDistinctCount:
+        addFeatures(E_IndividualHighDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualInfoContent:
+        addFeatures(E_IndividualInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualHighInfoContent:
+        addFeatures(E_IndividualHighInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualLowInfoContent:
+        addFeatures(E_IndividualLowInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualTimeOfDay:
+        addFeatures(E_IndividualTimeOfDay, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualTimeOfWeek:
+        addFeatures(E_IndividualTimeOfWeek, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetric:
+        addFeatures(E_IndividualMetric, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricMean:
+        addFeatures(E_IndividualMetricMean, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricLowMean:
+        addFeatures(E_IndividualMetricLowMean, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricHighMean:
+        addFeatures(E_IndividualMetricHighMean, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricMedian:
+        addFeatures(E_IndividualMetricMedian, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricLowMedian:
+        addFeatures(E_IndividualMetricLowMedian, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricHighMedian:
+        addFeatures(E_IndividualMetricHighMedian, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricMin:
+        addFeatures(E_IndividualMetricMin, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricMax:
+        addFeatures(E_IndividualMetricMax, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricSum:
+        addFeatures(E_IndividualMetricSum, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricVariance:
+        addFeatures(E_IndividualMetricVariance, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricLowVariance:
+        addFeatures(E_IndividualMetricLowVariance, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricHighVariance:
+        addFeatures(E_IndividualMetricHighVariance, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricLowSum:
+        addFeatures(E_IndividualMetricLowSum, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricHighSum:
+        addFeatures(E_IndividualMetricHighSum, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricNonNullSum:
+        addFeatures(E_IndividualMetricNonNullSum, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricLowNonNullSum:
+        addFeatures(E_IndividualMetricLowNonNullSum, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMetricHighNonNullSum:
+        addFeatures(E_IndividualMetricHighNonNullSum, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualLatLong:
+        addFeatures(E_IndividualLatLong, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMaxVelocity:
+        addFeatures(E_IndividualMaxVelocity, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMinVelocity:
+        addFeatures(E_IndividualMinVelocity, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualMeanVelocity:
+        addFeatures(E_IndividualMeanVelocity, result);
+        BOOST_FALLTHROUGH;
+    case E_IndividualSumVelocity:
+        addFeatures(E_IndividualSumVelocity, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationCount:
+        addFeatures(E_PopulationCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationDistinctCount:
+        addFeatures(E_PopulationDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationLowDistinctCount:
+        addFeatures(E_PopulationLowDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationHighDistinctCount:
+        addFeatures(E_PopulationHighDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationRare:
+        addFeatures(E_PopulationRare, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationRareCount:
+        addFeatures(E_PopulationRareCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationFreqRare:
+        addFeatures(E_PopulationFreqRare, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationFreqRareCount:
+        addFeatures(E_PopulationFreqRareCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationLowCounts:
+        addFeatures(E_PopulationLowCounts, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationHighCounts:
+        addFeatures(E_PopulationHighCounts, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationInfoContent:
+        addFeatures(E_PopulationInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationLowInfoContent:
+        addFeatures(E_PopulationLowInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationHighInfoContent:
+        addFeatures(E_PopulationHighInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationTimeOfDay:
+        addFeatures(E_PopulationTimeOfDay, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationTimeOfWeek:
+        addFeatures(E_PopulationTimeOfWeek, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetric:
+        addFeatures(E_PopulationMetric, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricMean:
+        addFeatures(E_PopulationMetricMean, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricLowMean:
+        addFeatures(E_PopulationMetricLowMean, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricHighMean:
+        addFeatures(E_PopulationMetricHighMean, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricMedian:
+        addFeatures(E_PopulationMetricMedian, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricLowMedian:
+        addFeatures(E_PopulationMetricLowMedian, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricHighMedian:
+        addFeatures(E_PopulationMetricHighMedian, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricMin:
+        addFeatures(E_PopulationMetricMin, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricMax:
+        addFeatures(E_PopulationMetricMax, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricVariance:
+        addFeatures(E_PopulationMetricVariance, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricLowVariance:
+        addFeatures(E_PopulationMetricLowVariance, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricHighVariance:
+        addFeatures(E_PopulationMetricHighVariance, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricSum:
+        addFeatures(E_PopulationMetricSum, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricLowSum:
+        addFeatures(E_PopulationMetricLowSum, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMetricHighSum:
+        addFeatures(E_PopulationMetricHighSum, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationLatLong:
+        addFeatures(E_PopulationLatLong, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMaxVelocity:
+        addFeatures(E_PopulationMaxVelocity, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMinVelocity:
+        addFeatures(E_PopulationMinVelocity, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationMeanVelocity:
+        addFeatures(E_PopulationMeanVelocity, result);
+        BOOST_FALLTHROUGH;
+    case E_PopulationSumVelocity:
+        addFeatures(E_PopulationSumVelocity, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersCount:
+        addFeatures(E_PeersCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersLowCounts:
+        addFeatures(E_PeersLowCounts, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersHighCounts:
+        addFeatures(E_PeersHighCounts, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersDistinctCount:
+        addFeatures(E_PeersDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersLowDistinctCount:
+        addFeatures(E_PeersLowDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersHighDistinctCount:
+        addFeatures(E_PeersHighDistinctCount, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersInfoContent:
+        addFeatures(E_PeersInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersLowInfoContent:
+        addFeatures(E_PeersLowInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersHighInfoContent:
+        addFeatures(E_PeersHighInfoContent, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersTimeOfDay:
+        addFeatures(E_PeersTimeOfDay, result);
+        BOOST_FALLTHROUGH;
+    case E_PeersTimeOfWeek:
+        addFeatures(E_PeersTimeOfWeek, result);
     }
-    for (TFeatureFunctionVecMapItr i = result.begin(); i != result.end(); ++i)
-    {
+    for (TFeatureFunctionVecMapItr i = result.begin(); i != result.end(); ++i) {
         std::sort(i->second.begin(), i->second.end());
     }
@@ -1601,217 +1305,207 @@ TFeatureFunctionVecMap buildFeatureFunctionMap()
 const TFeatureFunctionVecMap FUNCTIONS_BY_FEATURE = buildFeatureFunctionMap();
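The reformatted buildFeatureFunctionMap body keeps the original trick: it switches on the first enumerator and deliberately falls through every case, so each function registers its features exactly once and the compiler's switch-coverage warning flags any EFunction value that is left out. A minimal sketch of the idiom, using a hypothetical two-value enum and a dummy feature key rather than the real model_t types:

    #include <map>
    #include <vector>

    enum EFn { E_A, E_B };

    // Stand-in for addFeatures: the real code appends fn to m[f] for
    // every feature f of fn; here we use a single dummy feature key.
    void addTo(EFn fn, std::map<int, std::vector<EFn>>& m) { m[0].push_back(fn); }

    std::map<int, std::vector<EFn>> buildMap() {
        std::map<int, std::vector<EFn>> result;
        // Switch over the first enumerator; every case falls through, so
        // all values are visited and -Wswitch reports a missing one.
        switch (E_A) {
        case E_A:
            addTo(E_A, result);
            [[fallthrough]];
        case E_B:
            addTo(E_B, result);
        }
        return result;
    }

The std::sort pass afterwards matters: function() below relies on each feature's function list being sorted so that the lists can be intersected.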
-//! Get the function with the fewest features.
-EFunction mostSpecific(const TFunctionVec &functions)
-{
-    if (functions.empty())
-    {
+EFunction mostSpecific(const TFunctionVec& functions) {
+    if (functions.empty()) {
         LOG_ABORT("No functions specified");
     }
     EFunction result = functions[0];
     std::size_t numberFeatures = features(functions[0]).size();
-    for (std::size_t i = 1u; i < functions.size(); ++i)
-    {
+    for (std::size_t i = 1u; i < functions.size(); ++i) {
         std::size_t n = features(functions[i]).size();
-        if (n < numberFeatures)
-        {
+        if (n < numberFeatures) {
             result = functions[i];
             numberFeatures = n;
         }
     }
     return result;
 }
-
 }

-const TFeatureVec &features(EFunction function)
-{
-    switch (function)
-    {
-        case E_IndividualCount:
-            return INDIVIDUAL_COUNT_FEATURES;
-        case E_IndividualNonZeroCount:
-            return INDIVIDUAL_NON_ZERO_COUNT_FEATURES;
-        case E_IndividualRareCount:
-            return INDIVIDUAL_RARE_COUNT_FEATURES;
-        case E_IndividualRareNonZeroCount:
-            return INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES;
-        case E_IndividualRare:
-            return INDIVIDUAL_RARE_FEATURES;
-        case E_IndividualLowCounts:
-            return INDIVIDUAL_LOW_COUNTS_FEATURES;
-        case E_IndividualHighCounts:
-            return INDIVIDUAL_HIGH_COUNTS_FEATURES;
-        case E_IndividualLowNonZeroCount:
-            return INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES;
-        case E_IndividualHighNonZeroCount:
-            return INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES;
-        case E_IndividualDistinctCount:
-            return INDIVIDUAL_DISTINCT_COUNT_FEATURES;
-        case E_IndividualLowDistinctCount:
-            return INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES;
-        case E_IndividualHighDistinctCount:
-            return INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES;
-        case E_IndividualInfoContent:
-            return INDIVIDUAL_INFO_CONTENT_FEATURES;
-        case E_IndividualHighInfoContent:
-            return INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES;
-        case E_IndividualLowInfoContent:
-            return INDIVIDUAL_LOW_INFO_CONTENT_FEATURES;
-        case E_IndividualTimeOfDay:
-            return INDIVIDUAL_TIME_OF_DAY_FEATURES;
-        case E_IndividualTimeOfWeek:
-            return INDIVIDUAL_TIME_OF_WEEK_FEATURES;
-        case E_IndividualMetric:
-            return INDIVIDUAL_METRIC_FEATURES;
-        case E_IndividualMetricMean:
-            return INDIVIDUAL_METRIC_MEAN_FEATURES;
-        case E_IndividualMetricLowMean:
-            return INDIVIDUAL_METRIC_LOW_MEAN_FEATURES;
-        case E_IndividualMetricHighMean:
-            return INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES;
-        case E_IndividualMetricMedian:
-            return INDIVIDUAL_METRIC_MEDIAN_FEATURES;
-        case E_IndividualMetricLowMedian:
-            return INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES;
-        case E_IndividualMetricHighMedian:
-            return INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES;
-        case E_IndividualMetricMin:
-            return INDIVIDUAL_METRIC_MIN_FEATURES;
-        case E_IndividualMetricMax:
-            return INDIVIDUAL_METRIC_MAX_FEATURES;
-        case E_IndividualMetricVariance:
-            return INDIVIDUAL_METRIC_VARIANCE_FEATURES;
-        case E_IndividualMetricLowVariance:
-            return INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES;
-        case E_IndividualMetricHighVariance:
-            return INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES;
-        case E_IndividualMetricSum:
-            return INDIVIDUAL_METRIC_SUM_FEATURES;
-        case E_IndividualMetricLowSum:
-            return INDIVIDUAL_METRIC_LOW_SUM_FEATURES;
-        case E_IndividualMetricHighSum:
-            return INDIVIDUAL_METRIC_HIGH_SUM_FEATURES;
-        case E_IndividualMetricNonNullSum:
-            return INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES;
-        case E_IndividualMetricLowNonNullSum:
-            return INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES;
-        case E_IndividualMetricHighNonNullSum:
-            return INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES;
-        case E_IndividualLatLong:
-            return INDIVIDUAL_LAT_LONG_FEATURES;
-        case E_IndividualMaxVelocity:
-            return INDIVIDUAL_MAX_VELOCITY_FEATURES;
-        case E_IndividualMinVelocity:
-            return INDIVIDUAL_MIN_VELOCITY_FEATURES;
-        case E_IndividualMeanVelocity:
-            return INDIVIDUAL_MEAN_VELOCITY_FEATURES;
-        case E_IndividualSumVelocity:
-            return INDIVIDUAL_SUM_VELOCITY_FEATURES;
-        case E_PopulationCount:
-            return POPULATION_COUNT_FEATURES;
-        case E_PopulationDistinctCount:
-            return POPULATION_DISTINCT_COUNT_FEATURES;
-        case E_PopulationLowDistinctCount:
-            return POPULATION_LOW_DISTINCT_COUNT_FEATURES;
-        case E_PopulationHighDistinctCount:
-            return POPULATION_HIGH_DISTINCT_COUNT_FEATURES;
-        case E_PopulationRare:
-            return POPULATION_RARE_FEATURES;
-        case E_PopulationRareCount:
-            return POPULATION_RARE_COUNT_FEATURES;
-        case E_PopulationFreqRare:
-            return POPULATION_FREQ_RARE_FEATURES;
-        case E_PopulationFreqRareCount:
-            return POPULATION_FREQ_RARE_COUNT_FEATURES;
-        case E_PopulationLowCounts:
-            return POPULATION_LOW_COUNTS_FEATURES;
-        case E_PopulationHighCounts:
-            return POPULATION_HIGH_COUNTS_FEATURES;
-        case E_PopulationInfoContent:
-            return POPULATION_INFO_CONTENT_FEATURES;
-        case E_PopulationLowInfoContent:
-            return POPULATION_LOW_INFO_CONTENT_FEATURES;
-        case E_PopulationHighInfoContent:
-            return POPULATION_HIGH_INFO_CONTENT_FEATURES;
-        case E_PopulationTimeOfDay:
-            return POPULATION_TIME_OF_DAY_FEATURES;
-        case E_PopulationTimeOfWeek:
-            return POPULATION_TIME_OF_WEEK_FEATURES;
-        case E_PopulationMetric:
-            return POPULATION_METRIC_FEATURES;
-        case E_PopulationMetricMean:
-            return POPULATION_METRIC_MEAN_FEATURES;
-        case E_PopulationMetricLowMean:
-            return POPULATION_METRIC_LOW_MEAN_FEATURES;
-        case E_PopulationMetricHighMean:
-            return POPULATION_METRIC_HIGH_MEAN_FEATURES;
-        case E_PopulationMetricMedian:
-            return POPULATION_METRIC_MEDIAN_FEATURES;
-        case E_PopulationMetricLowMedian:
-            return POPULATION_METRIC_LOW_MEDIAN_FEATURES;
-        case E_PopulationMetricHighMedian:
-            return POPULATION_METRIC_HIGH_MEDIAN_FEATURES;
-        case E_PopulationMetricMin:
-            return POPULATION_METRIC_MIN_FEATURES;
-        case E_PopulationMetricMax:
-            return POPULATION_METRIC_MAX_FEATURES;
-        case E_PopulationMetricVariance:
-            return POPULATION_METRIC_VARIANCE_FEATURES;
-        case E_PopulationMetricLowVariance:
-            return POPULATION_METRIC_LOW_VARIANCE_FEATURES;
-        case E_PopulationMetricHighVariance:
-            return POPULATION_METRIC_HIGH_VARIANCE_FEATURES;
-        case E_PopulationMetricSum:
-            return POPULATION_METRIC_SUM_FEATURES;
-        case E_PopulationMetricLowSum:
-            return POPULATION_METRIC_LOW_SUM_FEATURES;
-        case E_PopulationMetricHighSum:
-            return POPULATION_METRIC_HIGH_SUM_FEATURES;
-        case E_PopulationLatLong:
-            return POPULATION_LAT_LONG_FEATURES;
-        case E_PopulationMaxVelocity:
-            return POPULATION_MAX_VELOCITY_FEATURES;
-        case E_PopulationMinVelocity:
-            return POPULATION_MIN_VELOCITY_FEATURES;
-        case E_PopulationMeanVelocity:
-            return POPULATION_MEAN_VELOCITY_FEATURES;
-        case E_PopulationSumVelocity:
-            return POPULATION_SUM_VELOCITY_FEATURES;
-        case E_PeersCount:
-            return PEERS_COUNT_FEATURES;
-        case E_PeersLowCounts:
-            return PEERS_LOW_COUNTS_FEATURES;
-        case E_PeersHighCounts:
-            return PEERS_HIGH_COUNTS_FEATURES;
-        case E_PeersDistinctCount:
-            return PEERS_DISTINCT_COUNT_FEATURES;
-        case E_PeersLowDistinctCount:
-            return PEERS_LOW_DISTINCT_COUNT_FEATURES;
-        case E_PeersHighDistinctCount:
-            return PEERS_HIGH_DISTINCT_COUNT_FEATURES;
-        case E_PeersInfoContent:
-            return PEERS_INFO_CONTENT_FEATURES;
-        case E_PeersLowInfoContent:
-            return PEERS_LOW_INFO_CONTENT_FEATURES;
-        case E_PeersHighInfoContent:
-            return PEERS_HIGH_INFO_CONTENT_FEATURES;
-        case E_PeersTimeOfDay:
-            return PEERS_TIME_OF_DAY_FEATURES;
-        case E_PeersTimeOfWeek:
-            return PEERS_TIME_OF_WEEK_FEATURES;
+const TFeatureVec& features(EFunction function) {
+    switch (function) {
+    case E_IndividualCount:
+        return INDIVIDUAL_COUNT_FEATURES;
+    case E_IndividualNonZeroCount:
+        return INDIVIDUAL_NON_ZERO_COUNT_FEATURES;
+    case E_IndividualRareCount:
+        return INDIVIDUAL_RARE_COUNT_FEATURES;
+    case E_IndividualRareNonZeroCount:
+        return INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES;
+    case E_IndividualRare:
+        return INDIVIDUAL_RARE_FEATURES;
+    case E_IndividualLowCounts:
+        return INDIVIDUAL_LOW_COUNTS_FEATURES;
+    case E_IndividualHighCounts:
+        return INDIVIDUAL_HIGH_COUNTS_FEATURES;
+    case E_IndividualLowNonZeroCount:
+        return INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES;
+    case E_IndividualHighNonZeroCount:
+        return INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES;
+    case E_IndividualDistinctCount:
+        return INDIVIDUAL_DISTINCT_COUNT_FEATURES;
+    case E_IndividualLowDistinctCount:
+        return INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES;
+    case E_IndividualHighDistinctCount:
+        return INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES;
+    case E_IndividualInfoContent:
+        return INDIVIDUAL_INFO_CONTENT_FEATURES;
+    case E_IndividualHighInfoContent:
+        return INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES;
+    case E_IndividualLowInfoContent:
+        return INDIVIDUAL_LOW_INFO_CONTENT_FEATURES;
+    case E_IndividualTimeOfDay:
+        return INDIVIDUAL_TIME_OF_DAY_FEATURES;
+    case E_IndividualTimeOfWeek:
+        return INDIVIDUAL_TIME_OF_WEEK_FEATURES;
+    case E_IndividualMetric:
+        return INDIVIDUAL_METRIC_FEATURES;
+    case E_IndividualMetricMean:
+        return INDIVIDUAL_METRIC_MEAN_FEATURES;
+    case E_IndividualMetricLowMean:
+        return INDIVIDUAL_METRIC_LOW_MEAN_FEATURES;
+    case E_IndividualMetricHighMean:
+        return INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES;
+    case E_IndividualMetricMedian:
+        return INDIVIDUAL_METRIC_MEDIAN_FEATURES;
+    case E_IndividualMetricLowMedian:
+        return INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES;
+    case E_IndividualMetricHighMedian:
+        return INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES;
+    case E_IndividualMetricMin:
+        return INDIVIDUAL_METRIC_MIN_FEATURES;
+    case E_IndividualMetricMax:
+        return INDIVIDUAL_METRIC_MAX_FEATURES;
+    case E_IndividualMetricVariance:
+        return INDIVIDUAL_METRIC_VARIANCE_FEATURES;
+    case E_IndividualMetricLowVariance:
+        return INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES;
+    case E_IndividualMetricHighVariance:
+        return INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES;
+    case E_IndividualMetricSum:
+        return INDIVIDUAL_METRIC_SUM_FEATURES;
+    case E_IndividualMetricLowSum:
+        return INDIVIDUAL_METRIC_LOW_SUM_FEATURES;
+    case E_IndividualMetricHighSum:
+        return INDIVIDUAL_METRIC_HIGH_SUM_FEATURES;
+    case E_IndividualMetricNonNullSum:
+        return INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES;
+    case E_IndividualMetricLowNonNullSum:
+        return INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES;
+    case E_IndividualMetricHighNonNullSum:
+        return INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES;
+    case E_IndividualLatLong:
+        return INDIVIDUAL_LAT_LONG_FEATURES;
+    case E_IndividualMaxVelocity:
+        return INDIVIDUAL_MAX_VELOCITY_FEATURES;
+    case E_IndividualMinVelocity:
+        return INDIVIDUAL_MIN_VELOCITY_FEATURES;
+    case E_IndividualMeanVelocity:
+        return INDIVIDUAL_MEAN_VELOCITY_FEATURES;
+    case E_IndividualSumVelocity:
+        return INDIVIDUAL_SUM_VELOCITY_FEATURES;
+    case E_PopulationCount:
+        return POPULATION_COUNT_FEATURES;
+    case E_PopulationDistinctCount:
+        return POPULATION_DISTINCT_COUNT_FEATURES;
+    case E_PopulationLowDistinctCount:
+        return POPULATION_LOW_DISTINCT_COUNT_FEATURES;
+    case E_PopulationHighDistinctCount:
+        return POPULATION_HIGH_DISTINCT_COUNT_FEATURES;
+    case E_PopulationRare:
+        return POPULATION_RARE_FEATURES;
+    case E_PopulationRareCount:
+        return POPULATION_RARE_COUNT_FEATURES;
+    case E_PopulationFreqRare:
+        return POPULATION_FREQ_RARE_FEATURES;
+    case E_PopulationFreqRareCount:
+        return POPULATION_FREQ_RARE_COUNT_FEATURES;
+    case E_PopulationLowCounts:
+        return POPULATION_LOW_COUNTS_FEATURES;
+    case E_PopulationHighCounts:
+        return POPULATION_HIGH_COUNTS_FEATURES;
+    case E_PopulationInfoContent:
+        return POPULATION_INFO_CONTENT_FEATURES;
+    case E_PopulationLowInfoContent:
+        return POPULATION_LOW_INFO_CONTENT_FEATURES;
+    case E_PopulationHighInfoContent:
+        return POPULATION_HIGH_INFO_CONTENT_FEATURES;
+    case E_PopulationTimeOfDay:
+        return POPULATION_TIME_OF_DAY_FEATURES;
+    case E_PopulationTimeOfWeek:
+        return POPULATION_TIME_OF_WEEK_FEATURES;
+    case E_PopulationMetric:
+        return POPULATION_METRIC_FEATURES;
+    case E_PopulationMetricMean:
+        return POPULATION_METRIC_MEAN_FEATURES;
+    case E_PopulationMetricLowMean:
+        return POPULATION_METRIC_LOW_MEAN_FEATURES;
+    case E_PopulationMetricHighMean:
+        return POPULATION_METRIC_HIGH_MEAN_FEATURES;
+    case E_PopulationMetricMedian:
+        return POPULATION_METRIC_MEDIAN_FEATURES;
+    case E_PopulationMetricLowMedian:
+        return POPULATION_METRIC_LOW_MEDIAN_FEATURES;
+    case E_PopulationMetricHighMedian:
+        return POPULATION_METRIC_HIGH_MEDIAN_FEATURES;
+    case E_PopulationMetricMin:
+        return POPULATION_METRIC_MIN_FEATURES;
+    case E_PopulationMetricMax:
+        return POPULATION_METRIC_MAX_FEATURES;
+    case E_PopulationMetricVariance:
+        return POPULATION_METRIC_VARIANCE_FEATURES;
+    case E_PopulationMetricLowVariance:
+        return POPULATION_METRIC_LOW_VARIANCE_FEATURES;
+    case E_PopulationMetricHighVariance:
+        return POPULATION_METRIC_HIGH_VARIANCE_FEATURES;
+    case E_PopulationMetricSum:
+        return POPULATION_METRIC_SUM_FEATURES;
+    case E_PopulationMetricLowSum:
+        return POPULATION_METRIC_LOW_SUM_FEATURES;
+    case E_PopulationMetricHighSum:
+        return POPULATION_METRIC_HIGH_SUM_FEATURES;
+    case E_PopulationLatLong:
+        return POPULATION_LAT_LONG_FEATURES;
+    case E_PopulationMaxVelocity:
+        return POPULATION_MAX_VELOCITY_FEATURES;
+    case E_PopulationMinVelocity:
+        return POPULATION_MIN_VELOCITY_FEATURES;
+    case E_PopulationMeanVelocity:
+        return POPULATION_MEAN_VELOCITY_FEATURES;
+    case E_PopulationSumVelocity:
+        return POPULATION_SUM_VELOCITY_FEATURES;
+    case E_PeersCount:
+        return PEERS_COUNT_FEATURES;
+    case E_PeersLowCounts:
+        return PEERS_LOW_COUNTS_FEATURES;
+    case E_PeersHighCounts:
+        return PEERS_HIGH_COUNTS_FEATURES;
+    case E_PeersDistinctCount:
+        return PEERS_DISTINCT_COUNT_FEATURES;
+    case E_PeersLowDistinctCount:
+        return PEERS_LOW_DISTINCT_COUNT_FEATURES;
+    case E_PeersHighDistinctCount:
+        return PEERS_HIGH_DISTINCT_COUNT_FEATURES;
+    case E_PeersInfoContent:
+        return PEERS_INFO_CONTENT_FEATURES;
+    case E_PeersLowInfoContent:
+        return PEERS_LOW_INFO_CONTENT_FEATURES;
+    case E_PeersHighInfoContent:
+        return PEERS_HIGH_INFO_CONTENT_FEATURES;
+    case E_PeersTimeOfDay:
+        return PEERS_TIME_OF_DAY_FEATURES;
+    case E_PeersTimeOfWeek:
+        return PEERS_TIME_OF_WEEK_FEATURES;
     }
     LOG_ERROR("Unexpected function = " << static_cast<int>(function));
     return EMPTY_FEATURES;
 }

-EFunction function(const TFeatureVec &features)
-{
-    if (features.empty())
-    {
+EFunction function(const TFeatureVec& features) {
+    if (features.empty()) {
         LOG_ERROR("No features default to '" << print(E_IndividualCount) << "'");
         return E_IndividualCount;
     }
@@ -1819,11 +1513,9 @@ EFunction function(const TFeatureVec &features)
     TFunctionVec candidates;
     std::size_t i = 0u;
-    for (/**/; candidates.empty() && i < features.size(); ++i)
-    {
+    for (/**/; candidates.empty() && i < features.size(); ++i) {
         TFeatureFunctionVecMapCItr functionsItr = FUNCTIONS_BY_FEATURE.find(features[i]);
-        if (functionsItr == FUNCTIONS_BY_FEATURE.end())
-        {
+        if (functionsItr == FUNCTIONS_BY_FEATURE.end()) {
             LOG_WARN("No functions for feature " << model_t::print(features[i]))
             continue;
         }
@@ -1835,232 +1527,392 @@ EFunction function(const TFeatureVec &features)
     TFunctionVec tmp;
     tmp.reserve(candidates.size());
-    for (/**/; !candidates.empty() && i < features.size(); ++i)
-    {
+    for (/**/; !candidates.empty() && i < features.size(); ++i) {
         TFeatureFunctionVecMapCItr functionsItr = FUNCTIONS_BY_FEATURE.find(features[i]);
-        if (functionsItr == FUNCTIONS_BY_FEATURE.end())
-        {
+        if (functionsItr == FUNCTIONS_BY_FEATURE.end()) {
             LOG_WARN("No functions for feature " << model_t::print(features[i]))
             continue;
         }
         LOG_TRACE("candidate = " << core::CContainerPrinter::print(functionsItr->second));
-        std::set_intersection(candidates.begin(), candidates.end(),
-                              functionsItr->second.begin(), functionsItr->second.end(),
-                              std::back_inserter(tmp));
+        std::set_intersection(
+            candidates.begin(), candidates.end(), functionsItr->second.begin(), functionsItr->second.end(), std::back_inserter(tmp));
         candidates.swap(tmp);
         tmp.clear();
     }
-    if (candidates.empty())
-    {
+    if (candidates.empty()) {
         EFunction result = mostSpecific(fallback);
-        LOG_ERROR("Inconsistent features " << core::CContainerPrinter::print(features)
-                  << " defaulting to '" << print(result) << "'");
+        LOG_ERROR("Inconsistent features " << core::CContainerPrinter::print(features) << " defaulting to '" << print(result) << "'");
        return result;
     }
     return mostSpecific(candidates);
 }
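function() narrows the candidate set one feature at a time: it seeds candidates from the first recognised feature, intersects with each subsequent feature's sorted function list, and hands the survivors to mostSpecific. A self-contained sketch of that narrowing step, with plain ints standing in for EFunction values and the per-feature lists assumed pre-sorted:

    #include <algorithm>
    #include <iterator>
    #include <vector>

    // Intersect successive sorted candidate lists; whatever survives is
    // consistent with every feature seen so far. Assumes perFeature is
    // non-empty and each inner vector is sorted.
    std::vector<int> narrow(const std::vector<std::vector<int>>& perFeature) {
        std::vector<int> candidates = perFeature.front();
        std::vector<int> tmp;
        for (std::size_t i = 1; !candidates.empty() && i < perFeature.size(); ++i) {
            std::set_intersection(candidates.begin(), candidates.end(),
                                  perFeature[i].begin(), perFeature[i].end(),
                                  std::back_inserter(tmp));
            candidates.swap(tmp);
            tmp.clear();
        }
        return candidates;
    }

Taking the most specific survivor (the one with the fewest features) then resolves any remaining ambiguity deterministically.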
-const std::string &name(EFunction function)
-{
-    switch (function)
-    {
-        case E_IndividualCount: return detail::COUNT;
-        case E_IndividualNonZeroCount: return detail::NON_ZERO_COUNT;
-        case E_IndividualRareCount: return detail::COUNT;
-        case E_IndividualRareNonZeroCount: return detail::RARE_NON_ZERO_COUNT;
-        case E_IndividualRare: return detail::RARE;
-        case E_IndividualLowCounts: return detail::LOW_COUNT;
-        case E_IndividualHighCounts: return detail::HIGH_COUNT;
-        case E_IndividualLowNonZeroCount: return detail::LOW_NON_ZERO_COUNT;
-        case E_IndividualHighNonZeroCount: return detail::HIGH_NON_ZERO_COUNT;
-        case E_IndividualDistinctCount: return detail::DISTINCT_COUNT;
-        case E_IndividualLowDistinctCount: return detail::LOW_DISTINCT_COUNT;
-        case E_IndividualHighDistinctCount: return detail::HIGH_DISTINCT_COUNT;
-        case E_IndividualInfoContent: return detail::INFO_CONTENT;
-        case E_IndividualHighInfoContent: return detail::HIGH_INFO_CONTENT;
-        case E_IndividualLowInfoContent: return detail::LOW_INFO_CONTENT;
-        case E_IndividualTimeOfDay: return detail::TIME_OF_DAY;
-        case E_IndividualTimeOfWeek: return detail::TIME_OF_WEEK;
-        case E_IndividualMetric: return detail::METRIC;
-        case E_IndividualMetricMean: return detail::MEAN;
-        case E_IndividualMetricLowMean: return detail::LOW_MEAN;
-        case E_IndividualMetricHighMean: return detail::HIGH_MEAN;
-        case E_IndividualMetricMedian: return detail::MEDIAN;
-        case E_IndividualMetricLowMedian: return detail::LOW_MEDIAN;
-        case E_IndividualMetricHighMedian: return detail::HIGH_MEDIAN;
-        case E_IndividualMetricMin: return detail::MIN;
-        case E_IndividualMetricMax: return detail::MAX;
-        case E_IndividualMetricVariance: return detail::VARIANCE;
-        case E_IndividualMetricLowVariance: return detail::LOW_VARIANCE;
-        case E_IndividualMetricHighVariance: return detail::HIGH_VARIANCE;
-        case E_IndividualMetricSum: return detail::SUM;
-        case E_IndividualMetricLowSum: return detail::LOW_SUM;
-        case E_IndividualMetricHighSum: return detail::HIGH_SUM;
-        case E_IndividualMetricNonNullSum: return detail::NON_NULL_SUM;
-        case E_IndividualMetricLowNonNullSum: return detail::LOW_NON_NULL_SUM;
-        case E_IndividualMetricHighNonNullSum: return detail::HIGH_NON_NULL_SUM;
-        case E_IndividualLatLong: return detail::LAT_LONG;
-        case E_IndividualMaxVelocity: return detail::MAX_VELOCITY;
-        case E_IndividualMinVelocity: return detail::MIN_VELOCITY;
-        case E_IndividualMeanVelocity: return detail::MEAN_VELOCITY;
-        case E_IndividualSumVelocity: return detail::SUM_VELOCITY;
-        case E_PopulationCount: return detail::COUNT;
-        case E_PopulationDistinctCount: return detail::DISTINCT_COUNT;
-        case E_PopulationLowDistinctCount: return detail::LOW_DISTINCT_COUNT;
-        case E_PopulationHighDistinctCount: return detail::HIGH_DISTINCT_COUNT;
-        case E_PopulationRare: return detail::RARE;
-        case E_PopulationRareCount: return detail::RARE_COUNT;
-        case E_PopulationFreqRare: return detail::FREQ_RARE;
-        case E_PopulationFreqRareCount: return detail::FREQ_RARE_COUNT;
-        case E_PopulationLowCounts: return detail::LOW_COUNT;
-        case E_PopulationHighCounts: return detail::HIGH_COUNT;
-        case E_PopulationInfoContent: return detail::INFO_CONTENT;
-        case E_PopulationLowInfoContent: return detail::LOW_INFO_CONTENT;
-        case E_PopulationHighInfoContent: return detail::HIGH_INFO_CONTENT;
-        case E_PopulationTimeOfDay: return detail::TIME_OF_DAY;
-        case E_PopulationTimeOfWeek: return detail::TIME_OF_WEEK;
-        case E_PopulationMetric: return detail::METRIC;
-        case E_PopulationMetricMean: return detail::MEAN;
-        case E_PopulationMetricLowMean: return detail::LOW_MEAN;
-        case E_PopulationMetricHighMean: return detail::HIGH_MEAN;
-        case E_PopulationMetricMedian: return detail::MEDIAN;
-        case E_PopulationMetricLowMedian: return detail::LOW_MEDIAN;
-        case E_PopulationMetricHighMedian: return detail::HIGH_MEDIAN;
-        case E_PopulationMetricMin: return detail::MIN;
-        case E_PopulationMetricMax: return detail::MAX;
-        case E_PopulationMetricVariance: return detail::VARIANCE;
-        case E_PopulationMetricLowVariance: return detail::LOW_VARIANCE;
-        case E_PopulationMetricHighVariance: return detail::HIGH_VARIANCE;
-        case E_PopulationMetricSum: return detail::SUM;
-        case E_PopulationMetricLowSum: return detail::LOW_SUM;
-        case E_PopulationMetricHighSum: return detail::HIGH_SUM;
-        case E_PopulationLatLong: return detail::LAT_LONG;
-        case E_PopulationMaxVelocity: return detail::MAX_VELOCITY;
-        case E_PopulationMinVelocity: return detail::MIN_VELOCITY;
-        case E_PopulationMeanVelocity: return detail::MEAN_VELOCITY;
-        case E_PopulationSumVelocity: return detail::SUM_VELOCITY;
-        case E_PeersCount: return detail::COUNT;
-        case E_PeersLowCounts: return detail::LOW_COUNT;
-        case E_PeersHighCounts: return detail::HIGH_COUNT;
-        case E_PeersDistinctCount: return detail::DISTINCT_COUNT;
-        case E_PeersLowDistinctCount: return detail::LOW_DISTINCT_COUNT;
-        case E_PeersHighDistinctCount: return detail::HIGH_DISTINCT_COUNT;
-        case E_PeersInfoContent: return detail::INFO_CONTENT;
-        case E_PeersLowInfoContent: return detail::LOW_INFO_CONTENT;
-        case E_PeersHighInfoContent: return detail::HIGH_INFO_CONTENT;
-        case E_PeersTimeOfDay: return detail::TIME_OF_DAY;
-        case E_PeersTimeOfWeek: return detail::TIME_OF_WEEK;
+const std::string& name(EFunction function) {
+    switch (function) {
+    case E_IndividualCount:
+        return detail::COUNT;
+    case E_IndividualNonZeroCount:
+        return detail::NON_ZERO_COUNT;
+    case E_IndividualRareCount:
+        return detail::COUNT;
+    case E_IndividualRareNonZeroCount:
+        return detail::RARE_NON_ZERO_COUNT;
+    case E_IndividualRare:
+        return detail::RARE;
+    case E_IndividualLowCounts:
+        return detail::LOW_COUNT;
+    case E_IndividualHighCounts:
+        return detail::HIGH_COUNT;
+    case E_IndividualLowNonZeroCount:
+        return detail::LOW_NON_ZERO_COUNT;
+    case E_IndividualHighNonZeroCount:
+        return detail::HIGH_NON_ZERO_COUNT;
+    case E_IndividualDistinctCount:
+        return detail::DISTINCT_COUNT;
+    case E_IndividualLowDistinctCount:
+        return detail::LOW_DISTINCT_COUNT;
+    case E_IndividualHighDistinctCount:
+        return detail::HIGH_DISTINCT_COUNT;
+    case E_IndividualInfoContent:
+        return detail::INFO_CONTENT;
+    case E_IndividualHighInfoContent:
+        return detail::HIGH_INFO_CONTENT;
+    case E_IndividualLowInfoContent:
+        return detail::LOW_INFO_CONTENT;
+    case E_IndividualTimeOfDay:
+        return detail::TIME_OF_DAY;
+    case E_IndividualTimeOfWeek:
+        return detail::TIME_OF_WEEK;
+    case E_IndividualMetric:
+        return detail::METRIC;
+    case E_IndividualMetricMean:
+        return detail::MEAN;
+    case E_IndividualMetricLowMean:
+        return detail::LOW_MEAN;
+    case E_IndividualMetricHighMean:
+        return detail::HIGH_MEAN;
+    case E_IndividualMetricMedian:
+        return detail::MEDIAN;
+    case E_IndividualMetricLowMedian:
+        return detail::LOW_MEDIAN;
+    case E_IndividualMetricHighMedian:
+        return detail::HIGH_MEDIAN;
+    case E_IndividualMetricMin:
+        return detail::MIN;
+    case E_IndividualMetricMax:
+        return detail::MAX;
+    case E_IndividualMetricVariance:
+        return detail::VARIANCE;
+    case E_IndividualMetricLowVariance:
+        return detail::LOW_VARIANCE;
+    case E_IndividualMetricHighVariance:
+        return detail::HIGH_VARIANCE;
+    case E_IndividualMetricSum:
+        return detail::SUM;
+    case E_IndividualMetricLowSum:
+        return detail::LOW_SUM;
+    case E_IndividualMetricHighSum:
+        return detail::HIGH_SUM;
+    case E_IndividualMetricNonNullSum:
+        return detail::NON_NULL_SUM;
+    case E_IndividualMetricLowNonNullSum:
+        return detail::LOW_NON_NULL_SUM;
+    case E_IndividualMetricHighNonNullSum:
+        return detail::HIGH_NON_NULL_SUM;
+    case E_IndividualLatLong:
+        return detail::LAT_LONG;
+    case E_IndividualMaxVelocity:
+        return detail::MAX_VELOCITY;
+    case E_IndividualMinVelocity:
+        return detail::MIN_VELOCITY;
+    case E_IndividualMeanVelocity:
+        return detail::MEAN_VELOCITY;
+    case E_IndividualSumVelocity:
+        return detail::SUM_VELOCITY;
+    case E_PopulationCount:
+        return detail::COUNT;
+    case E_PopulationDistinctCount:
+        return detail::DISTINCT_COUNT;
+    case E_PopulationLowDistinctCount:
+        return detail::LOW_DISTINCT_COUNT;
+    case E_PopulationHighDistinctCount:
+        return detail::HIGH_DISTINCT_COUNT;
+    case E_PopulationRare:
+        return detail::RARE;
+    case E_PopulationRareCount:
+        return detail::RARE_COUNT;
+    case E_PopulationFreqRare:
+        return detail::FREQ_RARE;
+    case E_PopulationFreqRareCount:
+        return detail::FREQ_RARE_COUNT;
+    case E_PopulationLowCounts:
+        return detail::LOW_COUNT;
+    case E_PopulationHighCounts:
+        return detail::HIGH_COUNT;
+    case E_PopulationInfoContent:
+        return detail::INFO_CONTENT;
+    case E_PopulationLowInfoContent:
+        return detail::LOW_INFO_CONTENT;
+    case E_PopulationHighInfoContent:
+        return detail::HIGH_INFO_CONTENT;
+    case E_PopulationTimeOfDay:
+        return detail::TIME_OF_DAY;
+    case E_PopulationTimeOfWeek:
+        return detail::TIME_OF_WEEK;
+    case E_PopulationMetric:
+        return detail::METRIC;
+    case E_PopulationMetricMean:
+        return detail::MEAN;
+    case E_PopulationMetricLowMean:
+        return detail::LOW_MEAN;
+    case E_PopulationMetricHighMean:
+        return detail::HIGH_MEAN;
+    case E_PopulationMetricMedian:
+        return detail::MEDIAN;
+    case E_PopulationMetricLowMedian:
+        return detail::LOW_MEDIAN;
+    case E_PopulationMetricHighMedian:
+        return detail::HIGH_MEDIAN;
+    case E_PopulationMetricMin:
+        return detail::MIN;
+    case E_PopulationMetricMax:
+        return detail::MAX;
+    case E_PopulationMetricVariance:
+        return detail::VARIANCE;
+    case E_PopulationMetricLowVariance:
+        return detail::LOW_VARIANCE;
+    case E_PopulationMetricHighVariance:
+        return detail::HIGH_VARIANCE;
+    case E_PopulationMetricSum:
+        return detail::SUM;
+    case E_PopulationMetricLowSum:
+        return detail::LOW_SUM;
+    case E_PopulationMetricHighSum:
+        return detail::HIGH_SUM;
+    case E_PopulationLatLong:
+        return detail::LAT_LONG;
+    case E_PopulationMaxVelocity:
+        return detail::MAX_VELOCITY;
+    case E_PopulationMinVelocity:
+        return detail::MIN_VELOCITY;
+    case E_PopulationMeanVelocity:
+        return detail::MEAN_VELOCITY;
+    case E_PopulationSumVelocity:
+        return detail::SUM_VELOCITY;
+    case E_PeersCount:
+        return detail::COUNT;
+    case E_PeersLowCounts:
+        return detail::LOW_COUNT;
+    case E_PeersHighCounts:
+        return detail::HIGH_COUNT;
+    case E_PeersDistinctCount:
+        return detail::DISTINCT_COUNT;
+    case E_PeersLowDistinctCount:
+        return detail::LOW_DISTINCT_COUNT;
+    case E_PeersHighDistinctCount:
+        return detail::HIGH_DISTINCT_COUNT;
+    case E_PeersInfoContent:
+        return detail::INFO_CONTENT;
+    case E_PeersLowInfoContent:
+        return detail::LOW_INFO_CONTENT;
+    case E_PeersHighInfoContent:
+        return detail::HIGH_INFO_CONTENT;
+    case E_PeersTimeOfDay:
+        return detail::TIME_OF_DAY;
+    case E_PeersTimeOfWeek:
+        return detail::TIME_OF_WEEK;
     }
     LOG_ERROR("Unexpected function = " << static_cast<int>(function));
     return detail::UNEXPECTED_FUNCTION;
 }

-std::string print(EFunction function)
-{
-    switch (function)
-    {
-        case E_IndividualCount: return "individual count";
-        case E_IndividualNonZeroCount: return "individual non-zero count";
-        case E_IndividualRareCount: return "individual rare count";
-        case E_IndividualRareNonZeroCount: return "individual rare non-zero count";
-        case E_IndividualRare: return "individual rare";
-        case E_IndividualLowCounts: return "individual low counts";
-        case E_IndividualHighCounts: return "individual high counts";
-        case E_IndividualLowNonZeroCount: return "individual low non-zero count";
-        case E_IndividualHighNonZeroCount: return "individual high non-zero count";
-        case E_IndividualDistinctCount: return "individual distinct count";
-        case E_IndividualLowDistinctCount: return "individual low distinct count";
-        case E_IndividualHighDistinctCount: return "individual high distinct count";
-        case E_IndividualInfoContent: return "individual info_content";
-        case E_IndividualHighInfoContent: return "individual high_info_content";
-        case E_IndividualLowInfoContent: return "individual low_info_content";
-        case E_IndividualTimeOfDay: return "individual time-of-day";
-        case E_IndividualTimeOfWeek: return "individual time-of-week";
-        case E_IndividualMetric: return "individual metric";
-        case E_IndividualMetricMean: return "individual metric mean";
-        case E_IndividualMetricLowMean: return "individual metric low mean";
-        case E_IndividualMetricHighMean: return "individual metric high mean";
-        case E_IndividualMetricMedian: return "individual metric median";
-        case E_IndividualMetricLowMedian: return "individual metric low median";
-        case E_IndividualMetricHighMedian: return "individual metric high median";
-        case E_IndividualMetricMin: return "individual metric minimum";
-        case E_IndividualMetricMax: return "individual metric maximum";
-        case E_IndividualMetricVariance: return "individual metric variance";
-        case E_IndividualMetricLowVariance: return "individual metric low variance";
-        case E_IndividualMetricHighVariance: return "individual metric high variance";
-        case E_IndividualMetricSum: return "individual metric sum";
-        case E_IndividualMetricLowSum: return "individual metric low sum";
-        case E_IndividualMetricHighSum: return "individual metric high sum";
-        case E_IndividualMetricNonNullSum: return "individual metric non-null sum";
-        case E_IndividualMetricLowNonNullSum: return "individual metric low non-null sum";
-        case E_IndividualMetricHighNonNullSum: return "individual high non-null sum";
-        case E_IndividualLatLong: return "individual latitude/longitude";
-        case E_IndividualMaxVelocity: return "individual max velocity";
-        case E_IndividualMinVelocity: return "individual min velocity";
-        case E_IndividualMeanVelocity: return "individual mean velocity";
-        case E_IndividualSumVelocity: return "individual sum velocity";
-        case E_PopulationCount: return "population count";
-        case E_PopulationDistinctCount: return "population distinct count";
-        case E_PopulationLowDistinctCount: return "population low distinct count";
-        case E_PopulationHighDistinctCount: return "population high distinct count";
-        case E_PopulationRare: return "population rare";
-        case E_PopulationRareCount: return "population rare count";
-        case E_PopulationFreqRare: return "population frequent rare";
-        case E_PopulationFreqRareCount: return "population frequent rare count";
-        case E_PopulationLowCounts: return "population low count";
-        case E_PopulationHighCounts: return "population high count";
-        case E_PopulationInfoContent: return "population information content";
-        case E_PopulationLowInfoContent: return "population low information content";
-        case E_PopulationHighInfoContent: return "population high information content";
-        case E_PopulationTimeOfDay: return "population time-of-day";
-        case E_PopulationTimeOfWeek: return "population time-of-week";
-        case E_PopulationMetric: return "population metric";
-        case E_PopulationMetricMean: return "population metric mean";
-        case E_PopulationMetricLowMean: return "population metric low mean";
-        case E_PopulationMetricHighMean: return "population metric high mean";
-        case E_PopulationMetricMedian: return "population metric median";
-        case E_PopulationMetricLowMedian: return "population metric low median";
-        case E_PopulationMetricHighMedian: return "population metric high median";
-        case E_PopulationMetricMin: return "population metric minimum";
-        case E_PopulationMetricMax: return "population metric maximum";
-        case E_PopulationMetricVariance: return "population metric variance";
-        case E_PopulationMetricLowVariance: return "population metric low variance";
-        case E_PopulationMetricHighVariance: return "population metric high variance";
-        case E_PopulationMetricSum: return "population metric sum";
-        case E_PopulationMetricLowSum: return "population metric low sum";
-        case E_PopulationMetricHighSum: return "population metric high sum";
-        case E_PopulationLatLong: return "population latitude/longitude";
-        case E_PopulationMaxVelocity: return "population max velocity";
-        case E_PopulationMinVelocity: return "population min velocity";
-        case E_PopulationMeanVelocity: return "population mean velocity";
-        case E_PopulationSumVelocity: return "population sum velocity";
-        case E_PeersCount: return "peers count";
-        case E_PeersLowCounts: return "peers low count";
-        case E_PeersHighCounts: return "peers high count";
-        case E_PeersDistinctCount: return "peers distinct count";
-        case E_PeersLowDistinctCount: return "peers low distinct count";
-        case E_PeersHighDistinctCount: return "peers high distinct count";
-        case E_PeersInfoContent: return "peers information content";
-        case E_PeersLowInfoContent: return "peers low information content";
-        case E_PeersHighInfoContent: return "peers high information content";
-        case E_PeersTimeOfDay: return "peers time-of-day";
-        case E_PeersTimeOfWeek: return "peers time-of-week";
+std::string print(EFunction function) {
+    switch (function) {
+    case E_IndividualCount:
+        return "individual count";
+    case E_IndividualNonZeroCount:
+        return "individual non-zero count";
+    case E_IndividualRareCount:
+        return "individual rare count";
+    case E_IndividualRareNonZeroCount:
+        return "individual rare non-zero count";
+    case E_IndividualRare:
+        return "individual rare";
+    case E_IndividualLowCounts:
+        return "individual low counts";
+    case E_IndividualHighCounts:
+        return "individual high counts";
+    case E_IndividualLowNonZeroCount:
+        return "individual low non-zero count";
+    case E_IndividualHighNonZeroCount:
+        return "individual high non-zero count";
+    case E_IndividualDistinctCount:
+        return "individual distinct count";
+    case E_IndividualLowDistinctCount:
+        return "individual low distinct count";
+    case E_IndividualHighDistinctCount:
+        return "individual high distinct count";
+    case E_IndividualInfoContent:
+        return "individual info_content";
+    case E_IndividualHighInfoContent:
+        return "individual high_info_content";
+    case E_IndividualLowInfoContent:
+        return "individual low_info_content";
+    case E_IndividualTimeOfDay:
+        return "individual time-of-day";
+    case E_IndividualTimeOfWeek:
+        return "individual time-of-week";
+    case E_IndividualMetric:
+        return "individual metric";
+    case E_IndividualMetricMean:
+        return "individual metric mean";
+    case E_IndividualMetricLowMean:
+        return "individual metric low mean";
+    case E_IndividualMetricHighMean:
+        return "individual metric high mean";
+    case E_IndividualMetricMedian:
+        return "individual metric median";
+    case E_IndividualMetricLowMedian:
+        return "individual metric low median";
+    case E_IndividualMetricHighMedian:
+        return "individual metric high median";
+    case E_IndividualMetricMin:
+        return "individual metric minimum";
+    case E_IndividualMetricMax:
+        return "individual metric maximum";
+    case E_IndividualMetricVariance:
+        return "individual metric variance";
+    case E_IndividualMetricLowVariance:
+        return "individual metric low variance";
+    case E_IndividualMetricHighVariance:
+        return "individual metric high variance";
+    case E_IndividualMetricSum:
+        return "individual metric sum";
+    case E_IndividualMetricLowSum:
+        return "individual metric low sum";
+    case E_IndividualMetricHighSum:
+        return "individual metric high sum";
+    case E_IndividualMetricNonNullSum:
+        return "individual metric non-null sum";
+    case E_IndividualMetricLowNonNullSum:
+        return "individual metric low non-null sum";
+    case E_IndividualMetricHighNonNullSum:
+        return "individual high non-null sum";
+    case E_IndividualLatLong:
+        return "individual latitude/longitude";
+    case E_IndividualMaxVelocity:
+        return "individual max velocity";
+    case E_IndividualMinVelocity:
+        return "individual min velocity";
+    case E_IndividualMeanVelocity:
+        return "individual mean velocity";
+    case E_IndividualSumVelocity:
+        return "individual sum velocity";
+    case E_PopulationCount:
+        return "population count";
+    case E_PopulationDistinctCount:
+        return "population distinct count";
+    case E_PopulationLowDistinctCount:
+        return "population low distinct count";
+    case E_PopulationHighDistinctCount:
+        return "population high distinct count";
+    case E_PopulationRare:
+        return "population rare";
+    case E_PopulationRareCount:
+        return "population rare count";
+    case E_PopulationFreqRare:
+        return "population frequent rare";
+    case E_PopulationFreqRareCount:
+        return "population frequent rare count";
+    case E_PopulationLowCounts:
+        return "population low count";
+    case E_PopulationHighCounts:
+        return "population high count";
+    case E_PopulationInfoContent:
+        return "population information content";
+    case E_PopulationLowInfoContent:
+        return "population low information content";
+    case E_PopulationHighInfoContent:
+        return "population high information content";
+    case E_PopulationTimeOfDay:
+        return "population time-of-day";
+    case E_PopulationTimeOfWeek:
+        return "population time-of-week";
+    case E_PopulationMetric:
+        return "population metric";
+    case E_PopulationMetricMean:
+        return "population metric mean";
+    case E_PopulationMetricLowMean:
+        return "population metric low mean";
+    case E_PopulationMetricHighMean:
+        return "population metric high mean";
+    case E_PopulationMetricMedian:
+        return "population metric median";
+    case E_PopulationMetricLowMedian:
+        return "population metric low median";
+    case E_PopulationMetricHighMedian:
+        return "population metric high median";
+    case E_PopulationMetricMin:
+        return "population metric minimum";
+    case E_PopulationMetricMax:
+        return "population metric maximum";
+    case E_PopulationMetricVariance:
+        return "population metric variance";
+    case E_PopulationMetricLowVariance:
+        return "population metric low variance";
+    case E_PopulationMetricHighVariance:
+        return "population metric high variance";
+    case E_PopulationMetricSum:
+        return "population metric sum";
+    case E_PopulationMetricLowSum:
+        return "population metric low sum";
+    case E_PopulationMetricHighSum:
+        return "population metric high sum";
+    case E_PopulationLatLong:
+        return "population latitude/longitude";
+    case E_PopulationMaxVelocity:
+        return "population max velocity";
+    case E_PopulationMinVelocity:
+        return "population min velocity";
+    case E_PopulationMeanVelocity:
+        return "population mean velocity";
+    case E_PopulationSumVelocity:
+        return "population sum velocity";
+    case E_PeersCount:
+        return "peers count";
+    case E_PeersLowCounts:
+        return "peers low count";
+    case E_PeersHighCounts:
+        return "peers high count";
+    case E_PeersDistinctCount:
+        return "peers distinct count";
+    case E_PeersLowDistinctCount:
+        return "peers low distinct count";
+    case E_PeersHighDistinctCount:
+        return "peers high distinct count";
+    case E_PeersInfoContent:
+        return "peers information content";
+    case E_PeersLowInfoContent:
+        return "peers low information content";
+    case E_PeersHighInfoContent:
+        return "peers high information content";
+    case E_PeersTimeOfDay:
+        return "peers time-of-day";
+    case E_PeersTimeOfWeek:
+        return "peers time-of-week";
     }
     LOG_ERROR("Unexpected function = " << static_cast<int>(function));
     return "-";
 }

-std::ostream &operator<<(std::ostream &o, EFunction function)
-{
+std::ostream& operator<<(std::ostream& o, EFunction function) {
     return o << print(function);
 }
-
 }
 }
 }
-
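Note the asymmetry that both the old and new layouts preserve: name() returns const std::string& to interned constants in detail::, so callers on hot paths never allocate, while the diagnostic print() builds a fresh std::string by value. A tiny sketch of the pattern, with hypothetical names:

    #include <string>

    namespace detail {
    const std::string COUNT{"count"};
    const std::string RARE{"rare"};
    }

    // Each branch yields a reference to one of the interned constants
    // above, so the result can be held without a per-lookup copy.
    const std::string& nameOf(bool rare) {
        return rare ? detail::RARE : detail::COUNT;
    }

diff --git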
a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc
index add961660f..e078320cd2 100644
--- a/lib/model/ModelTypes.cc
+++ b/lib/model/ModelTypes.cc
@@ -18,39 +18,33 @@
 #include

-namespace ml
-{
-namespace model_t
-{
+namespace ml {
+namespace model_t {

-namespace
-{
+namespace {
 //! Compute x^4.
-double pow4(double x)
-{
+double pow4(double x) {
     double x2 = x * x;
     return x2 * x2;
 }
-
 }

-std::string print(EModelType type)
-{
-    switch (type)
-    {
-        case E_Counting: return "'counting'";
-        case E_EventRateOnline: return "'online event rate'";
-        case E_MetricOnline: return "'online metric'";
+std::string print(EModelType type) {
+    switch (type) {
+    case E_Counting:
+        return "'counting'";
+    case E_EventRateOnline:
+        return "'online event rate'";
+    case E_MetricOnline:
+        return "'online metric'";
     }
     return "-";
 }

 MODEL_EXPORT
-std::size_t dimension(EFeature feature)
-{
-    switch (feature)
-    {
+std::size_t dimension(EFeature feature) {
+    switch (feature) {
     CASE_INDIVIDUAL_COUNT:
         return 1;
@@ -112,11 +106,8 @@ std::size_t dimension(EFeature feature)
     return 1;
 }

-TDouble2Vec1Vec stripExtraStatistics(EFeature feature,
-                                     const TDouble2Vec1Vec &values)
-{
-    switch (feature)
-    {
+TDouble2Vec1Vec stripExtraStatistics(EFeature feature, const TDouble2Vec1Vec& values) {
+    switch (feature) {
     CASE_INDIVIDUAL_COUNT:
         return values;
@@ -142,12 +133,10 @@ TDouble2Vec1Vec stripExtraStatistics(EFeature feature,
         return values;
     case E_IndividualVarianceByPerson:
     case E_IndividualLowVarianceByPerson:
-    case E_IndividualHighVarianceByPerson:
-    {
+    case E_IndividualHighVarianceByPerson: {
         TDouble2Vec1Vec result;
         result.reserve(values.size());
-        for (const auto &value : values)
-        {
+        for (const auto& value : values) {
             result.push_back(TDouble2Vec(value.begin(), value.begin() + value.size() / 2));
         }
         return result;
@@ -175,12 +164,10 @@ TDouble2Vec1Vec stripExtraStatistics(EFeature feature,
         return values;
     case E_PopulationVarianceByPersonAndAttribute:
     case E_PopulationLowVarianceByPersonAndAttribute:
-    case E_PopulationHighVarianceByPersonAndAttribute:
-    {
+    case E_PopulationHighVarianceByPersonAndAttribute: {
         TDouble2Vec1Vec result;
         result.reserve(values.size());
-        for (const auto &value : values)
-        {
+        for (const auto& value : values) {
             result.push_back(TDouble2Vec(value.begin(), value.begin() + value.size() / 2));
         }
         return result;
@@ -194,10 +181,8 @@ TDouble2Vec1Vec stripExtraStatistics(EFeature feature,
     return values;
 }

-bool isCategorical(EFeature feature)
-{
-    switch (feature)
-    {
+bool isCategorical(EFeature feature) {
+    switch (feature) {
     case E_IndividualTotalBucketCountByPerson:
         return true;
     case E_IndividualCountByBucketAndPerson:
@@ -249,10 +234,8 @@ bool isCategorical(EFeature feature)
     return false;
 }

-bool isDiurnal(EFeature feature)
-{
-    switch (feature)
-    {
+bool isDiurnal(EFeature feature) {
+    switch (feature) {
     case E_IndividualCountByBucketAndPerson:
     case E_IndividualNonZeroCountByBucketAndPerson:
     case E_IndividualTotalBucketCountByPerson:
@@ -320,10 +303,8 @@ bool isDiurnal(EFeature feature)
     return false;
 }

-bool isLatLong(EFeature feature)
-{
-    switch (feature)
-    {
+bool isLatLong(EFeature feature) {
+    switch (feature) {
     CASE_INDIVIDUAL_COUNT:
         return false;
@@ -385,10 +366,8 @@ bool isLatLong(EFeature feature)
     return false;
 }

-bool isConstant(EFeature feature)
-{
-    switch (feature)
-    {
+bool isConstant(EFeature feature) {
+    switch (feature) {
     case E_IndividualIndicatorOfBucketPerson:
         return true;
     case E_IndividualCountByBucketAndPerson:
@@ -440,61 +419,44 @@ bool isConstant(EFeature feature)
     return false;
 }

-bool isMeanFeature(EFeature feature)
-{
+bool isMeanFeature(EFeature feature) {
     EMetricCategory category;
-    return metricCategory(feature, category)
-           && (category == E_Mean || category == E_MultivariateMean);
+    return metricCategory(feature, category) && (category == E_Mean || category == E_MultivariateMean);
 }

-bool isMedianFeature(EFeature feature)
-{
+bool isMedianFeature(EFeature feature) {
     EMetricCategory category;
-    return metricCategory(feature, category)
-           && (category == E_Median);
+    return metricCategory(feature, category) && (category == E_Median);
 }

-bool isMinFeature(EFeature feature)
-{
+bool isMinFeature(EFeature feature) {
     EMetricCategory category;
-    return metricCategory(feature, category)
-           && (category == E_Min || category == E_MultivariateMin);
+    return metricCategory(feature, category) && (category == E_Min || category == E_MultivariateMin);
 }

-bool isMaxFeature(EFeature feature)
-{
+bool isMaxFeature(EFeature feature) {
     EMetricCategory category;
-    return metricCategory(feature, category)
-           && (category == E_Max || category == E_MultivariateMax);
+    return metricCategory(feature, category) && (category == E_Max || category == E_MultivariateMax);
 }

-bool isVarianceFeature(EFeature feature)
-{
+bool isVarianceFeature(EFeature feature) {
     EMetricCategory category;
-    return metricCategory(feature, category)
-           && (category == E_Variance);
+    return metricCategory(feature, category) && (category == E_Variance);
 }

-bool isSumFeature(EFeature feature)
-{
+bool isSumFeature(EFeature feature) {
     EMetricCategory category;
     return metricCategory(feature, category) && category == E_Sum;
 }

-double varianceScale(EFeature feature,
-                     double sampleCount,
-                     double count)
-{
-    return isMeanFeature(feature)
-           || isMedianFeature(feature)
-           || isVarianceFeature(feature) ?
-           (sampleCount > 0.0 && count > 0.0 ? sampleCount / count : 1.0) : 1.0;
+double varianceScale(EFeature feature, double sampleCount, double count) {
+    return isMeanFeature(feature) || isMedianFeature(feature) || isVarianceFeature(feature)
+               ? (sampleCount > 0.0 && count > 0.0 ? sampleCount / count : 1.0)
+               : 1.0;
 }
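The varianceScale expression reformatted above encodes a simple fact: a statistic averaged over `count` measurements has variance proportional to 1/count, so relative to a model trained on samples of size sampleCount the variance scales by sampleCount / count, with 1.0 as the fallback for degenerate counts and for features that are not mean-, median- or variance-like. A small worked check, as a sketch:

    #include <cassert>

    // Same shape as varianceScale: sampleCount / count for mean-like
    // features, neutral 1.0 otherwise.
    double scale(bool meanLike, double sampleCount, double count) {
        return meanLike ? (sampleCount > 0.0 && count > 0.0 ? sampleCount / count : 1.0) : 1.0;
    }

    int main() {
        // A mean of 5 values is noisier than a mean of the usual 10
        // samples: its variance is scaled up by 10 / 5 = 2.
        assert(scale(true, 10.0, 5.0) == 2.0);
        // Degenerate and non-mean cases fall back to 1.
        assert(scale(true, 0.0, 5.0) == 1.0);
        assert(scale(false, 10.0, 5.0) == 1.0);
    }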
-bool isSampled(EFeature feature)
-{
-    switch (feature)
-    {
+bool isSampled(EFeature feature) {
+    switch (feature) {
     CASE_INDIVIDUAL_COUNT:
         return false;
@@ -567,10 +529,8 @@ bool isSampled(EFeature feature)
     return false;
 }

-unsigned minimumSampleCount(EFeature feature)
-{
-    switch (feature)
-    {
+unsigned minimumSampleCount(EFeature feature) {
+    switch (feature) {
     CASE_INDIVIDUAL_COUNT:
         return 1;
@@ -595,7 +555,7 @@ unsigned minimumSampleCount(EFeature feature)
     case E_IndividualHighMedianByPerson:
         return 1;

-        // Population variance needs a minimum population size
+    // Population variance needs a minimum population size
     case E_IndividualVarianceByPerson:
     case E_IndividualLowVarianceByPerson:
     case E_IndividualHighVarianceByPerson:
@@ -622,7 +582,7 @@ unsigned minimumSampleCount(EFeature feature)
     case E_PopulationSumVelocityByPersonAndAttribute:
         return 1;

-        // Population variance needs a minimum population size
+    // Population variance needs a minimum population size
     case E_PopulationVarianceByPersonAndAttribute:
     case E_PopulationLowVarianceByPersonAndAttribute:
     case E_PopulationHighVarianceByPersonAndAttribute:
@@ -637,10 +597,8 @@ unsigned minimumSampleCount(EFeature feature)
     return 1;
 }

-double offsetCountToZero(EFeature feature, double count)
-{
-    switch (feature)
-    {
+double offsetCountToZero(EFeature feature, double count) {
+    switch (feature) {
     case E_IndividualNonZeroCountByBucketAndPerson:
     case E_IndividualTotalBucketCountByPerson:
     case E_IndividualIndicatorOfBucketPerson:
@@ -681,18 +639,14 @@ double offsetCountToZero(EFeature feature, double count)
     return count;
 }

-void offsetCountToZero(EFeature feature, TDouble1Vec &count)
-{
-    for (std::size_t i = 0u; i < count.size(); ++i)
-    {
+void offsetCountToZero(EFeature feature, TDouble1Vec& count) {
+    for (std::size_t i = 0u; i < count.size(); ++i) {
         count[i] = offsetCountToZero(feature, count[i]);
     }
 }

-double inverseOffsetCountToZero(EFeature feature, double count)
-{
-    switch (feature)
-    {
+double inverseOffsetCountToZero(EFeature feature, double count) {
+    switch (feature) {
     case E_IndividualNonZeroCountByBucketAndPerson:
     case E_IndividualTotalBucketCountByPerson:
     case E_IndividualIndicatorOfBucketPerson:
@@ -733,18 +687,14 @@ double inverseOffsetCountToZero(EFeature feature, double count)
     return count;
 }

-void inverseOffsetCountToZero(EFeature feature, TDouble1Vec &count)
-{
-    for (std::size_t i = 0u; i < count.size(); ++i)
-    {
+void inverseOffsetCountToZero(EFeature feature, TDouble1Vec& count) {
+    for (std::size_t i = 0u; i < count.size(); ++i) {
         count[i] = inverseOffsetCountToZero(feature, count[i]);
     }
 }

-bool countsEmptyBuckets(EFeature feature)
-{
-    switch (feature)
-    {
+bool countsEmptyBuckets(EFeature feature) {
+    switch (feature) {
     case E_IndividualCountByBucketAndPerson:
     case E_IndividualLowCountsByBucketAndPerson:
     case E_IndividualHighCountsByBucketAndPerson:
@@ -801,20 +751,15 @@ bool countsEmptyBuckets(EFeature feature)
     return false;
 }

-double emptyBucketCountWeight(EFeature feature, double frequency, double cutoff)
-{
-    if (countsEmptyBuckets(feature) && cutoff > 0.0)
-    {
+double emptyBucketCountWeight(EFeature feature, double frequency, double cutoff) {
+    if (countsEmptyBuckets(feature) && cutoff > 0.0) {
         static const double M = 1.001;
         static const double C = 0.025;
         static const double K = std::log((M + 1.0) / (M - 1.0)) / C;
         double df = frequency - std::min(cutoff + C, 1.0);
-        if (df < -C)
-        {
+        if (df < -C) {
             return 0.0;
-        }
-        else if (df < C)
-        {
+        } else if (df < C) {
            double fa = std::exp(K * df);
            return 0.5 * (1.0 + M * (fa - 1.0) / (fa + 1.0));
         }
@@ -822,15 +767,12 @@ double emptyBucketCountWeight(EFeature feature, double frequency, double cutoff)
     return 1.0;
 }
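The middle branch of emptyBucketCountWeight is a smoothed step in disguise. With df = frequency - min(cutoff + C, 1) and the identity (e^x - 1)/(e^x + 1) = tanh(x/2), it equals 0.5 * (1 + M * tanh(K * df / 2)), and K = log((M + 1)/(M - 1)) / C is chosen so the ramp lands exactly on its endpoints; M = 1.001 pushes tanh just past its asymptote so 0 and 1 are actually reached. Checking at df = C:

    f_a = e^{K C} = (M + 1) / (M - 1)
    0.5 * (1 + M * (f_a - 1) / (f_a + 1))
        = 0.5 * (1 + M * (2 / (M - 1)) / (2M / (M - 1)))
        = 0.5 * (1 + M * (1 / M))
        = 1

and by the same algebra the weight is exactly 0 at df = -C, so the weight ramps smoothly from 0 to 1 over a band of width 2C around the cutoff frequency.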
-double learnRate(EFeature feature, const model::SModelParams &params)
-{
+double learnRate(EFeature feature, const model::SModelParams& params) {
     return isDiurnal(feature) || isLatLong(feature) ? 1.0 : params.s_LearnRate;
 }

-maths_t::EProbabilityCalculation probabilityCalculation(EFeature feature)
-{
-    switch (feature)
-    {
+maths_t::EProbabilityCalculation probabilityCalculation(EFeature feature) {
+    switch (feature) {
     case E_IndividualCountByBucketAndPerson:
     case E_IndividualNonZeroCountByBucketAndPerson:
     case E_IndividualTotalBucketCountByPerson:
@@ -954,13 +896,8 @@ maths_t::EProbabilityCalculation probabilityCalculation(EFeature feature)
     return maths_t::E_TwoSided;
 }

-core_t::TTime sampleTime(EFeature feature,
-                         core_t::TTime bucketStartTime,
-                         core_t::TTime bucketLength,
-                         core_t::TTime time)
-{
-    switch (feature)
-    {
+core_t::TTime sampleTime(EFeature feature, core_t::TTime bucketStartTime, core_t::TTime bucketLength, core_t::TTime time) {
+    switch (feature) {
     CASE_INDIVIDUAL_COUNT:
         return bucketStartTime + bucketLength / 2;
@@ -1033,15 +970,13 @@ core_t::TTime sampleTime(EFeature feature,
     return bucketStartTime + bucketLength / 2;
 }

-TDouble1VecDouble1VecPr support(EFeature feature)
-{
+TDouble1VecDouble1VecPr support(EFeature feature) {
     static const double MIN_DOUBLE = -std::numeric_limits<double>::max();
-    static const double MAX_DOUBLE =  std::numeric_limits<double>::max();
+    static const double MAX_DOUBLE = std::numeric_limits<double>::max();

     std::size_t d = dimension(feature);

-    switch (feature)
-    {
+    switch (feature) {
     case E_IndividualCountByBucketAndPerson:
     case E_IndividualNonZeroCountByBucketAndPerson:
     case E_IndividualTotalBucketCountByPerson:
@@ -1089,7 +1024,7 @@ TDouble1VecDouble1VecPr support(EFeature feature)
     case E_IndividualHighVarianceByPerson:
         return {TDouble1Vec(d, 0.0), TDouble1Vec(d, MAX_DOUBLE)};
     case E_IndividualMeanLatLongByPerson:
-        return {TDouble1Vec(d, -180.0),  TDouble1Vec(d, 180.0)};
+        return {TDouble1Vec(d, -180.0), TDouble1Vec(d, 180.0)};

     case E_PopulationAttributeTotalCountByPerson:
     case E_PopulationCountByBucketPersonAndAttribute:
@@ -1105,8 +1040,7 @@ TDouble1VecDouble1VecPr support(EFeature feature)
     case E_PopulationHighInfoContentByBucketPersonAndAttribute:
         return {TDouble1Vec(d, 0.0), TDouble1Vec(d, MAX_DOUBLE)};
     case E_PopulationTimeOfDayByBucketPersonAndAttribute:
-        return {TDouble1Vec(d, 0.0),
-                TDouble1Vec(d, static_cast<double>(core::constants::DAY))};
+        return {TDouble1Vec(d, 0.0), TDouble1Vec(d, static_cast<double>(core::constants::DAY))};
     case E_PopulationTimeOfWeekByBucketPersonAndAttribute:
         return {TDouble1Vec(d, 0.0), TDouble1Vec(d, static_cast<double>(core::constants::WEEK))};
@@ -1131,7 +1065,7 @@ TDouble1VecDouble1VecPr support(EFeature feature)
     case E_PopulationHighVarianceByPersonAndAttribute:
         return {TDouble1Vec(d, 0.0), TDouble1Vec(d, MAX_DOUBLE)};
     case E_PopulationMeanLatLongByPersonAndAttribute:
-        return {TDouble1Vec(d, -180.0),  TDouble1Vec(d, 180.0)};
+        return {TDouble1Vec(d, -180.0), TDouble1Vec(d, 180.0)};

     CASE_PEERS_COUNT:
         return {TDouble1Vec(d, 0.0), TDouble1Vec(d, MAX_DOUBLE)};
@@ -1151,10 +1085,7 @@ TDouble1VecDouble1VecPr support(EFeature feature)
     return {TDouble1Vec(d, MIN_DOUBLE), TDouble1Vec(d, MAX_DOUBLE)};
 }

-double adjustProbability(EFeature feature,
-                         core_t::TTime elapsedTime,
-                         double probability)
-{
+double adjustProbability(EFeature feature, core_t::TTime elapsedTime, double probability) {
     // For the time of week calculation we assume that the
     // probability of less likely samples depends on whether
     // the value belongs to a cluster that hasn't yet been
@@ -1168,8 +1099,7 @@ double adjustProbability(EFeature feature,
     // decays exponentially over time.

     double pNewCluster = 0.0;
-    switch (feature)
-    {
+    switch (feature) {
     case E_IndividualCountByBucketAndPerson:
     case E_IndividualNonZeroCountByBucketAndPerson:
     case E_IndividualTotalBucketCountByPerson:
@@ -1189,12 +1119,10 @@ double adjustProbability(EFeature feature,
     case E_IndividualHighInfoContentByBucketAndPerson:
         break;
     case E_IndividualTimeOfDayByBucketAndPerson:
-        pNewCluster = std::exp(-pow4(  static_cast<double>(elapsedTime)
-                                     / static_cast<double>(core::constants::DAY)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::DAY)));
         break;
     case E_IndividualTimeOfWeekByBucketAndPerson:
-        pNewCluster = std::exp(-pow4(  static_cast<double>(elapsedTime)
-                                     / static_cast<double>(core::constants::WEEK)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::WEEK)));
         break;

     CASE_INDIVIDUAL_METRIC:
@@ -1214,12 +1142,10 @@ double adjustProbability(EFeature feature,
     case E_PopulationHighInfoContentByBucketPersonAndAttribute:
         break;
     case E_PopulationTimeOfDayByBucketPersonAndAttribute:
-        pNewCluster = std::exp(-pow4(  static_cast<double>(elapsedTime)
-                                     / static_cast<double>(core::constants::DAY)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::DAY)));
         break;
     case E_PopulationTimeOfWeekByBucketPersonAndAttribute:
-        pNewCluster = std::exp(-pow4(  static_cast<double>(elapsedTime)
-                                     / static_cast<double>(core::constants::WEEK)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::WEEK)));
         break;

     CASE_POPULATION_METRIC:
@@ -1237,12 +1163,10 @@ double adjustProbability(EFeature feature,
     case E_PeersHighInfoContentByBucketPersonAndAttribute:
         break;
     case E_PeersTimeOfDayByBucketPersonAndAttribute:
-        pNewCluster = std::exp(-pow4(  static_cast<double>(elapsedTime)
-                                     / static_cast<double>(core::constants::DAY)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::DAY)));
         break;
     case E_PeersTimeOfWeekByBucketPersonAndAttribute:
-        pNewCluster = std::exp(-pow4(  static_cast<double>(elapsedTime)
-                                     / static_cast<double>(core::constants::WEEK)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::WEEK)));
         break;

     CASE_PEERS_METRIC:
@@ -1252,10 +1176,8 @@ double adjustProbability(EFeature feature,
     return pNewCluster + probability * (1.0 - pNewCluster);
 }
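adjustProbability blends the raw probability with the chance that the sample simply came from a cluster the model has not seen yet; that chance, pNewCluster = exp(-((elapsedTime / period)^4)), stays near 1 early on and collapses quickly after about one full period (day or week). A small numeric sketch under those assumptions:

    #include <cmath>
    #include <cstdio>

    double pow4(double x) { double x2 = x * x; return x2 * x2; }

    // Probability that either the value belongs to an as-yet-unseen
    // cluster or it is genuinely this unlikely: pNew + p * (1 - pNew).
    double adjust(double elapsed, double period, double p) {
        double pNew = std::exp(-pow4(elapsed / period));
        return pNew + p * (1.0 - pNew);
    }

    int main() {
        // Half a period in: pNew = exp(-1/16) ~ 0.94, so even a tiny raw
        // probability is lifted to ~0.94 and the anomaly is muted.
        std::printf("%.3f\n", adjust(0.5, 1.0, 0.001));
        // Two periods in: pNew = exp(-16) ~ 1e-7, raw probability wins.
        std::printf("%.3f\n", adjust(2.0, 1.0, 0.001));
    }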
(feature) { // Individual event rate features case E_IndividualCountByBucketAndPerson: case E_IndividualNonZeroCountByBucketAndPerson: @@ -1668,10 +1586,8 @@ const std::string &outputFunctionName(EFeature feature) return model::CAnomalyDetector::COUNT_NAME; } -std::string print(EFeature feature) -{ - switch (feature) - { +std::string print(EFeature feature) { + switch (feature) { case E_IndividualCountByBucketAndPerson: return "'count per bucket by person'"; case E_IndividualNonZeroCountByBucketAndPerson: @@ -1867,10 +1783,8 @@ std::string print(EFeature feature) return "-"; } -bool metricCategory(EFeature feature, EMetricCategory &result) -{ - switch (feature) - { +bool metricCategory(EFeature feature, EMetricCategory& result) { + switch (feature) { CASE_INDIVIDUAL_COUNT: return false; @@ -1878,13 +1792,16 @@ bool metricCategory(EFeature feature, EMetricCategory &result) case E_IndividualLowMeanByPerson: case E_IndividualHighMeanByPerson: case E_IndividualMeanVelocityByPerson: - result = E_Mean; return true; + result = E_Mean; + return true; case E_IndividualMinByPerson: case E_IndividualMinVelocityByPerson: - result = E_Min; return true; + result = E_Min; + return true; case E_IndividualMaxByPerson: case E_IndividualMaxVelocityByPerson: - result = E_Max; return true; + result = E_Max; + return true; case E_IndividualSumByBucketAndPerson: case E_IndividualLowSumByBucketAndPerson: case E_IndividualHighSumByBucketAndPerson: @@ -1892,17 +1809,21 @@ bool metricCategory(EFeature feature, EMetricCategory &result) case E_IndividualLowNonNullSumByBucketAndPerson: case E_IndividualHighNonNullSumByBucketAndPerson: case E_IndividualSumVelocityByPerson: - result = E_Sum; return true; + result = E_Sum; + return true; case E_IndividualMeanLatLongByPerson: - result = E_MultivariateMean; return true; + result = E_MultivariateMean; + return true; case E_IndividualMedianByPerson: case E_IndividualLowMedianByPerson: case E_IndividualHighMedianByPerson: - result = E_Median; return true; + result = E_Median; + return true; case E_IndividualVarianceByPerson: case E_IndividualLowVarianceByPerson: case E_IndividualHighVarianceByPerson: - result = E_Variance; return true; + result = E_Variance; + return true; CASE_POPULATION_COUNT: return false; @@ -1911,28 +1832,35 @@ bool metricCategory(EFeature feature, EMetricCategory &result) case E_PopulationLowMeanByPersonAndAttribute: case E_PopulationHighMeanByPersonAndAttribute: case E_PopulationMeanVelocityByPersonAndAttribute: - result = E_Mean; return true; + result = E_Mean; + return true; case E_PopulationMinByPersonAndAttribute: case E_PopulationMinVelocityByPersonAndAttribute: - result = E_Min; return true; + result = E_Min; + return true; case E_PopulationMaxByPersonAndAttribute: case E_PopulationMaxVelocityByPersonAndAttribute: - result = E_Max; return true; + result = E_Max; + return true; case E_PopulationMeanLatLongByPersonAndAttribute: - result = E_MultivariateMean; return true; + result = E_MultivariateMean; + return true; case E_PopulationSumByBucketPersonAndAttribute: case E_PopulationLowSumByBucketPersonAndAttribute: case E_PopulationHighSumByBucketPersonAndAttribute: case E_PopulationSumVelocityByPersonAndAttribute: - result = E_Sum; return true; + result = E_Sum; + return true; case E_PopulationMedianByPersonAndAttribute: case E_PopulationLowMedianByPersonAndAttribute: case E_PopulationHighMedianByPersonAndAttribute: - result = E_Median; return true; + result = E_Median; + return true; case E_PopulationVarianceByPersonAndAttribute: case 
E_PopulationLowVarianceByPersonAndAttribute: case E_PopulationHighVarianceByPersonAndAttribute: - result = E_Variance; return true; + result = E_Variance; + return true; CASE_PEERS_COUNT: return false; @@ -1940,55 +1868,67 @@ bool metricCategory(EFeature feature, EMetricCategory &result) case E_PeersMeanByPersonAndAttribute: case E_PeersLowMeanByPersonAndAttribute: case E_PeersHighMeanByPersonAndAttribute: - result = E_Mean; return true; + result = E_Mean; + return true; case E_PeersMinByPersonAndAttribute: - result = E_Min; return true; + result = E_Min; + return true; case E_PeersMaxByPersonAndAttribute: - result = E_Max; return true; + result = E_Max; + return true; case E_PeersSumByBucketPersonAndAttribute: case E_PeersLowSumByBucketPersonAndAttribute: case E_PeersHighSumByBucketPersonAndAttribute: - result = E_Sum; return true; + result = E_Sum; + return true; case E_PeersMedianByPersonAndAttribute: - result = E_Median; return true; + result = E_Median; + return true; } return false; } -std::string print(EMetricCategory category) -{ - switch (category) - { - case E_Mean: return "'mean'"; - case E_Min: return "'minimum'"; - case E_Max: return "'maximum'"; - case E_Sum: return "'sum'"; - case E_MultivariateMean: return "'multivariate mean'"; - case E_MultivariateMin: return "'multivariate minimum'"; - case E_MultivariateMax: return "'multivariate maximum'"; - case E_Median: return "'median'"; - case E_Variance: return "'variance'"; +std::string print(EMetricCategory category) { + switch (category) { + case E_Mean: + return "'mean'"; + case E_Min: + return "'minimum'"; + case E_Max: + return "'maximum'"; + case E_Sum: + return "'sum'"; + case E_MultivariateMean: + return "'multivariate mean'"; + case E_MultivariateMin: + return "'multivariate minimum'"; + case E_MultivariateMax: + return "'multivariate maximum'"; + case E_Median: + return "'median'"; + case E_Variance: + return "'variance'"; } return "-"; } -std::string print(EEventRateCategory category) -{ - switch (category) - { - case E_MeanArrivalTimes: return "'mean arrival times'"; - case E_AttributePeople: return "'attributes' people'"; - case E_UniqueValues: return "'unique values'"; - case E_DiurnalTimes: return "'time-of-day values'"; +std::string print(EEventRateCategory category) { + switch (category) { + case E_MeanArrivalTimes: + return "'mean arrival times'"; + case E_AttributePeople: + return "'attributes' people'"; + case E_UniqueValues: + return "'unique values'"; + case E_DiurnalTimes: + return "'time-of-day values'"; } return "-"; } -EAnalysisCategory analysisCategory(EFeature feature) -{ - switch (feature) - { +EAnalysisCategory analysisCategory(EFeature feature) { + switch (feature) { CASE_INDIVIDUAL_COUNT: return E_EventRate; @@ -2011,31 +1951,34 @@ EAnalysisCategory analysisCategory(EFeature feature) return E_EventRate; } -std::string print(EAnalysisCategory category) -{ - switch (category) - { - case E_EventRate: return "'event rate'"; - case E_Metric: return "'metric'"; - case E_PopulationEventRate: return "'population event rate'"; - case E_PopulationMetric: return "'population metric'"; - case E_PeersEventRate: return "'peers event rate'"; - case E_PeersMetric: return "'peers metric'"; +std::string print(EAnalysisCategory category) { + switch (category) { + case E_EventRate: + return "'event rate'"; + case E_Metric: + return "'metric'"; + case E_PopulationEventRate: + return "'population event rate'"; + case E_PopulationMetric: + return "'population metric'"; + case E_PeersEventRate: + return "'peers event 
rate'"; + case E_PeersMetric: + return "'peers metric'"; } return "-"; } -std::string print(EMemoryStatus memoryStatus) -{ - switch (memoryStatus) - { - case E_MemoryStatusOk: return "ok"; - case E_MemoryStatusSoftLimit: return "soft_limit"; - case E_MemoryStatusHardLimit: return "hard_limit"; +std::string print(EMemoryStatus memoryStatus) { + switch (memoryStatus) { + case E_MemoryStatusOk: + return "ok"; + case E_MemoryStatusSoftLimit: + return "soft_limit"; + case E_MemoryStatusHardLimit: + return "hard_limit"; } return "-"; } - } } - diff --git a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc index 1b85fec8c0..b9d5f098f3 100644 --- a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc +++ b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc @@ -15,12 +15,10 @@ #include - using namespace ml; using namespace model; -namespace -{ +namespace { using TDouble1Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; using TDouble4Vec1Vec = core::CSmallVector; @@ -41,30 +39,19 @@ const core::CStoredStringPtr C4_PTR(CStringStore::names().get(C4)); const TStoredStringPtr1Vec NO_CORRELATED_ATTRIBUTES; const TSizeDoublePr1Vec NO_CORRELATES; -class CAnnotatedProbabilityBuilderForTest : public CAnnotatedProbabilityBuilder -{ - public: - CAnnotatedProbabilityBuilderForTest(SAnnotatedProbability &annotatedProbability) : - CAnnotatedProbabilityBuilder(annotatedProbability) - {} - - CAnnotatedProbabilityBuilderForTest(SAnnotatedProbability &annotatedProbability, - std::size_t numberAttributeProbabilities, - function_t::EFunction function, - std::size_t numberOfPeople) : - CAnnotatedProbabilityBuilder(annotatedProbability, - numberAttributeProbabilities, - function, - numberOfPeople) - {} - +class CAnnotatedProbabilityBuilderForTest : public CAnnotatedProbabilityBuilder { +public: + CAnnotatedProbabilityBuilderForTest(SAnnotatedProbability& annotatedProbability) : CAnnotatedProbabilityBuilder(annotatedProbability) {} + CAnnotatedProbabilityBuilderForTest(SAnnotatedProbability& annotatedProbability, + std::size_t numberAttributeProbabilities, + function_t::EFunction function, + std::size_t numberOfPeople) + : CAnnotatedProbabilityBuilder(annotatedProbability, numberAttributeProbabilities, function, numberOfPeople) {} }; - } -void CAnnotatedProbabilityBuilderTest::testProbability() -{ +void CAnnotatedProbabilityBuilderTest::testProbability() { SAnnotatedProbability result; CAnnotatedProbabilityBuilderForTest builder(result); @@ -75,8 +62,7 @@ void CAnnotatedProbabilityBuilderTest::testProbability() CPPUNIT_ASSERT_EQUAL(0.99, result.s_Probability); } -void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualCount() -{ +void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualCount() { SAnnotatedProbability result; CAnnotatedProbabilityBuilderForTest builder(result, 1, function_t::E_IndividualCount, 42); @@ -86,7 +72,8 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividua 0.68, model_t::CResultType::E_Unconditional, model_t::E_IndividualCountByBucketAndPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.build(); CPPUNIT_ASSERT_EQUAL(std::size_t(1), result.s_AttributeProbabilities.size()); @@ -96,8 +83,7 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividua CPPUNIT_ASSERT(result.s_AttributeProbabilities[0].s_DescriptiveData.empty()); } -void 
CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationCount() -{ +void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationCount() { SAnnotatedProbability result; CAnnotatedProbabilityBuilderForTest builder(result, 3, function_t::E_PopulationCount, 42); @@ -107,28 +93,32 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio 0.09, model_t::CResultType::E_Unconditional, model_t::E_PopulationCountByBucketPersonAndAttribute, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.addAttributeProbability(1, C1_PTR, 1.0, 0.05, model_t::CResultType::E_Unconditional, model_t::E_PopulationCountByBucketPersonAndAttribute, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.addAttributeProbability(2, C2_PTR, 1.0, 0.04, model_t::CResultType::E_Unconditional, model_t::E_PopulationCountByBucketPersonAndAttribute, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.addAttributeProbability(3, C3_PTR, 1.0, 0.06, model_t::CResultType::E_Unconditional, model_t::E_PopulationCountByBucketPersonAndAttribute, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.build(); CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities.size()); @@ -144,8 +134,7 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio CPPUNIT_ASSERT(result.s_AttributeProbabilities[1].s_DescriptiveData.empty()); } -void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualRare() -{ +void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualRare() { SAnnotatedProbability result; CAnnotatedProbabilityBuilderForTest builder(result, 1, function_t::E_IndividualRare, 42); @@ -155,26 +144,24 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividua 0.68, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.build(); CPPUNIT_ASSERT_EQUAL(std::size_t(1), result.s_AttributeProbabilities.size()); CPPUNIT_ASSERT(result.s_AttributeProbabilities[0].s_DescriptiveData.empty()); } -void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationRare() -{ +void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationRare() { maths::CMultinomialConjugate attributePrior(maths::CMultinomialConjugate::nonInformativePrior(4u)); - for (std::size_t i = 1u; i <= 4u; ++i) - { + for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(i, static_cast(i)); TDouble4Vec1Vec weights(i, maths::CConstantWeights::UNIT); attributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); } maths::CMultinomialConjugate personAttributePrior(maths::CMultinomialConjugate::nonInformativePrior(4u)); - for (std::size_t i = 1u; i <= 4u; ++i) - { + for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(2 * i, static_cast(i)); TDouble4Vec1Vec weights(2 * i, maths::CConstantWeights::UNIT); personAttributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); @@ -191,28 +178,32 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio 0.02, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + 
NO_CORRELATES); builder.addAttributeProbability(2, C2_PTR, 0.06, 0.06, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.addAttributeProbability(3, C3_PTR, 0.07, 0.01, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.addAttributeProbability(4, C4_PTR, 0.03, 0.03, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.build(); CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities.size()); @@ -244,19 +235,16 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio CPPUNIT_ASSERT_EQUAL(2.0, result.s_AttributeProbabilities[1].s_DescriptiveData[1].second); } -void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationFreqRare() -{ +void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationFreqRare() { maths::CMultinomialConjugate attributePrior(maths::CMultinomialConjugate::nonInformativePrior(4u)); - for (std::size_t i = 1u; i <= 4u; ++i) - { + for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(i, static_cast(i)); TDouble4Vec1Vec weights(i, maths::CConstantWeights::UNIT); attributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); } maths::CMultinomialConjugate personAttributePrior(maths::CMultinomialConjugate::nonInformativePrior(4u)); - for (std::size_t i = 1u; i <= 4u; ++i) - { + for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(2 * i, static_cast(i)); TDouble4Vec1Vec weights(2 * i, maths::CConstantWeights::UNIT); personAttributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); @@ -273,28 +261,32 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio 0.02, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.addAttributeProbability(2, C2_PTR, 0.06, 0.06, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.addAttributeProbability(3, C3_PTR, 0.07, 0.01, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.addAttributeProbability(4, C4_PTR, 0.03, 0.03, model_t::CResultType::E_Unconditional, model_t::E_IndividualIndicatorOfBucketPerson, - NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); + NO_CORRELATED_ATTRIBUTES, + NO_CORRELATES); builder.build(); CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities.size()); @@ -326,8 +318,7 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio CPPUNIT_ASSERT_EQUAL(2.0, result.s_AttributeProbabilities[1].s_DescriptiveData[1].second); } -void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualCount() -{ +void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualCount() { SAnnotatedProbability result; CAnnotatedProbabilityBuilderForTest builder(result, 1, function_t::E_IndividualCount, 42); @@ -336,8 +327,7 @@ void 
CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualCount() CPPUNIT_ASSERT(result.s_DescriptiveData.empty()); } -void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualRare() -{ +void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualRare() { { SAnnotatedProbability result; CAnnotatedProbabilityBuilderForTest builder(result, 1, function_t::E_IndividualRare, 42); @@ -360,8 +350,7 @@ void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualRare() } } -void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenPopulationRare() -{ +void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenPopulationRare() { SAnnotatedProbability result; CAnnotatedProbabilityBuilderForTest builder(result, 3, function_t::E_PopulationRare, 42); @@ -370,36 +359,34 @@ void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenPopulationRare() CPPUNIT_ASSERT(result.s_DescriptiveData.empty()); } -CppUnit::Test *CAnnotatedProbabilityBuilderTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAnnotatedProbabilityBuilderTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testProbability", - &CAnnotatedProbabilityBuilderTest::testProbability)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualCount", - &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualCount)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationCount", - &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationCount)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualRare", - &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualRare)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationRare", - &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationRare)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationFreqRare", - &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationFreqRare)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualCount", - &CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualCount)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualRare", - &CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualRare)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenPopulationRare", - &CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenPopulationRare)); +CppUnit::Test* CAnnotatedProbabilityBuilderTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnnotatedProbabilityBuilderTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CAnnotatedProbabilityBuilderTest::testProbability", + &CAnnotatedProbabilityBuilderTest::testProbability)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualCount", + 
&CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualCount));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
+        "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationCount",
+        &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationCount));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
+        "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualRare",
+        &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualRare));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
+        "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationRare",
+        &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationRare));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
+        "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationFreqRare",
+        &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationFreqRare));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
+        "CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualCount",
+        &CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualCount));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
+        "CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualRare",
+        &CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualRare));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
+        "CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenPopulationRare",
+        &CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenPopulationRare));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.h b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.h
index 2e5d283ee7..65479751e8 100644
--- a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.h
+++ b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.h
@@ -9,20 +9,19 @@
 
 #include
 
-class CAnnotatedProbabilityBuilderTest : public CppUnit::TestFixture
-{
-    public:
-        void testProbability();
-        void testAddAttributeProbabilityGivenIndividualCount();
-        void testAddAttributeProbabilityGivenPopulationCount();
-        void testAddAttributeProbabilityGivenIndividualRare();
-        void testAddAttributeProbabilityGivenPopulationRare();
-        void testAddAttributeProbabilityGivenPopulationFreqRare();
-        void testPersonFrequencyGivenIndividualCount();
-        void testPersonFrequencyGivenIndividualRare();
-        void testPersonFrequencyGivenPopulationRare();
+class CAnnotatedProbabilityBuilderTest : public CppUnit::TestFixture {
+public:
+    void testProbability();
+    void testAddAttributeProbabilityGivenIndividualCount();
+    void testAddAttributeProbabilityGivenPopulationCount();
+    void testAddAttributeProbabilityGivenIndividualRare();
+    void testAddAttributeProbabilityGivenPopulationRare();
+    void testAddAttributeProbabilityGivenPopulationFreqRare();
+    void testPersonFrequencyGivenIndividualCount();
+    void testPersonFrequencyGivenIndividualRare();
+    void testPersonFrequencyGivenPopulationRare();
 
-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CAnnotatedProbabilityBuilderTest_h
diff --git a/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc b/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc
index a073f9e8fc..1c32030d3a 100644
--- a/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc
+++ b/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc
@@ -17,18 +17,16 @@
 using namespace ml;
 using namespace model;
 
-namespace
-{
+namespace {
 using TDoubleVec = std::vector<double>;
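The suite() rewrites in this patch all reflow the same CppUnit registration idiom. As a reference for reading the hunks above and below, here is a minimal, self-contained sketch of that idiom; the fixture name CExampleTest and its test method are illustrative, not from this patch:

    #include <cppunit/TestCaller.h>
    #include <cppunit/TestSuite.h>
    #include <cppunit/extensions/HelperMacros.h>

    class CExampleTest : public CppUnit::TestFixture {
    public:
        void testSomething() { CPPUNIT_ASSERT_EQUAL(2, 1 + 1); }

        static CppUnit::Test* suite() {
            CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CExampleTest");
            // TestCaller is templated on the fixture type; each registration pairs
            // a human-readable test name with a pointer to the test method.
            suiteOfTests->addTest(new CppUnit::TestCaller<CExampleTest>("CExampleTest::testSomething",
                                                                        &CExampleTest::testSomething));
            return suiteOfTests;
        }
    };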
-const function_t::EFunction INDIVIDUAL_COUNT  = function_t::E_IndividualCount;
+const function_t::EFunction INDIVIDUAL_COUNT = function_t::E_IndividualCount;
 const function_t::EFunction INDIVIDUAL_METRIC = function_t::E_IndividualMetricMin;
-const function_t::EFunction POPULATION_COUNT  = function_t::E_PopulationCount;
+const function_t::EFunction POPULATION_COUNT = function_t::E_PopulationCount;
 const function_t::EFunction POPULATION_METRIC = function_t::E_PopulationMetric;
 }
 
-void CAnomalyDetectorModelConfigTest::testNormal()
-{
+void CAnomalyDetectorModelConfigTest::testNormal() {
     {
         CAnomalyDetectorModelConfig config = CAnomalyDetectorModelConfig::defaultConfig(1800);
         CPPUNIT_ASSERT(config.init("testfiles/mlmodel.conf"));
@@ -63,16 +61,13 @@
         CPPUNIT_ASSERT_EQUAL(std::size_t(20), config.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor);
         CPPUNIT_ASSERT_EQUAL(std::size_t(20), config.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor);
         TDoubleVec params;
-        for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i)
-        {
-            for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j)
-            {
-                params.push_back(config.aggregationStyleParam(static_cast<model_t::EAggregationStyle>(i),
-                                                              static_cast<model_t::EAggregationParam>(j)));
+        for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) {
+            for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) {
+                params.push_back(
+                    config.aggregationStyleParam(static_cast<model_t::EAggregationStyle>(i), static_cast<model_t::EAggregationParam>(j)));
             }
         }
-        CPPUNIT_ASSERT_EQUAL(std::string("[0.9, 0.1, 2, 4, 0.3, 0.7, 3, 8, 0.6, 0.4, 2, 10]"),
-                             core::CContainerPrinter::print(params));
+        CPPUNIT_ASSERT_EQUAL(std::string("[0.9, 0.1, 2, 4, 0.3, 0.7, 3, 8, 0.6, 0.4, 2, 10]"), core::CContainerPrinter::print(params));
         CPPUNIT_ASSERT_EQUAL(0.01, config.maximumAnomalousProbability());
         CPPUNIT_ASSERT_EQUAL(60.0, config.noisePercentile());
         CPPUNIT_ASSERT_EQUAL(1.2, config.noiseMultiplier());
@@ -97,15 +92,15 @@
         CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualMetricMin).get()));
         CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualMetricMax).get()));
         CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualMetric).get()));
-        CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_PopulationDistinctCount).get()));
+        CPPUNIT_ASSERT(
+            dynamic_cast(config.factory(1, function_t::E_PopulationDistinctCount).get()));
         CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_PopulationRare).get()));
         CPPUNIT_ASSERT(dynamic_cast(config.factory(CSearchKey::simpleCountKey()).get()));
 
         CPPUNIT_ASSERT_EQUAL(false, config.perPartitionNormalization());
     }
 }
 
-void CAnomalyDetectorModelConfigTest::testErrors()
-{
+void CAnomalyDetectorModelConfigTest::testErrors() {
     {
         CAnomalyDetectorModelConfig config1 = CAnomalyDetectorModelConfig::defaultConfig(1800);
         CPPUNIT_ASSERT(!config1.init("testfiles/invalidmlmodel.conf"));
@@ -152,12 +147,10 @@
                              config1.factory(1, POPULATION_COUNT)->minimumModeFraction());
         CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->minimumModeFraction(),
                              config1.factory(1, POPULATION_METRIC)->minimumModeFraction());
-        CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->componentSize(),
-                             config1.factory(1, INDIVIDUAL_COUNT)->componentSize());
+        CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->componentSize(), config1.factory(1, INDIVIDUAL_COUNT)->componentSize());
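The expected string asserted above flattens a 3-by-4 table: the outer loop runs over aggregation styles and the inner loop over aggregation parameters, so each consecutive group of four values belongs to one style. A sketch of that indexing; the helper function is hypothetical, while NUMBER_AGGREGATION_PARAMS is the model_t constant used by the test:

    #include <cstddef>
    #include <vector>

    // params = [0.9, 0.1, 2, 4,    <- style 0
    //           0.3, 0.7, 3, 8,    <- style 1
    //           0.6, 0.4, 2, 10]   <- style 2
    double styleParam(const std::vector<double>& params, std::size_t style, std::size_t param) {
        // Hypothetical helper: invert the row-major flattening produced by the test loop.
        return params[style * model_t::NUMBER_AGGREGATION_PARAMS + param];
    }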
CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->componentSize(), config1.factory(1, INDIVIDUAL_METRIC)->componentSize()); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->componentSize(), - config1.factory(1, POPULATION_COUNT)->componentSize()); + CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->componentSize(), config1.factory(1, POPULATION_COUNT)->componentSize()); CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->componentSize(), config1.factory(1, POPULATION_METRIC)->componentSize()); CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor, @@ -168,14 +161,11 @@ void CAnomalyDetectorModelConfigTest::testErrors() config1.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor); CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor, config1.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor); - for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) - { - for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) - { - CPPUNIT_ASSERT_EQUAL(config2.aggregationStyleParam(static_cast(i), - static_cast(j)), - config1.aggregationStyleParam(static_cast(i), - static_cast(j))); + for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) { + for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) { + CPPUNIT_ASSERT_EQUAL( + config2.aggregationStyleParam(static_cast(i), static_cast(j)), + config1.aggregationStyleParam(static_cast(i), static_cast(j))); } } CPPUNIT_ASSERT_EQUAL(config2.maximumAnomalousProbability(), config1.maximumAnomalousProbability()); @@ -202,16 +192,13 @@ void CAnomalyDetectorModelConfigTest::testErrors() } } -CppUnit::Test *CAnomalyDetectorModelConfigTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAnomalyDetectorModelConfigTest"); +CppUnit::Test* CAnomalyDetectorModelConfigTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnomalyDetectorModelConfigTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyDetectorModelConfigTest::testNormal", - &CAnomalyDetectorModelConfigTest::testNormal) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyDetectorModelConfigTest::testErrors", - &CAnomalyDetectorModelConfigTest::testErrors) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyDetectorModelConfigTest::testNormal", + &CAnomalyDetectorModelConfigTest::testNormal)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyDetectorModelConfigTest::testErrors", + &CAnomalyDetectorModelConfigTest::testErrors)); return suiteOfTests; } diff --git a/lib/model/unittest/CAnomalyDetectorModelConfigTest.h b/lib/model/unittest/CAnomalyDetectorModelConfigTest.h index 6695893cbd..cab2511132 100644 --- a/lib/model/unittest/CAnomalyDetectorModelConfigTest.h +++ b/lib/model/unittest/CAnomalyDetectorModelConfigTest.h @@ -9,14 +9,12 @@ #include +class CAnomalyDetectorModelConfigTest : public CppUnit::TestFixture { +public: + void testNormal(); + void testErrors(); -class CAnomalyDetectorModelConfigTest : public CppUnit::TestFixture -{ - public: - void testNormal(); - void testErrors(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CAnomalyDetectorModelConfigTest_h diff --git a/lib/model/unittest/CAnomalyScoreTest.cc b/lib/model/unittest/CAnomalyScoreTest.cc index 53c7ddc4ff..75303b919d 100644 --- a/lib/model/unittest/CAnomalyScoreTest.cc +++ 
b/lib/model/unittest/CAnomalyScoreTest.cc @@ -6,10 +6,10 @@ #include "CAnomalyScoreTest.h" -#include #include #include #include +#include #include #include @@ -25,8 +25,8 @@ #include #include -#include #include +#include #include #include @@ -34,14 +34,12 @@ using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector; using TSizeVec = std::vector; } -void CAnomalyScoreTest::testComputeScores() -{ +void CAnomalyScoreTest::testComputeScores() { using TScores = model::CAnomalyScore; using TJointProbabilityCalculator = maths::CJointProbabilityOfLessLikelySamples; using TLogExtremeProbabilityCalculator = maths::CLogProbabilityOfMFromNExtremeSamples; @@ -97,10 +95,7 @@ void CAnomalyScoreTest::testComputeScores() // Test 2: low anomalousness. // Expect scores of zero. { - double p[] = - { - 0.21, 0.52, 0.13, 0.67, 0.89, 0.32, 0.46, 0.222, 0.35, 0.93 - }; + double p[] = {0.21, 0.52, 0.13, 0.67, 0.89, 0.32, 0.46, 0.222, 0.35, 0.93}; TDoubleVec probabilities(boost::begin(p), boost::end(p)); TScores::compute(jointProbabilityWeight, extremeProbabilityWeight, @@ -118,10 +113,7 @@ void CAnomalyScoreTest::testComputeScores() // Expect a high anomaly score which is generated by the // joint probability of less likely samples. { - double p[] = - { - 0.11, 0.13, 0.12, 0.22, 0.14, 0.09, 0.01, 0.13, 0.15, 0.14, 0.11, 0.13, 0.12, 0.22, 0.09, 0.01 - }; + double p[] = {0.11, 0.13, 0.12, 0.22, 0.14, 0.09, 0.01, 0.13, 0.15, 0.14, 0.11, 0.13, 0.12, 0.22, 0.09, 0.01}; TDoubleVec probabilities(boost::begin(p), boost::end(p)); TScores::compute(jointProbabilityWeight, @@ -135,8 +127,7 @@ void CAnomalyScoreTest::testComputeScores() TJointProbabilityCalculator jointProbabilityCalculator; TLogExtremeProbabilityCalculator extremeProbabilityCalculator(2); - for (size_t i = 0; i < boost::size(p); ++i) - { + for (size_t i = 0; i < boost::size(p); ++i) { jointProbabilityCalculator.add(p[i]); extremeProbabilityCalculator.add(p[i]); } @@ -148,9 +139,8 @@ void CAnomalyScoreTest::testComputeScores() extremeProbability = std::exp(extremeProbability); LOG_DEBUG("3) probabilities = " << core::CContainerPrinter::print(p)); - LOG_DEBUG(" joint probability = " << jointProbability - << ", extreme probability = " << extremeProbability - << ", overallScore = " << overallScore); + LOG_DEBUG(" joint probability = " << jointProbability << ", extreme probability = " << extremeProbability + << ", overallScore = " << overallScore); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.318231, overallScore, 5e-7); } @@ -159,10 +149,7 @@ void CAnomalyScoreTest::testComputeScores() // Expect a high anomaly score which is generated by the // extreme samples probability. 
{ - double p[] = - { - 0.21, 0.52, 0.13, 0.67, 0.89, 0.32, 0.46, 0.222, 0.35, 0.93, 0.89, 0.32, 0.46, 0.000021 - }; + double p[] = {0.21, 0.52, 0.13, 0.67, 0.89, 0.32, 0.46, 0.222, 0.35, 0.93, 0.89, 0.32, 0.46, 0.000021}; TDoubleVec probabilities(boost::begin(p), boost::end(p)); TScores::compute(jointProbabilityWeight, @@ -176,8 +163,7 @@ void CAnomalyScoreTest::testComputeScores() TJointProbabilityCalculator jointProbabilityCalculator; TLogExtremeProbabilityCalculator extremeProbabilityCalculator(1); - for (size_t i = 0; i < boost::size(p); ++i) - { + for (size_t i = 0; i < boost::size(p); ++i) { jointProbabilityCalculator.add(p[i]); extremeProbabilityCalculator.add(p[i]); } @@ -190,9 +176,8 @@ void CAnomalyScoreTest::testComputeScores() extremeProbability = std::exp(extremeProbability); LOG_DEBUG("4) probabilities = " << core::CContainerPrinter::print(probabilities)); - LOG_DEBUG(" joint probability = " << jointProbability - << ", extreme probability = " << extremeProbability - << ", overallScore = " << overallScore); + LOG_DEBUG(" joint probability = " << jointProbability << ", extreme probability = " << extremeProbability + << ", overallScore = " << overallScore); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.137591, overallScore, 5e-7); } @@ -201,10 +186,7 @@ void CAnomalyScoreTest::testComputeScores() // Expect a high anomaly score which is generated by the // extreme samples probability. { - double p[] = - { - 0.21, 0.52, 0.0058, 0.13, 0.67, 0.89, 0.32, 0.03, 0.46, 0.222, 0.35, 0.93, 0.01, 0.89, 0.32, 0.46, 0.0021 - }; + double p[] = {0.21, 0.52, 0.0058, 0.13, 0.67, 0.89, 0.32, 0.03, 0.46, 0.222, 0.35, 0.93, 0.01, 0.89, 0.32, 0.46, 0.0021}; TDoubleVec probabilities(boost::begin(p), boost::end(p)); TScores::compute(jointProbabilityWeight, @@ -218,8 +200,7 @@ void CAnomalyScoreTest::testComputeScores() TJointProbabilityCalculator jointProbabilityCalculator; TLogExtremeProbabilityCalculator extremeProbabilityCalculator(4); - for (size_t i = 0; i < boost::size(probabilities); ++i) - { + for (size_t i = 0; i < boost::size(probabilities); ++i) { jointProbabilityCalculator.add(p[i]); extremeProbabilityCalculator.add(p[i]); } @@ -232,9 +213,8 @@ void CAnomalyScoreTest::testComputeScores() extremeProbability = std::exp(extremeProbability); LOG_DEBUG("5) probabilities = " << core::CContainerPrinter::print(probabilities)); - LOG_DEBUG(" joint probability = " << jointProbability - << ", extreme probability = " << extremeProbability - << ", overallScore = " << overallScore); + LOG_DEBUG(" joint probability = " << jointProbability << ", extreme probability = " << extremeProbability + << ", overallScore = " << overallScore); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.029413, overallScore, 5e-7); } @@ -242,10 +222,7 @@ void CAnomalyScoreTest::testComputeScores() { // Test underflow. 
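A note on the underflow case that follows: the raw product of probabilities as small as 1.21e-300 is exactly 0.0 in double arithmetic, so the score computation presumably accumulates log-probabilities rather than the probabilities themselves. A minimal illustration of why that works; this is a sketch, not the library's implementation:

    #include <cmath>
    #include <vector>

    // log(1e-100) + log(1.21e-300) is about -921, comfortably representable,
    // while the raw product 1e-100 * 1.21e-300 underflows to 0.0 in double.
    double logJointProbability(const std::vector<double>& probabilities) {
        double logP = 0.0;
        for (double p : probabilities) {
            logP += std::log(p); // assumes every p > 0
        }
        return logP;
    }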
- double p[] = - { - 1e-100, 1.7e-20, 1.6e-150, 2.2e-150, 1.3e-180, 1.35e-95, 1.7e-180, 1.21e-300 - }; + double p[] = {1e-100, 1.7e-20, 1.6e-150, 2.2e-150, 1.3e-180, 1.35e-95, 1.7e-180, 1.21e-300}; TDoubleVec probabilities(boost::begin(p), boost::end(p)); TScores::compute(jointProbabilityWeight, @@ -263,8 +240,7 @@ void CAnomalyScoreTest::testComputeScores() } } -void CAnomalyScoreTest::testNormalizeScoresQuantiles() -{ +void CAnomalyScoreTest::testNormalizeScoresQuantiles() { using TDoubleMSet = std::multiset; using TDoubleMSetItr = TDoubleMSet::iterator; @@ -272,10 +248,8 @@ void CAnomalyScoreTest::testNormalizeScoresQuantiles() TDoubleVec samples; rng.generateGammaSamples(1.0, 2.0, 20000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (samples[i] < 0.5) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (samples[i] < 0.5) { samples[i] = 0.0; } } @@ -287,18 +261,15 @@ void CAnomalyScoreTest::testNormalizeScoresQuantiles() double numberSamples = 0.0; TDoubleMSet scores; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { scores.insert(samples[i]); TDoubleVec sample(1u, samples[i]); normalizer.updateQuantiles(sample); TDoubleMSetItr itr = scores.upper_bound(samples[i]); - double trueQuantile = static_cast(std::distance(scores.begin(), itr)) - / static_cast(scores.size()); + double trueQuantile = static_cast(std::distance(scores.begin(), itr)) / static_cast(scores.size()); - if (trueQuantile > 0.9) - { + if (trueQuantile > 0.9) { double lowerBound; double upperBound; normalizer.quantile(samples[i], 0.0, lowerBound, upperBound); @@ -309,9 +280,7 @@ void CAnomalyScoreTest::testNormalizeScoresQuantiles() totalError += error; numberSamples += 1.0; - LOG_DEBUG("trueQuantile = " << trueQuantile - << ", lowerBound = " << lowerBound - << ", upperBound = " << upperBound); + LOG_DEBUG("trueQuantile = " << trueQuantile << ", lowerBound = " << lowerBound << ", upperBound = " << upperBound); CPPUNIT_ASSERT(error < 0.02); } } @@ -320,8 +289,7 @@ void CAnomalyScoreTest::testNormalizeScoresQuantiles() CPPUNIT_ASSERT(totalError / numberSamples < 0.0043); } -void CAnomalyScoreTest::testNormalizeScoresNoisy() -{ +void CAnomalyScoreTest::testNormalizeScoresNoisy() { using TDoubleSizeMap = std::multimap; using TDoubleSizeMapCItr = TDoubleSizeMap::const_iterator; @@ -331,27 +299,18 @@ void CAnomalyScoreTest::testNormalizeScoresNoisy() TDoubleVec samples; rng.generateGammaSamples(1.0, 2.0, 2000, samples); - for (std::size_t i = 0u; i < samples.size(); ++i) - { - if (samples[i] < 0.5) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { + if (samples[i] < 0.5) { samples[i] = 0.0; } } - std::size_t largeAnomalyTimes[] = - { - 50, 110, 190, 220, 290, 310, 600, 620, 790, 900, 1100, 1400, 1600, 1900 - }; + std::size_t largeAnomalyTimes[] = {50, 110, 190, 220, 290, 310, 600, 620, 790, 900, 1100, 1400, 1600, 1900}; - double largeAnomalies[] = - { - 50.0, 350.0, 30.0, 100.0, 30.0, 45.0, 100.0, 120.0, 60.0, 130.0, 100.0, 90.0, 45.0, 30.0 - }; + double largeAnomalies[] = {50.0, 350.0, 30.0, 100.0, 30.0, 45.0, 100.0, 120.0, 60.0, 130.0, 100.0, 90.0, 45.0, 30.0}; // Add in the big anomalies. 
- for (size_t i = 0; i < boost::size(largeAnomalyTimes); ++i) - { + for (size_t i = 0; i < boost::size(largeAnomalyTimes); ++i) { samples[largeAnomalyTimes[i]] += largeAnomalies[i]; } @@ -365,12 +324,10 @@ void CAnomalyScoreTest::testNormalizeScoresNoisy() TDoubleSizeMap maxScores; - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { normalizer.updateQuantiles(samples[i]); } - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double sample = samples[i]; normalizer.normalize(sample); LOG_DEBUG(i << ") raw = " << samples[i] << ", normalized = " << sample); @@ -378,12 +335,9 @@ void CAnomalyScoreTest::testNormalizeScoresNoisy() //raw << " " << samples[i]; //normalized << " " << sample[0]; - if (maxScores.size() < boost::size(largeAnomalyTimes)) - { + if (maxScores.size() < boost::size(largeAnomalyTimes)) { maxScores.insert(TDoubleSizeMap::value_type(sample, i)); - } - else if (sample > maxScores.begin()->first) - { + } else if (sample > maxScores.begin()->first) { LOG_DEBUG("normalized = " << sample << " removing " << maxScores.begin()->first); maxScores.erase(maxScores.begin()); maxScores.insert(TDoubleSizeMap::value_type(sample, i)); @@ -400,22 +354,17 @@ void CAnomalyScoreTest::testNormalizeScoresNoisy() TSizeVec times; - for (TDoubleSizeMapCItr itr = maxScores.begin(); - itr != maxScores.end(); - ++itr) - { + for (TDoubleSizeMapCItr itr = maxScores.begin(); itr != maxScores.end(); ++itr) { times.push_back(itr->second); } std::sort(times.begin(), times.end()); LOG_DEBUG("times = " << core::CContainerPrinter::print(times)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(largeAnomalyTimes), - core::CContainerPrinter::print(times)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(largeAnomalyTimes), core::CContainerPrinter::print(times)); } -void CAnomalyScoreTest::testNormalizeScoresLargeScore() -{ +void CAnomalyScoreTest::testNormalizeScoresLargeScore() { // Test a large score isn't too dominant. test::CRandomNumbers rng; @@ -423,26 +372,23 @@ void CAnomalyScoreTest::testNormalizeScoresLargeScore() TDoubleVec samples; rng.generateUniformSamples(0.0, 0.1, 500, samples); - std::size_t anomalyTimes[] = { 50, 110, 190, 220, 290 }; - double anomalies[] = { 2.0, 4.0, 4.0, 5.0, 20.0 }; + std::size_t anomalyTimes[] = {50, 110, 190, 220, 290}; + double anomalies[] = {2.0, 4.0, 4.0, 5.0, 20.0}; // Add in the anomalies. 
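These normalizer tests, above and below, all follow the same two-phase protocol: every raw score is first streamed through updateQuantiles to build the quantile summary, and only then is normalize called to rescale a raw score in place. A compressed sketch of the pattern as the tests use it; the 1800-second bucket length mirrors the tests, and error handling is omitted:

    model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(1800);
    model::CAnomalyScore::CNormalizer normalizer(config);
    for (double score : samples) {
        normalizer.updateQuantiles(score); // phase 1: learn the score distribution
    }
    double normalized = samples[anomalyTime];
    normalizer.normalize(normalized);      // phase 2: rescale the score in place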
- for (size_t i = 0; i < boost::size(anomalyTimes); ++i) - { + for (size_t i = 0; i < boost::size(anomalyTimes); ++i) { samples[anomalyTimes[i]] += anomalies[i]; } model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(1800); model::CAnomalyScore::CNormalizer normalizer(config); - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { normalizer.updateQuantiles(samples[i]); } TDoubleVec scores; - for (std::size_t i = 0u; i < boost::size(anomalyTimes); ++i) - { + for (std::size_t i = 0u; i < boost::size(anomalyTimes); ++i) { double sample = samples[anomalyTimes[i]]; normalizer.normalize(sample); scores.push_back(sample); @@ -450,65 +396,53 @@ void CAnomalyScoreTest::testNormalizeScoresLargeScore() std::sort(scores.begin(), scores.end()); LOG_DEBUG("scores = " << core::CContainerPrinter::print(scores)); - for (std::size_t i = 0u; i+1 < boost::size(anomalies); ++i) - { - double uplift = scores[i] - 100.0 * anomalies[i]/anomalies[boost::size(anomalies) - 1]; + for (std::size_t i = 0u; i + 1 < boost::size(anomalies); ++i) { + double uplift = scores[i] - 100.0 * anomalies[i] / anomalies[boost::size(anomalies) - 1]; LOG_DEBUG("uplift = " << uplift); CPPUNIT_ASSERT(uplift > 5.0); CPPUNIT_ASSERT(uplift < 13.0); } } -void CAnomalyScoreTest::testNormalizeScoresNearZero() -{ +void CAnomalyScoreTest::testNormalizeScoresNearZero() { // Test the behaviour for scores near zero. - std::size_t anomalyTimes[] = { 50, 110, 190, 220, 290 }; - double anomalies[] = { 0.02, 0.01, 0.006, 0.01, 0.015 }; + std::size_t anomalyTimes[] = {50, 110, 190, 220, 290}; + double anomalies[] = {0.02, 0.01, 0.006, 0.01, 0.015}; - std::size_t nonZeroCounts[] = { 0, 100, 200, 249, 251, 300, 400, 450 }; + std::size_t nonZeroCounts[] = {0, 100, 200, 249, 251, 300, 400, 450}; - std::string expectedScores[] = - { - std::string("[41.62776, 32.36435, 26.16873, 32.36435, 37.68726]"), - std::string("[41.62776, 32.36435, 26.16873, 32.36435, 37.68726]"), - std::string("[41.62776, 32.36435, 17.74216, 32.36435, 37.68726]"), - std::string("[41.62776, 32.36435, 11.1645, 32.36435, 37.68726]"), - std::string("[41.62776, 32.36435, 11.05937, 32.36435, 37.68726]"), - std::string("[41.62776, 32.36435, 8.523397, 32.36435, 37.68726]"), - std::string("[1.14, 1.04, 1, 1.04, 1.09]"), - std::string("[1.14, 1.04, 1, 1.04, 1.09]") - }; + std::string expectedScores[] = {std::string("[41.62776, 32.36435, 26.16873, 32.36435, 37.68726]"), + std::string("[41.62776, 32.36435, 26.16873, 32.36435, 37.68726]"), + std::string("[41.62776, 32.36435, 17.74216, 32.36435, 37.68726]"), + std::string("[41.62776, 32.36435, 11.1645, 32.36435, 37.68726]"), + std::string("[41.62776, 32.36435, 11.05937, 32.36435, 37.68726]"), + std::string("[41.62776, 32.36435, 8.523397, 32.36435, 37.68726]"), + std::string("[1.14, 1.04, 1, 1.04, 1.09]"), + std::string("[1.14, 1.04, 1, 1.04, 1.09]")}; - for (std::size_t i = 0u; i < boost::size(nonZeroCounts); ++i) - { + for (std::size_t i = 0u; i < boost::size(nonZeroCounts); ++i) { LOG_DEBUG("non-zero count = " << nonZeroCounts[i]); TDoubleVec samples(500u, 0.0); - for (std::size_t j = 0u; j < nonZeroCounts[i]; ++j) - { - if (std::find(boost::begin(anomalyTimes), - boost::end(anomalyTimes), j) == boost::end(anomalyTimes)) - { + for (std::size_t j = 0u; j < nonZeroCounts[i]; ++j) { + if (std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), j) == boost::end(anomalyTimes)) { samples[j] += 0.0055; } } - for (std::size_t j = 0u; j < 
boost::size(anomalyTimes); ++j) - { + for (std::size_t j = 0u; j < boost::size(anomalyTimes); ++j) { samples[anomalyTimes[j]] += anomalies[j]; } model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(1800); model::CAnomalyScore::CNormalizer normalizer(config); - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { normalizer.updateQuantiles(samples[j]); } TDoubleVec maxScores; - for (std::size_t j = 0u; j < boost::size(anomalyTimes); ++j) - { + for (std::size_t j = 0u; j < boost::size(anomalyTimes); ++j) { double sample = samples[anomalyTimes[j]]; normalizer.normalize(sample); maxScores.push_back(sample); @@ -519,8 +453,7 @@ void CAnomalyScoreTest::testNormalizeScoresNearZero() } } -void CAnomalyScoreTest::testNormalizeScoresOrdering() -{ +void CAnomalyScoreTest::testNormalizeScoresOrdering() { // Test that the normalized scores ordering matches the // -log(probability) ordering. @@ -531,30 +464,25 @@ void CAnomalyScoreTest::testNormalizeScoresOrdering() TDoubleVec allScores; rng.generateUniformSamples(0.0, 50.0, n, allScores); - for (std::size_t i = 200u; i <= n; i += 200) - { + for (std::size_t i = 200u; i <= n; i += 200) { LOG_DEBUG("*** " << i << " ***"); TDoubleVec scores(&allScores[0], &allScores[i]); model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(300); model::CAnomalyScore::CNormalizer normalizer(config); - for (std::size_t j = 0u; j < i; ++j) - { + for (std::size_t j = 0u; j < i; ++j) { normalizer.updateQuantiles(scores[j]); } TDoubleVec normalizedScores(scores); - for (std::size_t j = 0u; j < i; ++j) - { + for (std::size_t j = 0u; j < i; ++j) { CPPUNIT_ASSERT(normalizer.normalize(normalizedScores[j])); } maths::COrderings::simultaneousSort(scores, normalizedScores); - for (std::size_t j = 1u; j < normalizedScores.size(); ++j) - { - if (normalizedScores[j] - normalizedScores[j - 1] < -0.01) - { + for (std::size_t j = 1u; j < normalizedScores.size(); ++j) { + if (normalizedScores[j] - normalizedScores[j - 1] < -0.01) { LOG_DEBUG(normalizedScores[j] << " " << normalizedScores[j - 1]); } CPPUNIT_ASSERT(normalizedScores[j] - normalizedScores[j - 1] > -0.01); @@ -562,8 +490,7 @@ void CAnomalyScoreTest::testNormalizeScoresOrdering() } } -void CAnomalyScoreTest::testJsonConversion() -{ +void CAnomalyScoreTest::testJsonConversion() { test::CRandomNumbers rng; model::CAnomalyScore::TDoubleVec samples; @@ -584,7 +511,7 @@ void CAnomalyScoreTest::testJsonConversion() core::CJsonStatePersistInserter inserter(ss); origNormalizer.acceptPersistInserter(inserter); } - std::string origJson = ss.str(); + std::string origJson = ss.str(); // The traverser expects the state json in a embedded document std::string wrappedJson = "{\"topLevel\" : " + origJson + "}"; @@ -594,9 +521,7 @@ void CAnomalyScoreTest::testJsonConversion() model::CAnomalyScore::CNormalizer restoredNormalizer(config); { core::CJsonStateRestoreTraverser traverser(iss); - traverser.traverseSubLevel(boost::bind(&model::CAnomalyScore::CNormalizer::acceptRestoreTraverser, - &restoredNormalizer, - _1)); + traverser.traverseSubLevel(boost::bind(&model::CAnomalyScore::CNormalizer::acceptRestoreTraverser, &restoredNormalizer, _1)); } // The new JSON representation of the new filter should be the same as the original @@ -612,13 +537,7 @@ void CAnomalyScoreTest::testJsonConversion() // representation and extra fields that are used for indexing // in a database std::string toJson; - 
model::CAnomalyScore::normalizerToJson(origNormalizer, - "dummy", - "sysChange", - "my normalizer", - 1234567890, - toJson); - + model::CAnomalyScore::normalizerToJson(origNormalizer, "dummy", "sysChange", "my normalizer", 1234567890, toJson); rapidjson::Document doc; doc.Parse(toJson.c_str()); @@ -630,7 +549,7 @@ void CAnomalyScoreTest::testJsonConversion() CPPUNIT_ASSERT(doc.HasMember(model::CAnomalyScore::TIME_ATTRIBUTE.c_str())); CPPUNIT_ASSERT(doc.HasMember("a")); - rapidjson::Value &stateDoc = doc["a"]; + rapidjson::Value& stateDoc = doc["a"]; rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); @@ -644,25 +563,18 @@ void CAnomalyScoreTest::testJsonConversion() CPPUNIT_ASSERT_EQUAL(origJson, state); - // restore from the JSON state with extra fields used for // indexing in the database model::CAnomalyScore::CNormalizer fromJsonNormalizer(config); CPPUNIT_ASSERT(model::CAnomalyScore::normalizerFromJson(toJson, fromJsonNormalizer)); std::string restoredJson; - model::CAnomalyScore::normalizerToJson(fromJsonNormalizer, - "dummy", - "sysChange", - "my normalizer", - 1234567890, - restoredJson); + model::CAnomalyScore::normalizerToJson(fromJsonNormalizer, "dummy", "sysChange", "my normalizer", 1234567890, restoredJson); CPPUNIT_ASSERT_EQUAL(toJson, restoredJson); } -void CAnomalyScoreTest::testPersistEmpty() -{ +void CAnomalyScoreTest::testPersistEmpty() { // This tests what happens when we persist and restore quantiles that have // never had any data added - see bug 761 in Bugzilla @@ -673,59 +585,39 @@ void CAnomalyScoreTest::testPersistEmpty() CPPUNIT_ASSERT(!origNormalizer.canNormalize()); std::string origJson; - model::CAnomalyScore::normalizerToJson(origNormalizer, - "test", - "test", - "test", - 1234567890, - origJson); + model::CAnomalyScore::normalizerToJson(origNormalizer, "test", "test", "test", 1234567890, origJson); model::CAnomalyScore::CNormalizer newNormalizer(config); - CPPUNIT_ASSERT(model::CAnomalyScore::normalizerFromJson(origJson, - newNormalizer)); + CPPUNIT_ASSERT(model::CAnomalyScore::normalizerFromJson(origJson, newNormalizer)); CPPUNIT_ASSERT(!newNormalizer.canNormalize()); std::string newJson; - model::CAnomalyScore::normalizerToJson(newNormalizer, - "test", - "test", - "test", - 1234567890, - newJson); + model::CAnomalyScore::normalizerToJson(newNormalizer, "test", "test", "test", 1234567890, newJson); CPPUNIT_ASSERT_EQUAL(origJson, newJson); } -CppUnit::Test *CAnomalyScoreTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CAnomalyScoreTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyScoreTest::testComputeScores", - &CAnomalyScoreTest::testComputeScores) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyScoreTest::testNormalizeScoresQuantiles", - &CAnomalyScoreTest::testNormalizeScoresQuantiles) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyScoreTest::testNormalizeScoresNoisy", - &CAnomalyScoreTest::testNormalizeScoresNoisy) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyScoreTest::testNormalizeScoresLargeScore", - &CAnomalyScoreTest::testNormalizeScoresLargeScore) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyScoreTest::testNormalizeScoresNearZero", - &CAnomalyScoreTest::testNormalizeScoresNearZero) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyScoreTest::testNormalizeScoresOrdering", - &CAnomalyScoreTest::testNormalizeScoresOrdering) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - 
"CAnomalyScoreTest::testJsonConversion", - &CAnomalyScoreTest::testJsonConversion) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CAnomalyScoreTest::testPersistEmpty", - &CAnomalyScoreTest::testPersistEmpty) ); +CppUnit::Test* CAnomalyScoreTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnomalyScoreTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CAnomalyScoreTest::testComputeScores", &CAnomalyScoreTest::testComputeScores)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyScoreTest::testNormalizeScoresQuantiles", + &CAnomalyScoreTest::testNormalizeScoresQuantiles)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyScoreTest::testNormalizeScoresNoisy", + &CAnomalyScoreTest::testNormalizeScoresNoisy)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyScoreTest::testNormalizeScoresLargeScore", + &CAnomalyScoreTest::testNormalizeScoresLargeScore)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyScoreTest::testNormalizeScoresNearZero", + &CAnomalyScoreTest::testNormalizeScoresNearZero)); + suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyScoreTest::testNormalizeScoresOrdering", + &CAnomalyScoreTest::testNormalizeScoresOrdering)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CAnomalyScoreTest::testJsonConversion", &CAnomalyScoreTest::testJsonConversion)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CAnomalyScoreTest::testPersistEmpty", &CAnomalyScoreTest::testPersistEmpty)); return suiteOfTests; } diff --git a/lib/model/unittest/CAnomalyScoreTest.h b/lib/model/unittest/CAnomalyScoreTest.h index f22244a9ed..7a53df6d76 100644 --- a/lib/model/unittest/CAnomalyScoreTest.h +++ b/lib/model/unittest/CAnomalyScoreTest.h @@ -11,24 +11,21 @@ #include #include - -class CAnomalyScoreTest : public CppUnit::TestFixture -{ - public: - typedef std::vector TDoubleVec; - - public: - void testComputeScores(); - void testNormalizeScoresQuantiles(); - void testNormalizeScoresNoisy(); - void testNormalizeScoresLargeScore(); - void testNormalizeScoresNearZero(); - void testNormalizeScoresOrdering(); - void testJsonConversion(); - void testPersistEmpty(); - - static CppUnit::Test *suite(); +class CAnomalyScoreTest : public CppUnit::TestFixture { +public: + typedef std::vector TDoubleVec; + +public: + void testComputeScores(); + void testNormalizeScoresQuantiles(); + void testNormalizeScoresNoisy(); + void testNormalizeScoresLargeScore(); + void testNormalizeScoresNearZero(); + void testNormalizeScoresOrdering(); + void testJsonConversion(); + void testPersistEmpty(); + + static CppUnit::Test* suite(); }; #endif // INCLUDED_CAnomalyScoreCalculatorTest_h - diff --git a/lib/model/unittest/CBucketQueueTest.cc b/lib/model/unittest/CBucketQueueTest.cc index 76c57371c4..cb426cca06 100644 --- a/lib/model/unittest/CBucketQueueTest.cc +++ b/lib/model/unittest/CBucketQueueTest.cc @@ -6,10 +6,10 @@ #include "CBucketQueueTest.h" -#include -#include #include #include +#include +#include #include @@ -26,13 +26,12 @@ using TSizeSizePrUInt64UMap = boost::unordered_map; using TSizeSizePrUInt64UMapQueue = model::CBucketQueue; using TSizeSizePrUInt64UMapQueueCItr = TSizeSizePrUInt64UMapQueue::const_iterator; -void CBucketQueueTest::testConstructorFillsQueue() -{ +void CBucketQueueTest::testConstructorFillsQueue() { CBucketQueue queue(3, 5, 15); CPPUNIT_ASSERT_EQUAL(std::size_t(4), queue.size()); - std::set values; + std::set values; values.insert(&queue.get(0)); values.insert(&queue.get(5)); values.insert(&queue.get(10)); @@ -40,8 
+39,7 @@ void CBucketQueueTest::testConstructorFillsQueue() CPPUNIT_ASSERT_EQUAL(std::size_t(4), values.size()); } -void CBucketQueueTest::testPushGivenEarlierTime() -{ +void CBucketQueueTest::testPushGivenEarlierTime() { CBucketQueue queue(1, 5, 0); queue.push("a", 5); queue.push("b", 10); @@ -54,8 +52,7 @@ void CBucketQueueTest::testPushGivenEarlierTime() CPPUNIT_ASSERT_EQUAL(std::string("b"), queue.get(12)); } -void CBucketQueueTest::testGetGivenFullQueueWithNoPop() -{ +void CBucketQueueTest::testGetGivenFullQueueWithNoPop() { CBucketQueue queue(1, 5, 0); queue.push("a", 5); queue.push("b", 10); @@ -65,8 +62,7 @@ void CBucketQueueTest::testGetGivenFullQueueWithNoPop() CPPUNIT_ASSERT_EQUAL(std::string("b"), queue.get(10)); } -void CBucketQueueTest::testGetGivenFullQueueAfterPop() -{ +void CBucketQueueTest::testGetGivenFullQueueAfterPop() { CBucketQueue queue(1, 5, 0); queue.push("a", 5); queue.push("b", 10); @@ -77,8 +73,7 @@ void CBucketQueueTest::testGetGivenFullQueueAfterPop() CPPUNIT_ASSERT_EQUAL(std::string("c"), queue.get(19)); } -void CBucketQueueTest::testClear() -{ +void CBucketQueueTest::testClear() { CBucketQueue queue(2, 5, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(3), queue.size()); queue.push(0, 5); @@ -94,8 +89,7 @@ void CBucketQueueTest::testClear() CPPUNIT_ASSERT_EQUAL(std::size_t(3), queue.size()); } -void CBucketQueueTest::testIterators() -{ +void CBucketQueueTest::testIterators() { using TStringQueueItr = CBucketQueue::iterator; CBucketQueue queue(1, 5, 0); @@ -103,8 +97,7 @@ void CBucketQueueTest::testIterators() queue.push("b", 10); std::vector strings; - for (TStringQueueItr itr = queue.begin(); itr != queue.end(); ++itr) - { + for (TStringQueueItr itr = queue.begin(); itr != queue.end(); ++itr) { strings.push_back(*itr); } @@ -113,8 +106,7 @@ void CBucketQueueTest::testIterators() CPPUNIT_ASSERT_EQUAL(std::string("a"), strings[1]); } -void CBucketQueueTest::testReverseIterators() -{ +void CBucketQueueTest::testReverseIterators() { using TStringQueueCRItr = CBucketQueue::const_reverse_iterator; CBucketQueue queue(1, 5, 0); @@ -122,8 +114,7 @@ void CBucketQueueTest::testReverseIterators() queue.push("b", 10); std::vector strings; - for (TStringQueueCRItr itr = queue.rbegin(); itr != queue.rend(); ++itr) - { + for (TStringQueueCRItr itr = queue.rbegin(); itr != queue.rend(); ++itr) { strings.push_back(*itr); } @@ -132,9 +123,7 @@ void CBucketQueueTest::testReverseIterators() CPPUNIT_ASSERT_EQUAL(std::string("b"), strings[1]); } - -void CBucketQueueTest::testBucketQueueUMap() -{ +void CBucketQueueTest::testBucketQueueUMap() { // Tests the memory usage of an unordered_map in a bucket queue // before and after persistence std::size_t usageBefore = 0; @@ -164,10 +153,8 @@ void CBucketQueueTest::testBucketQueueUMap() queue.latest()[TSizeSizePr(5, 67)] = 7; queue.latest()[TSizeSizePr(58, 76)] = 7; queue.push(TSizeSizePrUInt64UMap(1)); - for (std::size_t i = 0; i < 10000; i += 100) - { - for (std::size_t j = 0; j < 50000; j += 400) - { + for (std::size_t i = 0; i < 10000; i += 100) { + for (std::size_t j = 0; j < 50000; j += 400) { queue.latest()[TSizeSizePr(i, j)] = 99 * i * j + 12; } } @@ -184,35 +171,23 @@ void CBucketQueueTest::testBucketQueueUMap() } } - -CppUnit::Test *CBucketQueueTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBucketQueueTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBucketQueueTest::testConstructorFillsQueue", - &CBucketQueueTest::testConstructorFillsQueue)); - suiteOfTests->addTest( new 
CppUnit::TestCaller( - "CBucketQueueTest::testPushGivenEarlierTime", - &CBucketQueueTest::testPushGivenEarlierTime)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBucketQueueTest::testGetGivenFullQueueWithNoPop", - &CBucketQueueTest::testGetGivenFullQueueWithNoPop)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBucketQueueTest::testGetGivenFullQueueAfterPop", - &CBucketQueueTest::testGetGivenFullQueueAfterPop)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBucketQueueTest::testClear", - &CBucketQueueTest::testClear)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBucketQueueTest::testIterators", - &CBucketQueueTest::testIterators)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBucketQueueTest::testReverseIterators", - &CBucketQueueTest::testReverseIterators)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBucketQueueTest::testBucketQueueUMap", - &CBucketQueueTest::testBucketQueueUMap) ); +CppUnit::Test* CBucketQueueTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBucketQueueTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CBucketQueueTest::testConstructorFillsQueue", + &CBucketQueueTest::testConstructorFillsQueue)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBucketQueueTest::testPushGivenEarlierTime", + &CBucketQueueTest::testPushGivenEarlierTime)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBucketQueueTest::testGetGivenFullQueueWithNoPop", + &CBucketQueueTest::testGetGivenFullQueueWithNoPop)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBucketQueueTest::testGetGivenFullQueueAfterPop", + &CBucketQueueTest::testGetGivenFullQueueAfterPop)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBucketQueueTest::testClear", &CBucketQueueTest::testClear)); + suiteOfTests->addTest(new CppUnit::TestCaller("CBucketQueueTest::testIterators", &CBucketQueueTest::testIterators)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBucketQueueTest::testReverseIterators", &CBucketQueueTest::testReverseIterators)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CBucketQueueTest::testBucketQueueUMap", &CBucketQueueTest::testBucketQueueUMap)); return suiteOfTests; } diff --git a/lib/model/unittest/CBucketQueueTest.h b/lib/model/unittest/CBucketQueueTest.h index c2b9fb64bb..0380ddef90 100644 --- a/lib/model/unittest/CBucketQueueTest.h +++ b/lib/model/unittest/CBucketQueueTest.h @@ -8,19 +8,18 @@ #include -class CBucketQueueTest : public CppUnit::TestFixture -{ - public: - void testConstructorFillsQueue(); - void testPushGivenEarlierTime(); - void testGetGivenFullQueueWithNoPop(); - void testGetGivenFullQueueAfterPop(); - void testClear(); - void testIterators(); - void testReverseIterators(); - void testBucketQueueUMap(); +class CBucketQueueTest : public CppUnit::TestFixture { +public: + void testConstructorFillsQueue(); + void testPushGivenEarlierTime(); + void testGetGivenFullQueueWithNoPop(); + void testGetGivenFullQueueAfterPop(); + void testClear(); + void testIterators(); + void testReverseIterators(); + void testBucketQueueUMap(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CBucketQueueTest_h diff --git a/lib/model/unittest/CCountingModelTest.cc b/lib/model/unittest/CCountingModelTest.cc index a0e0b98d2d..0a3dbbc624 100644 --- a/lib/model/unittest/CCountingModelTest.cc +++ b/lib/model/unittest/CCountingModelTest.cc @@ -7,12 +7,11 @@ #include "CCountingModelTest.h" #include -#include #include +#include -#include -#include #include +#include 
 #include
 #include
 #include
 
@@ -24,13 +23,8 @@
 using namespace ml;
 using namespace model;
 
-
-namespace
-{
-std::size_t addPerson(const std::string &p,
-                      const CModelFactory::TDataGathererPtr &gatherer,
-                      CResourceMonitor &resourceMonitor)
-{
+namespace {
+std::size_t addPerson(const std::string& p, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) {
     CDataGatherer::TStrCPtrVec person;
     person.push_back(&p);
     CEventData result;
@@ -38,11 +32,7 @@ std::size_t addPerson(const std::string &p,
     return *result.personId();
 }
 
-void addArrival(CDataGatherer &gatherer,
-                CResourceMonitor &resourceMonitor,
-                core_t::TTime time,
-                const std::string &person)
-{
+void addArrival(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, core_t::TTime time, const std::string& person) {
     CDataGatherer::TStrCPtrVec fieldValues;
     fieldValues.push_back(&person);
 
@@ -51,8 +41,7 @@ void addArrival(CDataGatherer &gatherer,
     gatherer.addArrival(fieldValues, eventData, resourceMonitor);
 }
 
-SModelParams::TStrDetectionRulePr makeScheduledEvent(const std::string &description, double start, double end)
-{
+SModelParams::TStrDetectionRulePr makeScheduledEvent(const std::string& description, double start, double end) {
    CRuleCondition conditionGte;
    conditionGte.type(CRuleCondition::E_Time);
    conditionGte.condition().s_Op = CRuleCondition::E_GTE;
@@ -75,8 +64,7 @@ SModelParams::TStrDetectionRulePr makeScheduledEvent(const std::string &descript
 const std::string EMPTY_STRING;
 }
 
-void CCountingModelTest::testSkipSampling()
-{
+void CCountingModelTest::testSkipSampling() {
     LOG_DEBUG("*** testSkipSampling ***");
 
     core_t::TTime startTime(100);
@@ -96,7 +84,7 @@ void CCountingModelTest::testSkipSampling()
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gathererNoGap, m_ResourceMonitor));
     CModelFactory::SModelInitializationData modelNoGapInitData(gathererNoGap);
     CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData));
-    CCountingModel *modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
+    CCountingModel* modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
 
     // |2|2|0|0|1| -> 1.0 mean count
     addArrival(*gathererNoGap, m_ResourceMonitor, 100, "p");
@@ -118,7 +106,7 @@ void CCountingModelTest::testSkipSampling()
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gathererWithGap, m_ResourceMonitor));
     CModelFactory::SModelInitializationData modelWithGapInitData(gathererWithGap);
     CAnomalyDetectorModel::TModelPtr modelHolderWithGap(factory.makeModel(modelWithGapInitData));
-    CCountingModel *modelWithGap = dynamic_cast<CCountingModel*>(modelHolderWithGap.get());
+    CCountingModel* modelWithGap = dynamic_cast<CCountingModel*>(modelHolderWithGap.get());
 
     // |2|2|0|0|1|
     // |2|X|X|X|1| -> 1.5 mean count where X means skipped bucket
@@ -137,8 +125,7 @@ void CCountingModelTest::testSkipSampling()
     }
 }
 
-void CCountingModelTest::testCheckScheduledEvents()
-{
+void CCountingModelTest::testCheckScheduledEvents() {
     LOG_DEBUG("*** testCheckScheduledEvents ***");
 
     core_t::TTime startTime(100);
@@ -164,7 +151,7 @@ void CCountingModelTest::testCheckScheduledEvents()
     addArrival(*gatherer, m_ResourceMonitor, 200, "p");
 
     CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData));
-    CCountingModel *modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
+    CCountingModel* modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
 
     SModelParams::TStrDetectionRulePrVec matchedEvents = modelNoGap->checkScheduledEvents(50);
     CPPUNIT_ASSERT_EQUAL(std::size_t{0}, matchedEvents.size());
@@ -210,7 +197,7 @@ void CCountingModelTest::testCheckScheduledEvents()
     addArrival(*gatherer, m_ResourceMonitor, 100, "p");
 
     CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData));
-    CCountingModel *modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
+    CCountingModel* modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
 
     // There are no events at this time
     modelNoGap->sampleBucketStatistics(0, 100, m_ResourceMonitor);
@@ -234,15 +221,12 @@ void CCountingModelTest::testCheckScheduledEvents()
     }
 }
 
-CppUnit::Test *CCountingModelTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CCountingModelTest");
+CppUnit::Test* CCountingModelTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCountingModelTest");
 
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCountingModelTest>(
-                                   "CCountingModelTest::testSkipSampling",
-                                   &CCountingModelTest::testSkipSampling) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CCountingModelTest>(
-                                   "CCountingModelTest::testCheckScheduledEvents",
-                                   &CCountingModelTest::testCheckScheduledEvents) );
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CCountingModelTest>("CCountingModelTest::testSkipSampling", &CCountingModelTest::testSkipSampling));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCountingModelTest>("CCountingModelTest::testCheckScheduledEvents",
+                                                                      &CCountingModelTest::testCheckScheduledEvents));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CCountingModelTest.h b/lib/model/unittest/CCountingModelTest.h
index 4ab3e3a424..a5b0c94d9f 100644
--- a/lib/model/unittest/CCountingModelTest.h
+++ b/lib/model/unittest/CCountingModelTest.h
@@ -11,15 +11,14 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CCountingModelTest : public CppUnit::TestFixture
-{
-    public:
-        void testSkipSampling();
-        void testCheckScheduledEvents();
-        static CppUnit::Test *suite();
-    private:
-        ml::model::CResourceMonitor m_ResourceMonitor;
+class CCountingModelTest : public CppUnit::TestFixture {
+public:
+    void testSkipSampling();
+    void testCheckScheduledEvents();
+    static CppUnit::Test* suite();
+
+private:
+    ml::model::CResourceMonitor m_ResourceMonitor;
 };
 
 #endif // INCLUDED_CCountingModelTest_h
-
diff --git a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc
index 85e4dcb293..e19529f2a3 100644
--- a/lib/model/unittest/CDetectionRuleTest.cc
+++ b/lib/model/unittest/CDetectionRuleTest.cc
@@ -24,8 +24,7 @@
 using namespace ml;
 using namespace model;
 
-namespace
-{
+namespace {
 using TFeatureVec = std::vector<model_t::EFeature>;
 using TStrVec = std::vector<std::string>;
 
@@ -33,55 +32,44 @@ using TStrVec = std::vector<std::string>;
 
 const std::string EMPTY_STRING;
 }
 
-CppUnit::Test *CDetectionRuleTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDetectionRuleTest");
-
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
-                                   "CDetectionRuleTest::testApplyGivenCategoricalCondition",
-                                   &CDetectionRuleTest::testApplyGivenCategoricalCondition));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
-                                   "CDetectionRuleTest::testApplyGivenNumericalActualCondition",
-                                   &CDetectionRuleTest::testApplyGivenNumericalActualCondition));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
-                                   "CDetectionRuleTest::testApplyGivenNumericalTypicalCondition",
-                                   &CDetectionRuleTest::testApplyGivenNumericalTypicalCondition));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
-                                   "CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition",
-                                   &CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
-                                   "CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField",
-
&CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRuleTest::testApplyGivenNoActualValueAvailable", - &CDetectionRuleTest::testApplyGivenNoActualValueAvailable)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel", - &CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel", - &CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr", - &CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd", - &CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel", - &CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRuleTest::testApplyGivenTimeCondition", - &CDetectionRuleTest::testApplyGivenTimeCondition)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRuleTest::testRuleActions", - &CDetectionRuleTest::testRuleActions)); +CppUnit::Test* CDetectionRuleTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetectionRuleTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenCategoricalCondition", + &CDetectionRuleTest::testApplyGivenCategoricalCondition)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenNumericalActualCondition", + &CDetectionRuleTest::testApplyGivenNumericalActualCondition)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenNumericalTypicalCondition", + &CDetectionRuleTest::testApplyGivenNumericalTypicalCondition)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition", + &CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField", + &CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenNoActualValueAvailable", + &CDetectionRuleTest::testApplyGivenNoActualValueAvailable)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel", + &CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel", + &CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr", + &CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd", + &CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd)); + suiteOfTests->addTest( + new 
CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel", + &CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel)); + suiteOfTests->addTest(new CppUnit::TestCaller("CDetectionRuleTest::testApplyGivenTimeCondition", + &CDetectionRuleTest::testApplyGivenTimeCondition)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CDetectionRuleTest::testRuleActions", &CDetectionRuleTest::testRuleActions)); return suiteOfTests; } -void CDetectionRuleTest::testApplyGivenCategoricalCondition() -{ +void CDetectionRuleTest::testApplyGivenCategoricalCondition() { LOG_DEBUG("*** testApplyGivenCategoricalCondition ***"); core_t::TTime bucketLength = 100; @@ -96,11 +84,21 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() std::string partitionFieldValue("par_1"); std::string personFieldName("over"); std::string attributeFieldName("by"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, partitionFieldName, partitionFieldValue, - personFieldName, attributeFieldName, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_PopulationMetric, + model_t::E_None, + params, + EMPTY_STRING, + partitionFieldName, + partitionFieldValue, + personFieldName, + attributeFieldName, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool added = false; @@ -138,14 +136,16 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100)); } { std::string filterJson("[\"a1*\"]"); @@ -161,14 +161,16 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == false); - 
CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == + false); } { std::string filterJson("[\"*2\"]"); @@ -184,14 +186,16 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100)); } { std::string filterJson("[\"*1*\"]"); @@ -207,14 +211,15 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == + false); } { std::string filterJson("[\"p2\"]"); @@ -230,14 +235,16 @@ void 
CDetectionRuleTest::testApplyGivenCategoricalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100)); } { std::string filterJson("[\"par_1\"]"); @@ -253,14 +260,14 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100)); } { std::string filterJson("[\"par_2\"]"); @@ -276,19 +283,22 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, 
resultType, 0, 0, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == + false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == + false); } } -void CDetectionRuleTest::testApplyGivenNumericalActualCondition() -{ +void CDetectionRuleTest::testApplyGivenNumericalActualCondition() { LOG_DEBUG("*** testApplyGivenNumericalActionCondition ***"); core_t::TTime bucketLength = 100; @@ -299,10 +309,21 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -328,12 +349,11 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == + false); } { @@ -348,12 +368,10 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == + false); } { // Test rule with condition with operator GT @@ -367,12 +385,11 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() 
model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); } { // Test rule with condition with operator GT @@ -386,17 +403,14 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); } } -void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() -{ +void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() { LOG_DEBUG("*** testApplyGivenNumericalTypicalCondition ***"); core_t::TTime bucketLength = 100; @@ -407,10 +421,21 @@ void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -442,12 +467,11 @@ void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); 
+ CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == + false); } { @@ -462,17 +486,15 @@ void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); } } -void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() -{ +void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() { LOG_DEBUG("*** testApplyGivenNumericalDiffAbsCondition ***"); core_t::TTime bucketLength = 100; @@ -483,10 +505,21 @@ void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -530,18 +563,16 @@ void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 500) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 600) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, 
model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 500) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 600) == + false); } { @@ -556,23 +587,20 @@ void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 500) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 600)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 500) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 600)); } } -void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField() -{ +void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField() { LOG_DEBUG("*** testApplyGivenSingleSeriesModelAndConditionWithField ***"); core_t::TTime bucketLength = 100; @@ -583,10 +611,21 @@ void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -613,16 +652,12 @@ void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - 
model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == false); } -void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() -{ +void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() { LOG_DEBUG("*** testApplyGivenNoActualValueAvailable ***"); core_t::TTime bucketLength = 100; @@ -633,10 +668,21 @@ void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -659,12 +705,10 @@ void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400) == false); } -void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() -{ +void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() { LOG_DEBUG("*** testApplyGivenDifferentSeriesAndIndividualModel ***"); core_t::TTime bucketLength = 100; @@ -676,10 +720,21 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + personFieldName, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -704,14 +759,11 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - 
CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100) == false); } -void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() -{ +void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() { LOG_DEBUG("*** testApplyGivenDifferentSeriesAndPopulationModel ***"); core_t::TTime bucketLength = 100; @@ -724,10 +776,21 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() features.push_back(model_t::E_PopulationMeanByPersonAndAttribute); std::string personFieldName("over"); std::string attributeFieldName("by"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, personFieldName, attributeFieldName, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_PopulationMetric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + personFieldName, + attributeFieldName, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool added = false; @@ -762,18 +825,17 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == false); + CPPUNIT_ASSERT( + rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == false); } -void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() -{ +void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() { LOG_DEBUG("*** testApplyGivenMultipleConditionsWithOr ***"); core_t::TTime bucketLength = 100; @@ -785,10 +847,21 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, 
features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + personFieldName, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -818,8 +891,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); } { // First applies only @@ -841,8 +914,7 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); } { // Second applies only @@ -864,8 +936,7 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); } { // Both apply @@ -887,13 +958,11 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); } } -void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() -{ +void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() { LOG_DEBUG("*** testApplyGivenMultipleConditionsWithAnd ***"); core_t::TTime bucketLength = 100; @@ -905,10 +974,21 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + personFieldName, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -939,8 +1019,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); } 
{ // First applies only @@ -963,8 +1043,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); } { // Second applies only @@ -987,8 +1067,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); } { // Both apply @@ -1011,13 +1091,11 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); } } -void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel() -{ +void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel() { LOG_DEBUG("*** testApplyGivenTargetFieldIsPartitionAndIndividualModel ***"); core_t::TTime bucketLength = 100; @@ -1031,10 +1109,21 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( std::string partitionFieldName("partition"); std::string partitionFieldValue("partition_1"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, partitionFieldName, partitionFieldValue, personFieldName, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + partitionFieldName, + partitionFieldValue, + personFieldName, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); std::string person1("p1"); bool addedPerson = false; @@ -1062,10 +1151,8 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100)); } { // Matching targetFieldValue @@ -1082,10 +1169,8 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - 
CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100)); } { // Non-matching targetFieldValue @@ -1103,15 +1188,14 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == + false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100) == + false); } } -void CDetectionRuleTest::testApplyGivenTimeCondition() -{ +void CDetectionRuleTest::testApplyGivenTimeCondition() { LOG_DEBUG("*** testApplyGivenTimeCondition ***"); core_t::TTime bucketLength = 100; @@ -1124,11 +1208,21 @@ void CDetectionRuleTest::testApplyGivenTimeCondition() features.push_back(model_t::E_IndividualMeanByPerson); std::string partitionFieldName("partition"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, partitionFieldName, EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, - startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + partitionFieldName, + EMPTY_STRING, + personFieldName, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); CMockModel model(params, gathererPtr, influenceCalculators); CRuleCondition conditionGte; @@ -1147,18 +1241,13 @@ void CDetectionRuleTest::testApplyGivenTimeCondition() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 99) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 150)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 99) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 150)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); } -void CDetectionRuleTest::testRuleActions() -{ +void CDetectionRuleTest::testRuleActions() { LOG_DEBUG("*** testRuleActions ***"); 
core_t::TTime bucketLength = 100; @@ -1171,10 +1260,21 @@ void CDetectionRuleTest::testRuleActions() features.push_back(model_t::E_IndividualMeanByPerson); std::string partitionFieldName("partition"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, partitionFieldName, EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + partitionFieldName, + EMPTY_STRING, + personFieldName, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); CMockModel model(params, gathererPtr, influenceCalculators); CRuleCondition conditionGte; @@ -1188,20 +1288,14 @@ void CDetectionRuleTest::testRuleActions() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); rule.action(CDetectionRule::E_SkipSampling); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); rule.action(static_cast<CDetectionRule::ERuleAction>(3)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, - model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); } diff --git a/lib/model/unittest/CDetectionRuleTest.h b/lib/model/unittest/CDetectionRuleTest.h index c871e37ed3..e505993437 100644 --- a/lib/model/unittest/CDetectionRuleTest.h +++ b/lib/model/unittest/CDetectionRuleTest.h @@ -10,27 +10,26 @@ #include <cppunit/extensions/HelperMacros.h> -class CDetectionRuleTest : public CppUnit::TestFixture -{ - public: - void testApplyGivenCategoricalCondition(); - void testApplyGivenNumericalActualCondition(); - void testApplyGivenNumericalTypicalCondition(); - void testApplyGivenNumericalDiffAbsCondition(); - void testApplyGivenSingleSeriesModelAndConditionWithField(); - void testApplyGivenNoActualValueAvailable(); - void testApplyGivenDifferentSeriesAndIndividualModel(); - void testApplyGivenDifferentSeriesAndPopulationModel(); - void testApplyGivenMultipleConditionsWithOr(); - void testApplyGivenMultipleConditionsWithAnd(); - void 
testApplyGivenTargetFieldIsPartitionAndIndividualModel(); - void testApplyGivenTimeCondition(); - void testRuleActions(); +class CDetectionRuleTest : public CppUnit::TestFixture { +public: + void testApplyGivenCategoricalCondition(); + void testApplyGivenNumericalActualCondition(); + void testApplyGivenNumericalTypicalCondition(); + void testApplyGivenNumericalDiffAbsCondition(); + void testApplyGivenSingleSeriesModelAndConditionWithField(); + void testApplyGivenNoActualValueAvailable(); + void testApplyGivenDifferentSeriesAndIndividualModel(); + void testApplyGivenDifferentSeriesAndPopulationModel(); + void testApplyGivenMultipleConditionsWithOr(); + void testApplyGivenMultipleConditionsWithAnd(); + void testApplyGivenTargetFieldIsPartitionAndIndividualModel(); + void testApplyGivenTimeCondition(); + void testRuleActions(); - static CppUnit::Test *suite(); - private: - ml::model::CResourceMonitor m_ResourceMonitor; + static CppUnit::Test* suite(); + +private: + ml::model::CResourceMonitor m_ResourceMonitor; }; #endif // INCLUDED_CDetectionRuleTest_h - diff --git a/lib/model/unittest/CDetectorEqualizerTest.cc b/lib/model/unittest/CDetectorEqualizerTest.cc index 69dcac933e..c576163366 100644 --- a/lib/model/unittest/CDetectorEqualizerTest.cc +++ b/lib/model/unittest/CDetectorEqualizerTest.cc @@ -23,35 +23,29 @@ using namespace ml; using TDoubleVec = std::vector<double>; -namespace -{ +namespace { using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator; const double THRESHOLD = std::log(0.05); - } -void CDetectorEqualizerTest::testCorrect() -{ +void CDetectorEqualizerTest::testCorrect() { LOG_DEBUG("*** CDetectorEqualizerTest::testCorrect ***"); // Test that the distribution of scores are more similar after correcting. - double scales[] = { 1.0, 2.1, 3.2 }; + double scales[] = {1.0, 2.1, 3.2}; model::CDetectorEqualizer equalizer; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(scales); ++i) - { + for (std::size_t i = 0u; i < boost::size(scales); ++i) { TDoubleVec logp; rng.generateGammaSamples(1.0, scales[i], 1000, logp); - for (std::size_t j = 0u; j < logp.size(); ++j) - { - if (-logp[j] <= THRESHOLD) - { + for (std::size_t j = 0u; j < logp.size(); ++j) { + if (-logp[j] <= THRESHOLD) { double p = std::exp(-logp[j]); equalizer.add(static_cast<int>(i), p); } @@ -60,15 +54,12 @@ void CDetectorEqualizerTest::testCorrect() TDoubleVec raw[3]; TDoubleVec corrected[3]; - for (std::size_t i = 0u; i < boost::size(scales); ++i) - { + for (std::size_t i = 0u; i < boost::size(scales); ++i) { TDoubleVec logp; rng.generateGammaSamples(1.0, scales[i], 1000, logp); - for (std::size_t j = 0u; j < logp.size(); ++j) - { - if (-logp[j] <= THRESHOLD) - { + for (std::size_t j = 0u; j < logp.size(); ++j) { + if (-logp[j] <= THRESHOLD) { double p = std::exp(-logp[j]); raw[i].push_back(p); corrected[i].push_back(equalizer.correct(static_cast<int>(i), p)); @@ -77,12 +68,10 @@ void CDetectorEqualizerTest::testCorrect() } TMeanAccumulator similarityIncrease; - for (std::size_t i = 1u, k = 0u; i < 3; ++i) - { - for (std::size_t j = 0u; j < i; ++j, ++k) - { - double increase = maths::CStatisticalTests::twoSampleKS(corrected[i], corrected[j]) - / maths::CStatisticalTests::twoSampleKS(raw[i], raw[j]); + for (std::size_t i = 1u, k = 0u; i < 3; ++i) { + for (std::size_t j = 0u; j < i; ++j, ++k) { + double increase = + maths::CStatisticalTests::twoSampleKS(corrected[i], corrected[j]) / maths::CStatisticalTests::twoSampleKS(raw[i], raw[j]); similarityIncrease.add(std::log(increase));
LOG_DEBUG("similarity increase = " << increase); CPPUNIT_ASSERT(increase > 3.0); @@ -92,28 +81,24 @@ void CDetectorEqualizerTest::testCorrect() CPPUNIT_ASSERT(std::exp(maths::CBasicStatistics::mean(similarityIncrease)) > 40.0); } -void CDetectorEqualizerTest::testAge() -{ +void CDetectorEqualizerTest::testAge() { LOG_DEBUG("*** CDetectorEqualizerTest::testAge ***"); // Test that propagation doesn't introduce a bias into the corrections. - double scales[] = { 1.0, 2.1, 3.2 }; + double scales[] = {1.0, 2.1, 3.2}; model::CDetectorEqualizer equalizer; model::CDetectorEqualizer equalizerAged; test::CRandomNumbers rng; - for (std::size_t i = 0u; i < boost::size(scales); ++i) - { + for (std::size_t i = 0u; i < boost::size(scales); ++i) { TDoubleVec logp; rng.generateGammaSamples(1.0, scales[i], 1000, logp); - for (std::size_t j = 0u; j < logp.size(); ++j) - { - if (-logp[j] <= THRESHOLD) - { + for (std::size_t j = 0u; j < logp.size(); ++j) { + if (-logp[j] <= THRESHOLD) { double p = std::exp(-logp[j]); equalizer.add(static_cast(i), p); equalizerAged.add(static_cast(i), p); @@ -122,15 +107,13 @@ void CDetectorEqualizerTest::testAge() } } - for (int i = 0; i < 3; ++i) - { + for (int i = 0; i < 3; ++i) { TMeanAccumulator meanBias; TMeanAccumulator meanError; double logp = THRESHOLD; - for (std::size_t j = 0u; j < 150; ++j, logp += std::log(0.9)) - { + for (std::size_t j = 0u; j < 150; ++j, logp += std::log(0.9)) { double p = std::exp(logp); - double pc = equalizer.correct(i, p); + double pc = equalizer.correct(i, p); double pca = equalizerAged.correct(i, p); double error = std::fabs((std::log(pca) - std::log(pc)) / std::log(pc)); meanError.add(error); @@ -144,11 +127,10 @@ void CDetectorEqualizerTest::testAge() } } -void CDetectorEqualizerTest::testPersist() -{ +void CDetectorEqualizerTest::testPersist() { LOG_DEBUG("*** CDetectorEqualizerTest::testPersist ***"); - double scales[] = { 1.0, 2.1, 3.2 }; + double scales[] = {1.0, 2.1, 3.2}; model::CDetectorEqualizer origEqualizer; @@ -157,14 +139,11 @@ void CDetectorEqualizerTest::testPersist() TDoubleVec logp; rng.generateGammaSamples(1.0, 3.1, 1000, logp); - for (std::size_t i = 0u; i < boost::size(scales); ++i) - { + for (std::size_t i = 0u; i < boost::size(scales); ++i) { rng.generateGammaSamples(1.0, scales[i], 1000, logp); - for (std::size_t j = 0u; j < logp.size(); ++j) - { - if (-logp[j] <= THRESHOLD) - { + for (std::size_t j = 0u; j < logp.size(); ++j) { + if (-logp[j] <= THRESHOLD) { double p = std::exp(-logp[j]); origEqualizer.add(static_cast(i), p); } @@ -185,8 +164,7 @@ void CDetectorEqualizerTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&model::CDetectorEqualizer::acceptRestoreTraverser, - &restoredEqualizer, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&model::CDetectorEqualizer::acceptRestoreTraverser, &restoredEqualizer, _1))); } // Checksums should agree. 
@@ -202,19 +180,15 @@ void CDetectorEqualizerTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test *CDetectorEqualizerTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDetectorEqualizerTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CDetectorEqualizerTest>( - "CDetectorEqualizerTest::testCorrect", - &CDetectorEqualizerTest::testCorrect) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CDetectorEqualizerTest>( - "CDetectorEqualizerTest::testAge", - &CDetectorEqualizerTest::testAge) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CDetectorEqualizerTest>( - "CDetectorEqualizerTest::testPersist", - &CDetectorEqualizerTest::testPersist) ); +CppUnit::Test* CDetectorEqualizerTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetectorEqualizerTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller<CDetectorEqualizerTest>("CDetectorEqualizerTest::testCorrect", &CDetectorEqualizerTest::testCorrect)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CDetectorEqualizerTest>("CDetectorEqualizerTest::testAge", &CDetectorEqualizerTest::testAge)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CDetectorEqualizerTest>("CDetectorEqualizerTest::testPersist", &CDetectorEqualizerTest::testPersist)); return suiteOfTests; } diff --git a/lib/model/unittest/CDetectorEqualizerTest.h b/lib/model/unittest/CDetectorEqualizerTest.h index 909c62ed15..821495295d 100644 --- a/lib/model/unittest/CDetectorEqualizerTest.h +++ b/lib/model/unittest/CDetectorEqualizerTest.h @@ -9,14 +9,13 @@ #include <cppunit/extensions/HelperMacros.h> -class CDetectorEqualizerTest : public CppUnit::TestFixture -{ - public: - void testCorrect(); - void testAge(); - void testPersist(); +class CDetectorEqualizerTest : public CppUnit::TestFixture { +public: + void testCorrect(); + void testAge(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CDetectorEqualizerTest_h diff --git a/lib/model/unittest/CDynamicStringIdRegistryTest.cc b/lib/model/unittest/CDynamicStringIdRegistryTest.cc index 3125430891..d8c0de7fbb 100644 --- a/lib/model/unittest/CDynamicStringIdRegistryTest.cc +++ b/lib/model/unittest/CDynamicStringIdRegistryTest.cc @@ -21,30 +21,23 @@ using namespace ml; using namespace model; +CppUnit::Test* CDynamicStringIdRegistryTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDynamicStringIdRegistryTest"); -CppUnit::Test *CDynamicStringIdRegistryTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CDynamicStringIdRegistryTest"); - - suiteOfTests->addTest(new CppUnit::TestCaller<CDynamicStringIdRegistryTest>( - "CDynamicStringIdRegistryTest::testAddName", - &CDynamicStringIdRegistryTest::testAddName)); - suiteOfTests->addTest(new CppUnit::TestCaller<CDynamicStringIdRegistryTest>( - "CDynamicStringIdRegistryTest::testPersist", - &CDynamicStringIdRegistryTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDynamicStringIdRegistryTest>("CDynamicStringIdRegistryTest::testAddName", + &CDynamicStringIdRegistryTest::testAddName)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDynamicStringIdRegistryTest>("CDynamicStringIdRegistryTest::testPersist", + &CDynamicStringIdRegistryTest::testPersist)); return suiteOfTests; } -void CDynamicStringIdRegistryTest::testAddName() -{ +void CDynamicStringIdRegistryTest::testAddName() { LOG_DEBUG("*** testAddName ***"); CResourceMonitor resourceMonitor; - CDynamicStringIdRegistry registry("person", - stat_t::E_NumberNewPeople, - stat_t::E_NumberNewPeopleNotAllowed, - stat_t::E_NumberNewPeopleRecycled); + CDynamicStringIdRegistry registry( + "person", stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled); bool personAdded = false; 
std::string person1("foo"); @@ -88,15 +81,12 @@ void CDynamicStringIdRegistryTest::testAddName() CPPUNIT_ASSERT(registry.isIdActive(2)); } -void CDynamicStringIdRegistryTest::testPersist() -{ +void CDynamicStringIdRegistryTest::testPersist() { LOG_DEBUG("*** testPersist ***"); CResourceMonitor resourceMonitor; - CDynamicStringIdRegistry registry("person", - stat_t::E_NumberNewPeople, - stat_t::E_NumberNewPeopleNotAllowed, - stat_t::E_NumberNewPeopleRecycled); + CDynamicStringIdRegistry registry( + "person", stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled); bool addedPerson = false; std::string person1("foo"); @@ -115,13 +105,9 @@ void CDynamicStringIdRegistryTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDynamicStringIdRegistry restoredRegistry("person", - stat_t::E_NumberNewPeople, - stat_t::E_NumberNewPeopleNotAllowed, - stat_t::E_NumberNewPeopleRecycled); - traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, - &restoredRegistry, - _1)); + CDynamicStringIdRegistry restoredRegistry( + "person", stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled); + traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, &restoredRegistry, _1)); std::string restoredXml; { diff --git a/lib/model/unittest/CDynamicStringIdRegistryTest.h b/lib/model/unittest/CDynamicStringIdRegistryTest.h index 51f0f9b852..2353a2cd98 100644 --- a/lib/model/unittest/CDynamicStringIdRegistryTest.h +++ b/lib/model/unittest/CDynamicStringIdRegistryTest.h @@ -9,13 +9,11 @@ #include -class CDynamicStringIdRegistryTest : public CppUnit::TestFixture -{ - public: - void testAddName(); - void testPersist(); - static CppUnit::Test *suite(); +class CDynamicStringIdRegistryTest : public CppUnit::TestFixture { +public: + void testAddName(); + void testPersist(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CDynamicStringIdRegistryTest_h - diff --git a/lib/model/unittest/CEventRateAnomalyDetectorTest.cc b/lib/model/unittest/CEventRateAnomalyDetectorTest.cc index b0555ae1e7..231b8c12e1 100644 --- a/lib/model/unittest/CEventRateAnomalyDetectorTest.cc +++ b/lib/model/unittest/CEventRateAnomalyDetectorTest.cc @@ -29,8 +29,7 @@ #include -namespace -{ +namespace { using TTimeVec = std::vector; using TStrVec = std::vector; @@ -41,118 +40,82 @@ using TTimeStrPrSet = std::set; const std::string EMPTY_STRING; -class CResultWriter : public ml::model::CHierarchicalResultsVisitor -{ - public: - CResultWriter(const ml::model::CAnomalyDetectorModelConfig &modelConfig, - const ml::model::CLimits &limits) - : m_ModelConfig(modelConfig), - m_Limits(limits), - m_Calls(0) - { +class CResultWriter : public ml::model::CHierarchicalResultsVisitor { +public: + CResultWriter(const ml::model::CAnomalyDetectorModelConfig& modelConfig, const ml::model::CLimits& limits) + : m_ModelConfig(modelConfig), m_Limits(limits), m_Calls(0) {} + + void operator()(ml::model::CAnomalyDetector& detector, ml::core_t::TTime start, ml::core_t::TTime end) { + ml::model::CHierarchicalResults results; + detector.buildResults(start, end, results); + results.buildHierarchy(); + ml::model::CHierarchicalResultsAggregator aggregator(m_ModelConfig); + results.bottomUpBreadthFirst(aggregator); + ml::model::CHierarchicalResultsProbabilityFinalizer finalizer; + results.bottomUpBreadthFirst(finalizer); + 
results.bottomUpBreadthFirst(*this); + } + + virtual void visit(const ml::model::CHierarchicalResults& results, const ml::model::CHierarchicalResults::TNode& node, bool pivot) { + if (pivot) { + return; } - void operator()(ml::model::CAnomalyDetector &detector, - ml::core_t::TTime start, - ml::core_t::TTime end) - { - ml::model::CHierarchicalResults results; - detector.buildResults(start, end, results); - results.buildHierarchy(); - ml::model::CHierarchicalResultsAggregator aggregator(m_ModelConfig); - results.bottomUpBreadthFirst(aggregator); - ml::model::CHierarchicalResultsProbabilityFinalizer finalizer; - results.bottomUpBreadthFirst(finalizer); - results.bottomUpBreadthFirst(*this); + if (!this->shouldWriteResult(m_Limits, results, node, pivot)) { + return; } - virtual void visit(const ml::model::CHierarchicalResults &results, - const ml::model::CHierarchicalResults::TNode &node, - bool pivot) - { - if (pivot) - { - return; - } - - if (!this->shouldWriteResult(m_Limits, results, node, pivot)) - { - return; - } - - if (this->isSimpleCount(node)) - { - return; - } - if (!this->isLeaf(node)) - { - return; - } - - - const std::string analysisFieldValue = *node.s_Spec.s_PersonFieldValue; - ml::core_t::TTime bucketTime = node.s_BucketStartTime; - double anomalyFactor = node.s_RawAnomalyScore; - LOG_DEBUG(analysisFieldValue << " bucket time " << bucketTime - << " anomalyFactor " << anomalyFactor); - ++m_Calls; - m_AllAnomalies.insert(TTimeStrPr(bucketTime, analysisFieldValue)); - m_AnomalyScores[bucketTime] += anomalyFactor; + if (this->isSimpleCount(node)) { + return; + } + if (!this->isLeaf(node)) { + return; } - bool operator()(ml::core_t::TTime time, - const ml::model::CHierarchicalResults::TNode &node, - bool isBucketInfluencer) - { - LOG_DEBUG((isBucketInfluencer ? "BucketInfluencer" : "Influencer ") - << node.s_Spec.print() << " initial score " << node.probability() - << ", time: " << time); + const std::string analysisFieldValue = *node.s_Spec.s_PersonFieldValue; + ml::core_t::TTime bucketTime = node.s_BucketStartTime; + double anomalyFactor = node.s_RawAnomalyScore; + LOG_DEBUG(analysisFieldValue << " bucket time " << bucketTime << " anomalyFactor " << anomalyFactor); + ++m_Calls; + m_AllAnomalies.insert(TTimeStrPr(bucketTime, analysisFieldValue)); + m_AnomalyScores[bucketTime] += anomalyFactor; + } - return true; - } + bool operator()(ml::core_t::TTime time, const ml::model::CHierarchicalResults::TNode& node, bool isBucketInfluencer) { + LOG_DEBUG((isBucketInfluencer ? 
"BucketInfluencer" : "Influencer ") + << node.s_Spec.print() << " initial score " << node.probability() << ", time: " << time); - size_t calls() const - { - return m_Calls; - } + return true; + } - size_t numDistinctTimes() const - { - return m_AllAnomalies.size(); - } + size_t calls() const { return m_Calls; } - const TTimeDoubleMap &anomalyScores() const - { - return m_AnomalyScores; - } + size_t numDistinctTimes() const { return m_AllAnomalies.size(); } - const TTimeStrPrSet &allAnomalies() const - { - return m_AllAnomalies; - } + const TTimeDoubleMap& anomalyScores() const { return m_AnomalyScores; } + + const TTimeStrPrSet& allAnomalies() const { return m_AllAnomalies; } - private: - const ml::model::CAnomalyDetectorModelConfig &m_ModelConfig; - ml::model::CLimits m_Limits; - std::size_t m_Calls; - TTimeStrPrSet m_AllAnomalies; - TTimeDoubleMap m_AnomalyScores; +private: + const ml::model::CAnomalyDetectorModelConfig& m_ModelConfig; + ml::model::CLimits m_Limits; + std::size_t m_Calls; + TTimeStrPrSet m_AllAnomalies; + TTimeDoubleMap m_AnomalyScores; }; void importData(ml::core_t::TTime firstTime, ml::core_t::TTime lastTime, ml::core_t::TTime bucketLength, - CResultWriter &outputResults, - const TStrVec &fileNames, - ml::model::CAnomalyDetector &detector) -{ + CResultWriter& outputResults, + const TStrVec& fileNames, + ml::model::CAnomalyDetector& detector) { using TifstreamPtr = boost::shared_ptr; using TifstreamPtrVec = std::vector; using TTimeVec = std::vector; TifstreamPtrVec ifss; - for (std::size_t i = 0u; i < fileNames.size(); ++i) - { + for (std::size_t i = 0u; i < fileNames.size(); ++i) { TifstreamPtr ifs(new std::ifstream(fileNames[i].c_str())); CPPUNIT_ASSERT(ifs->is_open()); ifss.push_back(ifs); @@ -161,33 +124,25 @@ void importData(ml::core_t::TTime firstTime, ml::core_t::TTime lastBucketTime = ml::maths::CIntegerTools::ceil(firstTime, bucketLength); TTimeVec times(ifss.size()); - for (std::size_t i = 0u; i < ifss.size(); ++i) - { + for (std::size_t i = 0u; i < ifss.size(); ++i) { std::string line; std::getline(*ifss[i], line); CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(line, times[i])); } ml::core_t::TTime time(0); - for (;;) - { + for (;;) { std::size_t file(std::min_element(times.begin(), times.end()) - times.begin()); std::string attributeFieldValue = fileNames[file]; time = times[file]; - if (time == std::numeric_limits::max()) - { + if (time == std::numeric_limits::max()) { break; } - for (/**/; - lastBucketTime + bucketLength <= time; - lastBucketTime += bucketLength) - { - outputResults(detector, - lastBucketTime, - lastBucketTime + bucketLength); + for (/**/; lastBucketTime + bucketLength <= time; lastBucketTime += bucketLength) { + outputResults(detector, lastBucketTime, lastBucketTime + bucketLength); } ml::model::CAnomalyDetector::TStrCPtrVec fieldValues; @@ -195,46 +150,36 @@ void importData(ml::core_t::TTime firstTime, detector.addRecord(time, fieldValues); std::string line; - if (!std::getline(*ifss[file], line)) - { + if (!std::getline(*ifss[file], line)) { times[file] = std::numeric_limits::max(); ifss[file].reset(); - } - else - { + } else { CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(line, times[file])); } } - for (/**/; - lastBucketTime + bucketLength <= lastTime; - lastBucketTime += bucketLength) - { - outputResults(detector, - lastBucketTime, - lastBucketTime + bucketLength); + for (/**/; lastBucketTime + bucketLength <= lastTime; lastBucketTime += bucketLength) { + outputResults(detector, lastBucketTime, lastBucketTime + 
bucketLength); } } - } -void CEventRateAnomalyDetectorTest::testAnomalies() -{ - static const size_t EXPECTED_ANOMALOUS_HOURS(12); +void CEventRateAnomalyDetectorTest::testAnomalies() { + static const size_t EXPECTED_ANOMALOUS_HOURS(12); static const ml::core_t::TTime FIRST_TIME(1346713620); static const ml::core_t::TTime LAST_TIME(1347317974); static const ml::core_t::TTime BUCKET_SIZE(1800); - static const double HIGH_ANOMALY_SCORE(0.003); + static const double HIGH_ANOMALY_SCORE(0.003); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); ml::model::CLimits limits; ml::model::CSearchKey key(1, // identifier ml::model::function_t::E_IndividualRareCount, false, ml::model_t::E_XF_None, - EMPTY_STRING, "status"); + EMPTY_STRING, + "status"); ml::model::CAnomalyDetector detector(1, // identifier limits, modelConfig, @@ -252,30 +197,25 @@ void CEventRateAnomalyDetectorTest::testAnomalies() LOG_DEBUG("visitor.calls() = " << writer.calls()); // CPPUNIT_ASSERT_EQUAL(writer.calls(), writer.numDistinctTimes()); - const TTimeDoubleMap &anomalyScores = writer.anomalyScores(); + const TTimeDoubleMap& anomalyScores = writer.anomalyScores(); TTimeVec peaks; - for (const auto &score : anomalyScores) - { - if (score.second > HIGH_ANOMALY_SCORE) - { + for (const auto& score : anomalyScores) { + if (score.second > HIGH_ANOMALY_SCORE) { peaks.push_back(score.first); } } CPPUNIT_ASSERT_EQUAL(EXPECTED_ANOMALOUS_HOURS, peaks.size()); - std::size_t detected503 = 0u; + std::size_t detected503 = 0u; std::size_t detectedMySQL = 0u; - for (std::size_t i = 0u; i < peaks.size(); ++i) - { + for (std::size_t i = 0u; i < peaks.size(); ++i) { LOG_DEBUG("Checking for status 503 anomaly at " << peaks[i]); - if (writer.allAnomalies().count(TTimeStrPr(peaks[i], "testfiles/status503.txt"))) - { + if (writer.allAnomalies().count(TTimeStrPr(peaks[i], "testfiles/status503.txt"))) { ++detected503; } LOG_DEBUG("Checking for MySQL anomaly at " << peaks[i]); - if (writer.allAnomalies().count(TTimeStrPr(peaks[i], "testfiles/mysqlabort.txt"))) - { + if (writer.allAnomalies().count(TTimeStrPr(peaks[i], "testfiles/mysqlabort.txt"))) { ++detectedMySQL; } } @@ -284,14 +224,12 @@ void CEventRateAnomalyDetectorTest::testAnomalies() CPPUNIT_ASSERT_EQUAL(std::size_t(10), detectedMySQL); } -void CEventRateAnomalyDetectorTest::testPersist() -{ +void CEventRateAnomalyDetectorTest::testPersist() { static const ml::core_t::TTime FIRST_TIME(1346713620); static const ml::core_t::TTime LAST_TIME(1347317974); static const ml::core_t::TTime BUCKET_SIZE(3600); - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); ml::model::CLimits limits; ml::model::CSearchKey key(1, // identifier @@ -333,10 +271,8 @@ void CEventRateAnomalyDetectorTest::testPersist() ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&ml::model::CAnomalyDetector::acceptRestoreTraverser, - &restoredDetector, - EMPTY_STRING, - _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(&ml::model::CAnomalyDetector::acceptRestoreTraverser, 
&restoredDetector, EMPTY_STRING, _1))); } // The XML representation of the new typer should be the same as the original @@ -349,17 +285,13 @@ void CEventRateAnomalyDetectorTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -CppUnit::Test *CEventRateAnomalyDetectorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CEventRateAnomalyDetectorTest"); +CppUnit::Test* CEventRateAnomalyDetectorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRateAnomalyDetectorTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateAnomalyDetectorTest::testAnomalies", - &CEventRateAnomalyDetectorTest::testAnomalies) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateAnomalyDetectorTest::testPersist", - &CEventRateAnomalyDetectorTest::testPersist) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateAnomalyDetectorTest::testAnomalies", + &CEventRateAnomalyDetectorTest::testAnomalies)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateAnomalyDetectorTest::testPersist", + &CEventRateAnomalyDetectorTest::testPersist)); return suiteOfTests; } - diff --git a/lib/model/unittest/CEventRateAnomalyDetectorTest.h b/lib/model/unittest/CEventRateAnomalyDetectorTest.h index 551a12cc65..ac5f234c88 100644 --- a/lib/model/unittest/CEventRateAnomalyDetectorTest.h +++ b/lib/model/unittest/CEventRateAnomalyDetectorTest.h @@ -9,14 +9,12 @@ #include -class CEventRateAnomalyDetectorTest : public CppUnit::TestFixture -{ - public: - void testAnomalies(); - void testPersist(); +class CEventRateAnomalyDetectorTest : public CppUnit::TestFixture { +public: + void testAnomalies(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CEventRateAnomalyDetectorTest_h - diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc b/lib/model/unittest/CEventRateDataGathererTest.cc index c38631fe77..0093ae82f4 100644 --- a/lib/model/unittest/CEventRateDataGathererTest.cc +++ b/lib/model/unittest/CEventRateDataGathererTest.cc @@ -6,12 +6,12 @@ #include "CEventRateDataGathererTest.h" +#include +#include #include #include #include #include -#include -#include #include #include @@ -55,28 +55,23 @@ using TSizeSizePrStoredStringPtrPrUInt64UMapVec = CBucketGatherer::TSizeSizePrSt using TTimeVec = std::vector; using TStrCPtrVec = CBucketGatherer::TStrCPtrVec; -namespace -{ - +namespace { const CSearchKey key; const std::string EMPTY_STRING(""); -std::size_t addPerson(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, - const std::string &p, - const std::string &v = EMPTY_STRING, - std::size_t numInfluencers = 0) -{ +std::size_t addPerson(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, + const std::string& p, + const std::string& v = EMPTY_STRING, + std::size_t numInfluencers = 0) { CDataGatherer::TStrCPtrVec person; person.push_back(&p); std::string i("i"); - for (std::size_t j = 0; j < numInfluencers; ++j) - { + for (std::size_t j = 0; j < numInfluencers; ++j) { person.push_back(&i); } - if (!v.empty()) - { + if (!v.empty()) { person.push_back(&v); } CEventData result; @@ -84,11 +79,7 @@ std::size_t addPerson(CDataGatherer &gatherer, return *result.personId(); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, - core_t::TTime time, - const std::string &person) -{ +void addArrival(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, core_t::TTime time, const std::string& person) { CDataGatherer::TStrCPtrVec fieldValues; 
fieldValues.push_back(&person); @@ -98,12 +89,11 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, - const std::string &person, - const std::string &attribute) -{ + const std::string& person, + const std::string& attribute) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); fieldValues.push_back(&attribute); @@ -114,13 +104,12 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, - const std::string &person, - const std::string &value, - const std::string &influencer) -{ + const std::string& person, + const std::string& value, + const std::string& influencer) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); fieldValues.push_back(&influencer); @@ -132,23 +121,20 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, - const std::string &person, - const TStrVec &influencers, - const std::string &value) -{ + const std::string& person, + const TStrVec& influencers, + const std::string& value) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); - for (std::size_t i = 0; i < influencers.size(); ++i) - { + for (std::size_t i = 0; i < influencers.size(); ++i) { fieldValues.push_back(&influencers[i]); } - if (!value.empty()) - { + if (!value.empty()) { fieldValues.push_back(&value); } @@ -158,9 +144,7 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void testPersistence(const SModelParams ¶ms, - const CDataGatherer &gatherer) -{ +void testPersistence(const SModelParams& params, const CDataGatherer& gatherer) { // Test persistence. (We check for idempotency.) 
std::string origXml; { @@ -176,9 +160,19 @@ void testPersistence(const SModelParams &params, CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, traverser); + CDataGatherer restoredGatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + traverser); // The XML representation of the new filter should be the // same as the original @@ -193,12 +187,11 @@ void testPersistence(const SModelParams &params, } void testInfluencerPerFeature(model_t::EFeature feature, - const TTimeVec &data, - const TStrVecVec &influencers, - const TStrVec &expected, - const std::string &valueField, - CResourceMonitor &resourceMonitor) -{ + const TTimeVec& data, + const TStrVecVec& influencers, + const TStrVec& expected, + const std::string& valueField, + CResourceMonitor& resourceMonitor) { LOG_DEBUG(" *** testing " << model_t::print(feature) << " ***"); const core_t::TTime startTime = 0; @@ -209,15 +202,26 @@ void testInfluencerPerFeature(model_t::EFeature feature, features.push_back(feature); TStrVec influencerFieldNames; influencerFieldNames.push_back("IF1"); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - valueField, influencerFieldNames, false, key, features, startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + valueField, + influencerFieldNames, + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, resourceMonitor, "p", valueField, 1)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); - for (std::size_t i = 0u; i < features.size(); ++i) - { + for (std::size_t i = 0u; i < features.size(); ++i) { CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i)); } @@ -235,19 +239,14 @@ void testInfluencerPerFeature(model_t::EFeature feature, CPPUNIT_ASSERT_EQUAL(startTime, gatherer.currentBucketStartTime()); gatherer.currentBucketStartTime(200); - CPPUNIT_ASSERT_EQUAL(static_cast<core_t::TTime>(200), - gatherer.currentBucketStartTime()); + CPPUNIT_ASSERT_EQUAL(static_cast<core_t::TTime>(200), gatherer.currentBucketStartTime()); gatherer.currentBucketStartTime(startTime); CPPUNIT_ASSERT_EQUAL(bucketLength, gatherer.bucketLength()); core_t::TTime time = startTime; - for (std::size_t i = 0, j = 0u; i < data.size(); ++i) - { - for (/**/; - j < 5 && data[i] >= time + bucketLength; - time += bucketLength, ++j, gatherer.timeNow(time)) - { + for (std::size_t i = 0, j = 0u; i < data.size(); ++i) { + for (/**/; j < 5 && data[i] >= time + bucketLength; time += bucketLength, ++j, gatherer.timeNow(time)) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -261,19 +260,13 @@ void testInfluencerPerFeature(model_t::EFeature feature, testPersistence(params, gatherer); } - if (j < 5) - { + if (j < 5) { addArrival(gatherer, resourceMonitor, data[i], "p", influencers[i], valueField.empty() ? 
EMPTY_STRING : "value"); } } } - -void importCsvData(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, - const std::string &filename, - const TSizeVec &fields) -{ +void importCsvData(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, const std::string& filename, const TSizeVec& fields) { using TifstreamPtr = boost::shared_ptr; TifstreamPtr ifs(new std::ifstream(filename.c_str())); CPPUNIT_ASSERT(ifs->is_open()); @@ -285,8 +278,7 @@ void importCsvData(CDataGatherer &gatherer, // read the header CPPUNIT_ASSERT(std::getline(*ifs, line)); - while (std::getline(*ifs, line)) - { + while (std::getline(*ifs, line)) { LOG_TRACE("Got string: " << line); core::CRegex::TStrVec tokens; regex.split(line, tokens); @@ -297,8 +289,7 @@ void importCsvData(CDataGatherer &gatherer, CDataGatherer::TStrCPtrVec fieldValues; CEventData data; data.time(time); - for (std::size_t i = 0; i < fields.size(); i++) - { + for (std::size_t i = 0; i < fields.size(); i++) { fieldValues.push_back(&tokens[fields[i]]); } gatherer.addArrival(fieldValues, data, resourceMonitor); @@ -308,8 +299,7 @@ void importCsvData(CDataGatherer &gatherer, } // namespace -void CEventRateDataGathererTest::testLatencyPersist() -{ +void CEventRateDataGathererTest::testLatencyPersist() { LOG_DEBUG("*** testLatencyPersist ***"); core_t::TTime bucketLength = 3600; @@ -322,10 +312,21 @@ void CEventRateDataGathererTest::testLatencyPersist() // Create a gatherer, no influences TFeatureVec features; features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, "file", TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "program", + EMPTY_STRING, + "file", + TStrVec(), + false, + key, + features, + startTime, + 0); TSizeVec fields; fields.push_back(2); fields.push_back(1); @@ -340,10 +341,21 @@ void CEventRateDataGathererTest::testLatencyPersist() TStrVec influencers; influencers.push_back("user"); features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, "file", influencers, - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "program", + EMPTY_STRING, + "file", + influencers, + false, + key, + features, + startTime, + 0); TSizeVec fields; fields.push_back(2); fields.push_back(3); @@ -357,10 +369,21 @@ void CEventRateDataGathererTest::testLatencyPersist() // Create a gatherer, no influences TFeatureVec features; features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "program", + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); TSizeVec fields; fields.push_back(2); @@ -374,10 +397,21 @@ void CEventRateDataGathererTest::testLatencyPersist() TStrVec influencers; influencers.push_back("user"); 
features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, EMPTY_STRING, influencers, - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "program", + EMPTY_STRING, + EMPTY_STRING, + influencers, + false, + key, + features, + startTime, + 0); TSizeVec fields; fields.push_back(2); fields.push_back(3); @@ -388,9 +422,7 @@ void CEventRateDataGathererTest::testLatencyPersist() } } - -void CEventRateDataGathererTest::singleSeriesTests() -{ +void CEventRateDataGathererTest::singleSeriesTests() { LOG_DEBUG("*** singleSeriesTests ***"); // Test that the various statistics come back as we expect. @@ -399,43 +431,33 @@ void CEventRateDataGathererTest::singleSeriesTests() const core_t::TTime bucketLength = 600; SModelParams params(bucketLength); - core_t::TTime data[] = - { - 1, 15, 180, 190, 400, 550, // bucket 1 - 600, 799, 1199, // bucket 2 - 1200, 1250, // bucket 3 - // bucket 4 - 2420, 2480, 2490, // bucket 5 - 10000 // sentinel - }; - - - std::string expectedPersonCounts[] = - { - std::string("[(0, 6)]"), - std::string("[(0, 3)]"), - std::string("[(0, 2)]"), - std::string("[(0, 0)]"), - std::string("[(0, 3)]") - }; - - std::string expectedPersonNonZeroCounts[] = - { - std::string("[(0, 6)]"), - std::string("[(0, 3)]"), - std::string("[(0, 2)]"), - std::string("[]"), - std::string("[(0, 3)]") - }; - - std::string expectedPersonIndicator[] = - { - std::string("[(0, 1)]"), - std::string("[(0, 1)]"), - std::string("[(0, 1)]"), - std::string("[]"), - std::string("[(0, 1)]") - }; + core_t::TTime data[] = { + 1, + 15, + 180, + 190, + 400, + 550, // bucket 1 + 600, + 799, + 1199, // bucket 2 + 1200, + 1250, // bucket 3 + // bucket 4 + 2420, + 2480, + 2490, // bucket 5 + 10000 // sentinel + }; + + std::string expectedPersonCounts[] = { + std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), std::string("[(0, 0)]"), std::string("[(0, 3)]")}; + + std::string expectedPersonNonZeroCounts[] = { + std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), std::string("[]"), std::string("[(0, 3)]")}; + + std::string expectedPersonIndicator[] = { + std::string("[(0, 1)]"), std::string("[(0, 1)]"), std::string("[(0, 1)]"), std::string("[]"), std::string("[(0, 1)]")}; // Test the count by bucket and person and bad feature // (which should be ignored). 
@@ -443,16 +465,26 @@ void CEventRateDataGathererTest::singleSeriesTests() TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); features.push_back(model_t::E_IndividualMinByPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); - for (std::size_t i = 0u; i < 1; ++i) - { + for (std::size_t i = 0u; i < 1; ++i) { CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i)); } CPPUNIT_ASSERT(gatherer.hasFeature(model_t::E_IndividualCountByBucketAndPerson)); @@ -472,19 +504,14 @@ void CEventRateDataGathererTest::singleSeriesTests() CPPUNIT_ASSERT_EQUAL(startTime, gatherer.currentBucketStartTime()); gatherer.currentBucketStartTime(200); - CPPUNIT_ASSERT_EQUAL(static_cast<core_t::TTime>(200), - gatherer.currentBucketStartTime()); + CPPUNIT_ASSERT_EQUAL(static_cast<core_t::TTime>(200), gatherer.currentBucketStartTime()); gatherer.currentBucketStartTime(startTime); CPPUNIT_ASSERT_EQUAL(bucketLength, gatherer.bucketLength()); core_t::TTime time = startTime; - for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) - { - for (/**/; - j < 5 && data[i] >= time + bucketLength; - time += bucketLength, ++j, gatherer.timeNow(time)) - { + for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { + for (/**/; j < 5 && data[i] >= time + bucketLength; time += bucketLength, ++j, gatherer.timeNow(time)) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -492,14 +519,12 @@ void CEventRateDataGathererTest::singleSeriesTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } - if (j < 5) - { + if (j < 5) { addArrival(gatherer, m_ResourceMonitor, data[i], "p"); } } @@ -510,19 +535,26 @@ { TFeatureVec features; features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); features.push_back(model_t::E_IndividualTotalBucketCountByPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; - for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) - { - for (/**/; - j < 5 && data[i] >= 
time + bucketLength; - time += bucketLength, ++j, gatherer.timeNow(time)) - { + for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { + for (/**/; j < 5 && data[i] >= time + bucketLength; time += bucketLength, ++j, gatherer.timeNow(time)) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -530,17 +562,14 @@ void CEventRateDataGathererTest::singleSeriesTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], core::CContainerPrinter::print(featureData[0].second)); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualTotalBucketCountByPerson, featureData[1].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], - core::CContainerPrinter::print(featureData[1].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], core::CContainerPrinter::print(featureData[1].second)); testPersistence(params, gatherer); } - if (j < 5) - { + if (j < 5) { addArrival(gatherer, m_ResourceMonitor, data[i], "p"); } } @@ -550,19 +579,26 @@ void CEventRateDataGathererTest::singleSeriesTests() { TFeatureVec features; features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; - for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) - { - for (/**/; - j < 5 && data[i] >= time + bucketLength; - time += bucketLength, ++j, gatherer.timeNow(time)) - { + for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { + for (/**/; j < 5 && data[i] >= time + bucketLength; time += bucketLength, ++j, gatherer.timeNow(time)) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -570,22 +606,19 @@ void CEventRateDataGathererTest::singleSeriesTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonIndicator[j], - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonIndicator[j], core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } - if (j < 5) - { + if (j < 5) { addArrival(gatherer, m_ResourceMonitor, data[i], "p"); } } } } -void CEventRateDataGathererTest::multipleSeriesTests() -{ +void CEventRateDataGathererTest::multipleSeriesTests() { LOG_DEBUG("*** multipleSeriesTests ***"); // Test that the various statistics come back as we expect @@ -594,54 +627,66 @@ void CEventRateDataGathererTest::multipleSeriesTests() const core_t::TTime startTime = 0; const core_t::TTime 
bucketLength = 600; - core_t::TTime data1[] = - { - 1, 15, 180, 190, 400, 550, // bucket 1 - 600, 799, 1199, // bucket 2 - 1200, 1250, // bucket 3 - 1900, // bucket 4 - 2420, 2480, 2490, // bucket 5 - 10000 // sentinel - }; - core_t::TTime data2[] = - { - 1, 5, 15, 25, 180, 190, 400, 550, // bucket 1 - 600, 605, 609, 799, 1199, // bucket 2 - 1200, 1250, 1255, 1256, 1300, 1400, // bucket 3 - 1900, 1950, // bucket 4 - 2420, 2480, 2490, 2500, 2550, 2600, // bucket 5 - 10000 // sentinel - }; - - std::string expectedPersonCounts[] = - { - std::string("[(0, 6), (1, 8)]"), - std::string("[(0, 3), (1, 5)]"), - std::string("[(0, 2), (1, 6)]"), - std::string("[(0, 1), (1, 2)]"), - std::string("[(0, 3), (1, 6)]") - }; + core_t::TTime data1[] = { + 1, + 15, + 180, + 190, + 400, + 550, // bucket 1 + 600, + 799, + 1199, // bucket 2 + 1200, + 1250, // bucket 3 + 1900, // bucket 4 + 2420, + 2480, + 2490, // bucket 5 + 10000 // sentinel + }; + core_t::TTime data2[] = { + 1, 5, 15, 25, 180, 190, 400, 550, // bucket 1 + 600, 605, 609, 799, 1199, // bucket 2 + 1200, 1250, 1255, 1256, 1300, 1400, // bucket 3 + 1900, 1950, // bucket 4 + 2420, 2480, 2490, 2500, 2550, 2600, // bucket 5 + 10000 // sentinel + }; + + std::string expectedPersonCounts[] = {std::string("[(0, 6), (1, 8)]"), + std::string("[(0, 3), (1, 5)]"), + std::string("[(0, 2), (1, 6)]"), + std::string("[(0, 1), (1, 2)]"), + std::string("[(0, 3), (1, 6)]")}; SModelParams params(bucketLength); { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); core_t::TTime time = startTime; std::size_t i1 = 0u, i2 = 0u, j = 0u; - for (;;) - { - for (/**/; - j < 5 && std::min(data1[i1], data2[i2]) >= time + bucketLength; - time += bucketLength, ++j) - { + for (;;) { + for (/**/; j < 5 && std::min(data1[i1], data2[i2]) >= time + bucketLength; time += bucketLength, ++j) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -649,25 +694,20 @@ void CEventRateDataGathererTest::multipleSeriesTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } - if (j >= 5) - { + if (j >= 5) { break; } - if (data1[i1] < data2[i2]) - { + if (data1[i1] < data2[i2]) { LOG_DEBUG("Adding arrival for p1 at " << data1[i1]); addArrival(gatherer, m_ResourceMonitor, data1[i1], "p1"); ++i1; - } - else - { + } else { LOG_DEBUG("Adding arrival for p2 at " << data2[i2]); addArrival(gatherer, m_ResourceMonitor, data2[i2], "p2"); ++i2; @@ -692,25 +732,34 @@ void 
CEventRateDataGathererTest::multipleSeriesTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), core::CContainerPrinter::print(featureData[0].second)); } { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); CPPUNIT_ASSERT_EQUAL(std::size_t(3), addPerson(gatherer, m_ResourceMonitor, "p4")); CPPUNIT_ASSERT_EQUAL(std::size_t(4), addPerson(gatherer, m_ResourceMonitor, "p5")); - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { addArrival(gatherer, m_ResourceMonitor, startTime, gatherer.personName(i)); } addArrival(gatherer, m_ResourceMonitor, startTime + 1, gatherer.personName(2)); @@ -750,13 +799,11 @@ void CEventRateDataGathererTest::multipleSeriesTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("[(2, 2), (4, 3)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2, 2), (4, 3)]"), core::CContainerPrinter::print(featureData[0].second)); } } -void CEventRateDataGathererTest::testRemovePeople() -{ +void CEventRateDataGathererTest::testRemovePeople() { LOG_DEBUG("*** testRemovePeople ***"); // Test various combinations of removed people. 
@@ -772,10 +819,21 @@ void CEventRateDataGathererTest::testRemovePeople() features.push_back(model_t::E_IndividualLowCountsByBucketAndPerson); features.push_back(model_t::E_IndividualHighCountsByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -785,11 +843,9 @@ void CEventRateDataGathererTest::testRemovePeople() CPPUNIT_ASSERT_EQUAL(std::size_t(6), addPerson(gatherer, m_ResourceMonitor, "p7")); CPPUNIT_ASSERT_EQUAL(std::size_t(7), addPerson(gatherer, m_ResourceMonitor, "p8")); - core_t::TTime counts[] = { 0, 3, 5, 2, 0, 5, 7, 10 }; - for (std::size_t i = 0u; i < boost::size(counts); ++i) - { - for (core_t::TTime time = 0; time < counts[i]; ++time) - { + core_t::TTime counts[] = {0, 3, 5, 2, 0, 5, 7, 10}; + for (std::size_t i = 0u; i < boost::size(counts); ++i) { + for (core_t::TTime time = 0; time < counts[i]; ++time) { addArrival(gatherer, m_ResourceMonitor, startTime + time, gatherer.personName(i)); } } @@ -800,10 +856,21 @@ void CEventRateDataGathererTest::testRemovePeople() peopleToRemove.push_back(1); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer expectedGatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(expectedGatherer, m_ResourceMonitor, "p3")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(expectedGatherer, m_ResourceMonitor, "p4")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(expectedGatherer, m_ResourceMonitor, "p5")); @@ -811,11 +878,9 @@ void CEventRateDataGathererTest::testRemovePeople() CPPUNIT_ASSERT_EQUAL(std::size_t(4), addPerson(expectedGatherer, m_ResourceMonitor, "p7")); CPPUNIT_ASSERT_EQUAL(std::size_t(5), addPerson(expectedGatherer, m_ResourceMonitor, "p8")); - core_t::TTime expectedCounts[] = { 5, 2, 0, 5, 7, 10 }; - for (std::size_t i = 0u; i < boost::size(expectedCounts); ++i) - { - for (core_t::TTime time = 0; time < expectedCounts[i]; ++time) - { + core_t::TTime expectedCounts[] = {5, 2, 0, 5, 7, 10}; + for (std::size_t i = 0u; i < boost::size(expectedCounts); ++i) { + for (core_t::TTime time = 0; time < expectedCounts[i]; ++time) { addArrival(expectedGatherer, m_ResourceMonitor, startTime + time, expectedGatherer.personName(i)); } } @@ -831,19 +896,28 @@ void CEventRateDataGathererTest::testRemovePeople() peopleToRemove.push_back(7); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 
TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer expectedGatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(expectedGatherer, m_ResourceMonitor, "p3")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(expectedGatherer, m_ResourceMonitor, "p6")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(expectedGatherer, m_ResourceMonitor, "p7")); - core_t::TTime expectedCounts[] = { 5, 5, 7 }; - for (std::size_t i = 0u; i < boost::size(expectedCounts); ++i) - { - for (core_t::TTime time = 0; time < expectedCounts[i]; ++time) - { + core_t::TTime expectedCounts[] = {5, 5, 7}; + for (std::size_t i = 0u; i < boost::size(expectedCounts); ++i) { + for (core_t::TTime time = 0; time < expectedCounts[i]; ++time) { addArrival(expectedGatherer, m_ResourceMonitor, startTime + time, expectedGatherer.personName(i)); } } @@ -859,10 +933,21 @@ void CEventRateDataGathererTest::testRemovePeople() peopleToRemove.push_back(6); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer expectedGatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); LOG_DEBUG("checksum = " << gatherer.checksum()); LOG_DEBUG("expected checksum = " << expectedGatherer.checksum()); @@ -875,12 +960,10 @@ void CEventRateDataGathererTest::testRemovePeople() LOG_DEBUG("recycled = " << core::CContainerPrinter::print(gatherer.recycledPersonIds())); LOG_DEBUG("expected recycled = " << core::CContainerPrinter::print(expectedRecycled)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedRecycled), - core::CContainerPrinter::print(gatherer.recycledPersonIds())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedRecycled), core::CContainerPrinter::print(gatherer.recycledPersonIds())); } -void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() -{ +void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { LOG_DEBUG("*** singleSeriesOutOfOrderFinalResultTests ***"); // Test that the various statistics come back as we expect. 
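Throughout the out-of-order tests, latencyTime = latencyBuckets * bucketLength (3 * 600 = 1800s here) bounds how late an event may arrive and still be banked into its bucket; a bucket's feature data only becomes final once an arrival lands a full latency window past the bucket's start. A small stand-alone sketch of that finalization rule, mirroring the tests' inner loop condition "data[i] >= time + latencyTime" (simplified, not the gatherer's actual implementation):

#include <iostream>

using TTime = long;

int main() {
    const TTime bucketLength = 600;
    const TTime latencyBuckets = 3;
    const TTime latencyTime = latencyBuckets * bucketLength; // 1800s

    // The first arrivals from the test's data[]: note 15 arrives after 1200.
    const TTime arrivals[] = {1, 180, 1200, 190, 400, 600, 550, 799, 1199, 15, 2490};

    TTime time = 0; // start of the oldest bucket still open to late events
    for (TTime arrival : arrivals) {
        // Once an arrival is at least latencyTime past a bucket's start,
        // no event inside the latency window can land in it any more.
        while (arrival >= time + latencyTime) {
            std::cout << "bucket [" << time << ", " << time + bucketLength << ") is final\n";
            time += bucketLength;
        }
    }
    return 0;
}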
@@ -892,62 +975,59 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() SModelParams params(bucketLength); params.s_LatencyBuckets = latencyBuckets; - core_t::TTime data[] = - { - 1, 180, 1200, 190, 400, 600, // bucket 1, 2 & 3 - 550, 799, 1199, 15, // bucket 1 & 2 - 2490, // bucket 5 - // bucket 4 is empty - 2420, 2480, 1250, // bucket 3 & 5 - 10000 // sentinel - }; - - std::string expectedPersonCounts[] = - { - std::string("[(0, 6)]"), - std::string("[(0, 3)]"), - std::string("[(0, 2)]"), - std::string("[(0, 0)]"), - std::string("[(0, 3)]") - }; - - std::string expectedPersonNonZeroCounts[] = - { - std::string("[(0, 6)]"), - std::string("[(0, 3)]"), - std::string("[(0, 2)]"), - std::string("[]"), - std::string("[(0, 3)]") - }; - - std::string expectedPersonIndicator[] = - { - std::string("[(0, 1)]"), - std::string("[(0, 1)]"), - std::string("[(0, 1)]"), - std::string("[]"), - std::string("[(0, 1)]") - }; - + core_t::TTime data[] = { + 1, + 180, + 1200, + 190, + 400, + 600, // bucket 1, 2 & 3 + 550, + 799, + 1199, + 15, // bucket 1 & 2 + 2490, // bucket 5 + // bucket 4 is empty + 2420, + 2480, + 1250, // bucket 3 & 5 + 10000 // sentinel + }; + + std::string expectedPersonCounts[] = { + std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), std::string("[(0, 0)]"), std::string("[(0, 3)]")}; + + std::string expectedPersonNonZeroCounts[] = { + std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), std::string("[]"), std::string("[(0, 3)]")}; + + std::string expectedPersonIndicator[] = { + std::string("[(0, 1)]"), std::string("[(0, 1)]"), std::string("[(0, 1)]"), std::string("[]"), std::string("[(0, 1)]")}; // Test the count by bucket and person and bad feature // (which should be ignored). 
{ TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); addPerson(gatherer, m_ResourceMonitor, "p"); core_t::TTime time = startTime; - for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) - { - for (/**/; - j < 5 && data[i] >= time + latencyTime; - time += bucketLength, ++j, gatherer.timeNow(time)) - { + for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { + for (/**/; j < 5 && data[i] >= time + latencyTime; time += bucketLength, ++j, gatherer.timeNow(time)) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -955,14 +1035,12 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } - if (j < 5) - { + if (j < 5) { LOG_DEBUG("Arriving = " << data[i]); addArrival(gatherer, m_ResourceMonitor, data[i], "p"); } @@ -974,19 +1052,26 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() TFeatureVec features; features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); features.push_back(model_t::E_IndividualTotalBucketCountByPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; - for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) - { - for (/**/; - j < 5 && data[i] >= time + latencyTime; - time += bucketLength, ++j, gatherer.timeNow(time)) - { + for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { + for (/**/; j < 5 && data[i] >= time + latencyTime; time += bucketLength, ++j, gatherer.timeNow(time)) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -994,17 +1079,14 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], - core::CContainerPrinter::print(featureData[0].second)); + 
CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], core::CContainerPrinter::print(featureData[0].second)); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualTotalBucketCountByPerson, featureData[1].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], - core::CContainerPrinter::print(featureData[1].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], core::CContainerPrinter::print(featureData[1].second)); testPersistence(params, gatherer); } - if (j < 5) - { + if (j < 5) { addArrival(gatherer, m_ResourceMonitor, data[i], "p"); } } @@ -1014,19 +1096,26 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { TFeatureVec features; features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; - for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) - { - for (/**/; - j < 5 && data[i] >= time + latencyTime; - time += bucketLength, ++j, gatherer.timeNow(time)) - { + for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { + for (/**/; j < 5 && data[i] >= time + latencyTime; time += bucketLength, ++j, gatherer.timeNow(time)) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1034,22 +1123,19 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonIndicator[j], - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonIndicator[j], core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } - if (j < 5) - { + if (j < 5) { addArrival(gatherer, m_ResourceMonitor, data[i], "p"); } } } } -void CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests() -{ +void CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests() { LOG_DEBUG("*** singleSeriesOutOfOrderInterimResultTests ***"); const core_t::TTime startTime = 0; @@ -1058,21 +1144,35 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests() SModelParams params(bucketLength); params.s_LatencyBuckets = latencyBuckets; - core_t::TTime data[] = - { - 1, 1200, 600, // bucket 1, 3 & 2 - 1199, 15, // bucket 2 & 1 - 2490, // bucket 5 - // bucket 4 is empty - 2420, 1250 // bucket 5 & 3 - }; + core_t::TTime data[] = { + 1, + 1200, + 600, // bucket 1, 3 & 2 + 1199, + 15, // bucket 2 & 1 + 2490, // bucket 5 + // bucket 4 is empty + 2420, + 1250 // bucket 5 & 3 + }; TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer 
gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); addPerson(gatherer, m_ResourceMonitor, "p"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1080,103 +1180,80 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests() addArrival(gatherer, m_ResourceMonitor, data[0], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[1], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[2], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[3], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[4], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + 
CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[5], "p"); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1800, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(2400, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[6], "p"); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1800, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(2400, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[7], "p"); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1800, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(2400, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); } -void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() -{ +void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { LOG_DEBUG("*** multipleSeriesOutOfOrderFinalResultTests ***"); // Test that the various statistics come back as we expect @@ -1189,52 +1266,64 @@ void 
CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() SModelParams params(bucketLength); params.s_LatencyBuckets = latencyBuckets; - core_t::TTime data1[] = - { - 1, 15, 1200, 190, 400, 550, // bucket 1, 2 & 3 - 600, 1250, 1199, // bucket 2 & 3 - 180, 799, // bucket 1 & 2 - 2480, // bucket 5 - 2420, 1900, 2490, // bucket 4 & 5 - 10000 // sentinel - }; - core_t::TTime data2[] = - { - 1250, 5, 15, 600, 180, 190, 400, 550, // bucket 1, 2 & 3 - 25, 605, 609, 799, 1199, // bucket 1 & 2 - 1200, 1, 1255, 1950, 1400, // bucket 1, 3 & 4 - 2550, 1300, 2500, // bucket 3 & 5 - 2420, 2480, 2490, 1256, 1900, 2600, // bucket 3, 4 & 5 - 10000 // sentinel - }; - - std::string expectedPersonCounts[] = - { - std::string("[(0, 6), (1, 8)]"), - std::string("[(0, 3), (1, 5)]"), - std::string("[(0, 2), (1, 6)]"), - std::string("[(0, 1), (1, 2)]"), - std::string("[(0, 3), (1, 6)]") - }; + core_t::TTime data1[] = { + 1, + 15, + 1200, + 190, + 400, + 550, // bucket 1, 2 & 3 + 600, + 1250, + 1199, // bucket 2 & 3 + 180, + 799, // bucket 1 & 2 + 2480, // bucket 5 + 2420, + 1900, + 2490, // bucket 4 & 5 + 10000 // sentinel + }; + core_t::TTime data2[] = { + 1250, 5, 15, 600, 180, 190, 400, 550, // bucket 1, 2 & 3 + 25, 605, 609, 799, 1199, // bucket 1 & 2 + 1200, 1, 1255, 1950, 1400, // bucket 1, 3 & 4 + 2550, 1300, 2500, // bucket 3 & 5 + 2420, 2480, 2490, 1256, 1900, 2600, // bucket 3, 4 & 5 + 10000 // sentinel + }; + + std::string expectedPersonCounts[] = {std::string("[(0, 6), (1, 8)]"), + std::string("[(0, 3), (1, 5)]"), + std::string("[(0, 2), (1, 6)]"), + std::string("[(0, 1), (1, 2)]"), + std::string("[(0, 3), (1, 6)]")}; { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); core_t::TTime time = startTime; std::size_t i1 = 0u, i2 = 0u, j = 0u; - for (;;) - { - for (/**/; - j < 5 && std::min(data1[i1], data2[i2]) >= time + latencyTime; - time += bucketLength, ++j) - { + for (;;) { + for (/**/; j < 5 && std::min(data1[i1], data2[i2]) >= time + latencyTime; time += bucketLength, ++j) { LOG_DEBUG("Processing bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1242,25 +1331,20 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } - if (j >= 5) - { + if (j >= 5) { break; } - if (data1[i1] < data2[i2]) - { + if (data1[i1] < data2[i2]) { LOG_DEBUG("Adding arrival for p1 at " << data1[i1]); addArrival(gatherer, 
m_ResourceMonitor, data1[i1], "p1"); ++i1; - } - else - { + } else { LOG_DEBUG("Adding arrival for p2 at " << data2[i2]); addArrival(gatherer, m_ResourceMonitor, data2[i2], "p2"); ++i2; @@ -1284,25 +1368,34 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), core::CContainerPrinter::print(featureData[0].second)); } { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); CPPUNIT_ASSERT_EQUAL(std::size_t(3), addPerson(gatherer, m_ResourceMonitor, "p4")); CPPUNIT_ASSERT_EQUAL(std::size_t(4), addPerson(gatherer, m_ResourceMonitor, "p5")); - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { addArrival(gatherer, m_ResourceMonitor, startTime, gatherer.personName(i)); } addArrival(gatherer, m_ResourceMonitor, startTime + 1, gatherer.personName(2)); @@ -1341,13 +1434,11 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() LOG_DEBUG("featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("[(2, 2), (4, 3)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2, 2), (4, 3)]"), core::CContainerPrinter::print(featureData[0].second)); } } -void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() -{ +void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() { LOG_DEBUG("*** testArrivalBeforeLatencyWindowIsIgnored ***"); const core_t::TTime startTime = 0; @@ -1356,18 +1447,28 @@ void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() SModelParams params(bucketLength); params.s_LatencyBuckets = latencyBuckets; - core_t::TTime data[] = - { - 1800, // Bucket 4, thus bucket 1 values are already out of latency window - 1 // Bucket 1 - }; + core_t::TTime data[] = { + 1800, // Bucket 4, thus bucket 1 values are already out of latency window + 1 // Bucket 1 + }; TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + 
EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); addPerson(gatherer, m_ResourceMonitor, "p"); addArrival(gatherer, m_ResourceMonitor, data[0], "p"); @@ -1379,20 +1480,16 @@ void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() CPPUNIT_ASSERT_EQUAL(std::size_t(0), featureData.size()); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1800, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); } -void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() -{ +void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() { LOG_DEBUG("*** testResetBucketGivenSingleSeries ***"); const core_t::TTime startTime = 0; @@ -1401,57 +1498,62 @@ void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() SModelParams params(bucketLength); params.s_LatencyBuckets = latencyBuckets; - core_t::TTime data[] = - { - 100, 300, // Bucket 1 - 600, 800, 850, // Bucket 2 - 1200 // Bucket 3 - }; + core_t::TTime data[] = { + 100, + 300, // Bucket 1 + 600, + 800, + 850, // Bucket 2 + 1200 // Bucket 3 + }; TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); addPerson(gatherer, m_ResourceMonitor, "p"); - for (std::size_t i = 0; i < boost::size(data); ++i) - { + for (std::size_t i = 0; i < boost::size(data); ++i) { addArrival(gatherer, m_ResourceMonitor, data[i], "p"); } TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.resetBucket(600); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), - 
core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); } -void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() -{ +void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() { LOG_DEBUG("*** testResetBucketGivenMultipleSeries ***"); const core_t::TTime startTime = 0; @@ -1460,25 +1562,37 @@ void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() SModelParams params(bucketLength); params.s_LatencyBuckets = latencyBuckets; - core_t::TTime data[] = - { - 100, 300, // Bucket 1 - 600, 800, 850, // Bucket 2 - 1200 // Bucket 3 - }; + core_t::TTime data[] = { + 100, + 300, // Bucket 1 + 600, + 800, + 850, // Bucket 2 + 1200 // Bucket 3 + }; TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); addPerson(gatherer, m_ResourceMonitor, "p1"); addPerson(gatherer, m_ResourceMonitor, "p2"); addPerson(gatherer, m_ResourceMonitor, "p3"); - for (std::size_t i = 0; i < boost::size(data); ++i) - { + for (std::size_t i = 0; i < boost::size(data); ++i) { addArrival(gatherer, m_ResourceMonitor, data[i], "p1"); addArrival(gatherer, m_ResourceMonitor, data[i], "p2"); addArrival(gatherer, m_ResourceMonitor, data[i], "p3"); @@ -1487,34 +1601,27 @@ void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (1, 3), (2, 3)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (1, 3), (2, 3)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.resetBucket(600); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2), (1, 2), (2, 
2)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0), (1, 0), (2, 0)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0), (1, 0), (2, 0)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), - core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), core::CContainerPrinter::print(featureData[0].second)); } -void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() -{ +void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() { LOG_DEBUG("*** testResetBucketGivenBucketNotAvailable ***"); const core_t::TTime startTime = 0; @@ -1525,10 +1632,21 @@ void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); addPerson(gatherer, m_ResourceMonitor, "p"); addArrival(gatherer, m_ResourceMonitor, 1200, "p"); @@ -1539,87 +1657,82 @@ void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() CPPUNIT_ASSERT(gatherer.resetBucket(1800) == false); } -void CEventRateDataGathererTest::testInfluencerBucketStatistics() -{ - core_t::TTime data[] = - { - 1, 15, 180, 190, 400, 550, // bucket 1 - 600, 799, 1199, // bucket 2 - 1200, 1250, // bucket 3 - // bucket 4 - 2420, 2480, 2490, // bucket 5 - 10000 // sentinel - }; +void CEventRateDataGathererTest::testInfluencerBucketStatistics() { + core_t::TTime data[] = { + 1, + 15, + 180, + 190, + 400, + 550, // bucket 1 + 600, + 799, + 1199, // bucket 2 + 1200, + 1250, // bucket 3 + // bucket 4 + 2420, + 2480, + 2490, // bucket 5 + 10000 // sentinel + }; TTimeVec dataVec(data, &data[15]); TStrVecVec influencers(14, TStrVec(1, "i")); - std::string expectedPersonCounts[] = - { - std::string("[(0, 6, [[(i, ([6], 1))]])]"), - std::string("[(0, 3, [[(i, ([3], 1))]])]"), - std::string("[(0, 2, [[(i, ([2], 1))]])]"), - std::string("[(0, 0)]"), - std::string("[(0, 3, [[(i, ([3], 1))]])]") - }; + std::string expectedPersonCounts[] = {std::string("[(0, 6, [[(i, ([6], 1))]])]"), + std::string("[(0, 3, [[(i, ([3], 1))]])]"), + std::string("[(0, 2, [[(i, ([2], 1))]])]"), + std::string("[(0, 0)]"), + std::string("[(0, 3, [[(i, ([3], 1))]])]")}; TStrVec expectedPersonCountsVec(&expectedPersonCounts[0], &expectedPersonCounts[5]); - std::string expectedPersonNonZeroCounts[] = - { - std::string("[(0, 6, [[(i, ([6], 1))]])]"), - std::string("[(0, 3, [[(i, ([3], 1))]])]"), - std::string("[(0, 2, [[(i, ([2], 1))]])]"), - std::string("[]"), - std::string("[(0, 3, [[(i, ([3], 1))]])]") - }; + std::string expectedPersonNonZeroCounts[] = {std::string("[(0, 6, [[(i, ([6], 1))]])]"), + std::string("[(0, 3, [[(i, ([3], 1))]])]"), + std::string("[(0, 2, [[(i, ([2], 1))]])]"), + std::string("[]"), + std::string("[(0, 3, [[(i, ([3], 1))]])]")}; TStrVec 
expectedPersonNonZeroCountsVec(&expectedPersonNonZeroCounts[0], &expectedPersonNonZeroCounts[5]); - std::string expectedPersonIndicator[] = - { - std::string("[(0, 1, [[(i, ([1], 1))]])]"), - std::string("[(0, 1, [[(i, ([1], 1))]])]"), - std::string("[(0, 1, [[(i, ([1], 1))]])]"), - std::string("[]"), - std::string("[(0, 1, [[(i, ([1], 1))]])]") - }; + std::string expectedPersonIndicator[] = {std::string("[(0, 1, [[(i, ([1], 1))]])]"), + std::string("[(0, 1, [[(i, ([1], 1))]])]"), + std::string("[(0, 1, [[(i, ([1], 1))]])]"), + std::string("[]"), + std::string("[(0, 1, [[(i, ([1], 1))]])]")}; TStrVec expectedPersonIndicatorVec(&expectedPersonIndicator[0], &expectedPersonIndicator[5]); TStrVec expectedArrivalTimeVec(6, std::string("[]")); - std::string expectedInfoContent[] = - { - std::string("[(0, 13, [[(i, ([13], 1))]])]"), - std::string("[(0, 13, [[(i, ([13], 1))]])]"), - std::string("[(0, 13, [[(i, ([13], 1))]])]"), - std::string("[]"), - std::string("[(0, 13, [[(i, ([13], 1))]])]") - }; + std::string expectedInfoContent[] = {std::string("[(0, 13, [[(i, ([13], 1))]])]"), + std::string("[(0, 13, [[(i, ([13], 1))]])]"), + std::string("[(0, 13, [[(i, ([13], 1))]])]"), + std::string("[]"), + std::string("[(0, 13, [[(i, ([13], 1))]])]")}; TStrVec expectedInfoContentVec(&expectedInfoContent[0], &expectedInfoContent[5]); - testInfluencerPerFeature(model_t::E_IndividualCountByBucketAndPerson, - dataVec, influencers, expectedPersonCountsVec, "", m_ResourceMonitor); + testInfluencerPerFeature( + model_t::E_IndividualCountByBucketAndPerson, dataVec, influencers, expectedPersonCountsVec, "", m_ResourceMonitor); - testInfluencerPerFeature(model_t::E_IndividualNonZeroCountByBucketAndPerson, - dataVec, influencers, expectedPersonNonZeroCountsVec, "", m_ResourceMonitor); + testInfluencerPerFeature( + model_t::E_IndividualNonZeroCountByBucketAndPerson, dataVec, influencers, expectedPersonNonZeroCountsVec, "", m_ResourceMonitor); - testInfluencerPerFeature(model_t::E_IndividualLowCountsByBucketAndPerson, - dataVec, influencers, expectedPersonCountsVec, "", m_ResourceMonitor); + testInfluencerPerFeature( + model_t::E_IndividualLowCountsByBucketAndPerson, dataVec, influencers, expectedPersonCountsVec, "", m_ResourceMonitor); - testInfluencerPerFeature(model_t::E_IndividualArrivalTimesByPerson, - dataVec, influencers, expectedArrivalTimeVec, "", m_ResourceMonitor); + testInfluencerPerFeature( + model_t::E_IndividualArrivalTimesByPerson, dataVec, influencers, expectedArrivalTimeVec, "", m_ResourceMonitor); - testInfluencerPerFeature(model_t::E_IndividualLowNonZeroCountByBucketAndPerson, - dataVec, influencers, expectedPersonNonZeroCountsVec, "", m_ResourceMonitor); + testInfluencerPerFeature( + model_t::E_IndividualLowNonZeroCountByBucketAndPerson, dataVec, influencers, expectedPersonNonZeroCountsVec, "", m_ResourceMonitor); - testInfluencerPerFeature(model_t::E_IndividualUniqueCountByBucketAndPerson, - dataVec, influencers, expectedPersonIndicatorVec, "value", m_ResourceMonitor); + testInfluencerPerFeature( + model_t::E_IndividualUniqueCountByBucketAndPerson, dataVec, influencers, expectedPersonIndicatorVec, "value", m_ResourceMonitor); - testInfluencerPerFeature(model_t::E_IndividualInfoContentByBucketAndPerson, - dataVec, influencers, expectedInfoContentVec, "value", m_ResourceMonitor); + testInfluencerPerFeature( + model_t::E_IndividualInfoContentByBucketAndPerson, dataVec, influencers, expectedInfoContentVec, "value", m_ResourceMonitor); } -void CEventRateDataGathererTest::testDistinctStrings() -{ 
+void CEventRateDataGathererTest::testDistinctStrings() {
     using TStoredStringPtrVec = std::vector<core::CStoredStringPtr>;
 
     TSizeSizePr pair(0, 0);
@@ -1636,23 +1749,20 @@ void CEventRateDataGathererTest::testDistinctStrings()
             CPPUNIT_ASSERT_EQUAL(std::string("0"), featureData.print());
         }
 
-        for (std::size_t i = 0; i < 100; ++i)
-        {
+        for (std::size_t i = 0; i < 100; ++i) {
             data.insert("str1", influencers);
             SEventRateFeatureData featureData(0);
             data.populateDistinctCountFeatureData(featureData);
             CPPUNIT_ASSERT_EQUAL(std::string("1"), featureData.print());
         }
-        for (std::size_t i = 0; i < 100; ++i)
-        {
+        for (std::size_t i = 0; i < 100; ++i) {
             data.insert("str2", influencers);
             data.insert("str3", influencers);
             SEventRateFeatureData featureData(0);
             data.populateDistinctCountFeatureData(featureData);
             CPPUNIT_ASSERT_EQUAL(std::string("3"), featureData.print());
         }
-        for (std::size_t i = 1; i < 100; ++i)
-        {
+        for (std::size_t i = 1; i < 100; ++i) {
             std::stringstream ss;
             ss << "str" << i;
             data.insert(ss.str(), influencers);
@@ -1696,16 +1806,14 @@ void CEventRateDataGathererTest::testDistinctStrings()
             SEventRateFeatureData featureData(0);
             data.populateDistinctCountFeatureData(featureData);
 
-            std::sort(featureData.s_InfluenceValues[0].begin(), featureData.s_InfluenceValues[0].end(),
-                      maths::COrderings::SFirstLess());
+            std::sort(featureData.s_InfluenceValues[0].begin(), featureData.s_InfluenceValues[0].end(), maths::COrderings::SFirstLess());
 
-            CPPUNIT_ASSERT_EQUAL(std::string("3, [[(inf1, ([2], 1)), (inf2, ([2], 1)), (inf3, ([1], 1))]]"),
-                                 featureData.print());
+            CPPUNIT_ASSERT_EQUAL(std::string("3, [[(inf1, ([2], 1)), (inf2, ([2], 1)), (inf3, ([1], 1))]]"), featureData.print());
         }
     }
     {
-        // Check we can add more than one influencer
+        // Check we can add more than one influencer
         CUniqueStringFeatureData data;
         TStoredStringPtrVec influencers;
         influencers.push_back(core::CStoredStringPtr());
@@ -1739,10 +1847,9 @@ void CEventRateDataGathererTest::testDistinctStrings()
         {
             SEventRateFeatureData featureData(0);
             data.populateDistinctCountFeatureData(featureData);
-            for (std::size_t i = 0; i < 2; i++)
-            {
-                std::sort(featureData.s_InfluenceValues[i].begin(), featureData.s_InfluenceValues[i].end(),
-                          maths::COrderings::SFirstLess());
+            for (std::size_t i = 0; i < 2; i++) {
+                std::sort(
+                    featureData.s_InfluenceValues[i].begin(), featureData.s_InfluenceValues[i].end(), maths::COrderings::SFirstLess());
             }
             CPPUNIT_ASSERT_EQUAL(std::string("3, [[(inf1, ([2], 1)), (inf2, ([2], 1))], [(inf_v2, ([1], 1)), (inf_v3, ([2], 1))]]"),
                                  featureData.print());
@@ -1757,8 +1864,7 @@ void CEventRateDataGathererTest::testDistinctStrings()
         {
             SEventRateFeatureData featureData(0);
             data.populateInfoContentFeatureData(featureData);
-            CPPUNIT_ASSERT_EQUAL(std::string("0"),
-                                 featureData.print());
+            CPPUNIT_ASSERT_EQUAL(std::string("0"), featureData.print());
         }
 
         {
@@ -1776,8 +1882,7 @@ void CEventRateDataGathererTest::testDistinctStrings()
             CPPUNIT_ASSERT_EQUAL(std::string("18"), featureData.print());
         }
 
-        for (std::size_t i = 1; i < 100; ++i)
-        {
+        for (std::size_t i = 1; i < 100; ++i) {
             std::stringstream ss;
             ss << "str" << i;
             data.insert(ss.str(), influencers);
@@ -1822,11 +1927,9 @@ void CEventRateDataGathererTest::testDistinctStrings()
             SEventRateFeatureData featureData(0);
             data.populateInfoContentFeatureData(featureData);
 
-            std::sort(featureData.s_InfluenceValues[0].begin(), featureData.s_InfluenceValues[0].end(),
-                      maths::COrderings::SFirstLess());
+            std::sort(featureData.s_InfluenceValues[0].begin(), featureData.s_InfluenceValues[0].end(),
maths::COrderings::SFirstLess()); - CPPUNIT_ASSERT_EQUAL(std::string("18, [[(inf1, ([16], 1)), (inf2, ([16], 1)), (inf3, ([12], 1))]]"), - featureData.print()); + CPPUNIT_ASSERT_EQUAL(std::string("18, [[(inf1, ([16], 1)), (inf2, ([16], 1)), (inf3, ([12], 1))]]"), featureData.print()); } } { @@ -1864,10 +1967,9 @@ void CEventRateDataGathererTest::testDistinctStrings() { SEventRateFeatureData featureData(0); data.populateInfoContentFeatureData(featureData); - for (std::size_t i = 0; i < 2; i++) - { - std::sort(featureData.s_InfluenceValues[i].begin(), featureData.s_InfluenceValues[i].end(), - maths::COrderings::SFirstLess()); + for (std::size_t i = 0; i < 2; i++) { + std::sort( + featureData.s_InfluenceValues[i].begin(), featureData.s_InfluenceValues[i].end(), maths::COrderings::SFirstLess()); } CPPUNIT_ASSERT_EQUAL(std::string("18, [[(inf1, ([16], 1)), (inf2, ([16], 1))], [(inf_v2, ([12], 1)), (inf_v3, ([16], 1))]]"), featureData.print()); @@ -1883,16 +1985,27 @@ void CEventRateDataGathererTest::testDistinctStrings() TFeatureVec features; features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, "P", EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "V", - TStrVec(1, "INF"), false, key, features, startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + "P", + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "V", + TStrVec(1, "INF"), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p", "v", 1)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); - for (std::size_t i = 0u; i < 1; ++i) - { + for (std::size_t i = 0u; i < 1; ++i) { CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i)); } CPPUNIT_ASSERT(gatherer.hasFeature(model_t::E_IndividualUniqueCountByBucketAndPerson)); @@ -1916,8 +2029,7 @@ void CEventRateDataGathererTest::testDistinctStrings() } } -void CEventRateDataGathererTest::testDiurnalFeatures() -{ +void CEventRateDataGathererTest::testDiurnalFeatures() { LOG_DEBUG("*** testDiurnalFeatures ***"); const std::string person("p"); const std::string attribute("a"); @@ -1934,16 +2046,26 @@ void CEventRateDataGathererTest::testDiurnalFeatures() TFeatureVec features; features.push_back(model_t::E_IndividualTimeOfDayByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "person", EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, - startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "person", + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); - for (std::size_t i = 0u; i < 1; ++i) - { + for (std::size_t i = 0u; i < 1; ++i) { CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i)); } CPPUNIT_ASSERT(gatherer.hasFeature(model_t::E_IndividualTimeOfDayByBucketAndPerson)); @@ -2050,16 +2172,26 @@ void CEventRateDataGathererTest::testDiurnalFeatures() TFeatureVec features; features.push_back(model_t::E_IndividualTimeOfWeekByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "person", EMPTY_STRING, EMPTY_STRING, TStrVec(), - 
false, key, features,
-                               startTime, 0);
+        CDataGatherer gatherer(model_t::E_EventRate,
+                               model_t::E_None,
+                               params,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               "person",
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               TStrVec(),
+                               false,
+                               key,
+                               features,
+                               startTime,
+                               0);
 
         CPPUNIT_ASSERT(!gatherer.isPopulation());
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures());
-        for (std::size_t i = 0u; i < 1; ++i)
-        {
+        for (std::size_t i = 0u; i < 1; ++i) {
             CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i));
         }
         CPPUNIT_ASSERT(gatherer.hasFeature(model_t::E_IndividualTimeOfWeekByBucketAndPerson));
@@ -2166,16 +2298,26 @@ void CEventRateDataGathererTest::testDiurnalFeatures()
         TFeatureVec features;
         features.push_back(model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute);
-        CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params,
-                               EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "att", EMPTY_STRING, TStrVec(),
-                               false, key, features,
-                               startTime, 0);
+        CDataGatherer gatherer(model_t::E_PopulationEventRate,
+                               model_t::E_None,
+                               params,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               "att",
+                               EMPTY_STRING,
+                               TStrVec(),
+                               false,
+                               key,
+                               features,
+                               startTime,
+                               0);
 
         CPPUNIT_ASSERT(gatherer.isPopulation());
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures());
-        for (std::size_t i = 0u; i < 1; ++i)
-        {
+        for (std::size_t i = 0u; i < 1; ++i) {
             CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i));
         }
         CPPUNIT_ASSERT(gatherer.hasFeature(model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute));
@@ -2283,16 +2425,26 @@ void CEventRateDataGathererTest::testDiurnalFeatures()
         TFeatureVec features;
         features.push_back(model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute);
-        CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params,
-                               EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "att", EMPTY_STRING, TStrVec(),
-                               false, key, features,
-                               startTime, 0);
+        CDataGatherer gatherer(model_t::E_PopulationEventRate,
+                               model_t::E_None,
+                               params,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               "att",
+                               EMPTY_STRING,
+                               TStrVec(),
+                               false,
+                               key,
+                               features,
+                               startTime,
+                               0);
 
         CPPUNIT_ASSERT(gatherer.isPopulation());
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures());
-        for (std::size_t i = 0u; i < 1; ++i)
-        {
+        for (std::size_t i = 0u; i < 1; ++i) {
             CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i));
         }
         CPPUNIT_ASSERT(gatherer.hasFeature(model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute));
@@ -2388,54 +2540,43 @@ void CEventRateDataGathererTest::testDiurnalFeatures()
         CPPUNIT_ASSERT_EQUAL(std::string("a"), gatherer.attributeName(0));
         testPersistence(params, gatherer);
     }
-
 }
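All four diurnal cases above follow the same shape: build a gatherer for a time-of-day or time-of-week feature, feed it arrivals, and check the recorded feature values and persistence. The feature value is plausibly the event's offset within the day or week; the constants and encoding below are illustrative assumptions, not taken from this patch:

#include <iostream>

int main() {
    const long DAY = 86400;   // seconds per day
    const long WEEK = 604800; // seconds per week

    long eventTime = 3 * WEEK + 2 * DAY + 5 * 3600; // an arbitrary timestamp
    std::cout << "time of day:  " << eventTime % DAY << "s\n";  // 18000 (05:00)
    std::cout << "time of week: " << eventTime % WEEK << "s\n"; // 190800
    return 0;
}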
-CppUnit::Test *CEventRateDataGathererTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CEventRateDataGathererTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::singleSeriesTests",
-                                   &CEventRateDataGathererTest::singleSeriesTests) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::multipleSeriesTests",
-                                   &CEventRateDataGathererTest::multipleSeriesTests) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testRemovePeople",
-                                   &CEventRateDataGathererTest::testRemovePeople) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests",
-                                   &CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests",
-                                   &CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests",
-                                   &CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored",
-                                   &CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testResetBucketGivenSingleSeries",
-                                   &CEventRateDataGathererTest::testResetBucketGivenSingleSeries) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testResetBucketGivenMultipleSeries",
-                                   &CEventRateDataGathererTest::testResetBucketGivenMultipleSeries) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable",
-                                   &CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testInfluencerBucketStatistics",
-                                   &CEventRateDataGathererTest::testInfluencerBucketStatistics) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testDistinctStrings",
-                                   &CEventRateDataGathererTest::testDistinctStrings) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testLatencyPersist",
-                                   &CEventRateDataGathererTest::testLatencyPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateDataGathererTest>(
-                                   "CEventRateDataGathererTest::testDiurnalFeatures",
-                                   &CEventRateDataGathererTest::testDiurnalFeatures) );
+CppUnit::Test* CEventRateDataGathererTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRateDataGathererTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::singleSeriesTests",
+                                                                              &CEventRateDataGathererTest::singleSeriesTests));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::multipleSeriesTests",
+                                                                              &CEventRateDataGathererTest::multipleSeriesTests));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::testRemovePeople",
+                                                                              &CEventRateDataGathererTest::testRemovePeople));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests",
+                                                            &CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests",
+                                                            &CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests",
+                                                            &CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored",
+                                                            &CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>(
+        "CEventRateDataGathererTest::testResetBucketGivenSingleSeries", &CEventRateDataGathererTest::testResetBucketGivenSingleSeries));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>(
+        "CEventRateDataGathererTest::testResetBucketGivenMultipleSeries", &CEventRateDataGathererTest::testResetBucketGivenMultipleSeries));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable",
+                                                            &CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::testInfluencerBucketStatistics",
+                                                                              &CEventRateDataGathererTest::testInfluencerBucketStatistics));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::testDistinctStrings",
+                                                                              &CEventRateDataGathererTest::testDistinctStrings));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::testLatencyPersist",
+                                                                              &CEventRateDataGathererTest::testLatencyPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateDataGathererTest>("CEventRateDataGathererTest::testDiurnalFeatures",
+                                                                              &CEventRateDataGathererTest::testDiurnalFeatures));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CEventRateDataGathererTest.h b/lib/model/unittest/CEventRateDataGathererTest.h
index ef46922a47..a4d618bf81 100644
--- a/lib/model/unittest/CEventRateDataGathererTest.h
+++ b/lib/model/unittest/CEventRateDataGathererTest.h
@@ -11,28 +11,27 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CEventRateDataGathererTest : public CppUnit::TestFixture
-{
-    public:
-        void singleSeriesTests();
-        void multipleSeriesTests();
-        void testRemovePeople();
-        void singleSeriesOutOfOrderFinalResultTests();
-        void singleSeriesOutOfOrderInterimResultTests();
-        void multipleSeriesOutOfOrderFinalResultTests();
-        void testArrivalBeforeLatencyWindowIsIgnored();
-        void testResetBucketGivenSingleSeries();
-        void testResetBucketGivenMultipleSeries();
-        void testResetBucketGivenBucketNotAvailable();
-        void testInfluencerBucketStatistics();
-        void testDistinctStrings();
-        void testLatencyPersist();
-        void testDiurnalFeatures();
+class CEventRateDataGathererTest : public CppUnit::TestFixture {
+public:
+    void singleSeriesTests();
+    void multipleSeriesTests();
+    void testRemovePeople();
+    void singleSeriesOutOfOrderFinalResultTests();
+    void singleSeriesOutOfOrderInterimResultTests();
+    void multipleSeriesOutOfOrderFinalResultTests();
+    void testArrivalBeforeLatencyWindowIsIgnored();
+    void testResetBucketGivenSingleSeries();
+    void testResetBucketGivenMultipleSeries();
+    void testResetBucketGivenBucketNotAvailable();
+    void testInfluencerBucketStatistics();
+    void testDistinctStrings();
+    void testLatencyPersist();
+    void testDiurnalFeatures();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 
-    private:
-        ml::model::CResourceMonitor m_ResourceMonitor;
+private:
+    ml::model::CResourceMonitor m_ResourceMonitor;
 };
 
 #endif // INCLUDED_CEventRateDataGathererTest_h
diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc
index 3caf901d37..6358df9a1e 100644
--- a/lib/model/unittest/CEventRateModelTest.cc
+++ b/lib/model/unittest/CEventRateModelTest.cc
@@ -7,12 +7,12 @@
 #include "CEventRateModelTest.h"
 
 #include
-#include
-#include
 #include
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -50,8 +50,7 @@
 using namespace ml;
 using namespace model;
 
-namespace
-{
+namespace {
 
 using TDoubleVec = std::vector<double>;
 using TDoubleVecVec = std::vector<TDoubleVec>;
@@ -76,42 +75,33 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 
 const std::string EMPTY_STRING;
 
-TUInt64Vec rawEventCounts(std::size_t copies = 1)
-{
-    uint64_t counts[] = { 54, 67, 39, 58, 46, 50, 42, 48, 53, 51, 50, 57, 53, 49 };
+TUInt64Vec rawEventCounts(std::size_t copies = 1) {
-TUInt64Vec rawEventCounts(std::size_t copies = 1)
-{
-    uint64_t counts[] = { 54, 67, 39, 58, 46, 50, 42, 48, 53, 51, 50, 57, 53, 49 };
+TUInt64Vec rawEventCounts(std::size_t copies = 1) {
+    uint64_t counts[] = {54, 67, 39, 58, 46, 50, 42, 48, 53, 51, 50, 57, 53, 49};
     TUInt64Vec result;
-    for (std::size_t i = 0u; i < copies; ++i)
-    {
+    for (std::size_t i = 0u; i < copies; ++i) {
         result.insert(result.end(), boost::begin(counts), boost::end(counts));
     }
     return result;
 }

-void generateEvents(const core_t::TTime &startTime,
-                    const core_t::TTime &bucketLength,
-                    const TUInt64Vec &eventCountsPerBucket,
-                    TTimeVec &eventArrivalTimes)
-{
+void generateEvents(const core_t::TTime& startTime,
+                    const core_t::TTime& bucketLength,
+                    const TUInt64Vec& eventCountsPerBucket,
+                    TTimeVec& eventArrivalTimes) {
     // Generate an ordered collection of event arrival times.
     test::CRandomNumbers rng;

     double bucketStartTime = static_cast<double>(startTime);
-    for (auto count : eventCountsPerBucket)
-    {
+    for (auto count : eventCountsPerBucket) {
         double bucketEndTime = bucketStartTime + static_cast<double>(bucketLength);

         TDoubleVec bucketEventTimes;
-        rng.generateUniformSamples(bucketStartTime,
-                                   bucketEndTime - 1.0,
-                                   static_cast<std::size_t>(count),
-                                   bucketEventTimes);
+        rng.generateUniformSamples(bucketStartTime, bucketEndTime - 1.0, static_cast<std::size_t>(count), bucketEventTimes);

         std::sort(bucketEventTimes.begin(), bucketEventTimes.end());

-        for (auto time_ : bucketEventTimes)
-        {
+        for (auto time_ : bucketEventTimes) {
             core_t::TTime time = static_cast<core_t::TTime>(time_);
-            time = std::min(static_cast<core_t::TTime>(bucketEndTime - 1.0),
-                            std::max(static_cast<core_t::TTime>(bucketStartTime), time));
+            time = std::min(static_cast<core_t::TTime>(bucketEndTime - 1.0), std::max(static_cast<core_t::TTime>(bucketStartTime), time));
             eventArrivalTimes.push_back(time);
         }
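For readers without the fixture handy: generateEvents draws count arrival times uniformly inside each bucket, sorts them, and clamps them into the bucket before advancing to the next one. A self-contained equivalent using only the standard library (test::CRandomNumbers is ml's own wrapper; the fixed seed here is an assumption for reproducibility):

    #include <algorithm>
    #include <cstdint>
    #include <random>
    #include <vector>

    // Standalone sketch of the per-bucket sampling scheme used by generateEvents.
    std::vector<int64_t> sampleArrivals(int64_t startTime, int64_t bucketLength,
                                        const std::vector<uint64_t>& countsPerBucket) {
        std::mt19937 rng{42};
        std::vector<int64_t> arrivals;
        double bucketStart = static_cast<double>(startTime);
        for (auto count : countsPerBucket) {
            double bucketEnd = bucketStart + static_cast<double>(bucketLength);
            std::uniform_real_distribution<double> uniform{bucketStart, bucketEnd - 1.0};
            std::vector<double> times;
            for (uint64_t i = 0; i < count; ++i) {
                times.push_back(uniform(rng));
            }
            std::sort(times.begin(), times.end());
            for (double t : times) {
                // Truncation keeps each arrival inside [bucketStart, bucketEnd).
                arrivals.push_back(static_cast<int64_t>(t));
            }
            bucketStart = bucketEnd;
        }
        return arrivals;
    }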
@@ -119,31 +109,24 @@ void generateEvents(const core_t::TTime &startTime,
     }
 }

-void generateSporadicEvents(const core_t::TTime &startTime,
-                            const core_t::TTime &bucketLength,
-                            const TUInt64Vec &nonZeroEventCountsPerBucket,
-                            TTimeVec &eventArrivalTimes)
-{
+void generateSporadicEvents(const core_t::TTime& startTime,
+                            const core_t::TTime& bucketLength,
+                            const TUInt64Vec& nonZeroEventCountsPerBucket,
+                            TTimeVec& eventArrivalTimes) {
     // Generate an ordered collection of event arrival times.
     test::CRandomNumbers rng;

     double bucketStartTime = static_cast<double>(startTime);
-    for (auto count : nonZeroEventCountsPerBucket)
-    {
+    for (auto count : nonZeroEventCountsPerBucket) {
         double bucketEndTime = bucketStartTime + static_cast<double>(bucketLength);

         TDoubleVec bucketEventTimes;
-        rng.generateUniformSamples(bucketStartTime,
-                                   bucketEndTime - 1.0,
-                                   static_cast<std::size_t>(count),
-                                   bucketEventTimes);
+        rng.generateUniformSamples(bucketStartTime, bucketEndTime - 1.0, static_cast<std::size_t>(count), bucketEventTimes);

         std::sort(bucketEventTimes.begin(), bucketEventTimes.end());

-        for (auto time_ : bucketEventTimes)
-        {
+        for (auto time_ : bucketEventTimes) {
             core_t::TTime time = static_cast<core_t::TTime>(time_);
-            time = std::min(static_cast<core_t::TTime>(bucketEndTime - 1.0),
-                            std::max(static_cast<core_t::TTime>(bucketStartTime), time));
+            time = std::min(static_cast<core_t::TTime>(bucketEndTime - 1.0), std::max(static_cast<core_t::TTime>(bucketStartTime), time));
             eventArrivalTimes.push_back(time);
         }
@@ -153,10 +136,7 @@ void generateSporadicEvents(const core_t::TTime &startTime,
     }
 }

-std::size_t addPerson(const std::string &p,
-                      const CModelFactory::TDataGathererPtr &gatherer,
-                      CResourceMonitor &resourceMonitor)
-{
+std::size_t addPerson(const std::string& p, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) {
     CDataGatherer::TStrCPtrVec person;
     person.push_back(&p);
     CEventData result;
@@ -164,21 +144,18 @@ std::size_t addPerson(const std::string &p,
     return *result.personId();
 }

-std::size_t addPersonWithInfluence(const std::string &p,
-                                   const CModelFactory::TDataGathererPtr &gatherer,
-                                   CResourceMonitor &resourceMonitor,
+std::size_t addPersonWithInfluence(const std::string& p,
+                                   const CModelFactory::TDataGathererPtr& gatherer,
+                                   CResourceMonitor& resourceMonitor,
                                    std::size_t numInfluencers,
-                                   TOptionalStr value = TOptionalStr())
-{
+                                   TOptionalStr value = TOptionalStr()) {
     std::string i("i");
     CDataGatherer::TStrCPtrVec person;
     person.push_back(&p);
-    for (std::size_t j = 0; j < numInfluencers; ++j)
-    {
+    for (std::size_t j = 0; j < numInfluencers; ++j) {
         person.push_back(&i);
     }
-    if (value)
-    {
+    if (value) {
         person.push_back(&(value.get()));
     }
     CEventData result;
@@ -186,15 +163,14 @@ std::size_t addPersonWithInfluence(const std::string &p,
     return *result.personId();
 }

-void makeModel(CEventRateModelFactory &factory,
-               const CDataGatherer::TFeatureVec &features,
-               CResourceMonitor &resourceMonitor,
+void makeModel(CEventRateModelFactory& factory,
+               const CDataGatherer::TFeatureVec& features,
+               CResourceMonitor& resourceMonitor,
                core_t::TTime startTime,
                core_t::TTime bucketLength,
-               CModelFactory::TDataGathererPtr &gatherer,
-               CAnomalyDetectorModel::TModelPtr &model,
-               std::size_t numberPeople)
-{
+               CModelFactory::TDataGathererPtr& gatherer,
+               CAnomalyDetectorModel::TModelPtr& model,
+               std::size_t numberPeople) {
     factory.features(features);
     CModelFactory::SGathererInitializationData gathererInitData(startTime);
     gatherer.reset(factory.makeDataGatherer(gathererInitData));
@@ -202,34 +178,28 @@ void makeModel(CEventRateModelFactory &factory,
     model.reset(factory.makeModel(initData));
     CPPUNIT_ASSERT(model);
     CPPUNIT_ASSERT_EQUAL(bucketLength, model->bucketLength());
-    for (std::size_t i = 0u; i < numberPeople; ++i)
-    {
-        CPPUNIT_ASSERT_EQUAL(std::size_t(i),
-                             addPerson("p" + core::CStringUtils::typeToString(i+1), gatherer, resourceMonitor));
+    for (std::size_t i = 0u; i < numberPeople; ++i) {
+        CPPUNIT_ASSERT_EQUAL(std::size_t(i), addPerson("p" + core::CStringUtils::typeToString(i + 1), gatherer, resourceMonitor));
    }
 }

-void addArrival(CDataGatherer &gatherer,
- CResourceMonitor &resourceMonitor, +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, - const std::string &person, - const TOptionalStr &inf1 = TOptionalStr(), - const TOptionalStr &inf2 = TOptionalStr(), - const TOptionalStr &value = TOptionalStr()) -{ + const std::string& person, + const TOptionalStr& inf1 = TOptionalStr(), + const TOptionalStr& inf2 = TOptionalStr(), + const TOptionalStr& value = TOptionalStr()) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); - if (inf1) - { + if (inf1) { fieldValues.push_back(&(inf1.get())); } - if (inf2) - { + if (inf2) { fieldValues.push_back(&(inf2.get())); } - if (value) - { + if (value) { fieldValues.push_back(&(value.get())); } @@ -239,8 +209,7 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -CEventData makeEventData(core_t::TTime time, std::size_t pid) -{ +CEventData makeEventData(core_t::TTime time, std::size_t pid) { CEventData eventData; eventData.time(time); eventData.person(pid); @@ -249,10 +218,7 @@ CEventData makeEventData(core_t::TTime time, std::size_t pid) return eventData; } -CEventData makeEventData(core_t::TTime time, - std::size_t pid, - const std::string value) -{ +CEventData makeEventData(core_t::TTime time, std::size_t pid, const std::string value) { CEventData eventData; eventData.time(time); eventData.person(pid); @@ -262,21 +228,16 @@ CEventData makeEventData(core_t::TTime time, return eventData; } -void handleEvent(const CDataGatherer::TStrCPtrVec &fields, +void handleEvent(const CDataGatherer::TStrCPtrVec& fields, core_t::TTime time, - CModelFactory::TDataGathererPtr &gatherer, - CResourceMonitor &resourceMonitor) -{ + CModelFactory::TDataGathererPtr& gatherer, + CResourceMonitor& resourceMonitor) { CEventData eventResult; eventResult.time(time); gatherer->addArrival(fields, eventResult, resourceMonitor); } -void testModelWithValueField(model_t::EFeature feature, - TSizeVecVecVec &fields, - TStrVec &strings, - CResourceMonitor &resourceMonitor) -{ +void testModelWithValueField(model_t::EFeature feature, TSizeVecVecVec& fields, TStrVec& strings, CResourceMonitor& resourceMonitor) { LOG_DEBUG(" *** testing feature " << model_t::print(feature)); const core_t::TTime startTime = 1346968800; @@ -299,14 +260,10 @@ void testModelWithValueField(model_t::EFeature feature, const core_t::TTime endTime = startTime + (numberBuckets * bucketLength); std::size_t i = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, i++) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, i++) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; - for (std::size_t j = 0; j < fields[i].size(); ++j) - { + for (std::size_t j = 0; j < fields[i].size(); ++j) { CDataGatherer::TStrCPtrVec f; f.push_back(&strings[fields[i][j][0]]); f.push_back(&strings[fields[i][j][1]]); @@ -318,15 +275,11 @@ void testModelWithValueField(model_t::EFeature feature, SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability); + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); - if (i == anomalousBucket) - { + if (i 
== anomalousBucket) { CPPUNIT_ASSERT(annotatedProbability.s_Probability < 0.001); - } - else - { + } else { CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.6); } } @@ -338,8 +291,7 @@ const TSizeDoublePr1Vec NO_CORRELATES; } // unnamed:: -void CEventRateModelTest::testOnlineCountSample() -{ +void CEventRateModelTest::testOnlineCountSample() { LOG_DEBUG("*** testOnlineCountSample ***"); const core_t::TTime startTime = 1346968800; @@ -351,11 +303,10 @@ void CEventRateModelTest::testOnlineCountSample() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); - CEventRateModel *model = dynamic_cast(model_.get()); + CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); - TMathsModelPtr timeseriesModel{ - factory.defaultFeatureModel(model_t::E_IndividualCountByBucketAndPerson, bucketLength, 0.4, true)}; + TMathsModelPtr timeseriesModel{factory.defaultFeatureModel(model_t::E_IndividualCountByBucketAndPerson, bucketLength, 0.4, true)}; maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; // Generate some events. @@ -363,20 +314,14 @@ void CEventRateModelTest::testOnlineCountSample() TUInt64Vec expectedEventCounts(rawEventCounts()); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (/**/; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (/**/; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p1"); count += 1.0; } @@ -387,22 +332,20 @@ void CEventRateModelTest::testOnlineCountSample() maths::CModelAddSamplesParams params_; params_.integer(true) - .nonNegative(true) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + .nonNegative(true) + .propagationInterval(1.0) + .weightStyles(maths::CConstantWeights::COUNT) + .trendWeights(weights) + .priorWeights(weights); double sample{static_cast(expectedEventCounts[j])}; maths::CModel::TTimeDouble2VecSizeTrVec expectedSamples{ - core::make_triple((bucketStartTime + bucketEndTime) / 2, - maths::CModel::TDouble2Vec{sample}, - std::size_t{0})}; + core::make_triple((bucketStartTime + bucketEndTime) / 2, maths::CModel::TDouble2Vec{sample}, std::size_t{0})}; timeseriesModel->addSamples(params_, expectedSamples); // Test we sample the data correctly. 
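            // Two properties are pinned down here: the model's current bucket
            // value for E_IndividualCountByBucketAndPerson must equal the raw
            // number of arrivals fed into the bucket, and the checksum of the
            // model's feature model must match the reference timeseriesModel
            // that received the same samples by hand, i.e. CEventRateModel::sample
            // routed exactly these values into its maths model.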
- CPPUNIT_ASSERT_EQUAL(expectedEventCounts[j], - static_cast(model->currentBucketValue(model_t::E_IndividualCountByBucketAndPerson, - 0, 0, bucketStartTime)[0])); + CPPUNIT_ASSERT_EQUAL( + expectedEventCounts[j], + static_cast(model->currentBucketValue(model_t::E_IndividualCountByBucketAndPerson, 0, 0, bucketStartTime)[0])); CPPUNIT_ASSERT_EQUAL(timeseriesModel->checksum(), model->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)->checksum()); } @@ -442,8 +385,7 @@ void CEventRateModelTest::testOnlineCountSample() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CEventRateModelTest::testOnlineNonZeroCountSample() -{ +void CEventRateModelTest::testOnlineNonZeroCountSample() { LOG_DEBUG("*** testOnlineNonZeroCountSample ***"); const core_t::TTime startTime = 1346968800; @@ -455,7 +397,7 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); - CEventRateModel *model = dynamic_cast(model_.get()); + CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); TMathsModelPtr timeseriesModel{ @@ -467,20 +409,14 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() TUInt64Vec expectedEventCounts = rawEventCounts(); generateSporadicEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); - - std::size_t i = 0u, j= 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength) - { + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + + std::size_t i = 0u, j = 0u; + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p1"); count += 1.0; } @@ -489,28 +425,24 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() model->sample(bucketStartTime, bucketEndTime, m_ResourceMonitor); - if (*model->currentBucketCount(0, bucketStartTime) > 0) - { + if (*model->currentBucketCount(0, bucketStartTime) > 0) { maths::CModelAddSamplesParams params_; params_.integer(true) - .nonNegative(true) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); - double sample{static_cast( - model_t::offsetCountToZero(model_t::E_IndividualNonZeroCountByBucketAndPerson, - static_cast(expectedEventCounts[j])))}; + .nonNegative(true) + .propagationInterval(1.0) + .weightStyles(maths::CConstantWeights::COUNT) + .trendWeights(weights) + .priorWeights(weights); + double sample{static_cast(model_t::offsetCountToZero(model_t::E_IndividualNonZeroCountByBucketAndPerson, + static_cast(expectedEventCounts[j])))}; maths::CModel::TTimeDouble2VecSizeTrVec expectedSamples{ - core::make_triple((bucketStartTime + bucketEndTime) / 2, - maths::CModel::TDouble2Vec{sample}, - std::size_t{0})}; + core::make_triple((bucketStartTime + bucketEndTime) / 2, maths::CModel::TDouble2Vec{sample}, std::size_t{0})}; 
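            // A reading of the sample construction above (inferred from the
            // name, not verified against the model library): offsetCountToZero
            // appears to shift the strictly positive bucket counts so the
            // modelled support starts at zero. Empty buckets never reach this
            // point because the enclosing currentBucketCount(0, bucketStartTime) > 0
            // guard skips them for this feature.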
timeseriesModel->addSamples(params_, expectedSamples); // Test we sample the data correctly. CPPUNIT_ASSERT_EQUAL(expectedEventCounts[j], - static_cast(model->currentBucketValue(model_t::E_IndividualNonZeroCountByBucketAndPerson, - 0, 0, bucketStartTime)[0])); + static_cast(model->currentBucketValue( + model_t::E_IndividualNonZeroCountByBucketAndPerson, 0, 0, bucketStartTime)[0])); CPPUNIT_ASSERT_EQUAL(timeseriesModel->checksum(), model->details()->model(model_t::E_IndividualNonZeroCountByBucketAndPerson, 0)->checksum()); @@ -519,8 +451,7 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() } } -void CEventRateModelTest::testOnlineRare() -{ +void CEventRateModelTest::testOnlineRare() { LOG_DEBUG("*** testOnlineRare ***"); const core_t::TTime startTime = 1346968800; @@ -533,38 +464,34 @@ void CEventRateModelTest::testOnlineRare() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 5); - CEventRateModel *model = dynamic_cast(model_.get()); + CEventRateModel* model = dynamic_cast(model_.get()); core_t::TTime time = startTime; - for (/**/; time < startTime + 10 * bucketLength; time += bucketLength) - { - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p1"); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p2"); + for (/**/; time < startTime + 10 * bucketLength; time += bucketLength) { + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2"); model->sample(time, time + bucketLength, m_ResourceMonitor); } - for (/**/; time < startTime + 50 * bucketLength; time += bucketLength) - { - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p1"); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p2"); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p3"); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p4"); + for (/**/; time < startTime + 50 * bucketLength; time += bucketLength) { + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p3"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p4"); model->sample(time, time + bucketLength, m_ResourceMonitor); } - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p1"); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p2"); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p3"); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p4"); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p5"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p3"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p4"); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p5"); model->sample(time, time + bucketLength, m_ResourceMonitor); TDoubleVec probabilities; - for (std::size_t pid = 0u; pid < 5; ++pid) - { + for (std::size_t pid = 0u; pid < 5; ++pid) { SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - 
CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, - partitioningFields, 0, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 0, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); } @@ -606,8 +533,7 @@ void CEventRateModelTest::testOnlineRare() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CEventRateModelTest::testOnlineProbabilityCalculation() -{ +void CEventRateModelTest::testOnlineProbabilityCalculation() { LOG_DEBUG("*** testOnlineProbabilityCalculation ***"); using TDoubleSizePr = std::pair; @@ -624,7 +550,7 @@ void CEventRateModelTest::testOnlineProbabilityCalculation() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); - CEventRateModel *model = dynamic_cast(model_.get()); + CEventRateModel* model = dynamic_cast(model_.get()); TMinAccumulator minProbabilities(2u); @@ -634,20 +560,14 @@ void CEventRateModelTest::testOnlineProbabilityCalculation() expectedEventCounts[anomalousBucket] *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p1"); count += 1.0; } @@ -658,8 +578,7 @@ void CEventRateModelTest::testOnlineProbabilityCalculation() SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, p)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, p)); LOG_DEBUG("probability = " << p.s_Probability); minProbabilities.add(TDoubleSizePr(p.s_Probability, j)); } @@ -670,8 +589,7 @@ void CEventRateModelTest::testOnlineProbabilityCalculation() CPPUNIT_ASSERT(minProbabilities[0].first / minProbabilities[1].first < 0.1); } -void CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() -{ +void CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() { LOG_DEBUG("*** testOnlineProbabilityCalculationForLowNonZeroCount ***"); core_t::TTime startTime(0); @@ -679,7 +597,7 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() std::size_t lowNonZeroCountBucket = 6u; std::size_t highNonZeroCountBucket = 8u; - std::size_t bucketCounts[] = { 50, 50, 50, 50, 50, 0, 0, 0, 50, 1, 50, 100, 50, 50 }; + std::size_t bucketCounts[] = {50, 50, 50, 50, 50, 0, 0, 0, 50, 1, 50, 100, 50, 50}; SModelParams params(bucketLength); params.s_DecayRate = 0.001; @@ -688,32 +606,26 
@@ void CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); - CEventRateModel *model = dynamic_cast(model_.get()); - + CEventRateModel* model = dynamic_cast(model_.get()); TDoubleVec probabilities; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) - { + for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) { LOG_DEBUG("Writing " << bucketCounts[i] << " values"); - for (std::size_t j = 0u; j < bucketCounts[i]; ++j) - { + for (std::size_t j = 0u; j < bucketCounts[i]; ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p1"); } model->sample(time, time + bucketLength, m_ResourceMonitor); SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - if (model->computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 0, p) == false) - { + if (model->computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 0, p) == false) { continue; } LOG_DEBUG("probability = " << p.s_Probability); - if (*model->currentBucketCount(0, time) > 0) - { + if (*model->currentBucketCount(0, time) > 0) { probabilities.push_back(p.s_Probability); } time += bucketLength; @@ -725,8 +637,7 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() CPPUNIT_ASSERT(probabilities[highNonZeroCountBucket] > 0.9); } -void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() -{ +void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() { LOG_DEBUG("*** testOnlineProbabilityCalculationForHighNonZeroCount ***"); core_t::TTime startTime(0); @@ -734,7 +645,7 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() std::size_t lowNonZeroCountBucket = 6u; std::size_t highNonZeroCountBucket = 8u; - std::size_t bucketCounts[] = { 50, 50, 50, 50, 50, 0, 0, 0, 50, 100, 50, 1, 50, 50 }; + std::size_t bucketCounts[] = {50, 50, 50, 50, 50, 0, 0, 0, 50, 100, 50, 1, 50, 50}; SModelParams params(bucketLength); params.s_DecayRate = 0.001; @@ -743,32 +654,26 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); - CEventRateModel *model = dynamic_cast(model_.get()); - + CEventRateModel* model = dynamic_cast(model_.get()); TDoubleVec probabilities; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) - { + for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) { LOG_DEBUG("Writing " << bucketCounts[i] << " values"); - for (std::size_t j = 0u; j < bucketCounts[i]; ++j) - { + for (std::size_t j = 0u; j < bucketCounts[i]; ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p1"); } model->sample(time, time + bucketLength, m_ResourceMonitor); SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - if (model->computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, p) == false) - { + if (model->computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, p) == false) { continue; } LOG_DEBUG("probability = " << p.s_Probability); - if (*model->currentBucketCount(0, time) > 0) - 
{ + if (*model->currentBucketCount(0, time) > 0) { probabilities.push_back(p.s_Probability); } time += bucketLength; @@ -780,8 +685,7 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() CPPUNIT_ASSERT(probabilities[highNonZeroCountBucket] > 0.9); } -void CEventRateModelTest::testOnlineCorrelatedNoTrend() -{ +void CEventRateModelTest::testOnlineCorrelatedNoTrend() { LOG_DEBUG("*** testOnlineCorrelatedNoTrend ***"); // Check we find the correct correlated variables, and identify @@ -796,19 +700,12 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() test::CRandomNumbers rng; const std::size_t b = 200; - const double means_[] = { 20.0, 25.0, 100.0, 800.0 }; - const double covariances_[][4] = - { - { 3.0, 2.5, 0.0, 0.0 }, - { 2.5, 4.0, 0.0, 0.0 }, - { 0.0, 0.0, 100.0, -500.0 }, - { 0.0, 0.0, -500.0, 3000.0 } - }; + const double means_[] = {20.0, 25.0, 100.0, 800.0}; + const double covariances_[][4] = {{3.0, 2.5, 0.0, 0.0}, {2.5, 4.0, 0.0, 0.0}, {0.0, 0.0, 100.0, -500.0}, {0.0, 0.0, -500.0, 3000.0}}; TDoubleVec means(&means_[0], &means_[4]); TDoubleVecVec covariances; - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { covariances.push_back(TDoubleVec(&covariances_[i][0], &covariances_[i][4])); } TDoubleVecVec samples; @@ -826,61 +723,41 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 4); - CEventRateModel *model = dynamic_cast(model_.get()); + CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); - LOG_DEBUG("Test correlation anomalies"); - std::size_t anomalyBuckets[] = { 100, 160, 190, b }; - double anomalies[][4] = - { - { -5.73, 4.29, 0.0, 0.0 }, - { 0.0, 0.0, 89.99, 15.38 }, - { -7.73, 5.59, 52.99, 9.03 } - }; - - TMinAccumulator probabilities[4] = - { - TMinAccumulator(2), - TMinAccumulator(2), - TMinAccumulator(2), - TMinAccumulator(2) - }; + std::size_t anomalyBuckets[] = {100, 160, 190, b}; + double anomalies[][4] = {{-5.73, 4.29, 0.0, 0.0}, {0.0, 0.0, 89.99, 15.38}, {-7.73, 5.59, 52.99, 9.03}}; + + TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2)}; core_t::TTime time = startTime; - for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) - { - for (std::size_t j = 0u; j < samples[i].size(); ++j) - { - std::string person = std::string("p") + core::CStringUtils::typeToString(j+1); + for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) { + for (std::size_t j = 0u; j < samples[i].size(); ++j) { + std::string person = std::string("p") + core::CStringUtils::typeToString(j + 1); double n = samples[i][j]; - if (i == anomalyBuckets[anomaly]) - { + if (i == anomalyBuckets[anomaly]) { n += anomalies[anomaly][j]; } - for (std::size_t k = 0u; k < static_cast(n); ++k) - { + for (std::size_t k = 0u; k < static_cast(n); ++k) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), person); } } - if (i == anomalyBuckets[anomaly]) - { + if (i == anomalyBuckets[anomaly]) { ++anomaly; } model->sample(time, time + bucketLength, m_ResourceMonitor); - for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) - { + for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) { SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, - partitioningFields, 1, p)); 
+ CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, p)); std::string correlated; - if ( p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 - && p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != 0 - && !p.s_AttributeProbabilities[0].s_Type.isUnconditional()) - { + if (p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 && + p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != 0 && + !p.s_AttributeProbabilities[0].s_Type.isUnconditional()) { correlated = *p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0]; } probabilities[pid].add(TDoubleSizeStrTr(p.s_Probability, i, correlated)); @@ -888,21 +765,12 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() time += bucketLength; } - std::string expected[] = - { - "[(100,p2), (190,p2)]", - "[(100,p1), (190,p1)]", - "[(160,p4), (190,p4)]", - "[(160,p3), (190,p3)]" - }; - for (std::size_t i = 0u; i < boost::size(probabilities); ++i) - { + std::string expected[] = {"[(100,p2), (190,p2)]", "[(100,p1), (190,p1)]", "[(160,p4), (190,p4)]", "[(160,p3), (190,p3)]"}; + for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { std::string actual[2]; - for (std::size_t j = 0u; j < 2; ++j) - { - actual[j] = std::string("(") - + core::CStringUtils::typeToString(probabilities[i][j].second) - + "," + probabilities[i][j].third + ")"; + for (std::size_t j = 0u; j < 2; ++j) { + actual[j] = + std::string("(") + core::CStringUtils::typeToString(probabilities[i][j].second) + "," + probabilities[i][j].third + ")"; } std::sort(actual, actual + 2); CPPUNIT_ASSERT_EQUAL(expected[i], core::CContainerPrinter::print(actual)); @@ -949,60 +817,40 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 4); - CEventRateModel *model = dynamic_cast(model_.get()); + CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); + std::size_t anomalyBuckets[] = {100, 160, 190, b}; + double anomalies[][4] = {{11.07, 14.19, 0.0, 0.0}, {0.0, 0.0, -66.9, 399.95}, {11.07, 14.19, -48.15, 329.95}}; - std::size_t anomalyBuckets[] = { 100, 160, 190, b }; - double anomalies[][4] = - { - { 11.07, 14.19, 0.0, 0.0 }, - { 0.0, 0.0, -66.9, 399.95 }, - { 11.07, 14.19, -48.15, 329.95 } - }; - - TMinAccumulator probabilities[4] = - { - TMinAccumulator(2), - TMinAccumulator(2), - TMinAccumulator(2), - TMinAccumulator(2) - }; + TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2)}; core_t::TTime time = startTime; - for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) - { - for (std::size_t j = 0u; j < samples[i].size(); ++j) - { - std::string person = std::string("p") + core::CStringUtils::typeToString(j+1); + for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) { + for (std::size_t j = 0u; j < samples[i].size(); ++j) { + std::string person = std::string("p") + core::CStringUtils::typeToString(j + 1); double n = samples[i][j]; - if (i == anomalyBuckets[anomaly]) - { + if (i == anomalyBuckets[anomaly]) { n += anomalies[anomaly][j]; } n = std::max(n, 0.0); - for (std::size_t k = 0u; k < static_cast(n); ++k) - { + for (std::size_t k = 0u; k < static_cast(n); ++k) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), person); } } - if (i == anomalyBuckets[anomaly]) - { + if (i == anomalyBuckets[anomaly]) { ++anomaly; } model->sample(time, time + 
bucketLength, m_ResourceMonitor); - for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) - { + for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) { SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, - partitioningFields, 1, p)); + CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, p)); std::string correlated; - if ( p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 - && p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != 0 - && !p.s_AttributeProbabilities[0].s_Type.isUnconditional()) - { + if (p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 && + p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != 0 && + !p.s_AttributeProbabilities[0].s_Type.isUnconditional()) { correlated = *p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0]; } probabilities[pid].add(TDoubleSizeStrTr(p.s_Probability, i, correlated)); @@ -1010,21 +858,12 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() time += bucketLength; } - std::string expected[] = - { - "[(100,), (190,)]", - "[(100,), (190,)]", - "[(160,), (190,)]", - "[(160,), (190,)]" - }; - for (std::size_t i = 0u; i < boost::size(probabilities); ++i) - { + std::string expected[] = {"[(100,), (190,)]", "[(100,), (190,)]", "[(160,), (190,)]", "[(160,), (190,)]"}; + for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { std::string actual[2]; - for (std::size_t j = 0u; j < 2; ++j) - { - actual[j] = std::string("(") - + core::CStringUtils::typeToString(probabilities[i][j].second) - + "," + probabilities[i][j].third + ")"; + for (std::size_t j = 0u; j < 2; ++j) { + actual[j] = + std::string("(") + core::CStringUtils::typeToString(probabilities[i][j].second) + "," + probabilities[i][j].third + ")"; } std::sort(actual, actual + 2); CPPUNIT_ASSERT_EQUAL(expected[i], core::CContainerPrinter::print(actual)); @@ -1032,8 +871,7 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() } } -void CEventRateModelTest::testOnlineCorrelatedTrend() -{ +void CEventRateModelTest::testOnlineCorrelatedTrend() { LOG_DEBUG("*** testOnlineCorrelatedTrend ***"); // FIXME @@ -1052,49 +890,28 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() rng.discard(200000); const std::size_t b = 2880; - const double means_[] = { 20.0, 25.0, 50.0, 100.0 }; - const double covariances_[][4] = - { - { 30.0, 20.0, 0.0, 0.0 }, - { 20.0, 40.0, 0.0, 0.0 }, - { 0.0, 0.0, 60.0, -50.0 }, - { 0.0, 0.0, -50.0, 60.0 } - }; - double trends[][24] = - { - { 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 10.0, 11.0, 10.0, 8.0, 8.0, - 7.0, 9.0, 12.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, - { 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 8.0, 15.0, 18.0, 14.0, 12.0, 12.0, - 11.0, 10.0, 16.0, 7.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, - { 4.0, 3.0, 5.0, 20.0, 20.0, 40.0, 80.0, 150.0, 180.0, 140.0, 120.0, 120.0, - 110.0, 100.0, 160.0, 70.0, 40.0, 20.0, 10.0, 3.0, 5.0, 2.0, 1.0, 3.0 }, - { 0.0, 0.0, 0.0, 20.0, 20.0, 40.0, 80.0, 150.0, 180.0, 140.0, 120.0, 120.0, - 110.0, 100.0, 160.0, 70.0, 40.0, 40.0, 30.0, 20.0, 10.0, 0.0, 0.0, 0.0 }, - }; + const double means_[] = {20.0, 25.0, 50.0, 100.0}; + const double covariances_[][4] = {{30.0, 20.0, 0.0, 0.0}, {20.0, 40.0, 0.0, 0.0}, {0.0, 0.0, 60.0, -50.0}, {0.0, 0.0, -50.0, 60.0}}; + double trends[][24] = { + {0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 10.0, 11.0, 10.0, 8.0, 8.0, 7.0, 9.0, 12.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 
2.0, 2.0, 4.0, 8.0, 15.0, 18.0, 14.0, 12.0, 12.0, 11.0, 10.0, 16.0, 7.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {4.0, 3.0, 5.0, 20.0, 20.0, 40.0, 80.0, 150.0, 180.0, 140.0, 120.0, 120.0, + 110.0, 100.0, 160.0, 70.0, 40.0, 20.0, 10.0, 3.0, 5.0, 2.0, 1.0, 3.0}, + {0.0, 0.0, 0.0, 20.0, 20.0, 40.0, 80.0, 150.0, 180.0, 140.0, 120.0, 120.0, + 110.0, 100.0, 160.0, 70.0, 40.0, 40.0, 30.0, 20.0, 10.0, 0.0, 0.0, 0.0}, + }; TDoubleVec means(&means_[0], &means_[4]); TDoubleVecVec covariances; - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { covariances.push_back(TDoubleVec(&covariances_[i][0], &covariances_[i][4])); } TDoubleVecVec samples; rng.generateMultivariateNormalSamples(means, covariances, b, samples); - std::size_t anomalyBuckets[] = { 1950, 2400, 2700, b }; - double anomalies[][4] = - { - { -23.9, 19.7, 0.0, 0.0 }, - { 0.0, 0.0, 36.4, 36.4 }, - { -28.7, 30.4, 36.4, 36.4 } - }; - TMinAccumulator probabilities[4] = - { - TMinAccumulator(2), - TMinAccumulator(2), - TMinAccumulator(2), - TMinAccumulator(2) - }; + std::size_t anomalyBuckets[] = {1950, 2400, 2700, b}; + double anomalies[][4] = {{-23.9, 19.7, 0.0, 0.0}, {0.0, 0.0, 36.4, 36.4}, {-28.7, 30.4, 36.4, 36.4}}; + TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2)}; SModelParams params(bucketLength); params.s_DecayRate = 0.0002; @@ -1107,50 +924,41 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 4); - CEventRateModel *model = dynamic_cast(model_.get()); + CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); core_t::TTime time = startTime; - for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) - { + for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) { LOG_DEBUG(i << ") processing bucket [" << time << ", " << time + bucketLength << ")"); std::size_t hour1 = static_cast((time / 3600) % 24); std::size_t hour2 = (hour1 + 1) % 24; double dt = static_cast(time % 3600) / 3600.0; - for (std::size_t j = 0u; j < samples[i].size(); ++j) - { - std::string person = std::string("p") + core::CStringUtils::typeToString(j+1); + for (std::size_t j = 0u; j < samples[i].size(); ++j) { + std::string person = std::string("p") + core::CStringUtils::typeToString(j + 1); double n = (1.0 - dt) * trends[j][hour1] + dt * trends[j][hour2] + samples[i][j]; - if (i == anomalyBuckets[anomaly]) - { + if (i == anomalyBuckets[anomaly]) { n += anomalies[anomaly][j]; } n = std::max(n / 3.0, 0.0); - for (std::size_t k = 0u; k < static_cast(n); ++k) - { + for (std::size_t k = 0u; k < static_cast(n); ++k) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), person); } } - if (i == anomalyBuckets[anomaly]) - { + if (i == anomalyBuckets[anomaly]) { ++anomaly; } model->sample(time, time + bucketLength, m_ResourceMonitor); - for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) - { + for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) { SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, - partitioningFields, 1, p)); + CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, p)); std::string correlated; - if ( p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 - && 
p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != 0 - && !p.s_AttributeProbabilities[0].s_Type.isUnconditional()) - { + if (p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 && + p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != 0 && !p.s_AttributeProbabilities[0].s_Type.isUnconditional()) { correlated = *p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0]; } probabilities[pid].add(TDoubleSizeStrTr(p.s_Probability, i, correlated)); @@ -1158,30 +966,20 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() time += bucketLength; } - std::string expected[] = - { - "[(1950,p2), (2700,p2)]", - "[(1950,p1), (2700,p1)]", - "[(2400,p4), (2700,p4)]", - "[(2400,p3), (2700,p3)]" - }; - for (std::size_t i = 0u; i < boost::size(probabilities); ++i) - { + std::string expected[] = {"[(1950,p2), (2700,p2)]", "[(1950,p1), (2700,p1)]", "[(2400,p4), (2700,p4)]", "[(2400,p3), (2700,p3)]"}; + for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { LOG_DEBUG(probabilities[i].print()); std::string actual[2]; - for (std::size_t j = 0u; j < 2; ++j) - { - actual[j] = std::string("(") - + core::CStringUtils::typeToString(probabilities[i][j].second) - + "," + probabilities[i][j].third + ")"; + for (std::size_t j = 0u; j < 2; ++j) { + actual[j] = + std::string("(") + core::CStringUtils::typeToString(probabilities[i][j].second) + "," + probabilities[i][j].third + ")"; } std::sort(actual, actual + 2); CPPUNIT_ASSERT_EQUAL(expected[i], core::CContainerPrinter::print(actual)); } } -void CEventRateModelTest::testPrune() -{ +void CEventRateModelTest::testPrune() { LOG_DEBUG("*** testPrune ***"); using TUInt64VecVec = std::vector; @@ -1191,15 +989,8 @@ void CEventRateModelTest::testPrune() const core_t::TTime startTime = 1346968800; const core_t::TTime bucketLength = 3600; - const std::string people[] = - { - std::string("p1"), - std::string("p2"), - std::string("p3"), - std::string("p4"), - std::string("p5"), - std::string("p6") - }; + const std::string people[] = { + std::string("p1"), std::string("p2"), std::string("p3"), std::string("p4"), std::string("p5"), std::string("p6")}; TUInt64VecVec eventCounts; eventCounts.push_back(TUInt64Vec(1000u, 0)); @@ -1231,58 +1022,46 @@ void CEventRateModelTest::testPrune() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 0); - CEventRateModel *model = dynamic_cast(model_.get()); + CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); CModelFactory::TDataGathererPtr expectedGatherer; CAnomalyDetectorModel::TModelPtr expectedModel_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, expectedGatherer, expectedModel_, 0); - CEventRateModel *expectedModel = dynamic_cast(expectedModel_.get()); + CEventRateModel* expectedModel = dynamic_cast(expectedModel_.get()); CPPUNIT_ASSERT(expectedModel); - TEventDataVec events; - for (std::size_t i = 0u; i < eventCounts.size(); ++i) - { + for (std::size_t i = 0u; i < eventCounts.size(); ++i) { TTimeVec eventTimes; generateEvents(startTime, bucketLength, eventCounts[i], eventTimes); - if (eventTimes.size() > 0) - { + if (eventTimes.size() > 0) { std::sort(eventTimes.begin(), eventTimes.end()); std::size_t pid = addPerson(people[i], gatherer, m_ResourceMonitor); - for (auto time : eventTimes) - { + for (auto time : eventTimes) { events.push_back(makeEventData(time, pid)); } } } - std::sort(events.begin(), events.end(), - [](const CEventData 
&lhs, - const CEventData &rhs) { return lhs.time() < rhs.time(); }); + std::sort(events.begin(), events.end(), [](const CEventData& lhs, const CEventData& rhs) { return lhs.time() < rhs.time(); }); TEventDataVec expectedEvents; expectedEvents.reserve(events.size()); TSizeSizeMap mapping; - for (auto person : expectedPeople) - { + for (auto person : expectedPeople) { mapping[person] = addPerson(people[person], expectedGatherer, m_ResourceMonitor); } - for (const auto &event : events) - { - if (std::binary_search(expectedPeople.begin(), expectedPeople.end(), event.personId())) - { + for (const auto& event : events) { + if (std::binary_search(expectedPeople.begin(), expectedPeople.end(), event.personId())) { expectedEvents.push_back(makeEventData(event.time(), mapping[*event.personId()])); } } - for (auto person : expectedPeople) - { + for (auto person : expectedPeople) { addPerson(people[person], expectedGatherer, m_ResourceMonitor); } core_t::TTime bucketStart = startTime; - for (const auto &event : events) - { - while (event.time() >= bucketStart + bucketLength) - { + for (const auto& event : events) { + while (event.time() >= bucketStart + bucketLength) { model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } @@ -1295,10 +1074,8 @@ void CEventRateModelTest::testPrune() CPPUNIT_ASSERT_EQUAL(maxDimensionBeforePrune, maxDimensionAfterPrune); bucketStart = maths::CIntegerTools::floor(expectedEvents[0].time(), bucketLength); - for (const auto &event : expectedEvents) - { - while (event.time() >= bucketStart + bucketLength) - { + for (const auto& event : expectedEvents) { + while (event.time() >= bucketStart + bucketLength) { expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } @@ -1314,8 +1091,7 @@ void CEventRateModelTest::testPrune() bucketStart = gatherer->currentBucketStartTime() + bucketLength; TStrVec newPeople{"p7", "p8", "p9"}; - for (const auto &person : newPeople) - { + for (const auto& person : newPeople) { std::size_t newPid = addPerson(person, gatherer, m_ResourceMonitor); CPPUNIT_ASSERT(newPid < 6); std::size_t expectedNewPid = addPerson(person, expectedGatherer, m_ResourceMonitor); @@ -1340,41 +1116,26 @@ void CEventRateModelTest::testPrune() CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, clonedModel->dataGatherer().numberActivePeople()); } -void CEventRateModelTest::testKey() -{ - function_t::EFunction countFunctions[] = - { - function_t::E_IndividualCount, - function_t::E_IndividualNonZeroCount, - function_t::E_IndividualRareCount, - function_t::E_IndividualRareNonZeroCount, - function_t::E_IndividualRare, - function_t::E_IndividualLowCounts, - function_t::E_IndividualHighCounts - }; - bool useNull[] = { true, false }; - std::string byField[] = { "", "by" }; - std::string partitionField[] = { "", "partition" }; +void CEventRateModelTest::testKey() { + function_t::EFunction countFunctions[] = {function_t::E_IndividualCount, + function_t::E_IndividualNonZeroCount, + function_t::E_IndividualRareCount, + function_t::E_IndividualRareNonZeroCount, + function_t::E_IndividualRare, + function_t::E_IndividualLowCounts, + function_t::E_IndividualHighCounts}; + bool useNull[] = {true, false}; + std::string byField[] = {"", "by"}; + std::string partitionField[] = {"", "partition"}; CAnomalyDetectorModelConfig config = CAnomalyDetectorModelConfig::defaultConfig(); int identifier = 0; - for (std::size_t i = 0u; i < boost::size(countFunctions); ++i) - { - for (std::size_t j = 0u; j 
< boost::size(useNull); ++j) - { - for (std::size_t k = 0u; k < boost::size(byField); ++k) - { - for (std::size_t l = 0u; l < boost::size(partitionField); ++l) - { - CSearchKey key(++identifier, - countFunctions[i], - useNull[j], - model_t::E_XF_None, - "", - byField[k], - "", - partitionField[l]); + for (std::size_t i = 0u; i < boost::size(countFunctions); ++i) { + for (std::size_t j = 0u; j < boost::size(useNull); ++j) { + for (std::size_t k = 0u; k < boost::size(byField); ++k) { + for (std::size_t l = 0u; l < boost::size(partitionField); ++l) { + CSearchKey key(++identifier, countFunctions[i], useNull[j], model_t::E_XF_None, "", byField[k], "", partitionField[l]); CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = config.factory(key); @@ -1387,8 +1148,7 @@ void CEventRateModelTest::testKey() } } -void CEventRateModelTest::testModelsWithValueFields() -{ +void CEventRateModelTest::testModelsWithValueFields() { // Check that attributeConditional features are correctly // marked as such: // Create some models with attribute conditional data and @@ -1406,19 +1166,16 @@ void CEventRateModelTest::testModelsWithValueFields() strings.push_back("c2"); TSizeVecVecVec fieldsPerBucket; - for (std::size_t i = 0; i < numberBuckets; i++) - { + for (std::size_t i = 0; i < numberBuckets; i++) { TSizeVecVec fields; std::size_t attribute1Strings = 10; std::size_t attribute2Strings = 10; - if (i == anomalousBucket) - { + if (i == anomalousBucket) { attribute1Strings = 5; attribute2Strings = 15; } - for (std::size_t j = 0; j < std::max(attribute1Strings, attribute2Strings); j++) - { + for (std::size_t j = 0; j < std::max(attribute1Strings, attribute2Strings); j++) { std::ostringstream ss1; std::ostringstream ss2; ss1 << "one_plus_" << i << "_" << j; @@ -1426,8 +1183,7 @@ void CEventRateModelTest::testModelsWithValueFields() strings.push_back(ss1.str()); strings.push_back(ss2.str()); - if (j < attribute1Strings) - { + if (j < attribute1Strings) { TSizeVec f; f.push_back(0); f.push_back(1); @@ -1435,8 +1191,7 @@ void CEventRateModelTest::testModelsWithValueFields() fields.push_back(f); } - if (j < attribute2Strings) - { + if (j < attribute2Strings) { TSizeVec f; f.push_back(0); f.push_back(2); @@ -1446,10 +1201,7 @@ void CEventRateModelTest::testModelsWithValueFields() } fieldsPerBucket.push_back(fields); } - testModelWithValueField(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute, - fieldsPerBucket, - strings, - m_ResourceMonitor); + testModelWithValueField(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute, fieldsPerBucket, strings, m_ResourceMonitor); } { // Check E_PopulationInfoContentByBucketPersonAndAttribute @@ -1467,14 +1219,12 @@ void CEventRateModelTest::testModelsWithValueFields() TSizeVecVecVec fieldsPerBucket; - for (std::size_t i = 0; i < numberBuckets; i++) - { + for (std::size_t i = 0; i < numberBuckets; i++) { TSizeVecVec fields; TSizeVec fb; - if (i == anomalousBucket) - { + if (i == anomalousBucket) { // Load "c1" with "a" and "b" fields.push_back(TSizeVec()); fields.back().push_back(0); @@ -1494,9 +1244,7 @@ void CEventRateModelTest::testModelsWithValueFields() fields.back().push_back(0); fields.back().push_back(2); fields.back().push_back(4); - } - else - { + } else { // Load "c1" and "c2" with similarly random strings fields.push_back(TSizeVec()); fields.back().push_back(0); @@ -1519,15 +1267,11 @@ void CEventRateModelTest::testModelsWithValueFields() fieldsPerBucket.push_back(fields); } - 
testModelWithValueField(model_t::E_PopulationInfoContentByBucketPersonAndAttribute, - fieldsPerBucket, - strings, - m_ResourceMonitor); + testModelWithValueField(model_t::E_PopulationInfoContentByBucketPersonAndAttribute, fieldsPerBucket, strings, m_ResourceMonitor); } } -void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() -{ +void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { LOG_DEBUG("*** testCountProbabilityCalculationWithInfluence ***"); const core_t::TTime startTime = 1346968800; @@ -1548,7 +1292,7 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast(modelHolder.get()); + CEventRateModel* model = dynamic_cast(modelHolder.get()); CPPUNIT_ASSERT(model); // Generate some events. @@ -1557,21 +1301,15 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() expectedEventCounts.back() *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1")); count += 1.0; } @@ -1582,16 +1320,15 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } // All the influence should be assigned to our one influencer - CPPUNIT_ASSERT_EQUAL(std::string("[((IF1, inf1), 1)]"), - core::CContainerPrinter::print(lastInfluencersResult)); + CPPUNIT_ASSERT_EQUAL(std::string("[((IF1, inf1), 1)]"), core::CContainerPrinter::print(lastInfluencersResult)); } { // Test single influence name, two influence values @@ -1608,7 +1345,7 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); 
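        // addPersonWithInfluence (defined near the top of this file) registers
        // person "p" with one influencer field; asserting that the returned id
        // is 0 guarantees the computeProbability(0 /*pid*/, ...) calls below
        // score exactly this person.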
CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast(modelHolder.get()); + CEventRateModel* model = dynamic_cast(modelHolder.get()); CPPUNIT_ASSERT(model); // Generate some events. @@ -1617,21 +1354,15 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() expectedEventCounts.back() *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { std::stringstream ss; ss << "inf" << (i % 2); const std::string inf(ss.str()); @@ -1645,8 +1376,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); @@ -1656,8 +1387,7 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() // is about twice the regular count, each influencer contributes a lot to // the anomaly CPPUNIT_ASSERT_EQUAL(std::size_t(2), lastInfluencersResult.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, - lastInfluencersResult[1].second, 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); CPPUNIT_ASSERT(lastInfluencersResult[0].second > 0.8); } { @@ -1675,7 +1405,7 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast(modelHolder.get()); + CEventRateModel* model = dynamic_cast(modelHolder.get()); CPPUNIT_ASSERT(model); // Generate some events. 
@@ -1684,21 +1414,15 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() expectedEventCounts.back() *= 6; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { std::stringstream ss; ss << "inf" << (i % 2); const std::string inf(ss.str()); @@ -1712,8 +1436,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); @@ -1724,8 +1448,7 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() // the anomaly, but less than the previous test as each the results would // be anomalous even without the contribution from the influencer CPPUNIT_ASSERT_EQUAL(std::size_t(2), lastInfluencersResult.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, - lastInfluencersResult[1].second, 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); CPPUNIT_ASSERT(lastInfluencersResult[0].second > 0.5); CPPUNIT_ASSERT(lastInfluencersResult[0].second < 0.6); } @@ -1744,7 +1467,7 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(modelHolder.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(modelHolder.get()); CPPUNIT_ASSERT(model); // Generate some events.
@@ -1753,25 +1476,18 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() expectedEventCounts.back() *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { std::stringstream ss; ss << "inf"; - if (i % 10 == 0) - { + if (i % 10 == 0) { ss << "_extra"; } const std::string inf(ss.str()); @@ -1785,8 +1501,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); @@ -1813,7 +1529,7 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 2)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(modelHolder.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(modelHolder.get()); CPPUNIT_ASSERT(model); // Generate some events.
@@ -1822,25 +1538,18 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() expectedEventCounts.back() *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { std::stringstream ss; ss << "inf"; - if (i % 10 == 0) - { + if (i % 10 == 0) { ss << "_extra"; } const std::string inf1(ss.str()); @@ -1857,8 +1566,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); @@ -1877,8 +1586,7 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() } } -void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() -{ +void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() { LOG_DEBUG("*** testCountProbabilityCalculationWithInfluence ***"); const core_t::TTime startTime = 1346968800; @@ -1899,7 +1607,7 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1, TOptionalStr("v"))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(modelHolder.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(modelHolder.get()); CPPUNIT_ASSERT(model); const std::string uniqueValue("str_value"); @@ -1908,30 +1616,22 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() TUInt64Vec expectedEventCounts = rawEventCounts(); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime =
startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1"), TOptionalStr(uniqueValue)); count += 1.0; } - if (i == eventTimes.size()) - { + if (i == eventTimes.size()) { // Generate anomaly LOG_DEBUG("Generating anomaly"); - for (std::size_t k = 0; k < 20; k++) - { + for (std::size_t k = 0; k < 20; k++) { std::stringstream ss; ss << uniqueValue << "_" << k; addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf1"), TOptionalStr(ss.str())); @@ -1944,16 +1644,15 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } // All the influence should be assigned to our one influencer - CPPUNIT_ASSERT_EQUAL(std::string("[((IF1, inf1), 1)]"), - core::CContainerPrinter::print(lastInfluencersResult)); + CPPUNIT_ASSERT_EQUAL(std::string("[((IF1, inf1), 1)]"), core::CContainerPrinter::print(lastInfluencersResult)); } { // Test single influence name, two influence values @@ -1970,7 +1669,7 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1, TOptionalStr("v"))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(modelHolder.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(modelHolder.get()); CPPUNIT_ASSERT(model); const std::string uniqueValue("str_value"); @@ -1979,39 +1678,28 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() TUInt64Vec expectedEventCounts = rawEventCounts(); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double
count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1"), TOptionalStr(uniqueValue)); count += 1.0; } - if (i == eventTimes.size()) - { + if (i == eventTimes.size()) { // Generate anomaly LOG_DEBUG("Generating anomaly"); - for (std::size_t k = 1; k < 20; k++) - { + for (std::size_t k = 1; k < 20; k++) { std::stringstream ss; ss << uniqueValue << "_" << k; CEventData d = makeEventData(eventTimes[i - 1], 0, ss.str()); - if (k % 2 == 0) - { + if (k % 2 == 0) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf1"), TOptionalStr(ss.str())); - } - else - { + } else { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf2"), TOptionalStr(ss.str())); } } @@ -2023,8 +1711,8 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); @@ -2034,8 +1722,7 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() // is about twice the regular count, each influencer contributes a lot to // the anomaly CPPUNIT_ASSERT_EQUAL(std::size_t(2), lastInfluencersResult.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, - lastInfluencersResult[1].second, 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); CPPUNIT_ASSERT(lastInfluencersResult[0].second > 0.6); } { @@ -2053,7 +1740,7 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1, TOptionalStr("v"))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(modelHolder.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(modelHolder.get()); CPPUNIT_ASSERT(model); const std::string uniqueValue("str_value"); @@ -2062,38 +1749,27 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() TUInt64Vec expectedEventCounts = rawEventCounts(); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime
+= bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1"), TOptionalStr(uniqueValue)); count += 1.0; } - if (i == eventTimes.size()) - { + if (i == eventTimes.size()) { // Generate anomaly LOG_DEBUG("Generating anomaly"); - for (std::size_t k = 1; k < 20; k++) - { + for (std::size_t k = 1; k < 20; k++) { std::stringstream ss; ss << uniqueValue << "_" << k; - if (k == 1) - { + if (k == 1) { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf2"), TOptionalStr(ss.str())); - } - else - { + } else { addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf1"), TOptionalStr(ss.str())); } } @@ -2105,8 +1781,8 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); @@ -2133,7 +1809,7 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 2, TOptionalStr("v"))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(modelHolder.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(modelHolder.get()); CPPUNIT_ASSERT(model); const std::string uniqueValue("str_value"); @@ -2142,36 +1818,33 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() TUInt64Vec expectedEventCounts = rawEventCounts(); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG("startTime = " << startTime - << ", endTime = " << endTime - << ", # events = " << eventTimes.size()); + LOG_DEBUG("startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; - bucketStartTime < endTime; - bucketStartTime += bucketLength, ++j) - { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; - for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) - { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1"), TOptionalStr("inf1"), TOptionalStr(uniqueValue)); + for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { + addArrival(*gatherer, + m_ResourceMonitor, + eventTimes[i], + "p", + TOptionalStr("inf1"), + TOptionalStr("inf1"), +
TOptionalStr(uniqueValue)); count += 1.0; } - if (i == eventTimes.size()) - { + if (i == eventTimes.size()) { // Generate anomaly LOG_DEBUG("Generating anomaly"); - for (std::size_t k = 1; k < 22; k++) - { + for (std::size_t k = 1; k < 22; k++) { std::stringstream ss1; ss1 << uniqueValue << "_" << k; std::stringstream ss2; ss2 << "inf"; - if (i % 10 == 0) - { + if (i % 10 == 0) { ss2 << "_extra"; } const std::string inf1(ss2.str()); @@ -2180,7 +1853,13 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() LOG_DEBUG("Inf1 = " << inf1); LOG_DEBUG("Inf2 = " << inf2); LOG_DEBUG("Value = " << ss1.str()); - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr(inf1), TOptionalStr(inf2), TOptionalStr(ss1.str())); + addArrival(*gatherer, + m_ResourceMonitor, + eventTimes[i - 1], + "p", + TOptionalStr(inf1), + TOptionalStr(inf2), + TOptionalStr(ss1.str())); } } @@ -2190,8 +1869,8 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, bucketStartTime, bucketEndTime, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT( + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); @@ -2210,8 +1889,7 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() } } -void CEventRateModelTest::testOnlineRareWithInfluence() -{ +void CEventRateModelTest::testOnlineRareWithInfluence() { LOG_DEBUG("*** testOnlineRareWithInfluence ***"); const core_t::TTime startTime = 1346968800; @@ -2232,38 +1910,35 @@ void CEventRateModelTest::testOnlineRareWithInfluence() CPPUNIT_ASSERT_EQUAL(std::size_t(4), addPersonWithInfluence("p5", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(modelHolder.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(modelHolder.get()); CPPUNIT_ASSERT(model); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; core_t::TTime time = startTime; - for (/**/; time < startTime + 50 * bucketLength; time += bucketLength) - { - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p1", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p2", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p3", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p4", TOptionalStr("inf1")); + for (/**/; time < startTime + 50 * bucketLength; time += bucketLength) { + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1", TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2", TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p3", TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p4", TOptionalStr("inf1")); model->sample(time, time + bucketLength, m_ResourceMonitor); } { - addArrival(*gatherer,
m_ResourceMonitor, time + bucketLength/2, "p1", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p2", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p3", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p4", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength/2, "p5", TOptionalStr("inf2")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1", TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2", TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p3", TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p4", TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p5", TOptionalStr("inf2")); } model->sample(time, time + bucketLength, m_ResourceMonitor); TDoubleVec probabilities; - for (std::size_t pid = 0u; pid < 5; ++pid) - { + for (std::size_t pid = 0u; pid < 5; ++pid) { SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); LOG_DEBUG("influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); lastInfluencersResult = annotatedProbability.s_Influences; @@ -2309,8 +1984,7 @@ void CEventRateModelTest::testOnlineRareWithInfluence() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CEventRateModelTest::testSkipSampling() -{ +void CEventRateModelTest::testSkipSampling() { LOG_DEBUG("*** testSkipSampling ***"); core_t::TTime startTime(100); @@ -2324,7 +1998,7 @@ void CEventRateModelTest::testSkipSampling() CModelFactory::TDataGathererPtr gathererNoGap; CAnomalyDetectorModel::TModelPtr modelNoGap_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererNoGap, modelNoGap_, 2); - CEventRateModel *modelNoGap = dynamic_cast<CEventRateModel*>(modelNoGap_.get()); + CEventRateModel* modelNoGap = dynamic_cast<CEventRateModel*>(modelNoGap_.get()); // p1: |1|1|1| // p2: |1|0|0| @@ -2339,7 +2013,7 @@ void CEventRateModelTest::testSkipSampling() CAnomalyDetectorModel::TModelPtr modelWithGap_; CModelFactory::TDataGathererPtr gathererWithGap; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererWithGap, modelWithGap_, 2); - CEventRateModel *modelWithGap = dynamic_cast<CEventRateModel*>(modelWithGap_.get()); + CEventRateModel* modelWithGap = dynamic_cast<CEventRateModel*>(modelWithGap_.get()); // p1: |1|1|0|0|0|0|0|0|0|0|1|1| // p1: |1|X|X|X|X|X|X|X|X|X|1|1| -> equal to |1|1|1| @@ -2367,15 +2041,21 @@ void CEventRateModelTest::testSkipSampling() // Check priors are the same CPPUNIT_ASSERT_EQUAL( - static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum(), - static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum()); + static_cast<const maths::CUnivariateTimeSeriesModel*>( + modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)) + ->residualModel() + .checksum(), + static_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)) + ->residualModel() + .checksum());
CPPUNIT_ASSERT_EQUAL( - static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->residualModel().checksum(), - static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->residualModel().checksum()); + static_cast<const maths::CUnivariateTimeSeriesModel*>( + modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1)) + ->residualModel() + .checksum(), + static_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1)) + ->residualModel() + .checksum()); // Confirm last seen times are only updated by gap duration by forcing p2 to be pruned modelWithGap->sample(1200, 1500, m_ResourceMonitor); @@ -2388,8 +2068,7 @@ void CEventRateModelTest::testSkipSampling() CPPUNIT_ASSERT_EQUAL(std::size_t(1), gathererWithGap->numberActivePeople()); } -void CEventRateModelTest::testExplicitNulls() -{ +void CEventRateModelTest::testExplicitNulls() { LOG_DEBUG("*** testExplicitNulls ***"); core_t::TTime startTime(100); @@ -2403,7 +2082,7 @@ void CEventRateModelTest::testExplicitNulls() CModelFactory::TDataGathererPtr gathererSkipGap; CAnomalyDetectorModel::TModelPtr modelSkipGap_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererSkipGap, modelSkipGap_, 0); - CEventRateModel *modelSkipGap = dynamic_cast<CEventRateModel*>(modelSkipGap_.get()); + CEventRateModel* modelSkipGap = dynamic_cast<CEventRateModel*>(modelSkipGap_.get()); // The idea here is to compare a model that has a gap skipped against a model // that has explicit nulls for the buckets that sampling was skipped. @@ -2425,7 +2104,7 @@ void CEventRateModelTest::testExplicitNulls() CModelFactory::TDataGathererPtr gathererExNull; CAnomalyDetectorModel::TModelPtr modelExNullGap_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererExNull, modelExNullGap_, 0); - CEventRateModel *modelExNullGap = dynamic_cast<CEventRateModel*>(modelExNullGap_.get()); + CEventRateModel* modelExNullGap = dynamic_cast<CEventRateModel*>(modelExNullGap_.get()); // p1: |1,"",null|1|1|null|null|1| // p2: |1,""|1|0|null|null|0| @@ -2450,20 +2129,25 @@ void CEventRateModelTest::testExplicitNulls() modelExNullGap->sample(600, 700, m_ResourceMonitor); // Check priors are the same - CPPUNIT_ASSERT_EQUAL( - static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum(), - static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum()); - CPPUNIT_ASSERT_EQUAL( - static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->residualModel().checksum(), - static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))->residualModel().checksum()); + CPPUNIT_ASSERT_EQUAL(static_cast<const maths::CUnivariateTimeSeriesModel*>( + modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)) + ->residualModel() + .checksum(), + static_cast<const maths::CUnivariateTimeSeriesModel*>( + modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)) + ->residualModel() + .checksum()); + CPPUNIT_ASSERT_EQUAL(static_cast<const maths::CUnivariateTimeSeriesModel*>( + modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1)) + ->residualModel() + .checksum(), + static_cast<const maths::CUnivariateTimeSeriesModel*>( + modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1)) + ->residualModel() + .checksum()); } -void CEventRateModelTest::testInterimCorrections() -{ +void CEventRateModelTest::testInterimCorrections() { LOG_DEBUG("*** testInterimCorrections ***"); core_t::TTime startTime(3600); @@ -2476,39 +2160,32 @@ void
CEventRateModelTest::testInterimCorrections() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 3); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(model_.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(model_.get()); test::CRandomNumbers rng; core_t::TTime now = startTime; TDoubleVec samples(3, 0.0); - while (now < endTime) - { + while (now < endTime) { rng.generateUniformSamples(50.0, 70.0, std::size_t(3), samples); - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1"); } - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[1] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[1] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2"); } - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[2] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[2] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3"); } model->sample(now, now + bucketLength, m_ResourceMonitor); now += bucketLength; } - for (std::size_t i = 0; i < 35; ++i) - { + for (std::size_t i = 0; i < 35; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1"); } - for (std::size_t i = 0; i < 1; ++i) - { + for (std::size_t i = 0; i < 1; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2"); } - for (std::size_t i = 0; i < 100; ++i) - { + for (std::size_t i = 0; i < 100; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3"); } model->sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); @@ -2518,23 +2195,17 @@ void CEventRateModelTest::testInterimCorrections() model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); SAnnotatedProbability annotatedProbability1; annotatedProbability1.s_ResultType = type; - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability1)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability1)); SAnnotatedProbability annotatedProbability2; annotatedProbability2.s_ResultType = type; - CPPUNIT_ASSERT(model->computeProbability(1/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability2)); + CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability2)); SAnnotatedProbability annotatedProbability3; annotatedProbability3.s_ResultType = type; - CPPUNIT_ASSERT(model->computeProbability(2/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability3)); + CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability3)); - TDouble1Vec p1Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 0, 0, type, NO_CORRELATES, now); - TDouble1Vec p2Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 1, 0, type, NO_CORRELATES, now); - TDouble1Vec p3Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 2, 0, type, NO_CORRELATES, now); + TDouble1Vec p1Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 0, 0, type, NO_CORRELATES, now); + TDouble1Vec p2Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 1, 0, type, NO_CORRELATES,
now); + TDouble1Vec p3Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 2, 0, type, NO_CORRELATES, now); LOG_DEBUG("p1 probability = " << annotatedProbability1.s_Probability); LOG_DEBUG("p2 probability = " << annotatedProbability2.s_Probability); @@ -2550,33 +2221,24 @@ void CEventRateModelTest::testInterimCorrections() CPPUNIT_ASSERT(p2Baseline[0] > 43.0 && p2Baseline[0] < 47.0); CPPUNIT_ASSERT(p3Baseline[0] > 57.0 && p3Baseline[0] < 62.0); - for (std::size_t i = 0; i < 25; ++i) - { + for (std::size_t i = 0; i < 25; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1"); } - for (std::size_t i = 0; i < 59; ++i) - { + for (std::size_t i = 0; i < 59; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2"); } - for (std::size_t i = 0; i < 100; ++i) - { + for (std::size_t i = 0; i < 100; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3"); } model->sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, now, now + bucketLength, - partitioningFields, 0, annotatedProbability1)); - CPPUNIT_ASSERT(model->computeProbability(1/*pid*/, now, now + bucketLength, - partitioningFields, 0, annotatedProbability2)); - CPPUNIT_ASSERT(model->computeProbability(2/*pid*/, now, now + bucketLength, - partitioningFields, 0, annotatedProbability3)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 0, annotatedProbability1)); + CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, 0, annotatedProbability2)); + CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, 0, annotatedProbability3)); - p1Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 0, 0, type, NO_CORRELATES, now); - p2Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 1, 0, type, NO_CORRELATES, now); - p3Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 2, 0, type, NO_CORRELATES, now); + p1Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 0, 0, type, NO_CORRELATES, now); + p2Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 1, 0, type, NO_CORRELATES, now); + p3Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 2, 0, type, NO_CORRELATES, now); LOG_DEBUG("p1 probability = " << annotatedProbability1.s_Probability); LOG_DEBUG("p2 probability = " << annotatedProbability2.s_Probability); @@ -2593,8 +2255,7 @@ void CEventRateModelTest::testInterimCorrections() CPPUNIT_ASSERT(p3Baseline[0] > 58.0 && p3Baseline[0] < 62.0); } -void CEventRateModelTest::testInterimCorrectionsWithCorrelations() -{ +void CEventRateModelTest::testInterimCorrectionsWithCorrelations() { LOG_DEBUG("*** testInterimCorrectionsWithCorrelations ***"); core_t::TTime startTime(3600); @@ -2607,40 +2268,33 @@ void CEventRateModelTest::testInterimCorrectionsWithCorrelations() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 3); - CEventRateModel *model = dynamic_cast<CEventRateModel*>(model_.get()); + CEventRateModel* model = dynamic_cast<CEventRateModel*>(model_.get()); core_t::TTime now = startTime; core_t::TTime endTime(now + 2 * 24 * bucketLength); test::CRandomNumbers rng; TDoubleVec samples(1, 0.0); - while (now < endTime) - { + while (now < endTime) {
rng.generateUniformSamples(80.0, 100.0, std::size_t(1), samples); - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1"); } - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 10.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 10.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2"); } - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] - 9.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] - 9.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3"); } model->sample(now, now + bucketLength, m_ResourceMonitor); now += bucketLength; } - for (std::size_t i = 0; i < 9; ++i) - { + for (std::size_t i = 0; i < 9; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1"); } - for (std::size_t i = 0; i < 10; ++i) - { + for (std::size_t i = 0; i < 10; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2"); } - for (std::size_t i = 0; i < 8; ++i) - { + for (std::size_t i = 0; i < 8; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3"); } model->sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); @@ -2649,29 +2303,20 @@ void CEventRateModelTest::testInterimCorrectionsWithCorrelations() model_t::CResultType type(model_t::CResultType::E_Conditional | model_t::CResultType::E_Interim); SAnnotatedProbability annotatedProbability1; annotatedProbability1.s_ResultType = type; - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability1)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability1)); SAnnotatedProbability annotatedProbability2; annotatedProbability2.s_ResultType = type; - CPPUNIT_ASSERT(model->computeProbability(1/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability2)); + CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability2)); SAnnotatedProbability annotatedProbability3; annotatedProbability3.s_ResultType = type; - CPPUNIT_ASSERT(model->computeProbability(2/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability3)); - - TDouble1Vec p1Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 0, 0, type, - annotatedProbability1.s_AttributeProbabilities[0].s_Correlated, - now); - TDouble1Vec p2Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 1, 0, type, - annotatedProbability2.s_AttributeProbabilities[0].s_Correlated, - now); - TDouble1Vec p3Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, - 2, 0, type, - annotatedProbability3.s_AttributeProbabilities[0].s_Correlated, - now); + CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability3)); + + TDouble1Vec p1Baseline = model->baselineBucketMean( + model_t::E_IndividualCountByBucketAndPerson, 0, 0, type, annotatedProbability1.s_AttributeProbabilities[0].s_Correlated, now); + TDouble1Vec p2Baseline = model->baselineBucketMean( + model_t::E_IndividualCountByBucketAndPerson, 1, 0, type, annotatedProbability2.s_AttributeProbabilities[0].s_Correlated, now); + TDouble1Vec p3Baseline = model->baselineBucketMean( + model_t::E_IndividualCountByBucketAndPerson, 2, 0, type, annotatedProbability3.s_AttributeProbabilities[0].s_Correlated, now);
LOG_DEBUG("p1 probability = " << annotatedProbability1.s_Probability); LOG_DEBUG("p2 probability = " << annotatedProbability2.s_Probability); @@ -2688,8 +2333,7 @@ void CEventRateModelTest::testInterimCorrectionsWithCorrelations() CPPUNIT_ASSERT(p3Baseline[0] > 7.4 && p3Baseline[0] < 7.6); } -void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() -{ +void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() { LOG_DEBUG("*** testSummaryCountZeroRecordsAreIgnored ***"); core_t::TTime startTime(100); @@ -2708,7 +2352,7 @@ void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() CAnomalyDetectorModel::TModelPtr modelWithZerosPtr(factory.makeModel(initDataWithZeros)); CPPUNIT_ASSERT(modelWithZerosPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_EventRateOnline, modelWithZerosPtr->category()); - CEventRateModel &modelWithZeros = static_cast(*modelWithZerosPtr.get()); + CEventRateModel& modelWithZeros = static_cast(*modelWithZerosPtr.get()); CModelFactory::SGathererInitializationData gathererNoZerosInitData(startTime); CModelFactory::TDataGathererPtr gathererNoZeros(factory.makeDataGatherer(gathererNoZerosInitData)); @@ -2716,7 +2360,7 @@ void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() CAnomalyDetectorModel::TModelPtr modelNoZerosPtr(factory.makeModel(initDataNoZeros)); CPPUNIT_ASSERT(modelNoZerosPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_EventRateOnline, modelNoZerosPtr->category()); - CEventRateModel &modelNoZeros = static_cast(*modelNoZerosPtr.get()); + CEventRateModel& modelNoZeros = static_cast(*modelNoZerosPtr.get()); // The idea here is to compare a model that has records with summary count of zero // against a model that has no records at all where the first model had the zero-count records. @@ -2728,18 +2372,14 @@ void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() TDoubleVec zeroCountProbability; std::string summaryCountZero("0"); std::string summaryCountOne("1"); - while (now < end) - { + while (now < end) { rng.generateUniformSamples(1, 10, 1, samples); rng.generateUniformSamples(0.0, 1.0, 1, zeroCountProbability); - for (std::size_t i = 0; i < samples[0]; ++i) - { - if (zeroCountProbability[0] < 0.2) - { - addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountZero)); - } - else - { + for (std::size_t i = 0; i < samples[0]; ++i) { + if (zeroCountProbability[0] < 0.2) { + addArrival( + *gathererWithZeros, m_ResourceMonitor, now, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountZero)); + } else { addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountOne)); addArrival(*gathererNoZeros, m_ResourceMonitor, now, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountOne)); } @@ -2752,8 +2392,7 @@ void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() CPPUNIT_ASSERT_EQUAL(modelWithZeros.checksum(), modelNoZeros.checksum()); } -void CEventRateModelTest::testComputeProbabilityGivenDetectionRule() -{ +void CEventRateModelTest::testComputeProbabilityGivenDetectionRule() { LOG_DEBUG("*** testComputeProbabilityGivenDetectionRule ***"); CRuleCondition condition; @@ -2776,23 +2415,20 @@ void CEventRateModelTest::testComputeProbabilityGivenDetectionRule() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); - CEventRateModel *model = dynamic_cast(model_.get()); + 
CEventRateModel* model = dynamic_cast<CEventRateModel*>(model_.get()); test::CRandomNumbers rng; core_t::TTime now = startTime; TDoubleVec samples(1, 0.0); - while (now < endTime) - { + while (now < endTime) { rng.generateUniformSamples(50.0, 70.0, std::size_t(1), samples); - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1"); } model->sample(now, now + bucketLength, m_ResourceMonitor); now += bucketLength; } - for (std::size_t i = 0; i < 35; ++i) - { + for (std::size_t i = 0; i < 35; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1"); } model->sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); @@ -2800,12 +2436,10 @@ void CEventRateModelTest::testComputeProbabilityGivenDetectionRule() CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability) == false); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability) == false); } -void CEventRateModelTest::testDecayRateControl() -{ +void CEventRateModelTest::testDecayRateControl() { LOG_DEBUG("*** testDecayRateControl ***"); core_t::TTime startTime = 0; @@ -2843,37 +2477,30 @@ void CEventRateModelTest::testDecayRateControl() TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); - for (core_t::TTime t = 0; t < 4 * core::constants::WEEK; t += bucketLength) - { - if (t % core::constants::WEEK == 0) - { + for (core_t::TTime t = 0; t < 4 * core::constants::WEEK; t += bucketLength) { + if (t % core::constants::WEEK == 0) { LOG_DEBUG("week " << t / core::constants::WEEK + 1); } TDoubleVec rate; rng.generateUniformSamples(0.0, 10.0, 1, rate); rate[0] += 20.0 * (t > 3 * core::constants::WEEK && t < 3 * core::constants::WEEK + 4 * 3600 ?
1.0 : 0.0); - for (std::size_t i = 0u; i < static_cast<std::size_t>(rate[0]); ++i) - { + for (std::size_t i = 0u; i < static_cast<std::size_t>(rate[0]); ++i) { addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1"); addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1"); } model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs( - model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - model->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add(std::fabs( - referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanReferencePredictionError.add( + std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanReferencePredictionError), - maths::CBasicStatistics::mean(meanPredictionError), - 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), 0.05); } LOG_DEBUG("*** Test linear scaling ***"); @@ -2899,40 +2526,33 @@ void CEventRateModelTest::testDecayRateControl() TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); - for (core_t::TTime t = 0; t < 10 * core::constants::WEEK; t += bucketLength) - { - if (t % core::constants::WEEK == 0) - { + for (core_t::TTime t = 0; t < 10 * core::constants::WEEK; t += bucketLength) { + if (t % core::constants::WEEK == 0) { LOG_DEBUG("week " << t / core::constants::WEEK + 1); } - double rate = 10.0 * (1.0 + std::sin( boost::math::double_constants::two_pi - * static_cast<double>(t) - / static_cast<double>(core::constants::DAY))) - * (t < 5 * core::constants::WEEK ? 1.0 : 2.0); + double rate = 10.0 * + (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / + static_cast<double>(core::constants::DAY))) * + (t < 5 * core::constants::WEEK ?
1.0 : 2.0); TDoubleVec noise; rng.generateUniformSamples(0.0, 3.0, 1, noise); - for (std::size_t i = 0u; i < static_cast<std::size_t>(rate + noise[0]); ++i) - { + for (std::size_t i = 0u; i < static_cast<std::size_t>(rate + noise[0]); ++i) { addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1"); addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1"); } model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs( - model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - model->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add(std::fabs( - referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanReferencePredictionError.add( + std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanReferencePredictionError), - maths::CBasicStatistics::mean(meanPredictionError), - 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), 0.05); } LOG_DEBUG("*** Test unmodelled cyclic component ***"); @@ -2960,46 +2580,38 @@ void CEventRateModelTest::testDecayRateControl() TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); - for (core_t::TTime t = 0; t < 20 * core::constants::WEEK; t += bucketLength) - { - if (t % core::constants::WEEK == 0) - { + for (core_t::TTime t = 0; t < 20 * core::constants::WEEK; t += bucketLength) { + if (t % core::constants::WEEK == 0) { LOG_DEBUG("week " << t / core::constants::WEEK + 1); } - double rate = 10.0 * (1.0 + std::sin( boost::math::double_constants::two_pi - * static_cast<double>(t) - / static_cast<double>(core::constants::DAY))) - * (1.0 + std::sin( boost::math::double_constants::two_pi - * static_cast<double>(t) - / 10.0 / static_cast<double>(core::constants::WEEK))); + double rate = 10.0 * + (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / + static_cast<double>(core::constants::DAY))) * + (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / 10.0 / + static_cast<double>(core::constants::WEEK))); TDoubleVec noise; rng.generateUniformSamples(0.0, 3.0, 1, noise); - for (std::size_t i = 0u; i < static_cast<std::size_t>(rate + noise[0]); ++i) - { + for (std::size_t i = 0u; i < static_cast<std::size_t>(rate + noise[0]); ++i) { addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1"); addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1"); } model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs( - model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - -
model->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add(std::fabs( - referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanReferencePredictionError.add( + std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanPredictionError) - < 0.7 * maths::CBasicStatistics::mean(meanReferencePredictionError)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanPredictionError) < + 0.7 * maths::CBasicStatistics::mean(meanReferencePredictionError)); } } -void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() -{ +void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() { LOG_DEBUG("*** testIgnoreSamplingGivenDetectionRules ***"); // Create 2 models, one of which has a skip sampling rule. @@ -3021,14 +2633,13 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() std::size_t startTime(100); SModelParams paramsNoRules(bucketLength); - // Model without the skip sampling rule CEventRateModelFactory factory(paramsNoRules); model_t::TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson}; CModelFactory::TDataGathererPtr gathererNoSkip; CAnomalyDetectorModel::TModelPtr modelPtrNoSkip; makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererNoSkip, modelPtrNoSkip, 1); - CEventRateModel *modelNoSkip = dynamic_cast<CEventRateModel*>(modelPtrNoSkip.get()); + CEventRateModel* modelNoSkip = dynamic_cast<CEventRateModel*>(modelPtrNoSkip.get()); // Model with the skip sampling rule SModelParams paramsWithRules(bucketLength); @@ -3039,13 +2650,12 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() CModelFactory::TDataGathererPtr gathererWithSkip; CAnomalyDetectorModel::TModelPtr modelPtrWithSkip; makeModel(factoryWithSkip, features, m_ResourceMonitor, startTime, bucketLength, gathererWithSkip, modelPtrWithSkip, 1); - CEventRateModel *modelWithSkip = dynamic_cast<CEventRateModel*>(modelPtrWithSkip.get()); + CEventRateModel* modelWithSkip = dynamic_cast<CEventRateModel*>(modelPtrWithSkip.get()); std::size_t endTime = startTime + bucketLength; // Add a bucket to both models - for (int i = 0; i < 66; ++i) - { + for (int i = 0; i < 66; ++i) { addArrival(*gathererNoSkip, m_ResourceMonitor, startTime, "p1"); addArrival(*gathererWithSkip, m_ResourceMonitor, startTime, "p1"); } @@ -3056,8 +2666,7 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() CPPUNIT_ASSERT_EQUAL(modelWithSkip->checksum(), modelNoSkip->checksum()); // Add a bucket to both models - for (int i = 0; i < 55; ++i) - { + for (int i = 0; i < 55; ++i) { addArrival(*gathererNoSkip, m_ResourceMonitor, startTime, "p1"); addArrival(*gathererWithSkip, m_ResourceMonitor, startTime, "p1"); } @@ -3068,21 +2677,18 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() CPPUNIT_ASSERT_EQUAL(modelWithSkip->checksum(), modelNoSkip->checksum()); // this sample will be skipped by the detection rule
- for (int i = 0; i < 110; ++i) - { + for (int i = 0; i < 110; ++i) { addArrival(*gathererWithSkip, m_ResourceMonitor, startTime, "p1"); } modelWithSkip->sample(startTime, endTime, m_ResourceMonitor); - startTime = endTime; endTime += bucketLength; // Wind the other model forward modelNoSkip->skipSampling(startTime); - for (int i = 0; i < 55; ++i) - { + for (int i = 0; i < 55; ++i) { addArrival(*gathererNoSkip, m_ResourceMonitor, startTime, "p1"); addArrival(*gathererWithSkip, m_ResourceMonitor, startTime, "p1"); } @@ -3096,14 +2702,18 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = modelWithSkip->details(); CAnomalyDetectorModel::CModelDetailsViewPtr modelNoSkipView = modelNoSkip->details(); - uint64_t withSkipChecksum = static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum(); - uint64_t noSkipChecksum = static_cast<const maths::CUnivariateTimeSeriesModel*>( - modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))->residualModel().checksum(); + uint64_t withSkipChecksum = + static_cast<const maths::CUnivariateTimeSeriesModel*>(modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0)) + ->residualModel() + .checksum(); + uint64_t noSkipChecksum = + static_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0)) + ->residualModel() + .checksum(); CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); // Check the last value times of the underlying models are the same - const maths::CUnivariateTimeSeriesModel *timeSeriesModel = + const maths::CUnivariateTimeSeriesModel* timeSeriesModel = dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0)); CPPUNIT_ASSERT(timeSeriesModel != 0); @@ -3111,80 +2721,58 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_IndividualCountByBucketAndPerson, startTime, bucketLength), time); // The last times of model with a skip should be the same - timeSeriesModel = dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0)); + timeSeriesModel = + dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0)); CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); } -CppUnit::Test *CEventRateModelTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CEventRateModelTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testOnlineCountSample", - &CEventRateModelTest::testOnlineCountSample) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testOnlineNonZeroCountSample", - &CEventRateModelTest::testOnlineNonZeroCountSample) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testOnlineRare", - &CEventRateModelTest::testOnlineRare) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testOnlineProbabilityCalculation", - &CEventRateModelTest::testOnlineProbabilityCalculation) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount", - &CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount", - &CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CEventRateModelTest>( -
"CEventRateModelTest::testOnlineCorrelatedNoTrend", - &CEventRateModelTest::testOnlineCorrelatedNoTrend) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testOnlineCorrelatedTrend", - &CEventRateModelTest::testOnlineCorrelatedTrend) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testPrune", - &CEventRateModelTest::testPrune) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testKey", - &CEventRateModelTest::testKey) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testModelsWithValueFields", - &CEventRateModelTest::testModelsWithValueFields) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testCountProbabilityCalculationWithInfluence", - &CEventRateModelTest::testCountProbabilityCalculationWithInfluence) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence", - &CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testOnlineRareWithInfluence", - &CEventRateModelTest::testOnlineRareWithInfluence) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testSkipSampling", - &CEventRateModelTest::testSkipSampling) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testExplicitNulls", - &CEventRateModelTest::testExplicitNulls) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testInterimCorrections", - &CEventRateModelTest::testInterimCorrections) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testInterimCorrectionsWithCorrelations", - &CEventRateModelTest::testInterimCorrectionsWithCorrelations) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored", - &CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testComputeProbabilityGivenDetectionRule", - &CEventRateModelTest::testComputeProbabilityGivenDetectionRule) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testDecayRateControl", - &CEventRateModelTest::testDecayRateControl) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CEventRateModelTest::testIgnoreSamplingGivenDetectionRules", - &CEventRateModelTest::testIgnoreSamplingGivenDetectionRules) ); +CppUnit::Test* CEventRateModelTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRateModelTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testOnlineCountSample", + &CEventRateModelTest::testOnlineCountSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testOnlineNonZeroCountSample", + &CEventRateModelTest::testOnlineNonZeroCountSample)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CEventRateModelTest::testOnlineRare", &CEventRateModelTest::testOnlineRare)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testOnlineProbabilityCalculation", + &CEventRateModelTest::testOnlineProbabilityCalculation)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount", + &CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount)); + suiteOfTests->addTest( + new 
CppUnit::TestCaller("CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount", + &CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testOnlineCorrelatedNoTrend", + &CEventRateModelTest::testOnlineCorrelatedNoTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testOnlineCorrelatedTrend", + &CEventRateModelTest::testOnlineCorrelatedTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testPrune", &CEventRateModelTest::testPrune)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testKey", &CEventRateModelTest::testKey)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testModelsWithValueFields", + &CEventRateModelTest::testModelsWithValueFields)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testCountProbabilityCalculationWithInfluence", + &CEventRateModelTest::testCountProbabilityCalculationWithInfluence)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence", + &CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testOnlineRareWithInfluence", + &CEventRateModelTest::testOnlineRareWithInfluence)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CEventRateModelTest::testSkipSampling", &CEventRateModelTest::testSkipSampling)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CEventRateModelTest::testExplicitNulls", &CEventRateModelTest::testExplicitNulls)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testInterimCorrections", + &CEventRateModelTest::testInterimCorrections)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testInterimCorrectionsWithCorrelations", + &CEventRateModelTest::testInterimCorrectionsWithCorrelations)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored", + &CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testComputeProbabilityGivenDetectionRule", + &CEventRateModelTest::testComputeProbabilityGivenDetectionRule)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testDecayRateControl", + &CEventRateModelTest::testDecayRateControl)); + suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testIgnoreSamplingGivenDetectionRules", + &CEventRateModelTest::testIgnoreSamplingGivenDetectionRules)); return suiteOfTests; } - diff --git a/lib/model/unittest/CEventRateModelTest.h b/lib/model/unittest/CEventRateModelTest.h index 1734779b64..c458370666 100644 --- a/lib/model/unittest/CEventRateModelTest.h +++ b/lib/model/unittest/CEventRateModelTest.h @@ -11,36 +11,34 @@ #include -class CEventRateModelTest : public CppUnit::TestFixture -{ - public: - void testOnlineCountSample(); - void testOnlineNonZeroCountSample(); - void testOnlineRare(); - void testOnlineProbabilityCalculation(); - void testOnlineProbabilityCalculationForLowNonZeroCount(); - void testOnlineProbabilityCalculationForHighNonZeroCount(); - void testOnlineCorrelatedNoTrend(); - void testOnlineCorrelatedTrend(); - void testPrune(); - void testKey(); - void testModelsWithValueFields(); - void testCountProbabilityCalculationWithInfluence(); - void 
testDistinctCountProbabilityCalculationWithInfluence(); - void testOnlineRareWithInfluence(); - void testSkipSampling(); - void testExplicitNulls(); - void testInterimCorrections(); - void testInterimCorrectionsWithCorrelations(); - void testSummaryCountZeroRecordsAreIgnored(); - void testComputeProbabilityGivenDetectionRule(); - void testDecayRateControl(); - void testIgnoreSamplingGivenDetectionRules(); - static CppUnit::Test *suite(); +class CEventRateModelTest : public CppUnit::TestFixture { +public: + void testOnlineCountSample(); + void testOnlineNonZeroCountSample(); + void testOnlineRare(); + void testOnlineProbabilityCalculation(); + void testOnlineProbabilityCalculationForLowNonZeroCount(); + void testOnlineProbabilityCalculationForHighNonZeroCount(); + void testOnlineCorrelatedNoTrend(); + void testOnlineCorrelatedTrend(); + void testPrune(); + void testKey(); + void testModelsWithValueFields(); + void testCountProbabilityCalculationWithInfluence(); + void testDistinctCountProbabilityCalculationWithInfluence(); + void testOnlineRareWithInfluence(); + void testSkipSampling(); + void testExplicitNulls(); + void testInterimCorrections(); + void testInterimCorrectionsWithCorrelations(); + void testSummaryCountZeroRecordsAreIgnored(); + void testComputeProbabilityGivenDetectionRule(); + void testDecayRateControl(); + void testIgnoreSamplingGivenDetectionRules(); + static CppUnit::Test* suite(); - private: - ml::model::CResourceMonitor m_ResourceMonitor; +private: + ml::model::CResourceMonitor m_ResourceMonitor; }; #endif // INCLUDED_CEventRateModelTest_h - diff --git a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc index 73d66890d1..a087930c3f 100644 --- a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc +++ b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc @@ -15,9 +15,9 @@ #include #include -#include #include #include +#include #include #include @@ -33,24 +33,13 @@ using namespace ml; using namespace model; -namespace -{ - -struct SMessage -{ - SMessage(core_t::TTime time, - const std::string &attribute, - const std::string &person) : - s_Time(time), - s_Attribute(attribute), - s_Person(person) - { - } +namespace { - bool operator<(const SMessage &other) const - { - return s_Time < other.s_Time; - } +struct SMessage { + SMessage(core_t::TTime time, const std::string& attribute, const std::string& person) + : s_Time(time), s_Attribute(attribute), s_Person(person) {} + + bool operator<(const SMessage& other) const { return s_Time < other.s_Time; } core_t::TTime s_Time; std::string s_Attribute; @@ -77,33 +66,25 @@ using TSizeSizePrFeatureDataPrVec = std::vector; using TFeatureSizeSizePrFeatureDataPrVecPr = std::pair; using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector; -TStrVec allCategories() -{ +TStrVec allCategories() { const std::size_t numberCategories = 30u; TStrVec categories; - for (std::size_t i = 0; i < numberCategories; ++i) - { + for (std::size_t i = 0; i < numberCategories; ++i) { categories.push_back("c" + boost::lexical_cast(i)); } return categories; } -TStrVec allPeople() -{ +TStrVec allPeople() { const std::size_t numberPeople = 5u; TStrVec people; - for (std::size_t i = 0u; i < numberPeople; ++i) - { + for (std::size_t i = 0u; i < numberPeople; ++i) { people.push_back("p" + boost::lexical_cast(i)); } return people; } -void generateTestMessages(test::CRandomNumbers &rng, - core_t::TTime time, - core_t::TTime bucketLength, - TMessageVec &messages) -{ +void 
generateTestMessages(test::CRandomNumbers& rng, core_t::TTime time, core_t::TTime bucketLength, TMessageVec& messages) {
     using TUIntVec = std::vector<unsigned int>;
     using TDoubleVec = std::vector<double>;
 
@@ -112,11 +93,10 @@ void generateTestMessages(test::CRandomNumbers &rng,
     TStrVec categories = allCategories();
     TStrVec people = allPeople();
 
-    const double rates[] = { 1.0, 0.3, 10.1, 25.0, 105.0 };
+    const double rates[] = {1.0, 0.3, 10.1, 25.0, 105.0};
 
     TSizeVec bucketCounts;
-    for (std::size_t j = 0u; j < categories.size(); ++j)
-    {
+    for (std::size_t j = 0u; j < categories.size(); ++j) {
         double rate = rates[j % boost::size(rates)];
         TUIntVec sample;
         rng.generatePoissonSamples(rate, 1u, sample);
@@ -124,34 +104,23 @@ void generateTestMessages(test::CRandomNumbers &rng,
     }
 
     TDoubleVec personRange;
-    rng.generateUniformSamples(0.0,
-                               static_cast<double>(people.size()) - 1e-3,
-                               2u,
-                               personRange);
+    rng.generateUniformSamples(0.0, static_cast<double>(people.size()) - 1e-3, 2u, personRange);
     std::sort(personRange.begin(), personRange.end());
     std::size_t a = static_cast<std::size_t>(personRange[0]);
     std::size_t b = static_cast<std::size_t>(personRange[1]) + 1;
     TSizeVec bucketPeople;
-    for (std::size_t i = a; i < b; ++i)
-    {
+    for (std::size_t i = a; i < b; ++i) {
         bucketPeople.push_back(i);
     }
     LOG_DEBUG("bucketPeople = " << core::CContainerPrinter::print(bucketPeople));
 
-    for (std::size_t i = 0u; i < categories.size(); ++i)
-    {
+    for (std::size_t i = 0u; i < categories.size(); ++i) {
         TDoubleVec offsets;
-        rng.generateUniformSamples(0.0,
-                                   static_cast<double>(bucketLength) - 1.0,
-                                   bucketCounts[i],
-                                   offsets);
+        rng.generateUniformSamples(0.0, static_cast<double>(bucketLength) - 1.0, bucketCounts[i], offsets);
 
-        for (std::size_t j = 0u; j < offsets.size(); ++j)
-        {
-            messages.push_back(SMessage(
-                    time + static_cast<core_t::TTime>(offsets[j]),
-                    categories[i],
-                    people[bucketPeople[j % bucketPeople.size()]]));
+        for (std::size_t j = 0u; j < offsets.size(); ++j) {
+            messages.push_back(
+                SMessage(time + static_cast<core_t::TTime>(offsets[j]), categories[i], people[bucketPeople[j % bucketPeople.size()]]));
         }
     }
 
@@ -159,14 +128,9 @@ void generateTestMessages(test::CRandomNumbers &rng,
     LOG_DEBUG("Generated " << messages.size() << " messages");
 }
 
-const TSizeSizePrFeatureDataPrVec &
-extract(const TFeatureSizeSizePrFeatureDataPrVecPrVec &featureData,
-        model_t::EFeature feature)
-{
-    for (std::size_t i = 0u; i < featureData.size(); ++i)
-    {
-        if (featureData[i].first == feature)
-        {
+const TSizeSizePrFeatureDataPrVec& extract(const TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData, model_t::EFeature feature) {
+    for (std::size_t i = 0u; i < featureData.size(); ++i) {
+        if (featureData[i].first == feature) {
             return featureData[i].second;
         }
     }
@@ -175,42 +139,28 @@ extract(const TFeatureSizeSizePrFeatureDataPrVecPrVec &featureData,
     return EMPTY;
 }
 
-const TSizeSizePrFeatureDataPrVec &
-extractPeoplePerAttribute(TFeatureSizeSizePrFeatureDataPrVecPrVec &featureData)
-{
+const TSizeSizePrFeatureDataPrVec& extractPeoplePerAttribute(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
     return extract(featureData, model_t::E_PopulationUniquePersonCountByAttribute);
 }
 
-const TSizeSizePrFeatureDataPrVec &
-extractNonZeroAttributeCounts(TFeatureSizeSizePrFeatureDataPrVecPrVec &featureData)
-{
+const TSizeSizePrFeatureDataPrVec& extractNonZeroAttributeCounts(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
     return extract(featureData, model_t::E_PopulationCountByBucketPersonAndAttribute);
 }
 
-const TSizeSizePrFeatureDataPrVec &
-extractAttributeIndicator(TFeatureSizeSizePrFeatureDataPrVecPrVec &featureData)
-{
+const TSizeSizePrFeatureDataPrVec& extractAttributeIndicator(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) { return extract(featureData, model_t::E_PopulationIndicatorOfBucketPersonAndAttribute); } -const TSizeSizePrFeatureDataPrVec & -extractBucketAttributesPerPerson(TFeatureSizeSizePrFeatureDataPrVecPrVec &featureData) -{ +const TSizeSizePrFeatureDataPrVec& extractBucketAttributesPerPerson(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) { return extract(featureData, model_t::E_PopulationUniqueCountByBucketPersonAndAttribute); } -const TSizeSizePrFeatureDataPrVec & -extractCompressedLengthPerPerson(TFeatureSizeSizePrFeatureDataPrVecPrVec &featureData) -{ +const TSizeSizePrFeatureDataPrVec& extractCompressedLengthPerPerson(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) { return extract(featureData, model_t::E_PopulationInfoContentByBucketPersonAndAttribute); } -CEventData addArrival(core_t::TTime time, - const std::string &p, - const std::string &a, - CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor) -{ +CEventData +addArrival(core_t::TTime time, const std::string& p, const std::string& a, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields; fields.push_back(&p); fields.push_back(&a); @@ -223,12 +173,11 @@ CEventData addArrival(core_t::TTime time, } CEventData addArrival(core_t::TTime time, - const std::string &p, - const std::string &a, - const std::string &v, - CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor) -{ + const std::string& p, + const std::string& a, + const std::string& v, + CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields; fields.push_back(&p); fields.push_back(&a); @@ -243,11 +192,9 @@ CEventData addArrival(core_t::TTime time, CSearchKey searchKey; const std::string EMPTY_STRING; - } -void CEventRatePopulationDataGathererTest::testAttributeCounts() -{ +void CEventRatePopulationDataGathererTest::testAttributeCounts() { LOG_DEBUG("*** CEventRatePopulationDataGathererTest::testAttributeCounts ***"); // We check that we correctly sample the unique people per @@ -268,7 +215,12 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, searchKey, @@ -286,19 +238,13 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() std::size_t personOrder = 0u; TStrSizeMap expectedPeopleOrder; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) - { + for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { TMessageVec messages; generateTestMessages(rng, time, bucketLength, messages); TSizeSizePrUInt64Map expectedAttributeCounts; - for (std::size_t j = 0u; j < messages.size(); ++j) - { - addArrival(messages[j].s_Time, - messages[j].s_Person, - messages[j].s_Attribute, - dataGatherer, - m_ResourceMonitor); + for (std::size_t j = 0u; j < messages.size(); ++j) { + addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); std::size_t cid; dataGatherer.attributeId(messages[j].s_Attribute, cid); @@ -308,16 +254,10 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() ++expectedAttributeCounts[std::make_pair(pid, cid)]; 
expectedAttributePeople[cid].insert(pid); - if (expectedAttributeOrder.insert( - TStrSizeMap::value_type(messages[j].s_Attribute, - attributeOrder)).second) - { + if (expectedAttributeOrder.insert(TStrSizeMap::value_type(messages[j].s_Attribute, attributeOrder)).second) { ++attributeOrder; } - if (expectedPeopleOrder.insert( - TStrSizeMap::value_type(messages[j].s_Person, - personOrder)).second) - { + if (expectedPeopleOrder.insert(TStrSizeMap::value_type(messages[j].s_Person, personOrder)).second) { ++personOrder; } } @@ -328,30 +268,23 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; dataGatherer.featureData(time, bucketLength, featureData); - const TSizeSizePrFeatureDataPrVec &peoplePerAttribute = - extractPeoplePerAttribute(featureData); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); CPPUNIT_ASSERT_EQUAL(expectedAttributePeople.size(), peoplePerAttribute.size()); TSizeSizePrFeatureDataPrVec expectedPeoplePerAttribute; - for (std::size_t j = 0u; j < peoplePerAttribute.size(); ++j) - { - expectedPeoplePerAttribute.push_back( - TSizeSizePrFeatureDataPr(std::make_pair(size_t(0), j), - expectedAttributePeople[j].size())); + for (std::size_t j = 0u; j < peoplePerAttribute.size(); ++j) { + expectedPeoplePerAttribute.push_back(TSizeSizePrFeatureDataPr(std::make_pair(size_t(0), j), expectedAttributePeople[j].size())); } CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedPeoplePerAttribute), core::CContainerPrinter::print(peoplePerAttribute)); - const TSizeSizePrFeatureDataPrVec &personAttributeCounts = - extractNonZeroAttributeCounts(featureData); + const TSizeSizePrFeatureDataPrVec& personAttributeCounts = extractNonZeroAttributeCounts(featureData); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedAttributeCounts), core::CContainerPrinter::print(personAttributeCounts)); - const TSizeSizePrFeatureDataPrVec &attributeIndicator = - extractAttributeIndicator(featureData); + const TSizeSizePrFeatureDataPrVec& attributeIndicator = extractAttributeIndicator(featureData); CPPUNIT_ASSERT(attributeIndicator.empty()); - const TSizeSizePrFeatureDataPrVec &bucketAttributesPerPerson = - extractBucketAttributesPerPerson(featureData); + const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = extractBucketAttributesPerPerson(featureData); CPPUNIT_ASSERT(bucketAttributesPerPerson.empty()); dataGatherer.timeNow(time + bucketLength); @@ -359,8 +292,7 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() TStrVec categories = allCategories(); TSizeVec attributeIds; - for (std::size_t i = 0u; i < categories.size(); ++i) - { + for (std::size_t i = 0u; i < categories.size(); ++i) { std::size_t cid; CPPUNIT_ASSERT(dataGatherer.attributeId(categories[i], cid)); attributeIds.push_back(cid); @@ -371,8 +303,7 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() TStrVec people = allPeople(); TSizeVec peopleIds; - for (std::size_t i = 0u; i < people.size(); ++i) - { + for (std::size_t i = 0u; i < people.size(); ++i) { std::size_t pid; CPPUNIT_ASSERT(dataGatherer.personId(people[i], pid)); peopleIds.push_back(pid); @@ -382,8 +313,7 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() LOG_DEBUG("expected people ids = " << core::CContainerPrinter::print(expectedPeopleOrder)); } -void CEventRatePopulationDataGathererTest::testAttributeIndicator() -{ +void CEventRatePopulationDataGathererTest::testAttributeIndicator() { LOG_DEBUG("*** 
CEventRatePopulationDataGathererTest::testAttributeIndicator ***"); // We check that we correctly sample the (attribute, person) @@ -401,7 +331,12 @@ void CEventRatePopulationDataGathererTest::testAttributeIndicator() CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, searchKey, @@ -410,19 +345,13 @@ void CEventRatePopulationDataGathererTest::testAttributeIndicator() 0); core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) - { + for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { TMessageVec messages; generateTestMessages(rng, time, bucketLength, messages); TSizeSizePrUInt64Map expectedAttributeIndicator; - for (std::size_t j = 0u; j < messages.size(); ++j) - { - addArrival(messages[j].s_Time, - messages[j].s_Person, - messages[j].s_Attribute, - dataGatherer, - m_ResourceMonitor); + for (std::size_t j = 0u; j < messages.size(); ++j) { + addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); std::size_t cid; dataGatherer.attributeId(messages[j].s_Attribute, cid); @@ -439,25 +368,21 @@ void CEventRatePopulationDataGathererTest::testAttributeIndicator() TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; dataGatherer.featureData(time, bucketLength, featureData); - const TSizeSizePrFeatureDataPrVec &peoplePerAttribute = - extractPeoplePerAttribute(featureData); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); CPPUNIT_ASSERT(peoplePerAttribute.empty()); - const TSizeSizePrFeatureDataPrVec &attributeIndicator = - extractAttributeIndicator(featureData); + const TSizeSizePrFeatureDataPrVec& attributeIndicator = extractAttributeIndicator(featureData); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedAttributeIndicator), core::CContainerPrinter::print(attributeIndicator)); - const TSizeSizePrFeatureDataPrVec &bucketAttributesPerPerson = - extractBucketAttributesPerPerson(featureData); + const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = extractBucketAttributesPerPerson(featureData); CPPUNIT_ASSERT(bucketAttributesPerPerson.empty()); dataGatherer.timeNow(time + bucketLength); } } -void CEventRatePopulationDataGathererTest::testUniqueValueCounts() -{ +void CEventRatePopulationDataGathererTest::testUniqueValueCounts() { LOG_DEBUG("*** CEventRatePopulationDataGathererTest::testUniqueAttributeCounts ***"); // We check that we correctly sample the unique counts @@ -475,7 +400,12 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "value", + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "value", TStrVec(), false, searchKey, @@ -484,25 +414,19 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() 0); core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) - { + for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { TMessageVec messages; generateTestMessages(rng, time, bucketLength, messages); TSizeSizePrUInt64Map expectedUniqueCounts; TSizeSizeSetMap bucketPeopleCategories; - for (std::size_t j 
= 0u; j < messages.size(); ++j) - { + for (std::size_t j = 0u; j < messages.size(); ++j) { std::ostringstream ss; - ss << "thing" << "_" << time << "_" << i; + ss << "thing" + << "_" << time << "_" << i; std::string value(ss.str()); - addArrival(messages[j].s_Time, - messages[j].s_Person, - messages[j].s_Attribute, - value, - dataGatherer, - m_ResourceMonitor); + addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, value, dataGatherer, m_ResourceMonitor); std::size_t cid; dataGatherer.attributeId(messages[j].s_Attribute, cid); @@ -521,16 +445,13 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; dataGatherer.featureData(time, bucketLength, featureData); - const TSizeSizePrFeatureDataPrVec &peoplePerAttribute = - extractPeoplePerAttribute(featureData); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); CPPUNIT_ASSERT(peoplePerAttribute.empty()); - const TSizeSizePrFeatureDataPrVec &attributeIndicator = - extractAttributeIndicator(featureData); + const TSizeSizePrFeatureDataPrVec& attributeIndicator = extractAttributeIndicator(featureData); CPPUNIT_ASSERT(attributeIndicator.empty()); - const TSizeSizePrFeatureDataPrVec &bucketAttributesPerPerson = - extractBucketAttributesPerPerson(featureData); + const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = extractBucketAttributesPerPerson(featureData); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedUniqueCounts), core::CContainerPrinter::print(bucketAttributesPerPerson)); @@ -539,8 +460,7 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() } } -void CEventRatePopulationDataGathererTest::testCompressedLength() -{ +void CEventRatePopulationDataGathererTest::testCompressedLength() { LOG_DEBUG("*** CEventRatePopulationDataGathererTest::testCompressedLength ***"); // We check that we correctly sample the compressed length of unique @@ -558,7 +478,12 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "value", + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "value", TStrVec(), false, searchKey, @@ -567,20 +492,13 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() 0); core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) - { + for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { TMessageVec messages; generateTestMessages(rng, time, bucketLength, messages); TSizeStrSetMap bucketPeopleCategories; - for (std::size_t j = 0u; j < messages.size(); ++j) - { - addArrival(messages[j].s_Time, - messages[j].s_Person, - "attribute", - messages[j].s_Attribute, - dataGatherer, - m_ResourceMonitor); + for (std::size_t j = 0u; j < messages.size(); ++j) { + addArrival(messages[j].s_Time, messages[j].s_Person, "attribute", messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); std::size_t cid; dataGatherer.attributeId(messages[j].s_Attribute, cid); @@ -597,45 +515,35 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; dataGatherer.featureData(time, bucketLength, featureData); - const TSizeSizePrFeatureDataPrVec &peoplePerAttribute = - extractPeoplePerAttribute(featureData); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute 
= extractPeoplePerAttribute(featureData); CPPUNIT_ASSERT(peoplePerAttribute.empty()); - const TSizeSizePrFeatureDataPrVec &attributeIndicator = - extractAttributeIndicator(featureData); + const TSizeSizePrFeatureDataPrVec& attributeIndicator = extractAttributeIndicator(featureData); CPPUNIT_ASSERT(attributeIndicator.empty()); - const TSizeSizePrFeatureDataPrVec &bucketCompressedLengthPerPerson = - extractCompressedLengthPerPerson(featureData); + const TSizeSizePrFeatureDataPrVec& bucketCompressedLengthPerPerson = extractCompressedLengthPerPerson(featureData); CPPUNIT_ASSERT_EQUAL(bucketPeopleCategories.size(), bucketCompressedLengthPerPerson.size()); TSizeSizePrUInt64Map expectedBucketCompressedLengthPerPerson; - for (TSizeStrSetMapItr iter = bucketPeopleCategories.begin(); - iter != bucketPeopleCategories.end(); - ++iter) - { + for (TSizeStrSetMapItr iter = bucketPeopleCategories.begin(); iter != bucketPeopleCategories.end(); ++iter) { TSizeSizePr key(iter->first, 0); - const TStrSet &uniqueValues = iter->second; + const TStrSet& uniqueValues = iter->second; core::CCompressUtils compressor(false); - CPPUNIT_ASSERT_EQUAL(uniqueValues.size(), - static_cast(std::count_if(uniqueValues.begin(), - uniqueValues.end(), - boost::bind(&core::CCompressUtils::addString, - &compressor, - _1)))); + CPPUNIT_ASSERT_EQUAL( + uniqueValues.size(), + static_cast(std::count_if( + uniqueValues.begin(), uniqueValues.end(), boost::bind(&core::CCompressUtils::addString, &compressor, _1)))); size_t length(0); CPPUNIT_ASSERT(compressor.compressedLength(true, length)); expectedBucketCompressedLengthPerPerson[key] = length; } - LOG_DEBUG("Time " << time << " bucketCompressedLengthPerPerson " << - core::CContainerPrinter::print(bucketCompressedLengthPerPerson)); - CPPUNIT_ASSERT_EQUAL(expectedBucketCompressedLengthPerPerson.size(), - bucketCompressedLengthPerPerson.size()); - for (TSizeSizePrFeatureDataPrVec::const_iterator j = - bucketCompressedLengthPerPerson.begin(); j != - bucketCompressedLengthPerPerson.end(); ++j) - { + LOG_DEBUG("Time " << time << " bucketCompressedLengthPerPerson " + << core::CContainerPrinter::print(bucketCompressedLengthPerPerson)); + CPPUNIT_ASSERT_EQUAL(expectedBucketCompressedLengthPerPerson.size(), bucketCompressedLengthPerPerson.size()); + for (TSizeSizePrFeatureDataPrVec::const_iterator j = bucketCompressedLengthPerPerson.begin(); + j != bucketCompressedLengthPerPerson.end(); + ++j) { double expectedLength = expectedBucketCompressedLengthPerPerson[j->first]; double actual = j->second.s_Count; CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLength, actual, expectedLength * 0.1); @@ -645,8 +553,7 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() } } -void CEventRatePopulationDataGathererTest::testRemovePeople() -{ +void CEventRatePopulationDataGathererTest::testRemovePeople() { LOG_DEBUG("*** CEventRatePopulationDataGathererTest::testRemovePeople ***"); using TStrSizeMap = std::map; @@ -665,7 +572,12 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, searchKey, @@ -673,17 +585,11 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() startTime, 0); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < numberBuckets; ++i, bucketStart += bucketLength) - { + for 
(std::size_t i = 0u; i < numberBuckets; ++i, bucketStart += bucketLength) { TMessageVec messages; generateTestMessages(rng, bucketStart, bucketLength, messages); - for (std::size_t j = 0u; j < messages.size(); ++j) - { - addArrival(messages[j].s_Time, - messages[j].s_Person, - messages[j].s_Attribute, - gatherer, - m_ResourceMonitor); + for (std::size_t j = 0u; j < messages.size(); ++j) { + addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, gatherer, m_ResourceMonitor); } } @@ -697,16 +603,11 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() CPPUNIT_ASSERT_EQUAL(numberPeople, gatherer.numberOverFieldValues()); TStrVec expectedPersonNames; TSizeVec expectedPersonIds; - for (std::size_t i = 0u; i < numberPeople; ++i) - { - if (!std::binary_search(peopleToRemove.begin(), - peopleToRemove.end(), i)) - { + for (std::size_t i = 0u; i < numberPeople; ++i) { + if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), i)) { expectedPersonNames.push_back(gatherer.personName(i)); expectedPersonIds.push_back(i); - } - else - { + } else { LOG_DEBUG("Removing " << gatherer.personName(i)); } } @@ -715,19 +616,14 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() { TSizeUInt64PrVec nonZeroCounts; gatherer.personNonZeroCounts(bucketStart - bucketLength, nonZeroCounts); - for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) - { - if (!std::binary_search(peopleToRemove.begin(), - peopleToRemove.end(), - nonZeroCounts[i].first)) - { - const std::string &name = gatherer.personName(nonZeroCounts[i].first); + for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) { + if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), nonZeroCounts[i].first)) { + const std::string& name = gatherer.personName(nonZeroCounts[i].first); expectedNonZeroCounts[name] = static_cast(nonZeroCounts[i].second); } } } - LOG_DEBUG("expectedNonZeroCounts = " - << core::CContainerPrinter::print(expectedNonZeroCounts)); + LOG_DEBUG("expectedNonZeroCounts = " << core::CContainerPrinter::print(expectedNonZeroCounts)); std::string expectedFeatureData; { @@ -735,18 +631,12 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() TStrFeatureDataPrVec expected; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart - bucketLength, bucketLength, featureData); - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - const TSizeSizePrFeatureDataPrVec &data = featureData[i].second; - for (std::size_t j = 0u; j < data.size(); ++j) - { - if (!std::binary_search(peopleToRemove.begin(), - peopleToRemove.end(), - data[j].first.first)) - { - std::string key = model_t::print(featureData[i].first) - + " " + gatherer.personName(data[j].first.first) - + " " + gatherer.attributeName(data[j].first.second); + for (std::size_t i = 0u; i < featureData.size(); ++i) { + const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; + for (std::size_t j = 0u; j < data.size(); ++j) { + if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), data[j].first.first)) { + std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + gatherer.attributeName(data[j].first.second); expected.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(" " << key << " = " << data[j].second.s_Count); } @@ -757,10 +647,8 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() gatherer.recyclePeople(peopleToRemove); - CPPUNIT_ASSERT_EQUAL(numberPeople - peopleToRemove.size(), - 
gatherer.numberActivePeople()); - for (std::size_t i = 0u; i < expectedPersonNames.size(); ++i) - { + CPPUNIT_ASSERT_EQUAL(numberPeople - peopleToRemove.size(), gatherer.numberActivePeople()); + for (std::size_t i = 0u; i < expectedPersonNames.size(); ++i) { std::size_t pid; CPPUNIT_ASSERT(gatherer.personId(expectedPersonNames[i], pid)); CPPUNIT_ASSERT_EQUAL(expectedPersonIds[i], pid); @@ -769,16 +657,13 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() TStrSizeMap actualNonZeroCounts; TSizeUInt64PrVec nonZeroCounts; gatherer.personNonZeroCounts(bucketStart - bucketLength, nonZeroCounts); - for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) - { - const std::string &name = gatherer.personName(nonZeroCounts[i].first); + for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) { + const std::string& name = gatherer.personName(nonZeroCounts[i].first); actualNonZeroCounts[name] = static_cast(nonZeroCounts[i].second); } - LOG_DEBUG("actualNonZeroCounts = " - << core::CContainerPrinter::print(actualNonZeroCounts)); + LOG_DEBUG("actualNonZeroCounts = " << core::CContainerPrinter::print(actualNonZeroCounts)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), - core::CContainerPrinter::print(actualNonZeroCounts)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), core::CContainerPrinter::print(actualNonZeroCounts)); std::string actualFeatureData; { @@ -786,14 +671,11 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() TStrFeatureDataPrVec actual; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart - bucketLength, bucketLength, featureData); - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - const TSizeSizePrFeatureDataPrVec &data = featureData[i].second; - for (std::size_t j = 0u; j < data.size(); ++j) - { - std::string key = model_t::print(featureData[i].first) - + " " + gatherer.personName(data[j].first.first) - + " " + gatherer.attributeName(data[j].first.second); + for (std::size_t i = 0u; i < featureData.size(); ++i) { + const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; + for (std::size_t j = 0u; j < data.size(); ++j) { + std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + gatherer.attributeName(data[j].first.second); actual.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(" " << key << " = " << data[j].second.s_Count); } @@ -802,11 +684,9 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() } CPPUNIT_ASSERT_EQUAL(expectedFeatureData, actualFeatureData); - } -void CEventRatePopulationDataGathererTest::testRemoveAttributes() -{ +void CEventRatePopulationDataGathererTest::testRemoveAttributes() { LOG_DEBUG("*** CEventRatePopulationDataGathererTest::testRemoveAttributes ***"); const core_t::TTime startTime = 1367280000; @@ -821,7 +701,12 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, searchKey, @@ -833,13 +718,8 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() generateTestMessages(rng, startTime, bucketLength, messages); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - addArrival(messages[i].s_Time, - 
messages[i].s_Person, - messages[i].s_Attribute, - gatherer, - m_ResourceMonitor); + for (std::size_t i = 0u; i < messages.size(); ++i) { + addArrival(messages[i].s_Time, messages[i].s_Person, messages[i].s_Attribute, gatherer, m_ResourceMonitor); } // Remove attributes 1, 2, 3 and 15. @@ -854,16 +734,11 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() CPPUNIT_ASSERT_EQUAL(numberAttributes, gatherer.numberByFieldValues()); TStrVec expectedAttributeNames; TSizeVec expectedAttributeIds; - for (std::size_t i = 0u; i < numberAttributes; ++i) - { - if (!std::binary_search(attributesToRemove.begin(), - attributesToRemove.end(), i)) - { + for (std::size_t i = 0u; i < numberAttributes; ++i) { + if (!std::binary_search(attributesToRemove.begin(), attributesToRemove.end(), i)) { expectedAttributeNames.push_back(gatherer.attributeName(i)); expectedAttributeIds.push_back(i); - } - else - { + } else { LOG_DEBUG("Removing " << gatherer.attributeName(i)); } } @@ -874,18 +749,12 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() TStrFeatureDataPrVec expected; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - const TSizeSizePrFeatureDataPrVec &data = featureData[i].second; - for (std::size_t j = 0u; j < data.size(); ++j) - { - if (!std::binary_search(attributesToRemove.begin(), - attributesToRemove.end(), - data[j].first.second)) - { - std::string key = model_t::print(featureData[i].first) - + " " + gatherer.personName(data[j].first.first) - + " " + gatherer.attributeName(data[j].first.second); + for (std::size_t i = 0u; i < featureData.size(); ++i) { + const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; + for (std::size_t j = 0u; j < data.size(); ++j) { + if (!std::binary_search(attributesToRemove.begin(), attributesToRemove.end(), data[j].first.second)) { + std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + gatherer.attributeName(data[j].first.second); expected.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(" " << key << " = " << data[j].second.s_Count); } @@ -896,10 +765,8 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() gatherer.recycleAttributes(attributesToRemove); - CPPUNIT_ASSERT_EQUAL(numberAttributes - attributesToRemove.size(), - gatherer.numberActiveAttributes()); - for (std::size_t i = 0u; i < expectedAttributeNames.size(); ++i) - { + CPPUNIT_ASSERT_EQUAL(numberAttributes - attributesToRemove.size(), gatherer.numberActiveAttributes()); + for (std::size_t i = 0u; i < expectedAttributeNames.size(); ++i) { std::size_t cid; CPPUNIT_ASSERT(gatherer.attributeId(expectedAttributeNames[i], cid)); CPPUNIT_ASSERT_EQUAL(expectedAttributeIds[i], cid); @@ -911,14 +778,11 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() TStrFeatureDataPrVec actual; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - const TSizeSizePrFeatureDataPrVec &data = featureData[i].second; - for (std::size_t j = 0u; j < data.size(); ++j) - { - std::string key = model_t::print(featureData[i].first) - + " " + gatherer.personName(data[j].first.first) - + " " + gatherer.attributeName(data[j].first.second); + for (std::size_t i = 0u; i < featureData.size(); ++i) { + const TSizeSizePrFeatureDataPrVec& data = 
featureData[i].second; + for (std::size_t j = 0u; j < data.size(); ++j) { + std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + gatherer.attributeName(data[j].first.second); actual.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(" " << key << " = " << data[j].second.s_Count); } @@ -929,16 +793,13 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() CPPUNIT_ASSERT_EQUAL(expectedFeatureData, actualFeatureData); } -namespace -{ -bool isSpace(const char x) -{ +namespace { +bool isSpace(const char x) { return x == ' ' || x == '\t'; } } -void CEventRatePopulationDataGathererTest::testPersistence() -{ +void CEventRatePopulationDataGathererTest::testPersistence() { LOG_DEBUG("*** CEventRatePopulationDataGathererTest::testPersistence ***"); const core_t::TTime startTime = 1367280000; @@ -954,7 +815,12 @@ void CEventRatePopulationDataGathererTest::testPersistence() CDataGatherer origDataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, searchKey, @@ -965,13 +831,8 @@ void CEventRatePopulationDataGathererTest::testPersistence() TMessageVec messages; generateTestMessages(rng, startTime, bucketLength, messages); - for (std::size_t i = 0u; i < messages.size(); ++i) - { - addArrival(messages[i].s_Time, - messages[i].s_Person, - messages[i].s_Attribute, - origDataGatherer, - m_ResourceMonitor); + for (std::size_t i = 0u; i < messages.size(); ++i) { + addArrival(messages[i].s_Time, messages[i].s_Person, messages[i].s_Attribute, origDataGatherer, m_ResourceMonitor); } std::string origXml; @@ -982,8 +843,7 @@ void CEventRatePopulationDataGathererTest::testPersistence() } LOG_DEBUG("origXml = " << origXml); - LOG_DEBUG("length = " << origXml.length() - << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace)); + LOG_DEBUG("length = " << origXml.length() << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace)); // Restore the XML into a new data gatherer core::CRapidXmlParser parser; @@ -993,7 +853,12 @@ void CEventRatePopulationDataGathererTest::testPersistence() CDataGatherer restoredDataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, searchKey, @@ -1020,7 +885,12 @@ void CEventRatePopulationDataGathererTest::testPersistence() CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "value", + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "value", TStrVec(), false, searchKey, @@ -1029,19 +899,12 @@ void CEventRatePopulationDataGathererTest::testPersistence() 0); core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) - { + for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { TMessageVec messages; generateTestMessages(rng, time, bucketLength, messages); - for (std::size_t j = 0u; j < messages.size(); ++j) - { - addArrival(messages[j].s_Time, - messages[j].s_Person, - "attribute", - messages[j].s_Attribute, - dataGatherer, - m_ResourceMonitor); + for (std::size_t j 
= 0u; j < messages.size(); ++j) {
+            addArrival(messages[j].s_Time, messages[j].s_Person, "attribute", messages[j].s_Attribute, dataGatherer, m_ResourceMonitor);
 
             std::size_t cid;
             dataGatherer.attributeId(messages[j].s_Attribute, cid);
@@ -1064,8 +927,7 @@ void CEventRatePopulationDataGathererTest::testPersistence()
         }
 
         LOG_DEBUG("origXml = " << origXml);
-        LOG_DEBUG("length = " << origXml.length()
-                  << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace));
+        LOG_DEBUG("length = " << origXml.length() << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace));
 
         // Restore the XML into a new data gatherer
         core::CRapidXmlParser parser;
@@ -1075,7 +937,12 @@ void CEventRatePopulationDataGathererTest::testPersistence()
         CDataGatherer restoredDataGatherer(model_t::E_PopulationEventRate,
                                            model_t::E_None,
                                            params,
-                                           EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                                           EMPTY_STRING,
+                                           EMPTY_STRING,
+                                           EMPTY_STRING,
+                                           EMPTY_STRING,
+                                           EMPTY_STRING,
+                                           EMPTY_STRING,
                                            TStrVec(),
                                            false,
                                            searchKey,
@@ -1092,34 +959,25 @@ void CEventRatePopulationDataGathererTest::testPersistence()
         CPPUNIT_ASSERT_EQUAL(origXml, newXml);
         CPPUNIT_ASSERT_EQUAL(dataGatherer.checksum(), restoredDataGatherer.checksum());
     }
-
 }
 
-CppUnit::Test *CEventRatePopulationDataGathererTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CEventRatePopulationDataGathererTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
-                                   "CEventRatePopulationDataGathererTest::testAttributeCounts",
-                                   &CEventRatePopulationDataGathererTest::testAttributeCounts) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
-                                   "CEventRatePopulationDataGathererTest::testAttributeIndicator",
-                                   &CEventRatePopulationDataGathererTest::testAttributeIndicator) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
-                                   "CEventRatePopulationDataGathererTest::testUniqueValueCounts",
-                                   &CEventRatePopulationDataGathererTest::testUniqueValueCounts) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
-                                   "CEventRatePopulationDataGathererTest::testCompressedLength",
-                                   &CEventRatePopulationDataGathererTest::testCompressedLength) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
-                                   "CEventRatePopulationDataGathererTest::testRemovePeople",
-                                   &CEventRatePopulationDataGathererTest::testRemovePeople) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
-                                   "CEventRatePopulationDataGathererTest::testRemoveAttributes",
-                                   &CEventRatePopulationDataGathererTest::testRemoveAttributes) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
-                                   "CEventRatePopulationDataGathererTest::testPersistence",
-                                   &CEventRatePopulationDataGathererTest::testPersistence) );
+CppUnit::Test* CEventRatePopulationDataGathererTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRatePopulationDataGathererTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
+        "CEventRatePopulationDataGathererTest::testAttributeCounts", &CEventRatePopulationDataGathererTest::testAttributeCounts));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
+        "CEventRatePopulationDataGathererTest::testAttributeIndicator", &CEventRatePopulationDataGathererTest::testAttributeIndicator));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
+        "CEventRatePopulationDataGathererTest::testUniqueValueCounts", &CEventRatePopulationDataGathererTest::testUniqueValueCounts));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
+        "CEventRatePopulationDataGathererTest::testCompressedLength", &CEventRatePopulationDataGathererTest::testCompressedLength));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
+        "CEventRatePopulationDataGathererTest::testRemovePeople", &CEventRatePopulationDataGathererTest::testRemovePeople));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
+        "CEventRatePopulationDataGathererTest::testRemoveAttributes", &CEventRatePopulationDataGathererTest::testRemoveAttributes));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
+        "CEventRatePopulationDataGathererTest::testPersistence", &CEventRatePopulationDataGathererTest::testPersistence));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CEventRatePopulationDataGathererTest.h b/lib/model/unittest/CEventRatePopulationDataGathererTest.h
index 68f8d75e32..d4590908e1 100644
--- a/lib/model/unittest/CEventRatePopulationDataGathererTest.h
+++ b/lib/model/unittest/CEventRatePopulationDataGathererTest.h
@@ -11,22 +11,20 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-
-class CEventRatePopulationDataGathererTest : public CppUnit::TestFixture
-{
-    public:
-        void testAttributeCounts();
-        void testAttributeIndicator();
-        void testUniqueValueCounts();
-        void testCompressedLength();
-        void testRemovePeople();
-        void testRemoveAttributes();
-        void testPersistence();
-
-        static CppUnit::Test *suite();
-
-    private:
-        ml::model::CResourceMonitor m_ResourceMonitor;
+class CEventRatePopulationDataGathererTest : public CppUnit::TestFixture {
+public:
+    void testAttributeCounts();
+    void testAttributeIndicator();
+    void testUniqueValueCounts();
+    void testCompressedLength();
+    void testRemovePeople();
+    void testRemoveAttributes();
+    void testPersistence();
+
+    static CppUnit::Test* suite();
+
+private:
+    ml::model::CResourceMonitor m_ResourceMonitor;
 };
 
 #endif // INCLUDED_CEventRatePopulationDataGathererTest_h
diff --git a/lib/model/unittest/CEventRatePopulationModelTest.cc b/lib/model/unittest/CEventRatePopulationModelTest.cc
index dfcafb27f5..844b0f7aad 100644
--- a/lib/model/unittest/CEventRatePopulationModelTest.cc
+++ b/lib/model/unittest/CEventRatePopulationModelTest.cc
@@ -48,8 +48,7 @@
 using namespace ml;
 using namespace model;
 
-namespace
-{
+namespace {
 
 using TOptionalDouble = boost::optional<double>;
 using TSizeSizePr = std::pair<std::size_t, std::size_t>;
@@ -69,18 +68,12 @@ using TSizeDoublePr1Vec = core::CSmallVector<TSizeDoublePr, 1>;
 
 const std::string EMPTY_STRING;
 
-struct SMessage
-{
-    SMessage(core_t::TTime time,
-             const std::string &person,
-             const std::string &attribute) :
-            s_Time(time), s_Person(person), s_Attribute(attribute)
-    {}
+struct SMessage {
+    SMessage(core_t::TTime time, const std::string& person, const std::string& attribute)
+        : s_Time(time), s_Person(person), s_Attribute(attribute) {}
 
-    bool operator<(const SMessage &other) const
-    {
-        return maths::COrderings::lexicographical_compare(s_Time, s_Person, s_Attribute,
-                                                          other.s_Time, other.s_Person, other.s_Attribute);
+    bool operator<(const SMessage& other) const {
+        return maths::COrderings::lexicographical_compare(s_Time, s_Person, s_Attribute, other.s_Time, other.s_Person, other.s_Attribute);
     }
 
     core_t::TTime s_Time;
@@ -88,32 +81,22 @@ struct SMessage
     std::string s_Attribute;
 };
 
-struct SAnomaly
-{
+struct SAnomaly {
     SAnomaly() : s_Bucket(0u), s_Person(), s_Attributes() {}
-    SAnomaly(std::size_t bucket,
-             const std::string &person,
-             const TDoubleStrPrVec &attributes) :
-            s_Bucket(bucket), s_Person(person), s_Attributes(attributes)
-    {}
+    SAnomaly(std::size_t bucket, const std::string& person, const TDoubleStrPrVec& attributes)
+        : s_Bucket(bucket), s_Person(person), s_Attributes(attributes) {}
 
     std::size_t s_Bucket;
     std::string s_Person;
     TDoubleStrPrVec s_Attributes;
 
-    bool operator<(const
SAnomaly &other) const
-    {
-        return s_Bucket < other.s_Bucket;
-    }
+    bool operator<(const SAnomaly& other) const { return s_Bucket < other.s_Bucket; }
 
-    std::string print() const
-    {
+    std::string print() const {
         std::ostringstream result;
         result << "[" << s_Bucket << ", " + s_Person << ",";
-        for (std::size_t i = 0u; i < s_Attributes.size(); ++i)
-        {
-            if (s_Attributes[i].first < 0.01)
-            {
+        for (std::size_t i = 0u; i < s_Attributes.size(); ++i) {
+            if (s_Attributes[i].first < 0.01) {
                 result << " " << s_Attributes[i].second;
             }
         }
@@ -124,10 +107,7 @@ struct SAnomaly
 
 using TMessageVec = std::vector<SMessage>;
 
-void generateTestMessages(core_t::TTime startTime,
-                          core_t::TTime bucketLength,
-                          TMessageVec &messages)
-{
+void generateTestMessages(core_t::TTime startTime, core_t::TTime bucketLength, TMessageVec& messages) {
     // The test case is as follows:
     //
     //   attribute | 0 | 1 | 2 | 3 | 4
@@ -149,22 +129,20 @@ void generateTestMessages(core_t::TTime startTime,
     const std::size_t numberPeople = 20u;
 
     TStrVec attributes;
-    for (std::size_t i = 0u; i < numberAttributes; ++i)
-    {
+    for (std::size_t i = 0u; i < numberAttributes; ++i) {
         attributes.push_back("c" + boost::lexical_cast<std::string>(i));
     }
 
     TStrVec people;
-    for (std::size_t i = 0u; i < numberPeople; ++i)
-    {
+    for (std::size_t i = 0u; i < numberPeople; ++i) {
         people.push_back("p" + boost::lexical_cast<std::string>(i));
     }
 
-    std::size_t c0People[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 };
-    std::size_t c1People[] = { 0, 1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 };
-    std::size_t c2People[] = { 0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 };
-    std::size_t c3People[] = { 3, 4 };
-    std::size_t c4People[] = { 3 };
+    std::size_t c0People[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
+    std::size_t c1People[] = {0, 1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
+    std::size_t c2People[] = {0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
+    std::size_t c3People[] = {3, 4};
+    std::size_t c4People[] = {3};
 
     TSizeVecVec attributePeople;
     attributePeople.push_back(TSizeVec(boost::begin(c0People), boost::end(c0People)));
     attributePeople.push_back(TSizeVec(boost::begin(c1People), boost::end(c1People)));
     attributePeople.push_back(TSizeVec(boost::begin(c2People), boost::end(c2People)));
     attributePeople.push_back(TSizeVec(boost::begin(c3People), boost::end(c3People)));
     attributePeople.push_back(TSizeVec(boost::begin(c4People), boost::end(c4People)));
 
-    double attributeRates[] = { 10.0, 0.02, 15.0, 2.0, 1.0 };
+    double attributeRates[] = {10.0, 0.02, 15.0, 2.0, 1.0};
 
-    TSizeSizeSizeTr anomaliesAttributePerson[] =
-        {
-            TSizeSizeSizeTr(10u, 0u, 1u),
-            TSizeSizeSizeTr(15u, 0u, 11u),
-            TSizeSizeSizeTr(30u, 2u, 4u),
-            TSizeSizeSizeTr(35u, 2u, 5u),
-            TSizeSizeSizeTr(50u, 0u, 11u),
-            TSizeSizeSizeTr(75u, 2u, 5u)
-        };
+    TSizeSizeSizeTr anomaliesAttributePerson[] = {TSizeSizeSizeTr(10u, 0u, 1u),
+                                                  TSizeSizeSizeTr(15u, 0u, 11u),
+                                                  TSizeSizeSizeTr(30u, 2u, 4u),
+                                                  TSizeSizeSizeTr(35u, 2u, 5u),
+                                                  TSizeSizeSizeTr(50u, 0u, 11u),
+                                                  TSizeSizeSizeTr(75u, 2u, 5u)};
 
     test::CRandomNumbers rng;
 
-    for (std::size_t i = 0u;
-         i < numberBuckets;
-         ++i, startTime += bucketLength)
-    {
-        for (std::size_t j = 0u; j < numberAttributes; ++j)
-        {
+    for (std::size_t i = 0u; i < numberBuckets; ++i, startTime += bucketLength) {
+        for (std::size_t j = 0u; j < numberAttributes; ++j) {
             TUIntVec samples;
             rng.generatePoissonSamples(attributeRates[j], attributePeople[j].size(), samples);
-            for (std::size_t k = 0u; k < samples.size(); ++k)
-            {
+            for (std::size_t k = 0u; k < samples.size(); ++k) {
                 unsigned int n = samples[k];
                 if (std::binary_search(boost::begin(anomaliesAttributePerson),
                                        boost::end(anomaliesAttributePerson),
-                                       TSizeSizeSizeTr(i, j, attributePeople[j][k])))
-                {
+                                       TSizeSizeSizeTr(i, j, attributePeople[j][k]))) {
                     n += static_cast<unsigned int>(2.5 * attributeRates[j]);
                     LOG_DEBUG(i << " " << attributes[j] << " generating anomaly " << n);
                 }
@@ -210,12 +179,9 @@ void generateTestMessages(core_t::TTime startTime,
                 TDoubleVec times;
                 rng.generateUniformSamples(0.0, static_cast<double>(bucketLength - 1), n, times);
 
-                for (std::size_t l = 0u; l < times.size(); ++l)
-                {
+                for (std::size_t l = 0u; l < times.size(); ++l) {
                     core_t::TTime time = startTime + static_cast<core_t::TTime>(times[l]);
-                    messages.push_back(SMessage(time,
-                                                people[attributePeople[j][k]],
-                                                attributes[j]));
+                    messages.push_back(SMessage(time, people[attributePeople[j][k]], attributes[j]));
                 }
             }
         }
@@ -224,10 +190,7 @@ void generateTestMessages(core_t::TTime startTime,
     std::sort(messages.begin(), messages.end());
 }
 
-void addArrival(const SMessage &message,
-                const CModelFactory::TDataGathererPtr &gatherer,
-                CResourceMonitor &resourceMonitor)
-{
+void addArrival(const SMessage& message, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) {
     CDataGatherer::TStrCPtrVec fields;
     fields.push_back(&message.s_Person);
     fields.push_back(&message.s_Attribute);
@@ -239,8 +202,7 @@ void addArrival(const SMessage &message,
 
 const TSizeDoublePr1Vec NO_CORRELATES;
 }
 
-void CEventRatePopulationModelTest::testBasicAccessors()
-{
+void CEventRatePopulationModelTest::testBasicAccessors() {
     LOG_DEBUG("*** testBasicAccessors ***");
 
     // Check that the correct data is read retrieved by the
@@ -267,8 +229,7 @@ void CEventRatePopulationModelTest::testBasicAccessors()
     features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute);
     factory.features(features);
     CModelFactory::SGathererInitializationData gathererInitData(startTime);
-    CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(
-            factory.makeDataGatherer(gathererInitData)));
+    CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData)));
     CModelFactory::SModelInitializationData modelInitData(gatherer);
     CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData));
 
@@ -277,26 +238,21 @@ void CEventRatePopulationModelTest::testBasicAccessors()
     TStrUInt64Map expectedBucketPersonCounts;
     TStrStrPrDoubleMap expectedBucketPersonAttributeCounts;
 
-    for (std::size_t i = 0u; i < messages.size(); ++i)
-    {
-        if (messages[i].s_Time >= startTime + bucketLength)
-        {
+    for (std::size_t i = 0u; i < messages.size(); ++i) {
+        if (messages[i].s_Time >= startTime + bucketLength) {
             model->sample(startTime, startTime + bucketLength, m_ResourceMonitor);
 
-            LOG_DEBUG("Testing bucket = [" << startTime
-                      << "," << startTime + bucketLength << ")");
+            LOG_DEBUG("Testing bucket = [" << startTime << "," << startTime + bucketLength << ")");
 
             // Test the person and attribute invariants.
- for (std::size_t j = 0u; j < gatherer->numberActivePeople(); ++j) - { - const std::string &name = model->personName(j); + for (std::size_t j = 0u; j < gatherer->numberActivePeople(); ++j) { + const std::string& name = model->personName(j); std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(name, pid)); CPPUNIT_ASSERT_EQUAL(j, pid); } - for (std::size_t j = 0u; j < gatherer->numberActiveAttributes(); ++j) - { - const std::string &name = model->attributeName(j); + for (std::size_t j = 0u; j < gatherer->numberActiveAttributes(); ++j) { + const std::string& name = model->attributeName(j); std::size_t cid; CPPUNIT_ASSERT(gatherer->attributeId(name, cid)); CPPUNIT_ASSERT_EQUAL(j, cid); @@ -305,10 +261,7 @@ void CEventRatePopulationModelTest::testBasicAccessors() TSizeVec expectedCurrentBucketPersonIds; // Test the person counts. - for (TStrUInt64MapCItr j = expectedBucketPersonCounts.begin(); - j != expectedBucketPersonCounts.end(); - ++j) - { + for (TStrUInt64MapCItr j = expectedBucketPersonCounts.begin(); j != expectedBucketPersonCounts.end(); ++j) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(j->first, pid)); @@ -320,26 +273,20 @@ void CEventRatePopulationModelTest::testBasicAccessors() } // Test the person attribute counts. - for (TStrStrPrDoubleMapCItr j = expectedBucketPersonAttributeCounts.begin(); - j != expectedBucketPersonAttributeCounts.end(); - ++j) - { + for (TStrStrPrDoubleMapCItr j = expectedBucketPersonAttributeCounts.begin(); j != expectedBucketPersonAttributeCounts.end(); + ++j) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(j->first.first, pid)); std::size_t cid; CPPUNIT_ASSERT(gatherer->attributeId(j->first.second, cid)); - TDouble1Vec count = model->currentBucketValue( - model_t::E_PopulationCountByBucketPersonAndAttribute, - pid, cid, - startTime); + TDouble1Vec count = model->currentBucketValue(model_t::E_PopulationCountByBucketPersonAndAttribute, pid, cid, startTime); CPPUNIT_ASSERT(!count.empty()); CPPUNIT_ASSERT_EQUAL(j->second, count[0]); } // Test the current bucket people. 
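            // currentBucketPersonIds must return exactly the set of people seen in
            // the bucket just sampled; the expected ids are sorted first so the two
            // containers can be compared by their printed representations.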
- std::sort(expectedCurrentBucketPersonIds.begin(), - expectedCurrentBucketPersonIds.end()); + std::sort(expectedCurrentBucketPersonIds.begin(), expectedCurrentBucketPersonIds.end()); TSizeVec bucketPersonIds; model->currentBucketPersonIds(startTime, bucketPersonIds); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCurrentBucketPersonIds), @@ -350,15 +297,14 @@ void CEventRatePopulationModelTest::testBasicAccessors() startTime += bucketLength; } - const SMessage &m = messages[i]; + const SMessage& m = messages[i]; addArrival(m, gatherer, m_ResourceMonitor); ++expectedBucketPersonCounts[m.s_Person]; expectedBucketPersonAttributeCounts[TStrStrPr(m.s_Person, m.s_Attribute)] += 1.0; } } -void CEventRatePopulationModelTest::testFeatures() -{ +void CEventRatePopulationModelTest::testFeatures() { LOG_DEBUG("*** testFeatures ***"); // We check that the correct data is read from the gatherer @@ -381,8 +327,7 @@ void CEventRatePopulationModelTest::testFeatures() using TDouble2VecVecDouble2Vec4VecVecPr = std::pair; using TSizeDouble2VecVecDouble2Vec4VecVecPrMap = std::map; - static const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleCountWeight, - maths_t::E_SampleWinsorisationWeight}; + static const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -403,8 +348,7 @@ void CEventRatePopulationModelTest::testFeatures() CModelFactory::TDataGathererPtr gatherer(dynamic_cast(factory.makeDataGatherer(gathererInitData))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRatePopulationModel *model = - dynamic_cast(modelHolder.get()); + CEventRatePopulationModel* model = dynamic_cast(modelHolder.get()); model::CModelFactory::TFeatureMathsModelPtrPrVec models{factory.defaultFeatureModels(features, bucketLength, 1.0, false)}; CPPUNIT_ASSERT_EQUAL(std::size_t(1), models.size()); @@ -417,15 +361,12 @@ void CEventRatePopulationModelTest::testFeatures() TSizeMathsModelPtrMap expectedPopulationModels; - for (const auto &message : messages) - { - if (message.s_Time >= startTime + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= startTime + bucketLength) { model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); TSizeSizePrUInt64Map expectedNonZeroCounts; - for (const auto &count : expectedCounts) - { + for (const auto& count : expectedCounts) { std::size_t pid, cid; CPPUNIT_ASSERT(gatherer->personId(count.first.first, pid)); CPPUNIT_ASSERT(gatherer->attributeId(count.first.second, cid)); @@ -437,77 +378,65 @@ void CEventRatePopulationModelTest::testFeatures() } TSizeDouble2VecVecDouble2Vec4VecVecPrMap populationSamples; - for (const auto &count_ : expectedNonZeroCounts) - { + for (const auto& count_ : expectedNonZeroCounts) { std::size_t pid = count_.first.first; std::size_t cid = count_.first.second; core_t::TTime time = startTime + bucketLength / 2; - double count = model_t::offsetCountToZero( - model_t::E_PopulationCountByBucketPersonAndAttribute, - static_cast(count_.second)); - TMathsModelPtr &model_ = expectedPopulationModels[cid]; - if (model_ == 0) - { + double count = model_t::offsetCountToZero(model_t::E_PopulationCountByBucketPersonAndAttribute, + static_cast(count_.second)); + TMathsModelPtr& model_ = expectedPopulationModels[cid]; + if (model_ == 0) { model_.reset(models[0].second->clone(cid)); } 
TDoubleVec sample(1, count); - TDouble2Vec4Vec weight{{model->sampleRateWeight(pid, cid)}, - model_->winsorisationWeight(1.0, time, sample)}; + TDouble2Vec4Vec weight{{model->sampleRateWeight(pid, cid)}, model_->winsorisationWeight(1.0, time, sample)}; populationSamples[cid].first.push_back({sample[0]}); populationSamples[cid].second.push_back(weight); } - for (auto &samples_ : populationSamples) - { + for (auto& samples_ : populationSamples) { std::size_t cid = samples_.first; - TDouble2Vec4VecVec &weights = samples_.second.second; + TDouble2Vec4VecVec& weights = samples_.second.second; maths::COrderings::simultaneousSort(samples_.second.first, weights); maths::CModel::TTimeDouble2VecSizeTrVec samples; - for (const auto &sample : samples_.second.first) - { + for (const auto& sample : samples_.second.first) { samples.emplace_back(startTime + bucketLength / 2, sample, 0); } maths::CModelAddSamplesParams params_; params_.integer(true) - .nonNegative(true) - .propagationInterval(1.0) - .weightStyles(WEIGHT_STYLES) - .trendWeights(weights) - .priorWeights(weights); + .nonNegative(true) + .propagationInterval(1.0) + .weightStyles(WEIGHT_STYLES) + .trendWeights(weights) + .priorWeights(weights); expectedPopulationModels[cid]->addSamples(params_, samples); } TSizeSizePrFeatureDataPrVec expectedPeoplePerAttribute; expectedPeoplePerAttribute.reserve(numberAttributes); - for (std::size_t j = 0u; j < numberAttributes; ++j) - { - expectedPeoplePerAttribute.emplace_back(std::make_pair(size_t(0), j), - TFeatureData(j)); + for (std::size_t j = 0u; j < numberAttributes; ++j) { + expectedPeoplePerAttribute.emplace_back(std::make_pair(size_t(0), j), TFeatureData(j)); } - for (const auto &attribute : attributePeople) - { + for (const auto& attribute : attributePeople) { expectedPeoplePerAttribute[attribute.first].second = attribute.second.size(); } // Check the number of people per attribute. - const TSizeSizePrFeatureDataPrVec &peoplePerAttribute = - model->featureData(model_t::E_PopulationUniquePersonCountByAttribute, startTime); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = + model->featureData(model_t::E_PopulationUniquePersonCountByAttribute, startTime); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedPeoplePerAttribute), core::CContainerPrinter::print(peoplePerAttribute)); // Check the non-zero (person, attribute) counts. 
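            // featureData for the bucket count feature must list exactly the
            // (person, attribute) pairs with at least one event in the bucket,
            // together with their counts.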
- const TSizeSizePrFeatureDataPrVec &nonZeroCounts = - model->featureData(model_t::E_PopulationCountByBucketPersonAndAttribute, startTime); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), - core::CContainerPrinter::print(nonZeroCounts)); - - for (std::size_t cid = 0u; cid < numberAttributes; ++cid) - { - const maths::CModel *populationModel = - model->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, cid); + const TSizeSizePrFeatureDataPrVec& nonZeroCounts = + model->featureData(model_t::E_PopulationCountByBucketPersonAndAttribute, startTime); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), core::CContainerPrinter::print(nonZeroCounts)); + + for (std::size_t cid = 0u; cid < numberAttributes; ++cid) { + const maths::CModel* populationModel = + model->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, cid); CPPUNIT_ASSERT(populationModel); - CPPUNIT_ASSERT_EQUAL(expectedPopulationModels[cid]->checksum(), - populationModel->checksum()); + CPPUNIT_ASSERT_EQUAL(expectedPopulationModels[cid]->checksum(), populationModel->checksum()); } startTime += bucketLength; @@ -520,16 +449,14 @@ void CEventRatePopulationModelTest::testFeatures() } } -void CEventRatePopulationModelTest::testComputeProbability() -{ +void CEventRatePopulationModelTest::testComputeProbability() { LOG_DEBUG("*** testComputeProbability ***"); // Check that we get the probabilities we expect. using TAnomalyVec = std::vector; using TDoubleAnomalyPr = std::pair; - using TAnomalyAccumulator = maths::CBasicStatistics::COrderStatisticsHeap< - TDoubleAnomalyPr, maths::COrderings::SFirstLess>; + using TAnomalyAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -548,30 +475,24 @@ void CEventRatePopulationModelTest::testComputeProbability() CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRatePopulationModel *model = dynamic_cast(modelHolder.get()); + CEventRatePopulationModel* model = dynamic_cast(modelHolder.get()); TAnomalyAccumulator anomalies(6u); - for (std::size_t i = 0u, bucket = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= startTime + bucketLength) - { - LOG_DEBUG("Updating and testing bucket = [" << startTime - << "," << startTime + bucketLength << ")"); + for (std::size_t i = 0u, bucket = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= startTime + bucketLength) { + LOG_DEBUG("Updating and testing bucket = [" << startTime << "," << startTime + bucketLength << ")"); model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); SAnnotatedProbability annotatedProbability; - for (std::size_t pid = 0u; pid < gatherer->numberActivePeople(); ++pid) - { + for (std::size_t pid = 0u; pid < gatherer->numberActivePeople(); ++pid) { CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model->computeProbability(pid, startTime, startTime + bucketLength, - partitioningFields, 2, annotatedProbability); + model->computeProbability(pid, startTime, startTime + bucketLength, partitioningFields, 2, annotatedProbability); std::string person = model->personName(pid); TDoubleStrPrVec attributes; - for (std::size_t j = 0u; j < annotatedProbability.s_AttributeProbabilities.size(); ++j) - { + for (std::size_t j = 0u; j < 
annotatedProbability.s_AttributeProbabilities.size(); ++j) { attributes.emplace_back(annotatedProbability.s_AttributeProbabilities[j].s_Probability, *annotatedProbability.s_AttributeProbabilities[j].s_Attribute); } @@ -589,8 +510,7 @@ void CEventRatePopulationModelTest::testComputeProbability() LOG_DEBUG("Anomalies = " << anomalies.print()); TAnomalyVec orderedAnomalies; - for (std::size_t i = 0u; i < anomalies.count(); ++i) - { + for (std::size_t i = 0u; i < anomalies.count(); ++i) { orderedAnomalies.push_back(anomalies[i].second); } @@ -598,25 +518,20 @@ void CEventRatePopulationModelTest::testComputeProbability() LOG_DEBUG("orderedAnomalies = " << core::CContainerPrinter::print(orderedAnomalies)); - std::string expectedAnomalies[] = - { - std::string("[10, p1, c0]"), - std::string("[15, p11, c0]"), - std::string("[30, p4, c2]"), - std::string("[35, p5, c2]"), - std::string("[50, p11, c0]"), - std::string("[75, p5, c2]") - }; + std::string expectedAnomalies[] = {std::string("[10, p1, c0]"), + std::string("[15, p11, c0]"), + std::string("[30, p4, c2]"), + std::string("[35, p5, c2]"), + std::string("[50, p11, c0]"), + std::string("[75, p5, c2]")}; CPPUNIT_ASSERT_EQUAL(boost::size(expectedAnomalies), orderedAnomalies.size()); - for (std::size_t i = 0u; i < orderedAnomalies.size(); ++i) - { + for (std::size_t i = 0u; i < orderedAnomalies.size(); ++i) { CPPUNIT_ASSERT_EQUAL(expectedAnomalies[i], orderedAnomalies[i].print()); } } -void CEventRatePopulationModelTest::testPrune() -{ +void CEventRatePopulationModelTest::testPrune() { LOG_DEBUG("*** testPrune ***"); // This test has four people and five attributes. We expect @@ -630,29 +545,10 @@ void CEventRatePopulationModelTest::testPrune() const core_t::TTime bucketLength = 3600; const std::size_t numberBuckets = 1000u; - std::string people[] = - { - std::string("p1"), - std::string("p2"), - std::string("p3"), - std::string("p4") - }; - std::string attributes[] = - { - std::string("c1"), - std::string("c2"), - std::string("c3"), - std::string("c4"), - std::string("c5") - }; - - TStrSizePrVecVec eventCounts[] = - { - TStrSizePrVecVec(), - TStrSizePrVecVec(), - TStrSizePrVecVec(), - TStrSizePrVecVec() - }; + std::string people[] = {std::string("p1"), std::string("p2"), std::string("p3"), std::string("p4")}; + std::string attributes[] = {std::string("c1"), std::string("c2"), std::string("c3"), std::string("c4"), std::string("c5")}; + + TStrSizePrVecVec eventCounts[] = {TStrSizePrVecVec(), TStrSizePrVecVec(), TStrSizePrVecVec(), TStrSizePrVecVec()}; { TStrSizePrVec attributeCounts; attributeCounts.push_back(TStrSizePr(attributes[0], 0)); @@ -699,8 +595,8 @@ void CEventRatePopulationModelTest::testPrune() eventCounts[3][70][0].second = 4; // p4, bucket 70, c2 } - const std::string expectedPeople[] = { people[0], people[2], people[3] }; - const std::string expectedAttributes[] = { attributes[2], attributes[3] }; + const std::string expectedPeople[] = {people[0], people[2], people[3]}; + const std::string expectedAttributes[] = {attributes[2], attributes[3]}; SModelParams params(bucketLength); params.s_DecayRate = 0.01; @@ -720,26 +616,19 @@ void CEventRatePopulationModelTest::testPrune() CPPUNIT_ASSERT(expectedModel); TMessageVec messages; - for (std::size_t i = 0u; i < boost::size(people); ++i) - { + for (std::size_t i = 0u; i < boost::size(people); ++i) { core_t::TTime bucketStart = startTime; - for (std::size_t j = 0u; j < numberBuckets; ++j, bucketStart += bucketLength) - { - const TStrSizePrVec &attributeEventCounts = eventCounts[i][j]; - for 
(std::size_t k = 0u; k < attributeEventCounts.size(); ++k) - { - if (attributeEventCounts[k].second == 0) - { + for (std::size_t j = 0u; j < numberBuckets; ++j, bucketStart += bucketLength) { + const TStrSizePrVec& attributeEventCounts = eventCounts[i][j]; + for (std::size_t k = 0u; k < attributeEventCounts.size(); ++k) { + if (attributeEventCounts[k].second == 0) { continue; } std::size_t n = attributeEventCounts[k].second; core_t::TTime time = bucketStart; core_t::TTime dt = bucketLength / static_cast(n); - for (std::size_t l = 0u; l < n; ++l, time += dt) - { - messages.push_back(SMessage(time, - people[i], - attributeEventCounts[k].first)); + for (std::size_t l = 0u; l < n; ++l, time += dt) { + messages.push_back(SMessage(time, people[i], attributeEventCounts[k].first)); } } } @@ -748,24 +637,16 @@ void CEventRatePopulationModelTest::testPrune() TMessageVec expectedMessages; expectedMessages.reserve(messages.size()); - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if ( std::binary_search(boost::begin(expectedPeople), - boost::end(expectedPeople), - messages[i].s_Person) - && std::binary_search(boost::begin(expectedAttributes), - boost::end(expectedAttributes), - messages[i].s_Attribute)) - { + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (std::binary_search(boost::begin(expectedPeople), boost::end(expectedPeople), messages[i].s_Person) && + std::binary_search(boost::begin(expectedAttributes), boost::end(expectedAttributes), messages[i].s_Attribute)) { expectedMessages.push_back(messages[i]); } } core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } @@ -778,10 +659,8 @@ void CEventRatePopulationModelTest::testPrune() CPPUNIT_ASSERT_EQUAL(maxDimensionBeforePrune, maxDimensionAfterPrune); bucketStart = startTime; - for (std::size_t i = 0u; i < expectedMessages.size(); ++i) - { - if (expectedMessages[i].s_Time >= bucketStart + bucketLength) - { + for (std::size_t i = 0u; i < expectedMessages.size(); ++i) { + if (expectedMessages[i].s_Time >= bucketStart + bucketLength) { expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } @@ -797,15 +676,10 @@ void CEventRatePopulationModelTest::testPrune() bucketStart = gatherer->currentBucketStartTime() + bucketLength; - SMessage newMessages[] = - { - SMessage(bucketStart + 10, "p1", "c2"), - SMessage(bucketStart + 200, "p5", "c6"), - SMessage(bucketStart + 2100, "p5", "c6") - }; + SMessage newMessages[] = { + SMessage(bucketStart + 10, "p1", "c2"), SMessage(bucketStart + 200, "p5", "c6"), SMessage(bucketStart + 2100, "p5", "c6")}; - for (std::size_t i = 0u; i < boost::size(newMessages); ++i) - { + for (std::size_t i = 0u; i < boost::size(newMessages); ++i) { addArrival(newMessages[i], gatherer, m_ResourceMonitor); addArrival(newMessages[i], expectedGatherer, m_ResourceMonitor); } @@ -824,45 +698,31 @@ void CEventRatePopulationModelTest::testPrune() CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, clonedModelHolder->dataGatherer().numberActivePeople()); } -void CEventRatePopulationModelTest::testKey() -{ +void CEventRatePopulationModelTest::testKey() { LOG_DEBUG("*** testKey ***"); - function_t::EFunction countFunctions[] = - { - 
function_t::E_PopulationCount, - function_t::E_PopulationDistinctCount, - function_t::E_PopulationRare, - function_t::E_PopulationRareCount, - function_t::E_PopulationFreqRare, - function_t::E_PopulationFreqRareCount, - function_t::E_PopulationLowCounts, - function_t::E_PopulationHighCounts - }; - bool useNull[] = { true, false }; - std::string byField[] = { "", "by" }; - std::string partitionField[] = { "", "partition" }; + function_t::EFunction countFunctions[] = {function_t::E_PopulationCount, + function_t::E_PopulationDistinctCount, + function_t::E_PopulationRare, + function_t::E_PopulationRareCount, + function_t::E_PopulationFreqRare, + function_t::E_PopulationFreqRareCount, + function_t::E_PopulationLowCounts, + function_t::E_PopulationHighCounts}; + bool useNull[] = {true, false}; + std::string byField[] = {"", "by"}; + std::string partitionField[] = {"", "partition"}; { CAnomalyDetectorModelConfig config = CAnomalyDetectorModelConfig::defaultConfig(); int identifier = 0; - for (std::size_t i = 0u; i < boost::size(countFunctions); ++i) - { - for (std::size_t j = 0u; j < boost::size(useNull); ++j) - { - for (std::size_t k = 0u; k < boost::size(byField); ++k) - { - for (std::size_t l = 0u; l < boost::size(partitionField); ++l) - { - CSearchKey key(++identifier, - countFunctions[i], - useNull[j], - model_t::E_XF_None, - "", - byField[k], - "over", - partitionField[l]); + for (std::size_t i = 0u; i < boost::size(countFunctions); ++i) { + for (std::size_t j = 0u; j < boost::size(useNull); ++j) { + for (std::size_t k = 0u; k < boost::size(byField); ++k) { + for (std::size_t l = 0u; l < boost::size(partitionField); ++l) { + CSearchKey key( + ++identifier, countFunctions[i], useNull[j], model_t::E_XF_None, "", byField[k], "over", partitionField[l]); CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = config.factory(key); @@ -876,8 +736,7 @@ void CEventRatePopulationModelTest::testKey() } } -void CEventRatePopulationModelTest::testFrequency() -{ +void CEventRatePopulationModelTest::testFrequency() { LOG_DEBUG("*** CEventRatePopulationModelTest::testFrequency ***"); using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; @@ -885,27 +744,19 @@ void CEventRatePopulationModelTest::testFrequency() // Test we correctly compute frequencies for people and attributes. 
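    // Expected values: person i sends messages in every bucket for which
    // bucket % period[i] == 0, so its measured frequency should approach
    // 1 / period[i]. Person i sends attributes[0..i], so attributes[j] is
    // used by (10 - j) of the 10 people and its frequency is exactly
    // (10 - j) / 10.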
const core_t::TTime bucketLength = 600; - const std::string attributes[] = { "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10" }; - const std::string people[] = { "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10" }; - std::size_t period[] = { 1u, 1u, 10u, 3u, 4u, 5u, 2u, 1u, 3u, 7u }; + const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10"}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}; + std::size_t period[] = {1u, 1u, 10u, 3u, 4u, 5u, 2u, 1u, 3u, 7u}; core_t::TTime startTime = 0; TMessageVec messages; std::size_t bucket = 0u; - for (core_t::TTime bucketStart = startTime; - bucketStart < 100 * bucketLength; - bucketStart += bucketLength, ++bucket) - { - for (std::size_t i = 0u; i < boost::size(people); ++i) - { - if (bucket % period[i] == 0) - { - for (std::size_t j = 0u; j < i+1; ++j) - { - messages.push_back(SMessage(bucketStart + bucketLength / 2, - people[i], - attributes[j])); + for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; bucketStart += bucketLength, ++bucket) { + for (std::size_t i = 0u; i < boost::size(people); ++i) { + if (bucket % period[i] == 0) { + for (std::size_t j = 0u; j < i + 1; ++j) { + messages.push_back(SMessage(bucketStart + bucketLength / 2, people[i], attributes[j])); } } } @@ -923,20 +774,17 @@ void CEventRatePopulationModelTest::testFrequency() factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - const model::CDataGatherer &populationGatherer(dynamic_cast(*gatherer)); + const model::CDataGatherer& populationGatherer(dynamic_cast(*gatherer)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CEventRatePopulationModel *populationModel = - dynamic_cast(model.get()); + CEventRatePopulationModel* populationModel = dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel != 0); core_t::TTime time = startTime; - for (const auto &message : messages) - { - if (message.s_Time >= time + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= time + bucketLength) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); time += bucketLength; } @@ -945,38 +793,32 @@ void CEventRatePopulationModelTest::testFrequency() { TMeanAccumulator meanError; - for (std::size_t i = 0u; i < boost::size(people); ++i) - { + for (std::size_t i = 0u; i < boost::size(people); ++i) { LOG_DEBUG("*** person = " << people[i] << " ***"); std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(people[i], pid)); LOG_DEBUG("frequency = " << populationModel->personFrequency(pid)); LOG_DEBUG("expected frequency = " << 1.0 / static_cast(period[i])); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / static_cast(period[i]), - populationModel->personFrequency(pid), - 0.1 / static_cast(period[i])); - meanError.add(std::fabs( populationModel->personFrequency(pid) - - 1.0 / static_cast(period[i]))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0 / static_cast(period[i]), populationModel->personFrequency(pid), 0.1 / static_cast(period[i])); + meanError.add(std::fabs(populationModel->personFrequency(pid) - 1.0 / static_cast(period[i]))); } LOG_DEBUG("error = " << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.002); } { - for (std::size_t i = 0u; i < boost::size(attributes); ++i) - { 
+ for (std::size_t i = 0u; i < boost::size(attributes); ++i) { LOG_DEBUG("*** attribute = " << attributes[i] << " ***"); std::size_t cid; CPPUNIT_ASSERT(populationGatherer.attributeId(attributes[i], cid)); LOG_DEBUG("frequency = " << populationModel->attributeFrequency(cid)); LOG_DEBUG("expected frequency = " << (10.0 - static_cast(i)) / 10.0); - CPPUNIT_ASSERT_EQUAL((10.0 - static_cast(i)) / 10.0, - populationModel->attributeFrequency(cid)); + CPPUNIT_ASSERT_EQUAL((10.0 - static_cast(i)) / 10.0, populationModel->attributeFrequency(cid)); } } } -void CEventRatePopulationModelTest::testSampleRateWeight() -{ +void CEventRatePopulationModelTest::testSampleRateWeight() { LOG_DEBUG("*** CEventRatePopulationModelTest::testSampleRateWeight ***"); // Test that we correctly compensate for heavy hitters. @@ -988,11 +830,11 @@ void CEventRatePopulationModelTest::testSampleRateWeight() // one message per attribute per 10 buckets. const core_t::TTime bucketLength = 600; - const std::string attributes[] = { "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10" }; - const std::string people[] = { "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", - "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20" }; - std::size_t heavyHitters[] = { 0u, 4u }; - std::size_t normal[] = { 1u, 2u, 3u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u }; + const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10"}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", + "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20"}; + std::size_t heavyHitters[] = {0u, 4u}; + std::size_t normal[] = {1u, 2u, 3u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u}; std::size_t messagesPerBucket = boost::size(heavyHitters) * boost::size(attributes) + boost::size(normal); @@ -1001,36 +843,22 @@ void CEventRatePopulationModelTest::testSampleRateWeight() core_t::TTime startTime = 0; TMessageVec messages; - for (core_t::TTime bucketStart = startTime; - bucketStart < 100 * bucketLength; - bucketStart += bucketLength) - { + for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; bucketStart += bucketLength) { TSizeVec times; - rng.generateUniformSamples(static_cast(bucketStart), - static_cast(bucketStart + bucketLength), - messagesPerBucket, - times); + rng.generateUniformSamples( + static_cast(bucketStart), static_cast(bucketStart + bucketLength), messagesPerBucket, times); std::size_t m = 0u; - for (std::size_t i = 0u; i < boost::size(attributes); ++i) - { - for (std::size_t j = 0u; j < boost::size(heavyHitters); ++j) - { - messages.push_back(SMessage(static_cast(times[m++]), - people[heavyHitters[j]], - attributes[i])); + for (std::size_t i = 0u; i < boost::size(attributes); ++i) { + for (std::size_t j = 0u; j < boost::size(heavyHitters); ++j) { + messages.push_back(SMessage(static_cast(times[m++]), people[heavyHitters[j]], attributes[i])); } } TSizeVec attributeIndexes; - rng.generateUniformSamples(0, boost::size(attributes), - boost::size(normal), - attributeIndexes); - for (std::size_t i = 0u; i < boost::size(normal); ++i) - { - messages.push_back(SMessage(static_cast(times[m++]), - people[normal[i]], - attributes[attributeIndexes[i]])); + rng.generateUniformSamples(0, boost::size(attributes), boost::size(normal), attributeIndexes); + for (std::size_t i = 0u; i < boost::size(normal); ++i) { + messages.push_back(SMessage(static_cast(times[m++]), 
people[normal[i]], attributes[attributeIndexes[i]])); } } @@ -1049,15 +877,12 @@ void CEventRatePopulationModelTest::testSampleRateWeight() CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CEventRatePopulationModel *populationModel = - dynamic_cast(model.get()); + CEventRatePopulationModel* populationModel = dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel != 0); core_t::TTime time = startTime; - for (const auto &message : messages) - { - if (message.s_Time >= time + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= time + bucketLength) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); time += bucketLength; } @@ -1071,45 +896,35 @@ void CEventRatePopulationModelTest::testSampleRateWeight() // + ("# heavy hitters")) // / "# people" - double expectedRateWeight = ( static_cast(boost::size(normal)) - / static_cast(boost::size(attributes)) - + static_cast(boost::size(heavyHitters))) - / static_cast(boost::size(people)); + double expectedRateWeight = (static_cast(boost::size(normal)) / static_cast(boost::size(attributes)) + + static_cast(boost::size(heavyHitters))) / + static_cast(boost::size(people)); LOG_DEBUG("expectedRateWeight = " << expectedRateWeight); - for (std::size_t i = 0u; i < boost::size(heavyHitters); ++i) - { + for (std::size_t i = 0u; i < boost::size(heavyHitters); ++i) { LOG_DEBUG("*** person = " << people[heavyHitters[i]] << " ***"); std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(people[heavyHitters[i]], pid)); - for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) - { + for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); - LOG_DEBUG("attribute = " << populationModel->attributeName(cid) - << ", sampleRateWeight = " << sampleRateWeight); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedRateWeight, - sampleRateWeight, - 0.15 * expectedRateWeight); + LOG_DEBUG("attribute = " << populationModel->attributeName(cid) << ", sampleRateWeight = " << sampleRateWeight); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedRateWeight, sampleRateWeight, 0.15 * expectedRateWeight); } } - for (std::size_t i = 0u; i < boost::size(normal); ++i) - { + for (std::size_t i = 0u; i < boost::size(normal); ++i) { LOG_DEBUG("*** person = " << people[normal[i]] << " ***"); std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(people[normal[i]], pid)); - for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) - { + for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); - LOG_DEBUG("attribute = " << populationModel->attributeName(cid) - << ", sampleRateWeight = " << sampleRateWeight); + LOG_DEBUG("attribute = " << populationModel->attributeName(cid) << ", sampleRateWeight = " << sampleRateWeight); CPPUNIT_ASSERT_EQUAL(1.0, sampleRateWeight); } } } -void CEventRatePopulationModelTest::testPeriodicity() -{ +void CEventRatePopulationModelTest::testPeriodicity() { LOG_DEBUG("*** testPeriodicity ***"); // Create a daily periodic population and check that the @@ -1122,15 +937,10 @@ void CEventRatePopulationModelTest::testPeriodicity() static const core_t::TTime DAY = 86400; const core_t::TTime bucketLength = 3600; - double rate[] = - { - 1, 1, 2, 2, 3, 5, 6, 6, - 20, 21, 4, 3, 4, 4, 8, 25, - 7, 6, 5, 1, 1, 4, 1, 1 - }; - const std::string attributes[] = { "a1", "a2" }; - double scales[] = { 
1.0, 1.5 }; - const std::string people[] = { "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10" }; + double rate[] = {1, 1, 2, 2, 3, 5, 6, 6, 20, 21, 4, 3, 4, 4, 8, 25, 7, 6, 5, 1, 1, 4, 1, 1}; + const std::string attributes[] = {"a1", "a2"}; + double scales[] = {1.0, 1.5}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}; test::CRandomNumbers rng; @@ -1138,24 +948,14 @@ void CEventRatePopulationModelTest::testPeriodicity() core_t::TTime endTime = 604800; TMessageVec messages; - for (core_t::TTime time = startTime; - time < endTime; - time += bucketLength) - { - for (std::size_t i = 0u; i < boost::size(attributes); ++i) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { + for (std::size_t i = 0u; i < boost::size(attributes); ++i) { TUIntVec rates; - rng.generatePoissonSamples(scales[i] * rate[(time % DAY) / HOUR], - boost::size(people), - rates); - - for (std::size_t j = 0u; j < rates.size(); ++j) - { - for (unsigned int t = 0; t < rates[j]; ++t) - { - messages.push_back(SMessage(time + (t * bucketLength) / (rates[j] + 1), - people[j], - attributes[i])); + rng.generatePoissonSamples(scales[i] * rate[(time % DAY) / HOUR], boost::size(people), rates); + + for (std::size_t j = 0u; j < rates.size(); ++j) { + for (unsigned int t = 0; t < rates[j]; ++t) { + messages.push_back(SMessage(time + (t * bucketLength) / (rates[j] + 1), people[j], attributes[i])); } } } @@ -1176,48 +976,37 @@ void CEventRatePopulationModelTest::testPeriodicity() CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CEventRatePopulationModel *populationModel = - dynamic_cast(model.get()); + CEventRatePopulationModel* populationModel = dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel != 0); TStrDoubleMap personProbabilitiesWithoutPeriodicity; TStrDoubleMap personProbabilitiesWithPeriodicity; core_t::TTime time = startTime; - for (const auto &message : messages) - { - if (message.s_Time >= time + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= time + bucketLength) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); - for (std::size_t j = 0u; j < boost::size(people); ++j) - { + for (std::size_t j = 0u; j < boost::size(people); ++j) { std::size_t pid; - if (!gatherer->personId(people[j], pid)) - { + if (!gatherer->personId(people[j], pid)) { continue; } CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - if (populationModel->computeProbability(pid, time, time + bucketLength, - partitioningFields, 1, annotatedProbability) == false) - { + if (populationModel->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, annotatedProbability) == + false) { continue; } - if (time < startTime + 3 * DAY) - { - double &minimumProbability = - personProbabilitiesWithoutPeriodicity.insert( - TStrDoubleMap::value_type(people[j], 1.0)).first->second; + if (time < startTime + 3 * DAY) { + double& minimumProbability = + personProbabilitiesWithoutPeriodicity.insert(TStrDoubleMap::value_type(people[j], 1.0)).first->second; minimumProbability = std::min(minimumProbability, annotatedProbability.s_Probability); - } - else if (time > startTime + 5 * DAY) - { - double &minimumProbability = - personProbabilitiesWithPeriodicity.insert( - TStrDoubleMap::value_type(people[j], 1.0)).first->second; + } else if (time > startTime + 5 * DAY) { + 
double& minimumProbability = + personProbabilitiesWithPeriodicity.insert(TStrDoubleMap::value_type(people[j], 1.0)).first->second; minimumProbability = std::min(minimumProbability, annotatedProbability.s_Probability); } } @@ -1227,18 +1016,17 @@ void CEventRatePopulationModelTest::testPeriodicity() addArrival(message, gatherer, m_ResourceMonitor); } - double totalw = 0.0; + double totalw = 0.0; double totalwo = 0.0; - for (std::size_t i = 0u; i < boost::size(people); ++i) - { + for (std::size_t i = 0u; i < boost::size(people); ++i) { TStrDoubleMapCItr wo = personProbabilitiesWithoutPeriodicity.find(people[i]); - TStrDoubleMapCItr w = personProbabilitiesWithPeriodicity.find(people[i]); + TStrDoubleMapCItr w = personProbabilitiesWithPeriodicity.find(people[i]); LOG_DEBUG("person = " << people[i]); LOG_DEBUG("minimum probability with periodicity = " << w->second); LOG_DEBUG("minimum probability without periodicity = " << wo->second); totalwo += wo->second; - totalw += w->second; + totalw += w->second; } LOG_DEBUG("total minimum probability with periodicity = " << totalw); @@ -1246,8 +1034,7 @@ void CEventRatePopulationModelTest::testPeriodicity() CPPUNIT_ASSERT(totalw > 3.0 * totalwo); } -void CEventRatePopulationModelTest::testSkipSampling() -{ +void CEventRatePopulationModelTest::testSkipSampling() { LOG_DEBUG("*** testSkipSampling ***"); core_t::TTime startTime(100); @@ -1263,7 +1050,7 @@ void CEventRatePopulationModelTest::testSkipSampling() CModelFactory::TDataGathererPtr gathererNoGap(factory.makeDataGatherer(gathererNoGapInitData)); CModelFactory::SModelInitializationData modelNoGapInitData(gathererNoGap); CAnomalyDetectorModel::TModelPtr modelNoGapHolder(factory.makeModel(modelNoGapInitData)); - CEventRatePopulationModel *modelNoGap = dynamic_cast(modelNoGapHolder.get()); + CEventRatePopulationModel* modelNoGap = dynamic_cast(modelNoGapHolder.get()); addArrival(SMessage(100, "p1", "a1"), gathererNoGap, m_ResourceMonitor); addArrival(SMessage(100, "p1", "a2"), gathererNoGap, m_ResourceMonitor); @@ -1278,7 +1065,7 @@ void CEventRatePopulationModelTest::testSkipSampling() CModelFactory::TDataGathererPtr gathererWithGap(factory.makeDataGatherer(gathererWithGapInitData)); CModelFactory::SModelInitializationData modelWithGapInitData(gathererWithGap); CAnomalyDetectorModel::TModelPtr modelWithGapHolder(factory.makeModel(modelWithGapInitData)); - CEventRatePopulationModel *modelWithGap = dynamic_cast(modelWithGapHolder.get()); + CEventRatePopulationModel* modelWithGap = dynamic_cast(modelWithGapHolder.get()); addArrival(SMessage(100, "p1", "a1"), gathererWithGap, m_ResourceMonitor); addArrival(SMessage(100, "p1", "a2"), gathererWithGap, m_ResourceMonitor); @@ -1300,16 +1087,22 @@ void CEventRatePopulationModelTest::testSkipSampling() modelWithGap->sample(1100, 1200, m_ResourceMonitor); // Check priors are the same - CPPUNIT_ASSERT_EQUAL( - static_cast( - modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0))->residualModel().checksum(), - static_cast( - modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0))->residualModel().checksum()); - CPPUNIT_ASSERT_EQUAL( - static_cast( - modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1))->residualModel().checksum(), - static_cast( - modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1))->residualModel().checksum()); + CPPUNIT_ASSERT_EQUAL(static_cast( + 
modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)) + ->residualModel() + .checksum(), + static_cast( + modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)) + ->residualModel() + .checksum()); + CPPUNIT_ASSERT_EQUAL(static_cast( + modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)) + ->residualModel() + .checksum(), + static_cast( + modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)) + ->residualModel() + .checksum()); // Confirm last seen times are only updated by gap duration by forcing p2 and a2 to be pruned modelWithGap->sample(1200, 1500, m_ResourceMonitor); @@ -1323,8 +1116,7 @@ void CEventRatePopulationModelTest::testSkipSampling() CPPUNIT_ASSERT_EQUAL(std::size_t(1), gathererWithGap->numberActiveAttributes()); } -void CEventRatePopulationModelTest::testInterimCorrections() -{ +void CEventRatePopulationModelTest::testInterimCorrections() { LOG_DEBUG("*** testInterimCorrections ***"); core_t::TTime startTime(3600); @@ -1338,40 +1130,33 @@ void CEventRatePopulationModelTest::testInterimCorrections() CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRatePopulationModel *model = dynamic_cast(modelHolder.get()); + CEventRatePopulationModel* model = dynamic_cast(modelHolder.get()); test::CRandomNumbers rng; core_t::TTime now = startTime; core_t::TTime endTime = now + 2 * 24 * bucketLength; TDoubleVec samples(3, 0.0); - while (now < endTime) - { + while (now < endTime) { rng.generateUniformSamples(50.0, 70.0, std::size_t(3), samples); - for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) { addArrival(SMessage(now, "p1", "a1"), gatherer, m_ResourceMonitor); } - for (std::size_t i = 0; i < static_cast(samples[1] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast(samples[1] + 0.5); ++i) { addArrival(SMessage(now, "p2", "a1"), gatherer, m_ResourceMonitor); } - for (std::size_t i = 0; i < static_cast(samples[2] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast(samples[2] + 0.5); ++i) { addArrival(SMessage(now, "p3", "a2"), gatherer, m_ResourceMonitor); } model->sample(now, now + bucketLength, m_ResourceMonitor); now += bucketLength; } - for (std::size_t i = 0; i < 35; ++i) - { + for (std::size_t i = 0; i < 35; ++i) { addArrival(SMessage(now, "p1", "a1"), gatherer, m_ResourceMonitor); } - for (std::size_t i = 0; i < 1; ++i) - { + for (std::size_t i = 0; i < 1; ++i) { addArrival(SMessage(now, "p2", "a1"), gatherer, m_ResourceMonitor); } - for (std::size_t i = 0; i < 100; ++i) - { + for (std::size_t i = 0; i < 100; ++i) { addArrival(SMessage(now, "p3", "a2"), gatherer, m_ResourceMonitor); } model->sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); @@ -1379,24 +1164,21 @@ void CEventRatePopulationModelTest::testInterimCorrections() CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability1; annotatedProbability1.s_ResultType.set(model_t::CResultType::E_Interim); - CPPUNIT_ASSERT(model->computeProbability(0/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability1)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 1, 
annotatedProbability1)); SAnnotatedProbability annotatedProbability2; annotatedProbability2.s_ResultType.set(model_t::CResultType::E_Interim); - CPPUNIT_ASSERT(model->computeProbability(1/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability2)); + CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability2)); SAnnotatedProbability annotatedProbability3; annotatedProbability3.s_ResultType.set(model_t::CResultType::E_Interim); - CPPUNIT_ASSERT(model->computeProbability(2/*pid*/, now, now + bucketLength, - partitioningFields, 1, annotatedProbability3)); + CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability3)); model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); - TDouble1Vec p1a1Baseline = model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, - 0, 0, type, NO_CORRELATES, now); - TDouble1Vec p2a1Baseline = model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, - 0, 0, type, NO_CORRELATES, now); - TDouble1Vec p3a2Baseline = model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, - 2, 1, type, NO_CORRELATES, now); + TDouble1Vec p1a1Baseline = + model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, 0, 0, type, NO_CORRELATES, now); + TDouble1Vec p2a1Baseline = + model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, 0, 0, type, NO_CORRELATES, now); + TDouble1Vec p3a2Baseline = + model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, 2, 1, type, NO_CORRELATES, now); LOG_DEBUG("p1 probability = " << annotatedProbability1.s_Probability); LOG_DEBUG("p2 probability = " << annotatedProbability2.s_Probability); @@ -1413,8 +1195,7 @@ void CEventRatePopulationModelTest::testInterimCorrections() CPPUNIT_ASSERT(p3a2Baseline[0] > 59.0 && p3a2Baseline[0] < 61.0); } -void CEventRatePopulationModelTest::testPersistence() -{ +void CEventRatePopulationModelTest::testPersistence() { LOG_DEBUG("*** testPersistence ***"); core_t::TTime startTime = 1367280000; @@ -1436,14 +1217,11 @@ void CEventRatePopulationModelTest::testPersistence() CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr origModel(factory.makeModel(modelInitData)); - CEventRatePopulationModel *populationModel = - dynamic_cast(origModel.get()); + CEventRatePopulationModel* populationModel = dynamic_cast(origModel.get()); CPPUNIT_ASSERT(populationModel != 0); - for (const auto &message : messages) - { - if (message.s_Time >= startTime + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= startTime + bucketLength) { origModel->sample(startTime, startTime + bucketLength, m_ResourceMonitor); startTime += bucketLength; } @@ -1485,8 +1263,7 @@ void CEventRatePopulationModelTest::testPersistence() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() -{ +void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() { LOG_DEBUG("*** testIgnoreSamplingGivenDetectionRules ***"); // Create 2 models, one of which has a skip sampling rule. 
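// The rule wired up below reduces to a categorical value filter that is
// consulted before model updates. A minimal stand-alone sketch of that idea
// (SkipSamplingRule, filter and shouldSkipSampling are illustrative names
// for this sketch, not the ml-cpp detection rule API):
#include <set>
#include <string>

struct SkipSamplingRule {
    // Mirrors filterJson below, which filters out attribute "a3".
    std::set<std::string> filter{"a3"};

    // Observations whose attribute is filtered are excluded from model
    // updates, so the model carrying the rule never learns from them.
    bool shouldSkipSampling(const std::string& attribute) const {
        return filter.count(attribute) > 0;
    }
};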
@@ -1498,7 +1275,6 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() core_t::TTime startTime(100); std::size_t bucketLength(100); - // Create a categorical rule to filter out attribute a3 std::string filterJson("[\"a3\"]"); core::CPatternSet valueFilter; @@ -1544,7 +1320,6 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() CPPUNIT_ASSERT_EQUAL(modelWithSkip->checksum(), modelNoSkip->checksum()); - addArrival(SMessage(200, "p1", "a1"), gathererNoSkip, m_ResourceMonitor); addArrival(SMessage(200, "p1", "a1"), gathererWithSkip, m_ResourceMonitor); addArrival(SMessage(200, "p1", "a2"), gathererNoSkip, m_ResourceMonitor); @@ -1562,7 +1337,6 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() addArrival(SMessage(200, "p2", "a4"), gathererNoSkip, m_ResourceMonitor); addArrival(SMessage(200, "p2", "a4"), gathererWithSkip, m_ResourceMonitor); - modelNoSkip->sample(200, 300, m_ResourceMonitor); modelWithSkip->sample(200, 300, m_ResourceMonitor); @@ -1587,71 +1361,64 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() noSkipChecksum = modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2)->checksum(); CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); - // Check the last value times of all the underlying models are the same - const maths::CUnivariateTimeSeriesModel *timeSeriesModel = - dynamic_cast(modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)); + const maths::CUnivariateTimeSeriesModel* timeSeriesModel = dynamic_cast( + modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)); CPPUNIT_ASSERT(timeSeriesModel != 0); core_t::TTime time = timeSeriesModel->trendModel().lastValueTime(); CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_PopulationCountByBucketPersonAndAttribute, 200, bucketLength), time); // The last times of the underlying time series models should all be the same - timeSeriesModel = dynamic_cast(modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)); + timeSeriesModel = dynamic_cast( + modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)); CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); - timeSeriesModel = dynamic_cast(modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2)); + timeSeriesModel = dynamic_cast( + modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2)); CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); - timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)); + timeSeriesModel = dynamic_cast( + modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)); CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); - timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)); + timeSeriesModel = dynamic_cast( + modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)); CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); - timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2)); + timeSeriesModel = dynamic_cast( + modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2)); CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime()); - 
timeSeriesModel = dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 3));
+    timeSeriesModel = dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(
+        modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 3));
     CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime());
 }
 
-CppUnit::Test *CEventRatePopulationModelTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CEventRatePopulationModelTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testBasicAccessors",
-                                   &CEventRatePopulationModelTest::testBasicAccessors) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testFeatures",
-                                   &CEventRatePopulationModelTest::testFeatures) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testComputeProbability",
-                                   &CEventRatePopulationModelTest::testComputeProbability) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testPrune",
-                                   &CEventRatePopulationModelTest::testPrune) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testKey",
-                                   &CEventRatePopulationModelTest::testKey) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testFrequency",
-                                   &CEventRatePopulationModelTest::testFrequency) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testSampleRateWeight",
-                                   &CEventRatePopulationModelTest::testSampleRateWeight) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testSkipSampling",
-                                   &CEventRatePopulationModelTest::testSkipSampling) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testInterimCorrections",
-                                   &CEventRatePopulationModelTest::testInterimCorrections) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testPeriodicity",
-                                   &CEventRatePopulationModelTest::testPeriodicity) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testPersistence",
-                                   &CEventRatePopulationModelTest::testPersistence) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CEventRatePopulationModelTest>(
-                                   "CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules",
-                                   &CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules) );
+CppUnit::Test* CEventRatePopulationModelTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRatePopulationModelTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testBasicAccessors",
+                                                                                 &CEventRatePopulationModelTest::testBasicAccessors));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testFeatures",
+                                                                                 &CEventRatePopulationModelTest::testFeatures));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testComputeProbability",
+                                                                                 &CEventRatePopulationModelTest::testComputeProbability));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testPrune",
+                                                                                 &CEventRatePopulationModelTest::testPrune));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testKey",
+                                                                                 &CEventRatePopulationModelTest::testKey));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testFrequency",
+                                                                                 &CEventRatePopulationModelTest::testFrequency));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testSampleRateWeight",
+                                                                                 &CEventRatePopulationModelTest::testSampleRateWeight));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testSkipSampling",
+                                                                                 &CEventRatePopulationModelTest::testSkipSampling));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testInterimCorrections",
+                                                                                 &CEventRatePopulationModelTest::testInterimCorrections));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testPeriodicity",
+                                                                                 &CEventRatePopulationModelTest::testPeriodicity));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testPersistence",
+                                                                                 &CEventRatePopulationModelTest::testPersistence));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CEventRatePopulationModelTest>("CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules",
+                                                               &CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CEventRatePopulationModelTest.h b/lib/model/unittest/CEventRatePopulationModelTest.h
index cf46c68de8..4875ad44ef 100644
--- a/lib/model/unittest/CEventRatePopulationModelTest.h
+++ b/lib/model/unittest/CEventRatePopulationModelTest.h
@@ -11,26 +11,25 @@
 
 #include 
 
+class CEventRatePopulationModelTest : public CppUnit::TestFixture {
+public:
+    void testBasicAccessors();
+    void testFeatures();
+    void testComputeProbability();
+    void testPrune();
+    void testKey();
+    void testFrequency();
+    void testSampleRateWeight();
+    void testSkipSampling();
+    void testInterimCorrections();
+    void testPeriodicity();
+    void testPersistence();
+    void testIgnoreSamplingGivenDetectionRules();
 
-class CEventRatePopulationModelTest : public CppUnit::TestFixture
-{
-    public:
-        void testBasicAccessors();
-        void testFeatures();
-        void testComputeProbability();
-        void testPrune();
-        void testKey();
-        void testFrequency();
-        void testSampleRateWeight();
-        void testSkipSampling();
-        void testInterimCorrections();
-        void testPeriodicity();
-        void testPersistence();
-        void testIgnoreSamplingGivenDetectionRules();
+    static CppUnit::Test* suite();
 
-        static CppUnit::Test *suite();
-    private:
-        ml::model::CResourceMonitor m_ResourceMonitor;
+private:
+    ml::model::CResourceMonitor m_ResourceMonitor;
 };
 
 #endif // INCLUDED_CEventRatePopulationModelTest_h
diff --git a/lib/model/unittest/CFunctionTypesTest.cc b/lib/model/unittest/CFunctionTypesTest.cc
index 71c79157d0..c88605b7ac 100644
--- a/lib/model/unittest/CFunctionTypesTest.cc
+++ b/lib/model/unittest/CFunctionTypesTest.cc
@@ -6,16 +6,15 @@
 
 #include "CFunctionTypesTest.h"
 
-#include 
 #include 
+#include 
 
 #include 
 
 using namespace ml;
 using namespace model;
 
-void CFunctionTypesTest::testFeaturesToFunction()
-{
+void CFunctionTypesTest::testFeaturesToFunction() {
     model_t::TFeatureVec features;
 
     {
@@ -235,13 +234,11 @@ void CFunctionTypesTest::testFeaturesToFunction()
     }
 }
 
-CppUnit::Test* CFunctionTypesTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CFunctionTypesTest");
+CppUnit::Test* CFunctionTypesTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFunctionTypesTest");
 
-    suiteOfTests->addTest( new CppUnit::TestCaller<CFunctionTypesTest>(
-                                   "CFunctionTypesTest::testFeaturesToFunction",
-                                   &CFunctionTypesTest::testFeaturesToFunction) );
+    suiteOfTests->addTest(new CppUnit::TestCaller<CFunctionTypesTest>("CFunctionTypesTest::testFeaturesToFunction",
+                                                                      &CFunctionTypesTest::testFeaturesToFunction));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CFunctionTypesTest.h b/lib/model/unittest/CFunctionTypesTest.h
index 5740ce3d3e..a117ef9f45 100644
---
a/lib/model/unittest/CFunctionTypesTest.h +++ b/lib/model/unittest/CFunctionTypesTest.h @@ -9,12 +9,11 @@ #include -class CFunctionTypesTest : public CppUnit::TestFixture -{ - public: - void testFeaturesToFunction(); +class CFunctionTypesTest : public CppUnit::TestFixture { +public: + void testFeaturesToFunction(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CFunctionTypesTest_h diff --git a/lib/model/unittest/CGathererToolsTest.cc b/lib/model/unittest/CGathererToolsTest.cc index 77ec448276..35ab0a1c82 100644 --- a/lib/model/unittest/CGathererToolsTest.cc +++ b/lib/model/unittest/CGathererToolsTest.cc @@ -11,25 +11,21 @@ using namespace ml; using namespace model; -namespace -{ +namespace { const CGathererTools::CSumGatherer::TStrVec EMPTY_STR_VEC; const CGathererTools::CSumGatherer::TStoredStringPtrVec EMPTY_STR_PTR_VEC; } -CppUnit::Test *CGathererToolsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CGathererToolsTest"); +CppUnit::Test* CGathererToolsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CGathererToolsTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CGathererToolsTest::testSumGathererIsRedundant", - &CGathererToolsTest::testSumGathererIsRedundant) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CGathererToolsTest::testSumGathererIsRedundant", + &CGathererToolsTest::testSumGathererIsRedundant)); return suiteOfTests; } -void CGathererToolsTest::testSumGathererIsRedundant() -{ +void CGathererToolsTest::testSumGathererIsRedundant() { using TDouble1Vec = CGathererTools::CSumGatherer::TDouble1Vec; core_t::TTime bucketLength(100); diff --git a/lib/model/unittest/CGathererToolsTest.h b/lib/model/unittest/CGathererToolsTest.h index 7003c6807a..1ae793d102 100644 --- a/lib/model/unittest/CGathererToolsTest.h +++ b/lib/model/unittest/CGathererToolsTest.h @@ -8,14 +8,11 @@ #include +class CGathererToolsTest : public CppUnit::TestFixture { +public: + void testSumGathererIsRedundant(); -class CGathererToolsTest : public CppUnit::TestFixture -{ - public: - void testSumGathererIsRedundant(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CGathererToolsTest_h - diff --git a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc index b85b90bb65..582dc685b1 100644 --- a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc +++ b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc @@ -13,81 +13,53 @@ #include #include +CppUnit::Test* CHierarchicalResultsLevelSetTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CHierarchicalResultsLevelSetTest"); -CppUnit::Test *CHierarchicalResultsLevelSetTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CHierarchicalResultsLevelSetTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation", - &CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation) ); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation", + &CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation)); return suiteOfTests; } -struct TestNode -{ - TestNode(const std::string &name) : s_Name(name) - { - } +struct TestNode { + TestNode(const std::string& name) : s_Name(name) {} std::string s_Name; }; -class CTestNodeFactory -{ - public: - 
-        CTestNodeFactory()
-        {
-        }
-
-        TestNode make(const std::string &name1,
-                      const std::string &name2,
-                      const std::string &name3,
-                      const std::string &name4) const
-        {
-            return make(name1 + ' ' + name2 + ' ' + name3 + ' ' + name4);
-        }
-
-        TestNode make(const std::string &name1, const std::string &name2) const
-        {
-            return make(name1 + ' ' + name2);
-        }
-
-        TestNode make(const std::string &name) const
-        {
-            return TestNode(name);
-        }
-};
-
-class CConcreteHierarchicalResultsLevelSet : public ml::model::CHierarchicalResultsLevelSet<TestNode>
-{
+class CTestNodeFactory {
 public:
-        CConcreteHierarchicalResultsLevelSet(const TestNode &root)
-            :ml::model::CHierarchicalResultsLevelSet<TestNode>(root)
-        {
+    CTestNodeFactory() {}
+    TestNode make(const std::string& name1, const std::string& name2, const std::string& name3, const std::string& name4) const {
+        return make(name1 + ' ' + name2 + ' ' + name3 + ' ' + name4);
     }
+    TestNode make(const std::string& name1, const std::string& name2) const { return make(name1 + ' ' + name2); }
+
+    TestNode make(const std::string& name) const { return TestNode(name); }
+};
+
+class CConcreteHierarchicalResultsLevelSet : public ml::model::CHierarchicalResultsLevelSet<TestNode> {
+public:
+    CConcreteHierarchicalResultsLevelSet(const TestNode& root) : ml::model::CHierarchicalResultsLevelSet<TestNode>(root) {}
+
     //! Visit a node.
-    virtual void visit(const ml::model::CHierarchicalResults &/*results*/, const TNode &/*node*/,
-                       bool /*pivot*/)
-    {
-    }
+    virtual void visit(const ml::model::CHierarchicalResults& /*results*/, const TNode& /*node*/, bool /*pivot*/) {}

     // make public
     using ml::model::CHierarchicalResultsLevelSet<TestNode>::elements;
 };

-void print(const TestNode *node)
-{
+void print(const TestNode* node) {
     std::cout << "'" << node->s_Name << "'" << std::endl;
 }

-void CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation()
-{
+void CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation() {
     // This is intentionally NOT an empty string from the string store, but
-    // instead a completely separate empty string, such that its pointer will be
+    // instead a completely separate empty string, such that its pointer will be
     // different to other empty string pointers. (In general, if you need
     // a pointer to an empty string call CStringStore::getEmpty() instead of
     // doing this.)
@@ -115,7 +87,7 @@ void CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation
     node.s_Parent = &parent;
     node.s_Children.push_back(&child);

-    std::vector<TestNode *> result;
+    std::vector<TestNode*> result;

     // without per partition normalization
     {
@@ -134,7 +106,6 @@ void CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation
         CPPUNIT_ASSERT_EQUAL(size_t(1), result.size());
         CPPUNIT_ASSERT_EQUAL(std::string("pAv1"), result[0]->s_Name);

-
         ml::model::hierarchical_results_detail::SResultSpec specB;
         specB.s_PartitionFieldName = PARTITION_B;
         specB.s_PartitionFieldValue = PARTITION_VALUE_1;
diff --git a/lib/model/unittest/CHierarchicalResultsLevelSetTest.h b/lib/model/unittest/CHierarchicalResultsLevelSetTest.h
index 0947031127..e710fc4828 100644
--- a/lib/model/unittest/CHierarchicalResultsLevelSetTest.h
+++ b/lib/model/unittest/CHierarchicalResultsLevelSetTest.h
@@ -8,12 +8,11 @@

 #include

-class CHierarchicalResultsLevelSetTest : public CppUnit::TestFixture
-{
-    public:
-        void testElementsWithPerPartitionNormalisation();
+class CHierarchicalResultsLevelSetTest : public CppUnit::TestFixture {
+public:
+    void testElementsWithPerPartitionNormalisation();

-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };

 #endif // INCLUDED_CHierarchicalResultsLevelSetTest_h
diff --git a/lib/model/unittest/CHierarchicalResultsTest.cc b/lib/model/unittest/CHierarchicalResultsTest.cc
index bceb7a1991..8486076b6c 100644
--- a/lib/model/unittest/CHierarchicalResultsTest.cc
+++ b/lib/model/unittest/CHierarchicalResultsTest.cc
@@ -38,13 +38,12 @@
 #include
 #include

-#include
 #include
+#include

 using namespace ml;

-namespace
-{
+namespace {
 using TDoubleVec = std::vector<double>;
 using TAttributeProbabilityVec = model::CHierarchicalResults::TAttributeProbabilityVec;
@@ -56,421 +55,298 @@
 using TStrVec = std::vector<std::string>;

 const std::string EMPTY_STRING;

 //! \brief Checks that we visit the nodes in decreasing depth order.
-class CBreadthFirstCheck : public model::CHierarchicalResultsVisitor
-{
-    public:
-        using TNodeCPtrSet = std::set<const TNode*>;
-        using TNodeCPtrSetVec = std::vector<TNodeCPtrSet>;
-
-    public:
-        CBreadthFirstCheck() :
-            m_Layer(0),
-            m_Layers(1, TNodeCPtrSet())
-        {}
-
-        virtual void visit(const model::CHierarchicalResults &/*results*/,
-                           const TNode &node,
-                           bool /*pivot*/)
-        {
-            LOG_DEBUG("Visiting " << node.print());
-
-            if (node.s_Children.empty())
-            {
-                // Leaf
-                m_Layers[0].insert(&node);
-                return;
-            }
-
-            // Check whether the children are on the layer below
-            // otherwise start a new layer.
-
-            std::size_t layer = m_Layer + 1;
-            for (std::size_t i = 0u; i < node.s_Children.size(); ++i)
-            {
-                if (m_Layers[m_Layer].count(node.s_Children[i]) == 0)
-                {
-                    layer = m_Layer + 2;
-                    break;
-                }
-            }
-            LOG_DEBUG("layer = " << layer);
-
-            m_Layer = layer - 1;
-            if (layer > m_Layers.size() - 1)
-            {
-                m_Layers.resize(layer + 1);
-            }
-            m_Layers[layer].insert(&node);
-        }
+class CBreadthFirstCheck : public model::CHierarchicalResultsVisitor {
+public:
+    using TNodeCPtrSet = std::set<const TNode*>;
+    using TNodeCPtrSetVec = std::vector<TNodeCPtrSet>;
+
+public:
+    CBreadthFirstCheck() : m_Layer(0), m_Layers(1, TNodeCPtrSet()) {}
+
+    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+        LOG_DEBUG("Visiting " << node.print());
+
+        if (node.s_Children.empty()) {
+            // Leaf
+            m_Layers[0].insert(&node);
+            return;
+        }
+
+        // Check whether the children are on the layer below
+        // otherwise start a new layer.
+
+        std::size_t layer = m_Layer + 1;
+        for (std::size_t i = 0u; i < node.s_Children.size(); ++i) {
+            if (m_Layers[m_Layer].count(node.s_Children[i]) == 0) {
+                layer = m_Layer + 2;
+                break;
+            }
+        }
+        LOG_DEBUG("layer = " << layer);
+
+        m_Layer = layer - 1;
+        if (layer > m_Layers.size() - 1) {
+            m_Layers.resize(layer + 1);
+        }
+        m_Layers[layer].insert(&node);
+    }

-        void check(std::size_t expectedLayers) const
-        {
-            // Check we have the expected number of layers and that
-            // all nodes are in a lower layer than their parents.
-
-            using TNodeCPtrSetCItr = TNodeCPtrSet::const_iterator;
-
-            LOG_DEBUG("# layers = " << m_Layers.size());
-            CPPUNIT_ASSERT_EQUAL(expectedLayers, m_Layers.size());
-
-            for (std::size_t i = 0u; i < m_Layers.size(); ++i)
-            {
-                LOG_DEBUG("Checking layer "
-                          << core::CContainerPrinter::print(m_Layers[i]));
-                for (TNodeCPtrSetCItr itr = m_Layers[i].begin();
-                     itr != m_Layers[i].end();
-                     ++itr)
-                {
-                    if ((*itr)->s_Parent)
-                    {
-                        std::size_t p = this->layer((*itr)->s_Parent);
-                        LOG_DEBUG("layer = " << i << ", parent layer = " << p);
-                        CPPUNIT_ASSERT(p > i);
-                    }
-                }
-            }
-        }
+    void check(std::size_t expectedLayers) const {
+        // Check we have the expected number of layers and that
+        // all nodes are in a lower layer than their parents.
+
+        using TNodeCPtrSetCItr = TNodeCPtrSet::const_iterator;
+
+        LOG_DEBUG("# layers = " << m_Layers.size());
+        CPPUNIT_ASSERT_EQUAL(expectedLayers, m_Layers.size());
+
+        for (std::size_t i = 0u; i < m_Layers.size(); ++i) {
+            LOG_DEBUG("Checking layer " << core::CContainerPrinter::print(m_Layers[i]));
+            for (TNodeCPtrSetCItr itr = m_Layers[i].begin(); itr != m_Layers[i].end(); ++itr) {
+                if ((*itr)->s_Parent) {
+                    std::size_t p = this->layer((*itr)->s_Parent);
+                    LOG_DEBUG("layer = " << i << ", parent layer = " << p);
+                    CPPUNIT_ASSERT(p > i);
+                }
+            }
+        }
+    }

-    private:
-        //! Get a node's layer.
-        std::size_t layer(const TNode *node) const
-        {
-            for (std::size_t i = 0u; i < m_Layers.size(); ++i)
-            {
-                if (m_Layers[i].count(node) > 0)
-                {
-                    return i;
-                }
-            }
-
-            LOG_ERROR("Couldn't find node " << node->print());
-            CPPUNIT_ASSERT(false);
-
-            return 0;
-        }
+private:
+    //! Get a node's layer.
+    std::size_t layer(const TNode* node) const {
+        for (std::size_t i = 0u; i < m_Layers.size(); ++i) {
+            if (m_Layers[i].count(node) > 0) {
+                return i;
+            }
+        }
+
+        LOG_ERROR("Couldn't find node " << node->print());
+        CPPUNIT_ASSERT(false);
+
+        return 0;
+    }

-    private:
-        std::size_t m_Layer;
-        TNodeCPtrSetVec m_Layers;
+private:
+    std::size_t m_Layer;
+    TNodeCPtrSetVec m_Layers;
 };

 //! \brief Checks that we visit all a nodes children immediately
 //! before visiting it.
-class CDepthFirstCheck : public model::CHierarchicalResultsVisitor
-{
-    public:
-        using TNodeCPtrVec = std::vector<const TNode*>;
-
-    public:
-        virtual void visit(const model::CHierarchicalResults &/*results*/,
-                           const TNode &node,
-                           bool /*pivot*/)
-        {
-            LOG_DEBUG("Visiting " << node.print());
-            for (std::size_t i = node.s_Children.size(); i > 0; --i)
-            {
-                CPPUNIT_ASSERT(!m_Children.empty());
-                CPPUNIT_ASSERT_EQUAL(m_Children.back(), node.s_Children[i-1]);
-                m_Children.pop_back();
-            }
-            m_Children.push_back(&node);
-        }
+class CDepthFirstCheck : public model::CHierarchicalResultsVisitor {
+public:
+    using TNodeCPtrVec = std::vector<const TNode*>;
+
+public:
+    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+        LOG_DEBUG("Visiting " << node.print());
+        for (std::size_t i = node.s_Children.size(); i > 0; --i) {
+            CPPUNIT_ASSERT(!m_Children.empty());
+            CPPUNIT_ASSERT_EQUAL(m_Children.back(), node.s_Children[i - 1]);
+            m_Children.pop_back();
+        }
+        m_Children.push_back(&node);
+    }

-    private:
-        TNodeCPtrVec m_Children;
+private:
+    TNodeCPtrVec m_Children;
 };

 //! \brief A pretty print of the hierarchical results.
-class CPrinter : public model::CHierarchicalResultsVisitor
-{
-    public:
-        CPrinter() : m_ShouldPrintWrittenNodesOnly(false)
-        {
-        }
-
-        CPrinter(bool shouldOnlyPrintWrittenNodes)
-            : m_ShouldPrintWrittenNodesOnly(shouldOnlyPrintWrittenNodes)
-        {
-        }
-
-        virtual void visit(const model::CHierarchicalResults &results,
-                           const TNode &node,
-                           bool pivot)
-        {
-            if (m_ShouldPrintWrittenNodesOnly == false ||
-                shouldWriteResult(m_Limits, results, node, pivot))
-            {
-                m_Result = std::string(2 * depth(&node), ' ')
-                           + node.print()
-                           + (pivot ? " pivot" : "")
-                           + (m_Result.empty() ? "" : "\n")
-                           + m_Result;
-            }
-        }
+class CPrinter : public model::CHierarchicalResultsVisitor {
+public:
+    CPrinter() : m_ShouldPrintWrittenNodesOnly(false) {}
+
+    CPrinter(bool shouldOnlyPrintWrittenNodes) : m_ShouldPrintWrittenNodesOnly(shouldOnlyPrintWrittenNodes) {}
+
+    virtual void visit(const model::CHierarchicalResults& results, const TNode& node, bool pivot) {
+        if (m_ShouldPrintWrittenNodesOnly == false || shouldWriteResult(m_Limits, results, node, pivot)) {
+            m_Result =
+                std::string(2 * depth(&node), ' ') + node.print() + (pivot ? " pivot" : "") + (m_Result.empty() ? "" : "\n") + m_Result;
+        }
+    }

-        const std::string &result() const
-        {
-            return m_Result;
-        }
+    const std::string& result() const { return m_Result; }

-    private:
-        std::size_t depth(const TNode *node) const
-        {
-            std::size_t result = 0u;
-            for (/**/; node->s_Parent; node = node->s_Parent)
-            {
-                ++result;
-            }
-            return result;
-        }
+private:
+    std::size_t depth(const TNode* node) const {
+        std::size_t result = 0u;
+        for (/**/; node->s_Parent; node = node->s_Parent) {
+            ++result;
+        }
+        return result;
+    }

-    private:
-        bool m_ShouldPrintWrittenNodesOnly;
-        std::string m_Result;
-        model::CLimits m_Limits;
+private:
+    bool m_ShouldPrintWrittenNodesOnly;
+    std::string m_Result;
+    model::CLimits m_Limits;
 };

 //! \brief Gets the various types of nodes.
-class CNodeExtractor : public model::CHierarchicalResultsVisitor
-{
-    public:
-        using TNodeCPtrVec = std::vector<const TNode*>;
-
-    public:
-        virtual void visit(const model::CHierarchicalResults &/*results*/,
-                           const TNode &node,
-                           bool /*pivot*/)
-        {
-            if (this->isPartitioned(node))
-            {
-                m_PartitionedNodes.push_back(&node);
-            }
-            if (this->isPartition(node))
-            {
-                m_PartitionNodes.push_back(&node);
-            }
-            if (this->isPerson(node))
-            {
-                m_PersonNodes.push_back(&node);
-            }
-            if (this->isLeaf(node))
-            {
-                m_LeafNodes.push_back(&node);
-            }
-        }
-
-        const TNodeCPtrVec &partitionedNodes() const
-        {
-            return m_PartitionedNodes;
-        }
-        const TNodeCPtrVec &partitionNodes() const
-        {
-            return m_PartitionNodes;
-        }
-        const TNodeCPtrVec &personNodes() const
-        {
-            return m_PersonNodes;
-        }
-        const TNodeCPtrVec &leafNodes() const
-        {
-            return m_LeafNodes;
-        }
+class CNodeExtractor : public model::CHierarchicalResultsVisitor {
+public:
+    using TNodeCPtrVec = std::vector<const TNode*>;
+
+public:
+    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+        if (this->isPartitioned(node)) {
+            m_PartitionedNodes.push_back(&node);
+        }
+        if (this->isPartition(node)) {
+            m_PartitionNodes.push_back(&node);
+        }
+        if (this->isPerson(node)) {
+            m_PersonNodes.push_back(&node);
+        }
+        if (this->isLeaf(node)) {
+            m_LeafNodes.push_back(&node);
+        }
+    }

-    private:
-        TNodeCPtrVec m_PartitionedNodes;
-        TNodeCPtrVec m_PartitionNodes;
-        TNodeCPtrVec m_PersonNodes;
-        TNodeCPtrVec m_LeafNodes;
+    const TNodeCPtrVec& partitionedNodes() const { return m_PartitionedNodes; }
+    const TNodeCPtrVec& partitionNodes() const { return m_PartitionNodes; }
+    const TNodeCPtrVec& personNodes() const { return m_PersonNodes; }
+    const TNodeCPtrVec& leafNodes() const { return m_LeafNodes; }
+
+private:
+    TNodeCPtrVec m_PartitionedNodes;
+    TNodeCPtrVec m_PartitionNodes;
+    TNodeCPtrVec m_PersonNodes;
+    TNodeCPtrVec m_LeafNodes;
 };

 //! \brief Checks our anomaly scores are correct post scoring.
-class CCheckScores : public model::CHierarchicalResultsVisitor
-{
-    public:
-        virtual void visit(const model::CHierarchicalResults &/*results*/,
-                           const TNode &node,
-                           bool /*pivot*/)
-        {
-            LOG_DEBUG(node.s_Spec.print()
-                      << " score = " << node.s_RawAnomalyScore
-                      << ", expected score = " << maths::CTools::anomalyScore(node.probability()));
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CTools::anomalyScore(node.probability()),
-                                         node.s_RawAnomalyScore,
-                                         1e-10);
-        }
+class CCheckScores : public model::CHierarchicalResultsVisitor {
+public:
+    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+        LOG_DEBUG(node.s_Spec.print() << " score = " << node.s_RawAnomalyScore
+                                      << ", expected score = " << maths::CTools::anomalyScore(node.probability()));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CTools::anomalyScore(node.probability()), node.s_RawAnomalyScore, 1e-10);
+    }
 };

 //! \brief Checks that if we write a result for a node, we also write one
 //! for its parent (if there is one) and one for at least one child (if
 //! there are any children).
-class CWriteConsistencyChecker : public model::CHierarchicalResultsVisitor
-{
-    public:
-        CWriteConsistencyChecker(const model::CLimits &limits) :
-            m_Limits(limits)
-        {
-        }
+class CWriteConsistencyChecker : public model::CHierarchicalResultsVisitor {
+public:
+    CWriteConsistencyChecker(const model::CLimits& limits) : m_Limits(limits) {}

-        virtual void visit(const model::CHierarchicalResults &results,
-                           const TNode &node,
-                           bool pivot)
-        {
-            if (!this->shouldWriteResult(m_Limits, results, node, pivot))
-            {
-                return;
-            }
-            if (!this->isLeaf(node))
-            {
-                bool willWriteAChild(false);
-                for (size_t i = 0; i < node.s_Children.size(); ++i)
-                {
-                    CPPUNIT_ASSERT(node.s_Children[i] != 0);
-                    willWriteAChild = this->shouldWriteResult(m_Limits, results, *node.s_Children[i], pivot);
-                    if (willWriteAChild)
-                    {
-                        break;
-                    }
-                }
-                CPPUNIT_ASSERT(willWriteAChild);
-            }
+    virtual void visit(const model::CHierarchicalResults& results, const TNode& node, bool pivot) {
+        if (!this->shouldWriteResult(m_Limits, results, node, pivot)) {
+            return;
+        }
+        if (!this->isLeaf(node)) {
+            bool willWriteAChild(false);
+            for (size_t i = 0; i < node.s_Children.size(); ++i) {
+                CPPUNIT_ASSERT(node.s_Children[i] != 0);
+                willWriteAChild = this->shouldWriteResult(m_Limits, results, *node.s_Children[i], pivot);
+                if (willWriteAChild) {
+                    break;
+                }
+            }
+            CPPUNIT_ASSERT(willWriteAChild);
+        }

-            if (!this->isRoot(node))
-            {
-                CPPUNIT_ASSERT(node.s_Parent != 0);
-                if (isTypeForWhichWeWriteResults(*node.s_Parent, pivot))
-                {
-                    CPPUNIT_ASSERT(this->shouldWriteResult(m_Limits, results, *node.s_Parent, pivot));
-                }
-            }
-        }
+        if (!this->isRoot(node)) {
+            CPPUNIT_ASSERT(node.s_Parent != 0);
+            if (isTypeForWhichWeWriteResults(*node.s_Parent, pivot)) {
+                CPPUNIT_ASSERT(this->shouldWriteResult(m_Limits, results, *node.s_Parent, pivot));
+            }
+        }
+    }

-    private:
-        const model::CLimits &m_Limits;
+private:
+    const model::CLimits& m_Limits;
 };

 using TIntDoubleVecMap = std::map<int, TDoubleVec>;
 using TIntDoubleVecMapCItr = TIntDoubleVecMap::const_iterator;

 //! \brief Node probability container.
-struct SNodeProbabilities
-{
-    SNodeProbabilities(const std::string &name) : s_Name(name) {}
+struct SNodeProbabilities {
+    SNodeProbabilities(const std::string& name) : s_Name(name) {}

     std::string s_Name;
     TIntDoubleVecMap s_Probabilities;
 };

 //! \brief Gathers up detector probabilities by level.
-class CProbabilityGatherer : public model::CHierarchicalResultsLevelSet<SNodeProbabilities>
-{
-    public:
-        using TBase = model::CHierarchicalResultsLevelSet<SNodeProbabilities>;
-        using TNodeProbabilitiesPtrVec = TBase::TTypePtrVec;
+class CProbabilityGatherer : public model::CHierarchicalResultsLevelSet<SNodeProbabilities> {
+public:
+    using TBase = model::CHierarchicalResultsLevelSet<SNodeProbabilities>;
+    using TNodeProbabilitiesPtrVec = TBase::TTypePtrVec;

-        class CFactory
-        {
-            public:
-                SNodeProbabilities make(const std::string &name1,
-                                        const std::string &name2,
-                                        const std::string &name3,
-                                        const std::string &name4) const
-                {
-                    return make(name1 + ' ' + name2 + ' ' + name3 + ' ' + name4);
-                }
+    class CFactory {
+    public:
+        SNodeProbabilities
+        make(const std::string& name1, const std::string& name2, const std::string& name3, const std::string& name4) const {
+            return make(name1 + ' ' + name2 + ' ' + name3 + ' ' + name4);
+        }

-                SNodeProbabilities make(const std::string &name1,
-                                        const std::string &name2) const
-                {
-                    return make(name1 + ' ' + name2);
-                }
+        SNodeProbabilities make(const std::string& name1, const std::string& name2) const { return make(name1 + ' ' + name2); }

-                SNodeProbabilities make(const std::string &name) const
-                {
-                    return SNodeProbabilities(name);
-                }
-        };
+        SNodeProbabilities make(const std::string& name) const { return SNodeProbabilities(name); }
+    };

-    public:
-        CProbabilityGatherer() : TBase(SNodeProbabilities("bucket")) {}
+public:
+    CProbabilityGatherer() : TBase(SNodeProbabilities("bucket")) {}

-        virtual void visit(const model::CHierarchicalResults &/*results*/, const TNode &node, bool pivot)
-        {
-            if (isLeaf(node))
-            {
-                CFactory factory;
-                TNodeProbabilitiesPtrVec probabilities;
-                this->elements(node, pivot, factory, probabilities);
-                for (std::size_t i = 0u; i < probabilities.size(); ++i)
-                {
-                    if (node.probability() < model::CDetectorEqualizer::largestProbabilityToCorrect())
-                    {
-                        (*probabilities[i]).s_Probabilities[node.s_Detector].push_back(node.probability());
-                    }
-                }
-            }
-        }
+    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool pivot) {
+        if (isLeaf(node)) {
+            CFactory factory;
+            TNodeProbabilitiesPtrVec probabilities;
+            this->elements(node, pivot, factory, probabilities);
+            for (std::size_t i = 0u; i < probabilities.size(); ++i) {
+                if (node.probability() < model::CDetectorEqualizer::largestProbabilityToCorrect()) {
+                    (*probabilities[i]).s_Probabilities[node.s_Detector].push_back(node.probability());
+                }
+            }
+        }
+    }

-        double test(double minimumSignificance) const
-        {
-            maths::CBasicStatistics::SSampleMean<double>::TAccumulator meanSignificance;
-
-            for (std::size_t i = 0u; i < this->leafSet().size(); ++i)
-            {
-                const SNodeProbabilities &probabilities = this->leafSet()[i].second;
-                LOG_DEBUG("leaf = " << probabilities.s_Name);
-
-                std::vector<int> detectors;
-                for (TIntDoubleVecMapCItr j = probabilities.s_Probabilities.begin();
-                     j != probabilities.s_Probabilities.end();
-                     ++j)
-                {
-                    detectors.push_back(j->first);
-                }
+    double test(double minimumSignificance) const {
+        maths::CBasicStatistics::SSampleMean<double>::TAccumulator meanSignificance;

-                for (std::size_t j = 1u; j < detectors.size(); ++j)
-                {
-                    for (std::size_t k = 0u; k < j; ++k)
-                    {
-                        double significance =
-                            maths::CStatisticalTests::twoSampleKS(
-                                probabilities.s_Probabilities.find(detectors[j])->second,
-                                probabilities.s_Probabilities.find(detectors[k])->second);
-                        LOG_DEBUG(detectors[j] << " vs " << detectors[k]
-                                  << ": significance = " << significance);
-                        CPPUNIT_ASSERT(significance > minimumSignificance);
-                        meanSignificance.add(std::log(significance));
-                    }
-                }
+        for (std::size_t i = 0u; i < this->leafSet().size(); ++i) {
+            const SNodeProbabilities& probabilities = this->leafSet()[i].second;
+            LOG_DEBUG("leaf = " << probabilities.s_Name);
+
+            std::vector<int> detectors;
+            for (TIntDoubleVecMapCItr j = probabilities.s_Probabilities.begin(); j != probabilities.s_Probabilities.end(); ++j) {
+                detectors.push_back(j->first);
             }

-            return std::exp(maths::CBasicStatistics::mean(meanSignificance));
+            for (std::size_t j = 1u; j < detectors.size(); ++j) {
+                for (std::size_t k = 0u; k < j; ++k) {
+                    double significance = maths::CStatisticalTests::twoSampleKS(probabilities.s_Probabilities.find(detectors[j])->second,
+                                                                                probabilities.s_Probabilities.find(detectors[k])->second);
+                    LOG_DEBUG(detectors[j] << " vs " << detectors[k] << ": significance = " << significance);
+                    CPPUNIT_ASSERT(significance > minimumSignificance);
+                    meanSignificance.add(std::log(significance));
+                }
+            }
         }

+        return std::exp(maths::CBasicStatistics::mean(meanSignificance));
+    }
 };

 //! \brief Stubs out the result writer.
-class CWriterFunc
-{
-    public:
-        bool operator()(ml::core_t::TTime time,
-                        const ml::model::CHierarchicalResults::TNode &node,
-                        bool isBucketInfluencer)
-        {
-            LOG_DEBUG((isBucketInfluencer ? "BucketInfluencer" : "Influencer ")
-                      << node.s_Spec.print() << " initial score " << node.probability()
-                      << ", time: " << time);
-            return true;
-        }
+class CWriterFunc {
+public:
+    bool operator()(ml::core_t::TTime time, const ml::model::CHierarchicalResults::TNode& node, bool isBucketInfluencer) {
+        LOG_DEBUG((isBucketInfluencer ? "BucketInfluencer" : "Influencer ")
+                  << node.s_Spec.print() << " initial score " << node.probability() << ", time: " << time);
+        return true;
+    }
 };

 //! Compute the probability of the samples [\p begin, \p end).
 template<typename ITR>
-void addAggregateValues(double w1,
-                        double w2,
-                        std::size_t n,
-                        ITR begin, ITR end,
-                        TDoubleVec &scores,
-                        TDoubleVec &probabilities)
-{
+void addAggregateValues(double w1, double w2, std::size_t n, ITR begin, ITR end, TDoubleVec& scores, TDoubleVec& probabilities) {
     double score, probability;
     TDoubleVec probs(begin, end);
     model::CAnomalyScore::compute(w1, w2, 1, n, 0.05, probs, score, probability);
@@ -480,45 +356,57 @@ void addAggregateValues(double w1,

 void addResult(int detector,
                bool isPopulation,
-               const std::string &functionName,
+               const std::string& functionName,
                ml::model::function_t::EFunction function,
-               const std::string &partitionFieldName,
-               const std::string &partitionFieldValue,
-               const std::string &personFieldName,
-               const std::string &personFieldValue,
-               const std::string &valueFieldName,
+               const std::string& partitionFieldName,
+               const std::string& partitionFieldValue,
+               const std::string& personFieldName,
+               const std::string& personFieldValue,
+               const std::string& valueFieldName,
                double p,
-               ml::model::CHierarchicalResults &results)
-{
+               ml::model::CHierarchicalResults& results) {
     ml::model::SAnnotatedProbability annotatedProbability(p);
-    results.addModelResult(detector, isPopulation, functionName, function, partitionFieldName,
-                           partitionFieldValue, personFieldName, personFieldValue, valueFieldName,
+    results.addModelResult(detector,
+                           isPopulation,
+                           functionName,
+                           function,
+                           partitionFieldName,
+                           partitionFieldValue,
+                           personFieldName,
+                           personFieldValue,
+                           valueFieldName,
                            annotatedProbability);
 }

 void addResult(int detector,
                bool isPopulation,
-               const std::string &functionName,
+               const std::string& functionName,
                ml::model::function_t::EFunction function,
-               const std::string &partitionFieldName,
-               const std::string
&partitionFieldValue, - const std::string &personFieldName, - const std::string &personFieldValue, - const std::string &valueFieldName, + const std::string& partitionFieldName, + const std::string& partitionFieldValue, + const std::string& personFieldName, + const std::string& personFieldValue, + const std::string& valueFieldName, double p, - const ml::model::CAnomalyDetectorModel *model, - ml::model::CHierarchicalResults &results) -{ + const ml::model::CAnomalyDetectorModel* model, + ml::model::CHierarchicalResults& results) { ml::model::SAnnotatedProbability annotatedProbability(p); - results.addModelResult(detector, isPopulation, functionName, function, partitionFieldName, - partitionFieldValue, personFieldName, personFieldValue, valueFieldName, - annotatedProbability, model); + results.addModelResult(detector, + isPopulation, + functionName, + function, + partitionFieldName, + partitionFieldValue, + personFieldName, + personFieldValue, + valueFieldName, + annotatedProbability, + model); } } // unnamed:: -void CHierarchicalResultsTest::testBreadthFirstVisit() -{ +void CHierarchicalResultsTest::testBreadthFirstVisit() { LOG_DEBUG("*** testBreadthFirstVisit ***"); model::CHierarchicalResults results; @@ -554,16 +442,16 @@ void CHierarchicalResultsTest::testBreadthFirstVisit() addResult(1, false, FUNC, function, PART1, part1, PERS, pers1, VAL1, 0.1, results); addResult(1, false, FUNC, function, PART1, part1, PERS, pers2, VAL1, 0.1, results); addResult(1, false, FUNC, function, PART1, part1, PERS, pers3, VAL1, 0.1, results); - addResult(2, true, FUNC, function, PART1, part1, PERS, pers1, EMPTY_STRING, 0.1, results); - addResult(2, true, FUNC, function, PART1, part1, PERS, pers2, EMPTY_STRING, 0.1, results); - addResult(2, true, FUNC, function, PART1, part1, PERS, pers3, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part1, PERS, pers1, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part1, PERS, pers2, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part1, PERS, pers3, EMPTY_STRING, 0.1, results); addResult(3, false, FUNC, function, PART1, part1, PERS, pers1, VAL2, 0.1, results); addResult(3, false, FUNC, function, PART1, part1, PERS, pers2, VAL2, 0.1, results); addResult(3, false, FUNC, function, PART1, part1, PERS, pers4, VAL2, 0.1, results); addResult(1, false, FUNC, function, PART1, part2, PERS, pers1, VAL1, 0.1, results); addResult(1, false, FUNC, function, PART1, part2, PERS, pers2, VAL1, 0.1, results); - addResult(2, true, FUNC, function, PART1, part2, PERS, pers1, EMPTY_STRING, 0.1, results); - addResult(2, true, FUNC, function, PART1, part2, PERS, pers3, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part2, PERS, pers1, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part2, PERS, pers3, EMPTY_STRING, 0.1, results); addResult(3, false, FUNC, function, PART1, part2, PERS, pers4, VAL2, 0.1, results); addResult(4, false, FUNC, function, PART2, part1, PERS, pers1, VAL1, 0.1, results); addResult(4, false, FUNC, function, PART2, part1, PERS, pers2, VAL1, 0.1, results); @@ -573,18 +461,17 @@ void CHierarchicalResultsTest::testBreadthFirstVisit() addResult(5, false, FUNC, function, PART2, part2, PERS, pers1, VAL2, 0.1, results); addResult(5, false, FUNC, function, PART2, part2, PERS, pers2, VAL2, 0.1, results); addResult(5, false, FUNC, function, PART2, part2, PERS, pers3, VAL2, 0.1, results); - addResult(6, true, FUNC, function, PART3, part1, PERS, pers1, VAL1, 
0.1, results); - addResult(6, true, FUNC, function, PART3, part1, PERS, pers2, VAL1, 0.1, results); + addResult(6, true, FUNC, function, PART3, part1, PERS, pers1, VAL1, 0.1, results); + addResult(6, true, FUNC, function, PART3, part1, PERS, pers2, VAL1, 0.1, results); results.buildHierarchy(); CBreadthFirstCheck bfc; results.bottomUpBreadthFirst(bfc); - bfc.check(5/*expected layers*/); + bfc.check(5 /*expected layers*/); } -void CHierarchicalResultsTest::testDepthFirstVisit() -{ +void CHierarchicalResultsTest::testDepthFirstVisit() { LOG_DEBUG("*** testDepthFirstVisit ***"); model::CHierarchicalResults results; @@ -620,16 +507,16 @@ void CHierarchicalResultsTest::testDepthFirstVisit() addResult(1, false, FUNC, function, PART1, part1, PERS, pers1, VAL1, 0.1, results); addResult(1, false, FUNC, function, PART1, part1, PERS, pers2, VAL1, 0.1, results); addResult(1, false, FUNC, function, PART1, part1, PERS, pers3, VAL1, 0.1, results); - addResult(2, true, FUNC, function, PART1, part1, PERS, pers1, EMPTY_STRING, 0.1, results); - addResult(2, true, FUNC, function, PART1, part1, PERS, pers2, EMPTY_STRING, 0.1, results); - addResult(2, true, FUNC, function, PART1, part1, PERS, pers3, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part1, PERS, pers1, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part1, PERS, pers2, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part1, PERS, pers3, EMPTY_STRING, 0.1, results); addResult(3, false, FUNC, function, PART1, part1, PERS, pers1, VAL2, 0.1, results); addResult(3, false, FUNC, function, PART1, part1, PERS, pers2, VAL2, 0.1, results); addResult(3, false, FUNC, function, PART1, part1, PERS, pers4, VAL2, 0.1, results); addResult(1, false, FUNC, function, PART1, part2, PERS, pers1, VAL1, 0.1, results); addResult(1, false, FUNC, function, PART1, part2, PERS, pers2, VAL1, 0.1, results); - addResult(2, true, FUNC, function, PART1, part2, PERS, pers1, EMPTY_STRING, 0.1, results); - addResult(2, true, FUNC, function, PART1, part2, PERS, pers3, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part2, PERS, pers1, EMPTY_STRING, 0.1, results); + addResult(2, true, FUNC, function, PART1, part2, PERS, pers3, EMPTY_STRING, 0.1, results); addResult(3, false, FUNC, function, PART1, part2, PERS, pers4, VAL2, 0.1, results); addResult(4, false, FUNC, function, PART2, part1, PERS, pers1, VAL1, 0.1, results); addResult(4, false, FUNC, function, PART2, part1, PERS, pers2, VAL1, 0.1, results); @@ -639,8 +526,8 @@ void CHierarchicalResultsTest::testDepthFirstVisit() addResult(5, false, FUNC, function, PART2, part2, PERS, pers1, VAL2, 0.1, results); addResult(5, false, FUNC, function, PART2, part2, PERS, pers2, VAL2, 0.1, results); addResult(5, false, FUNC, function, PART2, part2, PERS, pers3, VAL2, 0.1, results); - addResult(6, true, FUNC, function, PART3, part1, PERS, pers1, VAL1, 0.1, results); - addResult(6, true, FUNC, function, PART3, part1, PERS, pers2, VAL1, 0.1, results); + addResult(6, true, FUNC, function, PART3, part1, PERS, pers1, VAL1, 0.1, results); + addResult(6, true, FUNC, function, PART3, part1, PERS, pers2, VAL1, 0.1, results); results.buildHierarchy(); @@ -648,8 +535,7 @@ void CHierarchicalResultsTest::testDepthFirstVisit() results.postorderDepthFirst(dfc); } -namespace -{ +namespace { const std::string FALSE_STR("false"); const std::string TRUE_STR("true"); @@ -691,8 +577,7 @@ const std::string p35("p35"); } // unnamed:: -void 
CHierarchicalResultsTest::testBuildHierarchy() -{ +void CHierarchicalResultsTest::testBuildHierarchy() { LOG_DEBUG("*** testBuildHierarchy ***"); static const std::string FUNC("mean"); @@ -706,8 +591,7 @@ void CHierarchicalResultsTest::testBuildHierarchy() CPrinter printer; results.postorderDepthFirst(printer); LOG_DEBUG("\nby:\n" << printer.result()); - CPPUNIT_ASSERT_EQUAL(std::string("'false/false/mean/////': 1, 0"), - printer.result()); + CPPUNIT_ASSERT_EQUAL(std::string("'false/false/mean/////': 1, 0"), printer.result()); } { model::CHierarchicalResults results; @@ -725,8 +609,8 @@ void CHierarchicalResultsTest::testBuildHierarchy() { model::CHierarchicalResults results; addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.3, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); addResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p22, EMPTY_STRING, 0.03, results); results.buildHierarchy(); CPrinter printer; @@ -767,14 +651,14 @@ void CHierarchicalResultsTest::testBuildHierarchy() addResult(3, false, FUNC, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); addResult(3, false, FUNC, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); addResult(3, false, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); - addResult(4, true, FUNC, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(4, true, FUNC, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); - addResult(5, true, FUNC, function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.2, results); - addResult(5, true, FUNC, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.2, results); - addResult(5, true, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.1, results); - addResult(6, true, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.15, results); + addResult(4, true, FUNC, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(4, true, FUNC, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(5, true, FUNC, function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.2, results); + addResult(5, true, FUNC, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.2, results); + addResult(5, true, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.1, results); + addResult(6, true, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.15, results); addResult(7, false, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.12, results); - addResult(6, true, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.12, results); + addResult(6, true, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.12, results); addResult(7, false, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.82, results); results.buildHierarchy(); CPrinter printer; @@ -807,8 +691,7 @@ void CHierarchicalResultsTest::testBuildHierarchy() } } -void CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue() -{ +void 
CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue() { LOG_DEBUG("*** testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue ***"); static const std::string FUNC("mean"); @@ -863,8 +746,7 @@ void CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePerson CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[1]->s_Children.size()); } -void CHierarchicalResultsTest::testBasicVisitor() -{ +void CHierarchicalResultsTest::testBasicVisitor() { LOG_DEBUG("*** testBasicVisitor ***"); static const std::string FUNC("max"); @@ -903,8 +785,8 @@ void CHierarchicalResultsTest::testBasicVisitor() { model::CHierarchicalResults results; addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, EMPTY_STRING, EMPTY_STRING, 1.0, results); - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 1.0, results); - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 1.0, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 1.0, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -937,8 +819,8 @@ void CHierarchicalResultsTest::testBasicVisitor() model::CHierarchicalResults results; addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, EMPTY_STRING, EMPTY_STRING, 1.0, results); - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 1.0, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p23, EMPTY_STRING, 1.0, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 1.0, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p23, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -964,9 +846,9 @@ void CHierarchicalResultsTest::testBasicVisitor() { LOG_DEBUG("Clear..."); model::CHierarchicalResults results; - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results); - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.3, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.3, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results); addResult(3, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; @@ -990,14 +872,14 @@ void CHierarchicalResultsTest::testBasicVisitor() // Test partition { model::CHierarchicalResults results; - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results); - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.3, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results); + addResult(1, true, FUNC, function, 
EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.3, results);
+        addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results);
         addResult(3, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results);
-        addResult(4, true, FUNC, function, PNF1, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results);
-        addResult(4, true, FUNC, function, PNF1, pn11, PF1, p11, EMPTY_STRING, 0.3, results);
-        addResult(5, true, FUNC, function, PNF1, pn12, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results);
-        addResult(6, true, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results);
+        addResult(4, true, FUNC, function, PNF1, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results);
+        addResult(4, true, FUNC, function, PNF1, pn11, PF1, p11, EMPTY_STRING, 0.3, results);
+        addResult(5, true, FUNC, function, PNF1, pn12, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results);
+        addResult(6, true, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results);
         results.buildHierarchy();
         CPrinter printer;
         results.postorderDepthFirst(printer);
@@ -1043,8 +925,7 @@ void CHierarchicalResultsTest::testBasicVisitor()
     }
 }

-void CHierarchicalResultsTest::testAggregator()
-{
+void CHierarchicalResultsTest::testAggregator() {
     LOG_DEBUG("*** testAggregator ***");

     using TAnnotatedProbabilityVec = std::vector<model::SAnnotatedProbability>;
@@ -1061,10 +942,9 @@

     // Test by.
     {
-        double p_[] = { 0.22, 0.03, 0.02 };
+        double p_[] = {0.22, 0.03, 0.02};
         TAnnotatedProbabilityVec annotatedProbabilities;
-        for (std::size_t i = 0; i < boost::size(p_); ++i)
-        {
+        for (std::size_t i = 0; i < boost::size(p_); ++i) {
             annotatedProbabilities.push_back(model::SAnnotatedProbability(p_[i]));
         }
@@ -1086,10 +966,9 @@

     // Test over.
     {
-        double p_[] = { 0.25, 0.3, 0.001 };
+        double p_[] = {0.25, 0.3, 0.001};
         TAnnotatedProbabilityVec annotatedProbabilities;
-        for (std::size_t i = 0; i < boost::size(p_); ++i)
-        {
+        for (std::size_t i = 0; i < boost::size(p_); ++i) {
             annotatedProbabilities.push_back(model::SAnnotatedProbability(p_[i]));
         }
@@ -1111,26 +990,26 @@

     // Test aggregation of multiple searches.
{ - double p11_[] = { 0.25, 0.3, 0.001 }; - double p12_[] = { 0.2, 0.1 }; - double p21_[] = { 0.5, 0.3 }; - double p22_[] = { 0.025, 0.03 }; - double rp1[] = { 0.006079029, 0.379477 }; - double rp2[] = { 0.25, 0.001 }; - double rp3[] = { 0.2, 0.1 }; + double p11_[] = {0.25, 0.3, 0.001}; + double p12_[] = {0.2, 0.1}; + double p21_[] = {0.5, 0.3}; + double p22_[] = {0.025, 0.03}; + double rp1[] = {0.006079029, 0.379477}; + double rp2[] = {0.25, 0.001}; + double rp3[] = {0.2, 0.1}; model::SAnnotatedProbability annotatedProbability; model::CHierarchicalResults results; annotatedProbability.s_Probability = p11_[0]; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p11_[1]; results.addModelResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p11_[2]; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p12_[0]; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbability); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p12_[1]; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbability); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p21_[0]; results.addModelResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p21, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p21_[1]; @@ -1148,8 +1027,7 @@ void CHierarchicalResultsTest::testAggregator() results.bottomUpBreadthFirst(extract); TDoubleVec scores; TDoubleVec probabilities; - for (std::size_t i = 0u; i < extract.personNodes().size(); ++i) - { + for (std::size_t i = 0u; i < extract.personNodes().size(); ++i) { scores.push_back(extract.personNodes()[i]->s_RawAnomalyScore); probabilities.push_back(extract.personNodes()[i]->probability()); } @@ -1162,20 +1040,17 @@ void CHierarchicalResultsTest::testAggregator() maths::COrderings::simultaneousSort(expectedProbabilities, expectedScores); LOG_DEBUG("expectedScores = " << core::CContainerPrinter::print(expectedScores)); LOG_DEBUG("scores = " << core::CContainerPrinter::print(scores)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedScores), - core::CContainerPrinter::print(scores)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedScores), core::CContainerPrinter::print(scores)); LOG_DEBUG("expectedProbabilities = " << core::CContainerPrinter::print(expectedProbabilities)); LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedProbabilities), - core::CContainerPrinter::print(probabilities)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedProbabilities), core::CContainerPrinter::print(probabilities)); } // Test partition { - double p_[] = 
{ 0.01, 0.03, 0.001 }; + double p_[] = {0.01, 0.03, 0.001}; TAnnotatedProbabilityVec annotatedProbabilities; - for (std::size_t i = 0; i < boost::size(p_); ++i) - { + for (std::size_t i = 0; i < boost::size(p_); ++i) { annotatedProbabilities.push_back(model::SAnnotatedProbability(p_[i])); } model::CHierarchicalResults results; @@ -1195,8 +1070,7 @@ void CHierarchicalResultsTest::testAggregator() } } -void CHierarchicalResultsTest::testInfluence() -{ +void CHierarchicalResultsTest::testInfluence() { LOG_DEBUG("*** testInfluence ***"); model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); @@ -1212,15 +1086,15 @@ void CHierarchicalResultsTest::testInfluence() { model::SAnnotatedProbability annotatedProbability1(0.22); annotatedProbability1.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); model::SAnnotatedProbability annotatedProbability2(0.003); annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.9)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.9)); annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); model::SAnnotatedProbability annotatedProbability3(0.01); annotatedProbability3.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); model::CHierarchicalResults results; results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability1); @@ -1248,27 +1122,27 @@ void CHierarchicalResultsTest::testInfluence() { model::SAnnotatedProbability annotatedProbability1(0.22); annotatedProbability1.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); model::SAnnotatedProbability annotatedProbability2(0.003); annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.9)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.9)); annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); model::SAnnotatedProbability annotatedProbability3(0.01); annotatedProbability3.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); model::SAnnotatedProbability annotatedProbability4(0.03); annotatedProbability4.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); annotatedProbability4.s_Influences.push_back( - 
TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 0.8)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 0.8)); model::SAnnotatedProbability annotatedProbability5(0.56); annotatedProbability5.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.8)); + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.8)); model::CHierarchicalResults results; - results.addModelResult(1, true, FUNC, function, PNF1, pn11, PF1, p11, EMPTY_STRING, annotatedProbability1); - results.addModelResult(1, true, FUNC, function, PNF1, pn12, PF1, p12, EMPTY_STRING, annotatedProbability2); + results.addModelResult(1, true, FUNC, function, PNF1, pn11, PF1, p11, EMPTY_STRING, annotatedProbability1); + results.addModelResult(1, true, FUNC, function, PNF1, pn12, PF1, p12, EMPTY_STRING, annotatedProbability2); results.addModelResult(2, false, FUNC, function, PNF2, pn21, PF1, p13, EMPTY_STRING, annotatedProbability3); results.addModelResult(2, false, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING, annotatedProbability4); results.addModelResult(2, false, FUNC, function, PNF2, pn23, PF1, p12, EMPTY_STRING, annotatedProbability5); @@ -1297,7 +1171,8 @@ void CHierarchicalResultsTest::testInfluence() // Test high probability records are written due to low probability influencer { model::SAnnotatedProbability annotatedProbability1Low(0.06); - annotatedProbability1Low.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); + annotatedProbability1Low.s_Influences.push_back( + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); model::SAnnotatedProbability annotatedProbability1High(0.8); model::SAnnotatedProbability annotatedProbability11 = annotatedProbability1Low; model::SAnnotatedProbability annotatedProbability12 = annotatedProbability1High; @@ -1306,7 +1181,8 @@ void CHierarchicalResultsTest::testInfluence() model::SAnnotatedProbability annotatedProbability15 = annotatedProbability1High; model::SAnnotatedProbability annotatedProbability16 = annotatedProbability1High; model::SAnnotatedProbability annotatedProbability2(0.001); - annotatedProbability2.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); + annotatedProbability2.s_Influences.push_back( + TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); model::CHierarchicalResults results; results.addInfluencer(*I); @@ -1333,12 +1209,11 @@ void CHierarchicalResultsTest::testInfluence() " 'false/false/max/PNF1/pn11/PF1/p13/': 0.06, 0, [((I, i1), 1)]\n" " 'false/false/max/PNF1/pn11/PF1/p11/': 0.06, 0, [((I, i1), 1)]\n" " 'false/false/max///PF2/p21/': 0.001, 0.09819639, [((I, i2), 1)]"), - writtenNodesOnlyPrinter.result()); + writtenNodesOnlyPrinter.result()); } } -void CHierarchicalResultsTest::testScores() -{ +void CHierarchicalResultsTest::testScores() { LOG_DEBUG("*** testScores ***"); model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); @@ -1377,9 +1252,9 @@ void CHierarchicalResultsTest::testScores() { model::CHierarchicalResults results; addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.3, results); - addResult(2, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(2, 
true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); - addResult(3, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF2, p22, EMPTY_STRING, 0.03, results); + addResult(2, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); + addResult(2, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); + addResult(3, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF2, p22, EMPTY_STRING, 0.03, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.bottomUpBreadthFirst(finalizer); @@ -1390,10 +1265,10 @@ void CHierarchicalResultsTest::testScores() } { model::CHierarchicalResults results; - addResult(1, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(1, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); - addResult(2, true, RARE, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.07, results); - addResult(2, true, RARE, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.3, results); + addResult(1, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); + addResult(1, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); + addResult(2, true, RARE, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.07, results); + addResult(2, true, RARE, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.3, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.bottomUpBreadthFirst(finalizer); @@ -1422,20 +1297,20 @@ void CHierarchicalResultsTest::testScores() { model::CHierarchicalResults results; addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(2, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(2, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p14, EMPTY_STRING, 0.01, results); - addResult(3, false, MAX, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(3, false, MAX, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(3, false, MAX, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); - addResult(4, true, MAX, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(4, true, MAX, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); - addResult(5, true, MAX, function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.2, results); - addResult(5, true, MAX, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.2, results); - addResult(5, true, MAX, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.1, results); - addResult(6, true, MAX, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.15, results); - addResult(7, false, MAX, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.12, results); - addResult(6, true, MAX, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.12, results); - addResult(7, false, MAX, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.82, results); + addResult(2, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); + addResult(2, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p14, EMPTY_STRING, 0.01, results); + addResult(3, false, MAX, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 
0.01, results); + addResult(3, false, MAX, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(3, false, MAX, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(4, true, MAX, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(4, true, MAX, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(5, true, MAX, function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.2, results); + addResult(5, true, MAX, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.2, results); + addResult(5, true, MAX, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.1, results); + addResult(6, true, MAX, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.15, results); + addResult(7, false, MAX, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.12, results); + addResult(6, true, MAX, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.12, results); + addResult(7, false, MAX, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.82, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.bottomUpBreadthFirst(finalizer); @@ -1446,8 +1321,7 @@ void CHierarchicalResultsTest::testScores() } } -void CHierarchicalResultsTest::testWriter() -{ +void CHierarchicalResultsTest::testWriter() { LOG_DEBUG("*** testWriter ***"); model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); @@ -1465,21 +1339,21 @@ void CHierarchicalResultsTest::testWriter() model::SModelParams params(modelConfig.bucketLength()); model::CSearchKey key; model::CAnomalyDetectorModel::TDataGathererPtr dataGatherer( - new model::CDataGatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - model_t::TFeatureVec(1, model_t::E_IndividualCountByBucketAndPerson), - modelConfig.bucketLength(), - 0)); + new model::CDataGatherer(model_t::E_EventRate, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + model_t::TFeatureVec(1, model_t::E_IndividualCountByBucketAndPerson), + modelConfig.bucketLength(), + 0)); model::CEventData dummy; dataGatherer->addArrival(TStrCPtrVec(1, &EMPTY_STRING), dummy, resourceMonitor); dummy.clear(); @@ -1495,20 +1369,20 @@ void CHierarchicalResultsTest::testWriter() model::CCountingModel model(params, dataGatherer); model::CHierarchicalResults results; addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); - addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.001, &model, results); - addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p14, EMPTY_STRING, 0.001, &model, results); - addResult(3, false, FUNC, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); - addResult(3, false, FUNC, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); - addResult(3, false, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.005, &model, results); - addResult(4, true, FUNC, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); - addResult(4, true, FUNC, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.005, &model, results); - addResult(5, true, FUNC, 
function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.008, &model, results); - addResult(5, true, FUNC, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.009, &model, results); - addResult(5, true, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.01, &model, results); - addResult(6, true, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.007, &model, results); - addResult(7, false, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.006, &model, results); - addResult(6, true, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.004, &model, results); - addResult(7, false, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.003, &model, results); + addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.001, &model, results); + addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p14, EMPTY_STRING, 0.001, &model, results); + addResult(3, false, FUNC, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); + addResult(3, false, FUNC, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); + addResult(3, false, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.005, &model, results); + addResult(4, true, FUNC, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); + addResult(4, true, FUNC, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.005, &model, results); + addResult(5, true, FUNC, function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.008, &model, results); + addResult(5, true, FUNC, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.009, &model, results); + addResult(5, true, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.01, &model, results); + addResult(6, true, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.007, &model, results); + addResult(7, false, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.006, &model, results); + addResult(6, true, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.004, &model, results); + addResult(7, false, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.003, &model, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); CPrinter printer; @@ -1518,8 +1392,7 @@ void CHierarchicalResultsTest::testWriter() } } -void CHierarchicalResultsTest::testNormalizer() -{ +void CHierarchicalResultsTest::testNormalizer() { LOG_DEBUG("*** testNormalizer ***"); using TNormalizerPtr = boost::shared_ptr; @@ -1536,36 +1409,29 @@ void CHierarchicalResultsTest::testNormalizer() // Not using TRUE and FALSE as they clash with Windows macros - const std::string fields[][7] = - { - { "1", FALSE_STR, PNF1, pn11, PF2, p21, EMPTY_STRING }, - { "1", FALSE_STR, PNF1, pn11, PF2, p22, EMPTY_STRING }, - { "1", FALSE_STR, PNF1, pn11, PF2, p23, EMPTY_STRING }, - { "2", FALSE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING }, - { "2", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING }, - { "2", FALSE_STR, PNF1, pn12, PF1, p13, EMPTY_STRING }, - { "3", TRUE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING }, - { "3", TRUE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING }, - { "3", TRUE_STR, PNF1, pn12, PF1, p13, EMPTY_STRING }, - { "4", FALSE_STR, PNF2, pn21, PF1, p11, EMPTY_STRING }, - { "4", FALSE_STR, PNF2, pn22, PF1, p12, EMPTY_STRING }, - { "4", FALSE_STR, PNF2, pn23, PF1, p13, EMPTY_STRING } - }; + const std::string fields[][7] = {{"1", FALSE_STR, PNF1, pn11, PF2, p21, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn11, PF2, p22, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn11, PF2, p23, 
EMPTY_STRING}, + {"2", FALSE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING}, + {"2", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"2", FALSE_STR, PNF1, pn12, PF1, p13, EMPTY_STRING}, + {"3", TRUE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING}, + {"3", TRUE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"3", TRUE_STR, PNF1, pn12, PF1, p13, EMPTY_STRING}, + {"4", FALSE_STR, PNF2, pn21, PF1, p11, EMPTY_STRING}, + {"4", FALSE_STR, PNF2, pn22, PF1, p12, EMPTY_STRING}, + {"4", FALSE_STR, PNF2, pn23, PF1, p13, EMPTY_STRING}}; TStrNormalizerPtrMap expectedNormalizers; expectedNormalizers.insert( - TStrNormalizerPtrMap::value_type( - std::string("r"), - TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))); + TStrNormalizerPtrMap::value_type(std::string("r"), TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))); test::CRandomNumbers rng; - for (std::size_t i = 0u; i < 300; ++i) - { + for (std::size_t i = 0u; i < 300; ++i) { model::CHierarchicalResults results; TDoubleVec p; rng.generateUniformSamples(0.0, 1.0, boost::size(fields), p); TAttributeProbabilityVec empty; - for (std::size_t j = 0u; j < boost::size(fields); ++j) - { + for (std::size_t j = 0u; j < boost::size(fields); ++j) { addResult(boost::lexical_cast(fields[j][0]), fields[j][1] == TRUE_STR, FUNC, @@ -1595,34 +1461,28 @@ void CHierarchicalResultsTest::testNormalizer() TDoubleVec normalized; TDoubleVec expectedNormalized; - for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) - { - std::string key = 'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName - + ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName; + for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) { + std::string key = + 'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName; TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); - if (itr == expectedNormalizers.end()) - { - itr = expectedNormalizers.insert( - TStrNormalizerPtrMap::value_type( - key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))).first; + if (itr == expectedNormalizers.end()) { + itr = expectedNormalizers + .insert(TStrNormalizerPtrMap::value_type(key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) + .first; } double probability = extract.leafNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() ? 
0.0 : maths::CTools::anomalyScore(probability); itr->second->updateQuantiles(score); } - for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) - { - std::string key = 'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName - + ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName; + for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) { + std::string key = + 'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName; TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); - if (nodes.insert(extract.leafNodes()[j]).second) - { + if (nodes.insert(extract.leafNodes()[j]).second) { double probability = extract.leafNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); normalized.push_back(extract.leafNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); @@ -1631,39 +1491,32 @@ void CHierarchicalResultsTest::testNormalizer() LOG_DEBUG("* leaf *") LOG_DEBUG("expectedNormalized = " << core::CContainerPrinter::print(expectedNormalized)); LOG_DEBUG("normalized = " << core::CContainerPrinter::print(normalized)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), - core::CContainerPrinter::print(normalized)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), core::CContainerPrinter::print(normalized)); normalized.clear(); expectedNormalized.clear(); - for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) - { - std::string key = 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName - + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; + for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) { + std::string key = + 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); - if (itr == expectedNormalizers.end()) - { - itr = expectedNormalizers.insert( - TStrNormalizerPtrMap::value_type( - key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))).first; + if (itr == expectedNormalizers.end()) { + itr = expectedNormalizers + .insert(TStrNormalizerPtrMap::value_type(key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) + .first; } double probability = extract.personNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() ? 
0.0 : maths::CTools::anomalyScore(probability); itr->second->updateQuantiles(score); } - for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) - { - std::string key = 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName - + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; + for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) { + std::string key = + 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); - if (nodes.insert(extract.personNodes()[j]).second) - { + if (nodes.insert(extract.personNodes()[j]).second) { double probability = extract.personNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); normalized.push_back(extract.personNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); @@ -1672,37 +1525,30 @@ void CHierarchicalResultsTest::testNormalizer() LOG_DEBUG("* person *") LOG_DEBUG("expectedNormalized = " << core::CContainerPrinter::print(expectedNormalized)); LOG_DEBUG("normalized = " << core::CContainerPrinter::print(normalized)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), - core::CContainerPrinter::print(normalized)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), core::CContainerPrinter::print(normalized)); normalized.clear(); expectedNormalized.clear(); - for (std::size_t j = 0u; j < extract.partitionNodes().size(); ++j) - { + for (std::size_t j = 0u; j < extract.partitionNodes().size(); ++j) { std::string key = 'n' + *extract.partitionNodes()[j]->s_Spec.s_PartitionFieldName; TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); - if (itr == expectedNormalizers.end()) - { - itr = expectedNormalizers.insert( - TStrNormalizerPtrMap::value_type( - key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))).first; + if (itr == expectedNormalizers.end()) { + itr = expectedNormalizers + .insert(TStrNormalizerPtrMap::value_type(key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) + .first; } double probability = extract.partitionNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() ? 
0.0 : maths::CTools::anomalyScore(probability); itr->second->updateQuantiles(score); } - for (std::size_t j = 0u; j < extract.partitionNodes().size(); ++j) - { + for (std::size_t j = 0u; j < extract.partitionNodes().size(); ++j) { std::string key = 'n' + *extract.partitionNodes()[j]->s_Spec.s_PartitionFieldName; TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); - if (nodes.insert(extract.partitionNodes()[j]).second) - { + if (nodes.insert(extract.partitionNodes()[j]).second) { double probability = extract.partitionNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); normalized.push_back(extract.partitionNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); @@ -1711,13 +1557,11 @@ void CHierarchicalResultsTest::testNormalizer() LOG_DEBUG("* partition *") LOG_DEBUG("expectedNormalized = " << core::CContainerPrinter::print(expectedNormalized)); LOG_DEBUG("normalized = " << core::CContainerPrinter::print(normalized)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), - core::CContainerPrinter::print(normalized)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), core::CContainerPrinter::print(normalized)); double probability = results.root()->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? - 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() ? 
0.0 : maths::CTools::anomalyScore(probability); expectedNormalizers.find(std::string("r"))->second->updateQuantiles(score); expectedNormalizers.find(std::string("r"))->second->normalize(score); @@ -1736,16 +1580,14 @@ void CHierarchicalResultsTest::testNormalizer() model::CHierarchicalResultsNormalizer newNormalizerJson(modelConfig); std::stringstream stream(origJson); - CPPUNIT_ASSERT_EQUAL(model::CHierarchicalResultsNormalizer::E_Ok, - newNormalizerJson.fromJsonStream(stream)); + CPPUNIT_ASSERT_EQUAL(model::CHierarchicalResultsNormalizer::E_Ok, newNormalizerJson.fromJsonStream(stream)); std::string newJson; newNormalizerJson.toJson(123, "mykey", newJson, true); CPPUNIT_ASSERT_EQUAL(newJson, origJson); } -void CHierarchicalResultsTest::testDetectorEqualizing() -{ +void CHierarchicalResultsTest::testDetectorEqualizing() { LOG_DEBUG("*** testDetectorEqualizing ***"); model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); @@ -1758,29 +1600,24 @@ void CHierarchicalResultsTest::testDetectorEqualizing() static const std::string FUNC("max"); static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMax); - const std::string fields[][7] = - { - { "0", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING }, - { "0", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING }, - { "0", FALSE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING }, - { "1", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING }, - { "1", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING }, - { "1", FALSE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING }, - { "2", TRUE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING }, - { "2", TRUE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING }, - { "2", TRUE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING }, - { "3", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING }, - { "3", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING }, - { "3", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING } - }; - double scales[] = { 1.9, 2.5, 1.7, 2.9 }; - - for (std::size_t i = 0u; i < 300; ++i) - { + const std::string fields[][7] = {{"0", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, + {"0", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"0", FALSE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, + {"2", TRUE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING}, + {"2", TRUE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"2", TRUE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, + {"3", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, + {"3", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"3", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}}; + double scales[] = {1.9, 2.5, 1.7, 2.9}; + + for (std::size_t i = 0u; i < 300; ++i) { model::CHierarchicalResults results; TAttributeProbabilityVec empty; - for (std::size_t j = 0u; j < boost::size(fields); ++j) - { + for (std::size_t j = 0u; j < boost::size(fields); ++j) { int detector = boost::lexical_cast(fields[j][0]); TDoubleVec p; rng.generateGammaSamples(1.0, scales[detector], 1, p); @@ -1801,12 +1638,10 @@ void CHierarchicalResultsTest::testDetectorEqualizing() results.bottomUpBreadthFirst(aggregator); } - for (std::size_t i = 0u; i < 300; ++i) - { + for (std::size_t i = 0u; i < 300; ++i) { model::CHierarchicalResults results; TAttributeProbabilityVec empty; - for (std::size_t j = 0u; j < boost::size(fields); ++j) - { + for (std::size_t j = 0u; j < boost::size(fields); ++j) { int detector = 
boost::lexical_cast(fields[j][0]); TDoubleVec p; rng.generateGammaSamples(1.0, scales[detector], 1, p); @@ -1847,8 +1682,8 @@ void CHierarchicalResultsTest::testDetectorEqualizing() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&model::CHierarchicalResultsAggregator::acceptRestoreTraverser, - &restoredAggregator, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(&model::CHierarchicalResultsAggregator::acceptRestoreTraverser, &restoredAggregator, _1))); } // Checksums should agree. @@ -1869,19 +1704,14 @@ void CHierarchicalResultsTest::testDetectorEqualizing() static const std::string FUNC("max"); static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMax); - const std::string fields[][7] = - { - { "0", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING }, - { "1", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING } - }; - double scales[] = { 1.0, 3.5 }; + const std::string fields[][7] = {{"0", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}}; + double scales[] = {1.0, 3.5}; - for (std::size_t i = 0u; i < 500; ++i) - { + for (std::size_t i = 0u; i < 500; ++i) { model::CHierarchicalResults results; TAttributeProbabilityVec empty; - for (std::size_t j = 0u; j < boost::size(fields); ++j) - { + for (std::size_t j = 0u; j < boost::size(fields); ++j) { int detector = boost::lexical_cast(fields[j][0]); TDoubleVec p; rng.generateGammaSamples(1.0, scales[detector], 1, p); @@ -1905,12 +1735,10 @@ void CHierarchicalResultsTest::testDetectorEqualizing() using TDoubleSizePr = std::pair; maths::CBasicStatistics::COrderStatisticsStack mostAnomalous; - for (std::size_t i = 0u; i < 100; ++i) - { + for (std::size_t i = 0u; i < 100; ++i) { model::CHierarchicalResults results; TAttributeProbabilityVec empty; - for (std::size_t j = 0u; j < boost::size(fields); ++j) - { + for (std::size_t j = 0u; j < boost::size(fields); ++j) { int detector = boost::lexical_cast(fields[j][0]); TDoubleVec p; rng.generateGammaSamples(1.0, scales[detector], 1, p); @@ -1940,9 +1768,7 @@ void CHierarchicalResultsTest::testDetectorEqualizing() } } - -void CHierarchicalResultsTest::testShouldWritePartition() -{ +void CHierarchicalResultsTest::testShouldWritePartition() { static const std::string PART1("PART1"); static const std::string PERS("PERS"); std::string pers1("pers1"); @@ -1964,7 +1790,7 @@ void CHierarchicalResultsTest::testShouldWritePartition() results.postorderDepthFirst(printer); LOG_DEBUG("\nhierarchy:\n" << printer.result()); - const ml::model::CHierarchicalResults::TNode *root = results.root(); + const ml::model::CHierarchicalResults::TNode* root = results.root(); CPPUNIT_ASSERT_EQUAL(std::size_t(2), root->s_Children.size()); CNodeExtractor extract; @@ -1986,52 +1812,38 @@ void CHierarchicalResultsTest::testShouldWritePartition() results.bottomUpBreadthFirst(aggregator); model::CLimits limits; - CPPUNIT_ASSERT(ml::model::CHierarchicalResultsVisitor::shouldWriteResult(limits, - results, *extract.partitionNodes()[0], false)); - CPPUNIT_ASSERT(ml::model::CHierarchicalResultsVisitor::shouldWriteResult(limits, - results, *extract.partitionNodes()[1], false)); + CPPUNIT_ASSERT(ml::model::CHierarchicalResultsVisitor::shouldWriteResult(limits, results, *extract.partitionNodes()[0], false)); + CPPUNIT_ASSERT(ml::model::CHierarchicalResultsVisitor::shouldWriteResult(limits, 
results, *extract.partitionNodes()[1], false));
 }
 
-CppUnit::Test *CHierarchicalResultsTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CHierarchicalResultsTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testBreadthFirstVisit",
-                                   &CHierarchicalResultsTest::testBreadthFirstVisit) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testpostorderDepthFirstVisit",
-                                   &CHierarchicalResultsTest::testDepthFirstVisit) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testBuildHierarchy",
-                                   &CHierarchicalResultsTest::testBuildHierarchy) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue",
-                                   &CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testBasicVisitor",
-                                   &CHierarchicalResultsTest::testBasicVisitor) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testAggregator",
-                                   &CHierarchicalResultsTest::testAggregator) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testInfluence",
-                                   &CHierarchicalResultsTest::testInfluence) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testScores",
-                                   &CHierarchicalResultsTest::testScores) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testWriter",
-                                   &CHierarchicalResultsTest::testWriter) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testNormalizer",
-                                   &CHierarchicalResultsTest::testNormalizer) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testDetectorEqualizing",
-                                   &CHierarchicalResultsTest::testDetectorEqualizing) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CHierarchicalResultsTest>(
-                                   "CHierarchicalResultsTest::testShouldWritePartition",
-                                   &CHierarchicalResultsTest::testShouldWritePartition) );
+CppUnit::Test* CHierarchicalResultsTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CHierarchicalResultsTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testBreadthFirstVisit",
+                                                                            &CHierarchicalResultsTest::testBreadthFirstVisit));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testpostorderDepthFirstVisit",
+                                                                            &CHierarchicalResultsTest::testDepthFirstVisit));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testBuildHierarchy",
+                                                                            &CHierarchicalResultsTest::testBuildHierarchy));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue",
+        &CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testBasicVisitor",
+                                                                            &CHierarchicalResultsTest::testBasicVisitor));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testAggregator",
+                                                                            &CHierarchicalResultsTest::testAggregator));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testInfluence",
+                                                                            &CHierarchicalResultsTest::testInfluence));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testScores", &CHierarchicalResultsTest::testScores));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testWriter",
+                                                          &CHierarchicalResultsTest::testWriter));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testNormalizer",
+                                                                            &CHierarchicalResultsTest::testNormalizer));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testDetectorEqualizing",
+                                                                            &CHierarchicalResultsTest::testDetectorEqualizing));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testShouldWritePartition",
+                                                                            &CHierarchicalResultsTest::testShouldWritePartition));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CHierarchicalResultsTest.h b/lib/model/unittest/CHierarchicalResultsTest.h
index 18d54cd0bf..2efb39da4e 100644
--- a/lib/model/unittest/CHierarchicalResultsTest.h
+++ b/lib/model/unittest/CHierarchicalResultsTest.h
@@ -9,23 +9,22 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
-class CHierarchicalResultsTest : public CppUnit::TestFixture
-{
-    public:
-        void testBreadthFirstVisit();
-        void testDepthFirstVisit();
-        void testBuildHierarchy();
-        void testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue();
-        void testBasicVisitor();
-        void testAggregator();
-        void testInfluence();
-        void testScores();
-        void testWriter();
-        void testNormalizer();
-        void testDetectorEqualizing();
-        void testShouldWritePartition();
+class CHierarchicalResultsTest : public CppUnit::TestFixture {
+public:
+    void testBreadthFirstVisit();
+    void testDepthFirstVisit();
+    void testBuildHierarchy();
+    void testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue();
+    void testBasicVisitor();
+    void testAggregator();
+    void testInfluence();
+    void testScores();
+    void testWriter();
+    void testNormalizer();
+    void testDetectorEqualizing();
+    void testShouldWritePartition();
 
-    static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CHierarchicalResultsTest_h
diff --git a/lib/model/unittest/CInterimBucketCorrectorTest.cc b/lib/model/unittest/CInterimBucketCorrectorTest.cc
index 38eacb716a..44bebd932f 100644
--- a/lib/model/unittest/CInterimBucketCorrectorTest.cc
+++ b/lib/model/unittest/CInterimBucketCorrectorTest.cc
@@ -20,42 +20,36 @@ using namespace ml;
 using namespace model;
 
-namespace
-{
+namespace {
 using TDouble1Vec = core::CSmallVector<double, 1>;
 using TDouble10Vec = core::CSmallVector<double, 10>;
 const double EPSILON = 1e-10;
 }
 
-CppUnit::Test *CInterimBucketCorrectorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CInterimBucketCorrectorTest");
+CppUnit::Test* CInterimBucketCorrectorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CInterimBucketCorrectorTest");
 
     suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>(
-        "CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue",
-        &CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>(
-        "CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline",
-        &CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>(
-        "CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode",
-        &CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>(
-        "CInterimBucketCorrectorTest::testPersist",
-        &CInterimBucketCorrectorTest::testPersist));
+        "CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue", &CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CInterimBucketCorrectorTest>("CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline",
+                                                             &CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CInterimBucketCorrectorTest>("CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode",
+                                                             &CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>("CInterimBucketCorrectorTest::testPersist",
+                                                                               &CInterimBucketCorrectorTest::testPersist));
 
     return suiteOfTests;
 }
 
-void CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue()
-{
+void CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue() {
     core_t::TTime bucketLength(3600);
     CInterimBucketCorrector corrector(bucketLength);
 
     core_t::TTime now = 3600;
     core_t::TTime end = now + 24 * bucketLength;
-    while (now < end)
-    {
+    while (now < end) {
         corrector.update(now, 100);
         now += bucketLength;
     }
@@ -118,8 +112,7 @@ void CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue()
     }
 }
 
-void CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline()
-{
+void CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline() {
     core_t::TTime bucketLength(3600);
     CInterimBucketCorrector corrector(bucketLength);
 
@@ -129,15 +122,13 @@ void CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline()
     CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, correction, EPSILON);
 }
 
-void CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode()
-{
+void CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode() {
     core_t::TTime bucketLength(3600);
     CInterimBucketCorrector corrector(bucketLength);
 
     core_t::TTime now = 3600;
     core_t::TTime end = now + 24 * bucketLength;
-    while (now < end)
-    {
+    while (now < end) {
         corrector.update(now, 100);
         now += bucketLength;
     }
@@ -180,15 +171,13 @@ void CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode()
     CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, correction[9], EPSILON);
 }
 
-void CInterimBucketCorrectorTest::testPersist()
-{
+void CInterimBucketCorrectorTest::testPersist() {
     core_t::TTime bucketLength(300);
     CInterimBucketCorrector corrector(bucketLength);
 
     core_t::TTime now = 300;
     core_t::TTime end = now + 24 * bucketLength;
-    while (now < end)
-    {
+    while (now < end) {
         corrector.update(now, 100);
         now += bucketLength;
     }
@@ -209,9 +198,7 @@ void CInterimBucketCorrectorTest::testPersist()
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
     CInterimBucketCorrector restoredCorrector(bucketLength);
-    traverser.traverseSubLevel(boost::bind(&CInterimBucketCorrector::acceptRestoreTraverser,
-                                           &restoredCorrector,
-                                           _1));
+    traverser.traverseSubLevel(boost::bind(&CInterimBucketCorrector::acceptRestoreTraverser, &restoredCorrector, _1));
 
     correction = restoredCorrector.corrections(now, 50, 1000, value);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(500.0, correction, EPSILON);
diff --git a/lib/model/unittest/CInterimBucketCorrectorTest.h b/lib/model/unittest/CInterimBucketCorrectorTest.h
index e4961ddb3f..1aa139ecbe 100644
--- a/lib/model/unittest/CInterimBucketCorrectorTest.h
+++ b/lib/model/unittest/CInterimBucketCorrectorTest.h
@@ -8,17 +8,14 @@
 
 #include <cppunit/extensions/HelperMacros.h>
 
+class CInterimBucketCorrectorTest : public CppUnit::TestFixture {
+public:
+    void testCorrectionsGivenSingleValue();
+    void testCorrectionsGivenSingleValueAndNoBaseline();
+    void testCorrectionsGivenMultiValueAndMultiMode();
+    void testPersist();
-class CInterimBucketCorrectorTest : public CppUnit::TestFixture
-{
-    public:
-        void testCorrectionsGivenSingleValue();
-        void
testCorrectionsGivenSingleValueAndNoBaseline(); - void testCorrectionsGivenMultiValueAndMultiMode(); - void testPersist(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CInterimBucketCorrectorTest_h - diff --git a/lib/model/unittest/CLimitsTest.cc b/lib/model/unittest/CLimitsTest.cc index 74dc9f1f3a..31af2c416a 100644 --- a/lib/model/unittest/CLimitsTest.cc +++ b/lib/model/unittest/CLimitsTest.cc @@ -7,44 +7,32 @@ #include +CppUnit::Test* CLimitsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLimitsTest"); -CppUnit::Test *CLimitsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CLimitsTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLimitsTest::testTrivial", - &CLimitsTest::testTrivial) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLimitsTest::testValid", - &CLimitsTest::testValid) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CLimitsTest::testInvalid", - &CLimitsTest::testInvalid) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CLimitsTest::testTrivial", &CLimitsTest::testTrivial)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLimitsTest::testValid", &CLimitsTest::testValid)); + suiteOfTests->addTest(new CppUnit::TestCaller("CLimitsTest::testInvalid", &CLimitsTest::testInvalid)); return suiteOfTests; } -void CLimitsTest::testTrivial() -{ +void CLimitsTest::testTrivial() { ml::model::CLimits config; CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_AUTOCONFIG_EVENTS, config.autoConfigEvents()); CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_ANOMALY_MAX_TIME_BUCKETS, config.anomalyMaxTimeBuckets()); CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_RESULTS_MAX_EXAMPLES, config.maxExamples()); - CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD / 100.0, - config.unusualProbabilityThreshold()); + CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD / 100.0, config.unusualProbabilityThreshold()); CPPUNIT_ASSERT_EQUAL(ml::model::CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB, config.memoryLimitMB()); } -void CLimitsTest::testValid() -{ +void CLimitsTest::testValid() { ml::model::CLimits config; CPPUNIT_ASSERT(config.init("testfiles/mllimits.conf")); // This one isn't present in the config file so should be defaulted - CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_ANOMALY_MAX_TIME_BUCKETS, - config.anomalyMaxTimeBuckets()); + CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_ANOMALY_MAX_TIME_BUCKETS, config.anomalyMaxTimeBuckets()); CPPUNIT_ASSERT_EQUAL(size_t(8), config.maxExamples()); @@ -53,9 +41,7 @@ void CLimitsTest::testValid() CPPUNIT_ASSERT_EQUAL(size_t(4567), config.memoryLimitMB()); } -void CLimitsTest::testInvalid() -{ +void CLimitsTest::testInvalid() { ml::model::CLimits config; CPPUNIT_ASSERT(!config.init("testfiles/invalidmllimits.conf")); } - diff --git a/lib/model/unittest/CLimitsTest.h b/lib/model/unittest/CLimitsTest.h index cb552b45d9..0f9cc92076 100644 --- a/lib/model/unittest/CLimitsTest.h +++ b/lib/model/unittest/CLimitsTest.h @@ -8,16 +8,13 @@ #include +class CLimitsTest : public CppUnit::TestFixture { +public: + void testTrivial(); + void testValid(); + void testInvalid(); -class CLimitsTest : public CppUnit::TestFixture -{ - public: - void testTrivial(); - void testValid(); - void testInvalid(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CLimitsTest_h - diff --git a/lib/model/unittest/CMemoryUsageEstimatorTest.cc 
b/lib/model/unittest/CMemoryUsageEstimatorTest.cc index c684c0514f..5159505c12 100644 --- a/lib/model/unittest/CMemoryUsageEstimatorTest.cc +++ b/lib/model/unittest/CMemoryUsageEstimatorTest.cc @@ -18,38 +18,31 @@ using namespace ml; using namespace model; -namespace -{ +namespace { -void addValue(CMemoryUsageEstimator &estimator, +void addValue(CMemoryUsageEstimator& estimator, std::size_t memory, std::size_t people, std::size_t attributes, - std::size_t correlations = 0) -{ + std::size_t correlations = 0) { CMemoryUsageEstimator::TSizeArray predictors; - predictors[CMemoryUsageEstimator::E_People] = people; - predictors[CMemoryUsageEstimator::E_Attributes] = attributes; + predictors[CMemoryUsageEstimator::E_People] = people; + predictors[CMemoryUsageEstimator::E_Attributes] = attributes; predictors[CMemoryUsageEstimator::E_Correlations] = correlations; estimator.addValue(predictors, memory); } -CMemoryUsageEstimator::TOptionalSize estimate(CMemoryUsageEstimator &estimator, - std::size_t people, - std::size_t attributes, - std::size_t correlations = 0) -{ +CMemoryUsageEstimator::TOptionalSize +estimate(CMemoryUsageEstimator& estimator, std::size_t people, std::size_t attributes, std::size_t correlations = 0) { CMemoryUsageEstimator::TSizeArray predictors; - predictors[CMemoryUsageEstimator::E_People] = people; - predictors[CMemoryUsageEstimator::E_Attributes] = attributes; + predictors[CMemoryUsageEstimator::E_People] = people; + predictors[CMemoryUsageEstimator::E_Attributes] = attributes; predictors[CMemoryUsageEstimator::E_Correlations] = correlations; return estimator.estimate(predictors); } - } -void CMemoryUsageEstimatorTest::testEstimateLinear() -{ +void CMemoryUsageEstimatorTest::testEstimateLinear() { LOG_DEBUG("Running estimator test estimate linear"); CMemoryUsageEstimator estimator; @@ -83,8 +76,7 @@ void CMemoryUsageEstimatorTest::testEstimateLinear() CPPUNIT_ASSERT_EQUAL(std::size_t(1042), mem.get()); // Test that after 10 estimates we need to add some more real values - for (std::size_t i = 0; i < 10; i++) - { + for (std::size_t i = 0; i < 10; i++) { mem = estimate(estimator, 4, 1); } CPPUNIT_ASSERT(!mem); @@ -104,8 +96,7 @@ void CMemoryUsageEstimatorTest::testEstimateLinear() CPPUNIT_ASSERT(!mem); } -void CMemoryUsageEstimatorTest::testEstimateNonlinear() -{ +void CMemoryUsageEstimatorTest::testEstimateNonlinear() { LOG_DEBUG("Running estimator test estimate non-linear"); { @@ -139,23 +130,22 @@ void CMemoryUsageEstimatorTest::testEstimateNonlinear() int cScale = 30; CMemoryUsageEstimator estimator; - addValue(estimator, pScale * 10*10 + aScale * 9*9 + cScale * 15*15, 10, 9, 15); - addValue(estimator, pScale * 11*11 + aScale * 11*11 + cScale * 20*20, 11, 11, 20); - addValue(estimator, pScale * 12*12 + aScale * 13*13 + cScale * 25*25, 12, 13, 25); - addValue(estimator, pScale * 13*13 + aScale * 15*15 + cScale * 26*26, 13, 15, 26); - addValue(estimator, pScale * 17*17 + aScale * 19*19 + cScale * 27*27, 17, 19, 27); - addValue(estimator, pScale * 20*20 + aScale * 19*19 + cScale * 30*30, 20, 19, 30); - addValue(estimator, pScale * 20*20 + aScale * 25*25 + cScale * 40*40, 20, 25, 40); + addValue(estimator, pScale * 10 * 10 + aScale * 9 * 9 + cScale * 15 * 15, 10, 9, 15); + addValue(estimator, pScale * 11 * 11 + aScale * 11 * 11 + cScale * 20 * 20, 11, 11, 20); + addValue(estimator, pScale * 12 * 12 + aScale * 13 * 13 + cScale * 25 * 25, 12, 13, 25); + addValue(estimator, pScale * 13 * 13 + aScale * 15 * 15 + cScale * 26 * 26, 13, 15, 26); + addValue(estimator, pScale * 17 * 17 
+ aScale * 19 * 19 + cScale * 27 * 27, 17, 19, 27); + addValue(estimator, pScale * 20 * 20 + aScale * 19 * 19 + cScale * 30 * 30, 20, 19, 30); + addValue(estimator, pScale * 20 * 20 + aScale * 25 * 25 + cScale * 40 * 40, 20, 25, 40); CMemoryUsageEstimator::TOptionalSize mem = estimate(estimator, 25, 35, 45); - std::size_t actual = pScale * 25*25 + aScale * 35*35 + cScale * 45*45; + std::size_t actual = pScale * 25 * 25 + aScale * 35 * 35 + cScale * 45 * 45; LOG_DEBUG("actual = " << actual << ", estimated = " << mem.get()); CPPUNIT_ASSERT(static_cast(actual - mem.get()) / static_cast(actual) < 0.15); } } -void CMemoryUsageEstimatorTest::testPersist() -{ +void CMemoryUsageEstimatorTest::testPersist() { LOG_DEBUG("Running estimator test persist"); CMemoryUsageEstimator origEstimator; @@ -174,9 +164,7 @@ void CMemoryUsageEstimatorTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); CMemoryUsageEstimator restoredEstimator; - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CMemoryUsageEstimator::acceptRestoreTraverser, - &restoredEstimator, - _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CMemoryUsageEstimator::acceptRestoreTraverser, &restoredEstimator, _1))); // The XML representation of the new data gatherer should be the same // as the original. @@ -192,7 +180,7 @@ void CMemoryUsageEstimatorTest::testPersist() int pScale = 10000; int aScale = 5; int cScale = 3; - addValue(origEstimator, pScale * 10 + aScale * 9 + cScale * 15, 10, 9, 15); + addValue(origEstimator, pScale * 10 + aScale * 9 + cScale * 15, 10, 9, 15); addValue(origEstimator, pScale * 11 + aScale * 11 + cScale * 20, 11, 11, 20); addValue(origEstimator, pScale * 12 + aScale * 13 + cScale * 25, 12, 13, 25); addValue(origEstimator, pScale * 13 + aScale * 15 + cScale * 26, 13, 15, 26); @@ -213,9 +201,7 @@ void CMemoryUsageEstimatorTest::testPersist() core::CRapidXmlStateRestoreTraverser traverser(parser); CMemoryUsageEstimator restoredEstimator; - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CMemoryUsageEstimator::acceptRestoreTraverser, - &restoredEstimator, - _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CMemoryUsageEstimator::acceptRestoreTraverser, &restoredEstimator, _1))); // The XML representation of the new data gatherer should be the same // as the original. 
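The two CMemoryUsageEstimator tests above pin down a small contract: estimates are a linear function of the predictors fed to addValue(), a mismatched (here quadratic) memory profile only has to be tracked to within 15%, and after ten consecutive estimates the estimator returns nothing until fresh real measurements arrive. The sketch below illustrates that contract only; it is not ml-cpp's implementation, CToyMemoryEstimator and its members are hypothetical names, and for brevity it extrapolates a single predictor from the last two measurements instead of fitting people, attributes and correlations jointly.

    #include <boost/optional.hpp>

    #include <cstddef>
    #include <utility>
    #include <vector>

    class CToyMemoryEstimator {
    public:
        //! Record a real measurement and reset the estimate budget.
        void addValue(std::size_t people, std::size_t memory) {
            m_Values.emplace_back(people, memory);
            m_EstimatesSinceLastValue = 0;
        }

        //! Extrapolate linearly from the last two measurements, refusing
        //! after ten consecutive estimates so the caller must re-measure.
        boost::optional<std::size_t> estimate(std::size_t people) {
            if (m_Values.size() < 2 || m_EstimatesSinceLastValue >= 10) {
                return boost::none;
            }
            const auto& a = m_Values[m_Values.size() - 2];
            const auto& b = m_Values.back();
            if (a.first == b.first) {
                return boost::none; // no gradient information
            }
            ++m_EstimatesSinceLastValue;
            double slope = (double(b.second) - double(a.second)) /
                           (double(b.first) - double(a.first));
            double memory = double(b.second) + slope * (double(people) - double(b.first));
            return static_cast<std::size_t>(memory < 0.0 ? 0.0 : memory);
        }

    private:
        using TSizeSizePrVec = std::vector<std::pair<std::size_t, std::size_t>>;
        TSizeSizePrVec m_Values;
        std::size_t m_EstimatesSinceLastValue = 0;
    };

With this toy version, addValue(10, 1000) followed by addValue(20, 2000) makes estimate(30) return 3000, and the eleventh consecutive estimate() call returns none, mirroring the loop in testEstimateLinear that exhausts the budget with ten estimates at (4, 1).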
@@ -229,19 +215,15 @@ void CMemoryUsageEstimatorTest::testPersist() } } -CppUnit::Test *CMemoryUsageEstimatorTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMemoryUsageEstimatorTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMemoryUsageEstimatorTest::testEstimateLinear", - &CMemoryUsageEstimatorTest::testEstimateLinear) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMemoryUsageEstimatorTest::testEstimateNonlinear", - &CMemoryUsageEstimatorTest::testEstimateNonlinear) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMemoryUsageEstimatorTest::testPersist", - &CMemoryUsageEstimatorTest::testPersist) ); +CppUnit::Test* CMemoryUsageEstimatorTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageEstimatorTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CMemoryUsageEstimatorTest::testEstimateLinear", + &CMemoryUsageEstimatorTest::testEstimateLinear)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMemoryUsageEstimatorTest::testEstimateNonlinear", + &CMemoryUsageEstimatorTest::testEstimateNonlinear)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMemoryUsageEstimatorTest::testPersist", + &CMemoryUsageEstimatorTest::testPersist)); return suiteOfTests; } diff --git a/lib/model/unittest/CMemoryUsageEstimatorTest.h b/lib/model/unittest/CMemoryUsageEstimatorTest.h index 665c07234f..63ca12d2bc 100644 --- a/lib/model/unittest/CMemoryUsageEstimatorTest.h +++ b/lib/model/unittest/CMemoryUsageEstimatorTest.h @@ -9,14 +9,13 @@ #include -class CMemoryUsageEstimatorTest : public CppUnit::TestFixture -{ - public: - void testEstimateLinear(); - void testEstimateNonlinear(); - void testPersist(); +class CMemoryUsageEstimatorTest : public CppUnit::TestFixture { +public: + void testEstimateLinear(); + void testEstimateNonlinear(); + void testPersist(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CMemoryUsageEstimatorTest_h diff --git a/lib/model/unittest/CMetricAnomalyDetectorTest.cc b/lib/model/unittest/CMetricAnomalyDetectorTest.cc index de52a82ba1..04fc40fda6 100644 --- a/lib/model/unittest/CMetricAnomalyDetectorTest.cc +++ b/lib/model/unittest/CMetricAnomalyDetectorTest.cc @@ -36,129 +36,92 @@ using namespace ml; -namespace -{ +namespace { using TTimeTimePr = std::pair; using TTimeTimePrVec = std::vector; using TDoubleVec = std::vector; -bool doIntersect(const TTimeTimePr &i1, const TTimeTimePr &i2) -{ +bool doIntersect(const TTimeTimePr& i1, const TTimeTimePr& i2) { return !(i2.second <= i1.first || i1.second <= i2.first); } -class CResultWriter : public ml::model::CHierarchicalResultsVisitor -{ - public: - static const double HIGH_ANOMALY_SCORE; - public: - CResultWriter(const model::CAnomalyDetectorModelConfig &modelConfig, - const model::CLimits &limits, - core_t::TTime bucketLength) : - m_ModelConfig(modelConfig), - m_Limits(limits), - m_BucketLength(bucketLength) - {} - - void operator()(ml::model::CAnomalyDetector &detector, - ml::core_t::TTime start, - ml::core_t::TTime end) - { - ml::model::CHierarchicalResults results; - detector.buildResults(start, end, results); - results.buildHierarchy(); - ml::model::CHierarchicalResultsAggregator aggregator(m_ModelConfig); - results.bottomUpBreadthFirst(aggregator); - ml::model::CHierarchicalResultsProbabilityFinalizer finalizer; - results.bottomUpBreadthFirst(finalizer); - ml::model::CHierarchicalResultsPopulator populator(m_Limits); - results.bottomUpBreadthFirst(populator); - 
results.bottomUpBreadthFirst(*this); - } +class CResultWriter : public ml::model::CHierarchicalResultsVisitor { +public: + static const double HIGH_ANOMALY_SCORE; + +public: + CResultWriter(const model::CAnomalyDetectorModelConfig& modelConfig, const model::CLimits& limits, core_t::TTime bucketLength) + : m_ModelConfig(modelConfig), m_Limits(limits), m_BucketLength(bucketLength) {} + + void operator()(ml::model::CAnomalyDetector& detector, ml::core_t::TTime start, ml::core_t::TTime end) { + ml::model::CHierarchicalResults results; + detector.buildResults(start, end, results); + results.buildHierarchy(); + ml::model::CHierarchicalResultsAggregator aggregator(m_ModelConfig); + results.bottomUpBreadthFirst(aggregator); + ml::model::CHierarchicalResultsProbabilityFinalizer finalizer; + results.bottomUpBreadthFirst(finalizer); + ml::model::CHierarchicalResultsPopulator populator(m_Limits); + results.bottomUpBreadthFirst(populator); + results.bottomUpBreadthFirst(*this); + } - //! Visit a node. - virtual void visit(const ml::model::CHierarchicalResults &results, - const ml::model::CHierarchicalResults::TNode &node, - bool pivot) - { - if (pivot) - { - return; - } + //! Visit a node. + virtual void visit(const ml::model::CHierarchicalResults& results, const ml::model::CHierarchicalResults::TNode& node, bool pivot) { + if (pivot) { + return; + } - if (!this->shouldWriteResult(m_Limits, results, node, pivot)) - { - return; - } + if (!this->shouldWriteResult(m_Limits, results, node, pivot)) { + return; + } - if (this->isSimpleCount(node)) - { - return; - } - if (!this->isLeaf(node)) - { - return; - } + if (this->isSimpleCount(node)) { + return; + } + if (!this->isLeaf(node)) { + return; + } - core_t::TTime bucketTime = node.s_BucketStartTime; - double anomalyFactor = node.s_RawAnomalyScore; - if (anomalyFactor > HIGH_ANOMALY_SCORE) - { - m_HighAnomalyTimes.push_back(TTimeTimePr(bucketTime, bucketTime + m_BucketLength)); - m_HighAnomalyFactors.push_back(anomalyFactor); - } - else if (anomalyFactor > 0.0) - { - m_AnomalyFactors.push_back(anomalyFactor); - uint64_t currentRate(0); - if (node.s_AnnotatedProbability.s_CurrentBucketCount) - { - currentRate = *node.s_AnnotatedProbability.s_CurrentBucketCount; - } - m_AnomalyRates.push_back(static_cast(currentRate)); + core_t::TTime bucketTime = node.s_BucketStartTime; + double anomalyFactor = node.s_RawAnomalyScore; + if (anomalyFactor > HIGH_ANOMALY_SCORE) { + m_HighAnomalyTimes.push_back(TTimeTimePr(bucketTime, bucketTime + m_BucketLength)); + m_HighAnomalyFactors.push_back(anomalyFactor); + } else if (anomalyFactor > 0.0) { + m_AnomalyFactors.push_back(anomalyFactor); + uint64_t currentRate(0); + if (node.s_AnnotatedProbability.s_CurrentBucketCount) { + currentRate = *node.s_AnnotatedProbability.s_CurrentBucketCount; } + m_AnomalyRates.push_back(static_cast(currentRate)); } + } - bool operator()(ml::core_t::TTime time, - const ml::model::CHierarchicalResults::TNode &node, - bool isBucketInfluencer) - { - LOG_DEBUG((isBucketInfluencer ? "BucketInfluencer" : "Influencer ") - << node.s_Spec.print() << " initial score " << node.probability() - << ", time: " << time); + bool operator()(ml::core_t::TTime time, const ml::model::CHierarchicalResults::TNode& node, bool isBucketInfluencer) { + LOG_DEBUG((isBucketInfluencer ? 
"BucketInfluencer" : "Influencer ") + << node.s_Spec.print() << " initial score " << node.probability() << ", time: " << time); - return true; - } + return true; + } - const TTimeTimePrVec &highAnomalyTimes() const - { - return m_HighAnomalyTimes; - } + const TTimeTimePrVec& highAnomalyTimes() const { return m_HighAnomalyTimes; } - const TDoubleVec &highAnomalyFactors() const - { - return m_HighAnomalyFactors; - } + const TDoubleVec& highAnomalyFactors() const { return m_HighAnomalyFactors; } - const TDoubleVec &anomalyFactors() const - { - return m_AnomalyFactors; - } + const TDoubleVec& anomalyFactors() const { return m_AnomalyFactors; } - const TDoubleVec &anomalyRates() const - { - return m_AnomalyRates; - } + const TDoubleVec& anomalyRates() const { return m_AnomalyRates; } - private: - const model::CAnomalyDetectorModelConfig &m_ModelConfig; - const model::CLimits &m_Limits; - core_t::TTime m_BucketLength; - TTimeTimePrVec m_HighAnomalyTimes; - TDoubleVec m_HighAnomalyFactors; - TDoubleVec m_AnomalyFactors; - TDoubleVec m_AnomalyRates; +private: + const model::CAnomalyDetectorModelConfig& m_ModelConfig; + const model::CLimits& m_Limits; + core_t::TTime m_BucketLength; + TTimeTimePrVec m_HighAnomalyTimes; + TDoubleVec m_HighAnomalyFactors; + TDoubleVec m_AnomalyFactors; + TDoubleVec m_AnomalyRates; }; const double CResultWriter::HIGH_ANOMALY_SCORE(0.35); @@ -166,26 +129,19 @@ const double CResultWriter::HIGH_ANOMALY_SCORE(0.35); void importData(core_t::TTime firstTime, core_t::TTime lastTime, core_t::TTime bucketLength, - CResultWriter &outputResults, - const std::string &fileName, - model::CAnomalyDetector &detector) -{ + CResultWriter& outputResults, + const std::string& fileName, + model::CAnomalyDetector& detector) { test::CTimeSeriesTestData::TTimeDoublePrVec timeData; CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse(fileName, timeData)); core_t::TTime lastBucketTime = maths::CIntegerTools::ceil(firstTime, bucketLength); - for (std::size_t i = 0u; i < timeData.size(); ++i) - { + for (std::size_t i = 0u; i < timeData.size(); ++i) { core_t::TTime time = timeData[i].first; - for (/**/; - lastBucketTime + bucketLength <= time; - lastBucketTime += bucketLength) - { - outputResults(detector, - lastBucketTime, - lastBucketTime + bucketLength); + for (/**/; lastBucketTime + bucketLength <= time; lastBucketTime += bucketLength) { + outputResults(detector, lastBucketTime, lastBucketTime + bucketLength); } std::string value = core::CStringUtils::typeToString(timeData[i].second); @@ -196,22 +152,16 @@ void importData(core_t::TTime firstTime, detector.addRecord(time, fieldValues); } - for (/**/; - lastBucketTime + bucketLength <= lastTime; - lastBucketTime += bucketLength) - { - outputResults(detector, - lastBucketTime, - lastBucketTime + bucketLength); + for (/**/; lastBucketTime + bucketLength <= lastTime; lastBucketTime += bucketLength) { + outputResults(detector, lastBucketTime, lastBucketTime + bucketLength); } } void importCsvData(core_t::TTime firstTime, core_t::TTime bucketLength, - CResultWriter &outputResults, - const std::string &fileName, - model::CAnomalyDetector &detector) -{ + CResultWriter& outputResults, + const std::string& fileName, + model::CAnomalyDetector& detector) { using TifstreamPtr = boost::shared_ptr; TifstreamPtr ifs(new std::ifstream(fileName.c_str())); CPPUNIT_ASSERT(ifs->is_open()); @@ -225,8 +175,7 @@ void importCsvData(core_t::TTime firstTime, core_t::TTime lastBucketTime = firstTime; - while (std::getline(*ifs, line)) - { + while (std::getline(*ifs, line)) { 
LOG_TRACE("Got string: " << line); core::CRegex::TStrVec tokens; regex.split(line, tokens); @@ -234,13 +183,8 @@ void importCsvData(core_t::TTime firstTime, core_t::TTime time; CPPUNIT_ASSERT(core::CStringUtils::stringToType(tokens[0], time)); - for (/**/; - lastBucketTime + bucketLength <= time; - lastBucketTime += bucketLength) - { - outputResults(detector, - lastBucketTime, - lastBucketTime + bucketLength); + for (/**/; lastBucketTime + bucketLength <= time; lastBucketTime += bucketLength) { + outputResults(detector, lastBucketTime, lastBucketTime + bucketLength); } model::CAnomalyDetector::TStrCPtrVec fieldValues; @@ -250,19 +194,15 @@ void importCsvData(core_t::TTime firstTime, detector.addRecord(time, fieldValues); } - outputResults(detector, - lastBucketTime, - lastBucketTime + bucketLength); + outputResults(detector, lastBucketTime, lastBucketTime + bucketLength); ifs.reset(); } const std::string EMPTY_STRING; - } -void CMetricAnomalyDetectorTest::testAnomalies() -{ +void CMetricAnomalyDetectorTest::testAnomalies() { // The test data has one genuine anomaly in the interval // [1360617335, 1360617481]. The rest of the samples are // Gaussian with mean 30 and standard deviation 5. The @@ -299,29 +239,21 @@ void CMetricAnomalyDetectorTest::testAnomalies() static const core_t::TTime FIRST_TIME(1360540800); static const core_t::TTime LAST_TIME(FIRST_TIME + 86400); - static const core_t::TTime BUCKET_LENGTHS[] = - { - 120, 150, 180, 210, 240, 300, 450, 600, 900, 1200 - }; - static const TTimeTimePr ANOMALOUS_INTERVALS[] = - { - TTimeTimePr(1360576852, 1360578629), - TTimeTimePr(1360617335, 1360617481) - }; + static const core_t::TTime BUCKET_LENGTHS[] = {120, 150, 180, 210, 240, 300, 450, 600, 900, 1200}; + static const TTimeTimePr ANOMALOUS_INTERVALS[] = {TTimeTimePr(1360576852, 1360578629), TTimeTimePr(1360617335, 1360617481)}; double highRateNoise = 0.0; double lowRateNoise = 0.0; - for (size_t i = 0; i < boost::size(BUCKET_LENGTHS); ++i) - { - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTHS[i]); + for (size_t i = 0; i < boost::size(BUCKET_LENGTHS); ++i) { + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTHS[i]); model::CLimits limits; model::CSearchKey key(1, // identifier model::function_t::E_IndividualMetric, false, model_t::E_XF_None, - "n/a", "n/a"); + "n/a", + "n/a"); model::CAnomalyDetector detector(1, // identifier limits, modelConfig, @@ -330,11 +262,7 @@ void CMetricAnomalyDetectorTest::testAnomalies() modelConfig.factory(key)); CResultWriter writer(modelConfig, limits, BUCKET_LENGTHS[i]); - importData(FIRST_TIME, LAST_TIME, - BUCKET_LENGTHS[i], - writer, - "testfiles/variable_rate_metric.data", - detector); + importData(FIRST_TIME, LAST_TIME, BUCKET_LENGTHS[i], writer, "testfiles/variable_rate_metric.data", detector); TTimeTimePrVec highAnomalyTimes(writer.highAnomalyTimes()); TDoubleVec highAnomalyFactors(writer.highAnomalyFactors()); @@ -347,20 +275,15 @@ void CMetricAnomalyDetectorTest::testAnomalies() LOG_DEBUG("anomaly factors = " << core::CContainerPrinter::print(anomalyFactors)); LOG_DEBUG("anomaly rates = " << core::CContainerPrinter::print(anomalyRates)); - for (std::size_t j = 0u; j < highAnomalyTimes.size(); ++j) - { - LOG_DEBUG("Testing " << core::CContainerPrinter::print(highAnomalyTimes[j]) - << ' ' << highAnomalyFactors[j]); - CPPUNIT_ASSERT( doIntersect(highAnomalyTimes[j], ANOMALOUS_INTERVALS[0]) - || 
doIntersect(highAnomalyTimes[j], ANOMALOUS_INTERVALS[1])); + for (std::size_t j = 0u; j < highAnomalyTimes.size(); ++j) { + LOG_DEBUG("Testing " << core::CContainerPrinter::print(highAnomalyTimes[j]) << ' ' << highAnomalyFactors[j]); + CPPUNIT_ASSERT(doIntersect(highAnomalyTimes[j], ANOMALOUS_INTERVALS[0]) || + doIntersect(highAnomalyTimes[j], ANOMALOUS_INTERVALS[1])); } - if (!anomalyFactors.empty()) - { - double signal = std::accumulate(highAnomalyFactors.begin(), - highAnomalyFactors.end(), 0.0); - double noise = std::accumulate(anomalyFactors.begin(), - anomalyFactors.end(), 0.0); + if (!anomalyFactors.empty()) { + double signal = std::accumulate(highAnomalyFactors.begin(), highAnomalyFactors.end(), 0.0); + double noise = std::accumulate(anomalyFactors.begin(), anomalyFactors.end(), 0.0); LOG_DEBUG("S/N = " << (signal / noise)); CPPUNIT_ASSERT(signal / noise > 90.0); } @@ -369,50 +292,42 @@ void CMetricAnomalyDetectorTest::testAnomalies() TDoubleVec orderedAnomalyRates(anomalyRates); std::sort(orderedAnomalyRates.begin(), orderedAnomalyRates.end()); std::size_t maxStep = 1; - for (std::size_t j = 2; j < orderedAnomalyRates.size(); ++j) - { - if (orderedAnomalyRates[j] - orderedAnomalyRates[j - 1] > - orderedAnomalyRates[maxStep] - orderedAnomalyRates[maxStep - 1]) - { + for (std::size_t j = 2; j < orderedAnomalyRates.size(); ++j) { + if (orderedAnomalyRates[j] - orderedAnomalyRates[j - 1] > orderedAnomalyRates[maxStep] - orderedAnomalyRates[maxStep - 1]) { maxStep = j; } } double partitionRate = 0.0; - if (maxStep < orderedAnomalyRates.size()) - { + if (maxStep < orderedAnomalyRates.size()) { partitionRate = 0.5 * (orderedAnomalyRates[maxStep] + orderedAnomalyRates[maxStep - 1]); } LOG_DEBUG("partition rate = " << partitionRate); // Compute the ratio of noise in the two rate channels. - for (std::size_t j = 0u; j < anomalyFactors.size(); ++j) - { - (anomalyRates[j] > partitionRate ? - highRateNoise : lowRateNoise) += anomalyFactors[j]; + for (std::size_t j = 0u; j < anomalyFactors.size(); ++j) { + (anomalyRates[j] > partitionRate ? highRateNoise : lowRateNoise) += anomalyFactors[j]; } } - LOG_DEBUG("high rate noise = " << highRateNoise - << ", low rate noise = " << lowRateNoise); + LOG_DEBUG("high rate noise = " << highRateNoise << ", low rate noise = " << lowRateNoise); // We don't have significantly more noise in the low rate channel. 
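     // Both sums are offset by 1.0 so that an empty channel cannot cause a
     // division by zero; the assertion then requires the high and low rate
     // channels to agree to within 20%, i.e. |(1 + low) / (1 + high) - 1| < 0.2.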
CPPUNIT_ASSERT(std::fabs((1.0 + lowRateNoise) / (1.0 + highRateNoise) - 1.0) < 0.2); } -void CMetricAnomalyDetectorTest::testPersist() -{ +void CMetricAnomalyDetectorTest::testPersist() { static const core_t::TTime FIRST_TIME(1360540800); static const core_t::TTime LAST_TIME(FIRST_TIME + 86400); static const core_t::TTime BUCKET_LENGTH(300); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); model::CLimits limits; model::CSearchKey key(1, // identifier model::function_t::E_IndividualMetric, false, model_t::E_XF_None, - "responsetime", "Airline"); + "responsetime", + "Airline"); model::CAnomalyDetector origDetector(1, // identifier limits, modelConfig, @@ -421,11 +336,7 @@ void CMetricAnomalyDetectorTest::testPersist() modelConfig.factory(key)); CResultWriter writer(modelConfig, limits, BUCKET_LENGTH); - importData(FIRST_TIME, LAST_TIME, - BUCKET_LENGTH, - writer, - "testfiles/variable_rate_metric.data", - origDetector); + importData(FIRST_TIME, LAST_TIME, BUCKET_LENGTH, writer, "testfiles/variable_rate_metric.data", origDetector); std::string origXml; { @@ -447,10 +358,8 @@ void CMetricAnomalyDetectorTest::testPersist() core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::acceptRestoreTraverser, - &restoredDetector, - EMPTY_STRING, - _1))); + CPPUNIT_ASSERT( + traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::acceptRestoreTraverser, &restoredDetector, EMPTY_STRING, _1))); } // The XML representation of the new typer should be the same as the original @@ -463,20 +372,19 @@ void CMetricAnomalyDetectorTest::testPersist() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CMetricAnomalyDetectorTest::testExcludeFrequent() -{ +void CMetricAnomalyDetectorTest::testExcludeFrequent() { static const core_t::TTime FIRST_TIME(1406916000); static const core_t::TTime BUCKET_LENGTH(3600); { - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); model::CLimits limits; model::CSearchKey key(1, // identifier model::function_t::E_IndividualMetric, false, model_t::E_XF_None, - "bytes", "host"); + "bytes", + "host"); model::CAnomalyDetector detector(1, // identifier limits, modelConfig, @@ -485,11 +393,7 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent() modelConfig.factory(key)); CResultWriter writer(modelConfig, limits, BUCKET_LENGTH); - importCsvData(FIRST_TIME, - BUCKET_LENGTH, - writer, - "testfiles/excludefrequent_two_series.txt", - detector); + importCsvData(FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/excludefrequent_two_series.txt", detector); TTimeTimePrVec highAnomalyTimes(writer.highAnomalyTimes()); TDoubleVec highAnomalyFactors(writer.highAnomalyFactors()); @@ -502,14 +406,14 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent() CPPUNIT_ASSERT_DOUBLES_EQUAL(99.0, highAnomalyFactors[1], 0.5); } { - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); model::CLimits 
 limits;
 model::CSearchKey key(1, // identifier
                       model::function_t::E_IndividualMetric,
                       false,
                       model_t::E_XF_By,
-                      "bytes", "host");
+                      "bytes",
+                      "host");
 model::CAnomalyDetector detector(1, // identifier
                                  limits,
                                  modelConfig,
@@ -518,11 +422,7 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent()
                                  modelConfig.factory(key));
 CResultWriter writer(modelConfig, limits, BUCKET_LENGTH);
 
-        importCsvData(FIRST_TIME,
-                      BUCKET_LENGTH,
-                      writer,
-                      "testfiles/excludefrequent_two_series.txt",
-                      detector);
+        importCsvData(FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/excludefrequent_two_series.txt", detector);
 
         TTimeTimePrVec highAnomalyTimes(writer.highAnomalyTimes());
         TDoubleVec highAnomalyFactors(writer.highAnomalyFactors());
@@ -536,19 +436,15 @@
     }
 }
 
-CppUnit::Test *CMetricAnomalyDetectorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMetricAnomalyDetectorTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricAnomalyDetectorTest>(
-                               "CMetricAnomalyDetectorTest::testAnomalies",
-                               &CMetricAnomalyDetectorTest::testAnomalies) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricAnomalyDetectorTest>(
-                               "CMetricAnomalyDetectorTest::testPersist",
-                               &CMetricAnomalyDetectorTest::testPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricAnomalyDetectorTest>(
-                               "CMetricAnomalyDetectorTest::testExcludeFrequent",
-                               &CMetricAnomalyDetectorTest::testExcludeFrequent) );
+CppUnit::Test* CMetricAnomalyDetectorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricAnomalyDetectorTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricAnomalyDetectorTest>("CMetricAnomalyDetectorTest::testAnomalies",
+                                                                              &CMetricAnomalyDetectorTest::testAnomalies));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricAnomalyDetectorTest>("CMetricAnomalyDetectorTest::testPersist",
+                                                                              &CMetricAnomalyDetectorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricAnomalyDetectorTest>("CMetricAnomalyDetectorTest::testExcludeFrequent",
+                                                                              &CMetricAnomalyDetectorTest::testExcludeFrequent));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CMetricAnomalyDetectorTest.h b/lib/model/unittest/CMetricAnomalyDetectorTest.h
index 770e6d018e..32a25d452f 100644
--- a/lib/model/unittest/CMetricAnomalyDetectorTest.h
+++ b/lib/model/unittest/CMetricAnomalyDetectorTest.h
@@ -9,15 +9,13 @@
 #include <cppunit/extensions/HelperMacros.h>
 
-class CMetricAnomalyDetectorTest : public CppUnit::TestFixture
-{
-    public:
-        void testAnomalies();
-        void testPersist();
-        void testExcludeFrequent();
+class CMetricAnomalyDetectorTest : public CppUnit::TestFixture {
+public:
+    void testAnomalies();
+    void testPersist();
+    void testExcludeFrequent();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CMetricAnomalyDetectorTest_h
-
diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc
index c6ce05a2ba..0a03be2e56 100644
--- a/lib/model/unittest/CMetricDataGathererTest.cc
+++ b/lib/model/unittest/CMetricDataGathererTest.cc
@@ -8,11 +8,11 @@
 #include
 #include
-#include
 #include
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -30,8 +30,7 @@
 using namespace ml;
 using namespace model;
 
-namespace
-{
+namespace {
 using TDoubleVec = std::vector<double>;
 using TSizeVec = std::vector<std::size_t>;
 using TSizeSizePr = std::pair<std::size_t, std::size_t>;
@@ -50,16 +49,11 @@
 using TTimeDoublePrVec = std::vector<TTimeDoublePr>;
 using TTimeDoublePrVecVec = std::vector<TTimeDoublePrVec>;
 using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 
-std::size_t addPerson(const std::string &p,
-                      CDataGatherer &gatherer,
-                      CResourceMonitor &resourceMonitor,
-                      std::size_t
numInfluencers = 0) -{ +std::size_t addPerson(const std::string& p, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, std::size_t numInfluencers = 0) { CDataGatherer::TStrCPtrVec person; person.push_back(&p); std::string i("i"); - for (std::size_t j = 0; j < numInfluencers; ++j) - { + for (std::size_t j = 0; j < numInfluencers; ++j) { person.push_back(&i); } person.resize(gatherer.fieldsOfInterest().size(), 0); @@ -68,12 +62,7 @@ std::size_t addPerson(const std::string &p, return *result.personId(); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, - core_t::TTime time, - const std::string &person, - double value) -{ +void addArrival(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, core_t::TTime time, const std::string& person, double value) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); std::string valueAsString(core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision)); @@ -85,14 +74,13 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, - const std::string &person, + const std::string& person, double lat, double lng, - const std::string &delimiter) -{ + const std::string& delimiter) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); std::string latlngAsString; @@ -107,14 +95,13 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, - const std::string &person, + const std::string& person, double value, - const std::string &influencer1, - const std::string &influencer2) -{ + const std::string& influencer1, + const std::string& influencer2) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); fieldValues.push_back(influencer1.empty() ? 
0 : &influencer1); @@ -128,39 +115,33 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -double doubleToStringToDouble(double value) -{ +double doubleToStringToDouble(double value) { std::string valueAsString(core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision)); double result(0.0); core::CStringUtils::stringToType(valueAsString, result); return result; } -void addArrivals(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, +void addArrivals(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, core_t::TTime increment, - const std::string &person, - const TDoubleVec &values) -{ - for (std::size_t i = 0; i < values.size(); ++i) - { + const std::string& person, + const TDoubleVec& values) { + for (std::size_t i = 0; i < values.size(); ++i) { addArrival(gatherer, resourceMonitor, time + (i * increment), person, values[i]); } } -double variance(const TDoubleVec &values, double &mean) -{ +double variance(const TDoubleVec& values, double& mean) { double total = 0.0; - for (std::size_t i = 0; i < values.size(); ++i) - { + for (std::size_t i = 0; i < values.size(); ++i) { total += values[i]; } mean = total / static_cast(values.size()); total = 0.0; - for (std::size_t i = 0; i < values.size(); ++i) - { + for (std::size_t i = 0; i < values.size(); ++i) { double x = values[i] - mean; total += (x * x); } @@ -170,11 +151,9 @@ double variance(const TDoubleVec &values, double &mean) const CSearchKey KEY; const std::string EMPTY_STRING; - } -void CMetricDataGathererTest::singleSeriesTests() -{ +void CMetricDataGathererTest::singleSeriesTests() { LOG_DEBUG("*** CMetricDataGathererTest::singleSeriesTests ***"); // Test that the various statistics come back as we suspect. 
@@ -182,36 +161,18 @@ void CMetricDataGathererTest::singleSeriesTests() const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; - TTimeDoublePr bucket1[] = - { - TTimeDoublePr(1, 1.0), - TTimeDoublePr(15, 2.1), - TTimeDoublePr(180, 0.9), - TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), - TTimeDoublePr(550, 2.0) - }; - TTimeDoublePr bucket2[] = - { - TTimeDoublePr(600, 2.0), - TTimeDoublePr(799, 2.2), - TTimeDoublePr(1199, 1.8) - }; - TTimeDoublePr bucket3[] = - { - TTimeDoublePr(1200, 2.1), - TTimeDoublePr(1250, 2.5) - }; - TTimeDoublePr bucket4[] = - { - TTimeDoublePr(1900, 3.5), - }; - TTimeDoublePr bucket5[] = - { - TTimeDoublePr(2420, 3.5), - TTimeDoublePr(2480, 3.2), - TTimeDoublePr(2490, 3.8) - }; + TTimeDoublePr bucket1[] = {TTimeDoublePr(1, 1.0), + TTimeDoublePr(15, 2.1), + TTimeDoublePr(180, 0.9), + TTimeDoublePr(190, 1.5), + TTimeDoublePr(400, 1.5), + TTimeDoublePr(550, 2.0)}; + TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; + TTimeDoublePr bucket3[] = {TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5)}; + TTimeDoublePr bucket4[] = { + TTimeDoublePr(1900, 3.5), + }; + TTimeDoublePr bucket5[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), TTimeDoublePr(2490, 3.8)}; { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); @@ -220,15 +181,26 @@ void CMetricDataGathererTest::singleSeriesTests() features.push_back(model_t::E_IndividualSumByBucketAndPerson); features.push_back(model_t::E_IndividualCountByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 2u); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(4), gatherer.numberFeatures()); - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i)); } @@ -256,8 +228,7 @@ void CMetricDataGathererTest::singleSeriesTests() CPPUNIT_ASSERT_EQUAL(true, featureData[3].second[0].second.s_IsInteger); } - for (size_t i = 1; i < boost::size(bucket1); ++i) - { + for (size_t i = 1; i < boost::size(bucket1); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", bucket1[i].second); } { @@ -280,8 +251,7 @@ void CMetricDataGathererTest::singleSeriesTests() core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); CPPUNIT_ASSERT_EQUAL(std::string("[(8 [2.1] 1 2), (185 [1.5] 1 2), (475 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) 
std::string origXml; @@ -298,9 +268,19 @@ void CMetricDataGathererTest::singleSeriesTests() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + traverser); // The XML representation of the new filter should be the // same as the original @@ -314,8 +294,7 @@ void CMetricDataGathererTest::singleSeriesTests() } gatherer.timeNow(startTime + bucketLength); - for (size_t i = 0; i < boost::size(bucket2); ++i) - { + for (size_t i = 0; i < boost::size(bucket2); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket2[i].first, "p", bucket2[i].second); } { @@ -330,12 +309,10 @@ void CMetricDataGathererTest::singleSeriesTests() CPPUNIT_ASSERT_EQUAL(true, featureData[3].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2.1] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2.2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [6] 1 3)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(600 [6] 1 3)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) 
std::string origXml; @@ -352,9 +329,19 @@ void CMetricDataGathererTest::singleSeriesTests() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + traverser); // The XML representation of the new filter should be the // same as the original @@ -376,9 +363,21 @@ void CMetricDataGathererTest::singleSeriesTests() features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TTimeDoublePrVecVec buckets; @@ -388,13 +387,11 @@ void CMetricDataGathererTest::singleSeriesTests() buckets.push_back(TTimeDoublePrVec(boost::begin(bucket4), boost::end(bucket4))); buckets.push_back(TTimeDoublePrVec(boost::begin(bucket5), boost::end(bucket5))); - for (std::size_t i = 0u; i < buckets.size(); ++i) - { + for (std::size_t i = 0u; i < buckets.size(); ++i) { LOG_DEBUG("Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); - const TTimeDoublePrVec &bucket = buckets[i]; - for (std::size_t j = 0u; j < bucket.size(); ++j) - { + const TTimeDoublePrVec& bucket = buckets[i]; + for (std::size_t j = 0u; j < bucket.size(); ++j) { addArrival(gatherer, m_ResourceMonitor, bucket[j].first, "p", bucket[j].second); } } @@ -406,27 +403,22 @@ void CMetricDataGathererTest::singleSeriesTests() gatherer.featureData(featureBucketStart, bucketLength, featureData); CPPUNIT_ASSERT(!featureData.empty()); CPPUNIT_ASSERT_DOUBLES_EQUAL(3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - CPPUNIT_ASSERT_EQUAL(3.2, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(3.8, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL(3.2, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL(3.8, featureData[2].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(10.5, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(false, featureData[0].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.5] 1 4)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.2] 1 4)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.8] 1 4)]"), - 
core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [10.5] 1 3)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.5] 1 4)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.2] 1 4)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.8] 1 4)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [10.5] 1 3)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); } } -void CMetricDataGathererTest::multipleSeriesTests() -{ +void CMetricDataGathererTest::multipleSeriesTests() { LOG_DEBUG("*** CMetricDataGathererTest::multipleSeriesTests ***"); // Test that the various statistics come back as we suspect @@ -441,42 +433,36 @@ void CMetricDataGathererTest::multipleSeriesTests() features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p2", gatherer, m_ResourceMonitor)); - TTimeDoublePr bucket11[] = - { - TTimeDoublePr(1, 1.0), - TTimeDoublePr(15, 2.1), - TTimeDoublePr(180, 0.9), - TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), - TTimeDoublePr(550, 2.0) - }; - TTimeDoublePr bucket12[] = - { - TTimeDoublePr(600, 2.0), - TTimeDoublePr(799, 2.2), - TTimeDoublePr(1199, 1.8) - }; - TTimeDoublePr bucket13[] = - { - TTimeDoublePr(1200, 2.1), - TTimeDoublePr(1250, 2.5) - }; - TTimeDoublePr bucket14[] = - { - TTimeDoublePr(1900, 3.5), - }; - TTimeDoublePr bucket15[] = - { - TTimeDoublePr(2420, 3.5), - TTimeDoublePr(2480, 3.2), - TTimeDoublePr(2490, 3.8) - }; + TTimeDoublePr bucket11[] = {TTimeDoublePr(1, 1.0), + TTimeDoublePr(15, 2.1), + TTimeDoublePr(180, 0.9), + TTimeDoublePr(190, 1.5), + TTimeDoublePr(400, 1.5), + TTimeDoublePr(550, 2.0)}; + TTimeDoublePr bucket12[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; + TTimeDoublePr bucket13[] = {TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5)}; + TTimeDoublePr bucket14[] = { + TTimeDoublePr(1900, 3.5), + }; + TTimeDoublePr bucket15[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), TTimeDoublePr(2490, 3.8)}; TTimeDoublePrVecVec buckets1; buckets1.push_back(TTimeDoublePrVec(boost::begin(bucket11), boost::end(bucket11))); buckets1.push_back(TTimeDoublePrVec(boost::begin(bucket12), boost::end(bucket12))); @@ -484,48 +470,29 @@ void CMetricDataGathererTest::multipleSeriesTests() buckets1.push_back(TTimeDoublePrVec(boost::begin(bucket14), boost::end(bucket14))); buckets1.push_back(TTimeDoublePrVec(boost::begin(bucket15), boost::end(bucket15))); - TTimeDoublePr bucket21[] = - { - TTimeDoublePr(1, 1.0), - TTimeDoublePr(5, 1.0), - TTimeDoublePr(15, 2.1), - TTimeDoublePr(25, 2.0), - 
TTimeDoublePr(180, 0.9), - TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), - TTimeDoublePr(550, 2.0) - }; - TTimeDoublePr bucket22[] = - { - TTimeDoublePr(600, 2.0), - TTimeDoublePr(605, 2.0), - TTimeDoublePr(609, 2.0), - TTimeDoublePr(799, 2.2), - TTimeDoublePr(1199, 1.8) - }; - TTimeDoublePr bucket23[] = - { - TTimeDoublePr(1200, 2.1), - TTimeDoublePr(1250, 2.5), - TTimeDoublePr(1255, 2.2), - TTimeDoublePr(1256, 2.4), - TTimeDoublePr(1300, 2.2), - TTimeDoublePr(1400, 2.5) - }; - TTimeDoublePr bucket24[] = - { - TTimeDoublePr(1900, 3.5), - TTimeDoublePr(1950, 3.5) - }; - TTimeDoublePr bucket25[] = - { - TTimeDoublePr(2420, 3.5), - TTimeDoublePr(2480, 2.9), - TTimeDoublePr(2490, 3.9), - TTimeDoublePr(2500, 3.4), - TTimeDoublePr(2550, 4.1), - TTimeDoublePr(2600, 3.8) - }; + TTimeDoublePr bucket21[] = {TTimeDoublePr(1, 1.0), + TTimeDoublePr(5, 1.0), + TTimeDoublePr(15, 2.1), + TTimeDoublePr(25, 2.0), + TTimeDoublePr(180, 0.9), + TTimeDoublePr(190, 1.5), + TTimeDoublePr(400, 1.5), + TTimeDoublePr(550, 2.0)}; + TTimeDoublePr bucket22[] = { + TTimeDoublePr(600, 2.0), TTimeDoublePr(605, 2.0), TTimeDoublePr(609, 2.0), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; + TTimeDoublePr bucket23[] = {TTimeDoublePr(1200, 2.1), + TTimeDoublePr(1250, 2.5), + TTimeDoublePr(1255, 2.2), + TTimeDoublePr(1256, 2.4), + TTimeDoublePr(1300, 2.2), + TTimeDoublePr(1400, 2.5)}; + TTimeDoublePr bucket24[] = {TTimeDoublePr(1900, 3.5), TTimeDoublePr(1950, 3.5)}; + TTimeDoublePr bucket25[] = {TTimeDoublePr(2420, 3.5), + TTimeDoublePr(2480, 2.9), + TTimeDoublePr(2490, 3.9), + TTimeDoublePr(2500, 3.4), + TTimeDoublePr(2550, 4.1), + TTimeDoublePr(2600, 3.8)}; TTimeDoublePrVecVec buckets2; buckets2.push_back(TTimeDoublePrVec(boost::begin(bucket21), boost::end(bucket21))); buckets2.push_back(TTimeDoublePrVec(boost::begin(bucket22), boost::end(bucket22))); @@ -533,21 +500,18 @@ void CMetricDataGathererTest::multipleSeriesTests() buckets2.push_back(TTimeDoublePrVec(boost::begin(bucket24), boost::end(bucket24))); buckets2.push_back(TTimeDoublePrVec(boost::begin(bucket25), boost::end(bucket25))); - for (std::size_t i = 0u; i < 5; ++i) - { + for (std::size_t i = 0u; i < 5; ++i) { LOG_DEBUG("Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); - const TTimeDoublePrVec &bucket1 = buckets1[i]; - for (std::size_t j = 0u; j < bucket1.size(); ++j) - { + const TTimeDoublePrVec& bucket1 = buckets1[i]; + for (std::size_t j = 0u; j < bucket1.size(); ++j) { addArrival(gatherer, m_ResourceMonitor, bucket1[j].first, "p1", bucket1[j].second); } - const TTimeDoublePrVec &bucket2 = buckets2[i]; + const TTimeDoublePrVec& bucket2 = buckets2[i]; TMeanAccumulator a; - for (std::size_t j = 0u; j < bucket2.size(); ++j) - { + for (std::size_t j = 0u; j < bucket2.size(); ++j) { addArrival(gatherer, m_ResourceMonitor, bucket2[j].first, "p2", bucket2[j].second); a.add(bucket2[j].second); } @@ -558,8 +522,7 @@ void CMetricDataGathererTest::multipleSeriesTests() TSizeUInt64PrVec nonZeroCounts; gatherer.personNonZeroCounts(startTime + 4 * bucketLength, nonZeroCounts); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (1, 6)]"), - core::CContainerPrinter::print(nonZeroCounts)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (1, 6)]"), core::CContainerPrinter::print(nonZeroCounts)); TFeatureSizeFeatureDataPrVecPrVec featureData; core_t::TTime featureBucketStart = core_t::TTime(startTime + 4 * bucketLength); @@ -573,40 +536,32 @@ void CMetricDataGathererTest::multipleSeriesTests() CPPUNIT_ASSERT_EQUAL(std::size_t(2), 
featureData[3].second.size()); CPPUNIT_ASSERT_DOUBLES_EQUAL(3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - CPPUNIT_ASSERT_EQUAL(3.2, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(3.8, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL(3.2, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL(3.8, featureData[2].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(10.5, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(false, featureData[0].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.5] 1 4)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.2] 1 4)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.8] 1 4)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [10.5] 1 3)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.5] 1 4)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.2] 1 4)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.8] 1 4)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [10.5] 1 3)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); CPPUNIT_ASSERT_DOUBLES_EQUAL(3.6, featureData[0].second[1].second.s_BucketValue->value()[0], 1e-10); - CPPUNIT_ASSERT_EQUAL(2.9, featureData[1].second[1].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(4.1, featureData[2].second[1].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL(2.9, featureData[1].second[1].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL(4.1, featureData[2].second[1].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(21.6, featureData[3].second[1].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(false, featureData[0].second[1].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[1].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[1].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[1].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.45] 1 6)]"), - core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [2.9] 1 6)]"), - core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.9] 1 6)]"), - core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [21.6] 1 6)]"), - core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.45] 1 6)]"), core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [2.9] 1 6)]"), 
core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.9] 1 6)]"), core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [21.6] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); // Test persistence. (We check for idempotency.) std::string origXml; @@ -623,9 +578,19 @@ void CMetricDataGathererTest::multipleSeriesTests() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + traverser); // The XML representation of the new filter should be the // same as the original @@ -655,8 +620,7 @@ void CMetricDataGathererTest::multipleSeriesTests() CPPUNIT_ASSERT_EQUAL(std::size_t(0), gatherer.numberOverFieldValues()); gatherer.personNonZeroCounts(startTime + 4 * bucketLength, nonZeroCounts); - CPPUNIT_ASSERT_EQUAL(std::string("[(1, 6)]"), - core::CContainerPrinter::print(nonZeroCounts)); + CPPUNIT_ASSERT_EQUAL(std::string("[(1, 6)]"), core::CContainerPrinter::print(nonZeroCounts)); CPPUNIT_ASSERT_DOUBLES_EQUAL(6.0, gatherer.effectiveSampleCount(1), 1e-10); @@ -668,26 +632,21 @@ void CMetricDataGathererTest::multipleSeriesTests() CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[2].second.size()); CPPUNIT_ASSERT_DOUBLES_EQUAL(3.6, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - CPPUNIT_ASSERT_EQUAL(2.9, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(4.1, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL(2.9, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL(4.1, featureData[2].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(21.6, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(false, featureData[0].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.45] 1 6)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [2.9] 1 6)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.9] 1 6)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [21.6] 1 6)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.45] 1 6)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [2.9] 1 6)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.9] 1 6)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + 
CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [21.6] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); } -void CMetricDataGathererTest::testSampleCount() -{ +void CMetricDataGathererTest::testSampleCount() { LOG_DEBUG("*** CMetricDataGathererTest::testSampleCount ***"); // Test that we set sensible sample counts for each person. @@ -704,17 +663,28 @@ void CMetricDataGathererTest::testSampleCount() features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 0); std::size_t pid1 = addPerson("p1", gatherer, m_ResourceMonitor); std::size_t pid2 = addPerson("p2", gatherer, m_ResourceMonitor); test::CRandomNumbers rng; - for (std::size_t i = 0u; i < numberBuckets; ++i) - { + for (std::size_t i = 0u; i < numberBuckets; ++i) { LOG_DEBUG("Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); @@ -731,8 +701,7 @@ void CMetricDataGathererTest::testSampleCount() TDoubleVec count; rng.generateUniformSamples(1.0, 5.0, 1, count); LOG_DEBUG("count p2 = " << std::floor(count[0])); - for (std::size_t j = 0u; j < static_cast(count[0]); ++j) - { + for (std::size_t j = 0u; j < static_cast(count[0]); ++j) { addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 100 * (j + 1), "p2", 1.0); } } @@ -744,8 +713,7 @@ void CMetricDataGathererTest::testSampleCount() CPPUNIT_ASSERT_DOUBLES_EQUAL(6.0, gatherer.effectiveSampleCount(pid1), 1e-5); CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, gatherer.effectiveSampleCount(pid2), 1.0 + 1e-5); - for (std::size_t i = numberBuckets; i < 100; ++i) - { + for (std::size_t i = numberBuckets; i < 100; ++i) { LOG_DEBUG("Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 10, "p1", 1.0); @@ -754,8 +722,7 @@ void CMetricDataGathererTest::testSampleCount() CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, gatherer.effectiveSampleCount(pid1), 0.5); } -void CMetricDataGathererTest::testRemovePeople() -{ +void CMetricDataGathererTest::testRemovePeople() { LOG_DEBUG("*** CMetricDataGathererTest::testRemovePeople ***"); // Test various combinations of removed people. 
@@ -769,9 +736,21 @@ void CMetricDataGathererTest::testRemovePeople() features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p2", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson("p3", gatherer, m_ResourceMonitor)); @@ -781,34 +760,29 @@ void CMetricDataGathererTest::testRemovePeople() CPPUNIT_ASSERT_EQUAL(std::size_t(6), addPerson("p7", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(7), addPerson("p8", gatherer, m_ResourceMonitor)); - core_t::TTime times[][8] = - { - { 0, 0, 0, 0, 0, 0, 0, 0 }, - { 10, 20, 100, 0, 0, 0, 0, 0 }, - { 110, 120, 150, 170, 200, 0, 0, 0 }, - { 210, 220, 0, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0 }, - { 400, 410, 480, 510, 530, 0, 0, 0 }, - { 1040, 1100, 1080, 1200, 1300, 1311, 2100, 0 }, - { 2200, 2500, 2600, 2610, 2702, 2731, 2710, 2862 }, - }; - double values[][8] = - { - { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, - { 1.0, 2.0, 1.1, 0.0, 0.0, 0.0, 0.0, 0.0 }, - { 2.0, 5.0, 6.0, 1.0, 0.2, 0.0, 0.0, 0.0 }, - { 2.1, 2.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, - { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, - { 4.0, 1.0, 8.0, 1.0, 0.3, 0.0, 0.0, 0.0 }, - { 4.0, 1.0, 8.0, 1.0, 0.3, 1.1, 10.3, 0.0 }, - { 2.0, 5.0, 6.0, 1.0, 0.2, 3.1, 7.1, 6.2 }, - }; - for (std::size_t i = 0u; i < boost::size(values); ++i) - { - for (std::size_t j = 0u; j < boost::size(values[i]); ++j) - { - if (values[i][j] > 0.0) - { + core_t::TTime times[][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, + {10, 20, 100, 0, 0, 0, 0, 0}, + {110, 120, 150, 170, 200, 0, 0, 0}, + {210, 220, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {400, 410, 480, 510, 530, 0, 0, 0}, + {1040, 1100, 1080, 1200, 1300, 1311, 2100, 0}, + {2200, 2500, 2600, 2610, 2702, 2731, 2710, 2862}, + }; + double values[][8] = { + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {1.0, 2.0, 1.1, 0.0, 0.0, 0.0, 0.0, 0.0}, + {2.0, 5.0, 6.0, 1.0, 0.2, 0.0, 0.0, 0.0}, + {2.1, 2.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {4.0, 1.0, 8.0, 1.0, 0.3, 0.0, 0.0, 0.0}, + {4.0, 1.0, 8.0, 1.0, 0.3, 1.1, 10.3, 0.0}, + {2.0, 5.0, 6.0, 1.0, 0.2, 3.1, 7.1, 6.2}, + }; + for (std::size_t i = 0u; i < boost::size(values); ++i) { + for (std::size_t j = 0u; j < boost::size(values[i]); ++j) { + if (values[i][j] > 0.0) { addArrival(gatherer, m_ResourceMonitor, startTime + times[i][j], gatherer.personName(i), values[i][j]); } } @@ -820,9 +794,21 @@ void CMetricDataGathererTest::testRemovePeople() peopleToRemove.push_back(1); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + CDataGatherer expectedGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + 
EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p3", expectedGatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p4", expectedGatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson("p5", expectedGatherer, m_ResourceMonitor)); @@ -830,13 +816,10 @@ void CMetricDataGathererTest::testRemovePeople() CPPUNIT_ASSERT_EQUAL(std::size_t(4), addPerson("p7", expectedGatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(5), addPerson("p8", expectedGatherer, m_ResourceMonitor)); - std::size_t people[] = { 2, 3, 4, 5, 6, 7 }; - for (std::size_t i = 0u; i < boost::size(people); ++i) - { - for (std::size_t j = 0u; j < boost::size(values[people[i]]); ++j) - { - if (values[people[i]][j] > 0.0) - { + std::size_t people[] = {2, 3, 4, 5, 6, 7}; + for (std::size_t i = 0u; i < boost::size(people); ++i) { + for (std::size_t j = 0u; j < boost::size(values[people[i]]); ++j) { + if (values[people[i]][j] > 0.0) { addArrival(expectedGatherer, m_ResourceMonitor, startTime + times[people[i]][j], @@ -857,20 +840,29 @@ void CMetricDataGathererTest::testRemovePeople() peopleToRemove.push_back(7); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + CDataGatherer expectedGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p3", expectedGatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p6", expectedGatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson("p7", expectedGatherer, m_ResourceMonitor)); - std::size_t people[] = { 2, 5, 6 }; - for (std::size_t i = 0u; i < boost::size(people); ++i) - { - for (std::size_t j = 0u; j < boost::size(values[people[i]]); ++j) - { - if (values[people[i]][j] > 0.0) - { + std::size_t people[] = {2, 5, 6}; + for (std::size_t i = 0u; i < boost::size(people); ++i) { + for (std::size_t j = 0u; j < boost::size(values[people[i]]); ++j) { + if (values[people[i]][j] > 0.0) { addArrival(expectedGatherer, m_ResourceMonitor, startTime + times[people[i]][j], @@ -891,9 +883,21 @@ void CMetricDataGathererTest::testRemovePeople() peopleToRemove.push_back(6); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + CDataGatherer expectedGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 0); LOG_DEBUG("checksum = " << gatherer.checksum()); LOG_DEBUG("expected checksum = " << expectedGatherer.checksum()); @@ -906,21 +910,16 @@ void CMetricDataGathererTest::testRemovePeople() LOG_DEBUG("recycled = " << core::CContainerPrinter::print(gatherer.recycledPersonIds())); LOG_DEBUG("expected recycled = " << core::CContainerPrinter::print(expectedRecycled)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedRecycled), - 
core::CContainerPrinter::print(gatherer.recycledPersonIds()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedRecycled), core::CContainerPrinter::print(gatherer.recycledPersonIds()));
 }
 
-void CMetricDataGathererTest::testSum()
-{
+void CMetricDataGathererTest::testSum() {
     LOG_DEBUG("*** CMetricDataGathererTest::testSum ***");
 
     // Test sum and non-zero sum work as expected.
 
     const core_t::TTime bucketLength = 600;
-    const std::size_t bucketCounts[] =
-        {
-            2, 5, 2, 1, 0, 0, 4, 8, 0, 1
-        };
+    const std::size_t bucketCounts[] = {2, 5, 2, 1, 0, 0, 4, 8, 0, 1};
     const core_t::TTime startTime = 0;
 
     test::CRandomNumbers rng;
 
@@ -928,22 +927,45 @@
     TFeatureVec sumFeatures;
     sumFeatures.push_back(model_t::E_IndividualSumByBucketAndPerson);
     SModelParams params(bucketLength);
-    CDataGatherer sum(model_t::E_Metric, model_t::E_None, params,
-                      EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
-                      TStrVec(), false, KEY, sumFeatures, startTime, 0);
+    CDataGatherer sum(model_t::E_Metric,
+                      model_t::E_None,
+                      params,
+                      EMPTY_STRING,
+                      EMPTY_STRING,
+                      EMPTY_STRING,
+                      EMPTY_STRING,
+                      EMPTY_STRING,
+                      EMPTY_STRING,
+                      TStrVec(),
+                      false,
+                      KEY,
+                      sumFeatures,
+                      startTime,
+                      0);
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", sum, m_ResourceMonitor));
 
     TFeatureVec nonZeroSumFeatures;
     nonZeroSumFeatures.push_back(model_t::E_IndividualNonNullSumByBucketAndPerson);
 
-    CDataGatherer nonZeroSum(model_t::E_Metric, model_t::E_None, params,
-                             EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
-                             TStrVec(), false, KEY, nonZeroSumFeatures, startTime, 0);
+    CDataGatherer nonZeroSum(model_t::E_Metric,
+                             model_t::E_None,
+                             params,
+                             EMPTY_STRING,
+                             EMPTY_STRING,
+                             EMPTY_STRING,
+                             EMPTY_STRING,
+                             EMPTY_STRING,
+                             EMPTY_STRING,
+                             TStrVec(),
+                             false,
+                             KEY,
+                             nonZeroSumFeatures,
+                             startTime,
+                             0);
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", nonZeroSum, m_ResourceMonitor));
 
     core_t::TTime bucketStart = startTime;
-    for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i)
-    {
+    for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) {
         std::size_t count = bucketCounts[i];
 
         TDoubleVec times;
@@ -954,8 +976,7 @@
         rng.generateNormalSamples(5.0, 4.0, count, values);
 
         double expected = 0.0;
-        for (std::size_t j = 0u; j < times.size(); ++j)
-        {
+        for (std::size_t j = 0u; j < times.size(); ++j) {
             addArrival(sum, m_ResourceMonitor, bucketStart + static_cast<core_t::TTime>(times[j]), "p1", values[j]);
             addArrival(nonZeroSum, m_ResourceMonitor, bucketStart + static_cast<core_t::TTime>(times[j]), "p1", values[j]);
             expected += doubleToStringToDouble(values[j]);
@@ -966,9 +987,8 @@
             TFeatureSizeFeatureDataPrVecPrVec data;
             sum.featureData(bucketStart, bucketLength, data);
             CPPUNIT_ASSERT_EQUAL(std::size_t(1), data.size());
-            for (std::size_t j = 0u; j < data.size(); ++j)
-            {
-                const TSizeFeatureDataPrVec &featureData = data[j].second;
+            for (std::size_t j = 0u; j < data.size(); ++j) {
+                const TSizeFeatureDataPrVec& featureData = data[j].second;
                 CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size());
                 CPPUNIT_ASSERT_EQUAL(expected, featureData[j].second.s_BucketValue->value()[0]);
                 CPPUNIT_ASSERT_EQUAL(std::size_t(1), boost::unwrap_ref(featureData[j].second.s_Samples).size());
@@ -979,15 +999,11 @@
             TFeatureSizeFeatureDataPrVecPrVec data;
             nonZeroSum.featureData(bucketStart, bucketLength, data);
             CPPUNIT_ASSERT_EQUAL(std::size_t(1),
data.size()); - for (std::size_t j = 0u; j < data.size(); ++j) - { - const TSizeFeatureDataPrVec &featureData = data[j].second; - if (count == 0) - { + for (std::size_t j = 0u; j < data.size(); ++j) { + const TSizeFeatureDataPrVec& featureData = data[j].second; + if (count == 0) { CPPUNIT_ASSERT_EQUAL(std::size_t(0), featureData.size()); - } - else - { + } else { CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(expected, featureData[j].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(std::size_t(1), boost::unwrap_ref(featureData[j].second.s_Samples).size()); @@ -1002,8 +1018,7 @@ void CMetricDataGathererTest::testSum() } } -void CMetricDataGathererTest::singleSeriesOutOfOrderTests() -{ +void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { LOG_DEBUG("*** CMetricDataGathererTest::singleSeriesOutOfOrderTests ***"); // Test that the various statistics come back as we suspect. @@ -1015,21 +1030,9 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() params.s_SampleCountFactor = 1; params.s_SampleQueueGrowthFactor = 0.1; - TTimeDoublePr bucket1[] = - { - TTimeDoublePr(1, 1.0), - TTimeDoublePr(15, 2.1), - TTimeDoublePr(180, 0.9), - TTimeDoublePr(400, 1.5), - TTimeDoublePr(550, 2.0) - }; - TTimeDoublePr bucket2[] = - { - TTimeDoublePr(600, 2.0), - TTimeDoublePr(190, 1.5), - TTimeDoublePr(799, 2.2), - TTimeDoublePr(1199, 1.8) - }; + TTimeDoublePr bucket1[] = { + TTimeDoublePr(1, 1.0), TTimeDoublePr(15, 2.1), TTimeDoublePr(180, 0.9), TTimeDoublePr(400, 1.5), TTimeDoublePr(550, 2.0)}; + TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(190, 1.5), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; { TFeatureVec features; @@ -1038,15 +1041,26 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 2u); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(4), gatherer.numberFeatures()); - for (std::size_t i = 0u; i < 4; ++i) - { + for (std::size_t i = 0u; i < 4; ++i) { CPPUNIT_ASSERT_EQUAL(features[i], gatherer.feature(i)); } @@ -1073,8 +1087,7 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() CPPUNIT_ASSERT_EQUAL(true, featureData[3].second[0].second.s_IsInteger); } - for (size_t i = 1; i < boost::size(bucket1); ++i) - { + for (size_t i = 1; i < boost::size(bucket1); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", bucket1[i].second); } { @@ -1090,14 +1103,10 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - 
CPPUNIT_ASSERT_EQUAL(std::string("[]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [7.5] 1 5)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [7.5] 1 5)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) std::string origXml; @@ -1114,9 +1123,19 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + traverser); // The XML representation of the new filter should be the // same as the original @@ -1130,8 +1149,7 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() } gatherer.timeNow(startTime + bucketLength); - for (size_t i = 0; i < boost::size(bucket2); ++i) - { + for (size_t i = 0; i < boost::size(bucket2); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket2[i].first, "p", bucket2[i].second); } { @@ -1150,8 +1168,7 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); CPPUNIT_ASSERT_EQUAL(std::string("[(8 [2.1] 1 2), (257 [1.5] 1 3)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) 
std::string origXml; @@ -1168,9 +1185,19 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + traverser); // The XML representation of the new filter should be the // same as the original @@ -1185,8 +1212,7 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() } } -void CMetricDataGathererTest::testResetBucketGivenSingleSeries() -{ +void CMetricDataGathererTest::testResetBucketGivenSingleSeries() { LOG_DEBUG("*** CMetricDataGathererTest::testResetBucketGivenSingleSeries ***"); const core_t::TTime startTime = 0; @@ -1196,32 +1222,41 @@ void CMetricDataGathererTest::testResetBucketGivenSingleSeries() params.s_SampleCountFactor = 1; params.s_SampleQueueGrowthFactor = 0.1; - TTimeDoublePr data[] = - { - TTimeDoublePr(1, 1.0), // Bucket 1 - TTimeDoublePr(550, 2.0), - TTimeDoublePr(600, 3.0), // Bucket 2 - TTimeDoublePr(700, 4.0), - TTimeDoublePr(1000, 5.0), - TTimeDoublePr(1200, 6.0) // Bucket 3 - }; + TTimeDoublePr data[] = { + TTimeDoublePr(1, 1.0), // Bucket 1 + TTimeDoublePr(550, 2.0), + TTimeDoublePr(600, 3.0), // Bucket 2 + TTimeDoublePr(700, 4.0), + TTimeDoublePr(1000, 5.0), + TTimeDoublePr(1200, 6.0) // Bucket 3 + }; TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 2u); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 2u); addPerson("p", gatherer, m_ResourceMonitor); - for (std::size_t i = 0; i < boost::size(data); ++i) - { + for (std::size_t i = 0; i < boost::size(data); ++i) { addArrival(gatherer, m_ResourceMonitor, data[i].first, "p", data[i].second); } - TFeatureSizeFeatureDataPrVecPrVec featureData; TSizeSizePr pidCidPr(0, 0); @@ -1274,30 +1309,21 @@ void CMetricDataGathererTest::testResetBucketGivenSingleSeries() gatherer.sampleNow(0); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 
2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); gatherer.sampleNow(600); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); } -void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() -{ +void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { LOG_DEBUG("*** CMetricDataGathererTest::testResetBucketGivenMultipleSeries ***"); const core_t::TTime startTime = 0; @@ -1307,32 +1333,41 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() params.s_SampleCountFactor = 1; params.s_SampleQueueGrowthFactor = 0.1; - TTimeDoublePr data[] = - { - TTimeDoublePr(1, 1.0), // Bucket 1 - TTimeDoublePr(550, 2.0), - TTimeDoublePr(600, 3.0), // Bucket 2 - TTimeDoublePr(700, 4.0), - TTimeDoublePr(1000, 5.0), - TTimeDoublePr(1200, 6.0) // Bucket 3 - }; + TTimeDoublePr data[] = { + TTimeDoublePr(1, 1.0), // Bucket 1 + TTimeDoublePr(550, 2.0), + TTimeDoublePr(600, 3.0), // Bucket 2 + TTimeDoublePr(700, 4.0), + TTimeDoublePr(1000, 5.0), + TTimeDoublePr(1200, 6.0) // Bucket 3 + }; TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 2u); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 2u); addPerson("p1", gatherer, m_ResourceMonitor); addPerson("p2", gatherer, m_ResourceMonitor); addPerson("p3", gatherer, m_ResourceMonitor); - for (std::size_t i = 0; i < boost::size(data); ++i) - { - for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) - { + for (std::size_t i = 0; i < boost::size(data); ++i) { + for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { addArrival(gatherer, m_ResourceMonitor, data[i].first, gatherer.personName(pid), data[i].second); } } 
@@ -1394,8 +1429,7 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr2)->second); gatherer.resetBucket(600); - for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) - { + for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { addArrival(gatherer, m_ResourceMonitor, 610, gatherer.personName(pid), 2.0); addArrival(gatherer, m_ResourceMonitor, 620, gatherer.personName(pid), 3.0); } @@ -1454,62 +1488,37 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() gatherer.sampleNow(0); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), 
core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); gatherer.sampleNow(600); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), 
core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples));
 }

-void CMetricDataGathererTest::testInfluenceStatistics()
-{
+void CMetricDataGathererTest::testInfluenceStatistics() {
     LOG_DEBUG("*** CMetricDataGathererTest::testInfluenceStatistics ***");

     using TTimeDoubleStrStrTuple = boost::tuple<core_t::TTime, double, std::string, std::string>;
@@ -1524,126 +1533,116 @@ void CMetricDataGathererTest::testInfluenceStatistics()
     params.s_SampleCountFactor = 1;
     params.s_SampleQueueGrowthFactor = 0.1;

-    std::string influencerNames_[] = { "i1", "i2" };
-    std::string influencerValues[][3] =
-        {
-            { "i11", "i12", "i13" },
-            { "i21", "i22", "i23" }
-        };
-
-    TTimeDoubleStrStrTuple data[] =
-        {
-            TTimeDoubleStrStrTuple(1, 1.0, influencerValues[0][0], influencerValues[1][0]), // Bucket 1
-            TTimeDoubleStrStrTuple(150, 5.0, influencerValues[0][1], influencerValues[1][1]),
-            TTimeDoubleStrStrTuple(150, 3.0, influencerValues[0][2], influencerValues[1][2]),
-            TTimeDoubleStrStrTuple(550, 2.0, influencerValues[0][0], influencerValues[1][0]),
-            TTimeDoubleStrStrTuple(551, 2.1, influencerValues[0][1], influencerValues[1][1]),
-            TTimeDoubleStrStrTuple(552, 4.0, influencerValues[0][2], influencerValues[1][2]),
-            TTimeDoubleStrStrTuple(554, 2.3, influencerValues[0][2], influencerValues[1][2]),
-            TTimeDoubleStrStrTuple(600, 3.0, influencerValues[0][1], influencerValues[1][0]), // Bucket 2
-            TTimeDoubleStrStrTuple(660, 3.0, influencerValues[0][0], influencerValues[1][2]),
-            TTimeDoubleStrStrTuple(690, 7.1, influencerValues[0][1], ""),
-            TTimeDoubleStrStrTuple(700, 4.0, influencerValues[0][0], influencerValues[1][2]),
-            TTimeDoubleStrStrTuple(800, 2.1, influencerValues[0][2], influencerValues[1][0]),
-            TTimeDoubleStrStrTuple(900, 2.5, influencerValues[0][1], influencerValues[1][0]),
-            TTimeDoubleStrStrTuple(1000, 5.0, influencerValues[0][1], influencerValues[1][0]),
-            TTimeDoubleStrStrTuple(1200, 6.4, "", influencerValues[1][2]), // Bucket 3
-            TTimeDoubleStrStrTuple(1210, 6.0, "", influencerValues[1][2]),
-            TTimeDoubleStrStrTuple(1240, 7.0, "", influencerValues[1][1]),
-            TTimeDoubleStrStrTuple(1600, 11.0, "", influencerValues[1][0]),
-            TTimeDoubleStrStrTuple(1800, 11.0, "", "") // Sentinel
-        };
-
-    std::string expectedStatistics[] =
-        {
-            "[(i11, (1.5, 2)), (i12, (3.55, 2)), (i13, (3.1, 3)), (i21, (1.5, 2)), (i22, (3.55, 2)), (i23, (3.1, 3))]",
-            "[(i11, (1.5, 2)), (i12, (3.55, 2)), (i13, (3.1, 3)), (i21, (1.5, 2)), (i22, (3.55, 2)), (i23, (3.1, 3))]",
-            "[(i11, (1, 1)), (i12, (2.1, 1)), (i13, (2.3, 1)), (i21, (1, 1)), (i22, (2.1, 1)), (i23, (2.3, 1))]",
-            "[(i11, (1, 1)), (i12, (2.1, 1)), (i13, (2.3, 1)), (i21, (1, 1)), (i22, (2.1, 1)), (i23, (2.3, 1))]",
-            "[(i11, (2, 1)), (i12, (5, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (5, 1)), (i23, (4, 1))]",
-            "[(i11, (2, 1)), (i12, (5, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (5, 1)), (i23, (4, 1))]",
-            "[(i11, (3, 1)), (i12, (7.1, 1)), (i13, (9.3, 1)), (i21, (3, 1)), (i22, (7.1, 1)), (i23, (9.3, 1))]",
-            "[(i11, (3, 1)), (i12, (7.1, 1)), (i13, (9.3, 1)), (i21, (3, 1)), (i22, (7.1, 1)), (i23, (9.3, 1))]",
-            "[(i11, (3.5, 2)), (i12, (4.4, 4)), (i13, (2.1, 1)), (i21, (3.15, 4)), (i23, (3.5, 2))]",
-            "[(i11, (3.5, 2)), (i12, (4.4, 4)), (i13, (2.1, 1)), (i21, (3.15, 4)), (i23, (3.5, 2))]",
-            "[(i11, (3, 1)), (i12, (2.5, 1)), (i13, (2.1, 1)), (i21, (2.1, 1)), (i23, (3, 1))]",
-            "[(i11, (3, 1)), (i12, (2.5, 1)), (i13, (2.1, 1)), (i21, (2.1, 1)), (i23, (3, 1))]",
-            "[(i11, (4, 1)), (i12, (7.1, 1)), (i13, (2.1, 1)), (i21, (5, 1)), (i23, (4, 1))]",
-            "[(i11, (4, 1)), (i12, (7.1, 1)), (i13, (2.1, 1)), (i21, (5, 1)),
(i23, (4, 1))]", - "[(i11, (7, 1)), (i12, (17.6, 1)), (i13, (2.1, 1)), (i21, (12.6, 1)), (i23, (7, 1))]", - "[(i11, (7, 1)), (i12, (17.6, 1)), (i13, (2.1, 1)), (i21, (12.6, 1)), (i23, (7, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]" - }; - const std::string *expected = expectedStatistics; + std::string influencerNames_[] = {"i1", "i2"}; + std::string influencerValues[][3] = {{"i11", "i12", "i13"}, {"i21", "i22", "i23"}}; + + TTimeDoubleStrStrTuple data[] = { + TTimeDoubleStrStrTuple(1, 1.0, influencerValues[0][0], influencerValues[1][0]), // Bucket 1 + TTimeDoubleStrStrTuple(150, 5.0, influencerValues[0][1], influencerValues[1][1]), + TTimeDoubleStrStrTuple(150, 3.0, influencerValues[0][2], influencerValues[1][2]), + TTimeDoubleStrStrTuple(550, 2.0, influencerValues[0][0], influencerValues[1][0]), + TTimeDoubleStrStrTuple(551, 2.1, influencerValues[0][1], influencerValues[1][1]), + TTimeDoubleStrStrTuple(552, 4.0, influencerValues[0][2], influencerValues[1][2]), + TTimeDoubleStrStrTuple(554, 2.3, influencerValues[0][2], influencerValues[1][2]), + TTimeDoubleStrStrTuple(600, 3.0, influencerValues[0][1], influencerValues[1][0]), // Bucket 2 + TTimeDoubleStrStrTuple(660, 3.0, influencerValues[0][0], influencerValues[1][2]), + TTimeDoubleStrStrTuple(690, 7.1, influencerValues[0][1], ""), + TTimeDoubleStrStrTuple(700, 4.0, influencerValues[0][0], influencerValues[1][2]), + TTimeDoubleStrStrTuple(800, 2.1, influencerValues[0][2], influencerValues[1][0]), + TTimeDoubleStrStrTuple(900, 2.5, influencerValues[0][1], influencerValues[1][0]), + TTimeDoubleStrStrTuple(1000, 5.0, influencerValues[0][1], influencerValues[1][0]), + TTimeDoubleStrStrTuple(1200, 6.4, "", influencerValues[1][2]), // Bucket 3 + TTimeDoubleStrStrTuple(1210, 6.0, "", influencerValues[1][2]), + TTimeDoubleStrStrTuple(1240, 7.0, "", influencerValues[1][1]), + TTimeDoubleStrStrTuple(1600, 11.0, "", influencerValues[1][0]), + TTimeDoubleStrStrTuple(1800, 11.0, "", "") // Sentinel + }; + + std::string expectedStatistics[] = { + "[(i11, (1.5, 2)), (i12, (3.55, 2)), (i13, (3.1, 3)), (i21, (1.5, 2)), (i22, (3.55, 2)), (i23, (3.1, 3))]", + "[(i11, (1.5, 2)), (i12, (3.55, 2)), (i13, (3.1, 3)), (i21, (1.5, 2)), (i22, (3.55, 2)), (i23, (3.1, 3))]", + "[(i11, (1, 1)), (i12, (2.1, 1)), (i13, (2.3, 1)), (i21, (1, 1)), (i22, (2.1, 1)), (i23, (2.3, 1))]", + "[(i11, (1, 1)), (i12, (2.1, 1)), (i13, (2.3, 1)), (i21, (1, 1)), (i22, (2.1, 1)), (i23, (2.3, 1))]", + "[(i11, (2, 1)), (i12, (5, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (5, 1)), (i23, (4, 1))]", + "[(i11, (2, 1)), (i12, (5, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (5, 1)), (i23, (4, 1))]", + "[(i11, (3, 1)), (i12, (7.1, 1)), (i13, (9.3, 1)), (i21, (3, 1)), (i22, (7.1, 1)), (i23, (9.3, 1))]", + "[(i11, (3, 1)), (i12, (7.1, 1)), (i13, (9.3, 1)), (i21, (3, 1)), (i22, (7.1, 1)), (i23, (9.3, 1))]", + "[(i11, (3.5, 2)), (i12, (4.4, 4)), (i13, (2.1, 1)), (i21, (3.15, 4)), (i23, (3.5, 2))]", + "[(i11, (3.5, 2)), (i12, (4.4, 4)), (i13, (2.1, 1)), (i21, (3.15, 4)), (i23, (3.5, 2))]", + "[(i11, (3, 1)), (i12, (2.5, 1)), (i13, (2.1, 1)), (i21, (2.1, 1)), (i23, (3, 1))]", + "[(i11, (3, 1)), (i12, (2.5, 1)), (i13, 
(2.1, 1)), (i21, (2.1, 1)), (i23, (3, 1))]", + "[(i11, (4, 1)), (i12, (7.1, 1)), (i13, (2.1, 1)), (i21, (5, 1)), (i23, (4, 1))]", + "[(i11, (4, 1)), (i12, (7.1, 1)), (i13, (2.1, 1)), (i21, (5, 1)), (i23, (4, 1))]", + "[(i11, (7, 1)), (i12, (17.6, 1)), (i13, (2.1, 1)), (i21, (12.6, 1)), (i23, (7, 1))]", + "[(i11, (7, 1)), (i12, (17.6, 1)), (i13, (2.1, 1)), (i21, (12.6, 1)), (i23, (7, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]"}; + const std::string* expected = expectedStatistics; TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); - TStrVec influencerNames(boost::begin(influencerNames_), - boost::end(influencerNames_)); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, false, KEY, features, startTime, 2u); + TStrVec influencerNames(boost::begin(influencerNames_), boost::end(influencerNames_)); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + influencerNames, + false, + KEY, + features, + startTime, + 2u); addPerson("p1", gatherer, m_ResourceMonitor, influencerNames.size()); addPerson("p2", gatherer, m_ResourceMonitor, influencerNames.size()); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u, b = 0u; i < boost::size(data); ++i) - { - if (data[i].get<0>() >= bucketStart + bucketLength) - { + for (std::size_t i = 0u, b = 0u; i < boost::size(data); ++i) { + if (data[i].get<0>() >= bucketStart + bucketLength) { LOG_DEBUG("*** processing bucket ***"); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t j = 0u; j < featureData.size(); ++j) - { + for (std::size_t j = 0u; j < featureData.size(); ++j) { model_t::EFeature feature = featureData[j].first; LOG_DEBUG("feature = " << model_t::print(feature)); - const TSizeFeatureDataPrVec &data_ = featureData[j].second; - for (std::size_t k = 0u; k < data_.size(); ++k) - { + const TSizeFeatureDataPrVec& data_ = featureData[j].second; + for (std::size_t k = 0u; k < data_.size(); ++k) { TStrDoubleDoublePrPrVec statistics; - for (std::size_t m = 0u; - m < data_[k].second.s_InfluenceValues.size(); - ++m) - { - for (std::size_t n = 0u; - n < data_[k].second.s_InfluenceValues[m].size(); - ++n) - { - statistics.push_back(TStrDoubleDoublePrPr( - data_[k].second.s_InfluenceValues[m][n].first, - TDoubleDoublePr(data_[k].second.s_InfluenceValues[m][n].second.first[0], - data_[k].second.s_InfluenceValues[m][n].second.second))); + for (std::size_t m = 0u; m < data_[k].second.s_InfluenceValues.size(); ++m) { + for (std::size_t n = 0u; n < data_[k].second.s_InfluenceValues[m].size(); ++n) { + statistics.push_back( + TStrDoubleDoublePrPr(data_[k].second.s_InfluenceValues[m][n].first, + TDoubleDoublePr(data_[k].second.s_InfluenceValues[m][n].second.first[0], + 
data_[k].second.s_InfluenceValues[m][n].second.second)));
                         }
                     }
-                    std::sort(statistics.begin(),
-                              statistics.end(),
-                              maths::COrderings::SFirstLess());
+                    std::sort(statistics.begin(), statistics.end(), maths::COrderings::SFirstLess());

-                    LOG_DEBUG("statistics = "
-                              << core::CContainerPrinter::print(statistics));
+                    LOG_DEBUG("statistics = " << core::CContainerPrinter::print(statistics));
                     LOG_DEBUG("expected = " << *expected);
-                    CPPUNIT_ASSERT_EQUAL((*expected++),
-                                         core::CContainerPrinter::print(statistics));
+                    CPPUNIT_ASSERT_EQUAL((*expected++), core::CContainerPrinter::print(statistics));
                 }
             }
-            bucketStart += bucketLength; ++b;
+            bucketStart += bucketLength;
+            ++b;
         }
-        for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid)
-        {
+        for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) {
             addArrival(gatherer,
                        m_ResourceMonitor,
                        data[i].get<0>(),
@@ -1655,8 +1654,7 @@ void CMetricDataGathererTest::testInfluenceStatistics()
     }
 }

-void CMetricDataGathererTest::testMultivariate()
-{
+void CMetricDataGathererTest::testMultivariate() {
     using TTimeDoubleDoubleTuple = boost::tuple<core_t::TTime, double, double>;
     using TTimeDoubleDoubleTupleVec = std::vector<TTimeDoubleDoubleTuple>;
     using TTimeDoubleDoubleTupleVecVec = std::vector<TTimeDoubleDoubleTupleVec>;
@@ -1669,44 +1667,40 @@ void CMetricDataGathererTest::testMultivariate()
     SModelParams params(bucketLength);
     params.s_MultivariateComponentDelimiter = DELIMITER;

-    TTimeDoubleDoubleTuple bucket1[] =
-        {
-            TTimeDoubleDoubleTuple(1, 1.0, 1.0),
-            TTimeDoubleDoubleTuple(15, 2.1, 2.0),
-            TTimeDoubleDoubleTuple(180, 0.9, 0.8),
-            TTimeDoubleDoubleTuple(190, 1.5, 1.4),
-            TTimeDoubleDoubleTuple(400, 1.5, 1.4),
-            TTimeDoubleDoubleTuple(550, 2.0, 1.8)
-        };
-    TTimeDoubleDoubleTuple bucket2[] =
-        {
-            TTimeDoubleDoubleTuple(600, 2.0, 1.8),
-            TTimeDoubleDoubleTuple(799, 2.2, 2.0),
-            TTimeDoubleDoubleTuple(1199, 1.8, 1.6)
-        };
-    TTimeDoubleDoubleTuple bucket3[] =
-        {
-            TTimeDoubleDoubleTuple(1200, 2.1, 2.0),
-            TTimeDoubleDoubleTuple(1250, 2.5, 2.4)
-        };
-    TTimeDoubleDoubleTuple bucket4[] =
-        {
-            TTimeDoubleDoubleTuple(1900, 3.5, 3.2),
-        };
-    TTimeDoubleDoubleTuple bucket5[] =
-        {
-            TTimeDoubleDoubleTuple(2420, 3.5, 3.2),
-            TTimeDoubleDoubleTuple(2480, 3.2, 3.0),
-            TTimeDoubleDoubleTuple(2490, 3.8, 3.8)
-        };
+    TTimeDoubleDoubleTuple bucket1[] = {TTimeDoubleDoubleTuple(1, 1.0, 1.0),
+                                        TTimeDoubleDoubleTuple(15, 2.1, 2.0),
+                                        TTimeDoubleDoubleTuple(180, 0.9, 0.8),
+                                        TTimeDoubleDoubleTuple(190, 1.5, 1.4),
+                                        TTimeDoubleDoubleTuple(400, 1.5, 1.4),
+                                        TTimeDoubleDoubleTuple(550, 2.0, 1.8)};
+    TTimeDoubleDoubleTuple bucket2[] = {
+        TTimeDoubleDoubleTuple(600, 2.0, 1.8), TTimeDoubleDoubleTuple(799, 2.2, 2.0), TTimeDoubleDoubleTuple(1199, 1.8, 1.6)};
+    TTimeDoubleDoubleTuple bucket3[] = {TTimeDoubleDoubleTuple(1200, 2.1, 2.0), TTimeDoubleDoubleTuple(1250, 2.5, 2.4)};
+    TTimeDoubleDoubleTuple bucket4[] = {
+        TTimeDoubleDoubleTuple(1900, 3.5, 3.2),
+    };
+    TTimeDoubleDoubleTuple bucket5[] = {
+        TTimeDoubleDoubleTuple(2420, 3.5, 3.2), TTimeDoubleDoubleTuple(2480, 3.2, 3.0), TTimeDoubleDoubleTuple(2490, 3.8, 3.8)};
     {
         TFeatureVec features;
         features.push_back(model_t::E_IndividualMeanLatLongByPerson);
         TStrVec influencerNames;
-        CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params,
-                               EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
-                               influencerNames, false, KEY, features, startTime, 2u);
+        CDataGatherer gatherer(model_t::E_Metric,
+                               model_t::E_None,
+                               params,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               EMPTY_STRING,
+                               influencerNames,
+                               false,
+                               KEY,
+
features, + startTime, + 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); @@ -1731,8 +1725,7 @@ void CMetricDataGathererTest::testMultivariate() CPPUNIT_ASSERT_EQUAL(true, featureData[0].second[0].second.s_IsInteger); } - for (size_t i = 1; i < boost::size(bucket1); ++i) - { + for (size_t i = 1; i < boost::size(bucket1); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket1[i].get<0>(), "p", bucket1[i].get<1>(), bucket1[i].get<2>(), DELIMITER); } { @@ -1763,9 +1756,19 @@ void CMetricDataGathererTest::testMultivariate() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + traverser); // The XML representation of the new filter should be the // same as the original @@ -1779,8 +1782,7 @@ void CMetricDataGathererTest::testMultivariate() } gatherer.timeNow(startTime + bucketLength); - for (size_t i = 0; i < boost::size(bucket2); ++i) - { + for (size_t i = 0; i < boost::size(bucket2); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket2[i].get<0>(), "p", bucket2[i].get<1>(), bucket2[i].get<2>(), DELIMITER); } { @@ -1809,9 +1811,19 @@ void CMetricDataGathererTest::testMultivariate() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + traverser); // The XML representation of the new filter should be the // same as the original @@ -1825,8 +1837,7 @@ void CMetricDataGathererTest::testMultivariate() } gatherer.timeNow(startTime + 2 * bucketLength); - for (size_t i = 0; i < boost::size(bucket3); ++i) - { + for (size_t i = 0; i < boost::size(bucket3); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket3[i].get<0>(), "p", bucket3[i].get<1>(), bucket3[i].get<2>(), DELIMITER); } { @@ -1846,9 +1857,21 @@ void CMetricDataGathererTest::testMultivariate() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanLatLongByPerson); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TTimeDoubleDoubleTupleVecVec buckets; @@ -1858,13 +1881,11 @@ void CMetricDataGathererTest::testMultivariate() buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket4), boost::end(bucket4))); 
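Each arrival in this test carries a two-component (latitude, longitude) value, but the gatherer receives it as a single field: the components are formatted individually and joined with the configured delimiter (params.s_MultivariateComponentDelimiter), which is how the addArrival overload later in this patch builds its value string. A rough sketch of that joining step; joinComponents is hypothetical, and the real tests format with core::CStringUtils::typeToStringPrecise rather than std::to_string:

#include <string>

// Hypothetical: join two metric components into one delimited field value,
// e.g. 1.5 and 1.4 become "1.500000<delim>1.400000" with std::to_string.
std::string joinComponents(double lat, double lng, const std::string& delimiter) {
    return std::to_string(lat) + delimiter + std::to_string(lng);
}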
buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket5), boost::end(bucket5))); - for (std::size_t i = 0u; i < buckets.size(); ++i) - { + for (std::size_t i = 0u; i < buckets.size(); ++i) { LOG_DEBUG("Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); - const TTimeDoubleDoubleTupleVec &bucket = buckets[i]; - for (std::size_t j = 0u; j < bucket.size(); ++j) - { + const TTimeDoubleDoubleTupleVec& bucket = buckets[i]; + for (std::size_t j = 0u; j < bucket.size(); ++j) { addArrival(gatherer, m_ResourceMonitor, bucket[j].get<0>(), "p", bucket[j].get<1>(), bucket[j].get<2>(), DELIMITER); } } @@ -1883,8 +1904,7 @@ void CMetricDataGathererTest::testMultivariate() } } -void CMetricDataGathererTest::testStatisticsPersist() -{ +void CMetricDataGathererTest::testStatisticsPersist() { CGathererTools::TMeanGatherer::TMetricPartialStatistic stat(1); stat.add(TDoubleVec(1, 44.4), 1299196740, 1); stat.add(TDoubleVec(1, 5.5), 1299196741, 1); @@ -1906,8 +1926,7 @@ void CMetricDataGathererTest::testStatisticsPersist() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); CGathererTools::TMeanGatherer::TMetricPartialStatistic restored(1); - traverser.traverseSubLevel(boost::bind(&CGathererTools::TMeanGatherer::TMetricPartialStatistic::restore, - boost::ref(restored), _1)); + traverser.traverseSubLevel(boost::bind(&CGathererTools::TMeanGatherer::TMetricPartialStatistic::restore, boost::ref(restored), _1)); restoredTime = restored.time(); { @@ -1920,8 +1939,7 @@ void CMetricDataGathererTest::testStatisticsPersist() CPPUNIT_ASSERT_EQUAL(origTime, restoredTime); } -void CMetricDataGathererTest::testVarp() -{ +void CMetricDataGathererTest::testVarp() { core_t::TTime startTime = 100000; const core_t::TTime bucketLength = 1000; const std::string person("p"); @@ -1936,9 +1954,21 @@ void CMetricDataGathererTest::testVarp() { TFeatureVec features; features.push_back(model_t::E_IndividualVarianceByPerson); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 2u); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + KEY, + features, + startTime, + 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(person, gatherer, m_ResourceMonitor)); @@ -1957,8 +1987,7 @@ void CMetricDataGathererTest::testVarp() // Expect only 1 feature CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); TFeatureSizeFeatureDataPrVecPr fsfd = featureData[0]; - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualVarianceByPerson, - fsfd.first); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualVarianceByPerson, fsfd.first); CSample::TDouble1Vec v = featureData[0].second[0].second.s_BucketValue->value(); double expectedMean = 0; double expectedVariance = ::variance(values, expectedMean); @@ -2010,9 +2039,21 @@ void CMetricDataGathererTest::testVarp() TStrVec influencerFieldNames; influencerFieldNames.push_back("i"); influencerFieldNames.push_back("j"); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerFieldNames, false, KEY, features, startTime, 2u); + CDataGatherer gatherer(model_t::E_Metric, + model_t::E_None, + params, + 
EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + influencerFieldNames, + false, + KEY, + features, + startTime, + 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(person, gatherer, m_ResourceMonitor, influencerFieldNames.size())); @@ -2023,7 +2064,7 @@ void CMetricDataGathererTest::testVarp() CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); { - addArrival(gatherer, m_ResourceMonitor, startTime + 0, person, 5.0, inf1, inf2); + addArrival(gatherer, m_ResourceMonitor, startTime + 0, person, 5.0, inf1, inf2); addArrival(gatherer, m_ResourceMonitor, startTime + 100, person, 5.5, inf1, ""); addArrival(gatherer, m_ResourceMonitor, startTime + 200, person, 5.9, inf1, ""); addArrival(gatherer, m_ResourceMonitor, startTime + 300, person, 5.2, inf1, ""); @@ -2041,8 +2082,7 @@ void CMetricDataGathererTest::testVarp() TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(startTime, bucketLength, featureData); TFeatureSizeFeatureDataPrVecPr fsfd = featureData[0]; - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualVarianceByPerson, - fsfd.first); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualVarianceByPerson, fsfd.first); CSample::TDouble1Vec v = featureData[0].second[0].second.s_BucketValue->value(); values.clear(); @@ -2087,7 +2127,7 @@ void CMetricDataGathererTest::testVarp() CPPUNIT_ASSERT_EQUAL(std::size_t(1), ivs[0].size()); CPPUNIT_ASSERT_EQUAL(std::size_t(2), ivs[1].size()); - const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr &ivs1 = ivs[0][0]; + const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr& ivs1 = ivs[0][0]; CPPUNIT_ASSERT_EQUAL(inf1, ivs1.first.get()); CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, ivs1.second.second, 0.0001); CPPUNIT_ASSERT_EQUAL(std::size_t(2), ivs1.second.first.size()); @@ -2095,14 +2135,14 @@ void CMetricDataGathererTest::testVarp() CPPUNIT_ASSERT_DOUBLES_EQUAL(ivs1.second.first[1], i1ExpectedMean, 0.0001); // The order of ivs2 and ivs3 seems to be backwards... 
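For the variance feature, each per-influencer entry asserted here (ivs1 above, ivs2 and ivs3 below) carries a two-element vector, variance first and mean second (ivs.second.first[0] and ivs.second.first[1]), together with a weight in ivs.second.second. A minimal sketch of the population statistic those assertions imply; varianceAndMean is hypothetical and merely stands in for the test's own ::variance helper:

#include <utility>
#include <vector>

// Hypothetical: population variance and mean of a set of values.
std::pair<double, double> varianceAndMean(const std::vector<double>& values) {
    if (values.empty()) {
        return {0.0, 0.0};
    }
    double mean = 0.0;
    for (double value : values) {
        mean += value;
    }
    mean /= static_cast<double>(values.size());
    double variance = 0.0;
    for (double value : values) {
        variance += (value - mean) * (value - mean);
    }
    variance /= static_cast<double>(values.size());
    return {variance, mean}; // same [variance, mean] layout as the assertions
}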
-    const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr &ivs2 = ivs[1][1];
+    const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr& ivs2 = ivs[1][1];
     CPPUNIT_ASSERT_EQUAL(inf2, ivs2.first.get());
     CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, ivs2.second.second, 0.0001);
     CPPUNIT_ASSERT_EQUAL(std::size_t(2), ivs2.second.first.size());
     CPPUNIT_ASSERT_DOUBLES_EQUAL(ivs2.second.first[0], i2ExpectedVariance, 0.0001);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(ivs2.second.first[1], i2ExpectedMean, 0.0001);

-    const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr &ivs3 = ivs[1][0];
+    const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr& ivs3 = ivs[1][0];
     CPPUNIT_ASSERT_EQUAL(inf3, ivs3.first.get());
     CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, ivs3.second.second, 0.0001);
     CPPUNIT_ASSERT_EQUAL(std::size_t(2), ivs3.second.first.size());
@@ -2112,46 +2152,32 @@
     }
 }

-
-CppUnit::Test *CMetricDataGathererTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMetricDataGathererTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::singleSeriesTests",
-                                   &CMetricDataGathererTest::singleSeriesTests) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::multipleSeriesTests",
-                                   &CMetricDataGathererTest::multipleSeriesTests) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testSampleCount",
-                                   &CMetricDataGathererTest::testSampleCount) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testRemovePeople",
-                                   &CMetricDataGathererTest::testRemovePeople) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testSum",
-                                   &CMetricDataGathererTest::testSum) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::singleSeriesOutOfOrderTests",
-                                   &CMetricDataGathererTest::singleSeriesOutOfOrderTests) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testResetBucketGivenSingleSeries",
-                                   &CMetricDataGathererTest::testResetBucketGivenSingleSeries) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testResetBucketGivenMultipleSeries",
-                                   &CMetricDataGathererTest::testResetBucketGivenMultipleSeries) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testInfluenceStatistics",
-                                   &CMetricDataGathererTest::testInfluenceStatistics) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testMultivariate",
-                                   &CMetricDataGathererTest::testMultivariate) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testStatisticsPersist",
-                                   &CMetricDataGathererTest::testStatisticsPersist) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricDataGathererTest>(
-                                   "CMetricDataGathererTest::testVarp",
-                                   &CMetricDataGathererTest::testVarp) );
+CppUnit::Test* CMetricDataGathererTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricDataGathererTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::singleSeriesTests",
+                                                                           &CMetricDataGathererTest::singleSeriesTests));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::multipleSeriesTests",
+                                                                           &CMetricDataGathererTest::multipleSeriesTests));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testSampleCount",
+                                                                           &CMetricDataGathererTest::testSampleCount));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testRemovePeople",
+
&CMetricDataGathererTest::testRemovePeople)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CMetricDataGathererTest::testSum", &CMetricDataGathererTest::testSum)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricDataGathererTest::singleSeriesOutOfOrderTests", + &CMetricDataGathererTest::singleSeriesOutOfOrderTests)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricDataGathererTest::testResetBucketGivenSingleSeries", + &CMetricDataGathererTest::testResetBucketGivenSingleSeries)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricDataGathererTest::testResetBucketGivenMultipleSeries", + &CMetricDataGathererTest::testResetBucketGivenMultipleSeries)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricDataGathererTest::testInfluenceStatistics", + &CMetricDataGathererTest::testInfluenceStatistics)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricDataGathererTest::testMultivariate", + &CMetricDataGathererTest::testMultivariate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricDataGathererTest::testStatisticsPersist", + &CMetricDataGathererTest::testStatisticsPersist)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CMetricDataGathererTest::testVarp", &CMetricDataGathererTest::testVarp)); return suiteOfTests; } diff --git a/lib/model/unittest/CMetricDataGathererTest.h b/lib/model/unittest/CMetricDataGathererTest.h index b3d59cc7a8..7f60fe17cf 100644 --- a/lib/model/unittest/CMetricDataGathererTest.h +++ b/lib/model/unittest/CMetricDataGathererTest.h @@ -11,25 +11,25 @@ #include -class CMetricDataGathererTest : public CppUnit::TestFixture -{ - public: - void singleSeriesTests(); - void multipleSeriesTests(); - void testSampleCount(); - void testRemovePeople(); - void testSum(); - void singleSeriesOutOfOrderTests(); - void testResetBucketGivenSingleSeries(); - void testResetBucketGivenMultipleSeries(); - void testInfluenceStatistics(); - void testMultivariate(); - void testStatisticsPersist(); - void testVarp(); +class CMetricDataGathererTest : public CppUnit::TestFixture { +public: + void singleSeriesTests(); + void multipleSeriesTests(); + void testSampleCount(); + void testRemovePeople(); + void testSum(); + void singleSeriesOutOfOrderTests(); + void testResetBucketGivenSingleSeries(); + void testResetBucketGivenMultipleSeries(); + void testInfluenceStatistics(); + void testMultivariate(); + void testStatisticsPersist(); + void testVarp(); - static CppUnit::Test *suite(); - private: - ml::model::CResourceMonitor m_ResourceMonitor; + static CppUnit::Test* suite(); + +private: + ml::model::CResourceMonitor m_ResourceMonitor; }; #endif // INCLUDED_CMetricDataGathererTest_h diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index 0f346e39cb..130176bb7d 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -8,11 +8,11 @@ #include #include -#include -#include #include #include #include +#include +#include #include #include @@ -22,8 +22,8 @@ #include #include -#include #include +#include #include #include #include @@ -51,8 +51,7 @@ using namespace ml; using namespace model; -namespace -{ +namespace { using TDoubleDoublePr = std::pair; using TSizeDoublePr = std::pair; @@ -87,29 +86,22 @@ using TTimeStrVecPrVec = std::vector; const std::string EMPTY_STRING; -class CTimeLess -{ - public: - bool operator()(const CEventData &lhs, - const CEventData &rhs) const - { - return lhs.time() < rhs.time(); - } +class CTimeLess { +public: + bool operator()(const CEventData& 
lhs, const CEventData& rhs) const { return lhs.time() < rhs.time(); } }; -void makeModel(CMetricModelFactory &factory, - const CDataGatherer::TFeatureVec &features, +void makeModel(CMetricModelFactory& factory, + const CDataGatherer::TFeatureVec& features, core_t::TTime startTime, core_t::TTime bucketLength, - CModelFactory::TDataGathererPtr &gatherer, - CAnomalyDetectorModel::TModelPtr &model, - unsigned int *sampleCount = 0) -{ + CModelFactory::TDataGathererPtr& gatherer, + CAnomalyDetectorModel::TModelPtr& model, + unsigned int* sampleCount = 0) { factory.features(features); factory.bucketLength(bucketLength); CModelFactory::SGathererInitializationData gathererInitData(startTime); - if (sampleCount) - { + if (sampleCount) { gathererInitData.s_SampleOverrideCount = *sampleCount; } gatherer.reset(factory.makeDataGatherer(gathererInitData)); @@ -120,10 +112,7 @@ void makeModel(CMetricModelFactory &factory, CPPUNIT_ASSERT_EQUAL(bucketLength, model->bucketLength()); } -std::size_t addPerson(const std::string &p, - const CModelFactory::TDataGathererPtr &gatherer, - CResourceMonitor &resourceMonitor) -{ +std::size_t addPerson(const std::string& p, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec person; person.push_back(&p); person.resize(gatherer->fieldsOfInterest().size(), 0); @@ -132,32 +121,26 @@ std::size_t addPerson(const std::string &p, return *result.personId(); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, - const std::string &person, + const std::string& person, double value, - const TOptionalStr &inf1 = TOptionalStr(), - const TOptionalStr &inf2 = TOptionalStr(), - const TOptionalStr &count = TOptionalStr()) -{ + const TOptionalStr& inf1 = TOptionalStr(), + const TOptionalStr& inf2 = TOptionalStr(), + const TOptionalStr& count = TOptionalStr()) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); - if (inf1) - { + if (inf1) { fieldValues.push_back(&(inf1.get())); } - if (inf2) - { + if (inf2) { fieldValues.push_back(&(inf2.get())); } - if (count) - { + if (count) { fieldValues.push_back(&(count.get())); } - std::string valueAsString(core::CStringUtils::typeToStringPrecise( - value, - core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision)); fieldValues.push_back(&valueAsString); CEventData eventData; @@ -166,32 +149,26 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void addArrival(CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, core_t::TTime time, - const std::string &person, - double lat, double lng, - const TOptionalStr &inf1 = TOptionalStr(), - const TOptionalStr &inf2 = TOptionalStr()) -{ + const std::string& person, + double lat, + double lng, + const TOptionalStr& inf1 = TOptionalStr(), + const TOptionalStr& inf2 = TOptionalStr()) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); - if (inf1) - { + if (inf1) { fieldValues.push_back(&(inf1.get())); } - if (inf2) - { + if (inf2) { fieldValues.push_back(&(inf2.get())); } std::string valueAsString; - valueAsString += core::CStringUtils::typeToStringPrecise( - lat, - core::CIEEE754::E_DoublePrecision); + valueAsString += 
core::CStringUtils::typeToStringPrecise(lat, core::CIEEE754::E_DoublePrecision); valueAsString += model::CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER; - valueAsString += core::CStringUtils::typeToStringPrecise( - lng, - core::CIEEE754::E_DoublePrecision); + valueAsString += core::CStringUtils::typeToStringPrecise(lng, core::CIEEE754::E_DoublePrecision); fieldValues.push_back(&valueAsString); CEventData eventData; @@ -200,11 +177,7 @@ void addArrival(CDataGatherer &gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -CEventData makeEventData(core_t::TTime time, - std::size_t pid, - double value, - const TOptionalStr &influence = TOptionalStr()) -{ +CEventData makeEventData(core_t::TTime time, std::size_t pid, double value, const TOptionalStr& influence = TOptionalStr()) { CEventData result; result.time(time); result.person(pid); @@ -214,27 +187,17 @@ CEventData makeEventData(core_t::TTime time, return result; } -TDouble1Vec featureData(const CMetricModel &model, - model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) -{ - const CMetricModel::TFeatureData *data = model.featureData(feature, pid, time); - if (!data) - { +TDouble1Vec featureData(const CMetricModel& model, model_t::EFeature feature, std::size_t pid, core_t::TTime time) { + const CMetricModel::TFeatureData* data = model.featureData(feature, pid, time); + if (!data) { return TDouble1Vec(); } return data->s_BucketValue ? data->s_BucketValue->value() : TDouble1Vec(); } -TDouble1Vec multivariateFeatureData(const CMetricModel &model, - model_t::EFeature feature, - std::size_t pid, - core_t::TTime time) -{ - const CMetricModel::TFeatureData *data = model.featureData(feature, pid, time); - if (!data) - { +TDouble1Vec multivariateFeatureData(const CMetricModel& model, model_t::EFeature feature, std::size_t pid, core_t::TTime time) { + const CMetricModel::TFeatureData* data = model.featureData(feature, pid, time); + if (!data) { return TDouble1Vec(); } return data->s_BucketValue ? 
data->s_BucketValue->value() : TDouble1Vec(); @@ -242,50 +205,40 @@ TDouble1Vec multivariateFeatureData(const CMetricModel &model, void processBucket(core_t::TTime time, core_t::TTime bucketLength, - const TDoubleVec &bucket, - const TStrVec &influencerValues, - CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, - CMetricModel &model, - SAnnotatedProbability &probability) -{ - for (std::size_t i = 0u; i < bucket.size(); ++i) - { + const TDoubleVec& bucket, + const TStrVec& influencerValues, + CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, + CMetricModel& model, + SAnnotatedProbability& probability) { + for (std::size_t i = 0u; i < bucket.size(); ++i) { addArrival(gatherer, resourceMonitor, time, "p", bucket[i], TOptionalStr(influencerValues[i])); } model.sample(time, time + bucketLength, resourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, probability); + model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, probability); LOG_DEBUG("influences = " << core::CContainerPrinter::print(probability.s_Influences)); } void processBucket(core_t::TTime time, core_t::TTime bucketLength, - const TDoubleVec &bucket, - CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, - CMetricModel &model, - SAnnotatedProbability &probability, - SAnnotatedProbability &probability2) -{ + const TDoubleVec& bucket, + CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, + CMetricModel& model, + SAnnotatedProbability& probability, + SAnnotatedProbability& probability2) { const std::string person("p"); const std::string person2("q"); - for (std::size_t i = 0u; i < bucket.size(); ++i) - { + for (std::size_t i = 0u; i < bucket.size(); ++i) { CDataGatherer::TStrCPtrVec fieldValues; - if (i % 2 == 0) - { + if (i % 2 == 0) { fieldValues.push_back(&person); - } - else - { + } else { fieldValues.push_back(&person2); } - std::string valueAsString(core::CStringUtils::typeToStringPrecise( - bucket[i], - core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise(bucket[i], core::CIEEE754::E_DoublePrecision)); fieldValues.push_back(&valueAsString); CEventData eventData; @@ -295,20 +248,16 @@ void processBucket(core_t::TTime time, } model.sample(time, time + bucketLength, resourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, probability); - model.computeProbability(1/*pid*/, time, time + bucketLength, - partitioningFields, 1, probability2); + model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, probability); + model.computeProbability(1 /*pid*/, time, time + bucketLength, partitioningFields, 1, probability2); } const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); const TDouble4Vec1Vec UNIT_WEIGHT(1, TDouble4Vec(1, 1.0)); const TSizeDoublePr1Vec NO_CORRELATES; - } -void CMetricModelTest::testSample() -{ +void CMetricModelTest::testSample() { LOG_DEBUG("*** testSample ***"); core_t::TTime startTime(45); @@ -320,29 +269,25 @@ void CMetricModelTest::testSample() // Check basic sampling. 
{
-        TTimeDoublePr data[] =
-            {
-                TTimeDoublePr(49, 1.5),
-                TTimeDoublePr(60, 1.3),
-                TTimeDoublePr(61, 1.3),
-                TTimeDoublePr(62, 1.6),
-                TTimeDoublePr(65, 1.7),
-                TTimeDoublePr(66, 1.33),
-                TTimeDoublePr(68, 1.5),
-                TTimeDoublePr(84, 1.58),
-                TTimeDoublePr(87, 1.69),
-                TTimeDoublePr(157, 1.6),
-                TTimeDoublePr(164, 1.66),
-                TTimeDoublePr(199, 1.28),
-                TTimeDoublePr(202, 1.2),
-                TTimeDoublePr(204, 1.5)
-            };
-
-        unsigned int sampleCounts[] = { 2, 1 };
-        unsigned int expectedSampleCounts[] = { 2, 1 };
-
-        for (std::size_t i = 0; i < boost::size(sampleCounts); ++i)
-        {
+        TTimeDoublePr data[] = {TTimeDoublePr(49, 1.5),
+                                TTimeDoublePr(60, 1.3),
+                                TTimeDoublePr(61, 1.3),
+                                TTimeDoublePr(62, 1.6),
+                                TTimeDoublePr(65, 1.7),
+                                TTimeDoublePr(66, 1.33),
+                                TTimeDoublePr(68, 1.5),
+                                TTimeDoublePr(84, 1.58),
+                                TTimeDoublePr(87, 1.69),
+                                TTimeDoublePr(157, 1.6),
+                                TTimeDoublePr(164, 1.66),
+                                TTimeDoublePr(199, 1.28),
+                                TTimeDoublePr(202, 1.2),
+                                TTimeDoublePr(204, 1.5)};
+
+        unsigned int sampleCounts[] = {2, 1};
+        unsigned int expectedSampleCounts[] = {2, 1};
+
+        for (std::size_t i = 0; i < boost::size(sampleCounts); ++i) {
             CDataGatherer::TFeatureVec features;
             features.push_back(model_t::E_IndividualMeanByPerson);
             features.push_back(model_t::E_IndividualMinByPerson);
@@ -350,7 +295,7 @@ void CMetricModelTest::testSample()
             CModelFactory::TDataGathererPtr gatherer;
             CAnomalyDetectorModel::TModelPtr model_;
             makeModel(factory, features, startTime, bucketLength, gatherer, model_, &sampleCounts[i]);
-            CMetricModel &model = static_cast<CMetricModel&>(*model_.get());
+            CMetricModel& model = static_cast<CMetricModel&>(*model_.get());
             CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor));

             // Bucket values.
@@ -372,19 +317,14 @@ void CMetricModelTest::testSample()
             TDouble1Vec expectedMaxSamples;
             std::size_t numberSamples = 0;

-            TMathsModelPtr expectedMeanModel = factory.defaultFeatureModel(model_t::E_IndividualMeanByPerson,
-                                                                           bucketLength, 0.4, true);
-            TMathsModelPtr expectedMinModel = factory.defaultFeatureModel(model_t::E_IndividualMinByPerson,
-                                                                          bucketLength, 0.4, true);
-            TMathsModelPtr expectedMaxModel = factory.defaultFeatureModel(model_t::E_IndividualMaxByPerson,
-                                                                          bucketLength, 0.4, true);
+            TMathsModelPtr expectedMeanModel = factory.defaultFeatureModel(model_t::E_IndividualMeanByPerson, bucketLength, 0.4, true);
+            TMathsModelPtr expectedMinModel = factory.defaultFeatureModel(model_t::E_IndividualMinByPerson, bucketLength, 0.4, true);
+            TMathsModelPtr expectedMaxModel = factory.defaultFeatureModel(model_t::E_IndividualMaxByPerson, bucketLength, 0.4, true);

             std::size_t j = 0;
             core_t::TTime time = startTime;
-            for (;;)
-            {
-                if (j < boost::size(data) && data[j].first < time + bucketLength)
-                {
+            for (;;) {
+                if (j < boost::size(data) && data[j].first < time + bucketLength) {
                     LOG_DEBUG("Adding " << data[j].second << " at " << data[j].first);

                     addArrival(*gatherer, m_ResourceMonitor, data[j].first, "p", data[j].second);
@@ -401,8 +341,7 @@ void CMetricModelTest::testSample()

                     ++j;

-                    if (j % expectedSampleCounts[i] == 0)
-                    {
+                    if (j % expectedSampleCounts[i] == 0) {
                         ++numberSamples;
                         expectedSampleTimes.push_back(maths::CBasicStatistics::mean(expectedSampleTime));
                         expectedMeanSamples.push_back(maths::CBasicStatistics::mean(expectedMeanSample));
@@ -410,52 +349,41 @@ void CMetricModelTest::testSample()
                         expectedMaxSamples.push_back(expectedMaxSample[0]);
                         expectedSampleTime = TMeanAccumulator();
                         expectedMeanSample = TMeanAccumulator();
-                        expectedMinSample = TMinAccumulator();
-                        expectedMaxSample = TMaxAccumulator();
+
expectedMinSample = TMinAccumulator();
+                        expectedMaxSample = TMaxAccumulator();
                     }
-            }
-            else
-            {
+                } else {
                     LOG_DEBUG("Sampling [" << time << ", " << time + bucketLength << ")");

                     model.sample(time, time + bucketLength, m_ResourceMonitor);
-                if (maths::CBasicStatistics::count(expectedMean) > 0.0)
-                {
+                if (maths::CBasicStatistics::count(expectedMean) > 0.0) {
                     expectedBaselineMean.add(maths::CBasicStatistics::mean(expectedMean));
                 }
-                if (numberSamples > 0)
-                {
+                if (numberSamples > 0) {
                     LOG_DEBUG("Adding mean samples = " << core::CContainerPrinter::print(expectedMeanSamples)
-                              << ", min samples = " << core::CContainerPrinter::print(expectedMinSamples)
-                              << ", max samples = " << core::CContainerPrinter::print(expectedMaxSamples));
+                                                       << ", min samples = " << core::CContainerPrinter::print(expectedMinSamples)
+                                                       << ", max samples = " << core::CContainerPrinter::print(expectedMaxSamples));

                     maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights(numberSamples, maths::CConstantWeights::unit<TDouble2Vec>(1));

                     maths::CModelAddSamplesParams params_;
                     params_.integer(false)
-                           .nonNegative(true)
-                           .propagationInterval(1.0)
-                           .weightStyles(COUNT_WEIGHT)
-                           .trendWeights(weights)
-                           .priorWeights(weights);
+                        .nonNegative(true)
+                        .propagationInterval(1.0)
+                        .weightStyles(COUNT_WEIGHT)
+                        .trendWeights(weights)
+                        .priorWeights(weights);

                     maths::CModel::TTimeDouble2VecSizeTrVec expectedMeanSamples_;
                     maths::CModel::TTimeDouble2VecSizeTrVec expectedMinSamples_;
                     maths::CModel::TTimeDouble2VecSizeTrVec expectedMaxSamples_;
-                    for (std::size_t k = 0u; k < numberSamples; ++k)
-                    {
+                    for (std::size_t k = 0u; k < numberSamples; ++k) {
                         // We round to the nearest integer time (note this has to match
                         // the behaviour of CMetricPartialStatistic::time).
                         core_t::TTime sampleTime{static_cast<core_t::TTime>(expectedSampleTimes[k] + 0.5)};
-                        expectedMeanSamples_.emplace_back(sampleTime,
-                                                          TDouble2Vec{expectedMeanSamples[k]},
-                                                          std::size_t(0));
-                        expectedMinSamples_.emplace_back(sampleTime,
-                                                         TDouble2Vec{expectedMinSamples[k]},
-                                                         std::size_t(0));
-                        expectedMaxSamples_.emplace_back(sampleTime,
-                                                         TDouble2Vec{expectedMaxSamples[k]},
-                                                         std::size_t(0));
+                        expectedMeanSamples_.emplace_back(sampleTime, TDouble2Vec{expectedMeanSamples[k]}, std::size_t(0));
+                        expectedMinSamples_.emplace_back(sampleTime, TDouble2Vec{expectedMinSamples[k]}, std::size_t(0));
+                        expectedMaxSamples_.emplace_back(sampleTime, TDouble2Vec{expectedMaxSamples[k]}, std::size_t(0));
                     }
                     expectedMeanModel->addSamples(params_, expectedMeanSamples_);
                     expectedMinModel->addSamples(params_, expectedMinSamples_);
@@ -469,35 +397,31 @@ void CMetricModelTest::testSample()

                 model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Final);
                 TOptionalUInt64 currentCount = model.currentBucketCount(0, time);
-                TDouble1Vec bucketMean = model.currentBucketValue(model_t::E_IndividualMeanByPerson, 0, 0, time);
-                TDouble1Vec baselineMean = model.baselineBucketMean(model_t::E_IndividualMeanByPerson, 0, 0,
-                                                                    type, NO_CORRELATES, time);
+                TDouble1Vec bucketMean = model.currentBucketValue(model_t::E_IndividualMeanByPerson, 0, 0, time);
+                TDouble1Vec baselineMean = model.baselineBucketMean(model_t::E_IndividualMeanByPerson, 0, 0, type, NO_CORRELATES, time);

                 LOG_DEBUG("bucket count = " << core::CContainerPrinter::print(currentCount));
-                LOG_DEBUG("current bucket mean = " << core::CContainerPrinter::print(bucketMean)
-                          << ", expected baseline bucket mean = " << maths::CBasicStatistics::mean(expectedBaselineMean)
-                          << ", baseline bucket mean = " << core::CContainerPrinter::print(baselineMean));
+ LOG_DEBUG("current bucket mean = " << core::CContainerPrinter::print(bucketMean) << ", expected baseline bucket mean = " + << maths::CBasicStatistics::mean(expectedBaselineMean) + << ", baseline bucket mean = " << core::CContainerPrinter::print(baselineMean)); CPPUNIT_ASSERT(currentCount); CPPUNIT_ASSERT_EQUAL(expectedCount, *currentCount); - TDouble1Vec mean = maths::CBasicStatistics::count(expectedMean) > 0.0 ? - TDouble1Vec(1, maths::CBasicStatistics::mean(expectedMean)) : - TDouble1Vec(); - TDouble1Vec min = expectedMin.count() > 0 ? - TDouble1Vec(1, expectedMin[0]) : TDouble1Vec(); - TDouble1Vec max = expectedMax.count() > 0 ? - TDouble1Vec(1, expectedMax[0]) : TDouble1Vec(); + TDouble1Vec mean = maths::CBasicStatistics::count(expectedMean) > 0.0 + ? TDouble1Vec(1, maths::CBasicStatistics::mean(expectedMean)) + : TDouble1Vec(); + TDouble1Vec min = expectedMin.count() > 0 ? TDouble1Vec(1, expectedMin[0]) : TDouble1Vec(); + TDouble1Vec max = expectedMax.count() > 0 ? TDouble1Vec(1, expectedMax[0]) : TDouble1Vec(); CPPUNIT_ASSERT(mean == bucketMean); - if (!baselineMean.empty()) - { + if (!baselineMean.empty()) { baselineMeanError.add(std::fabs(baselineMean[0] - maths::CBasicStatistics::mean(expectedBaselineMean))); } CPPUNIT_ASSERT(mean == featureData(model, model_t::E_IndividualMeanByPerson, 0, time)); - CPPUNIT_ASSERT(min == featureData(model, model_t::E_IndividualMinByPerson, 0, time)); - CPPUNIT_ASSERT(max == featureData(model, model_t::E_IndividualMaxByPerson, 0, time)); + CPPUNIT_ASSERT(min == featureData(model, model_t::E_IndividualMinByPerson, 0, time)); + CPPUNIT_ASSERT(max == featureData(model, model_t::E_IndividualMaxByPerson, 0, time)); CPPUNIT_ASSERT_EQUAL(expectedMeanModel->checksum(), model.details()->model(model_t::E_IndividualMeanByPerson, 0)->checksum()); @@ -538,12 +462,11 @@ void CMetricModelTest::testSample() CPPUNIT_ASSERT_EQUAL(origXml, newXml); expectedCount = 0; - expectedMean = TMeanAccumulator(); - expectedMin = TMinAccumulator(); - expectedMax = TMaxAccumulator(); + expectedMean = TMeanAccumulator(); + expectedMin = TMinAccumulator(); + expectedMax = TMaxAccumulator(); - if (j >= boost::size(data)) - { + if (j >= boost::size(data)) { break; } @@ -556,8 +479,7 @@ void CMetricModelTest::testSample() } } -void CMetricModelTest::testMultivariateSample() -{ +void CMetricModelTest::testMultivariateSample() { LOG_DEBUG("*** testMultivariateSample ***"); using TDoubleVecVecVec = std::vector<TDoubleVecVec>; @@ -572,42 +494,37 @@ void CMetricModelTest::testMultivariateSample() params.s_MaximumUpdatesPerBucket = 0.0; CMetricModelFactory factory(params); - double data_[][3] = - { - { 49, 1.5, 1.1 }, - { 60, 1.3, 1.2 }, - { 61, 1.3, 2.1 }, - { 62, 1.6, 1.5 }, - { 65, 1.7, 1.4 }, - { 66, 1.33, 1.6 }, - { 68, 1.5, 1.37}, - { 84, 1.58, 1.42}, - { 87, 1.6, 1.6 }, - { 157, 1.6, 1.6 }, - { 164, 1.66, 1.55}, - { 199, 1.28, 1.4 }, - { 202, 1.3, 1.1 }, - { 204, 1.5, 1.8 } - }; + double data_[][3] = {{49, 1.5, 1.1}, + {60, 1.3, 1.2}, + {61, 1.3, 2.1}, + {62, 1.6, 1.5}, + {65, 1.7, 1.4}, + {66, 1.33, 1.6}, + {68, 1.5, 1.37}, + {84, 1.58, 1.42}, + {87, 1.6, 1.6}, + {157, 1.6, 1.6}, + {164, 1.66, 1.55}, + {199, 1.28, 1.4}, + {202, 1.3, 1.1}, + {204, 1.5, 1.8}}; TTimeDouble2AryPrVec data; - for (std::size_t i = 0u; i < boost::size(data_); ++i) - { - boost::array<double, 2> values = { { data_[i][1], data_[i][2] } }; + for (std::size_t i = 0u; i < boost::size(data_); ++i) { + boost::array<double, 2> values = {{data_[i][1], data_[i][2]}}; data.push_back(TTimeDouble2AryPr(static_cast<core_t::TTime>(data_[i][0]), values)); } - unsigned int
sampleCounts[] = { 2u, 1u }; - unsigned int expectedSampleCounts[] = { 2u, 1u }; + unsigned int sampleCounts[] = {2u, 1u}; + unsigned int expectedSampleCounts[] = {2u, 1u}; - for (std::size_t i = 0; i < boost::size(sampleCounts); ++i) - { + for (std::size_t i = 0; i < boost::size(sampleCounts); ++i) { LOG_DEBUG("*** sample count = " << sampleCounts[i] << " ***"); CDataGatherer::TFeatureVec features(1, model_t::E_IndividualMeanLatLongByPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_, &sampleCounts[i]); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); // Bucket values. @@ -624,10 +541,8 @@ void CMetricModelTest::testMultivariateSample() std::size_t j = 0; core_t::TTime time = startTime; - for (;;) - { - if (j < data.size() && data[j].first < time + bucketLength) - { + for (;;) { + if (j < data.size() && data[j].first < time + bucketLength) { LOG_DEBUG("Adding " << data[j].second[0] << "," << data[j].second[1] << " at " << data[j].first); addArrival(*gatherer, m_ResourceMonitor, data[j].first, "p", data[j].second[0], data[j].second[1]); @@ -636,67 +551,60 @@ void CMetricModelTest::testMultivariateSample() expectedLatLong.add(TVector2(data[j].second)); expectedLatLongSample.add(TVector2(data[j].second)); - if (++j % expectedSampleCounts[i] == 0) - { + if (++j % expectedSampleCounts[i] == 0) { ++numberSamples; - expectedLatLongSamples.push_back( - TDoubleVec(maths::CBasicStatistics::mean(expectedLatLongSample).begin(), - maths::CBasicStatistics::mean(expectedLatLongSample).end())); + expectedLatLongSamples.push_back(TDoubleVec(maths::CBasicStatistics::mean(expectedLatLongSample).begin(), + maths::CBasicStatistics::mean(expectedLatLongSample).end())); expectedLatLongSample = TMean2Accumulator(); } - } - else - { + } else { LOG_DEBUG("Sampling [" << time << ", " << time + bucketLength << ")"); model.sample(time, time + bucketLength, m_ResourceMonitor); - if (maths::CBasicStatistics::count(expectedLatLong) > 0.0) - { + if (maths::CBasicStatistics::count(expectedLatLong) > 0.0) { expectedBaselineLatLong.add(maths::CBasicStatistics::mean(expectedLatLong)); } - if (numberSamples > 0) - { + if (numberSamples > 0) { std::sort(expectedLatLongSamples.begin(), expectedLatLongSamples.end()); LOG_DEBUG("Adding mean samples = " << core::CContainerPrinter::print(expectedLatLongSamples)); expectedMeanPrior->dataType(maths_t::E_ContinuousData); expectedMeanPrior->addSamples(COUNT_WEIGHT, expectedLatLongSamples, - TDoubleVecVecVec(expectedLatLongSamples.size(), - TDoubleVecVec(1, TDoubleVec(2, 1.0)))); + TDoubleVecVecVec(expectedLatLongSamples.size(), TDoubleVecVec(1, TDoubleVec(2, 1.0)))); numberSamples = 0u; expectedLatLongSamples.clear(); } model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Final); TOptionalUInt64 currentCount = model.currentBucketCount(0, time); - TDouble1Vec bucketLatLong = model.currentBucketValue(model_t::E_IndividualMeanLatLongByPerson, 0, 0, time); - TDouble1Vec baselineLatLong = model.baselineBucketMean(model_t::E_IndividualMeanLatLongByPerson, 0, 0, - type, NO_CORRELATES, time); + TDouble1Vec bucketLatLong = model.currentBucketValue(model_t::E_IndividualMeanLatLongByPerson, 0, 0, time); + TDouble1Vec baselineLatLong = + model.baselineBucketMean(model_t::E_IndividualMeanLatLongByPerson, 0, 0, type, 
NO_CORRELATES, time); LOG_DEBUG("bucket count = " << core::CContainerPrinter::print(currentCount)); - LOG_DEBUG("current bucket mean = " << core::CContainerPrinter::print(bucketLatLong) - << ", expected baseline bucket mean = " << maths::CBasicStatistics::mean(expectedBaselineLatLong) - << ", baseline bucket mean = " << core::CContainerPrinter::print(baselineLatLong)); + LOG_DEBUG("current bucket mean = " << core::CContainerPrinter::print(bucketLatLong) << ", expected baseline bucket mean = " + << maths::CBasicStatistics::mean(expectedBaselineLatLong) + << ", baseline bucket mean = " << core::CContainerPrinter::print(baselineLatLong)); CPPUNIT_ASSERT(currentCount); CPPUNIT_ASSERT_EQUAL(expectedCount, *currentCount); TDouble1Vec latLong; - if (maths::CBasicStatistics::count(expectedLatLong) > 0.0) - { + if (maths::CBasicStatistics::count(expectedLatLong) > 0.0) { latLong.push_back(maths::CBasicStatistics::mean(expectedLatLong)(0)); latLong.push_back(maths::CBasicStatistics::mean(expectedLatLong)(1)); } CPPUNIT_ASSERT(latLong == bucketLatLong); - if (!baselineLatLong.empty()) - { - baselineLatLongError.add(maths::fabs( TVector2(baselineLatLong) - - maths::CBasicStatistics::mean(expectedBaselineLatLong))); + if (!baselineLatLong.empty()) { + baselineLatLongError.add( + maths::fabs(TVector2(baselineLatLong) - maths::CBasicStatistics::mean(expectedBaselineLatLong))); } CPPUNIT_ASSERT(latLong == multivariateFeatureData(model, model_t::E_IndividualMeanLatLongByPerson, 0, time)); CPPUNIT_ASSERT_EQUAL(expectedMeanPrior->checksum(), dynamic_cast<const maths::CMultivariateTimeSeriesModel*>( - model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0))->residualModel().checksum()); + model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0)) + ->residualModel() + .checksum()); // Test persistence. (We check for idempotency.)
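The persistence check that follows, like the matching checks in the other tests, asserts idempotency: serialise the model to XML, restore it into a fresh instance, and require both the checksum and a re-serialisation to match byte for byte. A minimal self-contained sketch of that invariant, using a hypothetical Model with toXml/fromXml/checksum rather than the real core persistence inserters and traversers:

    #include <cassert>
    #include <cstddef>
    #include <functional>
    #include <sstream>
    #include <string>

    // Hypothetical persistable model; the real tests serialise through the
    // core XML state persist/restore machinery, which this does not model.
    struct Model {
        double weight = 0.0;

        std::string toXml() const {
            std::ostringstream xml;
            xml << "<model><weight>" << weight << "</weight></model>";
            return xml.str();
        }

        static Model fromXml(const std::string& xml) {
            Model restored;
            std::size_t begin = xml.find("<weight>") + 8;
            std::size_t end = xml.find("</weight>");
            restored.weight = std::stod(xml.substr(begin, end - begin));
            return restored;
        }

        std::size_t checksum() const { return std::hash<std::string>()(this->toXml()); }
    };

    int main() {
        Model original;
        original.weight = 42.5;

        std::string origXml = original.toXml();
        Model restored = Model::fromXml(origXml);

        // Idempotency: restoring and re-serialising must change nothing.
        assert(original.checksum() == restored.checksum());
        assert(origXml == restored.toXml());
        return 0;
    }
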
std::string origXml; @@ -729,11 +637,10 @@ void CMetricModelTest::testMultivariateSample() CPPUNIT_ASSERT_EQUAL(origChecksum, restoredChecksum); CPPUNIT_ASSERT_EQUAL(origXml, newXml); - expectedCount = 0; + expectedCount = 0; expectedLatLong = TMean2Accumulator(); - if (j >= boost::size(data)) - { + if (j >= boost::size(data)) { break; } @@ -746,8 +653,7 @@ void CMetricModelTest::testMultivariateSample() } } -void CMetricModelTest::testProbabilityCalculationForMetric() -{ +void CMetricModelTest::testProbabilityCalculationForMetric() { LOG_DEBUG("*** testProbabilityCalculationForMetric ***"); using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; @@ -757,7 +663,7 @@ void CMetricModelTest::testProbabilityCalculationForMetric() SModelParams params(bucketLength); CMetricModelFactory factory(params); - std::size_t bucketCounts[] = { 5, 6, 3, 5, 0, 7, 8, 5, 4, 3, 5, 5, 6 }; + std::size_t bucketCounts[] = {5, 6, 3, 5, 0, 7, 8, 5, 4, 3, 5, 5, 6}; double mean = 5.0; double variance = 2.0; @@ -771,7 +677,7 @@ void CMetricModelTest::testProbabilityCalculationForMetric() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TMinAccumulator minProbabilities(2u); @@ -779,46 +685,40 @@ void CMetricModelTest::testProbabilityCalculationForMetric() test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) - { + for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) { TDoubleVec values; rng.generateNormalSamples(mean, variance, bucketCounts[i], values); LOG_DEBUG("values = " << core::CContainerPrinter::print(values)); - LOG_DEBUG("i = " << i << ", anomalousBucket = " << anomalousBucket - << ", offset = " << (i == anomalousBucket ? anomaly : 0.0)); + LOG_DEBUG("i = " << i << ", anomalousBucket = " << anomalousBucket << ", offset = " << (i == anomalousBucket ? anomaly : 0.0)); - for (std::size_t j = 0u; j < values.size(); ++j) - { - addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p", + for (std::size_t j = 0u; j < values.size(); ++j) { + addArrival(*gatherer, + m_ResourceMonitor, + time + static_cast(j), + "p", values[j] + (i == anomalousBucket ? 
anomaly : 0.0)); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - if (model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability) == false) - { + if (model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability) == false) { continue; } LOG_DEBUG("probability = " << annotatedProbability.s_Probability); - if (*model.currentBucketCount(0, time) > 0) - { + if (*model.currentBucketCount(0, time) > 0) { minProbabilities.add(TDoubleSizePr(annotatedProbability.s_Probability, i)); } time += bucketLength; } minProbabilities.sort(); - LOG_DEBUG("minProbabilities = " - << core::CContainerPrinter::print(minProbabilities.begin(), - minProbabilities.end())); + LOG_DEBUG("minProbabilities = " << core::CContainerPrinter::print(minProbabilities.begin(), minProbabilities.end())); CPPUNIT_ASSERT_EQUAL(anomalousBucket, minProbabilities[0].second); CPPUNIT_ASSERT(minProbabilities[0].first / minProbabilities[1].first < 0.05); } -void CMetricModelTest::testProbabilityCalculationForMedian() -{ +void CMetricModelTest::testProbabilityCalculationForMedian() { LOG_DEBUG("*** testProbabilityCalculationForMedian ***"); using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; @@ -828,7 +728,7 @@ void CMetricModelTest::testProbabilityCalculationForMedian() SModelParams params(bucketLength); CMetricModelFactory factory(params); - std::size_t bucketCounts[] = { 5, 6, 3, 5, 0, 7, 8, 5, 4, 3, 5, 5, 6 }; + std::size_t bucketCounts[] = {5, 6, 3, 5, 0, 7, 8, 5, 4, 3, 5, 5, 6}; double mean = 5.0; double variance = 2.0; @@ -838,7 +738,7 @@ void CMetricModelTest::testProbabilityCalculationForMedian() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TMinAccumulator minProbabilities(2u); @@ -846,69 +746,53 @@ void CMetricModelTest::testProbabilityCalculationForMedian() test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) - { + for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) { LOG_DEBUG("i = " << i << ", anomalousBucket = " << anomalousBucket); TDoubleVec values; - if (i == anomalousBucket) - { + if (i == anomalousBucket) { values.push_back(0.0); values.push_back(mean * 3.0); values.push_back(mean * 3.0); - } - else - { + } else { rng.generateNormalSamples(mean, variance, bucketCounts[i], values); } LOG_DEBUG("values = " << core::CContainerPrinter::print(values)); - for (std::size_t j = 0u; j < values.size(); ++j) - { + for (std::size_t j = 0u; j < values.size(); ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p", values[j]); } - model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - if (model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability) == false) - { + if (model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability) == false) { continue; } LOG_DEBUG("probability = " << 
annotatedProbability.s_Probability); - if (*model.currentBucketCount(0, time) > 0) - { + if (*model.currentBucketCount(0, time) > 0) { minProbabilities.add(TDoubleSizePr(annotatedProbability.s_Probability, i)); } time += bucketLength; } minProbabilities.sort(); - LOG_DEBUG("minProbabilities = " - << core::CContainerPrinter::print(minProbabilities.begin(), - minProbabilities.end())); + LOG_DEBUG("minProbabilities = " << core::CContainerPrinter::print(minProbabilities.begin(), minProbabilities.end())); CPPUNIT_ASSERT_EQUAL(anomalousBucket, minProbabilities[0].second); CPPUNIT_ASSERT(minProbabilities[0].first / minProbabilities[1].first < 0.05); - - std::size_t pid(0); - const CMetricModel::TFeatureData *fd = model.featureData( - ml::model_t::E_IndividualMedianByPerson, pid, - time - bucketLength); + const CMetricModel::TFeatureData* fd = model.featureData(ml::model_t::E_IndividualMedianByPerson, pid, time - bucketLength); // assert there is only 1 value in the last bucket and its the median CPPUNIT_ASSERT_EQUAL(fd->s_BucketValue->value()[0], mean * 3.0); CPPUNIT_ASSERT_EQUAL(fd->s_BucketValue->value().size(), std::size_t(1)); } -void CMetricModelTest::testProbabilityCalculationForLowMean() -{ +void CMetricModelTest::testProbabilityCalculationForLowMean() { LOG_DEBUG("*** testProbabilityCalculationForLowMean ***"); core_t::TTime startTime(0); @@ -930,52 +814,45 @@ void CMetricModelTest::testProbabilityCalculationForLowMean() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberOfBuckets; ++i) - { + for (std::size_t i = 0u; i < numberOfBuckets; ++i) { double meanForBucket = mean; - if (i == lowMeanBucket) - { + if (i == lowMeanBucket) { meanForBucket = lowMean; } - if (i == highMeanBucket) - { + if (i == highMeanBucket) { meanForBucket = highMean; } TDoubleVec values; rng.generateNormalSamples(meanForBucket, variance, bucketCount, values); LOG_DEBUG("values = " << core::CContainerPrinter::print(values)); - for (std::size_t j = 0u; j < values.size(); ++j) - { + for (std::size_t j = 0u; j < values.size(); ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p", values[j]); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); time += bucketLength; } - LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities.begin(), - probabilities.end())); + LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities.begin(), probabilities.end())); CPPUNIT_ASSERT(probabilities[lowMeanBucket] < 0.01); CPPUNIT_ASSERT(probabilities[highMeanBucket] > 0.1); } -void CMetricModelTest::testProbabilityCalculationForHighMean() -{ +void 
CMetricModelTest::testProbabilityCalculationForHighMean() { LOG_DEBUG("*** testProbabilityCalculationForHighMean ***"); core_t::TTime startTime(0); @@ -997,37 +874,32 @@ void CMetricModelTest::testProbabilityCalculationForHighMean() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberOfBuckets; ++i) - { + for (std::size_t i = 0u; i < numberOfBuckets; ++i) { double meanForBucket = mean; - if (i == lowMeanBucket) - { + if (i == lowMeanBucket) { meanForBucket = lowMean; } - if (i == highMeanBucket) - { + if (i == highMeanBucket) { meanForBucket = highMean; } TDoubleVec values; rng.generateNormalSamples(meanForBucket, variance, bucketCount, values); LOG_DEBUG("values = " << core::CContainerPrinter::print(values)); - for (std::size_t j = 0u; j < values.size(); ++j) - { + for (std::size_t j = 0u; j < values.size(); ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p", values[j]); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); @@ -1040,8 +912,7 @@ void CMetricModelTest::testProbabilityCalculationForHighMean() CPPUNIT_ASSERT(probabilities[highMeanBucket] < 0.01); } -void CMetricModelTest::testProbabilityCalculationForLowSum() -{ +void CMetricModelTest::testProbabilityCalculationForLowSum() { LOG_DEBUG("*** testProbabilityCalculationForLowSum ***"); core_t::TTime startTime(0); @@ -1063,37 +934,32 @@ void CMetricModelTest::testProbabilityCalculationForLowSum() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberOfBuckets; ++i) - { + for (std::size_t i = 0u; i < numberOfBuckets; ++i) { double meanForBucket = mean; - if (i == lowSumBucket) - { + if (i == lowSumBucket) { meanForBucket = lowMean; } - if (i == highSumBucket) - { + if (i == highSumBucket) { meanForBucket = highMean; } TDoubleVec values; rng.generateNormalSamples(meanForBucket, variance, bucketCount, values); LOG_DEBUG("values = " << core::CContainerPrinter::print(values)); - for (std::size_t j = 0u; j < values.size(); ++j) - { + for (std::size_t j = 0u; j < values.size(); ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p", values[j]); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); 
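The four low/high mean and sum tests share one shape: most buckets sit at a typical level, one is depressed and one is elevated, and the assertions then require a tiny probability only on the side the function is sensitive to (the depressed bucket for the low_ variants, the elevated one for the high_ variants). A toy stand-in for that one-sided scoring under an assumed normal baseline; this is not the model's actual probability computation:

    #include <cmath>
    #include <cstdio>

    // P(X <= bucketMean) when the mean of `count` samples is normally
    // distributed about `mean` with variance `variance / count`.
    double lowTailProbability(double bucketMean, double mean, double variance, double count) {
        double z = (bucketMean - mean) / std::sqrt(variance / count);
        return 0.5 * std::erfc(-z / std::sqrt(2.0));
    }

    int main() {
        double mean = 50.0, variance = 5.0, count = 50.0;
        // A typical bucket is unsurprising; a depressed one is vanishingly improbable.
        std::printf("typical bucket: %g\n", lowTailProbability(50.2, mean, variance, count));
        std::printf("low bucket:     %g\n", lowTailProbability(5.0, mean, variance, count));
        return 0;
    }
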
SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); @@ -1105,8 +971,7 @@ void CMetricModelTest::testProbabilityCalculationForLowSum() CPPUNIT_ASSERT(probabilities[highSumBucket] > 0.1); } -void CMetricModelTest::testProbabilityCalculationForHighSum() -{ +void CMetricModelTest::testProbabilityCalculationForHighSum() { LOG_DEBUG("*** testProbabilityCalculationForLowSum ***"); core_t::TTime startTime(0); @@ -1128,37 +993,32 @@ void CMetricModelTest::testProbabilityCalculationForHighSum() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberOfBuckets; ++i) - { + for (std::size_t i = 0u; i < numberOfBuckets; ++i) { double meanForBucket = mean; - if (i == lowSumBucket) - { + if (i == lowSumBucket) { meanForBucket = lowMean; } - if (i == highSumBucket) - { + if (i == highSumBucket) { meanForBucket = highMean; } TDoubleVec values; rng.generateNormalSamples(meanForBucket, variance, bucketCount, values); LOG_DEBUG("values = " << core::CContainerPrinter::print(values)); - for (std::size_t j = 0u; j < values.size(); ++j) - { + for (std::size_t j = 0u; j < values.size(); ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p", values[j]); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); @@ -1170,15 +1030,13 @@ void CMetricModelTest::testProbabilityCalculationForHighSum() CPPUNIT_ASSERT(probabilities[highSumBucket] < 0.01); } -void CMetricModelTest::testProbabilityCalculationForLatLong() -{ +void CMetricModelTest::testProbabilityCalculationForLatLong() { LOG_DEBUG("*** testProbabilityCalculationForLatLong ***"); // TODO } -void CMetricModelTest::testInfluence() -{ +void CMetricModelTest::testInfluence() { LOG_DEBUG("*** testInfluence ***"); using TMinAccumulator = maths::CBasicStatistics::SMin::TAccumulator; @@ -1189,8 +1047,7 @@ void CMetricModelTest::testInfluence() LOG_DEBUG("Test min and max influence"); - for (auto feature : {model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson}) - { + for (auto feature : {model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson}) { core_t::TTime startTime{0}; core_t::TTime bucketLength{10}; std::size_t numberOfBuckets{50}; @@ -1198,7 +1055,7 @@ void CMetricModelTest::testInfluence() double mean{5.0}; double variance{1.0}; std::string 
influencer{"I"}; - TStrVec influencerValues{ "i1", "i2", "i3", "i4", "i5" }; + TStrVec influencerValues{"i1", "i2", "i3", "i4", "i5"}; SModelParams params(bucketLength); CMetricModelFactory factory(params); @@ -1213,19 +1070,17 @@ void CMetricModelTest::testInfluence() CAnomalyDetectorModel::TModelPtr model_(factory.makeModel(initData)); CPPUNIT_ASSERT(model_); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, model_->category()); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberOfBuckets; ++i, time += bucketLength) - { + for (std::size_t i = 0u; i < numberOfBuckets; ++i, time += bucketLength) { TDoubleVec samples; rng.generateNormalSamples(mean, variance, bucketCount, samples); TMinAccumulator min; TMaxAccumulator max; - for (std::size_t j = 0u; j < samples.size(); ++j) - { + for (std::size_t j = 0u; j < samples.size(); ++j) { addArrival(*gatherer, m_ResourceMonitor, time, "p", samples[j], TOptionalStr(influencerValues[j])); min.add(TDoubleStrPr(samples[j], influencerValues[j])); max.add(TDoubleStrPr(samples[j], influencerValues[j])); @@ -1235,25 +1090,20 @@ void CMetricModelTest::testInfluence() CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability); + model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability); LOG_DEBUG("influences = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); - if (!annotatedProbability.s_Influences.empty()) - { + if (!annotatedProbability.s_Influences.empty()) { std::size_t j = 0u; - for (/**/; j < annotatedProbability.s_Influences.size(); ++j) - { - if ( feature == model_t::E_IndividualMinByPerson - && *annotatedProbability.s_Influences[j].first.second == min[0].second - && std::fabs(annotatedProbability.s_Influences[j].second - 1.0) < 1e-10) - { + for (/**/; j < annotatedProbability.s_Influences.size(); ++j) { + if (feature == model_t::E_IndividualMinByPerson && + *annotatedProbability.s_Influences[j].first.second == min[0].second && + std::fabs(annotatedProbability.s_Influences[j].second - 1.0) < 1e-10) { break; } - if ( feature == model_t::E_IndividualMaxByPerson - && *annotatedProbability.s_Influences[j].first.second == max[0].second - && std::fabs(annotatedProbability.s_Influences[j].second - 1.0) < 1e-10) - { + if (feature == model_t::E_IndividualMaxByPerson && + *annotatedProbability.s_Influences[j].first.second == max[0].second && + std::fabs(annotatedProbability.s_Influences[j].second - 1.0) < 1e-10) { break; } } @@ -1263,183 +1113,172 @@ void CMetricModelTest::testInfluence() } auto testFeature = [this](model_t::EFeature feature, - const TDoubleVecVec &values, - const TStrVecVec &influencers, - const TStrDoubleDoubleTrVecVec &influences) - { - core_t::TTime startTime{0}; - core_t::TTime bucketLength{10}; - - SModelParams params(bucketLength); - CMetricModelFactory factory(params); - CDataGatherer::TFeatureVec features{feature}; - factory.features(features); - factory.bucketLength(bucketLength); - factory.fieldNames("", "", "P", "V", TStrVec(1, "I")); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, 
m_ResourceMonitor)); - CModelFactory::SModelInitializationData initData(gatherer); - CAnomalyDetectorModel::TModelPtr model_(factory.makeModel(initData)); - CPPUNIT_ASSERT(model_); - CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, model_->category()); - CMetricModel &model = static_cast(*model_.get()); + const TDoubleVecVec& values, + const TStrVecVec& influencers, + const TStrDoubleDoubleTrVecVec& influences) { + core_t::TTime startTime{0}; + core_t::TTime bucketLength{10}; - SAnnotatedProbability annotatedProbability; + SModelParams params(bucketLength); + CMetricModelFactory factory(params); + CDataGatherer::TFeatureVec features{feature}; + factory.features(features); + factory.bucketLength(bucketLength); + factory.fieldNames("", "", "P", "V", TStrVec(1, "I")); + CModelFactory::SGathererInitializationData gathererInitData(startTime); + CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); + CModelFactory::SModelInitializationData initData(gatherer); + CAnomalyDetectorModel::TModelPtr model_(factory.makeModel(initData)); + CPPUNIT_ASSERT(model_); + CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, model_->category()); + CMetricModel& model = static_cast(*model_.get()); - core_t::TTime time{startTime}; - for (std::size_t i = 0u; i < values.size(); ++i) - { - processBucket(time, bucketLength, values[i], influencers[i], - *gatherer, m_ResourceMonitor, model, annotatedProbability); - CPPUNIT_ASSERT_EQUAL(influences[i].size(), annotatedProbability.s_Influences.size()); - if (influences[i].size() > 0) - { - for (const auto &expected : influences[i]) - { - bool found{false}; - for (const auto &actual : annotatedProbability.s_Influences) - { - if (expected.first == *actual.first.second) - { - CPPUNIT_ASSERT( actual.second >= expected.second - && actual.second <= expected.third); - found = true; - break; - } + SAnnotatedProbability annotatedProbability; + + core_t::TTime time{startTime}; + for (std::size_t i = 0u; i < values.size(); ++i) { + processBucket(time, bucketLength, values[i], influencers[i], *gatherer, m_ResourceMonitor, model, annotatedProbability); + CPPUNIT_ASSERT_EQUAL(influences[i].size(), annotatedProbability.s_Influences.size()); + if (influences[i].size() > 0) { + for (const auto& expected : influences[i]) { + bool found{false}; + for (const auto& actual : annotatedProbability.s_Influences) { + if (expected.first == *actual.first.second) { + CPPUNIT_ASSERT(actual.second >= expected.second && actual.second <= expected.third); + found = true; + break; } - CPPUNIT_ASSERT(found); } + CPPUNIT_ASSERT(found); } - time += bucketLength; } - }; + time += bucketLength; + } + }; LOG_DEBUG("Test mean"); { - TDoubleVecVec values{ { 1.0, 2.3, 2.1 }, - { 8.0 }, - { 4.3, 5.2, 3.4 }, - { 3.2, 3.9 }, - { 20.1, 2.8, 3.9 }, - { 12.1, 4.2, 5.7, 3.2 }, - { 0.1, 0.3, 5.4 }, - { 40.5, 7.3 }, - { 6.4, 7.0, 7.1, 6.6, 7.1, 6.7 }, - { 0.3 } }; - TStrVecVec influencers{ { "i1", "i1", "i2" }, - { "i1" }, - { "i1", "i1", "i1" }, - { "i3", "i3" }, - { "i2", "i1", "i1" }, - { "i1", "i2", "i2", "i2" }, - { "i1", "i1", "i3" }, - { "i1", "i2" }, - { "i1", "i2", "i3", "i4", "i5", "i6" }, - { "i2" } }; - TStrDoubleDoubleTrVecVec influences{ { }, - { }, - { }, - { }, - { }, - { }, - { core::make_triple(std::string{"i1"}, 0.9, 1.0) }, - { core::make_triple(std::string{"i1"}, 0.8, 0.9) }, - { }, - { core::make_triple(std::string{"i2"}, 1.0, 1.0) } }; + TDoubleVecVec values{{1.0, 2.3, 2.1}, + {8.0}, + {4.3, 5.2, 
3.4}, + {3.2, 3.9}, + {20.1, 2.8, 3.9}, + {12.1, 4.2, 5.7, 3.2}, + {0.1, 0.3, 5.4}, + {40.5, 7.3}, + {6.4, 7.0, 7.1, 6.6, 7.1, 6.7}, + {0.3}}; + TStrVecVec influencers{{"i1", "i1", "i2"}, + {"i1"}, + {"i1", "i1", "i1"}, + {"i3", "i3"}, + {"i2", "i1", "i1"}, + {"i1", "i2", "i2", "i2"}, + {"i1", "i1", "i3"}, + {"i1", "i2"}, + {"i1", "i2", "i3", "i4", "i5", "i6"}, + {"i2"}}; + TStrDoubleDoubleTrVecVec influences{{}, + {}, + {}, + {}, + {}, + {}, + {core::make_triple(std::string{"i1"}, 0.9, 1.0)}, + {core::make_triple(std::string{"i1"}, 0.8, 0.9)}, + {}, + {core::make_triple(std::string{"i2"}, 1.0, 1.0)}}; testFeature(model_t::E_IndividualMeanByPerson, values, influencers, influences); } LOG_DEBUG("Test sum"); { - TDoubleVecVec values{ { 1.0, 2.3, 2.1, 5.9 }, - { 10.0 }, - { 4.3, 5.2, 3.4, 6.2, 7.8 }, - { 3.2, 3.9 }, - { 20.1, 2.8, 3.9 }, - { 12.1, 4.2, 5.7, 3.2 }, - { 0.1, 0.3, 5.4 }, - { 48.1, 10.1 }, - { 6.8, 7.2, 7.3, 6.8, 7.3, 6.9 }, - { 0.4 } }; - TStrVecVec influencers{ { "i1", "i1", "i2", "i2" }, - { "i1" }, - { "i1", "i1", "i1", "i1", "i3" }, - { "i3", "i3" }, - { "i2", "i1", "i1" }, - { "i1", "i2", "i2", "i2" }, - { "i1", "i1", "i3" }, - { "i1", "i2" }, - { "i1", "i2", "i3", "i4", "i5", "i6" }, - { "i2" } }; - TStrDoubleDoubleTrVecVec influences{ { }, - { }, - { }, - { }, - { core::make_triple(std::string{"i1"}, 0.6, 0.7), - core::make_triple(std::string{"i2"}, 0.9, 1.0) }, - { core::make_triple(std::string{"i1"}, 0.9, 1.0), - core::make_triple(std::string{"i2"}, 0.9, 1.0) }, - { }, - { core::make_triple(std::string{"i1"}, 1.0, 1.0) }, - { }, - { core::make_triple(std::string{"i2"}, 1.0, 1.0) } }; + TDoubleVecVec values{{1.0, 2.3, 2.1, 5.9}, + {10.0}, + {4.3, 5.2, 3.4, 6.2, 7.8}, + {3.2, 3.9}, + {20.1, 2.8, 3.9}, + {12.1, 4.2, 5.7, 3.2}, + {0.1, 0.3, 5.4}, + {48.1, 10.1}, + {6.8, 7.2, 7.3, 6.8, 7.3, 6.9}, + {0.4}}; + TStrVecVec influencers{{"i1", "i1", "i2", "i2"}, + {"i1"}, + {"i1", "i1", "i1", "i1", "i3"}, + {"i3", "i3"}, + {"i2", "i1", "i1"}, + {"i1", "i2", "i2", "i2"}, + {"i1", "i1", "i3"}, + {"i1", "i2"}, + {"i1", "i2", "i3", "i4", "i5", "i6"}, + {"i2"}}; + TStrDoubleDoubleTrVecVec influences{ + {}, + {}, + {}, + {}, + {core::make_triple(std::string{"i1"}, 0.6, 0.7), core::make_triple(std::string{"i2"}, 0.9, 1.0)}, + {core::make_triple(std::string{"i1"}, 0.9, 1.0), core::make_triple(std::string{"i2"}, 0.9, 1.0)}, + {}, + {core::make_triple(std::string{"i1"}, 1.0, 1.0)}, + {}, + {core::make_triple(std::string{"i2"}, 1.0, 1.0)}}; testFeature(model_t::E_IndividualSumByBucketAndPerson, values, influencers, influences); } LOG_DEBUG("Test varp"); { - TDoubleVecVec values{ { 1.0, 2.3, 2.1, 5.9 }, - { 10.0 }, - { 4.3, 5.2, 3.4, 6.2, 7.8 }, - { 3.2, 4.9 }, - { 3.3, 3.2, 2.4, 4.2, 6.8 }, - { 3.2, 5.9 }, - { 20.5, 12.3 }, - { 12.1, 4.2, 5.7, 3.2 }, - { 0.1, 0.3, 0.2 }, - { 10.1, 12.8, 3.9 }, - { 7.0, 7.0, 7.1, 6.8, 37.1, 6.7 }, - { 0.3 } }; - TStrVecVec influencers{ { "i1", "i1", "i2", "i2" }, - { "i1" }, - { "i1", "i1", "i1", "i1", "i3" }, - { "i3", "i3" }, - { "i1", "i1", "i1", "i1", "i3" }, - { "i3", "i3" }, - { "i1", "i2" }, - { "i1", "i2", "i2", "i2" }, - { "i1", "i1", "i3" }, - { "i2", "i1", "i1" }, - { "i1", "i2", "i3", "i4", "i5", "i6" }, - { "i2" } }; - TStrDoubleDoubleTrVecVec influences{ { }, - { }, - { }, - { }, - { }, - { }, - { }, - { }, - { core::make_triple(std::string{"i1"}, 0.9, 1.0), - core::make_triple(std::string{"i3"}, 0.9, 1.0) }, - { core::make_triple(std::string{"i1"}, 0.9, 1.0) }, - { core::make_triple(std::string{"i5"}, 0.9, 1.0) }, - { } }; + TDoubleVecVec 
values{{1.0, 2.3, 2.1, 5.9}, + {10.0}, + {4.3, 5.2, 3.4, 6.2, 7.8}, + {3.2, 4.9}, + {3.3, 3.2, 2.4, 4.2, 6.8}, + {3.2, 5.9}, + {20.5, 12.3}, + {12.1, 4.2, 5.7, 3.2}, + {0.1, 0.3, 0.2}, + {10.1, 12.8, 3.9}, + {7.0, 7.0, 7.1, 6.8, 37.1, 6.7}, + {0.3}}; + TStrVecVec influencers{{"i1", "i1", "i2", "i2"}, + {"i1"}, + {"i1", "i1", "i1", "i1", "i3"}, + {"i3", "i3"}, + {"i1", "i1", "i1", "i1", "i3"}, + {"i3", "i3"}, + {"i1", "i2"}, + {"i1", "i2", "i2", "i2"}, + {"i1", "i1", "i3"}, + {"i2", "i1", "i1"}, + {"i1", "i2", "i3", "i4", "i5", "i6"}, + {"i2"}}; + TStrDoubleDoubleTrVecVec influences{ + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {core::make_triple(std::string{"i1"}, 0.9, 1.0), core::make_triple(std::string{"i3"}, 0.9, 1.0)}, + {core::make_triple(std::string{"i1"}, 0.9, 1.0)}, + {core::make_triple(std::string{"i5"}, 0.9, 1.0)}, + {}}; testFeature(model_t::E_IndividualVarianceByPerson, values, influencers, influences); } } -void CMetricModelTest::testLatLongInfluence() -{ +void CMetricModelTest::testLatLongInfluence() { LOG_DEBUG("*** testLatLongInfluence ***"); // TODO } -void CMetricModelTest::testPrune() -{ +void CMetricModelTest::testPrune() { LOG_DEBUG("*** testPrune ***"); maths::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; @@ -1452,17 +1291,14 @@ void CMetricModelTest::testPrune() const core_t::TTime startTime = 1346968800; const core_t::TTime bucketLength = 3600; - const std::string people[] = - { - std::string("p1"), - std::string("p2"), - std::string("p3"), - std::string("p4"), - std::string("p5"), - std::string("p6"), - std::string("p7"), - std::string("p8") - }; + const std::string people[] = {std::string("p1"), + std::string("p2"), + std::string("p3"), + std::string("p4"), + std::string("p5"), + std::string("p6"), + std::string("p7"), + std::string("p8")}; TSizeVecVec eventCounts; eventCounts.push_back(TSizeVec(1000u, 0)); @@ -1492,7 +1328,7 @@ void CMetricModelTest::testPrune() eventCounts[7][8] = 9; eventCounts[7][15] = 12; - const std::size_t expectedPeople[] = { 1, 4, 5 }; + const std::size_t expectedPeople[] = {1, 4, 5}; SModelParams params(bucketLength); params.s_DecayRate = 0.01; @@ -1506,32 +1342,26 @@ void CMetricModelTest::testPrune() CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model_(factory.makeModel(modelInitData)); - CMetricModel *model = dynamic_cast(model_.get()); + CMetricModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); CModelFactory::TDataGathererPtr expectedGatherer(factory.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData expectedModelInitData(expectedGatherer); CAnomalyDetectorModel::TModelPtr expectedModelHolder(factory.makeModel(expectedModelInitData)); - CMetricModel *expectedModel = dynamic_cast(expectedModelHolder.get()); + CMetricModel* expectedModel = dynamic_cast(expectedModelHolder.get()); CPPUNIT_ASSERT(expectedModel); test::CRandomNumbers rng; TEventDataVec events; core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < eventCounts.size(); ++i, bucketStart = startTime) - { - for (std::size_t j = 0u; j < eventCounts[i].size(); ++j, bucketStart += bucketLength) - { + for (std::size_t i = 0u; i < eventCounts.size(); ++i, bucketStart = startTime) { + for (std::size_t j = 0u; j < eventCounts[i].size(); ++j, bucketStart += bucketLength) { core_t::TTime n = static_cast(eventCounts[i][j]); - if (n > 0) - { + if (n > 0) { TDoubleVec samples; 
rng.generateUniformSamples(0.0, 5.0, static_cast(n), samples); - for (core_t::TTime k = 0, time = bucketStart, dt = bucketLength / n; - k < n; - ++k, time += dt) - { + for (core_t::TTime k = 0, time = bucketStart, dt = bucketLength / n; k < n; ++k, time += dt) { std::size_t pid = addPerson(people[i], gatherer, m_ResourceMonitor); events.push_back(makeEventData(time, pid, samples[static_cast(k)])); } @@ -1543,36 +1373,24 @@ void CMetricModelTest::testPrune() TEventDataVec expectedEvents; expectedEvents.reserve(events.size()); TSizeSizeMap mapping; - for (std::size_t i = 0u; i < boost::size(expectedPeople); ++i) - { + for (std::size_t i = 0u; i < boost::size(expectedPeople); ++i) { std::size_t pid = addPerson(people[expectedPeople[i]], expectedGatherer, m_ResourceMonitor); mapping[expectedPeople[i]] = pid; } - for (std::size_t i = 0u; i < events.size(); ++i) - { - if (std::binary_search(boost::begin(expectedPeople), - boost::end(expectedPeople), - events[i].personId())) - { - expectedEvents.push_back(makeEventData(events[i].time(), - mapping[*events[i].personId()], - events[i].values()[0][0])); + for (std::size_t i = 0u; i < events.size(); ++i) { + if (std::binary_search(boost::begin(expectedPeople), boost::end(expectedPeople), events[i].personId())) { + expectedEvents.push_back(makeEventData(events[i].time(), mapping[*events[i].personId()], events[i].values()[0][0])); } } bucketStart = startTime; - for (std::size_t i = 0u; i < events.size(); ++i) - { - if (events[i].time() >= bucketStart + bucketLength) - { + for (std::size_t i = 0u; i < events.size(); ++i) { + if (events[i].time() >= bucketStart + bucketLength) { model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } - addArrival(*gatherer, - m_ResourceMonitor, - events[i].time(), - gatherer->personName(events[i].personId().get()), - events[i].values()[0][0]); + addArrival( + *gatherer, m_ResourceMonitor, events[i].time(), gatherer->personName(events[i].personId().get()), events[i].values()[0][0]); } model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); size_t maxDimensionBeforePrune(model->dataGatherer().maxDimension()); @@ -1581,10 +1399,8 @@ void CMetricModelTest::testPrune() CPPUNIT_ASSERT_EQUAL(maxDimensionBeforePrune, maxDimensionAfterPrune); bucketStart = startTime; - for (std::size_t i = 0u; i < expectedEvents.size(); ++i) - { - if (expectedEvents[i].time() >= bucketStart + bucketLength) - { + for (std::size_t i = 0u; i < expectedEvents.size(); ++i) { + if (expectedEvents[i].time() >= bucketStart + bucketLength) { expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } @@ -1605,8 +1421,7 @@ void CMetricModelTest::testPrune() bucketStart = gatherer->currentBucketStartTime() + bucketLength; std::string newPersons[] = {"p9", "p10", "p11", "p12", "13"}; - for (std::size_t i = 0u; i < boost::size(newPersons); ++i) - { + for (std::size_t i = 0u; i < boost::size(newPersons); ++i) { std::size_t newPid = addPerson(newPersons[i], gatherer, m_ResourceMonitor); CPPUNIT_ASSERT(newPid < 8); @@ -1632,39 +1447,25 @@ void CMetricModelTest::testPrune() CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, clonedModelHolder->dataGatherer().numberActivePeople()); } -void CMetricModelTest::testKey() -{ - function_t::EFunction countFunctions[] = - { - function_t::E_IndividualMetric, - function_t::E_IndividualMetricMean, - function_t::E_IndividualMetricMin, - function_t::E_IndividualMetricMax, - function_t::E_IndividualMetricSum - 
}; - bool useNull[] = { true, false }; - std::string byField[] = { "", "by" }; - std::string partitionField[] = { "", "partition" }; +void CMetricModelTest::testKey() { + function_t::EFunction countFunctions[] = {function_t::E_IndividualMetric, + function_t::E_IndividualMetricMean, + function_t::E_IndividualMetricMin, + function_t::E_IndividualMetricMax, + function_t::E_IndividualMetricSum}; + bool useNull[] = {true, false}; + std::string byField[] = {"", "by"}; + std::string partitionField[] = {"", "partition"}; CAnomalyDetectorModelConfig config = CAnomalyDetectorModelConfig::defaultConfig(); int identifier = 0; - for (std::size_t i = 0u; i < boost::size(countFunctions); ++i) - { - for (std::size_t j = 0u; j < boost::size(useNull); ++j) - { - for (std::size_t k = 0u; k < boost::size(byField); ++k) - { - for (std::size_t l = 0u; l < boost::size(partitionField); ++l) - { - CSearchKey key(++identifier, - countFunctions[i], - useNull[j], - model_t::E_XF_None, - "value", - byField[k], - "", - partitionField[l]); + for (std::size_t i = 0u; i < boost::size(countFunctions); ++i) { + for (std::size_t j = 0u; j < boost::size(useNull); ++j) { + for (std::size_t k = 0u; k < boost::size(byField); ++k) { + for (std::size_t l = 0u; l < boost::size(partitionField); ++l) { + CSearchKey key( + ++identifier, countFunctions[i], useNull[j], model_t::E_XF_None, "value", byField[k], "", partitionField[l]); CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = config.factory(key); @@ -1677,8 +1478,7 @@ void CMetricModelTest::testKey() } } -void CMetricModelTest::testSkipSampling() -{ +void CMetricModelTest::testSkipSampling() { LOG_DEBUG("*** testSkipSampling ***"); core_t::TTime startTime(100); @@ -1698,27 +1498,24 @@ void CMetricModelTest::testSkipSampling() CAnomalyDetectorModel::TModelPtr modelNoGapPtr(factory.makeModel(initDataNoGap)); CPPUNIT_ASSERT(modelNoGapPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, modelNoGapPtr->category()); - CMetricModel &modelNoGap = static_cast(*modelNoGapPtr.get()); + CMetricModel& modelNoGap = static_cast(*modelNoGapPtr.get()); { - TStrVec influencerValues1{ "i1" }; - TDoubleVec bucket1{ 1.0 }; - TDoubleVec bucket2{ 5.0 }; - TDoubleVec bucket3{ 10.0 }; + TStrVec influencerValues1{"i1"}; + TDoubleVec bucket1{1.0}; + TDoubleVec bucket2{5.0}; + TDoubleVec bucket3{10.0}; SAnnotatedProbability annotatedProbability; core_t::TTime time = startTime; - processBucket(time, bucketLength, bucket1, influencerValues1, - *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability); + processBucket(time, bucketLength, bucket1, influencerValues1, *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability); time += bucketLength; - processBucket(time, bucketLength, bucket2, influencerValues1, - *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability); + processBucket(time, bucketLength, bucket2, influencerValues1, *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability); time += bucketLength; - processBucket(time, bucketLength, bucket3, influencerValues1, - *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability); + processBucket(time, bucketLength, bucket3, influencerValues1, *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability); } CModelFactory::SGathererInitializationData gathererWithGapInitData(startTime); @@ -1728,43 +1525,44 @@ void CMetricModelTest::testSkipSampling() CAnomalyDetectorModel::TModelPtr modelWithGapPtr(factory.makeModel(initDataWithGap)); CPPUNIT_ASSERT(modelWithGapPtr); 
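testSkipSampling drives two identically configured models with the same bucket values, opens a ten-bucket gap in one of them, skips it with skipSampling, and then requires the two residual model checksums to agree: a skipped interval must leave no trace in the learned state. A toy model with the same contract; it deliberately ignores the time decay that the real models must also suspend across the gap:

    #include <cassert>

    // Toy "model" whose state depends only on the samples it receives, so
    // skipping an interval is exactly equivalent to never having seen it.
    struct ToyModel {
        double sum = 0.0;
        long count = 0;
        long lastTime = 0;

        void sample(long bucketStart, double value) {
            lastTime = bucketStart;
            sum += value;
            ++count;
        }

        // Fast-forward the clock without touching the learned state.
        void skipSampling(long time) { lastTime = time; }

        long checksum() const { return static_cast<long>(sum * 1000.0) ^ count; }
    };

    int main() {
        ToyModel noGap, withGap;
        noGap.sample(100, 1.0);
        noGap.sample(200, 5.0);
        noGap.sample(300, 10.0);

        withGap.sample(100, 1.0);
        withGap.skipSampling(1100); // ten-bucket gap
        withGap.sample(1100, 5.0);
        withGap.sample(1200, 10.0);

        // Same contract the test asserts on the residual models.
        assert(noGap.checksum() == withGap.checksum());
        return 0;
    }
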
CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, modelWithGapPtr->category()); - CMetricModel &modelWithGap = static_cast(*modelWithGapPtr.get()); + CMetricModel& modelWithGap = static_cast(*modelWithGapPtr.get()); core_t::TTime gap(bucketLength * 10); { - TStrVec influencerValues1{ "i1" }; - TDoubleVec bucket1{ 1.0 }; - TDoubleVec bucket2{ 5.0 }; - TDoubleVec bucket3{ 10.0 }; + TStrVec influencerValues1{"i1"}; + TDoubleVec bucket1{1.0}; + TDoubleVec bucket2{5.0}; + TDoubleVec bucket3{10.0}; SAnnotatedProbability annotatedProbability; core_t::TTime time = startTime; - processBucket(time, bucketLength, bucket1, influencerValues1, - *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability); + processBucket( + time, bucketLength, bucket1, influencerValues1, *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability); time += gap; modelWithGap.skipSampling(time); LOG_DEBUG("Calling sample over skipped interval should do nothing except print some ERRORs"); modelWithGap.sample(startTime + bucketLength, time, m_ResourceMonitor); - processBucket(time, bucketLength, bucket2, influencerValues1, - *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability); + processBucket( + time, bucketLength, bucket2, influencerValues1, *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability); time += bucketLength; - processBucket(time, bucketLength, bucket3, influencerValues1, - *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability); + processBucket( + time, bucketLength, bucket3, influencerValues1, *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability); } CPPUNIT_ASSERT_EQUAL( - static_cast( - modelNoGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->residualModel().checksum(), - static_cast( - modelWithGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->residualModel().checksum()); + static_cast(modelNoGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0)) + ->residualModel() + .checksum(), + static_cast(modelWithGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0)) + ->residualModel() + .checksum()); } -void CMetricModelTest::testExplicitNulls() -{ +void CMetricModelTest::testExplicitNulls() { LOG_DEBUG("*** testExplicitNulls ***"); core_t::TTime startTime(100); @@ -1784,7 +1582,7 @@ void CMetricModelTest::testExplicitNulls() CAnomalyDetectorModel::TModelPtr modelSkipGapPtr(factory.makeModel(initDataSkipGap)); CPPUNIT_ASSERT(modelSkipGapPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, modelSkipGapPtr->category()); - CMetricModel &modelSkipGap = static_cast(*modelSkipGapPtr.get()); + CMetricModel& modelSkipGap = static_cast(*modelSkipGapPtr.get()); // The idea here is to compare a model that has a gap skipped against a model // that has explicit nulls for the buckets that sampling was skipped. 
@@ -1808,7 +1606,7 @@ void CMetricModelTest::testExplicitNulls() CAnomalyDetectorModel::TModelPtr modelExNullPtr(factory.makeModel(initDataExNull)); CPPUNIT_ASSERT(modelExNullPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, modelExNullPtr->category()); - CMetricModel &modelExNullGap = static_cast(*modelExNullPtr.get()); + CMetricModel& modelExNullGap = static_cast(*modelExNullPtr.get()); // p1: |(1, 42.0), ("", 42.0), (null, 42.0)|(1, 1.0)|(1, 1.0)|(null, 100.0)|(null, 100.0)|(1, 42.0)| // p2: |(1, 42.0), ("", 42.0)|(0, 0.0)|(0, 0.0)|(null, 100.0)|(null, 100.0)|(0, 0.0)| @@ -1832,14 +1630,15 @@ void CMetricModelTest::testExplicitNulls() modelExNullGap.sample(600, 700, m_ResourceMonitor); CPPUNIT_ASSERT_EQUAL( - static_cast( - modelSkipGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->residualModel().checksum(), - static_cast( - modelExNullGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))->residualModel().checksum()); + static_cast(modelSkipGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0)) + ->residualModel() + .checksum(), + static_cast(modelExNullGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0)) + ->residualModel() + .checksum()); } -void CMetricModelTest::testVarp() -{ +void CMetricModelTest::testVarp() { LOG_DEBUG("*** testVarp ***"); core_t::TTime startTime(500000); @@ -1861,103 +1660,91 @@ void CMetricModelTest::testVarp() CAnomalyDetectorModel::TModelPtr model_(factory.makeModel(initData)); CPPUNIT_ASSERT(model_); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, model_->category()); - CMetricModel &model = static_cast(*model_.get()); - - TDoubleVec bucket1{ 1.0, 1.1 }; - TDoubleVec bucket2{ 10.0, 10.1 }; - TDoubleVec bucket3{ 4.3, 4.45 }; - TDoubleVec bucket4{ 3.2, 3.303 }; - TDoubleVec bucket5{ 20.1, 20.8, 20.9, 20.8 }; - TDoubleVec bucket6{ 4.1, 4.2 }; - TDoubleVec bucket7{ 0.1, 0.3, 0.2, 0.4 }; - TDoubleVec bucket8{ 12.5, 12.3 }; - TDoubleVec bucket9{ 6.9, 7.0, 7.1, 6.6, 7.1, 6.7 }; - TDoubleVec bucket10{ 0.3, 0.2 }; - TDoubleVec bucket11{ 0.0 }; + CMetricModel& model = static_cast(*model_.get()); + + TDoubleVec bucket1{1.0, 1.1}; + TDoubleVec bucket2{10.0, 10.1}; + TDoubleVec bucket3{4.3, 4.45}; + TDoubleVec bucket4{3.2, 3.303}; + TDoubleVec bucket5{20.1, 20.8, 20.9, 20.8}; + TDoubleVec bucket6{4.1, 4.2}; + TDoubleVec bucket7{0.1, 0.3, 0.2, 0.4}; + TDoubleVec bucket8{12.5, 12.3}; + TDoubleVec bucket9{6.9, 7.0, 7.1, 6.6, 7.1, 6.7}; + TDoubleVec bucket10{0.3, 0.2}; + TDoubleVec bucket11{0.0}; SAnnotatedProbability annotatedProbability; SAnnotatedProbability annotatedProbability2; core_t::TTime time = startTime; - processBucket(time, bucketLength, bucket1, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket1, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, bucket2, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket2, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability 
> 0.8); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, bucket3, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket3, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, bucket4, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket4, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, bucket5, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket5, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, bucket6, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket6, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, bucket7, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket7, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, bucket8, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket8, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.5); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.5); time += bucketLength; - processBucket(time, bucketLength, bucket9, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket9, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.5); 
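Each processBucket call in this test hands the model one bucket of raw values for the variance feature. Assuming the usual varp convention, the bucket statistic is the population variance, normalised by N rather than N - 1; a plain two-pass computation over two of the buckets above:

    #include <cstdio>
    #include <vector>

    // Population variance of one bucket's values (divide by N, not N - 1).
    double varp(const std::vector<double>& values) {
        if (values.empty()) {
            return 0.0;
        }
        double mean = 0.0;
        for (double x : values) {
            mean += x;
        }
        mean /= static_cast<double>(values.size());
        double sumSquares = 0.0;
        for (double x : values) {
            sumSquares += (x - mean) * (x - mean);
        }
        return sumSquares / static_cast<double>(values.size());
    }

    int main() {
        // bucket1 and bucket5 sit at very different levels but both have a small
        // spread, which is why every probability asserted above stays large.
        std::printf("varp(bucket1) = %g\n", varp({1.0, 1.1}));               // 0.0025
        std::printf("varp(bucket5) = %g\n", varp({20.1, 20.8, 20.9, 20.8})); // ~0.1
        return 0;
    }
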
CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.5); time += bucketLength; - processBucket(time, bucketLength, bucket10, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket10, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.5); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.5); time += bucketLength; - processBucket(time, bucketLength, bucket11, - *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); + processBucket(time, bucketLength, bucket11, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2); LOG_DEBUG("P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.5); CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.5); } -void CMetricModelTest::testInterimCorrections() -{ +void CMetricModelTest::testInterimCorrections() { LOG_DEBUG("*** testInterimCorrections ***"); core_t::TTime startTime(3600); @@ -1976,7 +1763,7 @@ void CMetricModelTest::testInterimCorrections() CAnomalyDetectorModel::TModelPtr model_(factory.makeModel(initData)); CPPUNIT_ASSERT(model_); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, model_->category()); - CMetricModel &model = static_cast(*model_.get()); + CMetricModel& model = static_cast(*model_.get()); std::size_t pid1 = addPerson("p1", gatherer, m_ResourceMonitor); std::size_t pid2 = addPerson("p2", gatherer, m_ResourceMonitor); @@ -1986,34 +1773,27 @@ void CMetricModelTest::testInterimCorrections() core_t::TTime endTime(now + 2 * 24 * bucketLength); test::CRandomNumbers rng; TDoubleVec samples(3, 0.0); - while (now < endTime) - { + while (now < endTime) { rng.generateUniformSamples(50.0, 70.0, std::size_t(3), samples); - for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1", 1.0, TOptionalStr("i1")); } - for (std::size_t i = 0; i < static_cast(samples[1] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast(samples[1] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2", 1.0, TOptionalStr("i2")); } - for (std::size_t i = 0; i < static_cast(samples[2] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast(samples[2] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3", 1.0, TOptionalStr("i3")); } model.sample(now, now + bucketLength, m_ResourceMonitor); now += bucketLength; } - for (std::size_t i = 0; i < 35; ++i) - { + for (std::size_t i = 0; i < 35; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1", 1.0, TOptionalStr("i1")); } - for (std::size_t i = 0; i < 1; ++i) - { + for (std::size_t i = 0; i < 1; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2", 1.0, TOptionalStr("i2")); } - for (std::size_t i = 0; i < 100; ++i) - { + for (std::size_t i = 0; i < 100; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3", 1.0, TOptionalStr("i3")); } model.sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); @@ -2022,23 +1802,17 @@ void CMetricModelTest::testInterimCorrections() model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); SAnnotatedProbability annotatedProbability1; 
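The probabilities and baselines requested next use the interim result type: the final bucket is still filling, so observed values are corrected for bucket completeness before they are judged. One simple correction in the spirit of CInterimBucketCorrector (the corrector's actual estimator is not reproduced here) extrapolates a partial sum by the unobserved fraction of the baseline:

    #include <cstdio>

    // Assumed illustrative correction: mid-bucket, roughly completeness * baseline
    // of the final value should have arrived, so top the partial value up by the
    // part of the baseline that is still to come.
    double interimCorrected(double partialValue, double baseline, double completeness) {
        return partialValue + (1.0 - completeness) * baseline;
    }

    int main() {
        double baseline = 60.0;    // typical completed-bucket sum in this test
        double completeness = 0.5; // half way through the bucket

        std::printf("p1 corrected: %g\n", interimCorrected(35.0, baseline, completeness)); // high so far
        std::printf("p2 corrected: %g\n", interimCorrected(1.0, baseline, completeness));  // low so far
        return 0;
    }
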
annotatedProbability1.s_ResultType = type; - CPPUNIT_ASSERT(model.computeProbability(pid1, now, now + bucketLength, - partitioningFields, 1, annotatedProbability1)); + CPPUNIT_ASSERT(model.computeProbability(pid1, now, now + bucketLength, partitioningFields, 1, annotatedProbability1)); SAnnotatedProbability annotatedProbability2; annotatedProbability2.s_ResultType = type; - CPPUNIT_ASSERT(model.computeProbability(pid2, now, now + bucketLength, - partitioningFields, 1, annotatedProbability2)); + CPPUNIT_ASSERT(model.computeProbability(pid2, now, now + bucketLength, partitioningFields, 1, annotatedProbability2)); SAnnotatedProbability annotatedProbability3; annotatedProbability3.s_ResultType = type; - CPPUNIT_ASSERT(model.computeProbability(pid3, now, now + bucketLength, - partitioningFields, 1, annotatedProbability3)); + CPPUNIT_ASSERT(model.computeProbability(pid3, now, now + bucketLength, partitioningFields, 1, annotatedProbability3)); - TDouble1Vec p1Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, - pid1, 0, type, NO_CORRELATES, now); - TDouble1Vec p2Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, - pid2, 0, type, NO_CORRELATES, now); - TDouble1Vec p3Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, - pid3, 0, type, NO_CORRELATES, now); + TDouble1Vec p1Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, pid1, 0, type, NO_CORRELATES, now); + TDouble1Vec p2Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, pid2, 0, type, NO_CORRELATES, now); + TDouble1Vec p3Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, pid3, 0, type, NO_CORRELATES, now); LOG_DEBUG("p1 probability = " << annotatedProbability1.s_Probability); LOG_DEBUG("p2 probability = " << annotatedProbability2.s_Probability); @@ -2055,8 +1829,7 @@ void CMetricModelTest::testInterimCorrections() CPPUNIT_ASSERT(p3Baseline[0] > 59.0 && p3Baseline[0] < 61.0); } -void CMetricModelTest::testInterimCorrectionsWithCorrelations() -{ +void CMetricModelTest::testInterimCorrectionsWithCorrelations() { LOG_DEBUG("*** testInterimCorrectionsWithCorrelations ***"); core_t::TTime startTime(3600); @@ -2076,7 +1849,7 @@ void CMetricModelTest::testInterimCorrectionsWithCorrelations() CAnomalyDetectorModel::TModelPtr modelPtr(factory.makeModel(initData)); CPPUNIT_ASSERT(modelPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, modelPtr->category()); - CMetricModel &model = static_cast<CMetricModel&>(*modelPtr.get()); + CMetricModel& model = static_cast<CMetricModel&>(*modelPtr.get()); std::size_t pid1 = addPerson("p1", gatherer, m_ResourceMonitor); std::size_t pid2 = addPerson("p2", gatherer, m_ResourceMonitor); @@ -2086,34 +1859,27 @@ void CMetricModelTest::testInterimCorrectionsWithCorrelations() core_t::TTime endTime(now + 2 * 24 * bucketLength); test::CRandomNumbers rng; TDoubleVec samples(1, 0.0); - while (now < endTime) - { + while (now < endTime) { rng.generateUniformSamples(80.0, 100.0, std::size_t(1), samples); - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 0.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 0.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1", 1.0, TOptionalStr("i1")); } - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 10.5); ++i) - { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] + 10.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2", 1.0, TOptionalStr("i2")); } - for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] - 9.5); ++i)
- { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples[0] - 9.5); ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3", 1.0, TOptionalStr("i3")); } model.sample(now, now + bucketLength, m_ResourceMonitor); now += bucketLength; } - for (std::size_t i = 0; i < 9; ++i) - { + for (std::size_t i = 0; i < 9; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p1", 1.0, TOptionalStr("i1")); } - for (std::size_t i = 0; i < 10; ++i) - { + for (std::size_t i = 0; i < 10; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p2", 1.0, TOptionalStr("i2")); } - for (std::size_t i = 0; i < 8; ++i) - { + for (std::size_t i = 0; i < 8; ++i) { addArrival(*gatherer, m_ResourceMonitor, now, "p3", 1.0, TOptionalStr("i3")); } model.sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); @@ -2122,29 +1888,20 @@ void CMetricModelTest::testInterimCorrectionsWithCorrelations() model_t::CResultType type(model_t::CResultType::E_Conditional | model_t::CResultType::E_Interim); SAnnotatedProbability annotatedProbability1; annotatedProbability1.s_ResultType = type; - CPPUNIT_ASSERT(model.computeProbability(pid1, now, now + bucketLength, - partitioningFields, 1, annotatedProbability1)); + CPPUNIT_ASSERT(model.computeProbability(pid1, now, now + bucketLength, partitioningFields, 1, annotatedProbability1)); SAnnotatedProbability annotatedProbability2; annotatedProbability2.s_ResultType = type; - CPPUNIT_ASSERT(model.computeProbability(pid2, now, now + bucketLength, - partitioningFields, 1, annotatedProbability2)); + CPPUNIT_ASSERT(model.computeProbability(pid2, now, now + bucketLength, partitioningFields, 1, annotatedProbability2)); SAnnotatedProbability annotatedProbability3; annotatedProbability3.s_ResultType = type; - CPPUNIT_ASSERT(model.computeProbability(pid3, now, now + bucketLength, - partitioningFields, 1, annotatedProbability3)); - - TDouble1Vec p1Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, - pid1, 0, type, - annotatedProbability1.s_AttributeProbabilities[0].s_Correlated, - now); - TDouble1Vec p2Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, - pid2, 0, type, - annotatedProbability2.s_AttributeProbabilities[0].s_Correlated, - now); - TDouble1Vec p3Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, - pid3, 0, type, - annotatedProbability3.s_AttributeProbabilities[0].s_Correlated, - now); + CPPUNIT_ASSERT(model.computeProbability(pid3, now, now + bucketLength, partitioningFields, 1, annotatedProbability3)); + + TDouble1Vec p1Baseline = model.baselineBucketMean( + model_t::E_IndividualSumByBucketAndPerson, pid1, 0, type, annotatedProbability1.s_AttributeProbabilities[0].s_Correlated, now); + TDouble1Vec p2Baseline = model.baselineBucketMean( + model_t::E_IndividualSumByBucketAndPerson, pid2, 0, type, annotatedProbability2.s_AttributeProbabilities[0].s_Correlated, now); + TDouble1Vec p3Baseline = model.baselineBucketMean( + model_t::E_IndividualSumByBucketAndPerson, pid3, 0, type, annotatedProbability3.s_AttributeProbabilities[0].s_Correlated, now); LOG_DEBUG("p1 probability = " << annotatedProbability1.s_Probability); LOG_DEBUG("p2 probability = " << annotatedProbability2.s_Probability); @@ -2161,8 +1918,7 @@ void CMetricModelTest::testInterimCorrectionsWithCorrelations() CPPUNIT_ASSERT(p3Baseline[0] > 7.4 && p3Baseline[0] < 7.6); } -void CMetricModelTest::testCorrelatePersist() -{ +void CMetricModelTest::testCorrelatePersist() { LOG_DEBUG("*** testCorrelatePersist ***"); using TVector2 =
maths::CVectorNx1<double, 2>; @@ -2170,18 +1926,15 @@ void CMetricModelTest::testCorrelatePersist() const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; - const double means[] = { 10.0, 20.0 }; - const double covariances[] = { 3.0, 2.0, 2.0 }; + const double means[] = {10.0, 20.0}; + const double covariances[] = {3.0, 2.0, 2.0}; TVector2 mean(means, means + 2); TMatrix2 covariance(covariances, covariances + 3); test::CRandomNumbers rng; TDoubleVecVec samples; - rng.generateMultivariateNormalSamples(mean.toVector(), - covariance.toVectors(), - 10000, - samples); + rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), 10000, samples); SModelParams params(bucketLength); params.s_DecayRate = 0.001; @@ -2195,20 +1948,17 @@ void CMetricModelTest::testCorrelatePersist() addPerson("p1", gatherer, m_ResourceMonitor); addPerson("p2", gatherer, m_ResourceMonitor); - core_t::TTime time = startTime; + core_t::TTime time = startTime; core_t::TTime bucket = time + bucketLength; - for (std::size_t i = 0u; i < samples.size(); ++i, time += 60) - { - if (time >= bucket) - { + for (std::size_t i = 0u; i < samples.size(); ++i, time += 60) { + if (time >= bucket) { model->sample(bucket - bucketLength, bucket, m_ResourceMonitor); bucket += bucketLength; } addArrival(*gatherer, m_ResourceMonitor, time, "p1", samples[i][0]); addArrival(*gatherer, m_ResourceMonitor, time, "p2", samples[i][0]); - if ((i + 1) % 1000 == 0) - { + if ((i + 1) % 1000 == 0) { // Test persistence. (We check for idempotency.) std::string origXml; { @@ -2243,8 +1993,7 @@ void CMetricModelTest::testCorrelatePersist() } } -void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() -{ +void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() { LOG_DEBUG("*** testSummaryCountZeroRecordsAreIgnored ***"); core_t::TTime startTime(100); @@ -2265,7 +2014,7 @@ void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() CAnomalyDetectorModel::TModelPtr modelWithZerosPtr(factory.makeModel(initDataWithZeros)); CPPUNIT_ASSERT(modelWithZerosPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, modelWithZerosPtr->category()); - CMetricModel &modelWithZeros = static_cast<CMetricModel&>(*modelWithZerosPtr.get()); + CMetricModel& modelWithZeros = static_cast<CMetricModel&>(*modelWithZerosPtr.get()); CModelFactory::SGathererInitializationData gathererNoZerosInitData(startTime); CModelFactory::TDataGathererPtr gathererNoZeros(factory.makeDataGatherer(gathererNoZerosInitData)); @@ -2273,7 +2022,7 @@ void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() CAnomalyDetectorModel::TModelPtr modelNoZerosPtr(factory.makeModel(initDataNoZeros)); CPPUNIT_ASSERT(modelNoZerosPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, modelNoZerosPtr->category()); - CMetricModel &modelNoZeros = static_cast<CMetricModel&>(*modelNoZerosPtr.get()); + CMetricModel& modelNoZeros = static_cast<CMetricModel&>(*modelNoZerosPtr.get()); // The idea here is to compare a model that has records with summary count of zero // against a model that has no records at all where the first model had the zero-count records.
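That comparison relies on a summary count of zero meaning "no data": a model fed such records must finish with a checksum identical to one that never received them. A minimal standalone sketch of that invariant, using a hypothetical FNV-1a-style checksum in place of the real CMetricModel state:

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for a model checksum: an order-sensitive hash of the
// values the model actually accepted.
struct SToyModel {
    std::uint64_t s_Checksum = 14695981039346656037ULL; // FNV-1a offset basis
    void addRecord(double value, unsigned summaryCount) {
        if (summaryCount == 0) {
            return; // zero-count records carry no data and must be ignored
        }
        s_Checksum ^= static_cast<std::uint64_t>(value * 1000.0);
        s_Checksum *= 1099511628211ULL; // FNV-1a prime
    }
};

int main() {
    SToyModel withZeros;
    SToyModel noZeros;
    withZeros.addRecord(5.0, 1);
    withZeros.addRecord(7.0, 0); // ignored, so it must not perturb the state
    noZeros.addRecord(5.0, 1);
    assert(withZeros.s_Checksum == noZeros.s_Checksum);
    return 0;
}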
@@ -2286,24 +2035,37 @@ void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() TDoubleVec values; std::string summaryCountZero("0"); std::string summaryCountOne("1"); - while (now < end) - { - for (std::size_t i = 0; i < 10; ++i) - { + while (now < end) { + for (std::size_t i = 0; i < 10; ++i) { rng.generateNormalSamples(mean, variance, 1, values); double value = values[0]; rng.generateUniformSamples(0.0, 1.0, 1, values); - if (values[0] < 0.05) - { - addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1", value, - TOptionalStr("i1"), TOptionalStr(), TOptionalStr(summaryCountZero)); - } - else - { - addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1", value, - TOptionalStr("i1"), TOptionalStr(), TOptionalStr(summaryCountOne)); - addArrival(*gathererNoZeros, m_ResourceMonitor, now, "p1", value, - TOptionalStr("i1"), TOptionalStr(), TOptionalStr(summaryCountOne)); + if (values[0] < 0.05) { + addArrival(*gathererWithZeros, + m_ResourceMonitor, + now, + "p1", + value, + TOptionalStr("i1"), + TOptionalStr(), + TOptionalStr(summaryCountZero)); + } else { + addArrival(*gathererWithZeros, + m_ResourceMonitor, + now, + "p1", + value, + TOptionalStr("i1"), + TOptionalStr(), + TOptionalStr(summaryCountOne)); + addArrival(*gathererNoZeros, + m_ResourceMonitor, + now, + "p1", + value, + TOptionalStr("i1"), + TOptionalStr(), + TOptionalStr(summaryCountOne)); } } modelWithZeros.sample(now, now + bucketLength, m_ResourceMonitor); @@ -2314,8 +2076,7 @@ void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() CPPUNIT_ASSERT_EQUAL(modelWithZeros.checksum(), modelNoZeros.checksum()); } -void CMetricModelTest::testDecayRateControl() -{ +void CMetricModelTest::testDecayRateControl() { LOG_DEBUG("*** testDecayRateControl ***"); core_t::TTime startTime = 0; @@ -2353,10 +2114,8 @@ void CMetricModelTest::testDecayRateControl() TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); - for (core_t::TTime t = 0; t < 4 * core::constants::WEEK; t += bucketLength) - { - if (t % core::constants::WEEK == 0) - { + for (core_t::TTime t = 0; t < 4 * core::constants::WEEK; t += bucketLength) { + if (t % core::constants::WEEK == 0) { LOG_DEBUG("week " << t / core::constants::WEEK + 1); } @@ -2367,20 +2126,16 @@ void CMetricModelTest::testDecayRateControl() addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value[0]); model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs( - model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - model->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add(std::fabs( - referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanReferencePredictionError.add( + std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); 
LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanReferencePredictionError), - maths::CBasicStatistics::mean(meanPredictionError), - 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), 0.05); } LOG_DEBUG("*** Test step change ***"); @@ -2406,37 +2161,31 @@ void CMetricModelTest::testDecayRateControl() TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); - for (core_t::TTime t = 0; t < 10 * core::constants::WEEK; t += bucketLength) - { - if (t % core::constants::WEEK == 0) - { + for (core_t::TTime t = 0; t < 10 * core::constants::WEEK; t += bucketLength) { + if (t % core::constants::WEEK == 0) { LOG_DEBUG("week " << t / core::constants::WEEK + 1); } - double value = 10.0 * (1.0 + std::sin( boost::math::double_constants::two_pi - * static_cast(t) - / static_cast(core::constants::DAY))) - * (t < 5 * core::constants::WEEK ? 1.0 : 2.0); + double value = 10.0 * + (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast(t) / + static_cast(core::constants::DAY))) * + (t < 5 * core::constants::WEEK ? 1.0 : 2.0); TDoubleVec noise; rng.generateUniformSamples(0.0, 3.0, 1, noise); addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value + noise[0]); addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value + noise[0]); model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs( - model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - model->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add(std::fabs( - referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanReferencePredictionError.add( + std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(meanReferencePredictionError), - maths::CBasicStatistics::mean(meanPredictionError), - 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), 0.05); } LOG_DEBUG("*** Test unmodelled cyclic component ***"); @@ -2464,43 +2213,36 @@ void CMetricModelTest::testDecayRateControl() TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); - for (core_t::TTime t = 0; t < 20 * core::constants::WEEK; t += bucketLength) - { - if (t % core::constants::WEEK == 0) - { + for (core_t::TTime t = 0; t < 20 * 
core::constants::WEEK; t += bucketLength) { + if (t % core::constants::WEEK == 0) { LOG_DEBUG("week " << t / core::constants::WEEK + 1); } - double value = 10.0 * (1.0 + std::sin( boost::math::double_constants::two_pi - * static_cast<double>(t) - / static_cast<double>(core::constants::DAY))) - * (1.0 + std::sin( boost::math::double_constants::two_pi - * static_cast<double>(t) - / 10.0 / static_cast<double>(core::constants::WEEK))); + double value = 10.0 * + (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / + static_cast<double>(core::constants::DAY))) * + (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / 10.0 / + static_cast<double>(core::constants::WEEK))); TDoubleVec noise; rng.generateUniformSamples(0.0, 3.0, 1, noise); addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value + noise[0]); addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value + noise[0]); model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs( - model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - model->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add(std::fabs( - referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, - NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanReferencePredictionError.add( + std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); - CPPUNIT_ASSERT( maths::CBasicStatistics::mean(meanPredictionError) - < 0.7 * maths::CBasicStatistics::mean(meanReferencePredictionError)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanPredictionError) < + 0.7 * maths::CBasicStatistics::mean(meanReferencePredictionError)); } } -void CMetricModelTest::testProbabilityCalculationForLowMedian() -{ +void CMetricModelTest::testProbabilityCalculationForLowMedian() { LOG_DEBUG("*** testProbabilityCalculationForLowMedian ***"); core_t::TTime startTime(0); @@ -2522,52 +2264,45 @@ void CMetricModelTest::testProbabilityCalculationForLowMedian() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_); - CMetricModel &model = static_cast<CMetricModel&>(*model_.get()); + CMetricModel& model = static_cast<CMetricModel&>(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberOfBuckets; ++i) - { + for (std::size_t i = 0u; i < numberOfBuckets; ++i) { double meanForBucket = mean; - if (i == lowMedianBucket) - { + if (i == lowMedianBucket) { meanForBucket = lowMean; } - if (i == highMedianBucket) - { + if (i == highMedianBucket) { meanForBucket = highMean; } TDoubleVec values; rng.generateNormalSamples(meanForBucket, variance, bucketCount, values); LOG_DEBUG("values = " <<
core::CContainerPrinter::print(values)); - for (std::size_t j = 0u; j < values.size(); ++j) - { + for (std::size_t j = 0u; j < values.size(); ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast<core_t::TTime>(j), "p", values[j]); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); time += bucketLength; } - LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities.begin(), - probabilities.end())); + LOG_DEBUG("probabilities = " << core::CContainerPrinter::print(probabilities.begin(), probabilities.end())); CPPUNIT_ASSERT(probabilities[lowMedianBucket] < 0.01); CPPUNIT_ASSERT(probabilities[highMedianBucket] > 0.1); } -void CMetricModelTest::testProbabilityCalculationForHighMedian() -{ +void CMetricModelTest::testProbabilityCalculationForHighMedian() { LOG_DEBUG("*** testProbabilityCalculationForHighMedian ***"); core_t::TTime startTime(0); @@ -2589,37 +2324,32 @@ void CMetricModelTest::testProbabilityCalculationForHighMedian() CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; makeModel(factory, features, startTime, bucketLength, gatherer, model_); - CMetricModel &model = static_cast<CMetricModel&>(*model_.get()); + CMetricModel& model = static_cast<CMetricModel&>(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < numberOfBuckets; ++i) - { + for (std::size_t i = 0u; i < numberOfBuckets; ++i) { double meanForBucket = mean; - if (i == lowMedianBucket) - { + if (i == lowMedianBucket) { meanForBucket = lowMean; } - if (i == highMedianBucket) - { + if (i == highMedianBucket) { meanForBucket = highMean; } TDoubleVec values; rng.generateNormalSamples(meanForBucket, variance, bucketCount, values); LOG_DEBUG("values = " << core::CContainerPrinter::print(values)); - for (std::size_t j = 0u; j < values.size(); ++j) - { + for (std::size_t j = 0u; j < values.size(); ++j) { addArrival(*gatherer, m_ResourceMonitor, time + static_cast<core_t::TTime>(j), "p", values[j]); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model.computeProbability(0/*pid*/, time, time + bucketLength, - partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability)); LOG_DEBUG("probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); @@ -2632,8 +2362,7 @@ void CMetricModelTest::testProbabilityCalculationForHighMedian() CPPUNIT_ASSERT(probabilities[highMedianBucket] < 0.01); } -void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() -{ +void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() { LOG_DEBUG("*** testIgnoreSamplingGivenDetectionRules ***"); // Create 2 models, one of which has a skip sampling rule.
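The rule wiring in the next hunk is specific to ml-cpp, but the property under test is generic: when a detection rule matches a bucket, sampling is suppressed, so the rule-bearing model should end up identical to a rule-free twin that was wound past the same bucket with skipSampling(). A small sketch of that gating pattern with a hypothetical predicate type, not the CRuleCondition API:

#include <cassert>
#include <functional>

// Hypothetical model state: just counts the buckets that were sampled.
struct SToyState {
    int s_BucketsSampled = 0;
    bool operator==(const SToyState& other) const { return s_BucketsSampled == other.s_BucketsSampled; }
};

// Sample one bucket unless the skip rule fires on the bucket's value.
void sample(SToyState& state, double bucketValue, const std::function<bool(double)>& skipRule) {
    if (skipRule && skipRule(bucketValue)) {
        return; // rule matched: leave the model untouched
    }
    ++state.s_BucketsSampled;
}

int main() {
    auto actualGreaterThan100 = [](double v) { return v > 100.0; };
    SToyState withRule;
    SToyState noRule;
    sample(withRule, 1.0, actualGreaterThan100);   // sampled
    sample(withRule, 110.0, actualGreaterThan100); // skipped by the rule
    sample(noRule, 1.0, nullptr);                  // sampled
    // noRule never sees the anomalous bucket at all, mirroring skipSampling().
    assert(withRule == noRule);
    return 0;
}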
@@ -2642,7 +2371,6 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() // At the end the checksums for the underlying models should // be the same. - // Create a rule to filter buckets where the actual value > 100 CRuleCondition condition; condition.type(CRuleCondition::E_NumericalActual); @@ -2656,14 +2384,13 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() std::size_t startTime(300); SModelParams paramsNoRules(bucketLength); - // Model without the skip sampling rule CMetricModelFactory factory(paramsNoRules); model_t::TFeatureVec features{model_t::E_IndividualMeanByPerson}; CModelFactory::TDataGathererPtr gathererNoSkip; CAnomalyDetectorModel::TModelPtr modelPtrNoSkip; makeModel(factory, features, startTime, bucketLength, gathererNoSkip, modelPtrNoSkip); - CMetricModel *modelNoSkip = dynamic_cast<CMetricModel*>(modelPtrNoSkip.get()); + CMetricModel* modelNoSkip = dynamic_cast<CMetricModel*>(modelPtrNoSkip.get()); // Model with the skip sampling rule SModelParams paramsWithRules(bucketLength); @@ -2674,13 +2401,12 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() CModelFactory::TDataGathererPtr gathererWithSkip; CAnomalyDetectorModel::TModelPtr modelPtrWithSkip; makeModel(factoryWithSkip, features, startTime, bucketLength, gathererWithSkip, modelPtrWithSkip); - CMetricModel *modelWithSkip = dynamic_cast<CMetricModel*>(modelPtrWithSkip.get()); + CMetricModel* modelWithSkip = dynamic_cast<CMetricModel*>(modelPtrWithSkip.get()); std::size_t endTime = startTime + bucketLength; // Add a bucket to both models - for (std::size_t i=0; i<60; i++) - { + for (std::size_t i = 0; i < 60; i++) { addArrival(*gathererNoSkip, m_ResourceMonitor, startTime + i, "p1", 1.0); addArrival(*gathererWithSkip, m_ResourceMonitor, startTime + i, "p1", 1.0); } @@ -2691,8 +2417,7 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() CPPUNIT_ASSERT_EQUAL(modelWithSkip->checksum(), modelNoSkip->checksum()); // Add a bucket to both models - for (std::size_t i=0; i<60; i++) - { + for (std::size_t i = 0; i < 60; i++) { addArrival(*gathererNoSkip, m_ResourceMonitor, startTime + i, "p1", 1.0); addArrival(*gathererWithSkip, m_ResourceMonitor, startTime + i, "p1", 1.0); } @@ -2703,8 +2428,7 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() CPPUNIT_ASSERT_EQUAL(modelWithSkip->checksum(), modelNoSkip->checksum()); // this sample will be skipped by the detection rule - for (std::size_t i=0; i<60; i++) - { + for (std::size_t i = 0; i < 60; i++) { addArrival(*gathererWithSkip, m_ResourceMonitor, startTime + i, "p1", 110.0); } modelWithSkip->sample(startTime, endTime, m_ResourceMonitor); @@ -2715,8 +2439,7 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() // Wind the other model forward modelNoSkip->skipSampling(startTime); - for (std::size_t i=0; i<60; i++) - { + for (std::size_t i = 0; i < 60; i++) { addArrival(*gathererNoSkip, m_ResourceMonitor, startTime + i, "p1", 2.0); addArrival(*gathererWithSkip, m_ResourceMonitor, startTime + i, "p1", 2.0); } @@ -2735,7 +2458,6 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() // uint64_t noSkipChecksum = modelNoSkipView->model(model_t::E_IndividualMeanByPerson, 0)->checksum(); // CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); - // TODO These checks fail see elastic/machine-learning-cpp/issues/485 // Check the last value times of the underlying models are the same // const maths::CUnivariateTimeSeriesModel *timeSeriesModel = @@ -2750,86 +2472,54 @@ //
CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); } - -CppUnit::Test *CMetricModelTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMetricModelTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testSample", - &CMetricModelTest::testSample) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testMultivariateSample", - &CMetricModelTest::testMultivariateSample) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForMetric", - &CMetricModelTest::testProbabilityCalculationForMetric) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForMedian", - &CMetricModelTest::testProbabilityCalculationForMedian) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForLowMean", - &CMetricModelTest::testProbabilityCalculationForLowMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForHighMean", - &CMetricModelTest::testProbabilityCalculationForHighMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForLowSum", - &CMetricModelTest::testProbabilityCalculationForLowSum) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForHighSum", - &CMetricModelTest::testProbabilityCalculationForHighSum) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForLatLong", - &CMetricModelTest::testProbabilityCalculationForLatLong) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testInfluence", - &CMetricModelTest::testInfluence) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testLatLongInfluence", - &CMetricModelTest::testLatLongInfluence) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testPrune", - &CMetricModelTest::testPrune) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testKey", - &CMetricModelTest::testKey) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testSkipSampling", - &CMetricModelTest::testSkipSampling) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testExplicitNulls", - &CMetricModelTest::testExplicitNulls) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testVarp", - &CMetricModelTest::testVarp) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testInterimCorrections", - &CMetricModelTest::testInterimCorrections) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testInterimCorrectionsWithCorrelations", - &CMetricModelTest::testInterimCorrectionsWithCorrelations) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testCorrelatePersist", - &CMetricModelTest::testCorrelatePersist) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testSummaryCountZeroRecordsAreIgnored", - &CMetricModelTest::testSummaryCountZeroRecordsAreIgnored) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testSummaryCountZeroRecordsAreIgnored", - &CMetricModelTest::testSummaryCountZeroRecordsAreIgnored) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testDecayRateControl", - &CMetricModelTest::testDecayRateControl) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForLowMedian", - &CMetricModelTest::testProbabilityCalculationForLowMedian) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testProbabilityCalculationForHighMedian", - &CMetricModelTest::testProbabilityCalculationForHighMedian) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CMetricModelTest>( - "CMetricModelTest::testIgnoreSamplingGivenDetectionRules", - &CMetricModelTest::testIgnoreSamplingGivenDetectionRules) ); +CppUnit::Test* CMetricModelTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricModelTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testSample", &CMetricModelTest::testSample)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testMultivariateSample", &CMetricModelTest::testMultivariateSample)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForMetric", + &CMetricModelTest::testProbabilityCalculationForMetric)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForMedian", + &CMetricModelTest::testProbabilityCalculationForMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForLowMean", + &CMetricModelTest::testProbabilityCalculationForLowMean)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForHighMean", + &CMetricModelTest::testProbabilityCalculationForHighMean)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForLowSum", + &CMetricModelTest::testProbabilityCalculationForLowSum)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForHighSum", + &CMetricModelTest::testProbabilityCalculationForHighSum)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForLatLong", + &CMetricModelTest::testProbabilityCalculationForLatLong)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testInfluence", &CMetricModelTest::testInfluence)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testLatLongInfluence", &CMetricModelTest::testLatLongInfluence)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testPrune", &CMetricModelTest::testPrune)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testKey", &CMetricModelTest::testKey)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testSkipSampling", &CMetricModelTest::testSkipSampling)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testExplicitNulls", &CMetricModelTest::testExplicitNulls)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testVarp", &CMetricModelTest::testVarp)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testInterimCorrections", &CMetricModelTest::testInterimCorrections)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testInterimCorrectionsWithCorrelations", + &CMetricModelTest::testInterimCorrectionsWithCorrelations)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testCorrelatePersist", &CMetricModelTest::testCorrelatePersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testSummaryCountZeroRecordsAreIgnored", + &CMetricModelTest::testSummaryCountZeroRecordsAreIgnored)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testSummaryCountZeroRecordsAreIgnored", + &CMetricModelTest::testSummaryCountZeroRecordsAreIgnored)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testDecayRateControl", &CMetricModelTest::testDecayRateControl)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForLowMedian", + &CMetricModelTest::testProbabilityCalculationForLowMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForHighMedian", + &CMetricModelTest::testProbabilityCalculationForHighMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testIgnoreSamplingGivenDetectionRules", + &CMetricModelTest::testIgnoreSamplingGivenDetectionRules)); return suiteOfTests; } diff --git a/lib/model/unittest/CMetricModelTest.h b/lib/model/unittest/CMetricModelTest.h index f099d9235e..50e698bfce 100644 --- a/lib/model/unittest/CMetricModelTest.h +++ b/lib/model/unittest/CMetricModelTest.h @@ -11,38 +11,37 @@ #include -class CMetricModelTest : public CppUnit::TestFixture -{ - public: - void testSample(); - void testMultivariateSample(); - void testProbabilityCalculationForMetric(); - void testProbabilityCalculationForMedian(); - void testProbabilityCalculationForLowMedian(); - void testProbabilityCalculationForHighMedian(); - void testProbabilityCalculationForLowMean(); - void testProbabilityCalculationForHighMean(); - void testProbabilityCalculationForLowSum(); - void testProbabilityCalculationForHighSum(); - void testProbabilityCalculationForLatLong(); - void testInfluence(); - void testLatLongInfluence(); - void testPrune(); - void testSkipSampling(); - void testExplicitNulls(); - void testKey(); - void testVarp(); - void testInterimCorrections(); - void testInterimCorrectionsWithCorrelations(); - void testCorrelatePersist(); - void testSummaryCountZeroRecordsAreIgnored(); - void testDecayRateControl(); - void testIgnoreSamplingGivenDetectionRules(); +class CMetricModelTest : public CppUnit::TestFixture { +public: + void testSample(); + void testMultivariateSample(); + void testProbabilityCalculationForMetric(); + void testProbabilityCalculationForMedian(); + void testProbabilityCalculationForLowMedian(); + void testProbabilityCalculationForHighMedian(); + void testProbabilityCalculationForLowMean(); + void testProbabilityCalculationForHighMean(); + void testProbabilityCalculationForLowSum(); + void testProbabilityCalculationForHighSum(); + void testProbabilityCalculationForLatLong(); + void testInfluence(); + void testLatLongInfluence(); + void testPrune(); + void testSkipSampling(); + void testExplicitNulls(); + void testKey(); + void testVarp(); + void testInterimCorrections(); + void testInterimCorrectionsWithCorrelations(); + void testCorrelatePersist(); + void testSummaryCountZeroRecordsAreIgnored(); + void testDecayRateControl(); + void testIgnoreSamplingGivenDetectionRules(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); - private: - ml::model::CResourceMonitor m_ResourceMonitor; +private: + ml::model::CResourceMonitor m_ResourceMonitor; }; #endif // INCLUDED_CMetricModelTest_h diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.cc b/lib/model/unittest/CMetricPopulationDataGathererTest.cc index bb06f2870d..c43612de78 100644 --- a/lib/model/unittest/CMetricPopulationDataGathererTest.cc +++ b/lib/model/unittest/CMetricPopulationDataGathererTest.cc @@ -7,10 +7,10 @@ #include "CMetricPopulationDataGathererTest.h" #include
-#include #include #include #include +#include #include #include @@ -31,8 +31,7 @@ using namespace ml; using namespace model; -namespace -{ +namespace { using TDoubleVec = std::vector<double>; using TFeatureVec = std::vector<model_t::EFeature>; @@ -46,20 +45,13 @@ using TSizeSizePrFeatureDataPrVec = std::vector<TSizeSizePrFeatureDataPr>; using TFeatureSizeSizePrFeatureDataPrVecPr = std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>; using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector<TFeatureSizeSizePrFeatureDataPrVecPr>; -struct SMessage -{ - SMessage(const core_t::TTime &time, - const std::string &person, - const std::string &attribute, - const double &value, - const TStrVec &influences = TStrVec()) : - s_Time(time), - s_Person(person), - s_Attribute(attribute), - s_Value(value), - s_Influences(influences) - { - } +struct SMessage { + SMessage(const core_t::TTime& time, + const std::string& person, + const std::string& attribute, + const double& value, + const TStrVec& influences = TStrVec()) + : s_Time(time), s_Person(person), s_Attribute(attribute), s_Value(value), s_Influences(influences) {} core_t::TTime s_Time; std::string s_Person; @@ -69,21 +61,18 @@ struct SMessage }; using TMessageVec = std::vector<SMessage>; -TStrVec vec(const std::string &s1, const std::string &s2) -{ +TStrVec vec(const std::string& s1, const std::string& s2) { TStrVec result(1, s1); result.push_back(s2); return result; } -void generateTestMessages(const core_t::TTime &startTime, - TMessageVec &result) -{ +void generateTestMessages(const core_t::TTime& startTime, TMessageVec& result) { const std::size_t numberMessages = 100000; const std::size_t numberPeople = 40; const std::size_t numberCategories = 10; - const double locations[] = { 1.0, 2.0, 5.0, 15.0, 3.0, 0.5, 10.0, 17.0, 8.5, 1.5 }; - const double scales[] = { 1.0, 1.0, 3.0, 2.0, 0.5, 0.5, 2.0, 3.0, 4.0, 1.0 }; + const double locations[] = {1.0, 2.0, 5.0, 15.0, 3.0, 0.5, 10.0, 17.0, 8.5, 1.5}; + const double scales[] = {1.0, 1.0, 3.0, 2.0, 0.5, 0.5, 2.0, 3.0, 4.0, 1.0}; result.clear(); result.reserve(numberMessages); @@ -100,8 +89,7 @@ void generateTestMessages(const core_t::TTime &startTime, TDoubleVec categories; rng.generateUniformSamples(0.0, static_cast<double>(numberCategories) - 0.01, numberMessages, categories); - for (std::size_t i = 0u; i < numberMessages; ++i) - { + for (std::size_t i = 0u; i < numberMessages; ++i) { core_t::TTime time = startTime + static_cast<core_t::TTime>(times[i]); std::size_t person = static_cast<std::size_t>(people[i]); std::size_t attribute = static_cast<std::size_t>(categories[i]); @@ -114,19 +102,14 @@ } } -void addArrival(const SMessage &message, CDataGatherer &gatherer, CResourceMonitor &resourceMonitor) -{ +void addArrival(const SMessage& message, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields; fields.push_back(&message.s_Person); fields.push_back(&message.s_Attribute); - for (std::size_t i = 0u; i < message.s_Influences.size(); ++i) - { - if (message.s_Influences[i].empty()) - { - fields.push_back(static_cast<const std::string*>(0)); - } - else - { + for (std::size_t i = 0u; i < message.s_Influences.size(); ++i) { + if (message.s_Influences[i].empty()) { + fields.push_back(static_cast<const std::string*>(0)); + } else { + fields.push_back(&message.s_Influences[i]); } } @@ -137,8 +120,7 @@ ... gatherer.addArrival(fields, result, resourceMonitor); } -bool isSpace(const char x) -{ +bool isSpace(const char x) { return x == ' ' || x == '\t'; } @@ -147,8 +129,7 @@ const std::string EMPTY_STRING; } // unnamed:: -void
CMetricPopulationDataGathererTest::testMean() -{ +void CMetricPopulationDataGathererTest::testMean() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testMean ***"); // Test that we correctly sample the bucket means. @@ -170,46 +151,36 @@ void CMetricPopulationDataGathererTest::testMean() factory.features(features); CModelFactory::SGathererInitializationData initData(startTime); CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer &gatherer(*gathererPtr); + CDataGatherer& gatherer(*gathererPtr); CPPUNIT_ASSERT(gatherer.isPopulation()); TStrStrPrMeanAccumulatorMap accumulators; core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { - LOG_DEBUG("Processing bucket [" << bucketStart - << ", " << bucketStart+bucketLength << ")"); + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { + LOG_DEBUG("Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; gatherer.featureData(bucketStart, bucketLength, tmp); CPPUNIT_ASSERT_EQUAL(features.size(), tmp.size()); CPPUNIT_ASSERT_EQUAL(features[0], tmp[0].first); - const TSizeSizePrFeatureDataPrVec &data = tmp[0].second; + const TSizeSizePrFeatureDataPrVec& data = tmp[0].second; CPPUNIT_ASSERT_EQUAL(accumulators.size(), data.size()); TStrStrPrDoubleMap means; - for (std::size_t j = 0u; j < data.size(); ++j) - { - if (data[j].second.s_BucketValue) - { - means[TStrStrPr(gatherer.personName(data[j].first.first), - gatherer.attributeName(data[j].first.second))] = - data[j].second.s_BucketValue->value()[0]; + for (std::size_t j = 0u; j < data.size(); ++j) { + if (data[j].second.s_BucketValue) { + means[TStrStrPr(gatherer.personName(data[j].first.first), gatherer.attributeName(data[j].first.second))] = + data[j].second.s_BucketValue->value()[0]; } } TStrStrPrDoubleMap expectedMeans; - for (TStrStrPrMeanAccumulatorMapCItr itr = accumulators.begin(); - itr != accumulators.end(); - ++itr) - { + for (TStrStrPrMeanAccumulatorMapCItr itr = accumulators.begin(); itr != accumulators.end(); ++itr) { expectedMeans[itr->first] = maths::CBasicStatistics::mean(itr->second); } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), - core::CContainerPrinter::print(means)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), core::CContainerPrinter::print(means)); bucketStart += bucketLength; accumulators.clear(); @@ -220,8 +191,7 @@ void CMetricPopulationDataGathererTest::testMean() } } -void CMetricPopulationDataGathererTest::testMin() -{ +void CMetricPopulationDataGathererTest::testMin() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testMin ***"); // Test that we correctly sample the bucket minimums. 
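The reference bookkeeping in testMean, testMin and testMax is the same shape: keep one running statistic per (person, attribute) key, compare it against the gatherer's featureData() at each bucket boundary, then reset for the next bucket. A condensed, self-contained sketch of the minimum case, using plain standard-library containers in place of the ml accumulator types:

#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>

using TStrStrPr = std::pair<std::string, std::string>; // (person, attribute)

int main() {
    // Reference minimums for the current bucket, keyed by (person, attribute).
    std::map<TStrStrPr, double> mins;
    struct { std::string person, attribute; double value; } messages[] = {
        {"p1", "c1", 5.0}, {"p1", "c1", 2.0}, {"p2", "c1", 7.0}};
    for (const auto& m : messages) {
        auto key = TStrStrPr(m.person, m.attribute);
        auto itr = mins.find(key);
        if (itr == mins.end()) {
            mins.emplace(key, m.value);
        } else {
            itr->second = std::min(itr->second, m.value); // running bucket minimum
        }
    }
    for (const auto& entry : mins) { // this is what gets compared with featureData()
        std::cout << entry.first.first << '/' << entry.first.second << " -> " << entry.second << '\n';
    }
    return 0;
}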
@@ -243,45 +213,35 @@ void CMetricPopulationDataGathererTest::testMin() factory.features(features); CModelFactory::SGathererInitializationData initData(startTime); CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer &gatherer(*gathererPtr); + CDataGatherer& gatherer(*gathererPtr); TStrStrPrMinAccumulatorMap accumulators; core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { - LOG_DEBUG("Processing bucket [" << bucketStart - << ", " << bucketStart+bucketLength << ")"); + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { + LOG_DEBUG("Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; gatherer.featureData(bucketStart, bucketLength, tmp); CPPUNIT_ASSERT_EQUAL(features.size(), tmp.size()); CPPUNIT_ASSERT_EQUAL(features[0], tmp[0].first); - const TSizeSizePrFeatureDataPrVec &data = tmp[0].second; + const TSizeSizePrFeatureDataPrVec& data = tmp[0].second; CPPUNIT_ASSERT_EQUAL(accumulators.size(), data.size()); TStrStrPrDoubleMap mins; - for (std::size_t j = 0u; j < data.size(); ++j) - { - if (data[j].second.s_BucketValue) - { - mins[TStrStrPr(gatherer.personName(data[j].first.first), - gatherer.attributeName(data[j].first.second))] = - data[j].second.s_BucketValue->value()[0]; + for (std::size_t j = 0u; j < data.size(); ++j) { + if (data[j].second.s_BucketValue) { + mins[TStrStrPr(gatherer.personName(data[j].first.first), gatherer.attributeName(data[j].first.second))] = + data[j].second.s_BucketValue->value()[0]; } } TStrStrPrDoubleMap expectedMins; - for (TStrStrPrMinAccumulatorMapCItr itr = accumulators.begin(); - itr != accumulators.end(); - ++itr) - { + for (TStrStrPrMinAccumulatorMapCItr itr = accumulators.begin(); itr != accumulators.end(); ++itr) { expectedMins[itr->first] = itr->second[0]; } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMins), - core::CContainerPrinter::print(mins)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMins), core::CContainerPrinter::print(mins)); bucketStart += bucketLength; accumulators.clear(); @@ -292,8 +252,7 @@ void CMetricPopulationDataGathererTest::testMin() } } -void CMetricPopulationDataGathererTest::testMax() -{ +void CMetricPopulationDataGathererTest::testMax() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testMax ***"); // Test that we correctly sample the bucket maximums. 
@@ -315,45 +274,35 @@ void CMetricPopulationDataGathererTest::testMax() factory.features(features); CModelFactory::SGathererInitializationData initData(startTime); CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer &gatherer(*gathererPtr); + CDataGatherer& gatherer(*gathererPtr); TStrStrPrMaxAccumulatorMap accumulators; core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { - LOG_DEBUG("Processing bucket [" << bucketStart - << ", " << bucketStart+bucketLength << ")"); + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { + LOG_DEBUG("Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; gatherer.featureData(bucketStart, bucketLength, tmp); CPPUNIT_ASSERT_EQUAL(features.size(), tmp.size()); CPPUNIT_ASSERT_EQUAL(features[0], tmp[0].first); - const TSizeSizePrFeatureDataPrVec &data = tmp[0].second; + const TSizeSizePrFeatureDataPrVec& data = tmp[0].second; CPPUNIT_ASSERT_EQUAL(accumulators.size(), data.size()); TStrStrPrDoubleMap maxs; - for (std::size_t j = 0u; j < data.size(); ++j) - { - if (data[j].second.s_BucketValue) - { - maxs[TStrStrPr(gatherer.personName(data[j].first.first), - gatherer.attributeName(data[j].first.second))] = - data[j].second.s_BucketValue->value()[0]; + for (std::size_t j = 0u; j < data.size(); ++j) { + if (data[j].second.s_BucketValue) { + maxs[TStrStrPr(gatherer.personName(data[j].first.first), gatherer.attributeName(data[j].first.second))] = + data[j].second.s_BucketValue->value()[0]; } } TStrStrPrDoubleMap expectedMaxs; - for (TStrStrPrMaxAccumulatorMapCItr itr = accumulators.begin(); - itr != accumulators.end(); - ++itr) - { + for (TStrStrPrMaxAccumulatorMapCItr itr = accumulators.begin(); itr != accumulators.end(); ++itr) { expectedMaxs[itr->first] = itr->second[0]; } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxs), - core::CContainerPrinter::print(maxs)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxs), core::CContainerPrinter::print(maxs)); bucketStart += bucketLength; accumulators.clear(); @@ -364,8 +313,7 @@ void CMetricPopulationDataGathererTest::testMax() } } -void CMetricPopulationDataGathererTest::testSum() -{ +void CMetricPopulationDataGathererTest::testSum() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testSum ***"); // Test that we correctly sample the bucket sums. 
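testSum below repeats the same template once more with addition as the reduction. For reference, all four bucket statistics can be produced in a single pass over the messages; the struct here is purely illustrative and is not a type from the library:

#include <algorithm>
#include <cstddef>
#include <iostream>

// One bucket's worth of statistics for a single key, accumulated in one pass.
struct SBucketStats {
    std::size_t s_Count = 0;
    double s_Sum = 0.0, s_Min = 0.0, s_Max = 0.0;
    void add(double value) {
        s_Min = s_Count == 0 ? value : std::min(s_Min, value);
        s_Max = s_Count == 0 ? value : std::max(s_Max, value);
        s_Sum += value;
        ++s_Count;
    }
    double mean() const { return s_Count > 0 ? s_Sum / static_cast<double>(s_Count) : 0.0; }
};

int main() {
    SBucketStats stats;
    for (double value : {5.0, 2.0, 7.0}) {
        stats.add(value);
    }
    // Prints: mean 4.66667 min 2 max 7 sum 14
    std::cout << "mean " << stats.mean() << " min " << stats.s_Min << " max " << stats.s_Max
              << " sum " << stats.s_Sum << '\n';
    return 0;
}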
@@ -383,37 +331,30 @@ void CMetricPopulationDataGathererTest::testSum() factory.features(features); CModelFactory::SGathererInitializationData initData(startTime); CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer &gatherer(*gathererPtr); + CDataGatherer& gatherer(*gathererPtr); TStrStrPrDoubleMap expectedSums; core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { - LOG_DEBUG("Processing bucket [" << bucketStart - << ", " << bucketStart+bucketLength << ")"); + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { + LOG_DEBUG("Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; gatherer.featureData(bucketStart, bucketLength, tmp); CPPUNIT_ASSERT_EQUAL(features.size(), tmp.size()); CPPUNIT_ASSERT_EQUAL(features[0], tmp[0].first); - const TSizeSizePrFeatureDataPrVec &data = tmp[0].second; + const TSizeSizePrFeatureDataPrVec& data = tmp[0].second; CPPUNIT_ASSERT_EQUAL(expectedSums.size(), data.size()); TStrStrPrDoubleMap sums; - for (std::size_t j = 0u; j < data.size(); ++j) - { - if (data[j].second.s_BucketValue) - { - sums[TStrStrPr(gatherer.personName(data[j].first.first), - gatherer.attributeName(data[j].first.second))] = - data[j].second.s_BucketValue->value()[0]; + for (std::size_t j = 0u; j < data.size(); ++j) { + if (data[j].second.s_BucketValue) { + sums[TStrStrPr(gatherer.personName(data[j].first.first), gatherer.attributeName(data[j].first.second))] = + data[j].second.s_BucketValue->value()[0]; } } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSums), - core::CContainerPrinter::print(sums)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSums), core::CContainerPrinter::print(sums)); bucketStart += bucketLength; expectedSums.clear(); @@ -424,9 +365,7 @@ void CMetricPopulationDataGathererTest::testSum() } } - -void CMetricPopulationDataGathererTest::testSampleCount() -{ +void CMetricPopulationDataGathererTest::testSampleCount() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testSampleCount ***"); // Test that we set sensible sample counts for each attribute. 
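The hard-coded expectedSampleCounts in the next hunk (a long stretch of 11.0 followed by a slow drift once the message rate changes) come from the gatherer's adaptive effective sample count. The patch does not show the actual update rule, so the following is only a rough standalone sketch of the general shape, assuming a small per-bucket smoothing factor; its output is not meant to reproduce the test's numbers:

#include <iostream>

int main() {
    // Assumed smoothing factor; the real ml-cpp rule is not shown in this patch.
    const double alpha = 0.03;
    // Sample count established from the first few buckets of history.
    double sampleCount = 11.0;
    const int messagesPerBucket[] = {110, 110, 110, 97, 97, 97};
    for (int n : messagesPerBucket) {
        // Move a small fraction of the way towards the latest observed rate.
        sampleCount += alpha * (static_cast<double>(n) - sampleCount);
        std::cout << sampleCount << '\n';
    }
    return 0;
}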
@@ -437,34 +376,22 @@ void CMetricPopulationDataGathererTest::testSampleCount() const std::string attribute("c1"); const std::string person("p1"); const std::size_t numberBuckets = 40; - const std::size_t personMessageCount[numberBuckets] = - { - 11, 11, 11, 11, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 97, 97, 97, 97, 97, 97, 97, 97, 97 - }; - const double expectedSampleCounts[] = - { - 0.0, 0.0, 0.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.3597, 11.7164, 12.0701, 12.421, 12.7689, 13.114, 13.4562, 13.7957, 14.1325, 14.4665 - }; + const std::size_t personMessageCount[numberBuckets] = {11, 11, 11, 11, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 97, 97, 97, 97, 97, 97, 97, 97, 97}; + const double expectedSampleCounts[] = {0.0, 0.0, 0.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, + 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, + 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, + 11.3597, 11.7164, 12.0701, 12.421, 12.7689, 13.114, 13.4562, 13.7957, 14.1325, 14.4665}; const double tolerance = 5e-4; TMessageVec messages; - for (std::size_t bucket = 0u; bucket < numberBuckets; ++bucket) - { - core_t::TTime bucketStart = - startTime + static_cast<core_t::TTime>(bucket) * bucketLength; + for (std::size_t bucket = 0u; bucket < numberBuckets; ++bucket) { + core_t::TTime bucketStart = startTime + static_cast<core_t::TTime>(bucket) * bucketLength; std::size_t n = personMessageCount[bucket]; - for (std::size_t i = 0u; i < n; ++i) - { - core_t::TTime time = bucketStart + bucketLength - * static_cast<core_t::TTime>(i) - / static_cast<core_t::TTime>(n); + for (std::size_t i = 0u; i < n; ++i) { + core_t::TTime time = bucketStart + bucketLength * static_cast<core_t::TTime>(i) / static_cast<core_t::TTime>(n); messages.push_back(SMessage(time, person, attribute, 1.0)); } } @@ -475,37 +402,28 @@ factory.features(features); CModelFactory::SGathererInitializationData initData(startTime); CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer &gatherer(*gathererPtr); + CDataGatherer& gatherer(*gathererPtr); std::size_t bucket = 0u; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - core_t::TTime bucketStart = - startTime + static_cast<core_t::TTime>(bucket) * bucketLength; + for (std::size_t i = 0u; i < messages.size(); ++i) { + core_t::TTime bucketStart = startTime + static_cast<core_t::TTime>(bucket) * bucketLength; - if (messages[i].s_Time >= bucketStart + bucketLength) - { + if (messages[i].s_Time >= bucketStart + bucketLength) { gatherer.sampleNow(bucketStart); LOG_DEBUG(gatherer.effectiveSampleCount(0)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedSampleCounts[bucket], - gatherer.effectiveSampleCount(0), - tolerance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedSampleCounts[bucket], gatherer.effectiveSampleCount(0), tolerance); ++bucket; } addArrival(messages[i], gatherer, m_ResourceMonitor); } - core_t::TTime bucketStart = - startTime + static_cast<core_t::TTime>(bucket) * bucketLength; + core_t::TTime bucketStart = startTime + static_cast<core_t::TTime>(bucket) * bucketLength; gatherer.sampleNow(bucketStart); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedSampleCounts[bucket], - gatherer.effectiveSampleCount(0), - tolerance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedSampleCounts[bucket], gatherer.effectiveSampleCount(0),
tolerance); } -void CMetricPopulationDataGathererTest::testFeatureData() -{ +void CMetricPopulationDataGathererTest::testFeatureData() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testFeatureData ***"); // Test we correctly sample the mean, minimum and maximum statistics. @@ -537,7 +455,7 @@ void CMetricPopulationDataGathererTest::testFeatureData() factory.features(features); CModelFactory::SGathererInitializationData initData(startTime); CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer &gatherer(*gathererPtr); + CDataGatherer& gatherer(*gathererPtr); TStrStrPrMeanAccumulatorMap bucketMeanAccumulators; TStrStrPrMeanAccumulatorMap sampleMeanAccumulators; @@ -549,12 +467,9 @@ void CMetricPopulationDataGathererTest::testFeatureData() TStrStrPrMaxAccumulatorMap sampleMaxAccumulators; TStrStrPrDoubleVecMap expectedMaxSamples; core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { - LOG_DEBUG("Processing bucket [" << bucketStart - << ", " << bucketStart+bucketLength << ")"); + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { + LOG_DEBUG("Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); gatherer.sampleNow(bucketStart); @@ -565,20 +480,14 @@ void CMetricPopulationDataGathererTest::testFeatureData() CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMeanByPersonAndAttribute, tmp[0].first); TStrStrPrDoubleMap means; TStrStrPrDoubleVecMap meanSamples; - for (std::size_t j = 0u; j < tmp[0].second.size(); ++j) - { - const TSizeSizePrFeatureDataPr &data = tmp[0].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), - gatherer.attributeName(data.first.second)); - if (data.second.s_BucketValue) - { + for (std::size_t j = 0u; j < tmp[0].second.size(); ++j) { + const TSizeSizePrFeatureDataPr& data = tmp[0].second[j]; + TStrStrPr key(gatherer.personName(data.first.first), gatherer.attributeName(data.first.second)); + if (data.second.s_BucketValue) { means[key] = data.second.s_BucketValue->value()[0]; } - TDoubleVec &samples = meanSamples[key]; - for (std::size_t k = 0u; - k < boost::unwrap_ref(data.second.s_Samples).size(); - ++k) - { + TDoubleVec& samples = meanSamples[key]; + for (std::size_t k = 0u; k < boost::unwrap_ref(data.second.s_Samples).size(); ++k) { samples.push_back(boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); } } @@ -586,20 +495,14 @@ void CMetricPopulationDataGathererTest::testFeatureData() CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMinByPersonAndAttribute, tmp[1].first); TStrStrPrDoubleMap mins; TStrStrPrDoubleVecMap minSamples; - for (std::size_t j = 0u; j < tmp[1].second.size(); ++j) - { - const TSizeSizePrFeatureDataPr &data = tmp[1].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), - gatherer.attributeName(data.first.second)); - if (data.second.s_BucketValue) - { + for (std::size_t j = 0u; j < tmp[1].second.size(); ++j) { + const TSizeSizePrFeatureDataPr& data = tmp[1].second[j]; + TStrStrPr key(gatherer.personName(data.first.first), gatherer.attributeName(data.first.second)); + if (data.second.s_BucketValue) { mins[key] = data.second.s_BucketValue->value()[0]; } - TDoubleVec &samples = minSamples[key]; - for (std::size_t k = 0u; - k < boost::unwrap_ref(data.second.s_Samples).size(); - ++k) - { + TDoubleVec& samples = minSamples[key]; + for (std::size_t k = 0u; k < 
boost::unwrap_ref(data.second.s_Samples).size(); ++k) { samples.push_back(boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); } } @@ -607,61 +510,40 @@ void CMetricPopulationDataGathererTest::testFeatureData() CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMaxByPersonAndAttribute, tmp[2].first); TStrStrPrDoubleMap maxs; TStrStrPrDoubleVecMap maxSamples; - for (std::size_t j = 0u; j < tmp[2].second.size(); ++j) - { - const TSizeSizePrFeatureDataPr &data = tmp[2].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), - gatherer.attributeName(data.first.second)); - if (data.second.s_BucketValue) - { + for (std::size_t j = 0u; j < tmp[2].second.size(); ++j) { + const TSizeSizePrFeatureDataPr& data = tmp[2].second[j]; + TStrStrPr key(gatherer.personName(data.first.first), gatherer.attributeName(data.first.second)); + if (data.second.s_BucketValue) { maxs[key] = data.second.s_BucketValue->value()[0]; } - TDoubleVec &samples = maxSamples[key]; - for (std::size_t k = 0u; - k < boost::unwrap_ref(data.second.s_Samples).size(); - ++k) - { + TDoubleVec& samples = maxSamples[key]; + for (std::size_t k = 0u; k < boost::unwrap_ref(data.second.s_Samples).size(); ++k) { samples.push_back(boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); } } TStrStrPrDoubleMap expectedMeans; - for (TStrStrPrMeanAccumulatorMapCItr itr = bucketMeanAccumulators.begin(); - itr != bucketMeanAccumulators.end(); - ++itr) - { + for (TStrStrPrMeanAccumulatorMapCItr itr = bucketMeanAccumulators.begin(); itr != bucketMeanAccumulators.end(); ++itr) { expectedMeans[itr->first] = maths::CBasicStatistics::mean(itr->second); } TStrStrPrDoubleMap expectedMins; - for (TStrStrPrMinAccumulatorMapCItr itr = bucketMinAccumulators.begin(); - itr != bucketMinAccumulators.end(); - ++itr) - { + for (TStrStrPrMinAccumulatorMapCItr itr = bucketMinAccumulators.begin(); itr != bucketMinAccumulators.end(); ++itr) { expectedMins[itr->first] = itr->second[0]; } TStrStrPrDoubleMap expectedMaxs; - for (TStrStrPrMaxAccumulatorMapCItr itr = bucketMaxAccumulators.begin(); - itr != bucketMaxAccumulators.end(); - ++itr) - { + for (TStrStrPrMaxAccumulatorMapCItr itr = bucketMaxAccumulators.begin(); itr != bucketMaxAccumulators.end(); ++itr) { expectedMaxs[itr->first] = itr->second[0]; } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), - core::CContainerPrinter::print(means)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMins), - core::CContainerPrinter::print(mins)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxs), - core::CContainerPrinter::print(maxs)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), core::CContainerPrinter::print(means)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMins), core::CContainerPrinter::print(mins)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxs), core::CContainerPrinter::print(maxs)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeanSamples), - core::CContainerPrinter::print(meanSamples)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMinSamples), - core::CContainerPrinter::print(minSamples)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxSamples), - core::CContainerPrinter::print(maxSamples)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeanSamples), core::CContainerPrinter::print(meanSamples)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMinSamples), core::CContainerPrinter::print(minSamples)); + 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxSamples), core::CContainerPrinter::print(maxSamples)); bucketStart += bucketLength; bucketMeanAccumulators.clear(); @@ -686,16 +568,12 @@ void CMetricPopulationDataGathererTest::testFeatureData() CPPUNIT_ASSERT(gatherer.attributeId(messages[i].s_Attribute, cid)); double sampleCount = gatherer.effectiveSampleCount(cid); - if (sampleCount > 0.0) - { + if (sampleCount > 0.0) { sampleMeanAccumulators[key].add(messages[i].s_Value); sampleMinAccumulators[key].add(messages[i].s_Value); sampleMaxAccumulators[key].add(messages[i].s_Value); - if (maths::CBasicStatistics::count(sampleMeanAccumulators[key]) - == std::floor(sampleCount + 0.5)) - { - expectedMeanSamples[key].push_back( - maths::CBasicStatistics::mean(sampleMeanAccumulators[key])); + if (maths::CBasicStatistics::count(sampleMeanAccumulators[key]) == std::floor(sampleCount + 0.5)) { + expectedMeanSamples[key].push_back(maths::CBasicStatistics::mean(sampleMeanAccumulators[key])); expectedMinSamples[key].push_back(sampleMinAccumulators[key][0]); expectedMaxSamples[key].push_back(sampleMaxAccumulators[key][0]); sampleMeanAccumulators[key] = TMeanAccumulator(); @@ -706,8 +584,7 @@ void CMetricPopulationDataGathererTest::testFeatureData() } } -void CMetricPopulationDataGathererTest::testRemovePeople() -{ +void CMetricPopulationDataGathererTest::testRemovePeople() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testRemovePeople ***"); // Check that all the state is correctly updated when some @@ -730,20 +607,29 @@ void CMetricPopulationDataGathererTest::testRemovePeople() features.push_back(model_t::E_PopulationMinByPersonAndAttribute); features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute); - CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, searchKey, features, startTime, 0); + CDataGatherer gatherer(model_t::E_PopulationMetric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + searchKey, + features, + startTime, + 0); TMessageVec messages; generateTestMessages(startTime, messages); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { - LOG_DEBUG("Processing bucket [" << bucketStart - << ", " << bucketStart + bucketLength << ")"); + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { + LOG_DEBUG("Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); gatherer.sampleNow(bucketStart); bucketStart += bucketLength; } @@ -763,15 +649,11 @@ void CMetricPopulationDataGathererTest::testRemovePeople() CPPUNIT_ASSERT_EQUAL(numberPeople, gatherer.numberOverFieldValues()); TStrVec expectedPersonNames; TSizeVec expectedPersonIds; - for (std::size_t i = 0u; i < numberPeople; ++i) - { - if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), i)) - { + for (std::size_t i = 0u; i < numberPeople; ++i) { + if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), i)) { expectedPersonNames.push_back(gatherer.personName(i)); expectedPersonIds.push_back(i); - } - else - { + } else { LOG_DEBUG("Removing " << gatherer.personName(i)); } } @@ -780,37 +662,26 @@ void 
CMetricPopulationDataGathererTest::testRemovePeople() { TSizeUInt64PrVec nonZeroCounts; gatherer.personNonZeroCounts(bucketStart, nonZeroCounts); - for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) - { - if (!std::binary_search(peopleToRemove.begin(), - peopleToRemove.end(), - nonZeroCounts[i].first)) - { - const std::string &name = gatherer.personName(nonZeroCounts[i].first); + for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) { + if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), nonZeroCounts[i].first)) { + const std::string& name = gatherer.personName(nonZeroCounts[i].first); expectedNonZeroCounts[name] = nonZeroCounts[i].second; } } } - LOG_DEBUG("expectedNonZeroCounts = " - << core::CContainerPrinter::print(expectedNonZeroCounts)); + LOG_DEBUG("expectedNonZeroCounts = " << core::CContainerPrinter::print(expectedNonZeroCounts)); LOG_DEBUG("Expected"); TStrFeatureDataPrVec expectedFeatureData; { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - const TSizeSizePrFeatureDataPrVec &data = featureData[i].second; - for (std::size_t j = 0u; j < data.size(); ++j) - { - if (!std::binary_search(peopleToRemove.begin(), - peopleToRemove.end(), - data[j].first.first)) - { - std::string key = model_t::print(featureData[i].first) - + " " + gatherer.personName(data[j].first.first) - + " " + gatherer.attributeName(data[j].first.second); + for (std::size_t i = 0u; i < featureData.size(); ++i) { + const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; + for (std::size_t j = 0u; j < data.size(); ++j) { + if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), data[j].first.first)) { + std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + gatherer.attributeName(data[j].first.second); expectedFeatureData.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(" " << key); LOG_DEBUG(" " << data[j].second.print()); @@ -821,10 +692,8 @@ void CMetricPopulationDataGathererTest::testRemovePeople() gatherer.recyclePeople(peopleToRemove); - CPPUNIT_ASSERT_EQUAL(numberPeople - peopleToRemove.size(), - gatherer.numberActivePeople()); - for (std::size_t i = 0u; i < expectedPersonNames.size(); ++i) - { + CPPUNIT_ASSERT_EQUAL(numberPeople - peopleToRemove.size(), gatherer.numberActivePeople()); + for (std::size_t i = 0u; i < expectedPersonNames.size(); ++i) { std::size_t pid; CPPUNIT_ASSERT(gatherer.personId(expectedPersonNames[i], pid)); CPPUNIT_ASSERT_EQUAL(expectedPersonIds[i], pid); @@ -833,30 +702,24 @@ void CMetricPopulationDataGathererTest::testRemovePeople() TStrSizeMap actualNonZeroCounts; TSizeUInt64PrVec nonZeroCounts; gatherer.personNonZeroCounts(bucketStart, nonZeroCounts); - for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) - { - const std::string &name = gatherer.personName(nonZeroCounts[i].first); + for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) { + const std::string& name = gatherer.personName(nonZeroCounts[i].first); actualNonZeroCounts[name] = nonZeroCounts[i].second; } - LOG_DEBUG("actualNonZeroCounts = " - << core::CContainerPrinter::print(actualNonZeroCounts)); + LOG_DEBUG("actualNonZeroCounts = " << core::CContainerPrinter::print(actualNonZeroCounts)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), - core::CContainerPrinter::print(actualNonZeroCounts)); + 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), core::CContainerPrinter::print(actualNonZeroCounts)); LOG_DEBUG("Actual"); TStrFeatureDataPrVec actualFeatureData; { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - const TSizeSizePrFeatureDataPrVec &data = featureData[i].second; - for (std::size_t j = 0u; j < data.size(); ++j) - { - std::string key = model_t::print(featureData[i].first) - + " " + gatherer.personName(data[j].first.first) - + " " + gatherer.attributeName(data[j].first.second); + for (std::size_t i = 0u; i < featureData.size(); ++i) { + const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; + for (std::size_t j = 0u; j < data.size(); ++j) { + std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + gatherer.attributeName(data[j].first.second); actualFeatureData.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(" " << key); LOG_DEBUG(" " << data[j].second.print()); @@ -864,12 +727,10 @@ void CMetricPopulationDataGathererTest::testRemovePeople() } } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedFeatureData), - core::CContainerPrinter::print(actualFeatureData)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedFeatureData), core::CContainerPrinter::print(actualFeatureData)); } -void CMetricPopulationDataGathererTest::testRemoveAttributes() -{ +void CMetricPopulationDataGathererTest::testRemoveAttributes() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testRemoveAttributes ***"); // Check that all the state is correctly updated when some @@ -889,20 +750,29 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() features.push_back(model_t::E_PopulationMinByPersonAndAttribute); features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute); - CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, searchKey, features, startTime, 0); + CDataGatherer gatherer(model_t::E_PopulationMetric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + searchKey, + features, + startTime, + 0); TMessageVec messages; generateTestMessages(startTime, messages); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { - LOG_DEBUG("Processing bucket [" << bucketStart - << ", " << bucketStart + bucketLength << ")"); + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { + LOG_DEBUG("Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); gatherer.sampleNow(bucketStart); bucketStart += bucketLength; } @@ -921,17 +791,12 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() TStrVec expectedAttributeNames; TSizeVec expectedAttributeIds; TDoubleVec expectedSampleCounts; - for (std::size_t i = 0u; i < numberAttributes; ++i) - { - if (!std::binary_search(attributesToRemove.begin(), - attributesToRemove.end(), i)) - { + for (std::size_t i = 0u; i < numberAttributes; ++i) { + if (!std::binary_search(attributesToRemove.begin(), attributesToRemove.end(), i)) 
{ expectedAttributeNames.push_back(gatherer.attributeName(i)); expectedAttributeIds.push_back(i); expectedSampleCounts.push_back(gatherer.effectiveSampleCount(i)); - } - else - { + } else { LOG_DEBUG("Removing " << gatherer.attributeName(i)); } } @@ -942,18 +807,12 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() TStrFeatureDataPrVec expected; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - const TSizeSizePrFeatureDataPrVec &data = featureData[i].second; - for (std::size_t j = 0u; j < data.size(); ++j) - { - if (!std::binary_search(attributesToRemove.begin(), - attributesToRemove.end(), - data[j].first.second)) - { - std::string key = model_t::print(featureData[i].first) - + " " + gatherer.personName(data[j].first.first) - + " " + gatherer.attributeName(data[j].first.second); + for (std::size_t i = 0u; i < featureData.size(); ++i) { + const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; + for (std::size_t j = 0u; j < data.size(); ++j) { + if (!std::binary_search(attributesToRemove.begin(), attributesToRemove.end(), data[j].first.second)) { + std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + gatherer.attributeName(data[j].first.second); expected.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(" " << key); LOG_DEBUG(" " << data[j].second.print()); @@ -965,10 +824,8 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() gatherer.recycleAttributes(attributesToRemove); - CPPUNIT_ASSERT_EQUAL(numberAttributes - attributesToRemove.size(), - gatherer.numberActiveAttributes()); - for (std::size_t i = 0u; i < expectedAttributeNames.size(); ++i) - { + CPPUNIT_ASSERT_EQUAL(numberAttributes - attributesToRemove.size(), gatherer.numberActiveAttributes()); + for (std::size_t i = 0u; i < expectedAttributeNames.size(); ++i) { std::size_t cid; CPPUNIT_ASSERT(gatherer.attributeId(expectedAttributeNames[i], cid)); CPPUNIT_ASSERT_EQUAL(expectedAttributeIds[i], cid); @@ -976,12 +833,10 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() numberAttributes = gatherer.numberActiveAttributes(); TDoubleVec actualSampleCounts; - for (std::size_t i = 0u; i < numberAttributes; ++i) - { + for (std::size_t i = 0u; i < numberAttributes; ++i) { actualSampleCounts.push_back(gatherer.effectiveSampleCount(expectedAttributeIds[i])); } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampleCounts), - core::CContainerPrinter::print(actualSampleCounts)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampleCounts), core::CContainerPrinter::print(actualSampleCounts)); std::string actualFeatureData; { @@ -989,14 +844,11 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() TStrFeatureDataPrVec actual; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t i = 0u; i < featureData.size(); ++i) - { - const TSizeSizePrFeatureDataPrVec &data = featureData[i].second; - for (std::size_t j = 0u; j < data.size(); ++j) - { - std::string key = model_t::print(featureData[i].first) - + " " + gatherer.personName(data[j].first.first) - + " " + gatherer.attributeName(data[j].first.second); + for (std::size_t i = 0u; i < featureData.size(); ++i) { + const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; + for (std::size_t j = 0u; j < data.size(); ++j) { + std::string 
key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + gatherer.attributeName(data[j].first.second); actual.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(" " << key); LOG_DEBUG(" " << data[j].second.print()); @@ -1008,8 +860,7 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() CPPUNIT_ASSERT_EQUAL(expectedFeatureData, actualFeatureData); } -void CMetricPopulationDataGathererTest::testInfluenceStatistics() -{ +void CMetricPopulationDataGathererTest::testInfluenceStatistics() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testInfluenceStatistics ***"); using TDoubleDoublePr = std::pair; @@ -1021,123 +872,113 @@ void CMetricPopulationDataGathererTest::testInfluenceStatistics() SModelParams params(bucketLength); params.s_DecayRate = 0.001; - std::string influencerNames_[] = { "i1", "i2" }; - std::string influencerValues[][3] = - { - { "i11", "i12", "i13" }, - { "i21", "i22", "i23" } - }; - - SMessage data[] = - { - SMessage(1, "p1", "", 1.0, vec(influencerValues[0][0], influencerValues[1][0])), // Bucket 1 - SMessage(150, "p1", "", 5.0, vec(influencerValues[0][1], influencerValues[1][1])), - SMessage(150, "p1", "", 3.0, vec(influencerValues[0][2], influencerValues[1][2])), - SMessage(550, "p2", "", 2.0, vec(influencerValues[0][0], influencerValues[1][0])), - SMessage(551, "p2", "", 2.1, vec(influencerValues[0][1], influencerValues[1][1])), - SMessage(552, "p2", "", 4.0, vec(influencerValues[0][2], influencerValues[1][2])), - SMessage(554, "p2", "", 2.2, vec(influencerValues[0][2], influencerValues[1][2])), - SMessage(600, "p1", "", 3.0, vec(influencerValues[0][1], influencerValues[1][0])), // Bucket 2 - SMessage(660, "p2", "", 3.0, vec(influencerValues[0][0], influencerValues[1][2])), - SMessage(690, "p1", "", 7.3, vec(influencerValues[0][1], "")), - SMessage(700, "p2", "", 4.0, vec(influencerValues[0][0], influencerValues[1][2])), - SMessage(800, "p1", "", 2.2, vec(influencerValues[0][2], influencerValues[1][0])), - SMessage(900, "p2", "", 2.5, vec(influencerValues[0][1], influencerValues[1][0])), - SMessage(1000, "p1", "", 5.0, vec(influencerValues[0][1], influencerValues[1][0])), - SMessage(1200, "p2", "", 6.4, vec("", influencerValues[1][2])), // Bucket 3 - SMessage(1210, "p2", "", 6.0, vec("", influencerValues[1][2])), - SMessage(1240, "p2", "", 7.0, vec("", influencerValues[1][1])), - SMessage(1600, "p2", "", 11.0, vec("", influencerValues[1][0])), - SMessage(1800, "p1", "", 11.0, vec("", "")) // Sentinel - }; - - std::string expectedStatistics[] = - { - "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", - "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (3.1, 2)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (3.1, 2))]", - "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", - "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (2.2, 1)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (2.2, 1))]", - "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", - "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (4, 1))]", - "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", - "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (6.2, 1)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (6.2, 1))]", - "[(i12, (5.1, 3)), (i13, (2.2, 1)), (i21, (3.4, 3))]", - "[(i11, (3.5, 2)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (3.5, 2))]", - "[(i12, (3, 1)), (i13, (2.2, 1)), (i21, 
(2.2, 1))]", - "[(i11, (3, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (3, 1))]", - "[(i12, (7.3, 1)), (i13, (2.2, 1)), (i21, (5, 1))]", - "[(i11, (4, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (4, 1))]", - "[(i12, (15.3, 1)), (i13, (2.2, 1)), (i21, (10.2, 1))]", - "[(i11, (7, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (7, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]" - }; - const std::string *expected = expectedStatistics; + std::string influencerNames_[] = {"i1", "i2"}; + std::string influencerValues[][3] = {{"i11", "i12", "i13"}, {"i21", "i22", "i23"}}; + + SMessage data[] = { + SMessage(1, "p1", "", 1.0, vec(influencerValues[0][0], influencerValues[1][0])), // Bucket 1 + SMessage(150, "p1", "", 5.0, vec(influencerValues[0][1], influencerValues[1][1])), + SMessage(150, "p1", "", 3.0, vec(influencerValues[0][2], influencerValues[1][2])), + SMessage(550, "p2", "", 2.0, vec(influencerValues[0][0], influencerValues[1][0])), + SMessage(551, "p2", "", 2.1, vec(influencerValues[0][1], influencerValues[1][1])), + SMessage(552, "p2", "", 4.0, vec(influencerValues[0][2], influencerValues[1][2])), + SMessage(554, "p2", "", 2.2, vec(influencerValues[0][2], influencerValues[1][2])), + SMessage(600, "p1", "", 3.0, vec(influencerValues[0][1], influencerValues[1][0])), // Bucket 2 + SMessage(660, "p2", "", 3.0, vec(influencerValues[0][0], influencerValues[1][2])), + SMessage(690, "p1", "", 7.3, vec(influencerValues[0][1], "")), + SMessage(700, "p2", "", 4.0, vec(influencerValues[0][0], influencerValues[1][2])), + SMessage(800, "p1", "", 2.2, vec(influencerValues[0][2], influencerValues[1][0])), + SMessage(900, "p2", "", 2.5, vec(influencerValues[0][1], influencerValues[1][0])), + SMessage(1000, "p1", "", 5.0, vec(influencerValues[0][1], influencerValues[1][0])), + SMessage(1200, "p2", "", 6.4, vec("", influencerValues[1][2])), // Bucket 3 + SMessage(1210, "p2", "", 6.0, vec("", influencerValues[1][2])), + SMessage(1240, "p2", "", 7.0, vec("", influencerValues[1][1])), + SMessage(1600, "p2", "", 11.0, vec("", influencerValues[1][0])), + SMessage(1800, "p1", "", 11.0, vec("", "")) // Sentinel + }; + + std::string expectedStatistics[] = { + "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", + "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (3.1, 2)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (3.1, 2))]", + "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", + "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (2.2, 1)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (2.2, 1))]", + "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", + "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (4, 1))]", + "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", + "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (6.2, 1)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (6.2, 1))]", + "[(i12, (5.1, 3)), (i13, (2.2, 1)), (i21, (3.4, 3))]", + "[(i11, (3.5, 2)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (3.5, 2))]", + "[(i12, (3, 1)), (i13, (2.2, 1)), (i21, (2.2, 1))]", + "[(i11, (3, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (3, 1))]", + "[(i12, (7.3, 1)), (i13, (2.2, 1)), (i21, (5, 1))]", + "[(i11, (4, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (4, 1))]", + "[(i12, (15.3, 1)), (i13, (2.2, 
1)), (i21, (10.2, 1))]", + "[(i11, (7, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (7, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]"}; + const std::string* expected = expectedStatistics; TFeatureVec features; features.push_back(model_t::E_PopulationMeanByPersonAndAttribute); features.push_back(model_t::E_PopulationMinByPersonAndAttribute); features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationHighSumByBucketPersonAndAttribute); - TStrVec influencerNames(boost::begin(influencerNames_), - boost::end(influencerNames_)); - CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, false, searchKey, features, startTime, 2u); + TStrVec influencerNames(boost::begin(influencerNames_), boost::end(influencerNames_)); + CDataGatherer gatherer(model_t::E_PopulationMetric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + influencerNames, + false, + searchKey, + features, + startTime, + 2u); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u, b = 0u; i < boost::size(data); ++i) - { - if (data[i].s_Time >= bucketStart + bucketLength) - { + for (std::size_t i = 0u, b = 0u; i < boost::size(data); ++i) { + if (data[i].s_Time >= bucketStart + bucketLength) { LOG_DEBUG("*** processing bucket ***"); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t j = 0u; j < featureData.size(); ++j) - { + for (std::size_t j = 0u; j < featureData.size(); ++j) { model_t::EFeature feature = featureData[j].first; LOG_DEBUG("feature = " << model_t::print(feature)); - const TSizeSizePrFeatureDataPrVec &data_ = featureData[j].second; - for (std::size_t k = 0u; k < data_.size(); ++k) - { + const TSizeSizePrFeatureDataPrVec& data_ = featureData[j].second; + for (std::size_t k = 0u; k < data_.size(); ++k) { TStrDoubleDoublePrPrVec statistics; - for (std::size_t m = 0u; - m < data_[k].second.s_InfluenceValues.size(); - ++m) - { - for (std::size_t n = 0u; - n < data_[k].second.s_InfluenceValues[m].size(); - ++n) - { - statistics.push_back(TStrDoubleDoublePrPr( - data_[k].second.s_InfluenceValues[m][n].first, - TDoubleDoublePr(data_[k].second.s_InfluenceValues[m][n].second.first[0], - data_[k].second.s_InfluenceValues[m][n].second.second))); + for (std::size_t m = 0u; m < data_[k].second.s_InfluenceValues.size(); ++m) { + for (std::size_t n = 0u; n < data_[k].second.s_InfluenceValues[m].size(); ++n) { + statistics.push_back( + TStrDoubleDoublePrPr(data_[k].second.s_InfluenceValues[m][n].first, + TDoubleDoublePr(data_[k].second.s_InfluenceValues[m][n].second.first[0], + data_[k].second.s_InfluenceValues[m][n].second.second))); } } - std::sort(statistics.begin(), - statistics.end(), - maths::COrderings::SFirstLess()); + std::sort(statistics.begin(), statistics.end(), maths::COrderings::SFirstLess()); - LOG_DEBUG("statistics = " - << core::CContainerPrinter::print(statistics)); + LOG_DEBUG("statistics = " << core::CContainerPrinter::print(statistics)); LOG_DEBUG("expected = " << *expected); - CPPUNIT_ASSERT_EQUAL(*(expected++), - core::CContainerPrinter::print(statistics)); + CPPUNIT_ASSERT_EQUAL(*(expected++), 
core::CContainerPrinter::print(statistics)); } } - bucketStart += bucketLength; ++b; + bucketStart += bucketLength; + ++b; } addArrival(data[i], gatherer, m_ResourceMonitor); } } -void CMetricPopulationDataGathererTest::testPersistence() -{ +void CMetricPopulationDataGathererTest::testPersistence() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testPersistence ***"); const core_t::TTime startTime = 1367280000; @@ -1150,20 +991,29 @@ void CMetricPopulationDataGathererTest::testPersistence() features.push_back(model_t::E_PopulationMinByPersonAndAttribute); features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationHighSumByBucketPersonAndAttribute); - CDataGatherer origDataGatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, searchKey, features, startTime, 0); + CDataGatherer origDataGatherer(model_t::E_PopulationMetric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + searchKey, + features, + startTime, + 0); TMessageVec messages; generateTestMessages(startTime, messages); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= bucketStart + bucketLength) - { - LOG_DEBUG("Processing bucket [" << bucketStart - << ", " << bucketStart + bucketLength << ")"); + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= bucketStart + bucketLength) { + LOG_DEBUG("Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); origDataGatherer.sampleNow(bucketStart); bucketStart += bucketLength; } @@ -1178,11 +1028,9 @@ void CMetricPopulationDataGathererTest::testPersistence() inserter.toXml(origXml); } //LOG_DEBUG("origXml = " << origXml); - LOG_DEBUG("origXml length = " << origXml.length() - << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace)); + LOG_DEBUG("origXml length = " << origXml.length() << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace)); - std::size_t length = origXml.length() - - std::count_if(origXml.begin(), origXml.end(), isSpace); + std::size_t length = origXml.length() - std::count_if(origXml.begin(), origXml.end(), isSpace); CPPUNIT_ASSERT(length < 645000); // Restore the XML into a new data gatherer @@ -1190,9 +1038,19 @@ void CMetricPopulationDataGathererTest::testPersistence() CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredDataGatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, searchKey, traverser); + CDataGatherer restoredDataGatherer(model_t::E_PopulationMetric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + searchKey, + traverser); // The XML representation of the new data gatherer should be the same as the // original @@ -1203,14 +1061,12 @@ void CMetricPopulationDataGathererTest::testPersistence() inserter.toXml(newXml); } //LOG_DEBUG("newXml = " << newXml); - LOG_DEBUG("newXml length = " << newXml.length() - << ", # tabs " << std::count_if(newXml.begin(), newXml.end(), isSpace)); + LOG_DEBUG("newXml length = " << newXml.length() << ", # tabs " << 
std::count_if(newXml.begin(), newXml.end(), isSpace)); CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CMetricPopulationDataGathererTest::testReleaseMemory() -{ +void CMetricPopulationDataGathererTest::testReleaseMemory() { LOG_DEBUG("*** CMetricPopulationDataGathererTest::testReleaseMemory ***"); const core_t::TTime startTime = 1373932800; @@ -1223,16 +1079,14 @@ void CMetricPopulationDataGathererTest::testReleaseMemory() factory.features(features); CModelFactory::SGathererInitializationData initData(startTime); CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer &gatherer(*gathererPtr); + CDataGatherer& gatherer(*gathererPtr); CPPUNIT_ASSERT(gatherer.isPopulation()); core_t::TTime bucketStart = startTime; // Add a few buckets with count of 10 so that sample count gets estimated - for (std::size_t i = 0; i < 10; ++i) - { + for (std::size_t i = 0; i < 10; ++i) { // Add 10 events - for (std::size_t j = 0; j < 10; ++j) - { + for (std::size_t j = 0; j < 10; ++j) { addArrival(SMessage(bucketStart, "p1", "", 10.0), gatherer, m_ResourceMonitor); addArrival(SMessage(bucketStart, "p2", "", 10.0), gatherer, m_ResourceMonitor); } @@ -1241,8 +1095,7 @@ void CMetricPopulationDataGathererTest::testReleaseMemory() } // Add a bucket with not enough data to sample for p2 - for (std::size_t j = 0; j < 10; ++j) - { + for (std::size_t j = 0; j < 10; ++j) { addArrival(SMessage(bucketStart, "p1", "", 10.0), gatherer, m_ResourceMonitor); } addArrival(SMessage(bucketStart, "p2", "", 10.0), gatherer, m_ResourceMonitor); @@ -1252,60 +1105,45 @@ void CMetricPopulationDataGathererTest::testReleaseMemory() std::size_t mem = gatherer.memoryUsage(); // Add 48 + 1 buckets ( > 2 days) to force incomplete samples out of consideration for p2 - for (std::size_t i = 0; i < 49 + 1; ++i) - { - for (std::size_t j = 0; j < 10; ++j) - { + for (std::size_t i = 0; i < 49 + 1; ++i) { + for (std::size_t j = 0; j < 10; ++j) { addArrival(SMessage(bucketStart, "p1", "", 10.0), gatherer, m_ResourceMonitor); } gatherer.sampleNow(bucketStart - params.s_LatencyBuckets * bucketLength); bucketStart += bucketLength; gatherer.releaseMemory(bucketStart - params.s_SamplingAgeCutoff); - if (i <= 40) - { + if (i <= 40) { CPPUNIT_ASSERT(gatherer.memoryUsage() >= mem - 1000); } } CPPUNIT_ASSERT(gatherer.memoryUsage() < mem - 1000); } -CppUnit::Test *CMetricPopulationDataGathererTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMetricPopulationDataGathererTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationDataGathererTest::testMean", - &CMetricPopulationDataGathererTest::testMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationDataGathererTest::testMin", - &CMetricPopulationDataGathererTest::testMin) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationDataGathererTest::testMax", - &CMetricPopulationDataGathererTest::testMax) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationDataGathererTest::testSum", - &CMetricPopulationDataGathererTest::testSum) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationDataGathererTest::testSampleCount", - &CMetricPopulationDataGathererTest::testSampleCount) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationDataGathererTest::testFeatureData", - &CMetricPopulationDataGathererTest::testFeatureData) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationDataGathererTest::testRemovePeople", - 
&CMetricPopulationDataGathererTest::testRemovePeople) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricPopulationDataGathererTest>(
-                                   "CMetricPopulationDataGathererTest::testRemoveAttributes",
-                                   &CMetricPopulationDataGathererTest::testRemoveAttributes) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricPopulationDataGathererTest>(
-                                   "CMetricPopulationDataGathererTest::testInfluenceStatistics",
-                                   &CMetricPopulationDataGathererTest::testInfluenceStatistics) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricPopulationDataGathererTest>(
-                                   "CMetricPopulationDataGathererTest::testPersistence",
-                                   &CMetricPopulationDataGathererTest::testPersistence) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CMetricPopulationDataGathererTest>(
-                                   "CMetricPopulationDataGathererTest::testReleaseMemory",
-                                   &CMetricPopulationDataGathererTest::testReleaseMemory) );
+CppUnit::Test* CMetricPopulationDataGathererTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricPopulationDataGathererTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testMean",
+                                                                                     &CMetricPopulationDataGathererTest::testMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testMin",
+                                                                                     &CMetricPopulationDataGathererTest::testMin));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testMax",
+                                                                                     &CMetricPopulationDataGathererTest::testMax));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testSum",
+                                                                                     &CMetricPopulationDataGathererTest::testSum));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testSampleCount",
+                                                                                     &CMetricPopulationDataGathererTest::testSampleCount));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testFeatureData",
+                                                                                     &CMetricPopulationDataGathererTest::testFeatureData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testRemovePeople",
+                                                                                     &CMetricPopulationDataGathererTest::testRemovePeople));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>(
+        "CMetricPopulationDataGathererTest::testRemoveAttributes", &CMetricPopulationDataGathererTest::testRemoveAttributes));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>(
+        "CMetricPopulationDataGathererTest::testInfluenceStatistics", &CMetricPopulationDataGathererTest::testInfluenceStatistics));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testPersistence",
+                                                                                     &CMetricPopulationDataGathererTest::testPersistence));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>(
+        "CMetricPopulationDataGathererTest::testReleaseMemory", &CMetricPopulationDataGathererTest::testReleaseMemory));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.h b/lib/model/unittest/CMetricPopulationDataGathererTest.h
index 3f6d340e62..ee756db95b 100644
--- a/lib/model/unittest/CMetricPopulationDataGathererTest.h
+++ b/lib/model/unittest/CMetricPopulationDataGathererTest.h
@@ -11,26 +11,24 @@
 #include
 
-class CMetricPopulationDataGathererTest : public CppUnit::TestFixture
-{
-    public:
-        void testMean();
-        void testMin();
-        void testMax();
-        void testSum();
-        void testSampleCount();
-        void testFeatureData();
-        void testRemovePeople();
-        void testRemoveAttributes();
-        void testInfluenceStatistics();
-        void testPersistence();
-        void testReleaseMemory();
-
-        static CppUnit::Test *suite();
-
-    private:
-        ml::model::CResourceMonitor m_ResourceMonitor;
+class CMetricPopulationDataGathererTest : public CppUnit::TestFixture {
+public:
+    void testMean();
+    void testMin();
+    void testMax();
+    void testSum();
+    void testSampleCount();
+    void testFeatureData();
+    void testRemovePeople();
+    void testRemoveAttributes();
+    void testInfluenceStatistics();
+    void testPersistence();
+    void testReleaseMemory();
+
+    static CppUnit::Test* suite();
+
+private:
+    ml::model::CResourceMonitor m_ResourceMonitor;
 };
 
 #endif // INCLUDED_CMetricPopulationDataGathererTest_h
diff --git a/lib/model/unittest/CMetricPopulationModelTest.cc b/lib/model/unittest/CMetricPopulationModelTest.cc
index 5bb1626f0d..d717b1b24c 100644
--- a/lib/model/unittest/CMetricPopulationModelTest.cc
+++ b/lib/model/unittest/CMetricPopulationModelTest.cc
@@ -23,10 +23,10 @@
 #include
 #include
+#include
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -47,8 +47,7 @@
 using namespace ml;
 using namespace model;
 
-namespace
-{
+namespace {
 using TSizeSizePr = std::pair<std::size_t, std::size_t>;
 using TSizeSizePrVec = std::vector<TSizeSizePr>;
 
@@ -70,35 +69,22 @@ using TDouble2Vec = core::CSmallVector<double, 2>;
 
 const std::string EMPTY_STRING;
 
-struct SAnomaly
-{
+struct SAnomaly {
     SAnomaly() : s_Bucket(0u), s_Person(), s_Attributes() {}
-    SAnomaly(std::size_t bucket,
-             const std::string &person,
-             const TDoubleStrPrVec &attributes) :
-            s_Bucket(bucket),
-            s_Person(person),
-            s_Attributes(attributes)
-    {
-    }
+    SAnomaly(std::size_t bucket, const std::string& person, const TDoubleStrPrVec& attributes)
+        : s_Bucket(bucket), s_Person(person), s_Attributes(attributes) {}
 
     std::size_t s_Bucket;
     std::string s_Person;
     TDoubleStrPrVec s_Attributes;
 
-    bool operator<(const SAnomaly &other) const
-    {
-        return s_Bucket < other.s_Bucket;
-    }
+    bool operator<(const SAnomaly& other) const { return s_Bucket < other.s_Bucket; }
 
-    std::string print() const
-    {
+    std::string print() const {
         std::ostringstream result;
         result << "[" << s_Bucket << ", " + s_Person << ",";
-        for (std::size_t i = 0u; i < s_Attributes.size(); ++i)
-        {
-            if (s_Attributes[i].first < 0.01)
-            {
+        for (std::size_t i = 0u; i < s_Attributes.size(); ++i) {
+            if (s_Attributes[i].first < 0.01) {
                 result << " " << s_Attributes[i].second;
             }
         }
@@ -107,27 +93,12 @@ struct SAnomaly
     }
 };
 
-struct SMessage
-{
-    SMessage(core_t::TTime time,
-             const std::string &person,
-             const std::string &attribute,
-             const TDouble1Vec &value) :
-            s_Time(time),
-            s_Person(person),
-            s_Attribute(attribute),
-            s_Value(value)
-    {
-    }
+struct SMessage {
+    SMessage(core_t::TTime time, const std::string& person, const std::string& attribute, const TDouble1Vec& value)
+        : s_Time(time), s_Person(person), s_Attribute(attribute), s_Value(value) {}
 
-    bool operator<(const SMessage &other) const
-    {
-        return maths::COrderings::lexicographical_compare(s_Time,
-                                                          s_Person,
-                                                          s_Attribute,
-                                                          other.s_Time,
-                                                          other.s_Person,
-                                                          other.s_Attribute);
+    bool operator<(const SMessage& other) const {
+        return maths::COrderings::lexicographical_compare(s_Time, s_Person, s_Attribute, other.s_Time, other.s_Person, other.s_Attribute);
     }
 
     core_t::TTime s_Time;
@@ -141,19 +112,14 @@ using TMessageVec = std::vector<SMessage>;
 const std::size_t numberAttributes = 5u;
 const std::size_t numberPeople = 10u;
 
-double roundToNearestPersisted(double value)
-{
+double roundToNearestPersisted(double value) {
     std::string valueAsString(core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision));
     double result = 0.0;
     core::CStringUtils::stringToType(valueAsString, result);
     return result;
 }
 
-void generateTestMessages(std::size_t dimension,
-                          core_t::TTime startTime,
-                          core_t::TTime bucketLength,
-                          TMessageVec &messages)
-{
+void generateTestMessages(std::size_t dimension,
core_t::TTime startTime, core_t::TTime bucketLength, TMessageVec& messages) { // The test case is as follows: // // attribute | 0 | 1 | 2 | 3 | 4 @@ -173,97 +139,64 @@ void generateTestMessages(std::size_t dimension, const std::size_t numberBuckets = 100u; TStrVec people; - for (std::size_t i = 0u; i < numberPeople; ++i) - { + for (std::size_t i = 0u; i < numberPeople; ++i) { people.push_back("p" + core::CStringUtils::typeToString(i)); } LOG_DEBUG("people = " << core::CContainerPrinter::print(people)); TStrVec attributes; - for (std::size_t i = 0u; i < numberAttributes; ++i) - { + for (std::size_t i = 0u; i < numberAttributes; ++i) { attributes.push_back("c" + core::CStringUtils::typeToString(i)); } LOG_DEBUG("attributes = " << core::CContainerPrinter::print(attributes)); - double attributeRates[] = { 10.0, 2.0, 15.0, 2.0, 1.0 }; - double means[] = { 5.0, 10.0, 7.0, 3.0, 15.0 }; - double variances[] = { 1.0, 0.5, 2.0, 0.1, 4.0 }; - - TSizeSizePr attribute0AnomalyBucketPerson[] = - { - TSizeSizePr(40u, 6u), - TSizeSizePr(15u, 3u), - TSizeSizePr(12u, 2u) - }; - TSizeSizePr attribute2AnomalyBucketPerson[] = - { - TSizeSizePr(44u, 9u), - TSizeSizePr(30u, 5u) - }; - TSizeSizePr attribute3AnomalyBucketPerson[] = - { - TSizeSizePr(80u, 1u), - TSizeSizePr(12u, 2u) - }; - TSizeSizePr attribute4AnomalyBucketPerson[] = - { - TSizeSizePr(60u, 2u) - }; + double attributeRates[] = {10.0, 2.0, 15.0, 2.0, 1.0}; + double means[] = {5.0, 10.0, 7.0, 3.0, 15.0}; + double variances[] = {1.0, 0.5, 2.0, 0.1, 4.0}; + + TSizeSizePr attribute0AnomalyBucketPerson[] = {TSizeSizePr(40u, 6u), TSizeSizePr(15u, 3u), TSizeSizePr(12u, 2u)}; + TSizeSizePr attribute2AnomalyBucketPerson[] = {TSizeSizePr(44u, 9u), TSizeSizePr(30u, 5u)}; + TSizeSizePr attribute3AnomalyBucketPerson[] = {TSizeSizePr(80u, 1u), TSizeSizePr(12u, 2u)}; + TSizeSizePr attribute4AnomalyBucketPerson[] = {TSizeSizePr(60u, 2u)}; TSizeSizePrVecVec anomalies; - anomalies.push_back(TSizeSizePrVec(boost::begin(attribute0AnomalyBucketPerson), - boost::end(attribute0AnomalyBucketPerson))); + anomalies.push_back(TSizeSizePrVec(boost::begin(attribute0AnomalyBucketPerson), boost::end(attribute0AnomalyBucketPerson))); anomalies.push_back(TSizeSizePrVec()); - anomalies.push_back(TSizeSizePrVec(boost::begin(attribute2AnomalyBucketPerson), - boost::end(attribute2AnomalyBucketPerson))); - anomalies.push_back(TSizeSizePrVec(boost::begin(attribute3AnomalyBucketPerson), - boost::end(attribute3AnomalyBucketPerson))); - anomalies.push_back(TSizeSizePrVec(boost::begin(attribute4AnomalyBucketPerson), - boost::end(attribute4AnomalyBucketPerson))); + anomalies.push_back(TSizeSizePrVec(boost::begin(attribute2AnomalyBucketPerson), boost::end(attribute2AnomalyBucketPerson))); + anomalies.push_back(TSizeSizePrVec(boost::begin(attribute3AnomalyBucketPerson), boost::end(attribute3AnomalyBucketPerson))); + anomalies.push_back(TSizeSizePrVec(boost::begin(attribute4AnomalyBucketPerson), boost::end(attribute4AnomalyBucketPerson))); test::CRandomNumbers rng; - for (std::size_t i = 0u; i < numberBuckets; ++i, startTime += bucketLength) - { - for (std::size_t j = 0u; j < numberAttributes; ++j) - { + for (std::size_t i = 0u; i < numberBuckets; ++i, startTime += bucketLength) { + for (std::size_t j = 0u; j < numberAttributes; ++j) { TUIntVec samples; rng.generatePoissonSamples(attributeRates[j], numberPeople, samples); - for (std::size_t k = 0u; k < numberPeople; ++k) - { - bool anomaly = !anomalies[j].empty() - && anomalies[j].back().first == i - && anomalies[j].back().second == k; - if 
(anomaly) - { + for (std::size_t k = 0u; k < numberPeople; ++k) { + bool anomaly = !anomalies[j].empty() && anomalies[j].back().first == i && anomalies[j].back().second == k; + if (anomaly) { samples[k] += 4; anomalies[j].pop_back(); } - if (samples[k] == 0) - { + if (samples[k] == 0) { continue; } TDoubleVec values; rng.generateNormalSamples(means[j], variances[j], dimension * samples[k], values); - for (std::size_t l = 0u; l < values.size(); l += dimension) - { + for (std::size_t l = 0u; l < values.size(); l += dimension) { TDouble1Vec value(dimension); - for (std::size_t d = 0u; d < dimension; ++d) - { + for (std::size_t d = 0u; d < dimension; ++d) { double vd = values[l + d]; - if (anomaly && (l % (2 * dimension)) == 0) - { + if (anomaly && (l % (2 * dimension)) == 0) { vd += 6.0 * std::sqrt(variances[j]); } value[d] = roundToNearestPersisted(vd); } - core_t::TTime dt = (static_cast(l) * bucketLength) - / static_cast(values.size()); + core_t::TTime dt = (static_cast(l) * bucketLength) / static_cast(values.size()); messages.push_back(SMessage(startTime + dt, people[k], attributes[j], value)); } } @@ -274,21 +207,16 @@ void generateTestMessages(std::size_t dimension, std::sort(messages.begin(), messages.end()); } -std::string valueAsString(const TDouble1Vec &value) -{ +std::string valueAsString(const TDouble1Vec& value) { std::string result = core::CStringUtils::typeToStringPrecise(value[0], core::CIEEE754::E_DoublePrecision); - for (std::size_t i = 1u; i < value.size(); ++i) - { - result += CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER - + core::CStringUtils::typeToStringPrecise(value[i], core::CIEEE754::E_DoublePrecision); + for (std::size_t i = 1u; i < value.size(); ++i) { + result += CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER + + core::CStringUtils::typeToStringPrecise(value[i], core::CIEEE754::E_DoublePrecision); } return result; } -CEventData addArrival(const SMessage &message, - const CModelFactory::TDataGathererPtr &gatherer, - CResourceMonitor &resourceMonitor) -{ +CEventData addArrival(const SMessage& message, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields; fields.push_back(&message.s_Person); fields.push_back(&message.s_Attribute); @@ -303,23 +231,20 @@ CEventData addArrival(const SMessage &message, void processBucket(core_t::TTime time, core_t::TTime bucketLength, std::size_t n, - const double *bucket, - const std::string *influencerValues, - CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor, - CMetricPopulationModel &model, - SAnnotatedProbability &probability) -{ + const double* bucket, + const std::string* influencerValues, + CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, + CMetricPopulationModel& model, + SAnnotatedProbability& probability) { const std::string person("p"); const std::string attribute("a"); - for (std::size_t i = 0u; i < n; ++i) - { + for (std::size_t i = 0u; i < n; ++i) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); fieldValues.push_back(&attribute); fieldValues.push_back(&influencerValues[i]); - std::string valueAsString(core::CStringUtils::typeToStringPrecise( - bucket[i], core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise(bucket[i], core::CIEEE754::E_DoublePrecision)); fieldValues.push_back(&valueAsString); CEventData eventData; @@ -329,14 +254,12 @@ void processBucket(core_t::TTime time, } model.sample(time, time + 
bucketLength, resourceMonitor);
     CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
-    model.computeProbability(0/*pid*/, time, time + bucketLength, partitioningFields, 1, probability);
+    model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, probability);
     LOG_DEBUG("influences = " << core::CContainerPrinter::print(probability.s_Influences));
 }
-
 }
 
-void CMetricPopulationModelTest::testBasicAccessors()
-{
+void CMetricPopulationModelTest::testBasicAccessors() {
     LOG_DEBUG("*** CMetricPopulationModelTest::testBasicAccessors ***");
 
     // Check that the correct data is read and retrieved by the
@@ -363,8 +286,7 @@ void CMetricPopulationModelTest::testBasicAccessors()
     features.push_back(model_t::E_PopulationMaxByPersonAndAttribute);
     factory.features(features);
     CModelFactory::SGathererInitializationData gathererInitData(startTime);
-    CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(
-                                                 factory.makeDataGatherer(gathererInitData)));
+    CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData)));
     CModelFactory::SModelInitializationData modelInitData(gatherer);
     CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData));
 
@@ -375,10 +297,8 @@ void CMetricPopulationModelTest::testBasicAccessors()
     TMinAccumulatorVec expectedBucketMins(numberPeople * numberAttributes);
     TMaxAccumulatorVec expectedBucketMaxs(numberPeople * numberAttributes);
 
-    for (const auto &message : messages)
-    {
-        if (message.s_Time >= startTime + bucketLength)
-        {
+    for (const auto& message : messages) {
+        if (message.s_Time >= startTime + bucketLength) {
             model->sample(startTime, startTime + bucketLength, m_ResourceMonitor);
 
             LOG_DEBUG("Testing bucket = [" << startTime << "," << startTime + bucketLength << ")");
@@ -387,16 +307,14 @@ void CMetricPopulationModelTest::testBasicAccessors()
             CPPUNIT_ASSERT_EQUAL(numberAttributes, gatherer->numberActiveAttributes());
 
             // Test the person and attribute invariants.
-            for (std::size_t j = 0u; j < gatherer->numberActivePeople(); ++j)
-            {
-                const std::string &name = model->personName(j);
+            for (std::size_t j = 0u; j < gatherer->numberActivePeople(); ++j) {
+                const std::string& name = model->personName(j);
                 std::size_t pid;
                 CPPUNIT_ASSERT(gatherer->personId(name, pid));
                 CPPUNIT_ASSERT_EQUAL(j, pid);
             }
-            for (std::size_t j = 0u; j < gatherer->numberActiveAttributes(); ++j)
-            {
-                const std::string &name = model->attributeName(j);
+            for (std::size_t j = 0u; j < gatherer->numberActiveAttributes(); ++j) {
+                const std::string& name = model->attributeName(j);
                 std::size_t cid;
                 CPPUNIT_ASSERT(gatherer->attributeId(name, cid));
                 CPPUNIT_ASSERT_EQUAL(j, cid);
@@ -407,8 +325,7 @@ void CMetricPopulationModelTest::testBasicAccessors()
             TSizeVec expectedCurrentBucketPersonIds;
 
             // Test the person counts.
- for (const auto &count_ : expectedBucketPersonCounts) - { + for (const auto& count_ : expectedBucketPersonCounts) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(count_.first, pid)); expectedCurrentBucketPersonIds.push_back(pid); @@ -425,46 +342,32 @@ void CMetricPopulationModelTest::testBasicAccessors() CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCurrentBucketPersonIds), core::CContainerPrinter::print(bucketPersonIds)); - if ((startTime / bucketLength) % 10 == 0) - { + if ((startTime / bucketLength) % 10 == 0) { LOG_DEBUG("expected means = " << core::CContainerPrinter::print(expectedBucketMeans)); LOG_DEBUG("expected mins = " << core::CContainerPrinter::print(expectedBucketMins)); LOG_DEBUG("expected maxs = " << core::CContainerPrinter::print(expectedBucketMaxs)); } - for (std::size_t cid = 0u; cid < numberAttributes; ++cid) - { - for (std::size_t pid = 0u; pid < numberPeople; ++pid) - { - const TMeanAccumulator &expectedMean = expectedBucketMeans[pid * numberAttributes + cid]; - const TMinAccumulator &expectedMin = expectedBucketMins[pid * numberAttributes + cid]; - const TMaxAccumulator &expectedMax = expectedBucketMaxs[pid * numberAttributes + cid]; - - TDouble1Vec mean = model->currentBucketValue( - model_t::E_PopulationMeanByPersonAndAttribute, - pid, cid, startTime); - TDouble1Vec min = model->currentBucketValue( - model_t::E_PopulationMinByPersonAndAttribute, - pid, cid, startTime); - TDouble1Vec max = model->currentBucketValue( - model_t::E_PopulationMaxByPersonAndAttribute, - pid, cid, startTime); - - CPPUNIT_ASSERT( (!mean.empty() && maths::CBasicStatistics::count(expectedMean) > 0.0) - || ( mean.empty() && maths::CBasicStatistics::count(expectedMean) == 0.0)); - if (!mean.empty()) - { + for (std::size_t cid = 0u; cid < numberAttributes; ++cid) { + for (std::size_t pid = 0u; pid < numberPeople; ++pid) { + const TMeanAccumulator& expectedMean = expectedBucketMeans[pid * numberAttributes + cid]; + const TMinAccumulator& expectedMin = expectedBucketMins[pid * numberAttributes + cid]; + const TMaxAccumulator& expectedMax = expectedBucketMaxs[pid * numberAttributes + cid]; + + TDouble1Vec mean = model->currentBucketValue(model_t::E_PopulationMeanByPersonAndAttribute, pid, cid, startTime); + TDouble1Vec min = model->currentBucketValue(model_t::E_PopulationMinByPersonAndAttribute, pid, cid, startTime); + TDouble1Vec max = model->currentBucketValue(model_t::E_PopulationMaxByPersonAndAttribute, pid, cid, startTime); + + CPPUNIT_ASSERT((!mean.empty() && maths::CBasicStatistics::count(expectedMean) > 0.0) || + (mean.empty() && maths::CBasicStatistics::count(expectedMean) == 0.0)); + if (!mean.empty()) { CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::mean(expectedMean), mean[0]); } - CPPUNIT_ASSERT( (!min.empty() && expectedMin.count() > 0u) - || ( min.empty() && expectedMin.count() == 0u)); - if (!min.empty()) - { + CPPUNIT_ASSERT((!min.empty() && expectedMin.count() > 0u) || (min.empty() && expectedMin.count() == 0u)); + if (!min.empty()) { CPPUNIT_ASSERT_EQUAL(expectedMin[0], min[0]); } - CPPUNIT_ASSERT( (!max.empty() && expectedMax.count() > 0u) - || ( max.empty() && expectedMax.count() == 0u)); - if (!max.empty()) - { + CPPUNIT_ASSERT((!max.empty() && expectedMax.count() > 0u) || (max.empty() && expectedMax.count() == 0u)); + if (!max.empty()) { CPPUNIT_ASSERT_EQUAL(expectedMax[0], max[0]); } } @@ -487,8 +390,7 @@ void CMetricPopulationModelTest::testBasicAccessors() } } -void CMetricPopulationModelTest::testMinMaxAndMean() -{ +void 
CMetricPopulationModelTest::testMinMaxAndMean() {
     LOG_DEBUG("*** testMinMaxAndMean ***");
 
     // We check that the correct data is read from the gatherer
@@ -506,10 +408,10 @@ void CMetricPopulationModelTest::testMinMaxAndMean()
     using TSizeMathsModelPtrMap = std::map<std::size_t, TMathsModelPtr>;
     using TTimeDouble2VecSizeTrVecDouble2Vec4VecVecPr = std::pair<TTimeDouble2VecSizeTrVec, TDouble2Vec4VecVec>;
     using TSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMap = std::map<std::size_t, TTimeDouble2VecSizeTrVecDouble2Vec4VecVecPr>;
-    using TSizeSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMapMap = std::map<std::size_t, TSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMap>;
+    using TSizeSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMapMap =
+        std::map<std::size_t, TSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMap>;
 
-    static const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleCountWeight,
-                                                        maths_t::E_SampleWinsorisationWeight};
+    static const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight};
 
     core_t::TTime startTime = 1367280000;
     const core_t::TTime bucketLength = 3600;
@@ -529,7 +431,7 @@ void CMetricPopulationModelTest::testMinMaxAndMean()
     CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData)));
     CModelFactory::SModelInitializationData modelInitData(gatherer);
     CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData));
-    CMetricPopulationModel *model = dynamic_cast<CMetricPopulationModel*>(modelHolder.get());
+    CMetricPopulationModel* model = dynamic_cast<CMetricPopulationModel*>(modelHolder.get());
 
     CModelFactory::TFeatureMathsModelPtrPrVec models{factory.defaultFeatureModels(features, bucketLength, 1.0, false)};
     CPPUNIT_ASSERT_EQUAL(features.size(), models.size());
@@ -546,68 +448,55 @@ void CMetricPopulationModelTest::testMinMaxAndMean()
     TSizeMathsModelPtrMap expectedPopulationModels[3];
     bool nonNegative = true;
 
-    for (const auto &message : messages)
-    {
-        if (message.s_Time >= startTime + bucketLength)
-        {
+    for (const auto& message : messages) {
+        if (message.s_Time >= startTime + bucketLength) {
             model->sample(startTime, startTime + bucketLength, m_ResourceMonitor);
 
             TSizeSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMapMap populationWeightedSamples;
-            for (std::size_t feature = 0u; feature < features.size(); ++feature)
-            {
-                for (const auto &samples_ : expectedSamples[feature])
-                {
+            for (std::size_t feature = 0u; feature < features.size(); ++feature) {
+                for (const auto& samples_ : expectedSamples[feature]) {
                     std::size_t pid = samples_.first.first;
                     std::size_t cid = samples_.first.second;
                     double weight = model->sampleRateWeight(pid, cid);
-                    TTimeDouble2VecSizeTrVec &samples = populationWeightedSamples[feature][cid].first;
-                    TDouble2Vec4VecVec &weights = populationWeightedSamples[feature][cid].second;
-                    TMathsModelPtr &model_ = expectedPopulationModels[feature][cid];
-                    if (!model_)
-                    {
+                    TTimeDouble2VecSizeTrVec& samples = populationWeightedSamples[feature][cid].first;
+                    TDouble2Vec4VecVec& weights = populationWeightedSamples[feature][cid].second;
+                    TMathsModelPtr& model_ = expectedPopulationModels[feature][cid];
+                    if (!model_) {
                         model_ = factory.defaultFeatureModel(features[feature], bucketLength, 1.0, false);
                     }
-                    for (std::size_t j = 0u; j < samples_.second.size(); ++j)
-                    {
+                    for (std::size_t j = 0u; j < samples_.second.size(); ++j) {
                         // We round to the nearest integer time (note this has to match
                         // the behaviour of CMetricPartialStatistic::time).
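// A minimal, self-contained sketch of the rounding the comment above describes,
// for illustration only; the TTime alias standing in for core_t::TTime is an
// assumption. static_cast truncates towards zero, so adding 0.5 before the cast
// rounds a non-negative mean timestamp to the nearest integer time instead of
// biasing it downwards.
#include <cassert>
#include <cstdint>

using TTime = std::int64_t; // illustrative stand-in for core_t::TTime

TTime roundToNearestTime(double meanTime) {
    // 179.4 + 0.5 = 179.9 truncates to 179; 179.5 + 0.5 = 180.0 truncates to 180.
    return static_cast<TTime>(meanTime + 0.5);
}

int main() {
    assert(roundToNearestTime(179.4) == 179);
    assert(roundToNearestTime(179.5) == 180);
    return 0;
}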
- core_t::TTime time_ = static_cast( - expectedSampleTimes[{pid, cid}][j] + 0.5); + core_t::TTime time_ = static_cast(expectedSampleTimes[{pid, cid}][j] + 0.5); TDouble2Vec sample{samples_.second[j]}; samples.emplace_back(time_, sample, pid); weights.push_back({{weight}, model_->winsorisationWeight(1.0, time_, sample)}); } } } - for (auto &feature : populationWeightedSamples) - { - for (auto &attribute : feature.second) - { + for (auto& feature : populationWeightedSamples) { + for (auto& attribute : feature.second) { std::size_t cid = attribute.first; - TTimeDouble2VecSizeTrVec &samples = attribute.second.first; - TDouble2Vec4VecVec &weights = attribute.second.second; + TTimeDouble2VecSizeTrVec& samples = attribute.second.first; + TDouble2Vec4VecVec& weights = attribute.second.second; maths::COrderings::simultaneousSort(samples, weights); maths::CModelAddSamplesParams params_; params_.integer(false) - .nonNegative(nonNegative) - .propagationInterval(1.0) - .weightStyles(WEIGHT_STYLES) - .trendWeights(weights) - .priorWeights(weights); + .nonNegative(nonNegative) + .propagationInterval(1.0) + .weightStyles(WEIGHT_STYLES) + .trendWeights(weights) + .priorWeights(weights); expectedPopulationModels[feature.first][cid]->addSamples(params_, samples); } } - for (std::size_t feature = 0u; feature < features.size(); ++feature) - { - if ((startTime / bucketLength) % 10 == 0) - { + for (std::size_t feature = 0u; feature < features.size(); ++feature) { + if ((startTime / bucketLength) % 10 == 0) { LOG_DEBUG("Testing priors for feature " << model_t::print(features[feature])); } - for (std::size_t cid = 0u; cid < numberAttributes; ++cid) - { - if (expectedPopulationModels[feature].count(cid) > 0) - { + for (std::size_t cid = 0u; cid < numberAttributes; ++cid) { + if (expectedPopulationModels[feature].count(cid) > 0) { CPPUNIT_ASSERT_EQUAL(expectedPopulationModels[feature][cid]->checksum(), model->details()->model(features[feature], cid)->checksum()); } @@ -627,30 +516,27 @@ void CMetricPopulationModelTest::testMinMaxAndMean() nonNegative &= message.s_Value[0] < 0.0; double sampleCount = gatherer->sampleCount(cid); - if (sampleCount > 0.0) - { + if (sampleCount > 0.0) { TSizeSizePr key{pid, cid}; sampleTimes[key].add(static_cast(message.s_Time)); sampleMeans[key].add(message.s_Value[0]); sampleMins[key].add(message.s_Value[0]); sampleMaxs[key].add(message.s_Value[0]); - if (maths::CBasicStatistics::count(sampleTimes[key]) == sampleCount) - { + if (maths::CBasicStatistics::count(sampleTimes[key]) == sampleCount) { expectedSampleTimes[key].push_back(maths::CBasicStatistics::mean(sampleTimes[key])); expectedSamples[0][key].push_back(maths::CBasicStatistics::mean(sampleMeans[key])); expectedSamples[1][key].push_back(sampleMins[key][0]); expectedSamples[2][key].push_back(sampleMaxs[key][0]); sampleTimes[key] = TMeanAccumulator(); sampleMeans[key] = TMeanAccumulator(); - sampleMins[key] = TMinAccumulator(); - sampleMaxs[key] = TMaxAccumulator(); + sampleMins[key] = TMinAccumulator(); + sampleMaxs[key] = TMaxAccumulator(); } } } } -void CMetricPopulationModelTest::testVarp() -{ +void CMetricPopulationModelTest::testVarp() { LOG_DEBUG("*** testVarp ***"); core_t::TTime startTime(3600); @@ -668,80 +554,80 @@ void CMetricPopulationModelTest::testVarp() CAnomalyDetectorModel::TModelPtr model_(factory.makeModel(initData)); CPPUNIT_ASSERT(model_); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, model_->category()); - CMetricPopulationModel &model = static_cast(*model_.get()); - - double bucket1[] = { 1.0, 1.1, 1.01, 
1.02 }; - std::string influencerValues1[] = { "i1", "i1", "i2", "i2" }; - double bucket2[] = { 10.0 }; - std::string influencerValues2[] = { "i1" }; - double bucket3[] = { 4.3, 4.4, 4.6, 4.2, 4.8 }; - std::string influencerValues3[] = { "i1", "i1", "i1", "i1", "i3" }; - double bucket4[] = { 3.2, 3.3 }; - std::string influencerValues4[] = { "i3", "i3" }; - double bucket5[] = { 20.1, 20.8, 20.9 }; - std::string influencerValues5[] = { "i2", "i1", "i1" }; - double bucket6[] = { 4.1, 4.2, 3.9, 4.2 }; - std::string influencerValues6[] = { "i1", "i2", "i2", "i2" }; - double bucket7[] = { 0.1, 0.3, 0.2 }; - std::string influencerValues7[] = { "i1", "i1", "i3" }; - double bucket8[] = { 12.5, 12.3 }; - std::string influencerValues8[] = { "i1", "i2" }; - double bucket9[] = { 6.9, 7.0, 7.1, 6.6, 7.1, 6.7 }; - std::string influencerValues9[] = { "i1", "i2", "i3", "i4", "i5", "i6" }; + CMetricPopulationModel& model = static_cast(*model_.get()); + + double bucket1[] = {1.0, 1.1, 1.01, 1.02}; + std::string influencerValues1[] = {"i1", "i1", "i2", "i2"}; + double bucket2[] = {10.0}; + std::string influencerValues2[] = {"i1"}; + double bucket3[] = {4.3, 4.4, 4.6, 4.2, 4.8}; + std::string influencerValues3[] = {"i1", "i1", "i1", "i1", "i3"}; + double bucket4[] = {3.2, 3.3}; + std::string influencerValues4[] = {"i3", "i3"}; + double bucket5[] = {20.1, 20.8, 20.9}; + std::string influencerValues5[] = {"i2", "i1", "i1"}; + double bucket6[] = {4.1, 4.2, 3.9, 4.2}; + std::string influencerValues6[] = {"i1", "i2", "i2", "i2"}; + double bucket7[] = {0.1, 0.3, 0.2}; + std::string influencerValues7[] = {"i1", "i1", "i3"}; + double bucket8[] = {12.5, 12.3}; + std::string influencerValues8[] = {"i1", "i2"}; + double bucket9[] = {6.9, 7.0, 7.1, 6.6, 7.1, 6.7}; + std::string influencerValues9[] = {"i1", "i2", "i3", "i4", "i5", "i6"}; // This last bucket is much more improbable, with influencer i2 being responsible - double bucket10[] = { 0.3, 15.4, 77.62, 112.999, 5.1, 5.1, 5.1, 5.1, 5.1 }; - std::string influencerValues10[] = { "i2", "i2", "i2", "i2", "i1", "i1", "i1", "i1", "i1"}; + double bucket10[] = {0.3, 15.4, 77.62, 112.999, 5.1, 5.1, 5.1, 5.1, 5.1}; + std::string influencerValues10[] = {"i2", "i2", "i2", "i2", "i1", "i1", "i1", "i1", "i1"}; SAnnotatedProbability annotatedProbability; core_t::TTime time = startTime; - processBucket(time, bucketLength, boost::size(bucket1), bucket1, - influencerValues1, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket1), bucket1, influencerValues1, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket2), bucket2, - influencerValues2, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket2), bucket2, influencerValues2, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket3), bucket3, - influencerValues3, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket3), bucket3, influencerValues3, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket4), bucket4, - influencerValues4, 
*gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket4), bucket4, influencerValues4, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket5), bucket5, - influencerValues5, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket5), bucket5, influencerValues5, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket6), bucket6, - influencerValues6, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket6), bucket6, influencerValues6, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket7), bucket7, - influencerValues7, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket7), bucket7, influencerValues7, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket8), bucket8, - influencerValues8, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket8), bucket8, influencerValues8, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket9), bucket9, - influencerValues9, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket9), bucket9, influencerValues9, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability < 0.85); time += bucketLength; - processBucket(time, bucketLength, boost::size(bucket10), bucket10, - influencerValues10, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket( + time, bucketLength, boost::size(bucket10), bucket10, influencerValues10, *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability < 0.1); CPPUNIT_ASSERT_EQUAL(std::size_t(1), annotatedProbability.s_Influences.size()); CPPUNIT_ASSERT_EQUAL(std::string("I"), *annotatedProbability.s_Influences[0].first.first); @@ -749,8 +635,7 @@ void CMetricPopulationModelTest::testVarp() CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, annotatedProbability.s_Influences[0].second, 0.00001); } -void CMetricPopulationModelTest::testComputeProbability() -{ +void CMetricPopulationModelTest::testComputeProbability() { LOG_DEBUG("*** testComputeProbability ***"); maths::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; @@ -760,20 +645,14 @@ void CMetricPopulationModelTest::testComputeProbability() using TAnomalyVec = std::vector; using TDoubleAnomalyPr = std::pair; - using TAnomalyAccumulator = maths::CBasicStatistics::COrderStatisticsHeap< - TDoubleAnomalyPr, maths::COrderings::SFirstLess>; + using TAnomalyAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; - model_t::EFeature 
features_[] = - { - model_t::E_PopulationMaxByPersonAndAttribute, - model_t::E_PopulationMeanLatLongByPersonAndAttribute - }; + model_t::EFeature features_[] = {model_t::E_PopulationMaxByPersonAndAttribute, model_t::E_PopulationMeanLatLongByPersonAndAttribute}; - for (std::size_t i = 0u; i < boost::size(features_); ++i) - { + for (std::size_t i = 0u; i < boost::size(features_); ++i) { LOG_DEBUG("Testing " << model_t::print(features_[i])); TMessageVec messages; @@ -787,37 +666,28 @@ void CMetricPopulationModelTest::testComputeProbability() CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CMetricPopulationModel *model = - dynamic_cast(modelHolder.get()); + CMetricPopulationModel* model = dynamic_cast(modelHolder.get()); TAnomalyAccumulator anomalies(7); std::size_t bucket = 0u; - for (const auto &message : messages) - { - if (message.s_Time >= startTime + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= startTime + bucketLength) { model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); - LOG_DEBUG("Testing bucket " << bucket - << " = [" << startTime << "," << startTime + bucketLength << ")"); + LOG_DEBUG("Testing bucket " << bucket << " = [" << startTime << "," << startTime + bucketLength << ")"); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - for (std::size_t pid = 0u; pid < numberPeople; ++pid) - { - model->computeProbability(pid, startTime, startTime + bucketLength, - partitioningFields, 2, annotatedProbability); - - if ((startTime / bucketLength) % 10 == 0) - { - LOG_DEBUG("person = " << model->personName(pid) - << ", probability = " << annotatedProbability.s_Probability); + for (std::size_t pid = 0u; pid < numberPeople; ++pid) { + model->computeProbability(pid, startTime, startTime + bucketLength, partitioningFields, 2, annotatedProbability); + + if ((startTime / bucketLength) % 10 == 0) { + LOG_DEBUG("person = " << model->personName(pid) << ", probability = " << annotatedProbability.s_Probability); } std::string person = model->personName(pid); TDoubleStrPrVec attributes; - for (const auto &probability : annotatedProbability.s_AttributeProbabilities) - { + for (const auto& probability : annotatedProbability.s_AttributeProbabilities) { attributes.emplace_back(probability.s_Probability, *probability.s_Attribute); } anomalies.add({annotatedProbability.s_Probability, SAnomaly(bucket, person, attributes)}); @@ -834,35 +704,29 @@ void CMetricPopulationModelTest::testComputeProbability() LOG_DEBUG("Anomalies = " << anomalies.print()); TAnomalyVec orderedAnomalies; - for (std::size_t j = 0u; j < anomalies.count(); ++j) - { + for (std::size_t j = 0u; j < anomalies.count(); ++j) { orderedAnomalies.push_back(anomalies[j].second); } std::sort(orderedAnomalies.begin(), orderedAnomalies.end()); LOG_DEBUG("orderedAnomalies = " << core::CContainerPrinter::print(orderedAnomalies)); - std::string expectedAnomalies[] = - { - std::string("[12, p2, c0 c3]"), - std::string("[15, p3, c0]"), - std::string("[30, p5, c2]"), - std::string("[40, p6, c0]"), - std::string("[44, p9, c2]"), - std::string("[60, p2, c4]"), - std::string("[80, p1, c3]") - }; + std::string expectedAnomalies[] = {std::string("[12, p2, c0 c3]"), + std::string("[15, p3, c0]"), + std::string("[30, p5, c2]"), + std::string("[40, p6, c0]"), + 
std::string("[44, p9, c2]"), + std::string("[60, p2, c4]"), + std::string("[80, p1, c3]")}; CPPUNIT_ASSERT_EQUAL(boost::size(expectedAnomalies), orderedAnomalies.size()); - for (std::size_t j = 0u; j < orderedAnomalies.size(); ++j) - { + for (std::size_t j = 0u; j < orderedAnomalies.size(); ++j) { CPPUNIT_ASSERT_EQUAL(expectedAnomalies[j], orderedAnomalies[j].print()); } } } -void CMetricPopulationModelTest::testPrune() -{ +void CMetricPopulationModelTest::testPrune() { LOG_DEBUG("*** testPrune ***"); // This test has four people and five attributes. We expect @@ -876,29 +740,10 @@ void CMetricPopulationModelTest::testPrune() const core_t::TTime bucketLength = 3600; const std::size_t numberBuckets = 1000u; - std::string people[] = - { - std::string("p1"), - std::string("p2"), - std::string("p3"), - std::string("p4") - }; - std::string attributes[] = - { - std::string("c1"), - std::string("c2"), - std::string("c3"), - std::string("c4"), - std::string("c5") - }; - - TStrSizePrVecVec eventCounts[] = - { - TStrSizePrVecVec(), - TStrSizePrVecVec(), - TStrSizePrVecVec(), - TStrSizePrVecVec() - }; + std::string people[] = {std::string("p1"), std::string("p2"), std::string("p3"), std::string("p4")}; + std::string attributes[] = {std::string("c1"), std::string("c2"), std::string("c3"), std::string("c4"), std::string("c5")}; + + TStrSizePrVecVec eventCounts[] = {TStrSizePrVecVec(), TStrSizePrVecVec(), TStrSizePrVecVec(), TStrSizePrVecVec()}; { TStrSizePrVec attributeCounts; attributeCounts.push_back(TStrSizePr(attributes[0], 0)); @@ -962,8 +807,8 @@ void CMetricPopulationModelTest::testPrune() eventCounts[3][70][0].second = 4; // p4, bucket 70, c2 } - const std::string expectedPeople[] = { people[1], people[2], people[3] }; - const std::string expectedAttributes[] = { attributes[2], attributes[3] }; + const std::string expectedPeople[] = {people[1], people[2], people[3]}; + const std::string expectedAttributes[] = {attributes[2], attributes[3]}; SModelParams params(bucketLength); params.s_DecayRate = 0.01; @@ -986,16 +831,12 @@ void CMetricPopulationModelTest::testPrune() test::CRandomNumbers rng; TMessageVec messages; - for (std::size_t i = 0u; i < boost::size(people); ++i) - { + for (std::size_t i = 0u; i < boost::size(people); ++i) { core_t::TTime bucketStart = startTime; - for (std::size_t j = 0u; j < numberBuckets; ++j, bucketStart += bucketLength) - { - const TStrSizePrVec &attributeEventCounts = eventCounts[i][j]; - for (std::size_t k = 0u; k < attributeEventCounts.size(); ++k) - { - if (attributeEventCounts[k].second == 0) - { + for (std::size_t j = 0u; j < numberBuckets; ++j, bucketStart += bucketLength) { + const TStrSizePrVec& attributeEventCounts = eventCounts[i][j]; + for (std::size_t k = 0u; k < attributeEventCounts.size(); ++k) { + if (attributeEventCounts[k].second == 0) { continue; } @@ -1007,12 +848,8 @@ void CMetricPopulationModelTest::testPrune() core_t::TTime time = bucketStart; core_t::TTime dt = bucketLength / static_cast(n); - for (std::size_t l = 0u; l < n; ++l, time += dt) - { - messages.push_back(SMessage(time, - people[i], - attributeEventCounts[k].first, - TDouble1Vec(1, samples[l]))); + for (std::size_t l = 0u; l < n; ++l, time += dt) { + messages.push_back(SMessage(time, people[i], attributeEventCounts[k].first, TDouble1Vec(1, samples[l]))); } } } @@ -1021,24 +858,16 @@ void CMetricPopulationModelTest::testPrune() TMessageVec expectedMessages; expectedMessages.reserve(messages.size()); - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if ( 
std::binary_search(boost::begin(expectedPeople), - boost::end(expectedPeople), - messages[i].s_Person) - && std::binary_search(boost::begin(expectedAttributes), - boost::end(expectedAttributes), - messages[i].s_Attribute)) - { + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (std::binary_search(boost::begin(expectedPeople), boost::end(expectedPeople), messages[i].s_Person) && + std::binary_search(boost::begin(expectedAttributes), boost::end(expectedAttributes), messages[i].s_Attribute)) { expectedMessages.push_back(messages[i]); } } core_t::TTime bucketStart = startTime; - for (const auto &message : messages) - { - if (message.s_Time >= bucketStart + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= bucketStart + bucketLength) { model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } @@ -1051,10 +880,8 @@ void CMetricPopulationModelTest::testPrune() CPPUNIT_ASSERT_EQUAL(maxDimensionBeforePrune, maxDimensionAfterPrune); bucketStart = startTime; - for (std::size_t i = 0u; i < expectedMessages.size(); ++i) - { - if (expectedMessages[i].s_Time >= bucketStart + bucketLength) - { + for (std::size_t i = 0u; i < expectedMessages.size(); ++i) { + if (expectedMessages[i].s_Time >= bucketStart + bucketLength) { expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } @@ -1070,15 +897,11 @@ void CMetricPopulationModelTest::testPrune() bucketStart = gatherer->currentBucketStartTime() + bucketLength; - SMessage newMessages[] = - { - SMessage(bucketStart + 10, "p1", "c2", TDouble1Vec(1, 20.0)), - SMessage(bucketStart + 200, "p5", "c6", TDouble1Vec(1, 10.0)), - SMessage(bucketStart + 2100, "p5", "c6", TDouble1Vec(1, 15.0)) - }; + SMessage newMessages[] = {SMessage(bucketStart + 10, "p1", "c2", TDouble1Vec(1, 20.0)), + SMessage(bucketStart + 200, "p5", "c6", TDouble1Vec(1, 10.0)), + SMessage(bucketStart + 2100, "p5", "c6", TDouble1Vec(1, 15.0))}; - for (std::size_t i = 0u; i < boost::size(newMessages); ++i) - { + for (std::size_t i = 0u; i < boost::size(newMessages); ++i) { addArrival(newMessages[i], gatherer, m_ResourceMonitor); addArrival(newMessages[i], expectedGatherer, m_ResourceMonitor); } @@ -1097,34 +920,26 @@ void CMetricPopulationModelTest::testPrune() CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, clonedModelHolder->dataGatherer().numberActivePeople()); } -void CMetricPopulationModelTest::testKey() -{ +void CMetricPopulationModelTest::testKey() { LOG_DEBUG("*** testKey ***"); - function_t::EFunction countFunctions[] = - { - function_t::E_PopulationMetric, - function_t::E_PopulationMetricMean, - function_t::E_PopulationMetricMin, - function_t::E_PopulationMetricMax, - function_t::E_PopulationMetricSum - }; - bool useNull[] = { true, false }; - std::string byField[] = { "", "by" }; - std::string partitionField[] = { "", "partition" }; + function_t::EFunction countFunctions[] = {function_t::E_PopulationMetric, + function_t::E_PopulationMetricMean, + function_t::E_PopulationMetricMin, + function_t::E_PopulationMetricMax, + function_t::E_PopulationMetricSum}; + bool useNull[] = {true, false}; + std::string byField[] = {"", "by"}; + std::string partitionField[] = {"", "partition"}; { CAnomalyDetectorModelConfig config = CAnomalyDetectorModelConfig::defaultConfig(); int identifier = 0; - for (std::size_t i = 0u; i < boost::size(countFunctions); ++i) - { - for (std::size_t j = 0u; j < boost::size(useNull); ++j) - { - for (std::size_t k = 0u; k < 
boost::size(byField); ++k) - { - for (std::size_t l = 0u; l < boost::size(partitionField); ++l) - { + for (std::size_t i = 0u; i < boost::size(countFunctions); ++i) { + for (std::size_t j = 0u; j < boost::size(useNull); ++j) { + for (std::size_t k = 0u; k < boost::size(byField); ++k) { + for (std::size_t l = 0u; l < boost::size(partitionField); ++l) { CSearchKey key(++identifier, countFunctions[i], useNull[j], @@ -1146,35 +961,25 @@ void CMetricPopulationModelTest::testKey() } } -void CMetricPopulationModelTest::testFrequency() -{ +void CMetricPopulationModelTest::testFrequency() { LOG_DEBUG("*** CMetricPopulationModelTest::testFrequency ***"); // Test we correctly compute frequencies for people and attributes. const core_t::TTime bucketLength = 600; - const std::string attributes[] = { "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10" }; - const std::string people[] = { "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10" }; - std::size_t period[] = { 1u, 1u, 10u, 3u, 4u, 5u, 2u, 1u, 3u, 7u }; + const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10"}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}; + std::size_t period[] = {1u, 1u, 10u, 3u, 4u, 5u, 2u, 1u, 3u, 7u}; core_t::TTime startTime = 0; TMessageVec messages; std::size_t bucket = 0u; - for (core_t::TTime bucketStart = startTime; - bucketStart < 100 * bucketLength; - bucketStart += bucketLength, ++bucket) - { - for (std::size_t i = 0u; i < boost::size(people); ++i) - { - if (bucket % period[i] == 0) - { - for (std::size_t j = 0u; j < i+1; ++j) - { - messages.push_back(SMessage(bucketStart + bucketLength / 2, - people[i], - attributes[j], - TDouble1Vec(1, 0.0))); + for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; bucketStart += bucketLength, ++bucket) { + for (std::size_t i = 0u; i < boost::size(people); ++i) { + if (bucket % period[i] == 0) { + for (std::size_t j = 0u; j < i + 1; ++j) { + messages.push_back(SMessage(bucketStart + bucketLength / 2, people[i], attributes[j], TDouble1Vec(1, 0.0))); } } } @@ -1191,20 +996,17 @@ void CMetricPopulationModelTest::testFrequency() factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - const model::CDataGatherer &populationGatherer(dynamic_cast(*gatherer)); + const model::CDataGatherer& populationGatherer(dynamic_cast(*gatherer)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CMetricPopulationModel *populationModel = - dynamic_cast(model.get()); + CMetricPopulationModel* populationModel = dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel != 0); core_t::TTime time = startTime; - for (const auto &message : messages) - { - if (message.s_Time >= time + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= time + bucketLength) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); time += bucketLength; } @@ -1213,38 +1015,32 @@ void CMetricPopulationModelTest::testFrequency() { TMeanAccumulator meanError; - for (std::size_t i = 0u; i < boost::size(people); ++i) - { + for (std::size_t i = 0u; i < boost::size(people); ++i) { LOG_DEBUG("*** person = " << people[i] << " ***"); std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(people[i], pid)); LOG_DEBUG("frequency = " << 
populationModel->personFrequency(pid)); LOG_DEBUG("expected frequency = " << 1.0 / static_cast(period[i])); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / static_cast(period[i]), - populationModel->personFrequency(pid), - 0.1 / static_cast(period[i])); - meanError.add(std::fabs( populationModel->personFrequency(pid) - - 1.0 / static_cast(period[i]))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0 / static_cast(period[i]), populationModel->personFrequency(pid), 0.1 / static_cast(period[i])); + meanError.add(std::fabs(populationModel->personFrequency(pid) - 1.0 / static_cast(period[i]))); } LOG_DEBUG("error = " << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.002); } { - for (std::size_t i = 0u; i < boost::size(attributes); ++i) - { + for (std::size_t i = 0u; i < boost::size(attributes); ++i) { LOG_DEBUG("*** attributes = " << attributes[i] << " ***"); std::size_t cid; CPPUNIT_ASSERT(populationGatherer.attributeId(attributes[i], cid)); LOG_DEBUG("frequency = " << populationModel->attributeFrequency(cid)); LOG_DEBUG("expected frequency = " << (10.0 - static_cast(i)) / 10.0); - CPPUNIT_ASSERT_EQUAL((10.0 - static_cast(i)) / 10.0, - populationModel->attributeFrequency(cid)); + CPPUNIT_ASSERT_EQUAL((10.0 - static_cast(i)) / 10.0, populationModel->attributeFrequency(cid)); } } } -void CMetricPopulationModelTest::testSampleRateWeight() -{ +void CMetricPopulationModelTest::testSampleRateWeight() { LOG_DEBUG("*** CMetricPopulationModelTest::testSampleRateWeight ***"); // Test that we correctly compensate for heavy hitters. @@ -1256,11 +1052,11 @@ void CMetricPopulationModelTest::testSampleRateWeight() // one message per attribute per 10 buckets. const core_t::TTime bucketLength = 600; - const std::string attributes[] = { "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10" }; - const std::string people[] = { "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", - "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20" }; - std::size_t heavyHitters[] = { 0u, 4u }; - std::size_t normal[] = { 1u, 2u, 3u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u }; + const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10"}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", + "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20"}; + std::size_t heavyHitters[] = {0u, 4u}; + std::size_t normal[] = {1u, 2u, 3u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u}; std::size_t messagesPerBucket = boost::size(heavyHitters) * boost::size(attributes) + boost::size(normal); @@ -1269,38 +1065,24 @@ void CMetricPopulationModelTest::testSampleRateWeight() core_t::TTime startTime = 0; TMessageVec messages; - for (core_t::TTime bucketStart = startTime; - bucketStart < 100 * bucketLength; - bucketStart += bucketLength) - { + for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; bucketStart += bucketLength) { TSizeVec times; - rng.generateUniformSamples(static_cast(bucketStart), - static_cast(bucketStart + bucketLength), - messagesPerBucket, - times); + rng.generateUniformSamples( + static_cast(bucketStart), static_cast(bucketStart + bucketLength), messagesPerBucket, times); std::size_t m = 0u; - for (std::size_t i = 0u; i < boost::size(attributes); ++i) - { - for (std::size_t j = 0u; j < boost::size(heavyHitters); ++j) - { - messages.push_back(SMessage(static_cast(times[m++]), - people[heavyHitters[j]], - 
attributes[i], - TDouble1Vec(1, 0.0))); + for (std::size_t i = 0u; i < boost::size(attributes); ++i) { + for (std::size_t j = 0u; j < boost::size(heavyHitters); ++j) { + messages.push_back( + SMessage(static_cast(times[m++]), people[heavyHitters[j]], attributes[i], TDouble1Vec(1, 0.0))); } } TSizeVec attributeIndexes; - rng.generateUniformSamples(0, boost::size(attributes), - boost::size(normal), - attributeIndexes); - for (std::size_t i = 0u; i < boost::size(normal); ++i) - { - messages.push_back(SMessage(static_cast(times[m++]), - people[normal[i]], - attributes[attributeIndexes[i]], - TDouble1Vec(1, 0.0))); + rng.generateUniformSamples(0, boost::size(attributes), boost::size(normal), attributeIndexes); + for (std::size_t i = 0u; i < boost::size(normal); ++i) { + messages.push_back( + SMessage(static_cast(times[m++]), people[normal[i]], attributes[attributeIndexes[i]], TDouble1Vec(1, 0.0))); } } @@ -1318,15 +1100,12 @@ void CMetricPopulationModelTest::testSampleRateWeight() CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CMetricPopulationModel *populationModel = - dynamic_cast(model.get()); + CMetricPopulationModel* populationModel = dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel != 0); core_t::TTime time = startTime; - for (const auto &message : messages) - { - if (message.s_Time >= time + bucketLength) - { + for (const auto& message : messages) { + if (message.s_Time >= time + bucketLength) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); time += bucketLength; } @@ -1340,45 +1119,35 @@ void CMetricPopulationModelTest::testSampleRateWeight() // + ("# heavy hitters")) // / "# people" - double expectedRateWeight = ( static_cast(boost::size(normal)) - / static_cast(boost::size(attributes)) - + static_cast(boost::size(heavyHitters))) - / static_cast(boost::size(people)); + double expectedRateWeight = (static_cast(boost::size(normal)) / static_cast(boost::size(attributes)) + + static_cast(boost::size(heavyHitters))) / + static_cast(boost::size(people)); LOG_DEBUG("expectedRateWeight = " << expectedRateWeight); - for (std::size_t i = 0u; i < boost::size(heavyHitters); ++i) - { + for (std::size_t i = 0u; i < boost::size(heavyHitters); ++i) { LOG_DEBUG("*** person = " << people[heavyHitters[i]] << " ***"); std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(people[heavyHitters[i]], pid)); - for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) - { + for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); - LOG_DEBUG("attribute = " << populationModel->attributeName(cid) - << ", sampleRateWeight = " << sampleRateWeight); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedRateWeight, - sampleRateWeight, - 0.15 * expectedRateWeight); + LOG_DEBUG("attribute = " << populationModel->attributeName(cid) << ", sampleRateWeight = " << sampleRateWeight); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedRateWeight, sampleRateWeight, 0.15 * expectedRateWeight); } } - for (std::size_t i = 0u; i < boost::size(normal); ++i) - { + for (std::size_t i = 0u; i < boost::size(normal); ++i) { LOG_DEBUG("*** person = " << people[normal[i]] << " ***"); std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(people[normal[i]], pid)); - for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) - { + for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) { double sampleRateWeight = 
populationModel->sampleRateWeight(pid, cid); - LOG_DEBUG("attribute = " << populationModel->attributeName(cid) - << ", sampleRateWeight = " << sampleRateWeight); + LOG_DEBUG("attribute = " << populationModel->attributeName(cid) << ", sampleRateWeight = " << sampleRateWeight); CPPUNIT_ASSERT_EQUAL(1.0, sampleRateWeight); } } } -void CMetricPopulationModelTest::testPeriodicity() -{ +void CMetricPopulationModelTest::testPeriodicity() { LOG_DEBUG("*** testPeriodicity ***"); // Create a daily periodic population and check that the @@ -1390,15 +1159,10 @@ void CMetricPopulationModelTest::testPeriodicity() static const core_t::TTime DAY = 86400; const core_t::TTime bucketLength = 3600; - double baseline[] = - { - 1, 1, 2, 2, 3, 5, 6, 6, - 20, 21, 4, 3, 4, 4, 8, 25, - 7, 6, 5, 1, 1, 4, 1, 1 - }; - const std::string attributes[] = { "a1", "a2" }; - double scales[] = { 2.0, 3.0 }; - const std::string people[] = { "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10" }; + double baseline[] = {1, 1, 2, 2, 3, 5, 6, 6, 20, 21, 4, 3, 4, 4, 8, 25, 7, 6, 5, 1, 1, 4, 1, 1}; + const std::string attributes[] = {"a1", "a2"}; + double scales[] = {2.0, 3.0}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}; test::CRandomNumbers rng; @@ -1406,26 +1170,14 @@ void CMetricPopulationModelTest::testPeriodicity() core_t::TTime endTime = 604800; TMessageVec messages; - for (core_t::TTime time = startTime; - time < endTime; - time += bucketLength) - { - for (std::size_t i = 0u; i < boost::size(attributes); ++i) - { + for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { + for (std::size_t i = 0u; i < boost::size(attributes); ++i) { TDoubleVec values; - rng.generateNormalSamples(baseline[(time % DAY) / HOUR], - scales[i] * scales[i], - boost::size(people), - values); - - for (std::size_t j = 0u; j < values.size(); ++j) - { - for (unsigned int t = 0; t < 4; ++t) - { - messages.push_back(SMessage(time + (t * bucketLength) / 4, - people[j], - attributes[i], - TDouble1Vec(1, values[j]))); + rng.generateNormalSamples(baseline[(time % DAY) / HOUR], scales[i] * scales[i], boost::size(people), values); + + for (std::size_t j = 0u; j < values.size(); ++j) { + for (unsigned int t = 0; t < 4; ++t) { + messages.push_back(SMessage(time + (t * bucketLength) / 4, people[j], attributes[i], TDouble1Vec(1, values[j]))); } } } @@ -1444,46 +1196,35 @@ void CMetricPopulationModelTest::testPeriodicity() CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CMetricPopulationModel *populationModel = - dynamic_cast(model.get()); + CMetricPopulationModel* populationModel = dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel != 0); TStrDoubleMap personProbabilitiesWithoutPeriodicity; TStrDoubleMap personProbabilitiesWithPeriodicity; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= time + bucketLength) - { + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= time + bucketLength) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); - for (std::size_t j = 0u; j < boost::size(people); ++j) - { + for (std::size_t j = 0u; j < boost::size(people); ++j) { std::size_t pid; - if (!gatherer->personId(people[j], pid)) - { + if (!gatherer->personId(people[j], pid)) { continue; } CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability 
annotatedProbability; - if (populationModel->computeProbability(pid, time, time + bucketLength, - partitioningFields, 1, annotatedProbability) == false) - { + if (populationModel->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, annotatedProbability) == + false) { continue; } - if (time < startTime + 3 * DAY) - { - double &minimumProbability = - personProbabilitiesWithoutPeriodicity.insert({people[j], 1.0}).first->second; + if (time < startTime + 3 * DAY) { + double& minimumProbability = personProbabilitiesWithoutPeriodicity.insert({people[j], 1.0}).first->second; minimumProbability = std::min(minimumProbability, annotatedProbability.s_Probability); - } - else if (time > startTime + 5 * DAY) - { - double &minimumProbability = - personProbabilitiesWithPeriodicity.insert({people[j], 1.0}).first->second; + } else if (time > startTime + 5 * DAY) { + double& minimumProbability = personProbabilitiesWithPeriodicity.insert({people[j], 1.0}).first->second; minimumProbability = std::min(minimumProbability, annotatedProbability.s_Probability); } } @@ -1493,18 +1234,17 @@ void CMetricPopulationModelTest::testPeriodicity() addArrival(messages[i], gatherer, m_ResourceMonitor); } - double totalw = 0.0; + double totalw = 0.0; double totalwo = 0.0; - for (std::size_t i = 0u; i < boost::size(people); ++i) - { + for (std::size_t i = 0u; i < boost::size(people); ++i) { auto wo = personProbabilitiesWithoutPeriodicity.find(people[i]); - auto w = personProbabilitiesWithPeriodicity.find(people[i]); + auto w = personProbabilitiesWithPeriodicity.find(people[i]); LOG_DEBUG("person = " << people[i]); LOG_DEBUG("minimum probability with periodicity = " << w->second); LOG_DEBUG("minimum probability without periodicity = " << wo->second); totalwo += wo->second; - totalw += w->second; + totalw += w->second; } LOG_DEBUG("total minimum probability with periodicity = " << totalw); @@ -1512,8 +1252,7 @@ void CMetricPopulationModelTest::testPeriodicity() CPPUNIT_ASSERT(totalw > 3.0 * totalwo); } -void CMetricPopulationModelTest::testPersistence() -{ +void CMetricPopulationModelTest::testPersistence() { core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -1534,14 +1273,11 @@ void CMetricPopulationModelTest::testPersistence() CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr origModel(factory.makeModel(modelInitData)); - CMetricPopulationModel *populationModel = - dynamic_cast(origModel.get()); + CMetricPopulationModel* populationModel = dynamic_cast(origModel.get()); CPPUNIT_ASSERT(populationModel != 0); - for (std::size_t i = 0u; i < messages.size(); ++i) - { - if (messages[i].s_Time >= startTime + bucketLength) - { + for (std::size_t i = 0u; i < messages.size(); ++i) { + if (messages[i].s_Time >= startTime + bucketLength) { origModel->sample(startTime, startTime + bucketLength, m_ResourceMonitor); startTime += bucketLength; } @@ -1564,8 +1300,7 @@ void CMetricPopulationModelTest::testPersistence() CAnomalyDetectorModel::TModelPtr restoredModel(factory.makeModel(modelInitData, traverser)); - populationModel = - dynamic_cast(restoredModel.get()); + populationModel = dynamic_cast(restoredModel.get()); CPPUNIT_ASSERT(populationModel != 0); // The XML representation of the new data gatherer should be the same as the @@ -1583,8 +1318,7 @@ void CMetricPopulationModelTest::testPersistence() CPPUNIT_ASSERT_EQUAL(origXml, newXml); } -void CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() -{ +void 
CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() { LOG_DEBUG("*** testIgnoreSamplingGivenDetectionRules ***"); // Create 2 models, one of which has a skip sampling rule. @@ -1612,7 +1346,6 @@ void CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationMeanByPersonAndAttribute); - SModelParams paramsNoRules(bucketLength); CMetricPopulationModelFactory factoryNoSkip(paramsNoRules); factoryNoSkip.features(features); @@ -1621,7 +1354,6 @@ void CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() CModelFactory::SModelInitializationData modelNoSkipInitData(gathererNoSkip); CAnomalyDetectorModel::TModelPtr modelNoSkip(factoryNoSkip.makeModel(modelNoSkipInitData)); - SModelParams paramsWithRules(bucketLength); SModelParams::TDetectionRuleVec rules{rule}; paramsWithRules.s_DetectionRules = SModelParams::TDetectionRuleVecCRef(rules); @@ -1639,10 +1371,8 @@ void CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() messages.push_back(SMessage(startTime + 10, "p2", "c2", TDouble1Vec(1, 22.0))); std::vector gatherers{gathererNoSkip, gathererWithSkip}; - for (auto &gatherer : gatherers) - { - for (auto &message : messages) - { + for (auto& gatherer : gatherers) { + for (auto& message : messages) { addArrival(message, gatherer, m_ResourceMonitor); } } @@ -1657,10 +1387,8 @@ void CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() messages.push_back(SMessage(startTime + 10, "p1", "c2", TDouble1Vec(1, 21.0))); messages.push_back(SMessage(startTime + 10, "p2", "c1", TDouble1Vec(1, 21.0))); messages.push_back(SMessage(startTime + 10, "p2", "c2", TDouble1Vec(1, 21.0))); - for (auto &gatherer : gatherers) - { - for (auto &message : messages) - { + for (auto& gatherer : gatherers) { + for (auto& message : messages) { addArrival(message, gatherer, m_ResourceMonitor); } } @@ -1689,60 +1417,49 @@ void CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() // TODO These checks fail see elastic/machine-learning-cpp/issues/485 // Check the last value times of all the underlying models are the same -// const maths::CUnivariateTimeSeriesModel *timeSeriesModel = -// dynamic_cast(modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)); -// CPPUNIT_ASSERT(timeSeriesModel != 0); - -// core_t::TTime time = timeSeriesModel->trend().lastValueTime(); -// CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_PopulationMeanByPersonAndAttribute, startTime, bucketLength), time); - -// // The last times of the underlying time series models should all be the same -// timeSeriesModel = dynamic_cast(modelNoSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)); -// CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); - -// timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 0)); -// CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); -// timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)); -// CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); -// timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 2)); -// CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + // const maths::CUnivariateTimeSeriesModel *timeSeriesModel = + // dynamic_cast(modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)); + 
// CPPUNIT_ASSERT(timeSeriesModel != 0); + + // core_t::TTime time = timeSeriesModel->trend().lastValueTime(); + // CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_PopulationMeanByPersonAndAttribute, startTime, bucketLength), time); + + // // The last times of the underlying time series models should all be the same + // timeSeriesModel = dynamic_cast(modelNoSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)); + // CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + + // timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 0)); + // CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + // timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)); + // CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); + // timeSeriesModel = dynamic_cast(modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 2)); + // CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trend().lastValueTime()); } - -CppUnit::Test *CMetricPopulationModelTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CMetricPopulationModelTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testBasicAccessors", - &CMetricPopulationModelTest::testBasicAccessors) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testMinMaxAndMean", - &CMetricPopulationModelTest::testMinMaxAndMean) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testComputeProbability", - &CMetricPopulationModelTest::testComputeProbability) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testPrune", - &CMetricPopulationModelTest::testPrune) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testKey", - &CMetricPopulationModelTest::testKey) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testFrequency", - &CMetricPopulationModelTest::testFrequency) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testSampleRateWeight", - &CMetricPopulationModelTest::testSampleRateWeight) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testPeriodicity", - &CMetricPopulationModelTest::testPeriodicity) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testPersistence", - &CMetricPopulationModelTest::testPersistence) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules", - &CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules) ); +CppUnit::Test* CMetricPopulationModelTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricPopulationModelTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricPopulationModelTest::testBasicAccessors", + &CMetricPopulationModelTest::testBasicAccessors)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricPopulationModelTest::testMinMaxAndMean", + &CMetricPopulationModelTest::testMinMaxAndMean)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricPopulationModelTest::testComputeProbability", + &CMetricPopulationModelTest::testComputeProbability)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricPopulationModelTest::testPrune", + &CMetricPopulationModelTest::testPrune)); + suiteOfTests->addTest( + new 
CppUnit::TestCaller("CMetricPopulationModelTest::testKey", &CMetricPopulationModelTest::testKey)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricPopulationModelTest::testFrequency", + &CMetricPopulationModelTest::testFrequency)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricPopulationModelTest::testSampleRateWeight", + &CMetricPopulationModelTest::testSampleRateWeight)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricPopulationModelTest::testPeriodicity", + &CMetricPopulationModelTest::testPeriodicity)); + suiteOfTests->addTest(new CppUnit::TestCaller("CMetricPopulationModelTest::testPersistence", + &CMetricPopulationModelTest::testPersistence)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules", + &CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules)); return suiteOfTests; } diff --git a/lib/model/unittest/CMetricPopulationModelTest.h b/lib/model/unittest/CMetricPopulationModelTest.h index 2ca9e8755d..0b43c86664 100644 --- a/lib/model/unittest/CMetricPopulationModelTest.h +++ b/lib/model/unittest/CMetricPopulationModelTest.h @@ -11,24 +11,24 @@ #include -class CMetricPopulationModelTest : public CppUnit::TestFixture -{ - public: - void testBasicAccessors(); - void testMinMaxAndMean(); - void testVarp(); - void testComputeProbability(); - void testPrune(); - void testKey(); - void testFrequency(); - void testSampleRateWeight(); - void testPeriodicity(); - void testPersistence(); - void testIgnoreSamplingGivenDetectionRules(); +class CMetricPopulationModelTest : public CppUnit::TestFixture { +public: + void testBasicAccessors(); + void testMinMaxAndMean(); + void testVarp(); + void testComputeProbability(); + void testPrune(); + void testKey(); + void testFrequency(); + void testSampleRateWeight(); + void testPeriodicity(); + void testPersistence(); + void testIgnoreSamplingGivenDetectionRules(); - static CppUnit::Test *suite(); - private: - ml::model::CResourceMonitor m_ResourceMonitor; + static CppUnit::Test* suite(); + +private: + ml::model::CResourceMonitor m_ResourceMonitor; }; #endif // INCLUDED_CMetricPopulationModelTest_h diff --git a/lib/model/unittest/CModelDetailsViewTest.cc b/lib/model/unittest/CModelDetailsViewTest.cc index 0926226e34..a0a08c09c2 100644 --- a/lib/model/unittest/CModelDetailsViewTest.cc +++ b/lib/model/unittest/CModelDetailsViewTest.cc @@ -25,15 +25,13 @@ using namespace ml; -namespace -{ +namespace { const std::string EMPTY_STRING; } // unnamed -void CModelDetailsViewTest::testModelPlot() -{ +void CModelDetailsViewTest::testModelPlot() { LOG_DEBUG("*** CModelDetailsViewTest::testModelPlot ***"); using TDoubleVec = std::vector; @@ -48,36 +46,43 @@ void CModelDetailsViewTest::testModelPlot() model::CAnomalyDetectorModel::TDataGathererPtr gatherer; TMockModelPtr model; - auto setupTest = [&]() - { - gatherer.reset(new model::CDataGatherer{model_t::analysisCategory(features[0]), - model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "p", EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, 0, 0}); - std::string person11{"p11"}; - std::string person12{"p12"}; - std::string person21{"p21"}; - std::string person22{"p22"}; - bool addedPerson{false}; - gatherer->addPerson(person11, m_ResourceMonitor, addedPerson); - gatherer->addPerson(person12, m_ResourceMonitor, addedPerson); - gatherer->addPerson(person21, m_ResourceMonitor, addedPerson); - gatherer->addPerson(person22, m_ResourceMonitor, addedPerson); - - model.reset(new 
model::CMockModel{params, gatherer, {/*we don't care about influence*/}}); - - maths::CTimeSeriesDecomposition trend; - maths::CNormalMeanPrecConjugate prior{ - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; - maths::CModelParams timeSeriesModelParams{bucketLength, 1.0, 0.001, 0.2, - 6 * core::constants::HOUR, - 24 * core::constants::HOUR}; - maths::CUnivariateTimeSeriesModel timeSeriesModel{timeSeriesModelParams, 0, trend, prior}; - model->mockTimeSeriesModels({model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(0)), - model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(1)), - model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(2)), - model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(3))}); - }; + auto setupTest = [&]() { + gatherer.reset(new model::CDataGatherer{model_t::analysisCategory(features[0]), + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + "p", + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + 0, + 0}); + std::string person11{"p11"}; + std::string person12{"p12"}; + std::string person21{"p21"}; + std::string person22{"p22"}; + bool addedPerson{false}; + gatherer->addPerson(person11, m_ResourceMonitor, addedPerson); + gatherer->addPerson(person12, m_ResourceMonitor, addedPerson); + gatherer->addPerson(person21, m_ResourceMonitor, addedPerson); + gatherer->addPerson(person22, m_ResourceMonitor, addedPerson); + + model.reset(new model::CMockModel{params, gatherer, {/*we don't care about influence*/}}); + + maths::CTimeSeriesDecomposition trend; + maths::CNormalMeanPrecConjugate prior{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; + maths::CModelParams timeSeriesModelParams{bucketLength, 1.0, 0.001, 0.2, 6 * core::constants::HOUR, 24 * core::constants::HOUR}; + maths::CUnivariateTimeSeriesModel timeSeriesModel{timeSeriesModelParams, 0, trend, prior}; + model->mockTimeSeriesModels({model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(0)), + model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(1)), + model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(2)), + model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(3))}); + }; LOG_DEBUG("Individual sum"); { @@ -87,8 +92,7 @@ void CModelDetailsViewTest::testModelPlot() TDoubleVec values{2.0, 3.0, 0.0, 0.0}; { std::size_t pid{0}; - for (auto value : values) - { + for (auto value : values) { model->mockAddBucketValue(model_t::E_IndividualSumByBucketAndPerson, pid++, 0, 0, {value}); } } @@ -96,16 +100,13 @@ void CModelDetailsViewTest::testModelPlot() model::CModelPlotData plotData; model->details()->modelPlot(0, 90.0, {}, plotData); CPPUNIT_ASSERT(plotData.begin() != plotData.end()); - for (const auto &featureByFieldData : plotData) - { + for (const auto& featureByFieldData : plotData) { CPPUNIT_ASSERT_EQUAL(values.size(), featureByFieldData.second.size()); - for (const auto &byFieldData : featureByFieldData.second) - { + for (const auto& byFieldData : featureByFieldData.second) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(byFieldData.first, pid)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), byFieldData.second.s_ValuesPerOverField.size()); - for (const auto ¤tBucketValue : byFieldData.second.s_ValuesPerOverField) - { + for (const auto& currentBucketValue : byFieldData.second.s_ValuesPerOverField) { CPPUNIT_ASSERT_EQUAL(values[pid], currentBucketValue.second); } } @@ -120,8 +121,7 @@ void CModelDetailsViewTest::testModelPlot() TDoubleVec values{0.0, 1.0, 3.0}; { std::size_t pid{0}; - for 
(auto value : values) - { + for (auto value : values) { model->mockAddBucketValue(model_t::E_IndividualCountByBucketAndPerson, pid++, 0, 0, {value}); } } @@ -129,16 +129,13 @@ void CModelDetailsViewTest::testModelPlot() model::CModelPlotData plotData; model->details()->modelPlot(0, 90.0, {}, plotData); CPPUNIT_ASSERT(plotData.begin() != plotData.end()); - for (const auto &featureByFieldData : plotData) - { + for (const auto& featureByFieldData : plotData) { CPPUNIT_ASSERT_EQUAL(values.size(), featureByFieldData.second.size()); - for (const auto &byFieldData : featureByFieldData.second) - { + for (const auto& byFieldData : featureByFieldData.second) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(byFieldData.first, pid)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), byFieldData.second.s_ValuesPerOverField.size()); - for (const auto ¤tBucketValue : byFieldData.second.s_ValuesPerOverField) - { + for (const auto& currentBucketValue : byFieldData.second.s_ValuesPerOverField) { CPPUNIT_ASSERT_EQUAL(values[pid], currentBucketValue.second); } } @@ -146,13 +143,11 @@ void CModelDetailsViewTest::testModelPlot() } } -CppUnit::Test *CModelDetailsViewTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CModelDetailsViewTest"); +CppUnit::Test* CModelDetailsViewTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelDetailsViewTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CModelDetailsViewTest::testModelPlot", - &CModelDetailsViewTest::testModelPlot) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CModelDetailsViewTest::testModelPlot", &CModelDetailsViewTest::testModelPlot)); return suiteOfTests; } diff --git a/lib/model/unittest/CModelDetailsViewTest.h b/lib/model/unittest/CModelDetailsViewTest.h index dbe76a82a4..66179d3d12 100644 --- a/lib/model/unittest/CModelDetailsViewTest.h +++ b/lib/model/unittest/CModelDetailsViewTest.h @@ -11,15 +11,14 @@ #include -class CModelDetailsViewTest : public CppUnit::TestFixture -{ - public: - void testModelPlot(); +class CModelDetailsViewTest : public CppUnit::TestFixture { +public: + void testModelPlot(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); - private: - ml::model::CResourceMonitor m_ResourceMonitor; +private: + ml::model::CResourceMonitor m_ResourceMonitor; }; #endif // INCLUDED_CModelDetailsViewTest_h diff --git a/lib/model/unittest/CModelMemoryTest.cc b/lib/model/unittest/CModelMemoryTest.cc index 9042716fba..8805d9f896 100644 --- a/lib/model/unittest/CModelMemoryTest.cc +++ b/lib/model/unittest/CModelMemoryTest.cc @@ -12,10 +12,10 @@ #include #include -#include #include -#include +#include #include +#include #include #include @@ -25,14 +25,11 @@ using namespace ml; using namespace model; -namespace -{ +namespace { using TDoubleVec = std::vector; -std::size_t addPerson(const std::string &p, - const CModelFactory::TDataGathererPtr &gatherer) -{ +std::size_t addPerson(const std::string& p, const CModelFactory::TDataGathererPtr& gatherer) { CDataGatherer::TStrCPtrVec person; person.push_back(&p); person.resize(gatherer->fieldsOfInterest().size(), 0); @@ -42,10 +39,7 @@ std::size_t addPerson(const std::string &p, return *result.personId(); } -void addArrival(CDataGatherer &gatherer, - core_t::TTime time, - const std::string &person) -{ +void addArrival(CDataGatherer& gatherer, core_t::TTime time, const std::string& person) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); CEventData eventData; @@ -54,11 +48,7 @@ void addArrival(CDataGatherer 
&gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void addArrival(CDataGatherer &gatherer, - core_t::TTime time, - const std::string &person, - double value) -{ +void addArrival(CDataGatherer& gatherer, core_t::TTime time, const std::string& person, double value) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); std::string valueAsString(core::CStringUtils::typeToString(value)); @@ -70,11 +60,9 @@ void addArrival(CDataGatherer &gatherer, } const std::string EMPTY_STRING; - } -void CModelMemoryTest::testOnlineEventRateModel() -{ +void CModelMemoryTest::testOnlineEventRateModel() { // Tests to check that the memory usage of the model goes up // as data is fed in and that memoryUsage and debugMemory are // consistent. @@ -84,7 +72,7 @@ void CModelMemoryTest::testOnlineEventRateModel() SModelParams params(bucketLength); CEventRateModelFactory factory(params); - std::size_t bucketCounts[] = { 5, 6, 3, 5, 0, 7, 8, 5, 4, 3, 5, 5, 6 }; + std::size_t bucketCounts[] = {5, 6, 3, 5, 0, 7, 8, 5, 4, 3, 5, 5, 6}; CDataGatherer::TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); @@ -97,17 +85,15 @@ void CModelMemoryTest::testOnlineEventRateModel() CAnomalyDetectorModel::TModelPtr modelPtr(factory.makeModel(initData)); CPPUNIT_ASSERT(modelPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_EventRateOnline, modelPtr->category()); - CEventRateModel &model = static_cast(*modelPtr.get()); + CEventRateModel& model = static_cast(*modelPtr.get()); std::size_t startMemoryUsage = model.memoryUsage(); CResourceMonitor resourceMonitor; LOG_DEBUG("Memory used by model: " << model.memoryUsage()); core_t::TTime time = startTime; - for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) - { - for (std::size_t j = 0u; j < bucketCounts[i]; ++j) - { + for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) { + for (std::size_t j = 0u; j < bucketCounts[i]; ++j) { addArrival(*gatherer, time + static_cast(j), "p"); } model.sample(time, time + bucketLength, resourceMonitor); @@ -123,8 +109,7 @@ void CModelMemoryTest::testOnlineEventRateModel() CPPUNIT_ASSERT_EQUAL(model.computeMemoryUsage(), memoryUsage.usage()); } -void CModelMemoryTest::testOnlineMetricModel() -{ +void CModelMemoryTest::testOnlineMetricModel() { // Tests to check that the memory usage of the model goes up // as data is fed in and that memoryUsage and debugMemory are // consistent. 
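 // Both memory tests in this file assert the same consistency contract; in
 // outline (a sketch using only calls that appear in these tests, where the
 // free function checkMemoryAccounting is an illustrative name, not part of
 // the fixture):
 //
 //   void checkMemoryAccounting(ml::model::CAnomalyDetectorModel& model) {
 //       ml::core::CMemoryUsage memoryUsage;
 //       model.debugMemoryUsage(&memoryUsage);
 //       // The itemised debug breakdown must total exactly the bytes that
 //       // the model's own accounting reports.
 //       CPPUNIT_ASSERT_EQUAL(model.computeMemoryUsage(), memoryUsage.usage());
 //   }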
@@ -134,7 +119,7 @@ void CModelMemoryTest::testOnlineMetricModel() SModelParams params(bucketLength); CMetricModelFactory factory(params); - std::size_t bucketCounts[] = { 5, 6, 3, 5, 0, 7, 8, 5, 4, 3, 5, 5, 6 }; + std::size_t bucketCounts[] = {5, 6, 3, 5, 0, 7, 8, 5, 4, 3, 5, 5, 6}; double mean = 5.0; double variance = 2.0; @@ -153,27 +138,21 @@ void CModelMemoryTest::testOnlineMetricModel() CAnomalyDetectorModel::TModelPtr modelPtr(factory.makeModel(initData)); CPPUNIT_ASSERT(modelPtr); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, modelPtr->category()); - CMetricModel &model = static_cast<CMetricModel&>(*modelPtr.get()); + CMetricModel& model = static_cast<CMetricModel&>(*modelPtr.get()); std::size_t startMemoryUsage = model.memoryUsage(); CResourceMonitor resourceMonitor; - LOG_DEBUG("Memory used by model: " << model.memoryUsage() - << " / " << core::CMemory::dynamicSize(model)); + LOG_DEBUG("Memory used by model: " << model.memoryUsage() << " / " << core::CMemory::dynamicSize(model)); test::CRandomNumbers rng; core_t::TTime time = startTime; - for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) - { + for (std::size_t i = 0u; i < boost::size(bucketCounts); ++i) { TDoubleVec values; rng.generateNormalSamples(mean, variance, bucketCounts[i], values); - for (std::size_t j = 0u; j < values.size(); ++j) - { - addArrival(*gatherer, - time + static_cast<core_t::TTime>(j), - "p", - values[j] + (i == anomalousBucket ? anomaly : 0.0)); + for (std::size_t j = 0u; j < values.size(); ++j) { + addArrival(*gatherer, time + static_cast<core_t::TTime>(j), "p", values[j] + (i == anomalousBucket ? anomaly : 0.0)); } model.sample(time, time + bucketLength, resourceMonitor); @@ -185,20 +164,16 @@ void CModelMemoryTest::testOnlineMetricModel() core::CMemoryUsage memoryUsage; model.debugMemoryUsage(&memoryUsage); LOG_DEBUG("Debug sizeof model: " << memoryUsage.usage()); - CPPUNIT_ASSERT_EQUAL(model.computeMemoryUsage() , memoryUsage.usage()); + CPPUNIT_ASSERT_EQUAL(model.computeMemoryUsage(), memoryUsage.usage()); } +CppUnit::Test* CModelMemoryTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelMemoryTest"); -CppUnit::Test *CModelMemoryTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CModelMemoryTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller<CModelMemoryTest>( - "CModelMemoryTest::testOnlineEventRateModel", - &CModelMemoryTest::testOnlineEventRateModel) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CModelMemoryTest>( - "CModelMemoryTest::testOnlineMetricModel", - &CModelMemoryTest::testOnlineMetricModel) ); + suiteOfTests->addTest(new CppUnit::TestCaller<CModelMemoryTest>("CModelMemoryTest::testOnlineEventRateModel", + &CModelMemoryTest::testOnlineEventRateModel)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CModelMemoryTest>("CModelMemoryTest::testOnlineMetricModel", &CModelMemoryTest::testOnlineMetricModel)); return suiteOfTests; } diff --git a/lib/model/unittest/CModelMemoryTest.h b/lib/model/unittest/CModelMemoryTest.h index 8f4ef9731d..46cd19083f 100644 --- a/lib/model/unittest/CModelMemoryTest.h +++ b/lib/model/unittest/CModelMemoryTest.h @@ -9,14 +9,12 @@ #include -class CModelMemoryTest : public CppUnit::TestFixture -{ - public: - void testOnlineEventRateModel(); - void testOnlineMetricModel(); - - static CppUnit::Test *suite(); -}; +class CModelMemoryTest : public CppUnit::TestFixture { +public: + void testOnlineEventRateModel(); + void testOnlineMetricModel(); + static CppUnit::Test* suite(); +}; #endif // INCLUDED_CModelMemoryTest_h diff --git a/lib/model/unittest/CModelToolsTest.cc b/lib/model/unittest/CModelToolsTest.cc index
ef244360f2..74573ee259 100644 --- a/lib/model/unittest/CModelToolsTest.cc +++ b/lib/model/unittest/CModelToolsTest.cc @@ -7,8 +7,8 @@ #include "CModelToolsTest.h" #include -#include #include +#include #include #include @@ -21,13 +21,12 @@ #include #include -#include #include +#include using namespace ml; -namespace -{ +namespace { using TDoubleVec = std::vector<double>; using TDouble2Vec = core::CSmallVector<double, 2>; @@ -36,35 +35,31 @@ const double MINIMUM_SEASONAL_SCALE{0.25}; const double DECAY_RATE{0.0005}; const std::size_t TAG{0u}; -maths::CModelParams params(core_t::TTime bucketLength) -{ +maths::CModelParams params(core_t::TTime bucketLength) { using TTimeDoubleMap = std::map<core_t::TTime, double>; static TTimeDoubleMap learnRates; learnRates[bucketLength] = static_cast<double>(bucketLength) / 1800.0; double minimumSeasonalVarianceScale{MINIMUM_SEASONAL_SCALE}; - return maths::CModelParams{bucketLength, learnRates[bucketLength], - DECAY_RATE, minimumSeasonalVarianceScale, - 6 * core::constants::HOUR, 24 * core::constants::HOUR}; + return maths::CModelParams{bucketLength, + learnRates[bucketLength], + DECAY_RATE, + minimumSeasonalVarianceScale, + 6 * core::constants::HOUR, + 24 * core::constants::HOUR}; } -maths::CNormalMeanPrecConjugate normal() -{ +maths::CNormalMeanPrecConjugate normal() { return maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); } -maths::CMultimodalPrior multimodal() -{ - maths::CXMeansOnline1d clusterer{maths_t::E_ContinuousData, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - DECAY_RATE}; +maths::CMultimodalPrior multimodal() { + maths::CXMeansOnline1d clusterer{ + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, DECAY_RATE}; return maths::CMultimodalPrior{maths_t::E_ContinuousData, clusterer, normal(), DECAY_RATE}; } - } -void CModelToolsTest::testFuzzyDeduplicate() -{ +void CModelToolsTest::testFuzzyDeduplicate() { LOG_DEBUG("*** CModelToolsTest::testFuzzyDeduplicate ***"); test::CRandomNumbers rng; @@ -75,32 +70,25 @@ void CModelToolsTest::testFuzzyDeduplicate() uniques.reserve(30000); LOG_DEBUG("Normal"); - for (auto variance : {1.0, 10.0, 100.0, 1000.0}) - { + for (auto variance : {1.0, 10.0, 100.0, 1000.0}) { rng.generateNormalSamples(0.0, variance, 200000, values); model::CModelTools::CFuzzyDeduplicate fuzzy; - for (auto value : values) - { + for (auto value : values) { fuzzy.add(TDouble2Vec{value}); } fuzzy.computeEpsilons(600, 10000); boost::math::normal normal{variance, std::sqrt(variance)}; - double eps{( boost::math::quantile(normal, 0.9) - - boost::math::quantile(normal, 0.1)) / 10000.0}; + double eps{(boost::math::quantile(normal, 0.9) - boost::math::quantile(normal, 0.1)) / 10000.0}; LOG_DEBUG("eps = " << eps); uniques.clear(); - for (auto value : values) - { + for (auto value : values) { std::size_t duplicate{fuzzy.duplicate(300, TDouble2Vec{value})}; - if (duplicate < uniques.size()) - { + if (duplicate < uniques.size()) { CPPUNIT_ASSERT_DOUBLES_EQUAL(uniques[duplicate], value, 1.5 * eps); - } - else - { + } else { uniques.push_back(value); } } @@ -110,13 +98,11 @@ void CModelToolsTest::testFuzzyDeduplicate() } LOG_DEBUG("Uniform"); - for (auto range : {1.0, 10.0, 100.0, 1000.0}) - { + for (auto range : {1.0, 10.0, 100.0, 1000.0}) { rng.generateUniformSamples(0.0, range, 200000, values); model::CModelTools::CFuzzyDeduplicate fuzzy; - for (auto value : values) - { + for (auto value : values) { fuzzy.add(TDouble2Vec{value}); } fuzzy.computeEpsilons(600, 10000);
@@ -125,15 +111,11 @@ void CModelToolsTest::testFuzzyDeduplicate() LOG_DEBUG("eps = " << eps); uniques.clear(); - for (auto value : values) - { + for (auto value : values) { std::size_t duplicate{fuzzy.duplicate(300, TDouble2Vec{value})}; - if (duplicate < uniques.size()) - { + if (duplicate < uniques.size()) { CPPUNIT_ASSERT_DOUBLES_EQUAL(uniques[duplicate], value, 1.5 * eps); - } - else - { + } else { uniques.push_back(value); } } @@ -148,8 +130,7 @@ void CModelToolsTest::testFuzzyDeduplicate() std::sort(values.begin(), values.end()); model::CModelTools::CFuzzyDeduplicate fuzzy; - for (auto value : values) - { + for (auto value : values) { fuzzy.add(TDouble2Vec{value}); } fuzzy.computeEpsilons(600, 10000); @@ -158,15 +139,11 @@ void CModelToolsTest::testFuzzyDeduplicate() LOG_DEBUG("eps = " << eps); uniques.clear(); - for (auto value : values) - { + for (auto value : values) { std::size_t duplicate{fuzzy.duplicate(300, TDouble2Vec{value})}; - if (duplicate < uniques.size()) - { + if (duplicate < uniques.size()) { CPPUNIT_ASSERT_DOUBLES_EQUAL(uniques[duplicate], value, 1.5 * eps); - } - else - { + } else { uniques.push_back(value); } } @@ -176,32 +153,25 @@ void CModelToolsTest::testFuzzyDeduplicate() } LOG_DEBUG("Log-Normal"); - for (auto variance : {1.0, 2.0, 4.0, 8.0}) - { + for (auto variance : {1.0, 2.0, 4.0, 8.0}) { rng.generateLogNormalSamples(variance, variance, 200000, values); model::CModelTools::CFuzzyDeduplicate fuzzy; - for (auto value : values) - { + for (auto value : values) { fuzzy.add(TDouble2Vec{value}); } fuzzy.computeEpsilons(600, 10000); boost::math::lognormal lognormal{variance, std::sqrt(variance)}; - double eps{( boost::math::quantile(lognormal, 0.9) - - boost::math::quantile(lognormal, 0.1)) / 10000.0}; + double eps{(boost::math::quantile(lognormal, 0.9) - boost::math::quantile(lognormal, 0.1)) / 10000.0}; LOG_DEBUG("eps = " << eps); uniques.clear(); - for (auto value : values) - { + for (auto value : values) { std::size_t duplicate{fuzzy.duplicate(300, TDouble2Vec{value})}; - if (duplicate < uniques.size()) - { + if (duplicate < uniques.size()) { CPPUNIT_ASSERT_DOUBLES_EQUAL(uniques[duplicate], value, 1.5 * eps); - } - else - { + } else { uniques.push_back(value); } } @@ -211,8 +181,7 @@ void CModelToolsTest::testFuzzyDeduplicate() } } -void CModelToolsTest::testProbabilityCache() -{ +void CModelToolsTest::testProbabilityCache() { LOG_DEBUG("*** CModelToolsTest::testProbabilityCache ***"); using TBool2Vec = core::CSmallVector; @@ -237,7 +206,7 @@ void CModelToolsTest::testProbabilityCache() { TDoubleVec samples_[3]; - rng.generateNormalSamples(0.0, 1.0, 100, samples_[0]); + rng.generateNormalSamples(0.0, 1.0, 100, samples_[0]); rng.generateNormalSamples(20.0, 3.0, 300, samples_[1]); rng.generateNormalSamples(29.0, 1.0, 100, samples_[2]); TDoubleVec samples; @@ -245,14 +214,13 @@ void CModelToolsTest::testProbabilityCache() samples.insert(samples.end(), samples_[1].begin(), samples_[1].end()); samples.insert(samples.end(), samples_[2].begin(), samples_[2].end()); rng.random_shuffle(samples.begin(), samples.end()); - for (auto sample : samples) - { + for (auto sample : samples) { maths::CModelAddSamplesParams params; params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + .propagationInterval(1.0) + .weightStyles(maths::CConstantWeights::COUNT) + .trendWeights(weights) + .priorWeights(weights); model.addSamples(params, {core::make_triple(time_, TDouble2Vec(1, sample), 
TAG)}); } } @@ -264,12 +232,11 @@ void CModelToolsTest::testProbabilityCache() LOG_DEBUG("Test Random"); - for (std::size_t t = 0; t < 5; ++t) - { + for (std::size_t t = 0; t < 5; ++t) { TDoubleVec samples_[3]; - rng.generateNormalSamples(0.0, 1.0, 10000, samples_[0]); - rng.generateNormalSamples(20.0, 3.0, 10000, samples_[1]); - rng.generateNormalSamples(10.0, 1000.0, 100, samples_[2]); + rng.generateNormalSamples(0.0, 1.0, 10000, samples_[0]); + rng.generateNormalSamples(20.0, 3.0, 10000, samples_[1]); + rng.generateNormalSamples(10.0, 1000.0, 100, samples_[2]); TDoubleVec samples; samples.insert(samples.end(), samples_[0].begin(), samples_[0].end()); samples.insert(samples.end(), samples_[1].begin(), samples_[1].end()); @@ -281,45 +248,33 @@ void CModelToolsTest::testProbabilityCache() TMeanAccumulator error; std::size_t hits{0u}; - for (auto sample_ : samples) - { + for (auto sample_ : samples) { sample[0][0] = sample_; maths::CModelProbabilityParams params; params.addCalculation(maths_t::E_TwoSided) - .seasonalConfidenceInterval(0.0) - .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .seasonalConfidenceInterval(0.0) + .addBucketEmpty(TBool2Vec{false}) + .weightStyles(maths::CConstantWeights::COUNT) + .addWeights(weight); double expectedProbability; TTail2Vec expectedTail; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(params, time, sample, - expectedProbability, expectedTail, - conditional, mostAnomalousCorrelate); + model.probability(params, time, sample, expectedProbability, expectedTail, conditional, mostAnomalousCorrelate); double probability; TTail2Vec tail; - if (cache.lookup(feature, id, sample, - probability, tail, - conditional, mostAnomalousCorrelate)) - { + if (cache.lookup(feature, id, sample, probability, tail, conditional, mostAnomalousCorrelate)) { ++hits; error.add(std::fabs(probability - expectedProbability) / expectedProbability); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, - probability, - 0.05 * expectedProbability); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 0.05 * expectedProbability); CPPUNIT_ASSERT_EQUAL(expectedTail[0], tail[0]); CPPUNIT_ASSERT_EQUAL(false, conditional); CPPUNIT_ASSERT(mostAnomalousCorrelate.empty()); - } - else - { + } else { cache.addModes(feature, id, model); - cache.addProbability(feature, id, sample, - expectedProbability, expectedTail, - false, mostAnomalousCorrelate); + cache.addProbability(feature, id, sample, expectedProbability, expectedTail, false, mostAnomalousCorrelate); } } @@ -329,61 +284,45 @@ void CModelToolsTest::testProbabilityCache() CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.001); } - LOG_DEBUG("Test Adversary"); { model::CModelTools::CProbabilityCache cache(0.05); - for (auto sample_ : {0.5, 1.4, 23.0, 26.0, 20.0}) - { + for (auto sample_ : {0.5, 1.4, 23.0, 26.0, 20.0}) { sample[0][0] = sample_; maths::CModelProbabilityParams params; params.addCalculation(maths_t::E_TwoSided) - .seasonalConfidenceInterval(0.0) - .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .seasonalConfidenceInterval(0.0) + .addBucketEmpty(TBool2Vec{false}) + .weightStyles(maths::CConstantWeights::COUNT) + .addWeights(weight); double expectedProbability; TTail2Vec expectedTail; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(params, time, sample, - expectedProbability, expectedTail, - conditional, mostAnomalousCorrelate); - LOG_DEBUG("probability 
= " << expectedProbability - << ", tail = " << expectedTail); + model.probability(params, time, sample, expectedProbability, expectedTail, conditional, mostAnomalousCorrelate); + LOG_DEBUG("probability = " << expectedProbability << ", tail = " << expectedTail); double probability; TTail2Vec tail; - if (cache.lookup(feature, id, sample, - probability, tail, - conditional, mostAnomalousCorrelate)) - { + if (cache.lookup(feature, id, sample, probability, tail, conditional, mostAnomalousCorrelate)) { // Shouldn't have any cache hits. CPPUNIT_ASSERT(false); - } - else - { + } else { cache.addModes(feature, id, model); - cache.addProbability(feature, id, sample, - expectedProbability, expectedTail, - false, mostAnomalousCorrelate); + cache.addProbability(feature, id, sample, expectedProbability, expectedTail, false, mostAnomalousCorrelate); } } } } -CppUnit::Test *CModelToolsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CModelToolsTest"); +CppUnit::Test* CModelToolsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelToolsTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CModelToolsTest::testFuzzyDeduplicate", - &CModelToolsTest::testFuzzyDeduplicate) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CModelToolsTest::testProbabilityCache", - &CModelToolsTest::testProbabilityCache) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CModelToolsTest::testFuzzyDeduplicate", &CModelToolsTest::testFuzzyDeduplicate)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CModelToolsTest::testProbabilityCache", &CModelToolsTest::testProbabilityCache)); return suiteOfTests; - } diff --git a/lib/model/unittest/CModelToolsTest.h b/lib/model/unittest/CModelToolsTest.h index a27f425571..f51aca6a4e 100644 --- a/lib/model/unittest/CModelToolsTest.h +++ b/lib/model/unittest/CModelToolsTest.h @@ -9,14 +9,12 @@ #include -class CModelToolsTest : public CppUnit::TestFixture -{ - public: - void testFuzzyDeduplicate(); - void testProbabilityCache(); +class CModelToolsTest : public CppUnit::TestFixture { +public: + void testFuzzyDeduplicate(); + void testProbabilityCache(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; - #endif // INCLUDED_CModelToolsTest_h diff --git a/lib/model/unittest/CModelTypesTest.cc b/lib/model/unittest/CModelTypesTest.cc index f46d2d6655..d6d3fe98bc 100644 --- a/lib/model/unittest/CModelTypesTest.cc +++ b/lib/model/unittest/CModelTypesTest.cc @@ -10,8 +10,7 @@ using namespace ml; using namespace model; -void CModelTypesTest::testAll() -{ +void CModelTypesTest::testAll() { { // test print categories CPPUNIT_ASSERT_EQUAL(std::string("'counting'"), model_t::print(model_t::E_Counting)); @@ -81,7 +80,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isCategorical(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isDiurnal(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isConstant(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isConstant(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); @@ -318,7 +317,7 @@ void CModelTypesTest::testAll() feature = model_t::E_IndividualTimeOfDayByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isCategorical(feature)); - CPPUNIT_ASSERT_EQUAL(true, 
model_t::isDiurnal(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isDiurnal(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isConstant(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); @@ -335,7 +334,7 @@ void CModelTypesTest::testAll() feature = model_t::E_IndividualTimeOfWeekByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isCategorical(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isDiurnal(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isDiurnal(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isConstant(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); @@ -356,7 +355,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isCategorical(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isDiurnal(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isConstant(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isMeanFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); @@ -375,7 +374,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isDiurnal(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isConstant(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMeanFeature(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isMedianFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isSumFeature(feature)); @@ -394,7 +393,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isConstant(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isMinFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isSumFeature(feature)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); @@ -413,7 +412,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isMaxFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isMaxFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isSumFeature(feature)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); @@ -432,7 +431,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, 
model_t::inverseOffsetCountToZero(feature, 2.0)); @@ -446,7 +445,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isCategorical(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isDiurnal(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isConstant(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isMeanFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); @@ -464,7 +463,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isCategorical(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isDiurnal(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isConstant(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isMeanFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); @@ -486,7 +485,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); @@ -504,7 +503,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); @@ -522,7 +521,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); @@ -558,7 +557,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isSumFeature(feature)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); @@ -572,7 +571,7 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(false, 
model_t::isCategorical(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isDiurnal(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isConstant(feature)); - CPPUNIT_ASSERT_EQUAL(true, model_t::isMeanFeature(feature)); + CPPUNIT_ASSERT_EQUAL(true, model_t::isMeanFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMedianFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMinFeature(feature)); CPPUNIT_ASSERT_EQUAL(false, model_t::isMaxFeature(feature)); @@ -763,7 +762,8 @@ void CModelTypesTest::testAll() CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature)); CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("info_content"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'high information content of value per bucket by person and attribute'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'high information content of value per bucket by person and attribute'"), + model_t::print(feature)); feature = model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -1003,13 +1003,10 @@ void CModelTypesTest::testAll() } } -CppUnit::Test *CModelTypesTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CModelTypesTest"); +CppUnit::Test* CModelTypesTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelTypesTest"); - suiteOfTests->addTest( new CppUnit::TestCaller<CModelTypesTest>( - "CModelTypesTest::testAll", - &CModelTypesTest::testAll) ); + suiteOfTests->addTest(new CppUnit::TestCaller<CModelTypesTest>("CModelTypesTest::testAll", &CModelTypesTest::testAll)); return suiteOfTests; } diff --git a/lib/model/unittest/CModelTypesTest.h b/lib/model/unittest/CModelTypesTest.h index aa2472c913..d4a44d4a73 100644 --- a/lib/model/unittest/CModelTypesTest.h +++ b/lib/model/unittest/CModelTypesTest.h @@ -9,12 +9,11 @@ #include -class CModelTypesTest : public CppUnit::TestFixture -{ - public: - void testAll(); +class CModelTypesTest : public CppUnit::TestFixture { +public: + void testAll(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CModelTypesTest_h diff --git a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc index 27e9af2d69..8fecf313fd 100644 --- a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc +++ b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc @@ -30,8 +30,7 @@ using namespace ml; -namespace -{ +namespace { using TDoubleDoublePr = std::pair<double, double>; using TDoubleVec = std::vector<double>; @@ -61,16 +60,15 @@ using TStrCRefDouble1VecDoublePrPrVec = model::CProbabilityAndInfluenceCalculato using TStrCRefDouble1VecDoublePrPrVecVec = std::vector<TStrCRefDouble1VecDoublePrPrVec>; using TStrCRefDouble1VecDouble1VecPrPr = model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPr; using TStrCRefDouble1VecDouble1VecPrPrVec = model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPrVec; -using TStoredStringPtrStoredStringPtrPrDoublePrVec = model::CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePrVec; +using TStoredStringPtrStoredStringPtrPrDoublePrVec = + model::CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePrVec; using TInfluenceCalculatorCPtr = boost::shared_ptr; -TDouble1VecDoublePr make_pair(double first, double second) -{ +TDouble1VecDoublePr make_pair(double
first, double second) { return TDouble1VecDoublePr{TDouble1Vec{first}, second}; } -TDouble1VecDoublePr make_pair(double first1, double first2, double second) -{ +TDouble1VecDoublePr make_pair(double first1, double first2, double second) { return TDouble1VecDoublePr{TDouble1Vec{first1, first2}, second}; } @@ -79,39 +77,34 @@ TDouble1VecDoublePr make_pair(double first1, double first2, double second) // return TDouble1VecDouble1VecPr{TDouble1Vec{first1, first2}, TDouble1Vec{second1, second2}}; //} -maths::CModelParams params(core_t::TTime bucketLength) -{ +maths::CModelParams params(core_t::TTime bucketLength) { double learnRate{static_cast<double>(bucketLength) / 1800.0}; double minimumSeasonalVarianceScale{0.4}; - return maths::CModelParams{bucketLength, learnRate, 0.0, minimumSeasonalVarianceScale, - 6 * core::constants::HOUR, 24 * core::constants::HOUR}; + return maths::CModelParams{ + bucketLength, learnRate, 0.0, minimumSeasonalVarianceScale, 6 * core::constants::HOUR, 24 * core::constants::HOUR}; } -std::size_t dimension(double) { return 1; } -std::size_t dimension(const TDoubleVec &sample) { return sample.size(); } +std::size_t dimension(double) { + return 1; +} +std::size_t dimension(const TDoubleVec& sample) { + return sample.size(); +} -TTimeDouble2VecSizeTr sample(core_t::TTime time, double sample) -{ +TTimeDouble2VecSizeTr sample(core_t::TTime time, double sample) { return core::make_triple(time, TDouble2Vec{sample}, std::size_t{0}); } -TTimeDouble2VecSizeTr sample(core_t::TTime time, const TDoubleVec &sample) -{ +TTimeDouble2VecSizeTr sample(core_t::TTime time, const TDoubleVec& sample) { return core::make_triple(time, TDouble2Vec(sample), std::size_t{0}); } template<typename SAMPLES> -core_t::TTime addSamples(core_t::TTime bucketLength, const SAMPLES &samples, maths::CModel &model) -{ +core_t::TTime addSamples(core_t::TTime bucketLength, const SAMPLES& samples, maths::CModel& model) { TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(dimension(samples[0]))}; maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).weightStyles(maths::CConstantWeights::COUNT).trendWeights(weights).priorWeights(weights); core_t::TTime time{0}; - for (const auto &sample_ : samples) - { + for (const auto& sample_ : samples) { model.addSamples(params, TTimeDouble2VecSizeTrVec{sample(time, sample_)}); time += bucketLength; } @@ -120,23 +113,20 @@ core_t::TTime addSamples(core_t::TTime bucketLength, const SAMPLES &samples, mat void computeProbability(core_t::TTime time, maths_t::EProbabilityCalculation calculation, - const TDouble2Vec &sample, - const maths::CModel &model, - double &probablity, - TTail2Vec &tail) -{ + const TDouble2Vec& sample, + const maths::CModel& model, + double& probablity, + TTail2Vec& tail) { TDouble2Vec4Vec weight{model.seasonalWeight(0.0, time)}; maths::CModelProbabilityParams params; params.addCalculation(calculation) - .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::SEASONAL_VARIANCE) - .addWeights(weight); + .addBucketEmpty(TBool2Vec{false}) + .weightStyles(maths::CConstantWeights::SEASONAL_VARIANCE) + .addWeights(weight); bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(params, - TTime2Vec1Vec{TTime2Vec{time}}, - TDouble2Vec1Vec{sample}, - probablity, tail, conditional, mostAnomalousCorrelate); + model.probability( + params, TTime2Vec1Vec{TTime2Vec{time}},
TDouble2Vec1Vec{sample}, probablity, tail, conditional, mostAnomalousCorrelate); } const std::string I("I"); @@ -146,18 +136,17 @@ const std::string i3("i3"); const std::string EMPTY_STRING; template<typename CALCULATOR> -void computeInfluences(CALCULATOR &calculator, +void computeInfluences(CALCULATOR& calculator, model_t::EFeature feature, - const maths::CModel &model, + const maths::CModel& model, core_t::TTime time, double value, double count, double probability, - const TTail2Vec &tail, - const std::string &influencerName, - const TStrCRefDouble1VecDoublePrPrVec &influencerValues, - TStoredStringPtrStoredStringPtrPrDoublePrVec &result) -{ + const TTail2Vec& tail, + const std::string& influencerName, + const TStrCRefDouble1VecDoublePrPrVec& influencerValues, + TStoredStringPtrStoredStringPtrPrDoublePrVec& result) { maths_t::TWeightStyleVec weightStyles; weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); @@ -169,8 +158,7 @@ void computeInfluences(CALCULATOR &calculator, params.s_Time = TTime2Vec1Vec{TTimeVec{time}}; params.s_Value = TDouble2Vec1Vec{TDoubleVec{value}}; params.s_Count = count; - params.s_ComputeProbabilityParams.weightStyles(weightStyles) - .addWeights(weights); + params.s_ComputeProbabilityParams.weightStyles(weightStyles).addWeights(weights); params.s_Probability = probability; params.s_Tail = tail; params.s_InfluencerName = model::CStringStore::influencers().get(influencerName); @@ -181,18 +169,17 @@ void computeInfluences(CALCULATOR &calculator, } template<typename CALCULATOR> -void computeInfluences(CALCULATOR &calculator, +void computeInfluences(CALCULATOR& calculator, model_t::EFeature feature, - const maths::CModel &model, + const maths::CModel& model, const core_t::TTime (&times)[2], const double (&values)[2], const double (&counts)[2], double probability, - const TTail2Vec &tail, - const std::string &influencerName, - const TStrCRefDouble1VecDouble1VecPrPrVec &influencerValues, - TStoredStringPtrStoredStringPtrPrDoublePrVec &result) -{ + const TTail2Vec& tail, + const std::string& influencerName, + const TStrCRefDouble1VecDouble1VecPrPrVec& influencerValues, + TStoredStringPtrStoredStringPtrPrDoublePrVec& result) { maths_t::TWeightStyleVec weightStyles; weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); @@ -224,12 +211,11 @@ void computeInfluences(CALCULATOR &calculator, } void testProbabilityAndGetInfluences(model_t::EFeature feature, - const maths::CModel &model, + const maths::CModel& model, core_t::TTime time_, - const TDoubleVecVec &values, - const TStrCRefDouble1VecDoublePrPrVecVec &influencerValues, - TStoredStringPtrStoredStringPtrPrDoublePrVec &influences) -{ + const TDoubleVecVec& values, + const TStrCRefDouble1VecDoublePrPrVecVec& influencerValues, + TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) { maths_t::TWeightStyleVec weightStyles; weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); @@ -244,32 +230,28 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, maths::CJointProbabilityOfLessLikelySamples pJoint; maths::CProbabilityOfExtremeSample pExtreme; - for (std::size_t i = 0u; i < values.size(); ++i) - { + for (std::size_t i = 0u; i < values.size(); ++i) { std::size_t dimension{values[i].size() - 1}; TTime2Vec1Vec time{TTime2Vec{time_}}; TDouble2Vec1Vec value{TDouble2Vec(&values[i][0], &values[i][dimension])};
TDouble2Vec4Vec weight(2, TDouble2Vec(dimension, values[i][dimension])); double count{0.0}; - for (const auto &influence : influencerValues[i]) - { + for (const auto& influence : influencerValues[i]) { count += influence.second.second; } maths::CModelProbabilityParams params_; params_.addCalculation(model_t::probabilityCalculation(feature)) - .seasonalConfidenceInterval(0.0) - .addBucketEmpty(TBool2Vec{false}) - .weightStyles(weightStyles) - .addWeights(weight); + .seasonalConfidenceInterval(0.0) + .addBucketEmpty(TBool2Vec{false}) + .weightStyles(weightStyles) + .addWeights(weight); double p = 0.0; TTail2Vec tail; model_t::CResultType type; TSize1Vec mostAnomalousCorrelate; - calculator.addProbability(feature, 0, model, 0/*elapsedTime*/, - params_, time, value, - p, tail, type, mostAnomalousCorrelate); + calculator.addProbability(feature, 0, model, 0 /*elapsedTime*/, params_, time, value, p, tail, type, mostAnomalousCorrelate); LOG_DEBUG(" p = " << p); pJoint.add(p); @@ -294,15 +276,12 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, CPPUNIT_ASSERT(pJoint.calculate(pj)); CPPUNIT_ASSERT(pExtreme.calculate(pe)); - LOG_DEBUG(" probability = " << probability - << ", expected probability = " << std::min(pj, pe)); + LOG_DEBUG(" probability = " << probability << ", expected probability = " << std::min(pj, pe)); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::min(pe, pj), probability, 1e-10); } - } -void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator() -{ +void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator() { LOG_DEBUG("*** testInfluenceUnavailableCalculator ***"); test::CRandomNumbers rng; @@ -314,24 +293,28 @@ void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator( model::CInfluenceUnavailableCalculator calculator; maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; rng.generateNormalSamples(10.0, 1.0, 50, samples); addSamples(bucketLength, samples, model); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(11.0, 1.0)}, - {TStrCRef(i2), make_pair(11.0, 1.0)}, - {TStrCRef(i3), make_pair(15.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(11.0, 1.0)}, {TStrCRef(i2), make_pair(11.0, 1.0)}, {TStrCRef(i3), make_pair(15.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualLowCountsByBucketAndPerson, model, - 0/*time*/, 15.0/*value*/, 1.0/*count*/, - 0.001/*probability*/, TTail2Vec{maths_t::E_RightTail}, - I, influencerValues, influences); + model_t::E_IndividualLowCountsByBucketAndPerson, + model, + 0 /*time*/, + 15.0 /*value*/, + 1.0 /*count*/, + 0.001 /*probability*/, + TTail2Vec{maths_t::E_RightTail}, + I, + influencerValues, + influences); LOG_DEBUG("influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); @@ -375,8 +358,7 @@ void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator( }*/ } -void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluenceCalculator() -{ +void 
CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluenceCalculator() { LOG_DEBUG("*** testLogProbabilityComplementInfluenceCalculator ***"); test::CRandomNumbers rng; @@ -393,8 +375,7 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen LOG_DEBUG("One influencer value"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -409,20 +390,25 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, model, - 0/*time*/, 20.0/*value*/, 1.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualCountByBucketAndPerson, + model, + 0 /*time*/, + 20.0 /*value*/, + 1.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), - core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); } { LOG_DEBUG("No trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -433,34 +419,37 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen TTail2Vec tail; computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{20.0}, model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair( 1.0, 1.0)}, - {TStrCRef(i2), make_pair( 1.0, 1.0)}, - {TStrCRef(i3), make_pair(18.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(1.0, 1.0)}, {TStrCRef(i2), make_pair(1.0, 1.0)}, {TStrCRef(i3), make_pair(18.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, model, - 0/*time*/, 20.0/*value*/, 1.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualCountByBucketAndPerson, + model, + 0 /*time*/, + 20.0 /*value*/, + 1.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i3), 1)]"), - core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i3), 1)]"), core::CContainerPrinter::print(influences)); } { LOG_DEBUG("Trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; 
{ rng.generateNormalSamples(0.0, 100.0, 10 * 86400 / 600, samples); core_t::TTime time{0}; - for (auto &sample : samples) - { + for (auto& sample : samples) { sample += 100.0 + 100.0 * std::sin(2.0 * 3.1416 * static_cast(time) / 86400.0); time += bucketLength; } @@ -468,13 +457,11 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen addSamples(bucketLength, samples, model); TTimeVec testTimes{0, 86400 / 4, 86400 / 2, (3 * 86400) / 4}; - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(70.0, 1.0)}, - {TStrCRef(i2), make_pair(50.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(70.0, 1.0)}, {TStrCRef(i2), make_pair(50.0, 1.0)}}; std::string expectedInfluencerValues[]{"i1", "i2"}; TDoubleVecVec expectedInfluences{{1.0, 1.0}, {0.0, 0.0}, {1.0, 1.0}, {0.8, 0.6}}; - for (std::size_t i = 0u; i < testTimes.size(); ++i) - { + for (std::size_t i = 0u; i < testTimes.size(); ++i) { core_t::TTime time = testTimes[i]; LOG_DEBUG(" time = " << time); LOG_DEBUG(" baseline = " << model.predict(time)); @@ -486,13 +473,19 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, model, - time, 120.0/*value*/, 1.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualCountByBucketAndPerson, + model, + time, + 120.0 /*value*/, + 1.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); - for (std::size_t j = 0u; j < influences.size(); ++j) - { + for (std::size_t j = 0u; j < influences.size(); ++j) { CPPUNIT_ASSERT_EQUAL(expectedInfluencerValues[j], *influences[j].first.second); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], influences[j].second, 0.06); } @@ -700,8 +693,7 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen }*/ } -void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() -{ +void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { LOG_DEBUG("*** testMeanInfluenceCalculator ***"); test::CRandomNumbers rng; @@ -716,8 +708,7 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() LOG_DEBUG("One influencer value"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -732,20 +723,25 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, model, - 0/*time*/, 5.0/*value*/, 1.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualMeanByPerson, + model, + 0 /*time*/, + 5.0 /*value*/, + 1.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), - core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); } { 
LOG_DEBUG("No trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -759,19 +755,24 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() TTail2Vec tail; computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{12.5}, model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(20.0, 5.0)}, - {TStrCRef(i2), make_pair(10.0, 7.0)}, - {TStrCRef(i3), make_pair(10.0, 8.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(20.0, 5.0)}, {TStrCRef(i2), make_pair(10.0, 7.0)}, {TStrCRef(i3), make_pair(10.0, 8.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, model, - 0/*time*/, 12.5/*value*/, 20.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualMeanByPerson, + model, + 0 /*time*/, + 12.5 /*value*/, + 20.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), - core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); } { LOG_DEBUG("Right tail, no clear influences"); @@ -785,9 +786,16 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, model, - 0/*time*/, 15.0/*value*/, 11.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualMeanByPerson, + model, + 0 /*time*/, + 15.0 /*value*/, + 11.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); @@ -799,14 +807,20 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() TTail2Vec tail; computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{5.0}, model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(5.0, 5.0)}, - {TStrCRef(i2), make_pair(5.0, 6.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(5.0, 5.0)}, {TStrCRef(i2), make_pair(5.0, 6.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, model, - 0/*time*/, 5.0/*value*/, 11.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualMeanByPerson, + model, + 0 /*time*/, + 5.0 /*value*/, + 11.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); @@ -818,15 +832,21 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() TTail2Vec tail; computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{8.0}, model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(5.0, 9.0)}, - {TStrCRef(i2), make_pair(11.0, 20.0)}, - {TStrCRef(i3), make_pair(5.0, 11.0)}}; + TStrCRefDouble1VecDoublePrPrVec 
influencerValues{ + {TStrCRef(i1), make_pair(5.0, 9.0)}, {TStrCRef(i2), make_pair(11.0, 20.0)}, {TStrCRef(i3), make_pair(5.0, 11.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, model, - 0/*time*/, 8.0/*value*/, 40.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualMeanByPerson, + model, + 0 /*time*/, + 8.0 /*value*/, + 40.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), influences.size()); @@ -1047,8 +1067,7 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() }*/ } -void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculator() -{ +void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculator() { LOG_DEBUG("*** testLogProbabilityInfluenceCalculator ***"); test::CRandomNumbers rng; @@ -1058,15 +1077,13 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat core_t::TTime bucketLength{600}; maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); - { LOG_DEBUG("Test univariate"); { LOG_DEBUG("One influencer value"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -1081,20 +1098,25 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualUniqueCountByBucketAndPerson, model, - now/*time*/, 5.0/*value*/, 1.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualUniqueCountByBucketAndPerson, + model, + now /*time*/, + 5.0 /*value*/, + 1.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), - core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); } { LOG_DEBUG("No trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -1105,34 +1127,37 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat TTail2Vec tail; computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{6.0}, model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(9.0, 1.0)}, - {TStrCRef(i2), make_pair(6.0, 1.0)}, - {TStrCRef(i3), make_pair(6.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(9.0, 1.0)}, {TStrCRef(i2), make_pair(6.0, 1.0)}, {TStrCRef(i3), make_pair(6.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - 
model_t::E_IndividualUniqueCountByBucketAndPerson, model, - now/*time*/, 6.0/*value*/, 1.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualUniqueCountByBucketAndPerson, + model, + now /*time*/, + 6.0 /*value*/, + 1.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i2), 1), ((I, i3), 1)]"), - core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i2), 1), ((I, i3), 1)]"), core::CContainerPrinter::print(influences)); } { LOG_DEBUG("Trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; { rng.generateNormalSamples(0.0, 100.0, 10 * 86400 / 600, samples); core_t::TTime time{0}; - for (auto &sample : samples) - { + for (auto& sample : samples) { sample += 100.0 + 100.0 * std::sin(2.0 * 3.1416 * static_cast(time) / 86400.0); time += bucketLength; } @@ -1141,13 +1166,11 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat addSamples(bucketLength, samples, model); TTimeVec testTimes{0, 86400 / 4, 86400 / 2, (3 * 86400) / 4}; - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(60.0, 1.0)}, - {TStrCRef(i2), make_pair(50.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(60.0, 1.0)}, {TStrCRef(i2), make_pair(50.0, 1.0)}}; std::string expectedInfluencerValues[] = {"i1", "i2"}; TDoubleVecVec expectedInfluences{{1.0, 1.0}, {1.0, 1.0}, {1.0, 1.0}, {1.0, 0.7}}; - for (std::size_t i = 0u; i < testTimes.size(); ++i) - { + for (std::size_t i = 0u; i < testTimes.size(); ++i) { core_t::TTime time = testTimes[i]; LOG_DEBUG(" time = " << time); @@ -1158,231 +1181,236 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualHighUniqueCountByBucketAndPerson, model, - time, 60.0/*value*/, 1.0/*count*/, - p, tail, I, influencerValues, influences); + model_t::E_IndividualHighUniqueCountByBucketAndPerson, + model, + time, + 60.0 /*value*/, + 1.0 /*count*/, + p, + tail, + I, + influencerValues, + influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); std::sort(influences.begin(), influences.end(), maths::COrderings::SFirstLess()); - for (std::size_t j = 0u; j < influences.size(); ++j) - { + for (std::size_t j = 0u; j < influences.size(); ++j) { CPPUNIT_ASSERT_EQUAL(expectedInfluencerValues[j], *influences[j].first.second); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], influences[j].second, 0.03); } } } } -// { -// LOG_DEBUG("Test correlated"); -// -// double counts[] = {1.0, 1.0}; -// -// { -// LOG_DEBUG("One influencer value"); -// -// maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = -// maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); -// { -// TDoubleVec mean(2, 10.0); -// TDoubleVecVec covariances(2, TDoubleVec(2)); -// covariances[0][0] = covariances[1][1] = 1.0; -// covariances[0][1] = covariances[1][0] = 0.9; -// TDoubleVecVec samples_; -// 
rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); -// TDouble10Vec1Vec samples; -// for (std::size_t i = 0u; i < samples_.size(); ++i) -// { -// samples.push_back(samples_[i]); -// } -// TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); -// prior->addSamples(COUNT_WEIGHT, samples, weights); -// } -// -// core_t::TTime times[] = {0, 0}; -// double values[] = {5.0, 5.0}; -// double lb, ub; -// TTail10Vec tail; -// TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); -// TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(2, TDouble10Vec(2, 1.0))); -// prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, -// weightStyle, -// sample, -// weight, -// lb, ub, tail); -// TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; -// influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(5.0, 10.0, 1.0, 1.0))); -// -// TStoredStringPtrStoredStringPtrPrDoublePrVec influences; -// computeInfluences(calculator, -// model_t::E_IndividualUniqueCountByBucketAndPerson, TDecompositionCPtr1Vec(), *prior, -// times, values, weight, counts, -// 0.5*(lb+ub), tail, 0, 0.0/*confidence*/, -// I, influencerValues, influences); -// -// LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); -// CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), -// core::CContainerPrinter::print(influences)); -// } -// { -// LOG_DEBUG("No trend"); -// -// maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = -// maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); -// -// { -// TDoubleVec mean(2, 10.0); -// TDoubleVecVec covariances(2, TDoubleVec(2)); -// covariances[0][0] = covariances[1][1] = 1.0; -// covariances[0][1] = covariances[1][0] = -0.9; -// TDoubleVecVec samples_; -// rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); -// TDouble10Vec1Vec samples; -// for (std::size_t i = 0u; i < samples_.size(); ++i) -// { -// samples.push_back(samples_[i]); -// } -// TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); -// prior->addSamples(COUNT_WEIGHT, samples, weights); -// } -// -// core_t::TTime times[] = {0, 0}; -// double values[] = {10.0, 6.0}; -// TSize10Vec coordinates(std::size_t(1), 1); -// TDouble10Vec2Vec lbs, ubs; -// TTail10Vec tail; -// TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); -// TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(2, TDouble10Vec(2, 1.0))); -// prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, -// weightStyle, -// sample, -// weight, -// coordinates, -// lbs, ubs, tail); -// double lb = std::sqrt(lbs[0][0] * lbs[1][0]); -// double ub = std::sqrt(ubs[0][0] * ubs[1][0]); -// TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; -// influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(11.0, 9.0, 1.0, 1.0))); -// influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i2), make_pair(10.0, 6.0, 1.0, 1.0))); -// influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i3), make_pair( 9.0, 6.0, 1.0, 1.0))); -// -// TStoredStringPtrStoredStringPtrPrDoublePrVec influences; -// computeInfluences(calculator, -// model_t::E_IndividualUniqueCountByBucketAndPerson, TDecompositionCPtr1Vec(), *prior, -// times, values, weight, counts, -// 0.5*(lb+ub), tail, coordinates[0], 0.0/*confidence*/, -// I, influencerValues, influences); -// -// LOG_DEBUG(" influences = " << 
core::CContainerPrinter::print(influences)); -// CPPUNIT_ASSERT_EQUAL(std::string("[((I, i2), 1), ((I, i3), 1)]"), -// core::CContainerPrinter::print(influences)); -// } -// { -// LOG_DEBUG("Trend"); -// -// TDecompositionPtrVec trend; -// trend.push_back(TDecompositionPtr(new maths::CTimeSeriesDecomposition)); -// trend.push_back(TDecompositionPtr(new maths::CTimeSeriesDecomposition)); -// maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = -// maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); -// { -// TDoubleVec mean(2, 0.0); -// TDoubleVecVec covariances(2, TDoubleVec(2)); -// covariances[0][0] = covariances[1][1] = 100.0; -// covariances[0][1] = covariances[1][0] = 80.0; -// TDoubleVecVec samples; -// rng.generateMultivariateNormalSamples(mean, covariances, 10 * 86400 / 600, samples); -// TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); -// for (core_t::TTime time = 0, i = 0; time < 10 * 86400; time += 600, ++i) -// { -// double y[] = -// { -// 200.0 + 200.0 * std::sin(2.0 * 3.1416 * static_cast(time) / 86400.0), -// 100.0 + 100.0 * std::sin(2.0 * 3.1416 * static_cast(time) / 86400.0) -// }; -// trend[0]->addPoint(time, y[0] + samples[i][0]); -// trend[1]->addPoint(time, y[1] + samples[i][1]); -// prior->addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(samples[i])), weight); -// } -// } -// -// core_t::TTime testTimes[] = {0, 86400 / 4, 86400 / 2, (3 * 86400) / 4}; -// -// TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; -// influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(60.0, 60.0, 1.0, 1.0))); -// influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i2), make_pair(50.0, 50.0, 1.0, 1.0))); -// -// std::string expectedInfluencerValues[] = {"i1", "i2" }; -// double expectedInfluences[][2] = -// { -// {1.0, 1.0}, -// {1.0, 1.0}, -// {1.0, 1.0}, -// {1.0, 0.85} -// }; -// -// for (std::size_t i = 0u; i < boost::size(testTimes); ++i) -// { -// core_t::TTime time = testTimes[i]; -// LOG_DEBUG(" time = " << time); -// LOG_DEBUG(" baseline[0] = " << core::CContainerPrinter::print(trend[0]->baseline(time, 0.0))); -// LOG_DEBUG(" baseline[1] = " << core::CContainerPrinter::print(trend[1]->baseline(time, 0.0))); -// -// core_t::TTime times[] = {time, time }; -// double values[] = {120.0, 60.0}; -// double detrended[] = -// { -// trend[0]->detrend(time, values[0], 0.0), -// trend[1]->detrend(time, values[1], 0.0) -// }; -// double vs[] = -// { -// trend[0]->scale(time, prior->marginalLikelihoodVariances()[0], 0.0).second, -// trend[1]->scale(time, prior->marginalLikelihoodVariances()[1], 0.0).second -// }; -// LOG_DEBUG(" detrended = " << core::CContainerPrinter::print(detrended) -// << ", vs = " << core::CContainerPrinter::print(vs)); -// TSize10Vec coordinates(std::size_t(1), i % 2); -// TDouble10Vec2Vec lbs, ubs; -// TTail10Vec tail; -// TDouble10Vec1Vec sample(1, TDouble10Vec(&detrended[0], &detrended[2])); -// TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(&vs[0], &vs[2]))); -// prior->probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, -// weightStyle, -// sample, -// weight, -// coordinates, -// lbs, ubs, tail); -// double lb = std::sqrt(lbs[0][0] * lbs[1][0]); -// double ub = std::sqrt(ubs[0][0] * ubs[1][0]); -// LOG_DEBUG(" p = " << 0.5*(lb+ub) << ", tail = " << tail); -// -// TStoredStringPtrStoredStringPtrPrDoublePrVec influences; -// TDecompositionCPtr1Vec trends; -// for (TDecompositionPtrVecCItr itr = 
trend.begin(); itr != trend.end(); ++itr) -// { -// trends.push_back(itr->get()); -// } -// computeInfluences(calculator, -// model_t::E_IndividualHighUniqueCountByBucketAndPerson, -// trends, *prior, -// times, values, weight, counts, -// 0.5*(lb+ub), tail, coordinates[0], 0.0/*confidence*/, -// I, influencerValues, influences); -// -// LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); -// for (std::size_t j = 0u; j < influences.size(); ++j) -// { -// CPPUNIT_ASSERT_EQUAL(expectedInfluencerValues[j], -// *influences[j].first.second); -// CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], influences[j].second, 0.04); -// } -// } -// } -// } + // { + // LOG_DEBUG("Test correlated"); + // + // double counts[] = {1.0, 1.0}; + // + // { + // LOG_DEBUG("One influencer value"); + // + // maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = + // maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + // { + // TDoubleVec mean(2, 10.0); + // TDoubleVecVec covariances(2, TDoubleVec(2)); + // covariances[0][0] = covariances[1][1] = 1.0; + // covariances[0][1] = covariances[1][0] = 0.9; + // TDoubleVecVec samples_; + // rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); + // TDouble10Vec1Vec samples; + // for (std::size_t i = 0u; i < samples_.size(); ++i) + // { + // samples.push_back(samples_[i]); + // } + // TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); + // prior->addSamples(COUNT_WEIGHT, samples, weights); + // } + // + // core_t::TTime times[] = {0, 0}; + // double values[] = {5.0, 5.0}; + // double lb, ub; + // TTail10Vec tail; + // TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); + // TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(2, TDouble10Vec(2, 1.0))); + // prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, + // weightStyle, + // sample, + // weight, + // lb, ub, tail); + // TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; + // influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(5.0, 10.0, 1.0, 1.0))); + // + // TStoredStringPtrStoredStringPtrPrDoublePrVec influences; + // computeInfluences(calculator, + // model_t::E_IndividualUniqueCountByBucketAndPerson, TDecompositionCPtr1Vec(), *prior, + // times, values, weight, counts, + // 0.5*(lb+ub), tail, 0, 0.0/*confidence*/, + // I, influencerValues, influences); + // + // LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); + // CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), + // core::CContainerPrinter::print(influences)); + // } + // { + // LOG_DEBUG("No trend"); + // + // maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = + // maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + // + // { + // TDoubleVec mean(2, 10.0); + // TDoubleVecVec covariances(2, TDoubleVec(2)); + // covariances[0][0] = covariances[1][1] = 1.0; + // covariances[0][1] = covariances[1][0] = -0.9; + // TDoubleVecVec samples_; + // rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); + // TDouble10Vec1Vec samples; + // for (std::size_t i = 0u; i < samples_.size(); ++i) + // { + // samples.push_back(samples_[i]); + // } + // TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); + // prior->addSamples(COUNT_WEIGHT, samples, weights); + // } + // + // core_t::TTime times[] = {0, 0}; + // double values[] = {10.0, 6.0}; + // TSize10Vec 
coordinates(std::size_t(1), 1); + // TDouble10Vec2Vec lbs, ubs; + // TTail10Vec tail; + // TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); + // TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(2, TDouble10Vec(2, 1.0))); + // prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, + // weightStyle, + // sample, + // weight, + // coordinates, + // lbs, ubs, tail); + // double lb = std::sqrt(lbs[0][0] * lbs[1][0]); + // double ub = std::sqrt(ubs[0][0] * ubs[1][0]); + // TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; + // influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(11.0, 9.0, 1.0, 1.0))); + // influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i2), make_pair(10.0, 6.0, 1.0, 1.0))); + // influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i3), make_pair( 9.0, 6.0, 1.0, 1.0))); + // + // TStoredStringPtrStoredStringPtrPrDoublePrVec influences; + // computeInfluences(calculator, + // model_t::E_IndividualUniqueCountByBucketAndPerson, TDecompositionCPtr1Vec(), *prior, + // times, values, weight, counts, + // 0.5*(lb+ub), tail, coordinates[0], 0.0/*confidence*/, + // I, influencerValues, influences); + // + // LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); + // CPPUNIT_ASSERT_EQUAL(std::string("[((I, i2), 1), ((I, i3), 1)]"), + // core::CContainerPrinter::print(influences)); + // } + // { + // LOG_DEBUG("Trend"); + // + // TDecompositionPtrVec trend; + // trend.push_back(TDecompositionPtr(new maths::CTimeSeriesDecomposition)); + // trend.push_back(TDecompositionPtr(new maths::CTimeSeriesDecomposition)); + // maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = + // maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + // { + // TDoubleVec mean(2, 0.0); + // TDoubleVecVec covariances(2, TDoubleVec(2)); + // covariances[0][0] = covariances[1][1] = 100.0; + // covariances[0][1] = covariances[1][0] = 80.0; + // TDoubleVecVec samples; + // rng.generateMultivariateNormalSamples(mean, covariances, 10 * 86400 / 600, samples); + // TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); + // for (core_t::TTime time = 0, i = 0; time < 10 * 86400; time += 600, ++i) + // { + // double y[] = + // { + // 200.0 + 200.0 * std::sin(2.0 * 3.1416 * static_cast(time) / 86400.0), + // 100.0 + 100.0 * std::sin(2.0 * 3.1416 * static_cast(time) / 86400.0) + // }; + // trend[0]->addPoint(time, y[0] + samples[i][0]); + // trend[1]->addPoint(time, y[1] + samples[i][1]); + // prior->addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(samples[i])), weight); + // } + // } + // + // core_t::TTime testTimes[] = {0, 86400 / 4, 86400 / 2, (3 * 86400) / 4}; + // + // TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; + // influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(60.0, 60.0, 1.0, 1.0))); + // influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i2), make_pair(50.0, 50.0, 1.0, 1.0))); + // + // std::string expectedInfluencerValues[] = {"i1", "i2" }; + // double expectedInfluences[][2] = + // { + // {1.0, 1.0}, + // {1.0, 1.0}, + // {1.0, 1.0}, + // {1.0, 0.85} + // }; + // + // for (std::size_t i = 0u; i < boost::size(testTimes); ++i) + // { + // core_t::TTime time = testTimes[i]; + // LOG_DEBUG(" time = " << time); + // LOG_DEBUG(" baseline[0] = " << core::CContainerPrinter::print(trend[0]->baseline(time, 0.0))); + // LOG_DEBUG(" baseline[1] = " << 
core::CContainerPrinter::print(trend[1]->baseline(time, 0.0))); + // + // core_t::TTime times[] = {time, time }; + // double values[] = {120.0, 60.0}; + // double detrended[] = + // { + // trend[0]->detrend(time, values[0], 0.0), + // trend[1]->detrend(time, values[1], 0.0) + // }; + // double vs[] = + // { + // trend[0]->scale(time, prior->marginalLikelihoodVariances()[0], 0.0).second, + // trend[1]->scale(time, prior->marginalLikelihoodVariances()[1], 0.0).second + // }; + // LOG_DEBUG(" detrended = " << core::CContainerPrinter::print(detrended) + // << ", vs = " << core::CContainerPrinter::print(vs)); + // TSize10Vec coordinates(std::size_t(1), i % 2); + // TDouble10Vec2Vec lbs, ubs; + // TTail10Vec tail; + // TDouble10Vec1Vec sample(1, TDouble10Vec(&detrended[0], &detrended[2])); + // TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(&vs[0], &vs[2]))); + // prior->probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, + // weightStyle, + // sample, + // weight, + // coordinates, + // lbs, ubs, tail); + // double lb = std::sqrt(lbs[0][0] * lbs[1][0]); + // double ub = std::sqrt(ubs[0][0] * ubs[1][0]); + // LOG_DEBUG(" p = " << 0.5*(lb+ub) << ", tail = " << tail); + // + // TStoredStringPtrStoredStringPtrPrDoublePrVec influences; + // TDecompositionCPtr1Vec trends; + // for (TDecompositionPtrVecCItr itr = trend.begin(); itr != trend.end(); ++itr) + // { + // trends.push_back(itr->get()); + // } + // computeInfluences(calculator, + // model_t::E_IndividualHighUniqueCountByBucketAndPerson, + // trends, *prior, + // times, values, weight, counts, + // 0.5*(lb+ub), tail, coordinates[0], 0.0/*confidence*/, + // I, influencerValues, influences); + // + // LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); + // for (std::size_t j = 0u; j < influences.size(); ++j) + // { + // CPPUNIT_ASSERT_EQUAL(expectedInfluencerValues[j], + // *influences[j].first.second); + // CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], influences[j].second, 0.04); + // } + // } + // } + // } } -void CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator() -{ +void CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator() { LOG_DEBUG("*** testIndicatorInfluenceCalculator ***"); { @@ -1391,24 +1419,27 @@ void CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator() model::CIndicatorInfluenceCalculator calculator; maths::CTimeSeriesDecomposition trend{0.0, 600}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(600), 0, trend, prior); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(1.0, 1.0)}, - {TStrCRef(i2), make_pair(1.0, 1.0)}, - {TStrCRef(i3), make_pair(1.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(1.0, 1.0)}, {TStrCRef(i2), make_pair(1.0, 1.0)}, {TStrCRef(i3), make_pair(1.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, - model_t::E_IndividualIndicatorOfBucketPerson, model, - 0/*time*/, 1.0/*value*/, 1.0/*count*/, - 0.1/*probability*/, TTail2Vec{maths_t::E_RightTail}, - I, influencerValues, influences); + model_t::E_IndividualIndicatorOfBucketPerson, + model, + 0 /*time*/, + 1.0 /*value*/, + 1.0 /*count*/, + 0.1 /*probability*/, + 
TTail2Vec{maths_t::E_RightTail}, + I, + influencerValues, + influences); LOG_DEBUG("influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1), ((I, i2), 1), ((I, i3), 1)]"), - core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1), ((I, i2), 1), ((I, i3), 1)]"), core::CContainerPrinter::print(influences)); } /*{ LOG_DEBUG("Test correlated"); @@ -1439,8 +1470,7 @@ void CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator() }*/ } -void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculator() -{ +void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculator() { LOG_DEBUG("*** testProbabilityAndInfluenceCalculator ***"); test::CRandomNumbers rng; @@ -1448,10 +1478,9 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat core_t::TTime bucketLength{600}; maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CMultivariateNormalConjugate<2> multivariatePrior = - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData); + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel univariateModel(params(bucketLength), 0, trend, prior); maths::CMultivariateTimeSeriesModel multivariateModel(params(bucketLength), trend, multivariatePrior); @@ -1465,9 +1494,8 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat rng.generateMultivariateNormalSamples(mean, covariances, 50, multivariateSamples); core_t::TTime now{addSamples(bucketLength, multivariateSamples, multivariateModel)}; - model_t::TFeatureVec features{model_t::E_IndividualSumByBucketAndPerson, - model_t::E_IndividualMeanLatLongByPerson}; - const maths::CModel *models[]{&univariateModel, &multivariateModel}; + model_t::TFeatureVec features{model_t::E_IndividualSumByBucketAndPerson, model_t::E_IndividualMeanLatLongByPerson}; + const maths::CModel* models[]{&univariateModel, &multivariateModel}; maths_t::TWeightStyleVec weightStyles; weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); @@ -1483,49 +1511,53 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat TDoubleVecVec values{{12.0, 1.0}, {15.0, 1.0}, - { 7.0, 1.5}, - { 9.0, 1.0}, + {7.0, 1.5}, + {9.0, 1.0}, {17.0, 2.0}, {12.0, 17.0, 1.0}, {15.0, 20.0, 1.0}, - { 7.0, 12.0, 1.5}, + {7.0, 12.0, 1.5}, {15.0, 10.0, 1.0}, {17.0, 22.0, 2.0}}; TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i2), make_pair(12.0, 1.0)}, {TStrCRef(i1), make_pair(15.0, 1.0)}, - {TStrCRef(i2), make_pair( 7.0, 1.0)}, - {TStrCRef(i2), make_pair( 9.0, 1.0)}, + {TStrCRef(i2), make_pair(7.0, 1.0)}, + {TStrCRef(i2), make_pair(9.0, 1.0)}, {TStrCRef(i1), make_pair(17.0, 1.0)}, {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}, {TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}, - {TStrCRef(i2), make_pair( 7.0, 12.0, 1.0)}, - {TStrCRef(i2), make_pair( 9.0, 14.0, 1.0)}, + {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}, + {TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}, {TStrCRef(i1), make_pair(17.0, 22.0, 1.0)}}; maths::CJointProbabilityOfLessLikelySamples pJoint; maths::CProbabilityOfExtremeSample pExtreme; - for (std::size_t i = 0u; i < 5; 
++i) - { - for (std::size_t j = 0u; j < features.size(); ++j) - { - TDouble2Vec1Vec value{TDouble2Vec(&values[i+5*j][0], &values[i+5*j][1+j])}; - TDouble2Vec4Vec weights{TDouble2Vec(1+j, values[i+5*j][1+j]), - TDouble2Vec(1+j, 1.0)}; + for (std::size_t i = 0u; i < 5; ++i) { + for (std::size_t j = 0u; j < features.size(); ++j) { + TDouble2Vec1Vec value{TDouble2Vec(&values[i + 5 * j][0], &values[i + 5 * j][1 + j])}; + TDouble2Vec4Vec weights{TDouble2Vec(1 + j, values[i + 5 * j][1 + j]), TDouble2Vec(1 + j, 1.0)}; maths::CModelProbabilityParams params_; params_.addCalculation(maths_t::E_TwoSided) - .seasonalConfidenceInterval(0.0) - .addBucketEmpty(TBool2Vec{false}) - .weightStyles(weightStyles) - .addWeights(weights); + .seasonalConfidenceInterval(0.0) + .addBucketEmpty(TBool2Vec{false}) + .weightStyles(weightStyles) + .addWeights(weights); double p; TTail2Vec tail; model_t::CResultType type; TSize1Vec mostAnomalousCorrelate; - calculator.addProbability(features[j], 0, *models[j], - 0/*elapsedTime*/, params_, - TTime2Vec1Vec{TTime2Vec{now}}, value, - p, tail, type, mostAnomalousCorrelate); + calculator.addProbability(features[j], + 0, + *models[j], + 0 /*elapsedTime*/, + params_, + TTime2Vec1Vec{TTime2Vec{now}}, + value, + p, + tail, + type, + mostAnomalousCorrelate); pJoint.add(p); pExtreme.add(p); model::CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); @@ -1554,41 +1586,28 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat CPPUNIT_ASSERT(pJoint.calculate(pj)); CPPUNIT_ASSERT(pExtreme.calculate(pe)); - LOG_DEBUG(" probability = " << probability - << ", expected probability = " << std::min(pj, pe)); + LOG_DEBUG(" probability = " << probability << ", expected probability = " << std::min(pj, pe)); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::min(pe, pj), probability, 1e-10); } { LOG_DEBUG("influencing joint probability"); - TDoubleVecVec values[]{TDoubleVecVec{{12.0, 1.0}, - {15.0, 1.0}, - { 7.0, 1.5}, - { 9.0, 1.0}, - {17.0, 2.0}}, - TDoubleVecVec{{12.0, 17.0, 1.0}, - {15.0, 20.0, 1.0}, - { 7.0, 12.0, 1.5}, - { 9.0, 14.0, 1.0}, - {17.0, 22.0, 2.0}}}; + TDoubleVecVec values[]{TDoubleVecVec{{12.0, 1.0}, {15.0, 1.0}, {7.0, 1.5}, {9.0, 1.0}, {17.0, 2.0}}, + TDoubleVecVec{{12.0, 17.0, 1.0}, {15.0, 20.0, 1.0}, {7.0, 12.0, 1.5}, {9.0, 14.0, 1.0}, {17.0, 22.0, 2.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair( 7.0, 1.5)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair( 9.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 2.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair( 7.0, 12.0, 1.5)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair( 9.0, 14.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 22.0, 2.0)}}}}; - for (std::size_t i = 0u; i < features.size(); ++i) - { + TStrCRefDouble1VecDoublePrPrVecVec{TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 1.5)}}, + 
TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 2.0)}}}, + TStrCRefDouble1VecDoublePrPrVecVec{TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 12.0, 1.5)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 22.0, 2.0)}}}}; + for (std::size_t i = 0u; i < features.size(); ++i) { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], - influencerValues[i], influences); + testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], influencerValues[i], influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), influences.size()); CPPUNIT_ASSERT_EQUAL(i1, *influences[0].first.second); @@ -1598,35 +1617,23 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat { LOG_DEBUG("influencing extreme probability"); - TDoubleVecVec values[]{TDoubleVecVec{{11.0, 1.0}, - {10.5, 1.0}, - {8.5, 1.5}, - {10.8, 1.5}, - {19.0, 1.0}}, - TDoubleVecVec{{11.0, 16.0, 1.0}, - {10.5, 15.5, 1.0}, - { 8.5, 13.5, 1.5}, - {10.8, 15.8, 1.5}, - {19.0, 24.0, 1.0}}}; + TDoubleVecVec values[]{TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.5, 1.5}, {10.8, 1.5}, {19.0, 1.0}}, + TDoubleVecVec{{11.0, 16.0, 1.0}, {10.5, 15.5, 1.0}, {8.5, 13.5, 1.5}, {10.8, 15.8, 1.5}, {19.0, 24.0, 1.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair( 8.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 1.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair( 8.5, 13.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 15.8, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 24.0, 1.0)}}}}; - - for (std::size_t i = 0u; i < features.size(); ++i) - { + TStrCRefDouble1VecDoublePrPrVecVec{TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 1.0)}}}, + TStrCRefDouble1VecDoublePrPrVecVec{TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 13.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 15.8, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 24.0, 1.0)}}}}; + + for (std::size_t i = 0u; i < features.size(); ++i) { 
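+            // Index 0 exercises the univariate sum feature and model, index 1 the two-dimensional lat/long feature and model set up above.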
TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], - influencerValues[i], influences); + testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], influencerValues[i], influences); LOG_DEBUG(" influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), influences.size()); CPPUNIT_ASSERT_EQUAL(i2, *influences[0].first.second); @@ -1636,45 +1643,26 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat { LOG_DEBUG("marginal influence"); - TDoubleVecVec values[]{TDoubleVecVec{{11.0, 1.0}, - {10.5, 1.0}, - { 8.0, 1.0}, - {10.8, 1.0}, - {14.0, 1.0}}, - TDoubleVecVec{{11.0, 16.0, 1.0}, - {10.5, 15.5, 1.0}, - { 8.0, 13.0, 1.0}, - {10.8, 15.8, 1.0}, - {14.0, 19.0, 1.0}}}; + TDoubleVecVec values[]{TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.0, 1.0}, {10.8, 1.0}, {14.0, 1.0}}, + TDoubleVecVec{{11.0, 16.0, 1.0}, {10.5, 15.5, 1.0}, {8.0, 13.0, 1.0}, {10.8, 15.8, 1.0}, {14.0, 19.0, 1.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 1.0)}, - {TStrCRef(i2), make_pair(10.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}, - {TStrCRef(i2), make_pair(10.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair( 9.0, 1.0)}, - {TStrCRef(i2), make_pair( 7.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}, - {TStrCRef(i2), make_pair(10.6, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(16.0, 1.0)}, - {TStrCRef(i2), make_pair(12.0, 1.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 17.0, 1.0)}, - {TStrCRef(i2), make_pair(10.0, 15.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}, - {TStrCRef(i2), make_pair(10.5, 15.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair( 9.0, 14.0, 1.0)}, - {TStrCRef(i2), make_pair( 7.0, 12.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}, - {TStrCRef(i2), make_pair(10.6, 15.6, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(16.0, 21.0, 1.0)}, - {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}}}; + TStrCRefDouble1VecDoublePrPrVecVec{ + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 1.0)}, {TStrCRef(i2), make_pair(10.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}, {TStrCRef(i2), make_pair(10.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 1.0)}, {TStrCRef(i2), make_pair(7.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}, {TStrCRef(i2), make_pair(10.6, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(16.0, 1.0)}, {TStrCRef(i2), make_pair(12.0, 1.0)}}}, + TStrCRefDouble1VecDoublePrPrVecVec{ + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 17.0, 1.0)}, {TStrCRef(i2), make_pair(10.0, 15.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}, {TStrCRef(i2), make_pair(10.5, 15.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 14.0, 1.0)}, {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}, {TStrCRef(i2), make_pair(10.6, 15.6, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), 
make_pair(16.0, 21.0, 1.0)}, {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}}};
         {
             TStoredStringPtrStoredStringPtrPrDoublePrVec influences;
-            testProbabilityAndGetInfluences(model_t::E_IndividualMeanByPerson,
-                                            univariateModel, now, values[0],
-                                            influencerValues[0], influences);
+            testProbabilityAndGetInfluences(
+                model_t::E_IndividualMeanByPerson, univariateModel, now, values[0], influencerValues[0], influences);
             LOG_DEBUG("  influences = " << core::CContainerPrinter::print(influences));
             CPPUNIT_ASSERT_EQUAL(std::size_t(1), influences.size());
             CPPUNIT_ASSERT_EQUAL(i1, *influences[0].first.second);
@@ -1682,9 +1670,8 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat
         }
         {
             TStoredStringPtrStoredStringPtrPrDoublePrVec influences;
-            testProbabilityAndGetInfluences(model_t::E_IndividualMeanLatLongByPerson,
-                                            multivariateModel, now, values[1],
-                                            influencerValues[1], influences);
+            testProbabilityAndGetInfluences(
+                model_t::E_IndividualMeanLatLongByPerson, multivariateModel, now, values[1], influencerValues[1], influences);
             LOG_DEBUG("  influences = " << core::CContainerPrinter::print(influences));
             CPPUNIT_ASSERT_EQUAL(std::size_t(2), influences.size());
             CPPUNIT_ASSERT_EQUAL(i2, *influences[0].first.second);
@@ -1695,28 +1682,27 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat
     }
 }
 
-CppUnit::Test *CProbabilityAndInfluenceCalculatorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CProbabilityAndInfluenceCalculatorTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
-                               "CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator",
-                               &CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
-                               "CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluenceCalculator",
-                               &CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluenceCalculator) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
-                               "CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator",
-                               &CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
-                               "CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculator",
-                               &CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculator) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
-                               "CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator",
-                               &CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
-                               "CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculator",
-                               &CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculator) );
+CppUnit::Test* CProbabilityAndInfluenceCalculatorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProbabilityAndInfluenceCalculatorTest");
+
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
+        "CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator",
+        &CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
+        "CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluenceCalculator",
+        &CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluenceCalculator));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
+        "CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator",
+        &CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
+        "CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculator",
+        &CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculator));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
+        "CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator",
+        &CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>(
+        "CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculator",
+        &CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculator));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.h b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.h
index 59374479cd..473c4bb1e2 100644
--- a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.h
+++ b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.h
@@ -9,17 +9,16 @@
 
 #include 
 
-class CProbabilityAndInfluenceCalculatorTest : public CppUnit::TestFixture
-{
-    public:
-        void testInfluenceUnavailableCalculator();
-        void testLogProbabilityComplementInfluenceCalculator();
-        void testMeanInfluenceCalculator();
-        void testLogProbabilityInfluenceCalculator();
-        void testIndicatorInfluenceCalculator();
-        void testProbabilityAndInfluenceCalculator();
+class CProbabilityAndInfluenceCalculatorTest : public CppUnit::TestFixture {
+public:
+    void testInfluenceUnavailableCalculator();
+    void testLogProbabilityComplementInfluenceCalculator();
+    void testMeanInfluenceCalculator();
+    void testLogProbabilityInfluenceCalculator();
+    void testIndicatorInfluenceCalculator();
+    void testProbabilityAndInfluenceCalculator();
 
-        static CppUnit::Test *suite();
+    static CppUnit::Test* suite();
 };
 
 #endif // INCLUDED_CProbabilityAndInfluenceCalculatorTest_h
diff --git a/lib/model/unittest/CResourceLimitTest.cc b/lib/model/unittest/CResourceLimitTest.cc
index 5315a96b0c..e8c69b4409 100644
--- a/lib/model/unittest/CResourceLimitTest.cc
+++ b/lib/model/unittest/CResourceLimitTest.cc
@@ -35,120 +35,88 @@ using namespace model;
 
 using TStrVec = std::vector<std::string>;
 
-class CResultWriter : public ml::model::CHierarchicalResultsVisitor
-{
-    public:
-        using TResultsTp = boost::tuple<core_t::TTime, double, std::string, std::string, std::string>;
-        using TResultsVec = std::vector<TResultsTp>;
-
-    public:
-        CResultWriter(const CAnomalyDetectorModelConfig &modelConfig,
-                      const CLimits &limits) :
-                m_ModelConfig(modelConfig),
-                m_Limits(limits)
-        {
-        }
+class CResultWriter : public ml::model::CHierarchicalResultsVisitor {
+public:
+    using TResultsTp = boost::tuple<core_t::TTime, double, std::string, std::string, std::string>;
+    using TResultsVec = std::vector<TResultsTp>;
+
+public:
+    CResultWriter(const CAnomalyDetectorModelConfig& modelConfig, const CLimits& limits) : m_ModelConfig(modelConfig), m_Limits(limits) {}
+
+    void operator()(CAnomalyDetector& detector, core_t::TTime start, core_t::TTime end) {
+        CHierarchicalResults results;
+        detector.buildResults(start, end, results);
+        results.buildHierarchy();
+        CHierarchicalResultsAggregator aggregator(m_ModelConfig);
+        results.bottomUpBreadthFirst(aggregator);
+        model::CHierarchicalResultsProbabilityFinalizer finalizer;
+        results.bottomUpBreadthFirst(finalizer);
+        model::CHierarchicalResultsPopulator populator(m_Limits);
+        results.bottomUpBreadthFirst(populator);
+        results.bottomUpBreadthFirst(*this);
+    }
 
-        void operator()(CAnomalyDetector &detector,
-                        core_t::TTime start,
-                        core_t::TTime end)
-        {
- CHierarchicalResults results; - detector.buildResults(start, end, results); - results.buildHierarchy(); - CHierarchicalResultsAggregator aggregator(m_ModelConfig); - results.bottomUpBreadthFirst(aggregator); - model::CHierarchicalResultsProbabilityFinalizer finalizer; - results.bottomUpBreadthFirst(finalizer); - model::CHierarchicalResultsPopulator populator(m_Limits); - results.bottomUpBreadthFirst(populator); - results.bottomUpBreadthFirst(*this); + virtual void visit(const ml::model::CHierarchicalResults& results, const ml::model::CHierarchicalResults::TNode& node, bool pivot) { + if (pivot) { + return; } - - virtual void visit(const ml::model::CHierarchicalResults &results, - const ml::model::CHierarchicalResults::TNode &node, - bool pivot) - { - if (pivot) - { - return; - } - if (!this->shouldWriteResult(m_Limits, results, node, pivot)) - { - return; - } - if (this->isSimpleCount(node)) - { - return; - } - if (!this->isLeaf(node)) - { - return; - } - - LOG_DEBUG("Got anomaly @ " << node.s_BucketStartTime - << ": " << node.probability()); - - ml::model::SAnnotatedProbability::TAttributeProbability1Vec &attributes = - node.s_AnnotatedProbability.s_AttributeProbabilities; - - m_Results.push_back(TResultsTp(node.s_BucketStartTime, - node.probability(), - (attributes.empty() ? "" : *attributes[0].s_Attribute), - *node.s_Spec.s_PersonFieldValue, - *node.s_Spec.s_PartitionFieldValue)); + if (!this->shouldWriteResult(m_Limits, results, node, pivot)) { + return; + } + if (this->isSimpleCount(node)) { + return; + } + if (!this->isLeaf(node)) { + return; } - bool operator()(ml::core_t::TTime time, - const ml::model::CHierarchicalResults::TNode &node, - bool isBucketInfluencer) - { - LOG_DEBUG((isBucketInfluencer ? "BucketInfluencer" : "Influencer ") - << node.s_Spec.print() << " initial score " << node.probability() - << ", time: " << time); + LOG_DEBUG("Got anomaly @ " << node.s_BucketStartTime << ": " << node.probability()); - return true; - } + ml::model::SAnnotatedProbability::TAttributeProbability1Vec& attributes = node.s_AnnotatedProbability.s_AttributeProbabilities; - const TResultsVec &results() const - { - return m_Results; - } + m_Results.push_back(TResultsTp(node.s_BucketStartTime, + node.probability(), + (attributes.empty() ? "" : *attributes[0].s_Attribute), + *node.s_Spec.s_PersonFieldValue, + *node.s_Spec.s_PartitionFieldValue)); + } - private: - const CAnomalyDetectorModelConfig &m_ModelConfig; - const CLimits &m_Limits; - TResultsVec m_Results; + bool operator()(ml::core_t::TTime time, const ml::model::CHierarchicalResults::TNode& node, bool isBucketInfluencer) { + LOG_DEBUG((isBucketInfluencer ? 
"BucketInfluencer" : "Influencer ") + << node.s_Spec.print() << " initial score " << node.probability() << ", time: " << time); + + return true; + } + + const TResultsVec& results() const { return m_Results; } + +private: + const CAnomalyDetectorModelConfig& m_ModelConfig; + const CLimits& m_Limits; + TResultsVec m_Results; }; -CppUnit::Test* CResourceLimitTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CResourceLimitTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CResourceLimitTest::testLimitBy", - &CResourceLimitTest::testLimitBy) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CResourceLimitTest::testLimitByOver", - &CResourceLimitTest::testLimitByOver) ); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CResourceLimitTest::testLargeAllocations", - &CResourceLimitTest::testLargeAllocations) ); +CppUnit::Test* CResourceLimitTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CResourceLimitTest"); + + suiteOfTests->addTest(new CppUnit::TestCaller("CResourceLimitTest::testLimitBy", &CResourceLimitTest::testLimitBy)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CResourceLimitTest::testLimitByOver", &CResourceLimitTest::testLimitByOver)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CResourceLimitTest::testLargeAllocations", &CResourceLimitTest::testLargeAllocations)); return suiteOfTests; } -void CResourceLimitTest::testLimitBy() -{ +void CResourceLimitTest::testLimitBy() { // Check that we can get some results from a test data set, then // turn on resource limiting and still get the same results static const core_t::TTime BUCKET_LENGTH(3600); - static const core_t::TTime FIRST_TIME(maths::CIntegerTools::ceil(core_t::TTime(1407428000), - BUCKET_LENGTH)); + static const core_t::TTime FIRST_TIME(maths::CIntegerTools::ceil(core_t::TTime(1407428000), BUCKET_LENGTH)); ::CResultWriter::TResultsVec results; { @@ -158,7 +126,8 @@ void CResourceLimitTest::testLimitBy() function_t::E_IndividualMetric, false, model_t::E_XF_None, - "value", "colour"); + "value", + "colour"); CAnomalyDetector detector(1, // identifier limits, modelConfig, @@ -192,7 +161,8 @@ void CResourceLimitTest::testLimitBy() function_t::E_IndividualMetric, false, model_t::E_XF_None, - "value", "colour"); + "value", + "colour"); CAnomalyDetector detector(1, // identifier limits, modelConfig, @@ -201,29 +171,22 @@ void CResourceLimitTest::testLimitBy() modelConfig.factory(key)); ::CResultWriter writer(modelConfig, limits); - importCsvDataWithLimiter(FIRST_TIME, - BUCKET_LENGTH, - writer, - "testfiles/resource_limits_8_series.csv", - detector, - 1, - limits.resourceMonitor()); + importCsvDataWithLimiter( + FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/resource_limits_8_series.csv", detector, 1, limits.resourceMonitor()); - const ::CResultWriter::TResultsVec &secondResults = writer.results(); + const ::CResultWriter::TResultsVec& secondResults = writer.results(); CPPUNIT_ASSERT_EQUAL(std::size_t(0), secondResults.size()); } } -void CResourceLimitTest::testLimitByOver() -{ +void CResourceLimitTest::testLimitByOver() { // Check that we can get some results from a test data set, then // turn on resource limiting and still get the results from // non-limited data, but not results from limited data static const core_t::TTime BUCKET_LENGTH(3600); - static const core_t::TTime FIRST_TIME(maths::CIntegerTools::ceil(core_t::TTime(1407441600), - BUCKET_LENGTH)); + static const core_t::TTime 
FIRST_TIME(maths::CIntegerTools::ceil(core_t::TTime(1407441600), BUCKET_LENGTH)); ::CResultWriter::TResultsVec results; { @@ -233,7 +196,9 @@ void CResourceLimitTest::testLimitByOver() function_t::E_PopulationMetric, false, model_t::E_XF_None, - "value", "colour", "species"); + "value", + "colour", + "species"); CAnomalyDetector detector(1, // identifier limits, modelConfig, @@ -265,7 +230,9 @@ void CResourceLimitTest::testLimitByOver() function_t::E_PopulationMetric, false, model_t::E_XF_None, - "value", "colour", "species"); + "value", + "colour", + "species"); CAnomalyDetector detector(1, // identifier limits, modelConfig, @@ -274,138 +241,102 @@ void CResourceLimitTest::testLimitByOver() modelConfig.factory(key)); ::CResultWriter writer(modelConfig, limits); - importCsvDataWithLimiter(FIRST_TIME, - BUCKET_LENGTH, - writer, - "testfiles/resource_limits_8_2over.csv", - detector, - 1, - limits.resourceMonitor()); + importCsvDataWithLimiter( + FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/resource_limits_8_2over.csv", detector, 1, limits.resourceMonitor()); - const ::CResultWriter::TResultsVec &secondResults = writer.results(); + const ::CResultWriter::TResultsVec& secondResults = writer.results(); // should only have red flowers as results now CPPUNIT_ASSERT_EQUAL(std::size_t(0), secondResults.size()); } -namespace -{ +namespace { //! A test wrapper around a real model that tracks calls to createNewModels //! and simulates taking lots of memory -class CMockEventRateModel : public ml::model::CEventRateModel -{ - public: - CMockEventRateModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const maths::CMultinomialConjugate &personProbabilityPrior, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators, - CResourceMonitor &resourceMonitor) : - CEventRateModel(params, - dataGatherer, - newFeatureModels, - TFeatureMultivariatePriorPtrPrVec(), - TFeatureCorrelationsPtrPrVec(), - personProbabilityPrior, - influenceCalculators), - m_ResourceMonitor(resourceMonitor), - m_NewPeople(0), - m_NewAttributes(0) - {} - - virtual void updateRecycledModels() - { - // Do nothing - } +class CMockEventRateModel : public ml::model::CEventRateModel { +public: + CMockEventRateModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const maths::CMultinomialConjugate& personProbabilityPrior, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + CResourceMonitor& resourceMonitor) + : CEventRateModel(params, + dataGatherer, + newFeatureModels, + TFeatureMultivariatePriorPtrPrVec(), + TFeatureCorrelationsPtrPrVec(), + personProbabilityPrior, + influenceCalculators), + m_ResourceMonitor(resourceMonitor), + m_NewPeople(0), + m_NewAttributes(0) {} + + virtual void updateRecycledModels() { + // Do nothing + } - virtual void createNewModels(std::size_t n, std::size_t m) - { - m_NewPeople += n; - m_NewAttributes += m; - this->CEventRateModel::createNewModels(n, m); - } + virtual void createNewModels(std::size_t n, std::size_t m) { + m_NewPeople += n; + m_NewAttributes += m; + this->CEventRateModel::createNewModels(n, m); + } - void test(core_t::TTime time) - { - this->createUpdateNewModels(time, m_ResourceMonitor); - } + void test(core_t::TTime time) { this->createUpdateNewModels(time, m_ResourceMonitor); } - std::size_t getNewPeople() const - { - return m_NewPeople; - } + std::size_t getNewPeople() const { return m_NewPeople; } - std::size_t 
getNewAttributes() const - { - return m_NewAttributes; - } + std::size_t getNewAttributes() const { return m_NewAttributes; } - private: - CResourceMonitor &m_ResourceMonitor; - std::size_t m_NewPeople; - std::size_t m_NewAttributes; +private: + CResourceMonitor& m_ResourceMonitor; + std::size_t m_NewPeople; + std::size_t m_NewAttributes; }; //! A test wrapper around a real model that tracks calls to createNewModels //! and simulates taking lots of memory -class CMockMetricModel : public ml::model::CMetricModel -{ - public: - CMockMetricModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureMathsModelPtrPrVec &newFeatureModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators, - CResourceMonitor &resourceMonitor) : - CMetricModel(params, - dataGatherer, - newFeatureModels, - TFeatureMultivariatePriorPtrPrVec(), - TFeatureCorrelationsPtrPrVec(), - influenceCalculators), - m_ResourceMonitor(resourceMonitor), - m_NewPeople(0), - m_NewAttributes(0) - {} - - virtual void updateRecycledModels() - { - // Do nothing - } +class CMockMetricModel : public ml::model::CMetricModel { +public: + CMockMetricModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + CResourceMonitor& resourceMonitor) + : CMetricModel(params, + dataGatherer, + newFeatureModels, + TFeatureMultivariatePriorPtrPrVec(), + TFeatureCorrelationsPtrPrVec(), + influenceCalculators), + m_ResourceMonitor(resourceMonitor), + m_NewPeople(0), + m_NewAttributes(0) {} + + virtual void updateRecycledModels() { + // Do nothing + } - virtual void createNewModels(std::size_t n, std::size_t m) - { - m_NewPeople += n; - m_NewAttributes += m; - this->CMetricModel::createNewModels(n, m); - } + virtual void createNewModels(std::size_t n, std::size_t m) { + m_NewPeople += n; + m_NewAttributes += m; + this->CMetricModel::createNewModels(n, m); + } - void test(core_t::TTime time) - { - this->createUpdateNewModels(time, m_ResourceMonitor); - } + void test(core_t::TTime time) { this->createUpdateNewModels(time, m_ResourceMonitor); } - std::size_t getNewPeople() const - { - return m_NewPeople; - } + std::size_t getNewPeople() const { return m_NewPeople; } - std::size_t getNewAttributes() const - { - return m_NewAttributes; - } + std::size_t getNewAttributes() const { return m_NewAttributes; } - private: - CResourceMonitor &m_ResourceMonitor; - std::size_t m_NewPeople; - std::size_t m_NewAttributes; +private: + CResourceMonitor& m_ResourceMonitor; + std::size_t m_NewPeople; + std::size_t m_NewAttributes; }; -void addArrival(core_t::TTime time, - const std::string &p, - CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor) -{ +void addArrival(core_t::TTime time, const std::string& p, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields; fields.push_back(&p); CEventData result; @@ -413,14 +344,8 @@ void addArrival(core_t::TTime time, gatherer.addArrival(fields, result, resourceMonitor); } -void addPersonData(std::size_t start, - std::size_t end, - core_t::TTime time, - CDataGatherer &gatherer, - CResourceMonitor &resourceMonitor) -{ - for (std::size_t i = start; i < end; i++) - { +void addPersonData(std::size_t start, std::size_t end, core_t::TTime time, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) { + for (std::size_t i = start; i < end; i++) { std::ostringstream ssA; ssA << "person" << i; 
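        // Note: each iteration generates a previously unseen person name ("person0", "person1", ...),
        // which is what drives the memory growth these resource limit tests rely on.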
addArrival(time, ssA.str(), gatherer, resourceMonitor);
@@ -429,11 +354,7 @@ void addPersonData(std::size_t start,
 
 const std::string VALUE("23");
 
-void addMetricArrival(core_t::TTime time,
-                      const std::string &p,
-                      CDataGatherer &gatherer,
-                      CResourceMonitor &resourceMonitor)
-{
+void addMetricArrival(core_t::TTime time, const std::string& p, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) {
     CDataGatherer::TStrCPtrVec fields;
     fields.push_back(&p);
     fields.push_back(&VALUE);
@@ -445,21 +366,17 @@ void addMetricArrival(core_t::TTime time,
 void addPersonMetricData(std::size_t start,
                          std::size_t end,
                          core_t::TTime time,
-                         CDataGatherer &gatherer,
-                         CResourceMonitor &resourceMonitor)
-{
-    for (std::size_t i = start; i < end; i++)
-    {
+                         CDataGatherer& gatherer,
+                         CResourceMonitor& resourceMonitor) {
+    for (std::size_t i = start; i < end; i++) {
         std::ostringstream ssA;
         ssA << "person" << i;
         addMetricArrival(time, ssA.str(), gatherer, resourceMonitor);
     }
 }
-
 }
 
-void CResourceLimitTest::testLargeAllocations()
-{
+void CResourceLimitTest::testLargeAllocations() {
     {
         // Test CEventRateModel::createUpdateNewModels()
         const std::string EMPTY_STRING("");
@@ -476,8 +393,7 @@ void CResourceLimitTest::testLargeAllocations()
         factory.features(features);
 
         CModelFactory::SGathererInitializationData gathererInitData(FIRST_TIME);
-        CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(
-                                                    factory.makeDataGatherer(gathererInitData)));
+        CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData)));
 
         CResourceMonitor resourceMonitor;
         resourceMonitor.memoryLimit(std::size_t(70));
@@ -609,12 +525,11 @@ void CResourceLimitTest::testLargeAllocations()
 
 void CResourceLimitTest::importCsvDataWithLimiter(core_t::TTime firstTime,
                                                   core_t::TTime bucketLength,
-                                                  CResultWriter &outputResults,
-                                                  const std::string &fileName,
-                                                  CAnomalyDetector &detector,
+                                                  CResultWriter& outputResults,
+                                                  const std::string& fileName,
+                                                  CAnomalyDetector& detector,
                                                   std::size_t limitCutoff,
-                                                  CResourceMonitor &resourceMonitor)
-{
+                                                  CResourceMonitor& resourceMonitor) {
     using TifstreamPtr = boost::shared_ptr<std::ifstream>;
     TifstreamPtr ifs(new std::ifstream(fileName.c_str()));
@@ -630,10 +545,8 @@ void CResourceLimitTest::importCsvDataWithLimiter(core_t::TTime firstTime,
     core_t::TTime lastBucketTime = firstTime;
     std::size_t i = 0;
-    while (std::getline(*ifs, line))
-    {
-        if (i == limitCutoff)
-        {
+    while (std::getline(*ifs, line)) {
+        if (i == limitCutoff) {
             LOG_INFO("Setting limit cutoff now");
             resourceMonitor.m_ByteLimitHigh = 0;
             resourceMonitor.m_ByteLimitLow = 0;
@@ -646,18 +559,12 @@ void CResourceLimitTest::importCsvDataWithLimiter(core_t::TTime firstTime,
         core_t::TTime time;
         CPPUNIT_ASSERT(core::CStringUtils::stringToType(tokens[0], time));
 
-        for (/**/;
-             lastBucketTime + bucketLength <= time;
-             lastBucketTime += bucketLength)
-        {
-            outputResults(detector,
-                          lastBucketTime,
-                          lastBucketTime + bucketLength);
+        for (/**/; lastBucketTime + bucketLength <= time; lastBucketTime += bucketLength) {
+            outputResults(detector, lastBucketTime, lastBucketTime + bucketLength);
         }
 
         CAnomalyDetector::TStrCPtrVec fieldValues;
-        for (std::size_t t = tokens.size() - 1; t > 0; t--)
-        {
+        for (std::size_t t = tokens.size() - 1; t > 0; t--) {
            fieldValues.push_back(&tokens[t]);
         }
 
@@ -665,9 +572,7 @@ void CResourceLimitTest::importCsvDataWithLimiter(core_t::TTime firstTime,
         ++i;
     }
 
-    outputResults(detector,
-                  lastBucketTime,
-                  lastBucketTime + bucketLength);
+    outputResults(detector, lastBucketTime, lastBucketTime + bucketLength);
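    // Flush results for the final bucket here: the loop above only emits a bucket
    // once a record with a later timestamp has been read, so the last one needs
    // this explicit pass.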
    ifs.reset();
 }
diff --git a/lib/model/unittest/CResourceLimitTest.h b/lib/model/unittest/CResourceLimitTest.h
index be38daf431..e12c6f5a82 100644
--- a/lib/model/unittest/CResourceLimitTest.h
+++ b/lib/model/unittest/CResourceLimitTest.h
@@ -10,10 +10,8 @@
 
 #include 
 
-namespace ml
-{
-namespace model
-{
+namespace ml {
+namespace model {
 class CAnomalyDetector;
 class CResourceMonitor;
 }
@@ -21,24 +19,22 @@ class CResourceMonitor;
 
 class CResultWriter;
 
-class CResourceLimitTest : public CppUnit::TestFixture
-{
-    public:
-        void testLimitBy();
-        void testLimitByOver();
-        void testLargeAllocations();
-
-        static CppUnit::Test *suite();
-
-    private:
-        void importCsvDataWithLimiter(ml::core_t::TTime firstTime,
-                                      ml::core_t::TTime bucketLength,
-                                      CResultWriter &outputResults,
-                                      const std::string &fileName,
-                                      ml::model::CAnomalyDetector &detector,
-                                      std::size_t limitCutoff,
-                                      ml::model::CResourceMonitor &resourceMonitor);
+class CResourceLimitTest : public CppUnit::TestFixture {
+public:
+    void testLimitBy();
+    void testLimitByOver();
+    void testLargeAllocations();
+
+    static CppUnit::Test* suite();
+
+private:
+    void importCsvDataWithLimiter(ml::core_t::TTime firstTime,
+                                  ml::core_t::TTime bucketLength,
+                                  CResultWriter& outputResults,
+                                  const std::string& fileName,
+                                  ml::model::CAnomalyDetector& detector,
+                                  std::size_t limitCutoff,
+                                  ml::model::CResourceMonitor& resourceMonitor);
 };
 
 #endif // INCLUDED_CResourceLimitTest_h
-
diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc
index 53d4997098..fedc887f00 100644
--- a/lib/model/unittest/CResourceMonitorTest.cc
+++ b/lib/model/unittest/CResourceMonitorTest.cc
@@ -5,7 +5,6 @@
  */
 #include "CResourceMonitorTest.h"
-
 #include 
 #include 
 #include 
@@ -19,33 +18,26 @@ using namespace ml;
 using namespace model;
 
+CppUnit::Test* CResourceMonitorTest::suite() {
+    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CResourceMonitorTest");
 
-CppUnit::Test *CResourceMonitorTest::suite()
-{
-    CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CResourceMonitorTest");
-
-    suiteOfTests->addTest( new CppUnit::TestCaller<CResourceMonitorTest>(
-                                   "CResourceMonitorTest::testMonitor",
-                                   &CResourceMonitorTest::testMonitor) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CResourceMonitorTest>(
-                                   "CResourceMonitorTest::testPruning",
-                                   &CResourceMonitorTest::testPruning) );
-    suiteOfTests->addTest( new CppUnit::TestCaller<CResourceMonitorTest>(
-                                   "CResourceMonitorTest::testExtraMemory",
-                                   &CResourceMonitorTest::testExtraMemory) );
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CResourceMonitorTest>("CResourceMonitorTest::testMonitor", &CResourceMonitorTest::testMonitor));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CResourceMonitorTest>("CResourceMonitorTest::testPruning", &CResourceMonitorTest::testPruning));
+    suiteOfTests->addTest(
+        new CppUnit::TestCaller<CResourceMonitorTest>("CResourceMonitorTest::testExtraMemory", &CResourceMonitorTest::testExtraMemory));
 
     return suiteOfTests;
 }
 
-void CResourceMonitorTest::setUp()
-{
+void CResourceMonitorTest::setUp() {
     // Other test suites also use the string store, and it will mess up the
     // tests in this suite if the string store is not empty when they start
     CStringStore::names().clearEverythingTestOnly();
     CStringStore::influencers().clearEverythingTestOnly();
 }
 
-void CResourceMonitorTest::testMonitor()
-{
+void CResourceMonitorTest::testMonitor() {
     const std::string EMPTY_STRING;
     const core_t::TTime FIRST_TIME(358556400);
     const core_t::TTime BUCKET_LENGTH(3600);
@@ -57,7 +49,8 @@ void CResourceMonitorTest::testMonitor()
                    function_t::E_IndividualMetric,
                    false,
                    model_t::E_XF_None,
-                   "value", 
"colour"); + "value", + "colour"); CAnomalyDetector detector1(1, // identifier limits, @@ -73,8 +66,8 @@ void CResourceMonitorTest::testMonitor() FIRST_TIME, modelConfig.factory(key)); - std::size_t mem = detector1.memoryUsage() + detector2.memoryUsage() + - CStringStore::names().memoryUsage() + CStringStore::influencers().memoryUsage(); + std::size_t mem = + detector1.memoryUsage() + detector2.memoryUsage() + CStringStore::names().memoryUsage() + CStringStore::influencers().memoryUsage(); { // Test default constructor @@ -84,18 +77,13 @@ void CResourceMonitorTest::testMonitor() CPPUNIT_ASSERT(mon.m_ByteLimitHigh > mon.m_ByteLimitLow); CPPUNIT_ASSERT(mon.m_AllowAllocations); LOG_DEBUG("Resource limit is: " << mon.m_ByteLimitHigh); - if (sizeof(std::size_t) == 4) - { + if (sizeof(std::size_t) == 4) { // 32-bit platform CPPUNIT_ASSERT_EQUAL(std::size_t(1024ull * 1024 * 1024 / 2), mon.m_ByteLimitHigh); - } - else if (sizeof(std::size_t) == 8) - { + } else if (sizeof(std::size_t) == 8) { // 64-bit platform CPPUNIT_ASSERT_EQUAL(std::size_t(4096ull * 1024 * 1024 / 2), mon.m_ByteLimitHigh); - } - else - { + } else { // Unexpected platform CPPUNIT_ASSERT(false); } @@ -142,7 +130,7 @@ void CResourceMonitorTest::testMonitor() { // Check that High limit can be breached and then gone back CResourceMonitor mon; - CPPUNIT_ASSERT(mem > 5); // This SHOULD be OK + CPPUNIT_ASSERT(mem > 5); // This SHOULD be OK // Let's go above the low but below the high limit mon.m_ByteLimitHigh = mem + 1; @@ -274,8 +262,7 @@ void CResourceMonitorTest::testMonitor() { // Test the need to report usage based on a change in levels, up and down CResourceMonitor mon; - mon.memoryUsageReporter( - boost::bind(&CResourceMonitorTest::reportCallback, this, _1)); + mon.memoryUsageReporter(boost::bind(&CResourceMonitorTest::reportCallback, this, _1)); CPPUNIT_ASSERT(!mon.needToSendReport()); std::size_t origTotalMemory = mon.totalMemory(); @@ -317,23 +304,22 @@ void CResourceMonitorTest::testMonitor() } } -void CResourceMonitorTest::testPruning() -{ +void CResourceMonitorTest::testPruning() { const std::string EMPTY_STRING; const core_t::TTime FIRST_TIME(358556400); const core_t::TTime BUCKET_LENGTH(3600); - CAnomalyDetectorModelConfig modelConfig = - CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier function_t::E_IndividualMetric, false, model_t::E_XF_None, - "value", "colour"); + "value", + "colour"); - CResourceMonitor &monitor = limits.resourceMonitor(); + CResourceMonitor& monitor = limits.resourceMonitor(); monitor.memoryLimit(140); CAnomalyDetector detector(1, // identifier @@ -390,23 +376,22 @@ void CResourceMonitorTest::testPruning() CPPUNIT_ASSERT(monitor.m_PruneWindow > level); } -void CResourceMonitorTest::testExtraMemory() -{ +void CResourceMonitorTest::testExtraMemory() { const std::string EMPTY_STRING; const core_t::TTime FIRST_TIME(358556400); const core_t::TTime BUCKET_LENGTH(3600); - CAnomalyDetectorModelConfig modelConfig = - CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier function_t::E_IndividualMetric, false, model_t::E_XF_None, - "value", "colour"); + "value", + "colour"); - CResourceMonitor &monitor = limits.resourceMonitor(); + CResourceMonitor& monitor = limits.resourceMonitor(); // set the 
limit to 1 MB monitor.memoryLimit(1); @@ -440,14 +425,13 @@ void CResourceMonitorTest::testExtraMemory() CPPUNIT_ASSERT_EQUAL(allocationLimit, monitor.allocationLimit()); } -void CResourceMonitorTest::addTestData(core_t::TTime &firstTime, +void CResourceMonitorTest::addTestData(core_t::TTime& firstTime, const core_t::TTime bucketLength, const std::size_t buckets, const std::size_t newPeoplePerBucket, - std::size_t &startOffset, - CAnomalyDetector &detector, - CResourceMonitor &monitor) -{ + std::size_t& startOffset, + CAnomalyDetector& detector, + CResourceMonitor& monitor) { std::string numberValue("100"); core_t::TTime bucketStart = firstTime; CHierarchicalResults results; @@ -455,29 +439,24 @@ void CResourceMonitorTest::addTestData(core_t::TTime &firstTime, std::size_t numBuckets = 0; - for (core_t::TTime time = firstTime; - time < static_cast(firstTime + bucketLength * buckets); - time += (bucketLength / std::max(std::size_t(1), newPeoplePerBucket))) - { + for (core_t::TTime time = firstTime; time < static_cast(firstTime + bucketLength * buckets); + time += (bucketLength / std::max(std::size_t(1), newPeoplePerBucket))) { bool newBucket = false; - for (; bucketStart + bucketLength <= time; bucketStart += bucketLength) - { + for (; bucketStart + bucketLength <= time; bucketStart += bucketLength) { detector.buildResults(bucketStart, bucketStart + bucketLength, results); monitor.pruneIfRequired(bucketStart); numBuckets++; newBucket = true; } - if (newBucket) - { + if (newBucket) { CAnomalyDetector::TStrCPtrVec fieldValues; fieldValues.push_back(&pervasive); fieldValues.push_back(&numberValue); detector.addRecord(time, fieldValues); } - if (newPeoplePerBucket > 0) - { + if (newPeoplePerBucket > 0) { CAnomalyDetector::TStrCPtrVec fieldValues; std::ostringstream ss1; ss1 << "person" << startOffset++; @@ -491,8 +470,6 @@ void CResourceMonitorTest::addTestData(core_t::TTime &firstTime, firstTime = bucketStart; } -void CResourceMonitorTest::reportCallback(const CResourceMonitor::SResults &results) -{ +void CResourceMonitorTest::reportCallback(const CResourceMonitor::SResults& results) { m_CallbackResults = results; } - diff --git a/lib/model/unittest/CResourceMonitorTest.h b/lib/model/unittest/CResourceMonitorTest.h index 8e2bfc1cf2..f80f26e938 100644 --- a/lib/model/unittest/CResourceMonitorTest.h +++ b/lib/model/unittest/CResourceMonitorTest.h @@ -10,35 +10,35 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { class CAnomalyDetector; } } -class CResourceMonitorTest : public CppUnit::TestFixture -{ - public: - void setUp(); +class CResourceMonitorTest : public CppUnit::TestFixture { +public: + void setUp(); - void testMonitor(); - void testPruning(); - void testExtraMemory(); + void testMonitor(); + void testPruning(); + void testExtraMemory(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); - private: - void reportCallback(const ml::model::CResourceMonitor::SResults &results); +private: + void reportCallback(const ml::model::CResourceMonitor::SResults& results); - void addTestData(ml::core_t::TTime &firstTime, const ml::core_t::TTime bucketLength, - const std::size_t buckets, const std::size_t newPeoplePerBucket, - std::size_t &startOffset, ml::model::CAnomalyDetector &detector, - ml::model::CResourceMonitor &monitor); + void addTestData(ml::core_t::TTime& firstTime, + const ml::core_t::TTime bucketLength, + const std::size_t buckets, + const std::size_t newPeoplePerBucket, + std::size_t& startOffset, + ml::model::CAnomalyDetector& detector, + 
ml::model::CResourceMonitor& monitor); - private: - ml::model::CResourceMonitor::SResults m_CallbackResults; +private: + ml::model::CResourceMonitor::SResults m_CallbackResults; }; #endif // INCLUDED_CResourceMonitorTest_h diff --git a/lib/model/unittest/CRuleConditionTest.cc b/lib/model/unittest/CRuleConditionTest.cc index 5c20946460..31f998bdfc 100644 --- a/lib/model/unittest/CRuleConditionTest.cc +++ b/lib/model/unittest/CRuleConditionTest.cc @@ -23,29 +23,23 @@ using namespace ml; using namespace model; -namespace -{ +namespace { using TStrVec = std::vector<std::string>; const std::string EMPTY_STRING; - } +CppUnit::Test* CRuleConditionTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRuleConditionTest"); -CppUnit::Test *CRuleConditionTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CRuleConditionTest"); - - suiteOfTests->addTest(new CppUnit::TestCaller<CRuleConditionTest>( - "CRuleConditionTest::testTimeContition", - &CRuleConditionTest::testTimeContition)); + suiteOfTests->addTest( + new CppUnit::TestCaller<CRuleConditionTest>("CRuleConditionTest::testTimeContition", &CRuleConditionTest::testTimeContition)); return suiteOfTests; } -void CRuleConditionTest::testTimeContition() -{ +void CRuleConditionTest::testTimeContition() { core_t::TTime bucketLength = 100; core_t::TTime startTime = 100; CSearchKey key; @@ -54,10 +48,21 @@ void CRuleConditionTest::testTimeContition() model_t::TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr( - new CDataGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, + model_t::E_None, + params, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + EMPTY_STRING, + TStrVec(), + false, + key, + features, + startTime, + 0)); CMockModel model(params, gathererPtr, influenceCalculators); @@ -71,10 +76,15 @@ void CRuleConditionTest::testTimeContition() CPPUNIT_ASSERT(condition.isCategorical() == false); model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(condition.test(model, model_t::E_IndividualCountByBucketAndPerson, resultType, false, - std::size_t(0), std::size_t(1), core_t::TTime(450)) == false); - CPPUNIT_ASSERT(condition.test(model, model_t::E_IndividualCountByBucketAndPerson, resultType, false, - std::size_t(0), std::size_t(1), core_t::TTime(550))); + CPPUNIT_ASSERT(condition.test(model, + model_t::E_IndividualCountByBucketAndPerson, + resultType, + false, + std::size_t(0), + std::size_t(1), + core_t::TTime(450)) == false); + CPPUNIT_ASSERT(condition.test( + model, model_t::E_IndividualCountByBucketAndPerson, resultType, false, std::size_t(0), std::size_t(1), core_t::TTime(550))); } { @@ -87,9 +97,14 @@ void CRuleConditionTest::testTimeContition() CPPUNIT_ASSERT(condition.isCategorical() == false); model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(condition.test(model, model_t::E_IndividualCountByBucketAndPerson, resultType, false, - std::size_t(0), std::size_t(1), core_t::TTime(600)) == false); - CPPUNIT_ASSERT(condition.test(model, model_t::E_IndividualCountByBucketAndPerson, resultType, false, - std::size_t(0), std::size_t(1), core_t::TTime(599))); + CPPUNIT_ASSERT(condition.test(model, + model_t::E_IndividualCountByBucketAndPerson, + resultType, + false,
std::size_t(0), + std::size_t(1), + core_t::TTime(600)) == false); + CPPUNIT_ASSERT(condition.test( + model, model_t::E_IndividualCountByBucketAndPerson, resultType, false, std::size_t(0), std::size_t(1), core_t::TTime(599))); } } diff --git a/lib/model/unittest/CRuleConditionTest.h b/lib/model/unittest/CRuleConditionTest.h index 7fc55092a1..71657d6834 100644 --- a/lib/model/unittest/CRuleConditionTest.h +++ b/lib/model/unittest/CRuleConditionTest.h @@ -8,13 +8,11 @@ #include <cppunit/extensions/HelperMacros.h> +class CRuleConditionTest : public CppUnit::TestFixture { +public: + void testTimeContition(); -class CRuleConditionTest : public CppUnit::TestFixture -{ - public: - void testTimeContition(); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CRuleConditionTest_h diff --git a/lib/model/unittest/CSampleQueueTest.cc b/lib/model/unittest/CSampleQueueTest.cc index e20b3989aa..2dcf62d7df 100644 --- a/lib/model/unittest/CSampleQueueTest.cc +++ b/lib/model/unittest/CSampleQueueTest.cc @@ -32,15 +32,13 @@ using TSampleVec = std::vector<CSample>; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator; using TTestSampleQueue = CSampleQueue<TMeanAccumulator>; -void CSampleQueueTest::testSampleToString() -{ +void CSampleQueueTest::testSampleToString() { CSample sample(10, {3.0}, 0.8, 1.0); CPPUNIT_ASSERT_EQUAL(std::string("10;8e-1;1;3"), CSample::SToString()(sample)); } -void CSampleQueueTest::testSampleFromString() -{ +void CSampleQueueTest::testSampleFromString() { CSample sample; CPPUNIT_ASSERT(CSample::SFromString()("15;7e-1;3;2.0", sample)); @@ -51,8 +49,7 @@ void CSampleQueueTest::testSampleFromString() CPPUNIT_ASSERT_EQUAL(3.0, sample.count()); } -void CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample() -{ +void CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -70,8 +67,7 @@ void CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample() CPPUNIT_ASSERT_EQUAL(1.0, queue[0].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenQueueIsFullShouldResize() -{ +void CSampleQueueTest::testAddGivenQueueIsFullShouldResize() { std::size_t sampleCountFactor(1); std::size_t latencyBuckets(1); double growthFactor(0.5); @@ -108,8 +104,7 @@ void CSampleQueueTest::testAddGivenQueueIsFullShouldResize() CPPUNIT_ASSERT_EQUAL(std::size_t(9), queue.capacity()); } -void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample() -{ +void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -128,8 +123,7 @@ void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample CPPUNIT_ASSERT_EQUAL(3.0, queue[0].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket() -{ +void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -158,8 +152,7 @@ void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample CPPUNIT_ASSERT_EQUAL(core_t::TTime(10), queue.latestEnd()); } -void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample() -{ +void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5);
double growthFactor(0.1); @@ -185,8 +178,7 @@ void CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample() CPPUNIT_ASSERT_EQUAL(5.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample() -{ +void CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -212,8 +204,7 @@ void CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample() CPPUNIT_ASSERT_EQUAL(1.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample() -{ +void CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -234,8 +225,7 @@ void CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample() CPPUNIT_ASSERT_EQUAL(6.0, queue[0].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -256,8 +246,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample CPPUNIT_ASSERT_EQUAL(1.0, queue[2].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -278,8 +267,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSub CPPUNIT_ASSERT_EQUAL(1.0, queue[2].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -300,8 +288,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliest CPPUNIT_ASSERT_EQUAL(5.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -327,8 +314,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliest CPPUNIT_ASSERT_EQUAL(1.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -352,8 +338,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample() CPPUNIT_ASSERT_EQUAL(3.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -377,8 +362,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLat 
CPPUNIT_ASSERT_EQUAL(2.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -395,8 +379,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLat CPPUNIT_ASSERT_EQUAL(std::size_t(4), queue.size()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -419,8 +402,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullS CPPUNIT_ASSERT_EQUAL(2.0, queue[2].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -443,8 +425,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSa CPPUNIT_ASSERT_EQUAL(2.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -467,8 +448,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubS CPPUNIT_ASSERT_EQUAL(6.0, queue[2].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -491,8 +471,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSampl CPPUNIT_ASSERT_EQUAL(6.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -515,8 +494,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleB CPPUNIT_ASSERT_EQUAL(2.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -539,8 +517,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOn CPPUNIT_ASSERT_EQUAL(2.0, queue[2].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -562,8 +539,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap() CPPUNIT_ASSERT_EQUAL(1.0, 
queue[1].s_Statistic.count()); } -void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap() -{ +void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(5); double growthFactor(0.1); @@ -586,8 +562,7 @@ void CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap() CPPUNIT_ASSERT_EQUAL(3.0, queue[1].s_Statistic.count()); } -void CSampleQueueTest::testCanSampleGivenEmptyQueue() -{ +void CSampleQueueTest::testCanSampleGivenEmptyQueue() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -597,8 +572,7 @@ void CSampleQueueTest::testCanSampleGivenEmptyQueue() CPPUNIT_ASSERT(queue.canSample(42) == false); } -void CSampleQueueTest::testCanSample() -{ +void CSampleQueueTest::testCanSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -616,8 +590,7 @@ void CSampleQueueTest::testCanSample() CPPUNIT_ASSERT(queue.canSample(40)); } -void CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated() -{ +void CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -645,8 +618,7 @@ void CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated() CPPUNIT_ASSERT_EQUAL(1.0, queue[0].s_Statistic.count()); } -void CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated() -{ +void CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -675,8 +647,7 @@ void CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated() CPPUNIT_ASSERT_EQUAL(1.0, queue[0].s_Statistic.count()); } -void CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder() -{ +void CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -713,8 +684,7 @@ void CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder() CPPUNIT_ASSERT_EQUAL(1.0, queue[0].s_Statistic.count()); } -void CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder() -{ +void CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -757,8 +727,7 @@ void CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder() CPPUNIT_ASSERT_EQUAL(1.0, queue[0].s_Statistic.count()); } -void CSampleQueueTest::testSampleGivenNoSampleToBeCreated() -{ +void CSampleQueueTest::testSampleGivenNoSampleToBeCreated() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -777,8 +746,7 @@ void CSampleQueueTest::testSampleGivenNoSampleToBeCreated() CPPUNIT_ASSERT_EQUAL(std::size_t(2), queue.size()); } -void CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess() -{ +void CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -800,8 +768,7 @@ void CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUs CPPUNIT_ASSERT_EQUAL(1.25, samples[0].varianceScale()); } -void CSampleQueueTest::testResetBucketGivenEmptyQueue() -{ +void 
CSampleQueueTest::testResetBucketGivenEmptyQueue() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -813,8 +780,7 @@ void CSampleQueueTest::testResetBucketGivenEmptyQueue() CPPUNIT_ASSERT(queue.empty()); } -void CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample() -{ +void CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -834,8 +800,7 @@ void CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample() CPPUNIT_ASSERT_EQUAL(std::size_t(6), queue.size()); } -void CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample() -{ +void CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -860,8 +825,7 @@ void CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample() CPPUNIT_ASSERT_EQUAL(core_t::TTime(20), queue[3].s_Start); } -void CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples() -{ +void CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -878,8 +842,7 @@ void CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples() CPPUNIT_ASSERT_EQUAL(std::size_t(3), queue.size()); } -void CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample() -{ +void CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -902,8 +865,7 @@ void CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample() CPPUNIT_ASSERT_EQUAL(core_t::TTime(10), queue[2].s_Start); } -void CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample() -{ +void CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -928,8 +890,7 @@ void CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample() CPPUNIT_ASSERT_EQUAL(core_t::TTime(10), queue[4].s_Start); } -void CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample() -{ +void CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -949,8 +910,7 @@ void CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample() CPPUNIT_ASSERT_EQUAL(std::size_t(6), queue.size()); } -void CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets() -{ +void CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets() { std::size_t sampleCountFactor(10); std::size_t latencyBuckets(3); double growthFactor(0.1); @@ -964,28 +924,23 @@ core_t::TTime latestTime = bucketLength * (latencyBuckets + 1); TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) - { + for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { TDoubleVec testData; - rng.generateUniformSamples(static_cast<double>(latestTime - latency), - static_cast<double>(latestTime), 1, testData); - latestTime += 60 + static_cast<core_t::TTime>(40.0 * std::sin( - boost::math::constants::two_pi<double>() - * static_cast<double>(latestTime % 86400) /
86400.0)); + rng.generateUniformSamples(static_cast<double>(latestTime - latency), static_cast<double>(latestTime), 1, testData); + latestTime += 60 + static_cast<core_t::TTime>(40.0 * std::sin(boost::math::constants::two_pi<double>() * + static_cast<double>(latestTime % 86400) / 86400.0)); core_t::TTime measurementTime = static_cast<core_t::TTime>(testData[0]); queue.add(measurementTime, {1.0}, 1u, sampleCount); } - for (std::size_t i = 0; i < queue.size(); ++i) - { + for (std::size_t i = 0; i < queue.size(); ++i) { core_t::TTime startBucket = maths::CIntegerTools::floor(queue[i].s_Start, bucketLength); core_t::TTime endBucket = maths::CIntegerTools::floor(queue[i].s_End, bucketLength); CPPUNIT_ASSERT_EQUAL(startBucket, endBucket); } } -void CSampleQueueTest::testPersistence() -{ +void CSampleQueueTest::testPersistence() { std::size_t sampleCountFactor(2); std::size_t latencyBuckets(2); double growthFactor(0.1); @@ -1009,9 +964,7 @@ core::CRapidXmlStateRestoreTraverser traverser(parser); TTestSampleQueue restoredQueue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - traverser.traverseSubLevel(boost::bind(&TTestSampleQueue::acceptRestoreTraverser, - &restoredQueue, - _1)); + traverser.traverseSubLevel(boost::bind(&TTestSampleQueue::acceptRestoreTraverser, &restoredQueue, _1)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), restoredQueue.size()); @@ -1028,8 +981,7 @@ CPPUNIT_ASSERT_EQUAL(1.0, restoredQueue[0].s_Statistic.count()); } -void CSampleQueueTest::testQualityOfSamplesGivenConstantRate() -{ +void CSampleQueueTest::testQualityOfSamplesGivenConstantRate() { std::size_t sampleCountFactor(5); std::size_t latencyBuckets(3); double growthFactor(0.1); @@ -1046,16 +998,13 @@ maths::CBasicStatistics::SSampleMean<double>::TAccumulator meanMinVariance; maths::CBasicStatistics::SSampleMean<double>::TAccumulator meanMaxVariance; - for (std::size_t runId = 0; runId < numberOfRuns; ++ runId) - { + for (std::size_t runId = 0; runId < numberOfRuns; ++runId) { TSampleVec samples; core_t::TTime latestTime = bucketLength * (latencyBuckets + 1); TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) - { + for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { TDoubleVec testData; - rng.generateUniformSamples(static_cast<double>(latestTime - latency), - static_cast<double>(latestTime), 1, testData); + rng.generateUniformSamples(static_cast<double>(latestTime - latency), static_cast<double>(latestTime), 1, testData); latestTime += 60; core_t::TTime measurementTime = static_cast<core_t::TTime>(testData[0]); queue.add(measurementTime, {1.0}, 1u, sampleCount); @@ -1066,8 +1015,7 @@ maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator varianceStat; maths::CBasicStatistics::COrderStatisticsStack<double, 1u> varianceMin; maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>> varianceMax; - for (std::size_t i = 0; i < samples.size(); ++i) - { + for (std::size_t i = 0; i < samples.size(); ++i) { varianceStat.add(samples[i].varianceScale()); varianceMin.add(samples[i].varianceScale()); varianceMax.add(samples[i].varianceScale()); @@ -1095,8 +1043,7 @@ CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMaxVariance) < 1.1); } -void CSampleQueueTest::testQualityOfSamplesGivenVariableRate() -{ +void
CSampleQueueTest::testQualityOfSamplesGivenVariableRate() { std::size_t sampleCountFactor(5); std::size_t latencyBuckets(3); double growthFactor(0.1); @@ -1113,19 +1060,15 @@ maths::CBasicStatistics::SSampleMean<double>::TAccumulator meanMinVariance; maths::CBasicStatistics::SSampleMean<double>::TAccumulator meanMaxVariance; - for (std::size_t runId = 0; runId < numberOfRuns; ++ runId) - { + for (std::size_t runId = 0; runId < numberOfRuns; ++runId) { TSampleVec samples; core_t::TTime latestTime = bucketLength * (latencyBuckets + 1); TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) - { + for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { TDoubleVec testData; - rng.generateUniformSamples(static_cast<double>(latestTime - latency), - static_cast<double>(latestTime), 1, testData); - latestTime += 60 + static_cast<core_t::TTime>(40.0 * std::sin( - boost::math::constants::two_pi<double>() - * static_cast<double>(latestTime % 86400) / 86400.0)); + rng.generateUniformSamples(static_cast<double>(latestTime - latency), static_cast<double>(latestTime), 1, testData); + latestTime += 60 + static_cast<core_t::TTime>(40.0 * std::sin(boost::math::constants::two_pi<double>() * + static_cast<double>(latestTime % 86400) / 86400.0)); core_t::TTime measurementTime = static_cast<core_t::TTime>(testData[0]); queue.add(measurementTime, {1.0}, 1u, sampleCount); } @@ -1135,8 +1078,7 @@ maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator varianceStat; maths::CBasicStatistics::COrderStatisticsStack<double, 1u> varianceMin; maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>> varianceMax; - for (std::size_t i = 0; i < samples.size(); ++i) - { + for (std::size_t i = 0; i < samples.size(); ++i) { varianceStat.add(samples[i].varianceScale()); varianceMin.add(samples[i].varianceScale()); varianceMax.add(samples[i].varianceScale()); @@ -1164,8 +1106,7 @@ CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMaxVariance) < 1.16); } -void CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder() -{ +void CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder() { std::size_t sampleCountFactor(5); std::size_t latencyBuckets(500); double growthFactor(0.1); @@ -1180,8 +1121,7 @@ core_t::TTime latestTime = 60 * numberOfMeasurements; core_t::TTime time = latestTime; TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) - { + for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { queue.add(time, {1.0}, 1u, sampleCount); time -= 60; } @@ -1191,8 +1131,7 @@ maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator varianceStat; maths::CBasicStatistics::COrderStatisticsStack<double, 1u> varianceMin; maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>> varianceMax; - for (std::size_t i = 0; i < samples.size(); ++i) - { + for (std::size_t i = 0; i < samples.size(); ++i) { varianceStat.add(samples[i].varianceScale()); varianceMin.add(samples[i].varianceScale()); varianceMax.add(samples[i].varianceScale()); @@ -1209,150 +1148,124 @@ void
CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder CPPUNIT_ASSERT(varianceMax[0] <= 1.0); } -CppUnit::Test *CSampleQueueTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CSampleQueueTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSampleToString", - &CSampleQueueTest::testSampleToString)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSampleFromString", - &CSampleQueueTest::testSampleFromString)); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample", - &CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenQueueIsFullShouldResize", - &CSampleQueueTest::testAddGivenQueueIsFullShouldResize)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample", - &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket", - &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample", - &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample", - &CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample", - &CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample)); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket)); - 
suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap)); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testCanSampleGivenEmptyQueue", - &CSampleQueueTest::testCanSampleGivenEmptyQueue)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testCanSample", - &CSampleQueueTest::testCanSample)); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated", - &CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated", - &CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder", - &CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder", - &CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSampleGivenNoSampleToBeCreated", - &CSampleQueueTest::testSampleGivenNoSampleToBeCreated)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess", - &CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess)); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testResetBucketGivenEmptyQueue", - &CSampleQueueTest::testResetBucketGivenEmptyQueue)); - suiteOfTests->addTest( new CppUnit::TestCaller( - 
"CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample", - &CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample", - &CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples", - &CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample", - &CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample", - &CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample", - &CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample)); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets", - &CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets)); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testPersistence", - &CSampleQueueTest::testPersistence)); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testQualityOfSamplesGivenConstantRate", - &CSampleQueueTest::testQualityOfSamplesGivenConstantRate)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testQualityOfSamplesGivenVariableRate", - &CSampleQueueTest::testQualityOfSamplesGivenVariableRate)); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder", - &CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder)); +CppUnit::Test* CSampleQueueTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSampleQueueTest"); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testSampleToString", &CSampleQueueTest::testSampleToString)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testSampleFromString", &CSampleQueueTest::testSampleFromString)); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample", + &CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenQueueIsFullShouldResize", + &CSampleQueueTest::testAddGivenQueueIsFullShouldResize)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample", + &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket", + &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample", + &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample", + 
&CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample", + &CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample)); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap)); + suiteOfTests->addTest(new 
CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap)); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testCanSampleGivenEmptyQueue", + &CSampleQueueTest::testCanSampleGivenEmptyQueue)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testCanSample", &CSampleQueueTest::testCanSample)); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated", + &CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated", + &CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder", + &CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder", + &CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenNoSampleToBeCreated", + &CSampleQueueTest::testSampleGivenNoSampleToBeCreated)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess", + &CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess)); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenEmptyQueue", + &CSampleQueueTest::testResetBucketGivenEmptyQueue)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample", + &CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample", + &CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples", + &CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample", + &CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample", + &CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample", + &CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample)); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets", + &CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets)); + + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testPersistence", &CSampleQueueTest::testPersistence)); + + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testQualityOfSamplesGivenConstantRate", + &CSampleQueueTest::testQualityOfSamplesGivenConstantRate)); + suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testQualityOfSamplesGivenVariableRate", + 
&CSampleQueueTest::testQualityOfSamplesGivenVariableRate)); + suiteOfTests->addTest( + new CppUnit::TestCaller("CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder", + &CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder)); return suiteOfTests; } diff --git a/lib/model/unittest/CSampleQueueTest.h b/lib/model/unittest/CSampleQueueTest.h index a60c354e90..98b8bc1f9a 100644 --- a/lib/model/unittest/CSampleQueueTest.h +++ b/lib/model/unittest/CSampleQueueTest.h @@ -10,63 +10,62 @@ #include "../../../include/model/CSampleQueue.h" -class CSampleQueueTest : public CppUnit::TestFixture -{ - public: - void testSampleToString(); - void testSampleFromString(); +class CSampleQueueTest : public CppUnit::TestFixture { +public: + void testSampleToString(); + void testSampleFromString(); - void testAddGivenQueueIsEmptyShouldCreateNewSubSample(); - void testAddGivenQueueIsFullShouldResize(); - void testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample(); - void testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket(); - void testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample(); - void testAddGivenTimeIsInOrderAndFarFromLatestSubSample(); - void testAddGivenTimeIsWithinFullLatestSubSample(); + void testAddGivenQueueIsEmptyShouldCreateNewSubSample(); + void testAddGivenQueueIsFullShouldResize(); + void testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample(); + void testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket(); + void testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample(); + void testAddGivenTimeIsInOrderAndFarFromLatestSubSample(); + void testAddGivenTimeIsWithinFullLatestSubSample(); - void testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample(); - void testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample(); - void testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample(); - void testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket(); - void testAddGivenTimeIsHistoricalAndWithinSomeSubSample(); - void testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest(); - void testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket(); - void testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples(); - void testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples(); - void testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples(); - void testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples(); - void testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace(); - void testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace(); - void testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap(); - void testAddGivenTimeIsHistoricalAndFallsInTooSmallGap(); + void testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample(); + void testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample(); + void testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample(); + void testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket(); + void testAddGivenTimeIsHistoricalAndWithinSomeSubSample(); + void testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest(); + void testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket(); + void testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples(); + void testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples(); + void 
testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples(); + void testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples(); + void testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace(); + void testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace(); + void testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap(); + void testAddGivenTimeIsHistoricalAndFallsInTooSmallGap(); - void testCanSampleGivenEmptyQueue(); - void testCanSample(); + void testCanSampleGivenEmptyQueue(); + void testCanSample(); - void testSampleGivenExactlyOneSampleOfExactCountToBeCreated(); - void testSampleGivenExactlyOneSampleOfOverCountToBeCreated(); - void testSampleGivenOneSampleToBeCreatedAndRemainder(); - void testSampleGivenTwoSamplesToBeCreatedAndRemainder(); - void testSampleGivenNoSampleToBeCreated(); - void testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess(); + void testSampleGivenExactlyOneSampleOfExactCountToBeCreated(); + void testSampleGivenExactlyOneSampleOfOverCountToBeCreated(); + void testSampleGivenOneSampleToBeCreatedAndRemainder(); + void testSampleGivenTwoSamplesToBeCreatedAndRemainder(); + void testSampleGivenNoSampleToBeCreated(); + void testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess(); - void testResetBucketGivenEmptyQueue(); - void testResetBucketGivenBucketBeforeEarliestSubSample(); - void testResetBucketGivenBucketAtEarliestSubSample(); - void testResetBucketGivenBucketInBetweenWithoutAnySubSamples(); - void testResetBucketGivenBucketAtInBetweenSubSample(); - void testResetBucketGivenBucketAtLatestSubSample(); - void testResetBucketGivenBucketAfterLatestSubSample(); + void testResetBucketGivenEmptyQueue(); + void testResetBucketGivenBucketBeforeEarliestSubSample(); + void testResetBucketGivenBucketAtEarliestSubSample(); + void testResetBucketGivenBucketInBetweenWithoutAnySubSamples(); + void testResetBucketGivenBucketAtInBetweenSubSample(); + void testResetBucketGivenBucketAtLatestSubSample(); + void testResetBucketGivenBucketAfterLatestSubSample(); - void testSubSamplesNeverSpanOverDifferentBuckets(); + void testSubSamplesNeverSpanOverDifferentBuckets(); - void testPersistence(); + void testPersistence(); - void testQualityOfSamplesGivenConstantRate(); - void testQualityOfSamplesGivenVariableRate(); - void testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder(); + void testQualityOfSamplesGivenConstantRate(); + void testQualityOfSamplesGivenVariableRate(); + void testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CSampleQueueTest_h diff --git a/lib/model/unittest/CStringStoreTest.cc b/lib/model/unittest/CStringStoreTest.cc index d961c7bd29..3989e998f6 100644 --- a/lib/model/unittest/CStringStoreTest.cc +++ b/lib/model/unittest/CStringStoreTest.cc @@ -19,96 +19,74 @@ using namespace ml; using namespace model; -namespace -{ +namespace { using TSizeVec = std::vector<std::size_t>; using TStrVec = std::vector<std::string>; using TStoredStringPtrVec = std::vector<core::CStoredStringPtr>; using TStrCPtrUSet = boost::unordered_set<const std::string*>; -class CStringThread : public core::CThread -{ - public: - using TCppUnitExceptionP = boost::shared_ptr<CppUnit::Exception>; +class CStringThread : public core::CThread { +public: + using TCppUnitExceptionP = boost::shared_ptr<CppUnit::Exception>; - public: - CStringThread(std::size_t i, const TStrVec &strings) - : m_I(i), - m_Strings(strings) - { - } +public: + CStringThread(std::size_t i, const TStrVec& strings) : m_I(i), m_Strings(strings) {} - void
uniques(TStrCPtrUSet &result) const - { - result.insert(m_UniquePtrs.begin(), m_UniquePtrs.end()); - } + void uniques(TStrCPtrUSet& result) const { result.insert(m_UniquePtrs.begin(), m_UniquePtrs.end()); } - void propagateLastThreadAssert() - { - if (m_LastException != 0) - { - throw *m_LastException; - } + void propagateLastThreadAssert() { + if (m_LastException != 0) { + throw *m_LastException; } + } - void clearPtrs() - { - m_UniquePtrs.clear(); - m_Ptrs.clear(); - } + void clearPtrs() { + m_UniquePtrs.clear(); + m_Ptrs.clear(); + } - private: - virtual void run() - { - try - { - std::size_t n = m_Strings.size(); - for (std::size_t i = m_I; i < 1000; ++i) - { - m_Ptrs.push_back(core::CStoredStringPtr()); - m_Ptrs.back() = CStringStore::names().get(m_Strings[i % n]); - m_UniquePtrs.insert(m_Ptrs.back().get()); - CPPUNIT_ASSERT_EQUAL(m_Strings[i % n], *m_Ptrs.back()); - } - for (std::size_t i = m_I; i < 1000000; ++i) - { - core::CStoredStringPtr p = CStringStore::names().get(m_Strings[i % n]); - m_UniquePtrs.insert(p.get()); - CPPUNIT_ASSERT_EQUAL(m_Strings[i % n], *p); - } +private: + virtual void run() { + try { + std::size_t n = m_Strings.size(); + for (std::size_t i = m_I; i < 1000; ++i) { + m_Ptrs.push_back(core::CStoredStringPtr()); + m_Ptrs.back() = CStringStore::names().get(m_Strings[i % n]); + m_UniquePtrs.insert(m_Ptrs.back().get()); + CPPUNIT_ASSERT_EQUAL(m_Strings[i % n], *m_Ptrs.back()); } - // CppUnit won't automatically catch the exceptions thrown by - // assertions in newly created threads, so propagate manually - catch (CppUnit::Exception &e) - { - m_LastException.reset(new CppUnit::Exception(e)); + for (std::size_t i = m_I; i < 1000000; ++i) { + core::CStoredStringPtr p = CStringStore::names().get(m_Strings[i % n]); + m_UniquePtrs.insert(p.get()); + CPPUNIT_ASSERT_EQUAL(m_Strings[i % n], *p); } } - - virtual void shutdown() - { + // CppUnit won't automatically catch the exceptions thrown by + // assertions in newly created threads, so propagate manually + catch (CppUnit::Exception& e) { + m_LastException.reset(new CppUnit::Exception(e)); } + } - private: - std::size_t m_I; - TStrVec m_Strings; - TStoredStringPtrVec m_Ptrs; - TStrCPtrUSet m_UniquePtrs; - TCppUnitExceptionP m_LastException; -}; + virtual void shutdown() {} +private: + std::size_t m_I; + TStrVec m_Strings; + TStoredStringPtrVec m_Ptrs; + TStrCPtrUSet m_UniquePtrs; + TCppUnitExceptionP m_LastException; +}; } -void CStringStoreTest::setUp() -{ +void CStringStoreTest::setUp() { // Other test suites also use the string store, and it will mess up the // tests in this suite if the string store is not empty when they start CStringStore::names().clearEverythingTestOnly(); CStringStore::influencers().clearEverythingTestOnly(); } -void CStringStoreTest::testStringStore() -{ +void CStringStoreTest::testStringStore() { TStrVec strings; strings.emplace_back("Milano"); strings.emplace_back("Monza"); @@ -152,16 +130,13 @@ using TThreadPtr = boost::shared_ptr<CStringThread>; using TThreadVec = std::vector<TThreadPtr>; TThreadVec threads; - for (std::size_t i = 0; i < 20; ++i) - { + for (std::size_t i = 0; i < 20; ++i) { threads.emplace_back(new CStringThread(i, strings)); } - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { CPPUNIT_ASSERT(threads[i]->start()); } - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { CPPUNIT_ASSERT(threads[i]->waitForFinish()); } @@ -170,14 +145,12 @@ void
-void CStringStoreTest::setUp() -{ +void CStringStoreTest::setUp() { // Other test suites also use the string store, and it will mess up the // tests in this suite if the string store is not empty when they start CStringStore::names().clearEverythingTestOnly(); CStringStore::influencers().clearEverythingTestOnly(); } -void CStringStoreTest::testStringStore() -{ +void CStringStoreTest::testStringStore() { TStrVec strings; strings.emplace_back("Milano"); strings.emplace_back("Monza"); @@ -152,16 +130,13 @@ void CStringStoreTest::testStringStore() using TThreadPtr = boost::shared_ptr<CStringThread>; using TThreadVec = std::vector<TThreadPtr>; TThreadVec threads; - for (std::size_t i = 0; i < 20; ++i) - { + for (std::size_t i = 0; i < 20; ++i) { threads.emplace_back(new CStringThread(i, strings)); } - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { CPPUNIT_ASSERT(threads[i]->start()); } - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { CPPUNIT_ASSERT(threads[i]->waitForFinish()); } @@ -170,14 +145,12 @@ void CStringStoreTest::testStringStore() CPPUNIT_ASSERT_EQUAL(strings.size(), CStringStore::names().m_Strings.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), CStringStore::influencers().m_Strings.size()); - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { // CppUnit won't automatically catch the exceptions thrown by // assertions in newly created threads, so propagate manually threads[i]->propagateLastThreadAssert(); } - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { threads[i]->clearPtrs(); } @@ -191,53 +164,45 @@ void CStringStoreTest::testStringStore() LOG_DEBUG("Testing multi-threaded string duplication rate"); TStrVec lotsOfStrings; - for (std::size_t i = 0u; i < 1000; ++i) - { + for (std::size_t i = 0u; i < 1000; ++i) { lotsOfStrings.push_back(core::CStringUtils::typeToString(i)); } using TThreadPtr = boost::shared_ptr<CStringThread>; using TThreadVec = std::vector<TThreadPtr>; TThreadVec threads; - for (std::size_t i = 0; i < 20; ++i) - { + for (std::size_t i = 0; i < 20; ++i) { threads.emplace_back(new CStringThread(i * 50, lotsOfStrings)); } - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { CPPUNIT_ASSERT(threads[i]->start()); } - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { CPPUNIT_ASSERT(threads[i]->waitForFinish()); } - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { // CppUnit won't automatically catch the exceptions thrown by // assertions in newly created threads, so propagate manually threads[i]->propagateLastThreadAssert(); } TStrCPtrUSet uniques; - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { threads[i]->uniques(uniques); } LOG_DEBUG("unique counts = " << uniques.size()); CPPUNIT_ASSERT(uniques.size() < 20000); // Tidy up - for (std::size_t i = 0; i < threads.size(); ++i) - { + for (std::size_t i = 0; i < threads.size(); ++i) { threads[i]->clearPtrs(); } CStringStore::names().pruneNotThreadSafe(); } } -void CStringStoreTest::testMemUsage() -{ +void CStringStoreTest::testMemUsage() { std::string shortStr("short"); std::string longStr("much much longer than the short string"); @@ -274,16 +239,12 @@ void CStringStoreTest::testMemUsage() CPPUNIT_ASSERT_EQUAL(origMemUse, CStringStore::names().memoryUsage()); } -CppUnit::Test *CStringStoreTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CStringStoreTest"); +CppUnit::Test* CStringStoreTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStringStoreTest"); - suiteOfTests->addTest( new CppUnit::TestCaller<CStringStoreTest>( - "CStringStoreTest::testStringStore", - &CStringStoreTest::testStringStore) ); - suiteOfTests->addTest( new CppUnit::TestCaller<CStringStoreTest>( - "CStringStoreTest::testMemUsage", - &CStringStoreTest::testMemUsage) ); + suiteOfTests->addTest( + new CppUnit::TestCaller<CStringStoreTest>("CStringStoreTest::testStringStore", &CStringStoreTest::testStringStore)); + suiteOfTests->addTest(new CppUnit::TestCaller<CStringStoreTest>("CStringStoreTest::testMemUsage", &CStringStoreTest::testMemUsage)); return suiteOfTests; } diff --git a/lib/model/unittest/CStringStoreTest.h b/lib/model/unittest/CStringStoreTest.h index 3f834c591e..49740ccbd1 100644 --- a/lib/model/unittest/CStringStoreTest.h +++ b/lib/model/unittest/CStringStoreTest.h @@ -9,15 +9,14 @@ #include <cppunit/extensions/HelperMacros.h> -class CStringStoreTest : public CppUnit::TestFixture -{ - 
public: - void setUp(); +class CStringStoreTest : public CppUnit::TestFixture { +public: + void setUp(); - void testStringStore(); - void testMemUsage(); + void testStringStore(); + void testMemUsage(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CStringStoreTest_h diff --git a/lib/model/unittest/CToolsTest.cc b/lib/model/unittest/CToolsTest.cc index 31cc2184f2..6fa4094edf 100644 --- a/lib/model/unittest/CToolsTest.cc +++ b/lib/model/unittest/CToolsTest.cc @@ -16,13 +16,11 @@ using namespace ml; using namespace model; -void CToolsTest::testDataGatherers() -{ +void CToolsTest::testDataGatherers() { // TODO } -void CToolsTest::testProbabilityAggregator() -{ +void CToolsTest::testProbabilityAggregator() { LOG_DEBUG("****** CToolsTest::testProbabilityAggregator ******"); // Test a variety of min aggregations. @@ -40,10 +38,9 @@ void CToolsTest::testProbabilityAggregator() maths::CJointProbabilityOfLessLikelySamples expected; - double p[] = { 0.01, 0.2, 0.001, 0.3, 0.456, 0.1 }; + double p[] = {0.01, 0.2, 0.001, 0.3, 0.456, 0.1}; - for (std::size_t i = 0u; i < boost::size(p); ++i) - { + for (std::size_t i = 0u; i < boost::size(p); ++i) { actual.add(p[0]); expected.add(p[0]); CPPUNIT_ASSERT(!actual.empty()); @@ -69,10 +66,9 @@ void CToolsTest::testProbabilityAggregator() maths::CProbabilityOfExtremeSample expected; - double p[] = { 0.01, 0.2, 0.001, 0.3, 0.456, 0.1 }; + double p[] = {0.01, 0.2, 0.001, 0.3, 0.456, 0.1}; - for (std::size_t i = 0u; i < boost::size(p); ++i) - { + for (std::size_t i = 0u; i < boost::size(p); ++i) { actual.add(p[0]); expected.add(p[0]); CPPUNIT_ASSERT(!actual.empty()); @@ -100,10 +96,9 @@ void CToolsTest::testProbabilityAggregator() maths::CJointProbabilityOfLessLikelySamples joint; maths::CProbabilityOfExtremeSample extreme; - double p[] = { 0.01, 0.2, 0.001, 0.3, 0.456, 0.1 }; + double p[] = {0.01, 0.2, 0.001, 0.3, 0.456, 0.1}; - for (std::size_t i = 0u; i < boost::size(p); ++i) - { + for (std::size_t i = 0u; i < boost::size(p); ++i) { actual.add(p[0]); joint.add(p[0]); extreme.add(p[0]); @@ -133,10 +128,9 @@ void CToolsTest::testProbabilityAggregator() maths::CJointProbabilityOfLessLikelySamples joint; maths::CProbabilityOfExtremeSample extreme; - double p[] = { 0.01, 0.2, 0.001, 0.3, 0.456, 0.1 }; + double p[] = {0.01, 0.2, 0.001, 0.3, 0.456, 0.1}; - for (std::size_t i = 0u; i < boost::size(p); ++i) - { + for (std::size_t i = 0u; i < boost::size(p); ++i) { actual.add(p[0]); joint.add(p[0]); extreme.add(p[0]); @@ -153,13 +147,11 @@ void CToolsTest::testProbabilityAggregator() } } -CppUnit::Test *CToolsTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CToolsTest"); +CppUnit::Test* CToolsTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CToolsTest"); - suiteOfTests->addTest( new CppUnit::TestCaller( - "CToolsTest::testProbabilityAggregator", - &CToolsTest::testProbabilityAggregator) ); + suiteOfTests->addTest( + new CppUnit::TestCaller("CToolsTest::testProbabilityAggregator", &CToolsTest::testProbabilityAggregator)); return suiteOfTests; } diff --git a/lib/model/unittest/CToolsTest.h b/lib/model/unittest/CToolsTest.h index 3577d976af..98b20b0dec 100644 --- a/lib/model/unittest/CToolsTest.h +++ b/lib/model/unittest/CToolsTest.h @@ -9,13 +9,12 @@ #include -class CToolsTest : public CppUnit::TestFixture -{ - public: - void testDataGatherers(); - void testProbabilityAggregator(); +class CToolsTest : public CppUnit::TestFixture { +public: + void testDataGatherers(); + void 
testProbabilityAggregator(); - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CToolsTest_h diff --git a/lib/model/unittest/Main.cc b/lib/model/unittest/Main.cc index cc6d8de2cb..8a7309143d 100644 --- a/lib/model/unittest/Main.cc +++ b/lib/model/unittest/Main.cc @@ -16,24 +16,24 @@ #include "CEventRateAnomalyDetectorTest.h" #include "CEventRateDataGathererTest.h" #include "CEventRateModelTest.h" +#include "CEventRatePopulationDataGathererTest.h" +#include "CEventRatePopulationModelTest.h" #include "CFunctionTypesTest.h" #include "CGathererToolsTest.h" -#include "CHierarchicalResultsTest.h" #include "CHierarchicalResultsLevelSetTest.h" +#include "CHierarchicalResultsTest.h" #include "CInterimBucketCorrectorTest.h" #include "CLimitsTest.h" #include "CMemoryUsageEstimatorTest.h" #include "CMetricAnomalyDetectorTest.h" #include "CMetricDataGathererTest.h" #include "CMetricModelTest.h" +#include "CMetricPopulationDataGathererTest.h" +#include "CMetricPopulationModelTest.h" #include "CModelDetailsViewTest.h" #include "CModelMemoryTest.h" #include "CModelToolsTest.h" #include "CModelTypesTest.h" -#include "CEventRatePopulationDataGathererTest.h" -#include "CEventRatePopulationModelTest.h" -#include "CMetricPopulationDataGathererTest.h" -#include "CMetricPopulationModelTest.h" #include "CProbabilityAndInfluenceCalculatorTest.h" #include "CResourceLimitTest.h" #include "CResourceMonitorTest.h" @@ -42,45 +42,44 @@ #include "CStringStoreTest.h" #include "CToolsTest.h" -int main(int argc, const char **argv) -{ +int main(int argc, const char** argv) { ml::test::CTestRunner runner(argc, argv); - runner.addTest( CAnnotatedProbabilityBuilderTest::suite() ); - runner.addTest( CAnomalyDetectorModelConfigTest::suite() ); - runner.addTest( CAnomalyScoreTest::suite() ); - runner.addTest( CBucketQueueTest::suite() ); - runner.addTest( CCountingModelTest::suite() ); - runner.addTest( CDetectionRuleTest::suite() ); - runner.addTest( CDetectorEqualizerTest::suite() ); - runner.addTest( CDynamicStringIdRegistryTest::suite() ); - runner.addTest( CEventRateAnomalyDetectorTest::suite() ); - runner.addTest( CEventRateDataGathererTest::suite() ); - runner.addTest( CEventRateModelTest::suite() ); - runner.addTest( CEventRatePopulationDataGathererTest::suite() ); - runner.addTest( CEventRatePopulationModelTest::suite() ); - runner.addTest( CFunctionTypesTest::suite() ); - runner.addTest( CGathererToolsTest::suite() ); - runner.addTest( CHierarchicalResultsTest::suite() ); - runner.addTest( CHierarchicalResultsLevelSetTest::suite() ); - runner.addTest( CInterimBucketCorrectorTest::suite() ); - runner.addTest( CLimitsTest::suite() ); - runner.addTest( CMemoryUsageEstimatorTest::suite() ); - runner.addTest( CMetricAnomalyDetectorTest::suite() ); - runner.addTest( CMetricDataGathererTest::suite() ); - runner.addTest( CMetricModelTest::suite() ); - runner.addTest( CMetricPopulationDataGathererTest::suite() ); - runner.addTest( CMetricPopulationModelTest::suite() ); - runner.addTest( CModelDetailsViewTest::suite() ); - runner.addTest( CModelMemoryTest::suite() ); - runner.addTest( CModelToolsTest::suite() ); - runner.addTest( CModelTypesTest::suite() ); - runner.addTest( CProbabilityAndInfluenceCalculatorTest::suite() ); - runner.addTest( CResourceLimitTest::suite() ); - runner.addTest( CResourceMonitorTest::suite() ); - runner.addTest( CRuleConditionTest::suite() ); - runner.addTest( CSampleQueueTest::suite() ); - runner.addTest( CStringStoreTest::suite() ); - runner.addTest( 
CToolsTest::suite() ); + runner.addTest(CAnnotatedProbabilityBuilderTest::suite()); + runner.addTest(CAnomalyDetectorModelConfigTest::suite()); + runner.addTest(CAnomalyScoreTest::suite()); + runner.addTest(CBucketQueueTest::suite()); + runner.addTest(CCountingModelTest::suite()); + runner.addTest(CDetectionRuleTest::suite()); + runner.addTest(CDetectorEqualizerTest::suite()); + runner.addTest(CDynamicStringIdRegistryTest::suite()); + runner.addTest(CEventRateAnomalyDetectorTest::suite()); + runner.addTest(CEventRateDataGathererTest::suite()); + runner.addTest(CEventRateModelTest::suite()); + runner.addTest(CEventRatePopulationDataGathererTest::suite()); + runner.addTest(CEventRatePopulationModelTest::suite()); + runner.addTest(CFunctionTypesTest::suite()); + runner.addTest(CGathererToolsTest::suite()); + runner.addTest(CHierarchicalResultsTest::suite()); + runner.addTest(CHierarchicalResultsLevelSetTest::suite()); + runner.addTest(CInterimBucketCorrectorTest::suite()); + runner.addTest(CLimitsTest::suite()); + runner.addTest(CMemoryUsageEstimatorTest::suite()); + runner.addTest(CMetricAnomalyDetectorTest::suite()); + runner.addTest(CMetricDataGathererTest::suite()); + runner.addTest(CMetricModelTest::suite()); + runner.addTest(CMetricPopulationDataGathererTest::suite()); + runner.addTest(CMetricPopulationModelTest::suite()); + runner.addTest(CModelDetailsViewTest::suite()); + runner.addTest(CModelMemoryTest::suite()); + runner.addTest(CModelToolsTest::suite()); + runner.addTest(CModelTypesTest::suite()); + runner.addTest(CProbabilityAndInfluenceCalculatorTest::suite()); + runner.addTest(CResourceLimitTest::suite()); + runner.addTest(CResourceMonitorTest::suite()); + runner.addTest(CRuleConditionTest::suite()); + runner.addTest(CSampleQueueTest::suite()); + runner.addTest(CStringStoreTest::suite()); + runner.addTest(CToolsTest::suite()); return !runner.runTests(); } diff --git a/lib/model/unittest/Mocks.cc b/lib/model/unittest/Mocks.cc index f02e5bce0a..d4977bbc15 100644 --- a/lib/model/unittest/Mocks.cc +++ b/lib/model/unittest/Mocks.cc @@ -8,70 +8,54 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { -CMockModel::CMockModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators) : - CAnomalyDetectorModel(params, dataGatherer, influenceCalculators), - m_IsPopulation(false) -{} +CMockModel::CMockModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) + : CAnomalyDetectorModel(params, dataGatherer, influenceCalculators), m_IsPopulation(false) { +} -void CMockModel::acceptPersistInserter(core::CStatePersistInserter &/*inserter*/) const -{ +void CMockModel::acceptPersistInserter(core::CStatePersistInserter& /*inserter*/) const { } -bool CMockModel::acceptRestoreTraverser(core::CStateRestoreTraverser &/*traverser*/) -{ +bool CMockModel::acceptRestoreTraverser(core::CStateRestoreTraverser& /*traverser*/) { return false; } -CAnomalyDetectorModel *CMockModel::cloneForPersistence() const -{ +CAnomalyDetectorModel* CMockModel::cloneForPersistence() const { return 0; } -model_t::EModelType CMockModel::category() const -{ +model_t::EModelType CMockModel::category() const { return model_t::E_MetricOnline; } -bool CMockModel::isPopulation() const -{ +bool CMockModel::isPopulation() const { return m_IsPopulation; } -bool CMockModel::isEventRate() const -{ +bool CMockModel::isEventRate() const { return 
false; } -bool CMockModel::isMetric() const -{ +bool CMockModel::isMetric() const { return false; } -CMockModel::TOptionalUInt64 CMockModel::currentBucketCount(std::size_t /*pid*/, - core_t::TTime /*time*/) const -{ +CMockModel::TOptionalUInt64 CMockModel::currentBucketCount(std::size_t /*pid*/, core_t::TTime /*time*/) const { CAnomalyDetectorModel::TOptionalUInt64 count; return count; } -CMockModel::TOptionalDouble CMockModel::baselineBucketCount(std::size_t /*pid*/) const -{ +CMockModel::TOptionalDouble CMockModel::baselineBucketCount(std::size_t /*pid*/) const { CAnomalyDetectorModel::TOptionalDouble count; return count; } -CMockModel::TDouble1Vec CMockModel::currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const -{ +CMockModel::TDouble1Vec +CMockModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { auto i = m_BucketValues.find({feature, core::make_triple(pid, cid, time)}); return i != m_BucketValues.end() ? i->second : TDouble1Vec(); } @@ -80,134 +64,102 @@ CMockModel::TDouble1Vec CMockModel::baselineBucketMean(model_t::EFeature feature std::size_t pid, std::size_t cid, model_t::CResultType /*type*/, - const TSizeDoublePr1Vec &/*correlated*/, - core_t::TTime time) const -{ + const TSizeDoublePr1Vec& /*correlated*/, + core_t::TTime time) const { auto i = m_BucketBaselineMeans.find({feature, core::make_triple(pid, cid, time)}); return i != m_BucketBaselineMeans.end() ? i->second : TDouble1Vec(); } -bool CMockModel::bucketStatsAvailable(core_t::TTime /*time*/) const -{ +bool CMockModel::bucketStatsAvailable(core_t::TTime /*time*/) const { return false; } -void CMockModel::currentBucketPersonIds(core_t::TTime /*time*/, TSizeVec &/*result*/) const -{ +void CMockModel::currentBucketPersonIds(core_t::TTime /*time*/, TSizeVec& /*result*/) const { } -void CMockModel::sampleBucketStatistics(core_t::TTime /*startTime*/, - core_t::TTime /*endTime*/, - CResourceMonitor &/*resourceMonitor*/) -{ +void CMockModel::sampleBucketStatistics(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/, CResourceMonitor& /*resourceMonitor*/) { } -void CMockModel::sample(core_t::TTime /*startTime*/, - core_t::TTime /*endTime*/, - CResourceMonitor &/*resourceMonitor*/) -{ +void CMockModel::sample(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/, CResourceMonitor& /*resourceMonitor*/) { } -void CMockModel::sampleOutOfPhase(core_t::TTime /*startTime*/, - core_t::TTime /*endTime*/, - CResourceMonitor &/*resourceMonitor*/) -{ +void CMockModel::sampleOutOfPhase(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/, CResourceMonitor& /*resourceMonitor*/) { } -void CMockModel::prune(std::size_t /*maximumAge*/) -{ +void CMockModel::prune(std::size_t /*maximumAge*/) { } bool CMockModel::computeProbability(std::size_t /*pid*/, core_t::TTime /*startTime*/, core_t::TTime /*endTime*/, - CPartitioningFields &/*partitioningFields*/, + CPartitioningFields& /*partitioningFields*/, std::size_t /*numberAttributeProbabilities*/, - SAnnotatedProbability &/*result*/) const -{ + SAnnotatedProbability& /*result*/) const { return false; } -bool CMockModel::computeTotalProbability(const std::string &/*person*/, +bool CMockModel::computeTotalProbability(const std::string& /*person*/, std::size_t /*numberAttributeProbabilities*/, - TOptionalDouble &/*probability*/, - TAttributeProbability1Vec &/*attributeProbabilities*/) const -{ + TOptionalDouble& /*probability*/, + TAttributeProbability1Vec& 
/*attributeProbabilities*/) const { return false; } -uint64_t CMockModel::checksum(bool /*includeCurrentBucketStats*/) const -{ +uint64_t CMockModel::checksum(bool /*includeCurrentBucketStats*/) const { return 0; } -void CMockModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr /*mem*/) const -{ +void CMockModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr /*mem*/) const { } -std::size_t CMockModel::memoryUsage() const -{ +std::size_t CMockModel::memoryUsage() const { return 0; } -std::size_t CMockModel::computeMemoryUsage() const -{ +std::size_t CMockModel::computeMemoryUsage() const { return 0; } -std::size_t CMockModel::staticSize() const -{ +std::size_t CMockModel::staticSize() const { return 0; } -CMockModel::CModelDetailsViewPtr CMockModel::details() const -{ +CMockModel::CModelDetailsViewPtr CMockModel::details() const { CModelDetailsViewPtr result{new CMockModelDetailsView(*this)}; return result; } -double CMockModel::attributeFrequency(std::size_t /*cid*/) const -{ +double CMockModel::attributeFrequency(std::size_t /*cid*/) const { return 0.0; } -core_t::TTime CMockModel::currentBucketStartTime() const -{ +core_t::TTime CMockModel::currentBucketStartTime() const { return 0; } -void CMockModel::currentBucketStartTime(core_t::TTime /*time*/) -{ +void CMockModel::currentBucketStartTime(core_t::TTime /*time*/) { } -void CMockModel::createNewModels(std::size_t /*n*/, std::size_t /*m*/) -{ +void CMockModel::createNewModels(std::size_t /*n*/, std::size_t /*m*/) { } -void CMockModel::updateRecycledModels() -{ +void CMockModel::updateRecycledModels() { } -void CMockModel::clearPrunedResources(const TSizeVec &/*people*/, - const TSizeVec &/*attributes*/) -{ +void CMockModel::clearPrunedResources(const TSizeVec& /*people*/, const TSizeVec& /*attributes*/) { } -void CMockModel::currentBucketTotalCount(uint64_t /*totalCount*/) -{ +void CMockModel::currentBucketTotalCount(uint64_t /*totalCount*/) { } -void CMockModel::doSkipSampling(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/) -{ +void CMockModel::doSkipSampling(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/) { } -const maths::CModel *CMockModel::model(std::size_t id) const -{ +const maths::CModel* CMockModel::model(std::size_t id) const { return m_Models[id].get(); } -void CMockModel::mockPopulation(bool isPopulation) -{ +void CMockModel::mockPopulation(bool isPopulation) { m_IsPopulation = isPopulation; } @@ -215,8 +167,7 @@ void CMockModel::mockAddBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time, - const TDouble1Vec &value) -{ + const TDouble1Vec& value) { m_BucketValues[{feature, core::make_triple(pid, cid, time)}] = value; } @@ -224,42 +175,31 @@ void CMockModel::mockAddBucketBaselineMean(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time, - const TDouble1Vec &value) -{ + const TDouble1Vec& value) { m_BucketBaselineMeans[{feature, core::make_triple(pid, cid, time)}] = value; } -void CMockModel::mockTimeSeriesModels(const TMathsModelPtrVec &models) -{ +void CMockModel::mockTimeSeriesModels(const TMathsModelPtrVec& models) { m_Models = models; } -CMemoryUsageEstimator *CMockModel::memoryUsageEstimator() const -{ +CMemoryUsageEstimator* CMockModel::memoryUsageEstimator() const { return 0; } -CMockModelDetailsView::CMockModelDetailsView(const CMockModel &model) : - m_Model{&model} -{} +CMockModelDetailsView::CMockModelDetailsView(const CMockModel& model) : m_Model{&model} { +} -const maths::CModel 
*CMockModelDetailsView::model(model_t::EFeature /*feature*/, - std::size_t byFieldId) const -{ +const maths::CModel* CMockModelDetailsView::model(model_t::EFeature /*feature*/, std::size_t byFieldId) const { return m_Model->model(byFieldId); } -const CAnomalyDetectorModel &CMockModelDetailsView::base() const -{ +const CAnomalyDetectorModel& CMockModelDetailsView::base() const { return *m_Model; } -double CMockModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, - std::size_t /*byFieldId*/, - core_t::TTime /*time*/) const -{ +double CMockModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, std::size_t /*byFieldId*/, core_t::TTime /*time*/) const { return 1.0; } - } } diff --git a/lib/model/unittest/Mocks.h b/lib/model/unittest/Mocks.h index a3aa96bc7b..540fea1fb8 100644 --- a/lib/model/unittest/Mocks.h +++ b/lib/model/unittest/Mocks.h @@ -16,160 +16,134 @@ #include -namespace ml -{ -namespace model -{ +namespace ml { +namespace model { //! \brief Mock a model and allow setting of bucket values //! and baselines. -class CMockModel : public CAnomalyDetectorModel -{ - public: - CMockModel(const SModelParams ¶ms, - const TDataGathererPtr &dataGatherer, - const TFeatureInfluenceCalculatorCPtrPrVecVec &influenceCalculators); +class CMockModel : public CAnomalyDetectorModel { +public: + CMockModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators); - virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const; + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser); + virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - virtual CAnomalyDetectorModel *cloneForPersistence() const; + virtual CAnomalyDetectorModel* cloneForPersistence() const; - virtual model_t::EModelType category() const; + virtual model_t::EModelType category() const; - virtual bool isPopulation() const; + virtual bool isPopulation() const; - virtual bool isEventRate() const; + virtual bool isEventRate() const; - virtual bool isMetric() const; + virtual bool isMetric() const; - virtual TOptionalUInt64 currentBucketCount(std::size_t pid, - core_t::TTime time) const; + virtual TOptionalUInt64 currentBucketCount(std::size_t pid, core_t::TTime time) const; - virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; + virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time) const; + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; - virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec &correlated, - core_t::TTime time) const; + virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + model_t::CResultType type, + const TSizeDoublePr1Vec& correlated, + core_t::TTime time) const; - virtual bool bucketStatsAvailable(core_t::TTime time) const; + virtual bool bucketStatsAvailable(core_t::TTime time) const; - virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec &result) const; + virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const; - virtual void 
sampleBucketStatistics(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); + virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); - virtual void sample(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); + virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor &resourceMonitor); + virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); - virtual void prune(std::size_t maximumAge); + virtual void prune(std::size_t maximumAge); - virtual bool computeProbability(std::size_t pid, - core_t::TTime startTime, - core_t::TTime endTime, - CPartitioningFields &partitioningFields, - std::size_t numberAttributeProbabilities, - SAnnotatedProbability &result) const; + virtual bool computeProbability(std::size_t pid, + core_t::TTime startTime, + core_t::TTime endTime, + CPartitioningFields& partitioningFields, + std::size_t numberAttributeProbabilities, + SAnnotatedProbability& result) const; - virtual bool computeTotalProbability(const std::string &person, - std::size_t numberAttributeProbabilities, - TOptionalDouble &probability, - TAttributeProbability1Vec &attributeProbabilities) const; + virtual bool computeTotalProbability(const std::string& person, + std::size_t numberAttributeProbabilities, + TOptionalDouble& probability, + TAttributeProbability1Vec& attributeProbabilities) const; - virtual uint64_t checksum(bool includeCurrentBucketStats = true) const; + virtual uint64_t checksum(bool includeCurrentBucketStats = true) const; - virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; + virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; - virtual std::size_t memoryUsage() const; + virtual std::size_t memoryUsage() const; - virtual std::size_t computeMemoryUsage() const; + virtual std::size_t computeMemoryUsage() const; - virtual std::size_t staticSize() const; + virtual std::size_t staticSize() const; - virtual CModelDetailsViewPtr details() const; + virtual CModelDetailsViewPtr details() const; - virtual double attributeFrequency(std::size_t cid) const; + virtual double attributeFrequency(std::size_t cid) const; - const maths::CModel *model(std::size_t id) const; + const maths::CModel* model(std::size_t id) const; - // Setter methods to allow mocking + // Setter methods to allow mocking - void mockPopulation(bool isPopulation); + void mockPopulation(bool isPopulation); - void mockAddBucketValue(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time, - const TDouble1Vec &value); + void mockAddBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time, const TDouble1Vec& value); - void mockAddBucketBaselineMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - core_t::TTime time, - const TDouble1Vec &value); + void + mockAddBucketBaselineMean(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time, const TDouble1Vec& value); - void mockTimeSeriesModels(const TMathsModelPtrVec &model); + void mockTimeSeriesModels(const TMathsModelPtrVec& model); - protected: - virtual core_t::TTime currentBucketStartTime() const; - virtual void currentBucketStartTime(core_t::TTime time); - virtual void 
createNewModels(std::size_t n, std::size_t m); - virtual void updateRecycledModels(); - virtual void clearPrunedResources(const TSizeVec &people, - const TSizeVec &attributes); +protected: + virtual core_t::TTime currentBucketStartTime() const; + virtual void currentBucketStartTime(core_t::TTime time); + virtual void createNewModels(std::size_t n, std::size_t m); + virtual void updateRecycledModels(); + virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes); - private: - using TDouble1Vec = CAnomalyDetectorModel::TDouble1Vec; - using TSizeSizeTimeTriple = core::CTriple<std::size_t, std::size_t, core_t::TTime>; - using TFeatureSizeSizeTimeTriplePr = std::pair<model_t::EFeature, TSizeSizeTimeTriple>; - using TFeatureSizeSizeTimeTriplePrDouble1VecUMap = boost::unordered_map<TFeatureSizeSizeTimeTriplePr, TDouble1Vec>; +private: + using TDouble1Vec = CAnomalyDetectorModel::TDouble1Vec; + using TSizeSizeTimeTriple = core::CTriple<std::size_t, std::size_t, core_t::TTime>; + using TFeatureSizeSizeTimeTriplePr = std::pair<model_t::EFeature, TSizeSizeTimeTriple>; + using TFeatureSizeSizeTimeTriplePrDouble1VecUMap = boost::unordered_map<TFeatureSizeSizeTimeTriplePr, TDouble1Vec>; - private: - virtual void currentBucketTotalCount(uint64_t totalCount); - virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime); - virtual CMemoryUsageEstimator *memoryUsageEstimator() const; +private: + virtual void currentBucketTotalCount(uint64_t totalCount); + virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime); + virtual CMemoryUsageEstimator* memoryUsageEstimator() const; - private: - bool m_IsPopulation; - TFeatureSizeSizeTimeTriplePrDouble1VecUMap m_BucketValues; - TFeatureSizeSizeTimeTriplePrDouble1VecUMap m_BucketBaselineMeans; - TMathsModelPtrVec m_Models; +private: + bool m_IsPopulation; + TFeatureSizeSizeTimeTriplePrDouble1VecUMap m_BucketValues; + TFeatureSizeSizeTimeTriplePrDouble1VecUMap m_BucketBaselineMeans; + TMathsModelPtrVec m_Models; }; //! \brief A details view for a mock model. -class CMockModelDetailsView : public CModelDetailsView -{ - public: - CMockModelDetailsView(const CMockModel &model); - - private: - virtual const maths::CModel *model(model_t::EFeature feature, - std::size_t byFieldId) const; - virtual const CAnomalyDetectorModel &base() const; - virtual double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const; - - private: - //! The model. 
+ const CMockModel* m_Model; +class CMockModelDetailsView : public CModelDetailsView { +public: + CMockModelDetailsView(const CMockModel& model); + +private: + virtual const maths::CModel* model(model_t::EFeature feature, std::size_t byFieldId) const; + virtual const CAnomalyDetectorModel& base() const; + virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + +private: + //! The model. + const CMockModel* m_Model; }; - } } diff --git a/lib/test/CMultiFileDataAdder.cc b/lib/test/CMultiFileDataAdder.cc index 894f0fbd31..73f96349fc 100644 --- a/lib/test/CMultiFileDataAdder.cc +++ b/lib/test/CMultiFileDataAdder.cc @@ -14,31 +14,21 @@ #include #include - -namespace ml -{ -namespace test -{ - +namespace ml { +namespace test { const std::string CMultiFileDataAdder::JSON_FILE_EXT = ".json"; - -CMultiFileDataAdder::CMultiFileDataAdder(std::string baseFilename, - std::string fileExtension) -{ +CMultiFileDataAdder::CMultiFileDataAdder(std::string baseFilename, std::string fileExtension) { m_BaseFilename.swap(baseFilename); m_FileExtension.swap(fileExtension); } -CMultiFileDataAdder::TOStreamP CMultiFileDataAdder::addStreamed(const std::string &index, - const std::string &id) -{ - const std::string &filename = this->makeFilename(index, id); +CMultiFileDataAdder::TOStreamP CMultiFileDataAdder::addStreamed(const std::string& index, const std::string& id) { + const std::string& filename = this->makeFilename(index, id); TOStreamP strm(boost::make_shared<std::ofstream>(filename.c_str())); - if (!strm->good()) - { + if (!strm->good()) { LOG_ERROR("Failed to create new output stream for file " << filename); strm.reset(); } @@ -46,12 +36,9 @@ CMultiFileDataAdder::TOStreamP CMultiFileDataAdder::addStreamed(const std::strin return strm; } -bool CMultiFileDataAdder::streamComplete(TOStreamP &strm, - bool /*force*/) -{ - std::ofstream *ofs(dynamic_cast<std::ofstream*>(strm.get())); - if (ofs == 0) - { +bool CMultiFileDataAdder::streamComplete(TOStreamP& strm, bool /*force*/) { + std::ofstream* ofs(dynamic_cast<std::ofstream*>(strm.get())); + if (ofs == 0) { return false; } @@ -60,30 +47,21 @@ bool CMultiFileDataAdder::streamComplete(TOStreamP &strm, -std::string CMultiFileDataAdder::makeFilename(const std::string &index, - const std::string &id) const -{ +std::string CMultiFileDataAdder::makeFilename(const std::string& index, const std::string& id) const { // NB: The logic in here must mirror that of CMultiFileSearcher::search() std::string filename(m_BaseFilename); - if (!index.empty()) - { + if (!index.empty()) { filename += "/_"; filename += index; } - try - { + try { // Prior existence of the directory is not considered an error by // boost::filesystem, and this is what we want boost::filesystem::path directoryPath(filename); boost::filesystem::create_directories(directoryPath); - } - catch (std::exception &e) - { - LOG_ERROR("Failed to create directory " << filename << - " - " << e.what()); - } + } catch (std::exception& e) { LOG_ERROR("Failed to create directory " << filename << " - " << e.what()); } filename += '/'; filename += id; @@ -91,8 +69,5 @@ std::string CMultiFileDataAdder::makeFilename(const std::string &index, return filename; } - - } } -
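As the comment in makeFilename() says, CMultiFileSearcher::search() must mirror this layout exactly: a document for (index, id) is written to <base>/_<index>/<id><extension>. A small self-contained sketch of that shared convention (the helper name is hypothetical):

#include <iostream>
#include <string>

// Both the data adder and the searcher must agree on this layout,
// otherwise persisted documents cannot be found again.
std::string makePath(const std::string& base,
                     const std::string& index,
                     const std::string& id,
                     const std::string& extension) {
    std::string path(base);
    if (!index.empty()) {
        path += "/_"; // index directories carry a '_' prefix
        path += index;
    }
    path += '/';
    path += id;
    path += extension;
    return path;
}

int main() {
    // Prints "base/_myindex/doc1.json"
    std::cout << makePath("base", "myindex", "doc1", ".json") << '\n';
    return 0;
}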
diff --git a/lib/test/CMultiFileSearcher.cc b/lib/test/CMultiFileSearcher.cc index 20e661ce28..1d4d7621f0 100644 --- a/lib/test/CMultiFileSearcher.cc +++ b/lib/test/CMultiFileSearcher.cc @@ -13,28 +13,17 @@ #include #include - -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { const std::string CMultiFileSearcher::JSON_FILE_EXT(".json"); - -CMultiFileSearcher::CMultiFileSearcher(std::string baseFilename, - std::string baseDocId, - std::string fileExtension) - : m_BaseFilename(std::move(baseFilename)), - m_BaseDocId(std::move(baseDocId)), - m_FileExtension(std::move(fileExtension)) -{ +CMultiFileSearcher::CMultiFileSearcher(std::string baseFilename, std::string baseDocId, std::string fileExtension) : m_BaseFilename(std::move(baseFilename)), m_BaseDocId(std::move(baseDocId)), m_FileExtension(std::move(fileExtension)) { } -CMultiFileSearcher::TIStreamP CMultiFileSearcher::search(size_t currentDocNum, size_t limit) -{ - if (limit != 1) - { +CMultiFileSearcher::TIStreamP CMultiFileSearcher::search(size_t currentDocNum, size_t limit) { + if (limit != 1) { LOG_ERROR("File searcher can only operate with a limit of 1"); return TIStreamP(); } @@ -42,16 +31,14 @@ CMultiFileSearcher::TIStreamP CMultiFileSearcher::search(size_t currentDocNum, s // NB: The logic in here must mirror that of CMultiFileDataAdder::makeFilename() std::string filename(m_BaseFilename); - if (!m_SearchTerms[0].empty()) - { + if (!m_SearchTerms[0].empty()) { filename += '/'; if (m_SearchTerms[0].front() == '.') { filename += '_'; } filename += m_SearchTerms[0]; } - if (!m_SearchTerms[1].empty()) - { + if (!m_SearchTerms[1].empty()) { filename += '/'; filename += m_SearchTerms[1]; } @@ -64,7 +51,5 @@ CMultiFileSearcher::TIStreamP CMultiFileSearcher::search(size_t currentDocNum, s // in the "good" state. return boost::make_shared<std::ifstream>(filename.c_str()); } - } } - diff --git a/lib/test/CRandomNumbers.cc b/lib/test/CRandomNumbers.cc index d4d8522da2..53b7afd201 100644 --- a/lib/test/CRandomNumbers.cc +++ b/lib/test/CRandomNumbers.cc @@ -11,7 +11,6 @@ #include -#include #include #include #include @@ -19,37 +18,29 @@ #include #include #include +#include #include -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { -void CRandomNumbers::generateNormalSamples(double mean, - double variance, - std::size_t numberSamples, - TDoubleVec &samples) -{ +void CRandomNumbers::generateNormalSamples(double mean, double variance, std::size_t numberSamples, TDoubleVec& samples) { boost::random::normal_distribution<> normal(mean, std::sqrt(variance)); generateSamples(m_Generator, normal, numberSamples, samples); } -void CRandomNumbers::generateMultivariateNormalSamples(const TDoubleVec &mean, - const TDoubleVecVec &covariances_, +void CRandomNumbers::generateMultivariateNormalSamples(const TDoubleVec& mean, + const TDoubleVecVec& covariances_, std::size_t numberSamples, - TDoubleVecVec &samples) -{ + TDoubleVecVec& samples) { samples.clear(); std::size_t d = covariances_.size(); Eigen::MatrixXd covariances(d, d); - for (std::size_t i = 0u; i < d; ++i) - { - for (std::size_t j = 0u; j < d; ++j) - { + for (std::size_t i = 0u; i < d; ++i) { + for (std::size_t j = 0u; j < d; ++j) { covariances(i, j) = covariances_[i][j]; } } @@ -59,88 +50,60 @@ void CRandomNumbers::generateMultivariateNormalSamples(const TDoubleVec &mean, std::size_t r = static_cast<std::size_t>(svd.rank()); TDoubleVecVec residuals(r); - for (std::size_t i = 0u; i < r; ++i) - { + for (std::size_t i = 0u; i < r; ++i) { this->generateNormalSamples(0.0, svd.singularValues()(i), numberSamples, residuals[i]); } Eigen::VectorXd ri(d); TDoubleVec xi(d, 0.0); - for (std::size_t i = 0u; i < numberSamples; ++i) - { - for (std::size_t j = 0u; j < r; ++j) - { + for (std::size_t i = 0u; i < numberSamples; ++i) { + for (std::size_t j = 0u; j < r; ++j) { ri(j) = j < r ? residuals[j][i] : 0.0; } ri = svd.matrixU() * ri; - for (std::size_t j = 0u; j < r; ++j) - { + for (std::size_t j = 0u; j < r; ++j) { xi[j] = mean[j] + ri(j); } samples.push_back(xi); } }
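generateMultivariateNormalSamples() above draws correlated Gaussians via the decomposition C = U S U^T: sample independent normals whose variances are the singular values, then rotate by U and add the mean. A compact standalone sketch of the same construction, assuming Eigen is available and using the standard <random> generator rather than the class's boost one (names are illustrative):

#include <Eigen/Dense>
#include <cmath>
#include <iostream>
#include <random>

// Draw one sample from N(mean, C) using the SVD C = U S U^T:
// x = mean + U * diag(sqrt(s)) * z, with z ~ N(0, I).
Eigen::VectorXd sampleMvn(const Eigen::VectorXd& mean,
                          const Eigen::MatrixXd& covariance,
                          std::mt19937& rng) {
    Eigen::JacobiSVD<Eigen::MatrixXd> svd(covariance, Eigen::ComputeFullU);
    std::normal_distribution<double> normal(0.0, 1.0);
    Eigen::VectorXd z(mean.size());
    for (int i = 0; i < z.size(); ++i) {
        // Scale each independent standard normal by the standard
        // deviation along the corresponding singular direction.
        z(i) = std::sqrt(svd.singularValues()(i)) * normal(rng);
    }
    return mean + svd.matrixU() * z;
}

int main() {
    std::mt19937 rng(42);
    Eigen::VectorXd mean(2);
    mean << 1.0, -1.0;
    Eigen::MatrixXd cov(2, 2);
    cov << 2.0, 0.5, 0.5, 1.0;
    std::cout << sampleMvn(mean, cov, rng).transpose() << '\n';
    return 0;
}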
-void CRandomNumbers::generatePoissonSamples(double rate, - std::size_t numberSamples, - TUIntVec &samples) -{ +void CRandomNumbers::generatePoissonSamples(double rate, std::size_t numberSamples, TUIntVec& samples) { boost::random::poisson_distribution<> poisson(rate); generateSamples(m_Generator, poisson, numberSamples, samples); } -void CRandomNumbers::generateStudentsSamples(double degreesFreedom, - std::size_t numberSamples, - TDoubleVec &samples) -{ +void CRandomNumbers::generateStudentsSamples(double degreesFreedom, std::size_t numberSamples, TDoubleVec& samples) { boost::random::student_t_distribution<> students(degreesFreedom); generateSamples(m_Generator, students, numberSamples, samples); } -void CRandomNumbers::generateLogNormalSamples(double location, - double squareScale, - std::size_t numberSamples, - TDoubleVec &samples) -{ +void CRandomNumbers::generateLogNormalSamples(double location, double squareScale, std::size_t numberSamples, TDoubleVec& samples) { boost::random::lognormal_distribution<> logNormal(location, std::sqrt(squareScale)); generateSamples(m_Generator, logNormal, numberSamples, samples); } -void CRandomNumbers::generateUniformSamples(double a, - double b, - std::size_t numberSamples, - TDoubleVec &samples) -{ +void CRandomNumbers::generateUniformSamples(double a, double b, std::size_t numberSamples, TDoubleVec& samples) { boost::random::uniform_real_distribution<> uniform(a, b); generateSamples(m_Generator, uniform, numberSamples, samples); } -void CRandomNumbers::generateUniformSamples(std::size_t a, - std::size_t b, - std::size_t numberSamples, - TSizeVec &samples) -{ +void CRandomNumbers::generateUniformSamples(std::size_t a, std::size_t b, std::size_t numberSamples, TSizeVec& samples) { boost::random::uniform_int_distribution<std::size_t> uniform(a, b - 1); generateSamples(m_Generator, uniform, numberSamples, samples); } -void CRandomNumbers::generateGammaSamples(double shape, - double scale, - std::size_t numberSamples, - TDoubleVec &samples) -{ +void CRandomNumbers::generateGammaSamples(double shape, double scale, std::size_t numberSamples, TDoubleVec& samples) { boost::random::gamma_distribution<> gamma(shape, scale); generateSamples(m_Generator, gamma, numberSamples, samples); } -void CRandomNumbers::generateMultinomialSamples(const TDoubleVec &categories, - const TDoubleVec &probabilities, +void CRandomNumbers::generateMultinomialSamples(const TDoubleVec& categories, + const TDoubleVec& probabilities, std::size_t numberSamples, - TDoubleVec &samples) -{ - if (categories.size() != probabilities.size()) - { + TDoubleVec& samples) { + if (categories.size() != probabilities.size()) { LOG_ERROR("categories and probabilities must be one-to-one."); } @@ -153,108 +116,72 @@ void CRandomNumbers::generateMultinomialSamples(const TDoubleVec &categories, // Construct the transform function. TDoubleVec transform; transform.reserve(probabilities.size()); - std::partial_sum(probabilities.begin(), - probabilities.end(), - std::back_inserter(transform)); + std::partial_sum(probabilities.begin(), probabilities.end(), std::back_inserter(transform)); // Map the samples to categories. - for (std::size_t i = 0u; i < samples.size(); ++i) - { - std::size_t j = std::lower_bound(transform.begin(), - transform.end(), - samples[i]) - transform.begin(); - if (j == transform.size()) - { - LOG_ERROR("Expected sample " << samples[i] - << " to be less than largest value in " - << core::CContainerPrinter::print(transform)); + for (std::size_t i = 0u; i < samples.size(); ++i) { + std::size_t j = std::lower_bound(transform.begin(), transform.end(), samples[i]) - transform.begin(); + if (j == transform.size()) { + LOG_ERROR("Expected sample " << samples[i] << " to be less than largest value in " + << core::CContainerPrinter::print(transform)); j = transform.size() - 1; } samples[i] = categories[j]; } }
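generateMultinomialSamples() is the classic inverse-CDF transform: partial-sum the probabilities into a cumulative distribution, draw uniform variates, and map each one to its category with lower_bound. A self-contained sketch of the transform using only the standard library (names are illustrative):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <random>
#include <vector>

// Sample n categories with the given probabilities by inverting the CDF.
std::vector<double> sampleCategorical(const std::vector<double>& categories,
                                      const std::vector<double>& probabilities,
                                      std::size_t n,
                                      std::mt19937& rng) {
    // CDF: cdf[k] = p[0] + ... + p[k].
    std::vector<double> cdf;
    std::partial_sum(probabilities.begin(), probabilities.end(), std::back_inserter(cdf));
    std::uniform_real_distribution<double> uniform(0.0, cdf.back());
    std::vector<double> samples(n);
    for (std::size_t i = 0; i < n; ++i) {
        // First bucket whose cumulative probability covers the draw.
        auto it = std::lower_bound(cdf.begin(), cdf.end(), uniform(rng));
        samples[i] = categories[it - cdf.begin()];
    }
    return samples;
}

int main() {
    std::mt19937 rng(42);
    std::vector<double> categories{1.0, 2.0, 3.0};
    std::vector<double> probabilities{0.2, 0.5, 0.3};
    for (double s : sampleCategorical(categories, probabilities, 5, rng)) {
        std::cout << s << ' ';
    }
    std::cout << '\n';
    return 0;
}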
-void CRandomNumbers::generateDirichletSamples(const TDoubleVec &concentrations, - std::size_t numberSamples, - TDoubleVecVec &samples) -{ +void CRandomNumbers::generateDirichletSamples(const TDoubleVec& concentrations, std::size_t numberSamples, TDoubleVecVec& samples) { samples.resize(numberSamples); - for (std::size_t i = 0; i < concentrations.size(); ++i) - { + for (std::size_t i = 0; i < concentrations.size(); ++i) { TDoubleVec raw; generateGammaSamples(concentrations[i], 1.0, numberSamples, raw); - for (std::size_t j = 0u; j < numberSamples; ++j) - { + for (std::size_t j = 0u; j < numberSamples; ++j) { samples[j].reserve(concentrations.size()); samples[j].push_back(raw[j]); } } - for (std::size_t i = 0u; i < samples.size(); ++i) - { + for (std::size_t i = 0u; i < samples.size(); ++i) { double normalizer = 0.0; - for (std::size_t j = 0u; j < concentrations.size(); ++j) - { + for (std::size_t j = 0u; j < concentrations.size(); ++j) { normalizer += samples[i][j]; } - for (std::size_t j = 0u; j < samples[i].size(); ++j) - { + for (std::size_t j = 0u; j < samples[i].size(); ++j) { samples[i][j] /= normalizer; } } }
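generateDirichletSamples() uses the standard Gamma construction: draw y_k ~ Gamma(alpha_k, 1) independently and normalise by their sum, which yields a Dirichlet(alpha) vector. A standalone sketch of the same construction with std::gamma_distribution (illustrative names, standard library only):

#include <iostream>
#include <random>
#include <vector>

// Dirichlet(alpha) sample: normalised independent Gamma(alpha_k, 1) draws.
std::vector<double> sampleDirichlet(const std::vector<double>& concentrations,
                                    std::mt19937& rng) {
    std::vector<double> sample(concentrations.size());
    double normalizer = 0.0;
    for (std::size_t k = 0; k < concentrations.size(); ++k) {
        std::gamma_distribution<double> gamma(concentrations[k], 1.0);
        sample[k] = gamma(rng);
        normalizer += sample[k];
    }
    for (double& component : sample) {
        component /= normalizer; // components now sum to one
    }
    return sample;
}

int main() {
    std::mt19937 rng(42);
    for (double component : sampleDirichlet({2.0, 3.0, 5.0}, rng)) {
        std::cout << component << ' ';
    }
    std::cout << '\n';
    return 0;
}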
-void CRandomNumbers::generateWords(std::size_t length, - std::size_t numberSamples, - TStrVec &samples) -{ - const char characterSet[] = - { - 'a', 'b', 'c', 'd', 'e', - 'f', 'g', 'h', 'i', 'j', - 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', - 'u', 'v', 'x', 'y', 'z', - '-', '_', ' ', '1', '2', - '3', '4', '5', '6', '7', - '8', '9', '0' - }; - - boost::random::uniform_int_distribution<std::size_t> - uniform(0u, boost::size(characterSet) - 1); +void CRandomNumbers::generateWords(std::size_t length, std::size_t numberSamples, TStrVec& samples) { + const char characterSet[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'x', 'y', 'z', '-', '_', ' ', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'}; + + boost::random::uniform_int_distribution<std::size_t> uniform(0u, boost::size(characterSet) - 1); samples.resize(numberSamples); - for (std::size_t i = 0u; i < numberSamples; ++i) - { - std::string &word = samples[i]; + for (std::size_t i = 0u; i < numberSamples; ++i) { + std::string& word = samples[i]; word.resize(length); - for (std::size_t j = 0u; j < length; ++j) - { + for (std::size_t j = 0u; j < length; ++j) { word[j] = characterSet[uniform(m_Generator)]; } } } -CRandomNumbers::CUniform0nGenerator CRandomNumbers::uniformGenerator() -{ +CRandomNumbers::CUniform0nGenerator CRandomNumbers::uniformGenerator() { return CUniform0nGenerator(m_Generator); } -void CRandomNumbers::discard(std::size_t n) -{ +void CRandomNumbers::discard(std::size_t n) { m_Generator.discard(n); } -CRandomNumbers::CUniform0nGenerator::CUniform0nGenerator(const TGenerator &generator) : - m_Generator(new TGenerator(generator)) -{ +CRandomNumbers::CUniform0nGenerator::CUniform0nGenerator(const TGenerator& generator) : m_Generator(new TGenerator(generator)) { + } -std::size_t CRandomNumbers::CUniform0nGenerator::operator()(std::size_t n) const -{ +std::size_t CRandomNumbers::CUniform0nGenerator::operator()(std::size_t n) const { boost::random::uniform_int_distribution<std::size_t> uniform(0, n - 1); return uniform(*m_Generator); } - } } diff --git a/lib/test/CShellCmdEscape.cc b/lib/test/CShellCmdEscape.cc index 6de9c7696c..efaa42f2e8 100644 --- a/lib/test/CShellCmdEscape.cc +++ b/lib/test/CShellCmdEscape.cc @@ -7,15 +7,10 @@ #include +namespace ml { +namespace test { -namespace ml -{ -namespace test -{ - -void CShellCmdEscape::escapeCmd(std::string &cmd) -{ +void CShellCmdEscape::escapeCmd(std::string& cmd) { // Special characters are \ * ? < > # & | ( ) ' " ` ; // Escape character is \ on Unix @@ -34,8 +29,5 @@ void CShellCmdEscape::escapeCmd(std::string &cmd) core::CStringUtils::replace("`", "\\`", cmd); core::CStringUtils::replace(";", "\\;", cmd); } - - } } - diff --git a/lib/test/CShellCmdEscape_Windows.cc b/lib/test/CShellCmdEscape_Windows.cc index e5b42611ff..c8b28d6144 100644 --- a/lib/test/CShellCmdEscape_Windows.cc +++ b/lib/test/CShellCmdEscape_Windows.cc @@ -7,15 +7,10 @@ #include +namespace ml { +namespace test { -namespace ml -{ -namespace test -{ - -void CShellCmdEscape::escapeCmd(std::string &cmd) -{ +void CShellCmdEscape::escapeCmd(std::string& cmd) { // Special characters are ^ < > & | // Escape character is ^ on Windows @@ -25,8 +20,5 @@ void CShellCmdEscape::escapeCmd(std::string &cmd) core::CStringUtils::replace("&", "^&", cmd); core::CStringUtils::replace("|", "^|", cmd); } - - } } - diff --git a/lib/test/CTestRunner.cc b/lib/test/CTestRunner.cc index dd0c32b405..6471b52bc4 100644 --- a/lib/test/CTestRunner.cc +++ b/lib/test/CTestRunner.cc @@ -28,9 +28,7 @@ #include #include - -namespace -{ +namespace { // Since the unit tests log to STDERR, we can improve performance, especially on // Windows, by buffering it. 
The file scope variable ensures the setvbuf() gets // called to set the buffer size during the static initialisation phase of the @@ -48,43 +46,31 @@ const std::string BIN_DIR("bin"); const std::string UNKNOWN_DIR("unknown"); } - -namespace ml -{ -namespace test -{ - +namespace ml { +namespace test { // Initialise statics const std::string CTestRunner::SKIP_FILE_NAME("unit_test_skip_dirs.csv"); const std::string CTestRunner::XML_RESULT_FILE_NAME("cppunit_results.xml"); - -CTestRunner::CTestRunner(int argc, const char **argv) -{ +CTestRunner::CTestRunner(int argc, const char** argv) { this->processCmdLine(argc, argv); } -CTestRunner::~CTestRunner() -{ +CTestRunner::~CTestRunner() { } -void CTestRunner::processCmdLine(int argc, const char **argv) -{ +void CTestRunner::processCmdLine(int argc, const char** argv) { std::string exeName(argv[0]); size_t pos(exeName.rfind('/')); - if (pos != std::string::npos) - { + if (pos != std::string::npos) { m_ExeName.assign(exeName, pos + 1, exeName.length() - pos - 1); - } - else - { + } else { m_ExeName = exeName; } - if (argc > 1) - { + if (argc > 1) { static const std::string SRC_EXT(".cc"); static const std::string HDR_EXT(".h"); m_TestCases.reserve(argc - 1); @@ -92,79 +78,52 @@ void CTestRunner::processCmdLine(int argc, const char **argv) size_t numHdrStrips(0); int lastSrcIndex(0); int lastHdrIndex(0); - for (int i = 1; i < argc; ++i) - { + for (int i = 1; i < argc; ++i) { m_TestCases.push_back(argv[i]); - std::string &testName = m_TestCases.back(); - if (testName.length() > SRC_EXT.length() && - testName.rfind(SRC_EXT) == testName.length() - SRC_EXT.length()) - { + std::string& testName = m_TestCases.back(); + if (testName.length() > SRC_EXT.length() && testName.rfind(SRC_EXT) == testName.length() - SRC_EXT.length()) { testName.erase(testName.length() - SRC_EXT.length()); ++numSrcStrips; lastSrcIndex = i; - } - else if (testName.length() > HDR_EXT.length() && - testName.rfind(HDR_EXT) == testName.length() - HDR_EXT.length()) - { + } else if (testName.length() > HDR_EXT.length() && testName.rfind(HDR_EXT) == testName.length() - HDR_EXT.length()) { testName.erase(testName.length() - HDR_EXT.length()); ++numHdrStrips; lastHdrIndex = i; } } - if (numSrcStrips == 1) - { - LOG_INFO("Source file extension " << SRC_EXT << - " stripped from supplied test name " << - argv[lastSrcIndex]); - } - else if (numSrcStrips > 0) - { - LOG_INFO("Source file extension " << SRC_EXT << " stripped from " << - numSrcStrips << " supplied test names"); - } - if (numHdrStrips == 1) - { - LOG_INFO("Header file extension " << HDR_EXT << - " stripped from supplied test name " << - argv[lastHdrIndex]); + if (numSrcStrips == 1) { + LOG_INFO("Source file extension " << SRC_EXT << " stripped from supplied test name " << argv[lastSrcIndex]); + } else if (numSrcStrips > 0) { + LOG_INFO("Source file extension " << SRC_EXT << " stripped from " << numSrcStrips << " supplied test names"); } - else if (numHdrStrips > 0) - { - LOG_INFO("Header file extension " << HDR_EXT << " stripped from " << - numHdrStrips << " supplied test names"); + if (numHdrStrips == 1) { + LOG_INFO("Header file extension " << HDR_EXT << " stripped from supplied test name " << argv[lastHdrIndex]); + } else if (numHdrStrips > 0) { + LOG_INFO("Header file extension " << HDR_EXT << " stripped from " << numHdrStrips << " supplied test names"); } std::sort(m_TestCases.begin(), m_TestCases.end()); size_t numDuplicates(m_TestCases.size()); - m_TestCases.erase(std::unique(m_TestCases.begin(), m_TestCases.end()), - 
m_TestCases.end()); + m_TestCases.erase(std::unique(m_TestCases.begin(), m_TestCases.end()), m_TestCases.end()); numDuplicates -= m_TestCases.size(); - if (numDuplicates > 0) - { + if (numDuplicates > 0) { LOG_WARN(numDuplicates << " of the supplied test names were " - "duplicates - each test case will only be run once"); + "duplicates - each test case will only be run once"); } } } -bool CTestRunner::runTests() -{ +bool CTestRunner::runTests() { boost::filesystem::path cwd; - try - { + try { cwd = boost::filesystem::current_path(); - } - catch (std::exception &e) - { + } catch (std::exception& e) { LOG_ERROR("Unable to determine current directory: " << e.what()); return false; } bool passed(false); - if (this->checkSkipFile(cwd.string(), passed) == true) - { - LOG_WARN("Skipping tests for directory " << cwd << - " and using previous test result " << - std::boolalpha << passed); + if (this->checkSkipFile(cwd.string(), passed) == true) { + LOG_WARN("Skipping tests for directory " << cwd << " and using previous test result " << std::boolalpha << passed); return passed; } @@ -175,19 +134,14 @@ bool CTestRunner::runTests() std::string topPath(UNKNOWN_DIR); std::string testPath(UNKNOWN_DIR); boost::filesystem::path::iterator iter = cwd.end(); - if (--iter != cwd.begin() && - --iter != cwd.begin()) - { + if (--iter != cwd.begin() && --iter != cwd.begin()) { testPath = iter->string(); - while (--iter != cwd.begin()) - { - if (iter->string() == LIB_DIR) - { + while (--iter != cwd.begin()) { + if (iter->string() == LIB_DIR) { topPath = LIB_DIR; break; } - if (iter->string() == BIN_DIR) - { + if (iter->string() == BIN_DIR) { topPath = BIN_DIR; break; } @@ -196,61 +150,40 @@ bool CTestRunner::runTests() passed = this->timeTests(topPath, testPath); - if (this->updateSkipFile(cwd.string(), passed) == true) - { - LOG_INFO("Added directory " << cwd << - " to skip file with result " << - std::boolalpha << passed); + if (this->updateSkipFile(cwd.string(), passed) == true) { + LOG_INFO("Added directory " << cwd << " to skip file with result " << std::boolalpha << passed); } return passed; } -bool CTestRunner::timeTests(const std::string &topPath, - const std::string &testPath) -{ +bool CTestRunner::timeTests(const std::string& topPath, const std::string& testPath) { bool allPassed(true); CTestTimer testTimer; CppUnit::TestResultCollector resultCollector; // m_eventManager is a protected member in the base class - if (m_eventManager != 0) - { + if (m_eventManager != 0) { m_eventManager->addListener(&testTimer); m_eventManager->addListener(&resultCollector); - } - else - { + } else { LOG_ERROR("Unexpected NULL pointer"); } - if (m_TestCases.empty()) - { + if (m_TestCases.empty()) { allPassed = this->run(); - } - else - { - for (TStrVecItr itr = m_TestCases.begin(); - itr != m_TestCases.end() && allPassed; - ++itr) - { - try - { + } else { + for (TStrVecItr itr = m_TestCases.begin(); itr != m_TestCases.end() && allPassed; ++itr) { + try { allPassed = this->run(*itr); - } - catch (std::invalid_argument &) - { - LOG_ERROR("No Test called " << *itr << " in testsuite"); - } + } catch (std::invalid_argument&) { LOG_ERROR("No Test called " << *itr << " in testsuite"); } } } - if (m_eventManager != 0) - { + if (m_eventManager != 0) { std::ofstream xmlResultFile(XML_RESULT_FILE_NAME.c_str()); - if (xmlResultFile.is_open()) - { + if (xmlResultFile.is_open()) { CppUnit::XmlOutputter xmlOutputter(&resultCollector, xmlResultFile); CTimingXmlOutputterHook hook(testTimer, topPath, testPath); xmlOutputter.addHook(&hook); @@ 
-265,21 +198,15 @@ bool CTestRunner::timeTests(const std::string &topPath, return allPassed; } -bool CTestRunner::checkSkipFile(const std::string &cwd, - bool &passed) const -{ +bool CTestRunner::checkSkipFile(const std::string& cwd, bool& passed) const { std::string fullPath(core::CResourceLocator::cppRootDir() + '/' + SKIP_FILE_NAME); std::ifstream strm(fullPath.c_str()); std::string line; - while (std::getline(strm, line)) - { + while (std::getline(strm, line)) { size_t commaPos(line.rfind(',')); - if (commaPos != std::string::npos && - line.compare(0, commaPos, cwd) == 0 && - core::CStringUtils::stringToType(line.substr(commaPos + 1), - passed) == true) - { + if (commaPos != std::string::npos && line.compare(0, commaPos, cwd) == 0 && + core::CStringUtils::stringToType(line.substr(commaPos + 1), passed) == true) { return true; } } @@ -287,18 +214,13 @@ bool CTestRunner::checkSkipFile(const std::string &cwd, return false; } -bool CTestRunner::updateSkipFile(const std::string &cwd, - bool passed) const -{ +bool CTestRunner::updateSkipFile(const std::string& cwd, bool passed) const { std::string fullPath(core::CResourceLocator::cppRootDir() + '/' + SKIP_FILE_NAME); // Don't create the file if it doesn't already exist, and don't write to it // if it's not writable - if (core::COsFileFuncs::access(fullPath.c_str(), - core::COsFileFuncs::READABLE | core::COsFileFuncs::WRITABLE) == -1) - { - LOG_TRACE("Will not update skip file " << fullPath << - " : " << ::strerror(errno)); + if (core::COsFileFuncs::access(fullPath.c_str(), core::COsFileFuncs::READABLE | core::COsFileFuncs::WRITABLE) == -1) { + LOG_TRACE("Will not update skip file " << fullPath << " : " << ::strerror(errno)); return false; } @@ -308,8 +230,5 @@ bool CTestRunner::updateSkipFile(const std::string &cwd, return strm.good(); } - - } } - diff --git a/lib/test/CTestTimer.cc b/lib/test/CTestTimer.cc index 8b7c5f34fa..c9173d1ac8 100644 --- a/lib/test/CTestTimer.cc +++ b/lib/test/CTestTimer.cc @@ -9,40 +9,30 @@ #include +namespace ml { +namespace test { -namespace ml -{ -namespace test -{ - - -void CTestTimer::startTest(CppUnit::Test * /* test */) -{ +void CTestTimer::startTest(CppUnit::Test* /* test */) { m_StopWatch.reset(true); } -void CTestTimer::endTest(CppUnit::Test *test) -{ - if (test == 0) - { +void CTestTimer::endTest(CppUnit::Test* test) { + if (test == 0) { LOG_ERROR("Unexpected NULL pointer"); return; } uint64_t duration(m_StopWatch.stop()); - const std::string &testName = test->getName(); + const std::string& testName = test->getName(); m_TestTimes[testName] = duration; - LOG_INFO("Unit test timing - " << testName << - " took " << duration << "ms"); + LOG_INFO("Unit test timing - " << testName << " took " << duration << "ms"); } -uint64_t CTestTimer::timeForTest(const std::string &testName) const -{ +uint64_t CTestTimer::timeForTest(const std::string& testName) const { TStrUInt64MapCItr iter = m_TestTimes.find(testName); - if (iter == m_TestTimes.end()) - { + if (iter == m_TestTimes.end()) { LOG_WARN("No timing for test named " << testName); return 0; } @@ -50,31 +40,22 @@ uint64_t CTestTimer::timeForTest(const std::string &testName) const return iter->second; } -uint64_t CTestTimer::totalTime() const -{ +uint64_t CTestTimer::totalTime() const { uint64_t result(0); - for (TStrUInt64MapCItr iter = m_TestTimes.begin(); - iter != m_TestTimes.end(); - ++iter) - { + for (TStrUInt64MapCItr iter = m_TestTimes.begin(); iter != m_TestTimes.end(); ++iter) { result += iter->second; } return result; } -uint64_t 
CTestTimer::averageTime() const -{ - if (m_TestTimes.empty()) - { +uint64_t CTestTimer::averageTime() const { + if (m_TestTimes.empty()) { return 0; } return this->totalTime() / m_TestTimes.size(); } - - } } - diff --git a/lib/test/CTestTmpDir.cc b/lib/test/CTestTmpDir.cc index 77eb37b88c..784864b7b6 100644 --- a/lib/test/CTestTmpDir.cc +++ b/lib/test/CTestTmpDir.cc @@ -15,30 +15,20 @@ #include #include +namespace ml { +namespace test { -namespace ml -{ -namespace test -{ - - -std::string CTestTmpDir::tmpDir() -{ +std::string CTestTmpDir::tmpDir() { // Try to create a user-specific sub-directory of the temporary directory so // that multiple users sharing the same server don't clash. However, if // this fails for any reason drop back to just raw /tmp. struct passwd pwd; ::memset(&pwd, 0, sizeof(pwd)); static const size_t BUFSIZE(16384); - char buffer[BUFSIZE] = { '\0' }; - struct passwd *result(0); - ::getpwuid_r(::getuid(), - &pwd, - buffer, - BUFSIZE, - &result); - if (result == 0 || result->pw_name == 0) - { + char buffer[BUFSIZE] = {'\0'}; + struct passwd* result(0); + ::getpwuid_r(::getuid(), &pwd, buffer, BUFSIZE, &result); + if (result == 0 || result->pw_name == 0) { LOG_ERROR("Could not get current user name: " << ::strerror(errno)); return "/tmp"; } @@ -46,24 +36,17 @@ std::string CTestTmpDir::tmpDir() std::string userSubdir("/tmp/"); userSubdir += result->pw_name; - try - { + try { // Prior existence of the directory is not considered an error by // boost::filesystem, and this is what we want boost::filesystem::path directoryPath(userSubdir); boost::filesystem::create_directories(directoryPath); - } - catch (std::exception &e) - { - LOG_ERROR("Failed to create directory " << userSubdir << - " - " << e.what()); + } catch (std::exception& e) { + LOG_ERROR("Failed to create directory " << userSubdir << " - " << e.what()); return "/tmp"; } return userSubdir; } - - } } - diff --git a/lib/test/CTestTmpDir_Windows.cc b/lib/test/CTestTmpDir_Windows.cc index 96a4d876e4..4ea8756c02 100644 --- a/lib/test/CTestTmpDir_Windows.cc +++ b/lib/test/CTestTmpDir_Windows.cc @@ -9,26 +9,17 @@ #include +namespace ml { +namespace test { -namespace ml -{ -namespace test -{ - - -std::string CTestTmpDir::tmpDir() -{ - const char *temp(::getenv("TEMP")); - if (temp == 0) - { +std::string CTestTmpDir::tmpDir() { + const char* temp(::getenv("TEMP")); + if (temp == 0) { LOG_ERROR("%TEMP% environment variable not set"); return "."; } return temp; } - - } } - diff --git a/lib/test/CTimeSeriesTestData.cc b/lib/test/CTimeSeriesTestData.cc index ba57bf9163..3fa253a26b 100644 --- a/lib/test/CTimeSeriesTestData.cc +++ b/lib/test/CTimeSeriesTestData.cc @@ -15,85 +15,73 @@ #include -namespace ml -{ -namespace test -{ +namespace ml { +namespace test { const std::string CTimeSeriesTestData::DEFAULT_REGEX("\\s*(\\d+|\\d+\\.\\d+)\\s+([-]*\\d+|\\d+\\.\\d+|\\d+\\.\\d+e-?\\d+)\\s*"); -const std::string CTimeSeriesTestData::DEFAULT_BIVALUED_REGEX("\\s*(\\d+|\\d+\\.\\d+)\\s+([-]*\\d+|\\d+\\.\\d+|\\d+\\.\\d+e-?\\d+)\\s+([-]*\\d+|\\d+\\.\\d+|\\d+\\.\\d+e-?\\d+)\\s*"); +const std::string CTimeSeriesTestData::DEFAULT_BIVALUED_REGEX( + "\\s*(\\d+|\\d+\\.\\d+)\\s+([-]*\\d+|\\d+\\.\\d+|\\d+\\.\\d+e-?\\d+)\\s+([-]*\\d+|\\d+\\.\\d+|\\d+\\.\\d+e-?\\d+)\\s*"); const std::string CTimeSeriesTestData::DEFAULT_DATE_FORMAT(""); const std::string CTimeSeriesTestData::CSV_UNIX_REGEX("^(\\d+),([-]*[\\d\\.]+)"); const std::string CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX("^(\\d+),([-]*[\\d\\.]+),([-]*[\\d\\.]+)"); const std::string 
CTimeSeriesTestData::CSV_UNIX_DATE_FORMAT(""); const std::string CTimeSeriesTestData::CSV_ISO8601_REGEX("^\"(\\d+-\\d+-\\d+T\\d+:\\d+:\\d+)\\..*\",[\"]*([-]*[\\d\\.]+)[\"]*"); -const std::string CTimeSeriesTestData::CSV_ISO8601_BIVALUED_REGEX("^\"(\\d+-\\d+-\\d+T\\d+:\\d+:\\d+)\\..*\",[\"]*([-]*[\\d\\.]+)[\"]*, [\"]*([-]*[\\d\\.]+)[\"]*"); +const std::string CTimeSeriesTestData::CSV_ISO8601_BIVALUED_REGEX( + "^\"(\\d+-\\d+-\\d+T\\d+:\\d+:\\d+)\\..*\",[\"]*([-]*[\\d\\.]+)[\"]*, [\"]*([-]*[\\d\\.]+)[\"]*"); const std::string CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT("%Y-%m-%dT%H:%M:%S"); -bool CTimeSeriesTestData::parse(const std::string &fileName, - TTimeDoublePrVec &results, - const std::string ®ex, - const std::string &dateFormat) -{ +bool CTimeSeriesTestData::parse(const std::string& fileName, + TTimeDoublePrVec& results, + const std::string& regex, + const std::string& dateFormat) { core_t::TTime unused(0); return CTimeSeriesTestData::parse(fileName, results, unused, unused, regex, dateFormat); } -bool CTimeSeriesTestData::parse(const std::string &fileName, - TTimeDoublePrVec &results, - core_t::TTime &minTime, - core_t::TTime &maxTime, - const std::string ®ex, - const std::string &dateFormat) -{ +bool CTimeSeriesTestData::parse(const std::string& fileName, + TTimeDoublePrVec& results, + core_t::TTime& minTime, + core_t::TTime& maxTime, + const std::string& regex, + const std::string& dateFormat) { return CTimeSeriesTestData::parse(fileName, regex, dateFormat, results, minTime, maxTime); } -bool CTimeSeriesTestData::parse(const std::string &fileName, - TTimeDoubleVecPrVec &results, - const std::string ®ex, - const std::string &dateFormat) -{ +bool CTimeSeriesTestData::parse(const std::string& fileName, + TTimeDoubleVecPrVec& results, + const std::string& regex, + const std::string& dateFormat) { core_t::TTime unused(0); return CTimeSeriesTestData::parse(fileName, results, unused, unused, regex, dateFormat); } -bool CTimeSeriesTestData::parse(const std::string &fileName, - TTimeDoubleVecPrVec &results, - core_t::TTime &minTime, - core_t::TTime &maxTime, - const std::string ®ex, - const std::string &dateFormat) -{ +bool CTimeSeriesTestData::parse(const std::string& fileName, + TTimeDoubleVecPrVec& results, + core_t::TTime& minTime, + core_t::TTime& maxTime, + const std::string& regex, + const std::string& dateFormat) { return CTimeSeriesTestData::parse(fileName, regex, dateFormat, results, minTime, maxTime); } -bool CTimeSeriesTestData::parseCounter(const std::string &fileName, TTimeDoublePrVec &results) -{ - if (CTimeSeriesTestData::parse(fileName, results) == false) - { +bool CTimeSeriesTestData::parseCounter(const std::string& fileName, TTimeDoublePrVec& results) { + if (CTimeSeriesTestData::parse(fileName, results) == false) { return false; } double last(0); - bool started(false); - for (auto &result : results) - { + bool started(false); + for (auto& result : results) { double value = result.second; - if (started == false) - { + if (started == false) { result.second = 0; started = true; - } - else - { + } else { result.second = value - last; - if (result.second < 0) - { - LOG_WARN("Negative value " << value << "<" << last - << "@" << result.first << " setting counter to 0 "); + if (result.second < 0) { + LOG_WARN("Negative value " << value << "<" << last << "@" << result.first << " setting counter to 0 "); result.second = 0; } } @@ -103,38 +91,30 @@ bool CTimeSeriesTestData::parseCounter(const std::string &fileName, TTimeDoubleP return true; } -void 
CTimeSeriesTestData::transform(const TTimeDoublePrVec &data, TDoubleVec &results) -{ +void CTimeSeriesTestData::transform(const TTimeDoublePrVec& data, TDoubleVec& results) { results.clear(); results.reserve(data.size()); - for (const auto &datum : data) - { + for (const auto& datum : data) { results.push_back(datum.second); } } -void CTimeSeriesTestData::derive(const TTimeDoublePrVec &data, TTimeDoublePrVec &results) -{ +void CTimeSeriesTestData::derive(const TTimeDoublePrVec& data, TTimeDoublePrVec& results) { results.clear(); - if (data.size() <= 1) - { + if (data.size() <= 1) { return; } results.reserve(data.size() - 1); - bool hasStarted(false); + bool hasStarted(false); double lastValue(0.0); - for (const auto &datum : data) - { - if (hasStarted) - { + for (const auto& datum : data) { + if (hasStarted) { double v = datum.second - lastValue; results.emplace_back(datum.first, v); - } - else - { + } else { hasStarted = true; } @@ -142,15 +122,10 @@ void CTimeSeriesTestData::derive(const TTimeDoublePrVec &data, TTimeDoublePrVec } } -bool CTimeSeriesTestData::pad(const TTimeDoublePrVec &data, - core_t::TTime minTime, - core_t::TTime maxTime, - TTimeDoublePrVec &results) -{ +bool CTimeSeriesTestData::pad(const TTimeDoublePrVec& data, core_t::TTime minTime, core_t::TTime maxTime, TTimeDoublePrVec& results) { results.clear(); - if (minTime > maxTime) - { + if (minTime > maxTime) { LOG_ERROR("Invalid bounds " << minTime << ">" << maxTime); return false; } @@ -162,24 +137,18 @@ bool CTimeSeriesTestData::pad(const TTimeDoublePrVec &data, TTimeDoubleMap dataMap; - for (const auto &datum : data) - { - if (dataMap.insert({datum.first, datum.second}).second == false) - { + for (const auto& datum : data) { + if (dataMap.insert({datum.first, datum.second}).second == false) { LOG_ERROR("Duplicate values " << datum.first); return false; } } - for (core_t::TTime t = minTime; t <= maxTime; ++t) - { + for (core_t::TTime t = minTime; t <= maxTime; ++t) { auto itr = dataMap.find(t); - if (itr == dataMap.end()) - { + if (itr == dataMap.end()) { results.emplace_back(t, 0); - } - else - { + } else { results.emplace_back(t, itr->second); } } @@ -187,59 +156,49 @@ bool CTimeSeriesTestData::pad(const TTimeDoublePrVec &data, return true; } -namespace -{ +namespace { -void add(double value, double &target) -{ +void add(double value, double& target) { target = value; } -void add(double value, std::vector &target) -{ +void add(double value, std::vector& target) { target.push_back(value); } - } template -bool CTimeSeriesTestData::parse(const std::string &fileName, - const std::string ®ex, - const std::string &dateFormat, - std::vector> &results, - core_t::TTime &minTime, - core_t::TTime &maxTime) -{ +bool CTimeSeriesTestData::parse(const std::string& fileName, + const std::string& regex, + const std::string& dateFormat, + std::vector>& results, + core_t::TTime& minTime, + core_t::TTime& maxTime) { // reset data results.clear(); - std::string tokenRegexString(regex); + std::string tokenRegexString(regex); core::CRegex tokenRegex; - if (tokenRegex.init(tokenRegexString) == false) - { + if (tokenRegex.init(tokenRegexString) == false) { LOG_ERROR("Regex error"); return false; } std::ifstream inputStrm(fileName); - if (inputStrm.is_open() == false) - { + if (inputStrm.is_open() == false) { LOG_ERROR("Unable to read file " << fileName); return false; } std::string line; - while (std::getline(inputStrm, line)) - { - if (parseLine(tokenRegex, dateFormat, line, results) == false) - { + while (std::getline(inputStrm, line)) { + 
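// A minimal usage sketch for this parser (not part of the patch; the file
// name and row values below are hypothetical, and the regex/date-format
// defaults are assumed to be supplied by the class header):
//
//     ml::test::CTimeSeriesTestData::TTimeDoublePrVec rows;
//     if (ml::test::CTimeSeriesTestData::parse("data.txt", rows)) {
//         // With "data.txt" holding "1484006400 17.5\n1484006401 18.25\n",
//         // DEFAULT_REGEX tokenises each row into a (time, value) pair, so
//         // 'rows' becomes {(1484006400, 17.5), (1484006401, 18.25)}.
//     }
//
// Each non-blank line read by this loop is handed to parseLine(); the first
// line the regex fails to tokenise aborts the whole parse and returns false.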
if (parseLine(tokenRegex, dateFormat, line, results) == false) { return false; } } // Must have more than 1 value - if (results.empty()) - { + if (results.empty()) { LOG_ERROR("Zero values in file " << fileName); return false; } @@ -252,46 +211,36 @@ bool CTimeSeriesTestData::parse(const std::string &fileName, } template -bool CTimeSeriesTestData::parseLine(const core::CRegex &tokenRegex, - const std::string &dateFormat, - const std::string &line, - std::vector> &results) -{ - if (line.empty() || - line.find_first_not_of(core::CStringUtils::WHITESPACE_CHARS) == std::string::npos) - { +bool CTimeSeriesTestData::parseLine(const core::CRegex& tokenRegex, + const std::string& dateFormat, + const std::string& line, + std::vector>& results) { + if (line.empty() || line.find_first_not_of(core::CStringUtils::WHITESPACE_CHARS) == std::string::npos) { LOG_DEBUG("Ignoring blank line"); return true; } core::CRegex::TStrVec tokens; - if (tokenRegex.tokenise(line, tokens) == false) - { + if (tokenRegex.tokenise(line, tokens) == false) { LOG_ERROR("Regex error '" << tokenRegex.str() << "' '" << line << "'"); return false; } core_t::TTime time(0); - if (dateFormat.empty()) - { - if (core::CStringUtils::stringToType(tokens[0], time) == false) - { + if (dateFormat.empty()) { + if (core::CStringUtils::stringToType(tokens[0], time) == false) { LOG_ERROR("Invalid test data '" << line << "'"); return false; } - } - else if (core::CTimeUtils::strptime(dateFormat, tokens[0], time) == false) - { + } else if (core::CTimeUtils::strptime(dateFormat, tokens[0], time) == false) { LOG_ERROR("Invalid test data '" << line << "'"); return false; } results.emplace_back(time, T()); - for (std::size_t i = 1u; i < tokens.size(); ++i) - { + for (std::size_t i = 1u; i < tokens.size(); ++i) { double value(0.0); - if (core::CStringUtils::stringToType(tokens[i], value) == false) - { + if (core::CStringUtils::stringToType(tokens[i], value) == false) { LOG_ERROR("Invalid test data '" << line << "'"); return false; } @@ -300,7 +249,5 @@ bool CTimeSeriesTestData::parseLine(const core::CRegex &tokenRegex, return true; } - } } - diff --git a/lib/test/CTimingXmlOutputterHook.cc b/lib/test/CTimingXmlOutputterHook.cc index 09c2d480a0..1316fd00cd 100644 --- a/lib/test/CTimingXmlOutputterHook.cc +++ b/lib/test/CTimingXmlOutputterHook.cc @@ -12,108 +12,69 @@ #include #include +namespace ml { +namespace test { -namespace ml -{ -namespace test -{ - -namespace -{ +namespace { const std::string NAME_TAG("Name"); const std::string TEST_PATH_TAG("TestPath"); const std::string TIME_TAG("Time"); const std::string TOTAL_ELAPSED_TIME_TAG("TotalElapsedTime"); const std::string AVERAGE_TEST_CASE_TIME_TAG("AverageTestCaseTime"); - } - -CTimingXmlOutputterHook::CTimingXmlOutputterHook(const CTestTimer &testTimer, - const std::string &topPath, - const std::string &testPath) - : m_TestTimer(testTimer), - m_TopPath(topPath), - m_TestPath(testPath) -{ +CTimingXmlOutputterHook::CTimingXmlOutputterHook(const CTestTimer& testTimer, const std::string& topPath, const std::string& testPath) + : m_TestTimer(testTimer), m_TopPath(topPath), m_TestPath(testPath) { } -void CTimingXmlOutputterHook::failTestAdded(CppUnit::XmlDocument * /*document*/, - CppUnit::XmlElement *testElement, - CppUnit::Test *test, - CppUnit::TestFailure * /*failure*/) -{ - if (testElement == 0 || test == 0) - { +void CTimingXmlOutputterHook::failTestAdded(CppUnit::XmlDocument* /*document*/, + CppUnit::XmlElement* testElement, + CppUnit::Test* test, + CppUnit::TestFailure* /*failure*/) { + if 
(testElement == 0 || test == 0) { return; } - const std::string &testName = test->getName(); + const std::string& testName = test->getName(); - testElement->elementFor(NAME_TAG)->setContent( - m_TopPath + '.' + m_TestPath + '.' + testName); + testElement->elementFor(NAME_TAG)->setContent(m_TopPath + '.' + m_TestPath + '.' + testName); } -void CTimingXmlOutputterHook::successfulTestAdded(CppUnit::XmlDocument * /*document*/, - CppUnit::XmlElement *testElement, - CppUnit::Test *test) -{ - if (testElement == 0 || test == 0) - { +void CTimingXmlOutputterHook::successfulTestAdded(CppUnit::XmlDocument* /*document*/, + CppUnit::XmlElement* testElement, + CppUnit::Test* test) { + if (testElement == 0 || test == 0) { return; } - const std::string &testName = test->getName(); + const std::string& testName = test->getName(); - testElement->elementFor(NAME_TAG)->setContent( - m_TopPath + '.' + m_TestPath + '.' + testName); + testElement->elementFor(NAME_TAG)->setContent(m_TopPath + '.' + m_TestPath + '.' + testName); - testElement->addElement( - new CppUnit::XmlElement(TEST_PATH_TAG, - m_TestPath + '/' + testName) - ); - testElement->addElement( - new CppUnit::XmlElement(TIME_TAG, - this->toSecondsStr(m_TestTimer.timeForTest(testName))) - ); + testElement->addElement(new CppUnit::XmlElement(TEST_PATH_TAG, m_TestPath + '/' + testName)); + testElement->addElement(new CppUnit::XmlElement(TIME_TAG, this->toSecondsStr(m_TestTimer.timeForTest(testName)))); } -void CTimingXmlOutputterHook::statisticsAdded(CppUnit::XmlDocument * /*document*/, - CppUnit::XmlElement *statisticsElement) -{ - if (statisticsElement == 0) - { +void CTimingXmlOutputterHook::statisticsAdded(CppUnit::XmlDocument* /*document*/, CppUnit::XmlElement* statisticsElement) { + if (statisticsElement == 0) { return; } - statisticsElement->addElement( - new CppUnit::XmlElement(TOTAL_ELAPSED_TIME_TAG, - this->toSecondsStr(m_TestTimer.totalTime())) - ); - statisticsElement->addElement( - new CppUnit::XmlElement(AVERAGE_TEST_CASE_TIME_TAG, - this->toSecondsStr(m_TestTimer.averageTime())) - ); + statisticsElement->addElement(new CppUnit::XmlElement(TOTAL_ELAPSED_TIME_TAG, this->toSecondsStr(m_TestTimer.totalTime()))); + statisticsElement->addElement(new CppUnit::XmlElement(AVERAGE_TEST_CASE_TIME_TAG, this->toSecondsStr(m_TestTimer.averageTime()))); } -std::string CTimingXmlOutputterHook::toSecondsStr(uint64_t ms) -{ +std::string CTimingXmlOutputterHook::toSecondsStr(uint64_t ms) { std::string result(core::CStringUtils::typeToString(ms)); - if (result.length() < 4) - { + if (result.length() < 4) { result.insert(0, "0.000", 5 - result.length()); - } - else - { + } else { result.insert(result.length() - 3, 1, '.'); } return result; } - - } } - diff --git a/lib/ver/unittest/CBuildInfoTest.cc b/lib/ver/unittest/CBuildInfoTest.cc index 19fe80276b..4cdf660147 100644 --- a/lib/ver/unittest/CBuildInfoTest.cc +++ b/lib/ver/unittest/CBuildInfoTest.cc @@ -12,20 +12,15 @@ #include +CppUnit::Test* CBuildInfoTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBuildInfoTest"); -CppUnit::Test *CBuildInfoTest::suite() -{ - CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("CBuildInfoTest"); - - suiteOfTests->addTest( new CppUnit::TestCaller( - "CBuildInfoTest::testFullInfo", - &CBuildInfoTest::testFullInfo) ); + suiteOfTests->addTest(new CppUnit::TestCaller("CBuildInfoTest::testFullInfo", &CBuildInfoTest::testFullInfo)); return suiteOfTests; } -void CBuildInfoTest::testFullInfo(void) -{ +void CBuildInfoTest::testFullInfo(void) { 
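// The assertions that follow deliberately check only stable substrings of
// the version banner (the company name and the current year) rather than an
// exact string, so the test survives version bumps. A representative banner
// might look like this (hypothetical values; the exact layout is whatever
// ml::ver::CBuildInfo::fullInfo() composes):
//
//     ml_test (64 bit): Version 6.3.0 (Build abc123) Copyright (c) 2018 Elasticsearch BV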
std::string fullInfo(ml::ver::CBuildInfo::fullInfo()); LOG_DEBUG(fullInfo); @@ -39,4 +34,3 @@ void CBuildInfoTest::testFullInfo(void) CPPUNIT_ASSERT(fullInfo.find("Elasticsearch BV") != std::string::npos); CPPUNIT_ASSERT(fullInfo.find(currentYear) != std::string::npos); } - diff --git a/lib/ver/unittest/CBuildInfoTest.h b/lib/ver/unittest/CBuildInfoTest.h index 1304d9c78a..68ca6dfbb3 100644 --- a/lib/ver/unittest/CBuildInfoTest.h +++ b/lib/ver/unittest/CBuildInfoTest.h @@ -8,14 +8,11 @@ #include +class CBuildInfoTest : public CppUnit::TestFixture { +public: + void testFullInfo(void); -class CBuildInfoTest : public CppUnit::TestFixture -{ - public: - void testFullInfo(void); - - static CppUnit::Test *suite(); + static CppUnit::Test* suite(); }; #endif // INCLUDED_CBuildInfoTest_h - diff --git a/lib/ver/unittest/Main.cc b/lib/ver/unittest/Main.cc index eb561bb709..5dfa3e84ab 100644 --- a/lib/ver/unittest/Main.cc +++ b/lib/ver/unittest/Main.cc @@ -7,13 +7,10 @@ #include "CBuildInfoTest.h" - -int main(int argc, const char **argv) -{ +int main(int argc, const char** argv) { ml::test::CTestRunner runner(argc, argv); - runner.addTest( CBuildInfoTest::suite() ); + runner.addTest(CBuildInfoTest::suite()); return !runner.runTests(); } - From d6167799ce5476469b0bb95282256b4a3754f440 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 13 Apr 2018 22:39:04 +0100 Subject: [PATCH 16/29] Reformat --- .clang-format | 21 +- bin/autoconfig/CCmdLineParser.cc | 27 +- bin/autoconfig/Main.cc | 25 +- bin/autodetect/CCmdLineParser.cc | 103 +- bin/autodetect/Main.cc | 103 +- bin/categorize/CCmdLineParser.cc | 48 +- bin/categorize/Main.cc | 43 +- .../CBlockingCallCancellerThread.cc | 6 +- bin/controller/CBlockingCallCancellerThread.h | 3 +- bin/controller/CCmdLineParser.cc | 21 +- bin/controller/CCmdLineParser.h | 6 +- bin/controller/CCommandProcessor.cc | 6 +- bin/controller/Main.cc | 15 +- .../CBlockingCallCancellerThreadTest.cc | 12 +- .../unittest/CCommandProcessorTest.cc | 28 +- bin/normalize/CCmdLineParser.cc | 30 +- bin/normalize/Main.cc | 36 +- devbin/move_copy_swap/Main.cc | 6 +- devbin/unixtime_to_string/CCmdLineParser.cc | 18 +- devbin/unixtime_to_string/CCmdLineParser.h | 3 +- devbin/vfprog/CLooper.cc | 4 +- devbin/vfprog/CLooper.h | 9 +- devbin/vsbug/Main.cc | 6 +- include/api/CAnomalyJob.h | 46 +- include/api/CBaseTokenListDataTyper.h | 46 +- include/api/CCategoryExamplesCollector.h | 7 +- include/api/CCmdSkeleton.h | 5 +- include/api/CCsvOutputWriter.h | 8 +- include/api/CDataProcessor.h | 3 +- include/api/CDataTyper.h | 11 +- include/api/CDetectionRulesJsonParser.h | 21 +- include/api/CFieldConfig.h | 29 +- include/api/CFieldDataTyper.h | 24 +- include/api/CForecastRunner.h | 20 +- include/api/CHierarchicalResultsWriter.h | 23 +- include/api/CJsonOutputWriter.h | 39 +- include/api/CLineifiedJsonInputParser.h | 4 +- include/api/CLineifiedJsonOutputWriter.h | 7 +- include/api/CLineifiedXmlInputParser.h | 8 +- include/api/CLineifiedXmlOutputWriter.h | 3 +- include/api/CModelSizeStatsJsonWriter.h | 5 +- include/api/CNullOutput.h | 3 +- include/api/COutputChainer.h | 6 +- include/api/COutputHandler.h | 6 +- include/api/CResultNormalizer.h | 13 +- include/api/CStateRestoreStreamFilter.h | 3 +- include/api/CTokenListDataTyper.h | 38 +- include/api/CTokenListReverseSearchCreator.h | 16 +- .../api/CTokenListReverseSearchCreatorIntf.h | 16 +- include/config/CAutoconfigurer.h | 3 +- .../config/CAutoconfigurerDetectorPenalties.h | 6 +- include/config/CAutoconfigurerParams.h | 17 +- 
include/config/CDataCountStatistics.h | 37 +- include/config/CDataSemantics.h | 10 +- include/config/CDataSummaryStatistics.h | 19 +- include/config/CDetectorRecord.h | 6 +- include/config/CDetectorSpecification.h | 9 +- include/config/CFieldRolePenalty.h | 16 +- include/config/CFieldStatistics.h | 3 +- include/config/CLongTailPenalty.h | 6 +- include/config/CLowVariationPenalty.h | 27 +- include/config/CNotEnoughDataPenalty.h | 9 +- include/config/CPenalty.h | 4 +- include/config/CReportWriter.h | 15 +- include/config/CTooMuchDataPenalty.h | 9 +- include/config/ConfigTypes.h | 26 +- include/core/BoostMultiIndex.h | 2 +- include/core/CBase64Filter.h | 31 +- include/core/CBlockingMessageQueue.h | 15 +- include/core/CCompressOStream.h | 4 +- include/core/CCompressedDictionary.h | 23 +- include/core/CConcurrentQueue.h | 3 +- include/core/CContainerPrinter.h | 10 +- include/core/CDelimiter.h | 13 +- include/core/CDualThreadStreamBuf.h | 4 +- include/core/CFlatPrefixTree.h | 5 +- include/core/CFloatStorage.h | 17 +- include/core/CHashing.h | 40 +- include/core/CIEEE754.h | 3 +- include/core/CLogger.h | 4 +- include/core/CMaskIterator.h | 34 +- include/core/CMemory.h | 216 +- include/core/CMemoryUsage.h | 6 +- include/core/CMessageBuffer.h | 6 +- include/core/CMessageQueue.h | 16 +- include/core/CPersistUtils.h | 178 +- include/core/CPolymorphicStackObjectCPtr.h | 40 +- include/core/CRapidJsonConcurrentLineWriter.h | 3 +- include/core/CRapidJsonPoolAllocator.h | 7 +- include/core/CRapidJsonWriterBase.h | 80 +- include/core/CRapidXmlParser.h | 17 +- include/core/CScopedRapidJsonPoolAllocator.h | 5 +- include/core/CSmallVector.h | 19 +- include/core/CStateCompressor.h | 3 +- include/core/CStateMachine.h | 12 +- include/core/CStringSimilarityTester.h | 41 +- include/core/CStringUtils.h | 11 +- include/core/CThreadFarm.h | 39 +- include/core/CThreadFarmReceiver.h | 3 +- include/core/CTicker.h | 4 +- include/core/CTimeUtils.h | 8 +- include/core/CTriple.h | 13 +- include/core/CVectorRange.h | 25 +- include/core/CWordDictionary.h | 13 +- include/core/CWordExtractor.h | 4 +- include/core/CXmlNode.h | 6 +- include/core/CXmlNodeWithChildren.h | 4 +- include/core/CXmlNodeWithChildrenPool.h | 3 +- include/core/CXmlParser.h | 16 +- include/core/CXmlParserIntf.h | 18 +- include/core/LogMacros.h | 32 +- include/core/RestoreMacros.h | 72 +- include/maths/CAdaptiveBucketing.h | 8 +- include/maths/CAnnotatedVector.h | 6 +- include/maths/CBasicStatistics.h | 207 +- include/maths/CBasicStatisticsPersist.h | 10 +- include/maths/CBjkstUniqueValues.h | 3 +- include/maths/CBootstrapClusterer.h | 168 +- include/maths/CBoundingBox.h | 4 +- include/maths/CCalendarComponent.h | 4 +- .../CCalendarComponentAdaptiveBucketing.h | 11 +- include/maths/CCalendarFeature.h | 3 +- include/maths/CCategoricalTools.h | 12 +- include/maths/CChecksum.h | 49 +- include/maths/CClusterer.h | 14 +- include/maths/CClustererStateSerialiser.h | 20 +- include/maths/CCompositeFunctions.h | 15 +- include/maths/CConstantPrior.h | 34 +- include/maths/CCountMinSketch.h | 4 +- include/maths/CDecompositionComponent.h | 6 +- include/maths/CEqualWithTolerance.h | 23 +- include/maths/CExpandingWindow.h | 5 +- include/maths/CGammaRateConjugate.h | 37 +- include/maths/CGradientDescent.h | 7 +- include/maths/CGramSchmidt.h | 16 +- include/maths/CInformationCriteria.h | 58 +- include/maths/CIntegration.h | 41 +- include/maths/CKMeansFast.h | 55 +- include/maths/CKMeansOnline.h | 62 +- include/maths/CKMeansOnline1d.h | 6 +- include/maths/CKMostCorrelated.h | 
8 +- include/maths/CKdTree.h | 51 +- include/maths/CLassoLogisticRegression.h | 47 +- include/maths/CLinearAlgebra.h | 185 +- include/maths/CLinearAlgebraEigen.h | 68 +- include/maths/CLinearAlgebraPersist.h | 18 +- include/maths/CLinearAlgebraTools.h | 274 ++- include/maths/CLogNormalMeanPrecConjugate.h | 46 +- include/maths/CMathsFuncs.h | 20 +- include/maths/CMixtureDistribution.h | 74 +- include/maths/CModel.h | 60 +- include/maths/CModelDetail.h | 4 +- include/maths/CModelStateSerialiser.h | 4 +- include/maths/CMultimodalPrior.h | 62 +- include/maths/CMultimodalPriorMode.h | 18 +- include/maths/CMultimodalPriorUtils.h | 244 ++- include/maths/CMultinomialConjugate.h | 43 +- include/maths/CMultivariateConstantPrior.h | 32 +- include/maths/CMultivariateMultimodalPrior.h | 239 ++- .../CMultivariateMultimodalPriorFactory.h | 6 +- include/maths/CMultivariateNormalConjugate.h | 301 ++- .../CMultivariateNormalConjugateFactory.h | 9 +- include/maths/CMultivariateOneOfNPrior.h | 38 +- .../maths/CMultivariateOneOfNPriorFactory.h | 11 +- include/maths/CMultivariatePrior.h | 37 +- include/maths/CNaiveBayes.h | 15 +- include/maths/CNaturalBreaksClassifier.h | 30 +- include/maths/CNormalMeanPrecConjugate.h | 51 +- include/maths/COneOfNPrior.h | 47 +- include/maths/COrderings.h | 287 +-- include/maths/COrdinal.h | 3 +- include/maths/CPRNG.h | 12 +- include/maths/CPackedBitVector.h | 3 +- include/maths/CPeriodicityHypothesisTests.h | 67 +- include/maths/CPoissonMeanConjugate.h | 43 +- include/maths/CPrior.h | 50 +- include/maths/CPriorDetail.h | 10 +- include/maths/CPriorStateSerialiser.h | 11 +- include/maths/CQDigest.h | 3 +- include/maths/CQuantileSketch.h | 12 +- include/maths/CRadialBasisFunction.h | 30 +- include/maths/CRandomProjectionClusterer.h | 68 +- include/maths/CRegression.h | 37 +- include/maths/CRegressionDetail.h | 40 +- include/maths/CSampling.h | 113 +- include/maths/CSeasonalComponent.h | 9 +- .../CSeasonalComponentAdaptiveBucketing.h | 23 +- include/maths/CSeasonalTime.h | 6 +- include/maths/CSetTools.h | 63 +- include/maths/CSolvers.h | 125 +- include/maths/CSphericalCluster.h | 53 +- include/maths/CSpline.h | 124 +- include/maths/CTimeSeriesChangeDetector.h | 72 +- include/maths/CTimeSeriesDecomposition.h | 23 +- .../maths/CTimeSeriesDecompositionDetail.h | 30 +- .../maths/CTimeSeriesDecompositionInterface.h | 17 +- .../CTimeSeriesDecompositionStateSerialiser.h | 3 +- include/maths/CTimeSeriesDecompositionStub.h | 11 +- include/maths/CTimeSeriesModel.h | 107 +- include/maths/CTools.h | 35 +- include/maths/CToolsDetail.h | 40 +- include/maths/CTrendComponent.h | 3 +- include/maths/CTrendTests.h | 3 +- include/maths/CTypeConversions.h | 6 +- include/maths/CXMeans.h | 31 +- include/maths/CXMeansOnline.h | 268 ++- include/maths/CXMeansOnline1d.h | 21 +- include/maths/CXMeansOnlineFactory.h | 44 +- include/maths/Constants.h | 6 +- include/maths/MathsTypes.h | 54 +- include/maths/ProbabilityAggregators.h | 30 +- include/model/CAnnotatedProbability.h | 12 +- include/model/CAnnotatedProbabilityBuilder.h | 4 +- include/model/CAnomalyDetector.h | 24 +- include/model/CAnomalyDetectorModel.h | 76 +- include/model/CAnomalyDetectorModelConfig.h | 60 +- include/model/CAnomalyScore.h | 10 +- include/model/CBucketGatherer.h | 40 +- include/model/CBucketQueue.h | 46 +- include/model/CCountingModel.h | 17 +- include/model/CCountingModelFactory.h | 12 +- include/model/CDataGatherer.h | 50 +- include/model/CDynamicStringIdRegistry.h | 5 +- include/model/CEventRateBucketGatherer.h | 48 +- 
include/model/CEventRateModel.h | 12 +- include/model/CEventRateModelFactory.h | 12 +- include/model/CEventRatePopulationModel.h | 18 +- .../model/CEventRatePopulationModelFactory.h | 12 +- include/model/CForecastDataSink.h | 9 +- include/model/CGathererTools.h | 37 +- include/model/CHierarchicalResults.h | 41 +- .../model/CHierarchicalResultsAggregator.h | 8 +- include/model/CHierarchicalResultsLevelSet.h | 62 +- .../model/CHierarchicalResultsNormalizer.h | 26 +- include/model/CIndividualModel.h | 30 +- include/model/CIndividualModelDetail.h | 47 +- include/model/CInterimBucketCorrector.h | 5 +- include/model/CLimits.h | 3 +- include/model/CMemoryUsageEstimator.h | 7 +- include/model/CMetricBucketGatherer.h | 14 +- include/model/CMetricModel.h | 12 +- include/model/CMetricModelFactory.h | 12 +- include/model/CMetricMultivariateStatistic.h | 18 +- include/model/CMetricPartialStatistic.h | 21 +- include/model/CMetricPopulationModel.h | 18 +- include/model/CMetricPopulationModelFactory.h | 12 +- include/model/CMetricStatisticWrappers.h | 88 +- include/model/CModelDetailsView.h | 33 +- include/model/CModelFactory.h | 58 +- include/model/CModelParams.h | 3 +- include/model/CModelPlotData.h | 3 +- include/model/CModelTools.h | 26 +- include/model/CPartitioningFields.h | 3 +- include/model/CPopulationModel.h | 42 +- include/model/CPopulationModelDetail.h | 28 +- .../CProbabilityAndInfluenceCalculator.h | 15 +- include/model/CResultsQueue.h | 4 +- include/model/CSampleGatherer.h | 132 +- include/model/CSampleQueue.h | 133 +- include/model/CSearchKey.h | 28 +- include/model/CStringStore.h | 8 +- include/model/ModelTypes.h | 247 ++- include/test/CMultiFileSearcher.h | 4 +- include/test/CRandomNumbers.h | 28 +- include/test/CRandomNumbersDetail.h | 18 +- include/test/CTimeSeriesTestData.h | 5 +- include/test/CTimingXmlOutputterHook.h | 17 +- lib/api/CAnomalyJob.cc | 495 +++-- lib/api/CBackgroundPersister.cc | 23 +- lib/api/CBaseTokenListDataTyper.cc | 98 +- lib/api/CBenchMarker.cc | 45 +- lib/api/CCategoryExamplesCollector.cc | 29 +- lib/api/CCmdSkeleton.cc | 6 +- lib/api/CConfigUpdater.cc | 6 +- lib/api/CCsvInputParser.cc | 40 +- lib/api/CCsvOutputWriter.cc | 37 +- lib/api/CDataProcessor.cc | 3 +- lib/api/CDataTyper.cc | 3 +- lib/api/CDetectionRulesJsonParser.cc | 42 +- lib/api/CFieldConfig.cc | 331 +-- lib/api/CFieldDataTyper.cc | 102 +- lib/api/CForecastRunner.cc | 151 +- lib/api/CHierarchicalResultsWriter.cc | 430 ++-- lib/api/CIoManager.cc | 20 +- lib/api/CJsonOutputWriter.cc | 247 ++- lib/api/CLengthEncodedInputParser.cc | 12 +- lib/api/CLineifiedInputParser.cc | 6 +- lib/api/CLineifiedJsonInputParser.cc | 33 +- lib/api/CLineifiedJsonOutputWriter.cc | 22 +- lib/api/CLineifiedXmlInputParser.cc | 20 +- lib/api/CLineifiedXmlOutputWriter.cc | 15 +- lib/api/CModelPlotDataJsonWriter.cc | 46 +- lib/api/CModelSnapshotJsonWriter.cc | 9 +- lib/api/CNullOutput.cc | 3 +- lib/api/COutputChainer.cc | 21 +- lib/api/COutputHandler.cc | 3 +- lib/api/CResultNormalizer.cc | 47 +- lib/api/CSingleStreamDataAdder.cc | 6 +- lib/api/CSingleStreamSearcher.cc | 6 +- lib/api/CStateRestoreStreamFilter.cc | 9 +- lib/api/CTokenListReverseSearchCreator.cc | 9 +- lib/api/CTokenListReverseSearchCreatorIntf.cc | 6 +- lib/api/CTokenListType.cc | 83 +- lib/api/dump_state/Main.cc | 65 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 50 +- lib/api/unittest/CAnomalyJobTest.cc | 200 +- lib/api/unittest/CBackgroundPersisterTest.cc | 71 +- .../unittest/CBaseTokenListDataTyperTest.cc | 76 +- .../CCategoryExamplesCollectorTest.cc | 42 +- 
lib/api/unittest/CConfigUpdaterTest.cc | 94 +- lib/api/unittest/CCsvInputParserTest.cc | 62 +- lib/api/unittest/CCsvOutputWriterTest.cc | 43 +- .../unittest/CDetectionRulesJsonParserTest.cc | 96 +- lib/api/unittest/CFieldConfigTest.cc | 198 +- lib/api/unittest/CFieldConfigTest.h | 9 +- lib/api/unittest/CFieldDataTyperTest.cc | 54 +- lib/api/unittest/CForecastRunnerTest.cc | 193 +- lib/api/unittest/CIoManagerTest.cc | 32 +- lib/api/unittest/CJsonOutputWriterTest.cc | 1804 +++++------------ .../unittest/CLengthEncodedInputParserTest.cc | 29 +- .../unittest/CLineifiedJsonInputParserTest.cc | 13 +- .../CLineifiedJsonOutputWriterTest.cc | 16 +- .../unittest/CLineifiedXmlInputParserTest.cc | 20 +- lib/api/unittest/CMockDataAdder.cc | 3 +- lib/api/unittest/CMockDataProcessor.cc | 6 +- lib/api/unittest/CMockDataProcessor.h | 3 +- lib/api/unittest/CMockSearcher.cc | 3 +- .../unittest/CModelPlotDataJsonWriterTest.cc | 26 +- .../unittest/CModelSnapshotJsonWriterTest.cc | 34 +- lib/api/unittest/CMultiFileDataAdderTest.cc | 96 +- lib/api/unittest/COutputChainerTest.cc | 21 +- lib/api/unittest/CRestorePreviousStateTest.cc | 117 +- lib/api/unittest/CRestorePreviousStateTest.h | 6 +- lib/api/unittest/CResultNormalizerTest.cc | 157 +- .../unittest/CSingleStreamDataAdderTest.cc | 93 +- .../unittest/CStateRestoreStreamFilterTest.cc | 15 +- lib/api/unittest/CStringStoreTest.cc | 159 +- lib/api/unittest/CTokenListDataTyperTest.cc | 464 +++-- .../CTokenListReverseSearchCreatorTest.cc | 51 +- lib/config/CAutoconfigurer.cc | 70 +- .../CAutoconfigurerDetectorPenalties.cc | 35 +- .../CAutoconfigurerFieldRolePenalties.cc | 25 +- lib/config/CAutoconfigurerParams.cc | 469 +++-- lib/config/CDataCountStatistics.cc | 100 +- lib/config/CDataSemantics.cc | 57 +- lib/config/CDataSummaryStatistics.cc | 84 +- lib/config/CDetectorEnumerator.cc | 45 +- lib/config/CDetectorFieldRolePenalty.cc | 6 +- lib/config/CDetectorRecord.cc | 21 +- lib/config/CDetectorSpecification.cc | 120 +- lib/config/CFieldRolePenalty.cc | 38 +- lib/config/CFieldStatistics.cc | 28 +- lib/config/CLongTailPenalty.cc | 46 +- lib/config/CLowInformationContentPenalty.cc | 50 +- lib/config/CLowVariationPenalty.cc | 134 +- lib/config/CNotEnoughDataPenalty.cc | 68 +- lib/config/CPenalty.cc | 16 +- lib/config/CPolledDataPenalty.cc | 24 +- lib/config/CReportWriter.cc | 142 +- .../CSpanTooSmallForBucketLengthPenalty.cc | 15 +- lib/config/CSparseCountPenalty.cc | 59 +- lib/config/CTooMuchDataPenalty.cc | 73 +- lib/config/CTools.cc | 19 +- lib/config/ConfigTypes.cc | 16 +- lib/config/Constants.cc | 3 +- .../unittest/CAutoconfigurerParamsTest.cc | 95 +- lib/config/unittest/CDataSemanticsTest.cc | 30 +- .../unittest/CDataSummaryStatisticsTest.cc | 85 +- .../unittest/CDetectorEnumeratorTest.cc | 243 +-- lib/config/unittest/CReportWriterTest.cc | 66 +- lib/core/CBase64Filter.cc | 3 +- lib/core/CBufferFlushTimer.cc | 6 +- lib/core/CCompressOStream.cc | 5 +- lib/core/CCompressUtils.cc | 3 +- lib/core/CCrashHandler_Linux.cc | 14 +- lib/core/CDelimiter.cc | 37 +- lib/core/CDetachedProcessSpawner.cc | 28 +- lib/core/CDetachedProcessSpawner_Windows.cc | 54 +- lib/core/CDualThreadStreamBuf.cc | 22 +- lib/core/CFlatPrefixTree.cc | 27 +- lib/core/CHashing.cc | 68 +- lib/core/CHexUtils.cc | 9 +- lib/core/CIEEE754.cc | 6 +- lib/core/CJsonLogLayout.cc | 12 +- lib/core/CJsonOutputStreamWrapper.cc | 12 +- lib/core/CJsonStatePersistInserter.cc | 3 +- lib/core/CJsonStateRestoreTraverser.cc | 36 +- lib/core/CLogger.cc | 41 +- lib/core/CMemoryUsage.cc | 23 +- 
lib/core/CMonotonicTime_MacOSX.cc | 3 +- lib/core/CMonotonicTime_Windows.cc | 6 +- lib/core/CNamedPipeFactory.cc | 34 +- lib/core/CNamedPipeFactory_Windows.cc | 37 +- lib/core/COsFileFuncs_Windows.cc | 19 +- lib/core/CPatternSet.cc | 11 +- lib/core/CProcess.cc | 4 +- lib/core/CProcessPriority_Linux.cc | 3 +- lib/core/CProcess_Windows.cc | 10 +- lib/core/CRapidJsonConcurrentLineWriter.cc | 3 +- lib/core/CRapidXmlParser.cc | 52 +- lib/core/CRapidXmlStatePersistInserter.cc | 37 +- lib/core/CRapidXmlStateRestoreTraverser.cc | 13 +- lib/core/CRegex.cc | 33 +- lib/core/CScopedReadLock.cc | 3 +- lib/core/CScopedWriteLock.cc | 3 +- lib/core/CShellArgQuoter_Windows.cc | 3 +- lib/core/CStateCompressor.cc | 16 +- lib/core/CStateDecompressor.cc | 63 +- lib/core/CStateMachine.cc | 32 +- lib/core/CStatePersistInserter.cc | 8 +- lib/core/CStatistics.cc | 72 +- lib/core/CStopWatch.cc | 3 +- lib/core/CStoredStringPtr.cc | 6 +- lib/core/CStrFTime_Windows.cc | 3 +- lib/core/CStrPTime_Linux.cc | 3 +- lib/core/CStrPTime_Windows.cc | 3 +- lib/core/CStringCache.cc | 9 +- lib/core/CStringSimilarityTester.cc | 19 +- lib/core/CStringUtils.cc | 45 +- lib/core/CThread.cc | 3 +- lib/core/CThread_Windows.cc | 6 +- lib/core/CTimeUtils.cc | 11 +- lib/core/CTimezone.cc | 6 +- lib/core/CTimezone_Windows.cc | 24 +- lib/core/CUname.cc | 6 +- lib/core/CUname_Windows.cc | 28 +- lib/core/CWindowsError_Windows.cc | 20 +- lib/core/CWordDictionary.cc | 9 +- lib/core/CWordExtractor.cc | 13 +- lib/core/CXmlNode.cc | 6 +- lib/core/CXmlNodeWithChildren.cc | 16 +- lib/core/CXmlNodeWithChildrenPool.cc | 12 +- lib/core/CXmlParser.cc | 129 +- lib/core/unittest/CAllocationStrategyTest.cc | 4 +- lib/core/unittest/CBase64FilterTest.cc | 46 +- .../unittest/CBlockingMessageQueueTest.cc | 4 +- lib/core/unittest/CByteSwapperTest.cc | 3 +- lib/core/unittest/CCompressUtilsTest.cc | 15 +- .../unittest/CCompressedDictionaryTest.cc | 11 +- lib/core/unittest/CConcurrentWrapperTest.cc | 48 +- lib/core/unittest/CContainerPrinterTest.cc | 23 +- lib/core/unittest/CContainerThroughputTest.cc | 90 +- lib/core/unittest/CDelimiterTest.cc | 105 +- .../unittest/CDetachedProcessSpawnerTest.cc | 37 +- lib/core/unittest/CDualThreadStreamBufTest.cc | 74 +- lib/core/unittest/CFileDeleterTest.cc | 9 +- lib/core/unittest/CFlatPrefixTreeTest.cc | 44 +- lib/core/unittest/CFunctionalTest.cc | 9 +- lib/core/unittest/CHashingTest.cc | 73 +- lib/core/unittest/CHexUtilsTest.cc | 3 +- lib/core/unittest/CIEEE754Test.cc | 3 +- lib/core/unittest/CJsonLogLayoutTest.cc | 19 +- .../unittest/CJsonOutputStreamWrapperTest.cc | 12 +- .../unittest/CJsonStatePersistInserterTest.cc | 8 +- .../CJsonStateRestoreTraverserTest.cc | 38 +- lib/core/unittest/CLoggerTest.cc | 27 +- lib/core/unittest/CMapPopulationTest.cc | 107 +- lib/core/unittest/CMapPopulationTest.h | 8 +- .../unittest/CMemoryUsageJsonWriterTest.cc | 15 +- lib/core/unittest/CMemoryUsageTest.cc | 316 ++- lib/core/unittest/CMessageBufferTest.cc | 3 +- lib/core/unittest/CMessageQueueTest.cc | 10 +- lib/core/unittest/CMonotonicTimeTest.cc | 14 +- lib/core/unittest/CMutexTest.cc | 3 +- lib/core/unittest/CNamedPipeFactoryTest.cc | 58 +- lib/core/unittest/COsFileFuncsTest.cc | 15 +- lib/core/unittest/CPatternSetTest.cc | 48 +- lib/core/unittest/CPersistUtilsTest.cc | 48 +- .../CPolymorphicStackObjectCPtrTest.cc | 7 +- lib/core/unittest/CProcessPriorityTest.cc | 4 +- .../unittest/CProcessPriorityTest_Linux.cc | 7 +- lib/core/unittest/CProcessTest.cc | 3 +- lib/core/unittest/CProgNameTest.cc | 6 +- 
lib/core/unittest/CRapidJsonLineWriterTest.cc | 31 +- lib/core/unittest/CRapidJsonWriterBaseTest.cc | 28 +- lib/core/unittest/CRapidXmlParserTest.cc | 115 +- lib/core/unittest/CRapidXmlParserTest.h | 4 +- .../CRapidXmlStatePersistInserterTest.cc | 5 +- .../CRapidXmlStateRestoreTraverserTest.cc | 8 +- lib/core/unittest/CReadWriteLockTest.cc | 90 +- lib/core/unittest/CRegexFilterTest.cc | 25 +- lib/core/unittest/CRegexTest.cc | 59 +- lib/core/unittest/CResourceLocatorTest.cc | 18 +- lib/core/unittest/CShellArgQuoterTest.cc | 3 +- lib/core/unittest/CSleepTest.cc | 3 +- lib/core/unittest/CSmallVectorTest.cc | 16 +- lib/core/unittest/CStateCompressorTest.cc | 24 +- lib/core/unittest/CStateMachineTest.cc | 58 +- lib/core/unittest/CStatisticsTest.cc | 18 +- lib/core/unittest/CStopWatchTest.cc | 6 +- lib/core/unittest/CStoredStringPtrTest.cc | 20 +- .../unittest/CStringSimilarityTesterTest.cc | 72 +- lib/core/unittest/CStringUtilsTest.cc | 213 +- lib/core/unittest/CThreadFarmTest.cc | 9 +- .../unittest/CThreadMutexConditionTest.cc | 9 +- lib/core/unittest/CThreadPoolTest.cc | 3 +- lib/core/unittest/CTickerTest.cc | 3 +- lib/core/unittest/CTimeUtilsTest.cc | 63 +- lib/core/unittest/CTripleTest.cc | 20 +- lib/core/unittest/CUnameTest.cc | 3 +- lib/core/unittest/CVectorRangeTest.cc | 97 +- lib/core/unittest/CWindowsErrorTest.cc | 3 +- lib/core/unittest/CWordDictionaryTest.cc | 40 +- lib/core/unittest/CWordExtractorTest.cc | 25 +- lib/core/unittest/CXmlNodeWithChildrenTest.cc | 44 +- lib/core/unittest/CXmlParserTest.cc | 156 +- lib/core/unittest/CXmlParserTest.h | 4 +- lib/maths/CAdaptiveBucketing.cc | 45 +- lib/maths/CAgglomerativeClusterer.cc | 48 +- lib/maths/CAssignment.cc | 75 +- lib/maths/CBasicStatistics.cc | 3 +- lib/maths/CBjkstUniqueValues.cc | 112 +- lib/maths/CCalendarComponent.cc | 31 +- .../CCalendarComponentAdaptiveBucketing.cc | 71 +- lib/maths/CCalendarFeature.cc | 31 +- lib/maths/CCategoricalTools.cc | 74 +- lib/maths/CClusterer.cc | 3 +- lib/maths/CClustererStateSerialiser.cc | 14 +- lib/maths/CConstantPrior.cc | 62 +- lib/maths/CCooccurrences.cc | 60 +- lib/maths/CCountMinSketch.cc | 100 +- lib/maths/CDecayRateController.cc | 42 +- lib/maths/CDecompositionComponent.cc | 51 +- lib/maths/CEntropySketch.cc | 5 +- lib/maths/CExpandingWindow.cc | 35 +- lib/maths/CGammaRateConjugate.cc | 449 ++-- lib/maths/CGradientDescent.cc | 17 +- lib/maths/CGramSchmidt.cc | 13 +- lib/maths/CInformationCriteria.cc | 6 +- lib/maths/CIntegration.cc | 149 +- lib/maths/CKMeansOnline1d.cc | 33 +- lib/maths/CKMostCorrelated.cc | 118 +- lib/maths/CLassoLogisticRegression.cc | 71 +- lib/maths/CLinearAlgebraTools.cc | 191 +- lib/maths/CLogNormalMeanPrecConjugate.cc | 538 ++--- lib/maths/CLogTDistribution.cc | 6 +- lib/maths/CMixtureDistribution.cc | 21 +- lib/maths/CModel.cc | 95 +- lib/maths/CModelStateSerialiser.cc | 14 +- lib/maths/CModelWeight.cc | 6 +- lib/maths/CMultimodalPrior.cc | 191 +- lib/maths/CMultinomialConjugate.cc | 273 ++- lib/maths/CMultivariateConstantPrior.cc | 82 +- lib/maths/CMultivariateMultimodalPrior.cc | 49 +- .../CMultivariateMultimodalPriorFactory.cc | 64 +- .../CMultivariateNormalConjugateFactory.cc | 44 +- lib/maths/CMultivariateOneOfNPrior.cc | 108 +- lib/maths/CMultivariateOneOfNPriorFactory.cc | 9 +- lib/maths/CMultivariatePrior.cc | 100 +- lib/maths/CNaiveBayes.cc | 99 +- lib/maths/CNaturalBreaksClassifier.cc | 90 +- lib/maths/CNormalMeanPrecConjugate.cc | 491 +++-- lib/maths/COneOfNPrior.cc | 176 +- lib/maths/COrdinal.cc | 18 +- lib/maths/CPRNG.cc | 26 +- 
lib/maths/CPackedBitVector.cc | 41 +- lib/maths/CPeriodicityHypothesisTests.cc | 743 ++++--- lib/maths/CPoissonMeanConjugate.cc | 220 +- lib/maths/CPrior.cc | 80 +- lib/maths/CPriorStateSerialiser.cc | 41 +- lib/maths/CProbabilityCalibrator.cc | 20 +- lib/maths/CQDigest.cc | 115 +- lib/maths/CQuantileSketch.cc | 56 +- lib/maths/CRadialBasisFunction.cc | 71 +- lib/maths/CRestoreParams.cc | 35 +- lib/maths/CSampling.cc | 191 +- lib/maths/CSeasonalComponent.cc | 42 +- .../CSeasonalComponentAdaptiveBucketing.cc | 169 +- lib/maths/CSeasonalTime.cc | 42 +- lib/maths/CSignal.cc | 12 +- lib/maths/CSpline.cc | 16 +- lib/maths/CStatisticalTests.cc | 277 +-- lib/maths/CTimeSeriesChangeDetector.cc | 212 +- lib/maths/CTimeSeriesDecomposition.cc | 155 +- lib/maths/CTimeSeriesDecompositionDetail.cc | 654 +++--- ...CTimeSeriesDecompositionStateSerialiser.cc | 20 +- lib/maths/CTimeSeriesDecompositionStub.cc | 21 +- lib/maths/CTimeSeriesModel.cc | 1153 +++++++---- lib/maths/CTools.cc | 273 ++- lib/maths/CTrendComponent.cc | 152 +- lib/maths/CTrendTests.cc | 109 +- lib/maths/CXMeansOnline1d.cc | 313 +-- lib/maths/CXMeansOnlineFactory.cc | 29 +- lib/maths/Constants.cc | 6 +- lib/maths/MathsTypes.cc | 73 +- lib/maths/ProbabilityAggregators.cc | 141 +- .../unittest/CAgglomerativeClustererTest.cc | 91 +- lib/maths/unittest/CAssignmentTest.cc | 47 +- lib/maths/unittest/CBasicStatisticsTest.cc | 442 ++-- lib/maths/unittest/CBjkstUniqueValuesTest.cc | 63 +- lib/maths/unittest/CBootstrapClustererTest.cc | 217 +- lib/maths/unittest/CBoundingBoxTest.cc | 18 +- ...CCalendarComponentAdaptiveBucketingTest.cc | 97 +- lib/maths/unittest/CCalendarFeatureTest.cc | 100 +- lib/maths/unittest/CCategoricalToolsTest.cc | 308 +-- lib/maths/unittest/CChecksumTest.cc | 115 +- lib/maths/unittest/CClustererTest.cc | 4 +- lib/maths/unittest/CCountMinSketchTest.cc | 26 +- .../unittest/CDecayRateControllerTest.cc | 24 +- lib/maths/unittest/CEntropySketchTest.cc | 16 +- lib/maths/unittest/CEqualWithToleranceTest.cc | 54 +- lib/maths/unittest/CForecastTest.cc | 188 +- lib/maths/unittest/CGammaRateConjugateTest.cc | 463 +++-- lib/maths/unittest/CGramSchmidtTest.cc | 25 +- .../unittest/CInformationCriteriaTest.cc | 59 +- lib/maths/unittest/CIntegerToolsTest.cc | 46 +- lib/maths/unittest/CIntegrationTest.cc | 463 +++-- lib/maths/unittest/CKMeansFastTest.cc | 105 +- lib/maths/unittest/CKMeansOnlineTest.cc | 151 +- lib/maths/unittest/CKMostCorrelatedTest.cc | 214 +- lib/maths/unittest/CKdTreeTest.cc | 18 +- .../unittest/CLassoLogisticRegressionTest.cc | 57 +- lib/maths/unittest/CLinearAlgebraTest.cc | 273 ++- .../CLogNormalMeanPrecConjugateTest.cc | 567 ++++-- lib/maths/unittest/CLogTDistributionTest.cc | 40 +- lib/maths/unittest/CMathsFuncsTest.cc | 30 +- lib/maths/unittest/CMathsMemoryTest.cc | 27 +- .../unittest/CMixtureDistributionTest.cc | 71 +- lib/maths/unittest/CModelTest.cc | 25 +- lib/maths/unittest/CMultimodalPriorTest.cc | 779 ++++--- .../unittest/CMultinomialConjugateTest.cc | 312 ++- .../CMultivariateConstantPriorTest.cc | 163 +- .../CMultivariateMultimodalPriorTest.cc | 323 +-- .../CMultivariateNormalConjugateTest.cc | 353 ++-- .../unittest/CMultivariateOneOfNPriorTest.cc | 286 ++- lib/maths/unittest/CNaiveBayesTest.cc | 86 +- .../unittest/CNaturalBreaksClassifierTest.cc | 147 +- .../unittest/CNormalMeanPrecConjugateTest.cc | 492 +++-- lib/maths/unittest/COneOfNPriorTest.cc | 473 +++-- lib/maths/unittest/COrderingsTest.cc | 311 ++- lib/maths/unittest/COrdinalTest.cc | 105 +- lib/maths/unittest/CPRNGTest.cc | 52 +- 
lib/maths/unittest/CPackedBitVectorTest.cc | 97 +- .../CPeriodicityHypothesisTestsTest.cc | 199 +- .../unittest/CPoissonMeanConjugateTest.cc | 272 +-- lib/maths/unittest/CPriorTest.cc | 27 +- .../unittest/CProbabilityAggregatorsTest.cc | 171 +- .../unittest/CProbabilityCalibratorTest.cc | 64 +- lib/maths/unittest/CQDigestTest.cc | 71 +- lib/maths/unittest/CQuantileSketchTest.cc | 165 +- .../unittest/CRadialBasisFunctionTest.cc | 72 +- .../CRandomProjectionClustererTest.cc | 147 +- lib/maths/unittest/CRegressionTest.cc | 141 +- lib/maths/unittest/CSamplingTest.cc | 22 +- ...CSeasonalComponentAdaptiveBucketingTest.cc | 193 +- lib/maths/unittest/CSeasonalComponentTest.cc | 221 +- lib/maths/unittest/CSetToolsTest.cc | 82 +- lib/maths/unittest/CSignalTest.cc | 91 +- lib/maths/unittest/CSolversTest.cc | 75 +- lib/maths/unittest/CSplineTest.cc | 145 +- lib/maths/unittest/CStatisticalTestsTest.cc | 21 +- .../unittest/CTimeSeriesChangeDetectorTest.cc | 142 +- .../unittest/CTimeSeriesDecompositionTest.cc | 623 ++++-- lib/maths/unittest/CTimeSeriesModelTest.cc | 874 +++++--- lib/maths/unittest/CToolsTest.cc | 286 ++- lib/maths/unittest/CTrendComponentTest.cc | 87 +- lib/maths/unittest/CTrendTestsTest.cc | 82 +- lib/maths/unittest/CXMeansOnline1dTest.cc | 230 ++- lib/maths/unittest/CXMeansOnlineTest.cc | 236 ++- lib/maths/unittest/CXMeansTest.cc | 118 +- lib/maths/unittest/TestUtils.cc | 132 +- lib/maths/unittest/TestUtils.h | 64 +- lib/model/CAnnotatedProbability.cc | 93 +- lib/model/CAnnotatedProbabilityBuilder.cc | 77 +- lib/model/CAnomalyDetector.cc | 164 +- lib/model/CAnomalyDetectorModel.cc | 226 ++- lib/model/CAnomalyDetectorModelConfig.cc | 210 +- lib/model/CAnomalyScore.cc | 355 ++-- lib/model/CBucketGatherer.cc | 235 ++- lib/model/CCountingModel.cc | 111 +- lib/model/CCountingModelFactory.cc | 95 +- lib/model/CDataGatherer.cc | 210 +- lib/model/CDetectionRule.cc | 10 +- lib/model/CDetectorEqualizer.cc | 24 +- lib/model/CDynamicStringIdRegistry.cc | 52 +- lib/model/CEventData.cc | 12 +- lib/model/CEventRateBucketGatherer.cc | 572 ++++-- lib/model/CEventRateModel.cc | 223 +- lib/model/CEventRateModelFactory.cc | 138 +- lib/model/CEventRatePopulationModel.cc | 427 ++-- lib/model/CEventRatePopulationModelFactory.cc | 141 +- lib/model/CFeatureData.cc | 12 +- lib/model/CForecastDataSink.cc | 26 +- lib/model/CGathererTools.cc | 83 +- lib/model/CHierarchicalResults.cc | 328 +-- lib/model/CHierarchicalResultsAggregator.cc | 172 +- lib/model/CHierarchicalResultsNormalizer.cc | 138 +- lib/model/CHierarchicalResultsPopulator.cc | 24 +- ...HierarchicalResultsProbabilityFinalizer.cc | 7 +- lib/model/CIndividualModel.cc | 213 +- lib/model/CInterimBucketCorrector.cc | 49 +- lib/model/CLimits.cc | 18 +- lib/model/CMemoryUsageEstimator.cc | 21 +- lib/model/CMetricBucketGatherer.cc | 417 ++-- lib/model/CMetricModel.cc | 205 +- lib/model/CMetricModelFactory.cc | 136 +- lib/model/CMetricPopulationModel.cc | 383 ++-- lib/model/CMetricPopulationModelFactory.cc | 138 +- lib/model/CModelDetailsView.cc | 92 +- lib/model/CModelFactory.cc | 181 +- lib/model/CModelParams.cc | 36 +- lib/model/CModelPlotData.cc | 33 +- lib/model/CModelTools.cc | 110 +- lib/model/CPartitioningFields.cc | 6 +- lib/model/CPopulationModel.cc | 192 +- .../CProbabilityAndInfluenceCalculator.cc | 581 +++--- lib/model/CResourceMonitor.cc | 45 +- lib/model/CResultsQueue.cc | 23 +- lib/model/CRuleCondition.cc | 25 +- lib/model/CSample.cc | 21 +- lib/model/CSampleCounts.cc | 83 +- lib/model/CSearchKey.cc | 55 +- lib/model/CSimpleCountDetector.cc | 3 
+- lib/model/CStringStore.cc | 11 +- lib/model/FrequencyPredicates.cc | 6 +- lib/model/FunctionTypes.cc | 645 +++--- lib/model/ModelTypes.cc | 44 +- .../CAnnotatedProbabilityBuilderTest.cc | 262 ++- .../CAnomalyDetectorModelConfigTest.cc | 309 +-- lib/model/unittest/CAnomalyScoreTest.cc | 232 +-- lib/model/unittest/CBucketQueueTest.cc | 33 +- lib/model/unittest/CCountingModelTest.cc | 54 +- lib/model/unittest/CDetectionRuleTest.cc | 728 +++---- lib/model/unittest/CDetectorEqualizerTest.cc | 21 +- .../unittest/CDynamicStringIdRegistryTest.cc | 41 +- .../unittest/CEventRateAnomalyDetectorTest.cc | 73 +- .../unittest/CEventRateDataGathererTest.cc | 1320 ++++++------ lib/model/unittest/CEventRateModelTest.cc | 1026 ++++++---- .../CEventRatePopulationDataGathererTest.cc | 398 ++-- .../unittest/CEventRatePopulationModelTest.cc | 476 +++-- lib/model/unittest/CFunctionTypesTest.cc | 190 +- lib/model/unittest/CGathererToolsTest.cc | 8 +- .../CHierarchicalResultsLevelSetTest.cc | 31 +- .../unittest/CHierarchicalResultsTest.cc | 1029 ++++++---- .../unittest/CInterimBucketCorrectorTest.cc | 22 +- lib/model/unittest/CLimitsTest.cc | 27 +- .../unittest/CMemoryUsageEstimatorTest.cc | 50 +- .../unittest/CMetricAnomalyDetectorTest.cc | 168 +- lib/model/unittest/CMetricDataGathererTest.cc | 1311 ++++++------ lib/model/unittest/CMetricModelTest.cc | 940 +++++---- .../CMetricPopulationDataGathererTest.cc | 417 ++-- .../unittest/CMetricPopulationModelTest.cc | 517 +++-- lib/model/unittest/CModelDetailsViewTest.cc | 53 +- lib/model/unittest/CModelMemoryTest.cc | 14 +- lib/model/unittest/CModelToolsTest.cc | 50 +- lib/model/unittest/CModelTypesTest.cc | 511 +++-- .../CProbabilityAndInfluenceCalculatorTest.cc | 558 +++-- lib/model/unittest/CResourceLimitTest.cc | 195 +- lib/model/unittest/CResourceMonitorTest.cc | 73 +- lib/model/unittest/CRuleConditionTest.cc | 53 +- lib/model/unittest/CSampleQueueTest.cc | 263 +-- lib/model/unittest/CStringStoreTest.cc | 19 +- lib/model/unittest/CToolsTest.cc | 4 +- lib/model/unittest/Mocks.cc | 34 +- lib/model/unittest/Mocks.h | 33 +- lib/test/CMultiFileDataAdder.cc | 10 +- lib/test/CMultiFileSearcher.cc | 7 +- lib/test/CRandomNumbers.cc | 51 +- lib/test/CTestRunner.cc | 37 +- lib/test/CTimeSeriesTestData.cc | 11 +- lib/test/CTimingXmlOutputterHook.cc | 16 +- lib/ver/unittest/CBuildInfoTest.cc | 6 +- 762 files changed, 38788 insertions(+), 26318 deletions(-) diff --git a/.clang-format b/.clang-format index a6e033bf35..5a705682b9 100644 --- a/.clang-format +++ b/.clang-format @@ -4,21 +4,20 @@ AllowAllParametersOfDeclarationOnNextLine: false AllowShortBlocksOnASingleLine: true AllowShortFunctionsOnASingleLine: InlineOnly AlwaysBreakTemplateDeclarations: true -BinPackArguments: false -BinPackParameters: false -ColumnLimit: 140 -ConstructorInitializerAllOnOneLineOrOnePerLine: true FixNamespaceComments: false IndentCaseLabels: false IndentWidth: 4 -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 4 +TabWidth: 4 +BinPackParameters: false +PenaltyBreakAssignment: 20 +PenaltyBreakBeforeFirstCallParameter: 15 PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 2 -PenaltyBreakString: 1000000 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyBreakFirstLessLess: 1 +PenaltyBreakString: 30 +PenaltyExcessCharacter: 1 +PenaltyReturnTypeOnItsOwnLine: 30 PointerAlignment: Left -TabWidth: 4 SpaceAfterTemplateKeyword: false ReflowComments: false +SortIncludes: true +BreakStringLiterals: false diff --git a/bin/autoconfig/CCmdLineParser.cc 
b/bin/autoconfig/CCmdLineParser.cc index c79526d873..b647f44695 100644 --- a/bin/autoconfig/CCmdLineParser.cc +++ b/bin/autoconfig/CCmdLineParser.cc @@ -35,29 +35,32 @@ bool CCmdLineParser::parse(int argc, bool& writeDetectorConfigs) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( - "logProperties", boost::program_options::value(), "Optional logger properties file")( + desc.add_options()("help", "Display this information and exit")( + "version", "Display version information and exit")( + "logProperties", boost::program_options::value(), + "Optional logger properties file")( "logPipe", boost::program_options::value(), "Optional log to named pipe")( - "delimiter", - boost::program_options::value(), + "delimiter", boost::program_options::value(), "Optional delimiter character for delimited data formats - default is ',' (comma separated)")( - "lengthEncodedInput", "Take input in length encoded binary format - default is delimited")( - "timefield", - boost::program_options::value(), + "lengthEncodedInput", + "Take input in length encoded binary format - default is delimited")( + "timefield", boost::program_options::value(), "Optional name of the field containing the timestamp - default is 'time'")( - "timeformat", - boost::program_options::value(), + "timeformat", boost::program_options::value(), "Optional format of the date in the time field in strptime code - default is the epoch time in seconds")( "config", boost::program_options::value(), "Optional configuration file")( - "input", boost::program_options::value(), "Optional file to read input from - not present means read from STDIN")( + "input", boost::program_options::value(), + "Optional file to read input from - not present means read from STDIN")( "inputIsPipe", "Specified input file is a named pipe")( - "output", boost::program_options::value(), "Optional file to write output to - not present means write to STDOUT")( + "output", boost::program_options::value(), + "Optional file to write output to - not present means write to STDOUT")( "outputIsPipe", "Specified output file is a named pipe")( "verbose", "Output information about all detectors including those that have been discarded")( "writeDetectorConfigs", "Output the detector configurations in JSON format"); boost::program_options::variables_map vm; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); + boost::program_options::store( + boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); if (vm.count("help") > 0) { diff --git a/bin/autoconfig/Main.cc b/bin/autoconfig/Main.cc index 58ad388dd7..5a9157543b 100644 --- a/bin/autoconfig/Main.cc +++ b/bin/autoconfig/Main.cc @@ -52,27 +52,17 @@ int main(int argc, char** argv) { bool isOutputFileNamedPipe(false); bool verbose(false); bool writeDetectorConfigs(false); - if (ml::autoconfig::CCmdLineParser::parse(argc, - argv, - logProperties, - logPipe, - delimiter, - lengthEncodedInput, - timeField, - timeFormat, - configFile, - inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe, - verbose, - writeDetectorConfigs) == false) { + if (ml::autoconfig::CCmdLineParser::parse( + argc, argv, logProperties, logPipe, delimiter, lengthEncodedInput, timeField, + timeFormat, configFile, inputFileName, isInputFileNamedPipe, outputFileName, + isOutputFileNamedPipe, verbose, writeDetectorConfigs) 
== false) { return EXIT_FAILURE; } // Construct the IO manager before reconfiguring the logger, as it performs // std::ios actions that only work before first use - ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe); + ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, + outputFileName, isOutputFileNamedPipe); if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) { LOG_FATAL(<< "Could not reconfigure logging"); @@ -112,8 +102,7 @@ int main(int argc, char** argv) { // The skeleton avoids the need to duplicate a lot of boilerplate code ml::api::CCmdSkeleton skeleton(nullptr, // no restoration at present nullptr, // no persistence at present - *inputParser, - configurer); + *inputParser, configurer); if (skeleton.ioLoop() == false) { LOG_FATAL(<< "Ml autoconfig failed"); return EXIT_FAILURE; diff --git a/bin/autodetect/CCmdLineParser.cc b/bin/autodetect/CCmdLineParser.cc index 9284706c69..39a5c3c01e 100644 --- a/bin/autodetect/CCmdLineParser.cc +++ b/bin/autodetect/CCmdLineParser.cc @@ -57,69 +57,74 @@ bool CCmdLineParser::parse(int argc, TStrVec& clauseTokens) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( - "limitconfig", boost::program_options::value(), "Optional limit config file")( - "modelconfig", boost::program_options::value(), "Optional model config file")( - "fieldconfig", boost::program_options::value(), "Optional field config file")( - "modelplotconfig", boost::program_options::value(), "Optional model plot config file")( - "jobid", boost::program_options::value(), "ID of the job this process is associated with")( - "logProperties", boost::program_options::value(), "Optional logger properties file")( + desc.add_options()("help", "Display this information and exit")( + "version", "Display version information and exit")( + "limitconfig", boost::program_options::value(), + "Optional limit config file")("modelconfig", + boost::program_options::value(), + "Optional model config file")( + "fieldconfig", boost::program_options::value(), + "Optional field config file")("modelplotconfig", + boost::program_options::value(), + "Optional model plot config file")( + "jobid", boost::program_options::value(), + "ID of the job this process is associated with")( + "logProperties", boost::program_options::value(), + "Optional logger properties file")( "logPipe", boost::program_options::value(), "Optional log to named pipe")( - "bucketspan", boost::program_options::value(), "Optional aggregation bucket span (in seconds) - default is 300")( - "latency", - boost::program_options::value(), + "bucketspan", boost::program_options::value(), + "Optional aggregation bucket span (in seconds) - default is 300")( + "latency", boost::program_options::value(), "Optional maximum delay for out-of-order records (in seconds) - default is 0")( - "summarycountfield", - boost::program_options::value(), + "summarycountfield", boost::program_options::value(), "Optional field to that contains counts for pre-summarized input - default is none")( - "delimiter", - boost::program_options::value(), + "delimiter", boost::program_options::value(), "Optional delimiter character for delimited data formats - default is '\t' (tab separated)")( - "lengthEncodedInput", "Take input in length encoded binary format - default is delimited")( - "timefield", - boost::program_options::value(), + 
"lengthEncodedInput", + "Take input in length encoded binary format - default is delimited")( + "timefield", boost::program_options::value(), "Optional name of the field containing the timestamp - default is 'time'")( - "timeformat", - boost::program_options::value(), + "timeformat", boost::program_options::value(), "Optional format of the date in the time field in strptime code - default is the epoch time in seconds")( - "quantilesState", boost::program_options::value(), "Optional file to quantiles for normalization")( + "quantilesState", boost::program_options::value(), + "Optional file to quantiles for normalization")( "deleteStateFiles", "If the 'quantilesState' option is used and this flag is set then delete the model state files once they have been read")( - "input", boost::program_options::value(), "Optional file to read input from - not present means read from STDIN")( + "input", boost::program_options::value(), + "Optional file to read input from - not present means read from STDIN")( "inputIsPipe", "Specified input file is a named pipe")( - "output", boost::program_options::value(), "Optional file to write output to - not present means write to STDOUT")( + "output", boost::program_options::value(), + "Optional file to write output to - not present means write to STDOUT")( "outputIsPipe", "Specified output file is a named pipe")( - "restore", - boost::program_options::value(), - "Optional file to restore state from - not present means no state restoration")("restoreIsPipe", - "Specified restore file is a named pipe")( - "persist", - boost::program_options::value(), - "Optional file to persist state to - not present means no state persistence")("persistIsPipe", - "Specified persist file is a named pipe")( - "persistInterval", - boost::program_options::value(), + "restore", boost::program_options::value(), + "Optional file to restore state from - not present means no state restoration")( + "restoreIsPipe", "Specified restore file is a named pipe")( + "persist", boost::program_options::value(), + "Optional file to persist state to - not present means no state persistence")( + "persistIsPipe", "Specified persist file is a named pipe")( + "persistInterval", boost::program_options::value(), "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at " - "program exit")("maxQuantileInterval", - boost::program_options::value(), - "Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - " - "if not specified then quantiles will only be output following a big anomaly")( - "maxAnomalyRecords", - boost::program_options::value(), + "program exit")( + "maxQuantileInterval", boost::program_options::value(), + "Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - " + "if not specified then quantiles will only be output following a big anomaly")( + "maxAnomalyRecords", boost::program_options::value(), "The maximum number of records to be outputted for each bucket. 
Defaults to 100, a value 0 removes the limit.")( "memoryUsage", "Log the model memory usage at the end of the job")( - "resultFinalizationWindow", - boost::program_options::value(), + "resultFinalizationWindow", boost::program_options::value(), "The numer of half buckets to store before choosing which overlapping bucket has the biggest anomaly")( - "multivariateByFields", "Optional flag to enable multi-variate analysis of correlated by fields")( - "multipleBucketspans", - boost::program_options::value(), + "multivariateByFields", + "Optional flag to enable multi-variate analysis of correlated by fields")( + "multipleBucketspans", boost::program_options::value(), "Optional comma-separated list of additional bucketspans - must be direct multiples of the main bucketspan")( "perPartitionNormalization", "Optional flag to enable per partition normalization"); boost::program_options::variables_map vm; boost::program_options::parsed_options parsed = - boost::program_options::command_line_parser(argc, argv).options(desc).allow_unregistered().run(); + boost::program_options::command_line_parser(argc, argv) + .options(desc) + .allow_unregistered() + .run(); boost::program_options::store(parsed, vm); if (vm.count("help") > 0) { @@ -127,8 +132,10 @@ bool CCmdLineParser::parse(int argc, return false; } if (vm.count("version") > 0) { - std::cerr << "Model State Version " << model::CAnomalyDetector::STATE_VERSION << std::endl - << "Quantile State Version " << model::CAnomalyScore::CURRENT_FORMAT_VERSION << std::endl + std::cerr << "Model State Version " + << model::CAnomalyDetector::STATE_VERSION << std::endl + << "Quantile State Version " + << model::CAnomalyScore::CURRENT_FORMAT_VERSION << std::endl << ver::CBuildInfo::fullInfo() << std::endl; return false; } @@ -229,7 +236,9 @@ bool CCmdLineParser::parse(int argc, perPartitionNormalization = true; } - boost::program_options::collect_unrecognized(parsed.options, boost::program_options::include_positional).swap(clauseTokens); + boost::program_options::collect_unrecognized( + parsed.options, boost::program_options::include_positional) + .swap(clauseTokens); } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; return false; diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 76874c1350..64e17f4121 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -90,54 +90,24 @@ int main(int argc, char** argv) { std::string multipleBucketspans; bool perPartitionNormalization(false); TStrVec clauseTokens; - if (ml::autodetect::CCmdLineParser::parse(argc, - argv, - limitConfigFile, - modelConfigFile, - fieldConfigFile, - modelPlotConfigFile, - jobId, - logProperties, - logPipe, - bucketSpan, - latency, - summaryCountFieldName, - delimiter, - lengthEncodedInput, - timeField, - timeFormat, - quantilesStateFile, - deleteStateFiles, - persistInterval, - maxQuantileInterval, - inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe, - restoreFileName, - isRestoreFileNamedPipe, - persistFileName, - isPersistFileNamedPipe, - maxAnomalyRecords, - memoryUsage, - bucketResultsDelay, - multivariateByFields, - multipleBucketspans, - perPartitionNormalization, - clauseTokens) == false) { + if (ml::autodetect::CCmdLineParser::parse( + argc, argv, limitConfigFile, modelConfigFile, fieldConfigFile, + modelPlotConfigFile, jobId, logProperties, logPipe, bucketSpan, latency, + summaryCountFieldName, delimiter, lengthEncodedInput, timeField, + timeFormat, quantilesStateFile, 
deleteStateFiles, persistInterval, + maxQuantileInterval, inputFileName, isInputFileNamedPipe, outputFileName, + isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, + persistFileName, isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage, + bucketResultsDelay, multivariateByFields, multipleBucketspans, + perPartitionNormalization, clauseTokens) == false) { return EXIT_FAILURE; } // Construct the IO manager before reconfiguring the logger, as it performs // std::ios actions that only work before first use - ml::api::CIoManager ioMgr(inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe, - restoreFileName, - isRestoreFileNamedPipe, - persistFileName, - isPersistFileNamedPipe); + ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, outputFileName, + isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, + persistFileName, isPersistFileNamedPipe); if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) { LOG_FATAL(<< "Could not reconfigure logging"); @@ -169,20 +139,27 @@ int main(int argc, char** argv) { ml::api::CFieldConfig fieldConfig; - ml::model_t::ESummaryMode summaryMode(summaryCountFieldName.empty() ? ml::model_t::E_None : ml::model_t::E_Manual); - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - bucketSpan, summaryMode, summaryCountFieldName, latency, bucketResultsDelay, multivariateByFields, multipleBucketspans); + ml::model_t::ESummaryMode summaryMode( + summaryCountFieldName.empty() ? ml::model_t::E_None : ml::model_t::E_Manual); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig( + bucketSpan, summaryMode, summaryCountFieldName, latency, + bucketResultsDelay, multivariateByFields, multipleBucketspans); modelConfig.perPartitionNormalization(perPartitionNormalization); - modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef(fieldConfig.detectionRules())); - modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef(fieldConfig.scheduledEvents())); + modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef( + fieldConfig.detectionRules())); + modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef( + fieldConfig.scheduledEvents())); if (!modelConfigFile.empty() && modelConfig.init(modelConfigFile) == false) { LOG_FATAL(<< "Ml model config file '" << modelConfigFile << "' could not be loaded"); return EXIT_FAILURE; } - if (!modelPlotConfigFile.empty() && modelConfig.configureModelPlot(modelPlotConfigFile) == false) { - LOG_FATAL(<< "Ml model plot config file '" << modelPlotConfigFile << "' could not be loaded"); + if (!modelPlotConfigFile.empty() && + modelConfig.configureModelPlot(modelPlotConfigFile) == false) { + LOG_FATAL(<< "Ml model plot config file '" << modelPlotConfigFile + << "' could not be loaded"); return EXIT_FAILURE; } @@ -237,17 +214,11 @@ int main(int argc, char** argv) { } // The anomaly job knows how to detect anomalies - ml::api::CAnomalyJob job(jobId, - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - boost::bind(&ml::api::CModelSnapshotJsonWriter::write, &modelSnapshotWriter, _1), - periodicPersister.get(), - maxQuantileInterval, - timeField, - timeFormat, - maxAnomalyRecords); + ml::api::CAnomalyJob job(jobId, limits, fieldConfig, modelConfig, wrappedOutputStream, + 
boost::bind(&ml::api::CModelSnapshotJsonWriter::write, + &modelSnapshotWriter, _1), + periodicPersister.get(), maxQuantileInterval, + timeField, timeFormat, maxAnomalyRecords); if (!quantilesStateFile.empty()) { if (job.initNormalizer(quantilesStateFile) == false) { @@ -267,7 +238,8 @@ int main(int argc, char** argv) { ml::api::CJsonOutputWriter fieldDataTyperOutputWriter(jobId, wrappedOutputStream); // The typer knows how to assign categories to records - ml::api::CFieldDataTyper typer(jobId, fieldConfig, limits, outputChainer, fieldDataTyperOutputWriter); + ml::api::CFieldDataTyper typer(jobId, fieldConfig, limits, outputChainer, + fieldDataTyperOutputWriter); if (fieldConfig.fieldNameSuperset().count(ml::api::CFieldDataTyper::MLCATEGORY_NAME) > 0) { LOG_DEBUG(<< "Applying the categorization typer for anomaly detection"); @@ -275,12 +247,13 @@ int main(int argc, char** argv) { } if (periodicPersister != nullptr) { - periodicPersister->firstProcessorPeriodicPersistFunc( - boost::bind(&ml::api::CDataProcessor::periodicPersistState, firstProcessor, _1)); + periodicPersister->firstProcessorPeriodicPersistFunc(boost::bind( + &ml::api::CDataProcessor::periodicPersistState, firstProcessor, _1)); } // The skeleton avoids the need to duplicate a lot of boilerplate code - ml::api::CCmdSkeleton skeleton(restoreSearcher.get(), persister.get(), *inputParser, *firstProcessor); + ml::api::CCmdSkeleton skeleton(restoreSearcher.get(), persister.get(), + *inputParser, *firstProcessor); bool ioLoopSucceeded(skeleton.ioLoop()); // Unfortunately we cannot rely on destruction to finalise the output writer diff --git a/bin/categorize/CCmdLineParser.cc b/bin/categorize/CCmdLineParser.cc index dfdba5fb06..dd993c4715 100644 --- a/bin/categorize/CCmdLineParser.cc +++ b/bin/categorize/CCmdLineParser.cc @@ -37,34 +37,40 @@ bool CCmdLineParser::parse(int argc, std::string& categorizationFieldName) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( - "limitconfig", boost::program_options::value(), "Optional limit config file")( - "jobid", boost::program_options::value(), "ID of the job this process is associated with")( - "logProperties", boost::program_options::value(), "Optional logger properties file")( + desc.add_options()("help", "Display this information and exit")( + "version", "Display version information and exit")( + "limitconfig", boost::program_options::value(), + "Optional limit config file")( + "jobid", boost::program_options::value(), + "ID of the job this process is associated with")( + "logProperties", boost::program_options::value(), + "Optional logger properties file")( "logPipe", boost::program_options::value(), "Optional log to named pipe")( - "delimiter", - boost::program_options::value(), + "delimiter", boost::program_options::value(), "Optional delimiter character for delimited data formats - default is '\t' (tab separated)")( - "lengthEncodedInput", "Take input in length encoded binary format - default is delimited")( - "input", boost::program_options::value(), "Optional file to read input from - not present means read from STDIN")( + "lengthEncodedInput", + "Take input in length encoded binary format - default is delimited")( + "input", boost::program_options::value(), + "Optional file to read input from - not present means read from STDIN")( "inputIsPipe", "Specified input file is a named pipe")( - "output", boost::program_options::value(), "Optional file 
to write output to - not present means write to STDOUT")( + "output", boost::program_options::value(), + "Optional file to write output to - not present means write to STDOUT")( "outputIsPipe", "Specified output file is a named pipe")( - "restore", - boost::program_options::value(), - "Optional file to restore state from - not present means no state restoration")("restoreIsPipe", - "Specified restore file is a named pipe")( - "persist", - boost::program_options::value(), - "Optional file to persist state to - not present means no state persistence")("persistIsPipe", - "Specified persist file is a named pipe")( - "persistInterval", - boost::program_options::value(), + "restore", boost::program_options::value(), + "Optional file to restore state from - not present means no state restoration")( + "restoreIsPipe", "Specified restore file is a named pipe")( + "persist", boost::program_options::value(), + "Optional file to persist state to - not present means no state persistence")( + "persistIsPipe", "Specified persist file is a named pipe")( + "persistInterval", boost::program_options::value(), "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at " - "program exit")("categorizationfield", boost::program_options::value(), "Field to compute mlcategory from"); + "program exit")("categorizationfield", + boost::program_options::value(), + "Field to compute mlcategory from"); boost::program_options::variables_map vm; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); + boost::program_options::store( + boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); if (vm.count("help") > 0) { diff --git a/bin/categorize/Main.cc b/bin/categorize/Main.cc index bda6fec54e..f2c9499acd 100644 --- a/bin/categorize/Main.cc +++ b/bin/categorize/Main.cc @@ -66,37 +66,19 @@ int main(int argc, char** argv) { std::string persistFileName; bool isPersistFileNamedPipe(false); std::string categorizationFieldName; - if (ml::categorize::CCmdLineParser::parse(argc, - argv, - limitConfigFile, - jobId, - logProperties, - logPipe, - delimiter, - lengthEncodedInput, - persistInterval, - inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe, - restoreFileName, - isRestoreFileNamedPipe, - persistFileName, - isPersistFileNamedPipe, - categorizationFieldName) == false) { + if (ml::categorize::CCmdLineParser::parse( + argc, argv, limitConfigFile, jobId, logProperties, logPipe, delimiter, + lengthEncodedInput, persistInterval, inputFileName, isInputFileNamedPipe, + outputFileName, isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, + persistFileName, isPersistFileNamedPipe, categorizationFieldName) == false) { return EXIT_FAILURE; } // Construct the IO manager before reconfiguring the logger, as it performs // std::ios actions that only work before first use - ml::api::CIoManager ioMgr(inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe, - restoreFileName, - isRestoreFileNamedPipe, - persistFileName, - isPersistFileNamedPipe); + ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, outputFileName, + isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, + persistFileName, isPersistFileNamedPipe); if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) { LOG_FATAL(<< "Could not reconfigure logging"); @@ -184,14 +166,17 @@ int main(int argc, char** argv) { 
ml::api::CJsonOutputWriter outputWriter(jobId, wrappedOutputStream); // The typer knows how to assign categories to records - ml::api::CFieldDataTyper typer(jobId, fieldConfig, limits, nullOutput, outputWriter, periodicPersister.get()); + ml::api::CFieldDataTyper typer(jobId, fieldConfig, limits, nullOutput, + outputWriter, periodicPersister.get()); if (periodicPersister != nullptr) { - periodicPersister->firstProcessorPeriodicPersistFunc(boost::bind(&ml::api::CFieldDataTyper::periodicPersistState, &typer, _1)); + periodicPersister->firstProcessorPeriodicPersistFunc(boost::bind( + &ml::api::CFieldDataTyper::periodicPersistState, &typer, _1)); } // The skeleton avoids the need to duplicate a lot of boilerplate code - ml::api::CCmdSkeleton skeleton(restoreSearcher.get(), persister.get(), *inputParser, typer); + ml::api::CCmdSkeleton skeleton(restoreSearcher.get(), persister.get(), + *inputParser, typer); bool ioLoopSucceeded(skeleton.ioLoop()); // Unfortunately we cannot rely on destruction to finalise the output writer diff --git a/bin/controller/CBlockingCallCancellerThread.cc b/bin/controller/CBlockingCallCancellerThread.cc index 30d8ef0d03..0b7861a903 100644 --- a/bin/controller/CBlockingCallCancellerThread.cc +++ b/bin/controller/CBlockingCallCancellerThread.cc @@ -12,8 +12,10 @@ namespace ml { namespace controller { -CBlockingCallCancellerThread::CBlockingCallCancellerThread(core::CThread::TThreadId potentiallyBlockedThreadId, std::istream& monitorStream) - : m_PotentiallyBlockedThreadId(potentiallyBlockedThreadId), m_MonitorStream(monitorStream), m_Shutdown(false) { +CBlockingCallCancellerThread::CBlockingCallCancellerThread(core::CThread::TThreadId potentiallyBlockedThreadId, + std::istream& monitorStream) + : m_PotentiallyBlockedThreadId(potentiallyBlockedThreadId), + m_MonitorStream(monitorStream), m_Shutdown(false) { } void CBlockingCallCancellerThread::run() { diff --git a/bin/controller/CBlockingCallCancellerThread.h b/bin/controller/CBlockingCallCancellerThread.h index 81c53bbcd9..b0c7d8e226 100644 --- a/bin/controller/CBlockingCallCancellerThread.h +++ b/bin/controller/CBlockingCallCancellerThread.h @@ -36,7 +36,8 @@ namespace controller { //! class CBlockingCallCancellerThread : public core::CThread { public: - CBlockingCallCancellerThread(core::CThread::TThreadId potentiallyBlockedThreadId, std::istream& monitorStream); + CBlockingCallCancellerThread(core::CThread::TThreadId potentiallyBlockedThreadId, + std::istream& monitorStream); protected: //! Called when the thread is started. 
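An aside on the pattern that dominates the command-line parser hunks above and below: desc.add_options() returns a proxy object whose operator() registers one option and then returns the proxy again, so each ")(" sequence is simply the next chained call, and those call boundaries are where the formatter wraps. The following is a minimal, self-contained sketch of the same style, not code from this repository; the option names are hypothetical and the value<std::string>() template arguments are spelled out in full.

    #include <boost/program_options.hpp>

    #include <iostream>
    #include <string>

    int main(int argc, char** argv) {
        try {
            boost::program_options::options_description desc("Usage: demo [options]\nOptions");
            // Each ")(" below is one more chained call on the proxy that
            // add_options() returns - one call per registered option.
            desc.add_options()("help", "Display this information and exit")(
                "input", boost::program_options::value<std::string>(),
                "Optional file to read input from - not present means read from STDIN")(
                "verbose", "Output extra information");

            boost::program_options::variables_map vm;
            boost::program_options::store(
                boost::program_options::parse_command_line(argc, argv, desc), vm);
            boost::program_options::notify(vm);

            if (vm.count("help") > 0) {
                std::cerr << desc << std::endl;
                return 1;
            }
            if (vm.count("input") > 0) {
                std::cout << vm["input"].as<std::string>() << std::endl;
            }
        } catch (std::exception& e) {
            std::cerr << "Error processing command line: " << e.what() << std::endl;
            return 1;
        }
        return 0;
    }

Because the whole declaration is a single expression, the formatter cannot break it at statement boundaries; it can only wrap at the chained call sites, which is why these hunks look so different from the surrounding, more mechanical re-indentation.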
diff --git a/bin/controller/CCmdLineParser.cc b/bin/controller/CCmdLineParser.cc
index 06e5ca51e6..3420df8e79 100644
--- a/bin/controller/CCmdLineParser.cc
+++ b/bin/controller/CCmdLineParser.cc
@@ -17,20 +17,25 @@ namespace controller {
 const std::string CCmdLineParser::DESCRIPTION = "Usage: controller [options]\n"
                                                 "Options";
-bool CCmdLineParser::parse(int argc, const char* const* argv, std::string& jvmPidStr, std::string& logPipe, std::string& commandPipe) {
+bool CCmdLineParser::parse(int argc,
+                           const char* const* argv,
+                           std::string& jvmPidStr,
+                           std::string& logPipe,
+                           std::string& commandPipe) {
     try {
         boost::program_options::options_description desc(DESCRIPTION);
-        desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")(
-            "jvmPid",
-            boost::program_options::value<std::string>(),
+        desc.add_options()("help", "Display this information and exit")(
+            "version", "Display version information and exit")(
+            "jvmPid", boost::program_options::value<std::string>(),
             "Process ID of the JVM to communicate with - default is parent process PID")(
-            "logPipe", boost::program_options::value<std::string>(), "Named pipe to log to - default is controller_log_<JVM PID>")(
-            "commandPipe",
-            boost::program_options::value<std::string>(),
+            "logPipe", boost::program_options::value<std::string>(),
+            "Named pipe to log to - default is controller_log_<JVM PID>")(
+            "commandPipe", boost::program_options::value<std::string>(),
             "Named pipe to accept commands from - default is controller_command_<JVM PID>");
         boost::program_options::variables_map vm;
-        boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
+        boost::program_options::store(
+            boost::program_options::parse_command_line(argc, argv, desc), vm);
         boost::program_options::notify(vm);
         if (vm.count("help") > 0) {
diff --git a/bin/controller/CCmdLineParser.h b/bin/controller/CCmdLineParser.h
index 77891532a1..965599e1c2 100644
--- a/bin/controller/CCmdLineParser.h
+++ b/bin/controller/CCmdLineParser.h
@@ -29,7 +29,11 @@ class CCmdLineParser {
 public:
     //! Parse the arguments and return options if appropriate.
- static bool parse(int argc, const char* const* argv, std::string& jvmPidStr, std::string& logPipe, std::string& commandPipe); + static bool parse(int argc, + const char* const* argv, + std::string& jvmPidStr, + std::string& logPipe, + std::string& commandPipe); private: static const std::string DESCRIPTION; diff --git a/bin/controller/CCommandProcessor.cc b/bin/controller/CCommandProcessor.cc index 62c76ef81a..4926ab7272 100644 --- a/bin/controller/CCommandProcessor.cc +++ b/bin/controller/CCommandProcessor.cc @@ -26,7 +26,8 @@ namespace controller { const std::string CCommandProcessor::START("start"); const std::string CCommandProcessor::KILL("kill"); -CCommandProcessor::CCommandProcessor(const TStrVec& permittedProcessPaths) : m_Spawner(permittedProcessPaths) { +CCommandProcessor::CCommandProcessor(const TStrVec& permittedProcessPaths) + : m_Spawner(permittedProcessPaths) { } void CCommandProcessor::processCommands(std::istream& stream) { @@ -86,7 +87,8 @@ bool CCommandProcessor::handleStart(TStrVec& tokens) { bool CCommandProcessor::handleKill(TStrVec& tokens) { core::CProcess::TPid pid = 0; if (tokens.size() != 1 || core::CStringUtils::stringToType(tokens[0], pid) == false) { - LOG_ERROR(<< "Unexpected arguments for kill command: " << core::CContainerPrinter::print(tokens)); + LOG_ERROR(<< "Unexpected arguments for kill command: " + << core::CContainerPrinter::print(tokens)); return false; } diff --git a/bin/controller/Main.cc b/bin/controller/Main.cc index 8c82e153c3..0caa645e35 100644 --- a/bin/controller/Main.cc +++ b/bin/controller/Main.cc @@ -63,10 +63,12 @@ int main(int argc, char** argv) { const std::string& progName = ml::core::CProgName::progName(); // Read command line options - std::string jvmPidStr = ml::core::CStringUtils::typeToString(ml::core::CProcess::instance().parentId()); + std::string jvmPidStr = ml::core::CStringUtils::typeToString( + ml::core::CProcess::instance().parentId()); std::string logPipe; std::string commandPipe; - if (ml::controller::CCmdLineParser::parse(argc, argv, jvmPidStr, logPipe, commandPipe) == false) { + if (ml::controller::CCmdLineParser::parse(argc, argv, jvmPidStr, logPipe, + commandPipe) == false) { return EXIT_FAILURE; } @@ -86,7 +88,8 @@ int main(int argc, char** argv) { // 4) No plugin code ever runs // This thread will detect the death of the parent process because this // process's STDIN will be closed. - ml::controller::CBlockingCallCancellerThread cancellerThread(ml::core::CThread::currentThreadId(), std::cin); + ml::controller::CBlockingCallCancellerThread cancellerThread( + ml::core::CThread::currentThreadId(), std::cin); if (cancellerThread.start() == false) { // This log message will probably never been seen as it will go to the // real stderr of this process rather than the log pipe... @@ -109,7 +112,8 @@ int main(int argc, char** argv) { // the controller is critical to the overall system. Also its resource // requirements should always be very low. 
- ml::core::CNamedPipeFactory::TIStreamP commandStream = ml::core::CNamedPipeFactory::openPipeStreamRead(commandPipe); + ml::core::CNamedPipeFactory::TIStreamP commandStream = + ml::core::CNamedPipeFactory::openPipeStreamRead(commandPipe); if (commandStream == nullptr) { LOG_FATAL(<< "Could not open command pipe"); cancellerThread.stop(); @@ -121,7 +125,8 @@ int main(int argc, char** argv) { // permitted programs const std::string& progDir = ml::core::CProgName::progDir(); if (ml::core::COsFileFuncs::chdir(progDir.c_str()) == -1) { - LOG_FATAL(<< "Could not change directory to '" << progDir << "': " << ::strerror(errno)); + LOG_FATAL(<< "Could not change directory to '" << progDir + << "': " << ::strerror(errno)); cancellerThread.stop(); return EXIT_FAILURE; } diff --git a/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc b/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc index 3637c4b46f..1b98895156 100644 --- a/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc +++ b/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc @@ -37,8 +37,9 @@ class CEofThread : public ml::core::CThread { CppUnit::Test* CBlockingCallCancellerThreadTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBlockingCallCancellerThreadTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CBlockingCallCancellerThreadTest::testCancelBlock", - &CBlockingCallCancellerThreadTest::testCancelBlock)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBlockingCallCancellerThreadTest::testCancelBlock", + &CBlockingCallCancellerThreadTest::testCancelBlock)); return suiteOfTests; } @@ -47,7 +48,8 @@ void CBlockingCallCancellerThreadTest::testCancelBlock() { ml::core::CDualThreadStreamBuf buf; std::istream monStrm(&buf); - ml::controller::CBlockingCallCancellerThread cancellerThread(ml::core::CThread::currentThreadId(), monStrm); + ml::controller::CBlockingCallCancellerThread cancellerThread( + ml::core::CThread::currentThreadId(), monStrm); CPPUNIT_ASSERT(cancellerThread.start()); // The CBlockingCallCancellerThread should wake up the blocking open of the @@ -60,8 +62,8 @@ void CBlockingCallCancellerThreadTest::testCancelBlock() { CEofThread eofThread(buf); CPPUNIT_ASSERT(eofThread.start()); - ml::core::CNamedPipeFactory::TIStreamP pipeStrm = - ml::core::CNamedPipeFactory::openPipeStreamRead(ml::core::CNamedPipeFactory::defaultPath() + "test_pipe"); + ml::core::CNamedPipeFactory::TIStreamP pipeStrm = ml::core::CNamedPipeFactory::openPipeStreamRead( + ml::core::CNamedPipeFactory::defaultPath() + "test_pipe"); CPPUNIT_ASSERT(pipeStrm == 0); CPPUNIT_ASSERT(cancellerThread.stop()); diff --git a/bin/controller/unittest/CCommandProcessorTest.cc b/bin/controller/unittest/CCommandProcessorTest.cc index 25ace4b054..802e0d2efd 100644 --- a/bin/controller/unittest/CCommandProcessorTest.cc +++ b/bin/controller/unittest/CCommandProcessorTest.cc @@ -27,7 +27,8 @@ const std::string OUTPUT_FILE("slogan1.txt"); const std::string INPUT_FILE1("testfiles\\slogan1.txt"); const std::string INPUT_FILE2("testfiles\\slogan2.txt"); const char* winDir(::getenv("windir")); -const std::string PROCESS_PATH(winDir != 0 ? std::string(winDir) + "\\System32\\cmd" : std::string("C:\\Windows\\System32\\cmd")); +const std::string PROCESS_PATH(winDir != 0 ? 
std::string(winDir) + "\\System32\\cmd" + : std::string("C:\\Windows\\System32\\cmd")); const std::string PROCESS_ARGS1[] = {"/C", "copy " + INPUT_FILE1 + " ."}; const std::string PROCESS_ARGS2[] = {"/C", "del " + INPUT_FILE2}; #else @@ -44,16 +45,18 @@ const std::string SLOGAN2("You know, for search!"); CppUnit::Test* CCommandProcessorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCommandProcessorTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CCommandProcessorTest::testStartPermitted", - &CCommandProcessorTest::testStartPermitted)); - suiteOfTests->addTest(new CppUnit::TestCaller("CCommandProcessorTest::testStartNonPermitted", - &CCommandProcessorTest::testStartNonPermitted)); - suiteOfTests->addTest(new CppUnit::TestCaller("CCommandProcessorTest::testStartNonExistent", - &CCommandProcessorTest::testStartNonExistent)); - suiteOfTests->addTest(new CppUnit::TestCaller("CCommandProcessorTest::testKillDisallowed", - &CCommandProcessorTest::testKillDisallowed)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CCommandProcessorTest::testInvalidVerb", &CCommandProcessorTest::testInvalidVerb)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCommandProcessorTest::testStartPermitted", &CCommandProcessorTest::testStartPermitted)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCommandProcessorTest::testStartNonPermitted", + &CCommandProcessorTest::testStartNonPermitted)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCommandProcessorTest::testStartNonExistent", + &CCommandProcessorTest::testStartNonExistent)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCommandProcessorTest::testKillDisallowed", &CCommandProcessorTest::testKillDisallowed)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCommandProcessorTest::testInvalidVerb", &CCommandProcessorTest::testInvalidVerb)); return suiteOfTests; } @@ -137,7 +140,8 @@ void CCommandProcessorTest::testKillDisallowed() { std::string command(ml::controller::CCommandProcessor::KILL); command += '\t'; - command += ml::core::CStringUtils::typeToString(ml::core::CProcess::instance().id()); + command += + ml::core::CStringUtils::typeToString(ml::core::CProcess::instance().id()); CPPUNIT_ASSERT(!processor.handleCommand(command)); } diff --git a/bin/normalize/CCmdLineParser.cc b/bin/normalize/CCmdLineParser.cc index 64ebda4cae..e3c28ba6df 100644 --- a/bin/normalize/CCmdLineParser.cc +++ b/bin/normalize/CCmdLineParser.cc @@ -34,24 +34,32 @@ bool CCmdLineParser::parse(int argc, bool& perPartitionNormalization) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")( - "modelconfig", boost::program_options::value(), "Optional model config file")( - "logProperties", boost::program_options::value(), "Optional logger properties file")( + desc.add_options()("help", "Display this information and exit")( + "version", "Display version information and exit")( + "modelconfig", boost::program_options::value(), + "Optional model config file")("logProperties", + boost::program_options::value(), + "Optional logger properties file")( "logPipe", boost::program_options::value(), "Optional log to named pipe")( - "bucketspan", boost::program_options::value(), "Optional aggregation bucket span (in seconds) - default is 300")( + "bucketspan", boost::program_options::value(), + "Optional aggregation bucket span (in seconds) - default is 300")( "lengthEncodedInput", "Take input in length 
encoded binary format - default is CSV")( - "input", boost::program_options::value(), "Optional file to read input from - not present means read from STDIN")( + "input", boost::program_options::value(), + "Optional file to read input from - not present means read from STDIN")( "inputIsPipe", "Specified input file is a named pipe")( - "output", boost::program_options::value(), "Optional file to write output to - not present means write to STDOUT")( - "outputIsPipe", "Specified output file is a named pipe")("quantilesState", - boost::program_options::value(), - "Optional file to initialization data for normalization (in JSON)")( - "deleteStateFiles", "If this flag is set then delete the normalizer state files once they have been read")( + "output", boost::program_options::value(), + "Optional file to write output to - not present means write to STDOUT")( + "outputIsPipe", "Specified output file is a named pipe")( + "quantilesState", boost::program_options::value(), + "Optional file to initialization data for normalization (in JSON)")( + "deleteStateFiles", + "If this flag is set then delete the normalizer state files once they have been read")( "writeCsv", "Write the results in CSV format (default is lineified JSON)")( "perPartitionNormalization", "Optional flag to enable per partition normalization"); boost::program_options::variables_map vm; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); + boost::program_options::store( + boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); if (vm.count("help") > 0) { diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index d535e3746d..35e1eec3f1 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -53,27 +53,18 @@ int main(int argc, char** argv) { bool deleteStateFiles(false); bool writeCsv(false); bool perPartitionNormalization(false); - if (ml::normalize::CCmdLineParser::parse(argc, - argv, - modelConfigFile, - logProperties, - logPipe, - bucketSpan, - lengthEncodedInput, - inputFileName, - isInputFileNamedPipe, - outputFileName, - isOutputFileNamedPipe, - quantilesStateFile, - deleteStateFiles, - writeCsv, - perPartitionNormalization) == false) { + if (ml::normalize::CCmdLineParser::parse( + argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan, + lengthEncodedInput, inputFileName, isInputFileNamedPipe, + outputFileName, isOutputFileNamedPipe, quantilesStateFile, + deleteStateFiles, writeCsv, perPartitionNormalization) == false) { return EXIT_FAILURE; } // Construct the IO manager before reconfiguring the logger, as it performs // std::ios actions that only work before first use - ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe); + ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, + outputFileName, isOutputFileNamedPipe); if (ml::core::CLogger::instance().reconfigure(logPipe, logProperties) == false) { LOG_FATAL(<< "Could not reconfigure logging"); @@ -92,7 +83,8 @@ int main(int argc, char** argv) { return EXIT_FAILURE; } - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(bucketSpan); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig(bucketSpan); if (!modelConfigFile.empty() && modelConfig.init(modelConfigFile) == false) { LOG_FATAL(<< "Ml model config file '" << modelConfigFile << "' could not be loaded"); return EXIT_FAILURE; @@ -105,7 
+97,8 @@ int main(int argc, char** argv) { if (lengthEncodedInput) { inputParser.reset(new ml::api::CLengthEncodedInputParser(ioMgr.inputStream())); } else { - inputParser.reset(new ml::api::CCsvInputParser(ioMgr.inputStream(), ml::api::CCsvInputParser::COMMA)); + inputParser.reset(new ml::api::CCsvInputParser( + ioMgr.inputStream(), ml::api::CCsvInputParser::COMMA)); } using TScopedOutputHandlerP = boost::scoped_ptr; @@ -114,7 +107,9 @@ int main(int argc, char** argv) { outputWriter.reset(new ml::api::CCsvOutputWriter(ioMgr.outputStream())); } else { outputWriter.reset(new ml::api::CLineifiedJsonOutputWriter( - {ml::api::CResultNormalizer::PROBABILITY_NAME, ml::api::CResultNormalizer::NORMALIZED_SCORE_NAME}, ioMgr.outputStream())); + {ml::api::CResultNormalizer::PROBABILITY_NAME, + ml::api::CResultNormalizer::NORMALIZED_SCORE_NAME}, + ioMgr.outputStream())); } // This object will do the work @@ -132,7 +127,8 @@ int main(int argc, char** argv) { } // Now handle the numbers to be normalised from stdin - if (inputParser->readStream(boost::bind(&ml::api::CResultNormalizer::handleRecord, &normalizer, _1)) == false) { + if (inputParser->readStream(boost::bind(&ml::api::CResultNormalizer::handleRecord, + &normalizer, _1)) == false) { LOG_FATAL(<< "Failed to handle input to be normalized"); return EXIT_FAILURE; } diff --git a/devbin/move_copy_swap/Main.cc b/devbin/move_copy_swap/Main.cc index aa16f2a3b3..d398c482c3 100644 --- a/devbin/move_copy_swap/Main.cc +++ b/devbin/move_copy_swap/Main.cc @@ -71,7 +71,8 @@ void generate(size_t minSize, size_t iterations) { int main(int argc, char** argv) { if (argc != 4) { - std::cerr << "Usage: " << argv[0] << " " << std::endl + std::cerr << "Usage: " << argv[0] + << " " << std::endl << "Where: m = move" << std::endl << " c = copy" << std::endl << " d = copy defeating copy-on-write" << std::endl @@ -105,7 +106,8 @@ int main(int argc, char** argv) { uint64_t durationMs = (durationTenthMs / 10) + ((durationTenthMs % 10 >= 5) ? 
1 : 0);
 #else
     std::chrono::steady_clock::time_point endTime = std::chrono::steady_clock::now();
-    size_t durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
+    size_t durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime)
+                            .count();
 #endif
     std::cout << "Time " << durationMs
diff --git a/devbin/unixtime_to_string/CCmdLineParser.cc b/devbin/unixtime_to_string/CCmdLineParser.cc
index 4a2141d4d3..3a96a40720 100644
--- a/devbin/unixtime_to_string/CCmdLineParser.cc
+++ b/devbin/unixtime_to_string/CCmdLineParser.cc
@@ -21,16 +21,22 @@
 const std::string CCmdLineParser::DESCRIPTION = "Transport node error on node 0x9876 '\n"
                                                 "Options:";
-bool CCmdLineParser::parse(int argc, const char* const* argv, std::string& configFile, std::string& syslogLine) {
+bool CCmdLineParser::parse(int argc,
+                           const char* const* argv,
+                           std::string& configFile,
+                           std::string& syslogLine) {
     try {
         boost::program_options::options_description desc(DESCRIPTION);
-        desc.add_options()("help", "Display this information and exit")("version", "Display version information and exit")(
-            "config", boost::program_options::value<std::string>(), "Read configuration from ")(
+        desc.add_options()("help", "Display this information and exit")(
+            "version", "Display version information and exit")(
+            "config", boost::program_options::value<std::string>(),
+            "Read configuration from ")(
             "syslogline", boost::program_options::value<std::string>(), "Optional line of syslog");
         boost::program_options::variables_map vm;
-        boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
+        boost::program_options::store(
+            boost::program_options::parse_command_line(argc, argv, desc), vm);
         boost::program_options::notify(vm);
         if (vm.count("help") > 0) {
@@ -54,7 +60,9 @@ bool CCmdLineParser::parse(int argc, const char* const* argv, std::string& confi
         // Raise error
         std::cerr << "Error: Invalid command line options" << std::endl;
         std::cerr << desc << std::endl;
-    } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; }
+    } catch (std::exception& e) {
+        std::cerr << "Error processing command line: " << e.what() << std::endl;
+    }
     return false;
 }
diff --git a/devbin/unixtime_to_string/CCmdLineParser.h b/devbin/unixtime_to_string/CCmdLineParser.h
index b44ca252de..dd57667587 100644
--- a/devbin/unixtime_to_string/CCmdLineParser.h
+++ b/devbin/unixtime_to_string/CCmdLineParser.h
@@ -25,7 +25,8 @@ class CCmdLineParser {
 public:
     //! Parse the arguments. ONLY return true if configFile and dateTime
     //! are defined.
- static bool parse(int argc, const char* const* argv, std::string& configFile, std::string& syslogLine); + static bool + parse(int argc, const char* const* argv, std::string& configFile, std::string& syslogLine); private: static const std::string DESCRIPTION; diff --git a/devbin/vfprog/CLooper.cc b/devbin/vfprog/CLooper.cc index 4be6c696ef..5811941602 100644 --- a/devbin/vfprog/CLooper.cc +++ b/devbin/vfprog/CLooper.cc @@ -40,7 +40,9 @@ size_t CLooper::inlinedLibraryCallLoop(vflib::CIncrementer& incrementer, size_t return val; } -size_t CLooper::nonVirtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val) { +size_t CLooper::nonVirtualLibraryCallLoop(vflib::CIncrementer& incrementer, + size_t count, + size_t val) { for (size_t i = 0; i < count; ++i) { val = incrementer.nonVirtualIncrement(val); } diff --git a/devbin/vfprog/CLooper.h b/devbin/vfprog/CLooper.h index 95bd623732..6f4816322b 100644 --- a/devbin/vfprog/CLooper.h +++ b/devbin/vfprog/CLooper.h @@ -40,13 +40,16 @@ class CLooper { static size_t virtualProgramCallLoop(CIncrementer& incrementer, size_t count, size_t val); //! Loop calling the inlined incrementer - static size_t inlinedLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); + static size_t + inlinedLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); //! Loop calling the non-virtual incrementer - static size_t nonVirtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); + static size_t + nonVirtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); //! Loop calling the virtual incrementer - static size_t virtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); + static size_t + virtualLibraryCallLoop(vflib::CIncrementer& incrementer, size_t count, size_t val); }; } } diff --git a/devbin/vsbug/Main.cc b/devbin/vsbug/Main.cc index 9b67ddfec4..8b0f335838 100644 --- a/devbin/vsbug/Main.cc +++ b/devbin/vsbug/Main.cc @@ -18,12 +18,14 @@ int main(int, char**) { eventTimes.push_back(1347019162); std::time_t endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - std::cout << "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size() << std::endl; + std::cout << "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size() << std::endl; { std::time_t offset = endTime - startTime; unsigned long i = 0; - for (std::time_t bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength) { + for (std::time_t bucketStartTime = startTime; bucketStartTime < endTime; + bucketStartTime += bucketLength) { std::time_t bucketEndTime = bucketStartTime + bucketLength; for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { std::vector temp; diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index 9297984ed2..c0ddafe3f7 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -105,7 +105,8 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { }; public: - using TPersistCompleteFunc = std::function; + using TPersistCompleteFunc = + std::function; using TAnomalyDetectorPtr = model::CAnomalyDetector::TAnomalyDetectorPtr; using TAnomalyDetectorPtrVec = std::vector; using TAnomalyDetectorPtrVecItr = std::vector::iterator; @@ -113,7 +114,8 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { using TKeyVec = std::vector; using TKeyAnomalyDetectorPtrUMap = boost::unordered_map; - using 
TKeyCRefAnomalyDetectorPtrPr = std::pair; + using TKeyCRefAnomalyDetectorPtrPr = + std::pair; using TKeyCRefAnomalyDetectorPtrPrVec = std::vector; using TModelPlotDataVec = model::CAnomalyDetector::TModelPlotDataVec; using TModelPlotDataVecCItr = TModelPlotDataVec::const_iterator; @@ -175,7 +177,8 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { virtual void finalise(); //! Restore previously saved state - virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); + virtual bool restoreState(core::CDataSearcher& restoreSearcher, + core_t::TTime& completeToTime); //! Persist current state virtual bool persistState(core::CDataAdder& persister); @@ -233,15 +236,18 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { void resetBuckets(const std::string& controlMessage); //! Attempt to restore the detectors - bool restoreState(core::CStateRestoreTraverser& traverser, core_t::TTime& completeToTime, std::size_t& numDetectors); + bool restoreState(core::CStateRestoreTraverser& traverser, + core_t::TTime& completeToTime, + std::size_t& numDetectors); //! Attempt to restore one detector from an already-created traverser. bool restoreSingleDetector(core::CStateRestoreTraverser& traverser); //! Restore the detector identified by \p key and \p partitionFieldValue //! from \p traverser. - bool - restoreDetectorState(const model::CSearchKey& key, const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser); + bool restoreDetectorState(const model::CSearchKey& key, + const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser); //! Persist current state in the background bool backgroundPersistState(CBackgroundPersister& backgroundPersister); @@ -306,7 +312,9 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! Parses the time range in a control message assuming the time range follows after a //! single character code (e.g. starts with 'i10 20'). - bool parseTimeRangeInControlMessage(const std::string& controlMessage, core_t::TTime& start, core_t::TTime& end); + bool parseTimeRangeInControlMessage(const std::string& controlMessage, + core_t::TTime& start, + core_t::TTime& end); //! Update equalizers if not interim and aggregate. void updateAggregatorAndAggregate(bool isInterim, model::CHierarchicalResults& results); @@ -320,7 +328,9 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! Generate the model plot for the models of the specified detector in the //! specified time range. - void generateModelPlot(core_t::TTime startTime, core_t::TTime endTime, const model::CAnomalyDetector& detector); + void generateModelPlot(core_t::TTime startTime, + core_t::TTime endTime, + const model::CAnomalyDetector& detector); //! Write the pre-generated model plot to the output stream of the user's //! choosing: either file or streamed to the API @@ -335,7 +345,8 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! the member variables of an object. This makes it safer to call //! from within a persistence thread that's working off a cloned //! anomaly detector. - static void persistIndividualDetector(const model::CAnomalyDetector& detector, core::CStatePersistInserter& inserter); + static void persistIndividualDetector(const model::CAnomalyDetector& detector, + core::CStatePersistInserter& inserter); //! Iterate over the models, refresh their memory status, and send a report //! to the API @@ -344,12 +355,13 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! 
Update configuration void doForecast(const std::string& controlMessage); - model::CAnomalyDetector::TAnomalyDetectorPtr makeDetector(int identifier, - const model::CAnomalyDetectorModelConfig& modelConfig, - model::CLimits& limits, - const std::string& partitionFieldValue, - core_t::TTime firstTime, - const model::CAnomalyDetector::TModelFactoryCPtr& modelFactory); + model::CAnomalyDetector::TAnomalyDetectorPtr + makeDetector(int identifier, + const model::CAnomalyDetectorModelConfig& modelConfig, + model::CLimits& limits, + const std::string& partitionFieldValue, + core_t::TTime firstTime, + const model::CAnomalyDetector::TModelFactoryCPtr& modelFactory); //! Populate detector keys from the field config. void populateDetectorKeys(const CFieldConfig& fieldConfig, TKeyVec& keys); @@ -359,7 +371,9 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! Extract the required fields from \p dataRowFields //! and add the new record to \p detector - void addRecord(const TAnomalyDetectorPtr detector, core_t::TTime time, const TStrStrUMap& dataRowFields); + void addRecord(const TAnomalyDetectorPtr detector, + core_t::TTime time, + const TStrStrUMap& dataRowFields); protected: //! Get all the detectors. diff --git a/include/api/CBaseTokenListDataTyper.h b/include/api/CBaseTokenListDataTyper.h index 3a055f2951..705b605840 100644 --- a/include/api/CBaseTokenListDataTyper.h +++ b/include/api/CBaseTokenListDataTyper.h @@ -65,7 +65,8 @@ class API_EXPORT CBaseTokenListDataTyper : public CDataTyper { public: //! Shared pointer to reverse search creator that we're will function //! after being shallow copied - using TTokenListReverseSearchCreatorIntfCPtr = boost::shared_ptr; + using TTokenListReverseSearchCreatorIntfCPtr = + boost::shared_ptr; //! Used to associate tokens with weightings: //! first -> token ID @@ -81,7 +82,9 @@ class API_EXPORT CBaseTokenListDataTyper : public CDataTyper { //! Used for stream output of token IDs translated back to the original //! tokens struct API_EXPORT SIdTranslater { - SIdTranslater(const CBaseTokenListDataTyper& typer, const TSizeSizePrVec& tokenIds, char separator); + SIdTranslater(const CBaseTokenListDataTyper& typer, + const TSizeSizePrVec& tokenIds, + char separator); const CBaseTokenListDataTyper& s_Typer; const TSizeSizePrVec& s_TokenIds; @@ -103,7 +106,8 @@ class API_EXPORT CBaseTokenListDataTyper : public CDataTyper { //! than the length of the passed string, because the passed string may //! have the date stripped out of it. Field names/values are available //! to the type computation. - virtual int computeType(bool dryRun, const TStrStrUMap& fields, const std::string& str, size_t rawStringLen); + virtual int + computeType(bool dryRun, const TStrStrUMap& fields, const std::string& str, size_t rawStringLen); // Bring the other overload of computeType() into scope using CDataTyper::computeType; @@ -112,7 +116,11 @@ class API_EXPORT CBaseTokenListDataTyper : public CDataTyper { //! that are classified as the given type. Note that the reverse search //! is only approximate - it may select more records than have actually //! been classified as the returned type. - virtual bool createReverseSearch(int type, std::string& part1, std::string& part2, size_t& maxMatchingLength, bool& wasCached); + virtual bool createReverseSearch(int type, + std::string& part1, + std::string& part2, + size_t& maxMatchingLength, + bool& wasCached); //! Has the data typer's state changed? 
virtual bool hasChanged() const; @@ -138,11 +146,16 @@ class API_EXPORT CBaseTokenListDataTyper : public CDataTyper { //! Take a string token, convert it to a numeric ID and a weighting and //! add these to the provided data structures. - virtual void - tokenToIdAndWeight(const std::string& token, TSizeSizePrVec& tokenIds, TSizeSizeMap& tokenUniqueIds, size_t& totalWeight) = 0; + virtual void tokenToIdAndWeight(const std::string& token, + TSizeSizePrVec& tokenIds, + TSizeSizeMap& tokenUniqueIds, + size_t& totalWeight) = 0; //! Compute similarity between two vectors - virtual double similarity(const TSizeSizePrVec& left, size_t leftWeight, const TSizeSizePrVec& right, size_t rightWeight) const = 0; + virtual double similarity(const TSizeSizePrVec& left, + size_t leftWeight, + const TSizeSizePrVec& right, + size_t rightWeight) const = 0; //! Used to hold statistics about the types we compute: //! first -> count of matches @@ -224,22 +237,24 @@ class API_EXPORT CBaseTokenListDataTyper : public CDataTyper { using TTokenMIndex = boost::multi_index::multi_index_container< CTokenInfoItem, - boost::multi_index::indexed_by< - boost::multi_index::random_access<>, - boost::multi_index::hashed_unique, - BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CTokenInfoItem, std::string, str)>>>; + boost::multi_index::indexed_by, + boost::multi_index::hashed_unique, BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(CTokenInfoItem, std::string, str)>>>; private: //! Used by deferred persistence functions - static void - acceptPersistInserter(const TTokenMIndex& tokenIdLookup, const TTokenListTypeVec& types, core::CStatePersistInserter& inserter); + static void acceptPersistInserter(const TTokenMIndex& tokenIdLookup, + const TTokenListTypeVec& types, + core::CStatePersistInserter& inserter); //! Given a string containing comma separated pre-tokenised input, add //! the tokens to the working data structures in the same way as if they //! had been determined by the tokeniseString() method. The result of //! the tokenisation is returned in \p tokenIds, \p tokenUniqueIds and //! \p totalWeight. Any previous content of these variables is wiped. - bool addPretokenisedTokens(const std::string& tokensCsv, TSizeSizePrVec& tokenIds, TSizeSizeMap& tokenUniqueIds, size_t& totalWeight); + bool addPretokenisedTokens(const std::string& tokensCsv, + TSizeSizePrVec& tokenIds, + TSizeSizeMap& tokenUniqueIds, + size_t& totalWeight); private: //! Reference to the object we'll use to create reverse searches @@ -284,7 +299,8 @@ class API_EXPORT CBaseTokenListDataTyper : public CDataTyper { friend API_EXPORT std::ostream& operator<<(std::ostream&, const SIdTranslater&); }; -API_EXPORT std::ostream& operator<<(std::ostream& strm, const CBaseTokenListDataTyper::SIdTranslater& translator); +API_EXPORT std::ostream& +operator<<(std::ostream& strm, const CBaseTokenListDataTyper::SIdTranslater& translator); } } diff --git a/include/api/CCategoryExamplesCollector.h b/include/api/CCategoryExamplesCollector.h index 7394de2fe6..e0a3119e54 100644 --- a/include/api/CCategoryExamplesCollector.h +++ b/include/api/CCategoryExamplesCollector.h @@ -38,7 +38,8 @@ class API_EXPORT CCategoryExamplesCollector { public: CCategoryExamplesCollector(std::size_t maxExamples); - CCategoryExamplesCollector(std::size_t maxExamples, core::CStateRestoreTraverser& traverser); + CCategoryExamplesCollector(std::size_t maxExamples, + core::CStateRestoreTraverser& traverser); //! Adds the example to the category if the example is a new //! 
distinct example and if there are less than the maximum @@ -64,7 +65,9 @@ class API_EXPORT CCategoryExamplesCollector { using TSizeStrSetUMap = boost::unordered_map; private: - void persistExamples(std::size_t category, const TStrSet& examples, core::CStatePersistInserter& inserter) const; + void persistExamples(std::size_t category, + const TStrSet& examples, + core::CStatePersistInserter& inserter) const; bool restoreExamples(core::CStateRestoreTraverser& traverser); //! Truncate long examples to MAX_EXAMPLE_LENGTH bytes, appending an diff --git a/include/api/CCmdSkeleton.h b/include/api/CCmdSkeleton.h index 4b53d5de5d..6867dee6b0 100644 --- a/include/api/CCmdSkeleton.h +++ b/include/api/CCmdSkeleton.h @@ -34,7 +34,10 @@ class CInputParser; //! class API_EXPORT CCmdSkeleton : private core::CNonCopyable { public: - CCmdSkeleton(core::CDataSearcher* restoreSearcher, core::CDataAdder* persister, CInputParser& inputParser, CDataProcessor& processor); + CCmdSkeleton(core::CDataSearcher* restoreSearcher, + core::CDataAdder* persister, + CInputParser& inputParser, + CDataProcessor& processor); //! Pass input to the processor until it's consumed as much as it can. bool ioLoop(); diff --git a/include/api/CCsvOutputWriter.h b/include/api/CCsvOutputWriter.h index 659bf00c12..ce6aa5a40a 100644 --- a/include/api/CCsvOutputWriter.h +++ b/include/api/CCsvOutputWriter.h @@ -55,7 +55,10 @@ class API_EXPORT CCsvOutputWriter : public COutputHandler { public: //! Constructor that causes output to be written to the internal string //! stream - CCsvOutputWriter(bool outputMessages = false, bool outputHeader = true, char escape = QUOTE, char separator = COMMA); + CCsvOutputWriter(bool outputMessages = false, + bool outputHeader = true, + char escape = QUOTE, + char separator = COMMA); //! Constructor that causes output to be written to the specified stream CCsvOutputWriter(std::ostream& strmOut, @@ -81,7 +84,8 @@ class API_EXPORT CCsvOutputWriter : public COutputHandler { //! original field values. Where the same field is present in both //! overrideDataRowFields and dataRowFields, the value in //! overrideDataRowFields will be written. - virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + virtual bool writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields); // Bring the other overload of writeRow() into scope using COutputHandler::writeRow; diff --git a/include/api/CDataProcessor.h b/include/api/CDataProcessor.h index 2d909fd853..f3a9774304 100644 --- a/include/api/CDataProcessor.h +++ b/include/api/CDataProcessor.h @@ -67,7 +67,8 @@ class API_EXPORT CDataProcessor : private core::CNonCopyable { virtual void finalise() = 0; //! Restore previously saved state - virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime) = 0; + virtual bool restoreState(core::CDataSearcher& restoreSearcher, + core_t::TTime& completeToTime) = 0; //! Persist current state virtual bool persistState(core::CDataAdder& persister) = 0; diff --git a/include/api/CDataTyper.h b/include/api/CDataTyper.h index a16b151860..c757f3f3f8 100644 --- a/include/api/CDataTyper.h +++ b/include/api/CDataTyper.h @@ -63,14 +63,21 @@ class API_EXPORT CDataTyper { int computeType(bool isDryRun, const std::string& str, size_t rawStringLen); //! As above, but also take into account field names/values. 
- virtual int computeType(bool isDryRun, const TStrStrUMap& fields, const std::string& str, size_t rawStringLen) = 0; + virtual int computeType(bool isDryRun, + const TStrStrUMap& fields, + const std::string& str, + size_t rawStringLen) = 0; //! Create reverse search commands that will (more or less) just //! select the records that are classified as the given type when //! combined with the original search. Note that the reverse search is //! only approximate - it may select more records than have actually //! been classified as the returned type. - virtual bool createReverseSearch(int type, std::string& part1, std::string& part2, size_t& maxMatchingLength, bool& wasCached) = 0; + virtual bool createReverseSearch(int type, + std::string& part1, + std::string& part2, + size_t& maxMatchingLength, + bool& wasCached) = 0; //! Has the data typer's state changed? virtual bool hasChanged() const = 0; diff --git a/include/api/CDetectionRulesJsonParser.h b/include/api/CDetectionRulesJsonParser.h index 28092c5154..b518e42227 100644 --- a/include/api/CDetectionRulesJsonParser.h +++ b/include/api/CDetectionRulesJsonParser.h @@ -39,16 +39,23 @@ class API_EXPORT CDetectionRulesJsonParser { private: bool parseRuleConditions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule); - bool parseFilterId(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition); + bool parseFilterId(const rapidjson::Value& conditionObject, + model::CRuleCondition& ruleCondition); static bool hasStringMember(const rapidjson::Value& object, const std::string& name); static bool hasArrayMember(const rapidjson::Value& object, const std::string& name); - static bool parseRuleActions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule); - static bool parseConditionsConnective(const rapidjson::Value& ruleObject, model::CDetectionRule& rule); - static bool parseRuleConditionType(const rapidjson::Value& ruleConditionObject, model::CRuleCondition& ruleCondition); - static bool parseCondition(const rapidjson::Value& ruleConditionObject, model::CRuleCondition& ruleCondition); - static bool parseConditionOperator(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition); - static bool parseConditionThreshold(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition); + static bool parseRuleActions(const rapidjson::Value& ruleObject, + model::CDetectionRule& rule); + static bool parseConditionsConnective(const rapidjson::Value& ruleObject, + model::CDetectionRule& rule); + static bool parseRuleConditionType(const rapidjson::Value& ruleConditionObject, + model::CRuleCondition& ruleCondition); + static bool parseCondition(const rapidjson::Value& ruleConditionObject, + model::CRuleCondition& ruleCondition); + static bool parseConditionOperator(const rapidjson::Value& conditionObject, + model::CRuleCondition& ruleCondition); + static bool parseConditionThreshold(const rapidjson::Value& conditionObject, + model::CRuleCondition& ruleCondition); private: //! The filters per id used by categorical rule conditions. diff --git a/include/api/CFieldConfig.h b/include/api/CFieldConfig.h index 2e0372f9f5..556d730aab 100644 --- a/include/api/CFieldConfig.h +++ b/include/api/CFieldConfig.h @@ -249,7 +249,11 @@ class API_EXPORT CFieldConfig { CFieldOptions(const std::string& fieldName, int configKey); //! 
Deduce the function from the fieldName - CFieldOptions(const std::string& fieldName, int configKey, const std::string& byFieldName, bool byHasExcludeFrequent, bool useNull); + CFieldOptions(const std::string& fieldName, + int configKey, + const std::string& byFieldName, + bool byHasExcludeFrequent, + bool useNull); //! Deduce the function from the fieldName CFieldOptions(const std::string& fieldName, @@ -329,8 +333,7 @@ class API_EXPORT CFieldConfig { using TFieldOptionsMIndex = boost::multi_index::multi_index_container< CFieldOptions, boost::multi_index::indexed_by< - boost::multi_index::ordered_unique, - BOOST_MULTI_INDEX_CONST_MEM_FUN(CFieldOptions, int, configKey)>, + boost::multi_index::ordered_unique, BOOST_MULTI_INDEX_CONST_MEM_FUN(CFieldOptions, int, configKey)>, boost::multi_index::ordered_unique< boost::multi_index::tag, boost::multi_index::composite_key< @@ -380,7 +383,10 @@ class API_EXPORT CFieldConfig { //! Construct with a single field and a partition field. (This //! constructor is only used for unit testing.) - CFieldConfig(const std::string& fieldName, const std::string& byFieldName, const std::string& partitionFieldName, bool useNull); + CFieldConfig(const std::string& fieldName, + const std::string& byFieldName, + const std::string& partitionFieldName, + bool useNull); //! Initialise from command line options. This method expects that only //! one of the config file and the tokens will have been specified. If @@ -474,11 +480,16 @@ class API_EXPORT CFieldConfig { //! to find the unique config keys, then search for all the settings //! that correspond to each particular config key. Doing this //! simplifies the error reporting. - bool - processDetector(const boost::property_tree::ptree& propTree, const std::string& key, const std::string& value, TIntSet& handledConfigs); + bool processDetector(const boost::property_tree::ptree& propTree, + const std::string& key, + const std::string& value, + TIntSet& handledConfigs); //! Add data structures relating to an active detector. - bool addActiveDetector(int configKey, const std::string& description, const std::string& rules, TStrVec& copyTokens); + bool addActiveDetector(int configKey, + const std::string& description, + const std::string& rules, + TStrVec& copyTokens); //! Get a function name and field name from a field string static bool parseFieldString(bool haveSummaryCountField, @@ -500,7 +511,9 @@ class API_EXPORT CFieldConfig { //! Check that we have at most one "by" and one "over" token //! and report their positions in the token list - bool findLastByOverTokens(const TStrVec& copyTokens, std::size_t& lastByTokenIndex, std::size_t& lastOverTokenIndex); + bool findLastByOverTokens(const TStrVec& copyTokens, + std::size_t& lastByTokenIndex, + std::size_t& lastOverTokenIndex); //! Check that the "by" or "over" field is valid bool validateByOverField(const TStrVec& copyTokens, diff --git a/include/api/CFieldDataTyper.h b/include/api/CFieldDataTyper.h index 72bfd46b7b..095cacedc0 100644 --- a/include/api/CFieldDataTyper.h +++ b/include/api/CFieldDataTyper.h @@ -61,16 +61,17 @@ class API_EXPORT CFieldDataTyper : public CDataProcessor { public: // A type of token list data typer that DOESN'T exclude fields from its // analysis - using TTokenListDataTyperKeepsFields = CTokenListDataTyper; + using TTokenListDataTyperKeepsFields = + CTokenListDataTyper; public: //! Construct without persistence capability @@ -94,7 +95,8 @@ class API_EXPORT CFieldDataTyper : public CDataProcessor { virtual void finalise(); //! 
Restore previously saved state - virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); + virtual bool restoreState(core::CDataSearcher& restoreSearcher, + core_t::TTime& completeToTime); //! Persist current state virtual bool persistState(core::CDataAdder& persister); diff --git a/include/api/CForecastRunner.h b/include/api/CForecastRunner.h index a45e108951..717c4d7ec8 100644 --- a/include/api/CForecastRunner.h +++ b/include/api/CForecastRunner.h @@ -107,7 +107,9 @@ class API_EXPORT CForecastRunner final : private core::CNonCopyable { //! Initialize and start the forecast runner thread //! \p jobId The job ID //! \p strmOut The output stream to write forecast results to - CForecastRunner(const std::string& jobId, core::CJsonOutputStreamWrapper& strmOut, model::CResourceMonitor& resourceMonitor); + CForecastRunner(const std::string& jobId, + core::CJsonOutputStreamWrapper& strmOut, + model::CResourceMonitor& resourceMonitor); //! Destructor, cancels all queued forecast requests, finishes a running forecast. //! To finish all remaining forecasts call finishForecasts() first. @@ -126,7 +128,9 @@ class API_EXPORT CForecastRunner final : private core::CNonCopyable { //! \param controlMessage The control message retrieved. //! \param detectors vector of detectors (shallow copy) //! \return true if the forecast request passed validation - bool pushForecastJob(const std::string& controlMessage, const TAnomalyDetectorPtrVec& detectors, const core_t::TTime lastResultsTime); + bool pushForecastJob(const std::string& controlMessage, + const TAnomalyDetectorPtrVec& detectors, + const core_t::TTime lastResultsTime); //! Blocks and waits until all queued forecasts are done void finishForecasts(); @@ -188,7 +192,8 @@ class API_EXPORT CForecastRunner final : private core::CNonCopyable { }; private: - using TErrorFunc = std::function; + using TErrorFunc = + std::function; private: //! The worker loop @@ -214,10 +219,11 @@ class API_EXPORT CForecastRunner final : private core::CNonCopyable { void sendMessage(WRITE write, const SForecast& forecastJob, const std::string& message) const; //! parse and validate a forecast request and turn it into a forecast job - static bool parseAndValidateForecastRequest(const std::string& controlMessage, - SForecast& forecastJob, - const core_t::TTime lastResultsTime, - const TErrorFunc& errorFunction = TErrorFunc()); + static bool + parseAndValidateForecastRequest(const std::string& controlMessage, + SForecast& forecastJob, + const core_t::TTime lastResultsTime, + const TErrorFunc& errorFunction = TErrorFunc()); private: //! This job ID diff --git a/include/api/CHierarchicalResultsWriter.h b/include/api/CHierarchicalResultsWriter.h index d08683e658..18872150df 100644 --- a/include/api/CHierarchicalResultsWriter.h +++ b/include/api/CHierarchicalResultsWriter.h @@ -32,7 +32,8 @@ namespace api { //! //! For each node one or more CAnomalyDetector::SResults objects are //! constructed and written by the callback supplied to the constructor. 
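The callback arrangement just described is worth seeing in miniature: the writer is constructed with a function object and does nothing with the results it builds except hand them to that callback. The sketch below is illustrative only, with all type names invented; the real classes are the visitor hierarchy in the diff that follows.

    #include <functional>
    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical stand-ins for the real node/result types.
    struct SResult {
        std::string s_Description;
        double s_Probability = 1.0;
    };

    struct SNode {
        SResult s_Result;
        std::vector<SNode> s_Children;
    };

    // The writer owns no output machinery: it is constructed with a
    // callback and invokes it once per visited node.
    class CResultsVisitorSketch {
    public:
        using TResultWrittenFunc = std::function<void(const SResult&)>;

        explicit CResultsVisitorSketch(TResultWrittenFunc callback)
            : m_Callback(std::move(callback)) {}

        void visit(const SNode& node) const {
            m_Callback(node.s_Result);
            for (const auto& child : node.s_Children) {
                this->visit(child);
            }
        }

    private:
        TResultWrittenFunc m_Callback;
    };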
-class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResultsVisitor, private core::CNonCopyable { +class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResultsVisitor, + private core::CNonCopyable { public: using TDouble1Vec = core::CSmallVector; using TOptionalDouble = boost::optional; @@ -40,14 +41,22 @@ class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResults // Influencers using TStoredStringPtrVec = std::vector; - using TStoredStringPtrStoredStringPtrPr = std::pair; - using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair; - using TStoredStringPtrStoredStringPtrPrDoublePrVec = std::vector; + using TStoredStringPtrStoredStringPtrPr = + std::pair; + using TStoredStringPtrStoredStringPtrPrDoublePr = + std::pair; + using TStoredStringPtrStoredStringPtrPrDoublePrVec = + std::vector; using TStr1Vec = core::CSmallVector; public: - enum EResultType { E_SimpleCountResult, E_PopulationResult, E_PartitionResult, E_Result }; + enum EResultType { + E_SimpleCountResult, + E_PopulationResult, + E_PartitionResult, + E_Result + }; //! Type which wraps up the results of anomaly detection. struct API_EXPORT SResults { //! Construct for population results @@ -168,7 +177,9 @@ class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResults //! Given a leaf node, search upwards to find the most appropriate //! values for person and partition probability results. - static void findParentProbabilities(const TNode& node, double& personProbability, double& partitionProbability); + static void findParentProbabilities(const TNode& node, + double& personProbability, + double& partitionProbability); private: //! The various limits. diff --git a/include/api/CJsonOutputWriter.h b/include/api/CJsonOutputWriter.h index 333a8078b3..d5902e7a9f 100644 --- a/include/api/CJsonOutputWriter.h +++ b/include/api/CJsonOutputWriter.h @@ -195,7 +195,8 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler { virtual const TStrVec& fieldNames() const; //! Write the data row fields as a JSON object - virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + virtual bool writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields); //! Limit the output to the top count anomalous records and influencers. //! Each detector will write no more than count records and influencers @@ -221,7 +222,9 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler { virtual bool acceptResult(const CHierarchicalResultsWriter::TResults& results); //! Accept the influencer - bool acceptInfluencer(core_t::TTime time, const model::CHierarchicalResults::TNode& node, bool isBucketInfluencer); + bool acceptInfluencer(core_t::TTime time, + const model::CHierarchicalResults::TNode& node, + bool isBucketInfluencer); //! Creates a time bucket influencer. //! If limitNumberRecords is set add this influencer after all other influencers @@ -229,7 +232,10 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler { //! than the others. //! Only one per bucket is expected, this does not add to the influencer //! count if limitNumberRecords is used - virtual void acceptBucketTimeInfluencer(core_t::TTime time, double probability, double rawAnomalyScore, double normalizedAnomalyScore); + virtual void acceptBucketTimeInfluencer(core_t::TTime time, + double probability, + double rawAnomalyScore, + double normalizedAnomalyScore); //! This method must be called after all the results for a given bucket //! 
are available. It triggers the writing of the results. @@ -250,7 +256,8 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler { const TStrSet& examples); //! Persist a normalizer by writing its state to the output - void persistNormalizer(const model::CHierarchicalResultsNormalizer& normalizer, core_t::TTime& persistTime); + void persistNormalizer(const model::CHierarchicalResultsNormalizer& normalizer, + core_t::TTime& persistTime); private: template @@ -267,29 +274,39 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler { private: //! Write out all the JSON documents that have been built up for //! a particular bucket - void writeBucket(bool isInterim, core_t::TTime bucketTime, SBucketData& bucketData, uint64_t bucketProcessingTime); + void writeBucket(bool isInterim, + core_t::TTime bucketTime, + SBucketData& bucketData, + uint64_t bucketProcessingTime); //! Add the fields for a metric detector - void addMetricFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + void addMetricFields(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc); //! Write the fields for a population detector - void addPopulationFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + void addPopulationFields(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc); //! Write the fields for a population detector cause - void addPopulationCauseFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + void addPopulationCauseFields(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc); //! Write the fields for an event rate detector - void addEventRateFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + void addEventRateFields(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc); //! Add the influencer fields to the doc - void addInfluencerFields(bool isBucketInfluencer, const model::CHierarchicalResults::TNode& node, TDocumentWeakPtr weakDoc); + void addInfluencerFields(bool isBucketInfluencer, + const model::CHierarchicalResults::TNode& node, + TDocumentWeakPtr weakDoc); //! Write the influence results. void addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec& influenceResults, TDocumentWeakPtr weakDoc); //! Write partition score & probability - void addPartitionScores(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc); + void addPartitionScores(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc); private: //! The job ID diff --git a/include/api/CLineifiedJsonInputParser.h b/include/api/CLineifiedJsonInputParser.h index bf9808a89e..433b06a7cb 100644 --- a/include/api/CLineifiedJsonInputParser.h +++ b/include/api/CLineifiedJsonInputParser.h @@ -59,7 +59,9 @@ class API_EXPORT CLineifiedJsonInputParser : public CLineifiedInputParser { TStrRefVec& fieldValRefs, TStrStrUMap& recordFields); - bool decodeDocumentWithArbitraryFields(const rapidjson::Document& document, TStrVec& fieldNames, TStrStrUMap& recordFields); + bool decodeDocumentWithArbitraryFields(const rapidjson::Document& document, + TStrVec& fieldNames, + TStrStrUMap& recordFields); private: //! 
Are all JSON documents expected to contain the same fields in the diff --git a/include/api/CLineifiedJsonOutputWriter.h b/include/api/CLineifiedJsonOutputWriter.h index 87ca6e8732..3b0b83f1cb 100644 --- a/include/api/CLineifiedJsonOutputWriter.h +++ b/include/api/CLineifiedJsonOutputWriter.h @@ -67,7 +67,8 @@ class API_EXPORT CLineifiedJsonOutputWriter : public COutputHandler { using COutputHandler::fieldNames; //! Write the data row fields as a JSON object - virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + virtual bool writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields); // Bring the other overload of writeRow() into scope using COutputHandler::writeRow; @@ -78,7 +79,9 @@ class API_EXPORT CLineifiedJsonOutputWriter : public COutputHandler { private: //! Write a single field to the document - void writeField(const std::string& name, const std::string& value, rapidjson::Document& doc) const; + void writeField(const std::string& name, + const std::string& value, + rapidjson::Document& doc) const; private: //! Which output fields are numeric? diff --git a/include/api/CLineifiedXmlInputParser.h b/include/api/CLineifiedXmlInputParser.h index e4a3e753b1..450a8785ae 100644 --- a/include/api/CLineifiedXmlInputParser.h +++ b/include/api/CLineifiedXmlInputParser.h @@ -41,7 +41,9 @@ class API_EXPORT CLineifiedXmlInputParser : public CLineifiedInputParser { //! object should read from it. For example, if std::cin is passed, no //! other object should read from std::cin, otherwise unpredictable and //! incorrect results will be generated. - CLineifiedXmlInputParser(core::CXmlParserIntf& parser, std::istream& strmIn, bool allDocsSameStructure = false); + CLineifiedXmlInputParser(core::CXmlParserIntf& parser, + std::istream& strmIn, + bool allDocsSameStructure = false); //! Read records from the stream. The supplied reader function is called //! once per record. If the supplied reader function returns false, @@ -52,7 +54,9 @@ class API_EXPORT CLineifiedXmlInputParser : public CLineifiedInputParser { private: //! Attempt to parse the current working record into data fields. - bool decodeDocumentWithCommonFields(TStrVec& fieldNames, TStrRefVec& fieldValRefs, TStrStrUMap& recordFields); + bool decodeDocumentWithCommonFields(TStrVec& fieldNames, + TStrRefVec& fieldValRefs, + TStrStrUMap& recordFields); void decodeDocumentWithArbitraryFields(TStrVec& fieldNames, TStrStrUMap& recordFields); diff --git a/include/api/CLineifiedXmlOutputWriter.h b/include/api/CLineifiedXmlOutputWriter.h index 5c4204c479..1d38741a40 100644 --- a/include/api/CLineifiedXmlOutputWriter.h +++ b/include/api/CLineifiedXmlOutputWriter.h @@ -53,7 +53,8 @@ class API_EXPORT CLineifiedXmlOutputWriter : public COutputHandler { using COutputHandler::fieldNames; //! Write the data row fields as an XML document - virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + virtual bool writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields); // Bring the other overload of writeRow() into scope using COutputHandler::writeRow; diff --git a/include/api/CModelSizeStatsJsonWriter.h b/include/api/CModelSizeStatsJsonWriter.h index 4967baf614..3296c0a6c1 100644 --- a/include/api/CModelSizeStatsJsonWriter.h +++ b/include/api/CModelSizeStatsJsonWriter.h @@ -23,8 +23,9 @@ namespace api { class API_EXPORT CModelSizeStatsJsonWriter : private core::CNonInstantiatable { public: //! 
Writes the model size stats in the \p results in JSON format. - static void - write(const std::string& jobId, const model::CResourceMonitor::SResults& results, core::CRapidJsonConcurrentLineWriter& writer); + static void write(const std::string& jobId, + const model::CResourceMonitor::SResults& results, + core::CRapidJsonConcurrentLineWriter& writer); }; } } diff --git a/include/api/CNullOutput.h b/include/api/CNullOutput.h index 81be9be7ac..5ecbda8e45 100644 --- a/include/api/CNullOutput.h +++ b/include/api/CNullOutput.h @@ -39,7 +39,8 @@ class API_EXPORT CNullOutput : public COutputHandler { using COutputHandler::fieldNames; //! Does nothing with the row provided. - virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + virtual bool writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields); // Bring the other overload of writeRow() into scope using COutputHandler::writeRow; diff --git a/include/api/COutputChainer.h b/include/api/COutputChainer.h index 5a12245bd3..5036943543 100644 --- a/include/api/COutputChainer.h +++ b/include/api/COutputChainer.h @@ -58,7 +58,8 @@ class API_EXPORT COutputChainer : public COutputHandler { //! values, optionally overriding some of the original field values. //! Where the same field is present in both overrideDataRowFields and //! dataRowFields, the value in overrideDataRowFields will be written. - virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + virtual bool writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields); // Bring the other overload of writeRow() into scope using COutputHandler::writeRow; @@ -71,7 +72,8 @@ class API_EXPORT COutputChainer : public COutputHandler { virtual void finalise(); //! Restore previously saved state - virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); + virtual bool restoreState(core::CDataSearcher& restoreSearcher, + core_t::TTime& completeToTime); //! Persist current state virtual bool persistState(core::CDataAdder& persister); diff --git a/include/api/COutputHandler.h b/include/api/COutputHandler.h index f704a6ec17..cb159cfb30 100644 --- a/include/api/COutputHandler.h +++ b/include/api/COutputHandler.h @@ -81,13 +81,15 @@ class API_EXPORT COutputHandler : private core::CNonCopyable { //! original field values. Where the same field is present in both //! overrideDataRowFields and dataRowFields, the value in //! overrideDataRowFields will be written. - virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) = 0; + virtual bool writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields) = 0; //! Perform any final processing once all input data has been seen. virtual void finalise(); //! Restore previously saved state - virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); + virtual bool restoreState(core::CDataSearcher& restoreSearcher, + core_t::TTime& completeToTime); //! 
Persist current state
 virtual bool persistState(core::CDataAdder& persister);
diff --git a/include/api/CResultNormalizer.h b/include/api/CResultNormalizer.h
index 5cf531946a..91296d3a07 100644
--- a/include/api/CResultNormalizer.h
+++ b/include/api/CResultNormalizer.h
@@ -75,7 +75,8 @@ class API_EXPORT CResultNormalizer {
 using TStrStrUMapCItr = TStrStrUMap::const_iterator;
public:
- CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig, COutputHandler& outputHandler);
+ CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig,
+ COutputHandler& outputHandler);
 //! Initialise the system change normalizer
 bool initNormalizer(const std::string& stateFileName);
@@ -102,10 +103,14 @@ class API_EXPORT CResultNormalizer {
 double& probability);
 template <typename T>
- bool parseDataField(const TStrStrUMap& dataRowFields, const std::string& fieldName, T& result) const {
+ bool parseDataField(const TStrStrUMap& dataRowFields,
+ const std::string& fieldName,
+ T& result) const {
 TStrStrUMapCItr iter = dataRowFields.find(fieldName);
- if (iter == dataRowFields.end() || core::CStringUtils::stringToType(iter->second, result) == false) {
- LOG_ERROR(<< "Cannot interpret " << fieldName << " field in record:\n" << CDataProcessor::debugPrintRecord(dataRowFields));
+ if (iter == dataRowFields.end() ||
+ core::CStringUtils::stringToType(iter->second, result) == false) {
+ LOG_ERROR(<< "Cannot interpret " << fieldName << " field in record:\n"
+ << CDataProcessor::debugPrintRecord(dataRowFields));
 return false;
 }
 return true;
diff --git a/include/api/CStateRestoreStreamFilter.h b/include/api/CStateRestoreStreamFilter.h
index b30f78c367..d2a9143e3f 100644
--- a/include/api/CStateRestoreStreamFilter.h
+++ b/include/api/CStateRestoreStreamFilter.h
@@ -43,7 +43,8 @@ namespace api {
 //! When using it with boost::iostreams::filtering_ostream note that the filter gets
 //! copied once pushed to the ostream instance.
 //!
-class API_EXPORT CStateRestoreStreamFilter : public boost::iostreams::basic_line_filter<char> {
+class API_EXPORT CStateRestoreStreamFilter
+ : public boost::iostreams::basic_line_filter<char> {
public:
 using boost::iostreams::basic_line_filter<char>::string_type;
diff --git a/include/api/CTokenListDataTyper.h b/include/api/CTokenListDataTyper.h
index 4e762f310e..4f707f2003 100644
--- a/include/api/CTokenListDataTyper.h
+++ b/include/api/CTokenListDataTyper.h
@@ -52,8 +52,11 @@ class CTokenListDataTyper : public CBaseTokenListDataTyper {
 //! Create a data typer with threshold for how comparable types are
 //! 0.0 means everything is the same type
 //! 1.0 means things have to match exactly to be the same type
- CTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr& reverseSearchCreator, double threshold, const std::string& fieldName)
- : CBaseTokenListDataTyper(reverseSearchCreator, threshold, fieldName), m_Dict(core::CWordDictionary::instance()) {}
+ CTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr& reverseSearchCreator,
+ double threshold,
+ const std::string& fieldName)
+ : CBaseTokenListDataTyper(reverseSearchCreator, threshold, fieldName),
+ m_Dict(core::CWordDictionary::instance()) {}
protected:
 //! Split the string into a list of tokens.
The result of the @@ -78,18 +81,21 @@ class CTokenListDataTyper : public CBaseTokenListDataTyper { // Basically tokenise into [a-zA-Z0-9]+ strings, possibly // allowing underscores, dots and dashes in the middle if (::isalnum(static_cast(curChar)) || - (!temp.empty() && - ((ALLOW_UNDERSCORE && curChar == '_') || (ALLOW_DOT && curChar == '.') || (ALLOW_DASH && curChar == '-')))) { + (!temp.empty() && ((ALLOW_UNDERSCORE && curChar == '_') || + (ALLOW_DOT && curChar == '.') || + (ALLOW_DASH && curChar == '-')))) { temp += curChar; if (IGNORE_HEX) { // Count dots and dashes as numeric - if (!::isxdigit(static_cast(curChar)) && curChar != '.' && curChar != '-') { + if (!::isxdigit(static_cast(curChar)) && + curChar != '.' && curChar != '-') { nonHexPos = temp.length() - 1; } } } else { if (!temp.empty()) { - this->considerToken(fields, nonHexPos, temp, tokenIds, tokenUniqueIds, totalWeight); + this->considerToken(fields, nonHexPos, temp, tokenIds, + tokenUniqueIds, totalWeight); temp.clear(); } @@ -103,13 +109,16 @@ class CTokenListDataTyper : public CBaseTokenListDataTyper { this->considerToken(fields, nonHexPos, temp, tokenIds, tokenUniqueIds, totalWeight); } - LOG_TRACE(<< str << " tokenised to " << tokenIds.size() << " tokens with total weight " << totalWeight << ": " - << SIdTranslater(*this, tokenIds, ' ')); + LOG_TRACE(<< str << " tokenised to " << tokenIds.size() << " tokens with total weight " + << totalWeight << ": " << SIdTranslater(*this, tokenIds, ' ')); } //! Take a string token, convert it to a numeric ID and a weighting and //! add these to the provided data structures. - virtual void tokenToIdAndWeight(const std::string& token, TSizeSizePrVec& tokenIds, TSizeSizeMap& tokenUniqueIds, size_t& totalWeight) { + virtual void tokenToIdAndWeight(const std::string& token, + TSizeSizePrVec& tokenIds, + TSizeSizeMap& tokenUniqueIds, + size_t& totalWeight) { TSizeSizePr idWithWeight(this->idForToken(token), 1); if (token.length() >= MIN_DICTIONARY_LENGTH) { @@ -122,12 +131,16 @@ class CTokenListDataTyper : public CBaseTokenListDataTyper { } //! Compute similarity between two vectors - virtual double similarity(const TSizeSizePrVec& left, size_t leftWeight, const TSizeSizePrVec& right, size_t rightWeight) const { + virtual double similarity(const TSizeSizePrVec& left, + size_t leftWeight, + const TSizeSizePrVec& right, + size_t rightWeight) const { double similarity(1.0); size_t maxWeight(std::max(leftWeight, rightWeight)); if (maxWeight > 0) { - size_t diff(DO_WARPING ? m_SimilarityTester.weightedEditDistance(left, right) : this->compareNoWarp(left, right)); + size_t diff(DO_WARPING ? m_SimilarityTester.weightedEditDistance(left, right) + : this->compareNoWarp(left, right)); similarity = 1.0 - double(diff) / double(maxWeight); } @@ -190,7 +203,8 @@ class CTokenListDataTyper : public CBaseTokenListDataTyper { // with leading digits, and checking this first will cause the // check to be completely compiled away as IGNORE_LEADING_DIGIT // is a template argument - if (!IGNORE_LEADING_DIGIT && nonHexPos == 1 && token.compare(0, 2, "0x") == 0 && token.length() != 2) { + if (!IGNORE_LEADING_DIGIT && nonHexPos == 1 && + token.compare(0, 2, "0x") == 0 && token.length() != 2) { // Implies hex with 0x prefix. 
return;
 }
}
diff --git a/include/api/CTokenListReverseSearchCreator.h b/include/api/CTokenListReverseSearchCreator.h
index 050dd3ebc2..6086a86b75 100644
--- a/include/api/CTokenListReverseSearchCreator.h
+++ b/include/api/CTokenListReverseSearchCreator.h
@@ -53,18 +53,26 @@ class API_EXPORT CTokenListReverseSearchCreator : public CTokenListReverseSearch
 //! Initialise the two strings that form a reverse search. For example,
 //! this could be as simple as clearing the strings or setting them to
 //! some sort of one-off preamble.
- virtual void
- initStandardSearch(int type, const std::string& example, size_t maxMatchingStringLen, std::string& part1, std::string& part2) const;
+ virtual void initStandardSearch(int type,
+ const std::string& example,
+ size_t maxMatchingStringLen,
+ std::string& part1,
+ std::string& part2) const;
 //! Modify the two strings that form a reverse search to account for the
 //! specified token, which may occur anywhere within the original
 //! message, but has been determined to be a good thing to distinguish
 //! this type of message from other types.
- virtual void addCommonUniqueToken(const std::string& token, std::string& part1, std::string& part2) const;
+ virtual void addCommonUniqueToken(const std::string& token,
+ std::string& part1,
+ std::string& part2) const;
 //! Modify the two strings that form a reverse search to account for the
 //! specified token.
- virtual void addInOrderCommonToken(const std::string& token, bool first, std::string& part1, std::string& part2) const;
+ virtual void addInOrderCommonToken(const std::string& token,
+ bool first,
+ std::string& part1,
+ std::string& part2) const;
 //! Close off the two strings that form a reverse search. For example,
 //! this may be when closing brackets need to be appended.
diff --git a/include/api/CTokenListReverseSearchCreatorIntf.h b/include/api/CTokenListReverseSearchCreatorIntf.h
index 13dbe92009..a1d3816f31 100644
--- a/include/api/CTokenListReverseSearchCreatorIntf.h
+++ b/include/api/CTokenListReverseSearchCreatorIntf.h
@@ -63,18 +63,26 @@ class API_EXPORT CTokenListReverseSearchCreatorIntf {
 //! Initialise the two strings that form a reverse search. For example,
 //! this could be as simple as clearing the strings or setting them to
 //! some sort of one-off preamble.
- virtual void
- initStandardSearch(int type, const std::string& example, size_t maxMatchingStringLen, std::string& part1, std::string& part2) const = 0;
+ virtual void initStandardSearch(int type,
+ const std::string& example,
+ size_t maxMatchingStringLen,
+ std::string& part1,
+ std::string& part2) const = 0;
 //! Modify the two strings that form a reverse search to account for the
 //! specified token, which may occur anywhere within the original
 //! message, but has been determined to be a good thing to distinguish
 //! this type of message from other types.
- virtual void addCommonUniqueToken(const std::string& token, std::string& part1, std::string& part2) const = 0;
+ virtual void addCommonUniqueToken(const std::string& token,
+ std::string& part1,
+ std::string& part2) const = 0;
 //! Modify the two strings that form a reverse search to account for the
 //! specified token.
- virtual void addInOrderCommonToken(const std::string& token, bool first, std::string& part1, std::string& part2) const = 0;
+ virtual void addInOrderCommonToken(const std::string& token,
+ bool first,
+ std::string& part1,
+ std::string& part2) const = 0;
 //! Close off the two strings that form a reverse search. For example,
 //!
this may be when closing brackets need to be appended. diff --git a/include/config/CAutoconfigurer.h b/include/config/CAutoconfigurer.h index b95362b82d..064e145737 100644 --- a/include/config/CAutoconfigurer.h +++ b/include/config/CAutoconfigurer.h @@ -47,7 +47,8 @@ class CONFIG_EXPORT CAutoconfigurer : public api::CDataProcessor { virtual void finalise(); //! No-op. - virtual bool restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime); + virtual bool restoreState(core::CDataSearcher& restoreSearcher, + core_t::TTime& completeToTime); //! No-op. virtual bool persistState(core::CDataAdder& persister); diff --git a/include/config/CAutoconfigurerDetectorPenalties.h b/include/config/CAutoconfigurerDetectorPenalties.h index 379d682e0e..a389753e35 100644 --- a/include/config/CAutoconfigurerDetectorPenalties.h +++ b/include/config/CAutoconfigurerDetectorPenalties.h @@ -36,14 +36,16 @@ class CONFIG_EXPORT CAutoconfigurerDetectorPenalties { using TPenaltyPtr = boost::shared_ptr; public: - CAutoconfigurerDetectorPenalties(const CAutoconfigurerParams& params, const CAutoconfigurerFieldRolePenalties& fieldRolePenalties); + CAutoconfigurerDetectorPenalties(const CAutoconfigurerParams& params, + const CAutoconfigurerFieldRolePenalties& fieldRolePenalties); //! Get the penalty for the detector \p spec. TPenaltyPtr penaltyFor(const CDetectorSpecification& spec); private: using TAutoconfigurerParamsCRef = boost::reference_wrapper; - using TAutoconfigurerFieldRolePenaltiesCRef = boost::reference_wrapper; + using TAutoconfigurerFieldRolePenaltiesCRef = + boost::reference_wrapper; using TPenaltyPtrVec = std::vector; private: diff --git a/include/config/CAutoconfigurerParams.h b/include/config/CAutoconfigurerParams.h index a74738ccd7..abb55b21ff 100644 --- a/include/config/CAutoconfigurerParams.h +++ b/include/config/CAutoconfigurerParams.h @@ -56,7 +56,10 @@ class CONFIG_EXPORT CAutoconfigurerParams { using TFunctionCategoryVec = std::vector; public: - CAutoconfigurerParams(const std::string& timeFieldName, const std::string& timeFieldFormat, bool verbose, bool writeDetectorConfigs); + CAutoconfigurerParams(const std::string& timeFieldName, + const std::string& timeFieldFormat, + bool verbose, + bool writeDetectorConfigs); //! Initialize from the specified file. bool init(const std::string& file); @@ -169,19 +172,23 @@ class CONFIG_EXPORT CAutoconfigurerParams { //! A fraction of populated buckets that is considered small for \p function //! and \p ignoreEmpty so that smaller proportions will be penalized. - double lowPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const; + double lowPopulatedBucketFraction(config_t::EFunctionCategory function, + bool ignoreEmpty) const; //! The smallest permitted fraction of populated buckets for \p function and //! \p ignoreEmpty. - double minimumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const; + double minimumPopulatedBucketFraction(config_t::EFunctionCategory function, + bool ignoreEmpty) const; //! A fraction of populated buckets that is considered high for \p function //! and \p ignoreEmpty so that higher fractions will be penalized. - double highPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const; + double highPopulatedBucketFraction(config_t::EFunctionCategory function, + bool ignoreEmpty) const; //! The maximum permitted fraction of populated buckets for \p function and //! \p ignoreEmpty. 
- double maximumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const; + double maximumPopulatedBucketFraction(config_t::EFunctionCategory function, + bool ignoreEmpty) const; //! Get the candidate bucket lengths to test for each detector. const TTimeVec& candidateBucketLengths() const; diff --git a/include/config/CDataCountStatistics.h b/include/config/CDataCountStatistics.h index 27b7a26027..f158c3a3db 100644 --- a/include/config/CDataCountStatistics.h +++ b/include/config/CDataCountStatistics.h @@ -55,13 +55,17 @@ class CONFIG_EXPORT CBucketCountStatistics { using TSizeSizePrMomentsUMap = boost::unordered_map; using TSizeSizePrArgumentMomentsUMap = boost::unordered_map; - using TStrCPtrSizeSizePrArgumentMomentsUMapPr = std::pair; - using TStrCPtrSizeSizePrArgumentMomentsUMapPrVec = std::vector; + using TStrCPtrSizeSizePrArgumentMomentsUMapPr = + std::pair; + using TStrCPtrSizeSizePrArgumentMomentsUMapPrVec = + std::vector; using TSizeSizePrQuantileUMap = boost::unordered_map; public: //! Add the record for \p partition. - void add(const TSizeSizeSizeTr& partition, TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords); + void add(const TSizeSizeSizeTr& partition, + TDetectorRecordCItr beginRecords, + TDetectorRecordCItr endRecords); //! Capture the current bucket statistics. void capture(); @@ -77,23 +81,28 @@ class CONFIG_EXPORT CBucketCountStatistics { //! Get the moments of the distribution of the distinct count of argument //! field values for \p name. - const TSizeSizePrArgumentMomentsUMap& argumentMomentsPerPartition(const std::string& name) const; + const TSizeSizePrArgumentMomentsUMap& + argumentMomentsPerPartition(const std::string& name) const; private: using TMean = maths::CBasicStatistics::SSampleMean::TAccumulator; //! \brief Bucket data stored about argument field. struct CONFIG_EXPORT SBucketArgumentData { - SBucketArgumentData(const maths::CBjkstUniqueValues distinctValues) : s_DistinctValues(distinctValues) {} + SBucketArgumentData(const maths::CBjkstUniqueValues distinctValues) + : s_DistinctValues(distinctValues) {} //! The approximate distinct values. maths::CBjkstUniqueValues s_DistinctValues; //! A sample of the unique strings in the bucket. TMean s_MeanStringLength; }; - using TSizeSizeSizeTrArgumentDataUMap = boost::unordered_map; - using TStrCPtrSizeSizeSizeTrBjkstArgumentDataUMapPr = std::pair; - using TStrCPtrSizeSizeSizeTrArgumentDataUMapPrVec = std::vector; + using TSizeSizeSizeTrArgumentDataUMap = + boost::unordered_map; + using TStrCPtrSizeSizeSizeTrBjkstArgumentDataUMapPr = + std::pair; + using TStrCPtrSizeSizeSizeTrArgumentDataUMapPrVec = + std::vector; private: //! The distinct partitions seen this bucket. @@ -165,13 +174,15 @@ class CONFIG_EXPORT CDataCountStatistics { //! Extract the by field value. template - static std::size_t by(const std::pair, T>& p) { + static std::size_t + by(const std::pair, T>& p) { return p.first.first; } //! Extract the partition field value. 
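The by() and partition() extractors here read the two halves of a composite key: the bucket statistics are held in containers keyed by a (by, partition) pair of field-value identifiers, with the payload type left generic. A minimal sketch of that layout, with the container choice and all names assumed for illustration:

    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <utility>

    using TSizeSizePr = std::pair<std::size_t, std::size_t>;
    // Statistics keyed by (by, partition) field value identifiers.
    using TSizeSizePrUInt64Map = std::map<TSizeSizePr, std::uint64_t>;

    // Mirrors the extractors in the diff: the "by" id is the first
    // element of the key pair, the partition id the second.
    template <typename T>
    std::size_t byId(const std::pair<TSizeSizePr, T>& p) {
        return p.first.first;
    }
    template <typename T>
    std::size_t partitionId(const std::pair<TSizeSizePr, T>& p) {
        return p.first.second;
    }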
template - static std::size_t partition(const std::pair, T>& p) { + static std::size_t + partition(const std::pair, T>& p) { return p.first.second; } @@ -194,8 +205,10 @@ class CONFIG_EXPORT CDataCountStatistics { using TSizeSizePrUSet = boost::unordered_set; using TOptionalTime = boost::optional; using TAutoconfigurerParamsCRef = boost::reference_wrapper; - using TMinTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; + using TMinTimeAccumulator = + maths::CBasicStatistics::COrderStatisticsStack; + using TMaxTimeAccumulator = + maths::CBasicStatistics::COrderStatisticsStack>; private: //! Fill in the last bucket end times if they are empty. diff --git a/include/config/CDataSemantics.h b/include/config/CDataSemantics.h index 23b3af5fc9..ae220e63af 100644 --- a/include/config/CDataSemantics.h +++ b/include/config/CDataSemantics.h @@ -74,12 +74,16 @@ class CONFIG_EXPORT CDataSemantics { //! \brief Hashes an ordinal type. class CONFIG_EXPORT CHashOrdinal { public: - std::size_t operator()(maths::COrdinal value) const { return value.hash(); } + std::size_t operator()(maths::COrdinal value) const { + return value.hash(); + } }; using TStrVec = std::vector; using TOrdinalSizeUMap = boost::unordered_map; - using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; + using TMinAccumulator = + maths::CBasicStatistics::COrderStatisticsStack; + using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsStack>; private: //! The maximum number of values we'll hold in the empirical diff --git a/include/config/CDataSummaryStatistics.h b/include/config/CDataSummaryStatistics.h index 3f363fb3e2..af800b858b 100644 --- a/include/config/CDataSummaryStatistics.h +++ b/include/config/CDataSummaryStatistics.h @@ -52,8 +52,10 @@ class CONFIG_EXPORT CDataSummaryStatistics { double meanRate() const; protected: - using TMinTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxTimeAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; + using TMinTimeAccumulator = + maths::CBasicStatistics::COrderStatisticsStack; + using TMaxTimeAccumulator = + maths::CBasicStatistics::COrderStatisticsStack>; private: //! The earliest example time. @@ -84,8 +86,11 @@ class CONFIG_EXPORT CCategoricalDataSummaryStatistics : public CDataSummaryStati static const std::size_t TO_APPROXIMATE = 5000000; public: - explicit CCategoricalDataSummaryStatistics(std::size_t n, std::size_t toApproximate = TO_APPROXIMATE); - CCategoricalDataSummaryStatistics(const CDataSummaryStatistics& other, std::size_t n, std::size_t toApproximate = TO_APPROXIMATE); + explicit CCategoricalDataSummaryStatistics(std::size_t n, + std::size_t toApproximate = TO_APPROXIMATE); + CCategoricalDataSummaryStatistics(const CDataSummaryStatistics& other, + std::size_t n, + std::size_t toApproximate = TO_APPROXIMATE); //! Add an example at \p time. 
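The TMinTimeAccumulator/TMaxTimeAccumulator pairs above differ only in their comparator: a one-element order-statistics accumulator keeps the "smallest" value under its comparison, so plugging in std::greater turns a minimum tracker into a maximum tracker. A simplified stand-in for the idea (the real maths::CBasicStatistics::COrderStatisticsStack keeps the N best values, not just one):

    #include <functional>

    // Keeps the single "smallest" value under COMP; with std::less this
    // tracks the minimum, with std::greater the maximum.
    template <typename T, typename COMP = std::less<T>>
    class COrderStatisticsSketch {
    public:
        bool add(const T& value) {
            if (!m_Full || COMP()(value, m_Value)) {
                m_Value = value;
                m_Full = true;
                return true;
            }
            return false;
        }
        const T& value() const { return m_Value; }

    private:
        bool m_Full = false;
        T m_Value{};
    };

    // Usage mirroring the typedef pairs above:
    //   COrderStatisticsSketch<long> earliest;                   // min time
    //   COrderStatisticsSketch<long, std::greater<long>> latest; // max time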
void add(core_t::TTime time, const std::string& example); @@ -120,8 +125,10 @@ class CONFIG_EXPORT CCategoricalDataSummaryStatistics : public CDataSummaryStati using TStrUInt64UMapItr = TStrUInt64UMap::iterator; using TStrUInt64UMapCItr = TStrUInt64UMap::const_iterator; using TStrUInt64UMapCItrVec = std::vector; - using TMinSizeAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxSizeAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; + using TMinSizeAccumulator = + maths::CBasicStatistics::COrderStatisticsStack; + using TMaxSizeAccumulator = + maths::CBasicStatistics::COrderStatisticsStack>; using TBjkstUniqueValuesVec = std::vector; using TEntropySketchVec = std::vector; diff --git a/include/config/CDetectorRecord.h b/include/config/CDetectorRecord.h index fac75acedb..fd98489120 100644 --- a/include/config/CDetectorRecord.h +++ b/include/config/CDetectorRecord.h @@ -125,8 +125,10 @@ class CONFIG_EXPORT CDetectorRecordDirectAddressTable { void build(const TDetectorSpecificationVec& specs); //! Get the unique records from \p time and \p fieldValues for \p specs. - void - detectorRecords(core_t::TTime time, const TStrStrUMap& fieldValues, const TDetectorSpecificationVec& specs, TDetectorRecordVec& result); + void detectorRecords(core_t::TTime time, + const TStrStrUMap& fieldValues, + const TDetectorSpecificationVec& specs, + TDetectorRecordVec& result); private: //! Clear the state (as a precursor to build). diff --git a/include/config/CDetectorSpecification.h b/include/config/CDetectorSpecification.h index 8caf02b240..b78caaf52a 100644 --- a/include/config/CDetectorSpecification.h +++ b/include/config/CDetectorSpecification.h @@ -58,7 +58,10 @@ class CONFIG_EXPORT CDetectorSpecification //! \brief The score for a given set of parameters. struct CONFIG_EXPORT SParamScores { - SParamScores(core_t::TTime bucketLength, const std::string& ignoreEmpty, double score, const TStrVec& descriptions); + SParamScores(core_t::TTime bucketLength, + const std::string& ignoreEmpty, + double score, + const TStrVec& descriptions); //! The bucket length. core_t::TTime s_BucketLength; @@ -76,7 +79,9 @@ class CONFIG_EXPORT CDetectorSpecification using TParamScoresVec = std::vector; public: - CDetectorSpecification(const CAutoconfigurerParams& params, config_t::EFunctionCategory function, std::size_t id); + CDetectorSpecification(const CAutoconfigurerParams& params, + config_t::EFunctionCategory function, + std::size_t id); CDetectorSpecification(const CAutoconfigurerParams& params, config_t::EFunctionCategory function, const std::string& argument, diff --git a/include/config/CFieldRolePenalty.h b/include/config/CFieldRolePenalty.h index 60bc503b1c..cc7bfa24d2 100644 --- a/include/config/CFieldRolePenalty.h +++ b/include/config/CFieldRolePenalty.h @@ -43,7 +43,9 @@ class CONFIG_EXPORT CCantBeNumeric : public CPenalty { private: //! Sets \p penalty to 0.0 for numerics and a no-op otherwise. - virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const; + virtual void penaltyFromMe(const CFieldStatistics& stats, + double& penalty, + std::string& description) const; }; //! \brief Encapsulates the fact that categorical fields can't be used @@ -66,7 +68,9 @@ class CONFIG_EXPORT CCantBeCategorical : public CPenalty { private: //! Sets \p penalty to 0.0 for categorical and a no-op otherwise. 
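Each field role penalty follows the contract documented above: inspect the field statistics, and either zero the running penalty with an explanation or leave both outputs untouched. A reduced sketch of that contract, with the statistics type pared down to a single flag purely for illustration:

    #include <string>

    // Hypothetical, much-reduced field statistics; the real
    // CFieldStatistics carries full data summaries.
    struct SFieldStatsSketch {
        bool s_Categorical = false;
    };

    // The documented contract: zero the penalty (and say why) when the
    // rule applies, otherwise a no-op on both arguments.
    inline void cantBeCategoricalPenalty(const SFieldStatsSketch& stats,
                                         double& penalty,
                                         std::string& description) {
        if (stats.s_Categorical) {
            penalty = 0.0;
            description = "field is categorical";
        }
    }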
- virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const;
+ virtual void penaltyFromMe(const CFieldStatistics& stats,
+ double& penalty,
+ std::string& description) const;
};
//! \brief A penalty which stops unary categorical fields being used
@@ -90,7 +94,9 @@ class CONFIG_EXPORT CDontUseUnaryField : public CPenalty {
private:
 //! Sets \p penalty to 0.0 for categorical with a single category
 //! and a no-op otherwise.
- virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const;
+ virtual void penaltyFromMe(const CFieldStatistics& stats,
+ double& penalty,
+ std::string& description) const;
};
//! \brief A penalty based on a specified range of penalized distinct
@@ -117,7 +123,9 @@ class CONFIG_EXPORT CDistinctCountThresholdPenalty : public CPenalty {
 //! The penalty is a piecewise continuous linear function which
 //! is constant outside the interval \f$[dc_0, dc_1]\f$ and linearly
 //! decreasing from 1 at \f$dc_1\f$ to 0 at \f$dc_0\f$.
- virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const;
+ virtual void penaltyFromMe(const CFieldStatistics& stats,
+ double& penalty,
+ std::string& description) const;
private:
 //! The distinct count for which the penalty is one.
diff --git a/include/config/CFieldStatistics.h b/include/config/CFieldStatistics.h
index 65032abcd3..d7694f026e 100644
--- a/include/config/CFieldStatistics.h
+++ b/include/config/CFieldStatistics.h
@@ -66,7 +66,8 @@ class CONFIG_EXPORT CFieldStatistics {
private:
 using TTimeStrPr = std::pair<core_t::TTime, std::string>;
 using TTimeStrPrVec = std::vector<TTimeStrPr>;
- using TDataSummaryStatistics = boost::variant<CDataSummaryStatistics, CCategoricalDataSummaryStatistics, CNumericDataSummaryStatistics>;
+ using TDataSummaryStatistics =
+ boost::variant<CDataSummaryStatistics, CCategoricalDataSummaryStatistics, CNumericDataSummaryStatistics>;
private:
 //! The auto-configuration parameters.
diff --git a/include/config/CLongTailPenalty.h b/include/config/CLongTailPenalty.h
index b075cdd55b..100e9ec417 100644
--- a/include/config/CLongTailPenalty.h
+++ b/include/config/CLongTailPenalty.h
@@ -47,10 +47,12 @@ class CONFIG_EXPORT CLongTailPenalty : public CPenalty {
 virtual void penaltyFromMe(CDetectorSpecification& spec) const;
 //! Compute the penalty for a by field and optionally a partition.
- void penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;
+ void penaltyFor(const CByAndPartitionDataCountStatistics& stats,
+ CDetectorSpecification& spec) const;
 //! Compute the penalty for a by, over and optionally a partition field.
- void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;
+ void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats,
+ CDetectorSpecification& spec) const;
 //! Extract the tail and total counts from \p counts.
 template
diff --git a/include/config/CLowVariationPenalty.h b/include/config/CLowVariationPenalty.h
index 94f1c169ef..e337725318 100644
--- a/include/config/CLowVariationPenalty.h
+++ b/include/config/CLowVariationPenalty.h
@@ -41,31 +41,40 @@ class CONFIG_EXPORT CLowVariationPenalty : public CPenalty {
 virtual void penaltyFromMe(CDetectorSpecification& spec) const;
 //! Apply the penalty for count with optionally a partition.
- void penaltiesForCount(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;
+ void penaltiesForCount(const CPartitionDataCountStatistics& stats,
+ CDetectorSpecification& spec) const;
 //! Apply the penalty for count with a by field and optionally a partition.
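The CDistinctCountThresholdPenalty comment above pins its curve down completely: constant outside \f$[dc_0, dc_1]\f$, 0 at \f$dc_0\f$, 1 at \f$dc_1\f$, and linear in between. Written out directly as a free function, with the parameter names my own:

    // Piecewise continuous linear penalty: 0 at and below dc0, 1 at and
    // above dc1, linearly increasing in between.
    double distinctCountPenalty(double distinctCount, double dc0, double dc1) {
        if (distinctCount <= dc0) {
            return 0.0;
        }
        if (distinctCount >= dc1) {
            return 1.0;
        }
        return (distinctCount - dc0) / (dc1 - dc0);
    }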
- void penaltiesForCount(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltiesForCount(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Apply the penalty for count with a by, over and optionally a partition field. - void penaltiesForCount(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltiesForCount(const CByOverAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Apply the penalty for distinct count with optionally a partition. - void penaltyForDistinctCount(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyForDistinctCount(const CPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Apply the penalty for distinct count with by and optionally a partition. - void penaltyForDistinctCount(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyForDistinctCount(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Apply the penalty for distinct count with by, over and optionally a partition. - void penaltyForDistinctCount(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyForDistinctCount(const CByOverAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Apply the penalty for info content with optionally a partition. - void penaltyForInfoContent(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyForInfoContent(const CPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Apply the penalty for info content with a by field and optionally a partition. - void penaltyForInfoContent(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyForInfoContent(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Apply the penalty for info content with a by, over and optionally a partition field. - void penaltyForInfoContent(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyForInfoContent(const CByOverAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; }; } } diff --git a/include/config/CNotEnoughDataPenalty.h b/include/config/CNotEnoughDataPenalty.h index 37182b0d27..ed69766596 100644 --- a/include/config/CNotEnoughDataPenalty.h +++ b/include/config/CNotEnoughDataPenalty.h @@ -47,13 +47,16 @@ class CONFIG_EXPORT CNotEnoughDataPenalty : public CPenalty { virtual void penaltyFromMe(CDetectorSpecification& spec) const; //! Compute the penalty for optionally a partition. - void penaltyFor(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyFor(const CPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Compute the penalty for a by field and optionally a partition. - void penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyFor(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Compute the penalty for a by, over and optionally a partition field. 
- void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! The common penalty calculation. void penaltyFor(const TUInt64Vec& bucketCounts, diff --git a/include/config/CPenalty.h b/include/config/CPenalty.h index 9a9e0ce460..58ae4a8d5c 100644 --- a/include/config/CPenalty.h +++ b/include/config/CPenalty.h @@ -138,7 +138,9 @@ class CONFIG_EXPORT CPenalty { //! Compute the penalty based on a detector's field's statistics. //! //! \note No-op unless a derived class overrides it. - virtual void penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const; + virtual void penaltyFromMe(const CFieldStatistics& stats, + double& penalty, + std::string& description) const; //! Compute a penalty based a complete detector specification. //! diff --git a/include/config/CReportWriter.h b/include/config/CReportWriter.h index cd3ef2398e..fdc8371dfe 100644 --- a/include/config/CReportWriter.h +++ b/include/config/CReportWriter.h @@ -93,7 +93,8 @@ class CONFIG_EXPORT CReportWriter : public api::COutputHandler { using api::COutputHandler::fieldNames; //! No-op. - virtual bool writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields); + virtual bool writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields); // Bring the other overload of writeRow() into scope. using api::COutputHandler::writeRow; @@ -105,13 +106,19 @@ class CONFIG_EXPORT CReportWriter : public api::COutputHandler { void addInvalidRecords(uint64_t n); //! Add the summary for \p field. - void addFieldStatistics(const std::string& field, config_t::EDataType type, const CDataSummaryStatistics& summary); + void addFieldStatistics(const std::string& field, + config_t::EDataType type, + const CDataSummaryStatistics& summary); //! Add the summary for the categorical field \p field. - void addFieldStatistics(const std::string& field, config_t::EDataType type, const CCategoricalDataSummaryStatistics& summary); + void addFieldStatistics(const std::string& field, + config_t::EDataType type, + const CCategoricalDataSummaryStatistics& summary); //! Add the summary for the numeric field \p field. - void addFieldStatistics(const std::string& field, config_t::EDataType type, const CNumericDataSummaryStatistics& summary); + void addFieldStatistics(const std::string& field, + config_t::EDataType type, + const CNumericDataSummaryStatistics& summary); //! Add a summary of the detector \p detector. void addDetector(const CDetectorSpecification& spec); diff --git a/include/config/CTooMuchDataPenalty.h b/include/config/CTooMuchDataPenalty.h index 6aeb9fa2ca..03c528ef30 100644 --- a/include/config/CTooMuchDataPenalty.h +++ b/include/config/CTooMuchDataPenalty.h @@ -49,13 +49,16 @@ class CONFIG_EXPORT CTooMuchDataPenalty : public CPenalty { virtual void penaltyFromMe(CDetectorSpecification& spec) const; //! Compute the penalty for optionally a partition. - void penaltyFor(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const; + void penaltyFor(const CPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const; //! Compute the penalty for a by field and optionally a partition. 
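Each of these penalty classes contributes a score in [0, 1] for a candidate detector. One natural way to combine such contributions, assumed here purely for illustration rather than taken from the library, is multiplication, so that any single zero penalty vetoes the configuration outright:

    #include <vector>

    // Illustrative only: combine per-rule penalties in [0, 1] by
    // multiplication; one zero penalty zeroes the whole score.
    double combinedPenalty(const std::vector<double>& penalties) {
        double result = 1.0;
        for (double penalty : penalties) {
            result *= penalty;
        }
        return result;
    }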
- void penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;
+ void penaltyFor(const CByAndPartitionDataCountStatistics& stats,
+ CDetectorSpecification& spec) const;
 //! Compute the penalty for a by, over and optionally a partition field.
- void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const;
+ void penaltyFor(const CByOverAndPartitionDataCountStatistics& stats,
+ CDetectorSpecification& spec) const;
 //! The common penalty calculation.
 void penaltyFor(const TUInt64Vec& bucketCounts,
diff --git a/include/config/ConfigTypes.h b/include/config/ConfigTypes.h
index 924aa3e8fa..0a52a22077 100644
--- a/include/config/ConfigTypes.h
+++ b/include/config/ConfigTypes.h
@@ -27,7 +27,15 @@ CONFIG_EXPORT std::ostream& operator<<(std::ostream& o, EUserDataType type);
 //! Enumeration of the data types we understand.
-enum EDataType { E_UndeterminedType, E_Binary, E_Categorical, E_PositiveInteger, E_Integer, E_PositiveReal, E_Real };
+enum EDataType {
+ E_UndeterminedType,
+ E_Binary,
+ E_Categorical,
+ E_PositiveInteger,
+ E_Integer,
+ E_PositiveReal,
+ E_Real
+};
 //! Check if the type is categorical.
 CONFIG_EXPORT
@@ -50,7 +58,18 @@ CONFIG_EXPORT std::ostream& operator<<(std::ostream& o, EDataType type);
 //! Enumeration of the top-level functions we'll consider configuring.
-enum EFunctionCategory { E_Count, E_Rare, E_DistinctCount, E_InfoContent, E_Mean, E_Min, E_Max, E_Sum, E_Varp, E_Median };
+enum EFunctionCategory {
+ E_Count,
+ E_Rare,
+ E_DistinctCount,
+ E_InfoContent,
+ E_Mean,
+ E_Min,
+ E_Max,
+ E_Sum,
+ E_Varp,
+ E_Median
+};
 //! Check if the function takes an argument.
 CONFIG_EXPORT
@@ -84,7 +103,8 @@ bool hasDoAndDontIgnoreEmptyVersions(EFunctionCategory function);
 //! Get the prefix of the function corresponding to \p ignoreEmpty
 //! and \p isPopulation.
 CONFIG_EXPORT
-const std::string& ignoreEmptyVersionName(EFunctionCategory function, bool ignoreEmpty, bool isPopulation);
+const std::string&
+ignoreEmptyVersionName(EFunctionCategory function, bool ignoreEmpty, bool isPopulation);
 //! Get a string for the function function.
 CONFIG_EXPORT
diff --git a/include/core/BoostMultiIndex.h b/include/core/BoostMultiIndex.h
index 3224bf70ac..3a15e16611 100644
--- a/include/core/BoostMultiIndex.h
+++ b/include/core/BoostMultiIndex.h
@@ -20,7 +20,7 @@
 #include <boost/multi_index/const_mem_fun.hpp>
 //! Define a member function that returns a const reference
-#define BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(Class, Type, MemberFunName) \
+#define BOOST_MULTI_INDEX_CONST_TYPE_CONST_MEM_FUN(Class, Type, MemberFunName) \
 ::boost::multi_index::const_mem_fun<Class, const Type&, &Class::MemberFunName>
 #endif // INCLUDED_ml_core_BoostMultiIndex_h
diff --git a/include/core/CBase64Filter.h b/include/core/CBase64Filter.h
index 200064dac1..da76d180e9 100644
--- a/include/core/CBase64Filter.h
+++ b/include/core/CBase64Filter.h
@@ -102,7 +102,9 @@ class CORE_EXPORT CBase64Encoder {
 // copy into the buffer while there is data to read and space in the buffer
 std::streamsize done = 0;
 while (done < n) {
- std::streamsize toCopy = std::min(std::streamsize(n - done), std::streamsize(m_Buffer.capacity() - m_Buffer.size()));
+ std::streamsize toCopy =
+ std::min(std::streamsize(n - done),
+ std::streamsize(m_Buffer.capacity() - m_Buffer.size()));
 m_Buffer.insert(m_Buffer.end(), s + done, s + done + toCopy);
 done += toCopy;
 this->Encode(snk, false);
@@ -123,7 +125,8 @@ class CORE_EXPORT CBase64Encoder {
 //!
diff --git a/include/core/CBase64Filter.h b/include/core/CBase64Filter.h
index 200064dac1..da76d180e9 100644
--- a/include/core/CBase64Filter.h
+++ b/include/core/CBase64Filter.h
@@ -102,7 +102,9 @@ class CORE_EXPORT CBase64Encoder {
         // copy into the buffer while there is data to read and space in the buffer
         std::streamsize done = 0;
         while (done < n) {
-            std::streamsize toCopy = std::min(std::streamsize(n - done), std::streamsize(m_Buffer.capacity() - m_Buffer.size()));
+            std::streamsize toCopy =
+                std::min(std::streamsize(n - done),
+                         std::streamsize(m_Buffer.capacity() - m_Buffer.size()));
             m_Buffer.insert(m_Buffer.end(), s + done, s + done + toCopy);
             done += toCopy;
             this->Encode(snk, false);
@@ -123,7 +125,8 @@ class CORE_EXPORT CBase64Encoder {
     //! the converted output into the stream snk
     template<typename SINK>
     void Encode(SINK& snk, bool isFinal) {
-        using TUInt8BufCItrTransformItr = boost::archive::iterators::transform_width<TUInt8BufCItr, 6, 8>;
+        using TUInt8BufCItrTransformItr =
+            boost::archive::iterators::transform_width<TUInt8BufCItr, 6, 8>;
         using TBase64Text = boost::archive::iterators::base64_from_binary<TUInt8BufCItrTransformItr>;
 
         TUInt8BufItr endItr = m_Buffer.end();
@@ -234,18 +237,22 @@ class CORE_EXPORT CBase64Decoder {
         std::streamsize done = 0;
         char buf[4096];
         while (done < n) {
-            std::streamsize toCopy = std::min(std::streamsize(m_BufferOut.size()), std::streamsize(n - done));
-            LOG_TRACE(<< "Trying to copy " << toCopy << " bytes into stream, max " << n << ", available " << m_BufferOut.size());
+            std::streamsize toCopy = std::min(std::streamsize(m_BufferOut.size()),
+                                              std::streamsize(n - done));
+            LOG_TRACE(<< "Trying to copy " << toCopy << " bytes into stream, max "
+                      << n << ", available " << m_BufferOut.size());
             for (std::streamsize i = 0; i < toCopy; i++) {
                 s[done++] = m_BufferOut.front();
                 m_BufferOut.pop_front();
             }
-            LOG_TRACE(<< "Eos: " << m_Eos << ", In: " << m_BufferIn.empty() << ", Out: " << m_BufferOut.empty());
+            LOG_TRACE(<< "Eos: " << m_Eos << ", In: " << m_BufferIn.empty()
+                      << ", Out: " << m_BufferOut.empty());
             if (done == n) {
                 break;
             }
 
             if ((done > 0) && m_BufferIn.empty() && m_BufferOut.empty() && m_Eos) {
-                LOG_TRACE(<< "Base64 READ " << done << ", from n " << n << ", left " << m_BufferOut.size());
+                LOG_TRACE(<< "Base64 READ " << done << ", from n " << n
+                          << ", left " << m_BufferOut.size());
                 return done;
             }
@@ -287,7 +294,8 @@ class CORE_EXPORT CBase64Decoder {
                 return -1;
             }
         }
-        LOG_TRACE(<< "Base64 READ " << done << ", from n " << n << ", left " << m_BufferOut.size());
+        LOG_TRACE(<< "Base64 READ " << done << ", from n " << n << ", left "
+                  << m_BufferOut.size());
 
         return done;
     }
@@ -299,8 +307,10 @@ class CORE_EXPORT CBase64Decoder {
     //! Perform the conversion from Base64 to raw bytes
     void Decode(bool isFinal) {
         // Base64 turns 4 characters into 3 bytes
-        using TUInt8BufCItrBinaryBase64Itr = boost::archive::iterators::binary_from_base64<TUInt8BufCItr>;
-        using TBase64Binary = boost::archive::iterators::transform_width<TUInt8BufCItrBinaryBase64Itr, 8, 6>;
+        using TUInt8BufCItrBinaryBase64Itr =
+            boost::archive::iterators::binary_from_base64<TUInt8BufCItr>;
+        using TBase64Binary =
+            boost::archive::iterators::transform_width<TUInt8BufCItrBinaryBase64Itr, 8, 6>;
 
         std::size_t inBytes = m_BufferIn.size();
         if (inBytes == 0) {
@@ -340,7 +350,8 @@ class CORE_EXPORT CBase64Decoder {
         }
         LOG_TRACE(<< "About to decode: " << std::string(m_BufferIn.begin(), endItr));
 
-        m_BufferOut.insert(m_BufferOut.end(), TBase64Binary(m_BufferIn.begin()), TBase64Binary(endItr));
+        m_BufferOut.insert(m_BufferOut.end(), TBase64Binary(m_BufferIn.begin()),
+                           TBase64Binary(endItr));
 
         // Remove padding bytes off the back of the stream
         m_BufferOut.erase_end(paddingBytes);
diff --git a/include/core/CBlockingMessageQueue.h b/include/core/CBlockingMessageQueue.h
index af73e23b6f..d00c21d17a 100644
--- a/include/core/CBlockingMessageQueue.h
+++ b/include/core/CBlockingMessageQueue.h
@@ -56,13 +56,11 @@ class CBlockingMessageQueue {
     using TShutdownFunc = std::function<void()>;
 
 public:
-    CBlockingMessageQueue(RECEIVER& receiver, const TShutdownFunc& shutdownFunc = &CBlockingMessageQueue::defaultShutdownFunc)
-        : m_Thread(*this),
-          m_ProducerCondition(m_Mutex),
-          m_ConsumerCondition(m_Mutex),
-          m_Receiver(receiver),
-          m_Queue(QUEUE_CAPACITY),
-          m_ShutdownFunc(shutdownFunc) {}
+    CBlockingMessageQueue(RECEIVER& receiver,
+                          const TShutdownFunc& shutdownFunc = &CBlockingMessageQueue::defaultShutdownFunc)
+        : m_Thread(*this), m_ProducerCondition(m_Mutex),
+          m_ConsumerCondition(m_Mutex), m_Receiver(receiver),
+          m_Queue(QUEUE_CAPACITY), m_ShutdownFunc(shutdownFunc) {}
 
     virtual ~CBlockingMessageQueue() {}
 
@@ -144,7 +142,8 @@ class CBlockingMessageQueue {
     class CMessageQueueThread : public CThread {
     public:
         CMessageQueueThread(CBlockingMessageQueue& messageQueue)
-            : m_MessageQueue(messageQueue), m_ShuttingDown(false), m_IsRunning(false) {}
+            : m_MessageQueue(messageQueue), m_ShuttingDown(false),
+              m_IsRunning(false) {}
 
         //! The queue must have the mutex for this to be called
         bool isRunning() const {
diff --git a/include/core/CCompressOStream.h b/include/core/CCompressOStream.h
index ef57806d69..8563e25c2e 100644
--- a/include/core/CCompressOStream.h
+++ b/include/core/CCompressOStream.h
@@ -42,7 +42,9 @@ class CORE_EXPORT CCompressOStream : public std::ostream {
 private:
     class CCompressThread : public CThread {
     public:
-        CCompressThread(CCompressOStream& stream, CDualThreadStreamBuf& streamBuf, CStateCompressor::CChunkFilter& filter);
+        CCompressThread(CCompressOStream& stream,
+                        CDualThreadStreamBuf& streamBuf,
+                        CStateCompressor::CChunkFilter& filter);
 
     protected:
         //! Implementation of inherited interface
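// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: typical use of the
// CCompressedDictionary diff that follows. The class maps arbitrary strings
// to fixed-size words of N independent 64-bit hashes; per its documentation,
// with N == 2 the probability that two distinct strings collide in every
// hash is of order 1e-5 even in large dictionaries. The template argument
// and nested type names below follow the declarations visible in the diff;
// the include path is an assumption.

#include <core/CCompressedDictionary.h> // assumed include path
#include <string>

void dictionaryExample() {
    ml::core::CCompressedDictionary<2> dictionary;
    ml::core::CCompressedDictionary<2>::CWord key =
        dictionary.word("a.very.long.partition.field.value");
    // CWord is equality and less-than comparable and hashes via the nested
    // CHash functor, so it can replace the full string as a set or map key
    // at a fraction of the memory.
    (void)key;
}
// ---------------------------------------------------------------------------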
diff --git a/include/core/CCompressedDictionary.h b/include/core/CCompressedDictionary.h
index 76307ffbcd..eff9591b6d 100644
--- a/include/core/CCompressedDictionary.h
+++ b/include/core/CCompressedDictionary.h
@@ -71,7 +71,8 @@ class CCompressedDictionary {
     //! independent hashes we get a probability of all hashes
     //! colliding of \f$0.004^N\f$. If N is 2 the probability of
     //! a collision is \f$1.5 \times 10^{-5}\f$.
-    class CWord : private boost::equality_comparable1<CWord, boost::less_than_comparable1<CWord>> {
+    class CWord
+        : private boost::equality_comparable1<CWord, boost::less_than_comparable1<CWord>> {
     public:
         //! See CMemory.
         static bool dynamicSizeAlwaysZero() { return true; }
@@ -83,7 +84,9 @@ class CCompressedDictionary {
         CWord() { std::fill(m_Hash.begin(), m_Hash.end(), 0); }
         CWord(const TUInt64Array& hash) : m_Hash(hash) {}
 
-        bool operator==(const CWord& other) const { return m_Hash == other.m_Hash; }
+        bool operator==(const CWord& other) const {
+            return m_Hash == other.m_Hash;
+        }
 
         bool operator<(const CWord& rhs) const { return m_Hash < rhs.m_Hash; }
 
@@ -124,7 +127,9 @@ class CCompressedDictionary {
     //! \brief A fast hash of a dictionary word.
     class CHash : public std::unary_function<CWord, std::size_t> {
     public:
-        inline std::size_t operator()(const CWord& word) const { return word.hash(); }
+        inline std::size_t operator()(const CWord& word) const {
+            return word.hash();
+        }
     };
 
     //! The type of an ordered set of words.
@@ -167,7 +172,8 @@ class CCompressedDictionary {
     CWord word(const std::string& word) const {
         TUInt64Array hash;
         for (std::size_t i = 0u; i < N; ++i) {
-            hash[i] = CHashing::safeMurmurHash64(word.c_str(), static_cast<int>(word.size()), m_Seeds[i]);
+            hash[i] = CHashing::safeMurmurHash64(
+                word.c_str(), static_cast<int>(word.size()), m_Seeds[i]);
         }
         return CWord(hash);
     }
@@ -185,7 +191,10 @@ class CCompressedDictionary {
     }
 
     //! Extract the dictionary word corresponding to (\p word1, \p word2, \p word3, \p word4).
-    CWord word(const std::string& word1, const std::string& word2, const std::string& word3, const std::string& word4) const {
+    CWord word(const std::string& word1,
+               const std::string& word2,
+               const std::string& word3,
+               const std::string& word4) const {
         TStrCPtr words[] = {&word1, &word2, &word3, &word4};
         return this->word(words);
     }
@@ -198,7 +207,9 @@ class CCompressedDictionary {
             uint64_t& hash = hashes[i];
             for (std::size_t wordIndex = 0; wordIndex < NUMBER_OF_WORDS; ++wordIndex) {
                 const std::string& word = *words[wordIndex];
-                hash = CHashing::safeMurmurHash64(word.c_str(), static_cast<int>(word.size()), (wordIndex) == 0 ? m_Seeds[i] : hash);
+                hash = CHashing::safeMurmurHash64(word.c_str(),
+                                                  static_cast<int>(word.size()),
+                                                  (wordIndex) == 0 ? m_Seeds[i] : hash);
             }
         }
         return CWord(hashes);
diff --git a/include/core/CConcurrentQueue.h b/include/core/CConcurrentQueue.h
index c1eef24fde..84469d37ca 100644
--- a/include/core/CConcurrentQueue.h
+++ b/include/core/CConcurrentQueue.h
@@ -36,7 +36,8 @@ class CConcurrentQueue final : private CNonCopyable {
 public:
     CConcurrentQueue() : m_Queue(QUEUE_CAPACITY) {
         static_assert(NOTIFY_CAPACITY > 0, "NOTIFY_CAPACITY must be positive");
-        static_assert(QUEUE_CAPACITY >= NOTIFY_CAPACITY, "QUEUE_CAPACITY cannot be less than NOTIFY_CAPACITY");
+        static_assert(QUEUE_CAPACITY >= NOTIFY_CAPACITY,
+                      "QUEUE_CAPACITY cannot be less than NOTIFY_CAPACITY");
     }
 
     //! Pop an item out of the queue, this blocks until an item is available
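// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: what the CContainerPrinter
// reflowed next is for. A minimal example, assuming only the static print()
// entry point and the pair formatting shown in the diff below; the include
// path is an assumption.

#include <core/CContainerPrinter.h> // assumed include path
#include <string>
#include <utility>
#include <vector>

void printExample() {
    std::vector<std::pair<int, double>> values{{1, 0.5}, {2, 1.5}};
    std::string text = ml::core::CContainerPrinter::print(values);
    // text now reads along the lines of "[(1, 0.5), (2, 1.5)]".
    (void)text;
}
// ---------------------------------------------------------------------------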
diff --git a/include/core/CContainerPrinter.h b/include/core/CContainerPrinter.h
index 051ce64de3..2be8f9c87f 100644
--- a/include/core/CContainerPrinter.h
+++ b/include/core/CContainerPrinter.h
@@ -131,7 +131,9 @@ class CLeafPrinter {
     }
 
     //! Fast CStringUtil implementation.
-    inline static std::string print_(bool value, true_ /*is arithmetic*/) { return CStringUtils::typeToStringPretty(value); }
+    inline static std::string print_(bool value, true_ /*is arithmetic*/) {
+        return CStringUtils::typeToStringPretty(value);
+    }
 
     //! Slow std::ostringstream stream implementation.
     template<typename T>
@@ -302,7 +304,8 @@ class CORE_EXPORT CContainerPrinter : private CNonInstantiatable {
     template<typename U, typename V>
     static std::string printElement(const std::pair<U, V>& value) {
         std::ostringstream result;
-        result << "(" << printElement(boost::unwrap_ref(value.first)) << ", " << printElement(boost::unwrap_ref(value.second)) << ")";
+        result << "(" << printElement(boost::unwrap_ref(value.first)) << ", "
+               << printElement(boost::unwrap_ref(value.second)) << ")";
         return result.str();
     }
 
@@ -345,7 +348,8 @@ class CORE_EXPORT CContainerPrinter : private CNonInstantiatable {
     //! Print a STL compliant container for debug.
     template<typename CONTAINER>
     static std::string print(const CONTAINER& container) {
-        return print(boost::unwrap_ref(container).begin(), boost::unwrap_ref(container).end());
+        return print(boost::unwrap_ref(container).begin(),
+                     boost::unwrap_ref(container).end());
     }
 
     //! Specialization for arrays.
diff --git a/include/core/CDelimiter.h b/include/core/CDelimiter.h
index a5624cffd2..7aaf105cc0 100644
--- a/include/core/CDelimiter.h
+++ b/include/core/CDelimiter.h
@@ -60,11 +60,17 @@ class CORE_EXPORT CDelimiter {
 
     //! Tokenise a string, stating whether time has passed since the last
     //! attempt
-    void tokenise(const std::string& str, bool timePassed, CStringUtils::TStrVec& tokens, std::string& remainder) const;
+    void tokenise(const std::string& str,
+                  bool timePassed,
+                  CStringUtils::TStrVec& tokens,
+                  std::string& remainder) const;
 
     //! Tokenise a string, also retrieving an example of the literal
     //! delimiter that was found
-    void tokenise(const std::string& str, CStringUtils::TStrVec& tokens, std::string& exampleDelimiter, std::string& remainder) const;
+    void tokenise(const std::string& str,
+                  CStringUtils::TStrVec& tokens,
+                  std::string& exampleDelimiter,
+                  std::string& remainder) const;
 
     //! Tokenise a string, also retrieving an example of the literal
     //! delimiter that was found, stating whether time has passed since the
@@ -105,7 +111,8 @@ class CORE_EXPORT CDelimiter {
     //! The character used to escape the quote character ('\0' if none).
     char m_Escape;
 
-    friend CORE_EXPORT std::ostream& operator<<(std::ostream& strm, const CDelimiter& delimiter);
+    friend CORE_EXPORT std::ostream& operator<<(std::ostream& strm,
+                                                const CDelimiter& delimiter);
 };
 
 //! Useful for debugging and CPPUNIT_ASSERT_EQUALS
diff --git a/include/core/CDualThreadStreamBuf.h b/include/core/CDualThreadStreamBuf.h
index 6aa8a8b6f0..90eac11170 100644
--- a/include/core/CDualThreadStreamBuf.h
+++ b/include/core/CDualThreadStreamBuf.h
@@ -114,7 +114,9 @@ class CORE_EXPORT CDualThreadStreamBuf : public std::streambuf {
     //! allowing a zero byte seek in order to allow tellg() and tellp() to
     //! work on the connected stream.
     virtual std::streampos
-    seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out);
+    seekoff(std::streamoff off,
+            std::ios_base::seekdir way,
+            std::ios_base::openmode which = std::ios_base::in | std::ios_base::out);
 
 private:
     //! Swap the intermediate buffer with the write buffer. Will block if
diff --git a/include/core/CFlatPrefixTree.h b/include/core/CFlatPrefixTree.h
index 52173d5cf1..d38f0930cd 100644
--- a/include/core/CFlatPrefixTree.h
+++ b/include/core/CFlatPrefixTree.h
@@ -118,7 +118,10 @@ class CORE_EXPORT CFlatPrefixTree {
 private:
     //! The recursive building helper.
-    void buildRecursively(const TStrVec& prefixes, std::size_t prefixesStart, std::size_t prefixesEnd, std::size_t charPos);
+    void buildRecursively(const TStrVec& prefixes,
+                          std::size_t prefixesStart,
+                          std::size_t prefixesEnd,
+                          std::size_t charPos);
 
     //! Extracts the distinct characters and stores it in \p distinctChars
     //! along with the start and end index in the \p prefixes vector.
diff --git a/include/core/CFloatStorage.h b/include/core/CFloatStorage.h
index add992df9a..b7dc561328 100644
--- a/include/core/CFloatStorage.h
+++ b/include/core/CFloatStorage.h
@@ -19,7 +19,8 @@ namespace ml {
 namespace core {
 namespace {
-const int MAX_PRECISE_INTEGER_FLOAT(static_cast<int>(std::pow(10.0, static_cast<double>(std::numeric_limits<float>::digits10))) - 1);
+const int MAX_PRECISE_INTEGER_FLOAT(
+    static_cast<int>(std::pow(10.0, static_cast<double>(std::numeric_limits<float>::digits10))) - 1);
 }
 
 //! \brief This class should be used in place of float whenever
@@ -93,7 +94,10 @@ class CORE_EXPORT CFloatStorage {
     }
 
     //! Convert to a string.
-    std::string toString() const { return CStringUtils::typeToStringPrecise(static_cast<double>(m_Value), CIEEE754::E_SinglePrecision); }
+    std::string toString() const {
+        return CStringUtils::typeToStringPrecise(static_cast<double>(m_Value),
+                                                 CIEEE754::E_SinglePrecision);
+    }
 
     //! \name Double Assignment
     //@{
@@ -131,12 +135,15 @@ class CORE_EXPORT CFloatStorage {
     //! Utility to actually set the floating point value.
     void set(double value) {
 #ifdef CFLOATSTORAGE_BOUNDS_CHECK
-        if (value > std::numeric_limits<float>::max() || -value > std::numeric_limits<float>::max()) {
+        if (value > std::numeric_limits<float>::max() ||
+            -value > std::numeric_limits<float>::max()) {
             LOG_WARN(<< "Value overflows float " << value);
         }
-        if (value < std::numeric_limits<float>::min() && -value < std::numeric_limits<float>::min()) {
+        if (value < std::numeric_limits<float>::min() &&
+            -value < std::numeric_limits<float>::min()) {
             LOG_WARN(<< "Value underflows float " << value);
-        } else if (value < 100 * std::numeric_limits<float>::min() && -value < 100 * std::numeric_limits<float>::min()) {
+        } else if (value < 100 * std::numeric_limits<float>::min() &&
+                   -value < 100 * std::numeric_limits<float>::min()) {
             LOG_WARN(<< "Less than 3 s.f. precision retained for " << value);
         }
 #endif // CFLOATSTORAGE_BOUNDS_CHECK
diff --git a/include/core/CHashing.h b/include/core/CHashing.h
index 7607385d8e..891171ec50 100644
--- a/include/core/CHashing.h
+++ b/include/core/CHashing.h
@@ -77,8 +77,9 @@ class CORE_EXPORT CHashing : private CNonInstantiatable {
         uint32_t operator()(uint32_t x) const {
             // Note by construction:
             //   a * x + b < p^2 + p < 2^64
-            return static_cast<uint32_t>(((static_cast<uint64_t>(m_A) * x + static_cast<uint64_t>(m_B)) % BIG_PRIME) %
-                                         static_cast<uint64_t>(m_M));
+            return static_cast<uint32_t>(
+                ((static_cast<uint64_t>(m_A) * x + static_cast<uint64_t>(m_B)) % BIG_PRIME) %
+                static_cast<uint64_t>(m_M));
         }
 
         //! Print the hash function for debug.
@@ -115,7 +116,8 @@ class CORE_EXPORT CHashing : private CNonInstantiatable {
         uint32_t operator()(uint32_t x) const {
             // Note by construction:
             //   a * x + b < p^2 + p < 2^64
-            return static_cast<uint32_t>((static_cast<uint64_t>(m_A) * x + static_cast<uint64_t>(m_B)) % BIG_PRIME);
+            return static_cast<uint32_t>(
+                (static_cast<uint64_t>(m_A) * x + static_cast<uint64_t>(m_B)) % BIG_PRIME);
         }
 
         //! Print the hash function for debug.
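// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: the arithmetic the
// ((a * x + b) mod p) mod m hashes above rely on. With p a prime below 2^32
// and a, b, x < p, the largest intermediate is a * x + b <= (p - 1)^2 + p - 1
// < p^2 + p < 2^64, so everything fits in uint64_t without overflow. A
// standalone rendering (4294967291 is the largest prime below 2^32, assumed
// here for BIG_PRIME, whose definition is not shown in this diff):

#include <cstdint>

uint32_t universalHash(uint32_t a, uint32_t b, uint32_t m, uint32_t x) {
    const uint64_t p = 4294967291ULL;
    return static_cast<uint32_t>(((static_cast<uint64_t>(a) * x + b) % p) % m);
}
// ---------------------------------------------------------------------------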
@@ -163,8 +165,10 @@ class CORE_EXPORT CHashing : private CNonInstantiatable {
             //   (a(1) * x(1)) mod p + a(2) * x(2) + b
             //     < p^2 + 2*p
             //     < 2^64
-            uint64_t h = (static_cast<uint64_t>(m_A[0]) * x1) % BIG_PRIME + static_cast<uint64_t>(m_A[1]) * x2;
-            return static_cast<uint32_t>(((h + static_cast<uint64_t>(m_B)) % BIG_PRIME) % static_cast<uint64_t>(m_M));
+            uint64_t h = (static_cast<uint64_t>(m_A[0]) * x1) % BIG_PRIME +
+                         static_cast<uint64_t>(m_A[1]) * x2;
+            return static_cast<uint32_t>(((h + static_cast<uint64_t>(m_B)) % BIG_PRIME) %
+                                         static_cast<uint64_t>(m_M));
         }
 
         //! \note This is implemented inline in contravention to
@@ -181,7 +185,8 @@ class CORE_EXPORT CHashing : private CNonInstantiatable {
             for (std::size_t i = 1u; i < x.size(); ++i) {
                 h = (h % BIG_PRIME + static_cast<uint64_t>(m_A[i]) * x[i]);
             }
-            return static_cast<uint32_t>(((h + static_cast<uint64_t>(m_B)) % BIG_PRIME) % static_cast<uint64_t>(m_M));
+            return static_cast<uint32_t>(((h + static_cast<uint64_t>(m_B)) % BIG_PRIME) %
+                                         static_cast<uint64_t>(m_M));
         }
 
         //! Print the hash function for debug.
@@ -376,7 +381,8 @@ class CORE_EXPORT CHashing : private CNonInstantiatable {
     //!
     //! \note This is significantly faster than boost::hash<std::string>
     //! and has better distributions.
-    class CORE_EXPORT CMurmurHash2String : public std::unary_function<std::string, std::size_t> {
+    class CORE_EXPORT CMurmurHash2String
+        : public std::unary_function<std::string, std::size_t> {
     public:
         //! See CMemory.
         static bool dynamicSizeAlwaysZero() { return true; }
@@ -386,7 +392,9 @@ class CORE_EXPORT CHashing : private CNonInstantiatable {
         CMurmurHash2String(std::size_t seed = 0x5bd1e995) : m_Seed(seed) {}
 
         std::size_t operator()(const std::string& key) const;
-        std::size_t operator()(TStrCRef key) const { return this->operator()(key.get()); }
+        std::size_t operator()(TStrCRef key) const {
+            return this->operator()(key.get());
+        }
         std::size_t operator()(const CStoredStringPtr& key) const {
             if (key) {
                 return this->operator()(*key);
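// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: CMurmurHash2String above is a
// drop-in replacement for the default hasher of string-keyed unordered
// containers. A minimal usage, assuming only the operator() overloads shown:

#include <boost/unordered_map.hpp>
#include <string>

void murmurMapExample() {
    boost::unordered_map<std::string, int, ml::core::CHashing::CMurmurHash2String> counts;
    counts["status"] += 1; // keys are hashed with murmur rather than boost::hash
}
// ---------------------------------------------------------------------------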
@@ -403,7 +411,8 @@ class CORE_EXPORT CHashing : private CNonInstantiatable {
     //! example would be where the hash value somehow affects data that is
     //! visible outside the program, such as state persisted to a data
     //! store. This is also immune to endianness issues.
-    class CORE_EXPORT CSafeMurmurHash2String64 : public std::unary_function<std::string, uint64_t> {
+    class CORE_EXPORT CSafeMurmurHash2String64
+        : public std::unary_function<std::string, uint64_t> {
     public:
         //! See CMemory.
         static bool dynamicSizeAlwaysZero() { return true; }
@@ -413,7 +422,9 @@ class CORE_EXPORT CHashing : private CNonInstantiatable {
         CSafeMurmurHash2String64(uint64_t seed = 0x5bd1e995) : m_Seed(seed) {}
 
         uint64_t operator()(const std::string& key) const;
-        std::size_t operator()(TStrCRef key) const { return this->operator()(key.get()); }
+        std::size_t operator()(TStrCRef key) const {
+            return this->operator()(key.get());
+        }
         std::size_t operator()(const CStoredStringPtr& key) const {
             if (key) {
                 return this->operator()(*key);
@@ -438,7 +449,8 @@ namespace hash_detail {
 template<std::size_t SIZE>
 struct SMurmurHashForArchitecture {
     static std::size_t hash(const void* key, int length, std::size_t seed) {
-        return static_cast<std::size_t>(CHashing::murmurHash32(key, length, static_cast<uint32_t>(seed)));
+        return static_cast<std::size_t>(
+            CHashing::murmurHash32(key, length, static_cast<uint32_t>(seed)));
     }
 };
 
@@ -455,11 +467,13 @@ struct SMurmurHashForArchitecture<8> {
 
 template<typename T>
 inline std::size_t CHashing::CMurmurHash2BT<T>::operator()(const T& key) const {
-    return hash_detail::SMurmurHashForArchitecture<sizeof(std::size_t)>::hash(&key, static_cast<int>(sizeof(key)), m_Seed);
+    return hash_detail::SMurmurHashForArchitecture<sizeof(std::size_t)>::hash(
+        &key, static_cast<int>(sizeof(key)), m_Seed);
 }
 
 inline std::size_t CHashing::CMurmurHash2String::operator()(const std::string& key) const {
-    return hash_detail::SMurmurHashForArchitecture<sizeof(std::size_t)>::hash(key.data(), static_cast<int>(key.size()), m_Seed);
+    return hash_detail::SMurmurHashForArchitecture<sizeof(std::size_t)>::hash(
+        key.data(), static_cast<int>(key.size()), m_Seed);
 }
 
 inline uint64_t CHashing::CSafeMurmurHash2String64::operator()(const std::string& key) const {
diff --git a/include/core/CIEEE754.h b/include/core/CIEEE754.h
index 75a102b23b..2059c0f2c9 100644
--- a/include/core/CIEEE754.h
+++ b/include/core/CIEEE754.h
@@ -59,7 +59,8 @@ class CORE_EXPORT CIEEE754 {
     //! the mantissa interpreted as an integer.
     static void decompose(double value, uint64_t& mantissa, int& exponent) {
         SDoubleRep parsed;
-        static_assert(sizeof(double) == sizeof(SDoubleRep), "SDoubleRep definition unsuitable for memcpy to double");
+        static_assert(sizeof(double) == sizeof(SDoubleRep),
+                      "SDoubleRep definition unsuitable for memcpy to double");
         // Use memcpy() rather than union to adhere to strict aliasing rules
         ::memcpy(&parsed, &value, sizeof(double));
         exponent = static_cast<int>(parsed.s_Exponent) - 1022;
diff --git a/include/core/CLogger.h b/include/core/CLogger.h
index 87ce98db75..91aa2819e2 100644
--- a/include/core/CLogger.h
+++ b/include/core/CLogger.h
@@ -128,7 +128,9 @@ class CORE_EXPORT CLogger : private CNonCopyable {
     using TLogCharLogStrMapCItr = TLogCharLogStrMap::const_iterator;
 
     //! Replace Ml specific mappings in a single string
-    void massageString(const TLogCharLogStrMap& mappings, const log4cxx::LogString& oldStr, log4cxx::LogString& newStr) const;
+    void massageString(const TLogCharLogStrMap& mappings,
+                       const log4cxx::LogString& oldStr,
+                       log4cxx::LogString& newStr) const;
 
    //! Helper for other reconfiguration methods
     bool reconfigureFromProps(log4cxx::helpers::Properties& props);
diff --git a/include/core/CMaskIterator.h b/include/core/CMaskIterator.h
index fb92006d17..dfc15cb8e2 100644
--- a/include/core/CMaskIterator.h
+++ b/include/core/CMaskIterator.h
@@ -34,11 +34,10 @@ template<typename ITR>
 class CMaskIterator
     : private boost::incrementable<
           CMaskIterator<ITR>,
-          boost::decrementable<
-              CMaskIterator<ITR>,
-              boost::addable2<CMaskIterator<ITR>,
-              typename std::iterator_traits<ITR>::difference_type,
-              boost::subtractable2<CMaskIterator<ITR>, typename std::iterator_traits<ITR>::difference_type>>>> {
+          boost::decrementable<CMaskIterator<ITR>,
+          boost::addable2<CMaskIterator<ITR>,
+          typename std::iterator_traits<ITR>::difference_type,
+          boost::subtractable2<CMaskIterator<ITR>, typename std::iterator_traits<ITR>::difference_type>>>> {
 public:
     using difference_type = typename std::iterator_traits<ITR>::difference_type;
     using value_type = typename std::iterator_traits<ITR>::value_type;
@@ -48,7 +47,8 @@ class CMaskIterator
     using TDifferenceVec = std::vector<difference_type>;
 
 public:
-    CMaskIterator(ITR begin, const TDifferenceVec& mask, difference_type index) : m_Begin(begin), m_Mask(&mask), m_Index(index) {}
+    CMaskIterator(ITR begin, const TDifferenceVec& mask, difference_type index)
+        : m_Begin(begin), m_Mask(&mask), m_Index(index) {}
 
     template<typename OTHER_ITR>
     bool operator==(const CMaskIterator<OTHER_ITR>& rhs) const {
@@ -77,7 +77,9 @@ class CMaskIterator
     reference operator*() const { return *(m_Begin + (*m_Mask)[m_Index]); }
     pointer operator->() const { return &(*(m_Begin + (*m_Mask)[m_Index])); }
-    reference operator[](difference_type n) const { return *(m_Begin + (*m_Mask)[m_Index + n]); }
+    reference operator[](difference_type n) const {
+        return *(m_Begin + (*m_Mask)[m_Index + n]);
+    }
 
     const CMaskIterator& operator++() {
         ++m_Index;
@@ -89,7 +91,8 @@ class CMaskIterator
     }
     template<typename OTHER_ITR>
     difference_type operator-(const CMaskIterator<OTHER_ITR>& rhs) const {
-        return static_cast<difference_type>(m_Index) - static_cast<difference_type>(rhs.m_Index);
+        return static_cast<difference_type>(m_Index) -
+               static_cast<difference_type>(rhs.m_Index);
     }
     const CMaskIterator& operator+=(difference_type n) {
         m_Index += n;
@@ -117,24 +120,29 @@ class CMaskIterator
 
 //! Get a non-constant mask iterator over a subset of the elements of a vector.
 template<typename T>
-CMaskIterator<typename std::vector<T>::iterator> begin_masked(std::vector<T>& v, const std::vector<std::ptrdiff_t>& mask) {
+CMaskIterator<typename std::vector<T>::iterator>
+begin_masked(std::vector<T>& v, const std::vector<std::ptrdiff_t>& mask) {
     return CMaskIterator<typename std::vector<T>::iterator>(v.begin(), mask, 0);
 }
 
 //! Get a non-constant mask iterator at the end of a subset of the elements of a vector.
 template<typename T>
-CMaskIterator<typename std::vector<T>::iterator> end_masked(std::vector<T>& v, const std::vector<std::ptrdiff_t>& mask) {
+CMaskIterator<typename std::vector<T>::iterator>
+end_masked(std::vector<T>& v, const std::vector<std::ptrdiff_t>& mask) {
     return CMaskIterator<typename std::vector<T>::iterator>(v.begin(), mask, mask.size());
 }
 
 //! Get a constant mask iterator over a subset of the elements of a vector.
 template<typename T>
-CMaskIterator<typename std::vector<T>::const_iterator> begin_masked(const std::vector<T>& v, const std::vector<std::ptrdiff_t>& mask) {
+CMaskIterator<typename std::vector<T>::const_iterator>
+begin_masked(const std::vector<T>& v, const std::vector<std::ptrdiff_t>& mask) {
     return CMaskIterator<typename std::vector<T>::const_iterator>(v.begin(), mask, 0);
 }
 
 //! Get a constant mask iterator at the end of a subset of the elements of a vector.
 template<typename T>
-CMaskIterator<typename std::vector<T>::const_iterator> end_masked(const std::vector<T>& v, const std::vector<std::ptrdiff_t>& mask) {
-    return CMaskIterator<typename std::vector<T>::const_iterator>(v.begin(), mask, mask.size());
+CMaskIterator<typename std::vector<T>::const_iterator>
+end_masked(const std::vector<T>& v, const std::vector<std::ptrdiff_t>& mask) {
+    return CMaskIterator<typename std::vector<T>::const_iterator>(v.begin(), mask,
+                                                                  mask.size());
 }
 
 //! A mask iterator over a subset of an iterated sequence.
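// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: iterating a masked subset of a
// vector with the begin_masked/end_masked helpers above. The mask holds the
// offsets of the elements to visit; for std::vector the iterator difference
// type is std::ptrdiff_t:

#include <numeric>
#include <vector>

double sumMasked(const std::vector<double>& values,
                 const std::vector<std::ptrdiff_t>& mask) {
    return std::accumulate(ml::core::begin_masked(values, mask),
                           ml::core::end_masked(values, mask), 0.0);
}

// For values {1.0, 2.0, 3.0, 4.0} and mask {0, 3} this returns 5.0.
// ---------------------------------------------------------------------------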
diff --git a/include/core/CMemory.h b/include/core/CMemory.h index bab8d7a01f..58596986e2 100644 --- a/include/core/CMemory.h +++ b/include/core/CMemory.h @@ -105,7 +105,9 @@ struct SDynamicSizeAlwaysZero { //! \brief Checks types in pair. template struct SDynamicSizeAlwaysZero> { - static inline bool value() { return SDynamicSizeAlwaysZero::value() && SDynamicSizeAlwaysZero::value(); } + static inline bool value() { + return SDynamicSizeAlwaysZero::value() && SDynamicSizeAlwaysZero::value(); + } }; //! \brief Specialisation for std::less always true. @@ -135,7 +137,8 @@ struct SDynamicSizeAlwaysZero - bool operator()(const std::pair& lhs, const std::pair& rhs) const { + bool operator()(const std::pair& lhs, + const std::pair& rhs) const { return boost::unwrap_ref(lhs.first).before(boost::unwrap_ref(rhs.first)); } template @@ -221,12 +224,16 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { //! if it is stored in boost::any. template bool registerCallback() { - auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), boost::cref(typeid(T)), memory_detail::STypeInfoLess()); + auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), + boost::cref(typeid(T)), + memory_detail::STypeInfoLess()); if (i == m_Callbacks.end()) { - m_Callbacks.emplace_back(boost::cref(typeid(T)), &CAnyVisitor::dynamicSizeCallback); + m_Callbacks.emplace_back(boost::cref(typeid(T)), + &CAnyVisitor::dynamicSizeCallback); return true; } else if (i->first.get() != typeid(T)) { - m_Callbacks.insert(i, {boost::cref(typeid(T)), &CAnyVisitor::dynamicSizeCallback}); + m_Callbacks.insert(i, {boost::cref(typeid(T)), + &CAnyVisitor::dynamicSizeCallback}); return true; } return false; @@ -236,7 +243,9 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { //! registered for its type. std::size_t dynamicSize(const boost::any& x) const { if (!x.empty()) { - auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), boost::cref(x.type()), memory_detail::STypeInfoLess()); + auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), + boost::cref(x.type()), + memory_detail::STypeInfoLess()); if (i != m_Callbacks.end() && i->first.get() == x.type()) { return (*i->second)(x); } @@ -251,7 +260,9 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { static std::size_t dynamicSizeCallback(const boost::any& any) { try { return sizeof(T) + CMemory::dynamicSize(boost::any_cast(any)); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to calculate size " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to calculate size " << e.what()); + } return 0; } @@ -261,7 +272,9 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { public: //! Default template. template - static std::size_t dynamicSize(const T& t, typename boost::disable_if>::type* = nullptr) { + static std::size_t + dynamicSize(const T& t, + typename boost::disable_if>::type* = nullptr) { std::size_t mem = 0; if (!memory_detail::SDynamicSizeAlwaysZero::value()) { mem += memory_detail::SMemoryDynamicSize::dispatch(t); @@ -271,7 +284,9 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { //! Overload for pointer. 
template - static std::size_t dynamicSize(const T& t, typename boost::enable_if>::type* = nullptr) { + static std::size_t + dynamicSize(const T& t, + typename boost::enable_if>::type* = nullptr) { if (t == nullptr) { return 0; } @@ -354,12 +369,14 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { template static std::size_t dynamicSize(const boost::unordered_map& t) { std::size_t mem = 0; - if (!(memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value())) { + if (!(memory_detail::SDynamicSizeAlwaysZero::value() && + memory_detail::SDynamicSizeAlwaysZero::value())) { for (auto i = t.begin(); i != t.end(); ++i) { mem += dynamicSize(*i); } } - return mem + (t.bucket_count() * sizeof(std::size_t) * 2) + (t.size() * (sizeof(K) + sizeof(V) + 2 * sizeof(std::size_t))); + return mem + (t.bucket_count() * sizeof(std::size_t) * 2) + + (t.size() * (sizeof(K) + sizeof(V) + 2 * sizeof(std::size_t))); } //! Overload for std::map. @@ -368,19 +385,22 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { // std::map appears to use 4 pointers/size_ts per tree node // (colour, parent, left and right child pointers). std::size_t mem = 0; - if (!(memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value())) { + if (!(memory_detail::SDynamicSizeAlwaysZero::value() && + memory_detail::SDynamicSizeAlwaysZero::value())) { for (auto i = t.begin(); i != t.end(); ++i) { mem += dynamicSize(*i); } } - return mem + (memory_detail::EXTRA_NODES + t.size()) * (sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t)); + return mem + (memory_detail::EXTRA_NODES + t.size()) * + (sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t)); } //! Overload for boost::container::flat_map. template static std::size_t dynamicSize(const boost::container::flat_map& t) { std::size_t mem = 0; - if (!(memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value())) { + if (!(memory_detail::SDynamicSizeAlwaysZero::value() && + memory_detail::SDynamicSizeAlwaysZero::value())) { for (auto i = t.begin(); i != t.end(); ++i) { mem += dynamicSize(*i); } @@ -397,7 +417,8 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { mem += dynamicSize(*i); } } - return mem + (t.bucket_count() * sizeof(std::size_t) * 2) + (t.size() * (sizeof(T) + 2 * sizeof(std::size_t))); + return mem + (t.bucket_count() * sizeof(std::size_t) * 2) + + (t.size() * (sizeof(T) + 2 * sizeof(std::size_t))); } //! Overload for std::set. @@ -411,7 +432,8 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { mem += dynamicSize(*i); } } - return mem + (memory_detail::EXTRA_NODES + t.size()) * (sizeof(T) + 4 * sizeof(std::size_t)); + return mem + (memory_detail::EXTRA_NODES + t.size()) * + (sizeof(T) + 4 * sizeof(std::size_t)); } //! Overload for boost::container::flat_set. @@ -437,7 +459,8 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { mem += dynamicSize(*i); } } - return mem + (memory_detail::EXTRA_NODES + t.size()) * (sizeof(T) + 2 * sizeof(std::size_t)); + return mem + (memory_detail::EXTRA_NODES + t.size()) * + (sizeof(T) + 2 * sizeof(std::size_t)); } //! Overload for std::deque. @@ -543,7 +566,9 @@ struct SDebugMemoryDynamicSize { //! Template specialisation for when T has a debugMemoryUsage member function. 
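// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: the per-node cost model used
// by the std::map overload above, evaluated for one concrete case. Each tree
// node is charged sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t) for the
// colour, parent, left and right child fields, so on an LP64 platform a
// std::map<int64_t, int64_t> costs 8 + 8 + 32 = 48 bytes per entry, and 1000
// entries are estimated at roughly 48kB (plus the small EXTRA_NODES term):

#include <cstddef>
#include <cstdint>

constexpr std::size_t mapNodeCost =
    sizeof(int64_t) + sizeof(int64_t) + 4 * sizeof(std::size_t);
static_assert(mapNodeCost == 48, "assumes an LP64 platform");
// ---------------------------------------------------------------------------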
template struct SDebugMemoryDynamicSize::type> { - static void dispatch(const char*, const T& t, CMemoryUsage::TMemoryUsagePtr mem) { t.debugMemoryUsage(mem->addChild()); } + static void dispatch(const char*, const T& t, CMemoryUsage::TMemoryUsagePtr mem) { + t.debugMemoryUsage(mem->addChild()); + } }; } // memory_detail @@ -579,7 +604,9 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! See CMemory::CAnyVisitor for details. class CORE_EXPORT CAnyVisitor { public: - using TDynamicSizeFunc = void (*)(const char*, const boost::any& any, CMemoryUsage::TMemoryUsagePtr mem); + using TDynamicSizeFunc = void (*)(const char*, + const boost::any& any, + CMemoryUsage::TMemoryUsagePtr mem); using TTypeInfoDynamicSizeFuncPr = std::pair; using TTypeInfoDynamicSizeFuncPrVec = std::vector; @@ -587,12 +614,16 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! if it is stored in boost::any. template bool registerCallback() { - auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), boost::cref(typeid(T)), memory_detail::STypeInfoLess()); + auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), + boost::cref(typeid(T)), + memory_detail::STypeInfoLess()); if (i == m_Callbacks.end()) { - m_Callbacks.emplace_back(boost::cref(typeid(T)), &CAnyVisitor::dynamicSizeCallback); + m_Callbacks.emplace_back(boost::cref(typeid(T)), + &CAnyVisitor::dynamicSizeCallback); return true; } else if (i->first.get() != typeid(T)) { - m_Callbacks.insert(i, {boost::cref(typeid(T)), &CAnyVisitor::dynamicSizeCallback}); + m_Callbacks.insert(i, {boost::cref(typeid(T)), + &CAnyVisitor::dynamicSizeCallback}); return true; } return false; @@ -600,9 +631,13 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Calculate the dynamic size of x if a callback has been //! registered for its type. - void dynamicSize(const char* name, const boost::any& x, CMemoryUsage::TMemoryUsagePtr mem) const { + void dynamicSize(const char* name, + const boost::any& x, + CMemoryUsage::TMemoryUsagePtr mem) const { if (!x.empty()) { - auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), boost::cref(x.type()), memory_detail::STypeInfoLess()); + auto i = std::lower_bound(m_Callbacks.begin(), m_Callbacks.end(), + boost::cref(x.type()), + memory_detail::STypeInfoLess()); if (i != m_Callbacks.end() && i->first.get() == x.type()) { (*i->second)(name, x, mem); return; @@ -614,11 +649,15 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { private: //! Wraps up call to any_cast and dynamicSize. template - static void dynamicSizeCallback(const char* name, const boost::any& any, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSizeCallback(const char* name, + const boost::any& any, + CMemoryUsage::TMemoryUsagePtr mem) { try { mem->addItem(name, sizeof(T)); CMemoryDebug::dynamicSize(name, boost::any_cast(any), mem); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to calculate size " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to calculate size " << e.what()); + } } TTypeInfoDynamicSizeFuncPrVec m_Callbacks; @@ -627,19 +666,21 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { public: //! Default template. 
template - static void dynamicSize(const char* name, - const T& t, - CMemoryUsage::TMemoryUsagePtr mem, - typename boost::disable_if>::type* = nullptr) { + static void + dynamicSize(const char* name, + const T& t, + CMemoryUsage::TMemoryUsagePtr mem, + typename boost::disable_if>::type* = nullptr) { memory_detail::SDebugMemoryDynamicSize::dispatch(name, t, mem); } //! Overload for pointer. template - static void dynamicSize(const char* name, - const T& t, - CMemoryUsage::TMemoryUsagePtr mem, - typename boost::enable_if>::type* = nullptr) { + static void + dynamicSize(const char* name, + const T& t, + CMemoryUsage::TMemoryUsagePtr mem, + typename boost::enable_if>::type* = nullptr) { if (t != nullptr) { mem->addItem("ptr", CMemory::staticSize(*t)); memory_detail::SDebugMemoryDynamicSize::dispatch(name, *t, mem); @@ -648,7 +689,9 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for boost::shared_ptr. template - static void dynamicSize(const char* name, const boost::shared_ptr& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const boost::shared_ptr& t, + CMemoryUsage::TMemoryUsagePtr mem) { if (t) { long uc = t.use_count(); // If the pointer is shared by multiple users, each one @@ -661,14 +704,18 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { std::ostringstream ss; ss << "shared_ptr (x" << uc << ')'; // Round up - mem->addItem(ss.str(), (CMemory::staticSize(*t) + CMemory::dynamicSize(*t) + std::size_t(uc - 1)) / uc); + mem->addItem(ss.str(), (CMemory::staticSize(*t) + + CMemory::dynamicSize(*t) + std::size_t(uc - 1)) / + uc); } } } //! Overload for boost::array. template - static void dynamicSize(const char* name, const boost::array& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const boost::array& t, + CMemoryUsage::TMemoryUsagePtr mem) { if (!memory_detail::SDynamicSizeAlwaysZero::value()) { std::string componentName(name); componentName += "_item"; @@ -682,12 +729,16 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for std::vector. template - static void dynamicSize(const char* name, const std::vector& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const std::vector& t, + CMemoryUsage::TMemoryUsagePtr mem) { std::string componentName(name); std::size_t items = t.size(); std::size_t capacity = t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), capacity * sizeof(T), (capacity - items) * sizeof(T)); + CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), + capacity * sizeof(T), + (capacity - items) * sizeof(T)); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); ptr->setName(usage); @@ -699,12 +750,16 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for small vector. template - static void dynamicSize(const char* name, const CSmallVector& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const CSmallVector& t, + CMemoryUsage::TMemoryUsagePtr mem) { std::string componentName(name); std::size_t items = memory_detail::inplace(t) ? 0 : t.size(); std::size_t capacity = memory_detail::inplace(t) ? 
0 : t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), capacity * sizeof(T), (capacity - items) * sizeof(T)); + CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), + capacity * sizeof(T), + (capacity - items) * sizeof(T)); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); ptr->setName(usage); @@ -746,11 +801,15 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for boost::unordered_map. template - static void dynamicSize(const char* name, const boost::unordered_map& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const boost::unordered_map& t, + CMemoryUsage::TMemoryUsagePtr mem) { std::string componentName(name); componentName += "_umap"; - std::size_t mapSize = (t.bucket_count() * sizeof(std::size_t) * 2) + (t.size() * (sizeof(K) + sizeof(V) + 2 * sizeof(std::size_t))); + std::size_t mapSize = + (t.bucket_count() * sizeof(std::size_t) * 2) + + (t.size() * (sizeof(K) + sizeof(V) + 2 * sizeof(std::size_t))); CMemoryUsage::SMemoryUsage usage(componentName, mapSize); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); @@ -764,13 +823,16 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for std::map. template - static void dynamicSize(const char* name, const std::map& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const std::map& t, + CMemoryUsage::TMemoryUsagePtr mem) { // std::map appears to use 4 pointers/size_ts per tree node // (colour, parent, left and right child pointers) std::string componentName(name); componentName += "_map"; - std::size_t mapSize = (memory_detail::EXTRA_NODES + t.size()) * (sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t)); + std::size_t mapSize = (memory_detail::EXTRA_NODES + t.size()) * + (sizeof(K) + sizeof(V) + 4 * sizeof(std::size_t)); CMemoryUsage::SMemoryUsage usage(componentName, mapSize); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); @@ -784,16 +846,19 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for boost::container::flat_map. template - static void dynamicSize(const char* name, const boost::container::flat_map& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const boost::container::flat_map& t, + CMemoryUsage::TMemoryUsagePtr mem) { std::string componentName(name); componentName += "_fmap"; std::size_t items = t.size(); std::size_t capacity = t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(std::pair).name(), - capacity * sizeof(std::pair), - (capacity - items) * sizeof(std::pair)); + CMemoryUsage::SMemoryUsage usage( + componentName + "::" + typeid(std::pair).name(), + capacity * sizeof(std::pair), + (capacity - items) * sizeof(std::pair)); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); ptr->setName(usage); @@ -805,11 +870,14 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for boost::unordered_set. 
template - static void dynamicSize(const char* name, const boost::unordered_set& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const boost::unordered_set& t, + CMemoryUsage::TMemoryUsagePtr mem) { std::string componentName(name); componentName += "_uset"; - std::size_t setSize = (t.bucket_count() * sizeof(std::size_t) * 2) + (t.size() * (sizeof(T) + 2 * sizeof(std::size_t))); + std::size_t setSize = (t.bucket_count() * sizeof(std::size_t) * 2) + + (t.size() * (sizeof(T) + 2 * sizeof(std::size_t))); CMemoryUsage::SMemoryUsage usage(componentName, setSize); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); @@ -822,13 +890,16 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for std::set. template - static void dynamicSize(const char* name, const std::set& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const std::set& t, + CMemoryUsage::TMemoryUsagePtr mem) { // std::set appears to use 4 pointers/size_ts per tree node // (colour, parent, left and right child pointers) std::string componentName(name); componentName += "_set"; - std::size_t setSize = (memory_detail::EXTRA_NODES + t.size()) * (sizeof(T) + 4 * sizeof(std::size_t)); + std::size_t setSize = (memory_detail::EXTRA_NODES + t.size()) * + (sizeof(T) + 4 * sizeof(std::size_t)); CMemoryUsage::SMemoryUsage usage(componentName, setSize); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); @@ -841,14 +912,18 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for boost::container::flat_set. template - static void dynamicSize(const char* name, const boost::container::flat_set& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const boost::container::flat_set& t, + CMemoryUsage::TMemoryUsagePtr mem) { std::string componentName(name); componentName += "_fset"; std::size_t items = t.size(); std::size_t capacity = t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), capacity * sizeof(T), (capacity - items) * sizeof(T)); + CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), + capacity * sizeof(T), + (capacity - items) * sizeof(T)); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); ptr->setName(usage); @@ -859,13 +934,16 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for std::list. template - static void dynamicSize(const char* name, const std::list& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const std::list& t, + CMemoryUsage::TMemoryUsagePtr mem) { // std::list appears to use 2 pointers per list node // (prev and next pointers). std::string componentName(name); componentName += "_list"; - std::size_t listSize = (memory_detail::EXTRA_NODES + t.size()) * (sizeof(T) + 4 * sizeof(std::size_t)); + std::size_t listSize = (memory_detail::EXTRA_NODES + t.size()) * + (sizeof(T) + 4 * sizeof(std::size_t)); CMemoryUsage::SMemoryUsage usage(componentName, listSize); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); @@ -878,7 +956,9 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for std::deque. 
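// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: driving the debug overloads
// above to get a component-by-component breakdown. Assumes a CMemoryUsage
// tree is available as the TMemoryUsagePtr type used throughout this header:

#include <vector>

void debugMemoryExample(const std::vector<std::vector<double>>& rows,
                        ml::core::CMemoryUsage::TMemoryUsagePtr mem) {
    // Adds a child node named "rows::<element type>" recording used bytes and
    // capacity slack for the outer vector, then recurses into each row.
    ml::core::CMemoryDebug::dynamicSize("rows", rows, mem);
}
// ---------------------------------------------------------------------------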
template - static void dynamicSize(const char* name, const std::deque& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const std::deque& t, + CMemoryUsage::TMemoryUsagePtr mem) { // std::deque is a pointer to an array of pointers to pages std::string componentName(name); componentName += "_deque"; @@ -904,12 +984,16 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for boost::circular_buffer. template - static void dynamicSize(const char* name, const boost::circular_buffer& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const boost::circular_buffer& t, + CMemoryUsage::TMemoryUsagePtr mem) { std::string componentName(name); std::size_t items = t.size(); std::size_t capacity = t.capacity(); - CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), capacity * sizeof(T), (capacity - items) * sizeof(T)); + CMemoryUsage::SMemoryUsage usage(componentName + "::" + typeid(T).name(), + capacity * sizeof(T), + (capacity - items) * sizeof(T)); CMemoryUsage::TMemoryUsagePtr ptr = mem->addChild(); ptr->setName(usage); @@ -921,7 +1005,9 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for boost::optional. template - static void dynamicSize(const char* name, const boost::optional& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const boost::optional& t, + CMemoryUsage::TMemoryUsagePtr mem) { if (t) { dynamicSize(name, *t, mem); } @@ -929,13 +1015,17 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable { //! Overload for boost::reference_wrapper. template - static void dynamicSize(const char* /*name*/, const boost::reference_wrapper& /*t*/, CMemoryUsage::TMemoryUsagePtr /*mem*/) { + static void dynamicSize(const char* /*name*/, + const boost::reference_wrapper& /*t*/, + CMemoryUsage::TMemoryUsagePtr /*mem*/) { return; } //! Overload for std::pair. template - static void dynamicSize(const char* name, const std::pair& t, CMemoryUsage::TMemoryUsagePtr mem) { + static void dynamicSize(const char* name, + const std::pair& t, + CMemoryUsage::TMemoryUsagePtr mem) { std::string keyName(name); keyName += "_key"; std::string valueName(name); diff --git a/include/core/CMemoryUsage.h b/include/core/CMemoryUsage.h index b62a85f9ba..3dbcbebe7a 100644 --- a/include/core/CMemoryUsage.h +++ b/include/core/CMemoryUsage.h @@ -35,9 +35,11 @@ class CORE_EXPORT CMemoryUsage { //! A collection of data to record memory usage information for //! arbitrary components struct CORE_EXPORT SMemoryUsage { - SMemoryUsage(const std::string& name, std::size_t memory) : s_Name(name), s_Memory(memory), s_Unused(0) {} + SMemoryUsage(const std::string& name, std::size_t memory) + : s_Name(name), s_Memory(memory), s_Unused(0) {} - SMemoryUsage(const std::string& name, std::size_t memory, std::size_t unused) : s_Name(name), s_Memory(memory), s_Unused(unused) {} + SMemoryUsage(const std::string& name, std::size_t memory, std::size_t unused) + : s_Name(name), s_Memory(memory), s_Unused(unused) {} //! 
Name of the component std::string s_Name; diff --git a/include/core/CMessageBuffer.h b/include/core/CMessageBuffer.h index 68cb73adbd..a8792f09b8 100644 --- a/include/core/CMessageBuffer.h +++ b/include/core/CMessageBuffer.h @@ -32,7 +32,8 @@ namespace core { template class CMessageBuffer { public: - CMessageBuffer(BUFFER& buffer) : m_Thread(*this), m_Condition(m_Mutex), m_Buffer(buffer) {} + CMessageBuffer(BUFFER& buffer) + : m_Thread(*this), m_Condition(m_Mutex), m_Buffer(buffer) {} virtual ~CMessageBuffer() {} @@ -100,7 +101,8 @@ class CMessageBuffer { TMessageVec data; m_MessageBuffer.m_Buffer.flushAllMessages(data); - m_MessageBuffer.m_Buffer.processMessages(data, std::numeric_limits::max()); + m_MessageBuffer.m_Buffer.processMessages( + data, std::numeric_limits::max()); m_IsRunning = false; diff --git a/include/core/CMessageQueue.h b/include/core/CMessageQueue.h index a889cbbfcc..efa3b550b8 100644 --- a/include/core/CMessageQueue.h +++ b/include/core/CMessageQueue.h @@ -62,10 +62,9 @@ class CMessageQueue { using TShutdownFunc = std::function; public: - CMessageQueue(RECEIVER& receiver, const TShutdownFunc& shutdownFunc = &CMessageQueue::defaultShutdownFunc) - : m_Thread(*this), - m_Condition(m_Mutex), - m_Receiver(receiver), + CMessageQueue(RECEIVER& receiver, + const TShutdownFunc& shutdownFunc = &CMessageQueue::defaultShutdownFunc) + : m_Thread(*this), m_Condition(m_Mutex), m_Receiver(receiver), m_ShutdownFunc(shutdownFunc), // If timing is enabled, we need a buffer one bigger than the // number of times to average over. If timing is disabled, the @@ -151,7 +150,8 @@ class CMessageQueue { } if (m_Readings.front() > m_Readings.back()) { - LOG_ERROR(<< "Time to process last " << NUM_TO_TIME << " messages is negative (-" << (m_Readings.front() - m_Readings.back()) + LOG_ERROR(<< "Time to process last " << NUM_TO_TIME << " messages is negative (-" + << (m_Readings.front() - m_Readings.back()) << "ms). " "Maybe the system clock has been put back?"); return -1.0; @@ -168,7 +168,8 @@ class CMessageQueue { class CMessageQueueThread : public CThread { public: CMessageQueueThread(CMessageQueue& messageQueue) - : m_MessageQueue(messageQueue), m_ShuttingDown(false), m_IsRunning(false) {} + : m_MessageQueue(messageQueue), m_ShuttingDown(false), + m_IsRunning(false) {} //! The queue must have the mutex for this to be called bool isRunning() const { @@ -213,7 +214,8 @@ class CMessageQueue { // If the stop watch is running, update the history // of readings if (NUM_TO_TIME > 0 && m_MessageQueue.m_StopWatch.isRunning()) { - m_MessageQueue.m_Readings.push_back(m_MessageQueue.m_StopWatch.lap()); + m_MessageQueue.m_Readings.push_back( + m_MessageQueue.m_StopWatch.lap()); } } diff --git a/include/core/CPersistUtils.h b/include/core/CPersistUtils.h index 175d156234..89a7e787b2 100644 --- a/include/core/CPersistUtils.h +++ b/include/core/CPersistUtils.h @@ -220,28 +220,42 @@ class CORE_EXPORT CPersistUtils { //! using CStringUtils functions. 
class CORE_EXPORT CBuiltinToString { public: - CBuiltinToString(const char pairDelimiter) : m_PairDelimiter(pairDelimiter) {} + CBuiltinToString(const char pairDelimiter) + : m_PairDelimiter(pairDelimiter) {} - std::string operator()(double value) const { return CStringUtils::typeToStringPrecise(value, CIEEE754::E_SinglePrecision); } + std::string operator()(double value) const { + return CStringUtils::typeToStringPrecise(value, CIEEE754::E_SinglePrecision); + } template std::string operator()(T value) const { return CStringUtils::typeToString(value); } - std::string operator()(int8_t value) const { return CStringUtils::typeToString(static_cast(value)); } + std::string operator()(int8_t value) const { + return CStringUtils::typeToString(static_cast(value)); + } - std::string operator()(uint8_t value) const { return CStringUtils::typeToString(static_cast(value)); } + std::string operator()(uint8_t value) const { + return CStringUtils::typeToString(static_cast(value)); + } - std::string operator()(int16_t value) const { return CStringUtils::typeToString(static_cast(value)); } + std::string operator()(int16_t value) const { + return CStringUtils::typeToString(static_cast(value)); + } - std::string operator()(uint16_t value) const { return CStringUtils::typeToString(static_cast(value)); } + std::string operator()(uint16_t value) const { + return CStringUtils::typeToString(static_cast(value)); + } - std::string operator()(CFloatStorage value) const { return value.toString(); } + std::string operator()(CFloatStorage value) const { + return value.toString(); + } template std::string operator()(const std::pair& value) const { - return this->operator()(value.first) + m_PairDelimiter + this->operator()(value.second); + return this->operator()(value.first) + m_PairDelimiter + + this->operator()(value.second); } private: @@ -252,7 +266,10 @@ class CORE_EXPORT CPersistUtils { //! using CStringUtils functions. class CORE_EXPORT CBuiltinFromString { public: - CBuiltinFromString(const char pairDelimiter) : m_PairDelimiter(pairDelimiter) { m_Token.reserve(15); } + CBuiltinFromString(const char pairDelimiter) + : m_PairDelimiter(pairDelimiter) { + m_Token.reserve(15); + } template bool operator()(const std::string& token, T& value) const { @@ -295,7 +312,9 @@ class CORE_EXPORT CPersistUtils { return false; } - bool operator()(const std::string& token, CFloatStorage& value) const { return value.fromString(token); } + bool operator()(const std::string& token, CFloatStorage& value) const { + return value.fromString(token); + } template bool operator()(const std::string& token, std::pair& value) const { @@ -324,13 +343,16 @@ class CORE_EXPORT CPersistUtils { //! Entry method for objects being persisted template - static bool persist(const std::string& tag, const T& collection, CStatePersistInserter& inserter) { + static bool + persist(const std::string& tag, const T& collection, CStatePersistInserter& inserter) { return persist_utils_detail::persist(tag, collection, inserter); } //! Wrapper for containers of built in types. template - static std::string toString(const CONTAINER& collection, const char delimiter = DELIMITER, const char pairDelimiter = PAIR_DELIMITER) { + static std::string toString(const CONTAINER& collection, + const char delimiter = DELIMITER, + const char pairDelimiter = PAIR_DELIMITER) { CBuiltinToString f(pairDelimiter); return toString(collection, f, delimiter); } @@ -344,7 +366,9 @@ class CORE_EXPORT CPersistUtils { //! elements. //! \note This should use RVO so just return the string. 
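// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: the delimited round trip that
// CBuiltinToString/CBuiltinFromString support. Assumes the container
// overloads of toString/fromString and the default DELIMITER declared in
// this class (whatever character that is defined to be):

#include <string>
#include <vector>

void persistRoundTripExample() {
    std::vector<double> original{1.5, 2.0, 3.25};
    std::string state = ml::core::CPersistUtils::toString(original);
    std::vector<double> restored;
    bool ok = ml::core::CPersistUtils::fromString(state, restored);
    // On success restored equals original up to single-precision rounding,
    // since doubles are written with CIEEE754::E_SinglePrecision above.
    (void)ok;
}
// ---------------------------------------------------------------------------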
template - static std::string toString(const CONTAINER& collection, const F& stringFunc, const char delimiter = DELIMITER) { + static std::string toString(const CONTAINER& collection, + const F& stringFunc, + const char delimiter = DELIMITER) { if (collection.empty()) { return std::string(); } @@ -355,7 +379,10 @@ class CORE_EXPORT CPersistUtils { //! Wrapper for containers of built in types. template - static std::string toString(ITR& begin, ITR& end, const char delimiter = DELIMITER, const char pairDelimiter = PAIR_DELIMITER) { + static std::string toString(ITR& begin, + ITR& end, + const char delimiter = DELIMITER, + const char pairDelimiter = PAIR_DELIMITER) { CBuiltinToString f(pairDelimiter); return toString(begin, end, f, delimiter); } @@ -371,7 +398,8 @@ class CORE_EXPORT CPersistUtils { //! elements. //! \note This should use RVO so just return the string. template - static std::string toString(ITR& begin, ITR& end, const F& stringFunc, const char delimiter = DELIMITER) { + static std::string + toString(ITR& begin, ITR& end, const F& stringFunc, const char delimiter = DELIMITER) { std::string result = stringFunc(*begin++); for (/**/; begin != end; ++begin) { result += delimiter; @@ -403,8 +431,11 @@ class CORE_EXPORT CPersistUtils { //! Wrapper for ranges of built in types. template - static bool - fromString(const std::string& state, ITR begin, ITR end, const char delimiter = DELIMITER, const char pairDelimiter = PAIR_DELIMITER) { + static bool fromString(const std::string& state, + ITR begin, + ITR end, + const char delimiter = DELIMITER, + const char pairDelimiter = PAIR_DELIMITER) { CBuiltinFromString f(pairDelimiter); return fromString(state, f, begin, end, delimiter); } @@ -447,7 +478,8 @@ class CORE_EXPORT CPersistUtils { collection.reserve(std::count(state.begin(), state.end(), delimiter) + 1); - if (fromString(state, delimiter, stringFunc, std::back_inserter(collection)) == false) { + if (fromString(state, delimiter, stringFunc, + std::back_inserter(collection)) == false) { collection.clear(); return false; } @@ -473,8 +505,10 @@ class CORE_EXPORT CPersistUtils { //! bool (const std::string &, T &) //! \endcode template - static bool - fromString(const std::string& state, const F& stringFunc, boost::array& collection, const char delimiter = DELIMITER) { + static bool fromString(const std::string& state, + const F& stringFunc, + boost::array& collection, + const char delimiter = DELIMITER) { if (state.empty()) { LOG_ERROR(<< "Unexpected number of elements 0" << ", expected " << N); @@ -529,7 +563,8 @@ class CORE_EXPORT CPersistUtils { return true; } - if (fromString(state, delimiter, stringFunc, std::inserter(collection, collection.end())) == false) { + if (fromString(state, delimiter, stringFunc, + std::inserter(collection, collection.end())) == false) { collection.clear(); return false; } @@ -558,7 +593,11 @@ class CORE_EXPORT CPersistUtils { //! bool (const std::string &, CONTAINER::value_type &) //! \endcode template - static bool fromString(const std::string& state, const F& stringFunc, ITR begin, ITR end, const char delimiter = DELIMITER) { + static bool fromString(const std::string& state, + const F& stringFunc, + ITR begin, + ITR end, + const char delimiter = DELIMITER) { if (state.empty()) { return true; @@ -571,13 +610,15 @@ class CORE_EXPORT CPersistUtils { return false; } - return fromString::value_type>(state, delimiter, stringFunc, begin); + return fromString::value_type>( + state, delimiter, stringFunc, begin); } private: //! 
Restores to an insertion iterator. template - static bool fromString(const std::string& state, const char delimiter, const F& stringFunc, ITR inserter) { + static bool + fromString(const std::string& state, const char delimiter, const F& stringFunc, ITR inserter) { std::size_t delimPos = state.find(delimiter); if (delimPos == std::string::npos) { T element; @@ -621,7 +662,8 @@ class CORE_EXPORT CPersistUtils { T element; if (stringFunc(token, element) == false) { - LOG_ERROR(<< "Invalid element " << i << " : element " << token << " in " << state); + LOG_ERROR(<< "Invalid element " << i << " : element " << token + << " in " << state); return false; } *inserter = element; @@ -648,7 +690,9 @@ class CPersisterImpl { } template - static void dispatch(const std::string& tag, const std::pair& t, CStatePersistInserter& inserter) { + static void dispatch(const std::string& tag, + const std::pair& t, + CStatePersistInserter& inserter) { inserter.insertLevel(tag, boost::bind(&newLevel, boost::cref(t), _1)); } @@ -667,17 +711,18 @@ template<> class CPersisterImpl { public: template - static void dispatch(const std::string& tag, const T& container, CStatePersistInserter& inserter) { - doInsert(tag, - container, - inserter, + static void + dispatch(const std::string& tag, const T& container, CStatePersistInserter& inserter) { + doInsert(tag, container, inserter, boost::integral_constant::value>(), boost::false_type()); } //! Specialisation for boost::unordered_set which orders values. template - static void dispatch(const std::string& tag, const boost::unordered_set& container, CStatePersistInserter& inserter) { + static void dispatch(const std::string& tag, + const boost::unordered_set& container, + CStatePersistInserter& inserter) { using TVec = typename std::vector; using TCItr = typename boost::unordered_set::const_iterator; using TCItrVec = typename std::vector; @@ -694,14 +739,17 @@ class CPersisterImpl { } // Sort the values to ensure consistent persist state. - std::sort(iterators.begin(), iterators.end(), [](TCItr lhs, TCItr rhs) { return *lhs < *rhs; }); + std::sort(iterators.begin(), iterators.end(), + [](TCItr lhs, TCItr rhs) { return *lhs < *rhs; }); doInsert(tag, iterators, inserter, boost::false_type(), boost::true_type()); } } //! Specialisation for boost::unordered_map which orders values. template - static void dispatch(const std::string& tag, const boost::unordered_map& container, CStatePersistInserter& inserter) { + static void dispatch(const std::string& tag, + const boost::unordered_map& container, + CStatePersistInserter& inserter) { using TCItr = typename boost::unordered_map::const_iterator; using TCItrVec = typename std::vector; @@ -712,13 +760,16 @@ class CPersisterImpl { } // Sort the keys to ensure consistent persist state. - std::sort(iterators.begin(), iterators.end(), [](TCItr lhs, TCItr rhs) { return lhs->first < rhs->first; }); + std::sort(iterators.begin(), iterators.end(), + [](TCItr lhs, TCItr rhs) { return lhs->first < rhs->first; }); doInsert(tag, iterators, inserter, boost::false_type(), boost::true_type()); } //! Specialisation for std::string, which has iterators but doesn't need //! to be split up into individual characters - static void dispatch(const std::string& tag, const std::string& str, CStatePersistInserter& inserter) { + static void dispatch(const std::string& tag, + const std::string& str, + CStatePersistInserter& inserter) { inserter.insertValue(tag, str); } @@ -727,7 +778,11 @@ class CPersisterImpl { //! //! 
\note Type T is not an iterator template - static void doInsert(const std::string& tag, const T& container, CStatePersistInserter& inserter, boost::true_type, boost::false_type) { + static void doInsert(const std::string& tag, + const T& container, + CStatePersistInserter& inserter, + boost::true_type, + boost::false_type) { inserter.insertValue(tag, CPersistUtils::toString(container)); } @@ -736,10 +791,14 @@ class CPersisterImpl { //! //! \note Type T is not an iterator template - static void - doInsert(const std::string& tag, const T& container, CStatePersistInserter& inserter, boost::false_type, boost::false_type) { + static void doInsert(const std::string& tag, + const T& container, + CStatePersistInserter& inserter, + boost::false_type, + boost::false_type) { using TCItr = typename T::const_iterator; - inserter.insertLevel(tag, boost::bind(&newLevel, container.begin(), container.end(), container.size(), _1)); + inserter.insertLevel(tag, boost::bind(&newLevel, container.begin(), + container.end(), container.size(), _1)); } //! Handle the case for a non-built-in type, which will be added @@ -747,9 +806,14 @@ class CPersisterImpl { //! //! \note Type T is an iterator template - static void doInsert(const std::string& tag, const T& t, CStatePersistInserter& inserter, boost::false_type, boost::true_type) { + static void doInsert(const std::string& tag, + const T& t, + CStatePersistInserter& inserter, + boost::false_type, + boost::true_type) { using TCItr = boost::indirect_iterator; - inserter.insertLevel(tag, boost::bind(&newLevel, TCItr(t.begin()), TCItr(t.end()), t.size(), _1)); + inserter.insertLevel(tag, boost::bind(&newLevel, TCItr(t.begin()), + TCItr(t.end()), t.size(), _1)); } //! Dispatch a collection of items @@ -806,14 +870,16 @@ class CRestorerImpl { } template - static bool dispatch(const std::string& tag, std::pair& t, CStateRestoreTraverser& traverser) { + static bool + dispatch(const std::string& tag, std::pair& t, CStateRestoreTraverser& traverser) { bool ret = true; if (traverser.name() == tag) { if (!traverser.hasSubLevel()) { LOG_ERROR(<< "SubLevel mismatch in restore, at " << traverser.name()); return false; } - ret = traverser.traverseSubLevel(boost::bind(&newLevel, boost::ref(t), _1)); + ret = traverser.traverseSubLevel( + boost::bind(&newLevel, boost::ref(t), _1)); } return ret; } @@ -826,11 +892,13 @@ class CRestorerImpl { return false; } if (!restore(FIRST_TAG, t.first, traverser)) { - LOG_ERROR(<< "Restore error at " << traverser.name() << ": " << traverser.value()); + LOG_ERROR(<< "Restore error at " << traverser.name() << ": " + << traverser.value()); return false; } if (!traverser.next()) { - LOG_ERROR(<< "Restore error at " << traverser.name() << ": " << traverser.value()); + LOG_ERROR(<< "Restore error at " << traverser.name() << ": " + << traverser.value()); return false; } if (traverser.name() != SECOND_TAG) { @@ -838,7 +906,8 @@ class CRestorerImpl { return false; } if (!restore(SECOND_TAG, t.second, traverser)) { - LOG_ERROR(<< "Restore error at " << traverser.name() << ": " << traverser.value()); + LOG_ERROR(<< "Restore error at " << traverser.name() << ": " + << traverser.value()); return false; } return true; @@ -851,7 +920,9 @@ class CRestorerImpl { public: template static bool dispatch(const std::string& tag, T& container, CStateRestoreTraverser& traverser) { - return doTraverse(tag, container, traverser, boost::integral_constant::value>()); + return doTraverse( + tag, container, traverser, + boost::integral_constant::value>()); } //! 
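The doInsert overloads above are selected at compile time: two boolean tags, one saying whether the type is a built-in that can be flattened to a string and one saying whether it is an iterator, steer each call to the right implementation. A sketch of the pattern with standard type traits; the names are illustrative:

    #include <iostream>
    #include <string>
    #include <type_traits>
    #include <vector>

    // Built-in values are written inline as a single tag/value pair.
    template<typename T>
    void persist(const std::string& tag, const T& value, std::true_type /*builtIn*/) {
        std::cout << tag << '=' << value << '\n';
    }

    // Anything else is written as a nested level, element by element.
    template<typename T>
    void persist(const std::string& tag, const T& container, std::false_type /*builtIn*/) {
        std::cout << tag << " {\n";
        for (const auto& element : container) {
            std::cout << "  " << element << '\n';
        }
        std::cout << "}\n";
    }

    // The dispatcher computes the tag once; overload resolution does the rest.
    template<typename T>
    void persist(const std::string& tag, const T& t) {
        persist(tag, t, std::integral_constant<bool, std::is_arithmetic<T>::value>());
    }

Calling persist("count", 42) selects the built-in overload, while persist("values", std::vector<int>{1, 2, 3}) selects the container one.
//!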
Specialisation for std::string, which has iterators but doesn't @@ -908,7 +979,10 @@ class CRestorerImpl { private: template - static bool doTraverse(const std::string& tag, T& container, CStateRestoreTraverser& traverser, boost::true_type) { + static bool doTraverse(const std::string& tag, + T& container, + CStateRestoreTraverser& traverser, + boost::true_type) { bool ret = true; if (traverser.name() == tag) { ret = CPersistUtils::fromString(traverser.value(), container); @@ -917,14 +991,18 @@ class CRestorerImpl { } template - static bool doTraverse(const std::string& tag, T& container, CStateRestoreTraverser& traverser, boost::false_type) { + static bool doTraverse(const std::string& tag, + T& container, + CStateRestoreTraverser& traverser, + boost::false_type) { bool ret = true; if (traverser.name() == tag) { if (!traverser.hasSubLevel()) { LOG_ERROR(<< "SubLevel mismatch in restore, at " << traverser.name()); return false; } - ret = traverser.traverseSubLevel(boost::bind(SSubLevel(), boost::ref(container), _1)); + ret = traverser.traverseSubLevel( + boost::bind(SSubLevel(), boost::ref(container), _1)); } return ret; } diff --git a/include/core/CPolymorphicStackObjectCPtr.h b/include/core/CPolymorphicStackObjectCPtr.h index 3f8f0f1c35..b37c1106be 100644 --- a/include/core/CPolymorphicStackObjectCPtr.h +++ b/include/core/CPolymorphicStackObjectCPtr.h @@ -35,20 +35,21 @@ class CPolymorphicStackObjectCPtr { using TConstD4 = const typename boost::remove_const::type; public: - CPolymorphicStackObjectCPtr() : m_Storage(CNullPolymorphicStackObjectCPtr()) {} + CPolymorphicStackObjectCPtr() + : m_Storage(CNullPolymorphicStackObjectCPtr()) {} template explicit CPolymorphicStackObjectCPtr(const T& d) : m_Storage(d) {} template CPolymorphicStackObjectCPtr(const CPolymorphicStackObjectCPtr& other) { -#define MAYBE_SET(TYPE) \ - { \ - TYPE* d = other.template get(); \ - if (d) { \ - m_Storage = *d; \ - return; \ - } \ +#define MAYBE_SET(TYPE) \ + { \ + TYPE* d = other.template get(); \ + if (d) { \ + m_Storage = *d; \ + return; \ + } \ } MAYBE_SET(TConstD1); MAYBE_SET(TConstD2); @@ -59,21 +60,24 @@ class CPolymorphicStackObjectCPtr { } template - const CPolymorphicStackObjectCPtr& operator=(const CPolymorphicStackObjectCPtr& other) { + const CPolymorphicStackObjectCPtr& + operator=(const CPolymorphicStackObjectCPtr& other) { CPolymorphicStackObjectCPtr tmp(other); this->swap(tmp); return *this; } - operator bool() const { return boost::relaxed_get(&m_Storage) == nullptr; } + operator bool() const { + return boost::relaxed_get(&m_Storage) == nullptr; + } TConstBase* operator->() const { -#define MAYBE_RETURN(TYPE) \ - { \ - TYPE* result = boost::relaxed_get(&m_Storage); \ - if (result) { \ - return static_cast(result); \ - } \ +#define MAYBE_RETURN(TYPE) \ + { \ + TYPE* result = boost::relaxed_get(&m_Storage); \ + if (result) { \ + return static_cast(result); \ + } \ } MAYBE_RETURN(TConstD1); MAYBE_RETURN(TConstD2); @@ -91,7 +95,9 @@ class CPolymorphicStackObjectCPtr { } private: - void swap(CPolymorphicStackObjectCPtr& other) { m_Storage.swap(other.m_Storage); } + void swap(CPolymorphicStackObjectCPtr& other) { + m_Storage.swap(other.m_Storage); + } private: using TStorage = boost::variant; diff --git a/include/core/CRapidJsonConcurrentLineWriter.h b/include/core/CRapidJsonConcurrentLineWriter.h index 356f714127..407752f80b 100644 --- a/include/core/CRapidJsonConcurrentLineWriter.h +++ b/include/core/CRapidJsonConcurrentLineWriter.h @@ -22,7 +22,8 @@ namespace core { //! 
IMPLEMENTATION DECISIONS:\n //! hard code encoding and stream type //! -class CORE_EXPORT CRapidJsonConcurrentLineWriter : public CRapidJsonLineWriter { +class CORE_EXPORT CRapidJsonConcurrentLineWriter + : public CRapidJsonLineWriter { public: using TRapidJsonLineWriterBase = CRapidJsonLineWriter; diff --git a/include/core/CRapidJsonPoolAllocator.h b/include/core/CRapidJsonPoolAllocator.h index a5157c13ec..c0e093a8ad 100644 --- a/include/core/CRapidJsonPoolAllocator.h +++ b/include/core/CRapidJsonPoolAllocator.h @@ -37,7 +37,8 @@ class CRapidJsonPoolAllocator { using TDocumentPtrVec = std::vector; public: - CRapidJsonPoolAllocator() : m_JsonPoolAllocator(m_FixedBuffer, FIXED_BUFFER_SIZE) {} + CRapidJsonPoolAllocator() + : m_JsonPoolAllocator(m_FixedBuffer, FIXED_BUFFER_SIZE) {} ~CRapidJsonPoolAllocator() { this->clear(); } @@ -54,7 +55,9 @@ class CRapidJsonPoolAllocator { } //! \return const reference to the underlying memory pool allocator - const rapidjson::MemoryPoolAllocator<>& get() const { return m_JsonPoolAllocator; } + const rapidjson::MemoryPoolAllocator<>& get() const { + return m_JsonPoolAllocator; + } //! \return reference to the underlying memory pool allocator rapidjson::MemoryPoolAllocator<>& get() { return m_JsonPoolAllocator; } diff --git a/include/core/CRapidJsonWriterBase.h b/include/core/CRapidJsonWriterBase.h index 4b03f4a77c..5c8e4da741 100644 --- a/include/core/CRapidJsonWriterBase.h +++ b/include/core/CRapidJsonWriterBase.h @@ -60,7 +60,8 @@ template class JSON_WRITER = rapidjson::Writer> -class CRapidJsonWriterBase : public JSON_WRITER { +class CRapidJsonWriterBase + : public JSON_WRITER { public: using TTimeVec = std::vector; using TStrVec = std::vector; @@ -81,7 +82,8 @@ class CRapidJsonWriterBase : public JSON_WRITER; public: - using TRapidJsonWriterBase = JSON_WRITER; + using TRapidJsonWriterBase = + JSON_WRITER; //! Instances of this class may very well be long lived, potentially for the lifetime of the application. //! Over the course of that lifetime resources will accumulate in the underlying rapidjson memory @@ -156,7 +158,9 @@ class CRapidJsonWriterBase : public JSON_WRITER& getRawAllocator() const { return this->getAllocator()->get(); } + rapidjson::MemoryPoolAllocator<>& getRawAllocator() const { + return this->getAllocator()->get(); + } bool Double(double d) { // rewrite NaN and Infinity to 0 @@ -174,7 +178,9 @@ class CRapidJsonWriterBase : public JSON_WRITERgetRawAllocator()); } + void pushBack(const char* value, TValue& obj) const { + obj.PushBack(rapidjson::StringRef(value), this->getRawAllocator()); + } //! Push a generic rapidjson value object into a supplied rapidjson object value //! \p[in] value generic rapidjson value object @@ -197,7 +203,9 @@ class CRapidJsonWriterBase : public JSON_WRITER - void addDoubleArrayFieldToObj(const std::string& fieldName, const CONTAINER& values, TValue& obj) const { + void addDoubleArrayFieldToObj(const std::string& fieldName, + const CONTAINER& values, + TValue& obj) const { TValue array = this->makeArray(values.size()); bool considerLogging(true); @@ -226,7 +234,9 @@ class CRapidJsonWriterBase : public JSON_WRITERgetAllocator()->makeStorableDoc(); } + TDocumentWeakPtr makeStorableDoc() const { + return this->getAllocator()->makeStorableDoc(); + } //! 
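Double() above guards the output against values JSON cannot represent: NaN and Infinity are rewritten to 0 before the base writer sees them. The guard in isolation, with std::isfinite standing in for the (boost::math::isfinite) call used elsewhere in this class:

    #include <cmath>

    // JSON has no encoding for NaN or Infinity, so rewrite them to 0
    // before handing off to the underlying writer, as Double() does.
    template<typename WRITER>
    bool writeFiniteDouble(WRITER& writer, double d) {
        if (std::isfinite(d) == false) {
            d = 0.0;
        }
        return writer.Double(d);
    }
//!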
Return a new rapidjson array TValue makeArray(size_t length = 0) const { @@ -256,7 +266,9 @@ class CRapidJsonWriterBase : public JSON_WRITERgetRawAllocator()); obj->AddMember(rapidjson::StringRef(name), v, this->getRawAllocator()); return obj; @@ -266,8 +278,11 @@ class CRapidJsonWriterBase : public JSON_WRITERAddMember(rapidjson::StringRef(name), rapidjson::StringRef(value), this->getRawAllocator()); + TValuePtr addMemberRef(const std::string& name, + const std::string& value, + const TValuePtr& obj) const { + obj->AddMember(rapidjson::StringRef(name), rapidjson::StringRef(value), + this->getRawAllocator()); return obj; } @@ -293,12 +308,16 @@ class CRapidJsonWriterBase : public JSON_WRITERgetRawAllocator()); + obj.AddMember(rapidjson::StringRef(name), rapidjson::StringRef(value), + this->getRawAllocator()); } //! Adds a copy of a string field with the name fieldname to an object. //! \p fieldName must outlive \p obj or memory corruption will occur. - void addStringFieldCopyToObj(const std::string& fieldName, const std::string& value, TValue& obj, bool allowEmptyString = false) const { + void addStringFieldCopyToObj(const std::string& fieldName, + const std::string& value, + TValue& obj, + bool allowEmptyString = false) const { // Don't add empty strings unless explicitly told to if (!allowEmptyString && value.empty()) { return; @@ -311,8 +330,10 @@ class CRapidJsonWriterBase : public JSON_WRITERaddArrayToObj(fieldName, values.begin(), values.end(), obj); } //! Add an array of strings to an object. //! \p fieldName must outlive \p obj or memory corruption will occur. - void addStringArrayFieldToObj(const std::string& fieldName, const TStrUSet& values, TValue& obj) const { + void addStringArrayFieldToObj(const std::string& fieldName, + const TStrUSet& values, + TValue& obj) const { using TStrCPtrVec = std::vector; TStrCPtrVec ordered; @@ -378,18 +404,19 @@ class CRapidJsonWriterBase : public JSON_WRITER>()); + std::sort(ordered.begin(), ordered.end(), + CFunctional::SDereference>()); addArrayToObj(fieldName, boost::iterators::make_indirect_iterator(ordered.begin()), - boost::iterators::make_indirect_iterator(ordered.end()), - obj); + boost::iterators::make_indirect_iterator(ordered.end()), obj); } //! Add an array of pair double, pair double double to an object. //! \p fieldName must outlive \p obj or memory corruption will occur. 
- void - addDoubleDoubleDoublePrPrArrayFieldToObj(const std::string& fieldName, const TDoubleDoubleDoublePrPrVec& values, TValue& obj) const { + void addDoubleDoubleDoublePrPrArrayFieldToObj(const std::string& fieldName, + const TDoubleDoubleDoublePrPrVec& values, + TValue& obj) const { TValue array = this->makeArray(values.size()); bool considerLogging(true); @@ -436,7 +463,9 @@ class CRapidJsonWriterBase : public JSON_WRITERmakeArray(values.size()); for (const auto& value : values) { @@ -459,7 +488,8 @@ class CRapidJsonWriterBase : public JSON_WRITER void checkArrayNumberFinite(NUMBER val, const std::string& fieldName, bool& considerLogging) const { if (considerLogging && !(boost::math::isfinite)(val)) { - LOG_ERROR(<< "Adding " << val << " to the \"" << fieldName << "\" array in a JSON document"); + LOG_ERROR(<< "Adding " << val << " to the \"" << fieldName + << "\" array in a JSON document"); // Don't return - make a best effort to add the value // Some writers derived from this class may defend themselves by converting to 0 considerLogging = false; @@ -467,7 +497,9 @@ class CRapidJsonWriterBase : public JSON_WRITERgetRawAllocator()}; } + TValue asRapidJsonValue(const std::string& value) const { + return {value, this->getRawAllocator()}; + } //! Convert the range [\p begin, \p end) to a RapidJSON array and add to \p obj. template diff --git a/include/core/CRapidXmlParser.h b/include/core/CRapidXmlParser.h index c67ee26bf8..f26edb7c94 100644 --- a/include/core/CRapidXmlParser.h +++ b/include/core/CRapidXmlParser.h @@ -96,15 +96,18 @@ class CORE_EXPORT CRapidXmlParser : public CXmlParserIntf { virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; //! As above, but use a pool to avoid XML node memory allocations where possible - virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; //! As above, but use a string cache to avoid string representation memory //! allocations where possible - virtual bool toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + virtual bool toNodeHierarchy(CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; //! As above, but use both a node pool and a string cache - virtual bool - toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; //! Functions for navigating an XML document without converting it to a //! node hierarchy @@ -135,8 +138,10 @@ class CORE_EXPORT CRapidXmlParser : public CXmlParserIntf { CXmlNodeWithChildren::TXmlNodeWithChildrenP& nodePtr) const; //! Called recursively by the convert() method - static void - convertChildren(const CXmlNodeWithChildren& current, TCharRapidXmlDocument& doc, TCharRapidXmlNode& xmlNode, size_t& approxLen); + static void convertChildren(const CXmlNodeWithChildren& current, + TCharRapidXmlDocument& doc, + TCharRapidXmlNode& xmlNode, + size_t& approxLen); //! Parse a buffer with some specified RapidXml flags set //! 
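Several members above warn that \p fieldName must outlive \p obj or memory corruption will occur: rapidjson::StringRef stores only a pointer into the caller's string, whereas the copying variants duplicate the bytes through the document's allocator. A sketch of the two behaviours in plain rapidjson; treat the exact overloads as assumptions to check against the rapidjson version in use:

    #include <rapidjson/document.h>
    #include <string>

    rapidjson::Document addTwoWays(const std::string& name) {
        rapidjson::Document doc;
        doc.SetObject();
        auto& allocator = doc.GetAllocator();
        // Reference semantics, as in addMemberRef above: only a pointer
        // is stored, so 'name' must outlive 'doc'.
        doc.AddMember(rapidjson::StringRef(name.c_str()), 1.0, allocator);
        // Copy semantics, as in addStringFieldCopyToObj above: the key is
        // duplicated into the document's allocator, so there is no
        // lifetime dependency on 'name'.
        rapidjson::Value key(name.c_str(),
                             static_cast<rapidjson::SizeType>(name.size()), allocator);
        doc.AddMember(key, 2.0, allocator);
        return doc;
    }
//!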
without modifying the contents of the buffer diff --git a/include/core/CScopedRapidJsonPoolAllocator.h b/include/core/CScopedRapidJsonPoolAllocator.h index 8fee1903e7..64832743a7 100644 --- a/include/core/CScopedRapidJsonPoolAllocator.h +++ b/include/core/CScopedRapidJsonPoolAllocator.h @@ -26,7 +26,10 @@ class CScopedRapidJsonPoolAllocator { public: //! \p allocatorName Unique identifier for the allocator //! \p jsonOutputWriter JSON output writer that will make use of the allocator - CScopedRapidJsonPoolAllocator(const std::string& allocatorName, T& writer) : m_Writer(writer) { m_Writer.pushAllocator(allocatorName); } + CScopedRapidJsonPoolAllocator(const std::string& allocatorName, T& writer) + : m_Writer(writer) { + m_Writer.pushAllocator(allocatorName); + } ~CScopedRapidJsonPoolAllocator() { m_Writer.popAllocator(); } diff --git a/include/core/CSmallVector.h b/include/core/CSmallVector.h index 42bf68a4db..f96e0f76e3 100644 --- a/include/core/CSmallVector.h +++ b/include/core/CSmallVector.h @@ -96,17 +96,22 @@ class CSmallVector : public boost::container::small_vector { CSmallVector() {} CSmallVector(const CSmallVector& other) : TBase(other) {} CSmallVector(CSmallVector&& other) : TBase(std::move(other.baseRef())) {} - explicit CSmallVector(size_type n, const value_type& val = value_type()) : TBase(n, val) {} - CSmallVector(std::initializer_list list) : TBase(list.begin(), list.end()) {} + explicit CSmallVector(size_type n, const value_type& val = value_type()) + : TBase(n, val) {} + CSmallVector(std::initializer_list list) + : TBase(list.begin(), list.end()) {} template CSmallVector(ITR first, ITR last) : TBase(first, last) {} template - CSmallVector(const CSmallVector& other) : TBase(other.begin(), other.end()) {} + CSmallVector(const CSmallVector& other) + : TBase(other.begin(), other.end()) {} template - CSmallVector(std::initializer_list list) : TBase(list.begin(), list.end()) {} + CSmallVector(std::initializer_list list) + : TBase(list.begin(), list.end()) {} // Extend to construct implicitly from a vector. template - CSmallVector(const std::vector& other) : TBase(other.begin(), other.end()) {} + CSmallVector(const std::vector& other) + : TBase(other.begin(), other.end()) {} CSmallVector& operator=(CSmallVector&& rhs) { this->baseRef() = std::move(rhs.baseRef()); @@ -118,7 +123,9 @@ class CSmallVector : public boost::container::small_vector { } // Extend to convert implicitly to a vector. - inline operator std::vector() const { return std::vector(this->begin(), this->end()); } + inline operator std::vector() const { + return std::vector(this->begin(), this->end()); + } // Non-standard plus assign for the case that T has operator+=. const CSmallVector& operator+=(const CSmallVectorBase& rhs) { diff --git a/include/core/CStateCompressor.h b/include/core/CStateCompressor.h index d83158bb67..3a1ece3a51 100644 --- a/include/core/CStateCompressor.h +++ b/include/core/CStateCompressor.h @@ -55,7 +55,8 @@ class CORE_EXPORT CStateCompressor : public CDataAdder { using char_type = char; //! Inform the filtering_stream owning object what this is capable of - struct category : public boost::iostreams::sink_tag, public boost::iostreams::closable_tag {}; + struct category : public boost::iostreams::sink_tag, + public boost::iostreams::closable_tag {}; public: //! 
Constructor diff --git a/include/core/CStateMachine.h b/include/core/CStateMachine.h index 2a8a484ea9..6575a1f5a2 100644 --- a/include/core/CStateMachine.h +++ b/include/core/CStateMachine.h @@ -77,7 +77,10 @@ class CORE_EXPORT CStateMachine { //! //! \note This can fail if the supplied data are inconsistent in //! which case the state is set to bad. - static CStateMachine create(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction, std::size_t state); + static CStateMachine create(const TStrVec& alphabet, + const TStrVec& states, + const TSizeVecVec& transitionFunction, + std::size_t state); //! \name Persistence //@{ @@ -128,8 +131,11 @@ class CORE_EXPORT CStateMachine { }; //! \brief A lightweight object to lookup a single machine. - struct CORE_EXPORT SLookupMachine : boost::equality_comparable2 { - SLookupMachine(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction); + struct CORE_EXPORT SLookupMachine + : boost::equality_comparable2 { + SLookupMachine(const TStrVec& alphabet, + const TStrVec& states, + const TSizeVecVec& transitionFunction); //! Test if two machines are equal. bool operator==(const SMachine& rhs) const; diff --git a/include/core/CStringSimilarityTester.h b/include/core/CStringSimilarityTester.h index b4f9c226b7..b8c5404b94 100644 --- a/include/core/CStringSimilarityTester.h +++ b/include/core/CStringSimilarityTester.h @@ -80,8 +80,11 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { //! Calculate how similar two strings are in the case where //! we already know their individual compressed lengths - bool - similarity(const std::string& first, size_t firstCompLength, const std::string& second, size_t secondCompLength, double& result) const; + bool similarity(const std::string& first, + size_t firstCompLength, + const std::string& second, + size_t secondCompLength, + double& result) const; //! Remove those characters from a string that cause a provided //! predicate to return true (can be used with ctype.h functions @@ -91,7 +94,8 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { std::string stripped; stripped.reserve(original.size()); - std::remove_copy_if(original.begin(), original.end(), std::back_inserter(stripped), excludePred); + std::remove_copy_if(original.begin(), original.end(), + std::back_inserter(stripped), excludePred); return stripped; } @@ -99,8 +103,12 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { //! Calculate how similar two strings are, excluding //! certain characters template - bool similarityEx(const std::string& first, const std::string& second, PREDICATE excludePred, double& result) const { - return this->similarity(this->strippedString(first, excludePred), this->strippedString(second, excludePred), result); + bool similarityEx(const std::string& first, + const std::string& second, + PREDICATE excludePred, + double& result) const { + return this->similarity(this->strippedString(first, excludePred), + this->strippedString(second, excludePred), result); } //! Find the length of the compressed version of a string - note @@ -110,8 +118,11 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { //! Calculate the Levenshtein distance between two strings, //! 
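similarityEx above excludes the variable parts of two strings before comparing them: characters matching a caller-supplied predicate are stripped with std::remove_copy_if, so for example digits can be ignored when testing whether two log messages are really the same. The strip-then-compare flow in isolation:

    #include <algorithm>
    #include <cctype>
    #include <iterator>
    #include <string>

    // Drop characters the predicate matches, as strippedString does above.
    template<typename PREDICATE>
    std::string stripped(const std::string& original, PREDICATE excludePred) {
        std::string result;
        result.reserve(original.size());
        std::remove_copy_if(original.begin(), original.end(),
                            std::back_inserter(result), excludePred);
        return result;
    }

    // "user 1234 logged in" and "user 99 logged in" compare equal
    // once digits are excluded.
    bool equalIgnoringDigits(const std::string& first, const std::string& second) {
        auto isDigit = [](unsigned char c) { return std::isdigit(c) != 0; };
        return stripped(first, isDigit) == stripped(second, isDigit);
    }
//!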
excluding certain characters template - size_t levenshteinDistanceEx(const STRINGLIKE& first, const STRINGLIKE& second, PREDICATE excludePred) const { - return this->levenshteinDistance(this->strippedString(first, excludePred), this->strippedString(second, excludePred)); + size_t levenshteinDistanceEx(const STRINGLIKE& first, + const STRINGLIKE& second, + PREDICATE excludePred) const { + return this->levenshteinDistance(this->strippedString(first, excludePred), + this->strippedString(second, excludePred)); } //! Calculate the Levenshtein distance between two strings or @@ -229,7 +240,8 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { // Populate the left column currentCol[0] = 0; for (size_t downMinusOne = 0; downMinusOne < secondLen; ++downMinusOne) { - currentCol[downMinusOne + 1] = currentCol[downMinusOne] + second[downMinusOne].second; + currentCol[downMinusOne + 1] = currentCol[downMinusOne] + + second[downMinusOne].second; } // Calculate the other entries in the matrix @@ -257,8 +269,11 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { // OR // No extra cost in the case where the corresponding // elements are equal - size_t option3(prevCol[downMinusOne] + - ((first[acrossMinusOne].first == second[downMinusOne].first) ? 0 : std::max(firstCost, secondCost))); + size_t option3( + prevCol[downMinusOne] + + ((first[acrossMinusOne].first == second[downMinusOne].first) + ? 0 + : std::max(firstCost, secondCost))); // Take the cheapest option of the 3 currentCol[downMinusOne + 1] = std::min(std::min(option1, option2), option3); @@ -329,7 +344,8 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { size_t option3(prevCol[downMinusOne]); // Take the cheapest option of the 3 - currentCol[downMinusOne + 1] = std::min(std::min(option1, option2), option3) + 1; + currentCol[downMinusOne + 1] = + std::min(std::min(option1, option2), option3) + 1; } } } @@ -406,7 +422,8 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable { int option3(matrix[row + 1][column - 1] + 1); int t(std::max(std::max(option1, option2), option3)); - int limit(std::min(static_cast(first.size()), static_cast(second.size()) - row)); + int limit(std::min(static_cast(first.size()), + static_cast(second.size()) - row)); while (t < limit && first[t] == second[t + row]) { ++t; } diff --git a/include/core/CStringUtils.h b/include/core/CStringUtils.h index 8ff005ee3a..3a65b5e73b 100644 --- a/include/core/CStringUtils.h +++ b/include/core/CStringUtils.h @@ -137,7 +137,8 @@ class CORE_EXPORT CStringUtils : private CNonInstantiatable { //! Find and replace the first occurrence (only) of a string within //! another string - static size_t replaceFirst(const std::string& from, const std::string& to, std::string& str); + static size_t + replaceFirst(const std::string& from, const std::string& to, std::string& str); //! Escape a specified set of characters in a string static void escape(char escape, const std::string& toEscape, std::string& str); @@ -148,13 +149,17 @@ class CORE_EXPORT CStringUtils : private CNonInstantiatable { //! Tokenise a std::string based on a delimiter. //! This does NOT behave like strtok - it matches //! the entire delimiter not just characters in it - static void tokenise(const std::string& delim, const std::string& str, TStrVec& tokens, std::string& remainder); + static void tokenise(const std::string& delim, + const std::string& str, + TStrVec& tokens, + std::string& remainder); //! 
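The distance computations above keep just two columns of the dynamic-programming matrix, prevCol and currentCol, so memory stays proportional to one string's length; each cell takes the cheapest of a deletion (option1), an insertion (option2), or a substitution (option3). The unweighted algorithm in a standalone form:

    #include <algorithm>
    #include <string>
    #include <vector>

    // Two-column Levenshtein distance, mirroring the option1/2/3
    // bookkeeping in the loops above.
    std::size_t levenshteinDistance(const std::string& first, const std::string& second) {
        std::vector<std::size_t> prevCol(second.size() + 1);
        std::vector<std::size_t> currentCol(second.size() + 1);
        for (std::size_t down = 0; down <= second.size(); ++down) {
            currentCol[down] = down;
        }
        for (std::size_t across = 1; across <= first.size(); ++across) {
            currentCol.swap(prevCol);
            currentCol[0] = across;
            for (std::size_t down = 1; down <= second.size(); ++down) {
                if (first[across - 1] == second[down - 1]) {
                    currentCol[down] = prevCol[down - 1];
                } else {
                    std::size_t option1 = prevCol[down];        // delete
                    std::size_t option2 = currentCol[down - 1]; // insert
                    std::size_t option3 = prevCol[down - 1];    // substitute
                    currentCol[down] = std::min(std::min(option1, option2), option3) + 1;
                }
            }
        }
        return currentCol[second.size()];
    }

For example, levenshteinDistance("kitten", "sitting") returns 3.
//!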
Find the longest common substring of two strings static std::string longestCommonSubstr(const std::string& str1, const std::string& str2); //! Find the longest common subsequence of two strings - static std::string longestCommonSubsequence(const std::string& str1, const std::string& str2); + static std::string longestCommonSubsequence(const std::string& str1, + const std::string& str2); //! Convert between wide and narrow strings. //! There's currently no clever processing here for character set diff --git a/include/core/CThreadFarm.h b/include/core/CThreadFarm.h index 6ffecdb425..748c9c4b80 100644 --- a/include/core/CThreadFarm.h +++ b/include/core/CThreadFarm.h @@ -47,7 +47,8 @@ templatedispatchMsg(msg); ++m_Pending; } ++m_MessagesAdded; if (m_MessagesAdded % 1000 == 0) { - LOG_INFO(<< "Added message " << m_MessagesAdded << " to the " << m_Name << " thread farm; pending count now " << m_Pending); + LOG_INFO(<< "Added message " << m_MessagesAdded << " to the " << m_Name + << " thread farm; pending count now " << m_Pending); } pending = m_Pending; @@ -104,14 +108,17 @@ class CThreadFarm : private CNonCopyable { //! Initialise - create the receiving threads bool start() { if (m_Started == true) { - LOG_ERROR(<< "Can't start the " << m_Name << " thread farm because it's already running."); + LOG_ERROR(<< "Can't start the " << m_Name + << " thread farm because it's already running."); return false; } size_t count(1); - for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); itr != m_MessageQueues.end(); ++itr) { + for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); + itr != m_MessageQueues.end(); ++itr) { if ((*itr)->start() == false) { - LOG_ERROR(<< "Unable to start message queue " << count << " for the " << m_Name << " thread farm"); + LOG_ERROR(<< "Unable to start message queue " << count + << " for the " << m_Name << " thread farm"); return false; } @@ -131,13 +138,16 @@ class CThreadFarm : private CNonCopyable { } size_t count(1); - for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); itr != m_MessageQueues.end(); ++itr) { + for (TMessageQueuePVecItr itr = m_MessageQueues.begin(); + itr != m_MessageQueues.end(); ++itr) { if ((*itr)->stop() == false) { - LOG_ERROR(<< "Unable to stop message queue " << count << " for the " << m_Name << " thread farm"); + LOG_ERROR(<< "Unable to stop message queue " << count + << " for the " << m_Name << " thread farm"); return false; } - LOG_DEBUG(<< "Stopped message queue " << count << " for the " << m_Name << " thread farm"); + LOG_DEBUG(<< "Stopped message queue " << count << " for the " + << m_Name << " thread farm"); ++count; } @@ -148,7 +158,8 @@ class CThreadFarm : private CNonCopyable { m_LastPrint = 0; if (m_Pending != 0) { - LOG_ERROR(<< "Inconsistency - " << m_Pending << " pending messages after stopping the " << m_Name << " thread farm"); + LOG_ERROR(<< "Inconsistency - " << m_Pending << " pending messages after stopping the " + << m_Name << " thread farm"); m_Pending = 0; } @@ -162,7 +173,8 @@ class CThreadFarm : private CNonCopyable { CScopedLock lock(m_Mutex); if (m_Pending <= 0) { - LOG_ERROR(<< "Inconsistency - result added with " << m_Pending << " pending messages in the " << m_Name << " thread farm"); + LOG_ERROR(<< "Inconsistency - result added with " << m_Pending + << " pending messages in the " << m_Name << " thread farm"); return; } @@ -172,7 +184,8 @@ class CThreadFarm : private CNonCopyable { // Log how much work is outstanding every so often if ((m_Pending % 10000) == 0 && m_Pending != m_LastPrint) { - LOG_INFO(<< 
"Pending count now " << m_Pending << " for the " << m_Name << " thread farm"); + LOG_INFO(<< "Pending count now " << m_Pending << " for the " + << m_Name << " thread farm"); m_LastPrint = m_Pending; } diff --git a/include/core/CThreadFarmReceiver.h b/include/core/CThreadFarmReceiver.h index 8c202c0fd5..3ca8027ae4 100644 --- a/include/core/CThreadFarmReceiver.h +++ b/include/core/CThreadFarmReceiver.h @@ -24,7 +24,8 @@ namespace core { template class CThreadFarmReceiver { public: - CThreadFarmReceiver(PROCESSOR& processor, THREADFARM& threadFarm) : m_Processor(processor), m_ThreadFarm(threadFarm) {} + CThreadFarmReceiver(PROCESSOR& processor, THREADFARM& threadFarm) + : m_Processor(processor), m_ThreadFarm(threadFarm) {} virtual ~CThreadFarmReceiver() {} diff --git a/include/core/CTicker.h b/include/core/CTicker.h index 567fa4c7cc..c35b8c5dd7 100644 --- a/include/core/CTicker.h +++ b/include/core/CTicker.h @@ -31,7 +31,9 @@ template class CTicker : public CThread { public: //! Timeout is in milliseconds - CTicker(uint32_t timeOut, RECEIVER& receiver) : m_Condition(m_Mutex), m_Quit(false), m_TimeOut(timeOut), m_Receiver(receiver) {} + CTicker(uint32_t timeOut, RECEIVER& receiver) + : m_Condition(m_Mutex), m_Quit(false), m_TimeOut(timeOut), + m_Receiver(receiver) {} //! Destructor will stop the ticker thread if it's already running ~CTicker() { diff --git a/include/core/CTimeUtils.h b/include/core/CTimeUtils.h index 0992d48da8..254c4f079c 100644 --- a/include/core/CTimeUtils.h +++ b/include/core/CTimeUtils.h @@ -54,10 +54,14 @@ class CORE_EXPORT CTimeUtils : private CNonInstantiatable { static int64_t toEpochMs(core_t::TTime t); //! strptime interface //! NOTE: the time returned here is a UTC value - static bool strptime(const std::string& format, const std::string& dateTime, core_t::TTime& preTime); + static bool strptime(const std::string& format, + const std::string& dateTime, + core_t::TTime& preTime); //! Same strptime interface as above, but doesn't print any error messages - static bool strptimeSilent(const std::string& format, const std::string& dateTime, core_t::TTime& preTime); + static bool strptimeSilent(const std::string& format, + const std::string& dateTime, + core_t::TTime& preTime); //! Is a given word a day of the week name, month name, or timezone //! abbreviation in the current locale? Input should be trimmed of diff --git a/include/core/CTriple.h b/include/core/CTriple.h index d11110c03e..70d7bccdc2 100644 --- a/include/core/CTriple.h +++ b/include/core/CTriple.h @@ -31,19 +31,24 @@ namespace core { //! meaning it can be used as a boost::unordered_map key provided the //! underlying types can be hashed using a boost::hasher. template -class CTriple : private boost::equality_comparable, boost::partially_ordered>> { +class CTriple + : private boost::equality_comparable, boost::partially_ordered>> { public: //! See CMemory. 
static bool dynamicSizeAlwaysZero() { - return memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value() && + return memory_detail::SDynamicSizeAlwaysZero::value() && + memory_detail::SDynamicSizeAlwaysZero::value() && memory_detail::SDynamicSizeAlwaysZero::value(); } public: CTriple() : first(), second(), third() {} - CTriple(const T1& first_, const T2& second_, const T3& third_) : first(first_), second(second_), third(third_) {} + CTriple(const T1& first_, const T2& second_, const T3& third_) + : first(first_), second(second_), third(third_) {} - bool operator==(const CTriple& other) const { return first == other.first && second == other.second && third == other.third; } + bool operator==(const CTriple& other) const { + return first == other.first && second == other.second && third == other.third; + } bool operator<(const CTriple& other) const { if (first == other.first) { diff --git a/include/core/CVectorRange.h b/include/core/CVectorRange.h index d54838970c..da1056592a 100644 --- a/include/core/CVectorRange.h +++ b/include/core/CVectorRange.h @@ -40,7 +40,8 @@ struct SIteratorType { //! \brief Implements assignment. template struct SDoAssign { - static const CVectorRange& dispatch(CVectorRange& lhs, const CVectorRange& rhs) { + static const CVectorRange& dispatch(CVectorRange& lhs, + const CVectorRange& rhs) { if (rhs.base() != lhs.base()) { lhs.assign(rhs.begin(), rhs.end()); } else { @@ -52,7 +53,8 @@ struct SDoAssign { }; template struct SDoAssign { - static const CVectorRange& dispatch(CVectorRange& lhs, const CVectorRange& rhs) { + static const CVectorRange& + dispatch(CVectorRange& lhs, const CVectorRange& rhs) { CVectorRange tmp(*rhs.base(), rhs.a(), rhs.b()); lhs.swap(tmp); return lhs; @@ -76,10 +78,13 @@ class CVectorRange { using const_iterator = typename VECTOR::const_iterator; public: - CVectorRange(VECTOR& vector, size_type a, size_type b) : m_Vector(&vector), m_A(a), m_B(b) {} + CVectorRange(VECTOR& vector, size_type a, size_type b) + : m_Vector(&vector), m_A(a), m_B(b) {} //! Copy assignment. - const CVectorRange& operator=(const CVectorRange& other) { return vector_range_detail::SDoAssign::dispatch(*this, other); } + const CVectorRange& operator=(const CVectorRange& other) { + return vector_range_detail::SDoAssign::dispatch(*this, other); + } //! Assign from value. template @@ -122,7 +127,9 @@ class CVectorRange { //! Get writable element at \p pos. reference operator[](size_type pos) { return (*m_Vector)[m_A + pos]; } //! Get read-only element at \p pos. - const_reference operator[](size_type pos) const { return (*m_Vector)[m_A + pos]; } + const_reference operator[](size_type pos) const { + return (*m_Vector)[m_A + pos]; + } //! Get writable first element. reference front() { return this->operator[](0); } @@ -155,10 +162,14 @@ class CVectorRange { //! Get the maximum permitted size. size_type max_size() const { return m_Vector->max_size(); } //! Reserve space for \p size elements. - void reserve(size_type size) { m_Vector->reserve((size + m_Vector->size()) - this->size()); } + void reserve(size_type size) { + m_Vector->reserve((size + m_Vector->size()) - this->size()); + } //! Get the number of elements which can be held in the currently //! allocated storage. - size_type capacity() const { return (m_Vector->capacity() - m_Vector->size()) + this->size(); } + size_type capacity() const { + return (m_Vector->capacity() - m_Vector->size()) + this->size(); + } //! Clear the contents. 
void clear() { diff --git a/include/core/CWordDictionary.h b/include/core/CWordDictionary.h index 6e8e6bacc7..a38e0f4aa0 100644 --- a/include/core/CWordDictionary.h +++ b/include/core/CWordDictionary.h @@ -74,7 +74,9 @@ class CORE_EXPORT CWordDictionary : private CNonCopyable { template class CWeightAll { public: - size_t operator()(EPartOfSpeech partOfSpeech) { return (partOfSpeech == E_NotInDictionary) ? 0 : DEFAULT_EXTRA_WEIGHT; } + size_t operator()(EPartOfSpeech partOfSpeech) { + return (partOfSpeech == E_NotInDictionary) ? 0 : DEFAULT_EXTRA_WEIGHT; + } }; using TWeightAll2 = CWeightAll<2>; @@ -96,11 +98,7 @@ class CORE_EXPORT CWordDictionary : private CNonCopyable { //! Functor for weighting two types of dictionary word by certain //! amounts and all dictionary words by a different amount - template + template class CWeightTwoParts { public: size_t operator()(EPartOfSpeech partOfSpeech) { @@ -167,7 +165,8 @@ class CORE_EXPORT CWordDictionary : private CNonCopyable { //! Stores the dictionary words - using a multi-index even though //! there's only one index, because of its flexible key extractors. //! The key is the string, but hashed and compared ignoring case. - using TStrUMap = boost::unordered_map; + using TStrUMap = + boost::unordered_map; using TStrUMapCItr = TStrUMap::const_iterator; //! Our dictionary of words diff --git a/include/core/CWordExtractor.h b/include/core/CWordExtractor.h index f7648e7d97..4000156f8e 100644 --- a/include/core/CWordExtractor.h +++ b/include/core/CWordExtractor.h @@ -42,7 +42,9 @@ class CORE_EXPORT CWordExtractor { //! Extract words from a message, and return them in a space separated //! string BUT only include words that occur in groups of a specified //! size - static void extractWordsFromMessage(size_t minConsecutive, const std::string& message, std::string& messageWords); + static void extractWordsFromMessage(size_t minConsecutive, + const std::string& message, + std::string& messageWords); private: //! Don't allow objects to be instantiated diff --git a/include/core/CXmlNode.h b/include/core/CXmlNode.h index fe0b1896b0..5b8d0a0a9e 100644 --- a/include/core/CXmlNode.h +++ b/include/core/CXmlNode.h @@ -86,7 +86,8 @@ class CORE_EXPORT CXmlNode { //! type template bool attribute(const std::string& name, TYPE& value) const { - TStrStrPrVecCItr iter = std::find_if(m_Attributes.begin(), m_Attributes.end(), CFirstElementEquals(name)); + TStrStrPrVecCItr iter = std::find_if( + m_Attributes.begin(), m_Attributes.end(), CFirstElementEquals(name)); if (iter == m_Attributes.end()) { return false; } @@ -104,7 +105,8 @@ class CORE_EXPORT CXmlNode { //! convertible to a string using CStringUtils. 
template bool attribute(const std::string& name, const TYPE& value, bool overwrite) { - TStrStrPrVecItr iter = std::find_if(m_Attributes.begin(), m_Attributes.end(), CFirstElementEquals(name)); + TStrStrPrVecItr iter = std::find_if(m_Attributes.begin(), m_Attributes.end(), + CFirstElementEquals(name)); if (iter == m_Attributes.end()) { m_Attributes.push_back(TStrStrPr(name, CStringUtils::typeToString(value))); return true; diff --git a/include/core/CXmlNodeWithChildren.h b/include/core/CXmlNodeWithChildren.h index 1b76aaea79..e2913c8583 100644 --- a/include/core/CXmlNodeWithChildren.h +++ b/include/core/CXmlNodeWithChildren.h @@ -41,7 +41,9 @@ class CORE_EXPORT CXmlNodeWithChildren : public CXmlNode { CXmlNodeWithChildren(const std::string& name, const std::string& value); - CXmlNodeWithChildren(const std::string& name, const std::string& value, const CXmlNode::TStrStrMap& attributes); + CXmlNodeWithChildren(const std::string& name, + const std::string& value, + const CXmlNode::TStrStrMap& attributes); CXmlNodeWithChildren(const CXmlNodeWithChildren& arg); diff --git a/include/core/CXmlNodeWithChildrenPool.h b/include/core/CXmlNodeWithChildrenPool.h index afd3d7fb78..26ad5a1330 100644 --- a/include/core/CXmlNodeWithChildrenPool.h +++ b/include/core/CXmlNodeWithChildrenPool.h @@ -64,7 +64,8 @@ class CORE_EXPORT CXmlNodeWithChildrenPool { //! Allocate a new XML node with the provided name and value, specifying //! whether the double should be output with full precision (e.g. for //! persistence) or not (e.g. for human readability) - CXmlNodeWithChildren::TXmlNodeWithChildrenP newNode(const std::string& name, double value, CIEEE754::EPrecision precision); + CXmlNodeWithChildren::TXmlNodeWithChildrenP + newNode(const std::string& name, double value, CIEEE754::EPrecision precision); //! Recycle an XML node, plus any children it may have void recycle(CXmlNodeWithChildren::TXmlNodeWithChildrenP& nodePtr); diff --git a/include/core/CXmlParser.h b/include/core/CXmlParser.h index 7cfa3c4e2a..364feaaee2 100644 --- a/include/core/CXmlParser.h +++ b/include/core/CXmlParser.h @@ -162,7 +162,10 @@ class CORE_EXPORT CXmlParser : public CXmlParserIntf { //! As above, but with the ability to customise the number of spaces //! per indent (up to a maximum of 10). - static void convert(size_t indentSpaces, const std::string& root, const TStrStrMap& values, std::string& result); + static void convert(size_t indentSpaces, + const std::string& root, + const TStrStrMap& values, + std::string& result); //! Convert a map of name/value pairs to an XML //! parser. @@ -176,15 +179,18 @@ class CORE_EXPORT CXmlParser : public CXmlParserIntf { virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; //! As above, but use a pool to avoid XML node memory allocations where possible - virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; //! As above, but use a string cache to avoid string representation memory //! allocations where possible - virtual bool toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + virtual bool toNodeHierarchy(CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; //! 
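The node pool referenced above exists so repeated parses can recycle XML node objects instead of allocating fresh ones each time, which is why the toNodeHierarchy overloads accept a pool (and a string cache). A minimal sketch of the recycle-and-reuse idea; CNode and CNodePool here are illustrative, not the Ml classes:

    #include <memory>
    #include <string>
    #include <vector>

    struct CNode {
        std::string s_Name;
        std::string s_Value;
        std::vector<std::shared_ptr<CNode>> s_Children;
    };
    using TNodeP = std::shared_ptr<CNode>;

    class CNodePool {
    public:
        // Hand out a recycled node if one is available, else allocate.
        TNodeP newNode(const std::string& name, const std::string& value) {
            TNodeP node;
            if (m_Free.empty()) {
                node = std::make_shared<CNode>();
            } else {
                node = m_Free.back();
                m_Free.pop_back();
            }
            node->s_Name = name;
            node->s_Value = value;
            return node;
        }

        // Return a node, and recursively its children, to the pool.
        void recycle(TNodeP& node) {
            for (auto& child : node->s_Children) {
                this->recycle(child);
            }
            node->s_Children.clear();
            m_Free.push_back(node);
            node.reset();
        }

    private:
        std::vector<TNodeP> m_Free;
    };
//!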
As above, but use both a node pool and a string cache - virtual bool - toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; + virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const; //! Functions for navigating an XML document without converting it to a //! node hierarchy diff --git a/include/core/CXmlParserIntf.h b/include/core/CXmlParserIntf.h index 8e4684cbe8..f54a1ab2b0 100644 --- a/include/core/CXmlParserIntf.h +++ b/include/core/CXmlParserIntf.h @@ -59,19 +59,25 @@ class CORE_EXPORT CXmlParserIntf : private CNonCopyable { //! This is much more efficient than making repeated calls to //! evalXPathExpression() to retrieve the entire contents of a parsed //! document. - virtual bool toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0; + virtual bool + toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0; //! As above, but use a pool to avoid XML node memory allocations where possible - virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0; + virtual bool + toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0; //! As above, but use a string cache to avoid string representation memory //! allocations where possible - virtual bool toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0; + virtual bool + toNodeHierarchy(CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0; //! As above, but use both a node pool and a string cache - virtual bool toNodeHierarchy(CXmlNodeWithChildrenPool& pool, - CStringCache& cache, - CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0; + virtual bool + toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const = 0; //! Functions for navigating an XML document without converting it to a //! 
node hierarchy diff --git a/include/core/LogMacros.h b/include/core/LogMacros.h index 0c3b95179f..5a5575b691 100644 --- a/include/core/LogMacros.h +++ b/include/core/LogMacros.h @@ -16,44 +16,52 @@ #endif #ifdef PROMOTE_LOGGING_TO_INFO // When this option is set all LOG_TRACE macros are promoted to LOG_INFO -#define LOG_TRACE(message) LOG4CXX_INFO(ml::core::CLogger::instance().logger(), "" message) +#define LOG_TRACE(message) \ + LOG4CXX_INFO(ml::core::CLogger::instance().logger(), "" message) #elif defined(EXCLUDE_TRACE_LOGGING) // When this option is set TRACE logging is expanded to nothing - this avoids // the overhead of checking the logging level at all for this low level logging #define LOG_TRACE(message) #else -#define LOG_TRACE(message) LOG4CXX_TRACE(ml::core::CLogger::instance().logger(), "" message) +#define LOG_TRACE(message) \ + LOG4CXX_TRACE(ml::core::CLogger::instance().logger(), "" message) #endif #ifdef LOG_DEBUG #undef LOG_DEBUG #endif #ifdef PROMOTE_LOGGING_TO_INFO // When this option is set all LOG_DEBUG macros are promoted to LOG_INFO -#define LOG_DEBUG(message) LOG4CXX_INFO(ml::core::CLogger::instance().logger(), "" message) +#define LOG_DEBUG(message) \ + LOG4CXX_INFO(ml::core::CLogger::instance().logger(), "" message) #else -#define LOG_DEBUG(message) LOG4CXX_DEBUG(ml::core::CLogger::instance().logger(), "" message) +#define LOG_DEBUG(message) \ + LOG4CXX_DEBUG(ml::core::CLogger::instance().logger(), "" message) #endif #ifdef LOG_INFO #undef LOG_INFO #endif -#define LOG_INFO(message) LOG4CXX_INFO(ml::core::CLogger::instance().logger(), "" message) +#define LOG_INFO(message) \ + LOG4CXX_INFO(ml::core::CLogger::instance().logger(), "" message) #ifdef LOG_WARN #undef LOG_WARN #endif -#define LOG_WARN(message) LOG4CXX_WARN(ml::core::CLogger::instance().logger(), "" message) +#define LOG_WARN(message) \ + LOG4CXX_WARN(ml::core::CLogger::instance().logger(), "" message) #ifdef LOG_ERROR #undef LOG_ERROR #endif -#define LOG_ERROR(message) LOG4CXX_ERROR(ml::core::CLogger::instance().logger(), "" message) +#define LOG_ERROR(message) \ + LOG4CXX_ERROR(ml::core::CLogger::instance().logger(), "" message) #ifdef LOG_FATAL #undef LOG_FATAL #endif -#define LOG_FATAL(message) LOG4CXX_FATAL(ml::core::CLogger::instance().logger(), "" message) +#define LOG_FATAL(message) \ + LOG4CXX_FATAL(ml::core::CLogger::instance().logger(), "" message) #ifdef LOG_ABORT #undef LOG_ABORT #endif -#define LOG_ABORT(message) \ - LOG4CXX_FATAL(ml::core::CLogger::instance().logger(), "" message); \ +#define LOG_ABORT(message) \ + LOG4CXX_FATAL(ml::core::CLogger::instance().logger(), "" message); \ ml::core::CLogger::fatal() // Log at a level specified at runtime as a string, for example @@ -62,4 +70,6 @@ #ifdef LOG_AT_LEVEL #undef LOG_AT_LEVEL #endif -#define LOG_AT_LEVEL(level, message) LOG4CXX_LOGLS(ml::core::CLogger::instance().logger(), log4cxx::Level::toLevel(level), "" message) +#define LOG_AT_LEVEL(level, message) \ + LOG4CXX_LOGLS(ml::core::CLogger::instance().logger(), \ + log4cxx::Level::toLevel(level), "" message) diff --git a/include/core/RestoreMacros.h b/include/core/RestoreMacros.h index 57d83e5416..427724dad7 100644 --- a/include/core/RestoreMacros.h +++ b/include/core/RestoreMacros.h @@ -10,50 +10,50 @@ namespace ml { namespace core { -#define RESTORE(tag, restore) \ - if (name == tag) { \ - if ((restore) == false) { \ - LOG_ERROR(<< "Failed to restore " #tag ", got " << traverser.value()); \ - return false; \ - } \ - continue; \ +#define RESTORE(tag, restore) \ + if (name == tag) { \ + 
if ((restore) == false) { \ + LOG_ERROR(<< "Failed to restore " #tag ", got " << traverser.value()); \ + return false; \ + } \ + continue; \ } -#define RESTORE_BUILT_IN(tag, target) \ - if (name == tag) { \ - if (core::CStringUtils::stringToType(traverser.value(), target) == false) { \ - LOG_ERROR(<< "Failed to restore " #tag ", got " << traverser.value()); \ - return false; \ - } \ - continue; \ +#define RESTORE_BUILT_IN(tag, target) \ + if (name == tag) { \ + if (core::CStringUtils::stringToType(traverser.value(), target) == false) { \ + LOG_ERROR(<< "Failed to restore " #tag ", got " << traverser.value()); \ + return false; \ + } \ + continue; \ } -#define RESTORE_BOOL(tag, target) \ - if (name == tag) { \ - int value; \ - if (core::CStringUtils::stringToType(traverser.value(), value) == false) { \ - LOG_ERROR(<< "Failed to restore " #tag ", got " << traverser.value()); \ - return false; \ - } \ - target = (value != 0); \ - continue; \ +#define RESTORE_BOOL(tag, target) \ + if (name == tag) { \ + int value; \ + if (core::CStringUtils::stringToType(traverser.value(), value) == false) { \ + LOG_ERROR(<< "Failed to restore " #tag ", got " << traverser.value()); \ + return false; \ + } \ + target = (value != 0); \ + continue; \ } -#define RESTORE_SETUP_TEARDOWN(tag, setup, restore, teardown) \ - if (name == tag) { \ - setup; \ - if ((restore) == false) { \ - LOG_ERROR(<< "Failed to restore " #tag ", got " << traverser.value()); \ - return false; \ - } \ - teardown; \ - continue; \ +#define RESTORE_SETUP_TEARDOWN(tag, setup, restore, teardown) \ + if (name == tag) { \ + setup; \ + if ((restore) == false) { \ + LOG_ERROR(<< "Failed to restore " #tag ", got " << traverser.value()); \ + return false; \ + } \ + teardown; \ + continue; \ } -#define RESTORE_NO_ERROR(tag, restore) \ - if (name == tag) { \ - restore; \ - continue; \ +#define RESTORE_NO_ERROR(tag, restore) \ + if (name == tag) { \ + restore; \ + continue; \ } } } diff --git a/include/maths/CAdaptiveBucketing.h b/include/maths/CAdaptiveBucketing.h index 764bc646fa..ab3ede13ec 100644 --- a/include/maths/CAdaptiveBucketing.h +++ b/include/maths/CAdaptiveBucketing.h @@ -82,7 +82,9 @@ class MATHS_EXPORT CAdaptiveBucketing { protected: CAdaptiveBucketing(double decayRate, double minimumBucketLength); //! Construct by traversing a state document. - CAdaptiveBucketing(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser); + CAdaptiveBucketing(double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser); virtual ~CAdaptiveBucketing() = default; //! Efficiently swap the contents of two bucketing objects. @@ -107,7 +109,9 @@ class MATHS_EXPORT CAdaptiveBucketing { //! \param[in] endTime The start of the period. //! \param[in] values The mean values in a regular subdivision //! of [\p start,\p end]. - void initialValues(core_t::TTime startTime, core_t::TTime endTime, const TFloatMeanAccumulatorVec& values); + void initialValues(core_t::TTime startTime, + core_t::TTime endTime, + const TFloatMeanAccumulatorVec& values); //! Get the number of buckets. std::size_t size() const; diff --git a/include/maths/CAnnotatedVector.h b/include/maths/CAnnotatedVector.h index 6f853dab5b..cae45fe77a 100644 --- a/include/maths/CAnnotatedVector.h +++ b/include/maths/CAnnotatedVector.h @@ -34,12 +34,14 @@ class CAnnotatedVector : public VECTOR { public: //! Construct with a vector and annotation data. 
- CAnnotatedVector(const VECTOR& vector = VECTOR(), const ANNOTATION& annotation = ANNOTATION()) + CAnnotatedVector(const VECTOR& vector = VECTOR(), + const ANNOTATION& annotation = ANNOTATION()) : VECTOR(vector), m_Annotation(annotation) {} //! Construct with a vector initialized with \p coordinate //! and some default constructed annotation data. - explicit CAnnotatedVector(TCoordinate coordinate) : VECTOR(coordinate), m_Annotation() {} + explicit CAnnotatedVector(TCoordinate coordinate) + : VECTOR(coordinate), m_Annotation() {} //! Get the annotation data by constant reference. const ANNOTATION& annotation() const { return m_Annotation; } diff --git a/include/maths/CBasicStatistics.h b/include/maths/CBasicStatistics.h index 4a26b0af91..b02fa0e152 100644 --- a/include/maths/CBasicStatistics.h +++ b/include/maths/CBasicStatistics.h @@ -75,13 +75,15 @@ class MATHS_EXPORT CBasicStatistics { //! Compute the maximum of \p first, \p second and \p third. template static T max(T first, T second, T third) { - return first >= second ? (third >= first ? third : first) : (third >= second ? third : second); + return first >= second ? (third >= first ? third : first) + : (third >= second ? third : second); } //! Compute the minimum of \p first, \p second and \p third. template static T min(T first, T second, T third) { - return first <= second ? (third <= first ? third : first) : (third <= second ? third : second); + return first <= second ? (third <= first ? third : first) + : (third <= second ? third : second); } /////////////////////////// ACCUMULATORS /////////////////////////// @@ -146,13 +148,18 @@ class MATHS_EXPORT CBasicStatistics { using TCoordinate = typename SCoordinate::Type; //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + static bool dynamicSizeAlwaysZero() { + return core::memory_detail::SDynamicSizeAlwaysZero::value(); + } - explicit SSampleCentralMoments(const T& initial = T(0)) : s_Count(0) { std::fill_n(s_Moments, ORDER, initial); } + explicit SSampleCentralMoments(const T& initial = T(0)) : s_Count(0) { + std::fill_n(s_Moments, ORDER, initial); + } //! Copy construction from implicitly convertible type. template - SSampleCentralMoments(const SSampleCentralMoments& other) : s_Count{other.s_Count} { + SSampleCentralMoments(const SSampleCentralMoments& other) + : s_Count{other.s_Count} { std::copy(other.s_Moments, other.s_Moments + ORDER, s_Moments); } @@ -177,7 +184,8 @@ class MATHS_EXPORT CBasicStatistics { bool operator<(const SSampleCentralMoments& rhs) const { return s_Count < rhs.s_Count || (s_Count == rhs.s_Count && - std::lexicographical_compare(s_Moments, s_Moments + ORDER, rhs.s_Moments, rhs.s_Moments + ORDER)); + std::lexicographical_compare(s_Moments, s_Moments + ORDER, + rhs.s_Moments, rhs.s_Moments + ORDER)); } //! 
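The update operators that follow blend two accumulators by their relative counts: with alpha = rhs count / combined count and beta = 1 - alpha, the merged mean is the count-weighted mean, and the merged variance adds a correction for each mean's displacement from the combined mean. A sketch of that arithmetic for the first two moments, with plain doubles and illustrative names:

    struct SMoments {
        double s_Count = 0.0;
        double s_Mean = 0.0;
        double s_Variance = 0.0; // the maximum likelihood, i.e. biased, form
    };

    // Merge two (count, mean, variance) accumulators, mirroring the
    // alpha/beta blending in operator+= below.
    SMoments merge(const SMoments& lhs, const SMoments& rhs) {
        SMoments result;
        result.s_Count = lhs.s_Count + rhs.s_Count;
        double alpha = rhs.s_Count / result.s_Count;
        double beta = 1.0 - alpha;
        result.s_Mean = beta * lhs.s_Mean + alpha * rhs.s_Mean;
        double dMeanLhs = lhs.s_Mean - result.s_Mean;
        double dMeanRhs = rhs.s_Mean - result.s_Mean;
        result.s_Variance = beta * (lhs.s_Variance + dMeanLhs * dMeanLhs) +
                            alpha * (rhs.s_Variance + dMeanRhs * dMeanRhs);
        return result;
    }
//!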
\name Update @@ -278,7 +286,8 @@ class MATHS_EXPORT CBasicStatistics { T dMean2Rhs{dMeanRhs * dMeanRhs}; T varianceRhs{rhs.s_Moments[1]}; - s_Moments[1] = beta * (varianceLhs + dMean2Lhs) + alpha * (varianceRhs + dMean2Rhs); + s_Moments[1] = beta * (varianceLhs + dMean2Lhs) + + alpha * (varianceRhs + dMean2Rhs); if (ORDER > 2) { T skewLhs{s_Moments[2]}; @@ -341,7 +350,9 @@ class MATHS_EXPORT CBasicStatistics { T dMean2Rhs{dMeanRhs * dMeanRhs}; T varianceRhs{rhs.s_Moments[1]}; - s_Moments[1] = max(beta * (s_Moments[1] - dMean2Lhs) - alpha * (varianceRhs + dMean2Rhs - dMean2Lhs), T{0}); + s_Moments[1] = max(beta * (s_Moments[1] - dMean2Lhs) - + alpha * (varianceRhs + dMean2Rhs - dMean2Lhs), + T{0}); if (ORDER > 2) { T skewLhs{s_Moments[2]}; @@ -349,7 +360,8 @@ class MATHS_EXPORT CBasicStatistics { T skewRhs{rhs.s_Moments[2]}; T dSkewRhs{(TCoordinate{3} * varianceRhs + dMean2Rhs) * dMeanRhs}; - s_Moments[2] = beta * (skewLhs - dSkewLhs) - alpha * (skewRhs + dSkewRhs - dSkewLhs); + s_Moments[2] = beta * (skewLhs - dSkewLhs) - + alpha * (skewRhs + dSkewRhs - dSkewLhs); } } @@ -416,7 +428,8 @@ class MATHS_EXPORT CBasicStatistics { //! Make a mean and variance accumulator. template - static SSampleCentralMoments accumulator(const U& count, const T& m1, const T& m2) { + static SSampleCentralMoments + accumulator(const U& count, const T& m1, const T& m2) { SSampleCentralMoments result; result.s_Count = count; result.s_Moments[0] = m1; @@ -426,7 +439,8 @@ class MATHS_EXPORT CBasicStatistics { //! Make a mean, variance and skew accumulator. template - static SSampleCentralMoments accumulator(const U& count, const T& m1, const T& m2, const T& m3) { + static SSampleCentralMoments + accumulator(const U& count, const T& m1, const T& m2, const T& m3) { SSampleCentralMoments result; result.s_Count = count; result.s_Moments[0] = m1; @@ -452,13 +466,15 @@ class MATHS_EXPORT CBasicStatistics { //! Extract the count from an accumulator object. template - static inline const typename SSampleCentralMoments::TCoordinate& count(const SSampleCentralMoments& accumulator) { + static inline const typename SSampleCentralMoments::TCoordinate& + count(const SSampleCentralMoments& accumulator) { return accumulator.s_Count; } //! Extract the count from an accumulator object. template - static inline typename SSampleCentralMoments::TCoordinate& count(SSampleCentralMoments& accumulator) { + static inline typename SSampleCentralMoments::TCoordinate& + count(SSampleCentralMoments& accumulator) { return accumulator.s_Count; } @@ -562,7 +578,8 @@ class MATHS_EXPORT CBasicStatistics { //! //! \note This is the biased form. template - static inline const T& maximumLikelihoodVariance(const SSampleCentralMoments& accumulator) { + static inline const T& + maximumLikelihoodVariance(const SSampleCentralMoments& accumulator) { static_assert(N >= 2, "N must be at least 2"); return accumulator.s_Moments[1]; } @@ -646,22 +663,24 @@ class MATHS_EXPORT CBasicStatistics { template static inline std::string print(const SSampleCentralMoments& accumulator) { std::ostringstream result; - result << '(' << count(accumulator) << ", " << mean(accumulator) << ", " << variance(accumulator) << ')'; + result << '(' << count(accumulator) << ", " << mean(accumulator) << ", " + << variance(accumulator) << ')'; return result.str(); } //! Print a mean, variance and skew accumulator. 
template static inline std::string print(const SSampleCentralMoments& accumulator) { std::ostringstream result; - result << '(' << count(accumulator) << ", " << mean(accumulator) << ", " << variance(accumulator) << ", " << skewness(accumulator) - << ')'; + result << '(' << count(accumulator) << ", " << mean(accumulator) << ", " + << variance(accumulator) << ", " << skewness(accumulator) << ')'; return result.str(); } //@} //! Get a copy of \p moments with count scaled by \p scale. template - static SSampleCentralMoments scaled(SSampleCentralMoments accumulator, const U& scale) { + static SSampleCentralMoments + scaled(SSampleCentralMoments accumulator, const U& scale) { accumulator.s_Count *= typename SSampleCentralMoments::TCoordinate{scale}; return accumulator; } @@ -697,7 +716,9 @@ class MATHS_EXPORT CBasicStatistics { template struct SSampleCovariances : public std::unary_function, void> { //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + static bool dynamicSizeAlwaysZero() { + return core::memory_detail::SDynamicSizeAlwaysZero::value(); + } using TVector = CVectorNx1; using TMatrix = CSymmetricMatrixNxN; @@ -713,7 +734,8 @@ class MATHS_EXPORT CBasicStatistics { //! Copy construction from implicitly convertible type. template SSampleCovariances(const SSampleCovariances& other) - : s_Count{other.s_Count}, s_Mean{other.s_Mean}, s_Covariances{other.s_Covariances} {} + : s_Count{other.s_Count}, s_Mean{other.s_Mean}, s_Covariances{other.s_Covariances} { + } //! Assignment from implicitly convertible type. template @@ -910,7 +932,9 @@ class MATHS_EXPORT CBasicStatistics { //! Make a covariances accumulator. template static inline SSampleCovariances - accumulator(const CVectorNx1& count, const CVectorNx1& mean, const CSymmetricMatrixNxN& covariances) { + accumulator(const CVectorNx1& count, + const CVectorNx1& mean, + const CSymmetricMatrixNxN& covariances) { return SSampleCovariances(count, mean, covariances); } @@ -930,7 +954,8 @@ class MATHS_EXPORT CBasicStatistics { //! //! \note This is the unbiased form. template - static inline CSymmetricMatrixNxN covariances(const SSampleCovariances& accumulator) { + static inline CSymmetricMatrixNxN + covariances(const SSampleCovariances& accumulator) { CVectorNx1 bias(accumulator.s_Count); for (std::size_t i = 0u; i < N; ++i) { if (bias(i) <= T{1}) { @@ -947,7 +972,8 @@ class MATHS_EXPORT CBasicStatistics { //! //! \note This is the unbiased form. template - static inline const CSymmetricMatrixNxN& maximumLikelihoodCovariances(const SSampleCovariances& accumulator) { + static inline const CSymmetricMatrixNxN& + maximumLikelihoodCovariances(const SSampleCovariances& accumulator) { return accumulator.s_Covariances; } @@ -974,7 +1000,8 @@ class MATHS_EXPORT CBasicStatistics { //! \param[out] result Filled in with the count, mean and "shrunk" //! covariance matrix estimate. template - static void covariancesLedoitWolf(const std::vector& points, SSampleCovariances& result) { + static void covariancesLedoitWolf(const std::vector& points, + SSampleCovariances& result) { result.add(points); basic_statistics_detail::SCovariancesLedoitWolf::estimate(points, result); } @@ -1005,7 +1032,8 @@ class MATHS_EXPORT CBasicStatistics { public: COrderStatisticsImpl(const CONTAINER& statistics, const LESS& less) - : m_Less(less), m_Statistics(statistics), m_UnusedCount(statistics.size()) {} + : m_Less(less), m_Statistics(statistics), + m_UnusedCount(statistics.size()) {} //! 
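Note: covariances() above returns the bias-corrected estimate, scaling the maximum likelihood covariances by n / (n - 1) per coordinate. A sketch of the correction using Eigen for brevity; the library uses its own matrix types and applies the guard per coordinate with a vector of counts:

    #include <Eigen/Core>

    // Sketch: bias correction n / (n - 1) applied to the maximum likelihood
    // covariances. For n <= 1 the unbiased estimate is undefined; the library
    // guards this per coordinate, here we simply return zero.
    Eigen::MatrixXd unbiased(const Eigen::MatrixXd& mlCov, double n) {
        if (n <= 1.0) {
            return Eigen::MatrixXd::Zero(mlCov.rows(), mlCov.cols());
        }
        return (n / (n - 1.0)) * mlCov;
    }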
\name Persistence //@{ @@ -1022,7 +1050,9 @@ class MATHS_EXPORT CBasicStatistics { inline bool operator()(const T& x) { return this->add(x); } //! Check if we would add \p x. - bool wouldAdd(const T& x) const { return m_UnusedCount > 0 || m_Less(x, *this->begin()); } + bool wouldAdd(const T& x) const { + return m_UnusedCount > 0 || m_Less(x, *this->begin()); + } //! Update the statistics with the collection \p x. bool add(const std::vector& x) { @@ -1103,26 +1133,38 @@ class MATHS_EXPORT CBasicStatistics { //! order predicate and is effectively the first value which //! will be removed if a new value displaces it. inline const T& biggest() const { - return m_UnusedCount > 0 ? *std::max_element(this->begin(), this->end(), m_Less) : *this->begin(); + return m_UnusedCount > 0 + ? *std::max_element(this->begin(), this->end(), m_Less) + : *this->begin(); } //! Get the number of statistics. - inline std::size_t count() const { return m_Statistics.size() - m_UnusedCount; } + inline std::size_t count() const { + return m_Statistics.size() - m_UnusedCount; + } //! Get the i'th statistic. - inline T& operator[](std::size_t i) { return m_Statistics[m_UnusedCount + i]; } + inline T& operator[](std::size_t i) { + return m_Statistics[m_UnusedCount + i]; + } //! Get the i'th statistic. - inline const T& operator[](std::size_t i) const { return m_Statistics[m_UnusedCount + i]; } + inline const T& operator[](std::size_t i) const { + return m_Statistics[m_UnusedCount + i]; + } //! Get an iterator over the statistics. inline iterator begin() { return m_Statistics.begin() + m_UnusedCount; } //! Get an iterator over the statistics. - inline const_iterator begin() const { return m_Statistics.begin() + m_UnusedCount; } + inline const_iterator begin() const { + return m_Statistics.begin() + m_UnusedCount; + } //! Get a reverse iterator over the order statistics. inline reverse_iterator rbegin() { return m_Statistics.rbegin(); } //! Get a reverse iterator over the order statistics. - inline const_reverse_iterator rbegin() const { return m_Statistics.rbegin(); } + inline const_reverse_iterator rbegin() const { + return m_Statistics.rbegin(); + } //! Get an iterator representing the end of the statistics. inline iterator end() { return m_Statistics.end(); } @@ -1130,9 +1172,13 @@ class MATHS_EXPORT CBasicStatistics { inline const_iterator end() const { return m_Statistics.end(); } //! Get an iterator representing the end of the statistics. - inline reverse_iterator rend() { return m_Statistics.rbegin() + m_UnusedCount; } + inline reverse_iterator rend() { + return m_Statistics.rbegin() + m_UnusedCount; + } //! Get an iterator representing the end of the statistics. - inline const_reverse_iterator rend() const { return m_Statistics.rbegin() + m_UnusedCount; } + inline const_reverse_iterator rend() const { + return m_Statistics.rbegin() + m_UnusedCount; + } //@} //! Remove all statistics. @@ -1145,7 +1191,9 @@ class MATHS_EXPORT CBasicStatistics { uint64_t checksum(uint64_t seed) const; //! Print for debug. - std::string print() const { return core::CContainerPrinter::print(this->begin(), this->end()); } + std::string print() const { + return core::CContainerPrinter::print(this->begin(), this->end()); + } protected: //! Get the statistics. @@ -1194,8 +1242,9 @@ class MATHS_EXPORT CBasicStatistics { //! \tparam LESS The comparison function object type used to test //! if one object of type T is less than another. 
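Note: COrderStatisticsImpl keeps the n "smallest" values under a comparator, and wouldAdd() above short-circuits on values that could not displace the current worst. A heap-based sketch of the same idea, illustrative only; the real class stores the statistics in a fixed buffer rather than a heap:

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <vector>

    // Keep the n "smallest" values under LESS, rejecting values that would
    // not displace the current worst. Invented names.
    template<typename T, typename LESS = std::less<T>>
    class CNSmallest {
    public:
        explicit CNSmallest(std::size_t n, LESS less = LESS{})
            : m_N(n), m_Less(less) {}

        bool add(const T& x) {
            if (m_Values.size() < m_N) {
                m_Values.push_back(x);
                std::push_heap(m_Values.begin(), m_Values.end(), m_Less);
                return true;
            }
            if (m_Less(x, m_Values.front())) { // front() is the current worst
                std::pop_heap(m_Values.begin(), m_Values.end(), m_Less);
                m_Values.back() = x;
                std::push_heap(m_Values.begin(), m_Values.end(), m_Less);
                return true;
            }
            return false;
        }

    private:
        std::size_t m_N;
        LESS m_Less;
        std::vector<T> m_Values; // max-heap w.r.t. m_Less
    };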
template> - class COrderStatisticsStack : public COrderStatisticsImpl, LESS>, - private boost::addable> { + class COrderStatisticsStack + : public COrderStatisticsImpl, LESS>, + private boost::addable> { private: using TArray = boost::array; using TImpl = COrderStatisticsImpl; @@ -1206,12 +1255,18 @@ class MATHS_EXPORT CBasicStatistics { using const_iterator = typename TImpl::const_iterator; //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + static bool dynamicSizeAlwaysZero() { + return core::memory_detail::SDynamicSizeAlwaysZero::value(); + } public: - explicit COrderStatisticsStack(const LESS& less = LESS{}) : TImpl{TArray(), less} { this->statistics().assign(T{}); } + explicit COrderStatisticsStack(const LESS& less = LESS{}) + : TImpl{TArray(), less} { + this->statistics().assign(T{}); + } - explicit COrderStatisticsStack(std::size_t /*n*/, const LESS& less = LESS{}) : TImpl{TArray(), less} { + explicit COrderStatisticsStack(std::size_t /*n*/, const LESS& less = LESS{}) + : TImpl{TArray(), less} { this->statistics().assign(T{}); } @@ -1225,7 +1280,9 @@ class MATHS_EXPORT CBasicStatistics { } //! Create a member function so this class works with CChecksum. - uint64_t checksum(uint64_t seed = 0) const { return this->TImpl::checksum(seed); } + uint64_t checksum(uint64_t seed = 0) const { + return this->TImpl::checksum(seed); + } }; //! \brief A heap based accumulator class for order statistics. @@ -1261,8 +1318,9 @@ class MATHS_EXPORT CBasicStatistics { //! \tparam LESS The comparison function object type used to test //! if one object of type T is less than another. template> - class COrderStatisticsHeap : public COrderStatisticsImpl, LESS>, - private boost::addable> { + class COrderStatisticsHeap + : public COrderStatisticsImpl, LESS>, + private boost::addable> { private: using TImpl = COrderStatisticsImpl, LESS>; @@ -1272,7 +1330,8 @@ class MATHS_EXPORT CBasicStatistics { using const_iterator = typename TImpl::const_iterator; public: - explicit COrderStatisticsHeap(std::size_t n, const LESS& less = LESS{}) : TImpl{std::vector(n, T{}), less} {} + explicit COrderStatisticsHeap(std::size_t n, const LESS& less = LESS{}) + : TImpl{std::vector(n, T{}), less} {} //! Reset the number of statistics to gather to \p n. void resize(std::size_t n) { @@ -1290,7 +1349,9 @@ class MATHS_EXPORT CBasicStatistics { } //! Create a member function so this class works with CChecksum. - uint64_t checksum(uint64_t seed = 0) const { return this->TImpl::checksum(seed); } + uint64_t checksum(uint64_t seed = 0) const { + return this->TImpl::checksum(seed); + } }; //! \name Accumulator Typedefs @@ -1319,16 +1380,21 @@ class MATHS_EXPORT CBasicStatistics { class CMinMax : boost::addable> { public: //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + static bool dynamicSizeAlwaysZero() { + return core::memory_detail::SDynamicSizeAlwaysZero::value(); + } public: - explicit CMinMax(const LESS& less = LESS{}, const GREATER& greater = GREATER{}) : m_Min{less}, m_Max{greater} {} + explicit CMinMax(const LESS& less = LESS{}, const GREATER& greater = GREATER{}) + : m_Min{less}, m_Max{greater} {} //! Define a function operator for use with std:: algorithms. inline bool operator()(const T& x) { return this->add(x); } //! Check if we would add \p x. 
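Note: a usage sketch for the stack-based variant above, with the size fixed at compile time; the namespace qualification is an assumption based on the surrounding headers:

    // Assumes the CBasicStatistics.h header above is included.
    ml::maths::CBasicStatistics::COrderStatisticsStack<double, 3> smallest3;
    for (double x : {5.0, 1.0, 4.0, 2.0, 3.0}) {
        smallest3(x); // the function operator forwards to add()
    }
    // smallest3 now holds the three smallest values seen: 1.0, 2.0 and 3.0.

The heap-based variant is the drop-in choice when the number of statistics is only known at run time, via its (n, less) constructor and resize().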
- bool wouldAdd(const T& x) const { return m_Min.wouldAdd(x) || m_Max.wouldAdd(x); } + bool wouldAdd(const T& x) const { + return m_Min.wouldAdd(x) || m_Max.wouldAdd(x); + } //! Update the statistic with the collection \p x. bool add(const std::vector& x) { @@ -1380,7 +1446,9 @@ class MATHS_EXPORT CBasicStatistics { } //! Get a checksum for this object. - uint64_t checksum() const { return core::CHashing::hashCombine(m_Min.checksum(), m_Max.checksum()); } + uint64_t checksum() const { + return core::CHashing::hashCombine(m_Min.checksum(), m_Max.checksum()); + } private: //! The set minimum. @@ -1391,35 +1459,43 @@ class MATHS_EXPORT CBasicStatistics { // Friends template - friend std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); + friend std::ostream& + operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); template - friend std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); + friend std::ostream& + operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); template - friend std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); + friend std::ostream& + operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments&); }; template -std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments& accumulator) { +std::ostream& operator<<(std::ostream& o, + const CBasicStatistics::SSampleCentralMoments& accumulator) { return o << CBasicStatistics::print(accumulator); } template -std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments& accumulator) { +std::ostream& operator<<(std::ostream& o, + const CBasicStatistics::SSampleCentralMoments& accumulator) { return o << CBasicStatistics::print(accumulator); } template -std::ostream& operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments& accumulator) { +std::ostream& operator<<(std::ostream& o, + const CBasicStatistics::SSampleCentralMoments& accumulator) { return o << CBasicStatistics::print(accumulator); } template -std::ostream& operator<<(std::ostream& o, const CBasicStatistics::COrderStatisticsStack& accumulator) { +std::ostream& operator<<(std::ostream& o, + const CBasicStatistics::COrderStatisticsStack& accumulator) { return o << accumulator.print(); } template -std::ostream& operator<<(std::ostream& o, const CBasicStatistics::COrderStatisticsHeap& accumulator) { +std::ostream& operator<<(std::ostream& o, + const CBasicStatistics::COrderStatisticsHeap& accumulator) { return o << accumulator.print(); } @@ -1430,7 +1506,9 @@ namespace basic_statistics_detail { template struct SCentralMomentsCustomAdd { template - static inline void add(const U& x, typename SCoordinate::Type n, CBasicStatistics::SSampleCentralMoments& moments) { + static inline void add(const U& x, + typename SCoordinate::Type n, + CBasicStatistics::SSampleCentralMoments& moments) { moments.add(static_cast(x), n, 0); } }; @@ -1439,7 +1517,9 @@ struct SCentralMomentsCustomAdd { //! estimator. 
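Note: CMinMax above simply composes a one-element minimum and a one-element maximum accumulator. Stripped of the order-statistics machinery, the behaviour it implements is:

    #include <limits>

    // Invented names; tracks the smallest and largest values seen.
    class CMinMaxSketch {
    public:
        bool add(double x) {
            bool added = false;
            if (x < m_Min) { m_Min = x; added = true; }
            if (x > m_Max) { m_Max = x; added = true; }
            return added;
        }
        double range() const { return m_Max - m_Min; }

    private:
        double m_Min = std::numeric_limits<double>::max();
        double m_Max = std::numeric_limits<double>::lowest();
    };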
template struct SCovariancesCustomAdd> { - static inline void add(const CVectorNx1& x, const CVectorNx1& n, CBasicStatistics::SSampleCovariances& covariances) { + static inline void add(const CVectorNx1& x, + const CVectorNx1& n, + CBasicStatistics::SSampleCovariances& covariances) { covariances.add(x, n, 0); } }; @@ -1456,12 +1536,14 @@ struct SCovariancesCustomAdd> { template struct SCovariancesLedoitWolf> { template - static void estimate(const std::vector>& points, CBasicStatistics::SSampleCovariances& covariances) { + static void estimate(const std::vector>& points, + CBasicStatistics::SSampleCovariances& covariances) { U d{static_cast(N)}; U n{CBasicStatistics::count(covariances)}; const CVectorNx1& m{CBasicStatistics::mean(covariances)}; - const CSymmetricMatrixNxN& s{CBasicStatistics::maximumLikelihoodCovariances(covariances)}; + const CSymmetricMatrixNxN& s{ + CBasicStatistics::maximumLikelihoodCovariances(covariances)}; U mn{s.trace() / d}; U dn{pow2((s - CVectorNx1{mn}.diagonal()).frobenius()) / d}; @@ -1474,7 +1556,8 @@ struct SCovariancesLedoitWolf> { bn = std::min(bn, dn); LOG_TRACE(<< "m = " << mn << ", d = " << dn << ", b = " << bn); - covariances.s_Covariances = CVectorNx1{bn / dn * mn}.diagonal() + (U{1} - bn / dn) * covariances.s_Covariances; + covariances.s_Covariances = CVectorNx1{bn / dn * mn}.diagonal() + + (U{1} - bn / dn) * covariances.s_Covariances; } template diff --git a/include/maths/CBasicStatisticsPersist.h b/include/maths/CBasicStatisticsPersist.h index a5075bf5d0..ebd8fcd4fe 100644 --- a/include/maths/CBasicStatisticsPersist.h +++ b/include/maths/CBasicStatisticsPersist.h @@ -113,7 +113,8 @@ bool CBasicStatistics::SSampleCentralMoments::fromDelimited(const std: } if (!basic_statistics_detail::stringToType(token, s_Moments[index++])) { - LOG_ERROR(<< "Invalid moment " << index << " : element " << token << " in " << str); + LOG_ERROR(<< "Invalid moment " << index << " : element " << token + << " in " << str); return false; } @@ -178,8 +179,8 @@ bool CBasicStatistics::SSampleCovariances::fromDelimited(std::string str) template std::string CBasicStatistics::SSampleCovariances::toDelimited() const { - return s_Count.toDelimited() + CLinearAlgebra::DELIMITER + s_Mean.toDelimited() + CLinearAlgebra::DELIMITER + - s_Covariances.toDelimited(); + return s_Count.toDelimited() + CLinearAlgebra::DELIMITER + s_Mean.toDelimited() + + CLinearAlgebra::DELIMITER + s_Covariances.toDelimited(); } template @@ -226,7 +227,8 @@ bool CBasicStatistics::COrderStatisticsImpl::fromDelimited(c m_Statistics[--m_UnusedCount] = statistic; while (delimPos != value.size()) { - std::size_t nextDelimPos{std::min(value.find(INTERNAL_DELIMITER, delimPos + 1), value.size())}; + std::size_t nextDelimPos{ + std::min(value.find(INTERNAL_DELIMITER, delimPos + 1), value.size())}; statistic_.assign(value, delimPos + 1, nextDelimPos - delimPos - 1); if (basic_statistics_detail::stringToType(statistic_, statistic) == false) { LOG_ERROR(<< "Invalid statistic '" << statistic_ << "' in '" << value << "'"); diff --git a/include/maths/CBjkstUniqueValues.h b/include/maths/CBjkstUniqueValues.h index d2afc54c48..f0ca4ad3df 100644 --- a/include/maths/CBjkstUniqueValues.h +++ b/include/maths/CBjkstUniqueValues.h @@ -129,7 +129,8 @@ class MATHS_EXPORT CBjkstUniqueValues { void swap(SSketch& other); //! Create by traversing a state document. 
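Note: the estimate() hunk above implements a Ledoit-Wolf style shrinkage. With m = tr(S)/d and d_n = ||S - mI||_F^2 / d, the covariance matrix is pulled toward the scaled identity by S <- (b/d_n) mI + (1 - b/d_n) S. A sketch using Eigen for brevity, taking the data-driven shrinkage intensity b as given, since its estimator needs the sample points as in the hunk:

    #include <Eigen/Core>
    #include <algorithm>

    // Sketch of Ledoit-Wolf style shrinkage toward a scaled identity.
    Eigen::MatrixXd shrink(const Eigen::MatrixXd& s, double bn) {
        double d = static_cast<double>(s.rows());
        double mn = s.trace() / d; // mean of the eigenvalue spectrum
        Eigen::MatrixXd target = mn * Eigen::MatrixXd::Identity(s.rows(), s.cols());
        double dn = (s - target).squaredNorm() / d; // dispersion about the target
        if (dn == 0.0) {
            return s; // already at the shrinkage target
        }
        bn = std::min(bn, dn); // shrinkage never exceeds the dispersion
        return (bn / dn) * target + (1.0 - bn / dn) * s;
    }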
- bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::size_t numberHashes); + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, + std::size_t numberHashes); //! Convert to a node tree. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; diff --git a/include/maths/CBootstrapClusterer.h b/include/maths/CBootstrapClusterer.h index 0e2e27c32f..a54420129d 100644 --- a/include/maths/CBootstrapClusterer.h +++ b/include/maths/CBootstrapClusterer.h @@ -73,8 +73,8 @@ class CBootstrapClusterer { using TSizeVecVecVec = std::vector; using TPointVec = std::vector; using TPointVecVec = std::vector; - using TGraph = boost:: - adjacency_list>; + using TGraph = + boost::adjacency_list>; using TVertex = typename boost::graph_traits::vertex_descriptor; using TEdge = typename boost::graph_traits::edge_descriptor; using TVertexItr = typename boost::graph_traits::vertex_iterator; @@ -84,7 +84,8 @@ class CBootstrapClusterer { public: CBootstrapClusterer(double overlapThreshold, double chainingFactor) - : m_OverlapThreshold(overlapThreshold), m_ChainingFactor(std::max(chainingFactor, 1.0)) {} + : m_OverlapThreshold(overlapThreshold), + m_ChainingFactor(std::max(chainingFactor, 1.0)) {} //! Run clustering on \p b bootstrap samples of \p points //! and find persistent clusters of the data. @@ -117,44 +118,58 @@ class CBootstrapClusterer { //! \brief Checks if a cluster is empty. struct SIsEmpty { - bool operator()(const TPointVec& cluster) const { return cluster.empty(); } + bool operator()(const TPointVec& cluster) const { + return cluster.empty(); + } }; //! Check if the second elements are equal. struct SSecondEqual { - bool operator()(const TDoubleSizePr& lhs, const TDoubleSizePr& rhs) const { return lhs.second == rhs.second; } + bool operator()(const TDoubleSizePr& lhs, const TDoubleSizePr& rhs) const { + return lhs.second == rhs.second; + } }; //! \brief State used for the maximum adjacency minimum cost //! cut search. struct SCutState { SCutState(std::size_t seed, const TGraph& graph) - : s_V(boost::num_vertices(graph)), s_ToVisit(1, seed), s_Adjacency(s_V, 0), s_Cut(0.0), s_A(0) { + : s_V(boost::num_vertices(graph)), s_ToVisit(1, seed), + s_Adjacency(s_V, 0), s_Cut(0.0), s_A(0) { this->initializeQueue(); } //! Get the cost of the current cut. - double cost() const { return s_Cut / static_cast(s_A * (s_V - s_A)); } + double cost() const { + return s_Cut / static_cast(s_A * (s_V - s_A)); + } //! Check if the vertex is to visit. - bool toVisit(std::size_t i) const { return this->toVisit(s_ToVisit.size(), i); } + bool toVisit(std::size_t i) const { + return this->toVisit(s_ToVisit.size(), i); + } //! Check if the vertex is to visit. - bool toVisit(std::size_t n, std::size_t i) const { return std::binary_search(s_ToVisit.begin(), s_ToVisit.begin() + n, i); } + bool toVisit(std::size_t n, std::size_t i) const { + return std::binary_search(s_ToVisit.begin(), s_ToVisit.begin() + n, i); + } //! Get the next vertex to visit. std::size_t next() const { return s_Queue.front().second; } //! Get the first right or equal vertex. std::size_t nextToVisit(std::size_t i) const { - return static_cast(std::lower_bound(s_ToVisit.begin(), s_ToVisit.end(), i) - s_ToVisit.begin()); + return static_cast( + std::lower_bound(s_ToVisit.begin(), s_ToVisit.end(), i) - + s_ToVisit.begin()); } //! Merge any vertices to visit after \p n. 
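Note: SCutState::cost() above scores a candidate cut by its weight normalized by the number of vertex pairs it separates, i.e.

    #include <cstddef>

    // Normalized cut: cut weight over the |A| * (V - |A|) separated pairs.
    double cutCost(double cutWeight, std::size_t a, std::size_t v) {
        return cutWeight / static_cast<double>(a * (v - a));
    }

so a cheap cut that splits the component evenly is preferred over one that shaves off a single vertex.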
void mergeAfter(std::size_t n) { if (s_ToVisit.size() > n) { std::sort(s_ToVisit.begin() + n, s_ToVisit.end()); - std::inplace_merge(s_ToVisit.begin(), s_ToVisit.begin() + n, s_ToVisit.end()); + std::inplace_merge(s_ToVisit.begin(), s_ToVisit.begin() + n, + s_ToVisit.end()); } } @@ -207,7 +222,10 @@ class CBootstrapClusterer { //! \param[out] result Filled in with the \p b bootstrap //! clusterings. template - std::size_t bootstrapClusters(std::size_t b, CLUSTERER& clusterer, TPointVec& points, TSizeVecVecVec& result) { + std::size_t bootstrapClusters(std::size_t b, + CLUSTERER& clusterer, + TPointVec& points, + TSizeVecVecVec& result) { std::size_t n = points.size(); LOG_TRACE(<< "# points = " << n); @@ -261,7 +279,9 @@ class CBootstrapClusterer { //! \param[out] graph A graph whose vertices are the clusters //! in each bootstrap clustering and whose edges connect clusters //! which overlap significantly. - void buildClusterGraph(const TPointVec& points, TSizeVecVecVec& bootstrapClusters, TGraph& graph) const { + void buildClusterGraph(const TPointVec& points, + TSizeVecVecVec& bootstrapClusters, + TGraph& graph) const { using TSizeSizePrUSet = boost::unordered_set; using TSizeSizePrUSetCItr = TSizeSizePrUSet::const_iterator; @@ -275,7 +295,8 @@ class CBootstrapClusterer { for (std::size_t i = 0u; i < bootstrapClusters.size(); ++i) { for (std::size_t j = 0u; j < bootstrapClusters[i].size(); ++j) { - std::sort(bootstrapClusters[i][j].begin(), bootstrapClusters[i][j].end()); + std::sort(bootstrapClusters[i][j].begin(), + bootstrapClusters[i][j].end()); } } TSizeVec cik; @@ -295,7 +316,8 @@ class CBootstrapClusterer { overlaps.clear(); double sum = 0.0; - for (std::size_t l = 0u; !cik.empty() && l < bootstrapClusters[j].size(); ++l) { + for (std::size_t l = 0u; + !cik.empty() && l < bootstrapClusters[j].size(); ++l) { const TSizeVec& cjl = bootstrapClusters[j][l]; double o = static_cast(cik.size()); CSetTools::inplace_set_difference(cik, cjl.begin(), cjl.end()); @@ -316,10 +338,11 @@ class CBootstrapClusterer { std::swap(u, v); } if (edges.insert(std::make_pair(u, v)).second) { - boost::put(boost::edge_weight, - graph, + boost::put(boost::edge_weight, graph, boost::add_edge(u, v, graph).first, - std::min(m_ChainingFactor * (overlaps[l] - m_OverlapThreshold * sum), 1.0)); + std::min(m_ChainingFactor * + (overlaps[l] - m_OverlapThreshold * sum), + 1.0)); } } } @@ -343,16 +366,20 @@ class CBootstrapClusterer { for (boost::tie(k, endk) = boost::out_edges(v, graph); k != endk; ++k) { std::size_t w = boost::target(*k, graph); if (this->fromVertex(w).first == i->second) { - consistent.push_back(std::make_pair(weight * boost::get(boost::edge_weight, graph, *k), w)); + consistent.push_back(std::make_pair( + weight * boost::get(boost::edge_weight, graph, *k), w)); } } } std::sort(consistent.begin(), consistent.end(), COrderings::SSecondLess()); - consistent.erase(std::unique(consistent.begin(), consistent.end(), SSecondEqual()), consistent.end()); + consistent.erase(std::unique(consistent.begin(), consistent.end(), SSecondEqual()), + consistent.end()); LOG_TRACE(<< "consistent = " << core::CContainerPrinter::print(consistent)); for (std::size_t k = 0u; k < consistent.size(); ++k) { - boost::put(boost::edge_weight, graph, boost::add_edge(u, consistent[k].second, graph).first, consistent[k].first); + boost::put(boost::edge_weight, graph, + boost::add_edge(u, consistent[k].second, graph).first, + consistent[k].first); } } } @@ -367,7 +394,10 @@ class CBootstrapClusterer { //! \p bootstrapClusters. //! 
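Note: buildClusterGraph() above connects clusters from different bootstrap clusterings when their overlap is significant, computed on sorted index vectors. The core statistic in isolation, as a sketch; it assumes sorted inputs and a non-empty first argument:

    #include <algorithm>
    #include <cstddef>
    #include <iterator>
    #include <vector>

    // Fraction of cluster a's points that also belong to cluster b.
    double overlap(const std::vector<std::size_t>& a, const std::vector<std::size_t>& b) {
        std::vector<std::size_t> common;
        std::set_intersection(a.begin(), a.end(), b.begin(), b.end(),
                              std::back_inserter(common));
        return static_cast<double>(common.size()) / static_cast<double>(a.size());
    }

Edges are then weighted by min(chainingFactor * (overlap - overlapThreshold * sum), 1), so only overlaps above the threshold contribute.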
\param[out] result Filled in with the majority vote clusters //! of \p bootstrapClusters. - void buildClusters(const TPointVec& points, const TSizeVecVecVec& bootstrapClusters, const TGraph& graph, TPointVecVec& result) const { + void buildClusters(const TPointVec& points, + const TSizeVecVecVec& bootstrapClusters, + const TGraph& graph, + TPointVecVec& result) const { using TSizeSizeUMap = boost::unordered_map; using TSizeSizeUMapCItr = TSizeSizeUMap::const_iterator; using TSizeSizeUMapVec = std::vector; @@ -407,7 +437,8 @@ class CBootstrapClusterer { } std::size_t c = k->second; std::size_t n_ = voters[j].size(); - if (COrderings::lexicographical_compare(c, n_, cmax, nmax, std::greater())) { + if (COrderings::lexicographical_compare( + c, n_, cmax, nmax, std::greater())) { jmax = j; cmax = c; nmax = n_; @@ -423,7 +454,8 @@ class CBootstrapClusterer { // It is possible that after voting clusters contain // no points. Remove these. - result.erase(std::remove_if(result.begin(), result.end(), SIsEmpty()), result.end()); + result.erase(std::remove_if(result.begin(), result.end(), SIsEmpty()), + result.end()); } //! Identify subsets of the component of \p graph which @@ -468,7 +500,9 @@ class CBootstrapClusterer { // to separate (by removing edges). if (this->separate(component, parities)) { LOG_TRACE(<< "Separated component"); - LOG_TRACE(<< "parities = " << core::CContainerPrinter::print(parities.begin(), parities.begin() + Vi)); + LOG_TRACE(<< "parities = " + << core::CContainerPrinter::print(parities.begin(), + parities.begin() + Vi)); for (std::size_t j = 0u; j < Vi; ++j) { if (parities[j]) { components[inverse[j]] = n; @@ -536,7 +570,9 @@ class CBootstrapClusterer { weights[i] += weights[i - 1]; } for (std::size_t i = 1u; i <= V / 2 + 1; ++i) { - std::size_t C = std::max(i * (D - std::min(D, i - 1)), (i * (V - i)) - std::min(i * (V - i), (V * (V - 1)) / 2 - E)); + std::size_t C = std::max( + i * (D - std::min(D, i - 1)), + (i * (V - i)) - std::min(i * (V - i), (V * (V - 1)) / 2 - E)); bound = std::min(bound, weights[C] / static_cast(i * (V - i))); } LOG_TRACE(<< "bound = " << bound << " threshold = " << threshold); @@ -555,13 +591,17 @@ class CBootstrapClusterer { for (std::size_t i = 0u; i < seeds.size(); ++i) { if (cut.empty()) { TEdgeItr seed = boost::edges(graph).first; - for (std::size_t j = 0u; j < static_cast(seeds[i] * static_cast(E)); ++j, ++seed) { + for (std::size_t j = 0u; + j < static_cast(seeds[i] * static_cast(E)); + ++j, ++seed) { } - cut.push_back(std::make_pair(boost::source(*seed, graph), boost::target(*seed, graph))); + cut.push_back(std::make_pair(boost::source(*seed, graph), + boost::target(*seed, graph))); } double cost; - if (this->cutSearch(cut.back().first, cut.back().second, graph, threshold, cost, result)) { + if (this->cutSearch(cut.back().first, cut.back().second, graph, + threshold, cost, result)) { return true; } @@ -579,7 +619,8 @@ class CBootstrapClusterer { if (n > 0) { std::sort(cut.begin() + n, cut.end()); newCut.clear(); - std::set_intersection(cut.begin(), cut.begin() + n, cut.begin() + n, cut.end(), std::back_inserter(newCut)); + std::set_intersection(cut.begin(), cut.begin() + n, cut.begin() + n, + cut.end(), std::back_inserter(newCut)); cut.swap(newCut); } } @@ -614,7 +655,12 @@ class CBootstrapClusterer { //! the lowest cost cut. //! \return True if the cut should split \p graph and false //! otherwise. 
- bool cutSearch(std::size_t u, std::size_t v, const TGraph& graph, double threshold, double& cost, TBoolVec& parities) const { + bool cutSearch(std::size_t u, + std::size_t v, + const TGraph& graph, + double threshold, + double& cost, + TBoolVec& parities) const { LOG_TRACE(<< "Seed edge = (" << u << "," << v << ")"); std::size_t V = boost::num_vertices(graph); @@ -647,7 +693,8 @@ class CBootstrapClusterer { ++sizes[components[i]]; } } - std::size_t smallest = static_cast(std::min_element(sizes.begin(), sizes.end()) - sizes.begin()); + std::size_t smallest = static_cast( + std::min_element(sizes.begin(), sizes.end()) - sizes.begin()); LOG_TRACE(<< "sizes = " << core::CContainerPrinter::print(sizes)); LOG_TRACE(<< "smallest = " << smallest); @@ -684,8 +731,8 @@ class CBootstrapClusterer { cost = lowestCost; parities.swap(best); - LOG_TRACE(<< "Best cut = " << bestCut << ", |A| = " << bestA << ", |B| = " << V - bestA << ", cost = " << cost - << ", threshold = " << threshold); + LOG_TRACE(<< "Best cut = " << bestCut << ", |A| = " << bestA << ", |B| = " << V - bestA + << ", cost = " << cost << ", threshold = " << threshold); return cost < threshold; } @@ -700,16 +747,20 @@ class CBootstrapClusterer { //! This is intended for use with boost::filtered_graph. class CParityFilter { public: - CParityFilter() : m_Graph(nullptr), m_Parities(nullptr), m_Parity(false) {} + CParityFilter() + : m_Graph(nullptr), m_Parities(nullptr), m_Parity(false) {} CParityFilter(const TGraph& graph, const TBoolVec& parities, bool parity) : m_Graph(&graph), m_Parities(&parities), m_Parity(parity) {} //! Check the vertex parity. - bool operator()(const TVertex& v) const { return (*m_Parities)[v] == m_Parity; } + bool operator()(const TVertex& v) const { + return (*m_Parities)[v] == m_Parity; + } //! Check the end vertices' parity. bool operator()(const TEdge& e) const { - return (*m_Parities)[boost::source(e, *m_Graph)] == m_Parity && (*m_Parities)[boost::target(e, *m_Graph)] == m_Parity; + return (*m_Parities)[boost::source(e, *m_Graph)] == m_Parity && + (*m_Parities)[boost::target(e, *m_Graph)] == m_Parity; } private: @@ -732,8 +783,7 @@ class CBootstrapClusterer { std::size_t u = boost::source(*j, graph); std::size_t v = boost::target(*j, graph); if (u < v && std::binary_search(inverse.begin(), inverse.end(), v)) { - boost::put(boost::edge_weight, - result, + boost::put(boost::edge_weight, result, boost::add_edge(mapping[u], mapping[v], result).first, boost::get(boost::edge_weight, graph, *j)); } @@ -752,7 +802,8 @@ class CBootstrapClusterer { std::size_t candidate = state.next(); std::size_t v = state.s_ToVisit[candidate]; const_cast(parities)[v] = false; - bool connected = (this->positiveSubgraphConnected(graph, parities, components) == 1); + bool connected = + (this->positiveSubgraphConnected(graph, parities, components) == 1); const_cast(parities)[v] = true; if (connected) { return true; @@ -796,7 +847,9 @@ class CBootstrapClusterer { } //! Check that the subgraph with true parity is connected. - std::size_t positiveSubgraphConnected(const TGraph& graph, const TBoolVec& parities, TSizeVec& components) const { + std::size_t positiveSubgraphConnected(const TGraph& graph, + const TBoolVec& parities, + TSizeVec& components) const { using TParityGraph = boost::filtered_graph; CParityFilter parityFilter(graph, parities, true); TParityGraph parityGraph(graph, parityFilter, parityFilter); @@ -806,12 +859,17 @@ class CBootstrapClusterer { //! Extract the vertex for the \p j'th cluster of the //! 
\p i'th bootstrap clustering. - std::size_t toVertex(std::size_t i, std::size_t j) const { return m_Offsets[i] + j; } + std::size_t toVertex(std::size_t i, std::size_t j) const { + return m_Offsets[i] + j; + } //! Extract the clustering and cluster from the vertex //! representation \p v. TSizeSizePr fromVertex(std::size_t v) const { - std::size_t i = static_cast(std::upper_bound(m_Offsets.begin(), m_Offsets.end(), v) - m_Offsets.begin()) - 1; + std::size_t i = static_cast( + std::upper_bound(m_Offsets.begin(), m_Offsets.end(), v) - + m_Offsets.begin()) - + 1; return std::make_pair(i, v - m_Offsets[i]); } @@ -874,7 +932,9 @@ class CBootstrapClustererFacadeExtractClusters { for (std::size_t j = 0u; j < clusterPoints.size(); ++j) { std::size_t k = points.size(); - for (TPointVecCItr l = this->begin(points, clusterPoints[j]), end = this->end(points, clusterPoints[j]); l != end; ++l) { + for (TPointVecCItr l = this->begin(points, clusterPoints[j]), + end = this->end(points, clusterPoints[j]); + l != end; ++l) { if (*l == clusterPoints[j]) { k = static_cast(l - points.begin()); break; @@ -893,10 +953,14 @@ class CBootstrapClustererFacadeExtractClusters { private: //! Get the first point equal or right of \p x. - TPointVecCItr begin(const TPointVec& points, const POINT& x) const { return std::lower_bound(points.begin(), points.end(), x); } + TPointVecCItr begin(const TPointVec& points, const POINT& x) const { + return std::lower_bound(points.begin(), points.end(), x); + } //! Get the first point right of \p x. - TPointVecCItr end(const TPointVec& points, const POINT& x) const { return std::upper_bound(points.begin(), points.end(), x); } + TPointVecCItr end(const TPointVec& points, const POINT& x) const { + return std::upper_bound(points.begin(), points.end(), x); + } }; //! \brief Adapts clustering implementations for use by the bootstrap @@ -907,7 +971,8 @@ class CBootstrapClustererFacade {}; //! \brief Adapts the x-means implementation for use by the bootstrap //! clusterer. template -class CBootstrapClustererFacade> : private CBootstrapClustererFacadeExtractClusters { +class CBootstrapClustererFacade> + : private CBootstrapClustererFacadeExtractClusters { public: using TSizeVec = std::vector; using TSizeVecVec = std::vector; @@ -933,7 +998,8 @@ class CBootstrapClustererFacade> : private CBootstrapCluste m_Xmeans.setPoints(tmp); // Run - m_Xmeans.run(m_ImproveParamsKmeansIterations, m_ImproveStructureClusterSeeds, m_ImproveStructureKmeansIterations); + m_Xmeans.run(m_ImproveParamsKmeansIterations, m_ImproveStructureClusterSeeds, + m_ImproveStructureKmeansIterations); // Extract TPointVecCRefVec clusterPoints; @@ -960,7 +1026,8 @@ class CBootstrapClustererFacade> : private CBootstrapCluste //! \brief Adapts the x-means implementation for use by the bootstrap //! clusterer. 
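Note: toVertex()/fromVertex() above flatten the (clustering, cluster) pair into a single vertex id using prefix offsets, and invert the mapping with upper_bound. A self-contained version:

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    using TSizeSizePr = std::pair<std::size_t, std::size_t>;

    // offsets[i] is the id of the first cluster of the i'th clustering.
    std::size_t toVertex(const std::vector<std::size_t>& offsets,
                         std::size_t i, std::size_t j) {
        return offsets[i] + j;
    }

    TSizeSizePr fromVertex(const std::vector<std::size_t>& offsets, std::size_t v) {
        std::size_t i = static_cast<std::size_t>(
            std::upper_bound(offsets.begin(), offsets.end(), v) - offsets.begin()) - 1;
        return {i, v - offsets[i]};
    }

For example, with offsets {0, 3, 7}, vertex 4 maps back to cluster 1 of clustering 1, and toVertex recovers 4 from that pair.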
template -class CBootstrapClustererFacade> : private CBootstrapClustererFacadeExtractClusters { +class CBootstrapClustererFacade> + : private CBootstrapClustererFacadeExtractClusters { public: using TSizeVec = std::vector; using TSizeVecVec = std::vector; @@ -1033,7 +1100,8 @@ void bootstrapCluster(std::vector& points, double chainingFactor, std::vector>& result) { CBootstrapClustererFacade> clusterer( - xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations); + xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, + improveStructureKmeansIterations); CBootstrapClusterer bootstrapClusterer(overlapThreshold, chainingFactor); bootstrapClusterer.run(B, clusterer, points, result); } diff --git a/include/maths/CBoundingBox.h b/include/maths/CBoundingBox.h index f25076bf79..f2ee9a7dce 100644 --- a/include/maths/CBoundingBox.h +++ b/include/maths/CBoundingBox.h @@ -27,7 +27,9 @@ template class CBoundingBox { public: //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + static bool dynamicSizeAlwaysZero() { + return core::memory_detail::SDynamicSizeAlwaysZero::value(); + } using TPointPrecise = typename SFloatingPoint::Type; public: diff --git a/include/maths/CCalendarComponent.h b/include/maths/CCalendarComponent.h index 9e92a008cf..d5ec0b5fc1 100644 --- a/include/maths/CCalendarComponent.h +++ b/include/maths/CCalendarComponent.h @@ -146,7 +146,9 @@ class MATHS_EXPORT CCalendarComponent : private CDecompositionComponent { private: //! Create by traversing a state document. - bool acceptRestoreTraverser(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser); + bool acceptRestoreTraverser(double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser); private: //! The mean and variance in collection of buckets covering the period. diff --git a/include/maths/CCalendarComponentAdaptiveBucketing.h b/include/maths/CCalendarComponentAdaptiveBucketing.h index 0152d44f4c..dd389e9c0d 100644 --- a/include/maths/CCalendarComponentAdaptiveBucketing.h +++ b/include/maths/CCalendarComponentAdaptiveBucketing.h @@ -38,9 +38,13 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket public: CCalendarComponentAdaptiveBucketing(); - explicit CCalendarComponentAdaptiveBucketing(CCalendarFeature feature, double decayRate = 0.0, double minimumBucketLength = 0.0); + explicit CCalendarComponentAdaptiveBucketing(CCalendarFeature feature, + double decayRate = 0.0, + double minimumBucketLength = 0.0); //! Construct by traversing a state document. - CCalendarComponentAdaptiveBucketing(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser); + CCalendarComponentAdaptiveBucketing(double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser); //! Persist by passing information to the supplied inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; @@ -186,7 +190,8 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket }; //! Create a free function which will be found by Koenig lookup. 
-inline void swap(CCalendarComponentAdaptiveBucketing& lhs, CCalendarComponentAdaptiveBucketing& rhs) { +inline void swap(CCalendarComponentAdaptiveBucketing& lhs, + CCalendarComponentAdaptiveBucketing& rhs) { lhs.swap(rhs); } } diff --git a/include/maths/CCalendarFeature.h b/include/maths/CCalendarFeature.h index 50271499af..f4ae19f74c 100644 --- a/include/maths/CCalendarFeature.h +++ b/include/maths/CCalendarFeature.h @@ -24,7 +24,8 @@ namespace maths { //! IMPLEMENTATION:\n //! Note that this purposely doesn't use an enum for encoding the feature //! so that the member size is only 16 bits rather than sizeof(int). -class MATHS_EXPORT CCalendarFeature : boost::less_than_comparable> { +class MATHS_EXPORT CCalendarFeature + : boost::less_than_comparable> { public: //! See core::CMemory. static bool dynamicSizeAlwaysZero() { return true; } diff --git a/include/maths/CCategoricalTools.h b/include/maths/CCategoricalTools.h index deda6f9434..d55f20dce9 100644 --- a/include/maths/CCategoricalTools.h +++ b/include/maths/CCategoricalTools.h @@ -48,8 +48,10 @@ class MATHS_EXPORT CCategoricalTools : core::CNonInstantiatable, core::CNonCopya //! \param[in] ni The category counts. //! \param[out] result Filled in with an estimate of the probability //! of seeing a less likely sample than category counts \p ni. - static bool - probabilityOfLessLikelyMultinomialSample(const TDoubleVec& probabilities, const TSizeVec& i, const TSizeVec& ni, double& result); + static bool probabilityOfLessLikelyMultinomialSample(const TDoubleVec& probabilities, + const TSizeVec& i, + const TSizeVec& ni, + double& result); //! Compute the probability of seeing less likely counts than \p ni //! independently for each category in \p i whose probabilities are @@ -99,7 +101,8 @@ class MATHS_EXPORT CCategoricalTools : core::CNonInstantiatable, core::CNonCopya //! //! \warning It is the callers responsibility to ensure that the //! probabilities are normalized. - static bool expectedDistinctCategories(const TDoubleVec& probabilities, double n, double& result); + static bool + expectedDistinctCategories(const TDoubleVec& probabilities, double n, double& result); //! Get the log of the binomial coefficient \f$\binom{n}{m}\f$. static double logBinomialCoefficient(std::size_t n, std::size_t m); @@ -122,7 +125,8 @@ class MATHS_EXPORT CCategoricalTools : core::CNonInstantiatable, core::CNonCopya //! \param[in] p The probability of success. //! \param[in] m The number of successes. //! \param[out] result Filled in with the log probability. - static maths_t::EFloatingPointErrorStatus logBinomialProbability(std::size_t n, double p, std::size_t m, double& result); + static maths_t::EFloatingPointErrorStatus + logBinomialProbability(std::size_t n, double p, std::size_t m, double& result); //! Compute the log of the probability of a sample of \p ni counts //! of categories from the multinomial with number of trials equal diff --git a/include/maths/CChecksum.h b/include/maths/CChecksum.h index bef5f92e1d..4580b2a9ac 100644 --- a/include/maths/CChecksum.h +++ b/include/maths/CChecksum.h @@ -117,21 +117,27 @@ class CChecksumImpl { //! Checksum of double. static uint64_t dispatch(uint64_t seed, double target) { - return dispatch(seed, core::CStringUtils::typeToStringPrecise(target, core::CIEEE754::E_SinglePrecision)); + return dispatch(seed, core::CStringUtils::typeToStringPrecise( + target, core::CIEEE754::E_SinglePrecision)); } //! Checksum of a universal hash function. 
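Note: the free swap above exists so that generic code using the "using std::swap" two-step finds the efficient member swap by argument-dependent lookup instead of falling back to copy-based std::swap:

    #include <utility>

    template<typename T>
    void genericReset(T& a, T& b) {
        using std::swap;
        swap(a, b); // ADL prefers maths::swap(...) for maths types
    }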
- static uint64_t dispatch(uint64_t seed, const core::CHashing::CUniversalHash::CUInt32UnrestrictedHash& target) { + static uint64_t + dispatch(uint64_t seed, + const core::CHashing::CUniversalHash::CUInt32UnrestrictedHash& target) { seed = core::CHashing::hashCombine(seed, static_cast(target.a())); return core::CHashing::hashCombine(seed, static_cast(target.b())); } //! Checksum of float storage. - static uint64_t dispatch(uint64_t seed, CFloatStorage target) { return dispatch(seed, target.toString()); } + static uint64_t dispatch(uint64_t seed, CFloatStorage target) { + return dispatch(seed, target.toString()); + } //! Checksum of string. static uint64_t dispatch(uint64_t seed, const std::string& target) { - return core::CHashing::safeMurmurHash64(target.data(), static_cast(target.size()), seed); + return core::CHashing::safeMurmurHash64( + target.data(), static_cast(target.size()), seed); } //! Checksum a stored string pointer. @@ -148,13 +154,15 @@ class CChecksumImpl { //! Checksum of a optional. template static uint64_t dispatch(uint64_t seed, const boost::optional& target) { - return !target ? seed : CChecksumImpl::value>::dispatch(seed, *target); + return !target ? seed + : CChecksumImpl::value>::dispatch(seed, *target); } //! Checksum a pointer. template static uint64_t dispatch(uint64_t seed, const boost::shared_ptr& target) { - return !target ? seed : CChecksumImpl::value>::dispatch(seed, *target); + return !target ? seed + : CChecksumImpl::value>::dispatch(seed, *target); } //! Checksum a pair. @@ -166,7 +174,9 @@ class CChecksumImpl { //! Checksum an Eigen dense vector. template - static uint64_t dispatch(uint64_t seed, const Eigen::Matrix& target) { + static uint64_t + dispatch(uint64_t seed, + const Eigen::Matrix& target) { std::ptrdiff_t dimension = target.size(); if (dimension > 0) { for (std::ptrdiff_t i = 0; i + 1 < dimension; ++i) { @@ -179,7 +189,8 @@ class CChecksumImpl { //! Checksum an Eigen sparse vector. template - static uint64_t dispatch(uint64_t seed, const Eigen::SparseVector& target) { + static uint64_t + dispatch(uint64_t seed, const Eigen::SparseVector& target) { using TIterator = typename Eigen::SparseVector::InnerIterator; uint64_t result = seed; for (TIterator i(target, 0); i; ++i) { @@ -191,9 +202,12 @@ class CChecksumImpl { //! Checksum of an annotated vector. 
template - static uint64_t dispatch(uint64_t seed, const CAnnotatedVector& target) { - seed = CChecksumImpl::value>::dispatch(seed, static_cast(target)); - return CChecksumImpl::value>::dispatch(seed, target.annotation()); + static uint64_t + dispatch(uint64_t seed, const CAnnotatedVector& target) { + seed = CChecksumImpl::value>::dispatch( + seed, static_cast(target)); + return CChecksumImpl::value>::dispatch( + seed, target.annotation()); } }; @@ -240,7 +254,8 @@ class CChecksumImpl { using CItr = typename T::const_iterator; uint64_t result = seed; for (CItr itr = target.begin(); itr != target.end(); ++itr) { - result = CChecksumImpl::value>::dispatch(result, *itr); + result = CChecksumImpl::value>::dispatch( + result, *itr); } return result; } @@ -253,7 +268,8 @@ class CChecksumImpl { TCRefVec ordered; ordered.reserve(target.size()); - for (typename boost::unordered_set::const_iterator itr = target.begin(); itr != target.end(); ++itr) { + for (typename boost::unordered_set::const_iterator itr = target.begin(); + itr != target.end(); ++itr) { ordered.push_back(TCRef(*itr)); } @@ -272,7 +288,8 @@ class CChecksumImpl { TUCRefVCRefPrVec ordered; ordered.reserve(target.size()); - for (typename boost::unordered_map::const_iterator itr = target.begin(); itr != target.end(); ++itr) { + for (typename boost::unordered_map::const_iterator itr = target.begin(); + itr != target.end(); ++itr) { ordered.push_back(TUCRefVCRefPr(TUCRef(itr->first), TVCRef(itr->second))); } @@ -282,7 +299,9 @@ class CChecksumImpl { } //! Handle std::string which has a const_iterator. - static uint64_t dispatch(uint64_t seed, const std::string& target) { return CChecksumImpl::dispatch(seed, target); } + static uint64_t dispatch(uint64_t seed, const std::string& target) { + return CChecksumImpl::dispatch(seed, target); + } }; //! Convenience function to select implementation. diff --git a/include/maths/CClusterer.h b/include/maths/CClusterer.h index f3464a09e1..f175b2d0f3 100644 --- a/include/maths/CClusterer.h +++ b/include/maths/CClusterer.h @@ -140,7 +140,8 @@ class CClusterer : public CClustererTypes { //! //! \param splitFunc Optional callback for when a cluster is split. //! \param mergeFunc Optional callback for when two clusters are merged. - explicit CClusterer(const TSplitFunc& splitFunc = CDoNothing(), const TMergeFunc& mergeFunc = CDoNothing()) + explicit CClusterer(const TSplitFunc& splitFunc = CDoNothing(), + const TMergeFunc& mergeFunc = CDoNothing()) : m_SplitFunc(splitFunc), m_MergeFunc(mergeFunc) {} virtual ~CClusterer() {} @@ -181,7 +182,9 @@ class CClusterer : public CClustererTypes { //! Gets the index of the cluster(s) to which \p point belongs //! together with their weighting factors. - virtual void cluster(const TPointPrecise& point, TSizeDoublePr2Vec& result, double count = 1.0) const = 0; + virtual void cluster(const TPointPrecise& point, + TSizeDoublePr2Vec& result, + double count = 1.0) const = 0; //! Add a point without caring about its cluster. void add(const TPointPrecise& point, double count = 1.0) { @@ -191,7 +194,8 @@ class CClusterer : public CClustererTypes { //! Update the clustering with \p point and return its cluster(s) //! together with their weighting factors. - virtual void add(const TPointPrecise& point, TSizeDoublePr2Vec& clusters, double count = 1.0) = 0; + virtual void + add(const TPointPrecise& point, TSizeDoublePr2Vec& clusters, double count = 1.0) = 0; //! Update the clustering with \p points. 
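Note: the unordered container overloads above copy references into a vector and sort before hashing, so the checksum does not depend on hash-table iteration order. The strategy in isolation as a sketch; the hash combiner below is an illustrative stand-in for the library's hashCombine:

    #include <algorithm>
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <unordered_set>
    #include <vector>

    std::uint64_t checksum(std::uint64_t seed,
                           const std::unordered_set<std::string>& target) {
        // Impose a deterministic order without copying the elements.
        std::vector<std::reference_wrapper<const std::string>> ordered(
            target.begin(), target.end());
        std::sort(ordered.begin(), ordered.end(),
                  [](const std::string& lhs, const std::string& rhs) {
                      return lhs < rhs;
                  });
        for (const std::string& value : ordered) {
            seed = 31 * seed + std::hash<std::string>{}(value); // stand-in combiner
        }
        return seed;
    }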
void add(const TPointPreciseVec& points) { @@ -221,7 +225,9 @@ class CClusterer : public CClustererTypes { //! \param numberSamples The desired number of samples. //! \param samples Filled in with the samples. //! \return True if the cluster could be sampled and false otherwise. - virtual bool sample(std::size_t index, std::size_t numberSamples, TPointPreciseVec& samples) const = 0; + virtual bool sample(std::size_t index, + std::size_t numberSamples, + TPointPreciseVec& samples) const = 0; //! Get the probability of the cluster with the index \p index. //! diff --git a/include/maths/CClustererStateSerialiser.h b/include/maths/CClustererStateSerialiser.h index f1cb8265d7..08931fa5dd 100644 --- a/include/maths/CClustererStateSerialiser.h +++ b/include/maths/CClustererStateSerialiser.h @@ -50,7 +50,9 @@ class MATHS_EXPORT CClustererStateSerialiser { //! document representation. //! //! \note Sets \p ptr to NULL on failure. - bool operator()(const SDistributionRestoreParams& params, TClusterer1dPtr& ptr, core::CStateRestoreTraverser& traverser); + bool operator()(const SDistributionRestoreParams& params, + TClusterer1dPtr& ptr, + core::CStateRestoreTraverser& traverser); //! Construct the appropriate CClusterer sub-class from its state //! document representation. @@ -73,7 +75,8 @@ class MATHS_EXPORT CClustererStateSerialiser { bool operator()(const SDistributionRestoreParams& params, boost::shared_ptr>>& ptr, core::CStateRestoreTraverser& traverser) { - return this->operator()(params, CClustererTypes::CDoNothing(), CClustererTypes::CDoNothing(), ptr, traverser); + return this->operator()(params, CClustererTypes::CDoNothing(), + CClustererTypes::CDoNothing(), ptr, traverser); } //! Construct the appropriate CClusterer sub-class from its state @@ -91,10 +94,12 @@ class MATHS_EXPORT CClustererStateSerialiser { do { const std::string& name = traverser.name(); if (name == CClustererTypes::X_MEANS_ONLINE_TAG) { - ptr.reset(CXMeansOnlineFactory::restore(params, splitFunc, mergeFunc, traverser)); + ptr.reset(CXMeansOnlineFactory::restore(params, splitFunc, + mergeFunc, traverser)); ++numResults; } else { - LOG_ERROR(<< "No clusterer corresponds to node name " << traverser.name()); + LOG_ERROR(<< "No clusterer corresponds to node name " + << traverser.name()); } } while (traverser.next()); @@ -109,8 +114,11 @@ class MATHS_EXPORT CClustererStateSerialiser { //! Persist state by passing information to the supplied inserter. template - void operator()(const CClusterer>& clusterer, core::CStatePersistInserter& inserter) { - inserter.insertLevel(clusterer.persistenceTag(), boost::bind(&CClusterer>::acceptPersistInserter, &clusterer, _1)); + void operator()(const CClusterer>& clusterer, + core::CStatePersistInserter& inserter) { + inserter.insertLevel(clusterer.persistenceTag(), + boost::bind(&CClusterer>::acceptPersistInserter, + &clusterer, _1)); } }; } diff --git a/include/maths/CCompositeFunctions.h b/include/maths/CCompositeFunctions.h index ab4b877e0a..22827af5a1 100644 --- a/include/maths/CCompositeFunctions.h +++ b/include/maths/CCompositeFunctions.h @@ -80,8 +80,10 @@ struct result_type_impl { //! \brief Tries to deduce the result type of a function (object) //! in various ways. template -struct result_type : public result_type_impl::type, - typename has_result_type::type>::value> {}; +struct result_type + : public result_type_impl::type, + typename has_result_type::type>::value> { +}; } // composition_detail:: @@ -175,14 +177,16 @@ class MATHS_EXPORT CCompositeFunctions { //! 
For function returning value. inline T operator()(double x) const { - static const double LOG_MIN_DOUBLE = std::log(std::numeric_limits::min()); + static const double LOG_MIN_DOUBLE = + std::log(std::numeric_limits::min()); double fx = m_F(x); return fx < LOG_MIN_DOUBLE ? 0.0 : std::exp(fx); } //! For function return success/fail and taking result as argument. inline bool operator()(double x, T& result) const { - static const double LOG_MIN_DOUBLE = std::log(std::numeric_limits::min()); + static const double LOG_MIN_DOUBLE = + std::log(std::numeric_limits::min()); if (m_F(x, result)) { result = result < LOG_MIN_DOUBLE ? 0.0 : std::exp(result); return true; @@ -206,7 +210,8 @@ class MATHS_EXPORT CCompositeFunctions { using result_type = U; public: - explicit CProduct(const F& f = F(), const G& g = G()) : m_F(f), m_G(g) {} + explicit CProduct(const F& f = F(), const G& g = G()) + : m_F(f), m_G(g) {} //! For function returning value. inline U operator()(double x) const { return m_F(x) * m_G(x); } diff --git a/include/maths/CConstantPrior.h b/include/maths/CConstantPrior.h index 714edd2740..c8462aac2c 100644 --- a/include/maths/CConstantPrior.h +++ b/include/maths/CConstantPrior.h @@ -65,13 +65,17 @@ class MATHS_EXPORT CConstantPrior : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyle, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TWeightStyleVec& weightStyle, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Returns zero. virtual double offset() const; //! Set the constant if it hasn't been set. - virtual void addSamples(const TWeightStyleVec& weightStyle, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyle, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! No-op. virtual void propagateForwardsByTime(double time); @@ -83,24 +87,28 @@ class MATHS_EXPORT CConstantPrior : public CPrior { virtual double marginalLikelihoodMean() const; //! Returns constant or zero if unset (by equidistribution). - virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! All confidence intervals are the point [constant, constant]. - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Returns a large value if all samples are equal to the constant //! and zero otherwise. 
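Note: CExp above guards exponentiation against underflow; anything below the log of the smallest normalized double maps to exactly zero rather than a denormal or garbage value. In isolation:

    #include <cmath>
    #include <limits>

    double safeExp(double logValue) {
        static const double LOG_MIN_DOUBLE =
            std::log(std::numeric_limits<double>::min());
        return logValue < LOG_MIN_DOUBLE ? 0.0 : std::exp(logValue);
    }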
- virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; //! Get \p numberSamples times the constant. virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const; diff --git a/include/maths/CCountMinSketch.h b/include/maths/CCountMinSketch.h index 717eb16128..b8806daa7a 100644 --- a/include/maths/CCountMinSketch.h +++ b/include/maths/CCountMinSketch.h @@ -124,7 +124,9 @@ class MATHS_EXPORT CCountMinSketch { SSketch(std::size_t rows, std::size_t columns); //! Create by traversing a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::size_t rows, std::size_t columns); + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, + std::size_t rows, + std::size_t columns); //! Convert to a node tree. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; diff --git a/include/maths/CDecompositionComponent.h b/include/maths/CDecompositionComponent.h index fee9136832..7ae89d29a8 100644 --- a/include/maths/CDecompositionComponent.h +++ b/include/maths/CDecompositionComponent.h @@ -58,10 +58,12 @@ class MATHS_EXPORT CDecompositionComponent { using TDoubleVecArray = boost::array; public: - CPackedSplines(CSplineTypes::EType valueInterpolationType, CSplineTypes::EType varianceInterpolationType); + CPackedSplines(CSplineTypes::EType valueInterpolationType, + CSplineTypes::EType varianceInterpolationType); //! Create by traversing a state document. - bool acceptRestoreTraverser(CSplineTypes::EBoundaryCondition boundary, core::CStateRestoreTraverser& traverser); + bool acceptRestoreTraverser(CSplineTypes::EBoundaryCondition boundary, + core::CStateRestoreTraverser& traverser); //! Persist state by passing information to \p inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; diff --git a/include/maths/CEqualWithTolerance.h b/include/maths/CEqualWithTolerance.h index 58279d2d9e..891e5968c7 100644 --- a/include/maths/CEqualWithTolerance.h +++ b/include/maths/CEqualWithTolerance.h @@ -39,7 +39,9 @@ struct SNorm> { template struct SNorm> { using result_type = T; - static T dispatch(const CSymmetricMatrixNxN& t) { return t.frobenius(); } + static T dispatch(const CSymmetricMatrixNxN& t) { + return t.frobenius(); + } }; template @@ -76,13 +78,16 @@ class CToleranceTypes { //! have has_multiplies and so, short of writing this functionality //! ourselves, we can't implement this. 
template -class CEqualWithTolerance : public std::binary_function, public CToleranceTypes { +class CEqualWithTolerance : public std::binary_function, + public CToleranceTypes { public: CEqualWithTolerance(unsigned int toleranceType, const T& eps) - : m_ToleranceType(toleranceType), m_AbsoluteEps(abs(norm(eps))), m_RelativeEps(abs(norm(eps))) {} + : m_ToleranceType(toleranceType), m_AbsoluteEps(abs(norm(eps))), + m_RelativeEps(abs(norm(eps))) {} CEqualWithTolerance(unsigned int toleranceType, const T& absoluteEps, const T& relativeEps) - : m_ToleranceType(toleranceType), m_AbsoluteEps(abs(norm(absoluteEps))), m_RelativeEps(abs(norm(relativeEps))) {} + : m_ToleranceType(toleranceType), m_AbsoluteEps(abs(norm(absoluteEps))), + m_RelativeEps(abs(norm(relativeEps))) {} bool operator()(const T& lhs, const T& rhs) const { const T& max = norm(rhs) > norm(lhs) ? rhs : lhs; @@ -93,13 +98,15 @@ class CEqualWithTolerance : public std::binary_function, public CTol switch (m_ToleranceType) { case 2: // absolute & relative - return (norm(difference) <= m_AbsoluteEps) && (norm(difference) <= m_RelativeEps * abs(norm(maxAbs))); + return (norm(difference) <= m_AbsoluteEps) && + (norm(difference) <= m_RelativeEps * abs(norm(maxAbs))); case 3: // absolute return norm(difference) <= m_AbsoluteEps; case 6: // relative return norm(difference) <= m_RelativeEps * abs(norm(maxAbs)); case 7: // absolute | relative - return (norm(difference) <= m_AbsoluteEps) || (norm(difference) <= m_RelativeEps * abs(norm(maxAbs))); + return (norm(difference) <= m_AbsoluteEps) || + (norm(difference) <= m_RelativeEps * abs(norm(maxAbs))); } LOG_ERROR(<< "Unexpected tolerance type " << m_ToleranceType); return false; @@ -116,7 +123,9 @@ class CEqualWithTolerance : public std::binary_function, public CTol } //! Get the norm of the specified type. - static TNorm norm(const T& t) { return equal_with_tolerance_detail::SNorm::dispatch(t); } + static TNorm norm(const T& t) { + return equal_with_tolerance_detail::SNorm::dispatch(t); + } private: unsigned int m_ToleranceType; diff --git a/include/maths/CExpandingWindow.h b/include/maths/CExpandingWindow.h index cd29253375..18d75472e9 100644 --- a/include/maths/CExpandingWindow.h +++ b/include/maths/CExpandingWindow.h @@ -46,7 +46,10 @@ class MATHS_EXPORT CExpandingWindow { using TPredictor = std::function; public: - CExpandingWindow(core_t::TTime bucketLength, TTimeCRng bucketLengths, std::size_t size, double decayRate = 0.0); + CExpandingWindow(core_t::TTime bucketLength, + TTimeCRng bucketLengths, + std::size_t size, + double decayRate = 0.0); //! Initialize by reading state from \p traverser. bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); diff --git a/include/maths/CGammaRateConjugate.h b/include/maths/CGammaRateConjugate.h index 86252303f6..31fc0e90cc 100644 --- a/include/maths/CGammaRateConjugate.h +++ b/include/maths/CGammaRateConjugate.h @@ -139,7 +139,9 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Get the current offset. 
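Note: the tolerance test above combines an absolute and a relative criterion, the latter scaled by the larger operand's norm. For scalars, the "absolute | relative" case reduces to:

    #include <algorithm>
    #include <cmath>

    bool equalWithTolerance(double lhs, double rhs, double absEps, double relEps) {
        double diff = std::fabs(lhs - rhs);
        double scale = std::max(std::fabs(lhs), std::fabs(rhs));
        return diff <= absEps || diff <= relEps * scale;
    }

The other cases simply swap || for && or drop one of the two terms.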
virtual double offset() const; @@ -152,7 +154,9 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -171,12 +175,14 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -191,9 +197,10 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the gamma rate. @@ -206,10 +213,11 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; //! Sample the marginal likelihood function. //! @@ -346,7 +354,8 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { private: using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; + using TMeanVarAccumulator = + CBasicStatistics::SSampleMeanVar::TAccumulator; private: //! Read parameters from \p traverser. 
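A note on the marginalLikelihoodConfidenceInterval contract above: for a percentage p, the endpoints a and b are the (1 - p/100)/2 and (1 + p/100)/2 quantiles of the marginal likelihood, so equal mass lies in each tail. A minimal sketch of that calculation, using a boost::math gamma distribution as a stand-in for the model's actual marginal likelihood (the distribution, its parameters and the symmetricInterval helper are illustrative assumptions, not the class's implementation):

    #include <boost/math/distributions/gamma.hpp>

    #include <iostream>
    #include <utility>

    // Equal-tailed interval holding `percentage` percent of the mass: each
    // tail gets (1 - percentage / 100) / 2. The gamma distribution here is
    // only a stand-in for the model's marginal likelihood.
    std::pair<double, double>
    symmetricInterval(const boost::math::gamma_distribution<>& distribution, double percentage) {
        double alpha{(1.0 - percentage / 100.0) / 2.0};
        return {boost::math::quantile(distribution, alpha),
                boost::math::quantile(distribution, 1.0 - alpha)};
    }

    int main() {
        boost::math::gamma_distribution<> likelihood{3.0 /*shape*/, 2.0 /*scale*/};
        auto interval = symmetricInterval(likelihood, 95.0);
        std::cout << "[" << interval.first << ", " << interval.second << "]" << std::endl;
    }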
diff --git a/include/maths/CGradientDescent.h b/include/maths/CGradientDescent.h index b29c82a67b..9b01a16800 100644 --- a/include/maths/CGradientDescent.h +++ b/include/maths/CGradientDescent.h @@ -82,7 +82,12 @@ class MATHS_EXPORT CGradientDescent { //! \param[out] xBest Filled in with the minimum function value argument //! visited. //! \param[out] fi Filled in with the sequence of function values. - bool run(std::size_t n, const TVector& x0, const CFunction& f, const CGradient& gf, TVector& xBest, TDoubleVec& fi); + bool run(std::size_t n, + const TVector& x0, + const CFunction& f, + const CGradient& gf, + TVector& xBest, + TDoubleVec& fi); private: //! The multiplier of the unit vector along the gradient. diff --git a/include/maths/CGramSchmidt.h b/include/maths/CGramSchmidt.h index b4e1e3b9cd..bb2d3138aa 100644 --- a/include/maths/CGramSchmidt.h +++ b/include/maths/CGramSchmidt.h @@ -97,7 +97,8 @@ class MATHS_EXPORT CGramSchmidt : private core::CNonInstantiatable { } double n = norm(x[current]); - LOG_TRACE(<< "i = " << i << ", current = " << current << ", x = " << print(x[current]) << ", norm = " << n); + LOG_TRACE(<< "i = " << i << ", current = " << current + << ", x = " << print(x[current]) << ", norm = " << n); if (n != 0.0) { divide(x[current], n); @@ -113,14 +114,16 @@ class MATHS_EXPORT CGramSchmidt : private core::CNonInstantiatable { swap(x[current], x[i]); } - double eps = 5.0 * norm(x[current]) * std::numeric_limits::epsilon(); + double eps = 5.0 * norm(x[current]) * + std::numeric_limits::epsilon(); for (std::size_t j = 0u; j < i; ++j) { minusProjection(x[current], x[j]); } double n = norm(x[current]); - LOG_TRACE(<< "i = " << i << ", current = " << current << ", x = " << print(x[current]) << ", norm = " << n + LOG_TRACE(<< "i = " << i << ", current = " << current + << ", x = " << print(x[current]) << ", norm = " << n << ", eps = " << eps); if (std::fabs(n) > eps) { @@ -159,7 +162,8 @@ class MATHS_EXPORT CGramSchmidt : private core::CNonInstantiatable { //! Subtract the projection of \p x onto \p e from \p x. template - static const CVectorNx1& minusProjection(CVectorNx1& x, const CVectorNx1& e) { + static const CVectorNx1& + minusProjection(CVectorNx1& x, const CVectorNx1& e) { double n = e.inner(x); return x -= n * e; } @@ -208,7 +212,9 @@ class MATHS_EXPORT CGramSchmidt : private core::CNonInstantiatable { //! Remove [\p begin, \p end) from \p x. 
template - static void erase(std::vector& x, typename std::vector::iterator begin, typename std::vector::iterator end) { + static void erase(std::vector& x, + typename std::vector::iterator begin, + typename std::vector::iterator end) { x.erase(begin, end); } diff --git a/include/maths/CInformationCriteria.h b/include/maths/CInformationCriteria.h index 29897d9acd..2613e4d39c 100644 --- a/include/maths/CInformationCriteria.h +++ b/include/maths/CInformationCriteria.h @@ -41,8 +41,8 @@ struct SSampleCovariances> { MATHS_EXPORT double confidence(double df); -#define LOG_DETERMINANT(N) \ - MATHS_EXPORT \ +#define LOG_DETERMINANT(N) \ + MATHS_EXPORT \ double logDeterminant(const CSymmetricMatrixNxN& c, double upper) LOG_DETERMINANT(2); LOG_DETERMINANT(3); @@ -105,12 +105,20 @@ class CSphericalGaussianInfoCriterion { using TBarePoint = typename SStripped::Type; using TBarePointPrecise = typename SFloatingPoint::Type; using TCoordinate = typename SCoordinate::Type; - using TMeanVarAccumulator = typename CBasicStatistics::SSampleMeanVar::TAccumulator; + using TMeanVarAccumulator = + typename CBasicStatistics::SSampleMeanVar::TAccumulator; public: - CSphericalGaussianInfoCriterion() : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) {} - explicit CSphericalGaussianInfoCriterion(const TPointVecVec& x) : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { this->add(x); } - explicit CSphericalGaussianInfoCriterion(const TPointVec& x) : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { this->add(x); } + CSphericalGaussianInfoCriterion() + : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) {} + explicit CSphericalGaussianInfoCriterion(const TPointVecVec& x) + : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { + this->add(x); + } + explicit CSphericalGaussianInfoCriterion(const TPointVec& x) + : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { + this->add(x); + } //! Update the sufficient statistics for computing info content. 
void add(const TPointVecVec& x) { @@ -147,9 +155,13 @@ class CSphericalGaussianInfoCriterion { m_N += ni; if (ni > 1.0) { double upper = information_criteria_detail::confidence(ni - 1.0); - m_Likelihood += ni * log(ni) - 0.5 * m_D * ni * (1.0 + core::constants::LOG_TWO_PI + std::log(upper * vi / m_D)); + m_Likelihood += ni * log(ni) - 0.5 * m_D * ni * + (1.0 + core::constants::LOG_TWO_PI + + std::log(upper * vi / m_D)); } else { - m_Likelihood += ni * log(ni) - 0.5 * m_D * ni * (1.0 + core::constants::LOG_TWO_PI + core::constants::LOG_MAX_DOUBLE); + m_Likelihood += ni * log(ni) - 0.5 * m_D * ni * + (1.0 + core::constants::LOG_TWO_PI + + core::constants::LOG_MAX_DOUBLE); } } @@ -165,7 +177,8 @@ class CSphericalGaussianInfoCriterion { case E_BIC: return -2.0 * (m_Likelihood - m_N * logN) + p * logN; case E_AICc: - return -2.0 * (m_Likelihood - m_N * logN) + 2.0 * p + p * (p + 1.0) / (m_N - p - 1.0); + return -2.0 * (m_Likelihood - m_N * logN) + 2.0 * p + + p * (p + 1.0) / (m_N - p - 1.0); } return 0.0; } @@ -197,13 +210,21 @@ class CGaussianInfoCriterion { using TBarePoint = typename SStripped::Type; using TBarePointPrecise = typename SFloatingPoint::Type; using TCoordinate = typename SCoordinate::Type; - using TCovariances = typename information_criteria_detail::SSampleCovariances::Type; + using TCovariances = + typename information_criteria_detail::SSampleCovariances::Type; using TMatrix = typename SConformableMatrix::Type; public: - CGaussianInfoCriterion() : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) {} - explicit CGaussianInfoCriterion(const TPointVecVec& x) : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { this->add(x); } - explicit CGaussianInfoCriterion(const TPointVec& x) : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { this->add(x); } + CGaussianInfoCriterion() + : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) {} + explicit CGaussianInfoCriterion(const TPointVecVec& x) + : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { + this->add(x); + } + explicit CGaussianInfoCriterion(const TPointVec& x) + : m_D(0.0), m_K(0.0), m_N(0.0), m_Likelihood(0.0) { + this->add(x); + } //! Update the sufficient statistics for computing info content. void add(const TPointVecVec& x) { @@ -229,9 +250,11 @@ class CGaussianInfoCriterion { m_D = static_cast(CBasicStatistics::mean(covariance).dimension()); m_K += 1.0; m_N += ni; - m_Likelihood += ni * log(ni) - 0.5 * ni * - (m_D + m_D * core::constants::LOG_TWO_PI + - (ni <= m_D + 1.0 ? core::constants::LOG_MAX_DOUBLE : this->logDeterminant(covariance))); + m_Likelihood += ni * log(ni) - + 0.5 * ni * + (m_D + m_D * core::constants::LOG_TWO_PI + + (ni <= m_D + 1.0 ? core::constants::LOG_MAX_DOUBLE + : this->logDeterminant(covariance))); } //! Calculate the information content of the clusters added so far. @@ -246,7 +269,8 @@ class CGaussianInfoCriterion { case E_BIC: return -2.0 * (m_Likelihood - m_N * logN) + p * logN; case E_AICc: - return -2.0 * (m_Likelihood - m_N * logN) + 2.0 * p + p * (p + 1.0) / (m_N - p - 1.0); + return -2.0 * (m_Likelihood - m_N * logN) + 2.0 * p + + p * (p + 1.0) / (m_N - p - 1.0); } return 0.0; } diff --git a/include/maths/CIntegration.h b/include/maths/CIntegration.h index c57e4303c9..7e9dad4d22 100644 --- a/include/maths/CIntegration.h +++ b/include/maths/CIntegration.h @@ -156,7 +156,13 @@ class MATHS_EXPORT CIntegration { //! \tparam V The type of range of \p g. This must have a meaningful default //! constructor, support multiplication by a double and addition. 
template - static bool productGaussLegendre(const F& f, const G& g, double a, double b, U& productIntegral, U& fIntegral, V& gIntegral) { + static bool productGaussLegendre(const F& f, + const G& g, + double a, + double b, + U& productIntegral, + U& fIntegral, + V& gIntegral) { productIntegral = U(); fIntegral = U(); gIntegral = V(); @@ -171,7 +177,8 @@ class MATHS_EXPORT CIntegration { for (unsigned int i = 0; i < ORDER; ++i) { U fx; V gx; - if (!f(centre + range * abscissas[i], fx) || !g(centre + range * abscissas[i], gx)) { + if (!f(centre + range * abscissas[i], fx) || + !g(centre + range * abscissas[i], gx)) { return false; } double weight = weights[i]; @@ -280,7 +287,8 @@ class MATHS_EXPORT CIntegration { double tolerance, double& result) { if (intervals.size() != fIntervals.size()) { - LOG_ERROR(<< "Inconsistent intervals and function integrals: " << core::CContainerPrinter::print(intervals) << " " + LOG_ERROR(<< "Inconsistent intervals and function integrals: " + << core::CContainerPrinter::print(intervals) << " " << core::CContainerPrinter::print(fIntervals)); return false; } @@ -330,7 +338,8 @@ class MATHS_EXPORT CIntegration { double fjNew = 0.0; double aj = intervals[j].first; - double dj = (intervals[j].second - intervals[j].first) / static_cast(splitsPerRefinement); + double dj = (intervals[j].second - intervals[j].first) / + static_cast(splitsPerRefinement); for (std::size_t k = 0u; k < splitsPerRefinement; ++k, aj += dj) { double df; if (CIntegration::gaussLegendre(f, aj, aj + dj, df)) { @@ -345,7 +354,8 @@ class MATHS_EXPORT CIntegration { } } } else { - LOG_ERROR(<< "Couldn't integrate f over [" << aj << "," << aj + dj << "]"); + LOG_ERROR(<< "Couldn't integrate f over [" << aj << "," + << aj + dj << "]"); return false; } } @@ -354,7 +364,8 @@ class MATHS_EXPORT CIntegration { double correction = fjNew - fjOld; if (i + 1 < refinements) { corrections[j] = std::fabs(correction); - corrections.resize(corrections.size() + splitsPerRefinement - 1, std::fabs(correction)); + corrections.resize(corrections.size() + splitsPerRefinement - 1, + std::fabs(correction)); } result += correction; @@ -400,7 +411,8 @@ class MATHS_EXPORT CIntegration { public: static const CSparseGaussLegendreQuadrature& instance() { - const CSparseGaussLegendreQuadrature* tmp = ms_Instance.load(std::memory_order_acquire); + const CSparseGaussLegendreQuadrature* tmp = + ms_Instance.load(std::memory_order_acquire); if (!tmp) { core::CScopedFastLock scopedLock(CIntegration::ms_Mutex); tmp = ms_Instance.load(std::memory_order_relaxed); @@ -458,7 +470,8 @@ class MATHS_EXPORT CIntegration { TVectorDoubleMap ordered; - for (unsigned int l = ORDER > DIMENSION ? ORDER - DIMENSION : 0; l < ORDER; ++l) { + for (unsigned int l = ORDER > DIMENSION ? ORDER - DIMENSION : 0; + l < ORDER; ++l) { LOG_TRACE(<< "order = " << l); std::size_t d = 0u; TUIntVec indices(DIMENSION, 1); @@ -466,7 +479,8 @@ class MATHS_EXPORT CIntegration { TUIntVec stop(DIMENSION, l + 1); double sign = (ORDER - l - 1) % 2 == 1 ? 
-1.0 : 1.0; - double scale = sign * CIntegerTools::binomial(DIMENSION - 1, DIMENSION + l - ORDER); + double scale = sign * CIntegerTools::binomial(DIMENSION - 1, + DIMENSION + l - ORDER); LOG_TRACE(<< "scale = " << scale); do { @@ -481,7 +495,8 @@ class MATHS_EXPORT CIntegration { TDoubleVec weights(n, 1.0); TVectorVec points(n, TVector(0.0)); for (unsigned int i = 0u; i < n; ++i) { - for (unsigned int i_ = i, j = 0u; j < indices.size(); i_ /= indices[j], ++j) { + for (unsigned int i_ = i, j = 0u; j < indices.size(); + i_ /= indices[j], ++j) { EOrder order = static_cast(indices[j]); const double* w = CGaussLegendreQuadrature::weights(order); const double* a = CGaussLegendreQuadrature::abscissas(order); @@ -535,7 +550,8 @@ class MATHS_EXPORT CIntegration { //! \tparam T The type of range of \p f. This must have a meaningful //! default constructor, support multiplication by a double and addition. template - static bool sparseGaussLegendre(const F& function, const TDoubleVec& a, const TDoubleVec& b, T& result) { + static bool + sparseGaussLegendre(const F& function, const TDoubleVec& a, const TDoubleVec& b, T& result) { using TSparseQuadrature = CSparseGaussLegendreQuadrature; using TVector = typename TSparseQuadrature::TVector; using TVectorVec = typename TSparseQuadrature::TVectorVec; @@ -637,7 +653,8 @@ class MATHS_EXPORT CIntegration { }; template -std::atomic*> CIntegration::CSparseGaussLegendreQuadrature::ms_Instance; +std::atomic*> + CIntegration::CSparseGaussLegendreQuadrature::ms_Instance; } } diff --git a/include/maths/CKMeansFast.h b/include/maths/CKMeansFast.h index a27c3f4bca..18bba43a9b 100644 --- a/include/maths/CKMeansFast.h +++ b/include/maths/CKMeansFast.h @@ -30,7 +30,8 @@ using TSizeVec = std::vector; //! Get the closest filtered centre to \p point. template -std::size_t closest(const std::vector& centres, ITR filter, ITR end, const POINT& point) { +std::size_t +closest(const std::vector& centres, ITR filter, ITR end, const POINT& point) { std::size_t result = *filter; double d = (point - centres[result]).euclidean(); for (++filter; filter != end; ++filter) { @@ -45,7 +46,8 @@ std::size_t closest(const std::vector& centres, ITR filter, ITR end, cons //! Get the closest filtered centre to \p point. template -std::size_t closest(const std::vector& centres, const TSizeVec& filter, const POINT& point) { +std::size_t +closest(const std::vector& centres, const TSizeVec& filter, const POINT& point) { return closest(centres, filter.begin(), filter.end(), point); } } @@ -90,12 +92,15 @@ class CKMeansFast { //! Check for equality using checksum and then points if the //! checksum is ambiguous. - bool operator==(const CCluster& other) const { return m_Checksum == other.m_Checksum && m_Points == other.m_Points; } + bool operator==(const CCluster& other) const { + return m_Checksum == other.m_Checksum && m_Points == other.m_Points; + } //! Total ordering by checksum breaking ties using expensive //! comparison on all points. bool operator<(const CCluster& rhs) const { - return m_Checksum < rhs.m_Checksum || (m_Checksum == rhs.m_Checksum && m_Points < rhs.m_Points); + return m_Checksum < rhs.m_Checksum || + (m_Checksum == rhs.m_Checksum && m_Points < rhs.m_Points); } //! Get the number of points in the cluster. 
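For orientation before the next hunks: the CFurtherFrom predicate below encodes the pruning test of filtered k-means. While descending the k-d tree, a candidate centre y is dropped for a node when every point in the node's bounding box is provably closer to the best candidate x; for an axis-aligned box it suffices to test the single corner most favourable to y, because the difference of squared distances is linear over the box. A self-contained sketch of that test (the closerToX name and the std::array representation are illustrative, not the library's types):

    #include <array>
    #include <cstddef>
    #include <iostream>

    // True if every point of the axis-aligned box [boxMin, boxMax] is at
    // least as close to centre x as to centre y. ||p - y||^2 - ||p - x||^2
    // is linear in p, so it is minimised at the box corner favouring y; if
    // even that corner prefers x then the whole box does and y can be pruned.
    template<std::size_t N>
    bool closerToX(const std::array<double, N>& boxMin,
                   const std::array<double, N>& boxMax,
                   const std::array<double, N>& x,
                   const std::array<double, N>& y) {
        double dx2{0.0};
        double dy2{0.0};
        for (std::size_t i = 0; i < N; ++i) {
            double corner{y[i] > x[i] ? boxMax[i] : boxMin[i]};
            dx2 += (corner - x[i]) * (corner - x[i]);
            dy2 += (corner - y[i]) * (corner - y[i]);
        }
        return dx2 <= dy2;
    }

    int main() {
        std::array<double, 2> boxMin{{0.0, 0.0}};
        std::array<double, 2> boxMax{{1.0, 1.0}};
        std::array<double, 2> x{{0.5, 0.5}};
        std::array<double, 2> y{{4.0, 4.0}};
        // Prints true: the whole unit box prefers x, so y would be pruned.
        std::cout << std::boolalpha << closerToX(boxMin, boxMax, x, y) << std::endl;
    }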
@@ -132,7 +137,8 @@ class CKMeansFast { protected: using TBarePoint = typename SStripped::Type; using TBarePointPrecise = typename SFloatingPoint::Type; - using TMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + using TMeanAccumulator = + typename CBasicStatistics::SSampleMean::TAccumulator; using TMeanAccumulatorVec = std::vector; using TBoundingBox = CBoundingBox; class CKdTreeNodeData; @@ -148,7 +154,10 @@ class CKMeansFast { class CKdTreeNodeData { public: CKdTreeNodeData() {} - explicit CKdTreeNodeData(const POINT& x) : m_BoundingBox(x), m_Centroid() { m_Centroid.add(x); } + explicit CKdTreeNodeData(const POINT& x) + : m_BoundingBox(x), m_Centroid() { + m_Centroid.add(x); + } //! Get the bounding box. const TBoundingBox& boundingBox() const { return m_BoundingBox; } @@ -225,9 +234,12 @@ class CKMeansFast { //! of points than a specified point. class CFurtherFrom { public: - CFurtherFrom(const TBoundingBox& bb_, std::size_t x_, const TPointVec& centres_) : bb(&bb_), x(x_), centres(¢res_) {} + CFurtherFrom(const TBoundingBox& bb_, std::size_t x_, const TPointVec& centres_) + : bb(&bb_), x(x_), centres(¢res_) {} - bool operator()(std::size_t y) const { return y == x ? false : bb->closerToX((*centres)[x], (*centres)[y]); } + bool operator()(std::size_t y) const { + return y == x ? false : bb->closerToX((*centres)[x], (*centres)[y]); + } private: const TBoundingBox* bb; @@ -238,7 +250,8 @@ class CKMeansFast { public: explicit CCentreFilter(const TPointVec& centres) : m_Centres(¢res), - m_Filter(boost::counting_iterator(0), boost::counting_iterator(centres.size())) {} + m_Filter(boost::counting_iterator(0), + boost::counting_iterator(centres.size())) {} //! Get the centres. const TPointVec& centres() const { return *m_Centres; } @@ -267,8 +280,11 @@ class CKMeansFast { namespace detail = kmeans_fast_detail; if (m_Filter.size() > 1) { - std::size_t closest = detail::closest(*m_Centres, m_Filter, POINT(bb.centre())); - m_Filter.erase(std::remove_if(m_Filter.begin(), m_Filter.end(), CFurtherFrom(bb, closest, *m_Centres)), m_Filter.end()); + std::size_t closest = + detail::closest(*m_Centres, m_Filter, POINT(bb.centre())); + m_Filter.erase(std::remove_if(m_Filter.begin(), m_Filter.end(), + CFurtherFrom(bb, closest, *m_Centres)), + m_Filter.end()); } } @@ -292,7 +308,8 @@ class CKMeansFast { //! of its assigned points. class CCentroidComputer { public: - CCentroidComputer(const TPointVec& centres, TMeanAccumulatorVec& centroids) : m_Centres(centres), m_Centroids(¢roids) {} + CCentroidComputer(const TPointVec& centres, TMeanAccumulatorVec& centroids) + : m_Centres(centres), m_Centroids(¢roids) {} //! Update the centres with \p node. //! @@ -330,7 +347,9 @@ class CKMeansFast { //! centre supplied to the constructor. 
class CClosestPointsCollector { public: - CClosestPointsCollector(std::size_t numberPoints, const TPointVec& centres, TPointVecVec& closestPoints) + CClosestPointsCollector(std::size_t numberPoints, + const TPointVec& centres, + TPointVecVec& closestPoints) : m_Centres(¢res), m_ClosestPoints(&closestPoints) { m_ClosestPoints->resize(centres.size()); for (std::size_t i = 0u; i < m_ClosestPoints->size(); ++i) { @@ -345,8 +364,8 @@ class CKMeansFast { namespace detail = kmeans_fast_detail; std::size_t n = m_Centres->size(); const POINT& point = node.s_Point; - (*m_ClosestPoints)[detail::closest( - *m_Centres, boost::counting_iterator(0), boost::counting_iterator(n), point)] + (*m_ClosestPoints)[detail::closest(*m_Centres, boost::counting_iterator(0), + boost::counting_iterator(n), point)] .push_back(point); } @@ -431,14 +450,16 @@ class CKMeansFast { //! Single iteration of Lloyd's algorithm to update \p centres. bool updateCentres() { using TCoordinate = typename SCoordinate::Type; - static const TCoordinate PRECISION = TCoordinate(5) * std::numeric_limits::epsilon(); + static const TCoordinate PRECISION = + TCoordinate(5) * std::numeric_limits::epsilon(); TMeanAccumulatorVec newCentres(m_Centres.size()); CCentroidComputer computer(m_Centres, newCentres); m_Points.preorderDepthFirst(computer); bool changed = false; for (std::size_t i = 0u; i < newCentres.size(); ++i) { POINT newCentre(CBasicStatistics::mean(newCentres[i])); - if ((m_Centres[i] - newCentre).euclidean() > PRECISION * m_Centres[i].euclidean()) { + if ((m_Centres[i] - newCentre).euclidean() > + PRECISION * m_Centres[i].euclidean()) { m_Centres[i] = newCentre; changed = true; } diff --git a/include/maths/CKMeansOnline.h b/include/maths/CKMeansOnline.h index 0eb82511ad..ee4119436a 100644 --- a/include/maths/CKMeansOnline.h +++ b/include/maths/CKMeansOnline.h @@ -66,7 +66,8 @@ class CKMeansOnline { //! \brief Checks if a cluster should be deleted based on its count. class CShouldDelete { public: - CShouldDelete(double minimumCategoryCount) : m_MinimumCategoryCount(minimumCategoryCount) {} + CShouldDelete(double minimumCategoryCount) + : m_MinimumCategoryCount(minimumCategoryCount) {} template bool operator()(const CLUSTER& cluster) const { @@ -84,8 +85,10 @@ class CKMeansOnline { using TFloatMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; using TFloatMeanAccumulatorDoublePr = std::pair; using TFloatMeanAccumulatorDoublePrVec = std::vector; - using TDoubleMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; - using TDoubleMeanVarAccumulator = typename CBasicStatistics::SSampleMeanVar::TAccumulator; + using TDoubleMeanAccumulator = + typename CBasicStatistics::SSampleMean::TAccumulator; + using TDoubleMeanVarAccumulator = + typename CBasicStatistics::SSampleMeanVar::TAccumulator; protected: //! The minimum permitted size for the clusterer. @@ -117,13 +120,15 @@ class CKMeansOnline { //! subject to this constraint so will generally hold \p k //! clusters. CKMeansOnline(std::size_t k, double decayRate = 0.0, double minimumCategoryCount = MINIMUM_CATEGORY_COUNT) - : m_K(std::max(k, MINIMUM_SPACE)), m_DecayRate(decayRate), m_MinimumCategoryCount(minimumCategoryCount) { + : m_K(std::max(k, MINIMUM_SPACE)), m_DecayRate(decayRate), + m_MinimumCategoryCount(minimumCategoryCount) { m_Clusters.reserve(m_K + MAXIMUM_BUFFER_SIZE + 1u); m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); } //! Create from part of a state document. 
- bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { m_DecayRate = params.s_DecayRate; m_MinimumCategoryCount = params.s_MinimumCategoryCount; @@ -131,7 +136,8 @@ class CKMeansOnline { const std::string name = traverser.name(); RESTORE(RNG_TAG, m_Rng.fromString(traverser.value())); RESTORE(K_TAG, core::CPersistUtils::restore(K_TAG, m_K, traverser)) - RESTORE(CLUSTERS_TAG, core::CPersistUtils::restore(CLUSTERS_TAG, m_Clusters, traverser)) + RESTORE(CLUSTERS_TAG, + core::CPersistUtils::restore(CLUSTERS_TAG, m_Clusters, traverser)) RESTORE(POINTS_TAG, core::CPersistUtils::restore(POINTS_TAG, m_PointsBuffer, traverser)) } while (traverser.next()); return true; @@ -156,7 +162,9 @@ class CKMeansOnline { } //! Get the total number of clusters. - std::size_t size() const { return std::min(m_Clusters.size() + m_PointsBuffer.size(), m_K); } + std::size_t size() const { + return std::min(m_Clusters.size() + m_PointsBuffer.size(), m_K); + } //! Get the clusters being maintained. void clusters(TSphericalClusterVec& result) const { @@ -203,7 +211,8 @@ class CKMeansOnline { //! \param[out] result Filled in with the \p k means clustering //! of \p clusters. template - static bool kmeans(RNG& rng, TSphericalClusterVec& clusters, std::size_t k, TSphericalClusterVecVec& result) { + static bool + kmeans(RNG& rng, TSphericalClusterVec& clusters, std::size_t k, TSphericalClusterVecVec& result) { result.clear(); if (k == 0) { @@ -301,9 +310,11 @@ class CKMeansOnline { for (std::size_t i = 0u; i < other.m_PointsBuffer.size(); ++i) { m_Clusters.push_back(TFloatMeanAccumulatorDoublePr()); - CKMeansOnline::add(other.m_PointsBuffer[i].first, other.m_PointsBuffer[i].second, m_Clusters.back()); + CKMeansOnline::add(other.m_PointsBuffer[i].first, + other.m_PointsBuffer[i].second, m_Clusters.back()); } - m_Clusters.insert(m_Clusters.end(), other.m_Clusters.begin(), other.m_Clusters.end()); + m_Clusters.insert(m_Clusters.end(), other.m_Clusters.begin(), + other.m_Clusters.end()); this->reduce(); @@ -338,7 +349,9 @@ class CKMeansOnline { // Prune any dead categories: we're not interested in // maintaining categories with low counts. - m_Clusters.erase(std::remove_if(m_Clusters.begin(), m_Clusters.end(), CShouldDelete(m_MinimumCategoryCount)), m_Clusters.end()); + m_Clusters.erase(std::remove_if(m_Clusters.begin(), m_Clusters.end(), + CShouldDelete(m_MinimumCategoryCount)), + m_Clusters.end()); LOG_TRACE(<< "clusters = " << core::CContainerPrinter::print(m_Clusters)); } @@ -378,7 +391,8 @@ class CKMeansOnline { for (std::size_t i = 0u; i < counts.size(); ++i) { counts[i] /= Z; } - LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(counts) << ", Z = " << Z << ", n = " << numberSamples); + LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(counts) + << ", Z = " << Z << ", n = " << numberSamples); result.reserve(2 * numberSamples); @@ -406,7 +420,8 @@ class CKMeansOnline { LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(weights)); TDoublePointVec final; - final.reserve(static_cast(std::ceil(std::accumulate(weights.begin(), weights.end(), 0.0)))); + final.reserve(static_cast( + std::ceil(std::accumulate(weights.begin(), weights.end(), 0.0)))); TDoubleMeanAccumulator sample; for (;;) { CBasicStatistics::COrderStatisticsStack nearest; @@ -439,7 +454,9 @@ class CKMeansOnline { } //! Print this classifier for debug. 
- std::string print() const { return core::CContainerPrinter::print(m_Clusters); } + std::string print() const { + return core::CContainerPrinter::print(m_Clusters); + } //! Get a checksum for this object. uint64_t checksum(uint64_t seed = 0) const { @@ -466,8 +483,12 @@ class CKMeansOnline { protected: //! Construct a new classifier with the specified space limit //! \p space and categories \p categories. - CKMeansOnline(std::size_t k, double decayRate, double minimumCategoryCount, TFloatMeanAccumulatorDoublePrVec& clusters) - : m_K(std::max(k, MINIMUM_SPACE)), m_DecayRate(decayRate), m_MinimumCategoryCount(minimumCategoryCount) { + CKMeansOnline(std::size_t k, + double decayRate, + double minimumCategoryCount, + TFloatMeanAccumulatorDoublePrVec& clusters) + : m_K(std::max(k, MINIMUM_SPACE)), m_DecayRate(decayRate), + m_MinimumCategoryCount(minimumCategoryCount) { m_Clusters.swap(clusters); m_Clusters.reserve(m_K + MAXIMUM_BUFFER_SIZE + 1u); m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); @@ -499,7 +520,8 @@ class CKMeansOnline { // Add all the points as new spherical clusters and reduce. for (std::size_t i = 0u; i < m_PointsBuffer.size(); ++i) { m_Clusters.push_back(TFloatMeanAccumulatorDoublePr()); - CKMeansOnline::add(m_PointsBuffer[i].first, m_PointsBuffer[i].second, m_Clusters.back()); + CKMeansOnline::add(m_PointsBuffer[i].first, + m_PointsBuffer[i].second, m_Clusters.back()); } m_PointsBuffer.clear(); @@ -525,7 +547,8 @@ class CKMeansOnline { } double n = CBasicStatistics::count(cluster); const TDoublePoint& m = CBasicStatistics::mean(cluster); - m_Clusters[i].first = CBasicStatistics::accumulator(TFloatCoordinate(n), TFloatPoint(m)); + m_Clusters[i].first = + CBasicStatistics::accumulator(TFloatCoordinate(n), TFloatPoint(m)); m_Clusters[i].second = variance(cluster); } @@ -540,7 +563,8 @@ class CKMeansOnline { double nc = CBasicStatistics::count(cluster.first); TDoublePoint mc = CBasicStatistics::mean(cluster.first); TDoublePoint vc(cluster.second); - TDoubleMeanVarAccumulator moments = CBasicStatistics::accumulator(nc, mc, vc) + CBasicStatistics::accumulator(nx, mx, vx); + TDoubleMeanVarAccumulator moments = CBasicStatistics::accumulator(nc, mc, vc) + + CBasicStatistics::accumulator(nx, mx, vx); TFloatCoordinate ncx = CBasicStatistics::count(moments); TFloatPoint mcx = CBasicStatistics::mean(moments); cluster.first = CBasicStatistics::accumulator(ncx, mcx); diff --git a/include/maths/CKMeansOnline1d.h b/include/maths/CKMeansOnline1d.h index 17306ab5e1..c4e8e50ff2 100644 --- a/include/maths/CKMeansOnline1d.h +++ b/include/maths/CKMeansOnline1d.h @@ -42,7 +42,8 @@ class MATHS_EXPORT CKMeansOnline1d : public CClusterer1d { CKMeansOnline1d(TNormalVec& clusters); //! Construct by traversing a state document. - CKMeansOnline1d(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + CKMeansOnline1d(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! \name Clusterer Contract //@{ @@ -127,7 +128,8 @@ class MATHS_EXPORT CKMeansOnline1d : public CClusterer1d { private: //! Restore by traversing a state document. - bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); private: //! The (fixed) clusters. 
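Worth spelling out: the accumulator sum used above when CKMeansOnline folds a point or cluster into an existing one, CBasicStatistics::accumulator(nc, mc, vc) + CBasicStatistics::accumulator(nx, mx, vx), is ordinary moment pooling. Counts add, the merged mean is the count-weighted average, and each variance is inflated by the squared offset of its mean from the merged mean. A standalone sketch of the arithmetic, assuming the divide-by-n (maximum likelihood) variance convention of the mean-variance accumulators:

    #include <iostream>

    struct SMoments {
        double s_Count;
        double s_Mean;
        double s_Variance; // Maximum likelihood, i.e. divide-by-n, variance.
    };

    // Pool two sets of (count, mean, variance) statistics: this is the
    // arithmetic behind adding two mean-variance accumulators.
    SMoments merge(const SMoments& a, const SMoments& b) {
        double n{a.s_Count + b.s_Count};
        double m{(a.s_Count * a.s_Mean + b.s_Count * b.s_Mean) / n};
        double v{(a.s_Count * (a.s_Variance + (a.s_Mean - m) * (a.s_Mean - m)) +
                  b.s_Count * (b.s_Variance + (b.s_Mean - m) * (b.s_Mean - m))) / n};
        return {n, m, v};
    }

    int main() {
        SMoments merged{merge({10.0, 1.0, 0.5}, {5.0, 4.0, 0.25})};
        std::cout << merged.s_Count << " " << merged.s_Mean << " "
                  << merged.s_Variance << std::endl;
    }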
diff --git a/include/maths/CKMostCorrelated.h b/include/maths/CKMostCorrelated.h
index ced5f410e4..ca7b661b11 100644
--- a/include/maths/CKMostCorrelated.h
+++ b/include/maths/CKMostCorrelated.h
@@ -64,7 +64,8 @@ class MATHS_EXPORT CKMostCorrelated {
 using TVectorVec = std::vector<TVector>;
 using TSizeVectorUMap = boost::unordered_map<std::size_t, TVector>;
 using TVectorPackedBitVectorPr = std::pair<TVector, CPackedBitVector>;
- using TSizeVectorPackedBitVectorPrUMap = boost::unordered_map<std::size_t, TVectorPackedBitVectorPr>;
+ using TSizeVectorPackedBitVectorPrUMap =
+ boost::unordered_map<std::size_t, TVectorPackedBitVectorPr>;
 public:
 CKMostCorrelated(std::size_t k, double decayRate, bool initialize = true);
@@ -167,7 +168,10 @@ class MATHS_EXPORT CKMostCorrelated {
 //! Estimate the correlation based on the projections
 //! \p px and \p py.
- static double correlation(const TVector& px, const CPackedBitVector& ix, const TVector& py, const CPackedBitVector& iy);
+ static double correlation(const TVector& px,
+ const CPackedBitVector& ix,
+ const TVector& py,
+ const CPackedBitVector& iy);
 //! Get the checksum of this object.
 uint64_t checksum(uint64_t seed) const;
diff --git a/include/maths/CKdTree.h b/include/maths/CKdTree.h
index 70caf4259f..621e597ae5 100644
--- a/include/maths/CKdTree.h
+++ b/include/maths/CKdTree.h
@@ -73,7 +73,9 @@ class CKdTree {
 class CCoordinateLess {
 public:
 CCoordinateLess(std::size_t i) : m_I(i) {}
- bool operator()(const POINT& lhs, const POINT& rhs) const { return lhs(m_I) < rhs(m_I); }
+ bool operator()(const POINT& lhs, const POINT& rhs) const {
+ return lhs(m_I) < rhs(m_I);
+ }
 private:
 std::size_t m_I;
@@ -82,7 +84,8 @@ class CKdTree {
 //! A node of the k-d tree.
 struct SNode : public NODE_DATA {
 SNode(SNode* parent, const POINT& point)
- : NODE_DATA(), s_Parent(parent), s_LeftChild(nullptr), s_RightChild(nullptr), s_Point(point) {}
+ : NODE_DATA(), s_Parent(parent), s_LeftChild(nullptr),
+ s_RightChild(nullptr), s_Point(point) {}
 //! Check node invariants.
 bool checkInvariants(std::size_t dimension) const {
@@ -96,11 +99,13 @@ class CKdTree {
 std::size_t coordinate = this->depth() % dimension;
 CCoordinateLess less(coordinate);
 if (s_LeftChild && less(s_Point, s_LeftChild->s_Point)) {
- LOG_ERROR(<< "parent = " << s_Point << ", left child = " << s_LeftChild->s_Point << ", coordinate = " << coordinate);
+ LOG_ERROR(<< "parent = " << s_Point << ", left child = "
+ << s_LeftChild->s_Point << ", coordinate = " << coordinate);
 return false;
 }
 if (s_RightChild && less(s_RightChild->s_Point, s_Point)) {
- LOG_ERROR(<< "parent = " << s_Point << ", right child = " << s_RightChild->s_Point << ", coordinate = " << coordinate);
+ LOG_ERROR(<< "parent = " << s_Point << ", right child = "
+ << s_RightChild->s_Point << ", coordinate = " << coordinate);
 return false;
 }
 return true;
@@ -141,8 +146,7 @@ class CKdTree {
 m_Nodes.reserve(points.size());
 this->buildRecursively(nullptr, // Parent pointer
 0, // Split coordinate
- points.begin(),
- points.end());
+ points.begin(), points.end());
 }
 //! Get the number of points in the tree.
@@ -156,12 +160,11 @@
 return nearest;
 }
- TCoordinatePrecise distanceToNearest = std::numeric_limits<TCoordinatePrecise>::max();
- return this->nearestNeighbour(point,
- m_Nodes[0],
+ TCoordinatePrecise distanceToNearest =
+ std::numeric_limits<TCoordinatePrecise>::max();
+ return this->nearestNeighbour(point, m_Nodes[0],
 0, // Split coordinate,
- nearest,
- distanceToNearest);
+ nearest, distanceToNearest);
 }
 //! Branch and bound search for nearest \p n neighbours of \p point. 
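As context for the hunks that follow: nearestNeighbour is the classic branch-and-bound descent, searching the child on the query's side of the splitting hyperplane first and visiting the far child only when the hyperplane is nearer than the best distance so far, which is the std::fabs(distanceToHyperplane) < distanceToNearest test. A compact standalone sketch of the same rule (SNode, the fixed two dimensions and the helper names here are illustrative, not the class's actual types):

    #include <cmath>
    #include <cstddef>
    #include <iostream>
    #include <limits>

    struct SNode {
        double s_Point[2];
        const SNode* s_LeftChild{nullptr};
        const SNode* s_RightChild{nullptr};
    };

    double euclidean(const double* x, const double* y, std::size_t dimension) {
        double d2{0.0};
        for (std::size_t i = 0; i < dimension; ++i) {
            d2 += (x[i] - y[i]) * (x[i] - y[i]);
        }
        return std::sqrt(d2);
    }

    // Branch and bound: recurse into the query's side first, then prune the
    // far subtree whenever the splitting hyperplane is further away than the
    // current best distance.
    const SNode* nearestNeighbour(const double* query, const SNode* node,
                                  std::size_t coordinate, std::size_t dimension,
                                  const SNode* best, double& distanceToNearest) {
        if (node == nullptr) {
            return best;
        }
        double distance{euclidean(query, node->s_Point, dimension)};
        if (distance < distanceToNearest) {
            distanceToNearest = distance;
            best = node;
        }
        double distanceToHyperplane{query[coordinate] - node->s_Point[coordinate]};
        const SNode* primary{distanceToHyperplane <= 0.0 ? node->s_LeftChild : node->s_RightChild};
        const SNode* secondary{distanceToHyperplane <= 0.0 ? node->s_RightChild : node->s_LeftChild};
        std::size_t next{(coordinate + 1) % dimension};
        best = nearestNeighbour(query, primary, next, dimension, best, distanceToNearest);
        if (std::fabs(distanceToHyperplane) < distanceToNearest) {
            best = nearestNeighbour(query, secondary, next, dimension, best, distanceToNearest);
        }
        return best;
    }

    int main() {
        SNode leaf1{{1.0, 8.0}};
        SNode leaf2{{6.0, 2.0}};
        SNode root{{5.0, 4.0}, &leaf1, &leaf2};
        double query[]{5.5, 2.5};
        double distanceToNearest{std::numeric_limits<double>::max()};
        const SNode* best{nearestNeighbour(query, &root, 0, 2, nullptr, distanceToNearest)};
        std::cout << "(" << best->s_Point[0] << "," << best->s_Point[1] << ")" << std::endl;
    }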
@@ -173,8 +176,7 @@ class CKdTree { } TNearestAccumulator nearest(n); - this->nearestNeighbours(point, - m_Nodes[0], + this->nearestNeighbours(point, m_Nodes[0], 0, // Split coordinate, nearest); @@ -232,11 +234,13 @@ class CKdTree { m_Nodes.push_back(SNode(parent, *median)); SNode* node = &m_Nodes.back(); if (median - begin > 0) { - SNode* leftChild = this->buildRecursively(node, (coordinate + 1) % m_Dimension, begin, median); + SNode* leftChild = this->buildRecursively( + node, (coordinate + 1) % m_Dimension, begin, median); node->s_LeftChild = leftChild; } if (end - median > 1) { - SNode* rightChild = this->buildRecursively(node, (coordinate + 1) % m_Dimension, median + 1, end); + SNode* rightChild = this->buildRecursively( + node, (coordinate + 1) % m_Dimension, median + 1, end); node->s_RightChild = rightChild; } return node; @@ -256,7 +260,8 @@ class CKdTree { } if (node.s_LeftChild || node.s_RightChild) { - TCoordinatePrecise distanceToHyperplane = point(coordinate) - node.s_Point(coordinate); + TCoordinatePrecise distanceToHyperplane = point(coordinate) - + node.s_Point(coordinate); SNode* primary = node.s_LeftChild; SNode* secondary = node.s_RightChild; @@ -265,9 +270,11 @@ class CKdTree { } std::size_t nextCoordinate = (coordinate + 1) % m_Dimension; - nearest = this->nearestNeighbour(point, *primary, nextCoordinate, nearest, distanceToNearest); + nearest = this->nearestNeighbour(point, *primary, nextCoordinate, + nearest, distanceToNearest); if (secondary && std::fabs(distanceToHyperplane) < distanceToNearest) { - nearest = this->nearestNeighbour(point, *secondary, nextCoordinate, nearest, distanceToNearest); + nearest = this->nearestNeighbour(point, *secondary, nextCoordinate, + nearest, distanceToNearest); } } @@ -275,13 +282,17 @@ class CKdTree { } //! Recursively find the nearest point to \p point. - void nearestNeighbours(const POINT& point, const SNode& node, std::size_t coordinate, TNearestAccumulator& nearest) const { + void nearestNeighbours(const POINT& point, + const SNode& node, + std::size_t coordinate, + TNearestAccumulator& nearest) const { TCoordinatePrecise distance = kdtree_detail::euclidean(point - node.s_Point); nearest.add(TCoordinatePrecisePointPr(distance, node.s_Point)); if (node.s_LeftChild || node.s_RightChild) { - TCoordinatePrecise distanceToHyperplane = point(coordinate) - node.s_Point(coordinate); + TCoordinatePrecise distanceToHyperplane = point(coordinate) - + node.s_Point(coordinate); SNode* primary = node.s_LeftChild; SNode* secondary = node.s_RightChild; diff --git a/include/maths/CLassoLogisticRegression.h b/include/maths/CLassoLogisticRegression.h index 58076e66df..28301e4c7d 100644 --- a/include/maths/CLassoLogisticRegression.h +++ b/include/maths/CLassoLogisticRegression.h @@ -43,7 +43,9 @@ class MATHS_EXPORT CDenseMatrix { void swap(CDenseMatrix& other); //! Get the number of rows. - std::size_t rows() const { return m_Elements.empty() ? 0 : m_Elements[0].size(); } + std::size_t rows() const { + return m_Elements.empty() ? 0 : m_Elements[0].size(); + } //! Get the number of columns. std::size_t columns() const { return m_Elements.size(); } //! Get the beginning of the rows present for the j'th column. @@ -51,7 +53,9 @@ class MATHS_EXPORT CDenseMatrix { //! Get the end of the rows present for the j'th column. iterator endRows(std::size_t j) const { return m_Elements[j].end(); } //! Get the row represented by the j'th column row iterator. 
- std::size_t row(iterator itr, std::size_t j) const { return itr - m_Elements[j].begin(); } + std::size_t row(iterator itr, std::size_t j) const { + return itr - m_Elements[j].begin(); + } //! Get the element represented by the iterator. double element(iterator itr) const { return *itr; } @@ -81,14 +85,18 @@ class MATHS_EXPORT CSparseMatrix { std::size_t columns() const { return m_Columns; } //! Get the beginning of the rows present for the j'th column. iterator beginRows(std::size_t j) const { - return std::lower_bound(m_Elements.begin(), m_Elements.end(), TSizeSizePr(j, size_t(0)), COrderings::SFirstLess()); + return std::lower_bound(m_Elements.begin(), m_Elements.end(), + TSizeSizePr(j, size_t(0)), COrderings::SFirstLess()); } //! Get the end of the rows present for the j'th column. iterator endRows(std::size_t j) const { - return std::upper_bound(m_Elements.begin(), m_Elements.end(), TSizeSizePr(j, m_Rows), COrderings::SFirstLess()); + return std::upper_bound(m_Elements.begin(), m_Elements.end(), + TSizeSizePr(j, m_Rows), COrderings::SFirstLess()); } //! Get the row represented by the j'th column row iterator. - std::size_t row(iterator itr, std::size_t /*j*/) const { return itr->first.second; } + std::size_t row(iterator itr, std::size_t /*j*/) const { + return itr->first.second; + } //! Get the element represented by the iterator. double element(iterator itr) const { return itr->second; } @@ -142,7 +150,11 @@ class MATHS_EXPORT CCyclicCoordinateDescent { //! regression. //! \param[out] numberIterations The number of iterations of //! the main optimization loop used. - bool run(const CDenseMatrix& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta, std::size_t& numberIterations); + bool run(const CDenseMatrix& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + std::size_t& numberIterations); //! Compute the regression parameters for sparse feature vectors. //! @@ -153,7 +165,11 @@ class MATHS_EXPORT CCyclicCoordinateDescent { //! regression. //! \param[out] numberIterations The number of iterations of //! the main optimization loop used. - bool run(const CSparseMatrix& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta, std::size_t& numberIterations); + bool run(const CSparseMatrix& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + std::size_t& numberIterations); //! Compute the regression parameters for dense feature vectors //! using the input value of beta to initialize the optimization @@ -166,8 +182,11 @@ class MATHS_EXPORT CCyclicCoordinateDescent { //! regression. //! \param[out] numberIterations The number of iterations of //! the main optimization loop used. - bool - runIncremental(const CDenseMatrix& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta, std::size_t& numberIterations); + bool runIncremental(const CDenseMatrix& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + std::size_t& numberIterations); //! Compute the regression parameters for sparse feature vectors //! using the input value of beta to initialize the optimization @@ -180,8 +199,11 @@ class MATHS_EXPORT CCyclicCoordinateDescent { //! regression. //! \param[out] numberIterations The number of iterations of //! the main optimization loop used. 
- bool - runIncremental(const CSparseMatrix& x, const TDoubleVec& y, const TDoubleVec& lambda, TDoubleVec& beta, std::size_t& numberIterations); + bool runIncremental(const CSparseMatrix& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + TDoubleVec& beta, + std::size_t& numberIterations); private: //! Check the validity of the training data and the prior parameters. @@ -337,7 +359,8 @@ using TSparseStorage = std::vector>>; //! IMPLEMENTATION DECISIONS:\n //! This uses a dense encoding of the feature vector for the case that //! they are small and mostly non-zero. -class MATHS_EXPORT CLassoLogisticRegressionDense : public CLassoLogisticRegression { +class MATHS_EXPORT CLassoLogisticRegressionDense + : public CLassoLogisticRegression { public: using TSizeDoublePr = std::pair; using TSizeDoublePrVec = std::vector; diff --git a/include/maths/CLinearAlgebra.h b/include/maths/CLinearAlgebra.h index ba8d2660f9..e5a265049f 100644 --- a/include/maths/CLinearAlgebra.h +++ b/include/maths/CLinearAlgebra.h @@ -55,7 +55,8 @@ struct SSymmetricMatrix { //! Set this vector equal to \p other. template void assign(const SSymmetricMatrix& other) { - std::copy(other.m_LowerTriangle.begin(), other.m_LowerTriangle.end(), m_LowerTriangle.begin()); + std::copy(other.m_LowerTriangle.begin(), other.m_LowerTriangle.end(), + m_LowerTriangle.begin()); } //! Create from a delimited string. @@ -123,14 +124,20 @@ struct SSymmetricMatrix { } //! Check if two matrices are identically equal. - bool equal(const SSymmetricMatrix& other) const { return m_LowerTriangle == other.m_LowerTriangle; } + bool equal(const SSymmetricMatrix& other) const { + return m_LowerTriangle == other.m_LowerTriangle; + } //! Lexicographical total ordering. - bool less(const SSymmetricMatrix& rhs) const { return m_LowerTriangle < rhs.m_LowerTriangle; } + bool less(const SSymmetricMatrix& rhs) const { + return m_LowerTriangle < rhs.m_LowerTriangle; + } //! Check if this is zero. bool isZero() const { - return std::find_if(m_LowerTriangle.begin(), m_LowerTriangle.end(), [](double ei) { return ei != 0.0; }) == m_LowerTriangle.end(); + return std::find_if(m_LowerTriangle.begin(), m_LowerTriangle.end(), + [](double ei) { return ei != 0.0; }) == + m_LowerTriangle.end(); } //! Get the matrix diagonal. @@ -179,7 +186,8 @@ struct SSymmetricMatrix { uint64_t checksum() const { uint64_t result = 0u; for (std::size_t i = 0u; i < m_LowerTriangle.size(); ++i) { - result = core::CHashing::hashCombine(result, static_cast(m_LowerTriangle[i])); + result = core::CHashing::hashCombine( + result, static_cast(m_LowerTriangle[i])); } return result; } @@ -227,13 +235,9 @@ class CSymmetricMatrixNxN CSymmetricMatrixNxN, boost::partially_ordered< CSymmetricMatrixNxN, - boost::addable< - CSymmetricMatrixNxN, - boost::subtractable< - CSymmetricMatrixNxN, - boost::multipliable< - CSymmetricMatrixNxN, - boost::multipliable2, T, boost::dividable2, T>>>>>>>, + boost::addable, + boost::subtractable, + boost::multipliable, boost::multipliable2, T, boost::dividable2, T>>>>>>>, private linear_algebra_detail::SSymmetricMatrix>, private linear_algebra_detail::CBoundsCheck::InRange { private: @@ -249,11 +253,15 @@ class CSymmetricMatrixNxN public: //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + static bool dynamicSizeAlwaysZero() { + return core::memory_detail::SDynamicSizeAlwaysZero::value(); + } public: //! Set to multiple of ones matrix. 
- explicit CSymmetricMatrixNxN(T v = T(0)) { std::fill_n(&TBase::m_LowerTriangle[0], N * (N + 1) / 2, v); } + explicit CSymmetricMatrixNxN(T v = T(0)) { + std::fill_n(&TBase::m_LowerTriangle[0], N * (N + 1) / 2, v); + } //! Construct from C-style array of arrays. explicit CSymmetricMatrixNxN(const TArray& m) { @@ -317,7 +325,9 @@ class CSymmetricMatrixNxN //! \name Persistence //@{ //! Create from a delimited string. - bool fromDelimited(const std::string& str) { return this->TBase::fromDelimited(str); } + bool fromDelimited(const std::string& str) { + return this->TBase::fromDelimited(str); + } //! Convert to a delimited string. std::string toDelimited() const { return this->TBase::toDelimited(); } @@ -330,10 +340,14 @@ class CSymmetricMatrixNxN std::size_t columns() const { return N; } //! Get the i,j 'th component (no bounds checking). - inline T operator()(std::size_t i, std::size_t j) const { return this->element(i, j); } + inline T operator()(std::size_t i, std::size_t j) const { + return this->element(i, j); + } //! Get the i,j 'th component (no bounds checking). - inline T& operator()(std::size_t i, std::size_t j) { return this->element(i, j); } + inline T& operator()(std::size_t i, std::size_t j) { + return this->element(i, j); + } //! Get an iterator over the elements. TConstIterator begin() const { return TBase::m_LowerTriangle.begin(); } @@ -388,10 +402,14 @@ class CSymmetricMatrixNxN // supported. //! Check if two matrices are identically equal. - bool operator==(const CSymmetricMatrixNxN& other) const { return this->equal(other.base()); } + bool operator==(const CSymmetricMatrixNxN& other) const { + return this->equal(other.base()); + } //! Lexicographical total ordering. - bool operator<(const CSymmetricMatrixNxN& rhs) const { return this->less(rhs.base()); } + bool operator<(const CSymmetricMatrixNxN& rhs) const { + return this->less(rhs.base()); + } //! Check if this is zero. bool isZero() const { return this->TBase::isZero(); } @@ -440,7 +458,9 @@ class CSymmetricMatrixNxN //! \brief Gets a zero symmetric matrix with specified dimension. template struct SZero> { - static CSymmetricMatrixNxN get(std::size_t /*dimension*/) { return CSymmetricMatrixNxN(T(0)); } + static CSymmetricMatrixNxN get(std::size_t /*dimension*/) { + return CSymmetricMatrixNxN(T(0)); + } }; // ************************ HEAP SYMMETRIC MATRIX ************************ @@ -480,12 +500,8 @@ class CSymmetricMatrix CSymmetricMatrix, boost::partially_ordered< CSymmetricMatrix, - boost::addable< - CSymmetricMatrix, - boost::subtractable< - CSymmetricMatrix, - boost::multipliable, - boost::multipliable2, T, boost::dividable2, T>>>>>>>, + boost::addable, + boost::subtractable, boost::multipliable, boost::multipliable2, T, boost::dividable2, T>>>>>>>, private linear_algebra_detail::SSymmetricMatrix> { private: using TBase = linear_algebra_detail::SSymmetricMatrix>; @@ -516,7 +532,8 @@ class CSymmetricMatrix //! Construct from a small vector of small vectors. template - explicit CSymmetricMatrix(const core::CSmallVectorBase>& m) : m_D(m.size()) { + explicit CSymmetricMatrix(const core::CSmallVectorBase>& m) + : m_D(m.size()) { TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); for (std::size_t i = 0u, i_ = 0u; i < m_D; ++i) { for (std::size_t j = 0u; j <= i; ++j, ++i_) { @@ -588,10 +605,14 @@ class CSymmetricMatrix std::size_t columns() const { return m_D; } //! Get the i,j 'th component (no bounds checking). 
- inline T operator()(std::size_t i, std::size_t j) const { return this->element(i, j); } + inline T operator()(std::size_t i, std::size_t j) const { + return this->element(i, j); + } //! Get the i,j 'th component (no bounds checking). - inline T& operator()(std::size_t i, std::size_t j) { return this->element(i, j); } + inline T& operator()(std::size_t i, std::size_t j) { + return this->element(i, j); + } //! Get an iterator over the elements. TConstIterator begin() const { return TBase::m_X.begin(); } @@ -646,10 +667,14 @@ class CSymmetricMatrix // supported. //! Check if two matrices are identically equal. - bool operator==(const CSymmetricMatrix& other) const { return this->equal(other.base()); } + bool operator==(const CSymmetricMatrix& other) const { + return this->equal(other.base()); + } //! Lexicographical total ordering. - bool operator<(const CSymmetricMatrix& rhs) const { return this->less(rhs.base()); } + bool operator<(const CSymmetricMatrix& rhs) const { + return this->less(rhs.base()); + } //! Check if this is zero. bool isZero() const { return this->TBase::isZero(); } @@ -692,12 +717,16 @@ class CSymmetricMatrix } //! Get a checksum for the matrix. - uint64_t checksum() const { return core::CHashing::hashCombine(this->TBase::checksum(), static_cast(m_D)); } + uint64_t checksum() const { + return core::CHashing::hashCombine(this->TBase::checksum(), + static_cast(m_D)); + } private: //! Compute the dimension from the number of elements. std::size_t dimension(std::size_t n) const { - return static_cast((std::sqrt(8.0 * static_cast(n) + 1.0) - 1.0) / 2.0 + 0.5); + return static_cast( + (std::sqrt(8.0 * static_cast(n) + 1.0) - 1.0) / 2.0 + 0.5); } private: @@ -708,7 +737,9 @@ class CSymmetricMatrix //! \brief Gets a zero symmetric matrix with specified dimension. template struct SZero> { - static CSymmetricMatrix get(std::size_t dimension) { return CSymmetricMatrix(dimension, T(0)); } + static CSymmetricMatrix get(std::size_t dimension) { + return CSymmetricMatrix(dimension, T(0)); + } }; namespace linear_algebra_detail { @@ -793,7 +824,8 @@ struct SVector { //! Check if this is zero. bool isZero() const { - return std::find_if(m_X.begin(), m_X.end(), [](double xi) { return xi != 0.0; }) == m_X.end(); + return std::find_if(m_X.begin(), m_X.end(), + [](double xi) { return xi != 0.0; }) == m_X.end(); } //! Inner product. @@ -880,13 +912,7 @@ class CVectorNx1 boost::partially_ordered< CVectorNx1, boost::addable, - boost::subtractable< - CVectorNx1, - boost::multipliable< - CVectorNx1, - boost::multipliable2, - T, - boost::dividable, boost::dividable2, T>>>>>>>>, + boost::subtractable, boost::multipliable, boost::multipliable2, T, boost::dividable, boost::dividable2, T>>>>>>>>, private linear_algebra_detail::SVector>, private linear_algebra_detail::CBoundsCheck::InRange { private: @@ -902,7 +928,9 @@ class CVectorNx1 public: //! See core::CMemory. - static bool dynamicSizeAlwaysZero() { return core::memory_detail::SDynamicSizeAlwaysZero::value(); } + static bool dynamicSizeAlwaysZero() { + return core::memory_detail::SDynamicSizeAlwaysZero::value(); + } public: //! Set to multiple of ones vector. @@ -970,7 +998,9 @@ class CVectorNx1 //! \name Persistence //@{ //! Create from a delimited string. - bool fromDelimited(const std::string& str) { return this->TBase::fromDelimited(str); } + bool fromDelimited(const std::string& str) { + return this->TBase::fromDelimited(str); + } //! Convert to a delimited string. 
std::string toDelimited() const { return this->TBase::toDelimited(); } @@ -1035,16 +1065,22 @@ class CVectorNx1 } //! Check if two vectors are identically equal. - bool operator==(const CVectorNx1& other) const { return this->equal(other.base()); } + bool operator==(const CVectorNx1& other) const { + return this->equal(other.base()); + } //! Lexicographical total ordering. - bool operator<(const CVectorNx1& rhs) const { return this->less(rhs.base()); } + bool operator<(const CVectorNx1& rhs) const { + return this->less(rhs.base()); + } //! Check if this is zero. bool isZero() const { return this->TBase::isZero(); } //! Inner product. - double inner(const CVectorNx1& covector) const { return this->TBase::inner(covector.base()); } + double inner(const CVectorNx1& covector) const { + return this->TBase::inner(covector.base()); + } //! Inner product. template @@ -1055,12 +1091,16 @@ class CVectorNx1 //! Outer product. //! //! \note The copy should be avoided by RVO. - CSymmetricMatrixNxN outer() const { return CSymmetricMatrixNxN(E_OuterProduct, *this); } + CSymmetricMatrixNxN outer() const { + return CSymmetricMatrixNxN(E_OuterProduct, *this); + } //! A diagonal matrix. //! //! \note The copy should be avoided by RVO. - CSymmetricMatrixNxN diagonal() const { return CSymmetricMatrixNxN(E_Diagonal, *this); } + CSymmetricMatrixNxN diagonal() const { + return CSymmetricMatrixNxN(E_Diagonal, *this); + } //! L1 norm. double L1() const { return this->TBase::L1(); } @@ -1110,7 +1150,8 @@ class CVectorNx1 //! Construct from the outer product of a vector with itself. template -CSymmetricMatrixNxN::CSymmetricMatrixNxN(ESymmetricMatrixType type, const CVectorNx1& x) { +CSymmetricMatrixNxN::CSymmetricMatrixNxN(ESymmetricMatrixType type, + const CVectorNx1& x) { switch (type) { case E_OuterProduct: for (std::size_t i = 0u, i_ = 0u; i < N; ++i) { @@ -1132,7 +1173,9 @@ CSymmetricMatrixNxN::CSymmetricMatrixNxN(ESymmetricMatrixType type, const //! \brief Gets a zero vector with specified dimension. template struct SZero> { - static CVectorNx1 get(std::size_t /*dimension*/) { return CVectorNx1(T(0)); } + static CVectorNx1 get(std::size_t /*dimension*/) { + return CVectorNx1(T(0)); + } }; // ************************ HEAP VECTOR ************************ @@ -1163,13 +1206,7 @@ class CVector CVector, boost::partially_ordered< CVector, - boost::addable< - CVector, - boost::subtractable< - CVector, - boost::multipliable< - CVector, - boost::multipliable2, T, boost::dividable, boost::dividable2, T>>>>>>>>, + boost::addable, boost::subtractable, boost::multipliable, boost::multipliable2, T, boost::dividable, boost::dividable2, T>>>>>>>>, private linear_algebra_detail::SVector> { private: using TBase = linear_algebra_detail::SVector>; @@ -1200,7 +1237,9 @@ class CVector explicit CVector(const TArray& v) { TBase::m_X = v; } //! Construct from a vector. - explicit CVector(const core::CSmallVectorBase& v) { TBase::m_X.assign(v.begin(), v.end()); } + explicit CVector(const core::CSmallVectorBase& v) { + TBase::m_X.assign(v.begin(), v.end()); + } //! Construct from the range [\p begin, \p end). template @@ -1241,7 +1280,9 @@ class CVector //! Extend the vector to dimension \p d adding components //! initialized to \p v. - void extend(std::size_t d, T v = T(0)) { TBase::m_X.resize(this->dimension() + d, v); } + void extend(std::size_t d, T v = T(0)) { + TBase::m_X.resize(this->dimension() + d, v); + } //! Extend the vector adding components initialized to \p v. template @@ -1252,7 +1293,9 @@ class CVector //! 
\name Persistence
 //@{
 //! Create from a delimited string.
- bool fromDelimited(const std::string& str) { return this->TBase::fromDelimited(str); }
+ bool fromDelimited(const std::string& str) {
+ return this->TBase::fromDelimited(str);
+ }
 //! Persist state to delimited values.
 std::string toDelimited() const { return this->TBase::toDelimited(); }
@@ -1317,7 +1360,9 @@ class CVector
 }
 //! Check if two vectors are identically equal.
- bool operator==(const CVector& other) const { return this->equal(other.base()); }
+ bool operator==(const CVector& other) const {
+ return this->equal(other.base());
+ }
 //! Lexicographical total ordering.
 bool operator<(const CVector& rhs) const { return this->less(rhs.base()); }
@@ -1326,7 +1371,9 @@ class CVector
 bool isZero() const { return this->TBase::isZero(); }
 //! Inner product.
- double inner(const CVector& covector) const { return this->TBase::inner(covector.base()); }
+ double inner(const CVector& covector) const {
+ return this->TBase::inner(covector.base());
+ }
 //! Inner product.
 template
@@ -1337,12 +1384,16 @@ class CVector
 //! Outer product.
 //!
 //! \note The copy should be avoided by RVO.
- CSymmetricMatrix<T> outer() const { return CSymmetricMatrix<T>(E_OuterProduct, *this); }
+ CSymmetricMatrix<T> outer() const {
+ return CSymmetricMatrix<T>(E_OuterProduct, *this);
+ }
 //! A diagonal matrix.
 //!
 //! \note The copy should be avoided by RVO.
- CSymmetricMatrix<T> diagonal() const { return CSymmetricMatrix<T>(E_Diagonal, *this); }
+ CSymmetricMatrix<T> diagonal() const {
+ return CSymmetricMatrix<T>(E_Diagonal, *this);
+ }
 //! L1 norm.
 double L1() const { return this->TBase::L1(); }
@@ -1413,7 +1464,9 @@ CSymmetricMatrix::CSymmetricMatrix(ESymmetricMatrixType type, const CVector
 struct SZero<CVector<T>> {
- static CVector<T> get(std::size_t dimension) { return CVector<T>(dimension, T(0)); }
+ static CVector<T> get(std::size_t dimension) {
+ return CVector<T>(dimension, T(0));
+ }
 };
 // ************************ FREE FUNCTIONS ************************
diff --git a/include/maths/CLinearAlgebraEigen.h b/include/maths/CLinearAlgebraEigen.h
index 483b407c7c..bb4ba18f75 100644
--- a/include/maths/CLinearAlgebraEigen.h
+++ b/include/maths/CLinearAlgebraEigen.h
@@ -20,16 +20,17 @@
 #include
 namespace Eigen {
-#define LESS_OR_GREATER(l, r) \
- if (l < r) { \
- return true; \
- } else if (r < l) { \
- return false; \
+#define LESS_OR_GREATER(l, r) \
+ if (l < r) { \
+ return true; \
+ } else if (r < l) { \
+ return false; \
 }
 //! Less than on Eigen sparse matrix.
 template<typename SCALAR, int FLAGS, typename STORAGE_INDEX>
-bool operator<(const SparseMatrix<SCALAR, FLAGS, STORAGE_INDEX>& lhs, const SparseMatrix<SCALAR, FLAGS, STORAGE_INDEX>& rhs) {
+bool operator<(const SparseMatrix<SCALAR, FLAGS, STORAGE_INDEX>& lhs,
+ const SparseMatrix<SCALAR, FLAGS, STORAGE_INDEX>& rhs) {
 LESS_OR_GREATER(lhs.rows(), rhs.rows())
 LESS_OR_GREATER(lhs.cols(), rhs.cols())
 for (STORAGE_INDEX i = 0; i < lhs.rows(); ++i) {
@@ -42,7 +43,8 @@ bool operator<(const SparseMatrix& lhs, const Spar
 //! Less than on Eigen sparse vector.
 template<typename SCALAR, int FLAGS, typename STORAGE_INDEX>
-bool operator<(const SparseVector<SCALAR, FLAGS, STORAGE_INDEX>& lhs, const SparseVector<SCALAR, FLAGS, STORAGE_INDEX>& rhs) {
+bool operator<(const SparseVector<SCALAR, FLAGS, STORAGE_INDEX>& lhs,
+ const SparseVector<SCALAR, FLAGS, STORAGE_INDEX>& rhs) {
 LESS_OR_GREATER(lhs.size(), rhs.size())
 for (STORAGE_INDEX i = 0; i < lhs.size(); ++i) {
 LESS_OR_GREATER(lhs.coeff(i), rhs.coeff(i))
@@ -67,13 +69,15 @@ bool operator<(const Matrix& lh
 //! Free swap picked up by std:: algorithms etc.
 template<typename SCALAR, int FLAGS, typename STORAGE_INDEX>
-void swap(SparseVector<SCALAR, FLAGS, STORAGE_INDEX>& lhs, SparseVector<SCALAR, FLAGS, STORAGE_INDEX>& rhs) {
+void swap(SparseVector<SCALAR, FLAGS, STORAGE_INDEX>& lhs,
+ SparseVector<SCALAR, FLAGS, STORAGE_INDEX>& rhs) {
 lhs.swap(rhs);
 }
 //! Free swap picked up by std:: algorithms etc. 
template -void swap(Matrix& lhs, Matrix& rhs) { +void swap(Matrix& lhs, + Matrix& rhs) { lhs.swap(rhs); } @@ -90,7 +94,9 @@ using CSparseMatrix = Eigen::SparseMatrix; //! \brief Gets a zero sparse matrix with specified dimensions. template struct SZero> { - static CSparseMatrix get(std::ptrdiff_t rows, std::ptrdiff_t cols) { return CSparseMatrix(rows, cols); } + static CSparseMatrix get(std::ptrdiff_t rows, std::ptrdiff_t cols) { + return CSparseMatrix(rows, cols); + } }; //! The type of an element of a sparse matrix in coordinate form. @@ -104,7 +110,9 @@ using CSparseVector = Eigen::SparseVector; //! \brief Gets a zero sparse vector with specified dimension. template struct SZero> { - static CSparseVector get(std::ptrdiff_t dimension) { return CSparseVector(dimension); } + static CSparseVector get(std::ptrdiff_t dimension) { + return CSparseVector(dimension); + } }; //! The type of an element of a sparse vector in coordinate form. @@ -113,7 +121,8 @@ using CSparseVectorCoordinate = Eigen::Triplet; //! Create a tuple with which to initialize a sparse matrix. template -inline CSparseMatrixElement matrixElement(std::ptrdiff_t row, std::ptrdiff_t column, SCALAR value) { +inline CSparseMatrixElement +matrixElement(std::ptrdiff_t row, std::ptrdiff_t column, SCALAR value) { return CSparseMatrixElement(row, column, value); } @@ -125,15 +134,22 @@ inline CSparseVectorCoordinate vectorCoordinate(std::ptrdiff_t row, SCAL //! \brief Adapts Eigen::SparseVector::InnerIterator for use with STL. template -class CSparseVectorIndexIterator : public std::iterator { - CSparseVectorIndexIterator(const CSparseVector& vector, std::size_t index) : m_Vector(&vector), m_Base(vector, index) {} +class CSparseVectorIndexIterator + : public std::iterator { + CSparseVectorIndexIterator(const CSparseVector& vector, std::size_t index) + : m_Vector(&vector), m_Base(vector, index) {} bool operator==(const CSparseVectorIndexIterator& rhs) const { - return m_Vector == rhs.m_Vector && m_Base.row() == rhs.m_Base.row() && m_Base.col() == rhs.m_Base.col(); + return m_Vector == rhs.m_Vector && m_Base.row() == rhs.m_Base.row() && + m_Base.col() == rhs.m_Base.col(); + } + bool operator!=(const CSparseVectorIndexIterator& rhs) const { + return !(*this == rhs); } - bool operator!=(const CSparseVectorIndexIterator& rhs) const { return !(*this == rhs); } - std::ptrdiff_t operator*() const { return std::max(m_Base.row(), m_Base.col()); } + std::ptrdiff_t operator*() const { + return std::max(m_Base.row(), m_Base.col()); + } CSparseVectorIndexIterator& operator++() { ++m_Base; @@ -155,13 +171,15 @@ class CSparseVectorIndexIterator : public std::iterator -CSparseVectorIndexIterator beginIndices(const CSparseVector& vector) { +CSparseVectorIndexIterator +beginIndices(const CSparseVector& vector) { return CSparseVectorIndexIterator(vector, 0); } //! Get the end iterator of the indices of \p vector. template -CSparseVectorIndexIterator endIndices(const CSparseVector& vector) { +CSparseVectorIndexIterator +endIndices(const CSparseVector& vector) { return CSparseVectorIndexIterator(vector, vector.data().size()); } @@ -172,7 +190,9 @@ using CDenseMatrix = Eigen::Matrix; //! \brief Gets a zero dense vector with specified dimension. template struct SZero> { - static CDenseMatrix get(std::ptrdiff_t rows, std::ptrdiff_t cols) { return CDenseMatrix::Zero(rows, cols); } + static CDenseMatrix get(std::ptrdiff_t rows, std::ptrdiff_t cols) { + return CDenseMatrix::Zero(rows, cols); + } }; //! Rename to follow our conventions and add to ml::maths. 
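CSparseVectorIndexIterator adapts Eigen's InnerIterator so standard algorithms can walk the coordinates of the non-zero entries; beginIndices and endIndices are its factories. A usage sketch, assuming Eigen is available and the CSparseVector typedef above:

    #include <maths/CLinearAlgebraEigen.h>

    #include <iostream>

    using namespace ml;

    int main() {
        maths::CSparseVector<double> v(10);
        v.insert(3) = 1.5;    // Eigen::SparseVector API
        v.insert(7) = -2.0;
        for (auto i = maths::beginIndices(v); i != maths::endIndices(v); ++i) {
            std::cout << *i << '\n';   // prints 3 then 7
        }
        return 0;
    }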
@@ -182,7 +202,9 @@ using CDenseVector = Eigen::Matrix; //! \brief Gets a zero dense vector with specified dimension. template struct SZero> { - static CDenseVector get(std::ptrdiff_t dimension) { return CDenseVector::Zero(dimension); } + static CDenseVector get(std::ptrdiff_t dimension) { + return CDenseVector::Zero(dimension); + } }; //! \brief Eigen matrix typedef. @@ -277,7 +299,9 @@ class CDenseMatrixInitializer { std::size_t rows() const { return m_Type->rows(); } - double get(std::size_t i, std::size_t j) const { return (m_Type->template selfadjointView())(i, j); } + double get(std::size_t i, std::size_t j) const { + return (m_Type->template selfadjointView())(i, j); + } private: const MATRIX* m_Type; diff --git a/include/maths/CLinearAlgebraPersist.h b/include/maths/CLinearAlgebraPersist.h index b9e59da1d5..f8f46abf18 100644 --- a/include/maths/CLinearAlgebraPersist.h +++ b/include/maths/CLinearAlgebraPersist.h @@ -22,7 +22,9 @@ struct SFromString { bool operator()(const std::string& token, T& value) const { return core::CStringUtils::stringToType(token, value); } - bool operator()(const std::string& token, CFloatStorage& value) const { return value.fromString(token); } + bool operator()(const std::string& token, CFloatStorage& value) const { + return value.fromString(token); + } }; //! \brief Converts a vector component / matrix element to a string. @@ -31,18 +33,24 @@ struct SToString { std::string operator()(const T& value) const { return core::CStringUtils::typeToString(value); } - std::string operator()(double value) const { return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision); } - std::string operator()(CFloatStorage value) const { return value.toString(); } + std::string operator()(double value) const { + return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision); + } + std::string operator()(CFloatStorage value) const { + return value.toString(); + } }; template bool SSymmetricMatrix::fromDelimited(const std::string& str) { - return core::CPersistUtils::fromString(str, SFromString(), m_LowerTriangle, CLinearAlgebra::DELIMITER); + return core::CPersistUtils::fromString(str, SFromString(), m_LowerTriangle, + CLinearAlgebra::DELIMITER); } template std::string SSymmetricMatrix::toDelimited() const { - return core::CPersistUtils::toString(m_LowerTriangle, SToString(), CLinearAlgebra::DELIMITER); + return core::CPersistUtils::toString(m_LowerTriangle, SToString(), + CLinearAlgebra::DELIMITER); } template diff --git a/include/maths/CLinearAlgebraTools.h b/include/maths/CLinearAlgebraTools.h index 1b23ed1939..d39d79117d 100644 --- a/include/maths/CLinearAlgebraTools.h +++ b/include/maths/CLinearAlgebraTools.h @@ -221,13 +221,11 @@ struct SFabs { } }; -#define INVERSE_QUADRATIC_PRODUCT(T, N) \ - MATHS_EXPORT \ - maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, \ - const CSymmetricMatrixNxN& covariance, \ - const CVectorNx1& residual, \ - double& result, \ - bool ignoreSingularSubspace) +#define INVERSE_QUADRATIC_PRODUCT(T, N) \ + MATHS_EXPORT \ + maths_t::EFloatingPointErrorStatus inverseQuadraticProduct( \ + std::size_t d, const CSymmetricMatrixNxN& covariance, \ + const CVectorNx1& residual, double& result, bool ignoreSingularSubspace) INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 2); INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 3); INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 4); @@ -238,25 +236,25 @@ INVERSE_QUADRATIC_PRODUCT(double, 4); INVERSE_QUADRATIC_PRODUCT(double, 5); #undef 
INVERSE_QUADRATIC_PRODUCT MATHS_EXPORT -maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, - const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +inverseQuadraticProduct(std::size_t d, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace); MATHS_EXPORT -maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, - const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace); - -#define GAUSSIAN_LOG_LIKELIHOOD(T, N) \ - MATHS_EXPORT \ - maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, \ - const CSymmetricMatrixNxN& covariance, \ - const CVectorNx1& residual, \ - double& result, \ - bool ignoreSingularSubspace) +maths_t::EFloatingPointErrorStatus +inverseQuadraticProduct(std::size_t d, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace); + +#define GAUSSIAN_LOG_LIKELIHOOD(T, N) \ + MATHS_EXPORT \ + maths_t::EFloatingPointErrorStatus gaussianLogLikelihood( \ + std::size_t d, const CSymmetricMatrixNxN& covariance, \ + const CVectorNx1& residual, double& result, bool ignoreSingularSubspace) GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 2); GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 3); GAUSSIAN_LOG_LIKELIHOOD(CFloatStorage, 4); @@ -267,24 +265,25 @@ GAUSSIAN_LOG_LIKELIHOOD(double, 4); GAUSSIAN_LOG_LIKELIHOOD(double, 5); #undef GAUSSIAN_LOG_LIKELIHOOD MATHS_EXPORT -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, - const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +gaussianLogLikelihood(std::size_t d, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace); MATHS_EXPORT -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, - const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +gaussianLogLikelihood(std::size_t d, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace); //! Shared implementation of Gaussian sampling. -#define SAMPLE_GAUSSIAN(T, N) \ - MATHS_EXPORT \ - void sampleGaussian(std::size_t n, \ - const CVectorNx1& mean, \ - const CSymmetricMatrixNxN& covariance, \ +#define SAMPLE_GAUSSIAN(T, N) \ + MATHS_EXPORT \ + void sampleGaussian(std::size_t n, const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, \ std::vector>& result) SAMPLE_GAUSSIAN(CFloatStorage, 2); SAMPLE_GAUSSIAN(CFloatStorage, 3); @@ -307,10 +306,11 @@ void sampleGaussian(std::size_t n, std::vector>& result); //! Shared implementation of the log-determinant function. 
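For orientation, the quantities these declarations expose are the inverse quadratic form r^T C^{-1} r and the Gaussian log-likelihood -(d log(2 pi) + log|C| + r^T C^{-1} r) / 2. A hand-rolled reference for a diagonal 2x2 covariance, plain C++ with illustrative names (the library overloads handle full matrices and, optionally, singular subspaces):

    #include <array>
    #include <cmath>

    // Reference only: covariance diag(c[0], c[1]) and residual r.
    double inverseQuadraticProductRef(const std::array<double, 2>& c,
                                      const std::array<double, 2>& r) {
        return r[0] * r[0] / c[0] + r[1] * r[1] / c[1];
    }

    double gaussianLogLikelihoodRef(const std::array<double, 2>& c,
                                    const std::array<double, 2>& r) {
        const double logTwoPi = std::log(2.0 * 3.141592653589793);
        return -0.5 * (2.0 * logTwoPi + std::log(c[0] * c[1]) +
                       inverseQuadraticProductRef(c, r));
    }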
-#define LOG_DETERMINANT(T, N) \ - MATHS_EXPORT \ - maths_t::EFloatingPointErrorStatus logDeterminant( \ - std::size_t d, const CSymmetricMatrixNxN& matrix, double& result, bool ignoreSingularSubspace) +#define LOG_DETERMINANT(T, N) \ + MATHS_EXPORT \ + maths_t::EFloatingPointErrorStatus logDeterminant( \ + std::size_t d, const CSymmetricMatrixNxN& matrix, \ + double& result, bool ignoreSingularSubspace) LOG_DETERMINANT(CFloatStorage, 2); LOG_DETERMINANT(CFloatStorage, 3); LOG_DETERMINANT(CFloatStorage, 4); @@ -321,11 +321,15 @@ LOG_DETERMINANT(double, 4); LOG_DETERMINANT(double, 5); #undef LOG_DETERMINANT MATHS_EXPORT -maths_t::EFloatingPointErrorStatus -logDeterminant(std::size_t d, const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, + const CSymmetricMatrix& matrix, + double& result, + bool ignoreSingularSubspace); MATHS_EXPORT -maths_t::EFloatingPointErrorStatus -logDeterminant(std::size_t d, const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, + const CSymmetricMatrix& matrix, + double& result, + bool ignoreSingularSubspace); } //! Output for debug. @@ -383,14 +387,16 @@ std::ostream& operator<<(std::ostream& o, const CVector& v) { template CVectorNx1 sqrt(const CVectorNx1& v) { CVectorNx1 result(v); - linear_algebra_tools_detail::SSqrt::calculate(N, result); + linear_algebra_tools_detail::SSqrt::calculate( + N, result); return result; } //! Overload sqrt for CSymmetricMatrixNxN. template CSymmetricMatrixNxN sqrt(const CSymmetricMatrixNxN& m) { CSymmetricMatrixNxN result(m); - linear_algebra_tools_detail::SSqrt::calculate(N, result); + linear_algebra_tools_detail::SSqrt::calculate( + N, result); return result; } @@ -398,42 +404,49 @@ CSymmetricMatrixNxN sqrt(const CSymmetricMatrixNxN& m) { template CVectorNx1 min(const CVectorNx1& lhs, const CVectorNx1& rhs) { CVectorNx1 result(rhs); - linear_algebra_tools_detail::SMin::calculate(N, lhs, result); + linear_algebra_tools_detail::SMin::calculate( + N, lhs, result); return result; } //! Overload minimum for CVectorNx1. template CVectorNx1 min(const CVectorNx1& lhs, const T& rhs) { CVectorNx1 result(lhs); - linear_algebra_tools_detail::SMin::calculate(N, result, rhs); + linear_algebra_tools_detail::SMin::calculate( + N, result, rhs); return result; } //! Overload minimum for CVectorNx1. template CVectorNx1 min(const T& lhs, const CVectorNx1& rhs) { CVectorNx1 result(rhs); - linear_algebra_tools_detail::SMin::calculate(N, lhs, result); + linear_algebra_tools_detail::SMin::calculate( + N, lhs, result); return result; } //! Overload minimum for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN min(const CSymmetricMatrixNxN& lhs, const CSymmetricMatrixNxN& rhs) { +CSymmetricMatrixNxN +min(const CSymmetricMatrixNxN& lhs, const CSymmetricMatrixNxN& rhs) { CSymmetricMatrixNxN result(rhs); - linear_algebra_tools_detail::SMin::calculate(N, lhs, result); + linear_algebra_tools_detail::SMin::calculate( + N, lhs, result); return result; } //! Overload minimum for CSymmetricMatrixNxN. template CSymmetricMatrixNxN min(const CSymmetricMatrixNxN& lhs, const T& rhs) { CSymmetricMatrixNxN result(lhs); - linear_algebra_tools_detail::SMin::calculate(N, result, rhs); + linear_algebra_tools_detail::SMin::calculate( + N, result, rhs); return result; } //! Overload minimum for CSymmetricMatrixNxN. 
template CSymmetricMatrixNxN min(const T& lhs, const CSymmetricMatrixNxN& rhs) { CSymmetricMatrixNxN result(rhs); - linear_algebra_tools_detail::SMin::calculate(N, lhs, result); + linear_algebra_tools_detail::SMin::calculate( + N, lhs, result); return result; } @@ -441,42 +454,49 @@ CSymmetricMatrixNxN min(const T& lhs, const CSymmetricMatrixNxN& rhs template CVectorNx1 max(const CVectorNx1& lhs, const CVectorNx1& rhs) { CVectorNx1 result(rhs); - linear_algebra_tools_detail::SMax::calculate(N, lhs, result); + linear_algebra_tools_detail::SMax::calculate( + N, lhs, result); return result; } //! Overload maximum for CVectorNx1. template CVectorNx1 max(const CVectorNx1& lhs, const T& rhs) { CVectorNx1 result(lhs); - linear_algebra_tools_detail::SMax::calculate(N, result, rhs); + linear_algebra_tools_detail::SMax::calculate( + N, result, rhs); return result; } //! Overload maximum for CVectorNx1. template CVectorNx1 max(const T& lhs, const CVectorNx1& rhs) { CVectorNx1 result(rhs); - linear_algebra_tools_detail::SMax::calculate(N, lhs, result); + linear_algebra_tools_detail::SMax::calculate( + N, lhs, result); return result; } //! Overload maximum for CSymmetricMatrixNxN. template -CSymmetricMatrixNxN max(const CSymmetricMatrixNxN& lhs, const CSymmetricMatrixNxN& rhs) { +CSymmetricMatrixNxN +max(const CSymmetricMatrixNxN& lhs, const CSymmetricMatrixNxN& rhs) { CSymmetricMatrixNxN result(rhs); - linear_algebra_tools_detail::SMax::calculate(N, lhs, result); + linear_algebra_tools_detail::SMax::calculate( + N, lhs, result); return result; } //! Overload maximum for CSymmetricMatrixNxN. template CSymmetricMatrixNxN max(const CSymmetricMatrixNxN& lhs, const T& rhs) { CSymmetricMatrixNxN result(lhs); - linear_algebra_tools_detail::SMax::calculate(N, result, rhs); + linear_algebra_tools_detail::SMax::calculate( + N, result, rhs); return result; } //! Overload maximum for CSymmetricMatrixNxN. template CSymmetricMatrixNxN max(const T& lhs, const CSymmetricMatrixNxN& rhs) { CSymmetricMatrixNxN result(rhs); - linear_algebra_tools_detail::SMax::calculate(N, lhs, result); + linear_algebra_tools_detail::SMax::calculate( + N, lhs, result); return result; } @@ -484,14 +504,16 @@ CSymmetricMatrixNxN max(const T& lhs, const CSymmetricMatrixNxN& rhs template CVectorNx1 fabs(const CVectorNx1& v) { CVectorNx1 result(v); - linear_algebra_tools_detail::SFabs::calculate(N, result); + linear_algebra_tools_detail::SFabs::calculate( + N, result); return result; } //! Overload ::fabs for CSymmetricMatrixNxN. template CSymmetricMatrixNxN fabs(const CSymmetricMatrixNxN& m) { CSymmetricMatrixNxN result(m); - linear_algebra_tools_detail::SFabs::calculate(N, result); + linear_algebra_tools_detail::SFabs::calculate( + N, result); return result; } @@ -499,14 +521,16 @@ CSymmetricMatrixNxN fabs(const CSymmetricMatrixNxN& m) { template CVector sqrt(const CVector& v) { CVector result(v); - linear_algebra_tools_detail::SSqrt::calculate(result.dimension(), result); + linear_algebra_tools_detail::SSqrt::calculate( + result.dimension(), result); return result; } //! Overload sqrt for CSymmetricMatrix. 
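All of these overloads apply the scalar operation componentwise. A short sketch for the fixed-size types, assuming they live in ml::maths as the surrounding code suggests; the CVector and CSymmetricMatrix overloads below behave identically with run-time dimensions:

    #include <maths/CLinearAlgebra.h>
    #include <maths/CLinearAlgebraTools.h>

    using namespace ml;

    int main() {
        maths::CVectorNx1<double, 3> v(4.0);    // (4, 4, 4)
        auto s = maths::sqrt(v);                // (2, 2, 2)
        auto lo = maths::min(v, 3.0);           // (3, 3, 3)
        auto hi = maths::max(v, 5.0);           // (5, 5, 5)
        auto a = maths::fabs(v);                // (4, 4, 4)
        return 0;
    }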
template CSymmetricMatrix sqrt(const CSymmetricMatrix& m) { CSymmetricMatrix result(m); - linear_algebra_tools_detail::SSqrt::calculate(result.rows(), result); + linear_algebra_tools_detail::SSqrt::calculate( + result.rows(), result); return result; } @@ -514,42 +538,48 @@ CSymmetricMatrix sqrt(const CSymmetricMatrix& m) { template CVector min(const CVector& lhs, const CVector& rhs) { CVector result(rhs); - linear_algebra_tools_detail::SMin::calculate(result.dimension(), lhs, result); + linear_algebra_tools_detail::SMin::calculate( + result.dimension(), lhs, result); return result; } //! Overload minimum for CVector. template CVector min(const CVector& lhs, const T& rhs) { CVector result(lhs); - linear_algebra_tools_detail::SMin::calculate(result.dimension(), result, rhs); + linear_algebra_tools_detail::SMin::calculate( + result.dimension(), result, rhs); return result; } //! Overload minimum for CVector. template CVector min(const T& lhs, const CVector& rhs) { CVector result(rhs); - linear_algebra_tools_detail::SMin::calculate(result.dimension(), lhs, result); + linear_algebra_tools_detail::SMin::calculate( + result.dimension(), lhs, result); return result; } //! Overload minimum for CSymmetricMatrix. template CSymmetricMatrix min(const CSymmetricMatrix& lhs, const CSymmetricMatrix& rhs) { CSymmetricMatrix result(rhs); - linear_algebra_tools_detail::SMin::calculate(result.rows(), lhs, result); + linear_algebra_tools_detail::SMin::calculate( + result.rows(), lhs, result); return result; } //! Overload minimum for CSymmetricMatrix. template CSymmetricMatrix min(const CSymmetricMatrix& lhs, const T& rhs) { CSymmetricMatrix result(lhs); - linear_algebra_tools_detail::SMin::calculate(result.rows(), result, rhs); + linear_algebra_tools_detail::SMin::calculate( + result.rows(), result, rhs); return result; } //! Overload minimum for CSymmetricMatrix. template CSymmetricMatrix min(const T& lhs, const CSymmetricMatrix& rhs) { CSymmetricMatrix result(rhs); - linear_algebra_tools_detail::SMin::calculate(result.rows(), lhs, result); + linear_algebra_tools_detail::SMin::calculate( + result.rows(), lhs, result); return result; } @@ -557,42 +587,48 @@ CSymmetricMatrix min(const T& lhs, const CSymmetricMatrix& rhs) { template CVector max(const CVector& lhs, const CVector& rhs) { CVector result(rhs); - linear_algebra_tools_detail::SMax::calculate(result.dimension(), lhs, result); + linear_algebra_tools_detail::SMax::calculate( + result.dimension(), lhs, result); return result; } //! Overload maximum for CVector. template CVector max(const CVector& lhs, const T& rhs) { CVector result(lhs); - linear_algebra_tools_detail::SMax::calculate(result.dimension(), result, rhs); + linear_algebra_tools_detail::SMax::calculate( + result.dimension(), result, rhs); return result; } //! Overload maximum for CVector. template CVector max(const T& lhs, const CVector& rhs) { CVector result(rhs); - linear_algebra_tools_detail::SMax::calculate(result.dimension(), lhs, result); + linear_algebra_tools_detail::SMax::calculate( + result.dimension(), lhs, result); return result; } //! Overload maximum for CSymmetricMatrix. template CSymmetricMatrix max(const CSymmetricMatrix& lhs, const CSymmetricMatrix& rhs) { CSymmetricMatrix result(rhs); - linear_algebra_tools_detail::SMax::calculate(result.rows(), lhs, result); + linear_algebra_tools_detail::SMax::calculate( + result.rows(), lhs, result); return result; } //! Overload maximum for CSymmetricMatrix. 
template CSymmetricMatrix max(const CSymmetricMatrix& lhs, const T& rhs) { CSymmetricMatrix result(lhs); - linear_algebra_tools_detail::SMax::calculate(result.rows(), result, rhs); + linear_algebra_tools_detail::SMax::calculate( + result.rows(), result, rhs); return result; } //! Overload maximum for CSymmetricMatrix. template CSymmetricMatrix max(const T& lhs, const CSymmetricMatrix& rhs) { CSymmetricMatrix result(rhs); - linear_algebra_tools_detail::SMax::calculate(result.rows(), lhs, result); + linear_algebra_tools_detail::SMax::calculate( + result.rows(), lhs, result); return result; } @@ -600,14 +636,16 @@ CSymmetricMatrix max(const T& lhs, const CSymmetricMatrix& rhs) { template CVector fabs(const CVector& v) { CVector result(v); - linear_algebra_tools_detail::SFabs::calculate(result.dimension(), result); + linear_algebra_tools_detail::SFabs::calculate( + result.dimension(), result); return result; } //! Overload ::fabs for CSymmetricMatrix. template CSymmetricMatrix fabs(const CSymmetricMatrix& m) { CSymmetricMatrix result(m); - linear_algebra_tools_detail::SFabs::calculate(result.dimension(), result); + linear_algebra_tools_detail::SFabs::calculate( + result.dimension(), result); return result; } @@ -660,11 +698,13 @@ void scaleCovariances(const CVector& scale, CSymmetricMatrix& m) { //! residual on a singular subspace of m. Otherwise the result is //! minus infinity in this case. template -maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrixNxN& covariance, - const CVectorNx1& residual, - double& result, - bool ignoreSingularSubspace = true) { - return linear_algebra_tools_detail::inverseQuadraticProduct(N, covariance, residual, result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +inverseQuadraticForm(const CSymmetricMatrixNxN& covariance, + const CVectorNx1& residual, + double& result, + bool ignoreSingularSubspace = true) { + return linear_algebra_tools_detail::inverseQuadraticProduct( + N, covariance, residual, result, ignoreSingularSubspace); } //! Compute the log-likelihood for the residual \p x and covariance @@ -677,11 +717,13 @@ maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrixNx //! residual on a singular subspace of m. Otherwise the result is //! minus infinity in this case. template -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrixNxN& covariance, - const CVectorNx1& residual, - double& result, - bool ignoreSingularSubspace = true) { - return linear_algebra_tools_detail::gaussianLogLikelihood(N, covariance, residual, result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +gaussianLogLikelihood(const CSymmetricMatrixNxN& covariance, + const CVectorNx1& residual, + double& result, + bool ignoreSingularSubspace = true) { + return linear_algebra_tools_detail::gaussianLogLikelihood( + N, covariance, residual, result, ignoreSingularSubspace); } //! Sample from a Gaussian with \p mean and \p covariance in such @@ -706,8 +748,9 @@ void sampleGaussian(std::size_t n, //! \param[in] ignoreSingularSubspace If true then we ignore any //! singular subspace of m. Otherwise, the result is minus infinity. 
template -maths_t::EFloatingPointErrorStatus -logDeterminant(const CSymmetricMatrixNxN& matrix, double& result, bool ignoreSingularSubspace = true) { +maths_t::EFloatingPointErrorStatus logDeterminant(const CSymmetricMatrixNxN& matrix, + double& result, + bool ignoreSingularSubspace = true) { return linear_algebra_tools_detail::logDeterminant(N, matrix, result, ignoreSingularSubspace); } @@ -720,11 +763,13 @@ logDeterminant(const CSymmetricMatrixNxN& matrix, double& result, bool ign //! residual on a singular subspace of m. Otherwise the result is //! minus infinity in this case. template -maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace = true) { - return linear_algebra_tools_detail::inverseQuadraticProduct(covariance.rows(), covariance, residual, result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +inverseQuadraticForm(const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace = true) { + return linear_algebra_tools_detail::inverseQuadraticProduct( + covariance.rows(), covariance, residual, result, ignoreSingularSubspace); } //! Compute the log-likelihood for the residual \p x and covariance @@ -737,11 +782,13 @@ maths_t::EFloatingPointErrorStatus inverseQuadraticForm(const CSymmetricMatrix -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace = true) { - return linear_algebra_tools_detail::gaussianLogLikelihood(covariance.rows(), covariance, residual, result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +gaussianLogLikelihood(const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace = true) { + return linear_algebra_tools_detail::gaussianLogLikelihood( + covariance.rows(), covariance, residual, result, ignoreSingularSubspace); } //! Sample from a Gaussian with \p mean and \p covariance in such @@ -753,7 +800,10 @@ maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(const CSymmetricMatrix< //! \param[in] covariance The covariance matrix of the Gaussian. //! \param[out] result Filled in with the samples. template -void sampleGaussian(std::size_t n, const CVector& mean, const CSymmetricMatrix& covariance, std::vector>& result) { +void sampleGaussian(std::size_t n, + const CVector& mean, + const CSymmetricMatrix& covariance, + std::vector>& result) { return linear_algebra_tools_detail::sampleGaussian(n, mean, covariance, result); } @@ -763,14 +813,17 @@ void sampleGaussian(std::size_t n, const CVector& mean, const CSymmetricMatri //! \param[in] ignoreSingularSubspace If true then we ignore any //! singular subspace of m. Otherwise, the result is minus infinity. template -maths_t::EFloatingPointErrorStatus logDeterminant(const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace = true) { - return linear_algebra_tools_detail::logDeterminant(matrix.rows(), matrix, result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus logDeterminant(const CSymmetricMatrix& matrix, + double& result, + bool ignoreSingularSubspace = true) { + return linear_algebra_tools_detail::logDeterminant(matrix.rows(), matrix, result, + ignoreSingularSubspace); } //! Project the matrix on to \p subspace. 
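Putting the convenience wrappers together, a usage sketch with the dimension deduced from the fixed-size types; the values in the comments follow from choosing the identity covariance:

    #include <maths/CLinearAlgebra.h>
    #include <maths/CLinearAlgebraTools.h>

    using namespace ml;

    int main() {
        maths::CVectorNx1<double, 2> residual(0.5);
        maths::CSymmetricMatrixNxN<double, 2> covariance =
            maths::CVectorNx1<double, 2>(1.0).diagonal();   // identity

        double q = 0.0;
        maths::inverseQuadraticForm(covariance, residual, q);   // q = 0.5

        double logLikelihood = 0.0;
        maths::gaussianLogLikelihood(covariance, residual, logLikelihood);
        // logLikelihood = -log(2 pi) - 0.25

        double logDet = 0.0;
        maths::logDeterminant(covariance, logDet);              // logDet = 0.0
        return 0;
    }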
template -inline Eigen::Matrix projectedMatrix(const std::vector& subspace, - const MATRIX& matrix) { +inline Eigen::Matrix +projectedMatrix(const std::vector& subspace, const MATRIX& matrix) { std::size_t d = subspace.size(); Eigen::Matrix result(d, d); for (std::size_t i = 0u; i < d; ++i) { @@ -783,7 +836,8 @@ inline Eigen::Matrix projectedMatrix(con //! Project the vector on to \p subspace. template -inline Eigen::Matrix projectedVector(const std::vector& subspace, const VECTOR& vector) { +inline Eigen::Matrix +projectedVector(const std::vector& subspace, const VECTOR& vector) { std::size_t d = subspace.size(); Eigen::Matrix result(d); for (std::size_t i = 0u; i < d; ++i) { diff --git a/include/maths/CLogNormalMeanPrecConjugate.h b/include/maths/CLogNormalMeanPrecConjugate.h index 89dda4ecf2..56e36b9f9d 100644 --- a/include/maths/CLogNormalMeanPrecConjugate.h +++ b/include/maths/CLogNormalMeanPrecConjugate.h @@ -98,10 +98,11 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! \param[in] offsetMargin The margin between the smallest value and the support //! left end. //! \return A non-informative prior. - static CLogNormalMeanPrecConjugate nonInformativePrior(maths_t::EDataType dataType, - double offset = 0.0, - double decayRate = 0.0, - double offsetMargin = LOG_NORMAL_OFFSET_MARGIN); + static CLogNormalMeanPrecConjugate + nonInformativePrior(maths_t::EDataType dataType, + double offset = 0.0, + double decayRate = 0.0, + double offsetMargin = LOG_NORMAL_OFFSET_MARGIN); //@} //! \name Prior Contract @@ -141,7 +142,9 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -154,7 +157,9 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -173,12 +178,14 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. 
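projectedMatrix and projectedVector restrict a matrix or vector to the coordinates named in subspace, i.e. result(i, j) = matrix(subspace[i], subspace[j]). A sketch, using auto for the dynamically sized Eigen return types:

    #include <maths/CLinearAlgebra.h>
    #include <maths/CLinearAlgebraTools.h>

    #include <vector>

    using namespace ml;

    int main() {
        maths::CSymmetricMatrixNxN<double, 3> m =
            maths::CVectorNx1<double, 3>(2.0).diagonal();   // diag(2, 2, 2)
        std::vector<std::size_t> subspace{0, 2};
        auto pm = maths::projectedMatrix(subspace, m);      // 2x2: diag(2, 2)
        return 0;
    }

For the log-normal prior, the query methods default their weight arguments to TWeights::COUNT_VARIANCE and TWeights::UNIT, so a freshly built prior can be interrogated directly. A minimal sketch, assuming maths_t::E_ContinuousData for the data type:

    #include <maths/CLogNormalMeanPrecConjugate.h>

    using namespace ml;

    int main() {
        auto prior = maths::CLogNormalMeanPrecConjugate::nonInformativePrior(
            maths_t::E_ContinuousData);
        double mode = prior.marginalLikelihoodMode();   // defaulted weights
        auto interval = prior.marginalLikelihoodConfidenceInterval(95.0);
        return 0;
    }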
- virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -193,9 +200,10 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the exponentiated normal mean @@ -209,10 +217,11 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; //! Sample the marginal likelihood function. //! @@ -373,7 +382,8 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { TDoubleDoublePr confidenceIntervalNormalPrecision(double percentage) const; //! Check if two priors are equal to the specified tolerance. - bool equalTolerance(const CLogNormalMeanPrecConjugate& rhs, const TEqualWithTolerance& equal) const; + bool equalTolerance(const CLogNormalMeanPrecConjugate& rhs, + const TEqualWithTolerance& equal) const; //@} private: diff --git a/include/maths/CMathsFuncs.h b/include/maths/CMathsFuncs.h index 87e113a0a5..6b5833fc76 100644 --- a/include/maths/CMathsFuncs.h +++ b/include/maths/CMathsFuncs.h @@ -107,16 +107,21 @@ class MATHS_EXPORT CMathsFuncs : private core::CNonInstantiatable { public: CFiniteIterator() : m_Base(), m_End() {} - CFiniteIterator(const ITR& base, const ITR& end) : m_Base(base), m_End(end) { + CFiniteIterator(const ITR& base, const ITR& end) + : m_Base(base), m_End(end) { if (m_Base != m_End && !isFinite(*m_Base)) { this->increment(); } } //! Equal. - bool operator==(const CFiniteIterator& rhs) const { return m_Base == rhs.m_Base; } + bool operator==(const CFiniteIterator& rhs) const { + return m_Base == rhs.m_Base; + } //! Different. - bool operator!=(const CFiniteIterator& rhs) const { return m_Base != rhs.m_Base; } + bool operator!=(const CFiniteIterator& rhs) const { + return m_Base != rhs.m_Base; + } //! Dereference. reference operator*() const { return *m_Base; } @@ -153,13 +158,15 @@ class MATHS_EXPORT CMathsFuncs : private core::CNonInstantiatable { //! 
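CFiniteIterator skips NaN and infinite values as it advances; note the constructor already fast-forwards past a non-finite first element. A sketch using the beginFinite/endFinite helpers declared next:

    #include <maths/CMathsFuncs.h>

    #include <limits>
    #include <vector>

    using namespace ml;

    int main() {
        std::vector<double> values{1.0, std::numeric_limits<double>::quiet_NaN(), 3.0};
        double sum = 0.0;
        for (auto i = maths::CMathsFuncs::beginFinite(values),
                  end = maths::CMathsFuncs::endFinite(values);
             i != end; ++i) {
            sum += *i;   // the NaN is skipped: sum == 4.0
        }
        return 0;
    }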
Get an iterator over the finite values of a double container. template static CFiniteIterator beginFinite(T& container) { - return CFiniteIterator(container.begin(), container.end()); + return CFiniteIterator(container.begin(), + container.end()); } //! Get a const_iterator over the finite values of a double container. template static CFiniteIterator beginFinite(const T& container) { - return CFiniteIterator(container.begin(), container.end()); + return CFiniteIterator(container.begin(), + container.end()); } //! Get a finite values iterator at the end of a double container. @@ -171,7 +178,8 @@ class MATHS_EXPORT CMathsFuncs : private core::CNonInstantiatable { //! Get a finite values const_iterator at the end of a double container. template static CFiniteIterator endFinite(const T& container) { - return CFiniteIterator(container.end(), container.end()); + return CFiniteIterator(container.end(), + container.end()); } private: diff --git a/include/maths/CMixtureDistribution.h b/include/maths/CMixtureDistribution.h index 41c24989f8..5b0b13ead1 100644 --- a/include/maths/CMixtureDistribution.h +++ b/include/maths/CMixtureDistribution.h @@ -149,7 +149,8 @@ class CMixtureDistribution { CMixtureDistribution() {} //! \note The length of \p weights should match \p modes. - CMixtureDistribution(const TDoubleVec& weights, const TModeVec& modes) : m_Weights(weights), m_Modes(modes) { + CMixtureDistribution(const TDoubleVec& weights, const TModeVec& modes) + : m_Weights(weights), m_Modes(modes) { std::size_t w = m_Weights.size(); if (w != m_Modes.size()) { LOG_ERROR(<< "# weights = " << w << ", # modes = " << m_Modes.size()); @@ -165,7 +166,8 @@ class CMixtureDistribution { LOG_ERROR(<< "Expected non-zero weight sum"); } for (std::size_t i = 0u; i < w; ++i) { - m_Weights[i] = weightSum == 0.0 ? 1.0 / static_cast(w) : m_Weights[i] / weightSum; + m_Weights[i] = weightSum == 0.0 ? 1.0 / static_cast(w) + : m_Weights[i] / weightSum; } } @@ -183,9 +185,10 @@ class CMixtureDistribution { std::string print() const { std::string result; for (std::size_t i = 0u; i < m_Weights.size(); ++i) { - result += ' ' + core::CStringUtils::typeToStringPretty(m_Weights[i]) + '/' + - core::CStringUtils::typeToStringPretty(mean(m_Modes[i])) + '/' + - core::CStringUtils::typeToStringPretty(standard_deviation(m_Modes[i])); + result += + ' ' + core::CStringUtils::typeToStringPretty(m_Weights[i]) + '/' + + core::CStringUtils::typeToStringPretty(mean(m_Modes[i])) + '/' + + core::CStringUtils::typeToStringPretty(standard_deviation(m_Modes[i])); } result += (m_Weights.empty() ? 
"" : " "); return result; @@ -205,7 +208,8 @@ class CPdfAdpater { using result_type = double; public: - CPdfAdpater(const CMixtureDistribution& distribution) : m_Distribution(&distribution) {} + CPdfAdpater(const CMixtureDistribution& distribution) + : m_Distribution(&distribution) {} double operator()(double x) const { return pdf(*m_Distribution, x); } @@ -222,17 +226,21 @@ mixture_detail::TDoubleDoublePr support(const CMixtureDistribution& distribut const TModeVec& modes = distribution.modes(); if (modes.empty()) { - return mixture_detail::TDoubleDoublePr(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); + return mixture_detail::TDoubleDoublePr(boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()); } - mixture_detail::TDoubleDoublePr result(boost::numeric::bounds::highest(), boost::numeric::bounds::lowest()); + mixture_detail::TDoubleDoublePr result(boost::numeric::bounds::highest(), + boost::numeric::bounds::lowest()); for (std::size_t i = 0u; i < modes.size(); ++i) { try { mixture_detail::TDoubleDoublePr modeSupport = support(modes[i]); result.first = std::min(result.first, modeSupport.first); result.second = std::max(result.second, modeSupport.second); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute support for mode: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to compute support for mode: " << e.what()); + } } return result; @@ -310,14 +318,17 @@ double pdf(const CMixtureDistribution& distribution, double x) { if (x >= ms.first && x <= ms.second) { try { double fx = pdf(modes[i], x); - LOG_TRACE(<< "x = " << x << ", w(" << i << ") = " << weights[i] << ", f(x, " << i << ") " << fx); + LOG_TRACE(<< "x = " << x << ", w(" << i << ") = " << weights[i] + << ", f(x, " << i << ") " << fx); result += weights[i] * fx; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute f(x) for mode at " << x << ": " << e.what()); + LOG_ERROR(<< "Failed to compute f(x) for mode at " << x << ": " + << e.what()); throw e; } } else { - LOG_TRACE(<< "x = " << x << ", support = (" << ms.first << "," << ms.second << ")"); + LOG_TRACE(<< "x = " << x << ", support = (" << ms.first << "," + << ms.second << ")"); } } @@ -352,14 +363,17 @@ double cdf(const CMixtureDistribution& distribution, double x) { } else if (x >= ms.first) { try { double fx = cdf(modes[i], x); - LOG_TRACE(<< "x = " << x << ", w(" << i << ") = " << weights[i] << ", f(x, " << i << ") " << fx); + LOG_TRACE(<< "x = " << x << ", w(" << i << ") = " << weights[i] + << ", f(x, " << i << ") " << fx); result += weights[i] * fx; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute f(x) for mode at " << x << ": " << e.what()); + LOG_ERROR(<< "Failed to compute f(x) for mode at " << x << ": " + << e.what()); throw e; } } else { - LOG_TRACE(<< "x = " << x << ", support = (" << ms.first << "," << ms.second << ")"); + LOG_TRACE(<< "x = " << x << ", support = (" << ms.first << "," + << ms.second << ")"); } } @@ -394,14 +408,17 @@ double cdfComplement(const CMixtureDistribution& distribution, double x) { } else if (x < ms.second) { try { double fx = cdf(complement(modes[i], x)); - LOG_TRACE(<< "x = " << x << ", w(" << i << ") = " << weights[i] << ", f(x, " << i << ") " << fx); + LOG_TRACE(<< "x = " << x << ", w(" << i << ") = " << weights[i] + << ", f(x, " << i << ") " << fx); result += weights[i] * fx; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute f(x) for mode at " << x << ": " << e.what()); + LOG_ERROR(<< "Failed 
to compute f(x) for mode at " << x << ": " + << e.what()); throw e; } } else { - LOG_TRACE(<< "x = " << x << ", support = (" << ms.first << "," << ms.second << ")"); + LOG_TRACE(<< "x = " << x << ", support = (" << ms.first << "," + << ms.second << ")"); } } @@ -417,7 +434,8 @@ class CCdfAdapter { using result_type = double; public: - CCdfAdapter(const CMixtureDistribution& distribution) : m_Distribution(&distribution) {} + CCdfAdapter(const CMixtureDistribution& distribution) + : m_Distribution(&distribution) {} double operator()(const double x) const { return cdf(*m_Distribution, x); } @@ -470,19 +488,25 @@ double quantile(const CMixtureDistribution& distribution, const double q) { << ", (f(a),f(b)) = [" << fa << "," << fb << "]"); std::size_t maxIterations = MAX_ITERATIONS; - if ((f0 < 0 && !CSolvers::rightBracket(a, b, fa, fb, fq, maxIterations, s.first, s.second)) || - (f0 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, fq, maxIterations, s.first, s.second))) { - LOG_ERROR(<< "Unable to bracket quantile = " << q << ", (a,b) = (" << a << "," << b << ")" + if ((f0 < 0 && !CSolvers::rightBracket(a, b, fa, fb, fq, maxIterations, + s.first, s.second)) || + (f0 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, fq, maxIterations, + s.first, s.second))) { + LOG_ERROR(<< "Unable to bracket quantile = " << q << ", (a,b) = (" + << a << "," << b << ")" << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); result = std::fabs(fa) < std::fabs(fb) ? a : b; } else { LOG_TRACE(<< "(a,b) = (" << a << "," << b << ")" << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); maxIterations = MAX_ITERATIONS - maxIterations; - CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, - std::min(std::numeric_limits::epsilon() * b, EPS * q / std::max(fa, fb))); + CEqualWithTolerance equal( + CToleranceTypes::E_AbsoluteTolerance, + std::min(std::numeric_limits::epsilon() * b, + EPS * q / std::max(fa, fb))); CSolvers::solve(a, b, fa, fb, fq, maxIterations, equal, result); - LOG_TRACE(<< "q = " << q << ", x = " << result << ", f(x) = " << fq(result) << ", iterations = " << maxIterations); + LOG_TRACE(<< "q = " << q << ", x = " << result << ", f(x) = " << fq(result) + << ", iterations = " << maxIterations); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute quantile " << q); diff --git a/include/maths/CModel.h b/include/maths/CModel.h index 2dd07216e4..a72e394d69 100644 --- a/include/maths/CModel.h +++ b/include/maths/CModel.h @@ -162,7 +162,8 @@ class MATHS_EXPORT CModelProbabilityParams { using TDouble2Vec4Vec = core::CSmallVector; using TDouble2Vec4Vec1Vec = core::CSmallVector; using TSize2Vec = core::CSmallVector; - using TProbabilityCalculation2Vec = core::CSmallVector; + using TProbabilityCalculation2Vec = + core::CSmallVector; public: CModelProbabilityParams(); @@ -318,28 +319,36 @@ class MATHS_EXPORT CModel { virtual void addBucketValue(const TTimeDouble2VecSizeTrVec& value) = 0; //! Update the model with new samples. - virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, TTimeDouble2VecSizeTrVec samples) = 0; + virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, + TTimeDouble2VecSizeTrVec samples) = 0; //! Advance time by \p gap. virtual void skipTime(core_t::TTime gap) = 0; //! Get the most likely value for the time series at \p time. 
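A usage sketch for the mixture's free functions: build a two-component normal mixture (the constructor normalizes the weights) and evaluate it. Assumes boost::math distributions, which supply the pdf/cdf/support/mean/standard_deviation calls this header relies on:

    #include <maths/CMixtureDistribution.h>

    #include <boost/math/distributions/normal.hpp>

    using namespace ml;
    using TNormal = boost::math::normal_distribution<>;

    int main() {
        maths::CMixtureDistribution<TNormal> mixture(
            {0.3, 0.7}, {TNormal(0.0, 1.0), TNormal(5.0, 2.0)});
        double f = maths::pdf(mixture, 1.0);       // 0.3 N(0,1)(1) + 0.7 N(5,2)(1)
        double F = maths::cdf(mixture, 1.0);
        double median = maths::quantile(mixture, 0.5);   // solved by bracketing
        return 0;
    }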
- virtual TDouble2Vec mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const = 0; + virtual TDouble2Vec mode(core_t::TTime time, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const = 0; //! Get the most likely value for each correlate time series at //! \p time, if there are any. - virtual TDouble2Vec1Vec - correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec1Vec& weights) const = 0; + virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec1Vec& weights) const = 0; //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const = 0; + virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const = 0; //! Remove any trend components from \p value. - virtual void detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const = 0; + virtual void detrend(const TTime2Vec1Vec& time, + double confidenceInterval, + TDouble2Vec1Vec& value) const = 0; //! Get the best (least MSE) predicted value at \p time. - virtual TDouble2Vec - predict(core_t::TTime time, const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(), TDouble2Vec hint = TDouble2Vec()) const = 0; + virtual TDouble2Vec predict(core_t::TTime time, + const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(), + TDouble2Vec hint = TDouble2Vec()) const = 0; //! Get the prediction and \p confidenceInterval percentage //! confidence interval for the time series at \p time. @@ -373,7 +382,9 @@ class MATHS_EXPORT CModel { //! Get the Winsorisation weight to apply to \p value, //! if appropriate. - virtual TDouble2Vec winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const = 0; + virtual TDouble2Vec winsorisationWeight(double derate, + core_t::TTime time, + const TDouble2Vec& value) const = 0; //! Get the seasonal variance scale at \p time. virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const = 0; @@ -416,7 +427,8 @@ class MATHS_EXPORT CModel { //! Get the error in the prior prediction for \p sample. template - static boost::optional predictionError(double propagationInterval, const PRIOR& prior, const VECTOR& sample); + static boost::optional + predictionError(double propagationInterval, const PRIOR& prior, const VECTOR& sample); //! Correct \p probability with \p probabilityEmptyBucket. static double correctForEmptyBucket(maths_t::EProbabilityCalculation calculation, @@ -467,26 +479,35 @@ class MATHS_EXPORT CModelStub : public CModel { virtual void addBucketValue(const TTimeDouble2VecSizeTrVec& value); //! No-op. - virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, TTimeDouble2VecSizeTrVec samples); + virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, + TTimeDouble2VecSizeTrVec samples); //! No-op. virtual void skipTime(core_t::TTime gap); //! Returns empty. - virtual TDouble2Vec mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec mode(core_t::TTime time, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const; //! Returns empty. 
- virtual TDouble2Vec1Vec - correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec1Vec& weights) const; + virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec1Vec& weights) const; //! Returns empty. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const; //! No-op. - virtual void detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const; + virtual void detrend(const TTime2Vec1Vec& time, + double confidenceInterval, + TDouble2Vec1Vec& value) const; //! Returns empty. - virtual TDouble2Vec predict(core_t::TTime time, const TSizeDoublePr1Vec& correlated, TDouble2Vec hint = TDouble2Vec()) const; + virtual TDouble2Vec predict(core_t::TTime time, + const TSizeDoublePr1Vec& correlated, + TDouble2Vec hint = TDouble2Vec()) const; //! Returns empty. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, @@ -512,7 +533,8 @@ class MATHS_EXPORT CModelStub : public CModel { TSize1Vec& mostAnomalousCorrelate) const; //! Returns empty. - virtual TDouble2Vec winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const; + virtual TDouble2Vec + winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const; //! Returns empty. virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const; diff --git a/include/maths/CModelDetail.h b/include/maths/CModelDetail.h index 63c4fd295a..55267ee724 100644 --- a/include/maths/CModelDetail.h +++ b/include/maths/CModelDetail.h @@ -44,7 +44,9 @@ boost::optional CModel::predictionError(const TREND& trend, const VECTOR } template -boost::optional CModel::predictionError(double propagationInterval, const PRIOR& prior, const VECTOR& sample) { +boost::optional CModel::predictionError(double propagationInterval, + const PRIOR& prior, + const VECTOR& sample) { boost::optional result; if (prior->numberSamples() > 20.0 / propagationInterval) { std::size_t dimension{sample.size()}; diff --git a/include/maths/CModelStateSerialiser.h b/include/maths/CModelStateSerialiser.h index 8b48aafff0..c08ed0658d 100644 --- a/include/maths/CModelStateSerialiser.h +++ b/include/maths/CModelStateSerialiser.h @@ -39,7 +39,9 @@ class MATHS_EXPORT CModelStateSerialiser { public: //! Construct the appropriate CPrior sub-class from its state //! document representation. Sets \p result to NULL on failure. - bool operator()(const SModelRestoreParams& params, TModelPtr& result, core::CStateRestoreTraverser& traverser) const; + bool operator()(const SModelRestoreParams& params, + TModelPtr& result, + core::CStateRestoreTraverser& traverser) const; //! Persist state by passing information to the supplied inserter void operator()(const CModel& model, core::CStatePersistInserter& inserter) const; diff --git a/include/maths/CMultimodalPrior.h b/include/maths/CMultimodalPrior.h index 05d1c0d997..10a45ca73e 100644 --- a/include/maths/CMultimodalPrior.h +++ b/include/maths/CMultimodalPrior.h @@ -70,16 +70,22 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! \name Life-Cycle //@{ //! Create a new (empty) multimodal prior. 
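Note the gate in CModel::predictionError above: an error is only reported once the prior has absorbed roughly 20 effective samples per propagation interval. The guard reduced to a plain predicate, with illustrative names only:

    // Illustrative: mirrors prior->numberSamples() > 20.0 / propagationInterval.
    bool havePredictionError(double propagationInterval, double numberSamples) {
        return numberSamples > 20.0 / propagationInterval;
    }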
- CMultimodalPrior(maths_t::EDataType dataType, const CClusterer1d& clusterer, const CPrior& seedPrior, double decayRate = 0.0); + CMultimodalPrior(maths_t::EDataType dataType, + const CClusterer1d& clusterer, + const CPrior& seedPrior, + double decayRate = 0.0); //! Create a mixture of normals. - CMultimodalPrior(maths_t::EDataType dataType, const TMeanVarAccumulatorVec& moments, double decayRate = 0.0); + CMultimodalPrior(maths_t::EDataType dataType, + const TMeanVarAccumulatorVec& moments, + double decayRate = 0.0); //! Create from a collection of weights and priors. CMultimodalPrior(maths_t::EDataType dataType, double decayRate, TPriorPtrVec& priors); //! Construct from part of a state document. - CMultimodalPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + CMultimodalPrior(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Implements value semantics for copy construction. CMultimodalPrior(const CMultimodalPrior& other); @@ -120,7 +126,9 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! Forward the offset to the mode priors. //! //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -133,7 +141,9 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -154,16 +164,19 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { virtual double nearestMarginalLikelihoodMean(double value) const; //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the local maxima of the marginal likelihood function. - virtual TDouble1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDouble1Vec + marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. 
the values \f$a\f$ and \f$b\f$ such that: @@ -178,9 +191,10 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the mode parameters and summing @@ -194,10 +208,11 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; //! Sample the marginal likelihood function. //! @@ -314,7 +329,9 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { class MATHS_EXPORT CModeSplitCallback { public: CModeSplitCallback(CMultimodalPrior& prior); - void operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const; + void operator()(std::size_t sourceIndex, + std::size_t leftSplitIndex, + std::size_t rightSplitIndex) const; private: CMultimodalPrior* m_Prior; @@ -324,7 +341,9 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { class MATHS_EXPORT CModeMergeCallback { public: CModeMergeCallback(CMultimodalPrior& prior); - void operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t targetIndex) const; + void operator()(std::size_t leftMergeIndex, + std::size_t rightMergeIndex, + std::size_t targetIndex) const; private: CMultimodalPrior* m_Prior; @@ -335,7 +354,8 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { private: //! Read parameters from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! We should only use this prior when it has multiple modes. virtual bool participatesInModelSelection() const; diff --git a/include/maths/CMultimodalPriorMode.h b/include/maths/CMultimodalPriorMode.h index c893749661..ace923875d 100644 --- a/include/maths/CMultimodalPriorMode.h +++ b/include/maths/CMultimodalPriorMode.h @@ -34,7 +34,8 @@ struct SMultimodalPriorMode { static const std::string PRIOR_TAG; SMultimodalPriorMode() : s_Index(0), s_Prior() {} - SMultimodalPriorMode(std::size_t index, const PRIOR_PTR& prior) : s_Index(index), s_Prior(prior->clone()) {} + SMultimodalPriorMode(std::size_t index, const PRIOR_PTR& prior) + : s_Index(index), s_Prior(prior->clone()) {} //! Get the weight of this sample. 
double weight() const { return s_Prior->numberSamples(); } @@ -52,15 +53,19 @@ struct SMultimodalPriorMode { } //! Get the memory used by this component - std::size_t memoryUsage() const { return core::CMemory::dynamicSize(s_Prior); } + std::size_t memoryUsage() const { + return core::CMemory::dynamicSize(s_Prior); + } //! Create from part of a state document. - bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); RESTORE_BUILT_IN(INDEX_TAG, s_Index) - RESTORE(PRIOR_TAG, - traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(s_Prior), _1))) + RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), boost::cref(params), + boost::ref(s_Prior), _1))) } while (traverser.next()); return true; @@ -69,7 +74,8 @@ struct SMultimodalPriorMode { //! Persist state by passing information to the supplied inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(INDEX_TAG, s_Index); - inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*s_Prior), _1)); + inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(*s_Prior), _1)); } //! Full debug dump of the mode weights. diff --git a/include/maths/CMultimodalPriorUtils.h b/include/maths/CMultimodalPriorUtils.h index a8a2f40e8f..63caee87b2 100644 --- a/include/maths/CMultimodalPriorUtils.h +++ b/include/maths/CMultimodalPriorUtils.h @@ -49,15 +49,18 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! Get the mode of the marginal likelihood function. template - static TDoubleDoublePr marginalLikelihoodSupport(const std::vector>& modes) { + static TDoubleDoublePr + marginalLikelihoodSupport(const std::vector>& modes) { if (modes.size() == 0) { - return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); + return std::make_pair(boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()); } if (modes.size() == 1) { return modes[0].s_Prior->marginalLikelihoodSupport(); } - TDoubleDoublePr result(boost::numeric::bounds::highest(), boost::numeric::bounds::lowest()); + TDoubleDoublePr result(boost::numeric::bounds::highest(), + boost::numeric::bounds::lowest()); // We define this is as the union of the mode supports. for (std::size_t i = 0u; i < modes.size(); ++i) { @@ -105,7 +108,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { return modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weights); } - using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack>; + using TMaxAccumulator = + CBasicStatistics::COrderStatisticsStack>; // We'll approximate this as the maximum likelihood mode (mode). double result = 0.0; @@ -115,7 +119,9 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { try { seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); countVarianceScale = maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get variance scale " << e.what()); + } // Declared outside the loop to minimize number of times they // are created. 
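marginalLikelihoodSupport above defines the multimodal support as the union of the per-mode supports. The same computation reduced to plain pairs, using the boost::numeric::bounds limits the header itself uses:

    #include <boost/numeric/conversion/bounds.hpp>

    #include <algorithm>
    #include <utility>
    #include <vector>

    using TDoubleDoublePr = std::pair<double, double>;

    TDoubleDoublePr unionSupport(const std::vector<TDoubleDoublePr>& modeSupports) {
        TDoubleDoublePr result(boost::numeric::bounds<double>::highest(),
                               boost::numeric::bounds<double>::lowest());
        for (const auto& s : modeSupports) {
            result.first = std::min(result.first, s.first);
            result.second = std::max(result.second, s.second);
        }
        return result;
    }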
@@ -128,7 +134,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { const T& prior = modes[i].s_Prior; mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight[0]); double likelihood; - if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, mode, weight, likelihood) & + if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, + mode, weight, likelihood) & (maths_t::E_FpFailed | maths_t::E_FpOverflowed)) { continue; } @@ -147,9 +154,10 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! Get the variance of the marginal likelihood. template - static double marginalLikelihoodVariance(const std::vector>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) { + static double + marginalLikelihoodVariance(const std::vector>& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) { if (modes.size() == 0) { return boost::numeric::bounds::highest(); } @@ -164,8 +172,11 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { double varianceScale = 1.0; try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale " << e.what()); } + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * + maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get variance scale " << e.what()); + } double mean = marginalLikelihoodMean(modes); @@ -190,11 +201,12 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! where \f$m\f$ is the median of the distribution and \f$p\f$ is //! the percentage of interest \p percentage.
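//! For reference, writing \f$F\f$ for the c.d.f. of the marginal likelihood
//! (the standard reading of the elided formula above), the interval is the
//! pair \f$(a, b)\f$ satisfying
//! \f$F(a) = \frac{1}{2}\left(1 - \frac{p}{100}\right)\f$ and
//! \f$1 - F(b) = \frac{1}{2}\left(1 - \frac{p}{100}\right)\f$,
//! so equal tail mass is excluded either side of the median
//! \f$m = F^{-1}(1/2)\f$.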
template - static TDoubleDoublePr marginalLikelihoodConfidenceInterval(const PRIOR& prior, - const std::vector& modes, - double percentage, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) { + static TDoubleDoublePr + marginalLikelihoodConfidenceInterval(const PRIOR& prior, + const std::vector& modes, + double percentage, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) { TDoubleDoublePr support = marginalLikelihoodSupport(modes); if (isNonInformative(modes)) { @@ -202,7 +214,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } if (modes.size() == 1) { - return modes[0].s_Prior->marginalLikelihoodConfidenceInterval(percentage, weightStyles, weights); + return modes[0].s_Prior->marginalLikelihoodConfidenceInterval( + percentage, weightStyles, weights); } percentage /= 100.0; @@ -236,17 +249,21 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { std::size_t maxIterations = MAX_ITERATIONS; if ((f10 < 0 && !CSolvers::rightBracket(a, b, fa, fb, f1, maxIterations)) || (f10 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, f1, maxIterations))) { - LOG_ERROR(<< "Unable to bracket left percentile = " << p1 << ", (a,b) = (" << a << "," << b << ")" + LOG_ERROR(<< "Unable to bracket left percentile = " << p1 + << ", (a,b) = (" << a << "," << b << ")" << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); result.first = support.first; } else { LOG_TRACE(<< "(a,b) = (" << a << "," << b << ")" << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); maxIterations = MAX_ITERATIONS - maxIterations; - CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, - std::min(std::numeric_limits::epsilon() * b, EPS * p1 / std::max(fa, fb))); + CEqualWithTolerance equal( + CToleranceTypes::E_AbsoluteTolerance, + std::min(std::numeric_limits::epsilon() * b, + EPS * p1 / std::max(fa, fb))); CSolvers::solve(a, b, fa, fb, f1, maxIterations, equal, result.first); - LOG_TRACE(<< "p1 = " << p1 << ", x = " << result.first << ", f(x) = " << fl(result.first)); + LOG_TRACE(<< "p1 = " << p1 << ", x = " << result.first + << ", f(x) = " << fl(result.first)); } result.second = result.first; @@ -260,7 +277,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { // Fall: nothing to do. 
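// Bracketing then solving as above amounts to finding a quantile of the
// mixture c.d.f. by root finding. A minimal sketch of the idea using plain
// bisection (the production code brackets with CSolvers and then uses a
// faster tolerance-based solver; solveForQuantile is an illustrative name):
#include <functional>

double solveForQuantile(const std::function<double(double)>& cdf, // monotone non-decreasing
                        double q,                                 // target probability in (0, 1)
                        double a,                                 // bracket end with cdf(a) <= q
                        double b) {                               // bracket end with cdf(b) >= q
    for (int i = 0; i < 100; ++i) { // ample for double precision
        double m = 0.5 * (a + b);
        (cdf(m) < q ? a : b) = m; // shrink whichever end keeps the bracket
    }
    return 0.5 * (a + b);
}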
} else if ((f20 < 0 && !CSolvers::rightBracket(a, b, fa, fb, f2, maxIterations)) || (f20 >= 0 && !CSolvers::leftBracket(a, b, fa, fb, f2, maxIterations))) { - LOG_ERROR(<< "Unable to bracket right percentile = " << p2 << ", (a,b) = (" << a << "," << b << ")" + LOG_ERROR(<< "Unable to bracket right percentile = " << p2 + << ", (a,b) = (" << a << "," << b << ")" << ", (f(a),f(b)) = (" << fa << "," << fb << ")"); result.second = support.second; } else { @@ -268,13 +286,17 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { << ", (f(a),f(b)) = [" << fa << "," << fb << "]"); maxIterations = MAX_ITERATIONS - maxIterations; - CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, - std::min(std::numeric_limits::epsilon() * b, EPS * p2 / std::max(fa, fb))); + CEqualWithTolerance equal( + CToleranceTypes::E_AbsoluteTolerance, + std::min(std::numeric_limits::epsilon() * b, + EPS * p2 / std::max(fa, fb))); CSolvers::solve(a, b, fa, fb, f2, maxIterations, equal, result.second); - LOG_TRACE(<< "p2 = " << p2 << ", x = " << result.second << ", f(x) = " << fu(result.second)); + LOG_TRACE(<< "p2 = " << p2 << ", x = " << result.second + << ", f(x) = " << fu(result.second)); } } catch (const std::exception& e) { - LOG_ERROR(<< "Unable to find left percentile: " << e.what() << ", percentiles = [" << p1 << "," << p2 << "]" + LOG_ERROR(<< "Unable to find left percentile: " << e.what() + << ", percentiles = [" << p1 << "," << p2 << "]" << ", x0 = " << x0); return support; } @@ -285,11 +307,12 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! Calculate the log marginal likelihood function integrating over //! the prior density function. template - static maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const std::vector>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) { + static maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const std::vector>& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) { // The likelihood can be computed from the conditional likelihood // that a sample is from each mode. In particular, the likelihood // of a sample x is: @@ -327,13 +350,17 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { TSizeDoublePr5Vec modeLogLikelihoods; modeLogLikelihoods.reserve(modes.size()); - double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) ? marginalLikelihoodMean(modes) : 0.0; + double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) + ? marginalLikelihoodMean(modes) + : 0.0; TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); try { for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double logSeasonalScale = seasonalScale != 1.0 ? std::log(seasonalScale) : 0.0; + double seasonalScale = std::sqrt( + maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double logSeasonalScale = seasonalScale != 1.0 ? 
std::log(seasonalScale) + : 0.0; sample[0] = mean + (samples[i] - mean) / seasonalScale; weight[0][0] = maths_t::countVarianceScale(weightStyles, weights[i]); @@ -346,7 +373,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { for (std::size_t j = 0u; j < modes.size(); ++j) { double modeLogLikelihood; maths_t::EFloatingPointErrorStatus status = - modes[j].s_Prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, sample, weight, modeLogLikelihood); + modes[j].s_Prior->jointLogMarginalLikelihood( + TWeights::COUNT_VARIANCE, sample, weight, modeLogLikelihood); if (status & maths_t::E_FpFailed) { // Logging handled at a lower level. return status; @@ -371,7 +399,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { return maths_t::E_FpOverflowed; } - LOG_TRACE(<< "modeLogLikelihoods = " << core::CContainerPrinter::print(modeLogLikelihoods)); + LOG_TRACE(<< "modeLogLikelihoods = " + << core::CContainerPrinter::print(modeLogLikelihoods)); double sampleLikelihood = 0.0; double Z = 0.0; @@ -386,7 +415,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { sampleLikelihood /= Z; double sampleLogLikelihood = n * (std::log(sampleLikelihood) + maxLogLikelihood); - LOG_TRACE(<< "sample = " << core::CContainerPrinter::print(sample) << ", maxLogLikelihood = " << maxLogLikelihood + LOG_TRACE(<< "sample = " << core::CContainerPrinter::print(sample) + << ", maxLogLikelihood = " << maxLogLikelihood << ", sampleLogLikelihood = " << sampleLogLikelihood); result += sampleLogLikelihood - n * logSeasonalScale; @@ -398,7 +428,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result); if (status & maths_t::E_FpFailed) { - LOG_ERROR(<< "Failed to compute likelihood (" << SMultimodalPriorMode::debugWeights(modes) << ")"); + LOG_ERROR(<< "Failed to compute likelihood (" + << SMultimodalPriorMode::debugWeights(modes) << ")"); LOG_ERROR(<< "samples = " << core::CContainerPrinter::print(samples)); LOG_ERROR(<< "weights = " << core::CContainerPrinter::print(weights)); } @@ -408,8 +439,9 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! Sample the marginal likelihood function. template - static void - sampleMarginalLikelihood(const std::vector>& modes, std::size_t numberSamples, TDouble1Vec& samples) { + static void sampleMarginalLikelihood(const std::vector>& modes, + std::size_t numberSamples, + TDouble1Vec& samples) { samples.clear(); if (modes.size() == 1) { @@ -462,7 +494,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { const TDouble4Vec1Vec& weights, double& lowerBound, double& upperBound) { - return minusLogJointCdf(modes, CMinusLogJointCdf(), weightStyles, samples, weights, lowerBound, upperBound); + return minusLogJointCdf(modes, CMinusLogJointCdf(), weightStyles, + samples, weights, lowerBound, upperBound); } //! Compute minus the log of the one minus the joint c.d.f. of the @@ -470,13 +503,15 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! to cancellation errors at one, i.e. the smallest non-zero value //! this can return is the minimum double rather than epsilon. 
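// The renormalization by the maximum log-likelihood used in
// jointLogMarginalLikelihood above is the standard log-sum-exp trick. A
// self-contained sketch for one sample (weights need not be normalized; both
// vectors are assumed non-empty and of equal length):
#include <cmath>
#include <cstddef>
#include <vector>

// log( Sum_i{ w(i) * exp(logL(i)) } / Sum_i{ w(i) } ) without underflow.
double logMixtureLikelihood(const std::vector<double>& weights,
                            const std::vector<double>& logLikelihoods) {
    double maxLogL = logLikelihoods[0];
    for (double logL : logLikelihoods) {
        if (logL > maxLogL) {
            maxLogL = logL;
        }
    }
    double sum = 0.0;
    double Z = 0.0;
    for (std::size_t i = 0; i < weights.size(); ++i) {
        sum += weights[i] * std::exp(logLikelihoods[i] - maxLogL);
        Z += weights[i];
    }
    // Dividing through by exp(maxLogL) keeps every exponent <= 0, so nothing
    // overflows and at least one term is exactly 1.
    return maxLogL + std::log(sum / Z);
}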
template - static bool minusLogJointCdfComplement(const std::vector>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& lowerBound, - double& upperBound) { - return minusLogJointCdf(modes, CMinusLogJointCdfComplement(), weightStyles, samples, weights, lowerBound, upperBound); + static bool + minusLogJointCdfComplement(const std::vector>& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound) { + return minusLogJointCdf(modes, CMinusLogJointCdfComplement(), weightStyles, + samples, weights, lowerBound, upperBound); } //! Calculate the joint probability of seeing a lower likelihood @@ -552,7 +587,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { switch (calculation) { case maths_t::E_OneSidedBelow: if (!minusLogJointCdf(modes, weightStyles, samples, weights, upperBound, lowerBound)) { - LOG_ERROR(<< "Failed computing probability of less likely samples: " << core::CContainerPrinter::print(samples)); + LOG_ERROR(<< "Failed computing probability of less likely samples: " + << core::CContainerPrinter::print(samples)); return false; } lowerBound = std::exp(-lowerBound); @@ -569,7 +605,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { TDoubleDoublePr support = marginalLikelihoodSupport(modes); support.first = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.first; - support.second = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.second; + support.second = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * + support.second; double mean = marginalLikelihoodMean(modes); double a = boost::numeric::bounds::highest(); @@ -586,7 +623,9 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { LOG_TRACE(<< "a = " << a << ", b = " << b << ", Z = " << Z); std::size_t svi = static_cast( - std::find(weightStyles.begin(), weightStyles.end(), maths_t::E_SampleSeasonalVarianceScaleWeight) - weightStyles.begin()); + std::find(weightStyles.begin(), weightStyles.end(), + maths_t::E_SampleSeasonalVarianceScaleWeight) - + weightStyles.begin()); // Declared outside the loop to minimize the number of times // they are created. 
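// For intuition, the quantity being bounded in this branch is
// P(f(X) <= f(x)): the total mass where the mixture density is no higher than
// at the observed point. A brute-force sketch by midpoint-rule integration
// over a bracketed support (illustrative only; the code below instead solves
// for the tail crossing points and uses the mode c.d.f.s):
#include <functional>

double probabilityOfLessLikelySample(const std::function<double(double)>& density,
                                     double x, // observed value
                                     double a, // left end of the effective support
                                     double b, // right end of the effective support
                                     int n = 100000) {
    double fx = density(x);
    double dx = (b - a) / n;
    double p = 0.0;
    for (int i = 0; i < n; ++i) {
        double y = a + (i + 0.5) * dx;
        double fy = density(y);
        if (fy <= fx) {
            p += fy * dx; // accumulate mass of the "less likely" region
        }
    }
    return p;
}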
@@ -604,7 +643,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } double fx; - maths_t::EFloatingPointErrorStatus status = jointLogMarginalLikelihood(modes, weightStyles, {x}, weight, fx); + maths_t::EFloatingPointErrorStatus status = + jointLogMarginalLikelihood(modes, weightStyles, {x}, weight, fx); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute likelihood for " << x); return false; @@ -617,11 +657,14 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { CPrior::CLogMarginalLikelihood logLikelihood(prior, weightStyles, weight); - CTools::CMixtureProbabilityOfLessLikelySample calculator(modes.size(), x, fx, a, b); + CTools::CMixtureProbabilityOfLessLikelySample calculator( + modes.size(), x, fx, a, b); for (const auto& mode : modes) { double w = mode.weight() / Z; - double centre = mode.s_Prior->marginalLikelihoodMode(weightStyles, weight[0]); - double spread = std::sqrt(mode.s_Prior->marginalLikelihoodVariance(weightStyles, weight[0])); + double centre = mode.s_Prior->marginalLikelihoodMode( + weightStyles, weight[0]); + double spread = std::sqrt(mode.s_Prior->marginalLikelihoodVariance( + weightStyles, weight[0])); calculator.addMode(w, centre, spread); tail_ = tail_ | (x < centre ? maths_t::E_LeftTail : maths_t::E_RightTail); } @@ -632,7 +675,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { double lb, ub; double l; - CEqualWithTolerance lequal(CToleranceTypes::E_AbsoluteTolerance, EPS * a); + CEqualWithTolerance lequal( + CToleranceTypes::E_AbsoluteTolerance, EPS * a); if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, l)) { wt[0] = l; minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); @@ -645,7 +689,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } double r; - CEqualWithTolerance requal(CToleranceTypes::E_AbsoluteTolerance, EPS * b); + CEqualWithTolerance requal( + CToleranceTypes::E_AbsoluteTolerance, EPS * b); if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, r)) { wt[0] = r; minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); @@ -662,13 +707,15 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { p = calculator.calculate(logLikelihood, sampleLowerBound); } - LOG_TRACE(<< "sampleLowerBound = " << sampleLowerBound << ", sampleUpperBound = " << sampleUpperBound << " p = " << p); + LOG_TRACE(<< "sampleLowerBound = " << sampleLowerBound + << ", sampleUpperBound = " << sampleUpperBound << " p = " << p); lowerBoundCalculator.add(CTools::truncate(sampleLowerBound + p, 0.0, 1.0)); upperBoundCalculator.add(CTools::truncate(sampleUpperBound + p, 0.0, 1.0)); } - if (!lowerBoundCalculator.calculate(lowerBound) || !upperBoundCalculator.calculate(upperBound)) { + if (!lowerBoundCalculator.calculate(lowerBound) || + !upperBoundCalculator.calculate(upperBound)) { LOG_ERROR(<< "Couldn't compute probability of less likely samples:" << " " << lowerBoundCalculator << " " << upperBoundCalculator); return false; @@ -677,8 +724,10 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } break; case maths_t::E_OneSidedAbove: - if (!minusLogJointCdfComplement(modes, weightStyles, samples, weights, upperBound, lowerBound)) { - LOG_ERROR(<< "Failed computing probability of less likely samples: " << core::CContainerPrinter::print(samples)); + if (!minusLogJointCdfComplement(modes, weightStyles, samples, + weights, upperBound, lowerBound)) { + LOG_ERROR(<< 
"Failed computing probability of less likely samples: " + << core::CContainerPrinter::print(samples)); return false; } lowerBound = std::exp(-lowerBound); @@ -693,12 +742,15 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! Check if this is a non-informative prior. template static bool isNonInformative(const std::vector>& modes) { - return modes.empty() || (modes.size() == 1 && modes[0].s_Prior->isNonInformative()); + return modes.empty() || + (modes.size() == 1 && modes[0].s_Prior->isNonInformative()); } //! Get a human readable description of the prior. template - static void print(const std::vector>& modes, const std::string& indent, std::string& result) { + static void print(const std::vector>& modes, + const std::string& indent, + std::string& result) { result += "\n" + indent + "multimodal"; if (isNonInformative(modes)) { result += " non-informative"; @@ -712,7 +764,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { result += ":"; for (std::size_t i = 0u; i < modes.size(); ++i) { double weight = modes[i].weight() / Z; - std::string indent_ = indent + " weight " + core::CStringUtils::typeToStringPretty(weight) + " "; + std::string indent_ = indent + " weight " + + core::CStringUtils::typeToStringPretty(weight) + " "; modes[i].s_Prior->print(indent_, result); } } @@ -728,7 +781,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { const TDouble4Vec1Vec& weights, double& lowerBound, double& upperBound) const { - return prior->minusLogJointCdf(weightStyles, samples, weights, lowerBound, upperBound); + return prior->minusLogJointCdf(weightStyles, samples, weights, + lowerBound, upperBound); } }; @@ -742,7 +796,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { const TDouble4Vec1Vec& weights, double& lowerBound, double& upperBound) const { - return prior->minusLogJointCdfComplement(weightStyles, samples, weights, lowerBound, upperBound); + return prior->minusLogJointCdfComplement(weightStyles, samples, weights, + lowerBound, upperBound); } }; @@ -756,14 +811,20 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { enum EStyle { E_Lower, E_Upper, E_Mean }; public: - CLogCdf(EStyle style, const PRIOR& prior, const maths_t::TWeightStyleVec& weightStyles, const TDouble4Vec& weights) - : m_Style(style), m_Prior(&prior), m_WeightStyles(&weightStyles), m_Weights(1, weights), m_X(1u, 0.0) {} + CLogCdf(EStyle style, + const PRIOR& prior, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) + : m_Style(style), m_Prior(&prior), m_WeightStyles(&weightStyles), + m_Weights(1, weights), m_X(1u, 0.0) {} double operator()(double x) const { m_X[0] = x; double lowerBound, upperBound; - if (!m_Prior->minusLogJointCdf(*m_WeightStyles, m_X, m_Weights, lowerBound, upperBound)) { - throw std::runtime_error("Unable to compute c.d.f. at " + core::CStringUtils::typeToString(x)); + if (!m_Prior->minusLogJointCdf(*m_WeightStyles, m_X, m_Weights, + lowerBound, upperBound)) { + throw std::runtime_error("Unable to compute c.d.f. 
at " + + core::CStringUtils::typeToString(x)); } switch (m_Style) { case E_Lower: @@ -805,7 +866,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } if (modes.size() == 1) { - return minusLogCdf(modes[0].s_Prior, weightStyles, samples, weights, lowerBound, upperBound); + return minusLogCdf(modes[0].s_Prior, weightStyles, samples, weights, + lowerBound, upperBound); } using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; @@ -826,12 +888,16 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { modeUpperBounds.reserve(modes.size()); try { - double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) ? marginalLikelihoodMean(modes) : 0.0; + double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) + ? marginalLikelihoodMean(modes) + : 0.0; for (std::size_t i = 0; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); + double seasonalScale = std::sqrt( + maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double countVarianceScale = + maths_t::countVarianceScale(weightStyles, weights[i]); if (isNonInformative(modes)) { lowerBound -= n * std::log(CTools::IMPROPER_CDF); @@ -839,7 +905,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { continue; } - sample[0] = seasonalScale != 1.0 ? mean + (samples[i] - mean) / seasonalScale : samples[i]; + sample[0] = seasonalScale != 1.0 ? mean + (samples[i] - mean) / seasonalScale + : samples[i]; weight[0][0] = countVarianceScale; // We re-normalize so that the maximum log c.d.f. is one @@ -852,8 +919,10 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { for (std::size_t j = 0u; j < modes.size(); ++j) { double modeLowerBound; double modeUpperBound; - if (!minusLogCdf(modes[j].s_Prior, TWeights::COUNT_VARIANCE, sample, weight, modeLowerBound, modeUpperBound)) { - LOG_ERROR(<< "Unable to compute c.d.f. for " << core::CContainerPrinter::print(samples)); + if (!minusLogCdf(modes[j].s_Prior, TWeights::COUNT_VARIANCE, sample, + weight, modeLowerBound, modeUpperBound)) { + LOG_ERROR(<< "Unable to compute c.d.f. for " + << core::CContainerPrinter::print(samples)); return false; } minLowerBound.add(modeLowerBound); @@ -866,20 +935,27 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { TMeanAccumulator sampleUpperBound; for (std::size_t j = 0u; j < modes.size(); ++j) { - LOG_TRACE(<< "Mode -log(c.d.f.) = [" << modeLowerBounds[j] << "," << modeUpperBounds[j] << "]"); + LOG_TRACE(<< "Mode -log(c.d.f.) = [" << modeLowerBounds[j] + << "," << modeUpperBounds[j] << "]"); double w = modes[j].weight(); // Divide through by the largest value to avoid underflow. // Remember we are working with minus logs so the largest // value corresponds to the smallest log. 
- sampleLowerBound.add(std::exp(-(modeLowerBounds[j] - minLowerBound[0])), w); - sampleUpperBound.add(std::exp(-(modeUpperBounds[j] - minUpperBound[0])), w); + sampleLowerBound.add( + std::exp(-(modeLowerBounds[j] - minLowerBound[0])), w); + sampleUpperBound.add( + std::exp(-(modeUpperBounds[j] - minUpperBound[0])), w); } - lowerBound += n * std::max(minLowerBound[0] - std::log(CBasicStatistics::mean(sampleLowerBound)), 0.0); - upperBound += n * std::max(minUpperBound[0] - std::log(CBasicStatistics::mean(sampleUpperBound)), 0.0); + lowerBound += n * std::max(minLowerBound[0] - + std::log(CBasicStatistics::mean(sampleLowerBound)), + 0.0); + upperBound += n * std::max(minUpperBound[0] - + std::log(CBasicStatistics::mean(sampleUpperBound)), + 0.0); - LOG_TRACE(<< "sample = " << core::CContainerPrinter::print(sample) << ", sample -log(c.d.f.) = [" << sampleLowerBound << "," - << sampleUpperBound << "]"); + LOG_TRACE(<< "sample = " << core::CContainerPrinter::print(sample) << ", sample -log(c.d.f.) = [" + << sampleLowerBound << "," << sampleUpperBound << "]"); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to calculate c.d.f.: " << e.what()); diff --git a/include/maths/CMultinomialConjugate.h b/include/maths/CMultinomialConjugate.h index f6ef1e228e..86c1451fb5 100644 --- a/include/maths/CMultinomialConjugate.h +++ b/include/maths/CMultinomialConjugate.h @@ -63,7 +63,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { double decayRate = 0.0); //! Construct from part of a state document. - CMultinomialConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + CMultinomialConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); // Default copy constructor and assignment operator work. @@ -75,7 +76,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! \param[in] maximumNumberOfCategories The number of categories in the likelihood function. //! \param[in] decayRate The rate at which to revert to the non-informative prior. //! \return A non-informative prior. - static CMultinomialConjugate nonInformativePrior(std::size_t maximumNumberOfCategories, double decayRate = 0.0); + static CMultinomialConjugate nonInformativePrior(std::size_t maximumNumberOfCategories, + double decayRate = 0.0); //@} //! \name Prior Contract //@{ @@ -96,7 +98,9 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Returns zero. virtual double offset() const; @@ -109,7 +113,9 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -128,12 +134,14 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function.
- virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -149,9 +157,10 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! \param[in] weightStyles Ignored. //! \param[in] weights Ignored. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the category probability parameters. @@ -167,10 +176,11 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! the model collection, so this is appropriate. //! \note The samples are assumed to be independent and identically //! distributed. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; //! Sample the marginal likelihood function. //! @@ -355,7 +365,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { TDoubleDoublePrVec confidenceIntervalProbabilities(double percentage) const; //! Check if two priors are equal to the specified tolerance. - bool equalTolerance(const CMultinomialConjugate& rhs, const TEqualWithTolerance& equal) const; + bool equalTolerance(const CMultinomialConjugate& rhs, + const TEqualWithTolerance& equal) const; //@} private: diff --git a/include/maths/CMultivariateConstantPrior.h b/include/maths/CMultivariateConstantPrior.h index 2558731c5a..da9f024ead 100644 --- a/include/maths/CMultivariateConstantPrior.h +++ b/include/maths/CMultivariateConstantPrior.h @@ -43,7 +43,8 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { public: //! \name Life-Cycle //@{ - CMultivariateConstantPrior(std::size_t dimension, const TOptionalDouble10Vec& constant = TOptionalDouble10Vec()); + CMultivariateConstantPrior(std::size_t dimension, + const TOptionalDouble10Vec& constant = TOptionalDouble10Vec()); //! Construct by traversing a state document. 
CMultivariateConstantPrior(std::size_t dimension, core::CStateRestoreTraverser& traverser); @@ -63,19 +64,25 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); //! No-op. - virtual void adjustOffset(const TWeightStyleVec& weightStyle, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights); + virtual void adjustOffset(const TWeightStyleVec& weightStyle, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights); //! Set the constant if it hasn't been set. - virtual void addSamples(const TWeightStyleVec& weightStyle, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyle, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights); //! No-op. virtual void propagateForwardsByTime(double time); //! Get the corresponding constant univariate prior. - virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; + virtual TUnivariatePriorPtrDoublePr + univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; //! Compute the bivariate const bivariate prior. - virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; + virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition) const; //! Get the support for the marginal likelihood function. virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const; @@ -84,7 +91,8 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { virtual TDouble10Vec marginalLikelihoodMean() const; //! Returns constant or zero if unset (by equidistribution). - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) const; //! Get the covariance matrix of the marginal likelihood. virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const; @@ -94,13 +102,15 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { //! Returns a large value if all samples are equal to the constant //! and zero otherwise. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& result) const; //! Get \p numberSamples times the constant. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const; + virtual void sampleMarginalLikelihood(std::size_t numberSamples, + TDouble10Vec1Vec& samples) const; //! Check if this is a non-informative prior. bool isNonInformative() const; diff --git a/include/maths/CMultivariateMultimodalPrior.h b/include/maths/CMultivariateMultimodalPrior.h index 7432c446fe..5aee60c468 100644 --- a/include/maths/CMultivariateMultimodalPrior.h +++ b/include/maths/CMultivariateMultimodalPrior.h @@ -58,16 +58,19 @@ using TModeVec = std::vector; //! Implementation of a sample joint log marginal likelihood calculation. 
MATHS_EXPORT -maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& sample, - const TDouble10Vec4Vec1Vec& weights, - TSizeDoublePr3Vec& modeLogLikelihoods, - double& result); +maths_t::EFloatingPointErrorStatus +jointLogMarginalLikelihood(const TModeVec& modes, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& sample, + const TDouble10Vec4Vec1Vec& weights, + TSizeDoublePr3Vec& modeLogLikelihoods, + double& result); //! Implementation of marginal likelihood sample. MATHS_EXPORT -void sampleMarginalLikelihood(const TModeVec& modes, std::size_t numberSamples, TDouble10Vec1Vec& samples); +void sampleMarginalLikelihood(const TModeVec& modes, + std::size_t numberSamples, + TDouble10Vec1Vec& samples); //! Implementation of mode printing. MATHS_EXPORT @@ -152,7 +155,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { const TClusterer& clusterer, const CMultivariatePrior& seedPrior, double decayRate = 0.0) - : CMultivariatePrior(dataType, decayRate), m_Clusterer(clusterer.clone()), m_SeedPrior(seedPrior.clone()) { + : CMultivariatePrior(dataType, decayRate), + m_Clusterer(clusterer.clone()), m_SeedPrior(seedPrior.clone()) { // Register the split and merge callbacks. m_Clusterer->splitFunc(CModeSplitCallback(*this)); m_Clusterer->mergeFunc(CModeMergeCallback(*this)); @@ -162,7 +166,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! //! \note The priors are shallow copied. //! \note This constructor doesn't support subsequent update of the prior. - CMultivariateMultimodalPrior(maths_t::EDataType dataType, TPriorPtrVec& priors) : CMultivariatePrior(dataType, 0.0) { + CMultivariateMultimodalPrior(maths_t::EDataType dataType, TPriorPtrVec& priors) + : CMultivariatePrior(dataType, 0.0) { m_Modes.reserve(priors.size()); for (std::size_t i = 0u; i < priors.size(); ++i) { m_Modes.emplace_back(i, priors[i]); @@ -170,9 +175,11 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Construct from part of a state document. - CMultivariateMultimodalPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) + CMultivariateMultimodalPrior(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) : CMultivariatePrior(params.s_DataType, params.s_DecayRate) { - traverser.traverseSubLevel(boost::bind(&CMultivariateMultimodalPrior::acceptRestoreTraverser, this, boost::cref(params), _1)); + traverser.traverseSubLevel(boost::bind(&CMultivariateMultimodalPrior::acceptRestoreTraverser, + this, boost::cref(params), _1)); } //! Implements value semantics for copy construction. @@ -230,7 +237,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! Create a copy of the prior. //! //! \warning Caller owns returned object. - virtual CMultivariatePrior* clone() const { return new CMultivariateMultimodalPrior(*this); } + virtual CMultivariatePrior* clone() const { + return new CMultivariateMultimodalPrior(*this); + } //! Get the dimension of the prior. virtual std::size_t dimension() const { return N; } @@ -270,7 +279,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. 
- virtual void adjustOffset(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) { + virtual void adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights) { // This has to adjust offsets for its modes because it must be // possible to call jointLogMarginalLikelihood before the samples // have been added to the prior in order for model selection to @@ -288,7 +299,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles_, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) { + virtual void addSamples(const TWeightStyleVec& weightStyles_, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights) { if (samples.empty()) { return; } @@ -334,7 +347,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } if (hasSeasonalScale) { - TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles_, weights[i]))); + TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale( + N, weightStyles_, weights[i]))); x = mean + (x - mean) / seasonalScale; } @@ -349,12 +363,15 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { clusters.clear(); m_Clusterer->add(x, clusters, smallestCountWeight); - double Z = std::accumulate( - m_Modes.begin(), m_Modes.end(), smallestCountWeight, [](double sum, const TMode& mode) { return sum + mode.weight(); }); + double Z = std::accumulate(m_Modes.begin(), m_Modes.end(), smallestCountWeight, + [](double sum, const TMode& mode) { + return sum + mode.weight(); + }); double n = 0.0; for (const auto& cluster : clusters) { - auto k = std::find_if(m_Modes.begin(), m_Modes.end(), CSetTools::CIndexInSet(cluster.first)); + auto k = std::find_if(m_Modes.begin(), m_Modes.end(), + CSetTools::CIndexInSet(cluster.first)); if (k == m_Modes.end()) { LOG_TRACE(<< "Creating mode with index " << cluster.first); m_Modes.emplace_back(cluster.first, m_SeedPrior); @@ -373,7 +390,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } this->addSamples(n); } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to update likelihood: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to update likelihood: " << e.what()); + } } //! Update the prior for the specified elapsed time. @@ -403,7 +422,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { mode.s_Prior->propagateForwardsByTime(time); } - this->numberSamples(this->numberSamples() * std::exp(-this->scaledDecayRate() * time)); + this->numberSamples(this->numberSamples() * + std::exp(-this->scaledDecayRate() * time)); LOG_TRACE(<< "numberSamples = " << this->numberSamples()); } @@ -418,7 +438,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! \note The caller must specify dimension - 1 variables between //! \p marginalize and \p condition so the resulting distribution //! is univariate. 
- virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + virtual TUnivariatePriorPtrDoublePr + univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { std::size_t n = m_Modes.size(); CMultimodalPrior::TPriorPtrVec modes; @@ -449,7 +470,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { modes[i]->numberSamples(weights[i] / Z * modes[i]->numberSamples()); } - return {TUnivariatePriorPtr(new CMultimodalPrior(this->dataType(), this->decayRate(), modes)), + return {TUnivariatePriorPtr(new CMultimodalPrior(this->dataType(), + this->decayRate(), modes)), Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0}; } @@ -465,7 +487,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! \note The caller must specify dimension - 2 variables between //! \p marginalize and \p condition so the resulting distribution //! is univariate. - virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition) const { if (N == 2) { return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0); } @@ -500,13 +523,15 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { modes[i]->numberSamples(weights[i] / Z * modes[i]->numberSamples()); } - return {TPriorPtr(new CMultivariateMultimodalPrior<2>(this->dataType(), modes)), Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0}; + return {TPriorPtr(new CMultivariateMultimodalPrior<2>(this->dataType(), modes)), + Z > 0.0 ? maxWeight[0] + std::log(Z) : 0.0}; } //! Get the support for the marginal likelihood function. virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const { if (m_Modes.size() == 0) { - return {TPoint::smallest().template toVector(), TPoint::largest().template toVector()}; + return {TPoint::smallest().template toVector(), + TPoint::largest().template toVector()}; } if (m_Modes.size() == 1) { return m_Modes[0].s_Prior->marginalLikelihoodSupport(); @@ -522,7 +547,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { upper = max(upper, TPoint(s.second)); } - return {lower.template toVector(), upper.template toVector()}; + return {lower.template toVector(), + upper.template toVector()}; } //! Get the mean of the marginal likelihood function. @@ -564,7 +590,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weight) const { + virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weight) const { if (m_Modes.size() == 0) { return TDouble10Vec(N, 0.0); } @@ -572,7 +599,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { return m_Modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weight); } - using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack>; + using TMaxAccumulator = + CBasicStatistics::COrderStatisticsStack>; // We'll approximate this as the mode with the maximum likelihood. 
TPoint result(0.0); @@ -580,9 +608,12 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { TPoint seasonalScale(1.0); TDouble10Vec4Vec1Vec weight_(1, TDouble10Vec4Vec(1)); try { - seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weight))); + seasonalScale = + sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weight))); weight_[0][0] = maths_t::countVarianceScale(N, weightStyles, weight); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get variance scale " << e.what()); + } // Declared outside the loop to minimize number of times it is created. TDouble10Vec1Vec mode(1); @@ -593,7 +624,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { const TPriorPtr& prior = mode_.s_Prior; mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight_[0]); double likelihood; - if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, mode, weight_, likelihood) & maths_t::E_FpAllErrors) { + if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, + mode, weight_, likelihood) & + maths_t::E_FpAllErrors) { continue; } if (modeLikelihood.add(std::log(w) + likelihood)) { @@ -607,7 +640,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Get the local maxima of the marginal likelihood functions. - TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const { + TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) const { TDouble10Vec1Vec result; result.reserve(m_Modes.size()); for (const auto& mode : m_Modes) { @@ -647,10 +681,11 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, - double& result) const { + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& result) const { result = 0.0; if (samples.empty()) { @@ -677,8 +712,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { if (m_Modes.size() == 1) { // Apply a small penalty to kill off this model if the data are // single mode. 
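// (Presumably a model-selection device: with a single mode this prior is
// equivalent to its one component, so docking the log-likelihood by a
// constant proportional to the decay rate lets the simpler unimodal model
// win unless a genuine second mode appears; the factor of 10.0 used here is
// the codebase's choice, not derived in this patch.)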
- maths_t::EFloatingPointErrorStatus status = - m_Modes[0].s_Prior->jointLogMarginalLikelihood(weightStyles, samples, weights, result); + maths_t::EFloatingPointErrorStatus status = m_Modes[0].s_Prior->jointLogMarginalLikelihood( + weightStyles, samples, weights, result); result -= 10.0 * this->decayRate(); return status; } @@ -698,8 +733,10 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { TDouble10Vec4Vec1Vec weights_(1, TDouble10Vec4Vec(1, TDouble10Vec(N, 1.0))); try { for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = this->smallest(maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i]))); + double n = this->smallest( + maths_t::countForUpdate(N, weightStyles, weights[i])); + TPoint seasonalScale = sqrt(TPoint( + maths_t::seasonalVarianceScale(N, weightStyles, weights[i]))); double logSeasonalScale = 0.0; for (std::size_t j = 0u; j < seasonalScale.dimension(); ++j) { logSeasonalScale += std::log(seasonalScale(j)); @@ -714,7 +751,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { double sampleLogLikelihood; maths_t::EFloatingPointErrorStatus status = detail::jointLogMarginalLikelihood( - m_Modes, TWeights::COUNT_VARIANCE, sample, weights_, modeLogLikelihoods, sampleLogLikelihood); + m_Modes, TWeights::COUNT_VARIANCE, sample, weights_, + modeLogLikelihoods, sampleLogLikelihood); if (status & maths_t::E_FpOverflowed) { result = boost::numeric::bounds::lowest(); return status; @@ -760,7 +798,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! \param[in] numberSamples The number of samples required. //! \param[out] samples Filled in with samples from the prior. //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const { + virtual void sampleMarginalLikelihood(std::size_t numberSamples, + TDouble10Vec1Vec& samples) const { namespace detail = multivariate_multimodal_prior_detail; samples.clear(); @@ -773,7 +812,10 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Check if this is a non-informative prior. - virtual bool isNonInformative() const { return m_Modes.empty() || (m_Modes.size() == 1 && m_Modes[0].s_Prior->isNonInformative()); } + virtual bool isNonInformative() const { + return m_Modes.empty() || + (m_Modes.size() == 1 && m_Modes[0].s_Prior->isNonInformative()); + } //! Get a human readable description of the prior. //! @@ -818,17 +860,25 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { virtual std::size_t staticSize() const { return sizeof(*this); } //! Get the tag name for this prior. - virtual std::string persistenceTag() const { return MULTIMODAL_TAG + core::CStringUtils::typeToString(N); } + virtual std::string persistenceTag() const { + return MULTIMODAL_TAG + core::CStringUtils::typeToString(N); + } //! 
Persist state by passing information to the supplied inserter virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(CLUSTERER_TAG, boost::bind(CClustererStateSerialiser(), boost::cref(*m_Clusterer), _1)); - inserter.insertLevel(SEED_PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_SeedPrior), _1)); + inserter.insertLevel(CLUSTERER_TAG, + boost::bind(CClustererStateSerialiser(), + boost::cref(*m_Clusterer), _1)); + inserter.insertLevel(SEED_PRIOR_TAG, + boost::bind(CPriorStateSerialiser(), + boost::cref(*m_SeedPrior), _1)); for (std::size_t i = 0u; i < m_Modes.size(); ++i) { - inserter.insertLevel(MODE_TAG, boost::bind(&TMode::acceptPersistInserter, &m_Modes[i], _1)); + inserter.insertLevel(MODE_TAG, boost::bind(&TMode::acceptPersistInserter, + &m_Modes[i], _1)); } inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), + core::CIEEE754::E_SinglePrecision); } //@} @@ -865,15 +915,19 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { static const std::size_t MODE_SPLIT_NUMBER_SAMPLES; public: - CModeSplitCallback(CMultivariateMultimodalPrior& prior) : m_Prior(&prior) {} + CModeSplitCallback(CMultivariateMultimodalPrior& prior) + : m_Prior(&prior) {} - void operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const { + void operator()(std::size_t sourceIndex, + std::size_t leftSplitIndex, + std::size_t rightSplitIndex) const { LOG_TRACE(<< "Splitting mode with index " << sourceIndex); TModeVec& modes = m_Prior->m_Modes; // Remove the split mode. - auto mode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(sourceIndex)); + auto mode = std::find_if(modes.begin(), modes.end(), + CSetTools::CIndexInSet(sourceIndex)); double numberSamples = mode != modes.end() ? mode->weight() : 0.0; modes.erase(mode); @@ -884,14 +938,16 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { pLeft /= Z; pRight /= Z; } - LOG_TRACE(<< "# samples = " << numberSamples << ", pLeft = " << pLeft << ", pRight = " << pRight); + LOG_TRACE(<< "# samples = " << numberSamples + << ", pLeft = " << pLeft << ", pRight = " << pRight); // Create the child modes. 
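// A condensed sketch of how each child mode is seeded below, assuming the
// clusterer can produce representative samples for the child cluster
// (ToyPrior and seedChildMode are illustrative stand-ins, not this codebase's
// API): the samples are added once at unit weight, then re-added with weight
// (n - ns) / ns so the child's effective sample count reaches its share n of
// the parent's count, in the same spirit as the (nl - ns) / s and
// (nr - ns) / s weightings used here.
#include <cstddef>
#include <vector>

struct ToyPrior {
    double s_NumberSamples = 0.0;
    void addSamples(const std::vector<double>& samples, double weight) {
        s_NumberSamples += weight * static_cast<double>(samples.size());
    }
};

void seedChildMode(ToyPrior& child, const std::vector<double>& samples, double n) {
    double ns = static_cast<double>(samples.size());
    child.addSamples(samples, 1.0); // seed with the raw cluster samples
    double weight = (n - ns) / ns;
    if (weight > 0.0) {
        child.addSamples(samples, weight); // top up the effective count to n
    }
}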
LOG_TRACE(<< "Creating mode with index " << leftSplitIndex); modes.emplace_back(leftSplitIndex, m_Prior->m_SeedPrior); { TPointVec samples; - if (!m_Prior->m_Clusterer->sample(leftSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) { + if (!m_Prior->m_Clusterer->sample( + leftSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) { LOG_ERROR(<< "Couldn't find cluster for " << leftSplitIndex); } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); @@ -911,7 +967,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); double weight = (nl - ns) / s; if (weight > 0.0) { - weights.assign(weights.size(), TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); + weights.assign(weights.size(), + TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); LOG_TRACE(<< modes.back().s_Prior->print()); } @@ -921,7 +978,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { modes.emplace_back(rightSplitIndex, m_Prior->m_SeedPrior); { TPointVec samples; - if (!m_Prior->m_Clusterer->sample(rightSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) { + if (!m_Prior->m_Clusterer->sample( + rightSplitIndex, MODE_SPLIT_NUMBER_SAMPLES, samples)) { LOG_ERROR(<< "Couldn't find cluster for " << rightSplitIndex) } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); @@ -941,7 +999,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); double weight = (nr - ns) / s; if (weight > 0.0) { - weights.assign(weights.size(), TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); + weights.assign(weights.size(), + TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); LOG_TRACE(<< modes.back().s_Prior->print()); } @@ -961,12 +1020,16 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { static const std::size_t MODE_MERGE_NUMBER_SAMPLES; public: - CModeMergeCallback(CMultivariateMultimodalPrior& prior) : m_Prior(&prior) {} + CModeMergeCallback(CMultivariateMultimodalPrior& prior) + : m_Prior(&prior) {} - void operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t targetIndex) const { + void operator()(std::size_t leftMergeIndex, + std::size_t rightMergeIndex, + std::size_t targetIndex) const { namespace detail = multivariate_multimodal_prior_detail; - detail::modeMergeCallback( - N, m_Prior->m_Modes, m_Prior->m_SeedPrior, MODE_MERGE_NUMBER_SAMPLES, leftMergeIndex, rightMergeIndex, targetIndex); + detail::modeMergeCallback(N, m_Prior->m_Modes, m_Prior->m_SeedPrior, + MODE_MERGE_NUMBER_SAMPLES, leftMergeIndex, + rightMergeIndex, targetIndex); } private: @@ -987,27 +1050,28 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { private: //! Read parameters from \p traverser. 
- bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, - double decayRate, + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) - RESTORE(CLUSTERER_TAG, - traverser.traverseSubLevel( - boost::bind(CClustererStateSerialiser(), boost::cref(params), boost::ref(m_Clusterer), _1))) - RESTORE( - SEED_PRIOR_TAG, - traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(m_SeedPrior), _1))) - RESTORE_SETUP_TEARDOWN(MODE_TAG, - TMode mode, - traverser.traverseSubLevel(boost::bind(&TMode::acceptRestoreTraverser, &mode, boost::cref(params), _1)), - m_Modes.push_back(mode)) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, - core::CStringUtils::stringToType(traverser.value(), numberSamples), - this->numberSamples(numberSamples)) + RESTORE(CLUSTERER_TAG, traverser.traverseSubLevel(boost::bind( + CClustererStateSerialiser(), boost::cref(params), + boost::ref(m_Clusterer), _1))) + RESTORE(SEED_PRIOR_TAG, traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), boost::cref(params), + boost::ref(m_SeedPrior), _1))) + RESTORE_SETUP_TEARDOWN( + MODE_TAG, TMode mode, + traverser.traverseSubLevel(boost::bind( + &TMode::acceptRestoreTraverser, &mode, boost::cref(params), _1)), + m_Modes.push_back(mode)) + RESTORE_SETUP_TEARDOWN( + NUMBER_SAMPLES_TAG, double numberSamples, + core::CStringUtils::stringToType(traverser.value(), numberSamples), + this->numberSamples(numberSamples)) } while (traverser.next()); if (m_Clusterer) { @@ -1020,12 +1084,16 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! We should only use this prior when it has multiple modes. - virtual bool participatesInModelSelection() const { return m_Modes.size() > 1; } + virtual bool participatesInModelSelection() const { + return m_Modes.size() > 1; + } //! Get the number of nuisance parameters in the marginal likelihood. //! //! This is just number modes - 1 due to the normalization constraint. - virtual double unmarginalizedParameters() const { return std::max(static_cast(m_Modes.size()), 1.0) - 1.0; } + virtual double unmarginalizedParameters() const { + return std::max(static_cast(m_Modes.size()), 1.0) - 1.0; + } //! Get the convariance matrix for the marginal likelihood. TMatrix covarianceMatrix() const { @@ -1034,7 +1102,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { // = Sum_i{ w(i) * (Integral{ x' * x * f(x | i) } - m' * m) } // = Sum_i{ w(i) * ((mi' * mi + Ci) - m' * m) } - using TMatrixMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + using TMatrixMeanAccumulator = + typename CBasicStatistics::SSampleMean::TAccumulator; TMatrix mean2 = TPoint(this->marginalLikelihoodMean()).outer(); @@ -1050,7 +1119,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Full debug dump of the mode weights. - std::string debugWeights() const { return multivariate_multimodal_prior_detail::debugWeights(m_Modes); } + std::string debugWeights() const { + return multivariate_multimodal_prior_detail::debugWeights(m_Modes); + } private: //! The object which partitions the data into clusters. 
@@ -1078,9 +1149,11 @@ const std::string CMultivariateMultimodalPrior<N>::MAXIMUM_TAG("f");
template<std::size_t N>
const std::string CMultivariateMultimodalPrior<N>::DECAY_RATE_TAG("g");
template<std::size_t N>
-const std::size_t CMultivariateMultimodalPrior<N>::CModeSplitCallback::MODE_SPLIT_NUMBER_SAMPLES(50 * N);
+const std::size_t
+    CMultivariateMultimodalPrior<N>::CModeSplitCallback::MODE_SPLIT_NUMBER_SAMPLES(50 * N);
template<std::size_t N>
-const std::size_t CMultivariateMultimodalPrior<N>::CModeMergeCallback::MODE_MERGE_NUMBER_SAMPLES(25 * N);
+const std::size_t
+    CMultivariateMultimodalPrior<N>::CModeMergeCallback::MODE_MERGE_NUMBER_SAMPLES(25 * N);
}
}
diff --git a/include/maths/CMultivariateMultimodalPriorFactory.h b/include/maths/CMultivariateMultimodalPriorFactory.h
index 0629b7ff9b..bbe73801a3 100644
--- a/include/maths/CMultivariateMultimodalPriorFactory.h
+++ b/include/maths/CMultivariateMultimodalPriorFactory.h
@@ -40,8 +40,10 @@ class MATHS_EXPORT CMultivariateMultimodalPriorFactory {
                             const CMultivariatePrior& seedPrior);

    //! Create reading state from its state document representation.
-    static bool
-    restore(std::size_t dimension, const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser);
+    static bool restore(std::size_t dimension,
+                        const SDistributionRestoreParams& params,
+                        TPriorPtr& ptr,
+                        core::CStateRestoreTraverser& traverser);
};
}
}
diff --git a/include/maths/CMultivariateNormalConjugate.h b/include/maths/CMultivariateNormalConjugate.h
index 03f02fc709..64a5235b56 100644
--- a/include/maths/CMultivariateNormalConjugate.h
+++ b/include/maths/CMultivariateNormalConjugate.h
@@ -116,26 +116,30 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
                                 double wishartDegreesFreedom,
                                 const TMatrix& wishartScaleMatrix,
                                 double decayRate = 0.0)
-        : CMultivariatePrior(dataType, decayRate),
-          m_GaussianMean(gaussianMean),
+        : CMultivariatePrior(dataType, decayRate), m_GaussianMean(gaussianMean),
          m_GaussianPrecision(gaussianPrecision),
          m_WishartDegreesFreedom(wishartDegreesFreedom),
          m_WishartScaleMatrix(wishartScaleMatrix) {}

    //! Construct from sample central moments.
-    CMultivariateNormalConjugate(maths_t::EDataType dataType, const TCovariance& covariance, double decayRate = 0.0)
+    CMultivariateNormalConjugate(maths_t::EDataType dataType,
+                                 const TCovariance& covariance,
+                                 double decayRate = 0.0)
        : CMultivariatePrior(dataType, decayRate),
          m_GaussianMean(CBasicStatistics::mean(covariance)),
          m_GaussianPrecision(covariance.s_Count),
-          m_WishartDegreesFreedom(this->smallest(covariance.s_Count.template toVector<TDouble10Vec>())),
+          m_WishartDegreesFreedom(
+              this->smallest(covariance.s_Count.template toVector<TDouble10Vec>())),
          m_WishartScaleMatrix(covariance.s_Count * covariance.s_Covariances) {
        this->numberSamples(CBasicStatistics::count(covariance));
    }

    //! Construct from part of a state document.
-    CMultivariateNormalConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser)
+    CMultivariateNormalConjugate(const SDistributionRestoreParams& params,
+                                 core::CStateRestoreTraverser& traverser)
        : CMultivariatePrior(params.s_DataType, params.s_DecayRate) {
-        traverser.traverseSubLevel(boost::bind(&CMultivariateNormalConjugate::acceptRestoreTraverser, this, _1));
+        traverser.traverseSubLevel(boost::bind(
+            &CMultivariateNormalConjugate::acceptRestoreTraverser, this, _1));
    }

    virtual ~CMultivariateNormalConjugate() {}
@@ -148,13 +152,11 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    //! for details).
    //!
\param[in] decayRate The rate at which to revert to the non-informative prior. //! \return A non-informative prior. - static CMultivariateNormalConjugate nonInformativePrior(maths_t::EDataType dataType, double decayRate = 0.0) { - return CMultivariateNormalConjugate(dataType, - NON_INFORMATIVE_MEAN, - TPoint(NON_INFORMATIVE_PRECISION), - NON_INFORMATIVE_DEGREES_FREEDOM, - NON_INFORMATIVE_SCALE, - decayRate); + static CMultivariateNormalConjugate + nonInformativePrior(maths_t::EDataType dataType, double decayRate = 0.0) { + return CMultivariateNormalConjugate( + dataType, NON_INFORMATIVE_MEAN, TPoint(NON_INFORMATIVE_PRECISION), + NON_INFORMATIVE_DEGREES_FREEDOM, NON_INFORMATIVE_SCALE, decayRate); } //@} @@ -163,7 +165,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! Create a copy of the prior. //! //! \warning Caller owns returned object. - virtual CMultivariateNormalConjugate* clone() const { return new CMultivariateNormalConjugate(*this); } + virtual CMultivariateNormalConjugate* clone() const { + return new CMultivariateNormalConjugate(*this); + } //! Get the dimension of the prior. std::size_t dimension() const { return N; } @@ -174,8 +178,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { } //! No-op. - virtual void - adjustOffset(const TWeightStyleVec& /*weightStyles*/, const TDouble10Vec1Vec& /*samples*/, const TDouble10Vec4Vec1Vec& /*weights*/) {} + virtual void adjustOffset(const TWeightStyleVec& /*weightStyles*/, + const TDouble10Vec1Vec& /*samples*/, + const TDouble10Vec4Vec1Vec& /*weights*/) {} //! Update the prior with a collection of independent samples from the //! process. @@ -185,7 +190,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. 
-    virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) {
+    virtual void addSamples(const TWeightStyleVec& weightStyles,
+                            const TDouble10Vec1Vec& samples,
+                            const TDouble10Vec4Vec1Vec& weights) {
        if (samples.empty()) {
            return;
        }
@@ -230,8 +237,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
        for (std::size_t i = 0u; i < samples.size(); ++i) {
            TPoint x(samples[i]);
            TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i]));
-            TPoint varianceScale = TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])) *
-                                   TPoint(maths_t::countVarianceScale(N, weightStyles, weights[i]));
+            TPoint varianceScale =
+                TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])) *
+                TPoint(maths_t::countVarianceScale(N, weightStyles, weights[i]));
            numberSamples += n;
            covariancePost.add(x, n / varianceScale);
        }
@@ -250,12 +258,14 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
            TPoint scale = TPoint(1.0) / m_GaussianPrecision;
            TMatrix covariances = m_WishartScaleMatrix;
            scaleCovariances(scale, covariances);
-            TCovariance covariancePrior = CBasicStatistics::accumulator(m_GaussianPrecision, m_GaussianMean, covariances);
+            TCovariance covariancePrior = CBasicStatistics::accumulator(
+                m_GaussianPrecision, m_GaussianMean, covariances);
            covariancePost += covariancePrior;
        }
        m_GaussianMean = CBasicStatistics::mean(covariancePost);
        m_GaussianPrecision += scaledNumberSamples;
-        m_WishartDegreesFreedom += this->smallest(numberSamples.template toVector<TDouble10Vec>());
+        m_WishartDegreesFreedom +=
+            this->smallest(numberSamples.template toVector<TDouble10Vec>());
        m_WishartScaleMatrix = covariancePost.s_Covariances;
        scaleCovariances(covariancePost.s_Count, m_WishartScaleMatrix);
@@ -274,9 +284,12 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
            }
        }

-        LOG_TRACE(<< "numberSamples = " << numberSamples << ", scaledNumberSamples = " << scaledNumberSamples
-                  << ", m_WishartDegreesFreedom = " << m_WishartDegreesFreedom << ", m_WishartScaleMatrix = " << m_WishartScaleMatrix
-                  << ", m_GaussianMean = " << m_GaussianMean << ", m_GaussianPrecision = " << m_GaussianPrecision);
+        LOG_TRACE(<< "numberSamples = " << numberSamples
+                  << ", scaledNumberSamples = " << scaledNumberSamples
+                  << ", m_WishartDegreesFreedom = " << m_WishartDegreesFreedom
+                  << ", m_WishartScaleMatrix = " << m_WishartScaleMatrix
+                  << ", m_GaussianMean = " << m_GaussianMean
+                  << ", m_GaussianPrecision = " << m_GaussianPrecision);

        if (this->isBad()) {
            LOG_ERROR(<< "Update failed (" << this->debug() << ")"
@@ -300,7 +313,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {

        double alpha = std::exp(-this->scaledDecayRate() * time);

-        m_GaussianPrecision = alpha * m_GaussianPrecision + (1.0 - alpha) * TPoint(NON_INFORMATIVE_PRECISION);
+        m_GaussianPrecision = alpha * m_GaussianPrecision +
+                              (1.0 - alpha) * TPoint(NON_INFORMATIVE_PRECISION);

        // The mean of the Wishart distribution is n V and the variance
        // is [V]_ij = n ( V_ij^2 + V_ii * V_jj), note V is the inverse
        //
        // Thus the mean is unchanged and variance is increased by 1 / f.
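Aside (editorial, not part of the patch): the factor computation just below leans on the Wishart moments quoted in the comment above. A sketch of the algebra in LaTeX, writing \nu for m_WishartDegreesFreedom and S for m_WishartScaleMatrix, with V = S/\nu:

    W \sim \text{Wishart}(\nu, V):\qquad
    \mathbb{E}[W/\nu] = V, \qquad
    \operatorname{Var}[(W/\nu)_{ij}] = \frac{V_{ij}^2 + V_{ii} V_{jj}}{\nu}.

Replacing (\nu, S) by (f\nu, fS) leaves V, and hence the mean estimate, unchanged while multiplying each element variance by 1/f, which is the claim in the comment.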
-        double factor =
-            std::min((alpha * m_WishartDegreesFreedom + (1.0 - alpha) * NON_INFORMATIVE_DEGREES_FREEDOM) / m_WishartDegreesFreedom, 1.0);
+        double factor = std::min((alpha * m_WishartDegreesFreedom +
+                                  (1.0 - alpha) * NON_INFORMATIVE_DEGREES_FREEDOM) /
+                                     m_WishartDegreesFreedom,
+                                 1.0);

        m_WishartDegreesFreedom *= factor;
        m_WishartScaleMatrix *= factor;

        this->numberSamples(this->numberSamples() * alpha);

-        LOG_TRACE(<< "time = " << time << ", alpha = " << alpha << ", m_WishartDegreesFreedom = " << m_WishartDegreesFreedom
-                  << ", m_WishartScaleMatrix = " << m_WishartScaleMatrix << ", m_GaussianMean = " << m_GaussianMean
-                  << ", m_GaussianPrecision = " << m_GaussianPrecision << ", numberSamples = " << this->numberSamples());
+        LOG_TRACE(<< "time = " << time << ", alpha = " << alpha
+                  << ", m_WishartDegreesFreedom = " << m_WishartDegreesFreedom
+                  << ", m_WishartScaleMatrix = " << m_WishartScaleMatrix
+                  << ", m_GaussianMean = " << m_GaussianMean
+                  << ", m_GaussianPrecision = " << m_GaussianPrecision
+                  << ", numberSamples = " << this->numberSamples());
    }

    //! Compute the univariate prior marginalizing over the variables
@@ -337,7 +356,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    //! \note The caller must specify dimension - 1 variables between
    //! \p marginalize and \p condition so the resulting distribution
    //! is univariate.
-    virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const {
+    virtual TUnivariatePriorPtrDoublePr
+    univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const {
        if (!this->check(marginalize, condition)) {
            return TUnivariatePriorPtrDoublePr();
        }
@@ -347,14 +367,17 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
        if (i1.size() != 1) {
            LOG_ERROR(<< "Invalid variables for computing univariate distribution: "
                      << "marginalize '" << core::CContainerPrinter::print(marginalize) << "'"
-                      << ", condition '" << core::CContainerPrinter::print(condition) << "'");
+                      << ", condition '"
+                      << core::CContainerPrinter::print(condition) << "'");
            return TUnivariatePriorPtrDoublePr();
        }

        maths_t::EDataType dataType = this->dataType();
        double decayRate = this->decayRate();
        if (this->isNonInformative()) {
-            return {TUnivariatePriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate).clone()), 0.0};
+            return {TUnivariatePriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate)
+                                            .clone()),
+                    0.0};
        }

        double p = m_GaussianPrecision(i1[0]);
@@ -366,7 +389,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
        double m1 = m(i1[0]);
        double c11 = c(i1[0], i1[0]);
        if (condition.empty()) {
-            return {TUnivariatePriorPtr(new CNormalMeanPrecConjugate(dataType, m1, p, s, c11 * v / 2.0, decayRate)), 0.0};
+            return {TUnivariatePriorPtr(new CNormalMeanPrecConjugate(
+                        dataType, m1, p, s, c11 * v / 2.0, decayRate)),
+                    0.0};
        }

        TSize10Vec condition_;
@@ -380,18 +405,26 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
            condition_.push_back(i1[0]);
            CDenseMatrix<double> cp = projectedMatrix(condition_, c);
            CDenseVector<double> c12 = cp.topRightCorner(n, 1);
-            Eigen::JacobiSVD<CDenseMatrix<double>> c22(cp.topLeftCorner(n, n), Eigen::ComputeThinU | Eigen::ComputeThinV);
-            LOG_TRACE(<< "c22 = " << cp.topLeftCorner(n, n) << ", c12 = " << c12 << ", a = " << xc << ", m2 = " << m2);
+            Eigen::JacobiSVD<CDenseMatrix<double>> c22(
+                cp.topLeftCorner(n, n), Eigen::ComputeThinU | Eigen::ComputeThinV);
+            LOG_TRACE(<< "c22 = " << cp.topLeftCorner(n, n) << ", c12 = " << c12
+                      << ", a = " << xc << ", m2 = " << m2);

            CDenseVector<double> c22SolvexcMinusm2 = c22.solve(xc - m2);
            double mean = m1 + c12.transpose() * c22SolvexcMinusm2;
-            double variance = std::max(c11 - c12.transpose() * c22.solve(c12), MINIMUM_COEFFICIENT_OF_VARIATION * std::fabs(mean));
+            double variance = std::max(c11 - c12.transpose() * c22.solve(c12),
+                                       MINIMUM_COEFFICIENT_OF_VARIATION * std::fabs(mean));
            double weight = 0.5 * (std::log(variance) - (xc - m2).transpose() * c22SolvexcMinusm2);
-            LOG_TRACE(<< "mean = " << mean << ", variance = " << variance << ", weight = " << weight);
+            LOG_TRACE(<< "mean = " << mean << ", variance = " << variance
+                      << ", weight = " << weight);

-            return {TUnivariatePriorPtr(new CNormalMeanPrecConjugate(dataType, mean, p, s, variance * v / 2.0, decayRate)), weight};
-        } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get univariate prior: " << e.what()); }
+            return {TUnivariatePriorPtr(new CNormalMeanPrecConjugate(
+                        dataType, mean, p, s, variance * v / 2.0, decayRate)),
+                    weight};
+        } catch (const std::exception& e) {
+            LOG_ERROR(<< "Failed to get univariate prior: " << e.what());
+        }

        return TUnivariatePriorPtrDoublePr();
    }
@@ -408,9 +441,11 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    //! \note The caller must specify dimension - 2 variables between
    //! \p marginalize and \p condition so the resulting distribution
    //! is bivariate.
-    virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const {
+    virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize,
+                                        const TSizeDoublePr10Vec& condition) const {
        if (N == 2) {
-            return TPriorPtrDoublePr(boost::shared_ptr<CMultivariatePrior>(this->clone()), 0.0);
+            return TPriorPtrDoublePr(
+                boost::shared_ptr<CMultivariatePrior>(this->clone()), 0.0);
        }

        if (!this->check(marginalize, condition)) {
@@ -426,7 +461,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
        maths_t::EDataType dataType = this->dataType();
        double decayRate = this->decayRate();
        if (this->isNonInformative()) {
-            return {TPriorPtr(CMultivariateNormalConjugate<2>::nonInformativePrior(dataType, decayRate).clone()), 0.0};
+            return {TPriorPtr(CMultivariateNormalConjugate<2>::nonInformativePrior(dataType, decayRate)
+                                  .clone()),
+                    0.0};
        }

        using TPoint2 = CVectorNx1<double, 2>;
@@ -448,7 +485,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
            }
        }
        if (condition.empty()) {
-            return {TPriorPtr(new CMultivariateNormalConjugate<2>(dataType, m1, p, f, c11, decayRate)), 0.0};
+            return {TPriorPtr(new CMultivariateNormalConjugate<2>(
+                        dataType, m1, p, f, c11, decayRate)),
+                    0.0};
        }

        TSize10Vec condition_;
@@ -463,34 +502,46 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
            condition_.push_back(i1[1]);
            CDenseMatrix<double> cp = projectedMatrix(condition_, c);
            CDenseVector<double> c12 = cp.topRightCorner(n, 1);
-            Eigen::JacobiSVD<CDenseMatrix<double>> c22(cp.topLeftCorner(n, n), Eigen::ComputeThinU | Eigen::ComputeThinV);
-            LOG_TRACE(<< "c22 = " << cp.topLeftCorner(n, n) << ", c12 = " << c12 << ", a = " << xc << ", m2 = " << m2);
+            Eigen::JacobiSVD<CDenseMatrix<double>> c22(
+                cp.topLeftCorner(n, n), Eigen::ComputeThinU | Eigen::ComputeThinV);
+            LOG_TRACE(<< "c22 = " << cp.topLeftCorner(n, n) << ", c12 = " << c12
+                      << ", a = " << xc << ", m2 = " << m2);

            CDenseVector<double> c22SolvexcMinusm2 = c22.solve(xc - m2);
-            TPoint2 mean(fromDenseVector(toDynamicDenseVector(m1) + c12.transpose() * c22SolvexcMinusm2));
-            TMatrix2 covariance(fromDenseMatrix(toDynamicDenseMatrix(c11) - c12.transpose() * c22.solve(c12)));
+            TPoint2 mean(fromDenseVector(toDynamicDenseVector(m1) +
+                                         c12.transpose() * c22SolvexcMinusm2));
+            TMatrix2 covariance(fromDenseMatrix(toDynamicDenseMatrix(c11) -
+                                                c12.transpose() * c22.solve(c12)));
            double weight;
            logDeterminant(covariance, weight, false);
            weight -= 0.5 * (xc - m2).transpose() * c22SolvexcMinusm2;
            LOG_TRACE(<< "mean = " << mean << ", covariance = " << covariance);

-            return {TPriorPtr(new CMultivariateNormalConjugate<2>(dataType, mean, p, f, covariance, decayRate)), weight};
-        } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get bivariate prior: " << e.what()); }
+            return {TPriorPtr(new CMultivariateNormalConjugate<2>(
+                        dataType, mean, p, f, covariance, decayRate)),
+                    weight};
+        } catch (const std::exception& e) {
+            LOG_ERROR(<< "Failed to get bivariate prior: " << e.what());
+        }

        return TPriorPtrDoublePr();
    }

    //! Get the support for the marginal likelihood function.
    virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const {
-        return {TPoint::smallest().template toVector<TDouble10Vec>(), TPoint::largest().template toVector<TDouble10Vec>()};
+        return {TPoint::smallest().template toVector<TDouble10Vec>(),
+                TPoint::largest().template toVector<TDouble10Vec>()};
    }

    //! Get the mean of the marginal likelihood function.
-    virtual TDouble10Vec marginalLikelihoodMean() const { return this->mean().template toVector<TDouble10Vec>(); }
+    virtual TDouble10Vec marginalLikelihoodMean() const {
+        return this->mean().template toVector<TDouble10Vec>();
+    }

    //! Get the mode of the marginal likelihood function.
-    virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble10Vec4Vec& /*weights*/) const {
+    virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
+                                                const TDouble10Vec4Vec& /*weights*/) const {
        return this->marginalLikelihoodMean();
    }
@@ -500,7 +551,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    }

    //! Get the diagonal of the covariance matrix for the marginal likelihood.
-    virtual TDouble10Vec marginalLikelihoodVariances() const { return this->covarianceMatrix().template diagonal<TDouble10Vec>(); }
+    virtual TDouble10Vec marginalLikelihoodVariances() const {
+        return this->covarianceMatrix().template diagonal<TDouble10Vec>();
+    }

    //! Calculate the log marginal likelihood function, integrating over the
    //! prior density function.
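Aside (editorial, not part of the patch): univariate() and bivariate() above are both instances of the usual Gaussian conditioning identities. For reference, partitioning the mean as m = (m_1, m_2) and the covariance into blocks C_{11}, C_{12}, C_{21}, C_{22}, conditioning on x_2 = a gives

    x_1 \mid x_2 = a \;\sim\; \mathcal{N}\!\left( m_1 + C_{12} C_{22}^{-1} (a - m_2),\;
                                                  C_{11} - C_{12} C_{22}^{-1} C_{21} \right),

which is exactly what the code evaluates via the SVD solve: c12.transpose() * c22.solve(xc - m2) for the mean shift and c12.transpose() * c22.solve(c12) for the variance reduction.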
@@ -511,10 +564,11 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    //! \param[in] samples A collection of samples of the process.
    //! \param[in] weights The weights of each sample in \p samples.
    //! \param[out] result Filled in with the joint likelihood of \p samples.
-    virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                                          const TDouble10Vec1Vec& samples,
-                                                                          const TDouble10Vec4Vec1Vec& weights,
-                                                                          double& result) const {
+    virtual maths_t::EFloatingPointErrorStatus
+    jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                               const TDouble10Vec1Vec& samples,
+                               const TDouble10Vec4Vec1Vec& weights,
+                               double& result) const {
        result = 0.0;

        if (samples.empty()) {
@@ -546,7 +600,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {

        if (this->isInteger()) {
            double logLikelihood;
-            status = this->jointLogMarginalLikelihood(weightStyles, samples, TPoint(0.5), weights, logLikelihood);
+            status = this->jointLogMarginalLikelihood(
+                weightStyles, samples, TPoint(0.5), weights, logLikelihood);
            if (status != maths_t::E_FpNoErrors) {
                return status;
            }
@@ -558,7 +613,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
            TDoubleVec z;
            CSampling::uniformSample(0.0, 1.0, 3 * N, z);
            for (std::size_t i = 0u; i < z.size(); i += N) {
-                status = this->jointLogMarginalLikelihood(weightStyles, samples, TPoint(&z[i], &z[i + N]), weights, logLikelihood);
+                status = this->jointLogMarginalLikelihood(
+                    weightStyles, samples, TPoint(&z[i], &z[i + N]), weights, logLikelihood);
                if (status & maths_t::E_FpFailed) {
                    return maths_t::E_FpFailed;
                }
@@ -576,7 +632,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {

            result = maxLogLikelihood + std::log(sum / n);
        } else {
-            status = this->jointLogMarginalLikelihood(weightStyles, samples, TPoint(0.0), weights, result);
+            status = this->jointLogMarginalLikelihood(weightStyles, samples,
+                                                      TPoint(0.0), weights, result);
        }

        if (status & maths_t::E_FpFailed) {
@@ -611,7 +668,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    //! \param[in] numberSamples The number of samples required.
    //! \param[out] samples Filled in with samples from the prior.
    //! \note \p numberSamples is truncated to the number of samples received.
-    virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const {
+    virtual void sampleMarginalLikelihood(std::size_t numberSamples,
+                                          TDouble10Vec1Vec& samples) const {
        samples.clear();

        if (numberSamples == 0 || this->numberSamples() == 0.0) {
@@ -657,7 +715,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    }

    //! Check if this is a non-informative prior.
-    virtual bool isNonInformative() const { return m_WishartDegreesFreedom <= static_cast<double>(N + 1); }
+    virtual bool isNonInformative() const {
+        return m_WishartDegreesFreedom <= static_cast<double>(N + 1);
+    }

    //! Get a human readable description of the prior.
    //!
@@ -687,7 +747,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    }

    //! Get the memory used by this component
-    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMultivariateNormalConjugate"); }
+    virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
+        mem->setName("CMultivariateNormalConjugate");
+    }

    //! Get the memory used by this component
    virtual std::size_t memoryUsage() const { return 0; }
@@ -696,24 +758,28 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    virtual std::size_t staticSize() const { return sizeof(*this); }

    //! Get the tag name for this prior.
-    virtual std::string persistenceTag() const { return NORMAL_TAG + core::CStringUtils::typeToString(N); }
+    virtual std::string persistenceTag() const {
+        return NORMAL_TAG + core::CStringUtils::typeToString(N);
+    }

    //! Read parameters from \p traverser.
    bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
        do {
            const std::string& name = traverser.name();
-            RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG,
-                                   double decayRate,
+            RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate,
                                   core::CStringUtils::stringToType(traverser.value(), decayRate),
                                   this->decayRate(decayRate))
-            RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG,
-                                   double numberSamples,
-                                   core::CStringUtils::stringToType(traverser.value(), numberSamples),
-                                   this->numberSamples(numberSamples))
+            RESTORE_SETUP_TEARDOWN(
+                NUMBER_SAMPLES_TAG, double numberSamples,
+                core::CStringUtils::stringToType(traverser.value(), numberSamples),
+                this->numberSamples(numberSamples))
            RESTORE(GAUSSIAN_MEAN_TAG, m_GaussianMean.fromDelimited(traverser.value()))
-            RESTORE(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision.fromDelimited(traverser.value()))
-            RESTORE(WISHART_DEGREES_FREEDOM_TAG, core::CStringUtils::stringToType(traverser.value(), m_WishartDegreesFreedom))
-            RESTORE(WISHART_SCALE_MATRIX_TAG, m_WishartScaleMatrix.fromDelimited(traverser.value()))
+            RESTORE(GAUSSIAN_PRECISION_TAG,
+                    m_GaussianPrecision.fromDelimited(traverser.value()))
+            RESTORE(WISHART_DEGREES_FREEDOM_TAG,
+                    core::CStringUtils::stringToType(traverser.value(), m_WishartDegreesFreedom))
+            RESTORE(WISHART_SCALE_MATRIX_TAG,
+                    m_WishartScaleMatrix.fromDelimited(traverser.value()))
        } while (traverser.next());

        return true;
@@ -722,10 +788,12 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    //! Persist state by passing information to the supplied inserter
    virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const {
        inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision);
-        inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision);
+        inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(),
+                             core::CIEEE754::E_SinglePrecision);
        inserter.insertValue(GAUSSIAN_MEAN_TAG, m_GaussianMean.toDelimited());
        inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision.toDelimited());
-        inserter.insertValue(WISHART_DEGREES_FREEDOM_TAG, m_WishartDegreesFreedom, core::CIEEE754::E_DoublePrecision);
+        inserter.insertValue(WISHART_DEGREES_FREEDOM_TAG, m_WishartDegreesFreedom,
+                             core::CIEEE754::E_DoublePrecision);
        inserter.insertValue(WISHART_SCALE_MATRIX_TAG, m_WishartScaleMatrix.toDelimited());
    }
    //@}
@@ -758,7 +826,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
        double f = m_WishartDegreesFreedom - d - 1.0;
        LOG_TRACE(<< "f = " << f);

-        Eigen::JacobiSVD<typename SDenseMatrix<TMatrix>::Type> precision(toDenseMatrix(m_WishartScaleMatrix), Eigen::ComputeFullU | Eigen::ComputeFullV);
+        Eigen::JacobiSVD<typename SDenseMatrix<TMatrix>::Type> precision(
+            toDenseMatrix(m_WishartScaleMatrix), Eigen::ComputeFullU | Eigen::ComputeFullV);

        // Note we can extract the (non-zero) vectors of the Cholesky
        // factorization by noting that U = V^t and multiplying each
@@ -871,7 +940,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    //@}

    //! Get the expected mean of the marginal likelihood.
-    TPoint mean() const { return this->isInteger() ? m_GaussianMean - TPoint(0.5) : m_GaussianMean; }
+    TPoint mean() const {
+        return this->isInteger() ? m_GaussianMean - TPoint(0.5) : m_GaussianMean;
+    }

    //! Get the covariance matrix for the marginal likelihood.
    TMatrix covarianceMatrix() const {
@@ -904,7 +975,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    }

    //! Check if two priors are equal to the specified tolerance.
-    bool equalTolerance(const CMultivariateNormalConjugate& rhs, unsigned int toleranceType, double epsilon) const {
+    bool equalTolerance(const CMultivariateNormalConjugate& rhs,
+                        unsigned int toleranceType,
+                        double epsilon) const {
        LOG_DEBUG(<< m_GaussianMean << " " << rhs.m_GaussianMean);
        LOG_DEBUG(<< m_GaussianPrecision << " " << rhs.m_GaussianPrecision);
        LOG_DEBUG(<< m_WishartDegreesFreedom << " " << rhs.m_WishartDegreesFreedom);
@@ -914,7 +987,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
        CEqualWithTolerance<TPoint> equalVector(toleranceType, TPoint(epsilon));
        CEqualWithTolerance<TMatrix> equalMatrix(toleranceType, TMatrix(epsilon));

-        return equalVector(m_GaussianMean, rhs.m_GaussianMean) && equalVector(m_GaussianPrecision, rhs.m_GaussianPrecision) &&
+        return equalVector(m_GaussianMean, rhs.m_GaussianMean) &&
+               equalVector(m_GaussianPrecision, rhs.m_GaussianPrecision) &&
               equalScalar(m_WishartDegreesFreedom, rhs.m_WishartDegreesFreedom) &&
               equalMatrix(m_WishartScaleMatrix, rhs.m_WishartScaleMatrix);
    }
@@ -949,7 +1023,9 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
private:
    //! Unpack the variable values on which to condition.
-    void unpack(const TSizeDoublePr10Vec& condition, TSize10Vec& condition_, CDenseVector<double>& x) const {
+    void unpack(const TSizeDoublePr10Vec& condition,
+                TSize10Vec& condition_,
+                CDenseVector<double>& x) const {
        condition_.reserve(condition.size());
        for (std::size_t i = 0u; i < condition.size(); ++i) {
            condition_.push_back(condition[i].first);
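Aside (editorial, not part of the patch): the private jointLogMarginalLikelihood() overload below uses the standard conjugate-prior device named in its opening comment. For an exponential-family likelihood with conjugate prior whose normalizer is Z(\tau), the marginal likelihood is the ratio of posterior and prior normalizers:

    p(x_{1:n}) = \frac{Z(\tau_{\text{post}})}{Z(\tau_{\text{prior}})} \prod_{i=1}^{n} h(x_i),
    \qquad
    \log p(x_{1:n}) = \log Z_{\text{post}} - \log Z_{\text{prior}} + \sum_{i=1}^{n} \log h(x_i).

For the normal-Wishart pair Z is built from multivariate gamma functions and scale-matrix determinants, which is why the implementation accumulates lgamma and log-determinant terms.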
@@ -959,11 +1035,12 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
    //! Compute the marginal likelihood for \p samples at the offset
    //! \p offset.
-    maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                                  const TDouble10Vec1Vec& samples,
-                                                                  const TPoint& offset,
-                                                                  const TDouble10Vec4Vec1Vec& weights,
-                                                                  double& result) const {
+    maths_t::EFloatingPointErrorStatus
+    jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                               const TDouble10Vec1Vec& samples,
+                               const TPoint& offset,
+                               const TDouble10Vec4Vec1Vec& weights,
+                               double& result) const {
        // As usual, one can find the marginal likelihood by noting that
        // it is proportional to the ratio of the normalization factors
        // of the conjugate distribution before and after update with the
@@ -979,8 +1056,10 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
        for (std::size_t i = 0u; i < samples.size(); ++i) {
            TPoint x(samples[i]);
            TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i]));
-            TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])));
-            TPoint countVarianceScale(maths_t::countVarianceScale(N, weightStyles, weights[i]));
+            TPoint seasonalScale = sqrt(TPoint(
+                maths_t::seasonalVarianceScale(N, weightStyles, weights[i])));
+            TPoint countVarianceScale(
+                maths_t::countVarianceScale(N, weightStyles, weights[i]));
            x = m + (x + offset - m) / seasonalScale;
            numberSamples += this->smallest(n.template toVector<TDouble10Vec>());
            covariancePost.add(x, n / countVarianceScale);
@@ -993,22 +1072,25 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
            return maths_t::E_FpFailed;
        }
        TPoint scaledNumberSamples = covariancePost.s_Count;
-        TCovariance covariancePrior =
-            CBasicStatistics::accumulator(m_WishartDegreesFreedom, m_GaussianMean, m_WishartScaleMatrix / m_WishartDegreesFreedom);
+        TCovariance covariancePrior = CBasicStatistics::accumulator(
+            m_WishartDegreesFreedom, m_GaussianMean,
+            m_WishartScaleMatrix / m_WishartDegreesFreedom);
        covariancePost += covariancePrior;

        double logGaussianPrecisionPrior = 0.0;
        double logGaussianPrecisionPost = 0.0;
        for (std::size_t i = 0u; i < N; ++i) {
            logGaussianPrecisionPrior += std::log(m_GaussianPrecision(i));
-            logGaussianPrecisionPost += std::log(m_GaussianPrecision(i) + scaledNumberSamples(i));
+            logGaussianPrecisionPost +=
+                std::log(m_GaussianPrecision(i) + scaledNumberSamples(i));
        }
        double wishartDegreesFreedomPrior = m_WishartDegreesFreedom;
        double wishartDegreesFreedomPost = m_WishartDegreesFreedom + numberSamples;
        TMatrix wishartScaleMatrixPost = covariancePost.s_Covariances;
        scaleCovariances(covariancePost.s_Count, wishartScaleMatrixPost);
        double logDeterminantPrior;
-        if (logDeterminant(m_WishartScaleMatrix, logDeterminantPrior, false) & maths_t::E_FpFailed) {
+        if (logDeterminant(m_WishartScaleMatrix, logDeterminantPrior, false) &
+            maths_t::E_FpFailed) {
            LOG_ERROR(<< "Failed to calculate log det " << m_WishartScaleMatrix);
            return maths_t::E_FpFailed;
        }
@@ -1021,8 +1103,11 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
        try {
            double logGammaPostMinusPrior = 0.0;
            for (std::size_t i = 0u; i < N; ++i) {
-                logGammaPostMinusPrior += boost::math::lgamma(0.5 * (wishartDegreesFreedomPost - static_cast<double>(i))) -
-                                          boost::math::lgamma(0.5 * (wishartDegreesFreedomPrior - static_cast<double>(i)));
+                logGammaPostMinusPrior +=
+                    boost::math::lgamma(
+                        0.5 * (wishartDegreesFreedomPost - static_cast<double>(i))) -
+                    boost::math::lgamma(0.5 * (wishartDegreesFreedomPrior -
+                                               static_cast<double>(i)));
            }
            LOG_TRACE(<< "numberSamples = " << numberSamples);
            LOG_TRACE(<< "logGaussianPrecisionPrior = " << logGaussianPrecisionPrior
@@ -1031,14 +1116,19 @@ class
CMultivariateNormalConjugate : public CMultivariatePrior { << ", wishartDegreesFreedomPost = " << wishartDegreesFreedomPost); LOG_TRACE(<< "wishartScaleMatrixPrior = " << m_WishartScaleMatrix); LOG_TRACE(<< "wishartScaleMatrixPost = " << wishartScaleMatrixPost); - LOG_TRACE(<< "logDeterminantPrior = " << logDeterminantPrior << ", logDeterminantPost = " << logDeterminantPost); + LOG_TRACE(<< "logDeterminantPrior = " << logDeterminantPrior + << ", logDeterminantPost = " << logDeterminantPost); LOG_TRACE(<< "logGammaPostMinusPrior = " << logGammaPostMinusPrior); LOG_TRACE(<< "logCountVarianceScales = " << logCountVarianceScales); - result = 0.5 * (wishartDegreesFreedomPrior * logDeterminantPrior - wishartDegreesFreedomPost * logDeterminantPost - + result = 0.5 * (wishartDegreesFreedomPrior * logDeterminantPrior - + wishartDegreesFreedomPost * logDeterminantPost - d * (logGaussianPrecisionPost - logGaussianPrecisionPrior) + - (wishartDegreesFreedomPost - wishartDegreesFreedomPrior) * d * core::constants::LOG_TWO + - 2.0 * logGammaPostMinusPrior - numberSamples * d * core::constants::LOG_TWO_PI - logCountVarianceScales); + (wishartDegreesFreedomPost - wishartDegreesFreedomPrior) * + d * core::constants::LOG_TWO + + 2.0 * logGammaPostMinusPrior - + numberSamples * d * core::constants::LOG_TWO_PI - + logCountVarianceScales); } catch (const std::exception& e) { LOG_ERROR(<< "Failed to calculate marginal likelihood: " << e.what()); return maths_t::E_FpFailed; @@ -1048,14 +1138,17 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! Check that the state is valid. bool isBad() const { - return !CMathsFuncs::isFinite(m_GaussianMean) || !CMathsFuncs::isFinite(m_GaussianPrecision) || - !CMathsFuncs::isFinite(m_WishartDegreesFreedom) || !CMathsFuncs::isFinite(m_WishartScaleMatrix); + return !CMathsFuncs::isFinite(m_GaussianMean) || + !CMathsFuncs::isFinite(m_GaussianPrecision) || + !CMathsFuncs::isFinite(m_WishartDegreesFreedom) || + !CMathsFuncs::isFinite(m_WishartScaleMatrix); } //! Full debug dump of the state of this prior. 
    std::string debug() const {
        std::ostringstream result;
-        result << std::scientific << std::setprecision(15) << m_GaussianMean << " " << m_GaussianPrecision << " " << m_WishartDegreesFreedom
+        result << std::scientific << std::setprecision(15) << m_GaussianMean
+               << " " << m_GaussianPrecision << " " << m_WishartDegreesFreedom
               << " " << m_WishartScaleMatrix;
        return result.str();
    }
@@ -1087,13 +1180,15 @@ const std::string CMultivariateNormalConjugate<N>::WISHART_SCALE_MATRIX_TAG("e");
template<std::size_t N>
const std::string CMultivariateNormalConjugate<N>::DECAY_RATE_TAG("f");
template<std::size_t N>
-const typename CMultivariateNormalConjugate<N>::TPoint CMultivariateNormalConjugate<N>::NON_INFORMATIVE_MEAN = TPoint(0);
+const typename CMultivariateNormalConjugate<N>::TPoint
+    CMultivariateNormalConjugate<N>::NON_INFORMATIVE_MEAN = TPoint(0);
template<std::size_t N>
const double CMultivariateNormalConjugate<N>::NON_INFORMATIVE_PRECISION(0.0);
template<std::size_t N>
const double CMultivariateNormalConjugate<N>::NON_INFORMATIVE_DEGREES_FREEDOM(0.0);
template<std::size_t N>
-const typename CMultivariateNormalConjugate<N>::TMatrix CMultivariateNormalConjugate<N>::NON_INFORMATIVE_SCALE = TMatrix(0);
+const typename CMultivariateNormalConjugate<N>::TMatrix
+    CMultivariateNormalConjugate<N>::NON_INFORMATIVE_SCALE = TMatrix(0);
template<std::size_t N>
const double CMultivariateNormalConjugate<N>::MINIMUM_GAUSSIAN_DEGREES_FREEDOM(100.0);
}
diff --git a/include/maths/CMultivariateNormalConjugateFactory.h b/include/maths/CMultivariateNormalConjugateFactory.h
index 6dc00b605e..3a0966dff3 100644
--- a/include/maths/CMultivariateNormalConjugateFactory.h
+++ b/include/maths/CMultivariateNormalConjugateFactory.h
@@ -30,11 +30,14 @@ class MATHS_EXPORT CMultivariateNormalConjugateFactory {
public:
    //! Create a new non-informative multivariate normal prior.
-    static TPriorPtr nonInformative(std::size_t dimension, maths_t::EDataType dataType, double decayRate);
+    static TPriorPtr
+    nonInformative(std::size_t dimension, maths_t::EDataType dataType, double decayRate);

    //! Create reading state from its state document representation.
-    static bool
-    restore(std::size_t dimension, const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser);
+    static bool restore(std::size_t dimension,
+                        const SDistributionRestoreParams& params,
+                        TPriorPtr& ptr,
+                        core::CStateRestoreTraverser& traverser);
};
}
}
diff --git a/include/maths/CMultivariateOneOfNPrior.h b/include/maths/CMultivariateOneOfNPrior.h
index 1acef8b147..77707475d5 100644
--- a/include/maths/CMultivariateOneOfNPrior.h
+++ b/include/maths/CMultivariateOneOfNPrior.h
@@ -91,7 +91,10 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior {
    //! for details).
    //! \param[in] decayRate The rate at which to revert to the non-informative prior.
    //! \warning This class takes ownership of \p models.
-    CMultivariateOneOfNPrior(std::size_t dimension, const TPriorPtrVec& models, maths_t::EDataType dataType, double decayRate = 0.0);
+    CMultivariateOneOfNPrior(std::size_t dimension,
+                             const TPriorPtrVec& models,
+                             maths_t::EDataType dataType,
+                             double decayRate = 0.0);

    //! Create with a weighted collection of models.
    //!
@@ -108,7 +111,9 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior {
                             double decayRate = 0.0);

    //! Construct from part of a state document.
- CMultivariateOneOfNPrior(std::size_t dimension, const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + CMultivariateOneOfNPrior(std::size_t dimension, + const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Implements value semantics for copy construction. CMultivariateOneOfNPrior(const CMultivariateOneOfNPrior& other); @@ -146,7 +151,9 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); //! Forward the offset to the model priors. - virtual void adjustOffset(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights); + virtual void adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights); //! Update the model weights using the marginal likelihoods for //! the data. The component prior parameters are then updated. @@ -156,7 +163,9 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -181,7 +190,8 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { //! \note The caller must specify dimension - 1 variables between //! \p marginalize and \p condition so the resulting distribution //! is univariate. - virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; + virtual TUnivariatePriorPtrDoublePr + univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; //! Compute the bivariate prior marginalizing over the variables //! \p marginalize and conditioning on the variables \p condition. @@ -195,7 +205,8 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { //! \note The caller must specify dimension - 2 variables between //! \p marginalize and \p condition so the resulting distribution //! is univariate. - virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; + virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition) const; //! Get the support for the marginal likelihood function. virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const; @@ -213,7 +224,8 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { virtual TDouble10Vec marginalLikelihoodVariances() const; //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the distribution parameters. @@ -226,10 +238,11 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { //! 
\param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& result) const; //! Sample the marginal likelihood function. //! @@ -241,7 +254,8 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { //! \param[in] numberSamples The number of samples required. //! \param[out] samples Filled in with samples from the prior. //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const; + virtual void sampleMarginalLikelihood(std::size_t numberSamples, + TDouble10Vec1Vec& samples) const; //! Check if this is a non-informative prior. virtual bool isNonInformative() const; diff --git a/include/maths/CMultivariateOneOfNPriorFactory.h b/include/maths/CMultivariateOneOfNPriorFactory.h index ef0115aca7..5205f30947 100644 --- a/include/maths/CMultivariateOneOfNPriorFactory.h +++ b/include/maths/CMultivariateOneOfNPriorFactory.h @@ -31,11 +31,16 @@ class MATHS_EXPORT CMultivariateOneOfNPriorFactory { public: //! Create a new non-informative multivariate normal prior. - static TPriorPtr nonInformative(std::size_t dimension, maths_t::EDataType dataType, double decayRate, const TPriorPtrVec& models); + static TPriorPtr nonInformative(std::size_t dimension, + maths_t::EDataType dataType, + double decayRate, + const TPriorPtrVec& models); //! Create reading state from its state document representation. - static bool - restore(std::size_t dimension, const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser); + static bool restore(std::size_t dimension, + const SDistributionRestoreParams& params, + TPriorPtr& ptr, + core::CStateRestoreTraverser& traverser); }; } } diff --git a/include/maths/CMultivariatePrior.h b/include/maths/CMultivariatePrior.h index 2137260869..fb57d2a201 100644 --- a/include/maths/CMultivariatePrior.h +++ b/include/maths/CMultivariatePrior.h @@ -134,8 +134,9 @@ class MATHS_EXPORT CMultivariatePrior { //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. - virtual void - adjustOffset(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) = 0; + virtual void adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights) = 0; //! Update the prior with a collection of independent samples from the //! process. @@ -145,7 +146,9 @@ class MATHS_EXPORT CMultivariatePrior { //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) = 0; + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights) = 0; //! 
Update the prior for the specified elapsed time. virtual void propagateForwardsByTime(double time) = 0; @@ -162,7 +165,8 @@ class MATHS_EXPORT CMultivariatePrior { //! \note The caller must specify dimension - 1 variables between //! \p marginalize and \p condition so the resulting distribution //! is univariate. - virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const = 0; + virtual TUnivariatePriorPtrDoublePr + univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const = 0; //! Compute the bivariate prior marginalizing over the variables //! \p marginalize and conditioning on the variables \p condition. @@ -176,7 +180,8 @@ class MATHS_EXPORT CMultivariatePrior { //! \note The caller must specify dimension - 2 variables between //! \p marginalize and \p condition so the resulting distribution //! is univariate. - virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const = 0; + virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition) const = 0; //! Get the support for the marginal likelihood function. virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const = 0; @@ -189,10 +194,12 @@ class MATHS_EXPORT CMultivariatePrior { virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec& value) const; //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const = 0; + virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) const = 0; //! Get the local maxima of the marginal likelihood function. - virtual TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) const; //! Get the covariance matrix for the marginal likelihood. virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const = 0; @@ -209,10 +216,11 @@ class MATHS_EXPORT CMultivariatePrior { //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, - double& result) const = 0; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights, + double& result) const = 0; //! Sample the marginal likelihood function. //! @@ -234,7 +242,8 @@ class MATHS_EXPORT CMultivariatePrior { //! \param[in] numberSamples The number of samples required. //! \param[out] samples Filled in with samples from the prior. //! \note \p numberSamples is truncated to the number of samples received. - virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const = 0; + virtual void sampleMarginalLikelihood(std::size_t numberSamples, + TDouble10Vec1Vec& samples) const = 0; //! Calculate the joint probability of seeing a lower marginal likelihood //! collection of independent samples for each coordinate. 
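Aside (editorial, not part of the patch): a minimal usage sketch of the CMultivariatePrior contract above. This is a hypothetical driver; the ml::maths namespace layout, the maths_t::E_ContinuousData enumerator and the maths_t::E_SampleCountWeight weight style are assumptions from the wider library, everything else is declared in the headers touched by this patch.

    #include <maths/CMultivariateNormalConjugate.h>
    #include <maths/CMultivariatePrior.h>

    int main() {
        using namespace ml;
        // Start from a non-informative 2-d normal conjugate prior.
        auto prior = maths::CMultivariateNormalConjugate<2>::nonInformativePrior(
            maths_t::E_ContinuousData);
        // One count weight of 1.0 per coordinate for each sample (assumed enum name).
        maths_t::TWeightStyleVec weightStyle{maths_t::E_SampleCountWeight};
        maths::CMultivariatePrior::TDouble10Vec1Vec samples{{1.0, 2.0}, {1.1, 1.9}};
        maths::CMultivariatePrior::TDouble10Vec4Vec1Vec weights(
            samples.size(),
            maths::CMultivariatePrior::TDouble10Vec4Vec(
                1, maths::CMultivariatePrior::TDouble10Vec(2, 1.0)));
        prior.addSamples(weightStyle, samples, weights); // update the posterior
        prior.propagateForwardsByTime(1.0);              // age towards non-informative
        double logLikelihood;
        prior.jointLogMarginalLikelihood(weightStyle, samples, weights, logLikelihood);
        return 0;
    }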
@@ -390,7 +399,9 @@ class MATHS_EXPORT CMultivariatePrior { bool check(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const; //! Get the remaining variables. - void remainingVariables(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition, TSize10Vec& results) const; + void remainingVariables(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition, + TSize10Vec& results) const; //! Get the smallest component of \p x. double smallest(const TDouble10Vec& x) const; diff --git a/include/maths/CNaiveBayes.h b/include/maths/CNaiveBayes.h index b508f854c4..fbef5ccbbb 100644 --- a/include/maths/CNaiveBayes.h +++ b/include/maths/CNaiveBayes.h @@ -41,7 +41,8 @@ class MATHS_EXPORT CNaiveBayesFeatureDensity { virtual CNaiveBayesFeatureDensity* clone() const = 0; //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) = 0; + virtual bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) = 0; //! Persist state by passing information to \p inserter. virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0; @@ -90,7 +91,8 @@ class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayes virtual CNaiveBayesFeatureDensityFromPrior* clone() const; //! Initialize by reading state from \p traverser. - virtual bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + virtual bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Persist state by passing information to \p inserter. virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; @@ -146,7 +148,8 @@ class MATHS_EXPORT CNaiveBayes { explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0, TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble()); - CNaiveBayes(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + CNaiveBayes(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Persist state by passing information to \p inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; @@ -224,7 +227,8 @@ class MATHS_EXPORT CNaiveBayes { //! \brief The data associated with a class. struct SClass { //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Persist state by passing information to \p inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Debug the memory used by this object. @@ -244,7 +248,8 @@ class MATHS_EXPORT CNaiveBayes { private: //! Initialize by reading state from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Validate \p x. 
    bool validate(const TDouble1VecVec& x) const;
diff --git a/include/maths/CNaturalBreaksClassifier.h b/include/maths/CNaturalBreaksClassifier.h
index 4296afe3c3..45454edc2f 100644
--- a/include/maths/CNaturalBreaksClassifier.h
+++ b/include/maths/CNaturalBreaksClassifier.h
@@ -127,10 +127,13 @@ class MATHS_EXPORT CNaturalBreaksClassifier {
    //! \note This will store as much information about the points as
    //! possible subject to this constraint, so will generally hold
    //! approximately \p space tuples.
-    CNaturalBreaksClassifier(std::size_t space, double decayRate = 0.0, double minimumCategoryCount = MINIMUM_CATEGORY_COUNT);
+    CNaturalBreaksClassifier(std::size_t space,
+                             double decayRate = 0.0,
+                             double minimumCategoryCount = MINIMUM_CATEGORY_COUNT);

    //! Create from part of a state document.
-    bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser);

    //! Persist state by passing information to the supplied inserter.
    void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -249,12 +252,20 @@ class MATHS_EXPORT CNaturalBreaksClassifier {
    //! programming approach in complexity \f$O(N^2n)\f$ where
    //! \f$N\f$ is the number of tuples and \f$n\f$ is the desired
    //! size for the partition.
-    static bool naturalBreaks(const TTupleVec& categories, std::size_t n, std::size_t p, EObjective target, TSizeVec& result);
+    static bool naturalBreaks(const TTupleVec& categories,
+                              std::size_t n,
+                              std::size_t p,
+                              EObjective target,
+                              TSizeVec& result);

    //! Double tuple version.
    //!
    //! \see naturalBreaks for more details.
-    static bool naturalBreaks(const TDoubleTupleVec& categories, std::size_t n, std::size_t p, EObjective target, TSizeVec& result);
+    static bool naturalBreaks(const TDoubleTupleVec& categories,
+                              std::size_t n,
+                              std::size_t p,
+                              EObjective target,
+                              TSizeVec& result);

private:
    using TSizeSizePr = std::pair<std::size_t, std::size_t>;
@@ -263,7 +274,11 @@ class MATHS_EXPORT CNaturalBreaksClassifier {
    //! Implementation called by naturalBreaks with explicit
    //! tuple types.
    template<typename TUPLE>
-    static bool naturalBreaksImpl(const std::vector<TUPLE>& categories, std::size_t n, std::size_t p, EObjective target, TSizeVec& result);
+    static bool naturalBreaksImpl(const std::vector<TUPLE>& categories,
+                                  std::size_t n,
+                                  std::size_t p,
+                                  EObjective target,
+                                  TSizeVec& result);

private:
    //! The minimum permitted size for the classifier.
@@ -275,7 +290,10 @@ class MATHS_EXPORT CNaturalBreaksClassifier {
private:
    //! Construct a new classifier with the specified space limit
    //! \p space and categories \p categories.
-    CNaturalBreaksClassifier(std::size_t space, double decayRate, double minimumCategoryCount, TTupleVec& categories);
+    CNaturalBreaksClassifier(std::size_t space,
+                             double decayRate,
+                             double minimumCategoryCount,
+                             TTupleVec& categories);

    //! Reduce the number of tuples until we satisfy the space constraint.
    void reduce();
diff --git a/include/maths/CNormalMeanPrecConjugate.h b/include/maths/CNormalMeanPrecConjugate.h
index 4091e83f27..9f8bea298f 100644
--- a/include/maths/CNormalMeanPrecConjugate.h
+++ b/include/maths/CNormalMeanPrecConjugate.h
@@ -78,10 +78,13 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior {
                             double decayRate = 0.0);

    //! Construct from sample central moments.
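Aside (editorial, not part of the patch): the O(N^2 n) dynamic program referenced in the naturalBreaks() documentation above is the classic optimal 1-D partition (Fisher/Jenks natural breaks). A self-contained sketch on raw sorted doubles, minimizing within-class squared deviation; this is the generic algorithm only, the header's implementation works on moment tuples and supports other objectives.

    #include <algorithm>
    #include <cstddef>
    #include <limits>
    #include <vector>

    // Split sorted `values` into `n` contiguous classes minimizing the total
    // within-class sum of squared deviations. Returns the right-open end index
    // of each class. Assumes 1 <= n <= values.size() and ascending order.
    std::vector<std::size_t> naturalBreaks(const std::vector<double>& values, std::size_t n) {
        std::size_t N = values.size();
        // Prefix sums so the cost of any class [i, j) is O(1).
        std::vector<double> s(N + 1, 0.0), s2(N + 1, 0.0);
        for (std::size_t i = 0; i < N; ++i) {
            s[i + 1] = s[i] + values[i];
            s2[i + 1] = s2[i] + values[i] * values[i];
        }
        auto sse = [&](std::size_t i, std::size_t j) { // values [i, j)
            double sum = s[j] - s[i];
            double cnt = static_cast<double>(j - i);
            return s2[j] - s2[i] - sum * sum / cnt;
        };
        const double INF = std::numeric_limits<double>::max();
        // dp[j][k] = best cost splitting the first j values into k classes.
        std::vector<std::vector<double>> dp(N + 1, std::vector<double>(n + 1, INF));
        std::vector<std::vector<std::size_t>> arg(N + 1, std::vector<std::size_t>(n + 1, 0));
        dp[0][0] = 0.0;
        for (std::size_t j = 1; j <= N; ++j) {
            for (std::size_t k = 1; k <= std::min(j, n); ++k) {
                for (std::size_t i = k - 1; i < j; ++i) { // last class is [i, j)
                    if (dp[i][k - 1] < INF) {
                        double cost = dp[i][k - 1] + sse(i, j);
                        if (cost < dp[j][k]) {
                            dp[j][k] = cost;
                            arg[j][k] = i;
                        }
                    }
                }
            }
        }
        std::vector<std::size_t> breaks(n); // breaks[k] = end index of class k
        for (std::size_t j = N, k = n; k > 0; --k) {
            breaks[k - 1] = j;
            j = arg[j][k];
        }
        return breaks;
    }

The three nested loops give the quoted O(N^2 n) complexity; the classifier applies the same recurrence to its summary tuples rather than to raw points.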
- CNormalMeanPrecConjugate(maths_t::EDataType dataType, const TMeanVarAccumulator& moments, double decayRate = 0.0); + CNormalMeanPrecConjugate(maths_t::EDataType dataType, + const TMeanVarAccumulator& moments, + double decayRate = 0.0); //! Construct from part of a state document. - CNormalMeanPrecConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + CNormalMeanPrecConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); // Default copy constructor and assignment operator work. @@ -91,11 +94,14 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! for details). //! \param[in] decayRate The rate at which to revert to the non-informative prior. //! \return A non-informative prior. - static CNormalMeanPrecConjugate nonInformativePrior(maths_t::EDataType dataType, double decayRate = 0.0); + static CNormalMeanPrecConjugate nonInformativePrior(maths_t::EDataType dataType, + double decayRate = 0.0); //@} //! Reset the prior based on the sample central moments. - void reset(maths_t::EDataType dataType, const TMeanVarAccumulator& moments, double decayRate = 0.0); + void reset(maths_t::EDataType dataType, + const TMeanVarAccumulator& moments, + double decayRate = 0.0); //! \name Prior Contract //@{ @@ -115,7 +121,9 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Returns zero. virtual double offset() const; @@ -128,7 +136,9 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -147,12 +157,14 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -167,9 +179,10 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! 
\note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the normal mean and precision. @@ -182,10 +195,11 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; //! Sample the marginal likelihood function. //! @@ -315,7 +329,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { TDoubleDoublePr confidenceIntervalPrecision(double percentage) const; //! Check if two priors are equal to the specified tolerance. - bool equalTolerance(const CNormalMeanPrecConjugate& rhs, const TEqualWithTolerance& equal) const; + bool equalTolerance(const CNormalMeanPrecConjugate& rhs, + const TEqualWithTolerance& equal) const; //@} private: diff --git a/include/maths/COneOfNPrior.h b/include/maths/COneOfNPrior.h index f07773ea6c..9bf608e7fc 100644 --- a/include/maths/COneOfNPrior.h +++ b/include/maths/COneOfNPrior.h @@ -82,10 +82,13 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! for details). //! \param[in] decayRate The rate at which we revert to the non-informative prior. //! \warning This class takes ownership of \p models. - COneOfNPrior(const TDoublePriorPtrPrVec& models, maths_t::EDataType dataType, double decayRate = 0.0); + COneOfNPrior(const TDoublePriorPtrPrVec& models, + maths_t::EDataType dataType, + double decayRate = 0.0); //! Construct from part of a state document. - COneOfNPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + COneOfNPrior(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Implements value semantics for copy construction. COneOfNPrior(const COneOfNPrior& other); @@ -131,7 +134,9 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! Forward the offset to the model priors. //! //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Get the maximum model offset. virtual double offset() const; @@ -144,7 +149,9 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. 
- virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -166,12 +173,14 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { virtual double nearestMarginalLikelihoodMean(double value) const; //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -186,9 +195,10 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range (0.0, 100.0]. - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the distribution parameters. @@ -201,10 +211,11 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; //! Sample the marginal likelihood function. //! @@ -343,10 +354,12 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { private: //! Read parameters from \p traverser. - bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + bool acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Add a model vector entry reading parameters from \p traverser. - bool modelAcceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + bool modelAcceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Get the normalized model weights. 
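Context for the normalizedLogWeights declaration above: COneOfNPrior keeps one log weight per candidate model and normalizes them in log space, since exponentiating large negative log likelihoods directly would underflow. A sketch of the usual log-sum-exp normalization (the function name is illustrative, not the class's private helper):

#include <algorithm>
#include <cmath>
#include <vector>

// Normalize model log weights into probabilities: subtracting the maximum
// before exponentiating keeps the largest term at exp(0) = 1.
// Assumes a non-empty input.
std::vector<double> normalizedWeights(std::vector<double> logWeights) {
    double wmax{*std::max_element(logWeights.begin(), logWeights.end())};
    double z{0.0};
    for (double& w : logWeights) {
        w = std::exp(w - wmax);
        z += w;
    }
    for (double& w : logWeights) {
        w /= z;
    }
    return logWeights;
}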
TDoubleSizePr5Vec normalizedLogWeights() const; diff --git a/include/maths/COrderings.h b/include/maths/COrderings.h index 4d0f48293d..09449c62e4 100644 --- a/include/maths/COrderings.h +++ b/include/maths/COrderings.h @@ -54,10 +54,13 @@ class COrderings : private core::CNonInstantiatable { } template - static inline bool less(const boost::optional& lhs, const boost::optional& rhs) { + static inline bool + less(const boost::optional& lhs, const boost::optional& rhs) { bool lInitialized(lhs); bool rInitialized(rhs); - return lInitialized && rInitialized ? boost::unwrap_ref(*lhs) < boost::unwrap_ref(*rhs) : rInitialized < lInitialized; + return lInitialized && rInitialized + ? boost::unwrap_ref(*lhs) < boost::unwrap_ref(*rhs) + : rInitialized < lInitialized; } template static inline bool less(const T& lhs, const boost::optional& rhs) { @@ -83,10 +86,13 @@ class COrderings : private core::CNonInstantiatable { } template - static inline bool greater(const boost::optional& lhs, const boost::optional& rhs) { + static inline bool + greater(const boost::optional& lhs, const boost::optional& rhs) { bool lInitialized(lhs); bool rInitialized(rhs); - return lInitialized && rInitialized ? boost::unwrap_ref(*lhs) > boost::unwrap_ref(*rhs) : rInitialized > lInitialized; + return lInitialized && rInitialized + ? boost::unwrap_ref(*lhs) > boost::unwrap_ref(*rhs) + : rInitialized > lInitialized; } template static inline bool greater(const T& lhs, const boost::optional& rhs) { @@ -113,7 +119,9 @@ class COrderings : private core::CNonInstantiatable { static inline bool less(const T* lhs, const T* rhs) { bool lInitialized(lhs != nullptr); bool rInitialized(rhs != nullptr); - return lInitialized && rInitialized ? boost::unwrap_ref(*lhs) < boost::unwrap_ref(*rhs) : rInitialized < lInitialized; + return lInitialized && rInitialized + ? boost::unwrap_ref(*lhs) < boost::unwrap_ref(*rhs) + : rInitialized < lInitialized; } }; @@ -132,7 +140,9 @@ class COrderings : private core::CNonInstantiatable { static inline bool greater(const T* lhs, const T* rhs) { bool lInitialized(lhs != nullptr); bool rInitialized(rhs != nullptr); - return lInitialized && rInitialized ? boost::unwrap_ref(*lhs) > boost::unwrap_ref(*rhs) : rInitialized > lInitialized; + return lInitialized && rInitialized + ? boost::unwrap_ref(*lhs) > boost::unwrap_ref(*rhs) + : rInitialized > lInitialized; } }; @@ -183,31 +193,44 @@ class COrderings : private core::CNonInstantiatable { static bool lexicographical_compare(const T1& l1, const T1& r1) { return lexicographical_compare(l1, r1, SReferenceLess()); } -#define COMPARE(l, r) \ - if (comp(l, r)) { \ - return true; \ - } else if (comp(r, l)) { \ - return false; \ +#define COMPARE(l, r) \ + if (comp(l, r)) { \ + return true; \ + } else if (comp(r, l)) { \ + return false; \ } //! Lexicographical comparison of (\p l1, \p l2) and (\p r1, \p r2). template - static bool lexicographical_compare(const T1& l1, const T2& l2, const T1& r1, const T2& r2, COMP comp) { + static bool + lexicographical_compare(const T1& l1, const T2& l2, const T1& r1, const T2& r2, COMP comp) { COMPARE(l1, r1); return comp(l2, r2); } template - static bool lexicographical_compare(const T1& l1, const T2& l2, const T1& r1, const T2& r2) { + static bool + lexicographical_compare(const T1& l1, const T2& l2, const T1& r1, const T2& r2) { return lexicographical_compare(l1, l2, r1, r2, SReferenceLess()); } //! Lexicographical comparison of (\p l1, \p l2, \p l3) and (\p r1, \p r2, \p r3). 
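The boost::optional overloads of less and greater reformatted above encode one deliberate choice: initialized values compare among themselves, and any initialized value orders before an uninitialized one, which is what the trailing rInitialized < lInitialized expresses. The same predicate with std::optional, as a self-contained sketch:

#include <optional>

// Mirrors COrderings::less for optionals: both set -> compare the values;
// otherwise a set value sorts before an unset one.
template<typename T>
bool optionalLess(const std::optional<T>& lhs, const std::optional<T>& rhs) {
    bool lInitialized{lhs.has_value()};
    bool rInitialized{rhs.has_value()};
    return lInitialized && rInitialized ? *lhs < *rhs : rInitialized < lInitialized;
}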
template - static bool lexicographical_compare(const T1& l1, const T2& l2, const T3& l3, const T1& r1, const T2& r2, const T3& r3, COMP comp) { + static bool lexicographical_compare(const T1& l1, + const T2& l2, + const T3& l3, + const T1& r1, + const T2& r2, + const T3& r3, + COMP comp) { COMPARE(l1, r1); COMPARE(l2, r2); return comp(l3, r3); } template - static bool lexicographical_compare(const T1& l1, const T2& l2, const T3& l3, const T1& r1, const T2& r2, const T3& r3) { + static bool lexicographical_compare(const T1& l1, + const T2& l2, + const T3& l3, + const T1& r1, + const T2& r2, + const T3& r3) { return lexicographical_compare(l1, l2, l3, r1, r2, r3, SReferenceLess()); } //! Lexicographical comparison of (\p l1, \p l2, \p l3, \p l4) and @@ -269,7 +292,8 @@ class COrderings : private core::CNonInstantiatable { const T3& r3, const T4& r4, const T5& r5) { - return lexicographical_compare(l1, l2, l3, l4, l5, r1, r2, r3, r4, r5, SReferenceLess()); + return lexicographical_compare(l1, l2, l3, l4, l5, r1, r2, r3, r4, r5, + SReferenceLess()); } #undef COMPARE //@} @@ -309,7 +333,8 @@ class COrderings : private core::CNonInstantiatable { template bool operator()(const std::pair& lhs, const std::pair& rhs) const { - return lexicographical_compare(lhs.first, lhs.second, rhs.first, rhs.second, *this); + return lexicographical_compare(lhs.first, lhs.second, rhs.first, + rhs.second, *this); } SReferenceLess s_Less; }; @@ -349,7 +374,8 @@ class COrderings : private core::CNonInstantiatable { template bool operator()(const std::pair& lhs, const std::pair& rhs) const { - return lexicographical_compare(lhs.first, lhs.second, rhs.first, rhs.second, *this); + return lexicographical_compare(lhs.first, lhs.second, rhs.first, + rhs.second, *this); } SReferenceGreater s_Greater; @@ -362,47 +388,38 @@ class COrderings : private core::CNonInstantiatable { //! This also handles tuples of reference wrapped types.
struct SLexicographicalCompare { template - inline bool operator()(const std::pair& lhs, const std::pair& rhs) const { - return lexicographical_compare(lhs.first, lhs.second, rhs.first, rhs.second, s_Less); + inline bool operator()(const std::pair& lhs, + const std::pair& rhs) const { + return lexicographical_compare(lhs.first, lhs.second, rhs.first, + rhs.second, s_Less); } template - inline bool operator()(const boost::tuple& lhs, const boost::tuple& rhs) const { - return lexicographical_compare(lhs.template get<0>(), - lhs.template get<1>(), - lhs.template get<2>(), - rhs.template get<0>(), - rhs.template get<1>(), - rhs.template get<2>(), - s_Less); + inline bool operator()(const boost::tuple& lhs, + const boost::tuple& rhs) const { + return lexicographical_compare( + lhs.template get<0>(), lhs.template get<1>(), + lhs.template get<2>(), rhs.template get<0>(), + rhs.template get<1>(), rhs.template get<2>(), s_Less); } template - inline bool operator()(const boost::tuple& lhs, const boost::tuple& rhs) const { - return lexicographical_compare(lhs.template get<0>(), - lhs.template get<1>(), - lhs.template get<2>(), - lhs.template get<3>(), - rhs.template get<0>(), - rhs.template get<1>(), - rhs.template get<2>(), - rhs.template get<3>(), - s_Less); + inline bool operator()(const boost::tuple& lhs, + const boost::tuple& rhs) const { + return lexicographical_compare( + lhs.template get<0>(), lhs.template get<1>(), lhs.template get<2>(), + lhs.template get<3>(), rhs.template get<0>(), rhs.template get<1>(), + rhs.template get<2>(), rhs.template get<3>(), s_Less); } template - inline bool operator()(const boost::tuple& lhs, const boost::tuple& rhs) const { - return lexicographical_compare(lhs.template get<0>(), - lhs.template get<1>(), - lhs.template get<2>(), - lhs.template get<3>(), - lhs.template get<4>(), - rhs.template get<0>(), - rhs.template get<1>(), - rhs.template get<2>(), - rhs.template get<3>(), - rhs.template get<4>(), - s_Less); + inline bool operator()(const boost::tuple& lhs, + const boost::tuple& rhs) const { + return lexicographical_compare( + lhs.template get<0>(), lhs.template get<1>(), + lhs.template get<2>(), lhs.template get<3>(), lhs.template get<4>(), + rhs.template get<0>(), rhs.template get<1>(), rhs.template get<2>(), + rhs.template get<3>(), rhs.template get<4>(), s_Less); } SLess s_Less; @@ -430,18 +447,19 @@ class COrderings : private core::CNonInstantiatable { return s_Less(lhs.first, rhs); } -#define TUPLE_FIRST_LESS \ - template \ - inline bool operator()(const boost::tuple& lhs, const boost::tuple& rhs) const { \ - return s_Less(lhs.template get<0>(), rhs.template get<0>()); \ - } \ - template \ - inline bool operator()(const T1& lhs, const boost::tuple& rhs) const { \ - return s_Less(lhs, rhs.template get<0>()); \ - } \ - template \ - inline bool operator()(const boost::tuple& lhs, const T1& rhs) const { \ - return s_Less(lhs.template get<0>(), rhs); \ +#define TUPLE_FIRST_LESS \ + template \ + inline bool operator()(const boost::tuple& lhs, \ + const boost::tuple& rhs) const { \ + return s_Less(lhs.template get<0>(), rhs.template get<0>()); \ + } \ + template \ + inline bool operator()(const T1& lhs, const boost::tuple& rhs) const { \ + return s_Less(lhs, rhs.template get<0>()); \ + } \ + template \ + inline bool operator()(const boost::tuple& lhs, const T1& rhs) const { \ + return s_Less(lhs.template get<0>(), rhs); \ } #define TEMPLATE_ARGS_DECL typename T1, typename T2, typename T3 @@ -454,7 +472,8 @@ class COrderings : private core::CNonInstantiatable 
{ TUPLE_FIRST_LESS #undef TEMPLATE_ARGS #undef TEMPLATE_ARGS_DECL -#define TEMPLATE_ARGS_DECL typename T1, typename T2, typename T3, typename T4, typename T5 +#define TEMPLATE_ARGS_DECL \ + typename T1, typename T2, typename T3, typename T4, typename T5 #define TEMPLATE_ARGS T1, T2, T3, T4, T5 TUPLE_FIRST_LESS #undef TEMPLATE_ARGS @@ -582,9 +601,12 @@ class COrderings : private core::CNonInstantiatable { template> class CIndexLess { public: - CIndexLess(const KEY_VECTOR& keys, const COMP& comp = COMP()) : m_Keys(&keys), m_Comp(comp) {} + CIndexLess(const KEY_VECTOR& keys, const COMP& comp = COMP()) + : m_Keys(&keys), m_Comp(comp) {} - bool operator()(std::size_t lhs, std::size_t rhs) { return m_Comp((*m_Keys)[lhs], (*m_Keys)[rhs]); } + bool operator()(std::size_t lhs, std::size_t rhs) { + return m_Comp((*m_Keys)[lhs], (*m_Keys)[rhs]); + } private: const KEY_VECTOR* m_Keys; @@ -607,30 +629,31 @@ class COrderings : private core::CNonInstantiatable { // is in its correct place, and we update the ordering accordingly. // So the containers are sorted in at most O(N) additional steps to // the N * log(N) taken to sort the indices. -#define SIMULTANEOUS_SORT_IMPL \ - if (boost::algorithm::is_sorted(keys.begin(), keys.end(), comp)) { \ - return true; \ - } \ - using TSizeVec = std::vector; \ - TSizeVec ordering; \ - ordering.reserve(keys.size()); \ - for (std::size_t i = 0u; i < keys.size(); ++i) { \ - ordering.push_back(i); \ - } \ - std::stable_sort(ordering.begin(), ordering.end(), CIndexLess(keys, comp)); \ - for (std::size_t i = 0u; i < ordering.size(); ++i) { \ - std::size_t j_ = i; \ - std::size_t j = ordering[j_]; \ - while (i != j) { \ - using std::swap; \ - swap(keys[j_], keys[j]); \ - CUSTOM_SWAP_VALUES \ - ordering[j_] = j_; \ - j_ = j; \ - j = ordering[j_]; \ - } \ - ordering[j_] = j_; \ - } \ +#define SIMULTANEOUS_SORT_IMPL \ + if (boost::algorithm::is_sorted(keys.begin(), keys.end(), comp)) { \ + return true; \ + } \ + using TSizeVec = std::vector; \ + TSizeVec ordering; \ + ordering.reserve(keys.size()); \ + for (std::size_t i = 0u; i < keys.size(); ++i) { \ + ordering.push_back(i); \ + } \ + std::stable_sort(ordering.begin(), ordering.end(), \ + CIndexLess(keys, comp)); \ + for (std::size_t i = 0u; i < ordering.size(); ++i) { \ + std::size_t j_ = i; \ + std::size_t j = ordering[j_]; \ + while (i != j) { \ + using std::swap; \ + swap(keys[j_], keys[j]); \ + CUSTOM_SWAP_VALUES \ + ordering[j_] = j_; \ + j_ = j; \ + j = ordering[j_]; \ + } \ + ordering[j_] = j_; \ + } \ return true; #define CUSTOM_SWAP_VALUES swap(values[j_], values[j]); @@ -647,21 +670,27 @@ class COrderings : private core::CNonInstantiatable { //! Overload for default operator< comparison. template static bool simultaneousSort(KEY_VECTOR& keys, VALUE_VECTOR& values) { - return simultaneousSort(keys, values, std::less()); + return simultaneousSort(keys, values, + std::less()); } //! Overload for default operator< comparison. template - static bool simultaneousSort(core::CVectorRange& keys, core::CVectorRange& values) { - return simultaneousSort(keys, values, std::less()); + static bool simultaneousSort(core::CVectorRange& keys, + core::CVectorRange& values) { + return simultaneousSort(keys, values, + std::less()); } -#define CUSTOM_SWAP_VALUES \ - swap(values1[j_], values1[j]); \ +#define CUSTOM_SWAP_VALUES \ + swap(values1[j_], values1[j]); \ swap(values2[j_], values2[j]); //! Simultaneously sort \p keys, \p values1 and \p values2 //! using the \p comp order of \p keys. 
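The SIMULTANEOUS_SORT_IMPL macro reindented above deserves a plain-code reading: sort a vector of indices by the keys, then apply the resulting permutation in place by following cycles, so every swap moves one element into its final position. A standalone sketch of the same technique for a single value vector (names are illustrative):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

// Sort keys and values together: order an index vector by the keys, then
// walk each permutation cycle swapping both containers as we go.
template<typename K, typename V>
void simultaneousSortSketch(std::vector<K>& keys, std::vector<V>& values) {
    std::vector<std::size_t> ordering(keys.size());
    std::iota(ordering.begin(), ordering.end(), 0);
    std::stable_sort(ordering.begin(), ordering.end(),
                     [&keys](std::size_t lhs, std::size_t rhs) {
                         return keys[lhs] < keys[rhs];
                     });
    // At most O(N) swaps on top of the O(N log N) index sort.
    for (std::size_t i = 0; i < ordering.size(); ++i) {
        std::size_t j_ = i;
        std::size_t j = ordering[j_];
        while (i != j) {
            std::swap(keys[j_], keys[j]);
            std::swap(values[j_], values[j]);
            ordering[j_] = j_;
            j_ = j;
            j = ordering[j_];
        }
        ordering[j_] = j_;
    }
}

For example, keys {3, 1, 2} with values {"c", "a", "b"} come out as {1, 2, 3} and {"a", "b", "c"}; the is_sorted early return in the macro additionally makes the already-sorted case a single O(N) scan with no allocation.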
template - static bool simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, const COMP& comp) { + static bool simultaneousSort(KEY_VECTOR& keys, + VALUE1_VECTOR& values1, + VALUE2_VECTOR& values2, + const COMP& comp) { if (keys.size() != values1.size() || values1.size() != values2.size()) { return false; } @@ -670,27 +699,34 @@ class COrderings : private core::CNonInstantiatable { #undef CUSTOM_SWAP_VALUES //! Overload for default operator< comparison. template - static bool simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2) { - return simultaneousSort(keys, values1, values2, std::less()); + static bool + simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2) { + return simultaneousSort(keys, values1, values2, + std::less()); } //! Overload for default operator< comparison. template static bool simultaneousSort(core::CVectorRange keys, core::CVectorRange values1, core::CVectorRange values2) { - return simultaneousSort(keys, values1, values2, std::less()); + return simultaneousSort(keys, values1, values2, + std::less()); } -#define CUSTOM_SWAP_VALUES \ - swap(values1[j_], values1[j]); \ - swap(values2[j_], values2[j]); \ +#define CUSTOM_SWAP_VALUES \ + swap(values1[j_], values1[j]); \ + swap(values2[j_], values2[j]); \ swap(values3[j_], values3[j]); //! Simultaneously sort \p keys, \p values1, \p values2 //! and \p values3 using the \p comp order of \p keys. template - static bool - simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, VALUE3_VECTOR& values3, const COMP& comp) { - if (keys.size() != values1.size() || values1.size() != values2.size() || values2.size() != values3.size()) { + static bool simultaneousSort(KEY_VECTOR& keys, + VALUE1_VECTOR& values1, + VALUE2_VECTOR& values2, + VALUE3_VECTOR& values3, + const COMP& comp) { + if (keys.size() != values1.size() || values1.size() != values2.size() || + values2.size() != values3.size()) { return false; } SIMULTANEOUS_SORT_IMPL @@ -698,8 +734,12 @@ class COrderings : private core::CNonInstantiatable { #undef CUSTOM_SWAP_VALUES //! Overload for default operator< comparison. template - static bool simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, VALUE3_VECTOR& values3) { - return simultaneousSort(keys, values1, values2, values3, std::less()); + static bool simultaneousSort(KEY_VECTOR& keys, + VALUE1_VECTOR& values1, + VALUE2_VECTOR& values2, + VALUE3_VECTOR& values3) { + return simultaneousSort(keys, values1, values2, values3, + std::less()); } //! Overload for default operator< comparison. template @@ -707,31 +747,27 @@ class COrderings : private core::CNonInstantiatable { core::CVectorRange values1, core::CVectorRange values2, core::CVectorRange values3) { - return simultaneousSort(keys, values1, values2, values3, std::less()); + return simultaneousSort(keys, values1, values2, values3, + std::less()); } -#define CUSTOM_SWAP_VALUES \ - swap(values1[j_], values1[j]); \ - swap(values2[j_], values2[j]); \ - swap(values3[j_], values3[j]); \ +#define CUSTOM_SWAP_VALUES \ + swap(values1[j_], values1[j]); \ + swap(values2[j_], values2[j]); \ + swap(values3[j_], values3[j]); \ swap(values4[j_], values4[j]); //! Simultaneously sort \p keys, \p values1, \p values2, //! \p values3 and \p values4 using the \p comp order of //! \p keys. 
- template + template static bool simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, VALUE3_VECTOR& values3, VALUE4_VECTOR& values4, const COMP& comp) { - if (keys.size() != values1.size() || values1.size() != values2.size() || values2.size() != values3.size() || - values3.size() != values4.size()) { + if (keys.size() != values1.size() || values1.size() != values2.size() || + values2.size() != values3.size() || values3.size() != values4.size()) { return false; } SIMULTANEOUS_SORT_IMPL @@ -739,9 +775,13 @@ class COrderings : private core::CNonInstantiatable { #undef CUSTOM_SWAP_VALUES //! Overload for default operator< comparison. template - static bool - simultaneousSort(KEY_VECTOR& keys, VALUE1_VECTOR& values1, VALUE2_VECTOR& values2, VALUE3_VECTOR& values3, VALUE4_VECTOR& values4) { - return simultaneousSort(keys, values1, values2, values3, values4, std::less()); + static bool simultaneousSort(KEY_VECTOR& keys, + VALUE1_VECTOR& values1, + VALUE2_VECTOR& values2, + VALUE3_VECTOR& values3, + VALUE4_VECTOR& values4) { + return simultaneousSort(keys, values1, values2, values3, values4, + std::less()); } //! Overload for default operator< comparison. template @@ -750,7 +790,8 @@ class COrderings : private core::CNonInstantiatable { core::CVectorRange values2, core::CVectorRange values3, core::CVectorRange values4) { - return simultaneousSort(keys, values1, values2, values3, values4, std::less()); + return simultaneousSort(keys, values1, values2, values3, values4, + std::less()); } #undef SIMULTANEOUS_SORT_IMPL diff --git a/include/maths/COrdinal.h b/include/maths/COrdinal.h index 9fd9bfc770..aca629eaca 100644 --- a/include/maths/COrdinal.h +++ b/include/maths/COrdinal.h @@ -23,7 +23,8 @@ namespace maths { //! This deals with floating point and integer values and works //! around the loss of precision converting 64 bit integers to //! doubles. -class MATHS_EXPORT COrdinal : private boost::equality_comparable> { +class MATHS_EXPORT COrdinal + : private boost::equality_comparable> { public: //! Create an unset value. COrdinal(); diff --git a/include/maths/CPRNG.h b/include/maths/CPRNG.h index 9e69e97e39..e3b198b3cd 100644 --- a/include/maths/CPRNG.h +++ b/include/maths/CPRNG.h @@ -60,7 +60,9 @@ class MATHS_EXPORT CPRNG : private core::CNonInstantiatable { //! Compare for equality. bool operator==(CSplitMix64 other) const; //! Not equal. - bool operator!=(CSplitMix64 other) const { return !this->operator==(other); } + bool operator!=(CSplitMix64 other) const { + return !this->operator==(other); + } void seed(); void seed(uint64_t seed); @@ -121,7 +123,9 @@ class MATHS_EXPORT CPRNG : private core::CNonInstantiatable { //! Compare for equality. bool operator==(const CXorOShiro128Plus& other) const; //! Not equal. - bool operator!=(const CXorOShiro128Plus& other) const { return !this->operator==(other); } + bool operator!=(const CXorOShiro128Plus& other) const { + return !this->operator==(other); + } //! Set to the default seeded generator. //! @@ -208,7 +212,9 @@ class MATHS_EXPORT CPRNG : private core::CNonInstantiatable { //! Compare for equality. bool operator==(const CXorShift1024Mult& other) const; //! Not equal. - bool operator!=(const CXorShift1024Mult& other) const { return !this->operator==(other); } + bool operator!=(const CXorShift1024Mult& other) const { + return !this->operator==(other); + } //! Set to the default seeded generator. //! 
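For readers who don't know the generators whose comparison operators are reformatted above: CSplitMix64 is named for Vigna's public-domain splitmix64, a one-word generator commonly used to seed the larger xoroshiro128+ and xorshift1024* state arrays. The reference algorithm, as a sketch (this is the published recipe, not ml's class):

#include <cstdint>

// splitmix64: 64 bits of state advanced by a Weyl constant, with the
// output avalanched through two xor-multiply rounds.
struct SplitMix64 {
    std::uint64_t state{0};

    std::uint64_t next() {
        std::uint64_t z{state += 0x9E3779B97F4A7C15ULL};
        z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL;
        z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL;
        return z ^ (z >> 31);
    }
};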
diff --git a/include/maths/CPackedBitVector.h b/include/maths/CPackedBitVector.h index 17b9765169..9be32e7480 100644 --- a/include/maths/CPackedBitVector.h +++ b/include/maths/CPackedBitVector.h @@ -43,7 +43,8 @@ namespace maths { //! the first bit in the vector and can deduce all other values by the //! number of runs in between. In practice we store one extra bit, the //! vector parity to allow us to extend the vector efficiently. -class MATHS_EXPORT CPackedBitVector : private boost::equality_comparable> { +class MATHS_EXPORT CPackedBitVector + : private boost::equality_comparable> { public: using TBoolVec = std::vector; diff --git a/include/maths/CPeriodicityHypothesisTests.h b/include/maths/CPeriodicityHypothesisTests.h index e5014fdbe5..dcc0ea5106 100644 --- a/include/maths/CPeriodicityHypothesisTests.h +++ b/include/maths/CPeriodicityHypothesisTests.h @@ -76,7 +76,8 @@ class MATHS_EXPORT CPeriodicityHypothesisTestsResult //! //! \warning This only makes sense if this and the //! other result share the start of the partition time. - const CPeriodicityHypothesisTestsResult& operator+=(const CPeriodicityHypothesisTestsResult& other); + const CPeriodicityHypothesisTestsResult& + operator+=(const CPeriodicityHypothesisTestsResult& other); //! Add a component. void add(const std::string& description, @@ -293,62 +294,81 @@ class MATHS_EXPORT CPeriodicityHypothesisTests { TNestedHypothesesVec& hypotheses) const; //! Get the hypotheses to test for period components. - void - hypothesesForPeriod(const TTimeTimePr2Vec& windows, const TFloatMeanAccumulatorCRng& buckets, TNestedHypothesesVec& hypotheses) const; + void hypothesesForPeriod(const TTimeTimePr2Vec& windows, + const TFloatMeanAccumulatorCRng& buckets, + TNestedHypothesesVec& hypotheses) const; //! Extract the best hypothesis. CPeriodicityHypothesisTestsResult best(const TNestedHypothesesVec& hypotheses) const; //! The null hypothesis of the various tests. - CPeriodicityHypothesisTestsResult - testForNull(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + CPeriodicityHypothesisTestsResult testForNull(const TTimeTimePr2Vec& window, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const; //! Test for a daily periodic component. - CPeriodicityHypothesisTestsResult - testForDaily(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + CPeriodicityHypothesisTestsResult testForDaily(const TTimeTimePr2Vec& window, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const; //! Test for a weekly periodic component. - CPeriodicityHypothesisTestsResult - testForWeekly(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + CPeriodicityHypothesisTestsResult testForWeekly(const TTimeTimePr2Vec& window, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const; //! Test for a weekday/end partition. - CPeriodicityHypothesisTestsResult testForDailyWithWeekend(const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + CPeriodicityHypothesisTestsResult + testForDailyWithWeekend(const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; //! Test for a weekly period given we think there is a //! weekday/end partition.
CPeriodicityHypothesisTestsResult - testForWeeklyGivenDailyWithWeekend(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + testForWeeklyGivenDailyWithWeekend(const TTimeTimePr2Vec& window, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const; //! Test for the specified period given we think there is diurnal //! periodicity. - CPeriodicityHypothesisTestsResult - testForPeriod(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + CPeriodicityHypothesisTestsResult testForPeriod(const TTimeTimePr2Vec& window, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const; //! Check we've seen sufficient data to test accurately. - bool seenSufficientDataToTest(core_t::TTime period, const TFloatMeanAccumulatorCRng& buckets) const; + bool seenSufficientDataToTest(core_t::TTime period, + const TFloatMeanAccumulatorCRng& buckets) const; //! Check if there are enough non-empty buckets which are repeated //! at at least one \p period in \p buckets. - bool seenSufficientPeriodicallyPopulatedBucketsToTest(const TFloatMeanAccumulatorCRng& buckets, std::size_t period) const; + bool seenSufficientPeriodicallyPopulatedBucketsToTest(const TFloatMeanAccumulatorCRng& buckets, + std::size_t period) const; //! Compute various ancillary statistics for testing. bool testStatisticsFor(const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; //! Get the variance and degrees of freedom for the null hypothesis //! that there is no trend or repeating partition of any kind. - void nullHypothesis(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + void nullHypothesis(const TTimeTimePr2Vec& window, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const; //! Compute the variance and degrees of freedom for the hypothesis. - void hypothesis(const TTime2Vec& periods, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const; + void hypothesis(const TTime2Vec& periods, + const TFloatMeanAccumulatorCRng& buckets, + STestStats& stats) const; //! Condition \p buckets assuming the null hypothesis is true. //! //! This removes any trend associated with the null hypothesis. - void conditionOnHypothesis(const TTimeTimePr2Vec& windows, const STestStats& stats, TFloatMeanAccumulatorVec& buckets) const; + void conditionOnHypothesis(const TTimeTimePr2Vec& windows, + const STestStats& stats, + TFloatMeanAccumulatorVec& buckets) const; //! Test to see if there is significant evidence for a component //! with period \p period. - bool testPeriod(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, core_t::TTime period, STestStats& stats) const; + bool testPeriod(const TTimeTimePr2Vec& window, + const TFloatMeanAccumulatorCRng& buckets, + core_t::TTime period, + STestStats& stats) const; //! Test to see if there is significant evidence for a repeating //! partition of the data into windows defined by \p partition. @@ -391,10 +411,11 @@ using TFloatMeanAccumulatorVec = std::vector; //! Test for periodic components in \p values.
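All of the testForDaily/testForWeekly/testPeriod declarations above boil down to one comparison: how much residual variance remains after explaining the buckets with a repeating profile of the candidate period, judged against the degrees of freedom that profile consumes. A toy version of the core quantity (the real tests also condition on windows, use bucket weights, and apply an explicit significance test):

#include <cstddef>
#include <vector>

// Fraction of variance explained by a repeating profile of the given
// period; assumes values.size() >= period so every phase is populated.
double varianceExplainedByPeriod(const std::vector<double>& values, std::size_t period) {
    std::vector<double> phaseSum(period, 0.0);
    std::vector<std::size_t> phaseCount(period, 0);
    double mean{0.0};
    for (std::size_t i = 0; i < values.size(); ++i) {
        phaseSum[i % period] += values[i];
        ++phaseCount[i % period];
        mean += values[i];
    }
    mean /= static_cast<double>(values.size());
    double total{0.0};
    double residual{0.0};
    for (std::size_t i = 0; i < values.size(); ++i) {
        double phaseMean{phaseSum[i % period] /
                         static_cast<double>(phaseCount[i % period])};
        total += (values[i] - mean) * (values[i] - mean);
        residual += (values[i] - phaseMean) * (values[i] - phaseMean);
    }
    return total > 0.0 ? 1.0 - residual / total : 0.0;
}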
MATHS_EXPORT -CPeriodicityHypothesisTestsResult testForPeriods(const CPeriodicityHypothesisTestsConfig& config, - core_t::TTime startTime, - core_t::TTime bucketLength, - const TFloatMeanAccumulatorVec& values); +CPeriodicityHypothesisTestsResult +testForPeriods(const CPeriodicityHypothesisTestsConfig& config, + core_t::TTime startTime, + core_t::TTime bucketLength, + const TFloatMeanAccumulatorVec& values); } } diff --git a/include/maths/CPoissonMeanConjugate.h b/include/maths/CPoissonMeanConjugate.h index cb772fd1dd..7c9154707a 100644 --- a/include/maths/CPoissonMeanConjugate.h +++ b/include/maths/CPoissonMeanConjugate.h @@ -64,7 +64,8 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { CPoissonMeanConjugate(double offset, double shape, double rate, double decayRate = 0.0); //! Construct from part of a state document. - CPoissonMeanConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + CPoissonMeanConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser); // Default copy constructor and assignment operator work. //@} @@ -75,7 +76,8 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! \param[in] decayRate The rate at which to revert to the non-informative prior. //! \return A non-informative prior. //! \warning The caller owns the object returned. - static CPoissonMeanConjugate nonInformativePrior(double offset = 0.0, double decayRate = 0.0); + static CPoissonMeanConjugate nonInformativePrior(double offset = 0.0, + double decayRate = 0.0); //! \name Prior Contract //@{ @@ -108,7 +110,9 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -121,7 +125,9 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -139,12 +145,14 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. 
- virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -159,9 +167,10 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the Poisson mean. @@ -174,10 +183,11 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const; //! Sample the marginal likelihood function. //! @@ -285,7 +295,8 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { TDoubleDoublePr meanConfidenceInterval(double percentage) const; //! Check if two priors are equal to the specified tolerance. - bool equalTolerance(const CPoissonMeanConjugate& rhs, const TEqualWithTolerance& equal) const; + bool equalTolerance(const CPoissonMeanConjugate& rhs, + const TEqualWithTolerance& equal) const; //@} private: diff --git a/include/maths/CPrior.h b/include/maths/CPrior.h index 25b507ffbd..c8b2d6a00e 100644 --- a/include/maths/CPrior.h +++ b/include/maths/CPrior.h @@ -184,7 +184,9 @@ class MATHS_EXPORT CPrior { //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) = 0; + virtual double adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) = 0; //! Get the current sample offset. virtual double offset() const = 0; @@ -197,7 +199,9 @@ class MATHS_EXPORT CPrior { //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. 
- virtual void addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) = 0; + virtual void addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) = 0; //! Update the prior for the specified elapsed time. virtual void propagateForwardsByTime(double time) = 0; @@ -213,12 +217,14 @@ class MATHS_EXPORT CPrior { virtual double nearestMarginalLikelihoodMean(double value) const; //! Get the mode of the marginal likelihood function. - virtual double marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + virtual double + marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const = 0; //! Get the local maxima of the marginal likelihood function. - virtual TDouble1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDouble1Vec + marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -233,13 +239,15 @@ class MATHS_EXPORT CPrior { //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( + double percentage, + const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const = 0; //! Get the variance of the marginal likelihood. - virtual double marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + virtual double + marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, + const TDouble4Vec& weights = TWeights::UNIT) const = 0; //! Calculate the log marginal likelihood function integrating over the //! prior density function. @@ -250,10 +258,11 @@ class MATHS_EXPORT CPrior { //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. - virtual maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const = 0; + virtual maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const = 0; //! Sample the marginal likelihood function. //! @@ -272,7 +281,8 @@ class MATHS_EXPORT CPrior { //! \param[in] numberSamples The number of samples required. //! \param[out] samples Filled in with samples from the prior. //! \note \p numberSamples is truncated to the number of samples received. 
- virtual void sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const = 0; + virtual void sampleMarginalLikelihood(std::size_t numberSamples, + TDouble1Vec& samples) const = 0; //! Calculate minus the log of the joint c.d.f. of the marginal likelihood //! for a collection of independent samples from the variable. @@ -453,7 +463,9 @@ class MATHS_EXPORT CPrior { virtual double unmarginalizedParameters() const; //! Get a set of samples for the prior to use in adjustOffset. - void adjustOffsetResamples(double minimumSample, TDouble1Vec& resamples, TDouble4Vec1Vec& resamplesWeights) const; + void adjustOffsetResamples(double minimumSample, + TDouble1Vec& resamples, + TDouble4Vec1Vec& resamplesWeights) const; protected: //! \brief Defines a set of operations to adjust the offset parameter @@ -464,7 +476,9 @@ class MATHS_EXPORT CPrior { virtual ~COffsetParameters() = default; //! Add a collection of samples. - void samples(const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights); + void samples(const maths_t::TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights); //! Capture a collection of re-samples from the prior. virtual void resample(double minimumSample); diff --git a/include/maths/CPriorDetail.h b/include/maths/CPriorDetail.h index 4ef556c9e7..3d066f947c 100644 --- a/include/maths/CPriorDetail.h +++ b/include/maths/CPriorDetail.h @@ -12,8 +12,11 @@ namespace ml { namespace maths { template -bool CPrior::expectation(const F& f, std::size_t numberIntervals, T& result, const TWeightStyleVec& weightStyles, const TDouble4Vec& weight) - const { +bool CPrior::expectation(const F& f, + std::size_t numberIntervals, + T& result, + const TWeightStyleVec& weightStyles, + const TDouble4Vec& weight) const { if (numberIntervals == 0) { LOG_ERROR(<< "Must specify non-zero number of intervals"); return false; } @@ -22,7 +25,8 @@ bool CPrior::expectation(const F& f, std::size_t numberIntervals, T& result, con result = T(); double n{static_cast(numberIntervals)}; - TDoubleDoublePr interval{this->marginalLikelihoodConfidenceInterval(100.0 - 1.0 / (100.0 * n), weightStyles, weight)}; + TDoubleDoublePr interval{this->marginalLikelihoodConfidenceInterval( + 100.0 - 1.0 / (100.0 * n), weightStyles, weight)}; double x{interval.first}; double dx{(interval.second - interval.first) / n}; diff --git a/include/maths/CPriorStateSerialiser.h b/include/maths/CPriorStateSerialiser.h index 961946141a..d845ef3284 100644 --- a/include/maths/CPriorStateSerialiser.h +++ b/include/maths/CPriorStateSerialiser.h @@ -44,7 +44,9 @@ class MATHS_EXPORT CPriorStateSerialiser { public: //! Construct the appropriate CPrior sub-class from its state //! document representation. Sets \p ptr to NULL on failure. - bool operator()(const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser) const; + bool operator()(const SDistributionRestoreParams& params, + TPriorPtr& ptr, + core::CStateRestoreTraverser& traverser) const; //! Persist state by passing information to the supplied inserter void operator()(const CPrior& prior, core::CStatePersistInserter& inserter) const; @@ -52,10 +54,13 @@ class MATHS_EXPORT CPriorStateSerialiser { //! Construct the appropriate CMultivariatePrior sub-class from //! its state document representation. Sets \p ptr to NULL on //! failure.
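The CPrior::expectation hunk above shows the numerical strategy: take a confidence interval wide enough to contain essentially all the probability mass, cut it into numberIntervals equal slices, and accumulate f weighted by the mass of each slice. A simplified midpoint-rule sketch with a fixed standard normal standing in for the prior's marginal likelihood (the name and fixed interval are illustrative):

#include <cmath>
#include <cstddef>
#include <functional>

// E[f(X)] for X ~ N(0, 1), approximated over [-5, 5] by the midpoint rule.
double expectationSketch(const std::function<double(double)>& f, std::size_t n) {
    const double a{-5.0};
    const double b{5.0}; // stands in for the wide confidence interval
    double dx{(b - a) / static_cast<double>(n)};
    double result{0.0};
    for (std::size_t i = 0; i < n; ++i) {
        double x{a + (static_cast<double>(i) + 0.5) * dx};
        double density{std::exp(-0.5 * x * x) / std::sqrt(2.0 * 3.141592653589793)};
        result += f(x) * density * dx;
    }
    return result;
}

For instance, expectationSketch([](double x) { return x * x; }, 100) comes out close to 1.0, the variance of a standard normal.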
- bool operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, core::CStateRestoreTraverser& traverser) const; + bool operator()(const SDistributionRestoreParams& params, + TMultivariatePriorPtr& ptr, + core::CStateRestoreTraverser& traverser) const; //! Persist state by passing information to the supplied inserter - void operator()(const CMultivariatePrior& prior, core::CStatePersistInserter& inserter) const; + void operator()(const CMultivariatePrior& prior, + core::CStatePersistInserter& inserter) const; }; } } diff --git a/include/maths/CQDigest.h b/include/maths/CQDigest.h index bbebb6a019..07ce21f4e4 100644 --- a/include/maths/CQDigest.h +++ b/include/maths/CQDigest.h @@ -294,7 +294,8 @@ class MATHS_EXPORT CQDigest : private core::CNonCopyable { const uint64_t& subtreeCount() const; //! Persist this node and descendants - void persistRecursive(const std::string& nodeTag, core::CStatePersistInserter& inserter) const; + void persistRecursive(const std::string& nodeTag, + core::CStatePersistInserter& inserter) const; //! Create from an XML node tree. bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); diff --git a/include/maths/CQuantileSketch.h b/include/maths/CQuantileSketch.h index aa7f0ca6ac..45f6a4cbb6 100644 --- a/include/maths/CQuantileSketch.h +++ b/include/maths/CQuantileSketch.h @@ -142,19 +142,25 @@ class CFixedQuantileSketch : public CQuantileSketch { //! NB1: Needs to be redeclared to work with CChecksum. //! NB2: This method is not currently virtual - needs changing if any of the //! methods of this class ever do anything other than forward to the base class - uint64_t checksum(uint64_t seed = 0) const { return this->CQuantileSketch::checksum(seed); } + uint64_t checksum(uint64_t seed = 0) const { + return this->CQuantileSketch::checksum(seed); + } //! Debug the memory used by this object. //! NB1: Needs to be redeclared to work with CMemoryDebug. //! NB2: This method is not currently virtual - needs changing if any of the //! methods of this class ever do anything other than forward to the base class - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { this->CQuantileSketch::debugMemoryUsage(mem); } + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + this->CQuantileSketch::debugMemoryUsage(mem); + } //! Get the memory used by this object. //! NB1: Needs to be redeclared to work with CMemory. //! NB2: This method is not currently virtual - needs changing if any of the //! methods of this class ever do anything other than forward to the base class - std::size_t memoryUsage() const { return this->CQuantileSketch::memoryUsage(); } + std::size_t memoryUsage() const { + return this->CQuantileSketch::memoryUsage(); + } }; //! Write to stream using print member. diff --git a/include/maths/CRadialBasisFunction.h b/include/maths/CRadialBasisFunction.h index 27a796c324..0d6303973a 100644 --- a/include/maths/CRadialBasisFunction.h +++ b/include/maths/CRadialBasisFunction.h @@ -68,14 +68,20 @@ class MATHS_EXPORT CRadialBasisFunction { //! //! //! \note \p b should be greater than or equal to \p a. - virtual double meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const = 0; + virtual double + meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const = 0; //! \brief Get the integral of the product of two basis functions //! on the interval \f$[a,b]\f$, i.e. //! <pre class="fragment">
     //!   \f$\displaystyle \frac{1}{b - a} \int_a^b{\phi_{\epsilon}(\left \|u - c_1 \right \|)\phi_{\epsilon}(\left \|u - c_2 \right \|)}du\f$
//! </pre>
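The normalized product integral documented just above has closed forms for the concrete basis functions, which is what the overrides below compute; a quadrature version still makes the definition concrete. A sketch for the Gaussian basis function phi_eps(r) = exp(-(eps r)^2) in one dimension, using a plain midpoint rule rather than the library's analytic expressions:

#include <cmath>

// Gaussian radial basis function with inverse length scale `scale`.
double gaussianRbf(double u, double centre, double scale) {
    double r{u - centre};
    return std::exp(-scale * scale * r * r);
}

// (1 / (b - a)) * integral over [a, b] of the product of two Gaussian
// basis functions, by midpoint quadrature.
double productIntegral(double a, double b, double centre1, double centre2,
                       double scale1, double scale2) {
    const int n{1000};
    double dx{(b - a) / n};
    double sum{0.0};
    for (int i = 0; i < n; ++i) {
        double u{a + (i + 0.5) * dx};
        sum += gaussianRbf(u, centre1, scale1) * gaussianRbf(u, centre2, scale2) * dx;
    }
    return sum / (b - a);
}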
- virtual double product(double a, double b, double centre1, double centre2, double scale1 = 1.0, double scale2 = 1.0) const = 0; + virtual double product(double a, + double b, + double centre1, + double centre2, + double scale1 = 1.0, + double scale2 = 1.0) const = 0; }; //! \brief The Gaussian radial basis function. @@ -115,11 +121,17 @@ class MATHS_EXPORT CGaussianBasisFunction : public CRadialBasisFunction { //! \brief Get the mean square derivative of the basis function //! on the interval [\p a, \p b], i.e. the result of: - virtual double meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const; + virtual double + meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const; //! \brief Get the integral of the product of two basis functions //! on the interval [\p a, \p b]. - virtual double product(double a, double b, double centre1, double centre2, double scale1 = 1.0, double scale2 = 1.0) const; + virtual double product(double a, + double b, + double centre1, + double centre2, + double scale1 = 1.0, + double scale2 = 1.0) const; }; //! \brief The inverse quadratic radial basis function. @@ -159,11 +171,17 @@ class MATHS_EXPORT CInverseQuadraticBasisFunction : public CRadialBasisFunction //! \brief Get the mean square derivative of the basis function //! on the interval [\p a, \p b], i.e. the result of: - virtual double meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const; + virtual double + meanSquareDerivative(double a, double b, double centre, double scale = 1.0) const; //! \brief Get the integral of the product of two basis functions //! on the interval [\p a, \p b]. - virtual double product(double a, double b, double centre1, double centre2, double scale1 = 1.0, double scale2 = 1.0) const; + virtual double product(double a, + double b, + double centre1, + double centre2, + double scale1 = 1.0, + double scale2 = 1.0) const; }; } } diff --git a/include/maths/CRandomProjectionClusterer.h b/include/maths/CRandomProjectionClusterer.h index ccf6fcb1bf..8e7f291c0b 100644 --- a/include/maths/CRandomProjectionClusterer.h +++ b/include/maths/CRandomProjectionClusterer.h @@ -114,7 +114,8 @@ class CRandomProjectionClusterer { for (std::size_t i = 0u; i < b; ++i) { TVectorArray& projection = m_Projections[i]; for (std::size_t j = 0u; j < N; ++j) { - projection[j].assign(&components[(i * N + j) * m_Dimension], &components[(i * N + j + 1) * m_Dimension]); + projection[j].assign(&components[(i * N + j) * m_Dimension], + &components[(i * N + j + 1) * m_Dimension]); } if (!CGramSchmidt::basis(projection)) { @@ -151,7 +152,8 @@ class CRandomProjectionClusterer { CSampling::normalSample(m_Rng, 0.0, 1.0, b * N * d, components); for (std::size_t i = 0u; i < b; ++i) { for (std::size_t j = 0u; j < N; ++j) { - extension[j].assign(&components[(i * N + j) * d], &components[(i * N + j + 1) * d]); + extension[j].assign(&components[(i * N + j) * d], + &components[(i * N + j + 1) * d]); } if (!CGramSchmidt::basis(extension)) { @@ -215,7 +217,8 @@ class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { using TMeanAccumulatorVecVec = std::vector; public: - CRandomProjectionClustererBatch(double compression) : m_Compression(compression) {} + CRandomProjectionClustererBatch(double compression) + : m_Compression(compression) {} virtual ~CRandomProjectionClustererBatch() = default; @@ -316,9 +319,14 @@ class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { //! 
\param[out] I Filled in with the indices of distinct sampled //! points. template - void clusterProjections(CLUSTERER clusterer, TDoubleVecVec& W, TVectorNx1VecVec& M, TSvdNxNVecVec& C, TSizeUSet& I) const { + void clusterProjections(CLUSTERER clusterer, + TDoubleVecVec& W, + TVectorNx1VecVec& M, + TSvdNxNVecVec& C, + TSizeUSet& I) const { using TVectorNx1CRef = boost::reference_wrapper; - using TVectorNx1CRefSizeUMap = boost::unordered_map; + using TVectorNx1CRefSizeUMap = + boost::unordered_map; using TClusterVec = typename CLUSTERER::TClusterVec; using TSampleCovariancesNxN = CBasicStatistics::SSampleCovariances; @@ -363,14 +371,16 @@ class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { // Compute the number of points to sample from this cluster. std::size_t nij = points.size(); double wij = static_cast(nij) / static_cast(n); - std::size_t nsij = static_cast(std::max(m_Compression * wij * ni, 1.0)); + std::size_t nsij = + static_cast(std::max(m_Compression * wij * ni, 1.0)); LOG_TRACE(<< "wij = " << wij << ", nsij = " << nsij); // Compute the cluster sample mean and covariance matrix. TSampleCovariancesNxN covariances; covariances.add(points); TVectorNx1 mij = CBasicStatistics::mean(covariances); - TSvdNxN Cij(toDenseMatrix(CBasicStatistics::covariances(covariances)), Eigen::ComputeFullU | Eigen::ComputeFullV); + TSvdNxN Cij(toDenseMatrix(CBasicStatistics::covariances(covariances)), + Eigen::ComputeFullU | Eigen::ComputeFullV); // Compute the probability that a sample from the cluster // is a given point in the cluster. @@ -401,7 +411,8 @@ class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { LOG_TRACE(<< "pij = " << core::CContainerPrinter::print(pij)); // Sample the cluster. - CSampling::categoricalSampleWithoutReplacement(this->rng(), pij, nsij, sij); + CSampling::categoricalSampleWithoutReplacement(this->rng(), + pij, nsij, sij); LOG_TRACE(<< "sij = " << core::CContainerPrinter::print(sij)); // Save the relevant data for the i'th clustering. @@ -475,8 +486,11 @@ class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { //! i.e. the indices of the closest points. //! \param[out] S Filled in with the mean similarities between //! neighbourhoods over the different clusterings. 
- void - similarities(const TDoubleVecVec& W, const TVectorNx1VecVec& M, const TSvdNxNVecVec& C, const TSizeVecVec& H, TDoubleVecVec& S) const { + void similarities(const TDoubleVecVec& W, + const TVectorNx1VecVec& M, + const TSvdNxNVecVec& C, + const TSizeVecVec& H, + TDoubleVecVec& S) const { std::size_t b = m_ProjectedData.size(); std::size_t h = H.size(); @@ -526,7 +540,8 @@ class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { for (std::size_t j = 0u; j < h; ++j) { S_[j].resize(j + 1); for (std::size_t k = 0u; k <= j; ++k) { - S_[j][k].add(-std::log(std::max(Pi[j].inner(Pi[k]), boost::numeric::bounds::smallest()))); + S_[j][k].add(-std::log(std::max( + Pi[j].inner(Pi[k]), boost::numeric::bounds::smallest()))); } } } @@ -566,13 +581,14 @@ class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { LOG_TRACE(<< "heights = " << core::CContainerPrinter::print(heights)); TSizeVec splits; - if (CNaturalBreaksClassifier::naturalBreaks(heights, - 2, // Number splits - 0, // Minimum cluster size - CNaturalBreaksClassifier::E_TargetDeviation, - splits)) { + if (CNaturalBreaksClassifier::naturalBreaks( + heights, + 2, // Number splits + 0, // Minimum cluster size + CNaturalBreaksClassifier::E_TargetDeviation, splits)) { double height = CBasicStatistics::mean(heights[splits[0] - 1]); - LOG_TRACE(<< "split = " << core::CContainerPrinter::print(splits) << ", height = " << height); + LOG_TRACE(<< "split = " << core::CContainerPrinter::print(splits) + << ", height = " << height); const TNode& root = tree.back(); root.clusteringAt(height, result); for (std::size_t i = 0u; i < result.size(); ++i) { @@ -594,7 +610,8 @@ class CRandomProjectionClustererBatch : public CRandomProjectionClusterer { //! Get the log determinant of the rank full portion of \p m. double logDeterminant(const TSvdNxN& svd) const { double result = 0.0; - for (std::size_t i = 0u, rank = static_cast(svd.rank()); i < rank; ++i) { + for (std::size_t i = 0u, rank = static_cast(svd.rank()); + i < rank; ++i) { result += std::log(svd.singularValues()[i]); } return result; @@ -638,7 +655,10 @@ class CRandomProjectionClustererFacade, COST>> { void setPoints(TVectorNx1Vec& points) { m_Xmeans.setPoints(points); } //! Cluster the points. - void run() { m_Xmeans.run(m_ImproveParamsKmeansIterations, m_ImproveStructureClusterSeeds, m_ImproveStructureKmeansIterations); } + void run() { + m_Xmeans.run(m_ImproveParamsKmeansIterations, m_ImproveStructureClusterSeeds, + m_ImproveStructureKmeansIterations); + } //! Get the clusters (should only be called after run). const TClusterVec& clusters() const { return m_Xmeans.clusters(); } @@ -665,7 +685,8 @@ forRandomProjectionClusterer(const CXMeans, COST>& xmeans, std::size_t improveStructureClusterSeeds, std::size_t improveStructureKmeansIterations) { return CRandomProjectionClustererFacade, COST>>( - xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations); + xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, + improveStructureKmeansIterations); } //! \brief Adapts k-means for use by the random projection clusterer. @@ -715,8 +736,11 @@ class CRandomProjectionClustererFacade>> { //! Makes a k-means adapter for random projection clustering. 
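The accumulation a few hunks up, S_[j][k].add(-std::log(std::max(Pi[j].inner(Pi[k]), ...))), is the heart of how clusterings are compared across projections: each neighbourhood is summarized as a probability vector over the sampled points, and two neighbourhoods are scored by the negative log of their inner product, clamped away from zero so the log stays finite. The same idea as a free function (a sketch, not the class's helper):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

// Dissimilarity of two neighbourhoods given their probability vectors
// over the sampled points; a small inner product means the neighbourhoods
// are unlikely to generate one another's points, so the score is large.
double neighbourhoodDissimilarity(const std::vector<double>& pj,
                                  const std::vector<double>& pk) {
    double inner{0.0};
    for (std::size_t i = 0; i < pj.size(); ++i) {
        inner += pj[i] * pk[i];
    }
    return -std::log(std::max(inner, std::numeric_limits<double>::min()));
}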
template CRandomProjectionClustererFacade>> -forRandomProjectionClusterer(const CKMeansFast>& kmeans, std::size_t k, std::size_t maxIterations) { - return CRandomProjectionClustererFacade>>(kmeans, k, maxIterations); +forRandomProjectionClusterer(const CKMeansFast>& kmeans, + std::size_t k, + std::size_t maxIterations) { + return CRandomProjectionClustererFacade>>( + kmeans, k, maxIterations); } } } diff --git a/include/maths/CRegression.h b/include/maths/CRegression.h index cda79f8ff2..ae16838aa1 100644 --- a/include/maths/CRegression.h +++ b/include/maths/CRegression.h @@ -102,7 +102,8 @@ class MATHS_EXPORT CRegression { using TArray = boost::array; using TVector = CVectorNx1; using TMatrix = CSymmetricMatrixNxN; - using TVectorMeanAccumulator = typename CBasicStatistics::SSampleMean::TAccumulator; + using TVectorMeanAccumulator = + typename CBasicStatistics::SSampleMean::TAccumulator; public: static const std::string STATISTIC_TAG; @@ -110,7 +111,8 @@ class MATHS_EXPORT CRegression { public: CLeastSquaresOnline() : m_S() {} template - CLeastSquaresOnline(const CLeastSquaresOnline& other) : m_S(other.statistic()) {} + CLeastSquaresOnline(const CLeastSquaresOnline& other) + : m_S(other.statistic()) {} //! Restore by traversing a state document. bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); @@ -240,7 +242,8 @@ class MATHS_EXPORT CRegression { } //! Get the predicted value at \p x. - double predict(double x, double maxCondition = regression_detail::CMaxCondition::VALUE) const; + double predict(double x, + double maxCondition = regression_detail::CMaxCondition::VALUE) const; //! Get the regression parameters. //! @@ -250,12 +253,14 @@ class MATHS_EXPORT CRegression { //! the Gramian this will consider solving. If the condition //! is worse than this it'll fit a lower order polynomial. //! \param[out] result Filled in with the regression parameters. - bool parameters(TArray& result, double maxCondition = regression_detail::CMaxCondition::VALUE) const; + bool parameters(TArray& result, + double maxCondition = regression_detail::CMaxCondition::VALUE) const; //! Get the predicted value of the regression parameters at \p x. //! //! \note Returns array of zeros if getting the parameters fails. - TArray parameters(double x, double maxCondition = regression_detail::CMaxCondition::VALUE) const { + TArray parameters(double x, + double maxCondition = regression_detail::CMaxCondition::VALUE) const { TArray result; TArray params; if (this->parameters(params, maxCondition)) { @@ -263,7 +268,8 @@ class MATHS_EXPORT CRegression { for (std::ptrdiff_t i = n - 1; i >= 0; --i) { result[i] = params[i]; for (std::ptrdiff_t j = i + 1; j < n; ++j) { - params[j] *= static_cast(i + 1) / static_cast(j - i) * x; + params[j] *= static_cast(i + 1) / + static_cast(j - i) * x; result[i] += params[j]; } } @@ -290,7 +296,9 @@ class MATHS_EXPORT CRegression { //! the Gramian this will consider solving. If the condition //! is worse than this it'll fit a lower order polynomial. //! \param[out] result Filled in with the covariance matrix. - bool covariances(double variance, TMatrix& result, double maxCondition = regression_detail::CMaxCondition::VALUE) const; + bool covariances(double variance, + TMatrix& result, + double maxCondition = regression_detail::CMaxCondition::VALUE) const; //! Get the safe prediction horizon based on the spread //! of the abscissa added to the model so far. 
@@ -311,7 +319,8 @@ class MATHS_EXPORT CRegression {
         if (meanRevert) {
             TVector& s = CBasicStatistics::moment<0>(m_S);
             for (std::size_t i = 1u; i < N; ++i) {
-                s(i + 2 * N - 1) = factor * s(i + 2 * N - 1) + (1.0 - factor) * s(i) * s(2 * N - 1);
+                s(i + 2 * N - 1) = factor * s(i + 2 * N - 1) +
+                                   (1.0 - factor) * s(i) * s(2 * N - 1);
             }
         }
         m_S.age(factor);
@@ -343,8 +352,10 @@ class MATHS_EXPORT CRegression {

         for (std::size_t i = 0u; i < N; ++i) {
             for (std::size_t j = 0u; j <= i; ++j) {
-                result += CCategoricalTools::binomialCoefficient(i + 1, j + 1) * params[i] / static_cast(i + 1) *
-                          std::pow(a, static_cast(i - j)) * std::pow(interval, static_cast(j + 1));
+                result += CCategoricalTools::binomialCoefficient(i + 1, j + 1) *
+                          params[i] / static_cast(i + 1) *
+                          std::pow(a, static_cast(i - j)) *
+                          std::pow(interval, static_cast(j + 1));
             }
         }
@@ -459,7 +470,8 @@ class MATHS_EXPORT CRegression {
             dT(i) = dTi;
         }

-        CSymmetricMatrixNxN covariance = CBasicStatistics::covariances(m_UnitTimeCovariances);
+        CSymmetricMatrixNxN covariance =
+            CBasicStatistics::covariances(m_UnitTimeCovariances);

         return dT.inner(covariance * dT);
     }
@@ -490,7 +502,8 @@ double CRegression::CLeastSquaresOnline::predict(double x, double maxCondi
 template
 const std::string CRegression::CLeastSquaresOnline::STATISTIC_TAG("a");
 template
-const std::string CRegression::CLeastSquaresOnlineParameterProcess::UNIT_TIME_COVARIANCES_TAG("a");
+const std::string
+    CRegression::CLeastSquaresOnlineParameterProcess::UNIT_TIME_COVARIANCES_TAG("a");
 }
 }
diff --git a/include/maths/CRegressionDetail.h b/include/maths/CRegressionDetail.h
index 327579b2a2..817cad0f0c 100644
--- a/include/maths/CRegressionDetail.h
+++ b/include/maths/CRegressionDetail.h
@@ -112,7 +112,9 @@ bool CRegression::CLeastSquaresOnline::parameters(TArray& result, double m
 }

 template
-bool CRegression::CLeastSquaresOnline::covariances(double variance, TMatrix& result, double maxCondition) const {
+bool CRegression::CLeastSquaresOnline::covariances(double variance,
+                                                   TMatrix& result,
+                                                   double maxCondition) const {
     result = TMatrix(0.0);

     // Search for the covariance matrix of a non-singular subproblem.
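
[Note] The binomial-coefficient sum in the CRegression hunk above is the closed form of a monomial's integral over [a, a + T], which is how the code integrates the fitted polynomial exactly rather than numerically. Expanding (a + T)^{i+1} binomially gives

\[
\int_a^{a+T} t^i \, dt
  \;=\; \frac{(a+T)^{i+1} - a^{i+1}}{i+1}
  \;=\; \frac{1}{i+1} \sum_{j=0}^{i} \binom{i+1}{j+1} a^{\,i-j}\, T^{\,j+1},
\]

which matches the loop term binomialCoefficient(i + 1, j + 1) * params[i] / (i + 1) * a^(i - j) * interval^(j + 1).
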
@@ -149,7 +151,8 @@ std::string CRegression::CLeastSquaresOnline::print() const {
     if (this->parameters(params)) {
         std::string result;
         for (std::size_t i = params.size() - 1; i > 0; --i) {
-            result += core::CStringUtils::typeToStringPretty(params[i]) + " x^" + core::CStringUtils::typeToStringPretty(i) + " + ";
+            result += core::CStringUtils::typeToStringPretty(params[i]) +
+                      " x^" + core::CStringUtils::typeToStringPretty(i) + " + ";
         }
         result += core::CStringUtils::typeToStringPretty(params[0]);
         return result;
@@ -159,7 +162,11 @@ std::string CRegression::CLeastSquaresOnline::print() const {

 template
 template
-bool CRegression::CLeastSquaresOnline::parameters(std::size_t n, MATRIX& x, VECTOR& y, double maxCondition, TArray& result) const {
+bool CRegression::CLeastSquaresOnline::parameters(std::size_t n,
+                                                  MATRIX& x,
+                                                  VECTOR& y,
+                                                  double maxCondition,
+                                                  TArray& result) const {
     if (n == 1) {
         result[0] = CBasicStatistics::mean(m_S)(2 * N - 1);
         return true;
@@ -173,7 +180,8 @@ bool CRegression::CLeastSquaresOnline::parameters(std::size_t n, MATRIX& x
     LOG_TRACE(<< "x =\n" << x);
     LOG_TRACE(<< "y =\n" << y);

-    Eigen::JacobiSVD x_(x.template selfadjointView(), Eigen::ComputeFullU | Eigen::ComputeFullV);
+    Eigen::JacobiSVD x_(x.template selfadjointView(),
+                        Eigen::ComputeFullU | Eigen::ComputeFullV);
     if (x_.singularValues()(0) > maxCondition * x_.singularValues()(n - 1)) {
         LOG_TRACE(<< "singular values = " << x_.singularValues());
         return false;
@@ -191,15 +199,19 @@ bool CRegression::CLeastSquaresOnline::parameters(std::size_t n, MATRIX& x

 template
 template
-bool CRegression::CLeastSquaresOnline::covariances(std::size_t n, MATRIX& x, double variance, double maxCondition, TMatrix& result)
-    const {
+bool CRegression::CLeastSquaresOnline::covariances(std::size_t n,
+                                                   MATRIX& x,
+                                                   double variance,
+                                                   double maxCondition,
+                                                   TMatrix& result) const {
     if (n == 1) {
         x(0) = variance / CBasicStatistics::count(m_S);
         return true;
     }

     this->gramian(n, x);
-    Eigen::JacobiSVD x_(x.template selfadjointView(), Eigen::ComputeFullU | Eigen::ComputeFullV);
+    Eigen::JacobiSVD x_(x.template selfadjointView(),
+                        Eigen::ComputeFullU | Eigen::ComputeFullV);
     if (x_.singularValues()(0) > maxCondition * x_.singularValues()(n - 1)) {
         LOG_TRACE(<< "singular values = " << x_.singularValues());
         return false;
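
[Note] Both parameters() and covariances() above decline to solve when the ratio of largest to smallest singular value exceeds maxCondition, so the caller can fall back to a lower-order fit. A compact sketch of that guard with Eigen (illustrative function, not the patch's code):

    // Least-squares solve that refuses ill-conditioned systems, mirroring
    // the singular-value check used above.
    #include <Eigen/SVD>

    bool solveGuarded(const Eigen::MatrixXd& x, const Eigen::VectorXd& y,
                      double maxCondition, Eigen::VectorXd& result) {
        Eigen::JacobiSVD<Eigen::MatrixXd> svd(x, Eigen::ComputeFullU | Eigen::ComputeFullV);
        const Eigen::VectorXd& s = svd.singularValues();
        if (s(0) > maxCondition * s(s.size() - 1)) {
            return false; // Caller should retry with a lower polynomial order.
        }
        result = svd.solve(y);
        return true;
    }
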
@@ -209,8 +221,9 @@ bool CRegression::CLeastSquaresOnline::covariances(std::size_t n, MATRIX&
     // the matrix condition above. Also, we zero initialize result
     // in the calling code so any values we don't fill in the
     // following loop are zero (as required).
-    x = (x_.matrixV() * x_.singularValues().cwiseInverse().asDiagonal() * x_.matrixU().transpose()) * variance /
-        CBasicStatistics::count(m_S);
+    x = (x_.matrixV() * x_.singularValues().cwiseInverse().asDiagonal() *
+         x_.matrixU().transpose()) *
+        variance / CBasicStatistics::count(m_S);
     for (std::size_t i = 0u; i < n; ++i) {
         result(i, i) = x(i, i);
         for (std::size_t j = 0u; j < i; ++j) {
@@ -222,16 +235,19 @@ bool CRegression::CLeastSquaresOnline::covariances(std::size_t n, MATRIX&
 }

 template
-bool CRegression::CLeastSquaresOnlineParameterProcess::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
+bool CRegression::CLeastSquaresOnlineParameterProcess::acceptRestoreTraverser(
+    core::CStateRestoreTraverser& traverser) {
     do {
         const std::string& name = traverser.name();
-        RESTORE(UNIT_TIME_COVARIANCES_TAG, m_UnitTimeCovariances.fromDelimited(traverser.value()))
+        RESTORE(UNIT_TIME_COVARIANCES_TAG,
+                m_UnitTimeCovariances.fromDelimited(traverser.value()))
     } while (traverser.next());
     return true;
 }

 template
-void CRegression::CLeastSquaresOnlineParameterProcess::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+void CRegression::CLeastSquaresOnlineParameterProcess::acceptPersistInserter(
+    core::CStatePersistInserter& inserter) const {
     inserter.insertValue(UNIT_TIME_COVARIANCES_TAG, m_UnitTimeCovariances.toDelimited());
 }
diff --git a/include/maths/CSampling.h b/include/maths/CSampling.h
index 51dc05bf47..cd372c7cf4 100644
--- a/include/maths/CSampling.h
+++ b/include/maths/CSampling.h
@@ -82,13 +82,15 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable {

         //! Writes the mersenne_twister_engine to a std::ostream.
         template
-        friend std::basic_ostream& operator<<(std::basic_ostream& o, const CRandomNumberGenerator& g) {
+        friend std::basic_ostream&
+        operator<<(std::basic_ostream& o, const CRandomNumberGenerator& g) {
             return o << g.m_Rng;
         }

         //! Reads a mersenne_twister_engine from a std::istream.
         template
-        friend std::basic_istream& operator>>(std::basic_istream& i, CRandomNumberGenerator& g) {
+        friend std::basic_istream&
+        operator>>(std::basic_istream& i, CRandomNumberGenerator& g) {
             return i >> g.m_Rng;
         }

@@ -125,13 +127,15 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable {
     //!
     //! Sample uniformly from a specified range
     //@{
-#define UNIFORM_SAMPLE(TYPE)                                                                                      \
-    static TYPE uniformSample(TYPE a, TYPE b);                                                                    \
-    static TYPE uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b);                                     \
-    static TYPE uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b);                                     \
-    static void uniformSample(TYPE a, TYPE b, std::size_t n, std::vector& result);                                \
-    static void uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b, std::size_t n, std::vector& result); \
-    static void uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b, std::size_t n, std::vector& result);
+#define UNIFORM_SAMPLE(TYPE)                                                           \
+    static TYPE uniformSample(TYPE a, TYPE b);                                         \
+    static TYPE uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b);          \
+    static TYPE uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b);          \
+    static void uniformSample(TYPE a, TYPE b, std::size_t n, std::vector& result);     \
+    static void uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b,           \
+                              std::size_t n, std::vector& result);                     \
+    static void uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b,           \
+                              std::size_t n, std::vector& result);
     UNIFORM_SAMPLE(std::size_t)
     UNIFORM_SAMPLE(std::ptrdiff_t)
     UNIFORM_SAMPLE(double)
@@ -156,11 +160,19 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable {

     //! Get \p n normal samples with mean and variance \p mean and
     //! \p variance, respectively, using \p rng.
-    static void normalSample(CPRNG::CXorOShiro128Plus& rng, double mean, double variance, std::size_t n, TDoubleVec& result);
+    static void normalSample(CPRNG::CXorOShiro128Plus& rng,
+                             double mean,
+                             double variance,
+                             std::size_t n,
+                             TDoubleVec& result);

     //! Get \p n normal samples with mean and variance \p mean and
     //! \p variance, respectively, using \p rng.
-    static void normalSample(CPRNG::CXorShift1024Mult& rng, double mean, double variance, std::size_t n, TDoubleVec& result);
+    static void normalSample(CPRNG::CXorShift1024Mult& rng,
+                             double mean,
+                             double variance,
+                             std::size_t n,
+                             TDoubleVec& result);

     //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f
     //! degrees of freedom.
@@ -168,11 +180,13 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable {

     //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f
     //! degrees of freedom using \p rng.
-    static void chiSquaredSample(CPRNG::CXorOShiro128Plus& rng, double f, std::size_t n, TDoubleVec& result);
+    static void
+    chiSquaredSample(CPRNG::CXorOShiro128Plus& rng, double f, std::size_t n, TDoubleVec& result);

     //! Get \p n samples of a \f$\chi^2\f$ random variable with \p f
     //! degrees of freedom using \p rng.
-    static void chiSquaredSample(CPRNG::CXorShift1024Mult& rng, double f, std::size_t n, TDoubleVec& result);
+    static void
+    chiSquaredSample(CPRNG::CXorShift1024Mult& rng, double f, std::size_t n, TDoubleVec& result);

     //! \name Multivariate Normal Sampling
     //@{
@@ -184,7 +198,10 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable {
     //! \param[in] n The number of samples to generate.
     //! \param[out] samples Filled in with IID samples of the
     //! multivariate normal.
-    static bool multivariateNormalSample(const TDoubleVec& mean, const TDoubleVecVec& covariance, std::size_t n, TDoubleVecVec& samples);
+    static bool multivariateNormalSample(const TDoubleVec& mean,
+                                         const TDoubleVecVec& covariance,
+                                         std::size_t n,
+                                         TDoubleVecVec& samples);

     //! Overload of multivariate normal sample using \p rng
     static bool multivariateNormalSample(CPRNG::CXorOShiro128Plus& rng,
@@ -200,21 +217,18 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable {
                                          std::size_t n,
                                          TDoubleVecVec& samples);

-#define MULTIVARIATE_NORMAL_SAMPLE(N)                                                  \
-    static void multivariateNormalSample(const CVectorNx1& mean,                       \
-                                         const CSymmetricMatrixNxN& covariance,        \
-                                         std::size_t n,                                \
-                                         std::vector>& samples);                       \
-    static void multivariateNormalSample(CPRNG::CXorOShiro128Plus& rng,                \
-                                         const CVectorNx1& mean,                       \
-                                         const CSymmetricMatrixNxN& covariance,        \
-                                         std::size_t n,                                \
-                                         std::vector>& samples);                       \
-    static void multivariateNormalSample(CPRNG::CXorShift1024Mult& rng,                \
-                                         const CVectorNx1& mean,                       \
-                                         const CSymmetricMatrixNxN& covariance,        \
-                                         std::size_t n,                                \
-                                         std::vector>& samples)
+#define MULTIVARIATE_NORMAL_SAMPLE(N)                                                  \
+    static void multivariateNormalSample(                                              \
+        const CVectorNx1& mean, const CSymmetricMatrixNxN& covariance,                 \
+        std::size_t n, std::vector>& samples);                                         \
+    static void multivariateNormalSample(                                              \
+        CPRNG::CXorOShiro128Plus& rng, const CVectorNx1& mean,                         \
+        const CSymmetricMatrixNxN& covariance, std::size_t n,                          \
+        std::vector>& samples);                                                        \
+    static void multivariateNormalSample(                                              \
+        CPRNG::CXorShift1024Mult& rng, const CVectorNx1& mean,                         \
+        const CSymmetricMatrixNxN& covariance, std::size_t n,                          \
+        std::vector>& samples)
     MULTIVARIATE_NORMAL_SAMPLE(2);
     MULTIVARIATE_NORMAL_SAMPLE(3);
     MULTIVARIATE_NORMAL_SAMPLE(4);
@@ -234,43 +248,59 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable {

     //! Generate a sample from a categorical distribution with
     //! category probabilities \p probabilities using \p rng.
-    static std::size_t categoricalSample(CPRNG::CXorOShiro128Plus& rng, TDoubleVec& probabilities);
+    static std::size_t categoricalSample(CPRNG::CXorOShiro128Plus& rng,
+                                         TDoubleVec& probabilities);

     //! Generate a sample from a categorical distribution with
     //! category probabilities \p probabilities using \p rng.
-    static std::size_t categoricalSample(CPRNG::CXorShift1024Mult& rng, TDoubleVec& probabilities);
+    static std::size_t categoricalSample(CPRNG::CXorShift1024Mult& rng,
+                                         TDoubleVec& probabilities);

     //! Generate \p n samples from a categorical distribution
     //! with category probabilities \p probabilities assuming
     //! the values are replaced between draws.
-    static void categoricalSampleWithReplacement(TDoubleVec& probabilities, std::size_t n, TSizeVec& result);
+    static void categoricalSampleWithReplacement(TDoubleVec& probabilities,
+                                                 std::size_t n,
+                                                 TSizeVec& result);

     //! Generate \p n samples from a categorical distribution
     //! with category probabilities \p probabilities using \p rng
     //! assuming the values are replaced between draws.
-    static void categoricalSampleWithReplacement(CPRNG::CXorOShiro128Plus& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result);
+    static void categoricalSampleWithReplacement(CPRNG::CXorOShiro128Plus& rng,
+                                                 TDoubleVec& probabilities,
+                                                 std::size_t n,
+                                                 TSizeVec& result);

     //! Generate \p n samples from a categorical distribution
     //! with category probabilities \p probabilities using \p rng
     //! assuming the values are replaced between draws.
-    static void categoricalSampleWithReplacement(CPRNG::CXorShift1024Mult& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result);
+    static void categoricalSampleWithReplacement(CPRNG::CXorShift1024Mult& rng,
+                                                 TDoubleVec& probabilities,
+                                                 std::size_t n,
+                                                 TSizeVec& result);

     //! Generate \p n samples from a categorical distribution
     //! with category probabilities \p probabilities assuming
     //! the values are *not* replaced between draws.
-    static void categoricalSampleWithoutReplacement(TDoubleVec& probabilities, std::size_t n, TSizeVec& result);
+    static void categoricalSampleWithoutReplacement(TDoubleVec& probabilities,
+                                                    std::size_t n,
+                                                    TSizeVec& result);

     //! Generate \p n samples from a categorical distribution
     //! with category probabilities \p probabilities using \p rng
     //! assuming the values are *not* replaced between draws.
-    static void
-    categoricalSampleWithoutReplacement(CPRNG::CXorOShiro128Plus& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result);
+    static void categoricalSampleWithoutReplacement(CPRNG::CXorOShiro128Plus& rng,
+                                                    TDoubleVec& probabilities,
+                                                    std::size_t n,
+                                                    TSizeVec& result);

     //! Generate \p n samples from a categorical distribution
     //! with category probabilities \p probabilities using \p rng
     //! assuming the values are *not* replaced between draws.
-    static void
-    categoricalSampleWithoutReplacement(CPRNG::CXorShift1024Mult& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result);
+    static void categoricalSampleWithoutReplacement(CPRNG::CXorShift1024Mult& rng,
+                                                    TDoubleVec& probabilities,
+                                                    std::size_t n,
+                                                    TSizeVec& result);

     //! Generate samples from a multinomial distribution with number
     //! of trials \p n and category probabilities \p probabilities.
@@ -294,7 +324,10 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable {
     //! are zero.
     //! \param[in] sorted Set to true if the probabilities are
     //! already sorted in descending order.
-    static void multinomialSampleFast(TDoubleVec& probabilities, std::size_t n, TSizeVec& sample, bool sorted = false);
+    static void multinomialSampleFast(TDoubleVec& probabilities,
+                                      std::size_t n,
+                                      TSizeVec& sample,
+                                      bool sorted = false);

     //! Generate samples according to the multinomial distribution
     //! with number of trials \p n and category probabilities
diff --git a/include/maths/CSeasonalComponent.h b/include/maths/CSeasonalComponent.h
index 64932dcad4..8c84207b5a 100644
--- a/include/maths/CSeasonalComponent.h
+++ b/include/maths/CSeasonalComponent.h
@@ -81,8 +81,9 @@ class MATHS_EXPORT CSeasonalComponent : private CDecompositionComponent {
     bool initialized() const;

     //! Initialize the adaptive bucketing.
-    bool
-    initialize(core_t::TTime startTime = 0, core_t::TTime endTime = 0, const TFloatMeanAccumulatorVec& values = TFloatMeanAccumulatorVec());
+    bool initialize(core_t::TTime startTime = 0,
+                    core_t::TTime endTime = 0,
+                    const TFloatMeanAccumulatorVec& values = TFloatMeanAccumulatorVec());

     //! Get the size of this component.
     std::size_t size() const;
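
[Note] The without-replacement overloads above amount to: draw a category, remove its probability mass, renormalise, repeat. A minimal sketch with the standard library (hypothetical helper assuming positive weights; CSampling uses its own PRNGs):

    // Categorical sampling without replacement: zero out each drawn
    // category's weight; std::discrete_distribution renormalises the rest.
    #include <algorithm>
    #include <random>
    #include <vector>

    void sampleWithoutReplacement(std::mt19937_64& rng,
                                  std::vector<double> probabilities,
                                  std::size_t n,
                                  std::vector<std::size_t>& result) {
        result.clear();
        n = std::min(n, probabilities.size());
        for (std::size_t i = 0; i < n; ++i) {
            std::discrete_distribution<std::size_t> categorical(
                probabilities.begin(), probabilities.end());
            std::size_t j = categorical(rng);
            result.push_back(j);
            probabilities[j] = 0.0; // Remove the drawn category.
        }
    }
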
@@ -190,7 +191,9 @@ class MATHS_EXPORT CSeasonalComponent : private CDecompositionComponent {
 private:
     //! Create by traversing a state document.
-    bool acceptRestoreTraverser(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(double decayRate,
+                                double minimumBucketLength,
+                                core::CStateRestoreTraverser& traverser);

     //! Get a jitter to apply to the prediction time.
     core_t::TTime jitter(core_t::TTime time);
diff --git a/include/maths/CSeasonalComponentAdaptiveBucketing.h b/include/maths/CSeasonalComponentAdaptiveBucketing.h
index d02d4bf7f2..af46413a8f 100644
--- a/include/maths/CSeasonalComponentAdaptiveBucketing.h
+++ b/include/maths/CSeasonalComponentAdaptiveBucketing.h
@@ -39,13 +39,18 @@ class MATHS_EXPORT CSeasonalComponentAdaptiveBucketing : private CAdaptiveBucket
 public:
     CSeasonalComponentAdaptiveBucketing();
-    explicit CSeasonalComponentAdaptiveBucketing(const CSeasonalTime& time, double decayRate = 0.0, double minimumBucketLength = 0.0);
+    explicit CSeasonalComponentAdaptiveBucketing(const CSeasonalTime& time,
+                                                 double decayRate = 0.0,
+                                                 double minimumBucketLength = 0.0);
     CSeasonalComponentAdaptiveBucketing(const CSeasonalComponentAdaptiveBucketing& other);

     //! Construct by traversing a state document.
-    CSeasonalComponentAdaptiveBucketing(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser);
+    CSeasonalComponentAdaptiveBucketing(double decayRate,
+                                        double minimumBucketLength,
+                                        core::CStateRestoreTraverser& traverser);

     //! Copy from \p rhs.
-    const CSeasonalComponentAdaptiveBucketing& operator=(const CSeasonalComponentAdaptiveBucketing& rhs);
+    const CSeasonalComponentAdaptiveBucketing&
+    operator=(const CSeasonalComponentAdaptiveBucketing& rhs);

     //! Persist by passing information to the supplied inserter.
     void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -69,7 +74,9 @@ class MATHS_EXPORT CSeasonalComponentAdaptiveBucketing : private CAdaptiveBucket
     //! \param[in] endTime The end of the period including \p values.
     //! \param[in] values Time ranges and the corresponding function
     //! value moments.
-    void initialValues(core_t::TTime startTime, core_t::TTime endTime, const TFloatMeanAccumulatorVec& values);
+    void initialValues(core_t::TTime startTime,
+                       core_t::TTime endTime,
+                       const TFloatMeanAccumulatorVec& values);

     //! Get the number of buckets.
     std::size_t size() const;
@@ -178,7 +185,10 @@ class MATHS_EXPORT CSeasonalComponentAdaptiveBucketing : private CAdaptiveBucket
     //! \brief The state maintained for each bucket.
     struct SBucket {
         SBucket();
-        SBucket(const TRegression& regression, double variance, core_t::TTime firstUpdate, core_t::TTime lastUpdate);
+        SBucket(const TRegression& regression,
+                double variance,
+                core_t::TTime firstUpdate,
+                core_t::TTime lastUpdate);

         bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);
         void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -234,7 +244,8 @@ class MATHS_EXPORT CSeasonalComponentAdaptiveBucketing : private CAdaptiveBucket
 };

 //! Create a free function which will be found by Koenig lookup.
-inline void swap(CSeasonalComponentAdaptiveBucketing& lhs, CSeasonalComponentAdaptiveBucketing& rhs) {
+inline void swap(CSeasonalComponentAdaptiveBucketing& lhs,
+                 CSeasonalComponentAdaptiveBucketing& rhs) {
     lhs.swap(rhs);
 }
 }
diff --git a/include/maths/CSeasonalTime.h b/include/maths/CSeasonalTime.h
index e0b5647d91..1e165ba0cc 100644
--- a/include/maths/CSeasonalTime.h
+++ b/include/maths/CSeasonalTime.h
@@ -252,10 +252,12 @@ class MATHS_EXPORT CSeasonalTimeStateSerializer {
 public:
     //! Construct the appropriate CSeasonalTime sub-class from its state
     //! document representation. Sets \p result to NULL on failure.
-    static bool acceptRestoreTraverser(TSeasonalTimePtr& result, core::CStateRestoreTraverser& traverser);
+    static bool acceptRestoreTraverser(TSeasonalTimePtr& result,
+                                       core::CStateRestoreTraverser& traverser);

     //! Persist state by passing information to \p inserter.
-    static void acceptPersistInserter(const CSeasonalTime& time, core::CStatePersistInserter& inserter);
+    static void acceptPersistInserter(const CSeasonalTime& time,
+                                      core::CStatePersistInserter& inserter);
 };
 }
 }
diff --git a/include/maths/CSetTools.h b/include/maths/CSetTools.h
index 25d6511d6b..6b3831ef42 100644
--- a/include/maths/CSetTools.h
+++ b/include/maths/CSetTools.h
@@ -78,35 +78,36 @@ class MATHS_EXPORT CSetTools {
         }
     }

-#define SIMULTANEOUS_REMOVE_IF_IMPL                                                                               \
-    using std::swap;                                                                                              \
-    std::size_t last{0u};                                                                                         \
-    std::size_t n{values1.size()};                                                                                \
-    for (std::size_t i = 0u; i < n; ++i) {                                                                        \
-        if (last != i) {                                                                                          \
-            CUSTOM_SWAP_VALUES                                                                                    \
-        }                                                                                                         \
-        if (!pred(values1[last])) {                                                                               \
-            ++last;                                                                                               \
-        }                                                                                                         \
-    }                                                                                                             \
-    if (last < n) {                                                                                               \
-        CUSTOM_ERASE_VALUES                                                                                       \
-        return true;                                                                                              \
-    }                                                                                                             \
+#define SIMULTANEOUS_REMOVE_IF_IMPL                                                    \
+    using std::swap;                                                                   \
+    std::size_t last{0u};                                                              \
+    std::size_t n{values1.size()};                                                     \
+    for (std::size_t i = 0u; i < n; ++i) {                                             \
+        if (last != i) {                                                               \
+            CUSTOM_SWAP_VALUES                                                         \
+        }                                                                              \
+        if (!pred(values1[last])) {                                                    \
+            ++last;                                                                    \
+        }                                                                              \
+    }                                                                                  \
+    if (last < n) {                                                                    \
+        CUSTOM_ERASE_VALUES                                                            \
+        return true;                                                                   \
+    }                                                                                  \
     return false;

-#define CUSTOM_SWAP_VALUES                                                                                        \
-    swap(values1[i], values1[last]);                                                                              \
+#define CUSTOM_SWAP_VALUES                                                             \
+    swap(values1[i], values1[last]);                                                   \
     swap(values2[i], values2[last]);
-#define CUSTOM_ERASE_VALUES                                                                                       \
-    values1.erase(values1.begin() + last, values1.end());                                                         \
+#define CUSTOM_ERASE_VALUES                                                            \
+    values1.erase(values1.begin() + last, values1.end());                              \
     values2.erase(values2.begin() + last, values2.end());

     //! Remove all instances of \p values1 for which \p pred is true
     //! and corresponding values of \p values2.
     template
-    static bool simultaneousRemoveIf(std::vector& values1, std::vector& values2, const F& pred) {
+    static bool
+    simultaneousRemoveIf(std::vector& values1, std::vector& values2, const F& pred) {
         if (values1.size() != values2.size()) {
             return false;
         }
@@ -117,19 +118,22 @@ class MATHS_EXPORT CSetTools {
 #undef CUSTOM_SWAP_VALUES
 #undef CUSTOM_ERASE_VALUES

-#define CUSTOM_SWAP_VALUES                                                                                        \
-    swap(values1[i], values1[last]);                                                                              \
-    swap(values2[i], values2[last]);                                                                              \
+#define CUSTOM_SWAP_VALUES                                                             \
+    swap(values1[i], values1[last]);                                                   \
+    swap(values2[i], values2[last]);                                                   \
     swap(values3[i], values3[last]);
-#define CUSTOM_ERASE_VALUES                                                                                       \
-    values1.erase(values1.begin() + last, values1.end());                                                         \
-    values2.erase(values2.begin() + last, values2.end());                                                         \
+#define CUSTOM_ERASE_VALUES                                                            \
+    values1.erase(values1.begin() + last, values1.end());                              \
+    values2.erase(values2.begin() + last, values2.end());                              \
     values3.erase(values3.begin() + last, values3.end());

     //! Remove all instances of \p values1 for which \p pred is true
     //! and corresponding values of \p values2 and \p values3.
     template
-    static bool simultaneousRemoveIf(std::vector& values1, std::vector& values2, std::vector& values3, const F& pred) {
+    static bool simultaneousRemoveIf(std::vector& values1,
+                                     std::vector& values2,
+                                     std::vector& values3,
+                                     const F& pred) {
         if (values1.size() != values2.size() || values2.size() != values3.size()) {
             return false;
         }
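
[Note] The macro pair above stamps out simultaneous remove-if over parallel vectors. Written out for the two-vector case, the pattern is a stable compaction that applies identical swaps to both containers and then erases the common tail (a sketch equivalent to the macro expansion):

    #include <utility>
    #include <vector>

    // Returns true if anything was removed; both vectors are compacted with
    // identical element moves so they stay aligned index-by-index.
    template<typename T, typename U, typename F>
    bool simultaneousRemoveIf(std::vector<T>& values1, std::vector<U>& values2, const F& pred) {
        if (values1.size() != values2.size()) {
            return false;
        }
        using std::swap;
        std::size_t last = 0;
        std::size_t n = values1.size();
        for (std::size_t i = 0; i < n; ++i) {
            if (last != i) {
                swap(values1[i], values1[last]);
                swap(values2[i], values2[last]);
            }
            if (!pred(values1[last])) {
                ++last;
            }
        }
        if (last < n) {
            values1.erase(values1.begin() + last, values1.end());
            values2.erase(values2.begin() + last, values2.end());
            return true;
        }
        return false;
    }
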
@@ -144,7 +148,8 @@ class MATHS_EXPORT CSetTools {
     //! Compute the number of elements in the intersection of the
     //! ranges [\p beginLhs, \p endLhs) and [\p beginRhs, \p endRhs).
     template
-    static std::size_t setIntersectSize(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) {
+    static std::size_t
+    setIntersectSize(ITR1 beginLhs, ITR1 endLhs, ITR2 beginRhs, ITR2 endRhs) {
         std::size_t result = 0u;
         while (beginLhs != endLhs && beginRhs != endRhs) {
             if (*beginLhs < *beginRhs) {
diff --git a/include/maths/CSolvers.h b/include/maths/CSolvers.h
index 3b243c33ab..3c0ac27efc 100644
--- a/include/maths/CSolvers.h
+++ b/include/maths/CSolvers.h
@@ -44,19 +44,27 @@ class MATHS_EXPORT CSolvers {
     //! An inverse quadratic interpolation of three distinct
     //! function values. WARNING the caller must ensure that
     //! the \p fa != \p fb != \p fc.
-    static inline double
-    inverseQuadraticInterpolate(const double a, const double b, const double c, const double fa, const double fb, const double fc) {
-        return a * fb * fc / (fa - fb) / (fa - fc) + b * fa * fc / (fb - fa) / (fb - fc) + c * fa * fb / (fc - fa) / (fc - fb);
+    static inline double inverseQuadraticInterpolate(const double a,
+                                                     const double b,
+                                                     const double c,
+                                                     const double fa,
+                                                     const double fb,
+                                                     const double fc) {
+        return a * fb * fc / (fa - fb) / (fa - fc) +
+               b * fa * fc / (fb - fa) / (fb - fc) + c * fa * fb / (fc - fa) / (fc - fb);
     }

     //! A secant interpolation of two distinct function values.
     //! WARNING the caller must ensure that \p fa != \p fb.
-    static inline double secantInterpolate(const double a, const double b, const double fa, const double fb) {
+    static inline double
+    secantInterpolate(const double a, const double b, const double fa, const double fb) {
         return b - fb * (b - a) / (fb - fa);
     }

     //! Bisect the interval [\p a, \p b].
-    static inline double bisect(const double a, const double b) { return (a + b) / 2.0; }
+    static inline double bisect(const double a, const double b) {
+        return (a + b) / 2.0;
+    }

     //! Shift the values such that a = b and b = c.
     static inline void shift(double& a, double& b, const double c) {
@@ -226,7 +234,12 @@ class MATHS_EXPORT CSolvers {
             if (n < (3 * maxIterations) / 4) {
                 double minStep = step;
                 double maxStep = step * step;
-                step = fa == fb ? maxStep : std::min(std::max(std::fabs(b - a) / std::fabs(fb - fa) * std::fabs(fb), minStep), maxStep);
+                step = fa == fb
+                           ? maxStep
+                           : std::min(std::max(std::fabs(b - a) / std::fabs(fb - fa) *
+                                                   std::fabs(fb),
+                                               minStep),
+                                      maxStep);
             }
             a = b;
             fa = fb;
@@ -348,7 +361,8 @@ class MATHS_EXPORT CSolvers {
     //! \param[out] bestGuess Filled in with the best estimate
     //! of the root.
     template
-    static inline void solve(double& a, double& b, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) {
+    static inline void
+    solve(double& a, double& b, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) {
         if (equal(a, b)) {
             bestGuess = bisect(a, b);
             maxIterations = 0u;
@@ -386,8 +400,14 @@ class MATHS_EXPORT CSolvers {
     //! \param[out] bestGuess Filled in with the best estimate
     //! of the root.
     template
-    static void
-    solve(double& a, double& b, double fa, double fb, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) {
+    static void solve(double& a,
+                      double& b,
+                      double fa,
+                      double fb,
+                      const F& f,
+                      std::size_t& maxIterations,
+                      const EQUAL& equal,
+                      double& bestGuess) {
         if (equal(a, b)) {
             // There is a bug in boost's solver for the case that
             // a == b so trap and return early.
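
[Note] For a concrete feel for the three bracket-update rules declared above, here is a tiny standalone exercise on f(x) = x^2 - 2, whose root is sqrt(2) ~ 1.41421 (the formulas are copied from the declarations; the driver itself is illustrative):

    #include <cstdio>

    double inverseQuadratic(double a, double b, double c, double fa, double fb, double fc) {
        return a * fb * fc / ((fa - fb) * (fa - fc)) +
               b * fa * fc / ((fb - fa) * (fb - fc)) +
               c * fa * fb / ((fc - fa) * (fc - fb));
    }
    double secant(double a, double b, double fa, double fb) {
        return b - fb * (b - a) / (fb - fa);
    }
    double bisect(double a, double b) {
        return (a + b) / 2.0;
    }

    int main() {
        auto f = [](double x) { return x * x - 2.0; };
        double a = 1.0, b = 2.0, c = 1.5;
        std::printf("bisect    %.5f\n", bisect(a, b));                                // 1.50000
        std::printf("secant    %.5f\n", secant(a, b, f(a), f(b)));                    // 1.33333
        std::printf("inv. quad %.5f\n", inverseQuadratic(a, b, c, f(a), f(b), f(c))); // 1.40952
        return 0;
    }

Each refinement uses more of the available information, which is why the solvers above prefer inverse quadratic interpolation and fall back to the secant rule and then bisection when the conditions for the faster steps fail.
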
@@ -400,7 +420,9 @@ class MATHS_EXPORT CSolvers {
         // Need at least one step or the boost solver underflows
         // size_t.
         boost::uintmax_t n = std::max(maxIterations, std::size_t(1));
-        TDoubleDoublePr bracket = boost::math::tools::toms748_solve&>(fSafe, a, b, fa, fb, equal, n);
+        TDoubleDoublePr bracket =
+            boost::math::tools::toms748_solve&>(
+                fSafe, a, b, fa, fb, equal, n);
         a = bracket.first;
         b = bracket.second;
         bestGuess = bisect(a, b);
@@ -437,7 +459,8 @@ class MATHS_EXPORT CSolvers {
     //! of the root.
     //! \return True if a, b bracket the root.
     template
-    static bool brent(double& a, double& b, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) {
+    static bool
+    brent(double& a, double& b, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) {
         if (equal(a, b)) {
             bestGuess = bisect(a, b);
             maxIterations = 0u;
@@ -476,8 +499,14 @@ class MATHS_EXPORT CSolvers {
     //! of the root.
     //! \return True if a, b bracket the root.
     template
-    static bool
-    brent(double& a, double& b, double fa, double fb, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) {
+    static bool brent(double& a,
+                      double& b,
+                      double fa,
+                      double fb,
+                      const F& f,
+                      std::size_t& maxIterations,
+                      const EQUAL& equal,
+                      double& bestGuess) {
         std::size_t n = maxIterations;

         if (fa == 0.0) {
@@ -509,13 +538,16 @@ class MATHS_EXPORT CSolvers {
         double d = std::numeric_limits::max();

         do {
-            double s = (fa != fc) && (fb != fc) ? inverseQuadraticInterpolate(a, b, c, fa, fb, fc) : secantInterpolate(a, b, fa, fb);
+            double s = (fa != fc) && (fb != fc)
+                           ? inverseQuadraticInterpolate(a, b, c, fa, fb, fc)
+                           : secantInterpolate(a, b, fa, fb);

             double e = (3.0 * a + b) / 4.0;
             if ((!(((s > e) && (s < b)) || ((s < e) && (s > b)))) ||
                 (bisected && ((std::fabs(s - b) >= std::fabs(b - c) / 2.0) || equal(b, c))) ||
-                (!bisected && ((std::fabs(s - b) >= std::fabs(c - d) / 2.0) || equal(c, d)))) {
+                (!bisected &&
+                 ((std::fabs(s - b) >= std::fabs(c - d) / 2.0) || equal(c, d)))) {
                 // Use bisection.
                 s = bisect(a, b);
                 bisected = true;
@@ -552,8 +584,9 @@ class MATHS_EXPORT CSolvers {
             std::swap(a, b);
             std::swap(fa, fb);
         }
-        bestGuess = (fa != fc) && (fb != fc) ? inverseQuadraticInterpolate(a, b, c, fa, fb, fc)
-                                             : (fa != fb ? secantInterpolate(a, b, fa, fb) : bisect(a, b));
+        bestGuess = (fa != fc) && (fb != fc)
+                        ? inverseQuadraticInterpolate(a, b, c, fa, fb, fc)
+                        : (fa != fb ? secantInterpolate(a, b, fa, fb) : bisect(a, b));
         bestGuess = std::min(std::max(a, bestGuess), b);

         maxIterations -= n;
@@ -581,7 +614,12 @@ class MATHS_EXPORT CSolvers {
     //! of the root.
     //! \return True if a, b bracket the root and equal(a, b).
     template
-    static bool bisection(double& a, double& b, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) {
+    static bool bisection(double& a,
+                          double& b,
+                          const F& f,
+                          std::size_t& maxIterations,
+                          const EQUAL& equal,
+                          double& bestGuess) {
         if (equal(a, b)) {
             bestGuess = bisect(a, b);
             maxIterations = 0u;
@@ -622,8 +660,14 @@ class MATHS_EXPORT CSolvers {
     //! of the root.
     //! \return True if a, b bracket the root and equal(a, b).
     template
-    static bool
-    bisection(double& a, double& b, double fa, double fb, const F& f, std::size_t& maxIterations, const EQUAL& equal, double& bestGuess) {
+    static bool bisection(double& a,
+                          double& b,
+                          double fa,
+                          double fb,
+                          const F& f,
+                          std::size_t& maxIterations,
+                          const EQUAL& equal,
+                          double& bestGuess) {
         std::size_t n = maxIterations;
         if (fa == 0.0) {
             // Root at left bracket.
@@ -697,9 +741,17 @@ class MATHS_EXPORT CSolvers {
     //! \param[out] x Set to argmin of f on [\p a, \p b].
     //! \param[out] fx Set to the value of f at \p x.
     template
-    static inline void
-    minimize(double a, double b, double fa, double fb, const F& f, double tolerance, std::size_t& maxIterations, double& x, double& fx) {
-        minimize(a, b, fa, fb, f, tolerance, maxIterations, -std::numeric_limits::max(), x, fx);
+    static inline void minimize(double a,
+                                double b,
+                                double fa,
+                                double fb,
+                                const F& f,
+                                double tolerance,
+                                std::size_t& maxIterations,
+                                double& x,
+                                double& fx) {
+        minimize(a, b, fa, fb, f, tolerance, maxIterations,
+                 -std::numeric_limits::max(), x, fx);
     }

     //! Maximize the function \p f on the interval [\p a, \p b]
@@ -728,8 +780,15 @@ class MATHS_EXPORT CSolvers {
     //! \param[out] x Set to argmax of f on [\p a, \p b].
     //! \param[out] fx Set to the value of f at \p x.
     template
-    static inline void
-    maximize(double a, double b, double fa, double fb, const F& f, double tolerance, std::size_t& maxIterations, double& x, double& fx) {
+    static inline void maximize(double a,
+                                double b,
+                                double fa,
+                                double fb,
+                                const F& f,
+                                double tolerance,
+                                std::size_t& maxIterations,
+                                double& x,
+                                double& fx) {
         CCompositeFunctions::CMinus f_(f);
         minimize(a, b, -fa, -fb, f_, tolerance, maxIterations, x, fx);
         fx = -fx;
@@ -773,7 +832,8 @@ class MATHS_EXPORT CSolvers {
             if (i == 0) {
                 minimize(p[0], p[1], fp[0], fp[1], f, 0.0, maxIterations, x, fx);
             } else if (i == n - 1) {
-                minimize(p[n - 2], p[n - 1], fp[n - 2], fp[n - 1], f, 0.0, maxIterations, x, fx);
+                minimize(p[n - 2], p[n - 1], fp[n - 2], fp[n - 1], f, 0.0,
+                         maxIterations, x, fx);
             } else {
                 std::size_t ai = i - 1;
                 std::size_t bi = i + 1;
@@ -837,8 +897,14 @@ class MATHS_EXPORT CSolvers {
     //! false otherwise.
     //! \note This will evaluate \p f at most 3 * \p maxIterations.
     template
-    static bool
-    sublevelSet(double a, double b, double fa, double fb, const F& f, const double fc, std::size_t maxIterations, TDoubleDoublePr& result) {
+    static bool sublevelSet(double a,
+                            double b,
+                            double fa,
+                            double fb,
+                            const F& f,
+                            const double fc,
+                            std::size_t maxIterations,
+                            TDoubleDoublePr& result) {
         if (a > b) {
             std::swap(a, b);
             std::swap(fa, fb);
@@ -860,7 +926,8 @@ class MATHS_EXPORT CSolvers {
         CCompositeFunctions::CMinusConstant f_(f, fc);

         LOG_TRACE(<< "a = " << a << ", x = " << x << ", b = " << b);
-        LOG_TRACE(<< "f_(a) = " << fa - fc << ", f_(x) = " << fx - fc << ", f_(b) = " << fb - fc);
+        LOG_TRACE(<< "f_(a) = " << fa - fc << ", f_(x) = " << fx - fc
+                  << ", f_(b) = " << fb - fc);

         const double eps = std::sqrt(std::numeric_limits::epsilon()) * b;
         CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps);
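
[Note] The solve() overloads above delegate to boost's TOMS 748 bracketing solver. A usage sketch of that boost API on its own (assuming only that a sign change is bracketed on [0, 1]):

    #include <boost/math/tools/toms748_solve.hpp>
    #include <cmath>
    #include <cstdio>
    #include <utility>

    int main() {
        auto f = [](double x) { return std::cos(x) - x; }; // root ~ 0.739085
        boost::uintmax_t maxIterations = 20;
        boost::math::tools::eps_tolerance<double> equal(20); // ~20 bits of agreement
        std::pair<double, double> bracket =
            boost::math::tools::toms748_solve(f, 0.0, 1.0, equal, maxIterations);
        std::printf("root in [%.6f, %.6f]\n", bracket.first, bracket.second);
        return 0;
    }

As in the wrapped code, the iteration count is in-out: the solver reduces it by the number of steps actually taken, which is why the callers above clamp it to at least one.
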
diff --git a/include/maths/CSphericalCluster.h b/include/maths/CSphericalCluster.h
index 2192ce49a2..2d55609e2d 100644
--- a/include/maths/CSphericalCluster.h
+++ b/include/maths/CSphericalCluster.h
@@ -22,7 +22,8 @@ namespace maths {

 //! \brief A cluster's count and variance.
 struct MATHS_EXPORT SCountAndVariance {
-    SCountAndVariance(double count = 0.0, double variance = 0.0) : s_Count(count), s_Variance(variance) {}
+    SCountAndVariance(double count = 0.0, double variance = 0.0)
+        : s_Count(count), s_Variance(variance) {}

     //! The count of point in the cluster.
     double s_Count;
@@ -45,7 +46,8 @@ class CSphericalCluster {
     class CHash {
     public:
         std::size_t operator()(const Type& o) const {
-            std::size_t seed = boost::hash_combine(m_PointHash(o), o.annotation().s_Count);
+            std::size_t seed =
+                boost::hash_combine(m_PointHash(o), o.annotation().s_Count);
             return boost::hash_combine(seed, o.annotation().s_Variance);
         }
@@ -57,18 +59,17 @@ class CSphericalCluster {
     public:
         std::size_t operator()(const Type& lhs, const Type& rhs) const {
             return static_cast(lhs) == static_cast(rhs) &&
-                   lhs.annotation().s_Count == rhs.annotation().s_Count && lhs.annotation().s_Variance == rhs.annotation().s_Variance;
+                   lhs.annotation().s_Count == rhs.annotation().s_Count &&
+                   lhs.annotation().s_Variance == rhs.annotation().s_Variance;
         }
     };

     struct SLess {
         bool operator()(const Type& lhs, const Type& rhs) const {
-            return COrderings::lexicographical_compare(static_cast(lhs),
-                                                       lhs.annotation().s_Count,
-                                                       lhs.annotation().s_Variance,
-                                                       static_cast(rhs),
-                                                       rhs.annotation().s_Count,
-                                                       rhs.annotation().s_Variance);
+            return COrderings::lexicographical_compare(
+                static_cast(lhs), lhs.annotation().s_Count,
+                lhs.annotation().s_Variance, static_cast(rhs),
+                rhs.annotation().s_Count, rhs.annotation().s_Variance);
         }
     };
 };
@@ -92,7 +93,8 @@ struct SCentralMomentsCustomAdd, SCountAndVari
                            typename SCoordinate::Type n,
                            CBasicStatistics::SSampleCentralMoments& moments) {
         using TCoordinate = typename SCoordinate::Type;
-        moments += CBasicStatistics::accumulator(TCoordinate(x.annotation().s_Count) * n, T(x), T(x.annotation().s_Variance));
+        moments += CBasicStatistics::accumulator(
+            TCoordinate(x.annotation().s_Count) * n, T(x), T(x.annotation().s_Variance));
     }
 };
@@ -101,14 +103,16 @@ struct SCentralMomentsCustomAdd, SCountAndVari
 template
 struct SCovariancesCustomAdd, SCountAndVariance>> {
     template
-    static inline void add(const CAnnotatedVector, SCountAndVariance>& x,
-                           const CAnnotatedVector, SCountAndVariance>& n,
-                           CBasicStatistics::SSampleCovariances& covariances) {
+    static inline void
+    add(const CAnnotatedVector, SCountAndVariance>& x,
+        const CAnnotatedVector, SCountAndVariance>& n,
+        CBasicStatistics::SSampleCovariances& covariances) {
         CSymmetricMatrixNxN m(0);
         for (std::size_t i = 0u; i < N; ++i) {
             m(i, i) = x.annotation().s_Variance;
         }
-        covariances += CBasicStatistics::SSampleCovariances(T(x.annotation().s_Count) * n, x, m);
+        covariances += CBasicStatistics::SSampleCovariances(
+            T(x.annotation().s_Count) * n, x, m);
     }
 };
@@ -124,13 +128,15 @@ struct SCovariancesCustomAdd, SCountAndVarianc
 template
 struct SCovariancesLedoitWolf, SCountAndVariance>> {
     template
-    static void estimate(const std::vector, SCountAndVariance>>& points,
-                         CBasicStatistics::SSampleCovariances& covariances) {
+    static void
+    estimate(const std::vector, SCountAndVariance>>& points,
+             CBasicStatistics::SSampleCovariances& covariances) {
         U d = static_cast(N);

         U n = CBasicStatistics::count(covariances);
         const CVectorNx1& m = CBasicStatistics::mean(covariances);
-        const CSymmetricMatrixNxN& s = CBasicStatistics::maximumLikelihoodCovariances(covariances);
+        const CSymmetricMatrixNxN& s =
+            CBasicStatistics::maximumLikelihoodCovariances(covariances);

         double mn = s.trace() / d;
         double dn = pow2((s - CVectorNx1(mn).diagonal()).frobenius()) / d;
@@ -140,12 +146,15 @@ struct SCovariancesLedoitWolf, SCountAndVarian
             CVectorNx1 ci(points[i]);
             U ni = static_cast(points[i].annotation().s_Count);
             U vi = static_cast(points[i].annotation().s_Variance);
-            bn += ni * pow2(((ci - m).outer() + CVectorNx1(vi).diagonal() - s).frobenius()) / d / z;
+            bn += ni *
+                  pow2(((ci - m).outer() + CVectorNx1(vi).diagonal() - s).frobenius()) /
+                  d / z;
         }
         bn = std::min(bn, dn);
         LOG_TRACE(<< "m = " << mn << ", d = " << dn << ", b = " << bn);

-        covariances.s_Covariances = CVectorNx1(bn / dn * mn).diagonal() + (U(1) - bn / dn) * covariances.s_Covariances;
+        covariances.s_Covariances = CVectorNx1(bn / dn * mn).diagonal() +
+                                    (U(1) - bn / dn) * covariances.s_Covariances;
     }

     template
@@ -157,8 +166,10 @@ struct SCovariancesLedoitWolf, SCountAndVarian

 //! Write a description of \p cluster for debugging.
 template
-std::ostream& operator<<(std::ostream& o, const CAnnotatedVector& cluster) {
-    return o << static_cast(cluster) << " (" << cluster.annotation().s_Count << ","
+std::ostream& operator<<(std::ostream& o,
+                         const CAnnotatedVector& cluster) {
+    return o << static_cast(cluster) << " ("
+             << cluster.annotation().s_Count << ","
              << std::sqrt(cluster.annotation().s_Variance) << ")";
 }
 }
diff --git a/include/maths/CSpline.h b/include/maths/CSpline.h
index cff5b3f42b..71b900d85e 100644
--- a/include/maths/CSpline.h
+++ b/include/maths/CSpline.h
@@ -39,7 +42,10 @@ using TFloatVec = std::vector;
 //! \param[in,out] x Initially contains the input vector \f$y\f$,
 //! and returns the solution \f$x\f$, indexed from [0, ..., n - 1].
 //! \note The contents of input vector c will be modified.
-bool MATHS_EXPORT solveTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDoubleVec& c, TDoubleVec& x);
+bool MATHS_EXPORT solveTridiagonal(const TDoubleVec& a,
+                                   const TDoubleVec& b,
+                                   TDoubleVec& c,
+                                   TDoubleVec& x);

 //! Solves:
 //!
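
[Note] The SCovariancesLedoitWolf::estimate specialisation above implements Ledoit-Wolf style shrinkage: it pulls the sample covariance toward a scaled identity, with the shrinkage weight set by the ratio of the estimation noise b to the dispersion d of the covariance about its mean eigenvalue. A condensed sketch of the same rule (equal-weight points, Eigen types, illustrative names):

    #include <algorithm>
    #include <vector>
    #include <Eigen/Dense>

    // S_hat = (b/d) * m * I + (1 - b/d) * S, where m is the mean eigenvalue,
    // d = ||S - m I||_F^2 / N and b estimates the sampling noise in S.
    Eigen::MatrixXd ledoitWolfShrink(const Eigen::MatrixXd& S,
                                     const std::vector<Eigen::VectorXd>& points,
                                     const Eigen::VectorXd& mean) {
        double N = static_cast<double>(S.rows());
        double n = static_cast<double>(points.size());
        Eigen::MatrixXd I = Eigen::MatrixXd::Identity(S.rows(), S.cols());
        double m = S.trace() / N;
        double d = (S - m * I).squaredNorm() / N; // squared Frobenius / dimension
        double b = 0.0;
        for (const Eigen::VectorXd& x : points) {
            Eigen::MatrixXd outer = (x - mean) * (x - mean).transpose();
            b += (outer - S).squaredNorm() / N / (n * n);
        }
        b = std::min(b, d); // Never shrink past the identity target.
        return (b / d) * m * I + (1.0 - b / d) * S;
    }
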
@@ -55,8 +58,12 @@ bool MATHS_EXPORT solveTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDo
 //! \param[in,out] x Initially contains the input vector \f$y\f$,
 //! and returns the solution \f$x\f$, indexed from [0, ..., n - 1].
 //! \note The contents of input vector c will be modified.
-bool MATHS_EXPORT
-solvePeturbedTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDoubleVec& c, TDoubleVec& u, const TDoubleVec& v, TDoubleVec& x);
+bool MATHS_EXPORT solvePeturbedTridiagonal(const TDoubleVec& a,
+                                           const TDoubleVec& b,
+                                           TDoubleVec& c,
+                                           TDoubleVec& u,
+                                           const TDoubleVec& v,
+                                           TDoubleVec& x);
 }
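
[Note] solveTridiagonal above is the usual O(n) Thomas algorithm (forward elimination of the sub-diagonal, then back substitution); the perturbed variant additionally handles the rank-one term that periodic boundary conditions introduce. A minimal sketch of the plain solver under the stated conventions (a = sub-diagonal, b = diagonal, c = super-diagonal, x = right-hand side on input and solution on output; assumes non-zero pivots):

    #include <vector>

    bool thomasSolve(const std::vector<double>& a, // sub-diagonal, size n - 1
                     const std::vector<double>& b, // diagonal, size n
                     std::vector<double> c,        // super-diagonal, size n - 1
                     std::vector<double>& x) {     // rhs in, solution out
        std::size_t n = x.size();
        if (n == 0 || b[0] == 0.0) {
            return false;
        }
        if (n > 1) {
            c[0] /= b[0];
        }
        x[0] /= b[0];
        for (std::size_t i = 1; i < n; ++i) {
            double denominator = b[i] - a[i - 1] * c[i - 1];
            if (denominator == 0.0) {
                return false;
            }
            if (i + 1 < n) {
                c[i] /= denominator;
            }
            x[i] = (x[i] - a[i - 1] * x[i - 1]) / denominator;
        }
        for (std::size_t i = n - 1; i-- > 0;) {
            x[i] -= c[i] * x[i + 1];
        }
        return true;
    }
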
 
 //! \brief Defines types used by the spline implementation.
@@ -164,10 +171,10 @@ class CSpline : public CSplineTypes {
             return 0.0;
         }
 
-        std::size_t k =
-            CTools::truncate(std::size_t(std::lower_bound(this->knots().begin(), this->knots().end(), x) - this->knots().begin()),
-                             std::size_t(1),
-                             this->knots().size() - 1);
+        std::size_t k = CTools::truncate(
+            std::size_t(std::lower_bound(this->knots().begin(), this->knots().end(), x) -
+                        this->knots().begin()),
+            std::size_t(1), this->knots().size() - 1);
 
         if (x == this->knots()[k]) {
             return this->values()[k];
@@ -185,8 +192,8 @@ class CSpline : public CSplineTypes {
             double h = this->knots()[k] - this->knots()[k - 1];
             double a = (this->curvatures()[k] - this->curvatures()[k - 1]) / 6.0 / h;
             double b = this->curvatures()[k - 1] / 2.0;
-            double c =
-                (this->values()[k] - this->values()[k - 1]) / h - (this->curvatures()[k] / 6.0 + this->curvatures()[k - 1] / 3.0) * h;
+            double c = (this->values()[k] - this->values()[k - 1]) / h -
+                       (this->curvatures()[k] / 6.0 + this->curvatures()[k - 1] / 3.0) * h;
             double d = this->values()[k - 1];
             double r = x - this->knots()[k - 1];
             return ((a * r + b) * r + c) * r + d;
@@ -221,8 +228,8 @@ class CSpline : public CSplineTypes {
                 double h = this->knots()[i] - this->knots()[i - 1];
                 double a = (this->curvatures()[i] - this->curvatures()[i - 1]) / 6.0 / h;
                 double b = this->curvatures()[i - 1] / 2.0;
-                double c =
-                    (this->values()[i] - this->values()[i - 1]) / h - (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h;
+                double c = (this->values()[i] - this->values()[i - 1]) / h -
+                           (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h;
                 double d = this->values()[i - 1];
                 result.add(((a * h / 4.0 + b / 3.0) * h + c / 2.0) * h + d, h / interval);
             }
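
[Note] Both hunks above expand a cubic segment in the shifted variable r = x - knots[k - 1], with coefficients derived from the stored values and curvatures (second derivatives) at the knots. Pulled out as a free function, the evaluation reads (names illustrative):

    // Evaluate one natural-cubic-spline segment on [k0, k1] given values and
    // curvatures at the two knots; identical to the Horner form used above.
    double evaluateSegment(double x,
                           double k0, double k1,   // bracketing knots
                           double v0, double v1,   // values at the knots
                           double c0, double c1) { // curvatures at the knots
        double h = k1 - k0;
        double a = (c1 - c0) / 6.0 / h;
        double b = c0 / 2.0;
        double c = (v1 - v0) / h - (c1 / 6.0 + c0 / 3.0) * h;
        double d = v0;
        double r = x - k0;
        return ((a * r + b) * r + c) * r + d; // a r^3 + b r^2 + c r + d
    }
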
@@ -242,10 +249,10 @@ class CSpline : public CSplineTypes {
             return 0.0;
         }
 
-        std::size_t k =
-            CTools::truncate(std::size_t(std::lower_bound(this->knots().begin(), this->knots().end(), x) - this->knots().begin()),
-                             std::size_t(1),
-                             this->knots().size() - 1);
+        std::size_t k = CTools::truncate(
+            std::size_t(std::lower_bound(this->knots().begin(), this->knots().end(), x) -
+                        this->knots().begin()),
+            std::size_t(1), this->knots().size() - 1);
 
         switch (m_Type) {
         case E_Linear: {
@@ -256,8 +263,8 @@ class CSpline : public CSplineTypes {
             double h = this->knots()[k] - this->knots()[k - 1];
             double a = (this->curvatures()[k] - this->curvatures()[k - 1]) / 6.0 / h;
             double b = this->curvatures()[k - 1] / 2.0;
-            double c =
-                (this->values()[k] - this->values()[k - 1]) / h - (this->curvatures()[k] / 6.0 + this->curvatures()[k - 1] / 3.0) * h;
+            double c = (this->values()[k] - this->values()[k - 1]) / h -
+                       (this->curvatures()[k] / 6.0 + this->curvatures()[k - 1] / 3.0) * h;
             double r = x - this->knots()[k - 1];
             return ((3.0 * a * r + 2.0 * b) * r + c);
         }
@@ -291,8 +298,8 @@ class CSpline : public CSplineTypes {
                 double h = b - a;
                 double ai = (this->curvatures()[i] - this->curvatures()[i - 1]) / 6.0 / h;
                 double bi = this->curvatures()[i - 1] / 2.0;
-                double ci =
-                    (this->values()[i] - this->values()[i - 1]) / h - (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h;
+                double ci = (this->values()[i] - this->values()[i - 1]) / h -
+                            (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h;
 
                 double descriminant = bi * bi - 3.0 * ai * ci;
                 if (descriminant < 0.0) {
@@ -304,9 +311,10 @@ class CSpline : public CSplineTypes {
                 if (rl > rr) {
                     std::swap(rl, rr);
                 }
-                result += std::fabs(((ai * (rl - a) + bi) * (rl - a) + ci) * (rl - a)) +
-                          std::fabs(((ai * (rr - rl) + bi) * (rr - rl) + ci) * (rr - rl)) +
-                          std::fabs(((ai * (b - rr) + bi) * (b - rr) + ci) * (b - rr));
+                result +=
+                    std::fabs(((ai * (rl - a) + bi) * (rl - a) + ci) * (rl - a)) +
+                    std::fabs(((ai * (rr - rl) + bi) * (rr - rl) + ci) * (rr - rl)) +
+                    std::fabs(((ai * (b - rr) + bi) * (b - rr) + ci) * (b - rr));
             }
             break;
         }
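
[Note] The loop above accumulates the unsigned variation of each cubic segment by splitting at the slope's turning points. The identity this relies on is that for p(r) = a_i r^3 + b_i r^2 + c_i r (measured from the left knot, with h the knot spacing) and r_l <= r_r the roots of p'(r) = 3 a_i r^2 + 2 b_i r + c_i clamped to [0, h],

\[
\int_0^h \lvert p'(r) \rvert \, dr
  = \lvert p(r_l) - p(0) \rvert + \lvert p(r_r) - p(r_l) \rvert + \lvert p(h) - p(r_r) \rvert ,
\]

since p' keeps a constant sign between consecutive turning points; when the discriminant b_i^2 - 3 a_i c_i is negative there are no turning points and the integral is simply |p(h) - p(0)|.
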
@@ -321,7 +329,10 @@ class CSpline : public CSplineTypes {
     //! \param[out] c Filled in with the linear coefficient.
     //! \param[out] d Filled in with the constant.
     //! \note Null pointers are ignored.
-    void coefficients(TDoubleVec* a = nullptr, TDoubleVec* b = nullptr, TDoubleVec* c = nullptr, TDoubleVec* d = nullptr) const {
+    void coefficients(TDoubleVec* a = nullptr,
+                      TDoubleVec* b = nullptr,
+                      TDoubleVec* c = nullptr,
+                      TDoubleVec* d = nullptr) const {
         if (a)
             a->reserve(this->values().size());
         if (b)
@@ -354,8 +365,9 @@ class CSpline : public CSplineTypes {
                 if (b)
                     b->push_back(this->curvatures()[i - 1] / 2.0);
                 if (c)
-                    c->push_back((this->values()[i] - this->values()[i - 1]) / h -
-                                 (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h);
+                    c->push_back(
+                        (this->values()[i] - this->values()[i - 1]) / h -
+                        (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h);
                 if (d)
                     d->push_back(this->values()[i - 1]);
             }
@@ -386,7 +398,8 @@ class CSpline : public CSplineTypes {
         }
         if (knots.size() != values.size()) {
             LOG_ERROR(<< "Number knots not equal to number of values: "
-                      << " knots = " << core::CContainerPrinter::print(knots) << " values = " << core::CContainerPrinter::print(values));
+                      << " knots = " << core::CContainerPrinter::print(knots)
+                      << " values = " << core::CContainerPrinter::print(values));
             return false;
         }
 
@@ -421,8 +434,10 @@ class CSpline : public CSplineTypes {
                 this->valuesRef()[last] = this->values()[i_];
             }
         }
-        this->knotsRef().erase(this->knotsRef().begin() + last + 1, this->knotsRef().end());
-        this->valuesRef().erase(this->valuesRef().begin() + last + 1, this->valuesRef().end());
+        this->knotsRef().erase(this->knotsRef().begin() + last + 1,
+                               this->knotsRef().end());
+        this->valuesRef().erase(this->valuesRef().begin() + last + 1,
+                                this->valuesRef().end());
         n = this->knots().size();
         LOG_TRACE(<< "knots = " << core::CContainerPrinter::print(this->knots()));
         LOG_TRACE(<< "values = " << core::CContainerPrinter::print(this->values()));
@@ -473,7 +488,8 @@ class CSpline : public CSplineTypes {
                 b.push_back(2.0 * (h + h_));
                 c.push_back(h - 1.0);
                 this->curvaturesRef().push_back(
-                    6.0 * ((this->values()[1] - this->values()[0]) / h - (this->values()[0] - this->values()[n - 2]) / h_));
+                    6.0 * ((this->values()[1] - this->values()[0]) / h -
+                           (this->values()[0] - this->values()[n - 2]) / h_));
                 break;
             }
 
@@ -484,7 +500,8 @@ class CSpline : public CSplineTypes {
                 b.push_back(2.0 * (h + h_));
                 c.push_back(h);
                 this->curvaturesRef().push_back(
-                    6.0 * ((this->values()[i + 1] - this->values()[i]) / h - (this->values()[i] - this->values()[i - 1]) / h_));
+                    6.0 * ((this->values()[i + 1] - this->values()[i]) / h -
+                           (this->values()[i] - this->values()[i - 1]) / h_));
             }
 
             h_ = h;
@@ -521,8 +538,10 @@ class CSpline : public CSplineTypes {
                 v[1] = 1.0;
                 v[n - 2] = h_;
                 this->curvaturesRef().push_back(
-                    6.0 * ((this->values()[1] - this->values()[n - 1]) / h - (this->values()[n - 1] - this->values()[n - 2]) / h_));
-                if (!spline_detail::solvePeturbedTridiagonal(a, b, c, u, v, this->curvaturesRef())) {
+                    6.0 * ((this->values()[1] - this->values()[n - 1]) / h -
+                           (this->values()[n - 1] - this->values()[n - 2]) / h_));
+                if (!spline_detail::solvePeturbedTridiagonal(
+                        a, b, c, u, v, this->curvaturesRef())) {
                     LOG_ERROR(<< "Failed to calculate curvatures");
                     return false;
                 }
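
[Note] The periodic boundary condition in this hunk couples the first and last curvature equations, turning the tridiagonal system T into T + u v^T for the u and v built just above. The standard way to solve such a perturbed system, consistent with solvePeturbedTridiagonal's signature, is the Sherman-Morrison identity

\[
(T + u v^{\top})^{-1} d \;=\; T^{-1} d \;-\; \frac{v^{\top} T^{-1} d}{1 + v^{\top} T^{-1} u} \, T^{-1} u ,
\]

so one perturbed solve costs two ordinary tridiagonal solves (against d and against u) plus a couple of inner products, keeping the whole fit O(n).
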
@@ -553,9 +572,12 @@ class CSpline : public CSplineTypes {
                 double h = this->knots()[i] - this->knots()[i - 1];
                 double c = (this->values()[i] - this->values()[i - 1]) / h;
                 double d = this->values()[i - 1];
-                std::string kl = core::CStringUtils::typeToStringPretty(this->knots()[i - 1]);
-                result += "\n" + indent + core::CStringUtils::typeToStringPretty(c) + " (x - " + kl + ") + " +
-                          core::CStringUtils::typeToStringPretty(d) + "   x in [" + kl + "," +
+                std::string kl =
+                    core::CStringUtils::typeToStringPretty(this->knots()[i - 1]);
+                result += "\n" + indent +
+                          core::CStringUtils::typeToStringPretty(c) + " (x - " +
+                          kl + ") + " + core::CStringUtils::typeToStringPretty(d) +
+                          "   x in [" + kl + "," +
                           core::CStringUtils::typeToStringPretty(this->knots()[i]) + ")";
             }
             break;
@@ -565,13 +587,17 @@ class CSpline : public CSplineTypes {
                 double h = this->knots()[i] - this->knots()[i - 1];
                 double a = (this->curvatures()[i] - this->curvatures()[i - 1]) / 6.0 / h;
                 double b = this->curvatures()[i - 1] / 2.0;
-                double c =
-                    (this->values()[i] - this->values()[i - 1]) / h - (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h;
+                double c = (this->values()[i] - this->values()[i - 1]) / h -
+                           (this->curvatures()[i] / 6.0 + this->curvatures()[i - 1] / 3.0) * h;
                 double d = this->values()[i - 1];
-                std::string kl = core::CStringUtils::typeToStringPretty(this->knots()[i - 1]);
-                result += "\n" + indent + core::CStringUtils::typeToStringPretty(a) + " (x - " + kl + ")^3 + " +
-                          core::CStringUtils::typeToStringPretty(b) + " (x - " + kl + ")^2 + " + core::CStringUtils::typeToStringPretty(c) +
-                          " (x - " + kl + ") + " + core::CStringUtils::typeToStringPretty(d) + "   x in [" + kl + "," +
+                std::string kl =
+                    core::CStringUtils::typeToStringPretty(this->knots()[i - 1]);
+                result += "\n" + indent + core::CStringUtils::typeToStringPretty(a) +
+                          " (x - " + kl + ")^3 + " +
+                          core::CStringUtils::typeToStringPretty(b) + " (x - " + kl +
+                          ")^2 + " + core::CStringUtils::typeToStringPretty(c) + " (x - " +
+                          kl + ") + " + core::CStringUtils::typeToStringPretty(d) +
+                          "   x in [" + kl + "," +
                           core::CStringUtils::typeToStringPretty(this->knots()[i]) + ")";
             }
             break;
@@ -603,13 +629,19 @@ class CSpline : public CSplineTypes {
     }
 
     //! Get the knot points of the spline.
-    inline const TNonConstKnots& knots() const { return boost::unwrap_ref(m_Knots); }
+    inline const TNonConstKnots& knots() const {
+        return boost::unwrap_ref(m_Knots);
+    }
 
     //! Get the values at the knot points of the spline.
-    inline const TNonConstValues& values() const { return boost::unwrap_ref(m_Values); }
+    inline const TNonConstValues& values() const {
+        return boost::unwrap_ref(m_Values);
+    }
 
     //! Get the curvatures at the knot points of the spline.
-    inline const TNonConstCurvatures& curvatures() const { return boost::unwrap_ref(m_Curvatures); }
+    inline const TNonConstCurvatures& curvatures() const {
+        return boost::unwrap_ref(m_Curvatures);
+    }
 
 private:
     //! Get the knot points of the spline.
@@ -619,7 +651,9 @@ class CSpline : public CSplineTypes {
     inline TNonConstValues& valuesRef() { return boost::unwrap_ref(m_Values); }
 
     //! Get the curvatures at the knot points of the spline.
-    inline TCurvatures& curvaturesRef() { return boost::unwrap_ref(m_Curvatures); }
+    inline TCurvatures& curvaturesRef() {
+        return boost::unwrap_ref(m_Curvatures);
+    }
 
 private:
     //! The type of spline.
diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h
index ab4b5f70a0..fcb7ac4344 100644
--- a/include/maths/CTimeSeriesChangeDetector.h
+++ b/include/maths/CTimeSeriesChangeDetector.h
@@ -81,7 +81,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector {
                                         double minimumDeltaBicToDetect = 14.0);
 
     //! Initialize by reading state from \p traverser.
-    bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const SModelRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser);
 
     //! Persist state by passing information to \p inserter.
     void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -98,7 +99,9 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector {
     double decisionFunction(std::size_t& change) const;
 
     //! Add \p samples to the change detector.
-    void addSamples(const TWeightStyleVec& weightStyles, const TTimeDoublePr1Vec& samples, const TDouble4Vec1Vec& weights);
+    void addSamples(const TWeightStyleVec& weightStyles,
+                    const TTimeDoublePr1Vec& samples,
+                    const TDouble4Vec1Vec& weights);
 
     //! Check if we should stop testing.
     bool stopTesting() const;
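
[Note] The constructor above takes minimumDeltaBicToDetect (defaulting to 14.0), meaning a candidate change model must beat the no-change model by that Bayesian Information Criterion margin before a change is reported. A toy sketch of that decision rule (illustrative only, not the class's internals):

    #include <cmath>

    // BIC = k ln(n) - 2 ln(L); accept the change only if it wins by a margin.
    bool acceptChange(double logLikelihoodNoChange, std::size_t parametersNoChange,
                      double logLikelihoodChange, std::size_t parametersChange,
                      std::size_t n, double minimumDeltaBicToDetect = 14.0) {
        double logN = std::log(static_cast<double>(n));
        double bicNoChange = static_cast<double>(parametersNoChange) * logN -
                             2.0 * logLikelihoodNoChange;
        double bicChange = static_cast<double>(parametersChange) * logN -
                           2.0 * logLikelihoodChange;
        return bicNoChange - bicChange > minimumDeltaBicToDetect;
    }

The margin makes the test conservative: the extra parameters of the change model must buy a substantial likelihood gain, not just the automatic improvement that comes with added flexibility.
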
@@ -163,7 +166,8 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable {
     virtual ~CUnivariateChangeModel() = default;
 
     //! Initialize by reading state from \p traverser.
-    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) = 0;
+    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params,
+                                        core::CStateRestoreTraverser& traverser) = 0;
 
     //! Persist state by passing information to \p inserter.
     virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0;
@@ -178,8 +182,10 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable {
     virtual TOptionalChangeDescription change() const = 0;
 
     //! Update the change model with \p samples.
-    virtual void
-    addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights) = 0;
+    virtual void addSamples(const std::size_t count,
+                            TWeightStyleVec weightStyles,
+                            const TTimeDoublePr1Vec& samples,
+                            TDouble4Vec1Vec weights) = 0;
 
     //! Debug the memory used by this object.
     void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
@@ -195,7 +201,8 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable {
 
 protected:
     //! Restore the residual model reading state from \p traverser.
-    bool restoreResidualModel(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool restoreResidualModel(const SDistributionRestoreParams& params,
+                              core::CStateRestoreTraverser& traverser);
 
     //! Get the log-likelihood.
     double logLikelihood() const;
@@ -204,10 +211,13 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable {
     double expectedLogLikelihood() const;
 
     //! Update the log-likelihood with \p samples.
-    void updateLogLikelihood(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights);
+    void updateLogLikelihood(const TWeightStyleVec& weightStyles,
+                             const TDouble1Vec& samples,
+                             const TDouble4Vec1Vec& weights);
 
     //! Update the expected log-likelihoods.
-    void updateExpectedLogLikelihood(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights);
+    void updateExpectedLogLikelihood(const TWeightStyleVec& weightStyles,
+                                     const TDouble4Vec1Vec& weights);
 
     //! Get the time series trend model.
     const CTimeSeriesDecompositionInterface& trendModel() const;
@@ -239,7 +249,8 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode
     CUnivariateNoChangeModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel);
 
     //! Initialize by reading state from \p traverser.
-    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params,
+                                        core::CStateRestoreTraverser& traverser);
 
     //! Persist state by passing information to \p inserter.
     virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -254,8 +265,10 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode
     virtual TOptionalChangeDescription change() const;
 
     //! Get the log likelihood of \p samples.
-    virtual void
-    addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights);
+    virtual void addSamples(const std::size_t count,
+                            TWeightStyleVec weightStyles,
+                            const TTimeDoublePr1Vec& samples,
+                            TDouble4Vec1Vec weights);
 
     //! Get the static size of this object.
     virtual std::size_t staticSize() const;
@@ -268,10 +281,12 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode
 //! level shift.
 class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeModel {
 public:
-    CUnivariateLevelShiftModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel);
+    CUnivariateLevelShiftModel(const TDecompositionPtr& trendModel,
+                               const TPriorPtr& residualModel);
 
     //! Initialize by reading state from \p traverser.
-    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params,
+                                        core::CStateRestoreTraverser& traverser);
 
     //! Persist state by passing information to \p inserter.
     virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -286,8 +301,10 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo
     virtual TOptionalChangeDescription change() const;
 
     //! Update with \p samples.
-    virtual void
-    addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights);
+    virtual void addSamples(const std::size_t count,
+                            TWeightStyleVec weightStyles,
+                            const TTimeDoublePr1Vec& samples,
+                            TDouble4Vec1Vec weights);
 
     //! Get the static size of this object.
     virtual std::size_t staticSize() const;
@@ -313,10 +330,12 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo
 //! linear scaling.
 class MATHS_EXPORT CUnivariateLinearScaleModel final : public CUnivariateChangeModel {
 public:
-    CUnivariateLinearScaleModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel);
+    CUnivariateLinearScaleModel(const TDecompositionPtr& trendModel,
+                                const TPriorPtr& residualModel);
 
     //! Initialize by reading state from \p traverser.
-    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params,
+                                        core::CStateRestoreTraverser& traverser);
 
     //! Persist state by passing information to \p inserter.
     virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -331,8 +350,10 @@ class MATHS_EXPORT CUnivariateLinearScaleModel final : public CUnivariateChangeM
     virtual TOptionalChangeDescription change() const;
 
     //! Update with \p samples.
-    virtual void
-    addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights);
+    virtual void addSamples(const std::size_t count,
+                            TWeightStyleVec weightStyles,
+                            const TTimeDoublePr1Vec& samples,
+                            TDouble4Vec1Vec weights);
 
     //! Get the static size of this object.
     virtual std::size_t staticSize() const;
@@ -358,10 +379,13 @@ class MATHS_EXPORT CUnivariateLinearScaleModel final : public CUnivariateChangeM
 //! time shift.
 class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeModel {
 public:
-    CUnivariateTimeShiftModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel, core_t::TTime shift);
+    CUnivariateTimeShiftModel(const TDecompositionPtr& trendModel,
+                              const TPriorPtr& residualModel,
+                              core_t::TTime shift);
 
     //! Initialize by reading state from \p traverser.
-    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    virtual bool acceptRestoreTraverser(const SModelRestoreParams& params,
+                                        core::CStateRestoreTraverser& traverser);
 
     //! Persist state by passing information to \p inserter.
     virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -376,8 +400,10 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod
     virtual TOptionalChangeDescription change() const;
 
     //! Update with \p samples.
-    virtual void
-    addSamples(const std::size_t count, TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, TDouble4Vec1Vec weights);
+    virtual void addSamples(const std::size_t count,
+                            TWeightStyleVec weightStyles,
+                            const TTimeDoublePr1Vec& samples,
+                            TDouble4Vec1Vec weights);
 
     //! Get the static size of this object.
     virtual std::size_t staticSize() const;
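
Taken together, the interface above amounts to a delta-BIC test: the no-change model and each candidate change model accumulate log-likelihoods as samples arrive, and decisionFunction() reports how far the best candidate's BIC improvement extends beyond minimumDeltaBicToDetect. A toy stand-in showing just that shape; the real detector's models, weighting and evidence terms are richer:

    #include <cstddef>
    #include <vector>

    class CToyChangeDetector {
    public:
        explicit CToyChangeDetector(double minimumDeltaBicToDetect = 14.0)
            : m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect} {}

        // In the real detector BIC is derived from accumulated log-likelihoods;
        // here the caller supplies the values directly.
        void updateBic(double bicNoChange, const std::vector<double>& bicCandidates) {
            m_BicNoChange = bicNoChange;
            m_BicCandidates = bicCandidates;
        }

        // Positive iff some candidate beats "no change" by the required margin;
        // 'change' is set to the best candidate's index (untouched otherwise).
        double decisionFunction(std::size_t& change) const {
            double best{0.0};
            for (std::size_t i = 0; i < m_BicCandidates.size(); ++i) {
                double delta{m_BicNoChange - m_BicCandidates[i]};
                if (delta > best) {
                    best = delta;
                    change = i;
                }
            }
            return best - m_MinimumDeltaBicToDetect;
        }

    private:
        double m_MinimumDeltaBicToDetect;
        double m_BicNoChange{0.0};
        std::vector<double> m_BicCandidates;
    };
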
diff --git a/include/maths/CTimeSeriesDecomposition.h b/include/maths/CTimeSeriesDecomposition.h
index 502aa280f1..932bdd8375 100644
--- a/include/maths/CTimeSeriesDecomposition.h
+++ b/include/maths/CTimeSeriesDecomposition.h
@@ -48,7 +48,8 @@ struct STimeSeriesDecompositionRestoreParams;
 //!
 //! By default this assumes the data has one day and one week
 //! periodicity, i.e. \f${ T_i } = { 86400, 604800 }\f$.
-class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInterface, private CTimeSeriesDecompositionDetail {
+class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInterface,
+                                              private CTimeSeriesDecompositionDetail {
 public:
     using TSizeVec = std::vector<std::size_t>;
 
@@ -62,10 +63,12 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt
                                       std::size_t seasonalComponentSize = DECOMPOSITION_COMPONENT_SIZE);
 
     //! Construct from part of a state document.
-    CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams& params,
+                             core::CStateRestoreTraverser& traverser);
 
     //! Deep copy.
-    CTimeSeriesDecomposition(const CTimeSeriesDecomposition& other, bool isForForecast = false);
+    CTimeSeriesDecomposition(const CTimeSeriesDecomposition& other,
+                             bool isForForecast = false);
 
     //! An efficient swap of the state of this and \p other.
     void swap(CTimeSeriesDecomposition& other);
@@ -129,7 +132,10 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt
     //! \param[in] confidence The symmetric confidence interval for the prediction
     //! of the baseline as a percentage.
     //! \param[in] components The components to include in the baseline.
-    virtual maths_t::TDoubleDoublePr value(core_t::TTime time, double confidence = 0.0, int components = E_All, bool smooth = true) const;
+    virtual maths_t::TDoubleDoublePr value(core_t::TTime time,
+                                           double confidence = 0.0,
+                                           int components = E_All,
+                                           bool smooth = true) const;
 
     //! Forecast from \p start to \p end at \p dt intervals.
     //!
@@ -148,7 +154,8 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt
 
     //! Detrend \p value from the time series being modeled by removing
     //! any trend and periodic component at \p time.
-    virtual double detrend(core_t::TTime time, double value, double confidence, int components = E_All) const;
+    virtual double
+    detrend(core_t::TTime time, double value, double confidence, int components = E_All) const;
 
     //! Get the mean variance of the baseline.
     virtual double meanVariance() const;
@@ -160,7 +167,8 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt
     //! to scale.
     //! \param[in] confidence The symmetric confidence interval
     //! for the variance scale as a percentage.
-    virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const;
+    virtual maths_t::TDoubleDoublePr
+    scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const;
 
     //! Roll time forwards by \p skipInterval.
     virtual void skipTime(core_t::TTime skipInterval);
@@ -195,7 +203,8 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt
     void initializeMediator();
 
     //! Create from part of a state document.
-    bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser);
 
     //! The correction to produce a smooth join between periodic
     //! repeats and partitions.
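
Every acceptRestoreTraverser/acceptPersistInserter pair being re-wrapped in this patch follows the same convention: persistence writes named values (and named sub-levels) and restoration walks the names back in a do/while loop over the traverser. A minimal sketch with an invented class, tag and member; the project's RESTORE_* macros expand to roughly the string comparison written out here:

    // Hypothetical class following the persistence convention above.
    bool CExample::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
        do {
            const std::string& name = traverser.name();
            if (name == DECAY_RATE_TAG) {
                // Parse the persisted string back into the member.
                m_DecayRate = std::stod(traverser.value());
            }
            // Nested objects are restored by descending a sub-level with a
            // callback, as the STRUCTURE_TAG handler later in this patch does.
        } while (traverser.next());
        return true;
    }

    void CExample::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
        inserter.insertValue(DECAY_RATE_TAG, m_DecayRate);
    }
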
diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h
index cca21467c1..d3684df55b 100644
--- a/include/maths/CTimeSeriesDecompositionDetail.h
+++ b/include/maths/CTimeSeriesDecompositionDetail.h
@@ -109,7 +109,11 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
     //! \brief The message passed to indicate new components are being
     //! modeled.
     struct MATHS_EXPORT SNewComponents : public SMessage {
-        enum EComponent { E_DiurnalSeasonal, E_GeneralSeasonal, E_CalendarCyclic };
+        enum EComponent {
+            E_DiurnalSeasonal,
+            E_GeneralSeasonal,
+            E_CalendarCyclic
+        };
 
         SNewComponents(core_t::TTime time, core_t::TTime lastTime, EComponent component);
 
@@ -346,7 +350,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
         };
 
         //! Initialize by reading state from \p traverser.
-        bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+        bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                    core::CStateRestoreTraverser& traverser);
 
         //! Persist state by passing information to \p inserter.
         void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -488,7 +493,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
         //! \brief The seasonal components of the decomposition.
         struct MATHS_EXPORT SSeasonal {
             //! Initialize by reading state from \p traverser.
-            bool acceptRestoreTraverser(double decayRate, core_t::TTime bucketLength, core::CStateRestoreTraverser& traverser);
+            bool acceptRestoreTraverser(double decayRate,
+                                        core_t::TTime bucketLength,
+                                        core::CStateRestoreTraverser& traverser);
 
             //! Persist state by passing information to \p inserter.
             void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -548,7 +555,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
         //! \brief Calendar periodic components of the decomposition.
         struct MATHS_EXPORT SCalendar {
             //! Initialize by reading state from \p traverser.
-            bool acceptRestoreTraverser(double decayRate, core_t::TTime bucketLength, core::CStateRestoreTraverser& traverser);
+            bool acceptRestoreTraverser(double decayRate,
+                                        core_t::TTime bucketLength,
+                                        core::CStateRestoreTraverser& traverser);
 
             //! Persist state by passing information to \p inserter.
             void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -567,7 +576,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
             bool haveComponent(CCalendarFeature feature) const;
 
             //! Get the state to update.
-            void componentsAndErrors(core_t::TTime time, TCalendarComponentPtrVec& components, TComponentErrorsPtrVec& errors);
+            void componentsAndErrors(core_t::TTime time,
+                                     TCalendarComponentPtrVec& components,
+                                     TComponentErrorsPtrVec& errors);
 
             //! Check if we need to interpolate any of the components.
             bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const;
@@ -689,17 +700,20 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
 };
 
 //! Create a free function which will be found by Koenig lookup.
-inline void swap(CTimeSeriesDecompositionDetail::CPeriodicityTest& lhs, CTimeSeriesDecompositionDetail::CPeriodicityTest& rhs) {
+inline void swap(CTimeSeriesDecompositionDetail::CPeriodicityTest& lhs,
+                 CTimeSeriesDecompositionDetail::CPeriodicityTest& rhs) {
     lhs.swap(rhs);
 }
 
 //! Create a free function which will be found by Koenig lookup.
-inline void swap(CTimeSeriesDecompositionDetail::CCalendarTest& lhs, CTimeSeriesDecompositionDetail::CCalendarTest& rhs) {
+inline void swap(CTimeSeriesDecompositionDetail::CCalendarTest& lhs,
+                 CTimeSeriesDecompositionDetail::CCalendarTest& rhs) {
     lhs.swap(rhs);
 }
 
 //! Create a free function which will be found by Koenig lookup.
-inline void swap(CTimeSeriesDecompositionDetail::CComponents& lhs, CTimeSeriesDecompositionDetail::CComponents& rhs) {
+inline void swap(CTimeSeriesDecompositionDetail::CComponents& lhs,
+                 CTimeSeriesDecompositionDetail::CComponents& rhs) {
     lhs.swap(rhs);
 }
 }
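
The three swap overloads above exist so that an unqualified swap(a, b) resolves, through argument-dependent (Koenig) lookup, to the cheap member-wise swap rather than std::swap's generic move-based fallback. The idiom in miniature, with an invented type:

    #include <utility>

    namespace ml {
    class CHeavy {
    public:
        void swap(CHeavy& other) { std::swap(m_State, other.m_State); }

    private:
        int m_State{0};
    };

    // Found by ADL because it lives in the same namespace as CHeavy.
    inline void swap(CHeavy& lhs, CHeavy& rhs) {
        lhs.swap(rhs);
    }
    }

    template<typename T>
    void exchange(T& a, T& b) {
        using std::swap;
        swap(a, b); // picks ml::swap for ml::CHeavy, std::swap otherwise
    }
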
diff --git a/include/maths/CTimeSeriesDecompositionInterface.h b/include/maths/CTimeSeriesDecompositionInterface.h
index 314f6916b5..6257830f0a 100644
--- a/include/maths/CTimeSeriesDecompositionInterface.h
+++ b/include/maths/CTimeSeriesDecompositionInterface.h
@@ -95,7 +95,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface {
     //! point.
     //! \param[in] change A description of the change to apply.
     //! \return True if a new component was detected.
-    virtual bool applyChange(core_t::TTime time, double value, const SChangeDescription& change) = 0;
+    virtual bool
+    applyChange(core_t::TTime time, double value, const SChangeDescription& change) = 0;
 
     //! Propagate the decomposition forwards to \p time.
     virtual void propagateForwardsTo(core_t::TTime time) = 0;
@@ -109,8 +110,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface {
     //! \param[in] confidence The symmetric confidence interval for the prediction
     //! of the baseline as a percentage.
     //! \param[in] components The components to include in the baseline.
-    virtual maths_t::TDoubleDoublePr
-    value(core_t::TTime time, double confidence = 0.0, int components = E_All, bool smooth = true) const = 0;
+    virtual maths_t::TDoubleDoublePr value(core_t::TTime time,
+                                           double confidence = 0.0,
+                                           int components = E_All,
+                                           bool smooth = true) const = 0;
 
     //! Forecast from \p start to \p end at \p dt intervals.
     //!
@@ -131,7 +134,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface {
     //! any periodic component at \p time.
     //!
     //! \note Detrending preserves the time series mean.
-    virtual double detrend(core_t::TTime time, double value, double confidence, int components = E_All) const = 0;
+    virtual double detrend(core_t::TTime time,
+                           double value,
+                           double confidence,
+                           int components = E_All) const = 0;
 
     //! Get the mean variance of the baseline.
     virtual double meanVariance() const = 0;
@@ -142,7 +148,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface {
     //! \param[in] variance The variance of the distribution to scale.
     //! \param[in] confidence The symmetric confidence interval for the
     //! variance scale as a percentage.
-    virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const = 0;
+    virtual maths_t::TDoubleDoublePr
+    scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const = 0;
 
     //! Roll time forwards by \p skipInterval.
     virtual void skipTime(core_t::TTime skipInterval) = 0;
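
For concreteness, a hypothetical caller of this interface: value() returns a (lower, upper) pair bracketing the prediction at the requested confidence, so an out-of-interval test reads as follows, assuming, as the rest of the library does, that confidence = 0.0 collapses the pair to the point prediction:

    // Is 'observed' outside the 95% confidence interval of the prediction?
    bool isOutside(const ml::maths::CTimeSeriesDecompositionInterface& decomposition,
                   ml::core_t::TTime time,
                   double observed) {
        ml::maths_t::TDoubleDoublePr interval{decomposition.value(time, 95.0)};
        return observed < interval.first || observed > interval.second;
    }
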
diff --git a/include/maths/CTimeSeriesDecompositionStateSerialiser.h b/include/maths/CTimeSeriesDecompositionStateSerialiser.h
index f4eb2fe978..e772e53e40 100644
--- a/include/maths/CTimeSeriesDecompositionStateSerialiser.h
+++ b/include/maths/CTimeSeriesDecompositionStateSerialiser.h
@@ -52,7 +52,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionStateSerialiser {
                     core::CStateRestoreTraverser& traverser) const;
 
     //! Persist state by passing information to the supplied inserter.
-    void operator()(const CTimeSeriesDecompositionInterface& decomposition, core::CStatePersistInserter& inserter) const;
+    void operator()(const CTimeSeriesDecompositionInterface& decomposition,
+                    core::CStatePersistInserter& inserter) const;
 };
 }
 }
diff --git a/include/maths/CTimeSeriesDecompositionStub.h b/include/maths/CTimeSeriesDecompositionStub.h
index 4d7351cd56..374d5ca1a3 100644
--- a/include/maths/CTimeSeriesDecompositionStub.h
+++ b/include/maths/CTimeSeriesDecompositionStub.h
@@ -53,7 +53,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio
     virtual double meanValue(core_t::TTime time) const;
 
     //! Returns (0.0, 0.0).
-    virtual maths_t::TDoubleDoublePr value(core_t::TTime time, double confidence = 0.0, int components = E_All, bool smooth = true) const;
+    virtual maths_t::TDoubleDoublePr value(core_t::TTime time,
+                                           double confidence = 0.0,
+                                           int components = E_All,
+                                           bool smooth = true) const;
 
     //! No-op.
     virtual void forecast(core_t::TTime startTime,
@@ -64,13 +67,15 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio
                           const TWriteForecastResult& writer);
 
     //! Returns \p value.
-    virtual double detrend(core_t::TTime time, double value, double confidence, int components = E_All) const;
+    virtual double
+    detrend(core_t::TTime time, double value, double confidence, int components = E_All) const;
 
     //! Returns 0.0.
     virtual double meanVariance() const;
 
     //! Returns (1.0, 1.0).
-    virtual maths_t::TDoubleDoublePr scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const;
+    virtual maths_t::TDoubleDoublePr
+    scale(core_t::TTime time, double variance, double confidence, bool smooth = true) const;
 
     //! No-op.
     virtual void skipTime(core_t::TTime skipInterval);
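
The stub's documented behaviours (zeros, identity detrend, no-ops) make it a null object: callers hold a CTimeSeriesDecompositionInterface and never branch on whether a trend is modelled. A sketch of selecting between the two, with a hypothetical factory and illustrative constructor arguments:

    #include <memory>

    std::unique_ptr<ml::maths::CTimeSeriesDecompositionInterface>
    makeDecomposition(bool modelTrend, double decayRate, ml::core_t::TTime bucketLength) {
        if (modelTrend) {
            // Constructor arguments shown are illustrative.
            return std::make_unique<ml::maths::CTimeSeriesDecomposition>(decayRate, bucketLength);
        }
        return std::make_unique<ml::maths::CTimeSeriesDecompositionStub>();
    }
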
diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h
index 1bd5d9fdfe..74e33fdf17 100644
--- a/include/maths/CTimeSeriesModel.h
+++ b/include/maths/CTimeSeriesModel.h
@@ -68,7 +68,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
                                const CPrior& residualModel,
                                const TDecayRateController2Ary* controllers = nullptr,
                                bool modelAnomalies = true);
-    CUnivariateTimeSeriesModel(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    CUnivariateTimeSeriesModel(const SModelRestoreParams& params,
+                               core::CStateRestoreTraverser& traverser);
     ~CUnivariateTimeSeriesModel();
 
     //! Get the model identifier.
@@ -97,28 +98,36 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
     virtual void addBucketValue(const TTimeDouble2VecSizeTrVec& value);
 
     //! Update the model with new samples.
-    virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, TTimeDouble2VecSizeTrVec samples);
+    virtual EUpdateResult addSamples(const CModelAddSamplesParams& params,
+                                     TTimeDouble2VecSizeTrVec samples);
 
     //! Advance time by \p gap.
     virtual void skipTime(core_t::TTime gap);
 
     //! Get the most likely value for the time series at \p time.
-    virtual TDouble2Vec mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const;
+    virtual TDouble2Vec mode(core_t::TTime time,
+                             const maths_t::TWeightStyleVec& weightStyles,
+                             const TDouble2Vec4Vec& weights) const;
 
     //! Get the most likely value for each correlate time series
     //! at \p time, if there are any.
-    virtual TDouble2Vec1Vec
-    correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec1Vec& weights) const;
+    virtual TDouble2Vec1Vec correlateModes(core_t::TTime time,
+                                           const maths_t::TWeightStyleVec& weightStyles,
+                                           const TDouble2Vec4Vec1Vec& weights) const;
 
     //! Get the local maxima of the residual distribution.
-    virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const;
+    virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles,
+                                          const TDouble2Vec4Vec& weights) const;
 
     //! Remove any trend components from \p value.
-    virtual void detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const;
+    virtual void detrend(const TTime2Vec1Vec& time,
+                         double confidenceInterval,
+                         TDouble2Vec1Vec& value) const;
 
     //! Get the best (least MSE) predicted value at \p time.
-    virtual TDouble2Vec
-    predict(core_t::TTime time, const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(), TDouble2Vec hint = TDouble2Vec()) const;
+    virtual TDouble2Vec predict(core_t::TTime time,
+                                const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(),
+                                TDouble2Vec hint = TDouble2Vec()) const;
 
     //! Get the prediction and \p confidenceInterval percentage
     //! confidence interval for the time series at \p time.
@@ -148,7 +157,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
                              TSize1Vec& mostAnomalousCorrelate) const;
 
     //! Get the Winsorisation weight to apply to \p value.
-    virtual TDouble2Vec winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const;
+    virtual TDouble2Vec
+    winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const;
 
     //! Get the seasonal variance scale at \p time.
     virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const;
@@ -163,7 +173,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
     virtual std::size_t memoryUsage() const;
 
     //! Initialize reading state from \p traverser.
-    bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const SModelRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser);
 
     //! Persist by passing information to \p inserter.
     virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -207,15 +218,20 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
     using TPriorPtr = boost::shared_ptr<CPrior>;
     using TAnomalyModelPtr = boost::shared_ptr<CTimeSeriesAnomalyModel>;
     using TMultivariatePriorCPtrSizePr = std::pair<const CMultivariatePrior*, std::size_t>;
-    using TMultivariatePriorCPtrSizePr1Vec = core::CSmallVector<TMultivariatePriorCPtrSizePr, 1>;
+    using TMultivariatePriorCPtrSizePr1Vec =
+        core::CSmallVector<TMultivariatePriorCPtrSizePr, 1>;
     using TModelCPtr1Vec = core::CSmallVector<const CUnivariateTimeSeriesModel*, 1>;
     using TChangeDetectorPtr = boost::shared_ptr<CUnivariateTimeSeriesChangeDetector>;
 
 private:
-    CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel& other, std::size_t id, bool isForForecast = false);
+    CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel& other,
+                               std::size_t id,
+                               bool isForForecast = false);
 
     //! Test for and apply any change we find.
-    EUpdateResult testAndApplyChange(const CModelAddSamplesParams& params, const TSizeVec& order, const TTimeDouble2VecSizeTrVec& samples);
+    EUpdateResult testAndApplyChange(const CModelAddSamplesParams& params,
+                                     const TSizeVec& order,
+                                     const TTimeDouble2VecSizeTrVec& samples);
 
     //! Apply \p change to this model.
     EUpdateResult applyChange(const SChangeDescription& change);
@@ -337,9 +353,11 @@ class MATHS_EXPORT CTimeSeriesCorrelations {
     using TSizeSizePr = std::pair<std::size_t, std::size_t>;
     using TMultivariatePriorPtr = boost::shared_ptr<CMultivariatePrior>;
     using TMultivariatePriorPtrDoublePr = std::pair<TMultivariatePriorPtr, double>;
-    using TSizeSizePrMultivariatePriorPtrDoublePrUMap = boost::unordered_map<TSizeSizePr, TMultivariatePriorPtrDoublePr>;
+    using TSizeSizePrMultivariatePriorPtrDoublePrUMap =
+        boost::unordered_map<TSizeSizePr, TMultivariatePriorPtrDoublePr>;
     using TMultivariatePriorCPtrSizePr = std::pair<const CMultivariatePrior*, std::size_t>;
-    using TMultivariatePriorCPtrSizePr1Vec = core::CSmallVector<TMultivariatePriorCPtrSizePr, 1>;
+    using TMultivariatePriorCPtrSizePr1Vec =
+        core::CSmallVector<TMultivariatePriorCPtrSizePr, 1>;
 
     //! \brief Wraps up the sampled data for a feature.
     struct MATHS_EXPORT SSampleData {
@@ -395,7 +413,8 @@ class MATHS_EXPORT CTimeSeriesCorrelations {
     std::size_t memoryUsage() const;
 
     //! Initialize reading state from \p traverser.
-    bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser);
 
     //! Persist by passing information to \p inserter.
     void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -405,14 +424,17 @@ class MATHS_EXPORT CTimeSeriesCorrelations {
     using TTimeDouble2VecSizeTrVec = std::vector<TTimeDouble2VecSizeTr>;
     using TModelCPtrVec = std::vector<const CUnivariateTimeSeriesModel*>;
     using TModelCPtr1Vec = core::CSmallVector<const CUnivariateTimeSeriesModel*, 1>;
-    using TSizeSizePrMultivariatePriorPtrDoublePrPr = std::pair<TSizeSizePr, TMultivariatePriorPtrDoublePr>;
+    using TSizeSizePrMultivariatePriorPtrDoublePrPr =
+        std::pair<TSizeSizePr, TMultivariatePriorPtrDoublePr>;
 
 private:
-    CTimeSeriesCorrelations(const CTimeSeriesCorrelations& other, bool isForPersistence = false);
+    CTimeSeriesCorrelations(const CTimeSeriesCorrelations& other,
+                            bool isForPersistence = false);
 
     //! Restore the correlation distribution models reading state from
     //! \p traverser.
-    bool restoreCorrelationModels(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool restoreCorrelationModels(const SDistributionRestoreParams& params,
+                                  core::CStateRestoreTraverser& traverser);
 
     //! Persist the correlation distribution models passing information
     //! to \p inserter.
@@ -424,7 +446,8 @@ class MATHS_EXPORT CTimeSeriesCorrelations {
                         core::CStateRestoreTraverser& traverser);
 
     //! Persist the \p model passing information to \p inserter.
-    static void persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr& model, core::CStatePersistInserter& inserter);
+    static void persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr& model,
+                        core::CStatePersistInserter& inserter);
 
     //! Add the time series identified by \p id.
     void addTimeSeries(std::size_t id, const CUnivariateTimeSeriesModel& model);
@@ -433,7 +456,10 @@ class MATHS_EXPORT CTimeSeriesCorrelations {
     void removeTimeSeries(std::size_t id);
 
     //! Add a sample for the time series identified by \p id.
-    void addSamples(std::size_t id, const CModelAddSamplesParams& params, const TTimeDouble2VecSizeTrVec& samples, double multiplier);
+    void addSamples(std::size_t id,
+                    const CModelAddSamplesParams& params,
+                    const TTimeDouble2VecSizeTrVec& samples,
+                    double multiplier);
 
     //! Get the ids of the time series correlated with \p id.
     TSize1Vec correlated(std::size_t id) const;
@@ -500,7 +526,8 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
                                  const TDecayRateController2Ary* controllers = nullptr,
                                  bool modelAnomalies = true);
     CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel& other);
-    CMultivariateTimeSeriesModel(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    CMultivariateTimeSeriesModel(const SModelRestoreParams& params,
+                                 core::CStateRestoreTraverser& traverser);
 
     //! Returns 0 since these models don't need a unique identifier.
     virtual std::size_t identifier() const;
@@ -528,27 +555,35 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
     virtual void addBucketValue(const TTimeDouble2VecSizeTrVec& value);
 
     //! Update the model with new samples.
-    virtual EUpdateResult addSamples(const CModelAddSamplesParams& params, TTimeDouble2VecSizeTrVec samples);
+    virtual EUpdateResult addSamples(const CModelAddSamplesParams& params,
+                                     TTimeDouble2VecSizeTrVec samples);
 
     //! Advance time by \p gap.
     virtual void skipTime(core_t::TTime gap);
 
     //! Get the most likely value for the time series at \p time.
-    virtual TDouble2Vec mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const;
+    virtual TDouble2Vec mode(core_t::TTime time,
+                             const maths_t::TWeightStyleVec& weightStyles,
+                             const TDouble2Vec4Vec& weights) const;
 
     //! Returns empty.
-    virtual TDouble2Vec1Vec
-    correlateModes(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec1Vec& weights) const;
+    virtual TDouble2Vec1Vec correlateModes(core_t::TTime time,
+                                           const maths_t::TWeightStyleVec& weightStyles,
+                                           const TDouble2Vec4Vec1Vec& weights) const;
 
     //! Get the local maxima of the residual distribution.
-    virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const;
+    virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles,
+                                          const TDouble2Vec4Vec& weights) const;
 
     //! Remove any trend components from \p value.
-    virtual void detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const;
+    virtual void detrend(const TTime2Vec1Vec& time,
+                         double confidenceInterval,
+                         TDouble2Vec1Vec& value) const;
 
     //! Get the best (least MSE) predicted value at \p time.
-    virtual TDouble2Vec
-    predict(core_t::TTime time, const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(), TDouble2Vec hint = TDouble2Vec()) const;
+    virtual TDouble2Vec predict(core_t::TTime time,
+                                const TSizeDoublePr1Vec& correlated = TSizeDoublePr1Vec(),
+                                TDouble2Vec hint = TDouble2Vec()) const;
 
     //! Get the prediction and \p confidenceInterval percentage
     //! confidence interval for the time series at \p time.
@@ -576,7 +611,8 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
                              TSize1Vec& mostAnomalousCorrelate) const;
 
     //! Get the Winsorisation weight to apply to \p value.
-    virtual TDouble2Vec winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const;
+    virtual TDouble2Vec
+    winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const;
 
     //! Get the seasonal variance scale at \p time.
     virtual TDouble2Vec seasonalWeight(double confidence, core_t::TTime time) const;
@@ -591,7 +627,8 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
     virtual std::size_t memoryUsage() const;
 
     //! Initialize reading state from \p traverser.
-    bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const SModelRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser);
 
     //! Persist by passing information to \p inserter.
     virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -641,7 +678,9 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
                               const TDouble2Vec4VecVec& trendWeights);
 
     //! Compute the prediction errors for \p sample.
-    void appendPredictionErrors(double interval, const TDouble2Vec& sample, TDouble1VecVec (&result)[2]);
+    void appendPredictionErrors(double interval,
+                                const TDouble2Vec& sample,
+                                TDouble1VecVec (&result)[2]);
 
     //! Reinitialize state after detecting a new component of the trend
     //! decomposition.
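
To make the reformatted read path concrete: predict() is documented above as the least mean squared error estimate and is callable with just a time, its correlated and hint arguments defaulting. A hypothetical residual computation on top of it; error handling and the empty-result case are omitted:

    // Difference between an observation and the model's best estimate.
    double residual(const ml::maths::CUnivariateTimeSeriesModel& model,
                    ml::core_t::TTime time,
                    double observed) {
        auto prediction = model.predict(time);
        return observed - prediction[0];
    }
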
diff --git a/include/maths/CTools.h b/include/maths/CTools.h
index ce6d6be505..d70be5636c 100644
--- a/include/maths/CTools.h
+++ b/include/maths/CTools.h
@@ -119,7 +119,9 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
         double operator()(const SImproperDistribution&, double, maths_t::ETail& tail) const;
         double operator()(const normal& normal_, double x, maths_t::ETail& tail) const;
         double operator()(const students_t& students, double x, maths_t::ETail& tail) const;
-        double operator()(const negative_binomial& negativeBinomial, double x, maths_t::ETail& tail) const;
+        double operator()(const negative_binomial& negativeBinomial,
+                          double x,
+                          maths_t::ETail& tail) const;
         double operator()(const lognormal& logNormal, double x, maths_t::ETail& tail) const;
         double operator()(const CLogTDistribution& logt, double x, maths_t::ETail& tail) const;
         double operator()(const gamma& gamma_, double x, maths_t::ETail& tail) const;
@@ -382,7 +384,8 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
     template<typename T>
     class CDifferentialEntropyKernel {
     public:
-        CDifferentialEntropyKernel(const CMixtureDistribution<T>& mixture) : m_Mixture(&mixture) {}
+        CDifferentialEntropyKernel(const CMixtureDistribution<T>& mixture)
+            : m_Mixture(&mixture) {}
 
         inline bool operator()(double x, double& result) const {
             double fx = pdf(*m_Mixture, x);
@@ -454,7 +457,8 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
             x.s_Sign = 0;
             x.s_Mantissa = (dx / 2) & core::CIEEE754::IEEE754_MANTISSA_MASK;
             x.s_Exponent = 1022;
-            for (std::size_t i = 0u; i < BINS; ++i, x.s_Mantissa = (x.s_Mantissa + dx) & core::CIEEE754::IEEE754_MANTISSA_MASK) {
+            for (std::size_t i = 0u; i < BINS;
+                 ++i, x.s_Mantissa = (x.s_Mantissa + dx) & core::CIEEE754::IEEE754_MANTISSA_MASK) {
                 double value;
                 static_assert(sizeof(double) == sizeof(core::CIEEE754::SDoubleRep),
                               "SDoubleRep definition unsuitable for memcpy to double");
@@ -466,7 +470,9 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
         }
 
         //! Lookup log2 for a given mantissa.
-        const double& operator[](uint64_t mantissa) const { return m_Table[mantissa >> FAST_LOG_SHIFT]; }
+        const double& operator[](uint64_t mantissa) const {
+            return m_Table[mantissa >> FAST_LOG_SHIFT];
+        }
 
     private:
         //! The quantized log base 2 for the mantissa range.
@@ -504,12 +510,14 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
     }
     //! Get a writable location of the point \p x.
     template<typename T>
-    static double location(const typename CBasicStatistics::SSampleMean<T>::TAccumulator& x) {
+    static double
+    location(const typename CBasicStatistics::SSampleMean<T>::TAccumulator& x) {
         return CBasicStatistics::mean(x);
     }
     //! Set the mean of \p x to \p y.
     template<typename T>
-    static void setLocation(typename CBasicStatistics::SSampleMean<T>::TAccumulator& x, double y) {
+    static void
+    setLocation(typename CBasicStatistics::SSampleMean<T>::TAccumulator& x, double y) {
         x.s_Moments[0] = static_cast<T>(y);
     }
 
@@ -522,7 +530,8 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
     public:
         //! Create a new points group.
         template<typename T>
-        CGroup(std::size_t index, const T& points) : m_A(index), m_B(index), m_Centre() {
+        CGroup(std::size_t index, const T& points)
+            : m_A(index), m_B(index), m_Centre() {
             m_Centre.add(location(points[index]));
         }
 
@@ -614,7 +623,9 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
 
     //! Component-wise truncation of stack vectors.
     template<typename T, std::size_t N>
-    static CVectorNx1<T, N> truncate(const CVectorNx1<T, N>& x, const CVectorNx1<T, N>& a, const CVectorNx1<T, N>& b) {
+    static CVectorNx1<T, N> truncate(const CVectorNx1<T, N>& x,
+                                     const CVectorNx1<T, N>& a,
+                                     const CVectorNx1<T, N>& b) {
         CVectorNx1 result(x);
         for (std::size_t i = 0u; i < N; ++i) {
             result(i) = truncate(result(i), a(i), b(i));
@@ -624,7 +635,8 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
 
     //! Component-wise truncation of heap vectors.
     template<typename T>
-    static CVector<T> truncate(const CVector<T>& x, const CVector<T>& a, const CVector<T>& b) {
+    static CVector<T>
+    truncate(const CVector<T>& x, const CVector<T>& a, const CVector<T>& b) {
         CVector result(x);
         for (std::size_t i = 0u; i < result.dimension(); ++i) {
             result(i) = truncate(result(i), a(i), b(i));
@@ -634,8 +646,9 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
 
     //! Component-wise truncation of small vector.
     template<typename T, std::size_t N>
-    static core::CSmallVector<T, N>
-    truncate(const core::CSmallVector<T, N>& x, const core::CSmallVector<T, N>& a, const core::CSmallVector<T, N>& b) {
+    static core::CSmallVector<T, N> truncate(const core::CSmallVector<T, N>& x,
+                                             const core::CSmallVector<T, N>& a,
+                                             const core::CSmallVector<T, N>& b) {
         core::CSmallVector result(x);
         for (std::size_t i = 0u; i < result.size(); ++i) {
             result[i] = truncate(result[i], a[i], b[i]);
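
All three truncate overloads above apply the same per-component clamp; a standalone equivalent on a plain std::vector makes the semantics explicit, assuming a[i] <= b[i] for every component:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Clamp each component of x into [a[i], b[i]].
    std::vector<double> truncate(std::vector<double> x,
                                 const std::vector<double>& a,
                                 const std::vector<double>& b) {
        for (std::size_t i = 0; i < x.size(); ++i) {
            x[i] = std::min(std::max(x[i], a[i]), b[i]);
        }
        return x;
    }
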
diff --git a/include/maths/CToolsDetail.h b/include/maths/CToolsDetail.h
index 6680eb33af..1224ee8a89 100644
--- a/include/maths/CToolsDetail.h
+++ b/include/maths/CToolsDetail.h
@@ -25,8 +25,11 @@ namespace ml {
 namespace maths {
 
 template<typename LOGF>
-CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel<LOGF>::CSmoothedKernel(LOGF logf, double logF0, double k)
-    : m_LogF(logf), m_LogF0(logF0), m_K(k), m_Scale(std::exp(m_LogF0) * (1.0 + std::exp(-k))) {
+CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel<LOGF>::CSmoothedKernel(LOGF logf,
+                                                                                      double logF0,
+                                                                                      double k)
+    : m_LogF(logf), m_LogF0(logF0), m_K(k),
+      m_Scale(std::exp(m_LogF0) * (1.0 + std::exp(-k))) {
 }
 
 template<typename LOGF>
@@ -37,7 +40,8 @@ void CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel::k(dou
 }
 
 template<typename LOGF>
-bool CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel<LOGF>::operator()(double x, double& result) const {
+bool CTools::CMixtureProbabilityOfLessLikelySample::CSmoothedKernel<LOGF>::
+operator()(double x, double& result) const {
     // We use the fact that if:
     //   1 + exp(-k(f(x)/f0 - 1)) < (1 + eps) * exp(-k(f(x)/f0 - 1))
     //
@@ -104,8 +108,9 @@ bool CTools::CMixtureProbabilityOfLessLikelySample::leftTail(const LOGF& logf,
         n = iterations - n;
         CSolvers::solve(xl, xr, fl, fr, f, n, equal, result);
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to find left root: " << e.what() << ", a = " << m_A << ", logf(x) = " << m_LogFx << ", logf(a) = " << logf(m_A)
-                  << ", max deviation = " << (m_MaxDeviation.count() > 0 ? m_MaxDeviation[0] : 0.0));
+        LOG_ERROR(<< "Failed to find left root: " << e.what() << ", a = " << m_A
+                  << ", logf(x) = " << m_LogFx << ", logf(a) = " << logf(m_A) << ", max deviation = "
+                  << (m_MaxDeviation.count() > 0 ? m_MaxDeviation[0] : 0.0));
         return false;
     }
     return true;
@@ -146,15 +151,16 @@ bool CTools::CMixtureProbabilityOfLessLikelySample::rightTail(const LOGF& logf,
         n = iterations - n;
         CSolvers::solve(xl, xr, fl, fr, f, n, equal, result);
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to find right root: " << e.what() << ",b = " << m_B << ", logf(x) = " << m_LogFx
-                  << ", logf(b) = " << logf(m_B));
+        LOG_ERROR(<< "Failed to find right root: " << e.what() << ",b = " << m_B
+                  << ", logf(x) = " << m_LogFx << ", logf(b) = " << logf(m_B));
         return false;
     }
     return true;
 }
 
 template<typename LOGF>
-double CTools::CMixtureProbabilityOfLessLikelySample::calculate(const LOGF& logf, double pTails) {
+double CTools::CMixtureProbabilityOfLessLikelySample::calculate(const LOGF& logf,
+                                                                double pTails) {
     TDoubleDoublePrVec intervals;
     this->intervals(intervals);
 
@@ -162,17 +168,17 @@ double CTools::CMixtureProbabilityOfLessLikelySample::calculate(const LOGF& logf
     TDoubleVec pIntervals(intervals.size(), 0.0);
     CSmoothedKernel<LOGF> kernel(logf, m_LogFx, 3.0);
     for (std::size_t i = 0u; i < intervals.size(); ++i) {
-        if (!CIntegration::gaussLegendre(kernel, intervals[i].first, intervals[i].second, pIntervals[i])) {
-            LOG_ERROR(<< "Couldn't integrate kernel over " << core::CContainerPrinter::print(intervals[i]));
+        if (!CIntegration::gaussLegendre(
+                kernel, intervals[i].first, intervals[i].second, pIntervals[i])) {
+            LOG_ERROR(<< "Couldn't integrate kernel over "
+                      << core::CContainerPrinter::print(intervals[i]));
         }
     }
 
     p += pTails;
     kernel.k(15.0);
-    CIntegration::adaptiveGaussLegendre(kernel,
-                                                                intervals,
-                                                                pIntervals,
-                                                                2,    // refinements
+    CIntegration::adaptiveGaussLegendre(kernel, intervals, pIntervals,
+                                                                2, // refinements
                                                                 3,    // splits
                                                                 1e-2, // tolerance
                                                                 p);
@@ -195,7 +201,8 @@ double CTools::differentialEntropy(const CMixtureDistribution& mixture) {
 
     TDoubleDoublePrVec range;
     for (std::size_t i = 0u; i < modes.size(); ++i) {
-        range.push_back(TDoubleDoublePr(quantile(modes[i], EPS), quantile(modes[i], 1.0 - EPS)));
+        range.push_back(TDoubleDoublePr(quantile(modes[i], EPS),
+                                        quantile(modes[i], 1.0 - EPS)));
     }
     std::sort(range.begin(), range.end(), COrderings::SFirstLess());
     LOG_TRACE(<< "range = " << core::CContainerPrinter::print(range));
@@ -243,7 +250,8 @@ void CTools::spread(double a, double b, double separation, T& points) {
     std::size_t n = points.size() - 1;
     if (b - a <= separation * static_cast<double>(n + 1)) {
         for (std::size_t i = 0u; i <= n; ++i) {
-            setLocation(points[i], a + (b - a) * static_cast<double>(i) / static_cast<double>(n));
+            setLocation(points[i], a + (b - a) * static_cast<double>(i) /
+                                           static_cast<double>(n));
         }
         return;
     }
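
The branch of CTools::spread visible above is its fallback: when [a, b] cannot hold all the points at the requested separation, they are placed evenly across the interval. A standalone rendering of just that branch on raw doubles rather than the library's accumulator types; the size guard is an addition for safety:

    #include <cstddef>
    #include <vector>

    void spreadEvenly(double a, double b, std::vector<double>& points) {
        if (points.size() < 2) {
            if (!points.empty()) {
                points[0] = (a + b) / 2.0;
            }
            return;
        }
        std::size_t n = points.size() - 1;
        for (std::size_t i = 0; i <= n; ++i) {
            points[i] = a + (b - a) * static_cast<double>(i) / static_cast<double>(n);
        }
    }
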
diff --git a/include/maths/CTrendComponent.h b/include/maths/CTrendComponent.h
index d825220c10..8f4d67aab9 100644
--- a/include/maths/CTrendComponent.h
+++ b/include/maths/CTrendComponent.h
@@ -67,7 +67,8 @@ class MATHS_EXPORT CTrendComponent {
     void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
 
     //! Initialize by reading state from \p traverser.
-    bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser);
 
     //!  Check if the trend has been estimated.
     bool initialized() const;
diff --git a/include/maths/CTrendTests.h b/include/maths/CTrendTests.h
index 93d2d6d325..bd684341ef 100644
--- a/include/maths/CTrendTests.h
+++ b/include/maths/CTrendTests.h
@@ -130,7 +130,8 @@ class MATHS_EXPORT CRandomizedPeriodicityTest {
 
 private:
     //! Refresh \p projections and update \p statistics.
-    static void updateStatistics(TVector2NMeanAccumulator& projections, TVector2MeanAccumulator& statistics);
+    static void updateStatistics(TVector2NMeanAccumulator& projections,
+                                 TVector2MeanAccumulator& statistics);
 
     //! Re-sample the projections.
     static void resample(core_t::TTime time);
diff --git a/include/maths/CTypeConversions.h b/include/maths/CTypeConversions.h
index fa1887950c..a603db5a2d 100644
--- a/include/maths/CTypeConversions.h
+++ b/include/maths/CTypeConversions.h
@@ -99,7 +99,8 @@ struct SSelector {
 //! \brief Defines a suitable floating point type.
 template<typename T, typename U>
 struct SFloatingPoint {
-    using Type = typename type_conversion_detail::SSelector::value>::Type;
+    using Type =
+        typename type_conversion_detail::SSelector::value>::Type;
 };
 
 //! \brief Defines CVectorNx1 on a suitable floating point type.
@@ -141,7 +142,8 @@ struct SFloatingPoint, U> {
 //! \brief Defines an Eigen dense matrix on a suitable floating point type.
 template<typename SCALAR, int ROWS, int COLS, int OPTIONS, int MAX_ROWS, int MAX_COLS, typename U>
 struct SFloatingPoint<Eigen::Matrix<SCALAR, ROWS, COLS, OPTIONS, MAX_ROWS, MAX_COLS>, U> {
-    using Type = Eigen::Matrix<typename SFloatingPoint<SCALAR, U>::Type, ROWS, COLS, OPTIONS, MAX_ROWS, MAX_COLS>;
+    using Type =
+        Eigen::Matrix<typename SFloatingPoint<SCALAR, U>::Type, ROWS, COLS, OPTIONS, MAX_ROWS, MAX_COLS>;
 };
 
 //! \brief Defines CAnnotatedVector on a suitable floating point type.
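
The SFloatingPoint trait family above maps a value type, and containers of it, to a suitable floating point type for arithmetic. A simplified self-contained rendering of the idea; the selection rule here (double unless the type is already floating point) is an illustrative guess at the real selector, and the container specialisation stands in for the CVectorNx1, Eigen and CAnnotatedVector ones:

    #include <type_traits>
    #include <vector>

    template<typename T>
    struct SFloatingPointToy {
        using Type = typename std::conditional<std::is_floating_point<T>::value, T, double>::type;
    };

    // Lift the mapping through a container, as the specialisations above do.
    template<typename T>
    struct SFloatingPointToy<std::vector<T>> {
        using Type = std::vector<typename SFloatingPointToy<T>::Type>;
    };

    static_assert(std::is_same<SFloatingPointToy<int>::Type, double>::value, "");
    static_assert(std::is_same<SFloatingPointToy<std::vector<float>>::Type,
                               std::vector<float>>::value, "");
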
diff --git a/include/maths/CXMeans.h b/include/maths/CXMeans.h
index da793b3162..a0f1730570 100644
--- a/include/maths/CXMeans.h
+++ b/include/maths/CXMeans.h
@@ -62,16 +62,20 @@ class CXMeans {
     //! points for stable comparison.
     class CCluster {
     public:
-        CCluster() : m_Cost(std::numeric_limits<double>::max()), m_Checksum(0) {}
+        CCluster()
+            : m_Cost(std::numeric_limits<double>::max()), m_Checksum(0) {}
 
         //! Check for equality using checksum and then points if the
         //! checksum is ambiguous.
-        bool operator==(const CCluster& other) const { return m_Checksum == other.m_Checksum && m_Points == other.m_Points; }
+        bool operator==(const CCluster& other) const {
+            return m_Checksum == other.m_Checksum && m_Points == other.m_Points;
+        }
 
         //! Total ordering by checksum breaking ties using expensive
         //! comparison on all points.
         bool operator<(const CCluster& rhs) const {
-            return COrderings::lexicographical_compare(m_Checksum, m_Points, rhs.m_Checksum, rhs.m_Points);
+            return COrderings::lexicographical_compare(m_Checksum, m_Points,
+                                                       rhs.m_Checksum, rhs.m_Points);
         }
 
         //! Get the number of points in the cluster.
@@ -113,7 +117,8 @@ class CXMeans {
     using TClusterVec = std::vector<CCluster>;
 
 public:
-    CXMeans(std::size_t kmax) : m_Kmax(kmax), m_MinCost(std::numeric_limits<double>::max()) {
+    CXMeans(std::size_t kmax)
+        : m_Kmax(kmax), m_MinCost(std::numeric_limits<double>::max()) {
         m_BestCentres.reserve(m_Kmax);
         m_Clusters.reserve(m_Kmax);
     }
@@ -156,9 +161,11 @@ class CXMeans {
     //! \param[in] improveStructureKmeansIterations The number
     //! of iterations of Lloyd's algorithm to use in k-means for
     //! a single round of improve structure.
-    void
-    run(std::size_t improveParamsKmeansIterations, std::size_t improveStructureClusterSeeds, std::size_t improveStructureKmeansIterations) {
-        while (this->improveStructure(improveStructureClusterSeeds, improveStructureKmeansIterations)) {
+    void run(std::size_t improveParamsKmeansIterations,
+             std::size_t improveStructureClusterSeeds,
+             std::size_t improveStructureKmeansIterations) {
+        while (this->improveStructure(improveStructureClusterSeeds,
+                                      improveStructureKmeansIterations)) {
             this->improveParams(improveParamsKmeansIterations);
         }
         this->polish(10 * improveParamsKmeansIterations);
@@ -209,8 +216,8 @@ class CXMeans {
             CCluster& cluster = newClusters.back();
             cluster.centre(newCentres[i]);
             cluster.points(newClusterPoints[i]);
-            typename TClusterCPtrVec::const_iterator j =
-                std::lower_bound(oldClusters.begin(), oldClusters.end(), &cluster, COrderings::SPtrLess());
+            typename TClusterCPtrVec::const_iterator j = std::lower_bound(
+                oldClusters.begin(), oldClusters.end(), &cluster, COrderings::SPtrLess());
             if (j != oldClusters.end() && **j == cluster) {
                 cluster.cost((*j)->cost());
                 preserved.insert(cluster.checksum());
@@ -275,7 +282,8 @@ class CXMeans {
 
         bool split = false;
 
-        for (std::size_t i = 0u, n = m_Clusters.size(); i < n && m_Clusters.size() < m_Kmax; ++i) {
+        for (std::size_t i = 0u, n = m_Clusters.size();
+             i < n && m_Clusters.size() < m_Kmax; ++i) {
             if (m_Inactive.count(m_Clusters[i].checksum()) > 0) {
                 continue;
             }
@@ -290,7 +298,8 @@ class CXMeans {
 
             for (std::size_t j = 0u; j < clusterSeeds; ++j) {
                 this->generateSeedCentres(points, 2, seedClusterCentres);
-                LOG_TRACE(<< "seed centres = " << core::CContainerPrinter::print(seedClusterCentres));
+                LOG_TRACE(<< "seed centres = "
+                          << core::CContainerPrinter::print(seedClusterCentres));
 
                 kmeans.setCentres(seedClusterCentres);
                 kmeans.run(kmeansIterations);
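
The run() method reformatted above is the whole x-means control flow: improveStructure proposes seeded two-way splits of existing clusters and keeps those that lower the per-cluster cost, improveParams re-runs k-means over the full set of centres, and polish finishes with a longer k-means pass. A skeleton of that loop with stand-in phase implementations:

    #include <cstddef>

    class CToyXMeans {
    public:
        void run(std::size_t paramsIterations,
                 std::size_t structureSeeds,
                 std::size_t structureIterations) {
            while (this->improveStructure(structureSeeds, structureIterations)) {
                this->improveParams(paramsIterations);
            }
            this->polish(10 * paramsIterations);
        }

    private:
        bool improveStructure(std::size_t /*seeds*/, std::size_t /*iterations*/) {
            // Try to split each active cluster in two; report whether any
            // split improved the cost.
            return false;
        }
        void improveParams(std::size_t /*iterations*/) {
            // Lloyd iterations of k-means over all current centres.
        }
        void polish(std::size_t /*iterations*/) {
            // Final, longer k-means pass.
        }
    };
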
diff --git a/include/maths/CXMeansOnline.h b/include/maths/CXMeansOnline.h
index d92dc2d09d..fb45693bbd 100644
--- a/include/maths/CXMeansOnline.h
+++ b/include/maths/CXMeansOnline.h
@@ -102,19 +102,19 @@ class CXMeansOnline : public CClusterer> {
     public:
         explicit CCluster(const CXMeansOnline& clusterer)
             : m_Index(clusterer.m_ClusterIndexGenerator.next()),
-              m_DataType(clusterer.m_DataType),
-              m_DecayRate(clusterer.m_DecayRate),
+              m_DataType(clusterer.m_DataType), m_DecayRate(clusterer.m_DecayRate),
               m_Structure(STRUCTURE_SIZE, clusterer.m_DecayRate) {}
 
         //! Initialize by traversing a state document.
-        bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+        bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                    core::CStateRestoreTraverser& traverser) {
             do {
                 const std::string& name = traverser.name();
                 RESTORE_BUILT_IN(INDEX_TAG, m_Index)
                 RESTORE(COVARIANCES_TAG, m_Covariances.fromDelimited(traverser.value()))
-                RESTORE(
-                    STRUCTURE_TAG,
-                    traverser.traverseSubLevel(boost::bind(&TKMeansOnline::acceptRestoreTraverser, &m_Structure, boost::cref(params), _1)))
+                RESTORE(STRUCTURE_TAG, traverser.traverseSubLevel(boost::bind(
+                                           &TKMeansOnline::acceptRestoreTraverser,
+                                           &m_Structure, boost::cref(params), _1)))
             } while (traverser.next());
 
             return true;
@@ -124,7 +124,8 @@ class CXMeansOnline : public CClusterer> {
         void acceptPersistInserter(core::CStatePersistInserter& inserter) const {
             inserter.insertValue(INDEX_TAG, m_Index);
             inserter.insertValue(COVARIANCES_TAG, m_Covariances.toDelimited());
-            inserter.insertLevel(STRUCTURE_TAG, boost::bind(&TKMeansOnline::acceptPersistInserter, m_Structure, _1));
+            inserter.insertLevel(STRUCTURE_TAG, boost::bind(&TKMeansOnline::acceptPersistInserter,
+                                                            m_Structure, _1));
         }
 
         //! Efficiently swap the contents of this and \p other.
@@ -175,13 +176,17 @@ class CXMeansOnline : public CClusterer> {
         //! Get the centre of the cluster.
         //!
         //! This is defined as the sample mean.
-        const TPointPrecise& centre() const { return CBasicStatistics::mean(m_Covariances); }
+        const TPointPrecise& centre() const {
+            return CBasicStatistics::mean(m_Covariances);
+        }
 
         //! Get the spread of the cluster.
         //!
         //! This is defined as the trace of the sample covariance matrix.
         double spread() const {
-            return std::sqrt(CBasicStatistics::maximumLikelihoodCovariances(m_Covariances).trace() / static_cast<double>(N));
+            return std::sqrt(
+                CBasicStatistics::maximumLikelihoodCovariances(m_Covariances).trace() /
+                static_cast<double>(N));
         }
 
         //! Get the sample covariance matrix of this cluster.
@@ -203,13 +208,17 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
         }
 
         //! Get the likelihood that \p x is from this cluster.
-        double logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc, const TPointPrecise& x) const {
+        double logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc,
+                                        const TPointPrecise& x) const {
             double likelihood;
             const TPointPrecise& mean = CBasicStatistics::mean(m_Covariances);
-            const TMatrixPrecise& covariances = CBasicStatistics::maximumLikelihoodCovariances(m_Covariances);
-            maths_t::EFloatingPointErrorStatus status = gaussianLogLikelihood(covariances, x - mean, likelihood, false);
+            const TMatrixPrecise& covariances =
+                CBasicStatistics::maximumLikelihoodCovariances(m_Covariances);
+            maths_t::EFloatingPointErrorStatus status =
+                gaussianLogLikelihood(covariances, x - mean, likelihood, false);
             if (status & maths_t::E_FpFailed) {
-                LOG_ERROR(<< "Unable to compute likelihood for " << x << " and cluster " << m_Index);
+                LOG_ERROR(<< "Unable to compute likelihood for " << x
+                          << " and cluster " << m_Index);
                 return core::constants::LOG_MIN_DOUBLE - 1.0;
             }
             if (status & maths_t::E_FpOverflowed) {
@@ -219,7 +228,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
         }
 
         //! Get \p numberSamples from this cluster.
-        void sample(std::size_t numberSamples, TPointPreciseVec& samples) const { m_Structure.sample(numberSamples, samples); }
+        void sample(std::size_t numberSamples, TPointPreciseVec& samples) const {
+            m_Structure.sample(numberSamples, samples);
+        }
 
         //! Try and find a split by a full search of the binary tree
         //! of possible optimal 2-splits of the data.
@@ -228,8 +239,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
         //! in the split.
         //! \param[in] indexGenerator The unique cluster identifier
         //! generator.
-        TOptionalClusterClusterPr
-        split(CPRNG::CXorOShiro128Plus& rng, double minimumCount, CClustererTypes::CIndexGenerator& indexGenerator) {
+        TOptionalClusterClusterPr split(CPRNG::CXorOShiro128Plus& rng,
+                                        double minimumCount,
+                                        CClustererTypes::CIndexGenerator& indexGenerator) {
             // We do our clustering top down to minimize space and avoid
             // making splits before we are confident they exist. This is
             // important for anomaly detection because we do *not* want
@@ -271,26 +283,32 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
             }
             TKMeansOnlineVec structure;
             m_Structure.split(split, structure);
-            LOG_TRACE(<< "Splitting cluster " << this->index() << " at " << this->centre() << " left = " << structure[0].print()
+            LOG_TRACE(<< "Splitting cluster " << this->index() << " at "
+                      << this->centre() << " left = " << structure[0].print()
                       << ", right = " << structure[1].print());
 
             std::size_t index[] = {indexGenerator.next(), indexGenerator.next()};
             indexGenerator.recycle(m_Index);
 
-            return TClusterClusterPr(CCluster(index[0], m_DataType, m_DecayRate, covariances[0], structure[0]),
-                                     CCluster(index[1], m_DataType, m_DecayRate, covariances[1], structure[1]));
+            return TClusterClusterPr(CCluster(index[0], m_DataType, m_DecayRate,
+                                              covariances[0], structure[0]),
+                                     CCluster(index[1], m_DataType, m_DecayRate,
+                                              covariances[1], structure[1]));
         }
 
         //! Check if this and \p other cluster should merge.
         //!
         //! \param[in] other The cluster to merge with this one.
-        bool shouldMerge(CCluster& other) { return BICGain(*this, other) <= MAXIMUM_MERGE_DISTANCE; }
+        bool shouldMerge(CCluster& other) {
+            return BICGain(*this, other) <= MAXIMUM_MERGE_DISTANCE;
+        }
 
         //! Merge this and \p other cluster.
         CCluster merge(CCluster& other, CClustererTypes::CIndexGenerator& indexGenerator) {
             CKMeansOnline structure(m_Structure);
             structure.merge(other.m_Structure);
-            CCluster result(indexGenerator.next(), m_DataType, m_DecayRate, m_Covariances + other.m_Covariances, structure);
+            CCluster result(indexGenerator.next(), m_DataType, m_DecayRate,
+                            m_Covariances + other.m_Covariances, structure);
             indexGenerator.recycle(m_Index);
             indexGenerator.recycle(other.m_Index);
             return result;
@@ -312,13 +330,17 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
         }
 
         //! Get the memory used by this component.
-        std::size_t memoryUsage() const { return core::CMemory::dynamicSize(m_Structure); }
+        std::size_t memoryUsage() const {
+            return core::CMemory::dynamicSize(m_Structure);
+        }
 
         //! Get Bayes Information Criterion decrease in going from one
         //! to two clusters.
         //!
         //! \note This is not necessarily positive.
-        static double BICGain(const CCluster& lhs, const CCluster& rhs) { return BICGain(lhs.m_Covariances, rhs.m_Covariances); }
+        static double BICGain(const CCluster& lhs, const CCluster& rhs) {
+            return BICGain(lhs.m_Covariances, rhs.m_Covariances);
+        }
 
     protected:
         CCluster(std::size_t index,
@@ -326,7 +348,8 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                  double decayRate,
                  const TCovariances& covariances,
                  const CKMeansOnline& structure)
-            : m_Index(index), m_DataType(dataType), m_DecayRate(decayRate), m_Covariances(covariances), m_Structure(structure) {}
+            : m_Index(index), m_DataType(dataType), m_DecayRate(decayRate),
+              m_Covariances(covariances), m_Structure(structure) {}
 
         //! Search for a split of the data that satisfies the constraints
         //! on both the BIC divergence and minimum count.
@@ -368,7 +391,8 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                 LOG_TRACE(<< "candidate = " << core::CContainerPrinter::print(candidate));
 
                 if (candidate.size() != 2) {
-                    LOG_ERROR(<< "Expected 2-split: " << core::CContainerPrinter::print(candidate));
+                    LOG_ERROR(<< "Expected 2-split: "
+                              << core::CContainerPrinter::print(candidate));
                     break;
                 }
                 if (candidate[0].empty() || candidate[1].empty()) {
@@ -385,7 +409,8 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                 TCovariances covariances[2];
                 CBasicStatistics::covariancesLedoitWolf(candidate[0], covariances[0]);
                 CBasicStatistics::covariancesLedoitWolf(candidate[1], covariances[1]);
-                double n[] = {CBasicStatistics::count(covariances[0]), CBasicStatistics::count(covariances[1])};
+                double n[] = {CBasicStatistics::count(covariances[0]),
+                              CBasicStatistics::count(covariances[1])};
                 double nmin = std::min(n[0], n[1]);
 
                 // Check the count constraint.
@@ -395,18 +420,21 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                 // Check the distance constraint.
                 double distance = BICGain(covariances[0], covariances[1]);
                 bool satisfiesDistance = (distance > MINIMUM_SPLIT_DISTANCE);
-                LOG_TRACE(<< "BIC(1) - BIC(2) = " << distance << " (to split " << MINIMUM_SPLIT_DISTANCE << ")");
+                LOG_TRACE(<< "BIC(1) - BIC(2) = " << distance << " (to split "
+                          << MINIMUM_SPLIT_DISTANCE << ")");
 
                 if (!satisfiesCount) {
                     // Recurse to the (one) node with sufficient count.
                     if (n[0] > minimumCount && candidate[0].size() > 1) {
                         node.swap(candidate[0]);
-                        remainder.insert(remainder.end(), candidate[1].begin(), candidate[1].end());
+                        remainder.insert(remainder.end(), candidate[1].begin(),
+                                         candidate[1].end());
                         continue;
                     }
                     if (n[1] > minimumCount && candidate[1].size() > 1) {
                         node.swap(candidate[1]);
-                        remainder.insert(remainder.end(), candidate[0].begin(), candidate[0].end());
+                        remainder.insert(remainder.end(), candidate[0].begin(),
+                                         candidate[0].end());
                         continue;
                     }
                 } else if (satisfiesDistance) {
@@ -426,7 +454,8 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                     }
 
                     distance = BICGain(covariances[0], covariances[1]);
-                    LOG_TRACE(<< "BIC(1) - BIC(2) = " << distance << " (to split " << MINIMUM_SPLIT_DISTANCE << ")");
+                    LOG_TRACE(<< "BIC(1) - BIC(2) = " << distance
+                              << " (to split " << MINIMUM_SPLIT_DISTANCE << ")");
 
                     if (distance > MINIMUM_SPLIT_DISTANCE) {
                         LOG_TRACE(<< "splitting");
@@ -434,17 +463,22 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                         result.resize(candidate.size());
                         TSphericalClusterVec clusters;
                         this->sphericalClusters(clusters);
-                        TSizeVec indexes(boost::counting_iterator<std::size_t>(0), boost::counting_iterator<std::size_t>(clusters.size()));
-                        COrderings::simultaneousSort(clusters, indexes, typename CSphericalCluster::SLess());
+                        TSizeVec indexes(
+                            boost::counting_iterator<std::size_t>(0),
+                            boost::counting_iterator<std::size_t>(clusters.size()));
+                        COrderings::simultaneousSort(
+                            clusters, indexes, typename CSphericalCluster::SLess());
                         for (std::size_t i = 0u; i < candidate.size(); ++i) {
                             for (std::size_t j = 0u; j < candidate[i].size(); ++j) {
                                 std::size_t k =
                                     std::lower_bound(
-                                        clusters.begin(), clusters.end(), candidate[i][j], typename CSphericalCluster::SLess()) -
+                                        clusters.begin(), clusters.end(),
+                                        candidate[i][j],
+                                        typename CSphericalCluster::SLess()) -
                                     clusters.begin();
                                 if (k >= clusters.size()) {
-                                    LOG_ERROR(<< "Missing " << candidate[i][j]
-                                              << ", clusters = " << core::CContainerPrinter::print(clusters));
+                                    LOG_ERROR(<< "Missing " << candidate[i][j] << ", clusters = "
+                                              << core::CContainerPrinter::print(clusters));
                                     return false;
                                 }
                                 result[i].push_back(indexes[k]);
@@ -477,11 +511,14 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
         }
 
         //! Get the closest (in Mahalanobis distance) cluster to \p x.
-        static std::size_t nearest(const TSphericalCluster& x, const TCovariances (&c)[2]) {
+        static std::size_t nearest(const TSphericalCluster& x,
+                                   const TCovariances (&c)[2]) {
             TPrecise d[] = {0, 0};
             TPointPrecise x_(x);
-            inverseQuadraticForm(CBasicStatistics::maximumLikelihoodCovariances(c[0]), x_ - CBasicStatistics::mean(c[0]), d[0]);
-            inverseQuadraticForm(CBasicStatistics::maximumLikelihoodCovariances(c[1]), x_ - CBasicStatistics::mean(c[1]), d[1]);
+            inverseQuadraticForm(CBasicStatistics::maximumLikelihoodCovariances(c[0]),
+                                 x_ - CBasicStatistics::mean(c[0]), d[0]);
+            inverseQuadraticForm(CBasicStatistics::maximumLikelihoodCovariances(c[1]),
+                                 x_ - CBasicStatistics::mean(c[1]), d[1]);
             return d[0] < d[1] ? 0 : 1;
         }
 
@@ -499,7 +536,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
 
     private:
         //! Get the scaled decay rate for use by propagateForwardsByTime.
-        double scaledDecayRate() const { return std::pow(0.5, static_cast<double>(N)) * m_DecayRate; }
+        double scaledDecayRate() const {
+            return std::pow(0.5, static_cast<double>(N)) * m_DecayRate;
+        }
 
     private:
         //! A unique identifier for this cluster.
@@ -548,12 +587,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                   double minimumCategoryCount = MINIMUM_CATEGORY_COUNT,
                   const CClustererTypes::TSplitFunc& splitFunc = CClustererTypes::CDoNothing(),
                   const CClustererTypes::TMergeFunc& mergeFunc = CClustererTypes::CDoNothing())
-        : CClusterer(splitFunc, mergeFunc),
-          m_DataType(dataType),
-          m_InitialDecayRate(decayRate),
-          m_DecayRate(decayRate),
-          m_HistoryLength(0.0),
-          m_WeightCalc(weightCalc),
+        : CClusterer(splitFunc, mergeFunc), m_DataType(dataType),
+          m_InitialDecayRate(decayRate), m_DecayRate(decayRate),
+          m_HistoryLength(0.0), m_WeightCalc(weightCalc),
           m_MinimumClusterFraction(minimumClusterFraction),
           m_MinimumClusterCount(minimumClusterCount),
           m_MinimumCategoryCount(minimumCategoryCount),
@@ -562,15 +598,13 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
     //! Construct by traversing a state document.
     CXMeansOnline(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser)
         : CClusterer(CClustererTypes::CDoNothing(), CClustererTypes::CDoNothing()),
-          m_DataType(params.s_DataType),
-          m_InitialDecayRate(params.s_DecayRate),
-          m_DecayRate(params.s_DecayRate),
-          m_HistoryLength(),
+          m_DataType(params.s_DataType), m_InitialDecayRate(params.s_DecayRate),
+          m_DecayRate(params.s_DecayRate), m_HistoryLength(),
           m_WeightCalc(maths_t::E_ClustersEqualWeight),
-          m_MinimumClusterFraction(),
-          m_MinimumClusterCount(),
+          m_MinimumClusterFraction(), m_MinimumClusterCount(),
           m_MinimumCategoryCount(params.s_MinimumCategoryCount) {
-        traverser.traverseSubLevel(boost::bind(&CXMeansOnline::acceptRestoreTraverser, this, boost::cref(params), _1));
+        traverser.traverseSubLevel(boost::bind(&CXMeansOnline::acceptRestoreTraverser,
+                                               this, boost::cref(params), _1));
     }
 
     //! Construct by traversing a state document.
@@ -578,26 +612,21 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                   const CClustererTypes::TSplitFunc& splitFunc,
                   const CClustererTypes::TMergeFunc& mergeFunc,
                   core::CStateRestoreTraverser& traverser)
-        : CClusterer(splitFunc, mergeFunc),
-          m_DataType(params.s_DataType),
-          m_InitialDecayRate(params.s_DecayRate),
-          m_DecayRate(params.s_DecayRate),
-          m_HistoryLength(),
-          m_WeightCalc(maths_t::E_ClustersEqualWeight),
-          m_MinimumClusterFraction(),
-          m_MinimumClusterCount(),
+        : CClusterer(splitFunc, mergeFunc), m_DataType(params.s_DataType),
+          m_InitialDecayRate(params.s_DecayRate), m_DecayRate(params.s_DecayRate),
+          m_HistoryLength(), m_WeightCalc(maths_t::E_ClustersEqualWeight),
+          m_MinimumClusterFraction(), m_MinimumClusterCount(),
           m_MinimumCategoryCount(params.s_MinimumCategoryCount) {
-        traverser.traverseSubLevel(boost::bind(&CXMeansOnline::acceptRestoreTraverser, this, boost::cref(params), _1));
+        traverser.traverseSubLevel(boost::bind(&CXMeansOnline::acceptRestoreTraverser,
+                                               this, boost::cref(params), _1));
     }
 
     //! The x-means clusterer has value semantics.
     CXMeansOnline(const CXMeansOnline& other)
         : CClusterer(other.splitFunc(), other.mergeFunc()),
-          m_Rng(other.m_Rng),
-          m_DataType(other.m_DataType),
+          m_Rng(other.m_Rng), m_DataType(other.m_DataType),
           m_InitialDecayRate(other.m_InitialDecayRate),
-          m_DecayRate(other.m_DecayRate),
-          m_HistoryLength(other.m_HistoryLength),
+          m_DecayRate(other.m_DecayRate), m_HistoryLength(other.m_HistoryLength),
           m_WeightCalc(other.m_WeightCalc),
           m_MinimumClusterFraction(other.m_MinimumClusterFraction),
           m_MinimumClusterCount(other.m_MinimumClusterCount),
@@ -636,21 +665,26 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
     //! \name Clusterer Contract
     //@{
     //! Get the tag name for this clusterer.
-    virtual std::string persistenceTag() const { return CClustererTypes::X_MEANS_ONLINE_TAG; }
+    virtual std::string persistenceTag() const {
+        return CClustererTypes::X_MEANS_ONLINE_TAG;
+    }
 
     //! Persist state by passing information to the supplied inserter.
     virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const {
         for (std::size_t i = 0u; i < m_Clusters.size(); ++i) {
-            inserter.insertLevel(CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter, &m_Clusters[i], _1));
+            inserter.insertLevel(CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter,
+                                                          &m_Clusters[i], _1));
         }
         inserter.insertValue(DECAY_RATE_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision);
-        inserter.insertValue(HISTORY_LENGTH_TAG, m_HistoryLength, core::CIEEE754::E_SinglePrecision);
+        inserter.insertValue(HISTORY_LENGTH_TAG, m_HistoryLength,
+                             core::CIEEE754::E_SinglePrecision);
         inserter.insertValue(RNG_TAG, m_Rng.toString());
         inserter.insertValue(WEIGHT_CALC_TAG, static_cast<int>(m_WeightCalc));
         inserter.insertValue(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction);
         inserter.insertValue(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount);
         inserter.insertLevel(CLUSTER_INDEX_GENERATOR_TAG,
-                             boost::bind(&CClustererTypes::CIndexGenerator::acceptPersistInserter, &m_ClusterIndexGenerator, _1));
+                             boost::bind(&CClustererTypes::CIndexGenerator::acceptPersistInserter,
+                                         &m_ClusterIndexGenerator, _1));
     }
 
     //! Creates a copy of the clusterer.
@@ -660,13 +694,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
 
     //! Clear the current clusterer state.
     virtual void clear() {
-        *this = CXMeansOnline(m_DataType,
-                              m_WeightCalc,
-                              m_InitialDecayRate,
-                              m_MinimumClusterFraction,
-                              m_MinimumClusterCount,
-                              m_MinimumCategoryCount,
-                              this->splitFunc(),
+        *this = CXMeansOnline(m_DataType, m_WeightCalc, m_InitialDecayRate,
+                              m_MinimumClusterFraction, m_MinimumClusterCount,
+                              m_MinimumCategoryCount, this->splitFunc(),
                               this->mergeFunc());
     }
 
@@ -690,7 +720,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
     }
 
     //! Check if the cluster identified by \p index exists.
-    virtual bool hasCluster(std::size_t index) const { return this->cluster(index) != nullptr; }
+    virtual bool hasCluster(std::size_t index) const {
+        return this->cluster(index) != nullptr;
+    }
 
     //! Get the centre of the cluster identified by \p index.
     virtual bool clusterCentre(std::size_t index, TPointPrecise& result) const {
@@ -716,7 +748,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
 
     //! Gets the index of the cluster(s) to which \p point belongs
     //! together with their weighting factor.
-    virtual void cluster(const TPointPrecise& point, TSizeDoublePr2Vec& result, double count = 1.0) const {
+    virtual void cluster(const TPointPrecise& point,
+                         TSizeDoublePr2Vec& result,
+                         double count = 1.0) const {
         result.clear();
 
         if (m_Clusters.empty()) {
@@ -756,7 +790,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
             result[i].second /= normalizer;
             pmax = std::max(pmax, result[i].second);
         }
-        result.erase(std::remove_if(result.begin(), result.end(), CProbabilityLessThan(HARD_ASSIGNMENT_THRESHOLD * pmax)), result.end());
+        result.erase(std::remove_if(result.begin(), result.end(),
+                                    CProbabilityLessThan(HARD_ASSIGNMENT_THRESHOLD * pmax)),
+                     result.end());
         normalizer = 0.0;
         for (std::size_t i = 0u; i < result.size(); ++i) {
             normalizer += result[i].second;
@@ -780,11 +816,13 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
             }
         } else {
             using TDoubleSizePr = std::pair<double, std::size_t>;
-            using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack<TDoubleSizePr, 2, std::greater<TDoubleSizePr>>;
+            using TMaxAccumulator =
+                CBasicStatistics::COrderStatisticsStack<TDoubleSizePr, 2, std::greater<TDoubleSizePr>>;
 
             TMaxAccumulator closest;
             for (std::size_t i = 0u; i < m_Clusters.size(); ++i) {
-                closest.add(std::make_pair(m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, x), i));
+                closest.add(std::make_pair(
+                    m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, x), i));
             }
             closest.sort();
             LOG_TRACE(<< "closest = " << closest.print());
@@ -814,14 +852,16 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
                 // Get the weighted counts.
                 double count0 = count * p0;
                 double count1 = count * p1;
-                LOG_TRACE(<< "Soft adding " << x << " " << count0 << " to " << cluster0->centre() << " and " << count1 << " to "
+                LOG_TRACE(<< "Soft adding " << x << " " << count0 << " to "
+                          << cluster0->centre() << " and " << count1 << " to "
                           << cluster1->centre());
 
                 cluster0->add(x, count0);
                 cluster1->add(x, count1);
                 clusters.push_back(std::make_pair(cluster0->index(), count0));
                 clusters.push_back(std::make_pair(cluster1->index(), count1));
-                if (this->maybeSplit(cluster0) || this->maybeSplit(cluster1) || this->maybeMerge(cluster0) || this->maybeMerge(cluster1)) {
+                if (this->maybeSplit(cluster0) || this->maybeSplit(cluster1) ||
+                    this->maybeMerge(cluster0) || this->maybeMerge(cluster1)) {
                     this->cluster(x, clusters, count);
                 }
             }
@@ -898,7 +938,8 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
     //! Debug the memory used by the object.
     virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
         mem->setName("CXMeansOnline");
-        core::CMemoryDebug::dynamicSize("m_ClusterIndexGenerator", m_ClusterIndexGenerator, mem);
+        core::CMemoryDebug::dynamicSize("m_ClusterIndexGenerator",
+                                        m_ClusterIndexGenerator, mem);
         core::CMemoryDebug::dynamicSize("m_Clusters", m_Clusters, mem);
     }
 
@@ -945,31 +986,34 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
     }
 
     //! Get the index generator.
-    CClustererTypes::CIndexGenerator& indexGenerator() { return m_ClusterIndexGenerator; }
+    CClustererTypes::CIndexGenerator& indexGenerator() {
+        return m_ClusterIndexGenerator;
+    }
 
 protected:
     //! Restore by traversing a state document
-    bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+    bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser) {
         do {
             const std::string& name = traverser.name();
-            RESTORE_SETUP_TEARDOWN(
-                CLUSTER_TAG,
-                CCluster cluster(*this),
-                traverser.traverseSubLevel(boost::bind(&CCluster::acceptRestoreTraverser, &cluster, boost::cref(params), _1)),
-                m_Clusters.push_back(cluster))
-            RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG,
-                                   double decayRate,
+            RESTORE_SETUP_TEARDOWN(CLUSTER_TAG, CCluster cluster(*this),
+                                   traverser.traverseSubLevel(boost::bind(
+                                       &CCluster::acceptRestoreTraverser,
+                                       &cluster, boost::cref(params), _1)),
+                                   m_Clusters.push_back(cluster))
+            RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate,
                                    core::CStringUtils::stringToType(traverser.value(), decayRate),
                                    this->decayRate(decayRate))
             RESTORE_BUILT_IN(HISTORY_LENGTH_TAG, m_HistoryLength)
             RESTORE(RNG_TAG, m_Rng.fromString(traverser.value()));
             RESTORE(CLUSTER_INDEX_GENERATOR_TAG,
-                    traverser.traverseSubLevel(
-                        boost::bind(&CClustererTypes::CIndexGenerator::acceptRestoreTraverser, &m_ClusterIndexGenerator, _1)))
-            RESTORE_SETUP_TEARDOWN(WEIGHT_CALC_TAG,
-                                   int weightCalc,
-                                   core::CStringUtils::stringToType(traverser.value(), weightCalc),
-                                   m_WeightCalc = static_cast<maths_t::EClusterWeightCalc>(weightCalc))
+                    traverser.traverseSubLevel(boost::bind(
+                        &CClustererTypes::CIndexGenerator::acceptRestoreTraverser,
+                        &m_ClusterIndexGenerator, _1)))
+            RESTORE_SETUP_TEARDOWN(
+                WEIGHT_CALC_TAG, int weightCalc,
+                core::CStringUtils::stringToType(traverser.value(), weightCalc),
+                m_WeightCalc = static_cast<maths_t::EClusterWeightCalc>(weightCalc))
             RESTORE_BUILT_IN(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction)
             RESTORE_BUILT_IN(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount)
         } while (traverser.next());
@@ -995,7 +1039,8 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
             for (std::size_t i = 0u; i < m_Clusters.size(); ++i) {
                 count += m_Clusters[i].count();
             }
-            double scale = std::max(m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0);
+            double scale = std::max(
+                m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0);
             count *= m_MinimumClusterFraction / scale;
             result = std::max(result, count);
         }
@@ -1009,8 +1054,10 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
             return false;
         }
 
-        if (TOptionalClusterClusterPr split = cluster->split(m_Rng, this->minimumSplitCount(), m_ClusterIndexGenerator)) {
-            LOG_TRACE(<< "Splitting cluster " << cluster->index() << " at " << cluster->centre());
+        if (TOptionalClusterClusterPr split = cluster->split(
+                m_Rng, this->minimumSplitCount(), m_ClusterIndexGenerator)) {
+            LOG_TRACE(<< "Splitting cluster " << cluster->index() << " at "
+                      << cluster->centre());
             std::size_t index = cluster->index();
             *cluster = split->first;
             m_Clusters.push_back(split->second);
@@ -1030,8 +1077,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
         CCluster* nearest = this->nearest(*cluster);
 
         if (nearest && nearest->shouldMerge(*cluster)) {
-            LOG_TRACE(<< "Merging cluster " << nearest->index() << " at " << nearest->centre() << " and cluster " << cluster->index()
-                      << " at " << cluster->centre());
+            LOG_TRACE(<< "Merging cluster " << nearest->index() << " at "
+                      << nearest->centre() << " and cluster "
+                      << cluster->index() << " at " << cluster->centre());
             std::size_t index1 = nearest->index();
             std::size_t index2 = cluster->index();
             CCluster merged = nearest->merge(*cluster, m_ClusterIndexGenerator);
@@ -1051,7 +1099,8 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
         }
 
        using TDoubleSizePr = std::pair<double, std::size_t>;
-        using TMinAccumulator = CBasicStatistics::COrderStatisticsStack<TDoubleSizePr, 1>;
+        using TMinAccumulator =
+            CBasicStatistics::COrderStatisticsStack<TDoubleSizePr, 1>;
 
         bool result = false;
 
@@ -1076,8 +1125,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
             CCluster& cluster = m_Clusters[prune[0].second];
             CCluster* nearest = this->nearest(cluster);
             if (nearest) {
-                LOG_TRACE(<< "Merging cluster " << cluster.index() << " at " << cluster.centre() << " and cluster " << nearest->index()
-                          << " at " << nearest->centre());
+                LOG_TRACE(<< "Merging cluster " << cluster.index() << " at "
+                          << cluster.centre() << " and cluster "
+                          << nearest->index() << " at " << nearest->centre());
                 CCluster merge = nearest->merge(cluster, m_ClusterIndexGenerator);
                 (this->mergeFunc())(cluster.index(), nearest->index(), merge.index());
                 nearest->swap(merge);
@@ -1124,7 +1174,9 @@ class CXMeansOnline : public CClusterer<CVectorNx1<T, N>> {
     public:
         CProbabilityLessThan(double threshold) : m_Threshold(threshold) {}
 
-        bool operator()(const TSizeDoublePr& p) const { return p.second < m_Threshold; }
+        bool operator()(const TSizeDoublePr& p) const {
+            return p.second < m_Threshold;
+        }
 
     private:
         double m_Threshold;
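
[Editor's note: the split and merge logic in this file hinges on BICGain: split a cluster when modelling it as two clusters lowers the Bayes Information Criterion by more than MINIMUM_SPLIT_DISTANCE, and merge when the gain is at most MAXIMUM_MERGE_DISTANCE. A minimal one-dimensional sketch of that decision follows, assuming Gaussian clusters and an illustrative threshold; the real code computes the gain from N-dimensional sample covariances.]

    // Hypothetical 1-D illustration of the split rule: split when
    // BIC(one Gaussian) - BIC(two Gaussians) exceeds a threshold.
    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    // Maximised Gaussian log-likelihood of x under its own MLE mean/variance.
    double gaussianLogLikelihood(const std::vector<double>& x) {
        double n = static_cast<double>(x.size());
        double mean = 0.0;
        for (double v : x) { mean += v; }
        mean /= n;
        double variance = 0.0;
        for (double v : x) { variance += (v - mean) * (v - mean); }
        variance = std::max(variance / n, 1e-12); // guard against zero variance
        const double TWO_PI = 6.283185307179586;
        return -0.5 * n * (std::log(TWO_PI * variance) + 1.0);
    }

    // BIC = k * log(n) - 2 * logL for k free parameters and n points.
    double bic(double logL, double k, double n) {
        return k * std::log(n) - 2.0 * logL;
    }

    int main() {
        std::vector<double> all{0.0, 0.1, -0.1, 10.0, 10.2, 9.9};
        std::vector<double> left{0.0, 0.1, -0.1};
        std::vector<double> right{10.0, 10.2, 9.9};
        double n = static_cast<double>(all.size());

        // One cluster: mean + variance = 2 parameters. Two clusters: two
        // means, two variances and a mixing weight = 5 parameters.
        double bic1 = bic(gaussianLogLikelihood(all), 2.0, n);
        double bic2 = bic(gaussianLogLikelihood(left) + gaussianLogLikelihood(right), 5.0, n);
        double gain = bic1 - bic2; // analogous to BICGain; not necessarily positive

        const double MINIMUM_SPLIT_DISTANCE = 6.0; // illustrative threshold only
        std::cout << "BIC(1) - BIC(2) = " << gain
                  << (gain > MINIMUM_SPLIT_DISTANCE ? " -> split" : " -> keep") << '\n';
    }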
diff --git a/include/maths/CXMeansOnline1d.h b/include/maths/CXMeansOnline1d.h
index be8a6e86d1..7377984e04 100644
--- a/include/maths/CXMeansOnline1d.h
+++ b/include/maths/CXMeansOnline1d.h
@@ -120,7 +120,8 @@ class MATHS_EXPORT CXMeansOnline1d : public CClusterer1d {
         explicit CCluster(const CXMeansOnline1d& clusterer);
 
         //! Construct by traversing a state document
-        bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+        bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                    core::CStateRestoreTraverser& traverser);
 
         //! Persist state by passing information to the supplied inserter
         void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -185,7 +186,10 @@ class MATHS_EXPORT CXMeansOnline1d : public CClusterer1d {
         //! model the clusters.
         //! \param[in] smallest The smallest sample added to date.
         //! \param[in] interval The Winsorisation interval.
-        bool shouldMerge(CCluster& other, CAvailableModeDistributions distributions, double smallest, const TDoubleDoublePr& interval);
+        bool shouldMerge(CCluster& other,
+                         CAvailableModeDistributions distributions,
+                         double smallest,
+                         const TDoubleDoublePr& interval);
 
         //! Merge this and \p other cluster.
         CCluster merge(CCluster& other, CIndexGenerator& indexGenerator);
@@ -203,7 +207,9 @@ class MATHS_EXPORT CXMeansOnline1d : public CClusterer1d {
         std::size_t memoryUsage() const;
 
     private:
-        CCluster(std::size_t index, const CNormalMeanPrecConjugate& prior, const CNaturalBreaksClassifier& structure);
+        CCluster(std::size_t index,
+                 const CNormalMeanPrecConjugate& prior,
+                 const CNaturalBreaksClassifier& structure);
 
     private:
         //! A unique identifier for this cluster.
@@ -256,7 +262,8 @@ class MATHS_EXPORT CXMeansOnline1d : public CClusterer1d {
                     const TMergeFunc& mergeFunc = CDoNothing());
 
     //! Construct by traversing a state document.
-    CXMeansOnline1d(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    CXMeansOnline1d(const SDistributionRestoreParams& params,
+                    core::CStateRestoreTraverser& traverser);
 
     //! Construct by traversing a state document.
     CXMeansOnline1d(const SDistributionRestoreParams& params,
@@ -369,7 +376,8 @@ class MATHS_EXPORT CXMeansOnline1d : public CClusterer1d {
 
 private:
    using TMinAccumulator = CBasicStatistics::COrderStatisticsStack<double, 1>;
-    using TMaxAccumulator = CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double>>;
+    using TMaxAccumulator =
+        CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double>>;
 
 private:
     //! The minimum Kullback-Leibler divergence at which we'll
@@ -393,7 +401,8 @@ class MATHS_EXPORT CXMeansOnline1d : public CClusterer1d {
 
 private:
     //! Restore by traversing a state document.
-    bool acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
+                                core::CStateRestoreTraverser& traverser);
 
     //! Get the cluster with the index \p index.
     const CCluster* cluster(std::size_t index) const;
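
[Editor's note: both clusterers restore state with the same loop shape seen in acceptRestoreTraverser above: walk the (name, value) pairs of a state document and match each tag. A stripped-down sketch of the pattern with a toy traverser follows; the RESTORE*/RESTORE_BUILT_IN macros in the real code expand to roughly this, and all names here are illustrative.]

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Toy stand-in for core::CStateRestoreTraverser: a flat list of (tag, value).
    class Traverser {
    public:
        explicit Traverser(std::vector<std::pair<std::string, std::string>> state)
            : m_State(std::move(state)) {}
        const std::string& name() const { return m_State[m_Index].first; }
        const std::string& value() const { return m_State[m_Index].second; }
        bool next() { return ++m_Index < m_State.size(); }

    private:
        std::vector<std::pair<std::string, std::string>> m_State;
        std::size_t m_Index = 0;
    };

    struct Clusterer {
        double decayRate = 0.0;
        double historyLength = 0.0;

        // Same shape as the acceptRestoreTraverser methods in this patch:
        // one pass over the document, each tag restored where it matches.
        bool acceptRestoreTraverser(Traverser& traverser) {
            do {
                const std::string& name = traverser.name();
                if (name == "decay_rate") {
                    decayRate = std::stod(traverser.value());
                } else if (name == "history_length") {
                    historyLength = std::stod(traverser.value());
                } // unknown tags are skipped, which keeps the format extensible
            } while (traverser.next());
            return true;
        }
    };

    int main() {
        Traverser traverser({{"decay_rate", "0.05"}, {"history_length", "1000"}});
        Clusterer clusterer;
        clusterer.acceptRestoreTraverser(traverser);
        std::cout << clusterer.decayRate << ' ' << clusterer.historyLength << '\n';
    }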
diff --git a/include/maths/CXMeansOnlineFactory.h b/include/maths/CXMeansOnlineFactory.h
index e460edd8c6..88616ede24 100644
--- a/include/maths/CXMeansOnlineFactory.h
+++ b/include/maths/CXMeansOnlineFactory.h
@@ -29,20 +29,21 @@ namespace xmeans_online_factory_detail {
 template<typename T, std::size_t N>
 class CFactory {};
 
-#define XMEANS_FACTORY(T, N)                                                                                                               \
-    template<>                                                                                                                             \
-    class MATHS_EXPORT CFactory<T, N> {                                                                                              \
-    public:                                                                                                                                \
-        static CClusterer<CVectorNx1<T, N>>* make(maths_t::EDataType dataType,                                                             \
-                                                  maths_t::EClusterWeightCalc weightCalc,                                                  \
-                                                  double decayRate,                                                                        \
-                                                  double minimumClusterFraction,                                                           \
-                                                  double minimumClusterCount,                                                              \
-                                                  double minimumCategoryCount);                                                            \
-        static CClusterer<CVectorNx1<T, N>>* restore(const SDistributionRestoreParams& params,                                             \
-                                                     const CClustererTypes::TSplitFunc& splitFunc,                                         \
-                                                     const CClustererTypes::TMergeFunc& mergeFunc,                                         \
-                                                     core::CStateRestoreTraverser& traverser);                                             \
+#define XMEANS_FACTORY(T, N)                                                              \
+    template<>                                                                            \
+    class MATHS_EXPORT CFactory<T, N> {                                             \
+    public:                                                                               \
+        static CClusterer<CVectorNx1<T, N>>* make(maths_t::EDataType dataType,            \
+                                                  maths_t::EClusterWeightCalc weightCalc, \
+                                                  double decayRate,                       \
+                                                  double minimumClusterFraction,          \
+                                                  double minimumClusterCount,             \
+                                                  double minimumCategoryCount);           \
+        static CClusterer<CVectorNx1<T, N>>*                                              \
+        restore(const SDistributionRestoreParams& params,                                 \
+                const CClustererTypes::TSplitFunc& splitFunc,                             \
+                const CClustererTypes::TMergeFunc& mergeFunc,                             \
+                core::CStateRestoreTraverser& traverser);                                 \
     }
 XMEANS_FACTORY(CFloatStorage, 2);
 XMEANS_FACTORY(CFloatStorage, 3);
@@ -73,16 +74,19 @@ class MATHS_EXPORT CXMeansOnlineFactory {
                                                      double minimumClusterCount,
                                                      double minimumCategoryCount) {
         return xmeans_online_factory_detail::CFactory<T, N>::make(
-            dataType, weightCalc, decayRate, minimumClusterFraction, minimumClusterCount, minimumCategoryCount);
+            dataType, weightCalc, decayRate, minimumClusterFraction,
+            minimumClusterCount, minimumCategoryCount);
     }
 
     //! Construct by traversing a state document.
     template<typename T, std::size_t N>
-    static inline CClusterer<CVectorNx1<T, N>>* restore(const SDistributionRestoreParams& params,
-                                                        const CClustererTypes::TSplitFunc& splitFunc,
-                                                        const CClustererTypes::TMergeFunc& mergeFunc,
-                                                        core::CStateRestoreTraverser& traverser) {
-        return xmeans_online_factory_detail::CFactory<T, N>::restore(params, splitFunc, mergeFunc, traverser);
+    static inline CClusterer<CVectorNx1<T, N>>*
+    restore(const SDistributionRestoreParams& params,
+            const CClustererTypes::TSplitFunc& splitFunc,
+            const CClustererTypes::TMergeFunc& mergeFunc,
+            core::CStateRestoreTraverser& traverser) {
+        return xmeans_online_factory_detail::CFactory<T, N>::restore(
+            params, splitFunc, mergeFunc, traverser);
     }
 };
 }
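
[Editor's note: XMEANS_FACTORY is the usual trick for keeping template instantiations inside one translation unit: the primary CFactory template is left empty and the macro declares an explicit specialisation per (type, dimension) pair. A reduced sketch of the same pattern, with hypothetical names:]

    // Sketch of the macro-driven specialisation pattern; the primary template
    // is deliberately unusable, and each DECLARE_FACTORY adds one usable
    // specialisation whose definitions could live in a single .cc file.
    #include <cstddef>
    #include <iostream>

    template<typename T, std::size_t N>
    class Factory {};

    #define DECLARE_FACTORY(T, N)                               \
        template<>                                              \
        class Factory<T, N> {                                   \
        public:                                                 \
            static const char* make() { return #T "/" #N; }     \
        }

    DECLARE_FACTORY(float, 2);
    DECLARE_FACTORY(float, 3);

    int main() {
        std::cout << Factory<float, 2>::make() << '\n'; // prints "float/2"
        std::cout << Factory<float, 3>::make() << '\n'; // prints "float/3"
    }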
diff --git a/include/maths/Constants.h b/include/maths/Constants.h
index 8e8c22ecf0..7c73843308 100644
--- a/include/maths/Constants.h
+++ b/include/maths/Constants.h
@@ -125,8 +125,10 @@ class MATHS_EXPORT CConstantWeights {
     }
     //! Get a single unit weight for data with \p dimension.
     template<typename VECTOR>
-    static core::CSmallVector<core::CSmallVector<VECTOR, 4>, 1> singleUnit(std::size_t dimension) {
-        return core::CSmallVector<core::CSmallVector<VECTOR, 4>, 1>{core::CSmallVector<VECTOR, 4>{VECTOR(dimension, 1.0)}};
+    static core::CSmallVector<core::CSmallVector<VECTOR, 4>, 1>
+    singleUnit(std::size_t dimension) {
+        return core::CSmallVector<core::CSmallVector<VECTOR, 4>, 1>{
+            core::CSmallVector<VECTOR, 4>{VECTOR(dimension, 1.0)}};
     }
 };
 
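[Editor's note: for reference, singleUnit just builds a one-element container holding a single weight vector of the requested dimension with every component equal to 1.0. A sketch with std::vector standing in for core::CSmallVector:]

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // VECTOR(dimension, 1.0) is the unit weight for one sample of the given
    // dimension; the outer vector holds one such weight vector.
    template<typename VECTOR>
    std::vector<VECTOR> singleUnit(std::size_t dimension) {
        return {VECTOR(dimension, 1.0)};
    }

    int main() {
        auto weights = singleUnit<std::vector<double>>(3);
        // one weight vector, three components, all 1.0
        std::cout << weights.size() << ' ' << weights[0].size() << ' '
                  << weights[0][0] << '\n';
    }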
diff --git a/include/maths/MathsTypes.h b/include/maths/MathsTypes.h
index ceda1782f7..26bf7c84f7 100644
--- a/include/maths/MathsTypes.h
+++ b/include/maths/MathsTypes.h
@@ -79,7 +79,9 @@ double count(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights);
 
 //! Extract the effective sample count from a collection of weights.
 MATHS_EXPORT
-TDouble10Vec count(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights);
+TDouble10Vec count(std::size_t dimension,
+                   const TWeightStyleVec& weightStyles,
+                   const TDouble10Vec4Vec& weights);
 
 //! Extract the effective sample count with which to update a model
 //! from a collection of weights.
@@ -89,7 +91,9 @@ double countForUpdate(const TWeightStyleVec& weightStyles, const TDouble4Vec& we
 //! Extract the effective sample count with which to update a model
 //! from a collection of weights.
 MATHS_EXPORT
-TDouble10Vec countForUpdate(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights);
+TDouble10Vec countForUpdate(std::size_t dimension,
+                            const TWeightStyleVec& weightStyles,
+                            const TDouble10Vec4Vec& weights);
 
 //! Extract the winsorisation weight from a collection of weights.
 MATHS_EXPORT
@@ -97,7 +101,8 @@ double winsorisationWeight(const TWeightStyleVec& weightStyles, const TDouble4Ve
 
 //! Extract the winsorisation weight from a collection of weights.
 MATHS_EXPORT
-TDouble10Vec winsorisationWeight(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights);
+TDouble10Vec winsorisationWeight(const TWeightStyleVec& weightStyles,
+                                 const TDouble10Vec4Vec& weights);
 
 //! Extract the variance scale from a collection of weights.
 MATHS_EXPORT
@@ -105,7 +110,9 @@ double seasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4
 
 //! Extract the variance scale from a collection of weights.
 MATHS_EXPORT
-TDouble10Vec seasonalVarianceScale(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights);
+TDouble10Vec seasonalVarianceScale(std::size_t dimension,
+                                   const TWeightStyleVec& weightStyles,
+                                   const TDouble10Vec4Vec& weights);
 
 //! Extract the variance scale from a collection of weights.
 MATHS_EXPORT
@@ -113,7 +120,9 @@ double countVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec
 
 //! Extract the variance scale from a collection of weights.
 MATHS_EXPORT
-TDouble10Vec countVarianceScale(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights);
+TDouble10Vec countVarianceScale(std::size_t dimension,
+                                const TWeightStyleVec& weightStyles,
+                                const TDouble10Vec4Vec& weights);
 
 //! Check if a non-unit seasonal variance scale applies.
 MATHS_EXPORT
@@ -121,15 +130,18 @@ bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble
 
 //! Check if a non-unit seasonal variance scale applies.
 MATHS_EXPORT
-bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights);
+bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles,
+                              const TDouble4Vec1Vec& weights);
 
 //! Check if a non-unit seasonal variance scale applies.
 MATHS_EXPORT
-bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights);
+bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles,
+                              const TDouble10Vec4Vec& weights);
 
 //! Check if a non-unit seasonal variance scale applies.
 MATHS_EXPORT
-bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec1Vec& weights);
+bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles,
+                              const TDouble10Vec4Vec1Vec& weights);
 
 //! Check if a non-unit count variance scale applies.
 MATHS_EXPORT
@@ -141,11 +153,13 @@ bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Ve
 
 //! Check if a non-unit count variance scale applies.
 MATHS_EXPORT
-bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights);
+bool hasCountVarianceScale(const TWeightStyleVec& weightStyles,
+                           const TDouble10Vec4Vec& weights);
 
 //! Check if a non-unit count variance scale applies.
 MATHS_EXPORT
-bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec1Vec& weights);
+bool hasCountVarianceScale(const TWeightStyleVec& weightStyles,
+                           const TDouble10Vec4Vec1Vec& weights);
 
 //! Set \p style to weight or append if it isn't in \p weightStyles.
 MATHS_EXPORT
@@ -153,7 +167,11 @@ void setWeight(ESampleWeightStyle style, double weight, TWeightStyleVec& weightS
 
 //! Set \p style to weight or append if it isn't in \p weightStyles.
 MATHS_EXPORT
-void setWeight(ESampleWeightStyle style, double weight, std::size_t dimension, TWeightStyleVec& weightStyles, TDouble10Vec4Vec& weights);
+void setWeight(ESampleWeightStyle style,
+               double weight,
+               std::size_t dimension,
+               TWeightStyleVec& weightStyles,
+               TDouble10Vec4Vec& weights);
 
 //! Enumerates the possible probability of less likely sample calculations.
 //!
@@ -183,7 +201,12 @@ enum EClusterWeightCalc { E_ClustersEqualWeight, E_ClustersFractionWeight };
 //! calculations. These provide finer grained information than
 //! a pass/fail boolean which can be used to take appropriate
 //! action in the calling context.
-enum EFloatingPointErrorStatus { E_FpNoErrors = 0x0, E_FpOverflowed = 0x1, E_FpFailed = 0x2, E_FpAllErrors = 0x3 };
+enum EFloatingPointErrorStatus {
+    E_FpNoErrors = 0x0,
+    E_FpOverflowed = 0x1,
+    E_FpFailed = 0x2,
+    E_FpAllErrors = 0x3
+};
 
 //! Enumerates the cases that a collection of samples is either in
 //! the left tail, right tail or a mixture or neither of the tails
@@ -197,7 +220,12 @@ enum EFloatingPointErrorStatus { E_FpNoErrors = 0x0, E_FpOverflowed = 0x1, E_FpF
 //!   -# Mixed or neither is used to denote the case that some are
 //!      to left, some to the right and/or some are between the left
 //!      and rightmost modes.
-enum ETail { E_UndeterminedTail = 0x0, E_LeftTail = 0x1, E_RightTail = 0x2, E_MixedOrNeitherTail = 0x3 };
+enum ETail {
+    E_UndeterminedTail = 0x0,
+    E_LeftTail = 0x1,
+    E_RightTail = 0x2,
+    E_MixedOrNeitherTail = 0x3
+};
 }
 }
 
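[Editor's note: EFloatingPointErrorStatus and ETail above are bit-flag enums: each enumerator occupies its own bit, statuses accumulate with |, and call sites test with &, as in the gaussianLogLikelihood error handling earlier in this patch. A minimal sketch of that usage:]

    #include <iostream>

    enum EFloatingPointErrorStatus {
        E_FpNoErrors = 0x0,
        E_FpOverflowed = 0x1,
        E_FpFailed = 0x2,
        E_FpAllErrors = 0x3
    };

    int main() {
        int status = E_FpNoErrors;
        status |= E_FpOverflowed;            // record an overflow
        if (status & E_FpFailed) {
            std::cout << "hard failure\n";   // not reached
        } else if (status & E_FpOverflowed) {
            std::cout << "overflowed but usable\n"; // printed
        }
    }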
diff --git a/include/maths/ProbabilityAggregators.h b/include/maths/ProbabilityAggregators.h
index 5142c9fb25..0a91d17da0 100644
--- a/include/maths/ProbabilityAggregators.h
+++ b/include/maths/ProbabilityAggregators.h
@@ -43,14 +43,17 @@ namespace maths {
 //! probability of a collection of samples which are sampled where
 //! each sample only appears with some specified frequency. The weights
 //! must be non-negative.
-class MATHS_EXPORT CJointProbabilityOfLessLikelySamples : private boost::addable<CJointProbabilityOfLessLikelySamples> {
+class MATHS_EXPORT CJointProbabilityOfLessLikelySamples
+    : private boost::addable<CJointProbabilityOfLessLikelySamples> {
 public:
     using TOptionalDouble = boost::optional<double>;
 
     //! Functor wrapper of CJointProbabilityOfLessLikelySamples::add.
     struct SAddProbability {
         CJointProbabilityOfLessLikelySamples&
-        operator()(CJointProbabilityOfLessLikelySamples& jointProbability, double probability, double weight = 1.0) const;
+        operator()(CJointProbabilityOfLessLikelySamples& jointProbability,
+                   double probability,
+                   double weight = 1.0) const;
     };
 
 public:
@@ -63,7 +66,8 @@ class MATHS_EXPORT CJointProbabilityOfLessLikelySamples : private boost::addable
     std::string toDelimited() const;
 
     //! Combine two joint probability calculators.
-    const CJointProbabilityOfLessLikelySamples& operator+=(const CJointProbabilityOfLessLikelySamples& other);
+    const CJointProbabilityOfLessLikelySamples&
+    operator+=(const CJointProbabilityOfLessLikelySamples& other);
 
     //! Add \p probability.
     void add(double probability, double weight = 1.0);
@@ -98,7 +102,8 @@ class MATHS_EXPORT CJointProbabilityOfLessLikelySamples : private boost::addable
 };
 
 MATHS_EXPORT
-std::ostream& operator<<(std::ostream& o, const CJointProbabilityOfLessLikelySamples& probability);
+std::ostream& operator<<(std::ostream& o,
+                         const CJointProbabilityOfLessLikelySamples& probability);
 
 //! \brief Computes log of the joint probability of seeing a more
 //! extreme collection of samples.
@@ -126,13 +131,15 @@ std::ostream& operator<<(std::ostream& o, const CJointProbabilityOfLessLikelySam
 //! joint probabilities, which should respect the error in the bounds.
 //! For example, two probabilities should be treated as equal if the
 //! intervals defined by their upper and lower bounds intersect.
-class MATHS_EXPORT CLogJointProbabilityOfLessLikelySamples : protected CJointProbabilityOfLessLikelySamples,
-                                                             private boost::addable<CLogJointProbabilityOfLessLikelySamples> {
+class MATHS_EXPORT CLogJointProbabilityOfLessLikelySamples
+    : protected CJointProbabilityOfLessLikelySamples,
+      private boost::addable<CLogJointProbabilityOfLessLikelySamples> {
 public:
     CLogJointProbabilityOfLessLikelySamples();
 
     //! Combine two log joint probability calculators.
-    const CLogJointProbabilityOfLessLikelySamples& operator+=(const CLogJointProbabilityOfLessLikelySamples& other);
+    const CLogJointProbabilityOfLessLikelySamples&
+    operator+=(const CLogJointProbabilityOfLessLikelySamples& other);
 
     //! Add \p probability.
     void add(double probability, double weight = 1.0);
@@ -183,7 +190,8 @@ class MATHS_EXPORT CLogJointProbabilityOfLessLikelySamples : protected CJointPro
 //! 
//! //! where we have used the fact that \f$(1 - F(x)) = p / 2\f$. -class MATHS_EXPORT CProbabilityOfExtremeSample : private boost::addable { +class MATHS_EXPORT CProbabilityOfExtremeSample + : private boost::addable { public: CProbabilityOfExtremeSample(); @@ -266,7 +274,8 @@ std::ostream& operator<<(std::ostream& o, const CProbabilityOfExtremeSample& pro //! The integral representing \f$P(R)\f$ can be evaluated in order \f$M^2\f$ //! as a polynomial in the individual probabilities \f$\{p_1, ..., p_M\}\f$ //! with recurrence relations used to compute the coefficients. -class MATHS_EXPORT CLogProbabilityOfMFromNExtremeSamples : private boost::addable { +class MATHS_EXPORT CLogProbabilityOfMFromNExtremeSamples + : private boost::addable { public: CLogProbabilityOfMFromNExtremeSamples(std::size_t m); @@ -277,7 +286,8 @@ class MATHS_EXPORT CLogProbabilityOfMFromNExtremeSamples : private boost::addabl std::string toDelimited() const; //! Combine two extreme probability calculators. - const CLogProbabilityOfMFromNExtremeSamples& operator+=(const CLogProbabilityOfMFromNExtremeSamples& other); + const CLogProbabilityOfMFromNExtremeSamples& + operator+=(const CLogProbabilityOfMFromNExtremeSamples& other); //! Add \p probability. void add(double probability); diff --git a/include/model/CAnnotatedProbability.h b/include/model/CAnnotatedProbability.h index 96c19b06af..3cd06722ae 100644 --- a/include/model/CAnnotatedProbability.h +++ b/include/model/CAnnotatedProbability.h @@ -48,7 +48,8 @@ struct MODEL_EXPORT SAttributeProbability { using TDouble1Vec = core::CSmallVector; using TSizeDoublePr = std::pair; using TSizeDoublePr1Vec = core::CSmallVector; - using TDescriptiveDataDoublePr = std::pair; + using TDescriptiveDataDoublePr = + std::pair; using TDescriptiveDataDoublePr2Vec = core::CSmallVector; using TStoredStringPtr1Vec = core::CSmallVector; @@ -105,9 +106,12 @@ struct MODEL_EXPORT SAttributeProbability { //! and so on. 
 struct MODEL_EXPORT SAnnotatedProbability {
     using TAttributeProbability1Vec = core::CSmallVector<SAttributeProbability, 1>;
-    using TStoredStringPtrStoredStringPtrPr = std::pair<core::CStoredStringPtr, core::CStoredStringPtr>;
-    using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair<TStoredStringPtrStoredStringPtrPr, double>;
-    using TStoredStringPtrStoredStringPtrPrDoublePrVec = std::vector<TStoredStringPtrStoredStringPtrPrDoublePr>;
+    using TStoredStringPtrStoredStringPtrPr =
+        std::pair<core::CStoredStringPtr, core::CStoredStringPtr>;
+    using TStoredStringPtrStoredStringPtrPrDoublePr =
+        std::pair<TStoredStringPtrStoredStringPtrPr, double>;
+    using TStoredStringPtrStoredStringPtrPrDoublePrVec =
+        std::vector<TStoredStringPtrStoredStringPtrPrDoublePr>;
     using TDescriptiveDataDoublePr = SAttributeProbability::TDescriptiveDataDoublePr;
     using TDescriptiveDataDoublePr2Vec = SAttributeProbability::TDescriptiveDataDoublePr2Vec;
     using TOptionalDouble = boost::optional<double>;

diff --git a/include/model/CAnnotatedProbabilityBuilder.h b/include/model/CAnnotatedProbabilityBuilder.h
index aaf49679b2..dcb1865ed6 100644
--- a/include/model/CAnnotatedProbabilityBuilder.h
+++ b/include/model/CAnnotatedProbabilityBuilder.h
@@ -61,7 +61,9 @@ class MODEL_EXPORT CAnnotatedProbabilityBuilder : private core::CNonCopyable {
     void build();

 private:
-    void addAttributeDescriptiveData(std::size_t cid, double pAttribute, SAttributeProbability& attributeProbability);
+    void addAttributeDescriptiveData(std::size_t cid,
+                                     double pAttribute,
+                                     SAttributeProbability& attributeProbability);

     void addDescriptiveData();

diff --git a/include/model/CAnomalyDetector.h b/include/model/CAnomalyDetector.h
index 71d34a616e..63aa9b31bb 100644
--- a/include/model/CAnomalyDetector.h
+++ b/include/model/CAnomalyDetector.h
@@ -151,7 +151,8 @@ class MODEL_EXPORT CAnomalyDetector : private core::CNonCopyable {
     void zeroModelsToTime(core_t::TTime time);

     //! Populate the object from a state document
-    bool acceptRestoreTraverser(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser);
+    bool acceptRestoreTraverser(const std::string& partitionFieldValue,
+                                core::CStateRestoreTraverser& traverser);

     //! Restore state for statics - this is only called from the
     //! simple count detector to ensure singleton behaviour
@@ -162,13 +163,15 @@
     //! \note This is static so it can be called before the state is fully
     //! deserialised, because we need this value before restoring the
     //! detector.
-    static bool partitionFieldAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::string& partitionFieldValue);
+    static bool partitionFieldAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser,
+                                                     std::string& partitionFieldValue);

     //! Find the detector keys given part of a state document.
     //!
     //! \note This is static so it can be called before the state is fully
     //! deserialised, because we need these before restoring the detector.
-    static bool keyAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, CSearchKey& key);
+    static bool keyAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser,
+                                          CSearchKey& key);

     //! Persist the detector keys separately to the rest of the state.
     //! This must be done for a 100% streaming state restoration because
@@ -211,10 +214,14 @@ class MODEL_EXPORT CAnomalyDetector : private core::CNonCopyable {
     void addRecord(core_t::TTime time, const TStrCPtrVec& fieldValues);

     //! Update the results with this detector model's results.
-    void buildResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results);
+    void buildResults(core_t::TTime bucketStartTime,
+                      core_t::TTime bucketEndTime,
+                      CHierarchicalResults& results);

     //! Update the results with this detector model's results.
-    void buildInterimResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results);
+    void buildInterimResults(core_t::TTime bucketStartTime,
+                             core_t::TTime bucketEndTime,
+                             CHierarchicalResults& results);

     //! Generate the model plot data for the time series identified
     //! by \p terms.
@@ -305,11 +312,14 @@ class MODEL_EXPORT CAnomalyDetector : private core::CNonCopyable {
     //! Sample bucket statistics and any other state needed to compute
     //! probabilities in the interval [\p startTime, \p endTime], but
     //! does not update the model.
-    void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+    void sampleBucketStatistics(core_t::TTime startTime,
+                                core_t::TTime endTime,
+                                CResourceMonitor& resourceMonitor);

     //! Restores the state that was formerly part of the model ensemble class.
     //! This includes the data gatherer and the model.
-    bool legacyModelEnsembleAcceptRestoreTraverser(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser);
+    bool legacyModelEnsembleAcceptRestoreTraverser(const std::string& partitionFieldValue,
+                                                   core::CStateRestoreTraverser& traverser);

     //! Restores the state that was formerly part of the live models
     //! in the model ensemble class.

diff --git a/include/model/CAnomalyDetectorModel.h b/include/model/CAnomalyDetectorModel.h
index 15bc4cb7e6..19da0371f9 100644
--- a/include/model/CAnomalyDetectorModel.h
+++ b/include/model/CAnomalyDetectorModel.h
@@ -152,9 +152,11 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     using TOptionalSize = boost::optional<std::size_t>;
     using TAttributeProbability1Vec = core::CSmallVector<SAttributeProbability, 1>;
     using TInfluenceCalculatorCPtr = boost::shared_ptr<const CInfluenceCalculator>;
-    using TFeatureInfluenceCalculatorCPtrPr = std::pair<model_t::EFeature, TInfluenceCalculatorCPtr>;
+    using TFeatureInfluenceCalculatorCPtrPr =
+        std::pair<model_t::EFeature, TInfluenceCalculatorCPtr>;
     using TFeatureInfluenceCalculatorCPtrPrVec = std::vector<TFeatureInfluenceCalculatorCPtrPr>;
-    using TFeatureInfluenceCalculatorCPtrPrVecVec = std::vector<TFeatureInfluenceCalculatorCPtrPrVec>;
+    using TFeatureInfluenceCalculatorCPtrPrVecVec =
+        std::vector<TFeatureInfluenceCalculatorCPtrPrVec>;
     using TMultivariatePriorPtr = boost::shared_ptr<maths::CMultivariatePrior>;
     using TFeatureMultivariatePriorPtrPr = std::pair<model_t::EFeature, TMultivariatePriorPtr>;
     using TFeatureMultivariatePriorPtrPrVec = std::vector<TFeatureMultivariatePriorPtrPr>;
@@ -249,7 +251,10 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! \param[in] time The time of interest.
     //! \return The value of \p feature in the bucket containing
     //! \p time if available and empty otherwise.
-    virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const = 0;
+    virtual TDouble1Vec currentBucketValue(model_t::EFeature feature,
+                                           std::size_t pid,
+                                           std::size_t cid,
+                                           core_t::TTime time) const = 0;

     //! Get the appropriate baseline bucket value of \p feature for
     //! the person identified by \p pid and the attribute identified
@@ -289,7 +294,8 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! Print the people identified by \p pids.
     //! Optionally, this may be limited to return a string of the form:
     //! A B C and n others
-    std::string printPeople(const TSizeVec& pids, size_t limit = std::numeric_limits<size_t>::max()) const;
+    std::string printPeople(const TSizeVec& pids,
+                            size_t limit = std::numeric_limits<size_t>::max()) const;

     //! Get the person unique identifiers which have a feature value
     //! in the bucketing time interval including \p time.
@@ -320,7 +326,8 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! Print the attributes identified by \p cids.
     //! Optionally, this may be limited to return a string of the form:
     //! A B C and n others
-    std::string printAttributes(const TSizeVec& cids, size_t limit = std::numeric_limits<size_t>::max()) const;
+    std::string printAttributes(const TSizeVec& cids,
+                                size_t limit = std::numeric_limits<size_t>::max()) const;
     //@}

     //! \name Update
     //@{
@@ -332,7 +339,9 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //!
     //! \param[in] startTime The start of the time interval to sample.
     //! \param[in] endTime The end of the time interval to sample.
-    virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0;
+    virtual void sampleBucketStatistics(core_t::TTime startTime,
+                                        core_t::TTime endTime,
+                                        CResourceMonitor& resourceMonitor) = 0;

     //! Update the model with the samples of the process in the
     //! time interval [\p startTime, \p endTime].
@@ -340,7 +349,9 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! \param[in] startTime The start of the time interval to sample.
     //! \param[in] endTime The end of the time interval to sample.
     //! \param[in] resourceMonitor The resourceMonitor.
-    virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0;
+    virtual void sample(core_t::TTime startTime,
+                        core_t::TTime endTime,
+                        CResourceMonitor& resourceMonitor) = 0;

     //! This samples the bucket statistics, and any state needed
     //! by computeProbability, in the time interval [\p startTime,
     //!
@@ -349,7 +360,9 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! \param[in] startTime The start of the time interval to sample.
     //! \param[in] endTime The end of the time interval to sample.
-    virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0;
+    virtual void sampleOutOfPhase(core_t::TTime startTime,
+                                  core_t::TTime endTime,
+                                  CResourceMonitor& resourceMonitor) = 0;

     //! Rolls time to \p endTime while skipping sampling the models for
     //! buckets within the gap.
@@ -421,10 +434,11 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! \param[out] attributeProbabilities Filled in with the smallest
     //! \p numberAttributeProbabilities attribute probabilities and
     //! associated data describing the calculation.
-    virtual bool computeTotalProbability(const std::string& person,
-                                         std::size_t numberAttributeProbabilities,
-                                         TOptionalDouble& probability,
-                                         TAttributeProbability1Vec& attributeProbabilities) const = 0;
+    virtual bool
+    computeTotalProbability(const std::string& person,
+                            std::size_t numberAttributeProbabilities,
+                            TOptionalDouble& probability,
+                            TAttributeProbability1Vec& attributeProbabilities) const = 0;
     //@}

     //! Get the checksum of this model.
@@ -444,13 +458,16 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! Estimate the memory usage of the model based on number of people,
     //! attributes and correlations. Returns empty when the estimator
     //! is unable to produce an estimate.
-    TOptionalSize estimateMemoryUsage(std::size_t numberPeople, std::size_t numberAttributes, std::size_t numberCorrelations) const;
+    TOptionalSize estimateMemoryUsage(std::size_t numberPeople,
+                                      std::size_t numberAttributes,
+                                      std::size_t numberCorrelations) const;

     //! Estimate the memory usage of the model based on number of people,
     //! attributes and correlations. When an estimate cannot be produced,
     //! the memory usage is computed and the estimator is updated.
-    std::size_t
-    estimateMemoryUsageOrComputeAndUpdate(std::size_t numberPeople, std::size_t numberAttributes, std::size_t numberCorrelations);
+    std::size_t estimateMemoryUsageOrComputeAndUpdate(std::size_t numberPeople,
+                                                      std::size_t numberAttributes,
+                                                      std::size_t numberCorrelations);

     //! Get the static size of this object - used for virtual hierarchies
     virtual std::size_t staticSize() const = 0;
@@ -488,7 +505,8 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
         SFeatureModels(model_t::EFeature feature, TMathsModelPtr newModel);

         //! Restore the models reading state from \p traverser.
-        bool acceptRestoreTraverser(const SModelParams& params, core::CStateRestoreTraverser& traverser);
+        bool acceptRestoreTraverser(const SModelParams& params,
+                                    core::CStateRestoreTraverser& traverser);

         //! Persist the models passing state to \p inserter.
         void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -508,10 +526,13 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! \brief The feature correlate models.
     struct MODEL_EXPORT SFeatureCorrelateModels {
-        SFeatureCorrelateModels(model_t::EFeature feature, TMultivariatePriorPtr modelPrior, TCorrelationsPtr model);
+        SFeatureCorrelateModels(model_t::EFeature feature,
+                                TMultivariatePriorPtr modelPrior,
+                                TCorrelationsPtr model);

         //! Restore the models reading state from \p traverser.
-        bool acceptRestoreTraverser(const SModelParams& params, core::CStateRestoreTraverser& traverser);
+        bool acceptRestoreTraverser(const SModelParams& params,
+                                    core::CStateRestoreTraverser& traverser);

         //! Persist the models passing state to \p inserter.
         void acceptPersistInserter(core::CStatePersistInserter& inserter) const;
@@ -582,12 +603,16 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
 protected:
     //! Remove heavy hitting people from the \p data if necessary.
     template<typename T, typename FILTER>
-    void applyFilter(model_t::EExcludeFrequent exclude, bool updateStatistics, const FILTER& filter, T& data) const {
+    void applyFilter(model_t::EExcludeFrequent exclude,
+                     bool updateStatistics,
+                     const FILTER& filter,
+                     T& data) const {
         if (this->params().s_ExcludeFrequent & exclude) {
             std::size_t initialSize = data.size();
             data.erase(std::remove_if(data.begin(), data.end(), filter), data.end());
             if (updateStatistics && data.size() != initialSize) {
-                core::CStatistics::stat(stat_t::E_NumberExcludedFrequentInvocations).increment(1);
+                core::CStatistics::stat(stat_t::E_NumberExcludedFrequentInvocations)
+                    .increment(1);
             }
         }
     }
@@ -613,7 +638,8 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     //! Get the influence calculator for the influencer field identified
     //! by \p iid and the \p feature.
-    const CInfluenceCalculator* influenceCalculator(model_t::EFeature feature, std::size_t iid) const;
+    const CInfluenceCalculator* influenceCalculator(model_t::EFeature feature,
+                                                    std::size_t iid) const;

     //! Get the person bucket counts.
     const TDoubleVec& personBucketCounts() const;
@@ -639,7 +665,10 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable {
     const CInterimBucketCorrector& interimValueCorrector() const;

     //! Check if any of the sample-filtering detection rules apply to this series.
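The applyFilter body above is the standard erase-remove idiom plus a statistics side effect. A minimal self-contained sketch of the same shape (hypothetical names, standard C++ only, not the library's API); the shouldIgnoreSample declaration the comment above refers to follows right after this aside:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Drop every element matching 'filter' and record whether anything was dropped.
    template<typename T, typename FILTER>
    void applyFilterSketch(bool updateStatistics, const FILTER& filter,
                           std::vector<T>& data, std::size_t& exclusions) {
        std::size_t initialSize = data.size();
        // remove_if only partitions the kept elements to the front; the erase
        // call is what actually shrinks the container, so the size comparison
        // below detects whether the filter excluded anything.
        data.erase(std::remove_if(data.begin(), data.end(), filter), data.end());
        if (updateStatistics && data.size() != initialSize) {
            ++exclusions; // stands in for CStatistics::stat(...).increment(1)
        }
    }

    int main() {
        std::vector<int> personIds{1, 7, 7, 3, 7};
        std::size_t exclusions = 0;
        applyFilterSketch(true, [](int id) { return id == 7; }, personIds, exclusions);
        std::cout << personIds.size() << " kept, " << exclusions << " exclusion event(s)\n";
    }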
- bool shouldIgnoreSample(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + bool shouldIgnoreSample(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; //! Check if any of the result-filtering detection rules apply to this series. bool shouldIgnoreResult(model_t::EFeature feature, @@ -655,7 +684,8 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable { bool interimBucketCorrectorAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser); //! Persist the interim bucket corrector. - void interimBucketCorrectorAcceptPersistInserter(const std::string& tag, core::CStatePersistInserter& inserter) const; + void interimBucketCorrectorAcceptPersistInserter(const std::string& tag, + core::CStatePersistInserter& inserter) const; //! Create a stub version of maths::CModel for use when pruning people //! or attributes to free memory resource. diff --git a/include/model/CAnomalyDetectorModelConfig.h b/include/model/CAnomalyDetectorModelConfig.h index e06eaed75c..a7c94fbc5b 100644 --- a/include/model/CAnomalyDetectorModelConfig.h +++ b/include/model/CAnomalyDetectorModelConfig.h @@ -236,25 +236,23 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! correlated 'by' fields be performed? //! \param[in] multipleBucketLengths If specified, set multiple bucket //! lengths to be analysed (CSV string of time values) - static CAnomalyDetectorModelConfig defaultConfig(core_t::TTime bucketLength, - model_t::ESummaryMode summaryMode, - const std::string& summaryCountFieldName, - core_t::TTime latency, - std::size_t bucketResultsDelay, - bool multivariateByFields, - const std::string& multipleBucketLengths); + static CAnomalyDetectorModelConfig + defaultConfig(core_t::TTime bucketLength, + model_t::ESummaryMode summaryMode, + const std::string& summaryCountFieldName, + core_t::TTime latency, + std::size_t bucketResultsDelay, + bool multivariateByFields, + const std::string& multipleBucketLengths); //! Overload using defaults. - static CAnomalyDetectorModelConfig defaultConfig(core_t::TTime bucketLength = DEFAULT_BUCKET_LENGTH, - model_t::ESummaryMode summaryMode = model_t::E_None, - const std::string& summaryCountFieldName = "") { - return defaultConfig(bucketLength, - summaryMode, - summaryCountFieldName, + static CAnomalyDetectorModelConfig + defaultConfig(core_t::TTime bucketLength = DEFAULT_BUCKET_LENGTH, + model_t::ESummaryMode summaryMode = model_t::E_None, + const std::string& summaryCountFieldName = "") { + return defaultConfig(bucketLength, summaryMode, summaryCountFieldName, DEFAULT_LATENCY_BUCKETS * bucketLength, - DEFAULT_BUCKET_RESULTS_DELAY, - false, - ""); + DEFAULT_BUCKET_RESULTS_DELAY, false, ""); } //! Get the factor to normalize all bucket lengths to the default @@ -267,7 +265,8 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! Parse and verify the multiple bucket lengths - these should all be //! multiples of the standard bucket length. - static TTimeVec multipleBucketLengths(core_t::TTime bucketLength, const std::string& multipleBucketLengths); + static TTimeVec multipleBucketLengths(core_t::TTime bucketLength, + const std::string& multipleBucketLengths); public: CAnomalyDetectorModelConfig(); @@ -283,7 +282,9 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! Set the model factories. void factories(const TFactoryTypeFactoryPtrMap& factories); //! Set the style and parameter value for raw score aggregation. 
- bool aggregationStyleParams(model_t::EAggregationStyle style, model_t::EAggregationParam param, double value); + bool aggregationStyleParams(model_t::EAggregationStyle style, + model_t::EAggregationParam param, + double value); //! Set the maximum anomalous probability. void maximumAnomalousProbability(double probability); //! Set the noise level as a percentile of historic raw anomaly scores. @@ -331,15 +332,17 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! \param[in] attributeFieldName The name of the by field. //! \param[in] valueFieldName The name of the field containing metric values. //! \param[in] influenceFieldNames The list of influence field names. - TModelFactoryCPtr factory(int identifier, - function_t::EFunction function, - bool useNull = false, - model_t::EExcludeFrequent excludeFrequent = model_t::E_XF_None, - const std::string& partitionFieldName = std::string(), - const std::string& personFieldName = std::string(), - const std::string& attributeFieldName = std::string(), - const std::string& valueFieldName = std::string(), - const CSearchKey::TStoredStringPtrVec& influenceFieldNames = CSearchKey::TStoredStringPtrVec()) const; + TModelFactoryCPtr + factory(int identifier, + function_t::EFunction function, + bool useNull = false, + model_t::EExcludeFrequent excludeFrequent = model_t::E_XF_None, + const std::string& partitionFieldName = std::string(), + const std::string& personFieldName = std::string(), + const std::string& attributeFieldName = std::string(), + const std::string& valueFieldName = std::string(), + const CSearchKey::TStoredStringPtrVec& influenceFieldNames = + CSearchKey::TStoredStringPtrVec()) const; //! Set the rate at which the models lose information. void decayRate(double value); @@ -393,7 +396,8 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //@{ //! Get the value of the aggregation style parameter identified by //! \p style and \p param. - double aggregationStyleParam(model_t::EAggregationStyle style, model_t::EAggregationParam param) const; + double aggregationStyleParam(model_t::EAggregationStyle style, + model_t::EAggregationParam param) const; //! Get the maximum anomalous probability. double maximumAnomalousProbability() const; diff --git a/include/model/CAnomalyScore.h b/include/model/CAnomalyScore.h index 782d153117..659888d078 100644 --- a/include/model/CAnomalyScore.h +++ b/include/model/CAnomalyScore.h @@ -76,7 +76,9 @@ class MODEL_EXPORT CAnomalyScore { double maximumAnomalousProbability); //! Compute the overall anomaly score and aggregate probability. - bool operator()(const TDoubleVec& probabilities, double& overallAnomalyScore, double& overallProbability) const; + bool operator()(const TDoubleVec& probabilities, + double& overallAnomalyScore, + double& overallProbability) const; private: //! The weight to assign the joint probability. @@ -167,7 +169,8 @@ class MODEL_EXPORT CAnomalyScore { using TDoubleDoublePrVec = std::vector; using TDoubleDoublePrVecCItr = TDoubleDoublePrVec::const_iterator; using TGreaterDouble = std::greater; - using TMaxValueAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMaxValueAccumulator = + maths::CBasicStatistics::COrderStatisticsStack; private: //! Used to convert raw scores in to integers so that we @@ -274,7 +277,8 @@ class MODEL_EXPORT CAnomalyScore { static bool normalizerFromJson(const std::string& json, CNormalizer& normalizer); //! 
Populate \p normalizer from the restore traverser - static bool normalizerFromJson(core::CStateRestoreTraverser& traverser, CNormalizer& normalizer); + static bool normalizerFromJson(core::CStateRestoreTraverser& traverser, + CNormalizer& normalizer); //! Convert \p normalizer to its JSON representation with a restoration //! cue and description specified by the caller diff --git a/include/model/CBucketGatherer.h b/include/model/CBucketGatherer.h index 1907aab1b1..4c57ee2591 100644 --- a/include/model/CBucketGatherer.h +++ b/include/model/CBucketGatherer.h @@ -95,7 +95,9 @@ class MODEL_EXPORT CBucketGatherer { //! \brief Hashes a ((size_t, size_t), string*) pair. struct MODEL_EXPORT SSizeSizePrStoredStringPtrPrHash { std::size_t operator()(const TSizeSizePrStoredStringPtrPr& key) const { - uint64_t seed = core::CHashing::hashCombine(static_cast(key.first.first), static_cast(key.first.second)); + uint64_t seed = core::CHashing::hashCombine( + static_cast(key.first.first), + static_cast(key.first.second)); return core::CHashing::hashCombine(seed, s_Hasher(*key.second)); } core::CHashing::CMurmurHash2String s_Hasher; @@ -103,19 +105,26 @@ class MODEL_EXPORT CBucketGatherer { //! \brief Checks two ((size_t, size_t), string*) pairs for equality. struct MODEL_EXPORT SSizeSizePrStoredStringPtrPrEqual { - bool operator()(const TSizeSizePrStoredStringPtrPr& lhs, const TSizeSizePrStoredStringPtrPr& rhs) const { + bool operator()(const TSizeSizePrStoredStringPtrPr& lhs, + const TSizeSizePrStoredStringPtrPr& rhs) const { return lhs.first == rhs.first && *lhs.second == *rhs.second; } }; using TSizeSizePrStoredStringPtrPrUInt64UMap = boost::unordered_map; - using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator; - using TSizeSizePrStoredStringPtrPrUInt64UMapItr = TSizeSizePrStoredStringPtrPrUInt64UMap::iterator; - using TSizeSizePrStoredStringPtrPrUInt64UMapVec = std::vector; - using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue = CBucketQueue; - using TSizeSizePrStoredStringPtrPrUInt64UMapVecCItr = TSizeSizePrStoredStringPtrPrUInt64UMapVec::const_iterator; - using TTimeSizeSizePrStoredStringPtrPrUInt64UMapVecMap = std::map; + using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = + TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator; + using TSizeSizePrStoredStringPtrPrUInt64UMapItr = + TSizeSizePrStoredStringPtrPrUInt64UMap::iterator; + using TSizeSizePrStoredStringPtrPrUInt64UMapVec = + std::vector; + using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue = + CBucketQueue; + using TSizeSizePrStoredStringPtrPrUInt64UMapVecCItr = + TSizeSizePrStoredStringPtrPrUInt64UMapVec::const_iterator; + using TTimeSizeSizePrStoredStringPtrPrUInt64UMapVecMap = + std::map; using TSearchKeyCRef = boost::reference_wrapper; using TFeatureAnyPr = std::pair; using TFeatureAnyPrVec = std::vector; @@ -204,7 +213,9 @@ class MODEL_EXPORT CBucketGatherer { //! //! This adds people and attributes as necessary and fills out the //! event data from \p fieldValues. - virtual bool processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) = 0; + virtual bool processFields(const TStrCPtrVec& fieldValues, + CEventData& result, + CResourceMonitor& resourceMonitor) = 0; //! Record the arrival of \p data at \p time. bool addEventData(CEventData& data); @@ -345,12 +356,15 @@ class MODEL_EXPORT CBucketGatherer { //! \tparam T This must be a vector of associative array from person //! id and/or attribute id to some corresponding value. 
     template<typename F, typename T>
-    static void remove(const TSizeVec& toRemove, const F& extractId, CBucketQueue<std::vector<T>>& queue) {
+    static void remove(const TSizeVec& toRemove,
+                       const F& extractId,
+                       CBucketQueue<std::vector<T>>& queue) {
         for (auto bucketItr = queue.begin(); bucketItr != queue.end(); ++bucketItr) {
             for (std::size_t i = 0u; i < bucketItr->size(); ++i) {
                 T& bucket = (*bucketItr)[i];
                 for (auto j = bucket.begin(); j != bucket.end(); /**/) {
-                    if (std::binary_search(toRemove.begin(), toRemove.end(), extractId(j->first))) {
+                    if (std::binary_search(toRemove.begin(), toRemove.end(),
+                                           extractId(j->first))) {
                         j = bucket.erase(j);
                     } else {
                         ++j;
@@ -365,7 +379,9 @@ class MODEL_EXPORT CBucketGatherer {
     //!
     //! \param[in] time The time of interest.
     //! \param[out] result Filled in with the feature data at \p time.
-    virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec& result) const = 0;
+    virtual void featureData(core_t::TTime time,
+                             core_t::TTime bucketLength,
+                             TFeatureAnyPrVec& result) const = 0;

     //! Get a reference to the owning data gatherer.
     const CDataGatherer& dataGatherer() const;

diff --git a/include/model/CBucketQueue.h b/include/model/CBucketQueue.h
index dea8211c53..877bd78311 100644
--- a/include/model/CBucketQueue.h
+++ b/include/model/CBucketQueue.h
@@ -46,9 +46,12 @@ class CBucketQueue {
     template<typename F>
     class CSerializer {
     public:
-        CSerializer(const T& initial = T(), const F& serializer = F()) : m_InitialValue(initial), m_Serializer(serializer) {}
+        CSerializer(const T& initial = T(), const F& serializer = F())
+            : m_InitialValue(initial), m_Serializer(serializer) {}

-        void operator()(const CBucketQueue& queue, core::CStatePersistInserter& inserter) const { queue.persist(m_Serializer, inserter); }
+        void operator()(const CBucketQueue& queue, core::CStatePersistInserter& inserter) const {
+            queue.persist(m_Serializer, inserter);
+        }

         bool operator()(CBucketQueue& queue, core::CStateRestoreTraverser& traverser) const {
             return queue.restore(m_Serializer, m_InitialValue, traverser);
@@ -68,8 +71,13 @@ class CBucketQueue {
     //! the latency window.
     //! \param[in] bucketLength The bucket length.
     //! \param[in] latestBucketStart The start time of the latest bucket.
-    CBucketQueue(std::size_t latencyBuckets, core_t::TTime bucketLength, core_t::TTime latestBucketStart, T initial = T())
-        : m_Queue(latencyBuckets + 1), m_LatestBucketEnd(latestBucketStart + bucketLength - 1), m_BucketLength(bucketLength) {
+    CBucketQueue(std::size_t latencyBuckets,
+                 core_t::TTime bucketLength,
+                 core_t::TTime latestBucketStart,
+                 T initial = T())
+        : m_Queue(latencyBuckets + 1),
+          m_LatestBucketEnd(latestBucketStart + bucketLength - 1),
+          m_BucketLength(bucketLength) {
         this->fill(initial);
         LOG_TRACE(<< "Queue created :");
         LOG_TRACE(<< "Bucket length = " << m_BucketLength);
@@ -84,7 +92,8 @@ class CBucketQueue {
     //! \param[in] time The time to which the item corresponds.
     void push(const T& item, core_t::TTime time) {
         if (time <= m_LatestBucketEnd) {
-            LOG_ERROR(<< "Push was called with early time = " << time << ", latest bucket end time = " << m_LatestBucketEnd);
+            LOG_ERROR(<< "Push was called with early time = " << time
+                      << ", latest bucket end time = " << m_LatestBucketEnd);
             return;
         }
         m_LatestBucketEnd += m_BucketLength;
@@ -104,7 +113,9 @@ class CBucketQueue {
     //! Returns the item in the queue that corresponds to the bucket
     //! indicated by \p time.
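CBucketQueue above keeps a fixed window of latencyBuckets + 1 buckets keyed by time; the get() declared just below maps a time back to its slot. A toy self-contained analogue of that push/get behaviour (illustrative only; the real class differs in detail, for example in how it indexes and fills its buffer):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    using TTime = std::int64_t;

    // Latency window of latencyBuckets + 1 slots; pushing a later time rolls the
    // window forward one bucket, mirroring the push logic above.
    class BucketRing {
    public:
        BucketRing(std::size_t latencyBuckets, TTime bucketLength, TTime latestBucketStart)
            : m_Slots(latencyBuckets + 1, 0),
              m_LatestBucketEnd(latestBucketStart + bucketLength - 1),
              m_BucketLength(bucketLength) {}

        void push(int item, TTime time) {
            if (time <= m_LatestBucketEnd) {
                return; // "push was called with early time": ignore, like the LOG_ERROR path
            }
            m_LatestBucketEnd += m_BucketLength;
            m_Slots[this->index(m_LatestBucketEnd)] = item;
        }

        int get(TTime time) const { return m_Slots[this->index(time)]; }

    private:
        // Map a time to a slot via its bucket ordinal modulo the window size.
        std::size_t index(TTime time) const {
            return static_cast<std::size_t>((time / m_BucketLength) %
                                            static_cast<TTime>(m_Slots.size()));
        }

    private:
        std::vector<int> m_Slots;
        TTime m_LatestBucketEnd;
        TTime m_BucketLength;
    };

    int main() {
        BucketRing ring(2, 600, 0); // two buckets of latency, 10 minute buckets
        ring.push(42, 600);
        std::cout << ring.get(600) << '\n'; // 42
    }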
- const T& get(core_t::TTime time) const { return m_Queue[this->index(time)]; } + const T& get(core_t::TTime time) const { + return m_Queue[this->index(time)]; + } //! Returns the size of the queue. std::size_t size() const { return m_Queue.size(); } @@ -160,10 +171,14 @@ class CBucketQueue { } //! Get the memory used by this component. - std::size_t memoryUsage() const { return core::CMemory::dynamicSize(m_Queue); } + std::size_t memoryUsage() const { + return core::CMemory::dynamicSize(m_Queue); + } //! Prints the contents of the queue. - std::string print() const { return core::CContainerPrinter::print(m_Queue); } + std::string print() const { + return core::CContainerPrinter::print(m_Queue); + } //! Return the configured bucketlength of this queue core_t::TTime bucketLength() const { return m_BucketLength; } @@ -189,7 +204,8 @@ class CBucketQueue { } } else if (traverser.name() == BUCKET_TAG) { if (i >= m_Queue.size()) { - LOG_WARN(<< "Bucket queue is smaller on restore than on persist: " << i << " >= " << m_Queue.size() + LOG_WARN(<< "Bucket queue is smaller on restore than on persist: " << i + << " >= " << m_Queue.size() << ". Extra buckets will be ignored."); // Restore into a temporary T dummy; @@ -213,7 +229,8 @@ class CBucketQueue { void persist(F bucketPersist, core::CStatePersistInserter& inserter) const { for (std::size_t i = 0; i < m_Queue.size(); i++) { inserter.insertValue(INDEX_TAG, i); - inserter.insertLevel(BUCKET_TAG, boost::bind(bucketPersist, boost::cref(m_Queue[i]), _1)); + inserter.insertLevel( + BUCKET_TAG, boost::bind(bucketPersist, boost::cref(m_Queue[i]), _1)); } } @@ -229,19 +246,22 @@ class CBucketQueue { } } else if (traverser.name() == BUCKET_TAG) { if (i >= m_Queue.size()) { - LOG_WARN(<< "Bucket queue is smaller on restore than on persist: " << i << " >= " << m_Queue.size() + LOG_WARN(<< "Bucket queue is smaller on restore than on persist: " << i + << " >= " << m_Queue.size() << ". Extra buckets will be ignored."); if (traverser.hasSubLevel()) { // Restore into a temporary T dummy = initial; - if (traverser.traverseSubLevel(boost::bind(bucketRestore, dummy, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + bucketRestore, dummy, _1)) == false) { LOG_ERROR(<< "Invalid bucket"); } } } else { m_Queue[i] = initial; if (traverser.hasSubLevel()) { - if (traverser.traverseSubLevel(boost::bind(bucketRestore, boost::ref(m_Queue[i]), _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + bucketRestore, boost::ref(m_Queue[i]), _1)) == false) { LOG_ERROR(<< "Invalid bucket"); return false; } diff --git a/include/model/CCountingModel.h b/include/model/CCountingModel.h index 49784e41bc..3adfa5c2c1 100644 --- a/include/model/CCountingModel.h +++ b/include/model/CCountingModel.h @@ -41,7 +41,9 @@ class MODEL_EXPORT CCountingModel : public CAnomalyDetectorModel { //! //! \note The current bucket statistics are left default initialized //! and so must be sampled for before this model can be used. - CCountingModel(const SModelParams& params, const TDataGathererPtr& dataGatherer, core::CStateRestoreTraverser& traverser); + CCountingModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + core::CStateRestoreTraverser& traverser); //! Create a copy that will result in the same persisted state as the //! original. This is effectively a copy constructor that creates a @@ -103,7 +105,10 @@ class MODEL_EXPORT CCountingModel : public CAnomalyDetectorModel { //! \param[in] pid The identifier of the person of interest. //! 
\param[in] cid Ignored. //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; //! Get the mean bucket count or the reference model mean bucket //! count if one is defined for the person identified by \p pid. @@ -141,7 +146,9 @@ class MODEL_EXPORT CCountingModel : public CAnomalyDetectorModel { //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); + virtual void sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor); //! This samples the bucket statistics, and any state needed //! by computeProbablity, in the time interval [\p startTime, @@ -150,7 +157,9 @@ class MODEL_EXPORT CCountingModel : public CAnomalyDetectorModel { //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); + virtual void sampleOutOfPhase(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor); //! This samples the bucket statistics, in the time interval //! [\p startTime, \p endTime]. diff --git a/include/model/CCountingModelFactory.h b/include/model/CCountingModelFactory.h index b28f72fd11..8d6da80523 100644 --- a/include/model/CCountingModelFactory.h +++ b/include/model/CCountingModelFactory.h @@ -53,7 +53,8 @@ class MODEL_EXPORT CCountingModelFactory : public CModelFactory { //! the model. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const; //! Make a new event rate data gatherer. //! @@ -67,7 +68,8 @@ class MODEL_EXPORT CCountingModelFactory : public CModelFactory { //! \param[in] partitionFieldValue The partition field value. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const; //@} //! \name Defaults @@ -76,11 +78,13 @@ class MODEL_EXPORT CCountingModelFactory : public CModelFactory { virtual TPriorPtr defaultPrior(model_t::EFeature feature, const SModelParams& params) const; //! Get the default prior for \p feature which is a stub. - virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; //! Get the default prior for pairs of correlated time series //! of \p feature which is a stub. 
-    virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const;
+    virtual TMultivariatePriorPtr
+    defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const;
     //@}

     //! Get the search key corresponding to this factory.

diff --git a/include/model/CDataGatherer.h b/include/model/CDataGatherer.h
index 6ad31daaf7..e3db02d342 100644
--- a/include/model/CDataGatherer.h
+++ b/include/model/CDataGatherer.h
@@ -120,10 +120,14 @@ class MODEL_EXPORT CDataGatherer {
     using TSizeSizePrUInt64UMapQueueCItr = TSizeSizePrUInt64UMapQueue::const_iterator;
     using TSizeSizePrUInt64UMapQueueCRItr = TSizeSizePrUInt64UMapQueue::const_reverse_iterator;
     using TSizeSizePrStoredStringPtrPrUInt64UMap = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMap;
-    using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator;
-    using TSizeSizePrStoredStringPtrPrUInt64UMapItr = TSizeSizePrStoredStringPtrPrUInt64UMap::iterator;
-    using TSizeSizePrStoredStringPtrPrUInt64UMapVec = std::vector<TSizeSizePrStoredStringPtrPrUInt64UMap>;
-    using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue = CBucketQueue<TSizeSizePrStoredStringPtrPrUInt64UMapVec>;
+    using TSizeSizePrStoredStringPtrPrUInt64UMapCItr =
+        TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator;
+    using TSizeSizePrStoredStringPtrPrUInt64UMapItr =
+        TSizeSizePrStoredStringPtrPrUInt64UMap::iterator;
+    using TSizeSizePrStoredStringPtrPrUInt64UMapVec =
+        std::vector<TSizeSizePrStoredStringPtrPrUInt64UMap>;
+    using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue =
+        CBucketQueue<TSizeSizePrStoredStringPtrPrUInt64UMapVec>;
     using TSearchKeyCRef = boost::reference_wrapper<const CSearchKey>;
     using TBucketGathererPVec = std::vector<CBucketGatherer*>;
     using TBucketGathererPVecItr = TBucketGathererPVec::iterator;
@@ -310,7 +314,9 @@ class MODEL_EXPORT CDataGatherer {
     //!
     //! This adds people and attributes as necessary and fills out the
     //! event data from \p fieldValues.
-    bool processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor);
+    bool processFields(const TStrCPtrVec& fieldValues,
+                       CEventData& result,
+                       CResourceMonitor& resourceMonitor);

     //! Record the arrival of \p data at \p time.
     bool addArrival(const TStrCPtrVec& fieldValues, CEventData& data, CResourceMonitor& resourceMonitor);
@@ -347,7 +353,9 @@ class MODEL_EXPORT CDataGatherer {
     //! \param[out] result Filled in with the feature data at \p time.
     //! \tparam T The type of the feature data.
     template<typename T>
-    bool featureData(core_t::TTime time, core_t::TTime bucketLength, std::vector<std::pair<model_t::EFeature, T>>& result) const {
+    bool featureData(core_t::TTime time,
+                     core_t::TTime bucketLength,
+                     std::vector<std::pair<model_t::EFeature, T>>& result) const {
         TFeatureAnyPrVec rawFeatureData;
         this->chooseBucketGatherer(time).featureData(time, bucketLength, rawFeatureData);
@@ -361,7 +369,8 @@ class MODEL_EXPORT CDataGatherer {
             // Check the typeid before attempting the cast so we
             // don't use throw to handle failure, which is slow.
             if (feature.second.type() != typeid(T)) {
-                LOG_ERROR(<< "Bad type for feature = " << model_t::print(feature.first) << ", expected " << typeid(T).name() << " got "
+                LOG_ERROR(<< "Bad type for feature = " << model_t::print(feature.first)
+                          << ", expected " << typeid(T).name() << " got "
                           << feature.second.type().name());
                 succeeded = false;
                 continue;
@@ -455,7 +464,9 @@ class MODEL_EXPORT CDataGatherer {
     bool isPersonActive(std::size_t pid) const;

     //! Record a person called \p person.
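CDataGatherer::featureData above holds per-feature payloads in a type-erased any and compares type() against typeid(T) before casting, so a mismatch becomes a logged skip rather than a thrown exception. The same pattern, self-contained, with std::any (C++17) standing in for the type-erased payload; addPerson, which the comment above introduces, follows this aside:

    #include <any>
    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <typeinfo>
    #include <utility>
    #include <vector>

    int main() {
        using FeatureAny = std::pair<std::string, std::any>;
        std::vector<FeatureAny> raw;
        raw.emplace_back("count", std::uint64_t{7}); // wrong payload type for this query
        raw.emplace_back("mean", 3.5);

        // Extract only features carrying the expected type; checking type()
        // first avoids exception-driven control flow, which is slow.
        std::vector<std::pair<std::string, double>> result;
        bool succeeded = true;
        for (const auto& feature : raw) {
            if (feature.second.type() != typeid(double)) {
                std::cerr << "Bad type for feature = " << feature.first << ", expected "
                          << typeid(double).name() << " got "
                          << feature.second.type().name() << '\n';
                succeeded = false;
                continue;
            }
            result.emplace_back(feature.first, std::any_cast<double>(feature.second));
        }
        std::cout << result.size() << " extracted, succeeded = " << succeeded << '\n';
    }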
- std::size_t addPerson(const std::string& person, CResourceMonitor& resourceMonitor, bool& addedPerson); + std::size_t addPerson(const std::string& person, + CResourceMonitor& resourceMonitor, + bool& addedPerson); //@} //! \name Attribute @@ -569,7 +580,9 @@ class MODEL_EXPORT CDataGatherer { std::string printCurrentBucket(core_t::TTime time) const; //! Record a attribute called \p attribute. - std::size_t addAttribute(const std::string& attribute, CResourceMonitor& resourceMonitor, bool& addedAttribute); + std::size_t addAttribute(const std::string& attribute, + CResourceMonitor& resourceMonitor, + bool& addedAttribute); //@} //! \name Counts @@ -621,7 +634,9 @@ class MODEL_EXPORT CDataGatherer { return tuple.first.first; } //! Extract the person identifier from a tuple. - static inline std::size_t extractPersonId(const TSizeSizePr& tuple) { return tuple.first; } + static inline std::size_t extractPersonId(const TSizeSizePr& tuple) { + return tuple.first; + } //! Extracts the person identifier from a tuple. struct SExtractPersonId { template @@ -632,7 +647,8 @@ class MODEL_EXPORT CDataGatherer { //! Extract the attribute identifier from a tuple. template - static inline std::size_t extractAttributeId(const std::pair& tuple) { + static inline std::size_t + extractAttributeId(const std::pair& tuple) { return tuple.first.second; } //! Extract the attribute identifier from a tuple. @@ -641,7 +657,9 @@ class MODEL_EXPORT CDataGatherer { return tuple.first.second; } //! Extract the attribute identifier from a tuple. - static inline std::size_t extractAttributeId(const TSizeSizePr& tuple) { return tuple.second; } + static inline std::size_t extractAttributeId(const TSizeSizePr& tuple) { + return tuple.second; + } //! Extracts the attribute identifier from a tuple. struct SExtractAttributeId { template @@ -668,11 +686,15 @@ class MODEL_EXPORT CDataGatherer { //! Helper to avoid code duplication when getting a count from a //! field. Logs different errors for missing value and invalid value. - bool extractCountFromField(const std::string& fieldName, const std::string* fieldValue, std::size_t& count) const; + bool extractCountFromField(const std::string& fieldName, + const std::string* fieldValue, + std::size_t& count) const; //! Helper to avoid code duplication when getting a metric value from a //! field. Logs different errors for missing value and invalid value. - bool extractMetricFromField(const std::string& fieldName, std::string fieldValue, TDouble1Vec& metricValue) const; + bool extractMetricFromField(const std::string& fieldName, + std::string fieldValue, + TDouble1Vec& metricValue) const; //! Returns the startTime of the earliest bucket for which data are still //! accepted. diff --git a/include/model/CDynamicStringIdRegistry.h b/include/model/CDynamicStringIdRegistry.h index db6979ca3d..715e504417 100644 --- a/include/model/CDynamicStringIdRegistry.h +++ b/include/model/CDynamicStringIdRegistry.h @@ -106,7 +106,10 @@ class MODEL_EXPORT CDynamicStringIdRegistry { bool isIdActive(std::size_t id) const; //! Register a \p name and return its unique identifier. - std::size_t addName(const std::string& name, core_t::TTime time, CResourceMonitor& resourceMonitor, bool& addedPerson); + std::size_t addName(const std::string& name, + core_t::TTime time, + CResourceMonitor& resourceMonitor, + bool& addedPerson); //! Remove all traces of names whose identifiers are greater than //! or equal to \p lowestNameToRemove. 
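CDynamicStringIdRegistry above maps field values to dense integer identifiers, and addName signals through an out parameter whether the name was newly registered. A toy self-contained version of that contract (illustrative only; the real class also tracks time, recycling and resource limits):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // First sighting of a name allocates the next dense identifier; later
    // sightings return the existing one and report added == false.
    class StringIdRegistry {
    public:
        std::size_t addName(const std::string& name, bool& added) {
            auto result = m_Ids.emplace(name, m_Names.size());
            added = result.second;
            if (added) {
                m_Names.push_back(name);
            }
            return result.first->second;
        }
        const std::string& name(std::size_t id) const { return m_Names[id]; }

    private:
        std::unordered_map<std::string, std::size_t> m_Ids;
        std::vector<std::string> m_Names;
    };

    int main() {
        StringIdRegistry registry;
        bool added = false;
        std::size_t id1 = registry.addName("alice", added); // added == true, id 0
        std::size_t id2 = registry.addName("alice", added); // added == false, same id
        std::cout << id1 << ' ' << id2 << ' ' << registry.name(id1) << '\n';
    }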
diff --git a/include/model/CEventRateBucketGatherer.h b/include/model/CEventRateBucketGatherer.h index 70860e8c5a..cbae2a9d9a 100644 --- a/include/model/CEventRateBucketGatherer.h +++ b/include/model/CEventRateBucketGatherer.h @@ -216,7 +216,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! contain two fields. The first field should contain the over clause //! field value. The second field should the by clause field value //! or a generic name if none was specified. - virtual bool processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor); + virtual bool processFields(const TStrCPtrVec& fieldValues, + CEventData& result, + CResourceMonitor& resourceMonitor); //@} //! \name Person @@ -267,7 +269,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! //! \param[in] time The time of interest. //! \param[out] result Filled in with the feature data at \p time. - virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec& result) const; + virtual void featureData(core_t::TTime time, + core_t::TTime bucketLength, + TFeatureAnyPrVec& result) const; //@} private: @@ -289,7 +293,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! \param[in,out] result Append (person identifier, count) for each //! person present in the bucketing interval containing \p time. The //! collection is sorted by person. - void nonZeroPersonCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void nonZeroPersonCounts(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append an indicator function for people present in the bucketing //! interval containing \p time. @@ -307,7 +313,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! \param[in,out] result Append (person identifier, mean arrival time) //! for each person present in the bucketing interval containing \p time. //! The collection is sorted by person identifier. - void personArrivalTimes(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void personArrivalTimes(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append the non-zero counts for each attribute by person for the //! bucketing interval containing \p time. @@ -318,7 +326,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! attribute. The collection is sorted lexicographically by key. //! \note We expect the pairs present to be sparse on the full outer //! product space of attribute and person so use a sparse encoding. - void nonZeroAttributeCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void nonZeroAttributeCounts(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append the number of unique people hitting each attribute. //! @@ -337,7 +347,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! collection is sorted lexicographically by key. //! \note We expect the pairs present to be sparse on the full outer //! product space of attribute and person so use a sparse encoding. - void attributeIndicator(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void attributeIndicator(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append the number of unique values for each person //! 
in the bucketing interval containing \p time. @@ -345,7 +357,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! \param[in] time The time of interest. //! \param[out] result Filled in with the unique value counts //! by person - void bucketUniqueValuesPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void bucketUniqueValuesPerPerson(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append the number of unique values for each person and attribute //! in the bucketing interval containing \p time. @@ -353,7 +367,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! \param[in] time The time of interest. //! \param[out] result Filled in with the unique value counts //! by person and attribute - void bucketUniqueValuesPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void bucketUniqueValuesPerPersonAttribute(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append the compressed length of the unique attributes each person //! hits in the bucketing interval containing \p time. @@ -361,7 +377,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! \param[in] time The time of interest. //! \param[out] result Filled in with the compressed length of the //! unique values by person and attribute - void bucketCompressedLengthPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void bucketCompressedLengthPerPerson(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append the compressed length of the unique attributes each person //! hits in the bucketing interval containing \p time. @@ -369,7 +387,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! \param[in] time The time of interest. //! \param[out] result Filled in with the compressed length of the //! unique values by person and attribute - void bucketCompressedLengthPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void bucketCompressedLengthPerPersonAttribute(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append the time-of-day/week values for each person in the //! bucketing interval \p time. @@ -377,7 +397,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! \param[in] time The time of interest. //! \param[out] result Filled in with the arrival time values //! by person. - void bucketMeanTimesPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void bucketMeanTimesPerPerson(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Append the time-of-day/week values of each attribute and person //! in the bucketing interval \p time. @@ -385,7 +407,9 @@ class MODEL_EXPORT CEventRateBucketGatherer : public CBucketGatherer { //! \param[in] time The time of interest. //! \param[out] result Filled in with the arrival time values //! by attribute and person - void bucketMeanTimesPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result) const; + void bucketMeanTimesPerPersonAttribute(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result) const; //! Resize the necessary data structures so they can accommodate //! 
the person and attribute identified by \p pid and \p cid, diff --git a/include/model/CEventRateModel.h b/include/model/CEventRateModel.h index e072a623ac..057979a3fd 100644 --- a/include/model/CEventRateModel.h +++ b/include/model/CEventRateModel.h @@ -159,7 +159,10 @@ class MODEL_EXPORT CEventRateModel : public CIndividualModel { //! \param[in] pid The identifier of the person of interest. //! \param[in] cid Ignored. //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; //! Get the baseline bucket value of \p feature for the person //! identified by \p pid as of the start of the current bucketing @@ -197,7 +200,9 @@ class MODEL_EXPORT CEventRateModel : public CIndividualModel { //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); + virtual void sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor); //! Update the model with features samples from the time interval //! [\p startTime, \p endTime]. @@ -256,7 +261,8 @@ class MODEL_EXPORT CEventRateModel : public CIndividualModel { //! Get the value of the \p feature of the person identified //! by \p pid for the bucketing interval containing \p time. - const TFeatureData* featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const; + const TFeatureData* + featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const; private: //! Get the start time of the current bucket. diff --git a/include/model/CEventRateModelFactory.h b/include/model/CEventRateModelFactory.h index 5a1f234531..3269d433f1 100644 --- a/include/model/CEventRateModelFactory.h +++ b/include/model/CEventRateModelFactory.h @@ -54,7 +54,8 @@ class MODEL_EXPORT CEventRateModelFactory : public CModelFactory { //! the model. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const; //! Make a new event rate data gatherer. //! @@ -68,7 +69,8 @@ class MODEL_EXPORT CEventRateModelFactory : public CModelFactory { //! \param[in] partitionFieldValue The partition field value. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const; //@} //! \name Defaults @@ -83,14 +85,16 @@ class MODEL_EXPORT CEventRateModelFactory : public CModelFactory { //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. 
- virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; //! Get the default prior for pairs of correlated time series //! of \p feature. //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; //@} //! Get the search key corresponding to this factory. diff --git a/include/model/CEventRatePopulationModel.h b/include/model/CEventRatePopulationModel.h index a003c502c5..16df44872c 100644 --- a/include/model/CEventRatePopulationModel.h +++ b/include/model/CEventRatePopulationModel.h @@ -85,7 +85,8 @@ class MODEL_EXPORT CEventRatePopulationModel : public CPopulationModel { using TFeatureData = SEventRateFeatureData; using TSizeSizePrFeatureDataPr = std::pair; using TSizeSizePrFeatureDataPrVec = std::vector; - using TFeatureSizeSizePrFeatureDataPrVecMap = std::map; + using TFeatureSizeSizePrFeatureDataPrVecMap = + std::map; using TCategoryProbabilityCache = CModelTools::CCategoryProbabilityCache; using TProbabilityCache = CModelTools::CProbabilityCache; @@ -191,7 +192,10 @@ class MODEL_EXPORT CEventRatePopulationModel : public CPopulationModel { //! \param[in] pid The identifier of the person of interest. //! \param[in] cid The identifier of the attribute of interest. //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; //! Get the population baseline mean of \p feature for the //! attribute identified by \p cid as of the start of the @@ -225,7 +229,9 @@ class MODEL_EXPORT CEventRatePopulationModel : public CPopulationModel { //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); + virtual void sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor); //! Update the model with the samples of the various processes //! in the time interval [\p startTime, \p endTime]. @@ -295,7 +301,8 @@ class MODEL_EXPORT CEventRatePopulationModel : public CPopulationModel { virtual CModelDetailsViewPtr details() const; //! Get the feature data corresponding to \p feature at \p time. - const TSizeSizePrFeatureDataPrVec& featureData(model_t::EFeature feature, core_t::TTime time) const; + const TSizeSizePrFeatureDataPrVec& featureData(model_t::EFeature feature, + core_t::TTime time) const; private: //! Initialize the feature models. @@ -329,7 +336,8 @@ class MODEL_EXPORT CEventRatePopulationModel : public CPopulationModel { virtual void updateRecycledModels(); //! Update the correlation models. - virtual void refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor); + virtual void refreshCorrelationModels(std::size_t resourceLimit, + CResourceMonitor& resourceMonitor); //! 
Clear out large state objects for people/attributes that are pruned virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes); diff --git a/include/model/CEventRatePopulationModelFactory.h b/include/model/CEventRatePopulationModelFactory.h index d8e50a4cb3..967399528d 100644 --- a/include/model/CEventRatePopulationModelFactory.h +++ b/include/model/CEventRatePopulationModelFactory.h @@ -55,7 +55,8 @@ class MODEL_EXPORT CEventRatePopulationModelFactory : public CModelFactory { //! the model. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const; //! Make a new event rate population data gatherer. //! \param[in] initData The parameters needed to initialize the @@ -69,7 +70,8 @@ class MODEL_EXPORT CEventRatePopulationModelFactory : public CModelFactory { //! \param[in] partitionFieldValue The partition field value. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const; //@} //! \name Defaults @@ -84,14 +86,16 @@ class MODEL_EXPORT CEventRatePopulationModelFactory : public CModelFactory { //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; //! Get the default prior for pairs of correlated time series //! of \p feature. //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; //@} //! Get the search key corresponding to this factory. diff --git a/include/model/CForecastDataSink.h b/include/model/CForecastDataSink.h index 4eda225718..515732c9f6 100644 --- a/include/model/CForecastDataSink.h +++ b/include/model/CForecastDataSink.h @@ -43,7 +43,9 @@ class MODEL_EXPORT CForecastDataSink final : private core::CNonCopyable { //! Wrapper for 1 timeseries model, its feature and by Field struct MODEL_EXPORT SForecastModelWrapper { - SForecastModelWrapper(model_t::EFeature feature, TMathsModelPtr&& forecastModel, const std::string& byFieldValue); + SForecastModelWrapper(model_t::EFeature feature, + TMathsModelPtr&& forecastModel, + const std::string& byFieldValue); SForecastModelWrapper(SForecastModelWrapper&& other); @@ -131,7 +133,10 @@ class MODEL_EXPORT CForecastDataSink final : private core::CNonCopyable { int detectorIndex); //! 
Write Statistics about the forecast, also marks the ending - void writeStats(const double progress, uint64_t runtime, const TStrUMap& messages, bool successful = true); + void writeStats(const double progress, + uint64_t runtime, + const TStrUMap& messages, + bool successful = true); //! Write a scheduled message to signal that validation was successful void writeScheduledMessage(); diff --git a/include/model/CGathererTools.h b/include/model/CGathererTools.h index 350de3e170..2ffd626bb2 100644 --- a/include/model/CGathererTools.h +++ b/include/model/CGathererTools.h @@ -58,9 +58,11 @@ class MODEL_EXPORT CGathererTools { using TOptionalDouble = boost::optional; using TSampleVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; - using TMedianAccumulator = maths::CFixedQuantileSketch; + using TMedianAccumulator = + maths::CFixedQuantileSketch; using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; + using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsStack>; using TVarianceAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using TMultivariateMeanAccumulator = CMetricMultivariateStatistic; using TMultivariateMinAccumulator = CMetricMultivariateStatistic; @@ -98,7 +100,8 @@ class MODEL_EXPORT CGathererTools { if (m_LastTime == FIRST_TIME) { m_LastTime = time; } else { - m_Value.add(static_cast(time - m_LastTime) / static_cast(count)); + m_Value.add(static_cast(time - m_LastTime) / + static_cast(count)); m_LastTime = time; } } @@ -144,14 +147,16 @@ class MODEL_EXPORT CGathererTools { //! \brief Multivariate mean statistic gatherer. //! //! See TMeanGatherer for details. - using TMultivariateMeanGatherer = CSampleGatherer; + using TMultivariateMeanGatherer = + CSampleGatherer; //! \brief Median statistic gatherer. //! //! DESCRIPTION:\n //! Wraps up the functionality to sample the median of a fixed number //! of measurements, which are supplied to the add function. - using TMedianGatherer = CSampleGatherer; + using TMedianGatherer = + CSampleGatherer; // TODO Add multivariate median. @@ -168,7 +173,8 @@ class MODEL_EXPORT CGathererTools { //! \brief Multivariate minimum statistic gatherer. //! //! See TMinGatherer for details. - using TMultivariateMinGatherer = CSampleGatherer; + using TMultivariateMinGatherer = + CSampleGatherer; //! \brief Maximum statistic gatherer. //! @@ -183,7 +189,8 @@ class MODEL_EXPORT CGathererTools { //! \brief Multivariate maximum statistic gatherer. //! //! See TMaxGatherer for details. - using TMultivariateMaxGatherer = CSampleGatherer; + using TMultivariateMaxGatherer = + CSampleGatherer; //! \brief Variance statistic gatherer. //! @@ -193,7 +200,8 @@ class MODEL_EXPORT CGathererTools { //! //! This also computes the variance of all measurements in the current //! bucketing interval. - using TVarianceGatherer = CSampleGatherer; + using TVarianceGatherer = + CSampleGatherer; // TODO Add multivariate variance. 
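The arrival-time gatherer above feeds (time - lastTime) / count into a mean accumulator, so a batch of count summarised events spreads the gap between updates evenly. A self-contained sketch of that update, with a plain incremental mean standing in for the library's sample-mean accumulator:

    #include <cstdint>
    #include <iostream>

    using TTime = std::int64_t;

    // Running mean of inter-arrival intervals; the first event only sets the
    // reference point, matching the FIRST_TIME branch in the add() above.
    class InterArrivalMean {
    public:
        void add(TTime time, unsigned int count = 1) {
            if (m_LastTime < 0) {
                m_LastTime = time;
                return;
            }
            double interval = static_cast<double>(time - m_LastTime) /
                              static_cast<double>(count);
            m_Count += 1.0;
            m_Mean += (interval - m_Mean) / m_Count; // incremental mean update
            m_LastTime = time;
        }
        double mean() const { return m_Mean; }

    private:
        TTime m_LastTime = -1;
        double m_Mean = 0.0;
        double m_Count = 0.0;
    };

    int main() {
        InterArrivalMean gatherer;
        gatherer.add(0);
        gatherer.add(60);
        gatherer.add(180, 2); // two events arriving over a 120s gap: 60s each
        std::cout << gatherer.mean() << '\n'; // 60
    }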
@@ -212,10 +220,12 @@ class MODEL_EXPORT CGathererTools {
         using TSampleVecQueue = CBucketQueue<TSampleVec>;
         using TSampleVecQueueItr = TSampleVecQueue::iterator;
         using TSampleVecQueueCItr = TSampleVecQueue::const_iterator;
-        using TStoredStringPtrDoubleUMap = boost::unordered_map<core::CStoredStringPtr, double>;
+        using TStoredStringPtrDoubleUMap =
+            boost::unordered_map<core::CStoredStringPtr, double>;
         using TStoredStringPtrDoubleUMapCItr = TStoredStringPtrDoubleUMap::const_iterator;
         using TStoredStringPtrDoubleUMapQueue = CBucketQueue<TStoredStringPtrDoubleUMap>;
-        using TStoredStringPtrDoubleUMapQueueCRItr = TStoredStringPtrDoubleUMapQueue::const_reverse_iterator;
+        using TStoredStringPtrDoubleUMapQueueCRItr =
+            TStoredStringPtrDoubleUMapQueue::const_reverse_iterator;
         using TStoredStringPtrDoubleUMapQueueVec = std::vector<TStoredStringPtrDoubleUMapQueue>;
         using TStoredStringPtrVec = std::vector<core::CStoredStringPtr>;
@@ -231,7 +241,9 @@ class MODEL_EXPORT CGathererTools {
         std::size_t dimension() const;

         //! Get the feature data for the current bucketing interval.
-        SMetricFeatureData featureData(core_t::TTime time, core_t::TTime bucketLength, const TSampleVec& emptySample) const;
+        SMetricFeatureData featureData(core_t::TTime time,
+                                       core_t::TTime bucketLength,
+                                       const TSampleVec& emptySample) const;

         //! Returns false.
         bool sample(core_t::TTime time, unsigned int sampleCount);
@@ -250,7 +262,8 @@ class MODEL_EXPORT CGathererTools {
             TSampleVec& sum = m_BucketSums.get(time);
             if (sum.empty()) {
                 core_t::TTime bucketLength = m_BucketSums.bucketLength();
-                sum.push_back(CSample(maths::CIntegerTools::floor(time, bucketLength), TDoubleVec(1, 0.0), 1.0, 0.0));
+                sum.push_back(CSample(maths::CIntegerTools::floor(time, bucketLength),
+                                      TDoubleVec(1, 0.0), 1.0, 0.0));
             }
             (sum[0].value())[0] += value[0];
             sum[0].count() += static_cast<double>(count);

diff --git a/include/model/CHierarchicalResults.h b/include/model/CHierarchicalResults.h
index 3619661bb7..fbf2bb494a 100644
--- a/include/model/CHierarchicalResults.h
+++ b/include/model/CHierarchicalResults.h
@@ -40,9 +40,12 @@ class CLimits;
 namespace hierarchical_results_detail {

 using TStoredStringPtrVec = std::vector<core::CStoredStringPtr>;
-using TStoredStringPtrStoredStringPtrPr = std::pair<core::CStoredStringPtr, core::CStoredStringPtr>;
-using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair<TStoredStringPtrStoredStringPtrPr, double>;
-using TStoredStringPtrStoredStringPtrPrDoublePrVec = std::vector<TStoredStringPtrStoredStringPtrPrDoublePr>;
+using TStoredStringPtrStoredStringPtrPr =
+    std::pair<core::CStoredStringPtr, core::CStoredStringPtr>;
+using TStoredStringPtrStoredStringPtrPrDoublePr =
+    std::pair<TStoredStringPtrStoredStringPtrPr, double>;
+using TStoredStringPtrStoredStringPtrPrDoublePrVec =
+    std::vector<TStoredStringPtrStoredStringPtrPrDoublePr>;
 using TStr1Vec = core::CSmallVector<std::string, 1>;

 //! \brief The data fully describing a result node.
@@ -152,14 +155,18 @@ struct MODEL_EXPORT SNode {
     void swap(SNode& other);

     //! Persist the node state by passing information to \p inserter.
-    void acceptPersistInserter1(core::CStatePersistInserter& inserter, TNodePtrSizeUMap& nodePointers) const;
+    void acceptPersistInserter1(core::CStatePersistInserter& inserter,
+                                TNodePtrSizeUMap& nodePointers) const;

     //! Persist the node connectivity by passing information to \p inserter.
-    void acceptPersistInserter2(core::CStatePersistInserter& inserter, const TNodePtrSizeUMap& nodePointers) const;
+    void acceptPersistInserter2(core::CStatePersistInserter& inserter,
+                                const TNodePtrSizeUMap& nodePointers) const;

     //! Restore the node state reading state from \p traverser.
-    bool acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser, TSizeNodePtrUMap& nodePointers);
+    bool acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser,
+                                 TSizeNodePtrUMap& nodePointers);

     //! Restore the node connectivity reading state from \p traverser.
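SNode above persists in two passes: roughly, the first pass writes node payloads while a pointer-to-ordinal map is built, and the second writes connectivity in terms of those ordinals; restore mirrors this with an ordinal-to-pointer map, as the acceptRestoreTraverser2 declaration following this aside shows. A self-contained sketch of the idea (toy types, not the real inserter/traverser API):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct Node {
        std::string value;
        const Node* parent;
    };

    int main() {
        std::vector<Node> nodes(3); // value-initialized: parent == nullptr
        nodes[0].value = "root";
        nodes[1].value = "left";
        nodes[1].parent = &nodes[0];
        nodes[2].value = "right";
        nodes[2].parent = &nodes[0];

        // Pass 1: persist payloads, remembering pointer -> ordinal.
        std::unordered_map<const Node*, std::size_t> nodePointers;
        for (std::size_t i = 0; i < nodes.size(); ++i) {
            nodePointers[&nodes[i]] = i;
        }
        // Pass 2: persist each parent edge by ordinal (-1 meaning "no parent").
        std::vector<long> parentIds;
        for (const auto& node : nodes) {
            parentIds.push_back(node.parent ? static_cast<long>(nodePointers[node.parent]) : -1);
        }

        // Restore: rebuild pointers from ordinals.
        std::vector<Node> restored(nodes.size());
        for (std::size_t i = 0; i < nodes.size(); ++i) {
            restored[i].value = nodes[i].value;
            restored[i].parent = parentIds[i] >= 0
                                     ? &restored[static_cast<std::size_t>(parentIds[i])]
                                     : nullptr;
        }
        std::cout << restored[1].parent->value << '\n'; // root
    }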
- bool acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser, const TSizeNodePtrUMap& nodePointers); + bool acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser, + const TSizeNodePtrUMap& nodePointers); //! \name Connectivity //@{ @@ -248,16 +255,20 @@ class MODEL_EXPORT CHierarchicalResults { using TAttributeProbabilityVec = std::vector; using TResultSpec = hierarchical_results_detail::SResultSpec; using TStoredStringPtr = core::CStoredStringPtr; - using TStoredStringPtrStoredStringPtrPr = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPr; - using TStoredStringPtrStoredStringPtrPrDoublePr = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPrDoublePr; - using TStoredStringPtrStoredStringPtrPrDoublePrVec = hierarchical_results_detail::TStoredStringPtrStoredStringPtrPrDoublePrVec; + using TStoredStringPtrStoredStringPtrPr = + hierarchical_results_detail::TStoredStringPtrStoredStringPtrPr; + using TStoredStringPtrStoredStringPtrPrDoublePr = + hierarchical_results_detail::TStoredStringPtrStoredStringPtrPrDoublePr; + using TStoredStringPtrStoredStringPtrPrDoublePrVec = + hierarchical_results_detail::TStoredStringPtrStoredStringPtrPrDoublePrVec; using TNode = hierarchical_results_detail::SNode; using TNodePtrSizeUMap = hierarchical_results_detail::SNode::TNodePtrSizeUMap; using TSizeNodePtrUMap = hierarchical_results_detail::SNode::TSizeNodePtrUMap; using TNodeDeque = std::deque<TNode>; using TStoredStringPtrStoredStringPtrPrNodeMap = std::map<TStoredStringPtrStoredStringPtrPr, TNode>; - using TStoredStringPtrNodeMap = std::map<TStoredStringPtr, TNode>; + using TStoredStringPtrNodeMap = + std::map<TStoredStringPtr, TNode>; public: CHierarchicalResults(); @@ -338,7 +349,8 @@ class MODEL_EXPORT CHierarchicalResults { //! Get the influencer identified by \p influencerName and //! \p influencerValue if one exists. - const TNode* influencer(const TStoredStringPtr& influencerName, const TStoredStringPtr& influencerValue) const; + const TNode* influencer(const TStoredStringPtr& influencerName, + const TStoredStringPtr& influencerValue) const; //! Bottom up breadth first visit the tree. void bottomUpBreadthFirst(CHierarchicalResultsVisitor& visitor) const; @@ -454,7 +466,10 @@ class MODEL_EXPORT CHierarchicalResultsVisitor { static const TNode* nearestAncestorForWhichWeWriteResults(const TNode& node); //! Check if we'll write a result for the node. - static bool shouldWriteResult(const CLimits& limits, const CHierarchicalResults& results, const TNode& node, bool pivot); + static bool shouldWriteResult(const CLimits& limits, + const CHierarchicalResults& results, + const TNode& node, + bool pivot); friend class ::CHierarchicalResultsTest; }; diff --git a/include/model/CHierarchicalResultsAggregator.h b/include/model/CHierarchicalResultsAggregator.h index 8b8b9dca0f..a58b9ed1b1 100644 --- a/include/model/CHierarchicalResultsAggregator.h +++ b/include/model/CHierarchicalResultsAggregator.h @@ -37,7 +37,8 @@ class CLimits; //! people in a population and collections of individual results in //! system change analysis. Special logic is used for named people, //! i.e. aggregations of multiple compatible simple searches. -class MODEL_EXPORT CHierarchicalResultsAggregator : public CHierarchicalResultsLevelSet { +class MODEL_EXPORT CHierarchicalResultsAggregator + : public CHierarchicalResultsLevelSet { public: //! Enumeration of the possible jobs that the aggregator can //! perform when invoked. @@ -94,7 +95,10 @@ class MODEL_EXPORT CHierarchicalResultsAggregator : public CHierarchicalResultsL void aggregateNode(const TNode& node, bool pivot); //!
Partition the child probabilities into groups to aggregate together. - bool partitionChildProbabilities(const TNode& node, bool pivot, std::size_t& numberDetectors, TIntSizePrDouble1VecUMap (&partition)[N]); + bool partitionChildProbabilities(const TNode& node, + bool pivot, + std::size_t& numberDetectors, + TIntSizePrDouble1VecUMap (&partition)[N]); //! Compute the probability for each of the detectors. void detectorProbabilities(const TNode& node, diff --git a/include/model/CHierarchicalResultsLevelSet.h b/include/model/CHierarchicalResultsLevelSet.h index 9642cbb0ab..ea7b99f168 100644 --- a/include/model/CHierarchicalResultsLevelSet.h +++ b/include/model/CHierarchicalResultsLevelSet.h @@ -51,7 +51,8 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { using TWordTypePrVecCItr = typename TWordTypePrVec::const_iterator; protected: - explicit CHierarchicalResultsLevelSet(const T& bucketElement) : m_BucketElement(bucketElement) {} + explicit CHierarchicalResultsLevelSet(const T& bucketElement) + : m_BucketElement(bucketElement) {} //! Get the root unique element. const T& bucketElement() const { return m_BucketElement; } @@ -68,17 +69,22 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { //! Get an influencer element for \p influencerFieldName. //! //! \note Returns NULL if there isn't a matching one. - const T* influencerElement(const std::string& influencerFieldName) const { return element(m_InfluencerSet, influencerFieldName); } + const T* influencerElement(const std::string& influencerFieldName) const { + return element(m_InfluencerSet, influencerFieldName); + } //! Get a partition element for \p partitionFieldName. //! //! \note Returns NULL if there isn't a matching one. - const T* partitionElement(const std::string& partitionFieldName) const { return element(m_PartitionSet, partitionFieldName); } + const T* partitionElement(const std::string& partitionFieldName) const { + return element(m_PartitionSet, partitionFieldName); + } //! Get a person element. //! //! \note Returns NULL if there isn't a matching one. - const T* personElement(const std::string& partitionFieldName, const std::string& personFieldName) const { + const T* personElement(const std::string& partitionFieldName, + const std::string& personFieldName) const { TWord word = ms_Dictionary.word(partitionFieldName, personFieldName); TWordTypePrVecCItr i = element(m_PersonSet, word); return (i != m_PersonSet.end() && i->first == word) ? &i->second : nullptr; @@ -91,13 +97,16 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { const std::string& personFieldName, const std::string& functionName, const std::string& valueFieldName) const { - TWord word = ms_Dictionary.word(partitionFieldName, personFieldName, functionName, valueFieldName); + TWord word = ms_Dictionary.word(partitionFieldName, personFieldName, + functionName, valueFieldName); TWordTypePrVecCItr i = element(m_LeafSet, word); return (i != m_LeafSet.end() && i->first == word) ? &i->second : nullptr; } //! Get the influencer bucket set. - const TWordTypePrVec& influencerBucketSet() const { return m_InfluencerBucketSet; } + const TWordTypePrVec& influencerBucketSet() const { + return m_InfluencerBucketSet; + } //! Get a writable influencer bucket set. TWordTypePrVec& influencerBucketSet() { return m_InfluencerBucketSet; } @@ -153,7 +162,11 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { //! Get and possibly add a normalizer for \p node. 
template - void elements(const TNode& node, bool pivot, const FACTORY& factory, TTypePtrVec& result, bool distinctLeavesPerPartition = false) { + void elements(const TNode& node, + bool pivot, + const FACTORY& factory, + TTypePtrVec& result, + bool distinctLeavesPerPartition = false) { result.clear(); if (this->isSimpleCount(node)) { return; @@ -163,7 +176,8 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { TWord word = ms_Dictionary.word(*node.s_Spec.s_PersonFieldName); TWordTypePrVecItr i = element(m_InfluencerBucketSet, word); if (i == m_InfluencerBucketSet.end() || i->first != word) { - i = m_InfluencerBucketSet.insert(i, TWordTypePr(word, factory.make(*node.s_Spec.s_PersonFieldName))); + i = m_InfluencerBucketSet.insert( + i, TWordTypePr(word, factory.make(*node.s_Spec.s_PersonFieldName))); } result.push_back(&i->second); return; @@ -172,26 +186,28 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { TWord word = ms_Dictionary.word(*node.s_Spec.s_PersonFieldName); TWordTypePrVecItr i = element(m_InfluencerSet, word); if (i == m_InfluencerSet.end() || i->first != word) { - i = m_InfluencerSet.insert(i, TWordTypePr(word, factory.make(*node.s_Spec.s_PersonFieldName))); + i = m_InfluencerSet.insert( + i, TWordTypePr(word, factory.make(*node.s_Spec.s_PersonFieldName))); } result.push_back(&i->second); return; } - std::string partitionKey = distinctLeavesPerPartition ? *node.s_Spec.s_PartitionFieldName + *node.s_Spec.s_PartitionFieldValue - : *node.s_Spec.s_PartitionFieldName; + std::string partitionKey = distinctLeavesPerPartition + ? *node.s_Spec.s_PartitionFieldName + + *node.s_Spec.s_PartitionFieldValue + : *node.s_Spec.s_PartitionFieldName; if (this->isLeaf(node)) { - TWord word = ms_Dictionary.word( - partitionKey, *node.s_Spec.s_PersonFieldName, *node.s_Spec.s_FunctionName, *node.s_Spec.s_ValueFieldName); + TWord word = ms_Dictionary.word(partitionKey, *node.s_Spec.s_PersonFieldName, + *node.s_Spec.s_FunctionName, + *node.s_Spec.s_ValueFieldName); TWordTypePrVecItr i = element(m_LeafSet, word); if (i == m_LeafSet.end() || i->first != word) { i = m_LeafSet.insert( - i, - TWordTypePr( - word, - factory.make( - partitionKey, *node.s_Spec.s_PersonFieldName, *node.s_Spec.s_FunctionName, *node.s_Spec.s_ValueFieldName))); + i, TWordTypePr(word, factory.make(partitionKey, *node.s_Spec.s_PersonFieldName, + *node.s_Spec.s_FunctionName, + *node.s_Spec.s_ValueFieldName))); } result.push_back(&i->second); } @@ -199,7 +215,8 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { TWord word = ms_Dictionary.word(partitionKey, *node.s_Spec.s_PersonFieldName); TWordTypePrVecItr i = element(m_PersonSet, word); if (i == m_PersonSet.end() || i->first != word) { - i = m_PersonSet.insert(i, TWordTypePr(word, factory.make(partitionKey, *node.s_Spec.s_PersonFieldName))); + i = m_PersonSet.insert( + i, TWordTypePr(word, factory.make(partitionKey, *node.s_Spec.s_PersonFieldName))); } result.push_back(&i->second); } @@ -244,11 +261,14 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { //! Get the element corresponding to \p word if it exists //! and return the end iterator otherwise. static TWordTypePrVecItr element(TWordTypePrVec& set, const TWord& word) { - return std::lower_bound(set.begin(), set.end(), word, maths::COrderings::SFirstLess()); + return std::lower_bound(set.begin(), set.end(), word, + maths::COrderings::SFirstLess()); } //! Sort \p set on its key. 
- static void sort(TWordTypePrVec& set) { std::sort(set.begin(), set.end(), maths::COrderings::SFirstLess()); } + static void sort(TWordTypePrVec& set) { + std::sort(set.begin(), set.end(), maths::COrderings::SFirstLess()); + } //! Propagate the set elements forwards by \p time. template diff --git a/include/model/CHierarchicalResultsNormalizer.h b/include/model/CHierarchicalResultsNormalizer.h index 74ed6083c6..29888e2b0c 100644 --- a/include/model/CHierarchicalResultsNormalizer.h +++ b/include/model/CHierarchicalResultsNormalizer.h @@ -130,36 +130,44 @@ class MODEL_EXPORT CHierarchicalResultsNormalizer //! Get the influencer bucket normalizer for \p influencerFieldName. //! //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer* influencerBucketNormalizer(const std::string& influencerFieldName) const; + const CAnomalyScore::CNormalizer* + influencerBucketNormalizer(const std::string& influencerFieldName) const; //! Get the influencer normalizer for \p influencerFieldName. //! //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer* influencerNormalizer(const std::string& influencerFieldName) const; + const CAnomalyScore::CNormalizer* + influencerNormalizer(const std::string& influencerFieldName) const; //! Get a partition normalizer. //! //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer* partitionNormalizer(const std::string& partitionFieldName) const; + const CAnomalyScore::CNormalizer* + partitionNormalizer(const std::string& partitionFieldName) const; //! Get a person normalizer. //! //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer* personNormalizer(const std::string& partitionFieldName, const std::string& personFieldName) const; + const CAnomalyScore::CNormalizer* + personNormalizer(const std::string& partitionFieldName, + const std::string& personFieldName) const; //! Get a leaf normalizer. //! //! \note Returns NULL if there isn't a matching one. - const CAnomalyScore::CNormalizer* leafNormalizer(const std::string& partitionFieldName, - const std::string& personFieldName, - const std::string& functionName, - const std::string& valueFieldName) const; + const CAnomalyScore::CNormalizer* + leafNormalizer(const std::string& partitionFieldName, + const std::string& personFieldName, + const std::string& functionName, + const std::string& valueFieldName) const; private: //! Get the normalizer corresponding to \p cue if they exist //! and return NULL if it doesn't have an appropriate prefix. //! Also, extract the hash value. - bool parseCue(const std::string& cue, TWordNormalizerPrVec*& normalizers, TDictionary::TUInt64Array& hashArray); + bool parseCue(const std::string& cue, + TWordNormalizerPrVec*& normalizers, + TDictionary::TUInt64Array& hashArray); //! Get the persistence cue for the root normalizer. 
static const std::string& bucketCue(); diff --git a/include/model/CIndividualModel.h b/include/model/CIndividualModel.h index 7b25e8b864..ceb52c6e83 100644 --- a/include/model/CIndividualModel.h +++ b/include/model/CIndividualModel.h @@ -51,7 +51,8 @@ class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel { using TSizeUInt64Pr = std::pair<std::size_t, uint64_t>; using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>; using TFeatureSizeSizeTriple = core::CTriple; - using TFeatureSizeSizeTripleDouble1VecUMap = boost::unordered_map; + using TFeatureSizeSizeTripleDouble1VecUMap = + boost::unordered_map; using TFeatureMathsModelPtrPr = std::pair; using TFeatureMathsModelPtrPrVec = std::vector; using TFeatureMathsModelPtrVecPr = std::pair; @@ -112,7 +113,9 @@ class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel { //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; + virtual void sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) = 0; //! Sample any state needed by computeProbability for the out- //! of-phase bucket in the time interval [\p startTime, \p endTime] //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); + virtual void sampleOutOfPhase(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor); //! Update the model with feature samples from the time interval //! [\p startTime, \p endTime]. //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. //! \param[in] resourceMonitor The resourceMonitor. - virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; + virtual void sample(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) = 0; //! Prune any person models which haven't been updated for a //! specified period. @@ -167,8 +174,10 @@ class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel { protected: using TStrCRefDouble1VecDouble1VecPrPr = std::pair; using TStrCRefDouble1VecDouble1VecPrPrVec = std::vector<TStrCRefDouble1VecDouble1VecPrPr>; - using TStrCRefDouble1VecDouble1VecPrPrVecVec = std::vector<TStrCRefDouble1VecDouble1VecPrPrVec>; - using TStrCRefDouble1VecDouble1VecPrPrVecVecVec = std::vector<TStrCRefDouble1VecDouble1VecPrPrVecVec>; + using TStrCRefDouble1VecDouble1VecPrPrVecVec = + std::vector<TStrCRefDouble1VecDouble1VecPrPrVec>; + using TStrCRefDouble1VecDouble1VecPrPrVecVecVec = + std::vector<TStrCRefDouble1VecDouble1VecPrPrVecVec>; protected: //! Persist state by passing information to the supplied inserter. @@ -206,10 +215,11 @@ class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel { //! Get the value of the \p feature of the person identified //! by \p pid for the bucketing interval containing \p time. template<typename T> - const T* featureData(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time, - const std::vector<std::pair<model_t::EFeature, std::vector<std::pair<std::size_t, T>>>>& featureData) const; + const T* featureData( + model_t::EFeature feature, + std::size_t pid, + core_t::TTime time, + const std::vector<std::pair<model_t::EFeature, std::vector<std::pair<std::size_t, T>>>>& featureData) const; //!
Sample the bucket statistics and write the results into //! \p featureData. diff --git a/include/model/CIndividualModelDetail.h b/include/model/CIndividualModelDetail.h index 538ac1a4fe..47c4d89056 100644 --- a/include/model/CIndividualModelDetail.h +++ b/include/model/CIndividualModelDetail.h @@ -16,13 +16,16 @@ namespace ml { namespace model { template<typename T> -void CIndividualModel::currentBucketPersonIds(core_t::TTime time, const T& featureData, TSizeVec& result) const { +void CIndividualModel::currentBucketPersonIds(core_t::TTime time, + const T& featureData, + TSizeVec& result) const { using TSizeUSet = boost::unordered_set<std::size_t>; result.clear(); if (!this->bucketStatsAvailable(time)) { - LOG_ERROR(<< "No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); + LOG_ERROR(<< "No statistics at " << time + << ", current bucket = " << this->printCurrentBucket()); return; } @@ -37,23 +40,26 @@ void CIndividualModel::currentBucketPersonIds(core_t::TTime time, const T& featu } template<typename T> -const T* -CIndividualModel::featureData(model_t::EFeature feature, - std::size_t pid, - core_t::TTime time, - const std::vector<std::pair<model_t::EFeature, std::vector<std::pair<std::size_t, T>>>>& featureData) const { +const T* CIndividualModel::featureData( + model_t::EFeature feature, + std::size_t pid, + core_t::TTime time, + const std::vector<std::pair<model_t::EFeature, std::vector<std::pair<std::size_t, T>>>>& featureData) const { if (!this->bucketStatsAvailable(time)) { - LOG_ERROR(<< "No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); + LOG_ERROR(<< "No statistics at " << time + << ", current bucket = " << this->printCurrentBucket()); return nullptr; } - auto i = std::lower_bound(featureData.begin(), featureData.end(), feature, maths::COrderings::SFirstLess()); + auto i = std::lower_bound(featureData.begin(), featureData.end(), feature, + maths::COrderings::SFirstLess()); if (i == featureData.end() || i->first != feature) { LOG_ERROR(<< "No data for feature " << model_t::print(feature)); return nullptr; } - auto j = std::lower_bound(i->second.begin(), i->second.end(), pid, maths::COrderings::SFirstLess()); + auto j = std::lower_bound(i->second.begin(), i->second.end(), pid, + maths::COrderings::SFirstLess()); return (j != i->second.end() && j->first == pid) ?
&j->second : nullptr; } @@ -69,13 +75,15 @@ void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime, return; } - for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) { + for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); + time < endTime; time += bucketLength) { this->CIndividualModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor); gatherer.featureData(time, bucketLength, featureData); for (auto& feature_ : featureData) { T& data = feature_.second; - LOG_TRACE(<< model_t::print(feature_.first) << " data = " << core::CContainerPrinter::print(data)); + LOG_TRACE(<< model_t::print(feature_.first) + << " data = " << core::CContainerPrinter::print(data)); this->applyFilter(model_t::E_XF_By, false, filter, data); } } @@ -90,20 +98,23 @@ bool CIndividualModel::addProbabilityAndInfluences(std::size_t pid, if (!pJoint.addAttributeProbability(CStringStore::names().get(EMPTY_STRING), model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, 1.0, // attribute probability - params, - builder)) { - LOG_ERROR(<< "Failed to compute P(" << params.describe() << ", person = " << this->personName(pid) << ")"); + params, builder)) { + LOG_ERROR(<< "Failed to compute P(" << params.describe() + << ", person = " << this->personName(pid) << ")"); return false; } else { - LOG_TRACE(<< "P(" << params.describe() << ", person = " << this->personName(pid) << ") = " << params.s_Probability); + LOG_TRACE(<< "P(" << params.describe() << ", person = " << this->personName(pid) + << ") = " << params.s_Probability); } if (!influences.empty()) { const CDataGatherer& gatherer = this->dataGatherer(); for (std::size_t j = 0u; j < influences.size(); ++j) { - if (const CInfluenceCalculator* influenceCalculator = this->influenceCalculator(params.s_Feature, j)) { + if (const CInfluenceCalculator* influenceCalculator = + this->influenceCalculator(params.s_Feature, j)) { pJoint.plugin(*influenceCalculator); - pJoint.addInfluences(*(gatherer.beginInfluencers() + j), influences[j], params); + pJoint.addInfluences(*(gatherer.beginInfluencers() + j), + influences[j], params); } } } diff --git a/include/model/CInterimBucketCorrector.h b/include/model/CInterimBucketCorrector.h index 2a0ff6eff4..631c18692b 100644 --- a/include/model/CInterimBucketCorrector.h +++ b/include/model/CInterimBucketCorrector.h @@ -70,7 +70,10 @@ class MODEL_EXPORT CInterimBucketCorrector { //! \param[in] currentCount The total count in the bucket of interest. //! \param[in] modes The modes that map to the given \p values. //! \param[in] values The values to be corrected. - TDouble10Vec corrections(core_t::TTime time, std::size_t currentCount, const TDouble10Vec& modes, const TDouble10Vec& values) const; + TDouble10Vec corrections(core_t::TTime time, + std::size_t currentCount, + const TDouble10Vec& modes, + const TDouble10Vec& values) const; //! 
Get the memory used by the corrector void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; diff --git a/include/model/CLimits.h b/include/model/CLimits.h index e82ad1164f..c1db0032fc 100644 --- a/include/model/CLimits.h +++ b/include/model/CLimits.h @@ -107,7 +107,8 @@ class MODEL_EXPORT CLimits { return false; } } catch (boost::property_tree::ptree_error&) { - LOG_DEBUG(<< "Using default value (" << defaultValue << ") for unspecified setting " << iniPath); + LOG_DEBUG(<< "Using default value (" << defaultValue + << ") for unspecified setting " << iniPath); value = defaultValue; } diff --git a/include/model/CMemoryUsageEstimator.h b/include/model/CMemoryUsageEstimator.h index e98d0707cd..c28c1fb882 100644 --- a/include/model/CMemoryUsageEstimator.h +++ b/include/model/CMemoryUsageEstimator.h @@ -40,7 +40,12 @@ namespace model { class MODEL_EXPORT CMemoryUsageEstimator { public: //! Enumeration of the components included in the memory estimate. - enum EComponent { E_People = 0, E_Attributes, E_Correlations, E_NumberPredictors }; + enum EComponent { + E_People = 0, + E_Attributes, + E_Correlations, + E_NumberPredictors + }; using TSizeArray = boost::array; using TOptionalSize = boost::optional<std::size_t>; diff --git a/include/model/CMetricBucketGatherer.h b/include/model/CMetricBucketGatherer.h index 566f6bed08..3c0b67f2d0 100644 --- a/include/model/CMetricBucketGatherer.h +++ b/include/model/CMetricBucketGatherer.h @@ -107,7 +107,8 @@ class MODEL_EXPORT CMetricBucketGatherer : public CBucketGatherer { private: //! Internal restore function. - bool acceptRestoreTraverserInternal(core::CStateRestoreTraverser& traverser, bool isCurrentVersion); + bool acceptRestoreTraverserInternal(core::CStateRestoreTraverser& traverser, + bool isCurrentVersion); //@} public: @@ -161,7 +162,9 @@ class MODEL_EXPORT CMetricBucketGatherer : public CBucketGatherer { //! should be the by clause field value or a generic name if none was //! specified. The third field should contain a number corresponding //! to the metric value. - virtual bool processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor); + virtual bool processFields(const TStrCPtrVec& fieldValues, + CEventData& result, + CResourceMonitor& resourceMonitor); //@} //! \name Person @@ -212,7 +215,9 @@ class MODEL_EXPORT CMetricBucketGatherer : public CBucketGatherer { //! //! \param[in] time The time of interest. //! \param[out] result Filled in with the feature data at \p time. - virtual void featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec& result) const; + virtual void featureData(core_t::TTime time, + core_t::TTime bucketLength, + TFeatureAnyPrVec& result) const; //@} private: @@ -269,7 +274,8 @@ class MODEL_EXPORT CMetricBucketGatherer : public CBucketGatherer { //! 1) initializeFieldNamesPart1() //! 2) restore state //! 3) initializeFieldNamesPart2() - void initializeFieldNamesPart2(const std::string& valueFieldName, const std::string& summaryCountFieldName); + void initializeFieldNamesPart2(const std::string& valueFieldName, + const std::string& summaryCountFieldName); //! Initialize the feature data gatherers. void initializeFeatureData(); diff --git a/include/model/CMetricModel.h b/include/model/CMetricModel.h index eecae258e3..6e61814c97 100644 --- a/include/model/CMetricModel.h +++ b/include/model/CMetricModel.h @@ -152,7 +152,10 @@ class MODEL_EXPORT CMetricModel : public CIndividualModel { //! \param[in] pid The identifier of the person of interest. //!
\param[in] cid Ignored. //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; //! Get the baseline bucket value of \p feature for the person //! identified by \p pid as of the start of the current bucketing @@ -190,7 +193,9 @@ class MODEL_EXPORT CMetricModel : public CIndividualModel { //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); + virtual void sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor); //! Update the model with features samples from the time interval //! [\p startTime, \p endTime]. @@ -249,7 +254,8 @@ class MODEL_EXPORT CMetricModel : public CIndividualModel { //! Get the value of the \p feature of the person identified //! by \p pid for the bucketing interval containing \p time. - const TFeatureData* featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const; + const TFeatureData* + featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const; private: using TOptionalSample = boost::optional; diff --git a/include/model/CMetricModelFactory.h b/include/model/CMetricModelFactory.h index 3843e94672..f17d21022f 100644 --- a/include/model/CMetricModelFactory.h +++ b/include/model/CMetricModelFactory.h @@ -54,7 +54,8 @@ class MODEL_EXPORT CMetricModelFactory : public CModelFactory { //! the model. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const; //! Make a new metric data gatherer. //! @@ -68,7 +69,8 @@ class MODEL_EXPORT CMetricModelFactory : public CModelFactory { //! \param[in] partitionFieldValue The partition field value. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const; //@} //! \name Defaults @@ -83,14 +85,16 @@ class MODEL_EXPORT CMetricModelFactory : public CModelFactory { //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; //! Get the default prior for pairs of correlated time series //! of \p feature. //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. 
- virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; //@} //! Get the search key corresponding to this factory. diff --git a/include/model/CMetricMultivariateStatistic.h b/include/model/CMetricMultivariateStatistic.h index ed6084dfa0..64afa6c406 100644 --- a/include/model/CMetricMultivariateStatistic.h +++ b/include/model/CMetricMultivariateStatistic.h @@ -79,7 +79,8 @@ class CMetricMultivariateStatistic { void add(const TDouble1Vec& value, unsigned int count) { if (value.size() != m_Values.size()) { LOG_ERROR(<< "Inconsistent input data:" - << " # values = " << value.size() << ", expected " << m_Values.size()); + << " # values = " << value.size() << ", expected " + << m_Values.size()); return; } for (std::size_t i = 0u; i < value.size(); ++i) { @@ -121,7 +122,9 @@ class CMetricMultivariateStatistic { } //! Returns the count of all the measurements. - double count() const { return CMetricStatisticWrappers::count(m_Values[0]); } + double count() const { + return CMetricStatisticWrappers::count(m_Values[0]); + } //! Combine two partial statistics. const CMetricMultivariateStatistic& operator+=(const CMetricMultivariateStatistic& rhs) { @@ -132,7 +135,9 @@ class CMetricMultivariateStatistic { } //! Get the checksum of the partial statistic - uint64_t checksum(uint64_t seed) const { return maths::CChecksum::calculate(seed, m_Values); } + uint64_t checksum(uint64_t seed) const { + return maths::CChecksum::calculate(seed, m_Values); + } //! Debug the memory used by the statistic. void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { @@ -141,7 +146,9 @@ class CMetricMultivariateStatistic { } //! Get the memory used by the statistic. - std::size_t memoryUsage() const { return sizeof(*this) + core::CMemory::dynamicSize(m_Values); } + std::size_t memoryUsage() const { + return sizeof(*this) + core::CMemory::dynamicSize(m_Values); + } //! Print partial statistic std::string print() const { @@ -161,7 +168,8 @@ template const std::string CMetricMultivariateStatistic::VALUE_TAG("a"); template -std::ostream& operator<<(std::ostream& o, const CMetricMultivariateStatistic& statistic) { +std::ostream& operator<<(std::ostream& o, + const CMetricMultivariateStatistic& statistic) { return o << statistic.print(); } } diff --git a/include/model/CMetricPartialStatistic.h b/include/model/CMetricPartialStatistic.h index db023a48a4..792d9b3f7e 100644 --- a/include/model/CMetricPartialStatistic.h +++ b/include/model/CMetricPartialStatistic.h @@ -48,14 +48,16 @@ template class CMetricPartialStatistic { public: using TDouble1Vec = core::CSmallVector; - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; + using TMeanAccumulator = + maths::CBasicStatistics::SSampleMean::TAccumulator; public: static const std::string VALUE_TAG; static const std::string TIME_TAG; public: - CMetricPartialStatistic(std::size_t dimension) : m_Value(CMetricStatisticWrappers::template make(dimension)) {} + CMetricPartialStatistic(std::size_t dimension) + : m_Value(CMetricStatisticWrappers::template make(dimension)) {} //! Persist to a state document. void persist(core::CStatePersistInserter& inserter) const { @@ -93,13 +95,19 @@ class CMetricPartialStatistic { } //! Returns the aggregated value of all the measurements. 
- inline TDouble1Vec value() const { return CMetricStatisticWrappers::value(m_Value); } + inline TDouble1Vec value() const { + return CMetricStatisticWrappers::value(m_Value); + } //! Returns the combined count of all the measurements. - inline double count() const { return maths::CBasicStatistics::count(m_Time); } + inline double count() const { + return maths::CBasicStatistics::count(m_Time); + } //! Returns the mean time of all the measurements. - inline core_t::TTime time() const { return static_cast(maths::CBasicStatistics::mean(m_Time) + 0.5); } + inline core_t::TTime time() const { + return static_cast(maths::CBasicStatistics::mean(m_Time) + 0.5); + } //! Combine two partial statistics. inline const CMetricPartialStatistic& operator+=(const CMetricPartialStatistic& rhs) { @@ -123,7 +131,8 @@ class CMetricPartialStatistic { //! Get the memory used by the statistic. inline std::size_t memoryUsage() const { - return sizeof(*this) + core::CMemory::dynamicSize(m_Value) + core::CMemory::dynamicSize(m_Time); + return sizeof(*this) + core::CMemory::dynamicSize(m_Value) + + core::CMemory::dynamicSize(m_Time); } //! Print partial statistic diff --git a/include/model/CMetricPopulationModel.h b/include/model/CMetricPopulationModel.h index 4482cd38fa..451b164d22 100644 --- a/include/model/CMetricPopulationModel.h +++ b/include/model/CMetricPopulationModel.h @@ -62,7 +62,8 @@ class MODEL_EXPORT CMetricPopulationModel : public CPopulationModel { using TFeatureData = SMetricFeatureData; using TSizeSizePrFeatureDataPr = std::pair; using TSizeSizePrFeatureDataPrVec = std::vector; - using TFeatureSizeSizePrFeatureDataPrVecMap = std::map; + using TFeatureSizeSizePrFeatureDataPrVecMap = + std::map; using TProbabilityCache = CModelTools::CProbabilityCache; //! The statistics we maintain about a bucketing interval. @@ -167,7 +168,10 @@ class MODEL_EXPORT CMetricPopulationModel : public CPopulationModel { //! \param[in] pid The identifier of the person of interest. //! \param[in] cid The identifier of the attribute of interest. //! \param[in] time The time of interest. - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; //! Get the population baseline mean of \p feature for the //! attribute identified by \p cid as of the start of the @@ -201,7 +205,9 @@ class MODEL_EXPORT CMetricPopulationModel : public CPopulationModel { //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); + virtual void sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor); //! Update the model with the samples of the various processes //! in the time interval [\p startTime, \p endTime]. @@ -259,7 +265,8 @@ class MODEL_EXPORT CMetricPopulationModel : public CPopulationModel { virtual CModelDetailsViewPtr details() const; //! Get the feature data corresponding to \p feature at \p time. - const TSizeSizePrFeatureDataPrVec& featureData(model_t::EFeature feature, core_t::TTime time) const; + const TSizeSizePrFeatureDataPrVec& featureData(model_t::EFeature feature, + core_t::TTime time) const; //! Debug the memory used by this model. 
virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; @@ -305,7 +312,8 @@ class MODEL_EXPORT CMetricPopulationModel : public CPopulationModel { virtual void updateRecycledModels(); //! Update the correlation models. - virtual void refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor); + virtual void refreshCorrelationModels(std::size_t resourceLimit, + CResourceMonitor& resourceMonitor); //! Clear out large state objects for people/attributes that are pruned virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes); diff --git a/include/model/CMetricPopulationModelFactory.h b/include/model/CMetricPopulationModelFactory.h index 11fb1d4b91..5c66ef142d 100644 --- a/include/model/CMetricPopulationModelFactory.h +++ b/include/model/CMetricPopulationModelFactory.h @@ -54,7 +54,8 @@ class MODEL_EXPORT CMetricPopulationModelFactory : public CModelFactory { //! the model. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const; + virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const; //! Make a new metric population data gatherer. //! @@ -69,7 +70,8 @@ class MODEL_EXPORT CMetricPopulationModelFactory : public CModelFactory { //! \param[in] partitionFieldValue The partition field value. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const; + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const; //@} //! \name Defaults @@ -84,14 +86,16 @@ class MODEL_EXPORT CMetricPopulationModelFactory : public CModelFactory { //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const; //! Get the default prior for pairs of correlated time series //! of \p feature. //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; + virtual TMultivariatePriorPtr + defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const; //@} //! Get the search key corresponding to this factory. diff --git a/include/model/CMetricStatisticWrappers.h b/include/model/CMetricStatisticWrappers.h index c4d048d24f..b8bfdd4f4a 100644 --- a/include/model/CMetricStatisticWrappers.h +++ b/include/model/CMetricStatisticWrappers.h @@ -57,7 +57,8 @@ struct MODEL_EXPORT CMetricStatisticWrappers { using TDouble1Vec = core::CSmallVector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TVarianceAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMedianAccumulator = maths::CFixedQuantileSketch; + using TMedianAccumulator = + maths::CFixedQuantileSketch; //! Make a statistic. 
template @@ -67,33 +68,47 @@ struct MODEL_EXPORT CMetricStatisticWrappers { //! Add \p value to an order statistic. template - static void add(const TDouble1Vec& value, unsigned int count, maths::CBasicStatistics::COrderStatisticsStack& stat) { + static void add(const TDouble1Vec& value, + unsigned int count, + maths::CBasicStatistics::COrderStatisticsStack& stat) { stat.add(value[0], count); } //! Add \p value to a mean statistic. - static void add(const TDouble1Vec& value, unsigned int count, TMeanAccumulator& stat) { stat.add(value[0], count); } + static void add(const TDouble1Vec& value, unsigned int count, TMeanAccumulator& stat) { + stat.add(value[0], count); + } //! Add \p value to a variance statistic. - static void add(const TDouble1Vec& value, unsigned int count, TVarianceAccumulator& stat) { stat.add(value[0], count); } + static void add(const TDouble1Vec& value, unsigned int count, TVarianceAccumulator& stat) { + stat.add(value[0], count); + } //! Add \p value to a median statistic. - static void add(const TDouble1Vec& value, unsigned int count, TMedianAccumulator& stat) { stat.add(value[0], count); } + static void add(const TDouble1Vec& value, unsigned int count, TMedianAccumulator& stat) { + stat.add(value[0], count); + } //! Add \p value to a multivariate statistic. template - static void add(const TDouble1Vec& value, unsigned int count, CMetricMultivariateStatistic& stat) { + static void add(const TDouble1Vec& value, + unsigned int count, + CMetricMultivariateStatistic& stat) { stat.add(value, count); } //! Get the median value of an order statistic. template - static TDouble1Vec value(const maths::CBasicStatistics::COrderStatisticsStack& stat) { + static TDouble1Vec + value(const maths::CBasicStatistics::COrderStatisticsStack& stat) { return TDouble1Vec{stat[0]}; } //! Get the value of a mean statistic. - static TDouble1Vec value(const TMeanAccumulator& stat) { return TDouble1Vec{maths::CBasicStatistics::mean(stat)}; } + static TDouble1Vec value(const TMeanAccumulator& stat) { + return TDouble1Vec{maths::CBasicStatistics::mean(stat)}; + } //! Get the value of a variance statistic. static TDouble1Vec value(const TVarianceAccumulator& stat) { TDouble1Vec result; if (maths::CBasicStatistics::count(stat) >= 2.0) { - result.assign({maths::CBasicStatistics::maximumLikelihoodVariance(stat), maths::CBasicStatistics::mean(stat)}); + result.assign({maths::CBasicStatistics::maximumLikelihoodVariance(stat), + maths::CBasicStatistics::mean(stat)}); } return result; } @@ -135,13 +150,18 @@ struct MODEL_EXPORT CMetricStatisticWrappers { //! Returns 1.0 since this is not available. template - static double count(const maths::CBasicStatistics::COrderStatisticsStack& /*stat*/) { + static double + count(const maths::CBasicStatistics::COrderStatisticsStack& /*stat*/) { return 1.0; } //! Get the count of the statistic. - static double count(const TMeanAccumulator& stat) { return static_cast(maths::CBasicStatistics::count(stat)); } + static double count(const TMeanAccumulator& stat) { + return static_cast(maths::CBasicStatistics::count(stat)); + } //! Get the count of the statistic. - static double count(const TVarianceAccumulator& stat) { return static_cast(maths::CBasicStatistics::count(stat)); } + static double count(const TVarianceAccumulator& stat) { + return static_cast(maths::CBasicStatistics::count(stat)); + } //! Get the count of the statistic. static double count(const TMedianAccumulator& stat) { return stat.count(); } //! Get the count of a multivariate statistic. 
@@ -152,34 +172,45 @@ struct MODEL_EXPORT CMetricStatisticWrappers { //! Persist an order statistic. template - static void persist(const maths::CBasicStatistics::COrderStatisticsStack& stat, - const std::string& tag, - core::CStatePersistInserter& inserter) { + static void + persist(const maths::CBasicStatistics::COrderStatisticsStack& stat, + const std::string& tag, + core::CStatePersistInserter& inserter) { inserter.insertValue(tag, stat.toDelimited()); } //! Persist a mean statistic. - static void persist(const TMeanAccumulator& stat, const std::string& tag, core::CStatePersistInserter& inserter) { + static void persist(const TMeanAccumulator& stat, + const std::string& tag, + core::CStatePersistInserter& inserter) { inserter.insertValue(tag, stat.toDelimited()); } //! Persist a variance statistic. - static void persist(const TVarianceAccumulator& stat, const std::string& tag, core::CStatePersistInserter& inserter) { + static void persist(const TVarianceAccumulator& stat, + const std::string& tag, + core::CStatePersistInserter& inserter) { inserter.insertValue(tag, stat.toDelimited()); } //! Persist a median statistic. - static void persist(const TMedianAccumulator& stat, const std::string& tag, core::CStatePersistInserter& inserter) { - inserter.insertLevel(tag, boost::bind(&TMedianAccumulator::acceptPersistInserter, &stat, _1)); + static void persist(const TMedianAccumulator& stat, + const std::string& tag, + core::CStatePersistInserter& inserter) { + inserter.insertLevel( + tag, boost::bind(&TMedianAccumulator::acceptPersistInserter, &stat, _1)); } //! Persist a multivariate statistic. template - static void - persist(const CMetricMultivariateStatistic& stat, const std::string& tag, core::CStatePersistInserter& inserter) { - inserter.insertLevel(tag, boost::bind(&CMetricMultivariateStatistic::persist, &stat, _1)); + static void persist(const CMetricMultivariateStatistic& stat, + const std::string& tag, + core::CStatePersistInserter& inserter) { + inserter.insertLevel( + tag, boost::bind(&CMetricMultivariateStatistic::persist, &stat, _1)); } //! Restore an order statistic. template - static inline bool restore(core::CStateRestoreTraverser& traverser, - maths::CBasicStatistics::COrderStatisticsStack& stat) { + static inline bool + restore(core::CStateRestoreTraverser& traverser, + maths::CBasicStatistics::COrderStatisticsStack& stat) { if (stat.fromDelimited(traverser.value()) == false) { LOG_ERROR(<< "Invalid statistic in " << traverser.value()); return false; @@ -204,12 +235,15 @@ struct MODEL_EXPORT CMetricStatisticWrappers { } //! Restore a median statistic. static bool restore(core::CStateRestoreTraverser& traverser, TMedianAccumulator& stat) { - return traverser.traverseSubLevel(boost::bind(&TMedianAccumulator::acceptRestoreTraverser, &stat, _1)); + return traverser.traverseSubLevel( + boost::bind(&TMedianAccumulator::acceptRestoreTraverser, &stat, _1)); } //! Restore a multivariate statistic. 
template - static bool restore(core::CStateRestoreTraverser& traverser, CMetricMultivariateStatistic& stat) { - return traverser.traverseSubLevel(boost::bind(&CMetricMultivariateStatistic::restore, &stat, _1)); + static bool restore(core::CStateRestoreTraverser& traverser, + CMetricMultivariateStatistic& stat) { + return traverser.traverseSubLevel(boost::bind( + &CMetricMultivariateStatistic::restore, &stat, _1)); } }; } diff --git a/include/model/CModelDetailsView.h b/include/model/CModelDetailsView.h index 4cbc550224..57387e7733 100644 --- a/include/model/CModelDetailsView.h +++ b/include/model/CModelDetailsView.h @@ -55,14 +55,21 @@ class MODEL_EXPORT CModelDetailsView { //! by \p terms. //! //! \note If \p terms is empty all by field error bars are returned. - void modelPlot(core_t::TTime time, double boundsPercentile, const TStrSet& terms, CModelPlotData& modelPlotData) const; + void modelPlot(core_t::TTime time, + double boundsPercentile, + const TStrSet& terms, + CModelPlotData& modelPlotData) const; //! Get the feature prior for the specified by field \p byFieldId. - virtual const maths::CModel* model(model_t::EFeature feature, std::size_t byFieldId) const = 0; + virtual const maths::CModel* model(model_t::EFeature feature, + std::size_t byFieldId) const = 0; private: //! Add the model plot data for all by field values which match \p terms. - void addCurrentBucketValues(core_t::TTime time, model_t::EFeature feature, const TStrSet& terms, CModelPlotData& modelPlotData) const; + void addCurrentBucketValues(core_t::TTime time, + model_t::EFeature feature, + const TStrSet& terms, + CModelPlotData& modelPlotData) const; //! Get the model plot data for the specified by field value. void modelPlotForByFieldId(core_t::TTime, @@ -75,7 +82,9 @@ class MODEL_EXPORT CModelDetailsView { virtual const CAnomalyDetectorModel& base() const = 0; //! Get the count variance scale. - virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const = 0; + virtual double countVarianceScale(model_t::EFeature feature, + std::size_t byFieldId, + core_t::TTime time) const = 0; //! Returns true if the terms are empty or they contain the key. static bool contains(const TStrSet& terms, const std::string& key); @@ -106,7 +115,9 @@ class MODEL_EXPORT CEventRateModelDetailsView : public CModelDetailsView { private: virtual const CAnomalyDetectorModel& base() const; - virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + virtual double countVarianceScale(model_t::EFeature feature, + std::size_t byFieldId, + core_t::TTime time) const; private: //! The model. @@ -125,7 +136,9 @@ class MODEL_EXPORT CEventRatePopulationModelDetailsView : public CModelDetailsVi private: virtual const CAnomalyDetectorModel& base() const; - virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + virtual double countVarianceScale(model_t::EFeature feature, + std::size_t byFieldId, + core_t::TTime time) const; private: //! The model. @@ -144,7 +157,9 @@ class MODEL_EXPORT CMetricModelDetailsView : public CModelDetailsView { private: virtual const CAnomalyDetectorModel& base() const; - virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + virtual double countVarianceScale(model_t::EFeature feature, + std::size_t byFieldId, + core_t::TTime time) const; private: //! The model. 
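The CMetricStatisticWrappers hunks above follow one persistence convention throughout: a flat accumulator is written with insertValue as a single delimited string, a compound statistic is written with insertLevel plus a bound member function, and plain overload resolution picks the right strategy for each statistic type. The sketch below reproduces that dispatch in isolation; Inserter, MeanStat and SketchStat are hypothetical stand-ins for core::CStatePersistInserter and the real accumulators, and a lambda stands in for boost::bind.

#include <functional>
#include <iostream>
#include <sstream>
#include <string>

// Stand-in for core::CStatePersistInserter: the real class builds a
// structured state document, this one just prints what it is given.
struct Inserter {
    void insertValue(const std::string& tag, const std::string& value) {
        std::cout << tag << " = " << value << '\n';
    }
    void insertLevel(const std::string& tag,
                     const std::function<void(Inserter&)>& persistSubLevel) {
        std::cout << tag << " = {\n";
        persistSubLevel(*this);
        std::cout << "}\n";
    }
};

// A flat statistic persists as one delimited value...
struct MeanStat {
    double count = 0.0;
    double sum = 0.0;
    std::string toDelimited() const {
        std::ostringstream s;
        s << count << ':' << sum;
        return s.str();
    }
};

void persist(const MeanStat& stat, const std::string& tag, Inserter& inserter) {
    inserter.insertValue(tag, stat.toDelimited());
}

// ...while a compound statistic persists as a nested level, mirroring the
// insertLevel + bind pattern used for TMedianAccumulator in the diff.
struct SketchStat {
    MeanStat lower;
    MeanStat upper;
    void acceptPersistInserter(Inserter& inserter) const {
        persist(lower, "l", inserter);
        persist(upper, "u", inserter);
    }
};

void persist(const SketchStat& stat, const std::string& tag, Inserter& inserter) {
    inserter.insertLevel(
        tag, [&stat](Inserter& inserter_) { stat.acceptPersistInserter(inserter_); });
}

int main() {
    Inserter inserter;
    persist(MeanStat{4.0, 10.0}, "a", inserter); // a = 4:10
    persist(SketchStat{{2.0, 3.0}, {2.0, 7.0}}, "b", inserter);
}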
@@ -163,7 +178,9 @@ class MODEL_EXPORT CMetricPopulationModelDetailsView : public CModelDetailsView private: virtual const CAnomalyDetectorModel& base() const; - virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const; + virtual double countVarianceScale(model_t::EFeature feature, + std::size_t byFieldId, + core_t::TTime time) const; private: //! The model. diff --git a/include/model/CModelFactory.h b/include/model/CModelFactory.h index 1ebd4b9c7b..e4f37618ee 100644 --- a/include/model/CModelFactory.h +++ b/include/model/CModelFactory.h @@ -86,9 +86,11 @@ class MODEL_EXPORT CModelFactory { using TModelPtr = boost::shared_ptr; using TModelCPtr = boost::shared_ptr; using TInfluenceCalculatorCPtr = boost::shared_ptr; - using TFeatureInfluenceCalculatorCPtrPr = std::pair; + using TFeatureInfluenceCalculatorCPtrPr = + std::pair; using TFeatureInfluenceCalculatorCPtrPrVec = std::vector; - using TFeatureInfluenceCalculatorCPtrPrVecVec = std::vector; + using TFeatureInfluenceCalculatorCPtrPrVecVec = + std::vector; using TDetectionRuleVec = std::vector; using TDetectionRuleVecCRef = boost::reference_wrapper; using TStrDetectionRulePr = std::pair; @@ -115,7 +117,9 @@ class MODEL_EXPORT CModelFactory { //! need to change the signature of every factory function each //! time we need extra data to initialize a data gatherer. struct MODEL_EXPORT SGathererInitializationData { - SGathererInitializationData(core_t::TTime startTime, const std::string& partitionFieldValue, unsigned int sampleOverrideCount = 0u); + SGathererInitializationData(core_t::TTime startTime, + const std::string& partitionFieldValue, + unsigned int sampleOverrideCount = 0u); //! This constructor is meant to simplify unit tests SGathererInitializationData(const core_t::TTime startTime); @@ -141,7 +145,8 @@ class MODEL_EXPORT CModelFactory { //! //! \param[in] initData The parameters needed to initialize the model. //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData) const = 0; + virtual CAnomalyDetectorModel* + makeModel(const SModelInitializationData& initData) const = 0; //! Make a new model from part of a state document. //! @@ -149,21 +154,25 @@ class MODEL_EXPORT CModelFactory { //! the model. //! \param[in,out] traverser A state document traverser. //! \warning It is owned by the calling code. - virtual CAnomalyDetectorModel* makeModel(const SModelInitializationData& initData, core::CStateRestoreTraverser& traverser) const = 0; + virtual CAnomalyDetectorModel* + makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const = 0; //! Make a new data gatherer. //! //! \param[in] initData The parameters needed to initialize the //! data gatherer. //! \warning It is owned by the calling code. - virtual CDataGatherer* makeDataGatherer(const SGathererInitializationData& initData) const = 0; + virtual CDataGatherer* + makeDataGatherer(const SGathererInitializationData& initData) const = 0; //! Make a new data gatherer from part of a state document. //! //! \param[in,out] traverser A state document traverser. //! \param[in] partitionFieldValue The partition field value. //! \warning It is owned by the calling code. 
- virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const = 0; + virtual CDataGatherer* makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const = 0; //@} //! \name Defaults @@ -182,7 +191,8 @@ class MODEL_EXPORT CModelFactory { //! Get the default correlate priors to use for correlated pairs of time //! series of \p features. - const TFeatureMultivariatePriorPtrPrVec& defaultCorrelatePriors(const TFeatureVec& features) const; + const TFeatureMultivariatePriorPtrPrVec& + defaultCorrelatePriors(const TFeatureVec& features) const; //! Get the default models for correlations of \p features. const TFeatureCorrelationsPtrPrVec& defaultCorrelates(const TFeatureVec& features) const; @@ -201,20 +211,23 @@ class MODEL_EXPORT CModelFactory { //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TPriorPtr defaultPrior(model_t::EFeature feature, const SModelParams& params) const = 0; + virtual TPriorPtr defaultPrior(model_t::EFeature feature, + const SModelParams& params) const = 0; //! Get the default prior for multivariate \p feature. //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const = 0; + virtual TMultivariatePriorPtr + defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const = 0; //! Get the default prior for pairs of correlated time series //! of \p feature. //! //! \param[in] feature The feature for which to get the prior. //! \param[in] params The model parameters. - virtual TMultivariatePriorPtr defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const = 0; + virtual TMultivariatePriorPtr + defaultCorrelatePrior(model_t::EFeature feature, const SModelParams& params) const = 0; //! Get the default prior to use for categorical data. maths::CMultinomialConjugate defaultCategoricalPrior() const; @@ -223,11 +236,13 @@ class MODEL_EXPORT CModelFactory { //! //! \param[in] feature The feature for which to get the decomposition. //! \param[in] bucketLength The data bucketing length. - TDecompositionCPtr defaultDecomposition(model_t::EFeature feature, core_t::TTime bucketLength) const; + TDecompositionCPtr defaultDecomposition(model_t::EFeature feature, + core_t::TTime bucketLength) const; //! Get the influence calculators to use for each feature in \p features. - const TFeatureInfluenceCalculatorCPtrPrVec& defaultInfluenceCalculators(const std::string& influencerName, - const TFeatureVec& features) const; + const TFeatureInfluenceCalculatorCPtrPrVec& + defaultInfluenceCalculators(const std::string& influencerName, + const TFeatureVec& features) const; //@} //! Get the search key corresponding to this factory. @@ -346,7 +361,8 @@ class MODEL_EXPORT CModelFactory { //! \param[in] dimension The dimension. //! \param[in] params The model parameters. //! \warning Up to ten dimensions are supported. - TMultivariatePriorPtr multivariateNormalPrior(std::size_t dimension, const SModelParams& params) const; + TMultivariatePriorPtr multivariateNormalPrior(std::size_t dimension, + const SModelParams& params) const; //! Get a multivariate multimodal prior with dimension \p dimension. //! @@ -354,15 +370,18 @@ class MODEL_EXPORT CModelFactory { //! \param[in] params The model parameters. //! 
\warning Up to ten dimensions are supported. TMultivariatePriorPtr - multivariateMultimodalPrior(std::size_t dimension, const SModelParams& params, const maths::CMultivariatePrior& modePrior) const; + multivariateMultimodalPrior(std::size_t dimension, + const SModelParams& params, + const maths::CMultivariatePrior& modePrior) const; //! Get a multivariate 1-of-n prior with dimension \p dimension. //! //! \param[in] dimension The dimension. //! \param[in] params The model parameters. //! \param[in] models The component models to select between. - TMultivariatePriorPtr - multivariateOneOfNPrior(std::size_t dimension, const SModelParams& params, const TMultivariatePriorPtrVec& models) const; + TMultivariatePriorPtr multivariateOneOfNPrior(std::size_t dimension, + const SModelParams& params, + const TMultivariatePriorPtrVec& models) const; //! Get the default prior for time-of-day and time-of-week modeling. //! This is just a mixture of normals which allows more modes than @@ -380,7 +399,8 @@ class MODEL_EXPORT CModelFactory { private: using TFeatureVecMathsModelMap = std::map; - using TFeatureVecMultivariatePriorMap = std::map; + using TFeatureVecMultivariatePriorMap = + std::map; using TFeatureVecCorrelationsMap = std::map; using TStrFeatureVecPr = std::pair; using TStrFeatureVecPrInfluenceCalculatorCPtrMap = diff --git a/include/model/CModelParams.h b/include/model/CModelParams.h index f78131f739..da2a55de83 100644 --- a/include/model/CModelParams.h +++ b/include/model/CModelParams.h @@ -55,7 +55,8 @@ struct MODEL_EXPORT SModelParams { double minimumCategoryCount() const; //! Get the parameters supplied when restoring time series decompositions. - maths::STimeSeriesDecompositionRestoreParams decompositionRestoreParams(maths_t::EDataType dataType) const; + maths::STimeSeriesDecompositionRestoreParams + decompositionRestoreParams(maths_t::EDataType dataType) const; //! Get the parameters supplied when restoring distribution models. maths::SDistributionRestoreParams distributionRestoreParams(maths_t::EDataType dataType) const; diff --git a/include/model/CModelPlotData.h b/include/model/CModelPlotData.h index 0a7599670d..9a50e24e81 100644 --- a/include/model/CModelPlotData.h +++ b/include/model/CModelPlotData.h @@ -49,7 +49,8 @@ class MODEL_EXPORT CModelPlotData { public: using TStrByFieldDataUMap = boost::unordered_map; using TFeatureStrByFieldDataUMapPr = std::pair; - using TFeatureStrByFieldDataUMapUMap = boost::unordered_map; + using TFeatureStrByFieldDataUMapUMap = + boost::unordered_map; using TIntStrByFieldDataUMapUMap = boost::unordered_map; using TFeatureStrByFieldDataUMapUMapCItr = TFeatureStrByFieldDataUMapUMap::const_iterator; diff --git a/include/model/CModelTools.h b/include/model/CModelTools.h index 176fd91b17..fa80cad20d 100644 --- a/include/model/CModelTools.h +++ b/include/model/CModelTools.h @@ -58,7 +58,8 @@ class MODEL_EXPORT CModelTools { using TDouble2Vec1Vec = core::CSmallVector; using TTimeDouble2VecPr = std::pair; using TSizeSizePr = std::pair; - using TStoredStringPtrStoredStringPtrPr = std::pair; + using TStoredStringPtrStoredStringPtrPr = + std::pair; using TSampleVec = std::vector; //! \brief De-duplicates nearly equal values. @@ -76,7 +77,8 @@ class MODEL_EXPORT CModelTools { struct MODEL_EXPORT SDuplicateValueHash { std::size_t operator()(const TTimeDouble2VecPr& value) const; }; - using TTimeDouble2VecPrSizeUMap = boost::unordered_map; + using TTimeDouble2VecPrSizeUMap = + boost::unordered_map; private: //! Quantize \p value. 
@@ -104,15 +106,17 @@ class MODEL_EXPORT CModelTools { //! \brief Hashes a string pointer pair. struct MODEL_EXPORT SStoredStringPtrStoredStringPtrPrHash { std::size_t operator()(const TStoredStringPtrStoredStringPtrPr& target) const { - return static_cast(core::CHashing::hashCombine(static_cast(s_Hasher(*target.first)), - static_cast(s_Hasher(*target.second)))); + return static_cast(core::CHashing::hashCombine( + static_cast(s_Hasher(*target.first)), + static_cast(s_Hasher(*target.second)))); } core::CHashing::CMurmurHash2String s_Hasher; }; //! \brief Compares two string pointer pairs. struct MODEL_EXPORT SStoredStringPtrStoredStringPtrPrEqual { - std::size_t operator()(const TStoredStringPtrStoredStringPtrPr& lhs, const TStoredStringPtrStoredStringPtrPr& rhs) const { + std::size_t operator()(const TStoredStringPtrStoredStringPtrPr& lhs, + const TStoredStringPtrStoredStringPtrPr& rhs) const { return *lhs.first == *rhs.first && *lhs.second == *rhs.second; } }; @@ -127,7 +131,8 @@ class MODEL_EXPORT CModelTools { //! aggregation styles. class MODEL_EXPORT CProbabilityAggregator { public: - using TAggregator = boost::variant; + using TAggregator = + boost::variant; using TAggregatorDoublePr = std::pair; using TAggregatorDoublePrVec = std::vector; @@ -159,10 +164,8 @@ class MODEL_EXPORT CModelTools { TAggregatorDoublePrVec m_Aggregators; }; - using TStoredStringPtrStoredStringPtrPrProbabilityAggregatorUMap = boost::unordered_map; + using TStoredStringPtrStoredStringPtrPrProbabilityAggregatorUMap = + boost::unordered_map; //! Wraps up the calculation of less likely probabilities for a //! multinomial distribution. @@ -290,7 +293,8 @@ class MODEL_EXPORT CModelTools { }; using TFeatureSizePr = std::pair; - using TFeatureSizePrProbabilityCacheUMap = boost::unordered_map; + using TFeatureSizePrProbabilityCacheUMap = + boost::unordered_map; private: //! The maximum relative error we'll tolerate in the probability. diff --git a/include/model/CPartitioningFields.h b/include/model/CPartitioningFields.h index 724077e063..b1f9f795cb 100644 --- a/include/model/CPartitioningFields.h +++ b/include/model/CPartitioningFields.h @@ -30,7 +30,8 @@ class MODEL_EXPORT CPartitioningFields { using TStrCRefStrCRefPrVec = std::vector; public: - CPartitioningFields(const std::string& partitionFieldName, const std::string& partitionFieldValue); + CPartitioningFields(const std::string& partitionFieldName, + const std::string& partitionFieldValue); //! Append the field (name, value) pair (\p fieldName, \p fieldValue). void add(const std::string& fieldName, const std::string& fieldValue); diff --git a/include/model/CPopulationModel.h b/include/model/CPopulationModel.h index 6e0a35a4be..0066fa46aa 100644 --- a/include/model/CPopulationModel.h +++ b/include/model/CPopulationModel.h @@ -117,9 +117,13 @@ class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel { static typename T::const_iterator find(const T& data, std::size_t pid, std::size_t cid); //! Extract the bucket value for count feature data. - static inline TDouble1Vec extractValue(model_t::EFeature /*feature*/, const std::pair& data); + static inline TDouble1Vec + extractValue(model_t::EFeature /*feature*/, + const std::pair& data); //! Extract the bucket value for metric feature data. 
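For reference, SStoredStringPtrStoredStringPtrPrHash above is the usual combine-two-words pattern: hash each element of the pair, then mix. A self-contained sketch using plain std::string keys and the boost::hash_combine mixing constant (core::CHashing::hashCombine's exact recipe may differ):

    #include <cstdint>
    #include <functional>
    #include <string>
    #include <utility>

    struct StringPairHash {
        std::size_t operator()(const std::pair<std::string, std::string>& p) const {
            std::uint64_t seed = std::hash<std::string>{}(p.first);
            std::uint64_t h = std::hash<std::string>{}(p.second);
            // Mix the second hash into the first, boost::hash_combine style.
            seed ^= h + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
            return static_cast<std::size_t>(seed);
        }
    };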
- static inline TDouble1Vec extractValue(model_t::EFeature feature, const std::pair& data); + static inline TDouble1Vec + extractValue(model_t::EFeature feature, + const std::pair& data); //@} public: @@ -142,10 +146,14 @@ class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel { //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); + virtual void sampleOutOfPhase(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor); //! Update the rates for \p feature and \p people. - virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; + virtual void sample(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) = 0; //@} //! Get the checksum of this model. @@ -178,7 +186,10 @@ class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel { //! \brief A key for the partial bucket corrections map. class MODEL_EXPORT CCorrectionKey { public: - CCorrectionKey(model_t::EFeature feature, std::size_t pid, std::size_t cid, std::size_t correlated = 0); + CCorrectionKey(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + std::size_t correlated = 0); bool operator==(const CCorrectionKey& rhs) const; std::size_t hash() const; @@ -191,9 +202,12 @@ class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel { //! \brief A hasher for the partial bucket corrections map key. struct MODEL_EXPORT CHashCorrectionKey { - std::size_t operator()(const CCorrectionKey& key) const { return key.hash(); } + std::size_t operator()(const CCorrectionKey& key) const { + return key.hash(); + } }; - using TCorrectionKeyDouble1VecUMap = boost::unordered_map; + using TCorrectionKeyDouble1VecUMap = + boost::unordered_map; protected: //! Persist state by passing information to the supplied inserter. @@ -220,7 +234,8 @@ class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel { virtual void updateRecycledModels() = 0; //! Update the correlation models. - virtual void refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor) = 0; + virtual void refreshCorrelationModels(std::size_t resourceLimit, + CResourceMonitor& resourceMonitor) = 0; //! Clear out large state objects for people/attributes that are pruned. virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes) = 0; @@ -240,7 +255,10 @@ class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel { //! Remove heavy hitting people and attributes from the feature //! data if necessary. template - void applyFilters(bool updateStatistics, const PERSON_FILTER& personFilter, const ATTRIBUTE_FILTER& attributeFilter, T& data) const; + void applyFilters(bool updateStatistics, + const PERSON_FILTER& personFilter, + const ATTRIBUTE_FILTER& attributeFilter, + T& data) const; //! Get the first time each attribute was seen. const TTimeVec& attributeFirstBucketTimes() const; @@ -248,8 +266,10 @@ class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel { const TTimeVec& attributeLastBucketTimes() const; //! Get the people and attributes to remove if any. 
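The peopleAndAttributesToRemove declaration just below pairs a time horizon with a maximum age; presumably entities idle for longer than that age are queued for removal. A stand-in sketch of that selection, with toy type aliases in place of the model's:

    #include <cstddef>
    #include <vector>

    using TTimeVec = std::vector<long>;
    using TSizeVec = std::vector<std::size_t>;

    // Collect the ids whose last activity is older than maximumAge.
    void staleEntities(long time, long maximumAge,
                       const TTimeVec& lastBucketTimes, TSizeVec& toRemove) {
        for (std::size_t id = 0; id < lastBucketTimes.size(); ++id) {
            if (time - lastBucketTimes[id] > maximumAge) {
                toRemove.push_back(id);
            }
        }
    }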
- void - peopleAndAttributesToRemove(core_t::TTime time, std::size_t maximumAge, TSizeVec& peopleToRemove, TSizeVec& attributesToRemove) const; + void peopleAndAttributesToRemove(core_t::TTime time, + std::size_t maximumAge, + TSizeVec& peopleToRemove, + TSizeVec& attributesToRemove) const; //! Remove the \p people. void removePeople(const TSizeVec& peopleToRemove); diff --git a/include/model/CPopulationModelDetail.h b/include/model/CPopulationModelDetail.h index 02aab01735..3e4e15bb4e 100644 --- a/include/model/CPopulationModelDetail.h +++ b/include/model/CPopulationModelDetail.h @@ -18,27 +18,35 @@ template CPopulationModel::TSizeSizePr CPopulationModel::personRange(const T& data, std::size_t pid) { const std::size_t minCid = 0u; const std::size_t maxCid = std::numeric_limits::max(); - auto begin = std::lower_bound(data.begin(), data.end(), std::make_pair(pid, minCid), maths::COrderings::SFirstLess()); - auto end = std::upper_bound(begin, data.end(), std::make_pair(pid, maxCid), maths::COrderings::SFirstLess()); - return {static_cast(begin - data.begin()), static_cast(end - data.begin())}; + auto begin = std::lower_bound(data.begin(), data.end(), std::make_pair(pid, minCid), + maths::COrderings::SFirstLess()); + auto end = std::upper_bound(begin, data.end(), std::make_pair(pid, maxCid), + maths::COrderings::SFirstLess()); + return {static_cast(begin - data.begin()), + static_cast(end - data.begin())}; } template -typename T::const_iterator CPopulationModel::find(const T& data, std::size_t pid, std::size_t cid) { - auto i = std::lower_bound(data.begin(), data.end(), std::make_pair(pid, cid), maths::COrderings::SFirstLess()); - if (i != data.end() && (CDataGatherer::extractPersonId(*i) != pid || CDataGatherer::extractAttributeId(*i) != cid)) { +typename T::const_iterator +CPopulationModel::find(const T& data, std::size_t pid, std::size_t cid) { + auto i = std::lower_bound(data.begin(), data.end(), std::make_pair(pid, cid), + maths::COrderings::SFirstLess()); + if (i != data.end() && (CDataGatherer::extractPersonId(*i) != pid || + CDataGatherer::extractAttributeId(*i) != cid)) { i = data.end(); } return i; } -inline CPopulationModel::TDouble1Vec CPopulationModel::extractValue(model_t::EFeature /*feature*/, - const std::pair& data) { +inline CPopulationModel::TDouble1Vec +CPopulationModel::extractValue(model_t::EFeature /*feature*/, + const std::pair& data) { return TDouble1Vec{static_cast(CDataGatherer::extractData(data).s_Count)}; } -inline CPopulationModel::TDouble1Vec CPopulationModel::extractValue(model_t::EFeature feature, - const std::pair& data) { +inline CPopulationModel::TDouble1Vec +CPopulationModel::extractValue(model_t::EFeature feature, + const std::pair& data) { return CDataGatherer::extractData(data).s_BucketValue ? 
CDataGatherer::extractData(data).s_BucketValue->value(model_t::dimension(feature)) : TDouble1Vec(); diff --git a/include/model/CProbabilityAndInfluenceCalculator.h b/include/model/CProbabilityAndInfluenceCalculator.h index 30c68586ef..e20684e782 100644 --- a/include/model/CProbabilityAndInfluenceCalculator.h +++ b/include/model/CProbabilityAndInfluenceCalculator.h @@ -69,11 +69,15 @@ class MODEL_EXPORT CProbabilityAndInfluenceCalculator { using TStrCRefDouble1VecDoublePrPrVec = std::vector; using TStrCRefDouble1VecDouble1VecPrPr = std::pair; using TStrCRefDouble1VecDouble1VecPrPrVec = std::vector; - using TStrCRefDouble1VecDouble1VecPrPrVecVec = std::vector; - using TStoredStringPtrStoredStringPtrPr = std::pair; + using TStrCRefDouble1VecDouble1VecPrPrVecVec = + std::vector; + using TStoredStringPtrStoredStringPtrPr = + std::pair; using TStoredStringPtrStoredStringPtrPrVec = std::vector; - using TStoredStringPtrStoredStringPtrPrDoublePr = std::pair; - using TStoredStringPtrStoredStringPtrPrDoublePrVec = std::vector; + using TStoredStringPtrStoredStringPtrPrDoublePr = + std::pair; + using TStoredStringPtrStoredStringPtrPrDoublePrVec = + std::vector; using TStoredStringPtr1Vec = core::CSmallVector; //! \brief Wraps up the parameters to the influence calculation. @@ -301,7 +305,8 @@ class MODEL_EXPORT CProbabilityAndInfluenceCalculator { //! of all values added via addProbability. //! \param[out] influences Filled in with all influences of the //! overall probability. - bool calculate(double& probability, TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) const; + bool calculate(double& probability, + TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) const; private: //! Actually commit any influences we've found. diff --git a/include/model/CResultsQueue.h b/include/model/CResultsQueue.h index 9285fe66bf..198b450974 100644 --- a/include/model/CResultsQueue.h +++ b/include/model/CResultsQueue.h @@ -55,7 +55,9 @@ class MODEL_EXPORT CResultsQueue { //! Select which queued result object to output, based on anomaly score //! and which have been output most recently - core_t::TTime chooseResultTime(core_t::TTime bucketStartTime, core_t::TTime bucketLength, model::CHierarchicalResults& results); + core_t::TTime chooseResultTime(core_t::TTime bucketStartTime, + core_t::TTime bucketLength, + model::CHierarchicalResults& results); //! 
Standard persistence void acceptPersistInserter(core::CStatePersistInserter& inserter) const; diff --git a/include/model/CSampleGatherer.h b/include/model/CSampleGatherer.h index 813798bea4..453bf27899 100644 --- a/include/model/CSampleGatherer.h +++ b/include/model/CSampleGatherer.h @@ -68,9 +68,11 @@ class CSampleGatherer { using TMetricPartialStatistic = CMetricPartialStatistic; using TStatBucketQueue = CBucketQueue; using TStoredStringPtrVec = std::vector; - using TStoredStringPtrStatUMap = boost::unordered_map; + using TStoredStringPtrStatUMap = + boost::unordered_map; using TStoredStringPtrStatUMapBucketQueue = CBucketQueue; - using TStoredStringPtrStatUMapBucketQueueVec = std::vector; + using TStoredStringPtrStatUMapBucketQueueVec = + std::vector; public: static const std::string CLASSIFIER_TAG; @@ -86,33 +88,46 @@ class CSampleGatherer { core_t::TTime bucketLength, TStrVecCItr beginInfluencers, TStrVecCItr endInfluencers) - : m_Dimension(dimension), - m_SampleStats(dimension, params.s_SampleCountFactor, params.s_LatencyBuckets, params.s_SampleQueueGrowthFactor, bucketLength), - m_BucketStats(params.s_LatencyBuckets, bucketLength, startTime, TMetricPartialStatistic(dimension)), + : m_Dimension(dimension), m_SampleStats(dimension, + params.s_SampleCountFactor, + params.s_LatencyBuckets, + params.s_SampleQueueGrowthFactor, + bucketLength), + m_BucketStats(params.s_LatencyBuckets, + bucketLength, + startTime, + TMetricPartialStatistic(dimension)), m_InfluencerBucketStats( std::distance(beginInfluencers, endInfluencers), - TStoredStringPtrStatUMapBucketQueue(params.s_LatencyBuckets + 3, bucketLength, startTime, TStoredStringPtrStatUMap(1))) {} + TStoredStringPtrStatUMapBucketQueue(params.s_LatencyBuckets + 3, + bucketLength, + startTime, + TStoredStringPtrStatUMap(1))) {} //! \name Persistence //@{ //! Persist state by passing information to the supplied inserter. 
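The acceptPersistInserter below hands boost::bind-adapted member functions to insertLevel. A stand-in sketch of that callback protocol, with a hypothetical Inserter and Classifier rather than the core persistence types:

    #include <functional>
    #include <iostream>
    #include <string>

    struct Inserter {
        void insertValue(const std::string& tag, int value) {
            std::cout << tag << '=' << value << '\n';
        }
        // A named level is persisted by handing the serializer a callback.
        void insertLevel(const std::string& tag,
                         const std::function<void(Inserter&)>& writer) {
            std::cout << tag << " {\n";
            writer(*this);
            std::cout << "}\n";
        }
    };

    struct Classifier {
        int s_State = 7;
        void acceptPersistInserter(Inserter& inserter) const {
            inserter.insertValue("state", s_State);
        }
    };

    int main() {
        Inserter inserter;
        Classifier classifier;
        // Equivalent in spirit to boost::bind(&Classifier::acceptPersistInserter, ...).
        inserter.insertLevel("classifier", [&classifier](Inserter& inserter_) {
            classifier.acceptPersistInserter(inserter_);
        });
    }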
void acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DIMENSION_TAG, m_Dimension); - inserter.insertLevel(CLASSIFIER_TAG, boost::bind(&CDataClassifier::acceptPersistInserter, &m_Classifier, _1)); + inserter.insertLevel(CLASSIFIER_TAG, boost::bind(&CDataClassifier::acceptPersistInserter, + &m_Classifier, _1)); if (m_SampleStats.size() > 0) { - inserter.insertLevel(SAMPLE_STATS_TAG, boost::bind(&TSampleQueue::acceptPersistInserter, &m_SampleStats, _1)); + inserter.insertLevel(SAMPLE_STATS_TAG, boost::bind(&TSampleQueue::acceptPersistInserter, + &m_SampleStats, _1)); } if (m_BucketStats.size() > 0) { inserter.insertLevel( BUCKET_STATS_TAG, - boost::bind(TStatBucketQueueSerializer(TMetricPartialStatistic(m_Dimension)), boost::cref(m_BucketStats), _1)); + boost::bind(TStatBucketQueueSerializer(TMetricPartialStatistic(m_Dimension)), + boost::cref(m_BucketStats), _1)); } for (const auto& stats : m_InfluencerBucketStats) { - inserter.insertLevel(INFLUENCER_BUCKET_STATS_TAG, - boost::bind(TStoredStringPtrStatUMapBucketQueueSerializer( - TStoredStringPtrStatUMap(1), CStoredStringPtrStatUMapSerializer(m_Dimension)), - boost::cref(stats), - _1)); + inserter.insertLevel( + INFLUENCER_BUCKET_STATS_TAG, + boost::bind(TStoredStringPtrStatUMapBucketQueueSerializer( + TStoredStringPtrStatUMap(1), + CStoredStringPtrStatUMapSerializer(m_Dimension)), + boost::cref(stats), _1)); } } @@ -123,18 +138,23 @@ class CSampleGatherer { const std::string& name = traverser.name(); TMetricPartialStatistic stat(m_Dimension); RESTORE_BUILT_IN(DIMENSION_TAG, m_Dimension) - RESTORE(CLASSIFIER_TAG, traverser.traverseSubLevel(boost::bind(&CDataClassifier::acceptRestoreTraverser, &m_Classifier, _1))) - RESTORE(SAMPLE_STATS_TAG, traverser.traverseSubLevel(boost::bind(&TSampleQueue::acceptRestoreTraverser, &m_SampleStats, _1))) + RESTORE(CLASSIFIER_TAG, + traverser.traverseSubLevel(boost::bind( + &CDataClassifier::acceptRestoreTraverser, &m_Classifier, _1))) + RESTORE(SAMPLE_STATS_TAG, + traverser.traverseSubLevel(boost::bind( + &TSampleQueue::acceptRestoreTraverser, &m_SampleStats, _1))) RESTORE(BUCKET_STATS_TAG, - traverser.traverseSubLevel( - boost::bind(TStatBucketQueueSerializer(TMetricPartialStatistic(m_Dimension)), boost::ref(m_BucketStats), _1))) + traverser.traverseSubLevel(boost::bind( + TStatBucketQueueSerializer(TMetricPartialStatistic(m_Dimension)), + boost::ref(m_BucketStats), _1))) RESTORE(INFLUENCER_BUCKET_STATS_TAG, i < m_InfluencerBucketStats.size() && - traverser.traverseSubLevel( - boost::bind(TStoredStringPtrStatUMapBucketQueueSerializer( - TStoredStringPtrStatUMap(1), CStoredStringPtrStatUMapSerializer(m_Dimension)), - boost::ref(m_InfluencerBucketStats[i++]), - _1))) + traverser.traverseSubLevel(boost::bind( + TStoredStringPtrStatUMapBucketQueueSerializer( + TStoredStringPtrStatUMap(1), + CStoredStringPtrStatUMapSerializer(m_Dimension)), + boost::ref(m_InfluencerBucketStats[i++]), _1))) } while (traverser.next()); return true; } @@ -149,21 +169,27 @@ class CSampleGatherer { //! \param[in] time The start time of the sampled bucket. //! \param[in] effectiveSampleCount The effective historical //! number of measurements in a sample. 
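The RESTORE(...) macros above expand to exactly this shape: match the field name, parse its value, and move on. A stand-in sketch with toy Field records in place of a state traverser:

    #include <string>
    #include <vector>

    struct Field {
        std::string s_Name;
        std::string s_Value;
    };

    // Visit fields in document order and dispatch on each tag; unknown tags
    // are skipped, which keeps old state readable by newer code.
    bool restore(const std::vector<Field>& fields, long& dimension, std::string& classifier) {
        for (const Field& field : fields) {
            if (field.s_Name == "dimension") {
                dimension = std::stol(field.s_Value);
            } else if (field.s_Name == "classifier") {
                classifier = field.s_Value;
            }
        }
        return true;
    }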
- SMetricFeatureData featureData(core_t::TTime time, core_t::TTime /*bucketLength*/, double effectiveSampleCount) const { + SMetricFeatureData featureData(core_t::TTime time, + core_t::TTime /*bucketLength*/, + double effectiveSampleCount) const { const TMetricPartialStatistic& bucketPartial = m_BucketStats.get(time); double count = bucketPartial.count(); if (count > 0.0) { core_t::TTime bucketTime = bucketPartial.time(); TDouble1Vec bucketValue = bucketPartial.value(); if (bucketValue.size() > 0) { - TStrCRefDouble1VecDoublePrPrVecVec influenceValues(m_InfluencerBucketStats.size()); + TStrCRefDouble1VecDoublePrPrVecVec influenceValues( + m_InfluencerBucketStats.size()); for (std::size_t i = 0u; i < m_InfluencerBucketStats.size(); ++i) { - const TStoredStringPtrStatUMap& influencerStats = m_InfluencerBucketStats[i].get(time); + const TStoredStringPtrStatUMap& influencerStats = + m_InfluencerBucketStats[i].get(time); influenceValues[i].reserve(influencerStats.size()); for (const auto& stat : influencerStats) { - influenceValues[i].emplace_back(boost::cref(*stat.first), - std::make_pair(CMetricStatisticWrappers::influencerValue(stat.second), - CMetricStatisticWrappers::count(stat.second))); + influenceValues[i].emplace_back( + boost::cref(*stat.first), + std::make_pair( + CMetricStatisticWrappers::influencerValue(stat.second), + CMetricStatisticWrappers::count(stat.second))); } } return {bucketTime, @@ -201,7 +227,10 @@ class CSampleGatherer { //! \param[in] sampleCount The measurement count in a sample. //! \param[in] influences The influencing field values which //! label \p value. - inline void add(core_t::TTime time, const TDouble1Vec& value, unsigned int sampleCount, const TStoredStringPtrVec& influences) { + inline void add(core_t::TTime time, + const TDouble1Vec& value, + unsigned int sampleCount, + const TStoredStringPtrVec& influences) { this->add(time, value, 1, sampleCount, influences); } @@ -229,7 +258,10 @@ class CSampleGatherer { continue; } TStoredStringPtrStatUMap& stats = m_InfluencerBucketStats[i].get(time); - auto j = stats.emplace(influences[i], CMetricStatisticWrappers::template make(m_Dimension)).first; + auto j = stats + .emplace(influences[i], + CMetricStatisticWrappers::template make(m_Dimension)) + .first; CMetricStatisticWrappers::add(statistic, count, j->second); } } @@ -279,21 +311,25 @@ class CSampleGatherer { mem->setName("CSampleGatherer", sizeof(*this)); core::CMemoryDebug::dynamicSize("m_SampleStats", m_SampleStats, mem); core::CMemoryDebug::dynamicSize("m_BucketStats", m_BucketStats, mem); - core::CMemoryDebug::dynamicSize("m_InfluencerBucketStats", m_InfluencerBucketStats, mem); + core::CMemoryDebug::dynamicSize("m_InfluencerBucketStats", + m_InfluencerBucketStats, mem); core::CMemoryDebug::dynamicSize("m_Samples", m_Samples, mem); } //! Get the memory used by this gatherer. std::size_t memoryUsage() const { - return sizeof(*this) + core::CMemory::dynamicSize(m_SampleStats) + core::CMemory::dynamicSize(m_BucketStats) + - core::CMemory::dynamicSize(m_InfluencerBucketStats) + core::CMemory::dynamicSize(m_Samples); + return sizeof(*this) + core::CMemory::dynamicSize(m_SampleStats) + + core::CMemory::dynamicSize(m_BucketStats) + + core::CMemory::dynamicSize(m_InfluencerBucketStats) + + core::CMemory::dynamicSize(m_Samples); } //! Print this gatherer for debug. 
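The memoryUsage accounting above follows a simple convention: report the object's own footprint plus the dynamic usage of each member. A toy sketch of the same shape (core::CMemory handles far more container types than this):

    #include <cstddef>
    #include <vector>

    // Toy stand-in for core::CMemory::dynamicSize on a vector.
    template<typename T>
    std::size_t dynamicSize(const std::vector<T>& v) {
        return v.capacity() * sizeof(T);
    }

    struct Gatherer {
        std::vector<double> m_Samples;
        std::vector<double> m_BucketStats;
        std::size_t memoryUsage() const {
            return sizeof(*this) + dynamicSize(m_Samples) + dynamicSize(m_BucketStats);
        }
    };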
std::string print() const { std::ostringstream result; - result << m_Classifier.isInteger() << ' ' << m_Classifier.isNonNegative() << ' ' << m_BucketStats.print() << ' ' - << m_SampleStats.print() << ' ' << core::CContainerPrinter::print(m_Samples) << ' ' + result << m_Classifier.isInteger() << ' ' << m_Classifier.isNonNegative() + << ' ' << m_BucketStats.print() << ' ' << m_SampleStats.print() + << ' ' << core::CContainerPrinter::print(m_Samples) << ' ' << core::CContainerPrinter::print(m_InfluencerBucketStats); return result.str(); } @@ -305,11 +341,18 @@ class CSampleGatherer { private: //! \brief Manages persistence of bucket statistics. struct SStatSerializer { - void operator()(const TMetricPartialStatistic& stat, core::CStatePersistInserter& inserter) const { stat.persist(inserter); } + void operator()(const TMetricPartialStatistic& stat, + core::CStatePersistInserter& inserter) const { + stat.persist(inserter); + } - bool operator()(TMetricPartialStatistic& stat, core::CStateRestoreTraverser& traverser) const { return stat.restore(traverser); } + bool operator()(TMetricPartialStatistic& stat, + core::CStateRestoreTraverser& traverser) const { + return stat.restore(traverser); + } }; - using TStatBucketQueueSerializer = typename TStatBucketQueue::template CSerializer; + using TStatBucketQueueSerializer = + typename TStatBucketQueue::template CSerializer; //! \brief Manages persistence of influence bucket statistics. class CStoredStringPtrStatUMapSerializer { @@ -317,7 +360,8 @@ class CSampleGatherer { CStoredStringPtrStatUMapSerializer(std::size_t dimension) : m_Initial(CMetricStatisticWrappers::template make(dimension)) {} - void operator()(const TStoredStringPtrStatUMap& map, core::CStatePersistInserter& inserter) const { + void operator()(const TStoredStringPtrStatUMap& map, + core::CStatePersistInserter& inserter) const { using TStatCRef = boost::reference_wrapper; using TStrCRefStatCRefPr = std::pair; using TStrCRefStatCRefPrVec = std::vector; @@ -333,14 +377,16 @@ class CSampleGatherer { } } - bool operator()(TStoredStringPtrStatUMap& map, core::CStateRestoreTraverser& traverser) const { + bool operator()(TStoredStringPtrStatUMap& map, + core::CStateRestoreTraverser& traverser) const { std::string key; do { const std::string& name = traverser.name(); RESTORE_NO_ERROR(MAP_KEY_TAG, key = traverser.value()) RESTORE(MAP_VALUE_TAG, - CMetricStatisticWrappers::restore(traverser, - map.insert({CStringStore::influencers().get(key), m_Initial}).first->second)) + CMetricStatisticWrappers::restore( + traverser, map.insert({CStringStore::influencers().get(key), m_Initial}) + .first->second)) } while (traverser.next()); return true; } diff --git a/include/model/CSampleQueue.h b/include/model/CSampleQueue.h index 164cd3aba7..fef69dfd2e 100644 --- a/include/model/CSampleQueue.h +++ b/include/model/CSampleQueue.h @@ -64,7 +64,8 @@ class CSampleQueue { static const std::string SAMPLE_END_TAG; static const std::string SAMPLE_TAG; - SSubSample(std::size_t dimension, core_t::TTime time) : s_Statistic(dimension), s_Start(time), s_End(time) {} + SSubSample(std::size_t dimension, core_t::TTime time) + : s_Statistic(dimension), s_Start(time), s_End(time) {} void add(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) { s_Statistic.add(measurement, time, count); @@ -107,7 +108,8 @@ class CSampleQueue { //! Persist to a state document. 
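One plausible reading of the serializer above, which copies unordered_map entries into a vector of references before persisting, is that it imposes a stable order: unordered_map iteration order is unspecified, so sorting keeps the serialized form deterministic. A sketch of that pattern (not a claim about ml-cpp's exact ordering):

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    void persistSorted(const std::unordered_map<std::string, double>& map) {
        std::vector<const std::pair<const std::string, double>*> entries;
        entries.reserve(map.size());
        for (const auto& entry : map) {
            entries.push_back(&entry);
        }
        // Sort by key so repeated runs persist entries in the same order.
        std::sort(entries.begin(), entries.end(),
                  [](const auto* lhs, const auto* rhs) { return lhs->first < rhs->first; });
        for (const auto* entry : entries) {
            std::cout << entry->first << " -> " << entry->second << '\n';
        }
    }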
void acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(SAMPLE_TAG, boost::bind(&TMetricPartialStatistic::persist, &s_Statistic, _1)); + inserter.insertLevel(SAMPLE_TAG, boost::bind(&TMetricPartialStatistic::persist, + &s_Statistic, _1)); inserter.insertValue(SAMPLE_START_TAG, s_Start); inserter.insertValue(SAMPLE_END_TAG, s_End); } @@ -117,18 +119,21 @@ class CSampleQueue { do { const std::string& name = traverser.name(); if (name == SAMPLE_TAG) { - if (traverser.traverseSubLevel(boost::bind(&TMetricPartialStatistic::restore, &s_Statistic, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &TMetricPartialStatistic::restore, &s_Statistic, _1)) == false) { LOG_ERROR(<< "Invalid sample value"); return false; } } else if (name == SAMPLE_START_TAG) { if (core::CStringUtils::stringToType(traverser.value(), s_Start) == false) { - LOG_ERROR(<< "Invalid attribute identifier in " << traverser.value()); + LOG_ERROR(<< "Invalid attribute identifier in " + << traverser.value()); return false; } } else if (name == SAMPLE_END_TAG) { if (core::CStringUtils::stringToType(traverser.value(), s_End) == false) { - LOG_ERROR(<< "Invalid attribute identifier in " << traverser.value()); + LOG_ERROR(<< "Invalid attribute identifier in " + << traverser.value()); return false; } } @@ -150,11 +155,14 @@ class CSampleQueue { } //! Get the memory used by the sub-sample. - std::size_t memoryUsage() const { return sizeof(*this) + core::CMemory::dynamicSize(s_Statistic); } + std::size_t memoryUsage() const { + return sizeof(*this) + core::CMemory::dynamicSize(s_Statistic); + } //! Print the sub-sample for debug. std::string print() const { - return "{[" + core::CStringUtils::typeToString(s_Start) + ", " + core::CStringUtils::typeToString(s_End) + "] -> " + + return "{[" + core::CStringUtils::typeToString(s_Start) + ", " + + core::CStringUtils::typeToString(s_End) + "] -> " + s_Statistic.print() + "}"; } @@ -191,8 +199,7 @@ class CSampleQueue { core_t::TTime bucketLength) : m_Dimension(dimension), m_Queue(std::max(sampleCountFactor * latencyBuckets, std::size_t(1))), - m_SampleCountFactor(sampleCountFactor), - m_GrowthFactor(growthFactor), + m_SampleCountFactor(sampleCountFactor), m_GrowthFactor(growthFactor), m_BucketLength(bucketLength), m_Latency(static_cast(latencyBuckets) * bucketLength) {} @@ -224,7 +231,10 @@ class CSampleQueue { //! \param[in] sampleCount The target sample count. //! \param[in] feature The feature to which the measurements correspond. //! \param[out] samples The newly created samples. - void sample(core_t::TTime bucketStart, unsigned int sampleCount, model_t::EFeature feature, TSampleVec& samples) { + void sample(core_t::TTime bucketStart, + unsigned int sampleCount, + model_t::EFeature feature, + TSampleVec& samples) { core_t::TTime latencyCutoff = bucketStart + m_BucketLength - 1; TOptionalSubSample combinedSubSample; @@ -238,11 +248,13 @@ class CSampleQueue { m_Queue.pop_back(); double count = combinedSubSample->s_Statistic.count(); - double countIncludingNext = (m_Queue.empty()) ? count : count + m_Queue.back().s_Statistic.count(); + double countIncludingNext = + (m_Queue.empty()) ? 
count : count + m_Queue.back().s_Statistic.count(); double countRatio = sampleCount / count; double countRatioIncludingNext = sampleCount / countIncludingNext; - if (countIncludingNext >= sampleCount && (std::abs(1.0 - countRatio) <= std::abs(1.0 - countRatioIncludingNext))) { + if (countIncludingNext >= sampleCount && + (std::abs(1.0 - countRatio) <= std::abs(1.0 - countRatioIncludingNext))) { TDouble1Vec sample = combinedSubSample->s_Statistic.value(); core_t::TTime sampleTime = combinedSubSample->s_Statistic.time(); double vs = model_t::varianceScale(feature, sampleCount, count); @@ -259,16 +271,20 @@ class CSampleQueue { void resetBucket(core_t::TTime bucketStart) { // The queue is ordered in descending sub-sample start time. - iterator firstEarlierThanBucket = std::upper_bound(m_Queue.begin(), m_Queue.end(), bucketStart, timeLater); + iterator firstEarlierThanBucket = + std::upper_bound(m_Queue.begin(), m_Queue.end(), bucketStart, timeLater); // This is equivalent to lower_bound(., ., bucketStart + m_BucketLength - 1, .); - iterator latestWithinBucket = std::upper_bound(m_Queue.begin(), m_Queue.end(), bucketStart + m_BucketLength, timeLater); + iterator latestWithinBucket = std::upper_bound( + m_Queue.begin(), m_Queue.end(), bucketStart + m_BucketLength, timeLater); m_Queue.erase(latestWithinBucket, firstEarlierThanBucket); } //! Returns the item in the queue at position \p index. - const SSubSample& operator[](std::size_t index) const { return m_Queue[index]; } + const SSubSample& operator[](std::size_t index) const { + return m_Queue[index]; + } //! Returns the size of the queue. std::size_t size() const { return m_Queue.size(); } @@ -279,14 +295,17 @@ class CSampleQueue { //! Is the queue empty? bool empty() const { return m_Queue.empty(); } - core_t::TTime latestEnd() const { return m_Queue.empty() ? 0 : m_Queue.front().s_End; } + core_t::TTime latestEnd() const { + return m_Queue.empty() ? 0 : m_Queue.front().s_End; + } //! \name Persistence //@{ //! Persist state by passing information to the supplied inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const { for (const_reverse_iterator itr = m_Queue.rbegin(); itr != m_Queue.rend(); ++itr) { - inserter.insertLevel(SUB_SAMPLE_TAG, boost::bind(&SSubSample::acceptPersistInserter, *itr, _1)); + inserter.insertLevel(SUB_SAMPLE_TAG, boost::bind(&SSubSample::acceptPersistInserter, + *itr, _1)); } } @@ -296,7 +315,8 @@ class CSampleQueue { const std::string& name = traverser.name(); if (name == SUB_SAMPLE_TAG) { SSubSample subSample(m_Dimension, 0); - if (traverser.traverseSubLevel(boost::bind(&SSubSample::acceptRestoreTraverser, &subSample, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &SSubSample::acceptRestoreTraverser, &subSample, _1)) == false) { LOG_ERROR(<< "Invalid sub-sample in " << traverser.value()); return false; } @@ -310,7 +330,9 @@ class CSampleQueue { //@} //! Returns the checksum of the queue. - uint64_t checksum() const { return maths::CChecksum::calculate(0, m_Queue); } + uint64_t checksum() const { + return maths::CChecksum::calculate(0, m_Queue); + } //! Debug the memory used by the queue. void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { @@ -319,10 +341,14 @@ class CSampleQueue { } //! Get the memory used by the queue. - std::size_t memoryUsage() const { return sizeof(*this) + core::CMemory::dynamicSize(m_Queue); } + std::size_t memoryUsage() const { + return sizeof(*this) + core::CMemory::dynamicSize(m_Queue); + } //! 
Prints the contents of the queue. - std::string print() const { return core::CContainerPrinter::print(m_Queue); } + std::string print() const { + return core::CContainerPrinter::print(m_Queue); + } private: void pushFrontNewSubSample(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) { @@ -339,7 +365,10 @@ class CSampleQueue { m_Queue.push_back(newSubSample); } - void insertNewSubSample(iterator pos, const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) { + void insertNewSubSample(iterator pos, + const TDouble1Vec& measurement, + core_t::TTime time, + unsigned int count) { this->resizeIfFull(); SSubSample newSubSample(m_Dimension, time); newSubSample.s_Statistic.add(measurement, time, count); @@ -349,13 +378,18 @@ class CSampleQueue { void resizeIfFull() { if (m_Queue.full()) { std::size_t currentSize = m_Queue.size(); - std::size_t newSize = static_cast(static_cast(currentSize) * (1.0 + m_GrowthFactor)); + std::size_t newSize = static_cast( + static_cast(currentSize) * (1.0 + m_GrowthFactor)); m_Queue.set_capacity(std::max(newSize, currentSize + 1)); } } - void addAfterLatestStartTime(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count, unsigned int sampleCount) { - if (time >= m_Queue[0].s_End && this->shouldCreateNewSubSampleAfterLatest(time, sampleCount)) { + void addAfterLatestStartTime(const TDouble1Vec& measurement, + core_t::TTime time, + unsigned int count, + unsigned int sampleCount) { + if (time >= m_Queue[0].s_End && + this->shouldCreateNewSubSampleAfterLatest(time, sampleCount)) { this->pushFrontNewSubSample(measurement, time, count); } else { m_Queue[0].add(measurement, time, count); @@ -363,14 +397,16 @@ class CSampleQueue { } bool shouldCreateNewSubSampleAfterLatest(core_t::TTime time, unsigned int sampleCount) { - if (m_Queue[0].s_Statistic.count() >= static_cast(this->targetSubSampleCount(sampleCount))) { + if (m_Queue[0].s_Statistic.count() >= + static_cast(this->targetSubSampleCount(sampleCount))) { return true; } // If latency is non-zero, we also want to check whether the new measurement // is too far from the latest sub-sample or whether they belong in different buckets. if (m_Latency > 0) { - if (!m_Queue[0].isClose(time, this->targetSubSampleSpan()) || !m_Queue[0].isInSameBucket(time, m_BucketLength)) { + if (!m_Queue[0].isClose(time, this->targetSubSampleSpan()) || + !m_Queue[0].isInSameBucket(time, m_BucketLength)) { return true; } } @@ -378,22 +414,31 @@ class CSampleQueue { } core_t::TTime targetSubSampleSpan() const { - return (m_BucketLength + static_cast(m_SampleCountFactor) - 1) / static_cast(m_SampleCountFactor); + return (m_BucketLength + static_cast(m_SampleCountFactor) - 1) / + static_cast(m_SampleCountFactor); } - std::size_t targetSubSampleCount(unsigned int sampleCount) const { return static_cast(sampleCount) / m_SampleCountFactor; } + std::size_t targetSubSampleCount(unsigned int sampleCount) const { + return static_cast(sampleCount) / m_SampleCountFactor; + } - void addHistorical(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count, unsigned int sampleCount) { + void addHistorical(const TDouble1Vec& measurement, + core_t::TTime time, + unsigned int count, + unsigned int sampleCount) { // We have to resize before we do the search of the upper bound. Otherwise, // a later resize will invalidate the upper bound iterator. 
this->resizeIfFull(); - reverse_iterator upperBound = std::upper_bound(m_Queue.rbegin(), m_Queue.rend(), time, timeEarlier); + reverse_iterator upperBound = + std::upper_bound(m_Queue.rbegin(), m_Queue.rend(), time, timeEarlier); core_t::TTime targetSubSampleSpan = this->targetSubSampleSpan(); if (upperBound == m_Queue.rbegin()) { - if ((upperBound->s_Statistic.count() >= static_cast(this->targetSubSampleCount(sampleCount))) || - !upperBound->isClose(time, targetSubSampleSpan) || !(*upperBound).isInSameBucket(time, m_BucketLength)) { + if ((upperBound->s_Statistic.count() >= + static_cast(this->targetSubSampleCount(sampleCount))) || + !upperBound->isClose(time, targetSubSampleSpan) || + !(*upperBound).isInSameBucket(time, m_BucketLength)) { this->pushBackNewSubSample(measurement, time, count); } else { upperBound->add(measurement, time, count); @@ -414,26 +459,30 @@ class CSampleQueue { bool rightHasSpace = static_cast(right.s_Statistic.count()) < spaceLimit; core_t::TTime leftDistance = time - left.s_End; core_t::TTime rightDistance = right.s_Start - time; - SSubSample& candidate = maths::COrderings::lexicographical_compare(-static_cast(sameBucketWithLeft), - -static_cast(leftHasSpace), - leftDistance, - -static_cast(sameBucketWithRight), - -static_cast(rightHasSpace), - rightDistance) + SSubSample& candidate = maths::COrderings::lexicographical_compare( + -static_cast(sameBucketWithLeft), + -static_cast(leftHasSpace), leftDistance, + -static_cast(sameBucketWithRight), + -static_cast(rightHasSpace), rightDistance) ? left : right; if (candidate.isInSameBucket(time, m_BucketLength) && - (candidate.isClose(time, targetSubSampleSpan) || right.s_Start <= left.s_End + targetSubSampleSpan)) { + (candidate.isClose(time, targetSubSampleSpan) || + right.s_Start <= left.s_End + targetSubSampleSpan)) { candidate.add(measurement, time, count); return; } this->insertNewSubSample(upperBound.base(), measurement, time, count); } - static bool timeEarlier(core_t::TTime time, const SSubSample& subSample) { return time < subSample.s_Start; } + static bool timeEarlier(core_t::TTime time, const SSubSample& subSample) { + return time < subSample.s_Start; + } - static bool timeLater(core_t::TTime time, const SSubSample& subSample) { return time > subSample.s_Start; } + static bool timeLater(core_t::TTime time, const SSubSample& subSample) { + return time > subSample.s_Start; + } private: std::size_t m_Dimension; diff --git a/include/model/CSearchKey.h b/include/model/CSearchKey.h index b584ff31d2..a994404443 100644 --- a/include/model/CSearchKey.h +++ b/include/model/CSearchKey.h @@ -81,7 +81,8 @@ class MODEL_EXPORT CSearchKey { //! //! \note This is intended for map lookups when one doesn't want //! to copy the strings. - using TStrCRefKeyCRefPr = std::pair, boost::reference_wrapper>; + using TStrCRefKeyCRefPr = + std::pair, boost::reference_wrapper>; public: //! If the "by" field name is "count" then the key represents @@ -217,8 +218,12 @@ std::ostream& operator<<(std::ostream& strm, const CSearchKey& key); //! Hashes a (string, search key) pair. 
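The candidate selection in addHistorical above is a lexicographic tie-break: prefer the neighbouring sub-sample in the same bucket, then the one with spare capacity, then the nearer one. Assuming that reading, std::make_tuple expresses the same comparison; negating each flag makes true sort first, mirroring the -static_cast trick:

    #include <iostream>
    #include <tuple>

    bool preferLeft(bool leftSameBucket, bool leftHasSpace, long leftDistance,
                    bool rightSameBucket, bool rightHasSpace, long rightDistance) {
        return std::make_tuple(!leftSameBucket, !leftHasSpace, leftDistance) <
               std::make_tuple(!rightSameBucket, !rightHasSpace, rightDistance);
    }

    int main() {
        // Being in the same bucket beats a shorter distance.
        std::cout << preferLeft(true, false, 100, false, true, 1) << '\n'; // 1
    }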
class CStrKeyPrHash { public: - std::size_t operator()(const CSearchKey::TStrKeyPr& key) const { return this->hash(key); } - std::size_t operator()(const CSearchKey::TStrCRefKeyCRefPr& key) const { return this->hash(key); } + std::size_t operator()(const CSearchKey::TStrKeyPr& key) const { + return this->hash(key); + } + std::size_t operator()(const CSearchKey::TStrCRefKeyCRefPr& key) const { + return this->hash(key); + } private: template @@ -233,10 +238,19 @@ class CStrKeyPrHash { //! Checks if two (string, search key) pairs are equal. class CStrKeyPrEqual { public: - bool operator()(const CSearchKey::TStrKeyPr& lhs, const CSearchKey::TStrKeyPr& rhs) const { return this->equal(lhs, rhs); } - bool operator()(const CSearchKey::TStrCRefKeyCRefPr& lhs, const CSearchKey::TStrKeyPr& rhs) const { return this->equal(lhs, rhs); } - bool operator()(const CSearchKey::TStrKeyPr& lhs, const CSearchKey::TStrCRefKeyCRefPr& rhs) const { return this->equal(lhs, rhs); } - bool operator()(const CSearchKey::TStrCRefKeyCRefPr& lhs, const CSearchKey::TStrCRefKeyCRefPr& rhs) const { + bool operator()(const CSearchKey::TStrKeyPr& lhs, const CSearchKey::TStrKeyPr& rhs) const { + return this->equal(lhs, rhs); + } + bool operator()(const CSearchKey::TStrCRefKeyCRefPr& lhs, + const CSearchKey::TStrKeyPr& rhs) const { + return this->equal(lhs, rhs); + } + bool operator()(const CSearchKey::TStrKeyPr& lhs, + const CSearchKey::TStrCRefKeyCRefPr& rhs) const { + return this->equal(lhs, rhs); + } + bool operator()(const CSearchKey::TStrCRefKeyCRefPr& lhs, + const CSearchKey::TStrCRefKeyCRefPr& rhs) const { return this->equal(lhs, rhs); } diff --git a/include/model/CStringStore.h b/include/model/CStringStore.h index 631263f553..ac979842dd 100644 --- a/include/model/CStringStore.h +++ b/include/model/CStringStore.h @@ -58,7 +58,10 @@ class MODEL_EXPORT CStringStore : private core::CNonCopyable { } }; struct MODEL_EXPORT SStoredStringPtrEqual { - bool operator()(const core::CStoredStringPtr& lhs, const core::CStoredStringPtr& rhs) const { return *lhs == *rhs; } + bool operator()(const core::CStoredStringPtr& lhs, + const core::CStoredStringPtr& rhs) const { + return *lhs == *rhs; + } }; public: @@ -93,7 +96,8 @@ class MODEL_EXPORT CStringStore : private core::CNonCopyable { std::size_t memoryUsage() const; private: - using TStoredStringPtrUSet = boost::unordered_set; + using TStoredStringPtrUSet = + boost::unordered_set; using TStrVec = std::vector; private: diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index de979e346d..0a5cba5ff8 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -95,16 +95,24 @@ class MODEL_EXPORT CResultType { } //! Check if this is interim. - bool isInterim() const { return (m_Type & static_cast(E_Interim)) != 0; } + bool isInterim() const { + return (m_Type & static_cast(E_Interim)) != 0; + } //! Get as interim or final enumeration. - EInterimOrFinal asInterimOrFinal() const { return this->isInterim() ? E_Interim : E_Final; } + EInterimOrFinal asInterimOrFinal() const { + return this->isInterim() ? E_Interim : E_Final; + } //! Check if this is unconditional. - bool isUnconditional() const { return (m_Type & static_cast(E_Unconditional)) != 0; } + bool isUnconditional() const { + return (m_Type & static_cast(E_Unconditional)) != 0; + } //! Get as conditional or unconditional enumeration. - EConditionalOrUnconditional asConditionalOrUnconditional() const { return this->isUnconditional() ? 
E_Unconditional : E_Conditional; } + EConditionalOrUnconditional asConditionalOrUnconditional() const { + return this->isUnconditional() ? E_Unconditional : E_Conditional; + } //! Get as an unsigned integer. unsigned int asUint() const { return m_Type; } @@ -520,7 +528,10 @@ maths_t::EProbabilityCalculation probabilityCalculation(EFeature feature); //! some metric features the time is deduced in which case \p time //! is used. MODEL_EXPORT -core_t::TTime sampleTime(EFeature feature, core_t::TTime bucketStartTime, core_t::TTime bucketLength, core_t::TTime time = 0); +core_t::TTime sampleTime(EFeature feature, + core_t::TTime bucketStartTime, + core_t::TTime bucketLength, + core_t::TTime time = 0); //! Get the support for \p feature. MODEL_EXPORT @@ -558,116 +569,116 @@ std::string print(EFeature feature); //! generally a bad idea so don't take this as a precedent for //! crazy macro magic (see item 1.14 of our coding standards for //! guidelines). -#define CASE_INDIVIDUAL_COUNT \ - case model_t::E_IndividualCountByBucketAndPerson: \ - case model_t::E_IndividualNonZeroCountByBucketAndPerson: \ - case model_t::E_IndividualTotalBucketCountByPerson: \ - case model_t::E_IndividualIndicatorOfBucketPerson: \ - case model_t::E_IndividualLowCountsByBucketAndPerson: \ - case model_t::E_IndividualHighCountsByBucketAndPerson: \ - case model_t::E_IndividualArrivalTimesByPerson: \ - case model_t::E_IndividualLongArrivalTimesByPerson: \ - case model_t::E_IndividualShortArrivalTimesByPerson: \ - case model_t::E_IndividualLowNonZeroCountByBucketAndPerson: \ - case model_t::E_IndividualHighNonZeroCountByBucketAndPerson: \ - case model_t::E_IndividualUniqueCountByBucketAndPerson: \ - case model_t::E_IndividualLowUniqueCountByBucketAndPerson: \ - case model_t::E_IndividualHighUniqueCountByBucketAndPerson: \ - case model_t::E_IndividualInfoContentByBucketAndPerson: \ - case model_t::E_IndividualHighInfoContentByBucketAndPerson: \ - case model_t::E_IndividualLowInfoContentByBucketAndPerson: \ - case model_t::E_IndividualTimeOfDayByBucketAndPerson: \ +#define CASE_INDIVIDUAL_COUNT \ + case model_t::E_IndividualCountByBucketAndPerson: \ + case model_t::E_IndividualNonZeroCountByBucketAndPerson: \ + case model_t::E_IndividualTotalBucketCountByPerson: \ + case model_t::E_IndividualIndicatorOfBucketPerson: \ + case model_t::E_IndividualLowCountsByBucketAndPerson: \ + case model_t::E_IndividualHighCountsByBucketAndPerson: \ + case model_t::E_IndividualArrivalTimesByPerson: \ + case model_t::E_IndividualLongArrivalTimesByPerson: \ + case model_t::E_IndividualShortArrivalTimesByPerson: \ + case model_t::E_IndividualLowNonZeroCountByBucketAndPerson: \ + case model_t::E_IndividualHighNonZeroCountByBucketAndPerson: \ + case model_t::E_IndividualUniqueCountByBucketAndPerson: \ + case model_t::E_IndividualLowUniqueCountByBucketAndPerson: \ + case model_t::E_IndividualHighUniqueCountByBucketAndPerson: \ + case model_t::E_IndividualInfoContentByBucketAndPerson: \ + case model_t::E_IndividualHighInfoContentByBucketAndPerson: \ + case model_t::E_IndividualLowInfoContentByBucketAndPerson: \ + case model_t::E_IndividualTimeOfDayByBucketAndPerson: \ case model_t::E_IndividualTimeOfWeekByBucketAndPerson //! Individual metric feature case statement block. 
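These CASE_* blocks expand to runs of case labels so a single switch can dispatch on a whole feature family. A toy sketch of how such a block is consumed (toy enum and macro, not the real feature list):

    #include <iostream>

    enum EFeature { E_Count, E_NonZeroCount, E_Mean };

    #define CASE_TOY_COUNT                                                     \
        case E_Count:                                                          \
        case E_NonZeroCount

    const char* category(EFeature feature) {
        switch (feature) {
        CASE_TOY_COUNT:
            return "count";
        default:
            return "metric";
        }
    }

    int main() {
        std::cout << category(E_NonZeroCount) << '\n'; // prints "count"
    }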
-#define CASE_INDIVIDUAL_METRIC \ - case model_t::E_IndividualMeanByPerson: \ - case model_t::E_IndividualMedianByPerson: \ - case model_t::E_IndividualMinByPerson: \ - case model_t::E_IndividualMaxByPerson: \ - case model_t::E_IndividualSumByBucketAndPerson: \ - case model_t::E_IndividualLowMeanByPerson: \ - case model_t::E_IndividualHighMeanByPerson: \ - case model_t::E_IndividualLowSumByBucketAndPerson: \ - case model_t::E_IndividualHighSumByBucketAndPerson: \ - case model_t::E_IndividualNonNullSumByBucketAndPerson: \ - case model_t::E_IndividualLowNonNullSumByBucketAndPerson: \ - case model_t::E_IndividualHighNonNullSumByBucketAndPerson: \ - case model_t::E_IndividualMeanLatLongByPerson: \ - case model_t::E_IndividualMaxVelocityByPerson: \ - case model_t::E_IndividualMinVelocityByPerson: \ - case model_t::E_IndividualMeanVelocityByPerson: \ - case model_t::E_IndividualSumVelocityByPerson: \ - case model_t::E_IndividualVarianceByPerson: \ - case model_t::E_IndividualLowVarianceByPerson: \ - case model_t::E_IndividualHighVarianceByPerson: \ - case model_t::E_IndividualLowMedianByPerson: \ +#define CASE_INDIVIDUAL_METRIC \ + case model_t::E_IndividualMeanByPerson: \ + case model_t::E_IndividualMedianByPerson: \ + case model_t::E_IndividualMinByPerson: \ + case model_t::E_IndividualMaxByPerson: \ + case model_t::E_IndividualSumByBucketAndPerson: \ + case model_t::E_IndividualLowMeanByPerson: \ + case model_t::E_IndividualHighMeanByPerson: \ + case model_t::E_IndividualLowSumByBucketAndPerson: \ + case model_t::E_IndividualHighSumByBucketAndPerson: \ + case model_t::E_IndividualNonNullSumByBucketAndPerson: \ + case model_t::E_IndividualLowNonNullSumByBucketAndPerson: \ + case model_t::E_IndividualHighNonNullSumByBucketAndPerson: \ + case model_t::E_IndividualMeanLatLongByPerson: \ + case model_t::E_IndividualMaxVelocityByPerson: \ + case model_t::E_IndividualMinVelocityByPerson: \ + case model_t::E_IndividualMeanVelocityByPerson: \ + case model_t::E_IndividualSumVelocityByPerson: \ + case model_t::E_IndividualVarianceByPerson: \ + case model_t::E_IndividualLowVarianceByPerson: \ + case model_t::E_IndividualHighVarianceByPerson: \ + case model_t::E_IndividualLowMedianByPerson: \ case model_t::E_IndividualHighMedianByPerson //! Population count feature case statement block. 
-#define CASE_POPULATION_COUNT \ - case model_t::E_PopulationAttributeTotalCountByPerson: \ - case model_t::E_PopulationCountByBucketPersonAndAttribute: \ - case model_t::E_PopulationIndicatorOfBucketPersonAndAttribute: \ - case model_t::E_PopulationUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PopulationUniquePersonCountByAttribute: \ - case model_t::E_PopulationLowCountsByBucketPersonAndAttribute: \ - case model_t::E_PopulationHighCountsByBucketPersonAndAttribute: \ - case model_t::E_PopulationInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute: \ +#define CASE_POPULATION_COUNT \ + case model_t::E_PopulationAttributeTotalCountByPerson: \ + case model_t::E_PopulationCountByBucketPersonAndAttribute: \ + case model_t::E_PopulationIndicatorOfBucketPersonAndAttribute: \ + case model_t::E_PopulationUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PopulationUniquePersonCountByAttribute: \ + case model_t::E_PopulationLowCountsByBucketPersonAndAttribute: \ + case model_t::E_PopulationHighCountsByBucketPersonAndAttribute: \ + case model_t::E_PopulationInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute: \ case model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute //! Population metric feature case statement block. 
-#define CASE_POPULATION_METRIC \ - case model_t::E_PopulationMeanByPersonAndAttribute: \ - case model_t::E_PopulationMedianByPersonAndAttribute: \ - case model_t::E_PopulationMinByPersonAndAttribute: \ - case model_t::E_PopulationMaxByPersonAndAttribute: \ - case model_t::E_PopulationSumByBucketPersonAndAttribute: \ - case model_t::E_PopulationLowMeanByPersonAndAttribute: \ - case model_t::E_PopulationHighMeanByPersonAndAttribute: \ - case model_t::E_PopulationLowSumByBucketPersonAndAttribute: \ - case model_t::E_PopulationHighSumByBucketPersonAndAttribute: \ - case model_t::E_PopulationMeanLatLongByPersonAndAttribute: \ - case model_t::E_PopulationMaxVelocityByPersonAndAttribute: \ - case model_t::E_PopulationMinVelocityByPersonAndAttribute: \ - case model_t::E_PopulationMeanVelocityByPersonAndAttribute: \ - case model_t::E_PopulationSumVelocityByPersonAndAttribute: \ - case model_t::E_PopulationVarianceByPersonAndAttribute: \ - case model_t::E_PopulationLowVarianceByPersonAndAttribute: \ - case model_t::E_PopulationHighVarianceByPersonAndAttribute: \ - case model_t::E_PopulationLowMedianByPersonAndAttribute: \ +#define CASE_POPULATION_METRIC \ + case model_t::E_PopulationMeanByPersonAndAttribute: \ + case model_t::E_PopulationMedianByPersonAndAttribute: \ + case model_t::E_PopulationMinByPersonAndAttribute: \ + case model_t::E_PopulationMaxByPersonAndAttribute: \ + case model_t::E_PopulationSumByBucketPersonAndAttribute: \ + case model_t::E_PopulationLowMeanByPersonAndAttribute: \ + case model_t::E_PopulationHighMeanByPersonAndAttribute: \ + case model_t::E_PopulationLowSumByBucketPersonAndAttribute: \ + case model_t::E_PopulationHighSumByBucketPersonAndAttribute: \ + case model_t::E_PopulationMeanLatLongByPersonAndAttribute: \ + case model_t::E_PopulationMaxVelocityByPersonAndAttribute: \ + case model_t::E_PopulationMinVelocityByPersonAndAttribute: \ + case model_t::E_PopulationMeanVelocityByPersonAndAttribute: \ + case model_t::E_PopulationSumVelocityByPersonAndAttribute: \ + case model_t::E_PopulationVarianceByPersonAndAttribute: \ + case model_t::E_PopulationLowVarianceByPersonAndAttribute: \ + case model_t::E_PopulationHighVarianceByPersonAndAttribute: \ + case model_t::E_PopulationLowMedianByPersonAndAttribute: \ case model_t::E_PopulationHighMedianByPersonAndAttribute //! Peers count feature case statement block. 
-#define CASE_PEERS_COUNT \ - case model_t::E_PeersAttributeTotalCountByPerson: \ - case model_t::E_PeersCountByBucketPersonAndAttribute: \ - case model_t::E_PeersUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PeersLowCountsByBucketPersonAndAttribute: \ - case model_t::E_PeersHighCountsByBucketPersonAndAttribute: \ - case model_t::E_PeersInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PeersLowInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PeersHighInfoContentByBucketPersonAndAttribute: \ - case model_t::E_PeersLowUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PeersHighUniqueCountByBucketPersonAndAttribute: \ - case model_t::E_PeersTimeOfDayByBucketPersonAndAttribute: \ +#define CASE_PEERS_COUNT \ + case model_t::E_PeersAttributeTotalCountByPerson: \ + case model_t::E_PeersCountByBucketPersonAndAttribute: \ + case model_t::E_PeersUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PeersLowCountsByBucketPersonAndAttribute: \ + case model_t::E_PeersHighCountsByBucketPersonAndAttribute: \ + case model_t::E_PeersInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PeersLowInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PeersHighInfoContentByBucketPersonAndAttribute: \ + case model_t::E_PeersLowUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PeersHighUniqueCountByBucketPersonAndAttribute: \ + case model_t::E_PeersTimeOfDayByBucketPersonAndAttribute: \ case model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute // Peers metric features case statement block. -#define CASE_PEERS_METRIC \ - case model_t::E_PeersMeanByPersonAndAttribute: \ - case model_t::E_PeersMedianByPersonAndAttribute: \ - case model_t::E_PeersMinByPersonAndAttribute: \ - case model_t::E_PeersMaxByPersonAndAttribute: \ - case model_t::E_PeersSumByBucketPersonAndAttribute: \ - case model_t::E_PeersLowMeanByPersonAndAttribute: \ - case model_t::E_PeersHighMeanByPersonAndAttribute: \ - case model_t::E_PeersLowSumByBucketPersonAndAttribute: \ +#define CASE_PEERS_METRIC \ + case model_t::E_PeersMeanByPersonAndAttribute: \ + case model_t::E_PeersMedianByPersonAndAttribute: \ + case model_t::E_PeersMinByPersonAndAttribute: \ + case model_t::E_PeersMaxByPersonAndAttribute: \ + case model_t::E_PeersSumByBucketPersonAndAttribute: \ + case model_t::E_PeersLowMeanByPersonAndAttribute: \ + case model_t::E_PeersHighMeanByPersonAndAttribute: \ + case model_t::E_PeersLowSumByBucketPersonAndAttribute: \ case model_t::E_PeersHighSumByBucketPersonAndAttribute //! The categories of metric feature. @@ -704,7 +715,12 @@ std::string print(EMetricCategory category); //! //! The enumerate the distinct type of event rate statistics //! which we gather. -enum EEventRateCategory { E_MeanArrivalTimes, E_AttributePeople, E_UniqueValues, E_DiurnalTimes }; +enum EEventRateCategory { + E_MeanArrivalTimes, + E_AttributePeople, + E_UniqueValues, + E_DiurnalTimes +}; //! Get a string description of \p category. MODEL_EXPORT @@ -721,7 +737,14 @@ std::string print(EEventRateCategory category); //! peer groups. //! -# Population metric: analysis of message values in //! peer groups. -enum EAnalysisCategory { E_EventRate, E_Metric, E_PopulationEventRate, E_PopulationMetric, E_PeersEventRate, E_PeersMetric }; +enum EAnalysisCategory { + E_EventRate, + E_Metric, + E_PopulationEventRate, + E_PopulationMetric, + E_PeersEventRate, + E_PeersMetric +}; //! Get the category of analysis to which \p feature belongs. MODEL_EXPORT @@ -744,7 +767,12 @@ enum ESummaryMode { //! 
-# E_XF_By: remove popular "attributes" from populations //! -# E_XF_Over: remove popular "people" from populations //! -# E_XF_Both: remove popular "people" and "attributes" from populations -enum EExcludeFrequent { E_XF_None = 0, E_XF_By = 1, E_XF_Over = 2, E_XF_Both = 3 }; +enum EExcludeFrequent { + E_XF_None = 0, + E_XF_By = 1, + E_XF_Over = 2, + E_XF_Both = 3 +}; //! An enumeration of the ResourceMonitor memory status - //! Start in the OK state. Moves into soft limit if aggressive pruning @@ -767,7 +795,11 @@ std::string print(EMemoryStatus memoryStatus); //! values of the by field. //! -# AggregateDetectors: the style used to aggregate distinct detector //! results. -enum EAggregationStyle { E_AggregatePeople = 0, E_AggregateAttributes = 1, E_AggregateDetectors = 2 }; +enum EAggregationStyle { + E_AggregatePeople = 0, + E_AggregateAttributes = 1, + E_AggregateDetectors = 2 +}; const std::size_t NUMBER_AGGREGATION_STYLES = E_AggregateDetectors + 1; //! Controllable aggregation parameters: @@ -779,7 +811,12 @@ const std::size_t NUMBER_AGGREGATION_STYLES = E_AggregateDetectors + 1; //! m from n probability calculation. //! -# MaxExtremeSamples: the maximum number m of samples to consider in the //! m from n probability calculation. -enum EAggregationParam { E_JointProbabilityWeight = 0, E_ExtremeProbabilityWeight = 1, E_MinExtremeSamples = 2, E_MaxExtremeSamples = 3 }; +enum EAggregationParam { + E_JointProbabilityWeight = 0, + E_ExtremeProbabilityWeight = 1, + E_MinExtremeSamples = 2, + E_MaxExtremeSamples = 3 +}; const std::size_t NUMBER_AGGREGATION_PARAMS = E_MaxExtremeSamples + 1; //! The dummy attribute identifier used for modeling individual features. diff --git a/include/test/CMultiFileSearcher.h b/include/test/CMultiFileSearcher.h index d47586a77b..12392d9944 100644 --- a/include/test/CMultiFileSearcher.h +++ b/include/test/CMultiFileSearcher.h @@ -37,7 +37,9 @@ class TEST_EXPORT CMultiFileSearcher : public core::CDataSearcher { public: //! Constructor uses the pass-by-value-and-move idiom - CMultiFileSearcher(std::string baseFilename, std::string baseDocId, std::string fileExtension = JSON_FILE_EXT); + CMultiFileSearcher(std::string baseFilename, + std::string baseDocId, + std::string fileExtension = JSON_FILE_EXT); //! Load the file //! \return Pointer to the input stream - may be NULL diff --git a/include/test/CRandomNumbers.h b/include/test/CRandomNumbers.h index 876c85e0bb..3b56234632 100644 --- a/include/test/CRandomNumbers.h +++ b/include/test/CRandomNumbers.h @@ -48,8 +48,10 @@ class TEST_EXPORT CRandomNumbers { //! \brief Generate random samples from the specified distribution //! using a custom random number generator. template - static void - generateSamples(RNG& randomNumberGenerator, const Distribution& distribution, std::size_t numberSamples, Container& samples); + static void generateSamples(RNG& randomNumberGenerator, + const Distribution& distribution, + std::size_t numberSamples, + Container& samples); //! Shuffle the elements of a sequence using a random number generator. //! @@ -91,11 +93,16 @@ class TEST_EXPORT CRandomNumbers { //! Generate Student's t random samples with the specified degrees //! freedom using the default random number generator. - void generateStudentsSamples(double degreesFreedom, std::size_t numberSamples, TDoubleVec& samples); + void generateStudentsSamples(double degreesFreedom, + std::size_t numberSamples, + TDoubleVec& samples); //! Generate log-normal random samples with the specified location //! 
and scale using the default random number generator. - void generateLogNormalSamples(double location, double squareScale, std::size_t numberSamples, TDoubleVec& samples); + void generateLogNormalSamples(double location, + double squareScale, + std::size_t numberSamples, + TDoubleVec& samples); //! Generate uniform random samples in the interval [a,b) using //! the default random number generator. @@ -118,7 +125,9 @@ class TEST_EXPORT CRandomNumbers { //! Generate random samples from a Dirichlet distribution with //! concentration parameters \p concentrations. - void generateDirichletSamples(const TDoubleVec& concentrations, std::size_t numberSamples, TDoubleVecVec& samples); + void generateDirichletSamples(const TDoubleVec& concentrations, + std::size_t numberSamples, + TDoubleVecVec& samples); //! Generate a collection of random words of specified length using //! the default random number generator. @@ -136,10 +145,11 @@ class TEST_EXPORT CRandomNumbers { //! matrix for each cluster. //! \param[out] points Filled in with the samples from each cluster. template<typename T, std::size_t N> - void generateRandomMultivariateNormals(const TSizeVec& sizes, - std::vector<maths::CVectorNx1<T, N>>& means, - std::vector<maths::CSymmetricMatrixNxN<T, N>>& covariances, - std::vector<std::vector<maths::CVectorNx1<T, N>>>& points); + void generateRandomMultivariateNormals( + const TSizeVec& sizes, + std::vector<maths::CVectorNx1<T, N>>& means, + std::vector<maths::CSymmetricMatrixNxN<T, N>>& covariances, + std::vector<std::vector<maths::CVectorNx1<T, N>>>& points); //! Get a uniform generator in the range [0, n). This can be used //! in conjunction with std::random_shuffle if you want a seeded diff --git a/include/test/CRandomNumbersDetail.h b/include/test/CRandomNumbersDetail.h index 9c13ca2174..0fbc3d542a 100644 --- a/include/test/CRandomNumbersDetail.h +++ b/include/test/CRandomNumbersDetail.h @@ -31,14 +31,16 @@ void CRandomNumbers::generateSamples(RNG& randomNumberGenerator, Container& samples) { samples.clear(); samples.reserve(numberSamples); - std::generate_n(std::back_inserter(samples), numberSamples, boost::bind(distribution, boost::ref(randomNumberGenerator))); + std::generate_n(std::back_inserter(samples), numberSamples, + boost::bind(distribution, boost::ref(randomNumberGenerator))); } template<typename T, std::size_t N> -void CRandomNumbers::generateRandomMultivariateNormals(const TSizeVec& sizes, - std::vector<maths::CVectorNx1<T, N>>& means, - std::vector<maths::CSymmetricMatrixNxN<T, N>>& covariances, - std::vector<std::vector<maths::CVectorNx1<T, N>>>& points) { +void CRandomNumbers::generateRandomMultivariateNormals( + const TSizeVec& sizes, + std::vector<maths::CVectorNx1<T, N>>& means, + std::vector<maths::CSymmetricMatrixNxN<T, N>>& covariances, + std::vector<std::vector<maths::CVectorNx1<T, N>>>& points) { means.clear(); covariances.clear(); points.clear(); @@ -65,7 +67,8 @@ void CRandomNumbers::generateRandomMultivariateNormals(const TSizeVec& sizes, TSizeVec coordinates; this->generateUniformSamples(0, N, 4, coordinates); std::sort(coordinates.begin(), coordinates.end()); - coordinates.erase(std::unique(coordinates.begin(), coordinates.end()), coordinates.end()); + coordinates.erase(std::unique(coordinates.begin(), coordinates.end()), + coordinates.end()); TDoubleVec thetas; this->generateUniformSamples(0.0, boost::math::constants::two_pi<double>(), 2, thetas); @@ -93,7 +96,8 @@ void CRandomNumbers::generateRandomMultivariateNormals(const TSizeVec& sizes, LOG_TRACE(<< "mean = " << means[i]); LOG_TRACE(<< "covariance = " << covariances[i]); this->generateMultivariateNormalSamples( - means[i].template toVector<TDoubleVec>(), covariances[i].template toVectors<TDoubleVecVec>(), sizes[i], pointsi); + means[i].template toVector<TDoubleVec>(), + covariances[i].template toVectors<TDoubleVecVec>(), sizes[i], pointsi); for (std::size_t j = 0u; j < pointsi.size(); ++j) { points[i].emplace_back(pointsi[j]); } diff --git a/include/test/CTimeSeriesTestData.h
b/include/test/CTimeSeriesTestData.h index 74efb355fe..91bb17e32a 100644 --- a/include/test/CTimeSeriesTestData.h +++ b/include/test/CTimeSeriesTestData.h @@ -95,7 +95,10 @@ class TEST_EXPORT CTimeSeriesTestData { static void derive(const TTimeDoublePrVec& data, TTimeDoublePrVec& results); //! Pad a vector from minTime to maxTime with zeros - static bool pad(const TTimeDoublePrVec& data, core_t::TTime minTime, core_t::TTime maxTime, TTimeDoublePrVec& results); + static bool pad(const TTimeDoublePrVec& data, + core_t::TTime minTime, + core_t::TTime maxTime, + TTimeDoublePrVec& results); private: template diff --git a/include/test/CTimingXmlOutputterHook.h b/include/test/CTimingXmlOutputterHook.h index a7681f10a9..a0209e8bea 100644 --- a/include/test/CTimingXmlOutputterHook.h +++ b/include/test/CTimingXmlOutputterHook.h @@ -32,14 +32,21 @@ class CTestTimer; //! class TEST_EXPORT CTimingXmlOutputterHook : public CppUnit::XmlOutputterHook { public: - CTimingXmlOutputterHook(const CTestTimer& testTimer, const std::string& topPath, const std::string& testPath); + CTimingXmlOutputterHook(const CTestTimer& testTimer, + const std::string& topPath, + const std::string& testPath); - virtual void - failTestAdded(CppUnit::XmlDocument* document, CppUnit::XmlElement* testElement, CppUnit::Test* test, CppUnit::TestFailure* failure); + virtual void failTestAdded(CppUnit::XmlDocument* document, + CppUnit::XmlElement* testElement, + CppUnit::Test* test, + CppUnit::TestFailure* failure); - virtual void successfulTestAdded(CppUnit::XmlDocument* document, CppUnit::XmlElement* testElement, CppUnit::Test* test); + virtual void successfulTestAdded(CppUnit::XmlDocument* document, + CppUnit::XmlElement* testElement, + CppUnit::Test* test); - virtual void statisticsAdded(CppUnit::XmlDocument* document, CppUnit::XmlElement* statisticsElement); + virtual void statisticsAdded(CppUnit::XmlDocument* document, + CppUnit::XmlElement* statisticsElement); private: //! 
Convert a time in ms to a time in seconds in string form diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 7b9969d855..3a85447b6c 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -98,31 +98,23 @@ CAnomalyJob::CAnomalyJob(const std::string& jobId, const std::string& timeFieldName, const std::string& timeFieldFormat, size_t maxAnomalyRecords) - : m_JobId(jobId), - m_Limits(limits), - m_OutputStream(outputStream), + : m_JobId(jobId), m_Limits(limits), m_OutputStream(outputStream), m_ForecastRunner(m_JobId, m_OutputStream, limits.resourceMonitor()), - m_JsonOutputWriter(m_JobId, m_OutputStream), - m_FieldConfig(fieldConfig), - m_ModelConfig(modelConfig), - m_NumRecordsHandled(0), - m_LastFinalisedBucketEndTime(0), - m_PersistCompleteFunc(persistCompleteFunc), - m_TimeFieldName(timeFieldName), - m_TimeFieldFormat(timeFieldFormat), + m_JsonOutputWriter(m_JobId, m_OutputStream), m_FieldConfig(fieldConfig), + m_ModelConfig(modelConfig), m_NumRecordsHandled(0), + m_LastFinalisedBucketEndTime(0), m_PersistCompleteFunc(persistCompleteFunc), + m_TimeFieldName(timeFieldName), m_TimeFieldFormat(timeFieldFormat), m_MaxDetectors(std::numeric_limits::max()), m_PeriodicPersister(periodicPersister), m_MaxQuantileInterval(maxQuantileInterval), - m_LastNormalizerPersistTime(core::CTimeUtils::now()), - m_LatestRecordTime(0), - m_LastResultsTime(0), - m_Aggregator(modelConfig), - m_Normalizer(modelConfig), + m_LastNormalizerPersistTime(core::CTimeUtils::now()), m_LatestRecordTime(0), + m_LastResultsTime(0), m_Aggregator(modelConfig), m_Normalizer(modelConfig), m_ResultsQueue(m_ModelConfig.bucketResultsDelay(), this->effectiveBucketLength()), m_ModelPlotQueue(m_ModelConfig.bucketResultsDelay(), this->effectiveBucketLength(), 0) { m_JsonOutputWriter.limitNumberRecords(maxAnomalyRecords); - m_Limits.resourceMonitor().memoryUsageReporter(boost::bind(&CJsonOutputWriter::reportMemoryUsage, &m_JsonOutputWriter, _1)); + m_Limits.resourceMonitor().memoryUsageReporter( + boost::bind(&CJsonOutputWriter::reportMemoryUsage, &m_JsonOutputWriter, _1)); } CAnomalyJob::~CAnomalyJob() { @@ -148,14 +140,15 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields) { iter = dataRowFields.find(m_TimeFieldName); if (iter == dataRowFields.end()) { core::CStatistics::stat(stat_t::E_NumberRecordsNoTimeField).increment(); - LOG_ERROR(<< "Found record with no " << m_TimeFieldName << " field:" << core_t::LINE_ENDING - << this->debugPrintRecord(dataRowFields)); + LOG_ERROR(<< "Found record with no " << m_TimeFieldName << " field:" + << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); return true; } if (m_TimeFieldFormat.empty()) { if (core::CStringUtils::stringToType(iter->second, time) == false) { core::CStatistics::stat(stat_t::E_NumberTimeFieldConversionErrors).increment(); - LOG_ERROR(<< "Cannot interpret " << m_TimeFieldName << " field in record:" << core_t::LINE_ENDING + LOG_ERROR(<< "Cannot interpret " << m_TimeFieldName + << " field in record:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); return true; } @@ -164,8 +157,9 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields) { // around many operating system specific issues. 
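[Editorial note] The hunk above only reflows whitespace, but the logic it touches is worth spelling out: when no time format is configured the field is read as epoch seconds, otherwise it is parsed with the user-supplied format (via core::CTimeUtils::strptime in the real code). A minimal sketch of that branch using plain POSIX calls rather than the ml helpers; parseRecordTime is a hypothetical stand-in, not a function in this codebase:

```cpp
#include <cstdlib>
#include <string>
#include <time.h>

// Sketch of the two parsing branches: numeric epoch seconds when no format
// is configured, strptime() with the configured format otherwise.
bool parseRecordTime(const std::string& value, const std::string& format, time_t& out) {
    if (format.empty()) {
        char* end = nullptr;
        long long seconds = std::strtoll(value.c_str(), &end, 10);
        if (end == value.c_str() || *end != '\0') {
            return false; // not a plain number: reject, as stringToType would
        }
        out = static_cast<time_t>(seconds);
        return true;
    }
    tm parts{};
    const char* rest = strptime(value.c_str(), format.c_str(), &parts);
    if (rest == nullptr || *rest != '\0') {
        return false; // the format did not consume the whole field
    }
    out = mktime(&parts); // interprets the broken-down time in local time
    return out != static_cast<time_t>(-1);
}
```

Raw strptime() differs subtly across operating systems (hence the comment in the diff), which is why the production code routes through its own wrapper.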
if (core::CTimeUtils::strptime(m_TimeFieldFormat, iter->second, time) == false) { core::CStatistics::stat(stat_t::E_NumberTimeFieldConversionErrors).increment(); - LOG_ERROR(<< "Cannot interpret " << m_TimeFieldName << " field using format " << m_TimeFieldFormat - << " in record:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); + LOG_ERROR(<< "Cannot interpret " << m_TimeFieldName << " field using format " + << m_TimeFieldFormat << " in record:" << core_t::LINE_ENDING + << this->debugPrintRecord(dataRowFields)); return true; } } @@ -178,8 +172,8 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields) { core::CStatistics::stat(stat_t::E_NumberTimeOrderErrors).increment(); std::ostringstream ss; ss << "Records must be in ascending time order. " - << "Record '" << this->debugPrintRecord(dataRowFields) << "' time " << time << " is before bucket time " - << m_LastFinalisedBucketEndTime; + << "Record '" << this->debugPrintRecord(dataRowFields) << "' time " + << time << " is before bucket time " << m_LastFinalisedBucketEndTime; LOG_ERROR(<< ss.str()); return true; } @@ -194,16 +188,17 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields) { const std::string& partitionFieldName(m_DetectorKeys[i].partitionFieldName()); // An empty partitionFieldName means no partitioning - TStrStrUMapCItr itr = partitionFieldName.empty() ? dataRowFields.end() : dataRowFields.find(partitionFieldName); - const std::string& partitionFieldValue(itr == dataRowFields.end() ? EMPTY_STRING : itr->second); + TStrStrUMapCItr itr = partitionFieldName.empty() + ? dataRowFields.end() + : dataRowFields.find(partitionFieldName); + const std::string& partitionFieldValue( + itr == dataRowFields.end() ? EMPTY_STRING : itr->second); // TODO - should usenull apply to the partition field too? - const TAnomalyDetectorPtr& detector = this->detectorForKey(false, // not restoring - time, - m_DetectorKeys[i], - partitionFieldValue, - m_Limits.resourceMonitor()); + const TAnomalyDetectorPtr& detector = this->detectorForKey( + false, // not restoring + time, m_DetectorKeys[i], partitionFieldValue, m_Limits.resourceMonitor()); if (detector == nullptr) { // There wasn't enough memory to create the detector continue; @@ -239,7 +234,8 @@ void CAnomalyJob::finalise() { bool CAnomalyJob::initNormalizer(const std::string& quantilesStateFile) { std::ifstream inputStream(quantilesStateFile.c_str()); - return m_Normalizer.fromJsonStream(inputStream) == model::CHierarchicalResultsNormalizer::E_Ok; + return m_Normalizer.fromJsonStream(inputStream) == + model::CHierarchicalResultsNormalizer::E_Ok; } uint64_t CAnomalyJob::numRecordsHandled() const { @@ -314,7 +310,8 @@ bool CAnomalyJob::handleControlMessage(const std::string& controlMessage) { case ' ': // Spaces are just used to fill the buffers and force prior messages // through the system - we don't need to do anything else - LOG_TRACE(<< "Received space control message of length " << controlMessage.length()); + LOG_TRACE(<< "Received space control message of length " + << controlMessage.length()); break; case CONTROL_FIELD_NAME_CHAR: // Silent no-op. 
This is a simple way to ignore repeated header @@ -348,8 +345,9 @@ bool CAnomalyJob::handleControlMessage(const std::string& controlMessage) { } } break; default: - LOG_WARN(<< "Ignoring unknown control message of length " << controlMessage.length() << " beginning with '" << controlMessage[0] - << '\''); + LOG_WARN(<< "Ignoring unknown control message of length " + << controlMessage.length() << " beginning with '" + << controlMessage[0] << '\''); // Don't return false here (for the time being at least), as it // seems excessive to cause the entire job to fail break; @@ -388,7 +386,8 @@ void CAnomalyJob::advanceTime(const std::string& time_) { } if (m_LastFinalisedBucketEndTime == 0) { - LOG_DEBUG(<< "Manually advancing time to " << time << " before any valid data has been seen"); + LOG_DEBUG(<< "Manually advancing time to " << time + << " before any valid data has been seen"); } else { LOG_TRACE(<< "Received request to advance time to " << time); } @@ -405,11 +404,13 @@ void CAnomalyJob::outputBucketResultsUntil(core_t::TTime time) { core_t::TTime latency = m_ModelConfig.latency(); if (m_LastFinalisedBucketEndTime == 0) { - m_LastFinalisedBucketEndTime = - std::max(m_LastFinalisedBucketEndTime, maths::CIntegerTools::floor(time, effectiveBucketLength) - latency); + m_LastFinalisedBucketEndTime = std::max( + m_LastFinalisedBucketEndTime, + maths::CIntegerTools::floor(time, effectiveBucketLength) - latency); } - for (core_t::TTime lastBucketEndTime = m_LastFinalisedBucketEndTime; lastBucketEndTime + bucketLength + latency <= time; + for (core_t::TTime lastBucketEndTime = m_LastFinalisedBucketEndTime; + lastBucketEndTime + bucketLength + latency <= time; lastBucketEndTime += effectiveBucketLength) { this->outputResults(lastBucketEndTime); m_Limits.resourceMonitor().sendMemoryUsageReportIfSignificantlyChanged(lastBucketEndTime); @@ -447,7 +448,8 @@ void CAnomalyJob::skipSampling(core_t::TTime endTime) { for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector(detector_.second.get()); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } detector->skipSampling(endTime); @@ -460,11 +462,14 @@ void CAnomalyJob::flushAndResetResultsQueue(core_t::TTime startTime) { LOG_DEBUG(<< "Flush & reset results queue: " << startTime); if (m_ModelConfig.bucketResultsDelay() != 0) { core_t::TTime effectiveBucketLength = this->effectiveBucketLength(); - core_t::TTime earliestResultTime = m_LastFinalisedBucketEndTime - m_ResultsQueue.size() * effectiveBucketLength; - for (core_t::TTime bucketStart = earliestResultTime; bucketStart < m_LastFinalisedBucketEndTime; + core_t::TTime earliestResultTime = m_LastFinalisedBucketEndTime - + m_ResultsQueue.size() * effectiveBucketLength; + for (core_t::TTime bucketStart = earliestResultTime; + bucketStart < m_LastFinalisedBucketEndTime; bucketStart += effectiveBucketLength) { model::CHierarchicalResults& results = m_ResultsQueue.latest(); - core_t::TTime resultsTime = m_ResultsQueue.chooseResultTime(bucketStart, m_ModelConfig.bucketLength(), results); + core_t::TTime resultsTime = m_ResultsQueue.chooseResultTime( + bucketStart, m_ModelConfig.bucketLength(), results); if (resultsTime != 0) { core::CStopWatch timer(true); model::CHierarchicalResults& resultsToOutput = m_ResultsQueue.get(resultsTime); @@ -472,7 +477,8 @@ void CAnomalyJob::flushAndResetResultsQueue(core_t::TTime startTime) { 
// Model plots must be written first so the Java persists them // once the bucket result is processed this->writeOutModelPlot(resultsTime); - this->writeOutResults(false, resultsToOutput, resultsTime, processingTime, 0l); + this->writeOutResults(false, resultsToOutput, resultsTime, + processingTime, 0l); } m_ResultsQueue.push(model::CHierarchicalResults()); } @@ -489,7 +495,8 @@ void CAnomalyJob::timeNow(core_t::TTime time) { for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector(detector_.second.get()); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } detector->timeNow(time); @@ -497,7 +504,8 @@ void CAnomalyJob::timeNow(core_t::TTime time) { } core_t::TTime CAnomalyJob::effectiveBucketLength() const { - return m_ModelConfig.bucketResultsDelay() ? m_ModelConfig.bucketLength() / 2 : m_ModelConfig.bucketLength(); + return m_ModelConfig.bucketResultsDelay() ? m_ModelConfig.bucketLength() / 2 + : m_ModelConfig.bucketLength(); } void CAnomalyJob::generateInterimResults(const std::string& controlMessage) { @@ -509,7 +517,8 @@ void CAnomalyJob::generateInterimResults(const std::string& controlMessage) { } core_t::TTime start = m_LastFinalisedBucketEndTime; - core_t::TTime end = m_LastFinalisedBucketEndTime + (m_ModelConfig.latencyBuckets() + 1) * this->effectiveBucketLength(); + core_t::TTime end = m_LastFinalisedBucketEndTime + + (m_ModelConfig.latencyBuckets() + 1) * this->effectiveBucketLength(); if (this->parseTimeRangeInControlMessage(controlMessage, start, end)) { LOG_TRACE(<< "Time range for results: " << start << " : " << end); @@ -517,11 +526,14 @@ void CAnomalyJob::generateInterimResults(const std::string& controlMessage) { } } -bool CAnomalyJob::parseTimeRangeInControlMessage(const std::string& controlMessage, core_t::TTime& start, core_t::TTime& end) { +bool CAnomalyJob::parseTimeRangeInControlMessage(const std::string& controlMessage, + core_t::TTime& start, + core_t::TTime& end) { using TStrVec = core::CStringUtils::TStrVec; TStrVec tokens; std::string remainder; - core::CStringUtils::tokenise(" ", controlMessage.substr(1, std::string::npos), tokens, remainder); + core::CStringUtils::tokenise(" ", controlMessage.substr(1, std::string::npos), + tokens, remainder); if (!remainder.empty()) { tokens.push_back(remainder); } @@ -531,10 +543,12 @@ bool CAnomalyJob::parseTimeRangeInControlMessage(const std::string& controlMessa return true; } if (tokensSize != 2) { - LOG_ERROR(<< "Control message " << controlMessage << " has " << tokensSize << " parameters when only zero or two are allowed."); + LOG_ERROR(<< "Control message " << controlMessage << " has " << tokensSize + << " parameters when only zero or two are allowed."); return false; } - if (core::CStringUtils::stringToType(tokens[0], start) && core::CStringUtils::stringToType(tokens[1], end)) { + if (core::CStringUtils::stringToType(tokens[0], start) && + core::CStringUtils::stringToType(tokens[1], end)) { return true; } LOG_ERROR(<< "Cannot parse control message: " << controlMessage); @@ -573,15 +587,18 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime) { TKeyAnomalyDetectorPtrUMapCItrVec iterators; iterators.reserve(m_Detectors.size()); - for (TKeyAnomalyDetectorPtrUMapCItr itr = m_Detectors.begin(); itr != m_Detectors.end(); ++itr) { + for (TKeyAnomalyDetectorPtrUMapCItr itr = m_Detectors.begin(); + itr != 
m_Detectors.end(); ++itr) { iterators.push_back(itr); } - std::sort(iterators.begin(), iterators.end(), core::CFunctional::SDereference<maths::COrderings::SFirstLess>()); + std::sort(iterators.begin(), iterators.end(), + core::CFunctional::SDereference<maths::COrderings::SFirstLess>()); for (std::size_t i = 0u; i < iterators.size(); ++i) { model::CAnomalyDetector* detector(iterators[i]->second.get()); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(iterators[i]->first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(iterators[i]->first) << '\''); continue; } detector->buildResults(bucketStartTime, bucketStartTime + bucketLength, results); @@ -606,14 +623,16 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime) { this->updateQuantilesAndNormalize(false, results); } - core_t::TTime resultsTime = m_ResultsQueue.chooseResultTime(bucketStartTime, bucketLength, results); + core_t::TTime resultsTime = + m_ResultsQueue.chooseResultTime(bucketStartTime, bucketLength, results); if (resultsTime != 0) { model::CHierarchicalResults& resultsToOutput = m_ResultsQueue.get(resultsTime); uint64_t processingTime = timer.stop(); // Model plots must be written first so the Java persists them // once the bucket result is processed this->writeOutModelPlot(resultsTime); - this->writeOutResults(false, resultsToOutput, resultsTime, processingTime, cumulativeTime); + this->writeOutResults(false, resultsToOutput, resultsTime, + processingTime, cumulativeTime); cumulativeTime = 0; } else { cumulativeTime += timer.stop(); @@ -634,7 +653,8 @@ void CAnomalyJob::outputInterimResults(core_t::TTime bucketStartTime) { for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector(detector_.second.get()); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } detector->buildInterimResults(bucketStartTime, bucketStartTime + bucketLength, results); @@ -673,28 +693,30 @@ void CAnomalyJob::writeOutResults(bool interim, uint64_t processingTime, uint64_t sumPastProcessingTime) { if (!results.empty()) { - LOG_TRACE(<< "Got results object here: " << results.root()->s_RawAnomalyScore << " / " << results.root()->s_NormalizedAnomalyScore + LOG_TRACE(<< "Got results object here: " << results.root()->s_RawAnomalyScore + << " / " << results.root()->s_NormalizedAnomalyScore << ", count " << results.resultCount() << " at " << bucketTime); using TScopedAllocator = ml::core::CScopedRapidJsonPoolAllocator<CJsonOutputWriter>; static const std::string ALLOCATOR_ID("CAnomalyJob::writeOutResults"); TScopedAllocator scopedAllocator(ALLOCATOR_ID, m_JsonOutputWriter); - api::CHierarchicalResultsWriter writer(m_Limits, - m_ModelConfig, - boost::bind(&CJsonOutputWriter::acceptResult, &m_JsonOutputWriter, _1), - boost::bind(&CJsonOutputWriter::acceptInfluencer, &m_JsonOutputWriter, _1, _2, _3)); + api::CHierarchicalResultsWriter writer( + m_Limits, m_ModelConfig, + boost::bind(&CJsonOutputWriter::acceptResult, &m_JsonOutputWriter, _1), + boost::bind(&CJsonOutputWriter::acceptInfluencer, + &m_JsonOutputWriter, _1, _2, _3)); results.bottomUpBreadthFirst(writer); results.pivotsBottomUpBreadthFirst(writer); // Add the bucketTime bucket influencer. // Note that the influencer will only be accepted if there are records.
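[Editorial note] The writer construction just reformatted above is a good example of the callback style this patch series touches everywhere: a member function is bound to an object pointer with one or more argument slots left open, and the resulting callable is handed to the hierarchical results writer. A minimal, self-contained sketch of that binding pattern, with an invented Sink class standing in for CJsonOutputWriter:

```cpp
#include <boost/bind.hpp>
#include <functional>
#include <iostream>

// Hypothetical stand-in for the output writer.
struct Sink {
    void acceptResult(int value) { std::cout << "result " << value << '\n'; }
};

int main() {
    Sink sink;
    // As in the diff: bind the member function to &sink, leaving one slot open.
    std::function<void(int)> viaBind = boost::bind(&Sink::acceptResult, &sink, _1);
    // The equivalent C++11 lambda, for comparison.
    std::function<void(int)> viaLambda = [&sink](int v) { sink.acceptResult(v); };
    viaBind(42);
    viaLambda(42);
    return 0;
}
```

Both forms produce the same call; the boost::bind style simply predates lambdas in this codebase, and the reformatting keeps each bound callback on its own visual unit.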
- m_JsonOutputWriter.acceptBucketTimeInfluencer(bucketTime, - results.root()->s_AnnotatedProbability.s_Probability, - results.root()->s_RawAnomalyScore, - results.root()->s_NormalizedAnomalyScore); + m_JsonOutputWriter.acceptBucketTimeInfluencer( + bucketTime, results.root()->s_AnnotatedProbability.s_Probability, + results.root()->s_RawAnomalyScore, results.root()->s_NormalizedAnomalyScore); - if (m_JsonOutputWriter.endOutputBatch(interim, sumPastProcessingTime + processingTime) == false) { + if (m_JsonOutputWriter.endOutputBatch( + interim, sumPastProcessingTime + processingTime) == false) { LOG_ERROR(<< "Problem writing anomaly output"); } m_LastResultsTime = bucketTime; @@ -716,7 +738,8 @@ void CAnomalyJob::resetBuckets(const std::string& controlMessage) { for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector = detector_.second.get(); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } LOG_TRACE(<< "Resetting bucket = " << time); @@ -727,7 +750,8 @@ void CAnomalyJob::resetBuckets(const std::string& controlMessage) { } } -bool CAnomalyJob::restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime) { +bool CAnomalyJob::restoreState(core::CDataSearcher& restoreSearcher, + core_t::TTime& completeToTime) { // Pass on the request in case we're chained if (this->outputHandler().restoreState(restoreSearcher, completeToTime) == false) { return false; @@ -772,22 +796,25 @@ bool CAnomalyJob::restoreState(core::CDataSearcher& restoreSearcher, core_t::TTi } if (completeToTime > 0) { - core_t::TTime lastBucketEndTime(maths::CIntegerTools::ceil(completeToTime, m_ModelConfig.bucketLength())); + core_t::TTime lastBucketEndTime(maths::CIntegerTools::ceil( + completeToTime, m_ModelConfig.bucketLength())); for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector(detector_.second.get()); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } - LOG_DEBUG(<< "Setting lastBucketEndTime to " << lastBucketEndTime << " in detector for '" << detector->description() - << '\''); + LOG_DEBUG(<< "Setting lastBucketEndTime to " << lastBucketEndTime + << " in detector for '" << detector->description() << '\''); detector->lastBucketEndTime() = lastBucketEndTime; } } else { if (!m_Detectors.empty()) { - LOG_ERROR(<< "Inconsistency - " << m_Detectors.size() << " detectors have been restored but completeToTime is " + LOG_ERROR(<< "Inconsistency - " << m_Detectors.size() + << " detectors have been restored but completeToTime is " << completeToTime); } } @@ -799,7 +826,9 @@ bool CAnomalyJob::restoreState(core::CDataSearcher& restoreSearcher, core_t::TTi return true; } -bool CAnomalyJob::restoreState(core::CStateRestoreTraverser& traverser, core_t::TTime& completeToTime, std::size_t& numDetectors) { +bool CAnomalyJob::restoreState(core::CStateRestoreTraverser& traverser, + core_t::TTime& completeToTime, + std::size_t& numDetectors) { m_RestoredStateDetail.s_RestoredStateStatus = E_Failure; m_RestoredStateDetail.s_Extra = boost::none; @@ -812,10 +841,11 @@ bool CAnomalyJob::restoreState(core::CStateRestoreTraverser& traverser, core_t:: } core_t::TTime lastBucketEndTime(0); - if (traverser.name() != TIME_TAG || 
core::CStringUtils::stringToType(traverser.value(), lastBucketEndTime) == false) { + if (traverser.name() != TIME_TAG || + core::CStringUtils::stringToType(traverser.value(), lastBucketEndTime) == false) { m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag; - LOG_ERROR(<< "Cannot restore anomaly detector - '" << TIME_TAG << "' element expected but found " << traverser.name() << '=' - << traverser.value()); + LOG_ERROR(<< "Cannot restore anomaly detector - '" << TIME_TAG << "' element expected but found " + << traverser.name() << '=' << traverser.value()); return false; } m_LastFinalisedBucketEndTime = lastBucketEndTime; @@ -834,7 +864,8 @@ bool CAnomalyJob::restoreState(core::CStateRestoreTraverser& traverser, core_t:: const std::string& stateVersion = traverser.value(); if (stateVersion != model::CAnomalyDetector::STATE_VERSION) { m_RestoredStateDetail.s_RestoredStateStatus = E_IncorrectVersion; - LOG_ERROR(<< "Restored anomaly detector state version is " << stateVersion << " - ignoring it as current state version is " + LOG_ERROR(<< "Restored anomaly detector state version is " + << stateVersion << " - ignoring it as current state version is " << model::CAnomalyDetector::STATE_VERSION); // This counts as successful restoration @@ -844,23 +875,26 @@ bool CAnomalyJob::restoreState(core::CStateRestoreTraverser& traverser, core_t:: while (traverser.next()) { const std::string& name = traverser.name(); if (name == TOP_LEVEL_DETECTOR_TAG) { - if (traverser.traverseSubLevel(boost::bind(&CAnomalyJob::restoreSingleDetector, this, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &CAnomalyJob::restoreSingleDetector, this, _1)) == false) { LOG_ERROR(<< "Cannot restore anomaly detector"); return false; } ++numDetectors; } else if (name == RESULTS_AGGREGATOR_TAG) { - if (traverser.traverseSubLevel( - boost::bind(&model::CHierarchicalResultsAggregator::acceptRestoreTraverser, &m_Aggregator, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &model::CHierarchicalResultsAggregator::acceptRestoreTraverser, + &m_Aggregator, _1)) == false) { LOG_ERROR(<< "Cannot restore results aggregator"); return false; } } else if (name == HIERARCHICAL_RESULTS_TAG) { core::CPersistUtils::restore(HIERARCHICAL_RESULTS_TAG, m_ResultsQueue, traverser); } else if (name == MODEL_PLOT_TAG) { - core_t::TTime resultsQueueResetTime = m_ModelConfig.bucketResultsDelay() == 0 - ? m_LastFinalisedBucketEndTime - : m_LastFinalisedBucketEndTime - this->effectiveBucketLength(); + core_t::TTime resultsQueueResetTime = + m_ModelConfig.bucketResultsDelay() == 0 + ? 
m_LastFinalisedBucketEndTime + : m_LastFinalisedBucketEndTime - this->effectiveBucketLength(); m_ModelPlotQueue.reset(resultsQueueResetTime); core::CPersistUtils::restore(MODEL_PLOT_TAG, m_ModelPlotQueue, traverser); } else if (name == LATEST_RECORD_TIME_TAG) { @@ -877,15 +911,16 @@ bool CAnomalyJob::restoreState(core::CStateRestoreTraverser& traverser, core_t:: bool CAnomalyJob::restoreSingleDetector(core::CStateRestoreTraverser& traverser) { if (traverser.name() != KEY_TAG) { - LOG_ERROR(<< "Cannot restore anomaly detector - " << KEY_TAG << " element expected but found " << traverser.name() << '=' - << traverser.value()); + LOG_ERROR(<< "Cannot restore anomaly detector - " << KEY_TAG << " element expected but found " + << traverser.name() << '=' << traverser.value()); m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag; return false; } model::CSearchKey key; - if (traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::keyAcceptRestoreTraverser, _1, boost::ref(key))) == false) { + if (traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::keyAcceptRestoreTraverser, + _1, boost::ref(key))) == false) { LOG_ERROR(<< "Cannot restore anomaly detector - no key found in " << KEY_TAG); m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag; @@ -893,15 +928,16 @@ bool CAnomalyJob::restoreSingleDetector(core::CStateRestoreTraverser& traverser) } if (traverser.next() == false) { - LOG_ERROR(<< "Cannot restore anomaly detector - end of object reached when " << PARTITION_FIELD_TAG << " was expected"); + LOG_ERROR(<< "Cannot restore anomaly detector - end of object reached when " + << PARTITION_FIELD_TAG << " was expected"); m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag; return false; } if (traverser.name() != PARTITION_FIELD_TAG) { - LOG_ERROR(<< "Cannot restore anomaly detector - " << PARTITION_FIELD_TAG << " element expected but found " << traverser.name() - << '=' << traverser.value()); + LOG_ERROR(<< "Cannot restore anomaly detector - " << PARTITION_FIELD_TAG << " element expected but found " + << traverser.name() << '=' << traverser.value()); m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag; return false; @@ -909,7 +945,8 @@ bool CAnomalyJob::restoreSingleDetector(core::CStateRestoreTraverser& traverser) std::string partitionFieldValue; if (traverser.traverseSubLevel( - boost::bind(&model::CAnomalyDetector::partitionFieldAcceptRestoreTraverser, _1, boost::ref(partitionFieldValue))) == false) { + boost::bind(&model::CAnomalyDetector::partitionFieldAcceptRestoreTraverser, + _1, boost::ref(partitionFieldValue))) == false) { LOG_ERROR(<< "Cannot restore anomaly detector - " "no partition field value found in " << PARTITION_FIELD_TAG); @@ -919,21 +956,23 @@ bool CAnomalyJob::restoreSingleDetector(core::CStateRestoreTraverser& traverser) } if (traverser.next() == false) { - LOG_ERROR(<< "Cannot restore anomaly detector - end of object reached when " << DETECTOR_TAG << " was expected"); + LOG_ERROR(<< "Cannot restore anomaly detector - end of object reached when " + << DETECTOR_TAG << " was expected"); m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag; return false; } if (traverser.name() != DETECTOR_TAG) { - LOG_ERROR(<< "Cannot restore anomaly detector - " << DETECTOR_TAG << " element expected but found " << traverser.name() << '=' - << traverser.value()); + LOG_ERROR(<< "Cannot restore anomaly detector - " << DETECTOR_TAG << " element expected but found " + << traverser.name() << '=' << traverser.value()); 
m_RestoredStateDetail.s_RestoredStateStatus = E_UnexpectedTag; return false; } - if (this->restoreDetectorState(key, partitionFieldValue, traverser) == false || traverser.haveBadState()) { + if (this->restoreDetectorState(key, partitionFieldValue, traverser) == false || + traverser.haveBadState()) { LOG_ERROR(<< "Delegated portion of anomaly detector restore failed"); m_RestoredStateDetail.s_RestoredStateStatus = E_Failure; return false; @@ -946,11 +985,10 @@ bool CAnomalyJob::restoreSingleDetector(core::CStateRestoreTraverser& traverser) bool CAnomalyJob::restoreDetectorState(const model::CSearchKey& key, const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) { - const TAnomalyDetectorPtr& detector = this->detectorForKey(true, // for restoring - 0, // time reset later - key, - partitionFieldValue, - m_Limits.resourceMonitor()); + const TAnomalyDetectorPtr& detector = + this->detectorForKey(true, // for restoring + 0, // time reset later + key, partitionFieldValue, m_Limits.resourceMonitor()); if (!detector) { LOG_ERROR(<< "Detector with key '" << key.debug() << '/' << partitionFieldValue << "' " @@ -961,11 +999,14 @@ bool CAnomalyJob::restoreDetectorState(const model::CSearchKey& key, return false; } - LOG_DEBUG(<< "Restoring state for detector with key '" << key.debug() << '/' << partitionFieldValue << '\''); + LOG_DEBUG(<< "Restoring state for detector with key '" << key.debug() << '/' + << partitionFieldValue << '\''); - if (traverser.traverseSubLevel( - boost::bind(&model::CAnomalyDetector::acceptRestoreTraverser, detector.get(), boost::cref(partitionFieldValue), _1)) == false) { - LOG_ERROR(<< "Error restoring anomaly detector for key '" << key.debug() << '/' << partitionFieldValue << '\''); + if (traverser.traverseSubLevel(boost::bind( + &model::CAnomalyDetector::acceptRestoreTraverser, detector.get(), + boost::cref(partitionFieldValue), _1)) == false) { + LOG_ERROR(<< "Error restoring anomaly detector for key '" << key.debug() + << '/' << partitionFieldValue << '\''); return false; } @@ -998,17 +1039,11 @@ bool CAnomalyJob::persistState(core::CDataAdder& persister) { m_Normalizer.toJson(m_LastResultsTime, "api", normaliserState, true); return this->persistState( - "State persisted due to job close at ", - m_ResultsQueue, - m_ModelPlotQueue, - m_LastFinalisedBucketEndTime, - detectors, - m_Limits.resourceMonitor().createMemoryUsageReport(m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()), - m_Aggregator, - normaliserState, - m_LatestRecordTime, - m_LastResultsTime, - persister); + "State persisted due to job close at ", m_ResultsQueue, + m_ModelPlotQueue, m_LastFinalisedBucketEndTime, detectors, + m_Limits.resourceMonitor().createMemoryUsageReport( + m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()), + m_Aggregator, normaliserState, m_LatestRecordTime, m_LastResultsTime, persister); } bool CAnomalyJob::backgroundPersistState(CBackgroundPersister& backgroundPersister) { @@ -1019,13 +1054,10 @@ bool CAnomalyJob::backgroundPersistState(CBackgroundPersister& backgroundPersist // Do NOT add boost::ref wrappers around these arguments - they // MUST be copied for thread safety TBackgroundPersistArgsPtr args = boost::make_shared( - m_ResultsQueue, - m_ModelPlotQueue, - m_LastFinalisedBucketEndTime, - m_Limits.resourceMonitor().createMemoryUsageReport(m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()), - m_Aggregator, - m_LatestRecordTime, - m_LastResultsTime); + m_ResultsQueue, m_ModelPlotQueue, m_LastFinalisedBucketEndTime, + 
m_Limits.resourceMonitor().createMemoryUsageReport( + m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()), + m_Aggregator, m_LatestRecordTime, m_LastResultsTime); // The normaliser is non-copyable, so we have to make do with JSONifying it now; // it should be relatively fast though @@ -1037,20 +1069,25 @@ bool CAnomalyJob::backgroundPersistState(CBackgroundPersister& backgroundPersist for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector(detector_.second.get()); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } - model::CSearchKey::TStrCRefKeyCRefPr key(boost::cref(detector_.first.first), boost::cref(detector_.first.second)); + model::CSearchKey::TStrCRefKeyCRefPr key(boost::cref(detector_.first.first), + boost::cref(detector_.first.second)); if (detector->isSimpleCount()) { - copiedDetectors.push_back( - TKeyCRefAnomalyDetectorPtrPr(key, TAnomalyDetectorPtr(new model::CSimpleCountDetector(true, *detector)))); + copiedDetectors.push_back(TKeyCRefAnomalyDetectorPtrPr( + key, TAnomalyDetectorPtr(new model::CSimpleCountDetector(true, *detector)))); } else { - copiedDetectors.push_back(TKeyCRefAnomalyDetectorPtrPr(key, TAnomalyDetectorPtr(new model::CAnomalyDetector(true, *detector)))); + copiedDetectors.push_back(TKeyCRefAnomalyDetectorPtrPr( + key, TAnomalyDetectorPtr(new model::CAnomalyDetector(true, *detector)))); } } - std::sort(copiedDetectors.begin(), copiedDetectors.end(), maths::COrderings::SFirstLess()); + std::sort(copiedDetectors.begin(), copiedDetectors.end(), + maths::COrderings::SFirstLess()); - if (backgroundPersister.addPersistFunc(boost::bind(&CAnomalyJob::runBackgroundPersist, this, args, _1)) == false) { + if (backgroundPersister.addPersistFunc(boost::bind( + &CAnomalyJob::runBackgroundPersist, this, args, _1)) == false) { LOG_ERROR(<< "Failed to add anomaly detector background persistence function"); return false; } @@ -1058,23 +1095,18 @@ bool CAnomalyJob::backgroundPersistState(CBackgroundPersister& backgroundPersist return true; } -bool CAnomalyJob::runBackgroundPersist(TBackgroundPersistArgsPtr args, core::CDataAdder& persister) { +bool CAnomalyJob::runBackgroundPersist(TBackgroundPersistArgsPtr args, + core::CDataAdder& persister) { if (!args) { LOG_ERROR(<< "Unexpected NULL pointer passed to background persist"); return false; } - return this->persistState("Periodic background persist at ", - args->s_ResultsQueue, - args->s_ModelPlotQueue, - args->s_Time, - args->s_Detectors, - args->s_ModelSizeStats, - args->s_Aggregator, - args->s_NormalizerState, - args->s_LatestRecordTime, - args->s_LastResultsTime, - persister); + return this->persistState("Periodic background persist at ", args->s_ResultsQueue, + args->s_ModelPlotQueue, args->s_Time, args->s_Detectors, + args->s_ModelSizeStats, args->s_Aggregator, + args->s_NormalizerState, args->s_LatestRecordTime, + args->s_LastResultsTime, persister); } bool CAnomalyJob::persistState(const std::string& descriptionPrefix, @@ -1094,7 +1126,8 @@ bool CAnomalyJob::persistState(const std::string& descriptionPrefix, core_t::TTime snapshotTimestamp(core::CTimeUtils::now()); const std::string snapShotId(core::CStringUtils::typeToString(snapshotTimestamp)); - core::CDataAdder::TOStreamP strm = compressor.addStreamed(ML_STATE_INDEX, m_JobId + '_' + STATE_TYPE + '_' + snapShotId); + core::CDataAdder::TOStreamP strm = 
compressor.addStreamed( + ML_STATE_INDEX, m_JobId + '_' + STATE_TYPE + '_' + snapShotId); if (strm != nullptr) { // IMPORTANT - this method can run in a background thread while the // analytics carries on processing new buckets in the main thread. @@ -1108,7 +1141,8 @@ bool CAnomalyJob::persistState(const std::string& descriptionPrefix, inserter.insertValue(VERSION_TAG, model::CAnomalyDetector::STATE_VERSION); if (resultsQueue.size() > 1) { - core::CPersistUtils::persist(HIERARCHICAL_RESULTS_TAG, resultsQueue, inserter); + core::CPersistUtils::persist(HIERARCHICAL_RESULTS_TAG, + resultsQueue, inserter); } if (modelPlotQueue.size() > 1) { core::CPersistUtils::persist(MODEL_PLOT_TAG, modelPlotQueue, inserter); @@ -1117,19 +1151,23 @@ bool CAnomalyJob::persistState(const std::string& descriptionPrefix, for (const auto& detector_ : detectors) { const model::CAnomalyDetector* detector(detector_.second.get()); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } inserter.insertLevel(TOP_LEVEL_DETECTOR_TAG, - boost::bind(&CAnomalyJob::persistIndividualDetector, boost::cref(*detector), _1)); + boost::bind(&CAnomalyJob::persistIndividualDetector, + boost::cref(*detector), _1)); LOG_DEBUG(<< "Persisted state for '" << detector->description() << "'"); } inserter.insertLevel(RESULTS_AGGREGATOR_TAG, - boost::bind(&model::CHierarchicalResultsAggregator::acceptPersistInserter, &aggregator, _1)); + boost::bind(&model::CHierarchicalResultsAggregator::acceptPersistInserter, + &aggregator, _1)); - core::CPersistUtils::persist(LATEST_RECORD_TIME_TAG, latestRecordTime, inserter); + core::CPersistUtils::persist(LATEST_RECORD_TIME_TAG, + latestRecordTime, inserter); core::CPersistUtils::persist(LAST_RESULTS_TIME_TAG, lastResultsTime, inserter); } @@ -1140,14 +1178,10 @@ bool CAnomalyJob::persistState(const std::string& descriptionPrefix, if (m_PersistCompleteFunc) { CModelSnapshotJsonWriter::SModelSnapshotReport modelSnapshotReport{ - MODEL_SNAPSHOT_MIN_VERSION, - snapshotTimestamp, + MODEL_SNAPSHOT_MIN_VERSION, snapshotTimestamp, descriptionPrefix + core::CTimeUtils::toIso8601(snapshotTimestamp), - snapShotId, - compressor.numCompressedDocs(), - modelSizeStats, - normalizerState, - latestRecordTime, + snapShotId, compressor.numCompressedDocs(), modelSizeStats, + normalizerState, latestRecordTime, // This needs to be the last final result time as it serves // as the time after which all results are deleted when a // model snapshot is reverted @@ -1177,7 +1211,8 @@ bool CAnomalyJob::periodicPersistState(CBackgroundPersister& persister) { for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector = detector_.second.get(); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } m_Limits.resourceMonitor().forceRefresh(*detector); @@ -1186,7 +1221,8 @@ bool CAnomalyJob::periodicPersistState(CBackgroundPersister& persister) { return this->backgroundPersistState(persister); } -void CAnomalyJob::updateAggregatorAndAggregate(bool isInterim, model::CHierarchicalResults& results) { +void CAnomalyJob::updateAggregatorAndAggregate(bool isInterim, + model::CHierarchicalResults& results) { m_Aggregator.refresh(m_ModelConfig); 
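[Editorial note] The aggregation step that begins here refreshes the aggregator against the current model configuration and then drives it over the bucket's hierarchical results with bottomUpBreadthFirst(), so every node is scored after its children. The sketch below is one plausible reading of what that traversal order means, with invented Node and visitor types standing in for ml's CHierarchicalResults machinery, not the actual implementation:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical results-tree node.
struct Node {
    double score = 0.0;
    std::vector<Node*> children;
};

// Collect nodes level by level from the root, then visit the levels in
// reverse so children are always visited before their parents.
template<typename Visitor>
void bottomUpBreadthFirst(Node& root, Visitor visit) {
    std::vector<std::vector<Node*>> levels{{&root}};
    while (!levels.back().empty()) {
        std::vector<Node*> next;
        for (Node* node : levels.back()) {
            next.insert(next.end(), node->children.begin(), node->children.end());
        }
        levels.push_back(std::move(next));
    }
    for (std::size_t i = levels.size(); i-- > 0;) {
        for (Node* node : levels[i]) {
            visit(*node); // deeper levels were already visited
        }
    }
}

int main() {
    Node leaf1{1.0, {}}, leaf2{2.0, {}};
    Node root{0.0, {&leaf1, &leaf2}};
    bottomUpBreadthFirst(root, [](Node& n) {
        for (Node* c : n.children) { n.score += c->score; } // toy aggregation
        std::cout << n.score << '\n';                       // prints 1, 2, 3
    });
    return 0;
}
```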
m_Aggregator.setJob(model::CHierarchicalResultsAggregator::E_Correct); @@ -1202,7 +1238,8 @@ void CAnomalyJob::updateAggregatorAndAggregate(bool isInterim, model::CHierarchi results.pivotsBottomUpBreadthFirst(m_Aggregator); } -void CAnomalyJob::updateQuantilesAndNormalize(bool isInterim, model::CHierarchicalResults& results) { +void CAnomalyJob::updateQuantilesAndNormalize(bool isInterim, + model::CHierarchicalResults& results) { m_Normalizer.resetBigChange(); // The normalizers are NOT updated with interim results, in other @@ -1220,7 +1257,8 @@ void CAnomalyJob::updateQuantilesAndNormalize(bool isInterim, model::CHierarchic results.pivotsBottomUpBreadthFirst(m_Normalizer); if ((isInterim == false && m_Normalizer.hasLastUpdateCausedBigChange()) || - (m_MaxQuantileInterval > 0 && core::CTimeUtils::now() > m_LastNormalizerPersistTime + m_MaxQuantileInterval)) { + (m_MaxQuantileInterval > 0 && + core::CTimeUtils::now() > m_LastNormalizerPersistTime + m_MaxQuantileInterval)) { m_JsonOutputWriter.persistNormalizer(m_Normalizer, m_LastNormalizerPersistTime); } } @@ -1231,11 +1269,13 @@ void CAnomalyJob::outputResultsWithinRange(bool isInterim, core_t::TTime start, } if (start < m_LastFinalisedBucketEndTime) { LOG_WARN(<< "Cannot output results for range (" << start << ", " << m_LastFinalisedBucketEndTime - << "): Start time is before last finalized bucket end time " << m_LastFinalisedBucketEndTime << '.'); + << "): Start time is before last finalized bucket end time " + << m_LastFinalisedBucketEndTime << '.'); start = m_LastFinalisedBucketEndTime; } if (start > end) { - LOG_ERROR(<< "Cannot output results for range (" << start << ", " << end << "): Start time is later than end time."); + LOG_ERROR(<< "Cannot output results for range (" << start << ", " << end + << "): Start time is later than end time."); return; } core_t::TTime bucketLength = m_ModelConfig.bucketLength(); @@ -1252,12 +1292,15 @@ void CAnomalyJob::outputResultsWithinRange(bool isInterim, core_t::TTime start, } } -void CAnomalyJob::generateModelPlot(core_t::TTime startTime, core_t::TTime endTime, const model::CAnomalyDetector& detector) { +void CAnomalyJob::generateModelPlot(core_t::TTime startTime, + core_t::TTime endTime, + const model::CAnomalyDetector& detector) { double modelPlotBoundsPercentile(m_ModelConfig.modelPlotBoundsPercentile()); if (modelPlotBoundsPercentile > 0.0) { LOG_TRACE(<< "Generating model debug data at " << startTime); detector.generateModelPlot( - startTime, endTime, m_ModelConfig.modelPlotBoundsPercentile(), m_ModelConfig.modelPlotTerms(), m_ModelPlotQueue.get(startTime)); + startTime, endTime, m_ModelConfig.modelPlotBoundsPercentile(), + m_ModelConfig.modelPlotTerms(), m_ModelPlotQueue.get(startTime)); } } @@ -1270,7 +1313,8 @@ void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime) { } } -void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime, CModelPlotDataJsonWriter& writer) { +void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime, + CModelPlotDataJsonWriter& writer) { for (const auto& plot : m_ModelPlotQueue.get(resultsTime)) { writer.writeFlat(m_JobId, plot); } @@ -1282,18 +1326,24 @@ void CAnomalyJob::refreshMemoryAndReport() { for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector = detector_.second.get(); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } 
m_Limits.resourceMonitor().forceRefresh(*detector); } - m_Limits.resourceMonitor().sendMemoryUsageReport(m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()); + m_Limits.resourceMonitor().sendMemoryUsageReport( + m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()); } -void CAnomalyJob::persistIndividualDetector(const model::CAnomalyDetector& detector, core::CStatePersistInserter& inserter) { - inserter.insertLevel(KEY_TAG, boost::bind(&model::CAnomalyDetector::keyAcceptPersistInserter, &detector, _1)); - inserter.insertLevel(PARTITION_FIELD_TAG, boost::bind(&model::CAnomalyDetector::partitionFieldAcceptPersistInserter, &detector, _1)); - inserter.insertLevel(DETECTOR_TAG, boost::bind(&model::CAnomalyDetector::acceptPersistInserter, &detector, _1)); +void CAnomalyJob::persistIndividualDetector(const model::CAnomalyDetector& detector, + core::CStatePersistInserter& inserter) { + inserter.insertLevel(KEY_TAG, boost::bind(&model::CAnomalyDetector::keyAcceptPersistInserter, + &detector, _1)); + inserter.insertLevel(PARTITION_FIELD_TAG, boost::bind(&model::CAnomalyDetector::partitionFieldAcceptPersistInserter, + &detector, _1)); + inserter.insertLevel(DETECTOR_TAG, boost::bind(&model::CAnomalyDetector::acceptPersistInserter, + &detector, _1)); } void CAnomalyJob::detectors(TAnomalyDetectorPtrVec& detectors) const { @@ -1308,7 +1358,9 @@ void CAnomalyJob::sortedDetectors(TKeyCRefAnomalyDetectorPtrPrVec& detectors) co detectors.reserve(m_Detectors.size()); for (const auto& detector : m_Detectors) { detectors.push_back(TKeyCRefAnomalyDetectorPtrPr( - model::CSearchKey::TStrCRefKeyCRefPr(boost::cref(detector.first.first), boost::cref(detector.first.second)), detector.second)); + model::CSearchKey::TStrCRefKeyCRefPr(boost::cref(detector.first.first), + boost::cref(detector.first.second)), + detector.second)); } std::sort(detectors.begin(), detectors.end(), maths::COrderings::SFirstLess()); } @@ -1317,32 +1369,39 @@ const CAnomalyJob::TKeyAnomalyDetectorPtrUMap& CAnomalyJob::detectorPartitionMap return m_Detectors; } -const CAnomalyJob::TAnomalyDetectorPtr& CAnomalyJob::detectorForKey(bool isRestoring, - core_t::TTime time, - const model::CSearchKey& key, - const std::string& partitionFieldValue, - model::CResourceMonitor& resourceMonitor) { +const CAnomalyJob::TAnomalyDetectorPtr& +CAnomalyJob::detectorForKey(bool isRestoring, + core_t::TTime time, + const model::CSearchKey& key, + const std::string& partitionFieldValue, + model::CResourceMonitor& resourceMonitor) { // The simple count detector always lives in a special null partition. const std::string& partition = key.isSimpleCount() ? EMPTY_STRING : partitionFieldValue; // Try and get the detector. - auto itr = m_Detectors.find( - model::CSearchKey::TStrCRefKeyCRefPr(boost::cref(partition), boost::cref(key)), model::CStrKeyPrHash(), model::CStrKeyPrEqual()); + auto itr = m_Detectors.find(model::CSearchKey::TStrCRefKeyCRefPr( + boost::cref(partition), boost::cref(key)), + model::CStrKeyPrHash(), model::CStrKeyPrEqual()); // Check if we need to and are allowed to create a new detector. if (itr == m_Detectors.end() && resourceMonitor.areAllocationsAllowed()) { // Create an placeholder for the anomaly detector. 
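[Editorial note] The lookup-then-emplace sequence that follows is easy to lose in the reformatting noise: the detector map is probed first, and a new detector is only allocated when the resource monitor still allows allocations; otherwise a null detector is returned and the record is effectively dropped for that search. A condensed sketch of the same find-or-create-with-gating pattern, using invented types (Detector, allocationsAllowed) rather than the ml classes:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

// Hypothetical stand-in for model::CAnomalyDetector.
struct Detector {
    explicit Detector(std::string k) : key(std::move(k)) {}
    std::string key;
};
using TDetectorPtr = std::shared_ptr<Detector>;

TDetectorPtr findOrCreate(std::unordered_map<std::string, TDetectorPtr>& detectors,
                          const std::string& key,
                          bool allocationsAllowed) {
    auto itr = detectors.find(key);
    if (itr != detectors.end()) {
        return itr->second; // already created
    }
    if (!allocationsAllowed) {
        return nullptr; // over the memory limit: behave as if absent
    }
    // Emplace the placeholder first, then fill it in, mirroring the diff.
    TDetectorPtr& slot = detectors.emplace(key, nullptr).first->second;
    slot = std::make_shared<Detector>(key);
    return slot;
}

int main() {
    std::unordered_map<std::string, TDetectorPtr> detectors;
    std::cout << (findOrCreate(detectors, "count/by/status", true) != nullptr) << '\n';  // 1
    std::cout << (findOrCreate(detectors, "rare/by/uri", false) != nullptr) << '\n';     // 0
    return 0;
}
```

Emplacing the placeholder before constructing the detector keeps the map entry stable while makeDetector() runs, which matters because the returned reference points into the map.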
model::CAnomalyDetector::TAnomalyDetectorPtr& detector = - m_Detectors.emplace(model::CSearchKey::TStrKeyPr(partition, key), TAnomalyDetectorPtr()).first->second; + m_Detectors + .emplace(model::CSearchKey::TStrKeyPr(partition, key), TAnomalyDetectorPtr()) + .first->second; - LOG_TRACE(<< "Creating new detector for key '" << key.debug() << '/' << partition << '\'' << ", time " << time); + LOG_TRACE(<< "Creating new detector for key '" << key.debug() << '/' + << partition << '\'' << ", time " << time); LOG_TRACE(<< "Detector count " << m_Detectors.size()) - detector = this->makeDetector(key.identifier(), m_ModelConfig, m_Limits, partition, time, m_ModelConfig.factory(key)); + detector = this->makeDetector(key.identifier(), m_ModelConfig, m_Limits, + partition, time, m_ModelConfig.factory(key)); if (detector == nullptr) { // This should never happen as CAnomalyDetectorUtils::makeDetector() // contracts to never return NULL - LOG_ABORT(<< "Failed to create anomaly detector for key '" << key.debug() << '\''); + LOG_ABORT(<< "Failed to create anomaly detector for key '" + << key.debug() << '\''); } detector->zeroModelsToTime(time - m_ModelConfig.latency()); @@ -1352,7 +1411,8 @@ const CAnomalyJob::TAnomalyDetectorPtr& CAnomalyJob::detectorForKey(bool isResto } return detector; } else if (itr == m_Detectors.end()) { - LOG_TRACE(<< "No memory to create new detector for key '" << key.debug() << '/' << partition << '\''); + LOG_TRACE(<< "No memory to create new detector for key '" << key.debug() + << '/' << partition << '\''); return NULL_DETECTOR; } @@ -1365,24 +1425,28 @@ void CAnomalyJob::pruneAllModels() { for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector = detector_.second.get(); if (detector == nullptr) { - LOG_ERROR(<< "Unexpected NULL pointer for key '" << pairDebug(detector_.first) << '\''); + LOG_ERROR(<< "Unexpected NULL pointer for key '" + << pairDebug(detector_.first) << '\''); continue; } detector->pruneModels(); } } -model::CAnomalyDetector::TAnomalyDetectorPtr CAnomalyJob::makeDetector(int identifier, - const model::CAnomalyDetectorModelConfig& modelConfig, - model::CLimits& limits, - const std::string& partitionFieldValue, - core_t::TTime firstTime, - const model::CAnomalyDetector::TModelFactoryCPtr& modelFactory) { +model::CAnomalyDetector::TAnomalyDetectorPtr +CAnomalyJob::makeDetector(int identifier, + const model::CAnomalyDetectorModelConfig& modelConfig, + model::CLimits& limits, + const std::string& partitionFieldValue, + core_t::TTime firstTime, + const model::CAnomalyDetector::TModelFactoryCPtr& modelFactory) { return modelFactory->isSimpleCount() ? 
boost::make_shared<model::CSimpleCountDetector>( - identifier, modelFactory->summaryMode(), modelConfig, boost::ref(limits), partitionFieldValue, firstTime, modelFactory) + identifier, modelFactory->summaryMode(), modelConfig, + boost::ref(limits), partitionFieldValue, firstTime, modelFactory) : boost::make_shared<model::CAnomalyDetector>( - identifier, boost::ref(limits), modelConfig, partitionFieldValue, firstTime, modelFactory); + identifier, boost::ref(limits), modelConfig, + partitionFieldValue, firstTime, modelFactory); } void CAnomalyJob::populateDetectorKeys(const CFieldConfig& fieldConfig, TKeyVec& keys) { @@ -1392,25 +1456,25 @@ void CAnomalyJob::populateDetectorKeys(const CFieldConfig& fieldConfig, TKeyVec& keys.push_back(model::CSearchKey::simpleCountKey()); for (const auto& fieldOptions : fieldConfig.fieldOptions()) { - keys.emplace_back(fieldOptions.configKey(), - fieldOptions.function(), - fieldOptions.useNull(), - fieldOptions.excludeFrequent(), - fieldOptions.fieldName(), - fieldOptions.byFieldName(), - fieldOptions.overFieldName(), - fieldOptions.partitionFieldName(), + keys.emplace_back(fieldOptions.configKey(), fieldOptions.function(), + fieldOptions.useNull(), fieldOptions.excludeFrequent(), + fieldOptions.fieldName(), fieldOptions.byFieldName(), + fieldOptions.overFieldName(), fieldOptions.partitionFieldName(), fieldConfig.influencerFieldNames()); } } -const std::string* CAnomalyJob::fieldValue(const std::string& fieldName, const TStrStrUMap& dataRowFields) { - TStrStrUMapCItr itr = fieldName.empty() ? dataRowFields.end() : dataRowFields.find(fieldName); +const std::string* CAnomalyJob::fieldValue(const std::string& fieldName, + const TStrStrUMap& dataRowFields) { + TStrStrUMapCItr itr = fieldName.empty() ? dataRowFields.end() + : dataRowFields.find(fieldName); const std::string& fieldValue(itr == dataRowFields.end() ? EMPTY_STRING : itr->second); return !fieldName.empty() && fieldValue.empty() ?
nullptr : &fieldValue; } -void CAnomalyJob::addRecord(const TAnomalyDetectorPtr detector, core_t::TTime time, const TStrStrUMap& dataRowFields) { +void CAnomalyJob::addRecord(const TAnomalyDetectorPtr detector, + core_t::TTime time, + const TStrStrUMap& dataRowFields) { model::CAnomalyDetector::TStrCPtrVec fieldValues; const TStrVec& fieldNames = detector->fieldsOfInterest(); fieldValues.reserve(fieldNames.size()); @@ -1421,20 +1485,17 @@ void CAnomalyJob::addRecord(const TAnomalyDetectorPtr detector, core_t::TTime ti detector->addRecord(time, fieldValues); } -CAnomalyJob::SBackgroundPersistArgs::SBackgroundPersistArgs(const model::CResultsQueue& resultsQueue, - const TModelPlotDataVecQueue& modelPlotQueue, - core_t::TTime time, - const model::CResourceMonitor::SResults& modelSizeStats, - const model::CHierarchicalResultsAggregator& aggregator, - core_t::TTime latestRecordTime, - core_t::TTime lastResultsTime) - : s_ResultsQueue(resultsQueue), - s_ModelPlotQueue(modelPlotQueue), - s_Time(time), - s_ModelSizeStats(modelSizeStats), - s_Aggregator(aggregator), - s_LatestRecordTime(latestRecordTime), - s_LastResultsTime(lastResultsTime) { +CAnomalyJob::SBackgroundPersistArgs::SBackgroundPersistArgs( + const model::CResultsQueue& resultsQueue, + const TModelPlotDataVecQueue& modelPlotQueue, + core_t::TTime time, + const model::CResourceMonitor::SResults& modelSizeStats, + const model::CHierarchicalResultsAggregator& aggregator, + core_t::TTime latestRecordTime, + core_t::TTime lastResultsTime) + : s_ResultsQueue(resultsQueue), s_ModelPlotQueue(modelPlotQueue), + s_Time(time), s_ModelSizeStats(modelSizeStats), s_Aggregator(aggregator), + s_LatestRecordTime(latestRecordTime), s_LastResultsTime(lastResultsTime) { } } } diff --git a/lib/api/CBackgroundPersister.cc b/lib/api/CBackgroundPersister.cc index 26bd65406f..2fcda6ad53 100644 --- a/lib/api/CBackgroundPersister.cc +++ b/lib/api/CBackgroundPersister.cc @@ -19,13 +19,11 @@ namespace { const core_t::TTime PERSIST_INTERVAL_INCREMENT(300); // 5 minutes } -CBackgroundPersister::CBackgroundPersister(core_t::TTime periodicPersistInterval, core::CDataAdder& dataAdder) +CBackgroundPersister::CBackgroundPersister(core_t::TTime periodicPersistInterval, + core::CDataAdder& dataAdder) : m_PeriodicPersistInterval(periodicPersistInterval), - m_LastPeriodicPersistTime(core::CTimeUtils::now()), - m_DataAdder(dataAdder), - m_IsBusy(false), - m_IsShutdown(false), - m_BackgroundThread(*this) { + m_LastPeriodicPersistTime(core::CTimeUtils::now()), m_DataAdder(dataAdder), + m_IsBusy(false), m_IsShutdown(false), m_BackgroundThread(*this) { if (m_PeriodicPersistInterval < PERSIST_INTERVAL_INCREMENT) { // This may be dynamically increased further depending on how long // persistence takes @@ -39,9 +37,7 @@ CBackgroundPersister::CBackgroundPersister(core_t::TTime periodicPersistInterval : m_PeriodicPersistInterval(periodicPersistInterval), m_LastPeriodicPersistTime(core::CTimeUtils::now()), m_FirstProcessorPeriodicPersistFunc(firstProcessorPeriodicPersistFunc), - m_DataAdder(dataAdder), - m_IsBusy(false), - m_IsShutdown(false), + m_DataAdder(dataAdder), m_IsBusy(false), m_IsShutdown(false), m_BackgroundThread(*this) { if (m_PeriodicPersistInterval < PERSIST_INTERVAL_INCREMENT) { // This may be dynamically increased further depending on how long @@ -131,7 +127,8 @@ bool CBackgroundPersister::clear() { return true; } -bool CBackgroundPersister::firstProcessorPeriodicPersistFunc(const TFirstProcessorPeriodicPersistFunc& firstProcessorPeriodicPersistFunc) { +bool 
CBackgroundPersister::firstProcessorPeriodicPersistFunc( + const TFirstProcessorPeriodicPersistFunc& firstProcessorPeriodicPersistFunc) { core::CScopedFastLock lock(m_Mutex); if (this->isBusy()) { @@ -164,7 +161,8 @@ bool CBackgroundPersister::startBackgroundPersistIfAppropriate() { m_PeriodicPersistInterval += PERSIST_INTERVAL_INCREMENT; LOG_WARN(<< "Periodic persist is due at " << due << " but previous persist started at " - << core::CTimeUtils::toIso8601(m_LastPeriodicPersistTime) << " is still in progress - increased persistence interval to " + << core::CTimeUtils::toIso8601(m_LastPeriodicPersistTime) + << " is still in progress - increased persistence interval to " << m_PeriodicPersistInterval << " seconds"); return false; @@ -196,7 +194,8 @@ bool CBackgroundPersister::startBackgroundPersist(core_t::TTime timeOfPersistenc return true; } -CBackgroundPersister::CBackgroundThread::CBackgroundThread(CBackgroundPersister& owner) : m_Owner(owner) { +CBackgroundPersister::CBackgroundThread::CBackgroundThread(CBackgroundPersister& owner) + : m_Owner(owner) { } void CBackgroundPersister::CBackgroundThread::run() { diff --git a/lib/api/CBaseTokenListDataTyper.cc b/lib/api/CBaseTokenListDataTyper.cc index 6d17d5f815..bf0be6cb46 100644 --- a/lib/api/CBaseTokenListDataTyper.cc +++ b/lib/api/CBaseTokenListDataTyper.cc @@ -40,29 +40,32 @@ const std::string EMPTY_STRING; CBaseTokenListDataTyper::CBaseTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr& reverseSearchCreator, double threshold, const std::string& fieldName) - : CDataTyper(fieldName), - m_ReverseSearchCreator(reverseSearchCreator), + : CDataTyper(fieldName), m_ReverseSearchCreator(reverseSearchCreator), m_LowerThreshold(std::min(0.99, std::max(0.01, threshold))), // Upper threshold is half way between the lower threshold and 1 - m_UpperThreshold((1.0 + m_LowerThreshold) / 2.0), - m_HasChanged(false) { + m_UpperThreshold((1.0 + m_LowerThreshold) / 2.0), m_HasChanged(false) { } void CBaseTokenListDataTyper::dumpStats() const { // Type number is vector index plus one int typeNum(1); for (const auto& type : m_Types) { - LOG_DEBUG(<< "Type=" << typeNum << '-' << type.numMatches() << ' ' << type.baseString()); + LOG_DEBUG(<< "Type=" << typeNum << '-' << type.numMatches() << ' ' + << type.baseString()); ++typeNum; } } -int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& fields, const std::string& str, size_t rawStringLen) { +int CBaseTokenListDataTyper::computeType(bool isDryRun, + const TStrStrUMap& fields, + const std::string& str, + size_t rawStringLen) { // First tokenise string size_t workWeight(0); auto preTokenisedIter = fields.find(PRETOKENISED_TOKEN_FIELD); if (preTokenisedIter != fields.end()) { - if (this->addPretokenisedTokens(preTokenisedIter->second, m_WorkTokenIds, m_WorkTokenUniqueIds, workWeight) == false) { + if (this->addPretokenisedTokens(preTokenisedIter->second, m_WorkTokenIds, + m_WorkTokenUniqueIds, workWeight) == false) { return -1; } } else { @@ -78,7 +81,8 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& field // we've seen for them TSizeSizePrListItr bestSoFarIter(m_TypesByCount.end()); double bestSoFarSimilarity(m_LowerThreshold); - for (TSizeSizePrListItr iter = m_TypesByCount.begin(); iter != m_TypesByCount.end(); ++iter) { + for (TSizeSizePrListItr iter = m_TypesByCount.begin(); + iter != m_TypesByCount.end(); ++iter) { const CTokenListType& compType = m_Types[iter->second]; const TSizeSizePrVec& baseTokenIds = compType.baseTokenIds(); size_t 
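
The CBackgroundPersister hunks above are pure re-wrapping, but they pass through the one adaptive behaviour in this class: when a periodic persist falls due while the previous persist is still running, the attempt is skipped and the interval is pushed out by PERSIST_INTERVAL_INCREMENT. A minimal sketch of that back-off rule, using illustrative stand-in names and std::time rather than the real CBackgroundPersister/core::CTimeUtils API:

    #include <ctime>

    class PeriodicPersister {
    public:
        // 5 minutes, mirroring PERSIST_INTERVAL_INCREMENT in the patch
        static constexpr std::time_t INTERVAL_INCREMENT = 300;

        explicit PeriodicPersister(std::time_t interval)
            : m_Interval(interval < INTERVAL_INCREMENT ? INTERVAL_INCREMENT : interval),
              m_LastPersistTime(std::time(nullptr)), m_Busy(false) {}

        bool startIfDue() {
            if (std::time(nullptr) < m_LastPersistTime + m_Interval) {
                return false; // not due yet
            }
            if (m_Busy) {
                // Previous persist still in progress: back off rather than overlap
                m_Interval += INTERVAL_INCREMENT;
                return false;
            }
            m_Busy = true;
            m_LastPersistTime = std::time(nullptr);
            // ... hand the real work to a background thread here ...
            return true;
        }

        void finished() { m_Busy = false; }

    private:
        std::time_t m_Interval;
        std::time_t m_LastPersistTime;
        bool m_Busy;
    };
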
diff --git a/lib/api/CBaseTokenListDataTyper.cc b/lib/api/CBaseTokenListDataTyper.cc
index 6d17d5f815..bf0be6cb46 100644
--- a/lib/api/CBaseTokenListDataTyper.cc
+++ b/lib/api/CBaseTokenListDataTyper.cc
@@ -40,29 +40,32 @@ const std::string EMPTY_STRING;
 CBaseTokenListDataTyper::CBaseTokenListDataTyper(const TTokenListReverseSearchCreatorIntfCPtr& reverseSearchCreator,
                                                  double threshold,
                                                  const std::string& fieldName)
-    : CDataTyper(fieldName),
-      m_ReverseSearchCreator(reverseSearchCreator),
+    : CDataTyper(fieldName), m_ReverseSearchCreator(reverseSearchCreator),
       m_LowerThreshold(std::min(0.99, std::max(0.01, threshold))),
       // Upper threshold is half way between the lower threshold and 1
-      m_UpperThreshold((1.0 + m_LowerThreshold) / 2.0),
-      m_HasChanged(false) {
+      m_UpperThreshold((1.0 + m_LowerThreshold) / 2.0), m_HasChanged(false) {
 }
 
 void CBaseTokenListDataTyper::dumpStats() const {
     // Type number is vector index plus one
     int typeNum(1);
     for (const auto& type : m_Types) {
-        LOG_DEBUG(<< "Type=" << typeNum << '-' << type.numMatches() << ' ' << type.baseString());
+        LOG_DEBUG(<< "Type=" << typeNum << '-' << type.numMatches() << ' '
+                  << type.baseString());
         ++typeNum;
     }
 }
 
-int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& fields, const std::string& str, size_t rawStringLen) {
+int CBaseTokenListDataTyper::computeType(bool isDryRun,
+                                         const TStrStrUMap& fields,
+                                         const std::string& str,
+                                         size_t rawStringLen) {
     // First tokenise string
     size_t workWeight(0);
     auto preTokenisedIter = fields.find(PRETOKENISED_TOKEN_FIELD);
     if (preTokenisedIter != fields.end()) {
-        if (this->addPretokenisedTokens(preTokenisedIter->second, m_WorkTokenIds, m_WorkTokenUniqueIds, workWeight) == false) {
+        if (this->addPretokenisedTokens(preTokenisedIter->second, m_WorkTokenIds,
+                                        m_WorkTokenUniqueIds, workWeight) == false) {
             return -1;
         }
     } else {
@@ -78,7 +81,8 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& field
     // we've seen for them
     TSizeSizePrListItr bestSoFarIter(m_TypesByCount.end());
     double bestSoFarSimilarity(m_LowerThreshold);
-    for (TSizeSizePrListItr iter = m_TypesByCount.begin(); iter != m_TypesByCount.end(); ++iter) {
+    for (TSizeSizePrListItr iter = m_TypesByCount.begin();
+         iter != m_TypesByCount.end(); ++iter) {
         const CTokenListType& compType = m_Types[iter->second];
         const TSizeSizePrVec& baseTokenIds = compType.baseTokenIds();
         size_t baseWeight(compType.baseWeight());
@@ -88,7 +92,8 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& field
         // further checks. The first condition here ensures that we never say
         // a string with tokens matches the reverse search of a string with no
         // tokens (which the other criteria alone might say matched).
-        bool matchesSearch((baseWeight == 0) == (workWeight == 0) && compType.maxMatchingStringLen() >= rawStringLen &&
+        bool matchesSearch((baseWeight == 0) == (workWeight == 0) &&
+                           compType.maxMatchingStringLen() >= rawStringLen &&
                            compType.isMissingCommonTokenWeightZero(m_WorkTokenUniqueIds) &&
                            compType.containsCommonTokensInOrder(m_WorkTokenIds));
         if (!matchesSearch) {
@@ -102,8 +107,10 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& field
         // reduce the number of unique common tokens
         size_t origUniqueTokenWeight(compType.origUniqueTokenWeight());
         size_t commonUniqueTokenWeight(compType.commonUniqueTokenWeight());
-        size_t missingCommonTokenWeight(compType.missingCommonTokenWeight(m_WorkTokenUniqueIds));
-        double proportionOfOrig(double(commonUniqueTokenWeight - missingCommonTokenWeight) / double(origUniqueTokenWeight));
+        size_t missingCommonTokenWeight(
+            compType.missingCommonTokenWeight(m_WorkTokenUniqueIds));
+        double proportionOfOrig(double(commonUniqueTokenWeight - missingCommonTokenWeight) /
+                                double(origUniqueTokenWeight));
         if (proportionOfOrig < m_LowerThreshold) {
             continue;
         }
@@ -117,13 +124,15 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& field
             if (similarity <= m_LowerThreshold) {
                 // Not an ideal situation, but log at trace level to avoid
                 // excessive log file spam
-                LOG_TRACE(<< "Reverse search match below threshold : " << similarity << '-' << compType.baseString() << '|' << str);
+                LOG_TRACE(<< "Reverse search match below threshold : " << similarity
+                          << '-' << compType.baseString() << '|' << str);
             }
 
             // This is a strong match, so accept it immediately and stop
             // looking for better matches - use vector index plus one as type
             int type(1 + int(iter->second));
-            this->addTypeMatch(isDryRun, str, rawStringLen, m_WorkTokenIds, m_WorkTokenUniqueIds, similarity, iter);
+            this->addTypeMatch(isDryRun, str, rawStringLen, m_WorkTokenIds,
+                               m_WorkTokenUniqueIds, similarity, iter);
             return type;
         }
 
@@ -143,12 +152,14 @@ int CBaseTokenListDataTyper::computeType(bool isDryRun, const TStrStrUMap& field
     if (bestSoFarIter != m_TypesByCount.end()) {
         // Return the best match - use vector index plus one as type
         int type(1 + int(bestSoFarIter->second));
-        this->addTypeMatch(isDryRun, str, rawStringLen, m_WorkTokenIds, m_WorkTokenUniqueIds, bestSoFarSimilarity, bestSoFarIter);
+        this->addTypeMatch(isDryRun, str, rawStringLen, m_WorkTokenIds,
+                           m_WorkTokenUniqueIds, bestSoFarSimilarity, bestSoFarIter);
         return type;
     }
 
     // If we get here we haven't matched, so create a new type
-    CTokenListType obj(isDryRun, str, rawStringLen, m_WorkTokenIds, workWeight, m_WorkTokenUniqueIds);
+    CTokenListType obj(isDryRun, str, rawStringLen, m_WorkTokenIds, workWeight,
+                       m_WorkTokenUniqueIds);
     m_TypesByCount.push_back(TSizeSizePr(1, m_Types.size()));
     m_Types.push_back(obj);
     m_HasChanged = true;
@@ -206,8 +217,9 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type,
     const TSizeSizePrVec& commonUniqueTokenIds = typeObj.commonUniqueTokenIds();
     if (commonUniqueTokenIds.empty()) {
         // There's quite a high chance this call will return false
-        if (m_ReverseSearchCreator->createNoUniqueTokenSearch(type, typeObj.baseString(), typeObj.maxMatchingStringLen(), part1, part2) ==
-            false) {
+        if (m_ReverseSearchCreator->createNoUniqueTokenSearch(
+                type, typeObj.baseString(), typeObj.maxMatchingStringLen(),
+                part1, part2) == false) {
             // More detail should have been logged by the failed call
             LOG_ERROR(<< "Could not create reverse search");
 
@@ -231,10 +243,12 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type,
     size_t lowestCost(std::numeric_limits<size_t>::max());
     for (const auto& commonUniqueTokenId : commonUniqueTokenIds) {
         size_t tokenId(commonUniqueTokenId.first);
-        size_t occurrences(std::count_if(baseTokenIds.begin(), baseTokenIds.end(), CSizePairFirstElementEquals(tokenId)));
+        size_t occurrences(std::count_if(baseTokenIds.begin(), baseTokenIds.end(),
+                                         CSizePairFirstElementEquals(tokenId)));
         const CTokenInfoItem& info = m_TokenIdLookup[tokenId];
         size_t cost(m_ReverseSearchCreator->costOfToken(info.str(), occurrences));
-        rareIdsWithCost.insert(TSizeSizeSizePrMMap::value_type(info.typeCount(), TSizeSizePr(tokenId, cost)));
+        rareIdsWithCost.insert(TSizeSizeSizePrMMap::value_type(
+            info.typeCount(), TSizeSizePr(tokenId, cost)));
         lowestCost = std::min(cost, lowestCost);
     }
 
@@ -242,7 +256,8 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type,
     TSizeSet costedCommonUniqueTokenIds;
     size_t cheapestCost(std::numeric_limits<size_t>::max());
     auto cheapestIter = rareIdsWithCost.end();
-    for (auto iter = rareIdsWithCost.begin(); iter != rareIdsWithCost.end() && availableCost > lowestCost; ++iter) {
+    for (auto iter = rareIdsWithCost.begin();
+         iter != rareIdsWithCost.end() && availableCost > lowestCost; ++iter) {
         if (iter->second.second < cheapestCost) {
             cheapestCost = iter->second.second;
             cheapestIter = iter;
@@ -266,7 +281,8 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type,
         } else {
             LOG_ERROR(<< "No token was short enough to include in reverse search "
                          "for "
-                      << type << " - cheapest token was " << cheapestIter->second.first << " with cost " << cheapestCost);
+                      << type << " - cheapest token was "
+                      << cheapestIter->second.first << " with cost " << cheapestCost);
         }
 
         part1.clear();
@@ -278,18 +294,22 @@ bool CBaseTokenListDataTyper::createReverseSearch(int type,
 
     // If we get here we're going to create a search in the standard way - there
     // shouldn't be any more errors after this point
-    m_ReverseSearchCreator->initStandardSearch(type, typeObj.baseString(), typeObj.maxMatchingStringLen(), part1, part2);
+    m_ReverseSearchCreator->initStandardSearch(
+        type, typeObj.baseString(), typeObj.maxMatchingStringLen(), part1, part2);
 
     for (auto costedCommonUniqueTokenId : costedCommonUniqueTokenIds) {
-        m_ReverseSearchCreator->addCommonUniqueToken(m_TokenIdLookup[costedCommonUniqueTokenId].str(), part1, part2);
+        m_ReverseSearchCreator->addCommonUniqueToken(
+            m_TokenIdLookup[costedCommonUniqueTokenId].str(), part1, part2);
     }
 
     bool first(true);
     size_t end(typeObj.outOfOrderCommonTokenIndex());
     for (size_t index = 0; index < end; ++index) {
         size_t tokenId(baseTokenIds[index].first);
-        if (costedCommonUniqueTokenIds.find(tokenId) != costedCommonUniqueTokenIds.end()) {
-            m_ReverseSearchCreator->addInOrderCommonToken(m_TokenIdLookup[tokenId].str(), first, part1, part2);
+        if (costedCommonUniqueTokenIds.find(tokenId) !=
+            costedCommonUniqueTokenIds.end()) {
+            m_ReverseSearchCreator->addInOrderCommonToken(
+                m_TokenIdLookup[tokenId].str(), first, part1, part2);
             first = false;
         }
     }
@@ -333,7 +353,8 @@ bool CBaseTokenListDataTyper::acceptRestoreTraverser(core::CStateRestoreTraverse
             m_TokenIdLookup.push_back(CTokenInfoItem(traverser.value(), nextIndex));
         } else if (name == TOKEN_TYPE_COUNT_TAG) {
             if (m_TokenIdLookup.empty()) {
-                LOG_ERROR(<< "Token type count precedes token string in " << traverser.value());
+                LOG_ERROR(<< "Token type count precedes token string in "
+                          << traverser.value());
                 return false;
             }
 
@@ -374,12 +395,14 @@ void CBaseTokenListDataTyper::acceptPersistInserter(const TTokenMIndex& tokenIdL
     }
 
     for (const CTokenListType& type : types) {
-        inserter.insertLevel(TYPE_TAG, boost::bind(&CTokenListType::acceptPersistInserter, &type, _1));
+        inserter.insertLevel(
+            TYPE_TAG, boost::bind(&CTokenListType::acceptPersistInserter, &type, _1));
     }
 }
 
 CDataTyper::TPersistFunc CBaseTokenListDataTyper::makePersistFunc() const {
-    return boost::bind(&CBaseTokenListDataTyper::acceptPersistInserter, m_TokenIdLookup, m_Types, _1);
+    return boost::bind(&CBaseTokenListDataTyper::acceptPersistInserter,
+                       m_TokenIdLookup, m_Types, _1);
 }
 
 void CBaseTokenListDataTyper::addTypeMatch(bool isDryRun,
@@ -389,7 +412,8 @@ void CBaseTokenListDataTyper::addTypeMatch(bool isDryRun,
                                            const TSizeSizeMap& tokenUniqueIds,
                                            double similarity,
                                            TSizeSizePrListItr& iter) {
-    if (m_Types[iter->second].addString(isDryRun, str, rawStringLen, tokenIds, tokenUniqueIds, similarity) == true) {
+    if (m_Types[iter->second].addString(isDryRun, str, rawStringLen, tokenIds,
+                                        tokenUniqueIds, similarity) == true) {
         m_HasChanged = true;
     }
 
@@ -480,7 +504,8 @@ bool CBaseTokenListDataTyper::addPretokenisedTokens(const std::string& tokensCsv
     return true;
 }
 
-CBaseTokenListDataTyper::CTokenInfoItem::CTokenInfoItem(const std::string& str, size_t index) : m_Str(str), m_Index(index), m_TypeCount(0) {
+CBaseTokenListDataTyper::CTokenInfoItem::CTokenInfoItem(const std::string& str, size_t index)
+    : m_Str(str), m_Index(index), m_TypeCount(0) {
 }
 
 const std::string& CBaseTokenListDataTyper::CTokenInfoItem::str() const {
@@ -503,15 +528,20 @@ void CBaseTokenListDataTyper::CTokenInfoItem::incTypeCount() {
     ++m_TypeCount;
 }
 
-CBaseTokenListDataTyper::CSizePairFirstElementEquals::CSizePairFirstElementEquals(size_t value) : m_Value(value) {
+CBaseTokenListDataTyper::CSizePairFirstElementEquals::CSizePairFirstElementEquals(size_t value)
+    : m_Value(value) {
 }
 
-CBaseTokenListDataTyper::SIdTranslater::SIdTranslater(const CBaseTokenListDataTyper& typer, const TSizeSizePrVec& tokenIds, char separator)
+CBaseTokenListDataTyper::SIdTranslater::SIdTranslater(const CBaseTokenListDataTyper& typer,
+                                                      const TSizeSizePrVec& tokenIds,
+                                                      char separator)
     : s_Typer(typer), s_TokenIds(tokenIds), s_Separator(separator) {
 }
 
-std::ostream& operator<<(std::ostream& strm, const CBaseTokenListDataTyper::SIdTranslater& translator) {
-    for (auto iter = translator.s_TokenIds.begin(); iter != translator.s_TokenIds.end(); ++iter) {
+std::ostream& operator<<(std::ostream& strm,
+                         const CBaseTokenListDataTyper::SIdTranslater& translator) {
+    for (auto iter = translator.s_TokenIds.begin();
+         iter != translator.s_TokenIds.end(); ++iter) {
         if (iter != translator.s_TokenIds.begin()) {
             strm << translator.s_Separator;
         }
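
Behind the re-wrapped computeType() above sits a two-threshold scheme: the constructor clamps the configured threshold into [0.01, 0.99] as the lower bound and places the upper bound half way between that and 1. A candidate type is rejected outright below the lower bound, remembered as best-so-far between the bounds, and accepted immediately above the upper bound. A condensed sketch of just that decision, with the real token weighting and CTokenListType bookkeeping omitted:

    #include <algorithm>

    struct Thresholds {
        double lower;
        double upper;
        explicit Thresholds(double threshold)
            : lower(std::min(0.99, std::max(0.01, threshold))),
              // Upper threshold is half way between the lower threshold and 1
              upper((1.0 + lower) / 2.0) {}
    };

    enum class Match { AcceptNow, Candidate, Reject };

    Match classify(double similarity, const Thresholds& t) {
        if (similarity > t.upper) {
            return Match::AcceptNow; // strong match: stop searching
        }
        if (similarity > t.lower) {
            return Match::Candidate; // remember as best-so-far, keep looking
        }
        return Match::Reject;
    }
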
diff --git a/lib/api/CBenchMarker.cc b/lib/api/CBenchMarker.cc
index 1cf660eebd..9ae6774d2d 100644
--- a/lib/api/CBenchMarker.cc
+++ b/lib/api/CBenchMarker.cc
@@ -51,7 +51,8 @@ bool CBenchMarker::init(const std::string& regexFilename) {
 void CBenchMarker::addResult(const std::string& message, int type) {
     bool scored(false);
     size_t position(0);
-    for (TRegexIntSizeStrPrMapPrVecItr measureVecIter = m_Measures.begin(); measureVecIter != m_Measures.end(); ++measureVecIter) {
+    for (TRegexIntSizeStrPrMapPrVecItr measureVecIter = m_Measures.begin();
+         measureVecIter != m_Measures.end(); ++measureVecIter) {
         const core::CRegex& regex = measureVecIter->first;
         if (regex.search(message, position) == true) {
             TIntSizeStrPrMap& counts = measureVecIter->second;
@@ -77,17 +78,21 @@ void CBenchMarker::addResult(const std::string& message, int type) {
 void CBenchMarker::dumpResults() const {
     // Sort the results in descending order of actual type occurrence
     using TSizeRegexIntSizeStrPrMapPrVecCItrPr = std::pair<size_t, TRegexIntSizeStrPrMapPrVecCItr>;
-    using TSizeRegexIntSizeStrPrMapPrVecCItrPrVec = std::vector<TSizeRegexIntSizeStrPrMapPrVecCItrPr>;
-    using TSizeRegexIntSizeStrPrMapPrVecCItrPrVecCItr = TSizeRegexIntSizeStrPrMapPrVecCItrPrVec::const_iterator;
+    using TSizeRegexIntSizeStrPrMapPrVecCItrPrVec =
+        std::vector<TSizeRegexIntSizeStrPrMapPrVecCItrPr>;
+    using TSizeRegexIntSizeStrPrMapPrVecCItrPrVecCItr =
+        TSizeRegexIntSizeStrPrMapPrVecCItrPrVec::const_iterator;
 
     TSizeRegexIntSizeStrPrMapPrVecCItrPrVec sortVec;
     sortVec.reserve(m_Measures.size());
-    for (TRegexIntSizeStrPrMapPrVecCItr measureVecIter = m_Measures.begin(); measureVecIter != m_Measures.end(); ++measureVecIter) {
+    for (TRegexIntSizeStrPrMapPrVecCItr measureVecIter = m_Measures.begin();
+         measureVecIter != m_Measures.end(); ++measureVecIter) {
         const TIntSizeStrPrMap& counts = measureVecIter->second;
         size_t total(0);
-        for (TIntSizeStrPrMapCItr mapIter = counts.begin(); mapIter != counts.end(); ++mapIter) {
+        for (TIntSizeStrPrMapCItr mapIter = counts.begin();
+             mapIter != counts.end(); ++mapIter) {
             total += mapIter->second.first;
         }
 
@@ -95,7 +100,8 @@ void CBenchMarker::dumpResults() const {
     }
 
     // Sort descending
-    using TGreaterSizeRegexIntSizeStrPrMapPrVecCItrPr = std::greater<TSizeRegexIntSizeStrPrMapPrVecCItrPr>;
+    using TGreaterSizeRegexIntSizeStrPrMapPrVecCItrPr =
+        std::greater<TSizeRegexIntSizeStrPrMapPrVecCItrPr>;
     TGreaterSizeRegexIntSizeStrPrMapPrVecCItrPr comp;
     std::sort(sortVec.begin(), sortVec.end(), comp);
 
@@ -110,7 +116,8 @@ void CBenchMarker::dumpResults() const {
     // Iterate backwards through the sorted vector, so that the most common
     // actual types are looked at first
-    for (TSizeRegexIntSizeStrPrMapPrVecCItrPrVecCItr sortedVecIter = sortVec.begin(); sortedVecIter != sortVec.end(); ++sortedVecIter) {
+    for (TSizeRegexIntSizeStrPrMapPrVecCItrPrVecCItr sortedVecIter = sortVec.begin();
+         sortedVecIter != sortVec.end(); ++sortedVecIter) {
         size_t total(sortedVecIter->first);
         if (total > 0) {
             ++observedActuals;
@@ -119,17 +126,20 @@ void CBenchMarker::dumpResults() const {
         TRegexIntSizeStrPrMapPrVecCItr measureVecIter = sortedVecIter->second;
         const core::CRegex& regex = measureVecIter->first;
-        strm << "Manual category defined by regex " << regex.str() << core_t::LINE_ENDING << "\tNumber of messages in manual category "
+        strm << "Manual category defined by regex " << regex.str()
+             << core_t::LINE_ENDING << "\tNumber of messages in manual category "
              << total << core_t::LINE_ENDING;
 
         const TIntSizeStrPrMap& counts = measureVecIter->second;
-        strm << "\tNumber of Ml categories that include this manual category " << counts.size() << core_t::LINE_ENDING;
+        strm << "\tNumber of Ml categories that include this manual category "
+             << counts.size() << core_t::LINE_ENDING;
 
         if (counts.size() == 1) {
             size_t count(counts.begin()->second.first);
             int type(counts.begin()->first);
             if (usedTypes.find(type) != usedTypes.end()) {
-                strm << "\t\t" << count << "\t(CATEGORY ALREADY USED)\t" << counts.begin()->second.second << core_t::LINE_ENDING;
+                strm << "\t\t" << count << "\t(CATEGORY ALREADY USED)\t"
+                     << counts.begin()->second.second << core_t::LINE_ENDING;
             } else {
                 good += count;
                 usedTypes.insert(type);
@@ -142,7 +152,8 @@ void CBenchMarker::dumpResults() const {
             // are bad.
             size_t max(0);
             int maxType(-1);
-            for (TIntSizeStrPrMapCItr mapIter = counts.begin(); mapIter != counts.end(); ++mapIter) {
+            for (TIntSizeStrPrMapCItr mapIter = counts.begin();
+                 mapIter != counts.end(); ++mapIter) {
                 int type(mapIter->first);
                 size_t count(mapIter->second.first);
 
@@ -165,11 +176,13 @@ void CBenchMarker::dumpResults() const {
         }
     }
 
-    strm << "Total number of messages passed to benchmarker " << m_TotalMessages << core_t::LINE_ENDING
-         << "Total number of scored messages " << m_ScoredMessages << core_t::LINE_ENDING
-         << "Number of scored messages correctly categorised by Ml " << good << core_t::LINE_ENDING
-         << "Overall accuracy for scored messages " << (double(good) / double(m_ScoredMessages)) * 100.0 << '%' << core_t::LINE_ENDING
-         << "Percentage of manual categories detected at all " << (double(usedTypes.size()) / double(observedActuals)) * 100.0 << '%';
+    strm << "Total number of messages passed to benchmarker " << m_TotalMessages
+         << core_t::LINE_ENDING << "Total number of scored messages " << m_ScoredMessages
+         << core_t::LINE_ENDING << "Number of scored messages correctly categorised by Ml "
+         << good << core_t::LINE_ENDING << "Overall accuracy for scored messages "
+         << (double(good) / double(m_ScoredMessages)) * 100.0 << '%'
+         << core_t::LINE_ENDING << "Percentage of manual categories detected at all "
+         << (double(usedTypes.size()) / double(observedActuals)) * 100.0 << '%';
 
     LOG_DEBUG(<< strm.str());
 }
diff --git a/lib/api/CCategoryExamplesCollector.cc b/lib/api/CCategoryExamplesCollector.cc
index 52976bb458..ab6f347f1a 100644
--- a/lib/api/CCategoryExamplesCollector.cc
+++ b/lib/api/CCategoryExamplesCollector.cc
@@ -32,12 +32,15 @@ const std::string ELLIPSIS(3, '.');
 
 const size_t CCategoryExamplesCollector::MAX_EXAMPLE_LENGTH(1000);
 
-CCategoryExamplesCollector::CCategoryExamplesCollector(std::size_t maxExamples) : m_MaxExamples(maxExamples) {
+CCategoryExamplesCollector::CCategoryExamplesCollector(std::size_t maxExamples)
+    : m_MaxExamples(maxExamples) {
 }
 
-CCategoryExamplesCollector::CCategoryExamplesCollector(std::size_t maxExamples, core::CStateRestoreTraverser& traverser)
+CCategoryExamplesCollector::CCategoryExamplesCollector(std::size_t maxExamples,
+                                                       core::CStateRestoreTraverser& traverser)
     : m_MaxExamples(maxExamples) {
-    traverser.traverseSubLevel(boost::bind(&CCategoryExamplesCollector::acceptRestoreTraverser, this, _1));
+    traverser.traverseSubLevel(
+        boost::bind(&CCategoryExamplesCollector::acceptRestoreTraverser, this, _1));
 }
 
 bool CCategoryExamplesCollector::add(std::size_t category, const std::string& example) {
@@ -56,7 +59,8 @@ std::size_t CCategoryExamplesCollector::numberOfExamplesForCategory(std::size_t
     return (iterator == m_ExamplesByCategory.end()) ? 0 : iterator->second.size();
 }
 
-const CCategoryExamplesCollector::TStrSet& CCategoryExamplesCollector::examples(std::size_t category) const {
+const CCategoryExamplesCollector::TStrSet&
+CCategoryExamplesCollector::examples(std::size_t category) const {
     auto iterator = m_ExamplesByCategory.find(category);
     if (iterator == m_ExamplesByCategory.end()) {
         return EMPTY_EXAMPLES;
@@ -81,10 +85,10 @@ void CCategoryExamplesCollector::acceptPersistInserter(core::CStatePersistInsert
     std::sort(orderedData.begin(), orderedData.end());
 
     for (const auto& exampleByCategory : orderedData) {
-        inserter.insertLevel(
-            EXAMPLES_BY_CATEGORY_TAG,
-            boost::bind(
-                &CCategoryExamplesCollector::persistExamples, this, exampleByCategory.first, boost::cref(*exampleByCategory.second), _1));
+        inserter.insertLevel(EXAMPLES_BY_CATEGORY_TAG,
+                             boost::bind(&CCategoryExamplesCollector::persistExamples,
+                                         this, exampleByCategory.first,
+                                         boost::cref(*exampleByCategory.second), _1));
     }
 }
 
@@ -102,7 +106,8 @@ bool CCategoryExamplesCollector::acceptRestoreTraverser(core::CStateRestoreTrave
     do {
         const std::string& name = traverser.name();
         if (name == EXAMPLES_BY_CATEGORY_TAG) {
-            if (traverser.traverseSubLevel(boost::bind(&CCategoryExamplesCollector::restoreExamples, this, _1)) == false) {
+            if (traverser.traverseSubLevel(boost::bind(
+                    &CCategoryExamplesCollector::restoreExamples, this, _1)) == false) {
                 LOG_ERROR(<< "Error restoring examples by category");
                 return false;
             }
@@ -127,7 +132,8 @@ bool CCategoryExamplesCollector::restoreExamples(core::CStateRestoreTraverser& t
         }
     } while (traverser.next());
 
-    LOG_TRACE(<< "Restoring examples for category " << category << ": " << core::CContainerPrinter::print(examples));
+    LOG_TRACE(<< "Restoring examples for category " << category << ": "
+              << core::CContainerPrinter::print(examples));
 
     m_ExamplesByCategory[category].swap(examples);
     return true;
@@ -142,7 +148,8 @@ std::string CCategoryExamplesCollector::truncateExample(std::string example) {
         size_t replacePos(MAX_EXAMPLE_LENGTH - ELLIPSIS.length());
 
         // Ensure truncation doesn't result in a partial UTF-8 character
-        while (replacePos > 0 && core::CStringUtils::utf8ByteType(example[replacePos]) == -1) {
+        while (replacePos > 0 &&
+               core::CStringUtils::utf8ByteType(example[replacePos]) == -1) {
             --replacePos;
         }
         example.replace(replacePos, example.length() - replacePos, ELLIPSIS);
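
The final hunk above wraps the guard that stops truncateExample() cutting a multi-byte UTF-8 character in half. Stepping the cut point back while it lands on a continuation byte is the standard trick; here it is in standalone form, with isContinuationByte() as an assumed stand-in for what the core::CStringUtils::utf8ByteType(...) == -1 test detects (bytes of the form 10xxxxxx, which never start a character):

    #include <cstddef>
    #include <string>

    bool isContinuationByte(char c) {
        return (static_cast<unsigned char>(c) & 0xC0) == 0x80;
    }

    // Assumes maxLen is at least as long as the ellipsis
    std::string truncate(std::string example, std::size_t maxLen, const std::string& ellipsis) {
        if (example.length() <= maxLen) {
            return example;
        }
        std::size_t replacePos = maxLen - ellipsis.length();
        // Never cut in the middle of a multi-byte character
        while (replacePos > 0 && isContinuationByte(example[replacePos])) {
            --replacePos;
        }
        example.replace(replacePos, example.length() - replacePos, ellipsis);
        return example;
    }
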
diff --git a/lib/api/CCmdSkeleton.cc b/lib/api/CCmdSkeleton.cc
index ab7eb565b8..68b003dc7b 100644
--- a/lib/api/CCmdSkeleton.cc
+++ b/lib/api/CCmdSkeleton.cc
@@ -21,7 +21,8 @@ CCmdSkeleton::CCmdSkeleton(core::CDataSearcher* restoreSearcher,
                            core::CDataAdder* persister,
                            CInputParser& inputParser,
                            CDataProcessor& processor)
-    : m_RestoreSearcher(restoreSearcher), m_Persister(persister), m_InputParser(inputParser), m_Processor(processor) {
+    : m_RestoreSearcher(restoreSearcher), m_Persister(persister),
+      m_InputParser(inputParser), m_Processor(processor) {
 }
 
 bool CCmdSkeleton::ioLoop() {
@@ -35,7 +36,8 @@ bool CCmdSkeleton::ioLoop() {
         }
     }
 
-    if (m_InputParser.readStream(boost::bind(&CDataProcessor::handleRecord, &m_Processor, _1)) == false) {
+    if (m_InputParser.readStream(boost::bind(&CDataProcessor::handleRecord,
+                                             &m_Processor, _1)) == false) {
         LOG_FATAL(<< "Failed to handle all input data");
         return false;
     }
diff --git a/lib/api/CConfigUpdater.cc b/lib/api/CConfigUpdater.cc
index 578e3eefd4..a1a51b9471 100644
--- a/lib/api/CConfigUpdater.cc
+++ b/lib/api/CConfigUpdater.cc
@@ -20,7 +20,8 @@ const std::string CConfigUpdater::RULES_JSON("rulesJson");
 const std::string CConfigUpdater::FILTERS("filters");
 const std::string CConfigUpdater::SCHEDULED_EVENTS("scheduledEvents");
 
-CConfigUpdater::CConfigUpdater(CFieldConfig& fieldConfig, model::CAnomalyDetectorModelConfig& modelConfig)
+CConfigUpdater::CConfigUpdater(CFieldConfig& fieldConfig,
+                               model::CAnomalyDetectorModelConfig& modelConfig)
     : m_FieldConfig(fieldConfig), m_ModelConfig(modelConfig) {
 }
 
@@ -35,7 +36,8 @@ bool CConfigUpdater::update(const std::string& config) {
         return false;
     }
 
-    for (boost::property_tree::ptree::const_iterator stanzaItr = propTree.begin(); stanzaItr != propTree.end(); ++stanzaItr) {
+    for (boost::property_tree::ptree::const_iterator stanzaItr = propTree.begin();
+         stanzaItr != propTree.end(); ++stanzaItr) {
         const std::string& stanzaName = stanzaItr->first;
         const boost::property_tree::ptree& subTree = stanzaItr->second;
diff --git a/lib/api/CCsvInputParser.cc b/lib/api/CCsvInputParser.cc
index b159d12b7d..c9f52034f1 100644
--- a/lib/api/CCsvInputParser.cc
+++ b/lib/api/CCsvInputParser.cc
@@ -24,24 +24,15 @@ const char CCsvInputParser::STRIP_BEFORE_END('\r');
 const size_t CCsvInputParser::WORK_BUFFER_SIZE(131072); // 128kB
 
 CCsvInputParser::CCsvInputParser(const std::string& input, char separator)
-    : CInputParser(),
-      m_StringInputBuf(input),
-      m_StrmIn(m_StringInputBuf),
-      m_WorkBuffer(nullptr),
-      m_WorkBufferPtr(nullptr),
-      m_WorkBufferEnd(nullptr),
-      m_NoMoreRecords(false),
-      m_LineParser(separator) {
+    : CInputParser(), m_StringInputBuf(input), m_StrmIn(m_StringInputBuf),
+      m_WorkBuffer(nullptr), m_WorkBufferPtr(nullptr), m_WorkBufferEnd(nullptr),
+      m_NoMoreRecords(false), m_LineParser(separator) {
 }
 
 CCsvInputParser::CCsvInputParser(std::istream& strmIn, char separator)
-    : CInputParser(),
-      m_StrmIn(strmIn),
-      m_WorkBuffer(nullptr),
-      m_WorkBufferPtr(nullptr),
-      m_WorkBufferEnd(nullptr),
-      m_NoMoreRecords(false),
-      m_LineParser(separator) {
+    : CInputParser(), m_StrmIn(strmIn), m_WorkBuffer(nullptr),
+      m_WorkBufferPtr(nullptr), m_WorkBufferEnd(nullptr),
+      m_NoMoreRecords(false), m_LineParser(separator) {
 }
 
 const std::string& CCsvInputParser::fieldNameStr() const {
@@ -138,7 +129,8 @@ bool CCsvInputParser::parseCsvRecordFromStream() {
             m_WorkBufferEnd = m_WorkBufferPtr + avail;
         }
 
-        const char* delimPtr(reinterpret_cast<const char*>(::memchr(m_WorkBufferPtr, RECORD_END, avail)));
+        const char* delimPtr(reinterpret_cast<const char*>(
+            ::memchr(m_WorkBufferPtr, RECORD_END, avail)));
         const char* endPtr(m_WorkBufferEnd);
         if (delimPtr != nullptr) {
             endPtr = delimPtr;
@@ -233,8 +225,9 @@ bool CCsvInputParser::parseDataRecord(const TStrRefVec& fieldValRefs) {
         while (m_LineParser.parseNext(extraField) == true) {
             ++numExtraFields;
         }
-        LOG_ERROR(<< "Data record contains " << numExtraFields << " more fields than header:" << core_t::LINE_ENDING << m_CurrentRowStr
-                  << core_t::LINE_ENDING << "and:" << core_t::LINE_ENDING << m_FieldNameStr);
+        LOG_ERROR(<< "Data record contains " << numExtraFields << " more fields than header:"
+                  << core_t::LINE_ENDING << m_CurrentRowStr << core_t::LINE_ENDING
+                  << "and:" << core_t::LINE_ENDING << m_FieldNameStr);
         return false;
     }
 
@@ -244,12 +237,8 @@ bool CCsvInputParser::parseDataRecord(const TStrRefVec& fieldValRefs) {
 }
 
 CCsvInputParser::CCsvLineParser::CCsvLineParser(char separator)
-    : m_Separator(separator),
-      m_SeparatorAfterLastField(false),
-      m_Line(nullptr),
-      m_LineCurrent(nullptr),
-      m_LineEnd(nullptr),
-      m_WorkFieldEnd(nullptr),
+    : m_Separator(separator), m_SeparatorAfterLastField(false), m_Line(nullptr),
+      m_LineCurrent(nullptr), m_LineEnd(nullptr), m_WorkFieldEnd(nullptr),
       m_WorkFieldCapacity(0) {
 }
 
@@ -292,7 +281,8 @@ bool CCsvInputParser::CCsvLineParser::parseNextToken(const char* end, const char
     if (current == end) {
         // Allow one empty token at the end of a line
        if (!m_SeparatorAfterLastField) {
-            LOG_ERROR(<< "Trying to read too many fields from record:" << core_t::LINE_ENDING << *m_Line);
+            LOG_ERROR(<< "Trying to read too many fields from record:" << core_t::LINE_ENDING
+                      << *m_Line);
             return false;
         }
         m_SeparatorAfterLastField = false;
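
The parseCsvRecordFromStream() hunk above re-wraps the memchr() call that locates the end of the current record in the work buffer. The shape of that scan, stripped of the parser's buffer-refill and carry-over logic (a simplified sketch under those assumptions, not the real CCsvInputParser):

    #include <cstddef>
    #include <cstring>
    #include <istream>
    #include <string>

    // Read one block and split it at the next record terminator; a real parser
    // keeps the bytes after 'endPtr' for the next call instead of discarding them.
    bool readRecord(std::istream& strmIn, char* workBuffer, std::size_t capacity,
                    std::string& record, char recordEnd = '\n') {
        strmIn.read(workBuffer, static_cast<std::streamsize>(capacity));
        std::size_t avail = static_cast<std::size_t>(strmIn.gcount());
        if (avail == 0) {
            return false; // no more records
        }
        const char* delimPtr = static_cast<const char*>(std::memchr(workBuffer, recordEnd, avail));
        const char* endPtr = (delimPtr != nullptr) ? delimPtr : workBuffer + avail;
        record.assign(workBuffer, endPtr);
        return true;
    }
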
diff --git a/lib/api/CCsvOutputWriter.cc b/lib/api/CCsvOutputWriter.cc
index c305de2b13..76e845e7ee 100644
--- a/lib/api/CCsvOutputWriter.cc
+++ b/lib/api/CCsvOutputWriter.cc
@@ -20,11 +20,8 @@ const char CCsvOutputWriter::QUOTE('"');
 const char CCsvOutputWriter::RECORD_END('\n');
 
 CCsvOutputWriter::CCsvOutputWriter(bool outputMessages, bool outputHeader, char escape, char separator)
-    : m_StrmOut(m_StringOutputBuf),
-      m_OutputMessages(outputMessages),
-      m_OutputHeader(outputHeader),
-      m_Escape(escape),
-      m_Separator(separator) {
+    : m_StrmOut(m_StringOutputBuf), m_OutputMessages(outputMessages),
+      m_OutputHeader(outputHeader), m_Escape(escape), m_Separator(separator) {
     if (m_Separator == QUOTE || m_Separator == m_Escape || m_Separator == RECORD_END) {
         LOG_ERROR(<< "CSV output writer will not generate parsable output because "
                      "separator character ("
@@ -34,8 +31,13 @@ CCsvOutputWriter::CCsvOutputWriter(bool outputMessages, bool outputHeader, char
     }
 }
 
-CCsvOutputWriter::CCsvOutputWriter(std::ostream& strmOut, bool outputMessages, bool outputHeader, char escape, char separator)
-    : m_StrmOut(strmOut), m_OutputMessages(outputMessages), m_OutputHeader(outputHeader), m_Escape(escape), m_Separator(separator) {
+CCsvOutputWriter::CCsvOutputWriter(std::ostream& strmOut,
+                                   bool outputMessages,
+                                   bool outputHeader,
+                                   char escape,
+                                   char separator)
+    : m_StrmOut(strmOut), m_OutputMessages(outputMessages),
+      m_OutputHeader(outputHeader), m_Escape(escape), m_Separator(separator) {
     if (m_Separator == QUOTE || m_Separator == m_Escape || m_Separator == RECORD_END) {
         LOG_ERROR(<< "CSV output writer will not generate parsable output because "
                      "separator character ("
@@ -61,7 +63,8 @@ bool CCsvOutputWriter::fieldNames(const TStrVec& fieldNames, const TStrVec& extr
 
     // Only add extra field names if they're not already present
     for (TStrVecCItr iter = extraFieldNames.begin(); iter != extraFieldNames.end(); ++iter) {
-        if (std::find(m_FieldNames.begin(), m_FieldNames.end(), *iter) == m_FieldNames.end()) {
+        if (std::find(m_FieldNames.begin(), m_FieldNames.end(), *iter) ==
+            m_FieldNames.end()) {
            m_FieldNames.push_back(*iter);
         }
     }
@@ -95,7 +98,8 @@ bool CCsvOutputWriter::fieldNames(const TStrVec& fieldNames, const TStrVec& extr
 
     // Messages are output in arrears - this is not ideal - TODO
     if (m_OutputMessages) {
-        for (TStrStrPrSetCItr msgIter = m_Messages.begin(); msgIter != m_Messages.end(); ++msgIter) {
+        for (TStrStrPrSetCItr msgIter = m_Messages.begin();
+             msgIter != m_Messages.end(); ++msgIter) {
             m_StrmOut << msgIter->first << '=' << msgIter->second << RECORD_END;
             LOG_DEBUG(<< "Forwarded " << msgIter->first << '=' << msgIter->second);
         }
@@ -117,7 +121,8 @@ const COutputHandler::TStrVec& CCsvOutputWriter::fieldNames() const {
     return m_FieldNames;
 }
 
-bool CCsvOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) {
+bool CCsvOutputWriter::writeRow(const TStrStrUMap& dataRowFields,
+                                const TStrStrUMap& overrideDataRowFields) {
     if (m_FieldNames.empty()) {
         LOG_ERROR(<< "Attempt to write data before field names");
         return false;
@@ -132,7 +137,8 @@ bool CCsvOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrU
     TStrVecCItr fieldNameIter = m_FieldNames.begin();
     TPreComputedHashVecCItr preComputedHashIter = m_Hashes.begin();
 
-    TStrStrUMapCItr fieldValueIter = overrideDataRowFields.find(*fieldNameIter, *preComputedHashIter, pred);
+    TStrStrUMapCItr fieldValueIter =
+        overrideDataRowFields.find(*fieldNameIter, *preComputedHashIter, pred);
     if (fieldValueIter == overrideDataRowFields.end()) {
         fieldValueIter = dataRowFields.find(*fieldNameIter, *preComputedHashIter, pred);
         if (fieldValueIter == dataRowFields.end()) {
@@ -144,11 +150,13 @@ bool CCsvOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrU
     }
     this->appendField(fieldValueIter->second);
 
-    for (++fieldNameIter, ++preComputedHashIter; fieldNameIter != m_FieldNames.end() && preComputedHashIter != m_Hashes.end();
+    for (++fieldNameIter, ++preComputedHashIter;
+         fieldNameIter != m_FieldNames.end() && preComputedHashIter != m_Hashes.end();
          ++fieldNameIter, ++preComputedHashIter) {
         m_WorkRecord += m_Separator;
 
-        fieldValueIter = overrideDataRowFields.find(*fieldNameIter, *preComputedHashIter, pred);
+        fieldValueIter = overrideDataRowFields.find(*fieldNameIter,
+                                                    *preComputedHashIter, pred);
         if (fieldValueIter == overrideDataRowFields.end()) {
             fieldValueIter = dataRowFields.find(*fieldNameIter, *preComputedHashIter, pred);
             if (fieldValueIter == dataRowFields.end()) {
@@ -186,7 +194,8 @@ void CCsvOutputWriter::appendField(const std::string& field) {
     bool needOuterQuotes(false);
     for (std::string::const_iterator iter = field.begin(); iter != field.end(); ++iter) {
         char curChar(*iter);
-        if (curChar == m_Separator || curChar == QUOTE || curChar == RECORD_END || curChar == m_Escape) {
+        if (curChar == m_Separator || curChar == QUOTE ||
+            curChar == RECORD_END || curChar == m_Escape) {
             needOuterQuotes = true;
             break;
         }
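
The appendField() hunk above is only re-wrapped, but the rule it implements is the heart of CSV escaping: a field needs outer quotes as soon as it contains any structurally significant character. In isolation (a sketch of just that test, not the member function itself, which additionally escapes quotes inside the field):

    #include <string>

    bool needsOuterQuotes(const std::string& field, char separator, char escape,
                          char quote = '"', char recordEnd = '\n') {
        for (char curChar : field) {
            if (curChar == separator || curChar == quote ||
                curChar == recordEnd || curChar == escape) {
                return true;
            }
        }
        return false;
    }
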
diff --git a/lib/api/CDataProcessor.cc b/lib/api/CDataProcessor.cc
index eca12db80a..bf5bc0640e 100644
--- a/lib/api/CDataProcessor.cc
+++ b/lib/api/CDataProcessor.cc
@@ -34,7 +34,8 @@ std::string CDataProcessor::debugPrintRecord(const TStrStrUMap& dataRowFields) {
     // We want to print the field names on one line, followed by the field
     // values on the next line
 
-    for (TStrStrUMapCItr rowIter = dataRowFields.begin(); rowIter != dataRowFields.end(); ++rowIter) {
+    for (TStrStrUMapCItr rowIter = dataRowFields.begin();
+         rowIter != dataRowFields.end(); ++rowIter) {
         if (rowIter != dataRowFields.begin()) {
             fieldNames.push_back(',');
             fieldValues.push_back(',');
diff --git a/lib/api/CDataTyper.cc b/lib/api/CDataTyper.cc
index 13364db450..87691f31d1 100644
--- a/lib/api/CDataTyper.cc
+++ b/lib/api/CDataTyper.cc
@@ -11,7 +11,8 @@ namespace api {
 // Initialise statics
 const CDataTyper::TStrStrUMap CDataTyper::EMPTY_FIELDS;
 
-CDataTyper::CDataTyper(const std::string& fieldName) : m_FieldName(fieldName), m_LastPersistTime(0) {
+CDataTyper::CDataTyper(const std::string& fieldName)
+    : m_FieldName(fieldName), m_LastPersistTime(0) {
 }
 
 CDataTyper::~CDataTyper() {
diff --git a/lib/api/CDetectionRulesJsonParser.cc b/lib/api/CDetectionRulesJsonParser.cc
index 08f6f92f88..58c14de602 100644
--- a/lib/api/CDetectionRulesJsonParser.cc
+++ b/lib/api/CDetectionRulesJsonParser.cc
@@ -41,7 +41,8 @@ const std::string FIELD_VALUE("field_value");
 const std::string FILTER_ID("filter_id");
 }
 
-CDetectionRulesJsonParser::CDetectionRulesJsonParser(TStrPatternSetUMap& filtersByIdMap) : m_FiltersByIdMap(filtersByIdMap) {
+CDetectionRulesJsonParser::CDetectionRulesJsonParser(TStrPatternSetUMap& filtersByIdMap)
+    : m_FiltersByIdMap(filtersByIdMap) {
 }
 
 bool CDetectionRulesJsonParser::parseRules(const std::string& json, TDetectionRuleVec& rules) {
@@ -50,7 +51,8 @@ bool CDetectionRulesJsonParser::parseRules(const std::string& json, TDetectionRu
     rules.clear();
     rapidjson::Document doc;
     if (doc.Parse<0>(json.c_str()).HasParseError()) {
-        LOG_ERROR(<< "An error occurred while parsing detection rules from JSON: " << doc.GetParseError());
+        LOG_ERROR(<< "An error occurred while parsing detection rules from JSON: "
+                  << doc.GetParseError());
         return false;
     }
 
@@ -102,17 +104,20 @@ bool CDetectionRulesJsonParser::parseRules(const std::string& json, TDetectionRu
     return true;
 }
 
-bool CDetectionRulesJsonParser::hasStringMember(const rapidjson::Value& object, const std::string& name) {
+bool CDetectionRulesJsonParser::hasStringMember(const rapidjson::Value& object,
+                                                const std::string& name) {
     const char* nameAsCStr = name.c_str();
     return object.HasMember(nameAsCStr) && object[nameAsCStr].IsString();
 }
 
-bool CDetectionRulesJsonParser::hasArrayMember(const rapidjson::Value& object, const std::string& name) {
+bool CDetectionRulesJsonParser::hasArrayMember(const rapidjson::Value& object,
+                                               const std::string& name) {
     const char* nameAsCStr = name.c_str();
     return object.HasMember(nameAsCStr) && object[nameAsCStr].IsArray();
 }
 
-bool CDetectionRulesJsonParser::parseRuleActions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule) {
+bool CDetectionRulesJsonParser::parseRuleActions(const rapidjson::Value& ruleObject,
+                                                 model::CDetectionRule& rule) {
     if (!hasArrayMember(ruleObject, ACTIONS)) {
         LOG_ERROR(<< "Missing rule field: " << ACTIONS);
         return false;
@@ -142,7 +147,8 @@ bool CDetectionRulesJsonParser::parseRuleActions(const rapidjson::Value& ruleObj
     return true;
 }
 
-bool CDetectionRulesJsonParser::parseConditionsConnective(const rapidjson::Value& ruleObject, model::CDetectionRule& rule) {
+bool CDetectionRulesJsonParser::parseConditionsConnective(const rapidjson::Value& ruleObject,
+                                                          model::CDetectionRule& rule) {
     if (!hasStringMember(ruleObject, CONDITIONS_CONNECTIVE)) {
         LOG_ERROR(<< "Missing rule field: " << CONDITIONS_CONNECTIVE);
         return false;
@@ -160,7 +166,8 @@ bool CDetectionRulesJsonParser::parseConditionsConnective(const rapidjson::Value
     return true;
 }
 
-bool CDetectionRulesJsonParser::parseRuleConditions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule) {
+bool CDetectionRulesJsonParser::parseRuleConditions(const rapidjson::Value& ruleObject,
+                                                    model::CDetectionRule& rule) {
     if (!hasArrayMember(ruleObject, CONDITIONS)) {
         LOG_ERROR(<< "Missing rule field: " << CONDITIONS);
         return false;
@@ -208,7 +215,8 @@ bool CDetectionRulesJsonParser::parseRuleConditions(const rapidjson::Value& rule
     return true;
 }
 
-bool CDetectionRulesJsonParser::parseFilterId(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition) {
+bool CDetectionRulesJsonParser::parseFilterId(const rapidjson::Value& conditionObject,
+                                              model::CRuleCondition& ruleCondition) {
     if (!hasStringMember(conditionObject, FILTER_ID)) {
         LOG_ERROR(<< "Missing condition field: " << FILTER_ID);
         return false;
@@ -223,7 +231,8 @@ bool CDetectionRulesJsonParser::parseFilterId(const rapidjson::Value& conditionO
     return true;
 }
 
-bool CDetectionRulesJsonParser::parseRuleConditionType(const rapidjson::Value& ruleConditionObject, model::CRuleCondition& ruleCondition) {
+bool CDetectionRulesJsonParser::parseRuleConditionType(const rapidjson::Value& ruleConditionObject,
+                                                       model::CRuleCondition& ruleCondition) {
     if (!hasStringMember(ruleConditionObject, TYPE)) {
         LOG_ERROR(<< "Missing ruleCondition field: " << TYPE);
         return false;
@@ -249,7 +258,8 @@ bool CDetectionRulesJsonParser::parseRuleConditionType(const rapidjson::Value& r
     return true;
 }
 
-bool CDetectionRulesJsonParser::parseCondition(const rapidjson::Value& ruleConditionObject, model::CRuleCondition& ruleCondition) {
+bool CDetectionRulesJsonParser::parseCondition(const rapidjson::Value& ruleConditionObject,
+                                               model::CRuleCondition& ruleCondition) {
     if (!ruleConditionObject.HasMember(CONDITION.c_str())) {
         LOG_ERROR(<< "Missing ruleCondition field: " << CONDITION);
         return false;
@@ -260,10 +270,12 @@ bool CDetectionRulesJsonParser::parseCondition(const rapidjson::Value& ruleCondi
         return false;
     }
 
-    return parseConditionOperator(conditionObject, ruleCondition) && parseConditionThreshold(conditionObject, ruleCondition);
+    return parseConditionOperator(conditionObject, ruleCondition) &&
+           parseConditionThreshold(conditionObject, ruleCondition);
 }
 
-bool CDetectionRulesJsonParser::parseConditionOperator(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition) {
+bool CDetectionRulesJsonParser::parseConditionOperator(const rapidjson::Value& conditionObject,
+                                                       model::CRuleCondition& ruleCondition) {
     if (!hasStringMember(conditionObject, OPERATOR)) {
         LOG_ERROR(<< "Missing condition field: " << OPERATOR);
         return false;
@@ -285,14 +297,16 @@ bool CDetectionRulesJsonParser::parseConditionOperator(const rapidjson::Value& c
     return true;
 }
 
-bool CDetectionRulesJsonParser::parseConditionThreshold(const rapidjson::Value& conditionObject, model::CRuleCondition& ruleCondition) {
+bool CDetectionRulesJsonParser::parseConditionThreshold(const rapidjson::Value& conditionObject,
+                                                        model::CRuleCondition& ruleCondition) {
     if (!hasStringMember(conditionObject, VALUE)) {
         LOG_ERROR(<< "Missing condition field: " << VALUE);
         return false;
     }
 
     const std::string valueString = conditionObject[VALUE.c_str()].GetString();
-    if (core::CStringUtils::stringToType(valueString, ruleCondition.condition().s_Threshold) == false) {
+    if (core::CStringUtils::stringToType(
+            valueString, ruleCondition.condition().s_Threshold) == false) {
         LOG_ERROR(<< "Invalid operator value: " << valueString);
         return false;
     }
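
CDetectionRulesJsonParser leans on the two small helpers above, hasStringMember() and hasArrayMember(), so that every field access is preceded by a presence-and-type check; with rapidjson, reading a missing or mistyped member trips an assertion rather than throwing. A freestanding illustration of the same defensive pattern (the field name "conditions" and the helper countConditions() are assumptions for illustration, not the parser's API):

    #include <cstddef>
    #include <string>
    #include <rapidjson/document.h>

    bool hasArrayMember(const rapidjson::Value& object, const std::string& name) {
        const char* nameAsCStr = name.c_str();
        return object.HasMember(nameAsCStr) && object[nameAsCStr].IsArray();
    }

    bool countConditions(const std::string& json, std::size_t& count) {
        rapidjson::Document doc;
        if (doc.Parse<0>(json.c_str()).HasParseError()) {
            return false; // malformed JSON
        }
        // Check presence and type before touching the member
        if (!doc.IsObject() || !hasArrayMember(doc, "conditions")) {
            return false;
        }
        count = doc["conditions"].Size();
        return true;
    }
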
diff --git a/lib/api/CFieldConfig.cc b/lib/api/CFieldConfig.cc
index 108f7ce9de..f677fd815c 100644
--- a/lib/api/CFieldConfig.cc
+++ b/lib/api/CFieldConfig.cc
@@ -138,7 +138,8 @@ const std::string CFieldConfig::CLEAR("clear");
 CFieldConfig::CFieldConfig() {
 }
 
-CFieldConfig::CFieldConfig(const std::string& categorizationFieldName) : m_CategorizationFieldName(categorizationFieldName) {
+CFieldConfig::CFieldConfig(const std::string& categorizationFieldName)
+    : m_CategorizationFieldName(categorizationFieldName) {
     this->seenField(categorizationFieldName);
 }
 
@@ -225,27 +226,33 @@ bool CFieldConfig::initFromFile(const std::string& configFile) {
     TIntSet handledConfigs;
     TIntSet handledScheduledEvents;
 
-    for (boost::property_tree::ptree::iterator level1Iter = propTree.begin(); level1Iter != propTree.end(); ++level1Iter) {
+    for (boost::property_tree::ptree::iterator level1Iter = propTree.begin();
+         level1Iter != propTree.end(); ++level1Iter) {
         const std::string& level1Key = level1Iter->first;
         const std::string& value = level1Iter->second.data();
-        if (level1Key.length() > DETECTOR_PREFIX.length() && level1Key.compare(0, DETECTOR_PREFIX.length(), DETECTOR_PREFIX) == 0) {
+        if (level1Key.length() > DETECTOR_PREFIX.length() &&
+            level1Key.compare(0, DETECTOR_PREFIX.length(), DETECTOR_PREFIX) == 0) {
             if (this->processDetector(propTree, level1Key, value, handledConfigs) == false) {
                 LOG_ERROR(<< "Error reading config file " << configFile);
                 return false;
             }
         } else if (level1Key.length() > CATEGORIZATION_FILTER_PREFIX.length() &&
-                   level1Key.compare(0, CATEGORIZATION_FILTER_PREFIX.length(), CATEGORIZATION_FILTER_PREFIX) == 0) {
+                   level1Key.compare(0, CATEGORIZATION_FILTER_PREFIX.length(),
+                                     CATEGORIZATION_FILTER_PREFIX) == 0) {
             this->addCategorizationFilter(value);
         } else if (level1Key.length() > INFLUENCER_PREFIX.length() &&
                    level1Key.compare(0, INFLUENCER_PREFIX.length(), INFLUENCER_PREFIX) == 0) {
             this->addInfluencerFieldName(value);
-        } else if (level1Key.length() > FILTER_PREFIX.length() && level1Key.compare(0, FILTER_PREFIX.length(), FILTER_PREFIX) == 0) {
+        } else if (level1Key.length() > FILTER_PREFIX.length() &&
+                   level1Key.compare(0, FILTER_PREFIX.length(), FILTER_PREFIX) == 0) {
             this->processFilter(level1Key, value);
         } else if (level1Key.length() > SCHEDULED_EVENT_PREFIX.length() &&
-                   level1Key.compare(0, SCHEDULED_EVENT_PREFIX.length(), SCHEDULED_EVENT_PREFIX) == 0) {
+                   level1Key.compare(0, SCHEDULED_EVENT_PREFIX.length(),
+                                     SCHEDULED_EVENT_PREFIX) == 0) {
             this->processScheduledEvent(propTree, level1Key, value, handledScheduledEvents);
         } else {
-            LOG_ERROR(<< "Invalid setting " << level1Key << " = " << value << " in config file " << configFile);
+            LOG_ERROR(<< "Invalid setting " << level1Key << " = " << value
+                      << " in config file " << configFile);
             return false;
         }
     }
@@ -264,7 +271,8 @@ bool CFieldConfig::tokenise(const std::string& clause, TStrVec& copyTokens) {
     using TCharEscapedListSeparatorTokenizer = boost::tokenizer<boost::escaped_list_separator<char>>;
     TCharEscapedListSeparatorTokenizer tokenizer(clause, els);
-    for (TCharEscapedListSeparatorTokenizer::iterator iter = tokenizer.begin(); iter != tokenizer.end(); ++iter) {
+    for (TCharEscapedListSeparatorTokenizer::iterator iter = tokenizer.begin();
+         iter != tokenizer.end(); ++iter) {
         const std::string& token = *iter;
         if (token.empty()) {
             // boost::escaped_list_separator creates empty tokens for
@@ -312,14 +320,16 @@ void CFieldConfig::retokenise(const TStrVec& tokens, TStrVec& copyTokens) {
     }
 }
 
-bool CFieldConfig::findLastByOverTokens(const TStrVec& copyTokens, std::size_t& lastByTokenIndex, std::size_t& lastOverTokenIndex) {
+bool CFieldConfig::findLastByOverTokens(const TStrVec& copyTokens,
+                                        std::size_t& lastByTokenIndex,
+                                        std::size_t& lastOverTokenIndex) {
     for (size_t index = 0; index < copyTokens.size(); ++index) {
         if (copyTokens[index].length() == BY_TOKEN.length() &&
             core::CStrCaseCmp::strCaseCmp(copyTokens[index].c_str(), BY_TOKEN.c_str()) == 0) {
             if (lastByTokenIndex != copyTokens.size()) {
                 LOG_ERROR(<< "Multiple '" << copyTokens[lastByTokenIndex] << "' tokens in analysis clause - tokens "
-                          << core::CStringUtils::typeToString(1 + lastByTokenIndex) << " and "
-                          << core::CStringUtils::typeToString(1 + index));
+                          << core::CStringUtils::typeToString(1 + lastByTokenIndex)
+                          << " and " << core::CStringUtils::typeToString(1 + index));
                 return false;
             }
 
@@ -327,11 +337,12 @@ bool CFieldConfig::findLastByOverTokens(const TStrVec& copyTokens, std::size_t&
         }
 
         if (copyTokens[index].length() == OVER_TOKEN.length() &&
-            core::CStrCaseCmp::strCaseCmp(copyTokens[index].c_str(), OVER_TOKEN.c_str()) == 0) {
+            core::CStrCaseCmp::strCaseCmp(copyTokens[index].c_str(),
+                                          OVER_TOKEN.c_str()) == 0) {
             if (lastOverTokenIndex != copyTokens.size()) {
                 LOG_ERROR(<< "Multiple '" << copyTokens[lastOverTokenIndex] << "' tokens in analysis clause - tokens "
-                          << core::CStringUtils::typeToString(1 + lastOverTokenIndex) << " and "
-                          << core::CStringUtils::typeToString(1 + index));
+                          << core::CStringUtils::typeToString(1 + lastOverTokenIndex)
+                          << " and " << core::CStringUtils::typeToString(1 + index));
                 return false;
             }
 
@@ -348,24 +359,28 @@ bool CFieldConfig::validateByOverField(const TStrVec& copyTokens,
                                        std::string& fieldName) {
     if (thisIndex != copyTokens.size()) {
         if (thisIndex == 0) {
-            LOG_ERROR(<< "Analysis clause begins with a '" << copyTokens[thisIndex] << "' token");
+            LOG_ERROR(<< "Analysis clause begins with a '"
+                      << copyTokens[thisIndex] << "' token");
             return false;
         }
 
         if (thisIndex + 1 == copyTokens.size() || thisIndex + 1 == otherIndex) {
-            LOG_ERROR(<< "No field name follows the '" << copyTokens[thisIndex] << "' token in the analysis clause");
+            LOG_ERROR(<< "No field name follows the '" << copyTokens[thisIndex]
+                      << "' token in the analysis clause");
             return false;
        }
 
         if (thisIndex + 2 < copyTokens.size() && thisIndex + 2 < otherIndex) {
-            LOG_ERROR(<< "Only one field name may follow the '" << copyTokens[thisIndex] << "' token in the analysis clause");
+            LOG_ERROR(<< "Only one field name may follow the '"
+                      << copyTokens[thisIndex] << "' token in the analysis clause");
             return false;
         }
 
         fieldName = copyTokens[thisIndex + 1];
         for (const auto& clashingName : clashingNames) {
             if (fieldName == clashingName) {
-                LOG_ERROR(<< "The '" << copyTokens[thisIndex] << "' field cannot be " << fieldName);
+                LOG_ERROR(<< "The '" << copyTokens[thisIndex]
+                          << "' field cannot be " << fieldName);
                 return false;
             }
         }
@@ -378,7 +393,8 @@ std::string CFieldConfig::findParameter(const std::string& parameter, TStrVec& c
     for (TStrVecItr iter = copyTokens.begin(); iter != copyTokens.end(); ++iter) {
         const std::string& token = *iter;
         std::size_t equalPos = token.find('=');
-        if (equalPos == parameter.length() && core::CStrCaseCmp::strNCaseCmp(parameter.c_str(), token.c_str(), equalPos) == 0) {
+        if (equalPos == parameter.length() &&
+            core::CStrCaseCmp::strNCaseCmp(parameter.c_str(), token.c_str(), equalPos) == 0) {
             std::string value(token, equalPos + 1, token.length() - equalPos);
             LOG_TRACE(<< "Found parameter " << parameter << " : " << value);
             copyTokens.erase(iter);
@@ -409,8 +425,8 @@ bool CFieldConfig::initFromClause(const TStrVec& tokens) {
     std::string defaultCategorizationFieldName;
     std::string summaryCountFieldName;
-    if (this->parseClause(true, 0, EMPTY_STRING, copyTokens, m_FieldOptions, defaultCategorizationFieldName, summaryCountFieldName) ==
-        false) {
+    if (this->parseClause(true, 0, EMPTY_STRING, copyTokens, m_FieldOptions,
+                          defaultCategorizationFieldName, summaryCountFieldName) == false) {
         // parseClause() will have logged the problem
         return false;
     }
@@ -431,7 +447,8 @@ bool CFieldConfig::addOptions(const CFieldOptions& options) {
     using TFieldOptionsMIndexItrBoolPr = std::pair<TFieldOptionsMIndexItr, bool>;
     TFieldOptionsMIndexItrBoolPr result(m_FieldOptions.insert(options));
     if (result.second == false) {
-        LOG_ERROR(<< "Duplicate config found: " << options << core_t::LINE_ENDING << "It clashes with config " << *result.first);
+        LOG_ERROR(<< "Duplicate config found: " << options << core_t::LINE_ENDING
+                  << "It clashes with config " << *result.first);
         return false;
     }
 
@@ -489,13 +506,15 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions,
     clashingNames.push_back(COUNT_NAME);
     clashingNames.push_back(partitionFieldName);
 
     std::string byFieldName;
-    if (!this->validateByOverField(copyTokens, lastByTokenIndex, lastOverTokenIndex, clashingNames, byFieldName)) {
+    if (!this->validateByOverField(copyTokens, lastByTokenIndex, lastOverTokenIndex,
+                                   clashingNames, byFieldName)) {
         return false;
     }
 
     std::string overFieldName;
     clashingNames.push_back(byFieldName);
-    if (!this->validateByOverField(copyTokens, lastOverTokenIndex, lastByTokenIndex, clashingNames, overFieldName)) {
+    if (!this->validateByOverField(copyTokens, lastOverTokenIndex, lastByTokenIndex,
+                                   clashingNames, overFieldName)) {
         return false;
     }
 
@@ -505,8 +524,9 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions,
     //! Validate the "excludefrequent" flag if it has been set
     bool byExcludeFrequent(false);
     bool overExcludeFrequent(false);
-    if (this->decipherExcludeFrequentSetting(excludeFrequentString, hasByField, isPopulation, byExcludeFrequent, overExcludeFrequent) ==
-        false) {
+    if (this->decipherExcludeFrequentSetting(excludeFrequentString, hasByField,
+                                             isPopulation, byExcludeFrequent,
+                                             overExcludeFrequent) == false) {
         LOG_ERROR(<< "Unknown setting for excludefrequent: " << excludeFrequentString);
         return false;
     }
@@ -514,16 +534,16 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions,
     int tokenNum(0);
     size_t stop(std::min(lastByTokenIndex, lastOverTokenIndex));
     if (stop > 1 && !allowMultipleFunctions) {
-        LOG_ERROR(<< "Only one analysis function is allowed in this context but " << core::CStringUtils::typeToString(stop)
-                  << " were specified");
+        LOG_ERROR(<< "Only one analysis function is allowed in this context but "
+                  << core::CStringUtils::typeToString(stop) << " were specified");
         return false;
     }
 
     for (size_t index = 0; index < stop; ++index) {
         model::function_t::EFunction function;
         std::string fieldName;
-        if (this->parseFieldString(!summaryCountFieldName.empty(), isPopulation, hasByField, copyTokens[index], function, fieldName) ==
-            false) {
+        if (this->parseFieldString(!summaryCountFieldName.empty(), isPopulation, hasByField,
+                                   copyTokens[index], function, fieldName) == false) {
             LOG_ERROR(<< "Failed to process token '" << copyTokens[index] << "'");
 
             // External error reporting is done within parseFieldString() so
@@ -532,15 +552,10 @@ bool CFieldConfig::parseClause(bool allowMultipleFunctions,
             return false;
         }
 
-        CFieldOptions options(function,
-                              fieldName,
+        CFieldOptions options(function, fieldName,
                               allowMultipleFunctions ? ++tokenNum : configKey,
-                              byFieldName,
-                              overFieldName,
-                              partitionFieldName,
-                              byExcludeFrequent,
-                              overExcludeFrequent,
-                              useNull);
+                              byFieldName, overFieldName, partitionFieldName,
+                              byExcludeFrequent, overExcludeFrequent, useNull);
         if (!description.empty()) {
             options.description(description);
         }
@@ -549,7 +564,8 @@
         TFieldOptionsMIndexItrBoolPr result(optionsIndex.insert(options));
         if (result.second == false) {
             LOG_ERROR(<< "Token " << core::CStringUtils::typeToString(options.configKey())
-                      << " in the analysis clause is a duplicate of token " << result.first->configKey());
+                      << " in the analysis clause is a duplicate of token "
+                      << result.first->configKey());
             return false;
         }
 
@@ -616,11 +632,13 @@ bool CFieldConfig::processDetector(const boost::property_tree::ptree& propTree,
     // Here we pull out the "1" in "detector.1.clause"
     size_t sepPos(key.rfind(SUFFIX_SEPARATOR));
-    if (sepPos == std::string::npos || sepPos <= DETECTOR_PREFIX.length() || sepPos == key.length() - 1) {
+    if (sepPos == std::string::npos || sepPos <= DETECTOR_PREFIX.length() ||
+        sepPos == key.length() - 1) {
         LOG_ERROR(<< "Unrecognised configuration option " << key << " = " << value);
         return false;
     }
-    std::string configKeyString(key, DETECTOR_PREFIX.length(), sepPos - DETECTOR_PREFIX.length());
+    std::string configKeyString(key, DETECTOR_PREFIX.length(),
+                                sepPos - DETECTOR_PREFIX.length());
     int configKey;
     if (core::CStringUtils::stringToType(configKeyString, configKey) == false) {
         LOG_ERROR(<< "Cannot convert config key to integer: " << configKeyString);
@@ -633,14 +651,18 @@ bool CFieldConfig::processDetector(const boost::property_tree::ptree& propTree,
         return true;
     }
 
-    std::string description(
-        propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + DESCRIPTION_SUFFIX, '\t'), EMPTY_STRING));
+    std::string description(propTree.get(
+        boost::property_tree::ptree::path_type(
+            DETECTOR_PREFIX + configKeyString + DESCRIPTION_SUFFIX, '\t'),
+        EMPTY_STRING));
 
-    std::string clause(
-        propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + CLAUSE_SUFFIX, '\t'), EMPTY_STRING));
+    std::string clause(propTree.get(boost::property_tree::ptree::path_type(
+                                        DETECTOR_PREFIX + configKeyString + CLAUSE_SUFFIX, '\t'),
+                                    EMPTY_STRING));
 
-    std::string rules(
-        propTree.get(boost::property_tree::ptree::path_type(DETECTOR_PREFIX + configKeyString + RULES_SUFFIX, '\t'), EMPTY_STRING));
+    std::string rules(propTree.get(boost::property_tree::ptree::path_type(
+                                       DETECTOR_PREFIX + configKeyString + RULES_SUFFIX, '\t'),
+                                   EMPTY_STRING));
 
     TStrVec tokens;
     if (this->tokenise(clause, tokens) == false) {
@@ -656,12 +678,15 @@ bool CFieldConfig::processDetector(const boost::property_tree::ptree& propTree,
     return true;
 }
 
-bool CFieldConfig::addActiveDetector(int configKey, const std::string& description, const std::string& rules, TStrVec& copyTokens) {
+bool CFieldConfig::addActiveDetector(int configKey,
+                                     const std::string& description,
+                                     const std::string& rules,
+                                     TStrVec& copyTokens) {
     std::string categorizationFieldName;
     std::string summaryCountFieldName;
-    if (this->parseClause(false, configKey, description, copyTokens, m_FieldOptions, categorizationFieldName, summaryCountFieldName) ==
-        false) {
+    if (this->parseClause(false, configKey, description, copyTokens, m_FieldOptions,
+                          categorizationFieldName, summaryCountFieldName) == false) {
         // parseClause() will have logged the error
         return false;
     }
@@ -732,26 +757,36 @@ bool CFieldConfig::parseFieldString(bool haveSummaryCountField,
     bool byFieldInvalid(false);
     if (outerToken == FUNCTION_COUNT || outerToken == FUNCTION_COUNT_ABBREV) {
-        function = isPopulation ? model::function_t::E_PopulationCount : model::function_t::E_IndividualRareCount;
+        function = isPopulation ? model::function_t::E_PopulationCount
+                                : model::function_t::E_IndividualRareCount;
         argumentInvalid = true;
-    } else if (outerToken == FUNCTION_DISTINCT_COUNT || outerToken == FUNCTION_DISTINCT_COUNT_ABBREV) {
-        function = isPopulation ? model::function_t::E_PopulationDistinctCount : model::function_t::E_IndividualDistinctCount;
+    } else if (outerToken == FUNCTION_DISTINCT_COUNT ||
+               outerToken == FUNCTION_DISTINCT_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationDistinctCount
+                                : model::function_t::E_IndividualDistinctCount;
         argumentRequired = true;
-    } else if (outerToken == FUNCTION_LOW_DISTINCT_COUNT || outerToken == FUNCTION_LOW_DISTINCT_COUNT_ABBREV) {
-        function = isPopulation ? model::function_t::E_PopulationLowDistinctCount : model::function_t::E_IndividualLowDistinctCount;
+    } else if (outerToken == FUNCTION_LOW_DISTINCT_COUNT ||
+               outerToken == FUNCTION_LOW_DISTINCT_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationLowDistinctCount
+                                : model::function_t::E_IndividualLowDistinctCount;
         argumentRequired = true;
-    } else if (outerToken == FUNCTION_HIGH_DISTINCT_COUNT || outerToken == FUNCTION_HIGH_DISTINCT_COUNT_ABBREV) {
-        function = isPopulation ? model::function_t::E_PopulationHighDistinctCount : model::function_t::E_IndividualHighDistinctCount;
+    } else if (outerToken == FUNCTION_HIGH_DISTINCT_COUNT ||
+               outerToken == FUNCTION_HIGH_DISTINCT_COUNT_ABBREV) {
+        function = isPopulation ? model::function_t::E_PopulationHighDistinctCount
+                                : model::function_t::E_IndividualHighDistinctCount;
         argumentRequired = true;
-    } else if (outerToken == FUNCTION_NON_ZERO_COUNT || outerToken == FUNCTION_NON_ZERO_COUNT_ABBREV) {
+    } else if (outerToken == FUNCTION_NON_ZERO_COUNT ||
+               outerToken == FUNCTION_NON_ZERO_COUNT_ABBREV) {
         function = model::function_t::E_IndividualNonZeroCount;
         argumentInvalid = true;
-    } else if (outerToken == FUNCTION_RARE_NON_ZERO_COUNT || outerToken == FUNCTION_RARE_NON_ZERO_COUNT_ABBREV) {
+    } else if (outerToken == FUNCTION_RARE_NON_ZERO_COUNT ||
+               outerToken == FUNCTION_RARE_NON_ZERO_COUNT_ABBREV) {
         function = model::function_t::E_IndividualRareNonZeroCount;
         argumentInvalid = true;
         byFieldRequired = true;
     } else if (outerToken == FUNCTION_RARE) {
-        function = isPopulation ? model::function_t::E_PopulationRare : model::function_t::E_IndividualRare;
+        function = isPopulation ? model::function_t::E_PopulationRare
+                                : model::function_t::E_IndividualRare;
         argumentInvalid = true;
         byFieldRequired = true;
     } else if (outerToken == FUNCTION_RARE_COUNT) {
@@ -759,115 +794,148 @@ bool CFieldConfig::parseFieldString(bool haveSummaryCountField,
         argumentInvalid = true;
         byFieldRequired = true;
     } else if (outerToken == FUNCTION_LOW_COUNT || outerToken == FUNCTION_LOW_COUNT_ABBREV) {
-        function = isPopulation ? model::function_t::E_PopulationLowCounts : model::function_t::E_IndividualLowCounts;
+        function = isPopulation ? model::function_t::E_PopulationLowCounts
+                                : model::function_t::E_IndividualLowCounts;
         argumentInvalid = true;
     } else if (outerToken == FUNCTION_HIGH_COUNT || outerToken == FUNCTION_HIGH_COUNT_ABBREV) {
-        function = isPopulation ? model::function_t::E_PopulationHighCounts : model::function_t::E_IndividualHighCounts;
+        function = isPopulation ? model::function_t::E_PopulationHighCounts
+                                : model::function_t::E_IndividualHighCounts;
         argumentInvalid = true;
-    } else if (outerToken == FUNCTION_LOW_NON_ZERO_COUNT || outerToken == FUNCTION_LOW_NON_ZERO_COUNT_ABBREV) {
+    } else if (outerToken == FUNCTION_LOW_NON_ZERO_COUNT ||
+               outerToken == FUNCTION_LOW_NON_ZERO_COUNT_ABBREV) {
         function = model::function_t::E_IndividualLowNonZeroCount;
         argumentInvalid = true;
-    } else if (outerToken == FUNCTION_HIGH_NON_ZERO_COUNT || outerToken == FUNCTION_HIGH_NON_ZERO_COUNT_ABBREV) {
+    } else if (outerToken == FUNCTION_HIGH_NON_ZERO_COUNT ||
+               outerToken == FUNCTION_HIGH_NON_ZERO_COUNT_ABBREV) {
         function = model::function_t::E_IndividualHighNonZeroCount;
         argumentInvalid = true;
     } else if (outerToken == FUNCTION_FREQ_RARE || outerToken == FUNCTION_FREQ_RARE_ABBREV) {
         function = model::function_t::E_PopulationFreqRare;
         argumentInvalid = true;
         byFieldRequired = true;
-    } else if (outerToken == FUNCTION_FREQ_RARE_COUNT || outerToken == FUNCTION_FREQ_RARE_COUNT_ABBREV) {
+    } else if (outerToken == FUNCTION_FREQ_RARE_COUNT ||
+               outerToken == FUNCTION_FREQ_RARE_COUNT_ABBREV) {
         function = model::function_t::E_PopulationFreqRareCount;
         argumentInvalid = true;
         byFieldRequired = true;
     } else if (outerToken == FUNCTION_INFO_CONTENT) {
-        function = isPopulation ? model::function_t::E_PopulationInfoContent : model::function_t::E_IndividualInfoContent;
+        function = isPopulation ? model::function_t::E_PopulationInfoContent
+                                : model::function_t::E_IndividualInfoContent;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_LOW_INFO_CONTENT) {
-        function = isPopulation ? model::function_t::E_PopulationLowInfoContent : model::function_t::E_IndividualLowInfoContent;
+        function = isPopulation ? model::function_t::E_PopulationLowInfoContent
+                                : model::function_t::E_IndividualLowInfoContent;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_HIGH_INFO_CONTENT) {
-        function = isPopulation ? model::function_t::E_PopulationHighInfoContent : model::function_t::E_IndividualHighInfoContent;
+        function = isPopulation ? model::function_t::E_PopulationHighInfoContent
+                                : model::function_t::E_IndividualHighInfoContent;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_METRIC) {
         if (haveSummaryCountField) {
-            LOG_ERROR(<< "Function " << outerToken << "() cannot be used with a summary count field");
+            LOG_ERROR(<< "Function " << outerToken
+                      << "() cannot be used with a summary count field");
             return false;
        }
-        function = isPopulation ? model::function_t::E_PopulationMetric : model::function_t::E_IndividualMetric;
+        function = isPopulation ? model::function_t::E_PopulationMetric
+                                : model::function_t::E_IndividualMetric;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_AVERAGE || outerToken == FUNCTION_MEAN) {
-        function = isPopulation ? model::function_t::E_PopulationMetricMean : model::function_t::E_IndividualMetricMean;
+        function = isPopulation ? model::function_t::E_PopulationMetricMean
+                                : model::function_t::E_IndividualMetricMean;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_LOW_AVERAGE || outerToken == FUNCTION_LOW_MEAN) {
-        function = isPopulation ? model::function_t::E_PopulationMetricLowMean : model::function_t::E_IndividualMetricLowMean;
+        function = isPopulation ? model::function_t::E_PopulationMetricLowMean
+                                : model::function_t::E_IndividualMetricLowMean;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_HIGH_AVERAGE || outerToken == FUNCTION_HIGH_MEAN) {
-        function = isPopulation ? model::function_t::E_PopulationMetricHighMean : model::function_t::E_IndividualMetricHighMean;
+        function = isPopulation ? model::function_t::E_PopulationMetricHighMean
+                                : model::function_t::E_IndividualMetricHighMean;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_MEDIAN) {
-        function = isPopulation ? model::function_t::E_PopulationMetricMedian : model::function_t::E_IndividualMetricMedian;
+        function = isPopulation ? model::function_t::E_PopulationMetricMedian
+                                : model::function_t::E_IndividualMetricMedian;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_LOW_MEDIAN) {
-        function = isPopulation ? model::function_t::E_PopulationMetricLowMedian : model::function_t::E_IndividualMetricLowMedian;
+        function = isPopulation ? model::function_t::E_PopulationMetricLowMedian
+                                : model::function_t::E_IndividualMetricLowMedian;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_HIGH_MEDIAN) {
-        function = isPopulation ? model::function_t::E_PopulationMetricHighMedian : model::function_t::E_IndividualMetricHighMedian;
+        function = isPopulation ? model::function_t::E_PopulationMetricHighMedian
+                                : model::function_t::E_IndividualMetricHighMedian;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_MIN) {
-        function = isPopulation ? model::function_t::E_PopulationMetricMin : model::function_t::E_IndividualMetricMin;
+        function = isPopulation ? model::function_t::E_PopulationMetricMin
+                                : model::function_t::E_IndividualMetricMin;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_MAX) {
-        function = isPopulation ? model::function_t::E_PopulationMetricMax : model::function_t::E_IndividualMetricMax;
+        function = isPopulation ? model::function_t::E_PopulationMetricMax
+                                : model::function_t::E_IndividualMetricMax;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_VARIANCE) {
-        function = isPopulation ? model::function_t::E_PopulationMetricVariance : model::function_t::E_IndividualMetricVariance;
+        function = isPopulation ? model::function_t::E_PopulationMetricVariance
+                                : model::function_t::E_IndividualMetricVariance;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_LOW_VARIANCE) {
-        function = isPopulation ? model::function_t::E_PopulationMetricLowVariance : model::function_t::E_IndividualMetricLowVariance;
+        function = isPopulation ? model::function_t::E_PopulationMetricLowVariance
+                                : model::function_t::E_IndividualMetricLowVariance;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_HIGH_VARIANCE) {
-        function = isPopulation ? model::function_t::E_PopulationMetricHighVariance : model::function_t::E_IndividualMetricHighVariance;
+        function = isPopulation ? model::function_t::E_PopulationMetricHighVariance
+                                : model::function_t::E_IndividualMetricHighVariance;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_SUM) {
-        function = isPopulation ? model::function_t::E_PopulationMetricSum : model::function_t::E_IndividualMetricSum;
+        function = isPopulation ? model::function_t::E_PopulationMetricSum
+                                : model::function_t::E_IndividualMetricSum;
         argumentRequired = true;
     } else if (outerToken == FUNCTION_LOW_SUM) {
-        function = isPopulation ?
model::function_t::E_PopulationMetricLowSum + : model::function_t::E_IndividualMetricLowSum; argumentRequired = true; } else if (outerToken == FUNCTION_HIGH_SUM) { - function = isPopulation ? model::function_t::E_PopulationMetricHighSum : model::function_t::E_IndividualMetricHighSum; + function = isPopulation ? model::function_t::E_PopulationMetricHighSum + : model::function_t::E_IndividualMetricHighSum; argumentRequired = true; } else if (outerToken == FUNCTION_NON_NULL_SUM || outerToken == FUNCTION_NON_NULL_SUM_ABBREV) { function = model::function_t::E_IndividualMetricNonNullSum; argumentRequired = true; - } else if (outerToken == FUNCTION_LOW_NON_NULL_SUM || outerToken == FUNCTION_LOW_NON_NULL_SUM_ABBREV) { + } else if (outerToken == FUNCTION_LOW_NON_NULL_SUM || + outerToken == FUNCTION_LOW_NON_NULL_SUM_ABBREV) { function = model::function_t::E_IndividualMetricLowNonNullSum; argumentRequired = true; - } else if (outerToken == FUNCTION_HIGH_NON_NULL_SUM || outerToken == FUNCTION_HIGH_NON_NULL_SUM_ABBREV) { + } else if (outerToken == FUNCTION_HIGH_NON_NULL_SUM || + outerToken == FUNCTION_HIGH_NON_NULL_SUM_ABBREV) { function = model::function_t::E_IndividualMetricHighNonNullSum; argumentRequired = true; } else if (outerToken == FUNCTION_TIME_OF_DAY) { - function = isPopulation ? model::function_t::E_PopulationTimeOfDay : model::function_t::E_IndividualTimeOfDay; + function = isPopulation ? model::function_t::E_PopulationTimeOfDay + : model::function_t::E_IndividualTimeOfDay; argumentRequired = false; argumentInvalid = true; } else if (outerToken == FUNCTION_TIME_OF_WEEK) { - function = isPopulation ? model::function_t::E_PopulationTimeOfWeek : model::function_t::E_IndividualTimeOfWeek; + function = isPopulation ? model::function_t::E_PopulationTimeOfWeek + : model::function_t::E_IndividualTimeOfWeek; argumentRequired = false; argumentInvalid = true; } else if (outerToken == FUNCTION_LAT_LONG) { - function = isPopulation ? model::function_t::E_PopulationLatLong : model::function_t::E_IndividualLatLong; + function = isPopulation ? model::function_t::E_PopulationLatLong + : model::function_t::E_IndividualLatLong; argumentRequired = true; } else if (outerToken == FUNCTION_MAX_VELOCITY) { - function = isPopulation ? model::function_t::E_PopulationMaxVelocity : model::function_t::E_IndividualMaxVelocity; + function = isPopulation ? model::function_t::E_PopulationMaxVelocity + : model::function_t::E_IndividualMaxVelocity; argumentRequired = true; } else if (outerToken == FUNCTION_MIN_VELOCITY) { - function = isPopulation ? model::function_t::E_PopulationMinVelocity : model::function_t::E_IndividualMinVelocity; + function = isPopulation ? model::function_t::E_PopulationMinVelocity + : model::function_t::E_IndividualMinVelocity; argumentRequired = true; } else if (outerToken == FUNCTION_MEAN_VELOCITY) { - function = isPopulation ? model::function_t::E_PopulationMeanVelocity : model::function_t::E_IndividualMeanVelocity; + function = isPopulation ? model::function_t::E_PopulationMeanVelocity + : model::function_t::E_IndividualMeanVelocity; argumentRequired = true; } else if (outerToken == FUNCTION_SUM_VELOCITY) { - function = isPopulation ? model::function_t::E_PopulationSumVelocity : model::function_t::E_IndividualSumVelocity; + function = isPopulation ? 
model::function_t::E_PopulationSumVelocity + : model::function_t::E_IndividualSumVelocity; argumentRequired = true; } else { // We expect an individual metric here, but if the original string @@ -884,7 +952,8 @@ bool CFieldConfig::parseFieldString(bool haveSummaryCountField, return false; } - function = isPopulation ? model::function_t::E_PopulationMetric : model::function_t::E_IndividualMetric; + function = isPopulation ? model::function_t::E_PopulationMetric + : model::function_t::E_IndividualMetric; // This is inconsistent notation, but kept for backwards compatibility fieldName = outerToken; @@ -968,18 +1037,22 @@ bool CFieldConfig::decipherExcludeFrequentSetting(const std::string& excludeFreq if (!excludeFrequentString.empty()) { if (excludeFrequentString.length() == ALL_TOKEN.length() && - core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), ALL_TOKEN.c_str()) == 0) { + core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), + ALL_TOKEN.c_str()) == 0) { byExcludeFrequent = hasByField; overExcludeFrequent = isPopulation; } else if (excludeFrequentString.length() == BY_TOKEN.length() && - core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), BY_TOKEN.c_str()) == 0) { + core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), + BY_TOKEN.c_str()) == 0) { byExcludeFrequent = hasByField; } else if (excludeFrequentString.length() == OVER_TOKEN.length() && - core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), OVER_TOKEN.c_str()) == 0) { + core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), + OVER_TOKEN.c_str()) == 0) { overExcludeFrequent = isPopulation; } else { if (excludeFrequentString.length() != NONE_TOKEN.length() || - core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), NONE_TOKEN.c_str()) != 0) { + core::CStrCaseCmp::strCaseCmp(excludeFrequentString.c_str(), + NONE_TOKEN.c_str()) != 0) { LOG_ERROR(<< "Unexpected excludeFrequent value = " << excludeFrequentString); return false; } @@ -1015,7 +1088,8 @@ const CFieldConfig::TStrDetectionRulePrVec& CFieldConfig::scheduledEvents() cons void CFieldConfig::influencerFieldNames(TStrVec influencers) { LOG_DEBUG(<< "Set influencers : " << core::CContainerPrinter::print(influencers)); - std::for_each(influencers.begin(), influencers.end(), boost::bind(&CFieldConfig::seenField, this, _1)); + std::for_each(influencers.begin(), influencers.end(), + boost::bind(&CFieldConfig::seenField, this, _1)); m_Influencers.swap(influencers); } @@ -1027,7 +1101,8 @@ void CFieldConfig::addInfluencerFieldName(const std::string& influencer, bool qu return; } - if (std::find(m_Influencers.begin(), m_Influencers.end(), influencer) == m_Influencers.end()) { + if (std::find(m_Influencers.begin(), m_Influencers.end(), influencer) == + m_Influencers.end()) { LOG_TRACE(<< "Add influencer : " << influencer); this->seenField(influencer); m_Influencers.push_back(influencer); @@ -1048,7 +1123,8 @@ void CFieldConfig::addCategorizationFilter(const std::string& filter) { this->tokenise(filter, tokens); if (tokens.size() != 1) { - LOG_ERROR(<< "Unexpected number of tokens: " << tokens.size() << "; ignoring categorization filter: " << filter); + LOG_ERROR(<< "Unexpected number of tokens: " << tokens.size() + << "; ignoring categorization filter: " << filter); return; } @@ -1090,7 +1166,8 @@ bool CFieldConfig::processScheduledEvent(const boost::property_tree::ptree& prop return false; } - std::string indexString(key, SCHEDULED_EVENT_PREFIX.length(), sepPos - SCHEDULED_EVENT_PREFIX.length()); + std::string indexString(key, 
SCHEDULED_EVENT_PREFIX.length(), + sepPos - SCHEDULED_EVENT_PREFIX.length()); int indexKey; if (core::CStringUtils::stringToType(indexString, indexKey) == false) { LOG_ERROR(<< "Cannot convert config key to integer: " << indexString); @@ -1104,10 +1181,13 @@ bool CFieldConfig::processScheduledEvent(const boost::property_tree::ptree& prop } std::string description(propTree.get( - boost::property_tree::ptree::path_type(SCHEDULED_EVENT_PREFIX + indexString + DESCRIPTION_SUFFIX, '\t'), EMPTY_STRING)); + boost::property_tree::ptree::path_type( + SCHEDULED_EVENT_PREFIX + indexString + DESCRIPTION_SUFFIX, '\t'), + EMPTY_STRING)); - std::string rules( - propTree.get(boost::property_tree::ptree::path_type(SCHEDULED_EVENT_PREFIX + indexString + RULES_SUFFIX, '\t'), EMPTY_STRING)); + std::string rules(propTree.get(boost::property_tree::ptree::path_type( + SCHEDULED_EVENT_PREFIX + indexString + RULES_SUFFIX, '\t'), + EMPTY_STRING)); TDetectionRuleVec detectionRules; if (this->parseRules(detectionRules, rules) == false) { @@ -1146,12 +1226,10 @@ bool CFieldConfig::updateScheduledEvents(const boost::property_tree::ptree& prop } CFieldConfig::CFieldOptions::CFieldOptions(const std::string& fieldName, int configKey) - : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount : model::function_t::E_IndividualMetric), - m_FieldName(fieldName), - m_ConfigKey(configKey), - m_ByHasExcludeFrequent(false), - m_OverHasExcludeFrequent(false), - m_UseNull(true) { + : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount + : model::function_t::E_IndividualMetric), + m_FieldName(fieldName), m_ConfigKey(configKey), m_ByHasExcludeFrequent(false), + m_OverHasExcludeFrequent(false), m_UseNull(true) { } CFieldConfig::CFieldOptions::CFieldOptions(const std::string& fieldName, @@ -1162,13 +1240,12 @@ CFieldConfig::CFieldOptions::CFieldOptions(const std::string& fieldName, // For historical reasons, the only function name we interpret in this // constructor is "count" - every other word is considered to be a metric // field name. - : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount : model::function_t::E_IndividualMetric), + : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount + : model::function_t::E_IndividualMetric), m_FieldName(fieldName == COUNT_NAME ? EMPTY_STRING : fieldName), - m_ConfigKey(configKey), - m_ByFieldName(byFieldName), + m_ConfigKey(configKey), m_ByFieldName(byFieldName), m_ByHasExcludeFrequent(byHasExcludeFrequent), - m_OverHasExcludeFrequent(false), - m_UseNull(useNull) { + m_OverHasExcludeFrequent(false), m_UseNull(useNull) { } CFieldConfig::CFieldOptions::CFieldOptions(const std::string& fieldName, @@ -1181,14 +1258,13 @@ CFieldConfig::CFieldOptions::CFieldOptions(const std::string& fieldName, // For historical reasons, the only function name we interpret in this // constructor is "count" - every other word is considered to be a metric // field name. - : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount : model::function_t::E_IndividualMetric), + : m_Function(fieldName == COUNT_NAME ? model::function_t::E_IndividualRareCount + : model::function_t::E_IndividualMetric), m_FieldName(fieldName == COUNT_NAME ? 
EMPTY_STRING : fieldName), - m_ConfigKey(configKey), - m_ByFieldName(byFieldName), + m_ConfigKey(configKey), m_ByFieldName(byFieldName), m_PartitionFieldName(partitionFieldName), m_ByHasExcludeFrequent(byHasExcludeFrequent), - m_OverHasExcludeFrequent(overHasExcludeFrequent), - m_UseNull(useNull) { + m_OverHasExcludeFrequent(overHasExcludeFrequent), m_UseNull(useNull) { } CFieldConfig::CFieldOptions::CFieldOptions(model::function_t::EFunction function, @@ -1200,15 +1276,11 @@ CFieldConfig::CFieldOptions::CFieldOptions(model::function_t::EFunction function bool byHasExcludeFrequent, bool overHasExcludeFrequent, bool useNull) - : m_Function(function), - m_FieldName(fieldName), - m_ConfigKey(configKey), - m_ByFieldName(byFieldName), - m_OverFieldName(overFieldName), + : m_Function(function), m_FieldName(fieldName), m_ConfigKey(configKey), + m_ByFieldName(byFieldName), m_OverFieldName(overFieldName), m_PartitionFieldName(partitionFieldName), m_ByHasExcludeFrequent(byHasExcludeFrequent), - m_OverHasExcludeFrequent(overHasExcludeFrequent), - m_UseNull(useNull) { + m_OverHasExcludeFrequent(overHasExcludeFrequent), m_UseNull(useNull) { } void CFieldConfig::CFieldOptions::description(std::string description) { @@ -1684,7 +1756,8 @@ void swap(CFieldConfig::CFieldOptions& lhs, CFieldConfig::CFieldOptions& rhs) { std::ostream& operator<<(std::ostream& strm, const CFieldConfig::CFieldOptions& options) { options.debugPrintClause(strm); - strm << " (config key: " << options.m_ConfigKey << " description: " << options.m_Description << ')'; + strm << " (config key: " << options.m_ConfigKey + << " description: " << options.m_Description << ')'; return strm; } } diff --git a/lib/api/CFieldDataTyper.cc b/lib/api/CFieldDataTyper.cc index d9a8e0f1b9..498a859853 100644 --- a/lib/api/CFieldDataTyper.cc +++ b/lib/api/CFieldDataTyper.cc @@ -48,18 +48,13 @@ CFieldDataTyper::CFieldDataTyper(const std::string& jobId, COutputHandler& outputHandler, CJsonOutputWriter& jsonOutputWriter, CBackgroundPersister* periodicPersister) - : m_JobId(jobId), - m_OutputHandler(outputHandler), - m_ExtraFieldNames(1, MLCATEGORY_NAME), - m_WriteFieldNames(true), - m_NumRecordsHandled(0), - m_OutputFieldCategory(m_Overrides[MLCATEGORY_NAME]), - m_MaxMatchingLength(0), - m_JsonOutputWriter(jsonOutputWriter), + : m_JobId(jobId), m_OutputHandler(outputHandler), + m_ExtraFieldNames(1, MLCATEGORY_NAME), m_WriteFieldNames(true), + m_NumRecordsHandled(0), m_OutputFieldCategory(m_Overrides[MLCATEGORY_NAME]), + m_MaxMatchingLength(0), m_JsonOutputWriter(jsonOutputWriter), m_ExamplesCollector(limits.maxExamples()), m_CategorizationFieldName(config.categorizationFieldName()), - m_CategorizationFilter(), - m_PeriodicPersister(periodicPersister) { + m_CategorizationFilter(), m_PeriodicPersister(periodicPersister) { this->createTyper(m_CategorizationFieldName); LOG_DEBUG(<< "Configuring categorization filtering"); @@ -85,7 +80,8 @@ bool CFieldDataTyper::handleRecord(const TStrStrUMap& dataRowFields) { } if (m_OutputHandler.fieldNames(fieldNames, m_ExtraFieldNames) == false) { - LOG_ERROR(<< "Unable to set field names for output:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); + LOG_ERROR(<< "Unable to set field names for output:" << core_t::LINE_ENDING + << this->debugPrintRecord(dataRowFields)); return false; } m_WriteFieldNames = false; @@ -103,7 +99,8 @@ bool CFieldDataTyper::handleRecord(const TStrStrUMap& dataRowFields) { m_OutputFieldCategory = core::CStringUtils::typeToString(this->computeType(dataRowFields)); if 
(m_OutputHandler.writeRow(dataRowFields, m_Overrides) == false) { - LOG_ERROR(<< "Unable to write output with type " << m_OutputFieldCategory << " for input:" << core_t::LINE_ENDING + LOG_ERROR(<< "Unable to write output with type " << m_OutputFieldCategory + << " for input:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); return false; } @@ -135,24 +132,28 @@ int CFieldDataTyper::computeType(const TStrStrUMap& dataRowFields) { const std::string& categorizationFieldName = m_DataTyper->fieldName(); TStrStrUMapCItr fieldIter = dataRowFields.find(categorizationFieldName); if (fieldIter == dataRowFields.end()) { - LOG_WARN(<< "Assigning type -1 to record with no " << categorizationFieldName << " field:" << core_t::LINE_ENDING + LOG_WARN(<< "Assigning type -1 to record with no " + << categorizationFieldName << " field:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); return -1; } const std::string& fieldValue = fieldIter->second; if (fieldValue.empty()) { - LOG_WARN(<< "Assigning type -1 to record with blank " << categorizationFieldName << " field:" << core_t::LINE_ENDING + LOG_WARN(<< "Assigning type -1 to record with blank " + << categorizationFieldName << " field:" << core_t::LINE_ENDING << this->debugPrintRecord(dataRowFields)); return -1; } int type = -1; if (m_CategorizationFilter.empty()) { - type = m_DataTyper->computeType(false, dataRowFields, fieldValue, fieldValue.length()); + type = m_DataTyper->computeType(false, dataRowFields, fieldValue, + fieldValue.length()); } else { std::string filtered = m_CategorizationFilter.apply(fieldValue); - type = m_DataTyper->computeType(false, dataRowFields, filtered, fieldValue.length()); + type = m_DataTyper->computeType(false, dataRowFields, filtered, + fieldValue.length()); } if (type < 1) { return -1; @@ -161,8 +162,10 @@ int CFieldDataTyper::computeType(const TStrStrUMap& dataRowFields) { bool exampleAdded = m_ExamplesCollector.add(static_cast(type), fieldValue); bool searchTermsChanged = this->createReverseSearch(type); if (exampleAdded || searchTermsChanged) { - const TStrSet& examples = m_ExamplesCollector.examples(static_cast(type)); - m_JsonOutputWriter.writeCategoryDefinition(type, m_SearchTerms, m_SearchTermsRegex, m_MaxMatchingLength, examples); + const TStrSet& examples = + m_ExamplesCollector.examples(static_cast(type)); + m_JsonOutputWriter.writeCategoryDefinition( + type, m_SearchTerms, m_SearchTermsRegex, m_MaxMatchingLength, examples); } // Check if a periodic persist is due. 
@@ -178,21 +181,24 @@ void CFieldDataTyper::createTyper(const std::string& fieldName) {
     // replaced with a factory
     TTokenListDataTyperKeepsFields::TTokenListReverseSearchCreatorIntfCPtr reverseSearchCreator(
         new CTokenListReverseSearchCreator(fieldName));
-    m_DataTyper.reset(new TTokenListDataTyperKeepsFields(reverseSearchCreator, SIMILARITY_THRESHOLD, fieldName));
+    m_DataTyper.reset(new TTokenListDataTyperKeepsFields(
+        reverseSearchCreator, SIMILARITY_THRESHOLD, fieldName));

     LOG_TRACE(<< "Created new categorizer for field '" << fieldName << "'");
 }

 bool CFieldDataTyper::createReverseSearch(int type) {
     bool wasCached(false);
-    if (m_DataTyper->createReverseSearch(type, m_SearchTerms, m_SearchTermsRegex, m_MaxMatchingLength, wasCached) == false) {
+    if (m_DataTyper->createReverseSearch(type, m_SearchTerms, m_SearchTermsRegex,
+                                         m_MaxMatchingLength, wasCached) == false) {
         m_SearchTerms.clear();
         m_SearchTermsRegex.clear();
     }
     return !wasCached;
 }

-bool CFieldDataTyper::restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime) {
+bool CFieldDataTyper::restoreState(core::CDataSearcher& restoreSearcher,
+                                   core_t::TTime& completeToTime) {
     // Pass on the request in case we're chained
     if (m_OutputHandler.restoreState(restoreSearcher, completeToTime) == false) {
         return false;
     }
@@ -252,7 +258,8 @@ bool CFieldDataTyper::acceptRestoreTraverser(core::CStateRestoreTraverser& trave
     if (firstFieldName == VERSION_TAG) {
         std::string version;
         if (core::CStringUtils::stringToType(traverser.value(), version) == false) {
-            LOG_ERROR(<< "Cannot restore categorizer, invalid version: " << traverser.value());
+            LOG_ERROR(<< "Cannot restore categorizer, invalid version: "
+                      << traverser.value());
             return false;
         }
         if (version != STATE_VERSION) {
@@ -260,42 +267,48 @@ bool CFieldDataTyper::acceptRestoreTraverser(core::CStateRestoreTraverser& trave
             return true;
         }
     } else {
-        LOG_ERROR(<< "Cannot restore categorizer - " << VERSION_TAG << " element expected but found " << traverser.name() << '='
-                  << traverser.value());
+        LOG_ERROR(<< "Cannot restore categorizer - " << VERSION_TAG << " element expected but found "
+                  << traverser.name() << '=' << traverser.value());
         return false;
     }

     if (traverser.next() == false) {
-        LOG_ERROR(<< "Cannot restore categorizer - end of object reached when " << TYPER_TAG << " was expected");
+        LOG_ERROR(<< "Cannot restore categorizer - end of object reached when "
+                  << TYPER_TAG << " was expected");
         return false;
     }

     if (traverser.name() == TYPER_TAG) {
-        if (traverser.traverseSubLevel(boost::bind(&CDataTyper::acceptRestoreTraverser, m_DataTyper, _1)) == false) {
-            LOG_ERROR(<< "Cannot restore categorizer, unexpected element: " << traverser.value());
+        if (traverser.traverseSubLevel(boost::bind(&CDataTyper::acceptRestoreTraverser,
+                                                   m_DataTyper, _1)) == false) {
+            LOG_ERROR(<< "Cannot restore categorizer, unexpected element: "
+                      << traverser.value());
             return false;
         }
     } else {
-        LOG_ERROR(<< "Cannot restore categorizer - " << TYPER_TAG << " element expected but found " << traverser.name() << '='
-                  << traverser.value());
+        LOG_ERROR(<< "Cannot restore categorizer - " << TYPER_TAG << " element expected but found "
+                  << traverser.name() << '=' << traverser.value());
         return false;
     }

     if (traverser.next() == false) {
-        LOG_ERROR(<< "Cannot restore categorizer - end of object reached when " << EXAMPLES_COLLECTOR_TAG << " was expected");
+        LOG_ERROR(<< "Cannot restore categorizer - end of object reached when "
+                  << EXAMPLES_COLLECTOR_TAG << " was expected");
         return false;
     }

     if (traverser.name() == EXAMPLES_COLLECTOR_TAG) {
         if (traverser.traverseSubLevel(
-                boost::bind(&CCategoryExamplesCollector::acceptRestoreTraverser, boost::ref(m_ExamplesCollector), _1)) == false ||
+                boost::bind(&CCategoryExamplesCollector::acceptRestoreTraverser,
+                            boost::ref(m_ExamplesCollector), _1)) == false ||
             traverser.haveBadState()) {
-            LOG_ERROR(<< "Cannot restore categorizer, unexpected element: " << traverser.value());
+            LOG_ERROR(<< "Cannot restore categorizer, unexpected element: "
+                      << traverser.value());
             return false;
         }
     } else {
-        LOG_ERROR(<< "Cannot restore categorizer - " << EXAMPLES_COLLECTOR_TAG << " element expected but found " << traverser.name() << '='
-                  << traverser.value());
+        LOG_ERROR(<< "Cannot restore categorizer - " << EXAMPLES_COLLECTOR_TAG << " element expected but found "
+                  << traverser.name() << '=' << traverser.value());
         return false;
     }
@@ -328,7 +341,8 @@ bool CFieldDataTyper::doPersistState(const CDataTyper::TPersistFunc& dataTyperPe
     try {
         core::CStateCompressor compressor(persister);

-        core::CDataAdder::TOStreamP strm = compressor.addStreamed(ML_STATE_INDEX, m_JobId + '_' + STATE_TYPE);
+        core::CDataAdder::TOStreamP strm =
+            compressor.addStreamed(ML_STATE_INDEX, m_JobId + '_' + STATE_TYPE);

         if (strm == nullptr) {
             LOG_ERROR(<< "Failed to create persistence stream");
@@ -370,7 +384,9 @@ void CFieldDataTyper::acceptPersistInserter(const CDataTyper::TPersistFunc& data
                                             core::CStatePersistInserter& inserter) const {
     inserter.insertValue(VERSION_TAG, STATE_VERSION);
     inserter.insertLevel(TYPER_TAG, dataTyperPersistFunc);
-    inserter.insertLevel(EXAMPLES_COLLECTOR_TAG, boost::bind(&CCategoryExamplesCollector::acceptPersistInserter, &examplesCollector, _1));
+    inserter.insertLevel(EXAMPLES_COLLECTOR_TAG,
+                         boost::bind(&CCategoryExamplesCollector::acceptPersistInserter,
+                                     &examplesCollector, _1));
 }

 bool CFieldDataTyper::periodicPersistState(CBackgroundPersister& persister) {
@@ -381,14 +397,12 @@ bool CFieldDataTyper::periodicPersistState(CBackgroundPersister& persister) {
         return false;
     }

-    if (persister.addPersistFunc(boost::bind(&CFieldDataTyper::doPersistState,
-                                             this,
+    if (persister.addPersistFunc(boost::bind(&CFieldDataTyper::doPersistState, this,
                                              // Do NOT add boost::ref wrappers
                                              // around these arguments - they
                                              // MUST be copied for thread safety
                                              m_DataTyper->makePersistFunc(),
-                                             m_ExamplesCollector,
-                                             _1)) == false) {
+                                             m_ExamplesCollector, _1)) == false) {
         LOG_ERROR(<< "Failed to add categorizer background persistence function");
         return false;
     }
@@ -416,7 +430,8 @@ bool CFieldDataTyper::handleControlMessage(const std::string& controlMessage) {
     case ' ':
         // Spaces are just used to fill the buffers and force prior messages
         // through the system - we don't need to do anything else
-        LOG_TRACE(<< "Received space control message of length " << controlMessage.length());
+        LOG_TRACE(<< "Received space control message of length "
+                  << controlMessage.length());
         break;
     case CONTROL_FIELD_NAME_CHAR:
         // Silent no-op. This is a simple way to ignore repeated header
@@ -427,8 +442,9 @@ bool CFieldDataTyper::handleControlMessage(const std::string& controlMessage) {
         this->acknowledgeFlush(controlMessage.substr(1));
         break;
     default:
-        LOG_WARN(<< "Ignoring unknown control message of length " << controlMessage.length() << " beginning with '" << controlMessage[0]
-                 << '\'');
+        LOG_WARN(<< "Ignoring unknown control message of length "
+                 << controlMessage.length() << " beginning with '"
+                 << controlMessage[0] << '\'');
         // Don't return false here (for the time being at least), as it
         // seems excessive to cause the entire job to fail
         break;
diff --git a/lib/api/CForecastRunner.cc b/lib/api/CForecastRunner.cc
index b651e1ca1c..60d81fd76e 100644
--- a/lib/api/CForecastRunner.cc
+++ b/lib/api/CForecastRunner.cc
@@ -28,12 +28,10 @@ const std::string EMPTY_STRING;

 const std::string CForecastRunner::ERROR_FORECAST_REQUEST_FAILED_TO_PARSE("Failed to parse forecast request: ");
 const std::string CForecastRunner::ERROR_NO_FORECAST_ID("forecast ID must be specified and non empty");
-const std::string CForecastRunner::ERROR_TOO_MANY_JOBS(
-    "Forecast cannot be executed due to queue limit. Please wait for requests to finish and try again");
-const std::string
-    CForecastRunner::ERROR_NO_MODELS("Forecast cannot be executed as model is not yet established. Job requires more time to learn");
-const std::string
-    CForecastRunner::ERROR_NO_DATA_PROCESSED("Forecast cannot be executed as job requires data to have been processed and modeled");
+const std::string CForecastRunner::ERROR_TOO_MANY_JOBS("Forecast cannot be executed due to queue limit. Please wait for requests to finish and try again");
+const std::string CForecastRunner::ERROR_NO_MODELS("Forecast cannot be executed as model is not yet established. Job requires more time to learn");
+const std::string CForecastRunner::ERROR_NO_DATA_PROCESSED(
+    "Forecast cannot be executed as job requires data to have been processed and modeled");
 const std::string CForecastRunner::ERROR_NO_CREATE_TIME("Forecast create time must be specified and non zero");
 const std::string CForecastRunner::ERROR_BAD_MEMORY_STATUS("Forecast cannot be executed as model memory status is not OK");
 const std::string CForecastRunner::ERROR_MEMORY_LIMIT("Forecast cannot be executed as forecast memory usage is predicted to exceed 20MB");
@@ -46,17 +44,9 @@ const std::string CForecastRunner::INFO_DEFAULT_EXPIRY("Forecast expires_in not
 const std::string CForecastRunner::INFO_NO_MODELS_CAN_CURRENTLY_BE_FORECAST("Insufficient history to forecast for all models");

 CForecastRunner::SForecast::SForecast()
-    : s_ForecastId(),
-      s_ForecastAlias(),
-      s_ForecastSeries(),
-      s_CreateTime(0),
-      s_StartTime(0),
-      s_Duration(0),
-      s_ExpiryTime(0),
-      s_BoundsPercentile(0),
-      s_NumberOfModels(0),
-      s_NumberOfForecastableModels(0),
-      s_MemoryUsage(0),
+    : s_ForecastId(), s_ForecastAlias(), s_ForecastSeries(), s_CreateTime(0),
+      s_StartTime(0), s_Duration(0), s_ExpiryTime(0), s_BoundsPercentile(0),
+      s_NumberOfModels(0), s_NumberOfForecastableModels(0), s_MemoryUsage(0),
       s_Messages() {
 }
@@ -64,15 +54,12 @@ CForecastRunner::SForecast::SForecast(SForecast&& other)
     : s_ForecastId(std::move(other.s_ForecastId)),
       s_ForecastAlias(std::move(other.s_ForecastAlias)),
       s_ForecastSeries(std::move(other.s_ForecastSeries)),
-      s_CreateTime(other.s_CreateTime),
-      s_StartTime(other.s_StartTime),
-      s_Duration(other.s_Duration),
-      s_ExpiryTime(other.s_ExpiryTime),
+      s_CreateTime(other.s_CreateTime), s_StartTime(other.s_StartTime),
+      s_Duration(other.s_Duration), s_ExpiryTime(other.s_ExpiryTime),
       s_BoundsPercentile(other.s_BoundsPercentile),
       s_NumberOfModels(other.s_NumberOfModels),
       s_NumberOfForecastableModels(other.s_NumberOfForecastableModels),
-      s_MemoryUsage(other.s_MemoryUsage),
-      s_Messages(other.s_Messages) {
+      s_MemoryUsage(other.s_MemoryUsage), s_Messages(other.s_Messages) {
 }

 CForecastRunner::SForecast& CForecastRunner::SForecast::operator=(SForecast&& other) {
@@ -95,7 +82,8 @@ CForecastRunner::SForecast& CForecastRunner::SForecast::operator=(SForecast&& ot
 CForecastRunner::CForecastRunner(const std::string& jobId,
                                  core::CJsonOutputStreamWrapper& strmOut,
                                  model::CResourceMonitor& resourceMonitor)
-    : m_JobId(jobId), m_ConcurrentOutputStream(strmOut), m_ResourceMonitor(resourceMonitor), m_Shutdown(false) {
+    : m_JobId(jobId), m_ConcurrentOutputStream(strmOut),
+      m_ResourceMonitor(resourceMonitor), m_Shutdown(false) {
     m_Worker = std::thread([this] { this->forecastWorker(); });
 }
@@ -123,29 +111,27 @@ void CForecastRunner::forecastWorker() {
     SForecast forecastJob;
     while (!m_Shutdown) {
         if (this->tryGetJob(forecastJob)) {
-            LOG_INFO(<< "Start forecasting from " << core::CTimeUtils::toIso8601(forecastJob.s_StartTime) << " to "
+            LOG_INFO(<< "Start forecasting from "
+                     << core::CTimeUtils::toIso8601(forecastJob.s_StartTime) << " to "
                      << core::CTimeUtils::toIso8601(forecastJob.forecastEnd()));
             core::CStopWatch timer(true);
             uint64_t lastStatsUpdate = 0;

             LOG_TRACE(<< "about to create sink");
-            model::CForecastDataSink sink(m_JobId,
-                                          forecastJob.s_ForecastId,
-                                          forecastJob.s_ForecastAlias,
-                                          forecastJob.s_CreateTime,
-                                          forecastJob.s_StartTime,
-                                          forecastJob.forecastEnd(),
-                                          forecastJob.s_ExpiryTime,
-                                          forecastJob.s_MemoryUsage,
-                                          m_ConcurrentOutputStream);
+            model::CForecastDataSink sink(
+                m_JobId, forecastJob.s_ForecastId, forecastJob.s_ForecastAlias,
+                forecastJob.s_CreateTime, forecastJob.s_StartTime,
+                forecastJob.forecastEnd(), forecastJob.s_ExpiryTime,
+                forecastJob.s_MemoryUsage, m_ConcurrentOutputStream);

             std::string message;
             // collecting the runtime messages first and sending it in 1 go
             TStrUSet messages(forecastJob.s_Messages);
             double processedModels = 0;
-            double totalNumberOfForecastableModels = static_cast<double>(forecastJob.s_NumberOfForecastableModels);
+            double totalNumberOfForecastableModels =
+                static_cast<double>(forecastJob.s_NumberOfForecastableModels);
             size_t failedForecasts = 0;

             sink.writeStats(0.0, 0, forecastJob.s_Messages);
@@ -155,31 +141,27 @@ void CForecastRunner::forecastWorker() {
             while (!series.s_ToForecast.empty()) {
                 const TForecastModelWrapper& model = series.s_ToForecast.back();
-                model_t::TDouble1VecDouble1VecPr support = model_t::support(model.s_Feature);
-                bool success = model.s_ForecastModel->forecast(forecastJob.s_StartTime,
-                                                               forecastJob.forecastEnd(),
-                                                               forecastJob.s_BoundsPercentile,
-                                                               support.first,
-                                                               support.second,
-                                                               boost::bind(&model::CForecastDataSink::push,
-                                                                           &sink,
-                                                                           _1,
-                                                                           model_t::print(model.s_Feature),
-                                                                           series.s_PartitionFieldName,
-                                                                           series.s_PartitionFieldValue,
-                                                                           series.s_ByFieldName,
-                                                                           model.s_ByFieldValue,
-                                                                           series.s_DetectorIndex),
-                                                               message);
+                model_t::TDouble1VecDouble1VecPr support =
+                    model_t::support(model.s_Feature);
+                bool success = model.s_ForecastModel->forecast(
+                    forecastJob.s_StartTime, forecastJob.forecastEnd(),
+                    forecastJob.s_BoundsPercentile, support.first, support.second,
+                    boost::bind(&model::CForecastDataSink::push, &sink, _1,
+                                model_t::print(model.s_Feature), series.s_PartitionFieldName,
+                                series.s_PartitionFieldValue, series.s_ByFieldName,
+                                model.s_ByFieldValue, series.s_DetectorIndex),
+                    message);
                 series.s_ToForecast.pop_back();

                 if (success == false) {
-                    LOG_DEBUG(<< "Detector " << series.s_DetectorIndex << " failed to forecast");
+                    LOG_DEBUG(<< "Detector " << series.s_DetectorIndex
+                              << " failed to forecast");
                     ++failedForecasts;
                 }

                 if (message.empty() == false) {
-                    messages.insert("Detector[" + std::to_string(series.s_DetectorIndex) + "]: " + message);
+                    messages.insert("Detector[" + std::to_string(series.s_DetectorIndex) +
+                                    "]: " + message);
                     message.clear();
                 }
@@ -188,7 +170,8 @@ void CForecastRunner::forecastWorker() {
                 if (processedModels != totalNumberOfForecastableModels) {
                     uint64_t elapsedTime = timer.lap();
                     if (elapsedTime - lastStatsUpdate > MINIMUM_TIME_ELAPSED_FOR_STATS_UPDATE) {
-                        sink.writeStats(processedModels / totalNumberOfForecastableModels, elapsedTime, forecastJob.s_Messages);
+                        sink.writeStats(processedModels / totalNumberOfForecastableModels,
+                                        elapsedTime, forecastJob.s_Messages);
                         lastStatsUpdate = elapsedTime;
                     }
                 }
@@ -196,11 +179,13 @@ void CForecastRunner::forecastWorker() {
                 forecastJob.s_ForecastSeries.pop_back();
             }
             // write final message
-            sink.writeStats(1.0, timer.stop(), messages, failedForecasts != forecastJob.s_NumberOfForecastableModels);
+            sink.writeStats(1.0, timer.stop(), messages,
+                            failedForecasts != forecastJob.s_NumberOfForecastableModels);

             // important: reset the structure to decrease shared pointer reference counts
             forecastJob.reset();
-            LOG_INFO(<< "Finished forecasting, wrote " << sink.numRecordsWritten() << " records");
+            LOG_INFO(<< "Finished forecasting, wrote "
+                     << sink.numRecordsWritten() << " records");

             // signal that job is done
             m_WorkCompleteCondition.notify_all();
@@ -240,7 +225,8 @@ bool CForecastRunner::pushForecastJob(const std::string& controlMessage, const
                                       core_t::TTime lastResultsTime) {
     SForecast forecastJob;
     if (parseAndValidateForecastRequest(
-            controlMessage, forecastJob, lastResultsTime, boost::bind(&CForecastRunner::sendErrorMessage, this, _1, _2)) == false) {
+            controlMessage, forecastJob, lastResultsTime,
+            boost::bind(&CForecastRunner::sendErrorMessage, this, _1, _2)) == false) {
         return false;
     }
@@ -262,12 +248,15 @@ bool CForecastRunner::pushForecastJob(const std::string& controlMessage,
             continue;
         }

-        model::CForecastDataSink::SForecastModelPrerequisites prerequisites = detector->getForecastPrerequisites();
+        model::CForecastDataSink::SForecastModelPrerequisites prerequisites =
+            detector->getForecastPrerequisites();

         totalNumberOfModels += prerequisites.s_NumberOfModels;
         totalNumberOfForecastModels += prerequisites.s_NumberOfForecastableModels;
-        atLeastOneNonPopulationModel = atLeastOneNonPopulationModel || !prerequisites.s_IsPopulation;
-        atLeastOneSupportedFunction = atLeastOneSupportedFunction || prerequisites.s_IsSupportedFunction;
+        atLeastOneNonPopulationModel = atLeastOneNonPopulationModel ||
+                                       !prerequisites.s_IsPopulation;
+        atLeastOneSupportedFunction = atLeastOneSupportedFunction ||
+                                      prerequisites.s_IsSupportedFunction;
         totalMemoryUsage += prerequisites.s_MemoryUsageForDetector;

         if (totalMemoryUsage >= MAX_FORECAST_MODEL_MEMORY) {
@@ -347,7 +336,8 @@ bool CForecastRunner::parseAndValidateForecastRequest(const std::string& control
         boost::property_tree::read_json(stringStream, properties);

         forecastJob.s_ForecastId = properties.get("forecast_id", EMPTY_STRING);
-        forecastJob.s_ForecastAlias = properties.get("forecast_alias", EMPTY_STRING);
+        forecastJob.s_ForecastAlias =
+            properties.get("forecast_alias", EMPTY_STRING);

         forecastJob.s_Duration = properties.get("duration", 0);
         forecastJob.s_CreateTime = properties.get("create_time", 0);
@@ -412,39 +402,34 @@ bool CForecastRunner::parseAndValidateForecastRequest(const std::string& control
 void CForecastRunner::sendScheduledMessage(const SForecast& forecastJob) const {
     LOG_DEBUG(<< "job passed forecast validation, scheduled for forecasting");

-    model::CForecastDataSink sink(m_JobId,
-                                  forecastJob.s_ForecastId,
-                                  forecastJob.s_ForecastAlias,
-                                  forecastJob.s_CreateTime,
-                                  forecastJob.s_StartTime,
-                                  forecastJob.forecastEnd(),
-                                  forecastJob.s_ExpiryTime,
-                                  forecastJob.s_MemoryUsage,
-                                  m_ConcurrentOutputStream);
+    model::CForecastDataSink sink(
+        m_JobId, forecastJob.s_ForecastId, forecastJob.s_ForecastAlias,
+        forecastJob.s_CreateTime, forecastJob.s_StartTime, forecastJob.forecastEnd(),
+        forecastJob.s_ExpiryTime, forecastJob.s_MemoryUsage, m_ConcurrentOutputStream);

     sink.writeScheduledMessage();
 }

-void CForecastRunner::sendErrorMessage(const SForecast& forecastJob, const std::string& message) const {
+void CForecastRunner::sendErrorMessage(const SForecast& forecastJob,
+                                       const std::string& message) const {
     LOG_ERROR(<< message);
     this->sendMessage(&model::CForecastDataSink::writeErrorMessage, forecastJob, message);
 }

-void CForecastRunner::sendFinalMessage(const SForecast& forecastJob, const std::string& message) const {
+void CForecastRunner::sendFinalMessage(const SForecast& forecastJob,
+                                       const std::string& message) const {
     this->sendMessage(&model::CForecastDataSink::writeFinalMessage, forecastJob, message);
 }

 template<typename WRITE>
-void CForecastRunner::sendMessage(WRITE write, const SForecast& forecastJob, const std::string& message) const {
-    model::CForecastDataSink sink(m_JobId,
-                                  forecastJob.s_ForecastId,
-                                  forecastJob.s_ForecastAlias,
-                                  forecastJob.s_CreateTime,
-                                  forecastJob.s_StartTime,
-                                  forecastJob.forecastEnd(),
-                                  // in an error case use the default expiry time
-                                  forecastJob.s_CreateTime + DEFAULT_EXPIRY_TIME,
-                                  forecastJob.s_MemoryUsage,
-                                  m_ConcurrentOutputStream);
+void CForecastRunner::sendMessage(WRITE write,
+                                  const SForecast& forecastJob,
+                                  const std::string& message) const {
+    model::CForecastDataSink sink(
+        m_JobId, forecastJob.s_ForecastId, forecastJob.s_ForecastAlias,
+        forecastJob.s_CreateTime, forecastJob.s_StartTime, forecastJob.forecastEnd(),
+        // in an error case use the default expiry time
+        forecastJob.s_CreateTime + DEFAULT_EXPIRY_TIME,
+        forecastJob.s_MemoryUsage, m_ConcurrentOutputStream);

     (sink.*write)(message);
 }
diff --git a/lib/api/CHierarchicalResultsWriter.cc b/lib/api/CHierarchicalResultsWriter.cc
index 6cf8d0283e..66d4e7ebad 100644
--- a/lib/api/CHierarchicalResultsWriter.cc
+++ b/lib/api/CHierarchicalResultsWriter.cc
@@ -24,128 +24,102 @@ const std::string EMPTY_STRING;
 const CHierarchicalResultsWriter::TStr1Vec EMPTY_STRING_LIST;
 }

-CHierarchicalResultsWriter::SResults::SResults(bool isAllTimeResult,
-                                               bool isOverallResult,
-                                               const std::string& partitionFieldName,
-                                               const std::string& partitionFieldValue,
-                                               const std::string& overFieldName,
-                                               const std::string& overFieldValue,
-                                               const std::string& byFieldName,
-                                               const std::string& byFieldValue,
-                                               const std::string& correlatedByFieldValue,
-                                               core_t::TTime bucketStartTime,
-                                               const std::string& functionName,
-                                               const std::string& functionDescription,
-                                               const TDouble1Vec& functionValue,
-                                               const TDouble1Vec& populationAverage,
-                                               double rawAnomalyScore,
-                                               double normalizedAnomalyScore,
-                                               double probability,
-                                               const TOptionalUInt64& currentRate,
-                                               const std::string& metricValueField,
-                                               const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences,
-                                               bool useNull,
-                                               bool metric,
-                                               int identifier,
-                                               core_t::TTime bucketSpan)
-    : s_ResultType(E_PopulationResult),
-      s_IsAllTimeResult(isAllTimeResult),
-      s_IsOverallResult(isOverallResult),
-      s_UseNull(useNull),
-      s_IsMetric(metric),
-      s_PartitionFieldName(partitionFieldName),
-      s_PartitionFieldValue(partitionFieldValue),
-      s_ByFieldName(byFieldName),
-      s_ByFieldValue(byFieldValue),
-      s_CorrelatedByFieldValue(correlatedByFieldValue),
-      s_OverFieldName(overFieldName),
-      s_OverFieldValue(overFieldValue),
-      s_MetricValueField(metricValueField),
-      s_BucketStartTime(bucketStartTime),
-      s_BucketSpan(bucketSpan),
-      s_FunctionName(functionName),
+CHierarchicalResultsWriter::SResults::SResults(
+    bool isAllTimeResult,
+    bool isOverallResult,
+    const std::string& partitionFieldName,
+    const std::string& partitionFieldValue,
+    const std::string& overFieldName,
+    const std::string& overFieldValue,
+    const std::string& byFieldName,
+    const std::string& byFieldValue,
+    const std::string& correlatedByFieldValue,
+    core_t::TTime bucketStartTime,
+    const std::string& functionName,
+    const std::string& functionDescription,
+    const TDouble1Vec& functionValue,
+    const TDouble1Vec& populationAverage,
+    double rawAnomalyScore,
+    double normalizedAnomalyScore,
+    double probability,
+    const TOptionalUInt64& currentRate,
+    const std::string& metricValueField,
+    const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences,
+    bool useNull,
+    bool metric,
+    int identifier,
+    core_t::TTime bucketSpan)
+    : s_ResultType(E_PopulationResult), s_IsAllTimeResult(isAllTimeResult),
+      s_IsOverallResult(isOverallResult), s_UseNull(useNull),
+      s_IsMetric(metric), s_PartitionFieldName(partitionFieldName),
+      s_PartitionFieldValue(partitionFieldValue), s_ByFieldName(byFieldName),
+      s_ByFieldValue(byFieldValue), s_CorrelatedByFieldValue(correlatedByFieldValue),
+      s_OverFieldName(overFieldName), s_OverFieldValue(overFieldValue),
+      s_MetricValueField(metricValueField), s_BucketStartTime(bucketStartTime),
+      s_BucketSpan(bucketSpan), s_FunctionName(functionName),
       s_FunctionDescription(functionDescription),
-      s_FunctionValue(functionValue),
-      s_PopulationAverage(populationAverage),
-      s_BaselineRate(0.0),
-      s_CurrentRate(currentRate),
-      s_BaselineMean(0.0),
-      s_CurrentMean(0.0),
-      s_RawAnomalyScore(rawAnomalyScore),
-      s_NormalizedAnomalyScore(normalizedAnomalyScore),
-      s_Probability(probability),
-      s_Influences(influences),
-      s_Identifier(identifier) {
+      s_FunctionValue(functionValue), s_PopulationAverage(populationAverage),
+      s_BaselineRate(0.0), s_CurrentRate(currentRate), s_BaselineMean(0.0),
+      s_CurrentMean(0.0), s_RawAnomalyScore(rawAnomalyScore),
+      s_NormalizedAnomalyScore(normalizedAnomalyScore), s_Probability(probability),
+      s_Influences(influences), s_Identifier(identifier) {
 }

-CHierarchicalResultsWriter::SResults::SResults(EResultType resultType,
-                                               const std::string& partitionFieldName,
-                                               const std::string& partitionFieldValue,
-                                               const std::string& byFieldName,
-                                               const std::string& byFieldValue,
-                                               const std::string& correlatedByFieldValue,
-                                               core_t::TTime bucketStartTime,
-                                               const std::string& functionName,
-                                               const std::string& functionDescription,
-                                               const TOptionalDouble& baselineRate,
-                                               const TOptionalUInt64& currentRate,
-                                               const TDouble1Vec& baselineMean,
-                                               const TDouble1Vec& currentMean,
-                                               double rawAnomalyScore,
-                                               double normalizedAnomalyScore,
-                                               double probability,
-                                               const std::string& metricValueField,
-                                               const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences,
-                                               bool useNull,
-                                               bool metric,
-                                               int identifier,
-                                               core_t::TTime bucketSpan,
-                                               TStr1Vec scheduledEventDescriptions)
-    : s_ResultType(resultType),
-      s_IsAllTimeResult(false),
-      s_IsOverallResult(true),
-      s_UseNull(useNull),
-      s_IsMetric(metric),
-      s_PartitionFieldName(partitionFieldName),
-      s_PartitionFieldValue(partitionFieldValue),
-      s_ByFieldName(byFieldName),
-      s_ByFieldValue(byFieldValue),
-      s_CorrelatedByFieldValue(correlatedByFieldValue),
+CHierarchicalResultsWriter::SResults::SResults(
+    EResultType resultType,
+    const std::string& partitionFieldName,
+    const std::string& partitionFieldValue,
+    const std::string& byFieldName,
+    const std::string& byFieldValue,
+    const std::string& correlatedByFieldValue,
+    core_t::TTime bucketStartTime,
+    const std::string& functionName,
+    const std::string& functionDescription,
+    const TOptionalDouble& baselineRate,
+    const TOptionalUInt64& currentRate,
+    const TDouble1Vec& baselineMean,
+    const TDouble1Vec& currentMean,
+    double rawAnomalyScore,
+    double normalizedAnomalyScore,
+    double probability,
+    const std::string& metricValueField,
+    const TStoredStringPtrStoredStringPtrPrDoublePrVec& influences,
+    bool useNull,
+    bool metric,
+    int identifier,
+    core_t::TTime bucketSpan,
+    TStr1Vec scheduledEventDescriptions)
+    : s_ResultType(resultType), s_IsAllTimeResult(false), s_IsOverallResult(true),
+      s_UseNull(useNull), s_IsMetric(metric), s_PartitionFieldName(partitionFieldName),
+      s_PartitionFieldValue(partitionFieldValue), s_ByFieldName(byFieldName),
+      s_ByFieldValue(byFieldValue), s_CorrelatedByFieldValue(correlatedByFieldValue),
       // The simple count output is characterised by both 'by' and 'over' field names being 'count'
       // TODO: this could be done differently now, with changes to both this class and CJsonOutputWriter
       s_OverFieldName((byFieldName == COUNT_NAME) ? COUNT_NAME : EMPTY_STRING),
-      s_OverFieldValue(EMPTY_STRING),
-      s_MetricValueField(metricValueField),
-      s_BucketStartTime(bucketStartTime),
-      s_BucketSpan(bucketSpan),
-      s_FunctionName(functionName),
-      s_FunctionDescription(functionDescription),
-      s_FunctionValue(0.0),
-      s_PopulationAverage(0.0),
-      s_BaselineRate(baselineRate),
-      s_CurrentRate(currentRate),
-      s_BaselineMean(baselineMean),
-      s_CurrentMean(currentMean),
-      s_RawAnomalyScore(rawAnomalyScore),
-      s_NormalizedAnomalyScore(normalizedAnomalyScore),
-      s_Probability(probability),
-      s_Influences(influences),
-      s_Identifier(identifier),
+      s_OverFieldValue(EMPTY_STRING), s_MetricValueField(metricValueField),
+      s_BucketStartTime(bucketStartTime), s_BucketSpan(bucketSpan),
+      s_FunctionName(functionName), s_FunctionDescription(functionDescription),
+      s_FunctionValue(0.0), s_PopulationAverage(0.0), s_BaselineRate(baselineRate),
+      s_CurrentRate(currentRate), s_BaselineMean(baselineMean),
+      s_CurrentMean(currentMean), s_RawAnomalyScore(rawAnomalyScore),
+      s_NormalizedAnomalyScore(normalizedAnomalyScore), s_Probability(probability),
+      s_Influences(influences), s_Identifier(identifier),
       s_ScheduledEventDescriptions(scheduledEventDescriptions) {
 }

-CHierarchicalResultsWriter::CHierarchicalResultsWriter(const model::CLimits& limits,
-                                                       const model::CAnomalyDetectorModelConfig& modelConfig,
-                                                       const TResultWriterFunc& resultWriterFunc,
-                                                       const TPivotWriterFunc& pivotWriterFunc)
-    : m_Limits(limits),
-      m_ModelConfig(modelConfig),
-      m_ResultWriterFunc(resultWriterFunc),
-      m_PivotWriterFunc(pivotWriterFunc),
+CHierarchicalResultsWriter::CHierarchicalResultsWriter(
+    const model::CLimits& limits,
+    const model::CAnomalyDetectorModelConfig& modelConfig,
+    const TResultWriterFunc& resultWriterFunc,
+    const TPivotWriterFunc& pivotWriterFunc)
+    : m_Limits(limits), m_ModelConfig(modelConfig),
+      m_ResultWriterFunc(resultWriterFunc), m_PivotWriterFunc(pivotWriterFunc),
       m_BucketTime(0) {
 }

-void CHierarchicalResultsWriter::visit(const model::CHierarchicalResults& results, const TNode& node, bool pivot) {
+void CHierarchicalResultsWriter::visit(const model::CHierarchicalResults& results,
+                                       const TNode& node,
+                                       bool pivot) {
     if (pivot) {
         this->writePivotResult(results, node);
     } else {
@@ -156,7 +130,8 @@ void CHierarchicalResultsWriter::visit(const model::CHierarchicalResults& result
     }
 }

-void CHierarchicalResultsWriter::writePopulationResult(const model::CHierarchicalResults& results, const TNode& node) {
+void CHierarchicalResultsWriter::writePopulationResult(const model::CHierarchicalResults& results,
+                                                       const TNode& node) {
     if (this->isSimpleCount(node) || !this->isLeaf(node) || !this->isPopulation(node) ||
         !this->shouldWriteResult(m_Limits, results, node, false)) {
         return;
     }
@@ -171,11 +146,14 @@ void CHierarchicalResultsWriter::writePopulationResult(const model::CHierarchica
     const std::string& functionDescription =
         node.s_AnnotatedProbability.s_AttributeProbabilities.empty()
             ? EMPTY_STRING
-            : model_t::outputFunctionName(node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature);
+            : model_t::outputFunctionName(
+                  node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature);

     TOptionalDouble null;

-    for (std::size_t i = 0; i < node.s_AnnotatedProbability.s_AttributeProbabilities.size(); ++i) {
-        const model::SAttributeProbability& attributeProbability = node.s_AnnotatedProbability.s_AttributeProbabilities[i];
+    for (std::size_t i = 0;
+         i < node.s_AnnotatedProbability.s_AttributeProbabilities.size(); ++i) {
+        const model::SAttributeProbability& attributeProbability =
+            node.s_AnnotatedProbability.s_AttributeProbabilities[i];

         // TODO - At present the display code can only cope with all the
         // attribute rows having the same output function name as the
@@ -207,58 +185,39 @@ void CHierarchicalResultsWriter::writePopulationResult(const model::CHierarchica
             continue;
         }

-        m_ResultWriterFunc(
-            TResults(false,
-                     false, // not an overall result
-                     *node.s_Spec.s_PartitionFieldName,
-                     *node.s_Spec.s_PartitionFieldValue,
-                     *node.s_Spec.s_PersonFieldName,
-                     *node.s_Spec.s_PersonFieldValue,
-                     *node.s_Spec.s_ByFieldName,
-                     attribute, // attribute field value
-                     attributeProbability.s_CorrelatedAttributes.empty() ? EMPTY_STRING : *attributeProbability.s_CorrelatedAttributes[0],
-                     node.s_BucketStartTime,
-                     *node.s_Spec.s_FunctionName,
-                     functionDescription,
-                     personAttributeValue,
-                     attributeMean,
-                     node.s_RawAnomalyScore,
-                     node.s_NormalizedAnomalyScore,
-                     attributeProbability.s_Probability,
-                     node.s_AnnotatedProbability.s_CurrentBucketCount,
-                     *node.s_Spec.s_ValueFieldName,
-                     node.s_AnnotatedProbability.s_Influences,
-                     node.s_Spec.s_UseNull,
-                     model::function_t::isMetric(node.s_Spec.s_Function),
-                     node.s_Spec.s_Detector,
-                     node.s_BucketLength));
+        m_ResultWriterFunc(TResults(
+            false,
+            false, // not an overall result
+            *node.s_Spec.s_PartitionFieldName,
+            *node.s_Spec.s_PartitionFieldValue, *node.s_Spec.s_PersonFieldName,
+            *node.s_Spec.s_PersonFieldValue, *node.s_Spec.s_ByFieldName,
+            attribute, // attribute field value
+            attributeProbability.s_CorrelatedAttributes.empty()
+                ? EMPTY_STRING
+                : *attributeProbability.s_CorrelatedAttributes[0],
+            node.s_BucketStartTime, *node.s_Spec.s_FunctionName, functionDescription,
+            personAttributeValue, attributeMean, node.s_RawAnomalyScore,
+            node.s_NormalizedAnomalyScore, attributeProbability.s_Probability,
+            node.s_AnnotatedProbability.s_CurrentBucketCount, *node.s_Spec.s_ValueFieldName,
+            node.s_AnnotatedProbability.s_Influences, node.s_Spec.s_UseNull,
+            model::function_t::isMetric(node.s_Spec.s_Function),
+            node.s_Spec.s_Detector, node.s_BucketLength));
     }

     // Overall result for this person
-    m_ResultWriterFunc(TResults(false,
-                                true, // this is an overall result
-                                *node.s_Spec.s_PartitionFieldName,
-                                *node.s_Spec.s_PartitionFieldValue,
-                                *node.s_Spec.s_PersonFieldName,
-                                *node.s_Spec.s_PersonFieldValue,
-                                *node.s_Spec.s_ByFieldName,
-                                EMPTY_STRING,
-                                EMPTY_STRING,
-                                node.s_BucketStartTime,
-                                *node.s_Spec.s_FunctionName,
-                                functionDescription,
-                                TDouble1Vec(1, 0.0), // no function value in overall result
-                                TDouble1Vec(1, 0.0), // no population average in overall result
-                                node.s_RawAnomalyScore,
-                                node.s_NormalizedAnomalyScore,
-                                node.probability(),
-                                node.s_AnnotatedProbability.s_CurrentBucketCount,
-                                *node.s_Spec.s_ValueFieldName,
-                                node.s_AnnotatedProbability.s_Influences,
-                                node.s_Spec.s_UseNull,
-                                model::function_t::isMetric(node.s_Spec.s_Function),
-                                node.s_Spec.s_Detector,
-                                node.s_BucketLength));
+    m_ResultWriterFunc(TResults(
+        false,
+        true, // this is an overall result
+        *node.s_Spec.s_PartitionFieldName, *node.s_Spec.s_PartitionFieldValue,
+        *node.s_Spec.s_PersonFieldName, *node.s_Spec.s_PersonFieldValue,
+        *node.s_Spec.s_ByFieldName, EMPTY_STRING, EMPTY_STRING, node.s_BucketStartTime,
+        *node.s_Spec.s_FunctionName, functionDescription, TDouble1Vec(1, 0.0), // no function value in overall result
+        TDouble1Vec(1, 0.0), // no population average in overall result
+        node.s_RawAnomalyScore, node.s_NormalizedAnomalyScore,
+        node.probability(), node.s_AnnotatedProbability.s_CurrentBucketCount,
+        *node.s_Spec.s_ValueFieldName, node.s_AnnotatedProbability.s_Influences,
+        node.s_Spec.s_UseNull, model::function_t::isMetric(node.s_Spec.s_Function),
+        node.s_Spec.s_Detector, node.s_BucketLength));

     // TODO - could also output "all time" results here
     // These would have the first argument to the SResults constructor
@@ -267,83 +226,69 @@ void CHierarchicalResultsWriter::writePopulationResult(const model::CHierarchica
     // results)
 }

-void CHierarchicalResultsWriter::writeIndividualResult(const model::CHierarchicalResults& results, const TNode& node) {
+void CHierarchicalResultsWriter::writeIndividualResult(const model::CHierarchicalResults& results,
+                                                       const TNode& node) {
     if (this->isSimpleCount(node) || !this->isLeaf(node) || this->isPopulation(node) ||
         !this->shouldWriteResult(m_Limits, results, node, false)) {
         return;
     }

-    model_t::EFeature feature = node.s_AnnotatedProbability.s_AttributeProbabilities.empty()
-                                    ? model_t::E_IndividualCountByBucketAndPerson
-                                    : node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature;
-
-    const model::SAttributeProbability& attributeProbability = node.s_AnnotatedProbability.s_AttributeProbabilities[0];
-
-    m_ResultWriterFunc(
-        TResults(E_Result,
-                 *node.s_Spec.s_PartitionFieldName,
-                 *node.s_Spec.s_PartitionFieldValue,
-                 *node.s_Spec.s_ByFieldName,
-                 *node.s_Spec.s_PersonFieldValue,
-                 attributeProbability.s_CorrelatedAttributes.empty() ? EMPTY_STRING : *attributeProbability.s_CorrelatedAttributes[0],
-                 node.s_BucketStartTime,
-                 *node.s_Spec.s_FunctionName,
-                 model_t::outputFunctionName(feature),
-                 node.s_AnnotatedProbability.s_BaselineBucketCount,
-                 node.s_AnnotatedProbability.s_CurrentBucketCount,
-                 attributeProbability.s_BaselineBucketMean,
-                 attributeProbability.s_CurrentBucketValue,
-                 node.s_RawAnomalyScore,
-                 node.s_NormalizedAnomalyScore,
-                 node.probability(),
-                 *node.s_Spec.s_ValueFieldName,
-                 node.s_AnnotatedProbability.s_Influences,
-                 node.s_Spec.s_UseNull,
-                 model::function_t::isMetric(node.s_Spec.s_Function),
-                 node.s_Spec.s_Detector,
-                 node.s_BucketLength,
-                 EMPTY_STRING_LIST));
+    model_t::EFeature feature =
+        node.s_AnnotatedProbability.s_AttributeProbabilities.empty()
+            ? model_t::E_IndividualCountByBucketAndPerson
+            : node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature;
+
+    const model::SAttributeProbability& attributeProbability =
+        node.s_AnnotatedProbability.s_AttributeProbabilities[0];
+
+    m_ResultWriterFunc(TResults(
+        E_Result, *node.s_Spec.s_PartitionFieldName, *node.s_Spec.s_PartitionFieldValue,
+        *node.s_Spec.s_ByFieldName, *node.s_Spec.s_PersonFieldValue,
+        attributeProbability.s_CorrelatedAttributes.empty()
+            ? EMPTY_STRING
+            : *attributeProbability.s_CorrelatedAttributes[0],
+        node.s_BucketStartTime, *node.s_Spec.s_FunctionName,
+        model_t::outputFunctionName(feature), node.s_AnnotatedProbability.s_BaselineBucketCount,
+        node.s_AnnotatedProbability.s_CurrentBucketCount,
+        attributeProbability.s_BaselineBucketMean, attributeProbability.s_CurrentBucketValue,
+        node.s_RawAnomalyScore, node.s_NormalizedAnomalyScore, node.probability(),
+        *node.s_Spec.s_ValueFieldName, node.s_AnnotatedProbability.s_Influences,
+        node.s_Spec.s_UseNull, model::function_t::isMetric(node.s_Spec.s_Function),
+        node.s_Spec.s_Detector, node.s_BucketLength, EMPTY_STRING_LIST));
 }

-void CHierarchicalResultsWriter::writePartitionResult(const model::CHierarchicalResults& results, const TNode& node) {
-    if (!m_ModelConfig.perPartitionNormalization() || this->isSimpleCount(node) || this->isPopulation(node) || !this->isPartition(node) ||
+void CHierarchicalResultsWriter::writePartitionResult(const model::CHierarchicalResults& results,
+                                                      const TNode& node) {
+    if (!m_ModelConfig.perPartitionNormalization() || this->isSimpleCount(node) ||
+        this->isPopulation(node) || !this->isPartition(node) ||
         !this->shouldWriteResult(m_Limits, results, node, false)) {
         return;
     }

-    model_t::EFeature feature = node.s_AnnotatedProbability.s_AttributeProbabilities.empty()
-                                    ? model_t::E_IndividualCountByBucketAndPerson
-                                    : node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature;
+    model_t::EFeature feature =
+        node.s_AnnotatedProbability.s_AttributeProbabilities.empty()
+            ? model_t::E_IndividualCountByBucketAndPerson
+            : node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature;

     TDouble1Vec emptyDoubleVec;

-    m_ResultWriterFunc(TResults(E_PartitionResult,
-                                *node.s_Spec.s_PartitionFieldName,
-                                *node.s_Spec.s_PartitionFieldValue,
-                                *node.s_Spec.s_ByFieldName,
-                                *node.s_Spec.s_PersonFieldValue,
-                                EMPTY_STRING,
-                                node.s_BucketStartTime,
-                                *node.s_Spec.s_FunctionName,
-                                model_t::outputFunctionName(feature),
-                                node.s_AnnotatedProbability.s_BaselineBucketCount,
-                                node.s_AnnotatedProbability.s_CurrentBucketCount,
-                                emptyDoubleVec,
-                                emptyDoubleVec,
-                                node.s_RawAnomalyScore,
-                                node.s_NormalizedAnomalyScore,
-                                node.probability(),
-                                *node.s_Spec.s_ValueFieldName,
-                                node.s_AnnotatedProbability.s_Influences,
-                                node.s_Spec.s_UseNull,
-                                model::function_t::isMetric(node.s_Spec.s_Function),
-                                node.s_Spec.s_Detector,
-                                node.s_BucketLength,
-                                EMPTY_STRING_LIST));
+    m_ResultWriterFunc(TResults(
+        E_PartitionResult, *node.s_Spec.s_PartitionFieldName,
+        *node.s_Spec.s_PartitionFieldValue, *node.s_Spec.s_ByFieldName,
+        *node.s_Spec.s_PersonFieldValue, EMPTY_STRING, node.s_BucketStartTime,
+        *node.s_Spec.s_FunctionName, model_t::outputFunctionName(feature),
+        node.s_AnnotatedProbability.s_BaselineBucketCount,
+        node.s_AnnotatedProbability.s_CurrentBucketCount, emptyDoubleVec, emptyDoubleVec,
+        node.s_RawAnomalyScore, node.s_NormalizedAnomalyScore, node.probability(),
+        *node.s_Spec.s_ValueFieldName, node.s_AnnotatedProbability.s_Influences,
+        node.s_Spec.s_UseNull, model::function_t::isMetric(node.s_Spec.s_Function),
+        node.s_Spec.s_Detector, node.s_BucketLength, EMPTY_STRING_LIST));
 }

-void CHierarchicalResultsWriter::writePivotResult(const model::CHierarchicalResults& results, const TNode& node) {
-    if (this->isSimpleCount(node) || !this->shouldWriteResult(m_Limits, results, node, true)) {
+void CHierarchicalResultsWriter::writePivotResult(const model::CHierarchicalResults& results,
+                                                  const TNode& node) {
+    if (this->isSimpleCount(node) ||
+        !this->shouldWriteResult(m_Limits, results, node, true)) {
         return;
     }
@@ -364,32 +309,21 @@ void CHierarchicalResultsWriter::writeSimpleCountResult(const TNode& node) {
     TOptionalDouble baselineCount = node.s_AnnotatedProbability.s_BaselineBucketCount;
     TOptionalUInt64 currentCount = node.s_AnnotatedProbability.s_CurrentBucketCount;

-    m_ResultWriterFunc(TResults(E_SimpleCountResult,
-                                *node.s_Spec.s_PartitionFieldName,
-                                *node.s_Spec.s_PartitionFieldValue,
-                                *node.s_Spec.s_ByFieldName,
-                                *node.s_Spec.s_PersonFieldValue,
-                                EMPTY_STRING,
-                                m_BucketTime,
-                                EMPTY_STRING,
-                                EMPTY_STRING,
-                                baselineCount,
-                                currentCount,
-                                baselineCount ? TDouble1Vec(1, *baselineCount) : TDouble1Vec(),
-                                currentCount ? TDouble1Vec(1, static_cast<double>(*currentCount)) : TDouble1Vec(),
+    m_ResultWriterFunc(TResults(
+        E_SimpleCountResult, *node.s_Spec.s_PartitionFieldName, *node.s_Spec.s_PartitionFieldValue,
+        *node.s_Spec.s_ByFieldName, *node.s_Spec.s_PersonFieldValue, EMPTY_STRING,
+        m_BucketTime, EMPTY_STRING, EMPTY_STRING, baselineCount, currentCount,
+        baselineCount ? TDouble1Vec(1, *baselineCount) : TDouble1Vec(),
+        currentCount ?
TDouble1Vec(1, static_cast<double>(*currentCount)) : TDouble1Vec(),
-double doubleFromDocument(const CJsonOutputWriter::TDocumentWeakPtr& weakDoc, const std::string& field) { +double doubleFromDocument(const CJsonOutputWriter::TDocumentWeakPtr& weakDoc, + const std::string& field) { CJsonOutputWriter::TDocumentPtr docPtr = weakDoc.lock(); if (!docPtr) { LOG_ERROR(<< "Inconsistent program state. JSON document unavailable."); @@ -90,8 +91,10 @@ double doubleFromDocument(const CJsonOutputWriter::TDocumentWeakPtr& weakDoc, co //! Sort rapidjson documents by the probability lowest to highest class CProbabilityLess { public: - bool operator()(const CJsonOutputWriter::TDocumentWeakPtrIntPr& lhs, const CJsonOutputWriter::TDocumentWeakPtrIntPr& rhs) const { - return doubleFromDocument(lhs.first, PROBABILITY) < doubleFromDocument(rhs.first, PROBABILITY); + bool operator()(const CJsonOutputWriter::TDocumentWeakPtrIntPr& lhs, + const CJsonOutputWriter::TDocumentWeakPtrIntPr& rhs) const { + return doubleFromDocument(lhs.first, PROBABILITY) < + doubleFromDocument(rhs.first, PROBABILITY); } }; @@ -100,9 +103,11 @@ const CProbabilityLess PROBABILITY_LESS = CProbabilityLess(); //! Sort rapidjson documents by detector name first then probability lowest to highest class CDetectorThenProbabilityLess { public: - bool operator()(const CJsonOutputWriter::TDocumentWeakPtrIntPr& lhs, const CJsonOutputWriter::TDocumentWeakPtrIntPr& rhs) const { + bool operator()(const CJsonOutputWriter::TDocumentWeakPtrIntPr& lhs, + const CJsonOutputWriter::TDocumentWeakPtrIntPr& rhs) const { if (lhs.second == rhs.second) { - return doubleFromDocument(lhs.first, PROBABILITY) < doubleFromDocument(rhs.first, PROBABILITY); + return doubleFromDocument(lhs.first, PROBABILITY) < + doubleFromDocument(rhs.first, PROBABILITY); } return lhs.second < rhs.second; } @@ -113,7 +118,8 @@ const CDetectorThenProbabilityLess DETECTOR_PROBABILITY_LESS = CDetectorThenProb //! 
Sort influences from highest to lowest
bucketData.s_LowestBucketInfluencerScore : bucketData.s_LowestInfluencerScore; + double& lowestScore = (isBucketInfluencer) + ? bucketData.s_LowestBucketInfluencerScore + : bucketData.s_LowestInfluencerScore; if (node.s_NormalizedAnomalyScore < lowestScore) { // Don't write this influencer @@ -269,18 +287,22 @@ bool CJsonOutputWriter::acceptInfluencer(core_t::TTime time, const model::CHiera bool sortVectorAfterWritingDoc = isLimitedWrite && documents.size() >= m_RecordOutputLimit; if (sortVectorAfterWritingDoc) { - std::sort(documents.begin(), documents.end(), isBucketInfluencer ? BUCKET_INFLUENCER_GREATER : INFLUENCER_GREATER); + std::sort(documents.begin(), documents.end(), + isBucketInfluencer ? BUCKET_INFLUENCER_GREATER : INFLUENCER_GREATER); } if (isBucketInfluencer) { bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore = - std::max(bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore, node.s_NormalizedAnomalyScore); + std::max(bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore, + node.s_NormalizedAnomalyScore); bucketData.s_LowestBucketInfluencerScore = - std::min(bucketData.s_LowestBucketInfluencerScore, doubleFromDocument(documents.back(), INITIAL_SCORE)); + std::min(bucketData.s_LowestBucketInfluencerScore, + doubleFromDocument(documents.back(), INITIAL_SCORE)); } else { bucketData.s_LowestInfluencerScore = - std::min(bucketData.s_LowestInfluencerScore, doubleFromDocument(documents.back(), INITIAL_INFLUENCER_SCORE)); + std::min(bucketData.s_LowestInfluencerScore, + doubleFromDocument(documents.back(), INITIAL_INFLUENCER_SCORE)); } return true; @@ -307,13 +329,14 @@ void CJsonOutputWriter::acceptBucketTimeInfluencer(core_t::TTime time, m_Writer.addDoubleFieldToObj(INITIAL_SCORE, normalizedAnomalyScore, *newDoc); m_Writer.addDoubleFieldToObj(ANOMALY_SCORE, normalizedAnomalyScore, *newDoc); - bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore = - std::max(bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore, normalizedAnomalyScore); + bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore = std::max( + bucketData.s_MaxBucketInfluencerNormalizedAnomalyScore, normalizedAnomalyScore); bucketData.s_BucketInfluencerDocuments.push_back(doc); } bool CJsonOutputWriter::endOutputBatch(bool isInterim, uint64_t bucketProcessingTime) { - for (TTimeBucketDataMapItr iter = m_BucketDataByTime.begin(); iter != m_BucketDataByTime.end(); ++iter) { + for (TTimeBucketDataMapItr iter = m_BucketDataByTime.begin(); + iter != m_BucketDataByTime.end(); ++iter) { this->writeBucket(isInterim, iter->first, iter->second, bucketProcessingTime); if (!isInterim) { m_LastNonInterimBucketTime = iter->first; @@ -328,7 +351,8 @@ bool CJsonOutputWriter::endOutputBatch(bool isInterim, uint64_t bucketProcessing return true; } -bool CJsonOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { +bool CJsonOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, + const TStrVec& /*extraFieldNames*/) { return true; } @@ -336,8 +360,10 @@ const CJsonOutputWriter::TStrVec& CJsonOutputWriter::fieldNames() const { return EMPTY_FIELD_NAMES; } -bool CJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { - using TScopedAllocator = core::CScopedRapidJsonPoolAllocator; +bool CJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields) { + using TScopedAllocator = + core::CScopedRapidJsonPoolAllocator; TScopedAllocator scopedAllocator("CJsonOutputWriter::writeRow", m_Writer); @@ -345,7 
+371,8 @@ bool CJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStr // Write all the fields to the document as strings // No need to copy the strings as the doc is written straight away - for (TStrStrUMapCItr fieldValueIter = dataRowFields.begin(); fieldValueIter != dataRowFields.end(); ++fieldValueIter) { + for (TStrStrUMapCItr fieldValueIter = dataRowFields.begin(); + fieldValueIter != dataRowFields.end(); ++fieldValueIter) { const std::string& name = fieldValueIter->first; const std::string& value = fieldValueIter->second; @@ -355,7 +382,8 @@ bool CJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStr } } - for (TStrStrUMapCItr fieldValueIter = overrideDataRowFields.begin(); fieldValueIter != overrideDataRowFields.end(); ++fieldValueIter) { + for (TStrStrUMapCItr fieldValueIter = overrideDataRowFields.begin(); + fieldValueIter != overrideDataRowFields.end(); ++fieldValueIter) { const std::string& name = fieldValueIter->first; const std::string& value = fieldValueIter->second; @@ -367,21 +395,25 @@ bool CJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStr return true; } -void CJsonOutputWriter::writeBucket(bool isInterim, core_t::TTime bucketTime, SBucketData& bucketData, uint64_t bucketProcessingTime) { +void CJsonOutputWriter::writeBucket(bool isInterim, + core_t::TTime bucketTime, + SBucketData& bucketData, + uint64_t bucketProcessingTime) { // Write records if (!bucketData.s_DocumentsToWrite.empty()) { // Sort the results so they are grouped by detector and // ordered by probability - std::sort(bucketData.s_DocumentsToWrite.begin(), bucketData.s_DocumentsToWrite.end(), DETECTOR_PROBABILITY_LESS); + std::sort(bucketData.s_DocumentsToWrite.begin(), + bucketData.s_DocumentsToWrite.end(), DETECTOR_PROBABILITY_LESS); m_Writer.StartObject(); m_Writer.String(RECORDS); m_Writer.StartArray(); // Iterate over the different detectors that we have results for - for (TDocumentWeakPtrIntPrVecItr detectorIter = bucketData.s_DocumentsToWrite.begin(); - detectorIter != bucketData.s_DocumentsToWrite.end(); - ++detectorIter) { + for (TDocumentWeakPtrIntPrVecItr detectorIter = + bucketData.s_DocumentsToWrite.begin(); + detectorIter != bucketData.s_DocumentsToWrite.end(); ++detectorIter) { // Write the document, adding some extra fields as we go int detectorIndex = detectorIter->second; TDocumentWeakPtr weakDoc = detectorIter->first; @@ -410,9 +442,9 @@ void CJsonOutputWriter::writeBucket(bool isInterim, core_t::TTime bucketTime, SB m_Writer.StartObject(); m_Writer.String(INFLUENCERS); m_Writer.StartArray(); - for (TDocumentWeakPtrVecItr influencerIter = bucketData.s_InfluencerDocuments.begin(); - influencerIter != bucketData.s_InfluencerDocuments.end(); - ++influencerIter) { + for (TDocumentWeakPtrVecItr influencerIter = + bucketData.s_InfluencerDocuments.begin(); + influencerIter != bucketData.s_InfluencerDocuments.end(); ++influencerIter) { TDocumentWeakPtr weakDoc = *influencerIter; TDocumentPtr docPtr = weakDoc.lock(); if (!docPtr) { @@ -459,7 +491,8 @@ void CJsonOutputWriter::writeBucket(bool isInterim, core_t::TTime bucketTime, SB // Write the array of influencers m_Writer.String(BUCKET_INFLUENCERS); m_Writer.StartArray(); - for (TDocumentWeakPtrVecItr influencerIter = bucketData.s_BucketInfluencerDocuments.begin(); + for (TDocumentWeakPtrVecItr influencerIter = + bucketData.s_BucketInfluencerDocuments.begin(); influencerIter != bucketData.s_BucketInfluencerDocuments.end(); ++influencerIter) { TDocumentWeakPtr weakDoc = 
*influencerIter; @@ -484,7 +517,8 @@ void CJsonOutputWriter::writeBucket(bool isInterim, core_t::TTime bucketTime, SB // Write the array of partition-anonaly score pairs m_Writer.String(PARTITION_SCORES); m_Writer.StartArray(); - for (TDocumentWeakPtrVecItr partitionScoresIter = bucketData.s_PartitionScoreDocuments.begin(); + for (TDocumentWeakPtrVecItr partitionScoresIter = + bucketData.s_PartitionScoreDocuments.begin(); partitionScoresIter != bucketData.s_PartitionScoreDocuments.end(); ++partitionScoresIter) { TDocumentWeakPtr weakDoc = *partitionScoresIter; @@ -515,7 +549,8 @@ void CJsonOutputWriter::writeBucket(bool isInterim, core_t::TTime bucketTime, SB m_Writer.EndObject(); } -void CJsonOutputWriter::addMetricFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) { +void CJsonOutputWriter::addMetricFields(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); if (!docPtr) { LOG_ERROR(<< "Inconsistent program state. JSON document unavailable."); @@ -524,29 +559,36 @@ void CJsonOutputWriter::addMetricFields(const CHierarchicalResultsWriter::TResul // record_score, probability, fieldName, byFieldName, byFieldValue, partitionFieldName, // partitionFieldValue, function, typical, actual. influences? - m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); + m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, + results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(PROBABILITY, results.s_Probability, *docPtr); m_Writer.addStringFieldCopyToObj(FIELD_NAME, results.s_MetricValueField, *docPtr); if (!results.s_ByFieldName.empty()) { m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, results.s_ByFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, + *docPtr, true); // But allow correlatedByFieldValue to be unset if blank - m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, results.s_CorrelatedByFieldValue, *docPtr); + m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, + results.s_CorrelatedByFieldValue, *docPtr); } if (!results.s_PartitionFieldName.empty()) { - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); + m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, + results.s_PartitionFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj( + PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); } m_Writer.addStringFieldCopyToObj(FUNCTION, results.s_FunctionName, *docPtr); - m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, results.s_FunctionDescription, *docPtr); + m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, + results.s_FunctionDescription, *docPtr); m_Writer.addDoubleArrayFieldToObj(TYPICAL, results.s_BaselineMean, *docPtr); m_Writer.addDoubleArrayFieldToObj(ACTUAL, results.s_CurrentMean, *docPtr); } -void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) { +void CJsonOutputWriter::addPopulationFields(const 
CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); if (!docPtr) { LOG_ERROR(<< "Inconsistent program state. JSON document unavailable."); @@ -556,7 +598,8 @@ void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TR // record_score, probability, fieldName, byFieldName, // overFieldName, overFieldValue, partitionFieldName, partitionFieldValue, // function, causes, influences? - m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); + m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, + results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(PROBABILITY, results.s_Probability, *docPtr); m_Writer.addStringFieldCopyToObj(FIELD_NAME, results.s_MetricValueField, *docPtr); @@ -566,15 +609,19 @@ void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TR if (!results.s_OverFieldName.empty()) { m_Writer.addStringFieldCopyToObj(OVER_FIELD_NAME, results.s_OverFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, results.s_OverFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, + results.s_OverFieldValue, *docPtr, true); } if (!results.s_PartitionFieldName.empty()) { - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); + m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, + results.s_PartitionFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj( + PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); } m_Writer.addStringFieldCopyToObj(FUNCTION, results.s_FunctionName, *docPtr); - m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, results.s_FunctionDescription, *docPtr); + m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, + results.s_FunctionDescription, *docPtr); // Add nested causes if (m_NestedDocs.size() > 0) { @@ -598,7 +645,8 @@ void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TR } } -void CJsonOutputWriter::addPopulationCauseFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) { +void CJsonOutputWriter::addPopulationCauseFields(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); if (!docPtr) { LOG_ERROR(<< "Inconsistent program state. 
JSON document unavailable."); @@ -613,22 +661,28 @@ void CJsonOutputWriter::addPopulationCauseFields(const CHierarchicalResultsWrite if (!results.s_ByFieldName.empty()) { m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, results.s_ByFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, + *docPtr, true); // But allow correlatedByFieldValue to be unset if blank - m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, results.s_CorrelatedByFieldValue, *docPtr); + m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, + results.s_CorrelatedByFieldValue, *docPtr); } if (!results.s_OverFieldName.empty()) { m_Writer.addStringFieldCopyToObj(OVER_FIELD_NAME, results.s_OverFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, results.s_OverFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, + results.s_OverFieldValue, *docPtr, true); } if (!results.s_PartitionFieldName.empty()) { - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); + m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, + results.s_PartitionFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj( + PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); } m_Writer.addStringFieldCopyToObj(FUNCTION, results.s_FunctionName, *docPtr); - m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, results.s_FunctionDescription, *docPtr); + m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, + results.s_FunctionDescription, *docPtr); m_Writer.addDoubleArrayFieldToObj(TYPICAL, results.s_PopulationAverage, *docPtr); m_Writer.addDoubleArrayFieldToObj(ACTUAL, results.s_FunctionValue, *docPtr); } @@ -653,31 +707,37 @@ void CJsonOutputWriter::addInfluences(const CHierarchicalResultsWriter::TStoredS using TCharPtrDoublePrVec = std::vector; using TCharPtrDoublePrVecIter = TCharPtrDoublePrVec::iterator; using TCharPtrCharPtrDoublePrVecPr = std::pair; - using TStrCharPtrCharPtrDoublePrVecPrUMap = boost::unordered_map; + using TStrCharPtrCharPtrDoublePrVecPrUMap = + boost::unordered_map; using TStrCharPtrCharPtrDoublePrVecPrUMapIter = TStrCharPtrCharPtrDoublePrVecPrUMap::iterator; TStrCharPtrCharPtrDoublePrVecPrUMap influences; // group by influence field for (const auto& influenceResult : influenceResults) { - TCharPtrCharPtrDoublePrVecPr infResult(influenceResult.first.first->c_str(), TCharPtrDoublePrVec()); + TCharPtrCharPtrDoublePrVecPr infResult(influenceResult.first.first->c_str(), + TCharPtrDoublePrVec()); auto insertResult = influences.emplace(*influenceResult.first.first, infResult); - insertResult.first->second.second.emplace_back(influenceResult.first.second->c_str(), influenceResult.second); + insertResult.first->second.second.emplace_back( + influenceResult.first.second->c_str(), influenceResult.second); } // Order by influence - for (TStrCharPtrCharPtrDoublePrVecPrUMapIter iter = influences.begin(); iter != influences.end(); ++iter) { + for (TStrCharPtrCharPtrDoublePrVecPrUMapIter iter = influences.begin(); + iter != influences.end(); ++iter) { std::sort(iter->second.second.begin(), 
iter->second.second.end(), INFLUENCE_LESS); } rapidjson::Value influencesDoc = m_Writer.makeArray(influences.size()); - for (TStrCharPtrCharPtrDoublePrVecPrUMapIter iter = influences.begin(); iter != influences.end(); ++iter) { + for (TStrCharPtrCharPtrDoublePrVecPrUMapIter iter = influences.begin(); + iter != influences.end(); ++iter) { rapidjson::Value influenceDoc(rapidjson::kObjectType); rapidjson::Value values = m_Writer.makeArray(influences.size()); - for (TCharPtrDoublePrVecIter arrayIter = iter->second.second.begin(); arrayIter != iter->second.second.end(); ++arrayIter) { + for (TCharPtrDoublePrVecIter arrayIter = iter->second.second.begin(); + arrayIter != iter->second.second.end(); ++arrayIter) { m_Writer.pushBack(arrayIter->first, values); } @@ -690,7 +750,8 @@ void CJsonOutputWriter::addInfluences(const CHierarchicalResultsWriter::TStoredS m_Writer.addMember(INFLUENCERS, influencesDoc, *docPtr); } -void CJsonOutputWriter::addEventRateFields(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) { +void CJsonOutputWriter::addEventRateFields(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); if (!docPtr) { LOG_ERROR(<< "Inconsistent program state. JSON document unavailable."); @@ -700,24 +761,30 @@ void CJsonOutputWriter::addEventRateFields(const CHierarchicalResultsWriter::TRe // record_score, probability, fieldName, byFieldName, byFieldValue, partitionFieldName, // partitionFieldValue, functionName, typical, actual, influences? - m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); + m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, + results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(PROBABILITY, results.s_Probability, *docPtr); m_Writer.addStringFieldCopyToObj(FIELD_NAME, results.s_MetricValueField, *docPtr); if (!results.s_ByFieldName.empty()) { m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, results.s_ByFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj(BY_FIELD_VALUE, results.s_ByFieldValue, + *docPtr, true); // But allow correlatedByFieldValue to be unset if blank - m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, results.s_CorrelatedByFieldValue, *docPtr); + m_Writer.addStringFieldCopyToObj(CORRELATED_BY_FIELD_VALUE, + results.s_CorrelatedByFieldValue, *docPtr); } if (!results.s_PartitionFieldName.empty()) { - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); + m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, + results.s_PartitionFieldName, *docPtr); // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj( + PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); } m_Writer.addStringFieldCopyToObj(FUNCTION, results.s_FunctionName, *docPtr); - m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, results.s_FunctionDescription, *docPtr); + m_Writer.addStringFieldCopyToObj(FUNCTION_DESCRIPTION, + results.s_FunctionDescription, *docPtr); m_Writer.addDoubleArrayFieldToObj(TYPICAL, results.s_BaselineMean, *docPtr); 
m_Writer.addDoubleArrayFieldToObj(ACTUAL, results.s_CurrentMean, *docPtr); } @@ -732,8 +799,10 @@ void CJsonOutputWriter::addInfluencerFields(bool isBucketInfluencer, } m_Writer.addDoubleFieldToObj(PROBABILITY, node.probability(), *docPtr); - m_Writer.addDoubleFieldToObj(isBucketInfluencer ? INITIAL_SCORE : INITIAL_INFLUENCER_SCORE, node.s_NormalizedAnomalyScore, *docPtr); - m_Writer.addDoubleFieldToObj(isBucketInfluencer ? ANOMALY_SCORE : INFLUENCER_SCORE, node.s_NormalizedAnomalyScore, *docPtr); + m_Writer.addDoubleFieldToObj(isBucketInfluencer ? INITIAL_SCORE : INITIAL_INFLUENCER_SCORE, + node.s_NormalizedAnomalyScore, *docPtr); + m_Writer.addDoubleFieldToObj(isBucketInfluencer ? ANOMALY_SCORE : INFLUENCER_SCORE, + node.s_NormalizedAnomalyScore, *docPtr); const std::string& personFieldName = *node.s_Spec.s_PersonFieldName; m_Writer.addStringFieldCopyToObj(INFLUENCER_FIELD_NAME, personFieldName, *docPtr); if (isBucketInfluencer) { @@ -741,12 +810,14 @@ void CJsonOutputWriter::addInfluencerFields(bool isBucketInfluencer, } else { if (!personFieldName.empty()) { // If name is present then force output of value too, even when empty - m_Writer.addStringFieldCopyToObj(INFLUENCER_FIELD_VALUE, *node.s_Spec.s_PersonFieldValue, *docPtr, true); + m_Writer.addStringFieldCopyToObj( + INFLUENCER_FIELD_VALUE, *node.s_Spec.s_PersonFieldValue, *docPtr, true); } } } -void CJsonOutputWriter::addPartitionScores(const CHierarchicalResultsWriter::TResults& results, TDocumentWeakPtr weakDoc) { +void CJsonOutputWriter::addPartitionScores(const CHierarchicalResultsWriter::TResults& results, + TDocumentWeakPtr weakDoc) { TDocumentPtr docPtr = weakDoc.lock(); if (!docPtr) { LOG_ERROR(<< "Inconsistent program state. JSON document unavailable."); @@ -754,9 +825,12 @@ void CJsonOutputWriter::addPartitionScores(const CHierarchicalResultsWriter::TRe } m_Writer.addDoubleFieldToObj(PROBABILITY, results.s_Probability, *docPtr); - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, results.s_PartitionFieldName, *docPtr); - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, results.s_PartitionFieldValue, *docPtr, true); - m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); + m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, + results.s_PartitionFieldName, *docPtr); + m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, + results.s_PartitionFieldValue, *docPtr, true); + m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, + results.s_NormalizedAnomalyScore, *docPtr); m_Writer.addDoubleFieldToObj(RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); } @@ -768,14 +842,16 @@ size_t CJsonOutputWriter::limitNumberRecords() const { return m_RecordOutputLimit; } -void CJsonOutputWriter::persistNormalizer(const model::CHierarchicalResultsNormalizer& normalizer, core_t::TTime& persistTime) { +void CJsonOutputWriter::persistNormalizer(const model::CHierarchicalResultsNormalizer& normalizer, + core_t::TTime& persistTime) { std::string quantilesState; normalizer.toJson(m_LastNonInterimBucketTime, "api", quantilesState, true); m_Writer.StartObject(); m_Writer.String(QUANTILES); // No need to copy the strings as the doc is written straight away - CModelSnapshotJsonWriter::writeQuantileState(m_JobId, quantilesState, m_LastNonInterimBucketTime, m_Writer); + CModelSnapshotJsonWriter::writeQuantileState( + m_JobId, quantilesState, m_LastNonInterimBucketTime, m_Writer); m_Writer.EndObject(); persistTime = core::CTimeUtils::now(); @@ -798,7 +874,8 @@ void 
CJsonOutputWriter::reportMemoryUsage(const model::CResourceMonitor::SResult LOG_TRACE(<< "Wrote memory usage results"); } -void CJsonOutputWriter::acknowledgeFlush(const std::string& flushId, core_t::TTime lastFinalizedBucketEnd) { +void CJsonOutputWriter::acknowledgeFlush(const std::string& flushId, + core_t::TTime lastFinalizedBucketEnd) { m_Writer.StartObject(); m_Writer.String(FLUSH); m_Writer.StartObject(); @@ -846,13 +923,9 @@ void CJsonOutputWriter::writeCategoryDefinition(int categoryId, } CJsonOutputWriter::SBucketData::SBucketData() - : s_MaxBucketInfluencerNormalizedAnomalyScore(0.0), - s_InputEventCount(0), - s_RecordCount(0), - s_BucketSpan(0), - s_HighestProbability(-1), - s_LowestInfluencerScore(101.0), - s_LowestBucketInfluencerScore(101.0) { + : s_MaxBucketInfluencerNormalizedAnomalyScore(0.0), s_InputEventCount(0), + s_RecordCount(0), s_BucketSpan(0), s_HighestProbability(-1), + s_LowestInfluencerScore(101.0), s_LowestBucketInfluencerScore(101.0) { } } } diff --git a/lib/api/CLengthEncodedInputParser.cc b/lib/api/CLengthEncodedInputParser.cc index 8fdae96ef3..cc593fc412 100644 --- a/lib/api/CLengthEncodedInputParser.cc +++ b/lib/api/CLengthEncodedInputParser.cc @@ -29,7 +29,8 @@ namespace api { const size_t CLengthEncodedInputParser::WORK_BUFFER_SIZE(8192); // 8kB CLengthEncodedInputParser::CLengthEncodedInputParser(std::istream& strmIn) - : CInputParser(), m_StrmIn(strmIn), m_WorkBuffer(nullptr), m_WorkBufferPtr(nullptr), m_WorkBufferEnd(nullptr), m_NoMoreRecords(false) { + : CInputParser(), m_StrmIn(strmIn), m_WorkBuffer(nullptr), + m_WorkBufferPtr(nullptr), m_WorkBufferEnd(nullptr), m_NoMoreRecords(false) { // This test is not ideal because std::cin's stream buffer could have been // changed if (strmIn.rdbuf() == std::cin.rdbuf()) { @@ -142,7 +143,8 @@ bool CLengthEncodedInputParser::parseRecordFromStream(STR_VEC& results) { std::string temp; results.resize(numFields, typename STR_VEC::value_type(temp)); } else { - LOG_ERROR(<< "Incorrect number of fields in input stream record: expected " << results.size() << " but got " << numFields); + LOG_ERROR(<< "Incorrect number of fields in input stream record: expected " + << results.size() << " but got " << numFields); return false; } } @@ -163,7 +165,8 @@ bool CLengthEncodedInputParser::parseRecordFromStream(STR_VEC& results) { // in Bugzilla for more details. 
static const uint32_t HIGH_BYTE_MASK(0xFF000000); if ((length & HIGH_BYTE_MASK) != 0u) { - LOG_ERROR(<< "Parsed field length " << length << " is suspiciously large - assuming corrupt input stream"); + LOG_ERROR(<< "Parsed field length " << length + << " is suspiciously large - assuming corrupt input stream"); return false; } @@ -243,7 +246,8 @@ size_t CLengthEncodedInputParser::refillBuffer() { } m_WorkBufferPtr = m_WorkBuffer.get(); - m_StrmIn.read(m_WorkBuffer.get() + avail, static_cast(WORK_BUFFER_SIZE - avail)); + m_StrmIn.read(m_WorkBuffer.get() + avail, + static_cast(WORK_BUFFER_SIZE - avail)); if (m_StrmIn.bad()) { LOG_ERROR(<< "Input stream is bad"); } else { diff --git a/lib/api/CLineifiedInputParser.cc b/lib/api/CLineifiedInputParser.cc index 35b243323d..d3ca5c279f 100644 --- a/lib/api/CLineifiedInputParser.cc +++ b/lib/api/CLineifiedInputParser.cc @@ -19,7 +19,8 @@ const char CLineifiedInputParser::LINE_END('\n'); const size_t CLineifiedInputParser::WORK_BUFFER_SIZE(131072); // 128kB CLineifiedInputParser::CLineifiedInputParser(std::istream& strmIn) - : CInputParser(), m_StrmIn(strmIn), m_WorkBuffer(nullptr), m_WorkBufferCapacity(0), m_WorkBufferPtr(nullptr), m_WorkBufferEnd(nullptr) { + : CInputParser(), m_StrmIn(strmIn), m_WorkBuffer(nullptr), + m_WorkBufferCapacity(0), m_WorkBufferPtr(nullptr), m_WorkBufferEnd(nullptr) { } CLineifiedInputParser::TCharPSizePr CLineifiedInputParser::parseLine() { @@ -70,7 +71,8 @@ CLineifiedInputParser::TCharPSizePr CLineifiedInputParser::parseLine() { break; } - m_StrmIn.read(m_WorkBufferEnd, static_cast(m_WorkBufferCapacity - avail)); + m_StrmIn.read(m_WorkBufferEnd, + static_cast(m_WorkBufferCapacity - avail)); std::streamsize bytesRead(m_StrmIn.gcount()); if (bytesRead == 0) { if (m_StrmIn.bad()) { diff --git a/lib/api/CLineifiedJsonInputParser.cc b/lib/api/CLineifiedJsonInputParser.cc index 1c332a20ae..5c26dcf20d 100644 --- a/lib/api/CLineifiedJsonInputParser.cc +++ b/lib/api/CLineifiedJsonInputParser.cc @@ -36,12 +36,14 @@ bool CLineifiedJsonInputParser::readStream(const TReaderFunc& readerFunc) { } if (m_AllDocsSameStructure) { - if (this->decodeDocumentWithCommonFields(document, fieldNames, fieldValRefs, recordFields) == false) { + if (this->decodeDocumentWithCommonFields( + document, fieldNames, fieldValRefs, recordFields) == false) { LOG_ERROR(<< "Failed to decode JSON document"); return false; } } else { - if (this->decodeDocumentWithArbitraryFields(document, fieldNames, recordFields) == false) { + if (this->decodeDocumentWithArbitraryFields(document, fieldNames, + recordFields) == false) { LOG_ERROR(<< "Failed to decode JSON document"); return false; } @@ -66,7 +68,8 @@ bool CLineifiedJsonInputParser::parseDocument(char* begin, rapidjson::Document& } if (!document.IsObject()) { - LOG_ERROR(<< "Top level of JSON document must be an object: " << document.GetType()); + LOG_ERROR(<< "Top level of JSON document must be an object: " + << document.GetType()); return false; } @@ -94,7 +97,8 @@ bool CLineifiedJsonInputParser::decodeDocumentWithCommonFields(const rapidjson:: } TStrRefVecItr refIter = fieldValRefs.begin(); - for (rapidjson::Value::ConstMemberIterator iter = document.MemberBegin(); iter != document.MemberEnd(); ++iter, ++refIter) { + for (rapidjson::Value::ConstMemberIterator iter = document.MemberBegin(); + iter != document.MemberEnd(); ++iter, ++refIter) { if (refIter == fieldValRefs.end()) { LOG_ERROR(<< "More fields than field references"); return false; @@ -112,13 +116,15 @@ bool 
CLineifiedJsonInputParser::decodeDocumentWithCommonFields(const rapidjson:: break; case rapidjson::kObjectType: case rapidjson::kArrayType: - LOG_ERROR(<< "Can't handle nested objects/arrays in JSON documents: " << fieldNames.back()); + LOG_ERROR(<< "Can't handle nested objects/arrays in JSON documents: " + << fieldNames.back()); return false; case rapidjson::kStringType: refIter->get().assign(iter->value.GetString(), iter->value.GetStringLength()); break; case rapidjson::kNumberType: - core::CStringUtils::typeToString(iter->value.GetDouble()).swap(refIter->get()); + core::CStringUtils::typeToString(iter->value.GetDouble()) + .swap(refIter->get()); break; } } @@ -134,8 +140,10 @@ bool CLineifiedJsonInputParser::decodeDocumentWithArbitraryFields(const rapidjso fieldNames.clear(); recordFields.clear(); - for (rapidjson::Value::ConstMemberIterator iter = document.MemberBegin(); iter != document.MemberEnd(); ++iter) { - fieldNames.push_back(std::string(iter->name.GetString(), iter->name.GetStringLength())); + for (rapidjson::Value::ConstMemberIterator iter = document.MemberBegin(); + iter != document.MemberEnd(); ++iter) { + fieldNames.push_back( + std::string(iter->name.GetString(), iter->name.GetStringLength())); switch (iter->value.GetType()) { case rapidjson::kNullType: @@ -149,14 +157,17 @@ bool CLineifiedJsonInputParser::decodeDocumentWithArbitraryFields(const rapidjso break; case rapidjson::kObjectType: case rapidjson::kArrayType: - LOG_ERROR(<< "Can't handle nested objects/arrays in JSON documents: " << fieldNames.back()); + LOG_ERROR(<< "Can't handle nested objects/arrays in JSON documents: " + << fieldNames.back()); fieldNames.pop_back(); return false; case rapidjson::kStringType: - recordFields[fieldNames.back()].assign(iter->value.GetString(), iter->value.GetStringLength()); + recordFields[fieldNames.back()].assign(iter->value.GetString(), + iter->value.GetStringLength()); break; case rapidjson::kNumberType: - core::CStringUtils::typeToString(iter->value.GetDouble()).swap(recordFields[fieldNames.back()]); + core::CStringUtils::typeToString(iter->value.GetDouble()) + .swap(recordFields[fieldNames.back()]); break; } } diff --git a/lib/api/CLineifiedJsonOutputWriter.cc b/lib/api/CLineifiedJsonOutputWriter.cc index d004a2afd8..cf56c02bf8 100644 --- a/lib/api/CLineifiedJsonOutputWriter.cc +++ b/lib/api/CLineifiedJsonOutputWriter.cc @@ -15,19 +15,23 @@ namespace ml { namespace api { CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter() - : m_OutStream(m_StringOutputBuf), m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { + : m_OutStream(m_StringOutputBuf), m_WriteStream(m_OutStream), + m_Writer(m_WriteStream) { } CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(const TStrSet& numericFields) - : m_NumericFields(numericFields), m_OutStream(m_StringOutputBuf), m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { + : m_NumericFields(numericFields), m_OutStream(m_StringOutputBuf), + m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { } CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(std::ostream& strmOut) : m_OutStream(strmOut), m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { } -CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(const TStrSet& numericFields, std::ostream& strmOut) - : m_NumericFields(numericFields), m_OutStream(strmOut), m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { +CLineifiedJsonOutputWriter::CLineifiedJsonOutputWriter(const TStrSet& numericFields, + std::ostream& strmOut) + : m_NumericFields(numericFields), m_OutStream(strmOut), + 
m_WriteStream(m_OutStream), m_Writer(m_WriteStream) { } CLineifiedJsonOutputWriter::~CLineifiedJsonOutputWriter() { @@ -41,7 +45,8 @@ CLineifiedJsonOutputWriter::~CLineifiedJsonOutputWriter() { core::CSleep::sleep(20); } -bool CLineifiedJsonOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { +bool CLineifiedJsonOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, + const TStrVec& /*extraFieldNames*/) { return true; } @@ -49,7 +54,8 @@ const CLineifiedJsonOutputWriter::TStrVec& CLineifiedJsonOutputWriter::fieldName return EMPTY_FIELD_NAMES; } -bool CLineifiedJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { +bool CLineifiedJsonOutputWriter::writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields) { using TScopedAllocator = core::CScopedRapidJsonPoolAllocator; TScopedAllocator scopedAllocator("CLineifiedJsonOutputWriter::writeRow", m_Writer); @@ -90,7 +96,9 @@ std::string CLineifiedJsonOutputWriter::internalString() const { return m_StringOutputBuf.str(); } -void CLineifiedJsonOutputWriter::writeField(const std::string& name, const std::string& value, rapidjson::Document& doc) const { +void CLineifiedJsonOutputWriter::writeField(const std::string& name, + const std::string& value, + rapidjson::Document& doc) const { if (m_NumericFields.find(name) != m_NumericFields.end()) { double numericValue(0.0); if (core::CStringUtils::stringToType(value, numericValue) == false) { diff --git a/lib/api/CLineifiedXmlInputParser.cc b/lib/api/CLineifiedXmlInputParser.cc index c49bc073bf..4e610d9bba 100644 --- a/lib/api/CLineifiedXmlInputParser.cc +++ b/lib/api/CLineifiedXmlInputParser.cc @@ -13,8 +13,11 @@ namespace ml { namespace api { -CLineifiedXmlInputParser::CLineifiedXmlInputParser(core::CXmlParserIntf& parser, std::istream& strmIn, bool allDocsSameStructure) - : CLineifiedInputParser(strmIn), m_Parser(parser), m_AllDocsSameStructure(allDocsSameStructure) { +CLineifiedXmlInputParser::CLineifiedXmlInputParser(core::CXmlParserIntf& parser, + std::istream& strmIn, + bool allDocsSameStructure) + : CLineifiedInputParser(strmIn), m_Parser(parser), + m_AllDocsSameStructure(allDocsSameStructure) { } bool CLineifiedXmlInputParser::readStream(const TReaderFunc& readerFunc) { @@ -40,7 +43,8 @@ bool CLineifiedXmlInputParser::readStream(const TReaderFunc& readerFunc) { } if (m_AllDocsSameStructure) { - if (this->decodeDocumentWithCommonFields(fieldNames, fieldValRefs, recordFields) == false) { + if (this->decodeDocumentWithCommonFields(fieldNames, fieldValRefs, + recordFields) == false) { LOG_ERROR(<< "Failed to decode XML document"); return false; } @@ -59,7 +63,9 @@ bool CLineifiedXmlInputParser::readStream(const TReaderFunc& readerFunc) { return true; } -bool CLineifiedXmlInputParser::decodeDocumentWithCommonFields(TStrVec& fieldNames, TStrRefVec& fieldValRefs, TStrStrUMap& recordFields) { +bool CLineifiedXmlInputParser::decodeDocumentWithCommonFields(TStrVec& fieldNames, + TStrRefVec& fieldValRefs, + TStrStrUMap& recordFields) { if (fieldValRefs.empty()) { // We haven't yet decoded any documents, so decode the first one long-hand this->decodeDocumentWithArbitraryFields(fieldNames, recordFields); @@ -88,14 +94,16 @@ bool CLineifiedXmlInputParser::decodeDocumentWithCommonFields(TStrVec& fieldName more = m_Parser.navigateNext(); } - LOG_ERROR(<< "Incorrect number of fields: expected " << fieldValRefs.size() << ", got " << i); + LOG_ERROR(<< "Incorrect number of fields: expected " + << 
fieldValRefs.size() << ", got " << i); return false; } return true; } -void CLineifiedXmlInputParser::decodeDocumentWithArbitraryFields(TStrVec& fieldNames, TStrStrUMap& recordFields) { +void CLineifiedXmlInputParser::decodeDocumentWithArbitraryFields(TStrVec& fieldNames, + TStrStrUMap& recordFields) { // The major drawback of having self-describing messages is that we can't // make assumptions about what fields exist or what order they're in fieldNames.clear(); diff --git a/lib/api/CLineifiedXmlOutputWriter.cc b/lib/api/CLineifiedXmlOutputWriter.cc index aac78821aa..9b2311e21e 100644 --- a/lib/api/CLineifiedXmlOutputWriter.cc +++ b/lib/api/CLineifiedXmlOutputWriter.cc @@ -18,10 +18,12 @@ namespace { const std::string EMPTY_STRING; } -CLineifiedXmlOutputWriter::CLineifiedXmlOutputWriter(const std::string& rootName) : m_RootName(rootName), m_OutStream(m_StringOutputBuf) { +CLineifiedXmlOutputWriter::CLineifiedXmlOutputWriter(const std::string& rootName) + : m_RootName(rootName), m_OutStream(m_StringOutputBuf) { } -CLineifiedXmlOutputWriter::CLineifiedXmlOutputWriter(const std::string& rootName, std::ostream& strmOut) +CLineifiedXmlOutputWriter::CLineifiedXmlOutputWriter(const std::string& rootName, + std::ostream& strmOut) : m_RootName(rootName), m_OutStream(strmOut) { } @@ -36,7 +38,8 @@ CLineifiedXmlOutputWriter::~CLineifiedXmlOutputWriter() { core::CSleep::sleep(20); } -bool CLineifiedXmlOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { +bool CLineifiedXmlOutputWriter::fieldNames(const TStrVec& /*fieldNames*/, + const TStrVec& /*extraFieldNames*/) { return true; } @@ -44,8 +47,10 @@ const CLineifiedXmlOutputWriter::TStrVec& CLineifiedXmlOutputWriter::fieldNames( return EMPTY_FIELD_NAMES; } -bool CLineifiedXmlOutputWriter::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { - core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(m_Pool.newNode(m_RootName, EMPTY_STRING)); +bool CLineifiedXmlOutputWriter::writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields) { + core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root( + m_Pool.newNode(m_RootName, EMPTY_STRING)); TStrStrUMapCItr fieldValueIter = dataRowFields.begin(); for (; fieldValueIter != dataRowFields.end(); ++fieldValueIter) { diff --git a/lib/api/CModelPlotDataJsonWriter.cc b/lib/api/CModelPlotDataJsonWriter.cc index 88cf3d937f..8e7bccfc4e 100644 --- a/lib/api/CModelPlotDataJsonWriter.cc +++ b/lib/api/CModelPlotDataJsonWriter.cc @@ -29,10 +29,12 @@ const std::string CModelPlotDataJsonWriter::MEDIAN("model_median"); const std::string CModelPlotDataJsonWriter::ACTUAL("actual"); const std::string CModelPlotDataJsonWriter::BUCKET_SPAN("bucket_span"); -CModelPlotDataJsonWriter::CModelPlotDataJsonWriter(core::CJsonOutputStreamWrapper& outStream) : m_Writer(outStream) { +CModelPlotDataJsonWriter::CModelPlotDataJsonWriter(core::CJsonOutputStreamWrapper& outStream) + : m_Writer(outStream) { } -void CModelPlotDataJsonWriter::writeFlat(const std::string& jobId, const model::CModelPlotData& data) { +void CModelPlotDataJsonWriter::writeFlat(const std::string& jobId, + const model::CModelPlotData& data) { const std::string& partitionFieldName = data.partitionFieldName(); const std::string& partitionFieldValue = data.partitionFieldValue(); const std::string& overFieldName = data.overFieldName(); @@ -40,26 +42,20 @@ void CModelPlotDataJsonWriter::writeFlat(const std::string& jobId, const model:: core_t::TTime time = data.time(); int 
detectorIndex = data.detectorIndex(); - for (TFeatureStrByFieldDataUMapUMapCItr featureItr = data.begin(); featureItr != data.end(); ++featureItr) { + for (TFeatureStrByFieldDataUMapUMapCItr featureItr = data.begin(); + featureItr != data.end(); ++featureItr) { std::string feature = model_t::print(featureItr->first); const TStrByFieldDataUMap& byDataMap = featureItr->second; - for (TStrByFieldDataUMapCItr byItr = byDataMap.begin(); byItr != byDataMap.end(); ++byItr) { + for (TStrByFieldDataUMapCItr byItr = byDataMap.begin(); + byItr != byDataMap.end(); ++byItr) { const std::string& byFieldValue = byItr->first; const TByFieldData& byData = byItr->second; const TStrDoublePrVec& values = byData.s_ValuesPerOverField; if (values.empty()) { rapidjson::Value doc = m_Writer.makeObject(); - this->writeFlatRow(time, - jobId, - detectorIndex, - partitionFieldName, - partitionFieldValue, - feature, - byFieldName, - byFieldValue, - byData, - data.bucketSpan(), - doc); + this->writeFlatRow(time, jobId, detectorIndex, partitionFieldName, + partitionFieldValue, feature, byFieldName, + byFieldValue, byData, data.bucketSpan(), doc); rapidjson::Value wrapper = m_Writer.makeObject(); m_Writer.addMember(MODEL_PLOT, doc, wrapper); @@ -68,20 +64,13 @@ void CModelPlotDataJsonWriter::writeFlat(const std::string& jobId, const model:: for (std::size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) { const TStrDoublePr& keyValue = values[valueIndex]; rapidjson::Value doc = m_Writer.makeObject(); - this->writeFlatRow(time, - jobId, - detectorIndex, - partitionFieldName, - partitionFieldValue, - feature, - byFieldName, - byFieldValue, - byData, - data.bucketSpan(), - doc); + this->writeFlatRow(time, jobId, detectorIndex, partitionFieldName, + partitionFieldValue, feature, byFieldName, + byFieldValue, byData, data.bucketSpan(), doc); if (!overFieldName.empty()) { m_Writer.addStringFieldCopyToObj(OVER_FIELD_NAME, overFieldName, doc); - m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, keyValue.first, doc, true); + m_Writer.addStringFieldCopyToObj(OVER_FIELD_VALUE, + keyValue.first, doc, true); } m_Writer.addDoubleFieldToObj(ACTUAL, keyValue.second, doc); @@ -115,7 +104,8 @@ void CModelPlotDataJsonWriter::writeFlatRow(core_t::TTime time, m_Writer.addIntFieldToObj(BUCKET_SPAN, bucketSpan, doc); if (!partitionFieldName.empty()) { m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, partitionFieldName, doc); - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, partitionFieldValue, doc, true); + m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, + partitionFieldValue, doc, true); } if (!byFieldName.empty()) { m_Writer.addStringFieldCopyToObj(BY_FIELD_NAME, byFieldName, doc); diff --git a/lib/api/CModelSnapshotJsonWriter.cc b/lib/api/CModelSnapshotJsonWriter.cc index 4e43f7b797..cad16f1861 100644 --- a/lib/api/CModelSnapshotJsonWriter.cc +++ b/lib/api/CModelSnapshotJsonWriter.cc @@ -26,7 +26,8 @@ const std::string QUANTILES("quantiles"); const std::string QUANTILE_STATE("quantile_state"); } -CModelSnapshotJsonWriter::CModelSnapshotJsonWriter(const std::string& jobId, core::CJsonOutputStreamWrapper& strmOut) +CModelSnapshotJsonWriter::CModelSnapshotJsonWriter(const std::string& jobId, + core::CJsonOutputStreamWrapper& strmOut) : m_JobId(jobId), m_Writer(strmOut) { // Don't write any output in the constructor because, the way things work at // the moment, the output stream might be redirected after construction @@ -67,14 +68,16 @@ void CModelSnapshotJsonWriter::write(const SModelSnapshotReport& report) { 
// write normalizerState here m_Writer.String(QUANTILES); - writeQuantileState(m_JobId, report.s_NormalizerState, report.s_LatestFinalResultTime, m_Writer); + writeQuantileState(m_JobId, report.s_NormalizerState, + report.s_LatestFinalResultTime, m_Writer); m_Writer.EndObject(); m_Writer.EndObject(); m_Writer.flush(); - LOG_DEBUG(<< "Wrote model snapshot report with ID " << report.s_SnapshotId << " for: " << report.s_Description + LOG_DEBUG(<< "Wrote model snapshot report with ID " << report.s_SnapshotId + << " for: " << report.s_Description << ", latest final results at " << report.s_LatestFinalResultTime); } diff --git a/lib/api/CNullOutput.cc b/lib/api/CNullOutput.cc index faffe35fd6..ba442688a0 100644 --- a/lib/api/CNullOutput.cc +++ b/lib/api/CNullOutput.cc @@ -16,7 +16,8 @@ const COutputHandler::TStrVec& CNullOutput::fieldNames() const { return EMPTY_FIELD_NAMES; } -bool CNullOutput::writeRow(const TStrStrUMap& /*dataRowFields*/, const TStrStrUMap& /*overrideDataRowFields*/) { +bool CNullOutput::writeRow(const TStrStrUMap& /*dataRowFields*/, + const TStrStrUMap& /*overrideDataRowFields*/) { return true; } } diff --git a/lib/api/COutputChainer.cc b/lib/api/COutputChainer.cc index efff989090..6c37df9c3a 100644 --- a/lib/api/COutputChainer.cc +++ b/lib/api/COutputChainer.cc @@ -12,7 +12,8 @@ namespace ml { namespace api { -COutputChainer::COutputChainer(CDataProcessor& dataProcessor) : m_DataProcessor(dataProcessor) { +COutputChainer::COutputChainer(CDataProcessor& dataProcessor) + : m_DataProcessor(dataProcessor) { } void COutputChainer::newOutputStream() { @@ -24,7 +25,8 @@ bool COutputChainer::fieldNames(const TStrVec& fieldNames, const TStrVec& extraF // Only add extra field names if they're not already present for (TStrVecCItr iter = extraFieldNames.begin(); iter != extraFieldNames.end(); ++iter) { - if (std::find(m_FieldNames.begin(), m_FieldNames.end(), *iter) == m_FieldNames.end()) { + if (std::find(m_FieldNames.begin(), m_FieldNames.end(), *iter) == + m_FieldNames.end()) { m_FieldNames.push_back(*iter); } } @@ -56,7 +58,8 @@ const COutputHandler::TStrVec& COutputChainer::fieldNames() const { return m_FieldNames; } -bool COutputChainer::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMap& overrideDataRowFields) { +bool COutputChainer::writeRow(const TStrStrUMap& dataRowFields, + const TStrStrUMap& overrideDataRowFields) { if (m_FieldNames.empty()) { LOG_ERROR(<< "Attempt to output data before field names"); return false; @@ -68,9 +71,11 @@ bool COutputChainer::writeRow(const TStrStrUMap& dataRowFields, const TStrStrUMa TPreComputedHashVecCItr preComputedHashIter = m_Hashes.begin(); TStrRefVecCItr fieldRefIter = m_WorkRecordFieldRefs.begin(); for (TStrVecCItr fieldNameIter = m_FieldNames.begin(); - fieldNameIter != m_FieldNames.end() && preComputedHashIter != m_Hashes.end() && fieldRefIter != m_WorkRecordFieldRefs.end(); + fieldNameIter != m_FieldNames.end() && preComputedHashIter != m_Hashes.end() && + fieldRefIter != m_WorkRecordFieldRefs.end(); ++fieldNameIter, ++preComputedHashIter, ++fieldRefIter) { - TStrStrUMapCItr fieldValueIter = overrideDataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); + TStrStrUMapCItr fieldValueIter = + overrideDataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); if (fieldValueIter == overrideDataRowFields.end()) { fieldValueIter = dataRowFields.find(*fieldNameIter, *preComputedHashIter, pred); if (fieldValueIter == dataRowFields.end()) { @@ -82,7 +87,8 @@ bool COutputChainer::writeRow(const TStrStrUMap& 
dataRowFields, const TStrStrUMa // Use the start/length version of assign to bypass GNU copy-on-write, // since we don't want the strings in m_WorkRecordFields to share // representations with strings in our input maps. - fieldRefIter->get().assign(fieldValueIter->second, 0, fieldValueIter->second.length()); + fieldRefIter->get().assign(fieldValueIter->second, 0, + fieldValueIter->second.length()); } if (m_DataProcessor.handleRecord(m_WorkRecordFields) == false) { @@ -98,7 +104,8 @@ void COutputChainer::finalise() { m_DataProcessor.finalise(); } -bool COutputChainer::restoreState(core::CDataSearcher& restoreSearcher, core_t::TTime& completeToTime) { +bool COutputChainer::restoreState(core::CDataSearcher& restoreSearcher, + core_t::TTime& completeToTime) { return m_DataProcessor.restoreState(restoreSearcher, completeToTime); } diff --git a/lib/api/COutputHandler.cc b/lib/api/COutputHandler.cc index 483bf6f00c..78f3d9daf6 100644 --- a/lib/api/COutputHandler.cc +++ b/lib/api/COutputHandler.cc @@ -36,7 +36,8 @@ void COutputHandler::finalise() { // NOOP unless overridden } -bool COutputHandler::restoreState(core::CDataSearcher& /* restoreSearcher */, core_t::TTime& /* completeToTime */) { +bool COutputHandler::restoreState(core::CDataSearcher& /* restoreSearcher */, + core_t::TTime& /* completeToTime */) { // NOOP unless overridden return true; } diff --git a/lib/api/CResultNormalizer.cc b/lib/api/CResultNormalizer.cc index 1e14b7091d..a20f183c6d 100644 --- a/lib/api/CResultNormalizer.cc +++ b/lib/api/CResultNormalizer.cc @@ -30,9 +30,9 @@ const std::string CResultNormalizer::BUCKET_INFLUENCER_LEVEL("inflb"); const std::string CResultNormalizer::INFLUENCER_LEVEL("infl"); const std::string CResultNormalizer::ZERO("0"); -CResultNormalizer::CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig, COutputHandler& outputHandler) - : m_ModelConfig(modelConfig), - m_OutputHandler(outputHandler), +CResultNormalizer::CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig, + COutputHandler& outputHandler) + : m_ModelConfig(modelConfig), m_OutputHandler(outputHandler), m_WriteFieldNames(true), m_OutputFieldNormalizedScore(m_OutputFields[NORMALIZED_SCORE_NAME]), m_Normalizer(m_ModelConfig) { @@ -40,7 +40,8 @@ CResultNormalizer::CResultNormalizer(const model::CAnomalyDetectorModelConfig& m bool CResultNormalizer::initNormalizer(const std::string& stateFileName) { std::ifstream inputStream(stateFileName.c_str()); - model::CHierarchicalResultsNormalizer::ERestoreOutcome outcome(m_Normalizer.fromJsonStream(inputStream)); + model::CHierarchicalResultsNormalizer::ERestoreOutcome outcome( + m_Normalizer.fromJsonStream(inputStream)); if (outcome != model::CHierarchicalResultsNormalizer::E_Ok) { LOG_ERROR(<< "Failed to restore JSON state for quantiles"); return false; @@ -76,20 +77,27 @@ bool CResultNormalizer::handleRecord(const TStrStrUMap& dataRowFields) { bool isValidRecord(false); if (m_ModelConfig.perPartitionNormalization()) { - isValidRecord = parseDataFields(dataRowFields, level, partition, partitionValue, person, function, valueFieldName, probability); + isValidRecord = parseDataFields(dataRowFields, level, partition, partitionValue, + person, function, valueFieldName, probability); } else { - isValidRecord = parseDataFields(dataRowFields, level, partition, person, function, valueFieldName, probability); + isValidRecord = parseDataFields(dataRowFields, level, partition, person, + function, valueFieldName, probability); } - std::string partitionKey = 
m_ModelConfig.perPartitionNormalization() ? partition + partitionValue : partition; + std::string partitionKey = m_ModelConfig.perPartitionNormalization() + ? partition + partitionValue + : partition; if (isValidRecord) { const model::CAnomalyScore::CNormalizer* levelNormalizer = nullptr; - double score = probability > m_ModelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > m_ModelConfig.maximumAnomalousProbability() + ? 0.0 + : maths::CTools::anomalyScore(probability); if (level == ROOT_LEVEL) { levelNormalizer = &m_Normalizer.bucketNormalizer(); } else if (level == LEAF_LEVEL) { - levelNormalizer = m_Normalizer.leafNormalizer(partitionKey, person, function, valueFieldName); + levelNormalizer = m_Normalizer.leafNormalizer(partitionKey, person, + function, valueFieldName); } else if (level == PARTITION_LEVEL) { levelNormalizer = m_Normalizer.partitionNormalizer(partitionKey); } else if (level == BUCKET_INFLUENCER_LEVEL) { @@ -101,16 +109,19 @@ bool CResultNormalizer::handleRecord(const TStrStrUMap& dataRowFields) { } if (levelNormalizer != nullptr) { if (levelNormalizer->canNormalize() && levelNormalizer->normalize(score) == false) { - LOG_ERROR(<< "Failed to normalize score " << score << " at level " << level << " with partition field name " << partition + LOG_ERROR(<< "Failed to normalize score " << score << " at level " + << level << " with partition field name " << partition << " and person field name " << person); } } else { LOG_ERROR(<< "No normalizer available" " at level '" - << level << "' with partition field name '" << partition << "' and person field name '" << person << "'"); + << level << "' with partition field name '" << partition + << "' and person field name '" << person << "'"); } - m_OutputFieldNormalizedScore = (score > 0.0) ? core::CStringUtils::typeToStringPretty(score) : ZERO; + m_OutputFieldNormalizedScore = + (score > 0.0) ? 
core::CStringUtils::typeToStringPretty(score) : ZERO; } else { m_OutputFieldNormalizedScore.clear(); } @@ -130,8 +141,10 @@ bool CResultNormalizer::parseDataFields(const TStrStrUMap& dataRowFields, std::string& function, std::string& valueFieldName, double& probability) { - return this->parseDataField(dataRowFields, LEVEL, level) && this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) && - this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) && this->parseDataField(dataRowFields, FUNCTION_NAME, function) && + return this->parseDataField(dataRowFields, LEVEL, level) && + this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) && + this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) && + this->parseDataField(dataRowFields, FUNCTION_NAME, function) && this->parseDataField(dataRowFields, VALUE_FIELD_NAME, valueFieldName) && this->parseDataField(dataRowFields, PROBABILITY_NAME, probability); } @@ -144,9 +157,11 @@ bool CResultNormalizer::parseDataFields(const TStrStrUMap& dataRowFields, std::string& function, std::string& valueFieldName, double& probability) { - return this->parseDataField(dataRowFields, LEVEL, level) && this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) && + return this->parseDataField(dataRowFields, LEVEL, level) && + this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) && this->parseDataField(dataRowFields, PARTITION_FIELD_VALUE, partitionValue) && - this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) && this->parseDataField(dataRowFields, FUNCTION_NAME, function) && + this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) && + this->parseDataField(dataRowFields, FUNCTION_NAME, function) && this->parseDataField(dataRowFields, VALUE_FIELD_NAME, valueFieldName) && this->parseDataField(dataRowFields, PROBABILITY_NAME, probability); } diff --git a/lib/api/CSingleStreamDataAdder.cc b/lib/api/CSingleStreamDataAdder.cc index 6fc5ffd0e5..022f8d1ce5 100644 --- a/lib/api/CSingleStreamDataAdder.cc +++ b/lib/api/CSingleStreamDataAdder.cc @@ -14,10 +14,12 @@ namespace api { const size_t CSingleStreamDataAdder::MAX_DOCUMENT_SIZE(16 * 1024 * 1024); // 16MB -CSingleStreamDataAdder::CSingleStreamDataAdder(const TOStreamP& stream) : m_Stream(stream) { +CSingleStreamDataAdder::CSingleStreamDataAdder(const TOStreamP& stream) + : m_Stream(stream) { } -CSingleStreamDataAdder::TOStreamP CSingleStreamDataAdder::addStreamed(const std::string& /*index*/, const std::string& id) { +CSingleStreamDataAdder::TOStreamP +CSingleStreamDataAdder::addStreamed(const std::string& /*index*/, const std::string& id) { if (m_Stream != nullptr && !m_Stream->bad()) { // Start with metadata, leaving the index for the receiving code to set (*m_Stream) << "{\"index\":{\"_id\":\"" << id << "\"}}\n";
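// Illustration (not part of the patch; "snapshot_1" is a made-up id): a call to
// addStreamed() with id "snapshot_1" writes the bulk metadata line
// {"index":{"_id":"snapshot_1"}} followed by the document source, with the index
// name deliberately left for the receiving code to fill in.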
diff --git a/lib/api/CSingleStreamSearcher.cc b/lib/api/CSingleStreamSearcher.cc index f3516c5628..11b90209cc 100644 --- a/lib/api/CSingleStreamSearcher.cc +++ b/lib/api/CSingleStreamSearcher.cc @@ -10,10 +10,12 @@ namespace ml { namespace api { -CSingleStreamSearcher::CSingleStreamSearcher(const TIStreamP& stream) : m_Stream(stream) { +CSingleStreamSearcher::CSingleStreamSearcher(const TIStreamP& stream) + : m_Stream(stream) { } -CSingleStreamSearcher::TIStreamP CSingleStreamSearcher::search(size_t /*currentDocNum*/, size_t /*limit*/) { +CSingleStreamSearcher::TIStreamP +CSingleStreamSearcher::search(size_t /*currentDocNum*/, size_t /*limit*/) { // documents in a stream are separated by '\0'; skip over it so as not to confuse clients (see #279) if (m_Stream->peek() == 0) { m_Stream->get(); diff --git a/lib/api/CStateRestoreStreamFilter.cc b/lib/api/CStateRestoreStreamFilter.cc index 35f51639da..7bcbc6d675 100644 --- a/lib/api/CStateRestoreStreamFilter.cc +++ b/lib/api/CStateRestoreStreamFilter.cc @@ -11,10 +11,12 @@ namespace ml { namespace api { CStateRestoreStreamFilter::CStateRestoreStreamFilter() - : boost::iostreams::basic_line_filter<char>(true), m_DocCount(0), m_RewrotePreviousLine(false) { + : boost::iostreams::basic_line_filter<char>(true), m_DocCount(0), + m_RewrotePreviousLine(false) { } -CStateRestoreStreamFilter::string_type CStateRestoreStreamFilter::do_filter(const string_type& line) { +CStateRestoreStreamFilter::string_type +CStateRestoreStreamFilter::do_filter(const string_type& line) { // Persist format is: // { bulk metadata } // { document source } @@ -53,7 +55,8 @@ CStateRestoreStreamFilter::string_type CStateRestoreStreamFilter::do_filter(cons m_RewrotePreviousLine = true; - return line.substr(leftOffset, rightOffset - leftOffset + 1) + ",\"_version\":1,\"found\":true,\"_source\":"; + return line.substr(leftOffset, rightOffset - leftOffset + 1) + + ",\"_version\":1,\"found\":true,\"_source\":"; } else if (m_RewrotePreviousLine) { return line + '}' + '\0' + '\n';
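// Worked example (illustrative only; "doc1" is a made-up id): do_filter() turns a
// persisted bulk metadata line such as {"index":{"_id":"doc1"}} into
// {"_id":"doc1","_version":1,"found":true,"_source": so that the document source
// line that follows reads like an Elasticsearch GET response; the '}' + '\0'
// appended to that next line then closes the wrapper object.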
diff --git a/lib/api/CTokenListReverseSearchCreator.cc b/lib/api/CTokenListReverseSearchCreator.cc index 4330056073..c98dc85c75 100644 --- a/lib/api/CTokenListReverseSearchCreator.cc +++ b/lib/api/CTokenListReverseSearchCreator.cc @@ -21,7 +21,8 @@ size_t CTokenListReverseSearchCreator::availableCost() const { return 10000; } -size_t CTokenListReverseSearchCreator::costOfToken(const std::string& token, size_t numOccurrences) const { +size_t CTokenListReverseSearchCreator::costOfToken(const std::string& token, + size_t numOccurrences) const { size_t tokenLength = token.length(); return (1 + tokenLength + // length of what we add to the terms (part 1) 3 + tokenLength // length of what we add to the regex (part 2) @@ -29,7 +30,8 @@ size_t CTokenListReverseSearchCreator::costOfToken(const std::string& token, siz numOccurrences; } -bool CTokenListReverseSearchCreator::createNullSearch(std::string& part1, std::string& part2) const { +bool CTokenListReverseSearchCreator::createNullSearch(std::string& part1, + std::string& part2) const { part1.clear(); part2.clear(); return true; @@ -73,7 +75,8 @@ void CTokenListReverseSearchCreator::addInOrderCommonToken(const std::string& to part2 += core::CRegex::escapeRegexSpecial(token); } -void CTokenListReverseSearchCreator::closeStandardSearch(std::string& /*part1*/, std::string& part2) const { +void CTokenListReverseSearchCreator::closeStandardSearch(std::string& /*part1*/, + std::string& part2) const { part2 += ".*"; } } diff --git a/lib/api/CTokenListReverseSearchCreatorIntf.cc b/lib/api/CTokenListReverseSearchCreatorIntf.cc index b8466062f3..c8b6c2976f 100644 --- a/lib/api/CTokenListReverseSearchCreatorIntf.cc +++ b/lib/api/CTokenListReverseSearchCreatorIntf.cc @@ -8,13 +8,15 @@ namespace ml { namespace api { -CTokenListReverseSearchCreatorIntf::CTokenListReverseSearchCreatorIntf(const std::string& fieldName) : m_FieldName(fieldName) { +CTokenListReverseSearchCreatorIntf::CTokenListReverseSearchCreatorIntf(const std::string& fieldName) + : m_FieldName(fieldName) { } CTokenListReverseSearchCreatorIntf::~CTokenListReverseSearchCreatorIntf() { } -void CTokenListReverseSearchCreatorIntf::closeStandardSearch(std::string& /*part1*/, std::string& /*part2*/) const { +void CTokenListReverseSearchCreatorIntf::closeStandardSearch(std::string& /*part1*/, + std::string& /*part2*/) const { // Default is to do nothing } diff --git a/lib/api/CTokenListType.cc b/lib/api/CTokenListType.cc index a12876f52b..ef5673359f 100644 --- a/lib/api/CTokenListType.cc +++ b/lib/api/CTokenListType.cc @@ -33,9 +33,12 @@ const std::string NUM_MATCHES("i"); const std::string EMPTY_STRING; //! Functor for comparing just the first element of a pair of sizes -class CSizePairFirstElementLess : public std::binary_function<CTokenListType::TSizeSizePr, CTokenListType::TSizeSizePr, bool> { +class CSizePairFirstElementLess + : public std::binary_function<CTokenListType::TSizeSizePr, CTokenListType::TSizeSizePr, bool> { public: - bool operator()(CTokenListType::TSizeSizePr lhs, CTokenListType::TSizeSizePr rhs) { return lhs.first < rhs.first; } + bool operator()(CTokenListType::TSizeSizePr lhs, CTokenListType::TSizeSizePr rhs) { + return lhs.first < rhs.first; + } }; } @@ -45,31 +48,26 @@ CTokenListType::CTokenListType(bool isDryRun, const TSizeSizePrVec& baseTokenIds, size_t baseWeight, const TSizeSizeMap& uniqueTokenIds) - : m_BaseString(baseString), - m_BaseTokenIds(baseTokenIds), - m_BaseWeight(baseWeight), - m_MaxStringLen(rawStringLen), + : m_BaseString(baseString), m_BaseTokenIds(baseTokenIds), + m_BaseWeight(baseWeight), m_MaxStringLen(rawStringLen), m_OutOfOrderCommonTokenIndex(baseTokenIds.size()), // Note: m_CommonUniqueTokenIds is required to be in sorted order, and // this relies on uniqueTokenIds being in sorted order m_CommonUniqueTokenIds(uniqueTokenIds.begin(), uniqueTokenIds.end()), - m_CommonUniqueTokenWeight(0), - m_OrigUniqueTokenWeight(0), + m_CommonUniqueTokenWeight(0), m_OrigUniqueTokenWeight(0), m_NumMatches(isDryRun ? 0 : 1) { - for (TSizeSizeMapCItr iter = uniqueTokenIds.begin(); iter != uniqueTokenIds.end(); ++iter) { + for (TSizeSizeMapCItr iter = uniqueTokenIds.begin(); + iter != uniqueTokenIds.end(); ++iter) { m_CommonUniqueTokenWeight += iter->second; } m_OrigUniqueTokenWeight = m_CommonUniqueTokenWeight; } CTokenListType::CTokenListType(core::CStateRestoreTraverser& traverser) - : m_BaseWeight(0), - m_MaxStringLen(0), - m_OutOfOrderCommonTokenIndex(0), - m_CommonUniqueTokenWeight(0), - m_OrigUniqueTokenWeight(0), - m_NumMatches(0) { - traverser.traverseSubLevel(boost::bind(&CTokenListType::acceptRestoreTraverser, this, _1)); + : m_BaseWeight(0), m_MaxStringLen(0), m_OutOfOrderCommonTokenIndex(0), + m_CommonUniqueTokenWeight(0), m_OrigUniqueTokenWeight(0), m_NumMatches(0) { + traverser.traverseSubLevel( + boost::bind(&CTokenListType::acceptRestoreTraverser, this, _1)); } bool CTokenListType::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -81,7 +79,8 @@ bool CTokenListType::acceptRestoreTraver m_BaseString = traverser.value(); } else if (name == BASE_TOKEN_ID) { TSizeSizePr tokenAndWeight(0, 0); - if (core::CStringUtils::stringToType(traverser.value(), tokenAndWeight.first) == false) { + if (core::CStringUtils::stringToType(traverser.value(), + tokenAndWeight.first) == false) { LOG_ERROR(<< "Invalid base token ID in " << traverser.value()); return false; } @@ -89,12 +88,14 @@ bool CTokenListType::acceptRestoreTraver m_BaseTokenIds.push_back(tokenAndWeight); } else if (name == BASE_TOKEN_WEIGHT) { if (m_BaseTokenIds.empty()) { - LOG_ERROR(<< "Base token weight precedes base token ID in " << traverser.value()); + LOG_ERROR(<< "Base token weight precedes base token ID in " + << traverser.value()); return false; } TSizeSizePr& tokenAndWeight = m_BaseTokenIds.back(); - if
(core::CStringUtils::stringToType(traverser.value(), tokenAndWeight.second) == false) { + if (core::CStringUtils::stringToType(traverser.value(), + tokenAndWeight.second) == false) { LOG_ERROR(<< "Invalid base token weight in " << traverser.value()); return false; } @@ -106,13 +107,15 @@ bool CTokenListType::acceptRestoreTraverser(core::CStateRestoreTraverser& traver return false; } } else if (name == OUT_OF_ORDER_COMMON_TOKEN_INDEX) { - if (core::CStringUtils::stringToType(traverser.value(), m_OutOfOrderCommonTokenIndex) == false) { + if (core::CStringUtils::stringToType( + traverser.value(), m_OutOfOrderCommonTokenIndex) == false) { LOG_ERROR(<< "Invalid maximum string length in " << traverser.value()); return false; } } else if (name == COMMON_UNIQUE_TOKEN_ID) { TSizeSizePr tokenAndWeight(0, 0); - if (core::CStringUtils::stringToType(traverser.value(), tokenAndWeight.first) == false) { + if (core::CStringUtils::stringToType(traverser.value(), + tokenAndWeight.first) == false) { LOG_ERROR(<< "Invalid common unique token ID in " << traverser.value()); return false; } @@ -121,20 +124,24 @@ bool CTokenListType::acceptRestoreTraverser(core::CStateRestoreTraverser& traver expectWeight = true; } else if (name == COMMON_UNIQUE_TOKEN_WEIGHT) { if (!expectWeight) { - LOG_ERROR(<< "Common unique token weight precedes common unique token ID in " << traverser.value()); + LOG_ERROR(<< "Common unique token weight precedes common unique token ID in " + << traverser.value()); return false; } TSizeSizePr& tokenAndWeight = m_CommonUniqueTokenIds.back(); - if (core::CStringUtils::stringToType(traverser.value(), tokenAndWeight.second) == false) { - LOG_ERROR(<< "Invalid common unique token weight in " << traverser.value()); + if (core::CStringUtils::stringToType(traverser.value(), + tokenAndWeight.second) == false) { + LOG_ERROR(<< "Invalid common unique token weight in " + << traverser.value()); return false; } expectWeight = false; m_CommonUniqueTokenWeight += tokenAndWeight.second; } else if (name == ORIG_UNIQUE_TOKEN_WEIGHT) { - if (core::CStringUtils::stringToType(traverser.value(), m_OrigUniqueTokenWeight) == false) { + if (core::CStringUtils::stringToType(traverser.value(), + m_OrigUniqueTokenWeight) == false) { LOG_ERROR(<< "Invalid maximum string length in " << traverser.value()); return false; } @@ -187,9 +194,9 @@ bool CTokenListType::addString(bool isDryRun, TSizeSizePrVecCItr testIter = tokenIds.begin(); for (size_t index = 0; index < m_OutOfOrderCommonTokenIndex; ++index) { // Ignore tokens that are not in the common unique tokens - if (std::binary_search( - m_CommonUniqueTokenIds.begin(), m_CommonUniqueTokenIds.end(), m_BaseTokenIds[index], CSizePairFirstElementLess()) == - false) { + if (std::binary_search(m_CommonUniqueTokenIds.begin(), + m_CommonUniqueTokenIds.end(), m_BaseTokenIds[index], + CSizePairFirstElementLess()) == false) { continue; } @@ -269,7 +276,8 @@ size_t CTokenListType::missingCommonTokenWeight(const TSizeSizeMap& uniqueTokenI TSizeSizePrVecCItr commonIter = m_CommonUniqueTokenIds.begin(); TSizeSizeMapCItr testIter = uniqueTokenIds.begin(); - while (commonIter != m_CommonUniqueTokenIds.end() && testIter != uniqueTokenIds.end()) { + while (commonIter != m_CommonUniqueTokenIds.end() && + testIter != uniqueTokenIds.end()) { if (commonIter->first == testIter->first) { // Don't increment the weight if a given token appears a different // number of times in the two strings @@ -300,7 +308,8 @@ bool CTokenListType::isMissingCommonTokenWeightZero(const TSizeSizeMap& uniqueTo 
TSizeSizePrVecCItr commonIter = m_CommonUniqueTokenIds.begin(); TSizeSizeMapCItr testIter = uniqueTokenIds.begin(); - while (commonIter != m_CommonUniqueTokenIds.end() && testIter != uniqueTokenIds.end()) { + while (commonIter != m_CommonUniqueTokenIds.end() && + testIter != uniqueTokenIds.end()) { if (commonIter->first < testIter->first) { return false; } @@ -322,10 +331,12 @@ bool CTokenListType::isMissingCommonTokenWeightZero(const TSizeSizeMap& uniqueTo bool CTokenListType::containsCommonTokensInOrder(const TSizeSizePrVec& tokenIds) const { TSizeSizePrVecCItr testIter = tokenIds.begin(); - for (TSizeSizePrVecCItr baseIter = m_BaseTokenIds.begin(); baseIter != m_BaseTokenIds.end(); ++baseIter) { + for (TSizeSizePrVecCItr baseIter = m_BaseTokenIds.begin(); + baseIter != m_BaseTokenIds.end(); ++baseIter) { // Ignore tokens that are not in the common unique tokens - if (std::binary_search(m_CommonUniqueTokenIds.begin(), m_CommonUniqueTokenIds.end(), *baseIter, CSizePairFirstElementLess()) == - false) { + if (std::binary_search(m_CommonUniqueTokenIds.begin(), + m_CommonUniqueTokenIds.end(), *baseIter, + CSizePairFirstElementLess()) == false) { continue; } @@ -350,7 +361,8 @@ size_t CTokenListType::numMatches() const { void CTokenListType::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(BASE_STRING, m_BaseString); - for (TSizeSizePrVecCItr iter = m_BaseTokenIds.begin(); iter != m_BaseTokenIds.end(); ++iter) { + for (TSizeSizePrVecCItr iter = m_BaseTokenIds.begin(); + iter != m_BaseTokenIds.end(); ++iter) { inserter.insertValue(BASE_TOKEN_ID, iter->first); inserter.insertValue(BASE_TOKEN_WEIGHT, iter->second); } @@ -358,7 +370,8 @@ void CTokenListType::acceptPersistInserter(core::CStatePersistInserter& inserter inserter.insertValue(MAX_STRING_LEN, m_MaxStringLen); inserter.insertValue(OUT_OF_ORDER_COMMON_TOKEN_INDEX, m_OutOfOrderCommonTokenIndex); - for (TSizeSizePrVecCItr iter = m_CommonUniqueTokenIds.begin(); iter != m_CommonUniqueTokenIds.end(); ++iter) { + for (TSizeSizePrVecCItr iter = m_CommonUniqueTokenIds.begin(); + iter != m_CommonUniqueTokenIds.end(); ++iter) { inserter.insertValue(COMMON_UNIQUE_TOKEN_ID, iter->first); inserter.insertValue(COMMON_UNIQUE_TOKEN_WEIGHT, iter->second); } diff --git a/lib/api/dump_state/Main.cc b/lib/api/dump_state/Main.cc index 1be733ba33..4ea51b28c1 100644 --- a/lib/api/dump_state/Main.cc +++ b/lib/api/dump_state/Main.cc @@ -142,19 +142,13 @@ bool persistAnomalyDetectorStateToFile(const std::string& configFileName, ml::core_t::TTime bucketSize(3600); std::string jobId("foo"); - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( bucketSize, ml::model_t::E_None, "", bucketSize * latencyBuckets, 0, false, ""); - - ml::api::CAnomalyJob origJob(jobId, - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig( + bucketSize, ml::model_t::E_None, "", bucketSize * latencyBuckets, 0, false, ""); + + ml::api::CAnomalyJob origJob(jobId, limits, fieldConfig, modelConfig, wrappedOutputStream, boost::bind(&reportPersistComplete, _1), - nullptr, - -1, - "time", - timeFormat); + nullptr, -1, "time", timeFormat); using TScopedInputParserP = boost::scoped_ptr<ml::api::CInputParser>; TScopedInputParserP parser; @@ -191,41 +185,39 @@ bool persistAnomalyDetectorStateToFile(const std::string& configFileName, } bool persistByDetector(const std::string& version) { - return
persistAnomalyDetectorStateToFile("../unittest/testfiles/new_mlfields.conf", - "../unittest/testfiles/big_ascending.txt", - "../unittest/testfiles/state/" + version + "/by_detector_state.json", - 0, - "%d/%b/%Y:%T %z"); + return persistAnomalyDetectorStateToFile( + "../unittest/testfiles/new_mlfields.conf", "../unittest/testfiles/big_ascending.txt", + "../unittest/testfiles/state/" + version + "/by_detector_state.json", 0, + "%d/%b/%Y:%T %z"); } bool persistOverDetector(const std::string& version) { - return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_mlfields_over.conf", - "../unittest/testfiles/big_ascending.txt", - "../unittest/testfiles/state/" + version + "/over_detector_state.json", - 0, - "%d/%b/%Y:%T %z"); + return persistAnomalyDetectorStateToFile( + "../unittest/testfiles/new_mlfields_over.conf", "../unittest/testfiles/big_ascending.txt", + "../unittest/testfiles/state/" + version + "/over_detector_state.json", + 0, "%d/%b/%Y:%T %z"); } bool persistPartitionDetector(const std::string& version) { - return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_mlfields_partition.conf", - "../unittest/testfiles/big_ascending.txt", - "../unittest/testfiles/state/" + version + "/partition_detector_state.json", - 0, - "%d/%b/%Y:%T %z"); + return persistAnomalyDetectorStateToFile( + "../unittest/testfiles/new_mlfields_partition.conf", + "../unittest/testfiles/big_ascending.txt", + "../unittest/testfiles/state/" + version + "/partition_detector_state.json", + 0, "%d/%b/%Y:%T %z"); } bool persistDcDetector(const std::string& version) { - return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_persist_dc.conf", - "../unittest/testfiles/files_users_programs.csv", - "../unittest/testfiles/state/" + version + "/dc_detector_state.json", - 5); + return persistAnomalyDetectorStateToFile( + "../unittest/testfiles/new_persist_dc.conf", + "../unittest/testfiles/files_users_programs.csv", + "../unittest/testfiles/state/" + version + "/dc_detector_state.json", 5); } bool persistCountDetector(const std::string& version) { - return persistAnomalyDetectorStateToFile("../unittest/testfiles/new_persist_count.conf", - "../unittest/testfiles/files_users_programs.csv", - "../unittest/testfiles/state/" + version + "/count_detector_state.json", - 5); + return persistAnomalyDetectorStateToFile( + "../unittest/testfiles/new_persist_count.conf", + "../unittest/testfiles/files_users_programs.csv", + "../unittest/testfiles/state/" + version + "/count_detector_state.json", 5); } int main(int /*argc*/, char** /*argv*/) { @@ -277,7 +269,8 @@ int main(int /*argc*/, char** /*argv*/) { return EXIT_FAILURE; } - persisted = persistCategorizerStateToFile("../unittest/testfiles/state/" + version + "/categorizer_state.json"); + persisted = persistCategorizerStateToFile("../unittest/testfiles/state/" + + version + "/categorizer_state.json"); if (!persisted) { LOG_ERROR(<< "Failed to persist categorizer state"); return EXIT_FAILURE; diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index dd4ff15a7e..f85a7279a0 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -44,13 +44,17 @@ std::set getUniqueValues(const std::string& key, const std::string& if (p1 != nullptr) { size_t j = 0; while (true) { - rapidjson::Value* p2 = rapidjson::Pointer("/" + std::to_string(i) + "/records/" + std::to_string(j)).Get(doc); + rapidjson::Value* p2 = rapidjson::Pointer("/" + std::to_string(i) + "/records/" + + 
diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index dd4ff15a7e..f85a7279a0 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -44,13 +44,17 @@ std::set<std::string> getUniqueValues(const std::string& key, const std::string& if (p1 != nullptr) { size_t j = 0; while (true) { - rapidjson::Value* p2 = rapidjson::Pointer("/" + std::to_string(i) + "/records/" + std::to_string(j)).Get(doc); + rapidjson::Value* p2 = rapidjson::Pointer("/" + std::to_string(i) + "/records/" + + std::to_string(j)) .Get(doc); if (p2 != nullptr) { size_t k = 0; while (true) { - rapidjson::Value* p3 = rapidjson::Pointer("/" + std::to_string(i) + "/records/" + std::to_string(j) + "/causes/" + - std::to_string(k) + "/" + key) - .Get(doc); + rapidjson::Value* p3 = + rapidjson::Pointer("/" + std::to_string(i) + "/records/" + + std::to_string(j) + "/causes/" + + std::to_string(k) + "/" + key) + .Get(doc); if (p3 != nullptr) { values.insert(p3->GetString()); @@ -76,10 +80,10 @@ std::set<std::string> getUniqueValues(const std::string& key, const std::string& CppUnit::Test* CAnomalyJobLimitTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnomalyJobLimitTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CAnomalyJobLimitTest>("CAnomalyJobLimitTest::testLimit", &CAnomalyJobLimitTest::testLimit)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CAnomalyJobLimitTest>("CAnomalyJobLimitTest::testAccuracy", &CAnomalyJobLimitTest::testAccuracy)); + suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobLimitTest>( + "CAnomalyJobLimitTest::testLimit", &CAnomalyJobLimitTest::testLimit)); + suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobLimitTest>( + "CAnomalyJobLimitTest::testAccuracy", &CAnomalyJobLimitTest::testAccuracy)); return suiteOfTests; } @@ -104,7 +108,8 @@ void CAnomalyJobLimitTest::testAccuracy() { CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(3600); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(3600); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -121,7 +126,8 @@ void CAnomalyJobLimitTest::testAccuracy() { api::CCsvInputParser parser(inputStrm); LOG_TRACE(<< "Reading file"); - CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); + CPPUNIT_ASSERT(parser.readStream( + boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); LOG_TRACE(<< "Checking results"); @@ -143,7 +149,8 @@ void CAnomalyJobLimitTest::testAccuracy() { CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(3600); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; std::stringstream outputStrm; @@ -151,7 +158,8 @@ void CAnomalyJobLimitTest::testAccuracy() { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); limits.resourceMonitor().m_ByteLimitHigh = nonLimitedUsage / 10; - limits.resourceMonitor().m_ByteLimitLow = limits.resourceMonitor().m_ByteLimitHigh - 1024; + limits.resourceMonitor().m_ByteLimitLow = + limits.resourceMonitor().m_ByteLimitHigh - 1024; LOG_TRACE(<< "Setting up job"); api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); @@ -161,7 +169,8 @@ void CAnomalyJobLimitTest::testAccuracy() { api::CCsvInputParser parser(inputStrm); LOG_TRACE(<< "Reading file"); - CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); + CPPUNIT_ASSERT(parser.readStream( + boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); LOG_TRACE(<< "Checking results"); @@ -197,7 +206,8 @@ void CAnomalyJobLimitTest::testLimit() { CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(3600); + model::CAnomalyDetectorModelConfig modelConfig = +
model::CAnomalyDetectorModelConfig::defaultConfig(3600); LOG_TRACE(<< "Setting up job"); api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); @@ -207,7 +217,8 @@ void CAnomalyJobLimitTest::testLimit() { api::CCsvInputParser parser(inputStrm); LOG_TRACE(<< "Reading file"); - CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); + CPPUNIT_ASSERT(parser.readStream( + boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); LOG_TRACE(<< "Checking results"); CPPUNIT_ASSERT_EQUAL(uint64_t(1176), job.numRecordsHandled()); } @@ -238,7 +249,8 @@ void CAnomalyJobLimitTest::testLimit() { CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(3600); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(3600); //::CMockOutputWriter resultsHandler; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -251,7 +263,8 @@ void CAnomalyJobLimitTest::testLimit() { api::CCsvInputParser parser(inputStrm); LOG_TRACE(<< "Reading file"); - CPPUNIT_ASSERT(parser.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); + CPPUNIT_ASSERT(parser.readStream( + boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); // Now turn on the resource limiting limits.resourceMonitor().m_ByteLimitHigh = 0; limits.resourceMonitor().m_ByteLimitLow = 0; @@ -262,7 +275,8 @@ void CAnomalyJobLimitTest::testLimit() { api::CCsvInputParser parser2(inputStrm2); LOG_TRACE(<< "Reading second file"); - CPPUNIT_ASSERT(parser2.readStream(boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); + CPPUNIT_ASSERT(parser2.readStream( + boost::bind(&api::CAnomalyJob::handleRecord, &job, _1))); LOG_TRACE(<< "Checking results"); CPPUNIT_ASSERT_EQUAL(uint64_t(1180), job.numRecordsHandled()); } diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index b243dc08cc..31e0e43d0b 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -38,7 +38,9 @@ namespace { class CEmptySearcher : public ml::core::CDataSearcher { public: //! Do a search that results in an empty input stream. - virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) { return TIStreamP(new std::istringstream()); } + virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) { + return TIStreamP(new std::istringstream()); + } }; //! 
\brief @@ -57,7 +59,9 @@ class CSingleResultVisitor : public ml::model::CHierarchicalResultsVisitor { virtual ~CSingleResultVisitor() {} - virtual void visit(const ml::model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) { + virtual void visit(const ml::model::CHierarchicalResults& /*results*/, + const TNode& node, + bool /*pivot*/) { if (!this->isSimpleCount(node) && this->isLeaf(node)) { if (node.s_AnnotatedProbability.s_AttributeProbabilities.size() == 0) { return; @@ -65,9 +69,11 @@ class CSingleResultVisitor : public ml::model::CHierarchicalResultsVisitor { if (!node.s_Model) { return; } - const ml::model::SAttributeProbability& attribute = node.s_AnnotatedProbability.s_AttributeProbabilities[0]; + const ml::model::SAttributeProbability& attribute = + node.s_AnnotatedProbability.s_AttributeProbabilities[0]; - m_LastResult = node.s_Model->currentBucketValue(attribute.s_Feature, 0, 0, node.s_BucketStartTime)[0]; + m_LastResult = node.s_Model->currentBucketValue( + attribute.s_Feature, 0, 0, node.s_BucketStartTime)[0]; } } @@ -83,7 +89,9 @@ class CMultiResultVisitor : public ml::model::CHierarchicalResultsVisitor { virtual ~CMultiResultVisitor() {} - virtual void visit(const ml::model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) { + virtual void visit(const ml::model::CHierarchicalResults& /*results*/, + const TNode& node, + bool /*pivot*/) { if (!this->isSimpleCount(node) && this->isLeaf(node)) { if (node.s_AnnotatedProbability.s_AttributeProbabilities.size() == 0) { return; @@ -97,9 +105,12 @@ class CMultiResultVisitor : public ml::model::CHierarchicalResultsVisitor { LOG_ERROR(<< "No identifier for '" << *node.s_Spec.s_PersonFieldValue << "'"); return; } - for (std::size_t i = 0; i < node.s_AnnotatedProbability.s_AttributeProbabilities.size(); ++i) { - const ml::model::SAttributeProbability& attribute = node.s_AnnotatedProbability.s_AttributeProbabilities[i]; - m_LastResult += node.s_Model->currentBucketValue(attribute.s_Feature, pid, attribute.s_Cid, node.s_BucketStartTime)[0]; + for (std::size_t i = 0; + i < node.s_AnnotatedProbability.s_AttributeProbabilities.size(); ++i) { + const ml::model::SAttributeProbability& attribute = + node.s_AnnotatedProbability.s_AttributeProbabilities[i]; + m_LastResult += node.s_Model->currentBucketValue( + attribute.s_Feature, pid, attribute.s_Cid, node.s_BucketStartTime)[0]; } } } @@ -116,7 +127,9 @@ class CResultsScoreVisitor : public ml::model::CHierarchicalResultsVisitor { virtual ~CResultsScoreVisitor() {} - virtual void visit(const ml::model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) { + virtual void visit(const ml::model::CHierarchicalResults& /*results*/, + const TNode& node, + bool /*pivot*/) { if (this->isRoot(node)) { node.s_NormalizedAnomalyScore = m_Score; } @@ -170,7 +183,8 @@ void CAnomalyJobTest::testBadTimes() { clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -192,7 +206,8 @@ void CAnomalyJobTest::testBadTimes() { clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig 
modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -214,20 +229,14 @@ void CAnomalyJobTest::testBadTimes() { clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - api::CAnomalyJob::TPersistCompleteFunc(), - nullptr, - -1, - "time", - "%Y%m%m%H%M%S"); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, + api::CAnomalyJob::TPersistCompleteFunc(), nullptr, + -1, "time", "%Y%m%m%H%M%S"); api::CAnomalyJob::TStrStrUMap dataRows; dataRows["time"] = "hello world"; @@ -248,7 +257,8 @@ void CAnomalyJobTest::testOutOfSequence() { clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -283,7 +293,8 @@ void CAnomalyJobTest::testControlMessages() { clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -314,7 +325,8 @@ void CAnomalyJobTest::testControlMessages() { clauses.push_back("count"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); api::CAnomalyJob::TStrStrUMap dataRows; dataRows["value"] = "2.0"; @@ -353,10 +365,12 @@ void CAnomalyJobTest::testControlMessages() { const rapidjson::Value& allRecords = doc.GetArray(); bool foundRecord = false; for (auto& r : allRecords.GetArray()) { - rapidjson::Value::ConstMemberIterator recordsIt = r.GetObject().FindMember("records"); + rapidjson::Value::ConstMemberIterator recordsIt = + r.GetObject().FindMember("records"); if (recordsIt != r.GetObject().MemberEnd()) { auto& recordsArray = recordsIt->value.GetArray()[0]; - rapidjson::Value::ConstMemberIterator actualIt = recordsArray.FindMember("actual"); + rapidjson::Value::ConstMemberIterator actualIt = + recordsArray.FindMember("actual"); CPPUNIT_ASSERT(actualIt != recordsArray.MemberEnd()); const rapidjson::Value::ConstArray& values = actualIt->value.GetArray(); @@ -401,10 +415,12 @@ void CAnomalyJobTest::testControlMessages() { const 
rapidjson::Value& allRecords2 = doc2.GetArray(); foundRecord = false; for (auto& r : allRecords2.GetArray()) { - rapidjson::Value::ConstMemberIterator recordsIt = r.GetObject().FindMember("records"); + rapidjson::Value::ConstMemberIterator recordsIt = + r.GetObject().FindMember("records"); if (recordsIt != r.GetObject().MemberEnd()) { auto& recordsArray = recordsIt->value.GetArray()[0]; - rapidjson::Value::ConstMemberIterator actualIt = recordsArray.FindMember("actual"); + rapidjson::Value::ConstMemberIterator actualIt = + recordsArray.FindMember("actual"); CPPUNIT_ASSERT(actualIt != recordsArray.MemberEnd()); const rapidjson::Value::ConstArray& values = actualIt->value.GetArray(); @@ -423,7 +439,8 @@ void CAnomalyJobTest::testSkipTimeControlMessage() { api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -486,7 +503,8 @@ void CAnomalyJobTest::testOutOfPhase() { api::CFieldConfig::TStrVec clauses; clauses.push_back("mean(value)"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -606,7 +624,8 @@ void CAnomalyJobTest::testOutOfPhase() { api::CFieldConfig::TStrVec clauses; clauses.push_back("mean(value)"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -711,7 +730,8 @@ void CAnomalyJobTest::testOutOfPhase() { api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -829,7 +849,8 @@ void CAnomalyJobTest::testOutOfPhase() { api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -947,7 +968,8 @@ void CAnomalyJobTest::testOutOfPhase() { // 2 delay buckets model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 2, false, ""); + model::CAnomalyDetectorModelConfig::defaultConfig( + bucketSize, model_t::E_None, "", 0, 2, false, ""); std::stringstream outputStrm; core::CJsonOutputStreamWrapper 
wrappedOutputStream(outputStrm); @@ -1092,7 +1114,8 @@ void CAnomalyJobTest::testOutOfPhase() { // 2 delay buckets model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 2, false, ""); + model::CAnomalyDetectorModelConfig::defaultConfig( + bucketSize, model_t::E_None, "", 0, 2, false, ""); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -1238,7 +1261,8 @@ void CAnomalyJobTest::testOutOfPhase() { // 2 delay buckets model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 2, false, ""); + model::CAnomalyDetectorModelConfig::defaultConfig( + bucketSize, model_t::E_None, "", 0, 2, false, ""); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -1430,7 +1454,8 @@ void CAnomalyJobTest::testBucketSelection() { // 2 delay buckets model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 2, false, ""); + model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, + "", 0, 2, false, ""); std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -1442,7 +1467,8 @@ void CAnomalyJobTest::testBucketSelection() { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor visitor(10); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1000); @@ -1452,7 +1478,8 @@ void CAnomalyJobTest::testBucketSelection() { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor visitor(20); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1050); @@ -1462,78 +1489,92 @@ void CAnomalyJobTest::testBucketSelection() { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor visitor(15); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1100); LOG_DEBUG(<< "Adding 15 at 1100"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime(1100, bucketSize, results)); + CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime( + 1100, bucketSize, results)); } { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor visitor(20); 
results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1150); LOG_DEBUG(<< "Adding 20 at 1150"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime(1150, bucketSize, results)); + CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime( + 1150, bucketSize, results)); } { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor visitor(25); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1200); LOG_DEBUG(<< "Adding 25 at 1200"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(1100), job.m_ResultsQueue.chooseResultTime(1200, bucketSize, results)); + CPPUNIT_ASSERT_EQUAL(core_t::TTime(1100), job.m_ResultsQueue.chooseResultTime( + 1200, bucketSize, results)); } { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor visitor(0); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1250); LOG_DEBUG(<< "Adding 0 at 1250"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime(1250, bucketSize, results)); + CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime( + 1250, bucketSize, results)); } { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor visitor(5); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1300); LOG_DEBUG(<< "Adding 5 at 1300"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(1200), job.m_ResultsQueue.chooseResultTime(1300, bucketSize, results)); + CPPUNIT_ASSERT_EQUAL(core_t::TTime(1200), job.m_ResultsQueue.chooseResultTime( + 1300, bucketSize, results)); } { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor visitor(5); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1350); LOG_DEBUG(<< "Adding 5 at 1350"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime(1350, bucketSize, results)); + CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime( + 1350, bucketSize, results)); } { model::SAnnotatedProbability prob(1.0); model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, "", "", "", "", "value", prob, nullptr, 1000); + results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, + "", "", "", "", "value", prob, nullptr, 1000); CResultsScoreVisitor 
visitor(1); results.topDownBreadthFirst(visitor); job.m_ResultsQueue.push(results, 1400); LOG_DEBUG(<< "Adding 1 at 1400"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(1300), job.m_ResultsQueue.chooseResultTime(1400, bucketSize, results)); + CPPUNIT_ASSERT_EQUAL(core_t::TTime(1300), job.m_ResultsQueue.chooseResultTime( + 1400, bucketSize, results)); } } @@ -1551,7 +1592,8 @@ void CAnomalyJobTest::testModelPlot() { fieldConfig.initFromClause(clauses); model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 0, false, ""); + model::CAnomalyDetectorModelConfig::defaultConfig( + bucketSize, model_t::E_None, "", 0, 0, false, ""); modelConfig.modelPlotBoundsPercentile(1.0); std::stringstream outputStrm; @@ -1625,7 +1667,8 @@ void CAnomalyJobTest::testModelPlot() { // 2 delay buckets model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, "", 0, 2, false, ""); + model::CAnomalyDetectorModelConfig::defaultConfig( + bucketSize, model_t::E_None, "", 0, 2, false, ""); modelConfig.modelPlotBoundsPercentile(1.0); std::stringstream outputStrm; @@ -1733,7 +1776,8 @@ void CAnomalyJobTest::testInterimResultEdgeCases() { api::CFieldConfig::TStrVec clauses{"count", "by", "error"}; fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); std::stringstream outputStrm; @@ -1742,7 +1786,8 @@ void CAnomalyJobTest::testInterimResultEdgeCases() { api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); std::remove(logFile); - CPPUNIT_ASSERT(ml::core::CLogger::instance().reconfigureFromFile("testfiles/testLogErrorsLog4cxx.properties")); + CPPUNIT_ASSERT(ml::core::CLogger::instance().reconfigureFromFile( + "testfiles/testLogErrorsLog4cxx.properties")); api::CAnomalyJob::TStrStrUMap dataRows; dataRows["time"] = "3610"; @@ -1790,7 +1835,8 @@ void CAnomalyJobTest::testRestoreFailsWithEmptyStream() { clauses.push_back("value"); clauses.push_back("partitionfield=greenhouse"); fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); std::ostringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -1804,20 +1850,26 @@ CppUnit::Test* CAnomalyJobTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnomalyJobTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testBadTimes", &CAnomalyJobTest::testBadTimes)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testOutOfSequence", &CAnomalyJobTest::testOutOfSequence)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testControlMessages", &CAnomalyJobTest::testControlMessages)); - suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testSkipTimeControlMessage", - &CAnomalyJobTest::testSkipTimeControlMessage)); - suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyJobTest>("CAnomalyJobTest::testOutOfPhase", &CAnomalyJobTest::testOutOfPhase)); - suiteOfTests->addTest( - new
CppUnit::TestCaller("CAnomalyJobTest::testBucketSelection", &CAnomalyJobTest::testBucketSelection)); - suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyJobTest::testModelPlot", &CAnomalyJobTest::testModelPlot)); - suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyJobTest::testInterimResultEdgeCases", - &CAnomalyJobTest::testInterimResultEdgeCases)); - suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyJobTest::testRestoreFailsWithEmptyStream", - &CAnomalyJobTest::testRestoreFailsWithEmptyStream)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testBadTimes", &CAnomalyJobTest::testBadTimes)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testOutOfSequence", &CAnomalyJobTest::testOutOfSequence)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testControlMessages", &CAnomalyJobTest::testControlMessages)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testSkipTimeControlMessage", + &CAnomalyJobTest::testSkipTimeControlMessage)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testOutOfPhase", &CAnomalyJobTest::testOutOfPhase)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testBucketSelection", &CAnomalyJobTest::testBucketSelection)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testModelPlot", &CAnomalyJobTest::testModelPlot)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testInterimResultEdgeCases", + &CAnomalyJobTest::testInterimResultEdgeCases)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyJobTest::testRestoreFailsWithEmptyStream", + &CAnomalyJobTest::testRestoreFailsWithEmptyStream)); return suiteOfTests; } diff --git a/lib/api/unittest/CBackgroundPersisterTest.cc b/lib/api/unittest/CBackgroundPersisterTest.cc index 4e4a23fa42..7b23070719 100644 --- a/lib/api/unittest/CBackgroundPersisterTest.cc +++ b/lib/api/unittest/CBackgroundPersisterTest.cc @@ -46,14 +46,18 @@ void reportPersistComplete(ml::api::CModelSnapshotJsonWriter::SModelSnapshotRepo CppUnit::Test* CBackgroundPersisterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBackgroundPersisterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CBackgroundPersisterTest::testDetectorPersistBy", - &CBackgroundPersisterTest::testDetectorPersistBy)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBackgroundPersisterTest::testDetectorPersistOver", - &CBackgroundPersisterTest::testDetectorPersistOver)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBackgroundPersisterTest::testDetectorPersistPartition", - &CBackgroundPersisterTest::testDetectorPersistPartition)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBackgroundPersisterTest::testCategorizationOnlyPersist", - &CBackgroundPersisterTest::testCategorizationOnlyPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBackgroundPersisterTest::testDetectorPersistBy", + &CBackgroundPersisterTest::testDetectorPersistBy)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBackgroundPersisterTest::testDetectorPersistOver", + &CBackgroundPersisterTest::testDetectorPersistOver)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBackgroundPersisterTest::testDetectorPersistPartition", + &CBackgroundPersisterTest::testDetectorPersistPartition)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBackgroundPersisterTest::testCategorizationOnlyPersist", + &CBackgroundPersisterTest::testCategorizationOnlyPersist)); return suiteOfTests; 
 }
@@ -67,7 +71,8 @@ void CBackgroundPersisterTest::testDetectorPersistOver() {
 }

 void CBackgroundPersisterTest::testDetectorPersistPartition() {
-    this->foregroundBackgroundCompCategorizationAndAnomalyDetection("testfiles/new_mlfields_partition.conf");
+    this->foregroundBackgroundCompCategorizationAndAnomalyDetection(
+        "testfiles/new_mlfields_partition.conf");
 }

 void CBackgroundPersisterTest::testCategorizationOnlyPersist() {
@@ -88,14 +93,16 @@ void CBackgroundPersisterTest::testCategorizationOnlyPersist() {
     ml::api::CFieldConfig fieldConfig("agent");

     std::ostringstream* backgroundStream(nullptr);
-    ml::api::CSingleStreamDataAdder::TOStreamP backgroundStreamPtr(backgroundStream = new std::ostringstream());
+    ml::api::CSingleStreamDataAdder::TOStreamP backgroundStreamPtr(
+        backgroundStream = new std::ostringstream());
     ml::api::CSingleStreamDataAdder backgroundDataAdder(backgroundStreamPtr);
     // The 300 second persist interval is irrelevant here - we bypass the timer
     // in this test and kick off the background persistence chain explicitly
     ml::api::CBackgroundPersister backgroundPersister(300, backgroundDataAdder);

     std::ostringstream* foregroundStream(nullptr);
-    ml::api::CSingleStreamDataAdder::TOStreamP foregroundStreamPtr(foregroundStream = new std::ostringstream());
+    ml::api::CSingleStreamDataAdder::TOStreamP foregroundStreamPtr(
+        foregroundStream = new std::ostringstream());
     {
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
         ml::api::CJsonOutputWriter outputWriter(JOB_ID, wrappedOutputStream);
@@ -105,11 +112,13 @@ void CBackgroundPersisterTest::testCategorizationOnlyPersist() {
         ml::api::CNullOutput nullOutput;

         // The typer knows how to assign categories to records
-        ml::api::CFieldDataTyper typer(JOB_ID, fieldConfig, limits, nullOutput, outputWriter, &backgroundPersister);
+        ml::api::CFieldDataTyper typer(JOB_ID, fieldConfig, limits, nullOutput,
+                                       outputWriter, &backgroundPersister);

         ml::api::CLineifiedJsonInputParser parser(inputStrm);

-        CPPUNIT_ASSERT(parser.readStream(boost::bind(&ml::api::CDataProcessor::handleRecord, &typer, _1)));
+        CPPUNIT_ASSERT(parser.readStream(
+            boost::bind(&ml::api::CDataProcessor::handleRecord, &typer, _1)));

         // Persist the processors' state in the background
         CPPUNIT_ASSERT(typer.periodicPersistState(backgroundPersister));
@@ -135,7 +144,8 @@ void CBackgroundPersisterTest::testCategorizationOnlyPersist() {
     CPPUNIT_ASSERT_EQUAL(backgroundState, foregroundState);
 }

-void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyDetection(const std::string& configFileName) {
+void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyDetection(
+    const std::string& configFileName) {
     // Start by creating processors with non-trivial state
     static const ml::core_t::TTime BUCKET_SIZE(3600);
@@ -154,10 +164,12 @@ void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyD
     ml::api::CFieldConfig fieldConfig;
     CPPUNIT_ASSERT(fieldConfig.initFromFile(configFileName));

-    ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE);
+    ml::model::CAnomalyDetectorModelConfig modelConfig =
+        ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE);

     std::ostringstream* backgroundStream(nullptr);
-    ml::api::CSingleStreamDataAdder::TOStreamP backgroundStreamPtr(backgroundStream = new std::ostringstream());
+    ml::api::CSingleStreamDataAdder::TOStreamP backgroundStreamPtr(
+        backgroundStream = new std::ostringstream());
     ml::api::CSingleStreamDataAdder backgroundDataAdder(backgroundStreamPtr);
     // The 300 second persist interval is irrelevant here - we bypass the timer
     // in this test and kick off the background persistence chain explicitly
@@ -170,21 +182,17 @@ void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyD
     std::string foregroundSnapshotId;

     std::ostringstream* foregroundStream(nullptr);
-    ml::api::CSingleStreamDataAdder::TOStreamP foregroundStreamPtr(foregroundStream = new std::ostringstream());
+    ml::api::CSingleStreamDataAdder::TOStreamP foregroundStreamPtr(
+        foregroundStream = new std::ostringstream());
     {
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
         ml::api::CJsonOutputWriter outputWriter(JOB_ID, wrappedOutputStream);

-        ml::api::CAnomalyJob job(JOB_ID,
-                                 limits,
-                                 fieldConfig,
-                                 modelConfig,
-                                 wrappedOutputStream,
-                                 boost::bind(&reportPersistComplete, _1, boost::ref(snapshotId), boost::ref(numDocs)),
-                                 &backgroundPersister,
-                                 -1,
-                                 "time",
-                                 "%d/%b/%Y:%T %z");
+        ml::api::CAnomalyJob job(
+            JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream,
+            boost::bind(&reportPersistComplete, _1, boost::ref(snapshotId),
+                        boost::ref(numDocs)),
+            &backgroundPersister, -1, "time", "%d/%b/%Y:%T %z");

         ml::api::CDataProcessor* firstProcessor(&job);
@@ -201,7 +209,8 @@ void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyD
         ml::api::CLineifiedJsonInputParser parser(inputStrm);

-        CPPUNIT_ASSERT(parser.readStream(boost::bind(&ml::api::CDataProcessor::handleRecord, firstProcessor, _1)));
+        CPPUNIT_ASSERT(parser.readStream(boost::bind(
+            &ml::api::CDataProcessor::handleRecord, firstProcessor, _1)));

         // Persist the processors' state in the background
         CPPUNIT_ASSERT(firstProcessor->periodicPersistState(backgroundPersister));
@@ -223,8 +232,10 @@ void CBackgroundPersisterTest::foregroundBackgroundCompCategorizationAndAnomalyD

     // The snapshot ID can be different between the two persists, so replace the
     // first occurrence of it (which is in the bulk metadata)
-    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(backgroundSnapshotId, "snap", backgroundState));
-    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(foregroundSnapshotId, "snap", foregroundState));
+    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(
+                                        backgroundSnapshotId, "snap", backgroundState));
+    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(
+                                        foregroundSnapshotId, "snap", foregroundState));

     // Replace the zero byte separators so the expected/actual strings don't get
     // truncated by CppUnit if the test fails
diff --git a/lib/api/unittest/CBaseTokenListDataTyperTest.cc b/lib/api/unittest/CBaseTokenListDataTyperTest.cc
index 3e7c422320..107a8cdd1e 100644
--- a/lib/api/unittest/CBaseTokenListDataTyperTest.cc
+++ b/lib/api/unittest/CBaseTokenListDataTyperTest.cc
@@ -10,38 +10,62 @@ CppUnit::Test* CBaseTokenListDataTyperTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBaseTokenListDataTyperTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CBaseTokenListDataTyperTest>("CBaseTokenListDataTyperTest::testMinMatchingWeights",
-                                                                               &CBaseTokenListDataTyperTest::testMinMatchingWeights));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBaseTokenListDataTyperTest>("CBaseTokenListDataTyperTest::testMaxMatchingWeights",
-                                                                               &CBaseTokenListDataTyperTest::testMaxMatchingWeights));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBaseTokenListDataTyperTest>(
+        "CBaseTokenListDataTyperTest::testMinMatchingWeights",
+        &CBaseTokenListDataTyperTest::testMinMatchingWeights));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBaseTokenListDataTyperTest>(
+        "CBaseTokenListDataTyperTest::testMaxMatchingWeights",
+        &CBaseTokenListDataTyperTest::testMaxMatchingWeights));

     return suiteOfTests;
 }

 void CBaseTokenListDataTyperTest::testMinMatchingWeights() {
-    CPPUNIT_ASSERT_EQUAL(size_t(0), ml::api::CBaseTokenListDataTyper::minMatchingWeight(0, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::api::CBaseTokenListDataTyper::minMatchingWeight(1, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(2), ml::api::CBaseTokenListDataTyper::minMatchingWeight(2, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(3), ml::api::CBaseTokenListDataTyper::minMatchingWeight(3, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(3), ml::api::CBaseTokenListDataTyper::minMatchingWeight(4, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(4), ml::api::CBaseTokenListDataTyper::minMatchingWeight(5, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(5), ml::api::CBaseTokenListDataTyper::minMatchingWeight(6, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(5), ml::api::CBaseTokenListDataTyper::minMatchingWeight(7, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(6), ml::api::CBaseTokenListDataTyper::minMatchingWeight(8, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(7), ml::api::CBaseTokenListDataTyper::minMatchingWeight(9, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(8), ml::api::CBaseTokenListDataTyper::minMatchingWeight(10, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(0), ml::api::CBaseTokenListDataTyper::minMatchingWeight(0, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(1), ml::api::CBaseTokenListDataTyper::minMatchingWeight(1, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(2), ml::api::CBaseTokenListDataTyper::minMatchingWeight(2, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(3), ml::api::CBaseTokenListDataTyper::minMatchingWeight(3, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(3), ml::api::CBaseTokenListDataTyper::minMatchingWeight(4, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(4), ml::api::CBaseTokenListDataTyper::minMatchingWeight(5, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(5), ml::api::CBaseTokenListDataTyper::minMatchingWeight(6, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(5), ml::api::CBaseTokenListDataTyper::minMatchingWeight(7, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(6), ml::api::CBaseTokenListDataTyper::minMatchingWeight(8, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(7), ml::api::CBaseTokenListDataTyper::minMatchingWeight(9, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(8), ml::api::CBaseTokenListDataTyper::minMatchingWeight(10, 0.7));
 }

 void CBaseTokenListDataTyperTest::testMaxMatchingWeights() {
-    CPPUNIT_ASSERT_EQUAL(size_t(0), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(0, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(1), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(1, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(2), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(2, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(4), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(3, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(5), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(4, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(7), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(5, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(8), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(6, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(9), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(7, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(11), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(8, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(12), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(9, 0.7));
-    CPPUNIT_ASSERT_EQUAL(size_t(14), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(10, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(0), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(0, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(1), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(1, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(2), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(2, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(4), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(3, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(5), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(4, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(7), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(5, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(8), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(6, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(9), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(7, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(11), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(8, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(12), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(9, 0.7));
+    CPPUNIT_ASSERT_EQUAL(
+        size_t(14), ml::api::CBaseTokenListDataTyper::maxMatchingWeight(10, 0.7));
 }
diff --git a/lib/api/unittest/CCategoryExamplesCollectorTest.cc b/lib/api/unittest/CCategoryExamplesCollectorTest.cc
index eae9e60d4f..77f311356d 100644
--- a/lib/api/unittest/CCategoryExamplesCollectorTest.cc
+++ b/lib/api/unittest/CCategoryExamplesCollectorTest.cc
@@ -109,37 +109,43 @@ void CCategoryExamplesCollectorTest::testTruncation() {
     {
         // All single byte characters
         std::string example = baseExample + "bbbbbb";
        examplesCollector.add(1, example);
-        CPPUNIT_ASSERT_EQUAL(baseExample + "bb" + ellipsis, *examplesCollector.examples(1).begin());
+        CPPUNIT_ASSERT_EQUAL(baseExample + "bb" + ellipsis,
+                             *examplesCollector.examples(1).begin());
     }
     {
         // Two byte character crosses truncation boundary
         std::string example = baseExample + "bébbb";
         examplesCollector.add(2, example);
-        CPPUNIT_ASSERT_EQUAL(baseExample + "b" + ellipsis, *examplesCollector.examples(2).begin());
+        CPPUNIT_ASSERT_EQUAL(baseExample + "b" + ellipsis,
+                             *examplesCollector.examples(2).begin());
     }
     {
         // Two byte characters either side of truncation boundary
         std::string example = baseExample + "éébbb";
         examplesCollector.add(3, example);
-        CPPUNIT_ASSERT_EQUAL(baseExample + "é" + ellipsis, *examplesCollector.examples(3).begin());
+        CPPUNIT_ASSERT_EQUAL(baseExample + "é" + ellipsis,
+                             *examplesCollector.examples(3).begin());
     }
     {
         // Two byte character before truncation boundary, single byte immediately after
         std::string example = baseExample + "ébbbb";
         examplesCollector.add(4, example);
-        CPPUNIT_ASSERT_EQUAL(baseExample + "é" + ellipsis, *examplesCollector.examples(4).begin());
+        CPPUNIT_ASSERT_EQUAL(baseExample + "é" + ellipsis,
+                             *examplesCollector.examples(4).begin());
     }
     {
         // Three byte character crosses truncation boundary with start character before
         std::string example = baseExample + "b中bbb";
         examplesCollector.add(5, example);
-        CPPUNIT_ASSERT_EQUAL(baseExample + "b" + ellipsis, *examplesCollector.examples(5).begin());
+        CPPUNIT_ASSERT_EQUAL(baseExample + "b" + ellipsis,
+                             *examplesCollector.examples(5).begin());
     }
     {
         // Three byte character crosses truncation boundary with continuation character before
         std::string example = baseExample + "中bbb";
         examplesCollector.add(6, example);
-        CPPUNIT_ASSERT_EQUAL(baseExample + ellipsis, *examplesCollector.examples(6).begin());
+        CPPUNIT_ASSERT_EQUAL(baseExample + ellipsis,
+                             *examplesCollector.examples(6).begin());
     }
 }

@@ -147,22 +153,26 @@ CppUnit::Test* CCategoryExamplesCollectorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCategoryExamplesCollectorTest");

     suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
-        "CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero", &CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero));
+        "CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero",
+        &CCategoryExamplesCollectorTest::testAddGivenMaxExamplesIsZero));
     suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
         "CCategoryExamplesCollectorTest::testAddGivenSameCategoryExamplePairAddedTwice",
         &CCategoryExamplesCollectorTest::testAddGivenSameCategoryExamplePairAddedTwice));
     suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
         "CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForSameCategory",
         &CCategoryExamplesCollectorTest::testAddGivenMoreThanMaxExamplesAreAddedForSameCategory));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCategoryExamplesCollectorTest>("CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent",
-                                                                &CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>("CCategoryExamplesCollectorTest::testExamples",
-                                                                                  &CCategoryExamplesCollectorTest::testExamples));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>("CCategoryExamplesCollectorTest::testPersist",
-                                                                                  &CCategoryExamplesCollectorTest::testPersist));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>("CCategoryExamplesCollectorTest::testTruncation",
-                                                                                  &CCategoryExamplesCollectorTest::testTruncation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
+        "CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent",
+        &CCategoryExamplesCollectorTest::testAddGivenCategoryAddedIsNotSubsequent));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
+        "CCategoryExamplesCollectorTest::testExamples",
+        &CCategoryExamplesCollectorTest::testExamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
+        "CCategoryExamplesCollectorTest::testPersist",
+        &CCategoryExamplesCollectorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCategoryExamplesCollectorTest>(
+        "CCategoryExamplesCollectorTest::testTruncation",
+        &CCategoryExamplesCollectorTest::testTruncation));

     return suiteOfTests;
 }
diff --git a/lib/api/unittest/CConfigUpdaterTest.cc b/lib/api/unittest/CConfigUpdaterTest.cc
index 0615ed9185..060ce9eb24 100644
--- a/lib/api/unittest/CConfigUpdaterTest.cc
+++ b/lib/api/unittest/CConfigUpdaterTest.cc
@@ -21,33 +21,41 @@ using namespace api;

 CppUnit::Test* CConfigUpdaterTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CConfigUpdaterTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed",
-                                                                      &CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenUnknownStanzas",
-                                                                      &CConfigUpdaterTest::testUpdateGivenUnknownStanzas));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenModelPlotConfig",
-                                                                      &CConfigUpdaterTest::testUpdateGivenModelPlotConfig));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenDetectorRules",
-                                                                      &CConfigUpdaterTest::testUpdateGivenDetectorRules));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex",
-                                                                      &CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenFilters",
-                                                                      &CConfigUpdaterTest::testUpdateGivenFilters));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>("CConfigUpdaterTest::testUpdateGivenScheduledEvents",
-                                                                      &CConfigUpdaterTest::testUpdateGivenScheduledEvents));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
+        "CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed",
+        &CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
+        "CConfigUpdaterTest::testUpdateGivenUnknownStanzas",
+        &CConfigUpdaterTest::testUpdateGivenUnknownStanzas));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
+        "CConfigUpdaterTest::testUpdateGivenModelPlotConfig",
+        &CConfigUpdaterTest::testUpdateGivenModelPlotConfig));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
+        "CConfigUpdaterTest::testUpdateGivenDetectorRules",
+        &CConfigUpdaterTest::testUpdateGivenDetectorRules));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
+        "CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex",
+        &CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
+        "CConfigUpdaterTest::testUpdateGivenFilters", &CConfigUpdaterTest::testUpdateGivenFilters));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CConfigUpdaterTest>(
+        "CConfigUpdaterTest::testUpdateGivenScheduledEvents",
+        &CConfigUpdaterTest::testUpdateGivenScheduledEvents));

     return suiteOfTests;
 }

 void CConfigUpdaterTest::testUpdateGivenUpdateCannotBeParsed() {
     CFieldConfig fieldConfig;
-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
     CConfigUpdater configUpdater(fieldConfig, modelConfig);
     CPPUNIT_ASSERT(configUpdater.update("this is invalid") == false);
 }

 void CConfigUpdaterTest::testUpdateGivenUnknownStanzas() {
     CFieldConfig fieldConfig;
-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
     CConfigUpdater configUpdater(fieldConfig, modelConfig);
     CPPUNIT_ASSERT(configUpdater.update("[unknown1]\na = 1\n[unknown2]\nb = 2\n") == false);
 }
@@ -56,7 +64,8 @@ void CConfigUpdaterTest::testUpdateGivenModelPlotConfig() {
     using TStrSet = model::CAnomalyDetectorModelConfig::TStrSet;

     CFieldConfig fieldConfig;
-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
     modelConfig.modelPlotBoundsPercentile(95.0);
     TStrSet terms;
     terms.insert(std::string("a"));
@@ -85,23 +94,27 @@ void CConfigUpdaterTest::testUpdateGivenDetectorRules() {
     fieldConfig.parseRules(0, originalRules0);
     fieldConfig.parseRules(1, originalRules1);

-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();

     std::string configUpdate0("[detectorRules]\ndetectorIndex = 0\nrulesJson = []\n");
-    std::string configUpdate1("[detectorRules]\ndetectorIndex = 1\nrulesJson = "
-                              "[{\"actions\":[\"filter_results\"],\"conditions_connective\":\"or\",\"conditions\":[{\"type\":\"numerical_"
-                              "typical\",\"condition\":{\"operator\":\"lt\",\"value\":\"15\"}}]}]");
+    std::string configUpdate1(
+        "[detectorRules]\ndetectorIndex = 1\nrulesJson = "
+        "[{\"actions\":[\"filter_results\"],\"conditions_connective\":\"or\",\"conditions\":[{\"type\":\"numerical_"
+        "typical\",\"condition\":{\"operator\":\"lt\",\"value\":\"15\"}}]}]");

     CConfigUpdater configUpdater(fieldConfig, modelConfig);
     CPPUNIT_ASSERT(configUpdater.update(configUpdate0));
     CPPUNIT_ASSERT(configUpdater.update(configUpdate1));

-    CFieldConfig::TIntDetectionRuleVecUMap::const_iterator itr = fieldConfig.detectionRules().find(0);
+    CFieldConfig::TIntDetectionRuleVecUMap::const_iterator itr =
+        fieldConfig.detectionRules().find(0);
     CPPUNIT_ASSERT(itr->second.empty());
     itr = fieldConfig.detectionRules().find(1);
     CPPUNIT_ASSERT_EQUAL(std::size_t(1), itr->second.size());
-    CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TYPICAL < 15.000000"), itr->second[0].print());
+    CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TYPICAL < 15.000000"),
+                         itr->second[0].print());
 }

 void CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex() {
@@ -110,7 +123,8 @@ void CConfigUpdaterTest::testUpdateGivenRulesWithInvalidDetectorIndex() {
     originalRules += "\"conditions\":[{\"type\":\"numerical_actual\",\"condition\":{\"operator\":\"lt\",\"value\":\"5\"}}]}]";
     fieldConfig.parseRules(0, originalRules);

-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();

     std::string configUpdate("[detectorRules]\ndetectorIndex = invalid\nrulesJson = []\n");

@@ -124,7 +138,8 @@ void CConfigUpdaterTest::testUpdateGivenFilters() {
     fieldConfig.processFilter("filter.filter_1", "[\"aaa\",\"bbb\"]");
     fieldConfig.processFilter("filter.filter_2", "[\"ccc\",\"ddd\"]");

-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();

     auto ruleFilters = fieldConfig.ruleFilters();
     CPPUNIT_ASSERT_EQUAL(std::size_t(2), ruleFilters.size());
@@ -169,22 +184,28 @@ void CConfigUpdaterTest::testUpdateGivenFilters() {
 }

 void CConfigUpdaterTest::testUpdateGivenScheduledEvents() {
-    std::string validRule1 = "[{\"actions\":[\"filter_results\",\"skip_sampling\"],\"conditions_connective\":\"and\","
-                             "\"conditions\":[{\"type\":\"time\",\"condition\":{\"operator\":\"gte\",\"value\":\"1\"}},"
-                             "{\"type\":\"time\",\"condition\":{\"operator\":\"lt\",\"value\":\"2\"}}]}]";
-    std::string validRule2 = "[{\"actions\":[\"filter_results\",\"skip_sampling\"],\"conditions_connective\":\"and\","
-                             "\"conditions\":[{\"type\":\"time\",\"condition\":{\"operator\":\"gte\",\"value\":\"3\"}},"
-                             "{\"type\":\"time\",\"condition\":{\"operator\":\"lt\",\"value\":\"4\"}}]}]";
+    std::string validRule1 =
+        "[{\"actions\":[\"filter_results\",\"skip_sampling\"],\"conditions_connective\":\"and\","
+        "\"conditions\":[{\"type\":\"time\",\"condition\":{\"operator\":\"gte\",\"value\":\"1\"}},"
+        "{\"type\":\"time\",\"condition\":{\"operator\":\"lt\",\"value\":\"2\"}}]}]";
+    std::string validRule2 =
+        "[{\"actions\":[\"filter_results\",\"skip_sampling\"],\"conditions_connective\":\"and\","
+        "\"conditions\":[{\"type\":\"time\",\"condition\":{\"operator\":\"gte\",\"value\":\"3\"}},"
+        "{\"type\":\"time\",\"condition\":{\"operator\":\"lt\",\"value\":\"4\"}}]}]";

     CFieldConfig fieldConfig;

     // Set up some events
     {
         boost::property_tree::ptree propTree;
-        propTree.put(boost::property_tree::ptree::path_type("scheduledevent.0.description", '\t'), "old_event_1");
-        propTree.put(boost::property_tree::ptree::path_type("scheduledevent.0.rules", '\t'), validRule1);
-        propTree.put(boost::property_tree::ptree::path_type("scheduledevent.1.description", '\t'), "old_event_2");
-        propTree.put(boost::property_tree::ptree::path_type("scheduledevent.1.rules", '\t'), validRule2);
+        propTree.put(boost::property_tree::ptree::path_type("scheduledevent.0.description", '\t'),
+                     "old_event_1");
+        propTree.put(boost::property_tree::ptree::path_type("scheduledevent.0.rules", '\t'),
+                     validRule1);
+        propTree.put(boost::property_tree::ptree::path_type("scheduledevent.1.description", '\t'),
+                     "old_event_2");
+        propTree.put(boost::property_tree::ptree::path_type("scheduledevent.1.rules", '\t'),
+                     validRule2);
         fieldConfig.updateScheduledEvents(propTree);

         const auto& events = fieldConfig.scheduledEvents();
@@ -197,7 +218,8 @@ void CConfigUpdaterTest::testUpdateGivenScheduledEvents() {
                          events[1].second.print());
     }

-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
     CConfigUpdater configUpdater(fieldConfig, modelConfig);

     // Test an update that replaces the events
diff --git a/lib/api/unittest/CCsvInputParserTest.cc b/lib/api/unittest/CCsvInputParserTest.cc
index fcc241eda4..dc4b435097 100644
--- a/lib/api/unittest/CCsvInputParserTest.cc
+++ b/lib/api/unittest/CCsvInputParserTest.cc
@@ -23,18 +23,18 @@ CppUnit::Test* CCsvInputParserTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCsvInputParserTest");

-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testSimpleDelims", &CCsvInputParserTest::testSimpleDelims));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testComplexDelims", &CCsvInputParserTest::testComplexDelims));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testThroughput", &CCsvInputParserTest::testThroughput));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testDateParse", &CCsvInputParserTest::testDateParse));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testQuoteParsing", &CCsvInputParserTest::testQuoteParsing));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvInputParserTest>("CCsvInputParserTest::testLineParser", &CCsvInputParserTest::testLineParser));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvInputParserTest>(
+        "CCsvInputParserTest::testSimpleDelims", &CCsvInputParserTest::testSimpleDelims));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvInputParserTest>(
+        "CCsvInputParserTest::testComplexDelims", &CCsvInputParserTest::testComplexDelims));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvInputParserTest>(
+        "CCsvInputParserTest::testThroughput", &CCsvInputParserTest::testThroughput));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvInputParserTest>(
+        "CCsvInputParserTest::testDateParse", &CCsvInputParserTest::testDateParse));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvInputParserTest>(
+        "CCsvInputParserTest::testQuoteParsing", &CCsvInputParserTest::testQuoteParsing));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvInputParserTest>(
+        "CCsvInputParserTest::testLineParser", &CCsvInputParserTest::testLineParser));

     return suiteOfTests;
 }
@@ -60,7 +60,8 @@ class CVisitor {

         // Check the field names
         for (const auto& entry : dataRowFields) {
-            auto iter = std::find(m_ExpectedFieldNames.begin(), m_ExpectedFieldNames.end(), entry.first);
+            auto iter = std::find(m_ExpectedFieldNames.begin(),
+                                  m_ExpectedFieldNames.end(), entry.first);
             CPPUNIT_ASSERT(iter != m_ExpectedFieldNames.end());
         }

@@ -69,10 +70,12 @@ class CVisitor {
         // Check the line count is consistent with the _raw field
         ml::api::CCsvInputParser::TStrStrUMapCItr rawIter = dataRowFields.find("_raw");
         CPPUNIT_ASSERT(rawIter != dataRowFields.end());
-        ml::api::CCsvInputParser::TStrStrUMapCItr lineCountIter = dataRowFields.find("linecount");
+        ml::api::CCsvInputParser::TStrStrUMapCItr lineCountIter =
+            dataRowFields.find("linecount");
         CPPUNIT_ASSERT(lineCountIter != dataRowFields.end());

-        size_t expectedLineCount(1 + std::count(rawIter->second.begin(), rawIter->second.end(), '\n'));
+        size_t expectedLineCount(1 + std::count(rawIter->second.begin(),
+                                                rawIter->second.end(), '\n'));
         size_t lineCount(0);
         CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(lineCountIter->second, lineCount));
         CPPUNIT_ASSERT_EQUAL(expectedLineCount, lineCount);
@@ -93,8 +96,11 @@ class CTimeCheckingVisitor {
     using TTimeVec = std::vector<ml::core_t::TTime>;

 public:
-    CTimeCheckingVisitor(const std::string& timeField, const std::string& timeFormat, const TTimeVec& expectedTimes)
-        : m_RecordCount(0), m_TimeField(timeField), m_TimeFormat(timeFormat), m_ExpectedTimes(expectedTimes) {}
+    CTimeCheckingVisitor(const std::string& timeField,
+                         const std::string& timeFormat,
+                         const TTimeVec& expectedTimes)
+        : m_RecordCount(0), m_TimeField(timeField), m_TimeFormat(timeFormat),
+          m_ExpectedTimes(expectedTimes) {}

     //! Handle a record
     bool operator()(const ml::api::CCsvInputParser::TStrStrUMap& dataRowFields) {
@@ -111,8 +117,10 @@ class CTimeCheckingVisitor {
         if (m_TimeFormat.empty()) {
             CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(fieldIter->second, timeVal));
         } else {
-            CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(m_TimeFormat, fieldIter->second, timeVal));
-            LOG_DEBUG(<< "Converted " << fieldIter->second << " to " << timeVal << " using format " << m_TimeFormat);
+            CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(
+                m_TimeFormat, fieldIter->second, timeVal));
+            LOG_DEBUG(<< "Converted " << fieldIter->second << " to " << timeVal
+                      << " using format " << m_TimeFormat);
         }
         CPPUNIT_ASSERT_EQUAL(m_ExpectedTimes[m_RecordCount], timeVal);
@@ -294,16 +302,20 @@ void CCsvInputParserTest::testThroughput() {

     CPPUNIT_ASSERT_EQUAL(recordsPerBlock * TEST_SIZE, visitor.recordCount());

-    LOG_INFO(<< "Parsing " << visitor.recordCount() << " records took " << (end - start) << " seconds");
+    LOG_INFO(<< "Parsing " << visitor.recordCount() << " records took "
+             << (end - start) << " seconds");
 }

 void CCsvInputParserTest::testDateParse() {
     static const ml::core_t::TTime EXPECTED_TIMES[] = {
-        1359331200, 1359331200, 1359331207, 1359331220, 1359331259, 1359331262, 1359331269, 1359331270, 1359331272, 1359331296,
-        1359331301, 1359331311, 1359331314, 1359331315, 1359331316, 1359331321, 1359331328, 1359331333, 1359331349, 1359331352,
-        1359331370, 1359331382, 1359331385, 1359331386, 1359331395, 1359331404, 1359331416, 1359331416, 1359331424, 1359331429};
-
-    CTimeCheckingVisitor::TTimeVec expectedTimes(boost::begin(EXPECTED_TIMES), boost::end(EXPECTED_TIMES));
+        1359331200, 1359331200, 1359331207, 1359331220, 1359331259, 1359331262,
+        1359331269, 1359331270, 1359331272, 1359331296, 1359331301, 1359331311,
+        1359331314, 1359331315, 1359331316, 1359331321, 1359331328, 1359331333,
+        1359331349, 1359331352, 1359331370, 1359331382, 1359331385, 1359331386,
+        1359331395, 1359331404, 1359331416, 1359331416, 1359331424, 1359331429};
+
+    CTimeCheckingVisitor::TTimeVec expectedTimes(boost::begin(EXPECTED_TIMES),
+                                                 boost::end(EXPECTED_TIMES));

     // Ensure we are in UK timewise
     CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("Europe/London"));
diff --git a/lib/api/unittest/CCsvOutputWriterTest.cc b/lib/api/unittest/CCsvOutputWriterTest.cc
index d08f5e7b95..cdec881dc9 100644
--- a/lib/api/unittest/CCsvOutputWriterTest.cc
+++ b/lib/api/unittest/CCsvOutputWriterTest.cc
@@ -18,15 +18,16 @@ CppUnit::Test* CCsvOutputWriterTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCsvOutputWriterTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvOutputWriterTest>("CCsvOutputWriterTest::testAdd", &CCsvOutputWriterTest::testAdd));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvOutputWriterTest>("CCsvOutputWriterTest::testOverwrite", &CCsvOutputWriterTest::testOverwrite));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvOutputWriterTest>("CCsvOutputWriterTest::testThroughput", &CCsvOutputWriterTest::testThroughput));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCsvOutputWriterTest>("CCsvOutputWriterTest::testExcelQuoting", &CCsvOutputWriterTest::testExcelQuoting));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvOutputWriterTest>("CCsvOutputWriterTest::testNonExcelQuoting",
-                                                                        &CCsvOutputWriterTest::testNonExcelQuoting));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvOutputWriterTest>(
+        "CCsvOutputWriterTest::testAdd", &CCsvOutputWriterTest::testAdd));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvOutputWriterTest>(
+        "CCsvOutputWriterTest::testOverwrite", &CCsvOutputWriterTest::testOverwrite));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvOutputWriterTest>(
+        "CCsvOutputWriterTest::testThroughput", &CCsvOutputWriterTest::testThroughput));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvOutputWriterTest>(
+        "CCsvOutputWriterTest::testExcelQuoting", &CCsvOutputWriterTest::testExcelQuoting));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCsvOutputWriterTest>(
+        "CCsvOutputWriterTest::testNonExcelQuoting", &CCsvOutputWriterTest::testNonExcelQuoting));

     return suiteOfTests;
 }
@@ -107,22 +108,26 @@ void CCsvOutputWriterTest::testAdd() {

     LOG_DEBUG(<< "Output is:\n" << output);

-    for (ml::api::CCsvOutputWriter::TStrVecCItr iter = fieldNames.begin(); iter != fieldNames.end(); ++iter) {
+    for (ml::api::CCsvOutputWriter::TStrVecCItr iter = fieldNames.begin();
+         iter != fieldNames.end(); ++iter) {
         LOG_DEBUG(<< "Checking output contains '" << *iter << "'");
         CPPUNIT_ASSERT(output.find(*iter) != std::string::npos);
     }

-    for (ml::api::CCsvOutputWriter::TStrVecCItr iter = mlFieldNames.begin(); iter != mlFieldNames.end(); ++iter) {
+    for (ml::api::CCsvOutputWriter::TStrVecCItr iter = mlFieldNames.begin();
+         iter != mlFieldNames.end(); ++iter) {
         LOG_DEBUG(<< "Checking output contains '" << *iter << "'");
         CPPUNIT_ASSERT(output.find(*iter) != std::string::npos);
     }

-    for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = originalFields.begin(); iter != originalFields.end(); ++iter) {
+    for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = originalFields.begin();
+         iter != originalFields.end(); ++iter) {
         LOG_DEBUG(<< "Checking output contains '" << iter->second << "'");
         CPPUNIT_ASSERT(output.find(iter->second) != std::string::npos);
     }

-    for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = mlFields.begin(); iter != mlFields.end(); ++iter) {
+    for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = mlFields.begin();
+         iter != mlFields.end(); ++iter) {
         LOG_DEBUG(<< "Checking output contains '" << iter->second << "'");
         CPPUNIT_ASSERT(output.find(iter->second) != std::string::npos);
     }
@@ -206,17 +211,20 @@ void CCsvOutputWriterTest::testOverwrite() {

     LOG_DEBUG(<< "Output is:\n" << output);

-    for (ml::api::CCsvOutputWriter::TStrVecCItr iter = fieldNames.begin(); iter != fieldNames.end(); ++iter) {
+    for (ml::api::CCsvOutputWriter::TStrVecCItr iter = fieldNames.begin();
+         iter != fieldNames.end(); ++iter) {
LOG_DEBUG(<< "Checking output contains '" << *iter << "'"); CPPUNIT_ASSERT(output.find(*iter) != std::string::npos); } - for (ml::api::CCsvOutputWriter::TStrVecCItr iter = mlFieldNames.begin(); iter != mlFieldNames.end(); ++iter) { + for (ml::api::CCsvOutputWriter::TStrVecCItr iter = mlFieldNames.begin(); + iter != mlFieldNames.end(); ++iter) { LOG_DEBUG(<< "Checking output contains '" << *iter << "'"); CPPUNIT_ASSERT(output.find(*iter) != std::string::npos); } - for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = originalFields.begin(); iter != originalFields.end(); ++iter) { + for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = originalFields.begin(); + iter != originalFields.end(); ++iter) { // The Ml fields should override the originals if (mlFields.find(iter->first) == mlFields.end()) { LOG_DEBUG(<< "Checking output contains '" << iter->second << "'"); @@ -227,7 +235,8 @@ void CCsvOutputWriterTest::testOverwrite() { } } - for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = mlFields.begin(); iter != mlFields.end(); ++iter) { + for (ml::api::CCsvOutputWriter::TStrStrUMapCItr iter = mlFields.begin(); + iter != mlFields.end(); ++iter) { LOG_DEBUG(<< "Checking output contains '" << iter->second << "'"); CPPUNIT_ASSERT(output.find(iter->second) != std::string::npos); } diff --git a/lib/api/unittest/CDetectionRulesJsonParserTest.cc b/lib/api/unittest/CDetectionRulesJsonParserTest.cc index 5b198864d3..b1da763fe5 100644 --- a/lib/api/unittest/CDetectionRulesJsonParserTest.cc +++ b/lib/api/unittest/CDetectionRulesJsonParserTest.cc @@ -24,30 +24,32 @@ CppUnit::Test* CDetectionRulesJsonParserTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetectionRulesJsonParserTest"); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString", &CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString)); + "CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString", + &CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray", &CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings", - &CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction", - &CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray", - &CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction", - &CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction)); + "CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray", + &CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings", + &CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction", + &CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction)); + 
suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray", + &CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction", + &CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction)); suiteOfTests->addTest(new CppUnit::TestCaller( "CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnective", &CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnective)); suiteOfTests->addTest(new CppUnit::TestCaller( "CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnective", &CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnective)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions", - &CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions", + &CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions)); suiteOfTests->addTest(new CppUnit::TestCaller( "CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray", &CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray)); @@ -63,23 +65,24 @@ CppUnit::Test* CDetectionRulesJsonParserTest::suite() { suiteOfTests->addTest(new CppUnit::TestCaller( "CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd", &CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules", - &CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule", - &CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules", + &CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule", + &CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule)); suiteOfTests->addTest(new CppUnit::TestCaller( "CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule", &CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule", &CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions", - &CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule", - &CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule)); + "CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule", + &CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule)); + suiteOfTests->addTest(new CppUnit::TestCaller( + 
"CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions", + &CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule", + &CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule)); return suiteOfTests; } @@ -294,7 +297,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithCo CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF ACTUAL < 5.000000 OR ACTUAL(metric) <= 2.300000"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF ACTUAL < 5.000000 OR ACTUAL(metric) <= 2.300000"), + rules[0].print()); } void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd() { @@ -317,7 +321,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAb CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TYPICAL > 5.000000 AND DIFF_ABS(metric:cpu) >= 2.300000"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TYPICAL > 5.000000 AND DIFF_ABS(metric:cpu) >= 2.300000"), + rules[0].print()); } void CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules() { @@ -349,8 +354,10 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules() { CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS (id:foo) IF ACTUAL < 1.000000"), rules[0].print()); - CPPUNIT_ASSERT_EQUAL(std::string("SKIP_SAMPLING (id:42) IF ACTUAL < 2.000000"), rules[1].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS (id:foo) IF ACTUAL < 1.000000"), + rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("SKIP_SAMPLING (id:42) IF ACTUAL < 2.000000"), + rules[1].print()); } void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule() { @@ -376,7 +383,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule() { CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), + rules[0].print()); } void CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule() { @@ -405,7 +413,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule() CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), + rules[0].print()); } void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule() { @@ -431,7 +440,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) NOT IN FILTER"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) NOT IN FILTER"), + rules[0].print()); } void 
CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule() { @@ -452,7 +462,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule() { CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TIME >= 5000.000000 AND TIME < 10000.000000"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF TIME >= 5000.000000 AND TIME < 10000.000000"), + rules[0].print()); } void CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions() { @@ -474,7 +485,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions() { CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF ACTUAL < 5.000000"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF ACTUAL < 5.000000"), + rules[0].print()); } { @@ -493,7 +505,8 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions() { CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("SKIP_SAMPLING IF ACTUAL < 5.000000"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("SKIP_SAMPLING IF ACTUAL < 5.000000"), + rules[0].print()); } { @@ -512,6 +525,7 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions() { CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size()); - CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF ACTUAL < 5.000000"), rules[0].print()); + CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS AND SKIP_SAMPLING IF ACTUAL < 5.000000"), + rules[0].print()); } } diff --git a/lib/api/unittest/CFieldConfigTest.cc b/lib/api/unittest/CFieldConfigTest.cc index c5bf5990ef..4a5ec7e4af 100644 --- a/lib/api/unittest/CFieldConfigTest.cc +++ b/lib/api/unittest/CFieldConfigTest.cc @@ -16,40 +16,52 @@ CppUnit::Test* CFieldConfigTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFieldConfigTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testTrivial", &CFieldConfigTest::testTrivial)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testValid", &CFieldConfigTest::testValid)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testInvalid", &CFieldConfigTest::testInvalid)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testValidSummaryCountFieldName", - &CFieldConfigTest::testValidSummaryCountFieldName)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFieldConfigTest::testValidClauses", &CFieldConfigTest::testValidClauses)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFieldConfigTest::testInvalidClauses", &CFieldConfigTest::testInvalidClauses)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFieldConfigTest::testFieldOptions", &CFieldConfigTest::testFieldOptions)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testValidPopulationClauses", - &CFieldConfigTest::testValidPopulationClauses)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFieldConfigTest::testValidPopulation", &CFieldConfigTest::testValidPopulation)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testDefaultCategorizationField", - &CFieldConfigTest::testDefaultCategorizationField)); - suiteOfTests->addTest(new 
CppUnit::TestCaller("CFieldConfigTest::testCategorizationFieldWithFilters", - &CFieldConfigTest::testCategorizationFieldWithFilters)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testExcludeFrequentClauses", - &CFieldConfigTest::testExcludeFrequentClauses)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFieldConfigTest::testExcludeFrequent", &CFieldConfigTest::testExcludeFrequent)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testSlashes", &CFieldConfigTest::testSlashes)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFieldConfigTest::testBracketPercent", &CFieldConfigTest::testBracketPercent)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFieldConfigTest::testClauseTokenise", &CFieldConfigTest::testClauseTokenise)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testUtf8Bom", &CFieldConfigTest::testUtf8Bom)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testAddByOverPartitionInfluencers", - &CFieldConfigTest::testAddByOverPartitionInfluencers)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFieldConfigTest::testAddOptions", &CFieldConfigTest::testAddOptions)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFieldConfigTest::testScheduledEvents", &CFieldConfigTest::testScheduledEvents)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testTrivial", &CFieldConfigTest::testTrivial)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testValid", &CFieldConfigTest::testValid)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testInvalid", &CFieldConfigTest::testInvalid)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testValidSummaryCountFieldName", + &CFieldConfigTest::testValidSummaryCountFieldName)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testValidClauses", &CFieldConfigTest::testValidClauses)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testInvalidClauses", &CFieldConfigTest::testInvalidClauses)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testFieldOptions", &CFieldConfigTest::testFieldOptions)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testValidPopulationClauses", + &CFieldConfigTest::testValidPopulationClauses)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testValidPopulation", &CFieldConfigTest::testValidPopulation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testDefaultCategorizationField", + &CFieldConfigTest::testDefaultCategorizationField)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testCategorizationFieldWithFilters", + &CFieldConfigTest::testCategorizationFieldWithFilters)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testExcludeFrequentClauses", + &CFieldConfigTest::testExcludeFrequentClauses)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testExcludeFrequent", &CFieldConfigTest::testExcludeFrequent)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testSlashes", &CFieldConfigTest::testSlashes)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testBracketPercent", &CFieldConfigTest::testBracketPercent)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testClauseTokenise", &CFieldConfigTest::testClauseTokenise)); + suiteOfTests->addTest(new CppUnit::TestCaller( + 
"CFieldConfigTest::testUtf8Bom", &CFieldConfigTest::testUtf8Bom)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testAddByOverPartitionInfluencers", + &CFieldConfigTest::testAddByOverPartitionInfluencers)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testAddOptions", &CFieldConfigTest::testAddOptions)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFieldConfigTest::testScheduledEvents", &CFieldConfigTest::testScheduledEvents)); return suiteOfTests; } @@ -74,16 +86,19 @@ void CFieldConfigTest::testTrivial() { } void CFieldConfigTest::testValid() { - this->testValidFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields.conf"); + this->testValidFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), + "testfiles/new_mlfields.conf"); } void CFieldConfigTest::testInvalid() { - this->testInvalidFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_invalidmlfields.conf"); + this->testInvalidFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), + "testfiles/new_invalidmlfields.conf"); } void CFieldConfigTest::testValidSummaryCountFieldName() { - this->testValidSummaryCountFieldNameFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), - "testfiles/new_mlfields_summarycount.conf"); + this->testValidSummaryCountFieldNameFile( + boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), + "testfiles/new_mlfields_summarycount.conf"); } void CFieldConfigTest::testValidClauses() { @@ -172,7 +187,8 @@ void CFieldConfigTest::testValidClauses() { CPPUNIT_ASSERT(!config.havePartitionFields()); CPPUNIT_ASSERT(config.summaryCountFieldName().empty()); CPPUNIT_ASSERT_EQUAL(size_t(1), config.influencerFieldNames().size()); - CPPUNIT_ASSERT_EQUAL(std::string("nationality"), config.influencerFieldNames().front()); + CPPUNIT_ASSERT_EQUAL(std::string("nationality"), + config.influencerFieldNames().front()); LOG_DEBUG(<< config.debug()); @@ -202,8 +218,10 @@ void CFieldConfigTest::testValidClauses() { CPPUNIT_ASSERT(!config.havePartitionFields()); CPPUNIT_ASSERT(config.summaryCountFieldName().empty()); CPPUNIT_ASSERT_EQUAL(size_t(2), config.influencerFieldNames().size()); - CPPUNIT_ASSERT_EQUAL(std::string("MarketCap"), config.influencerFieldNames().front()); - CPPUNIT_ASSERT_EQUAL(std::string("nationality"), config.influencerFieldNames().back()); + CPPUNIT_ASSERT_EQUAL(std::string("MarketCap"), + config.influencerFieldNames().front()); + CPPUNIT_ASSERT_EQUAL(std::string("nationality"), + config.influencerFieldNames().back()); LOG_DEBUG(<< config.debug()); @@ -458,7 +476,8 @@ void CFieldConfigTest::testValidClauses() { CPPUNIT_ASSERT(iter->overFieldName().empty()); CPPUNIT_ASSERT(iter->partitionFieldName().empty()); CPPUNIT_ASSERT_EQUAL(false, iter->useNull()); - CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_IndividualMetricMedian, iter->function()); + CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_IndividualMetricMedian, + iter->function()); CPPUNIT_ASSERT_EQUAL(true, ml::model::function_t::isMetric(iter->function())); CPPUNIT_ASSERT_EQUAL(false, ml::model::function_t::isPopulation(iter->function())); CPPUNIT_ASSERT_EQUAL(std::string("median"), iter->terseFunctionName()); @@ -618,9 +637,12 @@ void CFieldConfigTest::testFieldOptions() { { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, true, "c", function, fieldName)); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString( + false, true, 
true, "c", function, fieldName)); - ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", "overField", "partitionField", false, false, true); + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", + "overField", "partitionField", + false, false, true); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationCount, opt.function()); CPPUNIT_ASSERT(opt.fieldName().empty()); @@ -637,9 +659,11 @@ void CFieldConfigTest::testFieldOptions() { { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, false, false, "count()", function, fieldName)); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString( + false, false, false, "count()", function, fieldName)); - ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 3, "", "", "", false, false, false); + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 3, "", "", + "", false, false, false); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_IndividualRareCount, opt.function()); CPPUNIT_ASSERT(opt.fieldName().empty()); @@ -669,11 +693,14 @@ void CFieldConfigTest::testFieldOptions() { { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, false, "dc(category)", function, fieldName)); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString( + false, true, false, "dc(category)", function, fieldName)); - ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 5, "", "overField", "", false, false, false); + ml::api::CFieldConfig::CFieldOptions opt( + function, fieldName, 5, "", "overField", "", false, false, false); - CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationDistinctCount, opt.function()); + CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationDistinctCount, + opt.function()); CPPUNIT_ASSERT(opt.byFieldName().empty()); CPPUNIT_ASSERT_EQUAL(5, opt.configKey()); CPPUNIT_ASSERT_EQUAL(std::string("category"), opt.fieldName()); @@ -688,9 +715,11 @@ void CFieldConfigTest::testFieldOptions() { { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, false, "info_content(mlsub)", function, fieldName)); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString( + false, true, false, "info_content(mlsub)", function, fieldName)); - ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 6, "", "mlhrd", "", false, false, false); + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 6, "", + "mlhrd", "", false, false, false); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationInfoContent, opt.function()); CPPUNIT_ASSERT(opt.byFieldName().empty()); @@ -707,11 +736,14 @@ void CFieldConfigTest::testFieldOptions() { { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, false, "high_info_content(mlsub)", function, fieldName)); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString( + false, true, false, "high_info_content(mlsub)", function, fieldName)); - ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "", "mlhrd", "datacenter", false, false, false); + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "", "mlhrd", + "datacenter", false, false, false); - CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationHighInfoContent, opt.function()); + CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationHighInfoContent, + 
opt.function()); CPPUNIT_ASSERT(opt.byFieldName().empty()); CPPUNIT_ASSERT_EQUAL(1, opt.configKey()); CPPUNIT_ASSERT_EQUAL(std::string("mlsub"), opt.fieldName()); @@ -726,9 +758,11 @@ void CFieldConfigTest::testFieldOptions() { { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, true, "rare()", function, fieldName)); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString( + false, true, true, "rare()", function, fieldName)); - ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", "overField", "", false, false, false); + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", + "overField", "", false, false, false); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationRare, opt.function()); CPPUNIT_ASSERT(opt.fieldName().empty()); @@ -745,9 +779,12 @@ void CFieldConfigTest::testFieldOptions() { { ml::model::function_t::EFunction function; std::string fieldName; - CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString(false, true, true, "rare_count", function, fieldName)); + CPPUNIT_ASSERT(ml::api::CFieldConfig::parseFieldString( + false, true, true, "rare_count", function, fieldName)); - ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", "overField", "partitionField", false, false, true); + ml::api::CFieldConfig::CFieldOptions opt(function, fieldName, 1, "byField", + "overField", "partitionField", + false, false, true); CPPUNIT_ASSERT_EQUAL(ml::model::function_t::E_PopulationRareCount, opt.function()); CPPUNIT_ASSERT(opt.fieldName().empty()); @@ -843,7 +880,8 @@ void CFieldConfigTest::testValidPopulationClauses() { CPPUNIT_ASSERT_EQUAL(false, ml::model::function_t::isMetric(iter->function())); CPPUNIT_ASSERT_EQUAL(true, ml::model::function_t::isPopulation(iter->function())); CPPUNIT_ASSERT_EQUAL(std::string("high_dc"), iter->terseFunctionName()); - CPPUNIT_ASSERT_EQUAL(std::string("high_distinct_count"), iter->verboseFunctionName()); + CPPUNIT_ASSERT_EQUAL(std::string("high_distinct_count"), + iter->verboseFunctionName()); } { ml::api::CFieldConfig config; @@ -1038,12 +1076,14 @@ void CFieldConfigTest::testValidPopulationClauses() { } void CFieldConfigTest::testValidPopulation() { - this->testValidPopulationFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_populationmlfields.conf"); + this->testValidPopulationFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), + "testfiles/new_populationmlfields.conf"); } void CFieldConfigTest::testDefaultCategorizationField() { - this->testDefaultCategorizationFieldFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), - "testfiles/new_mlfields_sos_message_cat.conf"); + this->testDefaultCategorizationFieldFile( + boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), + "testfiles/new_mlfields_sos_message_cat.conf"); } void CFieldConfigTest::testCategorizationFieldWithFilters() { @@ -1276,15 +1316,18 @@ void CFieldConfigTest::testExcludeFrequentClauses() { } void CFieldConfigTest::testExcludeFrequent() { - this->testExcludeFrequentFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields_excludefrequent.conf"); + this->testExcludeFrequentFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), + "testfiles/new_mlfields_excludefrequent.conf"); } void CFieldConfigTest::testSlashes() { - this->testSlashesFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields_slashes.conf"); + 
this->testSlashesFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), + "testfiles/new_mlfields_slashes.conf"); } void CFieldConfigTest::testBracketPercent() { - this->testBracketPercentFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), "testfiles/new_mlfields_bracket_percent.conf"); + this->testBracketPercentFile(boost::bind(&ml::api::CFieldConfig::initFromFile, _1, _2), + "testfiles/new_mlfields_bracket_percent.conf"); } void CFieldConfigTest::testClauseTokenise() { @@ -1478,7 +1521,9 @@ void CFieldConfigTest::testAddOptions() { ml::api::CFieldConfig::CFieldOptions options1("count", 1, "SRC", false, false); CPPUNIT_ASSERT(configFromScratch.addOptions(options1)); - ml::api::CFieldConfig::CFieldOptions options2(ml::model::function_t::E_PopulationCount, "", 2, "DPT", "SRC", "", false, false, true); + ml::api::CFieldConfig::CFieldOptions options2(ml::model::function_t::E_PopulationCount, + "", 2, "DPT", "SRC", "", + false, false, true); CPPUNIT_ASSERT(configFromScratch.addOptions(options2)); CPPUNIT_ASSERT_EQUAL(configFromFile.debug(), configFromScratch.debug()); @@ -1587,7 +1632,8 @@ void CFieldConfigTest::testInvalidFile(TInitFromFileFunc initFunc, const std::st CPPUNIT_ASSERT(!initFunc(&config, fileName)); } -void CFieldConfigTest::testValidSummaryCountFieldNameFile(TInitFromFileFunc initFunc, const std::string& fileName) { +void CFieldConfigTest::testValidSummaryCountFieldNameFile(TInitFromFileFunc initFunc, + const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1595,7 +1641,8 @@ void CFieldConfigTest::testValidSummaryCountFieldNameFile(TInitFromFileFunc init CPPUNIT_ASSERT_EQUAL(std::string("count"), config.summaryCountFieldName()); } -void CFieldConfigTest::testValidPopulationFile(TInitFromFileFunc initFunc, const std::string& fileName) { +void CFieldConfigTest::testValidPopulationFile(TInitFromFileFunc initFunc, + const std::string& fileName) { { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1627,7 +1674,8 @@ void CFieldConfigTest::testValidPopulationFile(TInitFromFileFunc initFunc, const } } -void CFieldConfigTest::testDefaultCategorizationFieldFile(TInitFromFileFunc initFunc, const std::string& fileName) { +void CFieldConfigTest::testDefaultCategorizationFieldFile(TInitFromFileFunc initFunc, + const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1653,7 +1701,8 @@ void CFieldConfigTest::testDefaultCategorizationFieldFile(TInitFromFileFunc init CPPUNIT_ASSERT_EQUAL(false, ml::model::function_t::isPopulation(iter->function())); } -void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, const std::string& fileName) { +void CFieldConfigTest::testExcludeFrequentFile(TInitFromFileFunc initFunc, + const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1755,12 +1804,14 @@ void CFieldConfigTest::testSlashesFile(TInitFromFileFunc initFunc, const std::st const ml::api::CFieldConfig::TFieldOptionsMIndex& fields = config.fieldOptions(); - for (ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); iter != fields.end(); ++iter) { + for (ml::api::CFieldConfig::TFieldOptionsMIndexCItr iter = fields.begin(); + iter != fields.end(); ++iter) { CPPUNIT_ASSERT_EQUAL(std::string("host"), iter->partitionFieldName()); } } -void CFieldConfigTest::testBracketPercentFile(TInitFromFileFunc initFunc, const std::string& fileName) { +void 
CFieldConfigTest::testBracketPercentFile(TInitFromFileFunc initFunc, + const std::string& fileName) { ml::api::CFieldConfig config; CPPUNIT_ASSERT(initFunc(&config, fileName)); @@ -1777,7 +1828,8 @@ void CFieldConfigTest::testBracketPercentFile(TInitFromFileFunc initFunc, const CPPUNIT_ASSERT_EQUAL(std::string("10%"), iter->byFieldName()); CPPUNIT_ASSERT_EQUAL(std::string("%10"), iter->overFieldName()); CPPUNIT_ASSERT_EQUAL(std::string("Percentage (%)"), iter->partitionFieldName()); - CPPUNIT_ASSERT_EQUAL(std::string("This string should have quotes removed"), config.categorizationFieldName()); + CPPUNIT_ASSERT_EQUAL(std::string("This string should have quotes removed"), + config.categorizationFieldName()); } void CFieldConfigTest::testScheduledEvents() { diff --git a/lib/api/unittest/CFieldConfigTest.h b/lib/api/unittest/CFieldConfigTest.h index 59ef9b30f7..b67d78efa9 100644 --- a/lib/api/unittest/CFieldConfigTest.h +++ b/lib/api/unittest/CFieldConfigTest.h @@ -14,7 +14,8 @@ class CFieldConfigTest : public CppUnit::TestFixture { public: - using TInitFromFileFunc = std::function; + using TInitFromFileFunc = + std::function; public: void testTrivial(); @@ -43,9 +44,11 @@ class CFieldConfigTest : public CppUnit::TestFixture { private: void testValidFile(TInitFromFileFunc initFunc, const std::string& fileName); void testInvalidFile(TInitFromFileFunc initFunc, const std::string& fileName); - void testValidSummaryCountFieldNameFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testValidSummaryCountFieldNameFile(TInitFromFileFunc initFunc, + const std::string& fileName); void testValidPopulationFile(TInitFromFileFunc initFunc, const std::string& fileName); - void testDefaultCategorizationFieldFile(TInitFromFileFunc initFunc, const std::string& fileName); + void testDefaultCategorizationFieldFile(TInitFromFileFunc initFunc, + const std::string& fileName); void testExcludeFrequentFile(TInitFromFileFunc initFunc, const std::string& fileName); void testSlashesFile(TInitFromFileFunc initFunc, const std::string& fileName); void testBracketPercentFile(TInitFromFileFunc initFunc, const std::string& fileName); diff --git a/lib/api/unittest/CFieldDataTyperTest.cc b/lib/api/unittest/CFieldDataTyperTest.cc index b70639642f..98923f9c98 100644 --- a/lib/api/unittest/CFieldDataTyperTest.cc +++ b/lib/api/unittest/CFieldDataTyperTest.cc @@ -37,12 +37,15 @@ namespace { class CEmptySearcher : public ml::core::CDataSearcher { public: //! Do a search that results in an empty input stream. 
-    virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) { return TIStreamP(new std::istringstream()); }
+    virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) {
+        return TIStreamP(new std::istringstream());
+    }
 };
 
 class CTestOutputHandler : public COutputHandler {
 public:
-    CTestOutputHandler() : COutputHandler(), m_NewStream(false), m_Finalised(false), m_Records(0) {}
+    CTestOutputHandler()
+        : COutputHandler(), m_NewStream(false), m_Finalised(false), m_Records(0) {}
 
     virtual ~CTestOutputHandler() {}
 
@@ -54,11 +57,14 @@ class CTestOutputHandler : public COutputHandler {
 
     bool isNewStream() const { return m_NewStream; }
 
-    virtual bool fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { return true; }
+    virtual bool fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) {
+        return true;
+    }
 
     virtual const TStrVec& fieldNames() const { return m_FieldNames; }
 
-    virtual bool writeRow(const TStrStrUMap& /*dataRowFields*/, const TStrStrUMap& /*overrideDataRowFields*/) {
+    virtual bool writeRow(const TStrStrUMap& /*dataRowFields*/,
+                          const TStrStrUMap& /*overrideDataRowFields*/) {
         m_Records++;
         return true;
     }
@@ -77,9 +83,12 @@ class CTestOutputHandler : public COutputHandler {
 
 class CTestDataSearcher : public core::CDataSearcher {
 public:
-    CTestDataSearcher(const std::string& data) : m_Stream(new std::istringstream(data)) {}
+    CTestDataSearcher(const std::string& data)
+        : m_Stream(new std::istringstream(data)) {}
 
-    virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) { return m_Stream; }
+    virtual TIStreamP search(size_t /*currentDocNum*/, size_t /*limit*/) {
+        return m_Stream;
+    }
 
 private:
     TIStreamP m_Stream;
@@ -89,9 +98,13 @@ class CTestDataAdder : public core::CDataAdder {
 public:
     CTestDataAdder() : m_Stream(new std::ostringstream) {}
 
-    virtual TOStreamP addStreamed(const std::string& /*index*/, const std::string& /*id*/) { return m_Stream; }
+    virtual TOStreamP addStreamed(const std::string& /*index*/, const std::string& /*id*/) {
+        return m_Stream;
+    }
 
-    virtual bool streamComplete(TOStreamP& /*strm*/, bool /*force*/) { return true; }
+    virtual bool streamComplete(TOStreamP& /*strm*/, bool /*force*/) {
+        return true;
+    }
 
     TOStreamP getStream() { return m_Stream; }
 
@@ -267,7 +280,8 @@ void CFieldDataTyperTest::testHandleControlMessages() {
     const std::string& output = outputStrm.str();
     LOG_DEBUG(<< "Output is: " << output);
 
-    CPPUNIT_ASSERT_EQUAL(std::string::size_type(0), output.find("[{\"flush\":{\"id\":\"7\",\"last_finalized_bucket_end\":0}}"));
+    CPPUNIT_ASSERT_EQUAL(std::string::size_type(0),
+                         output.find("[{\"flush\":{\"id\":\"7\",\"last_finalized_bucket_end\":0}}"));
 }
 
 void CFieldDataTyperTest::testRestoreStateFailsWithEmptyState() {
@@ -289,14 +303,18 @@ void CFieldDataTyperTest::testRestoreStateFailsWithEmptyState() {
 
 CppUnit::Test* CFieldDataTyperTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFieldDataTyperTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testAll", &CFieldDataTyperTest::testAll));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testNodeReverseSearch",
-                                                                       &CFieldDataTyperTest::testNodeReverseSearch));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testPassOnControlMessages",
-                                                                       &CFieldDataTyperTest::testPassOnControlMessages));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testHandleControlMessages",
-                                                                       &CFieldDataTyperTest::testHandleControlMessages));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>("CFieldDataTyperTest::testRestoreStateFailsWithEmptyState",
-                                                                       &CFieldDataTyperTest::testRestoreStateFailsWithEmptyState));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>(
+        "CFieldDataTyperTest::testAll", &CFieldDataTyperTest::testAll));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>(
+        "CFieldDataTyperTest::testNodeReverseSearch", &CFieldDataTyperTest::testNodeReverseSearch));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>(
+        "CFieldDataTyperTest::testPassOnControlMessages",
+        &CFieldDataTyperTest::testPassOnControlMessages));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>(
+        "CFieldDataTyperTest::testHandleControlMessages",
+        &CFieldDataTyperTest::testHandleControlMessages));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CFieldDataTyperTest>(
+        "CFieldDataTyperTest::testRestoreStateFailsWithEmptyState",
+        &CFieldDataTyperTest::testRestoreStateFailsWithEmptyState));
 
     return suiteOfTests;
 }
diff --git a/lib/api/unittest/CForecastRunnerTest.cc b/lib/api/unittest/CForecastRunnerTest.cc
index 9c26091a97..b91f00585b 100644
--- a/lib/api/unittest/CForecastRunnerTest.cc
+++ b/lib/api/unittest/CForecastRunnerTest.cc
@@ -23,7 +23,8 @@
 
 namespace {
 
-using TGenerateRecord = void (*)(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows);
+using TGenerateRecord = void (*)(ml::core_t::TTime time,
+                                 ml::api::CAnomalyJob::TStrStrUMap& dataRows);
 
 const ml::core_t::TTime START_TIME{12000000};
 const ml::core_t::TTime BUCKET_LENGTH{3600};
@@ -32,27 +33,33 @@ void generateRecord(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& d
     dataRows["time"] = ml::core::CStringUtils::typeToString(time);
 }
 
-void generateRecordWithSummaryCount(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows) {
+void generateRecordWithSummaryCount(ml::core_t::TTime time,
+                                    ml::api::CAnomalyJob::TStrStrUMap& dataRows) {
     double x = static_cast<double>(time - START_TIME) / BUCKET_LENGTH;
     double count = (std::sin(x / 4.0) + 1.0) * 42.0 * std::pow(1.005, x);
     dataRows["time"] = ml::core::CStringUtils::typeToString(time);
     dataRows["count"] = ml::core::CStringUtils::typeToString(count);
 }
 
-void generateRecordWithStatus(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows) {
+void generateRecordWithStatus(ml::core_t::TTime time,
+                              ml::api::CAnomalyJob::TStrStrUMap& dataRows) {
     dataRows["time"] = ml::core::CStringUtils::typeToString(time);
     dataRows["status"] = (time / BUCKET_LENGTH) % 919 == 0 ? "404" : "200";
 }
 
-void generatePopulationRecord(ml::core_t::TTime time, ml::api::CAnomalyJob::TStrStrUMap& dataRows) {
+void generatePopulationRecord(ml::core_t::TTime time,
+                              ml::api::CAnomalyJob::TStrStrUMap& dataRows) {
     dataRows["time"] = ml::core::CStringUtils::typeToString(time);
     dataRows["person"] = "jill";
 }
 
-void populateJob(TGenerateRecord generateRecord, ml::api::CAnomalyJob& job, std::size_t buckets = 1000) {
+void populateJob(TGenerateRecord generateRecord,
+                 ml::api::CAnomalyJob& job,
+                 std::size_t buckets = 1000) {
     ml::core_t::TTime time = START_TIME;
     ml::api::CAnomalyJob::TStrStrUMap dataRows;
-    for (std::size_t bucket = 0u; bucket < 2 * buckets; ++bucket, time += (BUCKET_LENGTH / 2)) {
+    for (std::size_t bucket = 0u; bucket < 2 * buckets;
+         ++bucket, time += (BUCKET_LENGTH / 2)) {
         generateRecord(time, dataRows);
         CPPUNIT_ASSERT(job.handleRecord(dataRows));
     }
@@ -73,14 +80,16 @@ void CForecastRunnerTest::testSummaryCount() {
         clauses.push_back("count");
         clauses.push_back("summarycountfield=count");
         fieldConfig.initFromClause(clauses);
-        ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
+        ml::model::CAnomalyDetectorModelConfig modelConfig =
+            ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
         ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper);
 
         populateJob(generateRecordWithSummaryCount, job);
 
         ml::api::CAnomalyJob::TStrStrUMap dataRows;
-        dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) + ",\"forecast_id\": \"42\"" +
-                        ",\"forecast_alias\": \"sumcount\"" + ",\"create_time\": \"1511370819\"" + ",\"expires_in\": \"" +
+        dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) +
+                        ",\"forecast_id\": \"42\"" + ",\"forecast_alias\": \"sumcount\"" +
+                        ",\"create_time\": \"1511370819\"" + ",\"expires_in\": \"" +
                        std::to_string(100 * ml::core::constants::DAY) + "\" }";
         CPPUNIT_ASSERT(job.handleRecord(dataRows));
     }
@@ -97,7 +106,8 @@ void CForecastRunnerTest::testSummaryCount() {
        if (std::strcmp("scheduled", forecastStart["forecast_status"].GetString()) == 0) {
             CPPUNIT_ASSERT(!foundStartedRecord);
             foundScheduledRecord = true;
-        } else if (std::strcmp("started", forecastStart["forecast_status"].GetString()) == 0) {
+        } else if (std::strcmp("started",
+                               forecastStart["forecast_status"].GetString()) == 0) {
             CPPUNIT_ASSERT(foundScheduledRecord);
             foundStartedRecord = true;
             break;
@@ -111,16 +121,22 @@ void CForecastRunnerTest::testSummaryCount() {
     CPPUNIT_ASSERT(lastElement.HasMember("model_forecast_request_stats"));
     const rapidjson::Value& forecastStats = lastElement["model_forecast_request_stats"];
 
-    CPPUNIT_ASSERT_EQUAL(std::string("42"), std::string(forecastStats["forecast_id"].GetString()));
-    CPPUNIT_ASSERT_EQUAL(std::string("sumcount"), std::string(forecastStats["forecast_alias"].GetString()));
-    CPPUNIT_ASSERT_EQUAL(1511370819 * int64_t(1000), forecastStats["forecast_create_timestamp"].GetInt64());
+    CPPUNIT_ASSERT_EQUAL(std::string("42"),
+                         std::string(forecastStats["forecast_id"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("sumcount"),
+                         std::string(forecastStats["forecast_alias"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(1511370819 * int64_t(1000),
+                         forecastStats["forecast_create_timestamp"].GetInt64());
     CPPUNIT_ASSERT(forecastStats.HasMember("processed_record_count"));
     CPPUNIT_ASSERT_EQUAL(13, forecastStats["processed_record_count"].GetInt());
     CPPUNIT_ASSERT_EQUAL(1.0, forecastStats["forecast_progress"].GetDouble());
-    CPPUNIT_ASSERT_EQUAL(std::string("finished"), std::string(forecastStats["forecast_status"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("finished"),
+                         std::string(forecastStats["forecast_status"].GetString()));
     CPPUNIT_ASSERT_EQUAL(15591600 * int64_t(1000), forecastStats["timestamp"].GetInt64());
-    CPPUNIT_ASSERT_EQUAL(15591600 * int64_t(1000), forecastStats["forecast_start_timestamp"].GetInt64());
-    CPPUNIT_ASSERT_EQUAL((15591600 + 13 * BUCKET_LENGTH) * int64_t(1000), forecastStats["forecast_end_timestamp"].GetInt64());
+    CPPUNIT_ASSERT_EQUAL(15591600 * int64_t(1000),
+                         forecastStats["forecast_start_timestamp"].GetInt64());
+    CPPUNIT_ASSERT_EQUAL((15591600 + 13 * BUCKET_LENGTH) * int64_t(1000),
+                         forecastStats["forecast_end_timestamp"].GetInt64());
     CPPUNIT_ASSERT_EQUAL((1511370819 + 100 * ml::core::constants::DAY) * int64_t(1000),
                          forecastStats["forecast_expiry_timestamp"].GetInt64());
 }
@@ -138,14 +154,15 @@ void CForecastRunnerTest::testPopulation() {
         clauses.push_back("over");
         clauses.push_back("person");
         fieldConfig.initFromClause(clauses);
-        ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
+        ml::model::CAnomalyDetectorModelConfig modelConfig =
+            ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
         ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper);
 
         populateJob(generatePopulationRecord, job);
 
         ml::api::CAnomalyJob::TStrStrUMap dataRows;
-        dataRows["."] =
-            "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) + ",\"forecast_id\": \"31\"" + ",\"create_time\": \"1511370819\" }";
+        dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) +
+                        ",\"forecast_id\": \"31\"" + ",\"create_time\": \"1511370819\" }";
         CPPUNIT_ASSERT(job.handleRecord(dataRows));
     }
 
@@ -157,11 +174,14 @@ void CForecastRunnerTest::testPopulation() {
     const rapidjson::Value& forecastStats = lastElement["model_forecast_request_stats"];
 
     CPPUNIT_ASSERT(!doc.HasParseError());
-    CPPUNIT_ASSERT_EQUAL(std::string("31"), std::string(forecastStats["forecast_id"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("31"),
+                         std::string(forecastStats["forecast_id"].GetString()));
     CPPUNIT_ASSERT(!forecastStats.HasMember("forecast_alias"));
-    CPPUNIT_ASSERT_EQUAL(std::string("failed"), std::string(forecastStats["forecast_status"].GetString()));
-    CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::ERROR_NOT_SUPPORTED_FOR_POPULATION_MODELS,
-                         std::string(forecastStats["forecast_messages"].GetArray()[0].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("failed"),
+                         std::string(forecastStats["forecast_status"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(
+        ml::api::CForecastRunner::ERROR_NOT_SUPPORTED_FOR_POPULATION_MODELS,
+        std::string(forecastStats["forecast_messages"].GetArray()[0].GetString()));
     CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000),
                          forecastStats["forecast_expiry_timestamp"].GetInt64());
 }
@@ -180,14 +200,16 @@ void CForecastRunnerTest::testRare() {
         clauses.push_back("status");
         fieldConfig.initFromClause(clauses);
 
-        ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
+        ml::model::CAnomalyDetectorModelConfig modelConfig =
+            ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
         ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper);
 
         populateJob(generateRecordWithStatus, job, 5000);
 
         ml::api::CAnomalyJob::TStrStrUMap dataRows;
-        dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) + ",\"forecast_id\": \"42\"" +
-                        ",\"create_time\": \"1511370819\"" + ",\"expires_in\": \"8640000\" }";
+        dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) +
+                        ",\"forecast_id\": \"42\"" + ",\"create_time\": \"1511370819\"" +
+                        ",\"expires_in\": \"8640000\" }";
         CPPUNIT_ASSERT(job.handleRecord(dataRows));
     }
     rapidjson::Document doc;
@@ -198,11 +220,14 @@ void CForecastRunnerTest::testRare() {
     const rapidjson::Value& forecastStats = lastElement["model_forecast_request_stats"];
 
     CPPUNIT_ASSERT(!doc.HasParseError());
-    CPPUNIT_ASSERT_EQUAL(std::string("42"), std::string(forecastStats["forecast_id"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("42"),
+                         std::string(forecastStats["forecast_id"].GetString()));
     CPPUNIT_ASSERT(!forecastStats.HasMember("forecast_alias"));
-    CPPUNIT_ASSERT_EQUAL(std::string("failed"), std::string(forecastStats["forecast_status"].GetString()));
-    CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::ERROR_NO_SUPPORTED_FUNCTIONS,
-                         std::string(forecastStats["forecast_messages"].GetArray()[0].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("failed"),
+                         std::string(forecastStats["forecast_status"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(
+        ml::api::CForecastRunner::ERROR_NO_SUPPORTED_FUNCTIONS,
+        std::string(forecastStats["forecast_messages"].GetArray()[0].GetString()));
     CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000),
                          forecastStats["forecast_expiry_timestamp"].GetInt64());
 }
@@ -218,14 +243,15 @@ void CForecastRunnerTest::testInsufficientData() {
         ml::api::CFieldConfig::TStrVec clauses;
         clauses.push_back("count");
         fieldConfig.initFromClause(clauses);
-        ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
+        ml::model::CAnomalyDetectorModelConfig modelConfig =
+            ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
         ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper);
 
         populateJob(generateRecord, job, 3);
 
         ml::api::CAnomalyJob::TStrStrUMap dataRows;
-        dataRows["."] =
-            "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) + ",\"forecast_id\": \"31\"" + ",\"create_time\": \"1511370819\" }";
+        dataRows["."] = "p{\"duration\":" + std::to_string(13 * BUCKET_LENGTH) +
+                        ",\"forecast_id\": \"31\"" + ",\"create_time\": \"1511370819\" }";
         CPPUNIT_ASSERT(job.handleRecord(dataRows));
     }
 
@@ -237,11 +263,14 @@ void CForecastRunnerTest::testInsufficientData() {
     const rapidjson::Value& forecastStats = lastElement["model_forecast_request_stats"];
 
     CPPUNIT_ASSERT(!doc.HasParseError());
-    CPPUNIT_ASSERT_EQUAL(std::string("31"), std::string(forecastStats["forecast_id"].GetString()));
-    CPPUNIT_ASSERT_EQUAL(std::string("finished"), std::string(forecastStats["forecast_status"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("31"),
+                         std::string(forecastStats["forecast_id"].GetString()));
+    CPPUNIT_ASSERT_EQUAL(std::string("finished"),
+                         std::string(forecastStats["forecast_status"].GetString()));
     CPPUNIT_ASSERT_EQUAL(1.0, forecastStats["forecast_progress"].GetDouble());
-    CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::INFO_NO_MODELS_CAN_CURRENTLY_BE_FORECAST,
-                         std::string(forecastStats["forecast_messages"].GetArray()[0].GetString()));
+    CPPUNIT_ASSERT_EQUAL(
+        ml::api::CForecastRunner::INFO_NO_MODELS_CAN_CURRENTLY_BE_FORECAST,
+        std::string(forecastStats["forecast_messages"].GetArray()[0].GetString()));
     CPPUNIT_ASSERT_EQUAL((1511370819 + 14 * ml::core::constants::DAY) * int64_t(1000),
                          forecastStats["forecast_expiry_timestamp"].GetInt64());
 }
 
@@ -249,28 +278,34 @@ void CForecastRunnerTest::testInsufficientData() {
 void CForecastRunnerTest::testValidateDuration() {
     ml::api::CForecastRunner::SForecast forecastJob;
 
-    std::string message("p{\"duration\":" + std::to_string(10 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" +
-                        ",\"create_time\": \"1511370819\" }");
+    std::string message("p{\"duration\":" + std::to_string(10 * ml::core::constants::WEEK) +
+                        ",\"forecast_id\": \"42\"" + ",\"create_time\": \"1511370819\" }");
 
-    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000));
+    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(
+        message, forecastJob, 1400000000));
     CPPUNIT_ASSERT_EQUAL(8 * ml::core::constants::WEEK, forecastJob.s_Duration);
-    CPPUNIT_ASSERT_EQUAL(8 * ml::core::constants::WEEK + 1400000000, forecastJob.forecastEnd());
-    CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::WARNING_DURATION_LIMIT, *forecastJob.s_Messages.begin());
+    CPPUNIT_ASSERT_EQUAL(8 * ml::core::constants::WEEK + 1400000000,
+                         forecastJob.forecastEnd());
+    CPPUNIT_ASSERT_EQUAL(ml::api::CForecastRunner::WARNING_DURATION_LIMIT,
+                         *forecastJob.s_Messages.begin());
 }
 
 void CForecastRunnerTest::testValidateDefaultExpiry() {
     ml::api::CForecastRunner::SForecast forecastJob;
 
-    std::string message("p{\"duration\":" + std::to_string(2 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" +
-                        ",\"create_time\": \"1511370819\" }");
+    std::string message("p{\"duration\":" + std::to_string(2 * ml::core::constants::WEEK) +
+                        ",\"forecast_id\": \"42\"" + ",\"create_time\": \"1511370819\" }");
 
-    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000));
+    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(
+        message, forecastJob, 1400000000));
     CPPUNIT_ASSERT_EQUAL(2 * ml::core::constants::WEEK, forecastJob.s_Duration);
     CPPUNIT_ASSERT_EQUAL(14 * ml::core::constants::DAY + 1511370819, forecastJob.s_ExpiryTime);
 
-    std::string message2("p{\"duration\":" + std::to_string(2 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" +
+    std::string message2("p{\"duration\":" + std::to_string(2 * ml::core::constants::WEEK) +
+                         ",\"forecast_id\": \"42\"" +
                          ",\"create_time\": \"1511370819\"" + ",\"expires_in\": -1 }");
-    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message2, forecastJob, 1400000000));
+    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(
+        message2, forecastJob, 1400000000));
     CPPUNIT_ASSERT_EQUAL(2 * ml::core::constants::WEEK, forecastJob.s_Duration);
     CPPUNIT_ASSERT_EQUAL(14 * ml::core::constants::DAY + 1511370819, forecastJob.s_ExpiryTime);
 }
@@ -278,10 +313,12 @@ void CForecastRunnerTest::testValidateDefaultExpiry() {
 void CForecastRunnerTest::testValidateNoExpiry() {
     ml::api::CForecastRunner::SForecast forecastJob;
 
-    std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" +
+    std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) +
+                        ",\"forecast_id\": \"42\"" +
                         ",\"create_time\": \"1511370819\"" + ",\"expires_in\": 0 }");
 
-    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000));
+    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(
+        message, forecastJob, 1400000000));
     CPPUNIT_ASSERT_EQUAL(3 * ml::core::constants::WEEK, forecastJob.s_Duration);
     CPPUNIT_ASSERT_EQUAL(ml::core_t::TTime(1511370819), forecastJob.s_ExpiryTime);
     CPPUNIT_ASSERT_EQUAL(forecastJob.s_CreateTime, forecastJob.s_ExpiryTime);
@@ -290,10 +327,12 @@ void CForecastRunnerTest::testValidateNoExpiry() {
 void CForecastRunnerTest::testValidateInvalidExpiry() {
     ml::api::CForecastRunner::SForecast forecastJob;
 
-    std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + ",\"forecast_id\": \"42\"" +
-                        ",\"create_time\": \"1511370819\"" + ",\"expires_in\": -244 }");
+    std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) +
+                        ",\"forecast_id\": \"42\"" + ",\"create_time\": \"1511370819\"" +
+                        ",\"expires_in\": -244 }");
 
-    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000));
+    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(
+        message, forecastJob, 1400000000));
     CPPUNIT_ASSERT_EQUAL(3 * ml::core::constants::WEEK, forecastJob.s_Duration);
     CPPUNIT_ASSERT_EQUAL(14 * ml::core::constants::DAY + 1511370819, forecastJob.s_ExpiryTime);
 }
@@ -303,39 +342,45 @@ void CForecastRunnerTest::testValidateBrokenMessage() {
 
     std::string message("p{\"dura");
 
-    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000) == false);
+    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(
+                       message, forecastJob, 1400000000) == false);
 }
 
 void CForecastRunnerTest::testValidateMissingId() {
     ml::api::CForecastRunner::SForecast forecastJob;
 
-    std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) + ",\"create_time\": \"1511370819\"}");
+    std::string message("p{\"duration\":" + std::to_string(3 * ml::core::constants::WEEK) +
+                        ",\"create_time\": \"1511370819\"}");
 
-    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(message, forecastJob, 1400000000) == false);
+    CPPUNIT_ASSERT(ml::api::CForecastRunner::parseAndValidateForecastRequest(
+                       message, forecastJob, 1400000000) == false);
 }
 
 CppUnit::Test* CForecastRunnerTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CForecastRunnerTest");
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testSummaryCount", &CForecastRunnerTest::testSummaryCount));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testPopulation", &CForecastRunnerTest::testPopulation));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testRare", &CForecastRunnerTest::testRare));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testInsufficientData",
-                                                                       &CForecastRunnerTest::testInsufficientData));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testValidateDuration",
-                                                                       &CForecastRunnerTest::testValidateDuration));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testValidateExpiry",
-                                                                       &CForecastRunnerTest::testValidateDefaultExpiry));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testValidateNoExpiry",
-                                                                       &CForecastRunnerTest::testValidateNoExpiry));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testValidateInvalidExpiry",
-                                                                       &CForecastRunnerTest::testValidateInvalidExpiry));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testBrokenMessage",
-                                                                       &CForecastRunnerTest::testValidateBrokenMessage));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CForecastRunnerTest>("CForecastRunnerTest::testMissingId", &CForecastRunnerTest::testValidateMissingId));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testSummaryCount", &CForecastRunnerTest::testSummaryCount));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testPopulation", &CForecastRunnerTest::testPopulation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testRare", &CForecastRunnerTest::testRare));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testInsufficientData", &CForecastRunnerTest::testInsufficientData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testValidateDuration", &CForecastRunnerTest::testValidateDuration));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testValidateExpiry",
+        &CForecastRunnerTest::testValidateDefaultExpiry));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testValidateNoExpiry", &CForecastRunnerTest::testValidateNoExpiry));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testValidateInvalidExpiry",
+        &CForecastRunnerTest::testValidateInvalidExpiry));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testBrokenMessage", &CForecastRunnerTest::testValidateBrokenMessage));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CForecastRunnerTest>(
+        "CForecastRunnerTest::testMissingId", &CForecastRunnerTest::testValidateMissingId));
 
     return suiteOfTests;
 }
diff --git a/lib/api/unittest/CIoManagerTest.cc b/lib/api/unittest/CIoManagerTest.cc
index 8d87d6c475..2aa2463700 100644
--- a/lib/api/unittest/CIoManagerTest.cc
+++ b/lib/api/unittest/CIoManagerTest.cc
@@ -41,7 +41,8 @@
 const char* BAD_OUTPUT_PIPE_NAME = "can't_create_a_pipe_here/bad_output_pipe";
 
 class CThreadDataWriter : public ml::core::CThread {
 public:
-    CThreadDataWriter(const std::string& fileName, size_t size) : m_FileName(fileName), m_Size(size) {}
+    CThreadDataWriter(const std::string& fileName, size_t size)
+        : m_FileName(fileName), m_Size(size) {}
 
 protected:
     virtual void run() {
@@ -63,7 +64,8 @@ class CThreadDataWriter : public ml::core::CThread {
 
 class CThreadDataReader : public ml::core::CThread {
 public:
-    CThreadDataReader(const std::string& fileName) : m_FileName(fileName), m_Shutdown(false) {}
+    CThreadDataReader(const std::string& fileName)
+        : m_FileName(fileName), m_Shutdown(false) {}
 
     const std::string& data() const {
         // The memory barriers associated with the mutex lock should ensure
@@ -129,13 +131,16 @@ class CThreadDataReader : public ml::core::CThread {
 CppUnit::Test* CIoManagerTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CIoManagerTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testStdinStdout", &CIoManagerTest::testStdinStdout));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testFileIoGood", &CIoManagerTest::testFileIoGood));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testFileIoBad", &CIoManagerTest::testFileIoBad));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testNamedPipeIoGood", &CIoManagerTest::testNamedPipeIoGood));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CIoManagerTest>("CIoManagerTest::testNamedPipeIoBad", &CIoManagerTest::testNamedPipeIoBad));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIoManagerTest>(
+        "CIoManagerTest::testStdinStdout", &CIoManagerTest::testStdinStdout));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIoManagerTest>(
+        "CIoManagerTest::testFileIoGood", &CIoManagerTest::testFileIoGood));
&CIoManagerTest::testFileIoGood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CIoManagerTest::testFileIoBad", &CIoManagerTest::testFileIoBad)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CIoManagerTest::testNamedPipeIoGood", &CIoManagerTest::testNamedPipeIoGood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CIoManagerTest::testNamedPipeIoBad", &CIoManagerTest::testNamedPipeIoBad)); return suiteOfTests; } @@ -202,7 +207,8 @@ void CIoManagerTest::testCommon(const std::string& inputFileName, std::string processedData; { - ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe); + ml::api::CIoManager ioMgr(inputFileName, isInputFileNamedPipe, + outputFileName, isOutputFileNamedPipe); CPPUNIT_ASSERT_EQUAL(isGood, ioMgr.initIo()); if (isGood) { static const std::streamsize BUF_SIZE = 512; @@ -211,10 +217,12 @@ void CIoManagerTest::testCommon(const std::string& inputFileName, ioMgr.inputStream().read(buffer, BUF_SIZE); CPPUNIT_ASSERT(!ioMgr.inputStream().bad()); if (ioMgr.inputStream().gcount() > 0) { - processedData.append(buffer, static_cast(ioMgr.inputStream().gcount())); + processedData.append( + buffer, static_cast(ioMgr.inputStream().gcount())); } CPPUNIT_ASSERT(!ioMgr.outputStream().bad()); - ioMgr.outputStream().write(buffer, static_cast(ioMgr.inputStream().gcount())); + ioMgr.outputStream().write( + buffer, static_cast(ioMgr.inputStream().gcount())); } while (!ioMgr.inputStream().eof()); CPPUNIT_ASSERT(!ioMgr.outputStream().bad()); } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 76a5a5f26c..2dc3d67664 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -38,40 +38,53 @@ const TStr1Vec EMPTY_STRING_LIST; CppUnit::Test* CJsonOutputWriterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonOutputWriterTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CJsonOutputWriterTest::testSimpleWrite", &CJsonOutputWriterTest::testSimpleWrite)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testWriteNonAnomalousBucket", - &CJsonOutputWriterTest::testWriteNonAnomalousBucket)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CJsonOutputWriterTest::testBucketWrite", &CJsonOutputWriterTest::testBucketWrite)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testBucketWriteInterim", - &CJsonOutputWriterTest::testBucketWriteInterim)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testLimitedRecordsWrite", - &CJsonOutputWriterTest::testLimitedRecordsWrite)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testLimitedRecordsWriteInterim", - &CJsonOutputWriterTest::testLimitedRecordsWriteInterim)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CJsonOutputWriterTest::testFlush", &CJsonOutputWriterTest::testFlush)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testWriteCategoryDefinition", - &CJsonOutputWriterTest::testWriteCategoryDefinition)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testWriteWithInfluences", - &CJsonOutputWriterTest::testWriteWithInfluences)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testWriteInfluencers", - &CJsonOutputWriterTest::testWriteInfluencers)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testWriteInfluencersWithLimit", - 
&CJsonOutputWriterTest::testWriteInfluencersWithLimit)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testPersistNormalizer", - &CJsonOutputWriterTest::testPersistNormalizer)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testPartitionScores", - &CJsonOutputWriterTest::testPartitionScores)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testReportMemoryUsage", - &CJsonOutputWriterTest::testReportMemoryUsage)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testWriteScheduledEvent", - &CJsonOutputWriterTest::testWriteScheduledEvent)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testThroughputWithScopedAllocator", - &CJsonOutputWriterTest::testThroughputWithScopedAllocator)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputWriterTest::testThroughputWithoutScopedAllocator", - &CJsonOutputWriterTest::testThroughputWithoutScopedAllocator)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testSimpleWrite", &CJsonOutputWriterTest::testSimpleWrite)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testWriteNonAnomalousBucket", + &CJsonOutputWriterTest::testWriteNonAnomalousBucket)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testBucketWrite", &CJsonOutputWriterTest::testBucketWrite)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testBucketWriteInterim", + &CJsonOutputWriterTest::testBucketWriteInterim)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testLimitedRecordsWrite", + &CJsonOutputWriterTest::testLimitedRecordsWrite)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testLimitedRecordsWriteInterim", + &CJsonOutputWriterTest::testLimitedRecordsWriteInterim)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testFlush", &CJsonOutputWriterTest::testFlush)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testWriteCategoryDefinition", + &CJsonOutputWriterTest::testWriteCategoryDefinition)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testWriteWithInfluences", + &CJsonOutputWriterTest::testWriteWithInfluences)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testWriteInfluencers", + &CJsonOutputWriterTest::testWriteInfluencers)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testWriteInfluencersWithLimit", + &CJsonOutputWriterTest::testWriteInfluencersWithLimit)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testPersistNormalizer", + &CJsonOutputWriterTest::testPersistNormalizer)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testPartitionScores", &CJsonOutputWriterTest::testPartitionScores)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testReportMemoryUsage", + &CJsonOutputWriterTest::testReportMemoryUsage)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testWriteScheduledEvent", + &CJsonOutputWriterTest::testWriteScheduledEvent)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testThroughputWithScopedAllocator", + &CJsonOutputWriterTest::testThroughputWithScopedAllocator)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputWriterTest::testThroughputWithoutScopedAllocator", + 
&CJsonOutputWriterTest::testThroughputWithoutScopedAllocator)); return suiteOfTests; } @@ -113,9 +126,11 @@ void CJsonOutputWriterTest::testSimpleWrite() { CPPUNIT_ASSERT(object.IsObject()); CPPUNIT_ASSERT(object.HasMember("by_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("airline"), std::string(object["by_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("airline"), + std::string(object["by_field_name"].GetString())); CPPUNIT_ASSERT(object.HasMember("by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("GAL"), std::string(object["by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("GAL"), + std::string(object["by_field_value"].GetString())); CPPUNIT_ASSERT(object.HasMember("typical")); CPPUNIT_ASSERT_EQUAL(std::string("6953"), std::string(object["typical"].GetString())); CPPUNIT_ASSERT(object.HasMember("actual")); @@ -123,23 +138,28 @@ void CJsonOutputWriterTest::testSimpleWrite() { CPPUNIT_ASSERT(object.HasMember("probability")); CPPUNIT_ASSERT_EQUAL(std::string("0"), std::string(object["probability"].GetString())); CPPUNIT_ASSERT(object.HasMember("field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), std::string(object["field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), + std::string(object["field_name"].GetString())); const rapidjson::Value& object2 = arrayDoc[rapidjson::SizeType(1)]; CPPUNIT_ASSERT(object.IsObject()); CPPUNIT_ASSERT(object2.HasMember("by_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("busroute"), std::string(object2["by_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("busroute"), + std::string(object2["by_field_name"].GetString())); CPPUNIT_ASSERT(object2.HasMember("by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("No 32"), std::string(object2["by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("No 32"), + std::string(object2["by_field_value"].GetString())); CPPUNIT_ASSERT(object2.HasMember("typical")); CPPUNIT_ASSERT_EQUAL(std::string("6953"), std::string(object2["typical"].GetString())); CPPUNIT_ASSERT(object2.HasMember("actual")); CPPUNIT_ASSERT_EQUAL(std::string("10090"), std::string(object2["actual"].GetString())); CPPUNIT_ASSERT(object2.HasMember("probability")); - CPPUNIT_ASSERT_EQUAL(std::string("0"), std::string(object2["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("0"), + std::string(object2["probability"].GetString())); CPPUNIT_ASSERT(object2.HasMember("field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), std::string(object2["field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), + std::string(object2["field_name"].GetString())); } void CJsonOutputWriterTest::testWriteNonAnomalousBucket() { @@ -153,30 +173,11 @@ void CJsonOutputWriterTest::testWriteNonAnomalousBucket() { ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); - ml::api::CHierarchicalResultsWriter::SResults result(false, - false, - emptyString, - emptyString, - emptyString, - emptyString, - emptyString, - emptyString, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 42.0), - TDouble1Vec(1, 42.0), - 0.0, - 0.0, - 1.0, - 30, - emptyString, - influences, - false, - false, - 1, - 100); + ml::api::CHierarchicalResultsWriter::SResults result( + false, false, emptyString, emptyString, emptyString, emptyString, + emptyString, emptyString, emptyString, 1, function, + functionDescription, TDouble1Vec(1, 42.0), TDouble1Vec(1, 42.0), + 0.0, 0.0, 1.0, 30, 
emptyString, influences, false, false, 1, 100); CPPUNIT_ASSERT(writer.acceptResult(result)); writer.acceptBucketTimeInfluencer(1, 1.0, 0.0, 0.0); @@ -243,7 +244,9 @@ void CJsonOutputWriterTest::testFlush() { CPPUNIT_ASSERT(flush.HasMember("id")); CPPUNIT_ASSERT_EQUAL(testId, std::string(flush["id"].GetString())); CPPUNIT_ASSERT(flush.HasMember("last_finalized_bucket_end")); - CPPUNIT_ASSERT_EQUAL(lastFinalizedBucketEnd * 1000, static_cast(flush["last_finalized_bucket_end"].GetInt64())); + CPPUNIT_ASSERT_EQUAL(lastFinalizedBucketEnd * 1000, + static_cast( + flush["last_finalized_bucket_end"].GetInt64())); } void CJsonOutputWriterTest::testWriteCategoryDefinition() { @@ -292,7 +295,9 @@ void CJsonOutputWriterTest::testWriteCategoryDefinition() { CPPUNIT_ASSERT(category.HasMember("regex")); CPPUNIT_ASSERT_EQUAL(regex, std::string(category["regex"].GetString())); CPPUNIT_ASSERT(category.HasMember("max_matching_length")); - CPPUNIT_ASSERT_EQUAL(maxMatchingLength, static_cast(category["max_matching_length"].GetInt())); + CPPUNIT_ASSERT_EQUAL( + maxMatchingLength, + static_cast(category["max_matching_length"].GetInt())); CPPUNIT_ASSERT(category.HasMember("examples")); TStrSet writtenExamplesSet; @@ -343,127 +348,40 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences; { - ml::api::CHierarchicalResultsWriter::SResults result11(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.5, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result112(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.5, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result12(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.8, - 0.0, - fieldName, - influences, - false, - true, - 2, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result13(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.5, - 0.0, - fieldName, - influences, - false, - false, - 3, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result14(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 4, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result11( + false, false, partitionFieldName, partitionFieldValue, + 
overFieldName, overFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 1, function, functionDescription, + TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), 2.24, 0.5, 0.0, + 79, fieldName, influences, false, false, 1, 100); + + ml::api::CHierarchicalResultsWriter::SResults result112( + false, true, partitionFieldName, partitionFieldValue, + overFieldName, overFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 1, function, functionDescription, + TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), 2.24, 0.5, 0.0, + 79, fieldName, influences, false, false, 1, 100); + + ml::api::CHierarchicalResultsWriter::SResults result12( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 1, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.8, 0.0, + fieldName, influences, false, true, 2, 100, EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result13( + ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 1, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.5, 0.0, + fieldName, influences, false, false, 3, 100, EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result14( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 1, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.0, 0.0, + fieldName, influences, false, false, 4, 100, EMPTY_STRING_LIST); // 1st bucket CPPUNIT_ASSERT(writer.acceptResult(result11)); @@ -479,127 +397,40 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { } { - ml::api::CHierarchicalResultsWriter::SResults result21(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.6, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result212(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.6, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result22(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.8, - 0.0, - fieldName, - influences, - false, - true, - 2, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result23(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 3, - 100, - EMPTY_STRING_LIST); - - 
ml::api::CHierarchicalResultsWriter::SResults result24(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 4, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result21( + false, false, partitionFieldName, partitionFieldValue, + overFieldName, overFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 2, function, functionDescription, + TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), 2.24, 0.6, 0.0, + 79, fieldName, influences, false, false, 1, 100); + + ml::api::CHierarchicalResultsWriter::SResults result212( + false, true, partitionFieldName, partitionFieldValue, + overFieldName, overFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 2, function, functionDescription, + TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), 2.24, 0.6, 0.0, + 79, fieldName, influences, false, false, 1, 100); + + ml::api::CHierarchicalResultsWriter::SResults result22( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 2, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.8, 0.0, + fieldName, influences, false, true, 2, 100, EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result23( + ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 2, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.0, 0.0, + fieldName, influences, false, false, 3, 100, EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result24( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 2, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.0, 0.0, + fieldName, influences, false, false, 4, 100, EMPTY_STRING_LIST); // 2nd bucket CPPUNIT_ASSERT(writer.acceptResult(result21)); @@ -615,127 +446,40 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { } { - ml::api::CHierarchicalResultsWriter::SResults result31(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.8, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result312(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.8, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result32(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, 
- fieldName, - influences, - false, - true, - 2, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result33(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 3, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result34(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 3, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 4, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result31( + false, false, partitionFieldName, partitionFieldValue, + overFieldName, overFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 3, function, functionDescription, + TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), 2.24, 0.8, 0.0, + 79, fieldName, influences, false, false, 1, 100); + + ml::api::CHierarchicalResultsWriter::SResults result312( + false, true, partitionFieldName, partitionFieldValue, + overFieldName, overFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 3, function, functionDescription, + TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), 2.24, 0.8, 0.0, + 79, fieldName, influences, false, false, 1, 100); + + ml::api::CHierarchicalResultsWriter::SResults result32( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 3, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.0, 0.0, + fieldName, influences, false, true, 2, 100, EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result33( + ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 3, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.0, 0.0, + fieldName, influences, false, false, 3, 100, EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result34( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 3, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.0, 0.0, + fieldName, influences, false, false, 4, 100, EMPTY_STRING_LIST); // 3rd bucket CPPUNIT_ASSERT(writer.acceptResult(result31)); @@ -779,7 +523,8 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { const rapidjson::Value& bucket = bucketWrapper["bucket"]; CPPUNIT_ASSERT(bucket.IsObject()); CPPUNIT_ASSERT(bucket.HasMember("job_id")); - CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(bucket["job_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("job"), + std::string(bucket["job_id"].GetString())); // 3 detectors each have 2 records (simple count detector isn't added) // except the population detector which has a single record and clauses @@ -788,13 +533,19 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { const rapidjson::Value& bucketInfluencers = 
bucket["bucket_influencers"]; CPPUNIT_ASSERT(bucketInfluencers.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), bucketInfluencers.Size()); - const rapidjson::Value& bucketInfluencer = bucketInfluencers[rapidjson::SizeType(0)]; - CPPUNIT_ASSERT_DOUBLES_EQUAL(13.44, bucketInfluencer["raw_anomaly_score"].GetDouble(), 0.00001); + const rapidjson::Value& bucketInfluencer = + bucketInfluencers[rapidjson::SizeType(0)]; + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 13.44, bucketInfluencer["raw_anomaly_score"].GetDouble(), 0.00001); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.01, bucketInfluencer["probability"].GetDouble(), 0.00001); - CPPUNIT_ASSERT_DOUBLES_EQUAL(70.0, bucketInfluencer["initial_anomaly_score"].GetDouble(), 0.00001); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 70.0, bucketInfluencer["initial_anomaly_score"].GetDouble(), 0.00001); CPPUNIT_ASSERT(bucketInfluencer.HasMember("anomaly_score")); - CPPUNIT_ASSERT_DOUBLES_EQUAL(70.0, bucketInfluencer["anomaly_score"].GetDouble(), 0.00001); - CPPUNIT_ASSERT_EQUAL(std::string("bucket_time"), std::string(bucketInfluencer["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 70.0, bucketInfluencer["anomaly_score"].GetDouble(), 0.00001); + CPPUNIT_ASSERT_EQUAL( + std::string("bucket_time"), + std::string(bucketInfluencer["influencer_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(79, bucket["event_count"].GetInt()); CPPUNIT_ASSERT(bucket.HasMember("anomaly_score")); @@ -825,7 +576,8 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { { const rapidjson::Value& record = records[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(record.HasMember("job_id")); - CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(record["job_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("job"), + std::string(record["job_id"].GetString())); CPPUNIT_ASSERT(record.HasMember("detector_index")); CPPUNIT_ASSERT_EQUAL(1, record["detector_index"].GetInt()); CPPUNIT_ASSERT(record.HasMember("timestamp")); @@ -833,17 +585,22 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { CPPUNIT_ASSERT(record.HasMember("probability")); CPPUNIT_ASSERT_EQUAL(0.0, record["probability"].GetDouble()); CPPUNIT_ASSERT(record.HasMember("by_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("airline"), std::string(record["by_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("airline"), + std::string(record["by_field_name"].GetString())); CPPUNIT_ASSERT(!record.HasMember("by_field_value")); CPPUNIT_ASSERT(!record.HasMember("correlated_by_field_value")); CPPUNIT_ASSERT(record.HasMember("function")); - CPPUNIT_ASSERT_EQUAL(std::string("mean"), std::string(record["function"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("mean"), + std::string(record["function"].GetString())); CPPUNIT_ASSERT(record.HasMember("function_description")); - CPPUNIT_ASSERT_EQUAL(std::string("mean(responsetime)"), std::string(record["function_description"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("mean(responsetime)"), + std::string(record["function_description"].GetString())); CPPUNIT_ASSERT(record.HasMember("over_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("pfn"), std::string(record["over_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("pfn"), + std::string(record["over_field_name"].GetString())); CPPUNIT_ASSERT(record.HasMember("over_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("pfv"), std::string(record["over_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("pfv"), + std::string(record["over_field_value"].GetString())); 
CPPUNIT_ASSERT(record.HasMember("bucket_span")); CPPUNIT_ASSERT_EQUAL(100, record["bucket_span"].GetInt()); // It's hard to predict what these will be, so just assert their @@ -866,29 +623,40 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { CPPUNIT_ASSERT(cause.HasMember("probability")); CPPUNIT_ASSERT_EQUAL(0.0, cause["probability"].GetDouble()); CPPUNIT_ASSERT(cause.HasMember("field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), std::string(cause["field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), + std::string(cause["field_name"].GetString())); CPPUNIT_ASSERT(cause.HasMember("by_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("airline"), std::string(cause["by_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("airline"), + std::string(cause["by_field_name"].GetString())); CPPUNIT_ASSERT(cause.HasMember("by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("GAL"), std::string(cause["by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("GAL"), + std::string(cause["by_field_value"].GetString())); CPPUNIT_ASSERT(cause.HasMember("correlated_by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("BAW"), std::string(cause["correlated_by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string("BAW"), + std::string(cause["correlated_by_field_value"].GetString())); CPPUNIT_ASSERT(cause.HasMember("partition_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("tfn"), std::string(cause["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("tfn"), + std::string(cause["partition_field_name"].GetString())); CPPUNIT_ASSERT(cause.HasMember("partition_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(cause["partition_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(cause["partition_field_value"].GetString())); CPPUNIT_ASSERT(cause.HasMember("function")); - CPPUNIT_ASSERT_EQUAL(std::string("mean"), std::string(cause["function"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("mean"), + std::string(cause["function"].GetString())); CPPUNIT_ASSERT(cause.HasMember("function_description")); - CPPUNIT_ASSERT_EQUAL(std::string("mean(responsetime)"), std::string(cause["function_description"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("mean(responsetime)"), + std::string(cause["function_description"].GetString())); CPPUNIT_ASSERT(cause.HasMember("typical")); CPPUNIT_ASSERT(cause["typical"].IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), cause["typical"].Size()); - CPPUNIT_ASSERT_EQUAL(6953.0, cause["typical"][rapidjson::SizeType(0)].GetDouble()); + CPPUNIT_ASSERT_EQUAL( + 6953.0, cause["typical"][rapidjson::SizeType(0)].GetDouble()); CPPUNIT_ASSERT(cause.HasMember("actual")); CPPUNIT_ASSERT(cause["actual"].IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), cause["actual"].Size()); - CPPUNIT_ASSERT_EQUAL(10090.0, cause["actual"][rapidjson::SizeType(0)].GetDouble()); + CPPUNIT_ASSERT_EQUAL( + 10090.0, cause["actual"][rapidjson::SizeType(0)].GetDouble()); CPPUNIT_ASSERT(cause.HasMember("function")); } } @@ -898,7 +666,8 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { for (rapidjson::SizeType k = 1; k < 3; k++) { const rapidjson::Value& record = records[k]; CPPUNIT_ASSERT(record.HasMember("job_id")); - CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(record["job_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("job"), + std::string(record["job_id"].GetString())); 
CPPUNIT_ASSERT(record.HasMember("detector_index")); CPPUNIT_ASSERT_EQUAL(2, record["detector_index"].GetInt()); CPPUNIT_ASSERT(record.HasMember("timestamp")); @@ -906,29 +675,41 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { CPPUNIT_ASSERT(record.HasMember("probability")); CPPUNIT_ASSERT_EQUAL(0.0, record["probability"].GetDouble()); CPPUNIT_ASSERT(record.HasMember("by_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("airline"), std::string(record["by_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("airline"), + std::string(record["by_field_name"].GetString())); CPPUNIT_ASSERT(record.HasMember("by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("GAL"), std::string(record["by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("GAL"), + std::string(record["by_field_value"].GetString())); CPPUNIT_ASSERT(record.HasMember("correlated_by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("BAW"), std::string(record["correlated_by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string("BAW"), + std::string(record["correlated_by_field_value"].GetString())); CPPUNIT_ASSERT(record.HasMember("typical")); CPPUNIT_ASSERT(record["typical"].IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), record["typical"].Size()); - CPPUNIT_ASSERT_EQUAL(6953.0, record["typical"][rapidjson::SizeType(0)].GetDouble()); + CPPUNIT_ASSERT_EQUAL( + 6953.0, record["typical"][rapidjson::SizeType(0)].GetDouble()); CPPUNIT_ASSERT(record.HasMember("actual")); CPPUNIT_ASSERT(record["actual"].IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), record["actual"].Size()); - CPPUNIT_ASSERT_EQUAL(10090.0, record["actual"][rapidjson::SizeType(0)].GetDouble()); + CPPUNIT_ASSERT_EQUAL( + 10090.0, record["actual"][rapidjson::SizeType(0)].GetDouble()); CPPUNIT_ASSERT(record.HasMember("field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), std::string(record["field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("responsetime"), + std::string(record["field_name"].GetString())); CPPUNIT_ASSERT(record.HasMember("function")); - CPPUNIT_ASSERT_EQUAL(std::string("mean"), std::string(record["function"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("mean"), + std::string(record["function"].GetString())); CPPUNIT_ASSERT(record.HasMember("function_description")); - CPPUNIT_ASSERT_EQUAL(std::string("mean(responsetime)"), std::string(record["function_description"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("mean(responsetime)"), + std::string(record["function_description"].GetString())); CPPUNIT_ASSERT(record.HasMember("partition_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("tfn"), std::string(record["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("tfn"), + std::string(record["partition_field_name"].GetString())); CPPUNIT_ASSERT(record.HasMember("partition_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(record["partition_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string(""), + std::string(record["partition_field_value"].GetString())); CPPUNIT_ASSERT(record.HasMember("bucket_span")); CPPUNIT_ASSERT_EQUAL(100, record["bucket_span"].GetInt()); // It's hard to predict what these will be, so just assert their @@ -949,7 +730,8 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { for (rapidjson::SizeType k = 3; k < 5; k++) { const rapidjson::Value& record = records[k]; CPPUNIT_ASSERT(record.HasMember("job_id")); - CPPUNIT_ASSERT_EQUAL(std::string("job"), 
std::string(record["job_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("job"), + std::string(record["job_id"].GetString())); CPPUNIT_ASSERT(record.HasMember("detector_index")); CPPUNIT_ASSERT_EQUAL(4, record["detector_index"].GetInt()); CPPUNIT_ASSERT(record.HasMember("timestamp")); @@ -957,28 +739,39 @@ void CJsonOutputWriterTest::testBucketWriteHelper(bool isInterim) { CPPUNIT_ASSERT(record.HasMember("probability")); CPPUNIT_ASSERT_EQUAL(0.0, record["probability"].GetDouble()); CPPUNIT_ASSERT(record.HasMember("by_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("airline"), std::string(record["by_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("airline"), + std::string(record["by_field_name"].GetString())); CPPUNIT_ASSERT(record.HasMember("by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("GAL"), std::string(record["by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("GAL"), + std::string(record["by_field_value"].GetString())); CPPUNIT_ASSERT(record.HasMember("correlated_by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("BAW"), std::string(record["correlated_by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string("BAW"), + std::string(record["correlated_by_field_value"].GetString())); CPPUNIT_ASSERT(record.HasMember("typical")); CPPUNIT_ASSERT(record["typical"].IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), record["typical"].Size()); - CPPUNIT_ASSERT_EQUAL(6953.0, record["typical"][rapidjson::SizeType(0)].GetDouble()); + CPPUNIT_ASSERT_EQUAL( + 6953.0, record["typical"][rapidjson::SizeType(0)].GetDouble()); CPPUNIT_ASSERT(record.HasMember("actual")); CPPUNIT_ASSERT(record["actual"].IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), record["actual"].Size()); - CPPUNIT_ASSERT_EQUAL(10090.0, record["actual"][rapidjson::SizeType(0)].GetDouble()); + CPPUNIT_ASSERT_EQUAL( + 10090.0, record["actual"][rapidjson::SizeType(0)].GetDouble()); CPPUNIT_ASSERT(record.HasMember("function")); // This would be count in the real case with properly generated input data - CPPUNIT_ASSERT_EQUAL(std::string("mean"), std::string(record["function"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("mean"), + std::string(record["function"].GetString())); CPPUNIT_ASSERT(record.HasMember("function_description")); - CPPUNIT_ASSERT_EQUAL(std::string("mean(responsetime)"), std::string(record["function_description"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("mean(responsetime)"), + std::string(record["function_description"].GetString())); CPPUNIT_ASSERT(record.HasMember("partition_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("tfn"), std::string(record["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("tfn"), + std::string(record["partition_field_name"].GetString())); CPPUNIT_ASSERT(record.HasMember("partition_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(record["partition_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string(""), + std::string(record["partition_field_value"].GetString())); CPPUNIT_ASSERT(record.HasMember("bucket_span")); CPPUNIT_ASSERT_EQUAL(100, record["bucket_span"].GetInt()); // It's hard to predict what these will be, so just assert their @@ -1022,264 +815,82 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) { { // 1st bucket - ml::api::CHierarchicalResultsWriter::SResults result111(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - 
function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result111( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, emptyString, 1, + function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 0.0, 0.1, 0.1, fieldName, influences, + false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result111)); - ml::api::CHierarchicalResultsWriter::SResults result112(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.2, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result112( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, emptyString, 1, + function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 0.0, 0.1, 0.2, fieldName, influences, + false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result112)); - ml::api::CHierarchicalResultsWriter::SResults result113(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.0, - 0.0, - 0.4, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result113( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, emptyString, 1, + function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 2.0, 0.0, 0.4, fieldName, influences, + false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result113)); - ml::api::CHierarchicalResultsWriter::SResults result114(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 12.0, - 0.0, - 0.4, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result114( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, emptyString, 1, + function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 12.0, 0.0, 0.4, fieldName, influences, + false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result114)); CPPUNIT_ASSERT(writer.acceptResult(result114)); overFieldName = "ofn"; overFieldValue = "ofv"; - ml::api::CHierarchicalResultsWriter::SResults result121(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 12.0, - 0.0, - 0.01, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + 
ml::api::CHierarchicalResultsWriter::SResults result121( + false, false, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, emptyString, emptyString, emptyString, 1, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 12.0, 0.0, 0.01, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result121)); - ml::api::CHierarchicalResultsWriter::SResults result122(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 12.0, - 0.0, - 0.01, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result122( + false, true, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, byFieldName, byFieldValue, emptyString, 1, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 12.0, 0.0, 0.01, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result122)); - ml::api::CHierarchicalResultsWriter::SResults result123(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 0.5, - 0.0, - 0.5, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result123( + false, false, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, byFieldName, byFieldValue, emptyString, 1, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 0.5, 0.0, 0.5, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result123)); - ml::api::CHierarchicalResultsWriter::SResults result124(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 0.5, - 0.0, - 0.5, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result124( + false, true, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, emptyString, emptyString, emptyString, 1, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 0.5, 0.0, 0.5, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result124)); - ml::api::CHierarchicalResultsWriter::SResults result125(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 6.0, - 0.0, - 0.5, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result125( + false, false, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, byFieldName, byFieldValue, emptyString, 1, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 6.0, 0.0, 0.5, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result125)); - ml::api::CHierarchicalResultsWriter::SResults result126(false, - true, - partitionFieldName, - 
partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 6.0, - 0.0, - 0.05, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result126( + false, true, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, emptyString, emptyString, emptyString, 1, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 6.0, 0.0, 0.05, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result126)); } @@ -1288,188 +899,61 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) { overFieldName.clear(); overFieldValue.clear(); - ml::api::CHierarchicalResultsWriter::SResults result211(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 1.0, - 0.0, - 0.05, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result211( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, emptyString, 2, + function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 1.0, 0.0, 0.05, fieldName, influences, + false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result211)); - ml::api::CHierarchicalResultsWriter::SResults result212(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 7.0, - 0.0, - 0.001, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result212( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, emptyString, 2, + function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 7.0, 0.0, 0.001, fieldName, influences, + false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result212)); - ml::api::CHierarchicalResultsWriter::SResults result213(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.6, - 0.0, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result213( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, emptyString, 2, + function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 0.6, 0.0, 0.1, fieldName, influences, + false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result213)); CPPUNIT_ASSERT(writer.acceptResult(result213)); overFieldName = "ofn"; overFieldValue = "ofv"; - ml::api::CHierarchicalResultsWriter::SResults result221(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - 
byFieldValue, - emptyString, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 0.6, - 0.0, - 0.1, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result221( + false, false, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, byFieldName, byFieldValue, emptyString, 2, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 0.6, 0.0, 0.1, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result221)); CPPUNIT_ASSERT(writer.acceptResult(result221)); - ml::api::CHierarchicalResultsWriter::SResults result222(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 0.6, - 0.0, - 0.1, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result222( + false, false, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, emptyString, emptyString, emptyString, 2, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 0.6, 0.0, 0.1, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result222)); - ml::api::CHierarchicalResultsWriter::SResults result223(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 3.0, - 0.0, - 0.02, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result223( + false, false, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, byFieldName, byFieldValue, emptyString, 2, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 3.0, 0.0, 0.02, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result223)); - ml::api::CHierarchicalResultsWriter::SResults result224(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 2, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 20.0, - 0.0, - 0.02, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result224( + false, true, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, emptyString, emptyString, emptyString, 2, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 20.0, 0.0, 0.02, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result224)); } @@ -1478,84 +962,29 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) { overFieldName.clear(); overFieldValue.clear(); - ml::api::CHierarchicalResultsWriter::SResults result311(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 3, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 30.0, - 0.0, - 0.02, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result311( + 
ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, emptyString, 3, + function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 30.0, 0.0, 0.02, fieldName, influences, + false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result311)); overFieldName = "ofn"; overFieldValue = "ofv"; - ml::api::CHierarchicalResultsWriter::SResults result321(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - emptyString, - 3, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 31.0, - 0.0, - 0.0002, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result321( + false, false, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, byFieldName, byFieldValue, emptyString, 3, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 31.0, 0.0, 0.0002, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result321)); - ml::api::CHierarchicalResultsWriter::SResults result322(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - emptyString, - emptyString, - emptyString, - 3, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 31.0, - 0.0, - 0.0002, - 79, - fieldName, - influences, - false, - true, - 2, - 100); + ml::api::CHierarchicalResultsWriter::SResults result322( + false, true, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, emptyString, emptyString, emptyString, 3, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 31.0, 0.0, 0.0002, 79, fieldName, influences, false, true, 2, 100); CPPUNIT_ASSERT(writer.acceptResult(result322)); } @@ -1611,7 +1040,8 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) { CPPUNIT_ASSERT(records[i].HasMember("initial_record_score")); CPPUNIT_ASSERT(records[i].HasMember("record_score")); CPPUNIT_ASSERT(records[i].HasMember("probability")); - CPPUNIT_ASSERT_EQUAL(EXPECTED_PROBABILITIES[probIndex], records[i]["probability"].GetDouble()); + CPPUNIT_ASSERT_EQUAL(EXPECTED_PROBABILITIES[probIndex], + records[i]["probability"].GetDouble()); ++probIndex; if (isInterim) { @@ -1701,7 +1131,10 @@ void CJsonOutputWriterTest::testLimitedRecordsWriteHelper(bool isInterim) { } ml::model::CHierarchicalResults::TNode -createInfluencerNode(const std::string& personName, const std::string& personValue, double probability, double normalisedAnomalyScore) { +createInfluencerNode(const std::string& personName, + const std::string& personValue, + double probability, + double normalisedAnomalyScore) { ml::model::CHierarchicalResults::TResultSpec spec; spec.s_PersonFieldName = ml::model::CStringStore::names().get(personName); spec.s_PersonFieldValue = ml::model::CStringStore::names().get(personValue); @@ -1715,7 +1148,10 @@ createInfluencerNode(const std::string& personName, const std::string& personVal } ml::model::CHierarchicalResults::TNode -createBucketInfluencerNode(const std::string& personName, double probability, double normalisedAnomalyScore, double rawAnomalyScore) { +createBucketInfluencerNode(const std::string& personName, + double probability, + double normalisedAnomalyScore, + double rawAnomalyScore) { ml::model::CHierarchicalResults::TResultSpec spec; 
spec.s_PersonFieldName = ml::model::CStringStore::names().get(personName); @@ -1736,8 +1172,10 @@ void CJsonOutputWriterTest::testWriteInfluencers() { std::string daisy("daisy"); std::string jim("jim"); - ml::model::CHierarchicalResults::TNode node1 = createInfluencerNode(user, daisy, 0.5, 10.0); - ml::model::CHierarchicalResults::TNode node2 = createInfluencerNode(user, jim, 0.9, 100.0); + ml::model::CHierarchicalResults::TNode node1 = + createInfluencerNode(user, daisy, 0.5, 10.0); + ml::model::CHierarchicalResults::TNode node2 = + createInfluencerNode(user, jim, 0.9, 100.0); ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); @@ -1766,24 +1204,31 @@ void CJsonOutputWriterTest::testWriteInfluencers() { const rapidjson::Value& influencer = influencers[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(influencer.HasMember("job_id")); - CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(influencer["job_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("job"), + std::string(influencer["job_id"].GetString())); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.5, influencer["probability"].GetDouble(), 0.001); - CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, influencer["initial_influencer_score"].GetDouble(), 0.001); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 10.0, influencer["initial_influencer_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(influencer.HasMember("influencer_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, influencer["influencer_score"].GetDouble(), 0.001); - CPPUNIT_ASSERT_EQUAL(std::string("user"), std::string(influencer["influencer_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("daisy"), std::string(influencer["influencer_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("user"), + std::string(influencer["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("daisy"), + std::string(influencer["influencer_field_value"].GetString())); CPPUNIT_ASSERT_EQUAL(42000, influencer["timestamp"].GetInt()); CPPUNIT_ASSERT(influencer["is_interim"].GetBool()); CPPUNIT_ASSERT(influencer.HasMember("bucket_span")); const rapidjson::Value& influencer2 = influencers[rapidjson::SizeType(1)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.9, influencer2["probability"].GetDouble(), 0.001); - CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, influencer2["initial_influencer_score"].GetDouble(), 0.001); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 100.0, influencer2["initial_influencer_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(influencer2.HasMember("influencer_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, influencer2["influencer_score"].GetDouble(), 0.001); - CPPUNIT_ASSERT_EQUAL(std::string("user"), std::string(influencer2["influencer_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("jim"), std::string(influencer2["influencer_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("user"), + std::string(influencer2["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("jim"), + std::string(influencer2["influencer_field_value"].GetString())); CPPUNIT_ASSERT_EQUAL(42000, influencer2["timestamp"].GetInt()); CPPUNIT_ASSERT(influencer2["is_interim"].GetBool()); CPPUNIT_ASSERT(influencer2.HasMember("bucket_span")); @@ -1804,14 +1249,21 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() { std::string bob("bob"); std::string laptop("laptop"); - ml::model::CHierarchicalResults::TNode node1 = createInfluencerNode(user, daisy, 0.5, 10.0); - ml::model::CHierarchicalResults::TNode node2 = createInfluencerNode(user, jim, 0.9, 100.0); - 
ml::model::CHierarchicalResults::TNode node3 = createInfluencerNode(user, bob, 0.3, 9.0); - ml::model::CHierarchicalResults::TNode node4 = createInfluencerNode(computer, laptop, 0.3, 12.0); - - ml::model::CHierarchicalResults::TNode bnode1 = createBucketInfluencerNode(user, 0.5, 10.0, 1.0); - ml::model::CHierarchicalResults::TNode bnode2 = createBucketInfluencerNode(computer, 0.9, 100.0, 10.0); - ml::model::CHierarchicalResults::TNode bnode3 = createBucketInfluencerNode(monitor, 0.3, 9.0, 0.9); + ml::model::CHierarchicalResults::TNode node1 = + createInfluencerNode(user, daisy, 0.5, 10.0); + ml::model::CHierarchicalResults::TNode node2 = + createInfluencerNode(user, jim, 0.9, 100.0); + ml::model::CHierarchicalResults::TNode node3 = + createInfluencerNode(user, bob, 0.3, 9.0); + ml::model::CHierarchicalResults::TNode node4 = + createInfluencerNode(computer, laptop, 0.3, 12.0); + + ml::model::CHierarchicalResults::TNode bnode1 = + createBucketInfluencerNode(user, 0.5, 10.0, 1.0); + ml::model::CHierarchicalResults::TNode bnode2 = + createBucketInfluencerNode(computer, 0.9, 100.0, 10.0); + ml::model::CHierarchicalResults::TNode bnode3 = + createBucketInfluencerNode(monitor, 0.3, 9.0, 0.9); ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); @@ -1836,29 +1288,10 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() { std::string fn("field_name"); std::string emptyStr; ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - ml::api::CHierarchicalResultsWriter::SResults result(ml::api::CHierarchicalResultsWriter::E_Result, - pfn, - pfv, - bfn, - bfv, - emptyStr, - 0, - fun, - fund, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.1, - fn, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result( + ml::api::CHierarchicalResultsWriter::E_Result, pfn, pfv, bfn, bfv, emptyStr, + 0, fun, fund, 42.0, 79, TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), + 0.0, 0.1, 0.1, fn, influences, false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result)); @@ -1884,20 +1317,26 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() { const rapidjson::Value& influencer = influencers[rapidjson::SizeType(0)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.9, influencer["probability"].GetDouble(), 0.001); - CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, influencer["initial_influencer_score"].GetDouble(), 0.001); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 100.0, influencer["initial_influencer_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(influencer.HasMember("influencer_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, influencer["influencer_score"].GetDouble(), 0.001); - CPPUNIT_ASSERT_EQUAL(std::string("user"), std::string(influencer["influencer_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("jim"), std::string(influencer["influencer_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("user"), + std::string(influencer["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("jim"), + std::string(influencer["influencer_field_value"].GetString())); CPPUNIT_ASSERT(influencer.HasMember("bucket_span")); const rapidjson::Value& influencer2 = influencers[rapidjson::SizeType(1)]; CPPUNIT_ASSERT_DOUBLES_EQUAL(0.3, influencer2["probability"].GetDouble(), 0.001); - CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, influencer2["initial_influencer_score"].GetDouble(), 0.001); + 
CPPUNIT_ASSERT_DOUBLES_EQUAL( + 12.0, influencer2["initial_influencer_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(influencer2.HasMember("influencer_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, influencer2["influencer_score"].GetDouble(), 0.001); - CPPUNIT_ASSERT_EQUAL(std::string("computer"), std::string(influencer2["influencer_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("laptop"), std::string(influencer2["influencer_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("computer"), + std::string(influencer2["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("laptop"), + std::string(influencer2["influencer_field_value"].GetString())); CPPUNIT_ASSERT(influencer2.HasMember("bucket_span")); // bucket influencers @@ -1912,7 +1351,8 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() { CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, binf["initial_anomaly_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(binf.HasMember("anomaly_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(100.0, binf["anomaly_score"].GetDouble(), 0.001); - CPPUNIT_ASSERT_EQUAL(std::string("computer"), std::string(binf["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("computer"), + std::string(binf["influencer_field_name"].GetString())); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, binf["raw_anomaly_score"].GetDouble(), 0.001); const rapidjson::Value& binf2 = bucketInfluencers[rapidjson::SizeType(1)]; @@ -1920,7 +1360,8 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() { CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, binf2["initial_anomaly_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(binf2.HasMember("anomaly_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, binf2["anomaly_score"].GetDouble(), 0.001); - CPPUNIT_ASSERT_EQUAL(std::string("user"), std::string(binf2["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("user"), + std::string(binf2["influencer_field_name"].GetString())); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, binf2["raw_anomaly_score"].GetDouble(), 0.001); const rapidjson::Value& binf3 = bucketInfluencers[rapidjson::SizeType(2)]; @@ -1928,7 +1369,8 @@ void CJsonOutputWriterTest::testWriteInfluencersWithLimit() { CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, binf3["initial_anomaly_score"].GetDouble(), 0.001); CPPUNIT_ASSERT(binf3.HasMember("anomaly_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0, binf3["anomaly_score"].GetDouble(), 0.001); - CPPUNIT_ASSERT_EQUAL(std::string("bucket_time"), std::string(binf3["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("bucket_time"), + std::string(binf3["influencer_field_name"].GetString())); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, binf3["raw_anomaly_score"].GetDouble(), 0.001); } @@ -1957,53 +1399,46 @@ void CJsonOutputWriterTest::testWriteWithInfluences() { std::string webserver("web-server"); ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr field1 = - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(user), - ml::model::CStringStore::names().get(dave)); + ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr( + ml::model::CStringStore::names().get(user), + ml::model::CStringStore::names().get(dave)); ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr field2 = - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(user), - ml::model::CStringStore::names().get(cat)); + ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr( + 
ml::model::CStringStore::names().get(user),
+            ml::model::CStringStore::names().get(cat));
     ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr field3 =
-        ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(user),
-                                                                               ml::model::CStringStore::names().get(jo));
+        ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(
+            ml::model::CStringStore::names().get(user),
+            ml::model::CStringStore::names().get(jo));
     ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr hostField1 =
-        ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(host),
-                                                                               ml::model::CStringStore::names().get(localhost));
+        ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(
+            ml::model::CStringStore::names().get(host),
+            ml::model::CStringStore::names().get(localhost));
     ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr hostField2 =
-        ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(ml::model::CStringStore::names().get(host),
-                                                                               ml::model::CStringStore::names().get(webserver));
-
-    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(field1, 0.4));
-    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(field2, 1.0));
-    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(hostField1, 0.7));
-    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(field3, 0.1));
-    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(hostField2, 0.8));
+        ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPr(
+            ml::model::CStringStore::names().get(host),
+            ml::model::CStringStore::names().get(webserver));
+
+    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(
+        field1, 0.4));
+    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(
+        field2, 1.0));
+    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(
+        hostField1, 0.7));
+    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(
+        field3, 0.1));
+    influences.push_back(ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePr(
+        hostField2, 0.8));
 
     // The output writer won't close the JSON structures until it is destroyed
-    ml::api::CHierarchicalResultsWriter::SResults result(ml::api::CHierarchicalResultsWriter::E_Result,
-                                                         partitionFieldName,
-                                                         partitionFieldValue,
-                                                         byFieldName,
-                                                         byFieldValue,
-                                                         emptyString,
-                                                         1,
-                                                         function,
-                                                         functionDescription,
-                                                         42.0,
-                                                         79,
-                                                         TDouble1Vec(1, 6953.0),
-                                                         TDouble1Vec(1, 10090.0),
-                                                         0.0,
-                                                         0.1,
-                                                         0.1,
-                                                         fieldName,
-                                                         influences,
-                                                         false,
-                                                         true,
-                                                         1,
-                                                         100,
-                                                         EMPTY_STRING_LIST);
+    ml::api::CHierarchicalResultsWriter::SResults result(
+        ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
+        partitionFieldValue, byFieldName, byFieldValue, emptyString, 1,
+        function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
+        TDouble1Vec(1, 10090.0), 0.0, 0.1, 0.1, fieldName, influences,
+        false, true, 1, 100, EMPTY_STRING_LIST);
 
     ml::core::CJsonOutputStreamWrapper outputStream(sstream);
     ml::api::CJsonOutputWriter writer("job", outputStream);
@@ -2042,34 +1477,47 @@ void
CJsonOutputWriterTest::testWriteWithInfluences() { { const rapidjson::Value& influence = influences[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(influence.HasMember("influencer_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("host"), std::string(influence["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("host"), + std::string(influence["influencer_field_name"].GetString())); CPPUNIT_ASSERT(influence.HasMember("influencer_field_values")); const rapidjson::Value& influencerFieldValues = influence["influencer_field_values"]; CPPUNIT_ASSERT(influencerFieldValues.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), influencerFieldValues.Size()); // Check influencers are ordered - CPPUNIT_ASSERT_EQUAL(std::string("web-server"), std::string(influencerFieldValues[rapidjson::SizeType(0)].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("localhost"), std::string(influencerFieldValues[rapidjson::SizeType(1)].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string("web-server"), + std::string(influencerFieldValues[rapidjson::SizeType(0)].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string("localhost"), + std::string(influencerFieldValues[rapidjson::SizeType(1)].GetString())); } { const rapidjson::Value& influence = influences[rapidjson::SizeType(1)]; CPPUNIT_ASSERT(influence.HasMember("influencer_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("user"), std::string(influence["influencer_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("user"), + std::string(influence["influencer_field_name"].GetString())); CPPUNIT_ASSERT(influence.HasMember("influencer_field_values")); const rapidjson::Value& influencerFieldValues = influence["influencer_field_values"]; CPPUNIT_ASSERT(influencerFieldValues.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(3), influencerFieldValues.Size()); // Check influencers are ordered - CPPUNIT_ASSERT_EQUAL(std::string("cat"), std::string(influencerFieldValues[rapidjson::SizeType(0)].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("dave"), std::string(influencerFieldValues[rapidjson::SizeType(1)].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("jo"), std::string(influencerFieldValues[rapidjson::SizeType(2)].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string("cat"), + std::string(influencerFieldValues[rapidjson::SizeType(0)].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string("dave"), + std::string(influencerFieldValues[rapidjson::SizeType(1)].GetString())); + CPPUNIT_ASSERT_EQUAL( + std::string("jo"), + std::string(influencerFieldValues[rapidjson::SizeType(2)].GetString())); } } void CJsonOutputWriterTest::testPersistNormalizer() { - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig(); std::ostringstream sstream; ml::core_t::TTime persistTime(1); @@ -2094,13 +1542,15 @@ void CJsonOutputWriterTest::testPersistNormalizer() { CPPUNIT_ASSERT(quantileWrapper.HasMember("quantiles")); const rapidjson::Value& quantileState = quantileWrapper["quantiles"]; CPPUNIT_ASSERT(quantileState.HasMember("job_id")); - CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(quantileState["job_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("job"), + std::string(quantileState["job_id"].GetString())); CPPUNIT_ASSERT(quantileState.HasMember("quantile_state")); CPPUNIT_ASSERT(quantileState.HasMember("timestamp")); } void CJsonOutputWriterTest::testPartitionScores() { - 
ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig(); std::ostringstream sstream; { @@ -2118,29 +1568,13 @@ void CJsonOutputWriterTest::testPartitionScores() { if (i > 0) { partitionFieldValue = 'p' + ml::core::CStringUtils::typeToString(i); } - ml::api::CHierarchicalResultsWriter::SResults result(ml::api::CHierarchicalResultsWriter::E_PartitionResult, - partitionFieldName, - partitionFieldValue, - emptyString, - emptyString, - emptyString, - 1, - emptyString, - emptyString, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - double(i), // normalised anomaly score - 0.1, - emptyString, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result( + ml::api::CHierarchicalResultsWriter::E_PartitionResult, + partitionFieldName, partitionFieldValue, emptyString, + emptyString, emptyString, 1, emptyString, emptyString, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 0.0, + double(i), // normalised anomaly score + 0.1, emptyString, influences, false, true, 1, 100, EMPTY_STRING_LIST); writer.acceptResult(result); } @@ -2170,16 +1604,19 @@ void CJsonOutputWriterTest::testPartitionScores() { CPPUNIT_ASSERT(pDoc.HasMember("record_score")); CPPUNIT_ASSERT_DOUBLES_EQUAL(double(i), pDoc["record_score"].GetDouble(), 0.01); CPPUNIT_ASSERT(pDoc.HasMember("initial_record_score")); - CPPUNIT_ASSERT_DOUBLES_EQUAL(double(i), pDoc["initial_record_score"].GetDouble(), 0.01); + CPPUNIT_ASSERT_DOUBLES_EQUAL(double(i), + pDoc["initial_record_score"].GetDouble(), 0.01); CPPUNIT_ASSERT(pDoc.HasMember("partition_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("part1"), std::string(pDoc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("part1"), + std::string(pDoc["partition_field_name"].GetString())); std::string fieldValue; if (i > 0) { fieldValue = 'p' + ml::core::CStringUtils::typeToString(i); } CPPUNIT_ASSERT(pDoc.HasMember("partition_field_value")); - CPPUNIT_ASSERT_EQUAL(fieldValue, std::string(pDoc["partition_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL( + fieldValue, std::string(pDoc["partition_field_value"].GetString())); } } @@ -2226,7 +1663,8 @@ void CJsonOutputWriterTest::testReportMemoryUsage() { CPPUNIT_ASSERT(sizeStats.HasMember("timestamp")); CPPUNIT_ASSERT_EQUAL(6000, sizeStats["timestamp"].GetInt()); CPPUNIT_ASSERT(sizeStats.HasMember("memory_status")); - CPPUNIT_ASSERT_EQUAL(std::string("hard_limit"), std::string(sizeStats["memory_status"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("hard_limit"), + std::string(sizeStats["memory_status"].GetString())); CPPUNIT_ASSERT(sizeStats.HasMember("log_time")); int64_t nowMs = ml::core::CTimeUtils::now() * 1000ll; CPPUNIT_ASSERT(nowMs >= sizeStats["log_time"].GetInt64()); @@ -2251,56 +1689,22 @@ void CJsonOutputWriterTest::testWriteScheduledEvent() { ml::api::CJsonOutputWriter writer("job", outputStream); // This result has no scheduled events - ml::api::CHierarchicalResultsWriter::SResults result(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 100, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - EMPTY_STRING_LIST); + 
ml::api::CHierarchicalResultsWriter::SResults result( + ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, partitionFieldValue, byFieldName, byFieldValue, + emptyString, 100, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 0.0, 0.1, 0.1, + fieldName, influences, false, true, 1, 100, EMPTY_STRING_LIST); CPPUNIT_ASSERT(writer.acceptResult(result)); // This result has 2 scheduled events std::vector<std::string> eventDescriptions{"event-foo", "event-bar"}; - ml::api::CHierarchicalResultsWriter::SResults result2(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - emptyString, - 200, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 0.0, - 0.1, - 0.1, - fieldName, - influences, - false, - true, - 1, - 100, - eventDescriptions); + ml::api::CHierarchicalResultsWriter::SResults result2( + ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, partitionFieldValue, byFieldName, byFieldValue, + emptyString, 200, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 0.0, 0.1, 0.1, + fieldName, influences, false, true, 1, 100, eventDescriptions); CPPUNIT_ASSERT(writer.acceptResult(result2)); CPPUNIT_ASSERT(writer.endOutputBatch(false, 1U)); @@ -2330,8 +1734,10 @@ void CJsonOutputWriterTest::testWriteScheduledEvent() { const rapidjson::Value& events = bucketWithEvents["scheduled_events"]; CPPUNIT_ASSERT(events.IsArray()); CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(2), events.Size()); - CPPUNIT_ASSERT_EQUAL(std::string("event-foo"), std::string(events[rapidjson::SizeType(0)].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("event-bar"), std::string(events[rapidjson::SizeType(1)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("event-foo"), + std::string(events[rapidjson::SizeType(0)].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("event-bar"), + std::string(events[rapidjson::SizeType(1)].GetString())); } void CJsonOutputWriterTest::testThroughputWithScopedAllocator() { @@ -2363,127 +1769,38 @@ void CJsonOutputWriterTest::testThroughputHelper(bool useScopedAllocator) { std::string emptyString; ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - ml::api::CHierarchicalResultsWriter::SResults result11(false, - false, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.5, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result112(false, - true, - partitionFieldName, - partitionFieldValue, - overFieldName, - overFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - TDouble1Vec(1, 10090.0), - TDouble1Vec(1, 6953.0), - 2.24, - 0.5, - 0.0, - 79, - fieldName, - influences, - false, - false, - 1, - 100); - - ml::api::CHierarchicalResultsWriter::SResults result12(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.8, - 0.0, - fieldName, - influences, - false, - true, - 2, - 100, -
EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result13(ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.5, - 0.0, - fieldName, - influences, - false, - false, - 3, - 100, - EMPTY_STRING_LIST); - - ml::api::CHierarchicalResultsWriter::SResults result14(ml::api::CHierarchicalResultsWriter::E_Result, - partitionFieldName, - partitionFieldValue, - byFieldName, - byFieldValue, - correlatedByFieldValue, - 1, - function, - functionDescription, - 42.0, - 79, - TDouble1Vec(1, 6953.0), - TDouble1Vec(1, 10090.0), - 2.24, - 0.0, - 0.0, - fieldName, - influences, - false, - false, - 4, - 100, - EMPTY_STRING_LIST); + ml::api::CHierarchicalResultsWriter::SResults result11( + false, false, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, byFieldName, byFieldValue, correlatedByFieldValue, 1, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 2.24, 0.5, 0.0, 79, fieldName, influences, false, false, 1, 100); + + ml::api::CHierarchicalResultsWriter::SResults result112( + false, true, partitionFieldName, partitionFieldValue, overFieldName, + overFieldValue, byFieldName, byFieldValue, correlatedByFieldValue, 1, function, + functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0), + 2.24, 0.5, 0.0, 79, fieldName, influences, false, false, 1, 100); + + ml::api::CHierarchicalResultsWriter::SResults result12( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, correlatedByFieldValue, + 1, function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 2.24, 0.8, 0.0, fieldName, influences, false, + true, 2, 100, EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result13( + ml::api::CHierarchicalResultsWriter::E_SimpleCountResult, + partitionFieldName, partitionFieldValue, byFieldName, byFieldValue, + correlatedByFieldValue, 1, function, functionDescription, 42.0, 79, + TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 2.24, 0.5, 0.0, + fieldName, influences, false, false, 3, 100, EMPTY_STRING_LIST); + + ml::api::CHierarchicalResultsWriter::SResults result14( + ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, + partitionFieldValue, byFieldName, byFieldValue, correlatedByFieldValue, + 1, function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0), + TDouble1Vec(1, 10090.0), 2.24, 0.0, 0.0, fieldName, influences, false, + false, 4, 100, EMPTY_STRING_LIST); // 1st bucket writer.acceptBucketTimeInfluencer(1, 0.01, 13.44, 70.0); @@ -2496,7 +1813,8 @@ void CJsonOutputWriterTest::testThroughputHelper(bool useScopedAllocator) { for (size_t count = 0; count < TEST_SIZE; ++count) { if (useScopedAllocator) { - using TScopedAllocator = ml::core::CScopedRapidJsonPoolAllocator<ml::api::CJsonOutputWriter>; + using TScopedAllocator = + ml::core::CScopedRapidJsonPoolAllocator<ml::api::CJsonOutputWriter>; static const std::string ALLOCATOR_ID("CAnomalyJob::writeOutResults"); TScopedAllocator scopedAllocator(ALLOCATOR_ID, writer); diff --git a/lib/api/unittest/CLengthEncodedInputParserTest.cc b/lib/api/unittest/CLengthEncodedInputParserTest.cc index 2e4071ff39..544d2f8ee5 100644 --- a/lib/api/unittest/CLengthEncodedInputParserTest.cc +++ b/lib/api/unittest/CLengthEncodedInputParserTest.cc @@ -27,12 +27,15 @@ CppUnit::Test*
CLengthEncodedInputParserTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLengthEncodedInputParserTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CLengthEncodedInputParserTest>("CLengthEncodedInputParserTest::testCsvEquivalence", - &CLengthEncodedInputParserTest::testCsvEquivalence)); - suiteOfTests->addTest(new CppUnit::TestCaller<CLengthEncodedInputParserTest>("CLengthEncodedInputParserTest::testThroughput", - &CLengthEncodedInputParserTest::testThroughput)); suiteOfTests->addTest(new CppUnit::TestCaller<CLengthEncodedInputParserTest>( - "CLengthEncodedInputParserTest::testCorruptStreamDetection", &CLengthEncodedInputParserTest::testCorruptStreamDetection)); + "CLengthEncodedInputParserTest::testCsvEquivalence", + &CLengthEncodedInputParserTest::testCsvEquivalence)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLengthEncodedInputParserTest>( + "CLengthEncodedInputParserTest::testThroughput", + &CLengthEncodedInputParserTest::testThroughput)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLengthEncodedInputParserTest>( + "CLengthEncodedInputParserTest::testCorruptStreamDetection", + &CLengthEncodedInputParserTest::testCorruptStreamDetection)); return suiteOfTests; } @@ -118,18 +121,23 @@ class CVisitor { // Check the field names CPPUNIT_ASSERT_EQUAL(m_ExpectedFieldNames.size(), dataRowFields.size()); - for (ml::api::CCsvInputParser::TStrStrUMapCItr iter = dataRowFields.begin(); iter != dataRowFields.end(); ++iter) { + for (ml::api::CCsvInputParser::TStrStrUMapCItr iter = dataRowFields.begin(); + iter != dataRowFields.end(); ++iter) { LOG_DEBUG(<< "Field " << iter->first << " is " << iter->second); - CPPUNIT_ASSERT(std::find(m_ExpectedFieldNames.begin(), m_ExpectedFieldNames.end(), iter->first) != m_ExpectedFieldNames.end()); + CPPUNIT_ASSERT(std::find(m_ExpectedFieldNames.begin(), + m_ExpectedFieldNames.end(), + iter->first) != m_ExpectedFieldNames.end()); } // Check the line count is consistent with the _raw field ml::api::CCsvInputParser::TStrStrUMapCItr rawIter = dataRowFields.find("_raw"); CPPUNIT_ASSERT(rawIter != dataRowFields.end()); - ml::api::CCsvInputParser::TStrStrUMapCItr lineCountIter = dataRowFields.find("linecount"); + ml::api::CCsvInputParser::TStrStrUMapCItr lineCountIter = + dataRowFields.find("linecount"); CPPUNIT_ASSERT(lineCountIter != dataRowFields.end()); - size_t expectedLineCount(1 + std::count(rawIter->second.begin(), rawIter->second.end(), '\n')); + size_t expectedLineCount(1 + std::count(rawIter->second.begin(), + rawIter->second.end(), '\n')); size_t lineCount(0); CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(lineCountIter->second, lineCount)); CPPUNIT_ASSERT_EQUAL(expectedLineCount, lineCount); @@ -230,7 +238,8 @@ void CLengthEncodedInputParserTest::testThroughput() { CPPUNIT_ASSERT_EQUAL(setupVisitor.recordsPerBlock() * TEST_SIZE, visitor.recordCount()); - LOG_INFO(<< "Parsing " << visitor.recordCount() << " records took " << (end - start) << " seconds"); + LOG_INFO(<< "Parsing " << visitor.recordCount() << " records took " + << (end - start) << " seconds"); } void CLengthEncodedInputParserTest::testCorruptStreamDetection() { diff --git a/lib/api/unittest/CLineifiedJsonInputParserTest.cc b/lib/api/unittest/CLineifiedJsonInputParserTest.cc index f13eb423cb..9f568bbdde 100644 --- a/lib/api/unittest/CLineifiedJsonInputParserTest.cc +++ b/lib/api/unittest/CLineifiedJsonInputParserTest.cc @@ -19,10 +19,12 @@ CppUnit::Test* CLineifiedJsonInputParserTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLineifiedJsonInputParserTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonInputParserTest>("CLineifiedJsonInputParserTest::testThroughputArbitrary",
&CLineifiedJsonInputParserTest::testThroughputArbitrary)); - suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonInputParserTest>("CLineifiedJsonInputParserTest::testThroughputCommon", - &CLineifiedJsonInputParserTest::testThroughputCommon)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonInputParserTest>( + "CLineifiedJsonInputParserTest::testThroughputArbitrary", + &CLineifiedJsonInputParserTest::testThroughputArbitrary)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonInputParserTest>( + "CLineifiedJsonInputParserTest::testThroughputCommon", + &CLineifiedJsonInputParserTest::testThroughputCommon)); return suiteOfTests; } @@ -125,5 +127,6 @@ void CLineifiedJsonInputParserTest::runTest(bool allDocsSameStructure) { CPPUNIT_ASSERT_EQUAL(setupVisitor.recordsPerBlock() * TEST_SIZE, visitor.recordCount()); - LOG_INFO(<< "Parsing " << visitor.recordCount() << " records took " << (end - start) << " seconds"); + LOG_INFO(<< "Parsing " << visitor.recordCount() << " records took " + << (end - start) << " seconds"); } diff --git a/lib/api/unittest/CLineifiedJsonOutputWriterTest.cc b/lib/api/unittest/CLineifiedJsonOutputWriterTest.cc index dd4f63201f..60324c6c57 100644 --- a/lib/api/unittest/CLineifiedJsonOutputWriterTest.cc +++ b/lib/api/unittest/CLineifiedJsonOutputWriterTest.cc @@ -14,10 +14,12 @@ CppUnit::Test* CLineifiedJsonOutputWriterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLineifiedJsonOutputWriterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonOutputWriterTest>("CLineifiedJsonOutputWriterTest::testStringOutput", - &CLineifiedJsonOutputWriterTest::testStringOutput)); - suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonOutputWriterTest>("CLineifiedJsonOutputWriterTest::testNumericOutput", - &CLineifiedJsonOutputWriterTest::testNumericOutput)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonOutputWriterTest>( + "CLineifiedJsonOutputWriterTest::testStringOutput", + &CLineifiedJsonOutputWriterTest::testStringOutput)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedJsonOutputWriterTest>( + "CLineifiedJsonOutputWriterTest::testNumericOutput", + &CLineifiedJsonOutputWriterTest::testNumericOutput)); return suiteOfTests; } @@ -34,7 +36,8 @@ void CLineifiedJsonOutputWriterTest::testStringOutput() { const std::string& output = writer.internalString(); - CPPUNIT_ASSERT_EQUAL(std::string("{\"probability\":\"0.01\",\"normalized_score\":\"3.3\"}\n"), output); + CPPUNIT_ASSERT_EQUAL( + std::string("{\"probability\":\"0.01\",\"normalized_score\":\"3.3\"}\n"), output); } void CLineifiedJsonOutputWriterTest::testNumericOutput() { @@ -49,5 +52,6 @@ void CLineifiedJsonOutputWriterTest::testNumericOutput() { const std::string& output = writer.internalString(); - CPPUNIT_ASSERT_EQUAL(std::string("{\"probability\":0.01,\"normalized_score\":3.3}\n"), output); + CPPUNIT_ASSERT_EQUAL( + std::string("{\"probability\":0.01,\"normalized_score\":3.3}\n"), output); } diff --git a/lib/api/unittest/CLineifiedXmlInputParserTest.cc b/lib/api/unittest/CLineifiedXmlInputParserTest.cc index c0c77be07f..be79b6a42d 100644 --- a/lib/api/unittest/CLineifiedXmlInputParserTest.cc +++ b/lib/api/unittest/CLineifiedXmlInputParserTest.cc @@ -21,15 +21,18 @@ CppUnit::Test* CLineifiedXmlInputParserTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLineifiedXmlInputParserTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CLineifiedXmlInputParserTest>("CLineifiedXmlInputParserTest::testThroughputArbitraryConformant", - &CLineifiedXmlInputParserTest::testThroughputArbitraryConformant)); suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedXmlInputParserTest>( - "CLineifiedXmlInputParserTest::testThroughputCommonConformant",
&CLineifiedXmlInputParserTest::testThroughputCommonConformant)); + "CLineifiedXmlInputParserTest::testThroughputArbitraryConformant", + &CLineifiedXmlInputParserTest::testThroughputArbitraryConformant)); suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedXmlInputParserTest>( + "CLineifiedXmlInputParserTest::testThroughputCommonConformant", + &CLineifiedXmlInputParserTest::testThroughputCommonConformant)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedXmlInputParserTest>( - "CLineifiedXmlInputParserTest::testThroughputArbitraryRapid", &CLineifiedXmlInputParserTest::testThroughputArbitraryRapid)); - suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedXmlInputParserTest>("CLineifiedXmlInputParserTest::testThroughputCommonRapid", - &CLineifiedXmlInputParserTest::testThroughputCommonRapid)); + "CLineifiedXmlInputParserTest::testThroughputArbitraryRapid", + &CLineifiedXmlInputParserTest::testThroughputArbitraryRapid)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLineifiedXmlInputParserTest>( + "CLineifiedXmlInputParserTest::testThroughputCommonRapid", + &CLineifiedXmlInputParserTest::testThroughputCommonRapid)); return suiteOfTests; } @@ -143,5 +146,6 @@ void CLineifiedXmlInputParserTest::runTest(bool allDocsSameStructure) { CPPUNIT_ASSERT_EQUAL(setupVisitor.recordsPerBlock() * TEST_SIZE, visitor.recordCount()); - LOG_INFO(<< "Parsing " << visitor.recordCount() << " records took " << (end - start) << " seconds"); + LOG_INFO(<< "Parsing " << visitor.recordCount() << " records took " + << (end - start) << " seconds"); } diff --git a/lib/api/unittest/CMockDataAdder.cc b/lib/api/unittest/CMockDataAdder.cc index 8260824a24..2290bdee30 100644 --- a/lib/api/unittest/CMockDataAdder.cc +++ b/lib/api/unittest/CMockDataAdder.cc @@ -13,7 +13,8 @@ CMockDataAdder::CMockDataAdder() { } -CMockDataAdder::TOStreamP CMockDataAdder::addStreamed(const std::string& index, const std::string& /*id*/) { +CMockDataAdder::TOStreamP CMockDataAdder::addStreamed(const std::string& index, + const std::string& /*id*/) { LOG_TRACE(<< "Add Streamed for index " << index); if (m_Streams.find(index) == m_Streams.end()) { m_Streams[index] = TOStreamP(new std::ostringstream); diff --git a/lib/api/unittest/CMockDataProcessor.cc b/lib/api/unittest/CMockDataProcessor.cc index 194c19e145..4fecd5daa2 100644 --- a/lib/api/unittest/CMockDataProcessor.cc +++ b/lib/api/unittest/CMockDataProcessor.cc @@ -27,7 +27,8 @@ bool CMockDataProcessor::handleRecord(const TStrStrUMap& dataRowFields) { } if (m_OutputHandler.fieldNames(fieldNames) == false) { - LOG_ERROR(<< "Unable to set field names for output:\n" << this->debugPrintRecord(dataRowFields)); + LOG_ERROR(<< "Unable to set field names for output:\n" + << this->debugPrintRecord(dataRowFields)); return false; } m_WriteFieldNames = false; @@ -46,7 +47,8 @@ bool CMockDataProcessor::handleRecord(const TStrStrUMap& dataRowFields) { void CMockDataProcessor::finalise() { } -bool CMockDataProcessor::restoreState(ml::core::CDataSearcher& restoreSearcher, ml::core_t::TTime& completeToTime) { +bool CMockDataProcessor::restoreState(ml::core::CDataSearcher& restoreSearcher, + ml::core_t::TTime& completeToTime) { // Pass on the request in case we're chained if (m_OutputHandler.restoreState(restoreSearcher, completeToTime) == false) { return false; diff --git a/lib/api/unittest/CMockDataProcessor.h b/lib/api/unittest/CMockDataProcessor.h index 76b6d69843..420b88fe57 100644 --- a/lib/api/unittest/CMockDataProcessor.h +++ b/lib/api/unittest/CMockDataProcessor.h @@ -41,7 +41,8 @@ class CMockDataProcessor : public ml::api::CDataProcessor { virtual void finalise(); //!
Restore previously saved state - virtual bool restoreState(ml::core::CDataSearcher& restoreSearcher, ml::core_t::TTime& completeToTime); + virtual bool restoreState(ml::core::CDataSearcher& restoreSearcher, + ml::core_t::TTime& completeToTime); //! Persist current state virtual bool persistState(ml::core::CDataAdder& persister); diff --git a/lib/api/unittest/CMockSearcher.cc b/lib/api/unittest/CMockSearcher.cc index b560c4c4b3..7be67fb132 100644 --- a/lib/api/unittest/CMockSearcher.cc +++ b/lib/api/unittest/CMockSearcher.cc @@ -9,7 +9,8 @@ #include "CMockDataAdder.h" -CMockSearcher::CMockSearcher(const CMockDataAdder& mockDataAdder) : m_MockDataAdder(mockDataAdder) { +CMockSearcher::CMockSearcher(const CMockDataAdder& mockDataAdder) + : m_MockDataAdder(mockDataAdder) { } CMockSearcher::TIStreamP CMockSearcher::search(size_t currentDocNum, size_t /*limit*/) { diff --git a/lib/api/unittest/CModelPlotDataJsonWriterTest.cc b/lib/api/unittest/CModelPlotDataJsonWriterTest.cc index 1d9ba7505a..cc46591488 100644 --- a/lib/api/unittest/CModelPlotDataJsonWriterTest.cc +++ b/lib/api/unittest/CModelPlotDataJsonWriterTest.cc @@ -18,8 +18,9 @@ CppUnit::Test* CModelPlotDataJsonWriterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelPlotDataJsonWriterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CModelPlotDataJsonWriterTest>("CModelPlotDataJsonWriterTest::testWriteFlat", - &CModelPlotDataJsonWriterTest::testWriteFlat)); + suiteOfTests->addTest(new CppUnit::TestCaller<CModelPlotDataJsonWriterTest>( + "CModelPlotDataJsonWriterTest::testWriteFlat", + &CModelPlotDataJsonWriterTest::testWriteFlat)); return suiteOfTests; } @@ -32,7 +33,8 @@ void CModelPlotDataJsonWriterTest::testWriteFlat() { ml::api::CModelPlotDataJsonWriter writer(outputStream); ml::model::CModelPlotData plotData(1, "pName", "pValue", "", "bName", 300, 1); - plotData.get(ml::model_t::E_IndividualCountByBucketAndPerson, "bName") = ml::model::CModelPlotData::SByFieldData(1.0, 2.0, 3.0); + plotData.get(ml::model_t::E_IndividualCountByBucketAndPerson, "bName") = + ml::model::CModelPlotData::SByFieldData(1.0, 2.0, 3.0); writer.writeFlat("job-id", plotData); } @@ -44,19 +46,25 @@ void CModelPlotDataJsonWriterTest::testWriteFlat() { CPPUNIT_ASSERT(firstElement.HasMember("model_plot")); const rapidjson::Value& modelPlot = firstElement["model_plot"]; CPPUNIT_ASSERT(modelPlot.HasMember("job_id")); - CPPUNIT_ASSERT_EQUAL(std::string("job-id"), std::string(modelPlot["job_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("job-id"), + std::string(modelPlot["job_id"].GetString())); CPPUNIT_ASSERT(modelPlot.HasMember("model_feature")); - CPPUNIT_ASSERT_EQUAL(std::string("'count per bucket by person'"), std::string(modelPlot["model_feature"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("'count per bucket by person'"), + std::string(modelPlot["model_feature"].GetString())); CPPUNIT_ASSERT(modelPlot.HasMember("timestamp")); CPPUNIT_ASSERT_EQUAL(int64_t(1000), modelPlot["timestamp"].GetInt64()); CPPUNIT_ASSERT(modelPlot.HasMember("partition_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("pName"), std::string(modelPlot["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("pName"), + std::string(modelPlot["partition_field_name"].GetString())); CPPUNIT_ASSERT(modelPlot.HasMember("partition_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("pValue"), std::string(modelPlot["partition_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("pValue"), + std::string(modelPlot["partition_field_value"].GetString()));
CPPUNIT_ASSERT(modelPlot.HasMember("by_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("bName"), std::string(modelPlot["by_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("bName"), + std::string(modelPlot["by_field_name"].GetString())); CPPUNIT_ASSERT(modelPlot.HasMember("by_field_value")); - CPPUNIT_ASSERT_EQUAL(std::string("bName"), std::string(modelPlot["by_field_value"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("bName"), + std::string(modelPlot["by_field_value"].GetString())); CPPUNIT_ASSERT(modelPlot.HasMember("model_lower")); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, modelPlot["model_lower"].GetDouble(), 0.01); CPPUNIT_ASSERT(modelPlot.HasMember("model_upper")); diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index 1c9f277b98..9d493ea703 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -21,8 +21,8 @@ using namespace api; CppUnit::Test* CModelSnapshotJsonWriterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelSnapshotJsonWriterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CModelSnapshotJsonWriterTest::testWrite", - &CModelSnapshotJsonWriterTest::testWrite)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CModelSnapshotJsonWriterTest::testWrite", &CModelSnapshotJsonWriterTest::testWrite)); return suiteOfTests; } @@ -72,36 +72,45 @@ void CModelSnapshotJsonWriterTest::testWrite() { CPPUNIT_ASSERT(snapshot.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(snapshot["job_id"].GetString())); CPPUNIT_ASSERT(snapshot.HasMember("min_version")); - CPPUNIT_ASSERT_EQUAL(std::string("6.3.0"), std::string(snapshot["min_version"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("6.3.0"), + std::string(snapshot["min_version"].GetString())); CPPUNIT_ASSERT(snapshot.HasMember("snapshot_id")); - CPPUNIT_ASSERT_EQUAL(std::string("test_snapshot_id"), std::string(snapshot["snapshot_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("test_snapshot_id"), + std::string(snapshot["snapshot_id"].GetString())); CPPUNIT_ASSERT(snapshot.HasMember("snapshot_doc_count")); CPPUNIT_ASSERT_EQUAL(int64_t(15), snapshot["snapshot_doc_count"].GetInt64()); CPPUNIT_ASSERT(snapshot.HasMember("timestamp")); CPPUNIT_ASSERT_EQUAL(int64_t(1521046309000), snapshot["timestamp"].GetInt64()); CPPUNIT_ASSERT(snapshot.HasMember("description")); - CPPUNIT_ASSERT_EQUAL(std::string("the snapshot description"), std::string(snapshot["description"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("the snapshot description"), + std::string(snapshot["description"].GetString())); CPPUNIT_ASSERT(snapshot.HasMember("latest_record_time_stamp")); - CPPUNIT_ASSERT_EQUAL(int64_t(1521046409000), snapshot["latest_record_time_stamp"].GetInt64()); + CPPUNIT_ASSERT_EQUAL(int64_t(1521046409000), + snapshot["latest_record_time_stamp"].GetInt64()); CPPUNIT_ASSERT(snapshot.HasMember("latest_result_time_stamp")); - CPPUNIT_ASSERT_EQUAL(int64_t(1521040000000), snapshot["latest_result_time_stamp"].GetInt64()); + CPPUNIT_ASSERT_EQUAL(int64_t(1521040000000), + snapshot["latest_result_time_stamp"].GetInt64()); CPPUNIT_ASSERT(snapshot.HasMember("model_size_stats")); const rapidjson::Value& modelSizeStats = snapshot["model_size_stats"]; CPPUNIT_ASSERT(modelSizeStats.HasMember("job_id")); - CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(modelSizeStats["job_id"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("job"), + 
std::string(modelSizeStats["job_id"].GetString())); CPPUNIT_ASSERT(modelSizeStats.HasMember("model_bytes")); CPPUNIT_ASSERT_EQUAL(int64_t(20000), modelSizeStats["model_bytes"].GetInt64()); CPPUNIT_ASSERT(modelSizeStats.HasMember("total_by_field_count")); CPPUNIT_ASSERT_EQUAL(int64_t(3), modelSizeStats["total_by_field_count"].GetInt64()); CPPUNIT_ASSERT(modelSizeStats.HasMember("total_partition_field_count")); - CPPUNIT_ASSERT_EQUAL(int64_t(1), modelSizeStats["total_partition_field_count"].GetInt64()); + CPPUNIT_ASSERT_EQUAL(int64_t(1), + modelSizeStats["total_partition_field_count"].GetInt64()); CPPUNIT_ASSERT(modelSizeStats.HasMember("total_over_field_count")); CPPUNIT_ASSERT_EQUAL(int64_t(150), modelSizeStats["total_over_field_count"].GetInt64()); CPPUNIT_ASSERT(modelSizeStats.HasMember("bucket_allocation_failures_count")); - CPPUNIT_ASSERT_EQUAL(int64_t(4), modelSizeStats["bucket_allocation_failures_count"].GetInt64()); + CPPUNIT_ASSERT_EQUAL( + int64_t(4), modelSizeStats["bucket_allocation_failures_count"].GetInt64()); CPPUNIT_ASSERT(modelSizeStats.HasMember("memory_status")); - CPPUNIT_ASSERT_EQUAL(std::string("ok"), std::string(modelSizeStats["memory_status"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("ok"), + std::string(modelSizeStats["memory_status"].GetString())); CPPUNIT_ASSERT(modelSizeStats.HasMember("timestamp")); CPPUNIT_ASSERT_EQUAL(int64_t(1521046309000), modelSizeStats["timestamp"].GetInt64()); CPPUNIT_ASSERT(modelSizeStats.HasMember("log_time")); @@ -111,7 +120,8 @@ void CModelSnapshotJsonWriterTest::testWrite() { CPPUNIT_ASSERT(quantiles.HasMember("job_id")); CPPUNIT_ASSERT_EQUAL(std::string("job"), std::string(quantiles["job_id"].GetString())); CPPUNIT_ASSERT(quantiles.HasMember("quantile_state")); - CPPUNIT_ASSERT_EQUAL(std::string("some normalizer state"), std::string(quantiles["quantile_state"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("some normalizer state"), + std::string(quantiles["quantile_state"].GetString())); CPPUNIT_ASSERT(quantiles.HasMember("timestamp")); CPPUNIT_ASSERT_EQUAL(int64_t(1521040000000), quantiles["timestamp"].GetInt64()); } diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc index 819ab374e1..d10df85ff2 100644 --- a/lib/api/unittest/CMultiFileDataAdderTest.cc +++ b/lib/api/unittest/CMultiFileDataAdderTest.cc @@ -53,18 +53,23 @@ void reportPersistComplete(ml::api::CModelSnapshotJsonWriter::SModelSnapshotRepo CppUnit::Test* CMultiFileDataAdderTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultiFileDataAdderTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testSimpleWrite", - &CMultiFileDataAdderTest::testSimpleWrite)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistBy", - &CMultiFileDataAdderTest::testDetectorPersistBy)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistOver", - &CMultiFileDataAdderTest::testDetectorPersistOver)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistPartition", - &CMultiFileDataAdderTest::testDetectorPersistPartition)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistDc", - &CMultiFileDataAdderTest::testDetectorPersistDc)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>("CMultiFileDataAdderTest::testDetectorPersistCount", - &CMultiFileDataAdderTest::testDetectorPersistCount)); + suiteOfTests->addTest(new
CppUnit::TestCaller<CMultiFileDataAdderTest>( + "CMultiFileDataAdderTest::testSimpleWrite", &CMultiFileDataAdderTest::testSimpleWrite)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>( + "CMultiFileDataAdderTest::testDetectorPersistBy", + &CMultiFileDataAdderTest::testDetectorPersistBy)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>( + "CMultiFileDataAdderTest::testDetectorPersistOver", + &CMultiFileDataAdderTest::testDetectorPersistOver)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>( + "CMultiFileDataAdderTest::testDetectorPersistPartition", + &CMultiFileDataAdderTest::testDetectorPersistPartition)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>( + "CMultiFileDataAdderTest::testDetectorPersistDc", + &CMultiFileDataAdderTest::testDetectorPersistDc)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMultiFileDataAdderTest>( + "CMultiFileDataAdderTest::testDetectorPersistCount", + &CMultiFileDataAdderTest::testDetectorPersistCount)); return suiteOfTests; } @@ -129,23 +134,28 @@ void CMultiFileDataAdderTest::testSimpleWrite() { } void CMultiFileDataAdderTest::testDetectorPersistBy() { - this->detectorPersistHelper("testfiles/new_mlfields.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); + this->detectorPersistHelper("testfiles/new_mlfields.conf", + "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); } void CMultiFileDataAdderTest::testDetectorPersistOver() { - this->detectorPersistHelper("testfiles/new_mlfields_over.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); + this->detectorPersistHelper("testfiles/new_mlfields_over.conf", + "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); } void CMultiFileDataAdderTest::testDetectorPersistPartition() { - this->detectorPersistHelper("testfiles/new_mlfields_partition.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); + this->detectorPersistHelper("testfiles/new_mlfields_partition.conf", + "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); } void CMultiFileDataAdderTest::testDetectorPersistDc() { - this->detectorPersistHelper("testfiles/new_persist_dc.conf", "testfiles/files_users_programs.csv", 5); + this->detectorPersistHelper("testfiles/new_persist_dc.conf", + "testfiles/files_users_programs.csv", 5); } void CMultiFileDataAdderTest::testDetectorPersistCount() { - this->detectorPersistHelper("testfiles/new_persist_count.conf", "testfiles/files_users_programs.csv", 5); + this->detectorPersistHelper("testfiles/new_persist_count.conf", + "testfiles/files_users_programs.csv", 5); } void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFileName, @@ -168,21 +178,18 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFil ml::api::CFieldConfig fieldConfig; CPPUNIT_ASSERT(fieldConfig.initFromFile(configFileName)); - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false, ""); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig( + BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, + 0, false, ""); std::string origSnapshotId; std::size_t numOrigDocs(0); - ml::api::CAnomalyJob origJob(JOB_ID, - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - boost::bind(&reportPersistComplete, _1, boost::ref(origSnapshotId), boost::ref(numOrigDocs)), - nullptr, - -1, - "time", - timeFormat); + ml::api::CAnomalyJob origJob( + JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream, +
boost::bind(&reportPersistComplete, _1, boost::ref(origSnapshotId), + boost::ref(numOrigDocs)), + nullptr, -1, "time", timeFormat); using TScopedInputParserP = boost::scoped_ptr<ml::api::CInputParser>; TScopedInputParserP parser; @@ -192,7 +199,8 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFil parser.reset(new ml::api::CLineifiedJsonInputParser(inputStrm)); } - CPPUNIT_ASSERT(parser->readStream(boost::bind(&ml::api::CAnomalyJob::handleRecord, &origJob, _1))); + CPPUNIT_ASSERT(parser->readStream( + boost::bind(&ml::api::CAnomalyJob::handleRecord, &origJob, _1))); // Persist the detector state to file(s) @@ -206,7 +214,8 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFil CPPUNIT_ASSERT(origJob.persistState(persister)); } - std::string origBaseDocId(JOB_ID + '_' + ml::api::CAnomalyJob::STATE_TYPE + '_' + origSnapshotId); + std::string origBaseDocId(JOB_ID + '_' + ml::api::CAnomalyJob::STATE_TYPE + + '_' + origSnapshotId); std::string temp; TStrVec origFileContents(numOrigDocs); @@ -215,12 +224,14 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFil expectedOrigFilename += "/_"; expectedOrigFilename += ml::api::CAnomalyJob::ML_STATE_INDEX; expectedOrigFilename += '/'; - expectedOrigFilename += ml::core::CDataAdder::makeCurrentDocId(origBaseDocId, 1 + index); + expectedOrigFilename += + ml::core::CDataAdder::makeCurrentDocId(origBaseDocId, 1 + index); expectedOrigFilename += ml::test::CMultiFileDataAdder::JSON_FILE_EXT; LOG_DEBUG(<< "Trying to open file: " << expectedOrigFilename); std::ifstream origFile(expectedOrigFilename.c_str()); CPPUNIT_ASSERT(origFile.is_open()); - std::string json((std::istreambuf_iterator<char>(origFile)), std::istreambuf_iterator<char>()); + std::string json((std::istreambuf_iterator<char>(origFile)), + std::istreambuf_iterator<char>()); origFileContents[index] = json; // Ensure that the JSON is valid, by parsing string using Rapidjson @@ -233,12 +244,10 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFil std::string restoredSnapshotId; std::size_t numRestoredDocs(0); - ml::api::CAnomalyJob restoredJob(JOB_ID, - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - boost::bind(&reportPersistComplete, _1, boost::ref(restoredSnapshotId), boost::ref(numRestoredDocs))); + ml::api::CAnomalyJob restoredJob( + JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream, + boost::bind(&reportPersistComplete, _1, boost::ref(restoredSnapshotId), + boost::ref(numRestoredDocs))); { ml::core_t::TTime completeToTime(0); @@ -260,18 +269,21 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFil CPPUNIT_ASSERT(restoredJob.persistState(persister)); } - std::string restoredBaseDocId(JOB_ID + '_' + ml::api::CAnomalyJob::STATE_TYPE + '_' + restoredSnapshotId); + std::string restoredBaseDocId(JOB_ID + '_' + ml::api::CAnomalyJob::STATE_TYPE + + '_' + restoredSnapshotId); for (size_t index = 0; index < numRestoredDocs; ++index) { std::string expectedRestoredFilename(baseRestoredOutputFilename); expectedRestoredFilename += "/_"; expectedRestoredFilename += ml::api::CAnomalyJob::ML_STATE_INDEX; expectedRestoredFilename += '/'; - expectedRestoredFilename += ml::core::CDataAdder::makeCurrentDocId(restoredBaseDocId, 1 + index); + expectedRestoredFilename += + ml::core::CDataAdder::makeCurrentDocId(restoredBaseDocId, 1 + index); expectedRestoredFilename += ml::test::CMultiFileDataAdder::JSON_FILE_EXT; std::ifstream restoredFile(expectedRestoredFilename.c_str());
CPPUNIT_ASSERT(restoredFile.is_open()); - std::string json((std::istreambuf_iterator<char>(restoredFile)), std::istreambuf_iterator<char>()); + std::string json((std::istreambuf_iterator<char>(restoredFile)), + std::istreambuf_iterator<char>()); CPPUNIT_ASSERT_EQUAL(origFileContents[index], json); } diff --git a/lib/api/unittest/COutputChainerTest.cc b/lib/api/unittest/COutputChainerTest.cc index c71ef148ce..30d87d21d4 100644 --- a/lib/api/unittest/COutputChainerTest.cc +++ b/lib/api/unittest/COutputChainerTest.cc @@ -24,8 +24,8 @@ CppUnit::Test* COutputChainerTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COutputChainerTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<COutputChainerTest>("COutputChainerTest::testChaining", &COutputChainerTest::testChaining)); + suiteOfTests->addTest(new CppUnit::TestCaller<COutputChainerTest>( + "COutputChainerTest::testChaining", &COutputChainerTest::testChaining)); return suiteOfTests; } @@ -51,18 +51,12 @@ void COutputChainerTest::testChaining() { ml::api::CFieldConfig fieldConfig; CPPUNIT_ASSERT(fieldConfig.initFromFile("testfiles/new_mlfields.conf")); - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); - ml::api::CAnomalyJob job("job", - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, + ml::api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, ml::api::CAnomalyJob::TPersistCompleteFunc(), - nullptr, - -1, - "time", - "%d/%b/%Y:%T %z"); + nullptr, -1, "time", "%d/%b/%Y:%T %z"); ml::api::COutputChainer outputChainer(job); @@ -70,7 +64,8 @@ void COutputChainerTest::testChaining() { ml::api::CLineifiedJsonInputParser parser(inputStrm); - CPPUNIT_ASSERT(parser.readStream(boost::bind(&CMockDataProcessor::handleRecord, &mockProcessor, _1))); + CPPUNIT_ASSERT(parser.readStream( + boost::bind(&CMockDataProcessor::handleRecord, &mockProcessor, _1))); } // Check the results by re-reading the output file diff --git a/lib/api/unittest/CRestorePreviousStateTest.cc b/lib/api/unittest/CRestorePreviousStateTest.cc index 023fef845f..02a1b4f08f 100644 --- a/lib/api/unittest/CRestorePreviousStateTest.cc +++ b/lib/api/unittest/CRestorePreviousStateTest.cc @@ -48,86 +48,91 @@ struct SRestoreTestConfig { bool s_CategorizerRestoreIsSymmetric; }; -const std::vector<SRestoreTestConfig> BWC_VERSIONS{SRestoreTestConfig{"5.6.0", false, true}, - SRestoreTestConfig{"6.0.0", false, true}, - SRestoreTestConfig{"6.1.0", false, true}}; +const std::vector<SRestoreTestConfig> BWC_VERSIONS{ + SRestoreTestConfig{"5.6.0", false, true}, SRestoreTestConfig{"6.0.0", false, true}, + SRestoreTestConfig{"6.1.0", false, true}}; } CppUnit::Test* CRestorePreviousStateTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRestorePreviousStateTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>("CRestorePreviousStateTest::testRestoreDetectorPersistBy", - &CRestorePreviousStateTest::testRestoreDetectorBy)); - suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>("CRestorePreviousStateTest::testRestoreDetectorOver", - &CRestorePreviousStateTest::testRestoreDetectorOver)); - suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>("CRestorePreviousStateTest::testRestoreDetectorPartition", - &CRestorePreviousStateTest::testRestoreDetectorPartition)); - suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>("CRestorePreviousStateTest::testRestoreDetectorDc", - &CRestorePreviousStateTest::testRestoreDetectorDc)); - suiteOfTests->addTest(new
CppUnit::TestCaller<CRestorePreviousStateTest>("CRestorePreviousStateTest::testRestoreDetectorCount", - &CRestorePreviousStateTest::testRestoreDetectorCount)); - suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>("CRestorePreviousStateTest::testRestoreNormalizer", - &CRestorePreviousStateTest::testRestoreNormalizer)); - suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>("CRestorePreviousStateTest::testRestoreCategorizer", - &CRestorePreviousStateTest::testRestoreCategorizer)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>( + "CRestorePreviousStateTest::testRestoreDetectorPersistBy", + &CRestorePreviousStateTest::testRestoreDetectorBy)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>( + "CRestorePreviousStateTest::testRestoreDetectorOver", + &CRestorePreviousStateTest::testRestoreDetectorOver)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>( + "CRestorePreviousStateTest::testRestoreDetectorPartition", + &CRestorePreviousStateTest::testRestoreDetectorPartition)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>( + "CRestorePreviousStateTest::testRestoreDetectorDc", + &CRestorePreviousStateTest::testRestoreDetectorDc)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>( + "CRestorePreviousStateTest::testRestoreDetectorCount", + &CRestorePreviousStateTest::testRestoreDetectorCount)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>( + "CRestorePreviousStateTest::testRestoreNormalizer", + &CRestorePreviousStateTest::testRestoreNormalizer)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRestorePreviousStateTest>( + "CRestorePreviousStateTest::testRestoreCategorizer", + &CRestorePreviousStateTest::testRestoreCategorizer)); return suiteOfTests; } void CRestorePreviousStateTest::testRestoreDetectorBy() { for (const auto& version : BWC_VERSIONS) { LOG_INFO(<< "Test restoring state from version " << version.s_Version); - this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/by_detector_state.json", - "testfiles/new_mlfields.conf", - version.s_DetectorRestoreIsSymmetric, - 0); + this->anomalyDetectorRestoreHelper( + "testfiles/state/" + version.s_Version + "/by_detector_state.json", + "testfiles/new_mlfields.conf", version.s_DetectorRestoreIsSymmetric, 0); } } void CRestorePreviousStateTest::testRestoreDetectorOver() { for (const auto& version : BWC_VERSIONS) { LOG_INFO(<< "Test restoring state from version " << version.s_Version); - this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/over_detector_state.json", - "testfiles/new_mlfields_over.conf", - version.s_DetectorRestoreIsSymmetric, - 0); + this->anomalyDetectorRestoreHelper( + "testfiles/state/" + version.s_Version + "/over_detector_state.json", + "testfiles/new_mlfields_over.conf", version.s_DetectorRestoreIsSymmetric, 0); } } void CRestorePreviousStateTest::testRestoreDetectorPartition() { for (const auto& version : BWC_VERSIONS) { LOG_INFO(<< "Test restoring state from version " << version.s_Version); - this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/partition_detector_state.json", - "testfiles/new_mlfields_partition.conf", - version.s_DetectorRestoreIsSymmetric, - 0); + this->anomalyDetectorRestoreHelper( + "testfiles/state/" + version.s_Version + "/partition_detector_state.json", + "testfiles/new_mlfields_partition.conf", + version.s_DetectorRestoreIsSymmetric, 0); } } void CRestorePreviousStateTest::testRestoreDetectorDc() { for (const auto& version : BWC_VERSIONS) { LOG_INFO(<< "Test restoring state from version " << version.s_Version); - this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version +
"/dc_detector_state.json", - "testfiles/new_persist_dc.conf", - version.s_DetectorRestoreIsSymmetric, - 5); + this->anomalyDetectorRestoreHelper( + "testfiles/state/" + version.s_Version + "/dc_detector_state.json", + "testfiles/new_persist_dc.conf", version.s_DetectorRestoreIsSymmetric, 5); } } void CRestorePreviousStateTest::testRestoreDetectorCount() { for (const auto& version : BWC_VERSIONS) { LOG_INFO(<< "Test restoring state from version " << version.s_Version); - this->anomalyDetectorRestoreHelper("testfiles/state/" + version.s_Version + "/count_detector_state.json", - "testfiles/new_persist_count.conf", - version.s_DetectorRestoreIsSymmetric, - 5); + this->anomalyDetectorRestoreHelper( + "testfiles/state/" + version.s_Version + "/count_detector_state.json", + "testfiles/new_persist_count.conf", version.s_DetectorRestoreIsSymmetric, 5); } } void CRestorePreviousStateTest::testRestoreNormalizer() { for (const auto& version : BWC_VERSIONS) { - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); ml::api::CCsvOutputWriter outputWriter; ml::api::CResultNormalizer normalizer(modelConfig, outputWriter); - CPPUNIT_ASSERT(normalizer.initNormalizer("testfiles/state/" + version.s_Version + "/normalizer_state.json")); + CPPUNIT_ASSERT(normalizer.initNormalizer( + "testfiles/state/" + version.s_Version + "/normalizer_state.json")); } } @@ -139,7 +144,8 @@ void CRestorePreviousStateTest::testRestoreCategorizer() { } } -void CRestorePreviousStateTest::categorizerRestoreHelper(const std::string& stateFile, bool isSymmetric) { +void CRestorePreviousStateTest::categorizerRestoreHelper(const std::string& stateFile, + bool isSymmetric) { ml::model::CLimits limits; ml::api::CFieldConfig config("count", "mlcategory"); @@ -150,7 +156,8 @@ void CRestorePreviousStateTest::categorizerRestoreHelper(const std::string& stat std::ifstream inputStrm(stateFile.c_str()); CPPUNIT_ASSERT(inputStrm.is_open()); - std::string origPersistedState(std::istreambuf_iterator{inputStrm}, std::istreambuf_iterator{}); + std::string origPersistedState(std::istreambuf_iterator{inputStrm}, + std::istreambuf_iterator{}); { ml::core_t::TTime completeToTime(0); @@ -176,7 +183,8 @@ void CRestorePreviousStateTest::categorizerRestoreHelper(const std::string& stat CPPUNIT_ASSERT(restoredTyper.persistState(persister)); newPersistedState = strm->str(); } - CPPUNIT_ASSERT_EQUAL(this->stripDocIds(origPersistedState), this->stripDocIds(newPersistedState)); + CPPUNIT_ASSERT_EQUAL(this->stripDocIds(origPersistedState), + this->stripDocIds(newPersistedState)); } } @@ -187,7 +195,8 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string& // Open the input state file std::ifstream inputStrm(stateFile.c_str()); CPPUNIT_ASSERT(inputStrm.is_open()); - std::string origPersistedState(std::istreambuf_iterator{inputStrm}, std::istreambuf_iterator{}); + std::string origPersistedState(std::istreambuf_iterator{inputStrm}, + std::istreambuf_iterator{}); // Start by creating a detector with non-trivial state static const ml::core_t::TTime BUCKET_SIZE(3600); @@ -197,8 +206,10 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string& ml::api::CFieldConfig fieldConfig; CPPUNIT_ASSERT(fieldConfig.initFromFile(configFileName)); - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - 
BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false, ""); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig( + BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, + 0, false, ""); std::ofstream outputStrm(ml::core::COsFileFuncs::NULL_FILENAME); CPPUNIT_ASSERT(outputStrm.is_open()); @@ -207,12 +218,10 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string& std::string restoredSnapshotId; std::size_t numRestoredDocs(0); - ml::api::CAnomalyJob restoredJob(JOB_ID, - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - boost::bind(&reportPersistComplete, _1, boost::ref(restoredSnapshotId), boost::ref(numRestoredDocs))); + ml::api::CAnomalyJob restoredJob( + JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream, + boost::bind(&reportPersistComplete, _1, boost::ref(restoredSnapshotId), + boost::ref(numRestoredDocs))); std::size_t numDocsInStateFile(0); { @@ -226,7 +235,8 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string& in << origPersistedState; in.flush(); - numDocsInStateFile = in.component<ml::api::CStateRestoreStreamFilter>(0)->getDocCount(); + numDocsInStateFile = + in.component<ml::api::CStateRestoreStreamFilter>(0)->getDocCount(); ml::api::CSingleStreamSearcher retriever(strm); CPPUNIT_ASSERT(restoredJob.restoreState(retriever, completeToTime)); @@ -246,7 +256,8 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string& } CPPUNIT_ASSERT_EQUAL(numRestoredDocs, numDocsInStateFile); - CPPUNIT_ASSERT_EQUAL(this->stripDocIds(origPersistedState), this->stripDocIds(newPersistedState)); + CPPUNIT_ASSERT_EQUAL(this->stripDocIds(origPersistedState), + this->stripDocIds(newPersistedState)); } } diff --git a/lib/api/unittest/CRestorePreviousStateTest.h b/lib/api/unittest/CRestorePreviousStateTest.h index 5dd11379d7..e68ebe468f 100644 --- a/lib/api/unittest/CRestorePreviousStateTest.h +++ b/lib/api/unittest/CRestorePreviousStateTest.h @@ -21,8 +21,10 @@ class CRestorePreviousStateTest : public CppUnit::TestFixture { static CppUnit::Test* suite(); private: - void - anomalyDetectorRestoreHelper(const std::string& stateFile, const std::string& configFileName, bool isSymmetric, int latencyBuckets); + void anomalyDetectorRestoreHelper(const std::string& stateFile, + const std::string& configFileName, + bool isSymmetric, + int latencyBuckets); void categorizerRestoreHelper(const std::string& stateFile, bool isSymmetric); diff --git a/lib/api/unittest/CResultNormalizerTest.cc b/lib/api/unittest/CResultNormalizerTest.cc index aa29f63341..108c4a327a 100644 --- a/lib/api/unittest/CResultNormalizerTest.cc +++ b/lib/api/unittest/CResultNormalizerTest.cc @@ -24,14 +24,15 @@ CppUnit::Test* CResultNormalizerTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CResultNormalizerTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CResultNormalizerTest>("CResultNormalizerTest::testInitNormalizer", - &CResultNormalizerTest::testInitNormalizer)); + suiteOfTests->addTest(new CppUnit::TestCaller<CResultNormalizerTest>( + "CResultNormalizerTest::testInitNormalizer", &CResultNormalizerTest::testInitNormalizer)); return suiteOfTests; } void
CResultNormalizerTest::testInitNormalizer() { - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); ml::api::CLineifiedJsonOutputWriter outputWriter; @@ -41,7 +42,8 @@ void CResultNormalizerTest::testInitNormalizer() { std::ifstream inputStrm("testfiles/normalizerInput.csv"); ml::api::CCsvInputParser inputParser(inputStrm, ml::api::CCsvInputParser::COMMA); - CPPUNIT_ASSERT(inputParser.readStream(boost::bind(&ml::api::CResultNormalizer::handleRecord, &normalizer, _1))); + CPPUNIT_ASSERT(inputParser.readStream( + boost::bind(&ml::api::CResultNormalizer::handleRecord, &normalizer, _1))); std::string results(outputWriter.internalString()); LOG_DEBUG(<< "Results:\n" << results); @@ -55,89 +57,138 @@ void CResultNormalizerTest::testInitNormalizer() { resultDocs.back().Parse(docString.c_str()); } - CPPUNIT_ASSERT_EQUAL(std::vector<rapidjson::Document>::size_type{38}, resultDocs.size()); + CPPUNIT_ASSERT_EQUAL(std::vector<rapidjson::Document>::size_type{38}, + resultDocs.size()); // The maximum bucketTime influencer probability in the Savvis data used to initialise // the normaliser is 2.56098e-205, so this should map to the highest normalised // score which is 98.28496 { const rapidjson::Document& doc = resultDocs[0]; - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("2.56098e-205"), std::string(doc["probability"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("bucketTime"), std::string(doc["person_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["value_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["function_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("2.56098e-205"), + std::string(doc["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("bucketTime"), + std::string(doc["person_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["partition_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("root"), std::string(doc["level"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("98.28496"), std::string(doc["normalized_score"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("98.28496"), + std::string(doc["normalized_score"].GetString())); } { const rapidjson::Document& doc = resultDocs[1]; - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("2.93761e-203"), std::string(doc["probability"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("status"), std::string(doc["person_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["value_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["function_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("2.93761e-203"), + std::string(doc["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("status"), + std::string(doc["person_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["partition_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("inflb"), std::string(doc["level"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("97.26764"), std::string(doc["normalized_score"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("97.26764"), + std::string(doc["normalized_score"].GetString())); } { const
rapidjson::Document& doc = resultDocs[2]; - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("5.56572e-204"), std::string(doc["probability"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("status"), std::string(doc["person_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["value_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["function_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("5.56572e-204"), + std::string(doc["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("status"), + std::string(doc["person_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["partition_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("infl"), std::string(doc["level"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("98.56057"), std::string(doc["normalized_score"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("98.56057"), + std::string(doc["normalized_score"].GetString())); } { const rapidjson::Document& doc = resultDocs[4]; - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("count"), std::string(doc["function_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("1e-300"), std::string(doc["probability"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("status"), std::string(doc["person_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["value_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("count"), + std::string(doc["function_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("1e-300"), + std::string(doc["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("status"), + std::string(doc["person_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["partition_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("leaf"), std::string(doc["level"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("99.19481"), std::string(doc["normalized_score"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("99.19481"), + std::string(doc["normalized_score"].GetString())); } { const rapidjson::Document& doc = resultDocs[15]; - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("1e-10"), std::string(doc["probability"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("bucketTime"), std::string(doc["person_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["value_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["function_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("1e-10"), + std::string(doc["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("bucketTime"), + std::string(doc["person_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["partition_field_name"].GetString())); 
CPPUNIT_ASSERT_EQUAL(std::string("root"), std::string(doc["level"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("31.20283"), std::string(doc["normalized_score"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("31.20283"), + std::string(doc["normalized_score"].GetString())); } { const rapidjson::Document& doc = resultDocs[35]; - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("1"), std::string(doc["probability"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("bucketTime"), std::string(doc["person_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["value_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["function_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("1"), + std::string(doc["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("bucketTime"), + std::string(doc["person_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["partition_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("root"), std::string(doc["level"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("0"), std::string(doc["normalized_score"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("0"), + std::string(doc["normalized_score"].GetString())); } { const rapidjson::Document& doc = resultDocs[36]; - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["function_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("1"), std::string(doc["probability"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("status"), std::string(doc["person_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["value_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["function_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("1"), + std::string(doc["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("status"), + std::string(doc["person_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["partition_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("infl"), std::string(doc["level"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("0"), std::string(doc["normalized_score"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("0"), + std::string(doc["normalized_score"].GetString())); } { const rapidjson::Document& doc = resultDocs[37]; - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["value_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("count"), std::string(doc["function_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("1"), std::string(doc["probability"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("status"), std::string(doc["person_field_name"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(doc["partition_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["value_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("count"), + std::string(doc["function_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("1"), + 
std::string(doc["probability"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("status"), + std::string(doc["person_field_name"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string(""), + std::string(doc["partition_field_name"].GetString())); CPPUNIT_ASSERT_EQUAL(std::string("leaf"), std::string(doc["level"].GetString())); - CPPUNIT_ASSERT_EQUAL(std::string("0"), std::string(doc["normalized_score"].GetString())); + CPPUNIT_ASSERT_EQUAL(std::string("0"), + std::string(doc["normalized_score"].GetString())); } } diff --git a/lib/api/unittest/CSingleStreamDataAdderTest.cc b/lib/api/unittest/CSingleStreamDataAdderTest.cc index 328ce1281f..49d718a17c 100644 --- a/lib/api/unittest/CSingleStreamDataAdderTest.cc +++ b/lib/api/unittest/CSingleStreamDataAdderTest.cc @@ -49,43 +49,55 @@ void reportPersistComplete(ml::api::CModelSnapshotJsonWriter::SModelSnapshotRepo CppUnit::Test* CSingleStreamDataAdderTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSingleStreamDataAdderTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistBy", - &CSingleStreamDataAdderTest::testDetectorPersistBy)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistOver", - &CSingleStreamDataAdderTest::testDetectorPersistOver)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistPartition", - &CSingleStreamDataAdderTest::testDetectorPersistPartition)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistDc", - &CSingleStreamDataAdderTest::testDetectorPersistDc)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSingleStreamDataAdderTest::testDetectorPersistCount", - &CSingleStreamDataAdderTest::testDetectorPersistCount)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CSingleStreamDataAdderTest::testDetectorPersistCategorization", &CSingleStreamDataAdderTest::testDetectorPersistCategorization)); + "CSingleStreamDataAdderTest::testDetectorPersistBy", + &CSingleStreamDataAdderTest::testDetectorPersistBy)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSingleStreamDataAdderTest::testDetectorPersistOver", + &CSingleStreamDataAdderTest::testDetectorPersistOver)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSingleStreamDataAdderTest::testDetectorPersistPartition", + &CSingleStreamDataAdderTest::testDetectorPersistPartition)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSingleStreamDataAdderTest::testDetectorPersistDc", + &CSingleStreamDataAdderTest::testDetectorPersistDc)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSingleStreamDataAdderTest::testDetectorPersistCount", + &CSingleStreamDataAdderTest::testDetectorPersistCount)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSingleStreamDataAdderTest::testDetectorPersistCategorization", + &CSingleStreamDataAdderTest::testDetectorPersistCategorization)); return suiteOfTests; } void CSingleStreamDataAdderTest::testDetectorPersistBy() { - this->detectorPersistHelper("testfiles/new_mlfields.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); + this->detectorPersistHelper("testfiles/new_mlfields.conf", + "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); } void CSingleStreamDataAdderTest::testDetectorPersistOver() { - this->detectorPersistHelper("testfiles/new_mlfields_over.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); + this->detectorPersistHelper("testfiles/new_mlfields_over.conf", + "testfiles/big_ascending.txt", 0, 
"%d/%b/%Y:%T %z"); } void CSingleStreamDataAdderTest::testDetectorPersistPartition() { - this->detectorPersistHelper("testfiles/new_mlfields_partition.conf", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); + this->detectorPersistHelper("testfiles/new_mlfields_partition.conf", + "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); } void CSingleStreamDataAdderTest::testDetectorPersistDc() { - this->detectorPersistHelper("testfiles/new_persist_dc.conf", "testfiles/files_users_programs.csv", 5); + this->detectorPersistHelper("testfiles/new_persist_dc.conf", + "testfiles/files_users_programs.csv", 5); } void CSingleStreamDataAdderTest::testDetectorPersistCount() { - this->detectorPersistHelper("testfiles/new_persist_count.conf", "testfiles/files_users_programs.csv", 5); + this->detectorPersistHelper("testfiles/new_persist_count.conf", + "testfiles/files_users_programs.csv", 5); } void CSingleStreamDataAdderTest::testDetectorPersistCategorization() { - this->detectorPersistHelper("testfiles/new_persist_categorization.conf", "testfiles/time_messages.csv", 0); + this->detectorPersistHelper("testfiles/new_persist_categorization.conf", + "testfiles/time_messages.csv", 0); } void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& configFileName, @@ -107,24 +119,21 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& config ml::api::CFieldConfig fieldConfig; CPPUNIT_ASSERT(fieldConfig.initFromFile(configFileName)); - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false, ""); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig( + BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, + 0, false, ""); ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); ml::api::CJsonOutputWriter outputWriter(JOB_ID, wrappedOutputStream); std::string origSnapshotId; std::size_t numOrigDocs(0); - ml::api::CAnomalyJob origJob(JOB_ID, - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - boost::bind(&reportPersistComplete, _1, boost::ref(origSnapshotId), boost::ref(numOrigDocs)), - nullptr, - -1, - "time", - timeFormat); + ml::api::CAnomalyJob origJob( + JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream, + boost::bind(&reportPersistComplete, _1, boost::ref(origSnapshotId), + boost::ref(numOrigDocs)), + nullptr, -1, "time", timeFormat); ml::api::CDataProcessor* firstProcessor(&origJob); @@ -147,7 +156,8 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& config parser.reset(new ml::api::CLineifiedJsonInputParser(inputStrm)); } - CPPUNIT_ASSERT(parser->readStream(boost::bind(&ml::api::CDataProcessor::handleRecord, firstProcessor, _1))); + CPPUNIT_ASSERT(parser->readStream( + boost::bind(&ml::api::CDataProcessor::handleRecord, firstProcessor, _1))); // Persist the detector state to a stringstream @@ -164,12 +174,10 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& config std::string restoredSnapshotId; std::size_t numRestoredDocs(0); - ml::api::CAnomalyJob restoredJob(JOB_ID, - limits, - fieldConfig, - modelConfig, - wrappedOutputStream, - boost::bind(&reportPersistComplete, _1, boost::ref(restoredSnapshotId), boost::ref(numRestoredDocs))); + ml::api::CAnomalyJob restoredJob( + JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream, + boost::bind(&reportPersistComplete, _1, 
boost::ref(restoredSnapshotId), + boost::ref(numRestoredDocs))); ml::api::CDataProcessor* restoredFirstProcessor(&restoredJob); @@ -177,7 +185,8 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& config ml::api::COutputChainer restoredOutputChainer(restoredJob); // The typer knows how to assign categories to records - ml::api::CFieldDataTyper restoredTyper(JOB_ID, fieldConfig, limits, restoredOutputChainer, outputWriter); + ml::api::CFieldDataTyper restoredTyper(JOB_ID, fieldConfig, limits, + restoredOutputChainer, outputWriter); size_t numCategorizerDocs(0); @@ -199,7 +208,9 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& config CPPUNIT_ASSERT(restoredFirstProcessor->restoreState(retriever, completeToTime)); CPPUNIT_ASSERT(completeToTime > 0); - CPPUNIT_ASSERT_EQUAL(numOrigDocs + numCategorizerDocs, strm->component(0)->getDocCount()); + CPPUNIT_ASSERT_EQUAL( + numOrigDocs + numCategorizerDocs, + strm->component(0)->getDocCount()); } // Finally, persist the new detector state and compare the result @@ -216,8 +227,10 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& config // The snapshot ID can be different between the two persists, so replace the // first occurrence of it (which is in the bulk metadata) - CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(origSnapshotId, "snap", origPersistedState)); - CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst(restoredSnapshotId, "snap", newPersistedState)); + CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst( + origSnapshotId, "snap", origPersistedState)); + CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst( + restoredSnapshotId, "snap", newPersistedState)); CPPUNIT_ASSERT_EQUAL(origPersistedState, newPersistedState); } diff --git a/lib/api/unittest/CStateRestoreStreamFilterTest.cc b/lib/api/unittest/CStateRestoreStreamFilterTest.cc index 6d4632cfb1..9b42501ef6 100644 --- a/lib/api/unittest/CStateRestoreStreamFilterTest.cc +++ b/lib/api/unittest/CStateRestoreStreamFilterTest.cc @@ -17,10 +17,11 @@ CppUnit::Test* CStateRestoreStreamFilterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRestoreStreamFilterTest"); suiteOfTests->addTest(new CppUnit::TestCaller( - "CRestoreStreamFilterTest::testBulkIndexHeaderRemoval", &CStateRestoreStreamFilterTest::testBulkIndexHeaderRemoval)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte", - &CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte)); + "CRestoreStreamFilterTest::testBulkIndexHeaderRemoval", + &CStateRestoreStreamFilterTest::testBulkIndexHeaderRemoval)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte", + &CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte)); return suiteOfTests; } @@ -32,7 +33,8 @@ void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemoval() { boost::iostreams::filtering_istream in; in.push(ml::api::CStateRestoreStreamFilter()); in.push(input); - std::string output(std::istreambuf_iterator{in}, std::istreambuf_iterator{}); + std::string output(std::istreambuf_iterator{in}, + std::istreambuf_iterator{}); std::string expected("{\"_id\":\"some_id\",\"_version\":1,\"found\":true,\"_source\":" "{\"compressed\" : [ \"a\",\"b\"]}}"); @@ -58,7 +60,8 @@ void CStateRestoreStreamFilterTest::testBulkIndexHeaderRemovalZerobyte() { 
boost::iostreams::filtering_istream in; in.push(ml::api::CStateRestoreStreamFilter()); in.push(input); - std::string output(std::istreambuf_iterator{in}, std::istreambuf_iterator{}); + std::string output(std::istreambuf_iterator{in}, + std::istreambuf_iterator{}); std::string expected("{\"_id\":\"some_id\",\"_version\":1,\"found\":true,\"_source\":" "{\"compressed\" : [ \"a\",\"b\"]}}"); diff --git a/lib/api/unittest/CStringStoreTest.cc b/lib/api/unittest/CStringStoreTest.cc index 22a2ba36f6..35217b29d9 100644 --- a/lib/api/unittest/CStringStoreTest.cc +++ b/lib/api/unittest/CStringStoreTest.cc @@ -44,9 +44,15 @@ size_t countBuckets(const std::string& key, const std::string& output) { return count; } -core_t::TTime -playData(core_t::TTime start, core_t::TTime span, int numBuckets, int numPeople, int numPartitions, int anomaly, api::CAnomalyJob& job) { - std::string people[] = {"Elgar", "Holst", "Delius", "Vaughan Williams", "Bliss", "Warlock", "Walton"}; +core_t::TTime playData(core_t::TTime start, + core_t::TTime span, + int numBuckets, + int numPeople, + int numPartitions, + int anomaly, + api::CAnomalyJob& job) { + std::string people[] = {"Elgar", "Holst", "Delius", "Vaughan Williams", + "Bliss", "Warlock", "Walton"}; if (numPeople > 7) { LOG_ERROR(<< "Too many people: " << numPeople); return start; @@ -68,7 +74,8 @@ playData(core_t::TTime start, core_t::TTime span, int numBuckets, int numPeople, } } if (bucketNum == anomaly) { - ss << t << "," << 5564 << "," << people[numPeople - 1] << "," << partitions[numPartitions - 1] << "\n"; + ss << t << "," << 5564 << "," << people[numPeople - 1] << "," + << partitions[numPartitions - 1] << "\n"; } } @@ -86,18 +93,22 @@ struct SLookup { return hasher(key); } - bool operator()(const std::string& lhs, const core::CStoredStringPtr& rhs) const { return lhs == *rhs; } + bool operator()(const std::string& lhs, const core::CStoredStringPtr& rhs) const { + return lhs == *rhs; + } }; } // namespace bool CStringStoreTest::nameExists(const std::string& string) { - model::CStringStore::TStoredStringPtrUSet names = model::CStringStore::names().m_Strings; + model::CStringStore::TStoredStringPtrUSet names = + model::CStringStore::names().m_Strings; return names.find(string, ::SLookup(), ::SLookup()) != names.end(); } bool CStringStoreTest::influencerExists(const std::string& string) { - model::CStringStore::TStoredStringPtrUSet names = model::CStringStore::influencers().m_Strings; + model::CStringStore::TStoredStringPtrUSet names = + model::CStringStore::influencers().m_Strings; return names.find(string, ::SLookup(), ::SLookup()) != names.end(); } @@ -114,7 +125,8 @@ void CStringStoreTest::testPersonStringPruning() { CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); modelConfig.decayRate(0.001); modelConfig.bucketResultsDelay(2); @@ -129,8 +141,10 @@ void CStringStoreTest::testPersonStringPruning() { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + 
model::CStringStore::names().m_Strings.size()); LOG_TRACE(<< "Setting up job"); @@ -145,10 +159,12 @@ void CStringStoreTest::testPersonStringPruning() { time = playData(time, BUCKET_SPAN, 100, 3, 2, 99, job); wrappedOutputStream.syncFlush(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), countBuckets("records", outputStrm.str() + "]")); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + countBuckets("records", outputStrm.str() + "]")); // No influencers in this configuration - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); // "", "count", "max", "notes", "composer", "instrument", "Elgar", "Holst", "Delius", "flute", "tuba" CPPUNIT_ASSERT(this->nameExists("count")); @@ -168,7 +184,8 @@ void CStringStoreTest::testPersonStringPruning() { CPPUNIT_ASSERT(job.persistState(adder)); wrappedOutputStream.syncFlush(); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), countBuckets("records", outputStrm.str() + "]")); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + countBuckets("records", outputStrm.str() + "]")); } LOG_DEBUG(<< "Restoring job"); @@ -176,19 +193,23 @@ void CStringStoreTest::testPersonStringPruning() { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, + api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); adder.clear(); // No influencers in this configuration - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); // "", "count", "max", "notes", "composer", "instrument", "Elgar", "Holst", "Delius", "flute", "tuba" CPPUNIT_ASSERT(this->nameExists("count")); @@ -214,19 +235,23 @@ void CStringStoreTest::testPersonStringPruning() { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, + api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); 
adder.clear(); // No influencers in this configuration - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); // While the 3 composers from the second partition should have been culled in the prune, // their names still exist in the first partition, so will still be in the string store @@ -253,19 +278,23 @@ void CStringStoreTest::testPersonStringPruning() { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, + api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); adder.clear(); // No influencers in this configuration - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); // One composer should have been culled! CPPUNIT_ASSERT(this->nameExists("count")); @@ -294,7 +323,8 @@ void CStringStoreTest::testAttributeStringPruning() { CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); modelConfig.decayRate(0.001); modelConfig.bucketResultsDelay(2); @@ -309,8 +339,10 @@ void CStringStoreTest::testAttributeStringPruning() { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::names().m_Strings.size()); LOG_TRACE(<< "Setting up job"); std::ostringstream outputStrm; @@ -323,10 +355,12 @@ void CStringStoreTest::testAttributeStringPruning() { time = playData(time, BUCKET_SPAN, 100, 3, 2, 99, job); wrappedOutputStream.syncFlush(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), countBuckets("records", outputStrm.str() + "]")); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + countBuckets("records", outputStrm.str() + "]")); // No influencers in this configuration - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); // "", "count", "distinct_count", "notes", "composer", "instrument", "Elgar", "Holst", "Delius", "flute", "tuba" LOG_DEBUG(<< 
core::CContainerPrinter::print(model::CStringStore::names().m_Strings)); @@ -346,27 +380,32 @@ void CStringStoreTest::testAttributeStringPruning() { CPPUNIT_ASSERT(job.persistState(adder)); wrappedOutputStream.syncFlush(); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), countBuckets("records", outputStrm.str() + "]")); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + countBuckets("records", outputStrm.str() + "]")); } LOG_DEBUG(<< "Restoring job"); { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, + api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); adder.clear(); // No influencers in this configuration - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); // "", "count", "distinct_count", "notes", "composer", "instrument", "Elgar", "Holst", "Delius", "flute", "tuba" CPPUNIT_ASSERT(this->nameExists("count")); @@ -392,20 +431,24 @@ void CStringStoreTest::testAttributeStringPruning() { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, + api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); adder.clear(); // No influencers in this configuration - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); // While the 3 composers from the second partition should have been culled in the prune, // their names still exist in the first partition, so will still be in the string store @@ -432,20 +475,24 @@ void CStringStoreTest::testAttributeStringPruning() { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), 
model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::names().m_Strings.size()); std::ostringstream outputStrm; ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, api::CAnomalyJob::TPersistCompleteFunc()); + api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, + api::CAnomalyJob::TPersistCompleteFunc()); core_t::TTime completeToTime(0); CPPUNIT_ASSERT(job.restoreState(searcher, completeToTime)); adder.clear(); // No influencers in this configuration - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); // One composer should have been culled! CPPUNIT_ASSERT(this->nameExists("count")); @@ -473,7 +520,8 @@ void CStringStoreTest::testInfluencerStringPruning() { CPPUNIT_ASSERT(fieldConfig.initFromClause(clause)); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); modelConfig.bucketResultsDelay(2); model::CLimits limits; @@ -487,8 +535,10 @@ void CStringStoreTest::testInfluencerStringPruning() { model::CStringStore::influencers().clearEverythingTestOnly(); model::CStringStore::names().clearEverythingTestOnly(); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::names().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + model::CStringStore::names().m_Strings.size()); LOG_TRACE(<< "Setting up job"); std::ostringstream outputStrm; @@ -502,7 +552,8 @@ void CStringStoreTest::testInfluencerStringPruning() { time = playData(time, BUCKET_SPAN, 20, 7, 5, 99, job); LOG_TRACE(<< core::CContainerPrinter::print(model::CStringStore::names().m_Strings)); - LOG_TRACE(<< core::CContainerPrinter::print(model::CStringStore::influencers().m_Strings)); + LOG_TRACE(<< core::CContainerPrinter::print( + model::CStringStore::influencers().m_Strings)); CPPUNIT_ASSERT(this->influencerExists("Delius")); CPPUNIT_ASSERT(this->influencerExists("Walton")); @@ -586,11 +637,13 @@ void CStringStoreTest::testInfluencerStringPruning() { CppUnit::Test* CStringStoreTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStringStoreTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringStoreTest::testPersonStringPruning", &CStringStoreTest::testPersonStringPruning)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringStoreTest::testAttributeStringPruning", - &CStringStoreTest::testAttributeStringPruning)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringStoreTest::testInfluencerStringPruning", - &CStringStoreTest::testInfluencerStringPruning)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringStoreTest::testPersonStringPruning", &CStringStoreTest::testPersonStringPruning)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringStoreTest::testAttributeStringPruning", + &CStringStoreTest::testAttributeStringPruning)); + suiteOfTests->addTest(new CppUnit::TestCaller( + 
"CStringStoreTest::testInfluencerStringPruning", + &CStringStoreTest::testInfluencerStringPruning)); return suiteOfTests; } diff --git a/lib/api/unittest/CTokenListDataTyperTest.cc b/lib/api/unittest/CTokenListDataTyperTest.cc index 9feff0ba38..b251e33695 100644 --- a/lib/api/unittest/CTokenListDataTyperTest.cc +++ b/lib/api/unittest/CTokenListDataTyperTest.cc @@ -17,16 +17,17 @@ namespace { -using TTokenListDataTyperKeepsFields = ml::api::CTokenListDataTyper; +using TTokenListDataTyperKeepsFields = + ml::api::CTokenListDataTyper; const TTokenListDataTyperKeepsFields::TTokenListReverseSearchCreatorIntfCPtr NO_REVERSE_SEARCH_CREATOR; } @@ -34,32 +35,34 @@ const TTokenListDataTyperKeepsFields::TTokenListReverseSearchCreatorIntfCPtr NO_ CppUnit::Test* CTokenListDataTyperTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTokenListDataTyperTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTokenListDataTyperTest::testHexData", &CTokenListDataTyperTest::testHexData)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTokenListDataTyperTest::testRmdsData", &CTokenListDataTyperTest::testRmdsData)); - suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testProxyData", - &CTokenListDataTyperTest::testProxyData)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTokenListDataTyperTest::testFxData", &CTokenListDataTyperTest::testFxData)); - suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testApacheData", - &CTokenListDataTyperTest::testApacheData)); - suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testBrokerageData", - &CTokenListDataTyperTest::testBrokerageData)); - suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testVmwareData", - &CTokenListDataTyperTest::testVmwareData)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTokenListDataTyperTest::testBankData", &CTokenListDataTyperTest::testBankData)); - suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testJavaGcData", - &CTokenListDataTyperTest::testJavaGcData)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTokenListDataTyperTest::testPersist", &CTokenListDataTyperTest::testPersist)); - suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testLongReverseSearch", - &CTokenListDataTyperTest::testLongReverseSearch)); - suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testPreTokenised", - &CTokenListDataTyperTest::testPreTokenised)); - suiteOfTests->addTest(new CppUnit::TestCaller("CTokenListDataTyperTest::testPreTokenisedPerformance", - &CTokenListDataTyperTest::testPreTokenisedPerformance)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testHexData", &CTokenListDataTyperTest::testHexData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testRmdsData", &CTokenListDataTyperTest::testRmdsData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testProxyData", &CTokenListDataTyperTest::testProxyData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testFxData", &CTokenListDataTyperTest::testFxData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testApacheData", &CTokenListDataTyperTest::testApacheData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testBrokerageData", &CTokenListDataTyperTest::testBrokerageData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + 
"CTokenListDataTyperTest::testVmwareData", &CTokenListDataTyperTest::testVmwareData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testBankData", &CTokenListDataTyperTest::testBankData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testJavaGcData", &CTokenListDataTyperTest::testJavaGcData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testPersist", &CTokenListDataTyperTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testLongReverseSearch", + &CTokenListDataTyperTest::testLongReverseSearch)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testPreTokenised", &CTokenListDataTyperTest::testPreTokenised)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListDataTyperTest::testPreTokenisedPerformance", + &CTokenListDataTyperTest::testPreTokenisedPerformance)); return suiteOfTests; } @@ -87,227 +90,213 @@ void CTokenListDataTyperTest::testHexData() { void CTokenListDataTyperTest::testRmdsData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", 500)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source MONEYBROKER on 13112:736 has shut down.", 500)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source CUBE_LIQUID on 13188:2010 has shut down.", 500)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML SERVICE2 on 13122:867 has shut down.", 500)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " Source MONEYBROKER on 13112:736 has started.", 500)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has started.", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_CHIX, id of 132, has started.", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_IDEM, id of 232, has started.", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_IDEM, id of 232, has started.", 500)); - CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " Service CUBE_CHIX has shut down.", 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", + 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source MONEYBROKER on 13112:736 has shut down.", + 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source CUBE_LIQUID on 13188:2010 has shut down.", + 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML SERVICE2 on 13122:867 has shut down.", + 500)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " Source MONEYBROKER on 13112:736 has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_CHIX, id of 132, has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_IDEM, id of 232, has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_IDEM, id of 232, has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " Service CUBE_CHIX has shut down.", + 500)); } void CTokenListDataTyperTest::testProxyData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, - typer.computeType(false, - " [1094662464] INFO transaction 
<3c26701d3140-kn8n1c8f5d2o> - Transaction TID: " - "z9hG4bKy6aEy6aEy6aEaUgi!UmU-Ma.9-6bf50ea0192.168.251.8SUBSCRIBE deleted", - 500)); - CPPUNIT_ASSERT_EQUAL(1, - typer.computeType(false, - " [1091504448] INFO transaction <3c26701ad775-1cref2zy3w9e> - Transaction TID: " - "z9hG4bK_UQA_UQA_UQAsO0i!OG!yYK.25-5bee09e0192.168.251.8SUBSCRIBE deleted", - 500)); - CPPUNIT_ASSERT_EQUAL(2, - typer.computeType(false, - " [1094662464] INFO transactionuser <6508700927200972648@10.10.18.82> - ---------------- " - "DESTROYING RegistrationServer ---------------", - 500)); - CPPUNIT_ASSERT_EQUAL( - 3, - typer.computeType( - false, - " [1111529792] INFO proxy <45409105041220090733@192.168.251.123> - +++++++++++++++ CREATING ProxyCore ++++++++++++++++", - 500)); - CPPUNIT_ASSERT_EQUAL( - 4, - typer.computeType( - false, - " [1091504448] INFO transactionuser <3c26709ab9f0-iih26eh8pxxa> - +++++++++++++++ CREATING PresenceAgent ++++++++++++++++", - 500)); - CPPUNIT_ASSERT_EQUAL(5, - typer.computeType(false, - " [1111529792] INFO session <45409105041220090733@192.168.251.123> - ----------------- PROXY " - "Session DESTROYED --------------------", - 500)); - CPPUNIT_ASSERT_EQUAL(5, - typer.computeType(false, - " [1094662464] INFO session - ----------------- " - "PROXY Session DESTROYED --------------------", - 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, + " [1094662464] INFO transaction <3c26701d3140-kn8n1c8f5d2o> - Transaction TID: " + "z9hG4bKy6aEy6aEy6aEaUgi!UmU-Ma.9-6bf50ea0192.168.251.8SUBSCRIBE deleted", + 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, + " [1091504448] INFO transaction <3c26701ad775-1cref2zy3w9e> - Transaction TID: " + "z9hG4bK_UQA_UQA_UQAsO0i!OG!yYK.25-5bee09e0192.168.251.8SUBSCRIBE deleted", + 500)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, + " [1094662464] INFO transactionuser <6508700927200972648@10.10.18.82> - ---------------- " + "DESTROYING RegistrationServer ---------------", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " [1111529792] INFO proxy <45409105041220090733@192.168.251.123> - +++++++++++++++ CREATING ProxyCore ++++++++++++++++", + 500)); + CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " [1091504448] INFO transactionuser <3c26709ab9f0-iih26eh8pxxa> - +++++++++++++++ CREATING PresenceAgent ++++++++++++++++", + 500)); + CPPUNIT_ASSERT_EQUAL(5, typer.computeType(false, + " [1111529792] INFO session <45409105041220090733@192.168.251.123> - ----------------- PROXY " + "Session DESTROYED --------------------", + 500)); + CPPUNIT_ASSERT_EQUAL(5, typer.computeType(false, + " [1094662464] INFO session - ----------------- " + "PROXY Session DESTROYED --------------------", + 500)); } void CTokenListDataTyperTest::testFxData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL( - 1, - typer.computeType(false, - "javax.ejb.FinderException - findFxCover([]): " - "null", - 500)); - CPPUNIT_ASSERT_EQUAL( - 1, - typer.computeType(false, - "javax.ejb.FinderException - findFxCover([]): " - "null", - 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, + "javax.ejb.FinderException - findFxCover([]): " + "null", + 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, + "javax.ejb.FinderException - findFxCover([]): " + "null", + 500)); } void CTokenListDataTyperTest::testApacheData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol destroy", 500)); 
- CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol init", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol start", 500)); - CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol stop", 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol destroy", + 500)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol init", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol start", + 500)); + CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " org.apache.coyote.http11.Http11BaseProtocol stop", + 500)); } void CTokenListDataTyperTest::testBrokerageData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); CPPUNIT_ASSERT_EQUAL( - 1, - typer.computeType(false, - "AUDIT ; tomcat-http--16; ee96c0c4567c0c11d6b90f9bc8b54aaa77; REQ4e42023e0a0328d020003e460005aa33; " - "applnx911.elastic.co; ; Request Complete: /mlgw/mlb/ofsummary/summary " - "[T=283ms,CUSTPREF-WEB_ACCOUNT_PREFERENCES=95,MAUI-ETSPROF2=155,NBMSG-NB_MESSAGING_SERVICE=164,CustAcctProfile=" - "BRK=2;NB=0;FILI=0;CESG=0;CC=0;AcctTotal=2,migrated=2]", - 500)); - CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType(false, - "AUDIT ; tomcat-http--39; ee763e95747c0b11d6b90f9bc8b54aaa77; REQ4e42023e0a0429a020000c6f0002aa33; " - "applnx811.elastic.co; ; Request Complete: /mlgw/mlb/ofaccounts/brokerageAccountHistory " - "[T=414ms,CUSTPREF-INS_PERSON_WEB_ACCT_PREFERENCES=298,MAUI-PSL04XD=108]", - 500)); - CPPUNIT_ASSERT_EQUAL( - 3, - typer.computeType(false, - "AUDIT ; tomcat-http--39; ee256201da7c0c11d6b90f9bc8b54aaa77; REQ4e42023b0a022925200027180002aa33; " - "applnx711.elastic.co; ; Request Complete: /mlgw/mlb/ofpositions/brokerageAccountPositionsIframe " - "[T=90ms,CacheStore-GetAttribute=5,MAUI-ECAPPOS=50,RR-QUOTE_TRANSACTION=11]", - 500)); + 1, typer.computeType(false, + "AUDIT ; tomcat-http--16; ee96c0c4567c0c11d6b90f9bc8b54aaa77; REQ4e42023e0a0328d020003e460005aa33; " + "applnx911.elastic.co; ; Request Complete: /mlgw/mlb/ofsummary/summary " + "[T=283ms,CUSTPREF-WEB_ACCOUNT_PREFERENCES=95,MAUI-ETSPROF2=155,NBMSG-NB_MESSAGING_SERVICE=164,CustAcctProfile=" + "BRK=2;NB=0;FILI=0;CESG=0;CC=0;AcctTotal=2,migrated=2]", + 500)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, + "AUDIT ; tomcat-http--39; ee763e95747c0b11d6b90f9bc8b54aaa77; REQ4e42023e0a0429a020000c6f0002aa33; " + "applnx811.elastic.co; ; Request Complete: /mlgw/mlb/ofaccounts/brokerageAccountHistory " + "[T=414ms,CUSTPREF-INS_PERSON_WEB_ACCT_PREFERENCES=298,MAUI-PSL04XD=108]", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, + "AUDIT ; tomcat-http--39; ee256201da7c0c11d6b90f9bc8b54aaa77; REQ4e42023b0a022925200027180002aa33; " + "applnx711.elastic.co; ; Request Complete: /mlgw/mlb/ofpositions/brokerageAccountPositionsIframe " + "[T=90ms,CacheStore-GetAttribute=5,MAUI-ECAPPOS=50,RR-QUOTE_TRANSACTION=11]", + 500)); } void CTokenListDataTyperTest::testVmwareData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL( - 1, - typer.computeType( - false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", 103)); - CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "Vpxa: [49EC0B90 verbose 'Default' opID=WFU-ddeadb59] [VpxaHalVmHostagent] 11: GuestInfo 
changed 'guest.disk", 107)); - CPPUNIT_ASSERT_EQUAL( - 3, - typer.computeType( - false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Completed callback", 104)); - CPPUNIT_ASSERT_EQUAL( - 1, - typer.computeType( - false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-35689729] [WaitForUpdatesDone] Received callback", 103)); - CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "Vpxa: [49EC0B90 verbose 'Default' opID=WFU-35689729] [VpxaHalVmHostagent] 15: GuestInfo changed 'guest.disk", 107)); - CPPUNIT_ASSERT_EQUAL( - 3, - typer.computeType( - false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-35689729] [WaitForUpdatesDone] Completed callback", 104)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", + 103)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'Default' opID=WFU-ddeadb59] [VpxaHalVmHostagent] 11: GuestInfo changed 'guest.disk", + 107)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Completed callback", + 104)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-35689729] [WaitForUpdatesDone] Received callback", + 103)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'Default' opID=WFU-35689729] [VpxaHalVmHostagent] 15: GuestInfo changed 'guest.disk", + 107)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-35689729] [WaitForUpdatesDone] Completed callback", + 104)); } void CTokenListDataTyperTest::testBankData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, - typer.computeType(false, - "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Process payment flow " - "for tradeId=80894728 and backOfficeId=9354474", - 500)); - CPPUNIT_ASSERT_EQUAL(2, - typer.computeType(false, - "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Synchronization of " - "payment flow is complete for tradeId=80013186 and backOfficeId=265573", - 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, + "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Process payment flow " + "for tradeId=80894728 and backOfficeId=9354474", + 500)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, + "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Synchronization of " + "payment flow is complete for tradeId=80013186 and backOfficeId=265573", + 500)); // This is not great, but it's tricky when only 1 word differs from the // first type - CPPUNIT_ASSERT_EQUAL(1, - typer.computeType(false, - "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Synchronize payment " - "flow for tradeId=80894721 and backOfficeId=9354469", - 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, + "INFO [co.elastic.settlement.synchronization.PaymentFlowProcessorImpl] Synchronize payment " + "flow for tradeId=80894721 and backOfficeId=9354469", + 500)); } void CTokenListDataTyperTest::testJavaGcData() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-27T19:57:43.644-0700: 1922084.903: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, 
"2016-04-28T19:57:43.644-0700: 1922084.903: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-29T19:57:43.644-0700: 1922084.903: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922084.903: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922084.904: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922084.905: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922084.906: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922085.906: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922086.906: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922087.906: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.645-0700: 1922087.906: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.646-0700: 1922087.906: [GC", 46)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "2016-04-30T19:57:43.647-0700: 1922087.906: [GC", 46)); - CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "PSYoungGen total 2572800K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); + 1, typer.computeType(false, "2016-04-27T19:57:43.644-0700: 1922084.903: [GC", 46)); + CPPUNIT_ASSERT_EQUAL( + 1, typer.computeType(false, "2016-04-28T19:57:43.644-0700: 1922084.903: [GC", 46)); + CPPUNIT_ASSERT_EQUAL( + 1, typer.computeType(false, "2016-04-29T19:57:43.644-0700: 1922084.903: [GC", 46)); + CPPUNIT_ASSERT_EQUAL( + 1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922084.903: [GC", 46)); CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "PSYoungGen total 2572801K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); + 1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922084.904: [GC", 46)); CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "PSYoungGen total 2572802K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); + 1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922084.905: [GC", 46)); CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", 106)); + 1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922084.906: [GC", 46)); CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759600000, 0x0000000800000000, 0x0000000800000000)", 106)); + 1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922085.906: [GC", 46)); CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759700000, 0x0000000800000000, 0x0000000800000000)", 106)); + 1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922086.906: [GC", 46)); CPPUNIT_ASSERT_EQUAL( - 2, - typer.computeType( - false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759800000, 0x0000000800000000, 0x0000000800000000)", 106)); + 1, typer.computeType(false, "2016-04-30T19:57:43.644-0700: 1922087.906: [GC", 46)); + CPPUNIT_ASSERT_EQUAL( + 1, typer.computeType(false, "2016-04-30T19:57:43.645-0700: 1922087.906: [GC", 46)); + CPPUNIT_ASSERT_EQUAL( + 1, typer.computeType(false, "2016-04-30T19:57:43.646-0700: 
1922087.906: [GC", 46)); + CPPUNIT_ASSERT_EQUAL( + 1, typer.computeType(false, "2016-04-30T19:57:43.647-0700: 1922087.906: [GC", 46)); + + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572800K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", + 106)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572801K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", + 106)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572802K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", + 106)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759500000, 0x0000000800000000, 0x0000000800000000)", + 106)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759600000, 0x0000000800000000, 0x0000000800000000)", + 106)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759700000, 0x0000000800000000, 0x0000000800000000)", + 106)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, "PSYoungGen total 2572803K, used 1759355K [0x0000000759800000, 0x0000000800000000, 0x0000000800000000)", + 106)); } void CTokenListDataTyperTest::testPersist() { TTokenListDataTyperKeepsFields origTyper(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - origTyper.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", 500); - origTyper.computeType(false, " Source MONEYBROKER on 13112:736 has shut down.", 500); - origTyper.computeType(false, " Source CUBE_LIQUID on 13188:2010 has shut down.", 500); - origTyper.computeType(false, " Source ML SERVICE2 on 13122:867 has shut down.", 500); - origTyper.computeType(false, " Source MONEYBROKER on 13112:736 has started.", 500); - origTyper.computeType(false, " Source ML_SERVICE2 on 13122:867 has started.", 500); - origTyper.computeType(false, " Service CUBE_CHIX, id of 132, has started.", 500); - origTyper.computeType(false, " Service CUBE_IDEM, id of 232, has started.", 500); - origTyper.computeType(false, " Service CUBE_IDEM, id of 232, has started.", 500); - origTyper.computeType(false, " Service CUBE_CHIX has shut down.", 500); + origTyper.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", + 500); + origTyper.computeType(false, " Source MONEYBROKER on 13112:736 has shut down.", + 500); + origTyper.computeType(false, " Source CUBE_LIQUID on 13188:2010 has shut down.", + 500); + origTyper.computeType(false, " Source ML SERVICE2 on 13122:867 has shut down.", + 500); + origTyper.computeType(false, " Source MONEYBROKER on 13112:736 has started.", + 500); + origTyper.computeType(false, " Source ML_SERVICE2 on 13122:867 has started.", + 500); + origTyper.computeType(false, " Service CUBE_CHIX, id of 132, has started.", + 500); + origTyper.computeType(false, " Service CUBE_IDEM, id of 232, has started.", + 500); + origTyper.computeType(false, " Service CUBE_IDEM, id of 232, has started.", + 500); + origTyper.computeType( + false, " Service CUBE_CHIX has shut down.", 500); std::string origXml; { @@ -324,8 +313,8 @@ void CTokenListDataTyperTest::testPersist() { ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT( - traverser.traverseSubLevel(boost::bind(&TTokenListDataTyperKeepsFields::acceptRestoreTraverser, &restoredTyper, _1))); + 
CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( + &TTokenListDataTyperKeepsFields::acceptRestoreTraverser, &restoredTyper, _1))); } // The XML representation of the new typer should be the same as the original @@ -388,23 +377,35 @@ void CTokenListDataTyperTest::testLongReverseSearch() { void CTokenListDataTyperTest::testPreTokenised() { TTokenListDataTyperKeepsFields typer(NO_REVERSE_SEARCH_CREATOR, 0.7, "whatever"); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", 500)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source MONEYBROKER on 13112:736 has shut down.", 500)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source CUBE_LIQUID on 13188:2010 has shut down.", 500)); - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML SERVICE2 on 13122:867 has shut down.", 500)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " Source MONEYBROKER on 13112:736 has started.", 500)); - CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has started.", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_CHIX, id of 132, has started.", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_IDEM, id of 232, has started.", 500)); - CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_IDEM, id of 232, has started.", 500)); - CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " Service CUBE_CHIX has shut down.", 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has shut down.", + 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source MONEYBROKER on 13112:736 has shut down.", + 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source CUBE_LIQUID on 13188:2010 has shut down.", + 500)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, " Source ML SERVICE2 on 13122:867 has shut down.", + 500)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " Source MONEYBROKER on 13112:736 has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(2, typer.computeType(false, " Source ML_SERVICE2 on 13122:867 has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_CHIX, id of 132, has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_IDEM, id of 232, has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(3, typer.computeType(false, " Service CUBE_IDEM, id of 232, has started.", + 500)); + CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, " Service CUBE_CHIX has shut down.", + 500)); TTokenListDataTyperKeepsFields::TStrStrUMap fields; // The pre-tokenised tokens exactly match those of the other message in // category 4, so this should get put it category 4 - fields[TTokenListDataTyperKeepsFields::PRETOKENISED_TOKEN_FIELD] = "ml00-4201.1.p2ps,Info,Service,CUBE_CHIX,has,shut,down"; - CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, fields, " Service CUBE_CHIX has shut down.", 500)); + fields[TTokenListDataTyperKeepsFields::PRETOKENISED_TOKEN_FIELD] = + "ml00-4201.1.p2ps,Info,Service,CUBE_CHIX,has,shut,down"; + CPPUNIT_ASSERT_EQUAL(4, typer.computeType(false, fields, " Service CUBE_CHIX has shut down.", + 500)); // Here we cheat. The pre-tokenised tokens exactly match those of the // first message, so this should get put in category 1. But the full @@ -413,13 +414,16 @@ void CTokenListDataTyperTest::testPreTokenised() { // means there's a bug where the pre-tokenised tokens are being ignored. 
// (Obviously in production we wouldn't get the discrepancy between the // pre-tokenised tokens and the full message.) - fields[TTokenListDataTyperKeepsFields::PRETOKENISED_TOKEN_FIELD] = "ml13-4608.1.p2ps,Info,Source,ML_SERVICE2,on,has,shut,down"; - CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, fields, " Service CUBE_CHIX has shut down.", 500)); + fields[TTokenListDataTyperKeepsFields::PRETOKENISED_TOKEN_FIELD] = + "ml13-4608.1.p2ps,Info,Source,ML_SERVICE2,on,has,shut,down"; + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, fields, " Service CUBE_CHIX has shut down.", + 500)); // Similar principle, but with Chinese, Japanese and Korean tokens, so // should go in a new category. fields[TTokenListDataTyperKeepsFields::PRETOKENISED_TOKEN_FIELD] = "编码,コーディング,코딩"; - CPPUNIT_ASSERT_EQUAL(5, typer.computeType(false, fields, " Service CUBE_CHIX has shut down.", 500)); + CPPUNIT_ASSERT_EQUAL(5, typer.computeType(false, fields, " Service CUBE_CHIX has shut down.", + 500)); } void CTokenListDataTyperTest::testPreTokenisedPerformance() { @@ -434,10 +438,8 @@ void CTokenListDataTyperTest::testPreTokenisedPerformance() { stopWatch.start(); for (size_t count = 0; count < TEST_SIZE; ++count) { - CPPUNIT_ASSERT_EQUAL( - 1, - typer.computeType( - false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", 103)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", + 103)); } inlineTokenisationTime = stopWatch.stop(); @@ -459,12 +461,8 @@ void CTokenListDataTyperTest::testPreTokenisedPerformance() { stopWatch.start(); for (size_t count = 0; count < TEST_SIZE; ++count) { - CPPUNIT_ASSERT_EQUAL( - 1, - typer.computeType(false, - fields, - "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", - 103)); + CPPUNIT_ASSERT_EQUAL(1, typer.computeType(false, fields, "Vpxa: [49EC0B90 verbose 'VpxaHalCnxHostagent' opID=WFU-ddeadb59] [WaitForUpdatesDone] Received callback", + 103)); } preTokenisationTime = stopWatch.stop(); diff --git a/lib/api/unittest/CTokenListReverseSearchCreatorTest.cc b/lib/api/unittest/CTokenListReverseSearchCreatorTest.cc index 0142a33081..d10c92eeae 100644 --- a/lib/api/unittest/CTokenListReverseSearchCreatorTest.cc +++ b/lib/api/unittest/CTokenListReverseSearchCreatorTest.cc @@ -11,30 +11,38 @@ using namespace ml; using namespace api; CppUnit::Test* CTokenListReverseSearchCreatorTest::suite() { - CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTokenListReverseSearchCreatorTest"); + CppUnit::TestSuite* suiteOfTests = + new CppUnit::TestSuite("CTokenListReverseSearchCreatorTest"); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testCostOfToken", &CTokenListReverseSearchCreatorTest::testCostOfToken)); + "CTokenListReverseSearchCreatorTest::testCostOfToken", + &CTokenListReverseSearchCreatorTest::testCostOfToken)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testCreateNullSearch", &CTokenListReverseSearchCreatorTest::testCreateNullSearch)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch", - &CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch)); + "CTokenListReverseSearchCreatorTest::testCreateNullSearch", + &CTokenListReverseSearchCreatorTest::testCreateNullSearch)); suiteOfTests->addTest(new 
CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testInitStandardSearch", &CTokenListReverseSearchCreatorTest::testInitStandardSearch)); + "CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch", + &CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken", &CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken)); + "CTokenListReverseSearchCreatorTest::testInitStandardSearch", + &CTokenListReverseSearchCreatorTest::testInitStandardSearch)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken", &CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken)); + "CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken", + &CTokenListReverseSearchCreatorTest::testAddCommonUniqueToken)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CTokenListReverseSearchCreatorTest::testCloseStandardSearch", &CTokenListReverseSearchCreatorTest::testCloseStandardSearch)); + "CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken", + &CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTokenListReverseSearchCreatorTest::testCloseStandardSearch", + &CTokenListReverseSearchCreatorTest::testCloseStandardSearch)); return suiteOfTests; } void CTokenListReverseSearchCreatorTest::testCostOfToken() { CTokenListReverseSearchCreator reverseSearchCreator("foo"); - CPPUNIT_ASSERT_EQUAL(std::size_t(110), reverseSearchCreator.costOfToken("someToken", 5)); + CPPUNIT_ASSERT_EQUAL(std::size_t(110), + reverseSearchCreator.costOfToken("someToken", 5)); } void CTokenListReverseSearchCreatorTest::testCreateNullSearch() { @@ -55,7 +63,8 @@ void CTokenListReverseSearchCreatorTest::testCreateNoUniqueTokenSearch() { std::string reverseSearchPart1; std::string reverseSearchPart2; - CPPUNIT_ASSERT(reverseSearchCreator.createNoUniqueTokenSearch(1, "404", 4, reverseSearchPart1, reverseSearchPart2)); + CPPUNIT_ASSERT(reverseSearchCreator.createNoUniqueTokenSearch( + 1, "404", 4, reverseSearchPart1, reverseSearchPart2)); CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart1); CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart2); @@ -67,7 +76,8 @@ void CTokenListReverseSearchCreatorTest::testInitStandardSearch() { std::string reverseSearchPart1; std::string reverseSearchPart2; - reverseSearchCreator.initStandardSearch(1, "User 'foo' logged in host '0.0.0.0'", 1, reverseSearchPart1, reverseSearchPart2); + reverseSearchCreator.initStandardSearch(1, "User 'foo' logged in host '0.0.0.0'", + 1, reverseSearchPart1, reverseSearchPart2); CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart1); CPPUNIT_ASSERT_EQUAL(std::string(""), reverseSearchPart2); @@ -92,13 +102,18 @@ void CTokenListReverseSearchCreatorTest::testAddInOrderCommonToken() { std::string reverseSearchPart1; std::string reverseSearchPart2; - reverseSearchCreator.addInOrderCommonToken("user", true, reverseSearchPart1, reverseSearchPart2); - reverseSearchCreator.addInOrderCommonToken("logged", false, reverseSearchPart1, reverseSearchPart2); - reverseSearchCreator.addInOrderCommonToken("b=0.15+a", false, reverseSearchPart1, reverseSearchPart2); - reverseSearchCreator.addInOrderCommonToken("logged", false, reverseSearchPart1, reverseSearchPart2); + reverseSearchCreator.addInOrderCommonToken("user", true, reverseSearchPart1, + reverseSearchPart2); + 
reverseSearchCreator.addInOrderCommonToken("logged", false, reverseSearchPart1, + reverseSearchPart2); + reverseSearchCreator.addInOrderCommonToken("b=0.15+a", false, reverseSearchPart1, + reverseSearchPart2); + reverseSearchCreator.addInOrderCommonToken("logged", false, reverseSearchPart1, + reverseSearchPart2); CPPUNIT_ASSERT_EQUAL(std::string("user logged b=0.15+a logged"), reverseSearchPart1); - CPPUNIT_ASSERT_EQUAL(std::string(".*?user.+?logged.+?b=0\\.15\\+a.+?logged"), reverseSearchPart2); + CPPUNIT_ASSERT_EQUAL(std::string(".*?user.+?logged.+?b=0\\.15\\+a.+?logged"), + reverseSearchPart2); } void CTokenListReverseSearchCreatorTest::testCloseStandardSearch() { diff --git a/lib/config/CAutoconfigurer.cc b/lib/config/CAutoconfigurer.cc index f5f01fff60..58759a048a 100644 --- a/lib/config/CAutoconfigurer.cc +++ b/lib/config/CAutoconfigurer.cc @@ -91,7 +91,8 @@ class CONFIG_EXPORT CAutoconfigurerImpl : public core::CNonCopyable { //! Update the statistics with \p time and \p fieldValues and maybe //! recompute detector scores and prune. - void updateStatisticsAndMaybeComputeScores(core_t::TTime time, const TStrStrUMap& fieldValues); + void updateStatisticsAndMaybeComputeScores(core_t::TTime time, + const TStrStrUMap& fieldValues); //! Compute the detector scores. void computeScores(bool final); @@ -164,7 +165,8 @@ void CAutoconfigurer::finalise() { m_Impl->finalise(); } -bool CAutoconfigurer::restoreState(core::CDataSearcher& /*restoreSearcher*/, core_t::TTime& /*completeToTime*/) { +bool CAutoconfigurer::restoreState(core::CDataSearcher& /*restoreSearcher*/, + core_t::TTime& /*completeToTime*/) { return true; } @@ -182,17 +184,14 @@ api::COutputHandler& CAutoconfigurer::outputHandler() { //////// CAutoconfigurerImpl //////// -CAutoconfigurerImpl::CAutoconfigurerImpl(const CAutoconfigurerParams& params, CReportWriter& reportWriter) - : m_Params(params), - m_Initialized(false), - m_NumberRecords(0), +CAutoconfigurerImpl::CAutoconfigurerImpl(const CAutoconfigurerParams& params, + CReportWriter& reportWriter) + : m_Params(params), m_Initialized(false), m_NumberRecords(0), m_NumberRecordsWithNoOrInvalidTime(0), m_LastTimeScoresWereRefreshed(boost::numeric::bounds::lowest()), - m_DetectorCountStatistics(m_Params), - m_FieldRolePenalties(m_Params), + m_DetectorCountStatistics(m_Params), m_FieldRolePenalties(m_Params), m_DetectorPenalties(m_Params, m_FieldRolePenalties), - m_GeneratedCandidateFieldNames(false), - m_ReportWriter(reportWriter) { + m_GeneratedCandidateFieldNames(false), m_ReportWriter(reportWriter) { } bool CAutoconfigurerImpl::handleRecord(const TStrStrUMap& fieldValues) { @@ -227,10 +226,12 @@ void CAutoconfigurerImpl::finalise() { if (const CDataSummaryStatistics* summary = m_FieldStatistics[i].summary()) { m_ReportWriter.addFieldStatistics(name, type, *summary); } - if (const CCategoricalDataSummaryStatistics* summary = m_FieldStatistics[i].categoricalSummary()) { + if (const CCategoricalDataSummaryStatistics* summary = + m_FieldStatistics[i].categoricalSummary()) { m_ReportWriter.addFieldStatistics(name, type, *summary); } - if (const CNumericDataSummaryStatistics* summary = m_FieldStatistics[i].numericSummary()) { + if (const CNumericDataSummaryStatistics* summary = + m_FieldStatistics[i].numericSummary()) { m_ReportWriter.addFieldStatistics(name, type, *summary); } } @@ -252,24 +253,28 @@ uint64_t CAutoconfigurerImpl::numRecordsHandled() const { return m_NumberRecords; } -bool CAutoconfigurerImpl::extractTime(const TStrStrUMap& fieldValues, core_t::TTime& time) const { 
+bool CAutoconfigurerImpl::extractTime(const TStrStrUMap& fieldValues, + core_t::TTime& time) const { TStrStrUMapCItr i = fieldValues.find(m_Params.timeFieldName()); if (i == fieldValues.end()) { - LOG_ERROR(<< "No time field '" << m_Params.timeFieldName() << "' in record:" << core_t::LINE_ENDING + LOG_ERROR(<< "No time field '" << m_Params.timeFieldName() + << "' in record:" << core_t::LINE_ENDING << CAutoconfigurer::debugPrintRecord(fieldValues)); return false; } if (m_Params.timeFieldFormat().empty()) { if (!core::CStringUtils::stringToType(i->second, time)) { - LOG_ERROR(<< "Cannot interpret time field '" << m_Params.timeFieldName() << "' in record:" << core_t::LINE_ENDING + LOG_ERROR(<< "Cannot interpret time field '" << m_Params.timeFieldName() + << "' in record:" << core_t::LINE_ENDING << CAutoconfigurer::debugPrintRecord(fieldValues)); return false; } } else if (!core::CTimeUtils::strptime(m_Params.timeFieldFormat(), i->second, time)) { - LOG_ERROR(<< "Cannot interpret time field '" << m_Params.timeFieldName() << "' using format '" << m_Params.timeFieldFormat() - << "' in record:" << core_t::LINE_ENDING << CAutoconfigurer::debugPrintRecord(fieldValues)); + LOG_ERROR(<< "Cannot interpret time field '" << m_Params.timeFieldName() << "' using format '" + << m_Params.timeFieldFormat() << "' in record:" << core_t::LINE_ENDING + << CAutoconfigurer::debugPrintRecord(fieldValues)); return false; } @@ -310,11 +315,13 @@ void CAutoconfigurerImpl::processRecord(core_t::TTime time, const TStrStrUMap& f } } -void CAutoconfigurerImpl::updateStatisticsAndMaybeComputeScores(core_t::TTime time, const TStrStrUMap& fieldValues) { +void CAutoconfigurerImpl::updateStatisticsAndMaybeComputeScores(core_t::TTime time, + const TStrStrUMap& fieldValues) { CDetectorRecordDirectAddressTable::TDetectorRecordVec records; m_DetectorRecordFactory.detectorRecords(time, fieldValues, m_CandidateDetectors, records); m_DetectorCountStatistics.add(records); - if (m_NumberRecords % UPDATE_SCORE_RECORD_COUNT_INTERVAL == 0 && time >= m_LastTimeScoresWereRefreshed + UPDATE_SCORE_TIME_INTERVAL) { + if (m_NumberRecords % UPDATE_SCORE_RECORD_COUNT_INTERVAL == 0 && + time >= m_LastTimeScoresWereRefreshed + UPDATE_SCORE_TIME_INTERVAL) { this->computeScores(false); m_LastTimeScoresWereRefreshed = time; } @@ -329,7 +336,8 @@ void CAutoconfigurerImpl::computeScores(bool final) { LOG_TRACE(<< "Refreshing scores for " << m_CandidateDetectors[i].description()); m_CandidateDetectors[i].refreshScores(); LOG_TRACE(<< "score = " << m_CandidateDetectors[i].score()); - if (m_CandidateDetectors[i].score() > (final ? m_Params.minimumDetectorScore() : 0.0)) { + if (m_CandidateDetectors[i].score() > + (final ? 
m_Params.minimumDetectorScore() : 0.0)) { if (i > last) { m_CandidateDetectors[i].swap(m_CandidateDetectors[last]); } @@ -339,7 +347,8 @@ void CAutoconfigurerImpl::computeScores(bool final) { if (last < m_CandidateDetectors.size()) { LOG_DEBUG(<< "Removing " << m_CandidateDetectors.size() - last << " detectors"); - m_CandidateDetectors.erase(m_CandidateDetectors.begin() + last, m_CandidateDetectors.end()); + m_CandidateDetectors.erase(m_CandidateDetectors.begin() + last, + m_CandidateDetectors.end()); m_DetectorRecordFactory.build(m_CandidateDetectors); m_DetectorCountStatistics.pruneUnsed(m_CandidateDetectors); } @@ -380,17 +389,19 @@ void CAutoconfigurerImpl::generateCandidateDetectorsOnce() { &CAutoconfigurerParams::canUseForByField, &CAutoconfigurerParams::canUseForOverField, &CAutoconfigurerParams::canUseForPartitionField}; - double scores[] = {m_FieldStatistics[i].score(m_FieldRolePenalties.categoricalFunctionArgumentPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.metricFunctionArgumentPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.byPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.rareByPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.overPenalty()), - m_FieldStatistics[i].score(m_FieldRolePenalties.partitionPenalty())}; + double scores[] = { + m_FieldStatistics[i].score(m_FieldRolePenalties.categoricalFunctionArgumentPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.metricFunctionArgumentPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.byPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.rareByPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.overPenalty()), + m_FieldStatistics[i].score(m_FieldRolePenalties.partitionPenalty())}; const std::string& fieldName = m_FieldStatistics[i].name(); for (std::size_t j = 0u; j < boost::size(FIELD_NAMES); ++j) { if ((m_Params.*CAN_USE[j])(fieldName) && scores[j] > 0.0) { - LOG_DEBUG(<< FIELD_NAMES[j] << " '" << fieldName << "' with score " << scores[j]); + LOG_DEBUG(<< FIELD_NAMES[j] << " '" << fieldName + << "' with score " << scores[j]); (enumerator.*ADD_FIELD[j])(fieldName); } } @@ -418,7 +429,8 @@ void CAutoconfigurerImpl::replayBuffer() { if (reportProgress(i)) { LOG_DEBUG(<< "Replayed " << i << " records"); } - this->updateStatisticsAndMaybeComputeScores(m_Buffer[i].first, m_Buffer[i].second); + this->updateStatisticsAndMaybeComputeScores(m_Buffer[i].first, + m_Buffer[i].second); } TTimeStrStrUMapPrVec empty; m_Buffer.swap(empty); diff --git a/lib/config/CAutoconfigurerDetectorPenalties.cc b/lib/config/CAutoconfigurerDetectorPenalties.cc index 1febb72fec..41ff30cdb4 100644 --- a/lib/config/CAutoconfigurerDetectorPenalties.cc +++ b/lib/config/CAutoconfigurerDetectorPenalties.cc @@ -30,24 +30,30 @@ namespace { std::size_t fieldRolePenaltyIndex(const CDetectorSpecification& spec) { static const std::size_t SKIPS[] = {1, 2, 3, 6, 9, 18}; return (spec.argumentField() ? SKIPS[0 + config_t::isMetric(spec.function())] : 0) + - (spec.byField() ? SKIPS[2 + config_t::isRare(spec.function())] : 0) + (spec.overField() ? SKIPS[4] : 0) + - (spec.partitionField() ? SKIPS[5] : 0); + (spec.byField() ? SKIPS[2 + config_t::isRare(spec.function())] : 0) + + (spec.overField() ? SKIPS[4] : 0) + (spec.partitionField() ? 
SKIPS[5] : 0); } } -CAutoconfigurerDetectorPenalties::CAutoconfigurerDetectorPenalties(const CAutoconfigurerParams& params, - const CAutoconfigurerFieldRolePenalties& fieldRolePenalties) +CAutoconfigurerDetectorPenalties::CAutoconfigurerDetectorPenalties( + const CAutoconfigurerParams& params, + const CAutoconfigurerFieldRolePenalties& fieldRolePenalties) : m_Params(params), m_FieldRolePenalties(fieldRolePenalties) { } -CAutoconfigurerDetectorPenalties::TPenaltyPtr CAutoconfigurerDetectorPenalties::penaltyFor(const CDetectorSpecification& spec) { - return TPenaltyPtr((this->fieldRolePenalty(spec) * CSpanTooSmallForBucketLengthPenalty(m_Params) * CPolledDataPenalty(m_Params) * - CLongTailPenalty(m_Params) * CLowInformationContentPenalty(m_Params) * CNotEnoughDataPenalty(m_Params) * - CTooMuchDataPenalty(m_Params) * CLowVariationPenalty(m_Params) * CSparseCountPenalty(m_Params)) - .clone()); +CAutoconfigurerDetectorPenalties::TPenaltyPtr +CAutoconfigurerDetectorPenalties::penaltyFor(const CDetectorSpecification& spec) { + return TPenaltyPtr( + (this->fieldRolePenalty(spec) * CSpanTooSmallForBucketLengthPenalty(m_Params) * + CPolledDataPenalty(m_Params) * CLongTailPenalty(m_Params) * + CLowInformationContentPenalty(m_Params) * + CNotEnoughDataPenalty(m_Params) * CTooMuchDataPenalty(m_Params) * + CLowVariationPenalty(m_Params) * CSparseCountPenalty(m_Params)) + .clone()); } -const CPenalty& CAutoconfigurerDetectorPenalties::fieldRolePenalty(const CDetectorSpecification& spec) { +const CPenalty& +CAutoconfigurerDetectorPenalties::fieldRolePenalty(const CDetectorSpecification& spec) { m_DetectorFieldRolePenalties.resize(36); TPenaltyPtr& result = m_DetectorFieldRolePenalties[fieldRolePenaltyIndex(spec)]; if (!result) { @@ -55,11 +61,14 @@ const CPenalty& CAutoconfigurerDetectorPenalties::fieldRolePenalty(const CDetect const CAutoconfigurerFieldRolePenalties& penalties = m_FieldRolePenalties; if (spec.argumentField()) { penalty.addPenalty(constants::ARGUMENT_INDEX, - config_t::isMetric(spec.function()) ? penalties.metricFunctionArgumentPenalty() - : penalties.categoricalFunctionArgumentPenalty()); + config_t::isMetric(spec.function()) + ? penalties.metricFunctionArgumentPenalty() + : penalties.categoricalFunctionArgumentPenalty()); } if (spec.byField()) { - penalty.addPenalty(constants::BY_INDEX, config_t::isRare(spec.function()) ? penalties.rareByPenalty() : penalties.byPenalty()); + penalty.addPenalty(constants::BY_INDEX, config_t::isRare(spec.function()) + ? 
penalties.rareByPenalty() + : penalties.byPenalty()); } if (spec.overField()) { penalty.addPenalty(constants::OVER_INDEX, penalties.overPenalty()); diff --git a/lib/config/CAutoconfigurerFieldRolePenalties.cc b/lib/config/CAutoconfigurerFieldRolePenalties.cc index 262186b65a..eaefe44be0 100644 --- a/lib/config/CAutoconfigurerFieldRolePenalties.cc +++ b/lib/config/CAutoconfigurerFieldRolePenalties.cc @@ -27,22 +27,27 @@ const std::size_t PARTITION_INDEX = 5u; using TCountThreshold = std::size_t (CAutoconfigurerParams::*)() const; const std::size_t PENALTY_INDICES[] = {BY_INDEX, RARE_BY_INDEX, OVER_INDEX, PARTITION_INDEX}; -const TCountThreshold PENALTY_THRESHOLD[] = {&CAutoconfigurerParams::highNumberByFieldValues, - &CAutoconfigurerParams::highNumberRareByFieldValues, - &CAutoconfigurerParams::lowNumberOverFieldValues, - &CAutoconfigurerParams::highNumberPartitionFieldValues}; -const TCountThreshold HARD_CUTOFF[] = {&CAutoconfigurerParams::maximumNumberByFieldValues, - &CAutoconfigurerParams::maximumNumberRareByFieldValues, - &CAutoconfigurerParams::minimumNumberOverFieldValues, - &CAutoconfigurerParams::maximumNumberPartitionFieldValues}; +const TCountThreshold PENALTY_THRESHOLD[] = { + &CAutoconfigurerParams::highNumberByFieldValues, + &CAutoconfigurerParams::highNumberRareByFieldValues, + &CAutoconfigurerParams::lowNumberOverFieldValues, + &CAutoconfigurerParams::highNumberPartitionFieldValues}; +const TCountThreshold HARD_CUTOFF[] = { + &CAutoconfigurerParams::maximumNumberByFieldValues, + &CAutoconfigurerParams::maximumNumberRareByFieldValues, + &CAutoconfigurerParams::minimumNumberOverFieldValues, + &CAutoconfigurerParams::maximumNumberPartitionFieldValues}; } CAutoconfigurerFieldRolePenalties::CAutoconfigurerFieldRolePenalties(const CAutoconfigurerParams& params) { - m_Penalties[CATEGORICAL_ARGUMENT_INDEX].reset((CCantBeNumeric(params) * CDontUseUnaryField(params)).clone()); + m_Penalties[CATEGORICAL_ARGUMENT_INDEX].reset( + (CCantBeNumeric(params) * CDontUseUnaryField(params)).clone()); m_Penalties[METRIC_ARGUMENT_INDEX].reset(new CCantBeCategorical(params)); for (std::size_t i = 0u; i < boost::size(PENALTY_INDICES); ++i) { m_Penalties[PENALTY_INDICES[i]].reset( - (CCantBeNumeric(params) * CDistinctCountThresholdPenalty(params, (params.*PENALTY_THRESHOLD[i])(), (params.*HARD_CUTOFF[i])()) * + (CCantBeNumeric(params) * + CDistinctCountThresholdPenalty(params, (params.*PENALTY_THRESHOLD[i])(), + (params.*HARD_CUTOFF[i])()) * CDontUseUnaryField(params)) .clone()); } diff --git a/lib/config/CAutoconfigurerParams.cc b/lib/config/CAutoconfigurerParams.cc index b1fe99165b..ccdb7ca455 100644 --- a/lib/config/CAutoconfigurerParams.cc +++ b/lib/config/CAutoconfigurerParams.cc @@ -34,7 +34,9 @@ class CConstraint { public: virtual ~CConstraint() {} virtual bool operator()(const T& /*value*/) const { return true; } - virtual bool operator()(const std::vector& /*value*/) const { return true; } + virtual bool operator()(const std::vector& /*value*/) const { + return true; + } virtual std::string print() const = 0; }; @@ -59,7 +61,9 @@ class CConstraintConjunction : public CConstraint { return this; } bool operator()(const T& value) const { return this->evaluate(value); } - bool operator()(const std::vector& value) const { return this->evaluate(value); } + bool operator()(const std::vector& value) const { + return this->evaluate(value); + } std::string print() const { std::string result; if (m_Constraints.size() > 0) { @@ -116,7 +120,9 @@ class CValueIs : public CConstraint { public: 
CValueIs(const T& rhs) : m_Rhs(&rhs) {} bool operator()(const T& lhs) const { return m_Pred(lhs, *m_Rhs); } - std::string print() const { return m_Pred.print() + core::CStringUtils::typeToString(*m_Rhs); } + std::string print() const { + return m_Pred.print() + core::CStringUtils::typeToString(*m_Rhs); + } private: const T* m_Rhs; @@ -136,7 +142,9 @@ class CVectorValueIs : public CConstraint { } return true; } - std::string print() const { return m_Pred.print() + core::CContainerPrinter::print(*m_Rhs); } + std::string print() const { + return m_Pred.print() + core::CContainerPrinter::print(*m_Rhs); + } private: const std::vector* m_Rhs; @@ -147,7 +155,9 @@ class CVectorValueIs : public CConstraint { template class CNotEmpty : public CConstraint { public: - bool operator()(const std::vector& value) const { return !value.empty(); } + bool operator()(const std::vector& value) const { + return !value.empty(); + } std::string print() const { return "not empty"; } }; @@ -156,8 +166,12 @@ template class CSizeIs : public CConstraint { public: CSizeIs(std::size_t size) : m_Size(size) {} - bool operator()(const std::vector& value) const { return value.size() == m_Size; } - std::string print() const { return "size is " + core::CStringUtils::typeToString(m_Size); } + bool operator()(const std::vector& value) const { + return value.size() == m_Size; + } + std::string print() const { + return "size is " + core::CStringUtils::typeToString(m_Size); + } private: std::size_t m_Size; @@ -184,9 +198,12 @@ class CBuiltinParameter : public CParameter { using TConstraintCPtr = boost::shared_ptr>; public: - CBuiltinParameter(T& value) : m_Value(value), m_Constraint(new CUnconstrained) {} - CBuiltinParameter(T& value, const CConstraint* constraint) : m_Value(value), m_Constraint(constraint) {} - CBuiltinParameter(T& value, TConstraintCPtr constraint) : m_Value(value), m_Constraint(constraint) {} + CBuiltinParameter(T& value) + : m_Value(value), m_Constraint(new CUnconstrained) {} + CBuiltinParameter(T& value, const CConstraint* constraint) + : m_Value(value), m_Constraint(constraint) {} + CBuiltinParameter(T& value, TConstraintCPtr constraint) + : m_Value(value), m_Constraint(constraint) {} private: virtual bool fromStringImpl(const std::string& value) { @@ -198,7 +215,8 @@ class CBuiltinParameter : public CParameter { return false; } if (!(*m_Constraint)(value_)) { - LOG_ERROR(<< "'" << value_ << "' doesn't satisfy '" << m_Constraint->print() << "'"); + LOG_ERROR(<< "'" << value_ << "' doesn't satisfy '" + << m_Constraint->print() << "'"); return false; } m_Value = value_; @@ -216,8 +234,10 @@ class CBuiltinParameter : public CParameter { template class CBuiltinVectorParameter : public CParameter { public: - CBuiltinVectorParameter(std::vector& value) : m_Value(value), m_Constraint(new CUnconstrained) {} - CBuiltinVectorParameter(std::vector& value, const CConstraint* constraint) : m_Value(value), m_Constraint(constraint) {} + CBuiltinVectorParameter(std::vector& value) + : m_Value(value), m_Constraint(new CUnconstrained) {} + CBuiltinVectorParameter(std::vector& value, const CConstraint* constraint) + : m_Value(value), m_Constraint(constraint) {} private: virtual bool fromStringImpl(const std::string& value) { @@ -235,7 +255,8 @@ class CBuiltinVectorParameter : public CParameter { } } if (!(*m_Constraint)(value_)) { - LOG_ERROR(<< "'" << core::CContainerPrinter::print(value_) << "' doesn't satisfy '" << m_Constraint->print() << "'"); + LOG_ERROR(<< "'" << core::CContainerPrinter::print(value_) + << "' doesn't 
satisfy '" << m_Constraint->print() << "'"); return false; } m_Value.swap(value_); @@ -252,7 +273,8 @@ class COptionalStrVecParameter : public CParameter { public: COptionalStrVecParameter(CAutoconfigurerParams::TOptionalStrVec& value) : m_Value(value), m_Constraint(new CUnconstrained) {} - COptionalStrVecParameter(CAutoconfigurerParams::TOptionalStrVec& value, const CConstraint* constraint) + COptionalStrVecParameter(CAutoconfigurerParams::TOptionalStrVec& value, + const CConstraint* constraint) : m_Value(value), m_Constraint(constraint) {} virtual bool fromStringImpl(const std::string& value) { @@ -263,7 +285,8 @@ class COptionalStrVecParameter : public CParameter { value_.push_back(remainder); } if (!(*m_Constraint)(value_)) { - LOG_ERROR(<< "'" << core::CContainerPrinter::print(value_) << "' doesn't satisfy '" << m_Constraint->print() << "'"); + LOG_ERROR(<< "'" << core::CContainerPrinter::print(value_) + << "' doesn't satisfy '" << m_Constraint->print() << "'"); return false; } m_Value.reset(TStrVec()); @@ -279,7 +302,8 @@ class COptionalStrVecParameter : public CParameter { //! \brief The field data type parameter. class CFieldDataTypeParameter : public CParameter { public: - CFieldDataTypeParameter(CAutoconfigurerParams::TStrUserDataTypePrVec& value) : m_Value(value) {} + CFieldDataTypeParameter(CAutoconfigurerParams::TStrUserDataTypePrVec& value) + : m_Value(value) {} private: virtual bool fromStringImpl(const std::string& value) { @@ -329,7 +353,8 @@ class CFieldDataTypeParameter : public CParameter { class CFunctionCategoryParameter : public CParameter { public: CFunctionCategoryParameter(CAutoconfigurerParams::TFunctionCategoryVec& value) - : m_Value(value), m_Constraint(new CUnconstrained) {} + : m_Value(value), + m_Constraint(new CUnconstrained) {} CFunctionCategoryParameter(CAutoconfigurerParams::TFunctionCategoryVec& value, const CConstraint* constraint) : m_Value(value), m_Constraint(constraint) {} @@ -357,7 +382,8 @@ class CFunctionCategoryParameter : public CParameter { } std::sort(value_.begin(), value_.end()); if (!(*m_Constraint)(value_)) { - LOG_ERROR(<< "'" << core::CContainerPrinter::print(value_) << "' doesn't satisfy '" << m_Constraint->print() << "'"); + LOG_ERROR(<< "'" << core::CContainerPrinter::print(value_) + << "' doesn't satisfy '" << m_Constraint->print() << "'"); return false; } m_Value.swap(value_); @@ -402,7 +428,9 @@ void skipUtf8Bom(std::ifstream& strm) { //! Helper method for CAutoconfigurerParams::init() to extract parameter //! value from the property file. 
-static bool processSetting(const boost::property_tree::ptree& propTree, const std::string& iniPath, CParameter& parameter) { +static bool processSetting(const boost::property_tree::ptree& propTree, + const std::string& iniPath, + CParameter& parameter) { try { // This get() will throw an exception if the path isn't found std::string value = propTree.get(iniPath); @@ -413,7 +441,9 @@ static bool processSetting(const boost::property_tree::ptree& propTree, const st LOG_ERROR(<< "Invalid value for setting '" << iniPath << "' : " << value); return false; } - } catch (boost::property_tree::ptree_error&) { LOG_INFO(<< "Keeping default value for unspecified setting '" << iniPath << "'"); } + } catch (boost::property_tree::ptree_error&) { + LOG_INFO(<< "Keeping default value for unspecified setting '" << iniPath << "'"); + } return true; } @@ -426,7 +456,8 @@ bool canUse(const CAutoconfigurerParams::TOptionalStrVec& primary, return std::find(primary->begin(), primary->end(), value) != primary->end(); } if (secondary) { - return std::find(secondary->begin(), secondary->end(), value) != secondary->end(); + return std::find(secondary->begin(), secondary->end(), value) != + secondary->end(); } return true; } @@ -436,16 +467,11 @@ const std::size_t MINIMUM_RECORDS_TO_ATTEMPT_CONFIG(10000); const double MINIMUM_DETECTOR_SCORE(0.1); const std::size_t NUMBER_OF_MOST_FREQUENT_FIELDS_COUNTS(10); std::string DEFAULT_DETECTOR_CONFIG_LINE_ENDING("\n"); -const config_t::EFunctionCategory FUNCTION_CATEGORIES[] = {config_t::E_Count, - config_t::E_Rare, - config_t::E_DistinctCount, - config_t::E_InfoContent, - config_t::E_Mean, - config_t::E_Min, - config_t::E_Max, - config_t::E_Sum, - config_t::E_Varp, - config_t::E_Median}; +const config_t::EFunctionCategory FUNCTION_CATEGORIES[] = { + config_t::E_Count, config_t::E_Rare, config_t::E_DistinctCount, + config_t::E_InfoContent, config_t::E_Mean, config_t::E_Min, + config_t::E_Max, config_t::E_Sum, config_t::E_Varp, + config_t::E_Median}; const std::size_t HIGH_NUMBER_BY_FIELD_VALUES(500); const std::size_t MAXIMUM_NUMBER_BY_FIELD_VALUES(1000); const std::size_t HIGH_NUMBER_RARE_BY_FIELD_VALUES(50000); @@ -462,7 +488,8 @@ const double LOW_POPULATED_BUCKET_FRACTIONS[] = {1.0 / 3.0, 1.0 / 50.0}; const double MINIMUM_POPULATED_BUCKET_FRACTIONS[] = {1.0 / 50.0, 1.0 / 500.0}; const double HIGH_POPULATED_BUCKET_FRACTIONS[] = {1.1, 1.0 / 10.0}; const double MAXIMUM_POPULATED_BUCKET_FRACTIONS[] = {1.2, 5.0 / 10.0}; -const core_t::TTime CANDIDATE_BUCKET_LENGTHS[] = {60, 300, 600, 1800, 3600, 7200, 14400, constants::LONGEST_BUCKET_LENGTH}; +const core_t::TTime CANDIDATE_BUCKET_LENGTHS[] = { + 60, 300, 600, 1800, 3600, 7200, 14400, constants::LONGEST_BUCKET_LENGTH}; const double LOW_NUMBER_OF_BUCKETS_FOR_CONFIG(500.0); const double MINIMUM_NUMBER_OF_BUCKETS_FOR_CONFIG(50.0); const double POLLED_DATA_MINIMUM_MASS_AT_INTERVAL(0.99); @@ -483,12 +510,11 @@ CAutoconfigurerParams::CAutoconfigurerParams(const std::string& timeFieldName, const std::string& timeFieldFormat, bool verbose, bool writeDetectorConfigs) - : m_TimeFieldName(timeFieldName), - m_TimeFieldFormat(timeFieldFormat), - m_Verbose(verbose), - m_WriteDetectorConfigs(writeDetectorConfigs), + : m_TimeFieldName(timeFieldName), m_TimeFieldFormat(timeFieldFormat), + m_Verbose(verbose), m_WriteDetectorConfigs(writeDetectorConfigs), m_DetectorConfigLineEnding(DEFAULT_DETECTOR_CONFIG_LINE_ENDING), - m_FunctionCategoriesToConfigure(boost::begin(FUNCTION_CATEGORIES), boost::end(FUNCTION_CATEGORIES)), + 
m_FunctionCategoriesToConfigure(boost::begin(FUNCTION_CATEGORIES), + boost::end(FUNCTION_CATEGORIES)), m_MinimumExamplesToClassify(MINIMUM_EXAMPLES_TO_CLASSIFY), m_NumberOfMostFrequentFieldsCounts(NUMBER_OF_MOST_FREQUENT_FIELDS_COUNTS), m_MinimumRecordsToAttemptConfig(MINIMUM_RECORDS_TO_ATTEMPT_CONFIG), @@ -505,11 +531,16 @@ CAutoconfigurerParams::CAutoconfigurerParams(const std::string& timeFieldName, m_HighCardinalityInTailIncrement(HIGH_CARDINALITY_IN_TAIL_INCREMENT), m_HighCardinalityHighTailFraction(HIGH_CARDINALITY_HIGH_TAIL_FRACTION), m_HighCardinalityMaximumTailFraction(HIGH_CARDINALITY_MAXIMUM_TAIL_FRACTION), - m_LowPopulatedBucketFractions(boost::begin(LOW_POPULATED_BUCKET_FRACTIONS), boost::end(LOW_POPULATED_BUCKET_FRACTIONS)), - m_MinimumPopulatedBucketFractions(boost::begin(MINIMUM_POPULATED_BUCKET_FRACTIONS), boost::end(MINIMUM_POPULATED_BUCKET_FRACTIONS)), - m_HighPopulatedBucketFractions(boost::begin(HIGH_POPULATED_BUCKET_FRACTIONS), boost::end(HIGH_POPULATED_BUCKET_FRACTIONS)), - m_MaximumPopulatedBucketFractions(boost::begin(MAXIMUM_POPULATED_BUCKET_FRACTIONS), boost::end(MAXIMUM_POPULATED_BUCKET_FRACTIONS)), - m_CandidateBucketLengths(boost::begin(CANDIDATE_BUCKET_LENGTHS), boost::end(CANDIDATE_BUCKET_LENGTHS)), + m_LowPopulatedBucketFractions(boost::begin(LOW_POPULATED_BUCKET_FRACTIONS), + boost::end(LOW_POPULATED_BUCKET_FRACTIONS)), + m_MinimumPopulatedBucketFractions(boost::begin(MINIMUM_POPULATED_BUCKET_FRACTIONS), + boost::end(MINIMUM_POPULATED_BUCKET_FRACTIONS)), + m_HighPopulatedBucketFractions(boost::begin(HIGH_POPULATED_BUCKET_FRACTIONS), + boost::end(HIGH_POPULATED_BUCKET_FRACTIONS)), + m_MaximumPopulatedBucketFractions(boost::begin(MAXIMUM_POPULATED_BUCKET_FRACTIONS), + boost::end(MAXIMUM_POPULATED_BUCKET_FRACTIONS)), + m_CandidateBucketLengths(boost::begin(CANDIDATE_BUCKET_LENGTHS), + boost::end(CANDIDATE_BUCKET_LENGTHS)), m_LowNumberOfBucketsForConfig(LOW_NUMBER_OF_BUCKETS_FOR_CONFIG), m_MinimumNumberOfBucketsForConfig(MINIMUM_NUMBER_OF_BUCKETS_FOR_CONFIG), m_PolledDataMinimumMassAtInterval(POLLED_DATA_MINIMUM_MASS_AT_INTERVAL), @@ -551,159 +582,187 @@ bool CAutoconfigurerParams::init(const std::string& file) { static const core_t::TTime ZERO_TIME = 0; static const double ZERO_DOUBLE = 0.0; static const double ONE_DOUBLE = 1.0; - static const std::string LABELS[] = {std::string("scope.fields_of_interest"), - std::string("scope.permitted_argument_fields"), - std::string("scope.permitted_by_fields"), - std::string("scope.permitted_over_fields"), - std::string("scope.permitted_partition_fields"), - std::string("scope.functions_of_interest"), - std::string("statistics.field_data_types"), - std::string("statistics.minimum_examples_to_classify"), - std::string("statistics.number_of_most_frequent_to_count"), - std::string("configuration.minimum_records_to_attempt_config"), - std::string("configuration.high_number_of_by_fields"), - std::string("configuration.maximum_number_of_by_fields"), - std::string("configuration.high_number_of_rare_by_fields"), - std::string("configuration.maximum_number_of_rare_by_fields"), - std::string("configuration.high_number_of_partition_fields"), - std::string("configuration.maximum_of_number_partition_fields"), - std::string("configuration.low_number_of_over_fields"), - std::string("configuration.minimum_number_of_over_fields"), - std::string("configuration.high_cardinality_in_tail_factor"), - std::string("configuration.high_cardinality_in_tail_increment"), - std::string("configuration.high_cardinality_high_tail_fraction"), - 
std::string("configuration.high_cardinality_maximum_tail_fraction"), - std::string("configuration.low_populated_bucket_ratio"), - std::string("configuration.minimum_populated_bucket_ratio"), - std::string("configuration.high_populated_bucket_ratio"), - std::string("configuration.maximum_populated_bucket_ratio"), - std::string("configuration.candidate_bucket_lengths"), - std::string("configuration.low_number_buckets_for_config"), - std::string("configuration.minimum_number_buckets_for_config"), - std::string("configuration.polled_data_minimum_mass_at_interval"), - std::string("configuration.polled_data_jitter"), - std::string("configuration.low_coefficient_of_variation"), - std::string("configuration.minimum_coefficient_of_variation"), - std::string("configuration.low_length_range_for_info_content"), - std::string("configuration.minimum_length_range_for_info_content"), - std::string("configuration.low_maximum_length_for_info_content"), - std::string("configuration.minimum_maximum_length_for_info_content"), - std::string("configuration.low_entropy_for_info_content"), - std::string("configuration.minimum_entropy_for_info_content"), - std::string("configuration.low_distinct_count_for_info_content"), - std::string("configuration.minimum_distinct_count_for_info_content") + static const std::string LABELS[] = { + std::string("scope.fields_of_interest"), + std::string("scope.permitted_argument_fields"), + std::string("scope.permitted_by_fields"), + std::string("scope.permitted_over_fields"), + std::string("scope.permitted_partition_fields"), + std::string("scope.functions_of_interest"), + std::string("statistics.field_data_types"), + std::string("statistics.minimum_examples_to_classify"), + std::string("statistics.number_of_most_frequent_to_count"), + std::string("configuration.minimum_records_to_attempt_config"), + std::string("configuration.high_number_of_by_fields"), + std::string("configuration.maximum_number_of_by_fields"), + std::string("configuration.high_number_of_rare_by_fields"), + std::string("configuration.maximum_number_of_rare_by_fields"), + std::string("configuration.high_number_of_partition_fields"), + std::string("configuration.maximum_of_number_partition_fields"), + std::string("configuration.low_number_of_over_fields"), + std::string("configuration.minimum_number_of_over_fields"), + std::string("configuration.high_cardinality_in_tail_factor"), + std::string("configuration.high_cardinality_in_tail_increment"), + std::string("configuration.high_cardinality_high_tail_fraction"), + std::string("configuration.high_cardinality_maximum_tail_fraction"), + std::string("configuration.low_populated_bucket_ratio"), + std::string("configuration.minimum_populated_bucket_ratio"), + std::string("configuration.high_populated_bucket_ratio"), + std::string("configuration.maximum_populated_bucket_ratio"), + std::string("configuration.candidate_bucket_lengths"), + std::string("configuration.low_number_buckets_for_config"), + std::string("configuration.minimum_number_buckets_for_config"), + std::string("configuration.polled_data_minimum_mass_at_interval"), + std::string("configuration.polled_data_jitter"), + std::string("configuration.low_coefficient_of_variation"), + std::string("configuration.minimum_coefficient_of_variation"), + std::string("configuration.low_length_range_for_info_content"), + std::string("configuration.minimum_length_range_for_info_content"), + std::string("configuration.low_maximum_length_for_info_content"), + std::string("configuration.minimum_maximum_length_for_info_content"), 
+ std::string("configuration.low_entropy_for_info_content"), + std::string("configuration.minimum_entropy_for_info_content"), + std::string("configuration.low_distinct_count_for_info_content"), + std::string("configuration.minimum_distinct_count_for_info_content") }; TParameterPtr parameters[] = { - TParameterPtr(new COptionalStrVecParameter(m_FieldsOfInterest, new CNotEmpty)), - TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX])), - TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::BY_INDEX])), - TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX])), - TParameterPtr(new COptionalStrVecParameter(m_FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX])), + TParameterPtr(new COptionalStrVecParameter(m_FieldsOfInterest, + new CNotEmpty)), + TParameterPtr(new COptionalStrVecParameter( + m_FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX])), + TParameterPtr(new COptionalStrVecParameter( + m_FieldsToUseInAutoconfigureByRole[constants::BY_INDEX])), + TParameterPtr(new COptionalStrVecParameter( + m_FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX])), + TParameterPtr(new COptionalStrVecParameter( + m_FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX])), TParameterPtr(new CFunctionCategoryParameter(m_FunctionCategoriesToConfigure)), TParameterPtr(new CFieldDataTypeParameter(m_FieldDataTypes)), TParameterPtr(new CBuiltinParameter(m_MinimumExamplesToClassify)), TParameterPtr(new CBuiltinParameter(m_NumberOfMostFrequentFieldsCounts)), - TParameterPtr(new CBuiltinParameter(m_MinimumRecordsToAttemptConfig, - new CValueIs(m_MinimumExamplesToClassify))), + TParameterPtr(new CBuiltinParameter( + m_MinimumRecordsToAttemptConfig, + new CValueIs(m_MinimumExamplesToClassify))), TParameterPtr(new CBuiltinParameter(m_HighNumberByFieldValues)), - TParameterPtr(new CBuiltinParameter(m_MaximumNumberByFieldValues, - new CValueIs(m_HighNumberByFieldValues))), + TParameterPtr(new CBuiltinParameter( + m_MaximumNumberByFieldValues, + new CValueIs(m_HighNumberByFieldValues))), TParameterPtr(new CBuiltinParameter(m_HighNumberRareByFieldValues)), - TParameterPtr(new CBuiltinParameter(m_MaximumNumberRareByFieldValues, - new CValueIs(m_HighNumberRareByFieldValues))), + TParameterPtr(new CBuiltinParameter( + m_MaximumNumberRareByFieldValues, + new CValueIs(m_HighNumberRareByFieldValues))), TParameterPtr(new CBuiltinParameter(m_HighNumberPartitionFieldValues)), - TParameterPtr(new CBuiltinParameter(m_MaximumNumberPartitionFieldValues, - new CValueIs(m_HighNumberPartitionFieldValues))), + TParameterPtr(new CBuiltinParameter( + m_MaximumNumberPartitionFieldValues, + new CValueIs(m_HighNumberPartitionFieldValues))), TParameterPtr(new CBuiltinParameter(m_LowNumberOverFieldValues)), - TParameterPtr(new CBuiltinParameter(m_MinimumNumberOverFieldValues, - new CValueIs(m_LowNumberOverFieldValues))), - TParameterPtr(new CBuiltinParameter(m_HighCardinalityInTailFactor, new CValueIs(ONE_DOUBLE))), + TParameterPtr(new CBuiltinParameter( + m_MinimumNumberOverFieldValues, + new CValueIs(m_LowNumberOverFieldValues))), + TParameterPtr(new CBuiltinParameter( + m_HighCardinalityInTailFactor, new CValueIs(ONE_DOUBLE))), TParameterPtr(new CBuiltinParameter(m_HighCardinalityInTailIncrement)), - TParameterPtr(new CBuiltinParameter(m_HighCardinalityHighTailFraction, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new 
CValueIs(ONE_DOUBLE)))), - TParameterPtr( - new CBuiltinParameter(m_HighCardinalityMaximumTailFraction, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(m_HighCardinalityHighTailFraction)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinVectorParameter(m_LowPopulatedBucketFractions, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)) - ->addConstraint(new CSizeIs(2)))), + TParameterPtr(new CBuiltinParameter( + m_HighCardinalityHighTailFraction, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_HighCardinalityMaximumTailFraction, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(m_HighCardinalityHighTailFraction)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinVectorParameter( + m_LowPopulatedBucketFractions, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)) + ->addConstraint(new CSizeIs(2)))), TParameterPtr(new CBuiltinVectorParameter( m_MinimumPopulatedBucketFractions, (new CConstraintConjunction) ->addConstraint(new CValueIs(ZERO_DOUBLE)) ->addConstraint(new CVectorValueIs(m_LowPopulatedBucketFractions)) ->addConstraint(new CSizeIs(2)))), - TParameterPtr(new CBuiltinParameter(m_HighPopulatedBucketFractions[1], - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr( - new CBuiltinParameter(m_MaximumPopulatedBucketFractions[1], - (new CConstraintConjunction) - ->addConstraint(new CVectorValueIs(m_HighPopulatedBucketFractions)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinVectorParameter(m_CandidateBucketLengths, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_TIME)) - ->addConstraint(new CNotEmpty))), - TParameterPtr(new CBuiltinParameter(m_LowNumberOfBucketsForConfig, new CValueIs(ZERO_DOUBLE))), - TParameterPtr(new CBuiltinParameter(m_MinimumNumberOfBucketsForConfig, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowNumberOfBucketsForConfig)))), - TParameterPtr(new CBuiltinParameter(m_PolledDataMinimumMassAtInterval, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinParameter(m_PolledDataJitter, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_HighPopulatedBucketFractions[1], + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_MaximumPopulatedBucketFractions[1], + (new CConstraintConjunction) + ->addConstraint(new CVectorValueIs(m_HighPopulatedBucketFractions)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinVectorParameter( + m_CandidateBucketLengths, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_TIME)) + ->addConstraint(new CNotEmpty))), + TParameterPtr(new CBuiltinParameter( + m_LowNumberOfBucketsForConfig, new CValueIs(ZERO_DOUBLE))), + TParameterPtr(new CBuiltinParameter( + m_MinimumNumberOfBucketsForConfig, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + 
->addConstraint(new CValueIs(m_LowNumberOfBucketsForConfig)))), + TParameterPtr(new CBuiltinParameter( + m_PolledDataMinimumMassAtInterval, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_PolledDataJitter, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), TParameterPtr(new CBuiltinParameter( m_LowCoefficientOfVariation, - (new CConstraintConjunction)->addConstraint(new CValueIs(ZERO_DOUBLE)))), - TParameterPtr(new CBuiltinParameter(m_MinimumCoefficientOfVariation, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowCoefficientOfVariation)))), + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_MinimumCoefficientOfVariation, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowCoefficientOfVariation)))), TParameterPtr(new CBuiltinParameter( m_LowLengthRangeForInfoContent, - (new CConstraintConjunction)->addConstraint(new CValueIs(ZERO_DOUBLE)))), - TParameterPtr(new CBuiltinParameter(m_MinimumLengthRangeForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowLengthRangeForInfoContent)))), + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_MinimumLengthRangeForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowLengthRangeForInfoContent)))), TParameterPtr(new CBuiltinParameter( m_LowMaximumLengthForInfoContent, - (new CConstraintConjunction)->addConstraint(new CValueIs(ZERO_DOUBLE)))), - TParameterPtr( - new CBuiltinParameter(m_MinimumMaximumLengthForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowMaximumLengthForInfoContent)))), - TParameterPtr(new CBuiltinParameter(m_LowEntropyForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(ONE_DOUBLE)))), - TParameterPtr(new CBuiltinParameter(m_MinimumEntropyForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new CValueIs(m_LowEntropyForInfoContent)))), + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_MinimumMaximumLengthForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowMaximumLengthForInfoContent)))), + TParameterPtr(new CBuiltinParameter( + m_LowEntropyForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(ONE_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_MinimumEntropyForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowEntropyForInfoContent)))), TParameterPtr(new CBuiltinParameter( m_LowDistinctCountForInfoContent, - (new CConstraintConjunction)->addConstraint(new CValueIs(ZERO_DOUBLE)))), - TParameterPtr( - new CBuiltinParameter(m_MinimumDistinctCountForInfoContent, - (new CConstraintConjunction) - ->addConstraint(new CValueIs(ZERO_DOUBLE)) - ->addConstraint(new 
CValueIs(m_LowDistinctCountForInfoContent))))}; + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)))), + TParameterPtr(new CBuiltinParameter( + m_MinimumDistinctCountForInfoContent, + (new CConstraintConjunction) + ->addConstraint(new CValueIs(ZERO_DOUBLE)) + ->addConstraint(new CValueIs(m_LowDistinctCountForInfoContent))))}; bool result = true; for (std::size_t i = 0u; i < boost::size(LABELS); ++i) { @@ -740,35 +799,45 @@ const std::string& CAutoconfigurerParams::detectorConfigLineEnding() const { bool CAutoconfigurerParams::fieldOfInterest(const std::string& field) const { if (m_FieldsOfInterest) { - return std::find(m_FieldsOfInterest->begin(), m_FieldsOfInterest->end(), field) != m_FieldsOfInterest->end(); + return std::find(m_FieldsOfInterest->begin(), m_FieldsOfInterest->end(), + field) != m_FieldsOfInterest->end(); } return true; } bool CAutoconfigurerParams::canUseForFunctionArgument(const std::string& argument) const { - return canUse(m_FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX], m_FieldsOfInterest, argument); + return canUse(m_FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX], + m_FieldsOfInterest, argument); } bool CAutoconfigurerParams::canUseForByField(const std::string& by) const { - return canUse(m_FieldsToUseInAutoconfigureByRole[constants::BY_INDEX], m_FieldsOfInterest, by); + return canUse(m_FieldsToUseInAutoconfigureByRole[constants::BY_INDEX], + m_FieldsOfInterest, by); } bool CAutoconfigurerParams::canUseForOverField(const std::string& over) const { - return canUse(m_FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX], m_FieldsOfInterest, over); + return canUse(m_FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX], + m_FieldsOfInterest, over); } bool CAutoconfigurerParams::canUseForPartitionField(const std::string& partition) const { - return canUse(m_FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX], m_FieldsOfInterest, partition); + return canUse(m_FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX], + m_FieldsOfInterest, partition); } -const CAutoconfigurerParams::TFunctionCategoryVec& CAutoconfigurerParams::functionsCategoriesToConfigure() const { +const CAutoconfigurerParams::TFunctionCategoryVec& +CAutoconfigurerParams::functionsCategoriesToConfigure() const { return m_FunctionCategoriesToConfigure; } -CAutoconfigurerParams::TOptionalUserDataType CAutoconfigurerParams::dataType(const std::string& field) const { +CAutoconfigurerParams::TOptionalUserDataType +CAutoconfigurerParams::dataType(const std::string& field) const { TStrUserDataTypePrVec::const_iterator result = - std::lower_bound(m_FieldDataTypes.begin(), m_FieldDataTypes.end(), field, maths::COrderings::SFirstLess()); - return result != m_FieldDataTypes.end() && result->first == field ? TOptionalUserDataType(result->second) : TOptionalUserDataType(); + std::lower_bound(m_FieldDataTypes.begin(), m_FieldDataTypes.end(), + field, maths::COrderings::SFirstLess()); + return result != m_FieldDataTypes.end() && result->first == field + ? 
TOptionalUserDataType(result->second) + : TOptionalUserDataType(); } uint64_t CAutoconfigurerParams::minimumExamplesToClassify() const { @@ -835,19 +904,23 @@ double CAutoconfigurerParams::highCardinalityMaximumTailFraction() const { return m_HighCardinalityMaximumTailFraction; } -double CAutoconfigurerParams::lowPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const { +double CAutoconfigurerParams::lowPopulatedBucketFraction(config_t::EFunctionCategory function, + bool ignoreEmpty) const { return m_LowPopulatedBucketFractions[config_t::hasDoAndDontIgnoreEmptyVersions(function) && ignoreEmpty]; } -double CAutoconfigurerParams::minimumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const { +double CAutoconfigurerParams::minimumPopulatedBucketFraction(config_t::EFunctionCategory function, + bool ignoreEmpty) const { return m_MinimumPopulatedBucketFractions[config_t::hasDoAndDontIgnoreEmptyVersions(function) && ignoreEmpty]; } -double CAutoconfigurerParams::highPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const { +double CAutoconfigurerParams::highPopulatedBucketFraction(config_t::EFunctionCategory function, + bool ignoreEmpty) const { return m_HighPopulatedBucketFractions[config_t::hasDoAndDontIgnoreEmptyVersions(function) && ignoreEmpty]; } -double CAutoconfigurerParams::maximumPopulatedBucketFraction(config_t::EFunctionCategory function, bool ignoreEmpty) const { +double CAutoconfigurerParams::maximumPopulatedBucketFraction(config_t::EFunctionCategory function, + bool ignoreEmpty) const { return m_MaximumPopulatedBucketFractions[config_t::hasDoAndDontIgnoreEmptyVersions(function) && ignoreEmpty]; } @@ -911,28 +984,31 @@ double CAutoconfigurerParams::minimumDistinctCountForInfoContent() const { return m_MinimumDistinctCountForInfoContent; } -const CAutoconfigurerParams::TSizeVec& CAutoconfigurerParams::penaltyIndicesFor(std::size_t bid) const { +const CAutoconfigurerParams::TSizeVec& +CAutoconfigurerParams::penaltyIndicesFor(std::size_t bid) const { return m_BucketLengthPenaltyIndices[bid]; } -const CAutoconfigurerParams::TSizeVec& CAutoconfigurerParams::penaltyIndicesFor(bool ignoreEmpty) const { +const CAutoconfigurerParams::TSizeVec& +CAutoconfigurerParams::penaltyIndicesFor(bool ignoreEmpty) const { return m_IgnoreEmptyPenaltyIndices[ignoreEmpty]; } std::size_t CAutoconfigurerParams::penaltyIndexFor(std::size_t bid, bool ignoreEmpty) const { TSizeVec result; - std::set_intersection(this->penaltyIndicesFor(bid).begin(), - this->penaltyIndicesFor(bid).end(), - this->penaltyIndicesFor(ignoreEmpty).begin(), - this->penaltyIndicesFor(ignoreEmpty).end(), - std::back_inserter(result)); + std::set_intersection( + this->penaltyIndicesFor(bid).begin(), this->penaltyIndicesFor(bid).end(), + this->penaltyIndicesFor(ignoreEmpty).begin(), + this->penaltyIndicesFor(ignoreEmpty).end(), std::back_inserter(result)); return result[0]; } std::string CAutoconfigurerParams::print() const { #define PRINT_STRING(field) result += " " #field " = " + m_##field + "\n" -#define PRINT_VALUE(field) result += " " #field " = " + core::CStringUtils::typeToString(m_##field) + "\n" -#define PRINT_CONTAINER(field) result += " " #field " = " + core::CContainerPrinter::print(m_##field) + "\n" +#define PRINT_VALUE(field) \ + result += " " #field " = " + core::CStringUtils::typeToString(m_##field) + "\n" +#define PRINT_CONTAINER(field) \ + result += " " #field " = " + core::CContainerPrinter::print(m_##field) + "\n" std::string 
result; PRINT_STRING(TimeFieldName); @@ -952,9 +1028,11 @@ std::string CAutoconfigurerParams::print() const { result += "]\n"; result += " FieldDataType = ["; if (m_FieldDataTypes.size() > 0) { - result += "(" + m_FieldDataTypes[0].first + "," + config_t::print(m_FieldDataTypes[0].second) + ")"; + result += "(" + m_FieldDataTypes[0].first + "," + + config_t::print(m_FieldDataTypes[0].second) + ")"; for (std::size_t i = 1u; i < m_FieldDataTypes.size(); ++i) { - result += ", (" + m_FieldDataTypes[i].first + "," + config_t::print(m_FieldDataTypes[i].second) + ")"; + result += ", (" + m_FieldDataTypes[i].first + "," + + config_t::print(m_FieldDataTypes[i].second) + ")"; } } result += "]\n"; @@ -999,7 +1077,8 @@ std::string CAutoconfigurerParams::print() const { void CAutoconfigurerParams::refreshPenaltyIndices() { m_BucketLengthPenaltyIndices.resize(m_CandidateBucketLengths.size(), TSizeVec(2)); m_IgnoreEmptyPenaltyIndices.resize(2, TSizeVec(m_CandidateBucketLengths.size())); - for (std::size_t i = 0u, n = m_CandidateBucketLengths.size(); i < m_CandidateBucketLengths.size(); ++i) { + for (std::size_t i = 0u, n = m_CandidateBucketLengths.size(); + i < m_CandidateBucketLengths.size(); ++i) { m_BucketLengthPenaltyIndices[i][0] = 0 + i; m_BucketLengthPenaltyIndices[i][1] = n + i; m_IgnoreEmptyPenaltyIndices[0][i] = 0 + i; diff --git a/lib/config/CDataCountStatistics.cc b/lib/config/CDataCountStatistics.cc index 9a561b44cc..1471136e92 100644 --- a/lib/config/CDataCountStatistics.cc +++ b/lib/config/CDataCountStatistics.cc @@ -47,9 +47,11 @@ TBoolVec bucketSampleMask(core_t::TTime bucketLength) { //! Insert with the same semantics as boost::unordered_map/set::emplace. template -std::size_t emplace(const std::string* name, std::vector>& stats) { - std::size_t i = - static_cast(std::lower_bound(stats.begin(), stats.end(), name, maths::COrderings::SFirstLess()) - stats.begin()); +std::size_t emplace(const std::string* name, + std::vector>& stats) { + std::size_t i = static_cast( + std::lower_bound(stats.begin(), stats.end(), name, maths::COrderings::SFirstLess()) - + stats.begin()); if (i == stats.size()) { stats.push_back(std::make_pair(name, T())); } else if (*name != *stats[i].first) { @@ -77,7 +79,8 @@ class CCountStatisticsKey { } bool operator==(const CCountStatisticsKey& rhs) const { - return std::equal(boost::begin(m_Fields), boost::end(m_Fields), boost::begin(rhs.m_Fields)); + return std::equal(boost::begin(m_Fields), boost::end(m_Fields), + boost::begin(rhs.m_Fields)); } private: @@ -87,7 +90,9 @@ class CCountStatisticsKey { //! \brief Hashes a count statistic key. class CCountStatisticsKeyHasher { public: - std::size_t operator()(const CCountStatisticsKey& key) const { return key.hash(); } + std::size_t operator()(const CCountStatisticsKey& key) const { + return key.hash(); + } }; //! Get some partition statistics on the heap. 
@@ -116,14 +121,19 @@ const maths::CQuantileSketch QUANTILES(maths::CQuantileSketch::E_Linear, CS_SIZE //////// CBucketCountStatistics //////// -void CBucketCountStatistics::add(const TSizeSizeSizeTr& partition, TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) { +void CBucketCountStatistics::add(const TSizeSizeSizeTr& partition, + TDetectorRecordCItr beginRecords, + TDetectorRecordCItr endRecords) { ++m_CurrentBucketPartitionCounts[partition]; for (TDetectorRecordCItr record = beginRecords; record != endRecords; ++record) { if (record->function() == config_t::E_DistinctCount) { if (const std::string* name = record->argumentFieldName()) { const std::string& value = *record->argumentFieldValue(); std::size_t i = emplace(name, m_CurrentBucketArgumentDataPerPartition); - SBucketArgumentData& data = m_CurrentBucketArgumentDataPerPartition[i].second.emplace(partition, BJKST).first->second; + SBucketArgumentData& data = + m_CurrentBucketArgumentDataPerPartition[i] + .second.emplace(partition, BJKST) + .first->second; data.s_DistinctValues.add(CTools::category32(value)); data.s_MeanStringLength.add(static_cast(value.length())); } @@ -136,7 +146,8 @@ void CBucketCountStatistics::capture() { using TSizeSizeSizeTrArgumentDataUMapItr = TSizeSizeSizeTrArgumentDataUMap::iterator; m_BucketPartitionCount += m_CurrentBucketPartitionCounts.size(); - for (TSizeSizeSizeTrUInt64UMapCItr i = m_CurrentBucketPartitionCounts.begin(); i != m_CurrentBucketPartitionCounts.end(); ++i) { + for (TSizeSizeSizeTrUInt64UMapCItr i = m_CurrentBucketPartitionCounts.begin(); + i != m_CurrentBucketPartitionCounts.end(); ++i) { TSizeSizePr id(i->first.first, i->first.third); double count = static_cast(i->second); m_CountMomentsPerPartition[id].add(count); @@ -146,9 +157,11 @@ void CBucketCountStatistics::capture() { for (std::size_t i = 0u; i < m_CurrentBucketArgumentDataPerPartition.size(); ++i) { const std::string* name = m_CurrentBucketArgumentDataPerPartition[i].first; - TSizeSizeSizeTrArgumentDataUMap& values = m_CurrentBucketArgumentDataPerPartition[i].second; + TSizeSizeSizeTrArgumentDataUMap& values = + m_CurrentBucketArgumentDataPerPartition[i].second; std::size_t j = emplace(name, m_ArgumentMomentsPerPartition); - for (TSizeSizeSizeTrArgumentDataUMapItr k = values.begin(); k != values.end(); ++k) { + for (TSizeSizeSizeTrArgumentDataUMapItr k = values.begin(); + k != values.end(); ++k) { TSizeSizePr id(k->first.first, k->first.third); SArgumentMoments& moments = m_ArgumentMomentsPerPartition[j].second[id]; double dc = static_cast(k->second.s_DistinctValues.number()); @@ -164,28 +177,33 @@ uint64_t CBucketCountStatistics::bucketPartitionCount() const { return m_BucketPartitionCount; } -const CBucketCountStatistics::TSizeSizePrMomentsUMap& CBucketCountStatistics::countMomentsPerPartition() const { +const CBucketCountStatistics::TSizeSizePrMomentsUMap& +CBucketCountStatistics::countMomentsPerPartition() const { return m_CountMomentsPerPartition; } -const CBucketCountStatistics::TSizeSizePrQuantileUMap& CBucketCountStatistics::countQuantilesPerPartition() const { +const CBucketCountStatistics::TSizeSizePrQuantileUMap& +CBucketCountStatistics::countQuantilesPerPartition() const { return m_CountQuantiles; } const CBucketCountStatistics::TSizeSizePrArgumentMomentsUMap& CBucketCountStatistics::argumentMomentsPerPartition(const std::string& name) const { - using TStrCPtrPartitionArgumentMomentsUMapPrVecCItr = TStrCPtrSizeSizePrArgumentMomentsUMapPrVec::const_iterator; + using 
TStrCPtrPartitionArgumentMomentsUMapPrVecCItr = + TStrCPtrSizeSizePrArgumentMomentsUMapPrVec::const_iterator; static const TSizeSizePrArgumentMomentsUMap EMPTY; TStrCPtrPartitionArgumentMomentsUMapPrVecCItr result = std::lower_bound( - m_ArgumentMomentsPerPartition.begin(), m_ArgumentMomentsPerPartition.end(), &name, maths::COrderings::SFirstLess()); - return result != m_ArgumentMomentsPerPartition.end() && *result->first == name ? result->second : EMPTY; + m_ArgumentMomentsPerPartition.begin(), m_ArgumentMomentsPerPartition.end(), + &name, maths::COrderings::SFirstLess()); + return result != m_ArgumentMomentsPerPartition.end() && *result->first == name + ? result->second + : EMPTY; } //////// CDataCountStatistics //////// CDataCountStatistics::CDataCountStatistics(const CAutoconfigurerParams& params) - : m_Params(params), - m_RecordCount(0), + : m_Params(params), m_RecordCount(0), m_ArrivalTimeDistribution(maths::CQuantileSketch::E_PiecewiseConstant, SKETCH_SIZE), m_BucketIndices(params.candidateBucketLengths().size(), 0), m_BucketCounts(params.candidateBucketLengths().size(), 0), @@ -194,7 +212,8 @@ CDataCountStatistics::CDataCountStatistics(const CAutoconfigurerParams& params) m_BucketMasks.reserve(candidates.size()); for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { m_BucketMasks.push_back(bucketSampleMask(candidates[bid])); - maths::CSampling::random_shuffle(m_Rng, m_BucketMasks[bid].begin(), m_BucketMasks[bid].end()); + maths::CSampling::random_shuffle(m_Rng, m_BucketMasks[bid].begin(), + m_BucketMasks[bid].end()); } } @@ -219,14 +238,16 @@ void CDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecord const TTimeVec& candidates = this->params().candidateBucketLengths(); for (std::size_t bid = 0u; bid < m_LastBucketEndTimes.size(); ++bid) { if (time - m_LastBucketEndTimes[bid] >= candidates[bid]) { - for (core_t::TTime i = 0; i < (time - m_LastBucketEndTimes[bid]) / candidates[bid]; ++i) { + for (core_t::TTime i = 0; + i < (time - m_LastBucketEndTimes[bid]) / candidates[bid]; ++i) { if (m_BucketMasks[bid][m_BucketIndices[bid]++]) { ++m_BucketCounts[bid]; m_BucketStatistics[bid].capture(); } if ((m_BucketIndices[bid] % m_BucketMasks.size()) == 0) { m_BucketIndices[bid] = 0; - maths::CSampling::random_shuffle(m_Rng, m_BucketMasks[bid].begin(), m_BucketMasks[bid].end()); + maths::CSampling::random_shuffle(m_Rng, m_BucketMasks[bid].begin(), + m_BucketMasks[bid].end()); } } m_LastBucketEndTimes[bid] = maths::CIntegerTools::floor(time, candidates[bid]); @@ -299,10 +320,12 @@ void CDataCountStatistics::fillLastBucketEndTimes(core_t::TTime time) { //////// CPartitionDataCountStatistics //////// -CPartitionDataCountStatistics::CPartitionDataCountStatistics(const CAutoconfigurerParams& params) : CDataCountStatistics(params) { +CPartitionDataCountStatistics::CPartitionDataCountStatistics(const CAutoconfigurerParams& params) + : CDataCountStatistics(params) { } -void CPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) { +void CPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, + TDetectorRecordCItr endRecords) { if (beginRecords != endRecords) { this->CDataCountStatistics::add(beginRecords, endRecords); } @@ -310,10 +333,12 @@ void CPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetec //////// CByAndPartitionDataCountStatistics //////// -CByAndPartitionDataCountStatistics::CByAndPartitionDataCountStatistics(const CAutoconfigurerParams& params) : CDataCountStatistics(params) { 
+CByAndPartitionDataCountStatistics::CByAndPartitionDataCountStatistics(const CAutoconfigurerParams& params) + : CDataCountStatistics(params) { } -void CByAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) { +void CByAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, + TDetectorRecordCItr endRecords) { if (beginRecords != endRecords) { this->CDataCountStatistics::add(beginRecords, endRecords); } @@ -325,7 +350,8 @@ CByOverAndPartitionDataCountStatistics::CByOverAndPartitionDataCountStatistics(c : CDataCountStatistics(params) { } -void CByOverAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, TDetectorRecordCItr endRecords) { +void CByOverAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecords, + TDetectorRecordCItr endRecords) { if (beginRecords == endRecords) { return; } @@ -338,7 +364,8 @@ void CByOverAndPartitionDataCountStatistics::add(TDetectorRecordCItr beginRecord if (this->samplePartition(partition)) { std::size_t by = beginRecords->byFieldValueHash(); std::size_t over = beginRecords->overFieldValueHash(); - TSizeSizePrCBjkstUMapItr i = m_DistinctOverValues.emplace(std::make_pair(by, partition), BJKST).first; + TSizeSizePrCBjkstUMapItr i = + m_DistinctOverValues.emplace(std::make_pair(by, partition), BJKST).first; i->second.add(CTools::category32(over)); } } @@ -350,11 +377,13 @@ CByOverAndPartitionDataCountStatistics::sampledByAndPartitionDistinctOverCounts( //////// CDataCountStatisticsDirectAddressTable //////// -CDataCountStatisticsDirectAddressTable::CDataCountStatisticsDirectAddressTable(const CAutoconfigurerParams& params) : m_Params(params) { +CDataCountStatisticsDirectAddressTable::CDataCountStatisticsDirectAddressTable(const CAutoconfigurerParams& params) + : m_Params(params) { } void CDataCountStatisticsDirectAddressTable::build(const TDetectorSpecificationVec& specs) { - using TCountStatisticsKeySizeUMap = boost::unordered_map; + using TCountStatisticsKeySizeUMap = + boost::unordered_map; std::size_t size = 0u; for (std::size_t i = 0u; i < specs.size(); ++i) { @@ -401,25 +430,30 @@ void CDataCountStatisticsDirectAddressTable::pruneUnsed(const TDetectorSpecifica ++last; } } - m_DataCountStatistics.erase(m_DataCountStatistics.begin() + last, m_DataCountStatistics.end()); + m_DataCountStatistics.erase(m_DataCountStatistics.begin() + last, + m_DataCountStatistics.end()); m_RecordSchema.erase(m_RecordSchema.begin() + last, m_RecordSchema.end()); } void CDataCountStatisticsDirectAddressTable::add(const TDetectorRecordVec& records) { for (std::size_t i = 0u; i < m_RecordSchema.size(); ++i) { - m_DataCountStatistics[i]->add(core::begin_masked(records, m_RecordSchema[i]), core::end_masked(records, m_RecordSchema[i])); + m_DataCountStatistics[i]->add(core::begin_masked(records, m_RecordSchema[i]), + core::end_masked(records, m_RecordSchema[i])); } } -const CDataCountStatistics& CDataCountStatisticsDirectAddressTable::statistics(const CDetectorSpecification& spec) const { +const CDataCountStatistics& +CDataCountStatisticsDirectAddressTable::statistics(const CDetectorSpecification& spec) const { return *m_DataCountStatistics[m_DetectorSchema[spec.id()]]; } CDataCountStatisticsDirectAddressTable::TDataCountStatisticsPtr CDataCountStatisticsDirectAddressTable::stats(const CDetectorSpecification& spec) const { using TStatistics = CDataCountStatistics* (*)(const CAutoconfigurerParams&); - static TStatistics STATISTICS[] = {&partitionCountStatistics, &byAndPartitionStatistics, 
&byOverAndPartitionStatistics};
-    return TDataCountStatisticsPtr((STATISTICS[spec.overField() ? 2 : (spec.byField() ? 1 : 0)])(m_Params));
+    static TStatistics STATISTICS[] = {&partitionCountStatistics, &byAndPartitionStatistics,
+                                       &byOverAndPartitionStatistics};
+    return TDataCountStatisticsPtr(
+        (STATISTICS[spec.overField() ? 2 : (spec.byField() ? 1 : 0)])(m_Params));
 }
 }
 }
diff --git a/lib/config/CDataSemantics.cc b/lib/config/CDataSemantics.cc
index 0ce44df313..871d85396b 100644
--- a/lib/config/CDataSemantics.cc
+++ b/lib/config/CDataSemantics.cc
@@ -31,7 +31,8 @@ using TSizeVec = std::vector<std::size_t>;
 //! versus a categorical model for the data.
 class CMixtureData {
 public:
-    CMixtureData(double count, std::size_t N) : m_Count(count), m_Classifier(N) {}
+    CMixtureData(double count, std::size_t N)
+        : m_Count(count), m_Classifier(N) {}

     //! Add the data point \p xi with count \p ni.
     void add(double xi, double ni) { m_Classifier.add(xi, ni); }
@@ -54,7 +55,8 @@ class CMixtureData {
         for (std::size_t i = 0u; i < m_Categories.size(); ++i) {
             double ci = maths::CBasicStatistics::count(m_Categories[i]);
             double vi = maths::CBasicStatistics::maximumLikelihoodVariance(m_Categories[i]);
-            double si = std::max(3.0 * std::sqrt(vi), 1.0 / boost::math::constants::root_two_pi<double>());
+            double si = std::max(3.0 * std::sqrt(vi),
+                                 1.0 / boost::math::constants::root_two_pi<double>());
             scale.add(static_cast<double>(counts[i]) / si, ci);
         }
         return maths::CBasicStatistics::mean(scale);
@@ -79,7 +81,8 @@ class CMixtureData {
             double ci = maths::CBasicStatistics::count(m_Categories[i]);
             double mi = maths::CBasicStatistics::mean(m_Categories[i]);
             double vi = maths::CBasicStatistics::maximumLikelihoodVariance(m_Categories[i]);
-            double si = std::max(std::sqrt(vi), 1.0 / boost::math::constants::root_two_pi<double>());
+            double si = std::max(std::sqrt(vi),
+                                 1.0 / boost::math::constants::root_two_pi<double>());
             m_Gmm.weights().push_back(ci / m_Count);
             m_Gmm.modes().push_back(boost::math::normal_distribution<>(mi, si));
         }
@@ -87,7 +90,9 @@ class CMixtureData {

     //! Get the number of parameters in the mixture.
-    double parameters() const { return 3.0 * static_cast<double>(m_Categories.size()) - 1.0; }
+    double parameters() const {
+        return 3.0 * static_cast<double>(m_Categories.size()) - 1.0;
+    }

     //! Compute the value of the density function at \p x.
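
// The class above turns per-cluster (count, mean, variance) moments into mixture
// weights and normal modes, flooring each spread at 1/sqrt(2*pi) so degenerate
// clusters still have a density, and charges the mixture 3k - 1 free parameters
// for the BIC comparison. A self-contained sketch of that bookkeeping, written
// against the standard library only; the cluster moments are invented.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

struct Mode {
    double weight;
    double mean;
    double sd;
};

double normalPdf(double x, double mean, double sd) {
    static const double ROOT_TWO_PI = std::sqrt(2.0 * std::acos(-1.0));
    double z = (x - mean) / sd;
    return std::exp(-0.5 * z * z) / (sd * ROOT_TWO_PI);
}

double mixturePdf(const std::vector<Mode>& modes, double x) {
    double result = 0.0;
    for (const Mode& mode : modes) {
        result += mode.weight * normalPdf(x, mode.mean, mode.sd);
    }
    return result;
}

int main() {
    const double ROOT_TWO_PI = std::sqrt(2.0 * std::acos(-1.0));
    // Per-cluster (count, mean, variance); the counts sum to 100.
    const double clusters[][3] = {{60.0, 0.0, 1.0}, {40.0, 5.0, 0.0}};
    std::vector<Mode> modes;
    for (const auto& c : clusters) {
        // Floor the spread so a zero-variance cluster still has a density.
        double sd = std::max(std::sqrt(c[2]), 1.0 / ROOT_TWO_PI);
        modes.push_back({c[0] / 100.0, c[1], sd});
    }
    double k = static_cast<double>(modes.size());
    // k - 1 free weights, k means, k variances.
    std::cout << "free parameters = " << 3.0 * k - 1.0 << '\n';
    std::cout << "pdf(1.0) = " << mixturePdf(modes, 1.0) << '\n';
    return 0;
}
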
double pdf(double x) const { return maths::pdf(m_Gmm, x); } @@ -112,11 +117,8 @@ class CMixtureData { } CDataSemantics::CDataSemantics(TOptionalUserDataType override) - : m_Type(config_t::E_UndeterminedType), - m_Override(override), - m_Count(0.0), - m_NumericProportion(0.0), - m_IntegerProportion(0.0), + : m_Type(config_t::E_UndeterminedType), m_Override(override), m_Count(0.0), + m_NumericProportion(0.0), m_IntegerProportion(0.0), m_EmpiricalDistributionOverflowed(false) { } @@ -142,11 +144,14 @@ void CDataSemantics::add(const std::string& example) { m_Smallest.add(value); m_Largest.add(value); } else if (m_NonNumericValues.size() < 2 && - std::find(m_NonNumericValues.begin(), m_NonNumericValues.end(), trimmed) == m_NonNumericValues.end()) { + std::find(m_NonNumericValues.begin(), m_NonNumericValues.end(), + trimmed) == m_NonNumericValues.end()) { m_NonNumericValues.push_back(trimmed); } - if (m_DistinctValues.size() < 3 && std::find(m_DistinctValues.begin(), m_DistinctValues.end(), example) == m_DistinctValues.end()) { + if (m_DistinctValues.size() < 3 && + std::find(m_DistinctValues.begin(), m_DistinctValues.end(), example) == + m_DistinctValues.end()) { m_DistinctValues.push_back(example); } @@ -206,12 +211,14 @@ config_t::EDataType CDataSemantics::realType() const { } config_t::EDataType CDataSemantics::integerType() const { - return m_Smallest[0] < maths::COrdinal(uint64_t(0)) ? config_t::E_Integer : config_t::E_PositiveInteger; + return m_Smallest[0] < maths::COrdinal(uint64_t(0)) ? config_t::E_Integer + : config_t::E_PositiveInteger; } bool CDataSemantics::isNumeric() const { return m_NumericProportion >= NUMERIC_PROPORTION_FOR_METRIC_STRICT || - (m_NonNumericValues.size() < 2 && m_NumericProportion >= NUMERIC_PROPORTION_FOR_METRIC_WITH_SUSPECTED_MISSING_VALUES); + (m_NonNumericValues.size() < 2 && + m_NumericProportion >= NUMERIC_PROPORTION_FOR_METRIC_WITH_SUSPECTED_MISSING_VALUES); } bool CDataSemantics::isInteger() const { @@ -237,7 +244,8 @@ bool CDataSemantics::GMMGoodFit() const { LOG_TRACE(<< "offset = " << offset); double categoricalBIC = static_cast(N - 1) * logc; - for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); i != m_EmpiricalDistribution.end(); ++i) { + for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); + i != m_EmpiricalDistribution.end(); ++i) { double ni = static_cast(i->second); categoricalBIC -= 2.0 * ni * std::log(ni / m_Count); } @@ -250,7 +258,8 @@ bool CDataSemantics::GMMGoodFit() const { double scale = 1.0; { CMixtureData scaling(m_Count, N); - for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); i != m_EmpiricalDistribution.end(); ++i) { + for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); + i != m_EmpiricalDistribution.end(); ++i) { double xi = i->first.asDouble(); double ni = static_cast(i->second); scaling.add(xi, ni); @@ -261,7 +270,8 @@ bool CDataSemantics::GMMGoodFit() const { CMixtureData light(m_Count, N); CMixtureData heavy(m_Count, N); - for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); i != m_EmpiricalDistribution.end(); ++i) { + for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); + i != m_EmpiricalDistribution.end(); ++i) { double xi = smallest + scale * (i->first.asDouble() - smallest); double ni = static_cast(i->second); light.add(xi, ni); @@ -274,20 +284,27 @@ bool CDataSemantics::GMMGoodFit() const { double lightGmmBIC = light.parameters() * logc; double heavyGmmBIC = heavy.parameters() * logc; - for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); i != 
m_EmpiricalDistribution.end(); ++i) { + for (TOrdinalSizeUMapCItr i = m_EmpiricalDistribution.begin(); + i != m_EmpiricalDistribution.end(); ++i) { double xi = smallest + scale * (i->first.asDouble() - smallest); double ni = static_cast(i->second); double fx = light.pdf(xi); double gx = 1.0 / (xi + offset) * heavy.pdf(std::log(xi + offset)); - lightGmmBIC -= 2.0 * ni * (fx == 0.0 ? boost::numeric::bounds::lowest() : std::log(fx)); - heavyGmmBIC -= 2.0 * ni * (gx == 0.0 ? boost::numeric::bounds::lowest() : std::log(gx)); + lightGmmBIC -= 2.0 * ni * + (fx == 0.0 ? boost::numeric::bounds::lowest() + : std::log(fx)); + heavyGmmBIC -= 2.0 * ni * + (gx == 0.0 ? boost::numeric::bounds::lowest() + : std::log(gx)); } LOG_TRACE(<< "light BIC = " << lightGmmBIC << ", heavy BIC = " << heavyGmmBIC); if (std::min(lightGmmBIC, heavyGmmBIC) < categoricalBIC) { return true; } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute BIC for " << m << " modes: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to compute BIC for " << m << " modes: " << e.what()); + } } return false; diff --git a/lib/config/CDataSummaryStatistics.cc b/lib/config/CDataSummaryStatistics.cc index 5da6d641bd..6a935cad4b 100644 --- a/lib/config/CDataSummaryStatistics.cc +++ b/lib/config/CDataSummaryStatistics.cc @@ -60,7 +60,8 @@ core_t::TTime CDataSummaryStatistics::latest() const { } double CDataSummaryStatistics::meanRate() const { - return static_cast(m_Count) / static_cast(m_Latest[0] - m_Earliest[0]); + return static_cast(m_Count) / + static_cast(m_Latest[0] - m_Earliest[0]); } void CDataSummaryStatistics::add(core_t::TTime time) { @@ -69,34 +70,31 @@ void CDataSummaryStatistics::add(core_t::TTime time) { ++m_Count; } -CCategoricalDataSummaryStatistics::CCategoricalDataSummaryStatistics(std::size_t n, std::size_t toApproximate) - : m_ToApproximate(toApproximate), - m_Approximating(toApproximate == 0), +CCategoricalDataSummaryStatistics::CCategoricalDataSummaryStatistics(std::size_t n, + std::size_t toApproximate) + : m_ToApproximate(toApproximate), m_Approximating(toApproximate == 0), m_DistinctValues(DS_NUMBER_HASHES, DS_MAX_SIZE), - m_CountSketch(CS_ROWS, CS_COLUMNS), - m_N(std::max(n, std::size_t(1))), + m_CountSketch(CS_ROWS, CS_COLUMNS), m_N(std::max(n, std::size_t(1))), m_TopN(topNSize(m_N)), // This is important to stop invalidation of // the lowest top-n iterator by an insertion. - m_LowestTopN(m_TopN.end()), - m_EmpiricalEntropy(ES_K), - m_DistinctNGrams(NUMBER_N_GRAMS, maths::CBjkstUniqueValues(DS_NUMBER_HASHES, DS_MAX_SIZE)), + m_LowestTopN(m_TopN.end()), m_EmpiricalEntropy(ES_K), + m_DistinctNGrams(NUMBER_N_GRAMS, + maths::CBjkstUniqueValues(DS_NUMBER_HASHES, DS_MAX_SIZE)), m_NGramEmpricalEntropy(NUMBER_N_GRAMS, maths::CEntropySketch(ES_K)) { } CCategoricalDataSummaryStatistics::CCategoricalDataSummaryStatistics(const CDataSummaryStatistics& other, std::size_t n, std::size_t toApproximate) - : CDataSummaryStatistics(other), - m_ToApproximate(toApproximate), + : CDataSummaryStatistics(other), m_ToApproximate(toApproximate), m_Approximating(toApproximate == 0), m_DistinctValues(DS_NUMBER_HASHES, DS_MAX_SIZE), - m_CountSketch(CS_ROWS, CS_COLUMNS), - m_N(std::max(n, std::size_t(1))), + m_CountSketch(CS_ROWS, CS_COLUMNS), m_N(std::max(n, std::size_t(1))), m_TopN(topNSize(m_N)), // This is important to stop invalidation of // the lowest top-n iterator by an insertion. 
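
// GMMGoodFit above compares BIC = p * log(n) - 2 * log-likelihood between a
// categorical model (N - 1 parameters, empirical frequencies) and the light-
// and heavy-tailed mixtures; a mixture that undercuts the categorical score is
// judged a good fit. A simplified, standalone illustration of the categorical
// side of that comparison; the data are invented.
#include <cmath>
#include <iostream>
#include <map>

int main() {
    // Empirical distribution: value -> count (invented data).
    const std::map<double, unsigned> empirical = {{1.0, 50}, {2.0, 30}, {3.0, 20}};
    double n = 0.0;
    for (const auto& vc : empirical) {
        n += vc.second;
    }
    double N = static_cast<double>(empirical.size());
    // BIC = (number of parameters) * log(n) - 2 * log-likelihood.
    double categoricalBIC = (N - 1.0) * std::log(n);
    for (const auto& vc : empirical) {
        double ni = static_cast<double>(vc.second);
        categoricalBIC -= 2.0 * ni * std::log(ni / n);
    }
    std::cout << "categorical BIC = " << categoricalBIC << '\n';
    // A mixture with p parameters and log-likelihood ll is preferred whenever
    // p * std::log(n) - 2.0 * ll < categoricalBIC.
    return 0;
}
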
- m_LowestTopN(m_TopN.end()), - m_EmpiricalEntropy(ES_K), - m_DistinctNGrams(NUMBER_N_GRAMS, maths::CBjkstUniqueValues(DS_NUMBER_HASHES, DS_MAX_SIZE)), + m_LowestTopN(m_TopN.end()), m_EmpiricalEntropy(ES_K), + m_DistinctNGrams(NUMBER_N_GRAMS, + maths::CBjkstUniqueValues(DS_NUMBER_HASHES, DS_MAX_SIZE)), m_NGramEmpricalEntropy(NUMBER_N_GRAMS, maths::CEntropySketch(ES_K)) { } @@ -191,7 +189,8 @@ double CCategoricalDataSummaryStatistics::meanCountInRemainders() const { } return static_cast(this->count() - std::min(total, this->count())) / - static_cast(std::max(static_cast(m_DistinctValues.number()), m_TopN.size())); + static_cast(std::max( + static_cast(m_DistinctValues.number()), m_TopN.size())); } void CCategoricalDataSummaryStatistics::addNGrams(std::size_t n, const std::string& example) { @@ -216,9 +215,11 @@ void CCategoricalDataSummaryStatistics::approximateIfCardinalityTooHigh() { } void CCategoricalDataSummaryStatistics::updateCalibrators(std::size_t category_) { - uint32_t category = m_Approximating ? static_cast(category_) : CTools::category32(category_); - std::size_t i = - std::lower_bound(m_Calibrators.begin(), m_Calibrators.end(), category, maths::COrderings::SFirstLess()) - m_Calibrators.begin(); + uint32_t category = m_Approximating ? static_cast(category_) + : CTools::category32(category_); + std::size_t i = std::lower_bound(m_Calibrators.begin(), m_Calibrators.end(), + category, maths::COrderings::SFirstLess()) - + m_Calibrators.begin(); if (i == m_Calibrators.size() || m_Calibrators[i].first != category) { if (m_Calibrators.size() < 5) { m_Calibrators.insert(m_Calibrators.begin() + i, std::make_pair(category, 1)); @@ -238,14 +239,17 @@ double CCategoricalDataSummaryStatistics::calibratedCount(std::size_t category) TMeanAccumulator error; if (m_CountSketch.sketched()) { for (std::size_t j = 0u; j < m_Calibrators.size(); ++j) { - error.add(m_CountSketch.count(m_Calibrators[j].first) - static_cast(m_Calibrators[j].second)); + error.add(m_CountSketch.count(m_Calibrators[j].first) - + static_cast(m_Calibrators[j].second)); } } - return m_CountSketch.count(static_cast(category)) - maths::CBasicStatistics::mean(error); + return m_CountSketch.count(static_cast(category)) - + maths::CBasicStatistics::mean(error); } void CCategoricalDataSummaryStatistics::findLowestTopN() { - using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMinAccumulator = + maths::CBasicStatistics::COrderStatisticsStack; TMinAccumulator lowest; for (TStrUInt64UMapItr i = m_TopN.begin(); i != m_TopN.end(); ++i) { lowest.add(i); @@ -254,7 +258,8 @@ void CCategoricalDataSummaryStatistics::findLowestTopN() { } void CCategoricalDataSummaryStatistics::topN(TStrUInt64UMapCItrVec& result) const { - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; + using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsHeap; TMaxAccumulator topN(m_N); for (TStrUInt64UMapCItr i = m_TopN.begin(); i != m_TopN.end(); ++i) { topN.add(i); @@ -264,31 +269,30 @@ void CCategoricalDataSummaryStatistics::topN(TStrUInt64UMapCItrVec& result) cons } CNumericDataSummaryStatistics::CNumericDataSummaryStatistics(bool integer) - : m_NonNumericCount(0), - m_QuantileSketch(maths::CQuantileSketch::E_Linear, QS_SIZE), + : m_NonNumericCount(0), m_QuantileSketch(maths::CQuantileSketch::E_Linear, QS_SIZE), m_Clusters(integer ? 
maths_t::E_IntegerData : maths_t::E_ContinuousData, maths::CAvailableModeDistributions::NORMAL, maths_t::E_ClustersFractionWeight, 0.0, // No decay CLUSTER_MINIMUM_FRACTION, // We're only interested in clusters which - // comprise at least 0.5% of the data. - CLUSTER_MINIMUM_COUNT) // We need a few points to get a reasonable - // variance estimate. + // comprise at least 0.5% of the data. + CLUSTER_MINIMUM_COUNT) // We need a few points to get a reasonable + // variance estimate. { } -CNumericDataSummaryStatistics::CNumericDataSummaryStatistics(const CDataSummaryStatistics& other, bool integer) - : CDataSummaryStatistics(other), - m_NonNumericCount(0), +CNumericDataSummaryStatistics::CNumericDataSummaryStatistics(const CDataSummaryStatistics& other, + bool integer) + : CDataSummaryStatistics(other), m_NonNumericCount(0), m_QuantileSketch(maths::CQuantileSketch::E_Linear, QS_SIZE), m_Clusters(integer ? maths_t::E_IntegerData : maths_t::E_ContinuousData, maths::CAvailableModeDistributions::NORMAL, maths_t::E_ClustersFractionWeight, 0.0, // No decay CLUSTER_MINIMUM_FRACTION, // We're only interested in clusters which - // comprise at least 0.5% of the data. - CLUSTER_MINIMUM_COUNT) // Need a few points to get a reasonable - // variance estimate. + // comprise at least 0.5% of the data. + CLUSTER_MINIMUM_COUNT) // Need a few points to get a reasonable + // variance estimate. { } @@ -346,14 +350,18 @@ bool CNumericDataSummaryStatistics::densityChart(TDoubleDoublePrVec& result) con weights.reserve(n); modes.reserve(n); for (std::size_t i = 0u; i < n; ++i) { - LOG_TRACE(<< "weight = " << clusters[i].count() << ", mean = " << clusters[i].centre() << ", sd = " << clusters[i].spread()); + LOG_TRACE(<< "weight = " << clusters[i].count() << ", mean = " + << clusters[i].centre() << ", sd = " << clusters[i].spread()); weights.push_back(clusters[i].count()); - modes.push_back(boost::math::normal_distribution<>(clusters[i].centre(), clusters[i].spread())); + modes.push_back(boost::math::normal_distribution<>( + clusters[i].centre(), clusters[i].spread())); } TGMM gmm(weights, modes); - static const double QUANTILES[] = {0.001, 0.005, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.995, 0.999}; + static const double QUANTILES[] = {0.001, 0.005, 0.05, 0.1, 0.2, + 0.3, 0.4, 0.5, 0.6, 0.7, + 0.8, 0.9, 0.95, 0.995, 0.999}; TDoubleVec pillars; pillars.reserve(boost::size(QUANTILES)); diff --git a/lib/config/CDetectorEnumerator.cc b/lib/config/CDetectorEnumerator.cc index 01631007be..6a705c8d55 100644 --- a/lib/config/CDetectorEnumerator.cc +++ b/lib/config/CDetectorEnumerator.cc @@ -54,7 +54,8 @@ void add(std::size_t p, } } -CDetectorEnumerator::CDetectorEnumerator(const CAutoconfigurerParams& params) : m_Params(params) { +CDetectorEnumerator::CDetectorEnumerator(const CAutoconfigurerParams& params) + : m_Params(params) { } void CDetectorEnumerator::addFunction(config_t::EFunctionCategory category) { @@ -112,22 +113,30 @@ void CDetectorEnumerator::generateNoPartitioning(TDetectorSpecificationVec& resu std::size_t id = result.size(); if (config_t::hasArgument(function)) { - const TStrVec& arguments = - config_t::isMetric(function) ? m_CandidateMetricFunctionArguments : m_CandidateCategoricalFunctionArguments; + const TStrVec& arguments = config_t::isMetric(function) + ? 
m_CandidateMetricFunctionArguments + : m_CandidateCategoricalFunctionArguments; for (std::size_t j = 0u; j < arguments.size(); ++j) { - result.push_back(CDetectorSpecification(m_Params, function, arguments[j], id)); + result.push_back(CDetectorSpecification(m_Params, function, + arguments[j], id)); } } else { result.push_back(CDetectorSpecification(m_Params, function, id)); } - } catch (std::exception& e) { LOG_ERROR(<< "Bad detector: " << e.what()); } + } catch (std::exception& e) { + LOG_ERROR(<< "Bad detector: " << e.what()); + } } } -void CDetectorEnumerator::addOnePartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const { - TStrVecCRef candidates[] = { - boost::cref(m_CandidateByFields), boost::cref(m_CandidateOverFields), boost::cref(m_CandidatePartitionFields)}; - add(boost::size(constants::CFieldIndices::PARTITIONING), constants::CFieldIndices::PARTITIONING, candidates, a, b, result); +void CDetectorEnumerator::addOnePartitioning(std::size_t a, + std::size_t b, + TDetectorSpecificationVec& result) const { + TStrVecCRef candidates[] = {boost::cref(m_CandidateByFields), + boost::cref(m_CandidateOverFields), + boost::cref(m_CandidatePartitionFields)}; + add(boost::size(constants::CFieldIndices::PARTITIONING), + constants::CFieldIndices::PARTITIONING, candidates, a, b, result); for (std::size_t i = 0u; i < m_Functions.size(); ++i) { config_t::EFunctionCategory function = m_Functions[i]; @@ -136,20 +145,28 @@ void CDetectorEnumerator::addOnePartitioning(std::size_t a, std::size_t b, TDete for (std::size_t j = 0u; j < m_CandidateRareByFields.size(); ++j) { std::size_t id = result.size(); result.push_back(CDetectorSpecification(m_Params, function, id)); - result.back().addPartitioning(constants::BY_INDEX, m_CandidateRareByFields[j]); + result.back().addPartitioning(constants::BY_INDEX, + m_CandidateRareByFields[j]); } - } catch (std::exception& e) { LOG_ERROR(<< "Bad detector: " << e.what()); } + } catch (std::exception& e) { + LOG_ERROR(<< "Bad detector: " << e.what()); + } } } } -void CDetectorEnumerator::addTwoPartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const { +void CDetectorEnumerator::addTwoPartitioning(std::size_t a, + std::size_t b, + TDetectorSpecificationVec& result) const { static std::size_t OVER_AND_PARTITION[] = {constants::OVER_INDEX, constants::PARTITION_INDEX}; - TStrVecCRef candidates[] = {boost::cref(m_CandidateOverFields), boost::cref(m_CandidatePartitionFields)}; + TStrVecCRef candidates[] = {boost::cref(m_CandidateOverFields), + boost::cref(m_CandidatePartitionFields)}; add(boost::size(OVER_AND_PARTITION), OVER_AND_PARTITION, candidates, a, b, result); } -void CDetectorEnumerator::addThreePartitioning(std::size_t a, std::size_t b, TDetectorSpecificationVec& result) const { +void CDetectorEnumerator::addThreePartitioning(std::size_t a, + std::size_t b, + TDetectorSpecificationVec& result) const { static std::size_t PARTITION[] = {constants::PARTITION_INDEX}; TStrVecCRef candidates[] = {boost::cref(m_CandidatePartitionFields)}; add(boost::size(PARTITION), PARTITION, candidates, a, b, result); diff --git a/lib/config/CDetectorFieldRolePenalty.cc b/lib/config/CDetectorFieldRolePenalty.cc index 6b2ba65538..4447b9aead 100644 --- a/lib/config/CDetectorFieldRolePenalty.cc +++ b/lib/config/CDetectorFieldRolePenalty.cc @@ -27,7 +27,8 @@ const TGetStatistics STATISTIC[] = { }; } -CDetectorFieldRolePenalty::CDetectorFieldRolePenalty(const CAutoconfigurerParams& params) : CPenalty(params) { 
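
// The enumerator above grows detector specifications by crossing the existing
// set with per-role candidate lists (by, over, partition), one role per pass,
// so roles are only ever added in increasing order. A toy, self-contained
// version of that expansion; the Spec type and field names are invented.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Spec {
    std::string function;
    std::vector<std::string> fields; // indexed by role; "" means unset
};

int main() {
    const std::vector<std::string> candidates[3] = {
        {"status"}, {"client_ip"}, {"data_centre"}}; // by, over, partition
    std::vector<Spec> specs{{"count", {"", "", ""}}};
    // One pass per role: cross every spec built so far with every candidate
    // for that role; only specs from earlier passes are extended.
    for (std::size_t role = 0; role < 3; ++role) {
        std::size_t n = specs.size();
        for (std::size_t i = 0; i < n; ++i) {
            for (const std::string& value : candidates[role]) {
                Spec extended = specs[i];
                extended.fields[role] = value;
                specs.push_back(extended);
            }
        }
    }
    // With one candidate per role this yields 2^3 = 8 specifications: every
    // subset of {by, over, partition} applied to the base "count" detector.
    std::cout << specs.size() << " candidate specifications\n";
    return 0;
}
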
+CDetectorFieldRolePenalty::CDetectorFieldRolePenalty(const CAutoconfigurerParams& params) + : CPenalty(params) { std::fill_n(m_FieldRolePenalties, constants::NUMBER_FIELD_INDICES, nullptr); } @@ -39,7 +40,8 @@ std::string CDetectorFieldRolePenalty::name() const { std::string arguments; for (std::size_t i = 0u; i < constants::NUMBER_FIELD_INDICES; ++i) { if (m_FieldRolePenalties[i]) { - arguments += (arguments.empty() ? "'" : ", '") + constants::name(i) + ' ' + m_FieldRolePenalties[i]->name() + "'"; + arguments += (arguments.empty() ? "'" : ", '") + constants::name(i) + + ' ' + m_FieldRolePenalties[i]->name() + "'"; } } return "field role penalty(" + arguments + ")"; diff --git a/lib/config/CDetectorRecord.cc b/lib/config/CDetectorRecord.cc index cab0e6fc83..81031b1464 100644 --- a/lib/config/CDetectorRecord.cc +++ b/lib/config/CDetectorRecord.cc @@ -43,7 +43,8 @@ CDetectorRecord::CDetectorRecord(core_t::TTime time, const TStrCPtrAry& fieldNames, const TStrCPtrAry& fieldValues, const TSizeAry& hashedFieldValues) - : m_Time(time), m_Function(function), m_FieldNames(fieldNames), m_FieldValues(fieldValues), m_HashedFieldValues(hashedFieldValues) { + : m_Time(time), m_Function(function), m_FieldNames(fieldNames), + m_FieldValues(fieldValues), m_HashedFieldValues(hashedFieldValues) { } core_t::TTime CDetectorRecord::time() const { @@ -103,8 +104,10 @@ std::size_t CDetectorRecord::partitionFieldValueHash() const { } std::string CDetectorRecord::print() const { - return core::CStringUtils::typeToString(m_Time) + ' ' + extract(this->argumentFieldValue()) + ' ' + extract(this->byFieldValue()) + - ' ' + extract(this->overFieldValue()) + ' ' + extract(this->partitionFieldValue()); + return core::CStringUtils::typeToString(m_Time) + ' ' + + extract(this->argumentFieldValue()) + ' ' + + extract(this->byFieldValue()) + ' ' + extract(this->overFieldValue()) + + ' ' + extract(this->partitionFieldValue()); } void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec& specs) { @@ -117,7 +120,8 @@ void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec& s size_t size = 0u; for (std::size_t i = 0u; i < specs.size(); ++i) { for (std::size_t j = 0u; j < boost::size(FIELDS); ++j) { - if (const CDetectorSpecification::TOptionalStr& field = ((specs[i]).*FIELDS[j])()) { + if (const CDetectorSpecification::TOptionalStr& field = + ((specs[i]).*FIELDS[j])()) { uniques.emplace(*field, uniques.size()); } } @@ -137,9 +141,11 @@ void CDetectorRecordDirectAddressTable::build(const TDetectorSpecificationVec& s TSizeAry entry; for (std::size_t j = 0u; j < boost::size(FIELDS); ++j) { const CDetectorSpecification::TOptionalStr& field = ((specs[i]).*FIELDS[j])(); - entry[constants::CFieldIndices::ALL[j]] = field ? uniques[*field] : m_FieldSchema.size(); + entry[constants::CFieldIndices::ALL[j]] = field ? uniques[*field] + : m_FieldSchema.size(); } - LOG_TRACE(<< "Fields for " << specs[i].description() << " = " << core::CContainerPrinter::print(entry)); + LOG_TRACE(<< "Fields for " << specs[i].description() << " = " + << core::CContainerPrinter::print(entry)); m_DetectorFieldSchema[specs[i].id()] = entry; } } @@ -164,7 +170,8 @@ void CDetectorRecordDirectAddressTable::detectorRecords(core_t::TTime time, for (std::size_t i = 0u; i < m_FieldSchema.size(); ++i) { TStrStrUMapCItr j = fieldValues.find(m_FieldSchema[i].first); m_FieldValueTable[i] = j != fieldValues.end() ? &j->second : nullptr; - m_HashedFieldValueTable[i] = HASHER(m_FieldValueTable[i] ? 
*m_FieldValueTable[i] : NULL_STRING); + m_HashedFieldValueTable[i] = + HASHER(m_FieldValueTable[i] ? *m_FieldValueTable[i] : NULL_STRING); } CDetectorRecord::TStrCPtrAry ni; diff --git a/lib/config/CDetectorSpecification.cc b/lib/config/CDetectorSpecification.cc index 0d2c4730a8..ede94f97e6 100644 --- a/lib/config/CDetectorSpecification.cc +++ b/lib/config/CDetectorSpecification.cc @@ -34,7 +34,9 @@ class CNameEquals { public: CNameEquals(const std::string& value) : m_Value(&value) {} - bool operator()(const CFieldStatistics& stats) const { return stats.name() == *m_Value; } + bool operator()(const CFieldStatistics& stats) const { + return stats.name() == *m_Value; + } private: const std::string* m_Value; @@ -96,22 +98,25 @@ const bool IGNORE_EMPTY[] = {false, true}; //! Get the ignore empty unique identifier. std::size_t ignoreEmptyId(bool ignoreEmpty) { - return std::find(boost::begin(IGNORE_EMPTY), boost::end(IGNORE_EMPTY), ignoreEmpty) - boost::begin(IGNORE_EMPTY); + return std::find(boost::begin(IGNORE_EMPTY), boost::end(IGNORE_EMPTY), ignoreEmpty) - + boost::begin(IGNORE_EMPTY); } } -CDetectorSpecification::CDetectorSpecification(const CAutoconfigurerParams& params, config_t::EFunctionCategory function, std::size_t id) - : m_Params(params), - m_Function(function), - m_Side(config_t::hasSidedCalculation(function) ? config_t::E_UndeterminedSide : config_t::E_TwoSide), +CDetectorSpecification::CDetectorSpecification(const CAutoconfigurerParams& params, + config_t::EFunctionCategory function, + std::size_t id) + : m_Params(params), m_Function(function), + m_Side(config_t::hasSidedCalculation(function) ? config_t::E_UndeterminedSide + : config_t::E_TwoSide), m_IgnoreEmpty(config_t::hasDoAndDontIgnoreEmptyVersions(function) ? E_Maybe : E_True), m_Penalties(2 * params.candidateBucketLengths().size()), m_PenaltyDescriptions(2 * params.candidateBucketLengths().size()), - m_Id(id), - m_CountStatistics(nullptr) { + m_Id(id), m_CountStatistics(nullptr) { this->initializePenalties(); if (config_t::hasArgument(function)) { - throw std::logic_error(std::string("No argument supplied for '") + config_t::print(function) + "'"); + throw std::logic_error(std::string("No argument supplied for '") + + config_t::print(function) + "'"); } std::fill_n(m_FieldStatistics, constants::NUMBER_FIELD_INDICES, nullptr); } @@ -120,17 +125,17 @@ CDetectorSpecification::CDetectorSpecification(const CAutoconfigurerParams& para config_t::EFunctionCategory function, const std::string& argument, std::size_t id) - : m_Params(params), - m_Function(function), - m_Side(config_t::hasSidedCalculation(function) ? config_t::E_UndeterminedSide : config_t::E_TwoSide), + : m_Params(params), m_Function(function), + m_Side(config_t::hasSidedCalculation(function) ? config_t::E_UndeterminedSide + : config_t::E_TwoSide), m_IgnoreEmpty(config_t::hasDoAndDontIgnoreEmptyVersions(function) ? 
E_Maybe : E_True), m_Penalties(2 * params.candidateBucketLengths().size()), m_PenaltyDescriptions(2 * params.candidateBucketLengths().size()), - m_Id(id), - m_CountStatistics(nullptr) { + m_Id(id), m_CountStatistics(nullptr) { this->initializePenalties(); if (!config_t::hasArgument(function)) { - LOG_ERROR(<< "Ignoring argument '" + argument + "' for '" + config_t::print(function) + "'"); + LOG_ERROR(<< "Ignoring argument '" + argument + "' for '" + + config_t::print(function) + "'"); } else { m_FunctionFields[constants::ARGUMENT_INDEX] = argument; } @@ -163,14 +168,16 @@ void CDetectorSpecification::ignoreEmpty(bool ignoreEmpty) { m_IgnoreEmpty = ignoreEmpty ? E_True : E_False; } -bool CDetectorSpecification::canAddPartitioning(std::size_t index, const std::string& value) const { +bool CDetectorSpecification::canAddPartitioning(std::size_t index, + const std::string& value) const { // Rules: // 1) We can only add a field to a detector whose index is greater // than any field currently set. // 2) We can't have duplicate fields. return static_cast(index) > this->highestFieldIndex() && - std::find(boost::begin(m_FunctionFields), boost::end(m_FunctionFields), value) == boost::end(m_FunctionFields); + std::find(boost::begin(m_FunctionFields), boost::end(m_FunctionFields), + value) == boost::end(m_FunctionFields); } void CDetectorSpecification::addPartitioning(std::size_t index, const std::string& value) { @@ -184,7 +191,8 @@ void CDetectorSpecification::addInfluencer(const std::string& influencer) { std::size_t n = m_Influencers.size(); m_Influencers.push_back(influencer); if (n > 0) { - std::inplace_merge(m_Influencers.begin(), m_Influencers.begin() + n, m_Influencers.end()); + std::inplace_merge(m_Influencers.begin(), m_Influencers.begin() + n, + m_Influencers.end()); } } @@ -195,7 +203,8 @@ void CDetectorSpecification::bucketLength(core_t::TTime bucketLength) { void CDetectorSpecification::addFieldStatistics(const TFieldStatisticsVec& stats) { for (std::size_t i = 0u; i < boost::size(constants::CFieldIndices::ALL); ++i) { if (const TOptionalStr& field = m_FunctionFields[constants::CFieldIndices::ALL[i]]) { - m_FieldStatistics[constants::CFieldIndices::ALL[i]] = &(*std::find_if(stats.begin(), stats.end(), CNameEquals(*field))); + m_FieldStatistics[constants::CFieldIndices::ALL[i]] = + &(*std::find_if(stats.begin(), stats.end(), CNameEquals(*field))); } } } @@ -226,7 +235,8 @@ void CDetectorSpecification::scores(TParamScoresVec& result) const { double score = CPenalty::score(m_Penalties[pid]); const TStrVec& descriptions = m_PenaltyDescriptions[pid]; if (score > this->params().minimumDetectorScore()) { - const std::string& name = config_t::ignoreEmptyVersionName(m_Function, IGNORE_EMPTY[iid], this->isPopulation()); + const std::string& name = config_t::ignoreEmptyVersionName( + m_Function, IGNORE_EMPTY[iid], this->isPopulation()); result.push_back(SParamScores(candidates[bid], name, score, descriptions)); } } @@ -246,7 +256,9 @@ void CDetectorSpecification::applyPenalty(double penalty, const std::string& des } } -void CDetectorSpecification::applyPenalties(const TSizeVec& indices, const TDoubleVec& penalties, const TStrVec& descriptions) { +void CDetectorSpecification::applyPenalties(const TSizeVec& indices, + const TDoubleVec& penalties, + const TStrVec& descriptions) { LOG_TRACE(<< "penalties = " << core::CContainerPrinter::print(penalties)); for (std::size_t i = 0u; i < indices.size(); ++i) { if (penalties[i] == 1.0) { @@ -295,7 +307,8 @@ void 
CDetectorSpecification::candidateBucketLengths(TTimeVec& result) const { const TTimeVec& candidates = this->params().candidateBucketLengths(); result.reserve(candidates.size()); for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { - if (CPenalty::score(maxPenalty(this->params().penaltyIndicesFor(bid), m_Penalties)) > 0.0) { + if (CPenalty::score(maxPenalty(this->params().penaltyIndicesFor(bid), + m_Penalties)) > 0.0) { result.push_back(candidates[bid]); } } @@ -306,10 +319,10 @@ bool CDetectorSpecification::isPopulation() const { } bool CDetectorSpecification::operator<(const CDetectorSpecification& rhs) const { -#define LESS(lhs, rhs) \ - if (lhs < rhs) \ - return true; \ - if (rhs < lhs) \ +#define LESS(lhs, rhs) \ + if (lhs < rhs) \ + return true; \ + if (rhs < lhs) \ return false LESS(m_Function, rhs.m_Function); @@ -324,18 +337,14 @@ bool CDetectorSpecification::operator<(const CDetectorSpecification& rhs) const return false; } - if (std::lexicographical_compare(boost::begin(m_FunctionFields), - boost::end(m_FunctionFields), + if (std::lexicographical_compare(boost::begin(m_FunctionFields), boost::end(m_FunctionFields), boost::begin(rhs.m_FunctionFields), - boost::end(rhs.m_FunctionFields), - less)) { + boost::end(rhs.m_FunctionFields), less)) { return true; } - if (std::lexicographical_compare(boost::begin(rhs.m_FunctionFields), - boost::end(rhs.m_FunctionFields), - boost::begin(m_FunctionFields), - boost::end(m_FunctionFields), - less)) { + if (std::lexicographical_compare( + boost::begin(rhs.m_FunctionFields), boost::end(rhs.m_FunctionFields), + boost::begin(m_FunctionFields), boost::end(m_FunctionFields), less)) { return false; } @@ -343,9 +352,10 @@ bool CDetectorSpecification::operator<(const CDetectorSpecification& rhs) const } bool CDetectorSpecification::operator==(const CDetectorSpecification& rhs) const { - return m_Function == rhs.m_Function && m_Side == rhs.m_Side && m_IgnoreEmpty == rhs.m_IgnoreEmpty && - m_BucketLength == rhs.m_BucketLength && - std::equal(boost::begin(m_FunctionFields), boost::end(m_FunctionFields), boost::begin(rhs.m_FunctionFields)) && + return m_Function == rhs.m_Function && m_Side == rhs.m_Side && + m_IgnoreEmpty == rhs.m_IgnoreEmpty && m_BucketLength == rhs.m_BucketLength && + std::equal(boost::begin(m_FunctionFields), boost::end(m_FunctionFields), + boost::begin(rhs.m_FunctionFields)) && m_Influencers == rhs.m_Influencers; } @@ -383,7 +393,8 @@ std::string CDetectorSpecification::detectorConfig() const { } using TDoubleTimePr = std::pair; - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsStack; const TTimeVec& candidates = this->params().candidateBucketLengths(); @@ -398,9 +409,10 @@ std::string CDetectorSpecification::detectorConfig() const { std::ostringstream result; if (CPenalty::score(best[0].first) > this->params().minimumDetectorScore()) { const std::string& newLine = this->params().detectorConfigLineEnding(); - result << "{" << newLine << " \"analysisConfig\": {" << newLine << " \"bucketSpan\": " << best[0].second << newLine << " }," - << newLine << " \"detectors\": [" << newLine << " {" << newLine << " \"function\":\"" << config_t::print(m_Function) - << "\""; + result << "{" << newLine << " \"analysisConfig\": {" << newLine + << " \"bucketSpan\": " << best[0].second << newLine << " }," + << newLine << " \"detectors\": [" << newLine << " {" << newLine + << " \"function\":\"" << config_t::print(m_Function) << "\""; if (const 
CDetectorSpecification::TOptionalStr& argument = this->argumentField()) { result << "," << newLine << " \"fieldName\": \"" << *argument << "\""; } @@ -411,7 +423,8 @@ std::string CDetectorSpecification::detectorConfig() const { result << "," << newLine << " \"overFieldName\": \"" << *over << "\""; } if (const CDetectorSpecification::TOptionalStr& partition = this->partitionField()) { - result << "," << newLine << " \"partitionFieldName\": \"" << *partition << "\""; + result << "," << newLine << " \"partitionFieldName\": \"" + << *partition << "\""; } result << newLine << " }" << newLine << " ]" << newLine << "}"; } @@ -420,11 +433,16 @@ std::string CDetectorSpecification::detectorConfig() const { std::string CDetectorSpecification::description() const { std::ostringstream result; - result << fullFunctionName(m_Side, m_IgnoreEmpty, this->isPopulation(), m_Function) - << (this->argumentField() ? std::string("(") + *this->argumentField() + ")" : std::string()) - << (this->byField() ? std::string(" by '") + *this->byField() + "'" : std::string()) - << (this->overField() ? std::string(" over '") + *this->overField() + "'" : std::string()) - << (this->partitionField() ? std::string(" partition '") + *this->partitionField() + "'" : std::string()); + result + << fullFunctionName(m_Side, m_IgnoreEmpty, this->isPopulation(), m_Function) + << (this->argumentField() ? std::string("(") + *this->argumentField() + ")" + : std::string()) + << (this->byField() ? std::string(" by '") + *this->byField() + "'" : std::string()) + << (this->overField() ? std::string(" over '") + *this->overField() + "'" + : std::string()) + << (this->partitionField() + ? std::string(" partition '") + *this->partitionField() + "'" + : std::string()); return result.str(); } @@ -480,14 +498,16 @@ void CDetectorSpecification::refreshIgnoreEmpty() { double ptrue = maxPenalty(this->params().penaltyIndicesFor(true), m_Penalties); double pfalse = maxPenalty(this->params().penaltyIndicesFor(false), m_Penalties); - m_IgnoreEmpty = STATUS[(CPenalty::score(ptrue) > 0.0 ? 2 : 0) + (CPenalty::score(pfalse) > 0.0 ? 1 : 0)]; + m_IgnoreEmpty = + STATUS[(CPenalty::score(ptrue) > 0.0 ? 2 : 0) + (CPenalty::score(pfalse) > 0.0 ? 
1 : 0)]; } CDetectorSpecification::SParamScores::SParamScores(core_t::TTime bucketLength, const std::string& ignoreEmpty, double score, const TStrVec& descriptions) - : s_BucketLength(bucketLength), s_IgnoreEmpty(ignoreEmpty), s_Score(score), s_Descriptions(descriptions) { + : s_BucketLength(bucketLength), s_IgnoreEmpty(ignoreEmpty), s_Score(score), + s_Descriptions(descriptions) { } } } diff --git a/lib/config/CFieldRolePenalty.cc b/lib/config/CFieldRolePenalty.cc index f8dfbfbf5f..72f8595db1 100644 --- a/lib/config/CFieldRolePenalty.cc +++ b/lib/config/CFieldRolePenalty.cc @@ -30,7 +30,8 @@ std::string prefix(const std::string& description) { //////// CCantBeNumeric //////// -CCantBeNumeric::CCantBeNumeric(const CAutoconfigurerParams& params) : CPenalty(params) { +CCantBeNumeric::CCantBeNumeric(const CAutoconfigurerParams& params) + : CPenalty(params) { } CCantBeNumeric* CCantBeNumeric::clone() const { @@ -41,7 +42,9 @@ std::string CCantBeNumeric::name() const { return "can't be numeric"; } -void CCantBeNumeric::penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const { +void CCantBeNumeric::penaltyFromMe(const CFieldStatistics& stats, + double& penalty, + std::string& description) const { if (config_t::isNumeric(stats.type())) { penalty = 0.0; description += prefix(description) + "Can't use numeric"; @@ -50,7 +53,8 @@ void CCantBeNumeric::penaltyFromMe(const CFieldStatistics& stats, double& penalt //////// CCantBeCategorical //////// -CCantBeCategorical::CCantBeCategorical(const CAutoconfigurerParams& params) : CPenalty(params) { +CCantBeCategorical::CCantBeCategorical(const CAutoconfigurerParams& params) + : CPenalty(params) { } CCantBeCategorical* CCantBeCategorical::clone() const { @@ -61,7 +65,9 @@ std::string CCantBeCategorical::name() const { return "Can't be categorical"; } -void CCantBeCategorical::penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const { +void CCantBeCategorical::penaltyFromMe(const CFieldStatistics& stats, + double& penalty, + std::string& description) const { if (config_t::isCategorical(stats.type())) { penalty = 0.0; description += prefix(description) + "Can't use categorical"; @@ -70,7 +76,8 @@ void CCantBeCategorical::penaltyFromMe(const CFieldStatistics& stats, double& pe //////// CDontUseUnaryField //////// -CDontUseUnaryField::CDontUseUnaryField(const CAutoconfigurerParams& params) : CPenalty(params) { +CDontUseUnaryField::CDontUseUnaryField(const CAutoconfigurerParams& params) + : CPenalty(params) { } CDontUseUnaryField* CDontUseUnaryField::clone() const { @@ -81,7 +88,9 @@ std::string CDontUseUnaryField::name() const { return "don't use unary field"; } -void CDontUseUnaryField::penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const { +void CDontUseUnaryField::penaltyFromMe(const CFieldStatistics& stats, + double& penalty, + std::string& description) const { if (const CCategoricalDataSummaryStatistics* summary = stats.categoricalSummary()) { if (summary->distinctCount() == 1) { penalty = 0.0; @@ -105,19 +114,26 @@ CDistinctCountThresholdPenalty* CDistinctCountThresholdPenalty::clone() const { } std::string CDistinctCountThresholdPenalty::name() const { - return "distinct count thresholds " + core::CStringUtils::typeToString(m_DistinctCountForPenaltyOfZero) + " and " + + return "distinct count thresholds " + + core::CStringUtils::typeToString(m_DistinctCountForPenaltyOfZero) + " and " + 
core::CStringUtils::typeToString(m_DistinctCountForPenaltyOfOne); } -void CDistinctCountThresholdPenalty::penaltyFromMe(const CFieldStatistics& stats, double& penalty, std::string& description) const { +void CDistinctCountThresholdPenalty::penaltyFromMe(const CFieldStatistics& stats, + double& penalty, + std::string& description) const { if (const CCategoricalDataSummaryStatistics* summary = stats.categoricalSummary()) { double penalty_ = CTools::interpolate( - m_DistinctCountForPenaltyOfZero, m_DistinctCountForPenaltyOfOne, 0.0, 1.0, static_cast(summary->distinctCount())); + m_DistinctCountForPenaltyOfZero, m_DistinctCountForPenaltyOfOne, + 0.0, 1.0, static_cast(summary->distinctCount())); if (penalty_ < 1.0) { penalty *= penalty_; - description += prefix(description) + "A distinct count of " + core::CStringUtils::typeToString(summary->distinctCount()) + + description += prefix(description) + "A distinct count of " + + core::CStringUtils::typeToString(summary->distinctCount()) + " is" + (penalty_ == 0.0 ? " too " : " ") + - (m_DistinctCountForPenaltyOfZero > m_DistinctCountForPenaltyOfOne ? "high" : "low"); + (m_DistinctCountForPenaltyOfZero > m_DistinctCountForPenaltyOfOne + ? "high" + : "low"); } } } diff --git a/lib/config/CFieldStatistics.cc b/lib/config/CFieldStatistics.cc index 2c639ad7cb..46306050c0 100644 --- a/lib/config/CFieldStatistics.cc +++ b/lib/config/CFieldStatistics.cc @@ -18,13 +18,20 @@ namespace { //! \brief Adds an example to the summary statistics. class CAddToStatistics : public boost::static_visitor { public: - CAddToStatistics(core_t::TTime time, const std::string& example) : m_Time(time), m_Example(&example) {} + CAddToStatistics(core_t::TTime time, const std::string& example) + : m_Time(time), m_Example(&example) {} - void operator()(CDataSummaryStatistics& summary) const { summary.add(m_Time); } + void operator()(CDataSummaryStatistics& summary) const { + summary.add(m_Time); + } - void operator()(CCategoricalDataSummaryStatistics& summary) const { summary.add(m_Time, *m_Example); } + void operator()(CCategoricalDataSummaryStatistics& summary) const { + summary.add(m_Time, *m_Example); + } - void operator()(CNumericDataSummaryStatistics& summary) const { summary.add(m_Time, *m_Example); } + void operator()(CNumericDataSummaryStatistics& summary) const { + summary.add(m_Time, *m_Example); + } private: core_t::TTime m_Time; @@ -32,10 +39,9 @@ class CAddToStatistics : public boost::static_visitor { }; } -CFieldStatistics::CFieldStatistics(const std::string& fieldName, const CAutoconfigurerParams& params) - : m_Params(params), - m_FieldName(fieldName), - m_NumberExamples(0), +CFieldStatistics::CFieldStatistics(const std::string& fieldName, + const CAutoconfigurerParams& params) + : m_Params(params), m_FieldName(fieldName), m_NumberExamples(0), m_Semantics(params.dataType(fieldName)), m_SummaryStatistics(CDataSummaryStatistics()) { } @@ -53,10 +59,12 @@ void CFieldStatistics::maybeStartCapturingTypeStatistics() { LOG_DEBUG(<< "Classified '" << m_FieldName << "' as " << config_t::print(type)); if (config_t::isCategorical(type)) { - m_SummaryStatistics = CCategoricalDataSummaryStatistics(*summary, this->params().numberOfMostFrequentFieldsCounts()); + m_SummaryStatistics = CCategoricalDataSummaryStatistics( + *summary, this->params().numberOfMostFrequentFieldsCounts()); this->replayBuffer(); } else if (config_t::isNumeric(type)) { - m_SummaryStatistics = CNumericDataSummaryStatistics(*summary, config_t::isInteger(type)); + m_SummaryStatistics = 
CNumericDataSummaryStatistics( + *summary, config_t::isInteger(type)); this->replayBuffer(); } } diff --git a/lib/config/CLongTailPenalty.cc b/lib/config/CLongTailPenalty.cc index 5da6cd324c..79349984cf 100644 --- a/lib/config/CLongTailPenalty.cc +++ b/lib/config/CLongTailPenalty.cc @@ -36,7 +36,8 @@ uint64_t count(const maths::CBjkstUniqueValues& distinct) { } } -CLongTailPenalty::CLongTailPenalty(const CAutoconfigurerParams& params) : CPenalty(params) { +CLongTailPenalty::CLongTailPenalty(const CAutoconfigurerParams& params) + : CPenalty(params) { } CLongTailPenalty* CLongTailPenalty::clone() const { @@ -53,13 +54,15 @@ void CLongTailPenalty::penaltyFromMe(CDetectorSpecification& spec) const { dynamic_cast(spec.countStatistics())) { this->penaltyFor(*byAndPartitionStats, spec); } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats = - dynamic_cast(spec.countStatistics())) { + dynamic_cast( + spec.countStatistics())) { this->penaltyFor(*byOverAndPartitionStats, spec); } } } -void CLongTailPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLongTailPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { std::size_t n = stats.bucketStatistics().size(); TSizeVec indices; @@ -74,13 +77,16 @@ void CLongTailPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stat // to the minimum number of buckets. TSizeUInt64UMap totals; TSizeUInt64UMap tail; - this->extractTailCounts(stats.bucketStatistics()[bid].countMomentsPerPartition(), totals, tail); + this->extractTailCounts( + stats.bucketStatistics()[bid].countMomentsPerPartition(), totals, tail); const TSizeVec& indices_ = this->params().penaltyIndicesFor(bid); indices.insert(indices.end(), indices_.begin(), indices_.end()); double penalty = this->penaltyFor(tail, totals); - std::string description = penalty < 1.0 ? std::string("A significant proportion of categories have similar frequency at '") + - CTools::prettyPrint(this->params().candidateBucketLengths()[bid]) + "' resolution" - : std::string(); + std::string description = + penalty < 1.0 + ? std::string("A significant proportion of categories have similar frequency at '") + + CTools::prettyPrint(this->params().candidateBucketLengths()[bid]) + "' resolution" + : std::string(); std::fill_n(std::back_inserter(penalties), indices_.size(), penalty); std::fill_n(std::back_inserter(descriptions), indices_.size(), description); } @@ -88,18 +94,23 @@ void CLongTailPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stat spec.applyPenalties(indices, penalties, descriptions); } -void CLongTailPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLongTailPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { // Penalize the case that many by fields values have close to the // minimum number of over field values. TSizeUInt64UMap totals; TSizeUInt64UMap tail; - this->extractTailCounts(stats.sampledByAndPartitionDistinctOverCounts(), totals, tail); + this->extractTailCounts( + stats.sampledByAndPartitionDistinctOverCounts(), totals, tail); double penalty = this->penaltyFor(tail, totals); - spec.applyPenalty(penalty, penalty < 1.0 ? "A significant proportion of categories have a similar frequency in the population" : ""); + spec.applyPenalty(penalty, penalty < 1.0 ? 
"A significant proportion of categories have a similar frequency in the population" + : ""); } template -void CLongTailPenalty::extractTailCounts(const MAP& counts, TSizeUInt64UMap& totals, TSizeUInt64UMap& tail) const { +void CLongTailPenalty::extractTailCounts(const MAP& counts, + TSizeUInt64UMap& totals, + TSizeUInt64UMap& tail) const { using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; using TSizeMinAccumulatorUMap = boost::unordered_map; using TItr = typename MAP::const_iterator; @@ -117,7 +128,9 @@ void CLongTailPenalty::extractTailCounts(const MAP& counts, TSizeUInt64UMap& tot uint64_t n = count(i->second); std::size_t partition = STATS::partition(*i); const TMinAccumulator& min = mins[partition]; - if (n <= static_cast(this->params().highCardinalityInTailFactor() * static_cast(min[0]) + 0.5) || + if (n <= static_cast(this->params().highCardinalityInTailFactor() * + static_cast(min[0]) + + 0.5) || n <= this->params().highCardinalityInTailIncrement() + min[0]) { tail[partition] += n; } @@ -131,11 +144,10 @@ double CLongTailPenalty::penaltyFor(TSizeUInt64UMap& tail, TSizeUInt64UMap& tota for (TSizeUInt64UMapCItr i = tail.begin(); i != tail.end(); ++i) { double rare = static_cast(i->second); double total = static_cast(totals[i->first]); - double penalty = CTools::logInterpolate(this->params().highCardinalityHighTailFraction(), - this->params().highCardinalityMaximumTailFraction(), - 1.0, - std::min(10.0 / total, 1.0), - rare / total); + double penalty = CTools::logInterpolate( + this->params().highCardinalityHighTailFraction(), + this->params().highCardinalityMaximumTailFraction(), 1.0, + std::min(10.0 / total, 1.0), rare / total); result.add(std::sqrt(-std::min(maths::CTools::fastLog(penalty), 0.0)), total); } return std::exp(-std::pow(maths::CBasicStatistics::mean(result), 2.0)); diff --git a/lib/config/CLowInformationContentPenalty.cc b/lib/config/CLowInformationContentPenalty.cc index 6c82e9d719..703335d332 100644 --- a/lib/config/CLowInformationContentPenalty.cc +++ b/lib/config/CLowInformationContentPenalty.cc @@ -17,10 +17,12 @@ namespace ml { namespace config { namespace { -const double LOG_MIN = 0.5 * std::log(0.9 * constants::DETECTOR_SCORE_EPSILON / constants::MAXIMUM_DETECTOR_SCORE); +const double LOG_MIN = 0.5 * std::log(0.9 * constants::DETECTOR_SCORE_EPSILON / + constants::MAXIMUM_DETECTOR_SCORE); } -CLowInformationContentPenalty::CLowInformationContentPenalty(const CAutoconfigurerParams& params) : CPenalty(params) { +CLowInformationContentPenalty::CLowInformationContentPenalty(const CAutoconfigurerParams& params) + : CPenalty(params) { } CLowInformationContentPenalty* CLowInformationContentPenalty::clone() const { @@ -40,31 +42,29 @@ void CLowInformationContentPenalty::penaltyFromMe(CDetectorSpecification& spec) double maximumLength = static_cast(summary->maximumLength()); double cardinality = static_cast(summary->distinctCount()); double entropy = summary->entropy(); - double penalty = cardinality == 1.0 - ? 
0.0 - : std::exp(CTools::interpolate(this->params().lowLengthRangeForInfoContent(), - this->params().minimumLengthRangeForInfoContent(), - 0.0, - LOG_MIN, - maximumLength - minimumLength)) * - std::exp(CTools::interpolate(this->params().lowMaximumLengthForInfoContent(), - this->params().minimumMaximumLengthForInfoContent(), - 0.0, - LOG_MIN, - maximumLength)) * - std::exp(CTools::logInterpolate(this->params().lowEntropyForInfoContent(), - this->params().minimumEntropyForInfoContent(), - 0.0, - LOG_MIN, - entropy / std::log(cardinality))) * - std::exp(CTools::logInterpolate(this->params().lowDistinctCountForInfoContent(), - this->params().minimumDistinctCountForInfoContent(), - LOG_MIN, - 0.0, - cardinality)); + double penalty = + cardinality == 1.0 + ? 0.0 + : std::exp(CTools::interpolate( + this->params().lowLengthRangeForInfoContent(), + this->params().minimumLengthRangeForInfoContent(), + 0.0, LOG_MIN, maximumLength - minimumLength)) * + std::exp(CTools::interpolate( + this->params().lowMaximumLengthForInfoContent(), + this->params().minimumMaximumLengthForInfoContent(), + 0.0, LOG_MIN, maximumLength)) * + std::exp(CTools::logInterpolate( + this->params().lowEntropyForInfoContent(), + this->params().minimumEntropyForInfoContent(), + 0.0, LOG_MIN, entropy / std::log(cardinality))) * + std::exp(CTools::logInterpolate( + this->params().lowDistinctCountForInfoContent(), + this->params().minimumDistinctCountForInfoContent(), + LOG_MIN, 0.0, cardinality)); std::string description; if (penalty < 1.0) { - description = "There is weak evidence that '" + *spec.argumentField() + "' carries information"; + description = "There is weak evidence that '" + + *spec.argumentField() + "' carries information"; } spec.applyPenalty(penalty, description); } diff --git a/lib/config/CLowVariationPenalty.cc b/lib/config/CLowVariationPenalty.cc index a078758a8c..fdc88ef012 100644 --- a/lib/config/CLowVariationPenalty.cc +++ b/lib/config/CLowVariationPenalty.cc @@ -42,12 +42,16 @@ double cov(const MOMENTS& moments) { //! Compute the penalty for the partition moments \p moments. template -void penaltyImpl(const CAutoconfigurerParams& params, const MOMENTS& moments, double& penalty, double& proportionWithLowVariation) { +void penaltyImpl(const CAutoconfigurerParams& params, + const MOMENTS& moments, + double& penalty, + double& proportionWithLowVariation) { TMeanAccumulator penalty_; proportionWithLowVariation = 0.0; for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i) { - double pi = - CTools::logInterpolate(params.lowCoefficientOfVariation(), params.minimumCoefficientOfVariation(), 1.0, MIN, cov(i->second)); + double pi = CTools::logInterpolate(params.lowCoefficientOfVariation(), + params.minimumCoefficientOfVariation(), + 1.0, MIN, cov(i->second)); penalty_.add(maths::CTools::fastLog(pi), maths::CBasicStatistics::count(i->second)); if (pi < 1.0) { proportionWithLowVariation += 1.0; @@ -60,13 +64,18 @@ void penaltyImpl(const CAutoconfigurerParams& params, const MOMENTS& moments, do //! Compute the distinct count penalty for the partition moments \p moments. 
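
// The information-content penalty above scores a field by its empirical entropy
// normalized by log(cardinality): values near 1.0 mean every value is equally
// frequent, values near 0.0 mean one value dominates and the field carries
// little information. A quick, self-contained illustration with invented counts.
#include <cmath>
#include <iostream>
#include <map>
#include <string>

int main() {
    // Invented value -> count statistics for a categorical field.
    const std::map<std::string, double> counts = {
        {"GET", 700.0}, {"POST", 250.0}, {"DELETE", 50.0}};
    double total = 0.0;
    for (const auto& vc : counts) {
        total += vc.second;
    }
    // H = -sum_i p_i * log(p_i), normalized by log(cardinality).
    double entropy = 0.0;
    for (const auto& vc : counts) {
        double p = vc.second / total;
        entropy -= p * std::log(p);
    }
    std::cout << "normalized entropy = "
              << entropy / std::log(static_cast<double>(counts.size())) << '\n';
    return 0;
}
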
struct SDistinctCountPenalty { template - void - operator()(const CAutoconfigurerParams& params, const MOMENTS& moments, double& penalty, double& proportionWithLowVariation) const { + void operator()(const CAutoconfigurerParams& params, + const MOMENTS& moments, + double& penalty, + double& proportionWithLowVariation) const { TMeanAccumulator penalty_; - for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i) { - double pi = CTools::logInterpolate( - params.lowCoefficientOfVariation(), params.minimumCoefficientOfVariation(), 1.0, MIN, cov(i->second.s_DistinctCount)); - penalty_.add(maths::CTools::fastLog(pi), maths::CBasicStatistics::count(i->second.s_DistinctCount)); + for (typename MOMENTS::const_iterator i = moments.begin(); + i != moments.end(); ++i) { + double pi = CTools::logInterpolate(params.lowCoefficientOfVariation(), + params.minimumCoefficientOfVariation(), + 1.0, MIN, cov(i->second.s_DistinctCount)); + penalty_.add(maths::CTools::fastLog(pi), + maths::CBasicStatistics::count(i->second.s_DistinctCount)); if (pi < 1.0) { proportionWithLowVariation += 1.0; } @@ -79,13 +88,18 @@ struct SDistinctCountPenalty { //! Compute the info content penalty for the partition moments \p moments. struct SInfoContentPenalty { template - void - operator()(const CAutoconfigurerParams& params, const MOMENTS& moments, double& penalty, double& proportionWithLowVariation) const { + void operator()(const CAutoconfigurerParams& params, + const MOMENTS& moments, + double& penalty, + double& proportionWithLowVariation) const { TMeanAccumulator penalty_; - for (typename MOMENTS::const_iterator i = moments.begin(); i != moments.end(); ++i) { - double pi = CTools::logInterpolate( - params.lowCoefficientOfVariation(), params.minimumCoefficientOfVariation(), 1.0, MIN, cov(i->second.s_InfoContent)); - penalty_.add(maths::CTools::fastLog(pi), maths::CBasicStatistics::count(i->second.s_InfoContent)); + for (typename MOMENTS::const_iterator i = moments.begin(); + i != moments.end(); ++i) { + double pi = CTools::logInterpolate(params.lowCoefficientOfVariation(), + params.minimumCoefficientOfVariation(), + 1.0, MIN, cov(i->second.s_InfoContent)); + penalty_.add(maths::CTools::fastLog(pi), + maths::CBasicStatistics::count(i->second.s_InfoContent)); if (pi < 1.0) { proportionWithLowVariation += 1.0; } @@ -96,23 +110,31 @@ struct SInfoContentPenalty { }; //! Get the description prefix. -std::string descriptionPrefix(const CDetectorSpecification& spec, double proportionWithLowVariation) { +std::string descriptionPrefix(const CDetectorSpecification& spec, + double proportionWithLowVariation) { if (spec.byField() && spec.partitionField()) { - return "A significant proportion, " + CTools::prettyPrint(100.0 * proportionWithLowVariation) + + return "A significant proportion, " + + CTools::prettyPrint(100.0 * proportionWithLowVariation) + "%, of distinct partition and by fields combinations"; } if (spec.byField()) { - return "A significant proportion, " + CTools::prettyPrint(100.0 * proportionWithLowVariation) + "%, of distinct by fields"; + return "A significant proportion, " + + CTools::prettyPrint(100.0 * proportionWithLowVariation) + + "%, of distinct by fields"; } if (spec.partitionField()) { - return "A significant proportion, " + CTools::prettyPrint(100.0 * proportionWithLowVariation) + "%, of distinct partition fields"; + return "A significant proportion, " + + CTools::prettyPrint(100.0 * proportionWithLowVariation) + + "%, of distinct partition fields"; } return ""; } //! 
Apply the penalties for count analysis from \p stats. template -void penaltyForCountImpl(const CAutoconfigurerParams& params, const STATS& stats, CDetectorSpecification& spec) { +void penaltyForCountImpl(const CAutoconfigurerParams& params, + const STATS& stats, + CDetectorSpecification& spec) { std::size_t n = stats.bucketStatistics().size(); TSizeVec indices; @@ -126,15 +148,18 @@ void penaltyForCountImpl(const CAutoconfigurerParams& params, const STATS& stats const TSizeVec& indices_ = params.penaltyIndicesFor(bid); double penalty; double proportionWithLowVariation; - penaltyImpl(params, stats.bucketStatistics()[bid].countMomentsPerPartition(), penalty, proportionWithLowVariation); + penaltyImpl(params, stats.bucketStatistics()[bid].countMomentsPerPartition(), + penalty, proportionWithLowVariation); indices.insert(indices.end(), indices_.begin(), indices_.end()); std::string description; if (penalty < 1.0) { if (spec.byField() || spec.partitionField()) { - description = descriptionPrefix(spec, proportionWithLowVariation) + " have " + (penalty == MIN ? "too " : "") + "low" + - " variation in their bucket counts"; + description = descriptionPrefix(spec, proportionWithLowVariation) + + " have " + (penalty == MIN ? "too " : "") + + "low" + " variation in their bucket counts"; } else { - description = std::string("The variation in the bucket counts is ") + (penalty == MIN ? "too " : "") + "low"; + description = std::string("The variation in the bucket counts is ") + + (penalty == MIN ? "too " : "") + "low"; } } std::fill_n(std::back_inserter(penalties), indices_.size(), penalty); @@ -166,14 +191,17 @@ void penaltyForImpl(const CAutoconfigurerParams& params, const std::string& argument = *spec.argumentField(); double penalty = 0.0; double proportionWithLowVariation = 0.0; - computePenalty(params, stats.bucketStatistics()[bid].argumentMomentsPerPartition(argument), penalty, proportionWithLowVariation); + computePenalty(params, stats.bucketStatistics()[bid].argumentMomentsPerPartition(argument), + penalty, proportionWithLowVariation); std::string description; if (penalty < 1.0) { if (spec.byField() || spec.partitionField()) { - description = descriptionPrefix(spec, proportionWithLowVariation) + " have " + (penalty == MIN ? "too " : "") + "low" + - " variation in their bucket " + function; + description = descriptionPrefix(spec, proportionWithLowVariation) + + " have " + (penalty == MIN ? "too " : "") + + "low" + " variation in their bucket " + function; } else { - description = std::string("The variation in the bucket ") + function + " is " + (penalty == MIN ? "too " : "") + "low"; + description = std::string("The variation in the bucket ") + function + + " is " + (penalty == MIN ? 
"too " : "") + "low"; } } std::fill_n(std::back_inserter(penalties), indices_.size(), penalty); @@ -184,7 +212,8 @@ void penaltyForImpl(const CAutoconfigurerParams& params, } } -CLowVariationPenalty::CLowVariationPenalty(const CAutoconfigurerParams& params) : CPenalty(params) { +CLowVariationPenalty::CLowVariationPenalty(const CAutoconfigurerParams& params) + : CPenalty(params) { } CLowVariationPenalty* CLowVariationPenalty::clone() const { @@ -196,17 +225,18 @@ std::string CLowVariationPenalty::name() const { } void CLowVariationPenalty::penaltyFromMe(CDetectorSpecification& spec) const { -#define APPLY_COUNTING_PENALTY(penalty) \ - if (const CDataCountStatistics* stats_ = spec.countStatistics()) { \ - if (const CPartitionDataCountStatistics* partitionStats = dynamic_cast(stats_)) { \ - this->penalty(*partitionStats, spec); \ - } else if (const CByAndPartitionDataCountStatistics* byAndPartitionStats = \ - dynamic_cast(stats_)) { \ - this->penalty(*byAndPartitionStats, spec); \ - } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats = \ - dynamic_cast(stats_)) { \ - this->penalty(*byOverAndPartitionStats, spec); \ - } \ +#define APPLY_COUNTING_PENALTY(penalty) \ + if (const CDataCountStatistics* stats_ = spec.countStatistics()) { \ + if (const CPartitionDataCountStatistics* partitionStats = \ + dynamic_cast(stats_)) { \ + this->penalty(*partitionStats, spec); \ + } else if (const CByAndPartitionDataCountStatistics* byAndPartitionStats = \ + dynamic_cast(stats_)) { \ + this->penalty(*byAndPartitionStats, spec); \ + } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats = \ + dynamic_cast(stats_)) { \ + this->penalty(*byOverAndPartitionStats, spec); \ + } \ } switch (spec.function()) { @@ -228,23 +258,28 @@ void CLowVariationPenalty::penaltyFromMe(CDetectorSpecification& spec) const { } } -void CLowVariationPenalty::penaltiesForCount(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLowVariationPenalty::penaltiesForCount(const CPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { penaltyForCountImpl(this->params(), stats, spec); } -void CLowVariationPenalty::penaltiesForCount(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLowVariationPenalty::penaltiesForCount(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { penaltyForCountImpl(this->params(), stats, spec); } -void CLowVariationPenalty::penaltiesForCount(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLowVariationPenalty::penaltiesForCount(const CByOverAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { penaltyForCountImpl(this->params(), stats, spec); } -void CLowVariationPenalty::penaltyForDistinctCount(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLowVariationPenalty::penaltyForDistinctCount(const CPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { penaltyForImpl(this->params(), stats, SDistinctCountPenalty(), "distinct counts", spec); } -void CLowVariationPenalty::penaltyForDistinctCount(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLowVariationPenalty::penaltyForDistinctCount(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { penaltyForImpl(this->params(), stats, SDistinctCountPenalty(), "distinct counts", 
spec); } @@ -253,15 +288,18 @@ void CLowVariationPenalty::penaltyForDistinctCount(const CByOverAndPartitionData penaltyForImpl(this->params(), stats, SDistinctCountPenalty(), "distinct counts", spec); } -void CLowVariationPenalty::penaltyForInfoContent(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLowVariationPenalty::penaltyForInfoContent(const CPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { penaltyForImpl(this->params(), stats, SInfoContentPenalty(), "info content", spec); } -void CLowVariationPenalty::penaltyForInfoContent(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLowVariationPenalty::penaltyForInfoContent(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { penaltyForImpl(this->params(), stats, SInfoContentPenalty(), "info content", spec); } -void CLowVariationPenalty::penaltyForInfoContent(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CLowVariationPenalty::penaltyForInfoContent(const CByOverAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { penaltyForImpl(this->params(), stats, SInfoContentPenalty(), "info content", spec); } } diff --git a/lib/config/CNotEnoughDataPenalty.cc b/lib/config/CNotEnoughDataPenalty.cc index 50b0eec3e4..3b71c04fef 100644 --- a/lib/config/CNotEnoughDataPenalty.cc +++ b/lib/config/CNotEnoughDataPenalty.cc @@ -27,18 +27,23 @@ namespace { using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; //! Get the description prefix. -std::string descriptionPrefix(const CDetectorSpecification& spec, const TMeanAccumulator& meanOccupied, std::size_t partitions) { +std::string descriptionPrefix(const CDetectorSpecification& spec, + const TMeanAccumulator& meanOccupied, + std::size_t partitions) { if (spec.byField() && spec.partitionField()) { return "A significant proportion, " + - CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast(partitions)) + + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / + static_cast(partitions)) + "%, of distinct partition and by fields combinations are sparse."; } else if (spec.byField()) { return "A significant proportion, " + - CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast(partitions)) + + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / + static_cast(partitions)) + "%, of distinct by fields are sparse."; } else if (spec.partitionField()) { return "A significant proportion, " + - CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast(partitions)) + + CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / + static_cast(partitions)) + "%, of distinct partition fields are sparse."; } return ""; @@ -47,7 +52,8 @@ std::string descriptionPrefix(const CDetectorSpecification& spec, const TMeanAcc const bool IGNORE_EMPTY[] = {false, true}; } -CNotEnoughDataPenalty::CNotEnoughDataPenalty(const CAutoconfigurerParams& params) : CPenalty(params) { +CNotEnoughDataPenalty::CNotEnoughDataPenalty(const CAutoconfigurerParams& params) + : CPenalty(params) { } CNotEnoughDataPenalty* CNotEnoughDataPenalty::clone() const { @@ -64,24 +70,29 @@ void CNotEnoughDataPenalty::penaltyFromMe(CDetectorSpecification& spec) const { dynamic_cast(spec.countStatistics())) { this->penaltyFor(*partitionStats, spec); } else if (const 
CByAndPartitionDataCountStatistics* byAndPartitionStats = - dynamic_cast(spec.countStatistics())) { + dynamic_cast( + spec.countStatistics())) { this->penaltyFor(*byAndPartitionStats, spec); } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats = - dynamic_cast(spec.countStatistics())) { + dynamic_cast( + spec.countStatistics())) { this->penaltyFor(*byOverAndPartitionStats, spec); } } } -void CNotEnoughDataPenalty::penaltyFor(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CNotEnoughDataPenalty::penaltyFor(const CPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec); } -void CNotEnoughDataPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CNotEnoughDataPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec); } -void CNotEnoughDataPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const { +void CNotEnoughDataPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, + CDetectorSpecification& spec) const { this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec); } @@ -90,7 +101,8 @@ void CNotEnoughDataPenalty::penaltyFor(const TUInt64Vec& bucketCounts, CDetectorSpecification& spec) const { using TSizeSizePrMomentsUMapCItr = CBucketCountStatistics::TSizeSizePrMomentsUMap::const_iterator; - const CAutoconfigurerParams::TTimeVec& candidates = this->params().candidateBucketLengths(); + const CAutoconfigurerParams::TTimeVec& candidates = + this->params().candidateBucketLengths(); LOG_TRACE(<< "bucket counts = " << core::CContainerPrinter::print(bucketCounts)); @@ -109,25 +121,27 @@ void CNotEnoughDataPenalty::penaltyFor(const TUInt64Vec& bucketCounts, uint64_t bc = bucketCounts[bid]; if (bc > 0) { const CBucketCountStatistics& si = statistics[bid]; - const CBucketCountStatistics::TSizeSizePrMomentsUMap& mi = si.countMomentsPerPartition(); + const CBucketCountStatistics::TSizeSizePrMomentsUMap& mi = + si.countMomentsPerPartition(); TMeanAccumulator penalty_; TMeanAccumulator meanOccupied; for (TSizeSizePrMomentsUMapCItr j = mi.begin(); j != mi.end(); ++j) { - double occupied = maths::CBasicStatistics::count(j->second) / static_cast(bc); - double penalty = CTools::logInterpolate(this->params().lowPopulatedBucketFraction(function, IGNORE_EMPTY[i]), - this->params().minimumPopulatedBucketFraction(function, IGNORE_EMPTY[i]), - 1.0, - 1.0 / static_cast(bc), - occupied); + double occupied = maths::CBasicStatistics::count(j->second) / + static_cast(bc); + double penalty = CTools::logInterpolate( + this->params().lowPopulatedBucketFraction(function, IGNORE_EMPTY[i]), + this->params().minimumPopulatedBucketFraction(function, IGNORE_EMPTY[i]), + 1.0, 1.0 / static_cast(bc), occupied); penalty_.add(maths::CTools::fastLog(penalty)); if (penalty < 1.0) { meanOccupied.add(occupied); } } - double penalty = std::min(std::exp(maths::CBasicStatistics::mean(penalty_)), 1.0); + double penalty = + std::min(std::exp(maths::CBasicStatistics::mean(penalty_)), 1.0); std::size_t index = this->params().penaltyIndexFor(bid, IGNORE_EMPTY[i]); indices.push_back(index); penalties.push_back(penalty); @@ -135,12 +149,18 @@ void CNotEnoughDataPenalty::penaltyFor(const TUInt64Vec& bucketCounts, if (penalty < 1.0) 
{ if (spec.byField() || spec.partitionField()) { descriptions.back() = - descriptionPrefix(spec, meanOccupied, si.countMomentsPerPartition().size()) + " On average, only " + - CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(meanOccupied)) + "% of their buckets have a value"; + descriptionPrefix(spec, meanOccupied, + si.countMomentsPerPartition().size()) + + " On average, only " + + CTools::prettyPrint( + 100.0 * maths::CBasicStatistics::mean(meanOccupied)) + + "% of their buckets have a value"; } else { - descriptions.back() = std::string("On average only ") + - CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(meanOccupied)) + - "% of partition buckets have a value"; + descriptions.back() = + std::string("On average only ") + + CTools::prettyPrint( + 100.0 * maths::CBasicStatistics::mean(meanOccupied)) + + "% of partition buckets have a value"; } } } diff --git a/lib/config/CPenalty.cc b/lib/config/CPenalty.cc index adc711f907..81ddbf11a8 100644 --- a/lib/config/CPenalty.cc +++ b/lib/config/CPenalty.cc @@ -31,7 +31,8 @@ CPenalty::CPenalty(const CPenalty& other) : m_Params(other.m_Params) { } } -CPenalty::CPenalty(CClosure closure) : m_Params(closure.penalties()[0]->params()) { +CPenalty::CPenalty(CClosure closure) + : m_Params(closure.penalties()[0]->params()) { m_Penalties.swap(closure.penalties()); } @@ -56,7 +57,8 @@ const CPenalty& CPenalty::operator*=(const CPenalty& rhs) { } const CPenalty& CPenalty::operator*=(CClosure rhs) { - m_Penalties.insert(m_Penalties.end(), rhs.penalties().begin(), rhs.penalties().end()); + m_Penalties.insert(m_Penalties.end(), rhs.penalties().begin(), + rhs.penalties().end()); return *this; } @@ -79,7 +81,8 @@ void CPenalty::penalize(CDetectorSpecification& spec) const { return; } for (std::size_t i = 0u; i < m_Penalties.size(); ++i) { - LOG_TRACE(<< "Applying '" << m_Penalties[i]->name() << "' to " << spec.description()); + LOG_TRACE(<< "Applying '" << m_Penalties[i]->name() << "' to " + << spec.description()); m_Penalties[i]->penalize(spec); if (spec.score() == 0.0) { break; @@ -88,7 +91,8 @@ void CPenalty::penalize(CDetectorSpecification& spec) const { } double CPenalty::score(double penalty) { - return constants::DETECTOR_SCORE_EPSILON * std::floor(constants::MAXIMUM_DETECTOR_SCORE * penalty / constants::DETECTOR_SCORE_EPSILON); + return constants::DETECTOR_SCORE_EPSILON * + std::floor(constants::MAXIMUM_DETECTOR_SCORE * penalty / constants::DETECTOR_SCORE_EPSILON); } bool CPenalty::scoreIsZeroFor(double penalty) { @@ -99,7 +103,9 @@ const CAutoconfigurerParams& CPenalty::params() const { return m_Params; } -void CPenalty::penaltyFromMe(const CFieldStatistics& /*stats*/, double& /*penalty*/, std::string& /*description*/) const { +void CPenalty::penaltyFromMe(const CFieldStatistics& /*stats*/, + double& /*penalty*/, + std::string& /*description*/) const { } void CPenalty::penaltyFromMe(CDetectorSpecification& /*spec*/) const { diff --git a/lib/config/CPolledDataPenalty.cc b/lib/config/CPolledDataPenalty.cc index df82598060..cfb972e905 100644 --- a/lib/config/CPolledDataPenalty.cc +++ b/lib/config/CPolledDataPenalty.cc @@ -27,7 +27,8 @@ namespace { const double LOG_TENTH_NUMBER_POLLING_INTERVALS = 10.0; } -CPolledDataPenalty::CPolledDataPenalty(const CAutoconfigurerParams& params) : CPenalty(params) { +CPolledDataPenalty::CPolledDataPenalty(const CAutoconfigurerParams& params) + : CPenalty(params) { } CPolledDataPenalty* CPolledDataPenalty::clone() const { @@ -54,14 +55,13 @@ void 
CPolledDataPenalty::penaltyFromMe(CDetectorSpecification& spec) const { if (candidates[bid] < *interval) { const TSizeVec& indices_ = this->params().penaltyIndicesFor(bid); indices.insert(indices.end(), indices_.begin(), indices_.end()); - std::fill_n(std::back_inserter(penalties), - indices_.size(), - std::pow(0.1, - static_cast(stats->timeRange()) / static_cast(*interval) / - LOG_TENTH_NUMBER_POLLING_INTERVALS)); - std::fill_n(std::back_inserter(descriptions), - indices_.size(), - CTools::prettyPrint(candidates[bid]) + " is shorter than possible polling interval " + + std::fill_n(std::back_inserter(penalties), indices_.size(), + std::pow(0.1, static_cast(stats->timeRange()) / + static_cast(*interval) / + LOG_TENTH_NUMBER_POLLING_INTERVALS)); + std::fill_n(std::back_inserter(descriptions), indices_.size(), + CTools::prettyPrint(candidates[bid]) + + " is shorter than possible polling interval " + CTools::prettyPrint(*interval)); } } @@ -71,7 +71,8 @@ void CPolledDataPenalty::penaltyFromMe(CDetectorSpecification& spec) const { } } -CPolledDataPenalty::TOptionalTime CPolledDataPenalty::pollingInterval(const CDataCountStatistics& stats) const { +CPolledDataPenalty::TOptionalTime +CPolledDataPenalty::pollingInterval(const CDataCountStatistics& stats) const { using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack; @@ -105,7 +106,8 @@ CPolledDataPenalty::TOptionalTime CPolledDataPenalty::pollingInterval(const CDat F.cdf(upper + 0.01 * upper, f[3]); mass = f[1] - f[0] + f[3] - f[2]; - if (mass > this->params().polledDataMinimumMassAtInterval() && lower < this->params().polledDataJitter() * upper) { + if (mass > this->params().polledDataMinimumMassAtInterval() && + lower < this->params().polledDataJitter() * upper) { return static_cast(upper); } else { } diff --git a/lib/config/CReportWriter.cc b/lib/config/CReportWriter.cc index d1e286545b..ce66f0705d 100644 --- a/lib/config/CReportWriter.cc +++ b/lib/config/CReportWriter.cc @@ -27,7 +27,8 @@ using TStrVecVec = std::vector; //! Pad \p value. inline std::string pad(std::size_t padTo, const std::string& value) { - return std::string((padTo - value.length()) / 2, ' ') + value + std::string((padTo - value.length() + 1) / 2, ' '); + return std::string((padTo - value.length()) / 2, ' ') + value + + std::string((padTo - value.length() + 1) / 2, ' '); } //! Pass the string back. @@ -51,7 +52,8 @@ template inline std::string print(const std::pair& p, std::size_t padTo = 0) { std::string first = print(p.first); std::string second = print(p.second); - return (padTo > 0 ? pad(padTo, first) : first) + " " + (padTo > 0 ? pad(padTo, second) : second); + return (padTo > 0 ? pad(padTo, first) : first) + " " + + (padTo > 0 ? pad(padTo, second) : second); } //! Write out a vector of pairs new line delimited. @@ -87,7 +89,10 @@ std::size_t longest(const TStrVecVec& fields, std::size_t statistic) { //! Write a row of the summary statistic table. template -void writeTableRow(std::ostream& o, const TSizeVec& padTo, const std::size_t (&stats)[N], const ROW& row) { +void writeTableRow(std::ostream& o, + const TSizeVec& padTo, + const std::size_t (&stats)[N], + const ROW& row) { TStrVecVec columnFields; columnFields.reserve(N); std::size_t height = 1u; @@ -97,7 +102,8 @@ void writeTableRow(std::ostream& o, const TSizeVec& padTo, const std::size_t (&s } for (std::size_t i = 0u; i < height; ++i) { for (std::size_t j = 0u; j < N; ++j) { - o << (i < columnFields[j].size() ? 
pad(padTo[j], columnFields[j][i]) : std::string(padTo[j], ' ')); + o << (i < columnFields[j].size() ? pad(padTo[j], columnFields[j][i]) + : std::string(padTo[j], ' ')); } o << "\n"; } @@ -105,7 +111,10 @@ void writeTableRow(std::ostream& o, const TSizeVec& padTo, const std::size_t (&s //! Write the summary statistic table. template -void writeTable(std::ostream& o, const std::string (&labels)[M], const std::size_t (&stats)[N], const TStrVecVec& values) { +void writeTable(std::ostream& o, + const std::string (&labels)[M], + const std::size_t (&stats)[N], + const TStrVecVec& values) { // Compute the table pads. TSizeVec padTo(N, 0); std::size_t tableWidth = 0; @@ -126,7 +135,8 @@ void writeTable(std::ostream& o, const std::string (&labels)[M], const std::size const TStrVec NO_STRINGS; } -CReportWriter::CReportWriter(std::ostream& writeStream) : m_WriteStream(writeStream) { +CReportWriter::CReportWriter(std::ostream& writeStream) + : m_WriteStream(writeStream) { } bool CReportWriter::fieldNames(const TStrVec& /*fieldNames*/, const TStrVec& /*extraFieldNames*/) { @@ -137,7 +147,8 @@ const CReportWriter::TStrVec& CReportWriter::fieldNames() const { return NO_STRINGS; } -bool CReportWriter::writeRow(const TStrStrUMap& /*dataRowFields*/, const TStrStrUMap& /*overrideDataRowFields*/) { +bool CReportWriter::writeRow(const TStrStrUMap& /*dataRowFields*/, + const TStrStrUMap& /*overrideDataRowFields*/) { return true; } @@ -149,13 +160,17 @@ void CReportWriter::addInvalidRecords(uint64_t n) { m_InvalidRecords = print(n); } -void CReportWriter::addFieldStatistics(const std::string& field, config_t::EDataType type, const CDataSummaryStatistics& summary) { +void CReportWriter::addFieldStatistics(const std::string& field, + config_t::EDataType type, + const CDataSummaryStatistics& summary) { std::size_t n = m_UnclassifiedFields.size(); m_UnclassifiedFields.push_back(TStrVec(NUMBER_STATISTICS)); m_UnclassifiedFields[n][FIELD_NAME] = field; m_UnclassifiedFields[n][DATA_TYPE] = config_t::print(type); - m_UnclassifiedFields[n][EARLIEST_TIME] = core::CTimeUtils::toLocalString(summary.earliest()); - m_UnclassifiedFields[n][LATEST_TIME] = core::CTimeUtils::toLocalString(summary.latest()); + m_UnclassifiedFields[n][EARLIEST_TIME] = + core::CTimeUtils::toLocalString(summary.earliest()); + m_UnclassifiedFields[n][LATEST_TIME] = + core::CTimeUtils::toLocalString(summary.latest()); m_UnclassifiedFields[n][MEAN_RATE] = CTools::prettyPrint(summary.meanRate()); } @@ -166,8 +181,10 @@ void CReportWriter::addFieldStatistics(const std::string& field, m_CategoricalFields.push_back(TStrVec(NUMBER_STATISTICS)); m_CategoricalFields[n][FIELD_NAME] = field; m_CategoricalFields[n][DATA_TYPE] = config_t::print(type); - m_CategoricalFields[n][EARLIEST_TIME] = core::CTimeUtils::toLocalString(summary.earliest()); - m_CategoricalFields[n][LATEST_TIME] = core::CTimeUtils::toLocalString(summary.latest()); + m_CategoricalFields[n][EARLIEST_TIME] = + core::CTimeUtils::toLocalString(summary.earliest()); + m_CategoricalFields[n][LATEST_TIME] = + core::CTimeUtils::toLocalString(summary.latest()); m_CategoricalFields[n][MEAN_RATE] = CTools::prettyPrint(summary.meanRate()); m_CategoricalFields[n][CATEGORICAL_DISTINCT_COUNT] = print(summary.distinctCount()); CCategoricalDataSummaryStatistics::TStrSizePrVec topn; @@ -175,7 +192,9 @@ void CReportWriter::addFieldStatistics(const std::string& field, m_CategoricalFields[n][CATEGORICAL_TOP_N_COUNTS] = print(topn); } -void CReportWriter::addFieldStatistics(const std::string& field, config_t::EDataType 
type, const CNumericDataSummaryStatistics& summary) { +void CReportWriter::addFieldStatistics(const std::string& field, + config_t::EDataType type, + const CNumericDataSummaryStatistics& summary) { std::size_t n = m_NumericFields.size(); m_NumericFields.push_back(TStrVec(NUMBER_STATISTICS)); m_NumericFields[n][FIELD_NAME] = field; @@ -200,13 +219,17 @@ void CReportWriter::addDetector(const CDetectorSpecification& spec) { spec.scores(scores); m_Detectors[n][PARAMETER_SCORES].resize(scores.size(), TStrVec(NUMBER_PARAMETERS)); for (std::size_t i = 0u; i < scores.size(); ++i) { - m_Detectors[n][PARAMETER_SCORES][i][BUCKET_LENGTH_PARAMETER] = CTools::prettyPrint(scores[i].s_BucketLength); + m_Detectors[n][PARAMETER_SCORES][i][BUCKET_LENGTH_PARAMETER] = + CTools::prettyPrint(scores[i].s_BucketLength); m_Detectors[n][PARAMETER_SCORES][i][IGNORE_EMPTY_PARAMETER] = scores[i].s_IgnoreEmpty; - m_Detectors[n][PARAMETER_SCORES][i][SCORE_PARAMETER] = CTools::prettyPrint(scores[i].s_Score); + m_Detectors[n][PARAMETER_SCORES][i][SCORE_PARAMETER] = + CTools::prettyPrint(scores[i].s_Score); m_Detectors[n][PARAMETER_SCORES][i][DESCRIPTION_PARAMETER] = - scores[i].s_Descriptions.empty() ? std::string("-") : scores[i].s_Descriptions[0]; + scores[i].s_Descriptions.empty() ? std::string("-") + : scores[i].s_Descriptions[0]; for (std::size_t j = 1u; j < scores[i].s_Descriptions.size(); ++j) { - m_Detectors[n][PARAMETER_SCORES][i][DESCRIPTION_PARAMETER] += "\n" + scores[i].s_Descriptions[j]; + m_Detectors[n][PARAMETER_SCORES][i][DESCRIPTION_PARAMETER] += + "\n" + scores[i].s_Descriptions[j]; } } m_Detectors[n][DETECTOR_CONFIG].push_back(TStrVec(1, spec.detectorConfig())); @@ -217,7 +240,10 @@ void CReportWriter::write() const { m_WriteStream << "DATA SUMMARY\n"; m_WriteStream << "============\n\n"; - m_WriteStream << "Found " << (m_UnclassifiedFields.size() + m_CategoricalFields.size() + m_NumericFields.size()) << " fields\n"; + m_WriteStream << "Found " + << (m_UnclassifiedFields.size() + m_CategoricalFields.size() + + m_NumericFields.size()) + << " fields\n"; m_WriteStream << "Processed " << m_TotalRecords << " records\n"; m_WriteStream << "There were " << m_InvalidRecords << " invalid records\n"; @@ -232,7 +258,8 @@ void CReportWriter::write() const { writeTable(m_WriteStream, STATISTIC_LABELS, CATEGORICAL_STATISTICS, m_CategoricalFields); for (std::size_t i = 0u; i < m_CategoricalFields.size(); ++i) { - m_WriteStream << "\nMost frequent for '" << m_CategoricalFields[i][FIELD_NAME] << "':\n"; + m_WriteStream << "\nMost frequent for '" + << m_CategoricalFields[i][FIELD_NAME] << "':\n"; m_WriteStream << m_CategoricalFields[i][CATEGORICAL_TOP_N_COUNTS]; } } @@ -241,7 +268,8 @@ void CReportWriter::write() const { m_WriteStream << "==============\n\n"; writeTable(m_WriteStream, STATISTIC_LABELS, NUMERIC_STATISTICS, m_NumericFields); for (std::size_t i = 0u; i < m_NumericFields.size(); ++i) { - m_WriteStream << "\nProbability density for '" << m_NumericFields[i][FIELD_NAME] << "':\n"; + m_WriteStream << "\nProbability density for '" + << m_NumericFields[i][FIELD_NAME] << "':\n"; m_WriteStream << pad(15, "x") << pad(15, "f(x)") << "\n"; m_WriteStream << m_NumericFields[i][NUMERIC_DENSITY_CHART]; } @@ -251,45 +279,51 @@ void CReportWriter::write() const { m_WriteStream << "CANDIDATE DETECTORS\n"; m_WriteStream << "==================="; for (std::size_t i = 0u; i < m_Detectors.size(); ++i) { - m_WriteStream << "\n\n\n" << m_Detectors[i][DESCRIPTION][0][0] << "\n"; - m_WriteStream << 
std::string(m_Detectors[i][DESCRIPTION][0][0].length(), '=') << "\n"; - m_WriteStream << "\n Best parameters score: " << m_Detectors[i][OVERALL_SCORE][0][0] << "\n\n"; - writeTable(m_WriteStream, PARAMETER_LABELS, DETECTOR_PARAMETERS, m_Detectors[i][PARAMETER_SCORES]); + m_WriteStream << "\n\n\n" + << m_Detectors[i][DESCRIPTION][0][0] << "\n"; + m_WriteStream + << std::string(m_Detectors[i][DESCRIPTION][0][0].length(), '=') << "\n"; + m_WriteStream << "\n Best parameters score: " + << m_Detectors[i][OVERALL_SCORE][0][0] << "\n\n"; + writeTable(m_WriteStream, PARAMETER_LABELS, DETECTOR_PARAMETERS, + m_Detectors[i][PARAMETER_SCORES]); if (!m_Detectors[i][DETECTOR_CONFIG][0][0].empty()) { - m_WriteStream << "\n" << m_Detectors[i][DETECTOR_CONFIG][0][0] << "\n"; + m_WriteStream << "\n" + << m_Detectors[i][DETECTOR_CONFIG][0][0] << "\n"; } } } } -const std::string CReportWriter::STATISTIC_LABELS[NUMBER_STATISTICS] = {std::string("Field Name"), - std::string("Data Type"), - std::string("Earliest Time"), - std::string("Latest Time"), - std::string("Mean Rate"), - std::string("Distinct Categories"), - std::string("Most Frequent Categories"), - std::string("Minimum"), - std::string("Median"), - std::string("Maximum"), - std::string("Probability Density Chart")}; - -const std::string CReportWriter::PARAMETER_LABELS[NUMBER_PARAMETERS] = {std::string("Bucket Length"), - std::string("Ignore Empty"), - std::string("Score"), - std::string("Explanation")}; - -const std::size_t CReportWriter::UNCLASSIFIED_STATISTICS[] = {FIELD_NAME, DATA_TYPE, EARLIEST_TIME, LATEST_TIME, MEAN_RATE}; - -const std::size_t CReportWriter::CATEGORICAL_STATISTICS[] = - {FIELD_NAME, DATA_TYPE, EARLIEST_TIME, LATEST_TIME, MEAN_RATE, CATEGORICAL_DISTINCT_COUNT}; - -const std::size_t CReportWriter::NUMERIC_STATISTICS[] = - {FIELD_NAME, DATA_TYPE, EARLIEST_TIME, LATEST_TIME, MEAN_RATE, NUMERIC_MINIMUM, NUMERIC_MEDIAN, NUMERIC_MAXIMUM}; - -const std::size_t CReportWriter::DETECTOR_PARAMETERS[] = {BUCKET_LENGTH_PARAMETER, - IGNORE_EMPTY_PARAMETER, - SCORE_PARAMETER, - DESCRIPTION_PARAMETER}; +const std::string CReportWriter::STATISTIC_LABELS[NUMBER_STATISTICS] = { + std::string("Field Name"), + std::string("Data Type"), + std::string("Earliest Time"), + std::string("Latest Time"), + std::string("Mean Rate"), + std::string("Distinct Categories"), + std::string("Most Frequent Categories"), + std::string("Minimum"), + std::string("Median"), + std::string("Maximum"), + std::string("Probability Density Chart")}; + +const std::string CReportWriter::PARAMETER_LABELS[NUMBER_PARAMETERS] = { + std::string("Bucket Length"), std::string("Ignore Empty"), + std::string("Score"), std::string("Explanation")}; + +const std::size_t CReportWriter::UNCLASSIFIED_STATISTICS[] = { + FIELD_NAME, DATA_TYPE, EARLIEST_TIME, LATEST_TIME, MEAN_RATE}; + +const std::size_t CReportWriter::CATEGORICAL_STATISTICS[] = { + FIELD_NAME, DATA_TYPE, EARLIEST_TIME, + LATEST_TIME, MEAN_RATE, CATEGORICAL_DISTINCT_COUNT}; + +const std::size_t CReportWriter::NUMERIC_STATISTICS[] = { + FIELD_NAME, DATA_TYPE, EARLIEST_TIME, LATEST_TIME, + MEAN_RATE, NUMERIC_MINIMUM, NUMERIC_MEDIAN, NUMERIC_MAXIMUM}; + +const std::size_t CReportWriter::DETECTOR_PARAMETERS[] = { + BUCKET_LENGTH_PARAMETER, IGNORE_EMPTY_PARAMETER, SCORE_PARAMETER, DESCRIPTION_PARAMETER}; } } diff --git a/lib/config/CSpanTooSmallForBucketLengthPenalty.cc b/lib/config/CSpanTooSmallForBucketLengthPenalty.cc index 41b4f10dcc..b46a50e825 100644 --- a/lib/config/CSpanTooSmallForBucketLengthPenalty.cc +++ 
b/lib/config/CSpanTooSmallForBucketLengthPenalty.cc @@ -14,7 +14,8 @@ namespace ml { namespace config { -CSpanTooSmallForBucketLengthPenalty::CSpanTooSmallForBucketLengthPenalty(const CAutoconfigurerParams& params) : CPenalty(params) { +CSpanTooSmallForBucketLengthPenalty::CSpanTooSmallForBucketLengthPenalty(const CAutoconfigurerParams& params) + : CPenalty(params) { } CSpanTooSmallForBucketLengthPenalty* CSpanTooSmallForBucketLengthPenalty::clone() const { @@ -39,12 +40,12 @@ void CSpanTooSmallForBucketLengthPenalty::penaltyFromMe(CDetectorSpecification& for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { const TSizeVec& indices_ = this->params().penaltyIndicesFor(bid); indices.insert(indices.end(), indices_.begin(), indices_.end()); - double penalty = CTools::logInterpolate(this->params().minimumNumberOfBucketsForConfig(), - this->params().lowNumberOfBucketsForConfig(), - 0.0, - 1.0, - static_cast(stats->timeRange() / candidates[bid])); - std::string description = penalty < 1.0 ? "The data span is too short to properly assess the bucket length" : ""; + double penalty = CTools::logInterpolate( + this->params().minimumNumberOfBucketsForConfig(), + this->params().lowNumberOfBucketsForConfig(), 0.0, 1.0, + static_cast(stats->timeRange() / candidates[bid])); + std::string description = penalty < 1.0 ? "The data span is too short to properly assess the bucket length" + : ""; std::fill_n(std::back_inserter(penalties), indices_.size(), penalty); std::fill_n(std::back_inserter(descriptions), indices_.size(), description); } diff --git a/lib/config/CSparseCountPenalty.cc b/lib/config/CSparseCountPenalty.cc index 089e44c50e..eacd254711 100644 --- a/lib/config/CSparseCountPenalty.cc +++ b/lib/config/CSparseCountPenalty.cc @@ -36,8 +36,10 @@ void extract(const maths::CQuantileSketch& quantiles, std::size_t n, TDoubleVec& } //! Get the quantiles adjusted for empty buckets. -const maths::CQuantileSketch& -correctForEmptyBuckets(bool ignoreEmpty, uint64_t buckets, maths::CQuantileSketch& placeholder, const maths::CQuantileSketch& quantiles) { +const maths::CQuantileSketch& correctForEmptyBuckets(bool ignoreEmpty, + uint64_t buckets, + maths::CQuantileSketch& placeholder, + const maths::CQuantileSketch& quantiles) { if (!ignoreEmpty) { double n = static_cast(buckets) - quantiles.count(); if (n > 0.0) { @@ -50,7 +52,9 @@ correctForEmptyBuckets(bool ignoreEmpty, uint64_t buckets, maths::CQuantileSketc } //! Get the mean adjusted for empty buckets. -double correctForEmptyBuckets(bool ignoreEmpty, uint64_t buckets, const CBucketCountStatistics::TMoments& moments) { +double correctForEmptyBuckets(bool ignoreEmpty, + uint64_t buckets, + const CBucketCountStatistics::TMoments& moments) { double n = maths::CBasicStatistics::count(moments); double m = maths::CBasicStatistics::mean(moments); return ignoreEmpty ? 
m : n / static_cast(buckets) * m; @@ -60,7 +64,8 @@ const uint64_t MINIMUM_BUCKETS_TO_TEST = 20; const bool IGNORE_EMPTY[] = {false, true}; } -CSparseCountPenalty::CSparseCountPenalty(const CAutoconfigurerParams& params) : CPenalty(params) { +CSparseCountPenalty::CSparseCountPenalty(const CAutoconfigurerParams& params) + : CPenalty(params) { } CSparseCountPenalty* CSparseCountPenalty::clone() const { @@ -80,12 +85,14 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification& spec) const { using TSizeSizePrQuantileUMap = CBucketCountStatistics::TSizeSizePrQuantileUMap; using TSizeSizePrQuantileUMapCItr = TSizeSizePrQuantileUMap::const_iterator; using TSizeSizePrQuantileUMapCPtrVec = std::vector; - using TSizeSizePrMomentsUMapCPtrVec = std::vector; + using TSizeSizePrMomentsUMapCPtrVec = + std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanAccumulatorVec = std::vector; if (const CDataCountStatistics* stats = spec.countStatistics()) { - const CAutoconfigurerParams::TTimeVec& candidates = this->params().candidateBucketLengths(); + const CAutoconfigurerParams::TTimeVec& candidates = + this->params().candidateBucketLengths(); TSizeSizePrQuantileUMapCPtrVec quantiles; quantiles.reserve(candidates.size()); @@ -95,8 +102,10 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification& spec) const { for (std::size_t bid = 0u; bid < candidates.size(); ++bid) { if (stats->bucketCounts()[bid] > MINIMUM_BUCKETS_TO_TEST) { - quantiles.push_back(&(stats->bucketStatistics()[bid].countQuantilesPerPartition())); - moments.push_back(&(stats->bucketStatistics()[bid].countMomentsPerPartition())); + quantiles.push_back( + &(stats->bucketStatistics()[bid].countQuantilesPerPartition())); + moments.push_back( + &(stats->bucketStatistics()[bid].countMomentsPerPartition())); longest = std::max(longest, candidates[bid]); } } @@ -112,12 +121,15 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification& spec) const { TMeanAccumulatorVec penalties_(nb - 1); maths::CQuantileSketch placeholder(maths::CQuantileSketch::E_Linear, 1); - for (TSizeSizePrQuantileUMapCItr q0 = quantiles[0]->begin(); q0 != quantiles[0]->end(); ++q0) { + for (TSizeSizePrQuantileUMapCItr q0 = quantiles[0]->begin(); + q0 != quantiles[0]->end(); ++q0) { const CBucketCountStatistics::TSizeSizePr& partition = q0->first; uint64_t bc = stats->bucketCounts()[0]; - const maths::CQuantileSketch& qe0 = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, placeholder, q0->second); - const CBucketCountStatistics::TMoments& m0 = moments[0]->find(partition)->second; + const maths::CQuantileSketch& qe0 = correctForEmptyBuckets( + IGNORE_EMPTY[iid], bc, placeholder, q0->second); + const CBucketCountStatistics::TMoments& m0 = + moments[0]->find(partition)->second; double me0 = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, m0); extract(qe0, nq, xq[0]); means[0] = me0; @@ -132,8 +144,10 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification& spec) const { } bc = stats->bucketCounts()[bid]; - const maths::CQuantileSketch& qei = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, placeholder, qi->second); - const CBucketCountStatistics::TMoments& mi = moments[bid]->find(partition)->second; + const maths::CQuantileSketch& qei = correctForEmptyBuckets( + IGNORE_EMPTY[iid], bc, placeholder, qi->second); + const CBucketCountStatistics::TMoments& mi = + moments[bid]->find(partition)->second; double mei = correctForEmptyBuckets(IGNORE_EMPTY[iid], bc, mi); extract(qei, nq, xq[bid]); means[bid] = mei; @@ 
-146,7 +160,9 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification& spec) const { std::fill_n(significances.begin(), nb - 1, 0.0); for (std::size_t i = 0u; i < 2; ++i) { for (std::size_t bid = 0u; bid + 1 < nb; ++bid) { - significances[bid] = std::max(significances[bid], maths::CStatisticalTests::twoSampleKS(xq[bid], xq[nb - 1])); + significances[bid] = std::max( + significances[bid], maths::CStatisticalTests::twoSampleKS( + xq[bid], xq[nb - 1])); } // If the rate is high w.r.t. the bucket length we expect the mean and variance @@ -161,16 +177,19 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification& spec) const { if (longest == candidates[bid]) { continue; } - double scale = static_cast(longest) / static_cast(candidates[bid]); + double scale = static_cast(longest) / + static_cast(candidates[bid]); for (std::size_t j = 0u; j < xq[bid].size(); ++j) { - xq[bid][j] = scale * means[bid] + std::sqrt(scale) * (xq[bid][j] - means[bid]); + xq[bid][j] = scale * means[bid] + + std::sqrt(scale) * (xq[bid][j] - means[bid]); } } } for (std::size_t bid = 0u; bid + 1 < nb; ++bid) { double pi = std::min(10.0 * significances[bid], 1.0); - penalties_[bid].add(std::min(maths::CTools::fastLog(pi), 0.0), counts[bid]); + penalties_[bid].add(std::min(maths::CTools::fastLog(pi), 0.0), + counts[bid]); } } @@ -182,9 +201,11 @@ void CSparseCountPenalty::penaltyFromMe(CDetectorSpecification& spec) const { descriptions.reserve(2 * (nb - 1)); for (std::size_t bid = 0u; bid < penalties_.size(); ++bid) { - std::size_t index = this->params().penaltyIndexFor(bid, IGNORE_EMPTY[iid]); + std::size_t index = + this->params().penaltyIndexFor(bid, IGNORE_EMPTY[iid]); indices.push_back(index); - double penalty = std::exp(maths::CBasicStatistics::mean(penalties_[bid])); + double penalty = + std::exp(maths::CBasicStatistics::mean(penalties_[bid])); std::string description; if (penalty < 1.0) { description = "The bucket length does not properly capture the variation in event rate"; diff --git a/lib/config/CTooMuchDataPenalty.cc b/lib/config/CTooMuchDataPenalty.cc index 18859696f0..69632bec88 100644 --- a/lib/config/CTooMuchDataPenalty.cc +++ b/lib/config/CTooMuchDataPenalty.cc @@ -25,25 +25,31 @@ namespace { using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; //! Get the description prefix. 
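
// The rescaling above before the two-sample Kolmogorov-Smirnov comparison
// assumes Poisson-like bucket counts: aggregating buckets by a factor
// "scale" multiplies the mean by scale and the spread by sqrt(scale). A
// minimal sketch of that mapping (illustrative name, not the library API),
// which projects a shorter bucket length's quantiles onto the longest
// candidate bucket length so their distributions are comparable:
#include <cmath>
#include <vector>

void rescaleToLongestBucket(std::vector<double>& quantiles, double mean, double scale) {
    // scale = longest candidate bucket length / this candidate's bucket length.
    for (double& q : quantiles) {
        q = scale * mean + std::sqrt(scale) * (q - mean);
    }
}
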
-std::string descriptionPrefix(const CDetectorSpecification& spec, const TMeanAccumulator& meanOccupied, std::size_t partitions) {
+std::string descriptionPrefix(const CDetectorSpecification& spec,
+                              const TMeanAccumulator& meanOccupied,
+                              std::size_t partitions) {
     if (spec.byField() && spec.partitionField()) {
         return "A significant proportion, " +
-               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) /
+                                   static_cast<double>(partitions)) +
                "%, of distinct partition and by fields combinations have values in many buckets.";
     } else if (spec.byField()) {
         return "A significant proportion, " +
-               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) /
+                                   static_cast<double>(partitions)) +
                "%, of distinct by fields have values in many buckets.";
     } else if (spec.partitionField()) {
         return "A significant proportion, " +
-               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) / static_cast<double>(partitions)) +
+               CTools::prettyPrint(100.0 * maths::CBasicStatistics::count(meanOccupied) /
+                                   static_cast<double>(partitions)) +
                "%, of distinct partition fields have values in many buckets.";
     }
     return "";
 }
 }
 
-CTooMuchDataPenalty::CTooMuchDataPenalty(const CAutoconfigurerParams& params) : CPenalty(params) {
+CTooMuchDataPenalty::CTooMuchDataPenalty(const CAutoconfigurerParams& params)
+    : CPenalty(params) {
 }
 
 CTooMuchDataPenalty* CTooMuchDataPenalty::clone() const {
@@ -60,24 +66,29 @@ void CTooMuchDataPenalty::penaltyFromMe(CDetectorSpecification& spec) const {
                 dynamic_cast<const CPartitionDataCountStatistics*>(spec.countStatistics())) {
             this->penaltyFor(*partitionStats, spec);
         } else if (const CByAndPartitionDataCountStatistics* byAndPartitionStats =
-                       dynamic_cast<const CByAndPartitionDataCountStatistics*>(spec.countStatistics())) {
+                       dynamic_cast<const CByAndPartitionDataCountStatistics*>(
+                           spec.countStatistics())) {
             this->penaltyFor(*byAndPartitionStats, spec);
         } else if (const CByOverAndPartitionDataCountStatistics* byOverAndPartitionStats =
-                       dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(spec.countStatistics())) {
+                       dynamic_cast<const CByOverAndPartitionDataCountStatistics*>(
+                           spec.countStatistics())) {
             this->penaltyFor(*byOverAndPartitionStats, spec);
         }
     }
 }
 
-void CTooMuchDataPenalty::penaltyFor(const CPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
+void CTooMuchDataPenalty::penaltyFor(const CPartitionDataCountStatistics& stats,
+                                     CDetectorSpecification& spec) const {
     this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec);
 }
 
-void CTooMuchDataPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
+void CTooMuchDataPenalty::penaltyFor(const CByAndPartitionDataCountStatistics& stats,
+                                     CDetectorSpecification& spec) const {
     this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec);
 }
 
-void CTooMuchDataPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats, CDetectorSpecification& spec) const {
+void CTooMuchDataPenalty::penaltyFor(const CByOverAndPartitionDataCountStatistics& stats,
+                                     CDetectorSpecification& spec) const {
     this->penaltyFor(stats.bucketCounts(), stats.bucketStatistics(), spec);
 }
 
@@ -86,7 +97,8 @@ void CTooMuchDataPenalty::penaltyFor(const TUInt64Vec& bucketCounts,
                                      CDetectorSpecification& spec) const {
     using TSizeSizePrMomentsUMapCItr = CBucketCountStatistics::TSizeSizePrMomentsUMap::const_iterator;
 
-    const CAutoconfigurerParams::TTimeVec&
candidates = + this->params().candidateBucketLengths(); LOG_TRACE(<< "bucket counts = " << core::CContainerPrinter::print(bucketCounts)); @@ -103,39 +115,48 @@ void CTooMuchDataPenalty::penaltyFor(const TUInt64Vec& bucketCounts, uint64_t bc = bucketCounts[bid]; if (bc > 0) { const CBucketCountStatistics& si = statistics[bid]; - const CBucketCountStatistics::TSizeSizePrMomentsUMap& mi = si.countMomentsPerPartition(); + const CBucketCountStatistics::TSizeSizePrMomentsUMap& mi = + si.countMomentsPerPartition(); TMeanAccumulator penalty_; TMeanAccumulator penalizedOccupancy; for (TSizeSizePrMomentsUMapCItr j = mi.begin(); j != mi.end(); ++j) { - double occupied = maths::CBasicStatistics::count(j->second) / static_cast(bc); - double penalty = CTools::logInterpolate(this->params().highPopulatedBucketFraction(function, true), - this->params().maximumPopulatedBucketFraction(function, true), - 1.0, - 1.0 / static_cast(bucketCounts[bid]), - occupied); + double occupied = maths::CBasicStatistics::count(j->second) / + static_cast(bc); + double penalty = CTools::logInterpolate( + this->params().highPopulatedBucketFraction(function, true), + this->params().maximumPopulatedBucketFraction(function, true), + 1.0, 1.0 / static_cast(bucketCounts[bid]), occupied); penalty_.add(maths::CTools::fastLog(penalty)); if (penalty < 1.0) { penalizedOccupancy.add(occupied); } } - if (maths::CBasicStatistics::count(penalizedOccupancy) > 0.95 * static_cast(mi.size())) { - double penalty = std::min(std::exp(maths::CBasicStatistics::mean(penalty_)), 1.0); + if (maths::CBasicStatistics::count(penalizedOccupancy) > + 0.95 * static_cast(mi.size())) { + double penalty = + std::min(std::exp(maths::CBasicStatistics::mean(penalty_)), 1.0); std::size_t index = this->params().penaltyIndexFor(bid, true); indices.push_back(index); penalties.push_back(penalty); descriptions.push_back(""); if (penalty < 1.0) { if (spec.byField() || spec.partitionField()) { - descriptions.back() = descriptionPrefix(spec, penalizedOccupancy, mi.size()) + " On average, " + - CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(penalizedOccupancy)) + - "% of their buckets have a value"; + descriptions.back() = + descriptionPrefix(spec, penalizedOccupancy, mi.size()) + + " On average, " + + CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean( + penalizedOccupancy)) + + "% of their buckets have a value"; } else { - descriptions.back() = "A significant proportion, " + - CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean(penalizedOccupancy)) + "%, of " + - CTools::prettyPrint(candidates[bid]) + " buckets have a value"; + descriptions.back() = + "A significant proportion, " + + CTools::prettyPrint(100.0 * maths::CBasicStatistics::mean( + penalizedOccupancy)) + + "%, of " + CTools::prettyPrint(candidates[bid]) + + " buckets have a value"; } } } diff --git a/lib/config/CTools.cc b/lib/config/CTools.cc index 756d64ba39..936a49600a 100644 --- a/lib/config/CTools.cc +++ b/lib/config/CTools.cc @@ -37,18 +37,21 @@ std::size_t CTools::category64(const std::string& value) { } double CTools::interpolate(double a, double b, double pa, double pb, double x) { - return maths::CTools::truncate(pa + (pb - pa) * (x - a) / (b - a), std::min(pa, pb), std::max(pa, pb)); + return maths::CTools::truncate(pa + (pb - pa) * (x - a) / (b - a), + std::min(pa, pb), std::max(pa, pb)); } double CTools::powInterpolate(double p, double a, double b, double pa, double pb, double x) { - return maths::CTools::truncate(pa + (pb - pa) * std::pow((x - a) / (b - a), p), std::min(pa, 
pb), std::max(pa, pb)); + return maths::CTools::truncate(pa + (pb - pa) * std::pow((x - a) / (b - a), p), + std::min(pa, pb), std::max(pa, pb)); } double CTools::logInterpolate(double a, double b, double pa, double pb, double x) { double la = maths::CTools::fastLog(a); double lb = maths::CTools::fastLog(b); double lx = maths::CTools::fastLog(x); - return maths::CTools::truncate(pa + (pb - pa) * (lx - la) / (lb - la), std::min(pa, pb), std::max(pa, pb)); + return maths::CTools::truncate(pa + (pb - pa) * (lx - la) / (lb - la), + std::min(pa, pb), std::max(pa, pb)); } std::string CTools::prettyPrint(double d) { @@ -68,7 +71,9 @@ std::string CTools::prettyPrint(double d) { } else if (std::fabs(d) < 1e13) { std::sprintf(buf, "%.0f", d); char* end = std::find(buf, buf + 20, '\0'); - for (char *pos = end; pos - buf > 3 && std::isdigit(static_cast(pos[-4])); pos -= 3, ++end) { + for (char *pos = end; + pos - buf > 3 && std::isdigit(static_cast(pos[-4])); + pos -= 3, ++end) { std::copy_backward(pos - 3, end, end + 1); pos[-3] = ','; } @@ -83,10 +88,12 @@ std::string CTools::prettyPrint(core_t::TTime time) { static const char* SUFFIXES[] = {" week", " day", " hr", " min", " sec"}; std::string result; - core_t::TTime intervals[] = {(time / 604800), (time / 86400) % 7, (time / 3600) % 24, (time / 60) % 60, time % 60}; + core_t::TTime intervals[] = {(time / 604800), (time / 86400) % 7, + (time / 3600) % 24, (time / 60) % 60, time % 60}; for (std::size_t i = 0u; i < boost::size(intervals); ++i) { if (intervals[i] != 0) { - result += (result.empty() ? "" : " ") + core::CStringUtils::typeToString(intervals[i]) + SUFFIXES[i]; + result += (result.empty() ? "" : " ") + + core::CStringUtils::typeToString(intervals[i]) + SUFFIXES[i]; } } return result; diff --git a/lib/config/ConfigTypes.cc b/lib/config/ConfigTypes.cc index d038620a8c..9dd9eb2efc 100644 --- a/lib/config/ConfigTypes.cc +++ b/lib/config/ConfigTypes.cc @@ -12,7 +12,8 @@ namespace ml { namespace config_t { namespace { -const std::string USER_DATA_TYPE_NAMES[] = {std::string("categorical"), std::string("numeric")}; +const std::string USER_DATA_TYPE_NAMES[] = {std::string("categorical"), + std::string("numeric")}; const std::string DATA_TYPE_NAMES[] = {std::string(""), std::string("binary"), @@ -33,11 +34,13 @@ const std::string FUNCTION_CATEGORY_NAMES[] = {std::string("count"), std::string("varp"), std::string("median")}; -const std::string IGNORE_EMPTY_VERSION_NAMES[][2] = {{std::string("n/a"), std::string("n/a")}, - {std::string("count"), std::string("non_zero_count")}, - {std::string("sum"), std::string("non_null_sum")}}; +const std::string IGNORE_EMPTY_VERSION_NAMES[][2] = { + {std::string("n/a"), std::string("n/a")}, + {std::string("count"), std::string("non_zero_count")}, + {std::string("sum"), std::string("non_null_sum")}}; -const std::string SIDE_NAME[] = {std::string("high"), std::string("low"), std::string("both"), std::string("")}; +const std::string SIDE_NAME[] = {std::string("high"), std::string("low"), + std::string("both"), std::string("")}; } const std::string& print(EUserDataType type) { @@ -227,7 +230,8 @@ bool hasDoAndDontIgnoreEmptyVersions(EFunctionCategory function) { return true; } -const std::string& ignoreEmptyVersionName(EFunctionCategory function, bool ignoreEmpty, bool isPopulation) { +const std::string& +ignoreEmptyVersionName(EFunctionCategory function, bool ignoreEmpty, bool isPopulation) { std::size_t index = 0u; switch (function) { case E_Count: diff --git a/lib/config/Constants.cc b/lib/config/Constants.cc index 
75ef61946f..4b85fea4b7 100644 --- a/lib/config/Constants.cc +++ b/lib/config/Constants.cc @@ -11,7 +11,8 @@ namespace config { namespace constants { namespace { -const std::string FIELD_NAME[] = {std::string("argument"), std::string("by"), std::string("over"), std::string("partition")}; +const std::string FIELD_NAME[] = {std::string("argument"), std::string("by"), + std::string("over"), std::string("partition")}; } const std::size_t CFieldIndices::PARTITIONING[] = {BY_INDEX, OVER_INDEX, PARTITION_INDEX}; diff --git a/lib/config/unittest/CAutoconfigurerParamsTest.cc b/lib/config/unittest/CAutoconfigurerParamsTest.cc index a7debce134..0725742721 100644 --- a/lib/config/unittest/CAutoconfigurerParamsTest.cc +++ b/lib/config/unittest/CAutoconfigurerParamsTest.cc @@ -78,49 +78,50 @@ void CAutoconfigurerParamsTest::testInit() { params.init("testfiles/parameters.conf"); std::string actual = params.print(); - std::string expected = " TimeFieldName = time\n" - " TimeFieldFormat = \n" - " FieldsOfInterest = [performance_metric, performance_metric_name, machine, user, region, program]\n" - " FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX] = \"null\"\n" - " FieldsToUseInAutoconfigureByRole[constants::BY_INDEX] = [performance_metric_name, program]\n" - " FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX] = [user]\n" - " FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX] = [user, machine, region]\n" - " FunctionCategoriesToConfigure = [count, min, max, median]\n" - " FieldDataType = [(machine,categorical), (performance_metric,numeric)]\n" - " MinimumExamplesToClassify = 50\n" - " NumberOfMostFrequentFieldsCounts = 20\n" - " MinimumRecordsToAttemptConfig = 200\n" - " HighNumberByFieldValues = 50\n" - " MaximumNumberByFieldValues = 5000\n" - " HighNumberRareByFieldValues = 10000\n" - " MaximumNumberRareByFieldValues = 100000\n" - " HighNumberPartitionFieldValues = 1000\n" - " MaximumNumberPartitionFieldValues = 100000\n" - " LowNumberOverFieldValues = 80\n" - " MinimumNumberOverFieldValues = 20\n" - " HighCardinalityInTailFactor = 1.030000\n" - " HighCardinalityInTailIncrement = 2\n" - " HighCardinalityHighTailFraction = 0.310000\n" - " HighCardinalityMaximumTailFraction = 0.620000\n" - " LowPopulatedBucketFractions = [0.35, 0.12]\n" - " MinimumPopulatedBucketFractions = [0.11, 0.042]\n" - " HighPopulatedBucketFractions[1] = 0.100000\n" - " MaximumPopulatedBucketFractions[1] = 0.500000\n" - " CandidateBucketLengths = [1, 60, 600, 1800, 7200]\n" - " LowNumberOfBucketsForConfig = 30.000000\n" - " MinimumNumberOfBucketsForConfig = 8.000000\n" - " PolledDataMinimumMassAtInterval = 0.890000\n" - " PolledDataJitter = 0.030000\n" - " LowCoefficientOfVariation = 0.003000\n" - " MinimumCoefficientOfVariation = 0.000200\n" - " LowLengthRangeForInfoContent = 10.000000\n" - " MinimumLengthRangeForInfoContent = 1.000000\n" - " LowMaximumLengthForInfoContent = 25.000000\n" - " MinimumMaximumLengthForInfoContent = 5.000000\n" - " LowEntropyForInfoContent = 0.010000\n" - " MinimumEntropyForInfoContent = 0.000001\n" - " LowDistinctCountForInfoContent = 500000.000000\n" - " MinimumDistinctCountForInfoContent = 5000.000000\n"; + std::string expected = + " TimeFieldName = time\n" + " TimeFieldFormat = \n" + " FieldsOfInterest = [performance_metric, performance_metric_name, machine, user, region, program]\n" + " FieldsToUseInAutoconfigureByRole[constants::ARGUMENT_INDEX] = \"null\"\n" + " FieldsToUseInAutoconfigureByRole[constants::BY_INDEX] = [performance_metric_name, program]\n" + " 
FieldsToUseInAutoconfigureByRole[constants::OVER_INDEX] = [user]\n" + " FieldsToUseInAutoconfigureByRole[constants::PARTITION_INDEX] = [user, machine, region]\n" + " FunctionCategoriesToConfigure = [count, min, max, median]\n" + " FieldDataType = [(machine,categorical), (performance_metric,numeric)]\n" + " MinimumExamplesToClassify = 50\n" + " NumberOfMostFrequentFieldsCounts = 20\n" + " MinimumRecordsToAttemptConfig = 200\n" + " HighNumberByFieldValues = 50\n" + " MaximumNumberByFieldValues = 5000\n" + " HighNumberRareByFieldValues = 10000\n" + " MaximumNumberRareByFieldValues = 100000\n" + " HighNumberPartitionFieldValues = 1000\n" + " MaximumNumberPartitionFieldValues = 100000\n" + " LowNumberOverFieldValues = 80\n" + " MinimumNumberOverFieldValues = 20\n" + " HighCardinalityInTailFactor = 1.030000\n" + " HighCardinalityInTailIncrement = 2\n" + " HighCardinalityHighTailFraction = 0.310000\n" + " HighCardinalityMaximumTailFraction = 0.620000\n" + " LowPopulatedBucketFractions = [0.35, 0.12]\n" + " MinimumPopulatedBucketFractions = [0.11, 0.042]\n" + " HighPopulatedBucketFractions[1] = 0.100000\n" + " MaximumPopulatedBucketFractions[1] = 0.500000\n" + " CandidateBucketLengths = [1, 60, 600, 1800, 7200]\n" + " LowNumberOfBucketsForConfig = 30.000000\n" + " MinimumNumberOfBucketsForConfig = 8.000000\n" + " PolledDataMinimumMassAtInterval = 0.890000\n" + " PolledDataJitter = 0.030000\n" + " LowCoefficientOfVariation = 0.003000\n" + " MinimumCoefficientOfVariation = 0.000200\n" + " LowLengthRangeForInfoContent = 10.000000\n" + " MinimumLengthRangeForInfoContent = 1.000000\n" + " LowMaximumLengthForInfoContent = 25.000000\n" + " MinimumMaximumLengthForInfoContent = 5.000000\n" + " LowEntropyForInfoContent = 0.010000\n" + " MinimumEntropyForInfoContent = 0.000001\n" + " LowDistinctCountForInfoContent = 500000.000000\n" + " MinimumDistinctCountForInfoContent = 5000.000000\n"; LOG_DEBUG(<< "parameters =\n" << actual); CPPUNIT_ASSERT_EQUAL(expected, actual); @@ -133,10 +134,10 @@ void CAutoconfigurerParamsTest::testInit() { CppUnit::Test* CAutoconfigurerParamsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAutoconfigurerParamsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CAutoconfigurerParamsTest>("CAutoconfigurerParamsTest::testDefaults", - &CAutoconfigurerParamsTest::testDefaults)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CAutoconfigurerParamsTest>("CAutoconfigurerParamsTest::testInit", &CAutoconfigurerParamsTest::testInit)); + suiteOfTests->addTest(new CppUnit::TestCaller<CAutoconfigurerParamsTest>( + "CAutoconfigurerParamsTest::testDefaults", &CAutoconfigurerParamsTest::testDefaults)); + suiteOfTests->addTest(new CppUnit::TestCaller<CAutoconfigurerParamsTest>( + "CAutoconfigurerParamsTest::testInit", &CAutoconfigurerParamsTest::testInit)); return suiteOfTests; } diff --git a/lib/config/unittest/CDataSemanticsTest.cc b/lib/config/unittest/CDataSemanticsTest.cc index 17372c452f..15c2478ada 100644 --- a/lib/config/unittest/CDataSemanticsTest.cc +++ b/lib/config/unittest/CDataSemanticsTest.cc @@ -85,19 +85,23 @@ void CDataSemanticsTest::testNumericCategorical() { // Test plausible http status code distribution is correctly // identified as categorical. 
- double codes[] = {200, 201, 202, 303, 400, 403, 404, 500, 501, 503, 506, 598, 599}; - double frequencies[] = {0.7715, 0.03, 0.05, 0.001, 0.005, 0.041, 0.061, 0.002, 0.0005, 0.021, 0.001, 0.002, 0.014}; + double codes[] = {200, 201, 202, 303, 400, 403, 404, + 500, 501, 503, 506, 598, 599}; + double frequencies[] = {0.7715, 0.03, 0.05, 0.001, 0.005, 0.041, 0.061, + 0.002, 0.0005, 0.021, 0.001, 0.002, 0.014}; test::CRandomNumbers rng; TDoubleVec status; rng.generateMultinomialSamples( - TDoubleVec(boost::begin(codes), boost::end(codes)), TDoubleVec(boost::begin(frequencies), boost::end(frequencies)), 5000, status); + TDoubleVec(boost::begin(codes), boost::end(codes)), + TDoubleVec(boost::begin(frequencies), boost::end(frequencies)), 5000, status); config::CDataSemantics semantics; for (std::size_t i = 0u; i < status.size(); ++i) { - semantics.add(core::CStringUtils::typeToString(static_cast<std::size_t>(status[i]))); + semantics.add( + core::CStringUtils::typeToString(static_cast<std::size_t>(status[i]))); } semantics.computeType(); LOG_DEBUG(<< "type = " << semantics.type()); @@ -297,13 +301,17 @@ void CDataSemanticsTest::testReal() { CppUnit::Test* CDataSemanticsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDataSemanticsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>("CDataSemanticsTest::testBinary", &CDataSemanticsTest::testBinary)); - suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>("CDataSemanticsTest::testNonNumericCategorical", - &CDataSemanticsTest::testNonNumericCategorical)); - suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>("CDataSemanticsTest::testNumericCategorical", - &CDataSemanticsTest::testNumericCategorical)); - suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>("CDataSemanticsTest::testInteger", &CDataSemanticsTest::testInteger)); - suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>("CDataSemanticsTest::testReal", &CDataSemanticsTest::testReal)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>( + "CDataSemanticsTest::testBinary", &CDataSemanticsTest::testBinary)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>( + "CDataSemanticsTest::testNonNumericCategorical", + &CDataSemanticsTest::testNonNumericCategorical)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>( + "CDataSemanticsTest::testNumericCategorical", &CDataSemanticsTest::testNumericCategorical)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>( + "CDataSemanticsTest::testInteger", &CDataSemanticsTest::testInteger)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSemanticsTest>( + "CDataSemanticsTest::testReal", &CDataSemanticsTest::testReal)); return suiteOfTests; } diff --git a/lib/config/unittest/CDataSummaryStatisticsTest.cc b/lib/config/unittest/CDataSummaryStatisticsTest.cc index 5ccf8a8127..48d7aa27a7 100644 --- a/lib/config/unittest/CDataSummaryStatisticsTest.cc +++ b/lib/config/unittest/CDataSummaryStatisticsTest.cc @@ -53,9 +53,11 @@ void CDataSummaryStatisticsTest::testRate() { summary.add(static_cast<core_t::TTime>(times[j])); } - LOG_DEBUG(<< "earliest = " << summary.earliest() << ", latest = " << summary.latest()); + LOG_DEBUG(<< "earliest = " << summary.earliest() + << ", latest = " << summary.latest()); LOG_DEBUG(<< "rate = " << summary.meanRate()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(rate[i], summary.meanRate(), 2.0 * rate[i] * rate[i] / n); + CPPUNIT_ASSERT_DOUBLES_EQUAL(rate[i], summary.meanRate(), + 2.0 * rate[i] * rate[i] / n); } } @@ -82,7 +84,8 @@ void CDataSummaryStatisticsTest::testCategoricalDistinctCount() { summary.add(static_cast<core_t::TTime>(j), categories[j]); } - LOG_DEBUG(<< "# categories = " << categories.size() << ", distinct 
count = " << summary.distinctCount()); + LOG_DEBUG(<< "# categories = " << categories.size() + << ", distinct count = " << summary.distinctCount()); } } @@ -91,11 +94,13 @@ void CDataSummaryStatisticsTest::testCategoricalDistinctCount() { config::CCategoricalDataSummaryStatistics summary(100); for (std::size_t i = 0u; i < 1000000; ++i) { - summary.add(static_cast(i), core::CStringUtils::typeToString(i)); + summary.add(static_cast(i), + core::CStringUtils::typeToString(i)); } LOG_DEBUG(<< "# categories = 1000000, distinct count = " << summary.distinctCount()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1000000.0, static_cast(summary.distinctCount()), 0.005 * 1000000.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1000000.0, static_cast(summary.distinctCount()), 0.005 * 1000000.0); } } @@ -125,14 +130,16 @@ void CDataSummaryStatisticsTest::testCategoricalTopN() { rng.generateUniformSamples(0.0, 1.0, 1, p); if (p[0] < 0.05) { - std::size_t b = std::upper_bound(boost::begin(freq), boost::end(freq), j / 2000) - boost::begin(freq); + std::size_t b = std::upper_bound(boost::begin(freq), boost::end(freq), j / 2000) - + boost::begin(freq); rng.generateUniformSamples(0, b, 1, index); index[0] = freq[index[0]]; } else { rng.generateUniformSamples(0, categories.size(), 1, index); } - const std::size_t* f = std::lower_bound(boost::begin(freq), boost::end(freq), index[0]); + const std::size_t* f = + std::lower_bound(boost::begin(freq), boost::end(freq), index[0]); if (f != boost::end(freq) && *f == index[0]) { ++counts[f - boost::begin(freq)]; } @@ -146,8 +153,10 @@ void CDataSummaryStatisticsTest::testCategoricalTopN() { TMeanAccumulator meanError; for (std::size_t i = 0u; i < boost::size(freq); ++i) { LOG_DEBUG(<< ""); - LOG_DEBUG(<< "actual: " << categories[freq[i]] << " appeared " << counts[i] << " times"); - LOG_DEBUG(<< "estimated: " << topn[i].first << " appeared " << topn[i].second << " times"); + LOG_DEBUG(<< "actual: " << categories[freq[i]] << " appeared " + << counts[i] << " times"); + LOG_DEBUG(<< "estimated: " << topn[i].first << " appeared " + << topn[i].second << " times"); double exact = static_cast(counts[i]); double approx = static_cast(topn[i].second); @@ -195,7 +204,8 @@ void CDataSummaryStatisticsTest::testNumericBasicStatistics() { config::CNumericDataSummaryStatistics summary(false); for (std::size_t j = 0u; j < samples.size(); ++j) { - summary.add(static_cast(j), core::CStringUtils::typeToString(samples[j])); + summary.add(static_cast(j), + core::CStringUtils::typeToString(samples[j])); } LOG_DEBUG(<< "minimum = " << summary.minimum()); @@ -223,13 +233,15 @@ void CDataSummaryStatisticsTest::testNumericBasicStatistics() { config::CNumericDataSummaryStatistics summary(false); for (std::size_t j = 0u; j < samples.size(); ++j) { - summary.add(static_cast(j), core::CStringUtils::typeToString(samples[j])); + summary.add(static_cast(j), + core::CStringUtils::typeToString(samples[j])); } LOG_DEBUG(<< "median = " << summary.median()); CPPUNIT_ASSERT(std::fabs(summary.median() - boost::math::median(lognormal)) < 0.25); - meanError.add(std::fabs(summary.median() - boost::math::median(lognormal)) / boost::math::median(lognormal)); + meanError.add(std::fabs(summary.median() - boost::math::median(lognormal)) / + boost::math::median(lognormal)); } LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(meanError)); @@ -256,7 +268,8 @@ void CDataSummaryStatisticsTest::testNumericDistribution() { config::CNumericDataSummaryStatistics statistics(false); for (std::size_t i = 0u; i < samples.size(); ++i) { - 
statistics.add(static_cast<core_t::TTime>(i), core::CStringUtils::typeToString(samples[i])); + statistics.add(static_cast<core_t::TTime>(i), + core::CStringUtils::typeToString(samples[i])); } config::CNumericDataSummaryStatistics::TDoubleDoublePrVec chart; @@ -271,14 +284,17 @@ void CDataSummaryStatisticsTest::testNumericDistribution() { } double fexpected = boost::math::pdf(d, std::max(chart[i].first, 0.0)); double f = chart[i].second; - LOG_DEBUG(<< "x = " << chart[i].first << ", fexpected(x) = " << fexpected << ", f(x) = " << f); + LOG_DEBUG(<< "x = " << chart[i].first + << ", fexpected(x) = " << fexpected << ", f(x) = " << f); meanAbsError.add(std::fabs(f - fexpected)); mean.add(fexpected); } LOG_DEBUG(<< "meanAbsError = " << maths::CBasicStatistics::mean(meanAbsError)); LOG_DEBUG(<< "mean = " << maths::CBasicStatistics::mean(mean)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanAbsError) / maths::CBasicStatistics::mean(mean) < 0.3); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanAbsError) / + maths::CBasicStatistics::mean(mean) < + 0.3); } { @@ -305,7 +321,8 @@ void CDataSummaryStatisticsTest::testNumericDistribution() { config::CNumericDataSummaryStatistics statistics(false); for (std::size_t i = 0u; i < samples.size(); ++i) { - statistics.add(static_cast<core_t::TTime>(i), core::CStringUtils::typeToString(samples[i])); + statistics.add(static_cast<core_t::TTime>(i), + core::CStringUtils::typeToString(samples[i])); } config::CNumericDataSummaryStatistics::TDoubleDoublePrVec chart; @@ -315,12 +332,16 @@ void CDataSummaryStatisticsTest::testNumericDistribution() { TMeanAccumulator meanRelError; for (std::size_t i = 0u; i < chart.size(); ++i) { - double fexpected = weights[0] * boost::math::pdf(m0, chart[i].first) + weights[1] * boost::math::pdf(m1, chart[i].first) + - weights[2] * boost::math::pdf(m2, chart[i].first) + weights[3] * boost::math::pdf(m3, chart[i].first); + double fexpected = weights[0] * boost::math::pdf(m0, chart[i].first) + + weights[1] * boost::math::pdf(m1, chart[i].first) + + weights[2] * boost::math::pdf(m2, chart[i].first) + + weights[3] * boost::math::pdf(m3, chart[i].first); double f = chart[i].second; - LOG_DEBUG(<< "x = " << chart[i].first << ", fexpected(x) = " << fexpected << ", f(x) = " << f); + LOG_DEBUG(<< "x = " << chart[i].first + << ", fexpected(x) = " << fexpected << ", f(x) = " << f); meanAbsError.add(std::fabs(f - fexpected)); - meanRelError.add(std::fabs(std::log(f) - std::log(fexpected)) / std::fabs(std::log(fexpected))); + meanRelError.add(std::fabs(std::log(f) - std::log(fexpected)) / + std::fabs(std::log(fexpected))); } LOG_DEBUG(<< "meanAbsError = " << maths::CBasicStatistics::mean(meanAbsError)); @@ -333,16 +354,20 @@ void CDataSummaryStatisticsTest::testNumericDistribution() { CppUnit::Test* CDataSummaryStatisticsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDataSummaryStatisticsTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testRate", &CDataSummaryStatisticsTest::testRate)); - suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testCategoricalDistinctCount", - &CDataSummaryStatisticsTest::testCategoricalDistinctCount)); - suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testCategoricalTopN", - &CDataSummaryStatisticsTest::testCategoricalTopN)); - suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testNumericBasicStatistics", - &CDataSummaryStatisticsTest::testNumericBasicStatistics)); - suiteOfTests->addTest(new 
CppUnit::TestCaller<CDataSummaryStatisticsTest>("CDataSummaryStatisticsTest::testNumericDistribution", - &CDataSummaryStatisticsTest::testNumericDistribution)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>( + "CDataSummaryStatisticsTest::testRate", &CDataSummaryStatisticsTest::testRate)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>( + "CDataSummaryStatisticsTest::testCategoricalDistinctCount", + &CDataSummaryStatisticsTest::testCategoricalDistinctCount)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>( + "CDataSummaryStatisticsTest::testCategoricalTopN", + &CDataSummaryStatisticsTest::testCategoricalTopN)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>( + "CDataSummaryStatisticsTest::testNumericBasicStatistics", + &CDataSummaryStatisticsTest::testNumericBasicStatistics)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDataSummaryStatisticsTest>( + "CDataSummaryStatisticsTest::testNumericDistribution", + &CDataSummaryStatisticsTest::testNumericDistribution)); return suiteOfTests; } diff --git a/lib/config/unittest/CDetectorEnumeratorTest.cc b/lib/config/unittest/CDetectorEnumeratorTest.cc index edd6edbc6a..d77063b8d2 100644 --- a/lib/config/unittest/CDetectorEnumeratorTest.cc +++ b/lib/config/unittest/CDetectorEnumeratorTest.cc @@ -17,7 +17,8 @@ using namespace ml; namespace { -std::string print(const config::CDetectorEnumerator::TDetectorSpecificationVec& spec, const std::string& indent = std::string()) { +std::string print(const config::CDetectorEnumerator::TDetectorSpecificationVec& spec, + const std::string& indent = std::string()) { std::ostringstream result; for (std::size_t i = 0u; i < spec.size(); ++i) { result << indent << spec[i].description() << "\n"; @@ -82,33 +83,34 @@ void CDetectorEnumeratorTest::testAll() { enumerator.addOverField("person"); enumerator.generate(spec); LOG_DEBUG(<< "4) detectors =\n" << print(spec, " ")); - std::string expected = "[low_|high_][non_zero_]count\n" - "[low_|high_]distinct_count(port)\n" - "[low_|high_]mean(bytes)\n" - "[low_|high_][non_zero_]count by 'process'\n" - "[low_|high_][non_zero_]count by 'parent_process'\n" - "[low_|high_]distinct_count(port) by 'process'\n" - "[low_|high_]distinct_count(port) by 'parent_process'\n" - "[low_|high_]mean(bytes) by 'process'\n" - "[low_|high_]mean(bytes) by 'parent_process'\n" - "[low_|high_]count over 'machine'\n" - "[low_|high_]count over 'person'\n" - "[low_|high_]distinct_count(port) over 'machine'\n" - "[low_|high_]distinct_count(port) over 'person'\n" - "[low_|high_]mean(bytes) over 'machine'\n" - "[low_|high_]mean(bytes) over 'person'\n" - "[low_|high_]count by 'process' over 'machine'\n" - "[low_|high_]count by 'process' over 'person'\n" - "[low_|high_]count by 'parent_process' over 'machine'\n" - "[low_|high_]count by 'parent_process' over 'person'\n" - "[low_|high_]distinct_count(port) by 'process' over 'machine'\n" - "[low_|high_]distinct_count(port) by 'process' over 'person'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'machine'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'person'\n" - "[low_|high_]mean(bytes) by 'process' over 'machine'\n" - "[low_|high_]mean(bytes) by 'process' over 'person'\n" - "[low_|high_]mean(bytes) by 'parent_process' over 'machine'\n" - "[low_|high_]mean(bytes) by 'parent_process' over 'person'\n"; + std::string expected = + "[low_|high_][non_zero_]count\n" + "[low_|high_]distinct_count(port)\n" + "[low_|high_]mean(bytes)\n" + "[low_|high_][non_zero_]count by 'process'\n" + "[low_|high_][non_zero_]count by 'parent_process'\n" + "[low_|high_]distinct_count(port) by 
'process'\n" + "[low_|high_]distinct_count(port) by 'parent_process'\n" + "[low_|high_]mean(bytes) by 'process'\n" + "[low_|high_]mean(bytes) by 'parent_process'\n" + "[low_|high_]count over 'machine'\n" + "[low_|high_]count over 'person'\n" + "[low_|high_]distinct_count(port) over 'machine'\n" + "[low_|high_]distinct_count(port) over 'person'\n" + "[low_|high_]mean(bytes) over 'machine'\n" + "[low_|high_]mean(bytes) over 'person'\n" + "[low_|high_]count by 'process' over 'machine'\n" + "[low_|high_]count by 'process' over 'person'\n" + "[low_|high_]count by 'parent_process' over 'machine'\n" + "[low_|high_]count by 'parent_process' over 'person'\n" + "[low_|high_]distinct_count(port) by 'process' over 'machine'\n" + "[low_|high_]distinct_count(port) by 'process' over 'person'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'machine'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'person'\n" + "[low_|high_]mean(bytes) by 'process' over 'machine'\n" + "[low_|high_]mean(bytes) by 'process' over 'person'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'machine'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'person'\n"; CPPUNIT_ASSERT_EQUAL(expected, print(spec)); } @@ -117,96 +119,97 @@ void CDetectorEnumeratorTest::testAll() { enumerator.addPartitionField("machine"); enumerator.addPartitionField("data_centre"); enumerator.generate(spec); - std::string expected = "[low_|high_][non_zero_]count\n" - "[low_|high_]distinct_count(port)\n" - "[low_|high_]mean(bytes)\n" - "[low_|high_][non_zero_]count by 'process'\n" - "[low_|high_][non_zero_]count by 'parent_process'\n" - "[low_|high_]distinct_count(port) by 'process'\n" - "[low_|high_]distinct_count(port) by 'parent_process'\n" - "[low_|high_]mean(bytes) by 'process'\n" - "[low_|high_]mean(bytes) by 'parent_process'\n" - "[low_|high_]count over 'machine'\n" - "[low_|high_]count over 'person'\n" - "[low_|high_]distinct_count(port) over 'machine'\n" - "[low_|high_]distinct_count(port) over 'person'\n" - "[low_|high_]mean(bytes) over 'machine'\n" - "[low_|high_]mean(bytes) over 'person'\n" - "[low_|high_][non_zero_]count partition 'process'\n" - "[low_|high_][non_zero_]count partition 'machine'\n" - "[low_|high_][non_zero_]count partition 'data_centre'\n" - "[low_|high_]distinct_count(port) partition 'process'\n" - "[low_|high_]distinct_count(port) partition 'machine'\n" - "[low_|high_]distinct_count(port) partition 'data_centre'\n" - "[low_|high_]mean(bytes) partition 'process'\n" - "[low_|high_]mean(bytes) partition 'machine'\n" - "[low_|high_]mean(bytes) partition 'data_centre'\n" - "[low_|high_]count by 'process' over 'machine'\n" - "[low_|high_]count by 'process' over 'person'\n" - "[low_|high_]count by 'parent_process' over 'machine'\n" - "[low_|high_]count by 'parent_process' over 'person'\n" - "[low_|high_]distinct_count(port) by 'process' over 'machine'\n" - "[low_|high_]distinct_count(port) by 'process' over 'person'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'machine'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'person'\n" - "[low_|high_]mean(bytes) by 'process' over 'machine'\n" - "[low_|high_]mean(bytes) by 'process' over 'person'\n" - "[low_|high_]mean(bytes) by 'parent_process' over 'machine'\n" - "[low_|high_]mean(bytes) by 'parent_process' over 'person'\n" - "[low_|high_][non_zero_]count by 'process' partition 'machine'\n" - "[low_|high_][non_zero_]count by 'process' partition 'data_centre'\n" - "[low_|high_][non_zero_]count by 'parent_process' 
partition 'process'\n" - "[low_|high_][non_zero_]count by 'parent_process' partition 'machine'\n" - "[low_|high_][non_zero_]count by 'parent_process' partition 'data_centre'\n" - "[low_|high_]distinct_count(port) by 'process' partition 'machine'\n" - "[low_|high_]distinct_count(port) by 'process' partition 'data_centre'\n" - "[low_|high_]distinct_count(port) by 'parent_process' partition 'process'\n" - "[low_|high_]distinct_count(port) by 'parent_process' partition 'machine'\n" - "[low_|high_]distinct_count(port) by 'parent_process' partition 'data_centre'\n" - "[low_|high_]mean(bytes) by 'process' partition 'machine'\n" - "[low_|high_]mean(bytes) by 'process' partition 'data_centre'\n" - "[low_|high_]mean(bytes) by 'parent_process' partition 'process'\n" - "[low_|high_]mean(bytes) by 'parent_process' partition 'machine'\n" - "[low_|high_]mean(bytes) by 'parent_process' partition 'data_centre'\n" - "[low_|high_]count over 'machine' partition 'process'\n" - "[low_|high_]count over 'machine' partition 'data_centre'\n" - "[low_|high_]count over 'person' partition 'process'\n" - "[low_|high_]count over 'person' partition 'machine'\n" - "[low_|high_]count over 'person' partition 'data_centre'\n" - "[low_|high_]distinct_count(port) over 'machine' partition 'process'\n" - "[low_|high_]distinct_count(port) over 'machine' partition 'data_centre'\n" - "[low_|high_]distinct_count(port) over 'person' partition 'process'\n" - "[low_|high_]distinct_count(port) over 'person' partition 'machine'\n" - "[low_|high_]distinct_count(port) over 'person' partition 'data_centre'\n" - "[low_|high_]mean(bytes) over 'machine' partition 'process'\n" - "[low_|high_]mean(bytes) over 'machine' partition 'data_centre'\n" - "[low_|high_]mean(bytes) over 'person' partition 'process'\n" - "[low_|high_]mean(bytes) over 'person' partition 'machine'\n" - "[low_|high_]mean(bytes) over 'person' partition 'data_centre'\n" - "[low_|high_]count by 'process' over 'machine' partition 'data_centre'\n" - "[low_|high_]count by 'process' over 'person' partition 'machine'\n" - "[low_|high_]count by 'process' over 'person' partition 'data_centre'\n" - "[low_|high_]count by 'parent_process' over 'machine' partition 'process'\n" - "[low_|high_]count by 'parent_process' over 'machine' partition 'data_centre'\n" - "[low_|high_]count by 'parent_process' over 'person' partition 'process'\n" - "[low_|high_]count by 'parent_process' over 'person' partition 'machine'\n" - "[low_|high_]count by 'parent_process' over 'person' partition 'data_centre'\n" - "[low_|high_]distinct_count(port) by 'process' over 'machine' partition 'data_centre'\n" - "[low_|high_]distinct_count(port) by 'process' over 'person' partition 'machine'\n" - "[low_|high_]distinct_count(port) by 'process' over 'person' partition 'data_centre'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'machine' partition 'process'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'machine' partition 'data_centre'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'person' partition 'process'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'person' partition 'machine'\n" - "[low_|high_]distinct_count(port) by 'parent_process' over 'person' partition 'data_centre'\n" - "[low_|high_]mean(bytes) by 'process' over 'machine' partition 'data_centre'\n" - "[low_|high_]mean(bytes) by 'process' over 'person' partition 'machine'\n" - "[low_|high_]mean(bytes) by 'process' over 'person' partition 'data_centre'\n" - "[low_|high_]mean(bytes) by 
'parent_process' over 'machine' partition 'process'\n" - "[low_|high_]mean(bytes) by 'parent_process' over 'machine' partition 'data_centre'\n" - "[low_|high_]mean(bytes) by 'parent_process' over 'person' partition 'process'\n" - "[low_|high_]mean(bytes) by 'parent_process' over 'person' partition 'machine'\n" - "[low_|high_]mean(bytes) by 'parent_process' over 'person' partition 'data_centre'\n"; + std::string expected = + "[low_|high_][non_zero_]count\n" + "[low_|high_]distinct_count(port)\n" + "[low_|high_]mean(bytes)\n" + "[low_|high_][non_zero_]count by 'process'\n" + "[low_|high_][non_zero_]count by 'parent_process'\n" + "[low_|high_]distinct_count(port) by 'process'\n" + "[low_|high_]distinct_count(port) by 'parent_process'\n" + "[low_|high_]mean(bytes) by 'process'\n" + "[low_|high_]mean(bytes) by 'parent_process'\n" + "[low_|high_]count over 'machine'\n" + "[low_|high_]count over 'person'\n" + "[low_|high_]distinct_count(port) over 'machine'\n" + "[low_|high_]distinct_count(port) over 'person'\n" + "[low_|high_]mean(bytes) over 'machine'\n" + "[low_|high_]mean(bytes) over 'person'\n" + "[low_|high_][non_zero_]count partition 'process'\n" + "[low_|high_][non_zero_]count partition 'machine'\n" + "[low_|high_][non_zero_]count partition 'data_centre'\n" + "[low_|high_]distinct_count(port) partition 'process'\n" + "[low_|high_]distinct_count(port) partition 'machine'\n" + "[low_|high_]distinct_count(port) partition 'data_centre'\n" + "[low_|high_]mean(bytes) partition 'process'\n" + "[low_|high_]mean(bytes) partition 'machine'\n" + "[low_|high_]mean(bytes) partition 'data_centre'\n" + "[low_|high_]count by 'process' over 'machine'\n" + "[low_|high_]count by 'process' over 'person'\n" + "[low_|high_]count by 'parent_process' over 'machine'\n" + "[low_|high_]count by 'parent_process' over 'person'\n" + "[low_|high_]distinct_count(port) by 'process' over 'machine'\n" + "[low_|high_]distinct_count(port) by 'process' over 'person'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'machine'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'person'\n" + "[low_|high_]mean(bytes) by 'process' over 'machine'\n" + "[low_|high_]mean(bytes) by 'process' over 'person'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'machine'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'person'\n" + "[low_|high_][non_zero_]count by 'process' partition 'machine'\n" + "[low_|high_][non_zero_]count by 'process' partition 'data_centre'\n" + "[low_|high_][non_zero_]count by 'parent_process' partition 'process'\n" + "[low_|high_][non_zero_]count by 'parent_process' partition 'machine'\n" + "[low_|high_][non_zero_]count by 'parent_process' partition 'data_centre'\n" + "[low_|high_]distinct_count(port) by 'process' partition 'machine'\n" + "[low_|high_]distinct_count(port) by 'process' partition 'data_centre'\n" + "[low_|high_]distinct_count(port) by 'parent_process' partition 'process'\n" + "[low_|high_]distinct_count(port) by 'parent_process' partition 'machine'\n" + "[low_|high_]distinct_count(port) by 'parent_process' partition 'data_centre'\n" + "[low_|high_]mean(bytes) by 'process' partition 'machine'\n" + "[low_|high_]mean(bytes) by 'process' partition 'data_centre'\n" + "[low_|high_]mean(bytes) by 'parent_process' partition 'process'\n" + "[low_|high_]mean(bytes) by 'parent_process' partition 'machine'\n" + "[low_|high_]mean(bytes) by 'parent_process' partition 'data_centre'\n" + "[low_|high_]count over 'machine' partition 'process'\n" + "[low_|high_]count over 'machine' 
partition 'data_centre'\n" + "[low_|high_]count over 'person' partition 'process'\n" + "[low_|high_]count over 'person' partition 'machine'\n" + "[low_|high_]count over 'person' partition 'data_centre'\n" + "[low_|high_]distinct_count(port) over 'machine' partition 'process'\n" + "[low_|high_]distinct_count(port) over 'machine' partition 'data_centre'\n" + "[low_|high_]distinct_count(port) over 'person' partition 'process'\n" + "[low_|high_]distinct_count(port) over 'person' partition 'machine'\n" + "[low_|high_]distinct_count(port) over 'person' partition 'data_centre'\n" + "[low_|high_]mean(bytes) over 'machine' partition 'process'\n" + "[low_|high_]mean(bytes) over 'machine' partition 'data_centre'\n" + "[low_|high_]mean(bytes) over 'person' partition 'process'\n" + "[low_|high_]mean(bytes) over 'person' partition 'machine'\n" + "[low_|high_]mean(bytes) over 'person' partition 'data_centre'\n" + "[low_|high_]count by 'process' over 'machine' partition 'data_centre'\n" + "[low_|high_]count by 'process' over 'person' partition 'machine'\n" + "[low_|high_]count by 'process' over 'person' partition 'data_centre'\n" + "[low_|high_]count by 'parent_process' over 'machine' partition 'process'\n" + "[low_|high_]count by 'parent_process' over 'machine' partition 'data_centre'\n" + "[low_|high_]count by 'parent_process' over 'person' partition 'process'\n" + "[low_|high_]count by 'parent_process' over 'person' partition 'machine'\n" + "[low_|high_]count by 'parent_process' over 'person' partition 'data_centre'\n" + "[low_|high_]distinct_count(port) by 'process' over 'machine' partition 'data_centre'\n" + "[low_|high_]distinct_count(port) by 'process' over 'person' partition 'machine'\n" + "[low_|high_]distinct_count(port) by 'process' over 'person' partition 'data_centre'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'machine' partition 'process'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'machine' partition 'data_centre'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'person' partition 'process'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'person' partition 'machine'\n" + "[low_|high_]distinct_count(port) by 'parent_process' over 'person' partition 'data_centre'\n" + "[low_|high_]mean(bytes) by 'process' over 'machine' partition 'data_centre'\n" + "[low_|high_]mean(bytes) by 'process' over 'person' partition 'machine'\n" + "[low_|high_]mean(bytes) by 'process' over 'person' partition 'data_centre'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'machine' partition 'process'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'machine' partition 'data_centre'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'person' partition 'process'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'person' partition 'machine'\n" + "[low_|high_]mean(bytes) by 'parent_process' over 'person' partition 'data_centre'\n"; LOG_DEBUG(<< "5) detectors =\n" << print(spec, " ")); CPPUNIT_ASSERT_EQUAL(expected, print(spec)); } @@ -215,8 +218,8 @@ void CDetectorEnumeratorTest::testAll() { CppUnit::Test* CDetectorEnumeratorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetectorEnumeratorTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CDetectorEnumeratorTest>("CDetectorEnumeratorTest::testAll", &CDetectorEnumeratorTest::testAll)); + suiteOfTests->addTest(new CppUnit::TestCaller<CDetectorEnumeratorTest>( + "CDetectorEnumeratorTest::testAll", &CDetectorEnumeratorTest::testAll)); return suiteOfTests; } diff --git 
a/lib/config/unittest/CReportWriterTest.cc b/lib/config/unittest/CReportWriterTest.cc index 0d5a26f89a..78a48198b3 100644 --- a/lib/config/unittest/CReportWriterTest.cc +++ b/lib/config/unittest/CReportWriterTest.cc @@ -34,46 +34,41 @@ void CReportWriterTest::testPretty() { core_t::TTime startTime = 1459468810; core_t::TTime endTime = startTime + 7 * core::constants::DAY; - std::string fields[] = {std::string("name"), std::string("phylum"), std::string("species"), std::string("code"), std::string("weight")}; + std::string fields[] = {std::string("name"), std::string("phylum"), + std::string("species"), std::string("code"), + std::string("weight")}; - std::string categories1[] = {std::string("Annelida"), std::string("Nematoda"), std::string("Arthropoda"), std::string("Chordata")}; + std::string categories1[] = {std::string("Annelida"), std::string("Nematoda"), + std::string("Arthropoda"), std::string("Chordata")}; std::size_t breaks[] = {0, 6, 10, 13, 20}; - std::string categories2[] = {// Annelida - std::string("Amage auricula"), - std::string("Eunice purpurea"), - std::string("Dorvillea kastjani"), - std::string("Dalhousiella carpenteri"), - std::string("Dysponetus gracilisi"), - std::string("Macellicephala incerta"), - // Nematoda - std::string("Microlaimus robustidens"), - std::string("Theristus longisetosus"), - std::string("Rhynchonema cemae"), - std::string("Contracaecum chubutensis"), - // Arthropoda - std::string("black widow"), - std::string("Daddy longleg"), - std::string("Lobster"), - // Chordata - std::string("hag fish"), - std::string("hen"), - std::string("elephant"), - std::string("dog"), - std::string("shrew"), - std::string("weasel"), - std::string("lemming")}; + std::string categories2[] = { + // Annelida + std::string("Amage auricula"), std::string("Eunice purpurea"), + std::string("Dorvillea kastjani"), std::string("Dalhousiella carpenteri"), + std::string("Dysponetus gracilisi"), std::string("Macellicephala incerta"), + // Nematoda + std::string("Microlaimus robustidens"), std::string("Theristus longisetosus"), + std::string("Rhynchonema cemae"), std::string("Contracaecum chubutensis"), + // Arthropoda + std::string("black widow"), std::string("Daddy longleg"), std::string("Lobster"), + // Chordata + std::string("hag fish"), std::string("hen"), std::string("elephant"), + std::string("dog"), std::string("shrew"), std::string("weasel"), + std::string("lemming")}; TStrVec codes; rng.generateWords(6, 2000, codes); - double weights[] = {0.01, 0.05, 0.1, 0.05, 0.01, 0.5, 0.001, 0.0003, 0.01, 0.0004, - 1.3, 1.1, 520.0, 1200.0, 810.1, 1000000.0, 5334.0, 70.0, 180.0, 100.3}; + double weights[] = {0.01, 0.05, 0.1, 0.05, 0.01, 0.5, 0.001, + 0.0003, 0.01, 0.0004, 1.3, 1.1, 520.0, 1200.0, + 810.1, 1000000.0, 5334.0, 70.0, 180.0, 100.3}; config::CDataSummaryStatistics stats1; - config::CCategoricalDataSummaryStatistics stats2[] = {config::CCategoricalDataSummaryStatistics(10), - config::CCategoricalDataSummaryStatistics(10), - config::CCategoricalDataSummaryStatistics(10)}; + config::CCategoricalDataSummaryStatistics stats2[] = { + config::CCategoricalDataSummaryStatistics(10), + config::CCategoricalDataSummaryStatistics(10), + config::CCategoricalDataSummaryStatistics(10)}; config::CNumericDataSummaryStatistics stats3(false); uint64_t n = 0; @@ -82,8 +77,10 @@ void CReportWriterTest::testPretty() { TDoubleVec dt; TDoubleVec weight; TSizeVec index; - for (core_t::TTime time = startTime; time < endTime; time += static_cast<core_t::TTime>(dt[0])) { - double progress = static_cast<double>(time - startTime) / 
static_cast<double>((endTime - startTime)); + for (core_t::TTime time = startTime; time < endTime; + time += static_cast<core_t::TTime>(dt[0])) { + double progress = static_cast<double>(time - startTime) / + static_cast<double>((endTime - startTime)); if (progress > lastProgress + 0.05) { LOG_DEBUG(<< "Processed " << progress * 100.0 << "%"); lastProgress = progress; @@ -140,7 +137,8 @@ void CReportWriterTest::testJSON() { CppUnit::Test* CReportWriterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CReportWriterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CReportWriterTest>("CReportWriterTest::testPretty", &CReportWriterTest::testPretty)); + suiteOfTests->addTest(new CppUnit::TestCaller<CReportWriterTest>( + "CReportWriterTest::testPretty", &CReportWriterTest::testPretty)); return suiteOfTests; } diff --git a/lib/core/CBase64Filter.cc b/lib/core/CBase64Filter.cc index 8b28eca3fe..3861b21f51 100644 --- a/lib/core/CBase64Filter.cc +++ b/lib/core/CBase64Filter.cc @@ -15,7 +15,8 @@ CBase64Encoder::CBase64Encoder() : m_Buffer(4096) { CBase64Encoder::~CBase64Encoder() { } -CBase64Decoder::CBase64Decoder() : m_BufferIn(4096), m_BufferOut(4096), m_Eos(false) { +CBase64Decoder::CBase64Decoder() + : m_BufferIn(4096), m_BufferOut(4096), m_Eos(false) { } CBase64Decoder::~CBase64Decoder() { diff --git a/lib/core/CBufferFlushTimer.cc b/lib/core/CBufferFlushTimer.cc index ae91ca8be4..07755fe7a6 100644 --- a/lib/core/CBufferFlushTimer.cc +++ b/lib/core/CBufferFlushTimer.cc @@ -15,7 +15,8 @@ namespace core { CBufferFlushTimer::CBufferFlushTimer() : m_LastMaxTime(0), m_LastFlushTime(0) { } -core_t::TTime CBufferFlushTimer::flushTime(core_t::TTime bufferDelay, core_t::TTime bufferMaxTime) { +core_t::TTime CBufferFlushTimer::flushTime(core_t::TTime bufferDelay, + core_t::TTime bufferMaxTime) { core_t::TTime now(CTimeUtils::now()); if (bufferMaxTime == 0) { @@ -34,7 +35,8 @@ core_t::TTime CBufferFlushTimer::flushTime(core_t::TTime bufferDelay, core_t::TT // flush based on elapsed real time if (ahead > bufferDelay) { // Defend against wrap - if (bufferMaxTime - bufferDelay >= std::numeric_limits<core_t::TTime>::max() - ahead) { + if (bufferMaxTime - bufferDelay >= + std::numeric_limits<core_t::TTime>::max() - ahead) { return std::numeric_limits<core_t::TTime>::max(); } diff --git a/lib/core/CCompressOStream.cc b/lib/core/CCompressOStream.cc index 7182ed2d57..1c31f1fd2d 100644 --- a/lib/core/CCompressOStream.cc +++ b/lib/core/CCompressOStream.cc @@ -39,10 +39,7 @@ void CCompressOStream::close() { CCompressOStream::CCompressThread::CCompressThread(CCompressOStream& stream, CDualThreadStreamBuf& streamBuf, CStateCompressor::CChunkFilter& filter) - : m_Stream(stream), - m_StreamBuf(streamBuf), - m_FilterSink(filter), - m_OutFilter() + : m_Stream(stream), m_StreamBuf(streamBuf), m_FilterSink(filter), m_OutFilter() { m_OutFilter.push(boost::iostreams::gzip_compressor()); diff --git a/lib/core/CCompressUtils.cc b/lib/core/CCompressUtils.cc index 0e7bc56c07..1853598004 100644 --- a/lib/core/CCompressUtils.cc +++ b/lib/core/CCompressUtils.cc @@ -12,7 +12,8 @@ namespace ml { namespace core { -CCompressUtils::CCompressUtils(bool lengthOnly, int level) : m_State(E_Unused), m_LengthOnly(lengthOnly) { +CCompressUtils::CCompressUtils(bool lengthOnly, int level) + : m_State(E_Unused), m_LengthOnly(lengthOnly) { ::memset(&m_ZlibStrm, 0, sizeof(z_stream)); m_ZlibStrm.zalloc = Z_NULL; diff --git a/lib/core/CCrashHandler_Linux.cc b/lib/core/CCrashHandler_Linux.cc index da97107e22..338924bef2 100644 --- a/lib/core/CCrashHandler_Linux.cc +++ b/lib/core/CCrashHandler_Linux.cc @@ -49,15 +49,11 @@ void crashHandler(int 
sig, siginfo_t* info, void* context) { Dl_info symbolInfo; dladdr(errorAddress, &symbolInfo); - fprintf(stderr, - "si_signo %d, si_code: %d, si_errno: %d, address: %p, library: %s, base: %p, normalized address: %p\n", - info->si_signo, - info->si_code, - info->si_errno, - errorAddress, - symbolInfo.dli_fname, - symbolInfo.dli_fbase, - reinterpret_cast<void*>(reinterpret_cast<size_t>(errorAddress) - reinterpret_cast<size_t>(symbolInfo.dli_fbase))); + fprintf(stderr, "si_signo %d, si_code: %d, si_errno: %d, address: %p, library: %s, base: %p, normalized address: %p\n", + info->si_signo, info->si_code, info->si_errno, errorAddress, + symbolInfo.dli_fname, symbolInfo.dli_fbase, + reinterpret_cast<void*>(reinterpret_cast<size_t>(errorAddress) - + reinterpret_cast<size_t>(symbolInfo.dli_fbase))); // Still generate a core dump, // see http://www.alexonlinux.com/how-to-handle-sigsegv-but-also-generate-core-dump diff --git a/lib/core/CDelimiter.cc b/lib/core/CDelimiter.cc index 6731d11e05..1a10f8ada9 100644 --- a/lib/core/CDelimiter.cc +++ b/lib/core/CDelimiter.cc @@ -16,19 +16,13 @@ namespace core { const std::string CDelimiter::DEFAULT_DELIMITER(","); CDelimiter::CDelimiter() - : m_Valid(m_Delimiter.init(DEFAULT_DELIMITER)), - m_HaveFollowingRegex(false), - m_WaiveFollowingRegexAfterTime(false), - m_Quote('\0'), - m_Escape('\0') { + : m_Valid(m_Delimiter.init(DEFAULT_DELIMITER)), m_HaveFollowingRegex(false), + m_WaiveFollowingRegexAfterTime(false), m_Quote('\0'), m_Escape('\0') { } CDelimiter::CDelimiter(const std::string& delimiter) - : m_Valid(m_Delimiter.init(delimiter)), - m_HaveFollowingRegex(false), - m_WaiveFollowingRegexAfterTime(false), - m_Quote('\0'), - m_Escape('\0') { + : m_Valid(m_Delimiter.init(delimiter)), m_HaveFollowingRegex(false), + m_WaiveFollowingRegexAfterTime(false), m_Quote('\0'), m_Escape('\0') { if (!m_Valid) { LOG_ERROR(<< "Unable to set delimiter regex to " << delimiter); } @@ -37,9 +31,7 @@ CDelimiter::CDelimiter(const std::string& delimiter) CDelimiter::CDelimiter(const std::string& delimiter, const std::string& followingRegex, bool orTime) : m_Valid(m_Delimiter.init(delimiter)), m_HaveFollowingRegex(m_FollowingRegex.init(followingRegex)), - m_WaiveFollowingRegexAfterTime(orTime), - m_Quote('\0'), - m_Escape('\0') { + m_WaiveFollowingRegexAfterTime(orTime), m_Quote('\0'), m_Escape('\0') { if (!m_Valid) { LOG_ERROR(<< "Unable to set delimiter regex to " << delimiter); } @@ -51,7 +43,8 @@ CDelimiter::CDelimiter(const std::string& delimiter, const std::string& followin bool CDelimiter::operator==(const CDelimiter& rhs) const { if (m_Valid != rhs.m_Valid || m_HaveFollowingRegex != rhs.m_HaveFollowingRegex || - m_WaiveFollowingRegexAfterTime != rhs.m_WaiveFollowingRegexAfterTime || m_Quote != rhs.m_Quote || m_Escape != rhs.m_Escape) { + m_WaiveFollowingRegexAfterTime != rhs.m_WaiveFollowingRegexAfterTime || + m_Quote != rhs.m_Quote || m_Escape != rhs.m_Escape) { return false; } @@ -76,7 +69,9 @@ bool CDelimiter::operator!=(const CDelimiter& rhs) const { } // Check whether the text that followed the primary delimiter was acceptable -bool CDelimiter::isFollowingTextAcceptable(size_t searchPos, const std::string& str, bool timePassed) const { +bool CDelimiter::isFollowingTextAcceptable(size_t searchPos, + const std::string& str, + bool timePassed) const { bool answer(false); if (m_HaveFollowingRegex) { @@ -104,12 +99,17 @@ std::string CDelimiter::delimiter() const { return m_Delimiter.str(); } -void 
CDelimiter::tokenise(const std::string& str, CStringUtils::TStrVec& tokens, std::string& remainder) const { +void CDelimiter::tokenise(const std::string& str, + CStringUtils::TStrVec& tokens, + std::string& remainder) const { std::string exampleDelimiter; this->tokenise(str, false, tokens, exampleDelimiter, remainder); } -void CDelimiter::tokenise(const std::string& str, bool timePassed, CStringUtils::TStrVec& tokens, std::string& remainder) const { +void CDelimiter::tokenise(const std::string& str, + bool timePassed, + CStringUtils::TStrVec& tokens, + std::string& remainder) const { std::string exampleDelimiter; this->tokenise(str, timePassed, tokens, exampleDelimiter, remainder); } @@ -164,7 +164,8 @@ void CDelimiter::tokenise(const std::string& str, } // Search for the delimiter - bool found(m_Delimiter.search(expectingQuote ? (quotePos + 1) : searchPos, str, delimStartPos, delimLength)); + bool found(m_Delimiter.search(expectingQuote ? (quotePos + 1) : searchPos, + str, delimStartPos, delimLength)); if (!found) { if (expectingQuote && quotePos < str.length()) { // If we're expecting a quote and find one, treat this as diff --git a/lib/core/CDetachedProcessSpawner.cc b/lib/core/CDetachedProcessSpawner.cc index 055ba51c55..00578b291a 100644 --- a/lib/core/CDetachedProcessSpawner.cc +++ b/lib/core/CDetachedProcessSpawner.cc @@ -175,13 +175,15 @@ class CTrackerThread : public CThread { if (signal == SIGTERM) { // We expect this when a job is force-closed, so log // at a lower level - LOG_INFO(<< "Child process with PID " << pid << " was terminated by signal " << signal); + LOG_INFO(<< "Child process with PID " << pid + << " was terminated by signal " << signal); } else { // This should never happen if the system is working // normally - possible reasons are the Linux OOM // killer, manual intervention and bugs that cause // access violations - LOG_ERROR(<< "Child process with PID " << pid << " was terminated by signal " << signal); + LOG_ERROR(<< "Child process with PID " << pid + << " was terminated by signal " << signal); } } else { int exitCode = WEXITSTATUS(status); @@ -189,7 +191,8 @@ class CTrackerThread : public CThread { // This is the happy case LOG_DEBUG(<< "Child process with PID " << pid << " has exited"); } else { - LOG_WARN(<< "Child process with PID " << pid << " has exited with exit code " << exitCode); + LOG_WARN(<< "Child process with PID " << pid + << " has exited with exit code " << exitCode); } } m_Pids.erase(pid); @@ -206,7 +209,8 @@ class CTrackerThread : public CThread { } CDetachedProcessSpawner::CDetachedProcessSpawner(const TStrVec& permittedProcessPaths) - : m_PermittedProcessPaths(permittedProcessPaths), m_TrackerThread(boost::make_shared<CTrackerThread>()) { + : m_PermittedProcessPaths(permittedProcessPaths), + m_TrackerThread(boost::make_shared<CTrackerThread>()) { if (m_TrackerThread->start() == false) { LOG_ERROR(<< "Failed to start spawned process tracker thread"); } @@ -223,8 +227,11 @@ bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVe return this->spawn(processPath, args, dummy); } -bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVec& args, CProcess::TPid& childPid) { - if (std::find(m_PermittedProcessPaths.begin(), m_PermittedProcessPaths.end(), processPath) == m_PermittedProcessPaths.end()) { +bool CDetachedProcessSpawner::spawn(const std::string& processPath, + const TStrVec& args, + CProcess::TPid& childPid) { + if (std::find(m_PermittedProcessPaths.begin(), m_PermittedProcessPaths.end(), + processPath) == m_PermittedProcessPaths.end()) { LOG_ERROR(<< "Spawning process '" << processPath << "' is not permitted"); return false; } @@ 
-251,12 +258,14 @@ bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVe posix_spawn_file_actions_t fileActions; if (setupFileActions(&fileActions) == false) { - LOG_ERROR(<< "Failed to set up file actions prior to spawn of '" << processPath << "': " << ::strerror(errno)); + LOG_ERROR(<< "Failed to set up file actions prior to spawn of '" + << processPath << "': " << ::strerror(errno)); return false; } posix_spawnattr_t spawnAttributes; if (::posix_spawnattr_init(&spawnAttributes) != 0) { - LOG_ERROR(<< "Failed to set up spawn attributes prior to spawn of '" << processPath << "': " << ::strerror(errno)); + LOG_ERROR(<< "Failed to set up spawn attributes prior to spawn of '" + << processPath << "': " << ::strerror(errno)); return false; } ::posix_spawnattr_setflags(&spawnAttributes, POSIX_SPAWN_SETPGROUP); @@ -267,7 +276,8 @@ bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVe // quickly CScopedLock lock(m_TrackerThread->mutex()); - int err(::posix_spawn(&childPid, processPath.c_str(), &fileActions, &spawnAttributes, &argv[0], environ)); + int err(::posix_spawn(&childPid, processPath.c_str(), &fileActions, + &spawnAttributes, &argv[0], environ)); ::posix_spawn_file_actions_destroy(&fileActions); ::posix_spawnattr_destroy(&spawnAttributes); diff --git a/lib/core/CDetachedProcessSpawner_Windows.cc b/lib/core/CDetachedProcessSpawner_Windows.cc index 20a95a9231..5b66bd3f5a 100644 --- a/lib/core/CDetachedProcessSpawner_Windows.cc +++ b/lib/core/CDetachedProcessSpawner_Windows.cc @@ -135,7 +135,8 @@ class CTrackerThread : public CThread { } CDetachedProcessSpawner::CDetachedProcessSpawner(const TStrVec& permittedProcessPaths) - : m_PermittedProcessPaths(permittedProcessPaths), m_TrackerThread(boost::make_shared<CTrackerThread>()) { + : m_PermittedProcessPaths(permittedProcessPaths), + m_TrackerThread(boost::make_shared<CTrackerThread>()) { if (m_TrackerThread->start() == false) { LOG_ERROR(<< "Failed to start spawned process tracker thread"); } @@ -152,13 +153,17 @@ bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVe return this->spawn(processPath, args, dummy); } -bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVec& args, CProcess::TPid& childPid) { - if (std::find(m_PermittedProcessPaths.begin(), m_PermittedProcessPaths.end(), processPath) == m_PermittedProcessPaths.end()) { +bool CDetachedProcessSpawner::spawn(const std::string& processPath, + const TStrVec& args, + CProcess::TPid& childPid) { + if (std::find(m_PermittedProcessPaths.begin(), m_PermittedProcessPaths.end(), + processPath) == m_PermittedProcessPaths.end()) { LOG_ERROR(<< "Spawning process '" << processPath << "' is not permitted"); return false; } - bool processPathHasExeExt(processPath.length() > 4 && processPath.compare(processPath.length() - 4, 4, ".exe") == 0); + bool processPathHasExeExt(processPath.length() > 4 && + processPath.compare(processPath.length() - 4, 4, ".exe") == 0); // Windows takes command lines as a single string std::string cmdLine(CShellArgQuoter::quote(processPath)); @@ -180,29 +185,24 @@ bool CDetachedProcessSpawner::spawn(const std::string& processPath, const TStrVe // quickly CScopedLock lock(m_TrackerThread->mutex()); - if (CreateProcess((processPathHasExeExt ? 
processPath : processPath + ".exe").c_str(), - const_cast<char*>(cmdLine.c_str()), - 0, - 0, - FALSE, - // The CREATE_NO_WINDOW flag is used instead of - // DETACHED_PROCESS, as Visual Studio 2017 optimises - // away the file handles that underlie stdin, stdout - // and stderr if a process has no knowledge of any - // console. With CREATE_NO_WINDOW the process will - // not initially be attached to any console, but has - // the option to attach to its parent process's - // console later on, and this appears to prevent the - // file handles being optimised away. This wouldn't - // be a problem if we redirected stderr using - // freopen(), but instead we redirect the underlying - // OS level file handles so that we can revert the - // redirection. - CREATE_NEW_PROCESS_GROUP | CREATE_NO_WINDOW, - 0, - 0, - &startupInfo, - &processInformation) == FALSE) { + if (CreateProcess( + (processPathHasExeExt ? processPath : processPath + ".exe").c_str(), + const_cast<char*>(cmdLine.c_str()), 0, 0, FALSE, + // The CREATE_NO_WINDOW flag is used instead of + // DETACHED_PROCESS, as Visual Studio 2017 optimises + // away the file handles that underlie stdin, stdout + // and stderr if a process has no knowledge of any + // console. With CREATE_NO_WINDOW the process will + // not initially be attached to any console, but has + // the option to attach to its parent process's + // console later on, and this appears to prevent the + // file handles being optimised away. This wouldn't + // be a problem if we redirected stderr using + // freopen(), but instead we redirect the underlying + // OS level file handles so that we can revert the + // redirection. + CREATE_NEW_PROCESS_GROUP | CREATE_NO_WINDOW, 0, 0, &startupInfo, + &processInformation) == FALSE) { LOG_ERROR(<< "Failed to spawn '" << processPath << "': " << CWindowsError()); return false; } diff --git a/lib/core/CDualThreadStreamBuf.cc b/lib/core/CDualThreadStreamBuf.cc index 3b7720523f..3d1d87a08f 100644 --- a/lib/core/CDualThreadStreamBuf.cc +++ b/lib/core/CDualThreadStreamBuf.cc @@ -19,18 +19,13 @@ namespace core { const size_t CDualThreadStreamBuf::DEFAULT_BUFFER_CAPACITY(65536); CDualThreadStreamBuf::CDualThreadStreamBuf(size_t bufferCapacity) - : m_WriteBuffer(new char[bufferCapacity]), - m_WriteBufferCapacity(bufferCapacity), - m_ReadBuffer(new char[bufferCapacity]), - m_ReadBufferCapacity(bufferCapacity), + : m_WriteBuffer(new char[bufferCapacity]), m_WriteBufferCapacity(bufferCapacity), + m_ReadBuffer(new char[bufferCapacity]), m_ReadBufferCapacity(bufferCapacity), m_IntermediateBuffer(new char[bufferCapacity]), m_IntermediateBufferCapacity(bufferCapacity), - m_IntermediateBufferEnd(m_IntermediateBuffer.get()), - m_ReadBytesSwapped(0), - m_WriteBytesSwapped(0), - m_IntermediateBufferCondition(m_IntermediateBufferMutex), - m_Eof(false), - m_FatalError(false) { + m_IntermediateBufferEnd(m_IntermediateBuffer.get()), m_ReadBytesSwapped(0), + m_WriteBytesSwapped(0), m_IntermediateBufferCondition(m_IntermediateBufferMutex), + m_Eof(false), m_FatalError(false) { // Initialise write buffer pointers to indicate an empty buffer char* begin(m_WriteBuffer.get()); char* end(begin + m_WriteBufferCapacity); @@ -274,7 +269,9 @@ int CDualThreadStreamBuf::overflow(int c) { return ret; } -std::streampos CDualThreadStreamBuf::seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode which) { +std::streampos CDualThreadStreamBuf::seekoff(std::streamoff off, + std::ios_base::seekdir way, + std::ios_base::openmode which) { std::streampos pos(static_cast<std::streampos>(-1)); 
if (off != 0) { @@ -344,7 +341,8 @@ bool CDualThreadStreamBuf::swapReadBuffer() { if (!m_Eof) { LOG_ERROR(<< "Inconsistency - intermediate buffer empty after wait " "when not at end-of-file: begin = " - << static_cast<const void*>(begin) << " end = " << static_cast<const void*>(end)); + << static_cast<const void*>(begin) + << " end = " << static_cast<const void*>(end)); } return false; } diff --git a/lib/core/CFlatPrefixTree.cc b/lib/core/CFlatPrefixTree.cc index e9e0432c1e..41043cb47d 100644 --- a/lib/core/CFlatPrefixTree.cc +++ b/lib/core/CFlatPrefixTree.cc @@ -38,7 +38,8 @@ struct SCharNotEqualTo { }; } -CFlatPrefixTree::SNode::SNode(char c, char type, uint32_t next) : s_Char(c), s_Type(type), s_Next(next) { +CFlatPrefixTree::SNode::SNode(char c, char type, uint32_t next) + : s_Char(c), s_Type(type), s_Next(next) { } bool CFlatPrefixTree::SNode::operator<(char rhs) const { @@ -63,7 +64,8 @@ bool CFlatPrefixTree::build(const TStrVec& prefixes) { if (prefixes.size() > 1) { for (std::size_t i = 0; i < prefixes.size() - 1; ++i) { if (prefixes[i] == prefixes[i + 1]) { - LOG_ERROR(<< "FlatPrefixTree cannot be build from a vector containing duplicate prefixes: " << prefixes[i]); + LOG_ERROR(<< "FlatPrefixTree cannot be build from a vector containing duplicate prefixes: " + << prefixes[i]); return false; } } @@ -76,8 +78,8 @@ bool CFlatPrefixTree::build(const TStrVec& prefixes) { } if (m_FlatTree.size() >= NO_CHILD) { - LOG_ERROR(<< "Failed to build the tree: " << m_FlatTree.size() << " nodes were required; no more than " << NO_CHILD - << " are supported."); + LOG_ERROR(<< "Failed to build the tree: " << m_FlatTree.size() << " nodes were required; no more than " + << NO_CHILD << " are supported."); m_FlatTree.clear(); return false; } @@ -86,16 +88,21 @@ bool CFlatPrefixTree::build(const TStrVec& prefixes) { return true; } -void CFlatPrefixTree::buildRecursively(const TStrVec& prefixes, std::size_t prefixesStart, std::size_t prefixesEnd, std::size_t charPos) { +void CFlatPrefixTree::buildRecursively(const TStrVec& prefixes, + std::size_t prefixesStart, + std::size_t prefixesEnd, + std::size_t charPos) { // First, we extract the distinct characters for the current character position and we // record their start/end indices in the prefixes vector. TDistinctCharVec distinctCharsWithRange; distinctCharsWithRange.reserve(256); - this->extractDistinctCharacters(prefixes, prefixesStart, prefixesEnd, charPos, distinctCharsWithRange); + this->extractDistinctCharacters(prefixes, prefixesStart, prefixesEnd, + charPos, distinctCharsWithRange); // Now, we create the nodes of the current level: the padding node, that contains // the number of distinct characters, and a node for each distinct character. 
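// For intuition, a plausible flat layout for the prefixes {"cat", "cow"} under
// the scheme just described (node types elided; assuming, as the LEAF_NODE type
// above suggests, that the final character of each prefix yields a leaf) is:
//   index 0: padding, s_Next = 1 distinct char  ('c')
//   index 1: 'c', branch, s_Next = 2
//   index 2: padding, s_Next = 2 distinct chars ('a', 'o')
//   index 3: 'a', branch, s_Next = 5
//   index 4: 'o', branch, s_Next = 7
//   index 5: padding, s_Next = 1;  index 6: 't', leaf
//   index 7: padding, s_Next = 1;  index 8: 'w', leaf
// i.e. each level is a padding node holding the fan-out followed by that level's
// child nodes, and a branch node's s_Next points at its child level, which is
// exactly what the recursion below assigns.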
- m_FlatTree.push_back(SNode(PADDING_NODE, PADDING_NODE, static_cast<uint32_t>(distinctCharsWithRange.size()))); + m_FlatTree.push_back(SNode(PADDING_NODE, PADDING_NODE, + static_cast<uint32_t>(distinctCharsWithRange.size()))); std::size_t treeSizeBeforeNewChars = m_FlatTree.size(); for (std::size_t i = 0; i < distinctCharsWithRange.size(); ++i) { SDistinctChar& distinctChar = distinctCharsWithRange[i]; @@ -107,8 +114,10 @@ void CFlatPrefixTree::buildRecursively(const TStrVec& prefixes, std::size_t pref for (std::size_t i = 0; i < distinctCharsWithRange.size(); ++i) { SDistinctChar& distinctChar = distinctCharsWithRange[i]; if (distinctChar.s_Type != LEAF_NODE) { - m_FlatTree[treeSizeBeforeNewChars + i].s_Next = static_cast<uint32_t>(m_FlatTree.size()); - this->buildRecursively(prefixes, distinctChar.s_Start, distinctChar.s_End, charPos + 1); + m_FlatTree[treeSizeBeforeNewChars + i].s_Next = + static_cast<uint32_t>(m_FlatTree.size()); + this->buildRecursively(prefixes, distinctChar.s_Start, + distinctChar.s_End, charPos + 1); } } } diff --git a/lib/core/CHashing.cc b/lib/core/CHashing.cc index 6d2f76c9e1..f506e91b57 100644 --- a/lib/core/CHashing.cc +++ b/lib/core/CHashing.cc @@ -30,10 +30,12 @@ const uint64_t CHashing::CUniversalHash::BIG_PRIME = 4294967291ull; boost::random::mt11213b CHashing::CUniversalHash::ms_Generator; CFastMutex CHashing::CUniversalHash::ms_Mutex; -CHashing::CUniversalHash::CUInt32Hash::CUInt32Hash() : m_M(1000), m_A(1), m_B(0) { +CHashing::CUniversalHash::CUInt32Hash::CUInt32Hash() + : m_M(1000), m_A(1), m_B(0) { } -CHashing::CUniversalHash::CUInt32Hash::CUInt32Hash(uint32_t m, uint32_t a, uint32_t b) : m_M(m), m_A(a), m_B(b) { +CHashing::CUniversalHash::CUInt32Hash::CUInt32Hash(uint32_t m, uint32_t a, uint32_t b) + : m_M(m), m_A(a), m_B(b) { } uint32_t CHashing::CUniversalHash::CUInt32Hash::m() const { @@ -50,14 +52,17 @@ uint32_t CHashing::CUniversalHash::CUInt32Hash::b() const { std::string CHashing::CUniversalHash::CUInt32Hash::print() const { std::ostringstream result; - result << "\"((" << m_A << " * x + " << m_B << ") mod " << BIG_PRIME << ") mod " << m_M << "\""; + result << "\"((" << m_A << " * x + " << m_B << ") mod " << BIG_PRIME + << ") mod " << m_M << "\""; return result.str(); } -CHashing::CUniversalHash::CUInt32UnrestrictedHash::CUInt32UnrestrictedHash() : m_A(1), m_B(0) { +CHashing::CUniversalHash::CUInt32UnrestrictedHash::CUInt32UnrestrictedHash() + : m_A(1), m_B(0) { } -CHashing::CUniversalHash::CUInt32UnrestrictedHash::CUInt32UnrestrictedHash(uint32_t a, uint32_t b) : m_A(a), m_B(b) { +CHashing::CUniversalHash::CUInt32UnrestrictedHash::CUInt32UnrestrictedHash(uint32_t a, uint32_t b) + : m_A(a), m_B(b) { } uint32_t CHashing::CUniversalHash::CUInt32UnrestrictedHash::a() const { @@ -74,14 +79,16 @@ std::string CHashing::CUniversalHash::CUInt32UnrestrictedHash::print() const { return result.str(); } -CHashing::CUniversalHash::CUInt32VecHash::CUInt32VecHash(uint32_t m, const TUInt32Vec& a, uint32_t b) : m_M(m), m_A(a), m_B(b) { +CHashing::CUniversalHash::CUInt32VecHash::CUInt32VecHash(uint32_t m, const TUInt32Vec& a, uint32_t b) + : m_M(m), m_A(a), m_B(b) { } uint32_t CHashing::CUniversalHash::CUInt32VecHash::m() const { return m_M; } -const CHashing::CUniversalHash::TUInt32Vec& CHashing::CUniversalHash::CUInt32VecHash::a() const { +const CHashing::CUniversalHash::TUInt32Vec& +CHashing::CUniversalHash::CUInt32VecHash::a() const { return m_A; } @@ -99,22 +106,28 @@ std::string CHashing::CUniversalHash::CUInt32VecHash::print() const { std::ostringstream result; 
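// Aside: CUInt32Hash above is the textbook universal family
// h(x) = ((a * x + b) mod p) mod m with p = BIG_PRIME = 4294967291, exactly as
// its print() renders it. A minimal sketch of evaluating one member under that
// formula alone (the helper name is hypothetical, not part of this patch):
#include <cstdint>

std::uint32_t universalHash(std::uint64_t a, std::uint64_t b, std::uint32_t m, std::uint32_t x) {
    // 64-bit arithmetic cannot overflow here: a, b < BIG_PRIME < 2^32 and x < 2^32,
    // so a * x + b < 2^64.
    const std::uint64_t BIG_PRIME = 4294967291ULL;
    return static_cast<std::uint32_t>(((a * x + b) % BIG_PRIME) % m);
}
// E.g. a = 3, b = 7, m = 1000, x = 2000000000 gives
// (6000000007 mod 4294967291) mod 1000 = 1705032716 mod 1000 = 716.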
diff --git a/lib/core/CHexUtils.cc b/lib/core/CHexUtils.cc
index 9bef045a16..f5ed6a0ea1 100644
--- a/lib/core/CHexUtils.cc
+++ b/lib/core/CHexUtils.cc
@@ -18,11 +18,13 @@ namespace ml {
 namespace core {
 
 CHexUtils::CHexUtils(const uint8_t* pkt, size_t pktLen, bool printHeader, bool printAscii)
-    : m_Pkt(pkt), m_PktLen(pktLen), m_PrintHeader(printHeader), m_PrintAscii(printAscii) {
+    : m_Pkt(pkt), m_PktLen(pktLen), m_PrintHeader(printHeader),
+      m_PrintAscii(printAscii) {
 }
 
 CHexUtils::CHexUtils(const TDataVec& data, bool printHeader, bool printAscii)
-    : m_Pkt((data.size() > 0) ? data.data() : nullptr), m_PktLen(data.size()), m_PrintHeader(printHeader), m_PrintAscii(printAscii) {
+    : m_Pkt((data.size() > 0) ? data.data() : nullptr), m_PktLen(data.size()),
+      m_PrintHeader(printHeader), m_PrintAscii(printAscii) {
 }
 
 void CHexUtils::dump(const uint8_t* pkt, size_t pktLen) {
@@ -42,7 +44,8 @@ std::ostream& operator<<(std::ostream& strm, const CHexUtils& hex) {
         std::string text;
 
         for (size_t i = 0; i < hex.m_PktLen; ++i) {
-            strm << std::setfill('0') << std::setw(2) << static_cast<int>(hex.m_Pkt[i]) << ' ';
+            strm << std::setfill('0') << std::setw(2)
+                 << static_cast<int>(hex.m_Pkt[i]) << ' ';
 
             if (::isprint(hex.m_Pkt[i])) {
                 text += static_cast<char>(hex.m_Pkt[i]);
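The operator<< loop above prints each byte as a two-digit value and accumulates printable characters into a trailing text column. A condensed standalone equivalent is below; it assumes the stream is put into hex mode, which in the real class happens outside the lines shown in this hunk, and hexDump is a hypothetical name.

#include <cctype>
#include <iomanip>
#include <iostream>
#include <string>

// Two-digit hex bytes followed by a printable-ASCII column; a simplified
// stand-in for CHexUtils::operator<<, not the class itself.
void hexDump(std::ostream& strm, const unsigned char* pkt, std::size_t len) {
    std::string text;
    strm << std::hex;
    for (std::size_t i = 0; i < len; ++i) {
        strm << std::setfill('0') << std::setw(2) << static_cast<int>(pkt[i]) << ' ';
        text += std::isprint(pkt[i]) ? static_cast<char>(pkt[i]) : '.';
    }
    strm << std::dec << ' ' << text << '\n';
}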
diff --git a/lib/core/CIEEE754.cc b/lib/core/CIEEE754.cc
index e5a1674d35..6037412f96 100644
--- a/lib/core/CIEEE754.cc
+++ b/lib/core/CIEEE754.cc
@@ -22,12 +22,14 @@ double CIEEE754::round(double value, EPrecision precision) {
     switch (precision) {
     case E_HalfPrecision: {
         static const double PRECISION = 2048.0;
-        mantissa = mantissa < 0.0 ? std::ceil(mantissa * PRECISION - 0.5) / PRECISION : std::floor(mantissa * PRECISION + 0.5) / PRECISION;
+        mantissa = mantissa < 0.0 ? std::ceil(mantissa * PRECISION - 0.5) / PRECISION
+                                  : std::floor(mantissa * PRECISION + 0.5) / PRECISION;
         break;
     }
     case E_SinglePrecision: {
         static const double PRECISION = 16777216.0;
-        mantissa = mantissa < 0.0 ? std::ceil(mantissa * PRECISION - 0.5) / PRECISION : std::floor(mantissa * PRECISION + 0.5) / PRECISION;
+        mantissa = mantissa < 0.0 ? std::ceil(mantissa * PRECISION - 0.5) / PRECISION
+                                  : std::floor(mantissa * PRECISION + 0.5) / PRECISION;
         break;
     }
     case E_DoublePrecision:
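The PRECISION constants make the rounding scheme explicit: the mantissa is scaled by 2^11 (half precision) or 2^24 (single precision), rounded half away from zero, and scaled back. The sketch below reconstructs the whole round trip under the assumption, suggested by the variable names, that std::frexp and std::ldexp bracket the lines shown; roundToHalfPrecision is a hypothetical name.

#include <cmath>

// Keep roughly 11 mantissa bits of 'value', mirroring the E_HalfPrecision
// branch above. std::frexp yields a mantissa with magnitude in [0.5, 1).
double roundToHalfPrecision(double value) {
    static const double PRECISION = 2048.0; // 2^11
    int exponent;
    double mantissa = std::frexp(value, &exponent);
    mantissa = mantissa < 0.0 ? std::ceil(mantissa * PRECISION - 0.5) / PRECISION
                              : std::floor(mantissa * PRECISION + 0.5) / PRECISION;
    return std::ldexp(mantissa, exponent);
}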
diff --git a/lib/core/CJsonLogLayout.cc b/lib/core/CJsonLogLayout.cc
index 304931eec1..2189bbd2d1 100644
--- a/lib/core/CJsonLogLayout.cc
+++ b/lib/core/CJsonLogLayout.cc
@@ -76,10 +76,12 @@ void CJsonLogLayout::activateOptions(Pool& /*p*/) {
 }
 
 void CJsonLogLayout::setOption(const LogString& option, const LogString& value) {
-    if (StringHelper::equalsIgnoreCase(option, LOG4CXX_STR("LOCATIONINFO"), LOG4CXX_STR("locationinfo"))) {
+    if (StringHelper::equalsIgnoreCase(option, LOG4CXX_STR("LOCATIONINFO"),
+                                       LOG4CXX_STR("locationinfo"))) {
         this->locationInfo(OptionConverter::toBoolean(value, false));
     }
-    if (StringHelper::equalsIgnoreCase(option, LOG4CXX_STR("PROPERTIES"), LOG4CXX_STR("properties"))) {
+    if (StringHelper::equalsIgnoreCase(option, LOG4CXX_STR("PROPERTIES"),
+                                       LOG4CXX_STR("properties"))) {
         this->properties(OptionConverter::toBoolean(value, false));
     }
 }
@@ -149,7 +151,8 @@ void CJsonLogLayout::format(LogString& output, const spi::LoggingEventPtr& event
         writer.String(PROPERTIES_NAME);
         writer.StartObject();
 
-        for (spi::LoggingEvent::KeySet::const_iterator i = keySet.begin(); i != keySet.end(); ++i) {
+        for (spi::LoggingEvent::KeySet::const_iterator i = keySet.begin();
+             i != keySet.end(); ++i) {
             const LogString& key = *i;
             LogString value;
             if (event->getMDC(key, value)) {
@@ -159,7 +162,8 @@ void CJsonLogLayout::format(LogString& output, const spi::LoggingEventPtr& event
                 writer.String(val);
             }
         }
-        for (spi::LoggingEvent::KeySet::const_iterator i = propertySet.begin(); i != propertySet.end(); ++i) {
+        for (spi::LoggingEvent::KeySet::const_iterator i = propertySet.begin();
+             i != propertySet.end(); ++i) {
             const LogString& key = *i;
             LogString value;
             if (event->getProperty(key, value)) {
diff --git a/lib/core/CJsonOutputStreamWrapper.cc b/lib/core/CJsonOutputStreamWrapper.cc
index c5d6d929f9..d94caccf00 100644
--- a/lib/core/CJsonOutputStreamWrapper.cc
+++ b/lib/core/CJsonOutputStreamWrapper.cc
@@ -15,7 +15,8 @@ const char CJsonOutputStreamWrapper::JSON_ARRAY_START('[');
 const char CJsonOutputStreamWrapper::JSON_ARRAY_END(']');
 const char CJsonOutputStreamWrapper::JSON_ARRAY_DELIMITER(',');
 
-CJsonOutputStreamWrapper::CJsonOutputStreamWrapper(std::ostream& outStream) : m_ConcurrentOutputStream(outStream), m_FirstObject(true) {
+CJsonOutputStreamWrapper::CJsonOutputStreamWrapper(std::ostream& outStream)
+    : m_ConcurrentOutputStream(outStream), m_FirstObject(true) {
     // initialize the bufferpool
     for (size_t i = 0; i < BUFFER_POOL_SIZE; ++i) {
         m_StringBuffers[i].Reserve(BUFFER_START_SIZE);
@@ -29,12 +30,14 @@ CJsonOutputStreamWrapper::~CJsonOutputStreamWrapper() {
     m_ConcurrentOutputStream([](std::ostream& o) { o.put(JSON_ARRAY_END); });
 }
 
-void CJsonOutputStreamWrapper::acquireBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer*& buffer) {
+void CJsonOutputStreamWrapper::acquireBuffer(TGenericLineWriter& writer,
+                                             rapidjson::StringBuffer*& buffer) {
     buffer = m_StringBufferQueue.pop();
     writer.Reset(*buffer);
 }
 
-void CJsonOutputStreamWrapper::releaseBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer* buffer) {
+void CJsonOutputStreamWrapper::releaseBuffer(TGenericLineWriter& writer,
+                                             rapidjson::StringBuffer* buffer) {
     writer.Flush();
 
     // check for data that has to be written
@@ -55,7 +58,8 @@ void CJsonOutputStreamWrapper::releaseBuffer(TGenericLineWriter& writer, rapidjs
     }
 }
 
-void CJsonOutputStreamWrapper::flushBuffer(TGenericLineWriter& writer, rapidjson::StringBuffer*& buffer) {
+void CJsonOutputStreamWrapper::flushBuffer(TGenericLineWriter& writer,
+                                           rapidjson::StringBuffer*& buffer) {
     writer.Flush();
 
     m_ConcurrentOutputStream([this, buffer](std::ostream& o) {
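acquireBuffer and releaseBuffer hand pooled rapidjson::StringBuffer objects to writers, while every actual write to the shared ostream goes through m_ConcurrentOutputStream as a callable. The patch does not show how that wrapper queues work, so the sketch below substitutes a plain mutex for it; CSerialisedStream and its members are assumptions, only the callable-based interface is taken from the code above.

#include <functional>
#include <iostream>
#include <mutex>

// Serialise writes to a shared stream by accepting callables, in the spirit
// of m_ConcurrentOutputStream([](std::ostream& o) { o.put(JSON_ARRAY_END); }).
class CSerialisedStream {
public:
    explicit CSerialisedStream(std::ostream& strm) : m_Strm(strm) {}

    void operator()(const std::function<void(std::ostream&)>& task) {
        std::lock_guard<std::mutex> lock(m_Mutex);
        task(m_Strm);
    }

private:
    std::ostream& m_Strm;
    std::mutex m_Mutex;
};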
diff --git a/lib/core/CJsonStatePersistInserter.cc b/lib/core/CJsonStatePersistInserter.cc
index bfd8bb7689..7b2690f165 100644
--- a/lib/core/CJsonStatePersistInserter.cc
+++ b/lib/core/CJsonStatePersistInserter.cc
@@ -10,7 +10,8 @@
 namespace ml {
 namespace core {
 
-CJsonStatePersistInserter::CJsonStatePersistInserter(std::ostream& outputStream) : m_WriteStream(outputStream), m_Writer(m_WriteStream) {
+CJsonStatePersistInserter::CJsonStatePersistInserter(std::ostream& outputStream)
+    : m_WriteStream(outputStream), m_Writer(m_WriteStream) {
     m_Writer.StartObject();
 }
 
diff --git a/lib/core/CJsonStateRestoreTraverser.cc b/lib/core/CJsonStateRestoreTraverser.cc
index 3f0b1e6522..f4883acb69 100644
--- a/lib/core/CJsonStateRestoreTraverser.cc
+++ b/lib/core/CJsonStateRestoreTraverser.cc
@@ -19,7 +19,8 @@ const std::string EMPTY_STRING;
 }
 
 CJsonStateRestoreTraverser::CJsonStateRestoreTraverser(std::istream& inputStream)
-    : m_ReadStream(inputStream), m_Handler(), m_Started(false), m_DesiredLevel(0), m_IsArrayOfObjects(false) {
+    : m_ReadStream(inputStream), m_Handler(), m_Started(false),
+      m_DesiredLevel(0), m_IsArrayOfObjects(false) {
 }
 
 bool CJsonStateRestoreTraverser::isEof() const {
@@ -38,7 +39,8 @@ bool CJsonStateRestoreTraverser::next() {
         return false;
     }
 
-    if (this->nextLevel() == m_DesiredLevel || (this->currentLevel() == m_DesiredLevel && this->nextLevel() == m_DesiredLevel + 1)) {
+    if (this->nextLevel() == m_DesiredLevel ||
+        (this->currentLevel() == m_DesiredLevel && this->nextLevel() == m_DesiredLevel + 1)) {
         return this->advance();
     }
 
@@ -148,8 +150,10 @@ bool CJsonStateRestoreTraverser::ascend() {
 }
 
 void CJsonStateRestoreTraverser::debug() const {
-    LOG_DEBUG(<< "Current: name = " << this->currentName() << " value = " << this->currentValue() << " level = " << this->currentLevel()
-              << ", Next: name = " << this->nextName() << " value = " << this->nextValue() << " level = " << this->nextLevel()
+    LOG_DEBUG(<< "Current: name = " << this->currentName() << " value = "
+              << this->currentValue() << " level = " << this->currentLevel()
+              << ", Next: name = " << this->nextName()
+              << " value = " << this->nextValue() << " level = " << this->nextLevel()
               << " is array of objects = " << m_IsArrayOfObjects);
 }
 
@@ -204,9 +208,11 @@ bool CJsonStateRestoreTraverser::skipArray() {
     m_Handler.s_NextIndex = 1 - m_Handler.s_NextIndex;
 
     do {
-        if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart || m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectStart) {
+        if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart ||
+            m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectStart) {
             ++depth;
-        } else if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd || m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectEnd) {
+        } else if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd ||
+                   m_Handler.s_Type == SRapidJsonHandler::E_TokenObjectEnd) {
             --depth;
         }
 
@@ -241,7 +247,8 @@ bool CJsonStateRestoreTraverser::start() {
     // For Ml state the first token should be the start of a JSON
     // object, but we don't store it
     if (m_Handler.s_Type != SRapidJsonHandler::E_TokenObjectStart) {
-        if (m_IsArrayOfObjects && m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd && this->isEof()) {
+        if (m_IsArrayOfObjects &&
+            m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd && this->isEof()) {
             LOG_DEBUG(<< "JSON document is an empty array");
             return false;
         }
@@ -278,7 +285,8 @@ bool CJsonStateRestoreTraverser::advance() {
 
 void CJsonStateRestoreTraverser::logError() {
     const char* error(rapidjson::GetParseError_En(m_Reader.GetParseErrorCode()));
-    LOG_ERROR(<< "Error parsing JSON at offset " << m_Reader.GetErrorOffset() << ": " << ((error != nullptr) ? error : "No message"));
+    LOG_ERROR(<< "Error parsing JSON at offset " << m_Reader.GetErrorOffset()
+              << ": " << ((error != nullptr) ? error : "No message"));
     this->setBadState();
 }
 
@@ -348,11 +356,15 @@ bool CJsonStateRestoreTraverser::SRapidJsonHandler::Double(double d) {
     return true;
 }
 
-bool CJsonStateRestoreTraverser::SRapidJsonHandler::RawNumber(const char*, rapidjson::SizeType, bool) {
+bool CJsonStateRestoreTraverser::SRapidJsonHandler::RawNumber(const char*,
+                                                              rapidjson::SizeType,
+                                                              bool) {
     return false;
 }
 
-bool CJsonStateRestoreTraverser::SRapidJsonHandler::String(const char* str, rapidjson::SizeType length, bool) {
+bool CJsonStateRestoreTraverser::SRapidJsonHandler::String(const char* str,
+                                                           rapidjson::SizeType length,
+                                                           bool) {
     s_Type = E_TokenString;
     if (s_RememberValue) {
         s_Value[s_NextIndex].assign(str, length);
@@ -370,7 +382,9 @@ bool CJsonStateRestoreTraverser::SRapidJsonHandler::StartObject() {
     return true;
 }
 
-bool CJsonStateRestoreTraverser::SRapidJsonHandler::Key(const char* str, rapidjson::SizeType length, bool) {
+bool CJsonStateRestoreTraverser::SRapidJsonHandler::Key(const char* str,
+                                                        rapidjson::SizeType length,
+                                                        bool) {
     s_Type = E_TokenKey;
     if (s_RememberValue) {
         s_NextIndex = 1 - s_NextIndex;
diff --git a/lib/core/CLogger.cc b/lib/core/CLogger.cc
index 18a7c5bad4..9e8fc20ba5 100644
--- a/lib/core/CLogger.cc
+++ b/lib/core/CLogger.cc
@@ -48,7 +48,9 @@ const ml::core::CLogger& DO_NOT_USE_THIS_VARIABLE = ml::core::CLogger::instance(
 namespace ml {
 namespace core {
 
-CLogger::CLogger() : m_Logger(0), m_Reconfigured(false), m_ProgramName(CProgName::progName()), m_OrigStderrFd(-1) {
+CLogger::CLogger()
+    : m_Logger(0), m_Reconfigured(false), m_ProgramName(CProgName::progName()),
+      m_OrigStderrFd(-1) {
     CCrashHandler::installCrashHandler();
     this->reset();
 }
@@ -101,9 +103,11 @@ void CLogger::reset() {
 
         log4cxx::helpers::Properties props;
         props.put(LOG4CXX_STR("log4j.rootLogger"), LOG4CXX_STR("DEBUG, A1"));
-        props.put(LOG4CXX_STR("log4j.appender.A1"), LOG4CXX_STR("org.apache.log4j.ConsoleAppender"));
+        props.put(LOG4CXX_STR("log4j.appender.A1"),
+                  LOG4CXX_STR("org.apache.log4j.ConsoleAppender"));
         props.put(LOG4CXX_STR("log4j.appender.A1.Target"), LOG4CXX_STR("System.err"));
-        props.put(LOG4CXX_STR("log4j.appender.A1.layout"), LOG4CXX_STR("org.apache.log4j.PatternLayout"));
+        props.put(LOG4CXX_STR("log4j.appender.A1.layout"),
+                  LOG4CXX_STR("org.apache.log4j.PatternLayout"));
 
         // The pattern includes the process ID to make it easier to see if a
         // process dies and restarts
@@ -207,12 +211,14 @@ bool CLogger::setLoggingLevel(ELevel level) {
     // change will have no effect. Therefore, we adjust all appender thresholds
     // here as well for appenders that write to a file or the console.
     log4cxx::AppenderList appendersToChange(loggerToChange->getAllAppenders());
-    for (log4cxx::AppenderList::iterator iter = appendersToChange.begin(); iter != appendersToChange.end(); ++iter) {
+    for (log4cxx::AppenderList::iterator iter = appendersToChange.begin();
+         iter != appendersToChange.end(); ++iter) {
         log4cxx::Appender* appenderToChange(*iter);
         // Unfortunately, thresholds are a concept lower down the inheritance
         // hierarchy than the Appender base class, so we have to downcast.
-        log4cxx::WriterAppender* writerToChange(dynamic_cast<log4cxx::WriterAppender*>(appenderToChange));
+        log4cxx::WriterAppender* writerToChange(
+            dynamic_cast<log4cxx::WriterAppender*>(appenderToChange));
         if (writerToChange != nullptr) {
             writerToChange->setThreshold(levelToSet);
         }
@@ -240,7 +246,8 @@ bool CLogger::reconfigureLogToNamedPipe(const std::string& pipeName) {
 
     m_PipeFile = CNamedPipeFactory::openPipeFileWrite(pipeName);
     if (m_PipeFile == nullptr) {
-        LOG_ERROR(<< "Cannot log to named pipe " << pipeName << " as it could not be opened for writing");
+        LOG_ERROR(<< "Cannot log to named pipe " << pipeName
+                  << " as it could not be opened for writing");
         return false;
     }
 
@@ -263,9 +270,11 @@ bool CLogger::reconfigureLogJson() {
     log4cxx::LogString logStr;
     log4cxx::helpers::Transcoder::decode(m_ProgramName, logStr);
     props.put(LOG4CXX_STR("log4j.logger.") + logStr, LOG4CXX_STR("DEBUG, A2"));
-    props.put(LOG4CXX_STR("log4j.appender.A2"), LOG4CXX_STR("org.apache.log4j.ConsoleAppender"));
+    props.put(LOG4CXX_STR("log4j.appender.A2"),
+              LOG4CXX_STR("org.apache.log4j.ConsoleAppender"));
     props.put(LOG4CXX_STR("log4j.appender.A2.Target"), LOG4CXX_STR("System.err"));
-    props.put(LOG4CXX_STR("log4j.appender.A2.layout"), LOG4CXX_STR("org.apache.log4j.CJsonLogLayout"));
+    props.put(LOG4CXX_STR("log4j.appender.A2.layout"),
+              LOG4CXX_STR("org.apache.log4j.CJsonLogLayout"));
 
     return this->reconfigureFromProps(props);
 }
@@ -273,7 +282,8 @@ bool CLogger::reconfigureLogJson() {
 bool CLogger::reconfigureFromFile(const std::string& propertiesFile) {
     COsFileFuncs::TStat statBuf;
     if (COsFileFuncs::stat(propertiesFile.c_str(), &statBuf) != 0) {
-        LOG_ERROR(<< "Unable to access properties file " << propertiesFile << " for logger re-initialisation: " << ::strerror(errno));
+        LOG_ERROR(<< "Unable to access properties file " << propertiesFile
+                  << " for logger re-initialisation: " << ::strerror(errno));
         return false;
     }
 
@@ -283,10 +293,12 @@ bool CLogger::reconfigureFromFile(const std::string& propertiesFile) {
     log4cxx::helpers::Properties props;
     try {
         // InputStreamPtr is a smart pointer
-        log4cxx::helpers::InputStreamPtr inputStream(new log4cxx::helpers::FileInputStream(propertiesFile));
+        log4cxx::helpers::InputStreamPtr inputStream(
+            new log4cxx::helpers::FileInputStream(propertiesFile));
         props.load(inputStream);
     } catch (const log4cxx::helpers::Exception& e) {
-        LOG_ERROR(<< "Unable to read from properties file " << propertiesFile << " for logger re-initialisation: " << e.what());
+        LOG_ERROR(<< "Unable to read from properties file " << propertiesFile
+                  << " for logger re-initialisation: " << e.what());
         return false;
     }
 
@@ -382,10 +394,13 @@ void CLogger::massageProperties(log4cxx::helpers::Properties& props) const {
     }
 }
 
-void CLogger::massageString(const TLogCharLogStrMap& mappings, const log4cxx::LogString& oldStr, log4cxx::LogString& newStr) const {
+void CLogger::massageString(const TLogCharLogStrMap& mappings,
+                            const log4cxx::LogString& oldStr,
+                            log4cxx::LogString& newStr) const {
     newStr.clear();
 
-    for (log4cxx::LogString::const_iterator iter = oldStr.begin(); iter != oldStr.end(); ++iter) {
+    for (log4cxx::LogString::const_iterator iter = oldStr.begin();
+         iter != oldStr.end(); ++iter) {
         // We ONLY want to replace the patterns in our map - other patterns are
         // left for log4cxx itself
         if (*iter == static_cast<log4cxx::logchar>('%')) {
diff --git a/lib/core/CMemoryUsage.cc b/lib/core/CMemoryUsage.cc
index 15016671b1..acf7a73555 100644
--- a/lib/core/CMemoryUsage.cc
+++ b/lib/core/CMemoryUsage.cc
@@ -20,9 +20,12 @@ namespace memory_detail {
 //! their description
 class CMemoryUsageComparison : public std::unary_function<const CMemoryUsage*, bool> {
 public:
-    explicit CMemoryUsageComparison(const std::string& baseline) : m_Baseline(baseline) {}
+    explicit CMemoryUsageComparison(const std::string& baseline)
+        : m_Baseline(baseline) {}
 
-    bool operator()(const CMemoryUsage* rhs) { return m_Baseline == rhs->m_Description.s_Name; }
+    bool operator()(const CMemoryUsage* rhs) {
+        return m_Baseline == rhs->m_Description.s_Name;
+    }
 
 private:
     std::string m_Baseline;
@@ -30,12 +33,15 @@ class CMemoryUsageComparison : public std::unary_function<const CMemoryUsage*, 
 
 //! Comparison function class to compare CMemoryUsage objects by
 //! their description, but ignoring the first in the collection
-class CMemoryUsageComparisonTwo : public std::binary_function<const CMemoryUsage*, const CMemoryUsage*, bool> {
+class CMemoryUsageComparisonTwo
+    : public std::binary_function<const CMemoryUsage*, const CMemoryUsage*, bool> {
 public:
     explicit CMemoryUsageComparisonTwo(const std::string& baseline, const CMemoryUsage* firstItem)
         : m_Baseline(baseline), m_FirstItem(firstItem) {}
 
-    bool operator()(const CMemoryUsage* rhs) { return (rhs != m_FirstItem) && (m_Baseline == rhs->m_Description.s_Name); }
+    bool operator()(const CMemoryUsage* rhs) {
+        return (rhs != m_FirstItem) && (m_Baseline == rhs->m_Description.s_Name);
+    }
 
 private:
     std::string m_Baseline;
@@ -148,7 +154,8 @@ void CMemoryUsage::compress() {
     TStrSizeMap itemsByName;
     for (TMemoryUsagePtrListCItr i = m_Children.begin(); i != m_Children.end(); ++i) {
         itemsByName[(*i)->m_Description.s_Name]++;
-        LOG_TRACE(<< "Item " << (*i)->m_Description.s_Name << " : " << itemsByName[(*i)->m_Description.s_Name]);
+        LOG_TRACE(<< "Item " << (*i)->m_Description.s_Name << " : "
+                  << itemsByName[(*i)->m_Description.s_Name]);
     }
 
     for (TStrSizeMapCItr i = itemsByName.begin(); i != itemsByName.end(); ++i) {
@@ -158,11 +165,13 @@ void CMemoryUsage::compress() {
             std::size_t counter = 0;
 
             memory_detail::CMemoryUsageComparison compareName(i->first);
-            TMemoryUsagePtrListItr firstChild = std::find_if(m_Children.begin(), m_Children.end(), compareName);
+            TMemoryUsagePtrListItr firstChild =
+                std::find_if(m_Children.begin(), m_Children.end(), compareName);
 
             memory_detail::CMemoryUsageComparisonTwo comparison(i->first, *firstChild);
             TMemoryUsagePtrListItr j = m_Children.begin();
-            while ((j = std::find_if(j, m_Children.end(), comparison)) != m_Children.end()) {
+            while ((j = std::find_if(j, m_Children.end(), comparison)) !=
+                   m_Children.end()) {
                 LOG_TRACE(<< "Trying to remove " << *j);
                 (*firstChild)->m_Description.s_Memory += (*j)->usage();
                 (*firstChild)->m_Description.s_Unused += (*j)->unusage();
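compress() first counts children by description name, then folds every duplicate into the first child carrying that name. The aggregation on its own looks like the sketch below; SUsage and the free compress function are hypothetical stand-ins for the class internals.

#include <map>
#include <string>
#include <vector>

struct SUsage {
    std::string s_Name;
    std::size_t s_Memory;
};

// Fold entries with the same name into one, summing their memory, which is
// the net effect of CMemoryUsage::compress() on its child list.
std::vector<SUsage> compress(const std::vector<SUsage>& items) {
    std::map<std::string, std::size_t> totals;
    for (const SUsage& item : items) {
        totals[item.s_Name] += item.s_Memory;
    }
    std::vector<SUsage> result;
    result.reserve(totals.size());
    for (const auto& entry : totals) {
        result.push_back(SUsage{entry.first, entry.second});
    }
    return result;
}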
diff --git a/lib/core/CMonotonicTime_MacOSX.cc b/lib/core/CMonotonicTime_MacOSX.cc
index 426ec12862..5147307c5e 100644
--- a/lib/core/CMonotonicTime_MacOSX.cc
+++ b/lib/core/CMonotonicTime_MacOSX.cc
@@ -12,7 +12,8 @@
 namespace ml {
 namespace core {
 
-CMonotonicTime::CMonotonicTime() : m_ScalingFactor1(1), m_ScalingFactor2(1000000), m_ScalingFactor3(1) {
+CMonotonicTime::CMonotonicTime()
+    : m_ScalingFactor1(1), m_ScalingFactor2(1000000), m_ScalingFactor3(1) {
     mach_timebase_info_data_t info;
     if (::mach_timebase_info(&info) != 0) {
         // Assume numerator and denominator for nanoseconds are both 1 (which is
diff --git a/lib/core/CMonotonicTime_Windows.cc b/lib/core/CMonotonicTime_Windows.cc
index 8216c48c91..7b60cbd206 100644
--- a/lib/core/CMonotonicTime_Windows.cc
+++ b/lib/core/CMonotonicTime_Windows.cc
@@ -14,8 +14,7 @@ namespace core {
 
 CMonotonicTime::CMonotonicTime()
     : m_ScalingFactor1(0), // Only one variable scaling factor is needed on Windows
-      m_ScalingFactor2(0),
-      m_ScalingFactor3(0) {
+      m_ScalingFactor2(0), m_ScalingFactor3(0) {
     LARGE_INTEGER largeInt;
 
     if (QueryPerformanceFrequency(&largeInt) == FALSE) {
         LOG_WARN(<< "High frequency performance counters not available");
@@ -55,7 +54,8 @@ uint64_t CMonotonicTime::nanoseconds() const {
 
     // Doing the division first here truncates the result to the number of
     // nanoseconds in a number of full seconds
-    uint64_t fullSecondNanoseconds((static_cast<uint64_t>(largeInt.QuadPart) / m_ScalingFactor1) * 1000000000ULL);
+    uint64_t fullSecondNanoseconds(
+        (static_cast<uint64_t>(largeInt.QuadPart) / m_ScalingFactor1) * 1000000000ULL);
 
     // This is the number of ticks over and above the last full second
     uint64_t remainder(static_cast<uint64_t>(largeInt.QuadPart) % m_ScalingFactor1);
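The comment in nanoseconds() is the key to the Windows implementation: dividing the tick count by the counter frequency before multiplying by 10^9 avoids overflowing 64 bits, and the sub-second remainder is scaled separately. The same pattern in isolation, with hypothetical names:

#include <cstdint>

// Convert counter ticks to nanoseconds without 64-bit overflow: whole
// seconds first, then the remainder, which is < ticksPerSecond and so safe
// to multiply by 10^9 for any realistic frequency (up to ~18.4 GHz).
std::uint64_t ticksToNanoseconds(std::uint64_t ticks, std::uint64_t ticksPerSecond) {
    std::uint64_t fullSecondNanoseconds = (ticks / ticksPerSecond) * 1000000000ULL;
    std::uint64_t remainder = ticks % ticksPerSecond;
    return fullSecondNanoseconds + remainder * 1000000000ULL / ticksPerSecond;
}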
diff --git a/lib/core/CNamedPipeFactory.cc b/lib/core/CNamedPipeFactory.cc
index 178cb3292a..01f4feac2e 100644
--- a/lib/core/CNamedPipeFactory.cc
+++ b/lib/core/CNamedPipeFactory.cc
@@ -112,8 +112,10 @@ CNamedPipeFactory::TIStreamP CNamedPipeFactory::openPipeStreamRead(const std::st
     if (fd == -1) {
         return TIStreamP();
     }
-    using TFileDescriptorSourceStream = boost::iostreams::stream<boost::iostreams::file_descriptor_source>;
-    return TIStreamP(new TFileDescriptorSourceStream(boost::iostreams::file_descriptor_source(fd, boost::iostreams::close_handle)));
+    using TFileDescriptorSourceStream =
+        boost::iostreams::stream<boost::iostreams::file_descriptor_source>;
+    return TIStreamP(new TFileDescriptorSourceStream(
+        boost::iostreams::file_descriptor_source(fd, boost::iostreams::close_handle)));
 }
 
 CNamedPipeFactory::TOStreamP CNamedPipeFactory::openPipeStreamWrite(const std::string& fileName) {
@@ -121,8 +123,10 @@ CNamedPipeFactory::TOStreamP CNamedPipeFactory::openPipeStreamWrite(const std::s
     if (fd == -1) {
         return TOStreamP();
     }
-    using TRetryingFileDescriptorSinkStream = boost::iostreams::stream<CRetryingFileDescriptorSink>;
-    return TOStreamP(new TRetryingFileDescriptorSinkStream(CRetryingFileDescriptorSink(fd, boost::iostreams::close_handle)));
+    using TRetryingFileDescriptorSinkStream =
+        boost::iostreams::stream<CRetryingFileDescriptorSink>;
+    return TOStreamP(new TRetryingFileDescriptorSinkStream(
+        CRetryingFileDescriptorSink(fd, boost::iostreams::close_handle)));
 }
 
 CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileRead(const std::string& fileName) {
@@ -169,7 +173,8 @@ std::string CNamedPipeFactory::defaultPath() {
     return path;
 }
 
-CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::string& fileName, bool forWrite) {
+CNamedPipeFactory::TPipeHandle
+CNamedPipeFactory::initPipeHandle(const std::string& fileName, bool forWrite) {
     if (!SIGPIPE_IGNORED) {
         LOG_WARN(<< "Failed to ignore SIGPIPE - this process will not terminate "
                     "gracefully if a process it is writing to via a named pipe dies");
@@ -187,15 +192,18 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri
                         "of this name already exists, but it is not a FIFO");
             return -1;
         }
-        if ((statbuf.st_mode & (S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH)) != 0) {
-            LOG_ERROR(<< "Will not use pre-existing named pipe " << fileName << " - it has permissions that are too open");
+        if ((statbuf.st_mode &
+             (S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH)) != 0) {
+            LOG_ERROR(<< "Will not use pre-existing named pipe " << fileName
+                      << " - it has permissions that are too open");
             return -1;
         }
     } else {
         // The file didn't exist, so create a new FIFO for it, with permissions
         // for the current user only
         if (::mkfifo(fileName.c_str(), S_IRUSR | S_IWUSR) == -1) {
-            LOG_ERROR(<< "Unable to create named pipe " << fileName << ": " << ::strerror(errno));
+            LOG_ERROR(<< "Unable to create named pipe " << fileName << ": "
+                      << ::strerror(errno));
             return -1;
         }
         madeFifo = true;
@@ -203,15 +211,19 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri
 
     // The open call here will block if there is no other connection to the
     // named pipe
-    int fd = COsFileFuncs::open(fileName.c_str(), forWrite ? COsFileFuncs::WRONLY : COsFileFuncs::RDONLY);
+    int fd = COsFileFuncs::open(fileName.c_str(), forWrite ? COsFileFuncs::WRONLY
+                                                           : COsFileFuncs::RDONLY);
     if (fd == -1) {
-        LOG_ERROR(<< "Unable to open named pipe " << fileName << (forWrite ? " for writing: " : " for reading: ") << ::strerror(errno));
+        LOG_ERROR(<< "Unable to open named pipe " << fileName
+                  << (forWrite ? " for writing: " : " for reading: ")
+                  << ::strerror(errno));
     } else {
         // Write a test character to the pipe - this is really only necessary on
         // Windows, but doing it on *nix too will mean the inability of the Java
         // code to tolerate the test character will be discovered sooner.
         if (forWrite && COsFileFuncs::write(fd, &TEST_CHAR, sizeof(TEST_CHAR)) <= 0) {
-            LOG_ERROR(<< "Unable to test named pipe " << fileName << ": " << ::strerror(errno));
+            LOG_ERROR(<< "Unable to test named pipe " << fileName << ": "
+                      << ::strerror(errno));
             COsFileFuncs::close(fd);
             fd = -1;
         }
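openPipeStreamRead and openPipeStreamWrite wrap a raw descriptor in a standard stream via Boost.Iostreams, with close_handle transferring ownership of the descriptor to the stream. The pattern reduces to the sketch below; wrapFdAsStream is a hypothetical helper, while the Boost types and flag are the same ones used above.

#include <boost/iostreams/device/file_descriptor.hpp>
#include <boost/iostreams/stream.hpp>

#include <istream>
#include <memory>

// Wrap an already-open POSIX file descriptor in a std::istream; the stream
// closes the descriptor on destruction because of close_handle.
std::unique_ptr<std::istream> wrapFdAsStream(int fd) {
    using TFdStream = boost::iostreams::stream<boost::iostreams::file_descriptor_source>;
    return std::make_unique<TFdStream>(
        boost::iostreams::file_descriptor_source(fd, boost::iostreams::close_handle));
}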
fileName << " - it has permissions that are too open"); + if ((statbuf.st_mode & + (S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH)) != 0) { + LOG_ERROR(<< "Will not use pre-existing named pipe " << fileName + << " - it has permissions that are too open"); return -1; } } else { // The file didn't exist, so create a new FIFO for it, with permissions // for the current user only if (::mkfifo(fileName.c_str(), S_IRUSR | S_IWUSR) == -1) { - LOG_ERROR(<< "Unable to create named pipe " << fileName << ": " << ::strerror(errno)); + LOG_ERROR(<< "Unable to create named pipe " << fileName << ": " + << ::strerror(errno)); return -1; } madeFifo = true; @@ -203,15 +211,19 @@ CNamedPipeFactory::TPipeHandle CNamedPipeFactory::initPipeHandle(const std::stri // The open call here will block if there is no other connection to the // named pipe - int fd = COsFileFuncs::open(fileName.c_str(), forWrite ? COsFileFuncs::WRONLY : COsFileFuncs::RDONLY); + int fd = COsFileFuncs::open(fileName.c_str(), forWrite ? COsFileFuncs::WRONLY + : COsFileFuncs::RDONLY); if (fd == -1) { - LOG_ERROR(<< "Unable to open named pipe " << fileName << (forWrite ? " for writing: " : " for reading: ") << ::strerror(errno)); + LOG_ERROR(<< "Unable to open named pipe " << fileName + << (forWrite ? " for writing: " : " for reading: ") + << ::strerror(errno)); } else { // Write a test character to the pipe - this is really only necessary on // Windows, but doing it on *nix too will mean the inability of the Java // code to tolerate the test character will be discovered sooner. if (forWrite && COsFileFuncs::write(fd, &TEST_CHAR, sizeof(TEST_CHAR)) <= 0) { - LOG_ERROR(<< "Unable to test named pipe " << fileName << ": " << ::strerror(errno)); + LOG_ERROR(<< "Unable to test named pipe " << fileName << ": " + << ::strerror(errno)); COsFileFuncs::close(fd); fd = -1; } diff --git a/lib/core/CNamedPipeFactory_Windows.cc b/lib/core/CNamedPipeFactory_Windows.cc index 760942a292..a41f7d717b 100644 --- a/lib/core/CNamedPipeFactory_Windows.cc +++ b/lib/core/CNamedPipeFactory_Windows.cc @@ -39,8 +39,10 @@ CNamedPipeFactory::TIStreamP CNamedPipeFactory::openPipeStreamRead(const std::st if (handle == INVALID_HANDLE_VALUE) { return TIStreamP(); } - using TFileDescriptorSourceStream = boost::iostreams::stream; - return TIStreamP(new TFileDescriptorSourceStream(boost::iostreams::file_descriptor_source(handle, boost::iostreams::close_handle))); + using TFileDescriptorSourceStream = + boost::iostreams::stream; + return TIStreamP(new TFileDescriptorSourceStream(boost::iostreams::file_descriptor_source( + handle, boost::iostreams::close_handle))); } CNamedPipeFactory::TOStreamP CNamedPipeFactory::openPipeStreamWrite(const std::string& fileName) { @@ -48,8 +50,10 @@ CNamedPipeFactory::TOStreamP CNamedPipeFactory::openPipeStreamWrite(const std::s if (handle == INVALID_HANDLE_VALUE) { return TOStreamP(); } - using TFileDescriptorSinkStream = boost::iostreams::stream; - return TOStreamP(new TFileDescriptorSinkStream(boost::iostreams::file_descriptor_sink(handle, boost::iostreams::close_handle))); + using TFileDescriptorSinkStream = + boost::iostreams::stream; + return TOStreamP(new TFileDescriptorSinkStream( + boost::iostreams::file_descriptor_sink(handle, boost::iostreams::close_handle))); } CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileRead(const std::string& fileName) { @@ -57,7 +61,8 @@ CNamedPipeFactory::TFileP CNamedPipeFactory::openPipeFileRead(const std::string& if (handle == INVALID_HANDLE_VALUE) { return TFileP(); } - return 
diff --git a/lib/core/COsFileFuncs_Windows.cc b/lib/core/COsFileFuncs_Windows.cc
index 6e297ab459..04c99ef89a 100644
--- a/lib/core/COsFileFuncs_Windows.cc
+++ b/lib/core/COsFileFuncs_Windows.cc
@@ -104,8 +104,9 @@ int COsFileFuncs::open(const char* path, int oflag, TMode pmode) {
         attributes = FILE_ATTRIBUTE_READONLY;
     }
 
-    HANDLE handle =
-        CreateFile(path, desiredAccess, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, 0, creationDisposition, attributes, 0);
+    HANDLE handle = CreateFile(path, desiredAccess,
+                               FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+                               0, creationDisposition, attributes, 0);
     if (handle == INVALID_HANDLE_VALUE) {
         switch (GetLastError()) {
         case ERROR_FILE_NOT_FOUND:
@@ -202,7 +203,8 @@ int COsFileFuncs::fstat(int fildes, TStat* buf) {
         return -1;
     }
 
-    buf->st_ino = static_cast<uint64_t>(info.nFileIndexLow) | (static_cast<uint64_t>(info.nFileIndexHigh) << 32);
+    buf->st_ino = static_cast<uint64_t>(info.nFileIndexLow) |
+                  (static_cast<uint64_t>(info.nFileIndexHigh) << 32);
 
     return 0;
 }
@@ -237,10 +239,7 @@ int COsFileFuncs::stat(const char* path, TStat* buf) {
     HANDLE handle = CreateFile(path,
                                0, // Open for neither read nor write
                               FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
-                               0,
-                               OPEN_EXISTING,
-                               FILE_ATTRIBUTE_NORMAL,
-                               0);
+                               0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
     if (handle == INVALID_HANDLE_VALUE) {
         errno = EACCES;
         return -1;
@@ -254,7 +253,8 @@ int COsFileFuncs::stat(const char* path, TStat* buf) {
         return -1;
     }
 
-    buf->st_ino = static_cast<uint64_t>(info.nFileIndexLow) | (static_cast<uint64_t>(info.nFileIndexHigh) << 32);
+    buf->st_ino = static_cast<uint64_t>(info.nFileIndexLow) |
+                  (static_cast<uint64_t>(info.nFileIndexHigh) << 32);
 
     CloseHandle(handle);
 
@@ -266,7 +266,8 @@ int COsFileFuncs::lstat(const char* path, TStat* buf) {
     // case where the path points at a symlink, so often we can simply call
     // stat()
     WIN32_FILE_ATTRIBUTE_DATA attributes = {0};
-    if (path == nullptr || buf == nullptr || GetFileAttributesEx(path, GetFileExInfoStandard, &attributes) == FALSE ||
+    if (path == nullptr || buf == nullptr ||
+        GetFileAttributesEx(path, GetFileExInfoStandard, &attributes) == FALSE ||
         (attributes.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0) {
         return COsFileFuncs::stat(path, buf);
     }
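Both fstat() and stat() above synthesise a 64-bit inode number from the two 32-bit halves of the Windows file index. The bit manipulation in isolation (makeFileIndex is a hypothetical name):

#include <cstdint>

// Combine nFileIndexLow and nFileIndexHigh into one 64-bit identifier, as
// the st_ino assignments above do.
std::uint64_t makeFileIndex(std::uint32_t low, std::uint32_t high) {
    return static_cast<std::uint64_t>(low) | (static_cast<std::uint64_t>(high) << 32);
}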
diff --git a/lib/core/CPatternSet.cc b/lib/core/CPatternSet.cc
index e92af04b49..c210825fe1 100644
--- a/lib/core/CPatternSet.cc
+++ b/lib/core/CPatternSet.cc
@@ -21,7 +21,9 @@ namespace {
 const char WILDCARD = '*';
 }
 
-CPatternSet::CPatternSet() : m_FullMatchPatterns(), m_PrefixPatterns(), m_SuffixPatterns(), m_ContainsPatterns() {
+CPatternSet::CPatternSet()
+    : m_FullMatchPatterns(), m_PrefixPatterns(), m_SuffixPatterns(),
+      m_ContainsPatterns() {
 }
 
 bool CPatternSet::initFromJson(const std::string& json) {
@@ -44,7 +46,8 @@ bool CPatternSet::initFromJson(const std::string& json) {
 
     for (unsigned int i = 0; i < doc.Size(); ++i) {
         if (!doc[i].IsString()) {
-            LOG_ERROR(<< "Could not parse pattern set: unexpected non-string item in JSON: " << json);
+            LOG_ERROR(<< "Could not parse pattern set: unexpected non-string item in JSON: "
+                      << json);
             this->clear();
             return false;
         }
@@ -72,7 +75,9 @@ bool CPatternSet::initFromJson(const std::string& json) {
     this->sortAndPruneDuplicates(prefixPatterns);
     this->sortAndPruneDuplicates(suffixPatterns);
     this->sortAndPruneDuplicates(containsPatterns);
-    return m_FullMatchPatterns.build(fullPatterns) && m_PrefixPatterns.build(prefixPatterns) && m_SuffixPatterns.build(suffixPatterns) &&
+    return m_FullMatchPatterns.build(fullPatterns) &&
+           m_PrefixPatterns.build(prefixPatterns) &&
+           m_SuffixPatterns.build(suffixPatterns) &&
           m_ContainsPatterns.build(containsPatterns);
 }
 
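initFromJson routes each pattern into one of four flat prefix trees according to where the '*' wildcard sits: no wildcard (full match), trailing only (prefix), leading only (suffix), or both (contains). A sketch of that classification, assuming, as the four member vectors imply, that only leading and trailing wildcards are supported:

#include <string>

enum class EPatternType { E_Full, E_Prefix, E_Suffix, E_Contains };

// Classify a pattern by its leading/trailing '*'; interior wildcards are
// deliberately not handled, mirroring the structure of CPatternSet.
EPatternType classify(const std::string& pattern) {
    bool leading = !pattern.empty() && pattern.front() == '*';
    bool trailing = pattern.size() > 1 && pattern.back() == '*';
    if (leading && trailing) {
        return EPatternType::E_Contains; // '*foo*'
    }
    if (leading) {
        return EPatternType::E_Suffix; // '*foo'
    }
    if (trailing) {
        return EPatternType::E_Prefix; // 'foo*'
    }
    return EPatternType::E_Full;
}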
diff --git a/lib/core/CProcess.cc b/lib/core/CProcess.cc
index 161cb368d9..cc62dc6eaf 100644
--- a/lib/core/CProcess.cc
+++ b/lib/core/CProcess.cc
@@ -18,7 +18,9 @@ const char* CProcess::STARTED_MSG("Process Started.");
 const char* CProcess::STOPPING_MSG("Process Shutting Down.");
 const char* CProcess::STOPPED_MSG("Process Exiting.");
 
-CProcess::CProcess() : m_IsService(false), m_Initialised(false), m_Running(false), m_MlMainFunc(nullptr) {
+CProcess::CProcess()
+    : m_IsService(false), m_Initialised(false), m_Running(false),
+      m_MlMainFunc(nullptr) {
 }
 
 CProcess& CProcess::instance() {
diff --git a/lib/core/CProcessPriority_Linux.cc b/lib/core/CProcessPriority_Linux.cc
index faed1b209b..43ed8e84e9 100644
--- a/lib/core/CProcessPriority_Linux.cc
+++ b/lib/core/CProcessPriority_Linux.cc
@@ -43,7 +43,8 @@ void increaseOomKillerAdj() {
     // oom_adj is on a scale of -16 to 15.
     // In both cases higher numbers mean the process is more likely to be killed
     // in low memory situations.
-    if (writeToSystemFile("/proc/self/oom_score_adj", "667\n") == false && writeToSystemFile("/proc/self/oom_adj", "10\n") == false) {
+    if (writeToSystemFile("/proc/self/oom_score_adj", "667\n") == false &&
+        writeToSystemFile("/proc/self/oom_adj", "10\n") == false) {
         LOG_WARN(<< "Could not increase OOM killer adjustment using "
                     "/proc/self/oom_score_adj or /proc/self/oom_adj: "
                  << ::strerror(errno));
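increaseOomKillerAdj() prefers the newer /proc/self/oom_score_adj interface (scale -1000 to 1000) and falls back to the legacy /proc/self/oom_adj (scale -16 to 15). writeToSystemFile is file-local and not shown in this hunk; a plain stdio equivalent of the pattern, offered as an assumption rather than the actual helper, would be:

#include <cstdio>

// Best-effort write of a short string to a /proc file; returns false when
// the file cannot be opened or written. A stand-in for the file-local
// writeToSystemFile helper referenced above.
bool writeToSystemFile(const char* path, const char* value) {
    std::FILE* fp = std::fopen(path, "w");
    if (fp == nullptr) {
        return false;
    }
    bool ok = std::fputs(value, fp) >= 0;
    return std::fclose(fp) == 0 && ok;
}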
diff --git a/lib/core/CProcess_Windows.cc b/lib/core/CProcess_Windows.cc
index 8349f0a644..019a068311 100644
--- a/lib/core/CProcess_Windows.cc
+++ b/lib/core/CProcess_Windows.cc
@@ -67,7 +67,9 @@ const char* CProcess::STARTED_MSG("Process Started.");
 const char* CProcess::STOPPING_MSG("Process Shutting Down.");
 const char* CProcess::STOPPED_MSG("Process Exiting.");
 
-CProcess::CProcess() : m_IsService(false), m_Initialised(false), m_Running(false), m_MlMainFunc(0), m_ServiceHandle(0) {
+CProcess::CProcess()
+    : m_IsService(false), m_Initialised(false), m_Running(false),
+      m_MlMainFunc(0), m_ServiceHandle(0) {
 }
 
 CProcess& CProcess::instance() {
@@ -211,7 +213,8 @@ void WINAPI CProcess::serviceMain(DWORD argc, char* argv[]) {
 
         size_t index(0);
         TScopedCharPArray mergedArgV(new char*[mergedArgC]);
-        for (TStrVecCItr iter = process.m_Args.begin(); iter != process.m_Args.end(); ++iter) {
+        for (TStrVecCItr iter = process.m_Args.begin();
+             iter != process.m_Args.end(); ++iter) {
             mergedArgV[index++] = const_cast<char*>(iter->c_str());
         }
 
@@ -237,7 +240,8 @@ void WINAPI CProcess::serviceMain(DWORD argc, char* argv[]) {
 
         serviceStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS;
         serviceStatus.dwControlsAccepted = 0;
-        serviceStatus.dwWin32ExitCode = (ret == EXIT_SUCCESS ? NO_ERROR : ERROR_SERVICE_SPECIFIC_ERROR);
+        serviceStatus.dwWin32ExitCode =
+            (ret == EXIT_SUCCESS ? NO_ERROR : ERROR_SERVICE_SPECIFIC_ERROR);
         serviceStatus.dwServiceSpecificExitCode = static_cast<DWORD>(ret);
         serviceStatus.dwCheckPoint = 0;
         serviceStatus.dwWaitHint = 0;
diff --git a/lib/core/CRapidJsonConcurrentLineWriter.cc b/lib/core/CRapidJsonConcurrentLineWriter.cc
index 0eef07b285..9bfc52eeff 100644
--- a/lib/core/CRapidJsonConcurrentLineWriter.cc
+++ b/lib/core/CRapidJsonConcurrentLineWriter.cc
@@ -9,7 +9,8 @@
 namespace ml {
 namespace core {
 
-CRapidJsonConcurrentLineWriter::CRapidJsonConcurrentLineWriter(CJsonOutputStreamWrapper& outStream) : m_OutputStreamWrapper(outStream) {
+CRapidJsonConcurrentLineWriter::CRapidJsonConcurrentLineWriter(CJsonOutputStreamWrapper& outStream)
+    : m_OutputStreamWrapper(outStream) {
     m_OutputStreamWrapper.acquireBuffer(*this, m_StringBuffer);
 }
 
diff --git a/lib/core/CRapidXmlParser.cc b/lib/core/CRapidXmlParser.cc
index f869c483f3..e491894f1e 100644
--- a/lib/core/CRapidXmlParser.cc
+++ b/lib/core/CRapidXmlParser.cc
@@ -26,7 +26,8 @@ CRapidXmlParser::~CRapidXmlParser() {
 }
 
 bool CRapidXmlParser::parseString(const std::string& xml) {
-    return this->parseBufferNonDestructive(xml.c_str(), xml.length());
+    return this->parseBufferNonDestructive(
+        xml.c_str(), xml.length());
 }
 
 bool CRapidXmlParser::parseBuffer(const char* begin, size_t length) {
@@ -38,8 +39,8 @@ bool CRapidXmlParser::parseBufferInSitu(char* begin, size_t length) {
 }
 
 bool CRapidXmlParser::parseStringIgnoreCdata(const std::string& xml) {
-    return this->parseBufferNonDestructive(xml.c_str(),
-                                           xml.length());
+    return this->parseBufferNonDestructive(
+        xml.c_str(), xml.length());
 }
 
 std::string CRapidXmlParser::rootElementName() const {
@@ -61,12 +62,14 @@ bool CRapidXmlParser::rootElementAttributes(TStrStrMap& rootAttributes) const {
         return false;
     }
 
-    for (const TCharRapidXmlAttribute* attr = root->first_attribute(); attr != nullptr; attr = attr->next_attribute()) {
+    for (const TCharRapidXmlAttribute* attr = root->first_attribute();
+         attr != nullptr; attr = attr->next_attribute()) {
         // NB: where there are multiple attributes with the same name this keeps
         //     the last one (only keeping one attribute with a given name is a
         //     limitation throughout our XML encapsulation classes, but it
         //     hasn't been a problem in practice to date)
-        rootAttributes[std::string(attr->name(), attr->name_size())].assign(attr->value(), attr->value_size());
+        rootAttributes[std::string(attr->name(), attr->name_size())].assign(
+            attr->value(), attr->value_size());
     }
 
     return true;
@@ -88,7 +91,8 @@ bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildren
     return this->toNodeHierarchy(pool, rootNodePtr);
 }
 
-bool CRapidXmlParser::toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const {
+bool CRapidXmlParser::toNodeHierarchy(CStringCache& cache,
+                                      CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const {
     // Because both the pool and the nodes use shared pointers, it doesn't
     // matter if the pool that originally allocates the nodes is destroyed
     // before the nodes themselves. Hence we can get away with implementing
@@ -98,7 +102,8 @@ bool CRapidXmlParser::toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren:
     return this->toNodeHierarchy(pool, cache, rootNodePtr);
 }
 
-bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const {
+bool CRapidXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool,
+                                      CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const {
     rootNodePtr.reset();
 
     const TCharRapidXmlNode* root(m_Doc.first_node());
@@ -175,7 +180,8 @@ bool CRapidXmlParser::toNodeHierarchy(const TCharRapidXmlNode& parentNode,
     // Take advantage of friendship to add attributes directly to the parent
     // node
     attr = parentNode.first_attribute();
-    for (CXmlNode::TStrStrPrVecItr iter = nodePtr->m_Attributes.begin(); iter != nodePtr->m_Attributes.end(); ++iter) {
+    for (CXmlNode::TStrStrPrVecItr iter = nodePtr->m_Attributes.begin();
+         iter != nodePtr->m_Attributes.end(); ++iter) {
         // Here we take advantage of friendship to directly modify the
         // CXmlNode's attributes map, thus avoiding the need to build a
         // separate map and then copy it
@@ -325,18 +331,21 @@ void CRapidXmlParser::convert(bool indent, const CXmlNodeWithChildren& root, std
 
     // Root node
     TCharRapidXmlNode* rootNode(doc.allocate_node(
-        rapidxml::node_element, root.name().c_str(), root.value().empty() ? nullptr : root.value().c_str(), nameLen, valueLen));
+        rapidxml::node_element, root.name().c_str(),
+        root.value().empty() ? nullptr : root.value().c_str(), nameLen, valueLen));
 
     doc.append_node(rootNode);
 
     const CXmlNode::TStrStrPrVec& attrs = root.attributes();
-    for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) {
+    for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin();
+         attrIter != attrs.end(); ++attrIter) {
         nameLen = attrIter->first.length();
         valueLen = attrIter->second.length();
         approxLen += 5 + nameLen + valueLen;
 
         TCharRapidXmlAttribute* attr(doc.allocate_attribute(
-            attrIter->first.c_str(), attrIter->second.empty() ? nullptr : attrIter->second.c_str(), nameLen, valueLen));
+            attrIter->first.c_str(),
+            attrIter->second.empty() ? nullptr : attrIter->second.c_str(), nameLen, valueLen));
         rootNode->append_attribute(attr);
     }
 
@@ -362,33 +371,36 @@ void CRapidXmlParser::convertChildren(const CXmlNodeWithChildren& current,
     // If a node has both children and a value, RapidXML requires that we add a
     // data node containing the value
     if (xmlNode.value_size() > 0 && !childVec.empty()) {
-        TCharRapidXmlNode* dataNode(doc.allocate_node(rapidxml::node_data, nullptr, xmlNode.value(), 0, xmlNode.value_size()));
+        TCharRapidXmlNode* dataNode(doc.allocate_node(
+            rapidxml::node_data, nullptr, xmlNode.value(), 0, xmlNode.value_size()));
         xmlNode.append_node(dataNode);
     }
 
-    for (CXmlNodeWithChildren::TChildNodePVecCItr childIter = childVec.begin(); childIter != childVec.end(); ++childIter) {
+    for (CXmlNodeWithChildren::TChildNodePVecCItr childIter = childVec.begin();
+         childIter != childVec.end(); ++childIter) {
         const CXmlNodeWithChildren* child = childIter->get();
         if (child != nullptr) {
             size_t nameLen(child->name().length());
             size_t valueLen(child->value().length());
             approxLen += 10 + nameLen * 2 + valueLen;
 
-            TCharRapidXmlNode* childNode(doc.allocate_node(rapidxml::node_element,
-                                                           child->name().c_str(),
-                                                           child->value().empty() ? nullptr : child->value().c_str(),
-                                                           nameLen,
-                                                           valueLen));
+            TCharRapidXmlNode* childNode(doc.allocate_node(
+                rapidxml::node_element, child->name().c_str(),
+                child->value().empty() ? nullptr : child->value().c_str(), nameLen, valueLen));
             xmlNode.append_node(childNode);
 
             const CXmlNode::TStrStrPrVec& attrs = child->attributes();
-            for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) {
+            for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin();
+                 attrIter != attrs.end(); ++attrIter) {
                 nameLen = attrIter->first.length();
                 valueLen = attrIter->second.length();
                 approxLen += 5 + nameLen + valueLen;
 
                 TCharRapidXmlAttribute* attr(doc.allocate_attribute(
-                    attrIter->first.c_str(), attrIter->second.empty() ? nullptr : attrIter->second.c_str(), nameLen, valueLen));
+                    attrIter->first.c_str(),
+                    attrIter->second.empty() ? nullptr : attrIter->second.c_str(),
+                    nameLen, valueLen));
                 childNode->append_attribute(attr);
             }
diff --git a/lib/core/CRapidXmlStatePersistInserter.cc b/lib/core/CRapidXmlStatePersistInserter.cc
index ad663b66e5..204d69a466 100644
--- a/lib/core/CRapidXmlStatePersistInserter.cc
+++ b/lib/core/CRapidXmlStatePersistInserter.cc
@@ -15,35 +15,41 @@ namespace ml {
 namespace core {
 
 CRapidXmlStatePersistInserter::CRapidXmlStatePersistInserter(const std::string& rootName)
-    : m_LevelParent(m_Doc.allocate_node(rapidxml::node_element, this->nameFromCache(rootName), nullptr, rootName.length())),
+    : m_LevelParent(m_Doc.allocate_node(rapidxml::node_element,
+                                        this->nameFromCache(rootName),
+                                        nullptr,
+                                        rootName.length())),
       m_ApproxLen(12 + rootName.length() * 2) {
     m_Doc.append_node(m_LevelParent);
 }
 
-CRapidXmlStatePersistInserter::CRapidXmlStatePersistInserter(const std::string& rootName, const TStrStrMap& rootAttributes)
-    : m_LevelParent(m_Doc.allocate_node(rapidxml::node_element, this->nameFromCache(rootName), nullptr, rootName.length())),
+CRapidXmlStatePersistInserter::CRapidXmlStatePersistInserter(const std::string& rootName,
+                                                             const TStrStrMap& rootAttributes)
+    : m_LevelParent(m_Doc.allocate_node(rapidxml::node_element,
+                                        this->nameFromCache(rootName),
+                                        nullptr,
+                                        rootName.length())),
      m_ApproxLen(12 + rootName.length() * 2) {
     m_Doc.append_node(m_LevelParent);
 
     for (TStrStrMapCItr iter = rootAttributes.begin(); iter != rootAttributes.end(); ++iter) {
         const std::string& name = iter->first;
         const std::string& value = iter->second;
 
-        m_LevelParent->append_attribute(
-            m_Doc.allocate_attribute(m_Doc.allocate_string(name.c_str(), name.length()),
-                                     value.empty() ? nullptr : m_Doc.allocate_string(value.c_str(), value.length()),
-                                     name.length(),
-                                     value.length()));
+        m_LevelParent->append_attribute(m_Doc.allocate_attribute(
+            m_Doc.allocate_string(name.c_str(), name.length()),
+            value.empty() ? nullptr : m_Doc.allocate_string(value.c_str(), value.length()),
+            name.length(), value.length()));
 
         m_ApproxLen += 5 + name.length() + value.length();
     }
 }
 
-void CRapidXmlStatePersistInserter::insertValue(const std::string& name, const std::string& value) {
-    m_LevelParent->append_node(m_Doc.allocate_node(rapidxml::node_element,
-                                                   this->nameFromCache(name),
-                                                   value.empty() ? nullptr : m_Doc.allocate_string(value.c_str(), value.length()),
-                                                   name.length(),
-                                                   value.length()));
+void CRapidXmlStatePersistInserter::insertValue(const std::string& name,
+                                                const std::string& value) {
+    m_LevelParent->append_node(m_Doc.allocate_node(
+        rapidxml::node_element, this->nameFromCache(name),
+        value.empty() ? nullptr : m_Doc.allocate_string(value.c_str(), value.length()),
+        name.length(), value.length()));
 
     m_ApproxLen += 5 + name.length() * 2 + value.length();
 }
@@ -65,7 +71,8 @@ void CRapidXmlStatePersistInserter::toXml(bool indent, std::string& xml) const {
 }
 
 void CRapidXmlStatePersistInserter::newLevel(const std::string& name) {
-    TCharRapidXmlNode* child(m_Doc.allocate_node(rapidxml::node_element, this->nameFromCache(name), nullptr, name.length()));
+    TCharRapidXmlNode* child(m_Doc.allocate_node(
+        rapidxml::node_element, this->nameFromCache(name), nullptr, name.length()));
     m_LevelParent->append_node(child);
 
     m_ApproxLen += 5 + name.length() * 2;
diff --git a/lib/core/CRapidXmlStateRestoreTraverser.cc b/lib/core/CRapidXmlStateRestoreTraverser.cc
index 80a6570e9a..97f9ac9f7e 100644
--- a/lib/core/CRapidXmlStateRestoreTraverser.cc
+++ b/lib/core/CRapidXmlStateRestoreTraverser.cc
@@ -11,7 +11,8 @@ namespace ml {
 namespace core {
 
 CRapidXmlStateRestoreTraverser::CRapidXmlStateRestoreTraverser(const CRapidXmlParser& parser)
-    : m_Parser(parser), m_CurrentNode(m_Parser.m_Doc.first_node()), m_IsNameCacheValid(false), m_IsValueCacheValid(false) {
+    : m_Parser(parser), m_CurrentNode(m_Parser.m_Doc.first_node()),
+      m_IsNameCacheValid(false), m_IsValueCacheValid(false) {
     if (m_CurrentNode != nullptr && m_CurrentNode->type() != rapidxml::node_element) {
         LOG_ERROR(<< "Node type " << m_CurrentNode->type() << " not supported");
         m_CurrentNode = nullptr;
@@ -101,8 +102,8 @@ CRapidXmlParser::TCharRapidXmlNode* CRapidXmlStateRestoreTraverser::nextNodeElem
         return nullptr;
     }
 
-    for (CRapidXmlParser::TCharRapidXmlNode* nextNode = m_CurrentNode->next_sibling(); nextNode != nullptr;
-         nextNode = nextNode->next_sibling()) {
+    for (CRapidXmlParser::TCharRapidXmlNode* nextNode = m_CurrentNode->next_sibling();
+         nextNode != nullptr; nextNode = nextNode->next_sibling()) {
         // We ignore comments, CDATA and any other type of node that's not an
         // element
         if (nextNode->type() == rapidxml::node_element) {
@@ -113,12 +114,14 @@ CRapidXmlParser::TCharRapidXmlNode* CRapidXmlStateRestoreTraverser::nextNodeElem
     return nullptr;
 }
 
-CRapidXmlParser::TCharRapidXmlNode* CRapidXmlStateRestoreTraverser::firstChildNodeElement() const {
+CRapidXmlParser::TCharRapidXmlNode*
+CRapidXmlStateRestoreTraverser::firstChildNodeElement() const {
     if (m_CurrentNode == nullptr) {
         return nullptr;
     }
 
-    for (CRapidXmlParser::TCharRapidXmlNode* child = m_CurrentNode->first_node(); child != nullptr; child = child->next_sibling()) {
+    for (CRapidXmlParser::TCharRapidXmlNode* child = m_CurrentNode->first_node();
+         child != nullptr; child = child->next_sibling()) {
         // We ignore comments, CDATA and any other type of node that's not an
         // element
         if (child->type() == rapidxml::node_element) {
diff --git a/lib/core/CRegex.cc b/lib/core/CRegex.cc
index 848efc2714..a1e07683ad 100644
--- a/lib/core/CRegex.cc
+++ b/lib/core/CRegex.cc
@@ -86,10 +86,13 @@ bool CRegex::init(const std::string& regex) {
         m_Regex = boost::regex(regex.c_str());
     } catch (boost::regex_error& e) {
         if (static_cast<std::size_t>(e.position()) <= regex.size()) {
-            LOG_ERROR(<< "Unable to compile regex: '" << regex << "' '" << regex.substr(0, e.position()) << "' '"
-                      << regex.substr(e.position()) << "': " << ::translateErrorCode(e.code()));
+            LOG_ERROR(<< "Unable to compile regex: '" << regex << "' '"
+                      << regex.substr(0, e.position()) << "' '"
+                      << regex.substr(e.position())
+                      << "': " << ::translateErrorCode(e.code()));
         } else {
-            LOG_ERROR(<< "Unable to compile regex: '" << regex << "': " << ::translateErrorCode(e.code()));
+            LOG_ERROR(<< "Unable to compile regex: '" << regex
+                      << "': " << ::translateErrorCode(e.code()));
         }
         return false;
     } catch (std::exception& e) {
@@ -120,7 +123,8 @@ bool CRegex::tokenise(const std::string& str, CRegex::TStrVec& tokens) const {
             tokens.push_back(std::string(matches[i].first, matches[i].second));
         }
     } catch (boost::regex_error& e) {
-        LOG_ERROR(<< "Unable to tokenise using regex: '" << str << "': " << ::translateErrorCode(e.code()));
+        LOG_ERROR(<< "Unable to tokenise using regex: '" << str
+                  << "': " << ::translateErrorCode(e.code()));
         return false;
     } catch (std::exception& e) {
         LOG_ERROR(<< "Unable to tokenise using regex: " << e.what());
@@ -146,7 +150,8 @@ bool CRegex::split(const std::string& str, CRegex::TStrVec& tokens) const {
             tokens.push_back(*i++);
         }
     } catch (boost::regex_error& e) {
-        LOG_ERROR(<< "Unable to tokenise using regex: '" << str << "': " << ::translateErrorCode(e.code()));
+        LOG_ERROR(<< "Unable to tokenise using regex: '" << str
+                  << "': " << ::translateErrorCode(e.code()));
         return false;
     } catch (std::exception& e) {
         LOG_ERROR(<< "Unable to tokenise using regex: " << e.what());
@@ -168,7 +173,8 @@ bool CRegex::matches(const std::string& str) const {
             return false;
         }
     } catch (boost::regex_error& e) {
-        LOG_ERROR(<< "Unable to match using regex: '" << str << "': " << ::translateErrorCode(e.code()));
+        LOG_ERROR(<< "Unable to match using regex: '" << str
+                  << "': " << ::translateErrorCode(e.code()));
         return false;
     } catch (std::exception& e) {
         LOG_ERROR(<< "Unable to match using regex: " << e.what());
@@ -190,14 +196,16 @@ bool CRegex::search(size_t startPos, const std::string& str, size_t& position, s
     try {
         boost::smatch matches;
 
-        if (boost::regex_search(str.begin() + startPos, str.begin() + str.length(), matches, m_Regex) == false) {
+        if (boost::regex_search(str.begin() + startPos, str.begin() + str.length(),
+                                matches, m_Regex) == false) {
             return false;
         }
 
         position = matches[0].first - str.begin();
         length = matches[0].second - matches[0].first;
     } catch (boost::regex_error& e) {
-        LOG_ERROR(<< "Unable to search using regex: '" << str << "': " << ::translateErrorCode(e.code()));
+        LOG_ERROR(<< "Unable to search using regex: '" << str
+                  << "': " << ::translateErrorCode(e.code()));
         return false;
     } catch (std::exception& e) {
         LOG_ERROR(<< "Unable to match using regex: " << e.what());
@@ -273,11 +281,13 @@ size_t CRegex::literalCount() const {
                     return count;
                 }
                 thisChar = *iter;
-                if (thisChar != 'd' && thisChar != 's' && thisChar != 'w' && thisChar != 'D' && thisChar != 'S' && thisChar != 'W' &&
+                if (thisChar != 'd' && thisChar != 's' && thisChar != 'w' &&
+                    thisChar != 'D' && thisChar != 'S' && thisChar != 'W' &&
                     (thisChar < '0' || thisChar > '9')) {
                     if (squareBracketCount == 0 && braceCount == 0) {
                         std::string::iterator nextIter(iter + 1);
-                        if (nextIter == regexStr.end() || (*nextIter != '*' && *nextIter != '+' && *nextIter != '?')) {
+                        if (nextIter == regexStr.end() ||
+                            (*nextIter != '*' && *nextIter != '+' && *nextIter != '?')) {
                             if (inSubMatch) {
                                 ++subCount;
                             } else {
@@ -331,7 +341,8 @@ size_t CRegex::literalCount() const {
             default:
                 if (squareBracketCount == 0 && braceCount == 0) {
                     std::string::iterator nextIter(iter + 1);
-                    if (nextIter == regexStr.end() || (*nextIter != '*' && *nextIter != '+' && *nextIter != '?')) {
+                    if (nextIter == regexStr.end() ||
+                        (*nextIter != '*' && *nextIter != '+' && *nextIter != '?')) {
                         if (inSubMatch) {
                             ++subCount;
                         } else {
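init() uses boost::regex_error::position() to split the pattern into the prefix that compiled and the suffix that failed, which makes the log line far easier to act on. The same error-reporting idea in isolation; compileRegex is hypothetical and e.what() stands in for the file-local ::translateErrorCode helper:

#include <boost/regex.hpp>

#include <iostream>
#include <string>

// Compile a pattern and, on failure, report the consumed prefix and the
// offending suffix separately, as CRegex::init does above.
bool compileRegex(const std::string& pattern, boost::regex& result) {
    try {
        result = boost::regex(pattern);
        return true;
    } catch (const boost::regex_error& e) {
        if (static_cast<std::size_t>(e.position()) <= pattern.size()) {
            std::cerr << "Unable to compile regex: '" << pattern.substr(0, e.position())
                      << "' '" << pattern.substr(e.position()) << "': " << e.what() << '\n';
        } else {
            std::cerr << "Unable to compile regex: '" << pattern << "': " << e.what() << '\n';
        }
        return false;
    }
}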
b/lib/core/CScopedReadLock.cc index 2f154cf501..8810430079 100644 --- a/lib/core/CScopedReadLock.cc +++ b/lib/core/CScopedReadLock.cc @@ -10,7 +10,8 @@ namespace ml { namespace core { -CScopedReadLock::CScopedReadLock(CReadWriteLock& readWriteLock) : m_ReadWriteLock(readWriteLock) { +CScopedReadLock::CScopedReadLock(CReadWriteLock& readWriteLock) + : m_ReadWriteLock(readWriteLock) { m_ReadWriteLock.readLock(); } diff --git a/lib/core/CScopedWriteLock.cc b/lib/core/CScopedWriteLock.cc index 94ae857aef..50af2719ce 100644 --- a/lib/core/CScopedWriteLock.cc +++ b/lib/core/CScopedWriteLock.cc @@ -10,7 +10,8 @@ namespace ml { namespace core { -CScopedWriteLock::CScopedWriteLock(CReadWriteLock& readWriteLock) : m_ReadWriteLock(readWriteLock) { +CScopedWriteLock::CScopedWriteLock(CReadWriteLock& readWriteLock) + : m_ReadWriteLock(readWriteLock) { m_ReadWriteLock.writeLock(); } diff --git a/lib/core/CShellArgQuoter_Windows.cc b/lib/core/CShellArgQuoter_Windows.cc index 62fa79caf3..c141a2d27a 100644 --- a/lib/core/CShellArgQuoter_Windows.cc +++ b/lib/core/CShellArgQuoter_Windows.cc @@ -18,7 +18,8 @@ std::string CShellArgQuoter::quote(const std::string& arg) { } // Simple command line options should not be quoted - if (arg.length() == 2 && (arg[0] == '/' || arg[0] == '-') && ::isalnum(static_cast(arg[1]))) { + if (arg.length() == 2 && (arg[0] == '/' || arg[0] == '-') && + ::isalnum(static_cast(arg[1]))) { return arg; } diff --git a/lib/core/CStateCompressor.cc b/lib/core/CStateCompressor.cc index 44c7ac8348..c1f1cd6e00 100644 --- a/lib/core/CStateCompressor.cc +++ b/lib/core/CStateCompressor.cc @@ -18,11 +18,13 @@ const std::string CStateCompressor::COMPRESSED_ATTRIBUTE("compressed"); const std::string CStateCompressor::END_OF_STREAM_ATTRIBUTE("eos"); CStateCompressor::CStateCompressor(CDataAdder& compressedAdder) - : m_FilterSink(compressedAdder), m_OutStream(boost::make_shared(boost::ref(m_FilterSink))) { + : m_FilterSink(compressedAdder), + m_OutStream(boost::make_shared(boost::ref(m_FilterSink))) { LOG_TRACE(<< "New compressor"); } -CDataAdder::TOStreamP CStateCompressor::addStreamed(const std::string& index, const std::string& baseId) { +CDataAdder::TOStreamP CStateCompressor::addStreamed(const std::string& index, + const std::string& baseId) { LOG_TRACE(<< "StateCompressor asking for index " << index); m_FilterSink.index(index, baseId); @@ -40,7 +42,8 @@ size_t CStateCompressor::numCompressedDocs() const { } CStateCompressor::CChunkFilter::CChunkFilter(CDataAdder& adder) - : m_Adder(adder), m_CurrentDocNum(1), m_BytesDone(0), m_MaxDocSize(adder.maxDocumentSize()), m_WritesSuccessful(true) { + : m_Adder(adder), m_CurrentDocNum(1), m_BytesDone(0), + m_MaxDocSize(adder.maxDocumentSize()), m_WritesSuccessful(true) { } std::streamsize CStateCompressor::CChunkFilter::write(const char* s, std::streamsize n) { @@ -111,12 +114,15 @@ void CStateCompressor::CChunkFilter::closeStream(bool isFinal) { } } -void CStateCompressor::CChunkFilter::index(const std::string& index, const std::string& baseId) { +void CStateCompressor::CChunkFilter::index(const std::string& index, + const std::string& baseId) { m_Index = index; m_BaseId = baseId; } -void CStateCompressor::CChunkFilter::writeInternal(const char* s, std::streamsize& written, std::streamsize& n) { +void CStateCompressor::CChunkFilter::writeInternal(const char* s, + std::streamsize& written, + std::streamsize& n) { std::size_t bytesToWrite = std::min(std::size_t(n), m_MaxDocSize - m_BytesDone); LOG_TRACE(<< "Writing string: " << std::string(&s[written], 
bytesToWrite)); m_OStream->write("\"", 1); diff --git a/lib/core/CStateDecompressor.cc b/lib/core/CStateDecompressor.cc index 6dde2c0468..626819cfff 100644 --- a/lib/core/CStateDecompressor.cc +++ b/lib/core/CStateDecompressor.cc @@ -37,18 +37,14 @@ void CStateDecompressor::setStateRestoreSearch(const std::string& index) { m_Searcher.setStateRestoreSearch(index); } -void CStateDecompressor::setStateRestoreSearch(const std::string& index, const std::string& id) { +void CStateDecompressor::setStateRestoreSearch(const std::string& index, + const std::string& id) { m_Searcher.setStateRestoreSearch(index, id); } CStateDecompressor::CDechunkFilter::CDechunkFilter(CDataSearcher& searcher) - : m_Initialised(false), - m_SentData(false), - m_Searcher(searcher), - m_CurrentDocNum(1), - m_EndOfStream(false), - m_BufferOffset(0), - m_NestedLevel(1) { + : m_Initialised(false), m_SentData(false), m_Searcher(searcher), + m_CurrentDocNum(1), m_EndOfStream(false), m_BufferOffset(0), m_NestedLevel(1) { } std::streamsize CStateDecompressor::CDechunkFilter::read(char* s, std::streamsize n) { @@ -107,7 +103,8 @@ std::streamsize CStateDecompressor::CDechunkFilter::read(char* s, std::streamsiz bool CStateDecompressor::CDechunkFilter::parseNext() { if (m_Reader->HasParseError()) { const char* error(rapidjson::GetParseError_En(m_Reader->GetParseErrorCode())); - LOG_ERROR(<< "Error parsing JSON at offset " << m_Reader->GetErrorOffset() << ": " << ((error != nullptr) ? error : "No message")); + LOG_ERROR(<< "Error parsing JSON at offset " << m_Reader->GetErrorOffset() + << ": " << ((error != nullptr) ? error : "No message")); return false; } @@ -130,7 +127,8 @@ bool CStateDecompressor::CDechunkFilter::readHeader() { while (this->parseNext()) { if (m_Handler.s_Type == SRapidJsonHandler::E_TokenKey && CStateCompressor::COMPRESSED_ATTRIBUTE.compare( - 0, CStateCompressor::COMPRESSED_ATTRIBUTE.length(), m_Handler.s_CompressedChunk, m_Handler.s_CompressedChunkLength) == 0) { + 0, CStateCompressor::COMPRESSED_ATTRIBUTE.length(), + m_Handler.s_CompressedChunk, m_Handler.s_CompressedChunkLength) == 0) { if (this->parseNext() && m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayStart) { m_Initialised = true; m_BufferOffset = 0; @@ -149,7 +147,9 @@ bool CStateDecompressor::CDechunkFilter::readHeader() { return false; } -void CStateDecompressor::CDechunkFilter::handleRead(char* s, std::streamsize n, std::streamsize& bytesDone) { +void CStateDecompressor::CDechunkFilter::handleRead(char* s, + std::streamsize n, + std::streamsize& bytesDone) { // Extract data from the JSON array "compressed" if (!m_Initialised) { return; @@ -157,7 +157,8 @@ void CStateDecompressor::CDechunkFilter::handleRead(char* s, std::streamsize n, // Copy any outstanding data if (m_BufferOffset > 0) { - std::streamsize toCopy = std::min((n - bytesDone), (m_Handler.s_CompressedChunkLength - m_BufferOffset)); + std::streamsize toCopy = std::min( + (n - bytesDone), (m_Handler.s_CompressedChunkLength - m_BufferOffset)); std::memcpy(s + bytesDone, m_Handler.s_CompressedChunk + m_BufferOffset, toCopy); bytesDone += toCopy; m_BufferOffset += toCopy; @@ -169,22 +170,25 @@ void CStateDecompressor::CDechunkFilter::handleRead(char* s, std::streamsize n, if (m_Handler.s_Type == SRapidJsonHandler::E_TokenArrayEnd) { LOG_TRACE(<< "Come to end of array"); if (this->parseNext() && m_Handler.s_Type == SRapidJsonHandler::E_TokenKey && - CStateCompressor::END_OF_STREAM_ATTRIBUTE.compare(0, - CStateCompressor::END_OF_STREAM_ATTRIBUTE.length(), - m_Handler.s_CompressedChunk, - 
m_Handler.s_CompressedChunkLength) == 0) { - LOG_DEBUG(<< "Explicit end-of-stream marker found in document with index " << m_CurrentDocNum); + CStateCompressor::END_OF_STREAM_ATTRIBUTE.compare( + 0, CStateCompressor::END_OF_STREAM_ATTRIBUTE.length(), + m_Handler.s_CompressedChunk, m_Handler.s_CompressedChunkLength) == 0) { + LOG_DEBUG(<< "Explicit end-of-stream marker found in document with index " + << m_CurrentDocNum); // Read the value of the CStateCompressor::END_OF_STREAM_ATTRIBUTE field and the closing brace if (this->parseNext() && m_Handler.s_Type != SRapidJsonHandler::E_TokenBool) { - LOG_ERROR(<< "Expecting bool value to follow " << CStateCompressor::END_OF_STREAM_ATTRIBUTE << ", got " - << m_Handler.s_Type); + LOG_ERROR(<< "Expecting bool value to follow " + << CStateCompressor::END_OF_STREAM_ATTRIBUTE + << ", got " << m_Handler.s_Type); } while (m_NestedLevel > 0) { - if (this->parseNext() && m_Handler.s_Type != SRapidJsonHandler::E_TokenObjectEnd) { - LOG_ERROR(<< "Expecting end object to follow " << CStateCompressor::END_OF_STREAM_ATTRIBUTE << ", got " - << m_Handler.s_Type); + if (this->parseNext() && + m_Handler.s_Type != SRapidJsonHandler::E_TokenObjectEnd) { + LOG_ERROR(<< "Expecting end object to follow " + << CStateCompressor::END_OF_STREAM_ATTRIBUTE + << ", got " << m_Handler.s_Type); } --m_NestedLevel; @@ -200,7 +204,8 @@ void CStateDecompressor::CDechunkFilter::handleRead(char* s, std::streamsize n, } m_SentData = true; if (m_Handler.s_CompressedChunkLength <= (n - bytesDone)) { - std::memcpy(s + bytesDone, m_Handler.s_CompressedChunk, m_Handler.s_CompressedChunkLength); + std::memcpy(s + bytesDone, m_Handler.s_CompressedChunk, + m_Handler.s_CompressedChunkLength); bytesDone += m_Handler.s_CompressedChunkLength; } else { std::streamsize toCopy = n - bytesDone; @@ -212,7 +217,9 @@ void CStateDecompressor::CDechunkFilter::handleRead(char* s, std::streamsize n, } } -std::streamsize CStateDecompressor::CDechunkFilter::endOfStream(char* s, std::streamsize n, std::streamsize bytesDone) { +std::streamsize CStateDecompressor::CDechunkFilter::endOfStream(char* s, + std::streamsize n, + std::streamsize bytesDone) { // return [ ] if not m_Initialised m_EndOfStream = true; if (!m_SentData && bytesDone == 0) { @@ -234,14 +241,18 @@ bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::Bool(bool) { return true; } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::String(const char* str, rapidjson::SizeType length, bool) { +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::String(const char* str, + rapidjson::SizeType length, + bool) { s_Type = E_TokenString; s_CompressedChunk = str; s_CompressedChunkLength = length; return true; } -bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::Key(const char* str, rapidjson::SizeType length, bool) { +bool CStateDecompressor::CDechunkFilter::SRapidJsonHandler::Key(const char* str, + rapidjson::SizeType length, + bool) { s_Type = E_TokenKey; s_CompressedChunk = str; s_CompressedChunkLength = length; diff --git a/lib/core/CStateMachine.cc b/lib/core/CStateMachine.cc index fb3dec3266..159e29c8e2 100644 --- a/lib/core/CStateMachine.cc +++ b/lib/core/CStateMachine.cc @@ -42,8 +42,10 @@ void CStateMachine::expectedNumberMachines(std::size_t number) { ms_Machines.capacity(number); } -CStateMachine -CStateMachine::create(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction, std::size_t state) { +CStateMachine CStateMachine::create(const TStrVec& alphabet, + const TStrVec& states, + 
const TSizeVecVec& transitionFunction,
+                                     std::size_t state) {
     // Validate that the alphabet, states, transition function,
     // and initial state are consistent.
 
@@ -59,7 +61,8 @@ CStateMachine::create(const TStrVec& alphabet, const TStrVec& states, const TSiz
     }
     for (const auto& function : transitionFunction) {
         if (states.size() != function.size()) {
-            LOG_ERROR(<< "Bad transition function row: " << core::CContainerPrinter::print(function));
+            LOG_ERROR(<< "Bad transition function row: "
+                      << core::CContainerPrinter::print(function));
             return result;
         }
     }
@@ -111,7 +114,8 @@ bool CStateMachine::apply(std::size_t symbol) {
         return false;
     }
     if (m_State >= table[symbol].size()) {
-        LOG_ERROR(<< "Bad state " << m_State << " not in states [" << table[symbol].size() << "]");
+        LOG_ERROR(<< "Bad state " << m_State << " not in states ["
+                  << table[symbol].size() << "]");
         return false;
     }
 
@@ -138,7 +142,8 @@ std::string CStateMachine::printSymbol(std::size_t symbol) const {
 }
 
 uint64_t CStateMachine::checksum() const {
-    return CHashing::hashCombine(static_cast<uint64_t>(m_Machine), static_cast<uint64_t>(m_State));
+    return CHashing::hashCombine(static_cast<uint64_t>(m_Machine),
+                                 static_cast<uint64_t>(m_State));
 }
 
 std::size_t CStateMachine::numberMachines() {
@@ -163,24 +168,31 @@ std::size_t CStateMachine::find(std::size_t begin, std::size_t end, const SLooku
 CStateMachine::CStateMachine() : m_Machine(BAD_MACHINE), m_State(0) {
 }
 
-CStateMachine::SMachine::SMachine(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction)
+CStateMachine::SMachine::SMachine(const TStrVec& alphabet,
+                                  const TStrVec& states,
+                                  const TSizeVecVec& transitionFunction)
     : s_Alphabet(alphabet), s_States(states), s_TransitionFunction(transitionFunction) {
 }
 
 CStateMachine::SMachine::SMachine(const SMachine& other)
-    : s_Alphabet(other.s_Alphabet), s_States(other.s_States), s_TransitionFunction(other.s_TransitionFunction) {
+    : s_Alphabet(other.s_Alphabet), s_States(other.s_States),
+      s_TransitionFunction(other.s_TransitionFunction) {
 }
 
-CStateMachine::SLookupMachine::SLookupMachine(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction)
+CStateMachine::SLookupMachine::SLookupMachine(const TStrVec& alphabet,
+                                              const TStrVec& states,
+                                              const TSizeVecVec& transitionFunction)
     : s_Alphabet(alphabet), s_States(states), s_TransitionFunction(transitionFunction) {
 }
 
 bool CStateMachine::SLookupMachine::operator==(const SMachine& rhs) const {
-    return boost::unwrap_ref(s_TransitionFunction) == rhs.s_TransitionFunction && boost::unwrap_ref(s_Alphabet) == rhs.s_Alphabet &&
+    return boost::unwrap_ref(s_TransitionFunction) == rhs.s_TransitionFunction &&
+           boost::unwrap_ref(s_Alphabet) == rhs.s_Alphabet &&
            boost::unwrap_ref(s_States) == rhs.s_States;
 }
 
-CStateMachine::CMachineDeque::CMachineDeque() : m_Capacity(DEFAULT_CAPACITY), m_NumberMachines(0) {
+CStateMachine::CMachineDeque::CMachineDeque()
+    : m_Capacity(DEFAULT_CAPACITY), m_NumberMachines(0) {
     m_Machines.push_back(TMachineVec());
     m_Machines.back().reserve(m_Capacity);
 }
diff --git a/lib/core/CStatePersistInserter.cc b/lib/core/CStatePersistInserter.cc
index 60e3d14a24..5f9be6fcaf 100644
--- a/lib/core/CStatePersistInserter.cc
+++ b/lib/core/CStatePersistInserter.cc
@@ -11,11 +11,15 @@ namespace core {
 CStatePersistInserter::~CStatePersistInserter() {
 }
 
-void CStatePersistInserter::insertValue(const std::string& name, double value, CIEEE754::EPrecision precision) {
+void CStatePersistInserter::insertValue(const std::string& name,
+                                        double value,
+                                        CIEEE754::EPrecision
precision) { this->insertValue(name, CStringUtils::typeToStringPrecise(value, precision)); } -CStatePersistInserter::CAutoLevel::CAutoLevel(const std::string& name, CStatePersistInserter& inserter) : m_Inserter(inserter) { +CStatePersistInserter::CAutoLevel::CAutoLevel(const std::string& name, + CStatePersistInserter& inserter) + : m_Inserter(inserter) { m_Inserter.newLevel(name); } diff --git a/lib/core/CStatistics.cc b/lib/core/CStatistics.cc index 6af3c29cb7..d8cc7dc1c0 100644 --- a/lib/core/CStatistics.cc +++ b/lib/core/CStatistics.cc @@ -34,7 +34,10 @@ static const std::string KEY_TAG("a"); static const std::string VALUE_TAG("b"); //! Helper function to add a string/int pair to JSON writer -void addStringInt(TGenericLineWriter& writer, const std::string& name, const std::string& description, uint64_t stat) { +void addStringInt(TGenericLineWriter& writer, + const std::string& name, + const std::string& description, + uint64_t stat) { writer.StartObject(); writer.String(NAME_TYPE); @@ -117,88 +120,73 @@ std::ostream& operator<<(std::ostream& o, const CStatistics& /*stats*/) { writer.StartArray(); - addStringInt(writer, - "E_NumberNewPeopleNotAllowed", - "Number of new people not allowed", + addStringInt(writer, "E_NumberNewPeopleNotAllowed", "Number of new people not allowed", CStatistics::stat(stat_t::E_NumberNewPeopleNotAllowed).value()); - addStringInt(writer, "E_NumberNewPeople", "Number of new people created", CStatistics::stat(stat_t::E_NumberNewPeople).value()); + addStringInt(writer, "E_NumberNewPeople", "Number of new people created", + CStatistics::stat(stat_t::E_NumberNewPeople).value()); - addStringInt(writer, - "E_NumberNewPeopleRecycled", + addStringInt(writer, "E_NumberNewPeopleRecycled", "Number of new people recycled into existing space", CStatistics::stat(stat_t::E_NumberNewPeopleRecycled).value()); - addStringInt(writer, - "E_NumberApiRecordsHandled", + addStringInt(writer, "E_NumberApiRecordsHandled", "Number of records successfully ingested into the engine API", CStatistics::stat(stat_t::E_NumberApiRecordsHandled).value()); - addStringInt(writer, - "E_MemoryUsage", + addStringInt(writer, "E_MemoryUsage", "The estimated memory currently used by the engine and models", CStatistics::stat(stat_t::E_MemoryUsage).value()); - addStringInt(writer, - "E_NumberMemoryUsageChecks", + addStringInt(writer, "E_NumberMemoryUsageChecks", "Number of times a model memory usage check has been carried out", CStatistics::stat(stat_t::E_NumberMemoryUsageChecks).value()); - addStringInt(writer, - "E_NumberMemoryUsageEstimates", + addStringInt(writer, "E_NumberMemoryUsageEstimates", "Number of times a partial memory usage estimate has been carried out", CStatistics::stat(stat_t::E_NumberMemoryUsageEstimates).value()); - addStringInt(writer, - "E_NumberRecordsNoTimeField", + addStringInt(writer, "E_NumberRecordsNoTimeField", "Number of records that didn't contain a Time field", CStatistics::stat(stat_t::E_NumberRecordsNoTimeField).value()); - addStringInt(writer, - "E_NumberTimeFieldConversionErrors", + addStringInt(writer, "E_NumberTimeFieldConversionErrors", "Number of records where the format of the time field could not be converted", CStatistics::stat(stat_t::E_NumberTimeFieldConversionErrors).value()); - addStringInt(writer, - "E_NumberTimeOrderErrors", - "Number of records not in ascending time order", + addStringInt(writer, "E_NumberTimeOrderErrors", "Number of records not in ascending time order", CStatistics::stat(stat_t::E_NumberTimeOrderErrors).value()); - addStringInt(writer, - 
"E_NumberNewAttributesNotAllowed", - "Number of new attributes not allowed", + addStringInt(writer, "E_NumberNewAttributesNotAllowed", "Number of new attributes not allowed", CStatistics::stat(stat_t::E_NumberNewAttributesNotAllowed).value()); - addStringInt( - writer, "E_NumberNewAttributes", "Number of new attributes created", CStatistics::stat(stat_t::E_NumberNewAttributes).value()); + addStringInt(writer, "E_NumberNewAttributes", "Number of new attributes created", + CStatistics::stat(stat_t::E_NumberNewAttributes).value()); - addStringInt(writer, - "E_NumberNewAttributesRecycled", + addStringInt(writer, "E_NumberNewAttributesRecycled", "Number of new attributes recycled into existing space", CStatistics::stat(stat_t::E_NumberNewAttributesRecycled).value()); - addStringInt(writer, "E_NumberByFields", "Number of 'by' fields within the model", CStatistics::stat(stat_t::E_NumberByFields).value()); + addStringInt(writer, "E_NumberByFields", "Number of 'by' fields within the model", + CStatistics::stat(stat_t::E_NumberByFields).value()); - addStringInt( - writer, "E_NumberOverFields", "Number of 'over' fields within the model", CStatistics::stat(stat_t::E_NumberOverFields).value()); + addStringInt(writer, "E_NumberOverFields", "Number of 'over' fields within the model", + CStatistics::stat(stat_t::E_NumberOverFields).value()); - addStringInt(writer, - "E_NumberExcludedFrequentInvocations", + addStringInt(writer, "E_NumberExcludedFrequentInvocations", "The number of times 'ExcludeFrequent' has been invoked by the model", CStatistics::stat(stat_t::E_NumberExcludedFrequentInvocations).value()); - addStringInt(writer, - "E_NumberSamplesOutsideLatencyWindow", + addStringInt(writer, "E_NumberSamplesOutsideLatencyWindow", "The number of samples received outside the latency window", CStatistics::stat(stat_t::E_NumberSamplesOutsideLatencyWindow).value()); - addStringInt(writer, - "E_NumberMemoryLimitModelCreationFailures", - "The number of model creation failures from being over memory limit", - CStatistics::stat(stat_t::E_NumberMemoryLimitModelCreationFailures).value()); + addStringInt( + writer, "E_NumberMemoryLimitModelCreationFailures", + "The number of model creation failures from being over memory limit", + CStatistics::stat(stat_t::E_NumberMemoryLimitModelCreationFailures).value()); - addStringInt(writer, - "E_NumberPrunedItems", + addStringInt(writer, "E_NumberPrunedItems", "The number of old people or attributes pruned from the models", CStatistics::stat(stat_t::E_NumberPrunedItems).value()); diff --git a/lib/core/CStopWatch.cc b/lib/core/CStopWatch.cc index 9890108ecc..139e5675f0 100644 --- a/lib/core/CStopWatch.cc +++ b/lib/core/CStopWatch.cc @@ -12,7 +12,8 @@ namespace ml { namespace core { -CStopWatch::CStopWatch(bool startRunning) : m_IsRunning(false), m_Start(0), m_AccumulatedTime(0) { +CStopWatch::CStopWatch(bool startRunning) + : m_IsRunning(false), m_Start(0), m_AccumulatedTime(0) { if (startRunning) { this->start(); } diff --git a/lib/core/CStoredStringPtr.cc b/lib/core/CStoredStringPtr.cc index 0d150af511..6e0e8462c1 100644 --- a/lib/core/CStoredStringPtr.cc +++ b/lib/core/CStoredStringPtr.cc @@ -18,10 +18,12 @@ namespace core { CStoredStringPtr::CStoredStringPtr() noexcept : m_String{} { } -CStoredStringPtr::CStoredStringPtr(const std::string& str) : m_String{boost::make_shared(str)} { +CStoredStringPtr::CStoredStringPtr(const std::string& str) + : m_String{boost::make_shared(str)} { } -CStoredStringPtr::CStoredStringPtr(std::string&& str) : 
m_String{boost::make_shared(std::move(str))} { +CStoredStringPtr::CStoredStringPtr(std::string&& str) + : m_String{boost::make_shared(std::move(str))} { } void CStoredStringPtr::swap(CStoredStringPtr& other) noexcept { diff --git a/lib/core/CStrFTime_Windows.cc b/lib/core/CStrFTime_Windows.cc index 41f0116865..bab6f304b1 100644 --- a/lib/core/CStrFTime_Windows.cc +++ b/lib/core/CStrFTime_Windows.cc @@ -37,7 +37,8 @@ size_t CStrFTime::strFTime(char* buf, size_t maxSize, const char* format, struct core_t::TTime diffHours(diffMinutes / 60); std::ostringstream strm; - strm << ((diffSeconds < 0) ? '-' : '+') << std::setfill('0') << std::setw(2) << ::_abs64(diffHours) << std::setfill('0') + strm << ((diffSeconds < 0) ? '-' : '+') << std::setfill('0') + << std::setw(2) << ::_abs64(diffHours) << std::setfill('0') << std::setw(2) << (::_abs64(diffMinutes) % 60); adjFormat.replace(zPos, 2, strm.str()); diff --git a/lib/core/CStrPTime_Linux.cc b/lib/core/CStrPTime_Linux.cc index 30283adba3..61e4ce8429 100644 --- a/lib/core/CStrPTime_Linux.cc +++ b/lib/core/CStrPTime_Linux.cc @@ -72,7 +72,8 @@ char* CStrPTime::strPTime(const char* buf, const char* format, struct tm* tm) { if (zPos != std::string::npos) { // If there's anything except whitespace after the // %z it's too complicated - if (adjFormat.find_first_not_of(CStringUtils::WHITESPACE_CHARS, zPos + 2) != std::string::npos) { + if (adjFormat.find_first_not_of(CStringUtils::WHITESPACE_CHARS, zPos + 2) != + std::string::npos) { return 0; } diff --git a/lib/core/CStrPTime_Windows.cc b/lib/core/CStrPTime_Windows.cc index bb0ce167d4..7ed53093f6 100644 --- a/lib/core/CStrPTime_Windows.cc +++ b/lib/core/CStrPTime_Windows.cc @@ -80,7 +80,8 @@ char* CStrPTime::strPTime(const char* buf, const char* format, struct tm* tm) { if (zPos != std::string::npos) { // If there's anything except whitespace after the // %z it's too complicated - if (adjFormat.find_first_not_of(CStringUtils::WHITESPACE_CHARS, zPos + 2) != std::string::npos) { + if (adjFormat.find_first_not_of(CStringUtils::WHITESPACE_CHARS, zPos + 2) != + std::string::npos) { return 0; } diff --git a/lib/core/CStringCache.cc b/lib/core/CStringCache.cc index da7a966534..23f039832a 100644 --- a/lib/core/CStringCache.cc +++ b/lib/core/CStringCache.cc @@ -71,7 +71,8 @@ size_t CStringCache::CStrHash::operator()(const std::string& str) const { } // Caller is responsible for ensuring that str is not NULL and end > str -inline CStringCache::CCharPHash::CCharPHash(const char* str, const char* end) : m_Hash(0) { +inline CStringCache::CCharPHash::CCharPHash(const char* str, const char* end) + : m_Hash(0) { // It is essential that the result of this hash matches that of the method // above size_t hash(*str); @@ -86,11 +87,13 @@ inline size_t CStringCache::CCharPHash::operator()(const char*) const { return m_Hash; } -inline CStringCache::CCharPStrEqual::CCharPStrEqual(size_t length) : m_Length(length) { +inline CStringCache::CCharPStrEqual::CCharPStrEqual(size_t length) + : m_Length(length) { } // Caller is responsible for ensuring that lhs is not NULL -inline bool CStringCache::CCharPStrEqual::operator()(const char* lhs, const std::string& rhs) const { +inline bool CStringCache::CCharPStrEqual::operator()(const char* lhs, + const std::string& rhs) const { return m_Length == rhs.length() && ::memcmp(lhs, rhs.data(), m_Length) == 0; } } diff --git a/lib/core/CStringSimilarityTester.cc b/lib/core/CStringSimilarityTester.cc index c93ccaa5dd..c2599116cf 100644 --- a/lib/core/CStringSimilarityTester.cc +++ 
b/lib/core/CStringSimilarityTester.cc @@ -15,12 +15,16 @@ const int CStringSimilarityTester::MINUS_INFINITE_INT(std::numeric_limits:: CStringSimilarityTester::CStringSimilarityTester() : m_Compressor(true) { } -bool CStringSimilarityTester::similarity(const std::string& first, const std::string& second, double& result) const { +bool CStringSimilarityTester::similarity(const std::string& first, + const std::string& second, + double& result) const { size_t firstCompLength(0); size_t secondCompLength(0); - if (m_Compressor.addString(first) == false || m_Compressor.compressedLength(true, firstCompLength) == false || - m_Compressor.addString(second) == false || m_Compressor.compressedLength(true, secondCompLength) == false) { + if (m_Compressor.addString(first) == false || + m_Compressor.compressedLength(true, firstCompLength) == false || + m_Compressor.addString(second) == false || + m_Compressor.compressedLength(true, secondCompLength) == false) { // The compressor will have logged the detailed reason LOG_ERROR(<< "Compression problem"); return false; @@ -45,8 +49,9 @@ bool CStringSimilarityTester::similarity(const std::string& first, size_t secondPlusFirstCompLength(0); if (m_Compressor.addString(first) == false || m_Compressor.addString(second) == false || - m_Compressor.compressedLength(true, firstPlusSecondCompLength) == false || m_Compressor.addString(second) == false || - m_Compressor.addString(first) == false || m_Compressor.compressedLength(true, secondPlusFirstCompLength) == false) { + m_Compressor.compressedLength(true, firstPlusSecondCompLength) == false || + m_Compressor.addString(second) == false || m_Compressor.addString(first) == false || + m_Compressor.compressedLength(true, secondPlusFirstCompLength) == false) { // The compressor will have logged the detailed reason LOG_ERROR(<< "Compression problem"); return false; @@ -69,7 +74,9 @@ bool CStringSimilarityTester::compressedLengthOf(const std::string& str, size_t& return m_Compressor.addString(str) && m_Compressor.compressedLength(true, length); } -int** CStringSimilarityTester::setupBerghelRoachMatrix(int maxDist, TScopedIntArray& dataArray, TScopedIntPArray& matrixArray) { +int** CStringSimilarityTester::setupBerghelRoachMatrix(int maxDist, + TScopedIntArray& dataArray, + TScopedIntPArray& matrixArray) { // Ensure that we don't suffer memory corruption due to an incorrect input if (maxDist <= 0) { LOG_ERROR(<< "Programmatic error - maxDist too small " << maxDist); diff --git a/lib/core/CStringUtils.cc b/lib/core/CStringUtils.cc index dec2a9bcd0..b0401e2382 100644 --- a/lib/core/CStringUtils.cc +++ b/lib/core/CStringUtils.cc @@ -170,7 +170,9 @@ size_t CStringUtils::replace(const std::string& from, const std::string& to, std return count; } -size_t CStringUtils::replaceFirst(const std::string& from, const std::string& to, std::string& str) { +size_t CStringUtils::replaceFirst(const std::string& from, + const std::string& to, + std::string& str) { if (from == to) { return 0; } @@ -360,17 +362,25 @@ std::string CStringUtils::typeToStringPrecise(double d, CIEEE754::EPrecision pre int ret = 0; switch (precision) { case CIEEE754::E_HalfPrecision: - ret = std::fabs(d) < 1.0 && d != 0.0 ? ::sprintf(buf, "%.2e", clampToReadable(CIEEE754::round(d, CIEEE754::E_HalfPrecision))) - : ::sprintf(buf, "%.3g", clampToReadable(CIEEE754::round(d, CIEEE754::E_HalfPrecision))); + ret = std::fabs(d) < 1.0 && d != 0.0 + ? 
::sprintf(buf, "%.2e",
+                            clampToReadable(CIEEE754::round(d, CIEEE754::E_HalfPrecision)))
+                  : ::sprintf(buf, "%.3g",
+                            clampToReadable(CIEEE754::round(d, CIEEE754::E_HalfPrecision)));
         break;
     case CIEEE754::E_SinglePrecision:
-        ret = std::fabs(d) < 1.0 && d != 0.0 ? ::sprintf(buf, "%.6e", clampToReadable(CIEEE754::round(d, CIEEE754::E_SinglePrecision)))
-                                             : ::sprintf(buf, "%.7g", clampToReadable(CIEEE754::round(d, CIEEE754::E_SinglePrecision)));
+        ret = std::fabs(d) < 1.0 && d != 0.0
+                  ? ::sprintf(buf, "%.6e",
+                              clampToReadable(CIEEE754::round(d, CIEEE754::E_SinglePrecision)))
+                  : ::sprintf(buf, "%.7g",
+                              clampToReadable(CIEEE754::round(d, CIEEE754::E_SinglePrecision)));
         break;
     case CIEEE754::E_DoublePrecision:
-        ret = std::fabs(d) < 1.0 && d != 0.0 ? ::sprintf(buf, "%.14e", clampToReadable(d)) : ::sprintf(buf, "%.15g", clampToReadable(d));
+        ret = std::fabs(d) < 1.0 && d != 0.0
+                  ? ::sprintf(buf, "%.14e", clampToReadable(d))
+                  : ::sprintf(buf, "%.15g", clampToReadable(d));
         break;
     }
 
@@ -695,7 +705,8 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, short& i)
     }
 
     // Now check if the result is in range for short
-    if (ret < std::numeric_limits<short>::min() || ret > std::numeric_limits<short>::max()) {
+    if (ret < std::numeric_limits<short>::min() ||
+        ret > std::numeric_limits<short>::max()) {
        if (!silent) {
            LOG_ERROR(<< "Unable to convert string '" << str
                      << "'"
@@ -829,7 +840,8 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, double& d)
 bool CStringUtils::_stringToType(bool silent, const std::string& str, char& c) {
     if (str.length() != 1) {
         if (!silent) {
-            LOG_ERROR(<< "Unable to convert string '" << str << "' to char: " << (str.empty() ? "too short" : "too long"));
+            LOG_ERROR(<< "Unable to convert string '" << str
+                      << "' to char: " << (str.empty() ? "too short" : "too long"));
         }
         return false;
     }
@@ -846,7 +858,10 @@ bool CStringUtils::_stringToType(bool /* silent */, const std::string& str, std:
     return true;
 }
 
-void CStringUtils::tokenise(const std::string& delim, const std::string& str, TStrVec& tokens, std::string& remainder) {
+void CStringUtils::tokenise(const std::string& delim,
+                            const std::string& str,
+                            TStrVec& tokens,
+                            std::string& remainder) {
     std::string::size_type pos(0);
 
     for (;;) {
@@ -861,7 +876,8 @@ void CStringUtils::tokenise(const std::string& delim, const std::string& str, TS
     }
 }
 
-std::string CStringUtils::longestCommonSubstr(const std::string& str1, const std::string& str2) {
+std::string CStringUtils::longestCommonSubstr(const std::string& str1,
+                                              const std::string& str2) {
     std::string common;
     if (str1.empty() || str2.empty()) {
         return common;
@@ -908,7 +924,8 @@ std::string CStringUtils::longestCommonSubstr(const std::string& str1, const std
     return common;
 }
 
-std::string CStringUtils::longestCommonSubsequence(const std::string& str1, const std::string& str2) {
+std::string CStringUtils::longestCommonSubsequence(const std::string& str1,
+                                                   const std::string& str2) {
     std::string common;
     if (str1.empty() || str2.empty()) {
         return common;
@@ -988,7 +1005,8 @@ std::string CStringUtils::wideToNarrow(const std::wstring& wideStr) {
     // cope with UTF8 either, so we should replace it with a proper
     // string conversion library, e.g. ICU
     using TWCharTCType = std::ctype<wchar_t>;
-    std::use_facet<TWCharTCType>(CStringUtils::locale()).narrow(wideStr.data(), wideStr.data() + wideStr.length(), '?', &narrowStr[0]);
+    std::use_facet<TWCharTCType>(CStringUtils::locale())
+        .narrow(wideStr.data(), wideStr.data() + wideStr.length(), '?', &narrowStr[0]);
     return narrowStr;
 }
 
@@ -1001,7 +1019,8 @@ std::wstring CStringUtils::narrowToWide(const std::string& narrowStr) {
     // cope with UTF8 either, so we should replace it with a proper
     // string conversion library, e.g. ICU
     using TWCharTCType = std::ctype<wchar_t>;
-    std::use_facet<TWCharTCType>(CStringUtils::locale()).widen(narrowStr.data(), narrowStr.data() + narrowStr.length(), &wideStr[0]);
+    std::use_facet<TWCharTCType>(CStringUtils::locale())
+        .widen(narrowStr.data(), narrowStr.data() + narrowStr.length(), &wideStr[0]);
     return wideStr;
 }
diff --git a/lib/core/CThread.cc b/lib/core/CThread.cc
index ea1cde1b09..ff47aa9aa2 100644
--- a/lib/core/CThread.cc
+++ b/lib/core/CThread.cc
@@ -179,7 +179,8 @@ bool CThread::cancelBlockedIo(TThreadId threadId) {
     if (ret != 0) {
         // Don't report an error if the thread has already exited
         if (ret != ESRCH) {
-            LOG_ERROR(<< "Error cancelling blocked IO in thread " << threadId << ": " << ::strerror(ret));
+            LOG_ERROR(<< "Error cancelling blocked IO in thread " << threadId
+                      << ": " << ::strerror(ret));
             return false;
         }
     }
diff --git a/lib/core/CThread_Windows.cc b/lib/core/CThread_Windows.cc
index de09dde9c1..32488851b7 100644
--- a/lib/core/CThread_Windows.cc
+++ b/lib/core/CThread_Windows.cc
@@ -171,7 +171,8 @@ bool CThread::cancelBlockedIo(TThreadId threadId) {
     // Note inconsistency in Win32 thread function return codes here - the error
     // return is NULL rather than INVALID_HANDLE_VALUE!
     if (threadHandle == 0) {
-        LOG_ERROR(<< "Error cancelling blocked IO in thread " << threadId << ": " << CWindowsError());
+        LOG_ERROR(<< "Error cancelling blocked IO in thread " << threadId
+                  << ": " << CWindowsError());
         return false;
     }
 
@@ -180,7 +181,8 @@ bool CThread::cancelBlockedIo(TThreadId threadId) {
         // Don't report an error if there is no blocking call to cancel
         if (errCode != ERROR_NOT_FOUND) {
-            LOG_ERROR(<< "Error cancelling blocked IO in thread " << threadId << ": " << CWindowsError(errCode));
+            LOG_ERROR(<< "Error cancelling blocked IO in thread " << threadId
+                      << ": " << CWindowsError(errCode));
             CloseHandle(threadHandle);
             return false;
         }
diff --git a/lib/core/CTimeUtils.cc b/lib/core/CTimeUtils.cc
index 695dbd322c..6ed6adce13 100644
--- a/lib/core/CTimeUtils.cc
+++ b/lib/core/CTimeUtils.cc
@@ -47,7 +47,9 @@ int64_t CTimeUtils::toEpochMs(core_t::TTime t) {
     return static_cast<int64_t>(t) * 1000;
 }
 
-bool CTimeUtils::strptime(const std::string& format, const std::string& dateTime, core_t::TTime& preTime) {
+bool CTimeUtils::strptime(const std::string& format,
+                          const std::string& dateTime,
+                          core_t::TTime& preTime) {
     if (CTimeUtils::strptimeSilent(format, dateTime, preTime) == false) {
         LOG_ERROR(<< "Unable to convert " << dateTime << " to " << format);
         return false;
@@ -56,7 +58,9 @@ bool CTimeUtils::strptime(const std::string& format, const std::string& dateTime
     return true;
 }
 
-bool CTimeUtils::strptimeSilent(const std::string& format, const std::string& dateTime, core_t::TTime& preTime) {
+bool CTimeUtils::strptimeSilent(const std::string& format,
+                                const std::string& dateTime,
+                                core_t::TTime& preTime) {
     struct tm t;
     ::memset(&t, 0, sizeof(struct tm));
 
@@ -201,7 +205,8 @@ CTimeUtils::CDateWordCache::CDateWordCache() {
         m_DateWords.insert(buf);
     }
 
-    static const int DAYS_PER_MONTH[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+    static const int DAYS_PER_MONTH[] = {31, 28, 31, 30, 31, 30,
+                                         31, 31, 30, 31, 30, 31};
 
     // Populate other month names and abbreviations
     for (int month = 1; month < 12; ++month) {
diff --git a/lib/core/CTimezone.cc b/lib/core/CTimezone.cc
index c99159e797..19e868763e 100644
--- a/lib/core/CTimezone.cc
+++ b/lib/core/CTimezone.cc
@@ -44,7 +44,8 @@ bool CTimezone::timezoneName(const std::string& name) {
     CScopedFastLock lock(m_Mutex);
 
     if (CSetEnv::setEnv("TZ", name.c_str(), 1) != 0) {
-        LOG_ERROR(<< "Unable to set TZ environment variable to " << name << " : " << ::strerror(errno));
+        LOG_ERROR(<< "Unable to set TZ environment variable to " << name
+                  << " : " << ::strerror(errno));
         return false;
     }
 
@@ -106,7 +107,8 @@ bool CTimezone::dateFields(core_t::TTime utcTime,
         monthsSinceJanuary = result.tm_mon;
         daysSinceJanuary1st = result.tm_yday;
         yearsSince1900 = result.tm_year;
-        secondsSinceMidnight = 3600 * result.tm_hour + 60 * result.tm_min + result.tm_sec;
+        secondsSinceMidnight = 3600 * result.tm_hour + 60 * result.tm_min +
+                               result.tm_sec;
         return true;
     }
 
diff --git a/lib/core/CTimezone_Windows.cc b/lib/core/CTimezone_Windows.cc
index 80d7a00d74..1cc34b9b53 100644
--- a/lib/core/CTimezone_Windows.cc
+++ b/lib/core/CTimezone_Windows.cc
@@ -50,7 +50,10 @@ CTimezone::CTimezone() {
     path += "/date_time_zonespec.csv";
     try {
         m_TimezoneDb.load_from_file(path);
-    } catch (std::exception& ex) { LOG_ERROR(<< "Failed to load Boost timezone database from " << path << " : " << ex.what()); }
+    } catch (std::exception& ex) {
+        LOG_ERROR(<< "Failed to load Boost timezone database from " << path
+                  << " : " << ex.what());
+    }
 }
 
 CTimezone::~CTimezone() {
@@ -78,7 +81,8 @@ bool CTimezone::timezoneName(const std::string& name) {
     m_Timezone = m_TimezoneDb.time_zone_from_region(name);
     if (m_Timezone == 0) {
-        LOG_ERROR(<< "Unable to set timezone to " << name << " - operating system timezone settings will be used instead");
+        LOG_ERROR(<< "Unable to set timezone to " << name
+                  << " - operating system timezone settings will be used instead");
 
         m_Name.clear();
         return false;
@@ -110,7 +114,8 @@ std::string CTimezone::dstAbbrev() const {
         return _tzname[1];
     }
 
-    return m_Timezone->has_dst() ? m_Timezone->dst_zone_abbrev() : m_Timezone->std_zone_abbrev();
+    return m_Timezone->has_dst() ? m_Timezone->dst_zone_abbrev()
+                                 : m_Timezone->std_zone_abbrev();
 }
 
 core_t::TTime CTimezone::localToUtc(struct tm& localTime) const {
@@ -128,13 +133,15 @@ core_t::TTime CTimezone::localToUtc(struct tm& localTime) const {
     static const boost::posix_time::ptime EPOCH(boost::gregorian::date(1970, 1, 1));
 
     boost::gregorian::date dateIn(boost::gregorian::date_from_tm(localTime));
-    boost::posix_time::time_duration timeIn(static_cast<boost::posix_time::time_duration::hour_type>(localTime.tm_hour),
-                                            static_cast<boost::posix_time::time_duration::min_type>(localTime.tm_min),
-                                            static_cast<boost::posix_time::time_duration::sec_type>(localTime.tm_sec));
+    boost::posix_time::time_duration timeIn(
+        static_cast<boost::posix_time::time_duration::hour_type>(localTime.tm_hour),
+        static_cast<boost::posix_time::time_duration::min_type>(localTime.tm_min),
+        static_cast<boost::posix_time::time_duration::sec_type>(localTime.tm_sec));
 
     boost::posix_time::time_duration diff;
     try {
-        boost::local_time::local_date_time boostLocal(dateIn, timeIn, m_Timezone, boost::local_time::local_date_time::EXCEPTION_ON_ERROR);
+        boost::local_time::local_date_time boostLocal(
+            dateIn, timeIn, m_Timezone, boost::local_time::local_date_time::EXCEPTION_ON_ERROR);
 
         diff = boostLocal.utc_time() - EPOCH;
         localTime.tm_isdst = (boostLocal.is_dst() ?
1 : 0); } catch (boost::local_time::ambiguous_result&) { @@ -196,7 +203,8 @@ bool CTimezone::dateFields(core_t::TTime utcTime, monthsSinceJanuary = result.tm_mon; daysSinceJanuary1st = result.tm_yday; yearsSince1900 = result.tm_year; - secondsSinceMidnight = 3600 * result.tm_hour + 60 * result.tm_min + result.tm_sec; + secondsSinceMidnight = 3600 * result.tm_hour + 60 * result.tm_min + + result.tm_sec; return true; } diff --git a/lib/core/CUname.cc b/lib/core/CUname.cc index 3ee4579aaf..47b07f3ec8 100644 --- a/lib/core/CUname.cc +++ b/lib/core/CUname.cc @@ -92,14 +92,16 @@ std::string CUname::mlPlatform() { // This isn't great because it's assuming that any C runtime library // that doesn't identify itself as glibc is musl, but it's hard to do // better as musl goes out of its way to be hard to detect - if (::confstr(_CS_GNU_LIBC_VERSION, buffer, sizeof(buffer)) == 0 || ::strstr(buffer, "glibc") == 0) { + if (::confstr(_CS_GNU_LIBC_VERSION, buffer, sizeof(buffer)) == 0 || + ::strstr(buffer, "glibc") == 0) { os += "-musl"; } } #endif const std::string& machine = CStringUtils::toLower(name.machine); - if (machine.length() == 4 && machine[0] == 'i' && machine[2] == '8' && machine[3] == '6') { + if (machine.length() == 4 && machine[0] == 'i' && machine[2] == '8' && + machine[3] == '6') { return os + "-x86"; } diff --git a/lib/core/CUname_Windows.cc b/lib/core/CUname_Windows.cc index d451a2efc1..9808f95ef4 100644 --- a/lib/core/CUname_Windows.cc +++ b/lib/core/CUname_Windows.cc @@ -30,21 +30,25 @@ bool queryKernelVersion(uint16_t& major, uint16_t& minor, uint16_t& build) { DWORD handle(0); DWORD size(GetFileVersionInfoSize(KERNEL32_DLL, &handle)); if (size == 0) { - LOG_ERROR(<< "Error getting file version info size for " << KERNEL32_DLL << " - error code : " << CWindowsError()); + LOG_ERROR(<< "Error getting file version info size for " << KERNEL32_DLL + << " - error code : " << CWindowsError()); return false; } using TScopedCharArray = boost::scoped_array; TScopedCharArray buffer(new char[size]); if (GetFileVersionInfo(KERNEL32_DLL, handle, size, buffer.get()) == FALSE) { - LOG_ERROR(<< "Error getting file version info for " << KERNEL32_DLL << " - error code : " << CWindowsError()); + LOG_ERROR(<< "Error getting file version info for " << KERNEL32_DLL + << " - error code : " << CWindowsError()); return false; } UINT len(0); VS_FIXEDFILEINFO* fixedFileInfo(0); - if (VerQueryValue(buffer.get(), "\\", reinterpret_cast(&fixedFileInfo), &len) == FALSE) { - LOG_ERROR(<< "Error querying fixed file info for " << KERNEL32_DLL << " - error code : " << CWindowsError()); + if (VerQueryValue(buffer.get(), "\\", + reinterpret_cast(&fixedFileInfo), &len) == FALSE) { + LOG_ERROR(<< "Error querying fixed file info for " << KERNEL32_DLL + << " - error code : " << CWindowsError()); return false; } @@ -71,7 +75,8 @@ std::string CUname::nodeName() { DWORD size(0); BOOL res(GetComputerNameEx(ComputerNameDnsHostname, 0, &size)); if (res != FALSE || GetLastError() != ERROR_MORE_DATA) { - LOG_ERROR(<< "Error getting computer name length - error code : " << CWindowsError()); + LOG_ERROR(<< "Error getting computer name length - error code : " + << CWindowsError()); return std::string(); } @@ -121,18 +126,23 @@ std::string CUname::version() { DWORDLONG conditionMask(0); versionInfoEx.wProductType = VER_NT_DOMAIN_CONTROLLER; - if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, VER_EQUAL)) != FALSE) { + if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, + 
VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, + VER_EQUAL)) != FALSE) { strm << " (Domain Controller)"; } else { conditionMask = 0; versionInfoEx.wProductType = VER_NT_SERVER; - if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, VER_EQUAL)) != FALSE) { + if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, + VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, + VER_EQUAL)) != FALSE) { strm << " (Server)"; } else { conditionMask = 0; versionInfoEx.wProductType = VER_NT_WORKSTATION; - if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, VER_EQUAL)) != - FALSE) { + if (VerifyVersionInfo(&versionInfoEx, VER_PRODUCT_TYPE, + VerSetConditionMask(conditionMask, VER_PRODUCT_TYPE, + VER_EQUAL)) != FALSE) { strm << " (Workstation)"; } } diff --git a/lib/core/CWindowsError_Windows.cc b/lib/core/CWindowsError_Windows.cc index 6783bdfc27..d30e297b76 100644 --- a/lib/core/CWindowsError_Windows.cc +++ b/lib/core/CWindowsError_Windows.cc @@ -30,13 +30,9 @@ uint32_t CWindowsError::errorCode() const { std::string CWindowsError::errorString() const { char message[BUFFER_SIZE] = {'\0'}; - DWORD msgLen(FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK, - 0, - m_ErrorCode, - 0, - message, - BUFFER_SIZE, - 0)); + DWORD msgLen(FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + 0, m_ErrorCode, 0, message, BUFFER_SIZE, 0)); if (msgLen == 0) { return "unknown error code (" + CStringUtils::typeToString(m_ErrorCode) + ')'; } @@ -47,13 +43,9 @@ std::string CWindowsError::errorString() const { std::ostream& operator<<(std::ostream& os, const CWindowsError& windowsError) { char message[BUFFER_SIZE] = {'\0'}; - DWORD msgLen(FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK, - 0, - windowsError.m_ErrorCode, - 0, - message, - BUFFER_SIZE, - 0)); + DWORD msgLen(FormatMessage( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK, + 0, windowsError.m_ErrorCode, 0, message, BUFFER_SIZE, 0)); if (msgLen == 0) { os << "unknown error code (" << windowsError.m_ErrorCode << ')'; } else { diff --git a/lib/core/CWordDictionary.cc b/lib/core/CWordDictionary.cc index 3377ceb80d..6fa8d39c43 100644 --- a/lib/core/CWordDictionary.cc +++ b/lib/core/CWordDictionary.cc @@ -130,7 +130,8 @@ CWordDictionary::CWordDictionary() { char partOfSpeechCode(word[sepPos + 1]); EPartOfSpeech partOfSpeech(partOfSpeechFromCode(partOfSpeechCode)); if (partOfSpeech == E_NotInDictionary) { - LOG_ERROR(<< "Unknown part-of-speech code (" << partOfSpeechCode << ") for word: " << word); + LOG_ERROR(<< "Unknown part-of-speech code (" << partOfSpeechCode + << ") for word: " << word); continue; } word.erase(sepPos); @@ -158,8 +159,10 @@ size_t CWordDictionary::CStrHashIgnoreCase::operator()(const std::string& str) c return hash; } -bool CWordDictionary::CStrEqualIgnoreCase::operator()(const std::string& lhs, const std::string& rhs) const { - return lhs.length() == rhs.length() && CStrCaseCmp::strCaseCmp(lhs.c_str(), rhs.c_str()) == 0; +bool CWordDictionary::CStrEqualIgnoreCase::operator()(const std::string& lhs, + const std::string& rhs) const { + return lhs.length() == rhs.length() && + CStrCaseCmp::strCaseCmp(lhs.c_str(), rhs.c_str()) == 0; } } } diff --git a/lib/core/CWordExtractor.cc b/lib/core/CWordExtractor.cc index 
6aa75c5342..b01c17c06f 100644 --- a/lib/core/CWordExtractor.cc +++ b/lib/core/CWordExtractor.cc @@ -14,11 +14,14 @@ namespace core { const std::string CWordExtractor::PUNCT_CHARS("!\"'(),-./:;?[]`"); -void CWordExtractor::extractWordsFromMessage(const std::string& message, std::string& messageWords) { +void CWordExtractor::extractWordsFromMessage(const std::string& message, + std::string& messageWords) { CWordExtractor::extractWordsFromMessage(1, message, messageWords); } -void CWordExtractor::extractWordsFromMessage(size_t minConsecutive, const std::string& message, std::string& messageWords) { +void CWordExtractor::extractWordsFromMessage(size_t minConsecutive, + const std::string& message, + std::string& messageWords) { // Words are taken to be sub-strings of 1 or more letters, all lower case // except possibly the first, preceded by a space, and followed by 0 or 1 // punctuation characters and then a space (or the end of the string). @@ -47,7 +50,8 @@ void CWordExtractor::extractWordsFromMessage(size_t minConsecutive, const std::s if (::isspace(static_cast(thisChar))) { if (inWord && punctCount <= 1) { if (dict.isInDictionary(curWord)) { - messageWords.append(message, wordStartPos, messagePos - spaceCount - punctCount - wordStartPos); + messageWords.append(message, wordStartPos, + messagePos - spaceCount - punctCount - wordStartPos); messageWords += ' '; ++consecutive; @@ -109,7 +113,8 @@ void CWordExtractor::extractWordsFromMessage(size_t minConsecutive, const std::s if (inWord && punctCount <= 1 && dict.isInDictionary(curWord)) { ++consecutive; if (consecutive >= minConsecutive) { - messageWords.append(message, wordStartPos, message.length() - wordStartPos - punctCount); + messageWords.append(message, wordStartPos, + message.length() - wordStartPos - punctCount); messageWords += ' '; rollbackPos = messageWords.length(); diff --git a/lib/core/CXmlNode.cc b/lib/core/CXmlNode.cc index 909175dbdd..74516b1419 100644 --- a/lib/core/CXmlNode.cc +++ b/lib/core/CXmlNode.cc @@ -14,11 +14,13 @@ CXmlNode::CXmlNode() { CXmlNode::CXmlNode(const std::string& name) : m_Name(name) { } -CXmlNode::CXmlNode(const std::string& name, const std::string& value) : m_Name(name), m_Value(value) { +CXmlNode::CXmlNode(const std::string& name, const std::string& value) + : m_Name(name), m_Value(value) { } CXmlNode::CXmlNode(const std::string& name, const std::string& value, const TStrStrMap& attributes) - : m_Name(name), m_Value(value), m_Attributes(attributes.begin(), attributes.end()) { + : m_Name(name), m_Value(value), + m_Attributes(attributes.begin(), attributes.end()) { } CXmlNode::~CXmlNode() { diff --git a/lib/core/CXmlNodeWithChildren.cc b/lib/core/CXmlNodeWithChildren.cc index 2fc567a4ff..78b2e4cf0d 100644 --- a/lib/core/CXmlNodeWithChildren.cc +++ b/lib/core/CXmlNodeWithChildren.cc @@ -15,17 +15,22 @@ namespace core { CXmlNodeWithChildren::CXmlNodeWithChildren() : CXmlNode() { } -CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name) : CXmlNode(name) { +CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name) + : CXmlNode(name) { } -CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name, const std::string& value) : CXmlNode(name, value) { +CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name, const std::string& value) + : CXmlNode(name, value) { } -CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name, const std::string& value, const CXmlNode::TStrStrMap& attributes) +CXmlNodeWithChildren::CXmlNodeWithChildren(const std::string& name, + 
const std::string& value, + const CXmlNode::TStrStrMap& attributes) : CXmlNode(name, value, attributes) { } -CXmlNodeWithChildren::CXmlNodeWithChildren(const CXmlNodeWithChildren& arg) : CXmlNode(arg), m_Children(arg.m_Children) { +CXmlNodeWithChildren::CXmlNodeWithChildren(const CXmlNodeWithChildren& arg) + : CXmlNode(arg), m_Children(arg.m_Children) { } CXmlNodeWithChildren::~CXmlNodeWithChildren() { @@ -70,7 +75,8 @@ std::string CXmlNodeWithChildren::dump(size_t indent) const { strRep += core_t::LINE_ENDING; // Now add children at next level of indenting - for (TChildNodePVecCItr childIter = m_Children.begin(); childIter != m_Children.end(); ++childIter) { + for (TChildNodePVecCItr childIter = m_Children.begin(); + childIter != m_Children.end(); ++childIter) { const CXmlNodeWithChildren* child = childIter->get(); if (child != nullptr) { strRep += child->dump(indent + 1); diff --git a/lib/core/CXmlNodeWithChildrenPool.cc b/lib/core/CXmlNodeWithChildrenPool.cc index d006384e10..6af5639fb1 100644 --- a/lib/core/CXmlNodeWithChildrenPool.cc +++ b/lib/core/CXmlNodeWithChildrenPool.cc @@ -15,10 +15,12 @@ namespace ml { namespace core { -CXmlNodeWithChildrenPool::CXmlNodeWithChildrenPool() : m_MaxRecycled(m_Recycled.max_size()) { +CXmlNodeWithChildrenPool::CXmlNodeWithChildrenPool() + : m_MaxRecycled(m_Recycled.max_size()) { } -CXmlNodeWithChildrenPool::CXmlNodeWithChildrenPool(size_t maxRecycled) : m_MaxRecycled(std::min(maxRecycled, m_Recycled.max_size())) { +CXmlNodeWithChildrenPool::CXmlNodeWithChildrenPool(size_t maxRecycled) + : m_MaxRecycled(std::min(maxRecycled, m_Recycled.max_size())) { } CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode() { @@ -31,7 +33,8 @@ CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode() return nodePtr; } -CXmlNodeWithChildren::TXmlNodeWithChildrenP CXmlNodeWithChildrenPool::newNode(std::string name, std::string value) { +CXmlNodeWithChildren::TXmlNodeWithChildrenP +CXmlNodeWithChildrenPool::newNode(std::string name, std::string value) { CXmlNodeWithChildren::TXmlNodeWithChildrenP nodePtr(this->newNode()); // We take advantage of friendship here to set the node's name and value @@ -55,7 +58,8 @@ void CXmlNodeWithChildrenPool::recycle(CXmlNodeWithChildren::TXmlNodeWithChildre if (m_Recycled.size() < m_MaxRecycled) { // We take advantage of friendship here to clear the node's attribute vector nodePtr->m_Attributes.clear(); - std::for_each(nodePtr->m_Children.rbegin(), nodePtr->m_Children.rend(), boost::bind(&CXmlNodeWithChildrenPool::recycle, this, _1)); + std::for_each(nodePtr->m_Children.rbegin(), nodePtr->m_Children.rend(), + boost::bind(&CXmlNodeWithChildrenPool::recycle, this, _1)); nodePtr->m_Children.clear(); m_Recycled.push_back(nodePtr); diff --git a/lib/core/CXmlParser.cc b/lib/core/CXmlParser.cc index 608097837a..effa090ee0 100644 --- a/lib/core/CXmlParser.cc +++ b/lib/core/CXmlParser.cc @@ -32,7 +32,8 @@ const size_t CXmlParser::MAX_INDENT_SPACES(10); // The number of spaces in this constant MUST match the maximum above const char* CXmlParser::INDENT_SPACE_STR(" "); -CXmlParser::CXmlParser() : m_Doc(nullptr), m_XPathContext(nullptr), m_NavigatedNode(nullptr) { +CXmlParser::CXmlParser() + : m_Doc(nullptr), m_XPathContext(nullptr), m_NavigatedNode(nullptr) { // Note that xmlLoadExtDtdDefaultValue needs to be set before parsing, // but is a per-thread setting // xmlLoadExtDtdDefaultValue = 1; @@ -176,7 +177,8 @@ bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, CXmlNode& ret } if 
(vec.size() != 1) {
-        LOG_ERROR(<< "Return for " << xpathExpr << " must be single value, not " << vec.size());
+        LOG_ERROR(<< "Return for " << xpathExpr << " must be single value, not "
+                  << vec.size());
         return false;
     }
 
@@ -251,7 +253,8 @@ bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, TStrStrMap& r
     return true;
 }
 
-bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, CXmlParser::TXmlNodeVec& ret) const {
+bool CXmlParser::evalXPathExpression(const std::string& xpathExpr,
+                                     CXmlParser::TXmlNodeVec& ret) const {
     ret.clear();
 
     if (m_Doc == nullptr || m_XPathContext == nullptr) {
@@ -259,7 +262,8 @@ bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, CXmlParser::T
         return false;
     }
 
-    xmlXPathObject* xpathObj(xmlXPathEvalExpression(reinterpret_cast<const xmlChar*>(xpathExpr.c_str()), m_XPathContext));
+    xmlXPathObject* xpathObj(xmlXPathEvalExpression(
+        reinterpret_cast<const xmlChar*>(xpathExpr.c_str()), m_XPathContext));
     if (xpathObj == nullptr) {
         LOG_ERROR(<< "Unable to evaluate xpath expression " << xpathExpr);
         return false;
@@ -267,7 +271,8 @@ bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, CXmlParser::T
 
     if (xpathObj->type != XPATH_NODESET) {
         xmlXPathFreeObject(xpathObj);
-        LOG_ERROR(<< "Unable to evaluate xpath expression " << xpathExpr << " " << xpathObj->type);
+        LOG_ERROR(<< "Unable to evaluate xpath expression " << xpathExpr << " "
+                  << xpathObj->type);
         return false;
     }
 
@@ -288,7 +293,8 @@ bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, CXmlParser::T
             const xmlChar* name(nodes->nodeTab[i]->name);
             xmlChar* value(xmlNodeGetContent(nodes->nodeTab[i]));
 
-            CXmlNode node(reinterpret_cast<const char*>(name), reinterpret_cast<const char*>(value));
+            CXmlNode node(reinterpret_cast<const char*>(name),
+                          reinterpret_cast<const char*>(value));
 
             ret.push_back(node);
 
@@ -300,7 +306,8 @@ bool CXmlParser::evalXPathExpression(const std::string& xpathExpr, CXmlParser::T
             while (prop != nullptr) {
                 const xmlChar* propName(prop->name);
                 xmlChar* propValue(xmlGetProp(nodes->nodeTab[i], propName));
-                attrs.push_back(CXmlNode::TStrStrPr(reinterpret_cast<const char*>(propName), reinterpret_cast<const char*>(propValue)));
+                attrs.push_back(CXmlNode::TStrStrPr(reinterpret_cast<const char*>(propName),
+                                                    reinterpret_cast<const char*>(propValue)));
 
                 xmlFree(propValue);
 
@@ -358,18 +365,20 @@ void CXmlParser::convert(const CXmlNodeWithChildren& root, std::string& result)
 
 void CXmlParser::convert(size_t indentSpaces, const CXmlNodeWithChildren& root, std::string& result) {
     // The xmlTreeIndentString "global" is really a per-thread variable.
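An aside on the hunk below, since the same idiom appears in both convert() overloads: the indent string is chosen by offsetting into INDENT_SPACE_STR, a constant buffer of MAX_INDENT_SPACES spaces, so xmlTreeIndentString ends up pointing at a suffix containing exactly the clamped number of spaces and nothing is allocated per call. A minimal standalone sketch of the same pointer-offset technique; the two constants here are local stand-ins mirroring CXmlParser's, not the real members:

    #include <algorithm>
    #include <cstddef>
    #include <initializer_list>
    #include <iostream>

    namespace {
    const std::size_t MAX_INDENT_SPACES{10};
    // The number of spaces in this constant must match the maximum above.
    const char* const INDENT_SPACE_STR{"          "};
    }

    int main() {
        for (std::size_t indentSpaces : {0, 2, 4, 10, 15}) {
            // Point at the suffix of the constant buffer whose length is the
            // requested indent, clamped to the maximum supported indent.
            const char* indent = INDENT_SPACE_STR + MAX_INDENT_SPACES -
                                 std::min(indentSpaces, MAX_INDENT_SPACES);
            std::cout << '[' << indent << "]\n";
        }
        return 0;
    }

Requesting more than MAX_INDENT_SPACES simply saturates at the maximum, which is why the reformatted lines keep the std::min clamp together with the subtraction.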
-    xmlTreeIndentString = INDENT_SPACE_STR + MAX_INDENT_SPACES - std::min(indentSpaces, MAX_INDENT_SPACES);
+    xmlTreeIndentString = INDENT_SPACE_STR + MAX_INDENT_SPACES -
+                          std::min(indentSpaces, MAX_INDENT_SPACES);
 
     // Create a temporary document
     xmlDoc* doc(xmlNewDoc(reinterpret_cast<const xmlChar*>("1.0")));
 
     // Root node
-    xmlNode* rootNode(xmlNewNode(nullptr, reinterpret_cast<const xmlChar*>(root.name().c_str())));
+    xmlNode* rootNode(
+        xmlNewNode(nullptr, reinterpret_cast<const xmlChar*>(root.name().c_str())));
 
     const CXmlNode::TStrStrPrVec& attrs = root.attributes();
 
-    for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) {
-        xmlSetProp(rootNode,
-                   reinterpret_cast<const xmlChar*>(attrIter->first.c_str()),
+    for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin();
+         attrIter != attrs.end(); ++attrIter) {
+        xmlSetProp(rootNode, reinterpret_cast<const xmlChar*>(attrIter->first.c_str()),
                    reinterpret_cast<const xmlChar*>(attrIter->second.c_str()));
     }
 
@@ -397,7 +406,8 @@ void CXmlParser::convert(size_t indentSpaces, const CXmlNodeWithChildren& root,
 void CXmlParser::convertChildren(const CXmlNodeWithChildren& current, xmlNode& xmlRep) {
     const CXmlNodeWithChildren::TChildNodePVec& childVec = current.children();
 
-    for (CXmlNodeWithChildren::TChildNodePVecCItr childIter = childVec.begin(); childIter != childVec.end(); ++childIter) {
+    for (CXmlNodeWithChildren::TChildNodePVecCItr childIter = childVec.begin();
+         childIter != childVec.end(); ++childIter) {
         const CXmlNodeWithChildren* child = childIter->get();
         if (child != nullptr) {
             xmlNode* childRep(nullptr);
@@ -406,17 +416,20 @@ void CXmlParser::convertChildren(const CXmlNodeWithChildren& current, xmlNode& x
                 // It's crucial to specify the value as NULL rather than
                 // an empty string, otherwise the formatting will be messed
                 // up
-                childRep = xmlNewChild(&xmlRep, nullptr, reinterpret_cast<const xmlChar*>(child->name().c_str()), nullptr);
+                childRep = xmlNewChild(
+                    &xmlRep, nullptr,
+                    reinterpret_cast<const xmlChar*>(child->name().c_str()), nullptr);
             } else {
-                childRep = xmlNewTextChild(&xmlRep,
-                                           nullptr,
-                                           reinterpret_cast<const xmlChar*>(child->name().c_str()),
-                                           reinterpret_cast<const xmlChar*>(child->value().c_str()));
+                childRep = xmlNewTextChild(
+                    &xmlRep, nullptr,
+                    reinterpret_cast<const xmlChar*>(child->name().c_str()),
+                    reinterpret_cast<const xmlChar*>(child->value().c_str()));
             }
 
             const CXmlNode::TStrStrPrVec& attrs = child->attributes();
 
-            for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) {
+            for (CXmlNode::TStrStrPrVecCItr attrIter = attrs.begin();
+                 attrIter != attrs.end(); ++attrIter) {
                 xmlSetProp(childRep,
                            reinterpret_cast<const xmlChar*>(attrIter->first.c_str()),
                            reinterpret_cast<const xmlChar*>(attrIter->second.c_str()));
@@ -431,9 +444,13 @@ void CXmlParser::convert(const std::string& root, const TStrStrMap& values, std:
     CXmlParser::convert(DEFAULT_INDENT_SPACES, root, values, result);
 }
 
-void CXmlParser::convert(size_t indentSpaces, const std::string& root, const TStrStrMap& values, std::string& result) {
+void CXmlParser::convert(size_t indentSpaces,
+                         const std::string& root,
+                         const TStrStrMap& values,
+                         std::string& result) {
     // The xmlTreeIndentString "global" is really a per-thread variable.
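For context on the map-based convert() continued in the next hunk: each map value may carry one XML attribute whose specification must take the form name=value; a token without the separator, or with an empty name, triggers the LOG_ERROR shown below. A minimal sketch of that validation and split, assuming ATTRIBUTE_EQUALS is the "=" separator (the real constant and the tag/attribute extraction live in CXmlParser and are only mirrored here):

    #include <iostream>
    #include <string>

    namespace {
    const std::string ATTRIBUTE_EQUALS{"="};
    }

    // Split "name=value" into its two halves, mirroring the eqPos checks in
    // CXmlParser::convert(): reject a missing separator or an empty name.
    bool splitAttribute(const std::string& attribute, std::string& name, std::string& value) {
        std::string::size_type eqPos(attribute.find(ATTRIBUTE_EQUALS));
        if (eqPos == std::string::npos || eqPos == 0) {
            return false;
        }
        name = attribute.substr(0, eqPos);
        value = attribute.substr(eqPos + 1);
        return true;
    }

    int main() {
        std::string name;
        std::string value;
        if (splitAttribute("units=bytes", name, value)) {
            std::cout << name << " -> " << value << '\n';
        }
        return 0;
    }

Only the first separator matters, so a value that itself contains '=' survives the split intact.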
- xmlTreeIndentString = INDENT_SPACE_STR + MAX_INDENT_SPACES - std::min(indentSpaces, MAX_INDENT_SPACES); + xmlTreeIndentString = INDENT_SPACE_STR + MAX_INDENT_SPACES - + std::min(indentSpaces, MAX_INDENT_SPACES); // Create a temporary document xmlDoc* doc(xmlNewDoc(reinterpret_cast("1.0"))); @@ -458,17 +475,21 @@ void CXmlParser::convert(size_t indentSpaces, const std::string& root, const TSt } xmlNode* childRep(xmlNewTextChild( - rootNode, nullptr, reinterpret_cast(tag.c_str()), reinterpret_cast(itr->second.c_str()))); + rootNode, nullptr, reinterpret_cast(tag.c_str()), + reinterpret_cast(itr->second.c_str()))); if (!attribute.empty()) { size_t eqPos(attribute.find(ATTRIBUTE_EQUALS)); if (eqPos == std::string::npos || eqPos == 0) { - LOG_ERROR(<< "Attribute format does not contain '" << ATTRIBUTE_EQUALS << "' surrounded by name and value : " << attribute - << core_t::LINE_ENDING << "Map key : " << itr->first << core_t::LINE_ENDING << "Map value : " << itr->second); + LOG_ERROR(<< "Attribute format does not contain '" << ATTRIBUTE_EQUALS + << "' surrounded by name and value : " << attribute + << core_t::LINE_ENDING << "Map key : " << itr->first + << core_t::LINE_ENDING << "Map value : " << itr->second); } else { - xmlSetProp(childRep, - reinterpret_cast(attribute.substr(0, eqPos).c_str()), - reinterpret_cast(attribute.substr(eqPos + 1).c_str())); + xmlSetProp( + childRep, + reinterpret_cast(attribute.substr(0, eqPos).c_str()), + reinterpret_cast(attribute.substr(eqPos + 1).c_str())); } } } @@ -504,8 +525,9 @@ bool CXmlParser::convert(const std::string& root, const TStrStrMap& values) { // Create child nodes for (TStrStrMapCItr itr = values.begin(); itr != values.end(); ++itr) { - xmlNewTextChild( - rootNode, nullptr, reinterpret_cast(itr->first.c_str()), reinterpret_cast(itr->second.c_str())); + xmlNewTextChild(rootNode, nullptr, + reinterpret_cast(itr->first.c_str()), + reinterpret_cast(itr->second.c_str())); } xmlDocSetRootElement(m_Doc, rootNode); @@ -533,7 +555,8 @@ bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildren::TXmlNodeWithChildrenP& ro return this->toNodeHierarchy(pool, rootNodePtr); } -bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { +bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { rootNodePtr.reset(); if (m_Doc == nullptr) { @@ -555,7 +578,8 @@ bool CXmlParser::toNodeHierarchy(CXmlNodeWithChildrenPool& pool, CXmlNodeWithChi return this->toNodeHierarchy(*root, pool, nullptr, rootNodePtr); } -bool CXmlParser::toNodeHierarchy(CStringCache& cache, CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { +bool CXmlParser::toNodeHierarchy(CStringCache& cache, + CXmlNodeWithChildren::TXmlNodeWithChildrenP& rootNodePtr) const { // Because both the pool and the nodes use shared pointers, it doesn't // matter if the pool that originally allocates the nodes is destroyed // before the nodes themselves. 
Hence we can get away with implementing @@ -738,8 +762,8 @@ bool CXmlParser::addNewChildNode(const std::string& name, const std::string& val } // Note the namespace is NULL here - if (xmlNewTextChild(root, nullptr, reinterpret_cast(name.c_str()), reinterpret_cast(value.c_str())) == - nullptr) { + if (xmlNewTextChild(root, nullptr, reinterpret_cast(name.c_str()), + reinterpret_cast(value.c_str())) == nullptr) { LOG_ERROR(<< "Unable to add new child to " << root); return false; } @@ -750,7 +774,9 @@ bool CXmlParser::addNewChildNode(const std::string& name, const std::string& val return true; } -bool CXmlParser::addNewChildNode(const std::string& name, const std::string& value, const TStrStrMap& attrs) { +bool CXmlParser::addNewChildNode(const std::string& name, + const std::string& value, + const TStrStrMap& attrs) { if (m_Doc == nullptr) { LOG_ERROR(<< "Cannot add to uninitialised document"); return false; @@ -763,16 +789,17 @@ bool CXmlParser::addNewChildNode(const std::string& name, const std::string& val } // Note the namespace is NULL here - xmlNode* child( - xmlNewTextChild(root, nullptr, reinterpret_cast(name.c_str()), reinterpret_cast(value.c_str()))); + xmlNode* child(xmlNewTextChild(root, nullptr, + reinterpret_cast(name.c_str()), + reinterpret_cast(value.c_str()))); if (child == nullptr) { LOG_ERROR(<< "Unable to add new child to " << root); return false; } for (TStrStrMapCItr attrIter = attrs.begin(); attrIter != attrs.end(); ++attrIter) { - xmlSetProp( - child, reinterpret_cast(attrIter->first.c_str()), reinterpret_cast(attrIter->second.c_str())); + xmlSetProp(child, reinterpret_cast(attrIter->first.c_str()), + reinterpret_cast(attrIter->second.c_str())); } // This makes XPath operations on large documents much faster @@ -795,11 +822,13 @@ bool CXmlParser::changeChildNodeValue(const std::string& name, const std::string xmlNode* child(root->children); while (child != nullptr) { - if (child->type == XML_ELEMENT_NODE && name == reinterpret_cast(child->name)) { + if (child->type == XML_ELEMENT_NODE && + name == reinterpret_cast(child->name)) { // Unlike xmlNewTextChild, xmlNodeSetContent doesn't escape special // characters, so we have to call xmlEncodeSpecialChars ourselves to // do this - xmlChar* encoded(xmlEncodeSpecialChars(m_Doc, reinterpret_cast(newValue.c_str()))); + xmlChar* encoded(xmlEncodeSpecialChars( + m_Doc, reinterpret_cast(newValue.c_str()))); xmlNodeSetContent(child, encoded); xmlFree(encoded); @@ -829,12 +858,13 @@ bool CXmlParser::stringLatin1ToUtf8(std::string& str) { int outLen(static_cast(bufferSize)); // This function is provided by libxml2 - int ret = - ::isolat1ToUTF8(reinterpret_cast(&buffer[0]), &outLen, reinterpret_cast(str.c_str()), &inLen); + int ret = ::isolat1ToUTF8(reinterpret_cast(&buffer[0]), &outLen, + reinterpret_cast(str.c_str()), &inLen); if (ret == -1 || inLen < static_cast(str.length())) { - LOG_ERROR(<< "Failure converting Latin1 string to UTF-8" << core_t::LINE_ENDING << "Return code: " << ret << core_t::LINE_ENDING - << "Remaining length: " << inLen << core_t::LINE_ENDING << "Original string: " << str << core_t::LINE_ENDING - << "Result so far: " << &buffer[0]); + LOG_ERROR(<< "Failure converting Latin1 string to UTF-8" << core_t::LINE_ENDING + << "Return code: " << ret << core_t::LINE_ENDING << "Remaining length: " + << inLen << core_t::LINE_ENDING << "Original string: " << str + << core_t::LINE_ENDING << "Result so far: " << &buffer[0]); return false; } @@ -856,7 +886,8 @@ bool CXmlParser::toNodeHierarchy(const xmlNode& parentNode, if 
(cache != nullptr) { // Get the name from the cache if there is one, as we expect relatively // few distinct names repeated many times - nodePtr->m_Name = cache->stringFor(reinterpret_cast(parentNode.name)); + nodePtr->m_Name = + cache->stringFor(reinterpret_cast(parentNode.name)); } else { nodePtr->m_Name = reinterpret_cast(parentNode.name); } @@ -900,7 +931,8 @@ bool CXmlParser::toNodeHierarchy(const xmlNode& parentNode, // xmlFree(propValue); // but obviously this involves a temporary memory allocation.) const xmlNode* propChildren(prop->children); - if (propChildren != nullptr && propChildren->next == nullptr && propChildren->type == XML_TEXT_NODE) { + if (propChildren != nullptr && propChildren->next == nullptr && + propChildren->type == XML_TEXT_NODE) { const char* propName(reinterpret_cast(prop->name)); const char* propValue(reinterpret_cast(propChildren->content)); @@ -911,7 +943,8 @@ bool CXmlParser::toNodeHierarchy(const xmlNode& parentNode, // Get attribute names and values from the cache if there is // one, as we expect relatively few distinct attributes repeated // many times - nodePtr->m_Attributes.push_back(CXmlNode::TStrStrPr(cache->stringFor(propName), cache->stringFor(propValue))); + nodePtr->m_Attributes.push_back(CXmlNode::TStrStrPr( + cache->stringFor(propName), cache->stringFor(propValue))); } else { nodePtr->m_Attributes.push_back(CXmlNode::TStrStrPr(propName, propValue)); } diff --git a/lib/core/unittest/CAllocationStrategyTest.cc b/lib/core/unittest/CAllocationStrategyTest.cc index d758170463..f8abe9061b 100644 --- a/lib/core/unittest/CAllocationStrategyTest.cc +++ b/lib/core/unittest/CAllocationStrategyTest.cc @@ -73,7 +73,7 @@ void CAllocationStrategyTest::test() { CppUnit::Test* CAllocationStrategyTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAllocationStrategyTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CAllocationStrategyTest::test", &CAllocationStrategyTest::test)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAllocationStrategyTest::test", &CAllocationStrategyTest::test)); return suiteOfTests; } diff --git a/lib/core/unittest/CBase64FilterTest.cc b/lib/core/unittest/CBase64FilterTest.cc index 34693f52aa..2f75ecfdf6 100644 --- a/lib/core/unittest/CBase64FilterTest.cc +++ b/lib/core/unittest/CBase64FilterTest.cc @@ -41,7 +41,8 @@ class CMockSource { return -1; } std::streamsize toCopy = std::min(std::streamsize(m_Data.size() - m_Read), n); - LOG_TRACE(<< "Read " << toCopy << ": " << std::string(m_Data.c_str() + m_Read, toCopy)); + LOG_TRACE(<< "Read " << toCopy << ": " + << std::string(m_Data.c_str() + m_Read, toCopy)); memcpy(s, m_Data.c_str() + m_Read, toCopy); m_Read += toCopy; return toCopy; @@ -59,7 +60,8 @@ class CMockSink { public: using char_type = char; - struct category : public boost::iostreams::sink_tag, public boost::iostreams::closable_tag {}; + struct category : public boost::iostreams::sink_tag, + public boost::iostreams::closable_tag {}; public: CMockSink() {} @@ -116,11 +118,12 @@ void CBase64FilterTest::testEncode() { filter << "in the continued and indefatigable generation of knowledge, exceeds the short "; filter << "vehemence of any carnal pleasure."; } - std::string expected = "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz" - "IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg" - "dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu" - 
"dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo" - "ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="; + std::string expected = + "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz" + "IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg" + "dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu" + "dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo" + "ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="; CPPUNIT_ASSERT_EQUAL(expected, sink.getData()); } { @@ -145,15 +148,17 @@ void CBase64FilterTest::testEncode() { void CBase64FilterTest::testDecode() { { // Test decoding - std::string encoded = "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz" - "IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg" - "dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu" - "dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo" - "ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="; - std::string expected = "Man is distinguished, not only by his reason, but by this singular passion from " - "other animals, which is a lust of the mind, that by a perseverance of delight " - "in the continued and indefatigable generation of knowledge, exceeds the short " - "vehemence of any carnal pleasure."; + std::string encoded = + "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz" + "IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg" + "dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu" + "dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo" + "ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="; + std::string expected = + "Man is distinguished, not only by his reason, but by this singular passion from " + "other animals, which is a lust of the mind, that by a perseverance of delight " + "in the continued and indefatigable generation of knowledge, exceeds the short " + "vehemence of any carnal pleasure."; CMockSource source(encoded); TFilteredInput filter; filter.push(CBase64Decoder()); @@ -204,9 +209,12 @@ void CBase64FilterTest::testBoth() { CppUnit::Test* CBase64FilterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBase64FilterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CBase64FilterTest::testDecode", &CBase64FilterTest::testDecode)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBase64FilterTest::testEncode", &CBase64FilterTest::testEncode)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBase64FilterTest::testBoth", &CBase64FilterTest::testBoth)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBase64FilterTest::testDecode", &CBase64FilterTest::testDecode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBase64FilterTest::testEncode", &CBase64FilterTest::testEncode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBase64FilterTest::testBoth", &CBase64FilterTest::testBoth)); return suiteOfTests; } diff --git a/lib/core/unittest/CBlockingMessageQueueTest.cc b/lib/core/unittest/CBlockingMessageQueueTest.cc index b1851c2640..85a228eb18 100644 --- a/lib/core/unittest/CBlockingMessageQueueTest.cc +++ b/lib/core/unittest/CBlockingMessageQueueTest.cc @@ -13,8 +13,8 @@ CppUnit::Test* CBlockingMessageQueueTest::suite() { CppUnit::TestSuite* suiteOfTests = new 
CppUnit::TestSuite("CBlockingMessageQueueTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CBlockingMessageQueueTest::testSendReceive", - &CBlockingMessageQueueTest::testSendReceive)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBlockingMessageQueueTest::testSendReceive", &CBlockingMessageQueueTest::testSendReceive)); return suiteOfTests; } diff --git a/lib/core/unittest/CByteSwapperTest.cc b/lib/core/unittest/CByteSwapperTest.cc index ba6063bed9..5526145617 100644 --- a/lib/core/unittest/CByteSwapperTest.cc +++ b/lib/core/unittest/CByteSwapperTest.cc @@ -13,7 +13,8 @@ CppUnit::Test* CByteSwapperTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CByteSwapperTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CByteSwapperTest::testByteSwaps", &CByteSwapperTest::testByteSwaps)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CByteSwapperTest::testByteSwaps", &CByteSwapperTest::testByteSwaps)); return suiteOfTests; } diff --git a/lib/core/unittest/CCompressUtilsTest.cc b/lib/core/unittest/CCompressUtilsTest.cc index 4123e195aa..df07ae01cd 100644 --- a/lib/core/unittest/CCompressUtilsTest.cc +++ b/lib/core/unittest/CCompressUtilsTest.cc @@ -13,13 +13,14 @@ CppUnit::Test* CCompressUtilsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCompressUtilsTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CCompressUtilsTest::testEmptyAdd", &CCompressUtilsTest::testEmptyAdd)); - suiteOfTests->addTest(new CppUnit::TestCaller("CCompressUtilsTest::testOneAdd", &CCompressUtilsTest::testOneAdd)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CCompressUtilsTest::testManyAdds", &CCompressUtilsTest::testManyAdds)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CCompressUtilsTest::testLengthOnly", &CCompressUtilsTest::testLengthOnly)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCompressUtilsTest::testEmptyAdd", &CCompressUtilsTest::testEmptyAdd)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCompressUtilsTest::testOneAdd", &CCompressUtilsTest::testOneAdd)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCompressUtilsTest::testManyAdds", &CCompressUtilsTest::testManyAdds)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCompressUtilsTest::testLengthOnly", &CCompressUtilsTest::testLengthOnly)); return suiteOfTests; } diff --git a/lib/core/unittest/CCompressedDictionaryTest.cc b/lib/core/unittest/CCompressedDictionaryTest.cc index fc2110dcc0..0770c1500e 100644 --- a/lib/core/unittest/CCompressedDictionaryTest.cc +++ b/lib/core/unittest/CCompressedDictionaryTest.cc @@ -47,7 +47,8 @@ void CCompressedDictionaryTest::testAll() { for (std::size_t j = 0u; j < words.size(); ++j) { CPPUNIT_ASSERT(uniqueWords.insert(dictionary.word(words[j])).second); CPPUNIT_ASSERT(uniqueWords.insert(dictionary.word(words[j], word2)).second); - CPPUNIT_ASSERT(uniqueWords.insert(dictionary.word(words[j], word2, word3)).second); + CPPUNIT_ASSERT( + uniqueWords.insert(dictionary.word(words[j], word2, word3)).second); } } } @@ -99,9 +100,9 @@ void CCompressedDictionaryTest::testPersist() { CppUnit::Test* CCompressedDictionaryTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCompressedDictionaryTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CCompressedDictionaryTest::testAll", &CCompressedDictionaryTest::testAll)); - suiteOfTests->addTest(new CppUnit::TestCaller("CCompressedDictionaryTest::testPersist", - &CCompressedDictionaryTest::testPersist)); + suiteOfTests->addTest(new 
CppUnit::TestCaller( + "CCompressedDictionaryTest::testAll", &CCompressedDictionaryTest::testAll)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CCompressedDictionaryTest::testPersist", &CCompressedDictionaryTest::testPersist)); return suiteOfTests; } diff --git a/lib/core/unittest/CConcurrentWrapperTest.cc b/lib/core/unittest/CConcurrentWrapperTest.cc index 32a88a2dcc..7b52b322a3 100644 --- a/lib/core/unittest/CConcurrentWrapperTest.cc +++ b/lib/core/unittest/CConcurrentWrapperTest.cc @@ -26,7 +26,8 @@ using namespace core; using TOStringStreamConcurrentWrapper = CConcurrentWrapper; // a low capacity wrapper with only 5 buckets for the queue, the 3 controls the wakeup of threads -using TOStringStreamLowCapacityConcurrentWrapper = CConcurrentWrapper; +using TOStringStreamLowCapacityConcurrentWrapper = + CConcurrentWrapper; void CConcurrentWrapperTest::testBasic() { @@ -43,7 +44,8 @@ void CConcurrentWrapperTest::testBasic() { o << " world 2\n"; }); } - CPPUNIT_ASSERT_EQUAL(std::string("Hello 1 world 1\nHello 2 world 2\n"), stringStream.str()); + CPPUNIT_ASSERT_EQUAL(std::string("Hello 1 world 1\nHello 2 world 2\n"), + stringStream.str()); } namespace { @@ -59,7 +61,9 @@ void task(CConcurrentWrapper& sink, int i, std::chrono::micr }); } -void taskLowCapacityQueue(TOStringStreamLowCapacityConcurrentWrapper& sink, int i, std::chrono::microseconds pause) { +void taskLowCapacityQueue(TOStringStreamLowCapacityConcurrentWrapper& sink, + int i, + std::chrono::microseconds pause) { sink([i, pause](std::ostream& o) { o << "ta"; std::this_thread::sleep_for(pause); @@ -79,7 +83,8 @@ void CConcurrentWrapperTest::testThreads() { boost::threadpool::pool tp(10); for (size_t i = 0; i < MESSAGES; ++i) { - tp.schedule(boost::bind(task, boost::ref(wrappedStringStream), i, std::chrono::microseconds(0))); + tp.schedule(boost::bind(task, boost::ref(wrappedStringStream), i, + std::chrono::microseconds(0))); } tp.wait(); @@ -105,7 +110,8 @@ void CConcurrentWrapperTest::testThreadsSlow() { boost::threadpool::pool tp(2); for (size_t i = 0; i < MESSAGES; ++i) { - tp.schedule(boost::bind(task, boost::ref(wrappedStringStream), i, std::chrono::microseconds(50))); + tp.schedule(boost::bind(task, boost::ref(wrappedStringStream), i, + std::chrono::microseconds(50))); } tp.wait(); @@ -132,7 +138,8 @@ void CConcurrentWrapperTest::testThreadsSlowLowCapacity() { boost::threadpool::pool tp(2); for (size_t i = 0; i < MESSAGES; ++i) { - tp.schedule(boost::bind(taskLowCapacityQueue, boost::ref(wrappedStringStream), i, std::chrono::microseconds(50))); + tp.schedule(boost::bind(taskLowCapacityQueue, boost::ref(wrappedStringStream), + i, std::chrono::microseconds(50))); } tp.wait(); @@ -159,7 +166,8 @@ void CConcurrentWrapperTest::testThreadsLowCapacity() { boost::threadpool::pool tp(8); for (size_t i = 0; i < MESSAGES; ++i) { - tp.schedule(boost::bind(taskLowCapacityQueue, boost::ref(wrappedStringStream), i, std::chrono::microseconds(0))); + tp.schedule(boost::bind(taskLowCapacityQueue, boost::ref(wrappedStringStream), + i, std::chrono::microseconds(0))); } tp.wait(); @@ -189,18 +197,20 @@ void CConcurrentWrapperTest::testMemoryDebug() { CppUnit::Test* CConcurrentWrapperTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CConcurrentWrapperTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CConcurrentWrapperTest::testBasic", &CConcurrentWrapperTest::testBasic)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CConcurrentWrapperTest::testThreads", &CConcurrentWrapperTest::testThreads)); - 
suiteOfTests->addTest(new CppUnit::TestCaller("CConcurrentWrapperTest::testThreadsSlow", - &CConcurrentWrapperTest::testThreadsSlow)); - suiteOfTests->addTest(new CppUnit::TestCaller("CConcurrentWrapperTest::testThreadsSlowLowCapacity", - &CConcurrentWrapperTest::testThreadsSlowLowCapacity)); - suiteOfTests->addTest(new CppUnit::TestCaller("CConcurrentWrapperTest::testThreadsLowCapacity", - &CConcurrentWrapperTest::testThreadsLowCapacity)); - suiteOfTests->addTest(new CppUnit::TestCaller("CConcurrentWrapperTest::testMemoryDebug", - &CConcurrentWrapperTest::testMemoryDebug)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CConcurrentWrapperTest::testBasic", &CConcurrentWrapperTest::testBasic)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CConcurrentWrapperTest::testThreads", &CConcurrentWrapperTest::testThreads)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CConcurrentWrapperTest::testThreadsSlow", &CConcurrentWrapperTest::testThreadsSlow)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CConcurrentWrapperTest::testThreadsSlowLowCapacity", + &CConcurrentWrapperTest::testThreadsSlowLowCapacity)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CConcurrentWrapperTest::testThreadsLowCapacity", + &CConcurrentWrapperTest::testThreadsLowCapacity)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CConcurrentWrapperTest::testMemoryDebug", &CConcurrentWrapperTest::testMemoryDebug)); return suiteOfTests; } diff --git a/lib/core/unittest/CContainerPrinterTest.cc b/lib/core/unittest/CContainerPrinterTest.cc index 5a5f26c68b..8f4bc96d59 100644 --- a/lib/core/unittest/CContainerPrinterTest.cc +++ b/lib/core/unittest/CContainerPrinterTest.cc @@ -35,7 +35,8 @@ void CContainerPrinterTest::testAll() { list.push_back(std::make_pair(2, 2)); list.push_back(std::make_pair(3, 2)); LOG_DEBUG(<< "list = " << CContainerPrinter::print(list)); - CPPUNIT_ASSERT_EQUAL(std::string("[(1, 2), (2, 2), (3, 2)]"), CContainerPrinter::print(list)); + CPPUNIT_ASSERT_EQUAL(std::string("[(1, 2), (2, 2), (3, 2)]"), + CContainerPrinter::print(list)); std::list> plist; plist.push_back(boost::shared_ptr()); @@ -51,15 +52,21 @@ void CContainerPrinterTest::testAll() { map.insert(std::make_pair(3.3, &fivePointOne)); map.insert(std::make_pair(1.0, static_cast(nullptr))); LOG_DEBUG(<< "map = " << CContainerPrinter::print(map)); - CPPUNIT_ASSERT_EQUAL(std::string("[(1, \"null\"), (1.1, 3), (3.3, 5.1)]"), CContainerPrinter::print(map)); + CPPUNIT_ASSERT_EQUAL(std::string("[(1, \"null\"), (1.1, 3), (3.3, 5.1)]"), + CContainerPrinter::print(map)); - std::auto_ptr pints[] = {std::auto_ptr(new int(2)), std::auto_ptr(new int(3)), std::auto_ptr(new int(2))}; - LOG_DEBUG(<< "pints = " << CContainerPrinter::print(boost::begin(pints), boost::end(pints))); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3, 2]"), CContainerPrinter::print(boost::begin(pints), boost::end(pints))); + std::auto_ptr pints[] = {std::auto_ptr(new int(2)), + std::auto_ptr(new int(3)), + std::auto_ptr(new int(2))}; + LOG_DEBUG(<< "pints = " + << CContainerPrinter::print(boost::begin(pints), boost::end(pints))); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3, 2]"), + CContainerPrinter::print(boost::begin(pints), boost::end(pints))); std::vector> ovec(2, boost::optional()); LOG_DEBUG(<< "ovec = " << CContainerPrinter::print(ovec)); - CPPUNIT_ASSERT_EQUAL(std::string("[\"null\", \"null\"]"), CContainerPrinter::print(ovec)); + CPPUNIT_ASSERT_EQUAL(std::string("[\"null\", \"null\"]"), + CContainerPrinter::print(ovec)); std::vector>, double>> aggregate; 
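Stepping back from the individual files: nearly all of the unit-test churn in this patch is one CppUnit registration idiom being rewrapped. In isolation the pattern looks like the sketch below; CExampleTest is a made-up fixture, and the template argument on TestCaller names the fixture class whose method is being registered.

#include <cppunit/TestAssert.h>
#include <cppunit/TestCaller.h>
#include <cppunit/TestFixture.h>
#include <cppunit/TestSuite.h>

class CExampleTest : public CppUnit::TestFixture {
public:
    void testSomething() { CPPUNIT_ASSERT_EQUAL(4, 2 + 2); }

    static CppUnit::Test* suite() {
        CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CExampleTest");
        // Post-reformat layout: break after TestCaller's opening parenthesis
        // so the test name and method pointer share the continuation line
        suiteOfTests->addTest(new CppUnit::TestCaller<CExampleTest>(
            "CExampleTest::testSomething", &CExampleTest::testSomething));
        return suiteOfTests;
    }
};

Breaking after the opening parenthesis keeps the name string and the member-function pointer together, which is the shape every reformatted suite() in these hunks converges on.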
aggregate.push_back(std::make_pair(list, 1.3)); @@ -73,8 +80,8 @@ void CContainerPrinterTest::testAll() { CppUnit::Test* CContainerPrinterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CContainerPrinterTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CContainerPrinterTest::testAll", &CContainerPrinterTest::testAll)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CContainerPrinterTest::testAll", &CContainerPrinterTest::testAll)); return suiteOfTests; } diff --git a/lib/core/unittest/CContainerThroughputTest.cc b/lib/core/unittest/CContainerThroughputTest.cc index 2388df571a..3c88173703 100644 --- a/lib/core/unittest/CContainerThroughputTest.cc +++ b/lib/core/unittest/CContainerThroughputTest.cc @@ -22,18 +22,18 @@ const size_t CContainerThroughputTest::TEST_SIZE(10000000); CppUnit::Test* CContainerThroughputTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CContainerThroughputTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CContainerThroughputTest::testVector", &CContainerThroughputTest::testVector)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CContainerThroughputTest::testList", &CContainerThroughputTest::testList)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CContainerThroughputTest::testDeque", &CContainerThroughputTest::testDeque)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CContainerThroughputTest::testMap", &CContainerThroughputTest::testMap)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CContainerThroughputTest::testCircBuf", &CContainerThroughputTest::testCircBuf)); - suiteOfTests->addTest(new CppUnit::TestCaller("CContainerThroughputTest::testMultiIndex", - &CContainerThroughputTest::testMultiIndex)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CContainerThroughputTest::testVector", &CContainerThroughputTest::testVector)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CContainerThroughputTest::testList", &CContainerThroughputTest::testList)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CContainerThroughputTest::testDeque", &CContainerThroughputTest::testDeque)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CContainerThroughputTest::testMap", &CContainerThroughputTest::testMap)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CContainerThroughputTest::testCircBuf", &CContainerThroughputTest::testCircBuf)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CContainerThroughputTest::testMultiIndex", &CContainerThroughputTest::testMultiIndex)); return suiteOfTests; } @@ -49,7 +49,8 @@ void CContainerThroughputTest::testVector() { testVec.reserve(FILL_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting vector throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting vector throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); size_t count(0); while (count < FILL_SIZE) { @@ -64,12 +65,13 @@ void CContainerThroughputTest::testVector() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished vector throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished vector throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testVec.size()); - LOG_INFO(<< "Vector throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) - << " seconds"); + LOG_INFO(<< "Vector throughput test with fill size " << FILL_SIZE << " and test 
size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CContainerThroughputTest::testList() { @@ -77,7 +79,8 @@ void CContainerThroughputTest::testList() { TContentList testList; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting list throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting list throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); size_t count(0); while (count < FILL_SIZE) { @@ -92,12 +95,13 @@ void CContainerThroughputTest::testList() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished list throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished list throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testList.size()); - LOG_INFO(<< "List throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) - << " seconds"); + LOG_INFO(<< "List throughput test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CContainerThroughputTest::testDeque() { @@ -105,7 +109,8 @@ void CContainerThroughputTest::testDeque() { TContentDeque testDeque; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting deque throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting deque throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); size_t count(0); while (count < FILL_SIZE) { @@ -120,12 +125,13 @@ void CContainerThroughputTest::testDeque() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished deque throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished deque throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testDeque.size()); - LOG_INFO(<< "Deque throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) - << " seconds"); + LOG_INFO(<< "Deque throughput test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CContainerThroughputTest::testMap() { @@ -133,7 +139,8 @@ void CContainerThroughputTest::testMap() { TSizeContentMap testMap; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting map throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting map throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); size_t count(0); while (count < FILL_SIZE) { @@ -148,12 +155,13 @@ void CContainerThroughputTest::testMap() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished map throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished map throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testMap.size()); - LOG_INFO(<< "Map throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) - << " seconds"); + LOG_INFO(<< "Map throughput test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CContainerThroughputTest::testCircBuf() { @@ -161,7 +169,8 @@ void CContainerThroughputTest::testCircBuf() { TContentCircBuf testCircBuf(FILL_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting 
circular buffer throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting circular buffer throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); size_t count(0); while (count < FILL_SIZE) { @@ -176,22 +185,23 @@ void CContainerThroughputTest::testCircBuf() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished circular buffer throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished circular buffer throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testCircBuf.size()); - LOG_INFO(<< "Circular buffer throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " - << (end - start) << " seconds"); + LOG_INFO(<< "Circular buffer throughput test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CContainerThroughputTest::testMultiIndex() { using TContentMIndex = boost::multi_index::multi_index_container< - SContent, - boost::multi_index::indexed_by>>; + SContent, boost::multi_index::indexed_by>>; TContentMIndex testMultiIndex; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting multi-index throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting multi-index throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); size_t count(0); while (count < FILL_SIZE) { @@ -206,13 +216,15 @@ void CContainerThroughputTest::testMultiIndex() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished multi-index throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished multi-index throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(FILL_SIZE, testMultiIndex.size()); - LOG_INFO(<< "Multi-index throughput test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) - << " seconds"); + LOG_INFO(<< "Multi-index throughput test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } -CContainerThroughputTest::SContent::SContent(size_t count) : s_Size(count), s_Ptr(this), s_Double(double(count)) { +CContainerThroughputTest::SContent::SContent(size_t count) + : s_Size(count), s_Ptr(this), s_Double(double(count)) { } diff --git a/lib/core/unittest/CDelimiterTest.cc b/lib/core/unittest/CDelimiterTest.cc index f937798f1e..598c4ed1ed 100644 --- a/lib/core/unittest/CDelimiterTest.cc +++ b/lib/core/unittest/CDelimiterTest.cc @@ -14,35 +14,39 @@ CppUnit::Test* CDelimiterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDelimiterTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDelimiterTest::testSimpleTokenise", &CDelimiterTest::testSimpleTokenise)); - suiteOfTests->addTest(new CppUnit::TestCaller("CDelimiterTest::testRegexTokenise", &CDelimiterTest::testRegexTokenise)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDelimiterTest::testQuotedTokenise", &CDelimiterTest::testQuotedTokenise)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDelimiterTest::testQuotedEscapedTokenise", &CDelimiterTest::testQuotedEscapedTokenise)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDelimiterTest::testInvalidQuotedTokenise", &CDelimiterTest::testInvalidQuotedTokenise)); - suiteOfTests->addTest(new CppUnit::TestCaller("CDelimiterTest::testQuoteEqualsEscapeTokenise", - 
&CDelimiterTest::testQuoteEqualsEscapeTokenise)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDelimiterTest::testSimpleTokenise", &CDelimiterTest::testSimpleTokenise)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDelimiterTest::testRegexTokenise", &CDelimiterTest::testRegexTokenise)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDelimiterTest::testQuotedTokenise", &CDelimiterTest::testQuotedTokenise)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDelimiterTest::testQuotedEscapedTokenise", &CDelimiterTest::testQuotedEscapedTokenise)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDelimiterTest::testInvalidQuotedTokenise", &CDelimiterTest::testInvalidQuotedTokenise)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDelimiterTest::testQuoteEqualsEscapeTokenise", + &CDelimiterTest::testQuoteEqualsEscapeTokenise)); return suiteOfTests; } void CDelimiterTest::testSimpleTokenise() { - std::string testData("Oct 12, 2008 8:38:51 AM org.apache.tomcat.util.http.Parameters processParameters\n" - "WARNING: Parameters: Invalid chunk ignored.\n" - "Oct 12, 2008 8:38:52 AM org.apache.tomcat.util.http.Parameters processParameters\n" - "WARNING: Parameters: Invalid chunk ignored.\n" - "Oct 12, 2008 8:38:53 AM org.apache.tomcat.util.http.Parameters processParameters\n" - "WARNING: Parameters: Invalid chunk ignored.\n" - "Oct 12, 2008 8:39:03 AM org.apache.tomcat.util.http.Parameters processParameters\n" - "WARNING: Parameters: Invalid chunk ignored.\n" - "Oct 12, 2008 8:39:04 AM org.apache.tomcat.util.http.Parameters processParameters\n" - "WARNING: Parameters: Invalid chunk ignored.\n"); + std::string testData( + "Oct 12, 2008 8:38:51 AM org.apache.tomcat.util.http.Parameters processParameters\n" + "WARNING: Parameters: Invalid chunk ignored.\n" + "Oct 12, 2008 8:38:52 AM org.apache.tomcat.util.http.Parameters processParameters\n" + "WARNING: Parameters: Invalid chunk ignored.\n" + "Oct 12, 2008 8:38:53 AM org.apache.tomcat.util.http.Parameters processParameters\n" + "WARNING: Parameters: Invalid chunk ignored.\n" + "Oct 12, 2008 8:39:03 AM org.apache.tomcat.util.http.Parameters processParameters\n" + "WARNING: Parameters: Invalid chunk ignored.\n" + "Oct 12, 2008 8:39:04 AM org.apache.tomcat.util.http.Parameters processParameters\n" + "WARNING: Parameters: Invalid chunk ignored.\n"); LOG_DEBUG(<< "Input data:\n" << testData << '\n'); - ml::core::CDelimiter delimiter("\n", "\\w+\\s+\\d+,\\s+\\d+\\s+\\d+:\\d+:\\d+\\s+\\w+", true); + ml::core::CDelimiter delimiter( + "\n", "\\w+\\s+\\d+,\\s+\\d+\\s+\\d+:\\d+:\\d+\\s+\\w+", true); ml::core::CStringUtils::TStrVec delimited; std::string remainder; @@ -51,7 +55,8 @@ void CDelimiterTest::testSimpleTokenise() { std::ostringstream strm1; std::copy(delimited.begin(), delimited.end(), TStrOStreamItr(strm1, "\n")); - LOG_DEBUG(<< "First output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" << strm1.str()); + LOG_DEBUG(<< "First output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" + << strm1.str()); LOG_DEBUG(<< "First remainder:\n" << remainder << '\n'); CPPUNIT_ASSERT_EQUAL(size_t(4), delimited.size()); @@ -63,7 +68,8 @@ void CDelimiterTest::testSimpleTokenise() { std::ostringstream strm2; std::copy(delimited.begin(), delimited.end(), TStrOStreamItr(strm2, "\n")); - LOG_DEBUG(<< "Second output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" << strm2.str()); + LOG_DEBUG(<< "Second output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" + << 
strm2.str()); LOG_DEBUG(<< "Second remainder:\n" << remainder << '\n'); CPPUNIT_ASSERT_EQUAL(size_t(5), delimited.size()); @@ -72,21 +78,23 @@ void CDelimiterTest::testSimpleTokenise() { void CDelimiterTest::testRegexTokenise() { // Some of the lines here are Windows text format, and others Unix text - std::string testData("Oct 12, 2008 8:38:51 AM org.apache.tomcat.util.http.Parameters processParameters\r\n" - "WARNING: Parameters: Invalid chunk ignored.\r\n" - "Oct 12, 2008 8:38:52 AM org.apache.tomcat.util.http.Parameters processParameters\r\n" - "WARNING: Parameters: Invalid chunk ignored.\n" - "Oct 12, 2008 8:38:53 AM org.apache.tomcat.util.http.Parameters processParameters\n" - "WARNING: Parameters: Invalid chunk ignored.\r\n" - "Oct 12, 2008 8:39:03 AM org.apache.tomcat.util.http.Parameters processParameters\r\n" - "WARNING: Parameters: Invalid chunk ignored.\n" - "Oct 12, 2008 8:39:04 AM org.apache.tomcat.util.http.Parameters processParameters\n" - "WARNING: Parameters: Invalid chunk ignored.\n"); + std::string testData( + "Oct 12, 2008 8:38:51 AM org.apache.tomcat.util.http.Parameters processParameters\r\n" + "WARNING: Parameters: Invalid chunk ignored.\r\n" + "Oct 12, 2008 8:38:52 AM org.apache.tomcat.util.http.Parameters processParameters\r\n" + "WARNING: Parameters: Invalid chunk ignored.\n" + "Oct 12, 2008 8:38:53 AM org.apache.tomcat.util.http.Parameters processParameters\n" + "WARNING: Parameters: Invalid chunk ignored.\r\n" + "Oct 12, 2008 8:39:03 AM org.apache.tomcat.util.http.Parameters processParameters\r\n" + "WARNING: Parameters: Invalid chunk ignored.\n" + "Oct 12, 2008 8:39:04 AM org.apache.tomcat.util.http.Parameters processParameters\n" + "WARNING: Parameters: Invalid chunk ignored.\n"); LOG_DEBUG(<< "Input data:\n" << testData << '\n'); // Regex matches line terminator for either Windows or Unix text - ml::core::CDelimiter delimiter("\r?\n", "\\w+\\s+\\d+,\\s+\\d+\\s+\\d+:\\d+:\\d+\\s+\\w+", true); + ml::core::CDelimiter delimiter( + "\r?\n", "\\w+\\s+\\d+,\\s+\\d+\\s+\\d+:\\d+:\\d+\\s+\\w+", true); ml::core::CStringUtils::TStrVec delimited; std::string remainder; @@ -95,7 +103,8 @@ void CDelimiterTest::testRegexTokenise() { std::ostringstream strm1; std::copy(delimited.begin(), delimited.end(), TStrOStreamItr(strm1, "\n")); - LOG_DEBUG(<< "First output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" << strm1.str()); + LOG_DEBUG(<< "First output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" + << strm1.str()); LOG_DEBUG(<< "First remainder:\n" << remainder << '\n'); CPPUNIT_ASSERT_EQUAL(size_t(4), delimited.size()); @@ -107,7 +116,8 @@ void CDelimiterTest::testRegexTokenise() { std::ostringstream strm2; std::copy(delimited.begin(), delimited.end(), TStrOStreamItr(strm2, "\n")); - LOG_DEBUG(<< "Second output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" << strm2.str()); + LOG_DEBUG(<< "Second output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" + << strm2.str()); LOG_DEBUG(<< "Second remainder:\n" << remainder << '\n'); CPPUNIT_ASSERT_EQUAL(size_t(5), delimited.size()); @@ -134,7 +144,8 @@ void CDelimiterTest::testQuotedTokenise() { std::ostringstream strm; std::copy(delimited.begin(), delimited.end(), TStrOStreamItr(strm, "\n")); - LOG_DEBUG(<< "Quoted output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" << strm.str()); + LOG_DEBUG(<< "Quoted output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" + << strm.str()); // 40 fields (most blank) 
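One property worth keeping in mind while reviewing the rewrapped string literals in these tests (the base64 fixtures earlier and the delimiter inputs here): adjacent string literals are concatenated at translation time, so splitting a long literal across continuation lines cannot change the value under test. A minimal check, using an invented two-piece literal:

#include <cassert>
#include <string>

int main() {
    // The compiler joins adjacent literals before anything else sees them,
    // so both strings below are identical
    std::string oneLine("WARNING: Parameters: Invalid chunk ignored.");
    std::string split("WARNING: Parameters: "
                      "Invalid chunk ignored.");
    assert(oneLine == split);
    return 0;
}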
CPPUNIT_ASSERT_EQUAL(size_t(40), delimited.size()); @@ -143,10 +154,11 @@ void CDelimiterTest::testQuotedTokenise() { void CDelimiterTest::testQuotedEscapedTokenise() { // Similar to previous test, but there are four values with escaped quotes in AFTER // pre-processing by the C++ compiler - std::string testData("3,1,5415.1132,56135135,0x00000001,0x00000002,\"SOME_STRING\",\"\",\"\\\"\",\"\",\"\",\"\",\"\",\"\",\"A " - "\\\"middling\\\" " - "one\",\"\",\"\",\"\",\"\",0x0000000000000000,0x0000000000000000,\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\\\"start\"," - "\"\",\"\",\"end\\\"\",\"\",\"\",\"\",\"\",\"\",\"\\\"both\\\"\",\"\",\"\""); + std::string testData( + "3,1,5415.1132,56135135,0x00000001,0x00000002,\"SOME_STRING\",\"\",\"\\\"\",\"\",\"\",\"\",\"\",\"\",\"A " + "\\\"middling\\\" " + "one\",\"\",\"\",\"\",\"\",0x0000000000000000,0x0000000000000000,\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\\\"start\"," + "\"\",\"\",\"end\\\"\",\"\",\"\",\"\",\"\",\"\",\"\\\"both\\\"\",\"\",\"\""); LOG_DEBUG(<< "Input data:\n" << testData << '\n'); @@ -162,7 +174,8 @@ void CDelimiterTest::testQuotedEscapedTokenise() { std::ostringstream strm; std::copy(delimited.begin(), delimited.end(), TStrOStreamItr(strm, "\n")); - LOG_DEBUG(<< "Quoted output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" << strm.str()); + LOG_DEBUG(<< "Quoted output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" + << strm.str()); // 40 fields (most blank) CPPUNIT_ASSERT_EQUAL(size_t(40), delimited.size()); @@ -184,7 +197,8 @@ void CDelimiterTest::testInvalidQuotedTokenise() { delimiter.tokenise(testData, false, delimited, remainder); CPPUNIT_ASSERT_EQUAL(size_t(3), delimited.size()); - CPPUNIT_ASSERT_EQUAL(std::string("/some_action.do?param1=foo¶m2=Sljahfej+kfejhafef/3931nfV"), remainder); + CPPUNIT_ASSERT_EQUAL(std::string("/some_action.do?param1=foo¶m2=Sljahfej+kfejhafef/3931nfV"), + remainder); } void CDelimiterTest::testQuoteEqualsEscapeTokenise() { @@ -212,7 +226,8 @@ void CDelimiterTest::testQuoteEqualsEscapeTokenise() { std::ostringstream strm; std::copy(delimited.begin(), delimited.end(), TStrOStreamItr(strm, "\n")); - LOG_DEBUG(<< "Quoted output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" << strm.str()); + LOG_DEBUG(<< "Quoted output data:\nNumber of lines = " << delimited.size() << "\nLines are:\n" + << strm.str()); // 42 fields - in particular, the JSON data at index 31 in the vector should // still contain commas and double quotes diff --git a/lib/core/unittest/CDetachedProcessSpawnerTest.cc b/lib/core/unittest/CDetachedProcessSpawnerTest.cc index 2430f94494..c243d2bdf1 100644 --- a/lib/core/unittest/CDetachedProcessSpawnerTest.cc +++ b/lib/core/unittest/CDetachedProcessSpawnerTest.cc @@ -24,7 +24,8 @@ const std::string INPUT_FILE("testfiles\\withNs.xml"); // File size is different on Windows due to CRLF line endings const size_t EXPECTED_FILE_SIZE(585); const char* winDir(::getenv("windir")); -const std::string PROCESS_PATH1(winDir != 0 ? std::string(winDir) + "\\System32\\cmd" : std::string("C:\\Windows\\System32\\cmd")); +const std::string PROCESS_PATH1(winDir != 0 ? 
std::string(winDir) + "\\System32\\cmd" + : std::string("C:\\Windows\\System32\\cmd")); const std::string PROCESS_ARGS1[] = {"/C", "copy " + INPUT_FILE + " ."}; const std::string& PROCESS_PATH2 = PROCESS_PATH1; const std::string PROCESS_ARGS2[] = {"/C", "ping 127.0.0.1 -n 11"}; @@ -32,10 +33,9 @@ const std::string PROCESS_ARGS2[] = {"/C", "ping 127.0.0.1 -n 11"}; const std::string INPUT_FILE("testfiles/withNs.xml"); const size_t EXPECTED_FILE_SIZE(563); const std::string PROCESS_PATH1("/bin/dd"); -const std::string PROCESS_ARGS1[] = {"if=" + INPUT_FILE, - "of=" + OUTPUT_FILE, - "bs=1", - "count=" + ml::core::CStringUtils::typeToString(EXPECTED_FILE_SIZE)}; +const std::string PROCESS_ARGS1[] = { + "if=" + INPUT_FILE, "of=" + OUTPUT_FILE, "bs=1", + "count=" + ml::core::CStringUtils::typeToString(EXPECTED_FILE_SIZE)}; const std::string PROCESS_PATH2("/bin/sleep"); const std::string PROCESS_ARGS2[] = {"10"}; #endif @@ -44,14 +44,16 @@ const std::string PROCESS_ARGS2[] = {"10"}; CppUnit::Test* CDetachedProcessSpawnerTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetachedProcessSpawnerTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CDetachedProcessSpawnerTest::testSpawn", - &CDetachedProcessSpawnerTest::testSpawn)); - suiteOfTests->addTest(new CppUnit::TestCaller("CDetachedProcessSpawnerTest::testKill", - &CDetachedProcessSpawnerTest::testKill)); - suiteOfTests->addTest(new CppUnit::TestCaller("CDetachedProcessSpawnerTest::testPermitted", - &CDetachedProcessSpawnerTest::testPermitted)); - suiteOfTests->addTest(new CppUnit::TestCaller("CDetachedProcessSpawnerTest::testNonExistent", - &CDetachedProcessSpawnerTest::testNonExistent)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetachedProcessSpawnerTest::testSpawn", &CDetachedProcessSpawnerTest::testSpawn)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetachedProcessSpawnerTest::testKill", &CDetachedProcessSpawnerTest::testKill)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetachedProcessSpawnerTest::testPermitted", + &CDetachedProcessSpawnerTest::testPermitted)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDetachedProcessSpawnerTest::testNonExistent", + &CDetachedProcessSpawnerTest::testNonExistent)); return suiteOfTests; } @@ -67,7 +69,8 @@ void CDetachedProcessSpawnerTest::testSpawn() { ml::core::CDetachedProcessSpawner::TStrVec permittedPaths(1, PROCESS_PATH1); ml::core::CDetachedProcessSpawner spawner(permittedPaths); - ml::core::CDetachedProcessSpawner::TStrVec args(PROCESS_ARGS1, PROCESS_ARGS1 + boost::size(PROCESS_ARGS1)); + ml::core::CDetachedProcessSpawner::TStrVec args( + PROCESS_ARGS1, PROCESS_ARGS1 + boost::size(PROCESS_ARGS1)); CPPUNIT_ASSERT(spawner.spawn(PROCESS_PATH1, args)); @@ -89,7 +92,8 @@ void CDetachedProcessSpawnerTest::testKill() { ml::core::CDetachedProcessSpawner::TStrVec permittedPaths(1, PROCESS_PATH2); ml::core::CDetachedProcessSpawner spawner(permittedPaths); - ml::core::CDetachedProcessSpawner::TStrVec args(PROCESS_ARGS2, PROCESS_ARGS2 + boost::size(PROCESS_ARGS2)); + ml::core::CDetachedProcessSpawner::TStrVec args( + PROCESS_ARGS2, PROCESS_ARGS2 + boost::size(PROCESS_ARGS2)); ml::core::CProcess::TPid childPid = 0; CPPUNIT_ASSERT(spawner.spawn(PROCESS_PATH2, args, childPid)); @@ -124,5 +128,6 @@ void CDetachedProcessSpawnerTest::testNonExistent() { ml::core::CDetachedProcessSpawner spawner(permittedPaths); // Should fail as even though it's a permitted process as the file doesn't exist - CPPUNIT_ASSERT(!spawner.spawn("./does_not_exist", 
ml::core::CDetachedProcessSpawner::TStrVec())); + CPPUNIT_ASSERT(!spawner.spawn("./does_not_exist", + ml::core::CDetachedProcessSpawner::TStrVec())); } diff --git a/lib/core/unittest/CDualThreadStreamBufTest.cc b/lib/core/unittest/CDualThreadStreamBufTest.cc index bf687b3cb6..e210188579 100644 --- a/lib/core/unittest/CDualThreadStreamBufTest.cc +++ b/lib/core/unittest/CDualThreadStreamBufTest.cc @@ -22,14 +22,14 @@ CppUnit::Test* CDualThreadStreamBufTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDualThreadStreamBufTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CDualThreadStreamBufTest::testThroughput", - &CDualThreadStreamBufTest::testThroughput)); - suiteOfTests->addTest(new CppUnit::TestCaller("CDualThreadStreamBufTest::testSlowConsumer", - &CDualThreadStreamBufTest::testSlowConsumer)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDualThreadStreamBufTest::testPutback", &CDualThreadStreamBufTest::testPutback)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDualThreadStreamBufTest::testFatal", &CDualThreadStreamBufTest::testFatal)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDualThreadStreamBufTest::testThroughput", &CDualThreadStreamBufTest::testThroughput)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDualThreadStreamBufTest::testSlowConsumer", &CDualThreadStreamBufTest::testSlowConsumer)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDualThreadStreamBufTest::testPutback", &CDualThreadStreamBufTest::testPutback)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDualThreadStreamBufTest::testFatal", &CDualThreadStreamBufTest::testFatal)); return suiteOfTests; } @@ -39,7 +39,8 @@ namespace { class CInputThread : public ml::core::CThread { public: CInputThread(ml::core::CDualThreadStreamBuf& buffer, uint32_t delay = 0, size_t fatalAfter = 0) - : m_Buffer(buffer), m_Delay(delay), m_FatalAfter(fatalAfter), m_TotalData(0) {} + : m_Buffer(buffer), m_Delay(delay), m_FatalAfter(fatalAfter), + m_TotalData(0) {} size_t totalData() const { return m_TotalData; } @@ -69,19 +70,20 @@ class CInputThread : public ml::core::CThread { size_t m_TotalData; }; -const char* DATA("According to the most recent Wikipedia definition \"Predictive " - "analytics encompasses a variety of statistical techniques from " - "modeling, machine learning, data mining and game theory that ... " - "exploit patterns found in historical and transactional data to " - "identify risks and opportunities.\"\n" - "In applications such as credit scoring, predictive analytics " - "identifies patterns and relationships in huge volumes of data, hidden " - "to human analysis, that presages an undesirable outcome. Many " - "vendors refer to their ability to project a ramp in a single metric, " - "say CPU utilization, as predictive analytics. As most users know, " - "these capabilities are of limited value in that single metrics are " - "rarely the cause of cataclysmic failures. Rather it is the impact of " - "change between components that causes failure in complex IT systems.\n"); +const char* + DATA("According to the most recent Wikipedia definition \"Predictive " + "analytics encompasses a variety of statistical techniques from " + "modeling, machine learning, data mining and game theory that ... 
" + "exploit patterns found in historical and transactional data to " + "identify risks and opportunities.\"\n" + "In applications such as credit scoring, predictive analytics " + "identifies patterns and relationships in huge volumes of data, hidden " + "to human analysis, that presages an undesirable outcome. Many " + "vendors refer to their ability to project a ramp in a single metric, " + "say CPU utilization, as predictive analytics. As most users know, " + "these capabilities are of limited value in that single metrics are " + "rarely the cause of cataclysmic failures. Rather it is the impact of " + "change between components that causes failure in complex IT systems.\n"); } void CDualThreadStreamBufTest::testThroughput() { @@ -94,7 +96,8 @@ void CDualThreadStreamBufTest::testThroughput() { inputThread.start(); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting REST buffer throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting REST buffer throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); for (size_t count = 0; count < TEST_SIZE; ++count) { std::streamsize toWrite(static_cast(dataSize)); @@ -107,19 +110,21 @@ void CDualThreadStreamBufTest::testThroughput() { } } - CPPUNIT_ASSERT_EQUAL(static_cast(totalDataSize), buf.pubseekoff(0, std::ios_base::cur, std::ios_base::out)); + CPPUNIT_ASSERT_EQUAL(static_cast(totalDataSize), + buf.pubseekoff(0, std::ios_base::cur, std::ios_base::out)); buf.signalEndOfFile(); inputThread.waitForFinish(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished REST buffer throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished REST buffer throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(totalDataSize, inputThread.totalData()); - LOG_INFO(<< "REST buffer throughput test with test size " << TEST_SIZE << " (total data transferred " << totalDataSize - << " bytes) took " << (end - start) << " seconds"); + LOG_INFO(<< "REST buffer throughput test with test size " << TEST_SIZE << " (total data transferred " + << totalDataSize << " bytes) took " << (end - start) << " seconds"); } void CDualThreadStreamBufTest::testSlowConsumer() { @@ -134,7 +139,8 @@ void CDualThreadStreamBufTest::testSlowConsumer() { inputThread.start(); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting REST buffer slow consumer test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting REST buffer slow consumer test at " + << ml::core::CTimeUtils::toTimeString(start)); for (size_t count = 0; count < TEST_SIZE; ++count) { std::streamsize toWrite(static_cast(dataSize)); @@ -152,15 +158,18 @@ void CDualThreadStreamBufTest::testSlowConsumer() { inputThread.waitForFinish(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished REST buffer slow consumer test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished REST buffer slow consumer test at " + << ml::core::CTimeUtils::toTimeString(end)); CPPUNIT_ASSERT_EQUAL(totalDataSize, inputThread.totalData()); ml::core_t::TTime duration(end - start); - LOG_INFO(<< "REST buffer slow consumer test with test size " << TEST_SIZE << ", " << numNewLines << " newlines per message and delay " + LOG_INFO(<< "REST buffer slow consumer test with test size " << TEST_SIZE + << ", " << numNewLines << " newlines per message and delay " << DELAY << "ms took " << duration << " seconds"); - ml::core_t::TTime 
delaySecs(static_cast((DELAY * numNewLines * TEST_SIZE) / 1000)); + ml::core_t::TTime delaySecs( + static_cast((DELAY * numNewLines * TEST_SIZE) / 1000)); CPPUNIT_ASSERT(duration >= delaySecs); static const ml::core_t::TTime TOLERANCE(3); CPPUNIT_ASSERT(duration <= delaySecs + TOLERANCE); @@ -239,7 +248,8 @@ void CDualThreadStreamBufTest::testFatal() { inputThread.waitForFinish(); - LOG_DEBUG(<< "Total data written in fatal error test of size " << TEST_SIZE << " is " << totalDataWritten << " bytes"); + LOG_DEBUG(<< "Total data written in fatal error test of size " << TEST_SIZE + << " is " << totalDataWritten << " bytes"); // The fatal error should have stopped the writer thread from writing all the data CPPUNIT_ASSERT(totalDataWritten >= BUFFER_CAPACITY); diff --git a/lib/core/unittest/CFileDeleterTest.cc b/lib/core/unittest/CFileDeleterTest.cc index 8f317cd1fa..243d96e735 100644 --- a/lib/core/unittest/CFileDeleterTest.cc +++ b/lib/core/unittest/CFileDeleterTest.cc @@ -16,7 +16,8 @@ CppUnit::Test* CFileDeleterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFileDeleterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CFileDeleterTest::testDelete", &CFileDeleterTest::testDelete)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFileDeleterTest::testDelete", &CFileDeleterTest::testDelete)); return suiteOfTests; } @@ -32,10 +33,12 @@ void CFileDeleterTest::testDelete() { testFile << "to be deleted" << std::endl; } // The file should exist by the time the stream is closed here - CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::access(fileName.c_str(), ml::core::COsFileFuncs::EXISTS)); + CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::access( + fileName.c_str(), ml::core::COsFileFuncs::EXISTS)); } // The file should be deleted here - CPPUNIT_ASSERT_EQUAL(-1, ml::core::COsFileFuncs::access(fileName.c_str(), ml::core::COsFileFuncs::EXISTS)); + CPPUNIT_ASSERT_EQUAL(-1, ml::core::COsFileFuncs::access( + fileName.c_str(), ml::core::COsFileFuncs::EXISTS)); CPPUNIT_ASSERT_EQUAL(ENOENT, errno); } diff --git a/lib/core/unittest/CFlatPrefixTreeTest.cc b/lib/core/unittest/CFlatPrefixTreeTest.cc index 72d8f5d317..45c254c826 100644 --- a/lib/core/unittest/CFlatPrefixTreeTest.cc +++ b/lib/core/unittest/CFlatPrefixTreeTest.cc @@ -24,24 +24,26 @@ using namespace core; CppUnit::Test* CFlatPrefixTreeTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFlatPrefixTreeTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CFlatPrefixTreeTest::testBuildGivenUnsortedInput", - &CFlatPrefixTreeTest::testBuildGivenUnsortedInput)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates", - &CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFlatPrefixTreeTest::testEmptyString", &CFlatPrefixTreeTest::testEmptyString)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFlatPrefixTreeTest::testSimple", &CFlatPrefixTreeTest::testSimple)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFlatPrefixTreeTest::testLeafAndBranch", &CFlatPrefixTreeTest::testLeafAndBranch)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix", - &CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix", - 
&CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CFlatPrefixTreeTest::testRandom", &CFlatPrefixTreeTest::testRandom)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFlatPrefixTreeTest::testBuildGivenUnsortedInput", + &CFlatPrefixTreeTest::testBuildGivenUnsortedInput)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates", + &CFlatPrefixTreeTest::testBuildGivenSortedInputWithDuplicates)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFlatPrefixTreeTest::testEmptyString", &CFlatPrefixTreeTest::testEmptyString)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFlatPrefixTreeTest::testSimple", &CFlatPrefixTreeTest::testSimple)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFlatPrefixTreeTest::testLeafAndBranch", &CFlatPrefixTreeTest::testLeafAndBranch)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix", + &CFlatPrefixTreeTest::testMatchesStartGivenStringThatMatchesMoreThanAGivenPrefix)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix", + &CFlatPrefixTreeTest::testMatchesFullyGivenStringThatIsSubstringOfPrefix)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFlatPrefixTreeTest::testRandom", &CFlatPrefixTreeTest::testRandom)); return suiteOfTests; } @@ -179,7 +181,8 @@ void CFlatPrefixTreeTest::testRandom() { lookups.insert(lookups.end(), prefixes.begin(), prefixes.end()); for (std::size_t i = 0; i < lookups.size(); ++i) { - CPPUNIT_ASSERT(prefixTree.matchesFully(lookups[i]) == set.count(lookups[i]) > 0); + CPPUNIT_ASSERT(prefixTree.matchesFully(lookups[i]) == + set.count(lookups[i]) > 0); } } @@ -188,7 +191,8 @@ void CFlatPrefixTreeTest::testRandom() { CFlatPrefixTree::TStrVec suffixes; rng.generateWords(10, 1000, suffixes); for (std::size_t i = 0; i < 100000; i++) { - std::string key = prefixes[uniformGen(prefixes.size())] + suffixes[uniformGen(suffixes.size())]; + std::string key = prefixes[uniformGen(prefixes.size())] + + suffixes[uniformGen(suffixes.size())]; CPPUNIT_ASSERT(prefixTree.matchesStart(key)); } } diff --git a/lib/core/unittest/CFunctionalTest.cc b/lib/core/unittest/CFunctionalTest.cc index 4b590d1c4e..798160a492 100644 --- a/lib/core/unittest/CFunctionalTest.cc +++ b/lib/core/unittest/CFunctionalTest.cc @@ -55,7 +55,8 @@ void CFunctionalTest::testDereference() { const double* values[] = {&one, &two, &three}; for (std::size_t i = 0u; i < boost::size(values); ++i) { for (std::size_t j = 0u; j < boost::size(values); ++j) { - CPPUNIT_ASSERT_EQUAL(less(*values[i], *values[j]), derefLess(values[i], values[j])); + CPPUNIT_ASSERT_EQUAL(less(*values[i], *values[j]), + derefLess(values[i], values[j])); } } } @@ -63,8 +64,10 @@ void CFunctionalTest::testDereference() { CppUnit::Test* CFunctionalTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFunctionalTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CFunctionalTest::testIsNull", &CFunctionalTest::testIsNull)); - suiteOfTests->addTest(new CppUnit::TestCaller("CFunctionalTest::testDereference", &CFunctionalTest::testDereference)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFunctionalTest::testIsNull", &CFunctionalTest::testIsNull)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CFunctionalTest::testDereference", &CFunctionalTest::testDereference)); return 
suiteOfTests;
}
diff --git a/lib/core/unittest/CHashingTest.cc b/lib/core/unittest/CHashingTest.cc
index cfaea39172..985ebaa97e 100644
--- a/lib/core/unittest/CHashingTest.cc
+++ b/lib/core/unittest/CHashingTest.cc
@@ -73,9 +73,11 @@ void CHashingTest::testUniversalHash() {
             collisionsRandom += static_cast<double>(collisions);
             hashedRandom += static_cast<double>(n * (n - 1)) / 2.0;
-            double pc = 2.0 * static_cast<double>(collisions) / static_cast<double>(n * (n - 1));
+            double pc = 2.0 * static_cast<double>(collisions) /
+                        static_cast<double>(n * (n - 1));
-            LOG_DEBUG(<< "collisions = " << collisions << ", P(collision) = " << pc << ", 1/m = " << (1.0 / static_cast<double>(m[i])));
+            LOG_DEBUG(<< "collisions = " << collisions << ", P(collision) = " << pc
+                      << ", 1/m = " << (1.0 / static_cast<double>(m[i])));
             // Note that the definition of universality doesn't require
             // the P(collision) <= 1/m for every hash function.
@@ -104,7 +106,8 @@ void CHashingTest::testUniversalHash() {
         boost::random::uniform_int_distribution<uint32_t> uniform(0u, 10000000u);
         TUInt32Vec samples;
-        std::generate_n(std::back_inserter(samples), 1000u, boost::bind(uniform, boost::ref(generator)));
+        std::generate_n(std::back_inserter(samples), 1000u,
+                        boost::bind(uniform, boost::ref(generator)));
         CHashing::CUniversalHash::TUInt32HashVec hashes;
         CHashing::CUniversalHash::generateHashes(100u, 10000u, hashes);
@@ -122,7 +125,8 @@ void CHashingTest::testUniversalHash() {
             for (std::size_t i = 0u; i < samples.size(); ++i) {
                 for (std::size_t j = i + 1u; j < samples.size(); ++j) {
-                    if (samples[i] != samples[j] && uniquePairs.insert(TUInt32Pr(samples[i], samples[j])).second) {
+                    if (samples[i] != samples[j] &&
+                        uniquePairs.insert(TUInt32Pr(samples[i], samples[j])).second) {
                         uint32_t hx = hash(samples[i]);
                         uint32_t hy = hash(samples[j]);
                         if (hx == hy) {
@@ -135,9 +139,11 @@
             collisionsRandom += static_cast<double>(collisions);
             hashedRandom += static_cast<double>(uniquePairs.size());
-            double pc = static_cast<double>(collisions) / static_cast<double>(uniquePairs.size());
+            double pc = static_cast<double>(collisions) /
+                        static_cast<double>(uniquePairs.size());
-            LOG_DEBUG(<< "collisions = " << collisions << ", P(collision) = " << pc << ", 1/m = " << (1.0 / 10000.0));
+            LOG_DEBUG(<< "collisions = " << collisions << ", P(collision) = " << pc
+                      << ", 1/m = " << (1.0 / 10000.0));
             // Note that the definition of universality doesn't require
             // the P(collision) <= 1/m for every hash function.
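For reference, the property these hunks exercise: with h(x) = ((a*x + b) mod p) mod m drawn from a Carter-Wegman family (p prime, a and b random), the probability that two fixed distinct keys collide, taken over the random draw of h, is at most about 1/m; no individual member of the family has to meet the bound, which is why the test averages collision rates over many generated hash functions. Below is a minimal standalone sketch of the same check, assuming nothing from ml-cpp (all names and constants are illustrative only):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <random>

int main() {
    // Smallest prime above 2^32; any prime larger than the key universe works.
    const std::uint64_t p = 4294967311ULL;
    const std::uint64_t m = 10000; // number of hash buckets

    std::mt19937_64 rng(42);
    std::uniform_int_distribution<std::uint64_t> drawA(1, p - 1);
    std::uniform_int_distribution<std::uint64_t> drawB(0, p - 1);

    // Estimate P(h(x) == h(y)) for one fixed pair x != y, where the
    // probability is over the random choice of the hash function h.
    const std::uint64_t x = 1234567;
    const std::uint64_t y = 7654321;
    std::size_t collisions = 0;
    const std::size_t trials = 100000;
    for (std::size_t t = 0; t < trials; ++t) {
        std::uint64_t a = drawA(rng);
        std::uint64_t b = drawB(rng);
        // a*x fits in 64 bits here because a < 2^33 and x < 2^24.
        if (((a * x + b) % p) % m == ((a * y + b) % p) % m) {
            ++collisions;
        }
    }
    std::cout << "P(collision) ~= "
              << static_cast<double>(collisions) / static_cast<double>(trials)
              << ", 1/m = " << 1.0 / static_cast<double>(m) << '\n';
    return 0;
}

Run repeatedly with different seeds, the estimate hovers at or below 1/m = 0.0001, mirroring the averaged assertion in the test rather than a per-function guarantee.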
@@ -179,8 +185,10 @@ void CHashingTest::testUniversalHash() { double error = 0.0; - for (TUint32PrUIntMapCItr i = uniqueHashedPairs.begin(); i != uniqueHashedPairs.end(); ++i) { - double p = 2.0 * static_cast(i->second) / 2000.0 / 1999.0 / static_cast(hashes.size()); + for (TUint32PrUIntMapCItr i = uniqueHashedPairs.begin(); + i != uniqueHashedPairs.end(); ++i) { + double p = 2.0 * static_cast(i->second) / 2000.0 / 1999.0 / + static_cast(hashes.size()); if (p > 1.0 / 10000.0) { LOG_DEBUG(<< core::CContainerPrinter::print(*i) << ", p = " << p); @@ -198,27 +206,30 @@ void CHashingTest::testMurmurHash() { { std::string key("This is the voice of the Mysterons!"); uint32_t seed = 0xdead4321; - uint32_t result = CHashing::murmurHash32(key.c_str(), static_cast(key.size()), seed); + uint32_t result = + CHashing::murmurHash32(key.c_str(), static_cast(key.size()), seed); CPPUNIT_ASSERT_EQUAL(uint32_t(0xEE593473), result); } { std::string key("We know that you can hear us, Earthmen!"); uint32_t seed = 0xffeeeeff; - uint32_t result = CHashing::safeMurmurHash32(key.c_str(), static_cast(key.size()), seed); + uint32_t result = CHashing::safeMurmurHash32( + key.c_str(), static_cast(key.size()), seed); CPPUNIT_ASSERT_EQUAL(uint32_t(0x54837c96), result); } { - std::string key( - "Your message has been analysed and it has been decided to allow one member of Spectrum to meet our representative."); + std::string key("Your message has been analysed and it has been decided to allow one member of Spectrum to meet our representative."); uint64_t seed = 0xaabbccddffeeeeffULL; - uint64_t result = CHashing::murmurHash64(key.c_str(), static_cast(key.size()), seed); + uint64_t result = + CHashing::murmurHash64(key.c_str(), static_cast(key.size()), seed); CPPUNIT_ASSERT_EQUAL(uint64_t(14826751455157300659ull), result); } { std::string key("Earthmen, we are peaceful beings and you have tried to destroy us, but you cannot succeed. You and your people " "will pay for this act of aggression."); uint64_t seed = 0x1324fedc9876abdeULL; - uint64_t result = CHashing::safeMurmurHash64(key.c_str(), static_cast(key.size()), seed); + uint64_t result = CHashing::safeMurmurHash64( + key.c_str(), static_cast(key.size()), seed); CPPUNIT_ASSERT_EQUAL(uint64_t(7291323361835448266ull), result); } @@ -282,14 +293,16 @@ void CHashingTest::testMurmurHash() { LOG_DEBUG(<< "Finished throughput of boost::unordered_set with murmur hash"); } - LOG_DEBUG(<< "default insert runtime = " << defaultInsertTime << "ms, murmur insert runtime = " << murmurInsertTime << "ms"); - LOG_DEBUG(<< "default lookup runtime = " << defaultLookupTime << "ms, murmur lookup runtime = " << murmurLookupTime << "ms"); + LOG_DEBUG(<< "default insert runtime = " << defaultInsertTime + << "ms, murmur insert runtime = " << murmurInsertTime << "ms"); + LOG_DEBUG(<< "default lookup runtime = " << defaultLookupTime + << "ms, murmur lookup runtime = " << murmurLookupTime << "ms"); // The benefits of the murmur hash are mainly at lookup time, so just assert // on that, but still log a warning for slower insert time if (murmurInsertTime > defaultInsertTime) { - LOG_WARN(<< "murmur insert runtime (" << murmurInsertTime << "ms) was longer than default insert runtime (" << defaultInsertTime - << "ms)"); + LOG_WARN(<< "murmur insert runtime (" << murmurInsertTime << "ms) was longer than default insert runtime (" + << defaultInsertTime << "ms)"); } // Most of the times the murmur lookup time will be faster. 
But it is not
@@ -346,14 +359,17 @@ void CHashingTest::testHashCombine() {
        for (std::size_t j = 0u; j < numberStrings; j += 2) {
            uniqueHashes.insert(hasher(testStrings[j] + testStrings[j + 1]));
-           uniqueHashCombines.insert(core::CHashing::hashCombine(static_cast<uint64_t>(hasher(testStrings[j])),
-                                                                 static_cast<uint64_t>(hasher(testStrings[j + 1]))));
+           uniqueHashCombines.insert(core::CHashing::hashCombine(
+               static_cast<uint64_t>(hasher(testStrings[j])),
+               static_cast<uint64_t>(hasher(testStrings[j + 1]))));
        }
        LOG_DEBUG(<< "# unique hashes = " << uniqueHashes.size());
        LOG_DEBUG(<< "# unique combined hashes = " << uniqueHashCombines.size());
-       CPPUNIT_ASSERT(uniqueHashCombines.size() > static_cast<std::size_t>(0.999 * static_cast<double>(uniqueHashes.size())));
+       CPPUNIT_ASSERT(uniqueHashCombines.size() >
+                      static_cast<std::size_t>(
+                          0.999 * static_cast<double>(uniqueHashes.size())));
    }
}
@@ -381,7 +397,8 @@ void CHashingTest::testConstructors() {
        a.push_back(20);
        a.push_back(30);
        CHashing::CUniversalHash::CUInt32VecHash hash(5, a, 6);
-       CPPUNIT_ASSERT_EQUAL(CContainerPrinter::print(a), CContainerPrinter::print(hash.a()));
+       CPPUNIT_ASSERT_EQUAL(CContainerPrinter::print(a),
+                            CContainerPrinter::print(hash.a()));
        CPPUNIT_ASSERT_EQUAL(uint32_t(5), hash.m());
        CPPUNIT_ASSERT_EQUAL(uint32_t(6), hash.b());
        LOG_DEBUG(<< hash.print());
@@ -428,9 +445,13 @@ void CHashingTest::testConstructors() {
CppUnit::Test* CHashingTest::suite() {
    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CHashingTest");
-   suiteOfTests->addTest(new CppUnit::TestCaller<CHashingTest>("CHashingTest::testUniversalHash", &CHashingTest::testUniversalHash));
-   suiteOfTests->addTest(new CppUnit::TestCaller<CHashingTest>("CHashingTest::testMurmurHash", &CHashingTest::testMurmurHash));
-   suiteOfTests->addTest(new CppUnit::TestCaller<CHashingTest>("CHashingTest::testHashCombine", &CHashingTest::testHashCombine));
-   suiteOfTests->addTest(new CppUnit::TestCaller<CHashingTest>("CHashingTest::testConstructors", &CHashingTest::testConstructors));
+   suiteOfTests->addTest(new CppUnit::TestCaller<CHashingTest>(
+       "CHashingTest::testUniversalHash", &CHashingTest::testUniversalHash));
+   suiteOfTests->addTest(new CppUnit::TestCaller<CHashingTest>(
+       "CHashingTest::testMurmurHash", &CHashingTest::testMurmurHash));
+   suiteOfTests->addTest(new CppUnit::TestCaller<CHashingTest>(
+       "CHashingTest::testHashCombine", &CHashingTest::testHashCombine));
+   suiteOfTests->addTest(new CppUnit::TestCaller<CHashingTest>(
+       "CHashingTest::testConstructors", &CHashingTest::testConstructors));
    return suiteOfTests;
}
diff --git a/lib/core/unittest/CHexUtilsTest.cc b/lib/core/unittest/CHexUtilsTest.cc
index 2988c46bef..1da49f368b 100644
--- a/lib/core/unittest/CHexUtilsTest.cc
+++ b/lib/core/unittest/CHexUtilsTest.cc
@@ -16,7 +16,8 @@ CppUnit::Test* CHexUtilsTest::suite() {
    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CHexUtilsTest");
-   suiteOfTests->addTest(new CppUnit::TestCaller<CHexUtilsTest>("CHexUtilsTest::testHexOutput", &CHexUtilsTest::testHexOutput));
+   suiteOfTests->addTest(new CppUnit::TestCaller<CHexUtilsTest>(
+       "CHexUtilsTest::testHexOutput", &CHexUtilsTest::testHexOutput));
    return suiteOfTests;
}
diff --git a/lib/core/unittest/CIEEE754Test.cc b/lib/core/unittest/CIEEE754Test.cc
index e961f2287d..b664b836ce 100644
--- a/lib/core/unittest/CIEEE754Test.cc
+++ b/lib/core/unittest/CIEEE754Test.cc
@@ -82,7 +82,8 @@ void CIEEE754Test::testRound() {
CppUnit::Test* CIEEE754Test::suite() {
    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CIEEE754Test");
-   suiteOfTests->addTest(new CppUnit::TestCaller<CIEEE754Test>("CIEEE754Test::testRound", &CIEEE754Test::testRound));
+   suiteOfTests->addTest(new CppUnit::TestCaller<CIEEE754Test>(
+       "CIEEE754Test::testRound",
&CIEEE754Test::testRound)); return suiteOfTests; } diff --git a/lib/core/unittest/CJsonLogLayoutTest.cc b/lib/core/unittest/CJsonLogLayoutTest.cc index cb523d8b7d..7b3fb17ad7 100644 --- a/lib/core/unittest/CJsonLogLayoutTest.cc +++ b/lib/core/unittest/CJsonLogLayoutTest.cc @@ -11,8 +11,8 @@ CppUnit::Test* CJsonLogLayoutTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonLogLayoutTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CJsonLogLayoutTest::testPathCropping", &CJsonLogLayoutTest::testPathCropping)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonLogLayoutTest::testPathCropping", &CJsonLogLayoutTest::testPathCropping)); return suiteOfTests; } @@ -20,11 +20,18 @@ CppUnit::Test* CJsonLogLayoutTest::suite() { void CJsonLogLayoutTest::testPathCropping() { LOG_DEBUG(<< "CJsonLogLayoutTest::testPathCropping"); #ifdef Windows - CPPUNIT_ASSERT_EQUAL(std::string("source.h"), log4cxx::helpers::CJsonLogLayout::cropPath("c:\\\\home\\hendrik\\src\\include/source.h")); CPPUNIT_ASSERT_EQUAL(std::string("source.h"), - log4cxx::helpers::CJsonLogLayout::cropPath("c:\\\\home\\hendrik\\src\\include\\source.h")); + log4cxx::helpers::CJsonLogLayout::cropPath( + "c:\\\\home\\hendrik\\src\\include/source.h")); + CPPUNIT_ASSERT_EQUAL(std::string("source.h"), + log4cxx::helpers::CJsonLogLayout::cropPath( + "c:\\\\home\\hendrik\\src\\include\\source.h")); #else - CPPUNIT_ASSERT_EQUAL(std::string("source.h"), log4cxx::helpers::CJsonLogLayout::cropPath("/home/hendrik/src/include/source.h")); - CPPUNIT_ASSERT_EQUAL(std::string("source.h"), log4cxx::helpers::CJsonLogLayout::cropPath("/home/hendrik/work/../src/include/source.h")); + CPPUNIT_ASSERT_EQUAL(std::string("source.h"), + log4cxx::helpers::CJsonLogLayout::cropPath( + "/home/hendrik/src/include/source.h")); + CPPUNIT_ASSERT_EQUAL(std::string("source.h"), + log4cxx::helpers::CJsonLogLayout::cropPath( + "/home/hendrik/work/../src/include/source.h")); #endif } diff --git a/lib/core/unittest/CJsonOutputStreamWrapperTest.cc b/lib/core/unittest/CJsonOutputStreamWrapperTest.cc index 1cd0c726f2..479362f9ab 100644 --- a/lib/core/unittest/CJsonOutputStreamWrapperTest.cc +++ b/lib/core/unittest/CJsonOutputStreamWrapperTest.cc @@ -22,10 +22,11 @@ CppUnit::Test* CJsonOutputStreamWrapperTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonOutputStreamWrapperTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputStreamWrapperTest::testConcurrentWrites", - &CJsonOutputStreamWrapperTest::testConcurrentWrites)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonOutputStreamWrapperTest::testShrink", - &CJsonOutputStreamWrapperTest::testShrink)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputStreamWrapperTest::testConcurrentWrites", + &CJsonOutputStreamWrapperTest::testConcurrentWrites)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonOutputStreamWrapperTest::testShrink", &CJsonOutputStreamWrapperTest::testShrink)); return suiteOfTests; } @@ -72,7 +73,8 @@ void CJsonOutputStreamWrapperTest::testConcurrentWrites() { const rapidjson::Value& allRecords = doc.GetArray(); // check number of documents - CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(WRITERS * DOCUMENTS_PER_WRITER), allRecords.Size()); + CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(WRITERS * DOCUMENTS_PER_WRITER), + allRecords.Size()); } void CJsonOutputStreamWrapperTest::testShrink() { diff --git a/lib/core/unittest/CJsonStatePersistInserterTest.cc b/lib/core/unittest/CJsonStatePersistInserterTest.cc index 
7acd618b32..60e159c512 100644 --- a/lib/core/unittest/CJsonStatePersistInserterTest.cc +++ b/lib/core/unittest/CJsonStatePersistInserterTest.cc @@ -14,8 +14,9 @@ CppUnit::Test* CJsonStatePersistInserterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonStatePersistInserterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStatePersistInserterTest::testPersist", - &CJsonStatePersistInserterTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonStatePersistInserterTest::testPersist", + &CJsonStatePersistInserterTest::testPersist)); return suiteOfTests; } @@ -44,5 +45,6 @@ void CJsonStatePersistInserterTest::testPersist() { LOG_DEBUG(<< "JSON is: " << json); - CPPUNIT_ASSERT_EQUAL(std::string("{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}"), json); + CPPUNIT_ASSERT_EQUAL(std::string("{\"level1A\":\"a\",\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}"), + json); } diff --git a/lib/core/unittest/CJsonStateRestoreTraverserTest.cc b/lib/core/unittest/CJsonStateRestoreTraverserTest.cc index 515b8d85d1..6891beabab 100644 --- a/lib/core/unittest/CJsonStateRestoreTraverserTest.cc +++ b/lib/core/unittest/CJsonStateRestoreTraverserTest.cc @@ -12,20 +12,27 @@ CppUnit::Test* CJsonStateRestoreTraverserTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CJsonStateRestoreTraverserTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStateRestoreTraverserTest::testRestore1", - &CJsonStateRestoreTraverserTest::testRestore1)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStateRestoreTraverserTest::testRestore2", - &CJsonStateRestoreTraverserTest::testRestore2)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStateRestoreTraverserTest::testRestore3", - &CJsonStateRestoreTraverserTest::testRestore3)); - suiteOfTests->addTest(new CppUnit::TestCaller("CJsonStateRestoreTraverserTest::testRestore4", - &CJsonStateRestoreTraverserTest::testRestore4)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testParsingBooleanFields", &CJsonStateRestoreTraverserTest::testParsingBooleanFields)); + "CJsonStateRestoreTraverserTest::testRestore1", + &CJsonStateRestoreTraverserTest::testRestore1)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays", &CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays)); + "CJsonStateRestoreTraverserTest::testRestore2", + &CJsonStateRestoreTraverserTest::testRestore2)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested", &CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested)); + "CJsonStateRestoreTraverserTest::testRestore3", + &CJsonStateRestoreTraverserTest::testRestore3)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonStateRestoreTraverserTest::testRestore4", + &CJsonStateRestoreTraverserTest::testRestore4)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonStateRestoreTraverserTest::testParsingBooleanFields", + &CJsonStateRestoreTraverserTest::testParsingBooleanFields)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays", + &CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested", + &CJsonStateRestoreTraverserTest::testRestore1IgnoreArraysNested)); return 
suiteOfTests; } @@ -182,8 +189,10 @@ void CJsonStateRestoreTraverserTest::testRestore4() { void CJsonStateRestoreTraverserTest::testParsingBooleanFields() { // Even though the parser doesn't handle boolean fields it should not hiccup over them - std::string json = std::string("{\"_index\" : \"categorization-test\", \"_type\" : \"categorizerState\",") + - std::string("\"_id\" : \"1\", \"_version\" : 2, \"found\" : true, ") + std::string("\"_source\":{\"a\" :\"1\"}"); + std::string json = + std::string("{\"_index\" : \"categorization-test\", \"_type\" : \"categorizerState\",") + + std::string("\"_id\" : \"1\", \"_version\" : 2, \"found\" : true, ") + + std::string("\"_source\":{\"a\" :\"1\"}"); std::istringstream strm(json); @@ -209,8 +218,7 @@ void CJsonStateRestoreTraverserTest::testParsingBooleanFields() { } void CJsonStateRestoreTraverserTest::testRestore1IgnoreArrays() { - std::string json( - "{\"_source\":{\"level1A\":\"a\",\"someArray\":[42],\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}}"); + std::string json("{\"_source\":{\"level1A\":\"a\",\"someArray\":[42],\"level1B\":\"25\",\"level1C\":{\"level2A\":\"3.14\",\"level2B\":\"z\"}}}"); std::istringstream strm(json); ml::core::CJsonStateRestoreTraverser traverser(strm); diff --git a/lib/core/unittest/CLoggerTest.cc b/lib/core/unittest/CLoggerTest.cc index 1259934a9d..e984b72b15 100644 --- a/lib/core/unittest/CLoggerTest.cc +++ b/lib/core/unittest/CLoggerTest.cc @@ -31,12 +31,16 @@ const char* TEST_PIPE_NAME = "testfiles/testpipe"; CppUnit::Test* CLoggerTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLoggerTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CLoggerTest::testLogging", &CLoggerTest::testLogging)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLoggerTest::testReconfiguration", &CLoggerTest::testReconfiguration)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLoggerTest::testSetLevel", &CLoggerTest::testSetLevel)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLoggerTest::testLogEnvironment", &CLoggerTest::testLogEnvironment)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CLoggerTest::testNonAsciiJsonLogging", &CLoggerTest::testNonAsciiJsonLogging)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLoggerTest::testLogging", &CLoggerTest::testLogging)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLoggerTest::testReconfiguration", &CLoggerTest::testReconfiguration)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLoggerTest::testSetLevel", &CLoggerTest::testSetLevel)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLoggerTest::testLogEnvironment", &CLoggerTest::testLogEnvironment)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLoggerTest::testNonAsciiJsonLogging", &CLoggerTest::testNonAsciiJsonLogging)); return suiteOfTests; } @@ -51,9 +55,8 @@ void CLoggerTest::testLogging() { LOG_INFO(<< "Info " << std::boolalpha << true); LOG_AT_LEVEL("INFO", << "Dynamic INFO " << false); LOG_WARN(<< "Warn " << t); - LOG_AT_LEVEL("WARN", - "Dynamic WARN " - << "abc"); + LOG_AT_LEVEL("WARN", "Dynamic WARN " + << "abc"); LOG_ERROR(<< "Error " << 1000 << ' ' << 0.23124F); LOG_AT_LEVEL("ERROR", << "Dynamic ERROR"); LOG_FATAL(<< "Fatal - application to handle exit"); @@ -142,14 +145,16 @@ void CLoggerTest::testSetLevel() { } void CLoggerTest::testNonAsciiJsonLogging() { - std::vector messages{"Non-iso8859-15: 编码", "Non-ascii: üaöä", "Non-iso8859-15: 编码 test", "surrogate pair: 𐐷 test"}; + std::vector messages{"Non-iso8859-15: 编码", 
"Non-ascii: üaöä", + "Non-iso8859-15: 编码 test", "surrogate pair: 𐐷 test"}; std::ostringstream loggedData; std::thread reader([&loggedData] { // wait a bit so that pipe has been created ml::core::CSleep::sleep(200); std::ifstream strm(TEST_PIPE_NAME); - std::copy(std::istreambuf_iterator(strm), std::istreambuf_iterator(), std::ostreambuf_iterator(loggedData)); + std::copy(std::istreambuf_iterator(strm), std::istreambuf_iterator(), + std::ostreambuf_iterator(loggedData)); }); ml::core::CLogger& logger = ml::core::CLogger::instance(); diff --git a/lib/core/unittest/CMapPopulationTest.cc b/lib/core/unittest/CMapPopulationTest.cc index f2f04dbf65..802c8d3bac 100644 --- a/lib/core/unittest/CMapPopulationTest.cc +++ b/lib/core/unittest/CMapPopulationTest.cc @@ -26,8 +26,8 @@ CMapPopulationTest::CMapPopulationTest() : m_TestData(nullptr) { CppUnit::Test* CMapPopulationTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMapPopulationTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMapPopulationTest::testMapInsertSpeed", &CMapPopulationTest::testMapInsertSpeed)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMapPopulationTest::testMapInsertSpeed", &CMapPopulationTest::testMapInsertSpeed)); return suiteOfTests; } @@ -44,7 +44,8 @@ CMapPopulationTest::CTestData::CTestData(size_t fillSize) // It's essential these vectors don't resize as the char pointers in the // last two vectors point into the contents of the strings in the first two, // so set the correct size when they're constructed - : m_StringKeys(fillSize), m_StringVals(fillSize), m_CharPtrKeys(fillSize), m_CharPtrVals(fillSize) { + : m_StringKeys(fillSize), m_StringVals(fillSize), m_CharPtrKeys(fillSize), + m_CharPtrVals(fillSize) { // Set up test data such that each test uses identical data for (size_t index = 0; index < fillSize; ++index) { // Keys are 4 to 12 letters followed by a unique number @@ -62,7 +63,8 @@ CMapPopulationTest::CTestData::CTestData(size_t fillSize) } for (size_t index = 0; index < fillSize; ++index) { - LOG_DEBUG(<< "Test entry " << index << ": " << m_CharPtrKeys[index] << " -> " << m_CharPtrVals[index]); + LOG_DEBUG(<< "Test entry " << index << ": " << m_CharPtrKeys[index] + << " -> " << m_CharPtrVals[index]); } } @@ -74,11 +76,13 @@ const CMapPopulationTest::CTestData::TStrVec& CMapPopulationTest::CTestData::str return m_StringVals; } -const CMapPopulationTest::CTestData::TCharPVec& CMapPopulationTest::CTestData::charPtrKeys() const { +const CMapPopulationTest::CTestData::TCharPVec& +CMapPopulationTest::CTestData::charPtrKeys() const { return m_CharPtrKeys; } -const CMapPopulationTest::CTestData::TCharPVec& CMapPopulationTest::CTestData::charPtrVals() const { +const CMapPopulationTest::CTestData::TCharPVec& +CMapPopulationTest::CTestData::charPtrVals() const { return m_CharPtrVals; } @@ -107,130 +111,149 @@ void CMapPopulationTest::testMapInsertStr() { TStrStrMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting map insert string test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting map insert string test at " + << ml::core::CTimeUtils::toTimeString(start)); this->addInsert(m_TestData->stringKeys(), m_TestData->stringVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished map insert string test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished map insert string test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Map 
insert string test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) - << " seconds"); + LOG_INFO(<< "Map insert string test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CMapPopulationTest::testMapInsertCharP() { TStrStrMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting map insert char pointer test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting map insert char pointer test at " + << ml::core::CTimeUtils::toTimeString(start)); this->addInsert(m_TestData->charPtrKeys(), m_TestData->charPtrVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished map insert char pointer test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished map insert char pointer test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Map insert char pointer test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) - << " seconds"); + LOG_INFO(<< "Map insert char pointer test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CMapPopulationTest::testMapOpSqBracStr() { TStrStrMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting map operator[] string test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting map operator[] string test at " + << ml::core::CTimeUtils::toTimeString(start)); this->addOpSqBrac(m_TestData->stringKeys(), m_TestData->stringVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished map operator[] string test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished map operator[] string test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Map operator[] string test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " << (end - start) - << " seconds"); + LOG_INFO(<< "Map operator[] string test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CMapPopulationTest::testMapOpSqBracCharP() { TStrStrMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting map operator[] char pointer test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting map operator[] char pointer test at " + << ml::core::CTimeUtils::toTimeString(start)); this->addOpSqBrac(m_TestData->charPtrKeys(), m_TestData->charPtrVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished map operator[] char pointer test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished map operator[] char pointer test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Map operator[] char pointer test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " - << (end - start) << " seconds"); + LOG_INFO(<< "Map operator[] char pointer test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CMapPopulationTest::testUMapInsertStr() { TStrStrUMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting unordered map insert string test at " << ml::core::CTimeUtils::toTimeString(start)); + 
LOG_INFO(<< "Starting unordered map insert string test at " + << ml::core::CTimeUtils::toTimeString(start)); this->addInsert(m_TestData->stringKeys(), m_TestData->stringVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished unordered map insert string test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished unordered map insert string test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Unordered map insert string test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " - << (end - start) << " seconds"); + LOG_INFO(<< "Unordered map insert string test with fill size " << FILL_SIZE << " and test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); } void CMapPopulationTest::testUMapInsertCharP() { TStrStrUMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting unordered map insert char pointer test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting unordered map insert char pointer test at " + << ml::core::CTimeUtils::toTimeString(start)); this->addInsert(m_TestData->charPtrKeys(), m_TestData->charPtrVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished unordered map insert char pointer test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished unordered map insert char pointer test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Unordered map insert char pointer test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " - << (end - start) << " seconds"); + LOG_INFO(<< "Unordered map insert char pointer test with fill size " << FILL_SIZE + << " and test size " << TEST_SIZE << " took " << (end - start) << " seconds"); } void CMapPopulationTest::testUMapOpSqBracStr() { TStrStrUMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting unordered map operator[] string test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting unordered map operator[] string test at " + << ml::core::CTimeUtils::toTimeString(start)); this->addOpSqBrac(m_TestData->stringKeys(), m_TestData->stringVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished unordered map operator[] string test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished unordered map operator[] string test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Unordered map operator[] string test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " - << (end - start) << " seconds"); + LOG_INFO(<< "Unordered map operator[] string test with fill size " << FILL_SIZE + << " and test size " << TEST_SIZE << " took " << (end - start) << " seconds"); } void CMapPopulationTest::testUMapOpSqBracCharP() { TStrStrUMapVec testVec(TEST_SIZE); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting unordered map operator[] char pointer test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting unordered map operator[] char pointer test at " + << ml::core::CTimeUtils::toTimeString(start)); this->addOpSqBrac(m_TestData->charPtrKeys(), m_TestData->charPtrVals(), testVec); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished unordered map operator[] char pointer test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished unordered 
map operator[] char pointer test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Unordered map operator[] char pointer test with fill size " << FILL_SIZE << " and test size " << TEST_SIZE << " took " - << (end - start) << " seconds"); + LOG_INFO(<< "Unordered map operator[] char pointer test with fill size " << FILL_SIZE + << " and test size " << TEST_SIZE << " took " << (end - start) << " seconds"); } template -void CMapPopulationTest::addInsert(const INPUT_CONTAINER& keys, const INPUT_CONTAINER& values, MAP_CONTAINER& maps) const { +void CMapPopulationTest::addInsert(const INPUT_CONTAINER& keys, + const INPUT_CONTAINER& values, + MAP_CONTAINER& maps) const { for (typename MAP_CONTAINER::iterator iter = maps.begin(); iter != maps.end(); ++iter) { typename MAP_CONTAINER::value_type& map = *iter; size_t limit(std::min(keys.size(), values.size())); for (size_t index = 0; index < limit; ++index) { - map.insert(typename MAP_CONTAINER::value_type::value_type(keys[index], values[index])); + map.insert(typename MAP_CONTAINER::value_type::value_type( + keys[index], values[index])); } CPPUNIT_ASSERT_EQUAL(limit, map.size()); @@ -238,7 +261,9 @@ void CMapPopulationTest::addInsert(const INPUT_CONTAINER& keys, const INPUT_CONT } template -void CMapPopulationTest::addOpSqBrac(const INPUT_CONTAINER& keys, const INPUT_CONTAINER& values, MAP_CONTAINER& maps) const { +void CMapPopulationTest::addOpSqBrac(const INPUT_CONTAINER& keys, + const INPUT_CONTAINER& values, + MAP_CONTAINER& maps) const { for (typename MAP_CONTAINER::iterator iter = maps.begin(); iter != maps.end(); ++iter) { typename MAP_CONTAINER::value_type& map = *iter; diff --git a/lib/core/unittest/CMapPopulationTest.h b/lib/core/unittest/CMapPopulationTest.h index 20f0e2c5ce..a5eb92f2d5 100644 --- a/lib/core/unittest/CMapPopulationTest.h +++ b/lib/core/unittest/CMapPopulationTest.h @@ -64,10 +64,14 @@ class CMapPopulationTest : public CppUnit::TestFixture { using TStrStrUMapVec = std::vector; template - void addInsert(const INPUT_CONTAINER& keys, const INPUT_CONTAINER& values, MAP_CONTAINER& maps) const; + void addInsert(const INPUT_CONTAINER& keys, + const INPUT_CONTAINER& values, + MAP_CONTAINER& maps) const; template - void addOpSqBrac(const INPUT_CONTAINER& keys, const INPUT_CONTAINER& values, MAP_CONTAINER& maps) const; + void addOpSqBrac(const INPUT_CONTAINER& keys, + const INPUT_CONTAINER& values, + MAP_CONTAINER& maps) const; private: static const size_t FILL_SIZE; diff --git a/lib/core/unittest/CMemoryUsageJsonWriterTest.cc b/lib/core/unittest/CMemoryUsageJsonWriterTest.cc index 863539bf04..32bcd58e35 100644 --- a/lib/core/unittest/CMemoryUsageJsonWriterTest.cc +++ b/lib/core/unittest/CMemoryUsageJsonWriterTest.cc @@ -16,8 +16,8 @@ using namespace ml; CppUnit::Test* CMemoryUsageJsonWriterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageJsonWriterTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMemoryUsageJsonWriterTest::test", &CMemoryUsageJsonWriterTest::test)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageJsonWriterTest::test", &CMemoryUsageJsonWriterTest::test)); return suiteOfTests; } @@ -54,7 +54,8 @@ void CMemoryUsageJsonWriterTest::test() { writer.addItem(description); writer.endObject(); writer.finalise(); - CPPUNIT_ASSERT_EQUAL(std::string("{\"Hello\":{\"memory\":223,\"unused\":45678}}\n"), ss.str()); + CPPUNIT_ASSERT_EQUAL(std::string("{\"Hello\":{\"memory\":223,\"unused\":45678}}\n"), + ss.str()); } { // Check one empty array @@ -67,7 
+68,8 @@ void CMemoryUsageJsonWriterTest::test() { writer.endArray(); writer.endObject(); writer.finalise(); - CPPUNIT_ASSERT_EQUAL(std::string("{\"Hello\":{\"memory\":223},\"Sheeple\":[]}\n"), ss.str()); + CPPUNIT_ASSERT_EQUAL( + std::string("{\"Hello\":{\"memory\":223},\"Sheeple\":[]}\n"), ss.str()); } { // Check one full array @@ -111,8 +113,7 @@ void CMemoryUsageJsonWriterTest::test() { writer.endArray(); writer.endObject(); writer.finalise(); - CPPUNIT_ASSERT_EQUAL( - std::string("{\"Hello\":{\"memory\":223},\"Sheeple\":[{\"Dumplings\":{\"memory\":345},\"Gravy\":{\"memory\":12341234}}]}\n"), - ss.str()); + CPPUNIT_ASSERT_EQUAL(std::string("{\"Hello\":{\"memory\":223},\"Sheeple\":[{\"Dumplings\":{\"memory\":345},\"Gravy\":{\"memory\":12341234}}]}\n"), + ss.str()); } } diff --git a/lib/core/unittest/CMemoryUsageTest.cc b/lib/core/unittest/CMemoryUsageTest.cc index c97e11f513..9f4ccd2930 100644 --- a/lib/core/unittest/CMemoryUsageTest.cc +++ b/lib/core/unittest/CMemoryUsageTest.cc @@ -32,7 +32,11 @@ namespace { // Subset of model_t equivalent duplicated here to avoid a dependency // with the model library -enum EFeature { E_IndividualHighMeanByPerson, E_IndividualCountByBucketAndPerson, E_IndividualHighCountsByBucketAndPerson }; +enum EFeature { + E_IndividualHighMeanByPerson, + E_IndividualCountByBucketAndPerson, + E_IndividualHighCountsByBucketAndPerson +}; using TIntVec = std::vector; using TStrVec = std::vector; @@ -56,11 +60,17 @@ struct SFoo { struct SFooWithMemoryUsage { explicit SFooWithMemoryUsage(std::size_t key = 0) : s_Key(key) {} - bool operator<(const SFooWithMemoryUsage& rhs) const { return s_Key < rhs.s_Key; } - bool operator==(const SFooWithMemoryUsage& rhs) const { return s_Key == rhs.s_Key; } + bool operator<(const SFooWithMemoryUsage& rhs) const { + return s_Key < rhs.s_Key; + } + bool operator==(const SFooWithMemoryUsage& rhs) const { + return s_Key == rhs.s_Key; + } std::size_t memoryUsage() const { return 0; } - void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SFooWithMemoryUsage", 0); } + void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { + mem->setName("SFooWithMemoryUsage", 0); + } std::size_t s_Key; double s_State[100]; @@ -80,7 +90,9 @@ struct SBar { explicit SBar(std::size_t key = 0) : s_Key(key), s_State() {} bool operator<(const SBar& rhs) const { return s_Key < rhs.s_Key; } bool operator==(const SBar& rhs) const { return s_Key == rhs.s_Key; } - std::size_t memoryUsage() const { return sizeof(SFoo) * s_State.capacity(); } + std::size_t memoryUsage() const { + return sizeof(SFoo) * s_State.capacity(); + } std::size_t s_Key; TFooVec s_State; @@ -92,7 +104,9 @@ struct SBarDebug { explicit SBarDebug(std::size_t key = 0) : s_Key(key), s_State() {} bool operator<(const SBarDebug& rhs) const { return s_Key < rhs.s_Key; } bool operator==(const SBarDebug& rhs) const { return s_Key == rhs.s_Key; } - std::size_t memoryUsage() const { return sizeof(SFoo) * s_State.capacity(); } + std::size_t memoryUsage() const { + return sizeof(SFoo) * s_State.capacity(); + } void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SBarDebug", 0); @@ -107,9 +121,15 @@ struct SBarVectorDebug { using TFooVec = std::vector; explicit SBarVectorDebug(std::size_t key = 0) : s_Key(key), s_State() {} - bool operator<(const SBarVectorDebug& rhs) const { return s_Key < rhs.s_Key; } - bool operator==(const SBarVectorDebug& rhs) const { return s_Key == rhs.s_Key; } - std::size_t memoryUsage() const { return 
core::CMemory::dynamicSize(s_State); } + bool operator<(const SBarVectorDebug& rhs) const { + return s_Key < rhs.s_Key; + } + bool operator==(const SBarVectorDebug& rhs) const { + return s_Key == rhs.s_Key; + } + std::size_t memoryUsage() const { + return core::CMemory::dynamicSize(s_State); + } void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SBarVectorDebug", 0); @@ -122,7 +142,9 @@ struct SBarVectorDebug { struct SHash { std::size_t operator()(const SFoo& foo) const { return foo.s_Key; } - std::size_t operator()(const SFooWithMemoryUsage& foo) const { return foo.s_Key; } + std::size_t operator()(const SFooWithMemoryUsage& foo) const { + return foo.s_Key; + } std::size_t operator()(const SBar& bar) const { return bar.s_Key; } }; @@ -132,7 +154,9 @@ class CBase { virtual ~CBase() = default; - virtual std::size_t memoryUsage() const { return core::CMemory::dynamicSize(m_Vec); } + virtual std::size_t memoryUsage() const { + return core::CMemory::dynamicSize(m_Vec); + } virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CBase", 0); @@ -148,7 +172,8 @@ class CBase { class CDerived : public CBase { public: - CDerived(std::size_t i) : CBase(i), m_Strings(i, "This is a secret string") {} + CDerived(std::size_t i) + : CBase(i), m_Strings(i, "This is a secret string") {} virtual ~CDerived() = default; @@ -203,7 +228,8 @@ class CTrackingAllocator { inline const_pointer address(const_reference r) { return &r; } // memory allocation - inline pointer allocate(size_type cnt, typename std::allocator::const_pointer = nullptr) { + inline pointer allocate(size_type cnt, + typename std::allocator::const_pointer = nullptr) { ms_Allocated += cnt; return reinterpret_cast(::operator new(cnt * sizeof(T))); } @@ -214,7 +240,9 @@ class CTrackingAllocator { } // size - inline size_type max_size() const { return std::numeric_limits::max() / sizeof(T); } + inline size_type max_size() const { + return std::numeric_limits::max() / sizeof(T); + } static std::size_t usage() { return ms_Allocated; } @@ -225,7 +253,9 @@ class CTrackingAllocator { inline bool operator==(const CTrackingAllocator&) const { return true; } - inline bool operator!=(const CTrackingAllocator& a) const { return !operator==(a); } + inline bool operator!=(const CTrackingAllocator& a) const { + return !operator==(a); + } private: static std::size_t ms_Allocated; @@ -248,7 +278,8 @@ void CMemoryUsageTest::testUsage() { using TFooWithMemoryFooWithMemoryMap = std::map; using TFooFooUMap = boost::unordered_map; using TFooFSet = boost::container::flat_set; - using TFooWithMemoryFooWithMemoryUMap = boost::unordered_map; + using TFooWithMemoryFooWithMemoryUMap = + boost::unordered_map; using TBarVec = std::vector; using TBarBarMap = std::map; using TBarBarUMap = boost::unordered_map; @@ -272,8 +303,10 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TFooVec ***"); LOG_DEBUG(<< "dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); - LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); + LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " + << core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), + core::CMemory::dynamicSize(foosWithMemory)); } { TFooList foos(10); @@ -281,8 +314,10 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TFooList ***"); LOG_DEBUG(<< "dynamicSize(foos) 
= " << core::CMemory::dynamicSize(foos)); - LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); + LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " + << core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), + core::CMemory::dynamicSize(foosWithMemory)); } { TFooDeque foos(10); @@ -290,8 +325,10 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TFooDeque ***"); LOG_DEBUG(<< "dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); - LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); + LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " + << core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), + core::CMemory::dynamicSize(foosWithMemory)); } { TFooCircBuf foos(10); @@ -301,8 +338,10 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TFooCircBuf ***"); LOG_DEBUG(<< "dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); - LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); + LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " + << core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), + core::CMemory::dynamicSize(foosWithMemory)); } { TFooFooMap foos; @@ -316,8 +355,10 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TFooFooMap ***"); LOG_DEBUG(<< "dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); - LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); + LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " + << core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), + core::CMemory::dynamicSize(foosWithMemory)); } { TFooFooUMap foos; @@ -331,8 +372,10 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TFooFooUMap ***"); LOG_DEBUG(<< "dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); - LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " << core::CMemory::dynamicSize(foosWithMemory)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), core::CMemory::dynamicSize(foosWithMemory)); + LOG_DEBUG(<< "dynamicSize(foosWithMemory) = " + << core::CMemory::dynamicSize(foosWithMemory)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), + core::CMemory::dynamicSize(foosWithMemory)); } { TFooFSet foos; @@ -344,7 +387,8 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TFooFSet ***"); LOG_DEBUG(<< "dynamicSize(foos) = " << core::CMemory::dynamicSize(foos)); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), foos.capacity() * sizeof(SFoo)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(foos), + foos.capacity() * sizeof(SFoo)); } { @@ -376,9 +420,12 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TBarVec ***"); LOG_DEBUG(<< "dynamic size = " << core::CMemory::dynamicSize(bars1)); LOG_DEBUG(<< "expected dynamic size = " - << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(state21) + core::CMemory::dynamicSize(state22)); + << core::CMemory::dynamicSize(bars2) + 
core::CMemory::dynamicSize(state21) + + core::CMemory::dynamicSize(state22)); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(bars1), - core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(state21) + core::CMemory::dynamicSize(state22)); + core::CMemory::dynamicSize(bars2) + + core::CMemory::dynamicSize(state21) + + core::CMemory::dynamicSize(state22)); } { SBar key; @@ -395,9 +442,12 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TBarBarMap ***"); LOG_DEBUG(<< "dynamic size = " << core::CMemory::dynamicSize(bars1)); LOG_DEBUG(<< "expected dynamic size = " - << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); + << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(bars1), - core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); + core::CMemory::dynamicSize(bars2) + + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); } { SBar key; @@ -414,9 +464,12 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TBarBarUMap ***"); LOG_DEBUG(<< "dynamic size = " << core::CMemory::dynamicSize(bars1)); LOG_DEBUG(<< "expected dynamic size = " - << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); + << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(bars1), - core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); + core::CMemory::dynamicSize(bars2) + + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); } { SBar key; @@ -437,9 +490,12 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TBarBarFMap ***"); LOG_DEBUG(<< "dynamic size = " << core::CMemory::dynamicSize(bars1)); LOG_DEBUG(<< "expected dynamic size = " - << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); + << core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(bars1), - core::CMemory::dynamicSize(bars2) + core::CMemory::dynamicSize(key) + core::CMemory::dynamicSize(value)); + core::CMemory::dynamicSize(bars2) + + core::CMemory::dynamicSize(key) + + core::CMemory::dynamicSize(value)); } { SBar value; @@ -449,8 +505,10 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "*** TBarPtr ***"); LOG_DEBUG(<< "dynamic size = " << core::CMemory::dynamicSize(pointer)); - LOG_DEBUG(<< "expected dynamic size = " << sizeof(SBar) + sizeof(SFoo) * value.s_State.capacity()); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(pointer), sizeof(SBar) + sizeof(SFoo) * value.s_State.capacity()); + LOG_DEBUG(<< "expected dynamic size = " + << sizeof(SBar) + sizeof(SFoo) * value.s_State.capacity()); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(pointer), + sizeof(SBar) + sizeof(SFoo) * value.s_State.capacity()); } { @@ -467,7 +525,8 @@ void CMemoryUsageTest::testUsage() { variables.push_back(b); LOG_DEBUG(<< "wrong dynamic size = " << core::CMemory::dynamicSize(variables)); - CPPUNIT_ASSERT_EQUAL(variables.capacity() * sizeof(std::size_t), core::CMemory::dynamicSize(variables)); + CPPUNIT_ASSERT_EQUAL(variables.capacity() * sizeof(std::size_t), + core::CMemory::dynamicSize(variables)); core::CMemory::CAnyVisitor& 
visitor = core::CMemory::anyVisitor(); visitor.registerCallback(); @@ -475,10 +534,12 @@ void CMemoryUsageTest::testUsage() { LOG_DEBUG(<< "dynamic size = " << core::CMemory::dynamicSize(variables)); LOG_DEBUG(<< "expected dynamic size = " - << variables.capacity() * sizeof(std::size_t) + sizeof(a) + core::CMemory::dynamicSize(a) + sizeof(b) + + << variables.capacity() * sizeof(std::size_t) + sizeof(a) + + core::CMemory::dynamicSize(a) + sizeof(b) + core::CMemory::dynamicSize(b)); - CPPUNIT_ASSERT_EQUAL(variables.capacity() * sizeof(std::size_t) + sizeof(a) + core::CMemory::dynamicSize(a) + sizeof(b) + - core::CMemory::dynamicSize(b), + CPPUNIT_ASSERT_EQUAL(variables.capacity() * sizeof(std::size_t) + + sizeof(a) + core::CMemory::dynamicSize(a) + + sizeof(b) + core::CMemory::dynamicSize(b), core::CMemory::dynamicSize(variables)); core::CMemoryDebug::CAnyVisitor& debugVisitor = core::CMemoryDebug::anyVisitor(); @@ -511,7 +572,8 @@ void CMemoryUsageTest::testUsage() { mem.print(ss); LOG_TRACE(<< ss.str()); } - CPPUNIT_ASSERT(core::CMemory::dynamicSize(*base) < core::CMemory::dynamicSize(*derived)); + CPPUNIT_ASSERT(core::CMemory::dynamicSize(*base) < + core::CMemory::dynamicSize(*derived)); TBasePtr sharedBase(new CBase(10)); TBasePtr sharedDerived(new CDerived(10)); @@ -548,19 +610,23 @@ void CMemoryUsageTest::testUsage() { CPPUNIT_ASSERT_EQUAL(base.memoryUsage(), core::CMemory::dynamicSize(base)); CBase* basePtr = new CBase(5); - CPPUNIT_ASSERT_EQUAL(basePtr->memoryUsage() + sizeof(*basePtr), core::CMemory::dynamicSize(basePtr)); + CPPUNIT_ASSERT_EQUAL(basePtr->memoryUsage() + sizeof(*basePtr), + core::CMemory::dynamicSize(basePtr)); CDerived derived(6); CPPUNIT_ASSERT_EQUAL(derived.memoryUsage(), core::CMemory::dynamicSize(derived)); CDerived* derivedPtr = new CDerived(5); - CPPUNIT_ASSERT_EQUAL(derivedPtr->memoryUsage() + sizeof(*derivedPtr), core::CMemory::dynamicSize(derivedPtr)); + CPPUNIT_ASSERT_EQUAL(derivedPtr->memoryUsage() + sizeof(*derivedPtr), + core::CMemory::dynamicSize(derivedPtr)); CBase* basederivedPtr = new CDerived(5); - CPPUNIT_ASSERT_EQUAL(basederivedPtr->memoryUsage() + sizeof(CDerived), core::CMemory::dynamicSize(basederivedPtr)); + CPPUNIT_ASSERT_EQUAL(basederivedPtr->memoryUsage() + sizeof(CDerived), + core::CMemory::dynamicSize(basederivedPtr)); TBasePtr sPtr(new CDerived(6)); - CPPUNIT_ASSERT_EQUAL(sPtr->memoryUsage() + sizeof(CDerived), core::CMemory::dynamicSize(sPtr)); + CPPUNIT_ASSERT_EQUAL(sPtr->memoryUsage() + sizeof(CDerived), + core::CMemory::dynamicSize(sPtr)); } { TDerivedVec vec; @@ -613,7 +679,8 @@ void CMemoryUsageTest::testDebug() { sbar.s_State.push_back(SFoo(i)); sbarDebug.s_State.push_back(SFoo(i)); sbarVectorDebug.s_State.push_back(SFooWithMemoryUsage(i)); - LOG_TRACE(<< "SFooWithMemoryUsage usage: " << sbarVectorDebug.s_State.back().memoryUsage()); + LOG_TRACE(<< "SFooWithMemoryUsage usage: " + << sbarVectorDebug.s_State.back().memoryUsage()); } CPPUNIT_ASSERT_EQUAL(sbar.memoryUsage(), sbarDebug.memoryUsage()); CPPUNIT_ASSERT_EQUAL(sbar.memoryUsage(), sbarVectorDebug.memoryUsage()); @@ -632,7 +699,8 @@ void CMemoryUsageTest::testDebug() { std::ostringstream ss; memoryUsage.print(ss); LOG_TRACE(<< "SBarVectorDebug: " + ss.str()); - LOG_TRACE(<< "memoryUsage: " << sbarVectorDebug.memoryUsage() << ", debugUsage: " << memoryUsage.usage()); + LOG_TRACE(<< "memoryUsage: " << sbarVectorDebug.memoryUsage() + << ", debugUsage: " << memoryUsage.usage()); CPPUNIT_ASSERT_EQUAL(sbarVectorDebug.memoryUsage(), memoryUsage.usage()); } } @@ -649,7 +717,8 @@ 
void CMemoryUsageTest::testDebug() { core::CMemoryDebug::dynamicSize("TBarVecPtr", t, memoryUsage.addChild()); std::ostringstream ss; memoryUsage.print(ss); - LOG_TRACE(<< "TBarVecPtr usage: " << core::CMemory::dynamicSize(t) << ", debug: " << memoryUsage.usage()); + LOG_TRACE(<< "TBarVecPtr usage: " << core::CMemory::dynamicSize(t) + << ", debug: " << memoryUsage.usage()); LOG_TRACE(<< ss.str()); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(t), memoryUsage.usage()); } @@ -671,13 +740,15 @@ void CMemoryUsageTest::testDebug() { vec2->push_back(SBar(33)); t.push_back(TFeatureBarVecPtrPr(E_IndividualCountByBucketAndPerson, vec)); - t.push_back(TFeatureBarVecPtrPr(E_IndividualHighCountsByBucketAndPerson, TBarVecPtr())); + t.push_back(TFeatureBarVecPtrPr(E_IndividualHighCountsByBucketAndPerson, + TBarVecPtr())); core::CMemoryUsage memoryUsage; memoryUsage.setName("test", 0); core::CMemoryDebug::dynamicSize("TFeatureBarVecPtrPrVec", t, memoryUsage.addChild()); std::ostringstream ss; memoryUsage.print(ss); - LOG_TRACE(<< "TFeatureBarVecPtrPrVec usage: " << core::CMemory::dynamicSize(t) << ", debug: " << memoryUsage.usage()); + LOG_TRACE(<< "TFeatureBarVecPtrPrVec usage: " << core::CMemory::dynamicSize(t) + << ", debug: " << memoryUsage.usage()); LOG_TRACE(<< ss.str()); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(t), memoryUsage.usage()); } @@ -812,8 +883,10 @@ void CMemoryUsageTest::testStringBehaviour() { std::string empty1; std::string empty2; - LOG_INFO(<< "Two independently constructed empty strings have data at " << static_cast(empty1.data()) << " and " - << static_cast(empty2.data()) << " and capacity " << empty1.capacity()); + LOG_INFO(<< "Two independently constructed empty strings have data at " + << static_cast(empty1.data()) << " and " + << static_cast(empty2.data()) << " and capacity " + << empty1.capacity()); if (empty1.data() == empty2.data()) { LOG_INFO(<< "All strings constructed empty probably share the same " "representation on this platform"); @@ -824,18 +897,21 @@ void CMemoryUsageTest::testStringBehaviour() { std::string something3; something3 = something2; - LOG_INFO(<< "Non-empty string has data at " << static_cast(something1.data()) << " length " << something1.length() - << " and capacity " << something1.capacity()); + LOG_INFO(<< "Non-empty string has data at " + << static_cast(something1.data()) << " length " + << something1.length() << " and capacity " << something1.capacity()); - LOG_INFO(<< "Copy constructed string has data at " << static_cast(something2.data()) << " length " << something2.length() - << " and capacity " << something2.capacity()); + LOG_INFO(<< "Copy constructed string has data at " + << static_cast(something2.data()) << " length " + << something2.length() << " and capacity " << something2.capacity()); if (something2.data() == something1.data()) { LOG_INFO(<< "Copy constructor probably has a copy-on-write " "implementation on this platform"); } - LOG_INFO(<< "Assigned string has data at " << static_cast(something3.data()) << " length " << something3.length() - << " and capacity " << something3.capacity()); + LOG_INFO(<< "Assigned string has data at " + << static_cast(something3.data()) << " length " + << something3.length() << " and capacity " << something3.capacity()); if (something3.data() == something2.data()) { LOG_INFO(<< "Assignment operator probably has a copy-on-write " "implementation on this platform"); @@ -843,7 +919,8 @@ void CMemoryUsageTest::testStringBehaviour() { something1.clear(); - LOG_INFO(<< "Cleared string that was copied 
to two others has data at " << static_cast(something1.data()) << " length " + LOG_INFO(<< "Cleared string that was copied to two others has data at " + << static_cast(something1.data()) << " length " << something1.length() << " and capacity " << something1.capacity()); if (something1.data() == empty1.data()) { LOG_INFO(<< "Cleared strings revert to shared empty representation on " @@ -854,8 +931,8 @@ void CMemoryUsageTest::testStringBehaviour() { LOG_INFO(<< "String that was copied to another then assigned an empty string " "has data at " - << static_cast(something2.data()) << " length " << something2.length() << " and capacity " - << something2.capacity()); + << static_cast(something2.data()) << " length " + << something2.length() << " and capacity " << something2.capacity()); if (something2.data() == empty1.data()) { LOG_INFO(<< "Strings that have an empty constructed string assigned to " "them share the same representation as other empty " @@ -864,28 +941,33 @@ void CMemoryUsageTest::testStringBehaviour() { std::string uncopied("uncopied"); - LOG_INFO(<< "Non-empty uncopied string has data at " << static_cast(uncopied.data()) << " length " << uncopied.length() - << " and capacity " << uncopied.capacity()); + LOG_INFO(<< "Non-empty uncopied string has data at " + << static_cast(uncopied.data()) << " length " + << uncopied.length() << " and capacity " << uncopied.capacity()); uncopied.clear(); - LOG_INFO(<< "Cleared uncopied string has data at " << static_cast(uncopied.data()) << " length " << uncopied.length() - << " and capacity " << uncopied.capacity()); + LOG_INFO(<< "Cleared uncopied string has data at " + << static_cast(uncopied.data()) << " length " + << uncopied.length() << " and capacity " << uncopied.capacity()); std::string startSmall("small"); - LOG_INFO(<< "Non-empty small string unchanged since construction has data at " << static_cast(startSmall.data()) - << " length " << startSmall.length() << " and capacity " << startSmall.capacity()); + LOG_INFO(<< "Non-empty small string unchanged since construction has data at " + << static_cast(startSmall.data()) << " length " + << startSmall.length() << " and capacity " << startSmall.capacity()); startSmall.reserve(100); size_t capacity100(startSmall.capacity()); - LOG_INFO(<< "Small string after reserving 100 bytes has data at " << static_cast(startSmall.data()) << " length " + LOG_INFO(<< "Small string after reserving 100 bytes has data at " + << static_cast(startSmall.data()) << " length " << startSmall.length() << " and capacity " << startSmall.capacity()); startSmall.reserve(10); - LOG_INFO(<< "Small string after reserving 10 bytes has data at " << static_cast(startSmall.data()) << " length " + LOG_INFO(<< "Small string after reserving 10 bytes has data at " + << static_cast(startSmall.data()) << " length " << startSmall.length() << " and capacity " << startSmall.capacity()); if (startSmall.capacity() < capacity100) { LOG_INFO(<< "On this platform reservations can reduce string capacity"); @@ -895,19 +977,22 @@ void CMemoryUsageTest::testStringBehaviour() { // the short string optimisation (if it's being used) std::string startLong("this_string_is_longer_than_one_that_will_take_advantage_of_the_small_string_optimisation"); - LOG_INFO(<< "Long string after initial construction has data at " << static_cast(startLong.data()) << " length " + LOG_INFO(<< "Long string after initial construction has data at " + << static_cast(startLong.data()) << " length " << startLong.length() << " and capacity " << startLong.capacity()); 
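A direct way to observe the short string optimisation this test is probing: with SSO the character buffer lives inside the string object itself, so data() points into the object's own footprint. An illustrative standalone probe, not part of the test:

    #include <cstdio>
    #include <string>

    // True if s stores its characters inline rather than on the heap.
    bool usesInlineBuffer(const std::string& s) {
        const char* data = s.data();
        const char* object = reinterpret_cast<const char*>(&s);
        return data >= object && data < object + sizeof(std::string);
    }

    int main() {
        std::string small("small");
        std::string large(100, 'x');
        std::printf("small inline: %d, large inline: %d\n",
                    usesInlineBuffer(small), usesInlineBuffer(large));
        return 0;
    }
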
startLong.reserve(10000); size_t capacity10000(startLong.capacity()); - LOG_INFO(<< "Long string after reserving 10000 bytes has data at " << static_cast(startLong.data()) << " length " + LOG_INFO(<< "Long string after reserving 10000 bytes has data at " + << static_cast(startLong.data()) << " length " << startLong.length() << " and capacity " << startLong.capacity()); startLong.clear(); - LOG_INFO(<< "Long string after clearing has data at " << static_cast(startLong.data()) << " length " << startLong.length() - << " and capacity " << startLong.capacity()); + LOG_INFO(<< "Long string after clearing has data at " + << static_cast(startLong.data()) << " length " + << startLong.length() << " and capacity " << startLong.capacity()); if (startLong.capacity() < capacity10000) { LOG_INFO(<< "On this platform clearing can reduce string capacity"); } @@ -922,20 +1007,24 @@ void CMemoryUsageTest::testStringBehaviour() { } } - LOG_INFO(<< "Capacities during growth from 0 to 50000 characters are: " << core::CContainerPrinter::print(capacities)); + LOG_INFO(<< "Capacities during growth from 0 to 50000 characters are: " + << core::CContainerPrinter::print(capacities)); std::string toBeShrunk(100, 'a'); toBeShrunk = "a lot smaller than it was"; size_t preShrinkCapacity(toBeShrunk.capacity()); - LOG_INFO(<< "String to be shrunk has starting size " << toBeShrunk.size() << " and capacity " << preShrinkCapacity); + LOG_INFO(<< "String to be shrunk has starting size " << toBeShrunk.size() + << " and capacity " << preShrinkCapacity); std::string(toBeShrunk).swap(toBeShrunk); size_t postShrinkCapacity(toBeShrunk.capacity()); - LOG_INFO(<< "String to be shrunk has post-shrink size " << toBeShrunk.size() << " and capacity " << postShrinkCapacity); + LOG_INFO(<< "String to be shrunk has post-shrink size " << toBeShrunk.size() + << " and capacity " << postShrinkCapacity); - LOG_INFO(<< "The swap() trick to reduce capacity " << ((postShrinkCapacity < preShrinkCapacity) ? "works" : "DOESN'T WORK!")); + LOG_INFO(<< "The swap() trick to reduce capacity " + << ((postShrinkCapacity < preShrinkCapacity) ? 
"works" : "DOESN'T WORK!")); } void CMemoryUsageTest::testStringMemory() { @@ -950,7 +1039,8 @@ void CMemoryUsageTest::testStringMemory() { trackingString.push_back(static_cast('a' + j)); normalString.push_back(static_cast('a' + j)); } - LOG_DEBUG(<< "String size " << core::CMemory::dynamicSize(normalString) << ", allocated " << TAllocator::usage()); + LOG_DEBUG(<< "String size " << core::CMemory::dynamicSize(normalString) + << ", allocated " << TAllocator::usage()); CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(normalString), TAllocator::usage()); } } @@ -1017,16 +1107,21 @@ void CMemoryUsageTest::testSharedPointer() { // vec2: 8 (capacity) * 16 (shared_ptr element size) // = 688 - std::size_t expectedSize = vec1.capacity() * sizeof(TIntVecPtr) + vec2.capacity() * sizeof(TIntVecPtr) + 3 * sizeof(TIntVec) + - (vec1[0]->capacity() + vec1[1]->capacity() + vec1[3]->capacity()) * sizeof(int); + std::size_t expectedSize = + vec1.capacity() * sizeof(TIntVecPtr) + + vec2.capacity() * sizeof(TIntVecPtr) + 3 * sizeof(TIntVec) + + (vec1[0]->capacity() + vec1[1]->capacity() + vec1[3]->capacity()) * sizeof(int); - LOG_DEBUG(<< "Expected: " << expectedSize << ", actual: " << (core::CMemory::dynamicSize(vec1) + core::CMemory::dynamicSize(vec2))); + LOG_DEBUG(<< "Expected: " << expectedSize << ", actual: " + << (core::CMemory::dynamicSize(vec1) + core::CMemory::dynamicSize(vec2))); - CPPUNIT_ASSERT_EQUAL(expectedSize, core::CMemory::dynamicSize(vec1) + core::CMemory::dynamicSize(vec2)); + CPPUNIT_ASSERT_EQUAL(expectedSize, core::CMemory::dynamicSize(vec1) + + core::CMemory::dynamicSize(vec2)); TStrPtrVec svec1; svec1.push_back(TStrPtr(new std::string("This is a string"))); - svec1.push_back(TStrPtr(new std::string("Here is some more string data, a little longer than the previous one"))); + svec1.push_back(TStrPtr(new std::string( + "Here is some more string data, a little longer than the previous one"))); svec1.push_back(TStrPtr(new std::string("An uninteresting string, this one!"))); TStrPtrVec svec2; @@ -1034,15 +1129,18 @@ void CMemoryUsageTest::testSharedPointer() { svec2.push_back(TStrPtr()); svec2.push_back(TStrPtr()); - long stringSizeBefore = core::CMemory::dynamicSize(svec1) + core::CMemory::dynamicSize(svec2); + long stringSizeBefore = core::CMemory::dynamicSize(svec1) + + core::CMemory::dynamicSize(svec2); svec2[0] = svec1[2]; svec2[1] = svec1[0]; svec2[2] = svec1[1]; - long stringSizeAfter = core::CMemory::dynamicSize(svec1) + core::CMemory::dynamicSize(svec2); + long stringSizeAfter = core::CMemory::dynamicSize(svec1) + + core::CMemory::dynamicSize(svec2); - CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(svec1), core::CMemory::dynamicSize(svec2)); + CPPUNIT_ASSERT_EQUAL(core::CMemory::dynamicSize(svec1), + core::CMemory::dynamicSize(svec2)); // Allow for integer rounding off by 1 for each string CPPUNIT_ASSERT(std::abs(stringSizeBefore - stringSizeAfter) < 4); } @@ -1077,7 +1175,8 @@ void CMemoryUsageTest::testSmallVector() { TDouble1Vec vec1(size); TDouble6Vec vec2(size); TDouble9Vec vec3(size); - TSizeVec memory{core::CMemory::dynamicSize(vec1), core::CMemory::dynamicSize(vec2), core::CMemory::dynamicSize(vec3)}; + TSizeVec memory{core::CMemory::dynamicSize(vec1), core::CMemory::dynamicSize(vec2), + core::CMemory::dynamicSize(vec3)}; // These assertions hold because the vectors never shrink CPPUNIT_ASSERT(size > 2 || memory[0] == 0); CPPUNIT_ASSERT(memory[0] == 0 || memory[0] == vec1.capacity() * sizeof(double)); @@ -1116,22 +1215,27 @@ void CMemoryUsageTest::testSmallVector() { 
CppUnit::Test* CMemoryUsageTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CMemoryUsageTest::testUsage", &CMemoryUsageTest::testUsage)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMemoryUsageTest::testDebug", &CMemoryUsageTest::testDebug)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMemoryUsageTest::testDynamicSizeAlwaysZero", - &CMemoryUsageTest::testDynamicSizeAlwaysZero)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMemoryUsageTest::testCompress", &CMemoryUsageTest::testCompress)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMemoryUsageTest::testStringBehaviour", &CMemoryUsageTest::testStringBehaviour)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMemoryUsageTest::testStringMemory", &CMemoryUsageTest::testStringMemory)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMemoryUsageTest::testStringClear", &CMemoryUsageTest::testStringClear)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMemoryUsageTest::testSharedPointer", &CMemoryUsageTest::testSharedPointer)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMemoryUsageTest::testRawPointer", &CMemoryUsageTest::testRawPointer)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMemoryUsageTest::testSmallVector", &CMemoryUsageTest::testSmallVector)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testUsage", &CMemoryUsageTest::testUsage)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testDebug", &CMemoryUsageTest::testDebug)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testDynamicSizeAlwaysZero", + &CMemoryUsageTest::testDynamicSizeAlwaysZero)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testCompress", &CMemoryUsageTest::testCompress)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testStringBehaviour", &CMemoryUsageTest::testStringBehaviour)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testStringMemory", &CMemoryUsageTest::testStringMemory)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testStringClear", &CMemoryUsageTest::testStringClear)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testSharedPointer", &CMemoryUsageTest::testSharedPointer)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testRawPointer", &CMemoryUsageTest::testRawPointer)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageTest::testSmallVector", &CMemoryUsageTest::testSmallVector)); return suiteOfTests; } diff --git a/lib/core/unittest/CMessageBufferTest.cc b/lib/core/unittest/CMessageBufferTest.cc index 4974fcb37e..2e4dd693cf 100644 --- a/lib/core/unittest/CMessageBufferTest.cc +++ b/lib/core/unittest/CMessageBufferTest.cc @@ -13,7 +13,8 @@ CppUnit::Test* CMessageBufferTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMessageBufferTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CMessageBufferTest::testAll", &CMessageBufferTest::testAll)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMessageBufferTest::testAll", &CMessageBufferTest::testAll)); return suiteOfTests; } diff --git a/lib/core/unittest/CMessageQueueTest.cc b/lib/core/unittest/CMessageQueueTest.cc index 229ed9a682..798efcc867 100644 --- a/lib/core/unittest/CMessageQueueTest.cc +++ b/lib/core/unittest/CMessageQueueTest.cc @@ -16,9 +16,10 @@ CppUnit::Test* CMessageQueueTest::suite() { 
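Every suite() reflowed in these hunks follows the same CppUnit idiom: TestCaller<Fixture> binds a member function of the fixture to a printable test name, with the template argument naming the fixture class. A minimal sketch using a hypothetical CExampleTest fixture:

    #include <cppunit/TestCaller.h>
    #include <cppunit/TestFixture.h>
    #include <cppunit/TestSuite.h>

    class CExampleTest : public CppUnit::TestFixture {
    public:
        void testSomething() { /* CPPUNIT_ASSERT(...) calls go here */ }

        static CppUnit::Test* suite() {
            CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CExampleTest");
            suiteOfTests->addTest(new CppUnit::TestCaller<CExampleTest>(
                "CExampleTest::testSomething", &CExampleTest::testSomething));
            return suiteOfTests;
        }
    };
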
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMessageQueueTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMessageQueueTest::testSendReceive", &CMessageQueueTest::testSendReceive)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMessageQueueTest::testTiming", &CMessageQueueTest::testTiming)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMessageQueueTest::testSendReceive", &CMessageQueueTest::testSendReceive)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMessageQueueTest::testTiming", &CMessageQueueTest::testTiming)); return suiteOfTests; } @@ -99,7 +100,8 @@ void CMessageQueueTest::testTiming() { CPPUNIT_ASSERT_EQUAL(TEST_SIZE, receiver.size()); double avgProcTimeSec(queue.rollingAverageProcessingTime()); - LOG_DEBUG(<< "Average processing time per item for the last " << NUM_TO_TIME << " items was " << avgProcTimeSec << " seconds"); + LOG_DEBUG(<< "Average processing time per item for the last " << NUM_TO_TIME + << " items was " << avgProcTimeSec << " seconds"); // The high side tolerance is greater here, because although the sleep will // make up the bulk of the processing time, there is some other processing diff --git a/lib/core/unittest/CMonotonicTimeTest.cc b/lib/core/unittest/CMonotonicTimeTest.cc index 4554418f1f..feb232b88d 100644 --- a/lib/core/unittest/CMonotonicTimeTest.cc +++ b/lib/core/unittest/CMonotonicTimeTest.cc @@ -12,10 +12,10 @@ CppUnit::Test* CMonotonicTimeTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMonotonicTimeTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMonotonicTimeTest::testMilliseconds", &CMonotonicTimeTest::testMilliseconds)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMonotonicTimeTest::testNanoseconds", &CMonotonicTimeTest::testNanoseconds)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMonotonicTimeTest::testMilliseconds", &CMonotonicTimeTest::testMilliseconds)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMonotonicTimeTest::testNanoseconds", &CMonotonicTimeTest::testNanoseconds)); return suiteOfTests; } @@ -30,7 +30,8 @@ void CMonotonicTimeTest::testMilliseconds() { uint64_t end(monoTime.milliseconds()); uint64_t diff(end - start); - LOG_DEBUG(<< "During 1 second the monotonic millisecond timer advanced by " << diff << " milliseconds"); + LOG_DEBUG(<< "During 1 second the monotonic millisecond timer advanced by " + << diff << " milliseconds"); // Allow 10% margin of error - this is as much for the sleep as the timer CPPUNIT_ASSERT(diff > 900); @@ -47,7 +48,8 @@ void CMonotonicTimeTest::testNanoseconds() { uint64_t end(monoTime.nanoseconds()); uint64_t diff(end - start); - LOG_DEBUG(<< "During 1 second the monotonic nanosecond timer advanced by " << diff << " nanoseconds"); + LOG_DEBUG(<< "During 1 second the monotonic nanosecond timer advanced by " + << diff << " nanoseconds"); // Allow 10% margin of error - this is as much for the sleep as the timer CPPUNIT_ASSERT(diff > 900000000); diff --git a/lib/core/unittest/CMutexTest.cc b/lib/core/unittest/CMutexTest.cc index 25505af60e..fc690cb993 100644 --- a/lib/core/unittest/CMutexTest.cc +++ b/lib/core/unittest/CMutexTest.cc @@ -10,7 +10,8 @@ CppUnit::Test* CMutexTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMutexTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CMutexTest::testRecursive", &CMutexTest::testRecursive)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMutexTest::testRecursive", &CMutexTest::testRecursive)); return suiteOfTests; } diff 
--git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index 20f46af3ec..10fe1bc181 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -33,7 +33,8 @@ const char* TEST_PIPE_NAME = "testfiles/testpipe"; class CThreadDataWriter : public ml::core::CThread { public: - CThreadDataWriter(const std::string& fileName, size_t size) : m_FileName(fileName), m_Size(size) {} + CThreadDataWriter(const std::string& fileName, size_t size) + : m_FileName(fileName), m_Size(size) {} protected: virtual void run() { @@ -104,7 +105,8 @@ class CThreadDataReader : public ml::core::CThread { class CThreadBlockCanceller : public ml::core::CThread { public: - CThreadBlockCanceller(ml::core::CThread::TThreadId threadId) : m_ThreadId(threadId) {} + CThreadBlockCanceller(ml::core::CThread::TThreadId threadId) + : m_ThreadId(threadId) {} protected: virtual void run() { @@ -125,20 +127,23 @@ class CThreadBlockCanceller : public ml::core::CThread { CppUnit::Test* CNamedPipeFactoryTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CNamedPipeFactoryTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CNamedPipeFactoryTest::testServerIsCppReader", - &CNamedPipeFactoryTest::testServerIsCppReader)); - suiteOfTests->addTest(new CppUnit::TestCaller("CNamedPipeFactoryTest::testServerIsCReader", - &CNamedPipeFactoryTest::testServerIsCReader)); - suiteOfTests->addTest(new CppUnit::TestCaller("CNamedPipeFactoryTest::testServerIsCppWriter", - &CNamedPipeFactoryTest::testServerIsCppWriter)); - suiteOfTests->addTest(new CppUnit::TestCaller("CNamedPipeFactoryTest::testServerIsCWriter", - &CNamedPipeFactoryTest::testServerIsCWriter)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CNamedPipeFactoryTest::testCancelBlock", &CNamedPipeFactoryTest::testCancelBlock)); - suiteOfTests->addTest(new CppUnit::TestCaller("CNamedPipeFactoryTest::testErrorIfRegularFile", - &CNamedPipeFactoryTest::testErrorIfRegularFile)); - suiteOfTests->addTest(new CppUnit::TestCaller("CNamedPipeFactoryTest::testErrorIfSymlink", - &CNamedPipeFactoryTest::testErrorIfSymlink)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CNamedPipeFactoryTest::testServerIsCppReader", + &CNamedPipeFactoryTest::testServerIsCppReader)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CNamedPipeFactoryTest::testServerIsCReader", &CNamedPipeFactoryTest::testServerIsCReader)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CNamedPipeFactoryTest::testServerIsCppWriter", + &CNamedPipeFactoryTest::testServerIsCppWriter)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CNamedPipeFactoryTest::testServerIsCWriter", &CNamedPipeFactoryTest::testServerIsCWriter)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CNamedPipeFactoryTest::testCancelBlock", &CNamedPipeFactoryTest::testCancelBlock)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CNamedPipeFactoryTest::testErrorIfRegularFile", + &CNamedPipeFactoryTest::testErrorIfRegularFile)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CNamedPipeFactoryTest::testErrorIfSymlink", &CNamedPipeFactoryTest::testErrorIfSymlink)); return suiteOfTests; } @@ -147,7 +152,8 @@ void CNamedPipeFactoryTest::testServerIsCppReader() { CThreadDataWriter threadWriter(TEST_PIPE_NAME, TEST_SIZE); CPPUNIT_ASSERT(threadWriter.start()); - ml::core::CNamedPipeFactory::TIStreamP strm = ml::core::CNamedPipeFactory::openPipeStreamRead(TEST_PIPE_NAME); + ml::core::CNamedPipeFactory::TIStreamP strm = + 
ml::core::CNamedPipeFactory::openPipeStreamRead(TEST_PIPE_NAME); CPPUNIT_ASSERT(strm); static const std::streamsize BUF_SIZE = 512; @@ -173,7 +179,8 @@ void CNamedPipeFactoryTest::testServerIsCReader() { CThreadDataWriter threadWriter(TEST_PIPE_NAME, TEST_SIZE); CPPUNIT_ASSERT(threadWriter.start()); - ml::core::CNamedPipeFactory::TFileP file = ml::core::CNamedPipeFactory::openPipeFileRead(TEST_PIPE_NAME); + ml::core::CNamedPipeFactory::TFileP file = + ml::core::CNamedPipeFactory::openPipeFileRead(TEST_PIPE_NAME); CPPUNIT_ASSERT(file); static const size_t BUF_SIZE = 512; @@ -199,7 +206,8 @@ void CNamedPipeFactoryTest::testServerIsCppWriter() { CThreadDataReader threadReader(TEST_PIPE_NAME); CPPUNIT_ASSERT(threadReader.start()); - ml::core::CNamedPipeFactory::TOStreamP strm = ml::core::CNamedPipeFactory::openPipeStreamWrite(TEST_PIPE_NAME); + ml::core::CNamedPipeFactory::TOStreamP strm = + ml::core::CNamedPipeFactory::openPipeStreamWrite(TEST_PIPE_NAME); CPPUNIT_ASSERT(strm); size_t charsLeft(TEST_SIZE); @@ -225,7 +233,8 @@ void CNamedPipeFactoryTest::testServerIsCWriter() { CThreadDataReader threadReader(TEST_PIPE_NAME); CPPUNIT_ASSERT(threadReader.start()); - ml::core::CNamedPipeFactory::TFileP file = ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME); + ml::core::CNamedPipeFactory::TFileP file = + ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME); CPPUNIT_ASSERT(file); size_t charsLeft(TEST_SIZE); @@ -250,14 +259,16 @@ void CNamedPipeFactoryTest::testCancelBlock() { CThreadBlockCanceller cancellerThread(ml::core::CThread::currentThreadId()); CPPUNIT_ASSERT(cancellerThread.start()); - ml::core::CNamedPipeFactory::TOStreamP strm = ml::core::CNamedPipeFactory::openPipeStreamWrite(TEST_PIPE_NAME); + ml::core::CNamedPipeFactory::TOStreamP strm = + ml::core::CNamedPipeFactory::openPipeStreamWrite(TEST_PIPE_NAME); CPPUNIT_ASSERT(strm == nullptr); CPPUNIT_ASSERT(cancellerThread.stop()); } void CNamedPipeFactoryTest::testErrorIfRegularFile() { - ml::core::CNamedPipeFactory::TIStreamP strm = ml::core::CNamedPipeFactory::openPipeStreamRead("Main.cc"); + ml::core::CNamedPipeFactory::TIStreamP strm = + ml::core::CNamedPipeFactory::openPipeStreamRead("Main.cc"); CPPUNIT_ASSERT(strm == nullptr); } @@ -278,7 +289,8 @@ void CNamedPipeFactoryTest::testErrorIfSymlink() { CPPUNIT_ASSERT_EQUAL(0, ::mkfifo(TEST_PIPE_NAME, S_IRUSR | S_IWUSR)); CPPUNIT_ASSERT_EQUAL(0, ::symlink(TEST_PIPE_NAME, TEST_SYMLINK_NAME)); - ml::core::CNamedPipeFactory::TIStreamP strm = ml::core::CNamedPipeFactory::openPipeStreamRead(TEST_SYMLINK_NAME); + ml::core::CNamedPipeFactory::TIStreamP strm = + ml::core::CNamedPipeFactory::openPipeStreamRead(TEST_SYMLINK_NAME); CPPUNIT_ASSERT(strm == nullptr); CPPUNIT_ASSERT_EQUAL(0, ::unlink(TEST_SYMLINK_NAME)); diff --git a/lib/core/unittest/COsFileFuncsTest.cc b/lib/core/unittest/COsFileFuncsTest.cc index 12a0cb91c7..cdef30c6b2 100644 --- a/lib/core/unittest/COsFileFuncsTest.cc +++ b/lib/core/unittest/COsFileFuncsTest.cc @@ -19,8 +19,10 @@ CppUnit::Test* COsFileFuncsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COsFileFuncsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("COsFileFuncsTest::testInode", &COsFileFuncsTest::testInode)); - suiteOfTests->addTest(new CppUnit::TestCaller("COsFileFuncsTest::testLStat", &COsFileFuncsTest::testLStat)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "COsFileFuncsTest::testInode", &COsFileFuncsTest::testInode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "COsFileFuncsTest::testLStat", 
&COsFileFuncsTest::testLStat)); return suiteOfTests; } @@ -43,7 +45,8 @@ void COsFileFuncsTest::testInode() { ::memset(&statBuf, 0, sizeof(statBuf)); ml::core::COsFileFuncs::TIno headerOpen(0); - int headerFd(ml::core::COsFileFuncs::open(headerFile.c_str(), ml::core::COsFileFuncs::RDONLY)); + int headerFd(ml::core::COsFileFuncs::open(headerFile.c_str(), + ml::core::COsFileFuncs::RDONLY)); CPPUNIT_ASSERT(headerFd != -1); CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::fstat(headerFd, &statBuf)); CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::close(headerFd)); @@ -85,7 +88,8 @@ void COsFileFuncsTest::testLStat() { LOG_WARN(<< "Skipping lstat() test as it would need to run as administrator"); #else #ifdef Windows - CPPUNIT_ASSERT(CreateSymbolicLink(symLink.c_str(), file.c_str(), SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE) != FALSE); + CPPUNIT_ASSERT(CreateSymbolicLink(symLink.c_str(), file.c_str(), + SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE) != FALSE); #else CPPUNIT_ASSERT_EQUAL(0, ::symlink(file.c_str(), symLink.c_str())); #endif @@ -102,7 +106,8 @@ void COsFileFuncsTest::testLStat() { // Due to the way this test is structured, the link should have been created // in the last few seconds (but the linked file, Main.cc, could be older) ml::core_t::TTime now = ml::core::CTimeUtils::now(); - LOG_INFO(<< "now: " << now << ", symlink create time: " << statBuf.st_ctime << ", symlink modification time: " << statBuf.st_mtime + LOG_INFO(<< "now: " << now << ", symlink create time: " << statBuf.st_ctime + << ", symlink modification time: " << statBuf.st_mtime << ", symlink access time: " << statBuf.st_atime); CPPUNIT_ASSERT(statBuf.st_ctime > now - 3); CPPUNIT_ASSERT(statBuf.st_mtime > now - 3); diff --git a/lib/core/unittest/CPatternSetTest.cc b/lib/core/unittest/CPatternSetTest.cc index db45f21f85..b63692691e 100644 --- a/lib/core/unittest/CPatternSetTest.cc +++ b/lib/core/unittest/CPatternSetTest.cc @@ -14,25 +14,35 @@ using namespace core; CppUnit::Test* CPatternSetTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPatternSetTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testInitFromJson_GivenInvalidJson", - &CPatternSetTest::testInitFromJson_GivenInvalidJson)); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testInitFromJson_GivenNonArray", - &CPatternSetTest::testInitFromJson_GivenNonArray)); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem", - &CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem)); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates", - &CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates)); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testContains_GivenFullMatchKeys", - &CPatternSetTest::testContains_GivenFullMatchKeys)); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testContains_GivenPrefixKeys", - &CPatternSetTest::testContains_GivenPrefixKeys)); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testContains_GivenSuffixKeys", - &CPatternSetTest::testContains_GivenSuffixKeys)); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testContains_GivenContainsKeys", - &CPatternSetTest::testContains_GivenContainsKeys)); - suiteOfTests->addTest(new CppUnit::TestCaller("CPatternSetTest::testContains_GivenMixedKeys", - &CPatternSetTest::testContains_GivenMixedKeys)); - suiteOfTests->addTest(new 
CppUnit::TestCaller("CPatternSetTest::testClear", &CPatternSetTest::testClear)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testInitFromJson_GivenInvalidJson", + &CPatternSetTest::testInitFromJson_GivenInvalidJson)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testInitFromJson_GivenNonArray", + &CPatternSetTest::testInitFromJson_GivenNonArray)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem", + &CPatternSetTest::testInitFromJson_GivenArrayWithNonStringItem)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates", + &CPatternSetTest::testInitFromJson_GivenArrayWithDuplicates)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testContains_GivenFullMatchKeys", + &CPatternSetTest::testContains_GivenFullMatchKeys)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testContains_GivenPrefixKeys", + &CPatternSetTest::testContains_GivenPrefixKeys)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testContains_GivenSuffixKeys", + &CPatternSetTest::testContains_GivenSuffixKeys)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testContains_GivenContainsKeys", + &CPatternSetTest::testContains_GivenContainsKeys)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testContains_GivenMixedKeys", + &CPatternSetTest::testContains_GivenMixedKeys)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPatternSetTest::testClear", &CPatternSetTest::testClear)); return suiteOfTests; } diff --git a/lib/core/unittest/CPersistUtilsTest.cc b/lib/core/unittest/CPersistUtilsTest.cc index 4e0b0e5bc7..a72dd36746 100644 --- a/lib/core/unittest/CPersistUtilsTest.cc +++ b/lib/core/unittest/CPersistUtilsTest.cc @@ -72,7 +72,9 @@ struct SFirstLess { }; struct SEqual { - bool operator()(double lhs, double rhs) const { return std::fabs(lhs - rhs) <= 1e-5 * std::max(std::fabs(lhs), std::fabs(rhs)); } + bool operator()(double lhs, double rhs) const { + return std::fabs(lhs - rhs) <= 1e-5 * std::max(std::fabs(lhs), std::fabs(rhs)); + } template bool operator()(T lhs, T rhs) const { @@ -117,7 +119,8 @@ class CCompareImpl { } template - static bool dispatch(const boost::unordered_map& lhs, const boost::unordered_map& rhs) { + static bool dispatch(const boost::unordered_map& lhs, + const boost::unordered_map& rhs) { using TVec = std::vector>; TVec lKeys(lhs.begin(), lhs.end()); TVec rKeys(rhs.begin(), rhs.end()); @@ -127,7 +130,8 @@ class CCompareImpl { } template - static bool dispatch(const boost::unordered_set& lhs, const boost::unordered_set& rhs) { + static bool dispatch(const boost::unordered_set& lhs, + const boost::unordered_set& rhs) { using TVec = std::vector; TVec lKeys(lhs.begin(), lhs.end()); TVec rKeys(rhs.begin(), rhs.end()); @@ -139,7 +143,8 @@ class CCompareImpl { template bool equal(const T& lhs, const T& rhs) { - return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin(), SEqual()); + return lhs.size() == rhs.size() && + std::equal(lhs.begin(), lhs.end(), rhs.begin(), SEqual()); } template @@ -422,7 +427,8 @@ void CPersistUtilsTest::testPersistContainers() { CPPUNIT_ASSERT(!core::CPersistUtils::fromString(bad, collection)); CPPUNIT_ASSERT(collection.empty()); - bad = std::string("etjdjk") + core::CPersistUtils::PAIR_DELIMITER + "2.3" + core::CPersistUtils::DELIMITER + bad; + bad = std::string("etjdjk") + 
core::CPersistUtils::PAIR_DELIMITER + + "2.3" + core::CPersistUtils::DELIMITER + bad; CPPUNIT_ASSERT(!core::CPersistUtils::fromString(bad, collection)); CPPUNIT_ASSERT(collection.empty()); } @@ -505,7 +511,8 @@ void CPersistUtilsTest::testAppend() { CPPUNIT_ASSERT(begin == end); LOG_DEBUG(<< "state = " << state); - core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true); + core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, + core::CPersistUtils::PAIR_DELIMITER, true); CPPUNIT_ASSERT(equal(source, restored)); for (int i = 15; i < 19; i++) { @@ -519,7 +526,8 @@ void CPersistUtilsTest::testAppend() { CPPUNIT_ASSERT(begin == end); LOG_DEBUG(<< "state = " << state); - core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true); + core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, + core::CPersistUtils::PAIR_DELIMITER, true); CPPUNIT_ASSERT(equal(source, restored)); } @@ -531,7 +539,8 @@ void CPersistUtilsTest::testAppend() { LOG_DEBUG(<< "state = " << state); TSizeDoubleMap restored; - core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true); + core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, + core::CPersistUtils::PAIR_DELIMITER, true); CPPUNIT_ASSERT(equal(collection, restored)); for (int i = 0; i < 10; i++) { @@ -540,16 +549,19 @@ void CPersistUtilsTest::testAppend() { state = core::CPersistUtils::toString(collection); LOG_DEBUG(<< "state = " << state); - core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true); + core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, + core::CPersistUtils::PAIR_DELIMITER, true); CPPUNIT_ASSERT(equal(collection, restored)); // add another element - std::pair pr = collection.insert(TSizeDoublePr(14, 1.0)); + std::pair pr = + collection.insert(TSizeDoublePr(14, 1.0)); TSizeDoubleMap::iterator end = collection.end(); state = core::CPersistUtils::toString(pr.first, end); LOG_DEBUG(<< "state = " << state); - core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true); + core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, + core::CPersistUtils::PAIR_DELIMITER, true); CPPUNIT_ASSERT(equal(collection, restored)); pr = collection.insert(TSizeDoublePr(20, 158.0)); @@ -558,7 +570,8 @@ void CPersistUtilsTest::testAppend() { end = collection.end(); state = core::CPersistUtils::toString(pr.first, end); LOG_DEBUG(<< "state = " << state); - core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true); + core::CPersistUtils::fromString(state, restored, core::CPersistUtils::DELIMITER, + core::CPersistUtils::PAIR_DELIMITER, true); CPPUNIT_ASSERT(equal(collection, restored)); } } @@ -566,11 +579,12 @@ void CPersistUtilsTest::testAppend() { CppUnit::Test* CPersistUtilsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPersistUtilsTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CPersistUtilsTest::testPersistContainers", &CPersistUtilsTest::testPersistContainers)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CPersistUtilsTest::testPersistIterators", &CPersistUtilsTest::testPersistIterators)); - 
suiteOfTests->addTest(new CppUnit::TestCaller("CPersistUtilsTest::testAppend", &CPersistUtilsTest::testAppend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPersistUtilsTest::testPersistContainers", &CPersistUtilsTest::testPersistContainers)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPersistUtilsTest::testPersistIterators", &CPersistUtilsTest::testPersistIterators)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPersistUtilsTest::testAppend", &CPersistUtilsTest::testAppend)); return suiteOfTests; } diff --git a/lib/core/unittest/CPolymorphicStackObjectCPtrTest.cc b/lib/core/unittest/CPolymorphicStackObjectCPtrTest.cc index d021a3ca4b..b4f75ff787 100644 --- a/lib/core/unittest/CPolymorphicStackObjectCPtrTest.cc +++ b/lib/core/unittest/CPolymorphicStackObjectCPtrTest.cc @@ -43,7 +43,8 @@ class CDerived4 : public CBase { void CPolymorphicStackObjectCPtrTest::testAll() { using TStackPtr12 = core::CPolymorphicStackObjectCPtr; - using TStackPtr1234 = core::CPolymorphicStackObjectCPtr; + using TStackPtr1234 = + core::CPolymorphicStackObjectCPtr; TStackPtr12 test1((CDerived1())); CPPUNIT_ASSERT_EQUAL(std::string("d1"), test1->iam()); @@ -77,8 +78,8 @@ void CPolymorphicStackObjectCPtrTest::testAll() { CppUnit::Test* CPolymorphicStackObjectCPtrTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPolymorphicStackObjectCPtrTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CPolymorphicStackObjectCPtrTest::testAll", - &CPolymorphicStackObjectCPtrTest::testAll)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CPolymorphicStackObjectCPtrTest::testAll", &CPolymorphicStackObjectCPtrTest::testAll)); return suiteOfTests; } diff --git a/lib/core/unittest/CProcessPriorityTest.cc b/lib/core/unittest/CProcessPriorityTest.cc index afc2de5765..f6eda9fef5 100644 --- a/lib/core/unittest/CProcessPriorityTest.cc +++ b/lib/core/unittest/CProcessPriorityTest.cc @@ -10,8 +10,8 @@ CppUnit::Test* CProcessPriorityTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProcessPriorityTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CProcessPriorityTest::testReducePriority", - &CProcessPriorityTest::testReducePriority)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CProcessPriorityTest::testReducePriority", &CProcessPriorityTest::testReducePriority)); return suiteOfTests; } diff --git a/lib/core/unittest/CProcessPriorityTest_Linux.cc b/lib/core/unittest/CProcessPriorityTest_Linux.cc index 447a16cc0e..49b715ac91 100644 --- a/lib/core/unittest/CProcessPriorityTest_Linux.cc +++ b/lib/core/unittest/CProcessPriorityTest_Linux.cc @@ -51,8 +51,8 @@ bool readFromSystemFile(const std::string& fileName, std::string& content) { CppUnit::Test* CProcessPriorityTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProcessPriorityTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CProcessPriorityTest::testReducePriority", - &CProcessPriorityTest::testReducePriority)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CProcessPriorityTest::testReducePriority", &CProcessPriorityTest::testReducePriority)); return suiteOfTests; } @@ -69,7 +69,8 @@ void CProcessPriorityTest::testReducePriority() { } if (readFromSystemFile("/proc/self/oom_adj", content) == true) { if (readFromOneOrOther) { - LOG_DEBUG(<< "oom_score_adj 667 corresponds to oom_adj " << content << " on kernel " << ml::core::CUname::release()); + LOG_DEBUG(<< "oom_score_adj 667 corresponds to oom_adj " << content + << " on kernel " << 
ml::core::CUname::release());
             int oomAdj = 0;
             CPPUNIT_ASSERT(ml::core::CStringUtils::stringToType(content, oomAdj));
             // For the kernel versions that support both, there's variation in
diff --git a/lib/core/unittest/CProcessTest.cc b/lib/core/unittest/CProcessTest.cc
index 47a4024790..5c7595c8a3 100644
--- a/lib/core/unittest/CProcessTest.cc
+++ b/lib/core/unittest/CProcessTest.cc
@@ -11,7 +11,8 @@
 CppUnit::Test* CProcessTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProcessTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CProcessTest>("CProcessTest::testPids", &CProcessTest::testPids));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProcessTest>(
+        "CProcessTest::testPids", &CProcessTest::testPids));
     return suiteOfTests;
 }
diff --git a/lib/core/unittest/CProgNameTest.cc b/lib/core/unittest/CProgNameTest.cc
index 8a01b91566..26f1127ad8 100644
--- a/lib/core/unittest/CProgNameTest.cc
+++ b/lib/core/unittest/CProgNameTest.cc
@@ -12,8 +12,10 @@
 CppUnit::Test* CProgNameTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProgNameTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CProgNameTest>("CProgNameTest::testProgName", &CProgNameTest::testProgName));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CProgNameTest>("CProgNameTest::testProgDir", &CProgNameTest::testProgDir));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProgNameTest>(
+        "CProgNameTest::testProgName", &CProgNameTest::testProgName));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CProgNameTest>(
+        "CProgNameTest::testProgDir", &CProgNameTest::testProgDir));
     return suiteOfTests;
 }
diff --git a/lib/core/unittest/CRapidJsonLineWriterTest.cc b/lib/core/unittest/CRapidJsonLineWriterTest.cc
index 90141e7b66..962c4c37cf 100644
--- a/lib/core/unittest/CRapidJsonLineWriterTest.cc
+++ b/lib/core/unittest/CRapidJsonLineWriterTest.cc
@@ -40,7 +40,8 @@ void CRapidJsonLineWriterTest::testDoublePrecission() {
     std::ostringstream strm;
     {
         using TGenericLineWriter =
-            ml::core::CRapidJsonLineWriter<rapidjson::OStreamWrapper, rapidjson::UTF8<>, rapidjson::UTF8<>, rapidjson::CrtAllocator>;
+            ml::core::CRapidJsonLineWriter<rapidjson::OStreamWrapper, rapidjson::UTF8<>,
+                                           rapidjson::UTF8<>, rapidjson::CrtAllocator>;
         rapidjson::OStreamWrapper writeStream(strm);
         TGenericLineWriter writer(writeStream);
@@ -54,27 +55,33 @@ void CRapidJsonLineWriterTest::testDoublePrecission() {
         writer.EndObject();
     }
-    CPPUNIT_ASSERT_EQUAL(std::string("{\"a\":0.00003,\"b\":5e-300,\"c\":0.0}\n"), strm.str());
+    CPPUNIT_ASSERT_EQUAL(std::string("{\"a\":0.00003,\"b\":5e-300,\"c\":0.0}\n"),
+                         strm.str());
 }
 void CRapidJsonLineWriterTest::testDoublePrecissionDtoa() {
     char buffer[100];
     char* end = rapidjson::internal::dtoa(3e-5, buffer);
-    CPPUNIT_ASSERT_EQUAL(std::string("0.00003"), std::string(buffer, static_cast<size_t>(end - buffer)));
+    CPPUNIT_ASSERT_EQUAL(std::string("0.00003"),
+                         std::string(buffer, static_cast<size_t>(end - buffer)));
     end = rapidjson::internal::dtoa(2e-20, buffer, 20);
-    CPPUNIT_ASSERT_EQUAL(std::string("2e-20"), std::string(buffer, static_cast<size_t>(end - buffer)));
+    CPPUNIT_ASSERT_EQUAL(std::string("2e-20"),
+                         std::string(buffer, static_cast<size_t>(end - buffer)));
     end = rapidjson::internal::dtoa(1e-308, buffer);
-    CPPUNIT_ASSERT_EQUAL(std::string("1e-308"), std::string(buffer, static_cast<size_t>(end - buffer)));
+    CPPUNIT_ASSERT_EQUAL(std::string("1e-308"),
+                         std::string(buffer, static_cast<size_t>(end - buffer)));
     end = rapidjson::internal::dtoa(1e-300, buffer, 20);
-    CPPUNIT_ASSERT_EQUAL(std::string("0.0"), std::string(buffer, static_cast<size_t>(end - buffer)));
+    CPPUNIT_ASSERT_EQUAL(std::string("0.0"),
+                         std::string(buffer, static_cast<size_t>(end - buffer)));
     // test the limit, to not hardcode the string we
check that it is not 0.0 end = rapidjson::internal::dtoa(std::numeric_limits::denorm_min(), buffer); - CPPUNIT_ASSERT(std::string("0.0") != std::string(buffer, static_cast(end - buffer))); + CPPUNIT_ASSERT(std::string("0.0") != + std::string(buffer, static_cast(end - buffer))); int ret = ::snprintf(buffer, sizeof(buffer), "%g", 1e-300); @@ -114,10 +121,12 @@ void CRapidJsonLineWriterTest::microBenchmark() { CppUnit::Test* CRapidJsonLineWriterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidJsonLineWriterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CRapidJsonLineWriterTest::testDoublePrecissionDtoa", - &CRapidJsonLineWriterTest::testDoublePrecissionDtoa)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRapidJsonLineWriterTest::testDoublePrecission", - &CRapidJsonLineWriterTest::testDoublePrecission)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidJsonLineWriterTest::testDoublePrecissionDtoa", + &CRapidJsonLineWriterTest::testDoublePrecissionDtoa)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidJsonLineWriterTest::testDoublePrecission", + &CRapidJsonLineWriterTest::testDoublePrecission)); // microbenchmark, enable if you are interested /*suiteOfTests->addTest( new CppUnit::TestCaller( diff --git a/lib/core/unittest/CRapidJsonWriterBaseTest.cc b/lib/core/unittest/CRapidJsonWriterBaseTest.cc index b0e67b351a..0238191728 100644 --- a/lib/core/unittest/CRapidJsonWriterBaseTest.cc +++ b/lib/core/unittest/CRapidJsonWriterBaseTest.cc @@ -18,10 +18,11 @@ CppUnit::Test* CRapidJsonWriterBaseTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidJsonWriterBaseTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CRapidJsonWriterBaseTest::testAddFields", - &CRapidJsonWriterBaseTest::testAddFields)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRapidJsonWriterBaseTest::testRemoveMemberIfPresent", - &CRapidJsonWriterBaseTest::testRemoveMemberIfPresent)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidJsonWriterBaseTest::testAddFields", &CRapidJsonWriterBaseTest::testAddFields)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidJsonWriterBaseTest::testRemoveMemberIfPresent", + &CRapidJsonWriterBaseTest::testRemoveMemberIfPresent)); return suiteOfTests; } @@ -47,7 +48,8 @@ void CRapidJsonWriterBaseTest::testAddFields() { std::ostringstream strm; rapidjson::OStreamWrapper writeStream(strm); using TGenericLineWriter = - ml::core::CRapidJsonWriterBase, rapidjson::UTF8<>, rapidjson::CrtAllocator>; + ml::core::CRapidJsonWriterBase, + rapidjson::UTF8<>, rapidjson::CrtAllocator>; TGenericLineWriter writer(writeStream); rapidjson::Document doc = writer.makeDoc(); @@ -63,10 +65,15 @@ void CRapidJsonWriterBaseTest::testAddFields() { writer.addIntFieldToObj(INT_NAME, -9, doc); writer.addTimeFieldToObj(TIME_NAME, ml::core_t::TTime(1521035866), doc); writer.addUIntFieldToObj(UINT_NAME, 999999999999999ull, doc); - writer.addStringArrayFieldToObj(STR_ARRAY_NAME, TGenericLineWriter::TStrVec(3, "blah"), doc); - writer.addDoubleArrayFieldToObj(DOUBLE_ARRAY_NAME, TGenericLineWriter::TDoubleVec(10, 1.5), doc); - writer.addDoubleArrayFieldToObj(NAN_ARRAY_NAME, TGenericLineWriter::TDoubleVec(2, std::numeric_limits::quiet_NaN()), doc); - writer.addTimeArrayFieldToObj(TTIME_ARRAY_NAME, TGenericLineWriter::TTimeVec(2, 1421421421), doc); + writer.addStringArrayFieldToObj(STR_ARRAY_NAME, + TGenericLineWriter::TStrVec(3, "blah"), doc); + writer.addDoubleArrayFieldToObj(DOUBLE_ARRAY_NAME, + 
TGenericLineWriter::TDoubleVec(10, 1.5), doc); + writer.addDoubleArrayFieldToObj( + NAN_ARRAY_NAME, + TGenericLineWriter::TDoubleVec(2, std::numeric_limits::quiet_NaN()), doc); + writer.addTimeArrayFieldToObj(TTIME_ARRAY_NAME, + TGenericLineWriter::TTimeVec(2, 1421421421), doc); writer.write(doc); writer.Flush(); @@ -99,7 +106,8 @@ void CRapidJsonWriterBaseTest::testRemoveMemberIfPresent() { std::ostringstream strm; rapidjson::OStreamWrapper writeStream(strm); using TGenericLineWriter = - ml::core::CRapidJsonWriterBase, rapidjson::UTF8<>, rapidjson::CrtAllocator>; + ml::core::CRapidJsonWriterBase, + rapidjson::UTF8<>, rapidjson::CrtAllocator>; TGenericLineWriter writer(writeStream); rapidjson::Document doc = writer.makeDoc(); diff --git a/lib/core/unittest/CRapidXmlParserTest.cc b/lib/core/unittest/CRapidXmlParserTest.cc index 96215110e6..3d1b4e4c3c 100644 --- a/lib/core/unittest/CRapidXmlParserTest.cc +++ b/lib/core/unittest/CRapidXmlParserTest.cc @@ -15,19 +15,20 @@ CppUnit::Test* CRapidXmlParserTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidXmlParserTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRapidXmlParserTest::testParse1", &CRapidXmlParserTest::testParse1)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRapidXmlParserTest::testParse2", &CRapidXmlParserTest::testParse2)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRapidXmlParserTest::testNavigate", &CRapidXmlParserTest::testNavigate)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRapidXmlParserTest::testConvert", &CRapidXmlParserTest::testConvert)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRapidXmlParserTest::testDump", &CRapidXmlParserTest::testDump)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRapidXmlParserTest::testParseSpeed", &CRapidXmlParserTest::testParseSpeed)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRapidXmlParserTest::testConvertSpeed", &CRapidXmlParserTest::testConvertSpeed)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlParserTest::testParse1", &CRapidXmlParserTest::testParse1)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlParserTest::testParse2", &CRapidXmlParserTest::testParse2)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlParserTest::testNavigate", &CRapidXmlParserTest::testNavigate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlParserTest::testConvert", &CRapidXmlParserTest::testConvert)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlParserTest::testDump", &CRapidXmlParserTest::testDump)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlParserTest::testParseSpeed", &CRapidXmlParserTest::testParseSpeed)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlParserTest::testConvertSpeed", &CRapidXmlParserTest::testConvertSpeed)); return suiteOfTests; } @@ -56,25 +57,30 @@ void CRapidXmlParserTest::testParse2() { CPPUNIT_ASSERT_EQUAL(std::string("syslog_parser"), rootNodePtr->name()); CPPUNIT_ASSERT_EQUAL(rootNodePtr->name(), parser.rootElementName()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec& parseTree = rootNodePtr->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& parseTree = + rootNodePtr->children(); CPPUNIT_ASSERT_EQUAL(size_t(1), parseTree.size()); CPPUNIT_ASSERT(parseTree[0] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("parsetree"), parseTree[0]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec& expression = parseTree[0]->children(); + const 
ml::core::CXmlNodeWithChildren::TChildNodePVec& expression = + parseTree[0]->children(); CPPUNIT_ASSERT_EQUAL(size_t(2), expression.size()); CPPUNIT_ASSERT(expression[0] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("expression"), expression[0]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec& descriptionAndRegexes = expression[0]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& descriptionAndRegexes = + expression[0]->children(); CPPUNIT_ASSERT_EQUAL(size_t(2), descriptionAndRegexes.size()); CPPUNIT_ASSERT(descriptionAndRegexes[0] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("description"), descriptionAndRegexes[0]->name()); - CPPUNIT_ASSERT_EQUAL(std::string("Transport node error"), descriptionAndRegexes[0]->value()); + CPPUNIT_ASSERT_EQUAL(std::string("Transport node error"), + descriptionAndRegexes[0]->value()); CPPUNIT_ASSERT(descriptionAndRegexes[1] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("regexes"), descriptionAndRegexes[1]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec& varbind = descriptionAndRegexes[1]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& varbind = + descriptionAndRegexes[1]->children(); CPPUNIT_ASSERT_EQUAL(size_t(2), varbind.size()); CPPUNIT_ASSERT(varbind[0] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("varbind"), varbind[0]->name()); @@ -82,7 +88,8 @@ void CRapidXmlParserTest::testParse2() { CPPUNIT_ASSERT_EQUAL(std::string("varbind"), varbind[1]->name()); // Test attributes - const ml::core::CXmlNodeWithChildren::TChildNodePVec& tokenAndRegex0 = varbind[0]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& tokenAndRegex0 = + varbind[0]->children(); CPPUNIT_ASSERT_EQUAL(std::string("token"), tokenAndRegex0[0]->name()); CPPUNIT_ASSERT_EQUAL(std::string(""), tokenAndRegex0[0]->value()); CPPUNIT_ASSERT_EQUAL(std::string("regex"), tokenAndRegex0[1]->name()); @@ -91,11 +98,13 @@ void CRapidXmlParserTest::testParse2() { CPPUNIT_ASSERT(this->testAttribute(*(tokenAndRegex0[1]), "local", "BZ")); // Test CDATA - const ml::core::CXmlNodeWithChildren::TChildNodePVec& tokenAndRegex1 = varbind[1]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& tokenAndRegex1 = + varbind[1]->children(); CPPUNIT_ASSERT_EQUAL(std::string("token"), tokenAndRegex1[0]->name()); CPPUNIT_ASSERT_EQUAL(std::string("source"), tokenAndRegex1[0]->value()); CPPUNIT_ASSERT_EQUAL(std::string("regex"), tokenAndRegex1[1]->name()); - CPPUNIT_ASSERT_EQUAL(std::string("(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"), tokenAndRegex1[1]->value()); + CPPUNIT_ASSERT_EQUAL(std::string("(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"), + tokenAndRegex1[1]->value()); } void CRapidXmlParserTest::testNavigate() { @@ -132,25 +141,32 @@ void CRapidXmlParserTest::testNavigate() { CPPUNIT_ASSERT(!parser.navigateNext()); } -ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP CRapidXmlParserTest::makeTestNodeHierarchy() { - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(new ml::core::CXmlNodeWithChildren("root")); +ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP +CRapidXmlParserTest::makeTestNodeHierarchy() { + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root( + new ml::core::CXmlNodeWithChildren("root")); - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP id(new ml::core::CXmlNodeWithChildren("id", "123")); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP id( + new ml::core::CXmlNodeWithChildren("id", "123")); root->addChildP(id); - 
ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP parent(new ml::core::CXmlNodeWithChildren("parent")); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP parent( + new ml::core::CXmlNodeWithChildren("parent")); root->addChildP(parent); - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP child(new ml::core::CXmlNodeWithChildren("child", "boo!")); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP child( + new ml::core::CXmlNodeWithChildren("child", "boo!")); parent->addChildP(child); ml::core::CXmlNode::TStrStrMap attrMap; attrMap["attr1"] = "you & me"; - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP child2(new ml::core::CXmlNodeWithChildren("child", "2nd", attrMap)); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP child2( + new ml::core::CXmlNodeWithChildren("child", "2nd", attrMap)); parent->addChildP(child2); - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP empty(new ml::core::CXmlNodeWithChildren("empty")); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP empty( + new ml::core::CXmlNodeWithChildren("empty")); root->addChildP(empty); attrMap["attr1"] = "first 'attribute'"; @@ -165,7 +181,8 @@ ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP CRapidXmlParserTest::makeT void CRapidXmlParserTest::testConvert() { // Use a standard node hierarchy to allow for comparison with the // standards-compliant XML parser - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(CRapidXmlParserTest::makeTestNodeHierarchy()); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root( + CRapidXmlParserTest::makeTestNodeHierarchy()); std::string converted; ml::core::CRapidXmlParser::convert(*root, converted); @@ -185,7 +202,8 @@ void CRapidXmlParserTest::testConvert() { CPPUNIT_ASSERT(converted.find("") != std::string::npos); CPPUNIT_ASSERT(converted.find("") != std::string::npos || converted.find("") != std::string::npos); + CPPUNIT_ASSERT(converted.find("") != std::string::npos || + converted.find("") != std::string::npos); CPPUNIT_ASSERT(converted.find("name()); CPPUNIT_ASSERT_EQUAL(rootNodePtr->name(), parser.rootElementName()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec& firstLevelChildren = rootNodePtr->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& firstLevelChildren = + rootNodePtr->children(); CPPUNIT_ASSERT_EQUAL(size_t(2), firstLevelChildren.size()); CPPUNIT_ASSERT(firstLevelChildren[0] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("OperationRequest"), firstLevelChildren[0]->name()); CPPUNIT_ASSERT(firstLevelChildren[1] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("Items"), firstLevelChildren[1]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec& opReqChildren = firstLevelChildren[0]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& opReqChildren = + firstLevelChildren[0]->children(); CPPUNIT_ASSERT_EQUAL(size_t(4), opReqChildren.size()); CPPUNIT_ASSERT(opReqChildren[0] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("HTTPHeaders"), opReqChildren[0]->name()); @@ -239,11 +259,13 @@ void CRapidXmlParserTest::testParse1(const ml::core::CRapidXmlParser& parser) { CPPUNIT_ASSERT_EQUAL(std::string("1.05041599273682"), opReqChildren[3]->value()); // Test CDATA - const ml::core::CXmlNodeWithChildren::TChildNodePVec& itemsChildren = firstLevelChildren[1]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& itemsChildren = + firstLevelChildren[1]->children(); CPPUNIT_ASSERT_EQUAL(size_t(13), itemsChildren.size()); CPPUNIT_ASSERT(itemsChildren[3] != nullptr); - const 
ml::core::CXmlNodeWithChildren::TChildNodePVec& item3Children = itemsChildren[3]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& item3Children = + itemsChildren[3]->children(); CPPUNIT_ASSERT_EQUAL(size_t(4), item3Children.size()); CPPUNIT_ASSERT(item3Children[0] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("msg"), item3Children[0]->name()); @@ -254,15 +276,18 @@ void CRapidXmlParserTest::testParse1(const ml::core::CRapidXmlParser& parser) { // Test escaped ampersand CPPUNIT_ASSERT(itemsChildren[10] != nullptr); - const ml::core::CXmlNodeWithChildren::TChildNodePVec& item10Children = itemsChildren[10]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& item10Children = + itemsChildren[10]->children(); CPPUNIT_ASSERT_EQUAL(size_t(3), item10Children.size()); CPPUNIT_ASSERT(item10Children[2] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("ItemAttributes"), item10Children[2]->name()); - const ml::core::CXmlNodeWithChildren::TChildNodePVec& itemAttributesChildren = item10Children[2]->children(); + const ml::core::CXmlNodeWithChildren::TChildNodePVec& itemAttributesChildren = + item10Children[2]->children(); CPPUNIT_ASSERT_EQUAL(size_t(4), itemAttributesChildren.size()); CPPUNIT_ASSERT(itemAttributesChildren[1] != nullptr); CPPUNIT_ASSERT_EQUAL(std::string("Manufacturer"), itemAttributesChildren[1]->name()); - CPPUNIT_ASSERT_EQUAL(std::string("William Morrow & Company"), itemAttributesChildren[1]->value()); + CPPUNIT_ASSERT_EQUAL(std::string("William Morrow & Company"), + itemAttributesChildren[1]->value()); } std::string CRapidXmlParserTest::fileToString(const std::string& fileName) { @@ -280,7 +305,9 @@ std::string CRapidXmlParserTest::fileToString(const std::string& fileName) { return ret; } -bool CRapidXmlParserTest::testAttribute(const ml::core::CXmlNode& node, const std::string& key, const std::string& expected) { +bool CRapidXmlParserTest::testAttribute(const ml::core::CXmlNode& node, + const std::string& key, + const std::string& expected) { std::string actual; if (node.attribute(key, actual) == false) { return false; @@ -300,7 +327,8 @@ void CRapidXmlParserTest::testParseSpeed() { std::string testString(CRapidXmlParserTest::fileToString("./testfiles/CXmlParser2.xml")); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting parse speed test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting parse speed test at " + << ml::core::CTimeUtils::toTimeString(start)); ml::core::CXmlNodeWithChildrenPool nodePool; @@ -327,10 +355,12 @@ void CRapidXmlParserTest::testConvertSpeed() { // Use a standard node hierarchy to allow for comparison with the // standards-compliant XML parser - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(CRapidXmlParserTest::makeTestNodeHierarchy()); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root( + CRapidXmlParserTest::makeTestNodeHierarchy()); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting convert speed test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting convert speed test at " + << ml::core::CTimeUtils::toTimeString(start)); for (size_t count = 0; count < TEST_SIZE; ++count) { std::string converted; @@ -338,7 +368,8 @@ void CRapidXmlParserTest::testConvertSpeed() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished convert speed test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished convert speed test at " + << ml::core::CTimeUtils::toTimeString(end)); 
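The parse and convert speed tests above time with second-resolution wall-clock stamps from ml::core::CTimeUtils::now(). For reference, the same measure-a-loop pattern with sub-second resolution via std::chrono; doWork() is a hypothetical stand-in for the parse or convert call:

    #include <chrono>
    #include <cstddef>
    #include <cstdio>

    void doWork(); // hypothetical stand-in for the call being benchmarked

    void timeLoop(std::size_t iterations) {
        auto start = std::chrono::steady_clock::now();
        for (std::size_t i = 0; i < iterations; ++i) {
            doWork();
        }
        auto stop = std::chrono::steady_clock::now();
        auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
        std::printf("%zu iterations took %lld ms\n", iterations,
                    static_cast<long long>(ms.count()));
    }
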
LOG_INFO(<< "Converting " << TEST_SIZE << " documents took " << (end - start) << " seconds"); } diff --git a/lib/core/unittest/CRapidXmlParserTest.h b/lib/core/unittest/CRapidXmlParserTest.h index 7035103706..b08b9d11ca 100644 --- a/lib/core/unittest/CRapidXmlParserTest.h +++ b/lib/core/unittest/CRapidXmlParserTest.h @@ -38,7 +38,9 @@ class CRapidXmlParserTest : public CppUnit::TestFixture { static std::string fileToString(const std::string& fileName); - static bool testAttribute(const ml::core::CXmlNode& node, const std::string& key, const std::string& expected); + static bool testAttribute(const ml::core::CXmlNode& node, + const std::string& key, + const std::string& expected); }; #endif // INCLUDED_CRapidXmlParserTest_h diff --git a/lib/core/unittest/CRapidXmlStatePersistInserterTest.cc b/lib/core/unittest/CRapidXmlStatePersistInserterTest.cc index 9e81cde6eb..74230b57d2 100644 --- a/lib/core/unittest/CRapidXmlStatePersistInserterTest.cc +++ b/lib/core/unittest/CRapidXmlStatePersistInserterTest.cc @@ -11,8 +11,9 @@ CppUnit::Test* CRapidXmlStatePersistInserterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidXmlStatePersistInserterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CRapidXmlStatePersistInserterTest::testPersist", - &CRapidXmlStatePersistInserterTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlStatePersistInserterTest::testPersist", + &CRapidXmlStatePersistInserterTest::testPersist)); return suiteOfTests; } diff --git a/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.cc b/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.cc index 33d1cbcb68..fbef0a6761 100644 --- a/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.cc +++ b/lib/core/unittest/CRapidXmlStateRestoreTraverserTest.cc @@ -9,10 +9,12 @@ #include CppUnit::Test* CRapidXmlStateRestoreTraverserTest::suite() { - CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRapidXmlStateRestoreTraverserTest"); + CppUnit::TestSuite* suiteOfTests = + new CppUnit::TestSuite("CRapidXmlStateRestoreTraverserTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CRapidXmlStateRestoreTraverserTest::testRestore", - &CRapidXmlStateRestoreTraverserTest::testRestore)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRapidXmlStateRestoreTraverserTest::testRestore", + &CRapidXmlStateRestoreTraverserTest::testRestore)); return suiteOfTests; } diff --git a/lib/core/unittest/CReadWriteLockTest.cc b/lib/core/unittest/CReadWriteLockTest.cc index c47bb92e23..626864f295 100644 --- a/lib/core/unittest/CReadWriteLockTest.cc +++ b/lib/core/unittest/CReadWriteLockTest.cc @@ -24,12 +24,13 @@ CppUnit::Test* CReadWriteLockTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CReadWriteLockTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CReadWriteLockTest::testReadLock", &CReadWriteLockTest::testReadLock)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CReadWriteLockTest::testWriteLock", &CReadWriteLockTest::testWriteLock)); - suiteOfTests->addTest(new CppUnit::TestCaller("CReadWriteLockTest::testPerformanceVersusMutex", - &CReadWriteLockTest::testPerformanceVersusMutex)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CReadWriteLockTest::testReadLock", &CReadWriteLockTest::testReadLock)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CReadWriteLockTest::testWriteLock", &CReadWriteLockTest::testWriteLock)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CReadWriteLockTest::testPerformanceVersusMutex", + 
&CReadWriteLockTest::testPerformanceVersusMutex)); return suiteOfTests; } @@ -39,7 +40,8 @@ namespace { class CUnprotectedAdder : public ml::core::CThread { public: CUnprotectedAdder(uint32_t sleepTime, uint32_t iterations, uint32_t increment, volatile uint32_t& variable) - : m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} + : m_SleepTime(sleepTime), m_Iterations(iterations), + m_Increment(increment), m_Variable(variable) {} protected: void run() { @@ -63,7 +65,8 @@ class CUnprotectedAdder : public ml::core::CThread { class CAtomicAdder : public ml::core::CThread { public: CAtomicAdder(uint32_t sleepTime, uint32_t iterations, uint32_t increment, std::atomic_uint_fast32_t& variable) - : m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} + : m_SleepTime(sleepTime), m_Iterations(iterations), + m_Increment(increment), m_Variable(variable) {} protected: void run() { @@ -91,7 +94,8 @@ class CFastMutexProtectedAdder : public ml::core::CThread { uint32_t iterations, uint32_t increment, volatile uint32_t& variable) - : m_Mutex(mutex), m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} + : m_Mutex(mutex), m_SleepTime(sleepTime), m_Iterations(iterations), + m_Increment(increment), m_Variable(variable) {} protected: void run() { @@ -117,8 +121,13 @@ class CFastMutexProtectedAdder : public ml::core::CThread { class CMutexProtectedAdder : public ml::core::CThread { public: - CMutexProtectedAdder(ml::core::CMutex& mutex, uint32_t sleepTime, uint32_t iterations, uint32_t increment, volatile uint32_t& variable) - : m_Mutex(mutex), m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} + CMutexProtectedAdder(ml::core::CMutex& mutex, + uint32_t sleepTime, + uint32_t iterations, + uint32_t increment, + volatile uint32_t& variable) + : m_Mutex(mutex), m_SleepTime(sleepTime), m_Iterations(iterations), + m_Increment(increment), m_Variable(variable) {} protected: void run() { @@ -149,7 +158,8 @@ class CWriteLockProtectedAdder : public ml::core::CThread { uint32_t iterations, uint32_t increment, volatile uint32_t& variable) - : m_ReadWriteLock(readWriteLock), m_SleepTime(sleepTime), m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} + : m_ReadWriteLock(readWriteLock), m_SleepTime(sleepTime), + m_Iterations(iterations), m_Increment(increment), m_Variable(variable) {} protected: void run() { @@ -175,8 +185,12 @@ class CWriteLockProtectedAdder : public ml::core::CThread { class CReadLockProtectedReader : public ml::core::CThread { public: - CReadLockProtectedReader(ml::core::CReadWriteLock& readWriteLock, uint32_t sleepTime, uint32_t iterations, volatile uint32_t& variable) - : m_ReadWriteLock(readWriteLock), m_SleepTime(sleepTime), m_Iterations(iterations), m_Variable(variable), m_LastRead(variable) {} + CReadLockProtectedReader(ml::core::CReadWriteLock& readWriteLock, + uint32_t sleepTime, + uint32_t iterations, + volatile uint32_t& variable) + : m_ReadWriteLock(readWriteLock), m_SleepTime(sleepTime), + m_Iterations(iterations), m_Variable(variable), m_LastRead(variable) {} uint32_t lastRead() const { return m_LastRead; } @@ -272,7 +286,8 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { uint32_t testVariable(0); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting unlocked throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting unlocked 
throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); CUnprotectedAdder writer1(0, TEST_SIZE, 1, testVariable); CUnprotectedAdder writer2(0, TEST_SIZE, 5, testVariable); @@ -287,9 +302,11 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished unlocked throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished unlocked throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Unlocked throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO(<< "Unlocked throughput test with test size " << TEST_SIZE + << " took " << (end - start) << " seconds"); LOG_INFO(<< "Unlocked variable incremented to " << testVariable); @@ -304,7 +321,8 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { std::atomic_uint_fast32_t testVariable(0); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting atomic throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting atomic throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); CAtomicAdder writer1(0, TEST_SIZE, 1, testVariable); CAtomicAdder writer2(0, TEST_SIZE, 5, testVariable); @@ -319,20 +337,24 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished atomic throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished atomic throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Atomic throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO(<< "Atomic throughput test with test size " << TEST_SIZE + << " took " << (end - start) << " seconds"); LOG_INFO(<< "Atomic variable incremented to " << testVariable.load()); - CPPUNIT_ASSERT_EQUAL(uint_fast32_t(TEST_SIZE * (1 + 5 + 9)), testVariable.load()); + CPPUNIT_ASSERT_EQUAL(uint_fast32_t(TEST_SIZE * (1 + 5 + 9)), + testVariable.load()); } { uint32_t testVariable(0); ml::core::CFastMutex mutex; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting fast mutex lock throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting fast mutex lock throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); CFastMutexProtectedAdder writer1(mutex, 0, TEST_SIZE, 1, testVariable); CFastMutexProtectedAdder writer2(mutex, 0, TEST_SIZE, 5, testVariable); @@ -347,9 +369,11 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished fast mutex lock throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished fast mutex lock throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Fast mutex lock throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO(<< "Fast mutex lock throughput test with test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); LOG_INFO(<< "Fast mutex lock protected variable incremented to " << testVariable); @@ -360,7 +384,8 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { ml::core::CMutex mutex; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting mutex lock throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< 
"Starting mutex lock throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); CMutexProtectedAdder writer1(mutex, 0, TEST_SIZE, 1, testVariable); CMutexProtectedAdder writer2(mutex, 0, TEST_SIZE, 5, testVariable); @@ -375,9 +400,11 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished mutex lock throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished mutex lock throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Mutex lock throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO(<< "Mutex lock throughput test with test size " << TEST_SIZE + << " took " << (end - start) << " seconds"); LOG_INFO(<< "Mutex lock protected variable incremented to " << testVariable); @@ -388,7 +415,8 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { ml::core::CReadWriteLock readWriteLock; ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting read-write lock throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting read-write lock throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); CWriteLockProtectedAdder writer1(readWriteLock, 0, TEST_SIZE, 1, testVariable); CWriteLockProtectedAdder writer2(readWriteLock, 0, TEST_SIZE, 5, testVariable); @@ -403,9 +431,11 @@ void CReadWriteLockTest::testPerformanceVersusMutex() { writer3.stop(); ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished read-write lock throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished read-write lock throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Read-write lock throughput test with test size " << TEST_SIZE << " took " << (end - start) << " seconds"); + LOG_INFO(<< "Read-write lock throughput test with test size " + << TEST_SIZE << " took " << (end - start) << " seconds"); LOG_INFO(<< "Write lock protected variable incremented to " << testVariable); diff --git a/lib/core/unittest/CRegexFilterTest.cc b/lib/core/unittest/CRegexFilterTest.cc index 222cd3acff..e0e67ddbf2 100644 --- a/lib/core/unittest/CRegexFilterTest.cc +++ b/lib/core/unittest/CRegexFilterTest.cc @@ -11,16 +11,21 @@ CppUnit::Test* CRegexFilterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRegexFilterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexFilterTest::testConfigure_GivenInvalidRegex", - &CRegexFilterTest::testConfigure_GivenInvalidRegex)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexFilterTest::testApply_GivenEmptyFilter", - &CRegexFilterTest::testApply_GivenEmptyFilter)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexFilterTest::testApply_GivenSingleMatchAllRegex", - &CRegexFilterTest::testApply_GivenSingleMatchAllRegex)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexFilterTest::testApply_GivenSingleRegex", - &CRegexFilterTest::testApply_GivenSingleRegex)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexFilterTest::testApply_GivenMultipleRegex", - &CRegexFilterTest::testApply_GivenMultipleRegex)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexFilterTest::testConfigure_GivenInvalidRegex", + &CRegexFilterTest::testConfigure_GivenInvalidRegex)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexFilterTest::testApply_GivenEmptyFilter", + &CRegexFilterTest::testApply_GivenEmptyFilter)); + 
suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexFilterTest::testApply_GivenSingleMatchAllRegex", + &CRegexFilterTest::testApply_GivenSingleMatchAllRegex)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexFilterTest::testApply_GivenSingleRegex", + &CRegexFilterTest::testApply_GivenSingleRegex)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexFilterTest::testApply_GivenMultipleRegex", + &CRegexFilterTest::testApply_GivenMultipleRegex)); return suiteOfTests; } diff --git a/lib/core/unittest/CRegexTest.cc b/lib/core/unittest/CRegexTest.cc index 20fd245608..425ea45e3a 100644 --- a/lib/core/unittest/CRegexTest.cc +++ b/lib/core/unittest/CRegexTest.cc @@ -11,13 +11,20 @@ CppUnit::Test* CRegexTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRegexTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexTest::testInit", &CRegexTest::testInit)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexTest::testSearch", &CRegexTest::testSearch)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexTest::testSplit", &CRegexTest::testSplit)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexTest::testTokenise1", &CRegexTest::testTokenise1)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexTest::testTokenise2", &CRegexTest::testTokenise2)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexTest::testEscape", &CRegexTest::testEscape)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegexTest::testLiteralCount", &CRegexTest::testLiteralCount)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexTest::testInit", &CRegexTest::testInit)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexTest::testSearch", &CRegexTest::testSearch)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexTest::testSplit", &CRegexTest::testSplit)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexTest::testTokenise1", &CRegexTest::testTokenise1)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexTest::testTokenise2", &CRegexTest::testTokenise2)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexTest::testEscape", &CRegexTest::testEscape)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegexTest::testLiteralCount", &CRegexTest::testLiteralCount)); return suiteOfTests; } @@ -70,15 +77,15 @@ void CRegexTest::testInit() { CPPUNIT_ASSERT(regex.init(regexStr)); CPPUNIT_ASSERT_EQUAL(regexStr, regex.str()); - CPPUNIT_ASSERT( - regex.matches(" ")); + CPPUNIT_ASSERT(regex.matches( + " ")); } { // Uninitialised std::string regexStr = "<.*"; ml::core::CRegex regex; - CPPUNIT_ASSERT( - !regex.matches(" ")); + CPPUNIT_ASSERT(!regex.matches( + " ")); } } @@ -184,7 +191,8 @@ void CRegexTest::testTokenise1() { CPPUNIT_ASSERT(regex.matches(str1)); CPPUNIT_ASSERT(regex.tokenise(str1, tokens)); - for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) { + for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); + itr != tokens.end(); ++itr) { LOG_DEBUG(<< "'" << *itr << "'"); } @@ -210,7 +218,8 @@ void CRegexTest::testTokenise1() { CPPUNIT_ASSERT(regex.matches(str2)); CPPUNIT_ASSERT(regex.tokenise(str2, tokens)); - for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) { + for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); + itr != tokens.end(); ++itr) { LOG_DEBUG(<< "'" << *itr << "'"); } @@ -218,8 +227,7 @@ void CRegexTest::testTokenise1() { CPPUNIT_ASSERT(!regex.tokenise(str1, tokens)); } - std::string str3( - "Sep 10, 
2009 3:54:12 AM org.apache.tomcat.util.http.Parameters processParameters\r\nWARNING: Parameters: Invalid chunk ignored."); + std::string str3("Sep 10, 2009 3:54:12 AM org.apache.tomcat.util.http.Parameters processParameters\r\nWARNING: Parameters: Invalid chunk ignored."); { std::string regexStr("(\\w+\\s+\\d+,\\s+\\d+\\s+\\d+:\\d+:\\d+\\s+\\w+)\\s*([[:alnum:].]+)\\s*(\\w+)\\r?\\n(INFO|WARNING|SEVERE|" @@ -234,7 +242,8 @@ void CRegexTest::testTokenise1() { CPPUNIT_ASSERT(regex.matches(str3)); CPPUNIT_ASSERT(regex.tokenise(str3, tokens)); - for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) { + for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); + itr != tokens.end(); ++itr) { LOG_DEBUG(<< "'" << *itr << "'"); } } @@ -253,7 +262,8 @@ void CRegexTest::testTokenise1() { CPPUNIT_ASSERT(regex.matches(str4)); CPPUNIT_ASSERT(regex.tokenise(str4, tokens)); - for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) { + for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); + itr != tokens.end(); ++itr) { LOG_DEBUG(<< "'" << *itr << "'"); } } @@ -320,17 +330,22 @@ void CRegexTest::testSplit() { CPPUNIT_ASSERT(regex.split(str1, tokens)); - for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); itr != tokens.end(); ++itr) { + for (ml::core::CRegex::TStrVec::iterator itr = tokens.begin(); + itr != tokens.end(); ++itr) { LOG_DEBUG(<< "'" << *itr << "'"); } } } void CRegexTest::testEscape() { - CPPUNIT_ASSERT_EQUAL(std::string("\\.\\.\\."), ml::core::CRegex::escapeRegexSpecial("...")); - CPPUNIT_ASSERT_EQUAL(std::string("hello"), ml::core::CRegex::escapeRegexSpecial("hello")); - CPPUNIT_ASSERT_EQUAL(std::string("\\)hello\\(\\n\\^"), ml::core::CRegex::escapeRegexSpecial(")hello(\n^")); - CPPUNIT_ASSERT_EQUAL(std::string("\\)hello\\(\\r?\\n\\^"), ml::core::CRegex::escapeRegexSpecial(")hello(\r\n^")); + CPPUNIT_ASSERT_EQUAL(std::string("\\.\\.\\."), + ml::core::CRegex::escapeRegexSpecial("...")); + CPPUNIT_ASSERT_EQUAL(std::string("hello"), + ml::core::CRegex::escapeRegexSpecial("hello")); + CPPUNIT_ASSERT_EQUAL(std::string("\\)hello\\(\\n\\^"), + ml::core::CRegex::escapeRegexSpecial(")hello(\n^")); + CPPUNIT_ASSERT_EQUAL(std::string("\\)hello\\(\\r?\\n\\^"), + ml::core::CRegex::escapeRegexSpecial(")hello(\r\n^")); } void CRegexTest::testLiteralCount() { diff --git a/lib/core/unittest/CResourceLocatorTest.cc b/lib/core/unittest/CResourceLocatorTest.cc index 8de9b56604..a0abbf73ae 100644 --- a/lib/core/unittest/CResourceLocatorTest.cc +++ b/lib/core/unittest/CResourceLocatorTest.cc @@ -12,12 +12,12 @@ CppUnit::Test* CResourceLocatorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CResourceLocatorTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CResourceLocatorTest::testResourceDir", &CResourceLocatorTest::testResourceDir)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CResourceLocatorTest::testLogDir", &CResourceLocatorTest::testLogDir)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CResourceLocatorTest::testSrcRootDir", &CResourceLocatorTest::testSrcRootDir)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CResourceLocatorTest::testResourceDir", &CResourceLocatorTest::testResourceDir)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CResourceLocatorTest::testLogDir", &CResourceLocatorTest::testLogDir)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CResourceLocatorTest::testSrcRootDir", &CResourceLocatorTest::testSrcRootDir)); return 
suiteOfTests; } @@ -28,7 +28,8 @@ void CResourceLocatorTest::testResourceDir() { // It should contain the file ml-en.dict ml::core::COsFileFuncs::TStat buf; - CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::stat((resourceDir + "/ml-en.dict").c_str(), &buf)); + CPPUNIT_ASSERT_EQUAL( + 0, ml::core::COsFileFuncs::stat((resourceDir + "/ml-en.dict").c_str(), &buf)); } void CResourceLocatorTest::testLogDir() { @@ -45,5 +46,6 @@ void CResourceLocatorTest::testSrcRootDir() { // It should contain the file set_env.sh ml::core::COsFileFuncs::TStat buf; - CPPUNIT_ASSERT_EQUAL(0, ml::core::COsFileFuncs::stat((cppRootDir + "/set_env.sh").c_str(), &buf)); + CPPUNIT_ASSERT_EQUAL( + 0, ml::core::COsFileFuncs::stat((cppRootDir + "/set_env.sh").c_str(), &buf)); } diff --git a/lib/core/unittest/CShellArgQuoterTest.cc b/lib/core/unittest/CShellArgQuoterTest.cc index 0ff54e6343..c9bb592c4e 100644 --- a/lib/core/unittest/CShellArgQuoterTest.cc +++ b/lib/core/unittest/CShellArgQuoterTest.cc @@ -11,7 +11,8 @@ CppUnit::Test* CShellArgQuoterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CShellArgQuoterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CShellArgQuoterTest::testQuote", &CShellArgQuoterTest::testQuote)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CShellArgQuoterTest::testQuote", &CShellArgQuoterTest::testQuote)); return suiteOfTests; } diff --git a/lib/core/unittest/CSleepTest.cc b/lib/core/unittest/CSleepTest.cc index 712e671d4a..eff2c7c45e 100644 --- a/lib/core/unittest/CSleepTest.cc +++ b/lib/core/unittest/CSleepTest.cc @@ -13,7 +13,8 @@ CppUnit::Test* CSleepTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSleepTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CSleepTest::testSleep", &CSleepTest::testSleep)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSleepTest::testSleep", &CSleepTest::testSleep)); return suiteOfTests; } diff --git a/lib/core/unittest/CSmallVectorTest.cc b/lib/core/unittest/CSmallVectorTest.cc index 203a3a1482..10cf1f141b 100644 --- a/lib/core/unittest/CSmallVectorTest.cc +++ b/lib/core/unittest/CSmallVectorTest.cc @@ -21,10 +21,12 @@ void CSmallVectorTest::testNonStandard() { { TDoubleVec vec{0.1, 1.4, 7.4}; TDouble5Vec svec(vec); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(vec), core::CContainerPrinter::print(svec)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(vec), + core::CContainerPrinter::print(svec)); TDoubleVec cvec(svec); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(vec), core::CContainerPrinter::print(cvec)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(vec), + core::CContainerPrinter::print(cvec)); } // Test addition and subtraction. 
@@ -33,19 +35,21 @@ void CSmallVectorTest::testNonStandard() {
         TDouble5Vec vec2{1.3, 1.6, 2.2, 1.6};
 
         vec1 -= vec2;
-        CPPUNIT_ASSERT_EQUAL(std::string("[-0.3, 1.6, -0.8, 5.7]"), core::CContainerPrinter::print(vec1));
+        CPPUNIT_ASSERT_EQUAL(std::string("[-0.3, 1.6, -0.8, 5.7]"),
+                             core::CContainerPrinter::print(vec1));
 
         vec1 += vec2;
         vec1 += vec2;
-        CPPUNIT_ASSERT_EQUAL(std::string("[2.3, 4.8, 3.6, 8.9]"), core::CContainerPrinter::print(vec1));
+        CPPUNIT_ASSERT_EQUAL(std::string("[2.3, 4.8, 3.6, 8.9]"),
+                             core::CContainerPrinter::print(vec1));
     }
 }
 
 CppUnit::Test* CSmallVectorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSmallVectorTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CSmallVectorTest>("CSmallVectorTest::testNonStandard", &CSmallVectorTest::testNonStandard));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSmallVectorTest>(
+        "CSmallVectorTest::testNonStandard", &CSmallVectorTest::testNonStandard));
 
     return suiteOfTests;
 }
diff --git a/lib/core/unittest/CStateCompressorTest.cc b/lib/core/unittest/CStateCompressorTest.cc
index b349b15e08..54e5d79dfe 100644
--- a/lib/core/unittest/CStateCompressorTest.cc
+++ b/lib/core/unittest/CStateCompressorTest.cc
@@ -56,7 +56,8 @@ void insert1stLevel(ml::core::CStatePersistInserter& inserter, std::size_t n) {
 
 class CMockDataAdder : public ml::core::CDataAdder {
 public:
-    CMockDataAdder(std::size_t maxDocSize) : m_CurrentDocNum(0), m_MaxDocumentSize(maxDocSize) {}
+    CMockDataAdder(std::size_t maxDocSize)
+        : m_CurrentDocNum(0), m_MaxDocumentSize(maxDocSize) {}
 
     virtual TOStreamP addStreamed(const std::string& /*index*/, const std::string& /*id*/) {
         ++m_CurrentDocNum;
@@ -66,7 +67,8 @@ class CMockDataAdder : public ml::core::CDataAdder {
 
     virtual bool streamComplete(TOStreamP& strm, bool /*force*/) {
         CPPUNIT_ASSERT_EQUAL(m_CurrentStream, strm);
-        std::ostringstream* ss = dynamic_cast<std::ostringstream*>(m_CurrentStream.get());
+        std::ostringstream* ss =
+            dynamic_cast<std::ostringstream*>(m_CurrentStream.get());
         CPPUNIT_ASSERT(ss);
         LOG_TRACE(<< ss->str());
         m_Data[m_CurrentDocNum] = ss->str();
@@ -180,7 +182,8 @@ void CStateCompressorTest::testStreaming() {
     // data is streamed, not read all at once
     std::size_t lastAskedFor = 0;
     ::CMockDataSearcher mockKvSearcher(mockKvAdder);
-    LOG_TRACE(<< "After compression, there are " << mockKvSearcher.totalDocs() << " docs, asked for " << mockKvSearcher.askedFor());
+    LOG_TRACE(<< "After compression, there are " << mockKvSearcher.totalDocs()
+              << " docs, asked for " << mockKvSearcher.askedFor());
     ml::core::CStateDecompressor decompressor(mockKvSearcher);
     decompressor.setStateRestoreSearch("1", "");
     TIStreamP istrm = decompressor.search(1, 1);
@@ -215,7 +218,8 @@ void CStateCompressorTest::testStreaming() {
         }
         CPPUNIT_ASSERT(mockKvSearcher.askedFor() > lastAskedFor);
         lastAskedFor = mockKvSearcher.askedFor();
-        while (traverser.next()) {};
+        while (traverser.next()) {
+        };
         LOG_TRACE(<< "Asked for: " << mockKvSearcher.askedFor());
         CPPUNIT_ASSERT_EQUAL(mockKvSearcher.askedFor(), mockKvAdder.data().size());
     }
@@ -298,12 +302,12 @@ void CStateCompressorTest::testChunking() {
 CppUnit::Test* CStateCompressorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStateCompressorTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CStateCompressorTest>("CStateCompressorTest::testForApiNoKey", &CStateCompressorTest::testForApiNoKey));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CStateCompressorTest>("CStateCompressorTest::testStreaming", &CStateCompressorTest::testStreaming));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CStateCompressorTest>("CStateCompressorTest::testChunking", &CStateCompressorTest::testChunking));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStateCompressorTest>(
+        "CStateCompressorTest::testForApiNoKey", &CStateCompressorTest::testForApiNoKey));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStateCompressorTest>(
+        "CStateCompressorTest::testStreaming", &CStateCompressorTest::testStreaming));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStateCompressorTest>(
+        "CStateCompressorTest::testChunking", &CStateCompressorTest::testChunking));
 
     return suiteOfTests;
 }
diff --git a/lib/core/unittest/CStateMachineTest.cc b/lib/core/unittest/CStateMachineTest.cc
index 9c3614d1fc..ec69a7334e 100644
--- a/lib/core/unittest/CStateMachineTest.cc
+++ b/lib/core/unittest/CStateMachineTest.cc
@@ -39,12 +39,15 @@ struct SMachine {
     TSizeVecVec s_TransitionFunction;
 
     bool operator==(const SMachine& rhs) const {
-        return s_Alphabet == rhs.s_Alphabet && s_States == rhs.s_States && s_TransitionFunction == rhs.s_TransitionFunction;
+        return s_Alphabet == rhs.s_Alphabet && s_States == rhs.s_States &&
+               s_TransitionFunction == rhs.s_TransitionFunction;
     }
 
     bool operator<(const SMachine& rhs) const {
-        return s_Alphabet < rhs.s_Alphabet || (s_Alphabet == rhs.s_Alphabet && s_States < rhs.s_States) ||
-               (s_Alphabet == rhs.s_Alphabet && s_States == rhs.s_States && s_TransitionFunction < rhs.s_TransitionFunction);
+        return s_Alphabet < rhs.s_Alphabet ||
+               (s_Alphabet == rhs.s_Alphabet && s_States < rhs.s_States) ||
+               (s_Alphabet == rhs.s_Alphabet && s_States == rhs.s_States &&
+                s_TransitionFunction < rhs.s_TransitionFunction);
     }
 };
 
@@ -55,7 +58,8 @@ class CTestThread : public core::CThread {
     using TCppUnitExceptionP = boost::shared_ptr<CppUnit::Exception>;
 
 public:
-    CTestThread(const TMachineVec& machines) : m_Machines(machines), m_Failures(0) {}
+    CTestThread(const TMachineVec& machines)
+        : m_Machines(machines), m_Failures(0) {}
 
     std::size_t failures() const { return m_Failures; }
 
@@ -68,10 +72,10 @@ class CTestThread : public core::CThread {
         TSizeVec machine;
         for (std::size_t i = 0u; i < n; ++i) {
             m_Rng.generateUniformSamples(0, m_Machines.size(), 1, machine);
-            core::CStateMachine sm = core::CStateMachine::create(m_Machines[machine[0]].s_Alphabet,
-                                                                 m_Machines[machine[0]].s_States,
-                                                                 m_Machines[machine[0]].s_TransitionFunction,
-                                                                 0); // initial state
+            core::CStateMachine sm = core::CStateMachine::create(
+                m_Machines[machine[0]].s_Alphabet, m_Machines[machine[0]].s_States,
+                m_Machines[machine[0]].s_TransitionFunction,
+                0); // initial state
             if (!sm.apply(0)) {
                 ++m_Failures;
             }
@@ -127,10 +131,9 @@ void CStateMachineTest::testBasics() {
         LOG_DEBUG(<< "machine " << m);
         for (std::size_t i = 0u; i < machines[m].s_Alphabet.size(); ++i) {
             for (std::size_t j = 0u; j < machines[m].s_States.size(); ++j) {
-                core::CStateMachine sm = core::CStateMachine::create(machines[m].s_Alphabet,
-                                                                     machines[m].s_States,
-                                                                     machines[m].s_TransitionFunction,
-                                                                     j); // initial state
+                core::CStateMachine sm = core::CStateMachine::create(
+                    machines[m].s_Alphabet, machines[m].s_States, machines[m].s_TransitionFunction,
+                    j); // initial state
 
                 const std::string& oldState = machines[m].s_States[j];
 
@@ -139,7 +142,9 @@ void CStateMachineTest::testBasics() {
                 const std::string& newState = machines[m].s_States[sm.state()];
                 LOG_DEBUG(<< " " << oldState << " -> " << newState);
 
-                CPPUNIT_ASSERT_EQUAL(machines[m].s_States[machines[m].s_TransitionFunction[i][j]], sm.printState(sm.state()));
+                CPPUNIT_ASSERT_EQUAL(
+                    machines[m].s_States[machines[m].s_TransitionFunction[i][j]],
+                    sm.printState(sm.state()));
             }
         }
     }
@@ -151,10 +156,9 @@ void CStateMachineTest::testPersist() {
    TMachineVec 
machine; randomMachines(2, machine); - core::CStateMachine original = core::CStateMachine::create(machine[0].s_Alphabet, - machine[0].s_States, - machine[0].s_TransitionFunction, - 1); // initial state + core::CStateMachine original = core::CStateMachine::create( + machine[0].s_Alphabet, machine[0].s_States, machine[0].s_TransitionFunction, + 1); // initial state std::string origXml; { core::CRapidXmlStatePersistInserter inserter("root"); @@ -168,11 +172,11 @@ void CStateMachineTest::testPersist() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - core::CStateMachine restored = core::CStateMachine::create(machine[1].s_Alphabet, - machine[1].s_States, - machine[1].s_TransitionFunction, - 0); // initial state - traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &restored, _1)); + core::CStateMachine restored = core::CStateMachine::create( + machine[1].s_Alphabet, machine[1].s_States, machine[1].s_TransitionFunction, + 0); // initial state + traverser.traverseSubLevel( + boost::bind(&core::CStateMachine::acceptRestoreTraverser, &restored, _1)); CPPUNIT_ASSERT_EQUAL(original.checksum(), restored.checksum()); std::string newXml; @@ -225,10 +229,12 @@ void CStateMachineTest::testMultithreaded() { CppUnit::Test* CStateMachineTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStateMachineTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CStateMachineTest::testBasics", &CStateMachineTest::testBasics)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStateMachineTest::testPersist", &CStateMachineTest::testPersist)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStateMachineTest::testMultithreaded", &CStateMachineTest::testMultithreaded)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStateMachineTest::testBasics", &CStateMachineTest::testBasics)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStateMachineTest::testPersist", &CStateMachineTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStateMachineTest::testMultithreaded", &CStateMachineTest::testMultithreaded)); return suiteOfTests; } diff --git a/lib/core/unittest/CStatisticsTest.cc b/lib/core/unittest/CStatisticsTest.cc index bff41a1b55..9b87804002 100644 --- a/lib/core/unittest/CStatisticsTest.cc +++ b/lib/core/unittest/CStatisticsTest.cc @@ -49,8 +49,10 @@ class CStatisticsTestRunner : public ml::core::CThread { CppUnit::Test* CStatisticsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStatisticsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CStatisticsTest::testStatistics", &CStatisticsTest::testStatistics)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStatisticsTest::testPersist", &CStatisticsTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStatisticsTest::testStatistics", &CStatisticsTest::testStatistics)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStatisticsTest::testPersist", &CStatisticsTest::testPersist)); return suiteOfTests; } @@ -118,7 +120,8 @@ void CStatisticsTest::testPersist() { ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origStaticsXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(&ml::core::CStatistics::staticsAcceptRestoreTraverser)); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + &ml::core::CStatistics::staticsAcceptRestoreTraverser)); } for (int i = 0; i < 
ml::stat_t::E_LastEnumStat; i++) { @@ -146,7 +149,8 @@ void CStatisticsTest::testPersist() { ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origStaticsXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(&ml::core::CStatistics::staticsAcceptRestoreTraverser)); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + &ml::core::CStatistics::staticsAcceptRestoreTraverser)); } for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) { @@ -158,7 +162,8 @@ void CStatisticsTest::testPersist() { ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(newStaticsXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(&ml::core::CStatistics::staticsAcceptRestoreTraverser)); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + &ml::core::CStatistics::staticsAcceptRestoreTraverser)); } for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) { @@ -170,7 +175,8 @@ void CStatisticsTest::testPersist() { ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origStaticsXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(&ml::core::CStatistics::staticsAcceptRestoreTraverser)); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + &ml::core::CStatistics::staticsAcceptRestoreTraverser)); } for (int i = 0; i < ml::stat_t::E_LastEnumStat; i++) { diff --git a/lib/core/unittest/CStopWatchTest.cc b/lib/core/unittest/CStopWatchTest.cc index 52877aea7b..a2d59d338b 100644 --- a/lib/core/unittest/CStopWatchTest.cc +++ b/lib/core/unittest/CStopWatchTest.cc @@ -14,7 +14,8 @@ CppUnit::Test* CStopWatchTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStopWatchTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CStopWatchTest::testStopWatch", &CStopWatchTest::testStopWatch)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStopWatchTest::testStopWatch", &CStopWatchTest::testStopWatch)); return suiteOfTests; } @@ -40,7 +41,8 @@ void CStopWatchTest::testStopWatch() { elapsed = stopWatch.stop(); - LOG_DEBUG(<< "After a further 3.5 second wait, the stop watch reads " << elapsed << " milliseconds"); + LOG_DEBUG(<< "After a further 3.5 second wait, the stop watch reads " + << elapsed << " milliseconds"); // Elapsed time should be between 8.9 and 9.1 seconds CPPUNIT_ASSERT(elapsed >= 8900); diff --git a/lib/core/unittest/CStoredStringPtrTest.cc b/lib/core/unittest/CStoredStringPtrTest.cc index cc4425f2f8..02fb712903 100644 --- a/lib/core/unittest/CStoredStringPtrTest.cc +++ b/lib/core/unittest/CStoredStringPtrTest.cc @@ -16,11 +16,12 @@ CppUnit::Test* CStoredStringPtrTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStoredStringPtrTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CStoredStringPtrTest::testPointerSemantics", - &CStoredStringPtrTest::testPointerSemantics)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStoredStringPtrTest::testMemoryUsage", &CStoredStringPtrTest::testMemoryUsage)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStoredStringPtrTest::testHash", &CStoredStringPtrTest::testHash)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStoredStringPtrTest::testPointerSemantics", &CStoredStringPtrTest::testPointerSemantics)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStoredStringPtrTest::testMemoryUsage", &CStoredStringPtrTest::testMemoryUsage)); + suiteOfTests->addTest(new CppUnit::TestCaller( + 
"CStoredStringPtrTest::testHash", &CStoredStringPtrTest::testHash)); return suiteOfTests; } @@ -65,7 +66,8 @@ void CStoredStringPtrTest::testPointerSemantics() { // would be to leave the original value in the moved-from string std::string str2("my second string - long enough to not use the small string optimisation"); - ml::core::CStoredStringPtr ptr2 = ml::core::CStoredStringPtr::makeStoredString(std::move(str2)); + ml::core::CStoredStringPtr ptr2 = + ml::core::CStoredStringPtr::makeStoredString(std::move(str2)); if (ptr2) { CPPUNIT_ASSERT(ptr2 == ptr2); @@ -98,7 +100,8 @@ void CStoredStringPtrTest::testMemoryUsage() { ml::core::CStoredStringPtr ptr1 = ml::core::CStoredStringPtr::makeStoredString(str1); CPPUNIT_ASSERT_EQUAL(std::size_t(0), ml::core::CMemory::dynamicSize(ptr1)); - CPPUNIT_ASSERT_EQUAL(ml::core::CMemory::dynamicSize(&str1), ptr1.actualMemoryUsage()); + CPPUNIT_ASSERT_EQUAL(ml::core::CMemory::dynamicSize(&str1), + ptr1.actualMemoryUsage()); } { std::string str2("much longer - YUGE in fact!"); @@ -106,7 +109,8 @@ void CStoredStringPtrTest::testMemoryUsage() { ml::core::CStoredStringPtr ptr2 = ml::core::CStoredStringPtr::makeStoredString(str2); CPPUNIT_ASSERT_EQUAL(std::size_t(0), ml::core::CMemory::dynamicSize(ptr2)); - CPPUNIT_ASSERT_EQUAL(ml::core::CMemory::dynamicSize(&str2), ptr2.actualMemoryUsage()); + CPPUNIT_ASSERT_EQUAL(ml::core::CMemory::dynamicSize(&str2), + ptr2.actualMemoryUsage()); } } diff --git a/lib/core/unittest/CStringSimilarityTesterTest.cc b/lib/core/unittest/CStringSimilarityTesterTest.cc index 2949cd5ed4..49e6c196d5 100644 --- a/lib/core/unittest/CStringSimilarityTesterTest.cc +++ b/lib/core/unittest/CStringSimilarityTesterTest.cc @@ -19,23 +19,27 @@ CppUnit::Test* CStringSimilarityTesterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStringSimilarityTesterTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringSimilarityTesterTest::testStringSimilarity", - &CStringSimilarityTesterTest::testStringSimilarity)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringSimilarityTesterTest::testLevensteinDistance", - &CStringSimilarityTesterTest::testLevensteinDistance)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringSimilarityTesterTest::testLevensteinDistance2", - &CStringSimilarityTesterTest::testLevensteinDistance2)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent", - &CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar", - &CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence", - &CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringSimilarityTesterTest::testWeightedEditDistance", - &CStringSimilarityTesterTest::testWeightedEditDistance)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringSimilarityTesterTest::testStringSimilarity", + &CStringSimilarityTesterTest::testStringSimilarity)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringSimilarityTesterTest::testLevensteinDistance", + &CStringSimilarityTesterTest::testLevensteinDistance)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringSimilarityTesterTest::testLevensteinDistance2", + 
&CStringSimilarityTesterTest::testLevensteinDistance2)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent", + &CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar", + &CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence", + &CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringSimilarityTesterTest::testWeightedEditDistance", + &CStringSimilarityTesterTest::testWeightedEditDistance)); return suiteOfTests; } @@ -280,10 +284,12 @@ void CStringSimilarityTesterTest::testLevensteinDistanceThroughputDifferent() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished Levenstein distance throughput test for low commonality strings at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished Levenstein distance throughput test for low commonality strings at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Levenstein distance throughput test for low commonality strings with size " << TEST_SIZE << " and maximum string length " - << MAX_LEN << " took " << (end - start) << " seconds"); + LOG_INFO(<< "Levenstein distance throughput test for low commonality strings with size " + << TEST_SIZE << " and maximum string length " << MAX_LEN + << " took " << (end - start) << " seconds"); } void CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar() { @@ -310,7 +316,8 @@ void CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar() { } ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting Levenstein distance throughput test for similar strings at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting Levenstein distance throughput test for similar strings at " + << ml::core::CTimeUtils::toTimeString(start)); for (size_t i = 0; i < TEST_SIZE; ++i) { for (size_t j = 0; j < TEST_SIZE; ++j) { @@ -322,10 +329,12 @@ void CStringSimilarityTesterTest::testLevensteinDistanceThroughputSimilar() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished Levenstein distance throughput test for similar strings at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished Levenstein distance throughput test for similar strings at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Levenstein distance throughput test for similar strings with size " << TEST_SIZE << " and " << EXTRA_CHARS - << " extra characters took " << (end - start) << " seconds"); + LOG_INFO(<< "Levenstein distance throughput test for similar strings with size " + << TEST_SIZE << " and " << EXTRA_CHARS << " extra characters took " + << (end - start) << " seconds"); } void CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence() { @@ -343,11 +352,16 @@ void CStringSimilarityTesterTest::testLevensteinDistanceAlgorithmEquivalence() { // Remember we're calling private implementation methods here that require: // 1) Neither input sequence is empty // 2) The first input sequence is no longer than the second input sequence - CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, cat), sst.berghelRoachEditDistance(cat, cat)); - 
CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, dog), sst.berghelRoachEditDistance(cat, dog)); - CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, mouse), sst.berghelRoachEditDistance(cat, mouse)); - CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, elephant), sst.berghelRoachEditDistance(cat, elephant)); - CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(mouse, elephant), sst.berghelRoachEditDistance(mouse, elephant)); + CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, cat), + sst.berghelRoachEditDistance(cat, cat)); + CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, dog), + sst.berghelRoachEditDistance(cat, dog)); + CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, mouse), + sst.berghelRoachEditDistance(cat, mouse)); + CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(cat, elephant), + sst.berghelRoachEditDistance(cat, elephant)); + CPPUNIT_ASSERT_EQUAL(sst.levenshteinDistanceSimple(mouse, elephant), + sst.berghelRoachEditDistance(mouse, elephant)); } void CStringSimilarityTesterTest::testWeightedEditDistance() { diff --git a/lib/core/unittest/CStringUtilsTest.cc b/lib/core/unittest/CStringUtilsTest.cc index 7c573b06de..ba7843fa0e 100644 --- a/lib/core/unittest/CStringUtilsTest.cc +++ b/lib/core/unittest/CStringUtilsTest.cc @@ -22,39 +22,48 @@ CppUnit::Test* CStringUtilsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStringUtilsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testNumMatches", &CStringUtilsTest::testNumMatches)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testReplace", &CStringUtilsTest::testReplace)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testReplaceFirst", &CStringUtilsTest::testReplaceFirst)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testTypeToString", &CStringUtilsTest::testTypeToString)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testTypeToStringPrecise", &CStringUtilsTest::testTypeToStringPrecise)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testTypeToStringPretty", &CStringUtilsTest::testTypeToStringPretty)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testStringToType", &CStringUtilsTest::testStringToType)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testTokeniser", &CStringUtilsTest::testTokeniser)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testTrim", &CStringUtilsTest::testTrim)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testJoin", &CStringUtilsTest::testJoin)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testLower", &CStringUtilsTest::testLower)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testUpper", &CStringUtilsTest::testUpper)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testNarrowWiden", &CStringUtilsTest::testNarrowWiden)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testEscape", &CStringUtilsTest::testEscape)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringUtilsTest::testUnEscape", &CStringUtilsTest::testUnEscape)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testLongestSubstr", &CStringUtilsTest::testLongestSubstr)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testLongestSubseq", &CStringUtilsTest::testLongestSubseq)); - suiteOfTests->addTest( - new 
CppUnit::TestCaller("CStringUtilsTest::testNormaliseWhitespace", &CStringUtilsTest::testNormaliseWhitespace)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testPerformance", &CStringUtilsTest::testPerformance)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testUtf8ByteType", &CStringUtilsTest::testUtf8ByteType)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringUtilsTest::testRoundtripMaxDouble", &CStringUtilsTest::testRoundtripMaxDouble)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testNumMatches", &CStringUtilsTest::testNumMatches)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testReplace", &CStringUtilsTest::testReplace)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testReplaceFirst", &CStringUtilsTest::testReplaceFirst)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testTypeToString", &CStringUtilsTest::testTypeToString)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testTypeToStringPrecise", &CStringUtilsTest::testTypeToStringPrecise)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testTypeToStringPretty", &CStringUtilsTest::testTypeToStringPretty)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testStringToType", &CStringUtilsTest::testStringToType)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testTokeniser", &CStringUtilsTest::testTokeniser)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testTrim", &CStringUtilsTest::testTrim)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testJoin", &CStringUtilsTest::testJoin)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testLower", &CStringUtilsTest::testLower)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testUpper", &CStringUtilsTest::testUpper)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testNarrowWiden", &CStringUtilsTest::testNarrowWiden)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testEscape", &CStringUtilsTest::testEscape)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testUnEscape", &CStringUtilsTest::testUnEscape)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testLongestSubstr", &CStringUtilsTest::testLongestSubstr)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testLongestSubseq", &CStringUtilsTest::testLongestSubseq)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testNormaliseWhitespace", &CStringUtilsTest::testNormaliseWhitespace)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testPerformance", &CStringUtilsTest::testPerformance)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testUtf8ByteType", &CStringUtilsTest::testUtf8ByteType)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringUtilsTest::testRoundtripMaxDouble", &CStringUtilsTest::testRoundtripMaxDouble)); return suiteOfTests; } @@ -92,7 +101,8 @@ void CStringUtilsTest::testReplaceFirst() { std::string in("%d%M%Y%f%D%t"); const std::string out(" %d%M%Y%f%D%t"); - CPPUNIT_ASSERT_EQUAL(size_t(1), ml::core::CStringUtils::replaceFirst("%", " %", in)); + CPPUNIT_ASSERT_EQUAL(size_t(1), + ml::core::CStringUtils::replaceFirst("%", " %", in)); CPPUNIT_ASSERT_EQUAL(out, in); } @@ -172,98 +182,112 @@ void 
CStringUtilsTest::testTypeToStringPrecise() { double i(1.0); std::string expected("1"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(1.0); std::string expected("1"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.123456); std::string expected("1.23456e-1"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.123456); std::string expected("1.23456e-1"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.123456e10); std::string expected("1.23456e9"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.123456e10); std::string expected("1234560000"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.123456e-10); std::string expected("1.23456e-11"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.123456e-10); std::string expected("1.23456e-11"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.123456787654321e-10); std::string expected("1.234568e-11"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.123456787654321e-10); std::string expected("1.23456787654321e-11"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.00000000012345678765432123456); std::string expected("1.234568e-10"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); + std::string actual = 
ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(0.00000000012345678765432123456); std::string expected("1.23456787654321e-10"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(123456787654321.23456); std::string expected("1.234568e14"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_SinglePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_SinglePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } { double i(123456787654321.23456); std::string expected("123456787654321"); - std::string actual = ml::core::CStringUtils::typeToStringPrecise(i, ml::core::CIEEE754::E_DoublePrecision); + std::string actual = ml::core::CStringUtils::typeToStringPrecise( + i, ml::core::CIEEE754::E_DoublePrecision); CPPUNIT_ASSERT_EQUAL(expected, actual); } } @@ -273,10 +297,14 @@ void CStringUtilsTest::testTypeToStringPretty() { LOG_DEBUG(<< "1.0 -> " << ml::core::CStringUtils::typeToStringPretty(1.0)); LOG_DEBUG(<< "0.123456 -> " << ml::core::CStringUtils::typeToStringPretty(0.123456)); LOG_DEBUG(<< "0.123456e10 -> " << ml::core::CStringUtils::typeToStringPretty(0.123456e10)); - LOG_DEBUG(<< "0.123456e-10 -> " << ml::core::CStringUtils::typeToStringPretty(0.123456e-10)); - LOG_DEBUG(<< "0.123456787654321e-10 -> " << ml::core::CStringUtils::typeToStringPretty(0.123456787654321e-10)); - LOG_DEBUG(<< "0.00000000012345678765432123456 -> " << ml::core::CStringUtils::typeToStringPretty(0.00000000012345678765432123456)); - LOG_DEBUG(<< "123456787654321.23456 -> " << ml::core::CStringUtils::typeToStringPretty(123456787654321.23456)); + LOG_DEBUG(<< "0.123456e-10 -> " + << ml::core::CStringUtils::typeToStringPretty(0.123456e-10)); + LOG_DEBUG(<< "0.123456787654321e-10 -> " + << ml::core::CStringUtils::typeToStringPretty(0.123456787654321e-10)); + LOG_DEBUG(<< "0.00000000012345678765432123456 -> " + << ml::core::CStringUtils::typeToStringPretty(0.00000000012345678765432123456)); + LOG_DEBUG(<< "123456787654321.23456 -> " + << ml::core::CStringUtils::typeToStringPretty(123456787654321.23456)); } void CStringUtilsTest::testStringToType() { @@ -467,7 +495,8 @@ void CStringUtilsTest::testTokeniser(const std::string& delim, const std::string LOG_DEBUG(<< str << " DELIM = '" << delim << "' REMAINDER = '" << remainder << "'"); - for (ml::core::CStringUtils::TStrVecItr itr = tokens.begin(); itr != tokens.end(); ++itr) { + for (ml::core::CStringUtils::TStrVecItr itr = tokens.begin(); + itr != tokens.end(); ++itr) { LOG_DEBUG(<< "'" << *itr << "'"); } @@ -567,17 +596,20 @@ void CStringUtilsTest::testJoin() { strVec.push_back(std::string("bbb")); strVec.push_back(std::string("ccc")); - CPPUNIT_ASSERT_EQUAL(std::string("aaa,bbb,ccc"), CStringUtils::join(strVec, std::string(","))); + CPPUNIT_ASSERT_EQUAL(std::string("aaa,bbb,ccc"), + CStringUtils::join(strVec, std::string(","))); LOG_DEBUG(<< "Test delimiter has more than one characters") - CPPUNIT_ASSERT_EQUAL(std::string("aaa::bbb::ccc"), CStringUtils::join(strVec, std::string("::"))); + CPPUNIT_ASSERT_EQUAL(std::string("aaa::bbb::ccc"), + CStringUtils::join(strVec, std::string("::"))); LOG_DEBUG(<< "Test set instead of vector") TStrSet strSet; 
strSet.insert(std::string("aaa")); strSet.insert(std::string("bbb")); strSet.insert(std::string("ccc")); - CPPUNIT_ASSERT_EQUAL(std::string("aaa,bbb,ccc"), CStringUtils::join(strSet, std::string(","))); + CPPUNIT_ASSERT_EQUAL(std::string("aaa,bbb,ccc"), + CStringUtils::join(strSet, std::string(","))); } void CStringUtilsTest::testLower() { @@ -585,9 +617,11 @@ void CStringUtilsTest::testLower() { CPPUNIT_ASSERT_EQUAL(std::string("hello"), ml::core::CStringUtils::toLower("Hello")); CPPUNIT_ASSERT_EQUAL(std::string("hello"), ml::core::CStringUtils::toLower("HELLO")); - CPPUNIT_ASSERT_EQUAL(std::string("123hello"), ml::core::CStringUtils::toLower("123hello")); + CPPUNIT_ASSERT_EQUAL(std::string("123hello"), + ml::core::CStringUtils::toLower("123hello")); CPPUNIT_ASSERT_EQUAL(std::string("hello "), ml::core::CStringUtils::toLower("Hello ")); - CPPUNIT_ASSERT_EQUAL(std::string("_-+hello"), ml::core::CStringUtils::toLower("_-+HELLO")); + CPPUNIT_ASSERT_EQUAL(std::string("_-+hello"), + ml::core::CStringUtils::toLower("_-+HELLO")); } void CStringUtilsTest::testUpper() { @@ -595,17 +629,21 @@ void CStringUtilsTest::testUpper() { CPPUNIT_ASSERT_EQUAL(std::string("HELLO"), ml::core::CStringUtils::toUpper("Hello")); CPPUNIT_ASSERT_EQUAL(std::string("HELLO"), ml::core::CStringUtils::toUpper("HELLO")); - CPPUNIT_ASSERT_EQUAL(std::string("123HELLO"), ml::core::CStringUtils::toUpper("123hello")); + CPPUNIT_ASSERT_EQUAL(std::string("123HELLO"), + ml::core::CStringUtils::toUpper("123hello")); CPPUNIT_ASSERT_EQUAL(std::string("HELLO "), ml::core::CStringUtils::toUpper("Hello ")); - CPPUNIT_ASSERT_EQUAL(std::string("_-+HELLO"), ml::core::CStringUtils::toUpper("_-+HELLO")); + CPPUNIT_ASSERT_EQUAL(std::string("_-+HELLO"), + ml::core::CStringUtils::toUpper("_-+HELLO")); } void CStringUtilsTest::testNarrowWiden() { std::string hello1("Hello"); std::wstring hello2(L"Hello"); - CPPUNIT_ASSERT_EQUAL(hello1.length(), ml::core::CStringUtils::narrowToWide(hello1).length()); - CPPUNIT_ASSERT_EQUAL(hello2.length(), ml::core::CStringUtils::wideToNarrow(hello2).length()); + CPPUNIT_ASSERT_EQUAL(hello1.length(), + ml::core::CStringUtils::narrowToWide(hello1).length()); + CPPUNIT_ASSERT_EQUAL(hello2.length(), + ml::core::CStringUtils::wideToNarrow(hello2).length()); CPPUNIT_ASSERT(ml::core::CStringUtils::narrowToWide(hello1) == hello2); CPPUNIT_ASSERT(ml::core::CStringUtils::wideToNarrow(hello2) == hello1); @@ -654,7 +692,8 @@ void CStringUtilsTest::testLongestSubstr() { CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -664,7 +703,8 @@ void CStringUtilsTest::testLongestSubstr() { CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -674,7 +714,8 @@ void CStringUtilsTest::testLongestSubstr() { CPPUNIT_ASSERT_EQUAL(std::string("Hello "), common); - LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -684,7 +725,8 @@ void 
CStringUtilsTest::testLongestSubstr() { CPPUNIT_ASSERT_EQUAL(std::string("ello"), common); - LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("abc"); @@ -694,7 +736,8 @@ void CStringUtilsTest::testLongestSubstr() { CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("abc xyz defgv hij"); @@ -704,7 +747,8 @@ void CStringUtilsTest::testLongestSubstr() { CPPUNIT_ASSERT_EQUAL(std::string(" defg"), common); - LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("Source LOTS on 13080:742 has shut down."); @@ -714,7 +758,8 @@ void CStringUtilsTest::testLongestSubstr() { CPPUNIT_ASSERT_EQUAL(std::string(" has shut down."), common); - LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("No filter list defined for ."); @@ -724,7 +769,8 @@ void CStringUtilsTest::testLongestSubstr() { CPPUNIT_ASSERT_EQUAL(std::string("No filter list defined for "), common); - LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common substring of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } } @@ -737,7 +783,8 @@ void CStringUtilsTest::testLongestSubseq() { CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -747,7 +794,8 @@ void CStringUtilsTest::testLongestSubseq() { CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -757,7 +805,8 @@ void CStringUtilsTest::testLongestSubseq() { CPPUNIT_ASSERT_EQUAL(std::string("Hello "), common); - LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("Hello world"); @@ -767,7 +816,8 @@ void CStringUtilsTest::testLongestSubseq() { CPPUNIT_ASSERT_EQUAL(std::string("ello"), common); - LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("abc"); @@ -777,7 +827,8 @@ void CStringUtilsTest::testLongestSubseq() { CPPUNIT_ASSERT_EQUAL(std::string(""), common); - LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << 
"'"); + LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("abc xyz defgv hij"); @@ -787,7 +838,8 @@ void CStringUtilsTest::testLongestSubseq() { CPPUNIT_ASSERT_EQUAL(std::string("abc defg hij"), common); - LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("Source LOTS on 13080:742 has shut down."); @@ -797,7 +849,8 @@ void CStringUtilsTest::testLongestSubseq() { CPPUNIT_ASSERT_EQUAL(std::string("Source T on 13080:2 has shut down."), common); - LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } { std::string str1("No filter list defined for ."); @@ -807,7 +860,8 @@ void CStringUtilsTest::testLongestSubseq() { CPPUNIT_ASSERT_EQUAL(std::string("No filter list defined for ."), common); - LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" << str2 << "' is '" << common << "'"); + LOG_DEBUG(<< "Longest common subsequence of '" << str1 << "' and '" + << str2 << "' is '" << common << "'"); } } @@ -906,7 +960,8 @@ void CStringUtilsTest::testUtf8ByteType() { } void CStringUtilsTest::testRoundtripMaxDouble() { - ml::core::CIEEE754::EPrecision precisions[] = {ml::core::CIEEE754::E_SinglePrecision, ml::core::CIEEE754::E_DoublePrecision}; + ml::core::CIEEE754::EPrecision precisions[] = { + ml::core::CIEEE754::E_SinglePrecision, ml::core::CIEEE754::E_DoublePrecision}; double tolerances[] = {5e-7, 5e-15}; for (std::size_t i = 0u; i < boost::size(precisions); ++i) { double max = std::numeric_limits::max(); diff --git a/lib/core/unittest/CThreadFarmTest.cc b/lib/core/unittest/CThreadFarmTest.cc index e674c6776d..0f3eddd4aa 100644 --- a/lib/core/unittest/CThreadFarmTest.cc +++ b/lib/core/unittest/CThreadFarmTest.cc @@ -19,8 +19,10 @@ CppUnit::Test* CThreadFarmTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CThreadFarmTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CThreadFarmTest::testNumCpus", &CThreadFarmTest::testNumCpus)); - suiteOfTests->addTest(new CppUnit::TestCaller("CThreadFarmTest::testSendReceive", &CThreadFarmTest::testSendReceive)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CThreadFarmTest::testNumCpus", &CThreadFarmTest::testNumCpus)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CThreadFarmTest::testSendReceive", &CThreadFarmTest::testSendReceive)); return suiteOfTests; } @@ -59,7 +61,8 @@ class CString { class CHandler { public: void processResult(const CString& result) { - LOG_DEBUG(<< "Process result " << result.str() << " in thread " << ml::core::CThread::currentThreadId()); + LOG_DEBUG(<< "Process result " << result.str() << " in thread " + << ml::core::CThread::currentThreadId()); ml::core::CScopedLock lock(m_Mutex); m_OutstandingOutput.erase(result.str()); diff --git a/lib/core/unittest/CThreadMutexConditionTest.cc b/lib/core/unittest/CThreadMutexConditionTest.cc index dab30d220d..98124388c8 100644 --- a/lib/core/unittest/CThreadMutexConditionTest.cc +++ b/lib/core/unittest/CThreadMutexConditionTest.cc @@ -14,10 +14,11 @@ CppUnit::Test* CThreadMutexConditionTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CThreadMutexConditionTest"); - suiteOfTests->addTest(new 
CppUnit::TestCaller<CThreadMutexConditionTest>("CThreadMutexConditionTest::testThread", - &CThreadMutexConditionTest::testThread)); - suiteOfTests->addTest(new CppUnit::TestCaller<CThreadMutexConditionTest>("CThreadMutexConditionTest::testThreadCondition", - &CThreadMutexConditionTest::testThreadCondition)); + suiteOfTests->addTest(new CppUnit::TestCaller<CThreadMutexConditionTest>( + "CThreadMutexConditionTest::testThread", &CThreadMutexConditionTest::testThread)); + suiteOfTests->addTest(new CppUnit::TestCaller<CThreadMutexConditionTest>( + "CThreadMutexConditionTest::testThreadCondition", + &CThreadMutexConditionTest::testThreadCondition)); return suiteOfTests; } diff --git a/lib/core/unittest/CThreadPoolTest.cc b/lib/core/unittest/CThreadPoolTest.cc index 5103d8d913..e678e79a8b 100644 --- a/lib/core/unittest/CThreadPoolTest.cc +++ b/lib/core/unittest/CThreadPoolTest.cc @@ -12,7 +12,8 @@ CppUnit::Test* CThreadPoolTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CThreadPoolTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CThreadPoolTest>("CThreadPoolTest::testPool", &CThreadPoolTest::testPool)); + suiteOfTests->addTest(new CppUnit::TestCaller<CThreadPoolTest>( + "CThreadPoolTest::testPool", &CThreadPoolTest::testPool)); return suiteOfTests; } diff --git a/lib/core/unittest/CTickerTest.cc b/lib/core/unittest/CTickerTest.cc index 932abe1106..617e0b09cd 100644 --- a/lib/core/unittest/CTickerTest.cc +++ b/lib/core/unittest/CTickerTest.cc @@ -12,7 +12,8 @@ CppUnit::Test* CTickerTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTickerTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CTickerTest>("CTickerTest::testTicker", &CTickerTest::testTicker)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTickerTest>( + "CTickerTest::testTicker", &CTickerTest::testTicker)); return suiteOfTests; } diff --git a/lib/core/unittest/CTimeUtilsTest.cc b/lib/core/unittest/CTimeUtilsTest.cc index d3a20d7634..f9b444237a 100644 --- a/lib/core/unittest/CTimeUtilsTest.cc +++ b/lib/core/unittest/CTimeUtilsTest.cc @@ -16,13 +16,20 @@ CppUnit::Test* CTimeUtilsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeUtilsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testNow", &CTimeUtilsTest::testNow)); - suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testToIso8601", &CTimeUtilsTest::testToIso8601)); - suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testToLocal", &CTimeUtilsTest::testToLocal)); - suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testToEpochMs", &CTimeUtilsTest::testToEpochMs)); - suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testStrptime", &CTimeUtilsTest::testStrptime)); - suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testTimezone", &CTimeUtilsTest::testTimezone)); - suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>("CTimeUtilsTest::testDateWords", &CTimeUtilsTest::testDateWords)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>( + "CTimeUtilsTest::testNow", &CTimeUtilsTest::testNow)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>( + "CTimeUtilsTest::testToIso8601", &CTimeUtilsTest::testToIso8601)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>( + "CTimeUtilsTest::testToLocal", &CTimeUtilsTest::testToLocal)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>( + "CTimeUtilsTest::testToEpochMs", &CTimeUtilsTest::testToEpochMs)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>( + "CTimeUtilsTest::testStrptime", &CTimeUtilsTest::testStrptime)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>( + "CTimeUtilsTest::testTimezone", &CTimeUtilsTest::testTimezone)); + 
suiteOfTests->addTest(new CppUnit::TestCaller<CTimeUtilsTest>( + "CTimeUtilsTest::testDateWords", &CTimeUtilsTest::testDateWords)); return suiteOfTests; } @@ -90,10 +97,14 @@ void CTimeUtilsTest::testToLocal() { } void CTimeUtilsTest::testToEpochMs() { - CPPUNIT_ASSERT_EQUAL(int64_t(1000), ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(1))); - CPPUNIT_ASSERT_EQUAL(int64_t(-1000), ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(-1))); - CPPUNIT_ASSERT_EQUAL(int64_t(1521035866000), ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(1521035866))); - CPPUNIT_ASSERT_EQUAL(int64_t(-1521035866000), ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(-1521035866))); + CPPUNIT_ASSERT_EQUAL(int64_t(1000), + ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(1))); + CPPUNIT_ASSERT_EQUAL(int64_t(-1000), + ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(-1))); + CPPUNIT_ASSERT_EQUAL(int64_t(1521035866000), + ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(1521035866))); + CPPUNIT_ASSERT_EQUAL(int64_t(-1521035866000), + ml::core::CTimeUtils::toEpochMs(ml::core_t::TTime(-1521035866))); } void CTimeUtilsTest::testStrptime() { @@ -229,7 +240,8 @@ void CTimeUtilsTest::testStrptime() { << ml::core::CCTimeR::cTimeR(&actual, buf)); // Allow small tolerance in case of clock discrepancies between machines - CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); + CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); } { // Test what happens when no year is given @@ -250,7 +262,8 @@ void CTimeUtilsTest::testStrptime() { << ml::core::CCTimeR::cTimeR(&actual, buf)); // Allow small tolerance in case of clock discrepancies between machines - CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); + CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); } { // Test what happens when no year is given @@ -271,7 +284,8 @@ void CTimeUtilsTest::testStrptime() { << ml::core::CCTimeR::cTimeR(&actual, buf)); // Allow small tolerance in case of clock discrepancies between machines - CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); + CPPUNIT_ASSERT(actual <= ml::core::CTimeUtils::now() + + ml::core::CTimeUtils::MAX_CLOCK_DISCREPANCY); } } @@ -311,7 +325,8 @@ void CTimeUtilsTest::testTimezone() { CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeUtc, actual)); CPPUNIT_ASSERT_EQUAL(utcExpected, actual); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeTwoHoursBehindUtc, actual)); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime( + formatExplicit, dateTimeTwoHoursBehindUtc, actual)); CPPUNIT_ASSERT_EQUAL(twoHoursBehindUtc, actual); } @@ -330,7 +345,8 @@ void CTimeUtilsTest::testTimezone() { CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeUtc, actual)); CPPUNIT_ASSERT_EQUAL(utcExpected, actual); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeTwoHoursBehindUtc, actual)); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime( + formatExplicit, dateTimeTwoHoursBehindUtc, actual)); CPPUNIT_ASSERT_EQUAL(twoHoursBehindUtc, actual); } @@ -347,7 +363,8 @@ void CTimeUtilsTest::testTimezone() { CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeUtc, actual)); CPPUNIT_ASSERT_EQUAL(utcExpected, actual); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeTwoHoursBehindUtc, actual)); + 
CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime( + formatExplicit, dateTimeTwoHoursBehindUtc, actual)); CPPUNIT_ASSERT_EQUAL(twoHoursBehindUtc, actual); } @@ -359,7 +376,8 @@ void CTimeUtilsTest::testTimezone() { // Northern Territory first CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("Australia/Darwin")); { - ml::core_t::TTime expected(utcExpected - static_cast<ml::core_t::TTime>(9.5 * SECONDS_PER_HOUR)); + ml::core_t::TTime expected( + utcExpected - static_cast<ml::core_t::TTime>(9.5 * SECONDS_PER_HOUR)); ml::core_t::TTime actual(0); CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); @@ -368,7 +386,8 @@ void CTimeUtilsTest::testTimezone() { CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeUtc, actual)); CPPUNIT_ASSERT_EQUAL(utcExpected, actual); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeTwoHoursBehindUtc, actual)); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime( + formatExplicit, dateTimeTwoHoursBehindUtc, actual)); CPPUNIT_ASSERT_EQUAL(twoHoursBehindUtc, actual); } @@ -376,7 +395,8 @@ void CTimeUtilsTest::testTimezone() { // so daylight saving is in force CPPUNIT_ASSERT(ml::core::CTimezone::setTimezone("Australia/Adelaide")); { - ml::core_t::TTime expected(utcExpected - static_cast<ml::core_t::TTime>(10.5 * SECONDS_PER_HOUR)); + ml::core_t::TTime expected( + utcExpected - static_cast<ml::core_t::TTime>(10.5 * SECONDS_PER_HOUR)); ml::core_t::TTime actual(0); CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(format, dateTime, actual)); @@ -385,7 +405,8 @@ void CTimeUtilsTest::testTimezone() { CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeUtc, actual)); CPPUNIT_ASSERT_EQUAL(utcExpected, actual); - CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime(formatExplicit, dateTimeTwoHoursBehindUtc, actual)); + CPPUNIT_ASSERT(ml::core::CTimeUtils::strptime( + formatExplicit, dateTimeTwoHoursBehindUtc, actual)); CPPUNIT_ASSERT_EQUAL(twoHoursBehindUtc, actual); } diff --git a/lib/core/unittest/CTripleTest.cc b/lib/core/unittest/CTripleTest.cc index 535e850342..c4dd9c11d3 100644 --- a/lib/core/unittest/CTripleTest.cc +++ b/lib/core/unittest/CTripleTest.cc @@ -15,8 +15,10 @@ CppUnit::Test* CTripleTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTripleTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CTripleTest>("CTripleTest::testOperators", &CTripleTest::testOperators)); - suiteOfTests->addTest(new CppUnit::TestCaller<CTripleTest>("CTripleTest::testBoostHashReady", &CTripleTest::testBoostHashReady)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTripleTest>( + "CTripleTest::testOperators", &CTripleTest::testOperators)); + suiteOfTests->addTest(new CppUnit::TestCaller<CTripleTest>( + "CTripleTest::testBoostHashReady", &CTripleTest::testBoostHashReady)); return suiteOfTests; } @@ -59,13 +61,19 @@ void CTripleTest::testOperators() { void CTripleTest::testBoostHashReady() { using TStringSizeShortTriple = ml::core::CTriple<std::string, std::size_t, short>; - using TStringSizeShortTripleSizeMap = boost::unordered_map<TStringSizeShortTriple, std::size_t>; + using TStringSizeShortTripleSizeMap = + boost::unordered_map<TStringSizeShortTriple, std::size_t>; TStringSizeShortTripleSizeMap map; map.emplace(ml::core::make_triple(std::string("foo"), std::size_t(10), short(3)), 1); map.emplace(ml::core::make_triple(std::string("bar"), std::size_t(20), short(4)), 2); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), map[ml::core::make_triple(std::string("foo"), std::size_t(10), short(3))]); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), map[ml::core::make_triple(std::string("bar"), std::size_t(20), short(4))]); - CPPUNIT_ASSERT(map.find(ml::core::make_triple(std::string("bar"), std::size_t(20), short(8))) == map.end()); + CPPUNIT_ASSERT_EQUAL( 
+ std::size_t(1), + map[ml::core::make_triple(std::string("foo"), std::size_t(10), short(3))]); + CPPUNIT_ASSERT_EQUAL( + std::size_t(2), + map[ml::core::make_triple(std::string("bar"), std::size_t(20), short(4))]); + CPPUNIT_ASSERT(map.find(ml::core::make_triple(std::string("bar"), std::size_t(20), + short(8))) == map.end()); } diff --git a/lib/core/unittest/CUnameTest.cc b/lib/core/unittest/CUnameTest.cc index 0733e9f3f5..5499c5d223 100644 --- a/lib/core/unittest/CUnameTest.cc +++ b/lib/core/unittest/CUnameTest.cc @@ -11,7 +11,8 @@ CppUnit::Test* CUnameTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CUnameTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CUnameTest>("CUnameTest::testUname", &CUnameTest::testUname)); + suiteOfTests->addTest(new CppUnit::TestCaller<CUnameTest>( + "CUnameTest::testUname", &CUnameTest::testUname)); return suiteOfTests; } diff --git a/lib/core/unittest/CVectorRangeTest.cc b/lib/core/unittest/CVectorRangeTest.cc index 2b78ed2fde..d19f10cb3f 100644 --- a/lib/core/unittest/CVectorRangeTest.cc +++ b/lib/core/unittest/CVectorRangeTest.cc @@ -27,16 +27,20 @@ void CVectorRangeTest::testCreation() { TDoubleRng range13{values1, 1, 3}; range13 = core::make_range(values1, 0, 3); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 1, 0.1, 0.7, 9.8]"), core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 1, 0.1, 0.7, 9.8]"), + core::CContainerPrinter::print(values1)); range13 = core::make_range(values2, 1, 4); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 1.4, 5.7, 1.2, 9.8]"), core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 1.4, 5.7, 1.2, 9.8]"), + core::CContainerPrinter::print(values1)); range13.assign(2, 2.0); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 2, 2, 9.8]"), core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 2, 2, 9.8]"), + core::CContainerPrinter::print(values1)); range13.assign(values2.begin(), values2.end()); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 3.1, 1.4, 5.7, 1.2, 9.8]"), core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 3.1, 1.4, 5.7, 1.2, 9.8]"), + core::CContainerPrinter::print(values1)); } { TDoubleVec values1{1.0, 0.1, 0.7, 9.8}; @@ -44,8 +48,10 @@ void CVectorRangeTest::testCreation() { TDoubleCRng range1{values1, 1, 3}; range1 = TDoubleCRng(values2, 0, 3); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 0.7, 9.8]"), core::CContainerPrinter::print(values1)); - CPPUNIT_ASSERT_EQUAL(std::string("[3.1, 1.4, 5.7]"), core::CContainerPrinter::print(range1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 0.7, 9.8]"), + core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[3.1, 1.4, 5.7]"), + core::CContainerPrinter::print(range1)); } } @@ -60,11 +66,13 @@ void CVectorRangeTest::testAccessors() { CPPUNIT_ASSERT_EQUAL(0.1, range14.at(0)); CPPUNIT_ASSERT_EQUAL(0.7, range14.at(1)); CPPUNIT_ASSERT_EQUAL(9.8, range14.at(2)); - CPPUNIT_ASSERT_THROW_MESSAGE(std::string("out of range: 3 >= 3"), range14.at(3), std::out_of_range); + CPPUNIT_ASSERT_THROW_MESSAGE(std::string("out of range: 3 >= 3"), + range14.at(3), std::out_of_range); CPPUNIT_ASSERT_EQUAL(0.1, crange14.at(0)); CPPUNIT_ASSERT_EQUAL(0.7, crange14.at(1)); CPPUNIT_ASSERT_EQUAL(9.8, crange14.at(2)); - CPPUNIT_ASSERT_THROW_MESSAGE(std::string("out of range: 4 >= 3"), crange14.at(4), std::out_of_range); + CPPUNIT_ASSERT_THROW_MESSAGE(std::string("out of range: 4 >= 3"), + crange14.at(4), std::out_of_range); CPPUNIT_ASSERT_EQUAL(0.1, range14[0]); CPPUNIT_ASSERT_EQUAL(0.7, 
range14[1]); @@ -93,12 +101,14 @@ void CVectorRangeTest::testIterators() { } CPPUNIT_ASSERT_EQUAL(std::ptrdiff_t(3), range14.end() - range14.begin()); - for (auto i = range14.cbegin(), j = values.cbegin() + 1; i != range14.cend(); ++i, ++j) { + for (auto i = range14.cbegin(), j = values.cbegin() + 1; + i != range14.cend(); ++i, ++j) { CPPUNIT_ASSERT_EQUAL(*j, *i); } CPPUNIT_ASSERT_EQUAL(std::ptrdiff_t(3), range14.end() - range14.begin()); - for (auto i = crange14.begin(), j = values.cbegin() + 1; i != crange14.end(); ++i, ++j) { + for (auto i = crange14.begin(), j = values.cbegin() + 1; + i != crange14.end(); ++i, ++j) { CPPUNIT_ASSERT_EQUAL(*j, *i); } CPPUNIT_ASSERT_EQUAL(std::ptrdiff_t(3), crange14.end() - crange14.begin()); @@ -143,41 +153,57 @@ void CVectorRangeTest::testModifiers() { CPPUNIT_ASSERT_EQUAL(std::size_t(2), range111.size()); range111.clear(); CPPUNIT_ASSERT(range111.empty()); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 0.7, 9.8, 8]"), core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 0.7, 9.8, 8]"), + core::CContainerPrinter::print(values1)); TDoubleRng range125{values1, 2, 5}; range125.insert(range125.begin(), values2.begin(), values2.end()); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 2, 3.5, 8.1, 1.8, 0.7, 9.8, 8]"), core::CContainerPrinter::print(values1)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 1.8, 0.7, 9.8, 8]"), core::CContainerPrinter::print(range125)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 2, 3.5, 8.1, 1.8, 0.7, 9.8, 8]"), + core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 1.8, 0.7, 9.8, 8]"), + core::CContainerPrinter::print(range125)); CPPUNIT_ASSERT_EQUAL(std::size_t(7), range125.size()); range125.erase(range125.begin(), range125.begin() + 4); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 0.7, 9.8, 8]"), core::CContainerPrinter::print(values1)); - CPPUNIT_ASSERT_EQUAL(std::string("[0.7, 9.8, 8]"), core::CContainerPrinter::print(range125)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 0.7, 9.8, 8]"), + core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[0.7, 9.8, 8]"), + core::CContainerPrinter::print(range125)); TDoubleRng range203{values2, 0, 3}; range203.push_back(5.0); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5, 1.8]"), core::CContainerPrinter::print(values2)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5]"), core::CContainerPrinter::print(range203)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5, 1.8]"), + core::CContainerPrinter::print(values2)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5]"), + core::CContainerPrinter::print(range203)); CPPUNIT_ASSERT_EQUAL(std::size_t(4), range203.size()); range203.push_back(3.2); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5, 3.2, 1.8]"), core::CContainerPrinter::print(values2)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5, 3.2]"), core::CContainerPrinter::print(range203)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5, 3.2, 1.8]"), + core::CContainerPrinter::print(values2)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5, 3.2]"), + core::CContainerPrinter::print(range203)); CPPUNIT_ASSERT_EQUAL(std::size_t(5), range203.size()); range203.pop_back(); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5, 1.8]"), core::CContainerPrinter::print(values2)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5]"), core::CContainerPrinter::print(range203)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5, 1.8]"), + 
core::CContainerPrinter::print(values2)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 5]"), + core::CContainerPrinter::print(range203)); CPPUNIT_ASSERT_EQUAL(std::size_t(4), range203.size()); range203.pop_back(); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 1.8]"), core::CContainerPrinter::print(values2)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1]"), core::CContainerPrinter::print(range203)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1, 1.8]"), + core::CContainerPrinter::print(values2)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3.5, 8.1]"), + core::CContainerPrinter::print(range203)); CPPUNIT_ASSERT_EQUAL(std::size_t(3), range203.size()); TDoubleRng range102{values1, 0, 2}; range102.resize(3, 5.0); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 5, 0.7, 9.8, 8]"), core::CContainerPrinter::print(values1)); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 5]"), core::CContainerPrinter::print(range102)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 5, 0.7, 9.8, 8]"), + core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.1, 5]"), + core::CContainerPrinter::print(range102)); range102.resize(1); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.7, 9.8, 8]"), core::CContainerPrinter::print(values1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.7, 9.8, 8]"), + core::CContainerPrinter::print(values1)); CPPUNIT_ASSERT_EQUAL(std::string("[1]"), core::CContainerPrinter::print(range102)); TDoubleRng range113{values1, 1, 3}; @@ -232,13 +258,18 @@ void CVectorRangeTest::testComparisons() { CppUnit::Test* CVectorRangeTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CVectorRangeTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>("CVectorRangeTest::testCreation", &CVectorRangeTest::testCreation)); - suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>("CVectorRangeTest::testAccessors", &CVectorRangeTest::testAccessors)); - suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>("CVectorRangeTest::testIterators", &CVectorRangeTest::testIterators)); - suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>("CVectorRangeTest::testSizing", &CVectorRangeTest::testSizing)); - suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>("CVectorRangeTest::testModifiers", &CVectorRangeTest::testModifiers)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CVectorRangeTest>("CVectorRangeTest::testComparisons", &CVectorRangeTest::testComparisons)); + suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>( + "CVectorRangeTest::testCreation", &CVectorRangeTest::testCreation)); + suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>( + "CVectorRangeTest::testAccessors", &CVectorRangeTest::testAccessors)); + suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>( + "CVectorRangeTest::testIterators", &CVectorRangeTest::testIterators)); + suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>( + "CVectorRangeTest::testSizing", &CVectorRangeTest::testSizing)); + suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>( + "CVectorRangeTest::testModifiers", &CVectorRangeTest::testModifiers)); + suiteOfTests->addTest(new CppUnit::TestCaller<CVectorRangeTest>( + "CVectorRangeTest::testComparisons", &CVectorRangeTest::testComparisons)); return suiteOfTests; } diff --git a/lib/core/unittest/CWindowsErrorTest.cc b/lib/core/unittest/CWindowsErrorTest.cc index a3d050d3af..867f5e2735 100644 --- a/lib/core/unittest/CWindowsErrorTest.cc +++ b/lib/core/unittest/CWindowsErrorTest.cc @@ -11,7 +11,8 @@ CppUnit::Test* CWindowsErrorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CWindowsErrorTest"); - suiteOfTests->addTest(new 
CppUnit::TestCaller<CWindowsErrorTest>("CWindowsErrorTest::testErrors", &CWindowsErrorTest::testErrors)); + suiteOfTests->addTest(new CppUnit::TestCaller<CWindowsErrorTest>( + "CWindowsErrorTest::testErrors", &CWindowsErrorTest::testErrors)); return suiteOfTests; } diff --git a/lib/core/unittest/CWordDictionaryTest.cc b/lib/core/unittest/CWordDictionaryTest.cc index b990dff417..5003d661fb 100644 --- a/lib/core/unittest/CWordDictionaryTest.cc +++ b/lib/core/unittest/CWordDictionaryTest.cc @@ -12,14 +12,14 @@ CppUnit::Test* CWordDictionaryTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CWordDictionaryTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CWordDictionaryTest>("CWordDictionaryTest::testLookups", &CWordDictionaryTest::testLookups)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CWordDictionaryTest>("CWordDictionaryTest::testPartOfSpeech", &CWordDictionaryTest::testPartOfSpeech)); - suiteOfTests->addTest(new CppUnit::TestCaller<CWordDictionaryTest>("CWordDictionaryTest::testWeightingFunctors", - &CWordDictionaryTest::testWeightingFunctors)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CWordDictionaryTest>("CWordDictionaryTest::testPerformance", &CWordDictionaryTest::testPerformance)); + suiteOfTests->addTest(new CppUnit::TestCaller<CWordDictionaryTest>( + "CWordDictionaryTest::testLookups", &CWordDictionaryTest::testLookups)); + suiteOfTests->addTest(new CppUnit::TestCaller<CWordDictionaryTest>( + "CWordDictionaryTest::testPartOfSpeech", &CWordDictionaryTest::testPartOfSpeech)); + suiteOfTests->addTest(new CppUnit::TestCaller<CWordDictionaryTest>( + "CWordDictionaryTest::testWeightingFunctors", &CWordDictionaryTest::testWeightingFunctors)); + suiteOfTests->addTest(new CppUnit::TestCaller<CWordDictionaryTest>( + "CWordDictionaryTest::testPerformance", &CWordDictionaryTest::testPerformance)); return suiteOfTests; } @@ -44,19 +44,25 @@ void CWordDictionaryTest::testLookups() { void CWordDictionaryTest::testPartOfSpeech() { const ml::core::CWordDictionary& dict = ml::core::CWordDictionary::instance(); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_NotInDictionary, dict.partOfSpeech("ajksdf")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_UnknownPart, dict.partOfSpeech("callback")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_NotInDictionary, + dict.partOfSpeech("ajksdf")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_UnknownPart, + dict.partOfSpeech("callback")); CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Noun, dict.partOfSpeech("House")); CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Plural, dict.partOfSpeech("Houses")); CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Verb, dict.partOfSpeech("COMPLETED")); CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Adjective, dict.partOfSpeech("heavy")); CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Adverb, dict.partOfSpeech("slowly")); CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Conjunction, dict.partOfSpeech("AND")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Preposition, dict.partOfSpeech("without")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Interjection, dict.partOfSpeech("gosh")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Preposition, + dict.partOfSpeech("without")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Interjection, + dict.partOfSpeech("gosh")); CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_Pronoun, dict.partOfSpeech("hers")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_DefiniteArticle, dict.partOfSpeech("the")); - CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_IndefiniteArticle, dict.partOfSpeech("a")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_DefiniteArticle, + 
dict.partOfSpeech("the")); + CPPUNIT_ASSERT_EQUAL(ml::core::CWordDictionary::E_IndefiniteArticle, + dict.partOfSpeech("a")); } void CWordDictionaryTest::testWeightingFunctors() { @@ -100,7 +106,8 @@ void CWordDictionaryTest::testPerformance() { const ml::core::CWordDictionary& dict = ml::core::CWordDictionary::instance(); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting word dictionary throughput test at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting word dictionary throughput test at " + << ml::core::CTimeUtils::toTimeString(start)); static const size_t TEST_SIZE(100000); for (size_t count = 0; count < TEST_SIZE; ++count) { @@ -119,7 +126,8 @@ void CWordDictionaryTest::testPerformance() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished word dictionary throughput test at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished word dictionary throughput test at " + << ml::core::CTimeUtils::toTimeString(end)); LOG_INFO(<< "Word dictionary throughput test took " << (end - start) << " seconds"); } diff --git a/lib/core/unittest/CWordExtractorTest.cc b/lib/core/unittest/CWordExtractorTest.cc index 33748d82c7..e20fcbfd8b 100644 --- a/lib/core/unittest/CWordExtractorTest.cc +++ b/lib/core/unittest/CWordExtractorTest.cc @@ -11,10 +11,10 @@ CppUnit::Test* CWordExtractorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CWordExtractorTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CWordExtractorTest::testWordExtract", &CWordExtractorTest::testWordExtract)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CWordExtractorTest::testMinConsecutive", &CWordExtractorTest::testMinConsecutive)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CWordExtractorTest::testWordExtract", &CWordExtractorTest::testWordExtract)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CWordExtractorTest::testMinConsecutive", &CWordExtractorTest::testMinConsecutive)); return suiteOfTests; } @@ -32,8 +32,9 @@ void CWordExtractorTest::testWordExtract() { CPPUNIT_ASSERT_EQUAL(std::string("Failed to Rollback"), words); } { - std::string message("2017-01-25 14:20:49,646 INFO [co.elastic.settlement.synchronization.errors.NonFXInstructionSyncImpl] Found " - "corresponding outgoingPaymentFlow :: OutGoingPaymentFlow.id = 7480"); + std::string message( + "2017-01-25 14:20:49,646 INFO [co.elastic.settlement.synchronization.errors.NonFXInstructionSyncImpl] Found " + "corresponding outgoingPaymentFlow :: OutGoingPaymentFlow.id = 7480"); std::string words; ml::core::CWordExtractor::extractWordsFromMessage(message, words); @@ -52,7 +53,8 @@ void CWordExtractorTest::testWordExtract() { LOG_DEBUG(<< "Message: " << message); LOG_DEBUG(<< "Words: " << words); - CPPUNIT_ASSERT_EQUAL(std::string("which is more than the configured time of seconds Stack trace"), words); + CPPUNIT_ASSERT_EQUAL(std::string("which is more than the configured time of seconds Stack trace"), + words); } } @@ -95,7 +97,8 @@ void CWordExtractorTest::testMinConsecutive() { LOG_DEBUG(<< "Message: " << message); LOG_DEBUG(<< "Words: " << words); - CPPUNIT_ASSERT_EQUAL(std::string("which is more than the configured time seconds Stack trace"), words); + CPPUNIT_ASSERT_EQUAL(std::string("which is more than the configured time seconds Stack trace"), + words); ml::core::CWordExtractor::extractWordsFromMessage(3, message, words); @@ -103,7 +106,8 @@ void CWordExtractorTest::testMinConsecutive() { LOG_DEBUG(<< "Message: " << message); 
LOG_DEBUG(<< "Words: " << words); - CPPUNIT_ASSERT_EQUAL(std::string("which is more than the configured time seconds Stack trace"), words); + CPPUNIT_ASSERT_EQUAL(std::string("which is more than the configured time seconds Stack trace"), + words); ml::core::CWordExtractor::extractWordsFromMessage(4, message, words); @@ -124,7 +128,8 @@ void CWordExtractorTest::testMinConsecutive() { LOG_DEBUG(<< "Message: " << message); LOG_DEBUG(<< "Words: " << words); - CPPUNIT_ASSERT_EQUAL(std::string("Output threshold breached for at position using application on channel"), words); + CPPUNIT_ASSERT_EQUAL(std::string("Output threshold breached for at position using application on channel"), + words); ml::core::CWordExtractor::extractWordsFromMessage(3, message, words); diff --git a/lib/core/unittest/CXmlNodeWithChildrenTest.cc b/lib/core/unittest/CXmlNodeWithChildrenTest.cc index a0c27117db..4ac9d4dc35 100644 --- a/lib/core/unittest/CXmlNodeWithChildrenTest.cc +++ b/lib/core/unittest/CXmlNodeWithChildrenTest.cc @@ -14,14 +14,18 @@ CppUnit::Test* CXmlNodeWithChildrenTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXmlNodeWithChildrenTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlNodeWithChildrenTest::testNodeHierarchyToXml", - &CXmlNodeWithChildrenTest::testNodeHierarchyToXml)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlNodeWithChildrenTest::testParserToNodeHierarchy", - &CXmlNodeWithChildrenTest::testParserToNodeHierarchy)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlNodeWithChildrenTest::testPerformanceNoPool", - &CXmlNodeWithChildrenTest::testPerformanceNoPool)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlNodeWithChildrenTest::testPerformanceWithPool", - &CXmlNodeWithChildrenTest::testPerformanceWithPool)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXmlNodeWithChildrenTest::testNodeHierarchyToXml", + &CXmlNodeWithChildrenTest::testNodeHierarchyToXml)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXmlNodeWithChildrenTest::testParserToNodeHierarchy", + &CXmlNodeWithChildrenTest::testParserToNodeHierarchy)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXmlNodeWithChildrenTest::testPerformanceNoPool", + &CXmlNodeWithChildrenTest::testPerformanceNoPool)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXmlNodeWithChildrenTest::testPerformanceWithPool", + &CXmlNodeWithChildrenTest::testPerformanceWithPool)); return suiteOfTests; } @@ -50,7 +54,8 @@ void CXmlNodeWithChildrenTest::testNodeHierarchyToXml() { root.addChild(oneDeep2); std::string strRep(root.dump()); - LOG_DEBUG(<< "Indented representation of XML node hierarchy is:\n" << strRep); + LOG_DEBUG(<< "Indented representation of XML node hierarchy is:\n" + << strRep); CPPUNIT_ASSERT(strRep.find("root") != std::string::npos); CPPUNIT_ASSERT(strRep.find("oneDeep1") != std::string::npos); @@ -124,7 +129,8 @@ void CXmlNodeWithChildrenTest::testParserToNodeHierarchy() { CPPUNIT_ASSERT(rootNodePtr != nullptr); std::string strRep(rootNodePtr->dump()); - LOG_DEBUG(<< "Indented representation of XML node hierarchy is:\n" << strRep); + LOG_DEBUG(<< "Indented representation of XML node hierarchy is:\n" + << strRep); CPPUNIT_ASSERT(xml.find("root") != std::string::npos); CPPUNIT_ASSERT(xml.find("name1") != std::string::npos); @@ -150,7 +156,8 @@ void CXmlNodeWithChildrenTest::testPerformanceNoPool() { CPPUNIT_ASSERT(parser.parseFile("testfiles/p2psmon.xml")); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting node hierarchy performance 
test with no pool at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting node hierarchy performance test with no pool at " + << ml::core::CTimeUtils::toTimeString(start)); static const size_t TEST_SIZE(20000); for (size_t count = 0; count < TEST_SIZE; ++count) { @@ -160,9 +167,11 @@ void CXmlNodeWithChildrenTest::testPerformanceNoPool() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished node hierarchy performance test with no pool at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished node hierarchy performance test with no pool at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Node hierarchy performance test of size " << TEST_SIZE << " with no pool took " << (end - start) << " seconds"); + LOG_INFO(<< "Node hierarchy performance test of size " << TEST_SIZE + << " with no pool took " << (end - start) << " seconds"); } void CXmlNodeWithChildrenTest::testPerformanceWithPool() { @@ -171,7 +180,8 @@ void CXmlNodeWithChildrenTest::testPerformanceWithPool() { CPPUNIT_ASSERT(parser.parseFile("testfiles/p2psmon.xml")); ml::core_t::TTime start(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Starting node hierarchy performance test with pool at " << ml::core::CTimeUtils::toTimeString(start)); + LOG_INFO(<< "Starting node hierarchy performance test with pool at " + << ml::core::CTimeUtils::toTimeString(start)); ml::core::CXmlNodeWithChildrenPool pool; @@ -184,7 +194,9 @@ void CXmlNodeWithChildrenTest::testPerformanceWithPool() { } ml::core_t::TTime end(ml::core::CTimeUtils::now()); - LOG_INFO(<< "Finished node hierarchy performance test with pool at " << ml::core::CTimeUtils::toTimeString(end)); + LOG_INFO(<< "Finished node hierarchy performance test with pool at " + << ml::core::CTimeUtils::toTimeString(end)); - LOG_INFO(<< "Node hierarchy performance test of size " << TEST_SIZE << " with pool took " << (end - start) << " seconds"); + LOG_INFO(<< "Node hierarchy performance test of size " << TEST_SIZE + << " with pool took " << (end - start) << " seconds"); } diff --git a/lib/core/unittest/CXmlParserTest.cc b/lib/core/unittest/CXmlParserTest.cc index 5ea49222b9..edbd224b2a 100644 --- a/lib/core/unittest/CXmlParserTest.cc +++ b/lib/core/unittest/CXmlParserTest.cc @@ -22,29 +22,50 @@ CppUnit::Test* CXmlParserTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXmlParserTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testParse1File", &CXmlParserTest::testParse1File)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testParse1String", &CXmlParserTest::testParse1String)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testParse2", &CXmlParserTest::testParse2)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testNavigate", &CXmlParserTest::testNavigate)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testParseXInclude", &CXmlParserTest::testParseXInclude)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testParse3", &CXmlParserTest::testParse3)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testParse4", &CXmlParserTest::testParse4)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testParse5", &CXmlParserTest::testParse5)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testParse6", &CXmlParserTest::testParse6)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXmlParserTest::testConvert1", &CXmlParserTest::testConvert1)); - 
suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvert2", &CXmlParserTest::testConvert2)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvert3", &CXmlParserTest::testConvert3)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvert4", &CXmlParserTest::testConvert4)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testAddNewChildNode", &CXmlParserTest::testAddNewChildNode)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testSetRootNode", &CXmlParserTest::testSetRootNode)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testDump", &CXmlParserTest::testDump)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testMakeValidName", &CXmlParserTest::testMakeValidName)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testChangeChild", &CXmlParserTest::testChangeChild)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testHugeDoc", &CXmlParserTest::testHugeDoc)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testParseSpeed", &CXmlParserTest::testParseSpeed)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testConvertSpeed", &CXmlParserTest::testConvertSpeed)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>("CXmlParserTest::testComplexXPath", &CXmlParserTest::testComplexXPath)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParse1File", &CXmlParserTest::testParse1File)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParse1String", &CXmlParserTest::testParse1String)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParse2", &CXmlParserTest::testParse2)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testNavigate", &CXmlParserTest::testNavigate)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParseXInclude", &CXmlParserTest::testParseXInclude)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParse3", &CXmlParserTest::testParse3)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParse4", &CXmlParserTest::testParse4)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParse5", &CXmlParserTest::testParse5)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParse6", &CXmlParserTest::testParse6)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testConvert1", &CXmlParserTest::testConvert1)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testConvert2", &CXmlParserTest::testConvert2)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testConvert3", &CXmlParserTest::testConvert3)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testConvert4", &CXmlParserTest::testConvert4)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testAddNewChildNode", &CXmlParserTest::testAddNewChildNode)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testSetRootNode", &CXmlParserTest::testSetRootNode)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testDump", &CXmlParserTest::testDump)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testMakeValidName", &CXmlParserTest::testMakeValidName)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testChangeChild", &CXmlParserTest::testChangeChild)); + suiteOfTests->addTest(new 
CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testHugeDoc", &CXmlParserTest::testHugeDoc)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testParseSpeed", &CXmlParserTest::testParseSpeed)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testConvertSpeed", &CXmlParserTest::testConvertSpeed)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXmlParserTest>( + "CXmlParserTest::testComplexXPath", &CXmlParserTest::testComplexXPath)); return suiteOfTests; } @@ -83,7 +104,8 @@ void CXmlParserTest::testParse2() { CPPUNIT_ASSERT(parser.evalXPathExpression("//badpath", nodes)); CPPUNIT_ASSERT(nodes.empty()); - CPPUNIT_ASSERT(parser.evalXPathExpression("/syslog_parser/parsetree/expression/description", nodes)); + CPPUNIT_ASSERT(parser.evalXPathExpression( + "/syslog_parser/parsetree/expression/description", nodes)); CPPUNIT_ASSERT_EQUAL(size_t(2), nodes.size()); CPPUNIT_ASSERT_EQUAL(std::string("description"), nodes[0].name()); @@ -94,7 +116,8 @@ CPPUNIT_ASSERT_EQUAL(std::string("Transport read error"), nodes[1].value()); CPPUNIT_ASSERT(nodes[1].attributes().empty()); - CPPUNIT_ASSERT(parser.evalXPathExpression("/syslog_parser/parsetree/expression[1]/regexes/varbind/token", nodes)); + CPPUNIT_ASSERT(parser.evalXPathExpression( + "/syslog_parser/parsetree/expression[1]/regexes/varbind/token", nodes)); CPPUNIT_ASSERT_EQUAL(size_t(2), nodes.size()); CPPUNIT_ASSERT_EQUAL(std::string("token"), nodes[0].name()); @@ -105,7 +128,8 @@ CPPUNIT_ASSERT_EQUAL(std::string("source"), 
nodes[1].value()); CPPUNIT_ASSERT(nodes[1].attributes().empty()); - CPPUNIT_ASSERT(parser.evalXPathExpression("/syslog_parser/parsetree/expression[1]/regexes/varbind/regex", nodes)); + CPPUNIT_ASSERT(parser.evalXPathExpression( + "/syslog_parser/parsetree/expression[1]/regexes/varbind/regex", nodes)); CPPUNIT_ASSERT_EQUAL(size_t(2), nodes.size()); CPPUNIT_ASSERT_EQUAL(std::string("regex"), nodes[0].name()); @@ -199,7 +227,8 @@ void CXmlParserTest::testParseXInclude() { CPPUNIT_ASSERT(this->testAttribute(nodes[0], "local", "BZ")); CPPUNIT_ASSERT_EQUAL(std::string("regex"), nodes[1].name()); - CPPUNIT_ASSERT_EQUAL(std::string("(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"), nodes[1].value()); + CPPUNIT_ASSERT_EQUAL(std::string("(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"), + nodes[1].value()); CPPUNIT_ASSERT(nodes[1].attributes().empty()); } @@ -212,10 +241,12 @@ void CXmlParserTest::testParse3() { ml::core::CXmlParser::TXmlNodeVec arguments; - CPPUNIT_ASSERT(parser.evalXPathExpression("/ItemSearchResponse/OperationRequest/Arguments/Argument", arguments)); + CPPUNIT_ASSERT(parser.evalXPathExpression( + "/ItemSearchResponse/OperationRequest/Arguments/Argument", arguments)); CPPUNIT_ASSERT_EQUAL(size_t(7), arguments.size()); - for (ml::core::CXmlParser::TXmlNodeVecItr itr = arguments.begin(); itr != arguments.end(); ++itr) { + for (ml::core::CXmlParser::TXmlNodeVecItr itr = arguments.begin(); + itr != arguments.end(); ++itr) { if (itr->value() == "Service") { CPPUNIT_ASSERT(this->testAttribute(*itr, "Value", "AWSECommerceService")); } else if (itr->value() == "AssociateTag") { @@ -250,10 +281,12 @@ void CXmlParserTest::testParse4() { CPPUNIT_ASSERT(parser.evalXPathExpression("/ItemSearchResponse/Items/TotalPages", valid)); CPPUNIT_ASSERT(valid); - CPPUNIT_ASSERT(parser.evalXPathExpression("/ItemSearchResponse/Items/Request/IsNotValid", valid)); + CPPUNIT_ASSERT(parser.evalXPathExpression( + "/ItemSearchResponse/Items/Request/IsNotValid", valid)); CPPUNIT_ASSERT(!valid); - CPPUNIT_ASSERT(parser.evalXPathExpression("/ItemSearchResponse/Items/Request/IsNotValidNo", valid)); + CPPUNIT_ASSERT(parser.evalXPathExpression( + "/ItemSearchResponse/Items/Request/IsNotValidNo", valid)); CPPUNIT_ASSERT(!valid); int i; @@ -262,8 +295,10 @@ void CXmlParserTest::testParse4() { // Invalid conversions CPPUNIT_ASSERT(!parser.evalXPathExpression("/ItemSearchResponse/Items/Request/IsValid", i)); - CPPUNIT_ASSERT(!parser.evalXPathExpression("/ItemSearchResponse/Items/Request/ItemSearchRequest", i)); - CPPUNIT_ASSERT(!parser.evalXPathExpression("/ItemSearchResponse/Items/Request/ItemSearchRequest/Author", i)); + CPPUNIT_ASSERT(!parser.evalXPathExpression( + "/ItemSearchResponse/Items/Request/ItemSearchRequest", i)); + CPPUNIT_ASSERT(!parser.evalXPathExpression( + "/ItemSearchResponse/Items/Request/ItemSearchRequest/Author", i)); } void CXmlParserTest::testParse5() { @@ -454,7 +489,8 @@ void CXmlParserTest::testConvert3() { void CXmlParserTest::testConvert4() { // Use a standard node hierarchy to allow for comparison with the // standards-compliant XML parser - ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root(CRapidXmlParserTest::makeTestNodeHierarchy()); + ml::core::CXmlNodeWithChildren::TXmlNodeWithChildrenP root( + CRapidXmlParserTest::makeTestNodeHierarchy()); std::string converted; ml::core::CXmlParser::convert(*root, converted); @@ -474,7 +510,8 @@ void CXmlParserTest::testConvert4() { CPPUNIT_ASSERT(converted.find("") != std::string::npos); CPPUNIT_ASSERT(converted.find("") != std::string::npos || 
converted.find("") != std::string::npos); + CPPUNIT_ASSERT(converted.find("") != std::string::npos || + converted.find("") != std::string::npos); CPPUNIT_ASSERT(converted.find("initialized()) { return; } core_t::TTime size{static_cast(values.size())}; core_t::TTime dT{(end - start) / size}; - core_t::TTime dt{static_cast(CTools::truncate(m_MinimumBucketLength, 1.0, static_cast(dT)))}; + core_t::TTime dt{static_cast( + CTools::truncate(m_MinimumBucketLength, 1.0, static_cast(dT)))}; double scale{std::pow(static_cast(dt) / static_cast(dT), 2.0)}; @@ -159,7 +165,8 @@ void CAdaptiveBucketing::clear() { } void CAdaptiveBucketing::add(std::size_t bucket, core_t::TTime time, double weight) { - TDoubleMeanAccumulator centre{CBasicStatistics::accumulator(this->count(bucket), static_cast(m_Centres[bucket]))}; + TDoubleMeanAccumulator centre{CBasicStatistics::accumulator( + this->count(bucket), static_cast(m_Centres[bucket]))}; centre.add(this->offset(time), weight); m_Centres[bucket] = CBasicStatistics::mean(centre); } @@ -213,8 +220,9 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { TDoubleVec ranges; ranges.reserve(n); for (std::size_t i = 0u; i < n; ++i) { - TDoubleDoublePr v[]{ - values[(n + i - 2) % n], values[(n + i - 1) % n], values[(n + i + 0) % n], values[(n + i + 1) % n], values[(n + i + 2) % n]}; + TDoubleDoublePr v[]{values[(n + i - 2) % n], values[(n + i - 1) % n], + values[(n + i + 0) % n], values[(n + i + 1) % n], + values[(n + i + 2) % n]}; TMinAccumulator min; TMaxAccumulator max; @@ -226,8 +234,10 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { } if (min.count() > 0) { - ranges.push_back(WEIGHTS[max[0].second > min[0].second ? max[0].second - min[0].second : min[0].second - max[0].second] * - std::pow(max[0].first - min[0].first, 0.75)); + ranges.push_back( + WEIGHTS[max[0].second > min[0].second ? max[0].second - min[0].second + : min[0].second - max[0].second] * + std::pow(max[0].first - min[0].first, 0.75)); } else { ranges.push_back(0.0); } @@ -285,7 +295,8 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { // details. double alpha{ALPHA * (CBasicStatistics::mean(m_Force) == 0.0 ? 1.0 - : std::fabs(CBasicStatistics::mean(m_LpForce)) / CBasicStatistics::mean(m_Force))}; + : std::fabs(CBasicStatistics::mean(m_LpForce)) / + CBasicStatistics::mean(m_Force))}; double force{0.0}; // Linearly interpolate between the current end points @@ -305,8 +316,9 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { double x{h * e_ / averagingErrors[i]}; m_Endpoints[j] = endpoints[j] + alpha * (ai + x - endpoints[j]); force += (ai + x) - endpoints[j]; - LOG_TRACE(<< "interval averaging error = " << e << ", a(i) = " << ai << ", x = " << x << ", endpoint " << endpoints[j] - << " -> " << ai + x); + LOG_TRACE(<< "interval averaging error = " << e + << ", a(i) = " << ai << ", x = " << x << ", endpoint " + << endpoints[j] << " -> " << ai + x); ++j; } } @@ -341,7 +353,8 @@ bool CAdaptiveBucketing::knots(core_t::TTime time, for (std::size_t i = 0u; i < n; ++i) { if (this->count(i) > 0.0) { double wide{3.0 * (m_Endpoints[n] - m_Endpoints[0]) / static_cast(n)}; - LOG_TRACE(<< "period " << m_Endpoints[n] - m_Endpoints[0] << ", # buckets = " << n << ", wide = " << wide); + LOG_TRACE(<< "period " << m_Endpoints[n] - m_Endpoints[0] + << ", # buckets = " << n << ", wide = " << wide); // We get two points for each wide bucket but at most // one third of the buckets can be wide. 
In this case @@ -445,10 +458,12 @@ CAdaptiveBucketing::TDoubleVec CAdaptiveBucketing::variances() const { bool CAdaptiveBucketing::bucket(core_t::TTime time, std::size_t& result) const { double t{this->offset(time)}; - std::size_t i(std::upper_bound(m_Endpoints.begin(), m_Endpoints.end(), t) - m_Endpoints.begin()); + std::size_t i(std::upper_bound(m_Endpoints.begin(), m_Endpoints.end(), t) - + m_Endpoints.begin()); std::size_t n{m_Endpoints.size()}; if (t < m_Endpoints[0] || i == n) { - LOG_ERROR(<< "t = " << t << " out of range [" << m_Endpoints[0] << "," << m_Endpoints[n - 1] << ")"); + LOG_ERROR(<< "t = " << t << " out of range [" << m_Endpoints[0] << "," + << m_Endpoints[n - 1] << ")"); return false; } diff --git a/lib/maths/CAgglomerativeClusterer.cc b/lib/maths/CAgglomerativeClusterer.cc index b88bc12bca..3bb9d7b9d8 100644 --- a/lib/maths/CAgglomerativeClusterer.cc +++ b/lib/maths/CAgglomerativeClusterer.cc @@ -62,8 +62,13 @@ inline double distance(const TDoubleVecVec& distanceMatrix, std::size_t i, std:: //! \f$\displaystyle \max_{a \in A, b \in B}{d[a,b]}\f$ //! struct SComplete { - void operator()(const TDoubleVec& /*sizes*/, std::size_t x, std::size_t a, std::size_t b, TDoubleVecVec& distanceMatrix) const { - distance(distanceMatrix, b, x) = std::max(distance(distanceMatrix, a, x), distance(distanceMatrix, b, x)); + void operator()(const TDoubleVec& /*sizes*/, + std::size_t x, + std::size_t a, + std::size_t b, + TDoubleVecVec& distanceMatrix) const { + distance(distanceMatrix, b, x) = std::max(distance(distanceMatrix, a, x), + distance(distanceMatrix, b, x)); } }; @@ -74,17 +79,28 @@ struct SComplete { //! \f$\displaystyle \frac{1}{|A||B|}\sum_{a \in A, b \in B}{d[a,b]}\f$ //! struct SAverage { - void operator()(const TDoubleVec& sizes, std::size_t x, std::size_t a, std::size_t b, TDoubleVecVec& distanceMatrix) const { + void operator()(const TDoubleVec& sizes, + std::size_t x, + std::size_t a, + std::size_t b, + TDoubleVecVec& distanceMatrix) const { double sa = sizes[a]; double sb = sizes[b]; - distance(distanceMatrix, b, x) = (sa * distance(distanceMatrix, a, x) + sb * distance(distanceMatrix, b, x)) / (sa + sb); + distance(distanceMatrix, b, x) = (sa * distance(distanceMatrix, a, x) + + sb * distance(distanceMatrix, b, x)) / + (sa + sb); } }; //! \brief Weighted objective distance update function. struct SWeighted { - void operator()(const TDoubleVec /*sizes*/, std::size_t x, std::size_t a, std::size_t b, TDoubleVecVec& distanceMatrix) const { - distance(distanceMatrix, b, x) = (distance(distanceMatrix, a, x) + distance(distanceMatrix, b, x)) / 2.0; + void operator()(const TDoubleVec /*sizes*/, + std::size_t x, + std::size_t a, + std::size_t b, + TDoubleVecVec& distanceMatrix) const { + distance(distanceMatrix, b, x) = + (distance(distanceMatrix, a, x) + distance(distanceMatrix, b, x)) / 2.0; } }; @@ -92,13 +108,19 @@ struct SWeighted { //! //! See https://en.wikipedia.org/wiki/Ward%27s_method. 
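[Editorial aside, not part of the patch: the SComplete, SAverage and SWeighted functors above, and SWard below, are all cases of the Lance-Williams recurrence: when clusters a and b merge, row b of the distance matrix is rewritten in place with the distance from the merged cluster to every other cluster x. A minimal standalone sketch of the average-linkage case, using plain dense indexing where the patch's code addresses a packed triangular matrix through its distance() helper:]

#include <cstddef>
#include <vector>

// Average linkage: d(a+b, x) is the size-weighted mean of d(a, x) and d(b, x).
void averageLinkageUpdate(const std::vector<double>& sizes,
                          std::size_t x,
                          std::size_t a,
                          std::size_t b,
                          std::vector<std::vector<double>>& distanceMatrix) {
    double sa = sizes[a];
    double sb = sizes[b];
    distanceMatrix[b][x] =
        (sa * distanceMatrix[a][x] + sb * distanceMatrix[b][x]) / (sa + sb);
}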
struct SWard { - void operator()(const TDoubleVec sizes, std::size_t x, std::size_t a, std::size_t b, TDoubleVecVec& distanceMatrix) const { + void operator()(const TDoubleVec sizes, + std::size_t x, + std::size_t a, + std::size_t b, + TDoubleVecVec& distanceMatrix) const { double sa = sizes[a]; double sb = sizes[b]; double sx = sizes[x]; - distance(distanceMatrix, b, x) = std::sqrt((sa + sx) * distance(distanceMatrix, a, x) + (sb + sx) * distance(distanceMatrix, b, x) - - sx * distance(distanceMatrix, a, b)) / - (sa + sb + sx); + distance(distanceMatrix, b, x) = + std::sqrt((sa + sx) * distance(distanceMatrix, a, x) + + (sb + sx) * distance(distanceMatrix, b, x) - + sx * distance(distanceMatrix, a, b)) / + (sa + sb + sx); } }; @@ -256,7 +278,8 @@ void nnCluster(TDoubleVecVec& distanceMatrix, UPDATE update, TDoubleSizeSizePrPr std::size_t rb = rightmost[b]; LOG_TRACE(<< "chain = " << core::CContainerPrinter::print(chain)); - LOG_TRACE(<< "d = " << d << ", a = " << a << ", b = " << b << ", rightmost a = " << ra << ", rightmost b " << rb << ", m = " << m); + LOG_TRACE(<< "d = " << d << ", a = " << a << ", b = " << b << ", rightmost a = " + << ra << ", rightmost b " << rb << ", m = " << m); // a and b are reciprocal nearest neighbors. L.emplace_back(d, std::make_pair(ra, rb)); @@ -374,7 +397,8 @@ void CAgglomerativeClusterer::run(EObjective objective, TNodeVec& tree) { ////// CNode ////// CAgglomerativeClusterer::CNode::CNode(std::size_t index, double height) - : m_Parent(nullptr), m_LeftChild(nullptr), m_RightChild(nullptr), m_Index(index), m_Height(height) { + : m_Parent(nullptr), m_LeftChild(nullptr), m_RightChild(nullptr), + m_Index(index), m_Height(height) { } bool CAgglomerativeClusterer::CNode::addChild(CNode& child) { diff --git a/lib/maths/CAssignment.cc b/lib/maths/CAssignment.cc index 4110108c3b..8c8c3d21ac 100644 --- a/lib/maths/CAssignment.cc +++ b/lib/maths/CAssignment.cc @@ -52,8 +52,11 @@ inline double cost(const TDoubleVecVec& costs, std::size_t i, std::size_t j) { //! \param[in] columnPotential The column potential function. //! \param[in] i The row index. //! \param[in] j The column index. -inline double -adjustedCost(const TDoubleVecVec& costs, const TDoubleVec& rowPotential, const TDoubleVec& columnPotential, std::size_t i, std::size_t j) { +inline double adjustedCost(const TDoubleVecVec& costs, + const TDoubleVec& rowPotential, + const TDoubleVec& columnPotential, + std::size_t i, + std::size_t j) { // The bracketing is important in this expression since // it ensures we find the correct initial feasible solution. 
return (cost(costs, i, j) - columnPotential[j]) - rowPotential[i]; @@ -181,8 +184,10 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec& costs, TSizeSizePrVec& matchi TDoubleVec minSlackValueByColumn(N, MAXIMUM_COST); while (unmatched > 0) { - LOG_TRACE(<< "matchColumnByRow = " << core::CContainerPrinter::print(matchColumnByRow)); - LOG_TRACE(<< "matchRowByColumn = " << core::CContainerPrinter::print(matchRowByColumn)); + LOG_TRACE(<< "matchColumnByRow = " + << core::CContainerPrinter::print(matchColumnByRow)); + LOG_TRACE(<< "matchRowByColumn = " + << core::CContainerPrinter::print(matchRowByColumn)); LOG_TRACE(<< "unmatched = " << unmatched); LOG_TRACE(<< "*** Initialize augmenting path ***"); @@ -206,20 +211,14 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec& costs, TSizeSizePrVec& matchi std::size_t minSlackRow; std::size_t minSlackColumn; double minSlackValue; - grow(costs, - rowPotential, - columnPotential, - parentRowByCommittedColumn, - pivot, - committedRows, - minSlackRowByColumn, - minSlackValueByColumn, - minSlackRow, - minSlackColumn, - minSlackValue); + grow(costs, rowPotential, columnPotential, parentRowByCommittedColumn, + pivot, committedRows, minSlackRowByColumn, minSlackValueByColumn, + minSlackRow, minSlackColumn, minSlackValue); LOG_TRACE(<< "committedRows = " << core::CContainerPrinter::print(committedRows)); - LOG_TRACE(<< "minSlackRowByColumn = " << core::CContainerPrinter::print(minSlackRowByColumn)); - LOG_TRACE(<< "minSlackValueByColumn = " << core::CContainerPrinter::print(minSlackValueByColumn)); + LOG_TRACE(<< "minSlackRowByColumn = " + << core::CContainerPrinter::print(minSlackRowByColumn)); + LOG_TRACE(<< "minSlackValueByColumn = " + << core::CContainerPrinter::print(minSlackValueByColumn)); // Search for an augmenting path following zero slack // edges. 
In each iteration the minimum potential is @@ -234,8 +233,8 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec& costs, TSizeSizePrVec& matchi std::size_t check = 0u; for (/**/; check < N; ++check) { - LOG_TRACE(<< " minSlackValue = " << minSlackValue << ", minSlackRow = " << minSlackRow - << ", minSlackColumn = " << minSlackColumn); + LOG_TRACE(<< " minSlackValue = " << minSlackValue << ", minSlackRow = " + << minSlackRow << ", minSlackColumn = " << minSlackColumn); // Checking greater than zero here is important since // due to non-associativity of floating point arithmetic @@ -255,8 +254,10 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec& costs, TSizeSizePrVec& matchi columnPotential[j] -= adjustment; } } - LOG_TRACE(<< " rowPotential = " << core::CContainerPrinter::print(rowPotential)); - LOG_TRACE(<< " columnPotential = " << core::CContainerPrinter::print(columnPotential)); + LOG_TRACE(<< " rowPotential = " + << core::CContainerPrinter::print(rowPotential)); + LOG_TRACE(<< " columnPotential = " + << core::CContainerPrinter::print(columnPotential)); } parentRowByCommittedColumn[minSlackColumn] = minSlackRow; @@ -275,7 +276,8 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec& costs, TSizeSizePrVec& matchi } } if (check == N) { - LOG_ERROR(<< "Bad augmenting path: costs = " << core::CContainerPrinter::print(costs)); + LOG_ERROR(<< "Bad augmenting path: costs = " + << core::CContainerPrinter::print(costs)); return false; } --unmatched; @@ -283,27 +285,24 @@ bool CAssignment::kuhnMunkres(const TDoubleVecVec& costs, TSizeSizePrVec& matchi break; } else { LOG_TRACE(<< " pivot = " << pivot); - LOG_TRACE(<< " parentRowByCommittedColumn = " << core::CContainerPrinter::print(parentRowByCommittedColumn)); + LOG_TRACE(<< " parentRowByCommittedColumn = " + << core::CContainerPrinter::print(parentRowByCommittedColumn)); // Grow the path to include the pivot row. 
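[Editorial aside, not part of the patch: the potential adjustments above preserve the Kuhn-Munkres invariant that the duals stay feasible, i.e. rowPotential[i] + columnPotential[j] <= costs[i][j] for every pair, with equality on matched edges, which is exactly why adjustedCost is always non-negative. A small illustrative checker for that invariant, with simplified types:]

#include <cstddef>
#include <vector>

// Returns true if no (row, column) pair's summed potentials exceed its cost,
// i.e. every adjusted cost is non-negative up to floating point tolerance.
bool dualFeasible(const std::vector<std::vector<double>>& costs,
                  const std::vector<double>& rowPotential,
                  const std::vector<double>& columnPotential,
                  double tolerance = 1e-9) {
    for (std::size_t i = 0; i < costs.size(); ++i) {
        for (std::size_t j = 0; j < costs[i].size(); ++j) {
            if (rowPotential[i] + columnPotential[j] > costs[i][j] + tolerance) {
                return false;
            }
        }
    }
    return true;
}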
- grow(costs, - rowPotential, - columnPotential, - parentRowByCommittedColumn, - pivot, - committedRows, - minSlackRowByColumn, - minSlackValueByColumn, - minSlackRow, - minSlackColumn, - minSlackValue); - LOG_TRACE(<< " committedRows = " << core::CContainerPrinter::print(committedRows)); - LOG_TRACE(<< " minSlackRowByColumn = " << core::CContainerPrinter::print(minSlackRowByColumn)); - LOG_TRACE(<< " minSlackValueByColumn = " << core::CContainerPrinter::print(minSlackValueByColumn)); + grow(costs, rowPotential, columnPotential, parentRowByCommittedColumn, + pivot, committedRows, minSlackRowByColumn, minSlackValueByColumn, + minSlackRow, minSlackColumn, minSlackValue); + LOG_TRACE(<< " committedRows = " + << core::CContainerPrinter::print(committedRows)); + LOG_TRACE(<< " minSlackRowByColumn = " + << core::CContainerPrinter::print(minSlackRowByColumn)); + LOG_TRACE(<< " minSlackValueByColumn = " + << core::CContainerPrinter::print(minSlackValueByColumn)); } } if (check == N) { - LOG_ERROR(<< "Failed to find path: costs " << core::CContainerPrinter::print(costs)); + LOG_ERROR(<< "Failed to find path: costs " + << core::CContainerPrinter::print(costs)); return false; } diff --git a/lib/maths/CBasicStatistics.cc b/lib/maths/CBasicStatistics.cc index 848d3aed20..4e3675208f 100644 --- a/lib/maths/CBasicStatistics.cc +++ b/lib/maths/CBasicStatistics.cc @@ -19,7 +19,8 @@ double CBasicStatistics::mean(const TDoubleDoublePr& samples) { } double CBasicStatistics::mean(const TDoubleVec& sample) { - return std::accumulate(sample.begin(), sample.end(), 0.0) / static_cast(sample.size()); + return std::accumulate(sample.begin(), sample.end(), 0.0) / + static_cast(sample.size()); } double CBasicStatistics::median(const TDoubleVec& dataIn) { diff --git a/lib/maths/CBjkstUniqueValues.cc b/lib/maths/CBjkstUniqueValues.cc index f5a42c93ee..f310acca5b 100644 --- a/lib/maths/CBjkstUniqueValues.cc +++ b/lib/maths/CBjkstUniqueValues.cc @@ -52,8 +52,7 @@ using TUInt8UInt8Pr = std::pair; //! \endcode class CHashIterator : public std::iterator, - private boost::less_than_comparable>> { + private boost::less_than_comparable>> { public: //! The STL that comes with g++ requires a default constructor - this //! will create an object that's suitable only to be assigned to, which @@ -99,7 +98,9 @@ class CHashIterator m_Itr -= 3 * n; return *this; } - ptrdiff_t operator-(const CHashIterator& other) const { return (m_Itr - other.m_Itr) / 3; } + ptrdiff_t operator-(const CHashIterator& other) const { + return (m_Itr - other.m_Itr) / 3; + } private: TUInt8VecItr m_Itr; @@ -110,7 +111,8 @@ bool insert(TUInt8Vec& b, uint16_t g, uint8_t zeros) { // |<---8 bits--->|<---8 bits--->|<---8 bits--->| // |(g >> 8) % 256| g % 256 | zeros | - CHashIterator lb = std::lower_bound(CHashIterator(b.begin()), CHashIterator(b.end()), g); + CHashIterator lb = + std::lower_bound(CHashIterator(b.begin()), CHashIterator(b.end()), g); if (lb.base() != b.end() && *lb == g) { // We've got this value in the set. Update the zeros, // which may have changed if the h hash has changed. 
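[Editorial aside, not part of the patch: the bucket storage that insert() and CHashIterator maintain above is just a flat byte vector of sorted 3-byte records, two bytes for the 16-bit hash g plus one byte for the trailing-zeros count. A self-contained sketch of the same encoding with a plain binary search instead of the custom iterator; names are illustrative:]

#include <cstdint>
#include <vector>

// Each record is 3 bytes: g split big-endian into two bytes, then zeros.
// Records are kept sorted by g so membership tests are a binary search.
void insertRecord(std::vector<uint8_t>& b, uint16_t g, uint8_t zeros) {
    std::size_t lo = 0;
    std::size_t hi = b.size() / 3;
    while (lo < hi) {
        std::size_t mid = (lo + hi) / 2;
        uint16_t gMid = static_cast<uint16_t>((b[3 * mid] << 8) | b[3 * mid + 1]);
        if (gMid < g) { lo = mid + 1; } else { hi = mid; }
    }
    std::size_t i = 3 * lo;
    if (i < b.size() && ((static_cast<uint16_t>(b[i]) << 8) | b[i + 1]) == g) {
        b[i + 2] = zeros; // Already present: refresh the zeros count.
        return;
    }
    uint8_t record[] = {static_cast<uint8_t>(g >> 8), static_cast<uint8_t>(g), zeros};
    b.insert(b.begin() + i, record, record + 3);
}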
@@ -128,8 +130,9 @@ bool insert(TUInt8Vec& b, uint16_t g, uint8_t zeros) {
     ptrdiff_t i = lb.base() - b.begin();
     uint8_t g1 = static_cast<uint8_t>(g >> 8);
     uint8_t g2 = static_cast<uint8_t>(g);
-    LOG_TRACE(<< "Adding g = " << g << " at " << i << " (g1 = " << static_cast<uint32_t>(g1) << ", g2 = " << static_cast<uint32_t>(g2)
-              << ")");
+    LOG_TRACE(<< "Adding g = " << g << " at " << i
+              << " (g1 = " << static_cast<uint32_t>(g1)
+              << ", g2 = " << static_cast<uint32_t>(g2) << ")");
 
     b.insert(lb.base(), 3u, uint8_t());
     b[i] = g1;
@@ -143,7 +146,8 @@ void remove(TUInt8Vec& b, uint16_t g) {
     // |<---8 bits--->|<---8 bits--->|<---8 bits--->|
     // |(g >> 8) % 256|    g % 256   |     zeros    |
 
-    CHashIterator lb = std::lower_bound(CHashIterator(b.begin()), CHashIterator(b.end()), g);
+    CHashIterator lb =
+        std::lower_bound(CHashIterator(b.begin()), CHashIterator(b.end()), g);
     if (lb.base() != b.end() && *lb == g) {
         // We've got this value in the set.
         b.erase(lb.base(), lb.base() + 3);
@@ -163,7 +167,8 @@ void prune(TUInt8Vec& b, uint8_t z) {
             b[j + 2] = b[i + 2];
             j += 3;
         } else {
-            LOG_TRACE(<< "Removing " << from8Bit(b[i], b[i + 1]) << ", zeros = " << static_cast<uint32_t>(b[i + 2])
+            LOG_TRACE(<< "Removing " << from8Bit(b[i], b[i + 1])
+                      << ", zeros = " << static_cast<uint32_t>(b[i + 2])
                       << ", z = " << static_cast<uint32_t>(z));
         }
     }
@@ -243,8 +248,10 @@ CBjkstUniqueValues::CBjkstUniqueValues(std::size_t numberHashes, std::size_t max
     : m_MaxSize(maxSize), m_NumberHashes(numberHashes), m_Sketch(TUInt32Vec()) {
 }
 
-CBjkstUniqueValues::CBjkstUniqueValues(core::CStateRestoreTraverser& traverser) : m_MaxSize(0), m_NumberHashes(0) {
-    traverser.traverseSubLevel(boost::bind(&CBjkstUniqueValues::acceptRestoreTraverser, this, _1));
+CBjkstUniqueValues::CBjkstUniqueValues(core::CStateRestoreTraverser& traverser)
+    : m_MaxSize(0), m_NumberHashes(0) {
+    traverser.traverseSubLevel(
+        boost::bind(&CBjkstUniqueValues::acceptRestoreTraverser, this, _1));
 }
 
 void CBjkstUniqueValues::swap(CBjkstUniqueValues& other) {
@@ -285,7 +292,9 @@ void CBjkstUniqueValues::swap(CBjkstUniqueValues& other) {
                 boost::get<TUInt32Vec>(m_Sketch).swap(tmp);
             }
         }
-    } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception: " << e.what()); }
+    } catch (const std::exception& e) {
+        LOG_ABORT(<< "Unexpected exception: " << e.what());
+    }
 }
 
 bool CBjkstUniqueValues::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
@@ -308,7 +317,8 @@ bool CBjkstUniqueValues::acceptRestoreTraverser(core::CStateRestoreTraverser& tr
             sketch.s_H.reserve(m_NumberHashes);
             sketch.s_Z.reserve(m_NumberHashes);
             sketch.s_B.reserve(m_NumberHashes);
-            if (traverser.traverseSubLevel(boost::bind(&SSketch::acceptRestoreTraverser, &sketch, _1, m_NumberHashes)) == false) {
+            if (traverser.traverseSubLevel(boost::bind(
+                    &SSketch::acceptRestoreTraverser, &sketch, _1, m_NumberHashes)) == false) {
                 return false;
             }
             continue;
@@ -328,8 +338,11 @@ void CBjkstUniqueValues::acceptPersistInserter(core::CStatePersistInserter& inse
     } else {
         try {
             const SSketch& sketch = boost::get<SSketch>(m_Sketch);
-            inserter.insertLevel(SKETCH_TAG, boost::bind(&SSketch::acceptPersistInserter, &sketch, _1));
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception: " << e.what()); }
+            inserter.insertLevel(
+                SKETCH_TAG, boost::bind(&SSketch::acceptPersistInserter, &sketch, _1));
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception: " << e.what());
+        }
     }
 }
@@ -345,7 +358,9 @@ void CBjkstUniqueValues::add(uint32_t value) {
         try {
             SSketch& sketch = boost::get<SSketch>(m_Sketch);
             sketch.add(m_MaxSize, value);
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception: " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception: " << e.what());
+        }
     }
 }
@@ -360,7 +375,9 @@ void CBjkstUniqueValues::remove(uint32_t value) {
         try {
             SSketch& sketch = boost::get<SSketch>(m_Sketch);
             sketch.remove(value);
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception: " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception: " << e.what());
+        }
     }
 }
@@ -370,7 +387,9 @@ uint32_t CBjkstUniqueValues::number() const {
         try {
             const SSketch& sketch = boost::get<SSketch>(m_Sketch);
             return sketch.number();
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception: " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception: " << e.what());
+        }
     }
     return static_cast<uint32_t>(values->size());
 }
@@ -386,7 +405,9 @@ uint64_t CBjkstUniqueValues::checksum(uint64_t seed) const {
             seed = CChecksum::calculate(seed, sketch.s_H);
             seed = CChecksum::calculate(seed, sketch.s_Z);
             return CChecksum::calculate(seed, sketch.s_B);
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception: " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception: " << e.what());
+        }
     }
     return CChecksum::calculate(seed, *values);
 }
@@ -404,7 +425,9 @@ void CBjkstUniqueValues::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr me
             core::CMemoryDebug::dynamicSize("sketch.s_H", sketch.s_H, mem);
             core::CMemoryDebug::dynamicSize("sketch.s_Z", sketch.s_Z, mem);
             core::CMemoryDebug::dynamicSize("sketch.s_B", sketch.s_B, mem);
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception: " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception: " << e.what());
+        }
     }
 }
@@ -421,7 +444,9 @@ std::size_t CBjkstUniqueValues::memoryUsage() const {
             mem += core::CMemory::dynamicSize(sketch.s_H);
             mem += core::CMemory::dynamicSize(sketch.s_Z);
             mem += core::CMemory::dynamicSize(sketch.s_B);
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception: " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception: " << e.what());
+        }
     }
     return mem;
 }
@@ -429,7 +454,8 @@ std::size_t CBjkstUniqueValues::memoryUsage() const {
 void CBjkstUniqueValues::sketch() {
     static const std::size_t UINT8_SIZE = sizeof(uint8_t);
     static const std::size_t UINT32_SIZE = sizeof(uint32_t);
-    static const std::size_t HASH_SIZE = sizeof(core::CHashing::CUniversalHash::CUInt32UnrestrictedHash);
+    static const std::size_t HASH_SIZE =
+        sizeof(core::CHashing::CUniversalHash::CUInt32UnrestrictedHash);
     static const std::size_t VEC8_SIZE = sizeof(TUInt8Vec);
     static const std::size_t VEC32_SIZE = sizeof(TUInt32Vec);
     static const std::size_t SKETCH_SIZE = sizeof(SSketch);
@@ -438,9 +464,11 @@ void CBjkstUniqueValues::sketch() {
     if (values) {
         std::size_t valuesSize = VEC32_SIZE + UINT32_SIZE * values->capacity();
         std::size_t sketchSize =
-            SKETCH_SIZE + m_NumberHashes * (2 * HASH_SIZE + 1 * UINT8_SIZE + 1 * VEC8_SIZE + 3 * m_MaxSize * UINT8_SIZE);
+            SKETCH_SIZE + m_NumberHashes * (2 * HASH_SIZE + 1 * UINT8_SIZE +
+                                            1 * VEC8_SIZE + 3 * m_MaxSize * UINT8_SIZE);
         if (valuesSize > sketchSize) {
-            if (values->capacity() > values->size() && values->size() < (sketchSize - VEC32_SIZE) / UINT32_SIZE) {
+            if (values->capacity() > values->size() &&
+                values->size() < (sketchSize - VEC32_SIZE) / UINT32_SIZE) {
                 TUInt32Vec shrunk;
                 shrunk.reserve((sketchSize - VEC32_SIZE) / UINT32_SIZE);
                 shrunk.assign(values->begin(), values->end());
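[Editorial aside, not part of the patch: the estimate that SSketch::number() produces in the hunks below is the classic BJKST form. Each hash function retains only values whose hash has at least z trailing zero bits, so the true cardinality is roughly |B| * 2^z, and the per-hash estimates are combined with a median to damp the heavy upper tail of any single hash. A standalone sketch of that final step, under the same 3-bytes-per-record layout and assuming z < 32:]

#include <algorithm>
#include <cstdint>
#include <vector>

// One estimate per hash function: (number of retained records) * 2^z,
// where each record in b[i] occupies 3 bytes. The patch's version also
// averages the two middle estimates when the count is even; this sketch
// keeps the upper median for brevity.
uint32_t estimateDistinct(const std::vector<std::vector<uint8_t>>& b,
                          const std::vector<uint8_t>& z) {
    std::vector<uint32_t> estimates;
    estimates.reserve(z.size());
    for (std::size_t i = 0; i < z.size(); ++i) {
        estimates.push_back(static_cast<uint32_t>(b[i].size() / 3) * (1u << z[i]));
    }
    std::nth_element(estimates.begin(),
                     estimates.begin() + estimates.size() / 2, estimates.end());
    return estimates[estimates.size() / 2];
}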
@@ -477,29 +505,36 @@ void CBjkstUniqueValues::SSketch::swap(SSketch& other) { s_B.swap(other.s_B); } -bool CBjkstUniqueValues::SSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::size_t numberHashes) { +bool CBjkstUniqueValues::SSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, + std::size_t numberHashes) { core::CHashing::CUniversalHash::CFromString hashFromString(PAIR_DELIMITER); do { const std::string& name = traverser.name(); if (name == HASH_G_TAG) { - if (core::CPersistUtils::fromString(traverser.value(), hashFromString, s_G, DELIMITER) == false || s_G.size() != numberHashes) { + if (core::CPersistUtils::fromString(traverser.value(), hashFromString, + s_G, DELIMITER) == false || + s_G.size() != numberHashes) { LOG_ERROR(<< "Invalid hashes in " << traverser.value()); return false; } } else if (name == HASH_H_TAG) { - if (core::CPersistUtils::fromString(traverser.value(), hashFromString, s_H, DELIMITER) == false || s_H.size() != numberHashes) { + if (core::CPersistUtils::fromString(traverser.value(), hashFromString, + s_H, DELIMITER) == false || + s_H.size() != numberHashes) { LOG_ERROR(<< "Invalid hashes in " << traverser.value()); return false; } } else if (name == Z_TAG) { - if (core::CPersistUtils::fromString(traverser.value(), CFromString(), s_Z, DELIMITER) == false || + if (core::CPersistUtils::fromString(traverser.value(), CFromString(), + s_Z, DELIMITER) == false || s_Z.size() != numberHashes) { LOG_ERROR(<< "Invalid zeros in " << traverser.value()); return false; } } else if (name == B_TAG) { s_B.push_back(TUInt8Vec()); - if (core::CPersistUtils::fromString(traverser.value(), CFromString(), s_B.back(), DELIMITER) == false) { + if (core::CPersistUtils::fromString(traverser.value(), CFromString(), + s_B.back(), DELIMITER) == false) { LOG_ERROR(<< "Invalid values in " << traverser.value()); return false; } @@ -516,11 +551,15 @@ bool CBjkstUniqueValues::SSketch::acceptRestoreTraverser(core::CStateRestoreTrav void CBjkstUniqueValues::SSketch::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CHashing::CUniversalHash::CToString hashToString(PAIR_DELIMITER); - inserter.insertValue(HASH_G_TAG, core::CPersistUtils::toString(s_G, hashToString, DELIMITER)); - inserter.insertValue(HASH_H_TAG, core::CPersistUtils::toString(s_H, hashToString, DELIMITER)); - inserter.insertValue(Z_TAG, core::CPersistUtils::toString(s_Z, CToString(), DELIMITER)); + inserter.insertValue( + HASH_G_TAG, core::CPersistUtils::toString(s_G, hashToString, DELIMITER)); + inserter.insertValue( + HASH_H_TAG, core::CPersistUtils::toString(s_H, hashToString, DELIMITER)); + inserter.insertValue( + Z_TAG, core::CPersistUtils::toString(s_Z, CToString(), DELIMITER)); for (std::size_t i = 0u; i < s_B.size(); ++i) { - inserter.insertValue(B_TAG, core::CPersistUtils::toString(s_B[i], CToString(), DELIMITER)); + inserter.insertValue(B_TAG, core::CPersistUtils::toString( + s_B[i], CToString(), DELIMITER)); } } @@ -543,7 +582,8 @@ void CBjkstUniqueValues::SSketch::add(std::size_t maxSize, uint32_t value) { shrunk.assign(b.begin(), b.end()); b.swap(shrunk); } - LOG_TRACE(<< "|B| = " << b.size() << ", z = " << static_cast(s_Z[i])); + LOG_TRACE(<< "|B| = " << b.size() + << ", z = " << static_cast(s_Z[i])); } } } @@ -566,7 +606,8 @@ uint32_t CBjkstUniqueValues::SSketch::number() const { TUInt32Vec estimates; estimates.reserve(s_Z.size()); for (std::size_t i = 0u; i < s_Z.size(); ++i) { - LOG_TRACE(<< "|B| = " << s_B[i].size() << ", z = " << static_cast(s_Z[i])); 
+ LOG_TRACE(<< "|B| = " << s_B[i].size() + << ", z = " << static_cast(s_Z[i])); estimates.push_back(static_cast(s_B[i].size() / 3) * (1 << s_Z[i])); } @@ -574,7 +615,8 @@ uint32_t CBjkstUniqueValues::SSketch::number() const { std::size_t n = estimates.size(); if (n % 2 == 0) { - std::partial_sort(estimates.begin(), estimates.begin() + n / 2 + 1, estimates.end()); + std::partial_sort(estimates.begin(), estimates.begin() + n / 2 + 1, + estimates.end()); return (estimates[n / 2] + estimates[n / 2 - 1]) / 2; } diff --git a/lib/maths/CCalendarComponent.cc b/lib/maths/CCalendarComponent.cc index 93dc1f9941..93dcd5c382 100644 --- a/lib/maths/CCalendarComponent.cc +++ b/lib/maths/CCalendarComponent.cc @@ -41,7 +41,8 @@ CCalendarComponent::CCalendarComponent(const CCalendarFeature& feature, CSplineTypes::EBoundaryCondition boundaryCondition, CSplineTypes::EType valueInterpolationType, CSplineTypes::EType varianceInterpolationType) - : CDecompositionComponent{maxSize, boundaryCondition, valueInterpolationType, varianceInterpolationType}, + : CDecompositionComponent{maxSize, boundaryCondition, + valueInterpolationType, varianceInterpolationType}, m_Bucketing{feature, decayRate, minimumBucketLength} { } @@ -50,8 +51,10 @@ CCalendarComponent::CCalendarComponent(double decayRate, core::CStateRestoreTraverser& traverser, CSplineTypes::EType valueInterpolationType, CSplineTypes::EType varianceInterpolationType) - : CDecompositionComponent{0, CSplineTypes::E_Periodic, valueInterpolationType, varianceInterpolationType} { - traverser.traverseSubLevel(boost::bind(&CCalendarComponent::acceptRestoreTraverser, this, decayRate, minimumBucketLength, _1)); + : CDecompositionComponent{0, CSplineTypes::E_Periodic, + valueInterpolationType, varianceInterpolationType} { + traverser.traverseSubLevel(boost::bind(&CCalendarComponent::acceptRestoreTraverser, + this, decayRate, minimumBucketLength, _1)); } void CCalendarComponent::swap(CCalendarComponent& other) { @@ -59,16 +62,19 @@ void CCalendarComponent::swap(CCalendarComponent& other) { m_Bucketing.swap(other.m_Bucketing); } -bool CCalendarComponent::acceptRestoreTraverser(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser) { +bool CCalendarComponent::acceptRestoreTraverser(double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; RESTORE(DECOMPOSITION_COMPONENT_TAG, traverser.traverseSubLevel( - boost::bind(&CDecompositionComponent::acceptRestoreTraverser, static_cast(this), _1))) + boost::bind(&CDecompositionComponent::acceptRestoreTraverser, + static_cast(this), _1))) RESTORE_SETUP_TEARDOWN(BUCKETING_TAG, - CCalendarComponentAdaptiveBucketing bucketing(decayRate, minimumBucketLength, traverser), - true, - m_Bucketing.swap(bucketing)) + CCalendarComponentAdaptiveBucketing bucketing( + decayRate, minimumBucketLength, traverser), + true, m_Bucketing.swap(bucketing)) } while (traverser.next()); return true; @@ -77,8 +83,10 @@ bool CCalendarComponent::acceptRestoreTraverser(double decayRate, double minimum void CCalendarComponent::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel( DECOMPOSITION_COMPONENT_TAG, - boost::bind(&CDecompositionComponent::acceptPersistInserter, static_cast(this), _1)); - inserter.insertLevel(BUCKETING_TAG, boost::bind(&CCalendarComponentAdaptiveBucketing::acceptPersistInserter, &m_Bucketing, _1)); + boost::bind(&CDecompositionComponent::acceptPersistInserter, + static_cast(this), _1)); + 
inserter.insertLevel(BUCKETING_TAG, boost::bind(&CCalendarComponentAdaptiveBucketing::acceptPersistInserter, + &m_Bucketing, _1)); } bool CCalendarComponent::initialized() const { @@ -175,7 +183,8 @@ void CCalendarComponent::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr me } std::size_t CCalendarComponent::memoryUsage() const { - return core::CMemory::dynamicSize(m_Bucketing) + core::CMemory::dynamicSize(this->splines()); + return core::CMemory::dynamicSize(m_Bucketing) + + core::CMemory::dynamicSize(this->splines()); } } } diff --git a/lib/maths/CCalendarComponentAdaptiveBucketing.cc b/lib/maths/CCalendarComponentAdaptiveBucketing.cc index a45e46f039..3e518debfc 100644 --- a/lib/maths/CCalendarComponentAdaptiveBucketing.cc +++ b/lib/maths/CCalendarComponentAdaptiveBucketing.cc @@ -44,7 +44,8 @@ const std::string VALUES_TAG{"c"}; const std::string EMPTY_STRING; } -CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing() : CAdaptiveBucketing{0.0, 0.0} { +CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing() + : CAdaptiveBucketing{0.0, 0.0} { } CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing(CCalendarFeature feature, @@ -53,16 +54,19 @@ CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing(CCalend : CAdaptiveBucketing{decayRate, minimumBucketLength}, m_Feature{feature} { } -CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser& traverser) +CCalendarComponentAdaptiveBucketing::CCalendarComponentAdaptiveBucketing( + double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser) : CAdaptiveBucketing{decayRate, minimumBucketLength} { - traverser.traverseSubLevel(boost::bind(&CCalendarComponentAdaptiveBucketing::acceptRestoreTraverser, this, _1)); + traverser.traverseSubLevel(boost::bind( + &CCalendarComponentAdaptiveBucketing::acceptRestoreTraverser, this, _1)); } void CCalendarComponentAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel(ADAPTIVE_BUCKETING_TAG, - boost::bind(&CAdaptiveBucketing::acceptPersistInserter, static_cast(this), _1)); + boost::bind(&CAdaptiveBucketing::acceptPersistInserter, + static_cast(this), _1)); inserter.insertValue(FEATURE_TAG, m_Feature.toDelimited()); core::CPersistUtils::persist(VALUES_TAG, m_Values, inserter); } @@ -111,7 +115,8 @@ void CCalendarComponentAdaptiveBucketing::add(core_t::TTime time, double value, TFloatMeanVarAccumulator variance{m_Values[bucket]}; variance.add(value, weight * weight); m_Values[bucket].add(value, weight); - CBasicStatistics::moment<1>(m_Values[bucket]) = CBasicStatistics::maximumLikelihoodVariance(variance); + CBasicStatistics::moment<1>(m_Values[bucket]) = + CBasicStatistics::maximumLikelihoodVariance(variance); } } @@ -152,7 +157,8 @@ double CCalendarComponentAdaptiveBucketing::count(core_t::TTime time) const { return value ? 
static_cast(CBasicStatistics::count(*value)) : 0.0; } -const TFloatMeanVarAccumulator* CCalendarComponentAdaptiveBucketing::value(core_t::TTime time) const { +const TFloatMeanVarAccumulator* +CCalendarComponentAdaptiveBucketing::value(core_t::TTime time) const { const TFloatMeanVarAccumulator* result{nullptr}; if (this->initialized()) { std::size_t bucket{0}; @@ -179,7 +185,8 @@ uint64_t CCalendarComponentAdaptiveBucketing::checksum(uint64_t seed) const { void CCalendarComponentAdaptiveBucketing::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCalendarComponentAdaptiveBucketing"); - core::CMemoryDebug::dynamicSize("m_Endpoints", this->CAdaptiveBucketing::endpoints(), mem); + core::CMemoryDebug::dynamicSize("m_Endpoints", + this->CAdaptiveBucketing::endpoints(), mem); core::CMemoryDebug::dynamicSize("m_Centres", this->CAdaptiveBucketing::centres(), mem); core::CMemoryDebug::dynamicSize("m_Values", m_Values, mem); } @@ -188,7 +195,8 @@ std::size_t CCalendarComponentAdaptiveBucketing::memoryUsage() const { return this->CAdaptiveBucketing::memoryUsage() + core::CMemory::dynamicSize(m_Values); } -const CCalendarComponentAdaptiveBucketing::TFloatVec& CCalendarComponentAdaptiveBucketing::endpoints() const { +const CCalendarComponentAdaptiveBucketing::TFloatVec& +CCalendarComponentAdaptiveBucketing::endpoints() const { return this->CAdaptiveBucketing::endpoints(); } @@ -196,11 +204,13 @@ double CCalendarComponentAdaptiveBucketing::count() const { return this->CAdaptiveBucketing::count(); } -CCalendarComponentAdaptiveBucketing::TDoubleVec CCalendarComponentAdaptiveBucketing::values(core_t::TTime time) const { +CCalendarComponentAdaptiveBucketing::TDoubleVec +CCalendarComponentAdaptiveBucketing::values(core_t::TTime time) const { return this->CAdaptiveBucketing::values(time); } -CCalendarComponentAdaptiveBucketing::TDoubleVec CCalendarComponentAdaptiveBucketing::variances() const { +CCalendarComponentAdaptiveBucketing::TDoubleVec +CCalendarComponentAdaptiveBucketing::variances() const { return this->CAdaptiveBucketing::variances(); } @@ -209,7 +219,8 @@ bool CCalendarComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRes const std::string& name{traverser.name()}; RESTORE(ADAPTIVE_BUCKETING_TAG, traverser.traverseSubLevel( - boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, static_cast(this), _1))); + boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, + static_cast(this), _1))); RESTORE(FEATURE_TAG, m_Feature.fromDelimited(traverser.value())); RESTORE(VALUES_TAG, core::CPersistUtils::restore(VALUES_TAG, m_Values, traverser)) } while (traverser.next()); @@ -262,10 +273,12 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { for (std::size_t i = 1u; i < n; ++i) { double yl{m_Endpoints[i - 1]}; double yr{m_Endpoints[i]}; - std::size_t r = std::lower_bound(endpoints.begin(), endpoints.end(), yr) - endpoints.begin(); + std::size_t r = std::lower_bound(endpoints.begin(), endpoints.end(), yr) - + endpoints.begin(); r = CTools::truncate(r, std::size_t(1), n - 1); - std::size_t l = std::upper_bound(endpoints.begin(), endpoints.end(), yl) - endpoints.begin(); + std::size_t l = std::upper_bound(endpoints.begin(), endpoints.end(), yl) - + endpoints.begin(); l = CTools::truncate(l, std::size_t(1), r); LOG_TRACE(<< "interval = [" << yl << "," << yr << "]"); @@ -278,17 +291,21 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { double interval{m_Endpoints[i] - m_Endpoints[i - 1]}; double 
w{CTools::truncate(interval / (xr - xl), 0.0, 1.0)}; values.push_back(CBasicStatistics::scaled(m_Values[l - 1], w * w)); - centres.push_back(CTools::truncate(static_cast(m_Centres[l - 1]), yl, yr)); + centres.push_back( + CTools::truncate(static_cast(m_Centres[l - 1]), yl, yr)); } else { double interval{xr - m_Endpoints[i - 1]}; double w{CTools::truncate(interval / (xr - xl), 0.0, 1.0)}; TDoubleMeanVarAccumulator value{CBasicStatistics::scaled(m_Values[l - 1], w)}; - TDoubleMeanAccumulator centre{ - CBasicStatistics::accumulator(w * CBasicStatistics::count(m_Values[l - 1]), static_cast(m_Centres[l - 1]))}; + TDoubleMeanAccumulator centre{CBasicStatistics::accumulator( + w * CBasicStatistics::count(m_Values[l - 1]), + static_cast(m_Centres[l - 1]))}; double count{w * w * CBasicStatistics::count(m_Values[l - 1])}; while (++l < r) { value += m_Values[l - 1]; - centre += CBasicStatistics::accumulator(CBasicStatistics::count(m_Values[l - 1]), static_cast(m_Centres[l - 1])); + centre += CBasicStatistics::accumulator( + CBasicStatistics::count(m_Values[l - 1]), + static_cast(m_Centres[l - 1])); count += CBasicStatistics::count(m_Values[l - 1]); } xl = endpoints[l - 1]; @@ -296,7 +313,9 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { interval = m_Endpoints[i] - xl; w = CTools::truncate(interval / (xr - xl), 0.0, 1.0); value += CBasicStatistics::scaled(m_Values[l - 1], w); - centre += CBasicStatistics::accumulator(w * CBasicStatistics::count(m_Values[l - 1]), static_cast(m_Centres[l - 1])); + centre += CBasicStatistics::accumulator( + w * CBasicStatistics::count(m_Values[l - 1]), + static_cast(m_Centres[l - 1])); count += w * w * CBasicStatistics::count(m_Values[l - 1]); double scale{count / CBasicStatistics::count(value)}; values.push_back(CBasicStatistics::scaled(value, scale)); @@ -316,7 +335,8 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { for (std::size_t i = 0u; i < m; ++i) { double ci{CBasicStatistics::count(values[i])}; if (ci > 0.0) { - CBasicStatistics::scale(count * (endpoints[i + 1] - endpoints[i]) / ci, values[i]); + CBasicStatistics::scale(count * (endpoints[i + 1] - endpoints[i]) / ci, + values[i]); } } @@ -334,7 +354,10 @@ bool CCalendarComponentAdaptiveBucketing::inWindow(core_t::TTime time) const { return m_Feature.inWindow(time); } -void CCalendarComponentAdaptiveBucketing::add(std::size_t bucket, core_t::TTime /*time*/, double value, double weight) { +void CCalendarComponentAdaptiveBucketing::add(std::size_t bucket, + core_t::TTime /*time*/, + double value, + double weight) { m_Values[bucket].add(value, weight); } @@ -346,7 +369,9 @@ double CCalendarComponentAdaptiveBucketing::count(std::size_t bucket) const { return CBasicStatistics::count(m_Values[bucket]); } -double CCalendarComponentAdaptiveBucketing::predict(std::size_t bucket, core_t::TTime /*time*/, double /*offset*/) const { +double CCalendarComponentAdaptiveBucketing::predict(std::size_t bucket, + core_t::TTime /*time*/, + double /*offset*/) const { return CBasicStatistics::mean(m_Values[bucket]); } diff --git a/lib/maths/CCalendarFeature.cc b/lib/maths/CCalendarFeature.cc index 0f27259d56..871bee4768 100644 --- a/lib/maths/CCalendarFeature.cc +++ b/lib/maths/CCalendarFeature.cc @@ -20,8 +20,10 @@ namespace ml { namespace maths { namespace { -const int LAST_DAY_IN_MONTH[] = {30, 27, 30, 29, 30, 29, 30, 30, 29, 30, 29, 30}; -const std::string DAYS[] = {"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; +const int 
LAST_DAY_IN_MONTH[] = {30, 27, 30, 29, 30, 29, + 30, 30, 29, 30, 29, 30}; +const std::string DAYS[] = {"Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"}; const int DAY = core::constants::DAY; @@ -45,21 +47,24 @@ int dayOfFirst(int dayOfMonth, int dayOfWeek) { //! Print the day or week count. std::string print_(int count, bool suffix) { static const std::string suffix_[] = {"th", "st", "nd", "rd", "th"}; - return core::CStringUtils::typeToString(count) + (suffix ? suffix_[count < 20 ? std::min(count, 4) : std::min(count % 10, 4)] : ""); + return core::CStringUtils::typeToString(count) + + (suffix ? suffix_[count < 20 ? std::min(count, 4) : std::min(count % 10, 4)] : ""); } } CCalendarFeature::CCalendarFeature() : m_Feature(INVALID), m_Value(INVALID) { } -CCalendarFeature::CCalendarFeature(uint16_t feature, core_t::TTime time) : m_Feature(INVALID), m_Value(INVALID) { +CCalendarFeature::CCalendarFeature(uint16_t feature, core_t::TTime time) + : m_Feature(INVALID), m_Value(INVALID) { int dayOfWeek{}; int dayOfMonth{}; int dayOfYear{}; int month{}; int year{}; int secondsSinceMidnight{}; - if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, month, year, secondsSinceMidnight)) { + if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, + month, year, secondsSinceMidnight)) { dayOfMonth -= 1; this->initialize(feature, dayOfWeek, dayOfMonth, month, year); } else { @@ -75,7 +80,8 @@ CCalendarFeature::TCalendarFeature4Ary CCalendarFeature::features(core_t::TTime int month{}; int year{}; int secondsSinceMidnight{}; - if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, month, year, secondsSinceMidnight)) { + if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, + month, year, secondsSinceMidnight)) { dayOfMonth -= 1; auto i = result.begin(); for (uint16_t feature = BEGIN_FEATURES; feature < END_FEATURES; ++feature, ++i) { @@ -103,7 +109,8 @@ void CCalendarFeature::initialize(uint16_t feature, int dayOfWeek, int dayOfMont break; case DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH: m_Feature = feature; - m_Value = static_cast(8 * ((lastDayInMonth(year, month) - dayOfMonth) / 7) + dayOfWeek); + m_Value = static_cast( + 8 * ((lastDayInMonth(year, month) - dayOfMonth) / 7) + dayOfWeek); break; default: LOG_ERROR(<< "Invalid feature: " << feature); @@ -143,13 +150,16 @@ core_t::TTime CCalendarFeature::offset(core_t::TTime time) const { int month{}; int year{}; int secondsSinceMidnight{}; - if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, month, year, secondsSinceMidnight)) { + if (core::CTimezone::instance().dateFields(time, dayOfWeek, dayOfMonth, dayOfYear, + month, year, secondsSinceMidnight)) { dayOfMonth -= 1; switch (m_Feature) { case DAYS_SINCE_START_OF_MONTH: return DAY * (dayOfMonth - static_cast(m_Value)) + secondsSinceMidnight; case DAYS_BEFORE_END_OF_MONTH: - return DAY * (dayOfMonth - (lastDayInMonth(year, month) - static_cast(m_Value))) + secondsSinceMidnight; + return DAY * (dayOfMonth - + (lastDayInMonth(year, month) - static_cast(m_Value))) + + secondsSinceMidnight; case DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH: { int dayOfFirst_ = dayOfFirst(dayOfMonth, dayOfWeek); int dayOfWeek_ = static_cast(m_Value) % 8; @@ -162,7 +172,8 @@ core_t::TTime CCalendarFeature::offset(core_t::TTime time) const { int dayOfLast_ = (lastDayInMonth_ + dayOfFirst(dayOfMonth, dayOfWeek)) % 7; int dayOfWeek_ = static_cast(m_Value) % 8; int 
weeksToEndOfMonth_ = static_cast(m_Value) / 8; - int dayOfMonth_ = lastDayInMonth_ - (7 * weeksToEndOfMonth_ + (7 + dayOfLast_ - dayOfWeek_) % 7); + int dayOfMonth_ = lastDayInMonth_ - (7 * weeksToEndOfMonth_ + + (7 + dayOfLast_ - dayOfWeek_) % 7); return DAY * (dayOfMonth - dayOfMonth_) + secondsSinceMidnight; } default: diff --git a/lib/maths/CCategoricalTools.cc b/lib/maths/CCategoricalTools.cc index 794046dac7..6f38196b73 100644 --- a/lib/maths/CCategoricalTools.cc +++ b/lib/maths/CCategoricalTools.cc @@ -30,7 +30,8 @@ const double LOG_TWO = std::log(2.0); //! A fast lower bound for the binomial probability of \p m //! successes for \p n trials and probability of success \p p. -inline maths_t::EFloatingPointErrorStatus logBinomialProbabilityFastLowerBound(std::size_t n, double p, std::size_t m, double& result) { +inline maths_t::EFloatingPointErrorStatus +logBinomialProbabilityFastLowerBound(std::size_t n, double p, std::size_t m, double& result) { double n_ = static_cast(n); double m_ = static_cast(m); @@ -71,7 +72,8 @@ inline maths_t::EFloatingPointErrorStatus logBinomialProbabilityFastLowerBound(s static const double CONSTANT = std::log(boost::math::double_constants::root_two_pi) - 2.0; double p_ = m_ / n_; - result = -0.5 * std::log(n_ * (1.0 - p_) * p_) + m_ * std::log(p / p_) + (n_ - m_) * std::log((1.0 - p) / (1.0 - p_)) + CONSTANT; + result = -0.5 * std::log(n_ * (1.0 - p_) * p_) + m_ * std::log(p / p_) + + (n_ - m_) * std::log((1.0 - p) / (1.0 - p_)) + CONSTANT; return maths_t::E_FpNoErrors; } @@ -79,7 +81,8 @@ inline maths_t::EFloatingPointErrorStatus logBinomialProbabilityFastLowerBound(s //! binomial, i.e. the probability of seeing m or a larger value //! from a binomial with \p trials and probability of success //! \p p. -maths_t::EFloatingPointErrorStatus logRightTailProbabilityUpperBound(std::size_t n, double p, std::size_t m, double& result) { +maths_t::EFloatingPointErrorStatus +logRightTailProbabilityUpperBound(std::size_t n, double p, std::size_t m, double& result) { if (m > n) { LOG_ERROR(<< "Invalid sample: " << m << " > " << n); result = boost::numeric::bounds::lowest(); @@ -122,11 +125,13 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityUpperBound(std::size_t double eps = (m_ - n_ * p) / n_; double q = p + eps; - double chernoff = m_ * (q * std::log(p / q) + (1.0 - q) * std::log((1.0 - p) / (1.0 - q))); + double chernoff = m_ * (q * std::log(p / q) + + (1.0 - q) * std::log((1.0 - p) / (1.0 - q))); result = std::min(chernoff + LOG_TWO, 0.0); return maths_t::E_FpNoErrors; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to calculate c.d.f. complement: " << e.what() << ", n = " << n << ", p = " << p); + LOG_ERROR(<< "Failed to calculate c.d.f. complement: " << e.what() + << ", n = " << n << ", p = " << p); } return maths_t::E_FpOverflowed; @@ -136,7 +141,8 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityUpperBound(std::size_t //! binomial, i.e. the probability of seeing m or a larger value //! from a binomial with \p trials and probability of success //! \p p. 
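[Editorial aside, not part of the patch: the upper bound used above is a Chernoff-style tail bound. In its textbook Chernoff-Hoeffding form, for X ~ Binomial(n, p) and q = m/n > p, P(X >= m) <= exp(-n * KL(q || p)), where KL is the Bernoulli relative entropy. A minimal log-space sketch of that textbook version, valid for 0 < p < q < 1 and not the patch's exact parameterisation:]

#include <cmath>
#include <cstddef>

// Returns an upper bound on log P(X >= m) for X ~ Binomial(n, p),
// namely -n * KL(q || p) with q = m / n; requires 0 < p < q < 1.
double logBinomialRightTailUpperBound(std::size_t n, double p, std::size_t m) {
    double n_ = static_cast<double>(n);
    double q = static_cast<double>(m) / n_;
    double kl = q * std::log(q / p) + (1.0 - q) * std::log((1.0 - q) / (1.0 - p));
    return -n_ * kl;
}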
-maths_t::EFloatingPointErrorStatus logRightTailProbabilityLowerBound(std::size_t n, double p, std::size_t m, double& result) { +maths_t::EFloatingPointErrorStatus +logRightTailProbabilityLowerBound(std::size_t n, double p, std::size_t m, double& result) { if (m > n) { LOG_ERROR(<< "Invalid sample: " << m << " > " << n); result = boost::numeric::bounds::lowest(); @@ -207,16 +213,20 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityLowerBound(std::size_t } double logf; - maths_t::EFloatingPointErrorStatus status = logBinomialProbabilityFastLowerBound(n, p, m, logf); + maths_t::EFloatingPointErrorStatus status = + logBinomialProbabilityFastLowerBound(n, p, m, logf); if (status & maths_t::E_FpAllErrors) { result = logf; return status; } - double bound = logf + std::log(1.0 + n_ / (m_ + 1.0) * (std::exp(p / (1.0 - p) * (n_ - m_) / n_) - 1.0)); + double bound = + logf + std::log(1.0 + n_ / (m_ + 1.0) * + (std::exp(p / (1.0 - p) * (n_ - m_) / n_) - 1.0)); result = std::min(bound + LOG_TWO, 0.0); return maths_t::E_FpNoErrors; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to calculate c.d.f. complement: " << e.what() << ", n = " << n << ", p = " << p); + LOG_ERROR(<< "Failed to calculate c.d.f. complement: " << e.what() + << ", n = " << n << ", p = " << p); } return maths_t::E_FpFailed; @@ -225,7 +235,8 @@ maths_t::EFloatingPointErrorStatus logRightTailProbabilityLowerBound(std::size_t //! Get the log of right tail probability, i.e. the probability //! of seeing m or a larger value from a binomial with \p trials //! and probability of success \p p. -maths_t::EFloatingPointErrorStatus logRightTailProbability(std::size_t n, double p, std::size_t m, double& result) { +maths_t::EFloatingPointErrorStatus +logRightTailProbability(std::size_t n, double p, std::size_t m, double& result) { if (m > n) { LOG_ERROR(<< "Invalid sample: " << m << " > " << n); result = boost::numeric::bounds::lowest(); @@ -266,15 +277,20 @@ maths_t::EFloatingPointErrorStatus logRightTailProbability(std::size_t n, double // upper bound. double lb, ub; - maths_t::EFloatingPointErrorStatus status = logRightTailProbabilityLowerBound(n, p, m, lb); + maths_t::EFloatingPointErrorStatus status = + logRightTailProbabilityLowerBound(n, p, m, lb); if (status & maths_t::E_FpAllErrors) { - result = status == maths_t::E_FpOverflowed ? boost::numeric::bounds::lowest() : 0.0; + result = status == maths_t::E_FpOverflowed + ? boost::numeric::bounds::lowest() + : 0.0; return status; } status = logRightTailProbabilityUpperBound(n, p, m, ub); if (status & maths_t::E_FpAllErrors) { - result = status == maths_t::E_FpOverflowed ? boost::numeric::bounds::lowest() : 0.0; + result = status == maths_t::E_FpOverflowed + ? boost::numeric::bounds::lowest() + : 0.0; return status; } @@ -299,7 +315,8 @@ maths_t::EFloatingPointErrorStatus logRightTailProbability(std::size_t n, double result = std::min(std::log(oneMinusF + f) + LOG_TWO, 0.0); return maths_t::E_FpNoErrors; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to calculate c.d.f. complement: " << e.what() << ", n = " << n << ", p = " << p); + LOG_ERROR(<< "Failed to calculate c.d.f. 
complement: " << e.what() + << ", n = " << n << ", p = " << p); } return maths_t::E_FpFailed; @@ -313,7 +330,8 @@ bool CCategoricalTools::probabilityOfLessLikelyMultinomialSample(const TDoubleVe result = 1.0; if (i.size() != ni.size()) { - LOG_ERROR(<< "Inconsistent categories and counts: " << core::CContainerPrinter::print(i) << " " + LOG_ERROR(<< "Inconsistent categories and counts: " + << core::CContainerPrinter::print(i) << " " << core::CContainerPrinter::print(ni)); return false; } @@ -338,7 +356,9 @@ double CCategoricalTools::probabilityOfCategory(std::size_t n, const double prob return probability; } -bool CCategoricalTools::expectedDistinctCategories(const TDoubleVec& probabilities, const double n, double& result) { +bool CCategoricalTools::expectedDistinctCategories(const TDoubleVec& probabilities, + const double n, + double& result) { // We imagine drawing n samples from a multinomial random variable // with m categories. We'd like to calculate how many distinct // categories we'd expect in this sample of n. This quantity is @@ -388,7 +408,8 @@ double CCategoricalTools::logBinomialCoefficient(std::size_t n, std::size_t m) { } double n_ = static_cast(n); double m_ = static_cast(m); - return boost::math::lgamma(n_ + 1.0) - boost::math::lgamma(m_ + 1.0) - boost::math::lgamma(n_ - m_ + 1.0); + return boost::math::lgamma(n_ + 1.0) - boost::math::lgamma(m_ + 1.0) - + boost::math::lgamma(n_ - m_ + 1.0); } double CCategoricalTools::binomialCoefficient(std::size_t n, std::size_t m) { @@ -403,7 +424,8 @@ bool CCategoricalTools::probabilityOfLessLikelyCategoryCount(TDoubleVec& probabi result.clear(); if (i.size() != ni.size()) { - LOG_ERROR(<< "Inconsistent categories and counts: " << core::CContainerPrinter::print(i) << " " + LOG_ERROR(<< "Inconsistent categories and counts: " + << core::CContainerPrinter::print(i) << " " << core::CContainerPrinter::print(ni)); return false; } @@ -468,7 +490,8 @@ bool CCategoricalTools::probabilityOfLessLikelyCategoryCount(TDoubleVec& probabi if (sample[j] > static_cast((n_ + 1.0) * pj)) { std::size_t nj = sample[j]; double lowerBound; - if (logRightTailProbabilityLowerBound(n, pj, nj, lowerBound) & maths_t::E_FpAllErrors) { + if (logRightTailProbabilityLowerBound(n, pj, nj, lowerBound) & + maths_t::E_FpAllErrors) { continue; } if (logPMin > lowerBound) { @@ -491,7 +514,8 @@ bool CCategoricalTools::probabilityOfLessLikelyCategoryCount(TDoubleVec& probabi return 0.0; } -maths_t::EFloatingPointErrorStatus CCategoricalTools::logBinomialProbability(std::size_t n, double p, std::size_t m, double& result) { +maths_t::EFloatingPointErrorStatus +CCategoricalTools::logBinomialProbability(std::size_t n, double p, std::size_t m, double& result) { if (m > n) { result = boost::numeric::bounds::lowest(); return maths_t::E_FpOverflowed; @@ -520,18 +544,22 @@ maths_t::EFloatingPointErrorStatus CCategoricalTools::logBinomialProbability(std double n_ = static_cast(n); double m_ = static_cast(m); - result = std::min(boost::math::lgamma(n_ + 1.0) - boost::math::lgamma(m_ + 1.0) - boost::math::lgamma(n_ - m_ + 1.0) + + result = std::min(boost::math::lgamma(n_ + 1.0) - boost::math::lgamma(m_ + 1.0) - + boost::math::lgamma(n_ - m_ + 1.0) + m_ * std::log(p) + (n_ - m_) * std::log(1.0 - p), 0.0); return maths_t::E_FpNoErrors; } maths_t::EFloatingPointErrorStatus -CCategoricalTools::logMultinomialProbability(const TDoubleVec& probabilities, const TSizeVec& ni, double& result) { +CCategoricalTools::logMultinomialProbability(const TDoubleVec& probabilities, + const TSizeVec& ni, + 
double& result) { result = 0.0; if (probabilities.size() != ni.size()) { - LOG_ERROR(<< "Inconsistent categories and counts: " << core::CContainerPrinter::print(probabilities) << " " + LOG_ERROR(<< "Inconsistent categories and counts: " + << core::CContainerPrinter::print(probabilities) << " " << core::CContainerPrinter::print(ni)); return maths_t::E_FpFailed; } diff --git a/lib/maths/CClusterer.cc b/lib/maths/CClusterer.cc index b1867ec010..139230ed60 100644 --- a/lib/maths/CClusterer.cc +++ b/lib/maths/CClusterer.cc @@ -12,7 +12,8 @@ namespace { const std::string INDEX_TAG("a"); } -CClustererTypes::CIndexGenerator::CIndexGenerator() : m_IndexHeap(new TSizeVec(1u, 0u)) { +CClustererTypes::CIndexGenerator::CIndexGenerator() + : m_IndexHeap(new TSizeVec(1u, 0u)) { } bool CClustererTypes::CIndexGenerator::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { diff --git a/lib/maths/CClustererStateSerialiser.cc b/lib/maths/CClustererStateSerialiser.cc index e335ff3b93..124d4b2191 100644 --- a/lib/maths/CClustererStateSerialiser.cc +++ b/lib/maths/CClustererStateSerialiser.cc @@ -14,9 +14,11 @@ namespace ml { namespace maths { -bool CClustererStateSerialiser:: -operator()(const SDistributionRestoreParams& params, TClusterer1dPtr& ptr, core::CStateRestoreTraverser& traverser) { - return this->operator()(params, CClusterer1d::CDoNothing(), CClusterer1d::CDoNothing(), ptr, traverser); +bool CClustererStateSerialiser::operator()(const SDistributionRestoreParams& params, + TClusterer1dPtr& ptr, + core::CStateRestoreTraverser& traverser) { + return this->operator()(params, CClusterer1d::CDoNothing(), + CClusterer1d::CDoNothing(), ptr, traverser); } bool CClustererStateSerialiser::operator()(const SDistributionRestoreParams& params, @@ -48,8 +50,10 @@ bool CClustererStateSerialiser::operator()(const SDistributionRestoreParams& par return true; } -void CClustererStateSerialiser::operator()(const CClusterer1d& clusterer, core::CStatePersistInserter& inserter) { - inserter.insertLevel(clusterer.persistenceTag(), boost::bind(&CClusterer1d::acceptPersistInserter, &clusterer, _1)); +void CClustererStateSerialiser::operator()(const CClusterer1d& clusterer, + core::CStatePersistInserter& inserter) { + inserter.insertLevel(clusterer.persistenceTag(), + boost::bind(&CClusterer1d::acceptPersistInserter, &clusterer, _1)); } } } diff --git a/lib/maths/CConstantPrior.cc b/lib/maths/CConstantPrior.cc index 05a8a89411..26f9ef5923 100644 --- a/lib/maths/CConstantPrior.cc +++ b/lib/maths/CConstantPrior.cc @@ -46,21 +46,25 @@ const std::string EMPTY_STRING; const double LOG_TWO = std::log(2.0); } -CConstantPrior::CConstantPrior(const TOptionalDouble& constant) : CPrior(maths_t::E_DiscreteData, 0.0) { +CConstantPrior::CConstantPrior(const TOptionalDouble& constant) + : CPrior(maths_t::E_DiscreteData, 0.0) { if (constant) { setConstant(*constant, m_Constant); } } -CConstantPrior::CConstantPrior(core::CStateRestoreTraverser& traverser) : CPrior(maths_t::E_DiscreteData, 0.0) { - traverser.traverseSubLevel(boost::bind(&CConstantPrior::acceptRestoreTraverser, this, _1)); +CConstantPrior::CConstantPrior(core::CStateRestoreTraverser& traverser) + : CPrior(maths_t::E_DiscreteData, 0.0) { + traverser.traverseSubLevel( + boost::bind(&CConstantPrior::acceptRestoreTraverser, this, _1)); } bool CConstantPrior::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - CONSTANT_TAG, double constant, 
core::CStringUtils::stringToType(traverser.value(), constant), m_Constant.reset(constant)) + RESTORE_SETUP_TEARDOWN(CONSTANT_TAG, double constant, + core::CStringUtils::stringToType(traverser.value(), constant), + m_Constant.reset(constant)) } while (traverser.next()); return true; @@ -82,8 +86,9 @@ bool CConstantPrior::needsOffset() const { return false; } -double -CConstantPrior::adjustOffset(const TWeightStyleVec& /*weightStyle*/, const TDouble1Vec& /*samples*/, const TDouble4Vec1Vec& /*weights*/) { +double CConstantPrior::adjustOffset(const TWeightStyleVec& /*weightStyle*/, + const TDouble1Vec& /*samples*/, + const TDouble4Vec1Vec& /*weights*/) { return 0.0; } @@ -91,7 +96,9 @@ double CConstantPrior::offset() const { return 0.0; } -void CConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle*/, const TDouble1Vec& samples, const TDouble4Vec1Vec& /*weights*/) { +void CConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle*/, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& /*weights*/) { if (m_Constant || samples.empty()) { return; } @@ -102,7 +109,8 @@ void CConstantPrior::propagateForwardsByTime(double /*time*/) { } CConstantPrior::TDoubleDoublePr CConstantPrior::marginalLikelihoodSupport() const { - return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); + return std::make_pair(boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()); } double CConstantPrior::marginalLikelihoodMean() const { @@ -113,13 +121,15 @@ double CConstantPrior::marginalLikelihoodMean() const { return *m_Constant; } -double CConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const { +double CConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, + const TDouble4Vec& /*weights*/) const { return this->marginalLikelihoodMean(); } -CConstantPrior::TDoubleDoublePr CConstantPrior::marginalLikelihoodConfidenceInterval(double /*percentage*/, - const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { +CConstantPrior::TDoubleDoublePr +CConstantPrior::marginalLikelihoodConfidenceInterval(double /*percentage*/, + const TWeightStyleVec& /*weightStyles*/, + const TDouble4Vec& /*weights*/) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -127,14 +137,16 @@ CConstantPrior::TDoubleDoublePr CConstantPrior::marginalLikelihoodConfidenceInte return std::make_pair(*m_Constant, *m_Constant); } -double CConstantPrior::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const { +double CConstantPrior::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/, + const TDouble4Vec& /*weights*/) const { return this->isNonInformative() ? 
boost::numeric::bounds::highest() : 0.0; } -maths_t::EFloatingPointErrorStatus CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const { +maths_t::EFloatingPointErrorStatus +CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; if (samples.empty()) { @@ -143,7 +155,8 @@ maths_t::EFloatingPointErrorStatus CConstantPrior::jointLogMarginalLikelihood(co } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } @@ -177,7 +190,8 @@ maths_t::EFloatingPointErrorStatus CConstantPrior::jointLogMarginalLikelihood(co return maths_t::E_FpNoErrors; } -void CConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { +void CConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples, + TDouble1Vec& samples) const { samples.clear(); if (this->isNonInformative()) { @@ -302,8 +316,9 @@ bool CConstantPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalcula } } - LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples) << ", constant = " << *m_Constant - << ", lowerBound = " << lowerBound << ", upperBound = " << upperBound << ", tail = " << tail); + LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples) + << ", constant = " << *m_Constant << ", lowerBound = " << lowerBound + << ", upperBound = " << upperBound << ", tail = " << tail); tail = static_cast(tail_); return true; @@ -315,7 +330,8 @@ bool CConstantPrior::isNonInformative() const { void CConstantPrior::print(const std::string& indent, std::string& result) const { result += core_t::LINE_ENDING + indent + "constant " + - (this->isNonInformative() ? std::string("non-informative") : core::CStringUtils::typeToString(*m_Constant)); + (this->isNonInformative() ? std::string("non-informative") + : core::CStringUtils::typeToString(*m_Constant)); } std::string CConstantPrior::printMarginalLikelihoodFunction(double /*weight*/) const { diff --git a/lib/maths/CCooccurrences.cc b/lib/maths/CCooccurrences.cc index 79133fcef5..044f89f755 100644 --- a/lib/maths/CCooccurrences.cc +++ b/lib/maths/CCooccurrences.cc @@ -39,10 +39,12 @@ using TPackedBitVectorVec = std::vector; //! \brief Counts the (co-)occurrences of two variables. struct SCooccurrence { SCooccurrence() : s_Nxy(0.0), s_Nx(0.0), s_Ny(0.0), s_X(0), s_Y(0) {} - SCooccurrence(double nxy, double nx, double ny, std::size_t x, std::size_t y) : s_Nxy(nxy), s_Nx(nx), s_Ny(ny), s_X(x), s_Y(y) {} + SCooccurrence(double nxy, double nx, double ny, std::size_t x, std::size_t y) + : s_Nxy(nxy), s_Nx(nx), s_Ny(ny), s_X(x), s_Y(y) {} bool operator<(const SCooccurrence& rhs) const { - return s_Nxy * static_cast(rhs.s_X) * static_cast(rhs.s_Y) < rhs.s_Nxy * s_Nx * s_Ny; + return s_Nxy * static_cast(rhs.s_X) * static_cast(rhs.s_Y) < + rhs.s_Nxy * s_Nx * s_Ny; } double s_Nxy, s_Nx, s_Ny; @@ -96,7 +98,10 @@ void generateProjection(std::size_t dimension, CPackedBitVector& result) { //! \param[in] lengths The Euclidean lengths of the indicator vectors. //! \param[in] mask A mask of events to consider. //! 
\param[in] result Filled in with the p projections of indicator vectors. -void generateProjections(const TPackedBitVectorVec& indicators, const TDoubleVec& lengths, const TSizeVec& mask, TDoubleVecVec& result) { +void generateProjections(const TPackedBitVectorVec& indicators, + const TDoubleVec& lengths, + const TSizeVec& mask, + TDoubleVecVec& result) { std::size_t dimension = indicators[0].dimension(); for (std::size_t i = 0u; i < result.size(); ++i) { CPackedBitVector projection; @@ -182,8 +187,10 @@ void seed(const TPackedBitVectorVec& indicators, //! \param[out] result The indices of the events in the filter. void computeFilter(const TSizeVec& mask, const TDoubleVec& theta, std::size_t i, double bound, TSizeVec& result) { result.clear(); - ptrdiff_t start = std::lower_bound(theta.begin(), theta.end(), theta[i] - bound) - theta.begin(); - ptrdiff_t end = std::upper_bound(theta.begin(), theta.end(), theta[i] + bound) - theta.begin(); + ptrdiff_t start = std::lower_bound(theta.begin(), theta.end(), theta[i] - bound) - + theta.begin(); + ptrdiff_t end = std::upper_bound(theta.begin(), theta.end(), theta[i] + bound) - + theta.begin(); result.reserve(end - start); result.insert(result.end(), mask.begin() + start, mask.begin() + i); result.insert(result.end(), mask.begin() + i + 1, mask.begin() + end); @@ -193,7 +200,8 @@ void computeFilter(const TSizeVec& mask, const TDoubleVec& theta, std::size_t i, //! Apply \p filter to \p result (set intersection). void applyFilter(const TSizeVec& filter, TSizeVec& placeholder, TSizeVec& result) { placeholder.clear(); - std::set_intersection(result.begin(), result.end(), filter.begin(), filter.end(), std::back_inserter(placeholder)); + std::set_intersection(result.begin(), result.end(), filter.begin(), + filter.end(), std::back_inserter(placeholder)); result.swap(placeholder); } @@ -239,7 +247,9 @@ void searchForMostSignificantCooccurrences(const TPackedBitVectorVec& indicators TSizeVec placeholder; for (std::size_t i = 0u; i < n; ++i) { - double lambda = mostSignificant.biggest().s_Nxy / (mostSignificant.biggest().s_Nx * mostSignificant.biggest().s_Ny); + double lambda = + mostSignificant.biggest().s_Nxy / + (mostSignificant.biggest().s_Nx * mostSignificant.biggest().s_Ny); double bound = 2.0 * std::asin(1.0 - lambda); @@ -326,9 +336,12 @@ double significance(double nxy, double nx, double ny, double n) { double px = nx / n; double py = ny / n; - double lambda = n * (-g * px * py * std::log(g) + px * (1.0 - g * py) * std::log((1.0 - py) / (1.0 - g * py)) + - py * (1.0 - g * px) * std::log((1.0 - px) / (1.0 - g * px)) + - (1.0 - px - py + g * px * py) * std::log((1.0 - px) * (1.0 - py) / (1.0 - px - py + g * px * py))); + double lambda = + n * (-g * px * py * std::log(g) + + px * (1.0 - g * py) * std::log((1.0 - py) / (1.0 - g * py)) + + py * (1.0 - g * px) * std::log((1.0 - px) / (1.0 - g * px)) + + (1.0 - px - py + g * px * py) * + std::log((1.0 - px) * (1.0 - py) / (1.0 - px - py + g * px * py))); boost::math::chi_squared_distribution<> chi(1.0); @@ -345,21 +358,25 @@ std::string INDICATOR_TAG("d"); } CCooccurrences::CCooccurrences(std::size_t maximumLength, std::size_t indicatorWidth) - : m_MaximumLength(maximumLength), m_Length(0), m_IndicatorWidth(indicatorWidth), m_Offset(0) { + : m_MaximumLength(maximumLength), m_Length(0), + m_IndicatorWidth(indicatorWidth), m_Offset(0) { } bool CCooccurrences::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - if (name == LENGTH_TAG && 
core::CStringUtils::stringToType(traverser.value(), m_Length) == false) {
+        if (name == LENGTH_TAG &&
+            core::CStringUtils::stringToType(traverser.value(), m_Length) == false) {
             LOG_ERROR(<< "Invalid length in " << traverser.value());
             return false;
         }
-        if (name == OFFSET_TAG && core::CStringUtils::stringToType(traverser.value(), m_Offset) == false) {
+        if (name == OFFSET_TAG &&
+            core::CStringUtils::stringToType(traverser.value(), m_Offset) == false) {
             LOG_ERROR(<< "Invalid offset in " << traverser.value());
             return false;
         }
-        if (core::CPersistUtils::restore(CURRENT_INDICATOR_TAG, m_CurrentIndicators, traverser) == false) {
+        if (core::CPersistUtils::restore(CURRENT_INDICATOR_TAG,
+                                         m_CurrentIndicators, traverser) == false) {
             LOG_ERROR(<< "Invalid indicators in " << traverser.value());
             return false;
         }
@@ -378,7 +395,10 @@ void CCooccurrences::acceptPersistInserter(core::CStatePersistInserter& inserter
     core::CPersistUtils::persist(INDICATOR_TAG, m_Indicators, inserter);
 }
 
-void CCooccurrences::topNBySignificance(std::size_t X, std::size_t /*n*/, TSizeSizePrVec& /*top*/, TDoubleVec& /*significances*/) const {
+void CCooccurrences::topNBySignificance(std::size_t X,
+                                        std::size_t /*n*/,
+                                        TSizeSizePrVec& /*top*/,
+                                        TDoubleVec& /*significances*/) const {
     if (X >= m_Indicators.size()) {
         LOG_ERROR(<< "Unexpected event " << X);
         return;
@@ -387,7 +407,9 @@ void CCooccurrences::topNBySignificance(std::size_t X, std::size_t /*n*/, TSizeS
     // TODO
 }
 
-void CCooccurrences::topNBySignificance(std::size_t n, TSizeSizePrVec& top, TDoubleVec& significances) const {
+void CCooccurrences::topNBySignificance(std::size_t n,
+                                        TSizeSizePrVec& top,
+                                        TDoubleVec& significances) const {
     top.clear();
     significances.clear();
 
@@ -410,7 +432,8 @@ void CCooccurrences::topNBySignificance(std::size_t n, TSizeSizePrVec& top, TDou
         }
     }
 
-    std::size_t p = static_cast<std::size_t>(std::max(std::sqrt(static_cast<double>(dimension)), 1.0) + 0.5);
+    std::size_t p = static_cast<std::size_t>(
+        std::max(std::sqrt(static_cast<double>(dimension)), 1.0) + 0.5);
 
     TMostSignificant mostSignificant(n);
     searchForMostSignificantCooccurrences(m_Indicators, lengths, mask, p, mostSignificant);
@@ -431,7 +454,8 @@ void CCooccurrences::topNBySignificance(std::size_t n, TSizeSizePrVec& top, TDou
 
 void CCooccurrences::addEventStreams(std::size_t n) {
     if (n > m_Indicators.size()) {
-        core::CAllocationStrategy::resize(m_Indicators, n, CPackedBitVector(m_Length, false));
+        core::CAllocationStrategy::resize(m_Indicators, n,
+                                          CPackedBitVector(m_Length, false));
     }
 }
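The significance test behind topNBySignificance() is the likelihood-ratio
statistic referred to a chi-squared distribution with one degree of freedom,
as in the significance() helper earlier in this file's diff. A compact
standalone restatement; the definition of g (the ratio of the observed to the
expected co-occurrence rate) is inferred here because the hunk context elides
it, and the real code additionally guards against degenerate inputs:

#include <boost/math/distributions/chi_squared.hpp>
#include <cmath>

// p-value for seeing nxy co-occurrences of two events with marginal counts
// nx and ny in n trials, under the hypothesis that they are independent.
double cooccurrenceSignificance(double nxy, double nx, double ny, double n) {
    double g = nxy * n / (nx * ny); // assumed: observed / expected rate
    double px = nx / n;
    double py = ny / n;
    double lambda =
        n * (-g * px * py * std::log(g) +
             px * (1.0 - g * py) * std::log((1.0 - py) / (1.0 - g * py)) +
             py * (1.0 - g * px) * std::log((1.0 - px) / (1.0 - g * px)) +
             (1.0 - px - py + g * px * py) *
                 std::log((1.0 - px) * (1.0 - py) / (1.0 - px - py + g * px * py)));
    boost::math::chi_squared_distribution<> chi(1.0);
    return boost::math::cdf(boost::math::complement(chi, lambda));
}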
diff --git a/lib/maths/CCountMinSketch.cc b/lib/maths/CCountMinSketch.cc
index 4bd50fd5ee..b7a9a54d57 100644
--- a/lib/maths/CCountMinSketch.cc
+++ b/lib/maths/CCountMinSketch.cc
@@ -37,11 +37,14 @@ const char PAIR_DELIMITER(';');
 }
 
 CCountMinSketch::CCountMinSketch(std::size_t rows, std::size_t columns)
-    : m_Rows(rows), m_Columns(columns), m_TotalCount(0.0), m_Sketch(TUInt32FloatPrVec()) {
+    : m_Rows(rows), m_Columns(columns), m_TotalCount(0.0),
+      m_Sketch(TUInt32FloatPrVec()) {
 }
 
-CCountMinSketch::CCountMinSketch(core::CStateRestoreTraverser& traverser) : m_Rows(0), m_Columns(0), m_TotalCount(0.0), m_Sketch() {
-    traverser.traverseSubLevel(boost::bind(&CCountMinSketch::acceptRestoreTraverser, this, _1));
+CCountMinSketch::CCountMinSketch(core::CStateRestoreTraverser& traverser)
+    : m_Rows(0), m_Columns(0), m_TotalCount(0.0), m_Sketch() {
+    traverser.traverseSubLevel(
+        boost::bind(&CCountMinSketch::acceptRestoreTraverser, this, _1));
 }
 
 void CCountMinSketch::swap(CCountMinSketch& other) {
@@ -56,7 +59,8 @@ void CCountMinSketch::swap(CCountMinSketch& other) {
     try {
         TUInt32FloatPrVec* counts = boost::get<TUInt32FloatPrVec>(&m_Sketch);
         if (counts) {
-            TUInt32FloatPrVec* otherCounts = boost::get<TUInt32FloatPrVec>(&other.m_Sketch);
+            TUInt32FloatPrVec* otherCounts =
+                boost::get<TUInt32FloatPrVec>(&other.m_Sketch);
             if (otherCounts) {
                 counts->swap(*otherCounts);
             } else {
@@ -76,7 +80,8 @@ void CCountMinSketch::swap(CCountMinSketch& other) {
                 sketch.s_Hashes.swap(otherSketch->s_Hashes);
                 sketch.s_Counts.swap(otherSketch->s_Counts);
             } else {
-                TUInt32FloatPrVec& otherCounts = boost::get<TUInt32FloatPrVec>(other.m_Sketch);
+                TUInt32FloatPrVec& otherCounts =
+                    boost::get<TUInt32FloatPrVec>(other.m_Sketch);
                 TUInt32FloatPrVec tmp;
                 tmp.swap(otherCounts);
                 other.m_Sketch = SSketch();
@@ -86,7 +91,9 @@ void CCountMinSketch::swap(CCountMinSketch& other) {
                 boost::get<TUInt32FloatPrVec>(m_Sketch).swap(tmp);
             }
         }
-    } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception " << e.what()); }
+    } catch (const std::exception& e) {
+        LOG_ABORT(<< "Unexpected exception " << e.what());
+    }
 }
 
 bool CCountMinSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
@@ -110,7 +117,8 @@ bool CCountMinSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& trave
         } else if (name == CATEGORY_COUNTS_TAG) {
             m_Sketch = TUInt32FloatPrVec();
             TUInt32FloatPrVec& counts = boost::get<TUInt32FloatPrVec>(m_Sketch);
-            if (core::CPersistUtils::fromString(traverser.value(), counts, DELIMITER, PAIR_DELIMITER) == false) {
+            if (core::CPersistUtils::fromString(traverser.value(), counts, DELIMITER,
+                                                PAIR_DELIMITER) == false) {
                 LOG_ERROR(<< "Invalid category counts in " << traverser.value());
                 return false;
             }
@@ -119,7 +127,8 @@ bool CCountMinSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& trave
             SSketch& sketch = boost::get<SSketch>(m_Sketch);
             sketch.s_Hashes.reserve(m_Rows);
             sketch.s_Counts.reserve(m_Rows);
-            if (traverser.traverseSubLevel(boost::bind(&SSketch::acceptRestoreTraverser, &sketch, _1, m_Rows, m_Columns)) == false) {
+            if (traverser.traverseSubLevel(boost::bind(&SSketch::acceptRestoreTraverser, &sketch,
+                                                       _1, m_Rows, m_Columns)) == false) {
                 return false;
             }
         }
@@ -133,12 +142,16 @@ void CCountMinSketch::acceptPersistInserter(core::CStatePersistInserter& inserte
     inserter.insertValue(TOTAL_COUNT_TAG, m_TotalCount, core::CIEEE754::E_SinglePrecision);
     const TUInt32FloatPrVec* counts = boost::get<TUInt32FloatPrVec>(&m_Sketch);
     if (counts) {
-        inserter.insertValue(CATEGORY_COUNTS_TAG, core::CPersistUtils::toString(*counts, DELIMITER, PAIR_DELIMITER));
+        inserter.insertValue(CATEGORY_COUNTS_TAG,
+                             core::CPersistUtils::toString(*counts, DELIMITER, PAIR_DELIMITER));
     } else {
         try {
             const SSketch& sketch = boost::get<SSketch>(m_Sketch);
-            inserter.insertLevel(SKETCH_TAG, boost::bind(&SSketch::acceptPersistInserter, &sketch, _1));
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception " << e.what()); }
+            inserter.insertLevel(
+                SKETCH_TAG, boost::bind(&SSketch::acceptPersistInserter, &sketch, _1));
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception " << e.what());
+        }
     }
 }
 
@@ -163,7 +176,8 @@ double CCountMinSketch::oneMinusDeltaError() const {
     if (!sketch) {
         return 0.0;
     }
-    return std::min(boost::math::double_constants::e / static_cast<double>(m_Columns), 1.0) * m_TotalCount;
+    return std::min(boost::math::double_constants::e / static_cast<double>(m_Columns), 1.0) *
+           m_TotalCount;
 }
 
 void CCountMinSketch::add(uint32_t category, double count) {
@@ -173,7 +187,8 @@ void CCountMinSketch::add(uint32_t category, double count) {
 
     TUInt32FloatPrVec* counts = boost::get<TUInt32FloatPrVec>(&m_Sketch);
     if (counts) {
-        auto itr = std::lower_bound(counts->begin(), counts->end(), 
category, COrderings::SFirstLess());
+        auto itr = std::lower_bound(counts->begin(), counts->end(), category,
+                                    COrderings::SFirstLess());
 
         if (itr == counts->end() || itr->first != category) {
             itr = counts->insert(itr, TUInt32FloatPr(category, 0.0));
@@ -196,14 +211,17 @@ void CCountMinSketch::add(uint32_t category, double count) {
                 LOG_TRACE(<< "count (i,j) = (" << i << "," << j << ")"
                           << " -> " << sketch.s_Counts[i][j]);
             }
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception " << e.what());
+        }
     }
 }
 
 void CCountMinSketch::removeFromMap(uint32_t category) {
     TUInt32FloatPrVec* counts = boost::get<TUInt32FloatPrVec>(&m_Sketch);
     if (counts) {
-        auto itr = std::lower_bound(counts->begin(), counts->end(), category, COrderings::SFirstLess());
+        auto itr = std::lower_bound(counts->begin(), counts->end(), category,
+                                    COrderings::SFirstLess());
         if (itr != counts->end() && itr->first == category) {
             counts->erase(itr);
         }
@@ -224,7 +242,9 @@ void CCountMinSketch::age(double alpha) {
                     sketch.s_Counts[i][j] *= alpha;
                 }
             }
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception " << e.what());
+        }
    }
 }
 
@@ -237,9 +257,12 @@ double CCountMinSketch::count(uint32_t category) const {
     const TUInt32FloatPrVec* counts = boost::get<TUInt32FloatPrVec>(&m_Sketch);
     if (counts) {
-        auto itr = std::lower_bound(counts->begin(), counts->end(), category, COrderings::SFirstLess());
+        auto itr = std::lower_bound(counts->begin(), counts->end(), category,
+                                    COrderings::SFirstLess());
 
-        return itr == counts->end() || itr->first != category ? 0.0 : static_cast<double>(itr->second);
+        return itr == counts->end() || itr->first != category
+                   ? 0.0
+                   : static_cast<double>(itr->second);
     }
 
     TMinAccumulator result;
@@ -252,7 +275,9 @@ double CCountMinSketch::count(uint32_t category) const {
                       << " <- " << sketch.s_Counts[i][j]);
             result.add(sketch.s_Counts[i][j]);
         }
-    } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception " << e.what()); }
+    } catch (const std::exception& e) {
+        LOG_ABORT(<< "Unexpected exception " << e.what());
+    }
 
    return result.count() > 0 ? 
result[0] : 0.0;
 }
 
@@ -275,7 +300,9 @@ uint64_t CCountMinSketch::checksum(uint64_t seed) const {
             const SSketch& sketch = boost::get<SSketch>(m_Sketch);
             seed = CChecksum::calculate(seed, sketch.s_Hashes);
             return CChecksum::calculate(seed, sketch.s_Counts);
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception " << e.what());
+        }
     }
     return CChecksum::calculate(seed, *counts);
 }
@@ -292,7 +319,9 @@ void CCountMinSketch::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem)
             core::CMemoryDebug::dynamicSize("sketch", sketch, mem);
             core::CMemoryDebug::dynamicSize("s_Hashes", sketch.s_Hashes, mem);
             core::CMemoryDebug::dynamicSize("s_Counts", sketch.s_Counts, mem);
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception " << e.what());
+        }
     }
 }
 
@@ -307,14 +336,17 @@ std::size_t CCountMinSketch::memoryUsage() const {
             mem += sizeof(SSketch);
             mem += core::CMemory::dynamicSize(sketch.s_Hashes);
             mem += core::CMemory::dynamicSize(sketch.s_Counts);
-        } catch (const std::exception& e) { LOG_ABORT(<< "Unexpected exception " << e.what()); }
+        } catch (const std::exception& e) {
+            LOG_ABORT(<< "Unexpected exception " << e.what());
+        }
     }
     return mem;
 }
 
 void CCountMinSketch::sketch() {
     static const std::size_t FLOAT_SIZE = sizeof(CFloatStorage);
-    static const std::size_t HASH_SIZE = sizeof(core::CHashing::CUniversalHash::CUInt32UnrestrictedHash);
+    static const std::size_t HASH_SIZE =
+        sizeof(core::CHashing::CUniversalHash::CUInt32UnrestrictedHash);
     static const std::size_t PAIR_SIZE = sizeof(TUInt32FloatPr);
     static const std::size_t VEC_SIZE = sizeof(TUInt32FloatPrVec);
     static const std::size_t SKETCH_SIZE = sizeof(SSketch);
@@ -325,7 +357,8 @@ void CCountMinSketch::sketch() {
         std::size_t sketchSize = SKETCH_SIZE + m_Rows * (m_Columns * FLOAT_SIZE + HASH_SIZE);
 
         if (countsSize > sketchSize) {
-            if (counts->capacity() > counts->size() && counts->size() < (sketchSize - VEC_SIZE) / PAIR_SIZE) {
+            if (counts->capacity() > counts->size() &&
+                counts->size() < (sketchSize - VEC_SIZE) / PAIR_SIZE) {
                 TUInt32FloatPrVec shrunk;
                 shrunk.reserve((sketchSize - VEC_SIZE) / PAIR_SIZE);
                 shrunk.assign(counts->begin(), counts->end());
@@ -346,23 +379,28 @@ void CCountMinSketch::sketch() {
     }
 }
 
-CCountMinSketch::SSketch::SSketch(std::size_t rows, std::size_t columns) : s_Counts(rows, TFloatVec(columns, 0.0)) {
+CCountMinSketch::SSketch::SSketch(std::size_t rows, std::size_t columns)
+    : s_Counts(rows, TFloatVec(columns, 0.0)) {
     core::CHashing::CUniversalHash::generateHashes(rows, s_Hashes);
 }
 
-bool CCountMinSketch::SSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::size_t rows, std::size_t columns) {
+bool CCountMinSketch::SSketch::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser,
+                                                      std::size_t rows,
+                                                      std::size_t columns) {
     do {
         const std::string& name = traverser.name();
         if (name == HASHES_TAG) {
             core::CHashing::CUniversalHash::CFromString hashFromString(PAIR_DELIMITER);
-            if (core::CPersistUtils::fromString(traverser.value(), hashFromString, s_Hashes, DELIMITER) == false ||
+            if (core::CPersistUtils::fromString(traverser.value(), hashFromString,
+                                                s_Hashes, DELIMITER) == false ||
                 s_Hashes.size() != rows) {
                 LOG_ERROR(<< "Invalid hashes in " << traverser.value());
                 return false;
             }
         } else if (name == COUNTS_TAG) {
             s_Counts.push_back(TFloatVec());
-            if (core::CPersistUtils::fromString(traverser.value(), s_Counts.back(), DELIMITER) == false ||
+            if (core::CPersistUtils::fromString(traverser.value(), s_Counts.back(),
+                                                DELIMITER) == false ||
                 s_Counts.back().size() != columns) {
                 LOG_ERROR(<< "Invalid counts in " << traverser.value());
                 return false;
             }
         }
@@ -371,7 +409,8 @@ bool CCountMinSketch::SSketch::acceptRestoreTravers
     } while (traverser.next());
 
     if (s_Counts.size() != rows) {
-        LOG_ERROR(<< "Unexpected number of counts " << s_Counts.size() << ", number of rows " << rows);
+        LOG_ERROR(<< "Unexpected number of counts " << s_Counts.size()
+                  << ", number of rows " << rows);
         return false;
     }
     return true;
@@ -379,7 +418,8 @@ bool CCountMinSketch::SSketch::acceptRestoreTravers
 
 void CCountMinSketch::SSketch::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     core::CHashing::CUniversalHash::CToString hashToString(PAIR_DELIMITER);
-    inserter.insertValue(HASHES_TAG, core::CPersistUtils::toString(s_Hashes, hashToString, DELIMITER));
+    inserter.insertValue(
+        HASHES_TAG, core::CPersistUtils::toString(s_Hashes, hashToString, DELIMITER));
     for (const auto& count : s_Counts) {
         inserter.insertValue(COUNTS_TAG, core::CPersistUtils::toString(count, DELIMITER));
     }
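The data structure reformatted above is a standard count-min sketch: d rows of
w counters with one hash function per row; an update increments one counter in
each row and a point query takes the minimum over rows. A minimal standalone
sketch of the idea (simplified: std::hash seeded per row stands in for the
library's universal hash family):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <vector>

class CountMinSketchExample {
public:
    CountMinSketchExample(std::size_t rows, std::size_t columns)
        : m_Counts(rows, std::vector<double>(columns, 0.0)) {}

    void add(std::uint32_t category, double count = 1.0) {
        for (std::size_t i = 0; i < m_Counts.size(); ++i) {
            m_Counts[i][this->column(i, category)] += count;
        }
    }

    // Never underestimates the true count, and overestimates by at most
    // roughly e / columns times the total count with high probability
    // (controlled by the number of rows) - compare oneMinusDeltaError() above.
    double count(std::uint32_t category) const {
        double result = std::numeric_limits<double>::max();
        for (std::size_t i = 0; i < m_Counts.size(); ++i) {
            result = std::min(result, m_Counts[i][this->column(i, category)]);
        }
        return result;
    }

private:
    std::size_t column(std::size_t row, std::uint32_t category) const {
        // Seed the hash differently per row; a stand-in for per-row hashes.
        std::uint64_t key = (static_cast<std::uint64_t>(row) << 32) | category;
        return std::hash<std::uint64_t>()(key) % m_Counts[row].size();
    }

    std::vector<std::vector<double>> m_Counts;
};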
diff --git a/lib/maths/CDecayRateController.cc b/lib/maths/CDecayRateController.cc
index cb504ba3f9..c05678c3d0 100644
--- a/lib/maths/CDecayRateController.cc
+++ b/lib/maths/CDecayRateController.cc
@@ -89,7 +89,8 @@ double adjustMultiplier(double multiplier, core_t::TTime bucketLength_) {
 //! Adjust the maximum decay rate multiplier for long bucket lengths.
 double adjustedMaximumMultiplier(core_t::TTime bucketLength_) {
     double bucketLength{static_cast<double>(bucketLength_)};
-    return MAXIMUM_MULTIPLIER / (1.0 + CTools::truncate((bucketLength - 1800.0) / 86400.0, 0.0, 1.0));
+    return MAXIMUM_MULTIPLIER /
+           (1.0 + CTools::truncate((bucketLength - 1800.0) / 86400.0, 0.0, 1.0));
 }
 }
 
@@ -98,12 +99,8 @@ CDecayRateController::CDecayRateController() : m_Checks(0), m_Target(1.0) {
 }
 
 CDecayRateController::CDecayRateController(int checks, std::size_t dimension)
-    : m_Checks(checks),
-      m_Target(1.0),
-      m_PredictionMean(dimension),
-      m_Bias(dimension),
-      m_RecentAbsError(dimension),
-      m_HistoricalAbsError(dimension) {
+    : m_Checks(checks), m_Target(1.0), m_PredictionMean(dimension), m_Bias(dimension),
+      m_RecentAbsError(dimension), m_HistoricalAbsError(dimension) {
     m_Multiplier.add(m_Target);
 }
 
@@ -124,10 +121,14 @@ bool CDecayRateController::acceptRestoreTraverser(core::CStateRestoreTraverser&
         RESTORE_BUILT_IN(TARGET_TAG, m_Target)
         RESTORE(MULTIPLIER_TAG, m_Multiplier.fromDelimited(traverser.value()))
         RESTORE(RNG_TAG, m_Rng.fromString(traverser.value()))
-        RESTORE(PREDICTION_MEAN_TAG, core::CPersistUtils::restore(PREDICTION_MEAN_TAG, m_PredictionMean, traverser));
+        RESTORE(PREDICTION_MEAN_TAG,
+                core::CPersistUtils::restore(PREDICTION_MEAN_TAG, m_PredictionMean, traverser));
         RESTORE(BIAS_TAG, core::CPersistUtils::restore(BIAS_TAG, m_Bias, traverser))
-        RESTORE(RECENT_ABS_ERROR_TAG, core::CPersistUtils::restore(RECENT_ABS_ERROR_TAG, m_RecentAbsError, traverser))
-        RESTORE(HISTORICAL_ABS_ERROR_TAG, core::CPersistUtils::restore(HISTORICAL_ABS_ERROR_TAG, m_HistoricalAbsError, traverser))
+        RESTORE(RECENT_ABS_ERROR_TAG,
+                core::CPersistUtils::restore(RECENT_ABS_ERROR_TAG, m_RecentAbsError, traverser))
+        RESTORE(HISTORICAL_ABS_ERROR_TAG,
+                core::CPersistUtils::restore(HISTORICAL_ABS_ERROR_TAG,
+                                             m_HistoricalAbsError, traverser))
    } while 
(traverser.next()); if (CBasicStatistics::count(m_Multiplier) == 0.0) { m_Multiplier.add(m_Target); @@ -175,7 +176,8 @@ double CDecayRateController::multiplier(const TDouble1Vec& prediction, if (count > 0.0) { double bias{CBasicStatistics::mean(m_Bias[d])}; double width{10.0 * CBasicStatistics::mean(m_HistoricalAbsError[d])}; - predictionError[d] = CTools::truncate(predictionError[d], bias - width, bias + width); + predictionError[d] = CTools::truncate(predictionError[d], + bias - width, bias + width); } // The idea of the following is to allow the model memory @@ -188,7 +190,8 @@ double CDecayRateController::multiplier(const TDouble1Vec& prediction, // so the controller will actively decrease the decay rate. double weight{learnRate / numberPredictionErrors}; - double sd{MINIMUM_COV_TO_CONTROL * std::fabs(CBasicStatistics::mean(m_PredictionMean[d]))}; + double sd{MINIMUM_COV_TO_CONTROL * + std::fabs(CBasicStatistics::mean(m_PredictionMean[d]))}; double tolerance{sd > 0.0 ? CSampling::normalSample(m_Rng, 0.0, sd * sd) : 0.0}; m_PredictionMean[d].add(prediction[d], weight); (*stats_[0])[d].add(predictionError[d] + tolerance, weight); @@ -200,8 +203,9 @@ double CDecayRateController::multiplier(const TDouble1Vec& prediction, } if (count > 0.0) { - double factors[]{ - std::exp(-FAST_DECAY_RATE * decayRate), std::exp(-FAST_DECAY_RATE * decayRate), std::exp(-SLOW_DECAY_RATE * decayRate)}; + double factors[]{std::exp(-FAST_DECAY_RATE * decayRate), + std::exp(-FAST_DECAY_RATE * decayRate), + std::exp(-SLOW_DECAY_RATE * decayRate)}; for (auto& component : m_PredictionMean) { component.age(factors[2]); } @@ -227,7 +231,9 @@ double CDecayRateController::multiplier(const TDouble1Vec& prediction, change.add(this->change(stats, bucketLength)); } - m_Target *= CTools::truncate(m_Target * change[0], MINIMUM_MULTIPLIER, adjustedMaximumMultiplier(bucketLength)) / m_Target; + m_Target *= CTools::truncate(m_Target * change[0], MINIMUM_MULTIPLIER, + adjustedMaximumMultiplier(bucketLength)) / + m_Target; // We smooth the target decay rate. 
Over time this should
     // converge to the single decay rate which would minimize
@@ -285,8 +291,10 @@ double CDecayRateController::change(const double (&stats)[3], core_t::TTime buck
         ((m_Checks & E_PredictionBias) && stats[0] > BIASED * stats[1])) {
         return adjustMultiplier(INCREASE_RATE, bucketLength);
     }
-    if ((!(m_Checks & E_PredictionErrorIncrease) || stats[1] < ERROR_NOT_INCREASING * stats[2]) &&
-        (!(m_Checks & E_PredictionErrorDecrease) || stats[2] < ERROR_NOT_DECREASING * stats[1]) &&
+    if ((!(m_Checks & E_PredictionErrorIncrease) ||
+         stats[1] < ERROR_NOT_INCREASING * stats[2]) &&
+        (!(m_Checks & E_PredictionErrorDecrease) ||
+         stats[2] < ERROR_NOT_DECREASING * stats[1]) &&
        (!(m_Checks & E_PredictionBias) || stats[0] < NOT_BIASED * stats[1])) {
         return adjustMultiplier(DECREASE_RATE, bucketLength);
     }
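In change() above, stats[0], stats[1] and stats[2] carry the bias, recent
absolute error and historical absolute error respectively: the multiplier is
nudged up when predictions look biased or recent error outgrows historical
error, and down when errors are stable. A toy restatement of that decision
rule (the thresholds here are illustrative, not the library's tuned
constants):

// Returns a multiplicative change to apply to the decay-rate multiplier.
double decayRateChange(double bias, double recentError, double historicalError) {
    const double INCREASE = 1.2;  // forget the past faster
    const double DECREASE = 0.96; // forget the past more slowly
    if (bias > 1.5 * recentError || recentError > 2.0 * historicalError) {
        return INCREASE;
    }
    if (recentError < 1.2 * historicalError &&
        historicalError < 1.2 * recentError && bias < 0.5 * recentError) {
        return DECREASE;
    }
    return 1.0; // leave the multiplier alone
}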
diff --git a/lib/maths/CDecompositionComponent.cc b/lib/maths/CDecompositionComponent.cc
index 280aaef360..e2aba37be2 100644
--- a/lib/maths/CDecompositionComponent.cc
+++ b/lib/maths/CDecompositionComponent.cc
@@ -50,23 +50,22 @@ CDecompositionComponent::CDecompositionComponent(std::size_t maxSize,
                                                  CSplineTypes::EBoundaryCondition boundaryCondition,
                                                  CSplineTypes::EType valueInterpolationType,
                                                  CSplineTypes::EType varianceInterpolationType)
-    : m_MaxSize{maxSize},
-      m_BoundaryCondition{boundaryCondition},
-      m_Splines{valueInterpolationType, varianceInterpolationType},
-      m_MeanValue{0.0},
-      m_MeanVariance{0.0} {
+    : m_MaxSize{maxSize}, m_BoundaryCondition{boundaryCondition}, m_Splines{valueInterpolationType,
+                                                                            varianceInterpolationType},
+      m_MeanValue{0.0}, m_MeanVariance{0.0} {
 }
 
 bool CDecompositionComponent::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
     do {
         const std::string& name{traverser.name()};
         RESTORE_BUILT_IN(MAX_SIZE_TAG, m_MaxSize)
-        RESTORE_SETUP_TEARDOWN(BOUNDARY_CONDITION_TAG,
-                               int boundaryCondition,
-                               core::CStringUtils::stringToType(traverser.value(), boundaryCondition),
-                               m_BoundaryCondition = static_cast<CSplineTypes::EBoundaryCondition>(boundaryCondition))
-        RESTORE(SPLINES_TAG,
-                traverser.traverseSubLevel(boost::bind(&CPackedSplines::acceptRestoreTraverser, &m_Splines, m_BoundaryCondition, _1)))
+        RESTORE_SETUP_TEARDOWN(
+            BOUNDARY_CONDITION_TAG, int boundaryCondition,
+            core::CStringUtils::stringToType(traverser.value(), boundaryCondition),
+            m_BoundaryCondition = static_cast<CSplineTypes::EBoundaryCondition>(boundaryCondition))
+        RESTORE(SPLINES_TAG, traverser.traverseSubLevel(
+                                 boost::bind(&CPackedSplines::acceptRestoreTraverser,
+                                             &m_Splines, m_BoundaryCondition, _1)))
     } while (traverser.next());
 
     if (this->initialized()) {
@@ -80,7 +79,8 @@ bool CDecompositionComponent::acceptRestoreTraverse
 void CDecompositionComponent::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     inserter.insertValue(MAX_SIZE_TAG, m_MaxSize);
     inserter.insertValue(BOUNDARY_CONDITION_TAG, static_cast<int>(m_BoundaryCondition));
-    inserter.insertLevel(SPLINES_TAG, boost::bind(&CPackedSplines::acceptPersistInserter, &m_Splines, _1));
+    inserter.insertLevel(SPLINES_TAG, boost::bind(&CPackedSplines::acceptPersistInserter,
+                                                  &m_Splines, _1));
 }
 
 void CDecompositionComponent::swap(CDecompositionComponent& other) {
@@ -103,7 +103,9 @@ void CDecompositionComponent::clear() {
     m_MeanVariance = 0.0;
 }
 
-void CDecompositionComponent::interpolate(const TDoubleVec& knots, const TDoubleVec& values, const TDoubleVec& variances) {
+void CDecompositionComponent::interpolate(const TDoubleVec& knots,
+                                          const TDoubleVec& values,
+                                          const TDoubleVec& variances) {
     m_Splines.interpolate(knots, values, variances, m_BoundaryCondition);
     m_MeanValue = this->valueSpline().mean();
     m_MeanVariance = this->varianceSpline().mean();
@@ -140,7 +142,8 @@ TDoubleDoublePr CDecompositionComponent::value(double offset, double n, double c
         double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)};
         return {ql, qu};
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() << ", n = " << n << ", m = " << m << ", sd = " << sd
+        LOG_ERROR(<< "Failed calculating confidence interval: " << e.what()
+                  << ", n = " << n << ", m = " << m << ", sd = " << sd
                   << ", confidence = " << confidence);
     }
     return {m, m};
@@ -172,7 +175,8 @@ TDoubleDoublePr CDecompositionComponent::variance(double offset, double n, doubl
         double qu{boost::math::quantile(chi, (100.0 + confidence) / 200.0)};
         return std::make_pair(ql * v / (n - 1.0), qu * v / (n - 1.0));
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() << ", n = " << n << ", confidence = " << confidence);
+        LOG_ERROR(<< "Failed calculating confidence interval: " << e.what()
+                  << ", n = " << n << ", confidence = " << confidence);
     }
     return {v, v};
 }
@@ -238,8 +242,9 @@ CDecompositionComponent::CPackedSplines::CPackedSplines(CSplineTypes::EType valu
     m_Types[static_cast<std::size_t>(E_Variance)] = varianceInterpolationType;
 }
 
-bool CDecompositionComponent::CPackedSplines::acceptRestoreTraverser(CSplineTypes::EBoundaryCondition boundary,
-                                                                     core::CStateRestoreTraverser& traverser) {
+bool CDecompositionComponent::CPackedSplines::acceptRestoreTraverser(
+    CSplineTypes::EBoundaryCondition boundary,
+    core::CStateRestoreTraverser& traverser) {
     int estimated{0};
     TDoubleVec knots;
     TDoubleVec values;
@@ -293,16 +298,16 @@ void CDecompositionComponent::CPackedSplines::shift(ESpline spline, double shift
     }
 }
 
-CDecompositionComponent::TSplineCRef CDecompositionComponent::CPackedSplines::spline(ESpline spline) const {
-    return TSplineCRef(m_Types[static_cast<std::size_t>(spline)],
-                       boost::cref(m_Knots),
+CDecompositionComponent::TSplineCRef
+CDecompositionComponent::CPackedSplines::spline(ESpline spline) const {
+    return TSplineCRef(m_Types[static_cast<std::size_t>(spline)], boost::cref(m_Knots),
                        boost::cref(m_Values[static_cast<std::size_t>(spline)]),
                        boost::cref(m_Curvatures[static_cast<std::size_t>(spline)]));
 }
 
-CDecompositionComponent::TSplineRef CDecompositionComponent::CPackedSplines::spline(ESpline spline) {
-    return TSplineRef(m_Types[static_cast<std::size_t>(spline)],
-                      boost::ref(m_Knots),
+CDecompositionComponent::TSplineRef
+CDecompositionComponent::CPackedSplines::spline(ESpline spline) {
+    return TSplineRef(m_Types[static_cast<std::size_t>(spline)], boost::ref(m_Knots),
                       boost::ref(m_Values[static_cast<std::size_t>(spline)]),
                       boost::ref(m_Curvatures[static_cast<std::size_t>(spline)]));
 }
diff --git a/lib/maths/CEntropySketch.cc b/lib/maths/CEntropySketch.cc
index 37164c671c..bb668f0832 100644
--- a/lib/maths/CEntropySketch.cc
+++ b/lib/maths/CEntropySketch.cc
@@ -45,8 +45,9 @@ void CEntropySketch::generateProjection(std::size_t category, TDoubleVec& projec
     for (std::size_t i = 0u; i < projection.size(); i += 2) {
         double w1 = boost::math::double_constants::pi * (projection[i] - 0.5);
         double w2 = -std::log(projection[i + 1]);
-        projection[i / 2] = std::tan(w1) * (boost::math::double_constants::half_pi - w1) +
-                            std::log(w2 * std::cos(w1) / (boost::math::double_constants::half_pi - w1));
+        projection[i / 2] =
+            std::tan(w1) * (boost::math::double_constants::half_pi - w1) +
+            std::log(w2 * std::cos(w1) / (boost::math::double_constants::half_pi - w1));
     }
     projection.resize(m_Yi.size());
     LOG_TRACE(<< "projection = " << core::CContainerPrinter::print(projection));
diff --git a/lib/maths/CExpandingWindow.cc b/lib/maths/CExpandingWindow.cc
index df0bf7ce72..6491c85b62 100644
--- a/lib/maths/CExpandingWindow.cc
+++ b/lib/maths/CExpandingWindow.cc
@@ -26,11 +26,12 @@ const std::string BUCKET_VALUES_TAG("b");
 const std::string START_TIME_TAG("c");
 }
 
-CExpandingWindow::CExpandingWindow(core_t::TTime bucketLength, TTimeCRng bucketLengths, std::size_t size, double decayRate)
-    : m_DecayRate(decayRate),
-      m_BucketLength(bucketLength),
-      m_BucketLengths(bucketLengths),
-      m_BucketLengthIndex(0),
+CExpandingWindow::CExpandingWindow(core_t::TTime bucketLength,
+                                   TTimeCRng bucketLengths,
+                                   std::size_t size,
+                                   double decayRate)
+    : m_DecayRate(decayRate), m_BucketLength(bucketLength),
+      m_BucketLengths(bucketLengths), m_BucketLengthIndex(0),
       m_StartTime(boost::numeric::bounds<core_t::TTime>::lowest()),
       m_BucketValues(size % 2 == 0 ? size : size + 1) {
 }
@@ -41,7 +42,8 @@ bool CExpandingWindow::acceptRestoreTraverser(core::CStateRestoreTraverser& trav
         const std::string& name = traverser.name();
         RESTORE_BUILT_IN(BUCKET_LENGTH_INDEX_TAG, m_BucketLengthIndex)
         RESTORE_BUILT_IN(START_TIME_TAG, m_StartTime)
-        RESTORE(BUCKET_VALUES_TAG, core::CPersistUtils::restore(BUCKET_VALUES_TAG, m_BucketValues, traverser));
+        RESTORE(BUCKET_VALUES_TAG,
+                core::CPersistUtils::restore(BUCKET_VALUES_TAG, m_BucketValues, traverser));
     } while (traverser.next());
     return true;
 }
@@ -57,7 +59,8 @@ core_t::TTime CExpandingWindow::startTime() const {
 }
 
 core_t::TTime CExpandingWindow::endTime() const {
-    return m_StartTime + (static_cast<core_t::TTime>(m_BucketValues.size()) * m_BucketLengths[m_BucketLengthIndex]);
+    return m_StartTime + (static_cast<core_t::TTime>(m_BucketValues.size()) *
+                          m_BucketLengths[m_BucketLengthIndex]);
 }
 
 core_t::TTime CExpandingWindow::bucketLength() const {
@@ -68,11 +71,13 @@ const CExpandingWindow::TFloatMeanAccumulatorVec& CExpandingWindow::values() con
     return m_BucketValues;
 }
 
-CExpandingWindow::TFloatMeanAccumulatorVec CExpandingWindow::valuesMinusPrediction(const TPredictor& predictor) const {
+CExpandingWindow::TFloatMeanAccumulatorVec
+CExpandingWindow::valuesMinusPrediction(const TPredictor& predictor) const {
     core_t::TTime start{CIntegerTools::floor(this->startTime(), m_BucketLength)};
     core_t::TTime end{CIntegerTools::ceil(this->endTime(), m_BucketLength)};
     core_t::TTime size{static_cast<core_t::TTime>(m_BucketValues.size())};
-    core_t::TTime offset{static_cast<core_t::TTime>(CBasicStatistics::mean(m_MeanOffset) + 0.5)};
+    core_t::TTime offset{
+        static_cast<core_t::TTime>(CBasicStatistics::mean(m_MeanOffset) + 0.5)};
 
     TFloatMeanAccumulatorVec predictions(size);
     for (core_t::TTime time = start + offset; time < end; time += m_BucketLength) {
@@ -85,7 +90,8 @@ CExpandingWindow::TFloatMeanAccumulatorVec CExpandingWindow::valuesMinusPredicti
     TFloatMeanAccumulatorVec result(m_BucketValues);
     for (core_t::TTime i = 0; i < size; ++i) {
         if (CBasicStatistics::count(result[i]) > 0.0) {
-            CBasicStatistics::moment<0>(result[i]) -= CBasicStatistics::mean(predictions[i]);
+            CBasicStatistics::moment<0>(result[i]) -=
+                CBasicStatistics::mean(predictions[i]);
         }
     }
 
@@ -115,10 +121,12 @@ void CExpandingWindow::add(core_t::TTime time, double value, double weight) {
         if (m_BucketLengthIndex == 0) {
             m_StartTime = CIntegerTools::floor(time, m_BucketLengths[0]);
         } else {
-            std::size_t compression = m_BucketLengths[m_BucketLengthIndex] / m_BucketLengths[m_BucketLengthIndex - 1];
+            std::size_t compression = m_BucketLengths[m_BucketLengthIndex] /
+                                      m_BucketLengths[m_BucketLengthIndex - 1];
             for (std::size_t i = 0u; i < m_BucketValues.size(); i += compression, ++end) {
                 std::swap(*end, m_BucketValues[i]);
-                for (std::size_t j = 1u; j < compression && i + j < m_BucketValues.size(); ++j) {
+                for (std::size_t j = 1u;
+                     j < compression && i + j < m_BucketValues.size(); ++j) {
                     *end += m_BucketValues[i + j];
                 }
             }
@@ -126,7 +134,8 @@ void CExpandingWindow::add(core_t::TTime time, double value, double weight) {
             std::fill(end, m_BucketValues.end(), TFloatMeanAccumulator());
         }
 
-        m_BucketValues[(time - m_StartTime) / m_BucketLengths[m_BucketLengthIndex]].add(value, weight);
+        m_BucketValues[(time - m_StartTime) / m_BucketLengths[m_BucketLengthIndex]]
+            .add(value, weight);
         m_MeanOffset.add(static_cast<double>(time % m_BucketLength));
     }
 }
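When CExpandingWindow moves to a coarser bucket length, add() above compacts
the existing buckets in place: every group of `compression` consecutive
buckets is merged into one. The same idea on a plain vector of (count, mean)
accumulators, a simplified stand-in for TFloatMeanAccumulator; note the real
code keeps the buffer at a fixed size and refills the tail with empty
accumulators rather than erasing it:

#include <cstddef>
#include <vector>

struct MeanAccumulator {
    double count = 0.0, mean = 0.0;
    void add(const MeanAccumulator& rhs) {
        double n = count + rhs.count;
        if (n > 0.0) {
            mean = (count * mean + rhs.count * rhs.mean) / n;
        }
        count = n;
    }
};

void compress(std::vector<MeanAccumulator>& buckets, std::size_t compression) {
    std::size_t end = 0;
    for (std::size_t i = 0; i < buckets.size(); i += compression, ++end) {
        MeanAccumulator merged = buckets[i];
        for (std::size_t j = 1; j < compression && i + j < buckets.size(); ++j) {
            merged.add(buckets[i + j]); // fold the group into one bucket
        }
        buckets[end] = merged; // end <= i, so this never clobbers unread input
    }
    buckets.erase(buckets.begin() + static_cast<std::ptrdiff_t>(end), buckets.end());
}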
diff --git a/lib/maths/CGammaRateConjugate.cc b/lib/maths/CGammaRateConjugate.cc
index 7d09e97f72..5cedb059fb 100644
--- a/lib/maths/CGammaRateConjugate.cc
+++ b/lib/maths/CGammaRateConjugate.cc
@@ -57,7 +57,8 @@ const double NON_INFORMATIVE_COUNT = 3.5;
 
 //! Compute the coefficient of variance of the sample moments.
 double minimumCoefficientOfVariation(bool isInteger, double mean) {
-    return std::max(MINIMUM_COEFFICIENT_OF_VARIATION, isInteger ? std::sqrt(1.0 / 12.0) / mean : 0.0);
+    return std::max(MINIMUM_COEFFICIENT_OF_VARIATION,
+                    isInteger ? std::sqrt(1.0 / 12.0) / mean : 0.0);
 }
 
 //! Apply the minimum coefficient of variation constraint to the sample
@@ -118,9 +119,12 @@ void truncateVariance(bool isInteger, TMeanAccumulator& logMean, TMeanVarAccumul
 //! function for gamma distributed data with known prior for the rate.
 class CLikelihoodDerivativeFunction : public std::unary_function<double, double> {
 public:
-    CLikelihoodDerivativeFunction(double numberSamples, double target) : m_NumberSamples(numberSamples), m_Target(target) {}
+    CLikelihoodDerivativeFunction(double numberSamples, double target)
+        : m_NumberSamples(numberSamples), m_Target(target) {}
 
-    double operator()(double x) const { return boost::math::digamma(m_NumberSamples * x) - boost::math::digamma(x) - m_Target; }
+    double operator()(double x) const {
+        return boost::math::digamma(m_NumberSamples * x) - boost::math::digamma(x) - m_Target;
+    }
 
 private:
     double m_NumberSamples;
@@ -208,7 +212,8 @@ double maximumLikelihoodShape(double oldShape,
         // in one iteration and not overshoot too much. Again we truncate
         // the values so that bracketing loop is well behaved.
         double dTarget = std::fabs(target - oldTarget);
-        downFactor = CTools::truncate(1.0 - 2.0 * dTarget / gradient, MIN_DOWN_FACTOR, 1.0 - EPS);
+        downFactor = CTools::truncate(1.0 - 2.0 * dTarget / gradient,
+                                      MIN_DOWN_FACTOR, 1.0 - EPS);
         upFactor = CTools::truncate(1.0 + 2.0 * dTarget / gradient, 1.0 + EPS, MAX_UP_FACTOR);
     }
@@ -252,33 +257,42 @@ double maximumLikelihoodShape(double oldShape,
             }
         }
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to bracket root: " << e.what() << ", newNumber = " << newNumber << ", newMean = " << newMean
-                  << ", newLogMean = " << newLogMean << ", x0 = " << x0 << ", f(x0) = " << f0
+        LOG_ERROR(<< "Failed to bracket root: " << e.what() << ", newNumber = " << newNumber
+                  << ", newMean = " << newMean << ", newLogMean = " << newLogMean
+                  << ", x0 = " << x0 << ", f(x0) = " << f0
                   << ", bracket = " << core::CContainerPrinter::print(bracket)
-                  << ", f(bracket) = " << core::CContainerPrinter::print(fBracket) << ", bestGuess = " << bestGuess);
+                  << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)
+                  << ", bestGuess = " << bestGuess);
         return bestGuess;
     }
 
     if (maxIterations == 0) {
         LOG_TRACE(<< "Failed to bracket root:"
-                  << " newNumber = " << newNumber << ", newMean = " << newMean << ", newLogMean = " << newLogMean << ", x0 = " << x0
-                  << ", f(x0) = " << f0 << ", bracket = " << core::CContainerPrinter::print(bracket)
-                  << ", f(bracket) = " << core::CContainerPrinter::print(fBracket) << ", bestGuess = " << bestGuess);
+                  << " newNumber = " << newNumber << ", newMean = " << newMean
+                  << ", newLogMean = " << newLogMean << ", x0 = " << x0 << ", f(x0) = " << f0
+                  << ", bracket = " << core::CContainerPrinter::print(bracket)
+                  << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)
+                  << ", bestGuess = " << bestGuess);
         return bestGuess;
     }
 
-    LOG_TRACE(<< "newNumber = " << newNumber << ", newMean = " << newMean << ", newLogMean = " << newLogMean
-              << ", oldTarget = " << oldTarget << ", target = " << target << ", upFactor = " << upFactor << ", downFactor = " << downFactor
-              << ", x0 = " << x0 << ", f(x0) = " << f0 << ", bracket = " << core::CContainerPrinter::print(bracket)
+    LOG_TRACE(<< "newNumber = " << newNumber << ", newMean = " << newMean
+              << ", newLogMean = " << newLogMean << ", oldTarget = " << oldTarget
+              << ", target = " << target << ", upFactor = " << upFactor
+              << ", downFactor = " << downFactor << ", x0 = " << x0 << ", f(x0) = " << f0
+              << ", bracket = " << core::CContainerPrinter::print(bracket)
              << ", f(bracket) = " << core::CContainerPrinter::print(fBracket));
 
     try {
         CEqualWithTolerance<double> tolerance(CToleranceTypes::E_AbsoluteTolerance, EPS * x0);
-        CSolvers::solve(bracket.first, bracket.second, fBracket.first, fBracket.second, derivative, maxIterations, tolerance, bestGuess);
+        CSolvers::solve(bracket.first, bracket.second, fBracket.first, fBracket.second,
+                        derivative, maxIterations, tolerance, bestGuess);
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to solve: " << e.what() << ", newNumber = " << newNumber << ", x0 = " << x0 << ", f(x0) = " << f0
+        LOG_ERROR(<< "Failed to solve: " << e.what() << ", newNumber = " << newNumber
+                  << ", x0 = " << x0 << ", f(x0) = " << f0
                   << ", bracket = " << core::CContainerPrinter::print(bracket)
-                  << ", f(bracket) = " << core::CContainerPrinter::print(fBracket) << ", bestGuess = " << bestGuess);
+                  << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)
+                  << ", bestGuess = " << bestGuess);
         return bestGuess;
     }
 
@@ -289,7 +303,9 @@
 //! 
Adds "weight" x "right operand" to the "left operand". struct SPlusWeight { - double operator()(double lhs, double rhs, double weight = 1.0) const { return lhs + weight * rhs; } + double operator()(double lhs, double rhs, double weight = 1.0) const { + return lhs + weight * rhs; + } }; //! Evaluate \p func on the joint predictive distribution for \p samples @@ -346,7 +362,8 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, static const double MINIMUM_GAMMA_SHAPE = 100.0; - LOG_TRACE(<< "likelihoodShape = " << likelihoodShape << ", priorShape = " << priorShape << ", priorRate = " << priorRate); + LOG_TRACE(<< "likelihoodShape = " << likelihoodShape + << ", priorShape = " << priorShape << ", priorRate = " << priorRate); try { if (isNonInformative) { @@ -390,7 +407,8 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, double n = maths_t::count(weightStyles, weights[i]); double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * + maths_t::countVarianceScale(weightStyles, weights[i]); double x = samples[i] + offset; LOG_TRACE(<< "x = " << x); @@ -417,7 +435,8 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, double n = maths_t::count(weightStyles, weights[i]); double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * + maths_t::countVarianceScale(weightStyles, weights[i]); double x = samples[i] + offset; double scaledLikelihoodShape = likelihoodShape / varianceScale; double scaledPriorRate = varianceScale * priorRate; @@ -429,8 +448,9 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, } } } catch (const std::exception& e) { - LOG_ERROR(<< "Error calculating joint distribution: " << e.what() << ", offset = " << offset - << ", likelihoodShape = " << likelihoodShape << ", priorShape = " << priorShape << ", priorRate = " << priorRate + LOG_ERROR(<< "Error calculating joint distribution: " << e.what() + << ", offset = " << offset << ", likelihoodShape = " << likelihoodShape + << ", priorShape = " << priorShape << ", priorRate = " << priorRate << ", samples = " << core::CContainerPrinter::print(samples)); return false; } @@ -457,27 +477,15 @@ class CEvaluateOnSamples : core::CNonCopyable { double likelihoodShape, double priorShape, double priorRate) - : m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Offset(offset), - m_LikelihoodShape(likelihoodShape), - m_PriorShape(priorShape), + : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + m_IsNonInformative(isNonInformative), m_Offset(offset), + m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape), m_PriorRate(priorRate) {} bool operator()(double x, double& result) const { - return evaluateFunctionOnJointDistribution(m_WeightStyles, - m_Samples, - m_Weights, - F(), - SPlusWeight(), - m_IsNonInformative, - m_Offset + x, - m_LikelihoodShape, - m_PriorShape, - m_PriorRate, - result); + return evaluateFunctionOnJointDistribution( + m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, + m_Offset + x, m_LikelihoodShape, m_PriorShape, m_PriorRate, result); } private: @@ -507,33 +515,21 @@ class CProbabilityOfLessLikelySamples : 
core::CNonCopyable { double likelihoodShape, double priorShape, double priorRate) - : m_Calculation(calculation), - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Offset(offset), - m_LikelihoodShape(likelihoodShape), - m_PriorShape(priorShape), - m_PriorRate(priorRate), - m_Tail(0) {} + : m_Calculation(calculation), m_WeightStyles(weightStyles), + m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), + m_Offset(offset), m_LikelihoodShape(likelihoodShape), + m_PriorShape(priorShape), m_PriorRate(priorRate), m_Tail(0) {} bool operator()(double x, double& result) const { CJointProbabilityOfLessLikelySamples probability; maths_t::ETail tail = maths_t::E_UndeterminedTail; if (!evaluateFunctionOnJointDistribution( - m_WeightStyles, - m_Samples, - m_Weights, - boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), - CJointProbabilityOfLessLikelySamples::SAddProbability(), - m_IsNonInformative, - m_Offset + x, - m_LikelihoodShape, - m_PriorShape, - m_PriorRate, - probability) || + m_WeightStyles, m_Samples, m_Weights, + boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), + _1, _2, boost::ref(tail)), + CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative, + m_Offset + x, m_LikelihoodShape, m_PriorShape, m_PriorRate, probability) || !probability.calculate(result)) { LOG_ERROR(<< "Failed to compute probability of less likely samples"); return false; @@ -585,17 +581,10 @@ class CLogMarginalLikelihood : core::CNonCopyable { double likelihoodShape, double priorShape, double priorRate) - : m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_Offset(offset), - m_LikelihoodShape(likelihoodShape), - m_PriorShape(priorShape), - m_PriorRate(priorRate), - m_NumberSamples(0.0), - m_ImpliedShape(0.0), - m_Constant(0.0), - m_ErrorStatus(maths_t::E_FpNoErrors) { + : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + m_Offset(offset), m_LikelihoodShape(likelihoodShape), + m_PriorShape(priorShape), m_PriorRate(priorRate), m_NumberSamples(0.0), + m_ImpliedShape(0.0), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { this->precompute(); } @@ -612,8 +601,9 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { for (std::size_t i = 0u; i < m_Samples.size(); ++i) { double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * - maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * + maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); double sample = m_Samples[i] + x + m_Offset; @@ -631,7 +621,8 @@ class CLogMarginalLikelihood : core::CNonCopyable { this->addErrorStatus(maths_t::E_FpOverflowed); return false; } - logSamplesSum += n * (m_LikelihoodShape / varianceScale - 1.0) * std::log(sample); + logSamplesSum += n * (m_LikelihoodShape / varianceScale - 1.0) * + std::log(sample); sampleSum += n / varianceScale * sample; } } catch (const std::exception& e) { @@ -640,13 +631,16 @@ class CLogMarginalLikelihood : core::CNonCopyable { return false; } - result = m_Constant + logSamplesSum - m_ImpliedShape * std::log(m_PriorRate + sampleSum) - logSeasonalScaleSum; + result = m_Constant + logSamplesSum - + m_ImpliedShape * std::log(m_PriorRate + sampleSum) - logSeasonalScaleSum; return true; } //! 
Retrieve the error status for the integration. - maths_t::EFloatingPointErrorStatus errorStatus() const { return m_ErrorStatus; } + maths_t::EFloatingPointErrorStatus errorStatus() const { + return m_ErrorStatus; + } private: //! Compute all the constants in the integrand. @@ -660,12 +654,15 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { for (std::size_t i = 0u; i < m_Weights.size(); ++i) { double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); - double varianceScale = maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * - maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * + maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); m_NumberSamples += n; if (varianceScale != 1.0) { - logVarianceScaleSum -= m_LikelihoodShape / varianceScale * std::log(varianceScale); - logGammaScaledLikelihoodShape += n * boost::math::lgamma(m_LikelihoodShape / varianceScale); + logVarianceScaleSum -= m_LikelihoodShape / varianceScale * + std::log(varianceScale); + logGammaScaledLikelihoodShape += + n * boost::math::lgamma(m_LikelihoodShape / varianceScale); scaledImpliedShape += n * m_LikelihoodShape / varianceScale; } else { nResidual += n; @@ -676,8 +673,10 @@ class CLogMarginalLikelihood : core::CNonCopyable { LOG_TRACE(<< "numberSamples = " << m_NumberSamples); - m_Constant = m_PriorShape * std::log(m_PriorRate) - boost::math::lgamma(m_PriorShape) + logVarianceScaleSum - - logGammaScaledLikelihoodShape - nResidual * boost::math::lgamma(m_LikelihoodShape) + + m_Constant = m_PriorShape * std::log(m_PriorRate) - + boost::math::lgamma(m_PriorShape) + + logVarianceScaleSum - logGammaScaledLikelihoodShape - + nResidual * boost::math::lgamma(m_LikelihoodShape) + boost::math::lgamma(m_ImpliedShape); } catch (const std::exception& e) { LOG_ERROR(<< "Error calculating marginal likelihood: " << e.what()); @@ -726,39 +725,32 @@ CGammaRateConjugate::CGammaRateConjugate(maths_t::EDataType dataType, double rate, double decayRate, double offsetMargin) - : CPrior(dataType, decayRate), - m_Offset(offset), - m_OffsetMargin(offsetMargin), - m_LikelihoodShape(1.0), - m_PriorShape(shape), - m_PriorRate(rate) { + : CPrior(dataType, decayRate), m_Offset(offset), m_OffsetMargin(offsetMargin), + m_LikelihoodShape(1.0), m_PriorShape(shape), m_PriorRate(rate) { } CGammaRateConjugate::CGammaRateConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser, double offsetMargin) - : CPrior(params.s_DataType, 0.0), - m_Offset(0.0), - m_OffsetMargin(offsetMargin), - m_LikelihoodShape(1.0), - m_PriorShape(0.0), - m_PriorRate(0.0) { - traverser.traverseSubLevel(boost::bind(&CGammaRateConjugate::acceptRestoreTraverser, this, _1)); + : CPrior(params.s_DataType, 0.0), m_Offset(0.0), m_OffsetMargin(offsetMargin), + m_LikelihoodShape(1.0), m_PriorShape(0.0), m_PriorRate(0.0) { + traverser.traverseSubLevel( + boost::bind(&CGammaRateConjugate::acceptRestoreTraverser, this, _1)); } bool CGammaRateConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) RESTORE_BUILT_IN(OFFSET_TAG, m_Offset) 
RESTORE_BUILT_IN(LIKELIHOOD_SHAPE_TAG, m_LikelihoodShape) RESTORE(LOG_SAMPLES_MEAN_TAG, m_LogSamplesMean.fromDelimited(traverser.value())) RESTORE(SAMPLE_MOMENTS_TAG, m_SampleMoments.fromDelimited(traverser.value())) RESTORE_BUILT_IN(PRIOR_SHAPE_TAG, m_PriorShape) RESTORE_BUILT_IN(PRIOR_RATE_TAG, m_PriorRate) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) } while (traverser.next()); @@ -766,9 +758,12 @@ bool CGammaRateConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& t return true; } -CGammaRateConjugate -CGammaRateConjugate::nonInformativePrior(maths_t::EDataType dataType, double offset, double decayRate, double offsetMargin) { - return CGammaRateConjugate(dataType, offset + offsetMargin, NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, decayRate, offsetMargin); +CGammaRateConjugate CGammaRateConjugate::nonInformativePrior(maths_t::EDataType dataType, + double offset, + double decayRate, + double offsetMargin) { + return CGammaRateConjugate(dataType, offset + offsetMargin, NON_INFORMATIVE_SHAPE, + NON_INFORMATIVE_RATE, decayRate, offsetMargin); } CGammaRateConjugate::EPrior CGammaRateConjugate::type() const { @@ -780,7 +775,8 @@ CGammaRateConjugate* CGammaRateConjugate::clone() const { } void CGammaRateConjugate::setToNonInformative(double offset, double decayRate) { - *this = nonInformativePrior(this->dataType(), offset + this->offsetMargin(), decayRate, this->offsetMargin()); + *this = nonInformativePrior(this->dataType(), offset + this->offsetMargin(), + decayRate, this->offsetMargin()); } double CGammaRateConjugate::offsetMargin() const { @@ -791,7 +787,9 @@ bool CGammaRateConjugate::needsOffset() const { return true; } -double CGammaRateConjugate::adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +double CGammaRateConjugate::adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { COffsetCost cost(*this); CApplyOffset apply(*this); return this->adjustOffsetWithCost(weightStyles, samples, weights, cost, apply); @@ -801,13 +799,16 @@ double CGammaRateConjugate::offset() const { return m_Offset; } -void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { if (samples.empty()) { return; } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return; } @@ -889,7 +890,8 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, const for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * + maths_t::countVarianceScale(weightStyles, weights[i]); double x = samples[i] + m_Offset; if (!CMathsFuncs::isFinite(x) || x <= 0.0) { @@ -897,12 +899,14 @@ 
void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, const continue; } - double shift_ = -shift + boost::math::digamma(m_LikelihoodShape / varianceScale) + std::log(varianceScale); + double shift_ = -shift + boost::math::digamma(m_LikelihoodShape / varianceScale) + + std::log(varianceScale); if (this->isInteger()) { double logxInvPlus1 = std::log(1.0 / x + 1.0); double logxPlus1 = std::log(x + 1.0); - m_LogSamplesMean.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift_, n / varianceScale); + m_LogSamplesMean.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift_, + n / varianceScale); m_SampleMoments.add(x + 0.5, n / varianceScale); } else { m_LogSamplesMean.add(std::log(x) - shift_, n / varianceScale); @@ -928,10 +932,13 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, const detail::truncateVariance(this->isInteger(), logSamplesMean, sampleMoments); detail::truncateVariance(this->isInteger(), m_LogSamplesMean, m_SampleMoments); - m_LikelihoodShape = detail::maximumLikelihoodShape(m_LikelihoodShape, logSamplesMean, m_LogSamplesMean, sampleMoments, m_SampleMoments); + m_LikelihoodShape = detail::maximumLikelihoodShape( + m_LikelihoodShape, logSamplesMean, m_LogSamplesMean, sampleMoments, m_SampleMoments); - LOG_TRACE(<< "m_Offset = " << m_Offset << ", m_LikelihoodShape = " << m_LikelihoodShape << ", m_LogSamplesMean = " << m_LogSamplesMean - << ", m_SampleMoments = " << m_SampleMoments << ", m_PriorShape = " << m_PriorShape << ", m_PriorRate = " << m_PriorRate); + LOG_TRACE(<< "m_Offset = " << m_Offset << ", m_LikelihoodShape = " << m_LikelihoodShape + << ", m_LogSamplesMean = " << m_LogSamplesMean + << ", m_SampleMoments = " << m_SampleMoments << ", m_PriorShape = " << m_PriorShape + << ", m_PriorRate = " << m_PriorRate); if (this->isBad()) { LOG_ERROR(<< "Update failed (" << this->debug() << ")"); @@ -973,18 +980,21 @@ void CGammaRateConjugate::propagateForwardsByTime(double time) { double count = CBasicStatistics::count(m_LogSamplesMean); double alpha = std::exp(-this->decayRate() * time); - alpha = count > detail::NON_INFORMATIVE_COUNT ? (alpha * count + (1.0 - alpha) * detail::NON_INFORMATIVE_COUNT) / count : 1.0; + alpha = count > detail::NON_INFORMATIVE_COUNT + ? (alpha * count + (1.0 - alpha) * detail::NON_INFORMATIVE_COUNT) / count + : 1.0; if (alpha < 1.0) { m_LogSamplesMean.age(alpha); m_SampleMoments.age(alpha); - m_LikelihoodShape = - detail::maximumLikelihoodShape(m_LikelihoodShape, logSamplesMean, m_LogSamplesMean, sampleMoments, m_SampleMoments); + m_LikelihoodShape = detail::maximumLikelihoodShape( + m_LikelihoodShape, logSamplesMean, m_LogSamplesMean, sampleMoments, m_SampleMoments); } this->numberSamples(this->numberSamples() * alpha); - LOG_TRACE(<< "m_LikelihoodShape = " << m_LikelihoodShape << ", m_LogSamplesMean = " << m_LogSamplesMean - << ", m_SampleMoments = " << m_SampleMoments << ", numberSamples = " << this->numberSamples()); + LOG_TRACE(<< "m_LikelihoodShape = " << m_LikelihoodShape << ", m_LogSamplesMean = " + << m_LogSamplesMean << ", m_SampleMoments = " << m_SampleMoments + << ", numberSamples = " << this->numberSamples()); } CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodSupport() const { @@ -995,11 +1005,15 @@ double CGammaRateConjugate::marginalLikelihoodMean() const { return this->isInteger() ? 
this->mean() - 0.5 : this->mean(); } -double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { double varianceScale = 1.0; try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale: " << e.what()); } + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * + maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get variance scale: " << e.what()); + } if (!this->isNonInformative()) { // We use the fact that the marginal likelihood is the distribution @@ -1018,11 +1032,13 @@ double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weight if (scaledLikelihoodShape > 1.0 && this->priorShape() > 1.0) { try { double scaledPriorRate = varianceScale * this->priorRate(); - boost::math::beta_distribution<> beta(scaledLikelihoodShape, this->priorShape()); + boost::math::beta_distribution<> beta(scaledLikelihoodShape, + this->priorShape()); double mode = boost::math::mode(beta); return scaledPriorRate * mode / (1.0 - mode) - m_Offset; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what() << ", likelihood shape = " << m_LikelihoodShape + LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what() + << ", likelihood shape = " << m_LikelihoodShape << ", prior shape = " << this->priorShape()); } } @@ -1041,7 +1057,8 @@ double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weight return std::max(mean == 0.0 ? 
0.0 : mean - variance / mean, 0.0) - m_Offset; } -double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } @@ -1061,19 +1078,24 @@ double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& we double varianceScale = 1.0; try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale: " << e.what()); } + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * + maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get variance scale: " << e.what()); + } double a = this->priorShape(); if (a <= 2.0) { return varianceScale * CBasicStatistics::variance(m_SampleMoments); } double b = this->priorRate(); - return varianceScale * (1.0 + m_LikelihoodShape / (a - 1.0)) * m_LikelihoodShape * b * b / (a - 1.0) / (a - 2.0); + return varianceScale * (1.0 + m_LikelihoodShape / (a - 1.0)) * + m_LikelihoodShape * b * b / (a - 1.0) / (a - 2.0); } -CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +CGammaRateConjugate::TDoubleDoublePr +CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -1092,7 +1114,8 @@ CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodConf // and beta equal to m_PriorShape. try { - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); + double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * + maths_t::countVarianceScale(weightStyles, weights); double scaledLikelihoodShape = m_LikelihoodShape / varianceScale; double scaledPriorRate = varianceScale * this->priorRate(); boost::math::beta_distribution<> beta(scaledLikelihoodShape, this->priorShape()); @@ -1101,19 +1124,23 @@ CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodConf double x2 = x1; if (percentage > 0.0) { x2 = boost::math::quantile(beta, (1.0 + percentage) / 2.0); - x2 = scaledPriorRate * x2 / (1.0 - x2) - m_Offset - (this->isInteger() ? 0.5 : 0.0); + x2 = scaledPriorRate * x2 / (1.0 - x2) - m_Offset - + (this->isInteger() ? 
            0.5 : 0.0);
         }

         LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2);

         return std::make_pair(x1, x2);
-    } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); }
+    } catch (const std::exception& e) {
+        LOG_ERROR(<< "Failed to compute confidence interval: " << e.what());
+    }

     return this->marginalLikelihoodSupport();
 }

-maths_t::EFloatingPointErrorStatus CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                                                   const TDouble1Vec& samples,
-                                                                                   const TDouble4Vec1Vec& weights,
-                                                                                   double& result) const {
+maths_t::EFloatingPointErrorStatus
+CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                const TDouble1Vec& samples,
+                                                const TDouble4Vec1Vec& weights,
+                                                double& result) const {
     result = 0.0;

     if (samples.empty()) {
@@ -1122,7 +1149,8 @@ maths_t::EFloatingPointErrorStatus CGammaRateConjugate::jointLogMarginalLikeliho
     }

     if (samples.size() != weights.size()) {
-        LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+        LOG_ERROR(<< "Mismatch in samples '"
+                  << core::CContainerPrinter::print(samples) << "' and weights '"
                   << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }
@@ -1143,17 +1171,20 @@ maths_t::EFloatingPointErrorStatus CGammaRateConjugate::jointLogMarginalLikeliho
     maths_t::EFloatingPointErrorStatus status = maths_t::E_FpFailed;
     try {
         detail::CLogMarginalLikelihood logMarginalLikelihood(
-            weightStyles, samples, weights, m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate());
+            weightStyles, samples, weights, m_Offset, m_LikelihoodShape,
+            this->priorShape(), this->priorRate());
         if (this->isInteger()) {
             // If the data are discrete we compute the approximate expectation
             // w.r.t. to the hidden offset of the samples Z, which is uniform
             // on the interval [0,1].
-            CIntegration::logGaussLegendre<CIntegration::OrderThree>(logMarginalLikelihood, 0.0, 1.0, result);
+            CIntegration::logGaussLegendre<CIntegration::OrderThree>(
+                logMarginalLikelihood, 0.0, 1.0, result);
         } else {
             logMarginalLikelihood(0.0, result);
         }
-        status = static_cast<maths_t::EFloatingPointErrorStatus>(logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
+        status = static_cast<maths_t::EFloatingPointErrorStatus>(
+            logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
         if (status & maths_t::E_FpFailed) {
             LOG_ERROR(<< "Failed to compute log likelihood (" << this->debug() << ")");
             LOG_ERROR(<< "samples = " << core::CContainerPrinter::print(samples));
@@ -1163,11 +1194,14 @@ maths_t::EFloatingPointErrorStatus CGammaRateConjugate::jointLogMarginalLikeliho
             LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples));
             LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(weights));
         }
-    } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute likelihood: " << e.what()); }
+    } catch (const std::exception& e) {
+        LOG_ERROR(<< "Failed to compute likelihood: " << e.what());
+    }

     return status;
 }

-void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const {
+void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
+                                                   TDouble1Vec& samples) const {
     samples.clear();

     if (numberSamples == 0 || this->numberSamples() == 0.0) {
@@ -1178,7 +1212,8 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TD

     // We can't sample the marginal likelihood directly so match sample
     // moments and sampled moments.
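The moment matching referred to here is easy to see end to end. The following sketch is illustrative only (it assumes boost::math and stands a plain gamma in for the actual marginal): split the distribution into n equal-probability intervals and emit each interval's conditional mean, so that the emitted samples reproduce the distribution's mean exactly.

#include <boost/math/distributions/gamma.hpp>
#include <cstddef>
#include <vector>

// Summarise g by n samples s_i = n * (E[X 1{X <= x_i}] - E[X 1{X <= x_{i-1}}]),
// where x_i is the q = i/n quantile; the s_i average to E[X] exactly.
std::vector<double> momentMatchedSamples(const boost::math::gamma_distribution<>& g,
                                         std::size_t n) {
    std::vector<double> result;
    double mean = boost::math::mean(g);
    // For X ~ Gamma(a, s), E[X 1{X <= x}] = a * s * F(x; a + 1, s).
    boost::math::gamma_distribution<> shifted(g.shape() + 1.0, g.scale());
    double lastPartialExpectation = 0.0;
    for (std::size_t i = 1; i < n; ++i) {
        double q = static_cast<double>(i) / static_cast<double>(n);
        double xq = boost::math::quantile(g, q);
        double partialExpectation = mean * boost::math::cdf(shifted, xq);
        result.push_back(static_cast<double>(n) *
                         (partialExpectation - lastPartialExpectation));
        lastPartialExpectation = partialExpectation;
    }
    result.push_back(static_cast<double>(n) * (mean - lastPartialExpectation));
    return result;
}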
-    numberSamples = std::min(numberSamples, static_cast<std::size_t>(this->numberSamples() + 0.5));
+    numberSamples = std::min(
+        numberSamples, static_cast<std::size_t>(this->numberSamples() + 0.5));
     double mean = CBasicStatistics::mean(m_SampleMoments) - m_Offset;
     double deviation = std::sqrt(CBasicStatistics::variance(m_SampleMoments));
     double root_two = boost::math::double_constants::root_two;
@@ -1241,7 +1276,8 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TD
     try {
         boost::math::beta_distribution<> beta1(m_LikelihoodShape, this->priorShape());
-        boost::math::beta_distribution<> beta2(m_LikelihoodShape + 1.0, this->priorShape() - 1.0);
+        boost::math::beta_distribution<> beta2(m_LikelihoodShape + 1.0,
+                                               this->priorShape() - 1.0);

         LOG_TRACE(<< "mean = " << mean << ", numberSamples = " << numberSamples);
@@ -1255,7 +1291,9 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TD

             double partialExpectation = mean * CTools::safeCdf(beta2, xq);

-            double sample = static_cast<double>(numberSamples) * (partialExpectation - lastPartialExpectation) - m_Offset;
+            double sample = static_cast<double>(numberSamples) *
+                                (partialExpectation - lastPartialExpectation) -
+                            m_Offset;

             LOG_TRACE(<< "sample = " << sample);
@@ -1263,14 +1301,17 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TD
             if (sample >= support.first && sample <= support.second) {
                 samples.push_back(sample);
             } else {
-                LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", likelihoodShape = " << m_LikelihoodShape
-                          << ", priorShape = " << this->priorShape() << ", q = " << q << ", x(q) = " << xq << ", mean = " << mean);
+                LOG_ERROR(<< "Sample out of bounds: sample = " << sample
+                          << ", likelihoodShape = " << m_LikelihoodShape
+                          << ", priorShape = " << this->priorShape() << ", q = " << q
+                          << ", x(q) = " << xq << ", mean = " << mean);
             }

             lastPartialExpectation = partialExpectation;
         }

-        double sample = static_cast<double>(numberSamples) * (mean - lastPartialExpectation) - m_Offset;
+        double sample =
+            static_cast<double>(numberSamples) * (mean - lastPartialExpectation) - m_Offset;

         LOG_TRACE(<< "sample = " << sample);
@@ -1278,7 +1319,8 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TD
         if (sample >= support.first && sample <= support.second) {
             samples.push_back(sample);
         } else {
-            LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", likelihoodShape = " << m_LikelihoodShape
+            LOG_ERROR(<< "Sample out of bounds: sample = " << sample
+                      << ", likelihoodShape = " << m_LikelihoodShape
                       << ", priorShape = " << this->priorShape() << ", mean = " << mean);
         }
     } catch (const std::exception& e) {
@@ -1296,16 +1338,19 @@ bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles,

     lowerBound = upperBound = 0.0;

-    TMinusLogCdf minusLogCdf(
-        weightStyles, samples, weights, this->isNonInformative(), m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate());
+    TMinusLogCdf minusLogCdf(weightStyles, samples, weights,
+                             this->isNonInformative(), m_Offset, m_LikelihoodShape,
+                             this->priorShape(), this->priorRate());

     if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. to the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
         double value;
-        if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(minusLogCdf, 0.0, 1.0, value)) {
-            LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples));
+        if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(
+                minusLogCdf, 0.0, 1.0, value)) {
+            LOG_ERROR(<< "Failed computing c.d.f. for "
+                      << core::CContainerPrinter::print(samples));
             return false;
         }
@@ -1315,7 +1360,8 @@ bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles,

     double value;
     if (!minusLogCdf(0.0, value)) {
-        LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples));
+        LOG_ERROR(<< "Failed computing c.d.f. for "
+                  << core::CContainerPrinter::print(samples));
         return false;
     }
@@ -1333,15 +1379,18 @@ bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weig
     lowerBound = upperBound = 0.0;

     TMinusLogCdfComplement minusLogCdfComplement(
-        weightStyles, samples, weights, this->isNonInformative(), m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate());
+        weightStyles, samples, weights, this->isNonInformative(), m_Offset,
+        m_LikelihoodShape, this->priorShape(), this->priorRate());

     if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. to the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
         double value;
-        if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(minusLogCdfComplement, 0.0, 1.0, value)) {
-            LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples));
+        if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(
+                minusLogCdfComplement, 0.0, 1.0, value)) {
+            LOG_ERROR(<< "Failed computing c.d.f. complement for "
+                      << core::CContainerPrinter::print(samples));
             return false;
         }
@@ -1351,7 +1400,8 @@ bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weig

     double value;
     if (!minusLogCdfComplement(0.0, value)) {
-        LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples));
+        LOG_ERROR(<< "Failed computing c.d.f. complement for "
+                  << core::CContainerPrinter::print(samples));
         return false;
     }
@@ -1369,23 +1419,19 @@ bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCa
     lowerBound = upperBound = 0.0;
     tail = maths_t::E_UndeterminedTail;

-    detail::CProbabilityOfLessLikelySamples probability(calculation,
-                                                        weightStyles,
-                                                        samples,
-                                                        weights,
-                                                        this->isNonInformative(),
-                                                        m_Offset,
-                                                        m_LikelihoodShape,
-                                                        this->priorShape(),
-                                                        this->priorRate());
+    detail::CProbabilityOfLessLikelySamples probability(
+        calculation, weightStyles, samples, weights, this->isNonInformative(),
+        m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate());

     if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
         // w.r.t. to the hidden offset of the samples Z, which is uniform
         // on the interval [0,1].
         double value;
-        if (!CIntegration::gaussLegendre<CIntegration::OrderThree>(probability, 0.0, 1.0, value)) {
-            LOG_ERROR(<< "Failed computing probability for " << core::CContainerPrinter::print(samples));
+        if (!CIntegration::gaussLegendre<CIntegration::OrderThree>(probability, 0.0,
+                                                                   1.0, value)) {
+            LOG_ERROR(<< "Failed computing probability for "
+                      << core::CContainerPrinter::print(samples));
             return false;
         }
@@ -1397,7 +1443,8 @@ bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCa

     double value;
     if (!probability(0.0, value)) {
-        LOG_ERROR(<< "Failed computing probability for " << core::CContainerPrinter::print(samples));
+        LOG_ERROR(<< "Failed computing probability for "
+                  << core::CContainerPrinter::print(samples));
         return false;
     }
@@ -1408,7 +1455,8 @@
 }

 bool CGammaRateConjugate::isNonInformative() const {
-    return CBasicStatistics::count(m_SampleMoments) < detail::NON_INFORMATIVE_COUNT || this->priorRate() == NON_INFORMATIVE_RATE;
+    return CBasicStatistics::count(m_SampleMoments) < detail::NON_INFORMATIVE_COUNT ||
+           this->priorRate() == NON_INFORMATIVE_RATE;
 }

 void CGammaRateConjugate::print(const std::string& indent, std::string& result) const {
@@ -1420,7 +1468,8 @@ void CGammaRateConjugate::print(const std::string& indent, std::string& result)
     try {
         if (this->priorShape() > 2.0) {
-            double shape = (this->priorShape() - 2.0) / (this->priorShape() - 1.0) * m_LikelihoodShape;
+            double shape = (this->priorShape() - 2.0) /
+                           (this->priorShape() - 1.0) * m_LikelihoodShape;
             double rate = this->priorRate() / (this->priorShape() - 2.0);
             boost::math::gamma_distribution<> gamma(shape, rate);
             double mean = boost::math::mean(gamma);
@@ -1432,8 +1481,8 @@ void CGammaRateConjugate::print(const std::string& indent, std::string& result)
     } catch (const std::exception&) {}
     double mean = CBasicStatistics::mean(m_SampleMoments);
     double deviation = std::sqrt(CBasicStatistics::variance(m_SampleMoments));
-    result +=
-        "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) + " sd = " + core::CStringUtils::typeToStringPretty(deviation);
+    result += "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) +
+              " sd = " + core::CStringUtils::typeToStringPretty(deviation);
 }

 std::string CGammaRateConjugate::printJointDensityFunction() const {
@@ -1498,12 +1547,14 @@ std::size_t CGammaRateConjugate::staticSize() const {
 void CGammaRateConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision);
     inserter.insertValue(OFFSET_TAG, m_Offset, core::CIEEE754::E_SinglePrecision);
-    inserter.insertValue(LIKELIHOOD_SHAPE_TAG, m_LikelihoodShape, core::CIEEE754::E_SinglePrecision);
+    inserter.insertValue(LIKELIHOOD_SHAPE_TAG, m_LikelihoodShape,
+                         core::CIEEE754::E_SinglePrecision);
     inserter.insertValue(LOG_SAMPLES_MEAN_TAG, m_LogSamplesMean.toDelimited());
     inserter.insertValue(SAMPLE_MOMENTS_TAG, m_SampleMoments.toDelimited());
     inserter.insertValue(PRIOR_SHAPE_TAG, m_PriorShape, core::CIEEE754::E_SinglePrecision);
     inserter.insertValue(PRIOR_RATE_TAG, m_PriorRate, core::CIEEE754::E_SinglePrecision);
-    inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision);
+    inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(),
+                         core::CIEEE754::E_SinglePrecision);
 }

 double CGammaRateConjugate::likelihoodShape() const {
@@ -1519,16 +1570,18 @@ double CGammaRateConjugate::likelihoodRate() const {
boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate()); return boost::math::mean(gamma); } catch (std::exception& e) { - LOG_ERROR(<< "Failed to compute likelihood rate: " << e.what() << ", prior shape = " << this->priorShape() - << ", prior rate = " << this->priorRate()); + LOG_ERROR(<< "Failed to compute likelihood rate: " << e.what() << ", prior shape = " + << this->priorShape() << ", prior rate = " << this->priorRate()); } return 0.0; } -CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::confidenceIntervalRate(double percentage) const { +CGammaRateConjugate::TDoubleDoublePr +CGammaRateConjugate::confidenceIntervalRate(double percentage) const { if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); + return std::make_pair(boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()); } percentage /= 100.0; @@ -1538,19 +1591,25 @@ CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::confidenceIntervalRate try { // The prior distribution for the rate is gamma. boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate()); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), boost::math::quantile(gamma, upperPercentile)); + return std::make_pair(boost::math::quantile(gamma, lowerPercentile), + boost::math::quantile(gamma, upperPercentile)); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute confidence interval: " << e.what() << ", prior shape = " << this->priorShape() + LOG_ERROR(<< "Failed to compute confidence interval: " << e.what() + << ", prior shape = " << this->priorShape() << ", prior rate = " << this->priorRate()); } - return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); + return std::make_pair(boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()); } -bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate& rhs, const TEqualWithTolerance& equal) const { - LOG_DEBUG(<< m_LikelihoodShape << " " << rhs.m_LikelihoodShape << ", " << this->priorShape() << " " << rhs.priorShape() << ", " +bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate& rhs, + const TEqualWithTolerance& equal) const { + LOG_DEBUG(<< m_LikelihoodShape << " " << rhs.m_LikelihoodShape << ", " + << this->priorShape() << " " << rhs.priorShape() << ", " << this->priorRate() << " " << rhs.priorRate()); - return equal(m_LikelihoodShape, rhs.m_LikelihoodShape) && equal(this->priorShape(), rhs.priorShape()) && + return equal(m_LikelihoodShape, rhs.m_LikelihoodShape) && + equal(this->priorShape(), rhs.priorShape()) && equal(this->priorRate(), rhs.priorRate()); } @@ -1574,26 +1633,30 @@ double CGammaRateConjugate::mean() const { } double CGammaRateConjugate::priorShape() const { - return m_PriorShape + RATE_VARIANCE_SCALE * CBasicStatistics::count(m_SampleMoments) * m_LikelihoodShape; + return m_PriorShape + RATE_VARIANCE_SCALE * + CBasicStatistics::count(m_SampleMoments) * m_LikelihoodShape; } double CGammaRateConjugate::priorRate() const { - return m_PriorRate + RATE_VARIANCE_SCALE * CBasicStatistics::count(m_SampleMoments) * CBasicStatistics::mean(m_SampleMoments); + return m_PriorRate + RATE_VARIANCE_SCALE * CBasicStatistics::count(m_SampleMoments) * + CBasicStatistics::mean(m_SampleMoments); } bool CGammaRateConjugate::isBad() const { - return !CMathsFuncs::isFinite(m_Offset) || !CMathsFuncs::isFinite(m_LikelihoodShape) || + return !CMathsFuncs::isFinite(m_Offset) 
|| + !CMathsFuncs::isFinite(m_LikelihoodShape) || !CMathsFuncs::isFinite(CBasicStatistics::count(m_LogSamplesMean)) || !CMathsFuncs::isFinite(CBasicStatistics::moment<0>(m_LogSamplesMean)) || !CMathsFuncs::isFinite(CBasicStatistics::count(m_SampleMoments)) || !CMathsFuncs::isFinite(CBasicStatistics::moment<0>(m_SampleMoments)) || - !CMathsFuncs::isFinite(CBasicStatistics::moment<1>(m_SampleMoments)) || !CMathsFuncs::isFinite(m_PriorShape) || - !CMathsFuncs::isFinite(m_PriorRate); + !CMathsFuncs::isFinite(CBasicStatistics::moment<1>(m_SampleMoments)) || + !CMathsFuncs::isFinite(m_PriorShape) || !CMathsFuncs::isFinite(m_PriorRate); } std::string CGammaRateConjugate::debug() const { std::ostringstream result; - result << std::scientific << std::setprecision(15) << m_Offset << " " << m_LikelihoodShape << " " << m_LogSamplesMean << " " + result << std::scientific << std::setprecision(15) << m_Offset << " " + << m_LikelihoodShape << " " << m_LogSamplesMean << " " << m_SampleMoments << " " << m_PriorShape << " " << m_PriorRate; return result.str(); } diff --git a/lib/maths/CGradientDescent.cc b/lib/maths/CGradientDescent.cc index a4cc64e035..a45adb1d0b 100644 --- a/lib/maths/CGradientDescent.cc +++ b/lib/maths/CGradientDescent.cc @@ -14,7 +14,8 @@ namespace ml { namespace maths { -CGradientDescent::CGradientDescent(double learnRate, double momentum) : m_LearnRate(learnRate), m_Momentum(momentum) { +CGradientDescent::CGradientDescent(double learnRate, double momentum) + : m_LearnRate(learnRate), m_Momentum(momentum) { } void CGradientDescent::learnRate(double learnRate) { @@ -25,7 +26,12 @@ void CGradientDescent::momentum(double momentum) { m_Momentum = momentum; } -bool CGradientDescent::run(std::size_t n, const TVector& x0, const CFunction& f, const CGradient& gf, TVector& xBest, TDoubleVec& fi) { +bool CGradientDescent::run(std::size_t n, + const TVector& x0, + const CFunction& f, + const CGradient& gf, + TVector& xBest, + TDoubleVec& fi) { fi.clear(); fi.reserve(n); @@ -73,10 +79,13 @@ CGradientDescent::CFunction::~CFunction() { CGradientDescent::CGradient::~CGradient() { } -CGradientDescent::CEmpiricalCentralGradient::CEmpiricalCentralGradient(const CFunction& f, double eps) : m_Eps(eps), m_F(f) { +CGradientDescent::CEmpiricalCentralGradient::CEmpiricalCentralGradient(const CFunction& f, + double eps) + : m_Eps(eps), m_F(f) { } -bool CGradientDescent::CEmpiricalCentralGradient::operator()(const TVector& x, TVector& result) const { +bool CGradientDescent::CEmpiricalCentralGradient::operator()(const TVector& x, + TVector& result) const { if (x.dimension() != result.dimension()) { LOG_ERROR(<< "Dimension mismatch"); return false; diff --git a/lib/maths/CGramSchmidt.cc b/lib/maths/CGramSchmidt.cc index 1e9a6d38a7..b18d81ecc7 100644 --- a/lib/maths/CGramSchmidt.cc +++ b/lib/maths/CGramSchmidt.cc @@ -27,7 +27,8 @@ void CGramSchmidt::swap(TVector& x, TVector& y) { x.swap(y); } -const CGramSchmidt::TDoubleVec& CGramSchmidt::minusProjection(TDoubleVec& x, const TDoubleVec& e) { +const CGramSchmidt::TDoubleVec& CGramSchmidt::minusProjection(TDoubleVec& x, + const TDoubleVec& e) { sameDimension(x, e); double n = inner(x, e); for (std::size_t i = 0u; i < x.size(); ++i) { @@ -76,15 +77,17 @@ double CGramSchmidt::inner(const TVector& x, const TVector& y) { void CGramSchmidt::sameDimension(const TDoubleVec& x, const TDoubleVec& y) { if (x.size() != y.size()) { - throw std::runtime_error("Mismatching dimensions: " + core::CStringUtils::typeToString(x.size()) + - " != " + 
core::CStringUtils::typeToString(y.size())); + throw std::runtime_error( + "Mismatching dimensions: " + core::CStringUtils::typeToString(x.size()) + + " != " + core::CStringUtils::typeToString(y.size())); } } void CGramSchmidt::sameDimension(const TVector& x, const TVector& y) { if (x.dimension() != y.dimension()) { - throw std::runtime_error("Mismatching dimensions: " + core::CStringUtils::typeToString(x.dimension()) + - " != " + core::CStringUtils::typeToString(y.dimension())); + throw std::runtime_error( + "Mismatching dimensions: " + core::CStringUtils::typeToString(x.dimension()) + + " != " + core::CStringUtils::typeToString(y.dimension())); } } diff --git a/lib/maths/CInformationCriteria.cc b/lib/maths/CInformationCriteria.cc index 8e853d138e..ad58dafb87 100644 --- a/lib/maths/CInformationCriteria.cc +++ b/lib/maths/CInformationCriteria.cc @@ -36,8 +36,10 @@ double confidence(double df) { return boost::math::quantile(chi, VARIANCE_CONFIDENCE) / df; } -#define LOG_DETERMINANT(N) \ - double logDeterminant(const CSymmetricMatrixNxN& c, double upper) { return logDeterminant_(toDenseMatrix(c), upper); } +#define LOG_DETERMINANT(N) \ + double logDeterminant(const CSymmetricMatrixNxN& c, double upper) { \ + return logDeterminant_(toDenseMatrix(c), upper); \ + } LOG_DETERMINANT(2) LOG_DETERMINANT(3) LOG_DETERMINANT(4) diff --git a/lib/maths/CIntegration.cc b/lib/maths/CIntegration.cc index 1c917d801f..a229015a9b 100644 --- a/lib/maths/CIntegration.cc +++ b/lib/maths/CIntegration.cc @@ -67,101 +67,64 @@ const double* CIntegration::CGaussLegendreQuadrature::abscissas(EOrder order) { const double CIntegration::CGaussLegendreQuadrature::WEIGHTS1[] = {2.0}; const double CIntegration::CGaussLegendreQuadrature::WEIGHTS2[] = {1.0, 1.0}; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS3[] = {0.8888888888888888, 0.5555555555555556, 0.5555555555555556}; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS4[] = {0.6521451548625461, - 0.6521451548625461, - 0.3478548451374538, - 0.3478548451374538}; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS5[] = {0.5688888888888889, - 0.4786286704993665, - 0.4786286704993665, - 0.2369268850561891, - 0.2369268850561891}; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS6[] = - {0.3607615730481386, 0.3607615730481386, 0.4679139345726910, 0.4679139345726910, 0.1713244923791704, 0.1713244923791704}; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS7[] = {0.4179591836734694, - 0.3818300505051189, - 0.3818300505051189, - 0.2797053914892766, - 0.2797053914892766, - 0.1294849661688697, - 0.1294849661688697}; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS8[] = {0.3626837833783620, - 0.3626837833783620, - 0.3137066458778873, - 0.3137066458778873, - 0.2223810344533745, - 0.2223810344533745, - 0.1012285362903763, - 0.1012285362903763}; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS9[] = {0.3302393550012598, - 0.1806481606948574, - 0.1806481606948574, - 0.0812743883615744, - 0.0812743883615744, - 0.3123470770400029, - 0.3123470770400029, - 0.2606106964029354, - 0.2606106964029354}; -const double CIntegration::CGaussLegendreQuadrature::WEIGHTS10[] = {0.2955242247147529, - 0.2955242247147529, - 0.2692667193099963, - 0.2692667193099963, - 0.2190863625159820, - 0.2190863625159820, - 0.1494513491505806, - 0.1494513491505806, - 0.0666713443086881, - 0.0666713443086881}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS3[] = { + 0.8888888888888888, 0.5555555555555556, 
0.5555555555555556}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS4[] = { + 0.6521451548625461, 0.6521451548625461, 0.3478548451374538, 0.3478548451374538}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS5[] = { + 0.5688888888888889, 0.4786286704993665, 0.4786286704993665, + 0.2369268850561891, 0.2369268850561891}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS6[] = { + 0.3607615730481386, 0.3607615730481386, 0.4679139345726910, + 0.4679139345726910, 0.1713244923791704, 0.1713244923791704}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS7[] = { + 0.4179591836734694, 0.3818300505051189, 0.3818300505051189, + 0.2797053914892766, 0.2797053914892766, 0.1294849661688697, + 0.1294849661688697}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS8[] = { + 0.3626837833783620, 0.3626837833783620, 0.3137066458778873, + 0.3137066458778873, 0.2223810344533745, 0.2223810344533745, + 0.1012285362903763, 0.1012285362903763}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS9[] = { + 0.3302393550012598, 0.1806481606948574, 0.1806481606948574, + 0.0812743883615744, 0.0812743883615744, 0.3123470770400029, + 0.3123470770400029, 0.2606106964029354, 0.2606106964029354}; +const double CIntegration::CGaussLegendreQuadrature::WEIGHTS10[] = { + 0.2955242247147529, 0.2955242247147529, 0.2692667193099963, + 0.2692667193099963, 0.2190863625159820, 0.2190863625159820, + 0.1494513491505806, 0.1494513491505806, 0.0666713443086881, + 0.0666713443086881}; const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS1[] = {0.0}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS2[] = {-0.5773502691896257, 0.5773502691896257}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS3[] = {0.0000000000000000, -0.7745966692414834, 0.7745966692414834}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS4[] = {-0.3399810435848563, - 0.3399810435848563, - -0.8611363115940526, - 0.8611363115940526}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS5[] = {0.0000000000000000, - -0.5384693101056831, - 0.5384693101056831, - -0.9061798459386640, - 0.9061798459386640}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS6[] = - {0.6612093864662645, -0.6612093864662645, -0.2386191860831969, 0.2386191860831969, -0.9324695142031521, 0.9324695142031521}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS7[] = {0.0000000000000000, - 0.4058451513773972, - -0.4058451513773972, - -0.7415311855993945, - 0.7415311855993945, - -0.9491079123427585, - 0.9491079123427585}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS8[] = {-0.1834346424956498, - 0.1834346424956498, - -0.5255324099163290, - 0.5255324099163290, - -0.7966664774136267, - 0.7966664774136267, - -0.9602898564975363, - 0.9602898564975363}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS9[] = {0.0000000000000000, - -0.8360311073266358, - 0.8360311073266358, - -0.9681602395076261, - 0.9681602395076261, - -0.3242534234038089, - 0.3242534234038089, - -0.6133714327005904, - 0.6133714327005904}; -const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS10[] = {-0.1488743389816312, - 0.1488743389816312, - -0.4333953941292472, - 0.4333953941292472, - -0.6794095682990244, - 0.6794095682990244, - -0.8650633666889845, - 0.8650633666889845, - -0.9739065285171717, - 0.9739065285171717}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS2[] = { + -0.5773502691896257, 0.5773502691896257}; +const double 
CIntegration::CGaussLegendreQuadrature::ABSCISSAS3[] = { + 0.0000000000000000, -0.7745966692414834, 0.7745966692414834}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS4[] = { + -0.3399810435848563, 0.3399810435848563, -0.8611363115940526, 0.8611363115940526}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS5[] = { + 0.0000000000000000, -0.5384693101056831, 0.5384693101056831, + -0.9061798459386640, 0.9061798459386640}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS6[] = { + 0.6612093864662645, -0.6612093864662645, -0.2386191860831969, + 0.2386191860831969, -0.9324695142031521, 0.9324695142031521}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS7[] = { + 0.0000000000000000, 0.4058451513773972, -0.4058451513773972, + -0.7415311855993945, 0.7415311855993945, -0.9491079123427585, + 0.9491079123427585}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS8[] = { + -0.1834346424956498, 0.1834346424956498, -0.5255324099163290, + 0.5255324099163290, -0.7966664774136267, 0.7966664774136267, + -0.9602898564975363, 0.9602898564975363}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS9[] = { + 0.0000000000000000, -0.8360311073266358, 0.8360311073266358, + -0.9681602395076261, 0.9681602395076261, -0.3242534234038089, + 0.3242534234038089, -0.6133714327005904, 0.6133714327005904}; +const double CIntegration::CGaussLegendreQuadrature::ABSCISSAS10[] = { + -0.1488743389816312, 0.1488743389816312, -0.4333953941292472, + 0.4333953941292472, -0.6794095682990244, 0.6794095682990244, + -0.8650633666889845, 0.8650633666889845, -0.9739065285171717, + 0.9739065285171717}; core::CFastMutex CIntegration::ms_Mutex; } diff --git a/lib/maths/CKMeansOnline1d.cc b/lib/maths/CKMeansOnline1d.cc index b095411aae..05ecb07a06 100644 --- a/lib/maths/CKMeansOnline1d.cc +++ b/lib/maths/CKMeansOnline1d.cc @@ -41,18 +41,24 @@ namespace detail { //! \brief Orders two normals by their means. struct SNormalMeanLess { public: - bool operator()(const CNormalMeanPrecConjugate& lhs, const CNormalMeanPrecConjugate& rhs) const { + bool operator()(const CNormalMeanPrecConjugate& lhs, + const CNormalMeanPrecConjugate& rhs) const { return lhs.marginalLikelihoodMean() < rhs.marginalLikelihoodMean(); } - bool operator()(double lhs, const CNormalMeanPrecConjugate& rhs) const { return lhs < rhs.marginalLikelihoodMean(); } - bool operator()(const CNormalMeanPrecConjugate& lhs, double rhs) const { return lhs.marginalLikelihoodMean() < rhs; } + bool operator()(double lhs, const CNormalMeanPrecConjugate& rhs) const { + return lhs < rhs.marginalLikelihoodMean(); + } + bool operator()(const CNormalMeanPrecConjugate& lhs, double rhs) const { + return lhs.marginalLikelihoodMean() < rhs; + } }; //! Get the log of the likelihood that \p point is from \p normal. 
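logLikelihoodFromCluster below feeds soft cluster assignment. A hypothetical sketch of how two such log-likelihoods can be turned into normalised cluster weights (the names are illustrative, not this file's API; subtracting the maximum guards against exp underflow):

#include <algorithm>
#include <cmath>
#include <utility>

std::pair<double, double> clusterWeights(double logL1, double logL2) {
    double scale = std::max(logL1, logL2);
    double w1 = std::exp(logL1 - scale);
    double w2 = std::exp(logL2 - scale);
    return std::make_pair(w1 / (w1 + w2), w2 / (w1 + w2));
}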
-double logLikelihoodFromCluster(const TDouble1Vec& sample, const CNormalMeanPrecConjugate& normal) { +double logLikelihoodFromCluster(const TDouble1Vec& sample, + const CNormalMeanPrecConjugate& normal) { double likelihood; - maths_t::EFloatingPointErrorStatus status = - normal.jointLogMarginalLikelihood(CConstantWeights::COUNT, sample, CConstantWeights::SINGLE_UNIT, likelihood); + maths_t::EFloatingPointErrorStatus status = normal.jointLogMarginalLikelihood( + CConstantWeights::COUNT, sample, CConstantWeights::SINGLE_UNIT, likelihood); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute probability for: " << sample[0]); return core::constants::LOG_MIN_DOUBLE - 1.0; @@ -76,11 +82,14 @@ CKMeansOnline1d::CKMeansOnline1d(TNormalVec& clusters) { m_Clusters.assign(clusters.begin(), clusters.end()); } -CKMeansOnline1d::CKMeansOnline1d(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { - traverser.traverseSubLevel(boost::bind(&CKMeansOnline1d::acceptRestoreTraverser, this, boost::cref(params), _1)); +CKMeansOnline1d::CKMeansOnline1d(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { + traverser.traverseSubLevel(boost::bind(&CKMeansOnline1d::acceptRestoreTraverser, + this, boost::cref(params), _1)); } -bool CKMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CKMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); if (name == CLUSTER_TAG) { @@ -98,7 +107,8 @@ std::string CKMeansOnline1d::persistenceTag() const { void CKMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter& inserter) const { for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - inserter.insertLevel(CLUSTER_TAG, boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, &m_Clusters[i], _1)); + inserter.insertLevel(CLUSTER_TAG, boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, + &m_Clusters[i], _1)); } } @@ -156,7 +166,8 @@ void CKMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do return; } - auto rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), point, detail::SNormalMeanLess()); + auto rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), + point, detail::SNormalMeanLess()); if (rightCluster == m_Clusters.end()) { --rightCluster; diff --git a/lib/maths/CKMostCorrelated.cc b/lib/maths/CKMostCorrelated.cc index de0003069d..b858157251 100644 --- a/lib/maths/CKMostCorrelated.cc +++ b/lib/maths/CKMostCorrelated.cc @@ -64,7 +64,8 @@ class CNotEqual : public std::unary_function { //! specified collection pairs of variables. class CPairNotIn : public std::unary_function { public: - CPairNotIn(const TSizeSizePrUSet& lookup, std::size_t X) : m_Lookup(&lookup), m_X(X) {} + CPairNotIn(const TSizeSizePrUSet& lookup, std::size_t X) + : m_Lookup(&lookup), m_X(X) {} bool operator()(const TPointSizePr& y) const { std::size_t Y = y.second; @@ -81,9 +82,12 @@ class CPairNotIn : public std::unary_function { //! a specified threshold. 
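CCloserThan below, like CNotEqual and CPairNotIn above, prunes the nearest-neighbour search for highly correlated pairs; the quantity the projected distances ultimately approximate is plain Pearson correlation. A self-contained reference implementation, for comparison (illustrative, not this class's internals):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

double pearson(const std::vector<double>& x, const std::vector<double>& y) {
    std::size_t n = std::min(x.size(), y.size());
    double mx = 0.0;
    double my = 0.0;
    for (std::size_t i = 0; i < n; ++i) {
        mx += x[i];
        my += y[i];
    }
    mx /= static_cast<double>(n);
    my /= static_cast<double>(n);
    double sxy = 0.0;
    double sxx = 0.0;
    double syy = 0.0;
    for (std::size_t i = 0; i < n; ++i) {
        sxy += (x[i] - mx) * (y[i] - my);
        sxx += (x[i] - mx) * (x[i] - mx);
        syy += (y[i] - my) * (y[i] - my);
    }
    return sxy / std::sqrt(sxx * syy);
}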
class CCloserThan : public std::unary_function { public: - CCloserThan(double threshold, const TPoint& x) : m_Threshold(threshold), m_X(x) {} + CCloserThan(double threshold, const TPoint& x) + : m_Threshold(threshold), m_X(x) {} - bool operator()(const TPointSizePr& y) const { return pow2(bg::distance(m_X, y.first)) < m_Threshold; } + bool operator()(const TPointSizePr& y) const { + return pow2(bg::distance(m_X, y.first)) < m_Threshold; + } private: static double pow2(double x) { return x * x; } @@ -109,7 +113,8 @@ const double MINIMUM_FREQUENCY = 0.25; } // unnamed:: -CKMostCorrelated::CKMostCorrelated(std::size_t k, double decayRate, bool initialize) : m_K(k), m_DecayRate(decayRate), m_MaximumCount(0.0) { +CKMostCorrelated::CKMostCorrelated(std::size_t k, double decayRate, bool initialize) + : m_K(k), m_DecayRate(decayRate), m_MaximumCount(0.0) { if (initialize) { this->nextProjection(); } @@ -125,12 +130,16 @@ bool CKMostCorrelated::acceptRestoreTraverser(core::CStateRestoreTraverser& trav do { const std::string& name = traverser.name(); RESTORE(RNG_TAG, m_Rng.fromString(traverser.value())) - RESTORE(PROJECTIONS_TAG, core::CPersistUtils::restore(PROJECTIONS_TAG, m_Projections, traverser)) - RESTORE(CURRENT_PROJECTED_TAG, core::CPersistUtils::restore(CURRENT_PROJECTED_TAG, m_CurrentProjected, traverser)) - RESTORE(PROJECTED_TAG, core::CPersistUtils::restore(PROJECTED_TAG, m_Projected, traverser)) + RESTORE(PROJECTIONS_TAG, + core::CPersistUtils::restore(PROJECTIONS_TAG, m_Projections, traverser)) + RESTORE(CURRENT_PROJECTED_TAG, + core::CPersistUtils::restore(CURRENT_PROJECTED_TAG, m_CurrentProjected, traverser)) + RESTORE(PROJECTED_TAG, + core::CPersistUtils::restore(PROJECTED_TAG, m_Projected, traverser)) RESTORE_BUILT_IN(MAXIMUM_COUNT_TAG, m_MaximumCount) RESTORE(MOMENTS_TAG, core::CPersistUtils::restore(MOMENTS_TAG, m_Moments, traverser)) - RESTORE(MOST_CORRELATED_TAG, core::CPersistUtils::restore(MOST_CORRELATED_TAG, m_MostCorrelated, traverser)) + RESTORE(MOST_CORRELATED_TAG, + core::CPersistUtils::restore(MOST_CORRELATED_TAG, m_MostCorrelated, traverser)) } while (traverser.next()); return true; @@ -157,7 +166,9 @@ void CKMostCorrelated::mostCorrelated(TSizeSizePrVec& result) const { } } -void CKMostCorrelated::mostCorrelated(std::size_t n, TSizeSizePrVec& correlates, TDoubleVec* pearson) const { +void CKMostCorrelated::mostCorrelated(std::size_t n, + TSizeSizePrVec& correlates, + TDoubleVec* pearson) const { correlates.clear(); if (pearson) { pearson->clear(); @@ -209,7 +220,9 @@ void CKMostCorrelated::removeVariables(const TSizeVec& remove) { if (remove[i] < m_Moments.size()) { m_Moments[remove[i]] = TMeanVarAccumulator(); m_Projected.erase(remove[i]); - m_MostCorrelated.erase(std::remove_if(m_MostCorrelated.begin(), m_MostCorrelated.end(), CMatches(remove[i])), + m_MostCorrelated.erase(std::remove_if(m_MostCorrelated.begin(), + m_MostCorrelated.end(), + CMatches(remove[i])), m_MostCorrelated.end()); } } @@ -241,17 +254,22 @@ void CKMostCorrelated::add(std::size_t X, double x) { void CKMostCorrelated::capture() { m_MaximumCount += 1.0; - for (TSizeVectorUMapCItr i = m_CurrentProjected.begin(); i != m_CurrentProjected.end(); ++i) { + for (TSizeVectorUMapCItr i = m_CurrentProjected.begin(); + i != m_CurrentProjected.end(); ++i) { std::size_t X = i->first; TSizeVectorPackedBitVectorPrUMapItr j = m_Projected.find(X); if (j == m_Projected.end()) { TVector zero(0.0); CPackedBitVector indicator(PROJECTION_DIMENSION - m_Projections.size(), false); - j = 
m_Projected.emplace(boost::unordered::piecewise_construct, boost::make_tuple(X), boost::make_tuple(zero, indicator)).first; + j = m_Projected + .emplace(boost::unordered::piecewise_construct, + boost::make_tuple(X), boost::make_tuple(zero, indicator)) + .first; } j->second.first += i->second; } - for (TSizeVectorPackedBitVectorPrUMapItr i = m_Projected.begin(); i != m_Projected.end(); ++i) { + for (TSizeVectorPackedBitVectorPrUMapItr i = m_Projected.begin(); + i != m_Projected.end(); ++i) { i->second.second.extend(m_CurrentProjected.count(i->first) > 0); } @@ -269,10 +287,12 @@ void CKMostCorrelated::capture() { std::stable_sort(m_MostCorrelated.begin(), m_MostCorrelated.end()); // Remove any variables for which the correlation will necessarily be zero. - for (TSizeVectorPackedBitVectorPrUMapItr i = m_Projected.begin(); i != m_Projected.end(); + for (TSizeVectorPackedBitVectorPrUMapItr i = m_Projected.begin(); + i != m_Projected.end(); /**/) { const CPackedBitVector& indicator = i->second.second; - if (indicator.manhattan() <= MINIMUM_FREQUENCY * static_cast(indicator.dimension())) { + if (indicator.manhattan() <= + MINIMUM_FREQUENCY * static_cast(indicator.dimension())) { i = m_Projected.erase(i); } else { ++i; @@ -289,7 +309,8 @@ void CKMostCorrelated::capture() { std::size_t n = add.size(); std::size_t desired = 2 * m_K; std::size_t added = N < desired ? std::min(desired - N, n) : 0; - LOG_TRACE(<< "N = " << N << ", n = " << n << ", desired = " << desired << ", added = " << added); + LOG_TRACE(<< "N = " << N << ", n = " << n << ", desired = " << desired + << ", added = " << added); if (added > 0) { m_MostCorrelated.insert(m_MostCorrelated.end(), add.end() - added, add.end()); @@ -360,7 +381,8 @@ std::size_t CKMostCorrelated::memoryUsage() const { } void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { - using TMaxDoubleAccumulator = CBasicStatistics::COrderStatisticsStack>; + using TMaxDoubleAccumulator = + CBasicStatistics::COrderStatisticsStack>; using TMaxCorrelationAccumulator = CBasicStatistics::COrderStatisticsHeap; using TPointRTree = bgi::rtree>; @@ -381,8 +403,9 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { lookup.insert(std::make_pair(std::min(X, Y), std::max(X, Y))); } - std::size_t replace = - std::max(static_cast(REPLACE_FRACTION * static_cast(desired) + 0.5), std::max(desired - N, std::size_t(1))); + std::size_t replace = std::max( + static_cast(REPLACE_FRACTION * static_cast(desired) + 0.5), + std::max(desired - N, std::size_t(1))); LOG_TRACE(<< "replace = " << replace); TMaxCorrelationAccumulator mostCorrelated(replace); @@ -390,13 +413,15 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { if (10 * replace > V * (V - 1)) { LOG_TRACE(<< "Exhaustive search"); - for (TSizeVectorPackedBitVectorPrUMapCItr x = m_Projected.begin(); x != m_Projected.end(); ++x) { + for (TSizeVectorPackedBitVectorPrUMapCItr x = m_Projected.begin(); + x != m_Projected.end(); ++x) { std::size_t X = x->first; TSizeVectorPackedBitVectorPrUMapCItr y = x; while (++y != m_Projected.end()) { std::size_t Y = y->first; if (lookup.count(std::make_pair(std::min(X, Y), std::max(X, Y))) == 0) { - SCorrelation cxy(X, x->second.first, x->second.second, Y, y->second.first, y->second.second); + SCorrelation cxy(X, x->second.first, x->second.second, Y, + y->second.first, y->second.second); mostCorrelated.add(cxy); } } @@ -416,7 +441,8 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { // Bound the correlation based on the 
sparsity of the metric. TMaxDoubleAccumulator fmax; double dimension = 0.0; - for (TSizeVectorPackedBitVectorPrUMapCItr i = m_Projected.begin(); i != m_Projected.end(); ++i) { + for (TSizeVectorPackedBitVectorPrUMapCItr i = m_Projected.begin(); + i != m_Projected.end(); ++i) { const CPackedBitVector& ix = i->second.second; dimension = static_cast(ix.dimension()); fmax.add(ix.manhattan() / dimension); @@ -429,7 +455,8 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { TPointSizePrVec points; points.reserve(m_Projected.size()); - for (TSizeVectorPackedBitVectorPrUMapCItr i = m_Projected.begin(); i != m_Projected.end(); ++i) { + for (TSizeVectorPackedBitVectorPrUMapCItr i = m_Projected.begin(); + i != m_Projected.end(); ++i) { points.emplace_back(i->second.first.to().toBoostArray(), i->first); } LOG_TRACE(<< "# points = " << points.size()); @@ -448,7 +475,8 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { seeds.erase(std::unique(seeds.begin(), seeds.end()), seeds.end()); } else { seeds.reserve(V); - seeds.assign(boost::counting_iterator(0), boost::counting_iterator(V)); + seeds.assign(boost::counting_iterator(0), + boost::counting_iterator(V)); } try { @@ -460,11 +488,13 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { nearest.clear(); bgi::query(rtree, - bgi::satisfies(CNotEqual(X)) && bgi::satisfies(CPairNotIn(lookup, X)) && + bgi::satisfies(CNotEqual(X)) && + bgi::satisfies(CPairNotIn(lookup, X)) && bgi::nearest((px.first.to()).toBoostArray(), k), std::back_inserter(nearest)); bgi::query(rtree, - bgi::satisfies(CNotEqual(X)) && bgi::satisfies(CPairNotIn(lookup, X)) && + bgi::satisfies(CNotEqual(X)) && + bgi::satisfies(CPairNotIn(lookup, X)) && bgi::nearest((-px.first.to()).toBoostArray(), k), std::back_inserter(nearest)); @@ -500,18 +530,22 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { TVector width(std::sqrt(threshold)); nearest.clear(); { - bgm::box box((px.first - width).to().toBoostArray(), (px.first + width).to().toBoostArray()); + bgm::box box((px.first - width).to().toBoostArray(), + (px.first + width).to().toBoostArray()); bgi::query(rtree, bgi::within(box) && bgi::satisfies(CNotEqual(X)) && - bgi::satisfies(CCloserThan(threshold, px.first.to().toBoostArray())) && + bgi::satisfies(CCloserThan( + threshold, px.first.to().toBoostArray())) && bgi::satisfies(CPairNotIn(lookup, X)), std::back_inserter(nearest)); } { - bgm::box box((-px.first - width).to().toBoostArray(), (-px.first + width).to().toBoostArray()); + bgm::box box((-px.first - width).to().toBoostArray(), + (-px.first + width).to().toBoostArray()); bgi::query(rtree, bgi::within(box) && bgi::satisfies(CNotEqual(X)) && - bgi::satisfies(CCloserThan(threshold, (-px.first).to().toBoostArray())) && + bgi::satisfies(CCloserThan( + threshold, (-px.first).to().toBoostArray())) && bgi::satisfies(CPairNotIn(lookup, X)), std::back_inserter(nearest)); } @@ -548,7 +582,8 @@ void CKMostCorrelated::mostCorrelated(TCorrelationVec& result) const { void CKMostCorrelated::nextProjection() { TDoubleVec uniform01; - CSampling::uniformSample(m_Rng, 0.0, 1.0, NUMBER_PROJECTIONS * PROJECTION_DIMENSION, uniform01); + CSampling::uniformSample(m_Rng, 0.0, 1.0, + NUMBER_PROJECTIONS * PROJECTION_DIMENSION, uniform01); m_Projections.reserve(PROJECTION_DIMENSION); m_Projections.resize(PROJECTION_DIMENSION); for (std::size_t i = 0u, j = 0u; i < PROJECTION_DIMENSION; ++i) { @@ -590,7 +625,8 @@ const double CKMostCorrelated::MINIMUM_SPARSENESS = 0.5; const 
double CKMostCorrelated::REPLACE_FRACTION = 0.1; CKMostCorrelated::SCorrelation::SCorrelation() - : s_X(std::numeric_limits::max()), s_Y(std::numeric_limits::max()) { + : s_X(std::numeric_limits::max()), + s_Y(std::numeric_limits::max()) { } CKMostCorrelated::SCorrelation::SCorrelation(std::size_t X, @@ -633,7 +669,8 @@ void CKMostCorrelated::SCorrelation::acceptPersistInserter(core::CStatePersistIn } bool CKMostCorrelated::SCorrelation::operator<(const SCorrelation& rhs) const { - return COrderings::lexicographical_compare(-this->absCorrelation(), s_X, s_Y, -rhs.absCorrelation(), rhs.s_X, rhs.s_Y); + return COrderings::lexicographical_compare( + -this->absCorrelation(), s_X, s_Y, -rhs.absCorrelation(), rhs.s_X, rhs.s_Y); } void CKMostCorrelated::SCorrelation::update(const TSizeVectorPackedBitVectorPrUMap& projected) { @@ -649,16 +686,20 @@ void CKMostCorrelated::SCorrelation::update(const TSizeVectorPackedBitVectorPrUM } double CKMostCorrelated::SCorrelation::distance(double amax) const { - return static_cast(NUMBER_PROJECTIONS) * amax * 2.0 * (1.0 - std::fabs(CBasicStatistics::mean(s_Correlation))); + return static_cast(NUMBER_PROJECTIONS) * amax * 2.0 * + (1.0 - std::fabs(CBasicStatistics::mean(s_Correlation))); } double CKMostCorrelated::SCorrelation::absCorrelation() const { return std::fabs(CBasicStatistics::mean(s_Correlation)) - - (1.0 / std::max(CBasicStatistics::count(s_Correlation), 2.0) + std::sqrt(CBasicStatistics::variance(s_Correlation))); + (1.0 / std::max(CBasicStatistics::count(s_Correlation), 2.0) + + std::sqrt(CBasicStatistics::variance(s_Correlation))); } -double -CKMostCorrelated::SCorrelation::correlation(const TVector& px, const CPackedBitVector& ix, const TVector& py, const CPackedBitVector& iy) { +double CKMostCorrelated::SCorrelation::correlation(const TVector& px, + const CPackedBitVector& ix, + const TVector& py, + const CPackedBitVector& iy) { double result = 0.0; double nx = ix.manhattan() / static_cast(ix.dimension()); @@ -722,7 +763,8 @@ uint64_t CKMostCorrelated::SCorrelation::checksum(uint64_t seed) const { } std::string CKMostCorrelated::SCorrelation::print() const { - return CBasicStatistics::print(s_Correlation) + ' ' + core::CStringUtils::typeToString(s_X) + ' ' + + return CBasicStatistics::print(s_Correlation) + ' ' + + core::CStringUtils::typeToString(s_X) + ' ' + core::CStringUtils::typeToString(s_Y); } diff --git a/lib/maths/CLassoLogisticRegression.cc b/lib/maths/CLassoLogisticRegression.cc index ddc6796a97..074c829805 100644 --- a/lib/maths/CLassoLogisticRegression.cc +++ b/lib/maths/CLassoLogisticRegression.cc @@ -78,7 +78,10 @@ double lassoStep(double beta, double lambda, double n, double d) { //! \note That this should decrease monotonically in each iteration //! of the inner solver loop. 
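In dense form, the quantity logLikelihood below computes is the L1-penalised logistic log-likelihood. A sketch under the assumptions that labels are in {-1, +1} and every row of x has one entry per coefficient (names are illustrative):

#include <cmath>
#include <cstddef>
#include <vector>

double penalisedLogLikelihood(const std::vector<std::vector<double>>& x, // x[i] = example i
                              const std::vector<double>& y,              // labels in {-1, +1}
                              const std::vector<double>& lambda,         // per-coefficient penalties
                              const std::vector<double>& beta) {
    double result = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i) {
        double r = 0.0;
        for (std::size_t j = 0; j < beta.size(); ++j) {
            r += beta[j] * x[i][j];
        }
        // log P(y_i | x_i) for the logistic model.
        result -= std::log(1.0 + std::exp(-y[i] * r));
    }
    for (std::size_t j = 0; j < beta.size(); ++j) {
        result -= lambda[j] * std::fabs(beta[j]); // Laplace prior / L1 penalty
    }
    return result;
}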
template -double logLikelihood(const MATRIX& x, const TDoubleVec& y, const TDoubleVec& lambda, const TDoubleVec& beta) { +double logLikelihood(const MATRIX& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + const TDoubleVec& beta) { using iterator = typename MATRIX::iterator; double result = 0.0; @@ -220,7 +223,8 @@ void CDenseMatrix::swap(CDenseMatrix& other) { CSparseMatrix::CSparseMatrix() : m_Rows(0), m_Columns(0) { } -CSparseMatrix::CSparseMatrix(std::size_t rows, std::size_t columns, TSizeSizePrDoublePrVec& elements) : m_Rows(rows), m_Columns(columns) { +CSparseMatrix::CSparseMatrix(std::size_t rows, std::size_t columns, TSizeSizePrDoublePrVec& elements) + : m_Rows(rows), m_Columns(columns) { m_Elements.swap(elements); std::sort(m_Elements.begin(), m_Elements.end(), COrderings::SFirstLess()); } @@ -233,21 +237,26 @@ void CSparseMatrix::swap(CSparseMatrix& other) { ////// CCyclicCoordinateDescent ////// -CCyclicCoordinateDescent::CCyclicCoordinateDescent(std::size_t maxIterations, double eps) : m_MaxIterations(maxIterations), m_Eps(eps) { +CCyclicCoordinateDescent::CCyclicCoordinateDescent(std::size_t maxIterations, double eps) + : m_MaxIterations(maxIterations), m_Eps(eps) { } template -bool CCyclicCoordinateDescent::checkInputs(const MATRIX& x, const TDoubleVec& y, const TDoubleVec& lambda) { +bool CCyclicCoordinateDescent::checkInputs(const MATRIX& x, + const TDoubleVec& y, + const TDoubleVec& lambda) { if (x.rows() == 0) { LOG_ERROR(<< "No training data"); return false; } if (x.rows() != y.size()) { - LOG_ERROR(<< "Inconsistent training data |x| = " << x.rows() << ", |y| = " << y.size()); + LOG_ERROR(<< "Inconsistent training data |x| = " << x.rows() + << ", |y| = " << y.size()); return false; } if (lambda.size() != x.columns()) { - LOG_ERROR(<< "Inconsistent prior |lambda| = " << lambda.size() << ", D = " << x.columns()); + LOG_ERROR(<< "Inconsistent prior |lambda| = " << lambda.size() + << ", D = " << x.columns()); return false; } return true; @@ -293,7 +302,8 @@ bool CCyclicCoordinateDescent::runIncremental(const CDenseMatrix& x, return false; } if (beta.size() != lambda.size()) { - LOG_ERROR(<< "Inconsistent seed parameter vector |beta| = " << beta.size() << ", D = " << lambda.size()); + LOG_ERROR(<< "Inconsistent seed parameter vector |beta| = " << beta.size() + << ", D = " << lambda.size()); return false; } @@ -321,7 +331,8 @@ bool CCyclicCoordinateDescent::runIncremental(const CSparseMatrix& x, return false; } if (beta.size() != lambda.size()) { - LOG_ERROR(<< "Inconsistent seed parameter vector |beta| = " << beta.size() << ", D = " << lambda.size()); + LOG_ERROR(<< "Inconsistent seed parameter vector |beta| = " << beta.size() + << ", D = " << lambda.size()); return false; } @@ -346,7 +357,8 @@ bool CCyclicCoordinateDescent::runIncremental(const CSparseMatrix& x, CLogisticRegressionModel::CLogisticRegressionModel() : m_Beta0(0.0), m_Beta() { } -CLogisticRegressionModel::CLogisticRegressionModel(double beta0, TSizeDoublePrVec& beta) : m_Beta0(beta0), m_Beta() { +CLogisticRegressionModel::CLogisticRegressionModel(double beta0, TSizeDoublePrVec& beta) + : m_Beta0(beta0), m_Beta() { m_Beta.swap(beta); } @@ -363,7 +375,8 @@ bool CLogisticRegressionModel::operator()(const TDoubleVec& x, double& probabili std::size_t n = m_Beta.size(); if (x.size() <= m_Beta[n - 1].first) { - LOG_ERROR(<< "Invalid feature vector |x| = " << x.size() << ", D = " << m_Beta[n - 1].first + 1) + LOG_ERROR(<< "Invalid feature vector |x| = " << x.size() + << ", D = " << m_Beta[n - 1].first + 1) } 
double r = -m_Beta0; for (std::size_t i = 0u; i < m_Beta.size(); ++i) { @@ -409,7 +422,11 @@ using TSizeUSet = boost::unordered_set; //! \param[in] mask The indices of the feature vectors to remove. //! \param[out] xMasked The training matrix corresponding to \p x. //! \param[out] yMasked The training labels corresponding to \p y. -void setupTrainingData(const TDoubleVecVec& x, const TDoubleVec& y, const TSizeUSet& mask, CDenseMatrix& xMasked, TDoubleVec& yMasked) { +void setupTrainingData(const TDoubleVecVec& x, + const TDoubleVec& y, + const TSizeUSet& mask, + CDenseMatrix& xMasked, + TDoubleVec& yMasked) { xMasked = CDenseMatrix(); yMasked.clear(); @@ -462,7 +479,8 @@ void setupTrainingData(const TSizeDoublePrVecVec& x, if (mask.count(i) == 0) { for (std::size_t j = 0u, d = x[i].size(); j < d; ++j) { std::size_t j_ = x[i][j].first; - xTranspose.push_back(TSizeSizePrDoublePr(TSizeSizePr(j_, i_), x[i][j].second)); + xTranspose.push_back( + TSizeSizePrDoublePr(TSizeSizePr(j_, i_), x[i][j].second)); columns = std::max(columns, j_ + 1); } yMasked.push_back(y[i]); @@ -563,12 +581,14 @@ class C2FoldCrossValidatedLogLikelihood { using result_type = double; public: - C2FoldCrossValidatedLogLikelihood(std::size_t d) : m_D(d + 1), m_Splits(0) {} + C2FoldCrossValidatedLogLikelihood(std::size_t d) + : m_D(d + 1), m_Splits(0) {} //! Add a 2-split of the training data. void addSplit(MATRIX& xTrain, TDoubleVec& yTrain, MATRIX& xTest, TDoubleVec& yTest) { if (xTrain.rows() != m_D || xTest.rows() != m_D) { - LOG_ERROR(<< "Bad training data: |train| = " << xTrain.rows() << ", |test| = " << xTest.rows() << ", D = " << m_D); + LOG_ERROR(<< "Bad training data: |train| = " << xTrain.rows() + << ", |test| = " << xTest.rows() << ", D = " << m_D); return; } ++m_Splits; @@ -591,7 +611,8 @@ class C2FoldCrossValidatedLogLikelihood { for (std::size_t j = 0u; j < m_Splits; ++j) { for (std::size_t i = 0u; i < 2; ++i) { learn(m_X[i][j], m_Y[i][j], m_Lambda, m_Beta); - result += logLikelihood(m_X[(i + 1) % 2][j], m_Y[(i + 1) % 2][j], m_Lambda, m_Beta); + result += logLikelihood(m_X[(i + 1) % 2][j], + m_Y[(i + 1) % 2][j], m_Lambda, m_Beta); } } return result; @@ -622,7 +643,8 @@ class C2FoldCrossValidatedLogLikelihood { ////// CLassoLogisticRegression ////// template -CLassoLogisticRegression::CLassoLogisticRegression() : m_X(), m_D(0), m_Y(), m_Lambda(1.0), m_Beta() { +CLassoLogisticRegression::CLassoLogisticRegression() + : m_X(), m_D(0), m_Y(), m_Lambda(1.0), m_Beta() { } template @@ -662,7 +684,8 @@ void CLassoLogisticRegression::doLearnHyperparameter(EHyperparametersSt (m_Y[i] > 0.0 ? positive : negative).push_back(i); } if (positive.size() <= 1 || negative.size() <= 1) { - LOG_WARN(<< "Can't cross-validate: insufficient " << (positive.size() <= 1 ? "" : "un") << "interesting examples provided"); + LOG_WARN(<< "Can't cross-validate: insufficient " + << (positive.size() <= 1 ? 
"" : "un") << "interesting examples provided"); return; } for (std::size_t i = 0u, np = positive.size(), nn = negative.size(); i < 2; ++i) { @@ -702,8 +725,9 @@ void CLassoLogisticRegression::doLearnHyperparameter(EHyperparametersSt std::size_t maxIterations = boost::size(scales) / 2; double logLikelihood; - CSolvers::maximize( - scales[a] * lambda, scales[b] * lambda, logLikelihoods[a], logLikelihoods[b], objective, 0.0, maxIterations, lambda, logLikelihood); + CSolvers::maximize(scales[a] * lambda, scales[b] * lambda, + logLikelihoods[a], logLikelihoods[b], objective, 0.0, + maxIterations, lambda, logLikelihood); LOG_TRACE(<< "lambda = " << lambda << " log(L(lambda)) = " << logLikelihood); m_Lambda = logLikelihood > *max ? lambda : scales[max - logLikelihoods] * min; @@ -731,7 +755,8 @@ bool CLassoLogisticRegression::doLearn(CLogisticRegressionModel& result // Create the model. TSizeDoublePrVec sparse; - sparse.reserve(std::count_if(m_Beta.begin(), m_Beta.end(), boost::bind(std::greater(), _1, 0.0))); + sparse.reserve(std::count_if(m_Beta.begin(), m_Beta.end(), + boost::bind(std::greater(), _1, 0.0))); for (std::size_t j = 0u; j < m_D; ++j) { if (m_Beta[j] > 0.0) { sparse.emplace_back(j, m_Beta[j]); @@ -756,7 +781,8 @@ bool CLassoLogisticRegression::sanityChecks() const { (m_Y[i] < 0.0 ? negative : positive) = true; } if (!negative || !positive) { - LOG_WARN(<< "Only " << (negative ? "un" : "") << "interesting examples provided: problem is ill posed"); + LOG_WARN(<< "Only " << (negative ? "un" : "") + << "interesting examples provided: problem is ill posed"); return false; } return true; @@ -769,7 +795,8 @@ void CLassoLogisticRegressionDense::addTrainingData(const TDoubleVec& x, bool in this->d() = x.size(); } if (x.size() != this->d()) { - LOG_ERROR(<< "Ignoring inconsistent training data |x| = " << x.size() << ", D = " << this->x()[0].size()); + LOG_ERROR(<< "Ignoring inconsistent training data |x| = " << x.size() + << ", D = " << this->x()[0].size()); return; } this->x().push_back(x); diff --git a/lib/maths/CLinearAlgebraTools.cc b/lib/maths/CLinearAlgebraTools.cc index cd7b211d74..938fc8b050 100644 --- a/lib/maths/CLinearAlgebraTools.cc +++ b/lib/maths/CLinearAlgebraTools.cc @@ -21,8 +21,11 @@ template class CInverseQuadraticProduct { public: template - static maths_t::EFloatingPointErrorStatus - compute(std::size_t d, const MATRIX& covariance_, const VECTOR& residual, double& result, bool ignoreSingularSubspace) { + static maths_t::EFloatingPointErrorStatus compute(std::size_t d, + const MATRIX& covariance_, + const VECTOR& residual, + double& result, + bool ignoreSingularSubspace) { if (residual.isZero()) { result = 0.0; return maths_t::E_FpNoErrors; @@ -41,14 +44,17 @@ class CInverseQuadraticProduct { default: { // Note we use Jacobi SVD here so that we handle the case // that m is singular to working precision. - Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); + Eigen::JacobiSVD covariance( + toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); EIGENVECTOR y(toDenseVector(residual)); // Check the residual is zero on the singular subspace. 
std::size_t rank = static_cast(covariance.rank()); if (!ignoreSingularSubspace && rank < d) { - double normC = (y.transpose() * covariance.matrixU().leftCols(rank)).norm(); - double normS = (y.transpose() * covariance.matrixU().rightCols(d - rank)).norm(); + double normC = + (y.transpose() * covariance.matrixU().leftCols(rank)).norm(); + double normS = + (y.transpose() * covariance.matrixU().rightCols(d - rank)).norm(); if (normS > std::numeric_limits::epsilon() * normC) { return maths_t::E_FpOverflowed; } @@ -66,8 +72,11 @@ template class CGaussianLogLikelihood { public: template - static maths_t::EFloatingPointErrorStatus - compute(std::size_t d, const MATRIX& covariance_, const VECTOR& residual, double& result, bool ignoreSingularSubspace) { + static maths_t::EFloatingPointErrorStatus compute(std::size_t d, + const MATRIX& covariance_, + const VECTOR& residual, + double& result, + bool ignoreSingularSubspace) { result = core::constants::LOG_MIN_DOUBLE - 1.0; switch (d) { @@ -75,22 +84,27 @@ class CGaussianLogLikelihood { if (covariance_(0, 0) == 0.0) { return maths_t::E_FpOverflowed; } - result = -0.5 * (residual(0) * residual(0) / covariance_(0, 0) + core::constants::LOG_TWO_PI + std::log(covariance_(0, 0))); + result = -0.5 * (residual(0) * residual(0) / covariance_(0, 0) + + core::constants::LOG_TWO_PI + std::log(covariance_(0, 0))); return maths_t::E_FpNoErrors; default: { // Note we use Jacobi SVD here so that we handle the case // that m is singular to working precision. - Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); + Eigen::JacobiSVD covariance( + toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); EIGENVECTOR y(toDenseVector(residual)); // Check the residual is zero on the singular subspace. std::size_t rank = static_cast(covariance.rank()); if (!ignoreSingularSubspace && rank < d) { - double normC = (y.transpose() * covariance.matrixU().leftCols(rank)).norm(); - double normS = (y.transpose() * covariance.matrixU().rightCols(d - rank)).norm(); - result = normS > std::numeric_limits::epsilon() * normC ? core::constants::LOG_MIN_DOUBLE - 1.0 - : core::constants::LOG_MAX_DOUBLE + 1.0; + double normC = + (y.transpose() * covariance.matrixU().leftCols(rank)).norm(); + double normS = + (y.transpose() * covariance.matrixU().rightCols(d - rank)).norm(); + result = normS > std::numeric_limits::epsilon() * normC + ? core::constants::LOG_MIN_DOUBLE - 1.0 + : core::constants::LOG_MAX_DOUBLE + 1.0; return maths_t::E_FpOverflowed; } y = covariance.solve(y); @@ -98,7 +112,9 @@ class CGaussianLogLikelihood { for (std::size_t i = 0u; i < rank; ++i) { logDeterminant += std::log(covariance.singularValues()(i)); } - result = -0.5 * (residual.inner(y) + static_cast(rank) * core::constants::LOG_TWO_PI + logDeterminant); + result = -0.5 * (residual.inner(y) + + static_cast(rank) * core::constants::LOG_TWO_PI + + logDeterminant); return maths_t::E_FpNoErrors; } } @@ -110,7 +126,10 @@ template class CSampleGaussian { public: template - static void generate(std::size_t n, const VECTOR& mean_, const MATRIX& covariance_, std::vector& result) { + static void generate(std::size_t n, + const VECTOR& mean_, + const MATRIX& covariance_, + std::vector& result) { result.clear(); if (n == 0) { return; @@ -127,7 +146,8 @@ class CSampleGaussian { // for more discussion on this sampling strategy. 
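// Each sample is placed at the conditional mean of the marginal N(0, v) along
// the eigenvector u within one of "numberIntervals" equal probability
// intervals: since the integral of x f(x) over [a, b] equals -v (f(b) - f(a))
// for N(0, v), the step dx below, numberIntervals times the difference in
// partial expectations, is exactly E[X | a < X <= b]. Sampling at interval
// means preserves the distribution mean exactly.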
VECTOR_PRECISE mean(mean_); - Eigen::JacobiSVD covariance(toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); + Eigen::JacobiSVD covariance( + toDenseMatrix(covariance_), Eigen::ComputeFullU | Eigen::ComputeFullV); std::size_t rank = static_cast(covariance.rank()); std::size_t numberIntervals = n / rank; @@ -153,7 +173,8 @@ class CSampleGaussian { double q = static_cast(j) / static_cast(numberIntervals); double xq = boost::math::quantile(normal, q); double partialExpectation = -variance * CTools::safePdf(normal, xq); - double dx = scale * static_cast(numberIntervals) * (partialExpectation - lastPartialExpectation); + double dx = scale * static_cast(numberIntervals) * + (partialExpectation - lastPartialExpectation); lastPartialExpectation = partialExpectation; LOG_TRACE(<< "dx = " << dx); result.push_back(mean + dx * u); @@ -161,7 +182,9 @@ class CSampleGaussian { double dx = -scale * static_cast(numberIntervals) * lastPartialExpectation; LOG_TRACE(<< "dx = " << dx); result.push_back(mean + dx * u); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to sample eigenvector " << u << ": " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to sample eigenvector " << u << ": " << e.what()); + } } } } @@ -172,7 +195,8 @@ template class CLogDeterminant { public: template - static maths_t::EFloatingPointErrorStatus compute(std::size_t d, const MATRIX& m_, double& result, bool ignoreSingularSubspace) { + static maths_t::EFloatingPointErrorStatus + compute(std::size_t d, const MATRIX& m_, double& result, bool ignoreSingularSubspace) { result = core::constants::LOG_MIN_DOUBLE - 1.0; switch (d) { @@ -191,7 +215,8 @@ class CLogDeterminant { // Check the residual is zero on the singular subspace. std::size_t rank = static_cast(svd.rank()); if (!ignoreSingularSubspace && rank < d) { - result = static_cast(d - rank) * std::log(svd.threshold() * svd.singularValues()(0)); + result = static_cast(d - rank) * + std::log(svd.threshold() * svd.singularValues()(0)); return maths_t::E_FpOverflowed; } result = 0.0; @@ -205,14 +230,13 @@ class CLogDeterminant { }; } -#define INVERSE_QUADRATIC_PRODUCT(T, N) \ - maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, \ - const CSymmetricMatrixNxN& covariance, \ - const CVectorNx1& residual, \ - double& result, \ - bool ignoreSingularSubspace) { \ - return CInverseQuadraticProduct>::Type, SDenseVector>::Type>::compute( \ - d, covariance, residual, result, ignoreSingularSubspace); \ +#define INVERSE_QUADRATIC_PRODUCT(T, N) \ + maths_t::EFloatingPointErrorStatus inverseQuadraticProduct( \ + std::size_t d, const CSymmetricMatrixNxN& covariance, \ + const CVectorNx1& residual, double& result, bool ignoreSingularSubspace) { \ + return CInverseQuadraticProduct>::Type, \ + SDenseVector>::Type>::compute(d, covariance, residual, \ + result, ignoreSingularSubspace); \ } INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 2) INVERSE_QUADRATIC_PRODUCT(CFloatStorage, 3) @@ -223,33 +247,29 @@ INVERSE_QUADRATIC_PRODUCT(double, 3) INVERSE_QUADRATIC_PRODUCT(double, 4) INVERSE_QUADRATIC_PRODUCT(double, 5) #undef INVERSE_QUADRATIC_PRODUCT -maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, - const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace) { - return CInverseQuadraticProduct>::Type, - SDenseVector>::Type>::compute(d, - covariance, - residual, - result, - ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus 
+inverseQuadraticProduct(std::size_t d, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace) { + return CInverseQuadraticProduct>::Type, SDenseVector>::Type>::compute( + d, covariance, residual, result, ignoreSingularSubspace); } -maths_t::EFloatingPointErrorStatus inverseQuadraticProduct(std::size_t d, - const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace) { +maths_t::EFloatingPointErrorStatus +inverseQuadraticProduct(std::size_t d, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace) { return CInverseQuadraticProduct>::Type, SDenseVector>::Type>::compute( d, covariance, residual, result, ignoreSingularSubspace); } #define GAUSSIAN_LOG_LIKELIHOOD(T, N) \ - maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, \ - const CSymmetricMatrixNxN& covariance, \ - const CVectorNx1& residual, \ - double& result, \ - bool ignoreSingularSubspace) { \ + maths_t::EFloatingPointErrorStatus gaussianLogLikelihood( \ + std::size_t d, const CSymmetricMatrixNxN& covariance, \ + const CVectorNx1& residual, double& result, bool ignoreSingularSubspace) { \ return CGaussianLogLikelihood>::Type, SDenseVector>::Type>::compute( \ d, covariance, residual, result, ignoreSingularSubspace); \ } @@ -262,29 +282,32 @@ GAUSSIAN_LOG_LIKELIHOOD(double, 3) GAUSSIAN_LOG_LIKELIHOOD(double, 4) GAUSSIAN_LOG_LIKELIHOOD(double, 5) #undef GAUSSIAN_LOG_LIKELIHOOD -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, - const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace) { - return CGaussianLogLikelihood>::Type, SDenseVector>::Type>::compute( - d, covariance, residual, result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus +gaussianLogLikelihood(std::size_t d, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace) { + return CGaussianLogLikelihood>::Type, + SDenseVector>::Type>::compute(d, covariance, residual, + result, ignoreSingularSubspace); } -maths_t::EFloatingPointErrorStatus gaussianLogLikelihood(std::size_t d, - const CSymmetricMatrix& covariance, - const CVector& residual, - double& result, - bool ignoreSingularSubspace) { +maths_t::EFloatingPointErrorStatus +gaussianLogLikelihood(std::size_t d, + const CSymmetricMatrix& covariance, + const CVector& residual, + double& result, + bool ignoreSingularSubspace) { return CGaussianLogLikelihood>::Type, SDenseVector>::Type>::compute( d, covariance, residual, result, ignoreSingularSubspace); } -#define SAMPLE_GAUSSIAN(T, N) \ - void sampleGaussian(std::size_t d, \ - const CVectorNx1& mean, \ - const CSymmetricMatrixNxN& covariance, \ - std::vector>& result) { \ - CSampleGaussian>::Type>::generate(d, mean, covariance, result); \ +#define SAMPLE_GAUSSIAN(T, N) \ + void sampleGaussian(std::size_t d, const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, \ + std::vector>& result) { \ + CSampleGaussian>::Type>::generate( \ + d, mean, covariance, result); \ } SAMPLE_GAUSSIAN(CFloatStorage, 2) SAMPLE_GAUSSIAN(CFloatStorage, 3) @@ -299,19 +322,23 @@ void sampleGaussian(std::size_t d, const CVector& mean, const CSymmetricMatrix& covariance, std::vector>& result) { - return CSampleGaussian>::Type>::generate(d, mean, covariance, result); + return CSampleGaussian>::Type>::generate( + d, mean, covariance, result); } void 
sampleGaussian(std::size_t d, const CVector& mean, const CSymmetricMatrix& covariance, std::vector>& result) { - return CSampleGaussian>::Type>::generate(d, mean, covariance, result); + return CSampleGaussian>::Type>::generate( + d, mean, covariance, result); } -#define LOG_DETERMINANT(T, N) \ - maths_t::EFloatingPointErrorStatus logDeterminant( \ - std::size_t d, const CSymmetricMatrixNxN& matrix, double& result, bool ignoreSingularSubspace) { \ - return CLogDeterminant>::Type>::compute(d, matrix, result, ignoreSingularSubspace); \ +#define LOG_DETERMINANT(T, N) \ + maths_t::EFloatingPointErrorStatus logDeterminant( \ + std::size_t d, const CSymmetricMatrixNxN& matrix, \ + double& result, bool ignoreSingularSubspace) { \ + return CLogDeterminant>::Type>::compute( \ + d, matrix, result, ignoreSingularSubspace); \ } LOG_DETERMINANT(CFloatStorage, 2) LOG_DETERMINANT(CFloatStorage, 3) @@ -322,13 +349,19 @@ LOG_DETERMINANT(double, 3) LOG_DETERMINANT(double, 4) LOG_DETERMINANT(double, 5) #undef LOG_DETERMINANT -maths_t::EFloatingPointErrorStatus -logDeterminant(std::size_t d, const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace) { - return CLogDeterminant>::Type>::compute(d, matrix, result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, + const CSymmetricMatrix& matrix, + double& result, + bool ignoreSingularSubspace) { + return CLogDeterminant>::Type>::compute( + d, matrix, result, ignoreSingularSubspace); } -maths_t::EFloatingPointErrorStatus -logDeterminant(std::size_t d, const CSymmetricMatrix& matrix, double& result, bool ignoreSingularSubspace) { - return CLogDeterminant>::Type>::compute(d, matrix, result, ignoreSingularSubspace); +maths_t::EFloatingPointErrorStatus logDeterminant(std::size_t d, + const CSymmetricMatrix& matrix, + double& result, + bool ignoreSingularSubspace) { + return CLogDeterminant>::Type>::compute( + d, matrix, result, ignoreSingularSubspace); } } } diff --git a/lib/maths/CLogNormalMeanPrecConjugate.cc b/lib/maths/CLogNormalMeanPrecConjugate.cc index 4565de40d4..8202bcd851 100644 --- a/lib/maths/CLogNormalMeanPrecConjugate.cc +++ b/lib/maths/CLogNormalMeanPrecConjugate.cc @@ -67,7 +67,9 @@ using TDoubleDoublePrVec = std::vector; //! \brief Adds "weight" x "right operand" to the "left operand". struct SPlusWeight { - double operator()(double lhs, double rhs, double weight = 1.0) const { return lhs + weight * rhs; } + double operator()(double lhs, double rhs, double weight = 1.0) const { + return lhs + weight * rhs; + } }; //! Get the effective location and scale of the sample. @@ -79,8 +81,15 @@ struct SPlusWeight { //! \param[in] shape The gamma prior shape. //! \param[out] location The effective location of sample distribution. //! \param[out] scale The effective scale of sample distribution. -inline void -locationAndScale(double vs, double r, double s, double mean, double precision, double rate, double shape, double& location, double& scale) { +inline void locationAndScale(double vs, + double r, + double s, + double mean, + double precision, + double rate, + double shape, + double& location, + double& scale) { double t = vs == 1.0 ? r : r + std::log(s + vs * (1.0 - s)); double scaledPrecision = t == r ? precision : t / r * precision; double scaledRate = t == r ? rate : t / r * rate; @@ -148,7 +157,8 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // of this distribution.) 
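// The remainder of this function splits into three regimes: with a
// non-informative prior every sample is treated as equally likely, so the
// function is evaluated against an improper distribution; for large gamma
// shape the predictive is approximated by a log-normal, the limit of the
// log-t as the degrees of freedom grow; otherwise the exact log-t marginal
// likelihood is used.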
for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); - result = aggregate(result, func(CTools::SImproperDistribution(), samples[i] + offset), n); + result = aggregate( + result, func(CTools::SImproperDistribution(), samples[i] + offset), n); } } else if (shape > MINIMUM_LOGNORMAL_SHAPE) { // For large shape the marginal likelihood is very well approximated @@ -174,10 +184,12 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * + maths_t::countVarianceScale(weightStyles, weights[i]); double location; double scale; - locationAndScale(varianceScale, r, s, mean, precision, rate, shape, location, scale); + locationAndScale(varianceScale, r, s, mean, precision, rate, + shape, location, scale); boost::math::lognormal_distribution<> lognormal(location, scale); result = aggregate(result, func(lognormal, samples[i] + offset), n); } @@ -191,10 +203,12 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * + maths_t::countVarianceScale(weightStyles, weights[i]); double location; double scale; - locationAndScale(varianceScale, r, s, mean, precision, rate, shape, location, scale); + locationAndScale(varianceScale, r, s, mean, precision, rate, + shape, location, scale); CLogTDistribution logt(2.0 * shape, location, scale); result = aggregate(result, func(logt, samples[i] + offset), n); } @@ -227,29 +241,14 @@ class CEvaluateOnSamples : core::CNonCopyable { double precision, double shape, double rate) - : m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Offset(offset), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate) {} + : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + m_IsNonInformative(isNonInformative), m_Offset(offset), m_Mean(mean), + m_Precision(precision), m_Shape(shape), m_Rate(rate) {} bool operator()(double x, double& result) const { - return evaluateFunctionOnJointDistribution(m_WeightStyles, - m_Samples, - m_Weights, - F(), - SPlusWeight(), - m_IsNonInformative, - m_Offset + x, - m_Shape, - m_Rate, - m_Mean, - m_Precision, - result); + return evaluateFunctionOnJointDistribution( + m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, + m_Offset + x, m_Shape, m_Rate, m_Mean, m_Precision, result); } private: @@ -279,7 +278,8 @@ class CMeanKernel { using TValue = CVectorNx1; public: - CMeanKernel(double m, double p, double a, double b) : m_M(m), m_P(p), m_A(a), m_B(b) {} + CMeanKernel(double m, double p, double a, double b) + : m_M(m), m_P(p), m_A(a), m_B(b) {} bool operator()(double x, TValue& result) const { try { @@ -288,7 +288,8 @@ class CMeanKernel { result(0) = std::exp(m_M + 0.5 / x * (1.0 / m_P + 1.0)) * fx; result(1) = fx; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to calculate mean kernel: " << e.what() << ", m = " << m_M << 
", p = " << m_P << ", a = " << m_A + LOG_ERROR(<< "Failed to calculate mean kernel: " << e.what() + << ", m = " << m_M << ", p = " << m_P << ", a = " << m_A << ", b = " << m_B << ", x = " << x); return false; } @@ -309,7 +310,8 @@ class CVarianceKernel { using TValue = CVectorNx1; public: - CVarianceKernel(double mean, double m, double p, double a, double b) : m_Mean(mean), m_M(m), m_P(p), m_A(a), m_B(b) {} + CVarianceKernel(double mean, double m, double p, double a, double b) + : m_Mean(mean), m_M(m), m_P(p), m_A(a), m_B(b) {} bool operator()(const TValue& x, TValue& result) const { try { @@ -320,7 +322,8 @@ class CVarianceKernel { result(0) = (m * m * (std::exp(1.0 / x(0)) - 1.0) + pow2(m - m_Mean)) * fx; result(1) = fx; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to calculate mean kernel: " << e.what() << ", m = " << m_M << ", p = " << m_P << ", a = " << m_A + LOG_ERROR(<< "Failed to calculate mean kernel: " << e.what() + << ", m = " << m_M << ", p = " << m_P << ", a = " << m_A << ", b = " << m_B << ", x = " << x); return false; } @@ -349,38 +352,25 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { double precision, double shape, double rate) - : m_Calculation(calculation), - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Offset(offset), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_Tail(0) {} + : m_Calculation(calculation), m_WeightStyles(weightStyles), + m_Samples(samples), m_Weights(weights), + m_IsNonInformative(isNonInformative), m_Offset(offset), m_Mean(mean), + m_Precision(precision), m_Shape(shape), m_Rate(rate), m_Tail(0) {} bool operator()(double x, double& result) const { CJointProbabilityOfLessLikelySamples probability; maths_t::ETail tail = maths_t::E_UndeterminedTail; if (!evaluateFunctionOnJointDistribution( - m_WeightStyles, - m_Samples, - m_Weights, - boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), - CJointProbabilityOfLessLikelySamples::SAddProbability(), - m_IsNonInformative, - m_Offset + x, - m_Shape, - m_Rate, - m_Mean, - m_Precision, - probability) || + m_WeightStyles, m_Samples, m_Weights, + boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), + _1, _2, boost::ref(tail)), + CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative, + m_Offset + x, m_Shape, m_Rate, m_Mean, m_Precision, probability) || !probability.calculate(result)) { LOG_ERROR(<< "Failed to compute probability of less likely samples" - << ", samples = " << core::CContainerPrinter::print(m_Samples) << ", offset = " << m_Offset + x); + << ", samples = " << core::CContainerPrinter::print(m_Samples) + << ", offset = " << m_Offset + x); return false; } @@ -440,18 +430,10 @@ class CLogMarginalLikelihood : core::CNonCopyable { double precision, double shape, double rate) - : m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_Offset(offset), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_NumberSamples(0.0), - m_Scales(), - m_Constant(0.0), - m_ErrorStatus(maths_t::E_FpNoErrors) { + : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + m_Offset(offset), m_Mean(mean), m_Precision(precision), + m_Shape(shape), m_Rate(rate), m_NumberSamples(0.0), m_Scales(), + m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { this->precompute(); } @@ -498,12 +480,14 @@ class CLogMarginalLikelihood : core::CNonCopyable { double 
weightedNumberSamples = CBasicStatistics::count(logSampleMoments); double logSamplesMean = CBasicStatistics::mean(logSampleMoments); - double logSamplesSquareDeviation = (weightedNumberSamples - 1.0) * CBasicStatistics::variance(logSampleMoments); + double logSamplesSquareDeviation = (weightedNumberSamples - 1.0) * + CBasicStatistics::variance(logSampleMoments); double impliedShape = m_Shape + 0.5 * m_NumberSamples; - double impliedRate = - m_Rate + 0.5 * (logSamplesSquareDeviation + - m_Precision * weightedNumberSamples * pow2(logSamplesMean - m_Mean) / (m_Precision + weightedNumberSamples)); + double impliedRate = m_Rate + 0.5 * (logSamplesSquareDeviation + + m_Precision * weightedNumberSamples * + pow2(logSamplesMean - m_Mean) / + (m_Precision + weightedNumberSamples)); result = m_Constant - impliedShape * std::log(impliedRate) - logSamplesSum; @@ -511,7 +495,9 @@ class CLogMarginalLikelihood : core::CNonCopyable { } //! Retrieve the error status for the integration. - maths_t::EFloatingPointErrorStatus errorStatus() const { return m_ErrorStatus; } + maths_t::EFloatingPointErrorStatus errorStatus() const { + return m_ErrorStatus; + } private: static const double LOG_2_PI; @@ -522,13 +508,15 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { double logVarianceScaleSum = 0.0; - if (maths_t::hasSeasonalVarianceScale(m_WeightStyles, m_Weights) || maths_t::hasCountVarianceScale(m_WeightStyles, m_Weights)) { + if (maths_t::hasSeasonalVarianceScale(m_WeightStyles, m_Weights) || + maths_t::hasCountVarianceScale(m_WeightStyles, m_Weights)) { m_Scales.reserve(m_Weights.size()); double r = m_Rate / m_Shape; double s = std::exp(-r); for (std::size_t i = 0u; i < m_Weights.size(); ++i) { - double varianceScale = maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * - maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + double varianceScale = + maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * + maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); // Get the scale and shift of the exponentiated Gaussian. if (varianceScale == 1.0) { @@ -547,15 +535,17 @@ class CLogMarginalLikelihood : core::CNonCopyable { for (std::size_t i = 0u; i < m_Weights.size(); ++i) { double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); m_NumberSamples += n; - weightedNumberSamples += n / (m_Scales.empty() ? 1.0 : m_Scales[i].first); + weightedNumberSamples += + n / (m_Scales.empty() ? 1.0 : m_Scales[i].first); } double impliedShape = m_Shape + 0.5 * m_NumberSamples; double impliedPrecision = m_Precision + weightedNumberSamples; - m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision)) - 0.5 * m_NumberSamples * LOG_2_PI - - 0.5 * logVarianceScaleSum + boost::math::lgamma(impliedShape) - boost::math::lgamma(m_Shape) + - m_Shape * std::log(m_Rate); + m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision)) - + 0.5 * m_NumberSamples * LOG_2_PI - 0.5 * logVarianceScaleSum + + boost::math::lgamma(impliedShape) - + boost::math::lgamma(m_Shape) + m_Shape * std::log(m_Rate); } catch (const std::exception& e) { LOG_ERROR(<< "Error calculating marginal likelihood: " << e.what()); this->addErrorStatus(maths_t::E_FpFailed); @@ -582,7 +572,8 @@ class CLogMarginalLikelihood : core::CNonCopyable { mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus; }; -const double CLogMarginalLikelihood::LOG_2_PI = std::log(boost::math::double_constants::two_pi); +const double CLogMarginalLikelihood::LOG_2_PI = + std::log(boost::math::double_constants::two_pi); //! 
\brief Wraps up the sample total square deviation of the logs of a //! collection of samples, i.e. @@ -595,8 +586,12 @@ const double CLogMarginalLikelihood::LOG_2_PI = std::log(boost::math::double_con //! [n, n+1]. class CLogSampleSquareDeviation : core::CNonCopyable { public: - CLogSampleSquareDeviation(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights, double mean) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), m_Mean(mean) {} + CLogSampleSquareDeviation(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double mean) + : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + m_Mean(mean) {} bool operator()(double x, double& result) const { result = 0.0; @@ -642,40 +637,33 @@ CLogNormalMeanPrecConjugate::CLogNormalMeanPrecConjugate(maths_t::EDataType data double gammaRate, double decayRate, double offsetMargin) - : CPrior(dataType, decayRate), - m_Offset(offset), - m_OffsetMargin(offsetMargin), - m_GaussianMean(gaussianMean), - m_GaussianPrecision(gaussianPrecision), - m_GammaShape(gammaShape), - m_GammaRate(gammaRate) { + : CPrior(dataType, decayRate), m_Offset(offset), m_OffsetMargin(offsetMargin), + m_GaussianMean(gaussianMean), m_GaussianPrecision(gaussianPrecision), + m_GammaShape(gammaShape), m_GammaRate(gammaRate) { } CLogNormalMeanPrecConjugate::CLogNormalMeanPrecConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser, double offsetMargin) - : CPrior(params.s_DataType, params.s_DecayRate), - m_Offset(0.0), - m_OffsetMargin(offsetMargin), - m_GaussianMean(0.0), - m_GaussianPrecision(0.0), - m_GammaShape(0.0), - m_GammaRate(0.0) { - traverser.traverseSubLevel(boost::bind(&CLogNormalMeanPrecConjugate::acceptRestoreTraverser, this, _1)); + : CPrior(params.s_DataType, params.s_DecayRate), m_Offset(0.0), + m_OffsetMargin(offsetMargin), m_GaussianMean(0.0), + m_GaussianPrecision(0.0), m_GammaShape(0.0), m_GammaRate(0.0) { + traverser.traverseSubLevel( + boost::bind(&CLogNormalMeanPrecConjugate::acceptRestoreTraverser, this, _1)); } bool CLogNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) RESTORE_BUILT_IN(OFFSET_TAG, m_Offset) RESTORE_BUILT_IN(GAUSSIAN_MEAN_TAG, m_GaussianMean) RESTORE_BUILT_IN(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision) RESTORE_BUILT_IN(GAMMA_SHAPE_TAG, m_GammaShape) RESTORE_BUILT_IN(GAMMA_RATE_TAG, m_GammaRate) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) } while (traverser.next()); @@ -684,15 +672,13 @@ bool CLogNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTrav } CLogNormalMeanPrecConjugate -CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType, double offset, double decayRate, double offsetMargin) { - return CLogNormalMeanPrecConjugate(dataType, - offset + offsetMargin, - NON_INFORMATIVE_MEAN, - NON_INFORMATIVE_PRECISION, - NON_INFORMATIVE_SHAPE, - 
NON_INFORMATIVE_RATE, - decayRate, - offsetMargin); +CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType, + double offset, + double decayRate, + double offsetMargin) { + return CLogNormalMeanPrecConjugate( + dataType, offset + offsetMargin, NON_INFORMATIVE_MEAN, NON_INFORMATIVE_PRECISION, + NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, decayRate, offsetMargin); } CLogNormalMeanPrecConjugate::EPrior CLogNormalMeanPrecConjugate::type() const { @@ -704,7 +690,8 @@ CLogNormalMeanPrecConjugate* CLogNormalMeanPrecConjugate::clone() const { } void CLogNormalMeanPrecConjugate::setToNonInformative(double offset, double decayRate) { - *this = nonInformativePrior(this->dataType(), offset + this->offsetMargin(), decayRate, this->offsetMargin()); + *this = nonInformativePrior(this->dataType(), offset + this->offsetMargin(), + decayRate, this->offsetMargin()); } double CLogNormalMeanPrecConjugate::offsetMargin() const { @@ -715,8 +702,9 @@ bool CLogNormalMeanPrecConjugate::needsOffset() const { return true; } -double -CLogNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +double CLogNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { COffsetCost cost(*this); CApplyOffset apply(*this); return this->adjustOffsetWithCost(weightStyles, samples, weights, cost, apply); @@ -734,7 +722,8 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return; } @@ -816,10 +805,13 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * + maths_t::countVarianceScale(weightStyles, weights[i]); double x = samples[i] + m_Offset; numberSamples += n; - double t = varianceScale == 1.0 ? r : r + std::log(s + varianceScale * (1.0 - s)); + double t = varianceScale == 1.0 + ? r + : r + std::log(s + varianceScale * (1.0 - s)); double shift = (r - t) / 2.0; double scale = r == t ? 1.0 : t / r; scaledSamples[i] = scale; @@ -830,36 +822,46 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles scaledNumberSamples = CBasicStatistics::count(logSamplesMean_); logSamplesMean = CBasicStatistics::mean(logSamplesMean_); - double mean = - (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) / (m_GaussianPrecision + scaledNumberSamples); + double mean = (m_GaussianPrecision * m_GaussianMean + + scaledNumberSamples * logSamplesMean) / + (m_GaussianPrecision + scaledNumberSamples); for (std::size_t i = 0u; i < scaledSamples.size(); ++i) { double scale = scaledSamples[i]; scaledSamples[i] = - scale == 1.0 ? samples[i] + m_Offset : std::exp(mean + (std::log(samples[i] + m_Offset) - mean) / std::sqrt(scale)); + scale == 1.0 + ? 
samples[i] + m_Offset + : std::exp(mean + (std::log(samples[i] + m_Offset) - mean) / + std::sqrt(scale)); } - detail::CLogSampleSquareDeviation deviationFunction(weightStyles, scaledSamples, weights, logSamplesMean); - CIntegration::gaussLegendre(deviationFunction, 0.0, 1.0, logSamplesSquareDeviation); + detail::CLogSampleSquareDeviation deviationFunction( + weightStyles, scaledSamples, weights, logSamplesMean); + CIntegration::gaussLegendre( + deviationFunction, 0.0, 1.0, logSamplesSquareDeviation); } else { TMeanVarAccumulator logSamplesMoments; for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * + maths_t::countVarianceScale(weightStyles, weights[i]); double x = samples[i] + m_Offset; if (x <= 0.0) { LOG_ERROR(<< "Discarding " << x << " it's not log-normal"); continue; } numberSamples += n; - double t = varianceScale == 1.0 ? r : r + std::log(s + varianceScale * (1.0 - s)); + double t = varianceScale == 1.0 + ? r + : r + std::log(s + varianceScale * (1.0 - s)); double scale = r == t ? 1.0 : t / r; double shift = (r - t) / 2.0; logSamplesMoments.add(std::log(x) - shift, n / scale); } scaledNumberSamples = CBasicStatistics::count(logSamplesMoments); logSamplesMean = CBasicStatistics::mean(logSamplesMoments); - logSamplesSquareDeviation = (scaledNumberSamples - 1.0) * CBasicStatistics::variance(logSamplesMoments); + logSamplesSquareDeviation = (scaledNumberSamples - 1.0) * + CBasicStatistics::variance(logSamplesMoments); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to update likelihood: " << e.what()); @@ -867,11 +869,13 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles } m_GammaShape += 0.5 * numberSamples; - m_GammaRate += 0.5 * (logSamplesSquareDeviation + m_GaussianPrecision * scaledNumberSamples * pow2(logSamplesMean - m_GaussianMean) / - (m_GaussianPrecision + scaledNumberSamples)); + m_GammaRate += 0.5 * (logSamplesSquareDeviation + + m_GaussianPrecision * scaledNumberSamples * + pow2(logSamplesMean - m_GaussianMean) / + (m_GaussianPrecision + scaledNumberSamples)); - m_GaussianMean = - (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) / (m_GaussianPrecision + scaledNumberSamples); + m_GaussianMean = (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) / + (m_GaussianPrecision + scaledNumberSamples); m_GaussianPrecision += scaledNumberSamples; // If the coefficient of variation of the data is too small we run @@ -902,16 +906,19 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles double minimumRate = (2.0 * m_GammaShape - 1.0) * pow2(MINIMUM_COEFFICIENT_OF_VARIATION); if (m_GammaRate < minimumRate) { - double extraVariation = (minimumRate - m_GammaRate) / (m_GaussianPrecision - 1.0); + double extraVariation = (minimumRate - m_GammaRate) / + (m_GaussianPrecision - 1.0); m_GammaRate = minimumRate; m_GaussianMean -= 0.5 * extraVariation; } } - LOG_TRACE(<< "logSamplesMean = " << logSamplesMean << ", logSamplesSquareDeviation = " << logSamplesSquareDeviation - << ", numberSamples = " << numberSamples << ", scaledNumberSamples = " << scaledNumberSamples); - LOG_TRACE(<< "m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate << ", m_GaussianMean = " << m_GaussianMean - << ", 
m_GaussianPrecision = " << m_GaussianPrecision << ", m_Offset = " << m_Offset); + LOG_TRACE(<< "logSamplesMean = " << logSamplesMean << ", logSamplesSquareDeviation = " + << logSamplesSquareDeviation << ", numberSamples = " << numberSamples + << ", scaledNumberSamples = " << scaledNumberSamples); + LOG_TRACE(<< "m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate + << ", m_GaussianMean = " << m_GaussianMean << ", m_GaussianPrecision = " + << m_GaussianPrecision << ", m_Offset = " << m_Offset); if (this->isBad()) { LOG_ERROR(<< "Update failed (" << this->debug() << ")"); @@ -947,18 +954,22 @@ void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time) { // // Thus the mean is unchanged and variance is increased by 1 / f. - double factor = std::min((alpha * m_GammaShape + beta * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0); + double factor = std::min( + (alpha * m_GammaShape + beta * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0); m_GammaShape *= factor; m_GammaRate *= factor; this->numberSamples(this->numberSamples() * alpha); - LOG_TRACE(<< "time = " << time << ", alpha = " << alpha << ", m_GaussianPrecision = " << m_GaussianPrecision - << ", m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate << ", numberSamples = " << this->numberSamples()); + LOG_TRACE(<< "time = " << time << ", alpha = " << alpha + << ", m_GaussianPrecision = " << m_GaussianPrecision + << ", m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate + << ", numberSamples = " << this->numberSamples()); } -CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::marginalLikelihoodSupport() const { +CLogNormalMeanPrecConjugate::TDoubleDoublePr +CLogNormalMeanPrecConjugate::marginalLikelihoodSupport() const { return std::make_pair(-m_Offset, boost::numeric::bounds::highest()); } @@ -966,7 +977,8 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMean() const { return this->isInteger() ? 
this->mean() - 0.5 : this->mean(); } -double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { if (this->isNonInformative()) { return std::exp(m_GaussianMean) - m_Offset; } @@ -977,14 +989,18 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec double varianceScale = 1.0; try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale: " << e.what()); } + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * + maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get variance scale: " << e.what()); + } try { double r = m_GammaRate / m_GammaShape; double s = std::exp(-r); double location; double scale; - detail::locationAndScale(varianceScale, r, s, m_GaussianMean, m_GaussianPrecision, m_GammaRate, m_GammaShape, location, scale); + detail::locationAndScale(varianceScale, r, s, m_GaussianMean, m_GaussianPrecision, + m_GammaRate, m_GammaShape, location, scale); LOG_TRACE(<< "location = " << location << ", scale = " << scale); if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) { boost::math::lognormal_distribution<> logNormal(location, scale); @@ -994,8 +1010,9 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec double result = mode(logt) - m_Offset - (this->isInteger() ? 0.5 : 0.0); return result; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what() << ", gaussian mean = " << m_GaussianMean - << ", gaussian precision = " << m_GaussianPrecision << ", gamma rate = " << m_GammaRate + LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what() + << ", gaussian mean = " << m_GaussianMean << ", gaussian precision = " + << m_GaussianPrecision << ", gamma rate = " << m_GammaRate << ", gamma shape = " << m_GammaShape); } @@ -1005,7 +1022,8 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec return (normalPrecision == 0.0 ? 
0.0 : std::exp(normalMean - 1.0 / normalPrecision)) - m_Offset; } -double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } @@ -1032,31 +1050,41 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl double varianceScale = 1.0; try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale: " << e.what()); } - double vh = std::exp(2.0 * m_GaussianMean + m_GammaRate / m_GammaShape * (2.0 / m_GaussianPrecision + 1.0)) * + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * + maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get variance scale: " << e.what()); + } + double vh = std::exp(2.0 * m_GaussianMean + m_GammaRate / m_GammaShape * + (2.0 / m_GaussianPrecision + 1.0)) * (std::exp(m_GammaRate / m_GammaShape) - 1.0); if (m_GammaShape < MINIMUM_LOGNORMAL_SHAPE) { try { - detail::CVarianceKernel f(this->marginalLikelihoodMean(), m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + detail::CVarianceKernel f(this->marginalLikelihoodMean(), m_GaussianMean, + m_GaussianPrecision, m_GammaShape, m_GammaRate); boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); TDoubleVec a(2); TDoubleVec b(2); a[0] = boost::math::quantile(gamma, 0.03); b[0] = boost::math::quantile(gamma, 0.97); - boost::math::normal_distribution<> normal(m_GaussianMean, 1.0 / a[0] / m_GaussianPrecision); + boost::math::normal_distribution<> normal( + m_GaussianMean, 1.0 / a[0] / m_GaussianPrecision); a[1] = boost::math::quantile(normal, 0.03); b[1] = boost::math::quantile(normal, 0.97); detail::CVarianceKernel::TValue variance; - if (CIntegration::sparseGaussLegendre(f, a, b, variance)) { + if (CIntegration::sparseGaussLegendre( + f, a, b, variance)) { double vl = variance(0) / variance(1); - double alpha = std::min(2.0 * (1.0 - m_GammaShape / MINIMUM_LOGNORMAL_SHAPE), 1.0); + double alpha = std::min( + 2.0 * (1.0 - m_GammaShape / MINIMUM_LOGNORMAL_SHAPE), 1.0); return varianceScale * alpha * vl + (1.0 - alpha) * vh; } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to calculate variance: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to calculate variance: " << e.what()); + } } return varianceScale * vh; } @@ -1075,38 +1103,48 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent // We use the fact that the marginal likelihood is a log-t distribution. 
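// In particular, the two end points are obtained by pushing the quantiles
// (1 - percentage) / 2 and (1 + percentage) / 2 through the log-t quantile
// function, or the log-normal one when the gamma shape is large enough that
// the t distribution is effectively Gaussian, then subtracting the offset
// and, for integer data, the 0.5 continuity correction.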
try { - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); + double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * + maths_t::countVarianceScale(weightStyles, weights); double r = m_GammaRate / m_GammaShape; double s = std::exp(-r); double location; double scale; - detail::locationAndScale(varianceScale, r, s, m_GaussianMean, m_GaussianPrecision, m_GammaRate, m_GammaShape, location, scale); + detail::locationAndScale(varianceScale, r, s, m_GaussianMean, m_GaussianPrecision, + m_GammaRate, m_GammaShape, location, scale); LOG_TRACE(<< "location = " << location << ", scale = " << scale); if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) { boost::math::lognormal_distribution<> logNormal(location, scale); - double x1 = boost::math::quantile(logNormal, (1.0 - percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0); + double x1 = boost::math::quantile(logNormal, (1.0 - percentage) / 2.0) - + m_Offset - (this->isInteger() ? 0.5 : 0.0); double x2 = percentage > 0.0 - ? boost::math::quantile(logNormal, (1.0 + percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0) + ? boost::math::quantile(logNormal, (1.0 + percentage) / 2.0) - + m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); return std::make_pair(x1, x2); } CLogTDistribution logt(2.0 * m_GammaShape, location, scale); - double x1 = quantile(logt, (1.0 - percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0); - double x2 = percentage > 0.0 ? quantile(logt, (1.0 + percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1; + double x1 = quantile(logt, (1.0 - percentage) / 2.0) - m_Offset - + (this->isInteger() ? 0.5 : 0.0); + double x2 = percentage > 0.0 ? quantile(logt, (1.0 + percentage) / 2.0) - + m_Offset - (this->isInteger() ? 
0.5 : 0.0) + : x1; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); return std::make_pair(x1, x2); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); + } return this->marginalLikelihoodSupport(); } -maths_t::EFloatingPointErrorStatus CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const { +maths_t::EFloatingPointErrorStatus +CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; if (samples.empty()) { @@ -1115,7 +1153,8 @@ maths_t::EFloatingPointErrorStatus CLogNormalMeanPrecConjugate::jointLogMarginal } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } @@ -1134,15 +1173,17 @@ maths_t::EFloatingPointErrorStatus CLogNormalMeanPrecConjugate::jointLogMarginal } detail::CLogMarginalLikelihood logMarginalLikelihood( - weightStyles, samples, weights, m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + weightStyles, samples, weights, m_Offset, m_GaussianMean, + m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { - CIntegration::logGaussLegendre(logMarginalLikelihood, 0.0, 1.0, result); + CIntegration::logGaussLegendre( + logMarginalLikelihood, 0.0, 1.0, result); } else { logMarginalLikelihood(0.0, result); } - maths_t::EFloatingPointErrorStatus status = - static_cast(logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result)); + maths_t::EFloatingPointErrorStatus status = static_cast( + logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result)); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Failed to compute log likelihood (" << this->debug() << ")"); LOG_ERROR(<< "samples = " << core::CContainerPrinter::print(samples)); @@ -1155,7 +1196,8 @@ maths_t::EFloatingPointErrorStatus CLogNormalMeanPrecConjugate::jointLogMarginal return status; } -void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { +void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples, + TDouble1Vec& samples) const { samples.clear(); if (numberSamples == 0 || this->numberSamples() == 0.0) { @@ -1206,13 +1248,15 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam samples.reserve(numberSamples); - double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape); + double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * + m_GammaRate / m_GammaShape); try { boost::math::lognormal_distribution<> lognormal(m_GaussianMean, scale); double mean = boost::math::mean(lognormal); - LOG_TRACE(<< "mean = " << mean << ", scale = " << scale << ", numberSamples = " << numberSamples); + LOG_TRACE(<< "mean = " << mean << ", scale = " << scale + << ", numberSamples = " << numberSamples); TDoubleDoublePr support = this->marginalLikelihoodSupport(); @@ -1222,11 +1266,14 @@ void 
CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam double q = static_cast(i) / static_cast(numberSamples); double xq = std::log(boost::math::quantile(lognormal, q)); - double z = (xq - m_GaussianMean - scale * scale) / scale / boost::math::double_constants::root_two; + double z = (xq - m_GaussianMean - scale * scale) / scale / + boost::math::double_constants::root_two; double partialExpectation = mean * (1.0 + boost::math::erf(z)) / 2.0; - double sample = static_cast(numberSamples) * (partialExpectation - lastPartialExpectation) - m_Offset; + double sample = static_cast(numberSamples) * + (partialExpectation - lastPartialExpectation) - + m_Offset; LOG_TRACE(<< "sample = " << sample); @@ -1234,25 +1281,29 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam if (sample >= support.first && sample <= support.second) { samples.push_back(sample); } else { - LOG_ERROR(<< "Sample out of bounds: sample = " << sample - m_Offset << ", gaussianMean = " << m_GaussianMean - << ", scale = " << scale << ", q = " << q << ", x(q) = " << xq << ", mean = " << mean); + LOG_ERROR(<< "Sample out of bounds: sample = " << sample - m_Offset + << ", gaussianMean = " << m_GaussianMean << ", scale = " << scale + << ", q = " << q << ", x(q) = " << xq << ", mean = " << mean); } lastPartialExpectation = partialExpectation; } - double sample = static_cast(numberSamples) * (mean - lastPartialExpectation) - m_Offset; + double sample = + static_cast(numberSamples) * (mean - lastPartialExpectation) - m_Offset; LOG_TRACE(<< "sample = " << sample); if (sample >= support.first && sample <= support.second) { samples.push_back(sample); } else { - LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", gaussianMean = " << m_GaussianMean << ", scale = " << scale - << ", mean = " << mean); + LOG_ERROR(<< "Sample out of bounds: sample = " << sample + << ", gaussianMean = " << m_GaussianMean + << ", scale = " << scale << ", mean = " << mean); } } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to sample: " << e.what() << ", gaussianMean " << m_GaussianMean << ", scale = " << scale); + LOG_ERROR(<< "Failed to sample: " << e.what() << ", gaussianMean " + << m_GaussianMean << ", scale = " << scale); } } @@ -1265,16 +1316,19 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weight lowerBound = upperBound = 0.0; - TMinusLogCdf minusLogCdf( - weightStyles, samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + TMinusLogCdf minusLogCdf(weightStyles, samples, weights, + this->isNonInformative(), m_Offset, m_GaussianMean, + m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. double value; - if (!CIntegration::logGaussLegendre(minusLogCdf, 0.0, 1.0, value)) { - LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); + if (!CIntegration::logGaussLegendre( + minusLogCdf, 0.0, 1.0, value)) { + LOG_ERROR(<< "Failed computing c.d.f. 
for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1284,7 +1338,8 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weight double value; if (!minusLogCdf(0.0, value)) { - LOG_ERROR(<< "Failed computing c.d.f for " << core::CContainerPrinter::print(samples)); + LOG_ERROR(<< "Failed computing c.d.f for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1302,15 +1357,18 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleV lowerBound = upperBound = 0.0; TMinusLogCdfComplement minusLogCdfComplement( - weightStyles, samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + weightStyles, samples, weights, this->isNonInformative(), m_Offset, + m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. double value; - if (!CIntegration::logGaussLegendre(minusLogCdfComplement, 0.0, 1.0, value)) { - LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); + if (!CIntegration::logGaussLegendre( + minusLogCdfComplement, 0.0, 1.0, value)) { + LOG_ERROR(<< "Failed computing c.d.f. complement for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1320,7 +1378,8 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleV double value; if (!minusLogCdfComplement(0.0, value)) { - LOG_ERROR(<< "Failed computing c.d.f complement for " << core::CContainerPrinter::print(samples)); + LOG_ERROR(<< "Failed computing c.d.f complement for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1328,34 +1387,30 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleV return true; } -bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& lowerBound, - double& upperBound, - maths_t::ETail& tail) const { +bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples( + maths_t::EProbabilityCalculation calculation, + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const { lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; - detail::CProbabilityOfLessLikelySamples probability(calculation, - weightStyles, - samples, - weights, - this->isNonInformative(), - m_Offset, - m_GaussianMean, - m_GaussianPrecision, - m_GammaShape, - m_GammaRate); + detail::CProbabilityOfLessLikelySamples probability( + calculation, weightStyles, samples, weights, this->isNonInformative(), + m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. 
double value; - if (!CIntegration::gaussLegendre(probability, 0.0, 1.0, value)) { - LOG_ERROR(<< "Failed computing probability for " << core::CContainerPrinter::print(samples)); + if (!CIntegration::gaussLegendre(probability, 0.0, + 1.0, value)) { + LOG_ERROR(<< "Failed computing probability for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1367,7 +1422,8 @@ bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProba double value; if (!probability(0.0, value)) { - LOG_ERROR(<< "Failed computing probability for " << core::CContainerPrinter::print(samples)); + LOG_ERROR(<< "Failed computing probability for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1388,7 +1444,8 @@ void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& return; } - double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape); + double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * + m_GammaRate / m_GammaShape); try { boost::math::lognormal_distribution<> lognormal(m_GaussianMean, scale); double mean = boost::math::mean(lognormal); @@ -1444,9 +1501,11 @@ std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const { y = yStart; for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) { double conditionalPrecision = m_GaussianPrecision * x; - boost::math::normal_distribution<> conditionalGaussian(m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision)); + boost::math::normal_distribution<> conditionalGaussian( + m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision)); - pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) << " "; + pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) + << " "; } pdf << core_t::LINE_ENDING; } @@ -1480,10 +1539,12 @@ void CLogNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInser inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); inserter.insertValue(OFFSET_TAG, m_Offset, core::CIEEE754::E_SinglePrecision); inserter.insertValue(GAUSSIAN_MEAN_TAG, m_GaussianMean, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision, + core::CIEEE754::E_SinglePrecision); inserter.insertValue(GAMMA_SHAPE_TAG, m_GammaShape, core::CIEEE754::E_SinglePrecision); inserter.insertValue(GAMMA_RATE_TAG, m_GammaRate, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), + core::CIEEE754::E_SinglePrecision); } double CLogNormalMeanPrecConjugate::normalMean() const { @@ -1499,15 +1560,18 @@ double CLogNormalMeanPrecConjugate::normalPrecision() const { boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); return boost::math::mean(gamma); } catch (std::exception& e) { - LOG_ERROR(<< "Failed to create prior: " << e.what() << " shape = " << m_GammaShape << ", rate = " << m_GammaRate); + LOG_ERROR(<< "Failed to create prior: " << e.what() + << " shape = " << m_GammaShape << ", rate = " << m_GammaRate); } return 0.0; } -CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) const { +CLogNormalMeanPrecConjugate::TDoubleDoublePr 
+CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) const { if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); + return std::make_pair(boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()); } // Compute the symmetric confidence interval around the median of the @@ -1545,9 +1609,11 @@ CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confid return std::make_pair(xLower, xUpper); } -CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage) const { +CLogNormalMeanPrecConjugate::TDoubleDoublePr +CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage) const { if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); + return std::make_pair(boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()); } percentage /= 100.0; @@ -1557,13 +1623,17 @@ CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confid // The marginal prior distribution for the precision is gamma. boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), boost::math::quantile(gamma, upperPercentile)); + return std::make_pair(boost::math::quantile(gamma, lowerPercentile), + boost::math::quantile(gamma, upperPercentile)); } -bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjugate& rhs, const TEqualWithTolerance& equal) const { - LOG_DEBUG(<< m_GaussianMean << " " << rhs.m_GaussianMean << ", " << m_GaussianPrecision << " " << rhs.m_GaussianPrecision << ", " - << m_GammaShape << " " << rhs.m_GammaShape << ", " << m_GammaRate << " " << rhs.m_GammaRate); - return equal(m_GaussianMean, rhs.m_GaussianMean) && equal(m_GaussianPrecision, rhs.m_GaussianPrecision) && +bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjugate& rhs, + const TEqualWithTolerance& equal) const { + LOG_DEBUG(<< m_GaussianMean << " " << rhs.m_GaussianMean << ", " << m_GaussianPrecision + << " " << rhs.m_GaussianPrecision << ", " << m_GammaShape << " " + << rhs.m_GammaShape << ", " << m_GammaRate << " " << rhs.m_GammaRate); + return equal(m_GaussianMean, rhs.m_GaussianMean) && + equal(m_GaussianPrecision, rhs.m_GaussianPrecision) && equal(m_GammaShape, rhs.m_GammaShape) && equal(m_GammaRate, rhs.m_GammaRate); } @@ -1593,7 +1663,8 @@ double CLogNormalMeanPrecConjugate::mean() const { if (m_GammaShape < MINIMUM_LOGNORMAL_SHAPE) { try { - detail::CMeanKernel f(m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + detail::CMeanKernel f(m_GaussianMean, m_GaussianPrecision, + m_GammaShape, m_GammaRate); boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); double a = boost::math::quantile(gamma, 0.1); double b = boost::math::quantile(gamma, 0.9); @@ -1601,20 +1672,25 @@ double CLogNormalMeanPrecConjugate::mean() const { if (CIntegration::gaussLegendre(f, a, b, result)) { return result(0) / result(1) - m_Offset; } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to calculate mean: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to calculate mean: " << e.what()); + } } - return std::exp(m_GaussianMean + 0.5 * m_GammaRate / m_GammaShape * (1.0 / m_GaussianPrecision + 1.0)) - m_Offset; + return std::exp(m_GaussianMean + 0.5 * 
m_GammaRate / m_GammaShape *
+                        (1.0 / m_GaussianPrecision + 1.0)) -
+           m_Offset;
 }
 
 bool CLogNormalMeanPrecConjugate::isBad() const {
-    return !CMathsFuncs::isFinite(m_Offset) || !CMathsFuncs::isFinite(m_GaussianMean) || !CMathsFuncs::isFinite(m_GaussianPrecision) ||
+    return !CMathsFuncs::isFinite(m_Offset) || !CMathsFuncs::isFinite(m_GaussianMean) ||
+           !CMathsFuncs::isFinite(m_GaussianPrecision) ||
            !CMathsFuncs::isFinite(m_GammaShape) || !CMathsFuncs::isFinite(m_GammaRate);
 }
 
 std::string CLogNormalMeanPrecConjugate::debug() const {
     std::ostringstream result;
-    result << std::scientific << std::setprecision(15) << m_Offset << " " << m_GaussianMean << " " << m_GaussianMean << " " << m_GammaShape
-           << " " << m_GammaRate;
+    result << std::scientific << std::setprecision(15) << m_Offset << " " << m_GaussianMean
+           << " " << m_GaussianPrecision << " " << m_GammaShape << " " << m_GammaRate;
     return result.str();
 }
 
diff --git a/lib/maths/CLogTDistribution.cc b/lib/maths/CLogTDistribution.cc
index 251e9b914c..cad91f9a60 100644
--- a/lib/maths/CLogTDistribution.cc
+++ b/lib/maths/CLogTDistribution.cc
@@ -82,7 +82,8 @@ double mode(const CLogTDistribution& distribution) {
 
     double location = distribution.location();
 
-    return std::exp(location - (degreesFreedom + 1.0) / 2.0 + std::sqrt(square(degreesFreedom + 1.0) / 4.0 - degreesFreedom * squareScale));
+    return std::exp(location - (degreesFreedom + 1.0) / 2.0 +
+                    std::sqrt(square(degreesFreedom + 1.0) / 4.0 - degreesFreedom * squareScale));
 }
 
 CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistribution& distribution) {
@@ -103,7 +104,8 @@ CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistrib
 
     double location = distribution.location();
 
-    return std::exp(location - (degreesFreedom + 1.0) / 2.0 - std::sqrt(square(degreesFreedom + 1.0) / 4.0 - degreesFreedom * squareScale));
+    return std::exp(location - (degreesFreedom + 1.0) / 2.0 -
+                    std::sqrt(square(degreesFreedom + 1.0) / 4.0 - degreesFreedom * squareScale));
 }
 
 double pdf(const CLogTDistribution& distribution, double x) {
diff --git a/lib/maths/CMixtureDistribution.cc b/lib/maths/CMixtureDistribution.cc
index 8d419c306e..598a567e70 100644
--- a/lib/maths/CMixtureDistribution.cc
+++ b/lib/maths/CMixtureDistribution.cc
@@ -105,26 +105,33 @@ class CBinaryVisitor {
 
 namespace mixture_detail {
 
-CMixtureModeImpl::CMixtureModeImpl(const boost::math::normal_distribution<>& normal) : m_Distribution(normal) {
+CMixtureModeImpl::CMixtureModeImpl(const boost::math::normal_distribution<>& normal)
+    : m_Distribution(normal) {
 }
 
-CMixtureModeImpl::CMixtureModeImpl(const boost::math::gamma_distribution<>& gamma) : m_Distribution(gamma) {
+CMixtureModeImpl::CMixtureModeImpl(const boost::math::gamma_distribution<>& gamma)
+    : m_Distribution(gamma) {
 }
 
-CMixtureModeImpl::CMixtureModeImpl(const boost::math::lognormal_distribution<>& lognormal) : m_Distribution(lognormal) {
+CMixtureModeImpl::CMixtureModeImpl(const boost::math::lognormal_distribution<>& lognormal)
+    : m_Distribution(lognormal) {
 }
 }
 
-CMixtureMode::CMixtureMode(const boost::math::normal_distribution<>& normal) : mixture_detail::CMixtureModeImpl(normal) {
+CMixtureMode::CMixtureMode(const boost::math::normal_distribution<>& normal)
+    : mixture_detail::CMixtureModeImpl(normal) {
 }
 
-CMixtureMode::CMixtureMode(const boost::math::gamma_distribution<>& gamma) : mixture_detail::CMixtureModeImpl(gamma) {
+CMixtureMode::CMixtureMode(const boost::math::gamma_distribution<>& gamma)
+    : mixture_detail::CMixtureModeImpl(gamma) {
 }
-CMixtureMode::CMixtureMode(const boost::math::lognormal_distribution<>& lognormal) : mixture_detail::CMixtureModeImpl(lognormal) { +CMixtureMode::CMixtureMode(const boost::math::lognormal_distribution<>& lognormal) + : mixture_detail::CMixtureModeImpl(lognormal) { } -CMixtureMode::CMixtureMode(const CMixtureMode& other) : mixture_detail::CMixtureModeImpl(other) { +CMixtureMode::CMixtureMode(const CMixtureMode& other) + : mixture_detail::CMixtureModeImpl(other) { } mixture_detail::TDoubleDoublePr support(const CMixtureMode& mode) { diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc index 2a5dc812c8..c414d78b85 100644 --- a/lib/maths/CModel.cc +++ b/lib/maths/CModel.cc @@ -26,12 +26,14 @@ using TDouble2Vec = core::CSmallVector; //! Check if all the elements of \p lhs are less than or equal to the \p rhs. bool lessThanEqual(const TDouble2Vec& lhs, double rhs) { - return std::find_if(lhs.begin(), lhs.end(), [rhs](double lhs_) { return lhs_ > rhs; }) == lhs.end(); + return std::find_if(lhs.begin(), lhs.end(), + [rhs](double lhs_) { return lhs_ > rhs; }) == lhs.end(); } //! Check if all the elements of \p lhs are less than or equal to the \p rhs. bool greaterThanEqual(const TDouble2Vec& lhs, double rhs) { - return std::find_if(lhs.begin(), lhs.end(), [rhs](double lhs_) { return lhs_ < rhs; }) == lhs.end(); + return std::find_if(lhs.begin(), lhs.end(), + [rhs](double lhs_) { return lhs_ < rhs; }) == lhs.end(); } //! Get the correction to apply to the one-sided probability calculations. @@ -40,8 +42,9 @@ bool greaterThanEqual(const TDouble2Vec& lhs, double rhs) { //! if the calculation is one sided below (above) we need to add on twice //! the probability of zero if the actual feature value is greater (less) //! than zero. -double -oneSidedEmptyBucketCorrection(maths_t::EProbabilityCalculation calculation, const TDouble2Vec& value, double probabilityEmptyBucket) { +double oneSidedEmptyBucketCorrection(maths_t::EProbabilityCalculation calculation, + const TDouble2Vec& value, + double probabilityEmptyBucket) { switch (calculation) { case maths_t::E_OneSidedBelow: return greaterThanEqual(value, 0.0) ? 2.0 * probabilityEmptyBucket : 0.0; @@ -53,11 +56,13 @@ oneSidedEmptyBucketCorrection(maths_t::EProbabilityCalculation calculation, cons return 0.0; } -const double EFFECTIVE_COUNT[]{1.0, 0.8, 0.7, 0.65, 0.6, 0.57, 0.54, 0.52, 0.51}; +const double EFFECTIVE_COUNT[]{1.0, 0.8, 0.7, 0.65, 0.6, + 0.57, 0.54, 0.52, 0.51}; //! Get the parameters for the stub model. 
CModelParams stubParameters() { - return CModelParams{0, 1.0, 0.0, 0.0, 6 * core::constants::HOUR, core::constants::DAY}; + return CModelParams{ + 0, 1.0, 0.0, 0.0, 6 * core::constants::HOUR, core::constants::DAY}; } } @@ -67,9 +72,7 @@ CModelParams::CModelParams(core_t::TTime bucketLength, double minimumSeasonalVarianceScale, core_t::TTime minimumTimeToDetectChange, core_t::TTime maximumTimeToTestForChange) - : m_BucketLength(bucketLength), - m_LearnRate(learnRate), - m_DecayRate(decayRate), + : m_BucketLength(bucketLength), m_LearnRate(learnRate), m_DecayRate(decayRate), m_MinimumSeasonalVarianceScale(minimumSeasonalVarianceScale), m_MinimumTimeToDetectChange(std::max(minimumTimeToDetectChange, 12 * bucketLength)), m_MaximumTimeToTestForChange(std::max(maximumTimeToTestForChange, 48 * bucketLength)), @@ -117,12 +120,9 @@ double CModelParams::probabilityBucketEmpty() const { } CModelAddSamplesParams::CModelAddSamplesParams() - : m_Type(maths_t::E_MixedData), - m_IsNonNegative(false), - m_PropagationInterval(1.0), - m_WeightStyles(nullptr), - m_TrendWeights(nullptr), - m_PriorWeights(nullptr) { + : m_Type(maths_t::E_MixedData), m_IsNonNegative(false), + m_PropagationInterval(1.0), m_WeightStyles(nullptr), + m_TrendWeights(nullptr), m_PriorWeights(nullptr) { } CModelAddSamplesParams& CModelAddSamplesParams::integer(bool integer) { @@ -152,7 +152,8 @@ double CModelAddSamplesParams::propagationInterval() const { return m_PropagationInterval; } -CModelAddSamplesParams& CModelAddSamplesParams::weightStyles(const maths_t::TWeightStyleVec& styles) { +CModelAddSamplesParams& +CModelAddSamplesParams::weightStyles(const maths_t::TWeightStyleVec& styles) { m_WeightStyles = &styles; return *this; } @@ -180,7 +181,8 @@ const CModelAddSamplesParams::TDouble2Vec4VecVec& CModelAddSamplesParams::priorW } CModelProbabilityParams::CModelProbabilityParams() - : m_Tag(0), m_SeasonalConfidenceInterval(DEFAULT_SEASONAL_CONFIDENCE_INTERVAL), m_WeightStyles(nullptr), m_UpdateAnomalyModel(true) { + : m_Tag(0), m_SeasonalConfidenceInterval(DEFAULT_SEASONAL_CONFIDENCE_INTERVAL), + m_WeightStyles(nullptr), m_UpdateAnomalyModel(true) { } CModelProbabilityParams& CModelProbabilityParams::tag(std::size_t tag) { @@ -192,7 +194,8 @@ std::size_t CModelProbabilityParams::tag() const { return m_Tag; } -CModelProbabilityParams& CModelProbabilityParams::addCalculation(maths_t::EProbabilityCalculation calculation) { +CModelProbabilityParams& +CModelProbabilityParams::addCalculation(maths_t::EProbabilityCalculation calculation) { m_Calculations.push_back(calculation); return *this; } @@ -223,7 +226,8 @@ const CModelProbabilityParams::TBool2Vec1Vec& CModelProbabilityParams::bucketEmp return m_BucketEmpty; } -CModelProbabilityParams& CModelProbabilityParams::weightStyles(const maths_t::TWeightStyleVec& styles) { +CModelProbabilityParams& +CModelProbabilityParams::weightStyles(const maths_t::TWeightStyleVec& styles) { m_WeightStyles = &styles; return *this; } @@ -325,20 +329,23 @@ double CModel::correctForEmptyBucket(maths_t::EProbabilityCalculation calculatio const TDouble2Vec& probabilityEmptyBucket, double probability) { if (!bucketEmpty[0] && !bucketEmpty[1]) { - double pState{(1.0 - probabilityEmptyBucket[0]) * (1.0 - probabilityEmptyBucket[1])}; + double pState{(1.0 - probabilityEmptyBucket[0]) * + (1.0 - probabilityEmptyBucket[1])}; double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, 1.0 - pState)}; return std::min(pOneSided + pState * probability, 1.0); } if (!bucketEmpty[0]) { double pState{(1.0 - 
probabilityEmptyBucket[0]) * probabilityEmptyBucket[1]}; - double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, probabilityEmptyBucket[0])}; + double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, + probabilityEmptyBucket[0])}; return std::min(pOneSided + pState + (1.0 - pState) * probability, 1.0); } if (!bucketEmpty[1]) { double pState{probabilityEmptyBucket[0] * (1.0 - probabilityEmptyBucket[1])}; - double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, probabilityEmptyBucket[1])}; + double pOneSided{oneSidedEmptyBucketCorrection(calculation, {value}, + probabilityEmptyBucket[1])}; return std::min(pOneSided + pState + (1.0 - pState) * probability, 1.0); } @@ -376,43 +383,52 @@ CModelStub::TSize2Vec1Vec CModelStub::correlates() const { return {}; } -CModelStub::TDouble2Vec -CModelStub::mode(core_t::TTime /*time*/, const maths_t::TWeightStyleVec& /*weightStyles*/, const TDouble2Vec4Vec& /*weights*/) const { +CModelStub::TDouble2Vec CModelStub::mode(core_t::TTime /*time*/, + const maths_t::TWeightStyleVec& /*weightStyles*/, + const TDouble2Vec4Vec& /*weights*/) const { return {}; } -CModelStub::TDouble2Vec1Vec CModelStub::correlateModes(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec1Vec& /*weights*/) const { +CModelStub::TDouble2Vec1Vec +CModelStub::correlateModes(core_t::TTime /*time*/, + const maths_t::TWeightStyleVec& /*weightStyles*/, + const TDouble2Vec4Vec1Vec& /*weights*/) const { return {}; } -CModelStub::TDouble2Vec1Vec CModelStub::residualModes(const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec& /*weights*/) const { +CModelStub::TDouble2Vec1Vec +CModelStub::residualModes(const maths_t::TWeightStyleVec& /*weightStyles*/, + const TDouble2Vec4Vec& /*weights*/) const { return {}; } void CModelStub::addBucketValue(const TTimeDouble2VecSizeTrVec& /*value*/) { } -CModelStub::EUpdateResult CModelStub::addSamples(const CModelAddSamplesParams& /*params*/, TTimeDouble2VecSizeTrVec /*samples*/) { +CModelStub::EUpdateResult CModelStub::addSamples(const CModelAddSamplesParams& /*params*/, + TTimeDouble2VecSizeTrVec /*samples*/) { return E_Success; } void CModelStub::skipTime(core_t::TTime /*gap*/) { } -void CModelStub::detrend(const TTime2Vec1Vec& /*time*/, double /*confidenceInterval*/, TDouble2Vec1Vec& /*value*/) const { +void CModelStub::detrend(const TTime2Vec1Vec& /*time*/, + double /*confidenceInterval*/, + TDouble2Vec1Vec& /*value*/) const { } -CModelStub::TDouble2Vec CModelStub::predict(core_t::TTime /*time*/, const TSizeDoublePr1Vec& /*correlated*/, TDouble2Vec /*hint*/) const { +CModelStub::TDouble2Vec CModelStub::predict(core_t::TTime /*time*/, + const TSizeDoublePr1Vec& /*correlated*/, + TDouble2Vec /*hint*/) const { return {}; } -CModelStub::TDouble2Vec3Vec CModelStub::confidenceInterval(core_t::TTime /*time*/, - double /*confidenceInterval*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec& /*weights*/) const { +CModelStub::TDouble2Vec3Vec +CModelStub::confidenceInterval(core_t::TTime /*time*/, + double /*confidenceInterval*/, + const maths_t::TWeightStyleVec& /*weightStyles*/, + const TDouble2Vec4Vec& /*weights*/) const { return {}; } @@ -440,11 +456,14 @@ bool CModelStub::probability(const CModelProbabilityParams& /*params*/, return true; } -CModelStub::TDouble2Vec CModelStub::winsorisationWeight(double /*derate*/, core_t::TTime /*time*/, const TDouble2Vec& /*value*/) const { +CModelStub::TDouble2Vec 
CModelStub::winsorisationWeight(double /*derate*/, + core_t::TTime /*time*/, + const TDouble2Vec& /*value*/) const { return {}; } -CModelStub::TDouble2Vec CModelStub::seasonalWeight(double /*confidence*/, core_t::TTime /*time*/) const { +CModelStub::TDouble2Vec CModelStub::seasonalWeight(double /*confidence*/, + core_t::TTime /*time*/) const { return {}; } diff --git a/lib/maths/CModelStateSerialiser.cc b/lib/maths/CModelStateSerialiser.cc index 34d8f6454a..adbde792d3 100644 --- a/lib/maths/CModelStateSerialiser.cc +++ b/lib/maths/CModelStateSerialiser.cc @@ -18,8 +18,9 @@ const std::string MULTIVARIATE_TIME_SERIES_TAG{"b"}; const std::string MODEL_STUB_TAG{"c"}; } -bool CModelStateSerialiser:: -operator()(const SModelRestoreParams& params, TModelPtr& result, core::CStateRestoreTraverser& traverser) const { +bool CModelStateSerialiser::operator()(const SModelRestoreParams& params, + TModelPtr& result, + core::CStateRestoreTraverser& traverser) const { std::size_t numResults = 0; do { @@ -48,11 +49,14 @@ operator()(const SModelRestoreParams& params, TModelPtr& result, core::CStateRes return true; } -void CModelStateSerialiser::operator()(const CModel& model, core::CStatePersistInserter& inserter) const { +void CModelStateSerialiser::operator()(const CModel& model, + core::CStatePersistInserter& inserter) const { if (dynamic_cast(&model) != nullptr) { - inserter.insertLevel(UNIVARIATE_TIME_SERIES_TAG, boost::bind(&CModel::acceptPersistInserter, &model, _1)); + inserter.insertLevel(UNIVARIATE_TIME_SERIES_TAG, + boost::bind(&CModel::acceptPersistInserter, &model, _1)); } else if (dynamic_cast(&model) != nullptr) { - inserter.insertLevel(MULTIVARIATE_TIME_SERIES_TAG, boost::bind(&CModel::acceptPersistInserter, &model, _1)); + inserter.insertLevel(MULTIVARIATE_TIME_SERIES_TAG, + boost::bind(&CModel::acceptPersistInserter, &model, _1)); } else if (dynamic_cast(&model) != nullptr) { inserter.insertValue(MODEL_STUB_TAG, ""); } else { diff --git a/lib/maths/CModelWeight.cc b/lib/maths/CModelWeight.cc index 0939178e7a..2b0daf797b 100644 --- a/lib/maths/CModelWeight.cc +++ b/lib/maths/CModelWeight.cc @@ -26,7 +26,8 @@ const std::string LONG_TERM_LOG_WEIGHT_TAG("c"); const double LOG_SMALLEST_WEIGHT = std::log(CTools::smallestProbability()); } -CModelWeight::CModelWeight(double weight) : m_LogWeight(std::log(weight)), m_LongTermLogWeight(m_LogWeight) { +CModelWeight::CModelWeight(double weight) + : m_LogWeight(std::log(weight)), m_LongTermLogWeight(m_LogWeight) { } CModelWeight::operator double() const { @@ -65,7 +66,8 @@ bool CModelWeight::acceptRestoreTraverser(core::CStateRestoreTraverser& traverse void CModelWeight::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(LOG_WEIGHT_TAG, m_LogWeight, core::CIEEE754::E_DoublePrecision); - inserter.insertValue(LONG_TERM_LOG_WEIGHT_TAG, m_LongTermLogWeight, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(LONG_TERM_LOG_WEIGHT_TAG, m_LongTermLogWeight, + core::CIEEE754::E_SinglePrecision); } } } diff --git a/lib/maths/CMultimodalPrior.cc b/lib/maths/CMultimodalPrior.cc index ab889944ae..93df6067d5 100644 --- a/lib/maths/CMultimodalPrior.cc +++ b/lib/maths/CMultimodalPrior.cc @@ -71,14 +71,19 @@ CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType, const CClusterer1d& clusterer, const CPrior& seedPrior, double decayRate /*= 0.0*/) - : CPrior(dataType, decayRate), m_Clusterer(clusterer.clone()), m_SeedPrior(seedPrior.clone()) { + : CPrior(dataType, decayRate), m_Clusterer(clusterer.clone()), + 
m_SeedPrior(seedPrior.clone()) { // Register the split and merge callbacks. m_Clusterer->splitFunc(CModeSplitCallback(*this)); m_Clusterer->mergeFunc(CModeMergeCallback(*this)); } -CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType, const TMeanVarAccumulatorVec& moments, double decayRate /*= 0.0*/) - : CPrior(dataType, decayRate), m_SeedPrior(CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate).clone()) { +CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType, + const TMeanVarAccumulatorVec& moments, + double decayRate /*= 0.0*/) + : CPrior(dataType, decayRate), + m_SeedPrior( + CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate).clone()) { using TNormalVec = std::vector; TNormalVec normals; @@ -95,34 +100,40 @@ CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType, const TMeanVarAc } } -CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType, double decayRate, TPriorPtrVec& priors) : CPrior(dataType, decayRate) { +CMultimodalPrior::CMultimodalPrior(maths_t::EDataType dataType, double decayRate, TPriorPtrVec& priors) + : CPrior(dataType, decayRate) { m_Modes.reserve(priors.size()); for (std::size_t i = 0u; i < priors.size(); ++i) { m_Modes.emplace_back(i, priors[i]); } } -CMultimodalPrior::CMultimodalPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) +CMultimodalPrior::CMultimodalPrior(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) : CPrior(params.s_DataType, params.s_DecayRate) { - traverser.traverseSubLevel(boost::bind(&CMultimodalPrior::acceptRestoreTraverser, this, boost::cref(params), _1)); + traverser.traverseSubLevel(boost::bind(&CMultimodalPrior::acceptRestoreTraverser, + this, boost::cref(params), _1)); } -bool CMultimodalPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CMultimodalPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) + RESTORE(CLUSTERER_TAG, traverser.traverseSubLevel(boost::bind( + CClustererStateSerialiser(), boost::cref(params), + boost::ref(m_Clusterer), _1))) + RESTORE(SEED_PRIOR_TAG, traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), boost::cref(params), + boost::ref(m_SeedPrior), _1))) RESTORE_SETUP_TEARDOWN( - DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) - RESTORE( - CLUSTERER_TAG, - traverser.traverseSubLevel(boost::bind(CClustererStateSerialiser(), boost::cref(params), boost::ref(m_Clusterer), _1))) - RESTORE(SEED_PRIOR_TAG, - traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(m_SeedPrior), _1))) - RESTORE_SETUP_TEARDOWN(MODE_TAG, - TMode mode, - traverser.traverseSubLevel(boost::bind(&TMode::acceptRestoreTraverser, &mode, boost::cref(params), _1)), - m_Modes.push_back(mode)) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, + MODE_TAG, TMode mode, + traverser.traverseSubLevel(boost::bind(&TMode::acceptRestoreTraverser, + &mode, boost::cref(params), _1)), + m_Modes.push_back(mode)) + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), 
this->numberSamples(numberSamples)) } while (traverser.next()); @@ -137,7 +148,9 @@ bool CMultimodalPrior::acceptRestoreTraverser(const SDistributionRestoreParams& } CMultimodalPrior::CMultimodalPrior(const CMultimodalPrior& other) - : CPrior(other.dataType(), other.decayRate()), m_Clusterer(other.m_Clusterer->clone()), m_SeedPrior(other.m_SeedPrior->clone()) { + : CPrior(other.dataType(), other.decayRate()), + m_Clusterer(other.m_Clusterer->clone()), + m_SeedPrior(other.m_SeedPrior->clone()) { // Register the split and merge callbacks. m_Clusterer->splitFunc(CModeSplitCallback(*this)); m_Clusterer->mergeFunc(CModeMergeCallback(*this)); @@ -218,7 +231,9 @@ bool CMultimodalPrior::needsOffset() const { return false; } -double CMultimodalPrior::adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +double CMultimodalPrior::adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { double result = 0.0; if (this->needsOffset()) { @@ -226,9 +241,12 @@ double CMultimodalPrior::adjustOffset(const TWeightStyleVec& weightStyles, const for (std::size_t i = 0u; i < samples.size(); ++i) { m_Clusterer->cluster(samples[i], clusters); for (const auto& cluster : clusters) { - auto j = std::find_if(m_Modes.begin(), m_Modes.end(), CSetTools::CIndexInSet(cluster.first)); + auto j = std::find_if(m_Modes.begin(), m_Modes.end(), + CSetTools::CIndexInSet(cluster.first)); if (j != m_Modes.end()) { - result += j->s_Prior->adjustOffset(weightStyles, TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, weights[i])); + result += j->s_Prior->adjustOffset( + weightStyles, TDouble1Vec(1, samples[i]), + TDouble4Vec1Vec(1, weights[i])); } } } @@ -245,13 +263,16 @@ double CMultimodalPrior::offset() const { return offset; } -void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { if (samples.empty()) { return; } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return; } @@ -301,8 +322,10 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, const TD try { bool hasSeasonalScale = !this->isNonInformative() && seasonal != missing; - double mean = - (!this->isNonInformative() && maths_t::hasSeasonalVarianceScale(weightStyles_, weights)) ? this->marginalLikelihoodMean() : 0.0; + double mean = (!this->isNonInformative() && + maths_t::hasSeasonalVarianceScale(weightStyles_, weights)) + ? 
this->marginalLikelihoodMean() + : 0.0; for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; @@ -325,11 +348,13 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, const TD m_Clusterer->add(x, clusters, weight[0][count]); double Z = std::accumulate( - m_Modes.begin(), m_Modes.end(), weight[0][count], [](double sum, const TMode& mode) { return sum + mode.weight(); }); + m_Modes.begin(), m_Modes.end(), weight[0][count], + [](double sum, const TMode& mode) { return sum + mode.weight(); }); double n = 0.0; for (const auto& cluster : clusters) { - auto k = std::find_if(m_Modes.begin(), m_Modes.end(), CSetTools::CIndexInSet(cluster.first)); + auto k = std::find_if(m_Modes.begin(), m_Modes.end(), + CSetTools::CIndexInSet(cluster.first)); if (k == m_Modes.end()) { LOG_TRACE(<< "Creating mode with index " << cluster.first); m_Modes.emplace_back(cluster.first, m_SeedPrior); @@ -346,7 +371,9 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, const TD } this->addSamples(n); } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to update likelihood: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to update likelihood: " << e.what()); + } } void CMultimodalPrior::propagateForwardsByTime(double time) { @@ -402,12 +429,14 @@ double CMultimodalPrior::nearestMarginalLikelihoodMean(double value) const { return result; } -double CMultimodalPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double CMultimodalPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { return CMultimodalPriorUtils::marginalLikelihoodMode(m_Modes, weightStyles, weights); } -CMultimodalPrior::TDouble1Vec CMultimodalPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +CMultimodalPrior::TDouble1Vec +CMultimodalPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { TDouble1Vec result(m_Modes.size()); for (std::size_t i = 0u; i < m_Modes.size(); ++i) { result[i] = m_Modes[i].s_Prior->marginalLikelihoodMode(weightStyles, weights); @@ -415,20 +444,24 @@ CMultimodalPrior::TDouble1Vec CMultimodalPrior::marginalLikelihoodModes(const TW return result; } -double CMultimodalPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double CMultimodalPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { return CMultimodalPriorUtils::marginalLikelihoodVariance(m_Modes, weightStyles, weights); } -TDoubleDoublePr CMultimodalPrior::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { - return CMultimodalPriorUtils::marginalLikelihoodConfidenceInterval(*this, m_Modes, percentage, weightStyles, weights); +TDoubleDoublePr +CMultimodalPrior::marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { + return CMultimodalPriorUtils::marginalLikelihoodConfidenceInterval( + *this, m_Modes, percentage, weightStyles, weights); } -maths_t::EFloatingPointErrorStatus CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const { +maths_t::EFloatingPointErrorStatus 
+CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; if (samples.empty()) { @@ -437,7 +470,8 @@ maths_t::EFloatingPointErrorStatus CMultimodalPrior::jointLogMarginalLikelihood( } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } @@ -455,11 +489,14 @@ maths_t::EFloatingPointErrorStatus CMultimodalPrior::jointLogMarginalLikelihood( return maths_t::E_FpOverflowed; } - return m_Modes.size() == 1 ? m_Modes[0].s_Prior->jointLogMarginalLikelihood(weightStyles, samples, weights, result) - : CMultimodalPriorUtils::jointLogMarginalLikelihood(m_Modes, weightStyles, samples, weights, result); + return m_Modes.size() == 1 ? m_Modes[0].s_Prior->jointLogMarginalLikelihood( + weightStyles, samples, weights, result) + : CMultimodalPriorUtils::jointLogMarginalLikelihood( + m_Modes, weightStyles, samples, weights, result); } -void CMultimodalPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { +void CMultimodalPrior::sampleMarginalLikelihood(std::size_t numberSamples, + TDouble1Vec& samples) const { samples.clear(); if (numberSamples == 0 || this->numberSamples() == 0.0) { @@ -474,7 +511,8 @@ bool CMultimodalPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights, double& lowerBound, double& upperBound) const { - return CMultimodalPriorUtils::minusLogJointCdf(m_Modes, weightStyles, samples, weights, lowerBound, upperBound); + return CMultimodalPriorUtils::minusLogJointCdf(m_Modes, weightStyles, samples, + weights, lowerBound, upperBound); } bool CMultimodalPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, @@ -482,7 +520,8 @@ bool CMultimodalPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightS const TDouble4Vec1Vec& weights, double& lowerBound, double& upperBound) const { - return CMultimodalPriorUtils::minusLogJointCdfComplement(m_Modes, weightStyles, samples, weights, lowerBound, upperBound); + return CMultimodalPriorUtils::minusLogJointCdfComplement( + m_Modes, weightStyles, samples, weights, lowerBound, upperBound); } bool CMultimodalPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, @@ -493,7 +532,8 @@ bool CMultimodalPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalcu double& upperBound, maths_t::ETail& tail) const { return CMultimodalPriorUtils::probabilityOfLessLikelySamples( - *this, m_Modes, calculation, weightStyles, samples, weights, lowerBound, upperBound, tail); + *this, m_Modes, calculation, weightStyles, samples, weights, lowerBound, + upperBound, tail); } bool CMultimodalPrior::isNonInformative() const { @@ -534,13 +574,18 @@ std::size_t CMultimodalPrior::staticSize() const { } void CMultimodalPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(CLUSTERER_TAG, boost::bind(CClustererStateSerialiser(), boost::cref(*m_Clusterer), _1)); - inserter.insertLevel(SEED_PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_SeedPrior), _1)); + inserter.insertLevel(CLUSTERER_TAG, + boost::bind(CClustererStateSerialiser(), + boost::cref(*m_Clusterer), _1)); + inserter.insertLevel(SEED_PRIOR_TAG, 
boost::bind(CPriorStateSerialiser(), + boost::cref(*m_SeedPrior), _1)); for (std::size_t i = 0u; i < m_Modes.size(); ++i) { - inserter.insertLevel(MODE_TAG, boost::bind(&TMode::acceptPersistInserter, &m_Modes[i], _1)); + inserter.insertLevel( + MODE_TAG, boost::bind(&TMode::acceptPersistInserter, &m_Modes[i], _1)); } inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), + core::CIEEE754::E_SinglePrecision); } std::size_t CMultimodalPrior::numberModes() const { @@ -551,7 +596,8 @@ bool CMultimodalPrior::checkInvariants(const std::string& tag) const { bool result = true; if (m_Modes.size() != m_Clusterer->numberClusters()) { - LOG_ERROR(<< tag << "# modes = " << m_Modes.size() << ", # clusters = " << m_Clusterer->numberClusters()); + LOG_ERROR(<< tag << "# modes = " << m_Modes.size() + << ", # clusters = " << m_Clusterer->numberClusters()); result = false; } @@ -565,9 +611,11 @@ bool CMultimodalPrior::checkInvariants(const std::string& tag) const { modeSamples += mode.s_Prior->numberSamples(); } - CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance | CToleranceTypes::E_RelativeTolerance, 1e-3); + CEqualWithTolerance equal( + CToleranceTypes::E_AbsoluteTolerance | CToleranceTypes::E_RelativeTolerance, 1e-3); if (!equal(modeSamples, numberSamples)) { - LOG_ERROR(<< tag << "Sum mode samples = " << modeSamples << ", total samples = " << numberSamples); + LOG_ERROR(<< tag << "Sum mode samples = " << modeSamples + << ", total samples = " << numberSamples); result = false; } @@ -588,11 +636,13 @@ std::string CMultimodalPrior::debugWeights() const { ////////// CMultimodalPrior::CModeSplitCallback Implementation ////////// -CMultimodalPrior::CModeSplitCallback::CModeSplitCallback(CMultimodalPrior& prior) : m_Prior(&prior) { +CMultimodalPrior::CModeSplitCallback::CModeSplitCallback(CMultimodalPrior& prior) + : m_Prior(&prior) { } -void CMultimodalPrior::CModeSplitCallback:: -operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const { +void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex, + std::size_t leftSplitIndex, + std::size_t rightSplitIndex) const { LOG_TRACE(<< "Splitting mode with index " << sourceIndex); TModeVec& modes = m_Prior->m_Modes; @@ -609,7 +659,8 @@ operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t righ pLeft /= Z; pRight /= Z; } - LOG_TRACE(<< "# samples = " << numberSamples << ", pLeft = " << pLeft << ", pRight = " << pRight); + LOG_TRACE(<< "# samples = " << numberSamples << ", pLeft = " << pLeft + << ", pRight = " << pRight); // Create the child modes. 
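    //
    // For intuition, with purely illustrative numbers: if the source mode
    // carries numberSamples = 100 and the clusterer reports pLeft = 0.2 and
    // pRight = 0.6, then normalising by Z = 0.8 gives pLeft = 0.25 and
    // pRight = 0.75, so the two children are seeded with weights worth
    // roughly 0.25 * 100 = 25 and 0.75 * 100 = 75 samples respectively and
    // the mode's total evidence is conserved by the split.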
@@ -666,7 +717,8 @@ operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t righ } if (!m_Prior->checkInvariants("SPLIT: ")) { - LOG_ERROR(<< "# samples = " << numberSamples << ", # modes = " << modes.size() << ", pLeft = " << pLeft << ", pRight = " << pRight); + LOG_ERROR(<< "# samples = " << numberSamples << ", # modes = " << modes.size() + << ", pLeft = " << pLeft << ", pRight = " << pRight); } LOG_TRACE(<< "Split mode"); @@ -674,11 +726,13 @@ operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t righ ////////// CMultimodalPrior::CModeMergeCallback Implementation ////////// -CMultimodalPrior::CModeMergeCallback::CModeMergeCallback(CMultimodalPrior& prior) : m_Prior(&prior) { +CMultimodalPrior::CModeMergeCallback::CModeMergeCallback(CMultimodalPrior& prior) + : m_Prior(&prior) { } -void CMultimodalPrior::CModeMergeCallback:: -operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t targetIndex) const { +void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex, + std::size_t rightMergeIndex, + std::size_t targetIndex) const { LOG_TRACE(<< "Merging modes with indices " << leftMergeIndex << " " << rightMergeIndex); TModeVec& modes = m_Prior->m_Modes; @@ -693,7 +747,8 @@ operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t std::size_t nr = 0; TDouble1Vec samples; - auto leftMode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(leftMergeIndex)); + auto leftMode = std::find_if(modes.begin(), modes.end(), + CSetTools::CIndexInSet(leftMergeIndex)); if (leftMode != modes.end()) { wl = leftMode->s_Prior->numberSamples(); n += wl; @@ -705,7 +760,8 @@ operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t LOG_ERROR(<< "Couldn't find mode for " << leftMergeIndex); } - auto rightMode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(rightMergeIndex)); + auto rightMode = std::find_if(modes.begin(), modes.end(), + CSetTools::CIndexInSet(rightMergeIndex)); if (rightMode != modes.end()) { wr = rightMode->s_Prior->numberSamples(); n += wr; @@ -750,7 +806,8 @@ operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t TSizeSet mergedIndices; mergedIndices.insert(leftMergeIndex); mergedIndices.insert(rightMergeIndex); - modes.erase(std::remove_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(mergedIndices)), modes.end()); + modes.erase(std::remove_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(mergedIndices)), + modes.end()); // Add the new mode. LOG_TRACE(<< "Creating mode with index " << targetIndex); diff --git a/lib/maths/CMultinomialConjugate.cc b/lib/maths/CMultinomialConjugate.cc index 9648f131a7..f12b73a300 100644 --- a/lib/maths/CMultinomialConjugate.cc +++ b/lib/maths/CMultinomialConjugate.cc @@ -51,7 +51,9 @@ using TDoubleDoublePr = std::pair; //! Truncate \p to fit into a signed integer. int truncate(std::size_t x) { - return x > static_cast(std::numeric_limits::max()) ? std::numeric_limits::max() : static_cast(x); + return x > static_cast(std::numeric_limits::max()) + ? std::numeric_limits::max() + : static_cast(x); } //! 
This computes the cumulative density function of the predictive @@ -100,7 +102,9 @@ class CCdf : core::CNonCopyable { } void operator()(double x, double& lowerBound, double& upperBound) const { - std::size_t category = std::upper_bound(m_Categories.begin(), m_Categories.end(), x) - m_Categories.begin(); + std::size_t category = + std::upper_bound(m_Categories.begin(), m_Categories.end(), x) - + m_Categories.begin(); lowerBound = m_Cdf[category]; upperBound = m_Cdf[category] + m_Pu; @@ -155,7 +159,9 @@ class CCdfComplement : core::CNonCopyable { } void operator()(double x, double& lowerBound, double& upperBound) const { - std::size_t category = std::lower_bound(m_Categories.begin(), m_Categories.end(), x) - m_Categories.begin(); + std::size_t category = + std::lower_bound(m_Categories.begin(), m_Categories.end(), x) - + m_Categories.begin(); lowerBound = m_CdfComplement[category + 1]; upperBound = m_CdfComplement[category + 1] + m_Pu; @@ -182,7 +188,8 @@ class CCdfComplement : core::CNonCopyable { //! This was determined, empirically, to give reasonable errors //! in the calculation of the less likely probabilities. std::size_t numberPriorSamples(double x) { - static const double THRESHOLDS[] = {100.0, 1000.0, 10000.0, boost::numeric::bounds::highest()}; + static const double THRESHOLDS[] = {100.0, 1000.0, 10000.0, + boost::numeric::bounds::highest()}; static const std::size_t NUMBERS[] = {7u, 5u, 3u, 1u}; return NUMBERS[std::lower_bound(boost::begin(THRESHOLDS), boost::end(THRESHOLDS), x) - boost::begin(THRESHOLDS)]; } @@ -248,7 +255,8 @@ const std::string DECAY_RATE_TAG("h"); const std::string EMPTY_STRING; } -CMultinomialConjugate::CMultinomialConjugate() : m_NumberAvailableCategories(0), m_TotalConcentration(0.0) { +CMultinomialConjugate::CMultinomialConjugate() + : m_NumberAvailableCategories(0), m_TotalConcentration(0.0) { } CMultinomialConjugate::CMultinomialConjugate(std::size_t maximumNumberOfCategories, @@ -256,30 +264,36 @@ CMultinomialConjugate::CMultinomialConjugate(std::size_t maximumNumberOfCategori const TDoubleVec& concentrations, double decayRate) : CPrior(maths_t::E_DiscreteData, decayRate), - m_NumberAvailableCategories(detail::truncate(maximumNumberOfCategories) - detail::truncate(categories.size())), - m_Categories(categories), - m_Concentrations(concentrations), + m_NumberAvailableCategories(detail::truncate(maximumNumberOfCategories) - + detail::truncate(categories.size())), + m_Categories(categories), m_Concentrations(concentrations), m_TotalConcentration(0.0) { m_Concentrations.resize(m_Categories.size(), NON_INFORMATIVE_CONCENTRATION); - m_TotalConcentration = std::accumulate(m_Concentrations.begin(), m_Concentrations.end(), 0.0); + m_TotalConcentration = + std::accumulate(m_Concentrations.begin(), m_Concentrations.end(), 0.0); this->numberSamples(m_TotalConcentration); } -CMultinomialConjugate::CMultinomialConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) - : CPrior(maths_t::E_DiscreteData, params.s_DecayRate), m_NumberAvailableCategories(0), m_TotalConcentration(0.0) { - traverser.traverseSubLevel(boost::bind(&CMultinomialConjugate::acceptRestoreTraverser, this, _1)); +CMultinomialConjugate::CMultinomialConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) + : CPrior(maths_t::E_DiscreteData, params.s_DecayRate), + m_NumberAvailableCategories(0), m_TotalConcentration(0.0) { + traverser.traverseSubLevel( + boost::bind(&CMultinomialConjugate::acceptRestoreTraverser, this, _1)); } 
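// For orientation, the conjugate update implemented by this class is the
// standard Dirichlet-multinomial one: with a Dirichlet(a_1, ..., a_k) prior
// over the category probabilities, observing a count n_i of category i
// yields the posterior Dirichlet(a_1, ..., a_i + n_i, ..., a_k), and the
// predictive probability of category i is a_i / a_0 where a_0 = sum_j a_j.
// Hence addSamples below simply adds each sample's count to
// m_Concentrations[category] and to m_TotalConcentration.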
bool CMultinomialConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) RESTORE_BUILT_IN(NUMBER_AVAILABLE_CATEGORIES_TAG, m_NumberAvailableCategories) if (!name.empty() && name[0] == CATEGORY_TAG[0]) { // Categories have been split across multiple fields b0, b1, etc if (core::CPersistUtils::fromString( - traverser.value(), m_Categories, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true) == false) { + traverser.value(), m_Categories, core::CPersistUtils::DELIMITER, + core::CPersistUtils::PAIR_DELIMITER, true) == false) { LOG_ERROR(<< "Invalid categories in split " << traverser.value()); return false; } @@ -288,16 +302,15 @@ bool CMultinomialConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& if (!name.empty() && name[0] == CONCENTRATION_TAG[0]) { // Concentrations have been split across multiple fields c0, c1, c2, etc if (core::CPersistUtils::fromString( - traverser.value(), m_Concentrations, core::CPersistUtils::DELIMITER, core::CPersistUtils::PAIR_DELIMITER, true) == - false) { + traverser.value(), m_Concentrations, core::CPersistUtils::DELIMITER, + core::CPersistUtils::PAIR_DELIMITER, true) == false) { LOG_ERROR(<< "Invalid concentrations in split " << traverser.value()); return false; } continue; } RESTORE_BUILT_IN(TOTAL_CONCENTRATION_TAG, m_TotalConcentration) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) } while (traverser.next()); @@ -315,8 +328,10 @@ void CMultinomialConjugate::swap(CMultinomialConjugate& other) { std::swap(m_TotalConcentration, other.m_TotalConcentration); } -CMultinomialConjugate CMultinomialConjugate::nonInformativePrior(std::size_t maximumNumberOfCategories, double decayRate) { - return CMultinomialConjugate(maximumNumberOfCategories, TDoubleVec(), TDoubleVec(), decayRate); +CMultinomialConjugate CMultinomialConjugate::nonInformativePrior(std::size_t maximumNumberOfCategories, + double decayRate) { + return CMultinomialConjugate(maximumNumberOfCategories, TDoubleVec(), + TDoubleVec(), decayRate); } CMultinomialConjugate::EPrior CMultinomialConjugate::type() const { @@ -328,7 +343,8 @@ CMultinomialConjugate* CMultinomialConjugate::clone() const { } void CMultinomialConjugate::setToNonInformative(double /*offset*/, double decayRate) { - *this = nonInformativePrior(m_NumberAvailableCategories + detail::truncate(m_Categories.size()), decayRate); + *this = nonInformativePrior( + m_NumberAvailableCategories + detail::truncate(m_Categories.size()), decayRate); } bool CMultinomialConjugate::needsOffset() const { @@ -345,13 +361,16 @@ double CMultinomialConjugate::offset() const { return 0.0; } -void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { if (samples.empty()) { return; } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in 
samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return; } @@ -396,7 +415,9 @@ void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, cons m_TotalConcentration += n; - std::size_t category = std::lower_bound(m_Categories.begin(), m_Categories.end(), x) - m_Categories.begin(); + std::size_t category = + std::lower_bound(m_Categories.begin(), m_Categories.end(), x) - + m_Categories.begin(); if (category == m_Categories.size() || m_Categories[category] != x) { m_NumberAvailableCategories = std::max(m_NumberAvailableCategories - 1, -1); if (m_NumberAvailableCategories < 0) { @@ -406,15 +427,17 @@ void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, cons // This is infrequent so the amortized cost is low. m_Categories.insert(m_Categories.begin() + category, x); - m_Concentrations.insert(m_Concentrations.begin() + category, NON_INFORMATIVE_CONCENTRATION); + m_Concentrations.insert(m_Concentrations.begin() + category, + NON_INFORMATIVE_CONCENTRATION); this->shrink(); } m_Concentrations[category] += n; } - LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples) << ", m_NumberAvailableCategories = " - << m_NumberAvailableCategories << ", m_Categories = " << core::CContainerPrinter::print(m_Categories) + LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples) + << ", m_NumberAvailableCategories = " << m_NumberAvailableCategories + << ", m_Categories = " << core::CContainerPrinter::print(m_Categories) << ", m_Concentrations = " << core::CContainerPrinter::print(m_Concentrations) << ", m_TotalConcentration = " << m_TotalConcentration); } @@ -445,7 +468,9 @@ void CMultinomialConjugate::propagateForwardsByTime(double time) { // Thus the mean is unchanged and for large a0 the variance is // increased by very nearly 1 / f. - double factor = std::min((alpha * m_TotalConcentration + (1.0 - alpha) * NON_INFORMATIVE_CONCENTRATION) / m_TotalConcentration, 1.0); + double factor = std::min( + (alpha * m_TotalConcentration + (1.0 - alpha) * NON_INFORMATIVE_CONCENTRATION) / m_TotalConcentration, + 1.0); for (std::size_t i = 0u; i < m_Concentrations.size(); ++i) { m_Concentrations[i] *= factor; @@ -455,8 +480,10 @@ void CMultinomialConjugate::propagateForwardsByTime(double time) { this->numberSamples(this->numberSamples() * factor); - LOG_TRACE(<< "factor = " << factor << ", m_Concentrations = " << core::CContainerPrinter::print(m_Concentrations) - << ", m_TotalConcentration = " << m_TotalConcentration << ", numberSamples = " << this->numberSamples()); + LOG_TRACE(<< "factor = " << factor + << ", m_Concentrations = " << core::CContainerPrinter::print(m_Concentrations) + << ", m_TotalConcentration = " << m_TotalConcentration + << ", numberSamples = " << this->numberSamples()); } CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihoodSupport() const { @@ -465,7 +492,8 @@ CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihood // in the support for the possible discrete values which can // be any real numbers. 
-    return std::make_pair(boost::numeric::bounds<double>::lowest(), boost::numeric::bounds<double>::highest());
+    return std::make_pair(boost::numeric::bounds<double>::lowest(),
+                          boost::numeric::bounds<double>::highest());
 }
 
 double CMultinomialConjugate::marginalLikelihoodMean() const {
@@ -487,7 +515,8 @@ double CMultinomialConjugate::marginalLikelihoodMean() const {
     return CBasicStatistics::mean(result);
 }
 
-double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const {
+double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
+                                                     const TDouble4Vec& /*weights*/) const {
     if (this->isNonInformative()) {
         return 0.0;
     }
@@ -506,7 +535,8 @@ double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*we
     return m_Categories[mode];
 }
 
-double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const {
+double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/,
+                                                         const TDouble4Vec& /*weights*/) const {
     using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
 
     if (this->isNonInformative()) {
@@ -527,9 +557,10 @@ double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec&
     return CBasicStatistics::variance(result);
 }
 
-CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage,
-                                                                                                   const TWeightStyleVec& /*weightStyles*/,
-                                                                                                   const TDouble4Vec& /*weights*/) const {
+CMultinomialConjugate::TDoubleDoublePr
+CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage,
+                                                            const TWeightStyleVec& /*weightStyles*/,
+                                                            const TDouble4Vec& /*weights*/) const {
     if (this->isNonInformative()) {
         return this->marginalLikelihoodSupport();
     }
@@ -550,13 +581,16 @@ CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihood
         pU += 1.0 / static_cast<double>(m_Concentrations.size()) - p;
     }
     double q1 = (1.0 - percentage) / 2.0;
-    ptrdiff_t i1 = std::lower_bound(quantiles.begin(), quantiles.end(), q1 - pU) - quantiles.begin();
+    ptrdiff_t i1 = std::lower_bound(quantiles.begin(), quantiles.end(), q1 - pU) -
+                   quantiles.begin();
     double x1 = m_Categories[i1];
     double x2 = x1;
     if (percentage > 0.0) {
         double q2 = (1.0 + percentage) / 2.0;
-        ptrdiff_t i2 = std::min(std::lower_bound(quantiles.begin(), quantiles.end(), q2 + pU) - quantiles.begin(),
-                                static_cast<ptrdiff_t>(quantiles.size()) - 1);
+        ptrdiff_t i2 =
+            std::min(std::lower_bound(quantiles.begin(), quantiles.end(), q2 + pU) -
+                         quantiles.begin(),
+                     static_cast<ptrdiff_t>(quantiles.size()) - 1);
         x2 = m_Categories[i2];
     }
     LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2);
@@ -566,10 +600,11 @@ CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihood
     return std::make_pair(x1, x2);
 }
 
-maths_t::EFloatingPointErrorStatus CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                                                     const TDouble1Vec& samples,
-                                                                                     const TDouble4Vec1Vec& weights,
-                                                                                     double& result) const {
+maths_t::EFloatingPointErrorStatus
+CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                  const TDouble1Vec& samples,
+                                                  const TDouble4Vec1Vec& weights,
+                                                  double& result) const {
     result = 0.0;
 
     if (samples.empty()) {
@@ -578,7 +613,8 @@ maths_t::EFloatingPointErrorStatus CMultinomialConjugate::jointLogMarginalLikeli
     }
 
     if (samples.size() != weights.size()) {
-        LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+        LOG_ERROR(<< "Mismatch in samples '"
+                  << core::CContainerPrinter::print(samples) << "' and weights '"
                   << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }
@@ -627,26 +663,33 @@ maths_t::EFloatingPointErrorStatus CMultinomialConjugate::jointLogMarginalLikeli
     }
 
     try {
-        LOG_TRACE(<< "# samples = " << numberSamples << ", total concentration = " << m_TotalConcentration);
+        LOG_TRACE(<< "# samples = " << numberSamples
+                  << ", total concentration = " << m_TotalConcentration);
 
-        result = boost::math::lgamma(numberSamples + 1.0) + boost::math::lgamma(m_TotalConcentration) -
+        result = boost::math::lgamma(numberSamples + 1.0) +
+                 boost::math::lgamma(m_TotalConcentration) -
                  boost::math::lgamma(m_TotalConcentration + numberSamples);
 
-        for (TDoubleDoubleMapCItr countItr = categoryCounts.begin(); countItr != categoryCounts.end(); ++countItr) {
+        for (TDoubleDoubleMapCItr countItr = categoryCounts.begin();
+             countItr != categoryCounts.end(); ++countItr) {
             double category = countItr->first;
             double count = countItr->second;
             LOG_TRACE(<< "category = " << category << ", count = " << count);
 
             result -= boost::math::lgamma(countItr->second + 1.0);
 
-            std::size_t index = std::lower_bound(m_Categories.begin(), m_Categories.end(), category) - m_Categories.begin();
+            std::size_t index = std::lower_bound(m_Categories.begin(),
+                                                 m_Categories.end(), category) -
+                                m_Categories.begin();
             if (index < m_Categories.size() && m_Categories[index] == category) {
                 LOG_TRACE(<< "concentration = " << m_Concentrations[index]);
-                result += boost::math::lgamma(m_Concentrations[index] + count) - boost::math::lgamma(m_Concentrations[index]);
+                result += boost::math::lgamma(m_Concentrations[index] + count) -
+                          boost::math::lgamma(m_Concentrations[index]);
             }
         }
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Unable to compute joint log likelihood: " << e.what() << ", samples = " << core::CContainerPrinter::print(samples)
+        LOG_ERROR(<< "Unable to compute joint log likelihood: " << e.what()
+                  << ", samples = " << core::CContainerPrinter::print(samples)
                   << ", categories = " << core::CContainerPrinter::print(m_Categories)
                   << ", concentrations = " << core::CContainerPrinter::print(m_Concentrations));
         return maths_t::E_FpFailed;
@@ -662,7 +705,8 @@ maths_t::EFloatingPointErrorStatus CMultinomialConjugate::jointLogMarginalLikeli
     return status;
 }
 
-void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const {
+void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
+                                                     TDouble1Vec& samples) const {
     samples.clear();
 
     if (numberSamples == 0 || this->isNonInformative()) {
@@ -758,8 +802,12 @@ bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles
 
         // We need to handle the case that the c.d.f. is zero and hence
         // the log blows up.
-        lowerBound = sampleLowerBound == 0.0 || lowerBound == MAX_DOUBLE ? MAX_DOUBLE : lowerBound - n * std::log(sampleLowerBound);
-        upperBound = sampleUpperBound == 0.0 || upperBound == MAX_DOUBLE ? MAX_DOUBLE : upperBound - n * std::log(sampleUpperBound);
+        lowerBound = sampleLowerBound == 0.0 || lowerBound == MAX_DOUBLE
+                         ? MAX_DOUBLE
+                         : lowerBound - n * std::log(sampleLowerBound);
+        upperBound = sampleUpperBound == 0.0 || upperBound == MAX_DOUBLE
+                         ? MAX_DOUBLE
+                         : upperBound - n * std::log(sampleUpperBound);
     }
 
     return true;
@@ -786,8 +834,12 @@ bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec& we
 
         // We need to handle the case that the c.d.f. is zero and hence
        // the log blows up.
-        lowerBound = sampleLowerBound == 0.0 || lowerBound == MAX_DOUBLE ? MAX_DOUBLE : lowerBound - n * std::log(sampleLowerBound);
-        upperBound = sampleUpperBound == 0.0 || upperBound == MAX_DOUBLE ? MAX_DOUBLE : upperBound - n * std::log(sampleUpperBound);
+        lowerBound = sampleLowerBound == 0.0 || lowerBound == MAX_DOUBLE
+                         ? MAX_DOUBLE
+                         : lowerBound - n * std::log(sampleLowerBound);
+        upperBound = sampleUpperBound == 0.0 || upperBound == MAX_DOUBLE
+                         ? MAX_DOUBLE
+                         : upperBound - n * std::log(sampleUpperBound);
     }
 
     return true;
@@ -845,8 +897,9 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
             }
 
             if (!jointLowerBound.calculate(lowerBound) || !jointUpperBound.calculate(upperBound)) {
-                LOG_ERROR(<< "Unable to compute probability for " << core::CContainerPrinter::print(samples) << ": " << jointLowerBound << ", "
-                          << jointUpperBound);
+                LOG_ERROR(<< "Unable to compute probability for "
+                          << core::CContainerPrinter::print(samples) << ": "
+                          << jointLowerBound << ", " << jointUpperBound);
                 return false;
             }
             tail = maths_t::E_LeftTail;
@@ -942,7 +995,9 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
         // Get the index of largest probability less than or equal to P(U).
         std::size_t l = pCategories.size();
         if (pU > 0.0) {
-            l = std::lower_bound(pCategories.begin(), pCategories.end(), TDoubleDoubleSizeTr(pU, pU, 0)) - pCategories.begin();
+            l = std::lower_bound(pCategories.begin(), pCategories.end(),
+                                 TDoubleDoubleSizeTr(pU, pU, 0)) -
+                pCategories.begin();
         }
 
         // Compute probabilities of less likely categories.
@@ -973,7 +1028,9 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
         TSizeVec categoryIndices;
         categoryIndices.reserve(samples.size());
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            std::size_t index = std::lower_bound(m_Categories.begin(), m_Categories.end(), samples[i]) - m_Categories.begin();
+            std::size_t index = std::lower_bound(m_Categories.begin(),
+                                                 m_Categories.end(), samples[i]) -
+                                m_Categories.begin();
             if (index < m_Categories.size() && m_Categories[index] == samples[i]) {
                 categoryIndices.push_back(index);
             }
@@ -994,22 +1051,27 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
                 double a = m_Concentrations[j];
                 double b = m_TotalConcentration - m_Concentrations[j];
                 detail::generateBetaSamples(a, b, nSamples, marginalSamples);
-                LOG_TRACE(<< "E[p] = " << pCategories[i].get<0>() << ", mean = " << CBasicStatistics::mean(marginalSamples)
+                LOG_TRACE(<< "E[p] = " << pCategories[i].get<0>()
+                          << ", mean = " << CBasicStatistics::mean(marginalSamples)
                           << ", samples = " << marginalSamples);
 
                 TMeanAccumulator pAcc;
                 for (std::size_t k = 0u; k < marginalSamples.size(); ++k) {
                     TDoubleDoubleSizeTr x(1.05 * marginalSamples[k], 0.0, 0);
-                    ptrdiff_t r = std::min(std::upper_bound(pCategories.begin(), pCategories.end(), x) - pCategories.begin(),
-                                           static_cast<ptrdiff_t>(pCategories.size()) - 1);
+                    ptrdiff_t r = std::min(
+                        std::upper_bound(pCategories.begin(), pCategories.end(), x) -
+                            pCategories.begin(),
+                        static_cast<ptrdiff_t>(pCategories.size()) - 1);
 
                     double fl = r > 0 ? pCategories[r - 1].get<0>() : 0.0;
                     double fr = pCategories[r].get<0>();
                     double pl_ = r > 0 ? pCategories[r - 1].get<1>() : 0.0;
                     double pr_ = pCategories[r].get<1>();
-                    double alpha = std::min((fr - fl == 0.0) ? 0.0 : (x.get<0>() - fl) / (fr - fl), 1.0);
+                    double alpha = std::min(
+                        (fr - fl == 0.0) ? 0.0 : (x.get<0>() - fl) / (fr - fl), 1.0);
                     double px = (1.0 - alpha) * pl_ + alpha * pr_;
-                    LOG_TRACE(<< "E[p(l)] = " << fl << ", P(l) = " << pl_ << ", E[p(r)] = " << fr << ", P(r) = " << pr_
+                    LOG_TRACE(<< "E[p(l)] = " << fl << ", P(l) = " << pl_
+                              << ", E[p(r)] = " << fr << ", P(r) = " << pr_
                              << ", alpha = " << alpha << ", p = " << px);
 
                    pAcc.add(px);
@@ -1038,7 +1100,9 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
 
         if (samples.size() == 1) {
             // No special aggregation is required if there is a single sample.
-            std::size_t index = std::lower_bound(m_Categories.begin(), m_Categories.end(), samples[0]) - m_Categories.begin();
+            std::size_t index = std::lower_bound(m_Categories.begin(),
+                                                 m_Categories.end(), samples[0]) -
+                                m_Categories.begin();
             if (index < m_Categories.size() && m_Categories[index] == samples[0]) {
                 double p = pCategories[index].get<1>();
@@ -1066,12 +1130,15 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
         CJointProbabilityOfLessLikelySamples jointLowerBound;
         CJointProbabilityOfLessLikelySamples jointUpperBound;
 
-        for (TDoubleDoubleMapCItr countItr = categoryCounts.begin(); countItr != categoryCounts.end(); ++countItr) {
+        for (TDoubleDoubleMapCItr countItr = categoryCounts.begin();
+             countItr != categoryCounts.end(); ++countItr) {
             double category = countItr->first;
             double count = countItr->second;
             LOG_TRACE(<< "category = " << category << ", count = " << count);
 
-            std::size_t index = std::lower_bound(m_Categories.begin(), m_Categories.end(), category) - m_Categories.begin();
+            std::size_t index = std::lower_bound(m_Categories.begin(),
+                                                 m_Categories.end(), category) -
+                                m_Categories.begin();
             double p = pCategories[index].get<1>();
             if (index < m_Categories.size() && m_Categories[index] == category) {
@@ -1084,8 +1151,9 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
         }
 
         if (!jointLowerBound.calculate(lowerBound) || !jointUpperBound.calculate(upperBound)) {
-            LOG_ERROR(<< "Unable to compute probability for " << core::CContainerPrinter::print(samples) << ": " << jointLowerBound << ", "
-                      << jointUpperBound);
+            LOG_ERROR(<< "Unable to compute probability for "
+                      << core::CContainerPrinter::print(samples) << ": "
+                      << jointLowerBound << ", " << jointUpperBound);
             return false;
         }
@@ -1110,8 +1178,9 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability
         }
 
         if (!jointLowerBound.calculate(lowerBound) || !jointUpperBound.calculate(upperBound)) {
-            LOG_ERROR(<< "Unable to compute probability for " << core::CContainerPrinter::print(samples) << ": " << jointLowerBound << ", "
-                      << jointUpperBound);
+            LOG_ERROR(<< "Unable to compute probability for "
+                      << core::CContainerPrinter::print(samples) << ": "
+                      << jointLowerBound << ", " << jointUpperBound);
             return false;
         }
         tail = maths_t::E_RightTail;
@@ -1127,9 +1196,11 @@ bool CMultinomialConjugate::isNonInformative() const {
 
 void CMultinomialConjugate::print(const std::string& indent, std::string& result) const {
     result += core_t::LINE_ENDING + indent + "multinomial " +
-              (this->isNonInformative() ? std::string("non-informative")
-                                        : std::string("categories ") + core::CContainerPrinter::print(m_Categories) + " concentrations " +
-                                              core::CContainerPrinter::print(m_Concentrations));
+              (this->isNonInformative()
+                   ? std::string("non-informative")
+                   : std::string("categories ") +
+                         core::CContainerPrinter::print(m_Categories) + " concentrations " +
+                         core::CContainerPrinter::print(m_Concentrations));
 }
 
 std::string CMultinomialConjugate::printMarginalLikelihoodFunction(double /*weight*/) const {
@@ -1206,8 +1277,10 @@ void CMultinomialConjugate::acceptPersistInserter(core::CStatePersistInserter& i
     inserter.insertValue(NUMBER_AVAILABLE_CATEGORIES_TAG, m_NumberAvailableCategories);
     inserter.insertValue(CATEGORY_TAG, core::CPersistUtils::toString(m_Categories));
     inserter.insertValue(CONCENTRATION_TAG, core::CPersistUtils::toString(m_Concentrations));
-    inserter.insertValue(TOTAL_CONCENTRATION_TAG, m_TotalConcentration, core::CIEEE754::E_SinglePrecision);
-    inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision);
+    inserter.insertValue(TOTAL_CONCENTRATION_TAG, m_TotalConcentration,
+                         core::CIEEE754::E_SinglePrecision);
+    inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(),
+                         core::CIEEE754::E_SinglePrecision);
 }
 
 void CMultinomialConjugate::removeCategories(TDoubleVec categoriesToRemove) {
@@ -1233,7 +1306,8 @@ void CMultinomialConjugate::removeCategories(TDoubleVec categoriesToRemove) {
     m_Categories.erase(m_Categories.begin() + end, m_Categories.end());
     m_Concentrations.erase(m_Concentrations.begin() + end, m_Concentrations.end());
 
-    m_TotalConcentration = std::accumulate(m_Concentrations.begin(), m_Concentrations.end(), 0.0);
+    m_TotalConcentration =
+        std::accumulate(m_Concentrations.begin(), m_Concentrations.end(), 0.0);
 
     LOG_TRACE(<< "categories = " << core::CContainerPrinter::print(m_Categories));
     LOG_TRACE(<< "concentrations = " << core::CContainerPrinter::print(m_Concentrations));
@@ -1243,7 +1317,8 @@ void CMultinomialConjugate::removeCategories(TDoubleVec categoriesToRemove) {
 bool CMultinomialConjugate::index(double category, std::size_t& result) const {
     result = std::numeric_limits<std::size_t>::max();
 
-    TDoubleVecCItr categoryItr = std::lower_bound(m_Categories.begin(), m_Categories.end(), category);
+    TDoubleVecCItr categoryItr =
+        std::lower_bound(m_Categories.begin(), m_Categories.end(), category);
     if (categoryItr == m_Categories.end() || *categoryItr != category) {
         return false;
     }
@@ -1332,7 +1407,9 @@ void CMultinomialConjugate::probabilitiesOfLessLikelyCategories(maths_t::EProbab
     {
         std::size_t l = pCategories.size();
         if (pU > 0.0) {
-            l = std::lower_bound(pCategories.begin(), pCategories.end(), TDoubleDoubleSizeTr(pU, pU, 0)) - pCategories.begin();
+            l = std::lower_bound(pCategories.begin(), pCategories.end(),
+                                 TDoubleDoubleSizeTr(pU, pU, 0)) -
+                pCategories.begin();
         }
 
         // Compute probabilities of less likely categories.
@@ -1381,22 +1458,27 @@ void CMultinomialConjugate::probabilitiesOfLessLikelyCategories(maths_t::EProbab
                 double a = m_Concentrations[j];
                 double b = m_TotalConcentration - m_Concentrations[j];
                 detail::generateBetaSamples(a, b, n, samples);
-                LOG_TRACE(<< "E[p] = " << pCategories[i].get<0>() << ", mean = " << CBasicStatistics::mean(samples)
+                LOG_TRACE(<< "E[p] = " << pCategories[i].get<0>()
+                          << ", mean = " << CBasicStatistics::mean(samples)
                           << ", samples = " << core::CContainerPrinter::print(samples));
 
                 TMeanAccumulator pAcc;
                 for (std::size_t k = 0u; k < samples.size(); ++k) {
                     TDoubleDoubleSizeTr x(1.05 * samples[k], 0.0, 0);
-                    ptrdiff_t r = std::min(std::upper_bound(pCategories.begin(), pCategories.end(), x) - pCategories.begin(),
-                                           static_cast<ptrdiff_t>(pCategories.size()) - 1);
+                    ptrdiff_t r = std::min(
+                        std::upper_bound(pCategories.begin(), pCategories.end(), x) -
+                            pCategories.begin(),
+                        static_cast<ptrdiff_t>(pCategories.size()) - 1);
 
                     double fl = r > 0 ? pCategories[r - 1].get<0>() : 0.0;
                     double fr = pCategories[r].get<0>();
                     double pl_ = r > 0 ? pCategories[r - 1].get<1>() : 0.0;
                     double pr_ = pCategories[r].get<1>();
-                    double alpha = std::min((fr - fl == 0.0) ? 0.0 : (x.get<0>() - fl) / (fr - fl), 1.0);
+                    double alpha = std::min(
+                        (fr - fl == 0.0) ? 0.0 : (x.get<0>() - fl) / (fr - fl), 1.0);
                     double px = (1.0 - alpha) * pl_ + alpha * pr_;
-                    LOG_TRACE(<< "E[p(l)] = " << fl << ", P(l) = " << pl_ << ", E[p(r)] = " << fr << ", P(r) = " << pr_
+                    LOG_TRACE(<< "E[p(l)] = " << fl << ", P(l) = " << pl_
+                              << ", E[p(r)] = " << fr << ", P(r) = " << pr_
                               << ", alpha = " << alpha << ", p = " << px);
 
                     pAcc.add(px);
@@ -1418,7 +1500,8 @@ void CMultinomialConjugate::probabilitiesOfLessLikelyCategories(maths_t::EProbab
         }
     }
 }
 
-CMultinomialConjugate::TDoubleDoublePrVec CMultinomialConjugate::confidenceIntervalProbabilities(double percentage) const {
+CMultinomialConjugate::TDoubleDoublePrVec
+CMultinomialConjugate::confidenceIntervalProbabilities(double percentage) const {
     if (this->isNonInformative()) {
         return TDoubleDoublePrVec(m_Concentrations.size(), std::make_pair(0.0, 1.0));
     }
@@ -1456,21 +1539,27 @@ CMultinomialConjugate::TDoubleDoublePrVec CMultinomialConjugate::confidenceInter
         double a = m_Concentrations[i];
         double b = m_TotalConcentration - m_Concentrations[i];
         boost::math::beta_distribution<> beta(a, b);
-        TDoubleDoublePr percentiles(boost::math::quantile(beta, lowerPercentile), boost::math::quantile(beta, upperPercentile));
+        TDoubleDoublePr percentiles(boost::math::quantile(beta, lowerPercentile),
+                                    boost::math::quantile(beta, upperPercentile));
         result.push_back(percentiles);
     }
 
     return result;
 }
 
-bool CMultinomialConjugate::equalTolerance(const CMultinomialConjugate& rhs, const TEqualWithTolerance& equal) const {
+bool CMultinomialConjugate::equalTolerance(const CMultinomialConjugate& rhs,
+                                           const TEqualWithTolerance& equal) const {
     LOG_DEBUG(<< m_NumberAvailableCategories << " " << rhs.m_NumberAvailableCategories);
-    LOG_DEBUG(<< core::CContainerPrinter::print(m_Categories) << " " << core::CContainerPrinter::print(rhs.m_Categories));
-    LOG_DEBUG(<< core::CContainerPrinter::print(m_Concentrations) << " " << core::CContainerPrinter::print(rhs.m_Concentrations));
+    LOG_DEBUG(<< core::CContainerPrinter::print(m_Categories) << " "
+              << core::CContainerPrinter::print(rhs.m_Categories));
+    LOG_DEBUG(<< core::CContainerPrinter::print(m_Concentrations) << " "
+              << core::CContainerPrinter::print(rhs.m_Concentrations));
     LOG_DEBUG(<< m_TotalConcentration << " " << rhs.m_TotalConcentration);
-    return m_NumberAvailableCategories == rhs.m_NumberAvailableCategories && m_Categories == rhs.m_Categories &&
-           std::equal(m_Concentrations.begin(), m_Concentrations.end(), rhs.m_Concentrations.begin(), equal) &&
+    return m_NumberAvailableCategories == rhs.m_NumberAvailableCategories &&
+           m_Categories == rhs.m_Categories &&
+           std::equal(m_Concentrations.begin(), m_Concentrations.end(),
+                      rhs.m_Concentrations.begin(), equal) &&
            equal(m_TotalConcentration, rhs.m_TotalConcentration);
 }
 
diff --git a/lib/maths/CMultivariateConstantPrior.cc b/lib/maths/CMultivariateConstantPrior.cc
index 26a855df62..114f096d9d 100644
--- a/lib/maths/CMultivariateConstantPrior.cc
+++ b/lib/maths/CMultivariateConstantPrior.cc
@@ -37,7 +37,9 @@ using TOptionalDouble10Vec = boost::optional<TDouble10Vec>;
 //! \brief Converts a constant value to a string.
 class CConstantToString {
 public:
-    std::string operator()(double value) const { return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision); }
+    std::string operator()(double value) const {
+        return core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision);
+    }
 };
 
 //! Set the constant, validating the input.
@@ -57,23 +59,27 @@ const std::string CONSTANT_TAG("a");
 const std::string EMPTY_STRING;
 }
 
-CMultivariateConstantPrior::CMultivariateConstantPrior(std::size_t dimension, const TOptionalDouble10Vec& constant)
+CMultivariateConstantPrior::CMultivariateConstantPrior(std::size_t dimension,
+                                                       const TOptionalDouble10Vec& constant)
     : CMultivariatePrior(maths_t::E_DiscreteData, 0.0), m_Dimension(dimension) {
     if (constant) {
         setConstant(m_Dimension, *constant, m_Constant);
     }
 }
 
-CMultivariateConstantPrior::CMultivariateConstantPrior(std::size_t dimension, core::CStateRestoreTraverser& traverser)
+CMultivariateConstantPrior::CMultivariateConstantPrior(std::size_t dimension,
+                                                       core::CStateRestoreTraverser& traverser)
     : CMultivariatePrior(maths_t::E_DiscreteData, 0.0), m_Dimension(dimension) {
-    traverser.traverseSubLevel(boost::bind(&CMultivariateConstantPrior::acceptRestoreTraverser, this, _1));
+    traverser.traverseSubLevel(
+        boost::bind(&CMultivariateConstantPrior::acceptRestoreTraverser, this, _1));
 }
 
 bool CMultivariateConstantPrior::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
     do {
         const std::string& name = traverser.name();
-        RESTORE_SETUP_TEARDOWN(
-            CONSTANT_TAG, TDouble10Vec constant, core::CPersistUtils::fromString(traverser.value(), constant), m_Constant.reset(constant))
+        RESTORE_SETUP_TEARDOWN(CONSTANT_TAG, TDouble10Vec constant,
+                               core::CPersistUtils::fromString(traverser.value(), constant),
+                               m_Constant.reset(constant))
     } while (traverser.next());
 
     return true;
@@ -108,8 +114,9 @@ void CMultivariateConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle
 void CMultivariateConstantPrior::propagateForwardsByTime(double /*time*/) {
 }
 
-CMultivariateConstantPrior::TUnivariatePriorPtrDoublePr CMultivariateConstantPrior::univariate(const TSize10Vec& marginalize,
-                                                                                               const TSizeDoublePr10Vec& condition) const {
+CMultivariateConstantPrior::TUnivariatePriorPtrDoublePr
+CMultivariateConstantPrior::univariate(const TSize10Vec& marginalize,
+                                       const TSizeDoublePr10Vec& condition) const {
     if (!this->check(marginalize, condition)) {
         return TUnivariatePriorPtrDoublePr();
     }
@@ -123,12 +130,15 @@ CMultivariateConstantPrior::TUnivariatePriorPtrDoublePr CMultivariateConstantPri
         return TUnivariatePriorPtrDoublePr();
     }
 
-    return this->isNonInformative() ? TUnivariatePriorPtrDoublePr(TUnivariatePriorPtr(new CConstantPrior), 0.0)
-                                    : TUnivariatePriorPtrDoublePr(TUnivariatePriorPtr(new CConstantPrior((*m_Constant)[i1[0]])), 0.0);
+    return this->isNonInformative()
+               ? TUnivariatePriorPtrDoublePr(TUnivariatePriorPtr(new CConstantPrior), 0.0)
+               : TUnivariatePriorPtrDoublePr(
+                     TUnivariatePriorPtr(new CConstantPrior((*m_Constant)[i1[0]])), 0.0);
 }
 
-CMultivariateConstantPrior::TPriorPtrDoublePr CMultivariateConstantPrior::bivariate(const TSize10Vec& marginalize,
-                                                                                    const TSizeDoublePr10Vec& condition) const {
+CMultivariateConstantPrior::TPriorPtrDoublePr
+CMultivariateConstantPrior::bivariate(const TSize10Vec& marginalize,
+                                      const TSizeDoublePr10Vec& condition) const {
     if (m_Dimension == 2) {
         return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0);
     }
@@ -155,7 +165,8 @@ CMultivariateConstantPrior::TPriorPtrDoublePr CMultivariateConstantPrior::bivari
     return TPriorPtrDoublePr(TPriorPtr(new CMultivariateConstantPrior(2)), 0.0);
 }
 
-CMultivariateConstantPrior::TDouble10VecDouble10VecPr CMultivariateConstantPrior::marginalLikelihoodSupport() const {
+CMultivariateConstantPrior::TDouble10VecDouble10VecPr
+CMultivariateConstantPrior::marginalLikelihoodSupport() const {
     TDouble10Vec lowest(m_Dimension);
     TDouble10Vec highest(m_Dimension);
     for (std::size_t i = 0u; i < m_Dimension; ++i) {
@@ -165,7 +176,8 @@ CMultivariateConstantPrior::TDouble10VecDouble10VecPr CMultivariateConstantPrior
     return std::make_pair(lowest, highest);
 }
 
-CMultivariateConstantPrior::TDouble10Vec CMultivariateConstantPrior::marginalLikelihoodMean() const {
+CMultivariateConstantPrior::TDouble10Vec
+CMultivariateConstantPrior::marginalLikelihoodMean() const {
     if (this->isNonInformative()) {
         return TDouble10Vec(m_Dimension, 0.0);
     }
@@ -173,12 +185,14 @@ CMultivariateConstantPrior::TDouble10Vec CMultivariateConstantPrior::marginalLik
     return *m_Constant;
 }
 
-CMultivariateConstantPrior::TDouble10Vec CMultivariateConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
-                                                                                            const TDouble10Vec4Vec& /*weights*/) const {
+CMultivariateConstantPrior::TDouble10Vec
+CMultivariateConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
+                                                   const TDouble10Vec4Vec& /*weights*/) const {
     return this->marginalLikelihoodMean();
 }
 
-CMultivariateConstantPrior::TDouble10Vec10Vec CMultivariateConstantPrior::marginalLikelihoodCovariance() const {
+CMultivariateConstantPrior::TDouble10Vec10Vec
+CMultivariateConstantPrior::marginalLikelihoodCovariance() const {
     TDouble10Vec10Vec result(m_Dimension, TDouble10Vec(m_Dimension, 0.0));
     if (this->isNonInformative()) {
         for (std::size_t i = 0u; i < m_Dimension; ++i) {
@@ -188,14 +202,18 @@ CMultivariateConstantPrior::TDouble10Vec10Vec CMultivariateConstantPrior::margin
     return result;
 }
 
-CMultivariateConstantPrior::TDouble10Vec CMultivariateConstantPrior::marginalLikelihoodVariances() const {
-    return TDouble10Vec(m_Dimension, this->isNonInformative() ? boost::numeric::bounds<double>::highest() : 0.0);
+CMultivariateConstantPrior::TDouble10Vec
+CMultivariateConstantPrior::marginalLikelihoodVariances() const {
+    return TDouble10Vec(m_Dimension, this->isNonInformative()
+                                         ? boost::numeric::bounds<double>::highest()
+                                         : 0.0);
 }
 
-maths_t::EFloatingPointErrorStatus CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                                                          const TDouble10Vec1Vec& samples,
-                                                                                          const TDouble10Vec4Vec1Vec& weights,
-                                                                                          double& result) const {
+maths_t::EFloatingPointErrorStatus
+CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                       const TDouble10Vec1Vec& samples,
+                                                       const TDouble10Vec4Vec1Vec& weights,
+                                                       double& result) const {
     result = 0.0;
 
     if (samples.empty()) {
@@ -204,7 +222,8 @@ maths_t::EFloatingPointErrorStatus CMultivariateConstantPrior::jointLogMarginalL
     }
 
     if (samples.size() != weights.size()) {
-        LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+        LOG_ERROR(<< "Mismatch in samples '"
+                  << core::CContainerPrinter::print(samples) << "' and weights '"
                   << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }
@@ -235,14 +254,16 @@ maths_t::EFloatingPointErrorStatus CMultivariateConstantPrior::jointLogMarginalL
             return maths_t::E_FpOverflowed;
         }
 
-        numberSamples += this->smallest(maths_t::countForUpdate(m_Dimension, weightStyles, weights[i]));
+        numberSamples += this->smallest(
+            maths_t::countForUpdate(m_Dimension, weightStyles, weights[i]));
     }
 
     result = numberSamples * core::constants::LOG_MAX_DOUBLE;
 
     return maths_t::E_FpNoErrors;
 }
 
-void CMultivariateConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const {
+void CMultivariateConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples,
+                                                          TDouble10Vec1Vec& samples) const {
     samples.clear();
 
     if (this->isNonInformative()) {
@@ -258,7 +279,8 @@ bool CMultivariateConstantPrior::isNonInformative() const {
 
 void CMultivariateConstantPrior::print(const std::string& separator, std::string& result) const {
     result += core_t::LINE_ENDING + separator + "constant " +
-              (this->isNonInformative() ? std::string("non-informative") : core::CContainerPrinter::print(*m_Constant));
+              (this->isNonInformative() ? std::string("non-informative")
+                                        : core::CContainerPrinter::print(*m_Constant));
 }
 
 uint64_t CMultivariateConstantPrior::checksum(uint64_t seed) const {
@@ -281,7 +303,8 @@ std::size_t CMultivariateConstantPrior::staticSize() const {
 
 void CMultivariateConstantPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     if (m_Constant) {
-        inserter.insertValue(CONSTANT_TAG, core::CPersistUtils::toString(*m_Constant, CConstantToString()));
+        inserter.insertValue(CONSTANT_TAG, core::CPersistUtils::toString(
+                                               *m_Constant, CConstantToString()));
     }
 }
 
@@ -289,7 +312,8 @@ std::string CMultivariateConstantPrior::persistenceTag() const {
     return CONSTANT_TAG + core::CStringUtils::typeToString(m_Dimension);
 }
 
-const CMultivariateConstantPrior::TOptionalDouble10Vec& CMultivariateConstantPrior::constant() const {
+const CMultivariateConstantPrior::TOptionalDouble10Vec&
+CMultivariateConstantPrior::constant() const {
     return m_Constant;
 }
 }
diff --git a/lib/maths/CMultivariateMultimodalPrior.cc b/lib/maths/CMultivariateMultimodalPrior.cc
index 22a58d2fd6..c8733c573d 100644
--- a/lib/maths/CMultivariateMultimodalPrior.cc
+++ b/lib/maths/CMultivariateMultimodalPrior.cc
@@ -36,12 +36,13 @@ std::string printIndices(const TModeVec& modes) {
     }
 }
 
-maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& modes,
-                                                              const maths_t::TWeightStyleVec& weightStyles,
-                                                              const TDouble10Vec1Vec& sample,
-                                                              const TDouble10Vec4Vec1Vec& weights,
-                                                              TSizeDoublePr3Vec& modeLogLikelihoods,
-                                                              double& result) {
+maths_t::EFloatingPointErrorStatus
+jointLogMarginalLikelihood(const TModeVec& modes,
+                           const maths_t::TWeightStyleVec& weightStyles,
+                           const TDouble10Vec1Vec& sample,
+                           const TDouble10Vec4Vec1Vec& weights,
+                           TSizeDoublePr3Vec& modeLogLikelihoods,
+                           double& result) {
     try {
         // We re-normalize so that the maximum log likelihood is one
         // to avoid underflow.
@@ -50,8 +51,8 @@ maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& mo
 
         for (std::size_t i = 0u; i < modes.size(); ++i) {
             double modeLogLikelihood;
-            maths_t::EFloatingPointErrorStatus status =
-                modes[i].s_Prior->jointLogMarginalLikelihood(weightStyles, sample, weights, modeLogLikelihood);
+            maths_t::EFloatingPointErrorStatus status = modes[i].s_Prior->jointLogMarginalLikelihood(
+                weightStyles, sample, weights, modeLogLikelihood);
             if (status & maths_t::E_FpFailed) {
                 // Logging handled at a lower level.
                 return status;
             }
@@ -76,7 +77,8 @@ maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& mo
             return maths_t::E_FpOverflowed;
         }
 
-        LOG_TRACE(<< "modeLogLikelihoods = " << core::CContainerPrinter::print(modeLogLikelihoods));
+        LOG_TRACE(<< "modeLogLikelihoods = "
+                  << core::CContainerPrinter::print(modeLogLikelihoods));
 
         double sampleLikelihood = 0.0;
         double Z = 0.0;
@@ -91,7 +93,8 @@ maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& mo
         sampleLikelihood /= Z;
         result = (std::log(sampleLikelihood) + maxLogLikelihood);
 
-        LOG_TRACE(<< "sample = " << core::CContainerPrinter::print(sample) << ", maxLogLikelihood = " << maxLogLikelihood
+        LOG_TRACE(<< "sample = " << core::CContainerPrinter::print(sample)
+                  << ", maxLogLikelihood = " << maxLogLikelihood
                   << ", sampleLogLikelihood = " << result);
     } catch (const std::exception& e) {
         LOG_ERROR(<< "Failed to compute likelihood: " << e.what());
@@ -101,7 +104,9 @@ maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& mo
     return maths_t::E_FpNoErrors;
 }
 
-void sampleMarginalLikelihood(const TModeVec& modes, std::size_t numberSamples, TDouble10Vec1Vec& samples) {
+void sampleMarginalLikelihood(const TModeVec& modes,
+                              std::size_t numberSamples,
+                              TDouble10Vec1Vec& samples) {
     samples.clear();
 
     if (modes.size() == 1) {
@@ -146,14 +151,17 @@ void sampleMarginalLikelihood(const TModeVec& modes, std::size_t numberSamples,
 }
 
 void print(const TModeVec& modes, const std::string& separator, std::string& result) {
-    double Z = std::accumulate(modes.begin(), modes.end(), 0.0, [](double sum, const TMode& mode) { return sum + mode.weight(); });
+    double Z = std::accumulate(
+        modes.begin(), modes.end(), 0.0,
+        [](double sum, const TMode& mode) { return sum + mode.weight(); });
 
     std::string separator_ = separator + separator;
 
     result += ":";
     for (const auto& mode : modes) {
         double weight = mode.weight() / Z;
-        result += core_t::LINE_ENDING + separator_ + " weight " + core::CStringUtils::typeToStringPretty(weight);
+        result += core_t::LINE_ENDING + separator_ + " weight " +
+                  core::CStringUtils::typeToStringPretty(weight);
         mode.s_Prior->print(separator_, result);
     }
 }
@@ -179,7 +187,8 @@ void modeMergeCallback(std::size_t dimension,
     std::size_t nr = 0;
     TDouble10Vec1Vec samples;
 
-    auto leftMode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(leftMergeIndex));
+    auto leftMode = std::find_if(modes.begin(), modes.end(),
+                                 CSetTools::CIndexInSet(leftMergeIndex));
     if (leftMode != modes.end()) {
         wl = leftMode->s_Prior->numberSamples();
         n += wl;
@@ -188,11 +197,13 @@ void modeMergeCallback(std::size_t dimension,
         nl = leftSamples.size();
         samples.insert(samples.end(), leftSamples.begin(), leftSamples.end());
     } else {
-        LOG_ERROR(<< "Couldn't find mode for " << leftMergeIndex << " in " << printIndices(modes) << ", other index = " << rightMergeIndex
+        LOG_ERROR(<< "Couldn't find mode for " << leftMergeIndex << " in "
+                  << printIndices(modes) << ", other index = " << rightMergeIndex
                   << ", merged index = " << targetIndex);
     }
 
-    auto rightMode = std::find_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(rightMergeIndex));
+    auto rightMode = std::find_if(modes.begin(), modes.end(),
+                                  CSetTools::CIndexInSet(rightMergeIndex));
     if (rightMode != modes.end()) {
         wr = rightMode->s_Prior->numberSamples();
         n += wr;
@@ -201,7 +212,8 @@ void modeMergeCallback(std::size_t dimension,
         nr = rightSamples.size();
         samples.insert(samples.end(), rightSamples.begin(), rightSamples.end());
     } else {
-        LOG_ERROR(<< "Couldn't find mode for " << rightMergeIndex << " in " << printIndices(modes) << ", other index = " << leftMergeIndex
+        LOG_ERROR(<< "Couldn't find mode for " << rightMergeIndex << " in "
+                  << printIndices(modes) << ", other index = " << leftMergeIndex
                   << ", merged index = " << targetIndex);
     }
 
@@ -243,7 +255,8 @@ void modeMergeCallback(std::size_t dimension,
     TSizeSet mergedIndices;
     mergedIndices.insert(leftMergeIndex);
     mergedIndices.insert(rightMergeIndex);
-    modes.erase(std::remove_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(mergedIndices)), modes.end());
+    modes.erase(std::remove_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(mergedIndices)),
+                modes.end());
 
     // Add the new mode.
     LOG_TRACE(<< "Creating mode with index " << targetIndex);
diff --git a/lib/maths/CMultivariateMultimodalPriorFactory.cc b/lib/maths/CMultivariateMultimodalPriorFactory.cc
index 91ff80eb47..68ebf674bf 100644
--- a/lib/maths/CMultivariateMultimodalPriorFactory.cc
+++ b/lib/maths/CMultivariateMultimodalPriorFactory.cc
@@ -19,7 +19,8 @@ namespace {
 template<std::size_t N>
 class CFactory {
 public:
-    static CMultivariateMultimodalPrior<N>* make(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+    static CMultivariateMultimodalPrior<N>*
+    make(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
        return new CMultivariateMultimodalPrior<N>(params, traverser);
     }
 
@@ -30,42 +31,47 @@ class CFactory {
                                                  double minimumClusterCount,
                                                  double minimumCategoryCount,
                                                  const CMultivariatePrior& seedPrior) {
-        boost::scoped_ptr<CClusterer<CVectorNx1<CFloatStorage, N>>> clusterer(CXMeansOnlineFactory::make<CFloatStorage, N>(
-            dataType, weightCalc, decayRate, minimumClusterFraction, minimumClusterCount, minimumCategoryCount));
+        boost::scoped_ptr<CClusterer<CVectorNx1<CFloatStorage, N>>> clusterer(
+            CXMeansOnlineFactory::make<CFloatStorage, N>(
+                dataType, weightCalc, decayRate, minimumClusterFraction,
+                minimumClusterCount, minimumCategoryCount));
         return new CMultivariateMultimodalPrior<N>(dataType, *clusterer, seedPrior, decayRate);
     }
 };
 }
 
 #define CREATE_PRIOR(N)                                                        \
     switch (N) {                                                               \
     case 2:                                                                    \
         ptr.reset(CFactory<2>::make(FACTORY_ARGS));                            \
         break;                                                                 \
     case 3:                                                                    \
         ptr.reset(CFactory<3>::make(FACTORY_ARGS));                            \
         break;                                                                 \
     case 4:                                                                    \
         ptr.reset(CFactory<4>::make(FACTORY_ARGS));                            \
         break;                                                                 \
     case 5:                                                                    \
         ptr.reset(CFactory<5>::make(FACTORY_ARGS));                            \
         break;                                                                 \
     default:                                                                   \
         LOG_ERROR(<< "Unsupported dimension " << N);                           \
         break;                                                                 \
     }
 
-CMultivariateMultimodalPriorFactory::TPriorPtr CMultivariateMultimodalPriorFactory::nonInformative(std::size_t dimension,
-                                                                                                   maths_t::EDataType dataType,
-                                                                                                   double decayRate,
-                                                                                                   maths_t::EClusterWeightCalc weightCalc,
-                                                                                                   double minimumClusterFraction,
-                                                                                                   double minimumClusterCount,
-                                                                                                   double minimumCategoryCount,
-                                                                                                   const CMultivariatePrior& seedPrior) {
+CMultivariateMultimodalPriorFactory::TPriorPtr
+CMultivariateMultimodalPriorFactory::nonInformative(std::size_t dimension,
+                                                    maths_t::EDataType dataType,
+                                                    double decayRate,
+                                                    maths_t::EClusterWeightCalc weightCalc,
+                                                    double minimumClusterFraction,
+                                                    double minimumClusterCount,
+                                                    double minimumCategoryCount,
+                                                    const CMultivariatePrior& seedPrior) {
     TPriorPtr ptr;
-#define FACTORY_ARGS dataType, decayRate, weightCalc, minimumClusterFraction, minimumClusterCount, minimumCategoryCount, seedPrior
+#define FACTORY_ARGS                                                           \
    dataType, decayRate, weightCalc, minimumClusterFraction,                   \
        minimumClusterCount, minimumCategoryCount, seedPrior
     CREATE_PRIOR(dimension)
 #undef FACTORY_ARGS
     return ptr;
diff --git a/lib/maths/CMultivariateNormalConjugateFactory.cc b/lib/maths/CMultivariateNormalConjugateFactory.cc
index d70752f9a5..1f1ebb00d7 100644
--- a/lib/maths/CMultivariateNormalConjugateFactory.cc
+++ b/lib/maths/CMultivariateNormalConjugateFactory.cc
@@ -16,37 +16,41 @@ namespace {
 template<std::size_t N>
 class CFactory {
 public:
-    static CMultivariateNormalConjugate<N>* make(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+    static CMultivariateNormalConjugate<N>*
+    make(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
         return new CMultivariateNormalConjugate<N>(params, traverser);
     }
 
     static CMultivariateNormalConjugate<N>* make(maths_t::EDataType dataType, double decayRate) {
-        return CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate).clone();
+        return CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate)
+            .clone();
     }
 };
 }
 
 #define CREATE_PRIOR(N)                                                        \
     switch (N) {                                                               \
     case 2:                                                                    \
         ptr.reset(CFactory<2>::make(FACTORY_ARGS));                            \
         break;                                                                 \
     case 3:                                                                    \
         ptr.reset(CFactory<3>::make(FACTORY_ARGS));                            \
         break;                                                                 \
     case 4:                                                                    \
         ptr.reset(CFactory<4>::make(FACTORY_ARGS));                            \
         break;                                                                 \
     case 5:                                                                    \
         ptr.reset(CFactory<5>::make(FACTORY_ARGS));                            \
         break;                                                                 \
     default:                                                                   \
         LOG_ERROR(<< "Unsupported dimension " << N);                           \
         break;                                                                 \
     }
 
 CMultivariateNormalConjugateFactory::TPriorPtr
-CMultivariateNormalConjugateFactory::nonInformative(std::size_t dimension, maths_t::EDataType dataType, double decayRate) {
+CMultivariateNormalConjugateFactory::nonInformative(std::size_t dimension,
+                                                    maths_t::EDataType dataType,
+                                                    double decayRate) {
     TPriorPtr ptr;
 #define FACTORY_ARGS dataType, decayRate
     CREATE_PRIOR(dimension);
diff --git a/lib/maths/CMultivariateOneOfNPrior.cc b/lib/maths/CMultivariateOneOfNPrior.cc
index 6d35b36f5e..e6d9be0665 100644
--- a/lib/maths/CMultivariateOneOfNPrior.cc
+++ b/lib/maths/CMultivariateOneOfNPrior.cc
@@ -119,10 +119,12 @@ bool modelAcceptRestoreTraverser(const SDistributionRestoreParams& params,
         const std::string& name = traverser.name();
         RESTORE_SETUP_TEARDOWN(WEIGHT_TAG,
                                /**/,
-                               traverser.traverseSubLevel(boost::bind(&CModelWeight::acceptRestoreTraverser, &weight, _1)),
+                               traverser.traverseSubLevel(boost::bind(
+                                   &CModelWeight::acceptRestoreTraverser, &weight, _1)),
                                gotWeight = true)
-        RESTORE(PRIOR_TAG,
-                traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(model), _1)))
+        RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind(
+                               CPriorStateSerialiser(), boost::cref(params),
+                               boost::ref(model), _1)))
     } while (traverser.next());
 
     if (!gotWeight) {
@@ -148,8 +150,9 @@ bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
     do {
         const std::string& name = traverser.name();
         RESTORE_BUILT_IN(DECAY_RATE_TAG, decayRate)
-        RESTORE(MODEL_TAG,
-                traverser.traverseSubLevel(boost::bind(&modelAcceptRestoreTraverser, boost::cref(params), boost::ref(models), _1)))
+        RESTORE(MODEL_TAG, traverser.traverseSubLevel(boost::bind(
+                               &modelAcceptRestoreTraverser,
+                               boost::cref(params), boost::ref(models), _1)))
         RESTORE_BUILT_IN(NUMBER_SAMPLES_TAG, numberSamples)
     } while (traverser.next());
 
@@ -158,9 +161,13 @@ bool acceptRestoreTraverser(const SDistributionRestoreParams& params,
 
 //! Persist state for one of the models by passing information
 //! to the supplied inserter.
-void modelAcceptPersistInserter(const CModelWeight& weight, const CMultivariatePrior& prior, core::CStatePersistInserter& inserter) {
-    inserter.insertLevel(WEIGHT_TAG, boost::bind(&CModelWeight::acceptPersistInserter, &weight, _1));
-    inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(prior), _1));
+void modelAcceptPersistInserter(const CModelWeight& weight,
+                                const CMultivariatePrior& prior,
+                                core::CStatePersistInserter& inserter) {
+    inserter.insertLevel(
+        WEIGHT_TAG, boost::bind(&CModelWeight::acceptPersistInserter, &weight, _1));
+    inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(),
+                                                boost::cref(prior), _1));
 }
 
 const double DERATE = 0.99999;
@@ -210,12 +217,13 @@ CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(std::size_t dimension,
 CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(std::size_t dimension,
                                                    const SDistributionRestoreParams& params,
                                                    core::CStateRestoreTraverser& traverser)
-    : CMultivariatePrior(params.s_DataType, params.s_DecayRate), m_Dimension(dimension) {
+    : CMultivariatePrior(params.s_DataType, params.s_DecayRate),
+      m_Dimension(dimension) {
     double decayRate;
     double numberSamples;
     if (traverser.traverseSubLevel(boost::bind(
-            &acceptRestoreTraverser, boost::cref(params), boost::ref(m_Models), boost::ref(decayRate), boost::ref(numberSamples), _1)) ==
-        false) {
+            &acceptRestoreTraverser, boost::cref(params), boost::ref(m_Models),
+            boost::ref(decayRate), boost::ref(numberSamples), _1)) == false) {
         return;
     }
     this->decayRate(decayRate);
@@ -223,7 +231,8 @@ CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(std::size_t dimension,
 }
 
 CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(const CMultivariateOneOfNPrior& other)
-    : CMultivariatePrior(other.dataType(), other.decayRate()), m_Dimension(other.m_Dimension) {
+    : CMultivariatePrior(other.dataType(), other.decayRate()),
+      m_Dimension(other.m_Dimension) {
     // Clone all the models up front so we can implement strong exception safety.
     m_Models.reserve(other.m_Models.size());
     for (const auto& model : other.m_Models) {
@@ -233,7 +242,8 @@ CMultivariateOneOfNPrior::CMultivariateOneOfNPrior(const CMultivariateOneOfNPrio
     this->CMultivariatePrior::addSamples(other.numberSamples());
 }
 
-CMultivariateOneOfNPrior& CMultivariateOneOfNPrior::operator=(const CMultivariateOneOfNPrior& rhs) {
+CMultivariateOneOfNPrior& CMultivariateOneOfNPrior::
+operator=(const CMultivariateOneOfNPrior& rhs) {
     if (this != &rhs) {
         CMultivariateOneOfNPrior tmp(rhs);
         this->swap(tmp);
@@ -320,7 +330,9 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles,
         // Update the weights with the marginal likelihoods.
         double logLikelihood = 0.0;
         maths_t::EFloatingPointErrorStatus status =
-            use ? model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) : maths_t::E_FpOverflowed;
+            use ? model.second->jointLogMarginalLikelihood(weightStyles, samples,
+                                                           weights, logLikelihood)
+                : maths_t::E_FpOverflowed;
         if (status & maths_t::E_FpFailed) {
             LOG_ERROR(<< "Failed to compute log-likelihood");
             LOG_ERROR(<< "samples = " << core::CContainerPrinter::print(samples));
@@ -358,7 +370,9 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles,
         // affect model selection, particularly early on in the model
         // life-cycle.
         double l = largest(n);
-        double minLogLikelihood = maxLogLikelihood[0] - l * std::min(maxModelPenalty(this->numberSamples()), 100.0);
+        double minLogLikelihood =
+            maxLogLikelihood[0] -
+            l * std::min(maxModelPenalty(this->numberSamples()), 100.0);
 
         TMaxAccumulator maxLogWeight;
         for (std::size_t i = 0; i < logLikelihoods.size(); ++i) {
@@ -407,8 +421,9 @@ void CMultivariateOneOfNPrior::propagateForwardsByTime(double time) {
     LOG_TRACE(<< "numberSamples = " << this->numberSamples());
 }
 
-CMultivariateOneOfNPrior::TUnivariatePriorPtrDoublePr CMultivariateOneOfNPrior::univariate(const TSize10Vec& marginalize,
-                                                                                           const TSizeDoublePr10Vec& condition) const {
+CMultivariateOneOfNPrior::TUnivariatePriorPtrDoublePr
+CMultivariateOneOfNPrior::univariate(const TSize10Vec& marginalize,
+                                     const TSizeDoublePr10Vec& condition) const {
     COneOfNPrior::TDoublePriorPtrPrVec models;
     TDouble3Vec weights;
     TMaxAccumulator maxWeight;
@@ -431,12 +446,14 @@ CMultivariateOneOfNPrior::TUnivariatePriorPtrDoublePr CMultivariateOneOfNPrior::
         models[i].first *= std::exp(weights[i] - maxWeight[0]) / Z;
     }
 
-    return std::make_pair(TUnivariatePriorPtr(new COneOfNPrior(models, this->dataType(), this->decayRate())),
+    return std::make_pair(TUnivariatePriorPtr(new COneOfNPrior(
+                              models, this->dataType(), this->decayRate())),
                           maxWeight.count() > 0 ? maxWeight[0] : 0.0);
 }
 
-CMultivariateOneOfNPrior::TPriorPtrDoublePr CMultivariateOneOfNPrior::bivariate(const TSize10Vec& marginalize,
-                                                                                const TSizeDoublePr10Vec& condition) const {
+CMultivariateOneOfNPrior::TPriorPtrDoublePr
+CMultivariateOneOfNPrior::bivariate(const TSize10Vec& marginalize,
+                                    const TSizeDoublePr10Vec& condition) const {
     if (m_Dimension == 2) {
         return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0);
     }
@@ -463,7 +480,8 @@ CMultivariateOneOfNPrior::TPriorPtrDoublePr CMultivariateOneOfNPrior::bivariate(
         models[i].first *= std::exp(weights[i] - maxWeight[0]) / Z;
     }
 
-    return std::make_pair(TPriorPtr(new CMultivariateOneOfNPrior(2, models, this->dataType(), this->decayRate())),
+    return std::make_pair(TPriorPtr(new CMultivariateOneOfNPrior(
+                              2, models, this->dataType(), this->decayRate())),
                           maxWeight.count() > 0 ? maxWeight[0] : 0.0);
 }
 
@@ -471,7 +489,8 @@ TDouble10VecDouble10VecPr CMultivariateOneOfNPrior::marginalLikelihoodSupport()
     // We define this is as the intersection of the component model
     // supports.
 
-    TDouble10VecDouble10VecPr result(TDouble10Vec(m_Dimension, MINUS_INF), TDouble10Vec(m_Dimension, INF));
+    TDouble10VecDouble10VecPr result(TDouble10Vec(m_Dimension, MINUS_INF),
+                                     TDouble10Vec(m_Dimension, INF));
     TDouble10VecDouble10VecPr modelSupport;
     for (const auto& model : m_Models) {
         if (model.second->participatesInModelSelection()) {
@@ -502,7 +521,8 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMean() const {
     return result;
 }
 
-TDouble10Vec CMultivariateOneOfNPrior::nearestMarginalLikelihoodMean(const TDouble10Vec& value) const {
+TDouble10Vec
+CMultivariateOneOfNPrior::nearestMarginalLikelihoodMean(const TDouble10Vec& value) const {
     // See marginalLikelihoodMean for discussion.
     TDouble10Vec result(m_Dimension, 0.0);
@@ -557,7 +577,9 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodVariances() const {
     return result;
 }
 
-TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) const {
+TDouble10Vec
+CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles,
+                                                 const TDouble10Vec4Vec& weights) const {
     // We approximate this as the weighted average of the component
     // model modes.
 
@@ -572,7 +594,8 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyle
         if (model.second->participatesInModelSelection()) {
             sample[0] = model.second->marginalLikelihoodMode(weightStyles, weights);
             double logLikelihood;
-            model.second->jointLogMarginalLikelihood(weightStyles, sample, sampleWeights, logLikelihood);
+            model.second->jointLogMarginalLikelihood(weightStyles, sample,
+                                                     sampleWeights, logLikelihood);
             updateMean(sample[0], model.first * std::exp(logLikelihood), result, w);
         }
     }
@@ -581,10 +604,11 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyle
     return CTools::truncate(result, support.first, support.second);
 }
 
-maths_t::EFloatingPointErrorStatus CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                                                        const TDouble10Vec1Vec& samples,
-                                                                                        const TDouble10Vec4Vec1Vec& weights,
-                                                                                        double& result) const {
+maths_t::EFloatingPointErrorStatus
+CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                     const TDouble10Vec1Vec& samples,
+                                                     const TDouble10Vec4Vec1Vec& weights,
+                                                     double& result) const {
     result = 0.0;
 
     if (samples.empty()) {
@@ -606,8 +630,8 @@ maths_t::EFloatingPointErrorStatus CMultivariateOneOfNPrior::jointLogMarginalLik
     for (const auto& model : m_Models) {
         if (model.second->participatesInModelSelection()) {
             double logLikelihood;
-            maths_t::EFloatingPointErrorStatus status =
-                model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood);
+            maths_t::EFloatingPointErrorStatus status = model.second->jointLogMarginalLikelihood(
+                weightStyles, samples, weights, logLikelihood);
             if (status & maths_t::E_FpFailed) {
                 return status;
             }
@@ -647,7 +671,8 @@ maths_t::EFloatingPointErrorStatus CMultivariateOneOfNPrior::jointLogMarginalLik
     return status;
 }
 
-void CMultivariateOneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble10Vec1Vec& samples) const {
+void CMultivariateOneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples,
+                                                        TDouble10Vec1Vec& samples) const {
     samples.clear();
 
     if (numberSamples == 0 || this->isNonInformative()) {
@@ -666,7 +691,8 @@ void CMultivariateOneOfNPrior::sampleMarginalLikelihood(std::size_t numberSample
     CSampling::TSizeVec sampling;
     CSampling::weightedSample(numberSamples, weights, sampling);
-    LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(weights) << ", sampling = " << core::CContainerPrinter::print(sampling));
+    LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(weights)
+              << ", sampling = " << core::CContainerPrinter::print(sampling));
 
     if (sampling.size() != m_Models.size()) {
         LOG_ERROR(<< "Failed to sample marginal likelihood");
@@ -707,14 +733,16 @@ void CMultivariateOneOfNPrior::print(const std::string& separator, std::string&
     }
 
     result += ':';
-    result += core_t::LINE_ENDING + separator + " # samples " + core::CStringUtils::typeToStringPretty(this->numberSamples());
+    result += core_t::LINE_ENDING + separator + " # samples " +
+              core::CStringUtils::typeToStringPretty(this->numberSamples());
 
     std::string separator_ = separator + separator;
 
     for (const auto& model : m_Models) {
         double weight = model.first;
         if (weight >= MINIMUM_SIGNIFICANT_WEIGHT) {
-            result += core_t::LINE_ENDING + separator_ + " weight " + core::CStringUtils::typeToStringPretty(weight);
+            result += core_t::LINE_ENDING + separator_ + " weight " +
+                      core::CStringUtils::typeToStringPretty(weight);
             model.second->print(separator_, result);
         }
     }
@@ -744,10 +772,13 @@ std::string CMultivariateOneOfNPrior::persistenceTag() const {
 
 void CMultivariateOneOfNPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
     for (const auto& model : m_Models) {
-        inserter.insertLevel(MODEL_TAG, boost::bind(&modelAcceptPersistInserter, boost::cref(model.first), boost::cref(*model.second), _1));
+        inserter.insertLevel(MODEL_TAG, boost::bind(&modelAcceptPersistInserter,
+                                                    boost::cref(model.first),
+                                                    boost::cref(*model.second), _1));
     }
     inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision);
-    inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision);
+    inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(),
+                         core::CIEEE754::E_SinglePrecision);
 }
 
 CMultivariateOneOfNPrior::TDouble3Vec CMultivariateOneOfNPrior::weights() const {
@@ -805,6 +836,7 @@ std::string CMultivariateOneOfNPrior::debugWeights() const {
 }
 
 const double CMultivariateOneOfNPrior::MAXIMUM_RELATIVE_ERROR = 1e-3;
-const double CMultivariateOneOfNPrior::LOG_MAXIMUM_RELATIVE_ERROR = std::log(MAXIMUM_RELATIVE_ERROR);
+const double CMultivariateOneOfNPrior::LOG_MAXIMUM_RELATIVE_ERROR =
+    std::log(MAXIMUM_RELATIVE_ERROR);
 }
 }
diff --git a/lib/maths/CMultivariateOneOfNPriorFactory.cc b/lib/maths/CMultivariateOneOfNPriorFactory.cc
index f2b6f475ec..0652879b54 100644
--- a/lib/maths/CMultivariateOneOfNPriorFactory.cc
+++ b/lib/maths/CMultivariateOneOfNPriorFactory.cc
@@ -11,10 +11,11 @@
 namespace ml {
 namespace maths {
 
-CMultivariateOneOfNPriorFactory::TPriorPtr CMultivariateOneOfNPriorFactory::nonInformative(std::size_t dimension,
-                                                                                           maths_t::EDataType dataType,
-                                                                                           double decayRate,
-                                                                                           const TPriorPtrVec& models) {
+CMultivariateOneOfNPriorFactory::TPriorPtr
+CMultivariateOneOfNPriorFactory::nonInformative(std::size_t dimension,
+                                                maths_t::EDataType dataType,
+                                                double decayRate,
+                                                const TPriorPtrVec& models) {
     return TPriorPtr(new CMultivariateOneOfNPrior(dimension, models, dataType, decayRate));
 }
 
diff --git a/lib/maths/CMultivariatePrior.cc b/lib/maths/CMultivariatePrior.cc
index 62ef3cad88..4c9a614744 100644
--- a/lib/maths/CMultivariatePrior.cc
+++ b/lib/maths/CMultivariatePrior.cc
@@ -34,7 +34,9 @@ void setDecayRate(double value, double fallback, double& result) {
     }
 }
 
-CMultivariatePrior::CMultivariatePrior() : m_Forecasting(false), m_DataType(maths_t::E_DiscreteData), m_DecayRate(0.0), m_NumberSamples(0) {
+CMultivariatePrior::CMultivariatePrior()
+    : m_Forecasting(false), m_DataType(maths_t::E_DiscreteData),
+      m_DecayRate(0.0), m_NumberSamples(0) {
 }
 
 CMultivariatePrior::CMultivariatePrior(maths_t::EDataType dataType, double decayRate)
@@ -93,16 +95,20 @@ void CMultivariatePrior::addSamples(const TWeightStyleVec& weightStyles,
                 n[j] += wi[j];
             }
         }
-    } catch (const std::exception& e) { LOG_ERROR(<< "Failed to extract sample counts: " << e.what()); }
+    } catch (const std::exception& e) {
+        LOG_ERROR(<< "Failed to extract sample counts: " << e.what());
+    }
     this->addSamples(smallest(n));
 }
-CMultivariatePrior::TDouble10Vec CMultivariatePrior::nearestMarginalLikelihoodMean(const TDouble10Vec& /*value*/) const {
+CMultivariatePrior::TDouble10Vec
+CMultivariatePrior::nearestMarginalLikelihoodMean(const TDouble10Vec& /*value*/) const {
     return this->marginalLikelihoodMean();
 }
 
-CMultivariatePrior::TDouble10Vec1Vec CMultivariatePrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles,
-                                                                                 const TDouble10Vec4Vec& weights) const {
+CMultivariatePrior::TDouble10Vec1Vec
+CMultivariatePrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles,
+                                            const TDouble10Vec4Vec& weights) const {
     return TDouble10Vec1Vec{this->marginalLikelihoodMode(weightStyles, weights)};
 }
 
@@ -136,15 +142,18 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal
     using TDouble1Vec = core::CSmallVector<double, 1>;
     using TDouble4Vec = core::CSmallVector<double, 4>;
     using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
-    using TJointProbabilityOfLessLikelySamplesVec = core::CSmallVector<CJointProbabilityOfLessLikelySamples, 10>;
+    using TJointProbabilityOfLessLikelySamplesVec =
+        core::CSmallVector<CJointProbabilityOfLessLikelySamples, 10>;
 
     static const TSize10Vec NO_MARGINS;
     static const TSizeDoublePr10Vec NO_CONDITIONS;
 
-    TJointProbabilityOfLessLikelySamplesVec lowerBounds_[2] = {TJointProbabilityOfLessLikelySamplesVec(coordinates.size()),
-                                                               TJointProbabilityOfLessLikelySamplesVec(coordinates.size())};
-    TJointProbabilityOfLessLikelySamplesVec upperBounds_[2] = {TJointProbabilityOfLessLikelySamplesVec(coordinates.size()),
-                                                               TJointProbabilityOfLessLikelySamplesVec(coordinates.size())};
+    TJointProbabilityOfLessLikelySamplesVec lowerBounds_[2] = {
+        TJointProbabilityOfLessLikelySamplesVec(coordinates.size()),
+        TJointProbabilityOfLessLikelySamplesVec(coordinates.size())};
+    TJointProbabilityOfLessLikelySamplesVec upperBounds_[2] = {
+        TJointProbabilityOfLessLikelySamplesVec(coordinates.size()),
+        TJointProbabilityOfLessLikelySamplesVec(coordinates.size())};
 
     std::size_t d = this->dimension();
     TSize10Vec marginalize(d - 1);
@@ -156,8 +165,7 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal
         std::size_t coordinate = coordinates[i];
 
         std::copy_if(boost::make_counting_iterator(std::size_t(0)),
-                     boost::make_counting_iterator(d),
-                     marginalize.begin(),
+                     boost::make_counting_iterator(d), marginalize.begin(),
                      [coordinate](std::size_t j) { return j != coordinate; });
         TUnivariatePriorPtr margin(this->univariate(marginalize, NO_CONDITIONS).first);
         if (!margin) {
@@ -179,18 +187,23 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal
 
         double lb[2], ub[2];
         maths_t::ETail tc[2];
-        if (!margin->probabilityOfLessLikelySamples(calculation, weightStyles, sc, wc, lb[0], ub[0], tc[0])) {
+        if (!margin->probabilityOfLessLikelySamples(
+                calculation, weightStyles, sc, wc, lb[0], ub[0], tc[0])) {
             LOG_ERROR(<< "Failed to compute probability for coordinate " << coordinate);
             return false;
         }
-        LOG_TRACE(<< "lb(" << coordinate << ") = " << lb[0] << ", ub(" << coordinate << ") = " << ub[0]);
+        LOG_TRACE(<< "lb(" << coordinate << ") = " << lb[0] << ", ub("
+                  << coordinate << ") = " << ub[0]);
 
-        TUnivariatePriorPtr conditional(this->univariate(NO_MARGINS, condition).first);
-        if (!conditional->probabilityOfLessLikelySamples(calculation, weightStyles, sc, wc, lb[1], ub[1], tc[1])) {
+        TUnivariatePriorPtr conditional(
+            this->univariate(NO_MARGINS, condition).first);
+        if (!conditional->probabilityOfLessLikelySamples(
+                calculation, weightStyles, sc, wc, lb[1], ub[1], tc[1])) {
             LOG_ERROR(<< "Failed to compute probability for coordinate " << coordinate);
return false; } - LOG_TRACE(<< "lb(" << coordinate << "|.) = " << lb[1] << ", ub(" << coordinate << "|.) = " << ub[1]); + LOG_TRACE(<< "lb(" << coordinate << "|.) = " << lb[1] << ", ub(" + << coordinate << "|.) = " << ub[1]); lowerBounds_[0][i].add(lb[0]); upperBounds_[0][i].add(ub[0]); @@ -201,8 +214,10 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal } for (std::size_t i = 0; i < coordinates.size(); ++i) { - if (!lowerBounds_[0][i].calculate(lowerBounds[0][i]) || !upperBounds_[0][i].calculate(upperBounds[0][i]) || - !lowerBounds_[1][i].calculate(lowerBounds[1][i]) || !upperBounds_[1][i].calculate(upperBounds[1][i])) { + if (!lowerBounds_[0][i].calculate(lowerBounds[0][i]) || + !upperBounds_[0][i].calculate(upperBounds[0][i]) || + !lowerBounds_[1][i].calculate(lowerBounds[1][i]) || + !upperBounds_[1][i].calculate(upperBounds[1][i])) { LOG_ERROR(<< "Failed to compute probability for coordinate " << coordinates[i]); return false; } @@ -238,7 +253,8 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal for (std::size_t i = 0u; i < samples.size(); ++i) { sample[0] = samples[i]; weight[0] = weights[i]; - if (!this->probabilityOfLessLikelySamples(calculation, weightStyles, sample, weight, coordinates, lbs, ubs, tail)) { + if (!this->probabilityOfLessLikelySamples(calculation, weightStyles, sample, weight, + coordinates, lbs, ubs, tail)) { return false; } @@ -251,18 +267,20 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal } double lb[2], ub[2]; - if (!lowerBound_[0].calculate(lb[0]) || !upperBound_[0].calculate(ub[0]) || !lowerBound_[1].calculate(lb[1]) || - !upperBound_[1].calculate(ub[1])) { + if (!lowerBound_[0].calculate(lb[0]) || !upperBound_[0].calculate(ub[0]) || + !lowerBound_[1].calculate(lb[1]) || !upperBound_[1].calculate(ub[1])) { return false; } - LOG_TRACE(<< "lb = " << core::CContainerPrinter::print(lb) << ", ub = " << core::CContainerPrinter::print(ub)); + LOG_TRACE(<< "lb = " << core::CContainerPrinter::print(lb) + << ", ub = " << core::CContainerPrinter::print(ub)); lowerBound = std::sqrt(lb[0] * lb[1]); upperBound = std::sqrt(ub[0] * ub[1]); return true; } -std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, std::size_t y) const { +std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, + std::size_t y) const { // We'll plot the marginal likelihood function over a range where // most of the mass is, i.e. the 99% confidence interval. @@ -294,11 +312,13 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, s boost::shared_ptr xMargin(this->univariate(xm, TSizeDoublePr10Vec()).first); if (x == y) { - return xMargin != nullptr ? xMargin->printMarginalLikelihoodFunction() : std::string(); + return xMargin != nullptr ? 
xMargin->printMarginalLikelihoodFunction() + : std::string(); } boost::shared_ptr yMargin(this->univariate(ym, TSizeDoublePr10Vec()).first); - boost::shared_ptr xyMargin(this->bivariate(xym, TSizeDoublePr10Vec()).first); + boost::shared_ptr xyMargin( + this->bivariate(xym, TSizeDoublePr10Vec()).first); TDoubleDoublePr xRange = xMargin->marginalLikelihoodConfidenceInterval(RANGE); TDoubleDoublePr yRange = yMargin->marginalLikelihoodConfidenceInterval(RANGE); @@ -382,9 +402,11 @@ void CMultivariatePrior::addSamples(double n) { m_NumberSamples += n; } -bool CMultivariatePrior::check(const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) const { +bool CMultivariatePrior::check(const TDouble10Vec1Vec& samples, + const TDouble10Vec4Vec1Vec& weights) const { if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '" << weights << "'"); + LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '" + << weights << "'"); return false; } for (std::size_t i = 0u; i < samples.size(); ++i) { @@ -402,14 +424,18 @@ bool CMultivariatePrior::check(const TDouble10Vec1Vec& samples, const TDouble10V return true; } -bool CMultivariatePrior::check(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { - static const auto FIRST = [](const TSizeDoublePr& pair) { return pair.first; }; +bool CMultivariatePrior::check(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition) const { + static const auto FIRST = [](const TSizeDoublePr& pair) { + return pair.first; + }; std::size_t d = this->dimension(); - if ((marginalize.size() > 0 && marginalize.back() >= d) || (condition.size() > 0 && condition.back().first >= d) || - CSetTools::setIntersectSize(marginalize.begin(), - marginalize.end(), - boost::make_transform_iterator(condition.begin(), FIRST), - boost::make_transform_iterator(condition.end(), FIRST)) != 0) { + if ((marginalize.size() > 0 && marginalize.back() >= d) || + (condition.size() > 0 && condition.back().first >= d) || + CSetTools::setIntersectSize( + marginalize.begin(), marginalize.end(), + boost::make_transform_iterator(condition.begin(), FIRST), + boost::make_transform_iterator(condition.end(), FIRST)) != 0) { LOG_ERROR(<< "Invalid variables for computing univariate distribution: " << "marginalize '" << marginalize << "'" << ", condition '" << condition << "'"); @@ -418,7 +444,9 @@ bool CMultivariatePrior::check(const TSize10Vec& marginalize, const TSizeDoubleP return true; } -void CMultivariatePrior::remainingVariables(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition, TSize10Vec& result) const { +void CMultivariatePrior::remainingVariables(const TSize10Vec& marginalize, + const TSizeDoublePr10Vec& condition, + TSize10Vec& result) const { std::size_t d = this->dimension(); result.reserve(d - marginalize.size() - condition.size()); for (std::size_t i = 0u, j = 0u, k = 0u; k < d; ++k) { diff --git a/lib/maths/CNaiveBayes.cc b/lib/maths/CNaiveBayes.cc index cd71fbb846..7d83fe6f18 100644 --- a/lib/maths/CNaiveBayes.cc +++ b/lib/maths/CNaiveBayes.cc @@ -36,7 +36,8 @@ const std::string COUNT_TAG{"e"}; const std::string CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f"}; } -CNaiveBayesFeatureDensityFromPrior::CNaiveBayesFeatureDensityFromPrior(const CPrior& prior) : m_Prior(prior.clone()) { +CNaiveBayesFeatureDensityFromPrior::CNaiveBayesFeatureDensityFromPrior(const CPrior& prior) + : m_Prior(prior.clone()) { } void CNaiveBayesFeatureDensityFromPrior::add(const TDouble1Vec& x) { @@ -47,23 +48,27 
@@ CNaiveBayesFeatureDensityFromPrior* CNaiveBayesFeatureDensityFromPrior::clone() return new CNaiveBayesFeatureDensityFromPrior(*m_Prior); } -bool CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params, - core::CStateRestoreTraverser& traverser) { +bool CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser( + const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; - RESTORE(PRIOR_TAG, - traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(m_Prior), _1))); + RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), boost::cref(params), + boost::ref(m_Prior), _1))); } while (traverser.next()); return true; } void CNaiveBayesFeatureDensityFromPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_Prior), _1)); + inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(*m_Prior), _1)); } double CNaiveBayesFeatureDensityFromPrior::logValue(const TDouble1Vec& x) const { double result; - if (m_Prior->jointLogMarginalLikelihood(CConstantWeights::COUNT, x, CConstantWeights::SINGLE_UNIT, result) != maths_t::E_FpNoErrors) { + if (m_Prior->jointLogMarginalLikelihood(CConstantWeights::COUNT, x, CConstantWeights::SINGLE_UNIT, + result) != maths_t::E_FpNoErrors) { LOG_ERROR("Bad density value at " << x << " for " << m_Prior->print()); return boost::numeric::bounds<double>::lowest(); } @@ -73,7 +78,8 @@ double CNaiveBayesFeatureDensityFromPrior::logValue(const TDouble1Vec& x) const double CNaiveBayesFeatureDensityFromPrior::logMaximumValue() const { double result; if (m_Prior->jointLogMarginalLikelihood( - CConstantWeights::COUNT, {m_Prior->marginalLikelihoodMode()}, CConstantWeights::SINGLE_UNIT, result) != maths_t::E_FpNoErrors) { + CConstantWeights::COUNT, {m_Prior->marginalLikelihoodMode()}, + CConstantWeights::SINGLE_UNIT, result) != maths_t::E_FpNoErrors) { LOG_ERROR("Bad density value for " << m_Prior->print()); return boost::numeric::bounds<double>::lowest(); } @@ -110,30 +116,32 @@ std::string CNaiveBayesFeatureDensityFromPrior::print() const { return result; } -CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate, TOptionalDouble minMaxLogLikelihoodToUseFeature) +CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, + double decayRate, + TOptionalDouble minMaxLogLikelihoodToUseFeature) : m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature}, - m_DecayRate{decayRate}, - m_Exemplar{exemplar.clone()}, - m_ClassConditionalDensities{2} { + m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} { } -CNaiveBayes::CNaiveBayes(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) +CNaiveBayes::CNaiveBayes(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) : m_DecayRate{params.s_DecayRate}, m_ClassConditionalDensities{2} { - traverser.traverseSubLevel(boost::bind(&CNaiveBayes::acceptRestoreTraverser, this, boost::cref(params), _1)); + traverser.traverseSubLevel(boost::bind(&CNaiveBayes::acceptRestoreTraverser, + this, boost::cref(params), _1)); } -bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { std::size_t label; do { const std::string& name{traverser.name()}; RESTORE_BUILT_IN(CLASS_LABEL_TAG, label) - RESTORE_SETUP_TEARDOWN( - CLASS_MODEL_TAG, - SClass class_, - traverser.traverseSubLevel(boost::bind(&SClass::acceptRestoreTraverser, boost::ref(class_), boost::cref(params), _1)), - m_ClassConditionalDensities.emplace(label, class_)) - RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, - double value, + RESTORE_SETUP_TEARDOWN(CLASS_MODEL_TAG, SClass class_, + traverser.traverseSubLevel(boost::bind( + &SClass::acceptRestoreTraverser, + boost::ref(class_), boost::cref(params), _1)), + m_ClassConditionalDensities.emplace(label, class_)) + RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, double value, core::CStringUtils::stringToType(traverser.value(), value), m_MinMaxLogLikelihoodToUseFeature.reset(value)) } while (traverser.next()); @@ -145,17 +153,22 @@ void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter& inserter) c using TSizeClassUMapCItrVec = std::vector<TSizeClassUMap::const_iterator>; TSizeClassUMapCItrVec classes; classes.reserve(m_ClassConditionalDensities.size()); - for (auto i = m_ClassConditionalDensities.begin(); i != m_ClassConditionalDensities.end(); ++i) { + for (auto i = m_ClassConditionalDensities.begin(); + i != m_ClassConditionalDensities.end(); ++i) { classes.push_back(i); } - std::sort(classes.begin(), classes.end(), core::CFunctional::SDereference()); + std::sort(classes.begin(), classes.end(), + core::CFunctional::SDereference()); for (const auto& class_ : classes) { inserter.insertValue(CLASS_LABEL_TAG, class_->first); - inserter.insertLevel(CLASS_MODEL_TAG, boost::bind(&SClass::acceptPersistInserter, boost::ref(class_->second), _1)); + inserter.insertLevel(CLASS_MODEL_TAG, + boost::bind(&SClass::acceptPersistInserter, + boost::ref(class_->second), _1)); } if (m_MinMaxLogLikelihoodToUseFeature) { - inserter.insertValue( - MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, *m_MinMaxLogLikelihoodToUseFeature, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, + *m_MinMaxLogLikelihoodToUseFeature, + core::CIEEE754::E_SinglePrecision); } } @@ -186,7 +199,8 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& if (class_.s_ConditionalDensities.empty()) { class_.s_ConditionalDensities.reserve(x.size()); std::generate_n( - std::back_inserter(class_.s_ConditionalDensities), x.size(), [this]() { return TFeatureDensityPtr{m_Exemplar->clone()}; }); + std::back_inserter(class_.s_ConditionalDensities), x.size(), + [this]() { return TFeatureDensityPtr{m_Exemplar->clone()}; }); } bool updateCount{false}; @@ -222,7 +236,8 @@ void CNaiveBayes::propagateForwardsByTime(double time) { } } -CNaiveBayes::TDoubleSizePrVec CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const { +CNaiveBayes::TDoubleSizePrVec +CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const { TDoubleSizePrVec p(this->classProbabilities(x)); n = std::min(n, p.size()); std::sort(p.begin(), p.begin() + n, std::greater<TDoubleSizePr>()); @@ -231,7 +246,9 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::highestClassProbabilities(std::size_t double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x) const { TDoubleSizePrVec p(this->classProbabilities(x)); - auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) { return p_.second == label; }); + auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) { + return p_.second == label; + }); return i == p.end() ? 0.0 : i->first; } @@ -268,7 +285,9 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV double weight{1.0}; if (m_MinMaxLogLikelihoodToUseFeature) { weight = CTools::logisticFunction( - (maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) / std::fabs(*m_MinMaxLogLikelihoodToUseFeature), 0.1); + (maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) / + std::fabs(*m_MinMaxLogLikelihoodToUseFeature), + 0.1); } for (std::size_t j = 0u; j < logLikelihoods.size(); ++j) { p[j].first += weight * logLikelihoods[j]; @@ -291,11 +310,13 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV void CNaiveBayes::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { core::CMemoryDebug::dynamicSize("m_Exemplar", m_Exemplar, mem); - core::CMemoryDebug::dynamicSize("m_ClassConditionalDensities", m_ClassConditionalDensities, mem); + core::CMemoryDebug::dynamicSize("m_ClassConditionalDensities", + m_ClassConditionalDensities, mem); } std::size_t CNaiveBayes::memoryUsage() const { - return core::CMemory::dynamicSize(m_Exemplar) + core::CMemory::dynamicSize(m_ClassConditionalDensities); + return core::CMemory::dynamicSize(m_Exemplar) + + core::CMemory::dynamicSize(m_ClassConditionalDensities); } uint64_t CNaiveBayes::checksum(uint64_t seed) const { @@ -317,7 +338,8 @@ std::string CNaiveBayes::print() const { bool CNaiveBayes::validate(const TDouble1VecVec& x) const { auto class_ = m_ClassConditionalDensities.begin(); - if (class_ != m_ClassConditionalDensities.end() && class_->second.s_ConditionalDensities.size() > 0 && + if (class_ != m_ClassConditionalDensities.end() && + class_->second.s_ConditionalDensities.size() > 0 && class_->second.s_ConditionalDensities.size() != x.size()) { LOG_ERROR("Unexpected feature vector: " << core::CContainerPrinter::print(x)); return false; @@ -325,14 +347,16 @@ bool CNaiveBayes::validate(const TDouble1VecVec& x) const { return true; } -bool CNaiveBayes::SClass::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CNaiveBayes::SClass::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; RESTORE_BUILT_IN(COUNT_TAG, s_Count) RESTORE_SETUP_TEARDOWN(CONDITIONAL_DENSITY_FROM_PRIOR_TAG, CNaiveBayesFeatureDensityFromPrior tmp, traverser.traverseSubLevel(boost::bind( - &CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser, boost::ref(tmp), boost::cref(params), _1)), + &CNaiveBayesFeatureDensityFromPrior::acceptRestoreTraverser, + boost::ref(tmp), boost::cref(params), _1)), s_ConditionalDensities.emplace_back(tmp.clone())) // Add other implementations' restore code here. } while (traverser.next()); @@ -344,7 +368,8 @@ void CNaiveBayes::SClass::acceptPersistInserter(core::CStatePersistInserter& ins for (const auto& density : s_ConditionalDensities) { if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get())) { inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG, - boost::bind(&CNaiveBayesFeatureDensity::acceptPersistInserter, density.get(), _1)); + boost::bind(&CNaiveBayesFeatureDensity::acceptPersistInserter, + density.get(), _1)); continue; } // Add other implementations' persist code here.
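[Aside, not part of the patch: the CNaiveBayes hunks above only re-wrap the class probability calculation, which is easy to lose in the diff noise. The following is a minimal, self-contained sketch of that calculation under assumed names (classProbabilities and minLogLikelihood stand in for CNaiveBayes::classProbabilities and m_MinMaxLogLikelihoodToUseFeature, and the class prior counts the real code folds in are omitted). Only the shape of the computation follows the code above: per-feature log-likelihoods are summed per class, a feature whose best log-likelihood over all classes is very low is softly down-weighted by a logistic gate, and the class scores are normalised with log-sum-exp for numerical stability.]

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// logLikelihoods[i][j] = log f(feature i | class j). "minLogLikelihood" is a
// hypothetical stand-in for m_MinMaxLogLikelihoodToUseFeature and is assumed
// to be negative.
std::vector<double> classProbabilities(const std::vector<std::vector<double>>& logLikelihoods,
                                       double minLogLikelihood) {
    if (logLikelihoods.empty()) {
        return {};
    }
    std::vector<double> scores(logLikelihoods[0].size(), 0.0);
    for (const auto& feature : logLikelihoods) {
        double best{*std::max_element(feature.begin(), feature.end())};
        // Soft gate in (0, 1): a feature the class models have essentially
        // never seen contributes little evidence. This mirrors the
        // CTools::logisticFunction call in the diff, with width 0.1.
        double weight{1.0 / (1.0 + std::exp(-(best - minLogLikelihood) /
                                            (0.1 * std::fabs(minLogLikelihood))))};
        for (std::size_t j = 0; j < scores.size(); ++j) {
            scores[j] += weight * feature[j];
        }
    }
    // Normalise via log-sum-exp so that exponentiation cannot overflow.
    double max{*std::max_element(scores.begin(), scores.end())};
    double normalizer{0.0};
    for (double score : scores) {
        normalizer += std::exp(score - max);
    }
    for (double& score : scores) {
        score = std::exp(score - max) / normalizer;
    }
    return scores;
}

int main() {
    // Two features, two classes: the first feature clearly favours class 0,
    // the second is poorly supported by both classes and so is down-weighted.
    std::vector<std::vector<double>> logLikelihoods{{-1.0, -4.0}, {-30.0, -28.0}};
    for (double p : classProbabilities(logLikelihoods, -20.0)) {
        std::cout << p << '\n';
    }
    return 0;
}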
diff --git a/lib/maths/CNaturalBreaksClassifier.cc b/lib/maths/CNaturalBreaksClassifier.cc index 9f4366b282..436faf5709 100644 --- a/lib/maths/CNaturalBreaksClassifier.cc +++ b/lib/maths/CNaturalBreaksClassifier.cc @@ -36,7 +36,8 @@ namespace { //! Orders two tuples by their mean. struct SMeanLess { - bool operator()(const CNaturalBreaksClassifier::TTuple& lhs, const CNaturalBreaksClassifier::TTuple& rhs) const { + bool operator()(const CNaturalBreaksClassifier::TTuple& lhs, + const CNaturalBreaksClassifier::TTuple& rhs) const { return CBasicStatistics::mean(lhs) < CBasicStatistics::mean(rhs); } }; @@ -46,7 +47,9 @@ class CCountLessThan { public: CCountLessThan(double count) : m_Count(count) {} - bool operator()(const CNaturalBreaksClassifier::TTuple& tuple) const { return CBasicStatistics::count(tuple) < m_Count; } + bool operator()(const CNaturalBreaksClassifier::TTuple& tuple) const { + return CBasicStatistics::count(tuple) < m_Count; + } private: double m_Count; @@ -59,13 +62,17 @@ const std::string DECAY_RATE_TAG("d"); const std::string EMPTY_STRING; } -CNaturalBreaksClassifier::CNaturalBreaksClassifier(std::size_t space, double decayRate, double minimumCategoryCount) - : m_Space(std::max(space, MINIMUM_SPACE)), m_DecayRate(decayRate), m_MinimumCategoryCount(minimumCategoryCount) { +CNaturalBreaksClassifier::CNaturalBreaksClassifier(std::size_t space, + double decayRate, + double minimumCategoryCount) + : m_Space(std::max(space, MINIMUM_SPACE)), m_DecayRate(decayRate), + m_MinimumCategoryCount(minimumCategoryCount) { m_Categories.reserve(m_Space + MAXIMUM_BUFFER_SIZE + 1u); m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); } -bool CNaturalBreaksClassifier::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CNaturalBreaksClassifier::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { m_DecayRate = params.s_DecayRate; m_MinimumCategoryCount = params.s_MinimumCategoryCount; @@ -102,14 +109,17 @@ double CNaturalBreaksClassifier::percentile(double p) const { double count = CBasicStatistics::count(m_Categories[i]); if (percentileCount < count) { double mean = CBasicStatistics::mean(m_Categories[i]); - double deviation = std::sqrt(CBasicStatistics::maximumLikelihoodVariance(m_Categories[i])); + double deviation = std::sqrt( + CBasicStatistics::maximumLikelihoodVariance(m_Categories[i])); if (deviation == 0.0) { return mean; } boost::math::normal_distribution<> normal(mean, deviation); double q = (count - percentileCount) / count; - double x = q > 0.0 && q < 1.0 ? boost::math::quantile(normal, q) : (2.0 * q - 1.0) * boost::numeric::bounds<double>::highest(); + double x = q > 0.0 && q < 1.0 + ? boost::math::quantile(normal, q) + : (2.0 * q - 1.0) * boost::numeric::bounds<double>::highest(); LOG_TRACE(<< "N(" << mean << "," << deviation << ")" << ", q = " << q << ", x = " << x) @@ -119,7 +129,8 @@ double CNaturalBreaksClassifier::percentile(double p) const { // details. double n1 = std::sqrt(CBasicStatistics::count(m_Categories[i - 1])); double m1 = CBasicStatistics::mean(m_Categories[i - 1]); - double d1 = std::sqrt(CBasicStatistics::maximumLikelihoodVariance(m_Categories[i - 1])); + double d1 = std::sqrt( + CBasicStatistics::maximumLikelihoodVariance(m_Categories[i - 1])); double n2 = count; double m2 = mean; double d2 = deviation; @@ -138,7 +149,8 @@ double CNaturalBreaksClassifier::percentile(double p) const { double d1 = deviation; double n2 = std::sqrt(CBasicStatistics::count(m_Categories[i + 1])); double m2 = CBasicStatistics::mean(m_Categories[i + 1]); - double d2 = std::sqrt(CBasicStatistics::maximumLikelihoodVariance(m_Categories[i + 1])); + double d2 = std::sqrt( + CBasicStatistics::maximumLikelihoodVariance(m_Categories[i + 1])); double w1 = std::sqrt(n2 * d2); double w2 = std::sqrt(n1 * d1); double xr = (w1 * m1 + w2 * m2) / (w1 + w2); @@ -182,7 +194,8 @@ bool CNaturalBreaksClassifier::split(std::size_t n, std::size_t p, TClassifierVe TTupleVec category(1); for (std::size_t i = 0u; i < m_Categories.size(); ++i) { category[0] = m_Categories[i]; - result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, m_MinimumCategoryCount, category)); + result.push_back(CNaturalBreaksClassifier( + m_Space, m_DecayRate, m_MinimumCategoryCount, category)); } return true; } else if (n == 1) { @@ -209,7 +222,8 @@ bool CNaturalBreaksClassifier::split(std::size_t n, std::size_t p, TClassifierVe for (/**/; j < split[i]; ++j) { categories.push_back(m_Categories[j]); } - result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, m_MinimumCategoryCount, categories)); + result.push_back(CNaturalBreaksClassifier( + m_Space, m_DecayRate, m_MinimumCategoryCount, categories)); } return true; @@ -221,7 +235,8 @@ bool CNaturalBreaksClassifier::split(const TSizeVec& split, TClassifierVec& resu this->reduce(); // Sanity checks. - if (split.empty() || split[split.size() - 1] != m_Categories.size() || !boost::algorithm::is_sorted(split.begin(), split.end())) { + if (split.empty() || split[split.size() - 1] != m_Categories.size() || + !boost::algorithm::is_sorted(split.begin(), split.end())) { LOG_ERROR(<< "Bad split = " << core::CContainerPrinter::print(split)); return false; } @@ -234,7 +249,8 @@ bool CNaturalBreaksClassifier::split(const TSizeVec& split, TClassifierVec& resu for (/**/; j < split[i]; ++j) { categories.push_back(m_Categories[j]); } - result.push_back(CNaturalBreaksClassifier(m_Space, m_DecayRate, m_MinimumCategoryCount, categories)); + result.push_back(CNaturalBreaksClassifier( + m_Space, m_DecayRate, m_MinimumCategoryCount, categories)); } return true; @@ -274,7 +290,8 @@ bool CNaturalBreaksClassifier::categories(std::size_t n, std::size_t p, TTupleVe return true; } else if (n == 1) { double p_ = static_cast<double>(p); - TTuple category = std::accumulate(m_Categories.begin(), m_Categories.end(), TTuple()); + TTuple category = + std::accumulate(m_Categories.begin(), m_Categories.end(), TTuple()); if (CBasicStatistics::count(category) < p_) { return false; } @@ -303,7 +320,8 @@ bool CNaturalBreaksClassifier::categories(const TSizeVec& split, TTupleVec& resu result.clear(); // Sanity checks. - if (split.empty() || split[split.size() - 1] != m_Categories.size() || !boost::algorithm::is_sorted(split.begin(), split.end())) { + if (split.empty() || split[split.size() - 1] != m_Categories.size() || + !boost::algorithm::is_sorted(split.begin(), split.end())) { LOG_ERROR(<< "Bad split = " << core::CContainerPrinter::print(split)); return false; } @@ -337,9 +355,11 @@ void CNaturalBreaksClassifier::merge(const CNaturalBreaksClassifier& other) { for (std::size_t i = 0u; i < other.m_PointsBuffer.size(); ++i) { m_Categories.push_back(TTuple()); - m_Categories.back().add(other.m_PointsBuffer[i].first, other.m_PointsBuffer[i].second); + m_Categories.back().add(other.m_PointsBuffer[i].first, + other.m_PointsBuffer[i].second); } - m_Categories.insert(m_Categories.end(), other.m_Categories.begin(), other.m_Categories.end()); + m_Categories.insert(m_Categories.end(), other.m_Categories.begin(), + other.m_Categories.end()); this->reduce(); @@ -368,7 +388,8 @@ void CNaturalBreaksClassifier::propagateForwardsByTime(double time) { // Prune any dead categories: we're not interested in maintaining // categories with low counts. - m_Categories.erase(std::remove_if(m_Categories.begin(), m_Categories.end(), CCountLessThan(m_MinimumCategoryCount)), + m_Categories.erase(std::remove_if(m_Categories.begin(), m_Categories.end(), + CCountLessThan(m_MinimumCategoryCount)), m_Categories.end()); LOG_TRACE(<< "categories = " << core::CContainerPrinter::print(m_Categories)); @@ -378,7 +399,10 @@ bool CNaturalBreaksClassifier::buffering() const { return m_PointsBuffer.size() > 0; } -void CNaturalBreaksClassifier::sample(std::size_t numberSamples, double smallest, double /*largest*/, TDoubleVec& result) const { +void CNaturalBreaksClassifier::sample(std::size_t numberSamples, + double smallest, + double /*largest*/, + TDoubleVec& result) const { result.clear(); if (numberSamples == 0) { return; } @@ -406,7 +430,8 @@ void CNaturalBreaksClassifier::sample(std::size_t numberSamples, double smallest } numberSamples = std::min(numberSamples, static_cast<std::size_t>(weightSum)); - LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(weights) << ", weightSum = " << weightSum << ", n = " << numberSamples); + LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(weights) + << ", weightSum = " << weightSum << ", n = " << numberSamples); result.reserve(numberSamples); @@ -444,7 +469,9 @@ void CNaturalBreaksClassifier::sample(std::size_t numberSamples, double smallest sample.add(categorySamples[j], nij); if (CBasicStatistics::count(sample) > ALMOST_ONE) { result.push_back(CBasicStatistics::mean(sample)); - sample = nij < ni ? CBasicStatistics::accumulator(ni - nij, categorySamples[j]) : TMeanAccumulator(); + sample = nij < ni ? CBasicStatistics::accumulator( + ni - nij, categorySamples[j]) : TMeanAccumulator(); } } } @@ -570,7 +597,9 @@ bool CNaturalBreaksClassifier::naturalBreaksImpl(const std::vector& categ TTuple t; for (std::size_t j = i; j >= m; --j) { t += categories[j]; - double c = (D[j - 1][m - 1] == INF || CBasicStatistics::count(t) < pp) ? INF : D[j - 1][m - 1] + objective(target, t); + double c = (D[j - 1][m - 1] == INF || CBasicStatistics::count(t) < pp) ? INF + : D[j - 1][m - 1] + objective(target, t); if (c <= d) { b = j; d = c; @@ -607,8 +636,12 @@ bool CNaturalBreaksClassifier::naturalBreaksImpl(const std::vector& categ return true; } -CNaturalBreaksClassifier::CNaturalBreaksClassifier(std::size_t space, double decayRate, double minimumCategoryCount, TTupleVec& categories) - : m_Space(space), m_DecayRate(decayRate), m_MinimumCategoryCount(minimumCategoryCount) { +CNaturalBreaksClassifier::CNaturalBreaksClassifier(std::size_t space, + double decayRate, + double minimumCategoryCount, + TTupleVec& categories) + : m_Space(space), m_DecayRate(decayRate), + m_MinimumCategoryCount(minimumCategoryCount) { m_Categories.swap(categories); m_Categories.reserve(m_Space + MAXIMUM_BUFFER_SIZE + 1u); m_PointsBuffer.reserve(MAXIMUM_BUFFER_SIZE); @@ -651,10 +684,13 @@ CNaturalBreaksClassifier::TSizeSizePr CNaturalBreaksClassifier::closestPair() co double dDeviationMin = boost::numeric::bounds<double>::highest(); for (std::size_t i = 1u; i < m_Categories.size(); ++i) { - double dDeviation = deviation(m_Categories[i] + m_Categories[i - 1]) - deviation(m_Categories[i]) - deviation(m_Categories[i - 1]); + double dDeviation = deviation(m_Categories[i] + m_Categories[i - 1]) - + deviation(m_Categories[i]) - deviation(m_Categories[i - 1]); - LOG_TRACE(<< "mean[" << i - 1 << "] = " << CBasicStatistics::mean(m_Categories[i - 1]) << ", mean[" << i - << "] = " << CBasicStatistics::mean(m_Categories[i]) << ", dDeviation = " << dDeviation); + LOG_TRACE(<< "mean[" << i - 1 + << "] = " << CBasicStatistics::mean(m_Categories[i - 1]) << ", mean[" + << i << "] = " << CBasicStatistics::mean(m_Categories[i]) + << ", dDeviation = " << dDeviation); if (dDeviation < dDeviationMin) { result = TSizeSizePr(i - 1, i); diff --git a/lib/maths/CNormalMeanPrecConjugate.cc b/lib/maths/CNormalMeanPrecConjugate.cc index 1de887ca74..7f12a79650 100644 --- a/lib/maths/CNormalMeanPrecConjugate.cc +++ b/lib/maths/CNormalMeanPrecConjugate.cc @@ -57,7 +57,9 @@ using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>; //! Adds "weight" x "right operand" to the "left operand". struct SPlusWeight { - double operator()(double lhs, double rhs, double weight = 1.0) const { return lhs + weight * rhs; } + double operator()(double lhs, double rhs, double weight = 1.0) const { + return lhs + weight * rhs; + } }; //! Evaluate \p func on the joint predictive distribution for \p samples @@ -137,16 +139,21 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); + double seasonalScale = std::sqrt( + maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double countVarianceScale = + maths_t::countVarianceScale(weightStyles, weights[i]); - double x = seasonalScale != 1.0 ? predictionMean + (samples[i] - predictionMean) / seasonalScale : samples[i]; + double x = seasonalScale != 1.0 + ? predictionMean + (samples[i] - predictionMean) / seasonalScale : samples[i]; // Get the effective precision and rate of the sample.
double scaledPrecision = countVarianceScale * precision; double scaledRate = countVarianceScale * rate; - double deviation = std::sqrt((scaledPrecision + 1.0) / scaledPrecision * scaledRate / shape); + double deviation = std::sqrt((scaledPrecision + 1.0) / + scaledPrecision * scaledRate / shape); boost::math::normal_distribution<> normal(mean, deviation); result = aggregate(result, func(normal, x + offset), n); } @@ -162,16 +169,21 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::count(weightStyles, weights[i]); - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); + double seasonalScale = std::sqrt( + maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double countVarianceScale = + maths_t::countVarianceScale(weightStyles, weights[i]); - double x = seasonalScale != 1.0 ? predictionMean + (samples[i] - predictionMean) / seasonalScale : samples[i]; + double x = seasonalScale != 1.0 + ? predictionMean + (samples[i] - predictionMean) / seasonalScale + : samples[i]; // Get the effective precision and rate of the sample. double scaledPrecision = countVarianceScale * precision; double scaledRate = countVarianceScale * rate; - double scale = std::sqrt((scaledPrecision + 1.0) / scaledPrecision * scaledRate / shape); + double scale = std::sqrt((scaledPrecision + 1.0) / + scaledPrecision * scaledRate / shape); double sample = (x + offset - mean) / scale; result = aggregate(result, func(students, sample), n); } @@ -204,30 +216,14 @@ class CEvaluateOnSamples : core::CNonCopyable { double shape, double rate, double predictionMean) - : m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_PredictionMean(predictionMean) {} + : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + m_IsNonInformative(isNonInformative), m_Mean(mean), m_Precision(precision), + m_Shape(shape), m_Rate(rate), m_PredictionMean(predictionMean) {} bool operator()(double x, double& result) const { - return evaluateFunctionOnJointDistribution(m_WeightStyles, - m_Samples, - m_Weights, - F(), - SPlusWeight(), - m_IsNonInformative, - x, - m_Shape, - m_Rate, - m_Mean, - m_Precision, - m_PredictionMean, - result); + return evaluateFunctionOnJointDistribution( + m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, + x, m_Shape, m_Rate, m_Mean, m_Precision, m_PredictionMean, result); } private: @@ -259,36 +255,21 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { double shape, double rate, double predictionMean) - : m_Calculation(calculation), - m_WeightStyles(weightStyles), - m_Samples(samples), - m_Weights(weights), - m_IsNonInformative(isNonInformative), - m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_PredictionMean(predictionMean), - m_Tail(0) {} + : m_Calculation(calculation), m_WeightStyles(weightStyles), + m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), + m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate), + m_PredictionMean(predictionMean), m_Tail(0) {} bool operator()(double x, double& result) const { CJointProbabilityOfLessLikelySamples probability; maths_t::ETail tail = maths_t::E_UndeterminedTail; if 
(!evaluateFunctionOnJointDistribution( - m_WeightStyles, - m_Samples, - m_Weights, - boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), - CJointProbabilityOfLessLikelySamples::SAddProbability(), - m_IsNonInformative, - x, - m_Shape, - m_Rate, - m_Mean, - m_Precision, - m_PredictionMean, - probability) || + m_WeightStyles, m_Samples, m_Weights, + boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), + _1, _2, boost::ref(tail)), + CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative, + x, m_Shape, m_Rate, m_Mean, m_Precision, m_PredictionMean, probability) || !probability.calculate(result)) { LOG_ERROR(<< "Failed to compute probability of less likely samples"); return false; @@ -343,15 +324,9 @@ class CLogMarginalLikelihood : core::CNonCopyable { double shape, double rate, double predictionMean) - : m_Mean(mean), - m_Precision(precision), - m_Shape(shape), - m_Rate(rate), - m_NumberSamples(0.0), - m_WeightedNumberSamples(0.0), - m_SampleMean(0.0), - m_SampleSquareDeviation(0.0), - m_Constant(0.0), + : m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate), + m_NumberSamples(0.0), m_WeightedNumberSamples(0.0), m_SampleMean(0.0), + m_SampleSquareDeviation(0.0), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { this->precompute(weightStyles, samples, weights, predictionMean); } @@ -364,23 +339,30 @@ class CLogMarginalLikelihood : core::CNonCopyable { double sampleMean = m_SampleMean + x; double impliedShape = m_Shape + 0.5 * m_NumberSamples; - double impliedRate = m_Rate + 0.5 * (m_SampleSquareDeviation + m_Precision * m_WeightedNumberSamples * (sampleMean - m_Mean) * - (sampleMean - m_Mean) / (m_Precision + m_WeightedNumberSamples)); + double impliedRate = + m_Rate + 0.5 * (m_SampleSquareDeviation + + m_Precision * m_WeightedNumberSamples * + (sampleMean - m_Mean) * (sampleMean - m_Mean) / + (m_Precision + m_WeightedNumberSamples)); result = m_Constant - impliedShape * std::log(impliedRate); return true; } //! Retrieve the error status for the integration. - maths_t::EFloatingPointErrorStatus errorStatus() const { return m_ErrorStatus; } + maths_t::EFloatingPointErrorStatus errorStatus() const { + return m_ErrorStatus; + } private: static const double LOG_2_PI; private: //! Compute all the constants in the integrand. 
- void - precompute(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights, double predictionMean) { + void precompute(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double predictionMean) { m_NumberSamples = 0.0; TMeanVarAccumulator sampleMoments; double logVarianceScaleSum = 0.0; @@ -388,12 +370,15 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights[i]); + double seasonalScale = std::sqrt( + maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double countVarianceScale = + maths_t::countVarianceScale(weightStyles, weights[i]); double w = 1.0 / countVarianceScale; m_NumberSamples += n; if (seasonalScale != 1.0) { - sampleMoments.add(predictionMean + (samples[i] - predictionMean) / seasonalScale, n * w); + sampleMoments.add(predictionMean + (samples[i] - predictionMean) / seasonalScale, + n * w); logVarianceScaleSum += 2.0 * std::log(seasonalScale); } else { sampleMoments.add(samples[i], n * w); @@ -404,14 +389,16 @@ class CLogMarginalLikelihood : core::CNonCopyable { } m_WeightedNumberSamples = CBasicStatistics::count(sampleMoments); m_SampleMean = CBasicStatistics::mean(sampleMoments); - m_SampleSquareDeviation = (m_WeightedNumberSamples - 1.0) * CBasicStatistics::variance(sampleMoments); + m_SampleSquareDeviation = (m_WeightedNumberSamples - 1.0) * + CBasicStatistics::variance(sampleMoments); double impliedShape = m_Shape + 0.5 * m_NumberSamples; double impliedPrecision = m_Precision + m_WeightedNumberSamples; - m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision)) - 0.5 * m_NumberSamples * LOG_2_PI - - 0.5 * logVarianceScaleSum + boost::math::lgamma(impliedShape) - boost::math::lgamma(m_Shape) + - m_Shape * std::log(m_Rate); + m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision)) - + 0.5 * m_NumberSamples * LOG_2_PI - 0.5 * logVarianceScaleSum + + boost::math::lgamma(impliedShape) - + boost::math::lgamma(m_Shape) + m_Shape * std::log(m_Rate); } catch (const std::exception& e) { LOG_ERROR(<< "Error calculating marginal likelihood: " << e.what()); this->addErrorStatus(maths_t::E_FpFailed); @@ -436,7 +423,8 @@ class CLogMarginalLikelihood : core::CNonCopyable { mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus; }; -const double CLogMarginalLikelihood::LOG_2_PI = std::log(boost::math::double_constants::two_pi); +const double CLogMarginalLikelihood::LOG_2_PI = + std::log(boost::math::double_constants::two_pi); } // detail:: @@ -458,35 +446,39 @@ CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(maths_t::EDataType dataType, double gammaShape, double gammaRate, double decayRate /*= 0.0*/) - : CPrior(dataType, decayRate), - m_GaussianMean(gaussianMean), - m_GaussianPrecision(gaussianPrecision), - m_GammaShape(gammaShape), + : CPrior(dataType, decayRate), m_GaussianMean(gaussianMean), + m_GaussianPrecision(gaussianPrecision), m_GammaShape(gammaShape), m_GammaRate(gammaRate) { this->numberSamples(gaussianPrecision); } -CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(maths_t::EDataType dataType, const TMeanVarAccumulator& moments, double decayRate) - : CPrior(dataType, decayRate), m_GaussianMean(0.0), m_GaussianPrecision(0.0), m_GammaShape(0.0), 
m_GammaRate(0.0) { +CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(maths_t::EDataType dataType, + const TMeanVarAccumulator& moments, + double decayRate) + : CPrior(dataType, decayRate), m_GaussianMean(0.0), + m_GaussianPrecision(0.0), m_GammaShape(0.0), m_GammaRate(0.0) { this->reset(dataType, moments, decayRate); } -CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) - : CPrior(params.s_DataType, params.s_DecayRate), m_GaussianMean(0.0), m_GaussianPrecision(0.0), m_GammaShape(0.0), m_GammaRate(0.0) { - traverser.traverseSubLevel(boost::bind(&CNormalMeanPrecConjugate::acceptRestoreTraverser, this, _1)); +CNormalMeanPrecConjugate::CNormalMeanPrecConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) + : CPrior(params.s_DataType, params.s_DecayRate), m_GaussianMean(0.0), + m_GaussianPrecision(0.0), m_GammaShape(0.0), m_GammaRate(0.0) { + traverser.traverseSubLevel( + boost::bind(&CNormalMeanPrecConjugate::acceptRestoreTraverser, this, _1)); } bool CNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) RESTORE_BUILT_IN(GAUSSIAN_MEAN_TAG, m_GaussianMean) RESTORE_BUILT_IN(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision) RESTORE_BUILT_IN(GAMMA_SHAPE_TAG, m_GammaShape) RESTORE_BUILT_IN(GAMMA_RATE_TAG, m_GammaRate) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) } while (traverser.next()); @@ -494,7 +486,9 @@ bool CNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTravers return true; } -void CNormalMeanPrecConjugate::reset(maths_t::EDataType dataType, const TMeanVarAccumulator& moments, double decayRate) { +void CNormalMeanPrecConjugate::reset(maths_t::EDataType dataType, + const TMeanVarAccumulator& moments, + double decayRate) { this->dataType(dataType); this->decayRate(decayRate); @@ -505,7 +499,8 @@ void CNormalMeanPrecConjugate::reset(maths_t::EDataType dataType, const TMeanVar m_GaussianMean = NON_INFORMATIVE_MEAN + mean + (this->isInteger() ? 0.5 : 0.0); m_GaussianPrecision = NON_INFORMATIVE_PRECISION + n; m_GammaShape = NON_INFORMATIVE_SHAPE + n / 2.0; - m_GammaRate = NON_INFORMATIVE_RATE + n / 2.0 * (variance + (this->isInteger() ? 1.0 / 12.0 : 0.0)); + m_GammaRate = NON_INFORMATIVE_RATE + + n / 2.0 * (variance + (this->isInteger() ? 1.0 / 12.0 : 0.0)); // If the coefficient of variation of the data is too small we run // in to numerical problems. 
We truncate the variation by modeling @@ -527,9 +522,12 @@ bool CNormalMeanPrecConjugate::needsOffset() const { return false; } -CNormalMeanPrecConjugate CNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType, double decayRate /*= 0.0*/) { - return CNormalMeanPrecConjugate( - dataType, NON_INFORMATIVE_MEAN, NON_INFORMATIVE_PRECISION, NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, decayRate); +CNormalMeanPrecConjugate +CNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType, + double decayRate /*= 0.0*/) { + return CNormalMeanPrecConjugate(dataType, NON_INFORMATIVE_MEAN, + NON_INFORMATIVE_PRECISION, NON_INFORMATIVE_SHAPE, + NON_INFORMATIVE_RATE, decayRate); } CNormalMeanPrecConjugate::EPrior CNormalMeanPrecConjugate::type() const { @@ -554,13 +552,16 @@ double CNormalMeanPrecConjugate::offset() const { return 0.0; } -void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { if (samples.empty()) { return; } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return; } @@ -626,7 +627,8 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, c for (std::size_t i = 0u; i < samples.size(); ++i) { double n = maths_t::countForUpdate(weightStyles, weights[i]); double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::seasonalVarianceScale(weightStyles, weights[i]) * + maths_t::countVarianceScale(weightStyles, weights[i]); numberSamples += n; sampleMoments.add(samples[i], n / varianceScale); } @@ -636,7 +638,8 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, c } double scaledNumberSamples = CBasicStatistics::count(sampleMoments); double sampleMean = CBasicStatistics::mean(sampleMoments); - double sampleSquareDeviation = (scaledNumberSamples - 1.0) * CBasicStatistics::variance(sampleMoments); + double sampleSquareDeviation = (scaledNumberSamples - 1.0) * + CBasicStatistics::variance(sampleMoments); if (this->isInteger()) { sampleMean += 0.5; @@ -644,11 +647,13 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, c } m_GammaShape += 0.5 * numberSamples; - m_GammaRate += 0.5 * (sampleSquareDeviation + m_GaussianPrecision * scaledNumberSamples * (sampleMean - m_GaussianMean) * - (sampleMean - m_GaussianMean) / (m_GaussianPrecision + scaledNumberSamples)); + m_GammaRate += 0.5 * (sampleSquareDeviation + + m_GaussianPrecision * scaledNumberSamples * + (sampleMean - m_GaussianMean) * (sampleMean - m_GaussianMean) / + (m_GaussianPrecision + scaledNumberSamples)); - m_GaussianMean = - (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * sampleMean) / (m_GaussianPrecision + scaledNumberSamples); + m_GaussianMean = (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * sampleMean) / + (m_GaussianPrecision + scaledNumberSamples); m_GaussianPrecision += scaledNumberSamples; // If the coefficient of variation of the data is too small we run @@ -665,8 +670,9 @@ void CNormalMeanPrecConjugate::addSamples(const 
TWeightStyleVec& weightStyles, c } LOG_TRACE(<< "sampleMean = " << sampleMean << ", sampleSquareDeviation = " << sampleSquareDeviation - << ", numberSamples = " << numberSamples << ", scaledNumberSamples = " << scaledNumberSamples - << ", m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate << ", m_GaussianMean = " << m_GaussianMean + << ", numberSamples = " << numberSamples << ", scaledNumberSamples = " + << scaledNumberSamples << ", m_GammaShape = " << m_GammaShape + << ", m_GammaRate = " << m_GammaRate << ", m_GaussianMean = " << m_GaussianMean << ", m_GaussianPrecision = " << m_GaussianPrecision); if (this->isBad()) { @@ -703,30 +709,37 @@ void CNormalMeanPrecConjugate::propagateForwardsByTime(double time) { // // Thus the mean is unchanged and variance is increased by 1 / f. - double factor = std::min((alpha * m_GammaShape + beta * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0); + double factor = std::min( + (alpha * m_GammaShape + beta * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0); m_GammaShape *= factor; m_GammaRate *= factor; this->numberSamples(this->numberSamples() * alpha); - LOG_TRACE(<< "time = " << time << ", alpha = " << alpha << ", m_GaussianPrecision = " << m_GaussianPrecision - << ", m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate << ", numberSamples = " << this->numberSamples()); + LOG_TRACE(<< "time = " << time << ", alpha = " << alpha + << ", m_GaussianPrecision = " << m_GaussianPrecision + << ", m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate + << ", numberSamples = " << this->numberSamples()); } -CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::marginalLikelihoodSupport() const { - return std::make_pair(boost::numeric::bounds::lowest(), boost::numeric::bounds::highest()); +CNormalMeanPrecConjugate::TDoubleDoublePr +CNormalMeanPrecConjugate::marginalLikelihoodSupport() const { + return std::make_pair(boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()); } double CNormalMeanPrecConjugate::marginalLikelihoodMean() const { return this->isInteger() ? 
this->mean() - 0.5 : this->mean(); } -double CNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const { +double CNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, + const TDouble4Vec& /*weights*/) const { return this->marginalLikelihoodMean(); } -double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { if (this->isNonInformative() || m_GammaShape <= 1.0) { return boost::numeric::bounds::highest(); } @@ -742,8 +755,11 @@ double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVe double varianceScale = 1.0; try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale: " << e.what()); } + varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * + maths_t::countVarianceScale(weightStyles, weights); + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get variance scale: " << e.what()); + } double a = m_GammaShape; double b = m_GammaRate; double t = m_GaussianPrecision; @@ -764,41 +780,55 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage // We use the fact that the marginal likelihood is a t-distribution. try { - double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); + double seasonalScale = + std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights); double scaledPrecision = countVarianceScale * m_GaussianPrecision; double scaledRate = countVarianceScale * m_GammaRate; - double scale = std::sqrt((scaledPrecision + 1.0) / scaledPrecision * scaledRate / m_GammaShape); + double scale = std::sqrt((scaledPrecision + 1.0) / scaledPrecision * + scaledRate / m_GammaShape); double m = this->marginalLikelihoodMean(); if (m_GammaShape > MINIMUM_GAUSSIAN_SHAPE) { boost::math::normal_distribution<> normal(m_GaussianMean, scale); - double x1 = boost::math::quantile(normal, (1.0 - percentage) / 2.0) - (this->isInteger() ? 0.5 : 0.0); + double x1 = boost::math::quantile(normal, (1.0 - percentage) / 2.0) - + (this->isInteger() ? 0.5 : 0.0); x1 = seasonalScale != 1.0 ? m + seasonalScale * (x1 - m) : x1; - double x2 = percentage > 0.0 ? boost::math::quantile(normal, (1.0 + percentage) / 2.0) - (this->isInteger() ? 0.5 : 0.0) : x1; + double x2 = percentage > 0.0 + ? boost::math::quantile(normal, (1.0 + percentage) / 2.0) - + (this->isInteger() ? 0.5 : 0.0) + : x1; x2 = seasonalScale != 1.0 ? m + seasonalScale * (x2 - m) : x2; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2 << ", scale = " << scale); return std::make_pair(x1, x2); } boost::math::students_t_distribution<> students(2.0 * m_GammaShape); - double x1 = m_GaussianMean + scale * boost::math::quantile(students, (1.0 - percentage) / 2.0) - (this->isInteger() ? 0.5 : 0.0); + double x1 = m_GaussianMean + + scale * boost::math::quantile(students, (1.0 - percentage) / 2.0) - + (this->isInteger() ? 0.5 : 0.0); x1 = seasonalScale != 1.0 ? m + seasonalScale * (x1 - m) : x1; - double x2 = percentage > 0.0 ? 
m_GaussianMean + scale * boost::math::quantile(students, (1.0 + percentage) / 2.0) - - (this->isInteger() ? 0.5 : 0.0) - : x1; + double x2 = percentage > 0.0 + ? m_GaussianMean + + scale * boost::math::quantile( + students, (1.0 + percentage) / 2.0) - + (this->isInteger() ? 0.5 : 0.0) + : x1; x2 = seasonalScale != 1.0 ? m + seasonalScale * (x2 - m) : x2; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2 << ", scale = " << scale); return std::make_pair(x1, x2); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); + } return this->marginalLikelihoodSupport(); } -maths_t::EFloatingPointErrorStatus CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const { +maths_t::EFloatingPointErrorStatus +CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; if (samples.empty()) { @@ -807,7 +837,8 @@ maths_t::EFloatingPointErrorStatus CNormalMeanPrecConjugate::jointLogMarginalLik } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } @@ -826,15 +857,17 @@ maths_t::EFloatingPointErrorStatus CNormalMeanPrecConjugate::jointLogMarginalLik } detail::CLogMarginalLikelihood logMarginalLikelihood( - weightStyles, samples, weights, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate, this->marginalLikelihoodMean()); + weightStyles, samples, weights, m_GaussianMean, m_GaussianPrecision, + m_GammaShape, m_GammaRate, this->marginalLikelihoodMean()); if (this->isInteger()) { - CIntegration::logGaussLegendre(logMarginalLikelihood, 0.0, 1.0, result); + CIntegration::logGaussLegendre( + logMarginalLikelihood, 0.0, 1.0, result); } else { logMarginalLikelihood(0.0, result); } - maths_t::EFloatingPointErrorStatus status = - static_cast(logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result)); + maths_t::EFloatingPointErrorStatus status = static_cast( + logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result)); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Failed to compute log likelihood (" << this->debug() << ")"); LOG_ERROR(<< "samples = " << core::CContainerPrinter::print(samples)); @@ -847,7 +880,8 @@ maths_t::EFloatingPointErrorStatus CNormalMeanPrecConjugate::jointLogMarginalLik return status; } -void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { +void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples, + TDouble1Vec& samples) const { samples.clear(); if (numberSamples == 0 || this->numberSamples() == 0.0) { @@ -912,9 +946,11 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample double lastPartialExpectation = 0.0; if (m_GammaShape > MINIMUM_GAUSSIAN_SHAPE) { - double variance = (m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape; + double variance = (m_GaussianPrecision + 1.0) / m_GaussianPrecision * + m_GammaRate / m_GammaShape; - LOG_TRACE(<< "mean = 
" << m_GaussianMean << ", variance = " << variance << ", numberSamples = " << numberSamples); + LOG_TRACE(<< "mean = " << m_GaussianMean << ", variance = " << variance + << ", numberSamples = " << numberSamples); try { boost::math::normal_distribution<> normal(m_GaussianMean, std::sqrt(variance)); @@ -923,9 +959,11 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample double q = static_cast(i) / static_cast(numberSamples); double xq = boost::math::quantile(normal, q); - double partialExpectation = m_GaussianMean * q - variance * CTools::safePdf(normal, xq); + double partialExpectation = m_GaussianMean * q - + variance * CTools::safePdf(normal, xq); - double sample = static_cast(numberSamples) * (partialExpectation - lastPartialExpectation); + double sample = static_cast(numberSamples) * + (partialExpectation - lastPartialExpectation); LOG_TRACE(<< "sample = " << sample); @@ -933,14 +971,16 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample if (sample >= support.first && sample <= support.second) { samples.push_back(sample); } else { - LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", gaussianMean = " << m_GaussianMean - << ", variance = " << variance << ", q = " << q << ", x(q) = " << xq); + LOG_ERROR(<< "Sample out of bounds: sample = " << sample + << ", gaussianMean = " << m_GaussianMean << ", variance = " + << variance << ", q = " << q << ", x(q) = " << xq); } lastPartialExpectation = partialExpectation; } } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to sample: " << e.what() << ", gaussianMean = " << m_GaussianMean << ", variance = " << variance); + LOG_ERROR(<< "Failed to sample: " << e.what() << ", gaussianMean = " << m_GaussianMean + << ", variance = " << variance); } } else { double degreesFreedom = 2.0 * m_GammaShape; @@ -948,12 +988,14 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample try { boost::math::students_t_distribution<> students(degreesFreedom); - double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape); + double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * + m_GammaRate / m_GammaShape); - LOG_TRACE(<< "degreesFreedom = " << degreesFreedom << ", mean = " << m_GaussianMean << ", scale = " << scale - << ", numberSamples = " << numberSamples); + LOG_TRACE(<< "degreesFreedom = " << degreesFreedom << ", mean = " << m_GaussianMean + << ", scale = " << scale << ", numberSamples = " << numberSamples); - double constant = CTools::safePdf(students, 0.0) * scale * degreesFreedom / (degreesFreedom - 1.0); + double constant = CTools::safePdf(students, 0.0) * scale * + degreesFreedom / (degreesFreedom - 1.0); for (std::size_t i = 1u; i < numberSamples; ++i) { double q = static_cast(i) / static_cast(numberSamples); @@ -962,9 +1004,12 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample double residual = xq * xq / degreesFreedom; double partialExpectation = - m_GaussianMean * q - constant * std::exp(-(degreesFreedom - 1.0) / 2.0 * std::log(1.0 + residual)); + m_GaussianMean * q - + constant * std::exp(-(degreesFreedom - 1.0) / 2.0 * + std::log(1.0 + residual)); - double sample = static_cast(numberSamples) * (partialExpectation - lastPartialExpectation); + double sample = static_cast(numberSamples) * + (partialExpectation - lastPartialExpectation); LOG_TRACE(<< "sample = " << sample); @@ -972,16 +1017,22 @@ void 
CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample if (sample >= support.first && sample <= support.second) { samples.push_back(sample); } else { - LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", gaussianMean = " << m_GaussianMean - << ", constant = " << constant << ", residual = " << residual << ", q = " << q << ", x(q) = " << xq); + LOG_ERROR(<< "Sample out of bounds: sample = " << sample + << ", gaussianMean = " << m_GaussianMean + << ", constant = " << constant << ", residual = " << residual + << ", q = " << q << ", x(q) = " << xq); } lastPartialExpectation = partialExpectation; } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to sample: " << e.what() << ", degreesFreedom = " << degreesFreedom); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to sample: " << e.what() + << ", degreesFreedom = " << degreesFreedom); + } } - double sample = static_cast(numberSamples) * (m_GaussianMean - lastPartialExpectation); + double sample = static_cast(numberSamples) * + (m_GaussianMean - lastPartialExpectation); LOG_TRACE(<< "sample = " << sample); @@ -989,7 +1040,8 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample if (sample >= support.first && sample <= support.second) { samples.push_back(sample); } else { - LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", gaussianMean = " << m_GaussianMean); + LOG_ERROR(<< "Sample out of bounds: sample = " << sample + << ", gaussianMean = " << m_GaussianMean); } } @@ -1002,23 +1054,19 @@ bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightSty lowerBound = upperBound = 0.0; - TMinusLogCdf minusLogCdf(weightStyles, - samples, - weights, - this->isNonInformative(), - m_GaussianMean, - m_GaussianPrecision, - m_GammaShape, - m_GammaRate, - this->marginalLikelihoodMean()); + TMinusLogCdf minusLogCdf(weightStyles, samples, weights, this->isNonInformative(), + m_GaussianMean, m_GaussianPrecision, m_GammaShape, + m_GammaRate, this->marginalLikelihoodMean()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. double value; - if (!CIntegration::logGaussLegendre(minusLogCdf, 0.0, 1.0, value)) { - LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); + if (!CIntegration::logGaussLegendre( + minusLogCdf, 0.0, 1.0, value)) { + LOG_ERROR(<< "Failed computing c.d.f. for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1028,7 +1076,8 @@ bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightSty double value; if (!minusLogCdf(0.0, value)) { - LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); + LOG_ERROR(<< "Failed computing c.d.f. 
for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1045,23 +1094,19 @@ bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& lowerBound = upperBound = 0.0; - TMinusLogCdfComplement minusLogCdfComplement(weightStyles, - samples, - weights, - this->isNonInformative(), - m_GaussianMean, - m_GaussianPrecision, - m_GammaShape, - m_GammaRate, - this->marginalLikelihoodMean()); + TMinusLogCdfComplement minusLogCdfComplement( + weightStyles, samples, weights, this->isNonInformative(), m_GaussianMean, + m_GaussianPrecision, m_GammaShape, m_GammaRate, this->marginalLikelihoodMean()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. double value; - if (!CIntegration::logGaussLegendre(minusLogCdfComplement, 0.0, 1.0, value)) { - LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); + if (!CIntegration::logGaussLegendre( + minusLogCdfComplement, 0.0, 1.0, value)) { + LOG_ERROR(<< "Failed computing c.d.f. complement for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1071,7 +1116,8 @@ bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& double value; if (!minusLogCdfComplement(0.0, value)) { - LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); + LOG_ERROR(<< "Failed computing c.d.f. complement for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1079,34 +1125,31 @@ bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& return true; } -bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& lowerBound, - double& upperBound, - maths_t::ETail& tail) const { +bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples( + maths_t::EProbabilityCalculation calculation, + const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& lowerBound, + double& upperBound, + maths_t::ETail& tail) const { lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; - detail::CProbabilityOfLessLikelySamples probability(calculation, - weightStyles, - samples, - weights, - this->isNonInformative(), - m_GaussianMean, - m_GaussianPrecision, - m_GammaShape, - m_GammaRate, - this->marginalLikelihoodMean()); + detail::CProbabilityOfLessLikelySamples probability( + calculation, weightStyles, samples, weights, this->isNonInformative(), + m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate, + this->marginalLikelihoodMean()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t. to the hidden offset of the samples Z, which is uniform // on the interval [0,1]. 
double value; - if (!CIntegration::gaussLegendre(probability, 0.0, 1.0, value)) { - LOG_ERROR(<< "Failed computing probability for " << core::CContainerPrinter::print(samples)); + if (!CIntegration::gaussLegendre(probability, 0.0, + 1.0, value)) { + LOG_ERROR(<< "Failed computing probability for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1118,7 +1161,8 @@ bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples(maths_t::EProbabil double value; if (!probability(0.0, value)) { - LOG_ERROR(<< "Failed computing probability for " << core::CContainerPrinter::print(samples)); + LOG_ERROR(<< "Failed computing probability for " + << core::CContainerPrinter::print(samples)); return false; } @@ -1139,7 +1183,9 @@ void CNormalMeanPrecConjugate::print(const std::string& indent, std::string& res return; } result += "mean = " + core::CStringUtils::typeToStringPretty(this->marginalLikelihoodMean()) + - " sd = " + core::CStringUtils::typeToStringPretty(std::sqrt(this->marginalLikelihoodVariance())); + " sd = " + + core::CStringUtils::typeToStringPretty( + std::sqrt(this->marginalLikelihoodVariance())); } std::string CNormalMeanPrecConjugate::printJointDensityFunction() const { @@ -1186,9 +1232,11 @@ std::string CNormalMeanPrecConjugate::printJointDensityFunction() const { y = yStart; for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) { double conditionalPrecision = m_GaussianPrecision * x; - boost::math::normal_distribution<> conditionalGaussian(m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision)); + boost::math::normal_distribution<> conditionalGaussian( + m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision)); - pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) << " "; + pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) + << " "; } pdf << core_t::LINE_ENDING; } @@ -1220,10 +1268,12 @@ std::size_t CNormalMeanPrecConjugate::staticSize() const { void CNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); inserter.insertValue(GAUSSIAN_MEAN_TAG, m_GaussianMean, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision, + core::CIEEE754::E_SinglePrecision); inserter.insertValue(GAMMA_SHAPE_TAG, m_GammaShape, core::CIEEE754::E_SinglePrecision); inserter.insertValue(GAMMA_RATE_TAG, m_GammaRate, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), + core::CIEEE754::E_SinglePrecision); } double CNormalMeanPrecConjugate::mean() const { @@ -1238,9 +1288,11 @@ double CNormalMeanPrecConjugate::precision() const { return m_GammaShape / m_GammaRate; } -CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const { +CNormalMeanPrecConjugate::TDoubleDoublePr +CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const { if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds<double>::lowest(), boost::numeric::bounds<double>::highest()); + return std::make_pair(boost::numeric::bounds<double>::lowest(), + boost::numeric::bounds<double>::highest()); } // Compute the symmetric confidence interval around the median of the @@ -1267,16 +1319,20 @@ 
CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::confidenceIn boost::math::students_t_distribution<> students(2.0 * m_GammaShape); double xLower = boost::math::quantile(students, lowerPercentile); - xLower = m_GaussianMean + xLower / std::sqrt(m_GaussianPrecision * m_GammaShape / m_GammaRate); + xLower = m_GaussianMean + + xLower / std::sqrt(m_GaussianPrecision * m_GammaShape / m_GammaRate); double xUpper = boost::math::quantile(students, upperPercentile); - xUpper = m_GaussianMean + xUpper / std::sqrt(m_GaussianPrecision * m_GammaShape / m_GammaRate); + xUpper = m_GaussianMean + + xUpper / std::sqrt(m_GaussianPrecision * m_GammaShape / m_GammaRate); return std::make_pair(xLower, xUpper); } -CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::confidenceIntervalPrecision(double percentage) const { +CNormalMeanPrecConjugate::TDoubleDoublePr +CNormalMeanPrecConjugate::confidenceIntervalPrecision(double percentage) const { if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds<double>::lowest(), boost::numeric::bounds<double>::highest()); + return std::make_pair(boost::numeric::bounds<double>::lowest(), + boost::numeric::bounds<double>::highest()); } percentage /= 100.0; @@ -1286,26 +1342,31 @@ CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::confidenceIn // The marginal prior distribution for the precision is gamma. boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), boost::math::quantile(gamma, upperPercentile)); + return std::make_pair(boost::math::quantile(gamma, lowerPercentile), + boost::math::quantile(gamma, upperPercentile)); } -bool CNormalMeanPrecConjugate::equalTolerance(const CNormalMeanPrecConjugate& rhs, const TEqualWithTolerance& equal) const { - LOG_DEBUG(<< m_GaussianMean << " " << rhs.m_GaussianMean << ", " << m_GaussianPrecision << " " << rhs.m_GaussianPrecision << ", " - << m_GammaShape << " " << rhs.m_GammaShape << ", " << m_GammaRate << " " << rhs.m_GammaRate); +bool CNormalMeanPrecConjugate::equalTolerance(const CNormalMeanPrecConjugate& rhs, + const TEqualWithTolerance& equal) const { + LOG_DEBUG(<< m_GaussianMean << " " << rhs.m_GaussianMean << ", " << m_GaussianPrecision + << " " << rhs.m_GaussianPrecision << ", " << m_GammaShape << " " + << rhs.m_GammaShape << ", " << m_GammaRate << " " << rhs.m_GammaRate); - return equal(m_GaussianMean, rhs.m_GaussianMean) && equal(m_GaussianPrecision, rhs.m_GaussianPrecision) && + return equal(m_GaussianMean, rhs.m_GaussianMean) && + equal(m_GaussianPrecision, rhs.m_GaussianPrecision) && equal(m_GammaShape, rhs.m_GammaShape) && equal(m_GammaRate, rhs.m_GammaRate); } bool CNormalMeanPrecConjugate::isBad() const { - return !CMathsFuncs::isFinite(m_GaussianMean) || !CMathsFuncs::isFinite(m_GaussianPrecision) || !CMathsFuncs::isFinite(m_GammaShape) || - !CMathsFuncs::isFinite(m_GammaRate); + return !CMathsFuncs::isFinite(m_GaussianMean) || + !CMathsFuncs::isFinite(m_GaussianPrecision) || + !CMathsFuncs::isFinite(m_GammaShape) || !CMathsFuncs::isFinite(m_GammaRate); } std::string CNormalMeanPrecConjugate::debug() const { std::ostringstream result; - result << std::scientific << std::setprecision(15) << m_GaussianMean << " " << m_GaussianPrecision << " " << m_GammaShape << " " - << m_GammaRate; + result << std::scientific << std::setprecision(15) << m_GaussianMean << " " + << m_GaussianPrecision << " " << m_GammaShape << " " << m_GammaRate; return result.str(); } diff --git a/lib/maths/COneOfNPrior.cc 
b/lib/maths/COneOfNPrior.cc index 93bbd793f9..c14e987f8a 100644 --- a/lib/maths/COneOfNPrior.cc +++ b/lib/maths/COneOfNPrior.cc @@ -44,7 +44,8 @@ using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator; //! Compute the log of \p n. double logn(std::size_t n) { - static const double LOG_N[] = {0.0, std::log(2.0), std::log(3.0), std::log(4.0), std::log(5.0)}; + static const double LOG_N[] = {0.0, std::log(2.0), std::log(3.0), + std::log(4.0), std::log(5.0)}; return n < boost::size(LOG_N) ? LOG_N[n - 1] : std::log(static_cast<double>(n)); } @@ -70,15 +71,20 @@ const std::string PRIOR_TAG("b"); const std::string EMPTY_STRING; //! Persist state for a models by passing information to \p inserter. -void modelAcceptPersistInserter(const CModelWeight& weight, const CPrior& prior, core::CStatePersistInserter& inserter) { - inserter.insertLevel(WEIGHT_TAG, boost::bind(&CModelWeight::acceptPersistInserter, &weight, _1)); - inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(prior), _1)); +void modelAcceptPersistInserter(const CModelWeight& weight, + const CPrior& prior, + core::CStatePersistInserter& inserter) { + inserter.insertLevel( + WEIGHT_TAG, boost::bind(&CModelWeight::acceptPersistInserter, &weight, _1)); + inserter.insertLevel(PRIOR_TAG, boost::bind(CPriorStateSerialiser(), + boost::cref(prior), _1)); } } //////// COneOfNPrior Implementation //////// -COneOfNPrior::COneOfNPrior(const TPriorPtrVec& models, maths_t::EDataType dataType, double decayRate) : CPrior(dataType, decayRate) { +COneOfNPrior::COneOfNPrior(const TPriorPtrVec& models, maths_t::EDataType dataType, double decayRate) + : CPrior(dataType, decayRate) { if (models.empty()) { LOG_ERROR(<< "Can't initialize one-of-n with no models!"); return; @@ -92,7 +98,9 @@ COneOfNPrior::COneOfNPrior(const TPriorPtrVec& models, maths_t::EDataTy } } -COneOfNPrior::COneOfNPrior(const TDoublePriorPtrPrVec& models, maths_t::EDataType dataType, double decayRate /*= 0.0*/) +COneOfNPrior::COneOfNPrior(const TDoublePriorPtrPrVec& models, + maths_t::EDataType dataType, + double decayRate /*= 0.0*/) : CPrior(dataType, decayRate) { if (models.empty()) { LOG_ERROR(<< "Can't initialize mixed model with no models!"); @@ -108,20 +116,24 @@ COneOfNPrior::COneOfNPrior(const TDoublePriorPtrPrVec& models, maths_t::EDataTyp } } -COneOfNPrior::COneOfNPrior(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) +COneOfNPrior::COneOfNPrior(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) : CPrior(params.s_DataType, params.s_DecayRate) { - traverser.traverseSubLevel(boost::bind(&COneOfNPrior::acceptRestoreTraverser, this, boost::cref(params), _1)); + traverser.traverseSubLevel(boost::bind(&COneOfNPrior::acceptRestoreTraverser, + this, boost::cref(params), _1)); } -bool COneOfNPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool COneOfNPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) - RESTORE(MODEL_TAG, - traverser.traverseSubLevel(boost::bind(&COneOfNPrior::modelAcceptRestoreTraverser, this, boost::cref(params), _1))) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, 
double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) + RESTORE(MODEL_TAG, traverser.traverseSubLevel( + boost::bind(&COneOfNPrior::modelAcceptRestoreTraverser, + this, boost::cref(params), _1))) + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) } while (traverser.next()); @@ -129,7 +141,8 @@ bool COneOfNPrior::acceptRestoreTraverser(const SDistributionRestoreParams& para return true; } -COneOfNPrior::COneOfNPrior(const COneOfNPrior& other) : CPrior(other.dataType(), other.decayRate()) { +COneOfNPrior::COneOfNPrior(const COneOfNPrior& other) + : CPrior(other.dataType(), other.decayRate()) { // Clone all the models up front so we can implement strong exception safety. m_Models.reserve(other.m_Models.size()); for (const auto& model : other.m_Models) { @@ -207,7 +220,9 @@ bool COneOfNPrior::needsOffset() const { return false; } -double COneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +double COneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { TMeanAccumulator result; TDouble5Vec penalties; @@ -220,7 +235,8 @@ double COneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles, const TDo if (CBasicStatistics::mean(result) != 0.0) { CScopeCanonicalizeWeights canonicalize(m_Models); for (std::size_t i = 0u; i < penalties.size(); ++i) { - if (m_Models[i].second->participatesInModelSelection() && CMathsFuncs::isFinite(penalties)) { + if (m_Models[i].second->participatesInModelSelection() && + CMathsFuncs::isFinite(penalties)) { CModelWeight& weight = m_Models[i].first; weight.logWeight(weight.logWeight() + penalties[i]); } @@ -238,13 +254,16 @@ double COneOfNPrior::offset() const { return offset; } -void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { +void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights) { if (samples.empty()) { return; } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return; } @@ -319,7 +338,9 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, const TDouble // Update the weights with the marginal likelihoods. double logLikelihood = 0.0; maths_t::EFloatingPointErrorStatus status = - use ? model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) : maths_t::E_FpOverflowed; + use ? model.second->jointLogMarginalLikelihood(weightStyles, samples, + weights, logLikelihood) + : maths_t::E_FpOverflowed; if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Failed to compute log-likelihood"); @@ -365,7 +386,9 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, const TDouble // The idea here is to limit the amount which extreme samples // affect model selection, particularly early on in the model // life-cycle. 
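The update which follows increments each model's log-weight by its log marginal likelihood and floors the contribution at minLogLikelihood, so a single extreme sample cannot wipe out a model early in its life-cycle. A self-contained sketch of the shift-by-max renormalization this relies on (the names here are illustrative, not the class's actual members):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<double> renormalize(std::vector<double> logWeights,
                                const std::vector<double>& logLikelihoods) {
    for (std::size_t i = 0; i < logWeights.size(); ++i) {
        logWeights[i] += logLikelihoods[i];
    }
    // Shift by the maximum log-weight so that exponentiation cannot overflow.
    double maxLogWeight = *std::max_element(logWeights.begin(), logWeights.end());
    double Z = 0.0;
    std::vector<double> weights(logWeights.size());
    for (std::size_t i = 0; i < logWeights.size(); ++i) {
        weights[i] = std::exp(logWeights[i] - maxLogWeight);
        Z += weights[i];
    }
    for (auto& weight : weights) {
        weight /= Z;
    }
    return weights;
}

int main() {
    // Two equally weighted models; the second fits the data better by 2 nats.
    for (double weight : renormalize({0.0, 0.0}, {-3.0, -1.0})) {
        std::cout << weight << '\n'; // ~0.12 and ~0.88
    }
}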
- double minLogLikelihood = maxLogLikelihood[0] - n * std::min(maxModelPenalty(this->numberSamples()), 100.0); + double minLogLikelihood = + maxLogLikelihood[0] - + n * std::min(maxModelPenalty(this->numberSamples()), 100.0); TMaxAccumulator maxLogWeight; for (std::size_t i = 0; i < m_Models.size(); ++i) { @@ -467,7 +490,8 @@ double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const { return result / Z; } -double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { // We approximate this as the weighted average of the component // model modes. @@ -493,7 +517,8 @@ double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, return CTools::truncate(result, support.first, support.second); } -double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { if (this->isNonInformative()) { return INF; } @@ -516,9 +541,10 @@ double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightSty return result / Z; } -COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +COneOfNPrior::TDoubleDoublePr +COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, + const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { // We approximate this as the weighted sum of the component model // intervals. To compute the weights we expand all component model // marginal likelihoods about a reasonable estimate for the true @@ -544,7 +570,8 @@ COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodConfidenceInterval for (const auto& model : m_Models) { double weight = model.first; if (weight >= MAXIMUM_RELATIVE_ERROR) { - TDoubleDoublePr interval = model.second->marginalLikelihoodConfidenceInterval(percentage, weightStyles, weights); + TDoubleDoublePr interval = model.second->marginalLikelihoodConfidenceInterval( + percentage, weightStyles, weights); x1.add(interval.first, weight); x2.add(interval.second, weight); } @@ -554,10 +581,11 @@ COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodConfidenceInterval return std::make_pair(CBasicStatistics::mean(x1), CBasicStatistics::mean(x2)); } -maths_t::EFloatingPointErrorStatus COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& result) const { +maths_t::EFloatingPointErrorStatus +COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, + const TDouble1Vec& samples, + const TDouble4Vec1Vec& weights, + double& result) const { result = 0.0; if (samples.empty()) { @@ -566,7 +594,8 @@ maths_t::EFloatingPointErrorStatus COneOfNPrior::jointLogMarginalLikelihood(cons } if (samples.size() != weights.size()) { - LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" + LOG_ERROR(<< "Mismatch in samples '" + << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } @@ -589,8 +618,8 @@ maths_t::EFloatingPointErrorStatus COneOfNPrior::jointLogMarginalLikelihood(cons for 
(const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { double logLikelihood; - maths_t::EFloatingPointErrorStatus status = - model.second->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood); + maths_t::EFloatingPointErrorStatus status = model.second->jointLogMarginalLikelihood( + weightStyles, samples, weights, logLikelihood); if (status & maths_t::E_FpFailed) { return status; } @@ -638,7 +667,8 @@ maths_t::EFloatingPointErrorStatus COneOfNPrior::jointLogMarginalLikelihood(cons return status; } -void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { +void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, + TDouble1Vec& samples) const { samples.clear(); if (numberSamples == 0 || this->isNonInformative()) { @@ -657,7 +687,8 @@ void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1V CSampling::TSizeVec sampling; CSampling::weightedSample(numberSamples, weights, sampling); - LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(weights) << ", sampling = " << core::CContainerPrinter::print(sampling)); + LOG_TRACE(<< "weights = " << core::CContainerPrinter::print(weights) + << ", sampling = " << core::CContainerPrinter::print(sampling)); if (sampling.size() != m_Models.size()) { LOG_ERROR(<< "Failed to sample marginal likelihood"); @@ -689,12 +720,14 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, lowerBound = upperBound = 0.0; if (samples.empty()) { - LOG_ERROR(<< "Can't compute c.d.f. " << (complement ? "complement " : "") << "for empty sample set"); + LOG_ERROR(<< "Can't compute c.d.f. " << (complement ? "complement " : "") + << "for empty sample set"); return false; } if (this->isNonInformative()) { - lowerBound = upperBound = -std::log(complement ? 1.0 - CTools::IMPROPER_CDF : CTools::IMPROPER_CDF); + lowerBound = upperBound = -std::log(complement ? 1.0 - CTools::IMPROPER_CDF + : CTools::IMPROPER_CDF); return true; } @@ -723,11 +756,15 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, double li = 0.0; double ui = 0.0; - if (complement && !model.minusLogJointCdfComplement(weightStyles, samples, weights, li, ui)) { - LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); + if (complement && !model.minusLogJointCdfComplement(weightStyles, samples, + weights, li, ui)) { + LOG_ERROR(<< "Failed computing c.d.f. complement for " + << core::CContainerPrinter::print(samples)); return false; - } else if (!complement && !model.minusLogJointCdf(weightStyles, samples, weights, li, ui)) { - LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); + } else if (!complement && + !model.minusLogJointCdf(weightStyles, samples, weights, li, ui)) { + LOG_ERROR(<< "Failed computing c.d.f. for " + << core::CContainerPrinter::print(samples)); return false; } li = wi - li; @@ -766,7 +803,8 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, lowerBound = std::max(lowerBound, 0.0); upperBound = std::max(upperBound, 0.0); - LOG_TRACE(<< "Joint -log(c.d.f." << (complement ? " complement" : "") << ") = [" << lowerBound << "," << upperBound << "]"); + LOG_TRACE(<< "Joint -log(c.d.f." << (complement ? 
" complement" : "") + << ") = [" << lowerBound << "," << upperBound << "]"); return true; } @@ -777,11 +815,7 @@ bool COneOfNPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, double& lowerBound, double& upperBound) const { return this->minusLogJointCdfImpl(false, // complement - weightStyles, - samples, - weights, - lowerBound, - upperBound); + weightStyles, samples, weights, lowerBound, upperBound); } bool COneOfNPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, @@ -790,11 +824,7 @@ bool COneOfNPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyle double& lowerBound, double& upperBound) const { return this->minusLogJointCdfImpl(true, // complement - weightStyles, - samples, - weights, - lowerBound, - upperBound); + weightStyles, samples, weights, lowerBound, upperBound); } bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, @@ -846,20 +876,23 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati double modelLowerBound, modelUpperBound; maths_t::ETail modelTail; - if (!model.probabilityOfLessLikelySamples( - calculation, weightStyles, samples, weights, modelLowerBound, modelUpperBound, modelTail)) { + if (!model.probabilityOfLessLikelySamples(calculation, weightStyles, + samples, weights, modelLowerBound, + modelUpperBound, modelTail)) { // Logging handled at a lower level. return false; } - LOG_TRACE(<< "weight = " << weight << ", modelLowerBound = " << modelLowerBound << ", modelUpperBound = " << modelLowerBound); + LOG_TRACE(<< "weight = " << weight << ", modelLowerBound = " << modelLowerBound + << ", modelUpperBound = " << modelLowerBound); lowerBound += weight * modelLowerBound; upperBound += weight * modelUpperBound; tail_.add(TDoubleTailPr(weight * (modelLowerBound + modelUpperBound), modelTail)); } - if (!(lowerBound >= 0.0 && lowerBound <= 1.001) || !(upperBound >= 0.0 && upperBound <= 1.001)) { + if (!(lowerBound >= 0.0 && lowerBound <= 1.001) || + !(upperBound >= 0.0 && upperBound <= 1.001)) { LOG_ERROR(<< "Bad probability bounds = [" << lowerBound << ", " << upperBound << "]" << ", " << core::CContainerPrinter::print(logWeights)); } @@ -881,7 +914,8 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati bool COneOfNPrior::isNonInformative() const { for (const auto& model : m_Models) { - if (model.second->participatesInModelSelection() && model.second->isNonInformative()) { + if (model.second->participatesInModelSelection() && + model.second->isNonInformative()) { return true; } } @@ -897,11 +931,13 @@ void COneOfNPrior::print(const std::string& indent, std::string& result) const { static const double MINIMUM_SIGNIFICANT_WEIGHT = 0.05; result += ':'; - result += core_t::LINE_ENDING + indent + " # samples " + core::CStringUtils::typeToStringPretty(this->numberSamples()); + result += core_t::LINE_ENDING + indent + " # samples " + + core::CStringUtils::typeToStringPretty(this->numberSamples()); for (const auto& model : m_Models) { double weight = model.first; if (weight >= MINIMUM_SIGNIFICANT_WEIGHT) { - std::string indent_ = indent + " weight " + core::CStringUtils::typeToStringPretty(weight) + " "; + std::string indent_ = indent + " weight " + + core::CStringUtils::typeToStringPretty(weight) + " "; model.second->print(indent_, result); } } @@ -931,10 +967,13 @@ std::size_t COneOfNPrior::staticSize() const { void COneOfNPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const { for (const auto& model : m_Models) { - 
inserter.insertLevel(MODEL_TAG, boost::bind(&modelAcceptPersistInserter, boost::cref(model.first), boost::cref(*model.second), _1)); + inserter.insertLevel(MODEL_TAG, boost::bind(&modelAcceptPersistInserter, + boost::cref(model.first), + boost::cref(*model.second), _1)); } inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), + core::CIEEE754::E_SinglePrecision); } COneOfNPrior::TDoubleVec COneOfNPrior::weights() const { @@ -971,7 +1010,8 @@ COneOfNPrior::TPriorCPtrVec COneOfNPrior::models() const { return result; } -bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { CModelWeight weight(1.0); bool gotWeight = false; TPriorPtr model; @@ -980,10 +1020,12 @@ bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams& const std::string& name = traverser.name(); RESTORE_SETUP_TEARDOWN(WEIGHT_TAG, /*no-op*/, - traverser.traverseSubLevel(boost::bind(&CModelWeight::acceptRestoreTraverser, &weight, _1)), + traverser.traverseSubLevel(boost::bind( + &CModelWeight::acceptRestoreTraverser, &weight, _1)), gotWeight = true) - RESTORE(PRIOR_TAG, - traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(model), _1))) + RESTORE(PRIOR_TAG, traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), boost::cref(params), + boost::ref(model), _1))) } while (traverser.next()); if (!gotWeight) { diff --git a/lib/maths/COrdinal.cc b/lib/maths/COrdinal.cc index ccfe5fe944..678e992f37 100644 --- a/lib/maths/COrdinal.cc +++ b/lib/maths/COrdinal.cc @@ -93,7 +93,8 @@ bool COrdinal::operator<(COrdinal rhs) const { case E_PositiveInteger: switch (rhs.m_Type) { case E_Integer: - return !this->equal(rhs.m_Value.integer, m_Value.positiveInteger) && !this->less(rhs.m_Value.integer, m_Value.positiveInteger); + return !this->equal(rhs.m_Value.integer, m_Value.positiveInteger) && + !this->less(rhs.m_Value.integer, m_Value.positiveInteger); case E_PositiveInteger: return m_Value.positiveInteger < rhs.m_Value.positiveInteger; case E_Real: @@ -105,9 +106,11 @@ bool COrdinal::operator<(COrdinal rhs) const { case E_Real: switch (rhs.m_Type) { case E_Integer: - return !this->equal(rhs.m_Value.integer, m_Value.real) && !this->less(rhs.m_Value.integer, m_Value.real); + return !this->equal(rhs.m_Value.integer, m_Value.real) && + !this->less(rhs.m_Value.integer, m_Value.real); case E_PositiveInteger: - return !this->equal(rhs.m_Value.positiveInteger, m_Value.real) && !this->less(rhs.m_Value.positiveInteger, m_Value.real); + return !this->equal(rhs.m_Value.positiveInteger, m_Value.real) && + !this->less(rhs.m_Value.positiveInteger, m_Value.real); case E_Real: return m_Value.real < rhs.m_Value.real; case E_Nan: @@ -157,7 +160,8 @@ bool COrdinal::equal(int64_t lhs, double rhs) const { } bool COrdinal::equal(uint64_t lhs, double rhs) const { - if (rhs < 0.0 || rhs > static_cast<double>(boost::numeric::bounds<uint64_t>::highest())) { + if (rhs < 0.0 || + rhs > static_cast<double>(boost::numeric::bounds<uint64_t>::highest())) { return false; } double integerPart; @@ -178,7 +182,8 @@ bool COrdinal::less(int64_t lhs, double rhs) const { } double integerPart; double remainder = ::modf(rhs, 
&integerPart); - return lhs < static_cast<int64_t>(integerPart) || (lhs == static_cast<int64_t>(integerPart) && remainder > 0.0); + return lhs < static_cast<int64_t>(integerPart) || + (lhs == static_cast<int64_t>(integerPart) && remainder > 0.0); } bool COrdinal::less(uint64_t lhs, double rhs) const { @@ -190,7 +195,8 @@ bool COrdinal::less(uint64_t lhs, double rhs) const { } double integerPart; double remainder = ::modf(rhs, &integerPart); - return lhs < static_cast<uint64_t>(integerPart) || (lhs == static_cast<uint64_t>(integerPart) && remainder > 0.0); + return lhs < static_cast<uint64_t>(integerPart) || + (lhs == static_cast<uint64_t>(integerPart) && remainder > 0.0); } std::ostream& operator<<(std::ostream& o, COrdinal ord) { diff --git a/lib/maths/CPRNG.cc b/lib/maths/CPRNG.cc index b2d2bc1f10..7974ab3ee0 100644 --- a/lib/maths/CPRNG.cc +++ b/lib/maths/CPRNG.cc @@ -220,7 +220,8 @@ void CPRNG::CXorShift1024Mult::jump() { std::string CPRNG::CXorShift1024Mult::toString() const { const uint64_t* begin = &m_X[0]; const uint64_t* end = &m_X[16]; - return core::CPersistUtils::toString(begin, end) + core::CPersistUtils::PAIR_DELIMITER + core::CStringUtils::typeToString(m_P); + return core::CPersistUtils::toString(begin, end) + + core::CPersistUtils::PAIR_DELIMITER + core::CStringUtils::typeToString(m_P); } bool CPRNG::CXorShift1024Mult::fromString(std::string state) { @@ -238,21 +239,12 @@ bool CPRNG::CXorShift1024Mult::fromString(std::string state) { } const uint64_t CPRNG::CXorShift1024Mult::A(1181783497276652981); -const uint64_t CPRNG::CXorShift1024Mult::JUMP[16] = {0x84242f96eca9c41d, - 0xa3c65b8776f96855, - 0x5b34a39f070b5837, - 0x4489affce4f31a1e, - 0x2ffeeb0a48316f40, - 0xdc2d9891fe68c022, - 0x3659132bb12fea70, - 0xaac17d8efa43cab8, - 0xc4cb815590989b13, - 0x5ee975283d71c93b, - 0x691548c86c1bd540, - 0x7910c41d10a1e6a5, - 0x0b5fc64563b3e2a8, - 0x047f7684e9fc949d, - 0xb99181f2d8f685ca, - 0x284600e3f30e38c3}; +const uint64_t CPRNG::CXorShift1024Mult::JUMP[16] = { + 0x84242f96eca9c41d, 0xa3c65b8776f96855, 0x5b34a39f070b5837, + 0x4489affce4f31a1e, 0x2ffeeb0a48316f40, 0xdc2d9891fe68c022, + 0x3659132bb12fea70, 0xaac17d8efa43cab8, 0xc4cb815590989b13, + 0x5ee975283d71c93b, 0x691548c86c1bd540, 0x7910c41d10a1e6a5, + 0x0b5fc64563b3e2a8, 0x047f7684e9fc949d, 0xb99181f2d8f685ca, + 0x284600e3f30e38c3}; } } diff --git a/lib/maths/CPackedBitVector.cc b/lib/maths/CPackedBitVector.cc index 3ef0e9865b..d3df9d927e 100644 --- a/lib/maths/CPackedBitVector.cc +++ b/lib/maths/CPackedBitVector.cc @@ -16,17 +16,20 @@ namespace ml { namespace maths { -CPackedBitVector::CPackedBitVector() : m_Dimension(0), m_First(false), m_Parity(true) { +CPackedBitVector::CPackedBitVector() + : m_Dimension(0), m_First(false), m_Parity(true) { } -CPackedBitVector::CPackedBitVector(bool bit) : m_Dimension(1), m_First(bit), m_Parity(true), m_RunLengths(1, 1) { +CPackedBitVector::CPackedBitVector(bool bit) + : m_Dimension(1), m_First(bit), m_Parity(true), m_RunLengths(1, 1) { } CPackedBitVector::CPackedBitVector(std::size_t dimension, bool bit) : m_Dimension(static_cast<uint32_t>(dimension)), m_First(bit), m_Parity(true) { if (dimension > 0) { std::size_t remainder = static_cast<std::size_t>(MAX_RUN_LENGTH); - for (/**/; remainder <= dimension; remainder += static_cast<std::size_t>(MAX_RUN_LENGTH)) { + for (/**/; remainder <= dimension; + remainder += static_cast<std::size_t>(MAX_RUN_LENGTH)) { m_RunLengths.push_back(MAX_RUN_LENGTH); } remainder -= static_cast<std::size_t>(MAX_RUN_LENGTH); @@ -35,7 +38,8 @@ CPackedBitVector::CPackedBitVector(std::size_t dimension, bool bit) } CPackedBitVector::CPackedBitVector(const TBoolVec& bits) - : 
m_Dimension(static_cast<uint32_t>(bits.size())), m_First(bits.empty() ? false : bits[0]), m_Parity(true) { + : m_Dimension(static_cast<uint32_t>(bits.size())), + m_First(bits.empty() ? false : bits[0]), m_Parity(true) { std::size_t length = 1u; for (std::size_t i = 1u; i < bits.size(); ++i) { if (bits[i] == bits[i - 1]) { @@ -102,7 +106,8 @@ void CPackedBitVector::extend(bool bit) { bool CPackedBitVector::fromDelimited(const std::string& str) { std::size_t last = 0u; std::size_t pos = str.find_first_of(core::CPersistUtils::DELIMITER, last); - if (pos == std::string::npos || core::CStringUtils::stringToType(str.substr(last, pos - last), m_Dimension) == false) { + if (pos == std::string::npos || + core::CStringUtils::stringToType(str.substr(last, pos - last), m_Dimension) == false) { LOG_ERROR(<< "Invalid packed vector in " << str); return false; } @@ -110,7 +115,8 @@ bool CPackedBitVector::fromDelimited(const std::string& str) { last = pos; pos = str.find_first_of(core::CPersistUtils::DELIMITER, last + 1); int first = 0; - if (pos == std::string::npos || core::CStringUtils::stringToType(str.substr(last + 1, pos - last - 1), first) == false) { + if (pos == std::string::npos || + core::CStringUtils::stringToType(str.substr(last + 1, pos - last - 1), first) == false) { LOG_ERROR(<< "Invalid packed vector in " << str); return false; } @@ -119,7 +125,8 @@ bool CPackedBitVector::fromDelimited(const std::string& str) { last = pos; pos = str.find_first_of(core::CPersistUtils::DELIMITER, last + 1); int parity = 0; - if (pos == std::string::npos || core::CStringUtils::stringToType(str.substr(last + 1, pos - last - 1), parity) == false) { + if (pos == std::string::npos || + core::CStringUtils::stringToType(str.substr(last + 1, pos - last - 1), parity) == false) { LOG_ERROR(<< "Invalid packed vector in " << str); return false; } @@ -136,8 +143,10 @@ bool CPackedBitVector::fromDelimited(const std::string& str) { std::string CPackedBitVector::toDelimited() const { std::string result; result += core::CStringUtils::typeToString(m_Dimension) + core::CPersistUtils::DELIMITER; - result += core::CStringUtils::typeToString(static_cast<int>(m_First)) + core::CPersistUtils::DELIMITER; - result += core::CStringUtils::typeToString(static_cast<int>(m_Parity)) + core::CPersistUtils::DELIMITER; + result += core::CStringUtils::typeToString(static_cast<int>(m_First)) + + core::CPersistUtils::DELIMITER; + result += core::CStringUtils::typeToString(static_cast<int>(m_Parity)) + + core::CPersistUtils::DELIMITER; result += core::CPersistUtils::toString(m_RunLengths); return result; } @@ -148,7 +157,8 @@ std::size_t CPackedBitVector::dimension() const { bool CPackedBitVector::operator()(std::size_t i) const { bool parity = true; - for (std::size_t j = 0u, k = static_cast<std::size_t>(m_RunLengths[j]); k <= i; k += static_cast<std::size_t>(m_RunLengths[++j])) { + for (std::size_t j = 0u, k = static_cast<std::size_t>(m_RunLengths[j]); + k <= i; k += static_cast<std::size_t>(m_RunLengths[++j])) { if (m_RunLengths[j] != MAX_RUN_LENGTH) { parity = !parity; } @@ -157,12 +167,14 @@ bool CPackedBitVector::operator()(std::size_t i) const { } bool CPackedBitVector::operator==(const CPackedBitVector& other) const { - return m_Dimension == other.m_Dimension && m_First == other.m_First && m_Parity == other.m_Parity && m_RunLengths == other.m_RunLengths; + return m_Dimension == other.m_Dimension && m_First == other.m_First && + m_Parity == other.m_Parity && m_RunLengths == other.m_RunLengths; } bool CPackedBitVector::operator<(const CPackedBitVector& rhs) const { - return COrderings::lexicographical_compare( - m_Dimension, 
m_First, m_Parity, m_RunLengths, rhs.m_Dimension, rhs.m_First, rhs.m_Parity, rhs.m_RunLengths); + return COrderings::lexicographical_compare(m_Dimension, m_First, m_Parity, + m_RunLengths, rhs.m_Dimension, rhs.m_First, + rhs.m_Parity, rhs.m_RunLengths); } CPackedBitVector CPackedBitVector::complement() const { @@ -254,7 +266,8 @@ CPackedBitVector::TBoolVec CPackedBitVector::toBitVector() const { bool parity = true; for (std::size_t i = 0u; i < m_RunLengths.size(); ++i) { - std::fill_n(std::back_inserter(result), static_cast<std::size_t>(m_RunLengths[i]), parity ? m_First : !m_First); + std::fill_n(std::back_inserter(result), + static_cast<std::size_t>(m_RunLengths[i]), parity ? m_First : !m_First); if (m_RunLengths[i] != MAX_RUN_LENGTH) { parity = !parity; } diff --git a/lib/maths/CPeriodicityHypothesisTests.cc b/lib/maths/CPeriodicityHypothesisTests.cc index d1837c9d63..8e85e1327e 100644 --- a/lib/maths/CPeriodicityHypothesisTests.cc +++ b/lib/maths/CPeriodicityHypothesisTests.cc @@ -59,7 +59,8 @@ using TTimeTimePrMeanVarAccumulatorPr = std::pair<TTimeTimePr, TMeanVarAccumulator> 0.0) { @@ -71,7 +72,8 @@ class CMinAmplitude { double amplitude() const { if (this->count() >= MINIMUM_COUNT_TO_TEST) { - return std::max(std::max(-m_Min.biggest(), 0.0), std::max(m_Max.biggest(), 0.0)); + return std::max(std::max(-m_Min.biggest(), 0.0), + std::max(m_Max.biggest(), 0.0)); } return 0.0; } @@ -93,7 +95,8 @@ class CMinAmplitude { private: using TMinAccumulator = CBasicStatistics::COrderStatisticsHeap<double>; - using TMaxAccumulator = CBasicStatistics::COrderStatisticsHeap<double, std::greater<double>>; + using TMaxAccumulator = + CBasicStatistics::COrderStatisticsHeap<double, std::greater<double>>; private: std::size_t count() const { return m_Min.count(); } @@ -120,7 +123,8 @@ using TMinAmplitudeVec = std::vector<CMinAmplitude>; //! \brief Holds the relevant summary for choosing between alternative //! (non-nested) hypotheses. struct SHypothesisSummary { - SHypothesisSummary(double v, double DF, const CPeriodicityHypothesisTestsResult& H) : s_V(v), s_DF(DF), s_H(H) {} + SHypothesisSummary(double v, double DF, const CPeriodicityHypothesisTestsResult& H) + : s_V(v), s_DF(DF), s_H(H) {} double s_V; double s_DF; @@ -151,7 +155,9 @@ const core_t::TTime DIURNAL_PERIODS[]{DAY, WEEK}; //! The weekend/day windows. const TTimeTimePr DIURNAL_WINDOWS[]{{0, WEEKEND}, {WEEKEND, WEEK}, {0, WEEK}}; //! The names of the the diurnal periodic components. -const std::string DIURNAL_COMPONENT_NAMES[] = {"weekend daily", "weekend weekly", "weekday daily", "weekday weekly", "daily", "weekly"}; +const std::string DIURNAL_COMPONENT_NAMES[] = { + "weekend daily", "weekend weekly", "weekday daily", + "weekday weekly", "daily", "weekly"}; //! The confidence interval used for test statistic values. const double CONFIDENCE_INTERVAL{80.0}; @@ -181,10 +187,12 @@ void removeLinearTrend(TFloatMeanAccumulatorVec& values) { //! in one repeat of the partitioning pattern. 
double weekendPartitionVarianceCorrection(std::size_t bucketsPerWeek) { static const std::size_t BUCKETS_PER_WEEK[]{7, 14, 21, 28, 42, 56, 84, 168}; - static const double CORRECTIONS[]{1.0, 1.0, 1.0, 1.12, 1.31, 1.31, 1.31, 1.31}; - std::ptrdiff_t index{std::min( - std::lower_bound(boost::begin(BUCKETS_PER_WEEK), boost::end(BUCKETS_PER_WEEK), bucketsPerWeek) - boost::begin(BUCKETS_PER_WEEK), - std::ptrdiff_t(boost::size(BUCKETS_PER_WEEK) - 1))}; + static const double CORRECTIONS[]{1.0, 1.0, 1.0, 1.12, + 1.31, 1.31, 1.31, 1.31}; + std::ptrdiff_t index{std::min(std::lower_bound(boost::begin(BUCKETS_PER_WEEK), + boost::end(BUCKETS_PER_WEEK), bucketsPerWeek) - + boost::begin(BUCKETS_PER_WEEK), + std::ptrdiff_t(boost::size(BUCKETS_PER_WEEK) - 1))}; return CORRECTIONS[index]; } @@ -194,7 +202,10 @@ double varianceAtPercentile(double variance, double df, double percentage) { try { boost::math::chi_squared chi(df); return boost::math::quantile(chi, percentage / 100.0) / df * variance; - } catch (const std::exception& e) { LOG_ERROR(<< "Bad input: " << e.what() << ", df = " << df << ", percentage = " << percentage); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Bad input: " << e.what() << ", df = " << df + << ", percentage = " << percentage); + } return variance; } @@ -204,7 +215,10 @@ double autocorrelationAtPercentile(double autocorrelation, double n, double perc try { boost::math::fisher_f f(n - 1.0, n - 1.0); return boost::math::quantile(f, percentage / 100.0) * autocorrelation; - } catch (const std::exception& e) { LOG_ERROR(<< "Bad input: " << e.what() << ", n = " << n << ", percentage = " << percentage); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Bad input: " << e.what() << ", n = " << n + << ", percentage = " << percentage); + } return autocorrelation; } @@ -217,8 +231,10 @@ T length(const std::pair<T, T>& window) { //! Get the total length of the \p windows. template<typename T> T length(const core::CSmallVector<std::pair<T, T>, 2>& windows) { - return std::accumulate( - windows.begin(), windows.end(), 0, [](core_t::TTime length_, const TTimeTimePr& window) { return length_ + length(window); }); + return std::accumulate(windows.begin(), windows.end(), 0, + [](core_t::TTime length_, const TTimeTimePr& window) { + return length_ + length(window); + }); } //! Get the length of \p buckets. @@ -228,7 +244,10 @@ core_t::TTime length(const T& buckets, core_t::TTime bucketLength) { } //! Compute the windows at repeat \p repeat with length \p length. -TTimeTimePr2Vec calculateWindows(core_t::TTime startOfWeek, core_t::TTime window, core_t::TTime repeat, const TTimeTimePr& interval) { +TTimeTimePr2Vec calculateWindows(core_t::TTime startOfWeek, + core_t::TTime window, + core_t::TTime repeat, + const TTimeTimePr& interval) { core_t::TTime a{startOfWeek + interval.first}; core_t::TTime b{startOfWeek + window}; core_t::TTime l{length(interval)}; @@ -241,7 +260,9 @@ TTimeTimePr2Vec calculateWindows(core_t::TTime startOfWeek, core_t::TTime window } //! Get the index ranges corresponding to \p windows. -std::size_t calculateIndexWindows(const TTimeTimePr2Vec& windows, core_t::TTime bucketLength, TSizeSizePr2Vec& result) { +std::size_t calculateIndexWindows(const TTimeTimePr2Vec& windows, + core_t::TTime bucketLength, + TSizeSizePr2Vec& result) { std::size_t l(0); result.reserve(windows.size()); for (const auto& window : windows) { @@ -277,7 +298,10 @@ void project(const TFloatMeanAccumulatorVec& values, //! Compute the periodic trend from \p values falling in \p windows. 
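For reference, varianceAtPercentile above uses the fact that df * V / sigma^2 for a sample variance V is approximately chi-squared distributed with df degrees of freedom, so scaling by a chi-squared quantile converts the point estimate into the requested percentile of its sampling distribution. A standalone distillation with a usage example (it mirrors the helper above minus the error handling; boost::math is assumed available, as in this file):

#include <boost/math/distributions/chi_squared.hpp>
#include <iostream>

// Scale a variance estimate to the given percentile of its sampling
// distribution.
double varianceAtPercentile(double variance, double df, double percentage) {
    boost::math::chi_squared chi(df);
    return boost::math::quantile(chi, percentage / 100.0) / df * variance;
}

int main() {
    // 10th and 90th percentiles bracketing a sample variance of 4.0 (df = 10).
    std::cout << varianceAtPercentile(4.0, 10.0, 10.0) << ", "
              << varianceAtPercentile(4.0, 10.0, 90.0) << '\n';
}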
template<typename U, typename V> -void periodicTrend(const U& values, const TSizeSizePr2Vec& windows_, core_t::TTime bucketLength, V& trend) { +void periodicTrend(const U& values, + const TSizeSizePr2Vec& windows_, + core_t::TTime bucketLength, + V& trend) { if (!trend.empty()) { TSizeSizePr2Vec windows; calculateIndexWindows(windows_, bucketLength, windows); @@ -288,17 +312,22 @@ void periodicTrend(const U& values, const TSizeSizePr2Vec& windows_, core_t::TTi std::size_t b{windows[i].second}; for (std::size_t j = a; j < b; ++j) { const TFloatMeanAccumulator& value{values[j % n]}; - trend[(j - a) % period].add(CBasicStatistics::mean(value), CBasicStatistics::count(value)); + trend[(j - a) % period].add(CBasicStatistics::mean(value), + CBasicStatistics::count(value)); } } } } //! Compute the average of the values at \p times. -void averageValue(const TFloatMeanAccumulatorVec& values, const TTimeVec& times, core_t::TTime bucketLength, TMeanVarAccumulator& value) { +void averageValue(const TFloatMeanAccumulatorVec& values, + const TTimeVec& times, + core_t::TTime bucketLength, + TMeanVarAccumulator& value) { for (const auto time : times) { std::size_t index(time / bucketLength); - value.add(CBasicStatistics::mean(values[index]), CBasicStatistics::count(values[index])); + value.add(CBasicStatistics::mean(values[index]), + CBasicStatistics::count(values[index])); } } @@ -332,7 +361,8 @@ double residualVariance(const TMeanAccumulator& mean) { //! Extract the residual variance of \p bucket of a trend. TMeanAccumulator residualVariance(const TMeanVarAccumulator& bucket, double scale) { - return CBasicStatistics::accumulator(scale * CBasicStatistics::count(bucket), CBasicStatistics::maximumLikelihoodVariance(bucket)); + return CBasicStatistics::accumulator(scale * CBasicStatistics::count(bucket), + CBasicStatistics::maximumLikelihoodVariance(bucket)); } //! \brief Partially specialized helper class to get the trend @@ -343,7 +373,9 @@ struct SResidualVarianceImpl {}; //! \brief Get the residual variance as a double. template<> struct SResidualVarianceImpl<double> { - static double get(const TMeanAccumulator& mean) { return residualVariance(mean); } + static double get(const TMeanAccumulator& mean) { + return residualVariance(mean); + } }; //! \brief Get the residual variance as a mean accumulator. 
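The specializations above let residualVariance return either a plain double or an accumulator carrying the effective count. A simplified stand-in for the double form, ignoring the count scaling (which only affects the downstream bias correction):

#include <iostream>
#include <utility>
#include <vector>

// Count-weighted mean of the per-bucket maximum likelihood variances, i.e.
// the variance remaining about the periodic trend.
double pooledResidualVariance(const std::vector<std::pair<double, double>>& buckets) {
    double count = 0.0;
    double sum = 0.0;
    for (const auto& bucket : buckets) {
        count += bucket.first;                // bucket count
        sum += bucket.first * bucket.second;  // count * ML variance
    }
    return count > 0.0 ? sum / count : 0.0;
}

int main() {
    // Two buckets: 10 values with variance 1.0 and 30 values with variance 2.0.
    std::cout << pooledResidualVariance({{10.0, 1.0}, {30.0, 2.0}}) << '\n'; // 1.75
}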
@@ -357,19 +389,23 @@ template<typename R, typename T> R residualVariance(const T& trend, double scale) { TMeanAccumulator result; for (const auto& bucket : trend) { - result.add(CBasicStatistics::maximumLikelihoodVariance(bucket), CBasicStatistics::count(bucket)); + result.add(CBasicStatistics::maximumLikelihoodVariance(bucket), + CBasicStatistics::count(bucket)); } result.s_Count *= scale; return SResidualVarianceImpl<R>::get(result); } } -bool CPeriodicityHypothesisTestsResult::operator==(const CPeriodicityHypothesisTestsResult& other) const { +bool CPeriodicityHypothesisTestsResult:: +operator==(const CPeriodicityHypothesisTestsResult& other) const { return m_Components == other.m_Components; } -const CPeriodicityHypothesisTestsResult& CPeriodicityHypothesisTestsResult::operator+=(const CPeriodicityHypothesisTestsResult& other) { - m_Components.insert(m_Components.end(), other.m_Components.begin(), other.m_Components.end()); +const CPeriodicityHypothesisTestsResult& CPeriodicityHypothesisTestsResult:: +operator+=(const CPeriodicityHypothesisTestsResult& other) { + m_Components.insert(m_Components.end(), other.m_Components.begin(), + other.m_Components.end()); return *this; } @@ -379,13 +415,15 @@ void CPeriodicityHypothesisTestsResult::add(const std::string& description, core_t::TTime period, const TTimeTimePr& window, double precedence) { - m_Components.emplace_back(description, diurnal, startOfPartition, period, window, precedence); + m_Components.emplace_back(description, diurnal, startOfPartition, period, + window, precedence); } void CPeriodicityHypothesisTestsResult::remove(const std::string& description) { - auto i = std::find_if(m_Components.begin(), m_Components.end(), [&description](const SComponent& component) { - return component.s_Description == description; - }); + auto i = std::find_if(m_Components.begin(), m_Components.end(), + [&description](const SComponent& component) { + return component.s_Description == description; + }); if (i != m_Components.end()) { m_Components.erase(i); } @@ -395,7 +433,8 @@ bool CPeriodicityHypothesisTestsResult::periodic() const { return m_Components.size() > 0; } -const CPeriodicityHypothesisTestsResult::TComponent5Vec& CPeriodicityHypothesisTestsResult::components() const { +const CPeriodicityHypothesisTestsResult::TComponent5Vec& +CPeriodicityHypothesisTestsResult::components() const { return m_Components; } @@ -409,7 +448,8 @@ std::string CPeriodicityHypothesisTestsResult::print() const { } CPeriodicityHypothesisTestsResult::SComponent::SComponent() - : s_Description(""), s_Diurnal(false), s_StartOfPartition(0), s_Period(0), s_Precedence(0.0) { + : s_Description(""), s_Diurnal(false), s_StartOfPartition(0), s_Period(0), + s_Precedence(0.0) { } CPeriodicityHypothesisTestsResult::SComponent::SComponent(const std::string& description, @@ -418,11 +458,8 @@ CPeriodicityHypothesisTestsResult::SComponent::SComponent(const std::string& des core_t::TTime period, const TTimeTimePr& window, double precedence) - : s_Description(description), - s_Diurnal(diurnal), - s_StartOfPartition(startOfPartition), - s_Period(period), - s_Window(window), + : s_Description(description), s_Diurnal(diurnal), + s_StartOfPartition(startOfPartition), s_Period(period), s_Window(window), s_Precedence(precedence) { } @@ -432,13 +469,15 @@ bool CPeriodicityHypothesisTestsResult::SComponent::operator==(const SComponent& CSeasonalTime* CPeriodicityHypothesisTestsResult::SComponent::seasonalTime() const { if (s_Diurnal) { - return new CDiurnalTime(s_StartOfPartition, s_Window.first, s_Window.second, 
s_Period, s_Precedence); + return new CDiurnalTime(s_StartOfPartition, s_Window.first, + s_Window.second, s_Period, s_Precedence); } return new CGeneralPeriodTime(s_Period, s_Precedence); } CPeriodicityHypothesisTestsConfig::CPeriodicityHypothesisTestsConfig() - : m_TestForDiurnal(true), m_HasDaily(false), m_HasWeekend(false), m_HasWeekly(false), m_StartOfWeek(0) { + : m_TestForDiurnal(true), m_HasDaily(false), m_HasWeekend(false), + m_HasWeekly(false), m_StartOfWeek(0) { } void CPeriodicityHypothesisTestsConfig::disableDiurnal() { @@ -481,7 +520,8 @@ core_t::TTime CPeriodicityHypothesisTestsConfig::startOfWeek() const { return m_StartOfWeek; } -CPeriodicityHypothesisTests::CPeriodicityHypothesisTests() : m_BucketLength(0), m_WindowLength(0), m_Period(0) { +CPeriodicityHypothesisTests::CPeriodicityHypothesisTests() + : m_BucketLength(0), m_WindowLength(0), m_Period(0) { } CPeriodicityHypothesisTests::CPeriodicityHypothesisTests(const CPeriodicityHypothesisTestsConfig& config) : m_Config(config), m_BucketLength(0), m_WindowLength(0), m_Period(0) { @@ -491,7 +531,9 @@ bool CPeriodicityHypothesisTests::initialized() const { return m_BucketValues.size() > 0; } -void CPeriodicityHypothesisTests::initialize(core_t::TTime bucketLength, core_t::TTime windowLength, core_t::TTime period) { +void CPeriodicityHypothesisTests::initialize(core_t::TTime bucketLength, + core_t::TTime windowLength, + core_t::TTime period) { m_BucketLength = bucketLength; m_WindowLength = windowLength; m_BucketValues.resize(static_cast<std::size_t>(windowLength / m_BucketLength)); @@ -547,15 +589,21 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const { TTimeTimePr2Vec windowForTestingDaily(window(DAY)); TTimeTimePr2Vec windowForTestingWeekly(window(WEEK)); TTimeTimePr2Vec windowForTestingPeriod(window(m_Period)); - TFloatMeanAccumulatorCRng bucketsForTestingDaily[]{{m_BucketValues, 0, buckets(DAY)}, {detrendedBucketValues, 0, buckets(DAY)}}; - TFloatMeanAccumulatorCRng bucketsForTestingWeekly[]{{m_BucketValues, 0, buckets(WEEK)}, {detrendedBucketValues, 0, buckets(WEEK)}}; - TFloatMeanAccumulatorCRng bucketsForTestingPeriod[]{{m_BucketValues, 0, buckets(m_Period)}, - {detrendedBucketValues, 0, buckets(m_Period)}}; + TFloatMeanAccumulatorCRng bucketsForTestingDaily[]{ + {m_BucketValues, 0, buckets(DAY)}, {detrendedBucketValues, 0, buckets(DAY)}}; + TFloatMeanAccumulatorCRng bucketsForTestingWeekly[]{ + {m_BucketValues, 0, buckets(WEEK)}, {detrendedBucketValues, 0, buckets(WEEK)}}; + TFloatMeanAccumulatorCRng bucketsForTestingPeriod[]{ + {m_BucketValues, 0, buckets(m_Period)}, + {detrendedBucketValues, 0, buckets(m_Period)}}; LOG_TRACE(<< "Testing periodicity hypotheses"); - LOG_TRACE(<< "window for daily = " << core::CContainerPrinter::print(windowForTestingDaily)); - LOG_TRACE(<< "window for weekly = " << core::CContainerPrinter::print(windowForTestingWeekly)); - LOG_TRACE(<< "window for period = " << core::CContainerPrinter::print(windowForTestingPeriod)); + LOG_TRACE(<< "window for daily = " + << core::CContainerPrinter::print(windowForTestingDaily)); + LOG_TRACE(<< "window for weekly = " + << core::CContainerPrinter::print(windowForTestingWeekly)); + LOG_TRACE(<< "window for period = " + << core::CContainerPrinter::print(windowForTestingPeriod)); TNestedHypothesesVec hypotheses; @@ -563,13 +611,16 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const { TNestedHypothesesVec hypotheses_; if (this->seenSufficientDataToTest(WEEK, bucketsForTestingWeekly[i])) { - 
this->hypothesesForWeekly(
-                    windowForTestingWeekly, bucketsForTestingWeekly[i], windowForTestingPeriod, bucketsForTestingPeriod[i], hypotheses_);
+                this->hypothesesForWeekly(windowForTestingWeekly,
+                                          bucketsForTestingWeekly[i], windowForTestingPeriod,
+                                          bucketsForTestingPeriod[i], hypotheses_);
             } else if (this->seenSufficientDataToTest(DAY, bucketsForTestingDaily[i])) {
-                this->hypothesesForDaily(
-                    windowForTestingDaily, bucketsForTestingDaily[i], windowForTestingPeriod, bucketsForTestingPeriod[i], hypotheses_);
+                this->hypothesesForDaily(windowForTestingDaily,
+                                         bucketsForTestingDaily[i], windowForTestingPeriod,
+                                         bucketsForTestingPeriod[i], hypotheses_);
             } else if (this->seenSufficientDataToTest(m_Period, bucketsForTestingPeriod[i])) {
-                this->hypothesesForPeriod(windowForTestingPeriod, bucketsForTestingPeriod[i], hypotheses_);
+                this->hypothesesForPeriod(windowForTestingPeriod,
+                                          bucketsForTestingPeriod[i], hypotheses_);
             }
             hypotheses.insert(hypotheses.end(), hypotheses_.begin(), hypotheses_.end());
@@ -578,36 +629,32 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::test() const {
     return this->best(hypotheses);
 }
 
-void CPeriodicityHypothesisTests::hypothesesForWeekly(const TTimeTimePr2Vec& windowForTestingWeekly,
-                                                      const TFloatMeanAccumulatorCRng& bucketsForTestingWeekly,
-                                                      const TTimeTimePr2Vec& windowForTestingPeriod,
-                                                      const TFloatMeanAccumulatorCRng& bucketsForTestingPeriod,
-                                                      TNestedHypothesesVec& hypotheses) const {
+void CPeriodicityHypothesisTests::hypothesesForWeekly(
+    const TTimeTimePr2Vec& windowForTestingWeekly,
+    const TFloatMeanAccumulatorCRng& bucketsForTestingWeekly,
+    const TTimeTimePr2Vec& windowForTestingPeriod,
+    const TFloatMeanAccumulatorCRng& bucketsForTestingPeriod,
+    TNestedHypothesesVec& hypotheses) const {
     if (WEEK % m_Period == 0) {
-        auto testForNull = boost::bind(
-            &CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingWeekly), boost::cref(bucketsForTestingWeekly), _1);
+        auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
+                                       this, boost::cref(windowForTestingWeekly),
+                                       boost::cref(bucketsForTestingWeekly), _1);
         auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod,
-                                         this,
-                                         boost::cref(windowForTestingWeekly),
-                                         boost::cref(bucketsForTestingWeekly),
-                                         _1);
+                                         this, boost::cref(windowForTestingWeekly),
+                                         boost::cref(bucketsForTestingWeekly), _1);
         auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily,
-                                        this,
-                                        boost::cref(windowForTestingWeekly),
-                                        boost::cref(bucketsForTestingWeekly),
-                                        _1);
+                                        this, boost::cref(windowForTestingWeekly),
+                                        boost::cref(bucketsForTestingWeekly), _1);
         auto testForWeekly = boost::bind(&CPeriodicityHypothesisTests::testForWeekly,
-                                         this,
-                                         boost::cref(windowForTestingWeekly),
-                                         boost::cref(bucketsForTestingWeekly),
-                                         _1);
+                                         this, boost::cref(windowForTestingWeekly),
+                                         boost::cref(bucketsForTestingWeekly), _1);
         auto testForDailyWithWeekend =
-            boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, this, boost::cref(bucketsForTestingWeekly), _1);
-        auto testForWeeklyGivenWeekend = boost::bind(&CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend,
-                                                     this,
-                                                     boost::cref(windowForTestingWeekly),
-                                                     boost::cref(bucketsForTestingWeekly),
-                                                     _1);
+            boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend,
+                        this, boost::cref(bucketsForTestingWeekly), _1);
+        auto testForWeeklyGivenWeekend = boost::bind(
+            &CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend,
+            this, boost::cref(windowForTestingWeekly),
+            boost::cref(bucketsForTestingWeekly), _1);
 
         hypotheses.resize(1);
         if (DAY % m_Period == 0) {
@@ -649,30 +696,25 @@ void CPeriodicityHypothesisTests::hypothesesForWeekly(const TTimeTimePr2Vec& win
                 .addAlternative(testForWeekly);
         }
     } else if (m_Period % WEEK == 0) {
-        auto testForNull = boost::bind(
-            &CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingPeriod), boost::cref(bucketsForTestingPeriod), _1);
+        auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
+                                       this, boost::cref(windowForTestingPeriod),
+                                       boost::cref(bucketsForTestingPeriod), _1);
         auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod,
-                                         this,
-                                         boost::cref(windowForTestingPeriod),
-                                         boost::cref(bucketsForTestingPeriod),
-                                         _1);
+                                         this, boost::cref(windowForTestingPeriod),
+                                         boost::cref(bucketsForTestingPeriod), _1);
        auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily,
-                                        this,
-                                        boost::cref(windowForTestingPeriod),
-                                        boost::cref(bucketsForTestingPeriod),
-                                        _1);
+                                        this, boost::cref(windowForTestingPeriod),
+                                        boost::cref(bucketsForTestingPeriod), _1);
        auto testForWeekly = boost::bind(&CPeriodicityHypothesisTests::testForWeekly,
-                                         this,
-                                         boost::cref(windowForTestingPeriod),
-                                         boost::cref(bucketsForTestingPeriod),
-                                         _1);
+                                         this, boost::cref(windowForTestingPeriod),
+                                         boost::cref(bucketsForTestingPeriod), _1);
         auto testForDailyWithWeekend =
-            boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, this, boost::cref(bucketsForTestingPeriod), _1);
-        auto testForWeeklyGivenWeekend = boost::bind(&CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend,
-                                                     this,
-                                                     boost::cref(windowForTestingPeriod),
-                                                     boost::cref(bucketsForTestingPeriod),
-                                                     _1);
+            boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend,
+                        this, boost::cref(bucketsForTestingPeriod), _1);
+        auto testForWeeklyGivenWeekend = boost::bind(
+            &CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend,
+            this, boost::cref(windowForTestingPeriod),
+            boost::cref(bucketsForTestingPeriod), _1);
 
         hypotheses.resize(1);
         hypotheses[0]
@@ -699,27 +741,22 @@ void CPeriodicityHypothesisTests::hypothesesForWeekly(const TTimeTimePr2Vec& win
     } else {
         {
             auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
-                                           this,
-                                           boost::cref(windowForTestingWeekly),
-                                           boost::cref(bucketsForTestingWeekly),
-                                           _1);
+                                           this, boost::cref(windowForTestingWeekly),
+                                           boost::cref(bucketsForTestingWeekly), _1);
             auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily,
-                                            this,
-                                            boost::cref(windowForTestingWeekly),
-                                            boost::cref(bucketsForTestingWeekly),
-                                            _1);
-            auto testForWeekly = boost::bind(&CPeriodicityHypothesisTests::testForWeekly,
-                                             this,
-                                             boost::cref(windowForTestingWeekly),
-                                             boost::cref(bucketsForTestingWeekly),
-                                             _1);
+                                            this, boost::cref(windowForTestingWeekly),
+                                            boost::cref(bucketsForTestingWeekly), _1);
+            auto testForWeekly =
+                boost::bind(&CPeriodicityHypothesisTests::testForWeekly, this,
+                            boost::cref(windowForTestingWeekly),
+                            boost::cref(bucketsForTestingWeekly), _1);
             auto testForDailyWithWeekend =
-                boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend, this, boost::cref(bucketsForTestingWeekly), _1);
-            auto testForWeeklyGivenWeekend = boost::bind(&CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend,
-                                                         this,
-                                                         boost::cref(windowForTestingWeekly),
-                                                         boost::cref(bucketsForTestingWeekly),
-                                                         _1);
+                boost::bind(&CPeriodicityHypothesisTests::testForDailyWithWeekend,
+                            this, boost::cref(bucketsForTestingWeekly), _1);
+            auto testForWeeklyGivenWeekend = boost::bind(
+                &CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend,
+                this, boost::cref(windowForTestingWeekly),
+                boost::cref(bucketsForTestingWeekly), _1);
 
             hypotheses.resize(2);
             hypotheses[0]
@@ -737,97 +774,93 @@ void CPeriodicityHypothesisTests::hypothesesForWeekly(const TTimeTimePr2Vec& win
         }
         if (m_Period % DAY == 0) {
             auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
-                                           this,
-                                           boost::cref(windowForTestingPeriod),
-                                           boost::cref(bucketsForTestingPeriod),
-                                           _1);
+                                           this, boost::cref(windowForTestingPeriod),
+                                           boost::cref(bucketsForTestingPeriod), _1);
             auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily,
-                                            this,
-                                            boost::cref(windowForTestingPeriod),
-                                            boost::cref(bucketsForTestingPeriod),
-                                            _1);
-            auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod,
-                                             this,
-                                             boost::cref(windowForTestingPeriod),
-                                             boost::cref(bucketsForTestingPeriod),
-                                             _1);
-
-            hypotheses[1].null(testForNull).addNested(testForDaily).addNested(testForPeriod).finishedNested().addAlternative(testForPeriod);
+                                            this, boost::cref(windowForTestingPeriod),
+                                            boost::cref(bucketsForTestingPeriod), _1);
+            auto testForPeriod =
+                boost::bind(&CPeriodicityHypothesisTests::testForPeriod, this,
+                            boost::cref(windowForTestingPeriod),
+                            boost::cref(bucketsForTestingPeriod), _1);
+
+            hypotheses[1]
+                .null(testForNull)
+                .addNested(testForDaily)
+                .addNested(testForPeriod)
+                .finishedNested()
+                .addAlternative(testForPeriod);
         } else {
             auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
-                                           this,
-                                           boost::cref(windowForTestingPeriod),
-                                           boost::cref(bucketsForTestingPeriod),
-                                           _1);
-            auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod,
-                                             this,
-                                             boost::cref(windowForTestingPeriod),
-                                             boost::cref(bucketsForTestingPeriod),
-                                             _1);
+                                           this, boost::cref(windowForTestingPeriod),
+                                           boost::cref(bucketsForTestingPeriod), _1);
+            auto testForPeriod =
+                boost::bind(&CPeriodicityHypothesisTests::testForPeriod, this,
+                            boost::cref(windowForTestingPeriod),
+                            boost::cref(bucketsForTestingPeriod), _1);
 
             hypotheses[1].null(testForNull).addNested(testForPeriod);
         }
     }
 }
 
-void CPeriodicityHypothesisTests::hypothesesForDaily(const TTimeTimePr2Vec& windowForTestingDaily,
-                                                     const TFloatMeanAccumulatorCRng& bucketsForTestingDaily,
-                                                     const TTimeTimePr2Vec& windowForTestingPeriod,
-                                                     const TFloatMeanAccumulatorCRng& bucketsForTestingPeriod,
-                                                     TNestedHypothesesVec& hypotheses) const {
+void CPeriodicityHypothesisTests::hypothesesForDaily(
+    const TTimeTimePr2Vec& windowForTestingDaily,
+    const TFloatMeanAccumulatorCRng& bucketsForTestingDaily,
+    const TTimeTimePr2Vec& windowForTestingPeriod,
+    const TFloatMeanAccumulatorCRng& bucketsForTestingPeriod,
+    TNestedHypothesesVec& hypotheses) const {
     if (DAY % m_Period == 0) {
-        auto testForNull = boost::bind(
-            &CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingDaily), boost::cref(bucketsForTestingDaily), _1);
-        auto testForPeriod = boost::bind(
-            &CPeriodicityHypothesisTests::testForPeriod, this, boost::cref(windowForTestingDaily), boost::cref(bucketsForTestingDaily), _1);
-        auto testForDaily = boost::bind(
-            &CPeriodicityHypothesisTests::testForDaily, this, boost::cref(windowForTestingDaily), boost::cref(bucketsForTestingDaily), _1);
+        auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
+                                       this, boost::cref(windowForTestingDaily),
+                                       boost::cref(bucketsForTestingDaily), _1);
+        auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod,
+                                         this, boost::cref(windowForTestingDaily),
+                                         boost::cref(bucketsForTestingDaily), _1);
+        auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily,
+                                        this, boost::cref(windowForTestingDaily),
+                                        boost::cref(bucketsForTestingDaily), _1);
 
         hypotheses.resize(1);
-        hypotheses[0].null(testForNull).addNested(testForPeriod).addNested(testForDaily).finishedNested().addAlternative(testForDaily);
+        hypotheses[0]
+            .null(testForNull)
+            .addNested(testForPeriod)
+            .addNested(testForDaily)
+            .finishedNested()
+            .addAlternative(testForDaily);
     } else if (m_Period % DAY == 0) {
-        auto testForNull = boost::bind(
-            &CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingPeriod), boost::cref(bucketsForTestingPeriod), _1);
+        auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
+                                       this, boost::cref(windowForTestingPeriod),
+                                       boost::cref(bucketsForTestingPeriod), _1);
         auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod,
-                                         this,
-                                         boost::cref(windowForTestingPeriod),
-                                         boost::cref(bucketsForTestingPeriod),
-                                         _1);
+                                         this, boost::cref(windowForTestingPeriod),
+                                         boost::cref(bucketsForTestingPeriod), _1);
        auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily,
-                                        this,
-                                        boost::cref(windowForTestingPeriod),
-                                        boost::cref(bucketsForTestingPeriod),
-                                        _1);
+                                        this, boost::cref(windowForTestingPeriod),
+                                        boost::cref(bucketsForTestingPeriod), _1);
 
         hypotheses.resize(1);
         hypotheses[0].null(testForNull).addNested(testForDaily).addNested(testForPeriod);
     } else {
         {
             auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
-                                           this,
-                                           boost::cref(windowForTestingDaily),
-                                           boost::cref(bucketsForTestingDaily),
-                                           _1);
+                                           this, boost::cref(windowForTestingDaily),
+                                           boost::cref(bucketsForTestingDaily), _1);
             auto testForDaily = boost::bind(&CPeriodicityHypothesisTests::testForDaily,
-                                            this,
-                                            boost::cref(windowForTestingDaily),
-                                            boost::cref(bucketsForTestingDaily),
-                                            _1);
+                                            this, boost::cref(windowForTestingDaily),
+                                            boost::cref(bucketsForTestingDaily), _1);
 
             hypotheses.resize(2);
             hypotheses[0].null(testForNull).addNested(testForDaily);
         }
         {
             auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull,
-                                           this,
-                                           boost::cref(windowForTestingPeriod),
-                                           boost::cref(bucketsForTestingPeriod),
-                                           _1);
-            auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod,
-                                             this,
-                                             boost::cref(windowForTestingPeriod),
-                                             boost::cref(bucketsForTestingPeriod),
-                                             _1);
+                                           this, boost::cref(windowForTestingPeriod),
+                                           boost::cref(bucketsForTestingPeriod), _1);
+            auto testForPeriod =
+                boost::bind(&CPeriodicityHypothesisTests::testForPeriod, this,
+                            boost::cref(windowForTestingPeriod),
+                            boost::cref(bucketsForTestingPeriod), _1);
 
             hypotheses[1].null(testForNull).addNested(testForPeriod);
         }
     }
 }
 
@@ -836,14 +869,17 @@ void CPeriodicityHypothesisTests::hypothesesForDaily(const TTimeTimePr2Vec& wind
 void CPeriodicityHypothesisTests::hypothesesForPeriod(const TTimeTimePr2Vec& windows,
                                                       const TFloatMeanAccumulatorCRng& buckets,
                                                       TNestedHypothesesVec& hypotheses) const {
-    auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, this, boost::cref(windows), boost::cref(buckets), _1);
-    auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, this, boost::cref(windows), boost::cref(buckets), _1);
+    auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, this,
+                                   boost::cref(windows), boost::cref(buckets), _1);
+    auto testForPeriod = boost::bind(&CPeriodicityHypothesisTests::testForPeriod, this,
+                                     boost::cref(windows), boost::cref(buckets), _1);
 
     hypotheses.resize(1);
     hypotheses[0].null(testForNull).addNested(testForPeriod);
 }
 
-CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::best(const TNestedHypothesesVec& hypotheses) const {
+CPeriodicityHypothesisTestsResult
+CPeriodicityHypothesisTests::best(const TNestedHypothesesVec& hypotheses) const {
     // Note if there isn't a clear cut best hypothesis for variance
     // reduction we choose the simplest hypothesis, i.e. with maximum
     // degrees-of-freedom.
@@ -861,20 +897,23 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::best(const TNeste
         STestStats stats;
         CPeriodicityHypothesisTestsResult resultForHypothesis{hypothesis.test(stats)};
         if (stats.s_B > stats.s_DF0) {
-            summaries.emplace_back(stats.s_V0, stats.s_B - stats.s_DF0, std::move(resultForHypothesis));
+            summaries.emplace_back(stats.s_V0, stats.s_B - stats.s_DF0,
+                                   std::move(resultForHypothesis));
         }
     }
 
     TMinAccumulator vCutoff;
     for (const auto& summary : summaries) {
-        vCutoff.add(varianceAtPercentile(summary.s_V, summary.s_DF, 50.0 + CONFIDENCE_INTERVAL / 2.0));
+        vCutoff.add(varianceAtPercentile(summary.s_V, summary.s_DF,
+                                         50.0 + CONFIDENCE_INTERVAL / 2.0));
     }
     if (vCutoff.count() > 0) {
         LOG_TRACE(<< "variance cutoff = " << vCutoff[0]);
         TMinAccumulator df;
         for (const auto& summary : summaries) {
-            double v{varianceAtPercentile(summary.s_V, summary.s_DF, 50.0 - CONFIDENCE_INTERVAL / 2.0)};
+            double v{varianceAtPercentile(summary.s_V, summary.s_DF,
+                                          50.0 - CONFIDENCE_INTERVAL / 2.0)};
             if (v <= vCutoff[0] && df.add(-summary.s_DF)) {
                 result = summary.s_H;
             }
@@ -884,29 +923,32 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::best(const TNeste
 }
 
 CPeriodicityHypothesisTestsResult
-CPeriodicityHypothesisTests::testForNull(const TTimeTimePr2Vec& window, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const {
+CPeriodicityHypothesisTests::testForNull(const TTimeTimePr2Vec& window,
+                                         const TFloatMeanAccumulatorCRng& buckets,
+                                         STestStats& stats) const {
     LOG_TRACE(<< "Testing null on " << core::CContainerPrinter::print(window));
     this->nullHypothesis(window, buckets, stats);
     return CPeriodicityHypothesisTestsResult();
 }
 
-CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForDaily(const TTimeTimePr2Vec& windows,
-                                                                            const TFloatMeanAccumulatorCRng& buckets,
-                                                                            STestStats& stats) const {
+CPeriodicityHypothesisTestsResult
+CPeriodicityHypothesisTests::testForDaily(const TTimeTimePr2Vec& windows,
+                                          const TFloatMeanAccumulatorCRng& buckets,
+                                          STestStats& stats) const {
     LOG_TRACE(<< "Testing daily on " << core::CContainerPrinter::print(windows));
 
     CPeriodicityHypothesisTestsResult result{stats.s_H0};
 
     stats.s_HasPeriod = m_Config.hasDaily();
-    stats.setThresholds(
-        SIGNIFICANT_VARIANCE_REDUCTION[E_LowThreshold], SIGNIFICANT_AMPLITUDE[E_LowThreshold], SIGNIFICANT_AUTOCORRELATION[E_LowThreshold]);
+    stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[E_LowThreshold],
+                        SIGNIFICANT_AMPLITUDE[E_LowThreshold],
+                        SIGNIFICANT_AUTOCORRELATION[E_LowThreshold]);
 
-    if (m_Config.testForDiurnal() && m_BucketLength <= DAY / 4 && this->seenSufficientDataToTest(DAY, buckets) &&
+    if (m_Config.testForDiurnal() && m_BucketLength <= DAY / 4 &&
+        this->seenSufficientDataToTest(DAY, buckets) &&
         this->testPeriod(windows, buckets, DAY, stats)) {
         this->hypothesis({DAY}, buckets, stats);
-        result.add(DIURNAL_COMPONENT_NAMES[E_Day],
-                   true,
-                   0,
+        result.add(DIURNAL_COMPONENT_NAMES[E_Day], true, 0,
                    DIURNAL_PERIODS[static_cast<std::size_t>(E_Day) % 2],
                    DIURNAL_WINDOWS[static_cast<std::size_t>(E_Day) / 2]);
     }
@@ -914,25 +956,26 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForDaily(cons
     return result;
 }
 
-CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForWeekly(const TTimeTimePr2Vec& windows,
-                                                                             const TFloatMeanAccumulatorCRng& buckets,
-                                                                             STestStats& stats) const {
+CPeriodicityHypothesisTestsResult
+CPeriodicityHypothesisTests::testForWeekly(const TTimeTimePr2Vec& windows,
+                                           const TFloatMeanAccumulatorCRng& buckets,
+                                           STestStats& stats) const {
     LOG_TRACE(<< "Testing weekly on " << core::CContainerPrinter::print(windows));
 
     CPeriodicityHypothesisTestsResult result{stats.s_H0};
 
     stats.s_HasPeriod = m_Config.hasWeekly();
-    stats.setThresholds(
-        SIGNIFICANT_VARIANCE_REDUCTION[E_LowThreshold], SIGNIFICANT_AMPLITUDE[E_LowThreshold], SIGNIFICANT_AUTOCORRELATION[E_LowThreshold]);
+    stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[E_LowThreshold],
+                        SIGNIFICANT_AMPLITUDE[E_LowThreshold],
+                        SIGNIFICANT_AUTOCORRELATION[E_LowThreshold]);
 
-    if (m_Config.testForDiurnal() && m_BucketLength <= WEEK / 4 && this->seenSufficientDataToTest(WEEK, buckets) &&
+    if (m_Config.testForDiurnal() && m_BucketLength <= WEEK / 4 &&
+        this->seenSufficientDataToTest(WEEK, buckets) &&
         this->testPeriod(windows, buckets, WEEK, stats)) {
         stats.s_StartOfPartition = 0;
         stats.s_Partition.assign(1, {0, length(buckets, m_BucketLength)});
         this->hypothesis({WEEK}, buckets, stats);
-        result.add(DIURNAL_COMPONENT_NAMES[E_Week],
-                   true,
-                   0,
+        result.add(DIURNAL_COMPONENT_NAMES[E_Week], true, 0,
                    DIURNAL_PERIODS[static_cast<std::size_t>(E_Week) % 2],
                    DIURNAL_WINDOWS[static_cast<std::size_t>(E_Week) / 2]);
     }
@@ -940,8 +983,9 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForWeekly(con
     return result;
 }
 
-CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForDailyWithWeekend(const TFloatMeanAccumulatorCRng& buckets,
-                                                                                       STestStats& stats) const {
+CPeriodicityHypothesisTestsResult
+CPeriodicityHypothesisTests::testForDailyWithWeekend(const TFloatMeanAccumulatorCRng& buckets,
+                                                     STestStats& stats) const {
     LOG_TRACE(<< "Testing for weekend");
 
     CPeriodicityHypothesisTestsResult result{stats.s_H0};
@@ -955,33 +999,31 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForDailyWithW
     TTimeTimePr2Vec partition{{0, WEEKEND}, {WEEKEND, WEEK}};
     std::size_t bucketsPerWeek(WEEK / m_BucketLength);
 
-    if (m_Config.testForDiurnal() && m_BucketLength <= DAY / 4 && this->seenSufficientDataToTest(WEEK, buckets) &&
-        this->testPartition(partition, buckets, DAY, weekendPartitionVarianceCorrection(bucketsPerWeek), stats)) {
+    if (m_Config.testForDiurnal() && m_BucketLength <= DAY / 4 &&
+        this->seenSufficientDataToTest(WEEK, buckets) &&
+        this->testPartition(partition, buckets, DAY,
+                            weekendPartitionVarianceCorrection(bucketsPerWeek), stats)) {
         stats.s_Partition = partition;
         this->hypothesis({DAY, DAY}, buckets, stats);
         core_t::TTime startOfWeek{stats.s_StartOfPartition};
         result.remove(DIURNAL_COMPONENT_NAMES[E_Day]);
-        result.add(DIURNAL_COMPONENT_NAMES[E_WeekendDay],
-                   true,
-                   startOfWeek,
+        result.add(DIURNAL_COMPONENT_NAMES[E_WeekendDay], true, startOfWeek,
                    DIURNAL_PERIODS[static_cast<std::size_t>(E_WeekendDay) % 2],
-                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekendDay) / 2],
-                   HIGH_PRIORITY);
-        result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayDay],
-                   true,
-                   startOfWeek,
+                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekendDay) / 2], HIGH_PRIORITY);
+        result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayDay], true, startOfWeek,
                    DIURNAL_PERIODS[static_cast<std::size_t>(E_WeekdayDay) % 2],
-                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekdayDay) / 2],
-                   HIGH_PRIORITY);
+                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekdayDay) / 2], HIGH_PRIORITY);
     }
 
     return result;
 }
 
-CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend(const TTimeTimePr2Vec& windows,
-                                                                                                  const TFloatMeanAccumulatorCRng& buckets,
-                                                                                                  STestStats& stats) const {
-    LOG_TRACE(<< "Testing for weekly given weekend on " << core::CContainerPrinter::print(windows));
+CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForWeeklyGivenDailyWithWeekend(
+    const TTimeTimePr2Vec& windows,
+    const TFloatMeanAccumulatorCRng& buckets,
+    STestStats& stats) const {
+    LOG_TRACE(<< "Testing for weekly given weekend on "
+              << core::CContainerPrinter::print(windows));
 
     CPeriodicityHypothesisTestsResult result(stats.s_H0);
 
@@ -991,92 +1033,96 @@ CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForWeeklyGive
 
     core_t::TTime startOfWeek{stats.s_StartOfPartition};
 
-    CPeriodicityHypothesisTestsResult resultForWeekly{this->testForWeekly(windows, buckets, stats)};
+    CPeriodicityHypothesisTestsResult resultForWeekly{
+        this->testForWeekly(windows, buckets, stats)};
     if (resultForWeekly != result) {
         // Note that testForWeekly sets up the hypothesis for us.
-        result.add(DIURNAL_COMPONENT_NAMES[E_WeekendWeek],
-                   true,
-                   startOfWeek,
+        result.add(DIURNAL_COMPONENT_NAMES[E_WeekendWeek], true, startOfWeek,
                    DIURNAL_PERIODS[static_cast<std::size_t>(E_WeekendWeek) % 2],
-                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekendWeek) / 2],
-                   HIGH_PRIORITY);
-        result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayWeek],
-                   true,
-                   startOfWeek,
+                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekendWeek) / 2], HIGH_PRIORITY);
+        result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayWeek], true, startOfWeek,
                    DIURNAL_PERIODS[static_cast<std::size_t>(E_WeekdayWeek) % 2],
-                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekdayWeek) / 2],
-                   HIGH_PRIORITY);
+                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekdayWeek) / 2], HIGH_PRIORITY);
         return result;
     }
 
     core_t::TTime windowLength{length(windows)};
     TTimeTimePr2Vec partition{{0, WEEKEND}, {WEEKEND, WEEK}};
-    TTimeTimePr2Vec weekday(calculateWindows(startOfWeek, windowLength, WEEK, {WEEKEND, WEEK}));
-    CPeriodicityHypothesisTestsResult resultForWeekday{this->testForWeekly(weekday, buckets, stats)};
+    TTimeTimePr2Vec weekday(
+        calculateWindows(startOfWeek, windowLength, WEEK, {WEEKEND, WEEK}));
+    CPeriodicityHypothesisTestsResult resultForWeekday{
+        this->testForWeekly(weekday, buckets, stats)};
     if (resultForWeekday != result) {
         stats.s_StartOfPartition = startOfWeek;
         stats.s_Partition = partition;
         this->hypothesis({DAY, WEEK}, buckets, stats);
-        result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayWeek],
-                   true,
-                   startOfWeek,
+        result.add(DIURNAL_COMPONENT_NAMES[E_WeekdayWeek], true, startOfWeek,
                    DIURNAL_PERIODS[static_cast<std::size_t>(E_WeekdayWeek) % 2],
-                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekdayWeek) / 2],
-                   HIGH_PRIORITY);
+                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekdayWeek) / 2], HIGH_PRIORITY);
         return result;
     }
 
     TTimeTimePr2Vec weekend(calculateWindows(startOfWeek, windowLength, WEEK, {0, WEEKEND}));
-    CPeriodicityHypothesisTestsResult resultForWeekend{this->testForWeekly(weekend, buckets, stats)};
+    CPeriodicityHypothesisTestsResult resultForWeekend{
+        this->testForWeekly(weekend, buckets, stats)};
     if (resultForWeekend != result) {
         stats.s_StartOfPartition = startOfWeek;
         stats.s_Partition = partition;
         this->hypothesis({WEEK, DAY}, buckets, stats);
-        result.add(DIURNAL_COMPONENT_NAMES[E_WeekendWeek],
-                   true,
-                   startOfWeek,
+        result.add(DIURNAL_COMPONENT_NAMES[E_WeekendWeek], true, startOfWeek,
                    DIURNAL_PERIODS[static_cast<std::size_t>(E_WeekendWeek) % 2],
-                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekendWeek) / 2],
-                   HIGH_PRIORITY);
+                   DIURNAL_WINDOWS[static_cast<std::size_t>(E_WeekendWeek) / 2], HIGH_PRIORITY);
     }
 
     return result;
 }
 
-CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::testForPeriod(const TTimeTimePr2Vec& windows,
-                                                                             const TFloatMeanAccumulatorCRng& buckets,
-                                                                             STestStats& stats) const {
-    LOG_TRACE(<< "Testing for " << m_Period << " on " << core::CContainerPrinter::print(windows));
+CPeriodicityHypothesisTestsResult
+CPeriodicityHypothesisTests::testForPeriod(const TTimeTimePr2Vec& windows,
+                                           const TFloatMeanAccumulatorCRng& buckets,
+                                           STestStats& stats) const {
+    LOG_TRACE(<< "Testing for " << m_Period << " on "
+              << core::CContainerPrinter::print(windows));
 
     CPeriodicityHypothesisTestsResult result{stats.s_H0};
 
-    if (m_Period != DAY && m_Period != WEEK && m_BucketLength <= m_Period / 4 && this->seenSufficientDataToTest(m_Period, buckets)) {
+    if (m_Period != DAY && m_Period != WEEK && m_BucketLength <= m_Period / 4 &&
+        this->seenSufficientDataToTest(m_Period, buckets)) {
         stats.s_HasPeriod = false;
         EThreshold index{m_Period % DAY == 0 ? E_LowThreshold : E_HighThreshold};
-        stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[index], SIGNIFICANT_AMPLITUDE[index], SIGNIFICANT_AUTOCORRELATION[index]);
+        stats.setThresholds(SIGNIFICANT_VARIANCE_REDUCTION[index],
+                            SIGNIFICANT_AMPLITUDE[index],
+                            SIGNIFICANT_AUTOCORRELATION[index]);
         if (this->testPeriod(windows, buckets, m_Period, stats)) {
             stats.s_StartOfPartition = 0;
             stats.s_Partition.assign(1, {0, length(buckets, m_BucketLength)});
             this->hypothesis({m_Period}, buckets, stats);
-            result.add(core::CStringUtils::typeToString(m_Period), false, 0, m_Period, {0, m_Period});
+            result.add(core::CStringUtils::typeToString(m_Period), false, 0,
+                       m_Period, {0, m_Period});
         }
     }
 
     return result;
 }
 
-bool CPeriodicityHypothesisTests::seenSufficientDataToTest(core_t::TTime period, const TFloatMeanAccumulatorCRng& buckets) const {
-    return (buckets.size() * m_BucketLength) / period >= 2 && m_TimeRange.initialized() &&
-           static_cast<double>(m_TimeRange.range()) >= 2.0 * ACCURATE_TEST_POPULATED_FRACTION * static_cast<double>(period);
+bool CPeriodicityHypothesisTests::seenSufficientDataToTest(core_t::TTime period,
+                                                           const TFloatMeanAccumulatorCRng& buckets) const {
+    return (buckets.size() * m_BucketLength) / period >= 2 &&
+           m_TimeRange.initialized() &&
+           static_cast<double>(m_TimeRange.range()) >=
+               2.0 * ACCURATE_TEST_POPULATED_FRACTION * static_cast<double>(period);
 }
 
-bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTest(const TFloatMeanAccumulatorCRng& buckets,
-                                                                                   std::size_t period) const {
+bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTest(
+    const TFloatMeanAccumulatorCRng& buckets,
    std::size_t period) const {
     double repeats{0.0};
     for (std::size_t i = 0u; i < period; ++i) {
         for (std::size_t j = i + period; j < buckets.size(); j += period) {
-            if (CBasicStatistics::count(buckets[j]) * CBasicStatistics::count(buckets[j - period]) > 0.0) {
+            if (CBasicStatistics::count(buckets[j]) *
+                    CBasicStatistics::count(buckets[j - period]) >
+                0.0) {
                 repeats += 1.0;
                 break;
             }
@@ -1086,7 +1132,8 @@ bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTe
     return repeats >= static_cast<double>(period) * ACCURATE_TEST_POPULATED_FRACTION / 3.0;
 }
 
-bool CPeriodicityHypothesisTests::testStatisticsFor(const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const {
+bool CPeriodicityHypothesisTests::testStatisticsFor(const TFloatMeanAccumulatorCRng& buckets,
+                                                    STestStats& stats) const {
     CBasicStatistics::CMinMax<double> range;
     double populated{0.0};
     double count{0.0};
@@ -1103,12 +1150,14 @@ bool CPeriodicityHypothesisTests::testStatisticsFor(const TFloatMeanAccumulatorC
         return false;
     }
 
-    LOG_TRACE(<< "populated = " << 100.0 * populated / static_cast<double>(buckets.size()) << "%");
+    LOG_TRACE(<< "populated = "
+              << 100.0 * populated / static_cast<double>(buckets.size()) << "%");
 
     stats.s_Range = range.max() - range.min();
     stats.s_B = populated;
     stats.s_M = count / stats.s_B;
-    LOG_TRACE(<< "range = " << stats.s_Range << ", populatedBuckets = " << stats.s_B << ", valuesPerBucket = " << stats.s_M);
+    LOG_TRACE(<< "range = " << stats.s_Range << ", populatedBuckets = " << stats.s_B
+              << ", valuesPerBucket = " << stats.s_M);
 
     return true;
 }
@@ -1130,7 +1179,9 @@ void CPeriodicityHypothesisTests::nullHypothesis(const TTimeTimePr2Vec& window,
     }
 }
 
-void CPeriodicityHypothesisTests::hypothesis(const TTime2Vec& periods, const TFloatMeanAccumulatorCRng& buckets, STestStats& stats) const {
+void CPeriodicityHypothesisTests::hypothesis(const TTime2Vec& periods,
+                                             const TFloatMeanAccumulatorCRng& buckets,
+                                             STestStats& stats) const {
     if (this->testStatisticsFor(buckets, stats)) {
         stats.s_V0 = 0.0;
         stats.s_DF0 = 0.0;
@@ -1138,16 +1189,20 @@ void CPeriodicityHypothesisTests::hypothesis(const TTime2Vec& periods, const TFl
         for (std::size_t i = 0u; i < stats.s_Partition.size(); ++i) {
             core_t::TTime period_{std::min(periods[i], length(stats.s_Partition[i])) / m_BucketLength};
             TTimeTimePr2Vec windows(calculateWindows(
-                stats.s_StartOfPartition, length(buckets, m_BucketLength), length(stats.s_Partition), stats.s_Partition[i]));
+                stats.s_StartOfPartition, length(buckets, m_BucketLength),
+                length(stats.s_Partition), stats.s_Partition[i]));
             TMeanVarAccumulatorVec trend(periods[i] / m_BucketLength);
             periodicTrend(buckets, windows, m_BucketLength, trend);
             stats.s_V0 += residualVariance(trend, 1.0 / stats.s_M);
             stats.s_DF0 += static_cast<double>(std::count_if(
-                trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) { return CBasicStatistics::count(value) > 0.0; }));
+                trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) {
+                    return CBasicStatistics::count(value) > 0.0;
+                }));
             stats.s_T0[i].reserve(period_);
-            std::for_each(trend.begin(), trend.end(), [&stats, i](const TMeanVarAccumulator& value) {
-                stats.s_T0[i].push_back(CBasicStatistics::mean(value));
-            });
+            std::for_each(trend.begin(), trend.end(),
+                          [&stats, i](const TMeanVarAccumulator& value) {
+                              stats.s_T0[i].push_back(CBasicStatistics::mean(value));
+                          });
         }
         stats.s_V0 /= static_cast<double>(periods.size());
     }
@@ -1159,17 +1214,21 @@ void CPeriodicityHypothesisTests::conditionOnHypothesis(const TTimeTimePr2Vec& w
     std::size_t n{buckets.size()};
     core_t::TTime windowLength{static_cast<core_t::TTime>(n) * m_BucketLength};
     for (std::size_t i = 0u; i < stats.s_Partition.size(); ++i) {
-        TTimeTimePr2Vec windows_(calculateWindows(stats.s_StartOfPartition, windowLength, length(stats.s_Partition), stats.s_Partition[i]));
+        TTimeTimePr2Vec windows_(
+            calculateWindows(stats.s_StartOfPartition, windowLength,
+                             length(stats.s_Partition), stats.s_Partition[i]));
         TSizeSizePr2Vec indexWindows;
         calculateIndexWindows(windows_, m_BucketLength, indexWindows);
 
         std::size_t period{stats.s_T0[i].size()};
<< " in windows = " << core::CContainerPrinter::print(windows_)); + LOG_TRACE(<< "Conditioning on period = " << period + << " in windows = " << core::CContainerPrinter::print(windows_)); for (const auto& window : indexWindows) { std::size_t a{window.first}; std::size_t b{window.second}; for (std::size_t j = a; j < b; ++j) { - CBasicStatistics::moment<0>(buckets[j % n]) -= stats.s_T0[i][(j - a) % period]; + CBasicStatistics::moment<0>(buckets[j % n]) -= + stats.s_T0[i][(j - a) % period]; } } } @@ -1232,19 +1291,24 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows, TMeanVarAccumulatorVec trend(period); periodicTrend(values, window, m_BucketLength, trend); - double b{static_cast( - std::count_if(trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) { return CBasicStatistics::count(value) > 0.0; }))}; + double b{static_cast(std::count_if( + trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) { + return CBasicStatistics::count(value) > 0.0; + }))}; LOG_TRACE(<< " populated = " << b); double df1{B - b}; if (df1 > 0.0) { - double v1{varianceAtPercentile(residualVariance(trend, scale), df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)}; + double v1{varianceAtPercentile(residualVariance(trend, scale), + df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)}; LOG_TRACE(<< " variance = " << v1); LOG_TRACE(<< " varianceThreshold = " << vt); - LOG_TRACE(<< " significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0)); + LOG_TRACE(<< " significance = " + << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0)); double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)}; - if (v1 < vt && B > 1.0 && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE) { + if (v1 < vt && B > 1.0 && + CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE) { double R{CSignal::autocorrelation(period, values)}; R = autocorrelationAtPercentile(R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0); LOG_TRACE(<< " autocorrelation = " << R); @@ -1259,7 +1323,8 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows, double F1{1.0}; if (v1 > 0.0) { try { - std::size_t n{static_cast(std::ceil(Rt * static_cast(length(window) / period_)))}; + std::size_t n{static_cast( + std::ceil(Rt * static_cast(length(window) / period_)))}; TMeanAccumulator level; for (const auto& value : values) { if (CBasicStatistics::count(value) > 0.0) { @@ -1269,12 +1334,15 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows, TMinAmplitudeVec amplitudes(period, {n, CBasicStatistics::mean(level)}); periodicTrend(values, window, m_BucketLength, amplitudes); boost::math::normal normal(0.0, std::sqrt(v1)); - std::for_each(amplitudes.begin(), amplitudes.end(), [&F1, &normal, at](CMinAmplitude& x) { - if (x.amplitude() >= at) { - F1 = std::min(F1, x.significance(normal)); - } - }); - } catch (const std::exception& e) { LOG_ERROR(<< "Unable to compute significance of amplitude: " << e.what()); } + std::for_each(amplitudes.begin(), amplitudes.end(), + [&F1, &normal, at](CMinAmplitude& x) { + if (x.amplitude() >= at) { + F1 = std::min(F1, x.significance(normal)); + } + }); + } catch (const std::exception& e) { + LOG_ERROR(<< "Unable to compute significance of amplitude: " << e.what()); + } } LOG_TRACE(<< " F(amplitude) = " << F1); @@ -1295,7 +1363,8 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; using TMeanVarAccumulatorBuffer = 
        boost::circular_buffer<TMeanVarAccumulator>;
 
-    LOG_TRACE(<< "Testing partition " << core::CContainerPrinter::print(partition) << " with period " << period_);
+    LOG_TRACE(<< "Testing partition " << core::CContainerPrinter::print(partition)
+              << " with period " << period_);
 
     if (!this->testStatisticsFor(buckets, stats) || stats.nullHypothesisGoodEnough()) {
         return false;
@@ -1333,8 +1402,9 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
     TFloatMeanAccumulatorVec values(buckets.begin(), buckets.end());
     this->conditionOnHypothesis({{0, windowLength}}, stats, values);
 
-    TTimeTimePr2Vec windows[]{calculateWindows(startOfPartition, windowLength, repeat, partition[0]),
-                              calculateWindows(startOfPartition, windowLength, repeat, partition[1])};
+    TTimeTimePr2Vec windows[]{
+        calculateWindows(startOfPartition, windowLength, repeat, partition[0]),
+        calculateWindows(startOfPartition, windowLength, repeat, partition[1])};
     LOG_TRACE(<< "windows = " << core::CContainerPrinter::print(windows));
 
     TTimeVec deltas[2];
@@ -1351,8 +1421,9 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
     }
     LOG_TRACE(<< "deltas = " << core::CContainerPrinter::print(deltas));
 
-    TMeanVarAccumulatorBuffer trends[]{TMeanVarAccumulatorBuffer(period, TMeanVarAccumulator()),
-                                       TMeanVarAccumulatorBuffer(period, TMeanVarAccumulator())};
+    TMeanVarAccumulatorBuffer trends[]{
+        TMeanVarAccumulatorBuffer(period, TMeanVarAccumulator()),
+        TMeanVarAccumulatorBuffer(period, TMeanVarAccumulator())};
     periodicTrend(values, windows[0], m_BucketLength, trends[0]);
     periodicTrend(values, windows[1], m_BucketLength, trends[1]);
 
@@ -1379,7 +1450,8 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
             variances[i] -= residualVariance(oldBucket, scale);
             variances[i] += residualVariance(newBucket, scale);
         }
-        double variance{(residualVariance(variances[0]) + residualVariance(variances[1])) / 2.0};
+        double variance{
+            (residualVariance(variances[0]) + residualVariance(variances[1])) / 2.0};
         minimum.add({variance, time});
         if (variance <= 1.05 * minimum[0].first) {
             candidates.emplace_back(variance, time);
@@ -1393,7 +1465,8 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
     for (const auto& candidate : candidates) {
         if (candidate.first <= 1.05 * minimum[0].first) {
             core_t::TTime candidateStartOfPartition{candidate.second};
-            candidateWindows = calculateWindows(candidateStartOfPartition, windowLength, repeat, partition[0]);
+            candidateWindows = calculateWindows(candidateStartOfPartition,
+                                                windowLength, repeat, partition[0]);
             TMeanAccumulator cost;
             for (const auto& window : candidateWindows) {
                 core_t::TTime a_{window.first / m_BucketLength};
@@ -1405,13 +1478,16 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
             if (best.add({CBasicStatistics::mean(cost), candidateStartOfPartition})) {
                 b = 0.0;
                 for (std::size_t i = 0u; i < 2; ++i) {
-                    candidateWindows = calculateWindows(candidateStartOfPartition, windowLength, repeat, partition[i]);
+                    candidateWindows = calculateWindows(candidateStartOfPartition, windowLength,
+                                                        repeat, partition[i]);
 
                     TMeanVarAccumulatorVec trend(period);
                     periodicTrend(values, candidateWindows, m_BucketLength, trend);
 
                     b += static_cast<double>(std::count_if(
-                        trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) { return CBasicStatistics::count(value) > 0.0; }));
+                        trend.begin(), trend.end(), [](const TMeanVarAccumulator& value) {
+                            return CBasicStatistics::count(value) > 0.0;
+                        }));
                 }
             }
        }
@@ -1423,26 +1499,31 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
     double v1{varianceAtPercentile(variance, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)};
     LOG_TRACE(<< " variance = " << v1);
     LOG_TRACE(<< " varianceThreshold = " << vt);
-    LOG_TRACE(<< " significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
+    LOG_TRACE(<< " significance = "
+              << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
 
     if (v1 <= vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE) {
         double R{-1.0};
         double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
 
         startOfPartition = best[0].second;
-        windows[0] = calculateWindows(startOfPartition, windowLength, repeat, partition[0]);
-        windows[1] = calculateWindows(startOfPartition, windowLength, repeat, partition[1]);
+        windows[0] = calculateWindows(startOfPartition, windowLength,
+                                      repeat, partition[0]);
+        windows[1] = calculateWindows(startOfPartition, windowLength,
+                                      repeat, partition[1]);
         for (const auto& windows_ : windows) {
             TFloatMeanAccumulatorVec partitionValues;
             project(values, windows_, m_BucketLength, partitionValues);
 
             std::size_t windowLength_(length(windows_[0]) / m_BucketLength);
-            double BW{
-                std::accumulate(partitionValues.begin(), partitionValues.end(), 0.0, [](double n, const TFloatMeanAccumulator& value) {
+            double BW{std::accumulate(
+                partitionValues.begin(), partitionValues.end(), 0.0,
+                [](double n, const TFloatMeanAccumulator& value) {
                     return n + (CBasicStatistics::count(value) > 0.0 ? 1.0 : 0.0);
                 })};
             if (BW > 1.0) {
                 double RW{CSignal::autocorrelation(windowLength_ + period, partitionValues)};
-                R = std::max(R, autocorrelationAtPercentile(RW, BW, 50.0 - CONFIDENCE_INTERVAL / 2.0));
+                R = std::max(R, autocorrelationAtPercentile(
+                                    RW, BW, 50.0 - CONFIDENCE_INTERVAL / 2.0));
                 LOG_TRACE(<< " autocorrelation = " << R);
                 LOG_TRACE(<< " autocorrelationThreshold = " << Rt);
             }
@@ -1461,17 +1542,8 @@ const double CPeriodicityHypothesisTests::ACCURATE_TEST_POPULATED_FRACTION{0.9};
 const double CPeriodicityHypothesisTests::MINIMUM_COEFFICIENT_OF_VARIATION{1e-4};
 
 CPeriodicityHypothesisTests::STestStats::STestStats()
-    : s_HasPeriod(false),
-      s_HasPartition(false),
-      s_Vt(0.0),
-      s_At(0.0),
-      s_Rt(0.0),
-      s_Range(0.0),
-      s_B(0.0),
-      s_M(0.0),
-      s_V0(0.0),
-      s_DF0(0.0),
-      s_StartOfPartition(0) {
+    : s_HasPeriod(false), s_HasPartition(false), s_Vt(0.0), s_At(0.0), s_Rt(0.0),
+      s_Range(0.0), s_B(0.0), s_M(0.0), s_V0(0.0), s_DF0(0.0), s_StartOfPartition(0) {
 }
 
 void CPeriodicityHypothesisTests::STestStats::setThresholds(double vt, double at, double Rt) {
@@ -1483,29 +1555,35 @@ void CPeriodicityHypothesisTests::STestStats::setThresholds(double vt, double at
 
 bool CPeriodicityHypothesisTests::STestStats::nullHypothesisGoodEnough() const {
     TMeanAccumulator mean;
     for (const auto& t : s_T0) {
-        mean += std::accumulate(t.begin(), t.end(), TMeanAccumulator(), [](TMeanAccumulator m, double x) {
-            m.add(std::fabs(x));
-            return m;
-        });
+        mean += std::accumulate(t.begin(), t.end(), TMeanAccumulator(),
+                                [](TMeanAccumulator m, double x) {
+                                    m.add(std::fabs(x));
+                                    return m;
+                                });
     }
-    return std::sqrt(s_V0) <= MINIMUM_COEFFICIENT_OF_VARIATION * CBasicStatistics::mean(mean);
+    return std::sqrt(s_V0) <=
+           MINIMUM_COEFFICIENT_OF_VARIATION * CBasicStatistics::mean(mean);
 }
 
-CPeriodicityHypothesisTests::CNestedHypotheses::CNestedHypotheses(TTestFunc test) : m_Test(test), m_AlwaysTestNested(false) {
+CPeriodicityHypothesisTests::CNestedHypotheses::CNestedHypotheses(TTestFunc test)
+    : m_Test(test), m_AlwaysTestNested(false) {
 }
 
-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder CPeriodicityHypothesisTests::CNestedHypotheses::null(TTestFunc test) {
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder
+CPeriodicityHypothesisTests::CNestedHypotheses::null(TTestFunc test) {
     m_Test = test;
     m_AlwaysTestNested = true;
     return CBuilder(*this);
 }
 
-CPeriodicityHypothesisTests::CNestedHypotheses& CPeriodicityHypothesisTests::CNestedHypotheses::addNested(TTestFunc test) {
+CPeriodicityHypothesisTests::CNestedHypotheses&
+CPeriodicityHypothesisTests::CNestedHypotheses::addNested(TTestFunc test) {
     m_Nested.emplace_back(test);
     return m_Nested.back();
 }
 
-CPeriodicityHypothesisTestsResult CPeriodicityHypothesisTests::CNestedHypotheses::test(STestStats& stats) const {
+CPeriodicityHypothesisTestsResult
+CPeriodicityHypothesisTests::CNestedHypotheses::test(STestStats& stats) const {
     CPeriodicityHypothesisTestsResult result{m_Test(stats)};
     if (m_AlwaysTestNested || result != stats.s_H0) {
         stats.s_H0 = result;
@@ -1536,7 +1614,8 @@ CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::addAlternative(TTestFu
     return this->addNested(test);
 }
 
-CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder& CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::finishedNested() {
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder&
+CPeriodicityHypothesisTests::CNestedHypotheses::CBuilder::finishedNested() {
     m_Levels.pop_back();
     return *this;
 }
@@ -1545,9 +1624,12 @@ namespace {
 
 //! Compute the mean of the autocorrelation for \f${P, 2P, ...}\f$
 //! where \f$P\f$ is \p period.
-double meanAutocorrelationForPeriodicOffsets(const TDoubleVec& correlations, std::size_t window, std::size_t period) {
+double meanAutocorrelationForPeriodicOffsets(const TDoubleVec& correlations,
+                                             std::size_t window,
+                                             std::size_t period) {
     auto correctForPad = [window](double correlation, std::size_t offset) {
-        return correlation * static_cast<double>(window) / static_cast<double>(window - offset);
+        return correlation * static_cast<double>(window) /
+               static_cast<double>(window - offset);
     };
     TMeanAccumulator result;
     for (std::size_t offset = period; offset < correlations.size(); offset += period) {
@@ -1561,7 +1643,8 @@ double meanAutocorrelationForPeriodicOffsets(const TDoubleVec& correlations, std
 std::size_t mostSignificantPeriodicComponent(TFloatMeanAccumulatorVec values) {
     using TSizeVec = std::vector<std::size_t>;
     using TDoubleSizePr = std::pair<double, std::size_t>;
-    using TMaxAccumulator = CBasicStatistics::COrderStatisticsHeap<TDoubleSizePr, std::greater<TDoubleSizePr>>;
+    using TMaxAccumulator =
+        CBasicStatistics::COrderStatisticsHeap<TDoubleSizePr, std::greater<TDoubleSizePr>>;
     using TFloatMeanAccumulatorCRng = core::CVectorRange<const TFloatMeanAccumulatorVec>;
 
     std::size_t n{values.size()};
@@ -1590,7 +1673,8 @@ std::size_t mostSignificantPeriodicComponent(TFloatMeanAccumulatorVec values) {
 
     // Sort by decreasing cyclic autocorrelation.
     TSizeVec candidatePeriods(15);
     std::transform(
-        candidates.begin(), candidates.end(), candidatePeriods.begin(), [](const TDoubleSizePr& candidate_) { return candidate_.second; });
+        candidates.begin(), candidates.end(), candidatePeriods.begin(),
+        [](const TDoubleSizePr& candidate_) { return candidate_.second; });
     candidates.clear();
     for (const auto period : candidatePeriods) {
         TFloatMeanAccumulatorCRng window(values, 0, period * (values.size() / period));
@@ -1615,17 +1699,18 @@ std::size_t mostSignificantPeriodicComponent(TFloatMeanAccumulatorVec values) {
     }
 }
 
-CPeriodicityHypothesisTestsResult testForPeriods(const CPeriodicityHypothesisTestsConfig& config,
-                                                 core_t::TTime startTime,
-                                                 core_t::TTime bucketLength,
-                                                 const TFloatMeanAccumulatorVec& values) {
+CPeriodicityHypothesisTestsResult
+testForPeriods(const CPeriodicityHypothesisTestsConfig& config,
+               core_t::TTime startTime,
+               core_t::TTime bucketLength,
+               const TFloatMeanAccumulatorVec& values) {
     // Find the single periodic component which explains the
     // most cyclic autocorrelation.
     std::size_t period_{mostSignificantPeriodicComponent(values)};
     core_t::TTime window{static_cast<core_t::TTime>(values.size()) * bucketLength};
     core_t::TTime period{static_cast<core_t::TTime>(period_) * bucketLength};
-    LOG_TRACE(<< "bucket length = " << bucketLength << ", window = " << window << ", periods to test = " << period
-              << ", # values = " << values.size());
+    LOG_TRACE(<< "bucket length = " << bucketLength << ", window = " << window
+              << ", periods to test = " << period << ", # values = " << values.size());
 
     // Set up the hypothesis tests.
     CPeriodicityHypothesisTests test{config};
diff --git a/lib/maths/CPoissonMeanConjugate.cc b/lib/maths/CPoissonMeanConjugate.cc
index 0e09631415..a56cefe743 100644
--- a/lib/maths/CPoissonMeanConjugate.cc
+++ b/lib/maths/CPoissonMeanConjugate.cc
@@ -61,7 +61,9 @@ using TWeightStyleVec = maths_t::TWeightStyleVec;
 
 //! Adds "weight" x "right operand" to the "left operand".
 struct SPlusWeight {
-    double operator()(double lhs, double rhs, double weight = 1.0) const { return lhs + weight * rhs; }
+    double operator()(double lhs, double rhs, double weight = 1.0) const {
+        return lhs + weight * rhs;
+    }
 };
 
 //! Evaluate \p func on the joint predictive distribution for \p samples
@@ -155,7 +157,8 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
             }
         } catch (const std::exception& e) {
             LOG_ERROR(<< "Error calculating joint c.d.f."
- << " offset = " << offset << ", shape = " << shape << ", rate = " << rate << ": " << e.what()); + << " offset = " << offset << ", shape = " << shape + << ", rate = " << rate << ": " << e.what()); return false; } @@ -178,24 +181,28 @@ const std::string EMPTY_STRING; } CPoissonMeanConjugate::CPoissonMeanConjugate(double offset, double shape, double rate, double decayRate /*= 0.0*/) - : CPrior(maths_t::E_IntegerData, decayRate), m_Offset(offset), m_Shape(shape), m_Rate(rate) { + : CPrior(maths_t::E_IntegerData, decayRate), m_Offset(offset), + m_Shape(shape), m_Rate(rate) { } -CPoissonMeanConjugate::CPoissonMeanConjugate(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) - : CPrior(maths_t::E_IntegerData, params.s_DecayRate), m_Offset(0.0), m_Shape(0.0), m_Rate(0.0) { - traverser.traverseSubLevel(boost::bind(&CPoissonMeanConjugate::acceptRestoreTraverser, this, _1)); +CPoissonMeanConjugate::CPoissonMeanConjugate(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) + : CPrior(maths_t::E_IntegerData, params.s_DecayRate), m_Offset(0.0), + m_Shape(0.0), m_Rate(0.0) { + traverser.traverseSubLevel( + boost::bind(&CPoissonMeanConjugate::acceptRestoreTraverser, this, _1)); } bool CPoissonMeanConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) RESTORE_BUILT_IN(OFFSET_TAG, m_Offset) RESTORE_BUILT_IN(SHAPE_TAG, m_Shape) RESTORE_BUILT_IN(RATE_TAG, m_Rate) - RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, - double numberSamples, + RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples, core::CStringUtils::stringToType(traverser.value(), numberSamples), this->numberSamples(numberSamples)) } while (traverser.next()); @@ -210,7 +217,8 @@ CPoissonMeanConjugate CPoissonMeanConjugate::nonInformativePrior(double offset, // Since we have defined the gamma distribution in terms of the inverse // scale "k -> inf" is equivalent to "b = 1 / k -> 0.0". 
-    return CPoissonMeanConjugate(offset, NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, decayRate);
+    return CPoissonMeanConjugate(offset, NON_INFORMATIVE_SHAPE,
+                                 NON_INFORMATIVE_RATE, decayRate);
 }
 
 CPoissonMeanConjugate::EPrior CPoissonMeanConjugate::type() const {
@@ -232,7 +240,8 @@ bool CPoissonMeanConjugate::needsOffset() const {
 double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/,
                                            const TDouble1Vec& samples,
                                            const TDouble4Vec1Vec& /*weights*/) {
-    if (samples.empty() || CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) {
+    if (samples.empty() ||
+        CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) {
         return 0.0;
     }
 
@@ -249,7 +258,8 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles
     static const double EPS = 0.01;
     static const double OFFSET_MARGIN = 0.0;
 
-    double minimumSample = *std::min_element(CMathsFuncs::beginFinite(samples), CMathsFuncs::endFinite(samples));
+    double minimumSample = *std::min_element(CMathsFuncs::beginFinite(samples),
+                                             CMathsFuncs::endFinite(samples));
     if (minimumSample + m_Offset >= OFFSET_MARGIN) {
         return 0.0;
     }
@@ -280,7 +290,8 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles
         sample = std::max(sample, OFFSET_MARGIN - offset);
     }
 
-    LOG_TRACE(<< "resamples = " << core::CContainerPrinter::print(resamples) << ", weight = " << weight << ", offset = " << m_Offset);
+    LOG_TRACE(<< "resamples = " << core::CContainerPrinter::print(resamples)
+              << ", weight = " << weight << ", offset = " << m_Offset);
 
     this->addSamples(weightStyle, resamples, weights);
 
@@ -294,13 +305,16 @@ double CPoissonMeanConjugate::offset() const {
     return m_Offset;
 }
 
-void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) {
+void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles,
+                                       const TDouble1Vec& samples,
+                                       const TDouble4Vec1Vec& weights) {
     if (samples.empty()) {
         return;
     }
 
     if (samples.size() != weights.size()) {
-        LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+        LOG_ERROR(<< "Mismatch in samples '"
+                  << core::CContainerPrinter::print(samples) << "' and weights '"
                   << core::CContainerPrinter::print(weights) << "'");
         return;
     }
@@ -347,8 +361,9 @@ void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, cons
     m_Shape += sampleSum;
     m_Rate += numberSamples;
 
-    LOG_TRACE(<< "# samples = " << numberSamples << ", sampleSum = " << sampleSum << ", m_Shape = " << m_Shape << ", m_Rate = " << m_Rate
-              << ", m_Offset = " << m_Offset);
+    LOG_TRACE(<< "# samples = " << numberSamples
+              << ", sampleSum = " << sampleSum << ", m_Shape = " << m_Shape
+              << ", m_Rate = " << m_Rate << ", m_Offset = " << m_Offset);
 }
 
 void CPoissonMeanConjugate::propagateForwardsByTime(double time) {
@@ -374,14 +389,16 @@ void CPoissonMeanConjugate::propagateForwardsByTime(double time) {
     //
     // Thus the mean is unchanged and variance is increased by 1 / f.
-    double factor = std::min((alpha * m_Shape + (1.0 - alpha) * NON_INFORMATIVE_SHAPE) / m_Shape, 1.0);
+    double factor = std::min(
+        (alpha * m_Shape + (1.0 - alpha) * NON_INFORMATIVE_SHAPE) / m_Shape, 1.0);
 
     m_Shape *= factor;
     m_Rate *= factor;
 
     this->numberSamples(this->numberSamples() * alpha);
 
-    LOG_TRACE(<< "time = " << time << ", alpha = " << alpha << ", m_Shape = " << m_Shape << ", m_Rate = " << m_Rate
+    LOG_TRACE(<< "time = " << time << ", alpha = " << alpha
+              << ", m_Shape = " << m_Shape << ", m_Rate = " << m_Rate
               << ", numberSamples = " << this->numberSamples());
 }
 
@@ -401,7 +418,8 @@ double CPoissonMeanConjugate::marginalLikelihoodMean() const {
     return this->priorMean() - m_Offset;
 }
 
-double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, const TDouble4Vec& /*weights*/) const {
+double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
+                                                     const TDouble4Vec& /*weights*/) const {
     if (this->isNonInformative()) {
         return -m_Offset;
     }
@@ -422,14 +440,15 @@ double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*we
         boost::math::negative_binomial_distribution<> negativeBinomial(r, p);
         return boost::math::mode(negativeBinomial) - m_Offset;
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what() << ", prior shape = " << m_Shape
-                  << ", prior rate = " << m_Rate);
+        LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what()
+                  << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate);
     }
 
     return -m_Offset;
 }
 
-double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const {
+double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles,
+                                                         const TDouble4Vec& weights) const {
     if (this->isNonInformative()) {
         return boost::numeric::bounds<double>::highest();
     }
@@ -440,14 +459,18 @@ double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec& 
 
     double varianceScale = 1.0;
     try {
-        varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * maths_t::countVarianceScale(weightStyles, weights);
-    } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get variance scale: " << e.what()); }
+        varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) *
+                        maths_t::countVarianceScale(weightStyles, weights);
+    } catch (const std::exception& e) {
+        LOG_ERROR(<< "Failed to get variance scale: " << e.what());
+    }
     return varianceScale * (this->priorMean() + this->priorVariance());
 }
 
-CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage,
-                                                                                                   const TWeightStyleVec& /*weightStyles*/,
-                                                                                                   const TDouble4Vec& /*weights*/) const {
+CPoissonMeanConjugate::TDoubleDoublePr
+CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage,
+                                                            const TWeightStyleVec& /*weightStyles*/,
+                                                            const TDouble4Vec& /*weights*/) const {
     if (this->isNonInformative()) {
         return this->marginalLikelihoodSupport();
     }
@@ -463,17 +486,22 @@ CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::marginalLikelihood
         double p = m_Rate / (m_Rate + 1.0);
         boost::math::negative_binomial_distribution<> negativeBinomial(r, p);
         double x1 = boost::math::quantile(negativeBinomial, (1.0 - percentage) / 2.0) - m_Offset;
-        double x2 = percentage > 0.0 ? boost::math::quantile(negativeBinomial, (1.0 + percentage) / 2.0) - m_Offset : x1;
+        double x2 = percentage > 0.0
+                        ? boost::math::quantile(negativeBinomial, (1.0 + percentage) / 2.0) - m_Offset
+                        : x1;
         return std::make_pair(x1, x2);
-    } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); }
+    } catch (const std::exception& e) {
+        LOG_ERROR(<< "Failed to compute confidence interval: " << e.what());
+    }
 
     return this->marginalLikelihoodSupport();
 }
 
-maths_t::EFloatingPointErrorStatus CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                                                     const TDouble1Vec& samples,
-                                                                                     const TDouble4Vec1Vec& weights,
-                                                                                     double& result) const {
+maths_t::EFloatingPointErrorStatus
+CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
+                                                  const TDouble1Vec& samples,
+                                                  const TDouble4Vec1Vec& weights,
+                                                  double& result) const {
     result = 0.0;
 
     if (samples.empty()) {
@@ -482,7 +510,8 @@ maths_t::EFloatingPointErrorStatus CPoissonMeanConjugate::jointLogMarginalLikeli
     }
 
     if (samples.size() != weights.size()) {
-        LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '"
+        LOG_ERROR(<< "Mismatch in samples '"
+                  << core::CContainerPrinter::print(samples) << "' and weights '"
                   << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
    }
@@ -550,8 +579,9 @@ maths_t::EFloatingPointErrorStatus CPoissonMeanConjugate::jointLogMarginalLikeli
         double impliedShape = m_Shape + sampleSum;
         double impliedRate = m_Rate + numberSamples;
 
-        result = boost::math::lgamma(impliedShape) + m_Shape * std::log(m_Rate) - impliedShape * std::log(impliedRate) -
-                 sampleLogFactorialSum - boost::math::lgamma(m_Shape);
+        result = boost::math::lgamma(impliedShape) + m_Shape * std::log(m_Rate) -
+                 impliedShape * std::log(impliedRate) - sampleLogFactorialSum -
+                 boost::math::lgamma(m_Shape);
     } catch (const std::exception& e) {
         LOG_ERROR(<< "Error calculating marginal likelihood: " << e.what());
         return maths_t::E_FpFailed;
@@ -566,7 +596,8 @@ maths_t::EFloatingPointErrorStatus CPoissonMeanConjugate::jointLogMarginalLikeli
     return status;
 }
 
-void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const {
+void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
+                                                     TDouble1Vec& samples) const {
     samples.clear();
 
     if (numberSamples == 0 || this->isNonInformative()) {
@@ -632,9 +663,12 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, 
                 double q = static_cast<double>(i) / static_cast<double>(numberSamples);
                 double xq = boost::math::quantile(normal, q);
 
-                double partialExpectation = mean * q - variance * CTools::safePdf(normal, xq);
+                double partialExpectation = mean * q -
+                                            variance * CTools::safePdf(normal, xq);
 
-                double sample = static_cast<double>(numberSamples) * (partialExpectation - lastPartialExpectation) - m_Offset;
+                double sample = static_cast<double>(numberSamples) *
+                                    (partialExpectation - lastPartialExpectation) -
+                                m_Offset;
 
                 LOG_TRACE(<< "sample = " << sample);
 
@@ -642,14 +676,16 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, 
                 if (sample >= support.first && sample <= support.second) {
                     samples.push_back(sample);
                 } else {
-                    LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", support = [" << support.first << "," << support.second
-                              << "]"
-                              << ", mean = " << mean << ", variance = " << variance << ", q = " << q << ", x(q) = " << xq);
+                    LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", support = ["
+                              << support.first << "," << support.second << "]"
+                              << ", mean = " << mean << ", variance = " << variance
+                              << ", q = " << q << ", x(q) = " << xq);
", variance = " << variance + << ", q = " << q << ", x(q) = " << xq); } lastPartialExpectation = partialExpectation; } } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to sample: " << e.what() << ", mean = " << mean << ", variance = " << variance); + LOG_ERROR(<< "Failed to sample: " << e.what() << ", mean = " << mean + << ", variance = " << variance); } } else { double r = m_Shape; @@ -662,7 +698,8 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, using boost::math::policies::real; using TRealQuantilePolicy = policy>; - using TNegativeBinomialRealQuantile = boost::math::negative_binomial_distribution; + using TNegativeBinomialRealQuantile = + boost::math::negative_binomial_distribution; try { TNegativeBinomialRealQuantile negativeBinomial1(r, p); @@ -672,9 +709,12 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, double q = static_cast(i) / static_cast(numberSamples); double xq = boost::math::quantile(negativeBinomial1, q); - double partialExpectation = mean * boost::math::cdf(negativeBinomial2, std::max(xq - 1.0, 0.0)); + double partialExpectation = + mean * boost::math::cdf(negativeBinomial2, std::max(xq - 1.0, 0.0)); - double sample = static_cast(numberSamples) * (partialExpectation - lastPartialExpectation) - m_Offset; + double sample = static_cast(numberSamples) * + (partialExpectation - lastPartialExpectation) - + m_Offset; LOG_TRACE(<< "sample = " << sample); @@ -682,14 +722,16 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, if (sample >= support.first && sample <= support.second) { samples.push_back(sample); } else { - LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", support = [" << support.first << "," << support.second - << "]" - << ", mean = " << mean << ", r = " << r << ", p = " << p << ", q = " << q << ", x(q) = " << xq); + LOG_ERROR(<< "Sample out of bounds: sample = " << sample << ", support = [" + << support.first << "," << support.second << "]" + << ", mean = " << mean << ", r = " << r << ", p = " << p + << ", q = " << q << ", x(q) = " << xq); } lastPartialExpectation = partialExpectation; } } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to sample: " << e.what() << ", mean = " << mean << ", r = " << r << ", p = " << p); + LOG_ERROR(<< "Failed to sample: " << e.what() << ", mean = " << mean + << ", r = " << r << ", p = " << p); } } @@ -713,17 +755,11 @@ bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles lowerBound = upperBound = 0.0; double value; - if (!detail::evaluateFunctionOnJointDistribution(weightStyles, - samples, - weights, - CTools::SMinusLogCdf(), - detail::SPlusWeight(), - m_Offset, - this->isNonInformative(), - m_Shape, - m_Rate, - value)) { - LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); + if (!detail::evaluateFunctionOnJointDistribution( + weightStyles, samples, weights, CTools::SMinusLogCdf(), detail::SPlusWeight(), + m_Offset, this->isNonInformative(), m_Shape, m_Rate, value)) { + LOG_ERROR(<< "Failed computing c.d.f. 
for " + << core::CContainerPrinter::print(samples)); return false; } @@ -739,17 +775,12 @@ bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec& we lowerBound = upperBound = 0.0; double value; - if (!detail::evaluateFunctionOnJointDistribution(weightStyles, - samples, - weights, - CTools::SMinusLogCdfComplement(), - detail::SPlusWeight(), - m_Offset, - this->isNonInformative(), - m_Shape, - m_Rate, - value)) { - LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); + if (!detail::evaluateFunctionOnJointDistribution( + weightStyles, samples, weights, CTools::SMinusLogCdfComplement(), + detail::SPlusWeight(), m_Offset, this->isNonInformative(), m_Shape, + m_Rate, value)) { + LOG_ERROR(<< "Failed computing c.d.f. complement for " + << core::CContainerPrinter::print(samples)); return false; } @@ -772,18 +803,14 @@ bool CPoissonMeanConjugate::probabilityOfLessLikelySamples(maths_t::EProbability CJointProbabilityOfLessLikelySamples probability; if (!detail::evaluateFunctionOnJointDistribution( - weightStyles, - samples, - weights, - boost::bind(CTools::CProbabilityOfLessLikelySample(calculation), _1, _2, boost::ref(tail_)), - CJointProbabilityOfLessLikelySamples::SAddProbability(), - m_Offset, - this->isNonInformative(), - m_Shape, - m_Rate, - probability) || + weightStyles, samples, weights, + boost::bind(CTools::CProbabilityOfLessLikelySample(calculation), + _1, _2, boost::ref(tail_)), + CJointProbabilityOfLessLikelySamples::SAddProbability(), m_Offset, + this->isNonInformative(), m_Shape, m_Rate, probability) || !probability.calculate(value)) { - LOG_ERROR(<< "Failed computing probability for " << core::CContainerPrinter::print(samples)); + LOG_ERROR(<< "Failed computing probability for " + << core::CContainerPrinter::print(samples)); return false; } @@ -804,7 +831,9 @@ void CPoissonMeanConjugate::print(const std::string& indent, std::string& result return; } result += "mean = " + core::CStringUtils::typeToStringPretty(this->marginalLikelihoodMean()) + - " sd = " + core::CStringUtils::typeToStringPretty(std::sqrt(this->marginalLikelihoodVariance())); + " sd = " + + core::CStringUtils::typeToStringPretty( + std::sqrt(this->marginalLikelihoodVariance())); } std::string CPoissonMeanConjugate::printJointDensityFunction() const { @@ -870,7 +899,8 @@ void CPoissonMeanConjugate::acceptPersistInserter(core::CStatePersistInserter& i inserter.insertValue(OFFSET_TAG, m_Offset, core::CIEEE754::E_SinglePrecision); inserter.insertValue(SHAPE_TAG, m_Shape, core::CIEEE754::E_SinglePrecision); inserter.insertValue(RATE_TAG, m_Rate, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), core::CIEEE754::E_SinglePrecision); + inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(), + core::CIEEE754::E_SinglePrecision); } double CPoissonMeanConjugate::priorMean() const { @@ -882,7 +912,8 @@ double CPoissonMeanConjugate::priorMean() const { boost::math::gamma_distribution<> gamma(m_Shape, 1.0 / m_Rate); return boost::math::mean(gamma); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to calculate mean: " << e.what() << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); + LOG_ERROR(<< "Failed to calculate mean: " << e.what() + << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); } return 0.0; @@ -897,13 +928,15 @@ double CPoissonMeanConjugate::priorVariance() const { boost::math::gamma_distribution<> gamma(m_Shape, 1.0 / m_Rate); return 
boost::math::variance(gamma); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to calculate variance: " << e.what() << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); + LOG_ERROR(<< "Failed to calculate variance: " << e.what() + << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); } return boost::numeric::bounds::highest(); } -CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const { +CPoissonMeanConjugate::TDoubleDoublePr +CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -923,14 +956,15 @@ CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::meanConfidenceInte return std::make_pair(boost::math::quantile(gamma, lowerPercentile) - m_Offset, boost::math::quantile(gamma, upperPercentile) - m_Offset); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute mean confidence interval: " << e.what() << ", prior shape = " << m_Shape - << ", prior rate = " << m_Rate); + LOG_ERROR(<< "Failed to compute mean confidence interval: " << e.what() + << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); } return this->marginalLikelihoodSupport(); } -bool CPoissonMeanConjugate::equalTolerance(const CPoissonMeanConjugate& rhs, const TEqualWithTolerance& equal) const { +bool CPoissonMeanConjugate::equalTolerance(const CPoissonMeanConjugate& rhs, + const TEqualWithTolerance& equal) const { LOG_DEBUG(<< m_Shape << " " << rhs.m_Shape << ", " << m_Rate << " " << rhs.m_Rate); return equal(m_Shape, rhs.m_Shape) && equal(m_Rate, rhs.m_Rate); } diff --git a/lib/maths/CPrior.cc b/lib/maths/CPrior.cc index 53d8f8c68d..b252693768 100644 --- a/lib/maths/CPrior.cc +++ b/lib/maths/CPrior.cc @@ -47,10 +47,12 @@ void setDecayRate(double value, double fallback, CFloatStorage& result) { const std::size_t ADJUST_OFFSET_TRIALS = 20; } -CPrior::CPrior() : m_DataType(maths_t::E_DiscreteData), m_DecayRate(0.0), m_NumberSamples(0) { +CPrior::CPrior() + : m_DataType(maths_t::E_DiscreteData), m_DecayRate(0.0), m_NumberSamples(0) { } -CPrior::CPrior(maths_t::EDataType dataType, double decayRate) : m_DataType(dataType), m_NumberSamples(0) { +CPrior::CPrior(maths_t::EDataType dataType, double decayRate) + : m_DataType(dataType), m_NumberSamples(0) { detail::setDecayRate(decayRate, FALLBACK_DECAY_RATE, m_DecayRate); } @@ -91,13 +93,17 @@ double CPrior::offsetMargin() const { return 0.0; } -void CPrior::addSamples(const TWeightStyleVec& weightStyles, const TDouble1Vec& /*samples*/, const TDouble4Vec1Vec& weights) { +void CPrior::addSamples(const TWeightStyleVec& weightStyles, + const TDouble1Vec& /*samples*/, + const TDouble4Vec1Vec& weights) { double n = 0.0; try { for (const auto& weight : weights) { n += maths_t::countForUpdate(weightStyles, weight); } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to extract sample counts: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to extract sample counts: " << e.what()); + } this->addSamples(n); } @@ -105,7 +111,8 @@ double CPrior::nearestMarginalLikelihoodMean(double /*value*/) const { return this->marginalLikelihoodMean(); } -CPrior::TDouble1Vec CPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) const { +CPrior::TDouble1Vec CPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, + const TDouble4Vec& weights) const { return 
TDouble1Vec{this->marginalLikelihoodMode(weightStyles, weights)}; } @@ -156,8 +163,8 @@ CPrior::SPlot CPrior::marginalLikelihoodPlot(unsigned int numberPoints, double w for (auto x : plot.s_Abscissa) { double likelihood; - maths_t::EFloatingPointErrorStatus status = - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, {x}, CConstantWeights::SINGLE_UNIT, likelihood); + maths_t::EFloatingPointErrorStatus status = this->jointLogMarginalLikelihood( + CConstantWeights::COUNT, {x}, CConstantWeights::SINGLE_UNIT, likelihood); if (status & maths_t::E_FpFailed) { // Ignore point. } else if (status & maths_t::E_FpOverflowed) { @@ -192,10 +199,14 @@ double CPrior::unmarginalizedParameters() const { return 0.0; } -void CPrior::adjustOffsetResamples(double minimumSample, TDouble1Vec& resamples, TDouble4Vec1Vec& resamplesWeights) const { +void CPrior::adjustOffsetResamples(double minimumSample, + TDouble1Vec& resamples, + TDouble4Vec1Vec& resamplesWeights) const { this->sampleMarginalLikelihood(ADJUST_OFFSET_SAMPLE_SIZE, resamples); std::size_t n = resamples.size(); - resamples.erase(std::remove_if(resamples.begin(), resamples.end(), std::not1(CMathsFuncs::SIsFinite())), resamples.end()); + resamples.erase(std::remove_if(resamples.begin(), resamples.end(), + std::not1(CMathsFuncs::SIsFinite())), + resamples.end()); if (resamples.size() != n) { LOG_ERROR(<< "Bad samples (" << this->debug() << ")"); n = resamples.size(); @@ -216,7 +227,8 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights, COffsetCost& cost, CApplyOffset& apply) { - if (samples.empty() || CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) { + if (samples.empty() || + CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) { return 0.0; } @@ -232,7 +244,8 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, // likelihood of these samples w.r.t. the offset. double margin = this->offsetMargin(); - double minimumSample = *std::min_element(CMathsFuncs::beginFinite(samples), CMathsFuncs::endFinite(samples)); + double minimumSample = *std::min_element(CMathsFuncs::beginFinite(samples), + CMathsFuncs::endFinite(samples)); if (minimumSample + this->offset() >= margin) { return 0.0; } @@ -256,12 +269,16 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, this->adjustOffsetResamples(minimumSample, resamples, resamplesWeights); double before; - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, resamplesWeights, before); + this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, + resamplesWeights, before); double maximumSample = *std::max_element(samples.begin(), samples.end()); - double range = resamples.empty() ? maximumSample - minimumSample - : std::max(maximumSample - minimumSample, resamples[resamples.size() - 1] - resamples[0]); - double increment = std::max((range - margin) / static_cast(ADJUST_OFFSET_TRIALS - 1), 0.0); + double range = resamples.empty() + ? 
maximumSample - minimumSample + : std::max(maximumSample - minimumSample, + resamples[resamples.size() - 1] - resamples[0]); + double increment = std::max( + (range - margin) / static_cast(ADJUST_OFFSET_TRIALS - 1), 0.0); if (increment > 0.0) { TDouble1Vec trialOffsets; @@ -272,13 +289,15 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, } double likelihood; CSolvers::globalMinimize(trialOffsets, cost, offset, likelihood); - LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples) << ", offset = " << offset << ", likelihood = " << likelihood); + LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples) + << ", offset = " << offset << ", likelihood = " << likelihood); } apply(offset); double after; - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, resamplesWeights, after); + this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, + resamplesWeights, after); return std::min(after - before, 0.0); } @@ -318,20 +337,23 @@ CPrior::CLogMarginalLikelihood::CLogMarginalLikelihood(const CPrior& prior, double CPrior::CLogMarginalLikelihood::operator()(double x) const { double result; if (!this->operator()(x, result)) { - throw std::runtime_error("Unable to compute likelihood at " + core::CStringUtils::typeToString(x)); + throw std::runtime_error("Unable to compute likelihood at " + + core::CStringUtils::typeToString(x)); } return result; } bool CPrior::CLogMarginalLikelihood::operator()(double x, double& result) const { m_X[0] = x; - return !(m_Prior->jointLogMarginalLikelihood(*m_WeightStyles, m_X, *m_Weights, result) & maths_t::E_FpFailed); + return !(m_Prior->jointLogMarginalLikelihood(*m_WeightStyles, m_X, *m_Weights, result) & + maths_t::E_FpFailed); } ////////// CPrior::COffsetParameters Implementation ////////// CPrior::COffsetParameters::COffsetParameters(CPrior& prior) - : m_Prior(&prior), m_WeightStyles(nullptr), m_Samples(nullptr), m_Weights(nullptr), m_Resamples(0), m_ResamplesWeights(0) { + : m_Prior(&prior), m_WeightStyles(nullptr), m_Samples(nullptr), + m_Weights(nullptr), m_Resamples(0), m_ResamplesWeights(0) { } void CPrior::COffsetParameters::samples(const maths_t::TWeightStyleVec& weightStyles, @@ -390,21 +412,23 @@ double CPrior::COffsetCost::computeCost(double offset) const { double resamplesLogLikelihood = 0.0; maths_t::EFloatingPointErrorStatus status; if (this->resamples().size() > 0) { - status = - this->prior().jointLogMarginalLikelihood(TWeights::COUNT, this->resamples(), this->resamplesWeights(), resamplesLogLikelihood); + status = this->prior().jointLogMarginalLikelihood( + TWeights::COUNT, this->resamples(), this->resamplesWeights(), resamplesLogLikelihood); if (status != maths_t::E_FpNoErrors) { LOG_ERROR(<< "Failed evaluating log-likelihood at " << offset << " for samples " << core::CContainerPrinter::print(this->resamples()) << " and weights " - << core::CContainerPrinter::print(this->resamplesWeights()) << ", the prior is " << this->prior().print() - << ": status " << status); + << core::CContainerPrinter::print(this->resamplesWeights()) << ", the prior is " + << this->prior().print() << ": status " << status); } } double samplesLogLikelihood; - status = this->prior().jointLogMarginalLikelihood(this->weightStyles(), this->samples(), this->weights(), samplesLogLikelihood); + status = this->prior().jointLogMarginalLikelihood( + this->weightStyles(), this->samples(), this->weights(), samplesLogLikelihood); if (status != maths_t::E_FpNoErrors) { - LOG_ERROR(<< "Failed evaluating 
log-likelihood at " << offset << " for " << core::CContainerPrinter::print(this->samples()) - << " and weights " << core::CContainerPrinter::print(this->weights()) << ", the prior is " << this->prior().print() - << ": status " << status); + LOG_ERROR(<< "Failed evaluating log-likelihood at " << offset << " for " + << core::CContainerPrinter::print(this->samples()) << " and weights " + << core::CContainerPrinter::print(this->weights()) << ", the prior is " + << this->prior().print() << ": status " << status); } return -(resamplesLogLikelihood + samplesLogLikelihood); } diff --git a/lib/maths/CPriorStateSerialiser.cc b/lib/maths/CPriorStateSerialiser.cc index df44074ee8..18b1d9883e 100644 --- a/lib/maths/CPriorStateSerialiser.cc +++ b/lib/maths/CPriorStateSerialiser.cc @@ -47,8 +47,9 @@ const std::string CONSTANT_TAG("h"); const std::string EMPTY_STRING; } -bool CPriorStateSerialiser:: -operator()(const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStateRestoreTraverser& traverser) const { +bool CPriorStateSerialiser::operator()(const SDistributionRestoreParams& params, + TPriorPtr& ptr, + core::CStateRestoreTraverser& traverser) const { size_t numResults(0); do { @@ -81,7 +82,8 @@ operator()(const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStat // Due to the way we divide large state into multiple chunks // this is not necessarily a problem - the unexpected element may be // marking the start of a new chunk - LOG_WARN(<< "No prior distribution corresponds to node name " << traverser.name()); + LOG_WARN(<< "No prior distribution corresponds to node name " + << traverser.name()); } } while (traverser.next()); @@ -94,7 +96,8 @@ operator()(const SDistributionRestoreParams& params, TPriorPtr& ptr, core::CStat return true; } -void CPriorStateSerialiser::operator()(const CPrior& prior, core::CStatePersistInserter& inserter) const { +void CPriorStateSerialiser::operator()(const CPrior& prior, + core::CStatePersistInserter& inserter) const { std::string tagName; if (dynamic_cast(&prior) != nullptr) { @@ -114,22 +117,25 @@ void CPriorStateSerialiser::operator()(const CPrior& prior, core::CStatePersistI } else if (dynamic_cast(&prior) != nullptr) { tagName = POISSON_TAG; } else { - LOG_ERROR(<< "Prior distribution with type '" << typeid(prior).name() << "' has no defined field name"); + LOG_ERROR(<< "Prior distribution with type '" << typeid(prior).name() + << "' has no defined field name"); return; } inserter.insertLevel(tagName, boost::bind(&CPrior::acceptPersistInserter, &prior, _1)); } -bool CPriorStateSerialiser:: -operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, core::CStateRestoreTraverser& traverser) const { +bool CPriorStateSerialiser::operator()(const SDistributionRestoreParams& params, + TMultivariatePriorPtr& ptr, + core::CStateRestoreTraverser& traverser) const { std::size_t numResults = 0u; do { const std::string& name = traverser.name(); if (name == CMultivariatePrior::CONSTANT_TAG) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::CONSTANT_TAG.length()), dimension) == false) { + if (core::CStringUtils::stringToType( + name.substr(CMultivariatePrior::CONSTANT_TAG.length()), dimension) == false) { LOG_ERROR(<< "Bad dimension encoded in " << name); return false; } @@ -137,7 +143,9 @@ operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, ++numResults; } else if (name.find(CMultivariatePrior::MULTIMODAL_TAG) != std::string::npos) { std::size_t dimension; - if 
(core::CStringUtils::stringToType(name.substr(CMultivariatePrior::MULTIMODAL_TAG.length()), dimension) == false) { + if (core::CStringUtils::stringToType( + name.substr(CMultivariatePrior::MULTIMODAL_TAG.length()), + dimension) == false) { LOG_ERROR(<< "Bad dimension encoded in " << name); return false; } @@ -145,7 +153,8 @@ operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, ++numResults; } else if (name.find(CMultivariatePrior::NORMAL_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::NORMAL_TAG.length()), dimension) == false) { + if (core::CStringUtils::stringToType( + name.substr(CMultivariatePrior::NORMAL_TAG.length()), dimension) == false) { LOG_ERROR(<< "Bad dimension encoded in " << name); return false; } @@ -153,7 +162,8 @@ operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, ++numResults; } else if (name.find(CMultivariatePrior::ONE_OF_N_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(CMultivariatePrior::ONE_OF_N_TAG.length()), dimension) == false) { + if (core::CStringUtils::stringToType( + name.substr(CMultivariatePrior::ONE_OF_N_TAG.length()), dimension) == false) { LOG_ERROR(<< "Bad dimension encoded in " << name); return false; } @@ -163,7 +173,8 @@ operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, // Due to the way we divide large state into multiple chunks // this is not necessarily a problem - the unexpected element may be // marking the start of a new chunk - LOG_WARN(<< "No prior distribution corresponds to node name " << traverser.name()); + LOG_WARN(<< "No prior distribution corresponds to node name " + << traverser.name()); } } while (traverser.next()); @@ -176,8 +187,10 @@ operator()(const SDistributionRestoreParams& params, TMultivariatePriorPtr& ptr, return true; } -void CPriorStateSerialiser::operator()(const CMultivariatePrior& prior, core::CStatePersistInserter& inserter) const { - inserter.insertLevel(prior.persistenceTag(), boost::bind(&CMultivariatePrior::acceptPersistInserter, &prior, _1)); +void CPriorStateSerialiser::operator()(const CMultivariatePrior& prior, + core::CStatePersistInserter& inserter) const { + inserter.insertLevel(prior.persistenceTag(), + boost::bind(&CMultivariatePrior::acceptPersistInserter, &prior, _1)); } } } diff --git a/lib/maths/CProbabilityCalibrator.cc b/lib/maths/CProbabilityCalibrator.cc index 7fc7ddc663..214d5bfad1 100644 --- a/lib/maths/CProbabilityCalibrator.cc +++ b/lib/maths/CProbabilityCalibrator.cc @@ -45,7 +45,8 @@ const std::string EMPTY_STRING; } CProbabilityCalibrator::CProbabilityCalibrator(EStyle style, double cutoffProbability) - : m_Style(style), m_CutoffProbability(cutoffProbability), m_DiscreteProbabilityQuantiles(new CQDigest(QUANTILE_SIZE)) { + : m_Style(style), m_CutoffProbability(cutoffProbability), + m_DiscreteProbabilityQuantiles(new CQDigest(QUANTILE_SIZE)) { if (!(m_CutoffProbability >= 0.0 && m_CutoffProbability <= 1.0)) { LOG_ERROR(<< "Invalid cutoff probability " << m_CutoffProbability); CTools::truncate(m_CutoffProbability, 0.0, 1.0); @@ -56,7 +57,8 @@ void CProbabilityCalibrator::acceptPersistInserter(core::CStatePersistInserter& inserter.insertValue(STYLE_TAG, static_cast(m_Style)); inserter.insertValue(CUTOFF_PROBABILITY_TAG, m_CutoffProbability); inserter.insertLevel(DISCRETE_PROBABILITY_QUANTILE_TAG, - boost::bind(&CQDigest::acceptPersistInserter, 
m_DiscreteProbabilityQuantiles.get(), _1)); + boost::bind(&CQDigest::acceptPersistInserter, + m_DiscreteProbabilityQuantiles.get(), _1)); } bool CProbabilityCalibrator::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -70,13 +72,15 @@ bool CProbabilityCalibrator::acceptRestoreTraverser(core::CStateRestoreTraverser } m_Style = static_cast(style); } else if (name == CUTOFF_PROBABILITY_TAG) { - if (core::CStringUtils::stringToType(traverser.value(), m_CutoffProbability) == false) { + if (core::CStringUtils::stringToType(traverser.value(), + m_CutoffProbability) == false) { LOG_ERROR(<< "Invalid cutoff in " << traverser.value()); return false; } } else if (name == DISCRETE_PROBABILITY_QUANTILE_TAG) { - if (traverser.traverseSubLevel(boost::bind(&CQDigest::acceptRestoreTraverser, m_DiscreteProbabilityQuantiles.get(), _1)) == - false) { + if (traverser.traverseSubLevel( + boost::bind(&CQDigest::acceptRestoreTraverser, + m_DiscreteProbabilityQuantiles.get(), _1)) == false) { LOG_ERROR(<< "Invalid quantiles in " << traverser.value()); return false; } @@ -148,14 +152,16 @@ double CProbabilityCalibrator::calibrate(double probability) const { case E_PartialCalibration: if (Fu > m_CutoffProbability) { uint32_t pThreshold; - m_DiscreteProbabilityQuantiles->quantileSublevelSetSupremum(m_CutoffProbability, pThreshold); + m_DiscreteProbabilityQuantiles->quantileSublevelSetSupremum( + m_CutoffProbability, pThreshold); m_DiscreteProbabilityQuantiles->cdf(pThreshold, 0.0, Fl, Fu); a = n * Fu + 1.0; b = n * (1.0 - Fu) + 1.0; beta = boost::math::beta_distribution<>(a, b); Fu = boost::math::quantile(beta, 0.75); double scale = std::max((1.0 - Fu) / rawProbability(pThreshold), 1.0); - LOG_TRACE(<< "scale = " << scale << ", 1 - F = " << 1.0 - Fu << ", p = " << rawProbability(pThreshold)); + LOG_TRACE(<< "scale = " << scale << ", 1 - F = " << 1.0 - Fu + << ", p = " << rawProbability(pThreshold)); return probability * scale; } return std::max(probability, 1.0 - Fu); diff --git a/lib/maths/CQDigest.cc b/lib/maths/CQDigest.cc index 1c6f7a13ce..fcfce18f6b 100644 --- a/lib/maths/CQDigest.cc +++ b/lib/maths/CQDigest.cc @@ -39,7 +39,8 @@ const std::string CQDigest::N_TAG("b"); const std::string CQDigest::NODE_TAG("c"); CQDigest::CQDigest(uint64_t k, double decayRate) - : m_K(k), m_N(0u), m_Root(nullptr), m_NodeAllocator(static_cast(3 * m_K + 2)), m_DecayRate(decayRate) { + : m_K(k), m_N(0u), m_Root(nullptr), + m_NodeAllocator(static_cast(3 * m_K + 2)), m_DecayRate(decayRate) { m_Root = &m_NodeAllocator.create(CNode(0, 1, 0, 0)); } @@ -60,7 +61,8 @@ bool CQDigest::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { RESTORE_BUILT_IN(N_TAG, m_N) if (name == NODE_TAG) { CNode node; - if (traverser.traverseSubLevel(boost::bind(&CNode::acceptRestoreTraverser, &node, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind(&CNode::acceptRestoreTraverser, + &node, _1)) == false) { LOG_ERROR(<< "Failed to restore NODE_TAG, got " << traverser.value()); } if (nodeCount++ == 0) { @@ -181,17 +183,20 @@ bool CQDigest::scale(double factor) { uint32_t span = max - min + 1; uint64_t count = node.get<2>() / span; uint64_t remainder = node.get<2>() - count * span; - LOG_TRACE(<< "min = " << min << ", max = " << max << ", count = " << count << ", remainder = " << remainder); + LOG_TRACE(<< "min = " << min << ", max = " << max + << ", count = " << count << ", remainder = " << remainder); if (count > 0) { for (uint32_t j = 0u; j < span; ++j) { - this->add(static_cast(factor * static_cast(min + j) + 0.5), 
count); + this->add(static_cast(factor * static_cast(min + j) + 0.5), + count); } } if (remainder > 0) { boost::random::uniform_int_distribution uniform(0u, span - 1); for (uint64_t j = 0u; j < remainder; ++j) { - this->add(static_cast(factor * static_cast(min + uniform(generator)) + 0.5)); + this->add(static_cast( + factor * static_cast(min + uniform(generator)) + 0.5)); } } } @@ -275,7 +280,8 @@ double CQDigest::cdfQuantile(double n, double p, double q) { boost::math::beta_distribution<> beta(a, b); return boost::math::quantile(beta, q); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to calculate c.d.f. quantile: " << e.what() << ", n = " << n << ", p = " << p << ", q = " << q); + LOG_ERROR(<< "Failed to calculate c.d.f. quantile: " << e.what() + << ", n = " << n << ", p = " << p << ", q = " << q); } return p; } @@ -293,14 +299,16 @@ bool CQDigest::cdf(uint32_t x, double confidence, double& lowerBound, double& up m_Root->cdfLowerBound(x, l); lowerBound = static_cast(l) / static_cast(m_N); if (confidence > 0.0) { - lowerBound = cdfQuantile(static_cast(m_N), lowerBound, (100.0 - confidence) / 200.0); + lowerBound = cdfQuantile(static_cast(m_N), lowerBound, + (100.0 - confidence) / 200.0); } uint64_t u = 0ull; m_Root->cdfUpperBound(x, u); upperBound = static_cast(u) / static_cast(m_N); if (confidence > 0.0) { - upperBound = cdfQuantile(static_cast(m_N), upperBound, (100.0 + confidence) / 200.0); + upperBound = cdfQuantile(static_cast(m_N), upperBound, + (100.0 + confidence) / 200.0); } return true; @@ -328,11 +336,14 @@ void CQDigest::pdf(uint32_t x, double confidence, double& lowerBound, double& up double supremumUpperBound; this->cdf(supremum, confidence, supremumLowerBound, supremumUpperBound); - lowerBound = std::max(supremumLowerBound - infimumUpperBound, 0.0) / std::max(static_cast(supremum - infimum), 1.0); - upperBound = std::max(supremumUpperBound - infimumLowerBound, 0.0) / std::max(static_cast(supremum - infimum), 1.0); + lowerBound = std::max(supremumLowerBound - infimumUpperBound, 0.0) / + std::max(static_cast(supremum - infimum), 1.0); + upperBound = std::max(supremumUpperBound - infimumLowerBound, 0.0) / + std::max(static_cast(supremum - infimum), 1.0); - LOG_TRACE(<< "x = " << x << ", supremum = " << supremum << ", infimum = " << infimum << ", cdf(supremum) = [" << supremumLowerBound - << "," << supremumUpperBound << "]" + LOG_TRACE(<< "x = " << x << ", supremum = " << supremum + << ", infimum = " << infimum << ", cdf(supremum) = [" + << supremumLowerBound << "," << supremumUpperBound << "]" << ", cdf(infimum) = [" << infimumLowerBound << "," << infimumUpperBound << "]" << ", pdf = [" << lowerBound << "," << upperBound << "]"); } @@ -422,7 +433,8 @@ std::string CQDigest::print() const { result << m_N << " | " << m_K << " | {"; for (const auto& node : nodes) { - result << " \"" << node->print() << ',' << node->count() << ',' << node->subtreeCount() << '"'; + result << " \"" << node->print() << ',' << node->count() << ',' + << node->subtreeCount() << '"'; } result << " }"; @@ -463,11 +475,13 @@ bool CQDigest::compress(TNodePtrVec& compress) { } bool CQDigest::SLevelLess::operator()(const CNode* lhs, const CNode* rhs) const { - return lhs->span() > rhs->span() || (lhs->span() == rhs->span() && lhs->max() > rhs->max()); + return lhs->span() > rhs->span() || + (lhs->span() == rhs->span() && lhs->max() > rhs->max()); } bool CQDigest::SPostLess::operator()(const CNode* lhs, const CNode* rhs) const { - return lhs->max() < rhs->max() || (lhs->max() == rhs->max() && 
lhs->span() < rhs->span()); + return lhs->max() < rhs->max() || + (lhs->max() == rhs->max() && lhs->span() < rhs->span()); } const std::string CQDigest::CNode::MIN_TAG("a"); @@ -475,11 +489,13 @@ const std::string CQDigest::CNode::MAX_TAG("b"); const std::string CQDigest::CNode::COUNT_TAG("c"); CQDigest::CNode::CNode() - : m_Ancestor(nullptr), m_Descendants(), m_Min(0xDEADBEEF), m_Max(0xDEADBEEF), m_Count(0xDEADBEEF), m_SubtreeCount(0xDEADBEEF) { + : m_Ancestor(nullptr), m_Descendants(), m_Min(0xDEADBEEF), + m_Max(0xDEADBEEF), m_Count(0xDEADBEEF), m_SubtreeCount(0xDEADBEEF) { } CQDigest::CNode::CNode(uint32_t min, uint32_t max, uint64_t count, uint64_t subtreeCount) - : m_Ancestor(nullptr), m_Descendants(), m_Min(min), m_Max(max), m_Count(count), m_SubtreeCount(subtreeCount) { + : m_Ancestor(nullptr), m_Descendants(), m_Min(min), m_Max(max), + m_Count(count), m_SubtreeCount(subtreeCount) { } std::size_t CQDigest::CNode::size() const { @@ -510,7 +526,9 @@ uint32_t CQDigest::CNode::quantile(uint64_t leftCount, uint64_t n) const { return m_Max; } -bool CQDigest::CNode::quantileSublevelSetSupremum(uint64_t n, uint64_t leftCount, uint32_t& result) const { +bool CQDigest::CNode::quantileSublevelSetSupremum(uint64_t n, + uint64_t leftCount, + uint32_t& result) const { // We are looking for the right end of the rightmost node // whose count together with those nodes to the left is // is less than n. @@ -523,7 +541,8 @@ bool CQDigest::CNode::quantileSublevelSetSupremum(uint64_t n, uint64_t leftCount leftCount += m_SubtreeCount; for (auto i = m_Descendants.rbegin(); i != m_Descendants.rend(); ++i) { leftCount -= (*i)->subtreeCount(); - if (leftCount + (*i)->count() < n && (*i)->quantileSublevelSetSupremum(n, leftCount, result)) { + if (leftCount + (*i)->count() < n && + (*i)->quantileSublevelSetSupremum(n, leftCount, result)) { break; } } @@ -623,7 +642,8 @@ CQDigest::CNode& CQDigest::CNode::insert(CNodeAllocator& allocator, const CNode& return *this; } - auto next = std::lower_bound(m_Descendants.begin(), m_Descendants.end(), &node, SPostLess()); + auto next = std::lower_bound(m_Descendants.begin(), m_Descendants.end(), + &node, SPostLess()); // If it exists the ancestor will be after the node // in post order. @@ -658,7 +678,8 @@ CQDigest::CNode* CQDigest::CNode::compress(CNodeAllocator& allocator, uint64_t c // Get the sibling of this node if it exists. CNode* sibling = ancestor->sibling(*this); - uint64_t count = (ancestor->isParent(*this) ? ancestor->count() : 0ull) + this->count() + (sibling ? sibling->count() : 0ull); + uint64_t count = (ancestor->isParent(*this) ? ancestor->count() : 0ull) + + this->count() + (sibling ? sibling->count() : 0ull); // Check if we should compress this node. if (count >= compressionFactor) { @@ -694,7 +715,8 @@ uint64_t CQDigest::CNode::age(double factor) { } if (m_Count > 0) { - m_Count = static_cast(std::max(static_cast(m_Count) * factor + 0.5, 1.0)); + m_Count = static_cast( + std::max(static_cast(m_Count) * factor + 0.5, 1.0)); } m_SubtreeCount += m_Count; @@ -721,7 +743,8 @@ const uint64_t& CQDigest::CNode::subtreeCount() const { return m_SubtreeCount; } -void CQDigest::CNode::persistRecursive(const std::string& nodeTag, core::CStatePersistInserter& inserter) const { +void CQDigest::CNode::persistRecursive(const std::string& nodeTag, + core::CStatePersistInserter& inserter) const { inserter.insertLevel(NODE_TAG, boost::bind(&CNode::acceptPersistInserter, this, _1)); // Note the tree is serialized flat in pre-order. 
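// (Editorial aside, not part of the patch: a minimal sketch of the aging
// rule applied in CQDigest::CNode::age above. Counts decay multiplicatively
// but are rounded to nearest and floored at one, so an aged node is never
// forgotten outright; the helper name is hypothetical.)
//
//   #include <algorithm>
//   #include <cstdint>
//
//   std::uint64_t agedCount(std::uint64_t count, double factor) {
//       if (count == 0) {
//           return 0; // empty nodes stay empty
//       }
//       // Scale, round to nearest and keep at least one unit of count.
//       return static_cast<std::uint64_t>(
//           std::max(static_cast<double>(count) * factor + 0.5, 1.0));
//   }
//
// Subtree counts are then rebuilt bottom up by adding each node's aged count
// to its descendants' aged subtree counts, mirroring the recursion above.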
@@ -789,15 +812,19 @@ bool CQDigest::CNode::checkInvariants(uint64_t compressionFactor) const {
 
     for (std::size_t i = 0u; i < m_Descendants.size(); ++i) {
         if (m_Descendants[i]->m_Ancestor != this) {
-            LOG_ERROR(<< "Bad connectivity: " << this->print() << " -> " << m_Descendants[i]->print() << " <- "
+            LOG_ERROR(<< "Bad connectivity: " << this->print() << " -> "
+                      << m_Descendants[i]->print() << " <- "
                       << m_Descendants[i]->m_Ancestor->print());
         }
         if (!this->isAncestor(*m_Descendants[i])) {
-            LOG_ERROR(<< "Bad connectivity: " << this->print() << " -> " << m_Descendants[i]->print());
+            LOG_ERROR(<< "Bad connectivity: " << this->print() << " -> "
+                      << m_Descendants[i]->print());
             return false;
         }
-        if (i + 1u < m_Descendants.size() && !postLess(m_Descendants[i], m_Descendants[i + 1u])) {
-            LOG_ERROR(<< "Bad order: " << m_Descendants[i]->print() << " >= " << m_Descendants[i + 1u]->print());
+        if (i + 1u < m_Descendants.size() &&
+            !postLess(m_Descendants[i], m_Descendants[i + 1u])) {
+            LOG_ERROR(<< "Bad order: " << m_Descendants[i]->print()
+                      << " >= " << m_Descendants[i + 1u]->print());
             return false;
         }
         if (!m_Descendants[i]->checkInvariants(compressionFactor)) {
@@ -818,7 +845,8 @@ bool CQDigest::CNode::checkInvariants(uint64_t compressionFactor) const {
 
     if (!this->isRoot()) {
         const CNode* sibling = m_Ancestor->sibling(*this);
-        uint64_t count = m_Count + (sibling ? sibling->count() : 0ull) + (m_Ancestor->isParent(*this) ? m_Ancestor->count() : 0ull);
+        uint64_t count = m_Count + (sibling ? sibling->count() : 0ull) +
+                         (m_Ancestor->isParent(*this) ? m_Ancestor->count() : 0ull);
         if (count < compressionFactor) {
             LOG_ERROR(<< "Bad triple count: " << count << ", floor(n/k) = " << compressionFactor);
             return false;
         }
@@ -857,7 +885,8 @@ CQDigest::CNode* CQDigest::CNode::sibling(const CNode& node) const {
     node.isLeftChild() ? max += node.span() : max -= node.span();
 
     CNode sibling(min, max, 0u, 0u);
-    auto next = std::lower_bound(m_Descendants.begin(), m_Descendants.end(), &sibling, SPostLess());
+    auto next = std::lower_bound(m_Descendants.begin(), m_Descendants.end(),
+                                 &sibling, SPostLess());
 
     if (next != m_Descendants.end() && (*next)->isSibling(node)) {
         return *next;
@@ -868,7 +897,8 @@ bool CQDigest::CNode::isSibling(const CNode& node) const {
     // Check if the nodes are on the same level and share a parent.
-    return this->span() == node.span() && (this->isLeftChild() ? m_Max + 1u == node.m_Min : m_Min == node.m_Max + 1u);
+    return this->span() == node.span() &&
+           (this->isLeftChild() ? m_Max + 1u == node.m_Min : m_Min == node.m_Max + 1u);
 }
 
 bool CQDigest::CNode::isParent(const CNode& node) const {
@@ -878,7 +908,8 @@ bool CQDigest::CNode::isAncestor(const CNode& node) const {
     // Check for inclusion of node range.
-    return (m_Min < node.m_Min && m_Max >= node.m_Max) || (m_Min <= node.m_Min && m_Max > node.m_Max);
+    return (m_Min < node.m_Min && m_Max >= node.m_Max) ||
+           (m_Min <= node.m_Min && m_Max > node.m_Max);
 }
 
 bool CQDigest::CNode::isRoot() const {
@@ -907,7 +938,8 @@ void CQDigest::CNode::detach(CNodeAllocator& allocator) {
 
 void CQDigest::CNode::removeDescendant(CNode& node) {
     // Remove node from the descendants.
- m_Descendants.erase(std::remove(m_Descendants.begin(), m_Descendants.end(), &node), m_Descendants.end()); + m_Descendants.erase(std::remove(m_Descendants.begin(), m_Descendants.end(), &node), + m_Descendants.end()); } bool CQDigest::CNode::takeDescendants(CNode& node) { @@ -932,12 +964,8 @@ bool CQDigest::CNode::takeDescendants(CNode& node) { // Merge the descendants. TNodePtrVec descendants; descendants.reserve(m_Descendants.size() + nodesToTake.size()); - std::merge(m_Descendants.begin(), - m_Descendants.end(), - nodesToTake.begin(), - nodesToTake.end(), - std::back_inserter(descendants), - SPostLess()); + std::merge(m_Descendants.begin(), m_Descendants.end(), nodesToTake.begin(), + nodesToTake.end(), std::back_inserter(descendants), SPostLess()); // Update the node's descendants. nodesToLeave.swap(node.m_Descendants); @@ -955,12 +983,8 @@ bool CQDigest::CNode::takeDescendants(CNode& node) { // Merge the descendants. TNodePtrVec descendants; descendants.reserve(m_Descendants.size() + node.numberDescendants()); - std::merge(m_Descendants.begin(), - m_Descendants.end(), - node.beginDescendants(), - node.endDescendants(), - std::back_inserter(descendants), - SPostLess()); + std::merge(m_Descendants.begin(), m_Descendants.end(), node.beginDescendants(), + node.endDescendants(), std::back_inserter(descendants), SPostLess()); // Clear out the node's descendants. TNodePtrVec empty; @@ -1005,7 +1029,8 @@ CQDigest::CNode& CQDigest::CNodeAllocator::create(const CNode& node) { void CQDigest::CNodeAllocator::release(CNode& node) { std::size_t block = this->findBlock(node); if (block >= m_FreeNodes.size()) { - LOG_ABORT(<< "Bad block address = " << block << ", max = " << m_FreeNodes.size() - 1u); + LOG_ABORT(<< "Bad block address = " << block + << ", max = " << m_FreeNodes.size() - 1u); } m_FreeNodes[block].push_back(&node); diff --git a/lib/maths/CQuantileSketch.cc b/lib/maths/CQuantileSketch.cc index 96fb377ac0..efc00bb819 100644 --- a/lib/maths/CQuantileSketch.cc +++ b/lib/maths/CQuantileSketch.cc @@ -39,7 +39,8 @@ class CIndexingGreater { bool operator()(std::size_t lhs, std::size_t rhs) const { return COrderings::lexicographical_compare( - -(*m_Values)[lhs].first, (*m_Values)[lhs].second, -(*m_Values)[rhs].first, (*m_Values)[rhs].second); + -(*m_Values)[lhs].first, (*m_Values)[lhs].second, + -(*m_Values)[rhs].first, (*m_Values)[rhs].second); } private: @@ -48,13 +49,14 @@ class CIndexingGreater { //! \brief An iterator over just the unique knot values. 
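//!
//! (Editorial aside, not part of the patch: the idea in miniature. Runs of
//! knots sharing an abscissa are treated as one logical element; the free
//! function below is a hypothetical illustration and assumes the knots are
//! sorted by value.)
//!
//! \code
//! #include <cstddef>
//! #include <utility>
//! #include <vector>
//!
//! // Returns the index one past the run of knots equal in value to knots[i].
//! std::size_t nextUnique(const std::vector<std::pair<double, double>>& knots,
//!                        std::size_t i) {
//!     double x = knots[i].first;
//!     while (i + 1 < knots.size() && knots[i + 1].first == x) {
//!         ++i;
//!     }
//!     return i + 1;
//! }
//! \endcode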
class CUniqueIterator - : private boost::addable2>> { + : private boost::addable2>> { public: - CUniqueIterator(TFloatFloatPrVec& knots, std::size_t i) : m_Knots(&knots), m_I(i) {} + CUniqueIterator(TFloatFloatPrVec& knots, std::size_t i) + : m_Knots(&knots), m_I(i) {} - bool operator==(const CUniqueIterator& rhs) const { return m_I == rhs.m_I && m_Knots == rhs.m_Knots; } + bool operator==(const CUniqueIterator& rhs) const { + return m_I == rhs.m_I && m_Knots == rhs.m_Knots; + } TFloatFloatPr& operator*() const { return (*m_Knots)[m_I]; } TFloatFloatPr* operator->() const { return &(*m_Knots)[m_I]; } @@ -103,7 +105,8 @@ const std::string COUNT_TAG("c"); } CQuantileSketch::CQuantileSketch(EInterpolation interpolation, std::size_t size) - : m_Interpolation(interpolation), m_MaxSize(std::max(size, MINIMUM_MAX_SIZE)), m_Unsorted(0), m_Count(0.0) { + : m_Interpolation(interpolation), + m_MaxSize(std::max(size, MINIMUM_MAX_SIZE)), m_Unsorted(0), m_Count(0.0) { m_Knots.reserve(m_MaxSize + 1); } @@ -172,7 +175,9 @@ bool CQuantileSketch::cdf(double x_, double& result) const { return true; } - ptrdiff_t k = std::lower_bound(m_Knots.begin(), m_Knots.end(), x, COrderings::SFirstLess()) - m_Knots.begin(); + ptrdiff_t k = std::lower_bound(m_Knots.begin(), m_Knots.end(), x, + COrderings::SFirstLess()) - + m_Knots.begin(); LOG_TRACE(<< "k = " << k); switch (m_Interpolation) { @@ -195,25 +200,30 @@ bool CQuantileSketch::cdf(double x_, double& result) const { bool left = (2 * k < n); bool loc = (2.0 * x < xl + xr); double partial = 0.0; - for (ptrdiff_t i = left ? 0 : (loc ? k : k + 1), m = left ? (loc ? k - 1 : k) : n; i < m; ++i) { + for (ptrdiff_t i = left ? 0 : (loc ? k : k + 1), + m = left ? (loc ? k - 1 : k) : n; + i < m; ++i) { partial += m_Knots[i].second; } partial /= m_Count; double dn; if (loc) { - double xll = k > 1 ? static_cast(m_Knots[k - 2].first) : 2.0 * xl - xr; + double xll = k > 1 ? static_cast(m_Knots[k - 2].first) + : 2.0 * xl - xr; xr = 0.5 * (xl + xr); xl = 0.5 * (xll + xl); dn = m_Knots[k - 1].second / m_Count; } else { - double xrr = k + 1 < n ? static_cast(m_Knots[k + 1].first) : 2.0 * xr - xl; + double xrr = k + 1 < n ? static_cast(m_Knots[k + 1].first) + : 2.0 * xr - xl; xl = 0.5 * (xl + xr); xr = 0.5 * (xr + xrr); dn = m_Knots[k].second / m_Count; } - LOG_TRACE(<< "left = " << left << ", loc = " << loc << ", partial = " << partial << ", xl = " << xl << ", xr = " << xr - << ", dn = " << dn); - result = left ? partial + dn * (x - xl) / (xr - xl) : 1.0 - partial - dn * (xr - x) / (xr - xl); + LOG_TRACE(<< "left = " << left << ", loc = " << loc << ", partial = " << partial + << ", xl = " << xl << ", xr = " << xr << ", dn = " << dn); + result = left ? partial + dn * (x - xl) / (xr - xl) + : 1.0 - partial - dn * (xr - x) / (xr - xl); } return true; } @@ -292,9 +302,11 @@ bool CQuantileSketch::quantile(double percentage, double& result) const { } else { double x0 = m_Knots[0].first; double x1 = m_Knots[1].first; - double xa = i == 0 ? 2.0 * x0 - x1 : static_cast(m_Knots[i - 1].first); + double xa = i == 0 ? 2.0 * x0 - x1 + : static_cast(m_Knots[i - 1].first); double xb = m_Knots[i].first; - double xc = i + 1 == n ? 2.0 * xb - xa : static_cast(m_Knots[i + 1].first); + double xc = i + 1 == n ? 
2.0 * xb - xa + : static_cast(m_Knots[i + 1].first); xa += 0.5 * (xb - xa); xb += 0.5 * (xc - xb); double dx = (xb - xa); @@ -352,7 +364,8 @@ bool CQuantileSketch::checkInvariants() const { return false; } if (!boost::algorithm::is_sorted(m_Knots.begin(), m_Knots.end() - m_Unsorted)) { - LOG_ERROR(<< "Unordered knots: " << core::CContainerPrinter::print(m_Knots.begin(), m_Knots.end() - m_Unsorted)); + LOG_ERROR(<< "Unordered knots: " + << core::CContainerPrinter::print(m_Knots.begin(), m_Knots.end() - m_Unsorted)); return false; } double count = 0.0; @@ -398,7 +411,8 @@ void CQuantileSketch::reduce() { std::size_t l = indexing[0] + 1; std::size_t r = (CUniqueIterator(m_Knots, l) + 1).index(); - LOG_TRACE(<< "Considering merging " << l << " and " << r << ", cost = " << costs[l - 1].first); + LOG_TRACE(<< "Considering merging " << l << " and " << r + << ", cost = " << costs[l - 1].first); std::pop_heap(indexing.begin(), indexing.end(), CIndexingGreater(costs)); indexing.pop_back(); @@ -411,7 +425,8 @@ void CQuantileSketch::reduce() { double xr = m_Knots[r].first; double nl = m_Knots[l].second; double nr = m_Knots[r].second; - LOG_TRACE(<< "xl = " << xl << ", nl = " << nl << ", xr = " << xr << ", nr = " << nr); + LOG_TRACE(<< "xl = " << xl << ", nl = " << nl << ", xr = " << xr + << ", nr = " << nr); // Find the points that have been merged with xl and xr. std::size_t ll = (CUniqueIterator(m_Knots, l) - 1).index(); @@ -432,7 +447,8 @@ void CQuantileSketch::reduce() { m_Knots[i].first = xm; m_Knots[i].second = nm; } - LOG_TRACE(<< "merged = " << core::CContainerPrinter::print(&m_Knots[ll + 1], &m_Knots[rr])); + LOG_TRACE(<< "merged = " + << core::CContainerPrinter::print(&m_Knots[ll + 1], &m_Knots[rr])); LOG_TRACE(<< "right = " << core::CContainerPrinter::print(m_Knots[rr])); if (ll > 0) { diff --git a/lib/maths/CRadialBasisFunction.cc b/lib/maths/CRadialBasisFunction.cc index 22d031a85c..f9fee6d8de 100644 --- a/lib/maths/CRadialBasisFunction.cc +++ b/lib/maths/CRadialBasisFunction.cc @@ -32,7 +32,8 @@ inline bool contains(double a, double b, double x) { double gaussianSquareDerivative(double x, double centre, double scale) { double r = scale * (x - centre); return scale * - (boost::math::double_constants::root_two_pi * boost::math::erf(boost::math::double_constants::root_two * r) - + (boost::math::double_constants::root_two_pi * + boost::math::erf(boost::math::double_constants::root_two * r) - 4.0 * r * std::exp(-2.0 * r * r)) / 4.0; } @@ -49,7 +50,8 @@ double gaussianProduct(double x, double centre1, double centre2, double scale1, double m = (scale1 * scale1 * centre1 + scale2 * scale2 * centre2) / (scale * scale); double d = scale1 * scale2 * (centre2 - centre1); - return boost::math::double_constants::root_pi * std::exp(-d * d / (scale * scale)) * boost::math::erf(scale * (x - m)) / (2.0 * scale); + return boost::math::double_constants::root_pi * std::exp(-d * d / (scale * scale)) * + boost::math::erf(scale * (x - m)) / (2.0 * scale); } //! The indefinite integral @@ -59,7 +61,8 @@ double gaussianProduct(double x, double centre1, double centre2, double scale1, double inverseQuadraticSquareDerivative(double x, double centre, double scale) { double r = scale * (x - centre); double d = (1.0 + r * r); - return scale * (3.0 * r / d + 2.0 * r / (d * d) - 8.0 * r / (d * d * d) + 3.0 * std::atan(r)) / 12.0; + return scale * + (3.0 * r / d + 2.0 * r / (d * d) - 8.0 * r / (d * d * d) + 3.0 * std::atan(r)) / 12.0; } //! 
The indefinite integral @@ -78,11 +81,13 @@ double inverseQuadraticProduct(double x, double centre1, double centre2, double } if ((d * d) > 1.0) { - return (scale1 * scale2 / d * std::log((1.0 + r1 * r1) / (1.0 + r2 * r2)) + scale1 * (1.0 - (ss * sd) / (d * d)) * std::atan(r1) + + return (scale1 * scale2 / d * std::log((1.0 + r1 * r1) / (1.0 + r2 * r2)) + + scale1 * (1.0 - (ss * sd) / (d * d)) * std::atan(r1) + scale2 * (1.0 + (ss * sd) / (d * d)) * std::atan(r2)) / ((1.0 + (ss * ss) / (d * d)) * (d * d + sd * sd)); } - return (scale1 * scale2 * d * std::log((1.0 + r1 * r1) / (1.0 + r2 * r2)) + (d * d - ss * sd) * scale1 * std::atan(r1) + + return (scale1 * scale2 * d * std::log((1.0 + r1 * r1) / (1.0 + r2 * r2)) + + (d * d - ss * sd) * scale1 * std::atan(r1) + (d * d + ss * sd) * scale2 * std::atan(r2)) / ((d * d + ss * ss) * (d * d + sd * sd)); } @@ -139,7 +144,9 @@ double CGaussianBasisFunction::mean(double a, double b, double centre, double sc } return std::max(boost::math::double_constants::root_pi / 2.0 / scale * - (boost::math::erf(scale * (b - centre)) - boost::math::erf(scale * (a - centre))) / (b - a), + (boost::math::erf(scale * (b - centre)) - + boost::math::erf(scale * (a - centre))) / + (b - a), 0.0); } @@ -165,7 +172,9 @@ double CGaussianBasisFunction::meanSquareDerivative(double a, double b, double c double fa = this->derivative(a, centre, scale); double fb = this->derivative(b, centre, scale); double fmin = contains(a, b, centre) ? 0.0 : std::min(fa, fb); - double fmax = (contains(a, b, maxima[0]) || contains(a, b, maxima[1])) ? this->derivative(maxima[0], centre, scale) : std::max(fa, fb); + double fmax = (contains(a, b, maxima[0]) || contains(a, b, maxima[1])) + ? this->derivative(maxima[0], centre, scale) + : std::max(fa, fb); double smin = fmin * fmin; double smax = fmax * fmax; @@ -174,10 +183,18 @@ double CGaussianBasisFunction::meanSquareDerivative(double a, double b, double c return (smin + smax) / 2.0; } - return std::max((gaussianSquareDerivative(b, centre, scale) - gaussianSquareDerivative(a, centre, scale)) / (b - a), 0.0); + return std::max((gaussianSquareDerivative(b, centre, scale) - + gaussianSquareDerivative(a, centre, scale)) / + (b - a), + 0.0); } -double CGaussianBasisFunction::product(double a, double b, double centre1, double centre2, double scale1, double scale2) const { +double CGaussianBasisFunction::product(double a, + double b, + double centre1, + double centre2, + double scale1, + double scale2) const { // The maximum function value is at the minimum of |x - c| // in the range [a,b] and the maximum is at the maximum of // |x - c|. 
Denoting these x+ and x-, respectively, we can @@ -206,7 +223,9 @@ double CGaussianBasisFunction::product(double a, double b, double centre1, doubl return (pmin + pmax) / 2.0; } - return std::max((gaussianProduct(b, centre1, centre2, scale1, scale2) - gaussianProduct(a, centre1, centre2, scale1, scale2)) / (b - a), + return std::max((gaussianProduct(b, centre1, centre2, scale1, scale2) - + gaussianProduct(a, centre1, centre2, scale1, scale2)) / + (b - a), 0.0); } @@ -250,10 +269,15 @@ double CInverseQuadraticBasisFunction::mean(double a, double b, double centre, d return (fmax + fmin) / 2.0; } - return std::max((std::atan(scale * (b - centre)) - std::atan(scale * (a - centre))) / scale / (b - a), 0.0); + return std::max((std::atan(scale * (b - centre)) - std::atan(scale * (a - centre))) / + scale / (b - a), + 0.0); } -double CInverseQuadraticBasisFunction::meanSquareDerivative(double a, double b, double centre, double scale) const { +double CInverseQuadraticBasisFunction::meanSquareDerivative(double a, + double b, + double centre, + double scale) const { // The maximum of the derivative function is at the point // c +/- 1 / sqrt(3) / s. To find the maximum and minimum // values of the derivative function x+ and x- we need to @@ -275,7 +299,9 @@ double CInverseQuadraticBasisFunction::meanSquareDerivative(double a, double b, double fa = this->derivative(a, centre, scale); double fb = this->derivative(b, centre, scale); double fmin = contains(a, b, centre) ? 0.0 : std::min(fa, fb); - double fmax = (contains(a, b, maxima[0]) || contains(a, b, maxima[1])) ? this->derivative(maxima[0], centre, scale) : std::max(fa, fb); + double fmax = (contains(a, b, maxima[0]) || contains(a, b, maxima[1])) + ? this->derivative(maxima[0], centre, scale) + : std::max(fa, fb); double smin = fmin * fmin; double smax = fmax * fmax; @@ -284,7 +310,9 @@ double CInverseQuadraticBasisFunction::meanSquareDerivative(double a, double b, return (smin + smax) / 2.0; } - return std::max((inverseQuadraticSquareDerivative(b, centre, scale) - inverseQuadraticSquareDerivative(a, centre, scale)) / (b - a), + return std::max((inverseQuadraticSquareDerivative(b, centre, scale) - + inverseQuadraticSquareDerivative(a, centre, scale)) / + (b - a), 0.0); } @@ -296,7 +324,12 @@ bool CInverseQuadraticBasisFunction::scale(double distance, double value, double return true; } -double CInverseQuadraticBasisFunction::product(double a, double b, double centre1, double centre2, double scale1, double scale2) const { +double CInverseQuadraticBasisFunction::product(double a, + double b, + double centre1, + double centre2, + double scale1, + double scale2) const { // The maximum function value is at the minimum of |x - c| // in the range [a,b] and the maximum is at the maximum of // |x - c|. 
Denoting these x+ and x-, respectively, we can @@ -325,10 +358,10 @@ double CInverseQuadraticBasisFunction::product(double a, double b, double centre return (pmin + pmax) / 2.0; } - return std::max( - (inverseQuadraticProduct(b, centre1, centre2, scale1, scale2) - inverseQuadraticProduct(a, centre1, centre2, scale1, scale2)) / - (b - a), - 0.0); + return std::max((inverseQuadraticProduct(b, centre1, centre2, scale1, scale2) - + inverseQuadraticProduct(a, centre1, centre2, scale1, scale2)) / + (b - a), + 0.0); } } } diff --git a/lib/maths/CRestoreParams.cc b/lib/maths/CRestoreParams.cc index 25776c9fa6..d842095b9a 100644 --- a/lib/maths/CRestoreParams.cc +++ b/lib/maths/CRestoreParams.cc @@ -16,30 +16,25 @@ SDistributionRestoreParams::SDistributionRestoreParams(maths_t::EDataType dataTy double minimumClusterFraction, double minimumClusterCount, double minimumCategoryCount) - : s_DataType{dataType}, - s_DecayRate{decayRate}, - s_MinimumClusterFraction{minimumClusterFraction}, - s_MinimumClusterCount{minimumClusterCount}, - s_MinimumCategoryCount{minimumCategoryCount} { + : s_DataType{dataType}, s_DecayRate{decayRate}, s_MinimumClusterFraction{minimumClusterFraction}, + s_MinimumClusterCount{minimumClusterCount}, s_MinimumCategoryCount{minimumCategoryCount} { } -STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams(double decayRate, - core_t::TTime minimumBucketLength, - std::size_t componentSize, - const SDistributionRestoreParams& changeModelParams) - : s_DecayRate{decayRate}, - s_MinimumBucketLength{minimumBucketLength}, - s_ComponentSize{componentSize}, - s_ChangeModelParams{changeModelParams} { +STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams( + double decayRate, + core_t::TTime minimumBucketLength, + std::size_t componentSize, + const SDistributionRestoreParams& changeModelParams) + : s_DecayRate{decayRate}, s_MinimumBucketLength{minimumBucketLength}, + s_ComponentSize{componentSize}, s_ChangeModelParams{changeModelParams} { } -STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams(double decayRate, - core_t::TTime minimumBucketLength, - const SDistributionRestoreParams& changeModelParams) - : s_DecayRate{decayRate}, - s_MinimumBucketLength{minimumBucketLength}, - s_ComponentSize{DECOMPOSITION_COMPONENT_SIZE}, - s_ChangeModelParams{changeModelParams} { +STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams( + double decayRate, + core_t::TTime minimumBucketLength, + const SDistributionRestoreParams& changeModelParams) + : s_DecayRate{decayRate}, s_MinimumBucketLength{minimumBucketLength}, + s_ComponentSize{DECOMPOSITION_COMPONENT_SIZE}, s_ChangeModelParams{changeModelParams} { } SModelRestoreParams::SModelRestoreParams(const CModelParams& params, diff --git a/lib/maths/CSampling.cc b/lib/maths/CSampling.cc index a5afe440bd..f3bf73bc11 100644 --- a/lib/maths/CSampling.cc +++ b/lib/maths/CSampling.cc @@ -136,14 +136,18 @@ std::size_t doCategoricalSample(RNG& rng, TDoubleVec& probabilities) { uniform0X = uniform(rng); } - return std::min( - static_cast(std::lower_bound(probabilities.begin(), probabilities.end(), uniform0X) - probabilities.begin()), - probabilities.size() - 1); + return std::min(static_cast(std::lower_bound(probabilities.begin(), + probabilities.end(), uniform0X) - + probabilities.begin()), + probabilities.size() - 1); } //! Implementation of categorical sampling with replacement. 
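//! (Editorial aside, not part of the patch: a minimal sketch of the inverse
//! transform sampling used by the categorical samplers below. Probabilities
//! are accumulated into a c.d.f. and a uniform draw on [0,1] is mapped to the
//! first category whose cumulative probability reaches it; names are
//! hypothetical.)
//!
//! \code
//! #include <algorithm>
//! #include <cstddef>
//! #include <vector>
//!
//! // cumulative must be sorted ascending and end at (approximately) 1.0.
//! std::size_t inverseTransform(const std::vector<double>& cumulative, double u) {
//!     auto it = std::lower_bound(cumulative.begin(), cumulative.end(), u);
//!     // Clamp to guard against u exceeding the last partial sum through
//!     // floating point rounding.
//!     return std::min<std::size_t>(it - cumulative.begin(), cumulative.size() - 1);
//! }
//! \endcode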
 template<typename RNG>
-void doCategoricalSampleWithReplacement(RNG& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result) {
+void doCategoricalSampleWithReplacement(RNG& rng,
+                                        TDoubleVec& probabilities,
+                                        std::size_t n,
+                                        TSizeVec& result) {
     // We use inverse transform sampling to generate the categorical
     // samples from random samples on [0,1].
@@ -167,7 +171,9 @@ void doCategoricalSampleWithReplacement(RNG& rng, TDoubleVec& probabilities, std
     for (std::size_t i = 0u; i < n; ++i) {
         double uniform0X = uniform(rng);
         result.push_back(std::min(
-            static_cast<std::size_t>(std::lower_bound(probabilities.begin(), probabilities.end(), uniform0X) - probabilities.begin()),
+            static_cast<std::size_t>(std::lower_bound(probabilities.begin(),
+                                                      probabilities.end(), uniform0X) -
+                                     probabilities.begin()),
             probabilities.size() - 1));
     }
 }
@@ -175,7 +181,10 @@ void doCategoricalSampleWithReplacement(RNG& rng, TDoubleVec& probabilities, std
 
 //! Implementation of categorical sampling without replacement.
 template<typename RNG>
-void doCategoricalSampleWithoutReplacement(RNG& rng, TDoubleVec& probabilities, std::size_t n, TSizeVec& result) {
+void doCategoricalSampleWithoutReplacement(RNG& rng,
+                                           TDoubleVec& probabilities,
+                                           std::size_t n,
+                                           TSizeVec& result) {
     // We use inverse transform sampling to generate the categorical
     // samples from random samples on [0,1] and update the probabilities
     // throughout the sampling to exclude the values already taken.
@@ -187,7 +196,8 @@ void doCategoricalSampleWithoutReplacement(RNG& rng, TDoubleVec& probabilities,
     std::size_t p = probabilities.size();
     if (n >= p) {
-        result.assign(boost::counting_iterator<std::size_t>(0), boost::counting_iterator<std::size_t>(p));
+        result.assign(boost::counting_iterator<std::size_t>(0),
+                      boost::counting_iterator<std::size_t>(p));
     }
 
     // Construct the transform function.
@@ -196,7 +206,8 @@ void doCategoricalSampleWithoutReplacement(RNG& rng, TDoubleVec& probabilities,
     }
 
     result.reserve(n);
-    TSizeVec indices(boost::counting_iterator<std::size_t>(0), boost::counting_iterator<std::size_t>(p));
+    TSizeVec indices(boost::counting_iterator<std::size_t>(0),
+                     boost::counting_iterator<std::size_t>(p));
     TSizeVec s(1);
 
     for (std::size_t i = 0u; i < n; ++i, --p) {
@@ -206,9 +217,11 @@ void doCategoricalSampleWithoutReplacement(RNG& rng, TDoubleVec& probabilities,
         } else {
             boost::random::uniform_real_distribution<> uniform(0.0, probabilities[p - 1]);
             double uniform0X = uniform(rng);
-            s[0] = std::min(
-                static_cast<std::size_t>(std::lower_bound(probabilities.begin(), probabilities.end(), uniform0X) - probabilities.begin()),
-                probabilities.size() - 1);
+            s[0] = std::min(static_cast<std::size_t>(
+                                std::lower_bound(probabilities.begin(),
                                                  probabilities.end(), uniform0X) -
+                                probabilities.begin()),
+                            probabilities.size() - 1);
 
             result.push_back(indices[s[0]]);
 
@@ -224,11 +237,16 @@ void doCategoricalSampleWithoutReplacement(RNG& rng, TDoubleVec& probabilities,
 
 //! Implementation of multivariate normal sampling.
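//! (Editorial aside, not part of the patch: to draw from N(m, C) one
//! factorizes C = A * A^t and sets x = m + A * z with z a vector of
//! independent standard normals. The function below uses an SVD, which also
//! copes with rank-deficient C; the sketch here uses Eigen's Cholesky
//! factorization purely for illustration, assumes C is positive definite and
//! its names are hypothetical.)
//!
//! \code
//! #include <Eigen/Dense>
//! #include <random>
//!
//! Eigen::VectorXd sampleMultivariateNormal(const Eigen::VectorXd& m,
//!                                          const Eigen::MatrixXd& C,
//!                                          std::mt19937_64& rng) {
//!     Eigen::MatrixXd A = C.llt().matrixL(); // C = A * A^t
//!     std::normal_distribution<double> normal(0.0, 1.0);
//!     Eigen::VectorXd z(m.size());
//!     for (Eigen::Index i = 0; i < z.size(); ++i) {
//!         z(i) = normal(rng);
//!     }
//!     return m + A * z;
//! }
//! \endcode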
template -bool doMultivariateNormalSample(RNG& rng, const TDoubleVec& mean, const TDoubleVecVec& covariance, std::size_t n, TDoubleVecVec& samples) { +bool doMultivariateNormalSample(RNG& rng, + const TDoubleVec& mean, + const TDoubleVecVec& covariance, + std::size_t n, + TDoubleVecVec& samples) { using TJacobiSvd = Eigen::JacobiSVD>; if (mean.size() != covariance.size()) { - LOG_ERROR(<< "Incompatible mean and covariance: " << core::CContainerPrinter::print(mean) << ", " + LOG_ERROR(<< "Incompatible mean and covariance: " + << core::CContainerPrinter::print(mean) << ", " << core::CContainerPrinter::print(covariance)); return false; } @@ -262,7 +280,8 @@ bool doMultivariateNormalSample(RNG& rng, const TDoubleVec& mean, const TDoubleV for (std::size_t i = 0u; i < d; ++i) { C(i, i) = covariance[i][i]; if (covariance[i].size() < d - i) { - LOG_ERROR(<< "Bad covariance matrix: " << core::CContainerPrinter::print(covariance)); + LOG_ERROR(<< "Bad covariance matrix: " + << core::CContainerPrinter::print(covariance)); return false; } for (std::size_t j = 0; j < i; ++j) { @@ -416,22 +435,28 @@ void CSampling::seed() { ms_Rng.seed(); } -#define UNIFORM_SAMPLE(TYPE) \ - TYPE CSampling::uniformSample(TYPE a, TYPE b) { \ - core::CScopedFastLock scopedLock(ms_Lock); \ - return doUniformSample(ms_Rng, a, b); \ - } \ - TYPE CSampling::uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b) { return doUniformSample(rng, a, b); } \ - TYPE CSampling::uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b) { return doUniformSample(rng, a, b); } \ - void CSampling::uniformSample(TYPE a, TYPE b, std::size_t n, std::vector& result) { \ - core::CScopedFastLock scopedLock(ms_Lock); \ - doUniformSample(ms_Rng, a, b, n, result); \ - } \ - void CSampling::uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b, std::size_t n, std::vector& result) { \ - doUniformSample(rng, a, b, n, result); \ - } \ - void CSampling::uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b, std::size_t n, std::vector& result) { \ - doUniformSample(rng, a, b, n, result); \ +#define UNIFORM_SAMPLE(TYPE) \ + TYPE CSampling::uniformSample(TYPE a, TYPE b) { \ + core::CScopedFastLock scopedLock(ms_Lock); \ + return doUniformSample(ms_Rng, a, b); \ + } \ + TYPE CSampling::uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b) { \ + return doUniformSample(rng, a, b); \ + } \ + TYPE CSampling::uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b) { \ + return doUniformSample(rng, a, b); \ + } \ + void CSampling::uniformSample(TYPE a, TYPE b, std::size_t n, std::vector& result) { \ + core::CScopedFastLock scopedLock(ms_Lock); \ + doUniformSample(ms_Rng, a, b, n, result); \ + } \ + void CSampling::uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b, \ + std::size_t n, std::vector& result) { \ + doUniformSample(rng, a, b, n, result); \ + } \ + void CSampling::uniformSample(CPRNG::CXorShift1024Mult& rng, TYPE a, TYPE b, \ + std::size_t n, std::vector& result) { \ + doUniformSample(rng, a, b, n, result); \ } UNIFORM_SAMPLE(std::size_t) UNIFORM_SAMPLE(std::ptrdiff_t) @@ -456,11 +481,19 @@ void CSampling::normalSample(double mean, double variance, std::size_t n, TDoubl doNormalSample(ms_Rng, mean, variance, n, result); } -void CSampling::normalSample(CPRNG::CXorOShiro128Plus& rng, double mean, double variance, std::size_t n, TDoubleVec& result) { +void CSampling::normalSample(CPRNG::CXorOShiro128Plus& rng, + double mean, + double variance, + std::size_t n, + TDoubleVec& result) { doNormalSample(rng, mean, 
variance, n, result); } -void CSampling::normalSample(CPRNG::CXorShift1024Mult& rng, double mean, double variance, std::size_t n, TDoubleVec& result) { +void CSampling::normalSample(CPRNG::CXorShift1024Mult& rng, + double mean, + double variance, + std::size_t n, + TDoubleVec& result) { doNormalSample(rng, mean, variance, n, result); } @@ -469,15 +502,24 @@ void CSampling::chiSquaredSample(double f, std::size_t n, TDoubleVec& result) { doChiSquaredSample(ms_Rng, f, n, result); } -void CSampling::chiSquaredSample(CPRNG::CXorOShiro128Plus& rng, double f, std::size_t n, TDoubleVec& result) { +void CSampling::chiSquaredSample(CPRNG::CXorOShiro128Plus& rng, + double f, + std::size_t n, + TDoubleVec& result) { doChiSquaredSample(rng, f, n, result); } -void CSampling::chiSquaredSample(CPRNG::CXorShift1024Mult& rng, double f, std::size_t n, TDoubleVec& result) { +void CSampling::chiSquaredSample(CPRNG::CXorShift1024Mult& rng, + double f, + std::size_t n, + TDoubleVec& result) { doChiSquaredSample(rng, f, n, result); } -bool CSampling::multivariateNormalSample(const TDoubleVec& mean, const TDoubleVecVec& covariance, std::size_t n, TDoubleVecVec& samples) { +bool CSampling::multivariateNormalSample(const TDoubleVec& mean, + const TDoubleVecVec& covariance, + std::size_t n, + TDoubleVecVec& samples) { core::CScopedFastLock scopedLock(ms_Lock); return doMultivariateNormalSample(ms_Rng, mean, covariance, n, samples); } @@ -498,27 +540,24 @@ bool CSampling::multivariateNormalSample(CPRNG::CXorShift1024Mult& rng, return doMultivariateNormalSample(rng, mean, covariance, n, samples); } -#define MULTIVARIATE_NORMAL_SAMPLE(N) \ - void CSampling::multivariateNormalSample(const CVectorNx1& mean, \ - const CSymmetricMatrixNxN& covariance, \ - std::size_t n, \ - std::vector>& samples) { \ - core::CScopedFastLock scopedLock(ms_Lock); \ - doMultivariateNormalSample(ms_Rng, mean, covariance, n, samples); \ - } \ - void CSampling::multivariateNormalSample(CPRNG::CXorOShiro128Plus& rng, \ - const CVectorNx1& mean, \ - const CSymmetricMatrixNxN& covariance, \ - std::size_t n, \ - std::vector>& samples) { \ - doMultivariateNormalSample(rng, mean, covariance, n, samples); \ - } \ - void CSampling::multivariateNormalSample(CPRNG::CXorShift1024Mult& rng, \ - const CVectorNx1& mean, \ - const CSymmetricMatrixNxN& covariance, \ - std::size_t n, \ - std::vector>& samples) { \ - doMultivariateNormalSample(rng, mean, covariance, n, samples); \ +#define MULTIVARIATE_NORMAL_SAMPLE(N) \ + void CSampling::multivariateNormalSample( \ + const CVectorNx1& mean, const CSymmetricMatrixNxN& covariance, \ + std::size_t n, std::vector>& samples) { \ + core::CScopedFastLock scopedLock(ms_Lock); \ + doMultivariateNormalSample(ms_Rng, mean, covariance, n, samples); \ + } \ + void CSampling::multivariateNormalSample( \ + CPRNG::CXorOShiro128Plus& rng, const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, std::size_t n, \ + std::vector>& samples) { \ + doMultivariateNormalSample(rng, mean, covariance, n, samples); \ + } \ + void CSampling::multivariateNormalSample( \ + CPRNG::CXorShift1024Mult& rng, const CVectorNx1& mean, \ + const CSymmetricMatrixNxN& covariance, std::size_t n, \ + std::vector>& samples) { \ + doMultivariateNormalSample(rng, mean, covariance, n, samples); \ } MULTIVARIATE_NORMAL_SAMPLE(2) MULTIVARIATE_NORMAL_SAMPLE(3) @@ -531,15 +570,19 @@ std::size_t CSampling::categoricalSample(TDoubleVec& probabilities) { return doCategoricalSample(ms_Rng, probabilities); } -std::size_t 
CSampling::categoricalSample(CPRNG::CXorOShiro128Plus& rng, TDoubleVec& probabilities) { +std::size_t CSampling::categoricalSample(CPRNG::CXorOShiro128Plus& rng, + TDoubleVec& probabilities) { return doCategoricalSample(rng, probabilities); } -std::size_t CSampling::categoricalSample(CPRNG::CXorShift1024Mult& rng, TDoubleVec& probabilities) { +std::size_t CSampling::categoricalSample(CPRNG::CXorShift1024Mult& rng, + TDoubleVec& probabilities) { return doCategoricalSample(rng, probabilities); } -void CSampling::categoricalSampleWithReplacement(TDoubleVec& probabilities, std::size_t n, TSizeVec& result) { +void CSampling::categoricalSampleWithReplacement(TDoubleVec& probabilities, + std::size_t n, + TSizeVec& result) { core::CScopedFastLock scopedLock(ms_Lock); doCategoricalSampleWithReplacement(ms_Rng, probabilities, n, result); } @@ -558,7 +601,9 @@ void CSampling::categoricalSampleWithReplacement(CPRNG::CXorShift1024Mult& rng, doCategoricalSampleWithReplacement(rng, probabilities, n, result); } -void CSampling::categoricalSampleWithoutReplacement(TDoubleVec& probabilities, std::size_t n, TSizeVec& result) { +void CSampling::categoricalSampleWithoutReplacement(TDoubleVec& probabilities, + std::size_t n, + TSizeVec& result) { core::CScopedFastLock scopedLock(ms_Lock); doCategoricalSampleWithoutReplacement(ms_Rng, probabilities, n, result); } @@ -577,7 +622,10 @@ void CSampling::categoricalSampleWithoutReplacement(CPRNG::CXorShift1024Mult& rn doCategoricalSampleWithReplacement(rng, probabilities, n, result); } -void CSampling::multinomialSampleFast(TDoubleVec& probabilities, std::size_t n, TSizeVec& sample, bool sorted) { +void CSampling::multinomialSampleFast(TDoubleVec& probabilities, + std::size_t n, + TSizeVec& sample, + bool sorted) { sample.clear(); if (n == 0 || probabilities.empty()) { @@ -620,7 +668,8 @@ void CSampling::multinomialSampleFast(TDoubleVec& probabilities, std::size_t n, std::size_t m = probabilities.size() - 1; core::CScopedFastLock scopedLock(ms_Lock); for (std::size_t i = 0u; r > 0 && i < m; ++i) { - boost::random::binomial_distribution<> binomial(static_cast(r), probabilities[i] / p); + boost::random::binomial_distribution<> binomial(static_cast(r), + probabilities[i] / p); std::size_t ni = static_cast(binomial(ms_Rng)); sample.push_back(ni); r -= ni; @@ -689,7 +738,8 @@ void CSampling::weightedSample(std::size_t n, const TDoubleVec& weights, TSizeVe double totalWeight = std::accumulate(weights.begin(), weights.end(), 0.0); - n = std::max(static_cast(totalWeight * static_cast(n) + 0.5), static_cast(1u)); + n = std::max(static_cast(totalWeight * static_cast(n) + 0.5), + static_cast(1u)); LOG_TRACE(<< "totalWeight = " << totalWeight << ", n = " << n); @@ -718,14 +768,16 @@ void CSampling::weightedSample(std::size_t n, const TDoubleVec& weights, TSizeVe TDoubleSizePrVec candidates; for (std::size_t i = 0u; i < choices.size(); ++i) { - if ((totalRemainder > 0.0 && choices[i] == 0u) || (totalRemainder < 0.0 && choices[i] == 1u)) { + if ((totalRemainder > 0.0 && choices[i] == 0u) || + (totalRemainder < 0.0 && choices[i] == 1u)) { candidates.emplace_back(-std::fabs(remainders[choices[i]][i]), i); } } std::sort(candidates.begin(), candidates.end()); LOG_TRACE(<< "candidates = " << core::CContainerPrinter::print(candidates)); - for (std::size_t i = 0u; i < candidates.size() && std::fabs(totalRemainder) > 0.5; ++i) { + for (std::size_t i = 0u; + i < candidates.size() && std::fabs(totalRemainder) > 0.5; ++i) { std::size_t j = candidates[i].second; unsigned int choice = 
choices[j]; choices[j] = (choice + 1u) % 2u; @@ -738,7 +790,8 @@ void CSampling::weightedSample(std::size_t n, const TDoubleVec& weights, TSizeVe for (std::size_t i = 0u; i < weights.size(); ++i) { double number = weights[i] * static_cast(n) / totalWeight; - sampling.push_back(static_cast(choices[i] == 0u ? std::floor(number) : std::ceil(number))); + sampling.push_back(static_cast( + choices[i] == 0u ? std::floor(number) : std::ceil(number))); } } @@ -757,7 +810,8 @@ void CSampling::normalSampleQuantiles(double mean, double variance, std::size_t boost::math::normal_distribution<> normal(mean, std::sqrt(variance)); sampleQuantiles(normal, n, result); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to sample normal quantiles: " << e.what() << ", mean = " << mean << ", variance = " << variance); + LOG_ERROR(<< "Failed to sample normal quantiles: " << e.what() + << ", mean = " << mean << ", variance = " << variance); result.clear(); } } @@ -772,7 +826,8 @@ void CSampling::gammaSampleQuantiles(double shape, double rate, std::size_t n, T boost::math::gamma_distribution<> gamma(shape, 1.0 / rate); sampleQuantiles(gamma, n, result); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to sample normal quantiles: " << e.what() << ", shape = " << shape << ", rate = " << rate); + LOG_ERROR(<< "Failed to sample normal quantiles: " << e.what() + << ", shape = " << shape << ", rate = " << rate); result.clear(); } } diff --git a/lib/maths/CSeasonalComponent.cc b/lib/maths/CSeasonalComponent.cc index a193b009a1..15588eb69b 100644 --- a/lib/maths/CSeasonalComponent.cc +++ b/lib/maths/CSeasonalComponent.cc @@ -45,7 +45,8 @@ CSeasonalComponent::CSeasonalComponent(const CSeasonalTime& time, CSplineTypes::EBoundaryCondition boundaryCondition, CSplineTypes::EType valueInterpolationType, CSplineTypes::EType varianceInterpolationType) - : CDecompositionComponent{maxSize, boundaryCondition, valueInterpolationType, varianceInterpolationType}, + : CDecompositionComponent{maxSize, boundaryCondition, + valueInterpolationType, varianceInterpolationType}, m_Bucketing{time, decayRate, minimumBucketLength} { } @@ -54,8 +55,10 @@ CSeasonalComponent::CSeasonalComponent(double decayRate, core::CStateRestoreTraverser& traverser, CSplineTypes::EType valueInterpolationType, CSplineTypes::EType varianceInterpolationType) - : CDecompositionComponent{0, CSplineTypes::E_Periodic, valueInterpolationType, varianceInterpolationType} { - traverser.traverseSubLevel(boost::bind(&CSeasonalComponent::acceptRestoreTraverser, this, decayRate, minimumBucketLength, _1)); + : CDecompositionComponent{0, CSplineTypes::E_Periodic, + valueInterpolationType, varianceInterpolationType} { + traverser.traverseSubLevel(boost::bind(&CSeasonalComponent::acceptRestoreTraverser, + this, decayRate, minimumBucketLength, _1)); } void CSeasonalComponent::swap(CSeasonalComponent& other) { @@ -64,17 +67,20 @@ void CSeasonalComponent::swap(CSeasonalComponent& other) { m_Bucketing.swap(other.m_Bucketing); } -bool CSeasonalComponent::acceptRestoreTraverser(double decayRate, double minimumBucketLength, core::CStateRestoreTraverser& traverser) { +bool CSeasonalComponent::acceptRestoreTraverser(double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; RESTORE(DECOMPOSITION_COMPONENT_TAG, traverser.traverseSubLevel( - boost::bind(&CDecompositionComponent::acceptRestoreTraverser, static_cast(this), _1))) + boost::bind(&CDecompositionComponent::acceptRestoreTraverser, + 
static_cast(this), _1))) RESTORE(RNG_TAG, m_Rng.fromString(traverser.value())) RESTORE_SETUP_TEARDOWN(BUCKETING_TAG, - CSeasonalComponentAdaptiveBucketing bucketing(decayRate, minimumBucketLength, traverser), - true, - m_Bucketing.swap(bucketing)) + CSeasonalComponentAdaptiveBucketing bucketing( + decayRate, minimumBucketLength, traverser), + true, m_Bucketing.swap(bucketing)) } while (traverser.next()); return true; @@ -83,16 +89,20 @@ bool CSeasonalComponent::acceptRestoreTraverser(double decayRate, double minimum void CSeasonalComponent::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertLevel( DECOMPOSITION_COMPONENT_TAG, - boost::bind(&CDecompositionComponent::acceptPersistInserter, static_cast(this), _1)); + boost::bind(&CDecompositionComponent::acceptPersistInserter, + static_cast(this), _1)); inserter.insertValue(RNG_TAG, m_Rng.toString()); - inserter.insertLevel(BUCKETING_TAG, boost::bind(&CSeasonalComponentAdaptiveBucketing::acceptPersistInserter, &m_Bucketing, _1)); + inserter.insertLevel(BUCKETING_TAG, boost::bind(&CSeasonalComponentAdaptiveBucketing::acceptPersistInserter, + &m_Bucketing, _1)); } bool CSeasonalComponent::initialized() const { return this->CDecompositionComponent::initialized(); } -bool CSeasonalComponent::initialize(core_t::TTime startTime, core_t::TTime endTime, const TFloatMeanAccumulatorVec& values) { +bool CSeasonalComponent::initialize(core_t::TTime startTime, + core_t::TTime endTime, + const TFloatMeanAccumulatorVec& values) { this->clear(); if (!m_Bucketing.initialize(this->maxSize())) { @@ -178,7 +188,9 @@ double CSeasonalComponent::meanValue() const { return this->CDecompositionComponent::meanValue(); } -double CSeasonalComponent::delta(core_t::TTime time, core_t::TTime shortPeriod, double shortPeriodValue) const { +double CSeasonalComponent::delta(core_t::TTime time, + core_t::TTime shortPeriod, + double shortPeriodValue) const { using TMinAccumulator = CBasicStatistics::SMin::TAccumulator; using TMinMaxAccumulator = CBasicStatistics::CMinMax; @@ -284,7 +296,8 @@ void CSeasonalComponent::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr me } std::size_t CSeasonalComponent::memoryUsage() const { - return core::CMemory::dynamicSize(m_Bucketing) + core::CMemory::dynamicSize(this->splines()); + return core::CMemory::dynamicSize(m_Bucketing) + + core::CMemory::dynamicSize(this->splines()); } core_t::TTime CSeasonalComponent::jitter(core_t::TTime time) { @@ -294,7 +307,8 @@ core_t::TTime CSeasonalComponent::jitter(core_t::TTime time) { double f{CSampling::uniformSample(m_Rng, 0.0, 1.0)}; core_t::TTime a{time_.startOfWindow(time)}; core_t::TTime b{a + time_.windowLength() - 1}; - double jitter{0.5 * m_Bucketing.minimumBucketLength() * (f <= 0.5 ? std::sqrt(2.0 * f) - 1.0 : std::sqrt(2.0 * (f - 0.5)))}; + double jitter{0.5 * m_Bucketing.minimumBucketLength() * + (f <= 0.5 ? 
std::sqrt(2.0 * f) - 1.0 : std::sqrt(2.0 * (f - 0.5)))}; result = CTools::truncate(result + static_cast(jitter + 0.5), a, b); } return result; diff --git a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc index 67c651b4ef..f4eab88b35 100644 --- a/lib/maths/CSeasonalComponentAdaptiveBucketing.cc +++ b/lib/maths/CSeasonalComponentAdaptiveBucketing.cc @@ -76,7 +76,8 @@ const core_t::TTime UNSET_TIME{0}; const double SUFFICIENT_INTERVAL_TO_ESTIMATE_SLOPE{2.5}; } -CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing() : CAdaptiveBucketing{0.0, 0.0} { +CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing() + : CAdaptiveBucketing{0.0, 0.0} { } CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(const CSeasonalTime& time, @@ -86,17 +87,21 @@ CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(const C } CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(const CSeasonalComponentAdaptiveBucketing& other) - : CAdaptiveBucketing(other), m_Time{other.m_Time->clone()}, m_Buckets(other.m_Buckets) { + : CAdaptiveBucketing(other), m_Time{other.m_Time->clone()}, + m_Buckets(other.m_Buckets) { } -CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing(double decayRate, - double minimumBucketLength, - core::CStateRestoreTraverser& traverser) +CSeasonalComponentAdaptiveBucketing::CSeasonalComponentAdaptiveBucketing( + double decayRate, + double minimumBucketLength, + core::CStateRestoreTraverser& traverser) : CAdaptiveBucketing{decayRate, minimumBucketLength} { - traverser.traverseSubLevel(boost::bind(&CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser, this, _1)); + traverser.traverseSubLevel(boost::bind( + &CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser, this, _1)); } -const CSeasonalComponentAdaptiveBucketing& CSeasonalComponentAdaptiveBucketing::operator=(const CSeasonalComponentAdaptiveBucketing& rhs) { +const CSeasonalComponentAdaptiveBucketing& CSeasonalComponentAdaptiveBucketing:: +operator=(const CSeasonalComponentAdaptiveBucketing& rhs) { if (&rhs != this) { CSeasonalComponentAdaptiveBucketing tmp(rhs); this->swap(tmp); @@ -107,8 +112,10 @@ const CSeasonalComponentAdaptiveBucketing& CSeasonalComponentAdaptiveBucketing:: void CSeasonalComponentAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); inserter.insertLevel(ADAPTIVE_BUCKETING_6_3_TAG, - boost::bind(&CAdaptiveBucketing::acceptPersistInserter, static_cast(this), _1)); - inserter.insertLevel(TIME_6_3_TAG, boost::bind(&CSeasonalTimeStateSerializer::acceptPersistInserter, boost::cref(*m_Time), _1)); + boost::bind(&CAdaptiveBucketing::acceptPersistInserter, + static_cast(this), _1)); + inserter.insertLevel(TIME_6_3_TAG, boost::bind(&CSeasonalTimeStateSerializer::acceptPersistInserter, + boost::cref(*m_Time), _1)); core::CPersistUtils::persist(BUCKETS_6_3_TAG, m_Buckets, inserter); } @@ -124,7 +131,8 @@ bool CSeasonalComponentAdaptiveBucketing::initialized() const { bool CSeasonalComponentAdaptiveBucketing::initialize(std::size_t n) { double a{0.0}; - double b{static_cast(std::min(this->time().windowLength(), this->time().period()))}; + double b{static_cast( + std::min(this->time().windowLength(), this->time().period()))}; if (this->CAdaptiveBucketing::initialize(a, b, n)) { n = this->size(); @@ -184,7 +192,10 @@ void CSeasonalComponentAdaptiveBucketing::linearScale(double scale) { } 
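A note on CSeasonalComponent::jitter, whose reformatting closes just above: the time offset is drawn by inverse-transform sampling, pushing a uniform variate f through a piecewise inverse CDF onto [-1, 1] before scaling by half the minimum bucket length and truncating the result to the window. A small sketch of just the sampling step, with std::mt19937 standing in for the component's own PRNG (that substitution is an assumption for the sake of a self-contained example):

    #include <cmath>
    #include <random>

    // Inverse-transform sample on [-1, 1] matching the expression in the hunk
    // above: f <= 0.5 maps to [-1, 0] via sqrt(2 f) - 1, and f > 0.5 maps to
    // (0, 1] via sqrt(2 (f - 0.5)).
    double sampleJitter(std::mt19937& rng, double halfBucketLength) {
        std::uniform_real_distribution<double> uniform(0.0, 1.0);
        double f = uniform(rng);
        double u = f <= 0.5 ? std::sqrt(2.0 * f) - 1.0 : std::sqrt(2.0 * (f - 0.5));
        return halfBucketLength * u;
    }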
} -void CSeasonalComponentAdaptiveBucketing::add(core_t::TTime time, double value, double prediction, double weight) { +void CSeasonalComponentAdaptiveBucketing::add(core_t::TTime time, + double value, + double prediction, + double weight) { std::size_t bucket{0}; if (!this->initialized() || !this->bucket(time, bucket)) { return; @@ -196,22 +207,27 @@ void CSeasonalComponentAdaptiveBucketing::add(core_t::TTime time, double value, double t{m_Time->regression(time)}; TRegression& regression{bucket_.s_Regression}; - TDoubleMeanVarAccumulator moments = - CBasicStatistics::accumulator(regression.count(), prediction, static_cast(bucket_.s_Variance)); + TDoubleMeanVarAccumulator moments = CBasicStatistics::accumulator( + regression.count(), prediction, static_cast(bucket_.s_Variance)); moments.add(value, weight * weight); regression.add(t, value, weight); bucket_.s_Variance = CBasicStatistics::maximumLikelihoodVariance(moments); - if (m_Time->regressionInterval(bucket_.s_FirstUpdate, bucket_.s_LastUpdate) < SUFFICIENT_INTERVAL_TO_ESTIMATE_SLOPE) { + if (m_Time->regressionInterval(bucket_.s_FirstUpdate, bucket_.s_LastUpdate) < + SUFFICIENT_INTERVAL_TO_ESTIMATE_SLOPE) { double delta{regression.predict(t)}; regression.shiftGradient(-gradient(regression)); delta -= regression.predict(t); regression.shiftOrdinate(delta); } - bucket_.s_FirstUpdate = bucket_.s_FirstUpdate == UNSET_TIME ? time : std::min(bucket_.s_FirstUpdate, time); - bucket_.s_LastUpdate = bucket_.s_LastUpdate == UNSET_TIME ? time : std::max(bucket_.s_LastUpdate, time); + bucket_.s_FirstUpdate = bucket_.s_FirstUpdate == UNSET_TIME + ? time + : std::min(bucket_.s_FirstUpdate, time); + bucket_.s_LastUpdate = bucket_.s_LastUpdate == UNSET_TIME + ? time + : std::max(bucket_.s_LastUpdate, time); } const CSeasonalTime& CSeasonalComponentAdaptiveBucketing::time() const { @@ -292,7 +308,8 @@ uint64_t CSeasonalComponentAdaptiveBucketing::checksum(uint64_t seed) const { void CSeasonalComponentAdaptiveBucketing::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CSeasonalComponentAdaptiveBucketing"); - core::CMemoryDebug::dynamicSize("m_Endpoints", this->CAdaptiveBucketing::endpoints(), mem); + core::CMemoryDebug::dynamicSize("m_Endpoints", + this->CAdaptiveBucketing::endpoints(), mem); core::CMemoryDebug::dynamicSize("m_Centres", this->CAdaptiveBucketing::centres(), mem); core::CMemoryDebug::dynamicSize("m_Buckets", m_Buckets, mem); } @@ -301,7 +318,8 @@ std::size_t CSeasonalComponentAdaptiveBucketing::memoryUsage() const { return this->CAdaptiveBucketing::memoryUsage() + core::CMemory::dynamicSize(m_Buckets); } -const CSeasonalComponentAdaptiveBucketing::TFloatVec& CSeasonalComponentAdaptiveBucketing::endpoints() const { +const CSeasonalComponentAdaptiveBucketing::TFloatVec& +CSeasonalComponentAdaptiveBucketing::endpoints() const { return this->CAdaptiveBucketing::endpoints(); } @@ -309,11 +327,13 @@ double CSeasonalComponentAdaptiveBucketing::count() const { return this->CAdaptiveBucketing::count(); } -CSeasonalComponentAdaptiveBucketing::TDoubleVec CSeasonalComponentAdaptiveBucketing::values(core_t::TTime time) const { +CSeasonalComponentAdaptiveBucketing::TDoubleVec +CSeasonalComponentAdaptiveBucketing::values(core_t::TTime time) const { return this->CAdaptiveBucketing::values(time); } -CSeasonalComponentAdaptiveBucketing::TDoubleVec CSeasonalComponentAdaptiveBucketing::variances() const { +CSeasonalComponentAdaptiveBucketing::TDoubleVec +CSeasonalComponentAdaptiveBucketing::variances() const { return 
this->CAdaptiveBucketing::variances(); } @@ -323,10 +343,13 @@ bool CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRes const std::string& name{traverser.name()}; RESTORE(ADAPTIVE_BUCKETING_6_3_TAG, traverser.traverseSubLevel( - boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, static_cast(this), _1))); - RESTORE(TIME_6_3_TAG, - traverser.traverseSubLevel(boost::bind(&CSeasonalTimeStateSerializer::acceptRestoreTraverser, boost::ref(m_Time), _1))) - RESTORE(BUCKETS_6_3_TAG, core::CPersistUtils::restore(BUCKETS_6_3_TAG, m_Buckets, traverser)) + boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, + static_cast(this), _1))); + RESTORE(TIME_6_3_TAG, traverser.traverseSubLevel(boost::bind( + &CSeasonalTimeStateSerializer::acceptRestoreTraverser, + boost::ref(m_Time), _1))) + RESTORE(BUCKETS_6_3_TAG, + core::CPersistUtils::restore(BUCKETS_6_3_TAG, m_Buckets, traverser)) } } else { // There is no version string this is historic state. @@ -342,22 +365,28 @@ bool CSeasonalComponentAdaptiveBucketing::acceptRestoreTraverser(core::CStateRes const std::string& name{traverser.name()}; RESTORE(ADAPTIVE_BUCKETING_OLD_TAG, traverser.traverseSubLevel( - boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, static_cast(this), _1))); - RESTORE(TIME_OLD_TAG, - traverser.traverseSubLevel(boost::bind(&CSeasonalTimeStateSerializer::acceptRestoreTraverser, boost::ref(m_Time), _1))) + boost::bind(&CAdaptiveBucketing::acceptRestoreTraverser, + static_cast(this), _1))); + RESTORE(TIME_OLD_TAG, traverser.traverseSubLevel(boost::bind( + &CSeasonalTimeStateSerializer::acceptRestoreTraverser, + boost::ref(m_Time), _1))) RESTORE_BUILT_IN(INITIAL_TIME_OLD_TAG, initialTime) - RESTORE_SETUP_TEARDOWN(REGRESSION_OLD_TAG, - TRegression regression, - traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, ®ression, _1)), - regressions.push_back(regression)) - RESTORE(VARIANCES_OLD_TAG, core::CPersistUtils::fromString(traverser.value(), variances)) - RESTORE(LAST_UPDATES_OLD_TAG, core::CPersistUtils::fromString(traverser.value(), lastUpdates)) + RESTORE_SETUP_TEARDOWN( + REGRESSION_OLD_TAG, TRegression regression, + traverser.traverseSubLevel(boost::bind( + &TRegression::acceptRestoreTraverser, ®ression, _1)), + regressions.push_back(regression)) + RESTORE(VARIANCES_OLD_TAG, + core::CPersistUtils::fromString(traverser.value(), variances)) + RESTORE(LAST_UPDATES_OLD_TAG, + core::CPersistUtils::fromString(traverser.value(), lastUpdates)) } while (traverser.next()); m_Buckets.clear(); m_Buckets.reserve(regressions.size()); for (std::size_t i = 0u; i < regressions.size(); ++i) { - m_Buckets.emplace_back(regressions[i], variances[i], initialTime, lastUpdates[i]); + m_Buckets.emplace_back(regressions[i], variances[i], initialTime, + lastUpdates[i]); } } @@ -411,10 +440,12 @@ void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { for (std::size_t i = 1u; i < n; ++i) { double yl{m_Endpoints[i - 1]}; double yr{m_Endpoints[i]}; - std::size_t r = std::lower_bound(endpoints.begin(), endpoints.end(), yr) - endpoints.begin(); + std::size_t r = std::lower_bound(endpoints.begin(), endpoints.end(), yr) - + endpoints.begin(); r = CTools::truncate(r, std::size_t(1), n - 1); - std::size_t l = std::upper_bound(endpoints.begin(), endpoints.end(), yl) - endpoints.begin(); + std::size_t l = std::upper_bound(endpoints.begin(), endpoints.end(), yl) - + endpoints.begin(); l = CTools::truncate(l, std::size_t(1), r); LOG_TRACE(<< "interval = [" << yl << "," << yr << "]"); @@ 
-427,8 +458,10 @@ void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { double interval{m_Endpoints[i] - m_Endpoints[i - 1]}; double w{CTools::truncate(interval / (xr - xl), 0.0, 1.0)}; const SBucket& bucket{m_Buckets[l - 1]}; - buckets.emplace_back(bucket.s_Regression.scaled(w * w), bucket.s_Variance, bucket.s_FirstUpdate, bucket.s_LastUpdate); - centres.push_back(CTools::truncate(static_cast(m_Centres[l - 1]), yl, yr)); + buckets.emplace_back(bucket.s_Regression.scaled(w * w), bucket.s_Variance, + bucket.s_FirstUpdate, bucket.s_LastUpdate); + centres.push_back( + CTools::truncate(static_cast(m_Centres[l - 1]), yl, yr)); } else { double interval{xr - m_Endpoints[i - 1]}; double w{CTools::truncate(interval / (xr - xl), 0.0, 1.0)}; @@ -437,20 +470,23 @@ void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { TMinAccumulator lastUpdate; TDoubleRegression regression{bucket->s_Regression.scaled(w)}; TDoubleMeanVarAccumulator variance{CBasicStatistics::accumulator( - w * bucket->s_Regression.count(), bucket->s_Regression.mean(), static_cast(bucket->s_Variance))}; + w * bucket->s_Regression.count(), bucket->s_Regression.mean(), + static_cast(bucket->s_Variance))}; firstUpdate.add(bucket->s_FirstUpdate); lastUpdate.add(bucket->s_LastUpdate); - TDoubleMeanAccumulator centre{ - CBasicStatistics::accumulator(w * bucket->s_Regression.count(), static_cast(m_Centres[l - 1]))}; + TDoubleMeanAccumulator centre{CBasicStatistics::accumulator( + w * bucket->s_Regression.count(), static_cast(m_Centres[l - 1]))}; double count{w * w * bucket->s_Regression.count()}; while (++l < r) { bucket = &m_Buckets[l - 1]; regression += bucket->s_Regression; variance += CBasicStatistics::accumulator( - bucket->s_Regression.count(), bucket->s_Regression.mean(), static_cast(bucket->s_Variance)); + bucket->s_Regression.count(), bucket->s_Regression.mean(), + static_cast(bucket->s_Variance)); firstUpdate.add(bucket->s_FirstUpdate); lastUpdate.add(bucket->s_LastUpdate); - centre += CBasicStatistics::accumulator(bucket->s_Regression.count(), static_cast(m_Centres[l - 1])); + centre += CBasicStatistics::accumulator( + bucket->s_Regression.count(), static_cast(m_Centres[l - 1])); count += bucket->s_Regression.count(); } xl = endpoints[l - 1]; @@ -460,14 +496,17 @@ void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec& endpoints) { w = CTools::truncate(interval / (xr - xl), 0.0, 1.0); regression += bucket->s_Regression.scaled(w); variance += CBasicStatistics::accumulator( - w * bucket->s_Regression.count(), bucket->s_Regression.mean(), static_cast(bucket->s_Variance)); + w * bucket->s_Regression.count(), bucket->s_Regression.mean(), + static_cast(bucket->s_Variance)); firstUpdate.add(bucket->s_FirstUpdate); lastUpdate.add(bucket->s_LastUpdate); - centre += CBasicStatistics::accumulator(w * bucket->s_Regression.count(), static_cast(m_Centres[l - 1])); + centre += CBasicStatistics::accumulator( + w * bucket->s_Regression.count(), static_cast(m_Centres[l - 1])); count += w * w * bucket->s_Regression.count(); double scale{count == regression.count() ? 
1.0 : count / regression.count()}; - buckets.emplace_back( - regression.scaled(scale), CBasicStatistics::maximumLikelihoodVariance(variance), firstUpdate[0], lastUpdate[0]); + buckets.emplace_back(regression.scaled(scale), + CBasicStatistics::maximumLikelihoodVariance(variance), + firstUpdate[0], lastUpdate[0]); centres.push_back(CTools::truncate(CBasicStatistics::mean(centre), yl, yr)); } } @@ -500,12 +539,15 @@ bool CSeasonalComponentAdaptiveBucketing::inWindow(core_t::TTime time) const { return m_Time->inWindow(time); } -void CSeasonalComponentAdaptiveBucketing::add(std::size_t bucket, core_t::TTime time, double value, double weight) { +void CSeasonalComponentAdaptiveBucketing::add(std::size_t bucket, + core_t::TTime time, + double value, + double weight) { SBucket& bucket_{m_Buckets[bucket]}; TRegression& regression{bucket_.s_Regression}; CFloatStorage& variance{bucket_.s_Variance}; - TDoubleMeanVarAccumulator variance_{ - CBasicStatistics::accumulator(regression.count(), regression.mean(), static_cast(variance))}; + TDoubleMeanVarAccumulator variance_{CBasicStatistics::accumulator( + regression.count(), regression.mean(), static_cast(variance))}; variance_.add(value, weight); regression.add(m_Time->regression(time), value, weight); variance = CBasicStatistics::maximumLikelihoodVariance(variance_); @@ -519,7 +561,9 @@ double CSeasonalComponentAdaptiveBucketing::count(std::size_t bucket) const { return m_Buckets[bucket].s_Regression.count(); } -double CSeasonalComponentAdaptiveBucketing::predict(std::size_t bucket, core_t::TTime time, double offset) const { +double CSeasonalComponentAdaptiveBucketing::predict(std::size_t bucket, + core_t::TTime time, + double offset) const { const SBucket& bucket_{m_Buckets[bucket]}; core_t::TTime firstUpdate{bucket_.s_FirstUpdate}; core_t::TTime lastUpdate{bucket_.s_LastUpdate}; @@ -532,7 +576,8 @@ double CSeasonalComponentAdaptiveBucketing::predict(std::size_t bucket, core_t:: double t{m_Time->regression(time + static_cast(offset + 0.5))}; - double extrapolateInterval{static_cast(CBasicStatistics::max(time - lastUpdate, firstUpdate - time, core_t::TTime(0)))}; + double extrapolateInterval{static_cast(CBasicStatistics::max( + time - lastUpdate, firstUpdate - time, core_t::TTime(0)))}; if (extrapolateInterval == 0.0) { return regression.predict(t); } @@ -550,14 +595,16 @@ double CSeasonalComponentAdaptiveBucketing::variance(std::size_t bucket) const { double CSeasonalComponentAdaptiveBucketing::observedInterval(core_t::TTime time) const { return m_Time->regressionInterval( - std::min_element(m_Buckets.begin(), - m_Buckets.end(), - [](const SBucket& lhs, const SBucket& rhs) { return lhs.s_FirstUpdate < rhs.s_FirstUpdate; }) + std::min_element(m_Buckets.begin(), m_Buckets.end(), + [](const SBucket& lhs, const SBucket& rhs) { + return lhs.s_FirstUpdate < rhs.s_FirstUpdate; + }) ->s_FirstUpdate, time); } -CSeasonalComponentAdaptiveBucketing::SBucket::SBucket() : s_Variance{0.0}, s_FirstUpdate{UNSET_TIME}, s_LastUpdate{UNSET_TIME} { +CSeasonalComponentAdaptiveBucketing::SBucket::SBucket() + : s_Variance{0.0}, s_FirstUpdate{UNSET_TIME}, s_LastUpdate{UNSET_TIME} { } CSeasonalComponentAdaptiveBucketing::SBucket::SBucket(const TRegression& regression, @@ -570,7 +617,9 @@ CSeasonalComponentAdaptiveBucketing::SBucket::SBucket(const TRegression& regress bool CSeasonalComponentAdaptiveBucketing::SBucket::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; - RESTORE(REGRESSION_6_3_TAG, 
traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, &s_Regression, _1))) + RESTORE(REGRESSION_6_3_TAG, + traverser.traverseSubLevel(boost::bind( + &TRegression::acceptRestoreTraverser, &s_Regression, _1))) RESTORE(VARIANCE_6_3_TAG, s_Variance.fromString(traverser.value())) RESTORE_BUILT_IN(FIRST_UPDATE_6_3_TAG, s_FirstUpdate) RESTORE_BUILT_IN(LAST_UPDATE_6_3_TAG, s_LastUpdate) @@ -578,8 +627,10 @@ bool CSeasonalComponentAdaptiveBucketing::SBucket::acceptRestoreTraverser(core:: return true; } -void CSeasonalComponentAdaptiveBucketing::SBucket::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(REGRESSION_6_3_TAG, boost::bind(&TRegression::acceptPersistInserter, &s_Regression, _1)); +void CSeasonalComponentAdaptiveBucketing::SBucket::acceptPersistInserter( + core::CStatePersistInserter& inserter) const { + inserter.insertLevel(REGRESSION_6_3_TAG, boost::bind(&TRegression::acceptPersistInserter, + &s_Regression, _1)); inserter.insertValue(VARIANCE_6_3_TAG, s_Variance.toString()); inserter.insertValue(FIRST_UPDATE_6_3_TAG, s_FirstUpdate); inserter.insertValue(LAST_UPDATE_6_3_TAG, s_LastUpdate); diff --git a/lib/maths/CSeasonalTime.cc b/lib/maths/CSeasonalTime.cc index cfa74b6328..662e2afa8a 100644 --- a/lib/maths/CSeasonalTime.cc +++ b/lib/maths/CSeasonalTime.cc @@ -31,14 +31,17 @@ const std::string ARBITRARY_PERIOD_TIME_TAG("b"); //////// CSeasonalTime //////// -CSeasonalTime::CSeasonalTime() : m_Period(0), m_RegressionOrigin(0), m_Precedence(0) { +CSeasonalTime::CSeasonalTime() + : m_Period(0), m_RegressionOrigin(0), m_Precedence(0) { } -CSeasonalTime::CSeasonalTime(core_t::TTime period, double precedence) : m_Period(period), m_RegressionOrigin(0), m_Precedence(precedence) { +CSeasonalTime::CSeasonalTime(core_t::TTime period, double precedence) + : m_Period(period), m_RegressionOrigin(0), m_Precedence(precedence) { } bool CSeasonalTime::operator<(const CSeasonalTime& rhs) const { - return COrderings::lexicographical_compare(m_Period, -m_Precedence, rhs.m_Period, -rhs.m_Precedence); + return COrderings::lexicographical_compare(m_Period, -m_Precedence, + rhs.m_Period, -rhs.m_Precedence); } double CSeasonalTime::periodic(core_t::TTime time) const { @@ -46,11 +49,13 @@ double CSeasonalTime::periodic(core_t::TTime time) const { } double CSeasonalTime::regression(core_t::TTime time) const { - return static_cast(time - m_RegressionOrigin) / static_cast(this->regressionTimeScale()); + return static_cast(time - m_RegressionOrigin) / + static_cast(this->regressionTimeScale()); } double CSeasonalTime::regressionInterval(core_t::TTime start, core_t::TTime end) const { - return static_cast(end - start) / static_cast(this->regressionTimeScale()); + return static_cast(end - start) / + static_cast(this->regressionTimeScale()); } core_t::TTime CSeasonalTime::startOfWindowRepeat(core_t::TTime time) const { @@ -95,20 +100,24 @@ bool CSeasonalTime::windowed() const { } double CSeasonalTime::fractionInWindow() const { - return static_cast(std::max(this->period(), this->windowLength())) / static_cast(this->windowRepeat()); + return static_cast(std::max(this->period(), this->windowLength())) / + static_cast(this->windowRepeat()); } bool CSeasonalTime::excludes(const CSeasonalTime& other) const { - return std::abs(other.m_Period - m_Period) < std::max(other.m_Period, m_Period) / 20 && m_Precedence >= other.m_Precedence; + return std::abs(other.m_Period - m_Period) < std::max(other.m_Period, m_Period) / 20 && + m_Precedence >= other.m_Precedence; } 
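The next hunk reformats the two-argument startOfWindowRepeat, which anchors a timestamp to the start of its current window repeat by flooring time - offset to a multiple of windowRepeat() and adding the offset back. A sketch of that arithmetic, where floorToMultiple is a hypothetical stand-in for CIntegerTools::floor and is assumed to match it for non-negative differences:

    #include <cstdint>
    #include <iostream>

    using TTime = std::int64_t;

    // Round value down to a multiple of 'multiple'. Integer division truncates
    // toward zero, so this equals a true floor only for value >= 0.
    TTime floorToMultiple(TTime value, TTime multiple) {
        return (value / multiple) * multiple;
    }

    TTime startOfWindowRepeat(TTime offset, TTime time, TTime windowRepeat) {
        return offset + floorToMultiple(time - offset, windowRepeat);
    }

    int main() {
        // Weekly repeat (604800s) with the window anchored 172800s into the week:
        // 1000000 lies 827200s past the anchor, i.e. one full repeat plus change,
        // so it maps back to 172800 + 604800 = 777600.
        std::cout << startOfWindowRepeat(172800, 1000000, 604800) << '\n'; // 777600
    }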
-core_t::TTime CSeasonalTime::startOfWindowRepeat(core_t::TTime offset, core_t::TTime time) const { +core_t::TTime CSeasonalTime::startOfWindowRepeat(core_t::TTime offset, + core_t::TTime time) const { return offset + CIntegerTools::floor(time - offset, this->windowRepeat()); } //////// CDiurnalTime //////// -CDiurnalTime::CDiurnalTime() : m_StartOfWeek(0), m_WindowStart(0), m_WindowEnd(0) { +CDiurnalTime::CDiurnalTime() + : m_StartOfWeek(0), m_WindowStart(0), m_WindowEnd(0) { } CDiurnalTime::CDiurnalTime(core_t::TTime startOfWeek, @@ -116,7 +125,8 @@ CDiurnalTime::CDiurnalTime(core_t::TTime startOfWeek, core_t::TTime windowEnd, core_t::TTime period, double precedence) - : CSeasonalTime(period, precedence), m_StartOfWeek(startOfWeek), m_WindowStart(windowStart), m_WindowEnd(windowEnd) { + : CSeasonalTime(period, precedence), m_StartOfWeek(startOfWeek), + m_WindowStart(windowStart), m_WindowEnd(windowEnd) { } CDiurnalTime* CDiurnalTime::clone() const { @@ -163,7 +173,8 @@ core_t::TTime CDiurnalTime::windowEnd() const { } bool CDiurnalTime::hasWeekend() const { - return this->windowLength() == core::constants::WEEKEND || this->windowLength() == core::constants::WEEKDAYS; + return this->windowLength() == core::constants::WEEKEND || + this->windowLength() == core::constants::WEEKDAYS; } uint64_t CDiurnalTime::checksum(uint64_t seed) const { @@ -179,7 +190,8 @@ core_t::TTime CDiurnalTime::regressionTimeScale() const { //////// CGeneralPeriodTime //////// -CGeneralPeriodTime::CGeneralPeriodTime(core_t::TTime period, double precedence) : CSeasonalTime(period, precedence) { +CGeneralPeriodTime::CGeneralPeriodTime(core_t::TTime period, double precedence) + : CSeasonalTime(period, precedence) { } CGeneralPeriodTime* CGeneralPeriodTime::clone() const { @@ -233,7 +245,8 @@ core_t::TTime CGeneralPeriodTime::regressionTimeScale() const { //////// CSeasonalTimeStateSerializer //////// -bool CSeasonalTimeStateSerializer::acceptRestoreTraverser(TSeasonalTimePtr& result, core::CStateRestoreTraverser& traverser) { +bool CSeasonalTimeStateSerializer::acceptRestoreTraverser(TSeasonalTimePtr& result, + core::CStateRestoreTraverser& traverser) { std::size_t numResults = 0; do { @@ -261,7 +274,8 @@ bool CSeasonalTimeStateSerializer::acceptRestoreTraverser(TSeasonalTimePtr& resu return true; } -void CSeasonalTimeStateSerializer::acceptPersistInserter(const CSeasonalTime& time, core::CStatePersistInserter& inserter) { +void CSeasonalTimeStateSerializer::acceptPersistInserter(const CSeasonalTime& time, + core::CStatePersistInserter& inserter) { if (dynamic_cast(&time) != nullptr) { inserter.insertValue(DIURNAL_TIME_TAG, time.toString()); } else if (dynamic_cast(&time) != nullptr) { diff --git a/lib/maths/CSignal.cc b/lib/maths/CSignal.cc index 7d877d095f..ee01f03a40 100644 --- a/lib/maths/CSignal.cc +++ b/lib/maths/CSignal.cc @@ -50,7 +50,8 @@ void radix2fft(TComplexVec& f) { for (std::size_t stride = 1; stride < f.size(); stride <<= 1) { for (std::size_t k = 0u; k < stride; ++k) { - double t = boost::math::double_constants::pi * static_cast(k) / static_cast(stride); + double t = boost::math::double_constants::pi * + static_cast(k) / static_cast(stride); TComplex w(std::cos(t), std::sin(t)); for (std::size_t start = k; start < f.size(); start += 2 * stride) { TComplex fs = f[start]; @@ -105,7 +106,8 @@ void CSignal::fft(TComplexVec& f) { a[0] = f[0] * chirp[0]; b[0] = chirp[0]; for (std::size_t i = 1u; i < n; ++i) { - double t = boost::math::double_constants::pi * static_cast(i * i) / static_cast(n); + double t = 
boost::math::double_constants::pi * + static_cast(i * i) / static_cast(n); chirp.emplace_back(std::cos(t), std::sin(t)); a[i] = f[i] * std::conj(chirp[i]); b[i] = b[m - i] = chirp[i]; @@ -151,7 +153,8 @@ double CSignal::autocorrelation(std::size_t offset, TFloatMeanAccumulatorCRng va double ni = CBasicStatistics::count(values[i]); double nj = CBasicStatistics::count(values[j]); if (ni > 0.0 && nj > 0.0) { - autocorrelation.add((CBasicStatistics::mean(values[i]) - mean) * (CBasicStatistics::mean(values[j]) - mean)); + autocorrelation.add((CBasicStatistics::mean(values[i]) - mean) * + (CBasicStatistics::mean(values[j]) - mean)); } } @@ -192,7 +195,8 @@ void CSignal::autocorrelations(const TFloatMeanAccumulatorVec& values, TDoubleVe f.resize(j - 1, TComplex(0.0, 0.0)); } else { for (std::size_t k = i; k < j; ++k) { - double alpha = static_cast(k - i + 1) / static_cast(j - i + 1); + double alpha = static_cast(k - i + 1) / + static_cast(j - i + 1); double real = CBasicStatistics::mean(values[j]) - mean; f.push_back((1.0 - alpha) * f[i - 1] + alpha * TComplex(real, 0.0)); } diff --git a/lib/maths/CSpline.cc b/lib/maths/CSpline.cc index bce86c43ec..73673dbd67 100644 --- a/lib/maths/CSpline.cc +++ b/lib/maths/CSpline.cc @@ -20,17 +20,20 @@ namespace { bool checkTridiagonal(const TDoubleVec& a, const TDoubleVec& b, const TDoubleVec& c, const TDoubleVec& x) { if (a.size() + 1 != b.size()) { LOG_ERROR(<< "Lower diagonal and main diagonal inconsistent:" - << " a = " << core::CContainerPrinter::print(a) << " b = " << core::CContainerPrinter::print(b)); + << " a = " << core::CContainerPrinter::print(a) + << " b = " << core::CContainerPrinter::print(b)); return false; } if (c.size() + 1 != b.size()) { LOG_ERROR(<< "Upper diagonal and main diagonal inconsistent:" - << " b = " << core::CContainerPrinter::print(b) << " c = " << core::CContainerPrinter::print(c)); + << " b = " << core::CContainerPrinter::print(b) + << " c = " << core::CContainerPrinter::print(c)); return false; } if (b.size() != x.size()) { LOG_ERROR(<< "Dimension mismatch:" - << " x = " << core::CContainerPrinter::print(x) << ", b = " << core::CContainerPrinter::print(b)) + << " x = " << core::CContainerPrinter::print(x) + << ", b = " << core::CContainerPrinter::print(b)) return false; } return true; @@ -90,7 +93,12 @@ bool solveTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDoubleVec& c, T return true; } -bool solvePeturbedTridiagonal(const TDoubleVec& a, const TDoubleVec& b, TDoubleVec& c, TDoubleVec& u, const TDoubleVec& v, TDoubleVec& x) { +bool solvePeturbedTridiagonal(const TDoubleVec& a, + const TDoubleVec& b, + TDoubleVec& c, + TDoubleVec& u, + const TDoubleVec& v, + TDoubleVec& x) { if (!checkTridiagonal(a, b, c, x)) { return false; } diff --git a/lib/maths/CStatisticalTests.cc b/lib/maths/CStatisticalTests.cc index b4dd81dc34..9540622ebc 100644 --- a/lib/maths/CStatisticalTests.cc +++ b/lib/maths/CStatisticalTests.cc @@ -77,7 +77,8 @@ double CStatisticalTests::leftTailFTest(double x, double d1, double d2) { boost::math::fisher_f_distribution<> F(d1, d2); return boost::math::cdf(F, x); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute significance " << e.what() << " d1 = " << d1 << ", d2 = " << d2 << ", x = " << x); + LOG_ERROR(<< "Failed to compute significance " << e.what() + << " d1 = " << d1 << ", d2 = " << d2 << ", x = " << x); } return 1.0; } @@ -93,7 +94,8 @@ double CStatisticalTests::rightTailFTest(double x, double d1, double d2) { boost::math::fisher_f_distribution<> F(d1, d2); return 
boost::math::cdf(boost::math::complement(F, x)); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute significance " << e.what() << " d1 = " << d1 << ", d2 = " << d2 << ", x = " << x); + LOG_ERROR(<< "Failed to compute significance " << e.what() + << " d1 = " << d1 << ", d2 = " << d2 << ", x = " << x); } return 1.0; } @@ -127,18 +129,22 @@ double CStatisticalTests::twoSampleKS(TDoubleVec x, TDoubleVec y) { D = std::max(D, std::fabs(Fx - Fy)); } - double neff = std::sqrt(static_cast(nx) * static_cast(ny) / static_cast(nx + ny)); + double neff = std::sqrt(static_cast(nx) * static_cast(ny) / + static_cast(nx + ny)); double result = significance((neff + 0.12 + 0.11 / neff) * D); - LOG_TRACE(<< "nx = " << nx << ", ny = " << ny << ", D = " << D << ", significance = " << result); + LOG_TRACE(<< "nx = " << nx << ", ny = " << ny << ", D = " << D + << ", significance = " << result); return result; } -CStatisticalTests::CCramerVonMises::CCramerVonMises(std::size_t size) : m_Size(CTools::truncate(size, N[0] - 1, N[12] - 1)) { +CStatisticalTests::CCramerVonMises::CCramerVonMises(std::size_t size) + : m_Size(CTools::truncate(size, N[0] - 1, N[12] - 1)) { m_F.reserve(size); } CStatisticalTests::CCramerVonMises::CCramerVonMises(core::CStateRestoreTraverser& traverser) { - traverser.traverseSubLevel(boost::bind(&CStatisticalTests::CCramerVonMises::acceptRestoreTraverser, this, _1)); + traverser.traverseSubLevel(boost::bind( + &CStatisticalTests::CCramerVonMises::acceptRestoreTraverser, this, _1)); } bool CStatisticalTests::CCramerVonMises::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -149,8 +155,9 @@ bool CStatisticalTests::CCramerVonMises::acceptRestoreTraverser(core::CStateRest core::CStringUtils::stringToType(traverser.value(), m_Size), m_F.reserve(m_Size)) RESTORE(T_TAG, m_T.fromDelimited(traverser.value())) - RESTORE_SETUP_TEARDOWN( - F_TAG, int f, core::CStringUtils::stringToType(traverser.value(), f), m_F.push_back(static_cast(f))) + RESTORE_SETUP_TEARDOWN(F_TAG, int f, + core::CStringUtils::stringToType(traverser.value(), f), + m_F.push_back(static_cast(f))) } while (traverser.next()); return true; @@ -196,8 +203,11 @@ double CStatisticalTests::CCramerVonMises::pValue() const { // Linearly interpolate between the rows of the T statistic // values. 
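    // Concretely, the effective size m_Size + 1 falls between the tabulated
    // sizes N[row - 1] and N[row]; each entry below is blended as
    // tt[i] = alpha * T_VALUES[row][i] + beta * T_VALUES[row - 1][i] with
    // alpha = (m_Size + 1 - N[row - 1]) / (N[row] - N[row - 1]) and
    // beta = 1 - alpha.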
double tt[16]; - ptrdiff_t row = CTools::truncate(std::lower_bound(boost::begin(N), boost::end(N), m_Size + 1) - N, ptrdiff_t(1), ptrdiff_t(12)); - double alpha = static_cast(m_Size + 1 - N[row - 1]) / static_cast(N[row] - N[row - 1]); + ptrdiff_t row = CTools::truncate( + std::lower_bound(boost::begin(N), boost::end(N), m_Size + 1) - N, + ptrdiff_t(1), ptrdiff_t(12)); + double alpha = static_cast(m_Size + 1 - N[row - 1]) / + static_cast(N[row] - N[row - 1]); double beta = 1.0 - alpha; for (std::size_t i = 0u; i < 16; ++i) { tt[i] = alpha * T_VALUES[row][i] + beta * T_VALUES[row - 1][i]; @@ -211,7 +221,9 @@ double CStatisticalTests::CCramerVonMises::pValue() const { return 1.0; } - ptrdiff_t col = CTools::truncate(std::lower_bound(boost::begin(tt), boost::end(tt), t) - tt, ptrdiff_t(1), ptrdiff_t(15)); + ptrdiff_t col = + CTools::truncate(std::lower_bound(boost::begin(tt), boost::end(tt), t) - tt, + ptrdiff_t(1), ptrdiff_t(15)); double a = tt[col - 1]; double b = tt[col]; double fa = P_VALUES[col - 1]; @@ -258,217 +270,38 @@ uint64_t CStatisticalTests::CCramerVonMises::checksum(uint64_t seed) const { return CChecksum::calculate(seed, m_F); } -const double CStatisticalTests::CCramerVonMises::P_VALUES[16] = - {0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99, 0.999}; -const std::size_t CStatisticalTests::CCramerVonMises::N[13] = {2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 50, 200, 1000}; -const double CStatisticalTests::CCramerVonMises::T_VALUES[13][16] = {{0.04326, - 0.04565, - 0.04962, - 0.05758, - 0.06554, - 0.07350, - 0.08146, - 0.12659, - 0.21522, - 0.24743, - 0.28854, - 0.34343, - 0.42480, - 0.48901, - 0.55058, - 0.62858}, - {0.03319, - 0.03774, - 0.04360, - 0.05205, - 0.06091, - 0.06887, - 0.07683, - 0.12542, - 0.21338, - 0.24167, - 0.27960, - 0.33785, - 0.43939, - 0.53318, - 0.63980, - 0.82240}, - {0.03002, - 0.03536, - 0.04149, - 0.05093, - 0.05896, - 0.06681, - 0.07493, - 0.12406, - 0.21171, - 0.24260, - 0.28336, - 0.34184, - 0.44206, - 0.54200, - 0.67017, - 0.92970}, - {0.02869, - 0.03422, - 0.04036, - 0.04969, - 0.05800, - 0.06610, - 0.07427, - 0.12250, - 0.21164, - 0.24237, - 0.28305, - 0.34238, - 0.44697, - 0.55056, - 0.68352, - 0.98730}, - {0.02796, - 0.03344, - 0.03959, - 0.04911, - 0.05747, - 0.06548, - 0.07351, - 0.12200, - 0.21110, - 0.24198, - 0.28331, - 0.34352, - 0.44911, - 0.55572, - 0.69443, - 1.02000}, - {0.02741, - 0.03292, - 0.03914, - 0.04869, - 0.05698, - 0.06492, - 0.07297, - 0.12158, - 0.21087, - 0.24197, - 0.28345, - 0.34397, - 0.45100, - 0.55935, - 0.70154, - 1.04250}, - {0.02702, - 0.03257, - 0.03875, - 0.04823, - 0.05650, - 0.06448, - 0.07254, - 0.12113, - 0.21065, - 0.24186, - 0.28356, - 0.34458, - 0.45240, - 0.56220, - 0.70720, - 1.05910}, - {0.02679, - 0.03230, - 0.03850, - 0.04798, - 0.05625, - 0.06423, - 0.07228, - 0.12088, - 0.21051, - 0.24179, - 0.28361, - 0.34487, - 0.45367, - 0.56493, - 0.71233, - 1.07220}, - {0.02657, - 0.03209, - 0.03830, - 0.04778, - 0.05605, - 0.06403, - 0.07208, - 0.12068, - 0.21040, - 0.24173, - 0.28365, - 0.34510, - 0.45441, - 0.56643, - 0.71531, - 1.08220}, - {0.02564, - 0.03120, - 0.03742, - 0.04689, - 0.05515, - 0.06312, - 0.07117, - 0.11978, - 0.20989, - 0.24148, - 0.28384, - 0.34617, - 0.45778, - 0.57331, - 0.72895, - 1.11898}, - {0.02512, - 0.03068, - 0.03690, - 0.04636, - 0.05462, - 0.06258, - 0.07062, - 0.11924, - 0.20958, - 0.24132, - 0.28396, - 0.34682, - 0.45986, - 0.57754, - 0.73728, - 1.14507}, - {0.02488, - 0.03043, - 0.03665, - 0.04610, - 0.05435, - 0.06231, - 0.07035, - 0.11897, - 
0.20943, - 0.24125, - 0.28402, - 0.34715, - 0.46091, - 0.57968, - 0.74149, - 1.15783}, - {0.02481, - 0.03037, - 0.03658, - 0.04603, - 0.05428, - 0.06224, - 0.07027, - 0.11889, - 0.20938, - 0.24123, - 0.28403, - 0.34724, - 0.46119, - 0.58026, - 0.74262, - 1.16120}}; +const double CStatisticalTests::CCramerVonMises::P_VALUES[16] = { + 0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.5, + 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99, 0.999}; +const std::size_t CStatisticalTests::CCramerVonMises::N[13] = { + 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 50, 200, 1000}; +const double CStatisticalTests::CCramerVonMises::T_VALUES[13][16] = { + {0.04326, 0.04565, 0.04962, 0.05758, 0.06554, 0.07350, 0.08146, 0.12659, + 0.21522, 0.24743, 0.28854, 0.34343, 0.42480, 0.48901, 0.55058, 0.62858}, + {0.03319, 0.03774, 0.04360, 0.05205, 0.06091, 0.06887, 0.07683, 0.12542, + 0.21338, 0.24167, 0.27960, 0.33785, 0.43939, 0.53318, 0.63980, 0.82240}, + {0.03002, 0.03536, 0.04149, 0.05093, 0.05896, 0.06681, 0.07493, 0.12406, + 0.21171, 0.24260, 0.28336, 0.34184, 0.44206, 0.54200, 0.67017, 0.92970}, + {0.02869, 0.03422, 0.04036, 0.04969, 0.05800, 0.06610, 0.07427, 0.12250, + 0.21164, 0.24237, 0.28305, 0.34238, 0.44697, 0.55056, 0.68352, 0.98730}, + {0.02796, 0.03344, 0.03959, 0.04911, 0.05747, 0.06548, 0.07351, 0.12200, + 0.21110, 0.24198, 0.28331, 0.34352, 0.44911, 0.55572, 0.69443, 1.02000}, + {0.02741, 0.03292, 0.03914, 0.04869, 0.05698, 0.06492, 0.07297, 0.12158, + 0.21087, 0.24197, 0.28345, 0.34397, 0.45100, 0.55935, 0.70154, 1.04250}, + {0.02702, 0.03257, 0.03875, 0.04823, 0.05650, 0.06448, 0.07254, 0.12113, + 0.21065, 0.24186, 0.28356, 0.34458, 0.45240, 0.56220, 0.70720, 1.05910}, + {0.02679, 0.03230, 0.03850, 0.04798, 0.05625, 0.06423, 0.07228, 0.12088, + 0.21051, 0.24179, 0.28361, 0.34487, 0.45367, 0.56493, 0.71233, 1.07220}, + {0.02657, 0.03209, 0.03830, 0.04778, 0.05605, 0.06403, 0.07208, 0.12068, + 0.21040, 0.24173, 0.28365, 0.34510, 0.45441, 0.56643, 0.71531, 1.08220}, + {0.02564, 0.03120, 0.03742, 0.04689, 0.05515, 0.06312, 0.07117, 0.11978, + 0.20989, 0.24148, 0.28384, 0.34617, 0.45778, 0.57331, 0.72895, 1.11898}, + {0.02512, 0.03068, 0.03690, 0.04636, 0.05462, 0.06258, 0.07062, 0.11924, + 0.20958, 0.24132, 0.28396, 0.34682, 0.45986, 0.57754, 0.73728, 1.14507}, + {0.02488, 0.03043, 0.03665, 0.04610, 0.05435, 0.06231, 0.07035, 0.11897, + 0.20943, 0.24125, 0.28402, 0.34715, 0.46091, 0.57968, 0.74149, 1.15783}, + {0.02481, 0.03037, 0.03658, 0.04603, 0.05428, 0.06224, 0.07027, 0.11889, + 0.20938, 0.24123, 0.28403, 0.34724, 0.46119, 0.58026, 0.74262, 1.16120}}; const double CStatisticalTests::CCramerVonMises::SCALE(65536.0); } } diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index a696bf7369..b9a28e39c6 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -81,27 +81,32 @@ std::string SChangeDescription::print() const { return result + core::CStringUtils::typeToString(s_Value[0]); } -CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector(const TDecompositionPtr& trendModel, - const TPriorPtr& residualModel, - core_t::TTime minimumTimeToDetect, - core_t::TTime maximumTimeToDetect, - double minimumDeltaBicToDetect) - : m_MinimumTimeToDetect{minimumTimeToDetect}, - m_MaximumTimeToDetect{maximumTimeToDetect}, - m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, - m_SampleCount{0}, - m_CurrentEvidenceOfChange{0.0}, - m_ChangeModels{boost::make_shared(trendModel, residualModel), - boost::make_shared(trendModel, 
residualModel), - boost::make_shared(trendModel, residualModel, -core::constants::HOUR), - boost::make_shared(trendModel, residualModel, +core::constants::HOUR)} { +CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector( + const TDecompositionPtr& trendModel, + const TPriorPtr& residualModel, + core_t::TTime minimumTimeToDetect, + core_t::TTime maximumTimeToDetect, + double minimumDeltaBicToDetect) + : m_MinimumTimeToDetect{minimumTimeToDetect}, m_MaximumTimeToDetect{maximumTimeToDetect}, + m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, m_SampleCount{0}, m_CurrentEvidenceOfChange{0.0}, + m_ChangeModels{ + boost::make_shared(trendModel, residualModel), + boost::make_shared(trendModel, residualModel), + boost::make_shared(trendModel, + residualModel, + -core::constants::HOUR), + boost::make_shared(trendModel, + residualModel, + +core::constants::HOUR)} { if (trendModel->seasonalComponents().size() > 0) { - m_ChangeModels.push_back(boost::make_shared(trendModel, residualModel)); + m_ChangeModels.push_back(boost::make_shared( + trendModel, residualModel)); } } -bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SModelRestoreParams& params, - core::CStateRestoreTraverser& traverser) { +bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser( + const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { auto model = m_ChangeModels.begin(); do { const std::string name{traverser.name()}; @@ -110,13 +115,15 @@ bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SModelRes RESTORE_BUILT_IN(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) RESTORE_BUILT_IN(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange) - RESTORE_SETUP_TEARDOWN( - MIN_TIME_TAG, core_t::TTime time, core::CStringUtils::stringToType(traverser.value(), time), m_TimeRange.add(time)) - RESTORE_SETUP_TEARDOWN( - MAX_TIME_TAG, core_t::TTime time, core::CStringUtils::stringToType(traverser.value(), time), m_TimeRange.add(time)) - RESTORE(CHANGE_MODEL_TAG, - traverser.traverseSubLevel( - boost::bind(&CUnivariateChangeModel::acceptRestoreTraverser, (model++)->get(), boost::cref(params), _1))) + RESTORE_SETUP_TEARDOWN(MIN_TIME_TAG, core_t::TTime time, + core::CStringUtils::stringToType(traverser.value(), time), + m_TimeRange.add(time)) + RESTORE_SETUP_TEARDOWN(MAX_TIME_TAG, core_t::TTime time, + core::CStringUtils::stringToType(traverser.value(), time), + m_TimeRange.add(time)) + RESTORE(CHANGE_MODEL_TAG, traverser.traverseSubLevel(boost::bind( + &CUnivariateChangeModel::acceptRestoreTraverser, + (model++)->get(), boost::cref(params), _1))) } while (traverser.next()); return true; } @@ -124,15 +131,18 @@ bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser(const SModelRes void CUnivariateTimeSeriesChangeDetector::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(MINIMUM_TIME_TO_DETECT, m_MinimumTimeToDetect); inserter.insertValue(MAXIMUM_TIME_TO_DETECT, m_MaximumTimeToDetect); - inserter.insertValue(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect, + core::CIEEE754::E_SinglePrecision); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); - inserter.insertValue(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(CURRENT_EVIDENCE_OF_CHANGE, 
m_CurrentEvidenceOfChange, + core::CIEEE754::E_SinglePrecision); if (m_TimeRange.initialized()) { inserter.insertValue(MIN_TIME_TAG, m_TimeRange.min()); inserter.insertValue(MAX_TIME_TAG, m_TimeRange.max()); } for (const auto& model : m_ChangeModels) { - inserter.insertLevel(CHANGE_MODEL_TAG, boost::bind(&CUnivariateChangeModel::acceptPersistInserter, model.get(), _1)); + inserter.insertLevel(CHANGE_MODEL_TAG, boost::bind(&CUnivariateChangeModel::acceptPersistInserter, + model.get(), _1)); } } @@ -145,7 +155,8 @@ TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() { return m_ChangeModels[candidate]->change(); } - m_CurrentEvidenceOfChange = m_ChangeModels[0]->bic() - m_ChangeModels[candidate]->bic(); + m_CurrentEvidenceOfChange = m_ChangeModels[0]->bic() - + m_ChangeModels[candidate]->bic(); } return TOptionalChangeDescription(); } @@ -153,7 +164,8 @@ TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() { double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t& change) const { using TChangeModelPtr5VecCItr = TChangeModelPtr5Vec::const_iterator; using TDoubleChangeModelPtr5VecCItrPr = std::pair; - using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; + using TMinAccumulator = + CBasicStatistics::COrderStatisticsStack; if (m_SampleCount <= COUNT_TO_INITIALIZE) { return 0.0; @@ -166,7 +178,8 @@ double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t& change } candidates.sort(); - double evidences[]{noChangeBic - candidates[0].first, noChangeBic - candidates[1].first}; + double evidences[]{noChangeBic - candidates[0].first, + noChangeBic - candidates[1].first}; double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()}; double x[]{evidences[0] / m_MinimumDeltaBicToDetect, @@ -174,9 +187,12 @@ double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t& change evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence, static_cast(m_TimeRange.range() - m_MinimumTimeToDetect) / static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; - double p{CTools::logisticFunction(x[0], 0.05, 1.0) * CTools::logisticFunction(x[1], 0.1, 1.0) * - (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) * CTools::logisticFunction(x[3], 0.2, 0.5)}; - LOG_TRACE("p(" << (*candidates[0].second)->change()->print() << ") = " << p << " | x = " << core::CContainerPrinter::print(x)); + double p{CTools::logisticFunction(x[0], 0.05, 1.0) * + CTools::logisticFunction(x[1], 0.1, 1.0) * + (x[2] < 0.0 ? 
1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) * + CTools::logisticFunction(x[3], 0.2, 0.5)}; + LOG_TRACE("p(" << (*candidates[0].second)->change()->print() << ") = " << p + << " | x = " << core::CContainerPrinter::print(x)); change = candidates[0].second - m_ChangeModels.begin(); @@ -193,9 +209,11 @@ double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t& change bool CUnivariateTimeSeriesChangeDetector::stopTesting() const { core_t::TTime range{m_TimeRange.range()}; if (range > m_MinimumTimeToDetect) { - double scale{0.5 + CTools::logisticFunction(2.0 * m_CurrentEvidenceOfChange / m_MinimumDeltaBicToDetect, 0.2, 1.0)}; + double scale{0.5 + CTools::logisticFunction(2.0 * m_CurrentEvidenceOfChange / m_MinimumDeltaBicToDetect, + 0.2, 1.0)}; return static_cast(range) > - m_MinimumTimeToDetect + scale * static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect); + m_MinimumTimeToDetect + + scale * static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect); } return false; } @@ -234,11 +252,14 @@ uint64_t CUnivariateTimeSeriesChangeDetector::checksum(uint64_t seed) const { namespace time_series_change_detector_detail { -CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel) - : m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel} { +CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr& trendModel, + const TPriorPtr& residualModel) + : m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0}, + m_TrendModel{trendModel}, m_ResidualModel{residualModel} { } -bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams& /*params*/, core::CStateRestoreTraverser& traverser) { +bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams& /*params*/, + core::CStateRestoreTraverser& traverser) { do { const std::string name{traverser.name()}; RESTORE_BUILT_IN(LOG_LIKELIHOOD_TAG, m_LogLikelihood); @@ -250,7 +271,8 @@ bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams& / void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(LOG_LIKELIHOOD_TAG, m_LogLikelihood, core::CIEEE754::E_SinglePrecision); - inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood, core::CIEEE754::E_SinglePrecision); + inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood, + core::CIEEE754::E_SinglePrecision); } void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { @@ -263,7 +285,8 @@ void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePt std::size_t CUnivariateChangeModel::memoryUsage() const { // See above. 
- return core::CMemory::dynamicSize(m_TrendModel) + core::CMemory::dynamicSize(m_ResidualModel); + return core::CMemory::dynamicSize(m_TrendModel) + + core::CMemory::dynamicSize(m_ResidualModel); } uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const { @@ -273,8 +296,10 @@ uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const { return CChecksum::calculate(seed, m_ResidualModel); } -bool CUnivariateChangeModel::restoreResidualModel(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { - return traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(m_ResidualModel), _1)); +bool CUnivariateChangeModel::restoreResidualModel(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { + return traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), boost::cref(params), boost::ref(m_ResidualModel), _1)); } double CUnivariateChangeModel::logLikelihood() const { @@ -289,20 +314,21 @@ void CUnivariateChangeModel::updateLogLikelihood(const TWeightStyleVec& weightSt const TDouble1Vec& samples, const TDouble4Vec1Vec& weights) { double logLikelihood{}; - if (m_ResidualModel->jointLogMarginalLikelihood(weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { + if (m_ResidualModel->jointLogMarginalLikelihood( + weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { m_LogLikelihood += logLikelihood; } } -void CUnivariateChangeModel::updateExpectedLogLikelihood(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights) { +void CUnivariateChangeModel::updateExpectedLogLikelihood(const TWeightStyleVec& weightStyles, + const TDouble4Vec1Vec& weights) { for (const auto& weight : weights) { double expectedLogLikelihood{}; TDouble4Vec1Vec weight_{weight}; - if (m_ResidualModel->expectation(maths::CPrior::CLogMarginalLikelihood{*m_ResidualModel, weightStyles, weight_}, - EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, - expectedLogLikelihood, - weightStyles, - weight)) { + if (m_ResidualModel->expectation( + maths::CPrior::CLogMarginalLikelihood{*m_ResidualModel, weightStyles, weight_}, + EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, expectedLogLikelihood, + weightStyles, weight)) { m_ExpectedLogLikelihood += expectedLogLikelihood; } } @@ -324,11 +350,13 @@ CUnivariateChangeModel::TPriorPtr CUnivariateChangeModel::residualModelPtr() con return m_ResidualModel; } -CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel) +CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr& trendModel, + const TPriorPtr& residualModel) : CUnivariateChangeModel{trendModel, residualModel} { } -bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { return this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser); } @@ -369,11 +397,13 @@ void CUnivariateNoChangeModel::addSamples(const std::size_t count, double sample{this->trendModel().detrend(time, value, 0.0)}; double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, + weightStyles, weights[i]); } for 
(auto& weight : weights) { - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, + weightStyles, weight); } this->updateLogLikelihood(weightStyles, samples, weights); } @@ -387,13 +417,14 @@ uint64_t CUnivariateNoChangeModel::checksum(uint64_t seed) const { return this->CUnivariateChangeModel::checksum(seed); } -CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel) +CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr& trendModel, + const TPriorPtr& residualModel) : CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, - m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, - m_SampleCount{0.0} { + m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, m_SampleCount{0.0} { } -bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) { return false; } @@ -402,7 +433,8 @@ bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParam RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value())) RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) - RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) + RESTORE(RESIDUAL_MODEL_TAG, + this->restoreResidualModel(params.s_DistributionParams, traverser)) } while (traverser.next()); return true; } @@ -411,7 +443,9 @@ void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInsert this->CUnivariateChangeModel::acceptPersistInserter(inserter); inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited()); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); - inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(this->residualModel()), _1)); + inserter.insertLevel(RESIDUAL_MODEL_TAG, + boost::bind(CPriorStateSerialiser(), + boost::cref(this->residualModel()), _1)); } double CUnivariateLevelShiftModel::bic() const { @@ -423,7 +457,8 @@ double CUnivariateLevelShiftModel::expectedBic() const { } TOptionalChangeDescription CUnivariateLevelShiftModel::change() const { - return SChangeDescription{SChangeDescription::E_LevelShift, CBasicStatistics::mean(m_Shift), this->residualModelPtr()}; + return SChangeDescription{SChangeDescription::E_LevelShift, + CBasicStatistics::mean(m_Shift), this->residualModelPtr()}; } void CUnivariateLevelShiftModel::addSamples(const std::size_t count, @@ -451,7 +486,8 @@ void CUnivariateLevelShiftModel::addSamples(const std::size_t count, double sample{trendModel.detrend(time, value, 0.0) - shift}; double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, + weightStyles, weights[i]); m_SampleCount += maths_t::count(weightStyles, weights[i]); } @@ -459,7 +495,8 @@ void CUnivariateLevelShiftModel::addSamples(const std::size_t count, residualModel.propagateForwardsByTime(1.0); for (auto& weight : weights) { - 
maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, + weightStyles, weight); } this->updateLogLikelihood(weightStyles, samples, weights); this->updateExpectedLogLikelihood(weightStyles, weights); @@ -483,13 +520,14 @@ uint64_t CUnivariateLevelShiftModel::checksum(uint64_t seed) const { return CChecksum::calculate(seed, m_SampleCount); } -CUnivariateLinearScaleModel::CUnivariateLinearScaleModel(const TDecompositionPtr& trendModel, const TPriorPtr& residualModel) +CUnivariateLinearScaleModel::CUnivariateLinearScaleModel(const TDecompositionPtr& trendModel, + const TPriorPtr& residualModel) : CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, - m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, - m_SampleCount{0.0} { + m_ResidualModelMode{residualModel->marginalLikelihoodMode()}, m_SampleCount{0.0} { } -bool CUnivariateLinearScaleModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CUnivariateLinearScaleModel::acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) { return false; } @@ -498,7 +536,8 @@ bool CUnivariateLinearScaleModel::acceptRestoreTraverser(const SModelRestorePara RESTORE(SCALE_TAG, m_Scale.fromDelimited(traverser.value())) RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) - RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) + RESTORE(RESIDUAL_MODEL_TAG, + this->restoreResidualModel(params.s_DistributionParams, traverser)) } while (traverser.next()); return true; } @@ -507,7 +546,9 @@ void CUnivariateLinearScaleModel::acceptPersistInserter(core::CStatePersistInser this->CUnivariateChangeModel::acceptPersistInserter(inserter); inserter.insertValue(SCALE_TAG, m_Scale.toDelimited()); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); - inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(this->residualModel()), _1)); + inserter.insertLevel(RESIDUAL_MODEL_TAG, + boost::bind(CPriorStateSerialiser(), + boost::cref(this->residualModel()), _1)); } double CUnivariateLinearScaleModel::bic() const { @@ -518,8 +559,10 @@ double CUnivariateLinearScaleModel::expectedBic() const { return -2.0 * this->expectedLogLikelihood() + CTools::fastLog(m_SampleCount); } -CUnivariateLinearScaleModel::TOptionalChangeDescription CUnivariateLinearScaleModel::change() const { - return SChangeDescription{SChangeDescription::E_LinearScale, CBasicStatistics::mean(m_Scale), this->residualModelPtr()}; +CUnivariateLinearScaleModel::TOptionalChangeDescription +CUnivariateLinearScaleModel::change() const { + return SChangeDescription{SChangeDescription::E_LinearScale, + CBasicStatistics::mean(m_Scale), this->residualModelPtr()}; } void CUnivariateLinearScaleModel::addSamples(const std::size_t count, @@ -539,7 +582,9 @@ void CUnivariateLinearScaleModel::addSamples(const std::size_t count, double value{samples_[i].second - m_ResidualModelMode}; double prediction{CBasicStatistics::mean(trendModel.value(time, 0.0))}; double scale{std::fabs(value) / std::fabs(prediction)}; - m_Scale.add(value * prediction < 0.0 ? MINIMUM_SCALE : CTools::truncate(scale, MINIMUM_SCALE, MAXIMUM_SCALE), + m_Scale.add(value * prediction < 0.0 + ? 
MINIMUM_SCALE + : CTools::truncate(scale, MINIMUM_SCALE, MAXIMUM_SCALE), std::fabs(prediction)); } @@ -557,7 +602,8 @@ void CUnivariateLinearScaleModel::addSamples(const std::size_t count, double sample{value - scale * prediction}; double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, + weightStyles, weights[i]); m_SampleCount += maths_t::count(weightStyles, weights[i]); } @@ -565,7 +611,8 @@ void CUnivariateLinearScaleModel::addSamples(const std::size_t count, residualModel.propagateForwardsByTime(1.0); for (auto& weight : weights) { - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, weightStyles, weight); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, + weightStyles, weight); } this->updateLogLikelihood(weightStyles, samples, weights); this->updateExpectedLogLikelihood(weightStyles, weights); @@ -588,20 +635,24 @@ CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr& tr : CUnivariateChangeModel{trendModel, TPriorPtr{residualModel->clone()}}, m_Shift{shift} { } -bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false) { return false; } do { const std::string name{traverser.name()}; - RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser)) + RESTORE(RESIDUAL_MODEL_TAG, + this->restoreResidualModel(params.s_DistributionParams, traverser)) } while (traverser.next()); return true; } void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { this->CUnivariateChangeModel::acceptPersistInserter(inserter); - inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(this->residualModel()), _1)); + inserter.insertLevel(RESIDUAL_MODEL_TAG, + boost::bind(CPriorStateSerialiser(), + boost::cref(this->residualModel()), _1)); } double CUnivariateTimeShiftModel::bic() const { @@ -613,7 +664,8 @@ double CUnivariateTimeShiftModel::expectedBic() const { } TOptionalChangeDescription CUnivariateTimeShiftModel::change() const { - return SChangeDescription{SChangeDescription::E_TimeShift, static_cast<double>(m_Shift), this->residualModelPtr()}; + return SChangeDescription{SChangeDescription::E_TimeShift, + static_cast<double>(m_Shift), this->residualModelPtr()}; } void CUnivariateTimeShiftModel::addSamples(const std::size_t count, @@ -635,14 +687,16 @@ void CUnivariateTimeShiftModel::addSamples(const std::size_t count, double sample{this->trendModel().detrend(time + m_Shift, value, 0.0)}; double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, weightStyles, weights[i]); + maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, + weightStyles, weights[i]); } residualModel.addSamples(weightStyles, samples, weights); residualModel.propagateForwardsByTime(1.0); for (auto& weight : weights) { - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 
1.0, + weightStyles, weight); } this->updateLogLikelihood(weightStyles, samples, weights); this->updateExpectedLogLikelihood(weightStyles, weights); diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index 3009cda0a2..8bbd9491bd 100644 --- a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -64,8 +64,8 @@ TDoubleDoublePr confidenceInterval(double confidence, double variance) { double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; return {ql, qu}; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() << ", variance = " << variance - << ", confidence = " << confidence); + LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() + << ", variance = " << variance << ", confidence = " << confidence); } } return {0.0, 0.0}; @@ -89,40 +89,39 @@ const std::string LAST_PROPAGATION_TIME_OLD_TAG{"h"}; const std::string EMPTY_STRING; } -CTimeSeriesDecomposition::CTimeSeriesDecomposition(double decayRate, core_t::TTime bucketLength, std::size_t seasonalComponentSize) - : m_TimeShift{0}, - m_LastValueTime{0}, - m_LastPropagationTime{0}, - m_PeriodicityTest{decayRate, bucketLength}, - m_CalendarCyclicTest{decayRate, bucketLength}, +CTimeSeriesDecomposition::CTimeSeriesDecomposition(double decayRate, + core_t::TTime bucketLength, + std::size_t seasonalComponentSize) + : m_TimeShift{0}, m_LastValueTime{0}, m_LastPropagationTime{0}, + m_PeriodicityTest{decayRate, bucketLength}, m_CalendarCyclicTest{decayRate, bucketLength}, m_Components{decayRate, bucketLength, seasonalComponentSize} { this->initializeMediator(); } CTimeSeriesDecomposition::CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams& params, core::CStateRestoreTraverser& traverser) - : m_TimeShift{0}, - m_LastValueTime{0}, - m_LastPropagationTime{0}, + : m_TimeShift{0}, m_LastValueTime{0}, m_LastPropagationTime{0}, m_PeriodicityTest{params.s_DecayRate, params.s_MinimumBucketLength}, m_CalendarCyclicTest{params.s_DecayRate, params.s_MinimumBucketLength}, m_Components{params.s_DecayRate, params.s_MinimumBucketLength, params.s_ComponentSize} { traverser.traverseSubLevel( - boost::bind(&CTimeSeriesDecomposition::acceptRestoreTraverser, this, boost::cref(params.s_ChangeModelParams), _1)); + boost::bind(&CTimeSeriesDecomposition::acceptRestoreTraverser, this, + boost::cref(params.s_ChangeModelParams), _1)); this->initializeMediator(); } -CTimeSeriesDecomposition::CTimeSeriesDecomposition(const CTimeSeriesDecomposition& other, bool isForForecast) - : m_TimeShift{other.m_TimeShift}, - m_LastValueTime{other.m_LastValueTime}, +CTimeSeriesDecomposition::CTimeSeriesDecomposition(const CTimeSeriesDecomposition& other, + bool isForForecast) + : m_TimeShift{other.m_TimeShift}, m_LastValueTime{other.m_LastValueTime}, m_LastPropagationTime{other.m_LastPropagationTime}, m_PeriodicityTest{other.m_PeriodicityTest, isForForecast}, - m_CalendarCyclicTest{other.m_CalendarCyclicTest, isForForecast}, - m_Components{other.m_Components} { + m_CalendarCyclicTest{other.m_CalendarCyclicTest, isForForecast}, m_Components{ + other.m_Components} { this->initializeMediator(); } -bool CTimeSeriesDecomposition::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CTimeSeriesDecomposition::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { if (traverser.name() == VERSION_6_3_TAG) { while 
(traverser.next()) { const std::string& name{traverser.name()}; @@ -130,11 +129,14 @@ bool CTimeSeriesDecomposition::acceptRestoreTraverser(const SDistributionRestore RESTORE_BUILT_IN(LAST_VALUE_TIME_6_3_TAG, m_LastValueTime) RESTORE_BUILT_IN(LAST_PROPAGATION_TIME_6_3_TAG, m_LastPropagationTime) RESTORE(PERIODICITY_TEST_6_3_TAG, - traverser.traverseSubLevel(boost::bind(&CPeriodicityTest::acceptRestoreTraverser, &m_PeriodicityTest, _1))) + traverser.traverseSubLevel(boost::bind(&CPeriodicityTest::acceptRestoreTraverser, + &m_PeriodicityTest, _1))) RESTORE(CALENDAR_CYCLIC_TEST_6_3_TAG, - traverser.traverseSubLevel(boost::bind(&CCalendarTest::acceptRestoreTraverser, &m_CalendarCyclicTest, _1))) - RESTORE(COMPONENTS_6_3_TAG, - traverser.traverseSubLevel(boost::bind(&CComponents::acceptRestoreTraverser, &m_Components, boost::cref(params), _1))) + traverser.traverseSubLevel(boost::bind(&CCalendarTest::acceptRestoreTraverser, + &m_CalendarCyclicTest, _1))) + RESTORE(COMPONENTS_6_3_TAG, traverser.traverseSubLevel(boost::bind( + &CComponents::acceptRestoreTraverser, + &m_Components, boost::cref(params), _1))) } } else { // There is no version string this is historic state. @@ -145,9 +147,11 @@ bool CTimeSeriesDecomposition::acceptRestoreTraverser(const SDistributionRestore RESTORE_BUILT_IN(LAST_VALUE_TIME_OLD_TAG, m_LastValueTime) RESTORE_BUILT_IN(LAST_PROPAGATION_TIME_OLD_TAG, m_LastPropagationTime) RESTORE(CALENDAR_CYCLIC_TEST_OLD_TAG, - traverser.traverseSubLevel(boost::bind(&CCalendarTest::acceptRestoreTraverser, &m_CalendarCyclicTest, _1))) - RESTORE(COMPONENTS_OLD_TAG, - traverser.traverseSubLevel(boost::bind(&CComponents::acceptRestoreTraverser, &m_Components, boost::cref(params), _1))) + traverser.traverseSubLevel(boost::bind(&CCalendarTest::acceptRestoreTraverser, + &m_CalendarCyclicTest, _1))) + RESTORE(COMPONENTS_OLD_TAG, traverser.traverseSubLevel(boost::bind( + &CComponents::acceptRestoreTraverser, + &m_Components, boost::cref(params), _1))) } while (traverser.next()); this->decayRate(decayRate); } @@ -163,7 +167,8 @@ void CTimeSeriesDecomposition::swap(CTimeSeriesDecomposition& other) { m_Components.swap(other.m_Components); } -CTimeSeriesDecomposition& CTimeSeriesDecomposition::operator=(const CTimeSeriesDecomposition& other) { +CTimeSeriesDecomposition& CTimeSeriesDecomposition:: +operator=(const CTimeSeriesDecomposition& other) { if (this != &other) { CTimeSeriesDecomposition copy{other}; this->swap(copy); @@ -176,9 +181,14 @@ void CTimeSeriesDecomposition::acceptPersistInserter(core::CStatePersistInserter inserter.insertValue(TIME_SHIFT_6_3_TAG, m_TimeShift); inserter.insertValue(LAST_VALUE_TIME_6_3_TAG, m_LastValueTime); inserter.insertValue(LAST_PROPAGATION_TIME_6_3_TAG, m_LastPropagationTime); - inserter.insertLevel(PERIODICITY_TEST_6_3_TAG, boost::bind(&CPeriodicityTest::acceptPersistInserter, &m_PeriodicityTest, _1)); - inserter.insertLevel(CALENDAR_CYCLIC_TEST_6_3_TAG, boost::bind(&CCalendarTest::acceptPersistInserter, &m_CalendarCyclicTest, _1)); - inserter.insertLevel(COMPONENTS_6_3_TAG, boost::bind(&CComponents::acceptPersistInserter, &m_Components, _1)); + inserter.insertLevel(PERIODICITY_TEST_6_3_TAG, + boost::bind(&CPeriodicityTest::acceptPersistInserter, + &m_PeriodicityTest, _1)); + inserter.insertLevel(CALENDAR_CYCLIC_TEST_6_3_TAG, + boost::bind(&CCalendarTest::acceptPersistInserter, + &m_CalendarCyclicTest, _1)); + inserter.insertLevel(COMPONENTS_6_3_TAG, boost::bind(&CComponents::acceptPersistInserter, + &m_Components, _1)); } CTimeSeriesDecomposition* 
CTimeSeriesDecomposition::clone(bool isForForecast) const { @@ -223,7 +233,10 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, CBasicStatistics::mean(this->value(time, 0.0, E_TrendForced)), CBasicStatistics::mean(this->value(time, 0.0, E_Seasonal)), CBasicStatistics::mean(this->value(time, 0.0, E_Calendar)), - [this](core_t::TTime time_) { return CBasicStatistics::mean(this->value(time_, 0.0, E_Seasonal | E_Calendar)); }, + [this](core_t::TTime time_) { + return CBasicStatistics::mean( + this->value(time_, 0.0, E_Seasonal | E_Calendar)); + }, m_Components.periodicityTestConfig()}; m_Components.handle(message); @@ -233,7 +246,9 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, return result.changed(); } -bool CTimeSeriesDecomposition::applyChange(core_t::TTime time, double value, const SChangeDescription& change) { +bool CTimeSeriesDecomposition::applyChange(core_t::TTime time, + double value, + const SChangeDescription& change) { bool result{m_Components.usingTrendForPrediction() == false}; m_Components.useTrendForPrediction(); @@ -265,7 +280,10 @@ double CTimeSeriesDecomposition::meanValue(core_t::TTime time) const { return m_Components.meanValue(time); } -TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, double confidence, int components, bool smooth) const { +TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, + double confidence, + int components, + bool smooth) const { TVector2x1 baseline{0.0}; time += m_TimeShift; @@ -295,10 +313,10 @@ TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, double confi } if (smooth) { - baseline += - vector2x1(this->smooth(boost::bind(&CTimeSeriesDecomposition::value, this, _1, confidence, components & E_Seasonal, false), - time - m_TimeShift, - components)); + baseline += vector2x1( + this->smooth(boost::bind(&CTimeSeriesDecomposition::value, this, _1, + confidence, components & E_Seasonal, false), + time - m_TimeShift, components)); } return pair(baseline); @@ -341,20 +359,29 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime, double trendVariance{CBasicStatistics::mean(m_Components.trend().variance(0.0))}; double seasonalVariance{m_Components.meanVariance() - trendVariance}; double variance{this->meanVariance()}; - double scale0{std::sqrt(std::max(CBasicStatistics::mean(this->scale(startTime, variance, 0.0)), minimumScale))}; + double scale0{std::sqrt(std::max( + CBasicStatistics::mean(this->scale(startTime, variance, 0.0)), minimumScale))}; TVector2x1 i0{vector2x1(confidenceInterval(confidence, seasonalVariance))}; auto forecastSeasonal = [&](core_t::TTime time) { m_Components.interpolateForForecast(time); - double scale{std::sqrt(std::max(CBasicStatistics::mean(this->scale(time, variance, 0.0)), minimumScale))}; - TVector2x1 prediction{vector2x1(seasonal(time)) + vector2x1(this->smooth(seasonal, time, E_Seasonal)) + (scale - scale0) * i0}; - return TDouble3Vec{prediction(0), (prediction(0) + prediction(1)) / 2.0, prediction(1)}; + double scale{std::sqrt(std::max( + CBasicStatistics::mean(this->scale(time, variance, 0.0)), minimumScale))}; + TVector2x1 prediction{vector2x1(seasonal(time)) + + vector2x1(this->smooth(seasonal, time, E_Seasonal)) + + (scale - scale0) * i0}; + return TDouble3Vec{prediction(0), (prediction(0) + prediction(1)) / 2.0, + prediction(1)}; }; - m_Components.trend().forecast(startTime, endTime, step, confidence, forecastSeasonal, writer); + m_Components.trend().forecast(startTime, endTime, step, confidence, + forecastSeasonal, writer); 
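// A toy standalone version of the interval handling in forecastSeasonal
// above: the seasonal confidence interval i0 is computed once at the start of
// the forecast and the bounds are then translated by (scale - scale0) * i0 as
// the local variance scale drifts. All numbers here are hypothetical.
#include <iostream>
#include <utility>

int main() {
    std::pair<double, double> i0{-2.0, 2.0};       // interval at forecast start
    double scale0{1.0};                            // sqrt variance scale at start
    double scale{1.3};                             // sqrt variance scale at time t
    std::pair<double, double> seasonal{9.0, 11.0}; // seasonal prediction bounds
    std::pair<double, double> forecast{seasonal.first + (scale - scale0) * i0.first,
                                       seasonal.second + (scale - scale0) * i0.second};
    // The interval widens when scale > scale0 and narrows when it shrinks.
    std::cout << "[" << forecast.first << ", " << forecast.second << "]\n";
    return 0;
}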
} -double CTimeSeriesDecomposition::detrend(core_t::TTime time, double value, double confidence, int components) const { +double CTimeSeriesDecomposition::detrend(core_t::TTime time, + double value, + double confidence, + int components) const { if (!this->initialized()) { return value; } @@ -366,7 +393,10 @@ double CTimeSeriesDecomposition::meanVariance() const { return m_Components.meanVarianceScale() * m_Components.meanVariance(); } -TDoubleDoublePr CTimeSeriesDecomposition::scale(core_t::TTime time, double variance, double confidence, bool smooth) const { +TDoubleDoublePr CTimeSeriesDecomposition::scale(core_t::TTime time, + double variance, + double confidence, + bool smooth) const { if (!this->initialized()) { return {1.0, 1.0}; } @@ -407,7 +437,9 @@ TDoubleDoublePr CTimeSeriesDecomposition::scale(core_t::TTime time, double varia scale = TVector2x1{1.0} + bias * (scale - TVector2x1{1.0}); if (smooth) { - scale += vector2x1(this->smooth(boost::bind(&CTimeSeriesDecomposition::scale, this, _1, variance, confidence, false), time, E_All)); + scale += vector2x1(this->smooth(boost::bind(&CTimeSeriesDecomposition::scale, this, + _1, variance, confidence, false), + time, E_All)); } return pair(scale); @@ -435,8 +467,10 @@ void CTimeSeriesDecomposition::debugMemoryUsage(core::CMemoryUsage::TMemoryUsage } std::size_t CTimeSeriesDecomposition::memoryUsage() const { - return core::CMemory::dynamicSize(m_Mediator) + core::CMemory::dynamicSize(m_PeriodicityTest) + - core::CMemory::dynamicSize(m_CalendarCyclicTest) + core::CMemory::dynamicSize(m_Components); + return core::CMemory::dynamicSize(m_Mediator) + + core::CMemory::dynamicSize(m_PeriodicityTest) + + core::CMemory::dynamicSize(m_CalendarCyclicTest) + + core::CMemory::dynamicSize(m_Components); } std::size_t CTimeSeriesDecomposition::staticSize() const { @@ -459,16 +493,20 @@ void CTimeSeriesDecomposition::initializeMediator() { } template<typename F> -TDoubleDoublePr CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, int components) const { +TDoubleDoublePr +CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, int components) const { auto offset = [&f, time](core_t::TTime discontinuity) { TVector2x1 baselineMinusEps{vector2x1(f(discontinuity - 1))}; TVector2x1 baselinePlusEps{vector2x1(f(discontinuity + 1))}; - return 0.5 * (1.0 - static_cast<double>(std::abs(time - discontinuity)) / static_cast<double>(SMOOTHING_INTERVAL)) * + return 0.5 * + (1.0 - static_cast<double>(std::abs(time - discontinuity)) / + static_cast<double>(SMOOTHING_INTERVAL)) * (baselinePlusEps - baselineMinusEps); }; for (const auto& component : m_Components.seasonal()) { - if (!component.initialized() || !this->matches(components, component) || component.time().windowRepeat() <= SMOOTHING_INTERVAL) { + if (!component.initialized() || !this->matches(components, component) || + component.time().windowRepeat() <= SMOOTHING_INTERVAL) { continue; } @@ -478,12 +516,15 @@ TDoubleDoublePr CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, bool inWindowBefore{times.inWindow(time - SMOOTHING_INTERVAL)}; bool inWindowAfter{times.inWindow(time + SMOOTHING_INTERVAL)}; if ((!timeInWindow && inWindowBefore) || - (timeInWindow && inWindowBefore && times.startOfWindow(time) != times.startOfWindow(time + SMOOTHING_INTERVAL))) { - core_t::TTime discontinuity{times.startOfWindow(time - SMOOTHING_INTERVAL) + times.windowLength()}; + (timeInWindow && inWindowBefore && + times.startOfWindow(time) != times.startOfWindow(time + SMOOTHING_INTERVAL))) { + core_t::TTime 
discontinuity{times.startOfWindow(time - SMOOTHING_INTERVAL) + + times.windowLength()}; return pair(-offset(discontinuity)); } if ((!timeInWindow && inWindowAfter) || - (timeInWindow && inWindowAfter && times.startOfWindow(time) != times.startOfWindow(time + SMOOTHING_INTERVAL))) { + (timeInWindow && inWindowAfter && + times.startOfWindow(time) != times.startOfWindow(time + SMOOTHING_INTERVAL))) { core_t::TTime discontinuity{component.time().startOfWindow(time + SMOOTHING_INTERVAL)}; return pair(offset(discontinuity)); } @@ -492,8 +533,11 @@ TDoubleDoublePr CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, return {0.0, 0.0}; } -bool CTimeSeriesDecomposition::selected(core_t::TTime time, int components, const CSeasonalComponent& component) const { - return component.initialized() && this->matches(components, component) && component.time().inWindow(time); +bool CTimeSeriesDecomposition::selected(core_t::TTime time, + int components, + const CSeasonalComponent& component) const { + return component.initialized() && this->matches(components, component) && + component.time().inWindow(time); } bool CTimeSeriesDecomposition::matches(int components, const CSeasonalComponent& component) const { @@ -502,7 +546,8 @@ bool CTimeSeriesDecomposition::matches(int components, const CSeasonalComponent& return true; } core_t::TTime period{component.time().period()}; - bool diurnal{(period % core::constants::DAY == 0) || (period % core::constants::WEEK == 0)}; + bool diurnal{(period % core::constants::DAY == 0) || + (period % core::constants::WEEK == 0)}; return (seasonal == E_Diurnal && diurnal) || (seasonal == E_NonDiurnal && !diurnal); } diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index 0c6f700fe8..34d3c08f18 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -186,7 +186,10 @@ void decompose(const CTrendComponent& trend, //! Propagate a test forwards to account for \p end - \p start //! elapsed time in steps or size \p step. 
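// In outline, stepwisePropagateForwards below snaps the elapsed interval to
// whole steps and ages the target once per step, so ageing happens at a fixed
// cadence however irregularly values arrive. A runnable sketch, with the
// ageing callback standing in for the target's propagation:
#include <cstdint>
#include <iostream>

using TTime = std::int64_t;

template<typename AGE>
void stepwisePropagateForwards(TTime step, TTime start, TTime end, const AGE& age) {
    start = (start / step) * step; // CIntegerTools::floor for non-negative times
    end = (end / step) * step;
    if (end > start) {
        age(static_cast<double>(end - start) / static_cast<double>(step));
    }
}

int main() {
    const TTime DAY{86400};
    stepwisePropagateForwards(DAY, 90000, 350000, [](double steps) {
        std::cout << "age by " << steps << " step(s)\n"; // prints "age by 3 step(s)"
    });
    return 0;
}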
template<typename T> -void stepwisePropagateForwards(core_t::TTime step, core_t::TTime start, core_t::TTime end, const T& target) { +void stepwisePropagateForwards(core_t::TTime step, + core_t::TTime start, + core_t::TTime end, + const T& target) { if (target) { start = CIntegerTools::floor(start, step); end = CIntegerTools::floor(end, step); @@ -222,8 +225,9 @@ const std::size_t PT_NEW_VALUE = 0; const std::size_t PT_RESET = 1; const TStrVec PT_ALPHABET{"NEW_VALUE", "RESET"}; // Transition Function -const TSizeVecVec PT_TRANSITION_FUNCTION{TSizeVec{PT_TEST, PT_TEST, PT_NOT_TESTING, PT_ERROR}, - TSizeVec{PT_INITIAL, PT_INITIAL, PT_NOT_TESTING, PT_INITIAL}}; +const TSizeVecVec PT_TRANSITION_FUNCTION{ + TSizeVec{PT_TEST, PT_TEST, PT_NOT_TESTING, PT_ERROR}, + TSizeVec{PT_INITIAL, PT_INITIAL, PT_NOT_TESTING, PT_INITIAL}}; // Calendar Cyclic Test State Machine @@ -238,8 +242,9 @@ const std::size_t CC_NEW_VALUE = 0; const std::size_t CC_RESET = 1; const TStrVec CC_ALPHABET{"NEW_VALUE", "RESET"}; // Transition Function -const TSizeVecVec CC_TRANSITION_FUNCTION{TSizeVec{CC_TEST, CC_TEST, CC_NOT_TESTING, CC_ERROR}, - TSizeVec{CC_INITIAL, CC_INITIAL, CC_NOT_TESTING, CC_INITIAL}}; +const TSizeVecVec CC_TRANSITION_FUNCTION{ + TSizeVec{CC_TEST, CC_TEST, CC_NOT_TESTING, CC_ERROR}, + TSizeVec{CC_INITIAL, CC_INITIAL, CC_NOT_TESTING, CC_INITIAL}}; // Components State Machine @@ -255,9 +260,10 @@ const std::size_t SC_INTERPOLATED = 1; const std::size_t SC_RESET = 2; const TStrVec SC_ALPHABET{"ADDED_COMPONENTS", "INTERPOLATED", "RESET"}; // Transition Function -const TSizeVecVec SC_TRANSITION_FUNCTION{TSizeVec{SC_NEW_COMPONENTS, SC_NEW_COMPONENTS, SC_DISABLED, SC_ERROR}, - TSizeVec{SC_NORMAL, SC_NORMAL, SC_DISABLED, SC_ERROR}, - TSizeVec{SC_NORMAL, SC_NORMAL, SC_NORMAL, SC_NORMAL}}; +const TSizeVecVec SC_TRANSITION_FUNCTION{ + TSizeVec{SC_NEW_COMPONENTS, SC_NEW_COMPONENTS, SC_DISABLED, SC_ERROR}, + TSizeVec{SC_NORMAL, SC_NORMAL, SC_DISABLED, SC_ERROR}, + TSizeVec{SC_NORMAL, SC_NORMAL, SC_NORMAL, SC_NORMAL}}; const std::string VERSION_6_3_TAG("6.3"); @@ -304,7 +310,9 @@ const std::string LAST_UPDATE_OLD_TAG{"j"}; const double MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3{48.0}; -bool upgradeTrendModelToVersion6p3(const core_t::TTime bucketLength, CTrendComponent& trend, core::CStateRestoreTraverser& traverser) { +bool upgradeTrendModelToVersion6p3(const core_t::TTime bucketLength, + CTrendComponent& trend, + core::CStateRestoreTraverser& traverser) { using TRegression = CRegression::CLeastSquaresOnline<3, double>; TRegression regression; @@ -313,7 +321,9 @@ bool upgradeTrendModelToVersion6p3(const core_t::TTime bucketLength, CTrendCompo core_t::TTime lastUpdate{0}; do { const std::string& name{traverser.name()}; - RESTORE(REGRESSION_OLD_TAG, traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, &regression, _1))) + RESTORE(REGRESSION_OLD_TAG, + traverser.traverseSubLevel(boost::bind( + &TRegression::acceptRestoreTraverser, &regression, _1))) RESTORE_BUILT_IN(VARIANCE_OLD_TAG, variance) RESTORE_BUILT_IN(TIME_ORIGIN_OLD_TAG, origin) RESTORE_BUILT_IN(LAST_UPDATE_OLD_TAG, lastUpdate) @@ -321,7 +331,8 @@ bool upgradeTrendModelToVersion6p3(const core_t::TTime bucketLength, CTrendCompo // Generate some samples from the old trend model. 
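// The replay mentioned in the comment above works by sampling the old
// regression's predictions over its last four weeks and re-adding them to the
// new trend component with a reduced weight. A standalone sketch with a
// hypothetical linear old model; note the total replayed weight comes out to
// MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3 regardless of the bucket length.
#include <iostream>

int main() {
    const long WEEK{604800};
    long bucketLength{1800};
    long lastUpdate{8 * WEEK};
    auto oldPrediction = [](long time) { return 1.0 + 5e-7 * static_cast<double>(time); };
    double weight{48.0 * static_cast<double>(bucketLength) / static_cast<double>(4 * WEEK)};
    double totalWeight{0.0};
    for (long time = lastUpdate - 4 * WEEK; time < lastUpdate; time += bucketLength) {
        double sample{oldPrediction(time)};
        // The real code would do trend.add(time, sample, weight) here.
        (void)sample;
        totalWeight += weight;
    }
    std::cout << "replayed weight = " << totalWeight << '\n'; // 48
    return 0;
}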
- double weight{MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3 * static_cast<double>(bucketLength) / static_cast<double>(4 * WEEK)}; + double weight{MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3 * + static_cast<double>(bucketLength) / static_cast<double>(4 * WEEK)}; CPRNG::CXorOShiro128Plus rng; for (core_t::TTime time = lastUpdate - 4 * WEEK; time < lastUpdate; time += bucketLength) { @@ -344,51 +355,52 @@ const TCalendarComponentVec NO_CALENDAR_COMPONENTS; //////// SMessage //////// -CTimeSeriesDecompositionDetail::SMessage::SMessage(core_t::TTime time, core_t::TTime lastTime) : s_Time{time}, s_LastTime{lastTime} { +CTimeSeriesDecompositionDetail::SMessage::SMessage(core_t::TTime time, core_t::TTime lastTime) + : s_Time{time}, s_LastTime{lastTime} { } //////// SAddValue //////// -CTimeSeriesDecompositionDetail::SAddValue::SAddValue(core_t::TTime time, - core_t::TTime lastTime, - double value, - const maths_t::TWeightStyleVec& weightStyles, - const maths_t::TDouble4Vec& weights, - double trend, - double seasonal, - double calendar, - const TPredictor& predictor, - const CPeriodicityHypothesisTestsConfig& periodicityTestConfig) - : SMessage{time, lastTime}, - s_Value{value}, - s_WeightStyles{weightStyles}, - s_Weights{weights}, - s_Trend{trend}, - s_Seasonal{seasonal}, - s_Calendar{calendar}, - s_Predictor{predictor}, - s_PeriodicityTestConfig{periodicityTestConfig} { +CTimeSeriesDecompositionDetail::SAddValue::SAddValue( + core_t::TTime time, + core_t::TTime lastTime, + double value, + const maths_t::TWeightStyleVec& weightStyles, + const maths_t::TDouble4Vec& weights, + double trend, + double seasonal, + double calendar, + const TPredictor& predictor, + const CPeriodicityHypothesisTestsConfig& periodicityTestConfig) + : SMessage{time, lastTime}, s_Value{value}, s_WeightStyles{weightStyles}, + s_Weights{weights}, s_Trend{trend}, s_Seasonal{seasonal}, s_Calendar{calendar}, + s_Predictor{predictor}, s_PeriodicityTestConfig{periodicityTestConfig} { } //////// SDetectedSeasonal //////// -CTimeSeriesDecompositionDetail::SDetectedSeasonal::SDetectedSeasonal(core_t::TTime time, - core_t::TTime lastTime, - const CPeriodicityHypothesisTestsResult& result, - const CExpandingWindow& window, - const TPredictor& predictor) +CTimeSeriesDecompositionDetail::SDetectedSeasonal::SDetectedSeasonal( + core_t::TTime time, + core_t::TTime lastTime, + const CPeriodicityHypothesisTestsResult& result, + const CExpandingWindow& window, + const TPredictor& predictor) : SMessage{time, lastTime}, s_Result{result}, s_Window{window}, s_Predictor{predictor} { } //////// SDetectedCalendar //////// -CTimeSeriesDecompositionDetail::SDetectedCalendar::SDetectedCalendar(core_t::TTime time, core_t::TTime lastTime, CCalendarFeature feature) +CTimeSeriesDecompositionDetail::SDetectedCalendar::SDetectedCalendar(core_t::TTime time, + core_t::TTime lastTime, + CCalendarFeature feature) : SMessage{time, lastTime}, s_Feature{feature} { } //////// SNewComponent //////// -CTimeSeriesDecompositionDetail::SNewComponents::SNewComponents(core_t::TTime time, core_t::TTime lastTime, EComponent component) +CTimeSeriesDecompositionDetail::SNewComponents::SNewComponents(core_t::TTime time, + core_t::TTime lastTime, + EComponent component) : SMessage{time, lastTime}, s_Component{component} { } @@ -415,7 +427,8 @@ void CTimeSeriesDecompositionDetail::CHandler::mediator(CMediator* mediator) { m_Mediator = mediator; } -CTimeSeriesDecompositionDetail::CMediator* CTimeSeriesDecompositionDetail::CHandler::mediator() const { +CTimeSeriesDecompositionDetail::CMediator* 
+CTimeSeriesDecompositionDetail::CHandler::mediator() const { return m_Mediator; } @@ -444,17 +457,20 @@ std::size_t CTimeSeriesDecompositionDetail::CMediator::memoryUsage() const { //////// CPeriodicityTest //////// -CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(double decayRate, core_t::TTime bucketLength) - : m_Machine{core::CStateMachine::create(PT_ALPHABET, - PT_STATES, - PT_TRANSITION_FUNCTION, - bucketLength > LONG_BUCKET_LENGTHS.back() ? PT_NOT_TESTING : PT_INITIAL)}, - m_DecayRate{decayRate}, - m_BucketLength{bucketLength} { +CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(double decayRate, + core_t::TTime bucketLength) + : m_Machine{core::CStateMachine::create( + PT_ALPHABET, + PT_STATES, + PT_TRANSITION_FUNCTION, + bucketLength > LONG_BUCKET_LENGTHS.back() ? PT_NOT_TESTING : PT_INITIAL)}, + m_DecayRate{decayRate}, m_BucketLength{bucketLength} { } -CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(const CPeriodicityTest& other, bool isForForecast) - : m_Machine{other.m_Machine}, m_DecayRate{other.m_DecayRate}, m_BucketLength{other.m_BucketLength} { +CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(const CPeriodicityTest& other, + bool isForForecast) + : m_Machine{other.m_Machine}, m_DecayRate{other.m_DecayRate}, m_BucketLength{ + other.m_BucketLength} { // Note that m_Windows is an array. for (std::size_t i = 0u; !isForForecast && i < other.m_Windows.size(); ++i) { if (other.m_Windows[i]) { @@ -463,32 +479,43 @@ CTimeSeriesDecompositionDetail::CPeriodicityTest::CPeriodicityTest(const CPeriod } } -bool CTimeSeriesDecompositionDetail::CPeriodicityTest::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { +bool CTimeSeriesDecompositionDetail::CPeriodicityTest::acceptRestoreTraverser( + core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; RESTORE(PERIODICITY_TEST_MACHINE_6_3_TAG, - traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))) - RESTORE_SETUP_TEARDOWN(SHORT_WINDOW_6_3_TAG, - m_Windows[E_Short].reset(this->newWindow(E_Short)), - m_Windows[E_Short] && traverser.traverseSubLevel(boost::bind( - &CExpandingWindow::acceptRestoreTraverser, m_Windows[E_Short].get(), _1)), - /**/) - RESTORE_SETUP_TEARDOWN(LONG_WINDOW_6_3_TAG, - m_Windows[E_Long].reset(this->newWindow(E_Long)), - m_Windows[E_Long] && traverser.traverseSubLevel(boost::bind( - &CExpandingWindow::acceptRestoreTraverser, m_Windows[E_Long].get(), _1)), - /**/) + traverser.traverseSubLevel(boost::bind( + &core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))) + RESTORE_SETUP_TEARDOWN( + SHORT_WINDOW_6_3_TAG, m_Windows[E_Short].reset(this->newWindow(E_Short)), + m_Windows[E_Short] && traverser.traverseSubLevel(boost::bind( + &CExpandingWindow::acceptRestoreTraverser, + m_Windows[E_Short].get(), _1)), + /**/) + RESTORE_SETUP_TEARDOWN( + LONG_WINDOW_6_3_TAG, m_Windows[E_Long].reset(this->newWindow(E_Long)), + m_Windows[E_Long] && + traverser.traverseSubLevel(boost::bind(&CExpandingWindow::acceptRestoreTraverser, + m_Windows[E_Long].get(), _1)), + /**/) } while (traverser.next()); return true; } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(PERIODICITY_TEST_MACHINE_6_3_TAG, boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); +void CTimeSeriesDecompositionDetail::CPeriodicityTest::acceptPersistInserter( + 
core::CStatePersistInserter& inserter) const { + inserter.insertLevel( + PERIODICITY_TEST_MACHINE_6_3_TAG, + boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); if (m_Windows[E_Short]) { - inserter.insertLevel(SHORT_WINDOW_6_3_TAG, boost::bind(&CExpandingWindow::acceptPersistInserter, m_Windows[E_Short].get(), _1)); + inserter.insertLevel(SHORT_WINDOW_6_3_TAG, + boost::bind(&CExpandingWindow::acceptPersistInserter, + m_Windows[E_Short].get(), _1)); } if (m_Windows[E_Long]) { - inserter.insertLevel(LONG_WINDOW_6_3_TAG, boost::bind(&CExpandingWindow::acceptPersistInserter, m_Windows[E_Long].get(), _1)); + inserter.insertLevel(LONG_WINDOW_6_3_TAG, + boost::bind(&CExpandingWindow::acceptPersistInserter, + m_Windows[E_Long].get(), _1)); } } @@ -549,9 +576,11 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::test(const SAddValue& mes TFloatMeanAccumulatorVec values(window->valuesMinusPrediction(predictor)); core_t::TTime start{CIntegerTools::floor(window->startTime(), m_BucketLength)}; core_t::TTime bucketLength{window->bucketLength()}; - CPeriodicityHypothesisTestsResult result{testForPeriods(config, start, bucketLength, values)}; + CPeriodicityHypothesisTestsResult result{ + testForPeriods(config, start, bucketLength, values)}; if (result.periodic()) { - this->mediator()->forward(SDetectedSeasonal{time, lastTime, result, *window, predictor}); + this->mediator()->forward(SDetectedSeasonal{ + time, lastTime, result, *window, predictor}); } } } @@ -566,7 +595,8 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::test(const SAddValue& mes } } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::propagateForwards(core_t::TTime start, core_t::TTime end) { +void CTimeSeriesDecompositionDetail::CPeriodicityTest::propagateForwards(core_t::TTime start, + core_t::TTime end) { stepwisePropagateForwards(DAY, start, end, m_Windows[E_Short]); stepwisePropagateForwards(WEEK, start, end, m_Windows[E_Long]); } @@ -578,7 +608,8 @@ uint64_t CTimeSeriesDecompositionDetail::CPeriodicityTest::checksum(uint64_t see return CChecksum::calculate(seed, m_Windows); } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { +void CTimeSeriesDecompositionDetail::CPeriodicityTest::debugMemoryUsage( + core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CPeriodicityTest"); core::CMemoryDebug::dynamicSize("m_Windows", m_Windows, mem); } @@ -602,7 +633,8 @@ std::size_t CTimeSeriesDecompositionDetail::CPeriodicityTest::extraMemoryOnIniti return result; } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, const SMessage& message) { +void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, + const SMessage& message) { core_t::TTime time{message.s_Time}; std::size_t old{m_Machine.state()}; @@ -610,7 +642,8 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, std::size_t state{m_Machine.state()}; if (state != old) { - LOG_TRACE(<< PT_STATES[old] << "," << PT_ALPHABET[symbol] << " -> " << PT_STATES[state]); + LOG_TRACE(<< PT_STATES[old] << "," << PT_ALPHABET[symbol] << " -> " + << PT_STATES[state]); auto initialize = [this](core_t::TTime time_) { for (auto i : {E_Short, E_Long}) { @@ -623,7 +656,9 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, switch (state) { case PT_TEST: - if (std::all_of(m_Windows.begin(), m_Windows.end(), [](const TExpandingWindowPtr& window) { return !window; })) { + if 
(std::all_of( m_Windows.begin(), m_Windows.end(), [](const TExpandingWindowPtr& window) { return !window; })) { initialize(time); } break; @@ -642,7 +677,8 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, } } -bool CTimeSeriesDecompositionDetail::CPeriodicityTest::shouldTest(const TExpandingWindowPtr& window, core_t::TTime time) const { +bool CTimeSeriesDecompositionDetail::CPeriodicityTest::shouldTest(const TExpandingWindowPtr& window, + core_t::TTime time) const { // We need to test more frequently than when we compress, because // this only happens after we've seen 336 buckets, this would thus // significantly delay when we first detect a daily periodic for @@ -665,7 +701,9 @@ CExpandingWindow* CTimeSeriesDecompositionDetail::CPeriodicityTest::newWindow(ET auto newWindow = [this](const TTimeVec& bucketLengths) { if (m_BucketLength <= bucketLengths.back()) { - std::ptrdiff_t a{std::lower_bound(bucketLengths.begin(), bucketLengths.end(), m_BucketLength) - bucketLengths.begin()}; + std::ptrdiff_t a{std::lower_bound(bucketLengths.begin(), + bucketLengths.end(), m_BucketLength) - + bucketLengths.begin()}; std::size_t b{bucketLengths.size()}; TTimeCRng bucketLengths_(bucketLengths, a, b); return new CExpandingWindow(m_BucketLength, bucketLengths_, 336, m_DecayRate); @@ -682,46 +720,58 @@ CExpandingWindow* CTimeSeriesDecompositionDetail::CPeriodicityTest::newWindow(ET return nullptr; } -const TTimeVec CTimeSeriesDecompositionDetail::CPeriodicityTest::SHORT_BUCKET_LENGTHS{1, 5, 10, 30, 60, 300, 600, 1800, 3600}; -const TTimeVec CTimeSeriesDecompositionDetail::CPeriodicityTest::LONG_BUCKET_LENGTHS{7200, 21600, 43200, 86400, 172800, 345600}; +const TTimeVec CTimeSeriesDecompositionDetail::CPeriodicityTest::SHORT_BUCKET_LENGTHS{ + 1, 5, 10, 30, 60, 300, 600, 1800, 3600}; +const TTimeVec CTimeSeriesDecompositionDetail::CPeriodicityTest::LONG_BUCKET_LENGTHS{ + 7200, 21600, 43200, 86400, 172800, 345600}; //////// CCalendarCyclic //////// -CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(double decayRate, core_t::TTime bucketLength) +CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(double decayRate, + core_t::TTime bucketLength) : m_Machine{core::CStateMachine::create(CC_ALPHABET, CC_STATES, CC_TRANSITION_FUNCTION, bucketLength > DAY ? CC_NOT_TESTING : CC_INITIAL)}, - m_DecayRate{decayRate}, - m_LastMonth{} { + m_DecayRate{decayRate}, m_LastMonth{} { } -CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(const CCalendarTest& other, bool isForForecast) - : m_Machine{other.m_Machine}, - m_DecayRate{other.m_DecayRate}, - m_LastMonth{other.m_LastMonth}, - m_Test{!isForForecast && other.m_Test ? boost::make_shared<CCalendarCyclicTest>(*other.m_Test) : 0} { +CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(const CCalendarTest& other, + bool isForForecast) : m_Machine{other.m_Machine}, m_DecayRate{other.m_DecayRate}, + m_LastMonth{other.m_LastMonth}, m_Test{!isForForecast && other.m_Test + ? 
boost::make_shared<CCalendarCyclicTest>( + *other.m_Test) + : 0} { } bool CTimeSeriesDecompositionDetail::CCalendarTest::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; RESTORE(CALENDAR_TEST_MACHINE_6_3_TAG, - traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))) + traverser.traverseSubLevel(boost::bind( + &core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))) RESTORE_BUILT_IN(LAST_MONTH_6_3_TAG, m_LastMonth); - RESTORE_SETUP_TEARDOWN(CALENDAR_TEST_6_3_TAG, - m_Test = boost::make_shared<CCalendarCyclicTest>(m_DecayRate), - traverser.traverseSubLevel(boost::bind(&CCalendarCyclicTest::acceptRestoreTraverser, m_Test.get(), _1)), - /**/) + RESTORE_SETUP_TEARDOWN( + CALENDAR_TEST_6_3_TAG, + m_Test = boost::make_shared<CCalendarCyclicTest>(m_DecayRate), + traverser.traverseSubLevel(boost::bind( + &CCalendarCyclicTest::acceptRestoreTraverser, m_Test.get(), _1)), + /**/) } while (traverser.next()); return true; } -void CTimeSeriesDecompositionDetail::CCalendarTest::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(CALENDAR_TEST_MACHINE_6_3_TAG, boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); +void CTimeSeriesDecompositionDetail::CCalendarTest::acceptPersistInserter( + core::CStatePersistInserter& inserter) const { + inserter.insertLevel( + CALENDAR_TEST_MACHINE_6_3_TAG, + boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); inserter.insertValue(LAST_MONTH_6_3_TAG, m_LastMonth); if (m_Test) { - inserter.insertLevel(CALENDAR_TEST_6_3_TAG, boost::bind(&CCalendarCyclicTest::acceptPersistInserter, m_Test.get(), _1)); + inserter.insertLevel(CALENDAR_TEST_6_3_TAG, + boost::bind(&CCalendarCyclicTest::acceptPersistInserter, + m_Test.get(), _1)); } } @@ -734,7 +784,8 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::swap(CCalendarTest& other) { void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SAddValue& message) { core_t::TTime time{message.s_Time}; - double error{message.s_Value - message.s_Trend - message.s_Seasonal - message.s_Calendar}; + double error{message.s_Value - message.s_Trend - message.s_Seasonal - + message.s_Calendar}; const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; const maths_t::TDouble4Vec& weights{message.s_Weights}; @@ -793,7 +844,8 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::test(const SMessage& message } } -void CTimeSeriesDecompositionDetail::CCalendarTest::propagateForwards(core_t::TTime start, core_t::TTime end) { +void CTimeSeriesDecompositionDetail::CCalendarTest::propagateForwards(core_t::TTime start, + core_t::TTime end) { stepwisePropagateForwards(DAY, start, end, m_Test); } @@ -804,7 +856,8 @@ uint64_t CTimeSeriesDecompositionDetail::CCalendarTest::checksum(uint64_t seed) return CChecksum::calculate(seed, m_Test); } -void CTimeSeriesDecompositionDetail::CCalendarTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { +void CTimeSeriesDecompositionDetail::CCalendarTest::debugMemoryUsage( + core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CCalendarTest"); core::CMemoryDebug::dynamicSize("m_Test", m_Test, mem); } @@ -826,7 +879,8 @@ std::size_t CTimeSeriesDecompositionDetail::CCalendarTest::extraMemoryOnInitiali return result; } -void CTimeSeriesDecompositionDetail::CCalendarTest::apply(std::size_t symbol, const SMessage& message) { +void CTimeSeriesDecompositionDetail::CCalendarTest::apply(std::size_t symbol, + const SMessage& message) { 
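// Each apply() in this file, including the one just starting here, funnels
// its symbol through core::CStateMachine, whose behaviour is fully described
// by the *_TRANSITION_FUNCTION tables defined earlier: the next state is
// simply transition[symbol][currentState]. A self-contained sketch with a
// hypothetical two-state machine:
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main() {
    using TSizeVec = std::vector<std::size_t>;
    const std::size_t TEST = 0, NOT_TESTING = 1; // states
    const std::size_t NEW_VALUE = 0, RESET = 1;  // alphabet
    const std::vector<std::string> STATES{"TEST", "NOT_TESTING"};
    const std::vector<TSizeVec> transition{TSizeVec{TEST, NOT_TESTING},  // NEW_VALUE
                                           TSizeVec{TEST, TEST}};       // RESET
    std::size_t state{NOT_TESTING};
    for (std::size_t symbol : {NEW_VALUE, RESET, NEW_VALUE}) {
        state = transition[symbol][state];
        std::cout << "-> " << STATES[state] << '\n'; // RESET re-enables testing
    }
    return 0;
}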
core_t::TTime time{message.s_Time}; std::size_t old{m_Machine.state()}; @@ -834,7 +888,8 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::apply(std::size_t symbol, co std::size_t state{m_Machine.state()}; if (state != old) { - LOG_TRACE(<< CC_STATES[old] << "," << CC_ALPHABET[symbol] << " -> " << CC_STATES[state]); + LOG_TRACE(<< CC_STATES[old] << "," << CC_ALPHABET[symbol] << " -> " + << CC_STATES[state]); switch (state) { case CC_TEST: @@ -874,56 +929,56 @@ int CTimeSeriesDecompositionDetail::CCalendarTest::month(core_t::TTime time) con //////// CComponents //////// -CTimeSeriesDecompositionDetail::CComponents::CComponents(double decayRate, core_t::TTime bucketLength, std::size_t seasonalComponentSize) +CTimeSeriesDecompositionDetail::CComponents::CComponents(double decayRate, + core_t::TTime bucketLength, + std::size_t seasonalComponentSize) : m_Machine{core::CStateMachine::create(SC_ALPHABET, SC_STATES, SC_TRANSITION_FUNCTION, SC_NORMAL)}, - m_DecayRate{decayRate}, - m_BucketLength{bucketLength}, - m_SeasonalComponentSize{seasonalComponentSize}, - m_CalendarComponentSize{seasonalComponentSize / 3}, - m_Trend{decayRate}, - m_UsingTrendForPrediction{false}, - m_Watcher{nullptr} { + m_DecayRate{decayRate}, m_BucketLength{bucketLength}, m_SeasonalComponentSize{seasonalComponentSize}, + m_CalendarComponentSize{seasonalComponentSize / 3}, m_Trend{decayRate}, + m_UsingTrendForPrediction{false}, m_Watcher{nullptr} { } CTimeSeriesDecompositionDetail::CComponents::CComponents(const CComponents& other) - : m_Machine{other.m_Machine}, - m_DecayRate{other.m_DecayRate}, - m_BucketLength{other.m_BucketLength}, - m_SeasonalComponentSize{other.m_SeasonalComponentSize}, - m_CalendarComponentSize{other.m_CalendarComponentSize}, - m_Trend{other.m_Trend}, + : m_Machine{other.m_Machine}, m_DecayRate{other.m_DecayRate}, + m_BucketLength{other.m_BucketLength}, m_SeasonalComponentSize{other.m_SeasonalComponentSize}, + m_CalendarComponentSize{other.m_CalendarComponentSize}, m_Trend{other.m_Trend}, m_Seasonal{other.m_Seasonal ? new SSeasonal{*other.m_Seasonal} : nullptr}, m_Calendar{other.m_Calendar ? 
new SCalendar{*other.m_Calendar} : nullptr}, - m_MeanVarianceScale{other.m_MeanVarianceScale}, - m_Moments{other.m_Moments}, + m_MeanVarianceScale{other.m_MeanVarianceScale}, m_Moments{other.m_Moments}, m_MomentsMinusTrend{other.m_MomentsMinusTrend}, - m_UsingTrendForPrediction{other.m_UsingTrendForPrediction}, - m_Watcher{nullptr} { + m_UsingTrendForPrediction{other.m_UsingTrendForPrediction}, m_Watcher{nullptr} { } -bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(const SDistributionRestoreParams& params, - core::CStateRestoreTraverser& traverser) { +bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser( + const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { if (traverser.name() == VERSION_6_3_TAG) { while (traverser.next()) { const std::string& name{traverser.name()}; RESTORE(COMPONENTS_MACHINE_6_3_TAG, - traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); + traverser.traverseSubLevel(boost::bind( + &core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); RESTORE_BUILT_IN(DECAY_RATE_6_3_TAG, m_DecayRate); - RESTORE(TREND_6_3_TAG, - traverser.traverseSubLevel(boost::bind(&CTrendComponent::acceptRestoreTraverser, &m_Trend, boost::cref(params), _1))) - RESTORE_SETUP_TEARDOWN(SEASONAL_6_3_TAG, - m_Seasonal.reset(new SSeasonal), - traverser.traverseSubLevel( - boost::bind(&SSeasonal::acceptRestoreTraverser, m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), - /**/) - RESTORE_SETUP_TEARDOWN(CALENDAR_6_3_TAG, - m_Calendar.reset(new SCalendar), - traverser.traverseSubLevel( - boost::bind(&SCalendar::acceptRestoreTraverser, m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), - /**/) - RESTORE(MEAN_VARIANCE_SCALE_6_3_TAG, m_MeanVarianceScale.fromDelimited(traverser.value())) + RESTORE(TREND_6_3_TAG, traverser.traverseSubLevel(boost::bind( + &CTrendComponent::acceptRestoreTraverser, + &m_Trend, boost::cref(params), _1))) + RESTORE_SETUP_TEARDOWN( + SEASONAL_6_3_TAG, m_Seasonal.reset(new SSeasonal), + traverser.traverseSubLevel( + boost::bind(&SSeasonal::acceptRestoreTraverser, + m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), + /**/) + RESTORE_SETUP_TEARDOWN( + CALENDAR_6_3_TAG, m_Calendar.reset(new SCalendar), + traverser.traverseSubLevel( + boost::bind(&SCalendar::acceptRestoreTraverser, + m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), + /**/) + RESTORE(MEAN_VARIANCE_SCALE_6_3_TAG, + m_MeanVarianceScale.fromDelimited(traverser.value())) RESTORE(MOMENTS_6_3_TAG, m_Moments.fromDelimited(traverser.value())); - RESTORE(MOMENTS_MINUS_TREND_6_3_TAG, m_MomentsMinusTrend.fromDelimited(traverser.value())); + RESTORE(MOMENTS_MINUS_TREND_6_3_TAG, + m_MomentsMinusTrend.fromDelimited(traverser.value())); RESTORE_BUILT_IN(USING_TREND_FOR_PREDICTION_6_3_TAG, m_UsingTrendForPrediction) } @@ -933,22 +988,26 @@ bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(const S do { const std::string& name{traverser.name()}; RESTORE(COMPONENTS_MACHINE_OLD_TAG, - traverser.traverseSubLevel(boost::bind(&core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); + traverser.traverseSubLevel(boost::bind( + &core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); + RESTORE_SETUP_TEARDOWN(TREND_OLD_TAG, + /**/, + traverser.traverseSubLevel(boost::bind( + upgradeTrendModelToVersion6p3, + m_BucketLength, boost::ref(m_Trend), _1)), + m_UsingTrendForPrediction = true) + RESTORE_SETUP_TEARDOWN( + SEASONAL_OLD_TAG, m_Seasonal.reset(new SSeasonal), + 
traverser.traverseSubLevel( + boost::bind(&SSeasonal::acceptRestoreTraverser, + m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), + /**/) RESTORE_SETUP_TEARDOWN( - TREND_OLD_TAG, - /**/, - traverser.traverseSubLevel(boost::bind(upgradeTrendModelToVersion6p3, m_BucketLength, boost::ref(m_Trend), _1)), - m_UsingTrendForPrediction = true) - RESTORE_SETUP_TEARDOWN(SEASONAL_OLD_TAG, - m_Seasonal.reset(new SSeasonal), - traverser.traverseSubLevel( - boost::bind(&SSeasonal::acceptRestoreTraverser, m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), - /**/) - RESTORE_SETUP_TEARDOWN(CALENDAR_OLD_TAG, - m_Calendar.reset(new SCalendar), - traverser.traverseSubLevel( - boost::bind(&SCalendar::acceptRestoreTraverser, m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), - /**/) + CALENDAR_OLD_TAG, m_Calendar.reset(new SCalendar), + traverser.traverseSubLevel( + boost::bind(&SCalendar::acceptRestoreTraverser, + m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), + /**/) } while (traverser.next()); m_MeanVarianceScale.add(1.0, MODEL_WEIGHT_UPGRADING_TO_VERSION_6p3); @@ -956,16 +1015,22 @@ bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(const S return true; } -void CTimeSeriesDecompositionDetail::CComponents::acceptPersistInserter(core::CStatePersistInserter& inserter) const { +void CTimeSeriesDecompositionDetail::CComponents::acceptPersistInserter( + core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); - inserter.insertLevel(COMPONENTS_MACHINE_6_3_TAG, boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); + inserter.insertLevel( + COMPONENTS_MACHINE_6_3_TAG, + boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); inserter.insertValue(DECAY_RATE_6_3_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); - inserter.insertLevel(TREND_6_3_TAG, boost::bind(&CTrendComponent::acceptPersistInserter, m_Trend, _1)); + inserter.insertLevel(TREND_6_3_TAG, boost::bind(&CTrendComponent::acceptPersistInserter, + m_Trend, _1)); if (m_Seasonal) { - inserter.insertLevel(SEASONAL_6_3_TAG, boost::bind(&SSeasonal::acceptPersistInserter, m_Seasonal.get(), _1)); + inserter.insertLevel(SEASONAL_6_3_TAG, boost::bind(&SSeasonal::acceptPersistInserter, + m_Seasonal.get(), _1)); } if (m_Calendar) { - inserter.insertLevel(CALENDAR_6_3_TAG, boost::bind(&SCalendar::acceptPersistInserter, m_Calendar.get(), _1)); + inserter.insertLevel(CALENDAR_6_3_TAG, boost::bind(&SCalendar::acceptPersistInserter, + m_Calendar.get(), _1)); } inserter.insertValue(MEAN_VARIANCE_SCALE_6_3_TAG, m_MeanVarianceScale.toDelimited()); inserter.insertValue(MOMENTS_6_3_TAG, m_Moments.toDelimited()); @@ -1009,7 +1074,8 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag TDoubleVec deltas; if (m_Seasonal) { - m_Seasonal->componentsErrorsAndDeltas(time, seasonalComponents, seasonalErrors, deltas); + m_Seasonal->componentsErrorsAndDeltas(time, seasonalComponents, + seasonalErrors, deltas); } if (m_Calendar) { m_Calendar->componentsAndErrors(time, calendarComponents, calendarErrors); @@ -1023,7 +1089,8 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag TDoubleVec predictions(m + n); double error; double scale; - decompose(m_Trend, seasonalComponents, calendarComponents, time, deltas, values, predictions, error, scale); + decompose(m_Trend, seasonalComponents, calendarComponents, time, deltas, + values, predictions, error, scale); core_t::TTime observedInterval{m_Trend.observedInterval()}; @@ 
-1052,8 +1119,9 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag double v1{CBasicStatistics::variance(m_MomentsMinusTrend)}; double df0{CBasicStatistics::count(m_Moments) - 1.0}; double df1{CBasicStatistics::count(m_MomentsMinusTrend) - m_Trend.parameters()}; - m_UsingTrendForPrediction = v1 < SIGNIFICANT_VARIANCE_REDUCTION[0] * v0 && df0 > 0.0 && df1 > 0.0 && - CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE; + m_UsingTrendForPrediction = + v1 < SIGNIFICANT_VARIANCE_REDUCTION[0] * v0 && df0 > 0.0 && df1 > 0.0 && + CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE; if (m_UsingTrendForPrediction) { LOG_DEBUG("Detected trend at " << time); } @@ -1090,7 +1158,8 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedSeasonal TSeasonalComponentVec& components{m_Seasonal->s_Components}; TComponentErrorsVec& errors{m_Seasonal->s_PredictionErrors}; - if (!this->addSeasonalComponents(result, window, predictor, m_Trend, components, errors)) { + if (!this->addSeasonalComponents(result, window, predictor, m_Trend, + components, errors)) { break; } if (m_Watcher) { @@ -1101,7 +1170,8 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedSeasonal m_UsingTrendForPrediction = true; this->clearComponentErrors(); this->apply(SC_ADDED_COMPONENTS, message); - this->mediator()->forward(SNewComponents(time, lastTime, SNewComponents::E_GeneralSeasonal)); + this->mediator()->forward( + SNewComponents(time, lastTime, SNewComponents::E_GeneralSeasonal)); break; } case SC_DISABLED: @@ -1138,7 +1208,8 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar this->addCalendarComponent(feature, time, components, errors); this->apply(SC_ADDED_COMPONENTS, message); - this->mediator()->forward(SNewComponents(time, lastTime, SNewComponents::E_CalendarCyclic)); + this->mediator()->forward( + SNewComponents(time, lastTime, SNewComponents::E_CalendarCyclic)); break; } case SC_DISABLED: @@ -1154,7 +1225,9 @@ void CTimeSeriesDecompositionDetail::CComponents::useTrendForPrediction(void) { m_UsingTrendForPrediction = true; } -void CTimeSeriesDecompositionDetail::CComponents::shiftLevel(core_t::TTime time, double value, double shift) { +void CTimeSeriesDecompositionDetail::CComponents::shiftLevel(core_t::TTime time, + double value, + double shift) { m_Trend.shiftLevel(time, value, shift); } @@ -1230,7 +1303,8 @@ double CTimeSeriesDecompositionDetail::CComponents::decayRate() const { return m_DecayRate; } -void CTimeSeriesDecompositionDetail::CComponents::propagateForwards(core_t::TTime start, core_t::TTime end) { +void CTimeSeriesDecompositionDetail::CComponents::propagateForwards(core_t::TTime start, + core_t::TTime end) { m_Trend.propagateForwardsByTime(end - start); if (m_Seasonal) { m_Seasonal->propagateForwards(start, end); @@ -1238,7 +1312,8 @@ void CTimeSeriesDecompositionDetail::CComponents::propagateForwards(core_t::TTim if (m_Calendar) { m_Calendar->propagateForwards(start, end); } - double factor{std::exp(-m_DecayRate * static_cast<double>(end - start) / static_cast<double>(DAY))}; + double factor{std::exp(-m_DecayRate * static_cast<double>(end - start) / + static_cast<double>(DAY))}; m_MeanVarianceScale.age(factor); m_Moments.age(factor); m_MomentsMinusTrend.age(factor); @@ -1247,8 +1322,10 @@ void CTimeSeriesDecompositionDetail::CComponents::propagateForwards(core_t::TTim bool CTimeSeriesDecompositionDetail::CComponents::initialized() const { return m_UsingTrendForPrediction && 
m_Trend.initialized() ? true - : (m_Seasonal && m_Calendar ? m_Seasonal->initialized() || m_Calendar->initialized() - : (m_Seasonal ? m_Seasonal->initialized() : (m_Calendar ? m_Calendar->initialized() : false))); + : (m_Seasonal && m_Calendar + ? m_Seasonal->initialized() || m_Calendar->initialized() + : (m_Seasonal ? m_Seasonal->initialized() + : (m_Calendar ? m_Calendar->initialized() : false))); } const CTrendComponent& CTimeSeriesDecompositionDetail::CComponents::trend() const { @@ -1259,7 +1336,8 @@ const TSeasonalComponentVec& CTimeSeriesDecompositionDetail::CComponents::season return m_Seasonal ? m_Seasonal->s_Components : NO_SEASONAL_COMPONENTS; } -const maths_t::TCalendarComponentVec& CTimeSeriesDecompositionDetail::CComponents::calendar() const { +const maths_t::TCalendarComponentVec& +CTimeSeriesDecompositionDetail::CComponents::calendar() const { return m_Calendar ? m_Calendar->s_Components : NO_CALENDAR_COMPONENTS; } @@ -1267,7 +1345,8 @@ bool CTimeSeriesDecompositionDetail::CComponents::usingTrendForPrediction() cons return m_UsingTrendForPrediction; } -CPeriodicityHypothesisTestsConfig CTimeSeriesDecompositionDetail::CComponents::periodicityTestConfig() const { +CPeriodicityHypothesisTestsConfig +CTimeSeriesDecompositionDetail::CComponents::periodicityTestConfig() const { CPeriodicityHypothesisTestsConfig result; for (const auto& component : this->seasonal()) { const CSeasonalTime& time{component.time()}; @@ -1282,15 +1361,21 @@ CPeriodicityHypothesisTestsConfig CTimeSeriesDecompositionDetail::CComponents::p } double CTimeSeriesDecompositionDetail::CComponents::meanValue(core_t::TTime time) const { - return this->initialized() ? ((m_UsingTrendForPrediction ? CBasicStatistics::mean(m_Trend.value(time, 0.0)) : 0.0) + - meanOf(&CSeasonalComponent::meanValue, this->seasonal())) - : 0.0; + return this->initialized() + ? ((m_UsingTrendForPrediction + ? CBasicStatistics::mean(m_Trend.value(time, 0.0)) + : 0.0) + + meanOf(&CSeasonalComponent::meanValue, this->seasonal())) + : 0.0; } double CTimeSeriesDecompositionDetail::CComponents::meanVariance() const { - return this->initialized() ? ((m_UsingTrendForPrediction ? CBasicStatistics::mean(this->trend().variance(0.0)) : 0.0) + - meanOf(&CSeasonalComponent::meanVariance, this->seasonal())) - : 0.0; + return this->initialized() + ? ((m_UsingTrendForPrediction + ? 
CBasicStatistics::mean(this->trend().variance(0.0)) + : 0.0) + + meanOf(&CSeasonalComponent::meanVariance, this->seasonal())) + : 0.0; } double CTimeSeriesDecompositionDetail::CComponents::meanVarianceScale() const { @@ -1320,7 +1405,8 @@ void CTimeSeriesDecompositionDetail::CComponents::debugMemoryUsage(core::CMemory } std::size_t CTimeSeriesDecompositionDetail::CComponents::memoryUsage() const { - return core::CMemory::dynamicSize(m_Trend) + core::CMemory::dynamicSize(m_Seasonal) + core::CMemory::dynamicSize(m_Calendar); + return core::CMemory::dynamicSize(m_Trend) + core::CMemory::dynamicSize(m_Seasonal) + + core::CMemory::dynamicSize(m_Calendar); } std::size_t CTimeSeriesDecompositionDetail::CComponents::size() const { @@ -1331,12 +1417,13 @@ std::size_t CTimeSeriesDecompositionDetail::CComponents::maxSize() const { return MAXIMUM_COMPONENTS * m_SeasonalComponentSize; } -bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CPeriodicityHypothesisTestsResult& result, - const CExpandingWindow& window, - const TPredictor& predictor, - CTrendComponent& trend, - TSeasonalComponentVec& components, - TComponentErrorsVec& errors) const { +bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents( + const CPeriodicityHypothesisTestsResult& result, + const CExpandingWindow& window, + const TPredictor& predictor, + CTrendComponent& trend, + TSeasonalComponentVec& components, + TComponentErrorsVec& errors) const { using TSeasonalTimePtr = boost::shared_ptr<CSeasonalTime>; using TSeasonalTimePtrVec = std::vector<TSeasonalTimePtr>; @@ -1344,9 +1431,10 @@ bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CP for (const auto& candidate_ : result.components()) { TSeasonalTimePtr seasonalTime(candidate_.seasonalTime()); - if (std::find_if(components.begin(), components.end(), [&seasonalTime](const CSeasonalComponent& component) { - return component.time().excludes(*seasonalTime); - }) == components.end()) { + if (std::find_if(components.begin(), components.end(), + [&seasonalTime](const CSeasonalComponent& component) { + return component.time().excludes(*seasonalTime); + }) == components.end()) { LOG_DEBUG(<< "Detected '" << candidate_.s_Description << "'"); newSeasonalTimes.push_back(seasonalTime); } @@ -1355,21 +1443,25 @@ bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CP if (newSeasonalTimes.size() > 0) { for (const auto& seasonalTime : newSeasonalTimes) { components.erase( - std::remove_if(components.begin(), - components.end(), - [&seasonalTime](const CSeasonalComponent& component) { return seasonalTime->excludes(component.time()); }), + std::remove_if(components.begin(), components.end(), + [&seasonalTime](const CSeasonalComponent& component) { + return seasonalTime->excludes(component.time()); + }), components.end()); } - std::sort(newSeasonalTimes.begin(), newSeasonalTimes.end(), maths::COrderings::SLess()); + std::sort(newSeasonalTimes.begin(), newSeasonalTimes.end(), + maths::COrderings::SLess()); TFloatMeanAccumulatorVec values; for (const auto& seasonalTime : newSeasonalTimes) { values = window.valuesMinusPrediction(predictor); - components.emplace_back( - *seasonalTime, m_SeasonalComponentSize, m_DecayRate, static_cast<double>(m_BucketLength), CSplineTypes::E_Natural); + components.emplace_back(*seasonalTime, m_SeasonalComponentSize, + m_DecayRate, static_cast<double>(m_BucketLength), + CSplineTypes::E_Natural); components.back().initialize(window.startTime(), window.endTime(), values); - 
components.back().interpolate(CIntegerTools::floor(window.endTime(), seasonalTime->period())); + components.back().interpolate( + CIntegerTools::floor(window.endTime(), seasonalTime->period())); } CTrendComponent windowTrend{trend.defaultDecayRate()}; @@ -1380,7 +1472,8 @@ bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CP // we can get a run of unset values at the end of the window, // we should just ignore these. if (CBasicStatistics::count(value) > 0.0) { - windowTrend.add(time, CBasicStatistics::mean(value), CBasicStatistics::count(value)); + windowTrend.add(time, CBasicStatistics::mean(value), + CBasicStatistics::count(value)); windowTrend.propagateForwardsByTime(window.bucketLength()); } time += window.bucketLength(); @@ -1389,18 +1482,23 @@ bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CP errors.resize(components.size()); COrderings::simultaneousSort( - components, errors, [](const CSeasonalComponent& lhs, const CSeasonalComponent& rhs) { return lhs.time() < rhs.time(); }); + components, errors, + [](const CSeasonalComponent& lhs, const CSeasonalComponent& rhs) { + return lhs.time() < rhs.time(); + }); } return newSeasonalTimes.size() > 0; } -bool CTimeSeriesDecompositionDetail::CComponents::addCalendarComponent(const CCalendarFeature& feature, - core_t::TTime time, - maths_t::TCalendarComponentVec& components, - TComponentErrorsVec& errors) const { +bool CTimeSeriesDecompositionDetail::CComponents::addCalendarComponent( + const CCalendarFeature& feature, + core_t::TTime time, + maths_t::TCalendarComponentVec& components, + TComponentErrorsVec& errors) const { double bucketLength{static_cast(m_BucketLength)}; - components.emplace_back(feature, m_CalendarComponentSize, m_DecayRate, bucketLength, CSplineTypes::E_Natural); + components.emplace_back(feature, m_CalendarComponentSize, m_DecayRate, + bucketLength, CSplineTypes::E_Natural); components.back().initialize(); errors.resize(components.size()); LOG_DEBUG(<< "Detected feature '" << feature.print() << "' at " << time); @@ -1420,7 +1518,8 @@ void CTimeSeriesDecompositionDetail::CComponents::clearComponentErrors() { } } -void CTimeSeriesDecompositionDetail::CComponents::apply(std::size_t symbol, const SMessage& message) { +void CTimeSeriesDecompositionDetail::CComponents::apply(std::size_t symbol, + const SMessage& message) { if (symbol == SC_RESET) { m_Trend.clear(); m_Seasonal.reset(); @@ -1432,7 +1531,8 @@ void CTimeSeriesDecompositionDetail::CComponents::apply(std::size_t symbol, cons std::size_t state{m_Machine.state()}; if (state != old) { - LOG_TRACE(<< SC_STATES[old] << "," << SC_ALPHABET[symbol] << " -> " << SC_STATES[state]); + LOG_TRACE(<< SC_STATES[old] << "," << SC_ALPHABET[symbol] << " -> " + << SC_STATES[state]); switch (state) { case SC_NORMAL: @@ -1452,8 +1552,10 @@ void CTimeSeriesDecompositionDetail::CComponents::apply(std::size_t symbol, cons } } -bool CTimeSeriesDecompositionDetail::CComponents::shouldInterpolate(core_t::TTime time, core_t::TTime last) { - return m_Machine.state() == SC_NEW_COMPONENTS || (m_Seasonal && m_Seasonal->shouldInterpolate(time, last)) || +bool CTimeSeriesDecompositionDetail::CComponents::shouldInterpolate(core_t::TTime time, + core_t::TTime last) { + return m_Machine.state() == SC_NEW_COMPONENTS || + (m_Seasonal && m_Seasonal->shouldInterpolate(time, last)) || (m_Calendar && m_Calendar->shouldInterpolate(time, last)); } @@ -1528,11 +1630,13 @@ bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::fromDelimite } 
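The CComponentErrors hunks that follow maintain two winsorised error statistics per component: the mean squared prediction error with the component in the decomposition, and an estimate of the error were the component's contribution excluded. The remove() overloads below turn these into a pruning rule; here is a minimal sketch of that rule with hypothetical names (the count floor and the 1.5 threshold are taken from the code below):

    #include <algorithm>

    // Sketch only, not part of this patch: a component is a candidate for
    // removal once enough data has been seen and dropping it would barely
    // increase the prediction error.
    bool shouldPrune(double count, double minimumCount,
                     double errorWithComponent, double errorWithoutComponent,
                     double heteroscedasticity) {
        // Remove only if the error ratio without/with the component (or the
        // measured heteroscedasticity, whichever is larger) stays below 1.5.
        return count > minimumCount &&
               std::max(errorWithoutComponent / errorWithComponent,
                        heteroscedasticity) < 1.5;
    }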
std::string CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::toDelimited() const { - return m_MeanErrorWithComponent.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER + m_MeanErrorWithoutComponent.toDelimited() + - CBasicStatistics::EXTERNAL_DELIMITER; + return m_MeanErrorWithComponent.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER + + m_MeanErrorWithoutComponent.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER; } -void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::add(double error, double prediction, double weight) { +void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::add(double error, + double prediction, + double weight) { double errorWithComponent{winsorise(pow2(error), m_MeanErrorWithComponent)}; double errorWithoutComponent{winsorise(pow2(error - prediction), m_MeanErrorWithoutComponent)}; m_MeanErrorWithComponent.add(errorWithComponent, weight); @@ -1544,20 +1648,26 @@ void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::clear() { m_MeanErrorWithoutComponent = TFloatMeanAccumulator(); } -bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove(core_t::TTime bucketLength, CSeasonalComponent& seasonal) const { +bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove( + core_t::TTime bucketLength, + CSeasonalComponent& seasonal) const { double count{CBasicStatistics::count(m_MeanErrorWithComponent)}; double errorWithComponent{CBasicStatistics::mean(m_MeanErrorWithComponent)}; double errorWithoutComponent{CBasicStatistics::mean(m_MeanErrorWithoutComponent)}; return count > static_cast<double>(10 * seasonal.time().period() / bucketLength) && - std::max(errorWithoutComponent / errorWithComponent, seasonal.heteroscedasticity()) < 1.5; + std::max(errorWithoutComponent / errorWithComponent, + seasonal.heteroscedasticity()) < 1.5; } -bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove(core_t::TTime bucketLength, CCalendarComponent& calendar) const { +bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove( + core_t::TTime bucketLength, + CCalendarComponent& calendar) const { double count{CBasicStatistics::count(m_MeanErrorWithComponent)}; double errorWithComponent{CBasicStatistics::mean(m_MeanErrorWithComponent)}; double errorWithoutComponent{CBasicStatistics::mean(m_MeanErrorWithoutComponent)}; return count > static_cast<double>(5 * calendar.feature().window() / bucketLength) && - std::max(errorWithoutComponent / errorWithComponent, calendar.heteroscedasticity()) < 1.5; + std::max(errorWithoutComponent / errorWithComponent, + calendar.heteroscedasticity()) < 1.5; } void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::age(double factor) { @@ -1570,35 +1680,46 @@ uint64_t CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::checksum return CChecksum::calculate(seed, m_MeanErrorWithoutComponent); } -double CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::winsorise(double squareError, const TFloatMeanAccumulator& variance) { - return CBasicStatistics::count(variance) > 10.0 ? std::min(squareError, 36.0 * CBasicStatistics::mean(variance)) : squareError; +double CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::winsorise( + double squareError, + const TFloatMeanAccumulator& variance) { + return CBasicStatistics::count(variance) > 10.0 + ? 
std::min(squareError, 36.0 * CBasicStatistics::mean(variance)) : squareError; } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptRestoreTraverser(double decayRate, - core_t::TTime bucketLength_, - core::CStateRestoreTraverser& traverser) { +bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptRestoreTraverser( + double decayRate, + core_t::TTime bucketLength_, + core::CStateRestoreTraverser& traverser) { double bucketLength{static_cast<double>(bucketLength_)}; if (traverser.name() == VERSION_6_3_TAG) { while (traverser.next()) { const std::string& name{traverser.name()}; - RESTORE_NO_ERROR(COMPONENT_6_3_TAG, s_Components.emplace_back(decayRate, bucketLength, traverser)) - RESTORE(ERRORS_6_3_TAG, core::CPersistUtils::restore(ERRORS_6_3_TAG, s_PredictionErrors, traverser)) + RESTORE_NO_ERROR(COMPONENT_6_3_TAG, + s_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_6_3_TAG, + core::CPersistUtils::restore(ERRORS_6_3_TAG, s_PredictionErrors, traverser)) } } else { // There is no version string this is historic state. do { const std::string& name{traverser.name()}; - RESTORE_NO_ERROR(COMPONENT_OLD_TAG, s_Components.emplace_back(decayRate, bucketLength, traverser)) - RESTORE(ERRORS_OLD_TAG, core::CPersistUtils::restore(ERRORS_OLD_TAG, s_PredictionErrors, traverser)) + RESTORE_NO_ERROR(COMPONENT_OLD_TAG, + s_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_OLD_TAG, + core::CPersistUtils::restore(ERRORS_OLD_TAG, s_PredictionErrors, traverser)) } while (traverser.next()); } return true; } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptPersistInserter(core::CStatePersistInserter& inserter) const { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptPersistInserter( + core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); for (const auto& component : s_Components) { - inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind(&CSeasonalComponent::acceptPersistInserter, &component, _1)); + inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind(&CSeasonalComponent::acceptPersistInserter, + &component, _1)); } core::CPersistUtils::persist(ERRORS_6_3_TAG, s_PredictionErrors, inserter); } @@ -1609,13 +1730,15 @@ void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::decayRate(double de } } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::propagateForwards(core_t::TTime start, core_t::TTime end) { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::propagateForwards(core_t::TTime start, + core_t::TTime end) { for (std::size_t i = 0u; i < s_Components.size(); ++i) { core_t::TTime period{s_Components[i].time().period()}; core_t::TTime a{CIntegerTools::floor(start, period)}; core_t::TTime b{CIntegerTools::floor(end, period)}; if (b > a) { - double time{static_cast<double>(b - a) / static_cast<double>(CTools::truncate(period, DAY, WEEK))}; + double time{static_cast<double>(b - a) / + static_cast<double>(CTools::truncate(period, DAY, WEEK))}; s_Components[i].propagateForwardsByTime(time); s_PredictionErrors[i].age(std::exp(-s_Components[i].decayRate() * time)); } @@ -1630,10 +1753,11 @@ std::size_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::size() const return result; } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::componentsErrorsAndDeltas(core_t::TTime time, - TSeasonalComponentPtrVec& components, - TComponentErrorsPtrVec& errors, - TDoubleVec& deltas) { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::componentsErrorsAndDeltas( 
+ core_t::TTime time, + TSeasonalComponentPtrVec& components, + TComponentErrorsPtrVec& errors, + TDoubleVec& deltas) { std::size_t n{s_Components.size()}; components.reserve(n); @@ -1662,7 +1786,9 @@ void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::componentsErrorsAnd } } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shouldInterpolate(core_t::TTime time, core_t::TTime last) const { +bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shouldInterpolate( + core_t::TTime time, + core_t::TTime last) const { for (const auto& component : s_Components) { core_t::TTime period{component.time().period()}; core_t::TTime a{CIntegerTools::floor(last, period)}; @@ -1674,7 +1800,9 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shouldInterpolate(c return false; } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::interpolate(core_t::TTime time, core_t::TTime last, bool refine) { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::interpolate(core_t::TTime time, + core_t::TTime last, + bool refine) { for (auto& component : s_Components) { core_t::TTime period{component.time().period()}; core_t::TTime a{CIntegerTools::floor(last, period)}; @@ -1694,7 +1822,8 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::initialized() const return false; } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime time, core_t::TTime bucketLength) { +bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime time, + core_t::TTime bucketLength) { std::size_t n = s_Components.size(); if (n > 1) { @@ -1724,7 +1853,8 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime } } - CSetTools::simultaneousRemoveIf(remove, s_Components, s_PredictionErrors, [](bool remove_) { return remove_; }); + CSetTools::simultaneousRemoveIf(remove, s_Components, s_PredictionErrors, + [](bool remove_) { return remove_; }); for (auto& shift : shifts) { if (windowed.count(shift.first) > 0) { @@ -1749,7 +1879,8 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime for (auto& component : s_Components) { const CSeasonalTime& time_ = component.time(); if (std::find_if(shifted.begin(), shifted.end(), [&time_](const TTimeTimePr& window) { - return !(time_.windowEnd() <= window.first || time_.windowStart() >= window.second); + return !(time_.windowEnd() <= window.first || + time_.windowStart() >= window.second); }) == shifted.end()) { component.shiftLevel(shift.second); } @@ -1768,7 +1899,8 @@ void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shiftOrigin(core_t: } } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::linearScale(core_t::TTime time, double scale) { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::linearScale(core_t::TTime time, + double scale) { for (auto& component : s_Components) { component.linearScale(time, scale); } @@ -1779,41 +1911,50 @@ uint64_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::checksum(uint64 return CChecksum::calculate(seed, s_PredictionErrors); } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { +void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::debugMemoryUsage( + core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SSeasonal"); core::CMemoryDebug::dynamicSize("s_Components", s_Components, mem); core::CMemoryDebug::dynamicSize("s_PredictionErrors", s_PredictionErrors, mem); } 
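All the propagateForwards hunks in this file age their statistics with the same exponential rule. A minimal sketch, assuming elapsed time measured in days as in CComponents::propagateForwards (the seasonal variant instead measures it in units of the component's period, truncated to the range [DAY, WEEK]):

    #include <cmath>

    // Sketch only, not part of this patch: scaling an accumulator by
    // exp(-decayRate * elapsed) down-weights old observations, giving them a
    // half-life of ln(2) / decayRate elapsed-time units.
    double agingFactor(double decayRate, double elapsedDays) {
        return std::exp(-decayRate * elapsedDays);
    }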
std::size_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::memoryUsage() const { - return core::CMemory::dynamicSize(s_Components) + core::CMemory::dynamicSize(s_PredictionErrors); + return core::CMemory::dynamicSize(s_Components) + + core::CMemory::dynamicSize(s_PredictionErrors); } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptRestoreTraverser(double decayRate, - core_t::TTime bucketLength_, - core::CStateRestoreTraverser& traverser) { +bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptRestoreTraverser( + double decayRate, + core_t::TTime bucketLength_, + core::CStateRestoreTraverser& traverser) { double bucketLength{static_cast<double>(bucketLength_)}; if (traverser.name() == VERSION_6_3_TAG) { while (traverser.next()) { const std::string& name{traverser.name()}; - RESTORE_NO_ERROR(COMPONENT_6_3_TAG, s_Components.emplace_back(decayRate, bucketLength, traverser)) - RESTORE(ERRORS_6_3_TAG, core::CPersistUtils::restore(ERRORS_6_3_TAG, s_PredictionErrors, traverser)) + RESTORE_NO_ERROR(COMPONENT_6_3_TAG, + s_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_6_3_TAG, + core::CPersistUtils::restore(ERRORS_6_3_TAG, s_PredictionErrors, traverser)) } } else { // There is no version string this is historic state. do { const std::string& name{traverser.name()}; - RESTORE_NO_ERROR(COMPONENT_OLD_TAG, s_Components.emplace_back(decayRate, bucketLength, traverser)) - RESTORE(ERRORS_OLD_TAG, core::CPersistUtils::restore(ERRORS_OLD_TAG, s_PredictionErrors, traverser)) + RESTORE_NO_ERROR(COMPONENT_OLD_TAG, + s_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_OLD_TAG, + core::CPersistUtils::restore(ERRORS_OLD_TAG, s_PredictionErrors, traverser)) } while (traverser.next()); } return true; } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptPersistInserter(core::CStatePersistInserter& inserter) const { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptPersistInserter( + core::CStatePersistInserter& inserter) const { inserter.insertValue(VERSION_6_3_TAG, ""); for (const auto& component : s_Components) { - inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind(&CCalendarComponent::acceptPersistInserter, &component, _1)); + inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind(&CCalendarComponent::acceptPersistInserter, + &component, _1)); } core::CPersistUtils::persist(ERRORS_6_3_TAG, s_PredictionErrors, inserter); } @@ -1824,7 +1965,8 @@ void CTimeSeriesDecompositionDetail::CComponents::SCalendar::decayRate(double de } } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::propagateForwards(core_t::TTime start, core_t::TTime end) { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::propagateForwards(core_t::TTime start, + core_t::TTime end) { for (std::size_t i = 0u; i < s_Components.size(); ++i) { core_t::TTime a{CIntegerTools::floor(start, MONTH)}; core_t::TTime b{CIntegerTools::floor(end, MONTH)}; @@ -1853,9 +1995,10 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::haveComponent(CCale return false; } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::componentsAndErrors(core_t::TTime time, - TCalendarComponentPtrVec& components, - TComponentErrorsPtrVec& errors) { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::componentsAndErrors( + core_t::TTime time, + TCalendarComponentPtrVec& components, + TComponentErrorsPtrVec& errors) { std::size_t n = s_Components.size(); components.reserve(n); errors.reserve(n); @@ -1867,7 
+2010,9 @@ void CTimeSeriesDecompositionDetail::CComponents::SCalendar::componentsAndErrors } } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::shouldInterpolate(core_t::TTime time, core_t::TTime last) const { +bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::shouldInterpolate( + core_t::TTime time, + core_t::TTime last) const { for (const auto& component : s_Components) { CCalendarFeature feature = component.feature(); if (!feature.inWindow(time) && feature.inWindow(last)) { @@ -1877,7 +2022,9 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::shouldInterpolate(c return false; } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::interpolate(core_t::TTime time, core_t::TTime last, bool refine) { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::interpolate(core_t::TTime time, + core_t::TTime last, + bool refine) { for (auto& component : s_Components) { CCalendarFeature feature = component.feature(); if (!feature.inWindow(time) && feature.inWindow(last)) { @@ -1895,7 +2042,8 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::initialized() const return false; } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::prune(core_t::TTime time, core_t::TTime bucketLength) { +bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::prune(core_t::TTime time, + core_t::TTime bucketLength) { TBoolVec remove(s_Components.size(), false); for (std::size_t i = 0u; i < s_Components.size(); ++i) { if (s_PredictionErrors[i].remove(bucketLength, s_Components[i])) { @@ -1905,12 +2053,14 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::prune(core_t::TTime } } - CSetTools::simultaneousRemoveIf(remove, s_Components, s_PredictionErrors, [](bool remove_) { return remove_; }); + CSetTools::simultaneousRemoveIf(remove, s_Components, s_PredictionErrors, + [](bool remove_) { return remove_; }); return s_Components.empty(); } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::linearScale(core_t::TTime time, double scale) { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::linearScale(core_t::TTime time, + double scale) { for (auto& component : s_Components) { component.linearScale(time, scale); } @@ -1921,14 +2071,16 @@ uint64_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::checksum(uint64 return CChecksum::calculate(seed, s_PredictionErrors); } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { +void CTimeSeriesDecompositionDetail::CComponents::SCalendar::debugMemoryUsage( + core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("SCalendar"); core::CMemoryDebug::dynamicSize("s_Components", s_Components, mem); core::CMemoryDebug::dynamicSize("s_PredictionErrors", s_PredictionErrors, mem); } std::size_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::memoryUsage() const { - return core::CMemory::dynamicSize(s_Components) + core::CMemory::dynamicSize(s_PredictionErrors); + return core::CMemory::dynamicSize(s_Components) + + core::CMemory::dynamicSize(s_PredictionErrors); } } } diff --git a/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc b/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc index 4f49c74939..8bfe1a0c64 100644 --- a/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc +++ b/lib/maths/CTimeSeriesDecompositionStateSerialiser.cc @@ -35,7 +35,9 @@ const std::string EMPTY_STRING; } bool CTimeSeriesDecompositionStateSerialiser:: -operator()(const 
STimeSeriesDecompositionRestoreParams& params, TDecompositionPtr& result, core::CStateRestoreTraverser& traverser) const { +operator()(const STimeSeriesDecompositionRestoreParams& params, + TDecompositionPtr& result, + core::CStateRestoreTraverser& traverser) const { std::size_t numResults = 0; do { @@ -61,17 +63,19 @@ operator()(const STimeSeriesDecompositionRestoreParams& params, TDecompositionPt return true; } -void CTimeSeriesDecompositionStateSerialiser::operator()(const CTimeSeriesDecompositionInterface& decomposition, - core::CStatePersistInserter& inserter) const { +void CTimeSeriesDecompositionStateSerialiser:: +operator()(const CTimeSeriesDecompositionInterface& decomposition, + core::CStatePersistInserter& inserter) const { if (dynamic_cast<const CTimeSeriesDecomposition*>(&decomposition) != nullptr) { - inserter.insertLevel(TIME_SERIES_DECOMPOSITION_TAG, - boost::bind(&CTimeSeriesDecomposition::acceptPersistInserter, - dynamic_cast<const CTimeSeriesDecomposition*>(&decomposition), - _1)); + inserter.insertLevel( + TIME_SERIES_DECOMPOSITION_TAG, + boost::bind(&CTimeSeriesDecomposition::acceptPersistInserter, + dynamic_cast<const CTimeSeriesDecomposition*>(&decomposition), _1)); } else if (dynamic_cast<const CTimeSeriesDecompositionStub*>(&decomposition) != nullptr) { inserter.insertValue(TIME_SERIES_DECOMPOSITION_STUB_TAG, ""); } else { - LOG_ERROR(<< "Decomposition with type '" << typeid(decomposition).name() << "' has no defined name"); + LOG_ERROR(<< "Decomposition with type '" << typeid(decomposition).name() + << "' has no defined name"); } } } diff --git a/lib/maths/CTimeSeriesDecompositionStub.cc b/lib/maths/CTimeSeriesDecompositionStub.cc index 5f66299842..0a1f74a660 100644 --- a/lib/maths/CTimeSeriesDecompositionStub.cc +++ b/lib/maths/CTimeSeriesDecompositionStub.cc @@ -39,7 +39,9 @@ bool CTimeSeriesDecompositionStub::addPoint(core_t::TTime /*time*/, return false; } -bool CTimeSeriesDecompositionStub::applyChange(core_t::TTime /*time*/, double /*value*/, const SChangeDescription& /*change*/) { +bool CTimeSeriesDecompositionStub::applyChange(core_t::TTime /*time*/, + double /*value*/, + const SChangeDescription& /*change*/) { return false; } @@ -50,8 +52,10 @@ double CTimeSeriesDecompositionStub::meanValue(core_t::TTime /*time*/) const { return 0.0; } -maths_t::TDoubleDoublePr -CTimeSeriesDecompositionStub::value(core_t::TTime /*time*/, double /*confidence*/, int /*components*/, bool /*smooth*/) const { +maths_t::TDoubleDoublePr CTimeSeriesDecompositionStub::value(core_t::TTime /*time*/, + double /*confidence*/, + int /*components*/, + bool /*smooth*/) const { return {0.0, 0.0}; } @@ -63,7 +67,10 @@ void CTimeSeriesDecompositionStub::forecast(core_t::TTime /*startTime*/, const TWriteForecastResult& /*writer*/) { } -double CTimeSeriesDecompositionStub::detrend(core_t::TTime /*time*/, double value, double /*confidence*/, int /*components*/) const { +double CTimeSeriesDecompositionStub::detrend(core_t::TTime /*time*/, + double value, + double /*confidence*/, + int /*components*/) const { return value; } @@ -71,8 +78,10 @@ double CTimeSeriesDecompositionStub::meanVariance() const { return 0.0; } -maths_t::TDoubleDoublePr -CTimeSeriesDecompositionStub::scale(core_t::TTime /*time*/, double /*variance*/, double /*confidence*/, bool /*smooth*/) const { +maths_t::TDoubleDoublePr CTimeSeriesDecompositionStub::scale(core_t::TTime /*time*/, + double /*variance*/, + double /*confidence*/, + bool /*smooth*/) const { return {1.0, 1.0}; } diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index f9c6155f4e..385d2cbbc6 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc 
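The first hunks of CTimeSeriesModel.cc below reformat the Winsorisation weight helpers. For orientation, deratedMinimumWinsorisationWeight linearly relaxes the minimum weight from MINIMUM_TAIL_WINSORISATION_WEIGHT at a derate of 0 up to 0.5 at a derate of 1; a minimal sketch with illustrative names:

    #include <algorithm>

    // Sketch only, not part of this patch: linear interpolation of the
    // minimum Winsorisation weight as the derate rises from 0 to 1.
    double deratedMinimum(double minimumWeight, double derate) {
        derate = std::max(0.0, std::min(derate, 1.0)); // CTools::truncate equivalent
        return minimumWeight + (0.5 - minimumWeight) * derate;
    }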
@@ -64,7 +64,11 @@ using TChangeDetectorPtr = boost::shared_ptr<CUnivariateTimeSeriesChangeDetector>; -const double MINUS_LOG_TOLERANCE{-std::log(1.0 - 100.0 * std::numeric_limits<double>::epsilon())}; +const double MINUS_LOG_TOLERANCE{ + -std::log(1.0 - 100.0 * std::numeric_limits<double>::epsilon())}; //! Derate the minimum Winsorisation weight. double deratedMinimumWinsorisationWeight(double derate) { derate = CTools::truncate(derate, 0.0, 1.0); - return MINIMUM_TAIL_WINSORISATION_WEIGHT + (0.5 - MINIMUM_TAIL_WINSORISATION_WEIGHT) * derate; + return MINIMUM_TAIL_WINSORISATION_WEIGHT + + (0.5 - MINIMUM_TAIL_WINSORISATION_WEIGHT) * derate; } //! Get the one tail p-value from a specified Winsorisation weight. @@ -93,33 +99,39 @@ double pValueFromTailWinsorisationWeight(double weight) { } double logw{std::log(std::max(weight, MINIMUM_TAIL_WINSORISATION_WEIGHT))}; - return std::exp(0.5 * - (LOG_WINSORISED_FRACTION - std::sqrt(CTools::pow2(LOG_WINSORISED_FRACTION) + - 4.0 * logw / LOG_MINIMUM_TAIL_WINSORISATION_WEIGHT * LOG_MINIMUM_WEIGHT_FRACTION * - (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)))); + return std::exp( + 0.5 * (LOG_WINSORISED_FRACTION - + std::sqrt(CTools::pow2(LOG_WINSORISED_FRACTION) + + 4.0 * logw / LOG_MINIMUM_TAIL_WINSORISATION_WEIGHT * LOG_MINIMUM_WEIGHT_FRACTION * + (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)))); } //! Optionally randomly sample from \p indices. -TOptionalSize -randomlySample(CPRNG::CXorOShiro128Plus& rng, const CModelAddSamplesParams& params, core_t::TTime bucketLength, const TSizeVec& indices) { +TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus& rng, + const CModelAddSamplesParams& params, + core_t::TTime bucketLength, + const TSizeVec& indices) { using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>; double weight{1.0}; { - auto i = std::find(params.weightStyles().begin(), params.weightStyles().end(), maths_t::E_SampleWinsorisationWeight); + auto i = std::find(params.weightStyles().begin(), params.weightStyles().end(), + maths_t::E_SampleWinsorisationWeight); if (i != params.weightStyles().end()) { std::ptrdiff_t index{i - params.weightStyles().begin()}; auto addWeight = [index](TMeanAccumulator mean, const TDouble2Vec4Vec& weight_) { mean.add(weight_[index]); return mean; }; - TMeanAccumulator mean{ - std::accumulate(params.trendWeights().begin(), params.trendWeights().end(), TMeanAccumulator{}, addWeight)}; + TMeanAccumulator mean{std::accumulate(params.trendWeights().begin(), + params.trendWeights().end(), + TMeanAccumulator{}, addWeight)}; weight = CBasicStatistics::mean(mean); } } - double p{SLIDING_WINDOW_SIZE * static_cast<double>(bucketLength) / static_cast<double>(core::constants::DAY) * weight}; + double p{SLIDING_WINDOW_SIZE * static_cast<double>(bucketLength) / + static_cast<double>(core::constants::DAY) * weight}; if (p >= 1.0 || CSampling::uniformSample(rng, 0.0, 1.0) < p) { std::size_t i{CSampling::uniformSample(rng, 0, indices.size())}; return indices[i]; @@ -133,7 +145,9 @@ randomlySample(CPRNG::CXorOShiro128Plus& rng, const CModelAddSamplesParams& para double changeWinsorisationWeight(const TChangeDetectorPtr& detector) { if (detector != nullptr) { std::size_t dummy; - return std::max(CTools::logisticFunction(detector->decisionFunction(dummy), 0.1, 1.0, -1.0), MINIMUM_CHANGE_WINSORISATION_WEIGHT); + return std::max(CTools::logisticFunction(detector->decisionFunction(dummy), + 0.1, 1.0, -1.0), + MINIMUM_CHANGE_WINSORISATION_WEIGHT); } return 1.0; } @@ -147,7 +161,8 @@ std::string toDelimited(const TTimeDoublePr& value) { //! Extract \p value from comma separated string. 
bool fromDelimited(const std::string& str, TTimeDoublePr& value) { std::size_t pos{str.find(',')}; - return pos != std::string::npos && core::CStringUtils::stringToType(str.substr(0, pos), value.first) && + return pos != std::string::npos && + core::CStringUtils::stringToType(str.substr(0, pos), value.first) && core::CStringUtils::stringToType(str.substr(pos + 1), value.second); } @@ -206,11 +221,13 @@ double tailWinsorisationWeight(const CPrior& prior, double derate, double scale, double lowerBound; double upperBound; - if (!prior.minusLogJointCdf(CConstantWeights::SEASONAL_VARIANCE, {value}, {{scale}}, lowerBound, upperBound)) { + if (!prior.minusLogJointCdf(CConstantWeights::SEASONAL_VARIANCE, {value}, + {{scale}}, lowerBound, upperBound)) { return 1.0; } if (upperBound < MINUS_LOG_TOLERANCE && - !prior.minusLogJointCdfComplement(CConstantWeights::SEASONAL_VARIANCE, {value}, {{scale}}, lowerBound, upperBound)) { + !prior.minusLogJointCdfComplement(CConstantWeights::SEASONAL_VARIANCE, {value}, + {{scale}}, lowerBound, upperBound)) { return 1.0; } @@ -255,7 +272,8 @@ double tailWinsorisationWeight(const CMultivariatePrior& prior, condition[j++] = std::make_pair(i, value[i]); } } - boost::shared_ptr<CPrior> conditional(prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); + boost::shared_ptr<CPrior> conditional( + prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); return tailWinsorisationWeight(*conditional, derate, scale, value[dimension]); } @@ -269,7 +287,10 @@ class CTimeSeriesAnomalyModel { //! //! This extends the current anomaly if \p probability is small. //! Otherwise it closes it. - void updateAnomaly(const CModelProbabilityParams& params, core_t::TTime time, TDouble2Vec errors, double probability); + void updateAnomaly(const CModelProbabilityParams& params, + core_t::TTime time, + TDouble2Vec errors, + double probability); //! If the time series is currently anomalous, update the model //! with the anomaly feature vector. @@ -280,7 +301,9 @@ class CTimeSeriesAnomalyModel { //! If the time series is currently anomalous, compute the anomalousness //! of the anomaly feature vector. - void probability(const CModelProbabilityParams& params, core_t::TTime time, double& probability) const; + void probability(const CModelProbabilityParams& params, + core_t::TTime time, + double& probability) const; //! Age the model to account for \p time elapsed time. void propagateForwardsByTime(double time); @@ -295,7 +318,8 @@ class CTimeSeriesAnomalyModel { std::size_t memoryUsage() const; //! Initialize reading state from \p traverser. - bool acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser); + bool acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser); //! Persist by passing information to \p inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; @@ -315,7 +339,8 @@ class CTimeSeriesAnomalyModel { public: CAnomaly() : m_Tag(0), m_OpenTime(0), m_Sign(0.0) {} - CAnomaly(std::size_t tag, core_t::TTime time) : m_Tag(tag), m_OpenTime(time), m_Sign(0.0) {} + CAnomaly(std::size_t tag, core_t::TTime time) + : m_Tag(tag), m_OpenTime(time), m_Sign(0.0) {} //! Get the anomaly tag. std::size_t tag() const { return m_Tag; } @@ -331,14 +356,17 @@ class CTimeSeriesAnomalyModel { } //! Get the weight to apply to this anomaly on update. 
- double weight(core_t::TTime time) const { return 1.0 / (1.0 + std::max(static_cast<double>(time - m_OpenTime), 0.0)); } + double weight(core_t::TTime time) const { + return 1.0 / (1.0 + std::max(static_cast<double>(time - m_OpenTime), 0.0)); + } //! Check if this anomaly is positive or negative. bool positive() const { return m_Sign > 0.0; } //! Get the feature vector for this anomaly. TDouble10Vec features(core_t::TTime time) const { - return {static_cast<double>(time - m_OpenTime), CBasicStatistics::mean(m_MeanErrorNorm)}; + return {static_cast<double>(time - m_OpenTime), + CBasicStatistics::mean(m_MeanErrorNorm)}; } //! Compute a checksum for this object. @@ -356,7 +384,8 @@ class CTimeSeriesAnomalyModel { RESTORE_BUILT_IN(TAG_TAG, m_Tag) RESTORE_BUILT_IN(OPEN_TIME_TAG, m_OpenTime) RESTORE_BUILT_IN(SIGN_TAG, m_Sign) - RESTORE(MEAN_ERROR_NORM_TAG, m_MeanErrorNorm.fromDelimited(traverser.value())) + RESTORE(MEAN_ERROR_NORM_TAG, + m_MeanErrorNorm.fromDelimited(traverser.value())) } while (traverser.next()); return true; } @@ -400,11 +429,14 @@ class CTimeSeriesAnomalyModel { void sample(core_t::TTime time, const CAnomaly& anomaly, double weight) { std::size_t index(anomaly.positive() ? 0 : 1); TDouble10Vec1Vec features{anomaly.features(this->scale(time))}; - m_AnomalyFeatureModels[index].addSamples(CConstantWeights::COUNT, features, {{TDouble10Vec(2, weight)}}); + m_AnomalyFeatureModels[index].addSamples(CConstantWeights::COUNT, features, + {{TDouble10Vec(2, weight)}}); } //! Get the scaled time. - core_t::TTime scale(core_t::TTime time) const { return time / m_BucketLength; } + core_t::TTime scale(core_t::TTime time) const { + return time / m_BucketLength; + } private: //! The data bucketing interval. @@ -422,16 +454,19 @@ class CTimeSeriesAnomalyModel { CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel() : m_BucketLength(0) { m_AnomalyFeatureModels.reserve(2); - m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); - m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + m_AnomalyFeatureModels.push_back( + TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + m_AnomalyFeatureModels.push_back( + TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); } -CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel(core_t::TTime bucketLength, double decayRate) : m_BucketLength(bucketLength) { +CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel(core_t::TTime bucketLength, double decayRate) + : m_BucketLength(bucketLength) { m_AnomalyFeatureModels.reserve(2); - m_AnomalyFeatureModels.push_back( - TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); - m_AnomalyFeatureModels.push_back( - TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); + m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); + m_AnomalyFeatureModels.push_back(TMultivariateNormalConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.5 * LARGEST_ANOMALOUS_PROBABILITY * decayRate)); } void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams& params, @@ -440,11 +475,14 @@ void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams& param double probability) { if (params.updateAnomalyModel()) { std::size_t 
tag{params.tag()}; - auto anomaly = - std::find_if(m_Anomalies.begin(), m_Anomalies.end(), [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); + auto anomaly = std::find_if( + m_Anomalies.begin(), m_Anomalies.end(), + [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); if (probability < LARGEST_ANOMALOUS_PROBABILITY) { - m_MeanError.add(std::sqrt(std::accumulate(errors.begin(), errors.end(), 0.0, [](double n, double x) { return n + x * x; }))); + m_MeanError.add(std::sqrt( + std::accumulate(errors.begin(), errors.end(), 0.0, + [](double n, double x) { return n + x * x; }))); double scale{CBasicStatistics::mean(m_MeanError)}; for (auto& error : errors) { @@ -463,11 +501,13 @@ void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams& param } } -void CTimeSeriesAnomalyModel::sampleAnomaly(const CModelProbabilityParams& params, core_t::TTime time) { +void CTimeSeriesAnomalyModel::sampleAnomaly(const CModelProbabilityParams& params, + core_t::TTime time) { if (params.updateAnomalyModel()) { std::size_t tag{params.tag()}; - auto anomaly = - std::find_if(m_Anomalies.begin(), m_Anomalies.end(), [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); + auto anomaly = std::find_if( + m_Anomalies.begin(), m_Anomalies.end(), + [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); if (anomaly != m_Anomalies.end()) { this->sample(time, *anomaly, anomaly->weight(this->scale(time))); } @@ -477,32 +517,41 @@ void CTimeSeriesAnomalyModel::sampleAnomaly(const CModelProbabilityParams& param void CTimeSeriesAnomalyModel::reset() { m_MeanError = TMeanAccumulator(); for (auto& model : m_AnomalyFeatureModels) { - model = TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData, model.decayRate()); + model = TMultivariateNormalConjugate::nonInformativePrior( + maths_t::E_ContinuousData, model.decayRate()); } } -void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams& params, core_t::TTime time, double& probability) const { +void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams& params, + core_t::TTime time, + double& probability) const { std::size_t tag{params.tag()}; - auto anomaly = std::find_if(m_Anomalies.begin(), m_Anomalies.end(), [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); + auto anomaly = std::find_if( + m_Anomalies.begin(), m_Anomalies.end(), + [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); if (anomaly != m_Anomalies.end()) { std::size_t index(anomaly->positive() ? 
0 : 1); TDouble10Vec1Vec features{anomaly->features(this->scale(time))}; double pl, pu; TTail10Vec tail; - if (probability < LARGEST_ANOMALOUS_PROBABILITY && !m_AnomalyFeatureModels[index].isNonInformative() && + if (probability < LARGEST_ANOMALOUS_PROBABILITY && + !m_AnomalyFeatureModels[index].isNonInformative() && m_AnomalyFeatureModels[index].probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, CConstantWeights::COUNT, features, UNIT, pl, pu, tail)) { + maths_t::E_OneSidedAbove, CConstantWeights::COUNT, features, + UNIT, pl, pu, tail)) { double logp{CTools::fastLog(probability)}; - double alpha{ - 0.5 * - std::min((logp - LOG_LARGEST_ANOMALOUS_PROBABILITY) / (LOG_SMALL_PROBABILITY - LOG_LARGEST_ANOMALOUS_PROBABILITY), 1.0)}; + double alpha{0.5 * std::min((logp - LOG_LARGEST_ANOMALOUS_PROBABILITY) / + (LOG_SMALL_PROBABILITY - LOG_LARGEST_ANOMALOUS_PROBABILITY), + 1.0)}; double pGivenAnomalous{(pl + pu) / 2.0}; double pScore{CTools::anomalyScore(probability)}; double pScoreGivenAnomalous{CTools::anomalyScore(pGivenAnomalous)}; - LOG_TRACE(<< "features = " << features << " score(.) = " << pScore << " score(.|anomalous) = " << pScoreGivenAnomalous + LOG_TRACE(<< "features = " << features << " score(.) = " << pScore + << " score(.|anomalous) = " << pScoreGivenAnomalous << " p = " << probability); - probability = - std::min(CTools::inverseAnomalyScore((1.0 - alpha) * pScore + alpha * pScoreGivenAnomalous), LARGEST_ANOMALOUS_PROBABILITY); + probability = std::min( + CTools::inverseAnomalyScore((1.0 - alpha) * pScore + alpha * pScoreGivenAnomalous), + LARGEST_ANOMALOUS_PROBABILITY); } } } @@ -527,19 +576,23 @@ void CTimeSeriesAnomalyModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsageP } std::size_t CTimeSeriesAnomalyModel::memoryUsage() const { - return core::CMemory::dynamicSize(m_Anomalies) + core::CMemory::dynamicSize(m_AnomalyFeatureModels); + return core::CMemory::dynamicSize(m_Anomalies) + + core::CMemory::dynamicSize(m_AnomalyFeatureModels); } -bool CTimeSeriesAnomalyModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CTimeSeriesAnomalyModel::acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { m_BucketLength = boost::unwrap_ref(params.s_Params).bucketLength(); std::size_t index{0}; do { const std::string& name{traverser.name()}; RESTORE(MEAN_ERROR_TAG, m_MeanError.fromDelimited(traverser.value())); - RESTORE(ANOMALIES_TAG, core::CPersistUtils::restore(ANOMALIES_TAG, m_Anomalies, traverser)); + RESTORE(ANOMALIES_TAG, + core::CPersistUtils::restore(ANOMALIES_TAG, m_Anomalies, traverser)); RESTORE(ANOMALY_FEATURE_MODEL_TAG, traverser.traverseSubLevel( - boost::bind(&TMultivariateNormalConjugate::acceptRestoreTraverser, &m_AnomalyFeatureModels[index++], _1))) + boost::bind(&TMultivariateNormalConjugate::acceptRestoreTraverser, + &m_AnomalyFeatureModels[index++], _1))) } while (traverser.next()); return true; } @@ -548,15 +601,19 @@ void CTimeSeriesAnomalyModel::acceptPersistInserter(core::CStatePersistInserter& inserter.insertValue(MEAN_ERROR_TAG, m_MeanError.toDelimited()); core::CPersistUtils::persist(ANOMALIES_TAG, m_Anomalies, inserter); inserter.insertLevel(ANOMALY_FEATURE_MODEL_TAG, - boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, &m_AnomalyFeatureModels[0], _1)); + boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, + &m_AnomalyFeatureModels[0], _1)); inserter.insertLevel(ANOMALY_FEATURE_MODEL_TAG, - 
boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, &m_AnomalyFeatureModels[1], _1)); + boost::bind(&TMultivariateNormalConjugate::acceptPersistInserter, + &m_AnomalyFeatureModels[1], _1)); } const double CTimeSeriesAnomalyModel::LARGEST_ANOMALOUS_PROBABILITY{0.1}; -const double CTimeSeriesAnomalyModel::LOG_LARGEST_ANOMALOUS_PROBABILITY{CTools::fastLog(LARGEST_ANOMALOUS_PROBABILITY)}; +const double CTimeSeriesAnomalyModel::LOG_LARGEST_ANOMALOUS_PROBABILITY{ + CTools::fastLog(LARGEST_ANOMALOUS_PROBABILITY)}; const double CTimeSeriesAnomalyModel::LOG_SMALL_PROBABILITY{CTools::fastLog(SMALL_PROBABILITY)}; -const TDouble10Vec4Vec1Vec CTimeSeriesAnomalyModel::UNIT{CConstantWeights::unit<TDouble10Vec>(2)}; +const TDouble10Vec4Vec1Vec CTimeSeriesAnomalyModel::UNIT{ + CConstantWeights::unit<TDouble10Vec>(2)}; CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams& params, std::size_t id, @@ -564,25 +621,25 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams& param const CPrior& residualModel, const TDecayRateController2Ary* controllers, bool modelAnomalies) - : CModel(params), - m_Id(id), - m_IsNonNegative(false), - m_IsForecastable(true), - m_TrendModel(trendModel.clone()), - m_ResidualModel(residualModel.clone()), - m_AnomalyModel(modelAnomalies ? boost::make_shared<CTimeSeriesAnomalyModel>(params.bucketLength(), params.decayRate()) + : CModel(params), m_Id(id), m_IsNonNegative(false), m_IsForecastable(true), + m_TrendModel(trendModel.clone()), m_ResidualModel(residualModel.clone()), + m_AnomalyModel(modelAnomalies ? boost::make_shared<CTimeSeriesAnomalyModel>( + params.bucketLength(), + params.decayRate()) : TAnomalyModelPtr()), - m_CurrentChangeInterval(0), - m_SlidingWindow(SLIDING_WINDOW_SIZE), + m_CurrentChangeInterval(0), m_SlidingWindow(SLIDING_WINDOW_SIZE), m_Correlations(nullptr) { if (controllers) { m_Controllers = boost::make_shared<TDecayRateController2Ary>(*controllers); } } -CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) - : CModel(params.s_Params), m_IsForecastable(false), m_SlidingWindow(SLIDING_WINDOW_SIZE), m_Correlations(nullptr) { - traverser.traverseSubLevel(boost::bind(&CUnivariateTimeSeriesModel::acceptRestoreTraverser, this, boost::cref(params), _1)); +CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) + : CModel(params.s_Params), m_IsForecastable(false), + m_SlidingWindow(SLIDING_WINDOW_SIZE), m_Correlations(nullptr) { + traverser.traverseSubLevel(boost::bind(&CUnivariateTimeSeriesModel::acceptRestoreTraverser, + this, boost::cref(params), _1)); } CUnivariateTimeSeriesModel::~CUnivariateTimeSeriesModel() { @@ -638,12 +695,15 @@ TSize2Vec1Vec CUnivariateTimeSeriesModel::correlates() const { void CUnivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec& values) { for (const auto& value : values) { m_ResidualModel->adjustOffset( - CConstantWeights::COUNT, {m_TrendModel->detrend(value.first, value.second[0], 0.0)}, CConstantWeights::SINGLE_UNIT); + CConstantWeights::COUNT, + {m_TrendModel->detrend(value.first, value.second[0], 0.0)}, + CConstantWeights::SINGLE_UNIT); } } -CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, - TTimeDouble2VecSizeTrVec samples) { +CUnivariateTimeSeriesModel::EUpdateResult +CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, + TTimeDouble2VecSizeTrVec samples) { if (samples.empty()) { return E_Success; } @@ -652,12 
+712,14 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::addSamples TSizeVec valueorder(samples.size()); std::iota(valueorder.begin(), valueorder.end(), 0); - std::stable_sort(valueorder.begin(), valueorder.end(), [&samples](std::size_t lhs, std::size_t rhs) { - return samples[lhs].second < samples[rhs].second; - }); + std::stable_sort(valueorder.begin(), valueorder.end(), + [&samples](std::size_t lhs, std::size_t rhs) { + return samples[lhs].second < samples[rhs].second; + }); TOptionalTimeDoublePr randomSample; - if (TOptionalSize index = randomlySample(m_Rng, params, this->params().bucketLength(), valueorder)) { + if (TOptionalSize index = randomlySample( + m_Rng, params, this->params().bucketLength(), valueorder)) { randomSample.reset({samples[*index].first, samples[*index].second[0]}); } @@ -669,15 +731,17 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::addSamples m_ResidualModel->dataType(type); m_TrendModel->dataType(type); - result = CModel::combine(result, this->updateTrend(params.weightStyles(), samples, params.trendWeights())); + result = CModel::combine(result, this->updateTrend(params.weightStyles(), samples, + params.trendWeights())); for (auto& sample : samples) { sample.second[0] = m_TrendModel->detrend(sample.first, sample.second[0], 0.0); } - std::stable_sort(valueorder.begin(), valueorder.end(), [&samples](std::size_t lhs, std::size_t rhs) { - return samples[lhs].second < samples[rhs].second; - }); + std::stable_sort(valueorder.begin(), valueorder.end(), + [&samples](std::size_t lhs, std::size_t rhs) { + return samples[lhs].second < samples[rhs].second; + }); TDouble1Vec samples_; TDouble4Vec1Vec weights_; @@ -710,7 +774,8 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::addSamples core_t::TTime time{static_cast<core_t::TTime>(CBasicStatistics::mean(averageTime))}; TDouble1Vec trendMean{m_TrendModel->meanValue(time)}; multiplier = controller.multiplier( - trendMean, errors[E_TrendControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate()); + trendMean, errors[E_TrendControl], this->params().bucketLength(), + this->params().learnRate(), this->params().decayRate()); if (multiplier != 1.0) { m_TrendModel->decayRate(multiplier * m_TrendModel->decayRate()); LOG_TRACE(<< "trend decay rate = " << m_TrendModel->decayRate()); @@ -719,11 +784,9 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::addSamples { CDecayRateController& controller{(*m_Controllers)[E_ResidualControl]}; TDouble1Vec residualMean{m_ResidualModel->marginalLikelihoodMean()}; - multiplier = controller.multiplier(residualMean, - errors[E_ResidualControl], - this->params().bucketLength(), - this->params().learnRate(), - this->params().decayRate()); + multiplier = controller.multiplier( + residualMean, errors[E_ResidualControl], this->params().bucketLength(), + this->params().learnRate(), this->params().decayRate()); if (multiplier != 1.0) { m_ResidualModel->decayRate(multiplier * m_ResidualModel->decayRate()); LOG_TRACE(<< "prior decay rate = " << m_ResidualModel->decayRate()); @@ -747,28 +810,34 @@ void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap) { } CUnivariateTimeSeriesModel::TDouble2Vec -CUnivariateTimeSeriesModel::mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const { - return {m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights)) + CBasicStatistics::mean(m_TrendModel->value(time))}; 
+CUnivariateTimeSeriesModel::mode(core_t::TTime time, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const { + return {m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights)) + + CBasicStatistics::mean(m_TrendModel->value(time))}; } -CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const { +CUnivariateTimeSeriesModel::TDouble2Vec1Vec +CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec1Vec& weights) const { TDouble2Vec1Vec result; TSize1Vec correlated; TSize2Vec1Vec variables; TMultivariatePriorCPtrSizePr1Vec correlationModels; TModelCPtr1Vec correlatedTimeSeriesModels; - if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) { + if (this->correlationModels(correlated, variables, correlationModels, + correlatedTimeSeriesModels)) { result.resize(correlated.size(), TDouble10Vec(2)); double baseline[2]; baseline[0] = CBasicStatistics::mean(m_TrendModel->value(time)); for (std::size_t i = 0u; i < correlated.size(); ++i) { - baseline[1] = CBasicStatistics::mean(correlatedTimeSeriesModels[i]->m_TrendModel->value(time)); - TDouble10Vec mode( - correlationModels[i].first->marginalLikelihoodMode(weightStyles, CMultivariateTimeSeriesModel::unpack(weights[i]))); + baseline[1] = CBasicStatistics::mean( + correlatedTimeSeriesModels[i]->m_TrendModel->value(time)); + TDouble10Vec mode(correlationModels[i].first->marginalLikelihoodMode( + weightStyles, CMultivariateTimeSeriesModel::unpack(weights[i]))); result[i][variables[i][0]] = baseline[0] + mode[variables[i][0]]; result[i][variables[i][1]] = baseline[1] + mode[variables[i][1]]; } @@ -777,8 +846,9 @@ CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::correlat return result; } -CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const { +CUnivariateTimeSeriesModel::TDouble2Vec1Vec +CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const { TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights))); TDouble2Vec1Vec result; @@ -790,7 +860,9 @@ CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::residual return result; } -void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const { +void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time, + double confidenceInterval, + TDouble2Vec1Vec& value) const { if (value.empty()) { return; } @@ -802,13 +874,16 @@ void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time, double confi TSize2Vec1Vec variables; TMultivariatePriorCPtrSizePr1Vec correlationModels; TModelCPtr1Vec correlatedTimeSeriesModels; - if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) { + if (this->correlationModels(correlated, variables, correlationModels, + correlatedTimeSeriesModels)) { for (std::size_t i = 0u; i < variables.size(); ++i) { if (!value[i].empty()) { - value[i][variables[i][0]] = - m_TrendModel->detrend(time[i][variables[i][0]], value[i][variables[i][0]], confidenceInterval); - value[i][variables[i][1]] = 
correlatedTimeSeriesModels[i]->m_TrendModel->detrend( - time[i][variables[i][1]], value[i][variables[i][1]], confidenceInterval); + value[i][variables[i][0]] = m_TrendModel->detrend( + time[i][variables[i][0]], value[i][variables[i][0]], confidenceInterval); + value[i][variables[i][1]] = + correlatedTimeSeriesModels[i]->m_TrendModel->detrend( + time[i][variables[i][1]], value[i][variables[i][1]], + confidenceInterval); } } } @@ -816,21 +891,28 @@ void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time, double confi } CUnivariateTimeSeriesModel::TDouble2Vec -CUnivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& correlatedValue, TDouble2Vec hint) const { +CUnivariateTimeSeriesModel::predict(core_t::TTime time, + const TSizeDoublePr1Vec& correlatedValue, + TDouble2Vec hint) const { double correlateCorrection{0.0}; if (!correlatedValue.empty()) { TSize1Vec correlated{correlatedValue[0].first}; TSize2Vec1Vec variables; TMultivariatePriorCPtrSizePr1Vec correlationModel; TModelCPtr1Vec correlatedModel; - if (m_Correlations->correlationModels(m_Id, correlated, variables, correlationModel, correlatedModel)) { - double sample{correlatedModel[0]->m_TrendModel->detrend(time, correlatedValue[0].second, 0.0)}; + if (m_Correlations->correlationModels(m_Id, correlated, variables, + correlationModel, correlatedModel)) { + double sample{correlatedModel[0]->m_TrendModel->detrend( + time, correlatedValue[0].second, 0.0)}; TSize10Vec marginalize{variables[0][1]}; TSizeDoublePr10Vec condition{{variables[0][1], sample}}; const CMultivariatePrior* joint{correlationModel[0].first}; - TPriorPtr margin{joint->univariate(marginalize, NOTHING_TO_CONDITION).first}; - TPriorPtr conditional{joint->univariate(NOTHING_TO_MARGINALIZE, condition).first}; - correlateCorrection = conditional->marginalLikelihoodMean() - margin->marginalLikelihoodMean(); + TPriorPtr margin{ + joint->univariate(marginalize, NOTHING_TO_CONDITION).first}; + TPriorPtr conditional{ + joint->univariate(NOTHING_TO_MARGINALIZE, condition).first}; + correlateCorrection = conditional->marginalLikelihoodMean() - + margin->marginalLikelihoodMean(); } } @@ -845,32 +927,40 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& hint[0] = m_TrendModel->detrend(time, hint[0], 0.0); } - double median{m_ResidualModel->isNonInformative() - ? m_ResidualModel->marginalLikelihoodMean() - : (hint.empty() ? CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0)) - : m_ResidualModel->nearestMarginalLikelihoodMean(hint[0]))}; + double median{ + m_ResidualModel->isNonInformative() + ? m_ResidualModel->marginalLikelihoodMean() + : (hint.empty() + ? CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0)) + : m_ResidualModel->nearestMarginalLikelihoodMean(hint[0]))}; double result{scale * (trend + median + correlateCorrection)}; return {m_IsNonNegative ? 
std::max(result, 0.0) : result}; } -CUnivariateTimeSeriesModel::TDouble2Vec3Vec CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, - double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { +CUnivariateTimeSeriesModel::TDouble2Vec3Vec +CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, + double confidenceInterval, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights_) const { if (m_ResidualModel->isNonInformative()) { return TDouble2Vec3Vec(); } double scale{1.0 - this->params().probabilityBucketEmpty()}; - double trend{m_TrendModel->initialized() ? CBasicStatistics::mean(m_TrendModel->value(time, confidenceInterval)) : 0.0}; + double trend{m_TrendModel->initialized() + ? CBasicStatistics::mean(m_TrendModel->value(time, confidenceInterval)) + : 0.0}; TDouble4Vec weights(unpack(weights_)); - double median{CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; - TDoubleDoublePr interval{m_ResidualModel->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; + double median{CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval( + 0.0, weightStyles, weights))}; + TDoubleDoublePr interval{m_ResidualModel->marginalLikelihoodConfidenceInterval( + confidenceInterval, weightStyles, weights)}; - double result[]{scale * (trend + interval.first), scale * (trend + median), scale * (trend + interval.second)}; + double result[]{scale * (trend + interval.first), scale * (trend + median), + scale * (trend + interval.second)}; return {{m_IsNonNegative ? std::max(result[0], 0.0) : result[0]}, {m_IsNonNegative ? std::max(result[1], 0.0) : result[1]}, @@ -896,15 +986,16 @@ bool CUnivariateTimeSeriesModel::forecast(core_t::TTime startTime, double maximum{m_IsNonNegative ? 
std::max(maximum_[0], 0.0) : maximum_[0]}; auto writer = [&](core_t::TTime time, const TDouble3Vec& prediction) { - SErrorBar errorBar{time, - bucketLength, - CTools::truncate(prediction[0], minimum, maximum + prediction[0] - prediction[1]), - CTools::truncate(prediction[1], minimum, maximum), - CTools::truncate(prediction[2], minimum + prediction[2] - prediction[1], maximum)}; + SErrorBar errorBar{ + time, bucketLength, + CTools::truncate(prediction[0], minimum, maximum + prediction[0] - prediction[1]), + CTools::truncate(prediction[1], minimum, maximum), + CTools::truncate(prediction[2], minimum + prediction[2] - prediction[1], maximum)}; forecastPushDataPointFunc(errorBar); }; - m_TrendModel->forecast(startTime, endTime, bucketLength, confidenceInterval, this->params().minimumSeasonalVarianceScale(), writer); + m_TrendModel->forecast(startTime, endTime, bucketLength, confidenceInterval, + this->params().minimumSeasonalVarianceScale(), writer); return true; } @@ -927,23 +1018,29 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para if (value[0].size() == 1) { core_t::TTime time{time_[0][0]}; - TDouble1Vec sample{m_TrendModel->detrend(time, value[0][0], params.seasonalConfidenceInterval())}; + TDouble1Vec sample{m_TrendModel->detrend( + time, value[0][0], params.seasonalConfidenceInterval())}; TDouble4Vec1Vec weights{unpack(params.weights()[0])}; double pl, pu; maths_t::ETail tail_; - if (m_ResidualModel->probabilityOfLessLikelySamples(params.calculation(0), params.weightStyles(), sample, weights, pl, pu, tail_)) { - LOG_TRACE(<< "P(" << sample << " | weight = " << weights << ", time = " << time << ") = " << (pl + pu) / 2.0); + if (m_ResidualModel->probabilityOfLessLikelySamples( + params.calculation(0), params.weightStyles(), sample, weights, pl, pu, tail_)) { + LOG_TRACE(<< "P(" << sample << " | weight = " << weights + << ", time = " << time << ") = " << (pl + pu) / 2.0); } else { - LOG_ERROR(<< "Failed to compute P(" << sample << " | weight = " << weights << ", time = " << time << ")"); + LOG_ERROR(<< "Failed to compute P(" << sample + << " | weight = " << weights << ", time = " << time << ")"); return false; } probability = correctForEmptyBucket( - params.calculation(0), value[0], params.bucketEmpty()[0][0], this->params().probabilityBucketEmpty(), (pl + pu) / 2.0); + params.calculation(0), value[0], params.bucketEmpty()[0][0], + this->params().probabilityBucketEmpty(), (pl + pu) / 2.0); if (m_AnomalyModel != nullptr) { - TDouble2Vec residual{(sample[0] - m_ResidualModel->nearestMarginalLikelihoodMean(sample[0])) / - std::max(std::sqrt(this->seasonalWeight(0.0, time)[0]), 1.0)}; + TDouble2Vec residual{ + (sample[0] - m_ResidualModel->nearestMarginalLikelihoodMean(sample[0])) / + std::max(std::sqrt(this->seasonalWeight(0.0, time)[0]), 1.0)}; m_AnomalyModel->updateAnomaly(params, time, residual, probability); m_AnomalyModel->probability(params, time, probability); m_AnomalyModel->sampleAnomaly(params, time); @@ -954,7 +1051,8 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para TSize2Vec1Vec variables; TMultivariatePriorCPtrSizePr1Vec correlationModels; TModelCPtr1Vec correlatedTimeSeriesModels; - if (!this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) { + if (!this->correlationModels(correlated, variables, correlationModels, + correlatedTimeSeriesModels)) { return false; } @@ -974,31 +1072,42 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para 
TPriorPtr mostAnomalousCorrelationModel; for (std::size_t i = 0u; i < variables.size(); ++i) { - if (!value[i].empty() || (!params.mostAnomalousCorrelate() || i == *params.mostAnomalousCorrelate())) { + if (!value[i].empty() || (!params.mostAnomalousCorrelate() || + i == *params.mostAnomalousCorrelate())) { variable[0] = variables[i][0]; - sample[0][variables[i][0]] = - m_TrendModel->detrend(time_[i][variables[i][0]], value[i][variables[i][0]], params.seasonalConfidenceInterval()); - sample[0][variables[i][1]] = correlatedTimeSeriesModels[i]->m_TrendModel->detrend( - time_[i][variables[i][1]], value[i][variables[i][1]], params.seasonalConfidenceInterval()); + sample[0][variables[i][0]] = m_TrendModel->detrend( + time_[i][variables[i][0]], value[i][variables[i][0]], + params.seasonalConfidenceInterval()); + sample[0][variables[i][1]] = + correlatedTimeSeriesModels[i]->m_TrendModel->detrend( + time_[i][variables[i][1]], value[i][variables[i][1]], + params.seasonalConfidenceInterval()); weights[0] = CMultivariateTimeSeriesModel::unpack(params.weights()[i]); if (correlationModels[i].first->probabilityOfLessLikelySamples( - params.calculation(0), params.weightStyles(), sample, weights, variable, pli, pui, ti)) { - LOG_TRACE(<< "Marginal P(" << sample << " | weight = " << weights << ", coordinate = " << variable + params.calculation(0), params.weightStyles(), sample, + weights, variable, pli, pui, ti)) { + LOG_TRACE(<< "Marginal P(" << sample << " | weight = " << weights + << ", coordinate = " << variable << ") = " << (pli[0][0] + pui[0][0]) / 2.0); - LOG_TRACE(<< "Conditional P(" << sample << " | weight = " << weights << ", coordinate = " << variable + LOG_TRACE(<< "Conditional P(" << sample << " | weight = " << weights + << ", coordinate = " << variable << ") = " << (pli[1][0] + pui[1][0]) / 2.0); } else { - LOG_ERROR(<< "Failed to compute P(" << sample << " | weight = " << weights << ", coordinate = " << variable << ")"); + LOG_ERROR(<< "Failed to compute P(" << sample << " | weight = " << weights + << ", coordinate = " << variable << ")"); continue; } - probabilityBucketEmpty[variables[i][0]] = this->params().probabilityBucketEmpty(); - probabilityBucketEmpty[variables[i][1]] = correlatedTimeSeriesModels[i]->params().probabilityBucketEmpty(); + probabilityBucketEmpty[variables[i][0]] = + this->params().probabilityBucketEmpty(); + probabilityBucketEmpty[variables[i][1]] = + correlatedTimeSeriesModels[i]->params().probabilityBucketEmpty(); double pl{std::sqrt(pli[0][0] * pli[1][0])}; double pu{std::sqrt(pui[0][0] * pui[1][0])}; double p{correctForEmptyBucket( - params.calculation(0), value[0][variable[0]], params.bucketEmpty()[i], probabilityBucketEmpty, (pl + pu) / 2.0)}; + params.calculation(0), value[0][variable[0]], + params.bucketEmpty()[i], probabilityBucketEmpty, (pl + pu) / 2.0)}; aggregator.add(p, neff); if (minProbability.add(p)) { @@ -1008,10 +1117,17 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para mostAnomalousTime = time_[0][variables[i][0]]; mostAnomalousSample = sample[0][variables[i][0]]; mostAnomalousCorrelationModel = - conditional ? correlationModels[i].first->univariate({variables[i][1]}, NOTHING_TO_CONDITION).first - : correlationModels[i] - .first->univariate(NOTHING_TO_MARGINALIZE, {{variables[i][1], sample[0][variables[i][1]]}}) - .first; + conditional + ? 
correlationModels[i] + .first + ->univariate({variables[i][1]}, NOTHING_TO_CONDITION) + .first + : correlationModels[i] + .first + ->univariate( + NOTHING_TO_MARGINALIZE, + {{variables[i][1], sample[0][variables[i][1]]}}) + .first; } } else { aggregator.add(1.0, neff); @@ -1020,8 +1136,10 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para aggregator.calculate(probability); if (m_AnomalyModel != nullptr) { - TDouble2Vec residual{(mostAnomalousSample - mostAnomalousCorrelationModel->nearestMarginalLikelihoodMean(mostAnomalousSample)) / - std::max(std::sqrt(this->seasonalWeight(0.0, mostAnomalousTime)[0]), 1.0)}; + TDouble2Vec residual{ + (mostAnomalousSample - mostAnomalousCorrelationModel->nearestMarginalLikelihoodMean( + mostAnomalousSample)) / + std::max(std::sqrt(this->seasonalWeight(0.0, mostAnomalousTime)[0]), 1.0)}; m_AnomalyModel->updateAnomaly(params, mostAnomalousTime, residual, probability); m_AnomalyModel->probability(params, mostAnomalousTime, probability); m_AnomalyModel->sampleAnomaly(params, mostAnomalousTime); @@ -1032,14 +1150,20 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para } CUnivariateTimeSeriesModel::TDouble2Vec -CUnivariateTimeSeriesModel::winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const { +CUnivariateTimeSeriesModel::winsorisationWeight(double derate, + core_t::TTime time, + const TDouble2Vec& value) const { double scale{this->seasonalWeight(0.0, time)[0]}; double sample{m_TrendModel->detrend(time, value[0], 0.0)}; - return {tailWinsorisationWeight(*m_ResidualModel, derate, scale, sample) * changeWinsorisationWeight(m_ChangeDetector)}; + return {tailWinsorisationWeight(*m_ResidualModel, derate, scale, sample) * + changeWinsorisationWeight(m_ChangeDetector)}; } -CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const { - double scale{m_TrendModel->scale(time, m_ResidualModel->marginalLikelihoodVariance(), confidence).second}; +CUnivariateTimeSeriesModel::TDouble2Vec +CUnivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const { + double scale{m_TrendModel + ->scale(time, m_ResidualModel->marginalLikelihoodVariance(), confidence) + .second}; return {std::max(scale, this->params().minimumSeasonalVarianceScale())}; } @@ -1067,12 +1191,16 @@ void CUnivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsa } std::size_t CUnivariateTimeSeriesModel::memoryUsage() const { - return core::CMemory::dynamicSize(m_Controllers) + core::CMemory::dynamicSize(m_TrendModel) + - core::CMemory::dynamicSize(m_ResidualModel) + core::CMemory::dynamicSize(m_AnomalyModel) + - core::CMemory::dynamicSize(m_ChangeDetector) + core::CMemory::dynamicSize(m_SlidingWindow); + return core::CMemory::dynamicSize(m_Controllers) + + core::CMemory::dynamicSize(m_TrendModel) + + core::CMemory::dynamicSize(m_ResidualModel) + + core::CMemory::dynamicSize(m_AnomalyModel) + + core::CMemory::dynamicSize(m_ChangeDetector) + + core::CMemory::dynamicSize(m_SlidingWindow); } -bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { if (traverser.name() == VERSION_6_3_TAG) { while (traverser.next()) { const std::string& name{traverser.name()}; @@ -1080,31 +1208,40 @@ bool 
CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam
             RESTORE_BOOL(IS_NON_NEGATIVE_6_3_TAG, m_IsNonNegative)
             RESTORE_BOOL(IS_FORECASTABLE_6_3_TAG, m_IsForecastable)
             RESTORE(RNG_6_3_TAG, m_Rng.fromString(traverser.value()))
-            RESTORE_SETUP_TEARDOWN(CONTROLLER_6_3_TAG,
-                                   m_Controllers = boost::make_shared<TDecayRateController2Ary>(),
-                                   core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser),
-                                   /**/)
-            RESTORE(
-                TREND_MODEL_6_3_TAG,
-                traverser.traverseSubLevel(boost::bind(
-                    CTimeSeriesDecompositionStateSerialiser(), boost::cref(params.s_DecompositionParams), boost::ref(m_TrendModel), _1)))
+            RESTORE_SETUP_TEARDOWN(
+                CONTROLLER_6_3_TAG,
+                m_Controllers = boost::make_shared<TDecayRateController2Ary>(),
+                core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser),
+                /**/)
+            RESTORE(TREND_MODEL_6_3_TAG, traverser.traverseSubLevel(boost::bind(
+                                             CTimeSeriesDecompositionStateSerialiser(),
+                                             boost::cref(params.s_DecompositionParams),
+                                             boost::ref(m_TrendModel), _1)))
             RESTORE(RESIDUAL_MODEL_6_3_TAG,
                     traverser.traverseSubLevel(boost::bind(
-                        CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), boost::ref(m_ResidualModel), _1)))
-            RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_6_3_TAG,
-                                   m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(),
-                                   traverser.traverseSubLevel(boost::bind(
-                                       &CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)),
-                                   /**/)
-            RESTORE(CANDIDATE_CHANGE_POINT_6_3_TAG, fromDelimited(traverser.value(), m_CandidateChangePoint))
+                        CPriorStateSerialiser(), boost::cref(params.s_DistributionParams),
+                        boost::ref(m_ResidualModel), _1)))
+            RESTORE_SETUP_TEARDOWN(
+                ANOMALY_MODEL_6_3_TAG,
+                m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(),
+                traverser.traverseSubLevel(
+                    boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser,
+                                m_AnomalyModel.get(), boost::cref(params), _1)),
+                /**/)
+            RESTORE(CANDIDATE_CHANGE_POINT_6_3_TAG,
+                    fromDelimited(traverser.value(), m_CandidateChangePoint))
             RESTORE_BUILT_IN(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval)
             RESTORE_SETUP_TEARDOWN(
                 CHANGE_DETECTOR_6_3_TAG,
-                m_ChangeDetector = boost::make_shared<CUnivariateTimeSeriesChangeDetector>(m_TrendModel, m_ResidualModel),
+                m_ChangeDetector = boost::make_shared<CUnivariateTimeSeriesChangeDetector>(
+                    m_TrendModel, m_ResidualModel),
                 traverser.traverseSubLevel(boost::bind(
-                    &CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, m_ChangeDetector.get(), boost::cref(params), _1)),
+                    &CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser,
+                    m_ChangeDetector.get(), boost::cref(params), _1)),
                 /**/)
-            RESTORE(SLIDING_WINDOW_6_3_TAG, core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, traverser))
+            RESTORE(SLIDING_WINDOW_6_3_TAG,
+                    core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG,
+                                                 m_SlidingWindow, traverser))
         }
     } else {
         // There is no version string this is historic state.
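For context, the RESTORE* macros re-wrapped throughout these hunks implement a tag-dispatch loop over persisted state. A minimal sketch of the pattern they expand to, assuming only the core::CStateRestoreTraverser interface used in this file (name(), value(), next(), traverseSubLevel()) and core::CStringUtils::stringToType(); MyComponent, m_Count, restoreNested and the *_TAG names are hypothetical:

bool MyComponent::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
    do {
        const std::string& name{traverser.name()};
        if (name == COUNT_TAG) {
            // RESTORE_BUILT_IN-style: parse a built-in value from the stored text.
            if (core::CStringUtils::stringToType(traverser.value(), m_Count) == false) {
                return false;
            }
        } else if (name == NESTED_TAG) {
            // RESTORE-style with a sub-level: recurse into a nested object's state.
            if (traverser.traverseSubLevel(
                    boost::bind(&MyComponent::restoreNested, this, _1)) == false) {
                return false;
            }
        }
    } while (traverser.next());
    return true;
}

RESTORE_SETUP_TEARDOWN additionally evaluates a setup expression before parsing (for example the boost::make_shared calls above) and a teardown expression afterwards.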
@@ -1113,22 +1250,26 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam
             RESTORE_BUILT_IN(ID_OLD_TAG, m_Id)
             RESTORE_BOOL(IS_NON_NEGATIVE_OLD_TAG, m_IsNonNegative)
             RESTORE_BOOL(IS_FORECASTABLE_OLD_TAG, m_IsForecastable)
-            RESTORE_SETUP_TEARDOWN(CONTROLLER_OLD_TAG,
-                                   m_Controllers = boost::make_shared<TDecayRateController2Ary>(),
-                                   core::CPersistUtils::restore(CONTROLLER_OLD_TAG, *m_Controllers, traverser),
-                                   /**/)
-            RESTORE(
-                TREND_OLD_TAG,
-                traverser.traverseSubLevel(boost::bind(
-                    CTimeSeriesDecompositionStateSerialiser(), boost::cref(params.s_DecompositionParams), boost::ref(m_TrendModel), _1)))
-            RESTORE(PRIOR_OLD_TAG,
-                    traverser.traverseSubLevel(boost::bind(
-                        CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), boost::ref(m_ResidualModel), _1)))
-            RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_OLD_TAG,
-                                   m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(),
-                                   traverser.traverseSubLevel(boost::bind(
-                                       &CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)),
-                                   /**/)
+            RESTORE_SETUP_TEARDOWN(
+                CONTROLLER_OLD_TAG,
+                m_Controllers = boost::make_shared<TDecayRateController2Ary>(),
+                core::CPersistUtils::restore(CONTROLLER_OLD_TAG, *m_Controllers, traverser),
+                /**/)
+            RESTORE(TREND_OLD_TAG, traverser.traverseSubLevel(boost::bind(
+                                       CTimeSeriesDecompositionStateSerialiser(),
+                                       boost::cref(params.s_DecompositionParams),
+                                       boost::ref(m_TrendModel), _1)))
+            RESTORE(PRIOR_OLD_TAG, traverser.traverseSubLevel(boost::bind(
+                                       CPriorStateSerialiser(),
+                                       boost::cref(params.s_DistributionParams),
+                                       boost::ref(m_ResidualModel), _1)))
+            RESTORE_SETUP_TEARDOWN(
+                ANOMALY_MODEL_OLD_TAG,
+                m_AnomalyModel = boost::make_shared<CTimeSeriesAnomalyModel>(),
+                traverser.traverseSubLevel(
+                    boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser,
+                                m_AnomalyModel.get(), boost::cref(params), _1)),
+                /**/)
         } while (traverser.next());
     }
     return true;
@@ -1145,16 +1286,23 @@ void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInsert
     if (m_Controllers) {
         core::CPersistUtils::persist(CONTROLLER_6_3_TAG, *m_Controllers, inserter);
     }
-    inserter.insertLevel(TREND_MODEL_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(), boost::cref(*m_TrendModel), _1));
-    inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_ResidualModel), _1));
+    inserter.insertLevel(TREND_MODEL_6_3_TAG,
+                         boost::bind(CTimeSeriesDecompositionStateSerialiser(),
+                                     boost::cref(*m_TrendModel), _1));
+    inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG,
+                         boost::bind(CPriorStateSerialiser(),
+                                     boost::cref(*m_ResidualModel), _1));
     inserter.insertValue(CANDIDATE_CHANGE_POINT_6_3_TAG, toDelimited(m_CandidateChangePoint));
     inserter.insertValue(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval);
     if (m_ChangeDetector != nullptr) {
         inserter.insertLevel(CHANGE_DETECTOR_6_3_TAG,
-                             boost::bind(&CUnivariateTimeSeriesChangeDetector::acceptPersistInserter, m_ChangeDetector.get(), _1));
+                             boost::bind(&CUnivariateTimeSeriesChangeDetector::acceptPersistInserter,
+                                         m_ChangeDetector.get(), _1));
     }
     if (m_AnomalyModel != nullptr) {
-        inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter, m_AnomalyModel.get(), _1));
+        inserter.insertLevel(ANOMALY_MODEL_6_3_TAG,
+                             boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter,
+                                         m_AnomalyModel.get(), _1));
     }
     core::CPersistUtils::persist(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, inserter);
 }
@@ -1163,7 +1311,8 @@ maths_t::EDataType CUnivariateTimeSeriesModel::dataType() const {
     return m_ResidualModel->dataType();
 }
-CUnivariateTimeSeriesModel::TDouble4Vec CUnivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec& weights) {
+CUnivariateTimeSeriesModel::TDouble4Vec
+CUnivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec& weights) {
     TDouble4Vec result;
     result.reserve(weights.size());
     for (const auto& weight : weights) {
@@ -1179,7 +1328,8 @@ void CUnivariateTimeSeriesModel::reinitializeResidualModel(double learnRate,
     residualModel.setToNonInformative(0.0, residualModel.decayRate());
     if (!slidingWindow.empty()) {
         double slidingWindowLength{static_cast<double>(slidingWindow.size())};
-        TDouble4Vec1Vec weight{{std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0))}};
+        TDouble4Vec1Vec weight{
+            {std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0))}};
         for (const auto& value : slidingWindow) {
             TDouble1Vec sample{trend->detrend(value.first, value.second, 0.0)};
             residualModel.addSamples(CConstantWeights::COUNT, sample, weight);
@@ -1187,7 +1337,8 @@ void CUnivariateTimeSeriesModel::reinitializeResidualModel(double learnRate,
     }
 }

-const CUnivariateTimeSeriesModel::TTimeDoublePrCBuf& CUnivariateTimeSeriesModel::slidingWindow() const {
+const CUnivariateTimeSeriesModel::TTimeDoublePrCBuf&
+CUnivariateTimeSeriesModel::slidingWindow() const {
     return m_SlidingWindow;
 }

@@ -1199,21 +1350,22 @@ const CPrior& CUnivariateTimeSeriesModel::residualModel() const {
     return *m_ResidualModel;
 }

-CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel& other, std::size_t id, bool isForForecast)
-    : CModel(other.params()),
-      m_Id(id),
-      m_IsNonNegative(other.m_IsNonNegative),
-      m_IsForecastable(other.m_IsForecastable),
-      m_Rng(other.m_Rng),
+CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel& other,
+                                                       std::size_t id,
+                                                       bool isForForecast)
+    : CModel(other.params()), m_Id(id), m_IsNonNegative(other.m_IsNonNegative),
+      m_IsForecastable(other.m_IsForecastable), m_Rng(other.m_Rng),
       m_TrendModel(other.m_TrendModel->clone()),
       m_ResidualModel(other.m_ResidualModel->clone()),
-      m_AnomalyModel(!isForForecast && other.m_AnomalyModel ? boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel)
-                                                            : TAnomalyModelPtr()),
+      m_AnomalyModel(!isForForecast && other.m_AnomalyModel
+                         ? boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel)
+                         : TAnomalyModelPtr()),
       m_CandidateChangePoint(other.m_CandidateChangePoint),
       m_CurrentChangeInterval(other.m_CurrentChangeInterval),
-      m_ChangeDetector(!isForForecast && other.m_ChangeDetector
-                           ? boost::make_shared<CUnivariateTimeSeriesChangeDetector>(*other.m_ChangeDetector)
-                           : TChangeDetectorPtr()),
+      m_ChangeDetector(
+          !isForForecast && other.m_ChangeDetector
+              ? boost::make_shared<CUnivariateTimeSeriesChangeDetector>(*other.m_ChangeDetector)
+              : TChangeDetectorPtr()),
       m_SlidingWindow(!isForForecast ? other.m_SlidingWindow : TTimeDoublePrCBuf{}),
       m_Correlations(nullptr) {
     if (!isForForecast && other.m_Controllers != nullptr) {
@@ -1221,9 +1373,10 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeri
     }
 }

-CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& params,
-                                                                                         const TSizeVec& order,
-                                                                                         const TTimeDouble2VecSizeTrVec& values) {
+CUnivariateTimeSeriesModel::EUpdateResult
+CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& params,
+                                               const TSizeVec& order,
+                                               const TTimeDouble2VecSizeTrVec& values) {
     std::size_t median{order[order.size() / 2]};
     TDouble4Vec weights(unpack(params.priorWeights()[median]));
     core_t::TTime time{values[median].first};
@@ -1232,7 +1385,8 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::testAndApp
     core_t::TTime minimumTimeToDetect{this->params().minimumTimeToDetectChange()};
     core_t::TTime maximumTimeToTest{this->params().maximumTimeToTestForChange()};
     double weight{maths_t::winsorisationWeight(params.weightStyles(), {weights})};
-    if (minimumTimeToDetect < maximumTimeToTest && pValueFromTailWinsorisationWeight(weight) <= 1e-5) {
+    if (minimumTimeToDetect < maximumTimeToTest &&
+        pValueFromTailWinsorisationWeight(weight) <= 1e-5) {
         m_CurrentChangeInterval += this->params().bucketLength();
         if (this->params().testForChange(m_CurrentChangeInterval)) {
             m_ChangeDetector = boost::make_shared<CUnivariateTimeSeriesChangeDetector>(
@@ -1246,7 +1400,9 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::testAndApp
     }

     if (m_ChangeDetector != nullptr) {
-        m_ChangeDetector->addSamples(params.weightStyles(), {std::make_pair(time, values[median].second[0])}, {weights});
+        m_ChangeDetector->addSamples(params.weightStyles(),
+                                     {std::make_pair(time, values[median].second[0])},
+                                     {weights});

         if (m_ChangeDetector->stopTesting()) {
             m_ChangeDetector.reset();
@@ -1260,7 +1416,8 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::testAndApp
     return E_Success;
 }

-CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::applyChange(const SChangeDescription& change) {
+CUnivariateTimeSeriesModel::EUpdateResult
+CUnivariateTimeSeriesModel::applyChange(const SChangeDescription& change) {
     for (auto& value : m_SlidingWindow) {
         switch (change.s_Description) {
         case SChangeDescription::E_LevelShift:
@@ -1275,7 +1432,8 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::applyChang
         }
     }

-    if (m_TrendModel->applyChange(m_CandidateChangePoint.first, m_CandidateChangePoint.second, change)) {
+    if (m_TrendModel->applyChange(m_CandidateChangePoint.first,
+                                  m_CandidateChangePoint.second, change)) {
         this->reinitializeStateGivenNewComponent();
     } else {
         change.s_ResidualModel->decayRate(m_ResidualModel->decayRate());
@@ -1285,9 +1443,10 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::applyChang
     return E_Success;
 }

-CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles,
-                                                                                  const TTimeDouble2VecSizeTrVec& samples,
-                                                                                  const TDouble2Vec4VecVec& weights) {
+CUnivariateTimeSeriesModel::EUpdateResult
+CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles,
+                                        const TTimeDouble2VecSizeTrVec& samples,
+                                        const TDouble2Vec4VecVec& weights) {
     for (const auto& sample : samples) {
         if (sample.second.size() != 1) {
             LOG_ERROR(<< "Dimension mismatch: '" << sample.second.size() << " != 1'");
@@ -1301,9 +1460,12 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::updateTren
     // or for count feature, the times of all samples will be the same.
     TSizeVec timeorder(samples.size());
     std::iota(timeorder.begin(), timeorder.end(), 0);
-    std::stable_sort(timeorder.begin(), timeorder.end(), [&samples](std::size_t lhs, std::size_t rhs) {
-        return COrderings::lexicographical_compare(samples[lhs].first, samples[lhs].second, samples[rhs].first, samples[rhs].second);
-    });
+    std::stable_sort(timeorder.begin(), timeorder.end(),
+                     [&samples](std::size_t lhs, std::size_t rhs) {
+                         return COrderings::lexicographical_compare(
+                             samples[lhs].first, samples[lhs].second,
+                             samples[rhs].first, samples[rhs].second);
+                     });

     for (auto i : timeorder) {
         core_t::TTime time{samples[i].first};
@@ -1321,7 +1483,9 @@ CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::updateTren
     return result;
 }

-void CUnivariateTimeSeriesModel::appendPredictionErrors(double interval, double sample_, TDouble1VecVec (&result)[2]) {
+void CUnivariateTimeSeriesModel::appendPredictionErrors(double interval,
+                                                        double sample_,
+                                                        TDouble1VecVec (&result)[2]) {
     using TDecompositionPtr1Vec = core::CSmallVector<TDecompositionPtr, 1>;
     TDouble1Vec sample{sample_};
     TDecompositionPtr1Vec trend{m_TrendModel};
@@ -1334,13 +1498,16 @@ void CUnivariateTimeSeriesModel::appendPredictionErrors(double interval, double
 }

 void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent() {
-    reinitializeResidualModel(this->params().learnRate(), m_TrendModel, m_SlidingWindow, *m_ResidualModel);
+    reinitializeResidualModel(this->params().learnRate(), m_TrendModel,
+                              m_SlidingWindow, *m_ResidualModel);
     if (m_Correlations != nullptr) {
         m_Correlations->removeTimeSeries(m_Id);
     }
     if (m_Controllers != nullptr) {
-        m_ResidualModel->decayRate(m_ResidualModel->decayRate() / (*m_Controllers)[E_ResidualControl].multiplier());
-        m_TrendModel->decayRate(m_TrendModel->decayRate() / (*m_Controllers)[E_TrendControl].multiplier());
+        m_ResidualModel->decayRate(m_ResidualModel->decayRate() /
+                                   (*m_Controllers)[E_ResidualControl].multiplier());
+        m_TrendModel->decayRate(m_TrendModel->decayRate() /
+                                (*m_Controllers)[E_TrendControl].multiplier());
         for (auto& controller : *m_Controllers) {
             controller.reset();
         }
@@ -1357,24 +1524,29 @@ bool CUnivariateTimeSeriesModel::correlationModels(TSize1Vec& correlated,
                                                    TModelCPtr1Vec& correlatedTimeSeriesModels) const {
     if (m_Correlations) {
         correlated = m_Correlations->correlated(m_Id);
-        m_Correlations->correlationModels(m_Id, correlated, variables, correlationModels, correlatedTimeSeriesModels);
+        m_Correlations->correlationModels(m_Id, correlated, variables, correlationModels,
+                                          correlatedTimeSeriesModels);
     }
     return correlated.size() > 0;
 }

-CTimeSeriesCorrelations::CTimeSeriesCorrelations(double minimumSignificantCorrelation, double decayRate)
-    : m_MinimumSignificantCorrelation(minimumSignificantCorrelation), m_Correlations(MAXIMUM_CORRELATIONS, decayRate) {
+CTimeSeriesCorrelations::CTimeSeriesCorrelations(double minimumSignificantCorrelation,
+                                                 double decayRate)
+    : m_MinimumSignificantCorrelation(minimumSignificantCorrelation),
+      m_Correlations(MAXIMUM_CORRELATIONS, decayRate) {
 }

-CTimeSeriesCorrelations::CTimeSeriesCorrelations(const CTimeSeriesCorrelations& other, bool isForPersistence)
+CTimeSeriesCorrelations::CTimeSeriesCorrelations(const CTimeSeriesCorrelations& other,
+                                                 bool isForPersistence)
     : m_MinimumSignificantCorrelation(other.m_MinimumSignificantCorrelation),
-      m_SampleData(other.m_SampleData),
-      m_Correlations(other.m_Correlations),
+      m_SampleData(other.m_SampleData), m_Correlations(other.m_Correlations),
      m_CorrelatedLookup(other.m_CorrelatedLookup),
      m_TimeSeriesModels(isForPersistence ? TModelCPtrVec() : other.m_TimeSeriesModels) {
     for (const auto& model : other.m_CorrelationDistributionModels) {
-        m_CorrelationDistributionModels.emplace(model.first,
-                                                std::make_pair(TMultivariatePriorPtr(model.second.first->clone()), model.second.second));
+        m_CorrelationDistributionModels.emplace(
+            model.first,
+            std::make_pair(TMultivariatePriorPtr(model.second.first->clone()),
+                           model.second.second));
     }
 }

@@ -1387,8 +1559,10 @@ CTimeSeriesCorrelations* CTimeSeriesCorrelations::cloneForPersistence() const {
 }

 void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& weightStyles) {
-    using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItr = TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator;
-    using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = std::vector<TSizeSizePrMultivariatePriorPtrDoublePrUMapCItr>;
+    using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItr =
+        TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator;
+    using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec =
+        std::vector<TSizeSizePrMultivariatePriorPtrDoublePrUMapCItr>;

     // The priors use a shared pseudo random number generator which
     // generates a fixed sequence of random numbers. Since the order
@@ -1400,10 +1574,12 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei
     // preserve the random number sequence.
     TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec iterators;
     iterators.reserve(m_CorrelationDistributionModels.size());
-    for (auto i = m_CorrelationDistributionModels.begin(); i != m_CorrelationDistributionModels.end(); ++i) {
+    for (auto i = m_CorrelationDistributionModels.begin();
+         i != m_CorrelationDistributionModels.end(); ++i) {
         iterators.push_back(i);
     }
-    std::sort(iterators.begin(), iterators.end(), core::CFunctional::SDereference<COrderings::SFirstLess>());
+    std::sort(iterators.begin(), iterators.end(),
+              core::CFunctional::SDereference<COrderings::SFirstLess>());

     TDouble10Vec1Vec multivariateSamples;
     TDouble10Vec4Vec1Vec multivariateWeights;
@@ -1428,18 +1604,21 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei
             std::swap(indices[0], indices[1]);
         }
         multivariateSamples.assign(n1, TDouble10Vec(2));
-        multivariateWeights.assign(n1, TDouble10Vec4Vec(weightStyles.size(), TDouble10Vec(2)));
+        multivariateWeights.assign(
+            n1, TDouble10Vec4Vec(weightStyles.size(), TDouble10Vec(2)));

         TSize1Vec& tags2{samples2->s_Tags};
         TTime1Vec& times2{samples2->s_Times};

-        COrderings::simultaneousSort(tags2, times2, samples2->s_Samples, samples2->s_Weights);
+        COrderings::simultaneousSort(tags2, times2, samples2->s_Samples,
+                                     samples2->s_Weights);
         for (auto j = tags2.begin(); j != tags2.end(); /**/) {
             auto k = std::upper_bound(j, tags2.end(), *j);
             std::size_t a = j - tags2.begin();
             std::size_t b = k - tags2.begin();
-            COrderings::simultaneousSort(
-                core::make_range(times2, a, b), core::make_range(samples2->s_Samples, a, b), core::make_range(samples2->s_Weights, a, b));
+            COrderings::simultaneousSort(core::make_range(times2, a, b),
+                                         core::make_range(samples2->s_Samples, a, b),
+                                         core::make_range(samples2->s_Weights, a, b));
             j = k;
         }

@@ -1448,12 +1627,15 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei
             if (n2 > 1) {
                 std::size_t tag{samples1->s_Tags[j1]};
                 core_t::TTime time{samples1->s_Times[j1]};
-                std::size_t a_ = std::lower_bound(tags2.begin(), tags2.end(), tag) - tags2.begin();
-                std::size_t b_ = std::upper_bound(tags2.begin(), tags2.end(), tag) - tags2.begin();
+                std::size_t a_ = std::lower_bound(tags2.begin(), tags2.end(), tag) -
+                                 tags2.begin();
+                std::size_t b_ = std::upper_bound(tags2.begin(), tags2.end(), tag) -
+                                 tags2.begin();
                 std::size_t b{CTools::truncate(
-                    static_cast<std::size_t>(std::lower_bound(times2.begin() + a_, times2.begin() + b_, time) - times2.begin()),
-                    std::size_t(1),
-                    n2 - 1)};
+                    static_cast<std::size_t>(
+                        std::lower_bound(times2.begin() + a_, times2.begin() + b_, time) -
+                        times2.begin()),
+                    std::size_t(1), n2 - 1)};
                 std::size_t a{b - 1};
                 j2 = std::abs(times2[a] - time) < std::abs(times2[b] - time) ? a : b;
             }
@@ -1465,14 +1647,17 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei
             }
         }
         LOG_TRACE(<< "correlate samples = " << core::CContainerPrinter::print(multivariateSamples)
-                  << ", correlate weights = " << core::CContainerPrinter::print(multivariateWeights));
+                  << ", correlate weights = "
+                  << core::CContainerPrinter::print(multivariateWeights));

-        prior->dataType(samples1->s_Type == maths_t::E_IntegerData || samples2->s_Type == maths_t::E_IntegerData
+        prior->dataType(samples1->s_Type == maths_t::E_IntegerData ||
+                                samples2->s_Type == maths_t::E_IntegerData
                             ? maths_t::E_IntegerData
                             : maths_t::E_ContinuousData);
         prior->addSamples(weightStyles, multivariateSamples, multivariateWeights);
         prior->propagateForwardsByTime(std::min(samples1->s_Interval, samples2->s_Interval));
-        prior->decayRate(std::sqrt(samples1->s_Multiplier * samples2->s_Multiplier) * prior->decayRate());
+        prior->decayRate(std::sqrt(samples1->s_Multiplier * samples2->s_Multiplier) *
+                         prior->decayRate());
         LOG_TRACE(<< "correlation prior:" << core_t::LINE_ENDING << prior->print());
         LOG_TRACE(<< "decayRate = " << prior->decayRate());
     }
@@ -1489,14 +1674,18 @@ void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator&
     TSizeSizePrVec correlated;
     TDoubleVec correlationCoeffs;
     m_Correlations.mostCorrelated(
-        static_cast<std::size_t>(1.2 * static_cast<double>(allocator.maxNumberCorrelations())), correlated, &correlationCoeffs);
+        static_cast<std::size_t>(
+            1.2 * static_cast<double>(allocator.maxNumberCorrelations())),
+        correlated, &correlationCoeffs);
     LOG_TRACE(<< "correlated = " << core::CContainerPrinter::print(correlated));
-    LOG_TRACE(<< "correlationCoeffs = " << core::CContainerPrinter::print(correlationCoeffs));
+    LOG_TRACE(<< "correlationCoeffs = "
+              << core::CContainerPrinter::print(correlationCoeffs));

     ptrdiff_t cutoff{std::upper_bound(correlationCoeffs.begin(),
-                                      correlationCoeffs.end(),
-                                      0.5 * m_MinimumSignificantCorrelation,
-                                      [](double lhs, double rhs) { return std::fabs(lhs) > std::fabs(rhs); }) -
+                                      correlationCoeffs.end(), 0.5 * m_MinimumSignificantCorrelation,
+                                      [](double lhs, double rhs) {
+                                          return std::fabs(lhs) > std::fabs(rhs);
+                                      }) -
                      correlationCoeffs.begin()};
     LOG_TRACE(<< "cutoff = " << cutoff);

@@ -1513,8 +1702,10 @@ void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator&
     TSizeVec presentRank;
     TSizeSizePrVec missing;
     TSizeVec missingRank;
-    std::size_t np{static_cast<std::size_t>(std::max(0.9 * static_cast<double>(correlated.size()), 1.0))};
-    std::size_t nm{static_cast<std::size_t>(std::max(0.1 * static_cast<double>(correlated.size()), 1.0))};
+    std::size_t np{static_cast<std::size_t>(
+        std::max(0.9 * static_cast<double>(correlated.size()), 1.0))};
+    std::size_t nm{static_cast<std::size_t>(
+        std::max(0.1 * static_cast<double>(correlated.size()), 1.0))};
     present.reserve(np);
     presentRank.reserve(np);
     missing.reserve(nm);
@@ -1528,8 +1719,11 @@ void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator&

     // Remove any weakly correlated models.
     std::size_t initial{m_CorrelationDistributionModels.size()};
     COrderings::simultaneousSort(present, presentRank);
-    for (auto i = m_CorrelationDistributionModels.begin(); i != m_CorrelationDistributionModels.end(); /**/) {
-        std::size_t j = std::lower_bound(present.begin(), present.end(), i->first) - present.begin();
+    for (auto i = m_CorrelationDistributionModels.begin();
+         i != m_CorrelationDistributionModels.end();
+         /**/) {
+        std::size_t j = std::lower_bound(present.begin(), present.end(), i->first) -
+                        present.begin();
         if (j == present.size() || i->first != present[j]) {
             i = m_CorrelationDistributionModels.erase(i);
         } else {
@@ -1541,17 +1735,28 @@ void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator&
     // Remove the remaining most weakly correlated models subject
     // to the capacity constraint.
     COrderings::simultaneousSort(presentRank, present, std::greater<std::size_t>());
-    for (std::size_t i = 0u; m_CorrelationDistributionModels.size() > allocator.maxNumberCorrelations(); ++i) {
+    for (std::size_t i = 0u; m_CorrelationDistributionModels.size() >
+                             allocator.maxNumberCorrelations();
+         ++i) {
         m_CorrelationDistributionModels.erase(present[i]);
     }

     if (allocator.areAllocationsAllowed()) {
-        for (std::size_t i = 0u, nextChunk = std::min(allocator.maxNumberCorrelations(), initial + allocator.chunkSize());
-             m_CorrelationDistributionModels.size() < allocator.maxNumberCorrelations() && i < missing.size() &&
-             (m_CorrelationDistributionModels.size() <= initial || !allocator.exceedsLimit(m_CorrelationDistributionModels.size()));
-             nextChunk = std::min(allocator.maxNumberCorrelations(), nextChunk + allocator.chunkSize())) {
-            for (/**/; i < missing.size() && m_CorrelationDistributionModels.size() < nextChunk; ++i) {
-                m_CorrelationDistributionModels.insert({missing[i], {allocator.newPrior(), correlationCoeffs[missingRank[i]]}});
+        for (std::size_t i = 0u,
+                         nextChunk = std::min(allocator.maxNumberCorrelations(),
+                                              initial + allocator.chunkSize());
+             m_CorrelationDistributionModels.size() < allocator.maxNumberCorrelations() &&
+             i < missing.size() &&
+             (m_CorrelationDistributionModels.size() <= initial ||
+              !allocator.exceedsLimit(m_CorrelationDistributionModels.size()));
+             nextChunk = std::min(allocator.maxNumberCorrelations(),
+                                  nextChunk + allocator.chunkSize())) {
+            for (/**/; i < missing.size() &&
+                       m_CorrelationDistributionModels.size() < nextChunk;
+                 ++i) {
+                m_CorrelationDistributionModels.insert(
+                    {missing[i],
+                     {allocator.newPrior(), correlationCoeffs[missingRank[i]]}});
             }
         }
     }
@@ -1560,7 +1765,8 @@ void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator&
     }
 }

-const CTimeSeriesCorrelations::TSizeSizePrMultivariatePriorPtrDoublePrUMap& CTimeSeriesCorrelations::correlationModels() const {
+const CTimeSeriesCorrelations::TSizeSizePrMultivariatePriorPtrDoublePrUMap&
+CTimeSeriesCorrelations::correlationModels() const {
     return m_CorrelationDistributionModels;
 }

@@ -1569,22 +1775,29 @@ void CTimeSeriesCorrelations::debugMemoryUsage(core::CMemoryUsage::TMemoryUsageP
     core::CMemoryDebug::dynamicSize("m_SampleData", m_SampleData, mem);
     core::CMemoryDebug::dynamicSize("m_Correlations", m_Correlations, mem);
     core::CMemoryDebug::dynamicSize("m_CorrelatedLookup", m_CorrelatedLookup, mem);
-    core::CMemoryDebug::dynamicSize("m_CorrelationDistributionModels", m_CorrelationDistributionModels, mem);
+    core::CMemoryDebug::dynamicSize("m_CorrelationDistributionModels",
+                                    m_CorrelationDistributionModels, mem);
 }

 std::size_t CTimeSeriesCorrelations::memoryUsage() const {
-
return core::CMemory::dynamicSize(m_SampleData) + core::CMemory::dynamicSize(m_Correlations) + - core::CMemory::dynamicSize(m_CorrelatedLookup) + core::CMemory::dynamicSize(m_CorrelationDistributionModels); + return core::CMemory::dynamicSize(m_SampleData) + + core::CMemory::dynamicSize(m_Correlations) + + core::CMemory::dynamicSize(m_CorrelatedLookup) + + core::CMemory::dynamicSize(m_CorrelationDistributionModels); } -bool CTimeSeriesCorrelations::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CTimeSeriesCorrelations::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; RESTORE(K_MOST_CORRELATED_TAG, - traverser.traverseSubLevel(boost::bind(&CKMostCorrelated::acceptRestoreTraverser, &m_Correlations, _1))) - RESTORE(CORRELATED_LOOKUP_TAG, core::CPersistUtils::restore(CORRELATED_LOOKUP_TAG, m_CorrelatedLookup, traverser)) + traverser.traverseSubLevel(boost::bind( + &CKMostCorrelated::acceptRestoreTraverser, &m_Correlations, _1))) + RESTORE(CORRELATED_LOOKUP_TAG, + core::CPersistUtils::restore(CORRELATED_LOOKUP_TAG, m_CorrelatedLookup, traverser)) RESTORE(CORRELATION_MODELS_TAG, - traverser.traverseSubLevel(boost::bind(&CTimeSeriesCorrelations::restoreCorrelationModels, this, boost::cref(params), _1))) + traverser.traverseSubLevel(boost::bind(&CTimeSeriesCorrelations::restoreCorrelationModels, + this, boost::cref(params), _1))) } while (traverser.next()); return true; } @@ -1595,32 +1808,41 @@ void CTimeSeriesCorrelations::acceptPersistInserter(core::CStatePersistInserter& // maintained transitively during an update at the end of a bucket // and so always empty at the point persistence occurs. 
-    inserter.insertLevel(K_MOST_CORRELATED_TAG, boost::bind(&CKMostCorrelated::acceptPersistInserter, &m_Correlations, _1));
+    inserter.insertLevel(K_MOST_CORRELATED_TAG, boost::bind(&CKMostCorrelated::acceptPersistInserter,
+                                                            &m_Correlations, _1));
     core::CPersistUtils::persist(CORRELATED_LOOKUP_TAG, m_CorrelatedLookup, inserter);
-    inserter.insertLevel(CORRELATION_MODELS_TAG, boost::bind(&CTimeSeriesCorrelations::persistCorrelationModels, this, _1));
+    inserter.insertLevel(
+        CORRELATION_MODELS_TAG,
+        boost::bind(&CTimeSeriesCorrelations::persistCorrelationModels, this, _1));
 }

-bool CTimeSeriesCorrelations::restoreCorrelationModels(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) {
+bool CTimeSeriesCorrelations::restoreCorrelationModels(const SDistributionRestoreParams& params,
+                                                       core::CStateRestoreTraverser& traverser) {
     do {
         const std::string& name{traverser.name()};
-        RESTORE_SETUP_TEARDOWN(CORRELATION_MODEL_TAG,
-                               TSizeSizePrMultivariatePriorPtrDoublePrPr prior,
-                               traverser.traverseSubLevel(boost::bind(&restore, boost::cref(params), boost::ref(prior), _1)),
-                               m_CorrelationDistributionModels.insert(prior))
+        RESTORE_SETUP_TEARDOWN(
+            CORRELATION_MODEL_TAG, TSizeSizePrMultivariatePriorPtrDoublePrPr prior,
+            traverser.traverseSubLevel(boost::bind(&restore, boost::cref(params),
+                                                   boost::ref(prior), _1)),
+            m_CorrelationDistributionModels.insert(prior))
     } while (traverser.next());
     return true;
 }

 void CTimeSeriesCorrelations::persistCorrelationModels(core::CStatePersistInserter& inserter) const {
-    using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = std::vector<TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator>;
+    using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec =
+        std::vector<TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator>;
     TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec ordered;
     ordered.reserve(m_CorrelationDistributionModels.size());
-    for (auto prior = m_CorrelationDistributionModels.begin(); prior != m_CorrelationDistributionModels.end(); ++prior) {
+    for (auto prior = m_CorrelationDistributionModels.begin();
+         prior != m_CorrelationDistributionModels.end(); ++prior) {
         ordered.push_back(prior);
     }
-    std::sort(ordered.begin(), ordered.end(), core::CFunctional::SDereference<COrderings::SFirstLess>());
+    std::sort(ordered.begin(), ordered.end(),
+              core::CFunctional::SDereference<COrderings::SFirstLess>());
     for (auto prior : ordered) {
-        inserter.insertLevel(CORRELATION_MODEL_TAG, boost::bind(&persist, boost::cref(*prior), _1));
+        inserter.insertLevel(CORRELATION_MODEL_TAG,
+                             boost::bind(&persist, boost::cref(*prior), _1));
     }
 }

@@ -1631,25 +1853,30 @@ bool CTimeSeriesCorrelations::restore(const SDistributionRestoreParams& params,
         const std::string& name{traverser.name()};
         RESTORE_BUILT_IN(FIRST_CORRELATE_ID_TAG, model.first.first)
         RESTORE_BUILT_IN(SECOND_CORRELATE_ID_TAG, model.first.second)
-        RESTORE(
-            CORRELATION_MODEL_TAG,
-            traverser.traverseSubLevel(boost::bind(CPriorStateSerialiser(), boost::cref(params), boost::ref(model.second.first), _1)))
+        RESTORE(CORRELATION_MODEL_TAG, traverser.traverseSubLevel(boost::bind(
+                                           CPriorStateSerialiser(), boost::cref(params),
+                                           boost::ref(model.second.first), _1)))
         RESTORE_BUILT_IN(CORRELATION_TAG, model.second.second)
     } while (traverser.next());
     return true;
 }

-void CTimeSeriesCorrelations::persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr& model, core::CStatePersistInserter& inserter) {
+void CTimeSeriesCorrelations::persist(const TSizeSizePrMultivariatePriorPtrDoublePrPr& model,
+                                      core::CStatePersistInserter& inserter) {
     inserter.insertValue(FIRST_CORRELATE_ID_TAG, model.first.first);
     inserter.insertValue(SECOND_CORRELATE_ID_TAG, model.first.second);
-    inserter.insertLevel(CORRELATION_MODEL_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*model.second.first), _1));
+    inserter.insertLevel(CORRELATION_MODEL_TAG,
+                         boost::bind(CPriorStateSerialiser(),
+                                     boost::cref(*model.second.first), _1));
     inserter.insertValue(CORRELATION_TAG, model.second.second, core::CIEEE754::E_SinglePrecision);
 }

-void CTimeSeriesCorrelations::addTimeSeries(std::size_t id, const CUnivariateTimeSeriesModel& model) {
+void CTimeSeriesCorrelations::addTimeSeries(std::size_t id,
+                                            const CUnivariateTimeSeriesModel& model) {
     m_Correlations.addVariables(id + 1);
-    core::CAllocationStrategy::resize(m_TimeSeriesModels, std::max(id + 1, m_TimeSeriesModels.size()));
+    core::CAllocationStrategy::resize(m_TimeSeriesModels,
+                                      std::max(id + 1, m_TimeSeriesModels.size()));
     m_TimeSeriesModels[id] = &model;
 }

@@ -1680,7 +1907,8 @@ void CTimeSeriesCorrelations::addSamples(std::size_t id,
         data.s_Times.push_back(samples[i].first);
         data.s_Samples.push_back(samples[i].second[0]);
         data.s_Tags.push_back(samples[i].third);
-        data.s_Weights.push_back(CUnivariateTimeSeriesModel::unpack(params.priorWeights()[i]));
+        data.s_Weights.push_back(
+            CUnivariateTimeSeriesModel::unpack(params.priorWeights()[i]));
     }
     data.s_Interval = params.propagationInterval();
     data.s_Multiplier = multiplier;
@@ -1717,7 +1945,8 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id,
             std::swap(variable[0], variable[1]);
         }
         if (i == m_CorrelationDistributionModels.end()) {
-            LOG_ERROR(<< "Unexpectedly missing prior for correlation (" << id << "," << correlate << ")");
+            LOG_ERROR(<< "Unexpectedly missing prior for correlation (" << id
+                      << "," << correlate << ")");
             continue;
         }
         if (std::fabs(i->second.second) < m_MinimumSignificantCorrelation) {
@@ -1755,15 +1984,16 @@ void CTimeSeriesCorrelations::refreshLookup() {
     }
 }

-CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CModelParams& params,
-                                                           const CTimeSeriesDecompositionInterface& trend,
-                                                           const CMultivariatePrior& residualModel,
-                                                           const TDecayRateController2Ary* controllers,
-                                                           bool modelAnomalies)
-    : CModel(params),
-      m_IsNonNegative(false),
-      m_ResidualModel(residualModel.clone()),
-      m_AnomalyModel(modelAnomalies ? boost::make_shared<CTimeSeriesAnomalyModel>(params.bucketLength(), params.decayRate())
+CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(
+    const CModelParams& params,
+    const CTimeSeriesDecompositionInterface& trend,
+    const CMultivariatePrior& residualModel,
+    const TDecayRateController2Ary* controllers,
+    bool modelAnomalies)
+    : CModel(params), m_IsNonNegative(false), m_ResidualModel(residualModel.clone()),
+      m_AnomalyModel(modelAnomalies ? boost::make_shared<CTimeSeriesAnomalyModel>(
+                                          params.bucketLength(),
+                                          params.decayRate())
                                     : TAnomalyModelPtr()),
       m_SlidingWindow(SLIDING_WINDOW_SIZE) {
     if (controllers) {
@@ -1775,10 +2005,11 @@ CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CModelParams& p
 }

 CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel& other)
-    : CModel(other.params()),
-      m_IsNonNegative(other.m_IsNonNegative),
+    : CModel(other.params()), m_IsNonNegative(other.m_IsNonNegative),
       m_ResidualModel(other.m_ResidualModel->clone()),
-      m_AnomalyModel(other.m_AnomalyModel ? boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel) : TAnomalyModelPtr()),
+      m_AnomalyModel(other.m_AnomalyModel
+                         ? boost::make_shared<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel)
+                         : TAnomalyModelPtr()),
       m_SlidingWindow(other.m_SlidingWindow) {
     if (other.m_Controllers) {
         m_Controllers = boost::make_shared<TDecayRateController2Ary>(*other.m_Controllers);
     }
@@ -1789,9 +2020,11 @@ CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const CMultivariateTi
     }
 }

-CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser)
+CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(const SModelRestoreParams& params,
+                                                           core::CStateRestoreTraverser& traverser)
     : CModel(params.s_Params), m_SlidingWindow(SLIDING_WINDOW_SIZE) {
-    traverser.traverseSubLevel(boost::bind(&CMultivariateTimeSeriesModel::acceptRestoreTraverser, this, boost::cref(params), _1));
+    traverser.traverseSubLevel(boost::bind(&CMultivariateTimeSeriesModel::acceptRestoreTraverser,
+                                           this, boost::cref(params), _1));
 }

 std::size_t CMultivariateTimeSeriesModel::identifier() const {
@@ -1827,8 +2060,9 @@ void CMultivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec
     // no-op
 }

-CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params,
-                                                                                     TTimeDouble2VecSizeTrVec samples) {
+CMultivariateTimeSeriesModel::EUpdateResult
+CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params,
+                                         TTimeDouble2VecSizeTrVec samples) {
     if (samples.empty()) {
         return E_Success;
     }
@@ -1837,12 +2071,14 @@ CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::addSam
     TSizeVec valueorder(samples.size());
     std::iota(valueorder.begin(), valueorder.end(), 0);
-    std::stable_sort(valueorder.begin(), valueorder.end(), [&samples](std::size_t lhs, std::size_t rhs) {
-        return samples[lhs].second < samples[rhs].second;
-    });
+    std::stable_sort(valueorder.begin(), valueorder.end(),
+                     [&samples](std::size_t lhs, std::size_t rhs) {
+                         return samples[lhs].second < samples[rhs].second;
+                     });

     TOptionalTimeDouble2VecPr randomSample;
-    if (TOptionalSize index = randomlySample(m_Rng, params, this->params().bucketLength(), valueorder)) {
+    if (TOptionalSize index = randomlySample(
+            m_Rng, params, this->params().bucketLength(), valueorder)) {
         randomSample.reset({samples[*index].first, samples[*index].second});
     }

@@ -1856,11 +2092,13 @@ CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::addSam

     std::size_t dimension{this->dimension()};

-    EUpdateResult result{this->updateTrend(params.weightStyles(), samples, params.trendWeights())};
+    EUpdateResult result{this->updateTrend(params.weightStyles(), samples,
+                                           params.trendWeights())};

     for (auto& sample : samples) {
         if (sample.second.size() != dimension) {
-            LOG_ERROR(<< "Unexpected sample dimension: '" << sample.second.size() << " != " << this->dimension() << "' discarding");
+            LOG_ERROR(<< "Unexpected sample dimension: '" << sample.second.size()
+                      << " != " << this->dimension() << "' discarding");
             continue;
         }
         core_t::TTime time{sample.first};
@@ -1869,9 +2107,10 @@ CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::addSam
     }

-    std::stable_sort(valueorder.begin(), valueorder.end(), [&samples](std::size_t lhs, std::size_t rhs) {
-        return samples[lhs].second < samples[rhs].second;
-    });
+    std::stable_sort(valueorder.begin(), valueorder.end(),
+                     [&samples](std::size_t lhs, std::size_t rhs) {
+                         return samples[lhs].second < samples[rhs].second;
+                     });

     TDouble10Vec1Vec samples_;
     TDouble10Vec4Vec1Vec weights_;
@@ -1896,7 +2135,8 @@
CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::addSam errors[0].reserve(samples.size()); errors[1].reserve(samples.size()); for (auto i : valueorder) { - this->appendPredictionErrors(params.propagationInterval(), samples[i].second, errors); + this->appendPredictionErrors(params.propagationInterval(), + samples[i].second, errors); } { CDecayRateController& controller{(*m_Controllers)[E_TrendControl]}; @@ -1906,7 +2146,8 @@ CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::addSam trendMean[d] = m_TrendModel[d]->meanValue(time); } double multiplier{controller.multiplier( - trendMean, errors[E_TrendControl], this->params().bucketLength(), this->params().learnRate(), this->params().decayRate())}; + trendMean, errors[E_TrendControl], this->params().bucketLength(), + this->params().learnRate(), this->params().decayRate())}; if (multiplier != 1.0) { for (const auto& trend : m_TrendModel) { trend->decayRate(multiplier * trend->decayRate()); @@ -1917,11 +2158,9 @@ CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::addSam { CDecayRateController& controller{(*m_Controllers)[E_ResidualControl]}; TDouble1Vec residualMean(m_ResidualModel->marginalLikelihoodMean()); - double multiplier{controller.multiplier(residualMean, - errors[E_ResidualControl], - this->params().bucketLength(), - this->params().learnRate(), - this->params().decayRate())}; + double multiplier{controller.multiplier( + residualMean, errors[E_ResidualControl], this->params().bucketLength(), + this->params().learnRate(), this->params().decayRate())}; if (multiplier != 1.0) { m_ResidualModel->decayRate(multiplier * m_ResidualModel->decayRate()); LOG_TRACE(<< "prior decay rate = " << m_ResidualModel->decayRate()); @@ -1943,7 +2182,9 @@ void CMultivariateTimeSeriesModel::skipTime(core_t::TTime gap) { } CMultivariateTimeSeriesModel::TDouble2Vec -CMultivariateTimeSeriesModel::mode(core_t::TTime time, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4Vec& weights) const { +CMultivariateTimeSeriesModel::mode(core_t::TTime time, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const { std::size_t dimension = this->dimension(); TDouble2Vec result(dimension); TDouble10Vec mode(m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights))); @@ -1953,15 +2194,18 @@ CMultivariateTimeSeriesModel::mode(core_t::TTime time, const maths_t::TWeightSty return result; } -CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec1Vec& /*weights*/) const { +CMultivariateTimeSeriesModel::TDouble2Vec1Vec +CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, + const maths_t::TWeightStyleVec& /*weightStyles*/, + const TDouble2Vec4Vec1Vec& /*weights*/) const { return TDouble2Vec1Vec(); } -CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const { - TDouble10Vec1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights))); +CMultivariateTimeSeriesModel::TDouble2Vec1Vec +CMultivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights) const { + TDouble10Vec1Vec modes( + m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights))); TDouble2Vec1Vec result; result.reserve(modes.size()); for 
(const auto& mode : modes) { @@ -1970,7 +2214,9 @@ CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::resi return result; } -void CMultivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time_, double confidenceInterval, TDouble2Vec1Vec& value) const { +void CMultivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time_, + double confidenceInterval, + TDouble2Vec1Vec& value) const { std::size_t dimension{this->dimension()}; core_t::TTime time{time_[0][0]}; for (std::size_t d = 0u; d < dimension; ++d) { @@ -1979,7 +2225,9 @@ void CMultivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time_, double co } CMultivariateTimeSeriesModel::TDouble2Vec -CMultivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& /*correlated*/, TDouble2Vec hint) const { +CMultivariateTimeSeriesModel::predict(core_t::TTime time, + const TSizeDoublePr1Vec& /*correlated*/, + TDouble2Vec hint) const { using TUnivariatePriorPtr = boost::shared_ptr; std::size_t dimension{this->dimension()}; @@ -1996,16 +2244,20 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Ve TDouble2Vec result(dimension); TDouble10Vec mean(m_ResidualModel->marginalLikelihoodMean()); - for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { + for (std::size_t d = 0u; d < dimension; + --marginalize[std::min(d, dimension - 2)], ++d) { double trend{0.0}; if (m_TrendModel[d]->initialized()) { trend = CBasicStatistics::mean(m_TrendModel[d]->value(time)); } double median{mean[d]}; if (!m_ResidualModel->isNonInformative()) { - TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; - median = hint.empty() ? CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0)) - : marginal->nearestMarginalLikelihoodMean(hint[d]); + TUnivariatePriorPtr marginal{ + m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; + median = hint.empty() + ? CBasicStatistics::mean( + marginal->marginalLikelihoodConfidenceInterval(0.0)) + : marginal->nearestMarginalLikelihoodMean(hint[d]); } result[d] = scale * (trend + median); if (m_IsNonNegative) { @@ -2016,10 +2268,11 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Ve return result; } -CMultivariateTimeSeriesModel::TDouble2Vec3Vec CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, - double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { +CMultivariateTimeSeriesModel::TDouble2Vec3Vec +CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, + double confidenceInterval, + const maths_t::TWeightStyleVec& weightStyles, + const TDouble2Vec4Vec& weights_) const { if (m_ResidualModel->isNonInformative()) { return TDouble2Vec3Vec(); } @@ -2035,8 +2288,11 @@ CMultivariateTimeSeriesModel::TDouble2Vec3Vec CMultivariateTimeSeriesModel::conf TDouble2Vec3Vec result(3, TDouble2Vec(dimension)); TDouble4Vec weights; - for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { - double trend{m_TrendModel[d]->initialized() ? CBasicStatistics::mean(m_TrendModel[d]->value(time, confidenceInterval)) : 0.0}; + for (std::size_t d = 0u; d < dimension; + --marginalize[std::min(d, dimension - 2)], ++d) { + double trend{m_TrendModel[d]->initialized() + ? 
CBasicStatistics::mean(m_TrendModel[d]->value(time, confidenceInterval)) + : 0.0}; weights.clear(); weights.reserve(weights_.size()); @@ -2044,9 +2300,12 @@ CMultivariateTimeSeriesModel::TDouble2Vec3Vec CMultivariateTimeSeriesModel::conf weights.push_back(weight[d]); } - TUnivariatePriorPtr marginal{m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; - double median{CBasicStatistics::mean(marginal->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; - TDoubleDoublePr interval{marginal->marginalLikelihoodConfidenceInterval(confidenceInterval, weightStyles, weights)}; + TUnivariatePriorPtr marginal{ + m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; + double median{CBasicStatistics::mean( + marginal->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; + TDoubleDoublePr interval{marginal->marginalLikelihoodConfidenceInterval( + confidenceInterval, weightStyles, weights)}; result[0][d] = scale * (trend + interval.first); result[1][d] = scale * (trend + median); @@ -2095,7 +2354,8 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa core_t::TTime time{time_[0][0]}; TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; for (std::size_t d = 0u; d < dimension; ++d) { - sample[0][d] = m_TrendModel[d]->detrend(time, value[0][d], params.seasonalConfidenceInterval()); + sample[0][d] = m_TrendModel[d]->detrend( + time, value[0][d], params.seasonalConfidenceInterval()); } TDouble10Vec4Vec1Vec weights{unpack(params.weights()[0])}; bool bucketEmpty{params.bucketEmpty()[0][0]}; @@ -2112,18 +2372,24 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa maths_t::EProbabilityCalculation calculation = params.calculation(i); coordinate[0] = coordinates[i]; if (!m_ResidualModel->probabilityOfLessLikelySamples( - calculation, params.weightStyles(), sample, weights, coordinate, pls, pus, tail_)) { + calculation, params.weightStyles(), sample, weights, coordinate, + pls, pus, tail_)) { LOG_ERROR(<< "Failed to compute P(" << sample << " | weight = " << weights << ")"); return false; } - pl_[0].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, probabilityBucketEmpty, pls[0][0])); - pu_[0].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, probabilityBucketEmpty, pus[0][0])); - pl_[1].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, probabilityBucketEmpty, pls[1][0])); - pu_[1].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, probabilityBucketEmpty, pus[1][0])); + pl_[0].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, + probabilityBucketEmpty, pls[0][0])); + pu_[0].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, + probabilityBucketEmpty, pus[0][0])); + pl_[1].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, + probabilityBucketEmpty, pls[1][0])); + pu_[1].add(correctForEmptyBucket(calculation, value[0], bucketEmpty, + probabilityBucketEmpty, pus[1][0])); tail[i] = tail_[0]; } double pl[2], pu[2]; - if (!pl_[0].calculate(pl[0]) || !pu_[0].calculate(pu[0]) || !pl_[1].calculate(pl[1]) || !pu_[1].calculate(pu[1])) { + if (!pl_[0].calculate(pl[0]) || !pu_[0].calculate(pu[0]) || + !pl_[1].calculate(pl[1]) || !pu_[1].calculate(pu[1])) { return false; } @@ -2145,7 +2411,9 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa } CMultivariateTimeSeriesModel::TDouble2Vec -CMultivariateTimeSeriesModel::winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) 
const { +CMultivariateTimeSeriesModel::winsorisationWeight(double derate, + core_t::TTime time, + const TDouble2Vec& value) const { TDouble2Vec result(this->dimension()); std::size_t dimension{this->dimension()}; @@ -2162,7 +2430,8 @@ CMultivariateTimeSeriesModel::winsorisationWeight(double derate, core_t::TTime t return result; } -CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const { +CMultivariateTimeSeriesModel::TDouble2Vec +CMultivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time) const { TDouble2Vec result(this->dimension()); TDouble10Vec variances(m_ResidualModel->marginalLikelihoodVariances()); for (std::size_t d = 0u, dimension = this->dimension(); d < dimension; ++d) { @@ -2191,61 +2460,74 @@ void CMultivariateTimeSeriesModel::debugMemoryUsage(core::CMemoryUsage::TMemoryU } std::size_t CMultivariateTimeSeriesModel::memoryUsage() const { - return core::CMemory::dynamicSize(m_Controllers) + core::CMemory::dynamicSize(m_TrendModel) + - core::CMemory::dynamicSize(m_ResidualModel) + core::CMemory::dynamicSize(m_AnomalyModel) + + return core::CMemory::dynamicSize(m_Controllers) + + core::CMemory::dynamicSize(m_TrendModel) + + core::CMemory::dynamicSize(m_ResidualModel) + + core::CMemory::dynamicSize(m_AnomalyModel) + core::CMemory::dynamicSize(m_SlidingWindow); } -bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParams& params, + core::CStateRestoreTraverser& traverser) { if (traverser.name() == VERSION_6_3_TAG) { while (traverser.next()) { const std::string& name{traverser.name()}; RESTORE_BOOL(IS_NON_NEGATIVE_6_3_TAG, m_IsNonNegative) RESTORE(RNG_6_3_TAG, m_Rng.fromString(traverser.value())) - RESTORE_SETUP_TEARDOWN(CONTROLLER_6_3_TAG, - m_Controllers = boost::make_shared(), - core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser), - /**/) - RESTORE_SETUP_TEARDOWN(TREND_MODEL_6_3_TAG, - m_TrendModel.push_back(TDecompositionPtr()), - traverser.traverseSubLevel(boost::bind(CTimeSeriesDecompositionStateSerialiser(), - boost::cref(params.s_DecompositionParams), - boost::ref(m_TrendModel.back()), - _1)), - /**/) + RESTORE_SETUP_TEARDOWN( + CONTROLLER_6_3_TAG, + m_Controllers = boost::make_shared(), + core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser), + /**/) + RESTORE_SETUP_TEARDOWN( + TREND_MODEL_6_3_TAG, m_TrendModel.push_back(TDecompositionPtr()), + traverser.traverseSubLevel( + boost::bind(CTimeSeriesDecompositionStateSerialiser(), + boost::cref(params.s_DecompositionParams), + boost::ref(m_TrendModel.back()), _1)), + /**/) RESTORE(RESIDUAL_MODEL_6_3_TAG, traverser.traverseSubLevel(boost::bind( - CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), boost::ref(m_ResidualModel), _1))) - RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_6_3_TAG, - m_AnomalyModel = boost::make_shared(), - traverser.traverseSubLevel(boost::bind( - &CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)), - /**/) - RESTORE(SLIDING_WINDOW_6_3_TAG, core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, traverser)) + CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), + boost::ref(m_ResidualModel), _1))) + RESTORE_SETUP_TEARDOWN( + ANOMALY_MODEL_6_3_TAG, + m_AnomalyModel = boost::make_shared(), + traverser.traverseSubLevel( + 
boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser, + m_AnomalyModel.get(), boost::cref(params), _1)), + /**/) + RESTORE(SLIDING_WINDOW_6_3_TAG, + core::CPersistUtils::restore(SLIDING_WINDOW_6_3_TAG, + m_SlidingWindow, traverser)) } } else { do { const std::string& name{traverser.name()}; RESTORE_BOOL(IS_NON_NEGATIVE_OLD_TAG, m_IsNonNegative) - RESTORE_SETUP_TEARDOWN(CONTROLLER_OLD_TAG, - m_Controllers = boost::make_shared(), - core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser), - /**/) - RESTORE_SETUP_TEARDOWN(TREND_OLD_TAG, - m_TrendModel.push_back(TDecompositionPtr()), - traverser.traverseSubLevel(boost::bind(CTimeSeriesDecompositionStateSerialiser(), - boost::cref(params.s_DecompositionParams), - boost::ref(m_TrendModel.back()), - _1)), - /**/) - RESTORE(PRIOR_OLD_TAG, - traverser.traverseSubLevel(boost::bind( - CPriorStateSerialiser(), boost::cref(params.s_DistributionParams), boost::ref(m_ResidualModel), _1))) - RESTORE_SETUP_TEARDOWN(ANOMALY_MODEL_OLD_TAG, - m_AnomalyModel = boost::make_shared(), - traverser.traverseSubLevel(boost::bind( - &CTimeSeriesAnomalyModel::acceptRestoreTraverser, m_AnomalyModel.get(), boost::cref(params), _1)), - /**/) + RESTORE_SETUP_TEARDOWN( + CONTROLLER_OLD_TAG, + m_Controllers = boost::make_shared(), + core::CPersistUtils::restore(CONTROLLER_6_3_TAG, *m_Controllers, traverser), + /**/) + RESTORE_SETUP_TEARDOWN( + TREND_OLD_TAG, m_TrendModel.push_back(TDecompositionPtr()), + traverser.traverseSubLevel( + boost::bind(CTimeSeriesDecompositionStateSerialiser(), + boost::cref(params.s_DecompositionParams), + boost::ref(m_TrendModel.back()), _1)), + /**/) + RESTORE(PRIOR_OLD_TAG, traverser.traverseSubLevel(boost::bind( + CPriorStateSerialiser(), + boost::cref(params.s_DistributionParams), + boost::ref(m_ResidualModel), _1))) + RESTORE_SETUP_TEARDOWN( + ANOMALY_MODEL_OLD_TAG, + m_AnomalyModel = boost::make_shared(), + traverser.traverseSubLevel( + boost::bind(&CTimeSeriesAnomalyModel::acceptRestoreTraverser, + m_AnomalyModel.get(), boost::cref(params), _1)), + /**/) } while (traverser.next()); } return true; @@ -2260,11 +2542,17 @@ void CMultivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInse core::CPersistUtils::persist(CONTROLLER_6_3_TAG, *m_Controllers, inserter); } for (const auto& trend : m_TrendModel) { - inserter.insertLevel(TREND_MODEL_6_3_TAG, boost::bind(CTimeSeriesDecompositionStateSerialiser(), boost::cref(*trend), _1)); + inserter.insertLevel(TREND_MODEL_6_3_TAG, + boost::bind(CTimeSeriesDecompositionStateSerialiser(), + boost::cref(*trend), _1)); } - inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, boost::bind(CPriorStateSerialiser(), boost::cref(*m_ResidualModel), _1)); + inserter.insertLevel(RESIDUAL_MODEL_6_3_TAG, + boost::bind(CPriorStateSerialiser(), + boost::cref(*m_ResidualModel), _1)); if (m_AnomalyModel != nullptr) { - inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter, m_AnomalyModel.get(), _1)); + inserter.insertLevel(ANOMALY_MODEL_6_3_TAG, + boost::bind(&CTimeSeriesAnomalyModel::acceptPersistInserter, + m_AnomalyModel.get(), _1)); } core::CPersistUtils::persist(SLIDING_WINDOW_6_3_TAG, m_SlidingWindow, inserter); } @@ -2273,7 +2561,8 @@ maths_t::EDataType CMultivariateTimeSeriesModel::dataType() const { return m_ResidualModel->dataType(); } -CMultivariateTimeSeriesModel::TDouble10Vec4Vec CMultivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec& weights) { +CMultivariateTimeSeriesModel::TDouble10Vec4Vec 
+CMultivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec& weights) { TDouble10Vec4Vec result; result.reserve(weights.size()); for (const auto& weight : weights) { @@ -2282,15 +2571,17 @@ CMultivariateTimeSeriesModel::TDouble10Vec4Vec CMultivariateTimeSeriesModel::unp return result; } -void CMultivariateTimeSeriesModel::reinitializeResidualModel(double learnRate, - const TDecompositionPtr10Vec& trend, - const TTimeDouble2VecPrCBuf& slidingWindow, - CMultivariatePrior& residualModel) { +void CMultivariateTimeSeriesModel::reinitializeResidualModel( + double learnRate, + const TDecompositionPtr10Vec& trend, + const TTimeDouble2VecPrCBuf& slidingWindow, + CMultivariatePrior& residualModel) { residualModel.setToNonInformative(0.0, residualModel.decayRate()); if (!slidingWindow.empty()) { std::size_t dimension{residualModel.dimension()}; double slidingWindowLength{static_cast(slidingWindow.size())}; - TDouble10Vec4Vec1Vec weight{{TDouble10Vec(dimension, std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0)))}}; + TDouble10Vec4Vec1Vec weight{{TDouble10Vec( + dimension, std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0)))}}; for (const auto& value : slidingWindow) { TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; for (std::size_t i = 0u; i < dimension; ++i) { @@ -2301,11 +2592,13 @@ void CMultivariateTimeSeriesModel::reinitializeResidualModel(double learnRate, } } -const CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf& CMultivariateTimeSeriesModel::slidingWindow() const { +const CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf& +CMultivariateTimeSeriesModel::slidingWindow() const { return m_SlidingWindow; } -const CMultivariateTimeSeriesModel::TDecompositionPtr10Vec& CMultivariateTimeSeriesModel::trendModel() const { +const CMultivariateTimeSeriesModel::TDecompositionPtr10Vec& +CMultivariateTimeSeriesModel::trendModel() const { return m_TrendModel; } @@ -2313,14 +2606,16 @@ const CMultivariatePrior& CMultivariateTimeSeriesModel::residualModel() const { return *m_ResidualModel; } -CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& weights) { +CMultivariateTimeSeriesModel::EUpdateResult +CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles, + const TTimeDouble2VecSizeTrVec& samples, + const TDouble2Vec4VecVec& weights) { std::size_t dimension{this->dimension()}; for (const auto& sample : samples) { if (sample.second.size() != dimension) { - LOG_ERROR(<< "Dimension mismatch: '" << sample.second.size() << " != " << m_TrendModel.size() << "'"); + LOG_ERROR(<< "Dimension mismatch: '" << sample.second.size() + << " != " << m_TrendModel.size() << "'"); return E_Failure; } } @@ -2329,9 +2624,12 @@ CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::update // or for count feature, the times of all samples will be the same. 
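
[The reinitializeResidualModel hunk above refits the residual prior from the detrended sliding window, giving every window value the weight max(learnRate, min(5 / windowLength, 1)). In other words, the whole window contributes roughly five effective observations unless the learn rate dominates, so a freshly reinitialized model remains easy to move. A toy version, with weighted moments standing in for the multivariate prior (the moments class is an assumption for the demo):]

    #include <algorithm>
    #include <vector>

    // Weighted moments stand in for the residual prior in this demo.
    struct SketchMoments {
        double count = 0.0, mean = 0.0, m2 = 0.0;
        void add(double x, double w) {
            count += w;
            double d = x - mean;
            mean += w * d / count;
            m2 += w * d * (x - mean);
        }
        double variance() const { return count > 0.0 ? m2 / count : 0.0; }
    };

    SketchMoments refitFromWindow(const std::vector<double>& detrendedWindow,
                                  double learnRate) {
        SketchMoments model; // "set to non-informative", then re-add the window
        if (detrendedWindow.empty()) {
            return model;
        }
        // Same weight for every sample: the window as a whole counts for
        // about five observations, unless the learn rate is larger.
        double w = std::max(
            learnRate,
            std::min(5.0 / static_cast<double>(detrendedWindow.size()), 1.0));
        for (double value : detrendedWindow) {
            model.add(value, w);
        }
        return model;
    }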
TSizeVec timeorder(samples.size()); std::iota(timeorder.begin(), timeorder.end(), 0); - std::stable_sort(timeorder.begin(), timeorder.end(), [&samples](std::size_t lhs, std::size_t rhs) { - return COrderings::lexicographical_compare(samples[lhs].first, samples[lhs].second, samples[rhs].first, samples[rhs].second); - }); + std::stable_sort(timeorder.begin(), timeorder.end(), + [&samples](std::size_t lhs, std::size_t rhs) { + return COrderings::lexicographical_compare( + samples[lhs].first, samples[lhs].second, + samples[rhs].first, samples[rhs].second); + }); EUpdateResult result{E_Success}; { @@ -2356,7 +2654,9 @@ CMultivariateTimeSeriesModel::EUpdateResult CMultivariateTimeSeriesModel::update return result; } -void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, const TDouble2Vec& sample, TDouble1VecVec (&result)[2]) { +void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, + const TDouble2Vec& sample, + TDouble1VecVec (&result)[2]) { if (auto error = predictionError(interval, m_ResidualModel, sample)) { result[E_ResidualControl].push_back(*error); } @@ -2366,11 +2666,14 @@ void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, const } void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent() { - reinitializeResidualModel(this->params().learnRate(), m_TrendModel, m_SlidingWindow, *m_ResidualModel); + reinitializeResidualModel(this->params().learnRate(), m_TrendModel, + m_SlidingWindow, *m_ResidualModel); if (m_Controllers != nullptr) { - m_ResidualModel->decayRate(m_ResidualModel->decayRate() / (*m_Controllers)[E_ResidualControl].multiplier()); + m_ResidualModel->decayRate(m_ResidualModel->decayRate() / + (*m_Controllers)[E_ResidualControl].multiplier()); for (auto& trend : m_TrendModel) { - trend->decayRate(trend->decayRate() / (*m_Controllers)[E_TrendControl].multiplier()); + trend->decayRate(trend->decayRate() / + (*m_Controllers)[E_TrendControl].multiplier()); } for (auto& controller : *m_Controllers) { controller.reset(); diff --git a/lib/maths/CTools.cc b/lib/maths/CTools.cc index f8bd1a1d9d..c637cbcdcd 100644 --- a/lib/maths/CTools.cc +++ b/lib/maths/CTools.cc @@ -90,9 +90,12 @@ inline TDoubleBoolPr stationaryPoint(const boost::math::beta_distribution<>& bet template class CPdf { public: - CPdf(const DISTRIBUTION& distribution, double target) : m_Distribution(distribution), m_Target(target) {} + CPdf(const DISTRIBUTION& distribution, double target) + : m_Distribution(distribution), m_Target(target) {} - double operator()(double x) const { return adapters::pdf(m_Distribution, x) - m_Target; } + double operator()(double x) const { + return adapters::pdf(m_Distribution, x) - m_Target; + } private: DISTRIBUTION m_Distribution; @@ -238,7 +241,8 @@ double CTools::SMinusLogCdf::operator()(const students_t& students, double x) co return safeMinusLogCdf(safeCdf(students, x)); } -double CTools::SMinusLogCdf::operator()(const negative_binomial& negativeBinomial, double x) const { +double CTools::SMinusLogCdf::operator()(const negative_binomial& negativeBinomial, + double x) const { return safeMinusLogCdf(safeCdf(negativeBinomial, x)); } @@ -272,7 +276,8 @@ double CTools::SMinusLogCdfComplement::operator()(const students_t& students, do return safeMinusLogCdf(safeCdfComplement(students, x)); } -double CTools::SMinusLogCdfComplement::operator()(const negative_binomial& negativeBinomial, double x) const { +double CTools::SMinusLogCdfComplement:: +operator()(const negative_binomial& negativeBinomial, double x) const { 
return safeMinusLogCdf(safeCdfComplement(negativeBinomial, x)); } @@ -280,7 +285,8 @@ double CTools::SMinusLogCdfComplement::operator()(const lognormal& logNormal, do return safeMinusLogCdf(safeCdfComplement(logNormal, x)); } -double CTools::SMinusLogCdfComplement::operator()(const CLogTDistribution& logt, double x) const { +double CTools::SMinusLogCdfComplement::operator()(const CLogTDistribution& logt, + double x) const { return safeMinusLogCdf(maths::cdfComplement(logt, x)); } @@ -298,13 +304,15 @@ CTools::CProbabilityOfLessLikelySample::CProbabilityOfLessLikelySample(maths_t:: : m_Calculation(calculation) { } -double CTools::CProbabilityOfLessLikelySample::operator()(const SImproperDistribution&, double, maths_t::ETail& tail) const { +double CTools::CProbabilityOfLessLikelySample:: +operator()(const SImproperDistribution&, double, maths_t::ETail& tail) const { // For any finite sample this is one. tail = maths_t::E_MixedOrNeitherTail; return 1.0; } -double CTools::CProbabilityOfLessLikelySample::operator()(const normal& normal_, double x, maths_t::ETail& tail) const { +double CTools::CProbabilityOfLessLikelySample:: +operator()(const normal& normal_, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = boost::math::support(normal_); @@ -344,7 +352,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const normal& normal_, return px; } -double CTools::CProbabilityOfLessLikelySample::operator()(const students_t& students, double x, maths_t::ETail& tail) const { +double CTools::CProbabilityOfLessLikelySample:: +operator()(const students_t& students, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = boost::math::support(students); @@ -383,7 +392,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const students_t& stud return px; } -double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomial& negativeBinomial, double x, maths_t::ETail& tail) const { +double CTools::CProbabilityOfLessLikelySample:: +operator()(const negative_binomial& negativeBinomial, double x, maths_t::ETail& tail) const { x = std::floor(x); double px = 0.0; @@ -404,7 +414,9 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia case maths_t::E_OneSidedAbove: tail = static_cast(tail | maths_t::E_RightTail); - return truncate(2.0 * (safeCdfComplement(negativeBinomial, x) + safePdf(negativeBinomial, x)), 0.0, 1.0); + return truncate(2.0 * (safeCdfComplement(negativeBinomial, x) + + safePdf(negativeBinomial, x)), + 0.0, 1.0); } double fx = safePdf(negativeBinomial, x); @@ -477,7 +489,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia double logOneMinusP = std::log(1 - p); - b1 = std::floor(m + std::log(std::max(fx, MIN_DOUBLE) / std::max(fm, MIN_DOUBLE)) / logOneMinusP); + b1 = std::floor(m + std::log(std::max(fx, MIN_DOUBLE) / std::max(fm, MIN_DOUBLE)) / + logOneMinusP); f1 = safePdf(negativeBinomial, b1); b2 = b1; f2 = f1; @@ -499,7 +512,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia // We compute successively tighter lower bounds on the // bracket point. 
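
[All of these operator() overloads implement one recipe for the probability of a less likely sample: locate the point y on the far side of the mode whose density equals f(x), analytically where possible and otherwise by bracketing the shifted density pdf(y) - f(x), which is exactly what the CPdf helper and makePdf set up for CSolvers::solve, then sum the two tail masses. A self-contained numeric version for a normal density, where symmetry gives the known answer y = 2m - x to check against; plain bisection stands in for the bracketed solver:]

    #include <cmath>
    #include <iostream>

    namespace {
    const double PI = 3.141592653589793;
    }

    double normalPdf(double x, double mean, double sd) {
        double z = (x - mean) / sd;
        return std::exp(-0.5 * z * z) / (sd * std::sqrt(2.0 * PI));
    }

    // Bisection on g(y) = pdf(y) - target over [a, b] with g(a), g(b) of
    // opposite sign; stands in for the bracketed CSolvers::solve call.
    double solveEqualDensity(double a, double b, double target, double mean, double sd) {
        double fa = normalPdf(a, mean, sd) - target;
        for (int i = 0; i < 60; ++i) {
            double mid = 0.5 * (a + b);
            double fm = normalPdf(mid, mean, sd) - target;
            if ((fa < 0.0) == (fm < 0.0)) {
                a = mid;
                fa = fm;
            } else {
                b = mid;
            }
        }
        return 0.5 * (a + b);
    }

    int main() {
        double mean = 1.0, sd = 2.0, x = 4.0; // sample right of the mode
        double fx = normalPdf(x, mean, sd);
        // Bracket the equal-density point on the left of the mode.
        double y = solveEqualDensity(mean - 10.0 * sd, mean, fx, mean, sd);
        std::cout << "y = " << y << " (symmetry predicts " << 2.0 * mean - x << ")\n";
        // The two-sided probability is then cdf(y) + cdf-complement(x).
    }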
- double lowerBound = b2 + std::log(std::max(fx, MIN_DOUBLE) / std::max(f2, MIN_DOUBLE)) / logOneMinusP; + double lowerBound = + b2 + std::log(std::max(fx, MIN_DOUBLE) / std::max(f2, MIN_DOUBLE)) / logOneMinusP; LOG_TRACE(<< "b2 = " << b2 << ", f2 = " << f2 << ", bound = " << lowerBound); if (maxIterations <= 3 * MAX_ITERATIONS / 4) { @@ -523,31 +537,37 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const negative_binomia double eps = 0.05 * px / std::max(fx, MIN_DOUBLE); eps = std::max(eps, EPSILON * std::min(b1, b2)); CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); - CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(negativeBinomial, fx), maxIterations, equal, y); + CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(negativeBinomial, fx), + maxIterations, equal, y); LOG_TRACE(<< "bracket = (" << b1 << "," << b2 << ")" - << ", iterations = " << maxIterations << ", f(y) = " << safePdf(negativeBinomial, y) - fx << ", eps = " << eps); + << ", iterations = " << maxIterations << ", f(y) = " + << safePdf(negativeBinomial, y) - fx << ", eps = " << eps); } catch (const std::exception& e) { if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; } else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; } else { - LOG_ERROR(<< "Failed in root finding: " << e.what() << ", x = " << x << ", bracket = (" << b1 << "," << b2 << ")" + LOG_ERROR(<< "Failed in root finding: " << e.what() << ", x = " << x + << ", bracket = (" << b1 << "," << b2 << ")" << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); return truncate(px, 0.0, 1.0); } } - if ((x < m && y < m) || (x > m && y > m) || !(x >= support.first && x <= support.second)) { + if ((x < m && y < m) || (x > m && y > m) || + !(x >= support.first && x <= support.second)) { LOG_ERROR(<< "Bad root " << y << " (x = " << x << ")"); } - double py = x < m ? safeCdfComplement(negativeBinomial, y) : safeCdf(negativeBinomial, y); + double py = x < m ? safeCdfComplement(negativeBinomial, y) + : safeCdf(negativeBinomial, y); return truncate(px + py + fx, 0.0, 1.0); } -double CTools::CProbabilityOfLessLikelySample::operator()(const lognormal& logNormal, double x, maths_t::ETail& tail) const { +double CTools::CProbabilityOfLessLikelySample:: +operator()(const lognormal& logNormal, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = boost::math::support(logNormal); @@ -574,8 +594,9 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const lognormal& logNo double logx = std::log(x); double squareScale = pow2(logNormal.scale()); - double discriminant = - std::sqrt(pow2(squareScale) + (logx - logNormal.location() + 2.0 * squareScale) * (logx - logNormal.location())); + double discriminant = std::sqrt( + pow2(squareScale) + (logx - logNormal.location() + 2.0 * squareScale) * + (logx - logNormal.location())); double m = boost::math::mode(logNormal); this->tail(x, m, tail); double y = m * std::exp(x > m ? 
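
[For the log-normal branch above no solver is needed: the equal-density point has a closed form, which is where the discriminant just computed comes from. A sketch of the algebra, with location \(\mu\), scale \(\sigma\), and mode \(m = e^{\mu-\sigma^2}\):]

\[
f(x) \;=\; \frac{1}{x\,\sigma\sqrt{2\pi}}\,
\exp\!\Bigl(-\frac{(\ln x-\mu)^2}{2\sigma^2}\Bigr),
\]
so setting \(f(y)=f(x)\) and taking logarithms gives a quadratic in \(\ln y\),
\[
(\ln y-\mu)^2 + 2\sigma^2\ln y \;=\; (\ln x-\mu)^2 + 2\sigma^2\ln x,
\]
with roots
\[
\ln y \;=\; (\mu-\sigma^2)\;\pm\;\sqrt{\sigma^4 + (\ln x-\mu)(\ln x-\mu+2\sigma^2)}.
\]
That is \(y = m\,e^{\pm d}\), with \(d\) the discriminant in the code and the branch chosen to land on the opposite side of the mode from \(x\).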
-discriminant : discriminant); @@ -594,7 +615,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const lognormal& logNo return px; } -double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistribution& logt, double x, maths_t::ETail& tail) const { +double CTools::CProbabilityOfLessLikelySample:: +operator()(const CLogTDistribution& logt, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = maths::support(logt); @@ -660,16 +682,19 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio eps = std::max(eps, EPSILON * std::min(b1, b2)); CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); std::size_t maxIterations = MAX_ITERATIONS; - CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(logt, fx), maxIterations, equal, y); + CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(logt, fx), + maxIterations, equal, y); LOG_TRACE(<< "bracket = (" << b1 << "," << b2 << ")" - << ", iterations = " << maxIterations << ", f(y) = " << pdf(logt, y) - fx << ", eps = " << eps); + << ", iterations = " << maxIterations + << ", f(y) = " << pdf(logt, y) - fx << ", eps = " << eps); } catch (const std::exception& e) { if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; } else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; } else { - LOG_ERROR(<< "Failed in root finding: " << e.what() << ", x = " << x << ", bracket = (" << b1 << "," << b2 << ")" + LOG_ERROR(<< "Failed in root finding: " << e.what() << ", x = " << x + << ", bracket = (" << b1 << "," << b2 << ")" << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); return truncate(px, 0.0, 1.0); } @@ -732,7 +757,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio double f1 = fBound < fx ? 
fm : fBound; double b2 = bound; double f2 = fBound; - LOG_TRACE(<< "b1 = " << b1 << ", f(b1) = " << f1 << ", b2 = " << b2 << ", f(b2) = " << f2); + LOG_TRACE(<< "b1 = " << b1 << ", f(b1) = " << f1 << ", b2 = " << b2 + << ", f(b2) = " << f2); std::size_t maxIterations = MAX_ITERATIONS; @@ -782,16 +808,19 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio double eps = 0.05 * px / std::max(fx, MIN_DOUBLE); eps = std::max(eps, EPSILON * std::min(b1, b2)); CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); - CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(logt, fx), maxIterations, equal, y); + CSolvers::solve(b1, b2, f1 - fx, f2 - fx, makePdf(logt, fx), + maxIterations, equal, y); LOG_TRACE(<< "bracket = (" << b1 << "," << b2 << ")" - << ", iterations = " << maxIterations << ", f(y) = " << pdf(logt, y) - fx << ", eps = " << eps); + << ", iterations = " << maxIterations + << ", f(y) = " << pdf(logt, y) - fx << ", eps = " << eps); } catch (const std::exception& e) { if (std::fabs(f1 - fx) < 10.0 * EPSILON * fx) { y = b1; } else if (std::fabs(f2 - fx) < 10.0 * EPSILON * fx) { y = b2; } else { - LOG_ERROR(<< "Failed in root finding: " << e.what() << ", x = " << x << ", bracket = (" << b1 << "," << b2 << ")" + LOG_ERROR(<< "Failed in root finding: " << e.what() << ", x = " << x + << ", bracket = (" << b1 << "," << b2 << ")" << ", f(bracket) = (" << f1 - fx << "," << f2 - fx << ")"); return truncate(px, 0.0, 1.0); } @@ -800,7 +829,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const CLogTDistributio return truncate(px + cdfComplement(logt, y), 0.0, 1.0); } -double CTools::CProbabilityOfLessLikelySample::operator()(const gamma& gamma_, double x, maths_t::ETail& tail) const { +double CTools::CProbabilityOfLessLikelySample:: +operator()(const gamma& gamma_, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support = boost::math::support(gamma_); @@ -862,7 +892,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma& gamma_, d for (;;) { y[(i + 1) % 2] = x + m * std::log(y[i % 2] / x); LOG_TRACE(<< "y = " << y[(i + 1) % 2]); - if (++i == MAX_ITERATIONS || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) { + if (++i == MAX_ITERATIONS || + std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) { break; } } @@ -888,7 +919,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma& gamma_, d for (;;) { y[(i + 1) % 2] = x * std::exp(-(x - y[i % 2]) / m); LOG_TRACE(<< "y = " << y[(i + 1) % 2]); - if (++i == MAX_ITERATIONS || std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) { + if (++i == MAX_ITERATIONS || + std::fabs(y[1] - y[0]) < CONVERGENCE_TOLERANCE * std::max(y[0], y[1])) { break; } } @@ -959,9 +991,11 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma& gamma_, d CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); std::size_t maxIterations = MAX_ITERATIONS / 2; double candidate; - CSolvers::solve(a, b, fa - fx, fb - fx, makePdf(gamma_, fx), maxIterations, equal, candidate); + CSolvers::solve(a, b, fa - fx, fb - fx, makePdf(gamma_, fx), + maxIterations, equal, candidate); LOG_TRACE(<< "bracket = (" << a << "," << b << ")" - << ", iterations = " << maxIterations << ", f(candidate) = " << safePdf(gamma_, candidate) - fx); + << ", iterations = " << maxIterations + << ", f(candidate) = " << safePdf(gamma_, candidate) - fx); if (std::fabs(safePdf(gamma_, candidate) - fx) < std::fabs(fy - 
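
[The pair of fixed-point iterations in the gamma branch above come from equating log densities: for shape \(a > 1\) and scale \(\theta\), with mode \(m = (a-1)\theta\), the identity \((a-1)\ln y - y/\theta = (a-1)\ln x - x/\theta\) rearranges to y = x + m log(y/x), which contracts right of the mode, and equivalently to y = x exp(-(x - y)/m), which contracts left of it. A standalone check of the first form:]

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    double gammaLogPdf(double x, double shape, double scale) {
        return (shape - 1.0) * std::log(x) - x / scale -
               std::lgamma(shape) - shape * std::log(scale);
    }

    int main() {
        double shape = 3.0, scale = 2.0;
        double m = (shape - 1.0) * scale; // mode = 4
        double x = 1.0;                   // sample left of the mode
        double y = m;                     // start the iteration at the mode
        for (int i = 0; i < 100; ++i) {
            double next = x + m * std::log(y / x);
            if (std::fabs(next - y) < 1e-12 * std::max(next, y)) {
                y = next;
                break;
            }
            y = next;
        }
        // The log densities agree at the fixed point.
        std::cout << "y = " << y
                  << ", log f(x) = " << gammaLogPdf(x, shape, scale)
                  << ", log f(y) = " << gammaLogPdf(y, shape, scale) << '\n';
    }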
fx)) { y[i % 2] = candidate; @@ -972,7 +1006,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma& gamma_, d } else if (std::fabs(fb - fx) < 10.0 * EPSILON * fx) { y[i % 2] = b; } else { - LOG_ERROR(<< "Failed in bracketed solver: " << e.what() << ", x = " << x << ", bracket = (" << a << ", " << b << ")" + LOG_ERROR(<< "Failed in bracketed solver: " << e.what() + << ", x = " << x << ", bracket = (" << a << ", " << b << ")" << ", f(bracket) = (" << fa - fx << "," << fb - fx << ")"); return truncate(px, 0.0, 1.0); } @@ -980,12 +1015,14 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const gamma& gamma_, d LOG_TRACE(<< "f(x) = " << fx << ", f(y) = " << safePdf(gamma_, y[i % 2])); - double py = x > y[i % 2] ? safeCdf(gamma_, y[i % 2]) : safeCdfComplement(gamma_, y[i % 2]); + double py = x > y[i % 2] ? safeCdf(gamma_, y[i % 2]) + : safeCdfComplement(gamma_, y[i % 2]); return truncate(px + py, 0.0, 1.0); } -double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, double x, maths_t::ETail& tail) const { +double CTools::CProbabilityOfLessLikelySample:: +operator()(const beta& beta_, double x, maths_t::ETail& tail) const { double px = 0.0; TDoubleDoublePr support(0.0, 1.0); @@ -1085,7 +1122,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, dou // Max sure y is supported by the p.d.f. if (y[i % 2] > support.second) { - return truncate(sp.second ? safeCdf(beta_, x) : safeCdfComplement(beta_, x), 0.0, 1.0); + return truncate(sp.second ? safeCdf(beta_, x) : safeCdfComplement(beta_, x), + 0.0, 1.0); } y[i % 2] = std::max(y[i % 2], sp.first); @@ -1118,7 +1156,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, dou // Max sure y is supported by the p.d.f. if (y[i % 2] < support.first) { - return truncate(sp.second ? safeCdfComplement(beta_, x) : safeCdf(beta_, x), 0.0, 1.0); + return truncate(sp.second ? safeCdfComplement(beta_, x) : safeCdf(beta_, x), + 0.0, 1.0); } y[i % 2] = std::min(y[i % 2], sp.first); @@ -1137,10 +1176,10 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, dou if (x > y[i % 2]) { std::swap(x, y[i % 2]); } - return truncate(sp.second ? safeCdf(beta_, x) + safeCdfComplement(beta_, y[i % 2]) - : safeCdf(beta_, y[i % 2]) - safeCdf(beta_, x), - 0.0, - 1.0); + return truncate(sp.second + ? 
safeCdf(beta_, x) + safeCdfComplement(beta_, y[i % 2]) + : safeCdf(beta_, y[i % 2]) - safeCdf(beta_, x), + 0.0, 1.0); } else if (error > 0.0) { if (x < sp.first) { bracket = std::make_pair(y[i % 2], bracket.second); @@ -1160,7 +1199,8 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, dou fBracket = std::make_pair(fa, fb); } } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to evaluate p.d.f.: " << e.what() << ", alpha = " << beta_.alpha() << ", beta = " << beta_.beta() + LOG_ERROR(<< "Failed to evaluate p.d.f.: " << e.what() + << ", alpha = " << beta_.alpha() << ", beta = " << beta_.beta() << ", x = " << x << ", y = " << y[i % 2]); return 1.0; } @@ -1179,11 +1219,12 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, dou CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, eps); std::size_t maxIterations = MAX_ITERATIONS; double candidate; - CSolvers::solve( - bracket.first, bracket.second, fBracket.first, fBracket.second, makePdf(beta_, fx), maxIterations, equal, candidate); + CSolvers::solve(bracket.first, bracket.second, fBracket.first, fBracket.second, + makePdf(beta_, fx), maxIterations, equal, candidate); - LOG_TRACE(<< "bracket = " << core::CContainerPrinter::print(bracket) << ", iterations = " << maxIterations - << ", f(candidate) = " << safePdf(beta_, candidate) - fx << ", eps = " << eps); + LOG_TRACE(<< "bracket = " << core::CContainerPrinter::print(bracket) + << ", iterations = " << maxIterations << ", f(candidate) = " + << safePdf(beta_, candidate) - fx << ", eps = " << eps); if (std::fabs(safePdf(beta_, candidate) - fx) < std::fabs(fy - fx)) { y[i % 2] = candidate; @@ -1194,8 +1235,9 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, dou } else if (std::fabs(fBracket.second - fx) < 10.0 * EPSILON * fx) { y[i % 2] = bracket.second; } else { - LOG_ERROR(<< "Failed in bracketed solver: " << e.what() << ", x = " << x << ", bracket " - << core::CContainerPrinter::print(bracket) << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)); + LOG_ERROR(<< "Failed in bracketed solver: " << e.what() << ", x = " << x + << ", bracket " << core::CContainerPrinter::print(bracket) + << ", f(bracket) = " << core::CContainerPrinter::print(fBracket)); return 1.0; } } @@ -1204,11 +1246,15 @@ double CTools::CProbabilityOfLessLikelySample::operator()(const beta& beta_, dou std::swap(x, y[i % 2]); } - return truncate( - sp.second ? safeCdf(beta_, x) + safeCdfComplement(beta_, y[i % 2]) : safeCdf(beta_, y[i % 2]) - safeCdf(beta_, x), 0.0, 1.0); + return truncate(sp.second ? 
safeCdf(beta_, x) + safeCdfComplement(beta_, y[i % 2]) + : safeCdf(beta_, y[i % 2]) - safeCdf(beta_, x), + 0.0, 1.0); } -bool CTools::CProbabilityOfLessLikelySample::check(const TDoubleDoublePr& support, double x, double& px, maths_t::ETail& tail) const { +bool CTools::CProbabilityOfLessLikelySample::check(const TDoubleDoublePr& support, + double x, + double& px, + maths_t::ETail& tail) const { if (CMathsFuncs::isNan(x)) { LOG_ERROR(<< "Bad argument x = " << x); tail = maths_t::E_MixedOrNeitherTail; @@ -1252,11 +1298,12 @@ void CTools::CProbabilityOfLessLikelySample::tail(double x, double mode, maths_t //////// CMixtureProbabilityOfLessLikelySample Implementation //////// -CTools::CMixtureProbabilityOfLessLikelySample::CMixtureProbabilityOfLessLikelySample(std::size_t n, - double x, - double logFx, - double a, - double b) +CTools::CMixtureProbabilityOfLessLikelySample::CMixtureProbabilityOfLessLikelySample( + std::size_t n, + double x, + double logFx, + double a, + double b) : m_X(x), m_LogFx(logFx), m_A(a), m_B(b) { m_Endpoints.reserve(4 * n + 2); m_Endpoints.push_back(a); @@ -1271,7 +1318,9 @@ void CTools::CMixtureProbabilityOfLessLikelySample::reinitialize(double x, doubl m_Endpoints.push_back(m_B); } -void CTools::CMixtureProbabilityOfLessLikelySample::addMode(double weight, double modeMean, double modeSd) { +void CTools::CMixtureProbabilityOfLessLikelySample::addMode(double weight, + double modeMean, + double modeSd) { double deviation = m_LogFx - fastLog(weight) + LOG_ROOT_TWO_PI + fastLog(modeSd); if (deviation >= 0.0) { deviation = 0.0; @@ -1294,7 +1343,8 @@ void CTools::CMixtureProbabilityOfLessLikelySample::addMode(double weight, doubl void CTools::CMixtureProbabilityOfLessLikelySample::intervals(TDoubleDoublePrVec& intervals) { std::sort(m_Endpoints.begin(), m_Endpoints.end()); - m_Endpoints.erase(std::unique(m_Endpoints.begin(), m_Endpoints.end()), m_Endpoints.end()); + m_Endpoints.erase(std::unique(m_Endpoints.begin(), m_Endpoints.end()), + m_Endpoints.end()); intervals.reserve(m_Endpoints.size() - 1); for (std::size_t i = 1u; i < m_Endpoints.size(); ++i) { intervals.emplace_back(m_Endpoints[i - 1], m_Endpoints[i]); @@ -1302,7 +1352,8 @@ void CTools::CMixtureProbabilityOfLessLikelySample::intervals(TDoubleDoublePrVec LOG_TRACE(<< "intervals = " << core::CContainerPrinter::print(intervals)); } -const double CTools::CMixtureProbabilityOfLessLikelySample::LOG_ROOT_TWO_PI = 0.5 * std::log(boost::math::double_constants::two_pi); +const double CTools::CMixtureProbabilityOfLessLikelySample::LOG_ROOT_TWO_PI = + 0.5 * std::log(boost::math::double_constants::two_pi); //////// SIntervalExpectation Implementation //////// @@ -1328,10 +1379,12 @@ double CTools::SIntervalExpectation::operator()(const normal& normal_, double a, return expa == expb ? (a + b) / 2.0 : (a * expa + b * expb) / (expa + expb); } - return mean + 2.0 * sd * (expa - expb) / boost::math::double_constants::root_two_pi / (erfb - erfa); + return mean + 2.0 * sd * (expa - expb) / + boost::math::double_constants::root_two_pi / (erfb - erfa); } -double CTools::SIntervalExpectation::operator()(const lognormal& logNormal, double a, double b) const { +double CTools::SIntervalExpectation:: +operator()(const lognormal& logNormal, double a, double b) const { if (a > b) { std::swap(a, b); } @@ -1357,7 +1410,8 @@ double CTools::SIntervalExpectation::operator()(const lognormal& logNormal, doub if (erfb - erfa < std::sqrt(EPSILON)) { double expa = loga == NEG_INF ? 0.0 : std::exp(-a_ * a_); double expb = logb == POS_INF ? 
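
[The normal branch of SIntervalExpectation above is the standard truncated-normal mean; writing it in the usual form shows why the erf/exp combination appears. With \(\alpha = (a-\mu)/\sigma\) and \(\beta = (b-\mu)/\sigma\):]

\[
\mathbb{E}[X \mid a < X < b] \;=\; \mu + \sigma\,
\frac{\varphi(\alpha)-\varphi(\beta)}{\Phi(\beta)-\Phi(\alpha)},
\]
and substituting \(\varphi(t)=e^{-t^2/2}/\sqrt{2\pi}\) together with
\(\Phi(\beta)-\Phi(\alpha)=\tfrac12\bigl(\operatorname{erf} b'-\operatorname{erf} a'\bigr)\),
where \(a'=\alpha/\sqrt2\) and \(b'=\beta/\sqrt2\), recovers the code's
\[
\mu \;+\; \frac{2\sigma\,(e^{-a'^2}-e^{-b'^2})}
{\sqrt{2\pi}\,\bigl(\operatorname{erf} b'-\operatorname{erf} a'\bigr)}.
\]
[When \(\operatorname{erf} b'-\operatorname{erf} a'\) underflows, the code instead falls back to a density-weighted average of the endpoints, the correct limit as the interval mass vanishes.]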
0.0 : std::exp(-b_ * b_); - return expa == expb ? (2.0 * a / (a + b)) * b : (expa + expb) / (expa / a + expb / b); + return expa == expb ? (2.0 * a / (a + b)) * b + : (expa + expb) / (expa / a + expb / b); } double erfa_ = a_ == NEG_INF ? -1.0 : boost::math::erf(a_); @@ -1410,46 +1464,56 @@ using AllowOverflow = policy>; inline boost::math::normal_distribution allowOverflow(const boost::math::normal_distribution<>& normal) { - return boost::math::normal_distribution(normal.mean(), normal.standard_deviation()); + return boost::math::normal_distribution( + normal.mean(), normal.standard_deviation()); } inline boost::math::students_t_distribution allowOverflow(const boost::math::students_t_distribution<>& students) { - return boost::math::students_t_distribution(students.degrees_of_freedom()); + return boost::math::students_t_distribution( + students.degrees_of_freedom()); } inline boost::math::poisson_distribution allowOverflow(const boost::math::poisson_distribution<>& poisson) { - return boost::math::poisson_distribution(poisson.mean()); + return boost::math::poisson_distribution( + poisson.mean()); } inline boost::math::negative_binomial_distribution allowOverflow(const boost::math::negative_binomial_distribution<>& negativeBinomial) { - return boost::math::negative_binomial_distribution(negativeBinomial.successes(), - negativeBinomial.success_fraction()); + return boost::math::negative_binomial_distribution( + negativeBinomial.successes(), negativeBinomial.success_fraction()); } inline boost::math::lognormal_distribution allowOverflow(const boost::math::lognormal_distribution<>& logNormal) { - return boost::math::lognormal_distribution(logNormal.location(), logNormal.scale()); + return boost::math::lognormal_distribution( + logNormal.location(), logNormal.scale()); } -inline boost::math::gamma_distribution allowOverflow(const boost::math::gamma_distribution<>& gamma) { - return boost::math::gamma_distribution(gamma.shape(), gamma.scale()); +inline boost::math::gamma_distribution +allowOverflow(const boost::math::gamma_distribution<>& gamma) { + return boost::math::gamma_distribution( + gamma.shape(), gamma.scale()); } -inline boost::math::beta_distribution allowOverflow(const boost::math::beta_distribution<>& beta) { - return boost::math::beta_distribution(beta.alpha(), beta.beta()); +inline boost::math::beta_distribution +allowOverflow(const boost::math::beta_distribution<>& beta) { + return boost::math::beta_distribution( + beta.alpha(), beta.beta()); } inline boost::math::binomial_distribution allowOverflow(const boost::math::binomial_distribution<>& binomial) { - return boost::math::binomial_distribution(binomial.trials(), binomial.success_fraction()); + return boost::math::binomial_distribution( + binomial.trials(), binomial.success_fraction()); } inline boost::math::chi_squared_distribution allowOverflow(const boost::math::chi_squared_distribution<>& chi2) { - return boost::math::chi_squared_distribution(chi2.degrees_of_freedom()); + return boost::math::chi_squared_distribution( + chi2.degrees_of_freedom()); } } @@ -1658,20 +1722,23 @@ double CTools::anomalyScore(double p) { if (adjP >= SMALL_PROBABILITY) { // We use a linear scaling based on the inverse probability // into the range (0.0, 1.0]. 
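
[Backing up to the allowOverflow helpers above: each one rebinds a distribution to a boost.math policy so that evaluations which would overflow return an infinity instead of throwing. The template arguments were lost in this copy of the patch; based on the helper names, a policy along the following lines (an assumption, not a quote of the original) reproduces the behaviour:]

    #include <boost/math/distributions/normal.hpp>
    #include <boost/math/policies/policy.hpp>

    namespace bmp = boost::math::policies;

    // A policy that returns an infinity on overflow instead of throwing.
    using AllowOverflowSketch =
        bmp::policy<bmp::overflow_error<bmp::ignore_error>>;
    using TolerantNormal =
        boost::math::normal_distribution<double, AllowOverflowSketch>;

    // Rebind an existing distribution to the tolerant policy, as each
    // allowOverflow overload above does for its distribution type.
    TolerantNormal allowOverflowSketch(const boost::math::normal_distribution<>& normal) {
        return TolerantNormal(normal.mean(), normal.standard_deviation());
    }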
- result = SMALL_PROBABILITY_ANOMALY_SCORE * (1.0 / adjP - INV_LARGEST_SIGNIFICANT_PROBABILITY) / + result = SMALL_PROBABILITY_ANOMALY_SCORE * + (1.0 / adjP - INV_LARGEST_SIGNIFICANT_PROBABILITY) / (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY); } else if (adjP >= MINUSCULE_PROBABILITY) { // We use a linear scaling based on the log probability into // the range (1.0, 50.0]. - result = SMALL_PROBABILITY_ANOMALY_SCORE + (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE) * - (-std::log(adjP) - MINUS_LOG_SMALL_PROBABILITY) / - (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY); + result = SMALL_PROBABILITY_ANOMALY_SCORE + + (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE) * + (-std::log(adjP) - MINUS_LOG_SMALL_PROBABILITY) / + (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY); } else { // We use a linear scaling based on the log probability into // the range (50.0, 100.0]. - result = MINUSCULE_PROBABILITY_ANOMALY_SCORE + (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE) * - (-std::log(adjP) - MINUS_LOG_MINUSCULE_PROBABILITY) / - (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY); + result = MINUSCULE_PROBABILITY_ANOMALY_SCORE + + (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE) * + (-std::log(adjP) - MINUS_LOG_MINUSCULE_PROBABILITY) / + (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY); } } @@ -1694,19 +1761,24 @@ double CTools::inverseAnomalyScore(double deviation) { // We invert the linear scaling of the inverse probability // into the range (0.0, 1.0]. result = 1.0 / (INV_LARGEST_SIGNIFICANT_PROBABILITY + - (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY) * deviation / SMALL_PROBABILITY_ANOMALY_SCORE); + (INV_SMALL_PROBABILITY - INV_LARGEST_SIGNIFICANT_PROBABILITY) * + deviation / SMALL_PROBABILITY_ANOMALY_SCORE); } else if (adjDeviation <= MINUSCULE_PROBABILITY_ANOMALY_SCORE) { // We invert the linear scaling of the log probability // into the range (1.0, 50.0]. - result = std::exp(-(MINUS_LOG_SMALL_PROBABILITY + (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY) * - (deviation - SMALL_PROBABILITY_ANOMALY_SCORE) / - (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE))); + result = std::exp( + -(MINUS_LOG_SMALL_PROBABILITY + + (MINUS_LOG_MINUSCULE_PROBABILITY - MINUS_LOG_SMALL_PROBABILITY) * + (deviation - SMALL_PROBABILITY_ANOMALY_SCORE) / + (MINUSCULE_PROBABILITY_ANOMALY_SCORE - SMALL_PROBABILITY_ANOMALY_SCORE))); } else { // We invert the linear scaling of the log probability // into the range (50.0, 100.0]. - result = std::exp(-(MINUS_LOG_MINUSCULE_PROBABILITY + (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY) * - (deviation - MINUSCULE_PROBABILITY_ANOMALY_SCORE) / - (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE))); + result = std::exp( + -(MINUS_LOG_MINUSCULE_PROBABILITY + + (MINUS_LOG_SMALLEST_PROBABILITY - MINUS_LOG_MINUSCULE_PROBABILITY) * + (deviation - MINUSCULE_PROBABILITY_ANOMALY_SCORE) / + (MAX_ANOMALY_SCORE - MINUSCULE_PROBABILITY_ANOMALY_SCORE))); } if (!(result >= 0.0 && result <= 1.0)) { @@ -1744,7 +1816,8 @@ double CTools::differentialEntropy(const normal& normal_) { // m is the mean and variance of the normal distribution. 
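
[Returning to anomalyScore and inverseAnomalyScore just above: the mapping is piecewise, linear in 1/p for merely small probabilities and linear in -log p for the two rarer bands, with inverseAnomalyScore undoing each branch. A compact sketch of the forward map with illustrative band edges (the real SMALL_PROBABILITY, MINUSCULE_PROBABILITY, etc. constants live elsewhere in CTools; the values below are assumptions chosen only to make the bands continuous):]

    #include <algorithm>
    #include <cmath>

    double sketchAnomalyScore(double p) {
        const double largestSignificant = 0.05; // assumed band edges
        const double small = 1e-4;
        const double minuscule = 1e-49;
        const double smallest = 1e-300;
        p = std::clamp(p, smallest, 1.0);
        if (p >= largestSignificant) {
            return 0.0;
        }
        if (p >= small) { // scores in (0, 1]: linear in 1/p
            return (1.0 / p - 1.0 / largestSignificant) /
                   (1.0 / small - 1.0 / largestSignificant);
        }
        if (p >= minuscule) { // scores in (1, 50]: linear in -log p
            return 1.0 + 49.0 * (-std::log(p) + std::log(small)) /
                             (-std::log(minuscule) + std::log(small));
        }
        // scores in (50, 100]: linear in -log p
        return 50.0 + 50.0 * (-std::log(p) + std::log(minuscule)) /
                          (-std::log(smallest) + std::log(minuscule));
    }

[Continuity at the band edges is what makes the map invertible, which inverseAnomalyScore relies on.]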
double variance = boost::math::variance(normal_); - return 0.5 * std::log(boost::math::double_constants::two_pi * boost::math::double_constants::e * variance); + return 0.5 * std::log(boost::math::double_constants::two_pi * + boost::math::double_constants::e * variance); } double CTools::differentialEntropy(const lognormal& logNormal) { @@ -1756,7 +1829,9 @@ double CTools::differentialEntropy(const lognormal& logNormal) { double location = logNormal.location(); double scale = logNormal.scale(); - return 0.5 * std::log(boost::math::double_constants::two_pi * boost::math::double_constants::e * pow2(scale)) + location; + return 0.5 * std::log(boost::math::double_constants::two_pi * + boost::math::double_constants::e * pow2(scale)) + + location; } double CTools::differentialEntropy(const gamma& gamma_) { @@ -1769,7 +1844,8 @@ double CTools::differentialEntropy(const gamma& gamma_) { double shape = gamma_.shape(); double scale = gamma_.scale(); - return shape + std::log(scale) + boost::math::lgamma(shape) + (1 - shape) * boost::math::digamma(shape); + return shape + std::log(scale) + boost::math::lgamma(shape) + + (1 - shape) * boost::math::digamma(shape); } //////// CGroup Implementation //////// @@ -1793,16 +1869,18 @@ bool CTools::CGroup::overlap(const CGroup& other, double separation) const { double lr{this->rightEndpoint(separation)}; double rl{other.leftEndpoint(separation)}; double rr{other.rightEndpoint(separation)}; - return !(TOL * (lr + separation) <= rl || ll >= TOL * (rr + separation) || TOL * (rr + separation) <= ll || - rl >= TOL * (lr + separation)); + return !(TOL * (lr + separation) <= rl || ll >= TOL * (rr + separation) || + TOL * (rr + separation) <= ll || rl >= TOL * (lr + separation)); } double CTools::CGroup::leftEndpoint(double separation) const { - return CBasicStatistics::mean(m_Centre) - static_cast(m_B - m_A) * separation / 2.0; + return CBasicStatistics::mean(m_Centre) - + static_cast(m_B - m_A) * separation / 2.0; } double CTools::CGroup::rightEndpoint(double separation) const { - return CBasicStatistics::mean(m_Centre) + static_cast(m_B - m_A) * separation / 2.0; + return CBasicStatistics::mean(m_Centre) + + static_cast(m_B - m_A) * separation / 2.0; } const CTools::CLookupTableForFastLog CTools::FAST_LOG_TABLE; @@ -1811,7 +1889,8 @@ const CTools::CLookupTableForFastLog CTools::FAST_LO namespace { const double EPS{0.1}; -const double COEFFS[]{-1.0, +1.0 / 2.0, -1.0 / 6.0, +1.0 / 24.0, -1.0 / 120.0, +1.0 / 720.0}; +const double COEFFS[]{-1.0, +1.0 / 2.0, -1.0 / 6.0, + +1.0 / 24.0, -1.0 / 120.0, +1.0 / 720.0}; const std::size_t N{boost::size(COEFFS)}; } diff --git a/lib/maths/CTrendComponent.cc b/lib/maths/CTrendComponent.cc index c8ca091e74..9bf7cad3bb 100644 --- a/lib/maths/CTrendComponent.cc +++ b/lib/maths/CTrendComponent.cc @@ -41,7 +41,10 @@ const std::size_t LEVEL_CHANGE_LABEL{1}; //! Get the desired weight for the regression model. double modelWeight(double targetDecayRate, double modelDecayRate) { - return targetDecayRate == modelDecayRate ? 1.0 : std::min(targetDecayRate, modelDecayRate) / std::max(targetDecayRate, modelDecayRate); + return targetDecayRate == modelDecayRate + ? 1.0 + : std::min(targetDecayRate, modelDecayRate) / + std::max(targetDecayRate, modelDecayRate); } //! 
We scale the time used for the regression model to improve @@ -58,18 +61,18 @@ TOptionalDoubleDoublePr confidenceInterval(double prediction, double variance, d double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; return std::make_pair(ql, qu); } catch (const std::exception& e) { - LOG_ERROR("Failed calculating confidence interval: " << e.what() << ", prediction = " << prediction << ", variance = " << variance - << ", confidence = " << confidence); + LOG_ERROR("Failed calculating confidence interval: " + << e.what() << ", prediction = " << prediction + << ", variance = " << variance << ", confidence = " << confidence); } return TOptionalDoubleDoublePr{}; } CNaiveBayes initialProbabilityOfChangeModel(double decayRate) { decayRate *= TIME_SCALES[NUMBER_MODELS - 1]; - return CNaiveBayes{ - CNaiveBayesFeatureDensityFromPrior{CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate)}, - decayRate, - -20.0}; + return CNaiveBayes{CNaiveBayesFeatureDensityFromPrior{CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, decayRate)}, + decayRate, -20.0}; } CNormalMeanPrecConjugate initialMagnitudeOfChangeModel(double decayRate) { @@ -92,12 +95,9 @@ const std::string RESIDUAL_MOMENTS_TAG{"c"}; } CTrendComponent::CTrendComponent(double decayRate) - : m_DefaultDecayRate(decayRate), - m_TargetDecayRate(decayRate), - m_FirstUpdate(UNSET_TIME), - m_LastUpdate(UNSET_TIME), - m_RegressionOrigin(UNSET_TIME), - m_PredictionErrorVariance(0.0), + : m_DefaultDecayRate(decayRate), m_TargetDecayRate(decayRate), + m_FirstUpdate(UNSET_TIME), m_LastUpdate(UNSET_TIME), + m_RegressionOrigin(UNSET_TIME), m_PredictionErrorVariance(0.0), m_TimeOfLastLevelChange(UNSET_TIME), m_ProbabilityOfLevelChangeModel(initialProbabilityOfChangeModel(decayRate)), m_MagnitudeOfLevelChangeModel(initialMagnitudeOfChangeModel(decayRate)) { @@ -126,18 +126,23 @@ void CTrendComponent::acceptPersistInserter(core::CStatePersistInserter& inserte inserter.insertValue(LAST_UPDATE_TAG, m_LastUpdate); inserter.insertValue(REGRESSION_ORIGIN_TAG, m_RegressionOrigin); for (const auto& model : m_TrendModels) { - inserter.insertLevel(MODEL_TAG, boost::bind(&SModel::acceptPersistInserter, &model, _1)); + inserter.insertLevel( + MODEL_TAG, boost::bind(&SModel::acceptPersistInserter, &model, _1)); } - inserter.insertValue(PREDICTION_ERROR_VARIANCE_TAG, m_PredictionErrorVariance, core::CIEEE754::E_DoublePrecision); + inserter.insertValue(PREDICTION_ERROR_VARIANCE_TAG, m_PredictionErrorVariance, + core::CIEEE754::E_DoublePrecision); inserter.insertValue(VALUE_MOMENTS_TAG, m_ValueMoments.toDelimited()); inserter.insertValue(TIME_OF_LAST_LEVEL_CHANGE_TAG, m_TimeOfLastLevelChange); inserter.insertLevel(PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG, - boost::bind(&CNaiveBayes::acceptPersistInserter, &m_ProbabilityOfLevelChangeModel, _1)); + boost::bind(&CNaiveBayes::acceptPersistInserter, + &m_ProbabilityOfLevelChangeModel, _1)); inserter.insertLevel(MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG, - boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, &m_MagnitudeOfLevelChangeModel, _1)); + boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, + &m_MagnitudeOfLevelChangeModel, _1)); } -bool CTrendComponent::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CTrendComponent::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { std::size_t i{0}; do { const std::string& 
name{traverser.name()}; @@ -145,12 +150,16 @@ bool CTrendComponent::acceptRestoreTraverser(const SDistributionRestoreParams& p RESTORE_BUILT_IN(FIRST_UPDATE_TAG, m_FirstUpdate) RESTORE_BUILT_IN(LAST_UPDATE_TAG, m_LastUpdate) RESTORE_BUILT_IN(REGRESSION_ORIGIN_TAG, m_RegressionOrigin) - RESTORE(MODEL_TAG, traverser.traverseSubLevel(boost::bind(&SModel::acceptRestoreTraverser, &m_TrendModels[i++], _1))) + RESTORE(MODEL_TAG, traverser.traverseSubLevel(boost::bind( + &SModel::acceptRestoreTraverser, &m_TrendModels[i++], _1))) RESTORE_BUILT_IN(PREDICTION_ERROR_VARIANCE_TAG, m_PredictionErrorVariance) RESTORE(VALUE_MOMENTS_TAG, m_ValueMoments.fromDelimited(traverser.value())) RESTORE_BUILT_IN(TIME_OF_LAST_LEVEL_CHANGE_TAG, m_TimeOfLastLevelChange) - RESTORE_NO_ERROR(PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG, m_ProbabilityOfLevelChangeModel = CNaiveBayes(params, traverser)) - RESTORE_NO_ERROR(MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG, m_MagnitudeOfLevelChangeModel = CNormalMeanPrecConjugate(params, traverser)) + RESTORE_NO_ERROR(PROBABILITY_OF_LEVEL_CHANGE_MODEL_TAG, + m_ProbabilityOfLevelChangeModel = CNaiveBayes(params, traverser)) + RESTORE_NO_ERROR(MAGNITUDE_OF_LEVEL_CHANGE_MODEL_TAG, + m_MagnitudeOfLevelChangeModel = + CNormalMeanPrecConjugate(params, traverser)) } while (traverser.next()); return true; } @@ -197,16 +206,19 @@ void CTrendComponent::shiftLevel(core_t::TTime time, double value, double shift) } if (m_TimeOfLastLevelChange != UNSET_TIME) { double dt{static_cast(time - m_TimeOfLastLevelChange)}; - m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, {{dt}, {value}}); + m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, + {{dt}, {value}}); } - m_MagnitudeOfLevelChangeModel.addSamples({maths_t::E_SampleCountWeight}, {shift}, {{1.0}}); + m_MagnitudeOfLevelChangeModel.addSamples({maths_t::E_SampleCountWeight}, + {shift}, {{1.0}}); m_TimeOfLastLevelChange = time; } void CTrendComponent::dontShiftLevel(core_t::TTime time, double value) { if (m_TimeOfLastLevelChange != UNSET_TIME) { double dt{static_cast(time - m_TimeOfLastLevelChange)}; - m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(NO_CHANGE_LABEL, {{dt}, {value}}); + m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(NO_CHANGE_LABEL, + {{dt}, {value}}); } } @@ -221,7 +233,8 @@ void CTrendComponent::add(core_t::TTime time, double value, double weight) { // relative difference in the component scale and the target scale. for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - m_TrendModels[i].s_Weight.add(modelWeight(m_TargetDecayRate, m_DefaultDecayRate * TIME_SCALES[i])); + m_TrendModels[i].s_Weight.add( + modelWeight(m_TargetDecayRate, m_DefaultDecayRate * TIME_SCALES[i])); } // Update the models. 
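
[For context on the add() hunk above: CTrendComponent keeps one regression per time scale and, on every update, re-weights each model by how close its effective decay rate is to the target rate; modelWeight is simply the ratio of the smaller rate to the larger. A standalone rendering (the TIME_SCALES values here are assumptions for the demo):]

    #include <algorithm>
    #include <iostream>

    double sketchModelWeight(double targetDecayRate, double modelDecayRate) {
        return targetDecayRate == modelDecayRate
                   ? 1.0
                   : std::min(targetDecayRate, modelDecayRate) /
                         std::max(targetDecayRate, modelDecayRate);
    }

    int main() {
        const double timeScales[] = {0.25, 0.5, 1.0, 2.0, 4.0}; // assumed values
        double defaultDecayRate = 0.01;
        double targetDecayRate = 0.02;
        for (double scale : timeScales) {
            // The model whose scaled decay rate matches the target gets
            // weight 1; mismatched models fall off as a simple ratio.
            std::cout << "scale " << scale << " -> weight "
                      << sketchModelWeight(targetDecayRate, defaultDecayRate * scale)
                      << '\n';
        }
    }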
@@ -234,7 +247,8 @@ void CTrendComponent::add(core_t::TTime time, double value, double weight) { double count{this->count()}; if (count > 0.0) { - TMeanVarAccumulator moments{CBasicStatistics::accumulator(count, prediction, m_PredictionErrorVariance)}; + TMeanVarAccumulator moments{CBasicStatistics::accumulator( + count, prediction, m_PredictionErrorVariance)}; moments.add(value, weight); m_PredictionErrorVariance = CBasicStatistics::maximumLikelihoodVariance(moments); } @@ -271,12 +285,14 @@ void CTrendComponent::propagateForwardsByTime(core_t::TTime interval) { m_TrendModels[i].s_Regression.age(factors[i]); m_TrendModels[i].s_ResidualMoments.age(std::sqrt(factors[i])); } - double interval_{static_cast(interval) / static_cast(core::constants::DAY)}; + double interval_{static_cast(interval) / + static_cast(core::constants::DAY)}; m_ProbabilityOfLevelChangeModel.propagateForwardsByTime(interval_); m_MagnitudeOfLevelChangeModel.propagateForwardsByTime(interval_); } -CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, double confidence) const { +CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, + double confidence) const { if (!this->initialized()) { return {0.0, 0.0}; } @@ -289,16 +305,19 @@ CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, doub { TDoubleVec factors(this->factors(std::abs(time - m_LastUpdate))); for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - prediction_.add(m_TrendModels[i].s_Regression.predict(scaledTime, MAX_CONDITION), - factors[i] * CBasicStatistics::mean(m_TrendModels[i].s_Weight)); + prediction_.add( + m_TrendModels[i].s_Regression.predict(scaledTime, MAX_CONDITION), + factors[i] * CBasicStatistics::mean(m_TrendModels[i].s_Weight)); } } - double prediction{a * CBasicStatistics::mean(prediction_) + b * CBasicStatistics::mean(m_ValueMoments)}; + double prediction{a * CBasicStatistics::mean(prediction_) + + b * CBasicStatistics::mean(m_ValueMoments)}; if (confidence > 0.0 && m_PredictionErrorVariance > 0.0) { double variance{a * m_PredictionErrorVariance / std::max(this->count(), 1.0) + - b * CBasicStatistics::variance(m_ValueMoments) / std::max(CBasicStatistics::count(m_ValueMoments), 1.0)}; + b * CBasicStatistics::variance(m_ValueMoments) / + std::max(CBasicStatistics::count(m_ValueMoments), 1.0)}; if (auto interval = confidenceInterval(prediction, variance, confidence)) { return *interval; } @@ -322,7 +341,8 @@ CTrendComponent::TDoubleDoublePr CTrendComponent::variance(double confidence) co double qu{boost::math::quantile(chi, (100.0 + confidence) / 200.0)}; return {ql * variance / df, qu * variance / df}; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() << ", df = " << df << ", confidence = " << confidence); + LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() + << ", df = " << df << ", confidence = " << confidence); } } @@ -346,7 +366,7 @@ void CTrendComponent::forecast(core_t::TTime startTime, endTime = startTime + CIntegerTools::ceil(endTime - startTime, step); - LOG_TRACE(<< "forecasting = " << this->print()); + LOG_TRACE(<< "forecasting = " << this->print()); TDoubleVec factors(this->factors(step)); TDoubleVec modelWeights(this->initialForecastModelWeights()); @@ -357,17 +377,19 @@ void CTrendComponent::forecast(core_t::TTime startTime, for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { const SModel& model{m_TrendModels[i]}; model.s_Regression.parameters(models[i], MAX_CONDITION); - 
model.s_Regression.covariances(m_PredictionErrorVariance, modelCovariances[i], MAX_CONDITION); + model.s_Regression.covariances(m_PredictionErrorVariance, + modelCovariances[i], MAX_CONDITION); modelCovariances[i] /= std::max(model.s_Regression.count(), 1.0); - residualVariances[i] = - CTools::pow2(CBasicStatistics::mean(model.s_ResidualMoments)) + CBasicStatistics::variance(model.s_ResidualMoments); + residualVariances[i] = CTools::pow2(CBasicStatistics::mean(model.s_ResidualMoments)) + + CBasicStatistics::variance(model.s_ResidualMoments); LOG_TRACE("params = " << core::CContainerPrinter::print(models[i])); LOG_TRACE("covariances = " << modelCovariances[i].toDelimited()) LOG_TRACE("variances = " << residualVariances[i]); } LOG_TRACE(<< "long time variance = " << CBasicStatistics::variance(m_ValueMoments)); - CForecastLevel level{m_ProbabilityOfLevelChangeModel, m_MagnitudeOfLevelChangeModel, m_TimeOfLastLevelChange}; + CForecastLevel level{m_ProbabilityOfLevelChangeModel, + m_MagnitudeOfLevelChangeModel, m_TimeOfLastLevelChange}; TDoubleVec variances(NUMBER_MODELS + 1); for (core_t::TTime time = startTime; time < endTime; time += step) { @@ -394,21 +416,22 @@ void CTrendComponent::forecast(core_t::TTime startTime, variance_.add(variances[j], errorWeights[j]); } - double prediction{this->value(modelWeights, models, scaleTime(time, m_RegressionOrigin))}; + double prediction{this->value(modelWeights, models, + scaleTime(time, m_RegressionOrigin))}; TDouble3Vec seasonal_(seasonal(time)); TDouble3Vec level_(level.forecast(time, seasonal_[1] + prediction, confidence)); double ql{0.0}; double qu{0.0}; - double variance{a * CBasicStatistics::mean(variance_) + b * CBasicStatistics::variance(m_ValueMoments)}; + double variance{a * CBasicStatistics::mean(variance_) + + b * CBasicStatistics::variance(m_ValueMoments)}; if (auto interval = confidenceInterval(0.0, variance, confidence)) { boost::tie(ql, qu) = *interval; } - writer(time, - {level_[0] + seasonal_[0] + prediction + ql, - level_[1] + seasonal_[1] + prediction, - level_[2] + seasonal_[2] + prediction + qu}); + writer(time, {level_[0] + seasonal_[0] + prediction + ql, + level_[1] + seasonal_[1] + prediction, + level_[2] + seasonal_[2] + prediction + qu}); } } @@ -450,7 +473,8 @@ std::string CTrendComponent::print() const { CTrendComponent::TDoubleVec CTrendComponent::factors(core_t::TTime interval) const { TDoubleVec result(NUMBER_MODELS); - double factor{m_DefaultDecayRate * static_cast<double>(interval) / static_cast<double>(core::constants::DAY)}; + double factor{m_DefaultDecayRate * static_cast<double>(interval) / + static_cast<double>(core::constants::DAY)}; for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { result[i] = std::exp(-TIME_SCALES[i] * factor); } @@ -460,7 +484,8 @@ CTrendComponent::TDoubleVec CTrendComponent::factors(core_t::TTime interval) con CTrendComponent::TDoubleVec CTrendComponent::initialForecastModelWeights() const { TDoubleVec result(NUMBER_MODELS); for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - result[i] = std::exp(static_cast<double>(NUMBER_MODELS / 2) - static_cast<double>(i)); + result[i] = std::exp(static_cast<double>(NUMBER_MODELS / 2) - + static_cast<double>(i)); } return result; } @@ -468,7 +493,8 @@ CTrendComponent::TDoubleVec CTrendComponent::initialForecastModelWeights() const CTrendComponent::TDoubleVec CTrendComponent::initialForecastErrorWeights() const { TDoubleVec result(NUMBER_MODELS + 1); for (std::size_t i = 0u; i < NUMBER_MODELS; ++i) { - result[i] = std::exp(static_cast<double>(NUMBER_MODELS / 2) - static_cast<double>(i)); + result[i] =
std::exp(static_cast<double>(NUMBER_MODELS / 2) - + static_cast<double>(i)); } result[NUMBER_MODELS] = result[NUMBER_MODELS - 1] / std::exp(1.0); return result; @@ -477,12 +503,15 @@ CTrendComponent::TDoubleVec CTrendComponent::initialForecastErrorWeights() const double CTrendComponent::count() const { TMeanAccumulator result; for (const auto& model : m_TrendModels) { - result.add(CTools::fastLog(model.s_Regression.count()), CBasicStatistics::mean(model.s_Weight)); + result.add(CTools::fastLog(model.s_Regression.count()), + CBasicStatistics::mean(model.s_Weight)); } return std::exp(CBasicStatistics::mean(result)); } -double CTrendComponent::value(const TDoubleVec& weights, const TRegressionArrayVec& models, double time) const { +double CTrendComponent::value(const TDoubleVec& weights, + const TRegressionArrayVec& models, + double time) const { TMeanAccumulator prediction; for (std::size_t i = 0u; i < models.size(); ++i) { prediction.add(CRegression::predict(models[i], time), weights[i]); @@ -496,7 +525,8 @@ double CTrendComponent::weightOfPrediction(core_t::TTime time) const { return 0.0; } - double extrapolateInterval{static_cast<double>(CBasicStatistics::max(time - m_LastUpdate, m_FirstUpdate - time, core_t::TTime(0)))}; + double extrapolateInterval{static_cast<double>(CBasicStatistics::max( + time - m_LastUpdate, m_FirstUpdate - time, core_t::TTime(0)))}; if (extrapolateInterval == 0.0) { return 1.0; } @@ -510,7 +540,8 @@ CTrendComponent::SModel::SModel(double weight) { void CTrendComponent::SModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(WEIGHT_TAG, s_Weight.toDelimited()); - inserter.insertLevel(REGRESSION_TAG, boost::bind(&TRegression::acceptPersistInserter, &s_Regression, _1)); + inserter.insertLevel(REGRESSION_TAG, boost::bind(&TRegression::acceptPersistInserter, + &s_Regression, _1)); inserter.insertValue(RESIDUAL_MOMENTS_TAG, s_ResidualMoments.toDelimited()); } @@ -518,7 +549,9 @@ bool CTrendComponent::SModel::acceptRestoreTraverser(core::CStateRestoreTraverse do { const std::string& name{traverser.name()}; RESTORE(WEIGHT_TAG, s_Weight.fromDelimited(traverser.value())) - RESTORE(REGRESSION_TAG, traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, &s_Regression, _1))) + RESTORE(REGRESSION_TAG, + traverser.traverseSubLevel(boost::bind( + &TRegression::acceptRestoreTraverser, &s_Regression, _1))) RESTORE(RESIDUAL_MOMENTS_TAG, s_ResidualMoments.fromDelimited(traverser.value())) } while (traverser.next()); return true; @@ -534,15 +567,14 @@ CTrendComponent::CForecastLevel::CForecastLevel(const CNaiveBayes& probability, const CNormalMeanPrecConjugate& magnitude, core_t::TTime timeOfLastChange, std::size_t numberPaths) - : m_Probability(probability), - m_Magnitude(magnitude), - m_Levels(numberPaths), + : m_Probability(probability), m_Magnitude(magnitude), m_Levels(numberPaths), m_TimesOfLastChange(numberPaths, timeOfLastChange), m_ProbabilitiesOfChange(numberPaths, 0.0) { m_Uniform01.reserve(numberPaths); } -CTrendComponent::TDouble3Vec CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, double confidence) { +CTrendComponent::TDouble3Vec +CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, double confidence) { TDouble3Vec result{0.0, 0.0, 0.0}; if (m_Probability.initialized()) { @@ -567,8 +599,12 @@ CTrendComponent::TDouble3Vec CTrendComponent::CForecastLevel::forecast(core_t::T } double rollouts{static_cast<double>(m_Levels.size())}; - std::size_t lower{std::min(static_cast<std::size_t>((100.0 - confidence)
/ 200.0 * rollouts + 0.5), m_Levels.size())}; - std::size_t upper{std::min(static_cast<std::size_t>((100.0 + confidence) / 200.0 * rollouts + 0.5), m_Levels.size() - 1)}; + std::size_t lower{std::min( + static_cast<std::size_t>((100.0 - confidence) / 200.0 * rollouts + 0.5), + m_Levels.size())}; + std::size_t upper{std::min( + static_cast<std::size_t>((100.0 + confidence) / 200.0 * rollouts + 0.5), + m_Levels.size() - 1)}; result[0] = m_Levels[lower]; result[1] = CBasicStatistics::median(m_Levels); diff --git a/lib/maths/CTrendTests.cc b/lib/maths/CTrendTests.cc index 219dfd0dfc..57009c58b9 100644 --- a/lib/maths/CTrendTests.cc +++ b/lib/maths/CTrendTests.cc @@ -59,7 +59,9 @@ using TTimeVec = std::vector<core_t::TTime>; //! \brief Sets the timezone to a specified value in a constructor //! call so it can be called once by static initialisation. struct SSetTimeZone { - SSetTimeZone(const std::string& zone) { core::CTimezone::instance().timezoneName(zone); } + SSetTimeZone(const std::string& zone) { + core::CTimezone::instance().timezoneName(zone); + } }; //! Generate \p n samples uniformly in the interval [\p a, \p b]. @@ -115,7 +117,8 @@ const core_t::TTime WEEK = core::constants::WEEK; //////// CRandomizedPeriodicityTest //////// CRandomizedPeriodicityTest::CRandomizedPeriodicityTest() - : m_DayRefreshedProjections(-DAY_RESAMPLE_INTERVAL), m_WeekRefreshedProjections(-DAY_RESAMPLE_INTERVAL) { + : m_DayRefreshedProjections(-DAY_RESAMPLE_INTERVAL), + m_WeekRefreshedProjections(-DAY_RESAMPLE_INTERVAL) { resample(0); } @@ -139,29 +142,23 @@ bool CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser(core::CStateResto ss >> ms_Rng; continue; } - RESTORE_SETUP_TEARDOWN(DAY_RESAMPLED_TAG, - core_t::TTime resampled, + RESTORE_SETUP_TEARDOWN(DAY_RESAMPLED_TAG, core_t::TTime resampled, core::CStringUtils::stringToType(traverser.value(), resampled), ms_DayResampled.store(resampled)) - RESTORE_SETUP_TEARDOWN(WEEK_RESAMPLED_TAG, - core_t::TTime resampled, + RESTORE_SETUP_TEARDOWN(WEEK_RESAMPLED_TAG, core_t::TTime resampled, core::CStringUtils::stringToType(traverser.value(), resampled), ms_WeekResampled.store(resampled)) RESTORE_BUILT_IN(ARRAY_INDEX_TAG, index) - RESTORE_SETUP_TEARDOWN(DAY_RANDOM_PROJECTIONS_TAG, - double d, + RESTORE_SETUP_TEARDOWN(DAY_RANDOM_PROJECTIONS_TAG, double d, core::CStringUtils::stringToType(traverser.value(), d), ms_DayRandomProjections[index].push_back(d)) - RESTORE_SETUP_TEARDOWN(DAY_PERIODIC_PROJECTIONS_TAG, - double d, + RESTORE_SETUP_TEARDOWN(DAY_PERIODIC_PROJECTIONS_TAG, double d, core::CStringUtils::stringToType(traverser.value(), d), ms_DayPeriodicProjections[index].push_back(d)) - RESTORE_SETUP_TEARDOWN(WEEK_RANDOM_PROJECTIONS_TAG, - double d, + RESTORE_SETUP_TEARDOWN(WEEK_RANDOM_PROJECTIONS_TAG, double d, core::CStringUtils::stringToType(traverser.value(), d), ms_WeekRandomProjections[index].push_back(d)) - RESTORE_SETUP_TEARDOWN(WEEK_PERIODIC_PROJECTIONS_TAG, - double d, + RESTORE_SETUP_TEARDOWN(WEEK_PERIODIC_PROJECTIONS_TAG, double d, core::CStringUtils::stringToType(traverser.value(), d), ms_WeekPeriodicProjections[index].push_back(d)) } while (traverser.next()); @@ -205,11 +202,13 @@ bool CRandomizedPeriodicityTest::acceptRestoreTraverser(core::CStateRestoreTrave RESTORE(DAY_PROJECTIONS_TAG, m_DayProjections.fromDelimited(traverser.value())) RESTORE(DAY_STATISTICS_TAG, m_DayStatistics.fromDelimited(traverser.value())) - RESTORE(DAY_REFRESHED_PROJECTIONS_TAG, core::CStringUtils::stringToType(traverser.value(), m_DayRefreshedProjections)) + RESTORE(DAY_REFRESHED_PROJECTIONS_TAG, +
core::CStringUtils::stringToType(traverser.value(), m_DayRefreshedProjections)) RESTORE(WEEK_PROJECTIONS_TAG, m_WeekProjections.fromDelimited(traverser.value())) RESTORE(WEEK_STATISTICS_TAG, m_WeekStatistics.fromDelimited(traverser.value())) RESTORE(DAY_STATISTICS_TAG, m_DayStatistics.fromDelimited(traverser.value())) - RESTORE(WEEK_REFRESHED_PROJECTIONS_TAG, core::CStringUtils::stringToType(traverser.value(), m_WeekRefreshedProjections)) + RESTORE(WEEK_REFRESHED_PROJECTIONS_TAG, + core::CStringUtils::stringToType(traverser.value(), m_WeekRefreshedProjections)) } while (traverser.next()); return true; @@ -264,8 +263,10 @@ bool CRandomizedPeriodicityTest::test() const { if (nd >= 1.0) { TVector2 S = CBasicStatistics::mean(m_DayStatistics); LOG_TRACE(<< "Day test statistic, S = " << S << ", n = " << nd); - double ratio = - S(0) == S(1) ? 1.0 : (S(0) == 0.0 ? boost::numeric::bounds<double>::highest() : static_cast<double>(S(1) / S(0))); + double ratio = S(0) == S(1) + ? 1.0 + : (S(0) == 0.0 ? boost::numeric::bounds<double>::highest() + : static_cast<double>(S(1) / S(0))); double significance = CStatisticalTests::rightTailFTest(ratio, nd, nd); LOG_TRACE(<< "Daily significance = " << significance); if (significance < SIGNIFICANCE) { @@ -277,15 +278,19 @@ if (nw >= 1.0) { TVector2 S = CBasicStatistics::mean(m_WeekStatistics); LOG_TRACE(<< "Week test statistic, S = " << S); - double ratio = - S(0) == S(1) ? 1.0 : (S(0) == 0.0 ? boost::numeric::bounds<double>::highest() : static_cast<double>(S(1) / S(0))); + double ratio = S(0) == S(1) + ? 1.0 + : (S(0) == 0.0 ? boost::numeric::bounds<double>::highest() + : static_cast<double>(S(1) / S(0))); double significance = CStatisticalTests::rightTailFTest(ratio, nw, nw); LOG_TRACE(<< "Weekly significance = " << significance); if (significance < SIGNIFICANCE) { return true; } } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to test for periodicity: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to test for periodicity: " << e.what()); + } return false; } @@ -316,7 +321,8 @@ uint64_t CRandomizedPeriodicityTest::checksum(uint64_t seed) const { return seed; } -void CRandomizedPeriodicityTest::updateStatistics(TVector2NMeanAccumulator& projections, TVector2MeanAccumulator& statistics) { +void CRandomizedPeriodicityTest::updateStatistics(TVector2NMeanAccumulator& projections, + TVector2MeanAccumulator& statistics) { static const double ALPHA = 0.1; if (CBasicStatistics::count(projections) > 0.0) { @@ -344,8 +350,10 @@ void CRandomizedPeriodicityTest::resample(core_t::TTime time) { LOG_TRACE(<< "Updating daily random projections at " << time); if (time >= ms_DayResampled.load(std::memory_order_relaxed) + DAY_RESAMPLE_INTERVAL) { - resample(DAY, DAY_RESAMPLE_INTERVAL, ms_DayPeriodicProjections, ms_DayRandomProjections); - ms_DayResampled.store(CIntegerTools::floor(time, DAY_RESAMPLE_INTERVAL), std::memory_order_release); + resample(DAY, DAY_RESAMPLE_INTERVAL, ms_DayPeriodicProjections, + ms_DayRandomProjections); + ms_DayResampled.store(CIntegerTools::floor(time, DAY_RESAMPLE_INTERVAL), + std::memory_order_release); } } @@ -354,8 +362,10 @@ void CRandomizedPeriodicityTest::resample(core_t::TTime time) { LOG_TRACE(<< "Updating weekly random projections at " << time); if (time >= ms_WeekResampled.load(std::memory_order_relaxed) + WEEK_RESAMPLE_INTERVAL) { - resample(WEEK, WEEK_RESAMPLE_INTERVAL, ms_WeekPeriodicProjections, ms_WeekRandomProjections); - ms_WeekResampled.store(CIntegerTools::floor(time, WEEK_RESAMPLE_INTERVAL),
std::memory_order_release); + resample(WEEK, WEEK_RESAMPLE_INTERVAL, ms_WeekPeriodicProjections, + ms_WeekRandomProjections); + ms_WeekResampled.store(CIntegerTools::floor(time, WEEK_RESAMPLE_INTERVAL), + std::memory_order_release); } } } @@ -373,8 +383,10 @@ void CRandomizedPeriodicityTest::resample(core_t::TTime period, zeroMean(periodicProjections[i]); randomProjections[i].resize(t); for (std::size_t j = 0u; j < p; ++j) { - std::copy(periodicProjections[i].begin(), periodicProjections[i].end(), randomProjections[i].begin() + j * n); - CSampling::random_shuffle(ms_Rng, randomProjections[i].begin() + j * n, randomProjections[i].begin() + (j + 1) * n); + std::copy(periodicProjections[i].begin(), periodicProjections[i].end(), + randomProjections[i].begin() + j * n); + CSampling::random_shuffle(ms_Rng, randomProjections[i].begin() + j * n, + randomProjections[i].begin() + (j + 1) * n); } } } @@ -394,7 +406,8 @@ core::CMutex CRandomizedPeriodicityTest::ms_Lock; //////// CCalendarCyclicTest //////// CCalendarCyclicTest::CCalendarCyclicTest(double decayRate) - : m_DecayRate(decayRate), m_Bucket(0), m_ErrorQuantiles(CQuantileSketch::E_Linear, 20), m_ErrorCounts(WINDOW / BUCKET) { + : m_DecayRate(decayRate), m_Bucket(0), + m_ErrorQuantiles(CQuantileSketch::E_Linear, 20), m_ErrorCounts(WINDOW / BUCKET) { static const SSetTimeZone timezone("GMT"); m_ErrorSums.reserve(WINDOW / BUCKET / 10); } @@ -404,8 +417,10 @@ bool CCalendarCyclicTest::acceptRestoreTraverser(core::CStateRestoreTraverser& t const std::string& name = traverser.name(); RESTORE_BUILT_IN(BUCKET_TAG, m_Bucket) RESTORE(ERROR_QUANTILES_TAG, - traverser.traverseSubLevel(boost::bind(&CQuantileSketch::acceptRestoreTraverser, &m_ErrorQuantiles, _1))) - RESTORE(ERROR_COUNTS_TAG, core::CPersistUtils::restore(ERROR_COUNTS_TAG, m_ErrorCounts, traverser)) + traverser.traverseSubLevel(boost::bind(&CQuantileSketch::acceptRestoreTraverser, + &m_ErrorQuantiles, _1))) + RESTORE(ERROR_COUNTS_TAG, + core::CPersistUtils::restore(ERROR_COUNTS_TAG, m_ErrorCounts, traverser)) RESTORE(ERROR_SUMS_TAG, core::CPersistUtils::fromString(traverser.value(), m_ErrorSums)) } while (traverser.next()); return true; @@ -413,7 +428,8 @@ bool CCalendarCyclicTest::acceptRestoreTraverser(core::CStateRestoreTraverser& t void CCalendarCyclicTest::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(BUCKET_TAG, m_Bucket); - inserter.insertLevel(ERROR_QUANTILES_TAG, boost::bind(&CQuantileSketch::acceptPersistInserter, &m_ErrorQuantiles, _1)); + inserter.insertLevel(ERROR_QUANTILES_TAG, boost::bind(&CQuantileSketch::acceptPersistInserter, + &m_ErrorQuantiles, _1)); core::CPersistUtils::persist(ERROR_COUNTS_TAG, m_ErrorCounts, inserter); inserter.insertValue(ERROR_SUMS_TAG, core::CPersistUtils::toString(m_ErrorSums)); } @@ -447,9 +463,11 @@ void CCalendarCyclicTest::add(core_t::TTime time, double error, double weight) { double high; m_ErrorQuantiles.quantile(LARGE_ERROR_PERCENTILE, high); - m_ErrorSums.erase(m_ErrorSums.begin(), std::find_if(m_ErrorSums.begin(), m_ErrorSums.end(), [bucket](const TTimeFloatPr& error_) { - return error_.first + WINDOW > bucket; - })); + m_ErrorSums.erase(m_ErrorSums.begin(), + std::find_if(m_ErrorSums.begin(), m_ErrorSums.end(), + [bucket](const TTimeFloatPr& error_) { + return error_.first + WINDOW > bucket; + })); if (error >= high) { count += (count < 0x100000000 - COUNT_BITS) ? 
COUNT_BITS : 0; m_ErrorSums[bucket] += this->winsorise(error); @@ -463,7 +481,9 @@ CCalendarCyclicTest::TOptionalFeature CCalendarCyclicTest::test() const { // The statistics we need in order to be able to test for calendar // features. struct SStats { - SStats() : s_Offset(0), s_Repeats(0), s_Sum(0.0), s_Count(0.0), s_Significance(0.0) {} + SStats() + : s_Offset(0), s_Repeats(0), s_Sum(0.0), s_Count(0.0), + s_Significance(0.0) {} core_t::TTime s_Offset; unsigned int s_Repeats; double s_Sum; @@ -487,11 +507,13 @@ CCalendarCyclicTest::TOptionalFeature CCalendarCyclicTest::test() const { for (auto offset : TIMEZONE_OFFSETS) { for (const auto& error : m_ErrorSums) { - std::size_t i = m_ErrorCounts.size() - 1 - static_cast<std::size_t>((m_Bucket - error.first) / BUCKET); + std::size_t i = m_ErrorCounts.size() - 1 - + static_cast<std::size_t>((m_Bucket - error.first) / BUCKET); double n = static_cast<double>(m_ErrorCounts[i] % COUNT_BITS); double x = static_cast<double>(m_ErrorCounts[i] / COUNT_BITS); double s = this->significance(n, x); - for (auto feature : CCalendarFeature::features(error.first + BUCKET / 2 + offset)) { + for (auto feature : + CCalendarFeature::features(error.first + BUCKET / 2 + offset)) { SStats& stat = stats[feature]; ++stat.s_Repeats; stat.s_Offset = offset; @@ -512,7 +534,8 @@ CCalendarCyclicTest::TOptionalFeature CCalendarCyclicTest::test() const { double x = stat.second.s_Count; double e = stat.second.s_Sum; double s = stat.second.s_Significance; - if (stat.second.s_Repeats >= MINIMUM_REPEATS && e > errorThreshold * x && std::pow(s, r) < MAXIMUM_SIGNIFICANCE) { + if (stat.second.s_Repeats >= MINIMUM_REPEATS && + e > errorThreshold * x && std::pow(s, r) < MAXIMUM_SIGNIFICANCE) { result.add({e, stat.second.s_Offset, feature}); } } @@ -534,7 +557,8 @@ void CCalendarCyclicTest::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr m } std::size_t CCalendarCyclicTest::memoryUsage() const { - return core::CMemory::dynamicSize(m_ErrorQuantiles) + core::CMemory::dynamicSize(m_ErrorCounts) + + return core::CMemory::dynamicSize(m_ErrorQuantiles) + + core::CMemory::dynamicSize(m_ErrorCounts) + core::CMemory::dynamicSize(m_ErrorSums); } @@ -548,7 +572,10 @@ double CCalendarCyclicTest::significance(double n, double x) const { try { boost::math::binomial binom(n, 1.0 - LARGE_ERROR_PERCENTILE / 100.0); return std::min(2.0 * CTools::safeCdfComplement(binom, x - 1.0), 1.0); - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to calculate significance: " << e.what() << " n = " << n << " x = " << x); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to calculate significance: " << e.what() + << " n = " << n << " x = " << x); + } return 1.0; } diff --git a/lib/maths/CXMeansOnline1d.cc b/lib/maths/CXMeansOnline1d.cc index 43aba2a9d7..f90809262a 100644 --- a/lib/maths/CXMeansOnline1d.cc +++ b/lib/maths/CXMeansOnline1d.cc @@ -59,11 +59,16 @@ using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulat //! \brief Orders two clusters by their centres.
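For reference, the significance computed by CCalendarCyclicTest::significance above is a two-sided binomial tail: the chance of seeing x or more "large" errors out of n when large errors land in the tail independently with fixed probability. A standalone sketch using boost::math directly follows; the value 98 for LARGE_ERROR_PERCENTILE is an assumption for illustration.

    #include <boost/math/distributions/binomial.hpp>
    #include <algorithm>
    #include <iostream>

    int main() {
        double n{40.0}; // errors recorded in the bucket
        double x{6.0};  // of which this many exceeded the large-error quantile
        double p{1.0 - 98.0 / 100.0};
        boost::math::binomial_distribution<> binom(n, p);
        // P(X >= x) = P(X > x - 1), doubled for a two-sided test, capped at 1.
        double significance{std::min(
            2.0 * boost::math::cdf(boost::math::complement(binom, x - 1.0)), 1.0)};
        std::cout << "significance = " << significance << '\n';
        return 0;
    }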
struct SClusterCentreLess { - bool operator()(const CXMeansOnline1d::CCluster& lhs, const CXMeansOnline1d::CCluster& rhs) const { + bool operator()(const CXMeansOnline1d::CCluster& lhs, + const CXMeansOnline1d::CCluster& rhs) const { return lhs.centre() < rhs.centre(); } - bool operator()(double lhs, const CXMeansOnline1d::CCluster& rhs) const { return lhs < rhs.centre(); } - bool operator()(const CXMeansOnline1d::CCluster& lhs, double rhs) const { return lhs.centre() < rhs; } + bool operator()(double lhs, const CXMeansOnline1d::CCluster& rhs) const { + return lhs < rhs.centre(); + } + bool operator()(const CXMeansOnline1d::CCluster& lhs, double rhs) const { + return lhs.centre() < rhs; + } }; //! Get \p x times \p x. @@ -77,14 +82,16 @@ double min(double x, double y, double z) { } //! Get the log of the likelihood that \p point is from the \p normal. -maths_t::EFloatingPointErrorStatus -logLikelihoodFromCluster(double point, const CNormalMeanPrecConjugate& normal, double probability, double& result) { +maths_t::EFloatingPointErrorStatus logLikelihoodFromCluster(double point, + const CNormalMeanPrecConjugate& normal, + double probability, + double& result) { result = core::constants::LOG_MIN_DOUBLE - 1.0; double likelihood; - maths_t::EFloatingPointErrorStatus status = - normal.jointLogMarginalLikelihood(CConstantWeights::COUNT, {point}, CConstantWeights::SINGLE_UNIT, likelihood); + maths_t::EFloatingPointErrorStatus status = normal.jointLogMarginalLikelihood( + CConstantWeights::COUNT, {point}, CConstantWeights::SINGLE_UNIT, likelihood); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute likelihood for: " << point); return status; @@ -107,7 +114,9 @@ void candidates(const TTupleVec& categories, TMeanVarAccumulator& mv, TMeanVarAccumulator& mvl, TMeanVarAccumulator& mvr) { - LOG_TRACE(<< "categories = " << core::CContainerPrinter::print(categories.begin() + start, categories.begin() + end)); + LOG_TRACE(<< "categories = " + << core::CContainerPrinter::print(categories.begin() + start, + categories.begin() + end)); LOG_TRACE(<< "split at = " << split); for (std::size_t i = start; i < split; ++i) { @@ -353,23 +362,27 @@ void BICGain(maths_t::EDataType dataType, } } } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute BIC gain: " << e.what() << ", n = " << n << ", m = " << m << ", v = " << v << ", wl = " << wl - << ", ml = " << ml << ", vl = " << vl << ", wr = " << wr << ", mr = " << mr << ", vr = " << vr); + LOG_ERROR(<< "Failed to compute BIC gain: " << e.what() << ", n = " << n + << ", m = " << m << ", v = " << v << ", wl = " << wl + << ", ml = " << ml << ", vl = " << vl << ", wr = " << wr + << ", mr = " << mr << ", vr = " << vr); return; } double logn = std::log(n); - double ll1 = min(distributions.haveNormal() ? ll1n : boost::numeric::bounds<double>::highest(), - distributions.haveLogNormal() ? ll1l : boost::numeric::bounds<double>::highest(), - distributions.haveGamma() ? ll1g : boost::numeric::bounds<double>::highest()) + - distributions.parameters() * logn; - double ll2 = min(distributions.haveNormal() ? ll2nl : boost::numeric::bounds<double>::highest(), - distributions.haveLogNormal() ? ll2ll : boost::numeric::bounds<double>::highest(), - distributions.haveGamma() ? ll2gl : boost::numeric::bounds<double>::highest()) + - min(distributions.haveNormal() ? ll2nr : boost::numeric::bounds<double>::highest(), - distributions.haveLogNormal() ? ll2lr : boost::numeric::bounds<double>::highest(), - distributions.haveGamma() ?
ll2gr : boost::numeric::bounds<double>::highest()) + - (2.0 * distributions.parameters() + 1.0) * logn; + double ll1 = + min(distributions.haveNormal() ? ll1n : boost::numeric::bounds<double>::highest(), + distributions.haveLogNormal() ? ll1l : boost::numeric::bounds<double>::highest(), + distributions.haveGamma() ? ll1g : boost::numeric::bounds<double>::highest()) + + distributions.parameters() * logn; + double ll2 = + min(distributions.haveNormal() ? ll2nl : boost::numeric::bounds<double>::highest(), + distributions.haveLogNormal() ? ll2ll : boost::numeric::bounds<double>::highest(), + distributions.haveGamma() ? ll2gl : boost::numeric::bounds<double>::highest()) + + min(distributions.haveNormal() ? ll2nr : boost::numeric::bounds<double>::highest(), + distributions.haveLogNormal() ? ll2lr : boost::numeric::bounds<double>::highest(), + distributions.haveGamma() ? ll2gr : boost::numeric::bounds<double>::highest()) + + (2.0 * distributions.parameters() + 1.0) * logn; LOG_TRACE(<< "BIC(1) = " << ll1 << ", BIC(2) = " << ll2); @@ -419,20 +432,24 @@ void winsorise(const TDoubleDoublePr& interval, TTuple& category) { double eb = xb > t ? 0.0 : std::exp(-xb * xb / 2.0); double km = sigma / boost::math::double_constants::root_two_pi * (ea - eb); - double kv = -sigma * sigma / boost::math::double_constants::root_two_pi * (xa * ea + xb * eb); + double kv = -sigma * sigma / boost::math::double_constants::root_two_pi * + (xa * ea + xb * eb); double wm = pa * a + pb * b + m * (1.0 - pb - pa) + km; xa = a - wm; xb = b - wm; double xm = wm - m; - double wv = xa * xa * pa + xb * xb * pb + (sigma * sigma + xm * xm) * (1.0 - pb - pa) + 2.0 * xm * km + kv; + double wv = xa * xa * pa + xb * xb * pb + + (sigma * sigma + xm * xm) * (1.0 - pb - pa) + 2.0 * xm * km + kv; double n = CBasicStatistics::count(category); category.s_Moments[0] = wm; category.s_Moments[1] = std::max((n - 1.0) / n * wv, 0.0); - } catch (const std::exception& e) { LOG_ERROR(<< "Bad category = " << category << ": " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Bad category = " << category << ": " << e.what()); + } } //! Search for a split of the data that satisfies the constraints @@ -484,9 +501,11 @@ bool splitSearch(double minimumCount, LOG_TRACE(<< "node = " << core::CContainerPrinter::print(node)); LOG_TRACE(<< "categories = " << core::CContainerPrinter::print(categories)); - nodeCategories.assign(categories.begin() + node.first, categories.begin() + node.second); + nodeCategories.assign(categories.begin() + node.first, + categories.begin() + node.second); - CNaturalBreaksClassifier::naturalBreaks(nodeCategories, 2, 0, CNaturalBreaksClassifier::E_TargetDeviation, candidate); + CNaturalBreaksClassifier::naturalBreaks( + nodeCategories, 2, 0, CNaturalBreaksClassifier::E_TargetDeviation, candidate); LOG_TRACE(<< "candidate = " << core::CContainerPrinter::print(candidate)); if (candidate.size() != 2) { @@ -505,7 +524,8 @@ bool splitSearch(double minimumCount, double distance; double nl; double nr; - BICGain(dataType, distributions, smallest, categories, node.first, candidate[0], node.second, distance, nl, nr); + BICGain(dataType, distributions, smallest, categories, node.first, + candidate[0], node.second, distance, nl, nr); // Check the count constraint. bool satisfiesCount = (std::min(nl, nr) >= minimumCount); // Check the distance constraint.
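To make the acceptance rule concrete: a candidate split wins when BIC(1) - BIC(2) exceeds the distance threshold. A toy, Gaussian-only illustration follows; the real BICGain above also scores log-normal and gamma modes and operates on winsorised categories rather than raw samples.

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    // Log-likelihood of x[a, b) under a maximum likelihood normal fit.
    double normalLogLikelihood(const std::vector<double>& x, std::size_t a, std::size_t b) {
        const double pi{3.141592653589793};
        double n{static_cast<double>(b - a)};
        double mean{0.0};
        for (std::size_t i = a; i < b; ++i) { mean += x[i]; }
        mean /= n;
        double variance{0.0};
        for (std::size_t i = a; i < b; ++i) { variance += (x[i] - mean) * (x[i] - mean); }
        variance = std::max(variance / n, 1e-12);
        return -0.5 * n * (std::log(2.0 * pi * variance) + 1.0);
    }

    int main() {
        std::vector<double> x{0.9, 1.1, 1.0, 0.8, 1.2, 5.0, 5.2, 4.9, 5.1, 4.8};
        double logn{std::log(static_cast<double>(x.size()))};
        std::size_t split{5};
        // Two parameters per normal; the two-cluster model pays for one extra
        // parameter, mirroring the (2 * parameters + 1) * log(n) penalty above.
        double bic1{-2.0 * normalLogLikelihood(x, 0, x.size()) + 2.0 * logn};
        double bic2{-2.0 * (normalLogLikelihood(x, 0, split) +
                            normalLogLikelihood(x, split, x.size())) +
                    5.0 * logn};
        std::cout << "max(BIC(1) - BIC(2), 0) = " << std::max(bic1 - bic2, 0.0) << '\n';
        return 0;
    }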
bool satisfiesDistance = (distance > minimumDistance); - LOG_TRACE(<< "max(BIC(1) - BIC(2), 0) = " << distance << " (to split " << minimumDistance << ")"); + LOG_TRACE(<< "max(BIC(1) - BIC(2), 0) = " << distance << " (to split " + << minimumDistance << ")"); if (!satisfiesCount) { // Recurse to the (one) node with sufficient count. @@ -528,9 +549,11 @@ bool splitSearch(double minimumCount, } else if (satisfiesDistance) { LOG_TRACE(<< "Checking full split"); - BICGain(dataType, distributions, smallest, categories, 0, candidate[0], categories.size(), distance, nl, nr); + BICGain(dataType, distributions, smallest, categories, 0, + candidate[0], categories.size(), distance, nl, nr); - LOG_TRACE(<< "max(BIC(1) - BIC(2), 0) = " << distance << " (to split " << minimumDistance << ")"); + LOG_TRACE(<< "max(BIC(1) - BIC(2), 0) = " << distance + << " (to split " << minimumDistance << ")"); if (distance > minimumDistance) { result.push_back(candidate[0]); @@ -571,16 +594,19 @@ static const std::string PRIOR_TAG("c"); const std::string EMPTY_STRING; } -CAvailableModeDistributions::CAvailableModeDistributions(int value) : m_Value(value) { +CAvailableModeDistributions::CAvailableModeDistributions(int value) + : m_Value(value) { } -const CAvailableModeDistributions& CAvailableModeDistributions::operator+(const CAvailableModeDistributions& rhs) { +const CAvailableModeDistributions& CAvailableModeDistributions:: +operator+(const CAvailableModeDistributions& rhs) { m_Value = m_Value | rhs.m_Value; return *this; } double CAvailableModeDistributions::parameters() const { - return (this->haveNormal() ? 2.0 : 0.0) + (this->haveGamma() ? 2.0 : 0.0) + (this->haveLogNormal() ? 2.0 : 0.0); + return (this->haveNormal() ? 2.0 : 0.0) + (this->haveGamma() ? 2.0 : 0.0) + + (this->haveLogNormal() ? 
2.0 : 0.0); } bool CAvailableModeDistributions::haveNormal() const { @@ -613,68 +639,55 @@ CXMeansOnline1d::CXMeansOnline1d(maths_t::EDataType dataType, double winsorisationConfidenceInterval, const TSplitFunc& splitFunc, const TMergeFunc& mergeFunc) - : CClusterer1d(splitFunc, mergeFunc), - m_DataType(dataType), + : CClusterer1d(splitFunc, mergeFunc), m_DataType(dataType), m_AvailableDistributions(availableDistributions), - m_InitialDecayRate(decayRate), - m_DecayRate(decayRate), - m_HistoryLength(0.0), - m_WeightCalc(weightCalc), - m_MinimumClusterFraction(minimumClusterFraction), + m_InitialDecayRate(decayRate), m_DecayRate(decayRate), m_HistoryLength(0.0), + m_WeightCalc(weightCalc), m_MinimumClusterFraction(minimumClusterFraction), m_MinimumClusterCount(minimumClusterCount), m_MinimumCategoryCount(minimumCategoryCount), m_WinsorisationConfidenceInterval(winsorisationConfidenceInterval), m_Clusters(1, CCluster(*this)) { } -CXMeansOnline1d::CXMeansOnline1d(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) - : CClusterer1d(CDoNothing(), CDoNothing()), - m_DataType(params.s_DataType), +CXMeansOnline1d::CXMeansOnline1d(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) + : CClusterer1d(CDoNothing(), CDoNothing()), m_DataType(params.s_DataType), m_AvailableDistributions(CAvailableModeDistributions::ALL), - m_InitialDecayRate(params.s_DecayRate), - m_DecayRate(params.s_DecayRate), - m_HistoryLength(), - m_WeightCalc(maths_t::E_ClustersEqualWeight), - m_MinimumClusterFraction(), - m_MinimumClusterCount(), + m_InitialDecayRate(params.s_DecayRate), m_DecayRate(params.s_DecayRate), + m_HistoryLength(), m_WeightCalc(maths_t::E_ClustersEqualWeight), + m_MinimumClusterFraction(), m_MinimumClusterCount(), m_MinimumCategoryCount(params.s_MinimumCategoryCount), m_WinsorisationConfidenceInterval() { - traverser.traverseSubLevel(boost::bind(&CXMeansOnline1d::acceptRestoreTraverser, this, boost::cref(params), _1)); + traverser.traverseSubLevel(boost::bind(&CXMeansOnline1d::acceptRestoreTraverser, + this, boost::cref(params), _1)); } CXMeansOnline1d::CXMeansOnline1d(const SDistributionRestoreParams& params, const TSplitFunc& splitFunc, const TMergeFunc& mergeFunc, core::CStateRestoreTraverser& traverser) - : CClusterer1d(splitFunc, mergeFunc), - m_DataType(params.s_DataType), + : CClusterer1d(splitFunc, mergeFunc), m_DataType(params.s_DataType), m_AvailableDistributions(CAvailableModeDistributions::ALL), - m_InitialDecayRate(params.s_DecayRate), - m_DecayRate(params.s_DecayRate), - m_HistoryLength(), - m_WeightCalc(maths_t::E_ClustersEqualWeight), - m_MinimumClusterFraction(), - m_MinimumClusterCount(), + m_InitialDecayRate(params.s_DecayRate), m_DecayRate(params.s_DecayRate), + m_HistoryLength(), m_WeightCalc(maths_t::E_ClustersEqualWeight), + m_MinimumClusterFraction(), m_MinimumClusterCount(), m_MinimumCategoryCount(params.s_MinimumCategoryCount), m_WinsorisationConfidenceInterval() { - traverser.traverseSubLevel(boost::bind(&CXMeansOnline1d::acceptRestoreTraverser, this, boost::cref(params), _1)); + traverser.traverseSubLevel(boost::bind(&CXMeansOnline1d::acceptRestoreTraverser, + this, boost::cref(params), _1)); } CXMeansOnline1d::CXMeansOnline1d(const CXMeansOnline1d& other) - : CClusterer1d(other.splitFunc(), other.mergeFunc()), - m_DataType(other.m_DataType), + : CClusterer1d(other.splitFunc(), other.mergeFunc()), m_DataType(other.m_DataType), m_AvailableDistributions(other.m_AvailableDistributions), - 
m_InitialDecayRate(other.m_InitialDecayRate), - m_DecayRate(other.m_DecayRate), - m_HistoryLength(other.m_HistoryLength), - m_WeightCalc(other.m_WeightCalc), + m_InitialDecayRate(other.m_InitialDecayRate), m_DecayRate(other.m_DecayRate), + m_HistoryLength(other.m_HistoryLength), m_WeightCalc(other.m_WeightCalc), m_MinimumClusterFraction(other.m_MinimumClusterFraction), m_MinimumClusterCount(other.m_MinimumClusterCount), m_MinimumCategoryCount(other.m_MinimumCategoryCount), m_WinsorisationConfidenceInterval(other.m_WinsorisationConfidenceInterval), m_ClusterIndexGenerator(other.m_ClusterIndexGenerator.deepCopy()), - m_Smallest(other.m_Smallest), - m_Largest(other.m_Largest), + m_Smallest(other.m_Smallest), m_Largest(other.m_Largest), m_Clusters(other.m_Clusters) { } @@ -710,7 +723,8 @@ std::string CXMeansOnline1d::persistenceTag() const { void CXMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter& inserter) const { for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - inserter.insertLevel(CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter, &m_Clusters[i], _1)); + inserter.insertLevel(CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter, + &m_Clusters[i], _1)); } inserter.insertValue(AVAILABLE_DISTRIBUTIONS_TAG, m_AvailableDistributions.toString()); inserter.insertValue(DECAY_RATE_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); @@ -721,7 +735,9 @@ void CXMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter& inserte inserter.insertValue(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction); inserter.insertValue(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount); inserter.insertValue(WINSORISATION_CONFIDENCE_INTERVAL_TAG, m_WinsorisationConfidenceInterval); - inserter.insertLevel(CLUSTER_INDEX_GENERATOR_TAG, boost::bind(&CIndexGenerator::acceptPersistInserter, &m_ClusterIndexGenerator, _1)); + inserter.insertLevel(CLUSTER_INDEX_GENERATOR_TAG, + boost::bind(&CIndexGenerator::acceptPersistInserter, + &m_ClusterIndexGenerator, _1)); } CXMeansOnline1d* CXMeansOnline1d::clone() const { @@ -729,16 +745,10 @@ CXMeansOnline1d* CXMeansOnline1d::clone() const { } void CXMeansOnline1d::clear() { - *this = CXMeansOnline1d(m_DataType, - m_AvailableDistributions, - m_WeightCalc, - m_InitialDecayRate, - m_MinimumClusterFraction, - m_MinimumClusterCount, - m_MinimumCategoryCount, - m_WinsorisationConfidenceInterval, - this->splitFunc(), - this->mergeFunc()); + *this = CXMeansOnline1d( + m_DataType, m_AvailableDistributions, m_WeightCalc, m_InitialDecayRate, + m_MinimumClusterFraction, m_MinimumClusterCount, m_MinimumCategoryCount, + m_WinsorisationConfidenceInterval, this->splitFunc(), this->mergeFunc()); } std::size_t CXMeansOnline1d::numberClusters() const { @@ -791,7 +801,8 @@ void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do return; } - TClusterVecCItr rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); + TClusterVecCItr rightCluster = std::lower_bound( + m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); if (rightCluster == m_Clusters.end()) { --rightCluster; @@ -849,7 +860,8 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub clusters.clear(); - TClusterVecItr rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); + TClusterVecItr rightCluster = std::lower_bound( + m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); if (rightCluster == 
m_Clusters.end()) { --rightCluster; @@ -912,14 +924,16 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub // Get the weighted counts. double countLeft = count * pLeft; double countRight = count * pRight; - LOG_TRACE(<< "Soft adding " << point << " " << countLeft << " to " << leftCluster->centre() << " and " << countRight << " to " - << rightCluster->centre()); + LOG_TRACE(<< "Soft adding " << point << " " << countLeft << " to " + << leftCluster->centre() << " and " << countRight + << " to " << rightCluster->centre()); leftCluster->add(point, countLeft); rightCluster->add(point, countRight); clusters.emplace_back(leftCluster->index(), countLeft); clusters.emplace_back(rightCluster->index(), countRight); - if (this->maybeSplit(leftCluster) || this->maybeSplit(rightCluster) || this->maybeMerge(leftCluster, rightCluster)) { + if (this->maybeSplit(leftCluster) || this->maybeSplit(rightCluster) || + this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); } } @@ -1023,7 +1037,8 @@ std::string CXMeansOnline1d::printClusters() const { static const double RANGE = 99.9; static const unsigned int POINTS = 201; - TDoubleDoublePr range(boost::numeric::bounds<double>::highest(), boost::numeric::bounds<double>::lowest()); + TDoubleDoublePr range(boost::numeric::bounds<double>::highest(), + boost::numeric::bounds<double>::lowest()); for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { const CPrior& prior = m_Clusters[i].prior(); @@ -1054,7 +1069,8 @@ std::string CXMeansOnline1d::printClusters() const { const CPrior& prior = m_Clusters[j].prior(); if (!(prior.jointLogMarginalLikelihood(COUNT_WEIGHT, x, UNIT_WEIGHT, logLikelihood) & (maths_t::E_FpFailed | maths_t::E_FpOverflowed))) { - likelihood += m_Clusters[j].weight(m_WeightCalc) / weightSum * std::exp(logLikelihood); + likelihood += m_Clusters[j].weight(m_WeightCalc) / weightSum * + std::exp(logLikelihood); } } coordinatesStr << x[0] << " "; @@ -1070,26 +1086,30 @@ CXMeansOnline1d::CIndexGenerator& CXMeansOnline1d::indexGenerator() { return m_ClusterIndexGenerator; } -bool CXMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CXMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - CLUSTER_TAG, - CCluster cluster(*this), - traverser.traverseSubLevel(boost::bind(&CCluster::acceptRestoreTraverser, &cluster, boost::cref(params), _1)), - m_Clusters.push_back(cluster)) - RESTORE(AVAILABLE_DISTRIBUTIONS_TAG, m_AvailableDistributions.fromString(traverser.value())) - RESTORE_SETUP_TEARDOWN( - DECAY_RATE_TAG, double decayRate, core::CStringUtils::stringToType(traverser.value(), decayRate), this->decayRate(decayRate)) + RESTORE_SETUP_TEARDOWN(CLUSTER_TAG, CCluster cluster(*this), + traverser.traverseSubLevel( + boost::bind(&CCluster::acceptRestoreTraverser, + &cluster, boost::cref(params), _1)), + m_Clusters.push_back(cluster)) + RESTORE(AVAILABLE_DISTRIBUTIONS_TAG, + m_AvailableDistributions.fromString(traverser.value())) + RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate, + core::CStringUtils::stringToType(traverser.value(), decayRate), + this->decayRate(decayRate)) RESTORE_BUILT_IN(HISTORY_LENGTH_TAG, m_HistoryLength); RESTORE(SMALLEST_TAG, m_Smallest.fromDelimited(traverser.value())) RESTORE(LARGEST_TAG, m_Largest.fromDelimited(traverser.value())) RESTORE(CLUSTER_INDEX_GENERATOR_TAG, -
traverser.traverseSubLevel(boost::bind(&CIndexGenerator::acceptRestoreTraverser, &m_ClusterIndexGenerator, _1))) - RESTORE_SETUP_TEARDOWN(WEIGHT_CALC_TAG, - int weightCalc, - core::CStringUtils::stringToType(traverser.value(), weightCalc), - m_WeightCalc = static_cast<maths_t::EClusterWeightCalc>(weightCalc)) + traverser.traverseSubLevel(boost::bind(&CIndexGenerator::acceptRestoreTraverser, + &m_ClusterIndexGenerator, _1))) + RESTORE_SETUP_TEARDOWN( + WEIGHT_CALC_TAG, int weightCalc, + core::CStringUtils::stringToType(traverser.value(), weightCalc), + m_WeightCalc = static_cast<maths_t::EClusterWeightCalc>(weightCalc)) RESTORE_BUILT_IN(MINIMUM_CLUSTER_FRACTION_TAG, m_MinimumClusterFraction) RESTORE_BUILT_IN(MINIMUM_CLUSTER_COUNT_TAG, m_MinimumClusterCount) RESTORE_BUILT_IN(WINSORISATION_CONFIDENCE_INTERVAL_TAG, m_WinsorisationConfidenceInterval) @@ -1114,7 +1134,8 @@ double CXMeansOnline1d::minimumSplitCount() const { for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { count += m_Clusters[i].count(); } - double scale = std::max(m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0); + double scale = + std::max(m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0); count *= m_MinimumClusterFraction / scale; result = std::max(result, count); } @@ -1129,8 +1150,10 @@ bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) { TDoubleDoublePr interval = this->winsorisationInterval(); if (TOptionalClusterClusterPr split = - cluster->split(m_AvailableDistributions, this->minimumSplitCount(), m_Smallest[0], interval, m_ClusterIndexGenerator)) { - LOG_TRACE(<< "Splitting cluster " << cluster->index() << " at " << cluster->centre()); + cluster->split(m_AvailableDistributions, this->minimumSplitCount(), + m_Smallest[0], interval, m_ClusterIndexGenerator)) { + LOG_TRACE(<< "Splitting cluster " << cluster->index() << " at " + << cluster->centre()); std::size_t index = cluster->index(); *cluster = split->second; m_Clusters.insert(cluster, split->first); @@ -1148,7 +1171,8 @@ bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, TClusterVecItr cluster TDoubleDoublePr interval = this->winsorisationInterval(); if (cluster1->shouldMerge(*cluster2, m_AvailableDistributions, m_Smallest[0], interval)) { - LOG_TRACE(<< "Merging cluster " << cluster1->index() << " at " << cluster1->centre() << " and cluster " << cluster2->index() + LOG_TRACE(<< "Merging cluster " << cluster1->index() << " at " + << cluster1->centre() << " and cluster " << cluster2->index() << " at " << cluster2->centre()); std::size_t index1 = cluster1->index(); std::size_t index2 = cluster2->index(); @@ -1176,8 +1200,8 @@ bool CXMeansOnline1d::prune() { if (left.count() < minimumCount || right.count() < minimumCount) { std::size_t leftIndex = left.index(); std::size_t rightIndex = right.index(); - LOG_TRACE(<< "Merging cluster " << leftIndex << " at " << left.centre() << " and cluster " << rightIndex << " at " - << right.centre()); + LOG_TRACE(<< "Merging cluster " << leftIndex << " at " << left.centre() + << " and cluster " << rightIndex << " at " << right.centre()); CCluster merge = left.merge(right, m_ClusterIndexGenerator); left = merge; m_Clusters.erase(m_Clusters.begin() + i); @@ -1197,7 +1221,8 @@ TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const { if (f * this->count() < 1.0) { // Don't bother if we don't expect a sample outside the // Winsorisation interval.
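The guard above short-circuits when no sample is expected outside the interval; otherwise the interval is found at the cumulative fractions f and 1 - f of the total count. A simplified sketch of the same idea on a plain sorted sample (the library works with per-cluster counts and priors instead):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<double> sample{0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2, 10.0};
        std::sort(sample.begin(), sample.end());
        double f{(1.0 - 0.9) / 2.0}; // assumes a 90% winsorisation confidence interval
        double totalCount{static_cast<double>(sample.size())};
        if (f * totalCount < 1.0) {
            // Mirrors the early return above: no sample is expected outside
            // the interval, so make it effectively unbounded.
            std::cout << "effectively unbounded interval\n";
            return 0;
        }
        std::size_t left{static_cast<std::size_t>(f * totalCount)};
        std::size_t right{sample.size() - 1 - left};
        std::cout << "interval = [" << sample[left] << "," << sample[right] << "]\n";
        return 0;
    }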
- return std::make_pair(boost::numeric::bounds<double>::lowest() / 2.0, boost::numeric::bounds<double>::highest() / 2.0); + return std::make_pair(boost::numeric::bounds<double>::lowest() / 2.0, + boost::numeric::bounds<double>::highest() / 2.0); } // The Winsorisation interval is given by the positions corresponding @@ -1212,7 +1237,8 @@ TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const { double leftCount = f * totalCount; double rightCount = (1.0 - f) * totalCount; - LOG_TRACE(<< "totalCount = " << totalCount << " interval = [" << leftCount << "," << rightCount << "]" + LOG_TRACE(<< "totalCount = " << totalCount << " interval = [" << leftCount + << "," << rightCount << "]" << " # clusters = " << m_Clusters.size()); TDoubleDoublePr result; @@ -1241,22 +1267,26 @@ TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const { CXMeansOnline1d::CCluster::CCluster(const CXMeansOnline1d& clusterer) : m_Index(clusterer.m_ClusterIndexGenerator.next()), - m_Prior(CNormalMeanPrecConjugate::nonInformativePrior(clusterer.m_DataType, clusterer.m_DecayRate)), + m_Prior(CNormalMeanPrecConjugate::nonInformativePrior(clusterer.m_DataType, + clusterer.m_DecayRate)), m_Structure(STRUCTURE_SIZE, clusterer.m_DecayRate, clusterer.m_MinimumCategoryCount) { } -CXMeansOnline1d::CCluster::CCluster(std::size_t index, const CNormalMeanPrecConjugate& prior, const CNaturalBreaksClassifier& structure) +CXMeansOnline1d::CCluster::CCluster(std::size_t index, + const CNormalMeanPrecConjugate& prior, + const CNaturalBreaksClassifier& structure) : m_Index(index), m_Prior(prior), m_Structure(structure) { } -bool CXMeansOnline1d::CCluster::acceptRestoreTraverser(const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) { +bool CXMeansOnline1d::CCluster::acceptRestoreTraverser(const SDistributionRestoreParams& params, + core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); RESTORE_BUILT_IN(INDEX_TAG, m_Index) RESTORE_NO_ERROR(PRIOR_TAG, m_Prior = CNormalMeanPrecConjugate(params, traverser)) - RESTORE(STRUCTURE_TAG, - traverser.traverseSubLevel( - boost::bind(&CNaturalBreaksClassifier::acceptRestoreTraverser, &m_Structure, boost::cref(params), _1))) + RESTORE(STRUCTURE_TAG, traverser.traverseSubLevel(boost::bind( + &CNaturalBreaksClassifier::acceptRestoreTraverser, + &m_Structure, boost::cref(params), _1))) } while (traverser.next()); return true; @@ -1264,8 +1294,10 @@ bool CXMeansOnline1d::CCluster::acceptRestoreTraverser(const SDistributionRestor void CXMeansOnline1d::CCluster::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(INDEX_TAG, m_Index); - inserter.insertLevel(PRIOR_TAG, boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, &m_Prior, _1)); - inserter.insertLevel(STRUCTURE_TAG, boost::bind(&CNaturalBreaksClassifier::acceptPersistInserter, &m_Structure, _1)); + inserter.insertLevel(PRIOR_TAG, boost::bind(&CNormalMeanPrecConjugate::acceptPersistInserter, + &m_Prior, _1)); + inserter.insertLevel(STRUCTURE_TAG, boost::bind(&CNaturalBreaksClassifier::acceptPersistInserter, + &m_Structure, _1)); } void CXMeansOnline1d::CCluster::dataType(maths_t::EDataType dataType) { @@ -1273,7 +1305,8 @@ void CXMeansOnline1d::CCluster::dataType(maths_t::EDataType dataType) { } void CXMeansOnline1d::CCluster::add(double point, double count) { - m_Prior.addSamples(CConstantWeights::COUNT, TDouble1Vec(1, point), TDouble4Vec1Vec(1, TDouble4Vec(1, count))); + m_Prior.addSamples(CConstantWeights::COUNT, TDouble1Vec(1, point), + TDouble4Vec1Vec(1,
TDouble4Vec(1, count))); m_Structure.add(point, count); } @@ -1317,23 +1350,29 @@ double CXMeansOnline1d::CCluster::weight(maths_t::EClusterWeightCalc calc) const LOG_ABORT(<< "Unexpected calculation style " << calc); } -double CXMeansOnline1d::CCluster::logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc, double point) const { +double CXMeansOnline1d::CCluster::logLikelihoodFromCluster(maths_t::EClusterWeightCalc calc, + double point) const { double result; - if (detail::logLikelihoodFromCluster(point, m_Prior, this->weight(calc), result) & maths_t::E_FpFailed) { + if (detail::logLikelihoodFromCluster(point, m_Prior, this->weight(calc), result) & + maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute likelihood for: " << m_Index); } return result; } -void CXMeansOnline1d::CCluster::sample(std::size_t numberSamples, double smallest, double largest, TDoubleVec& samples) const { +void CXMeansOnline1d::CCluster::sample(std::size_t numberSamples, + double smallest, + double largest, + TDoubleVec& samples) const { m_Structure.sample(numberSamples, smallest, largest, samples); } -CXMeansOnline1d::TOptionalClusterClusterPr CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, - double minimumCount, - double smallest, - const TDoubleDoublePr& interval, - CIndexGenerator& indexGenerator) { +CXMeansOnline1d::TOptionalClusterClusterPr +CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, + double minimumCount, + double smallest, + const TDoubleDoublePr& interval, + CIndexGenerator& indexGenerator) { // We do our clustering top down to minimize space and avoid // making splits before we are confident they exist. This is // important for anomaly detection because we do *not* want @@ -1369,7 +1408,8 @@ CXMeansOnline1d::TOptionalClusterClusterPr CXMeansOnline1d::CCluster::split(CAva for (std::size_t i = 0u; i < categories.size(); ++i) { detail::winsorise(interval, categories[i]); } - if (!detail::splitSearch(minimumCount, MINIMUM_SPLIT_DISTANCE, dataType, distributions, smallest, categories, split)) { + if (!detail::splitSearch(minimumCount, MINIMUM_SPLIT_DISTANCE, dataType, + distributions, smallest, categories, split)) { return TOptionalClusterClusterPr(); } } @@ -1379,7 +1419,8 @@ CXMeansOnline1d::TOptionalClusterClusterPr CXMeansOnline1d::CCluster::split(CAva CNaturalBreaksClassifier::TClassifierVec classifiers; m_Structure.split(split, classifiers); - LOG_TRACE(<< "Splitting cluster " << this->index() << " at " << this->centre() << " left = " << classifiers[0].print() + LOG_TRACE(<< "Splitting cluster " << this->index() << " at " + << this->centre() << " left = " << classifiers[0].print() << ", right = " << classifiers[1].print()); std::size_t index1 = indexGenerator.next(); @@ -1388,14 +1429,16 @@ CXMeansOnline1d::TOptionalClusterClusterPr CXMeansOnline1d::CCluster::split(CAva CNormalMeanPrecConjugate leftNormal(dataType, categories[0], decayRate); CNormalMeanPrecConjugate rightNormal(dataType, categories[1], decayRate); - return TClusterClusterPr(CCluster(index1, leftNormal, classifiers[0]), CCluster(index2, rightNormal, classifiers[1])); + return TClusterClusterPr(CCluster(index1, leftNormal, classifiers[0]), + CCluster(index2, rightNormal, classifiers[1])); } bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, CAvailableModeDistributions distributions, double smallest, const TDoubleDoublePr& interval) { - if (m_Structure.buffering() || m_Structure.size() == 0 || other.m_Structure.size() == 0) { + if (m_Structure.buffering() || 
m_Structure.size() == 0 || + other.m_Structure.size() == 0) { return false; } @@ -1416,13 +1459,16 @@ bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, double distance; double nl; double nr; - detail::BICGain(dataType, distributions, smallest, categories, 0, split, categories.size(), distance, nl, nr); - LOG_TRACE(<< "max(BIC(1) - BIC(2), 0) = " << distance << " (to merge " << MAXIMUM_MERGE_DISTANCE << ")"); + detail::BICGain(dataType, distributions, smallest, categories, 0, split, + categories.size(), distance, nl, nr); + LOG_TRACE(<< "max(BIC(1) - BIC(2), 0) = " << distance << " (to merge " + << MAXIMUM_MERGE_DISTANCE << ")"); return distance <= MAXIMUM_MERGE_DISTANCE; } -CXMeansOnline1d::CCluster CXMeansOnline1d::CCluster::merge(CCluster& other, CIndexGenerator& indexGenerator) { +CXMeansOnline1d::CCluster +CXMeansOnline1d::CCluster::merge(CCluster& other, CIndexGenerator& indexGenerator) { TTupleVec left, right; m_Structure.categories(1, 0, left); other.m_Structure.categories(1, 0, right); @@ -1441,7 +1487,8 @@ CXMeansOnline1d::CCluster CXMeansOnline1d::CCluster::merge(CCluster& other, CInd mergedCategories += right[0]; } - CNormalMeanPrecConjugate prior(m_Prior.dataType(), mergedCategories, m_Prior.decayRate()); + CNormalMeanPrecConjugate prior(m_Prior.dataType(), mergedCategories, + m_Prior.decayRate()); CNaturalBreaksClassifier structure(m_Structure); structure.merge(other.m_Structure); diff --git a/lib/maths/CXMeansOnlineFactory.cc b/lib/maths/CXMeansOnlineFactory.cc index b41e9f2c63..9e8774d216 100644 --- a/lib/maths/CXMeansOnlineFactory.cc +++ b/lib/maths/CXMeansOnlineFactory.cc @@ -14,21 +14,20 @@ namespace ml { namespace maths { namespace xmeans_online_factory_detail { -#define XMEANS_FACTORY(T, N) \ - CClusterer<CVectorNx1<T, N>>* CFactory<T, N>::make(maths_t::EDataType dataType, \ - maths_t::EClusterWeightCalc weightCalc, \ - double decayRate, \ - double minimumClusterFraction, \ - double minimumClusterCount, \ - double minimumCategoryCount) { \ - return new CXMeansOnline<T, N>( \ - dataType, weightCalc, decayRate, minimumClusterFraction, minimumClusterCount, minimumCategoryCount); \ - } \ - CClusterer<CVectorNx1<T, N>>* CFactory<T, N>::restore(const SDistributionRestoreParams& params, \ - const CClustererTypes::TSplitFunc& splitFunc, \ - const CClustererTypes::TMergeFunc& mergeFunc, \ - core::CStateRestoreTraverser& traverser) { \ - return new CXMeansOnline<T, N>(params, splitFunc, mergeFunc, traverser); \ +#define XMEANS_FACTORY(T, N) \ + CClusterer<CVectorNx1<T, N>>* CFactory<T, N>::make( \ + maths_t::EDataType dataType, maths_t::EClusterWeightCalc weightCalc, \ + double decayRate, double minimumClusterFraction, \ + double minimumClusterCount, double minimumCategoryCount) { \ + return new CXMeansOnline<T, N>(dataType, weightCalc, decayRate, minimumClusterFraction, \ + minimumClusterCount, minimumCategoryCount); \ + } \ + CClusterer<CVectorNx1<T, N>>* CFactory<T, N>::restore( \ + const SDistributionRestoreParams& params, \ + const CClustererTypes::TSplitFunc& splitFunc, \ + const CClustererTypes::TMergeFunc& mergeFunc, \ + core::CStateRestoreTraverser& traverser) { \ + return new CXMeansOnline<T, N>(params, splitFunc, mergeFunc, traverser); \ } XMEANS_FACTORY(CFloatStorage, 2) XMEANS_FACTORY(CFloatStorage, 3) diff --git a/lib/maths/Constants.cc b/lib/maths/Constants.cc index 6884d7f2af..183ec48031 100644 --- a/lib/maths/Constants.cc +++ b/lib/maths/Constants.cc @@ -10,8 +10,10 @@ namespace ml { namespace maths { const maths_t::TWeightStyleVec CConstantWeights::COUNT{maths_t::E_SampleCountWeight}; -const maths_t::TWeightStyleVec
CConstantWeights::COUNT_VARIANCE{maths_t::E_SampleCountVarianceScaleWeight}; -const maths_t::TWeightStyleVec CConstantWeights::SEASONAL_VARIANCE{maths_t::E_SampleSeasonalVarianceScaleWeight}; +const maths_t::TWeightStyleVec CConstantWeights::COUNT_VARIANCE{ + maths_t::E_SampleCountVarianceScaleWeight}; +const maths_t::TWeightStyleVec CConstantWeights::SEASONAL_VARIANCE{ + maths_t::E_SampleSeasonalVarianceScaleWeight}; const CConstantWeights::TDouble4Vec CConstantWeights::UNIT{1.0}; const CConstantWeights::TDouble4Vec1Vec CConstantWeights::SINGLE_UNIT{UNIT}; diff --git a/lib/maths/MathsTypes.cc b/lib/maths/MathsTypes.cc index 91e52c1bf8..e55b893320 100644 --- a/lib/maths/MathsTypes.cc +++ b/lib/maths/MathsTypes.cc @@ -23,12 +23,13 @@ namespace detail { //! Check that the weights styles and weights are consistent. template<typename T> -inline bool check(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights) { +inline bool check(const TWeightStyleVec& weightStyles, + const core::CSmallVector<T, 4>& weights) { if (weightStyles.size() == weights.size()) { return true; } - LOG_ERROR(<< "Mismatch in weight styles '" << core::CContainerPrinter::print(weightStyles) << "' and weights '" - << core::CContainerPrinter::print(weights) << "'"); + LOG_ERROR(<< "Mismatch in weight styles '" << core::CContainerPrinter::print(weightStyles) + << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return false; } @@ -103,7 +104,9 @@ void count(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& //! Extract the effective sample count with which to update a model //! from a collection of weights. template<typename T> -void countForUpdate(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights, T& result) { +void countForUpdate(const TWeightStyleVec& weightStyles, + const core::CSmallVector<T, 4>& weights, + T& result) { if (check(weightStyles, weights)) { T candidate(result); for (std::size_t i = 0u; i < weightStyles.size(); ++i) { @@ -130,7 +133,9 @@ void countForUpdate(const TWeightStyleVec& weightStyles, const core::CSmallVecto //! Extract the Winsorisation weight from a collection of weights. template<typename T> -void winsorisationWeight(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights, T& result) { +void winsorisationWeight(const TWeightStyleVec& weightStyles, + const core::CSmallVector<T, 4>& weights, + T& result) { if (check(weightStyles, weights)) { T candidate(result); for (std::size_t i = 0u; i < weightStyles.size(); ++i) { @@ -156,7 +161,9 @@ void winsorisationWeight(const TWeightStyleVec& weightStyles, const core::CSmall //! Extract the seasonal variance scale from a collection of weights. template<typename T> -void seasonalVarianceScale(const TWeightStyleVec& weightStyles, const core::CSmallVector<T, 4>& weights, T& result) { +void seasonalVarianceScale(const TWeightStyleVec& weightStyles, + const core::CSmallVector<T, 4>& weights, + T& result) { if (check(weightStyles, weights)) { T candidate(result); for (std::size_t i = 0u; i < weightStyles.size(); ++i) { @@ -182,7 +189,9 @@ void seasonalVarianceScale(const TWeightStyleVec& weightStyles, const core::CSma //! Extract the count variance scale from a collection of weights.
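All of these extractors share one convention: weightStyles[i] declares what weights[i] means, and the requested quantity multiplies together the relevant entries. A standalone sketch of the pattern for the effective count follows; how each style contributes here is a simplified assumption, not the library's exact arithmetic.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    enum ESampleWeightStyle {
        E_SampleCountWeight,
        E_SampleWinsorisationWeight,
        E_SampleCountVarianceScaleWeight
    };

    double count(const std::vector<ESampleWeightStyle>& weightStyles,
                 const std::vector<double>& weights) {
        double result{1.0};
        if (weightStyles.size() != weights.size()) {
            return result; // mirrors the check(...) guard above
        }
        for (std::size_t i = 0; i < weightStyles.size(); ++i) {
            switch (weightStyles[i]) {
            case E_SampleCountWeight:
                result *= weights[i];
                break;
            case E_SampleCountVarianceScaleWeight:
                // Assumed: a larger variance scale means a smaller effective count.
                result /= weights[i];
                break;
            default:
                break; // the Winsorisation weight does not change the plain count
            }
        }
        return result;
    }

    int main() {
        std::cout << count({E_SampleCountWeight, E_SampleCountVarianceScaleWeight},
                           {2.0, 4.0}) << '\n';
        return 0;
    }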
template -void countVarianceScale(const TWeightStyleVec& weightStyles, const core::CSmallVector& weights, T& result) { +void countVarianceScale(const TWeightStyleVec& weightStyles, + const core::CSmallVector& weights, + T& result) { if (check(weightStyles, weights)) { T candidate(result); for (std::size_t i = 0u; i < weightStyles.size(); ++i) { @@ -214,7 +223,9 @@ double count(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { return result; } -TDouble10Vec count(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { +TDouble10Vec count(std::size_t dimension, + const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::count(weightStyles, weights, result); return result; @@ -226,7 +237,9 @@ double countForUpdate(const TWeightStyleVec& weightStyles, const TDouble4Vec& we return result; } -TDouble10Vec countForUpdate(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { +TDouble10Vec countForUpdate(std::size_t dimension, + const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::countForUpdate(weightStyles, weights, result); return result; @@ -238,7 +251,9 @@ double winsorisationWeight(const TWeightStyleVec& weightStyles, const TDouble4Ve return result; } -TDouble10Vec winsorisationWeight(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { +TDouble10Vec winsorisationWeight(std::size_t dimension, + const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::winsorisationWeight(weightStyles, weights, result); return result; @@ -250,7 +265,9 @@ double seasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4 return result; } -TDouble10Vec seasonalVarianceScale(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { +TDouble10Vec seasonalVarianceScale(std::size_t dimension, + const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::seasonalVarianceScale(weightStyles, weights, result); return result; @@ -262,7 +279,9 @@ double countVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec return result; } -TDouble10Vec countVarianceScale(std::size_t dimension, const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { +TDouble10Vec countVarianceScale(std::size_t dimension, + const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) { TDouble10Vec result(dimension, 1.0); detail::countVarianceScale(weightStyles, weights, result); return result; @@ -272,7 +291,8 @@ bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble return seasonalVarianceScale(weightStyles, weights) != 1.0; } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights) { +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, + const TDouble4Vec1Vec& weights) { for (std::size_t i = 0u; i < weights.size(); ++i) { if (hasSeasonalVarianceScale(weightStyles, weights[i])) { return true; @@ -281,7 +301,8 @@ bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble return false; } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, + const 
TDouble10Vec4Vec& weights) { if (!detail::check(weightStyles, weights)) { return false; } @@ -305,7 +326,8 @@ bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble return false; } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec1Vec& weights) { +bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec1Vec& weights) { for (std::size_t i = 0u; i < weights.size(); ++i) { if (hasSeasonalVarianceScale(weightStyles, weights[i])) { return true; @@ -318,7 +340,8 @@ bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Ve return countVarianceScale(weightStyles, weights) != 1.0; } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights) { +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, + const TDouble4Vec1Vec& weights) { for (std::size_t i = 0u; i < weights.size(); ++i) { if (hasCountVarianceScale(weightStyles, weights[i])) { return true; @@ -327,7 +350,8 @@ bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Ve return false; } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec& weights) { +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec& weights) { if (!detail::check(weightStyles, weights)) { return false; } @@ -351,7 +375,8 @@ bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10V return false; } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10Vec4Vec1Vec& weights) { +bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, + const TDouble10Vec4Vec1Vec& weights) { for (std::size_t i = 0u; i < weights.size(); ++i) { if (hasCountVarianceScale(weightStyles, weights[i])) { return true; @@ -361,7 +386,8 @@ bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble10V } void setWeight(ESampleWeightStyle style, double weight, TWeightStyleVec& weightStyles, TDouble4Vec& weights) { - std::ptrdiff_t i{std::find(weightStyles.begin(), weightStyles.end(), style) - weightStyles.begin()}; + std::ptrdiff_t i{std::find(weightStyles.begin(), weightStyles.end(), style) - + weightStyles.begin()}; if (static_cast(i) < weightStyles.size()) { weights[i] = weight; } else { @@ -370,8 +396,13 @@ void setWeight(ESampleWeightStyle style, double weight, TWeightStyleVec& weightS } } -void setWeight(ESampleWeightStyle style, double weight, std::size_t dimension, TWeightStyleVec& weightStyles, TDouble10Vec4Vec& weights) { - std::ptrdiff_t i{std::find(weightStyles.begin(), weightStyles.end(), style) - weightStyles.begin()}; +void setWeight(ESampleWeightStyle style, + double weight, + std::size_t dimension, + TWeightStyleVec& weightStyles, + TDouble10Vec4Vec& weights) { + std::ptrdiff_t i{std::find(weightStyles.begin(), weightStyles.end(), style) - + weightStyles.begin()}; if (static_cast(i) < weightStyles.size()) { weights[i].assign(dimension, weight); } else { diff --git a/lib/maths/ProbabilityAggregators.cc b/lib/maths/ProbabilityAggregators.cc index d4c4a1c215..b4449f5ecb 100644 --- a/lib/maths/ProbabilityAggregators.cc +++ b/lib/maths/ProbabilityAggregators.cc @@ -47,7 +47,9 @@ bool deviation(double p, double& result) { boost::math::normal_distribution<> normal(0.0, 1.0); result = CTools::pow2(boost::math::quantile(normal, p / 2.0)); return true; - } catch (const std::exception& e) { LOG_ERROR(<< "Unable to compute quantile: " << e.what() << ", probability = " 
<< p); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Unable to compute quantile: " << e.what() << ", probability = " << p); + } return false; } @@ -78,8 +80,13 @@ class CNumericalLogProbabilityOfMFromNExtremeSamples { //! \param n The total number of samples. //! \param m The number of extreme samples. //! \param i The variable being integrated, i.e. \f$t_i\f$. - CLogIntegrand(const TDoubleVec& limits, const TDoubleVec& corrections, std::size_t n, std::size_t m, std::size_t i) - : m_Limits(&limits), m_Corrections(&corrections), m_N(n), m_M(m), m_I(i) {} + CLogIntegrand(const TDoubleVec& limits, + const TDoubleVec& corrections, + std::size_t n, + std::size_t m, + std::size_t i) + : m_Limits(&limits), m_Corrections(&corrections), m_N(n), m_M(m), + m_I(i) {} //! Wrapper around evaluate which adapts it for CIntegration::gaussLegendre. bool operator()(double x, double& result) const { @@ -95,7 +102,8 @@ class CNumericalLogProbabilityOfMFromNExtremeSamples { } double result; CLogIntegrand f(*m_Limits, *m_Corrections, m_N, m_M, m_I + 1u); - CIntegration::logGaussLegendre(f, x, (*m_Limits)[m_I], result); + CIntegration::logGaussLegendre( + f, x, (*m_Limits)[m_I], result); result += (*m_Corrections)[m_I]; return result; } @@ -114,7 +122,8 @@ class CNumericalLogProbabilityOfMFromNExtremeSamples { public: //! \param p The probabilities (in sorted order). //! \param n The total number of samples. - CNumericalLogProbabilityOfMFromNExtremeSamples(const TMinValueAccumulator& p, std::size_t n) : m_N(n) { + CNumericalLogProbabilityOfMFromNExtremeSamples(const TMinValueAccumulator& p, std::size_t n) + : m_N(n) { if (p.count() > 0) { // For large n the integral is dominated from the contributions // near the lowest probability. @@ -122,7 +131,10 @@ class CNumericalLogProbabilityOfMFromNExtremeSamples { m_Corrections.push_back(0.0); for (std::size_t i = 1u; i < std::min(p.count(), MAX_DIMENSION); ++i) { m_P.push_back(truncate(p[i], m_P[i - 1])); - m_Corrections.push_back(p[i] == p[i - 1] ? 0.0 : std::log(p[i] - p[i - 1]) - std::log(m_P[i] - m_P[i - 1])); + m_Corrections.push_back(p[i] == p[i - 1] + ? 
@@ -132,7 +144,8 @@ class CNumericalLogProbabilityOfMFromNExtremeSamples {
         double result;
         CLogIntegrand f(m_P, m_Corrections, m_N, m_P.size(), 1u);
         CIntegration::logGaussLegendre(f, 0, m_P[0], result);
-        result += boost::math::lgamma(static_cast<double>(m_N) + 1.0) - boost::math::lgamma(static_cast<double>(m_N - m_P.size()) + 1.0);
+        result += boost::math::lgamma(static_cast<double>(m_N) + 1.0) -
+                  boost::math::lgamma(static_cast<double>(m_N - m_P.size()) + 1.0);
         return result;
     }
 
@@ -156,7 +169,8 @@ const char DELIMITER(':');
 
 //////// CJointProbabilityOfLessLikelySample Implementation ////////
 
-CJointProbabilityOfLessLikelySamples::CJointProbabilityOfLessLikelySamples() : m_Distance(0.0), m_NumberSamples(0.0) {
+CJointProbabilityOfLessLikelySamples::CJointProbabilityOfLessLikelySamples()
+    : m_Distance(0.0), m_NumberSamples(0.0) {
 }
 
 bool CJointProbabilityOfLessLikelySamples::fromDelimited(const std::string& value) {
@@ -281,14 +295,14 @@ bool CJointProbabilityOfLessLikelySamples::calculate(double& result) const {
     try {
         result = boost::math::gamma_q(m_NumberSamples / 2.0, m_Distance / 2.0);
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Unable to compute probability: " << e.what() << ", m_NumberSamples = " << m_NumberSamples
-                  << ", m_Distance = " << m_Distance);
+        LOG_ERROR(<< "Unable to compute probability: " << e.what() << ", m_NumberSamples = "
+                  << m_NumberSamples << ", m_Distance = " << m_Distance);
         return false;
     }
 
     if (!(result >= 0.0 && result <= 1.0)) {
-        LOG_ERROR(<< "Invalid joint probability = " << result << ", m_NumberSamples = " << m_NumberSamples
-                  << ", m_Distance = " << m_Distance);
+        LOG_ERROR(<< "Invalid joint probability = " << result << ", m_NumberSamples = "
+                  << m_NumberSamples << ", m_Distance = " << m_Distance);
     }
 
     result = CTools::truncate(result, 0.0, 1.0);
@@ -316,14 +330,14 @@ bool CJointProbabilityOfLessLikelySamples::averageProbability(double& result) co
         boost::math::normal_distribution<> normal(0.0, 1.0);
         result = 2.0 * boost::math::cdf(normal, -std::sqrt(m_Distance / m_NumberSamples));
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Unable to compute probability: " << e.what() << ", m_NumberSamples = " << m_NumberSamples
-                  << ", m_Distance = " << m_Distance);
+        LOG_ERROR(<< "Unable to compute probability: " << e.what() << ", m_NumberSamples = "
+                  << m_NumberSamples << ", m_Distance = " << m_Distance);
         return false;
     }
 
     if (!(result >= 0.0 && result <= 1.0)) {
-        LOG_ERROR(<< "Invalid average probability = " << result << ", m_NumberSamples = " << m_NumberSamples
-                  << ", m_Distance = " << m_Distance);
+        LOG_ERROR(<< "Invalid average probability = " << result << ", m_NumberSamples = "
+                  << m_NumberSamples << ", m_Distance = " << m_Distance);
     }
 
     result = CTools::truncate(result, 0.0, 1.0);
@@ -331,7 +345,8 @@ bool CJointProbabilityOfLessLikelySamples::averageProbability(double& result) co
     return true;
 }
 
-CJointProbabilityOfLessLikelySamples::TOptionalDouble CJointProbabilityOfLessLikelySamples::onlyProbability() const {
+CJointProbabilityOfLessLikelySamples::TOptionalDouble
+CJointProbabilityOfLessLikelySamples::onlyProbability() const {
     return m_OnlyProbability;
 }
 
@@ -353,12 +368,15 @@ std::ostream& CJointProbabilityOfLessLikelySamples::print(std::ostream& o) const
     return o << '(' << m_NumberSamples << ", " << m_Distance << ')';
 }
 
-std::ostream& operator<<(std::ostream& o, const CJointProbabilityOfLessLikelySamples& probability) {
+std::ostream& operator<<(std::ostream& o,
+                         const CJointProbabilityOfLessLikelySamples& probability) {
     return probability.print(o);
 }
 
 CJointProbabilityOfLessLikelySamples& CJointProbabilityOfLessLikelySamples::SAddProbability::
-operator()(CJointProbabilityOfLessLikelySamples& jointProbability, const double probability, const double weight) const {
+operator()(CJointProbabilityOfLessLikelySamples& jointProbability,
+           const double probability,
+           const double weight) const {
     jointProbability.add(probability, weight);
     return jointProbability;
 }
@@ -459,14 +477,16 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double& result
     // If upper incomplete gamma function doesn't underflow use the "exact" value
     // (we want 1 d.p. of precision).
     double probability;
-    if (this->calculate(probability) && probability > 10.0 * boost::numeric::bounds<double>::smallest()) {
+    if (this->calculate(probability) &&
+        probability > 10.0 * boost::numeric::bounds<double>::smallest()) {
         LOG_TRACE(<< "probability = " << probability);
         result = std::log(probability);
         return true;
     }
 
     static const double E = boost::math::double_constants::e;
-    static const double LOG_DOUBLE_MAX = std::log(0.1 * boost::numeric::bounds<double>::highest());
+    static const double LOG_DOUBLE_MAX =
+        std::log(0.1 * boost::numeric::bounds<double>::highest());
 
     double s = this->numberSamples() / 2.0;
     double x = this->distance() / 2.0;
@@ -487,7 +507,8 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double& result
             b1 = -1.0 - 0.5 * logm + m * (1.0 + logx - logm);
         } else if (E * x / m != 1.0) {
             double r = 1.0 - E * x / m;
-            b1 = -1.0 - 0.5 * logm + std::log(CTools::oneMinusPowOneMinusX(r, m + 1.0) / r);
+            b1 = -1.0 - 0.5 * logm +
+                 std::log(CTools::oneMinusPowOneMinusX(r, m + 1.0) / r);
         } else {
             // Use L'Hopital's rule to show that:
             //   lim { (1 - r^(m+1)) / (1 - r) } = m + 1
@@ -501,10 +522,12 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double& result
         double logp = std::log(p);
         if ((p - m) * (1.0 + logx - logp) >= LOG_DOUBLE_MAX) {
             // Handle the case that (e*x/p)^(p-m) overflows.
-            t = m + (m + 1.0) * logx - (m + 1.5) * logp + (p - m - 1.0) * (1.0 + logx - logp);
+            t = m + (m + 1.0) * logx - (m + 1.5) * logp +
+                (p - m - 1.0) * (1.0 + logx - logp);
         } else if (E * x / p != 1.0) {
             double r = 1.0 - E * x / p;
-            t = m + (m + 1.0) * logx - (m + 1.5) * logp + std::log(CTools::oneMinusPowOneMinusX(r, p - m) / r);
+            t = m + (m + 1.0) * logx - (m + 1.5) * logp +
+                std::log(CTools::oneMinusPowOneMinusX(r, p - m) / r);
         } else {
             // Use L'Hopital's rule to show that:
             //   lim { (1 - r^(p - m)) / (1 - r) } = p - m
@@ -525,9 +548,13 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateLowerBound(double& result
 
         bound = (s - 1.0) * logx - x + logSum - boost::math::lgamma(s);
 
-        LOG_TRACE(<< "s = " << s << ", x = " << x << ", p = " << p << ", m = " << m << ", b1 = " << b1 << ", b2 = " << b2
+        LOG_TRACE(<< "s = " << s << ", x = " << x << ", p = " << p
+                  << ", m = " << m << ", b1 = " << b1 << ", b2 = " << b2
                   << ", log(sum) = " << logSum << ", bound = " << bound);
-    } catch (const std::exception& e) { LOG_ERROR(<< "Failed computing bound: " << e.what() << ", s = " << s << ", x = " << x); }
+    } catch (const std::exception& e) {
+        LOG_ERROR(<< "Failed computing bound: " << e.what() << ", s = " << s
+                  << ", x = " << x);
+    }
 
     result = std::min(bound, 0.0);
     LOG_TRACE(<< "result = " << result);
@@ -596,13 +623,15 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double& result
     // If upper incomplete gamma function likely isn't going to underflow
     // use the "exact" value. Note that we want 1 d.p. of precision.
     double probability;
-    if (this->calculate(probability) && probability > 10.0 * boost::numeric::bounds<double>::smallest()) {
+    if (this->calculate(probability) &&
+        probability > 10.0 * boost::numeric::bounds<double>::smallest()) {
        LOG_TRACE(<< "probability = " << probability);
        result = std::log(probability);
        return true;
     }
 
-    static const double LOG_DOUBLE_MAX = std::log(0.10 * boost::numeric::bounds<double>::highest());
+    static const double LOG_DOUBLE_MAX =
+        std::log(0.10 * boost::numeric::bounds<double>::highest());
 
     double s = this->numberSamples() / 2.0;
     double x = this->distance() / 2.0;
@@ -634,9 +663,12 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double& result
 
         bound = (s - 1.0) * std::log(x) - x + logSum - boost::math::lgamma(s);
 
-        LOG_TRACE(<< "s = " << s << ", x = " << x << ", b1 = " << b1 << ", b2 = " << b2 << ", log(sum) = " << logSum
-                  << ", bound = " << bound);
-    } catch (const std::exception& e) { LOG_ERROR(<< "Failed computing bound: " << e.what() << ", s = " << s << ", x = " << x); }
+        LOG_TRACE(<< "s = " << s << ", x = " << x << ", b1 = " << b1 << ", b2 = " << b2
+                  << ", log(sum) = " << logSum << ", bound = " << bound);
+    } catch (const std::exception& e) {
+        LOG_ERROR(<< "Failed computing bound: " << e.what() << ", s = " << s
+                  << ", x = " << x);
+    }
 
     result = std::min(bound, 0.0);
     LOG_TRACE(<< "result = " << result);
@@ -646,7 +678,8 @@ bool CLogJointProbabilityOfLessLikelySamples::calculateUpperBound(double& result
 
 //////// CProbabilityOfExtremeSample Implementation ////////
 
-CProbabilityOfExtremeSample::CProbabilityOfExtremeSample() : m_NumberSamples(0.0) {
+CProbabilityOfExtremeSample::CProbabilityOfExtremeSample()
+    : m_NumberSamples(0.0) {
 }
 
 bool CProbabilityOfExtremeSample::fromDelimited(const std::string& value) {
@@ -659,10 +692,12 @@ bool CProbabilityOfExtremeSample::fromDelimited(const std::string& value) {
 }
 
 std::string CProbabilityOfExtremeSample::toDelimited() const {
-    return core::CStringUtils::typeToString(m_NumberSamples) + DELIMITER + m_MinValue.toDelimited();
+    return core::CStringUtils::typeToString(m_NumberSamples) + DELIMITER +
+           m_MinValue.toDelimited();
 }
 
-const CProbabilityOfExtremeSample& CProbabilityOfExtremeSample::operator+=(const CProbabilityOfExtremeSample& other) {
+const CProbabilityOfExtremeSample& CProbabilityOfExtremeSample::
+operator+=(const CProbabilityOfExtremeSample& other) {
     m_MinValue += other.m_MinValue;
     m_NumberSamples += other.m_NumberSamples;
     return *this;
@@ -677,7 +712,8 @@ bool CProbabilityOfExtremeSample::add(double probability, double weight) {
 bool CProbabilityOfExtremeSample::calculate(double& result) const {
     result = 1.0;
     if (m_NumberSamples > 0) {
-        result = CTools::truncate(CTools::oneMinusPowOneMinusX(m_MinValue[0], m_NumberSamples), 0.0, 1.0);
+        result = CTools::truncate(
+            CTools::oneMinusPowOneMinusX(m_MinValue[0], m_NumberSamples), 0.0, 1.0);
     }
     return true;
 }
@@ -697,7 +733,8 @@ std::ostream& operator<<(std::ostream& o, const CProbabilityOfExtremeSample& pro
 
 //////// CProbabilityOfMFromNMostExtremeSamples Implementation ////////
 
-CLogProbabilityOfMFromNExtremeSamples::CLogProbabilityOfMFromNExtremeSamples(std::size_t m) : m_MinValues(m), m_NumberSamples(0u) {
+CLogProbabilityOfMFromNExtremeSamples::CLogProbabilityOfMFromNExtremeSamples(std::size_t m)
+    : m_MinValues(m), m_NumberSamples(0u) {
 }
 
 bool CLogProbabilityOfMFromNExtremeSamples::fromDelimited(const std::string& value) {
@@ -710,7 +747,8 @@ bool CLogProbabilityOfMFromNExtremeSamples::fromDelimited(const std::string& val
} std::string CLogProbabilityOfMFromNExtremeSamples::toDelimited() const { - return core::CStringUtils::typeToString(m_NumberSamples) + DELIMITER + m_MinValues.toDelimited(); + return core::CStringUtils::typeToString(m_NumberSamples) + DELIMITER + + m_MinValues.toDelimited(); } const CLogProbabilityOfMFromNExtremeSamples& CLogProbabilityOfMFromNExtremeSamples:: @@ -769,7 +807,8 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double& result) { m_MinValues.sort(); for (std::size_t i = 0u; i < M; ++i) { - m_MinValues[i] = CTools::truncate(m_MinValues[i], CTools::smallestProbability(), 1.0); + m_MinValues[i] = + CTools::truncate(m_MinValues[i], CTools::smallestProbability(), 1.0); } for (std::size_t m = 1u; m < M; ++m) { @@ -793,7 +832,8 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double& result) { // that the following calculation can't use the re-normalized // "c" directly because it might be infinite. Instead, we make // use the fact that c * (1 - p)^(N - M + m) won't overflow. - double q = CTools::truncate(CTools::powOneMinusX(p, static_cast(N - M + m)), 0.0, 1.0); + double q = CTools::truncate( + CTools::powOneMinusX(p, static_cast(N - M + m)), 0.0, 1.0); coeffs.push_back(-sum - q * std::exp(logc - logLargestCoeff)); LOG_TRACE(<< "c(0) = " << coeffs.back()); @@ -835,7 +875,8 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double& result) { LOG_TRACE(<< "1 - (1 - p(" << M << "))^" << N << " = " << pMin); if (M > 1) { - double logScale = static_cast(M) * std::log(2.0) + boost::math::lgamma(static_cast(N + 1)) - + double logScale = static_cast(M) * std::log(2.0) + + boost::math::lgamma(static_cast(N + 1)) - boost::math::lgamma(static_cast(N - M + 1)) + logLargestCoeff; LOG_TRACE(<< "log(scale) = " << logScale); @@ -848,8 +889,9 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double& result) { double index = static_cast(coeffs.size() - i); double c = coeffs[i] / index; double p = CTools::oneMinusPowOneMinusX(pM / 2.0, index); - LOG_TRACE(<< "term(" << index << ") = " << (c * p) << " (c(" << index << ") = " << c << ", 1 - (1 - p(M)/2)^" << index << " = " - << p << ")"); + LOG_TRACE(<< "term(" << index << ") = " << (c * p) << " (c(" + << index << ") = " << c << ", 1 - (1 - p(M)/2)^" << index + << " = " << p << ")"); terms.push_back(c * p); sum += std::fabs(c * p); (c * p < 0.0 ? 
negative : positive) += std::fabs(c * p); @@ -887,8 +929,11 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double& result) { for (std::size_t i = 0u; i < terms.size(); ++i) { double remainder = std::fabs(terms[i]) * pMin / sum + terms[i]; result += remainder; - double absTerms[] = {std::fabs(terms[i]), std::fabs(terms[i] * pMin / sum), std::fabs(remainder)}; - condition = std::max(condition, *std::max_element(absTerms, absTerms + 3)); + double absTerms[] = {std::fabs(terms[i]), + std::fabs(terms[i] * pMin / sum), + std::fabs(remainder)}; + condition = std::max( + condition, *std::max_element(absTerms, absTerms + 3)); } } @@ -922,9 +967,11 @@ bool CLogProbabilityOfMFromNExtremeSamples::calculate(double& result) { minValues << " " << m_MinValues[j]; } minValues << "]"; - LOG_ERROR(<< "Invalid log(extreme probability) = " << result << ", m_NumberSamples = " << m_NumberSamples - << ", m_MinValues = " << minValues.str() << ", coeffs = " << core::CContainerPrinter::print(coeffs) - << ", log(max{coeffs}) = " << logLargestCoeff << ", pM = " << pM << ", pMin = " << pMin); + LOG_ERROR(<< "Invalid log(extreme probability) = " << result << ", m_NumberSamples = " + << m_NumberSamples << ", m_MinValues = " << minValues.str() + << ", coeffs = " << core::CContainerPrinter::print(coeffs) + << ", log(max{coeffs}) = " << logLargestCoeff + << ", pM = " << pM << ", pMin = " << pMin); result = 0.0; } else { break; diff --git a/lib/maths/unittest/CAgglomerativeClustererTest.cc b/lib/maths/unittest/CAgglomerativeClustererTest.cc index c927acf812..22e4e24d14 100644 --- a/lib/maths/unittest/CAgglomerativeClustererTest.cc +++ b/lib/maths/unittest/CAgglomerativeClustererTest.cc @@ -41,13 +41,17 @@ class CCluster { CCluster result; result.m_Height = height; result.m_Points.reserve(lhs.m_Points.size() + rhs.m_Points.size()); - result.m_Points.insert(result.m_Points.end(), lhs.m_Points.begin(), lhs.m_Points.end()); - result.m_Points.insert(result.m_Points.end(), rhs.m_Points.begin(), rhs.m_Points.end()); + result.m_Points.insert(result.m_Points.end(), lhs.m_Points.begin(), + lhs.m_Points.end()); + result.m_Points.insert(result.m_Points.end(), rhs.m_Points.begin(), + rhs.m_Points.end()); std::sort(result.m_Points.begin(), result.m_Points.end()); return result; } - void add(TDoubleSizeVecPrVec& result) { result.push_back(TDoubleSizeVecPr(m_Height, m_Points)); } + void add(TDoubleSizeVecPrVec& result) { + result.push_back(TDoubleSizeVecPr(m_Height, m_Points)); + } const TSizeVec& points() const { return m_Points; } @@ -63,7 +67,8 @@ using TClusterVec = std::vector; class CSlinkObjective { public: - CSlinkObjective(const TDoubleVecVec& distanceMatrix) : m_DistanceMatrix(&distanceMatrix) {} + CSlinkObjective(const TDoubleVecVec& distanceMatrix) + : m_DistanceMatrix(&distanceMatrix) {} double operator()(const CCluster& lhs, const CCluster& rhs) { double result = std::numeric_limits::max(); @@ -88,7 +93,8 @@ class CSlinkObjective { class CClinkObjective { public: - CClinkObjective(const TDoubleVecVec& distanceMatrix) : m_DistanceMatrix(&distanceMatrix) {} + CClinkObjective(const TDoubleVecVec& distanceMatrix) + : m_DistanceMatrix(&distanceMatrix) {} double operator()(const CCluster& lhs, const CCluster& rhs) { double result = -std::numeric_limits::max(); @@ -182,15 +188,16 @@ void CAgglomerativeClustererTest::testNode() { double heights[] = {0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.5, 1.9, 4.0}; - maths::CAgglomerativeClusterer::CNode nodes[] = {maths::CAgglomerativeClusterer::CNode(0, heights[0]), - 
maths::CAgglomerativeClusterer::CNode(1, heights[1]), - maths::CAgglomerativeClusterer::CNode(2, heights[2]), - maths::CAgglomerativeClusterer::CNode(3, heights[3]), - maths::CAgglomerativeClusterer::CNode(4, heights[4]), - maths::CAgglomerativeClusterer::CNode(5, heights[5]), - maths::CAgglomerativeClusterer::CNode(6, heights[6]), - maths::CAgglomerativeClusterer::CNode(7, heights[7]), - maths::CAgglomerativeClusterer::CNode(8, heights[8])}; + maths::CAgglomerativeClusterer::CNode nodes[] = { + maths::CAgglomerativeClusterer::CNode(0, heights[0]), + maths::CAgglomerativeClusterer::CNode(1, heights[1]), + maths::CAgglomerativeClusterer::CNode(2, heights[2]), + maths::CAgglomerativeClusterer::CNode(3, heights[3]), + maths::CAgglomerativeClusterer::CNode(4, heights[4]), + maths::CAgglomerativeClusterer::CNode(5, heights[5]), + maths::CAgglomerativeClusterer::CNode(6, heights[6]), + maths::CAgglomerativeClusterer::CNode(7, heights[7]), + maths::CAgglomerativeClusterer::CNode(8, heights[8])}; nodes[5].addChild(nodes[0]); nodes[5].addChild(nodes[1]); @@ -212,7 +219,8 @@ void CAgglomerativeClustererTest::testNode() { TSizeVec points; root.points(points); std::sort(points.begin(), points.end()); - CPPUNIT_ASSERT_EQUAL(std::string("[0, 1, 2, 3, 4]"), core::CContainerPrinter::print(points)); + CPPUNIT_ASSERT_EQUAL(std::string("[0, 1, 2, 3, 4]"), + core::CContainerPrinter::print(points)); points.clear(); nodes[7].points(points); @@ -235,7 +243,8 @@ void CAgglomerativeClustererTest::testNode() { TSizeVecVec clusters; root.clusteringAt(heights[h], clusters); std::sort(clusters.begin(), clusters.end()); - LOG_DEBUG(<< "Clusters at " << heights[h] << " are " << core::CContainerPrinter::print(clusters)); + LOG_DEBUG(<< "Clusters at " << heights[h] << " are " + << core::CContainerPrinter::print(clusters)); CPPUNIT_ASSERT_EQUAL(expected[h - 5], core::CContainerPrinter::print(clusters)); } } @@ -248,11 +257,12 @@ void CAgglomerativeClustererTest::testSimplePermutations() { double x[] = {1.0, 3.2, 4.5, 7.8}; std::size_t n = boost::size(x); - maths::CAgglomerativeClusterer::EObjective objectives[] = {maths::CAgglomerativeClusterer::E_Single, - maths::CAgglomerativeClusterer::E_Complete}; + maths::CAgglomerativeClusterer::EObjective objectives[] = { + maths::CAgglomerativeClusterer::E_Single, maths::CAgglomerativeClusterer::E_Complete}; - std::string expected[] = {std::string("[(3.3, [0, 1, 2, 3]), (2.2, [0, 1, 2]), (1.3, [1, 2])]"), - std::string("[(6.8, [0, 1, 2, 3]), (3.5, [0, 1, 2]), (1.3, [1, 2])]")}; + std::string expected[] = { + std::string("[(3.3, [0, 1, 2, 3]), (2.2, [0, 1, 2]), (1.3, [1, 2])]"), + std::string("[(6.8, [0, 1, 2, 3]), (3.5, [0, 1, 2]), (1.3, [1, 2])]")}; for (std::size_t o = 0u; o < boost::size(objectives); ++o) { LOG_DEBUG(<< "****** " << print(objectives[o]) << " ******"); @@ -303,8 +313,8 @@ void CAgglomerativeClustererTest::testDegenerate() { double x[] = {1.0, 3.2, 3.2, 3.2, 4.5, 7.8}; std::size_t n = boost::size(x); - maths::CAgglomerativeClusterer::EObjective objectives[] = {maths::CAgglomerativeClusterer::E_Single, - maths::CAgglomerativeClusterer::E_Complete}; + maths::CAgglomerativeClusterer::EObjective objectives[] = { + maths::CAgglomerativeClusterer::E_Single, maths::CAgglomerativeClusterer::E_Complete}; std::string expected[][3] = { {std::string("[(3.3, [0, 1, 2, 3, 4, 5]), (2.2, [0, 1, 2, 3, 4]), (1.3, [1, 2, 3, 4]), (0, [1, 2, 3]), (0, [1, 2])]"), @@ -330,7 +340,8 @@ void CAgglomerativeClustererTest::testDegenerate() { distanceMatrix[j].push_back(std::fabs(x[p[i]] - 
x[p[j]])); } if (count % 10 == 0) { - LOG_DEBUG(<< "D = " << core::CContainerPrinter::print(distanceMatrix[i])); + LOG_DEBUG(<< "D = " + << core::CContainerPrinter::print(distanceMatrix[i])); } } @@ -344,7 +355,8 @@ void CAgglomerativeClustererTest::testDegenerate() { tree.back().clusters(clusters); if (count % 10 == 0) { - LOG_DEBUG(<< "clusters = " << core::CContainerPrinter::print(clusters)); + LOG_DEBUG(<< "clusters = " + << core::CContainerPrinter::print(clusters)); } for (std::size_t i = 0u; i < clusters.size(); ++i) { @@ -355,7 +367,8 @@ void CAgglomerativeClustererTest::testDegenerate() { } if (count % 10 == 0) { - LOG_DEBUG(<< "canonical clusters = " << core::CContainerPrinter::print(clusters)); + LOG_DEBUG(<< "canonical clusters = " + << core::CContainerPrinter::print(clusters)); } CPPUNIT_ASSERT(expected[o][0] == core::CContainerPrinter::print(clusters) || @@ -375,8 +388,8 @@ void CAgglomerativeClustererTest::testRandom() { std::size_t n = 20u; - maths::CAgglomerativeClusterer::EObjective objectives[] = {maths::CAgglomerativeClusterer::E_Single, - maths::CAgglomerativeClusterer::E_Complete}; + maths::CAgglomerativeClusterer::EObjective objectives[] = { + maths::CAgglomerativeClusterer::E_Single, maths::CAgglomerativeClusterer::E_Complete}; for (std::size_t o = 0u; o < boost::size(objectives); ++o) { LOG_DEBUG(<< "*** " << print(objectives[o]) << " ***"); @@ -416,7 +429,8 @@ void CAgglomerativeClustererTest::testRandom() { } std::sort(expectedClusters.begin(), expectedClusters.end()); - LOG_DEBUG(<< "expected clusters = " << core::CContainerPrinter::print(expectedClusters)); + LOG_DEBUG(<< "expected clusters = " + << core::CContainerPrinter::print(expectedClusters)); maths::CAgglomerativeClusterer clusterer; CPPUNIT_ASSERT(clusterer.initialize(distanceMatrix)); @@ -433,7 +447,8 @@ void CAgglomerativeClustererTest::testRandom() { LOG_DEBUG(<< "clusters = " << core::CContainerPrinter::print(clusters)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedClusters), core::CContainerPrinter::print(clusters)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedClusters), + core::CContainerPrinter::print(clusters)); } } } @@ -441,14 +456,16 @@ void CAgglomerativeClustererTest::testRandom() { CppUnit::Test* CAgglomerativeClustererTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAgglomerativeClustererTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CAgglomerativeClustererTest::testNode", - &CAgglomerativeClustererTest::testNode)); - suiteOfTests->addTest(new CppUnit::TestCaller("CAgglomerativeClustererTest::testSimplePermutations", - &CAgglomerativeClustererTest::testSimplePermutations)); - suiteOfTests->addTest(new CppUnit::TestCaller("CAgglomerativeClustererTest::testDegenerate", - &CAgglomerativeClustererTest::testDegenerate)); - suiteOfTests->addTest(new CppUnit::TestCaller("CAgglomerativeClustererTest::testRandom", - &CAgglomerativeClustererTest::testRandom)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAgglomerativeClustererTest::testNode", &CAgglomerativeClustererTest::testNode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAgglomerativeClustererTest::testSimplePermutations", + &CAgglomerativeClustererTest::testSimplePermutations)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAgglomerativeClustererTest::testDegenerate", + &CAgglomerativeClustererTest::testDegenerate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAgglomerativeClustererTest::testRandom", 
&CAgglomerativeClustererTest::testRandom)); return suiteOfTests; } diff --git a/lib/maths/unittest/CAssignmentTest.cc b/lib/maths/unittest/CAssignmentTest.cc index fc4c8351d1..bfbc59a980 100644 --- a/lib/maths/unittest/CAssignmentTest.cc +++ b/lib/maths/unittest/CAssignmentTest.cc @@ -40,7 +40,8 @@ void fill(const double (&costs)[N][M], TDoubleVecVec& result) { } void fill(const TDoubleVec& costs, TDoubleVecVec& result) { - std::size_t n = static_cast(std::sqrt(static_cast(costs.size()))); + std::size_t n = + static_cast(std::sqrt(static_cast(costs.size()))); result.reserve(n); for (std::size_t i = 0u; i < n; ++i) { result.push_back(TDoubleVec()); @@ -122,8 +123,11 @@ void CAssignmentTest::testKuhnMunkres() { } { LOG_DEBUG(<< "test 3: 5x4"); - const double test3[][4] = { - {2.0, 1.0, 1.0, 2.0}, {1.0, 2.0, 2.0, 2.0}, {2.0, 2.0, 2.0, 1.0}, {1.0, 1.0, 2.0, 2.0}, {2.0, 2.0, 2.0, 2.0}}; + const double test3[][4] = {{2.0, 1.0, 1.0, 2.0}, + {1.0, 2.0, 2.0, 2.0}, + {2.0, 2.0, 2.0, 1.0}, + {1.0, 1.0, 2.0, 2.0}, + {2.0, 2.0, 2.0, 2.0}}; TDoubleVecVec costs; fill(test3, costs); @@ -221,7 +225,8 @@ void CAssignmentTest::testKuhnMunkres() { lowestRandomCost = std::min(lowestRandomCost, cost); } - LOG_DEBUG(<< "optimal cost = " << optimalCost << ", lowest random cost = " << lowestRandomCost); + LOG_DEBUG(<< "optimal cost = " << optimalCost + << ", lowest random cost = " << lowestRandomCost); CPPUNIT_ASSERT(lowestRandomCost >= optimalCost); // Check adding higher cost row has no effect. @@ -234,21 +239,22 @@ void CAssignmentTest::testKuhnMunkres() { { LOG_DEBUG(<< "test 7: euler 345"); - const double euler345[][15] = {{7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583}, - {627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583, 639, 913}, - {447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423, 159, 743}, - {217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462, 350}, - {960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172, 812, 350}, - {870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425, 329, 803}, - {973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165, 992, 326}, - {322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601, 95, 973}, - {445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448, 848}, - {414, 456, 310, 312, 798, 104, 566, 520, 302, 248, 694, 976, 430, 392, 198}, - {184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284, 760, 390}, - {821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192, 156, 574}, - {34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329, 699}, - {815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251, 631, 107}, - {813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302, 35, 805}}; + const double euler345[][15] = { + {7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583}, + {627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583, 639, 913}, + {447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423, 159, 743}, + {217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462, 350}, + {960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172, 812, 350}, + {870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425, 329, 803}, + {973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165, 992, 326}, + {322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601, 95, 973}, + {445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448, 848}, + {414, 456, 310, 312, 798, 
104, 566, 520, 302, 248, 694, 976, 430, 392, 198}, + {184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284, 760, 390}, + {821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192, 156, 574}, + {34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329, 699}, + {815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251, 631, 107}, + {813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302, 35, 805}}; TDoubleVecVec costs; fill(euler345, costs); for (std::size_t i = 0u; i < costs.size(); ++i) { @@ -267,7 +273,8 @@ void CAssignmentTest::testKuhnMunkres() { CppUnit::Test* CAssignmentTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAssignmentTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CAssignmentTest::testKuhnMunkres", &CAssignmentTest::testKuhnMunkres)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAssignmentTest::testKuhnMunkres", &CAssignmentTest::testKuhnMunkres)); return suiteOfTests; } diff --git a/lib/maths/unittest/CBasicStatisticsTest.cc b/lib/maths/unittest/CBasicStatisticsTest.cc index 186876a9a6..944f7106b7 100644 --- a/lib/maths/unittest/CBasicStatisticsTest.cc +++ b/lib/maths/unittest/CBasicStatisticsTest.cc @@ -29,7 +29,8 @@ namespace { using TMeanAccumulator = ml::maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = ml::maths::CBasicStatistics::SSampleMeanVar::TAccumulator; -using TMeanVarSkewAccumulator = ml::maths::CBasicStatistics::SSampleMeanVarSkew::TAccumulator; +using TMeanVarSkewAccumulator = + ml::maths::CBasicStatistics::SSampleMeanVarSkew::TAccumulator; using TMeanAccumulator2Vec = ml::core::CSmallVector; using TMeanVarAccumulator2Vec = ml::core::CSmallVector; using TMeanVarSkewAccumulator2Vec = ml::core::CSmallVector; @@ -57,21 +58,24 @@ struct SRestore { CppUnit::Test* CBasicStatisticsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBasicStatisticsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CBasicStatisticsTest::testMean", &CBasicStatisticsTest::testMean)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBasicStatisticsTest::testCentralMoments", - &CBasicStatisticsTest::testCentralMoments)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBasicStatisticsTest::testVectorCentralMoments", - &CBasicStatisticsTest::testVectorCentralMoments)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CBasicStatisticsTest::testCovariances", &CBasicStatisticsTest::testCovariances)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBasicStatisticsTest::testCovariancesLedoitWolf", - &CBasicStatisticsTest::testCovariancesLedoitWolf)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CBasicStatisticsTest::testMedian", &CBasicStatisticsTest::testMedian)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBasicStatisticsTest::testOrderStatistics", - &CBasicStatisticsTest::testOrderStatistics)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CBasicStatisticsTest::testMinMax", &CBasicStatisticsTest::testMinMax)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBasicStatisticsTest::testMean", &CBasicStatisticsTest::testMean)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBasicStatisticsTest::testCentralMoments", &CBasicStatisticsTest::testCentralMoments)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBasicStatisticsTest::testVectorCentralMoments", + &CBasicStatisticsTest::testVectorCentralMoments)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBasicStatisticsTest::testCovariances", 
&CBasicStatisticsTest::testCovariances));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>(
+        "CBasicStatisticsTest::testCovariancesLedoitWolf",
+        &CBasicStatisticsTest::testCovariancesLedoitWolf));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>(
+        "CBasicStatisticsTest::testMedian", &CBasicStatisticsTest::testMedian));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>(
+        "CBasicStatisticsTest::testOrderStatistics", &CBasicStatisticsTest::testOrderStatistics));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBasicStatisticsTest>(
+        "CBasicStatisticsTest::testMinMax", &CBasicStatisticsTest::testMinMax));
 
     return suiteOfTests;
 }
@@ -83,7 +87,8 @@ void CBasicStatisticsTest::testMean() {
 
     double sample[] = {0.9, 10.0, 5.6, 1.23, -12.3, 445.2, 0.0, 1.2};
 
-    ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0]));
+    ml::maths::CBasicStatistics::TDoubleVec sampleVec(
+        sample, sample + sizeof(sample) / sizeof(sample[0]));
 
     double mean = ml::maths::CBasicStatistics::mean(sampleVec);
 
@@ -106,7 +111,8 @@ void CBasicStatisticsTest::testCentralMoments() {
         size_t count = sizeof(samples) / sizeof(samples[0]);
         acc = std::for_each(samples, samples + count, acc);
 
-        CPPUNIT_ASSERT_EQUAL(count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
+        CPPUNIT_ASSERT_EQUAL(
+            count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
 
         CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc), 0.000005);
 
@@ -127,7 +133,8 @@ void CBasicStatisticsTest::testCentralMoments() {
         size_t count = sizeof(samples) / sizeof(samples[0]);
         acc = std::for_each(samples, samples + count, acc);
 
-        CPPUNIT_ASSERT_EQUAL(count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
+        CPPUNIT_ASSERT_EQUAL(
+            count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
 
         CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc), 0.000005);
     }
@@ -141,11 +148,13 @@ void CBasicStatisticsTest::testCentralMoments() {
         size_t count = sizeof(samples) / sizeof(samples[0]);
         acc = std::for_each(samples, samples + count, acc);
 
-        CPPUNIT_ASSERT_EQUAL(count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
+        CPPUNIT_ASSERT_EQUAL(
+            count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
 
         CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc), 0.000005);
 
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, ml::maths::CBasicStatistics::variance(acc), 0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(
+            44.90633, ml::maths::CBasicStatistics::variance(acc), 0.000005);
 
         double n0 = ml::maths::CBasicStatistics::count(acc);
         ml::maths::CBasicStatistics::scale(0.5, acc);
@@ -162,13 +171,16 @@ void CBasicStatisticsTest::testCentralMoments() {
         size_t count = sizeof(samples) / sizeof(samples[0]);
         acc = std::for_each(samples, samples + count, acc);
 
-        CPPUNIT_ASSERT_EQUAL(count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
+        CPPUNIT_ASSERT_EQUAL(
+            count, static_cast<size_t>(ml::maths::CBasicStatistics::count(acc)));
 
         CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc), 0.000005);
 
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, ml::maths::CBasicStatistics::variance(acc), 0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(
+            44.90633, ml::maths::CBasicStatistics::variance(acc), 0.000005);
 
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.82216, ml::maths::CBasicStatistics::skewness(acc), 0.000005);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(
+            -0.82216, ml::maths::CBasicStatistics::skewness(acc), 0.000005);
 
         double n0 = ml::maths::CBasicStatistics::count(acc);
         ml::maths::CBasicStatistics::scale(0.5, acc);
@@
-192,7 +204,8 @@ void CBasicStatisticsTest::testCentralMoments() { } } - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), + ml::maths::CBasicStatistics::mean(acc2), 1e-10); } { @@ -206,8 +219,10 @@ void CBasicStatisticsTest::testCentralMoments() { } } - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(acc2), 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), ml::maths::CBasicStatistics::variance(acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), + ml::maths::CBasicStatistics::mean(acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), + ml::maths::CBasicStatistics::variance(acc2), 1e-10); } { @@ -221,9 +236,12 @@ void CBasicStatisticsTest::testCentralMoments() { } } - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(acc2), 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), ml::maths::CBasicStatistics::variance(acc2), 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::skewness(acc1), ml::maths::CBasicStatistics::skewness(acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), + ml::maths::CBasicStatistics::mean(acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::variance(acc1), + ml::maths::CBasicStatistics::variance(acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::skewness(acc1), + ml::maths::CBasicStatistics::skewness(acc2), 1e-10); } } @@ -243,9 +261,12 @@ void CBasicStatisticsTest::testCentralMoments() { acc1 = std::for_each(samples1, samples1 + count1, acc1); acc2 = std::for_each(samples2, samples2 + count2, acc2); - CPPUNIT_ASSERT_EQUAL(count1 + count2, static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); + CPPUNIT_ASSERT_EQUAL( + count1 + count2, + static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); } { @@ -255,11 +276,15 @@ void CBasicStatisticsTest::testCentralMoments() { acc1 = std::for_each(samples1, samples1 + count1, acc1); acc2 = std::for_each(samples2, samples2 + count2, acc2); - CPPUNIT_ASSERT_EQUAL(count1 + count2, static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); + CPPUNIT_ASSERT_EQUAL( + count1 + count2, + static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); - CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, ml::maths::CBasicStatistics::variance(acc1 + acc2), 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 44.90633, ml::maths::CBasicStatistics::variance(acc1 + acc2), 0.000005); } { @@ -269,13 +294,18 @@ void CBasicStatisticsTest::testCentralMoments() { acc1 = std::for_each(samples1, samples1 + count1, acc1); acc2 = std::for_each(samples2, samples2 + count2, acc2); - CPPUNIT_ASSERT_EQUAL(count1 + count2, static_cast(ml::maths::CBasicStatistics::count(acc1 + acc2))); + CPPUNIT_ASSERT_EQUAL( + count1 + count2, + static_cast(ml::maths::CBasicStatistics::count(acc1 + 
acc2))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.72875, ml::maths::CBasicStatistics::mean(acc1 + acc2), 0.000005); - CPPUNIT_ASSERT_DOUBLES_EQUAL(44.90633, ml::maths::CBasicStatistics::variance(acc1 + acc2), 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 44.90633, ml::maths::CBasicStatistics::variance(acc1 + acc2), 0.000005); - CPPUNIT_ASSERT_DOUBLES_EQUAL(-0.82216, ml::maths::CBasicStatistics::skewness(acc1 + acc2), 0.000005); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + -0.82216, ml::maths::CBasicStatistics::skewness(acc1 + acc2), 0.000005); } } @@ -303,10 +333,16 @@ void CBasicStatisticsTest::testCentralMoments() { TMeanAccumulator sum = acc1 + acc2; - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), ml::maths::CBasicStatistics::count(sum - acc2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), ml::maths::CBasicStatistics::count(sum - acc1)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), + ml::maths::CBasicStatistics::count(sum - acc2)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::mean(acc1), + ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), + ml::maths::CBasicStatistics::count(sum - acc1)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::mean(acc2), + ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); } } LOG_DEBUG(<< "Test mean and variance"); @@ -329,14 +365,22 @@ void CBasicStatisticsTest::testCentralMoments() { TMeanVarAccumulator sum = acc1 + acc2; - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), ml::maths::CBasicStatistics::count(sum - acc2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), + ml::maths::CBasicStatistics::count(sum - acc2)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::mean(acc1), + ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::variance(acc1), + ml::maths::CBasicStatistics::variance(sum - acc2), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), + ml::maths::CBasicStatistics::count(sum - acc1)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - ml::maths::CBasicStatistics::variance(acc1), ml::maths::CBasicStatistics::variance(sum - acc2), 1e-10); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), ml::maths::CBasicStatistics::count(sum - acc1)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); + ml::maths::CBasicStatistics::mean(acc2), + ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); CPPUNIT_ASSERT_DOUBLES_EQUAL( - ml::maths::CBasicStatistics::variance(acc2), ml::maths::CBasicStatistics::variance(sum - acc1), 1e-10); + ml::maths::CBasicStatistics::variance(acc2), + ml::maths::CBasicStatistics::variance(sum - acc1), 1e-10); } } LOG_DEBUG(<< "Test mean, variance and skew"); @@ -359,25 +403,37 @@ void CBasicStatisticsTest::testCentralMoments() { TMeanVarSkewAccumulator sum = acc1 + acc2; - 
CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), ml::maths::CBasicStatistics::count(sum - acc2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc1), ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc1), + ml::maths::CBasicStatistics::count(sum - acc2)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - ml::maths::CBasicStatistics::variance(acc1), ml::maths::CBasicStatistics::variance(sum - acc2), 1e-10); + ml::maths::CBasicStatistics::mean(acc1), + ml::maths::CBasicStatistics::mean(sum - acc2), 1e-10); CPPUNIT_ASSERT_DOUBLES_EQUAL( - ml::maths::CBasicStatistics::skewness(acc1), ml::maths::CBasicStatistics::skewness(sum - acc2), 1e-10); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), ml::maths::CBasicStatistics::count(sum - acc1)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(ml::maths::CBasicStatistics::mean(acc2), ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); + ml::maths::CBasicStatistics::variance(acc1), + ml::maths::CBasicStatistics::variance(sum - acc2), 1e-10); CPPUNIT_ASSERT_DOUBLES_EQUAL( - ml::maths::CBasicStatistics::variance(acc2), ml::maths::CBasicStatistics::variance(sum - acc1), 1e-10); + ml::maths::CBasicStatistics::skewness(acc1), + ml::maths::CBasicStatistics::skewness(sum - acc2), 1e-10); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(acc2), + ml::maths::CBasicStatistics::count(sum - acc1)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - ml::maths::CBasicStatistics::skewness(acc2), ml::maths::CBasicStatistics::skewness(sum - acc1), 1e-10); + ml::maths::CBasicStatistics::mean(acc2), + ml::maths::CBasicStatistics::mean(sum - acc1), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::variance(acc2), + ml::maths::CBasicStatistics::variance(sum - acc1), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + ml::maths::CBasicStatistics::skewness(acc2), + ml::maths::CBasicStatistics::skewness(sum - acc1), 1e-10); } } } LOG_DEBUG(<< "test vector") { - using TVectorMeanAccumulator = ml::maths::CBasicStatistics::SSampleMean>::TAccumulator; - using TVectorMeanVarAccumulator = ml::maths::CBasicStatistics::SSampleMeanVar>::TAccumulator; + using TVectorMeanAccumulator = + ml::maths::CBasicStatistics::SSampleMean>::TAccumulator; + using TVectorMeanVarAccumulator = + ml::maths::CBasicStatistics::SSampleMeanVar>::TAccumulator; using TVectorMeanVarSkewAccumulator = ml::maths::CBasicStatistics::SSampleMeanVarSkew>::TAccumulator; @@ -401,9 +457,12 @@ void CBasicStatisticsTest::testCentralMoments() { LOG_DEBUG(<< "v = " << v); vectorMean.add(v); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(means[0]), ml::maths::CBasicStatistics::count(vectorMean)); + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(means[0]), + ml::maths::CBasicStatistics::count(vectorMean)); for (std::size_t j = 0u; j < 4; ++j) { - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::mean(means[j]), (ml::maths::CBasicStatistics::mean(vectorMean))(j)); + CPPUNIT_ASSERT_EQUAL( + ml::maths::CBasicStatistics::mean(means[j]), + (ml::maths::CBasicStatistics::mean(vectorMean))(j)); } } } @@ -425,13 +484,16 @@ void CBasicStatisticsTest::testCentralMoments() { LOG_DEBUG(<< "v = " << v); vectorMeanAndVariances.add(v); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(meansAndVariances[0]), - ml::maths::CBasicStatistics::count(vectorMeanAndVariances)); + CPPUNIT_ASSERT_EQUAL( + ml::maths::CBasicStatistics::count(meansAndVariances[0]), + ml::maths::CBasicStatistics::count(vectorMeanAndVariances)); for (std::size_t j = 
0u; j < 4; ++j) { - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::mean(meansAndVariances[j]), - (ml::maths::CBasicStatistics::mean(vectorMeanAndVariances))(j)); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::variance(meansAndVariances[j]), - (ml::maths::CBasicStatistics::variance(vectorMeanAndVariances))(j)); + CPPUNIT_ASSERT_EQUAL( + ml::maths::CBasicStatistics::mean(meansAndVariances[j]), + (ml::maths::CBasicStatistics::mean(vectorMeanAndVariances))(j)); + CPPUNIT_ASSERT_EQUAL( + ml::maths::CBasicStatistics::variance(meansAndVariances[j]), + (ml::maths::CBasicStatistics::variance(vectorMeanAndVariances))(j)); } } } @@ -453,15 +515,19 @@ void CBasicStatisticsTest::testCentralMoments() { LOG_DEBUG(<< "v = " << v); vectorMeanVarianceAndSkew.add(v); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::count(meansVariancesAndSkews[0]), - ml::maths::CBasicStatistics::count(vectorMeanVarianceAndSkew)); + CPPUNIT_ASSERT_EQUAL( + ml::maths::CBasicStatistics::count(meansVariancesAndSkews[0]), + ml::maths::CBasicStatistics::count(vectorMeanVarianceAndSkew)); for (std::size_t j = 0u; j < 4; ++j) { - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::mean(meansVariancesAndSkews[j]), - (ml::maths::CBasicStatistics::mean(vectorMeanVarianceAndSkew))(j)); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::variance(meansVariancesAndSkews[j]), - (ml::maths::CBasicStatistics::variance(vectorMeanVarianceAndSkew))(j)); - CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::skewness(meansVariancesAndSkews[j]), - (ml::maths::CBasicStatistics::skewness(vectorMeanVarianceAndSkew))(j)); + CPPUNIT_ASSERT_EQUAL( + ml::maths::CBasicStatistics::mean(meansVariancesAndSkews[j]), + (ml::maths::CBasicStatistics::mean(vectorMeanVarianceAndSkew))(j)); + CPPUNIT_ASSERT_EQUAL( + ml::maths::CBasicStatistics::variance(meansVariancesAndSkews[j]), + (ml::maths::CBasicStatistics::variance(vectorMeanVarianceAndSkew))(j)); + CPPUNIT_ASSERT_EQUAL( + ml::maths::CBasicStatistics::skewness(meansVariancesAndSkews[j]), + (ml::maths::CBasicStatistics::skewness(vectorMeanVarianceAndSkew))(j)); } } } @@ -486,7 +552,8 @@ void CBasicStatisticsTest::testCentralMoments() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); TMeanAccumulatorVec restored; - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG(<< "restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); for (std::size_t i = 0u; i < restored.size(); ++i) { @@ -513,7 +580,8 @@ void CBasicStatisticsTest::testCentralMoments() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); TMeanAccumulatorVec restored; - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG(<< "restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); for (std::size_t i = 0u; i < restored.size(); ++i) { @@ -538,7 +606,8 @@ void CBasicStatisticsTest::testCentralMoments() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); TMeanVarAccumulatorVec restored; - 
CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG(<< "restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); for (std::size_t i = 0u; i < restored.size(); ++i) { @@ -566,7 +635,8 @@ void CBasicStatisticsTest::testCentralMoments() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); TMeanVarAccumulatorVec restored; - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG(<< "restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); for (std::size_t i = 0u; i < restored.size(); ++i) { @@ -591,7 +661,8 @@ void CBasicStatisticsTest::testCentralMoments() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); TMeanVarSkewAccumulatorVec restored; - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG(<< "restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); for (std::size_t i = 0u; i < restored.size(); ++i) { @@ -619,7 +690,8 @@ void CBasicStatisticsTest::testCentralMoments() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); TMeanVarSkewAccumulatorVec restored; - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restored), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(SRestore(), boost::ref(restored), _1))); LOG_DEBUG(<< "restored = " << ml::core::CContainerPrinter::print(restored)); CPPUNIT_ASSERT_EQUAL(moments.size(), restored.size()); for (std::size_t i = 0u; i < restored.size(); ++i) { @@ -629,9 +701,12 @@ void CBasicStatisticsTest::testCentralMoments() { } } - CPPUNIT_ASSERT_EQUAL(true, ml::core::memory_detail::SDynamicSizeAlwaysZero::value()); - CPPUNIT_ASSERT_EQUAL(true, ml::core::memory_detail::SDynamicSizeAlwaysZero::value()); - CPPUNIT_ASSERT_EQUAL(true, ml::core::memory_detail::SDynamicSizeAlwaysZero::value()); + CPPUNIT_ASSERT_EQUAL( + true, ml::core::memory_detail::SDynamicSizeAlwaysZero::value()); + CPPUNIT_ASSERT_EQUAL( + true, ml::core::memory_detail::SDynamicSizeAlwaysZero::value()); + CPPUNIT_ASSERT_EQUAL( + true, ml::core::memory_detail::SDynamicSizeAlwaysZero::value()); } void CBasicStatisticsTest::testVectorCentralMoments() { @@ -659,10 +734,14 @@ void CBasicStatisticsTest::testVectorCentralMoments() { TDouble2Vec means1 = ml::maths::CBasicStatistics::mean(moments1); TDoubleVec counts2 = ml::maths::CBasicStatistics::count(moments2); TDoubleVec means2 = ml::maths::CBasicStatistics::mean(moments2); - CPPUNIT_ASSERT_EQUAL(std::string("[3, 2]"), ml::core::CContainerPrinter::print(counts1)); - CPPUNIT_ASSERT_EQUAL(std::string("[3.3, 3.5]"), ml::core::CContainerPrinter::print(means1)); - CPPUNIT_ASSERT_EQUAL(std::string("[3, 2]"), ml::core::CContainerPrinter::print(counts2)); - CPPUNIT_ASSERT_EQUAL(std::string("[3.3, 3.5]"), ml::core::CContainerPrinter::print(means2)); + CPPUNIT_ASSERT_EQUAL(std::string("[3, 2]"), + 
ml::core::CContainerPrinter::print(counts1)); + CPPUNIT_ASSERT_EQUAL(std::string("[3.3, 3.5]"), + ml::core::CContainerPrinter::print(means1)); + CPPUNIT_ASSERT_EQUAL(std::string("[3, 2]"), + ml::core::CContainerPrinter::print(counts2)); + CPPUNIT_ASSERT_EQUAL(std::string("[3.3, 3.5]"), + ml::core::CContainerPrinter::print(means2)); } { TMeanVarAccumulator2Vec moments1(2); @@ -685,14 +764,22 @@ void CBasicStatisticsTest::testVectorCentralMoments() { TDoubleVec means2 = ml::maths::CBasicStatistics::mean(moments2); TDoubleVec vars2 = ml::maths::CBasicStatistics::variance(moments2); TDouble2Vec mlvars2 = ml::maths::CBasicStatistics::maximumLikelihoodVariance(moments2); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), ml::core::CContainerPrinter::print(counts1)); - CPPUNIT_ASSERT_EQUAL(std::string("[3, 4]"), ml::core::CContainerPrinter::print(means1)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 1]"), ml::core::CContainerPrinter::print(vars1)); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.6666667]"), ml::core::CContainerPrinter::print(mlvars1)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), ml::core::CContainerPrinter::print(counts2)); - CPPUNIT_ASSERT_EQUAL(std::string("[3, 4]"), ml::core::CContainerPrinter::print(means2)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 1]"), ml::core::CContainerPrinter::print(vars2)); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.6666667]"), ml::core::CContainerPrinter::print(mlvars2)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), + ml::core::CContainerPrinter::print(counts1)); + CPPUNIT_ASSERT_EQUAL(std::string("[3, 4]"), + ml::core::CContainerPrinter::print(means1)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 1]"), + ml::core::CContainerPrinter::print(vars1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.6666667]"), + ml::core::CContainerPrinter::print(mlvars1)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), + ml::core::CContainerPrinter::print(counts2)); + CPPUNIT_ASSERT_EQUAL(std::string("[3, 4]"), + ml::core::CContainerPrinter::print(means2)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 1]"), + ml::core::CContainerPrinter::print(vars2)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 0.6666667]"), + ml::core::CContainerPrinter::print(mlvars2)); } { TMeanVarSkewAccumulator2Vec moments1(2); @@ -717,16 +804,26 @@ void CBasicStatisticsTest::testVectorCentralMoments() { TDoubleVec vars2 = ml::maths::CBasicStatistics::variance(moments2); TDouble2Vec mlvars2 = ml::maths::CBasicStatistics::maximumLikelihoodVariance(moments2); TDouble2Vec skews2 = ml::maths::CBasicStatistics::skewness(moments2); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), ml::core::CContainerPrinter::print(counts1)); - CPPUNIT_ASSERT_EQUAL(std::string("[3, 4]"), ml::core::CContainerPrinter::print(means1)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), ml::core::CContainerPrinter::print(vars1)); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 2]"), ml::core::CContainerPrinter::print(mlvars1)); - CPPUNIT_ASSERT_EQUAL(std::string("[0, -0.3849002]"), ml::core::CContainerPrinter::print(skews1)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), ml::core::CContainerPrinter::print(counts2)); - CPPUNIT_ASSERT_EQUAL(std::string("[3, 4]"), ml::core::CContainerPrinter::print(means2)); - CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), ml::core::CContainerPrinter::print(vars2)); - CPPUNIT_ASSERT_EQUAL(std::string("[1, 2]"), ml::core::CContainerPrinter::print(mlvars2)); - CPPUNIT_ASSERT_EQUAL(std::string("[0, -0.3849002]"), ml::core::CContainerPrinter::print(skews2)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), + ml::core::CContainerPrinter::print(counts1)); 
+ CPPUNIT_ASSERT_EQUAL(std::string("[3, 4]"), + ml::core::CContainerPrinter::print(means1)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), + ml::core::CContainerPrinter::print(vars1)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 2]"), + ml::core::CContainerPrinter::print(mlvars1)); + CPPUNIT_ASSERT_EQUAL(std::string("[0, -0.3849002]"), + ml::core::CContainerPrinter::print(skews1)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), + ml::core::CContainerPrinter::print(counts2)); + CPPUNIT_ASSERT_EQUAL(std::string("[3, 4]"), + ml::core::CContainerPrinter::print(means2)); + CPPUNIT_ASSERT_EQUAL(std::string("[2, 3]"), + ml::core::CContainerPrinter::print(vars2)); + CPPUNIT_ASSERT_EQUAL(std::string("[1, 2]"), + ml::core::CContainerPrinter::print(mlvars2)); + CPPUNIT_ASSERT_EQUAL(std::string("[0, -0.3849002]"), + ml::core::CContainerPrinter::print(skews2)); } } @@ -738,18 +835,26 @@ void CBasicStatisticsTest::testCovariances() { LOG_DEBUG(<< "N(3,I)"); { const double raw[][3] = { - {2.58894, 2.87211, 1.62609}, {3.88246, 2.98577, 2.70981}, {2.03317, 3.33715, 2.93560}, {3.30100, 4.38844, 1.65705}, - {2.12426, 2.21127, 2.57000}, {4.21041, 4.20745, 1.90752}, {3.56139, 3.14454, 0.89316}, {4.29444, 1.58715, 3.58402}, - {3.06731, 3.91581, 2.85951}, {3.62798, 2.28786, 2.89994}, {2.05834, 2.96137, 3.57654}, {2.72185, 3.36003, 3.09708}, - {0.94924, 2.19797, 3.30941}, {2.11159, 2.49182, 3.56793}, {3.10364, 0.32747, 3.62487}, {2.28235, 3.83542, 3.35942}, - {3.30549, 2.95951, 2.97006}, {3.05787, 2.94188, 2.64095}, {3.98245, 2.02892, 3.07909}, {3.81189, 2.89389, 3.81389}, - {3.32811, 3.88484, 4.17866}, {2.06964, 3.80683, 2.46835}, {4.58989, 2.00321, 1.93029}, {2.51484, 4.46106, 3.71248}, - {3.30729, 2.44768, 3.43241}, {3.52222, 2.91724, 1.49631}, {1.71826, 4.79752, 4.38398}, {3.14173, 3.16237, 2.49654}, + {2.58894, 2.87211, 1.62609}, {3.88246, 2.98577, 2.70981}, + {2.03317, 3.33715, 2.93560}, {3.30100, 4.38844, 1.65705}, + {2.12426, 2.21127, 2.57000}, {4.21041, 4.20745, 1.90752}, + {3.56139, 3.14454, 0.89316}, {4.29444, 1.58715, 3.58402}, + {3.06731, 3.91581, 2.85951}, {3.62798, 2.28786, 2.89994}, + {2.05834, 2.96137, 3.57654}, {2.72185, 3.36003, 3.09708}, + {0.94924, 2.19797, 3.30941}, {2.11159, 2.49182, 3.56793}, + {3.10364, 0.32747, 3.62487}, {2.28235, 3.83542, 3.35942}, + {3.30549, 2.95951, 2.97006}, {3.05787, 2.94188, 2.64095}, + {3.98245, 2.02892, 3.07909}, {3.81189, 2.89389, 3.81389}, + {3.32811, 3.88484, 4.17866}, {2.06964, 3.80683, 2.46835}, + {4.58989, 2.00321, 1.93029}, {2.51484, 4.46106, 3.71248}, + {3.30729, 2.44768, 3.43241}, {3.52222, 2.91724, 1.49631}, + {1.71826, 4.79752, 4.38398}, {3.14173, 3.16237, 2.49654}, {3.26538, 2.21858, 5.05477}, {2.88352, 1.94396, 3.08744}}; const double expectedMean[] = {3.013898, 2.952637, 2.964104}; - const double expectedCovariances[][3] = { - {0.711903, -0.174535, -0.199460}, {-0.174535, 0.935285, -0.091192}, {-0.199460, -0.091192, 0.833710}}; + const double expectedCovariances[][3] = {{0.711903, -0.174535, -0.199460}, + {-0.174535, 0.935285, -0.091192}, + {-0.199460, -0.091192, 0.833710}}; ml::maths::CBasicStatistics::SSampleCovariances covariances; @@ -761,19 +866,23 @@ void CBasicStatisticsTest::testCovariances() { LOG_DEBUG(<< "count = " << ml::maths::CBasicStatistics::count(covariances)); LOG_DEBUG(<< "mean = " << ml::maths::CBasicStatistics::mean(covariances)); - LOG_DEBUG(<< "covariances = " << ml::maths::CBasicStatistics::covariances(covariances)); + LOG_DEBUG(<< "covariances = " + << ml::maths::CBasicStatistics::covariances(covariances)); - 
CPPUNIT_ASSERT_EQUAL(static_cast<double>(boost::size(raw)), ml::maths::CBasicStatistics::count(covariances));
+        CPPUNIT_ASSERT_EQUAL(static_cast<double>(boost::size(raw)),
+                             ml::maths::CBasicStatistics::count(covariances));
         for (std::size_t i = 0u; i < 3; ++i) {
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean[i], (ml::maths::CBasicStatistics::mean(covariances))(i), 2e-6);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                expectedMean[i], (ml::maths::CBasicStatistics::mean(covariances))(i), 2e-6);
             for (std::size_t j = 0u; j < 3; ++j) {
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(
-                    expectedCovariances[i][j], (ml::maths::CBasicStatistics::covariances(covariances))(i, j), 2e-6);
+                    expectedCovariances[i][j],
+                    (ml::maths::CBasicStatistics::covariances(covariances))(i, j), 2e-6);
             }
         }
-        bool dynamicSizeAlwaysZero =
-            ml::core::memory_detail::SDynamicSizeAlwaysZero<ml::maths::CBasicStatistics::SSampleCovariances<double, 3>>::value();
+        bool dynamicSizeAlwaysZero = ml::core::memory_detail::SDynamicSizeAlwaysZero<
+            ml::maths::CBasicStatistics::SSampleCovariances<double, 3>>::value();
         CPPUNIT_ASSERT_EQUAL(true, dynamicSizeAlwaysZero);
     }
@@ -794,10 +903,14 @@ void CBasicStatisticsTest::testCovariances() {
         ml::maths::CVectorNx1<double, 4> covariances4(covariances4_);
         ml::maths::CSymmetricMatrixNxN<double, 4> covariance(
-            10.0 * ml::maths::CSymmetricMatrixNxN<double, 4>(ml::maths::E_OuterProduct, covariances1 / covariances1.euclidean()) +
-            5.0 * ml::maths::CSymmetricMatrixNxN<double, 4>(ml::maths::E_OuterProduct, covariances2 / covariances2.euclidean()) +
-            5.0 * ml::maths::CSymmetricMatrixNxN<double, 4>(ml::maths::E_OuterProduct, covariances3 / covariances3.euclidean()) +
-            2.0 * ml::maths::CSymmetricMatrixNxN<double, 4>(ml::maths::E_OuterProduct, covariances4 / covariances4.euclidean()));
+            10.0 * ml::maths::CSymmetricMatrixNxN<double, 4>(
+                       ml::maths::E_OuterProduct, covariances1 / covariances1.euclidean()) +
+            5.0 * ml::maths::CSymmetricMatrixNxN<double, 4>(
+                      ml::maths::E_OuterProduct, covariances2 / covariances2.euclidean()) +
+            5.0 * ml::maths::CSymmetricMatrixNxN<double, 4>(
+                      ml::maths::E_OuterProduct, covariances3 / covariances3.euclidean()) +
+            2.0 * ml::maths::CSymmetricMatrixNxN<double, 4>(
+                      ml::maths::E_OuterProduct, covariances4 / covariances4.euclidean()));
         std::size_t n = 10000u;
@@ -814,12 +927,16 @@ void CBasicStatisticsTest::testCovariances() {
         LOG_DEBUG(<< "expected covariances = " << covariance);
         LOG_DEBUG(<< "mean = " << ml::maths::CBasicStatistics::mean(sampleCovariance));
-        LOG_DEBUG(<< "covariances = " << ml::maths::CBasicStatistics::covariances(sampleCovariance));
+        LOG_DEBUG(<< "covariances = "
+                  << ml::maths::CBasicStatistics::covariances(sampleCovariance));
         for (std::size_t i = 0u; i < 4; ++i) {
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(mean(i), (ml::maths::CBasicStatistics::mean(sampleCovariance))(i), 0.05);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                mean(i), (ml::maths::CBasicStatistics::mean(sampleCovariance))(i), 0.05);
             for (std::size_t j = 0u; j < 4; ++j) {
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(covariance(i, j), (ml::maths::CBasicStatistics::covariances(sampleCovariance))(i, j), 0.16);
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                    covariance(i, j),
+                    (ml::maths::CBasicStatistics::covariances(sampleCovariance))(i, j), 0.16);
             }
         }
     }
@@ -832,7 +949,8 @@ void CBasicStatisticsTest::testCovariances() {
         std::vector<ml::maths::CVectorNx1<double, 4>> points;
         for (std::size_t i = 0u; i < coordinates.size(); i += 4) {
-            double c[] = {coordinates[i + 0], coordinates[i + 1], coordinates[i + 2], coordinates[i + 3]};
+            double c[] = {coordinates[i + 0], coordinates[i + 1],
+                          coordinates[i + 2], coordinates[i + 3]};
             points.push_back(ml::maths::CVectorNx1<double, 4>(c));
         }
@@ -847,7 +965,8 @@ void CBasicStatisticsTest::testCovariances() {
         ml::maths::CBasicStatistics::SSampleCovariances 
sampleCovariances; CPPUNIT_ASSERT(sampleCovariances.fromDelimited(expectedDelimited)); - CPPUNIT_ASSERT_EQUAL(expectedSampleCovariances.checksum(), sampleCovariances.checksum()); + CPPUNIT_ASSERT_EQUAL(expectedSampleCovariances.checksum(), + sampleCovariances.checksum()); std::string delimited = sampleCovariances.toDelimited(); CPPUNIT_ASSERT_EQUAL(expectedDelimited, delimited); @@ -867,7 +986,8 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() { ml::test::CRandomNumbers rng; - double means[][2] = {{10.0, 10.0}, {20.0, 150.0}, {-10.0, -20.0}, {-20.0, 40.0}, {40.0, 90.0}}; + double means[][2] = { + {10.0, 10.0}, {20.0, 150.0}, {-10.0, -20.0}, {-20.0, 40.0}, {40.0, 90.0}}; double covariances[][2][2] = {{{40.0, 0.0}, {0.0, 40.0}}, {{20.0, 5.0}, {5.0, 10.0}}, @@ -884,7 +1004,8 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() { TDoubleVec mean(boost::begin(means[i]), boost::end(means[i])); TDoubleVecVec covariance; for (std::size_t j = 0u; j < boost::size(covariances[i]); ++j) { - covariance.push_back(TDoubleVec(boost::begin(covariances[i][j]), boost::end(covariances[i][j]))); + covariance.push_back(TDoubleVec(boost::begin(covariances[i][j]), + boost::end(covariances[i][j]))); } TMatrix2 covExpected(covariance); LOG_DEBUG(<< "cov expected = " << covExpected); @@ -906,8 +1027,10 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() { ml::maths::CBasicStatistics::SSampleCovariances covLW; ml::maths::CBasicStatistics::covariancesLedoitWolf(jsamples, covLW); - const TMatrix2& covML = ml::maths::CBasicStatistics::maximumLikelihoodCovariances(cov); - const TMatrix2& covLWML = ml::maths::CBasicStatistics::maximumLikelihoodCovariances(covLW); + const TMatrix2& covML = + ml::maths::CBasicStatistics::maximumLikelihoodCovariances(cov); + const TMatrix2& covLWML = + ml::maths::CBasicStatistics::maximumLikelihoodCovariances(covLW); double errorML = (covML - covExpected).frobenius(); double errorLWML = (covLWML - covExpected).frobenius(); @@ -925,7 +1048,8 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() { LOG_DEBUG(<< "error = " << error); LOG_DEBUG(<< "error LW = " << errorLW); - CPPUNIT_ASSERT(ml::maths::CBasicStatistics::mean(errorLW) < 0.9 * ml::maths::CBasicStatistics::mean(error)); + CPPUNIT_ASSERT(ml::maths::CBasicStatistics::mean(errorLW) < + 0.9 * ml::maths::CBasicStatistics::mean(error)); } void CBasicStatisticsTest::testMedian() { @@ -943,7 +1067,8 @@ void CBasicStatisticsTest::testMedian() { { double sample[] = {1.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec( + sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); @@ -952,7 +1077,8 @@ void CBasicStatisticsTest::testMedian() { { double sample[] = {2.0, 1.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec( + sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); @@ -961,7 +1087,8 @@ void CBasicStatisticsTest::testMedian() { { double sample[] = {3.0, 1.0, 2.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec( + sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); @@ -970,7 +1097,8 @@ void 
CBasicStatisticsTest::testMedian() { { double sample[] = {3.0, 5.0, 9.0, 1.0, 2.0, 6.0, 7.0, 4.0, 8.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec( + sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); @@ -979,7 +1107,8 @@ void CBasicStatisticsTest::testMedian() { { double sample[] = {3.0, 5.0, 10.0, 2.0, 6.0, 7.0, 1.0, 9.0, 4.0, 8.0}; - ml::maths::CBasicStatistics::TDoubleVec sampleVec(sample, sample + sizeof(sample) / sizeof(sample[0])); + ml::maths::CBasicStatistics::TDoubleVec sampleVec( + sample, sample + sizeof(sample) / sizeof(sample[0])); double median = ml::maths::CBasicStatistics::median(sampleVec); @@ -996,9 +1125,11 @@ void CBasicStatisticsTest::testOrderStatistics() { // elements of a collection. using TMinStatsStack = ml::maths::CBasicStatistics::COrderStatisticsStack; - using TMaxStatsStack = ml::maths::CBasicStatistics::COrderStatisticsStack>; + using TMaxStatsStack = + ml::maths::CBasicStatistics::COrderStatisticsStack>; using TMinStatsHeap = ml::maths::CBasicStatistics::COrderStatisticsHeap; - using TMaxStatsHeap = ml::maths::CBasicStatistics::COrderStatisticsHeap>; + using TMaxStatsHeap = + ml::maths::CBasicStatistics::COrderStatisticsHeap>; { // Test on the stack min, max, combine and persist and restore. @@ -1023,7 +1154,8 @@ void CBasicStatisticsTest::testOrderStatistics() { std::sort(boost::begin(data), boost::end(data), std::greater()); maxValues.sort(); - LOG_DEBUG(<< "x_n = " << maxValues[0] << ", x_(n-1) = " << maxValues[1] << ", x_(n-2) = " << maxValues[2]); + LOG_DEBUG(<< "x_n = " << maxValues[0] << ", x_(n-1) = " << maxValues[1] + << ", x_(n-2) = " << maxValues[2]); CPPUNIT_ASSERT(std::equal(maxValues.begin(), maxValues.end(), data)); CPPUNIT_ASSERT_EQUAL(static_cast(2), minValues.count()); @@ -1031,7 +1163,8 @@ void CBasicStatisticsTest::testOrderStatistics() { TMinStatsStack minFirstPlusSecondHalf = (minFirstHalf + minSecondHalf); minFirstPlusSecondHalf.sort(); - CPPUNIT_ASSERT(std::equal(minValues.begin(), minValues.end(), minFirstPlusSecondHalf.begin())); + CPPUNIT_ASSERT(std::equal(minValues.begin(), minValues.end(), + minFirstPlusSecondHalf.begin())); // Test persist is idempotent. @@ -1050,7 +1183,8 @@ void CBasicStatisticsTest::testOrderStatistics() { ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restoredMinValues), _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(SRestore(), boost::ref(restoredMinValues), _1))); } // The XML representation of the new stats object should be unchanged. 
@@ -1085,7 +1219,8 @@ void CBasicStatisticsTest::testOrderStatistics() {
         std::sort(boost::begin(data), boost::end(data), std::greater<double>());
         max3Values.sort();
-        LOG_DEBUG(<< "x_n = " << max3Values[0] << ", x_(n-1) = " << max3Values[1] << ", x_(n-2) = " << max3Values[2]);
+        LOG_DEBUG(<< "x_n = " << max3Values[0] << ", x_(n-1) = " << max3Values[1]
+                  << ", x_(n-2) = " << max3Values[2]);
         CPPUNIT_ASSERT(std::equal(max3Values.begin(), max3Values.end(), data));
         max20Values.sort();
@@ -1109,7 +1244,8 @@ void CBasicStatisticsTest::testOrderStatistics() {
             ml::core::CRapidXmlParser parser;
             CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
             ml::core::CRapidXmlStateRestoreTraverser traverser(parser);
-            CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(SRestore(), boost::ref(restoredMaxValues), _1)));
+            CPPUNIT_ASSERT(traverser.traverseSubLevel(
+                boost::bind(SRestore(), boost::ref(restoredMaxValues), _1)));
         }
         // The XML representation of the new stats object should be unchanged.
@@ -1162,10 +1298,14 @@ void CBasicStatisticsTest::testOrderStatistics() {
     }
     { // Test memory.
-        CPPUNIT_ASSERT_EQUAL(true, ml::core::memory_detail::SDynamicSizeAlwaysZero<TMinStatsStack>::value());
-        CPPUNIT_ASSERT_EQUAL(true, ml::core::memory_detail::SDynamicSizeAlwaysZero<TMaxStatsStack>::value());
-        CPPUNIT_ASSERT_EQUAL(false, ml::core::memory_detail::SDynamicSizeAlwaysZero<TMinStatsHeap>::value());
-        CPPUNIT_ASSERT_EQUAL(false, ml::core::memory_detail::SDynamicSizeAlwaysZero<TMaxStatsHeap>::value());
+        CPPUNIT_ASSERT_EQUAL(
+            true, ml::core::memory_detail::SDynamicSizeAlwaysZero<TMinStatsStack>::value());
+        CPPUNIT_ASSERT_EQUAL(
+            true, ml::core::memory_detail::SDynamicSizeAlwaysZero<TMaxStatsStack>::value());
+        CPPUNIT_ASSERT_EQUAL(
+            false, ml::core::memory_detail::SDynamicSizeAlwaysZero<TMinStatsHeap>::value());
+        CPPUNIT_ASSERT_EQUAL(
+            false, ml::core::memory_detail::SDynamicSizeAlwaysZero<TMaxStatsHeap>::value());
     }
 }
diff --git a/lib/maths/unittest/CBjkstUniqueValuesTest.cc b/lib/maths/unittest/CBjkstUniqueValuesTest.cc
index 8a1038398a..90abbcfad5 100644
--- a/lib/maths/unittest/CBjkstUniqueValuesTest.cc
+++ b/lib/maths/unittest/CBjkstUniqueValuesTest.cc
@@ -52,7 +52,8 @@ void CBjkstUniqueValuesTest::testTrailingZeros() {
     for (std::size_t i = 0u; i < samples.size(); ++i) {
         uint32_t sample = static_cast<uint32_t>(samples[i]);
-        CPPUNIT_ASSERT_EQUAL(trailingZeros(sample), CBjkstUniqueValues::trailingZeros(sample));
+        CPPUNIT_ASSERT_EQUAL(trailingZeros(sample),
+                             CBjkstUniqueValues::trailingZeros(sample));
     }
 }
@@ -157,9 +158,11 @@ void CBjkstUniqueValuesTest::testRemove() {
         }
         LOG_DEBUG(<< "exact = " << unique.size());
         LOG_DEBUG(<< "approx = " << sketch.number());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(
-            static_cast<double>(unique.size()), static_cast<double>(sketch.number()), 0.3 * static_cast<double>(unique.size()));
-        meanRelativeErrorBeforeRemove.add(std::fabs(static_cast<double>(unique.size()) - static_cast<double>(sketch.number())) /
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(unique.size()),
+                                     static_cast<double>(sketch.number()),
+                                     0.3 * static_cast<double>(unique.size()));
+        meanRelativeErrorBeforeRemove.add(std::fabs(static_cast<double>(unique.size()) -
+                                                    static_cast<double>(sketch.number())) /
                                           static_cast<double>(unique.size()));
         rng.random_shuffle(categories.begin(), categories.end());
@@ -170,14 +173,18 @@ void CBjkstUniqueValuesTest::testRemove() {
         }
         LOG_DEBUG(<< "exact = " << unique.size());
         LOG_DEBUG(<< "approx = " << sketch.number());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(
-            static_cast<double>(unique.size()), static_cast<double>(sketch.number()), 0.25 * static_cast<double>(unique.size()));
-        meanRelativeErrorAfterRemove.add(std::fabs(static_cast<double>(unique.size()) - static_cast<double>(sketch.number())) /
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(unique.size()),
+                                     static_cast<double>(sketch.number()),
+                                     0.25 * static_cast<double>(unique.size()));
+        meanRelativeErrorAfterRemove.add(std::fabs(static_cast<double>(unique.size()) -
+                                                   static_cast<double>(sketch.number())) /
                                          static_cast<double>(unique.size()));
     }
-    LOG_DEBUG(<< "meanRelativeErrorBeforeRemove = " << maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove));
-    LOG_DEBUG(<< "meanRelativeErrorAfterRemove = " << maths::CBasicStatistics::mean(meanRelativeErrorAfterRemove));
+    LOG_DEBUG(<< "meanRelativeErrorBeforeRemove = "
+              << maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove));
+    LOG_DEBUG(<< "meanRelativeErrorAfterRemove = "
+              << maths::CBasicStatistics::mean(meanRelativeErrorAfterRemove));
     CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove) < 0.05);
     CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelativeErrorAfterRemove) <
                    1.3 * maths::CBasicStatistics::mean(meanRelativeErrorBeforeRemove));
@@ -279,9 +286,11 @@ void CBjkstUniqueValuesTest::testSmall() {
         unique.insert(category);
         LOG_DEBUG(<< "exact = " << unique.size());
         LOG_DEBUG(<< "approx = " << sketch.number());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(
-            static_cast<double>(unique.size()), static_cast<double>(sketch.number()), 0.15 * static_cast<double>(unique.size()));
-        meanRelativeError.add(std::fabs(static_cast<double>(unique.size()) - static_cast<double>(sketch.number())) /
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(unique.size()),
+                                     static_cast<double>(sketch.number()),
+                                     0.15 * static_cast<double>(unique.size()));
+        meanRelativeError.add(std::fabs(static_cast<double>(unique.size()) -
+                                        static_cast<double>(sketch.number())) /
                               static_cast<double>(unique.size()));
     }
@@ -319,7 +328,8 @@ void CBjkstUniqueValuesTest::testPersist() {
         core::CRapidXmlStateRestoreTraverser traverser(parser);
         maths::CBjkstUniqueValues restoredSketch(traverser);
-        LOG_DEBUG(<< "orig checksum = " << origSketch.checksum() << ", new checksum = " << restoredSketch.checksum());
+        LOG_DEBUG(<< "orig checksum = " << origSketch.checksum()
+                  << ", new checksum = " << restoredSketch.checksum());
         CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), restoredSketch.checksum());
         std::string newXml;
@@ -349,7 +359,8 @@ void CBjkstUniqueValuesTest::testPersist() {
         core::CRapidXmlStateRestoreTraverser traverser(parser);
         maths::CBjkstUniqueValues restoredSketch(traverser);
-        LOG_DEBUG(<< "orig checksum = " << origSketch.checksum() << ", new checksum = " << restoredSketch.checksum());
+        LOG_DEBUG(<< "orig checksum = " << origSketch.checksum()
+                  << ", new checksum = " << restoredSketch.checksum());
         CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), restoredSketch.checksum());
         std::string newXml;
@@ -364,18 +375,18 @@ CppUnit::Test* CBjkstUniqueValuesTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBjkstUniqueValuesTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBjkstUniqueValuesTest>("CBjkstUniqueValuesTest::testTrailingZeros",
-                                                                          &CBjkstUniqueValuesTest::testTrailingZeros));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CBjkstUniqueValuesTest>("CBjkstUniqueValuesTest::testNumber", &CBjkstUniqueValuesTest::testNumber));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CBjkstUniqueValuesTest>("CBjkstUniqueValuesTest::testRemove", &CBjkstUniqueValuesTest::testRemove));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CBjkstUniqueValuesTest>("CBjkstUniqueValuesTest::testSwap", &CBjkstUniqueValuesTest::testSwap));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CBjkstUniqueValuesTest>("CBjkstUniqueValuesTest::testSmall", &CBjkstUniqueValuesTest::testSmall));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CBjkstUniqueValuesTest>("CBjkstUniqueValuesTest::testPersist", &CBjkstUniqueValuesTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBjkstUniqueValuesTest>(
+        "CBjkstUniqueValuesTest::testTrailingZeros", &CBjkstUniqueValuesTest::testTrailingZeros));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBjkstUniqueValuesTest>(
+        "CBjkstUniqueValuesTest::testNumber", &CBjkstUniqueValuesTest::testNumber));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBjkstUniqueValuesTest>(
+        "CBjkstUniqueValuesTest::testRemove", &CBjkstUniqueValuesTest::testRemove));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBjkstUniqueValuesTest>(
+        "CBjkstUniqueValuesTest::testSwap", &CBjkstUniqueValuesTest::testSwap));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBjkstUniqueValuesTest>(
+        "CBjkstUniqueValuesTest::testSmall", &CBjkstUniqueValuesTest::testSmall));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBjkstUniqueValuesTest>(
+        "CBjkstUniqueValuesTest::testPersist", &CBjkstUniqueValuesTest::testPersist));
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CBootstrapClustererTest.cc b/lib/maths/unittest/CBootstrapClustererTest.cc
index f8c0457262..0b0f5a80fb 100644
--- a/lib/maths/unittest/CBootstrapClustererTest.cc
+++ b/lib/maths/unittest/CBootstrapClustererTest.cc
@@ -34,7 +34,9 @@
 using TMatrix2Vec = std::vector<TMatrix2>;
 using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 struct SVector2Hash {
-    std::size_t operator()(const TVector2& x) const { return static_cast<std::size_t>(x.checksum()); }
+    std::size_t operator()(const TVector2& x) const {
+        return static_cast<std::size_t>(x.checksum());
+    }
 };
 using TVector2SizeUMap = boost::unordered_map<TVector2, std::size_t, SVector2Hash>;
@@ -53,20 +55,31 @@ class CBootstrapClustererForTest : public maths::CBootstrapClusterer<POINT> {
     void buildClusterGraph(TSizeVecVecVec& bootstrapClusters, TGraph& graph) const {
         TPointVec dummy(1); // only used for reserving memory.
-        this->maths::CBootstrapClusterer<POINT>::buildClusterGraph(dummy, bootstrapClusters, graph);
+        this->maths::CBootstrapClusterer<POINT>::buildClusterGraph(
+            dummy, bootstrapClusters, graph);
     }
     std::size_t thickets(std::size_t n, const TGraph& graph, TSizeVec& components) const {
         return this->maths::CBootstrapClusterer<POINT>::thickets(n, graph, components);
     }
-    bool separate(TGraph& graph, TBoolVec& parity) const { return this->maths::CBootstrapClusterer<POINT>::separate(graph, parity); }
+    bool separate(TGraph& graph, TBoolVec& parity) const {
+        return this->maths::CBootstrapClusterer<POINT>::separate(graph, parity);
+    }
-    bool cutSearch(std::size_t u, std::size_t v, const TGraph& graph, double threshold, double& cost, TBoolVec& parities) const {
-        return this->maths::CBootstrapClusterer<POINT>::cutSearch(u, v, graph, threshold, cost, parities);
+    bool cutSearch(std::size_t u,
+                   std::size_t v,
+                   const TGraph& graph,
+                   double threshold,
+                   double& cost,
+                   TBoolVec& parities) const {
+        return this->maths::CBootstrapClusterer<POINT>::cutSearch(
+            u, v, graph, threshold, cost, parities);
     }
-    TSizeVec& offsets() { return this->maths::CBootstrapClusterer<POINT>::offsets(); }
+    TSizeVec& offsets() {
+        return this->maths::CBootstrapClusterer<POINT>::offsets();
+    }
 };
 using TBootstrapClustererForTest2 = CBootstrapClustererForTest<TVector2>;
@@ -128,7 +141,8 @@ void CBootstrapClustererTest::testFacade() {
     maths::CSampling::seed();
     maths::CBootstrapClustererFacade>> clusterer(
-        xmeans, improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations);
+        xmeans, improveParamsKmeansIterations,
+        improveStructureClusterSeeds, improveStructureKmeansIterations);
     TVector2VecVec actual;
     {
@@ -146,7 +160,8 @@ void CBootstrapClustererTest::testFacade() {
     maths::CSampling::seed();
     xmeans.setPoints(points);
-    xmeans.run(improveParamsKmeansIterations, improveStructureClusterSeeds, 
improveStructureKmeansIterations); + xmeans.run(improveParamsKmeansIterations, improveStructureClusterSeeds, + improveStructureKmeansIterations); TVector2VecVec expected(xmeans.clusters().size()); for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { @@ -156,7 +171,8 @@ void CBootstrapClustererTest::testFacade() { CPPUNIT_ASSERT_EQUAL(expected.size(), actual.size()); for (std::size_t i = 0u; i < expected.size(); ++i) { - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected[i]), core::CContainerPrinter::print(actual[i])); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected[i]), + core::CContainerPrinter::print(actual[i])); } } } @@ -171,10 +187,11 @@ void CBootstrapClustererTest::testBuildClusterGraph() { // thresholds. const std::size_t _ = 15; - std::size_t clusters_[][5][5] = {{{0, 1, 2, 3, 4}, {5, 6, 7, 8, 9}, {10, 11, 12, 13, 14}, {_, _, _, _, _}, {_, _, _, _, _}}, - {{0, 1, _, 3, 4}, {5, 6, _, _, _}, {10, 11, 12, 13, 14}, {2, 7, 8, 9, _}, {_, _, _, _, _}}, - {{0, 1, 2, 3, _}, {5, 6, 7, 8, 9}, {_, _, 12, 13, 14}, {4, _, _, _, _}, {10, 11, _, _, _}}, - {{_, _, 2, 3, 4}, {_, _, _, 8, 9}, {10, 11, 12, 13, 14}, {0, 1, 5, 6, 7}, {_, _, _, _, _}}}; + std::size_t clusters_[][5][5] = { + {{0, 1, 2, 3, 4}, {5, 6, 7, 8, 9}, {10, 11, 12, 13, 14}, {_, _, _, _, _}, {_, _, _, _, _}}, + {{0, 1, _, 3, 4}, {5, 6, _, _, _}, {10, 11, 12, 13, 14}, {2, 7, 8, 9, _}, {_, _, _, _, _}}, + {{0, 1, 2, 3, _}, {5, 6, 7, 8, 9}, {_, _, 12, 13, 14}, {4, _, _, _, _}, {10, 11, _, _, _}}, + {{_, _, 2, 3, 4}, {_, _, _, 8, 9}, {10, 11, 12, 13, 14}, {0, 1, 5, 6, 7}, {_, _, _, _, _}}}; TBootstrapClustererForTest2::TSizeVecVecVec clusters(boost::size(clusters_)); for (std::size_t i = 0u; i < boost::size(clusters_); ++i) { for (std::size_t j = 0u; j < boost::size(clusters_[i]); ++j) { @@ -308,9 +325,12 @@ void CBootstrapClustererTest::testCutSearch() { TBoolVec parities; clusterer.cutSearch(0, 1, graph, 0.0, cost, parities); - LOG_DEBUG(<< "cost = " << cost << ", parities = " << core::CContainerPrinter::print(parities)); + LOG_DEBUG(<< "cost = " << cost + << ", parities = " << core::CContainerPrinter::print(parities)); - double sparsestCut = static_cast(connections[t]) / static_cast(20 - splits[t]) / static_cast(splits[t]); + double sparsestCut = static_cast(connections[t]) / + static_cast(20 - splits[t]) / + static_cast(splits[t]); LOG_DEBUG(<< "sparsest = " << sparsestCut); quality.add(cost - sparsestCut); @@ -360,8 +380,10 @@ void CBootstrapClustererTest::testSeparate() { std::size_t e = boost::num_edges(graph); - LOG_DEBUG(<< "split = " << splits1[t] << ":" << splits2[t] << ":" << v - splits2[t]); - LOG_DEBUG(<< "# connections = " << connections[2 * t] << " " << connections[2 * t + 1]); + LOG_DEBUG(<< "split = " << splits1[t] << ":" << splits2[t] << ":" + << v - splits2[t]); + LOG_DEBUG(<< "# connections = " << connections[2 * t] << " " + << connections[2 * t + 1]); TBootstrapClustererForTest2 clusterer(0.3, 3.0); @@ -383,11 +405,14 @@ void CBootstrapClustererTest::testSeparate() { } LOG_DEBUG(<< "cost = " << cut / (a * b)) - double sparsestCut = - std::min(static_cast(connections[2 * t]) / static_cast(k[0]) / static_cast(v - k[0]), - static_cast(connections[2 * t + 1]) / static_cast(k[1]) / static_cast(v - k[1])); + double sparsestCut = std::min( + static_cast(connections[2 * t]) / + static_cast(k[0]) / static_cast(v - k[0]), + static_cast(connections[2 * t + 1]) / + static_cast(k[1]) / static_cast(v - k[1])); - double threshold = 0.1 * static_cast(2 * e) / static_cast(v * (v - 1)); + double 
threshold = 0.1 * static_cast(2 * e) / + static_cast(v * (v - 1)); LOG_DEBUG(<< "sparsest = " << sparsestCut << " need " << threshold << " to separate"); @@ -450,7 +475,8 @@ void CBootstrapClustererTest::testThickets() { rng.generateUniformSamples(k[0], k[1], connections[t], V); connect(U, V, graph); - LOG_DEBUG(<< "split = " << splits1[t] << ":" << splits2[t] << ":" << v - splits2[t]); + LOG_DEBUG(<< "split = " << splits1[t] << ":" << splits2[t] << ":" + << v - splits2[t]); LOG_DEBUG(<< "# connections = " << connections[t]); TSizeVec components(v); @@ -471,7 +497,8 @@ void CBootstrapClustererTest::testThickets() { for (std::size_t i = 0u; i < 3; ++i) { double jaccard = maths::CSetTools::jaccard( - expectedClusters[i].begin(), expectedClusters[i].end(), clusters[i].begin(), clusters[i].end()); + expectedClusters[i].begin(), expectedClusters[i].end(), + clusters[i].begin(), clusters[i].end()); CPPUNIT_ASSERT(jaccard > 0.8); meanJaccard.add(jaccard); } @@ -499,27 +526,40 @@ void CBootstrapClustererTest::testNonConvexClustering() { // has mean equal to half a sine wave which poses problems for // x-means. - double x[][2] = {{2.00000, 1.99667}, // Cluster 1 - {4.00000, 3.97339}, {6.00000, 5.91040}, {8.00000, 7.78837}, {10.00000, 9.58851}, {12.00000, 11.29285}, - {14.00000, 12.88435}, {16.00000, 14.34712}, {18.00000, 15.66654}, {20.00000, 16.82942}, {22.00000, 17.82415}, - {24.00000, 18.64078}, {26.00000, 19.27116}, {28.00000, 19.70899}, {30.00000, 19.94990}, {32.00000, 19.99147}, - {34.00000, 19.83330}, {36.00000, 19.47695}, {38.00000, 18.92600}, {40.00000, 18.18595}, {42.00000, 17.26419}, - {44.00000, 16.16993}, {46.00000, 14.91410}, {48.00000, 13.50926}, {50.00000, 11.96944}, {52.00000, 10.31003}, - {54.00000, 8.54760}, {56.00000, 6.69976}, {58.00000, 4.78499}, {60.00000, 2.82240}, {62.00000, 0.83161}, - {181.00000, 9.95004}, // Cluster 2 - {182.00000, 9.80067}, {183.00000, 9.55336}, {184.00000, 9.21061}, {185.00000, 8.77583}, {186.00000, 8.25336}, - {187.00000, 7.64842}, {188.00000, 6.96707}, {189.00000, 6.21610}, {190.00000, 5.40302}, {191.00000, 4.53596}, - {192.00000, 3.62358}, {193.00000, 2.67499}, {194.00000, 1.69967}, {195.00000, 0.70737}, {196.00000, -0.29200}, - {197.00000, -1.28844}, {198.00000, -2.27202}, {199.00000, -3.23290}, {200.00000, -4.16147}, {201.00000, -5.04846}, - {202.00000, -5.88501}, {203.00000, -6.66276}, {204.00000, -7.37394}, {205.00000, -8.01144}, {206.00000, -8.56889}, - {207.00000, -9.04072}, {208.00000, -9.42222}, {209.00000, -9.70958}, {210.00000, -9.89992}, {211.00000, -9.99135}, - {232.41593, -9.95004}, // Cluster 3 - {233.41593, -9.80067}, {234.41593, -9.55336}, {235.41593, -9.21061}, {236.41593, -8.77583}, {237.41593, -8.25336}, - {238.41593, -7.64842}, {239.41593, -6.96707}, {240.41593, -6.21610}, {241.41593, -5.40302}, {242.41593, -4.53596}, - {243.41593, -3.62358}, {244.41593, -2.67499}, {245.41593, -1.69967}, {246.41593, -0.70737}, {247.41593, 0.29200}, - {248.41593, 1.28844}, {249.41593, 2.27202}, {250.41593, 3.23290}, {251.41593, 4.16147}, {252.41593, 5.04846}, - {253.41593, 5.88501}, {254.41593, 6.66276}, {255.41593, 7.37394}, {256.41593, 8.01144}, {257.41593, 8.56889}, - {258.41593, 9.04072}, {259.41593, 9.42222}, {260.41593, 9.70958}, {261.41593, 9.89992}, {262.41593, 9.99135}}; + double x[][2] = { + {2.00000, 1.99667}, // Cluster 1 + {4.00000, 3.97339}, {6.00000, 5.91040}, {8.00000, 7.78837}, + {10.00000, 9.58851}, {12.00000, 11.29285}, {14.00000, 12.88435}, + {16.00000, 14.34712}, {18.00000, 15.66654}, {20.00000, 16.82942}, + {22.00000, 
17.82415}, {24.00000, 18.64078}, {26.00000, 19.27116}, + {28.00000, 19.70899}, {30.00000, 19.94990}, {32.00000, 19.99147}, + {34.00000, 19.83330}, {36.00000, 19.47695}, {38.00000, 18.92600}, + {40.00000, 18.18595}, {42.00000, 17.26419}, {44.00000, 16.16993}, + {46.00000, 14.91410}, {48.00000, 13.50926}, {50.00000, 11.96944}, + {52.00000, 10.31003}, {54.00000, 8.54760}, {56.00000, 6.69976}, + {58.00000, 4.78499}, {60.00000, 2.82240}, {62.00000, 0.83161}, + {181.00000, 9.95004}, // Cluster 2 + {182.00000, 9.80067}, {183.00000, 9.55336}, {184.00000, 9.21061}, + {185.00000, 8.77583}, {186.00000, 8.25336}, {187.00000, 7.64842}, + {188.00000, 6.96707}, {189.00000, 6.21610}, {190.00000, 5.40302}, + {191.00000, 4.53596}, {192.00000, 3.62358}, {193.00000, 2.67499}, + {194.00000, 1.69967}, {195.00000, 0.70737}, {196.00000, -0.29200}, + {197.00000, -1.28844}, {198.00000, -2.27202}, {199.00000, -3.23290}, + {200.00000, -4.16147}, {201.00000, -5.04846}, {202.00000, -5.88501}, + {203.00000, -6.66276}, {204.00000, -7.37394}, {205.00000, -8.01144}, + {206.00000, -8.56889}, {207.00000, -9.04072}, {208.00000, -9.42222}, + {209.00000, -9.70958}, {210.00000, -9.89992}, {211.00000, -9.99135}, + {232.41593, -9.95004}, // Cluster 3 + {233.41593, -9.80067}, {234.41593, -9.55336}, {235.41593, -9.21061}, + {236.41593, -8.77583}, {237.41593, -8.25336}, {238.41593, -7.64842}, + {239.41593, -6.96707}, {240.41593, -6.21610}, {241.41593, -5.40302}, + {242.41593, -4.53596}, {243.41593, -3.62358}, {244.41593, -2.67499}, + {245.41593, -1.69967}, {246.41593, -0.70737}, {247.41593, 0.29200}, + {248.41593, 1.28844}, {249.41593, 2.27202}, {250.41593, 3.23290}, + {251.41593, 4.16147}, {252.41593, 5.04846}, {253.41593, 5.88501}, + {254.41593, 6.66276}, {255.41593, 7.37394}, {256.41593, 8.01144}, + {257.41593, 8.56889}, {258.41593, 9.04072}, {259.41593, 9.42222}, + {260.41593, 9.70958}, {261.41593, 9.89992}, {262.41593, 9.99135}}; std::size_t clusters[] = {0, 31, 62, boost::size(x)}; TSizeVecVec perfect(3); @@ -561,10 +601,8 @@ void CBootstrapClustererTest::testNonConvexClustering() { TVector2VecVec bootstrapClusters; maths::bootstrapCluster(flatPoints, 20, // trials - xmeans, - improveParamsKmeansIterations, - improveStructureClusterSeeds, - improveStructureKmeansIterations, + xmeans, improveParamsKmeansIterations, + improveStructureClusterSeeds, improveStructureKmeansIterations, 0.3, // overlap threshold to connect 3.0, // the degree of connection between overlapping clusters bootstrapClusters); @@ -583,18 +621,21 @@ void CBootstrapClustererTest::testNonConvexClustering() { for (std::size_t i = 0u; i < perfect.size(); ++i) { double jmax = 0.0; for (std::size_t j = 0u; j < bootstrap.size(); ++j) { - jmax = std::max(jmax, - maths::CSetTools::jaccard(bootstrap[j].begin(), bootstrap[j].end(), perfect[i].begin(), perfect[i].end())); + jmax = std::max(jmax, maths::CSetTools::jaccard( + bootstrap[j].begin(), bootstrap[j].end(), + perfect[i].begin(), perfect[i].end())); } jaccard.push_back(jmax); } - LOG_DEBUG(<< "# clusters bootstrap = " << bootstrap.size() << ", Jaccard bootstrap = " << core::CContainerPrinter::print(jaccard)); + LOG_DEBUG(<< "# clusters bootstrap = " << bootstrap.size() + << ", Jaccard bootstrap = " << core::CContainerPrinter::print(jaccard)); numberClustersBootstrap.add(static_cast(bootstrap.size())); jaccardBootstrapToPerfect.add(jaccard); TVector2Vec flatPoints_(flatPoints); xmeans.setPoints(flatPoints_); - xmeans.run(improveParamsKmeansIterations, improveStructureClusterSeeds, improveStructureKmeansIterations); 
+ xmeans.run(improveParamsKmeansIterations, improveStructureClusterSeeds, + improveStructureKmeansIterations); vanilla.resize(xmeans.clusters().size()); for (std::size_t i = 0u; i < xmeans.clusters().size(); ++i) { @@ -610,24 +651,33 @@ void CBootstrapClustererTest::testNonConvexClustering() { for (std::size_t i = 0u; i < perfect.size(); ++i) { double jmax = 0.0; for (std::size_t j = 0u; j < vanilla.size(); ++j) { - jmax = - std::max(jmax, maths::CSetTools::jaccard(vanilla[j].begin(), vanilla[j].end(), perfect[i].begin(), perfect[i].end())); + jmax = std::max(jmax, maths::CSetTools::jaccard( + vanilla[j].begin(), vanilla[j].end(), + perfect[i].begin(), perfect[i].end())); } jaccard.push_back(jmax); } - LOG_DEBUG(<< "# clusters vanilla = " << vanilla.size() << ", Jaccard vanilla = " << core::CContainerPrinter::print(jaccard)); + LOG_DEBUG(<< "# clusters vanilla = " << vanilla.size() + << ", Jaccard vanilla = " << core::CContainerPrinter::print(jaccard)); numberClustersVanilla.add(static_cast(vanilla.size())); jaccardVanillaToPerfect.add(jaccard); } - LOG_DEBUG(<< "Jaccard bootstrap to perfect = " << maths::CBasicStatistics::mean(jaccardBootstrapToPerfect)); - LOG_DEBUG(<< "Jaccard vanilla to perfect = " << maths::CBasicStatistics::mean(jaccardVanillaToPerfect)); - LOG_DEBUG(<< "# clusters bootstrap = " << maths::CBasicStatistics::mean(numberClustersBootstrap)); - LOG_DEBUG(<< "# clusters vanilla = " << maths::CBasicStatistics::mean(numberClustersVanilla)); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, maths::CBasicStatistics::mean(jaccardBootstrapToPerfect), 0.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, maths::CBasicStatistics::mean(numberClustersBootstrap), 0.6); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(jaccardBootstrapToPerfect) > maths::CBasicStatistics::mean(jaccardVanillaToPerfect)); + LOG_DEBUG(<< "Jaccard bootstrap to perfect = " + << maths::CBasicStatistics::mean(jaccardBootstrapToPerfect)); + LOG_DEBUG(<< "Jaccard vanilla to perfect = " + << maths::CBasicStatistics::mean(jaccardVanillaToPerfect)); + LOG_DEBUG(<< "# clusters bootstrap = " + << maths::CBasicStatistics::mean(numberClustersBootstrap)); + LOG_DEBUG(<< "# clusters vanilla = " + << maths::CBasicStatistics::mean(numberClustersVanilla)); + + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0, maths::CBasicStatistics::mean(jaccardBootstrapToPerfect), 0.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 3.0, maths::CBasicStatistics::mean(numberClustersBootstrap), 0.6); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(jaccardBootstrapToPerfect) > + maths::CBasicStatistics::mean(jaccardVanillaToPerfect)); } void CBootstrapClustererTest::testClusteringStability() { @@ -682,7 +732,8 @@ void CBootstrapClustererTest::testClusteringStability() { rng.random_shuffle(points2.begin(), points2.end()); points.assign(points1.begin(), points1.begin() + (3 * points1.size()) / 4); - points.insert(points.end(), points2.begin(), points2.begin() + (3 * points2.size()) / 4); + points.insert(points.end(), points2.begin(), + points2.begin() + (3 * points2.size()) / 4); TVector2VecVec bootstrapClusters; maths::CXMeans> xmeans(20); @@ -714,8 +765,11 @@ void CBootstrapClustererTest::testClusteringStability() { double Jmax = 0.0; std::size_t cluster = 0; for (std::size_t j = 0u; j < perfect.size(); ++j) { - double J = maths::CSetTools::jaccard(bootstrap[i].begin(), bootstrap[i].end(), perfect[j].begin(), perfect[j].end()); - boost::tie(Jmax, cluster) = std::max(std::make_pair(Jmax, cluster), std::make_pair(J, j)); + double J = maths::CSetTools::jaccard( + bootstrap[i].begin(), 
bootstrap[i].end(), + perfect[j].begin(), perfect[j].end()); + boost::tie(Jmax, cluster) = std::max( + std::make_pair(Jmax, cluster), std::make_pair(J, j)); } for (std::size_t j = 0u; j < bootstrap[i].size(); ++j) { ++clusterCounts[cluster][bootstrap[i][j]]; @@ -744,20 +798,23 @@ void CBootstrapClustererTest::testClusteringStability() { CppUnit::Test* CBootstrapClustererTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBootstrapClustererTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CBootstrapClustererTest::testFacade", &CBootstrapClustererTest::testFacade)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBootstrapClustererTest::testBuildClusterGraph", - &CBootstrapClustererTest::testBuildClusterGraph)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBootstrapClustererTest::testCutSearch", - &CBootstrapClustererTest::testCutSearch)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CBootstrapClustererTest::testSeparate", &CBootstrapClustererTest::testSeparate)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CBootstrapClustererTest::testThickets", &CBootstrapClustererTest::testThickets)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBootstrapClustererTest::testNonConvexClustering", - &CBootstrapClustererTest::testNonConvexClustering)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBootstrapClustererTest::testClusteringStability", - &CBootstrapClustererTest::testClusteringStability)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBootstrapClustererTest::testFacade", &CBootstrapClustererTest::testFacade)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBootstrapClustererTest::testBuildClusterGraph", + &CBootstrapClustererTest::testBuildClusterGraph)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBootstrapClustererTest::testCutSearch", &CBootstrapClustererTest::testCutSearch)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBootstrapClustererTest::testSeparate", &CBootstrapClustererTest::testSeparate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBootstrapClustererTest::testThickets", &CBootstrapClustererTest::testThickets)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBootstrapClustererTest::testNonConvexClustering", + &CBootstrapClustererTest::testNonConvexClustering)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBootstrapClustererTest::testClusteringStability", + &CBootstrapClustererTest::testClusteringStability)); return suiteOfTests; } diff --git a/lib/maths/unittest/CBoundingBoxTest.cc b/lib/maths/unittest/CBoundingBoxTest.cc index 5405a56fc6..9359bd984d 100644 --- a/lib/maths/unittest/CBoundingBoxTest.cc +++ b/lib/maths/unittest/CBoundingBoxTest.cc @@ -125,12 +125,14 @@ void CBoundingBoxTest::testCloserTo() { TVector2 y2(&probes[j + 2], &probes[j + 4]); bool closer = closerToX(bb, y1, y2); if (closer) { - LOG_DEBUG(<< "bb = " << bb.print() << " is closer to " << y1 << " than " << y2); + LOG_DEBUG(<< "bb = " << bb.print() << " is closer to " << y1 + << " than " << y2); } CPPUNIT_ASSERT_EQUAL(closer, bb.closerToX(y1, y2)); closer = closerToX(bb, y2, y1); if (closer) { - LOG_DEBUG(<< "bb = " << bb.print() << " is closer to " << y2 << " than " << y1); + LOG_DEBUG(<< "bb = " << bb.print() << " is closer to " << y2 + << " than " << y1); } CPPUNIT_ASSERT_EQUAL(closer, bb.closerToX(y2, y1)); } @@ -148,12 +150,14 @@ void CBoundingBoxTest::testCloserTo() { TVector4 y2(&probes[j + 4], &probes[j + 8]); bool closer = closerToX(bb, y1, y2); if (closer) { - LOG_DEBUG(<< "bb = " << 
bb.print() << " is closer to " << y1 << " than " << y2); + LOG_DEBUG(<< "bb = " << bb.print() << " is closer to " << y1 + << " than " << y2); } CPPUNIT_ASSERT_EQUAL(closer, bb.closerToX(y1, y2)); closer = closerToX(bb, y2, y1); if (closer) { - LOG_DEBUG(<< "bb = " << bb.print() << " is closer to " << y2 << " than " << y1); + LOG_DEBUG(<< "bb = " << bb.print() << " is closer to " << y2 + << " than " << y1); } CPPUNIT_ASSERT_EQUAL(closer, bb.closerToX(y2, y1)); } @@ -163,8 +167,10 @@ void CBoundingBoxTest::testCloserTo() { CppUnit::Test* CBoundingBoxTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBoundingBoxTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CBoundingBoxTest::testAdd", &CBoundingBoxTest::testAdd)); - suiteOfTests->addTest(new CppUnit::TestCaller("CBoundingBoxTest::testCloserTo", &CBoundingBoxTest::testCloserTo)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBoundingBoxTest::testAdd", &CBoundingBoxTest::testAdd)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBoundingBoxTest::testCloserTo", &CBoundingBoxTest::testCloserTo)); return suiteOfTests; } diff --git a/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc b/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc index c79aa15894..fabbe9cfe5 100644 --- a/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc +++ b/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc @@ -49,8 +49,10 @@ void CCalendarComponentAdaptiveBucketingTest::testInitialize() { CPPUNIT_ASSERT(!bucketing.initialize(0)); - const std::string expectedEndpoints{"[0, 7200, 14400, 21600, 28800, 36000, 43200, 50400, 57600, 64800, 72000, 79200, 86400]"}; - const std::string expectedKnots{"[0, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400]"}; + const std::string expectedEndpoints{ + "[0, 7200, 14400, 21600, 28800, 36000, 43200, 50400, 57600, 64800, 72000, 79200, 86400]"}; + const std::string expectedKnots{ + "[0, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400]"}; const std::string expectedValues{ "[129600, 90000, 97200, 104400, 111600, 118800, 126000, 133200, 140400, 147600, 154800, 162000, 169200, 129600]"}; @@ -90,7 +92,8 @@ void CCalendarComponentAdaptiveBucketingTest::testSwap() { for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime t{start + static_cast(864 * i)}; if (bucketing1.feature().inWindow(t)) { - double y{0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0)}; + double y{0.02 * (static_cast(i) - 50.0) * + (static_cast(i) - 50.0)}; bucketing1.add(t, y + noise[i]); } } @@ -98,7 +101,8 @@ void CCalendarComponentAdaptiveBucketingTest::testSwap() { bucketing1.propagateForwardsByTime(1.0); } - maths::CCalendarFeature feature2{maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, now - core::constants::WEEK}; + maths::CCalendarFeature feature2{maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, + now - core::constants::WEEK}; maths::CCalendarComponentAdaptiveBucketing bucketing2{feature2, 0.1}; uint64_t checksum1{bucketing1.checksum()}; @@ -120,8 +124,10 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() { // Test that refine reduces the function approximation error. 
- core_t::TTime times[] = {-1, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400}; - double function[] = {10, 10, 10, 10, 100, 90, 80, 90, 100, 20, 10, 10, 10, 10}; + core_t::TTime times[] = {-1, 3600, 10800, 18000, 25200, 32400, 39600, + 46800, 54000, 61200, 68400, 75600, 82800, 86400}; + double function[] = {10, 10, 10, 10, 100, 90, 80, + 90, 100, 20, 10, 10, 10, 10}; maths::CCalendarFeature feature{maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, 0}; maths::CCalendarComponentAdaptiveBucketing bucketing1{feature}; @@ -137,7 +143,8 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() { bool inWindow{bucketing1.feature().inWindow(t)}; if (inWindow) { core_t::TTime x{bucketing1.feature().offset(t)}; - ptrdiff_t i{std::lower_bound(boost::begin(times), boost::end(times), x) - boost::begin(times)}; + ptrdiff_t i{std::lower_bound(boost::begin(times), boost::end(times), x) - + boost::begin(times)}; double x0{static_cast(times[i - 1])}; double x1{static_cast(times[i])}; double y0{function[i - 1]}; @@ -158,8 +165,10 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() { const TFloatVec& endpoints1{bucketing1.endpoints()}; TDoubleVec values1{bucketing1.values(20 * 86400)}; for (std::size_t i = 1; i < endpoints1.size(); ++i) { - core_t::TTime t{static_cast(0.5 * (endpoints1[i] + endpoints1[i - 1] + 1.0))}; - ptrdiff_t j{std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times)}; + core_t::TTime t{static_cast( + 0.5 * (endpoints1[i] + endpoints1[i - 1] + 1.0))}; + ptrdiff_t j{std::lower_bound(boost::begin(times), boost::end(times), t) - + boost::begin(times)}; double x0{static_cast(times[j - 1])}; double x1{static_cast(times[j])}; double y0{function[j - 1]}; @@ -174,8 +183,10 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() { const TFloatVec& endpoints2{bucketing2.endpoints()}; TDoubleVec values2{bucketing2.values(20 * 86400)}; for (std::size_t i = 1; i < endpoints1.size(); ++i) { - core_t::TTime t{static_cast(0.5 * (endpoints2[i] + endpoints2[i - 1] + 1.0))}; - ptrdiff_t j{std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times)}; + core_t::TTime t{static_cast( + 0.5 * (endpoints2[i] + endpoints2[i - 1] + 1.0))}; + ptrdiff_t j{std::lower_bound(boost::begin(times), boost::end(times), t) - + boost::begin(times)}; double x0{static_cast(times[j - 1])}; double x1{static_cast(times[j])}; double y0{function[j - 1]}; @@ -189,7 +200,8 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() { LOG_DEBUG(<< "max error = " << maxError1[0]); LOG_DEBUG(<< "refined mean error = " << maths::CBasicStatistics::mean(meanError2)); LOG_DEBUG(<< "refined max error = " << maxError2[0]); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError2) < 0.7 * maths::CBasicStatistics::mean(meanError1)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError2) < + 0.7 * maths::CBasicStatistics::mean(meanError1)); CPPUNIT_ASSERT(maxError2[0] < 0.65 * maxError1[0]); } @@ -207,7 +219,8 @@ void CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime() { bucketing.initialize(10); for (core_t::TTime t = 0; t < 86400; t += 1800) { - double y = 10.0 * (static_cast(t) - 43200.0) / 43200.0 * (static_cast(t) - 43200.0) / 43200.0; + double y = 10.0 * (static_cast(t) - 43200.0) / 43200.0 * + (static_cast(t) - 43200.0) / 43200.0; bucketing.add(t, y); } bucketing.refine(86400); @@ -217,7 +230,8 @@ void CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime() { for (std::size_t i = 0u; i < 20; 
++i) { bucketing.propagateForwardsByTime(1.0); double count = bucketing.count(); - LOG_DEBUG(<< "count = " << count << ", lastCount = " << lastCount << " count/lastCount = " << count / lastCount); + LOG_DEBUG(<< "count = " << count << ", lastCount = " << lastCount + << " count/lastCount = " << count / lastCount); CPPUNIT_ASSERT(count < lastCount); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.81873, count / lastCount, 5e-6); lastCount = count; @@ -229,8 +243,9 @@ void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() { LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength |"); LOG_DEBUG(<< "+--------------------------------------------------------------------+"); - double function[] = {0.0, 0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + double function[] = {0.0, 0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; std::size_t n{boost::size(function)}; test::CRandomNumbers rng; @@ -264,7 +279,8 @@ void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() { for (std::size_t j = 1u; j < endpoints1.size(); ++j) { minimumBucketLength1.add(endpoints1[j] - endpoints1[j - 1]); minimumBucketLength2.add(endpoints2[j] - endpoints2[j - 1]); - double minimumShift{std::max(1500.0 - (endpoints1[j] - endpoints1[j - 1]), 0.0) / 2.0}; + double minimumShift{ + std::max(1500.0 - (endpoints1[j] - endpoints1[j - 1]), 0.0) / 2.0}; minimumTotalError += minimumShift; } LOG_DEBUG(<< "minimumBucketLength1 = " << minimumBucketLength1); @@ -350,7 +366,8 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() { bucketing.initialize(24); for (core_t::TTime t = 0; t < 86400; t += 600) { - double y{0.0002 * (static_cast(t) - 43800.0) * (static_cast(t) - 43800.0)}; + double y{0.0002 * (static_cast(t) - 43800.0) * + (static_cast(t) - 43800.0)}; TDoubleVec noise; rng.generateNormalSamples(0.0, 4.0, 1, noise); bucketing.add(t, y + noise[0]); @@ -375,7 +392,9 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() { } LOG_DEBUG(<< "meanError = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG(<< "meanValue = " << maths::CBasicStatistics::mean(meanValue)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanValue) < 0.02); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / + maths::CBasicStatistics::mean(meanValue) < + 0.02); } LOG_DEBUG(<< "*** Variances ***"); @@ -409,15 +428,19 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() { TMeanAccumulator meanError; TMeanAccumulator meanVariance; for (std::size_t i = 0u; i < knots.size(); ++i) { - double expectedVariance{0.001 * (static_cast(knots[i]) - 43800.0) * (static_cast(knots[i]) - 43800.0) / 86400}; - LOG_DEBUG(<< "expected = " << expectedVariance << ", variance = " << variances[i]); + double expectedVariance{0.001 * (static_cast(knots[i]) - 43800.0) * + (static_cast(knots[i]) - 43800.0) / 86400}; + LOG_DEBUG(<< "expected = " << expectedVariance + << ", variance = " << variances[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, variances[i], 5.0); meanError.add(std::fabs(variances[i] - expectedVariance)); meanVariance.add(std::fabs(expectedVariance)); } LOG_DEBUG(<< "meanError = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG(<< "meanVariance = " << maths::CBasicStatistics::mean(meanVariance)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanVariance) 
< 0.16); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / + maths::CBasicStatistics::mean(meanVariance) < + 0.16); } } @@ -439,7 +462,8 @@ void CCalendarComponentAdaptiveBucketingTest::testPersist() { for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime t{static_cast<core_t::TTime>(p * 86400 + 864 * i)}; if (bucketing.feature().inWindow(t)) { - double y{0.02 * (static_cast<double>(i) - 50.0) * (static_cast<double>(i) - 50.0)}; + double y{0.02 * (static_cast<double>(i) - 50.0) * + (static_cast<double>(i) - 50.0)}; bucketing.add(t, y); } } @@ -462,9 +486,11 @@ void CCalendarComponentAdaptiveBucketingTest::testPersist() { core::CRapidXmlStateRestoreTraverser traverser(parser); // Restore the XML into a new bucketing. - maths::CCalendarComponentAdaptiveBucketing restoredBucketing{decayRate + 0.1, minimumBucketLength, traverser}; + maths::CCalendarComponentAdaptiveBucketing restoredBucketing{ + decayRate + 0.1, minimumBucketLength, traverser}; - LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredBucketing.checksum()); + LOG_DEBUG(<< "orig checksum = " << checksum + << " restored checksum = " << restoredBucketing.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredBucketing.checksum()); // The XML representation of the new bucketing should be the @@ -479,14 +505,18 @@ void CCalendarComponentAdaptiveBucketingTest::testPersist() { } CppUnit::Test* CCalendarComponentAdaptiveBucketingTest::suite() { - CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCalendarComponentAdaptiveBucketingTest"); + CppUnit::TestSuite* suiteOfTests = + new CppUnit::TestSuite("CCalendarComponentAdaptiveBucketingTest"); suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>( - "CCalendarComponentAdaptiveBucketingTest::testInitialize", &CCalendarComponentAdaptiveBucketingTest::testInitialize)); + "CCalendarComponentAdaptiveBucketingTest::testInitialize", + &CCalendarComponentAdaptiveBucketingTest::testInitialize)); suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>( - "CCalendarComponentAdaptiveBucketingTest::testSwap", &CCalendarComponentAdaptiveBucketingTest::testSwap)); + "CCalendarComponentAdaptiveBucketingTest::testSwap", + &CCalendarComponentAdaptiveBucketingTest::testSwap)); suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>( - "CCalendarComponentAdaptiveBucketingTest::testRefine", &CCalendarComponentAdaptiveBucketingTest::testRefine)); + "CCalendarComponentAdaptiveBucketingTest::testRefine", + &CCalendarComponentAdaptiveBucketingTest::testRefine)); suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>( "CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime", &CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime)); @@ -494,11 +524,14 @@ CppUnit::Test* CCalendarComponentAdaptiveBucketingTest::suite() { "CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength", &CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength)); suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>( - "CCalendarComponentAdaptiveBucketingTest::testUnintialized", &CCalendarComponentAdaptiveBucketingTest::testUnintialized)); + "CCalendarComponentAdaptiveBucketingTest::testUnintialized", + &CCalendarComponentAdaptiveBucketingTest::testUnintialized)); suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>( - "CCalendarComponentAdaptiveBucketingTest::testKnots", &CCalendarComponentAdaptiveBucketingTest::testKnots)); + "CCalendarComponentAdaptiveBucketingTest::testKnots", + &CCalendarComponentAdaptiveBucketingTest::testKnots)); suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarComponentAdaptiveBucketingTest>( - "CCalendarComponentAdaptiveBucketingTest::testPersist",
&CCalendarComponentAdaptiveBucketingTest::testPersist)); + "CCalendarComponentAdaptiveBucketingTest::testPersist", + &CCalendarComponentAdaptiveBucketingTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CCalendarFeatureTest.cc b/lib/maths/unittest/CCalendarFeatureTest.cc index c22995be34..a0908c44ef 100644 --- a/lib/maths/unittest/CCalendarFeatureTest.cc +++ b/lib/maths/unittest/CCalendarFeatureTest.cc @@ -56,12 +56,17 @@ void CCalendarFeatureTest::testInitialize() { core_t::TTime time{static_cast<core_t::TTime>(times[i])}; maths::CCalendarFeature::TCalendarFeature4Ary expected; - expected[0] = maths::CCalendarFeature(maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, time); - expected[1] = maths::CCalendarFeature(maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, time); - expected[2] = maths::CCalendarFeature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, time); - expected[3] = maths::CCalendarFeature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH, time); - - maths::CCalendarFeature::TCalendarFeature4Ary actual = maths::CCalendarFeature::features(time); + expected[0] = maths::CCalendarFeature( + maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, time); + expected[1] = maths::CCalendarFeature( + maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, time); + expected[2] = maths::CCalendarFeature( + maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, time); + expected[3] = maths::CCalendarFeature( + maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH, time); + + maths::CCalendarFeature::TCalendarFeature4Ary actual = + maths::CCalendarFeature::features(time); CPPUNIT_ASSERT(expected == actual); } @@ -87,7 +92,8 @@ void CCalendarFeatureTest::testComparison() { for (std::size_t i = 0u; i < times.size(); ++i) { core_t::TTime time{static_cast<core_t::TTime>(times[i])}; - maths::CCalendarFeature::TCalendarFeature4Ary fi = maths::CCalendarFeature::features(time); + maths::CCalendarFeature::TCalendarFeature4Ary fi = + maths::CCalendarFeature::features(time); features.insert(features.end(), fi.begin(), fi.end()); } @@ -127,24 +133,28 @@ void CCalendarFeatureTest::testOffset() { for (const auto& time_ : times) { core_t::TTime time{start + static_cast<core_t::TTime>(time_)}; - maths::CCalendarFeature::TCalendarFeature4Ary features = maths::CCalendarFeature::features(time); + maths::CCalendarFeature::TCalendarFeature4Ary features = + maths::CCalendarFeature::features(time); int dummy; int month; core::CTimezone::instance().dateFields(time, dummy, dummy, dummy, month, dummy, dummy); - TTimeVec offsets{-86400, -43400, -12800, -3600, 0, 3600, 12800, 43400, 86400}; + TTimeVec offsets{-86400, -43400, -12800, -3600, 0, + 3600, 12800, 43400, 86400}; for (const auto& offset : offsets) { core_t::TTime offsetTime = time + offset; int offsetMonth; - core::CTimezone::instance().dateFields(offsetTime, dummy, dummy, dummy, offsetMonth, dummy, dummy); + core::CTimezone::instance().dateFields(offsetTime, dummy, dummy, dummy, + offsetMonth, dummy, dummy); if (month == offsetMonth) { for (const auto& feature : features) { - CPPUNIT_ASSERT(feature.offset(time) + offset == feature.offset(offsetTime) || - feature.offset(time) + offset == feature.offset(offsetTime) - 3600 || - feature.offset(time) + offset == feature.offset(offsetTime) + 3600); + CPPUNIT_ASSERT( + feature.offset(time) + offset == feature.offset(offsetTime) || + feature.offset(time) + offset == feature.offset(offsetTime) - 3600 || + feature.offset(time) + offset == feature.offset(offsetTime) + 3600);
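// The +/-3600s slack in the assertion above is deliberate: calendar feature
// offsets are wall-clock quantities, so when a daylight-saving transition
// falls between `time` and `offsetTime` the measured offset shifts by
// exactly one hour. Equality up to one hour in either direction, i.e.
// offset(time) + offset == offset(offsetTime) + {0, -3600, +3600}, is
// therefore the strongest invariant that holds in every timezone (a reading
// of the test's intent, assuming offset() is measured in seconds).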
++tests; } } @@ -163,68 +173,83 @@ void CCalendarFeatureTest::testOffset() { LOG_DEBUG(<< "Test days since start of month"); { - maths::CCalendarFeature feature(maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, feb1st); + maths::CCalendarFeature feature( + maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, feb1st); for (core_t::TTime time = march1st; time < april1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - march1st, feature.offset(time)); CPPUNIT_ASSERT_EQUAL(time - march1st + 4800, feature.offset(time + 4800)); } } { - maths::CCalendarFeature feature(maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, feb1st + 12 * DAY); + maths::CCalendarFeature feature( + maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, feb1st + 12 * DAY); for (core_t::TTime time = march1st; time < april1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - march1st - 12 * DAY, feature.offset(time)); - CPPUNIT_ASSERT_EQUAL(time - march1st - 12 * DAY + 43400, feature.offset(time + 43400)); + CPPUNIT_ASSERT_EQUAL(time - march1st - 12 * DAY + 43400, + feature.offset(time + 43400)); } } LOG_DEBUG(<< "Test days before end of month") { - maths::CCalendarFeature feature(maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, feb1st); + maths::CCalendarFeature feature( + maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, feb1st); for (core_t::TTime time = march1st; time < april1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - march1st - 3 * DAY, feature.offset(time)); - CPPUNIT_ASSERT_EQUAL(time - march1st - 3 * DAY + 7200, feature.offset(time + 7200)); + CPPUNIT_ASSERT_EQUAL(time - march1st - 3 * DAY + 7200, + feature.offset(time + 7200)); } } { - maths::CCalendarFeature feature(maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, feb1st + 10 * DAY); + maths::CCalendarFeature feature( + maths::CCalendarFeature::DAYS_BEFORE_END_OF_MONTH, feb1st + 10 * DAY); for (core_t::TTime time = march1st; time < april1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - march1st - 13 * DAY, feature.offset(time)); - CPPUNIT_ASSERT_EQUAL(time - march1st - 13 * DAY + 86399, feature.offset(time + 86399)); + CPPUNIT_ASSERT_EQUAL(time - march1st - 13 * DAY + 86399, + feature.offset(time + 86399)); } } LOG_DEBUG(<< "Test day of week and week of month"); { // Feb 1 1970 is a Sunday and April 1st 1970 is a Wednesday. - maths::CCalendarFeature feature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, feb1st); + maths::CCalendarFeature feature( + maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, feb1st); for (core_t::TTime time = april1st; time < may1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - april1st - 4 * DAY, feature.offset(time)); - CPPUNIT_ASSERT_EQUAL(time - april1st - 4 * DAY + 7200, feature.offset(time + 7200)); + CPPUNIT_ASSERT_EQUAL(time - april1st - 4 * DAY + 7200, + feature.offset(time + 7200)); } } { // Feb 13 1970 is a Friday and April 1st 1970 is a Wednesday. 
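// Reading of the arithmetic below (not a statement about the library
// internals): a feature anchored at feb1st + 12 * DAY, i.e. the Friday of
// the second week of February, recurs on the Friday of the second week of
// each month. April 1970 opens on a Wednesday, so that Friday is
// april1st + 9 * DAY, giving offset(t) = t - (april1st + 9 * DAY)
// throughout April, which is exactly what the assertions encode.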
- maths::CCalendarFeature feature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, feb1st + 12 * DAY); + maths::CCalendarFeature feature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, + feb1st + 12 * DAY); for (core_t::TTime time = april1st; time < may1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - april1st - 9 * DAY, feature.offset(time)); - CPPUNIT_ASSERT_EQUAL(time - april1st - 9 * DAY + 73000, feature.offset(time + 73000)); + CPPUNIT_ASSERT_EQUAL(time - april1st - 9 * DAY + 73000, + feature.offset(time + 73000)); } } LOG_DEBUG(<< "Test day of week and week until end of month"); { // Feb 1 1970 is a Sunday and April 31st 1970 is a Friday. - maths::CCalendarFeature feature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH, feb1st); + maths::CCalendarFeature feature( + maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_BEFORE_END_OF_MONTH, feb1st); for (core_t::TTime time = april1st; time < may1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - april1st - 4 * DAY, feature.offset(time)); - CPPUNIT_ASSERT_EQUAL(time - april1st - 4 * DAY + 7200, feature.offset(time + 7200)); + CPPUNIT_ASSERT_EQUAL(time - april1st - 4 * DAY + 7200, + feature.offset(time + 7200)); } } { // Feb 13 1970 is a Friday and April 1st 1970 is a Wednesday. - maths::CCalendarFeature feature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, feb1st + 12 * DAY); + maths::CCalendarFeature feature(maths::CCalendarFeature::DAY_OF_WEEK_AND_WEEKS_SINCE_START_OF_MONTH, + feb1st + 12 * DAY); for (core_t::TTime time = april1st; time < may1st; time += DAY) { CPPUNIT_ASSERT_EQUAL(time - april1st - 9 * DAY, feature.offset(time)); - CPPUNIT_ASSERT_EQUAL(time - april1st - 9 * DAY + 73000, feature.offset(time + 73000)); + CPPUNIT_ASSERT_EQUAL(time - april1st - 9 * DAY + 73000, + feature.offset(time + 73000)); } } } @@ -234,7 +259,8 @@ void CCalendarFeatureTest::testPersist() { LOG_DEBUG(<< "| CCalendarFeatureTest::testPersist |"); LOG_DEBUG(<< "+-------------------------------------+"); - maths::CCalendarFeature::TCalendarFeature4Ary features = maths::CCalendarFeature::features(core::CTimeUtils::now()); + maths::CCalendarFeature::TCalendarFeature4Ary features = + maths::CCalendarFeature::features(core::CTimeUtils::now()); for (std::size_t i = 0u; i < 4; ++i) { std::string state = features[i].toDelimited(); @@ -251,14 +277,14 @@ CppUnit::Test* CCalendarFeatureTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCalendarFeatureTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CCalendarFeatureTest>("CCalendarFeatureTest::testInitialize", &CCalendarFeatureTest::testInitialize)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CCalendarFeatureTest>("CCalendarFeatureTest::testComparison", &CCalendarFeatureTest::testComparison)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CCalendarFeatureTest>("CCalendarFeatureTest::testOffset", &CCalendarFeatureTest::testOffset)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CCalendarFeatureTest>("CCalendarFeatureTest::testPersist", &CCalendarFeatureTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarFeatureTest>( + "CCalendarFeatureTest::testInitialize", &CCalendarFeatureTest::testInitialize)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarFeatureTest>( + "CCalendarFeatureTest::testComparison", &CCalendarFeatureTest::testComparison)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarFeatureTest>( + "CCalendarFeatureTest::testOffset", &CCalendarFeatureTest::testOffset)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCalendarFeatureTest>( + "CCalendarFeatureTest::testPersist",
&CCalendarFeatureTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CCategoricalToolsTest.cc b/lib/maths/unittest/CCategoricalToolsTest.cc index 9b4894eba1..99070e4db8 100644 --- a/lib/maths/unittest/CCategoricalToolsTest.cc +++ b/lib/maths/unittest/CCategoricalToolsTest.cc @@ -55,28 +55,32 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { TMeanVarAccumulator expectedDistinctCategories; for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), - TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - boost::size(probabilities), - samples); + rng.generateMultinomialSamples( + TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities)); - LOG_DEBUG(<< "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - static_cast(boost::size(probabilities)), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } { @@ -85,28 +89,32 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { TMeanVarAccumulator expectedDistinctCategories; for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), - TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - boost::size(probabilities), - samples); + rng.generateMultinomialSamples( + TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG(<< "probabilities = " << 
core::CContainerPrinter::print(probabilities)); - LOG_DEBUG(<< "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - static_cast(boost::size(probabilities)), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } { double probabilities[] = {0.35, 0.1, 0.25, 0.25, 0.05}; @@ -114,120 +122,140 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { TMeanVarAccumulator expectedDistinctCategories; for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), - TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - boost::size(probabilities), - samples); + rng.generateMultinomialSamples( + TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities)); - LOG_DEBUG(<< "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - static_cast(boost::size(probabilities)), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * 
std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } } { - double categories[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + double categories[] = {1.0, 2.0, 3.0, 4.0, 5.0, + 6.0, 7.0, 8.0, 9.0, 10.0}; { - double probabilities[] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; + double probabilities[] = {0.1, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.1}; TMeanVarAccumulator expectedDistinctCategories; for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), - TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - boost::size(probabilities), - samples); + rng.generateMultinomialSamples( + TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities)); - LOG_DEBUG(<< "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - static_cast(boost::size(probabilities)), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } { - double probabilities[] = {0.05, 0.3, 0.4, 0.02, 0.03, 0.05, 0.05, 0.01, 0.02, 0.07}; + double probabilities[] = {0.05, 0.3, 0.4, 0.02, 0.03, + 0.05, 0.05, 0.01, 0.02, 0.07}; TMeanVarAccumulator expectedDistinctCategories; for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), - TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - boost::size(probabilities), - samples); + rng.generateMultinomialSamples( + TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + boost::size(probabilities), samples); 
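// These Monte Carlo estimates have a closed form to compare against: for n
// independent draws over categories with probabilities p_i, the expected
// number of distinct categories observed is
//
//     E[distinct] = sum_i (1 - (1 - p_i)^n),
//
// presumably what CCategoricalTools::expectedDistinctCategories evaluates.
// A minimal standalone sketch of the formula (hypothetical helper, not part
// of the library):
//
//     double expectedDistinct(const TDoubleVec& p, double n) {
//         double result = 0.0;
//         for (double pi : p) {
//             result += 1.0 - std::pow(1.0 - pi, n);
//         }
//         return result;
//     }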
std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities)); - LOG_DEBUG(<< "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - static_cast(boost::size(probabilities)), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } { - double probabilities[] = {0.05, 0.1, 0.15, 0.15, 0.05, 0.05, 0.1, 0.15, 0.15, 0.05}; + double probabilities[] = {0.05, 0.1, 0.15, 0.15, 0.05, + 0.05, 0.1, 0.15, 0.15, 0.05}; TMeanVarAccumulator expectedDistinctCategories; for (std::size_t i = 0u; i < nTrials; ++i) { TDoubleVec samples; - rng.generateMultinomialSamples(TDoubleVec(boost::begin(categories), boost::end(categories)), - TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - boost::size(probabilities), - samples); + rng.generateMultinomialSamples( + TDoubleVec(boost::begin(categories), boost::end(categories)), + TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), + boost::size(probabilities), samples); std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities)); - LOG_DEBUG(<< "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; - maths::CCategoricalTools::expectedDistinctCategories(TDoubleVec(boost::begin(probabilities), boost::end(probabilities)), - static_cast(boost::size(probabilities)), - distinctCategories); + maths::CCategoricalTools::expectedDistinctCategories( + TDoubleVec(boost::begin(probabilities), 
boost::end(probabilities)), + static_cast(boost::size(probabilities)), distinctCategories); LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 2.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } } { @@ -246,14 +274,18 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { TMeanVarAccumulator expectedDistinctCategories; for (std::size_t j = 0u; j < nTrials; ++j) { TDoubleVec samples; - rng.generateMultinomialSamples(categories, probabilities[i], categories.size(), samples); + rng.generateMultinomialSamples(categories, probabilities[i], + categories.size(), samples); std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } - LOG_DEBUG( - << "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; maths::CCategoricalTools::expectedDistinctCategories( @@ -261,9 +293,9 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 3.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 3.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } } { @@ -279,14 +311,18 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { TMeanVarAccumulator expectedDistinctCategories; for (std::size_t j = 0u; j < nTrials; ++j) { TDoubleVec samples; - rng.generateMultinomialSamples(categories, probabilities[i], categories.size(), samples); + rng.generateMultinomialSamples(categories, probabilities[i], + categories.size(), samples); std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } - LOG_DEBUG( - << "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; maths::CCategoricalTools::expectedDistinctCategories( @@ 
-294,9 +330,9 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 3.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 3.0 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } } { @@ -316,14 +352,18 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { TMeanVarAccumulator expectedDistinctCategories; for (std::size_t j = 0u; j < nTrials; ++j) { TDoubleVec samples; - rng.generateMultinomialSamples(categories, probabilities[i], categories.size(), samples); + rng.generateMultinomialSamples(categories, probabilities[i], + categories.size(), samples); std::sort(samples.begin(), samples.end()); - samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); + samples.erase(std::unique(samples.begin(), samples.end()), + samples.end()); expectedDistinctCategories.add(static_cast(samples.size())); } - LOG_DEBUG( - << "expectedDistinctCategories = " << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " - << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials)) << ")"); + LOG_DEBUG(<< "expectedDistinctCategories = " + << maths::CBasicStatistics::mean(expectedDistinctCategories) << " (deviation = " + << std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials)) + << ")"); double distinctCategories; maths::CCategoricalTools::expectedDistinctCategories( @@ -331,9 +371,9 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { LOG_DEBUG(<< "distinctCategories = " << distinctCategories); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(expectedDistinctCategories), - distinctCategories, - 2.5 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / static_cast(nTrials))); + maths::CBasicStatistics::mean(expectedDistinctCategories), distinctCategories, + 2.5 * std::sqrt(maths::CBasicStatistics::variance(expectedDistinctCategories) / + static_cast(nTrials))); } } } @@ -362,8 +402,10 @@ void CCategoricalToolsTest::testLogBinomialProbability() { double logpdf; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CCategoricalTools::logBinomialProbability( - static_cast(n[i]), p[j], static_cast(m), logpdf)); - LOG_DEBUG(<< "f(" << m << "), expected = " << pdf << ", actual = " << std::exp(logpdf)); + static_cast(n[i]), p[j], + static_cast(m), logpdf)); + LOG_DEBUG(<< "f(" << m << "), expected = " << pdf + << ", actual = " << std::exp(logpdf)); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, std::exp(logpdf), 1e-6 * pdf); } for (std::size_t f = 1u; f < 10; ++f) { @@ -373,8 +415,10 @@ void CCategoricalToolsTest::testLogBinomialProbability() { double logpdf; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CCategoricalTools::logBinomialProbability( - static_cast(n[i]), p[j], static_cast(m), logpdf)); - LOG_DEBUG(<< "f(" << m << "), expected = " << pdf << ", actual = " << std::exp(logpdf)); + static_cast(n[i]), p[j], + static_cast(m), logpdf)); + LOG_DEBUG(<< "f(" << m << "), expected = " << pdf + << ", actual = " << std::exp(logpdf)); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, std::exp(logpdf), 1e-6 * pdf); } } @@ -413,8 +457,11 @@ void CCategoricalToolsTest::testLogMultinomialProbability() { TSizeVec 
ni; ni.push_back(static_cast<std::size_t>(m)); ni.push_back(static_cast<std::size_t>(n[i] - m)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); - LOG_DEBUG(<< "f(" << m << "), expected = " << pdf << ", actual = " << std::exp(logpdf)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::CCategoricalTools::logMultinomialProbability( + pi, ni, logpdf)); + LOG_DEBUG(<< "f(" << m << "), expected = " << pdf + << ", actual = " << std::exp(logpdf)); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, std::exp(logpdf), 1e-6 * pdf); } for (std::size_t f = 1u; f < 10; ++f) { @@ -428,8 +475,11 @@ void CCategoricalToolsTest::testLogMultinomialProbability() { TSizeVec ni; ni.push_back(static_cast<std::size_t>(m)); ni.push_back(static_cast<std::size_t>(n[i] - m)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); - LOG_DEBUG(<< "f(" << m << "), expected = " << pdf << ", actual = " << std::exp(logpdf)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::CCategoricalTools::logMultinomialProbability( + pi, ni, logpdf)); + LOG_DEBUG(<< "f(" << m << "), expected = " << pdf + << ", actual = " << std::exp(logpdf)); CPPUNIT_ASSERT_DOUBLES_EQUAL(pdf, std::exp(logpdf), 1e-6 * pdf); } } @@ -453,7 +503,9 @@ void CCategoricalToolsTest::testLogMultinomialProbability() { ni.push_back(m); ni.push_back(i); ni.push_back(n - m - i); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpNoErrors, + maths::CCategoricalTools::logMultinomialProbability(pi, ni, logpdf)); marginal += std::exp(logpdf); } @@ -468,17 +520,21 @@ CppUnit::Test* CCategoricalToolsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCategoricalToolsTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CCategoricalToolsTest>("CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample", - &CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample)); - suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>("CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount", - &CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount)); - suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>("CCategoricalToolsTest::testExpectedDistinctCategories", - &CCategoricalToolsTest::testExpectedDistinctCategories)); - suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>("CCategoricalToolsTest::testLogBinomialProbability", - &CCategoricalToolsTest::testLogBinomialProbability)); - suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>("CCategoricalToolsTest::testLogMultinomialProbability", - &CCategoricalToolsTest::testLogMultinomialProbability)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>( + "CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample", + &CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>( + "CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount", + &CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>( + "CCategoricalToolsTest::testExpectedDistinctCategories", + &CCategoricalToolsTest::testExpectedDistinctCategories)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>( + "CCategoricalToolsTest::testLogBinomialProbability", + &CCategoricalToolsTest::testLogBinomialProbability)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCategoricalToolsTest>( +
"CCategoricalToolsTest::testLogMultinomialProbability", + &CCategoricalToolsTest::testLogMultinomialProbability)); return suiteOfTests; } diff --git a/lib/maths/unittest/CChecksumTest.cc b/lib/maths/unittest/CChecksumTest.cc index 28fda1bafc..f2427b873c 100644 --- a/lib/maths/unittest/CChecksumTest.cc +++ b/lib/maths/unittest/CChecksumTest.cc @@ -41,7 +41,9 @@ struct SFoo { struct SBar { SBar(uint64_t key) : s_Key(key) {} - uint64_t checksum(uint64_t seed) const { return core::CHashing::hashCombine(seed, s_Key); } + uint64_t checksum(uint64_t seed) const { + return core::CHashing::hashCombine(seed, s_Key); + } uint64_t s_Key; }; @@ -51,7 +53,8 @@ using TStrSet = std::set; using TStrSetCItr = TStrSet::const_iterator; using TOptionalDouble = boost::optional; using TOptionalDoubleVec = std::vector; -using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; +using TMeanVarAccumulator = + maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using TMeanVarAccumulatorPtr = boost::shared_ptr; using TDoubleMeanVarAccumulatorPr = std::pair; using TDoubleMeanVarAccumulatorPrList = std::list; @@ -72,7 +75,8 @@ void CChecksumTest::testMemberChecksum() { // Test that member functions are invoked. SFoo foo(100); LOG_DEBUG(<< "checksum foo = " << maths::CChecksum::calculate(seed, foo)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, foo), core::CHashing::hashCombine(seed, foo.checksum())); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, foo), + core::CHashing::hashCombine(seed, foo.checksum())); SBar bar(200); LOG_DEBUG(<< "checksum bar = " << maths::CChecksum::calculate(seed, bar)); CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, bar), bar.checksum(seed)); @@ -98,21 +102,25 @@ void CChecksumTest::testContainers() { TIntVec b(boost::begin(values), boost::end(values)); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), + maths::CChecksum::calculate(seed, b)); b[2] = 3; LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); rng.random_shuffle(b.begin(), b.end()); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); b[b.size() - 1] = 3; LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); } { TSizeAnEnumMap::value_type values[] = {TSizeAnEnumMap::value_type(-1, E_2), @@ -125,18 +133,21 @@ void CChecksumTest::testContainers() { TSizeAnEnumMap b(boost::begin(values), 
boost::end(values)); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), + maths::CChecksum::calculate(seed, b)); b[2] = E_1; LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); b.clear(); std::copy(boost::begin(values), boost::end(values), std::inserter(b, b.end())); b.erase(2); b[4] = E_2; LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); } { std::string values[] = {"rain", "in", "spain"}; @@ -144,7 +155,8 @@ void CChecksumTest::testContainers() { uint64_t expected = seed; core::CHashing::CSafeMurmurHash2String64 hasher; for (TStrSetCItr itr = a.begin(); itr != a.end(); ++itr) { - expected = core::CHashing::safeMurmurHash64(itr->data(), static_cast(itr->size()), expected); + expected = core::CHashing::safeMurmurHash64( + itr->data(), static_cast(itr->size()), expected); } LOG_DEBUG(<< "checksum expected = " << expected); LOG_DEBUG(<< "checksum actual = " << maths::CChecksum::calculate(seed, a)); @@ -164,7 +176,8 @@ void CChecksumTest::testContainers() { LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), + maths::CChecksum::calculate(seed, b)); } { boost::unordered_map a; @@ -176,7 +189,8 @@ void CChecksumTest::testContainers() { LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), + maths::CChecksum::calculate(seed, b)); } } @@ -200,7 +214,8 @@ void CChecksumTest::testNullable() { TOptionalDouble optional(value); LOG_DEBUG(<< "checksum expected = " << maths::CChecksum::calculate(seed, value)); LOG_DEBUG(<< "checksum actual = " << maths::CChecksum::calculate(seed, optional)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, value), maths::CChecksum::calculate(seed, optional)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, value), + maths::CChecksum::calculate(seed, optional)); } { TMeanVarAccumulator value; @@ -211,7 +226,8 @@ void CChecksumTest::testNullable() { TMeanVarAccumulatorPtr pointer(new TMeanVarAccumulator(value)); LOG_DEBUG(<< "checksum expected = " << maths::CChecksum::calculate(seed, value)); LOG_DEBUG(<< "checksum actual = " << maths::CChecksum::calculate(seed, pointer)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, value), maths::CChecksum::calculate(seed, pointer)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, value), + 
maths::CChecksum::calculate(seed, pointer)); } } @@ -228,9 +244,11 @@ void CChecksumTest::testAccumulators() { value.add(234.0); value.add(378.0); value.add(653.0); - LOG_DEBUG(<< "checksum expected = " << core::CHashing::hashCombine(seed, value.checksum())); + LOG_DEBUG(<< "checksum expected = " + << core::CHashing::hashCombine(seed, value.checksum())); LOG_DEBUG(<< "checksum actual = " << maths::CChecksum::calculate(seed, value)); - CPPUNIT_ASSERT_EQUAL(core::CHashing::hashCombine(seed, value.checksum()), maths::CChecksum::calculate(seed, value)); + CPPUNIT_ASSERT_EQUAL(core::CHashing::hashCombine(seed, value.checksum()), + maths::CChecksum::calculate(seed, value)); } } @@ -253,12 +271,14 @@ void CChecksumTest::testPair() { b.first = 4790.0; LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); b = a; b.second.add(678629.0); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); TDoubleMeanVarAccumulatorPrList collection; collection.push_back(a); @@ -283,12 +303,14 @@ void CChecksumTest::testArray() { LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) == maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) == + maths::CChecksum::calculate(seed, b)); b[1] = 23.79; LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); } void CChecksumTest::testCombinations() { @@ -309,61 +331,78 @@ void CChecksumTest::testCombinations() { // slightly, i.e. by changing an element value, permuting elements, // etc. 
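// The permutation sensitivity exercised below comes from folding the seed
// through the elements in order; a sketch of that scheme, assuming container
// checksums thread the seed element by element (an illustration, not the
// maths::CChecksum internals):
auto checksumSketch = [](uint64_t seed_, const TBarVec& elements) {
    uint64_t result = seed_;
    for (const auto& element : elements) {
        // Non-commutative combine, so reordering elements changes the result.
        result = element.checksum(result);
    }
    return result;
};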
{ - SFoo values[] = {SFoo(static_cast(-1)), SFoo(20), SFoo(10), SFoo(15), SFoo(2), SFoo(2)}; + SFoo values[] = { + SFoo(static_cast(-1)), SFoo(20), SFoo(10), SFoo(15), SFoo(2), SFoo(2)}; TFooDeque a(boost::begin(values), boost::end(values)); TFooDeque b(boost::begin(values), boost::end(values)); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), + maths::CChecksum::calculate(seed, b)); b[2] = SFoo(3); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); rng.random_shuffle(b.begin(), b.end()); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); b[b.size() - 1] = 3; LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); } { - SBar values[] = {SBar(static_cast(-1)), SBar(20), SBar(10), SBar(15), SBar(2), SBar(2)}; + SBar values[] = { + SBar(static_cast(-1)), SBar(20), SBar(10), SBar(15), SBar(2), SBar(2)}; TBarVec a(boost::begin(values), boost::end(values)); TBarVec b(boost::begin(values), boost::end(values)); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT_EQUAL(maths::CChecksum::calculate(seed, a), + maths::CChecksum::calculate(seed, b)); b[2] = SBar(3); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); rng.random_shuffle(b.begin(), b.end()); LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); b.assign(boost::begin(values), boost::end(values)); b[b.size() - 1] = 3; LOG_DEBUG(<< "checksum a = " << maths::CChecksum::calculate(seed, a)); LOG_DEBUG(<< "checksum b = " << maths::CChecksum::calculate(seed, b)); - CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != maths::CChecksum::calculate(seed, 
b)); + CPPUNIT_ASSERT(maths::CChecksum::calculate(seed, a) != + maths::CChecksum::calculate(seed, b)); } } CppUnit::Test* CChecksumTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CChecksumTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testMemberChecksum", &CChecksumTest::testMemberChecksum)); - suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testContainers", &CChecksumTest::testContainers)); - suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testNullable", &CChecksumTest::testNullable)); - suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testAccumulators", &CChecksumTest::testAccumulators)); - suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testPair", &CChecksumTest::testPair)); - suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testArray", &CChecksumTest::testArray)); - suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>("CChecksumTest::testCombinations", &CChecksumTest::testCombinations)); + suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>( + "CChecksumTest::testMemberChecksum", &CChecksumTest::testMemberChecksum)); + suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>( + "CChecksumTest::testContainers", &CChecksumTest::testContainers)); + suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>( + "CChecksumTest::testNullable", &CChecksumTest::testNullable)); + suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>( + "CChecksumTest::testAccumulators", &CChecksumTest::testAccumulators)); + suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>( + "CChecksumTest::testPair", &CChecksumTest::testPair)); + suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>( + "CChecksumTest::testArray", &CChecksumTest::testArray)); + suiteOfTests->addTest(new CppUnit::TestCaller<CChecksumTest>( + "CChecksumTest::testCombinations", &CChecksumTest::testCombinations)); return suiteOfTests; } diff --git a/lib/maths/unittest/CClustererTest.cc b/lib/maths/unittest/CClustererTest.cc index eed425016c..15594374cd 100644 --- a/lib/maths/unittest/CClustererTest.cc +++ b/lib/maths/unittest/CClustererTest.cc @@ -77,8 +77,8 @@ void CClustererTest::testIndexGenerator() { CppUnit::Test* CClustererTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CClustererTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CClustererTest>("CClustererTest::testIndexGenerator", &CClustererTest::testIndexGenerator)); + suiteOfTests->addTest(new CppUnit::TestCaller<CClustererTest>( + "CClustererTest::testIndexGenerator", &CClustererTest::testIndexGenerator)); return suiteOfTests; } diff --git a/lib/maths/unittest/CCountMinSketchTest.cc b/lib/maths/unittest/CCountMinSketchTest.cc index c42f771981..f8c50575e2 100644 --- a/lib/maths/unittest/CCountMinSketchTest.cc +++ b/lib/maths/unittest/CCountMinSketchTest.cc @@ -55,7 +55,8 @@ void CCountMinSketchTest::testCounts() { double count = counts[i]; double estimated = sketch.count(static_cast<uint32_t>(i)); if (i % 50 == 0) { - LOG_DEBUG(<< "category = " << i << ", true count = " << count << ", estimated count = " << estimated); + LOG_DEBUG(<< "category = " << i << ", true count = " << count + << ", estimated count = " << estimated); } meanError.add(std::fabs(estimated - count)); @@ -102,7 +103,8 @@ void CCountMinSketchTest::testCounts() { for (std::size_t i = 0u; i < heavyHitters.size(); ++i) { double count = heavyHitters[i]; double estimated = sketch.count(static_cast<uint32_t>(i)); - LOG_DEBUG(<< "category = " << i << ", true count = " << count << ", estimated count = " << estimated); + LOG_DEBUG(<< "category = " << i << ", true count = " << count + << ",
estimated count = " << estimated); double relativeError = std::fabs(estimated - count) / count; CPPUNIT_ASSERT(relativeError < 0.01); meanRelativeError.add(relativeError); } - LOG_DEBUG(<< "mean relative error " << maths::CBasicStatistics::mean(meanRelativeError)); + LOG_DEBUG(<< "mean relative error " + << maths::CBasicStatistics::mean(meanRelativeError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelativeError) < 0.005); } } @@ -208,7 +211,8 @@ void CCountMinSketchTest::testPersist() { core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CCountMinSketch restoredSketch(traverser); - LOG_DEBUG(<< "orig checksum = " << origSketch.checksum() << ", new checksum = " << restoredSketch.checksum()); + LOG_DEBUG(<< "orig checksum = " << origSketch.checksum() + << ", new checksum = " << restoredSketch.checksum()); CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), restoredSketch.checksum()); std::string newXml; @@ -241,7 +245,8 @@ void CCountMinSketchTest::testPersist() { core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CCountMinSketch restoredSketch(traverser); - LOG_DEBUG(<< "orig checksum = " << origSketch.checksum() << ", new checksum = " << restoredSketch.checksum()); + LOG_DEBUG(<< "orig checksum = " << origSketch.checksum() + << ", new checksum = " << restoredSketch.checksum()); CPPUNIT_ASSERT_EQUAL(origSketch.checksum(), restoredSketch.checksum()); std::string newXml; @@ -256,11 +261,12 @@ CppUnit::Test* CCountMinSketchTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCountMinSketchTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CCountMinSketchTest>("CCountMinSketchTest::testCounts", &CCountMinSketchTest::testCounts)); - suiteOfTests->addTest(new CppUnit::TestCaller<CCountMinSketchTest>("CCountMinSketchTest::testSwap", &CCountMinSketchTest::testSwap)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CCountMinSketchTest>("CCountMinSketchTest::testPersist", &CCountMinSketchTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCountMinSketchTest>( + "CCountMinSketchTest::testCounts", &CCountMinSketchTest::testCounts)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCountMinSketchTest>( + "CCountMinSketchTest::testSwap", &CCountMinSketchTest::testSwap)); + suiteOfTests->addTest(new CppUnit::TestCaller<CCountMinSketchTest>( + "CCountMinSketchTest::testPersist", &CCountMinSketchTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CDecayRateControllerTest.cc b/lib/maths/unittest/CDecayRateControllerTest.cc index 7530ff9148..42ad920c8a 100644 --- a/lib/maths/unittest/CDecayRateControllerTest.cc +++ b/lib/maths/unittest/CDecayRateControllerTest.cc @@ -90,7 +90,8 @@ void CDecayRateControllerTest::testPersist() { TDoubleVec errors; rng.generateUniformSamples(-2.0, 6.0, 1000, errors); - maths::CDecayRateController origController(maths::CDecayRateController::E_PredictionBias, 1); + maths::CDecayRateController origController( + maths::CDecayRateController::E_PredictionBias, 1); for (std::size_t i = 0u; i < values.size(); ++i) { origController.multiplier({values[i]}, {{errors[i]}}, 3600, 1.0, 0.0005); } @@ -110,10 +111,12 @@ void CDecayRateControllerTest::testPersist() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CDecayRateController restoredController; - CPPUNIT_ASSERT_EQUAL( - true, traverser.traverseSubLevel(boost::bind(&maths::CDecayRateController::acceptRestoreTraverser, &restoredController, _1))); + CPPUNIT_ASSERT_EQUAL(true,
traverser.traverseSubLevel(boost::bind( + &maths::CDecayRateController::acceptRestoreTraverser, + &restoredController, _1))); - LOG_DEBUG(<< "orig checksum = " << origController.checksum() << ", new checksum = " << restoredController.checksum()); + LOG_DEBUG(<< "orig checksum = " << origController.checksum() + << ", new checksum = " << restoredController.checksum()); CPPUNIT_ASSERT_EQUAL(origController.checksum(), restoredController.checksum()); } } @@ -121,12 +124,13 @@ void CDecayRateControllerTest::testPersist() { CppUnit::Test* CDecayRateControllerTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDecayRateControllerTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDecayRateControllerTest::testLowCov", &CDecayRateControllerTest::testLowCov)); - suiteOfTests->addTest(new CppUnit::TestCaller("CDecayRateControllerTest::testOrderedErrors", - &CDecayRateControllerTest::testOrderedErrors)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CDecayRateControllerTest::testPersist", &CDecayRateControllerTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDecayRateControllerTest::testLowCov", &CDecayRateControllerTest::testLowCov)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDecayRateControllerTest::testOrderedErrors", + &CDecayRateControllerTest::testOrderedErrors)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDecayRateControllerTest::testPersist", &CDecayRateControllerTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CEntropySketchTest.cc b/lib/maths/unittest/CEntropySketchTest.cc index beb0b7ac72..715df64daa 100644 --- a/lib/maths/unittest/CEntropySketchTest.cc +++ b/lib/maths/unittest/CEntropySketchTest.cc @@ -47,9 +47,10 @@ void CEntropySketchTest::testAll() { rng.generateUniformSamples(1, 10, numberCategories[t], counts); std::size_t Z = std::accumulate(counts.begin(), counts.end(), 0); - maths::CEntropySketch entropy[] = {maths::CEntropySketch(static_cast(K[0])), - maths::CEntropySketch(static_cast(K[1])), - maths::CEntropySketch(static_cast(K[2]))}; + maths::CEntropySketch entropy[] = { + maths::CEntropySketch(static_cast(K[0])), + maths::CEntropySketch(static_cast(K[1])), + maths::CEntropySketch(static_cast(K[2]))}; for (std::size_t i = 0u; i < 3; ++i) { TSizeDoubleUMap p; @@ -82,12 +83,14 @@ void CEntropySketchTest::testAll() { for (std::size_t i = 0u; i < 3; ++i) { LOG_DEBUG(<< "max error = " << maxError[i][0]); LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(meanError[i])); - LOG_DEBUG(<< "large deviations = " << core::CContainerPrinter::print(epsDeviations[i])); + LOG_DEBUG(<< "large deviations = " + << core::CContainerPrinter::print(epsDeviations[i])); CPPUNIT_ASSERT(maxError[i][0] < maxMaxErrors[i]); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError[i]) < maxMeanErrors[i]); // Test additive approximation bounds. 
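        // Roughly what the assertion below checks (assuming, as above, 1000
        // random trials per configuration): a sketch of size K should err on
        // the entropy by more than eps with probability at most
        // 2 * exp(-K * eps^2 / 6), so the observed frequency of large
        // deviations is compared against that bound for each eps.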
for (std::size_t j = 0u; j < 3; ++j) { - CPPUNIT_ASSERT(epsDeviations[i][j] / 1000.0 < 2.0 * std::exp(-K[i] * eps[j] * eps[j] / 6.0)); + CPPUNIT_ASSERT(epsDeviations[i][j] / 1000.0 < + 2.0 * std::exp(-K[i] * eps[j] * eps[j] / 6.0)); } } } @@ -95,7 +98,8 @@ void CEntropySketchTest::testAll() { CppUnit::Test* CEntropySketchTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEntropySketchTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CEntropySketchTest::testAll", &CEntropySketchTest::testAll)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEntropySketchTest::testAll", &CEntropySketchTest::testAll)); return suiteOfTests; } diff --git a/lib/maths/unittest/CEqualWithToleranceTest.cc b/lib/maths/unittest/CEqualWithToleranceTest.cc index a8675edd03..109ef2246e 100644 --- a/lib/maths/unittest/CEqualWithToleranceTest.cc +++ b/lib/maths/unittest/CEqualWithToleranceTest.cc @@ -19,12 +19,16 @@ void CEqualWithToleranceTest::testScalar() { LOG_DEBUG(<< "+---------------------------------------+"); { - maths::CEqualWithTolerance abs(maths::CToleranceTypes::E_AbsoluteTolerance, 0.31); - maths::CEqualWithTolerance rel(maths::CToleranceTypes::E_RelativeTolerance, 0.01); + maths::CEqualWithTolerance abs( + maths::CToleranceTypes::E_AbsoluteTolerance, 0.31); + maths::CEqualWithTolerance rel( + maths::CToleranceTypes::E_RelativeTolerance, 0.01); maths::CEqualWithTolerance absAndRel( - maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, 0.31, 0.01); + maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, + 0.31, 0.01); maths::CEqualWithTolerance absOrRel( - maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, 0.31, 0.01); + maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, + 0.31, 0.01); { double a = 1.1; double b = 1.4; @@ -58,9 +62,11 @@ void CEqualWithToleranceTest::testScalar() { maths::CEqualWithTolerance abs(maths::CToleranceTypes::E_AbsoluteTolerance, 0.31f); maths::CEqualWithTolerance rel(maths::CToleranceTypes::E_RelativeTolerance, 0.01f); maths::CEqualWithTolerance absAndRel( - maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, 0.31f, 0.01f); + maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, + 0.31f, 0.01f); maths::CEqualWithTolerance absOrRel( - maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, 0.31f, 0.01f); + maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, + 0.31f, 0.01f); float a = 1.1f; float b = 1.4f; @@ -90,12 +96,16 @@ void CEqualWithToleranceTest::testVector() { maths::CVector epsAbs(2, 0.15 / std::sqrt(2.0)); maths::CVector epsRel(2, 0.0062 / std::sqrt(2.0)); - maths::CEqualWithTolerance> abs(maths::CToleranceTypes::E_AbsoluteTolerance, epsAbs); - maths::CEqualWithTolerance> rel(maths::CToleranceTypes::E_RelativeTolerance, epsRel); + maths::CEqualWithTolerance> abs( + maths::CToleranceTypes::E_AbsoluteTolerance, epsAbs); + maths::CEqualWithTolerance> rel( + maths::CToleranceTypes::E_RelativeTolerance, epsRel); maths::CEqualWithTolerance> absAndRel( - maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, epsAbs, epsRel); + maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, + epsAbs, epsRel); maths::CEqualWithTolerance> absOrRel( - 
maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, epsAbs, epsRel); + maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, + epsAbs, epsRel); { maths::CVector a(a_, a_ + 2); @@ -154,12 +164,16 @@ void CEqualWithToleranceTest::testMatrix() { maths::CSymmetricMatrix epsAbs(2, 0.21 / 2.0); maths::CSymmetricMatrix epsRel(2, 0.005 / 2.0); - maths::CEqualWithTolerance> abs(maths::CToleranceTypes::E_AbsoluteTolerance, epsAbs); - maths::CEqualWithTolerance> rel(maths::CToleranceTypes::E_RelativeTolerance, epsRel); + maths::CEqualWithTolerance> abs( + maths::CToleranceTypes::E_AbsoluteTolerance, epsAbs); + maths::CEqualWithTolerance> rel( + maths::CToleranceTypes::E_RelativeTolerance, epsRel); maths::CEqualWithTolerance> absAndRel( - maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, epsAbs, epsRel); + maths::CToleranceTypes::E_AbsoluteTolerance & maths::CToleranceTypes::E_RelativeTolerance, + epsAbs, epsRel); maths::CEqualWithTolerance> absOrRel( - maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, epsAbs, epsRel); + maths::CToleranceTypes::E_AbsoluteTolerance | maths::CToleranceTypes::E_RelativeTolerance, + epsAbs, epsRel); { maths::CSymmetricMatrix a(a_, a_ + 3); @@ -208,12 +222,12 @@ void CEqualWithToleranceTest::testMatrix() { CppUnit::Test* CEqualWithToleranceTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEqualWithToleranceTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEqualWithToleranceTest::testScalar", &CEqualWithToleranceTest::testScalar)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEqualWithToleranceTest::testVector", &CEqualWithToleranceTest::testVector)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEqualWithToleranceTest::testMatrix", &CEqualWithToleranceTest::testMatrix)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEqualWithToleranceTest::testScalar", &CEqualWithToleranceTest::testScalar)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEqualWithToleranceTest::testVector", &CEqualWithToleranceTest::testVector)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEqualWithToleranceTest::testMatrix", &CEqualWithToleranceTest::testMatrix)); return suiteOfTests; } diff --git a/lib/maths/unittest/CForecastTest.cc b/lib/maths/unittest/CForecastTest.cc index 896a81f3d6..b08821c1a0 100644 --- a/lib/maths/unittest/CForecastTest.cc +++ b/lib/maths/unittest/CForecastTest.cc @@ -56,13 +56,18 @@ maths::CModelParams params(core_t::TTime bucketLength) { static TTimeDoubleMap learnRates; learnRates[bucketLength] = static_cast(bucketLength) / 1800.0; double minimumSeasonalVarianceScale{0.25}; - return maths::CModelParams{ - bucketLength, learnRates[bucketLength], DECAY_RATE, minimumSeasonalVarianceScale, 6 * core::constants::HOUR, core::constants::DAY}; + return maths::CModelParams{bucketLength, + learnRates[bucketLength], + DECAY_RATE, + minimumSeasonalVarianceScale, + 6 * core::constants::HOUR, + core::constants::DAY}; } maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary decayRateControllers() { - return {{maths::CDecayRateController( - maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1), + return {{maths::CDecayRateController(maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease, + 1), 
maths::CDecayRateController(maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease | maths::CDecayRateController::E_PredictionErrorDecrease, @@ -80,8 +85,9 @@ void CForecastTest::testDailyNoLongTermTrend() { LOG_DEBUG(<< "+-------------------------------------------+"); core_t::TTime bucketLength{600}; - TDoubleVec y{0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, 120.0, 120.0, 110.0, 100.0, - 90.0, 100.0, 130.0, 80.0, 30.0, 15.0, 10.0, 8.0, 5.0, 3.0, 2.0, 0.0}; + TDoubleVec y{0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, + 120.0, 120.0, 110.0, 100.0, 90.0, 100.0, 130.0, 80.0, + 30.0, 15.0, 10.0, 8.0, 5.0, 3.0, 2.0, 0.0}; test::CRandomNumbers rng; @@ -101,12 +107,14 @@ void CForecastTest::testDailyConstantLongTermTrend() { LOG_DEBUG(<< "+-------------------------------------------------+"); core_t::TTime bucketLength{3600}; - TDoubleVec y{0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, 80.0, 100.0, 110.0, 120.0, - 110.0, 100.0, 90.0, 80.0, 30.0, 15.0, 10.0, 8.0, 5.0, 3.0, 2.0, 0.0}; + TDoubleVec y{0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, + 80.0, 100.0, 110.0, 120.0, 110.0, 100.0, 90.0, 80.0, + 30.0, 15.0, 10.0, 8.0, 5.0, 3.0, 2.0, 0.0}; auto trend = [&y, bucketLength](core_t::TTime time, double noise) { core_t::TTime i{(time % 86400) / bucketLength}; - return 0.25 * static_cast(time) / static_cast(bucketLength) + y[i] + noise; + return 0.25 * static_cast(time) / static_cast(bucketLength) + + y[i] + noise; }; this->test(trend, bucketLength, 60, 64.0, 15.0, 0.02); @@ -119,18 +127,23 @@ void CForecastTest::testDailyVaryingLongTermTrend() { core_t::TTime bucketLength{3600}; double day{86400.0}; - TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, 20.0 * day, 25.0 * day, 30.0 * day, 35.0 * day, - 40.0 * day, 45.0 * day, 50.0 * day, 55.0 * day, 60.0 * day, 65.0 * day, 70.0 * day, 75.0 * day, - 80.0 * day, 85.0 * day, 90.0 * day, 95.0 * day, 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; - TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, 35.0, 25.0, 35.0, 45.0, - 55.0, 62.0, 70.0, 76.0, 79.0, 82.0, 86.0, 90.0, 95.0, 100.0, 106.0, 112.0}; + TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, + 20.0 * day, 25.0 * day, 30.0 * day, 35.0 * day, + 40.0 * day, 45.0 * day, 50.0 * day, 55.0 * day, + 60.0 * day, 65.0 * day, 70.0 * day, 75.0 * day, + 80.0 * day, 85.0 * day, 90.0 * day, 95.0 * day, + 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; + TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, + 35.0, 25.0, 35.0, 45.0, 55.0, 62.0, 70.0, 76.0, + 79.0, 82.0, 86.0, 90.0, 95.0, 100.0, 106.0, 112.0}; maths::CSpline<> trend_(maths::CSplineTypes::E_Cubic); trend_.interpolate(times, values, maths::CSplineTypes::E_Natural); auto trend = [&trend_](core_t::TTime time, double noise) { double time_{static_cast(time)}; - return trend_.value(time_) + 8.0 * std::sin(boost::math::double_constants::two_pi * time_ / 43200.0) + noise; + return trend_.value(time_) + + 8.0 * std::sin(boost::math::double_constants::two_pi * time_ / 43200.0) + noise; }; this->test(trend, bucketLength, 100, 9.0, 13.0, 0.04); @@ -142,8 +155,9 @@ void CForecastTest::testComplexNoLongTermTrend() { LOG_DEBUG(<< "+---------------------------------------------+"); core_t::TTime bucketLength{3600}; - TDoubleVec y{0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, 80.0, 100.0, 110.0, 120.0, - 110.0, 100.0, 90.0, 80.0, 60.0, 40.0, 30.0, 20.0, 10.0, 10.0, 5.0, 0.0}; + TDoubleVec y{0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, + 80.0, 100.0, 110.0, 120.0, 110.0, 
100.0, 90.0, 80.0, + 60.0, 40.0, 30.0, 20.0, 10.0, 10.0, 5.0, 0.0}; TDoubleVec scale{1.0, 1.1, 1.05, 0.95, 0.9, 0.3, 0.2}; auto trend = [&y, &scale, bucketLength](core_t::TTime time, double noise) { @@ -161,14 +175,16 @@ void CForecastTest::testComplexConstantLongTermTrend() { LOG_DEBUG(<< "+---------------------------------------------------+"); core_t::TTime bucketLength{3600}; - TDoubleVec y{0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, 80.0, 100.0, 110.0, 120.0, - 110.0, 100.0, 90.0, 80.0, 60.0, 40.0, 30.0, 20.0, 10.0, 10.0, 5.0, 0.0}; + TDoubleVec y{0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, + 80.0, 100.0, 110.0, 120.0, 110.0, 100.0, 90.0, 80.0, + 60.0, 40.0, 30.0, 20.0, 10.0, 10.0, 5.0, 0.0}; TDoubleVec scale{1.0, 1.1, 1.05, 0.95, 0.9, 0.3, 0.2}; auto trend = [&y, &scale, bucketLength](core_t::TTime time, double noise) { core_t::TTime d{(time % 604800) / 86400}; core_t::TTime h{(time % 86400) / bucketLength}; - return 0.25 * static_cast(time) / static_cast(bucketLength) + scale[d] * (20.0 + y[h] + noise); + return 0.25 * static_cast(time) / static_cast(bucketLength) + + scale[d] * (20.0 + y[h] + noise); }; this->test(trend, bucketLength, 60, 24.0, 17.0, 0.04); @@ -181,13 +197,18 @@ void CForecastTest::testComplexVaryingLongTermTrend() { core_t::TTime bucketLength{3600}; double day{86400.0}; - TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, 20.0 * day, 25.0 * day, 30.0 * day, 35.0 * day, - 40.0 * day, 45.0 * day, 50.0 * day, 55.0 * day, 60.0 * day, 65.0 * day, 70.0 * day, 75.0 * day, - 80.0 * day, 85.0 * day, 90.0 * day, 95.0 * day, 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; - TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, 35.0, 25.0, 35.0, 45.0, - 55.0, 62.0, 70.0, 76.0, 79.0, 82.0, 86.0, 90.0, 95.0, 100.0, 106.0, 112.0}; - TDoubleVec y{0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 11.0, 12.0, - 11.0, 10.0, 9.0, 8.0, 6.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.5, 0.0}; + TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, + 20.0 * day, 25.0 * day, 30.0 * day, 35.0 * day, + 40.0 * day, 45.0 * day, 50.0 * day, 55.0 * day, + 60.0 * day, 65.0 * day, 70.0 * day, 75.0 * day, + 80.0 * day, 85.0 * day, 90.0 * day, 95.0 * day, + 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; + TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, + 35.0, 25.0, 35.0, 45.0, 55.0, 62.0, 70.0, 76.0, + 79.0, 82.0, 86.0, 90.0, 95.0, 100.0, 106.0, 112.0}; + TDoubleVec y{0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, + 8.0, 10.0, 11.0, 12.0, 11.0, 10.0, 9.0, 8.0, + 6.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.5, 0.0}; TDoubleVec scale{1.0, 1.1, 1.05, 0.95, 0.9, 0.3, 0.2}; maths::CSpline<> trend_(maths::CSplineTypes::E_Cubic); @@ -213,9 +234,12 @@ void CForecastTest::testNonNegative() { test::CRandomNumbers rng; maths::CTimeSeriesDecomposition trend(0.012, bucketLength); - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); - maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{decayRateControllers()}; - maths::CUnivariateTimeSeriesModel model(params(bucketLength), TAG, trend, prior, &controllers); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, DECAY_RATE); + maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{ + decayRateControllers()}; + maths::CUnivariateTimeSeriesModel model(params(bucketLength), TAG, trend, + prior, &controllers); LOG_DEBUG(<< "*** learn ***"); @@ -252,7 +276,8 @@ 
void CForecastTest::testNonNegative() { core_t::TTime end{time + 20 * core::constants::DAY}; std::string m; TModelPtr forecastModel(model.cloneForForecast()); - forecastModel->forecast(start, end, 95.0, MINIMUM_VALUE, MAXIMUM_VALUE, boost::bind(&mockSink, _1, boost::ref(prediction)), m); + forecastModel->forecast(start, end, 95.0, MINIMUM_VALUE, MAXIMUM_VALUE, + boost::bind(&mockSink, _1, boost::ref(prediction)), m); std::size_t outOfBounds{0}; std::size_t count{0}; @@ -260,13 +285,15 @@ void CForecastTest::testNonNegative() { for (std::size_t i = 0u; i < prediction.size(); ++i) { TDoubleVec noise; rng.generateNormalSamples(2.0, 3.0, 48, noise); - for (auto value = noise.begin(); i < prediction.size() && value != noise.end(); ++i, ++value, time += bucketLength) { + for (auto value = noise.begin(); i < prediction.size() && value != noise.end(); + ++i, ++value, time += bucketLength) { CPPUNIT_ASSERT(prediction[i].s_LowerBound >= 0); CPPUNIT_ASSERT(prediction[i].s_Predicted >= 0); CPPUNIT_ASSERT(prediction[i].s_UpperBound >= 0); double y{std::max(*value, 0.0)}; - outOfBounds += (y < prediction[i].s_LowerBound || y > prediction[i].s_UpperBound ? 1 : 0); + outOfBounds += + (y < prediction[i].s_LowerBound || y > prediction[i].s_UpperBound ? 1 : 0); ++count; //actual.push_back(y); //ly.push_back(prediction[i].s_LowerBound); @@ -275,7 +302,8 @@ void CForecastTest::testNonNegative() { } } - double percentageOutOfBounds{100.0 * static_cast(outOfBounds) / static_cast(count)}; + double percentageOutOfBounds{100.0 * static_cast(outOfBounds) / + static_cast(count)}; LOG_DEBUG(<< "% out of bounds = " << percentageOutOfBounds); //file << "actual = " << core::CContainerPrinter::print(actual) << ";\n"; @@ -296,16 +324,22 @@ void CForecastTest::testFinancialIndex() { TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; - CPPUNIT_ASSERT( - test::CTimeSeriesTestData::parse("testfiles/financial_index.csv", timeseries, startTime, endTime, "^([0-9]+),([0-9\\.]+)")); + CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/financial_index.csv", + timeseries, startTime, endTime, + "^([0-9]+),([0-9\\.]+)")); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); + LOG_DEBUG(<< "timeseries = " + << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) + << " ..."); maths::CTimeSeriesDecomposition trend(0.012, bucketLength); - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); - maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{decayRateControllers()}; - maths::CUnivariateTimeSeriesModel model(params(bucketLength), TAG, trend, prior, &controllers); + maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, DECAY_RATE); + maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{ + decayRateControllers()}; + maths::CUnivariateTimeSeriesModel model(params(bucketLength), TAG, trend, + prior, &controllers); LOG_DEBUG(<< "*** learn ***"); @@ -326,7 +360,9 @@ void CForecastTest::testFinancialIndex() { .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); - model.addSamples(params, {core::make_triple(timeseries[i].first, TDouble2Vec{timeseries[i].second}, TAG)}); + model.addSamples( + params, {core::make_triple(timeseries[i].first, + 
TDouble2Vec{timeseries[i].second}, TAG)}); //actual.push_back(timeseries[i].second); } @@ -337,15 +373,18 @@ void CForecastTest::testFinancialIndex() { core_t::TTime end{timeseries[timeseries.size() - 1].first}; std::string m; TModelPtr forecastModel(model.cloneForForecast()); - forecastModel->forecast(start, end, 99.0, MINIMUM_VALUE, MAXIMUM_VALUE, boost::bind(&mockSink, _1, boost::ref(prediction)), m); + forecastModel->forecast(start, end, 99.0, MINIMUM_VALUE, MAXIMUM_VALUE, + boost::bind(&mockSink, _1, boost::ref(prediction)), m); std::size_t outOfBounds{0}; std::size_t count{0}; TMeanAccumulator error; - for (std::size_t i = n, j = 0u; i < timeseries.size() && j < prediction.size(); ++i, ++j) { + for (std::size_t i = n, j = 0u; + i < timeseries.size() && j < prediction.size(); ++i, ++j) { double yi{timeseries[i].second}; - outOfBounds += (yi < prediction[j].s_LowerBound || yi > prediction[j].s_UpperBound ? 1 : 0); + outOfBounds += + (yi < prediction[j].s_LowerBound || yi > prediction[j].s_UpperBound ? 1 : 0); ++count; error.add(std::fabs(yi - prediction[j].s_Predicted) / std::fabs(yi)); //actual.push_back(yi); @@ -354,7 +393,8 @@ void CForecastTest::testFinancialIndex() { //uy.push_back(prediction[j].s_UpperBound); } - double percentageOutOfBounds{100.0 * static_cast(outOfBounds) / static_cast(count)}; + double percentageOutOfBounds{100.0 * static_cast(outOfBounds) / + static_cast(count)}; LOG_DEBUG(<< "% out of bounds = " << percentageOutOfBounds); LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); @@ -370,20 +410,26 @@ void CForecastTest::testFinancialIndex() { CppUnit::Test* CForecastTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CForecastTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CForecastTest::testDailyNoLongTermTrend", &CForecastTest::testDailyNoLongTermTrend)); - suiteOfTests->addTest(new CppUnit::TestCaller("CForecastTest::testDailyConstantLongTermTrend", - &CForecastTest::testDailyConstantLongTermTrend)); - suiteOfTests->addTest(new CppUnit::TestCaller("CForecastTest::testDailyVaryingLongTermTrend", - &CForecastTest::testDailyVaryingLongTermTrend)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CForecastTest::testComplexNoLongTermTrend", &CForecastTest::testComplexNoLongTermTrend)); - suiteOfTests->addTest(new CppUnit::TestCaller("CForecastTest::testComplexConstantLongTermTrend", - &CForecastTest::testComplexConstantLongTermTrend)); - suiteOfTests->addTest(new CppUnit::TestCaller("CForecastTest::testComplexVaryingLongTermTrend", - &CForecastTest::testComplexVaryingLongTermTrend)); - suiteOfTests->addTest(new CppUnit::TestCaller("CForecastTest::testNonNegative", &CForecastTest::testNonNegative)); - suiteOfTests->addTest(new CppUnit::TestCaller("CForecastTest::testFinancialIndex", &CForecastTest::testFinancialIndex)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CForecastTest::testDailyNoLongTermTrend", &CForecastTest::testDailyNoLongTermTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CForecastTest::testDailyConstantLongTermTrend", + &CForecastTest::testDailyConstantLongTermTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CForecastTest::testDailyVaryingLongTermTrend", + &CForecastTest::testDailyVaryingLongTermTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CForecastTest::testComplexNoLongTermTrend", &CForecastTest::testComplexNoLongTermTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CForecastTest::testComplexConstantLongTermTrend", + 
&CForecastTest::testComplexConstantLongTermTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CForecastTest::testComplexVaryingLongTermTrend", + &CForecastTest::testComplexVaryingLongTermTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CForecastTest::testNonNegative", &CForecastTest::testNonNegative)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CForecastTest::testFinancialIndex", &CForecastTest::testFinancialIndex)); return suiteOfTests; } @@ -406,12 +452,12 @@ void CForecastTest::test(TTrend trend, test::CRandomNumbers rng; - maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{decayRateControllers()}; - maths::CUnivariateTimeSeriesModel model(params(bucketLength), - TAG, - maths::CTimeSeriesDecomposition(0.012, bucketLength), - maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE), - &controllers); + maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary controllers{ + decayRateControllers()}; + maths::CUnivariateTimeSeriesModel model( + params(bucketLength), TAG, maths::CTimeSeriesDecomposition(0.012, bucketLength), + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE), + &controllers); core_t::TTime time{0}; TDouble2Vec4VecVec weights{{{1.0}}}; @@ -439,7 +485,8 @@ void CForecastTest::test(TTrend trend, core_t::TTime end{time + 2 * core::constants::WEEK}; TModelPtr forecastModel(model.cloneForForecast()); std::string m; - forecastModel->forecast(start, end, 80.0, MINIMUM_VALUE, MAXIMUM_VALUE, boost::bind(&mockSink, _1, boost::ref(prediction)), m); + forecastModel->forecast(start, end, 80.0, MINIMUM_VALUE, MAXIMUM_VALUE, + boost::bind(&mockSink, _1, boost::ref(prediction)), m); std::size_t outOfBounds{0}; std::size_t count{0}; @@ -449,10 +496,12 @@ void CForecastTest::test(TTrend trend, TDoubleVec noise; rng.generateNormalSamples(0.0, noiseVariance, 86400 / bucketLength, noise); TDoubleVec day; - for (std::size_t j = 0u; i < prediction.size() && j < noise.size(); ++i, ++j, time += bucketLength) { + for (std::size_t j = 0u; i < prediction.size() && j < noise.size(); + ++i, ++j, time += bucketLength) { double yj{trend(time, noise[j])}; day.push_back(yj); - outOfBounds += (yj < prediction[i].s_LowerBound || yj > prediction[i].s_UpperBound ? 1 : 0); + outOfBounds += + (yj < prediction[i].s_LowerBound || yj > prediction[i].s_UpperBound ? 
1 : 0); ++count; error.add(std::fabs(yj - prediction[i].s_Predicted) / std::fabs(yj)); //actual.push_back(yj); @@ -462,7 +511,8 @@ void CForecastTest::test(TTrend trend, } } - double percentageOutOfBounds{100.0 * static_cast(outOfBounds) / static_cast(count)}; + double percentageOutOfBounds{100.0 * static_cast(outOfBounds) / + static_cast(count)}; LOG_DEBUG(<< "% out of bounds = " << percentageOutOfBounds); LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); diff --git a/lib/maths/unittest/CGammaRateConjugateTest.cc b/lib/maths/unittest/CGammaRateConjugateTest.cc index 1c93435d18..97f0c567a0 100644 --- a/lib/maths/unittest/CGammaRateConjugateTest.cc +++ b/lib/maths/unittest/CGammaRateConjugateTest.cc @@ -42,8 +42,9 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumula using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CGammaRateConjugate = CPriorTestInterfaceMixin; -CGammaRateConjugate -makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& offset = 0.0, const double& decayRate = 0.0) { +CGammaRateConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, + const double& offset = 0.0, + const double& decayRate = 0.0) { return CGammaRateConjugate::nonInformativePrior(dataType, offset, decayRate, 0.0); } } @@ -92,9 +93,11 @@ void CGammaRateConjugateTest::testMultipleUpdate() { maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), + TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); } - filter2.addSamples(weightStyle, scaledSamples, TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(weightStyle, scaledSamples, + TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_RelativeTolerance, 0.03); @@ -152,8 +155,8 @@ void CGammaRateConjugateTest::testPropagation() { double propagatedShape = filter.likelihoodShape(); double propagatedRate = filter.likelihoodRate(); - LOG_DEBUG(<< "shape = " << shape << ", rate = " << rate << ", propagatedShape = " << propagatedShape - << ", propagatedRate = " << propagatedRate); + LOG_DEBUG(<< "shape = " << shape << ", rate = " << rate << ", propagatedShape = " + << propagatedShape << ", propagatedRate = " << propagatedRate); CPPUNIT_ASSERT_DOUBLES_EQUAL(shape, propagatedShape, eps); CPPUNIT_ASSERT_DOUBLES_EQUAL(rate, propagatedRate, eps); @@ -187,7 +190,8 @@ void CGammaRateConjugateTest::testShapeEstimation() { using TGammaRateConjugateVec = std::vector; unsigned int nAggregate = 50u; - TGammaRateConjugateVec filters(nAggregate, makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); + TGammaRateConjugateVec filters( + nAggregate, makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); double previousError = std::numeric_limits::max(); double averageShape = 0.0; @@ -238,7 +242,8 @@ void CGammaRateConjugateTest::testRateEstimation() { const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nTests = 100u; - const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, + 85.0, 90.0, 95.0, 99.0}; for (size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; @@ -261,7 +266,8 
@@ void CGammaRateConjugateTest::testRateEstimation() { } for (size_t j = 0; j < boost::size(testIntervals); ++j) { - TDoubleDoublePr confidenceInterval = filter.confidenceIntervalRate(testIntervals[j]); + TDoubleDoublePr confidenceInterval = + filter.confidenceIntervalRate(testIntervals[j]); if (rate < confidenceInterval.first || rate > confidenceInterval.second) { ++errors[j]; @@ -271,7 +277,8 @@ void CGammaRateConjugateTest::testRateEstimation() { for (size_t j = 0; j < boost::size(testIntervals); ++j) { // The number of errors should be inside the percentile bounds. - unsigned int maximumErrors = static_cast(std::ceil((1.0 - testIntervals[j] / 100.0) * nTests)); + unsigned int maximumErrors = static_cast( + std::ceil((1.0 - testIntervals[j] / 100.0) * nTests)); LOG_DEBUG(<< "errors = " << errors[j] << ", maximumErrors = " << maximumErrors); @@ -300,17 +307,16 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { filter.addSamples(samples); maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight}; + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, + maths_t::E_SampleCountWeight}; double weights[] = {0.1, 1.0, 10.0}; for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]), - TDouble1Vec(1, 1000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), - lb, - ub); + filter.minusLogJointCdf( + maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 1000.0), + TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -340,7 +346,8 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { rng.generateGammaSamples(shape, scale, numberSamples[i], samples); for (size_t j = 0; j < boost::size(decayRates); ++j) { - CGammaRateConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[j])); + CGammaRateConjugate filter( + makePrior(maths_t::E_ContinuousData, 0.0, decayRates[j])); for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); @@ -351,7 +358,8 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { // of the c.d.f. at a range of deltas from the true mean. 
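        // Concretely, for x = mean + delta * sd the density
        // exp(jointLogMarginalLikelihood(x)) should match the centred
        // difference (F(x + eps) - F(x - eps)) / (2 * eps) of the c.d.f.,
        // which approximates the density to O(eps^2) for smooth F; that is
        // the comparison set up below.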
const double eps = 1e-4; - double deltas[] = {-2.0, -1.6, -1.2, -0.8, -0.4, -0.2, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0}; + double deltas[] = {-2.0, -1.6, -1.2, -0.8, -0.4, -0.2, 0.0, + 0.5, 1.0, 2.0, 3.0, 4.0, 5.0}; for (size_t k = 0; k < boost::size(deltas); ++k) { double x = mean + deltas[k] * std::sqrt(variance); @@ -360,7 +368,8 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { LOG_DEBUG(<< "number = " << numberSamples[i] << ", sample = " << sample[0]); double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood(sample, logLikelihood)); double pdf = std::exp(logLikelihood); double lowerBound = 0.0, upperBound = 0.0; @@ -409,25 +418,30 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { TDouble1Vec sample(1, samples[i]); filter.addSamples(sample); double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood(sample, logLikelihood)); differentialEntropy -= logLikelihood; } differentialEntropy /= static_cast(samples.size()); - LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); + LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy + << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.0025); } - const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0}; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, + 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, + 2.0, 2.5, 3.0, 4.0, 5.0}; CGammaRateConjugate filter(makePrior()); TDoubleVec samples; rng.generateGammaSamples(shape, scale, 1000, samples); filter.addSamples(samples); - const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0}; + const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, + 50.0, 60.0, 70.0, 80.0, 95.0}; { // Test that marginal likelihood confidence intervals are @@ -438,7 +452,8 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { double q1, q2; filter.marginalLikelihoodQuantileForTest(50.0 - percentages[i] / 2.0, 1e-3, q1); filter.marginalLikelihoodQuantileForTest(50.0 + percentages[i] / 2.0, 1e-3, q2); - TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[i]); + TDoubleDoublePr interval = + filter.marginalLikelihoodConfidenceInterval(percentages[i]); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.02); @@ -460,9 +475,12 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { LOG_DEBUG(<< "*** vs = " << vs << " ***"); for (std::size_t j = 0u; j < boost::size(percentages); ++j) { boost::math::gamma_distribution<> scaledGamma(shape / vs, vs * scale); - double q1 = boost::math::quantile(scaledGamma, (50.0 - percentages[j] / 2.0) / 100.0); - double q2 = boost::math::quantile(scaledGamma, (50.0 + percentages[j] / 2.0) / 100.0); - TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[j], weightStyle, weight); + double q1 = boost::math::quantile( + scaledGamma, (50.0 - percentages[j] / 2.0) / 100.0); + double q2 = boost::math::quantile( + 
scaledGamma, (50.0 + percentages[j] / 2.0) / 100.0); + TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( + percentages[j], weightStyle, weight); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.4); @@ -512,12 +530,14 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMean() { CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); if (k % 10 == 0) { - LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); + LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() + << ", expectedMean = " << expectedMean); } // The error is mainly due to the truncation in the // integration range used to compute the expected mean. - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 1e-3 * expectedMean); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + expectedMean, filter.marginalLikelihoodMean(), 1e-3 * expectedMean); } } } @@ -533,7 +553,8 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMode() { const double shapes[] = {5.0, 20.0, 40.0}; const double scales[] = {1.0, 10.0, 20.0}; - const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0}; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, + 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0}; test::CRandomNumbers rng; @@ -553,12 +574,17 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMode() { for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; weight[0] = vs; - boost::math::gamma_distribution<> scaledGamma(shapes[i] / vs, vs * scales[j]); + boost::math::gamma_distribution<> scaledGamma(shapes[i] / vs, + vs * scales[j]); double expectedMode = boost::math::mode(scaledGamma); - LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight) + LOG_DEBUG(<< "marginalLikelihoodMode = " + << filter.marginalLikelihoodMode(weightStyle, weight) << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 0.28 * expectedMode + 0.3); - double error = std::fabs(filter.marginalLikelihoodMode(weightStyle, weight) - expectedMode); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), + 0.28 * expectedMode + 0.3); + double error = std::fabs( + filter.marginalLikelihoodMode(weightStyle, weight) - expectedMode); relativeError.add(error == 0.0 ? 0.0 : error / expectedMode); } LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -603,15 +629,20 @@ void CGammaRateConjugateTest::testMarginalLikelihoodVariance() { CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); if (k % 10 == 0) { - LOG_DEBUG(<< "marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() + LOG_DEBUG(<< "marginalLikelihoodVariance = " + << filter.marginalLikelihoodVariance() << ", expectedVariance = " << expectedVariance); } // The error is mainly due to the truncation in the // integration range used to compute the expected mean. 
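            // (The variance integrand x^2 f(x) decays more slowly in the
            // tails than the mean integrand x f(x), which is why the
            // tolerance here is 1% of the expected variance versus 0.1% of
            // the expected mean in the mean test above.)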
- CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.01 * expectedVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, + filter.marginalLikelihoodVariance(), + 0.01 * expectedVariance); - relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance); + relativeError.add(std::fabs(expectedVariance - + filter.marginalLikelihoodVariance()) / + expectedVariance); } LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -662,9 +693,10 @@ void CGammaRateConjugateTest::testSampleMarginalLikelihood() { sampledMeanVar = std::for_each(sampled.begin(), sampled.end(), sampledMeanVar); CPPUNIT_ASSERT_EQUAL(i + 1, sampled.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(sampleMeanVar), maths::CBasicStatistics::mean(sampledMeanVar), eps); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::variance(sampleMeanVar), maths::CBasicStatistics::variance(sampledMeanVar), eps); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(sampleMeanVar), + maths::CBasicStatistics::mean(sampledMeanVar), eps); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::variance(sampleMeanVar), + maths::CBasicStatistics::variance(sampledMeanVar), eps); } TMeanAccumulator meanVarError; @@ -682,14 +714,16 @@ void CGammaRateConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodMean() << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); - LOG_DEBUG(<< "expectedVar = " << filter.marginalLikelihoodVariance() - << ", sampledVar = " << maths::CBasicStatistics::variance(sampledMoments)); + LOG_DEBUG(<< "expectedVar = " << filter.marginalLikelihoodVariance() << ", sampledVar = " + << maths::CBasicStatistics::variance(sampledMoments)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMoments), 1e-8); + CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), + maths::CBasicStatistics::mean(sampledMoments), 1e-8); CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(), maths::CBasicStatistics::variance(sampledMoments), 0.25 * filter.marginalLikelihoodVariance()); - meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - maths::CBasicStatistics::variance(sampledMoments)) / + meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - + maths::CBasicStatistics::variance(sampledMoments)) / filter.marginalLikelihoodVariance()); std::sort(sampled.begin(), sampled.end()); @@ -699,8 +733,8 @@ void CGammaRateConjugateTest::testSampleMarginalLikelihood() { double expectedQuantile; CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile)); - LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[j - 1u] << "," - << sampled[j] << "]"); + LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" + << sampled[j - 1u] << "," << sampled[j] << "]"); CPPUNIT_ASSERT(expectedQuantile >= sampled[j - 1u]); CPPUNIT_ASSERT(expectedQuantile <= sampled[j]); @@ -743,7 +777,8 @@ void CGammaRateConjugateTest::testCdf() { CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, -1.0), lowerBound, upperBound)); double f = (lowerBound + upperBound) / 2.0; - CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), lowerBound, upperBound)); + CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), + lowerBound, upperBound)); 
double fComplement = (lowerBound + upperBound) / 2.0; LOG_DEBUG(<< "log(F(x)) = " << -f << ", log(1 - F(x)) = " << fComplement); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(std::numeric_limits::min()), -f, 1e-10); @@ -754,10 +789,12 @@ void CGammaRateConjugateTest::testCdf() { CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lowerBound, upperBound)); f = (lowerBound + upperBound) / 2.0; - CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lowerBound, upperBound)); + CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), + lowerBound, upperBound)); fComplement = (lowerBound + upperBound) / 2.0; - LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); + LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " + << (fComplement == 0.0 ? fComplement : -fComplement)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10); } } @@ -811,15 +848,19 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { double fx; filter.jointLogMarginalLikelihood(sample, fx); - double px = static_cast(std::lower_bound(likelihoods.begin(), likelihoods.end(), fx) - likelihoods.begin()) / + double px = static_cast(std::lower_bound(likelihoods.begin(), + likelihoods.end(), fx) - + likelihoods.begin()) / static_cast(likelihoods.size()); double lb, ub; filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb, ub); - double ssd = std::sqrt(px * (1.0 - px) / static_cast(samples.size())); + double ssd = std::sqrt(px * (1.0 - px) / + static_cast(samples.size())); - LOG_DEBUG(<< "expected P(x) = " << px << ", actual P(x) = " << (lb + ub) / 2.0 << " sample sd = " << ssd); + LOG_DEBUG(<< "expected P(x) = " << px << ", actual P(x) = " + << (lb + ub) / 2.0 << " sample sd = " << ssd); CPPUNIT_ASSERT_DOUBLES_EQUAL(px, (lb + ub) / 2.0, 3.0 * ssd); @@ -829,7 +870,8 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, TDouble1Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode(weightStyle, + TDouble1Vec(1, vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -839,57 +881,42 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), + TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedBelow, weightStyle, + TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, 
vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedAbove, weightStyle, + TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), + TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -944,7 +971,8 @@ void CGammaRateConjugateTest::testAnomalyScore() { rng.generateGammaSamples(shapes[i], scales[j], 500, samples); for (size_t k = 0; k < boost::size(decayRates); ++k) { - CGammaRateConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[k])); + CGammaRateConjugate filter( + makePrior(maths_t::E_ContinuousData, 0.0, decayRates[k])); ++test; @@ -957,7 +985,8 @@ void CGammaRateConjugateTest::testAnomalyScore() { for (unsigned int time = 0; time < samples.size(); ++time) { double sample = samples[time] + - (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - boost::begin(anomalyTimes)] * + (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - + boost::begin(anomalyTimes)] * boost::math::standard_deviation(gamma)); TDouble1Vec sampleVec(1, sample); @@ -977,28 +1006,29 @@ void CGammaRateConjugateTest::testAnomalyScore() { x << "];\n"; scores << "];\n"; - file << x.str() << scores.str() << "plot(x" << test << ", score" << test << ");\n" + file << x.str() << scores.str() << "plot(x" << test << ", score" + << test << ");\n" << "input(\"Hit any key for next test\");\n\n"; TUIntVec falsePositives; std::set_difference(candidateAnomalies.begin(), candidateAnomalies.end(), - boost::begin(anomalyTimes), - boost::end(anomalyTimes), + boost::begin(anomalyTimes), boost::end(anomalyTimes), std::back_inserter(falsePositives)); - double falsePositiveRate = static_cast(falsePositives.size()) / static_cast(samples.size()); + double falsePositiveRate = static_cast(falsePositives.size()) / + static_cast(samples.size()); 
totalFalsePositiveRate += falsePositiveRate; TUIntVec positives; - std::set_intersection(candidateAnomalies.begin(), - candidateAnomalies.end(), - boost::begin(anomalyTimes), - boost::end(anomalyTimes), - std::back_inserter(positives)); + std::set_intersection( + candidateAnomalies.begin(), candidateAnomalies.end(), + boost::begin(anomalyTimes), boost::end(anomalyTimes), + std::back_inserter(positives)); - LOG_DEBUG(<< "falsePositiveRate = " << falsePositiveRate << ", positives = " << positives.size()); + LOG_DEBUG(<< "falsePositiveRate = " << falsePositiveRate + << ", positives = " << positives.size()); // False alarm rate should be less than 0.6%. CPPUNIT_ASSERT(falsePositiveRate <= 0.006); @@ -1049,7 +1079,8 @@ void CGammaRateConjugateTest::testOffset() { for (size_t i = 0; i < boost::size(dataTypes); ++i) { for (size_t j = 0; j < boost::size(offsets); ++j) { for (size_t k = 0; k < boost::size(decayRates); ++k) { - CGammaRateConjugate filter1(makePrior(dataTypes[i], offsets[j], decayRates[k])); + CGammaRateConjugate filter1( + makePrior(dataTypes[i], offsets[j], decayRates[k])); CGammaRateConjugate filter2(makePrior(dataTypes[i], 0.0, decayRates[k])); for (std::size_t l = 0u; l < samples.size(); ++l) { @@ -1066,14 +1097,16 @@ void CGammaRateConjugateTest::testOffset() { double likelihood1; filter1.jointLogMarginalLikelihood(offsetSampleVec, likelihood1); double lowerBound1, upperBound1; - filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, offsetSampleVec, lowerBound1, upperBound1); + filter1.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, offsetSampleVec, lowerBound1, upperBound1); CPPUNIT_ASSERT_EQUAL(lowerBound1, upperBound1); double probability1 = (lowerBound1 + upperBound1) / 2.0; double likelihood2; filter2.jointLogMarginalLikelihood(sample, likelihood2); double lowerBound2, upperBound2; - filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound2, upperBound2); + filter2.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, lowerBound2, upperBound2); CPPUNIT_ASSERT_EQUAL(lowerBound2, upperBound2); double probability2 = (lowerBound2 + upperBound2) / 2.0; @@ -1161,14 +1194,16 @@ void CGammaRateConjugateTest::testIntegerData() { TDouble1Vec sample(1, x); double l1, u1; - CPPUNIT_ASSERT(filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, l1, u1)); + CPPUNIT_ASSERT(filter1.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, l1, u1)); CPPUNIT_ASSERT_EQUAL(l1, u1); double p1 = (l1 + u1) / 2.0; meanProbability1.add(p1); sample[0] += uniform[k]; double l2, u2; - CPPUNIT_ASSERT(filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, l2, u2)); + CPPUNIT_ASSERT(filter2.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, l2, u2)); CPPUNIT_ASSERT_EQUAL(l2, u2); double p2 = (l2 + u2) / 2.0; meanProbability2.add(p2); @@ -1176,7 +1211,8 @@ void CGammaRateConjugateTest::testIntegerData() { double p1 = maths::CBasicStatistics::mean(meanProbability1); double p2 = maths::CBasicStatistics::mean(meanProbability2); - LOG_DEBUG(<< "shape = " << shapes[i] << ", rate = " << scales[j] << ", p1 = " << p1 << ", p2 = " << p2); + LOG_DEBUG(<< "shape = " << shapes[i] << ", rate = " << scales[j] + << ", p1 = " << p1 << ", p2 = " << p2); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 0.15 * p1); meanError.add(fabs(p1 - p2)); @@ -1213,7 +1249,8 @@ void CGammaRateConjugateTest::testLowVariationData() { TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 
2.0; - LOG_DEBUG(<< "68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate s.t.d. = " << sigma); + LOG_DEBUG(<< "68% confidence interval " << core::CContainerPrinter::print(interval) + << ", approximate s.t.d. = " << sigma); CPPUNIT_ASSERT_DOUBLES_EQUAL(1e-4, sigma / 430.5, 5e-6); } } @@ -1230,8 +1267,9 @@ void CGammaRateConjugateTest::testPersist() { maths::CGammaRateConjugate origFilter(makePrior(maths_t::E_ContinuousData, 0.1)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), + TDouble1Vec(1, samples[i]), + TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); @@ -1249,15 +1287,14 @@ void CGammaRateConjugateTest::testPersist() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, - decayRate + 0.1, - maths::MINIMUM_CLUSTER_SPLIT_FRACTION, - maths::MINIMUM_CLUSTER_SPLIT_COUNT, - maths::MINIMUM_CATEGORY_COUNT); + maths::SDistributionRestoreParams params( + maths_t::E_ContinuousData, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, + maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); maths::CGammaRateConjugate restoredFilter(params, traverser); uint64_t checksum = origFilter.checksum(); - LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG(<< "orig checksum = " << checksum + << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1293,7 +1330,8 @@ void CGammaRateConjugateTest::testVarianceScale() { // Finally, we test update with scaled samples produces the // correct posterior. 
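    // For a gamma(shape, scale) distribution the mean is shape * scale and
    // the variance is shape * scale^2, so scaling the variance by a factor c
    // at fixed mean maps (shape, scale) to (shape / c, c * scale); this is
    // the transformation applied to the reference distributions below.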
-    maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight};
+    maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight,
+                                            maths_t::E_SampleCountVarianceScaleWeight};
 
     for (std::size_t s = 0u; s < boost::size(scales); ++s) {
         const double shape = 3.0;
@@ -1307,7 +1345,8 @@ void CGammaRateConjugateTest::testVarianceScale() {
 
         LOG_DEBUG(<< "****** probabilityOfLessLikelySamples ******");
         {
-            const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0};
+            const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0,
+                                          60.0, 70.0, 80.0, 90.0};
 
             const std::size_t nSamples = 1000u;
             const std::size_t nScaledSamples = 10000u;
@@ -1331,7 +1370,8 @@ void CGammaRateConjugateTest::testVarianceScale() {
                 TDouble1Vec sample(1, unscaledSamples[i]);
 
                 double lowerBound, upperBound;
-                CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound));
+                CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(
+                    maths_t::E_TwoSided, sample, lowerBound, upperBound));
                 CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
                 double probability = (lowerBound + upperBound) / 2.0;
                 probabilities.push_back(probability);
@@ -1339,7 +1379,8 @@ void CGammaRateConjugateTest::testVarianceScale() {
 
             std::sort(probabilities.begin(), probabilities.end());
             for (size_t i = 0; i < boost::size(percentiles); ++i) {
-                std::size_t index = static_cast<std::size_t>(static_cast<double>(nScaledSamples) * percentiles[i] / 100.0);
+                std::size_t index = static_cast<std::size_t>(
+                    static_cast<double>(nScaledSamples) * percentiles[i] / 100.0);
                 double error = fabs(probabilities[index] - percentiles[i] / 100.0);
                 expectedPercentileErrors.push_back(error);
                 expectedTotalError += error;
@@ -1353,7 +1394,8 @@ void CGammaRateConjugateTest::testVarianceScale() {
                 double ss = varianceScales[i] * scale;
                 {
                     boost::math::gamma_distribution<> gamma(scaledShape, ss);
-                    LOG_DEBUG(<< "mean = " << boost::math::mean(gamma) << ", variance = " << boost::math::variance(gamma));
+                    LOG_DEBUG(<< "mean = " << boost::math::mean(gamma)
+                              << ", variance = " << boost::math::variance(gamma));
                 }
 
                 TDoubleVec scaledSamples;
@@ -1364,13 +1406,11 @@ void CGammaRateConjugateTest::testVarianceScale() {
                 for (std::size_t j = 0; j < scaledSamples.size(); ++j) {
                     double lowerBound, upperBound;
                     maths_t::ETail tail;
-                    CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                                         maths_t::TWeightStyleVec(1, scales[s]),
-                                                                         TDouble1Vec(1, scaledSamples[j]),
-                                                                         TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])),
-                                                                         lowerBound,
-                                                                         upperBound,
-                                                                         tail));
+                    CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(
+                        maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, scales[s]),
+                        TDouble1Vec(1, scaledSamples[j]),
+                        TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])),
+                        lowerBound, upperBound, tail));
                     CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
                     double probability = (lowerBound + upperBound) / 2.0;
                     probabilities.push_back(probability);
@@ -1379,12 +1419,14 @@ void CGammaRateConjugateTest::testVarianceScale() {
 
                 double totalError = 0.0;
                 for (size_t j = 0; j < boost::size(percentiles); ++j) {
-                    std::size_t index = static_cast<std::size_t>(static_cast<double>(nScaledSamples) * percentiles[j] / 100.0);
+                    std::size_t index = static_cast<std::size_t>(
+                        static_cast<double>(nScaledSamples) * percentiles[j] / 100.0);
                     double error = fabs(probabilities[index] - percentiles[j] / 100.0);
                     totalError += error;
                     double errorThreshold = 0.017 + expectedPercentileErrors[j];
 
-                    LOG_DEBUG(<< "percentile = " << percentiles[j] << ", probability = " << probabilities[index] << ", error = " << error
+                    LOG_DEBUG(<< "percentile = " << percentiles[j] << ", probability = "
+                              << probabilities[index] << ", error = " << error
                               << ", error threshold = " << errorThreshold);
 
                     CPPUNIT_ASSERT(error < errorThreshold);
@@ -1392,7 +1434,8 @@ void CGammaRateConjugateTest::testVarianceScale() {
 
                 double totalErrorThreshold = 0.1 + expectedTotalError;
 
-                LOG_DEBUG(<< "total error = " << totalError << ", totalError threshold = " << totalErrorThreshold);
+                LOG_DEBUG(<< "total error = " << totalError
+                          << ", totalError threshold = " << totalErrorThreshold);
 
                 CPPUNIT_ASSERT(totalError < totalErrorThreshold);
             }
@@ -1407,7 +1450,10 @@ void CGammaRateConjugateTest::testVarianceScale() {
                 double scaledShape = shape / varianceScales[i];
                 double scaledScale = varianceScales[i] * scale;
                 boost::math::gamma_distribution<> gamma(scaledShape, scaledScale);
-                { LOG_DEBUG(<< "mean = " << boost::math::mean(gamma) << ", variance = " << boost::math::variance(gamma)); }
+                {
+                    LOG_DEBUG(<< "mean = " << boost::math::mean(gamma)
+                              << ", variance = " << boost::math::variance(gamma));
+                }
                 double expectedDifferentialEntropy = maths::CTools::differentialEntropy(gamma);
 
                 CGammaRateConjugate filter(makePrior());
@@ -1423,11 +1469,12 @@ void CGammaRateConjugateTest::testVarianceScale() {
 
                 for (std::size_t j = 0u; j < scaledSamples.size(); ++j) {
                     double logLikelihood = 0.0;
-                    CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                         filter.jointLogMarginalLikelihood(maths_t::TWeightStyleVec(1, scales[s]),
-                                                                           TDouble1Vec(1, scaledSamples[j]),
-                                                                           TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])),
-                                                                           logLikelihood));
+                    CPPUNIT_ASSERT_EQUAL(
+                        maths_t::E_FpNoErrors,
+                        filter.jointLogMarginalLikelihood(
+                            maths_t::TWeightStyleVec(1, scales[s]),
+                            TDouble1Vec(1, scaledSamples[j]),
+                            TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood));
                     differentialEntropy -= logLikelihood;
                 }
 
@@ -1436,7 +1483,8 @@ void CGammaRateConjugateTest::testVarianceScale() {
                 LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy
                           << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
 
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.05);
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy,
+                                             differentialEntropy, 0.05);
             }
         }
 
@@ -1449,8 +1497,10 @@ void CGammaRateConjugateTest::testVarianceScale() {
 
     for (std::size_t s = 0u; s < boost::size(scales); ++s) {
         for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) {
-            const double shapes[] = {1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0};
-            const double rates[] = {1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0};
+            const double shapes[] = {1.0, 10.0, 100.0,
+                                     1000.0, 100000.0, 1000000.0};
+            const double rates[] = {1.0, 10.0, 100.0,
+                                    1000.0, 100000.0, 1000000.0};
             const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0, 100.0};
 
             maths_t::TWeightStyleVec weightStyle(1, scales[s]);
@@ -1484,7 +1534,8 @@ void CGammaRateConjugateTest::testVarianceScale() {
                 double scaledShape = shape / scale;
                 double scaledRate = rate / scale;
 
-                LOG_DEBUG(<< "scaled shape = " << scaledShape << ", scaled rate = " << scaledRate);
+                LOG_DEBUG(<< "scaled shape = " << scaledShape
+                          << ", scaled rate = " << scaledRate);
 
                 TMeanAccumulator meanError;
                 TMeanAccumulator varianceError;
@@ -1495,17 +1546,23 @@ void CGammaRateConjugateTest::testVarianceScale() {
                     weights.clear();
                     weights.resize(samples.size(), TDouble4Vec(1, 1.0));
                     filter.addSamples(weightStyle, samples, weights);
-                    rng.generateGammaSamples(scaledShape, 1.0 / scaledRate, 200, samples);
+
rng.generateGammaSamples(scaledShape, 1.0 / scaledRate, + 200, samples); weights.clear(); weights.resize(samples.size(), TDouble4Vec(1, scale)); filter.addSamples(weightStyle, samples, weights); - double estimatedMean = filter.likelihoodShape() / filter.likelihoodRate(); - double estimatedVariance = estimatedMean / filter.likelihoodRate(); + double estimatedMean = filter.likelihoodShape() / + filter.likelihoodRate(); + double estimatedVariance = estimatedMean / + filter.likelihoodRate(); double dm = (dataTypes[t] == maths_t::E_IntegerData ? 0.5 : 0.0); double dv = (dataTypes[t] == maths_t::E_IntegerData ? 1.0 / 12.0 : 0.0); - double trialMeanError = std::fabs(estimatedMean - (mean + dm)) / std::max(1.0, mean + dm); - double trialVarianceError = std::fabs(estimatedVariance - (variance + dv)) / std::max(1.0, variance + dv); + double trialMeanError = std::fabs(estimatedMean - (mean + dm)) / + std::max(1.0, mean + dm); + double trialVarianceError = + std::fabs(estimatedVariance - (variance + dv)) / + std::max(1.0, variance + dv); LOG_DEBUG(<< "trial mean error = " << trialMeanError); LOG_DEBUG(<< "trial variance error = " << trialVarianceError); @@ -1514,11 +1571,15 @@ void CGammaRateConjugateTest::testVarianceScale() { varianceError.add(trialVarianceError); } - LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(meanError)); - LOG_DEBUG(<< "variance error = " << maths::CBasicStatistics::mean(varianceError)); + LOG_DEBUG(<< "mean error = " + << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG(<< "variance error = " + << maths::CBasicStatistics::mean(varianceError)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < maximumMeanError[t]); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceError) < maximumVarianceError[t]); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < + maximumMeanError[t]); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceError) < + maximumVarianceError[t]); meanMeanError += meanError; meanVarianceError += varianceError; @@ -1527,10 +1588,13 @@ void CGammaRateConjugateTest::testVarianceScale() { } LOG_DEBUG(<< "mean mean error = " << maths::CBasicStatistics::mean(meanMeanError)); - LOG_DEBUG(<< "mean variance error = " << maths::CBasicStatistics::mean(meanVarianceError)); + LOG_DEBUG(<< "mean variance error = " + << maths::CBasicStatistics::mean(meanVarianceError)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < maximumMeanMeanError[t]); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanVarianceError) < maximumMeanVarianceError[t]); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < + maximumMeanMeanError[t]); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanVarianceError) < + maximumMeanVarianceError[t]); } } } @@ -1554,8 +1618,10 @@ void CGammaRateConjugateTest::testNegativeSample() { TDoubleVec samples; rng.generateGammaSamples(shape, scale, 100, samples); - CGammaRateConjugate filter1(CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, 0.0, 0.2)); - CGammaRateConjugate filter2(CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.2586, 0.0, 0.2)); + CGammaRateConjugate filter1(CGammaRateConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.0, 0.0, 0.2)); + CGammaRateConjugate filter2(CGammaRateConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 1.2586, 0.0, 0.2)); filter1.addSamples(samples); filter2.addSamples(samples); @@ -1574,42 +1640,53 @@ void CGammaRateConjugateTest::testNegativeSample() { CppUnit::Test* 
CGammaRateConjugateTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CGammaRateConjugateTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMultipleUpdate",
-                                                                           &CGammaRateConjugateTest::testMultipleUpdate));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testPropagation",
-                                                                           &CGammaRateConjugateTest::testPropagation));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testShapeEstimation",
-                                                                           &CGammaRateConjugateTest::testShapeEstimation));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testRateEstimation",
-                                                                           &CGammaRateConjugateTest::testRateEstimation));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMarginalLikelihood",
-                                                                           &CGammaRateConjugateTest::testMarginalLikelihood));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMarginalLikelihoodMean",
-                                                                           &CGammaRateConjugateTest::testMarginalLikelihoodMean));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMarginalLikelihoodMode",
-                                                                           &CGammaRateConjugateTest::testMarginalLikelihoodMode));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testMarginalLikelihoodVariance",
-                                                                           &CGammaRateConjugateTest::testMarginalLikelihoodVariance));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testSampleMarginalLikelihood",
-                                                                           &CGammaRateConjugateTest::testSampleMarginalLikelihood));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testCdf", &CGammaRateConjugateTest::testCdf));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testProbabilityOfLessLikelySamples",
-                                                                           &CGammaRateConjugateTest::testProbabilityOfLessLikelySamples));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testAnomalyScore",
-                                                                           &CGammaRateConjugateTest::testAnomalyScore));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testOffset", &CGammaRateConjugateTest::testOffset));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testIntegerData",
-                                                                           &CGammaRateConjugateTest::testIntegerData));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testLowVariationData",
-                                                                           &CGammaRateConjugateTest::testLowVariationData));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testPersist", &CGammaRateConjugateTest::testPersist));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testVarianceScale",
-                                                                           &CGammaRateConjugateTest::testVarianceScale));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>("CGammaRateConjugateTest::testNegativeSample",
-                                                                           &CGammaRateConjugateTest::testNegativeSample));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testMultipleUpdate",
+        &CGammaRateConjugateTest::testMultipleUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testPropagation", &CGammaRateConjugateTest::testPropagation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testShapeEstimation",
+        &CGammaRateConjugateTest::testShapeEstimation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testRateEstimation",
+        &CGammaRateConjugateTest::testRateEstimation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testMarginalLikelihood",
+        &CGammaRateConjugateTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testMarginalLikelihoodMean",
+        &CGammaRateConjugateTest::testMarginalLikelihoodMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testMarginalLikelihoodMode",
+        &CGammaRateConjugateTest::testMarginalLikelihoodMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testMarginalLikelihoodVariance",
+        &CGammaRateConjugateTest::testMarginalLikelihoodVariance));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testSampleMarginalLikelihood",
+        &CGammaRateConjugateTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testCdf", &CGammaRateConjugateTest::testCdf));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testProbabilityOfLessLikelySamples",
+        &CGammaRateConjugateTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testAnomalyScore", &CGammaRateConjugateTest::testAnomalyScore));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testOffset", &CGammaRateConjugateTest::testOffset));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testIntegerData", &CGammaRateConjugateTest::testIntegerData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testLowVariationData",
+        &CGammaRateConjugateTest::testLowVariationData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testPersist", &CGammaRateConjugateTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testVarianceScale", &CGammaRateConjugateTest::testVarianceScale));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGammaRateConjugateTest>(
+        "CGammaRateConjugateTest::testNegativeSample",
+        &CGammaRateConjugateTest::testNegativeSample));
 
     return suiteOfTests;
 }
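
For orientation before the next file diff: every suite() in this patch registers its cases through CppUnit::TestCaller<Fixture>, whose template argument names the fixture class; that is the template argument used throughout these hunks. A minimal self-contained sketch of the idiom (MyTest and testSomething are hypothetical names, not part of the patch):

    #include <cppunit/TestCaller.h>
    #include <cppunit/TestFixture.h>
    #include <cppunit/TestSuite.h>
    #include <cppunit/extensions/HelperMacros.h>

    class MyTest : public CppUnit::TestFixture {
    public:
        void testSomething() { CPPUNIT_ASSERT_EQUAL(4, 2 + 2); }

        static CppUnit::Test* suite() {
            CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("MyTest");
            // TestCaller<MyTest> binds a test name to a member-function pointer.
            suiteOfTests->addTest(new CppUnit::TestCaller<MyTest>(
                "MyTest::testSomething", &MyTest::testSomething));
            return suiteOfTests;
        }
    };
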
diff --git a/lib/maths/unittest/CGramSchmidtTest.cc b/lib/maths/unittest/CGramSchmidtTest.cc
index 800eaeba50..81bcbf711d 100644
--- a/lib/maths/unittest/CGramSchmidtTest.cc
+++ b/lib/maths/unittest/CGramSchmidtTest.cc
@@ -292,7 +292,9 @@ void CGramSchmidtTest::testEdgeCases() {
     {
         LOG_DEBUG(<< "*** Test zero vector ***");
 
-        double x_[][5] = {{0.0, 0.0, 0.0, 0.0, 0.0}, {1.0, 3.0, 4.0, 0.0, 6.0}, {0.4, 0.3, 0.6, 1.0, 7.0}};
+        double x_[][5] = {{0.0, 0.0, 0.0, 0.0, 0.0},
+                          {1.0, 3.0, 4.0, 0.0, 6.0},
+                          {0.4, 0.3, 0.6, 1.0, 7.0}};
         std::size_t p[] = {0, 1, 2};
 
         do {
@@ -313,8 +315,11 @@ void CGramSchmidtTest::testEdgeCases() {
         LOG_DEBUG(<< "");
         LOG_DEBUG(<< "*** Test degenerate ***");
 
-        double x_[][4] = {
-            {1.0, 1.0, 1.0, 1.0}, {-1.0, 2.3, 1.0, 0.03}, {1.0, 1.0, 1.0, 1.0}, {-1.0, 2.3, 1.0, 0.03}, {-4.0, 0.3, 1.4, 1.03}};
+        double x_[][4] = {{1.0, 1.0, 1.0, 1.0},
+                          {-1.0, 2.3, 1.0, 0.03},
+                          {1.0, 1.0, 1.0, 1.0},
+                          {-1.0, 2.3, 1.0, 0.03},
+                          {-4.0, 0.3, 1.4, 1.03}};
 
         std::size_t p[] = {0, 1, 2, 3, 4};
 
@@ -338,12 +343,14 @@ void CGramSchmidtTest::testEdgeCases() {
 CppUnit::Test* CGramSchmidtTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CGramSchmidtTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CGramSchmidtTest>("CGramSchmidtTest::testOrthogonality", &CGramSchmidtTest::testOrthogonality));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CGramSchmidtTest>("CGramSchmidtTest::testNormalisation", &CGramSchmidtTest::testNormalisation));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGramSchmidtTest>("CGramSchmidtTest::testSpan", &CGramSchmidtTest::testSpan));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGramSchmidtTest>("CGramSchmidtTest::testEdgeCases", &CGramSchmidtTest::testEdgeCases));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGramSchmidtTest>(
+        "CGramSchmidtTest::testOrthogonality", &CGramSchmidtTest::testOrthogonality));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGramSchmidtTest>(
+        "CGramSchmidtTest::testNormalisation", &CGramSchmidtTest::testNormalisation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGramSchmidtTest>(
+        "CGramSchmidtTest::testSpan", &CGramSchmidtTest::testSpan));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGramSchmidtTest>(
+        "CGramSchmidtTest::testEdgeCases", &CGramSchmidtTest::testEdgeCases));
 
     return suiteOfTests;
 }
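
The next diff reformats tests that compare the library's information criteria against directly computed expectations. As a reference for the constants that appear there (for example 6.0 + 12.0 / (n - 4.0) with k = 3 fitted parameters), this is the expected-value arithmetic those tests use, sketched as plain functions; note the AICc correction term follows the test code rather than any particular textbook form:

    #include <cmath>

    // minusTwoLogLikelihood accumulates -2 * log f(x_i); k is the number of
    // fitted parameters (e.g. 3 for a 2-d spherical Gaussian: two mean
    // components plus one variance); n is the sample count.
    double expectedBIC(double minusTwoLogLikelihood, double k, double n) {
        return minusTwoLogLikelihood + k * std::log(n);
    }

    double expectedAICc(double minusTwoLogLikelihood, double k, double n) {
        // 2k AIC penalty plus the k(k + 1) / (n - k - 1) small-sample
        // adjustment as written in these tests: 6.0 + 12.0 / (n - 4.0)
        // for k = 3, 10.0 + 30.0 / (n - 6.0) for k = 5, and so on.
        return minusTwoLogLikelihood + 2.0 * k + k * (k + 1.0) / (n - k - 1.0);
    }
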
diff --git a/lib/maths/unittest/CInformationCriteriaTest.cc b/lib/maths/unittest/CInformationCriteriaTest.cc
index 1693299a1a..f1c9d8da00 100644
--- a/lib/maths/unittest/CInformationCriteriaTest.cc
+++ b/lib/maths/unittest/CInformationCriteriaTest.cc
@@ -37,7 +38,8 @@ template<typename POINT>
 double logfSphericalGaussian(const POINT& mean, double variance, const POINT& x) {
     double d = static_cast<double>(x.dimension());
     double r = (x - mean).euclidean();
-    return -0.5 * (d * std::log(boost::math::double_constants::two_pi * variance) + r * r / variance);
+    return -0.5 * (d * std::log(boost::math::double_constants::two_pi * variance) +
+                   r * r / variance);
 }
 
 template<typename POINT, typename MATRIX>
@@ -76,7 +77,8 @@ void CInformationCriteriaTest::testSphericalGaussian() {
 
         double likelihood = 0.0;
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            likelihood += -2.0 * logfSphericalGaussian(mean, variance, samples[i]) + 2.0 * std::log(upper);
+            likelihood += -2.0 * logfSphericalGaussian(mean, variance, samples[i]) +
+                          2.0 * std::log(upper);
         }
         double expectedAICc = likelihood + 6.0 + 12.0 / (n - 4.0);
         double expectedBIC = likelihood + 3.0 * std::log(n);
@@ -96,7 +98,8 @@ void CInformationCriteriaTest::testSphericalGaussian() {
     {
         double variance = 8.0;
         double mean_[] = {-5.0, 30.0, 2.0, 7.9};
-        double lowerTriangle[] = {variance, 0.0, variance, 0.0, 0.0, variance, 0.0, 0.0, 0.0, variance};
+        double lowerTriangle[] = {variance, 0.0, variance, 0.0, 0.0,
+                                  variance, 0.0, 0.0, 0.0, variance};
 
         TVector4 mean(boost::begin(mean_), boost::end(mean_));
         TMatrix4 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));
@@ -111,7 +114,8 @@ void CInformationCriteriaTest::testSphericalGaussian() {
 
         double likelihood = 0.0;
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            likelihood += -2.0 * logfSphericalGaussian(mean, variance, samples[i]) + 4.0 * std::log(upper);
+            likelihood += -2.0 * logfSphericalGaussian(mean, variance, samples[i]) +
+                          4.0 * std::log(upper);
         }
         double expectedAICc = likelihood + 10.0 + 30.0 / (n - 6.0);
         double expectedBIC = likelihood + 5.0 * std::log(n);
@@ -233,7 +237,8 @@ void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster() {
 
     TSphericalCluster2VecVec clusters;
     for (std::size_t i = 0u; i < means.size(); ++i) {
-        maths::CSampling::multivariateNormalSample(means[i], covariance, 1000, points[i]);
+        maths::CSampling::multivariateNormalSample(means[i], covariance,
+                                                   1000, points[i]);
         TMeanVar2Accumulator moments;
         moments.add(points[i]);
         double n = maths::CBasicStatistics::count(moments);
@@ -251,7 +256,8 @@ void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster() {
 
         LOG_DEBUG(<< "BIC points = " << bicPoints.calculate());
         LOG_DEBUG(<< "BIC clusters = " << bicClusters.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(bicPoints.calculate(), bicClusters.calculate(), 1e-10 * bicPoints.calculate());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(bicPoints.calculate(), bicClusters.calculate(),
+                                     1e-10 * bicPoints.calculate());
 
         maths::CSphericalGaussianInfoCriterion aicPoints;
         aicPoints.add(points);
@@ -259,7 +265,8 @@ void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster() {
         aicClusters.add(clusters);
         LOG_DEBUG(<< "AICc points = " << aicPoints.calculate());
         LOG_DEBUG(<< "AICc clusters = " << aicClusters.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(aicPoints.calculate(), aicClusters.calculate(), 1e-10 * aicPoints.calculate());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(aicPoints.calculate(), aicClusters.calculate(),
+                                     1e-10 * aicPoints.calculate());
     }
 }
 
@@ -287,7 +294,8 @@ void CInformationCriteriaTest::testGaussian() {
 
         double likelihood = 0.0;
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            likelihood += -2.0 * logfGaussian(mean, covariance, samples[i]) + 2.0 * std::log(upper);
+            likelihood += -2.0 * logfGaussian(mean, covariance, samples[i]) +
+                          2.0 * std::log(upper);
         }
         double expectedAICc = likelihood + 10.0 + 30.0 / (n - 6.0);
         double expectedBIC = likelihood + 5.0 * std::log(n);
@@ -306,7 +314,8 @@ void CInformationCriteriaTest::testGaussian() {
     }
     {
         double mean_[] = {-5.0, 30.0, 2.0, 7.9};
-        double lowerTriangle[] = {8.0, 1.0, 8.0, 0.0, 0.0, 8.0, 0.0, 2.0, 0.5, 8.0};
+        double lowerTriangle[] = {8.0, 1.0, 8.0, 0.0, 0.0,
+                                  8.0, 0.0, 2.0, 0.5, 8.0};
 
         TVector4 mean(boost::begin(mean_), boost::end(mean_));
         TMatrix4 covariance(boost::begin(lowerTriangle), boost::end(lowerTriangle));
@@ -321,7 +330,8 @@ void CInformationCriteriaTest::testGaussian() {
 
         double likelihood = 0.0;
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            likelihood += -2.0 * logfGaussian(mean, covariance, samples[i]) + 4.0 * std::log(upper);
+            likelihood += -2.0 * logfGaussian(mean, covariance, samples[i]) +
+                          4.0 * std::log(upper);
         }
         double expectedAICc = likelihood + 28.0 + 210.0 / (n - 15.0);
         double expectedBIC = likelihood + 14.0 * std::log(n);
@@ -438,7 +448,8 @@ void CInformationCriteriaTest::testGaussianWithSphericalCluster() {
 
     TSphericalCluster2VecVec clusters;
     for (std::size_t i = 0u; i < means.size(); ++i) {
-        maths::CSampling::multivariateNormalSample(means[i], covariance, 1000, points[i]);
+        maths::CSampling::multivariateNormalSample(means[i], covariance,
+                                                   1000, points[i]);
         TMeanVar2Accumulator moments;
         moments.add(points[i]);
         double n = maths::CBasicStatistics::count(moments);
@@ -456,7 +467,8 @@ void CInformationCriteriaTest::testGaussianWithSphericalCluster() {
 
         LOG_DEBUG(<< "BIC points = " << bicPoints.calculate());
         LOG_DEBUG(<< "BIC clusters = " << bicClusters.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(bicPoints.calculate(), bicClusters.calculate(), 2e-3 * bicPoints.calculate());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(bicPoints.calculate(), bicClusters.calculate(),
+                                     2e-3 * bicPoints.calculate());
 
         maths::CGaussianInfoCriterion aicPoints;
         aicPoints.add(points);
@@ -464,22 +476,25 @@ void CInformationCriteriaTest::testGaussianWithSphericalCluster() {
         aicClusters.add(clusters);
         LOG_DEBUG(<< "AICc points = " << aicPoints.calculate());
         LOG_DEBUG(<< "AICc clusters = " << aicClusters.calculate());
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(aicPoints.calculate(), aicClusters.calculate(), 2e-3 * aicPoints.calculate());
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(aicPoints.calculate(), aicClusters.calculate(),
+                                     2e-3 * aicPoints.calculate());
     }
 }
 
 CppUnit::Test* CInformationCriteriaTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CInformationCriteriaTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>("CInformationCriteriaTest::testSphericalGaussian",
-                                                                            &CInformationCriteriaTest::testSphericalGaussian));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CInformationCriteriaTest>("CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster",
-                                                          &CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>("CInformationCriteriaTest::testGaussian",
-                                                                            &CInformationCriteriaTest::testGaussian));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>("CInformationCriteriaTest::testGaussianWithSphericalCluster",
-                                                                            &CInformationCriteriaTest::testGaussianWithSphericalCluster));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>(
+        "CInformationCriteriaTest::testSphericalGaussian",
+        &CInformationCriteriaTest::testSphericalGaussian));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>(
+        "CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster",
+        &CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>(
+        "CInformationCriteriaTest::testGaussian", &CInformationCriteriaTest::testGaussian));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInformationCriteriaTest>(
+        "CInformationCriteriaTest::testGaussianWithSphericalCluster",
+        &CInformationCriteriaTest::testGaussianWithSphericalCluster));
 
     return suiteOfTests;
 }
 
diff --git a/lib/maths/unittest/CIntegerToolsTest.cc b/lib/maths/unittest/CIntegerToolsTest.cc
index 249fa5efb7..4e304df9f8 100644
--- a/lib/maths/unittest/CIntegerToolsTest.cc
+++ b/lib/maths/unittest/CIntegerToolsTest.cc
@@ -45,7 +45,8 @@ void CIntegerToolsTest::testNextPow2() {
 
     test::CRandomNumbers rng;
 
-    for (std::size_t test = 1u, shift = 1u; test < (std::numeric_limits<std::size_t>::max() >> 1) + 1; test <<= 1, ++shift) {
+    for (std::size_t test = 1u, shift = 1u;
+         test < (std::numeric_limits<std::size_t>::max() >> 1) + 1; test <<= 1, ++shift) {
         LOG_DEBUG(<< "Testing shift = " << shift);
 
         // Edge cases.
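
Among the CIntegerToolsTest hunks below is a binomial-coefficient check against a log-gamma identity, C(n, j) = exp(lgamma(n + 1) - lgamma(n - j + 1) - lgamma(j + 1)), which holds because lgamma(m + 1) = log(m!). A standalone sketch of the same identity using std::lgamma (the test itself uses boost::math::lgamma):

    #include <cassert>
    #include <cmath>

    int main() {
        unsigned int n = 10, j = 3;
        // Compute C(n, j) in log space to avoid factorial overflow.
        double viaLogGamma = std::exp(std::lgamma(n + 1.0) -
                                      std::lgamma(n - j + 1.0) -
                                      std::lgamma(j + 1.0));
        assert(std::fabs(viaLogGamma - 120.0) < 1e-9); // C(10, 3) = 120
        return 0;
    }
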
@@ -68,7 +69,8 @@ void CIntegerToolsTest::testReverseBits() {
     test::CRandomNumbers rng;
 
     TSizeVec values;
-    rng.generateUniformSamples(0, boost::numeric::bounds<std::size_t>::highest(), 10000, values);
+    rng.generateUniformSamples(0, boost::numeric::bounds<std::size_t>::highest(),
+                               10000, values);
 
     std::string expected;
     std::string actual;
@@ -106,7 +108,8 @@ void CIntegerToolsTest::testGcd() {
     test::CRandomNumbers rng;
 
     LOG_DEBUG(<< "--- gcd(a, b) ---");
-    std::size_t primes[] = {2, 3, 5, 7, 11, 13, 17, 19, 29, 97, 821, 5851, 7877};
+    std::size_t primes[] = {2, 3, 5, 7, 11, 13, 17,
+                            19, 29, 97, 821, 5851, 7877};
     for (std::size_t i = 0u; i < 1000; ++i) {
         TSizeVec indices;
         TSizeVec split;
@@ -116,8 +119,9 @@ void CIntegerToolsTest::testGcd() {
         std::sort(indices.begin() + split[0], indices.end());
 
         TSizeVec cf;
-        std::set_intersection(
-            indices.begin(), indices.begin() + split[0], indices.begin() + split[0], indices.end(), std::back_inserter(cf));
+        std::set_intersection(indices.begin(), indices.begin() + split[0],
+                              indices.begin() + split[0], indices.end(),
+                              std::back_inserter(cf));
 
         // Use 64 bit integers here otherwise overflow will occur in 32 bit code
         uint64_t bigGcd = 1;
@@ -147,9 +151,11 @@ void CIntegerToolsTest::testGcd() {
         std::sort(indices.begin() + 6, indices.end());
 
         TSizeVec cf;
-        std::set_intersection(indices.begin(), indices.begin() + 3, indices.begin() + 3, indices.begin() + 6, std::back_inserter(cf));
+        std::set_intersection(indices.begin(), indices.begin() + 3, indices.begin() + 3,
+                              indices.begin() + 6, std::back_inserter(cf));
         TSizeVec tmp;
-        std::set_intersection(cf.begin(), cf.end(), indices.begin() + 6, indices.end(), std::back_inserter(tmp));
+        std::set_intersection(cf.begin(), cf.end(), indices.begin() + 6,
+                              indices.end(), std::back_inserter(tmp));
         cf.swap(tmp);
         std::size_t gcd = 1;
         for (std::size_t j = 0u; j < cf.size(); ++j) {
@@ -166,8 +172,8 @@ void CIntegerToolsTest::testGcd() {
         for (std::size_t j = 6; j < indices.size(); ++j) {
             n[2] *= primes[indices[j]];
         }
-        LOG_DEBUG(<< "n = " << core::CContainerPrinter::print(n) << " - expected gcd = " << gcd
-                  << ", gcd = " << maths::CIntegerTools::gcd(n));
+        LOG_DEBUG(<< "n = " << core::CContainerPrinter::print(n) << " - expected gcd = "
+                  << gcd << ", gcd = " << maths::CIntegerTools::gcd(n));
     }
 
     LOG_DEBUG(<< "--- gcd(a, b, c, d) ---");
@@ -190,12 +196,15 @@ void CIntegerToolsTest::testBinomial() {
 
     for (std::size_t i = 0u; i < boost::size(n); ++i) {
         for (unsigned int j = 0u; j <= n[i]; ++j) {
-            LOG_DEBUG(<< "j = " << j << ", n = " << n[i] << ", (n j) = " << maths::CIntegerTools::binomial(n[i], j));
+            LOG_DEBUG(<< "j = " << j << ", n = " << n[i]
+                      << ", (n j) = " << maths::CIntegerTools::binomial(n[i], j));
 
             double expected =
-                std::exp(boost::math::lgamma(static_cast<double>(n[i] + 1)) - boost::math::lgamma(static_cast<double>(n[i] - j + 1)) -
+                std::exp(boost::math::lgamma(static_cast<double>(n[i] + 1)) -
+                         boost::math::lgamma(static_cast<double>(n[i] - j + 1)) -
                          boost::math::lgamma(static_cast<double>(j + 1)));
 
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, maths::CIntegerTools::binomial(n[i], j), 1e-10);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                expected, maths::CIntegerTools::binomial(n[i], j), 1e-10);
         }
     }
 }
@@ -203,11 +212,14 @@ CppUnit::Test* CIntegerToolsTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CIntegerToolsTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>("CIntegerToolsTest::testNextPow2", &CIntegerToolsTest::testNextPow2));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CIntegerToolsTest>("CIntegerToolsTest::testReverseBits", &CIntegerToolsTest::testReverseBits));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>("CIntegerToolsTest::testGcd", &CIntegerToolsTest::testGcd));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>("CIntegerToolsTest::testBinomial", &CIntegerToolsTest::testBinomial));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>(
+        "CIntegerToolsTest::testNextPow2", &CIntegerToolsTest::testNextPow2));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>(
+        "CIntegerToolsTest::testReverseBits", &CIntegerToolsTest::testReverseBits));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>(
+        "CIntegerToolsTest::testGcd", &CIntegerToolsTest::testGcd));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CIntegerToolsTest>(
+        "CIntegerToolsTest::testBinomial", &CIntegerToolsTest::testBinomial));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CIntegrationTest.cc b/lib/maths/unittest/CIntegrationTest.cc
index e8cd770c85..4981699008 100644
--- a/lib/maths/unittest/CIntegrationTest.cc
+++ b/lib/maths/unittest/CIntegrationTest.cc
@@ -31,7 +31,9 @@ namespace {
 template<unsigned int ORDER>
 class CPolynomialFunction : public std::unary_function<double, double> {
 public:
-    CPolynomialFunction(const double (&coefficients)[ORDER + 1]) { std::copy(coefficients, coefficients + ORDER + 1, m_Coefficients); }
+    CPolynomialFunction(const double (&coefficients)[ORDER + 1]) {
+        std::copy(coefficients, coefficients + ORDER + 1, m_Coefficients);
+    }
 
     bool operator()(const double& x, double& result) const {
         result = 0.0;
@@ -41,7 +43,9 @@ class CPolynomialFunction : public std::unary_function<double, double> {
         return true;
     }
 
-    const double& coefficient(unsigned int i) const { return m_Coefficients[i]; }
+    const double& coefficient(unsigned int i) const {
+        return m_Coefficients[i];
+    }
 
 private:
     double m_Coefficients[ORDER + 1];
@@ -80,7 +84,8 @@ class CMultivariatePolynomialFunction {
 
     struct SMonomial {
         bool operator<(const SMonomial& rhs) const {
-            return std::accumulate(s_Powers, s_Powers + DIMENSION, 0.0) < std::accumulate(rhs.s_Powers, rhs.s_Powers + DIMENSION, 0.0);
+            return std::accumulate(s_Powers, s_Powers + DIMENSION, 0.0) <
+                   std::accumulate(rhs.s_Powers, rhs.s_Powers + DIMENSION, 0.0);
         }
         double s_Coefficient;
         double s_Powers[DIMENSION];
@@ -119,7 +124,8 @@ class CMultivariatePolynomialFunction {
 };
 
 template<unsigned int DIMENSION>
-std::ostream& operator<<(std::ostream& o, const CMultivariatePolynomialFunction<DIMENSION>& f) {
+std::ostream&
+operator<<(std::ostream& o, const CMultivariatePolynomialFunction<DIMENSION>& f) {
     if (!f.terms().empty()) {
         o << (f.terms())[0].s_Coefficient;
         for (unsigned int j = 0u; j < DIMENSION; ++j) {
@@ -142,7 +148,9 @@ std::ostream& operator<<(std::ostream& o, const CMultivariatePolynomialFunction<DIMENSION>& f) {
 using TDoubleVec = std::vector<double>;
 
 template<unsigned int DIMENSION>
-double integrate(const CMultivariatePolynomialFunction<DIMENSION>& f, const TDoubleVec& a, const TDoubleVec& b) {
+double integrate(const CMultivariatePolynomialFunction<DIMENSION>& f,
+                 const TDoubleVec& a,
+                 const TDoubleVec& b) {
     double result = 0.0;
     for (std::size_t i = 0u; i < f.terms().size(); ++i) {
         double term = (f.terms())[i].s_Coefficient;
@@ -200,10 +208,12 @@ class CSmoothHeavySide {
     using result_type = double;
 
 public:
-    CSmoothHeavySide(double slope, double offset) : m_Slope(slope), m_Offset(offset) {}
+    CSmoothHeavySide(double slope, double offset)
+        : m_Slope(slope), m_Offset(offset) {}
 
     bool operator()(double x, double& result) const {
-        result = std::exp(m_Slope * (x - m_Offset)) / (std::exp(m_Slope * (x - m_Offset)) + 1.0);
+        result = std::exp(m_Slope * (x - m_Offset)) /
+                 (std::exp(m_Slope * (x - m_Offset)) + 1.0);
         return true;
     }
 
@@ -271,41 +281,52 @@ void
CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][2] = {{-3.2, -1.2}, {0.0, 2.0}, {1.0, -1.0}, {5.0, 6.4}, {12.1, -8.3}}; + double coeffs[][2] = { + {-3.2, -1.2}, {0.0, 2.0}, {1.0, -1.0}, {5.0, 6.4}, {12.1, -8.3}}; for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { @@ -317,41 +338,55 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + 
CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][3] = {{-3.2, -1.2, -3.5}, {0.1, 2.0, 4.6}, {1.0, -1.0, 1.0}, {5.0, 6.4, -4.1}, {12.1, -8.3, 10.1}}; + double coeffs[][3] = {{-3.2, -1.2, -3.5}, + {0.1, 2.0, 4.6}, + {1.0, -1.0, 1.0}, + {5.0, 6.4, -4.1}, + {12.1, -8.3, 10.1}}; for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { @@ -363,39 +398,51 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); 
CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][4] = { - {-1.2, -1.9, -3.0, -3.2}, {0.4, 2.0, 4.6, 2.3}, {1.0, -1.0, 1.0, -1.0}, {4.0, 2.4, -8.1, -2.1}, {10.1, -6.3, 1.1, 8.3}}; + double coeffs[][4] = {{-1.2, -1.9, -3.0, -3.2}, + {0.4, 2.0, 4.6, 2.3}, + {1.0, -1.0, 1.0, -1.0}, + {4.0, 2.4, -8.1, -2.1}, + {10.1, -6.3, 1.1, 8.3}}; for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { @@ -407,28 +454,36 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } @@ -451,25 +506,32 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, 
ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } @@ -492,22 +554,28 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } @@ -530,19 +598,24 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + 
CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } @@ -565,16 +638,20 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } @@ -597,24 +674,28 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][10] = {{-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1, -3.4}, - {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0, 1.3}, - {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}, - {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0, 1.1}, - {10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3, -5.0}}; + double coeffs[][10] = { + {-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1, -3.4}, + {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0, 1.3}, + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}, + {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0, 1.1}, + {10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3, -5.0}}; for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { @@ -626,21 +707,24 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } } { - double coeffs[][11] = {{-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1, -3.4, -0.9}, - {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0, 1.3, 2.0}, - {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0}, - {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0, 1.1, 3.1}, - {10.1, 
-5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3, -5.0, -0.1}}; + double coeffs[][11] = { + {-1.1, -0.9, -4.0, -1.2, -0.2, -1.1, -0.1, -2.0, -0.1, -3.4, -0.9}, + {20.4, 6.0, 2.6, 0.3, 0.7, 2.3, 1.0, 3.0, 10.0, 1.3, 2.0}, + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0}, + {3.0, 2.4, -8.1, -2.1, 1.4, -3.1, 2.1, -2.1, -1.0, 1.1, 3.1}, + {10.1, -5.3, 2.1, 4.3, -7.1, 0.4, -0.5, 0.3, 0.3, -5.0, -0.1}}; for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); ++i) { for (unsigned int j = 0; j < sizeof(coeffs) / sizeof(coeffs[0]); ++j) { @@ -652,7 +736,8 @@ void CIntegrationTest::testAllSingleVariate() { double actual; - CPPUNIT_ASSERT(CIntegration::gaussLegendre(f, ranges[i][0], ranges[i][1], actual)); + CPPUNIT_ASSERT(CIntegration::gaussLegendre( + f, ranges[i][0], ranges[i][1], actual)); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, EPS); } } @@ -673,15 +758,18 @@ void CIntegrationTest::testAdaptive() { CSmoothHeavySide heavySide(10.0, 20.0); TDoubleDoublePr intervals_[] = { - TDoubleDoublePr(0.0, 10.0), TDoubleDoublePr(10.0, 20.0), TDoubleDoublePr(20.0, 30.0), TDoubleDoublePr(30.0, 40.0)}; + TDoubleDoublePr(0.0, 10.0), TDoubleDoublePr(10.0, 20.0), + TDoubleDoublePr(20.0, 30.0), TDoubleDoublePr(30.0, 40.0)}; TDoubleDoublePrVec intervals(boost::begin(intervals_), boost::end(intervals_)); TDoubleVec fIntervals(intervals.size()); for (std::size_t i = 0u; i < intervals.size(); ++i) { - CIntegration::gaussLegendre(heavySide, intervals[i].first, intervals[i].second, fIntervals[i]); + CIntegration::gaussLegendre( + heavySide, intervals[i].first, intervals[i].second, fIntervals[i]); } double result = 0.0; - CIntegration::adaptiveGaussLegendre(heavySide, intervals, fIntervals, 3, 5, 0.01, result); + CIntegration::adaptiveGaussLegendre( + heavySide, intervals, fIntervals, 3, 5, 0.01, result); LOG_DEBUG(<< "expectedResult = 20.0"); LOG_DEBUG(<< "result = " << result); CPPUNIT_ASSERT_DOUBLES_EQUAL(20.0, result, 0.01 * 20.0); @@ -700,15 +788,18 @@ void CIntegrationTest::testAdaptive() { } TDoubleDoublePr intervals_[] = { - TDoubleDoublePr(0.0, 10.0), TDoubleDoublePr(10.0, 20.0), TDoubleDoublePr(20.0, 30.0), TDoubleDoublePr(30.0, 40.0)}; + TDoubleDoublePr(0.0, 10.0), TDoubleDoublePr(10.0, 20.0), + TDoubleDoublePr(20.0, 30.0), TDoubleDoublePr(30.0, 40.0)}; TDoubleDoublePrVec intervals(boost::begin(intervals_), boost::end(intervals_)); TDoubleVec fIntervals(intervals.size()); for (std::size_t i = 0u; i < intervals.size(); ++i) { - CIntegration::gaussLegendre(normal, intervals[i].first, intervals[i].second, fIntervals[i]); + CIntegration::gaussLegendre( + normal, intervals[i].first, intervals[i].second, fIntervals[i]); } double result = 0.0; - CIntegration::adaptiveGaussLegendre(normal, intervals, fIntervals, 3, 5, 0.0001, result); + CIntegration::adaptiveGaussLegendre( + normal, intervals, fIntervals, 3, 5, 0.0001, result); LOG_DEBUG(<< "expectedResult = " << expectedResult); LOG_DEBUG(<< "result = " << result); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedResult, result, 0.0001 * expectedResult); @@ -724,19 +815,23 @@ void CIntegrationTest::testAdaptive() { double expectedResult = 0.0; for (std::size_t i = 0u; i < 400; ++i) { double fi; - CIntegration::gaussLegendre(f, 0.1 * static_cast(i), 0.1 * static_cast(i + 1), fi); + CIntegration::gaussLegendre( + f, 0.1 * static_cast(i), 0.1 * static_cast(i + 1), fi); expectedResult += fi; } - TDoubleDoublePr intervals_[] = {TDoubleDoublePr(0.0, 20.0), TDoubleDoublePr(20.0, 40.0)}; + TDoubleDoublePr intervals_[] = {TDoubleDoublePr(0.0, 20.0), + 
TDoubleDoublePr(20.0, 40.0)}; TDoubleDoublePrVec intervals(boost::begin(intervals_), boost::end(intervals_)); TDoubleVec fIntervals(intervals.size()); for (std::size_t i = 0u; i < intervals.size(); ++i) { - CIntegration::gaussLegendre(f, intervals[i].first, intervals[i].second, fIntervals[i]); + CIntegration::gaussLegendre( + f, intervals[i].first, intervals[i].second, fIntervals[i]); } double result = 0.0; - CIntegration::adaptiveGaussLegendre(f, intervals, fIntervals, 3, 5, 0.0001, result); + CIntegration::adaptiveGaussLegendre( + f, intervals, fIntervals, 3, 5, 0.0001, result); LOG_DEBUG(<< "expectedResult = " << expectedResult); LOG_DEBUG(<< "result = " << result); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedResult, result, 0.0001 * expectedResult); @@ -756,9 +851,11 @@ void CIntegrationTest::testSparseGrid() { TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l1", expectedWeights, expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l1", + expectedWeights, expectedPoints)); - using TSparse2do1 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse2do1 = + CIntegration::CSparseGaussLegendreQuadrature; const TSparse2do1& sparse = TSparse2do1::instance(); @@ -768,11 +865,13 @@ void CIntegrationTest::testSparseGrid() { for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { LOG_DEBUG(<< "weight = " << (sparse.weights())[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / 4.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], + (sparse.weights())[i] / 4.0, 1e-6); LOG_DEBUG(<< "point = " << (sparse.points())[i]); for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], + 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -782,9 +881,11 @@ void CIntegrationTest::testSparseGrid() { TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l2", expectedWeights, expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l2", + expectedWeights, expectedPoints)); - using TSparse2do2 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse2do2 = + CIntegration::CSparseGaussLegendreQuadrature; const TSparse2do2& sparse = TSparse2do2::instance(); @@ -794,11 +895,13 @@ void CIntegrationTest::testSparseGrid() { for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { LOG_DEBUG(<< "weight = " << (sparse.weights())[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / 4.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], + (sparse.weights())[i] / 4.0, 1e-6); LOG_DEBUG(<< "point = " << (sparse.points())[i]); for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], + 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -808,9 +911,11 @@ void CIntegrationTest::testSparseGrid() { TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l4", expectedWeights, expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d2_l4", + expectedWeights, expectedPoints)); - using TSparse2do4 = 
CIntegration::CSparseGaussLegendreQuadrature; + using TSparse2do4 = + CIntegration::CSparseGaussLegendreQuadrature; const TSparse2do4& sparse = TSparse2do4::instance(); @@ -820,11 +925,13 @@ void CIntegrationTest::testSparseGrid() { for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { LOG_DEBUG(<< "weight = " << (sparse.weights())[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / 4.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], + (sparse.weights())[i] / 4.0, 1e-6); LOG_DEBUG(<< "point = " << (sparse.points())[i]); for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], + 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -834,9 +941,11 @@ void CIntegrationTest::testSparseGrid() { TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d7_l3", expectedWeights, expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d7_l3", + expectedWeights, expectedPoints)); - using TSparse7do3 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse7do3 = + CIntegration::CSparseGaussLegendreQuadrature; const TSparse7do3& sparse = TSparse7do3::instance(); @@ -846,11 +955,13 @@ void CIntegrationTest::testSparseGrid() { for (std::size_t i = 0u; i < expectedWeights.size(); ++i) { LOG_DEBUG(<< "weight = " << (sparse.weights())[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / std::pow(2.0, 7.0), 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + expectedWeights[i], (sparse.weights())[i] / std::pow(2.0, 7.0), 1e-6); LOG_DEBUG(<< "point = " << (sparse.points())[i]); for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], + 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -860,9 +971,11 @@ void CIntegrationTest::testSparseGrid() { TDoubleVec expectedWeights; TDoubleVecVec expectedPoints; - CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d7_l5", expectedWeights, expectedPoints)); + CPPUNIT_ASSERT(readGrid("testfiles/sparse_guass_quadrature_test_d7_l5", + expectedWeights, expectedPoints)); - using TSparse7do5 = CIntegration::CSparseGaussLegendreQuadrature; + using TSparse7do5 = + CIntegration::CSparseGaussLegendreQuadrature; const TSparse7do5& sparse = TSparse7do5::instance(); @@ -874,13 +987,15 @@ void CIntegrationTest::testSparseGrid() { if (i % 10 == 0) { LOG_DEBUG(<< "weight = " << (sparse.weights())[i]); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedWeights[i], (sparse.weights())[i] / std::pow(2.0, 7.0), 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + expectedWeights[i], (sparse.weights())[i] / std::pow(2.0, 7.0), 1e-6); if (i % 10 == 0) { LOG_DEBUG(<< "point = " << (sparse.points())[i]); } for (std::size_t j = 0u; j < expectedPoints[i].size(); ++j) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedPoints[i][j], + 0.5 + (sparse.points())[i](j) / 2.0, 1e-6); } } } @@ -888,40 +1003,46 @@ void CIntegrationTest::testSparseGrid() { unsigned int dimensions[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; unsigned int order[] = {1, 2, 3, 4, 5}; - std::size_t expectedNumberPoints[][5] = {{1, 2, 3, 4, 5}, - {1, 5, 13, 29, 53}, - {1, 7, 25, 69, 165}, - {1, 9, 
41, 137, 385}, - {1, 11, 61, 241, 781}, - {1, 13, 85, 389, 1433}, - {1, 15, 113, 589, 2437}, - {1, 17, 145, 849, 3905}, - {1, 19, 181, 1177, 5965}, - {1, 21, 221, 1581, 8761}}; + std::size_t expectedNumberPoints[][5] = { + {1, 2, 3, 4, 5}, {1, 5, 13, 29, 53}, + {1, 7, 25, 69, 165}, {1, 9, 41, 137, 385}, + {1, 11, 61, 241, 781}, {1, 13, 85, 389, 1433}, + {1, 15, 113, 589, 2437}, {1, 17, 145, 849, 3905}, + {1, 19, 181, 1177, 5965}, {1, 21, 221, 1581, 8761}}; for (std::size_t i = 0u; i < boost::size(dimensions); ++i) { LOG_DEBUG(<< "DIMENSION = " << dimensions[i]); -#define NUMBER_POINTS(dimension, n) \ - switch (order[j]) { \ - case 1: \ - n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - case 2: \ - n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - case 3: \ - n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - case 4: \ - n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - case 5: \ - n = CIntegration::CSparseGaussLegendreQuadrature::instance().points().size(); \ - break; \ - default: \ - n = 0; \ - break; \ +#define NUMBER_POINTS(dimension, n) \ + switch (order[j]) { \ + case 1: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance() \ + .points() \ + .size(); \ + break; \ + case 2: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance() \ + .points() \ + .size(); \ + break; \ + case 3: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance() \ + .points() \ + .size(); \ + break; \ + case 4: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance() \ + .points() \ + .size(); \ + break; \ + case 5: \ + n = CIntegration::CSparseGaussLegendreQuadrature::instance() \ + .points() \ + .size(); \ + break; \ + default: \ + n = 0; \ + break; \ } for (std::size_t j = 0u; j < boost::size(order); ++j) { LOG_DEBUG(<< "ORDER = " << order[j]); @@ -964,7 +1085,8 @@ void CIntegrationTest::testSparseGrid() { } #undef NUMBER_POINTS - LOG_DEBUG(<< "number points: actual = " << numberPoints << ", expected = " << expectedNumberPoints[i][j]); + LOG_DEBUG(<< "number points: actual = " << numberPoints + << ", expected = " << expectedNumberPoints[i][j]); CPPUNIT_ASSERT_EQUAL(expectedNumberPoints[i][j], numberPoints); } } @@ -1004,8 +1126,10 @@ void CIntegrationTest::testMultivariateSmooth() { CMultivariatePolynomialFunction polynomial; for (std::size_t i = 0u; i < n; ++i) { double c = static_cast(coefficients[i]); - double p[] = {static_cast(powers[DIMENSION * i + 0]), static_cast(powers[DIMENSION * i + 1])}; - if (std::accumulate(p, p + DIMENSION, 0.0) > (2.0 * static_cast(l) - 1.0)) { + double p[] = {static_cast(powers[DIMENSION * i + 0]), + static_cast(powers[DIMENSION * i + 1])}; + if (std::accumulate(p, p + DIMENSION, 0.0) > + (2.0 * static_cast(l) - 1.0)) { continue; } polynomial.add(c, p); @@ -1029,19 +1153,23 @@ void CIntegrationTest::testMultivariateSmooth() { switch (l) { case 2: successful = - CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + CIntegration::sparseGaussLegendre( + polynomial, a, b, actual); break; case 3: successful = - CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + CIntegration::sparseGaussLegendre( + polynomial, a, b, actual); break; case 4: successful = - CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + CIntegration::sparseGaussLegendre( + polynomial, a, b, actual); break; case 5: successful = - CIntegration::sparseGaussLegendre(polynomial, a, b, 
actual); + CIntegration::sparseGaussLegendre( + polynomial, a, b, actual); break; default: break; @@ -1080,7 +1208,8 @@ void CIntegrationTest::testMultivariateSmooth() { static_cast(powers[5 * i + 2]), static_cast(powers[5 * i + 3]), static_cast(powers[5 * i + 4])}; - if (std::accumulate(p, p + DIMENSION, 0.0) > (2.0 * static_cast(l) - 1.0)) { + if (std::accumulate(p, p + DIMENSION, 0.0) > + (2.0 * static_cast(l) - 1.0)) { continue; } polynomial.add(c, p); @@ -1104,19 +1233,23 @@ void CIntegrationTest::testMultivariateSmooth() { switch (l) { case 2: successful = - CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + CIntegration::sparseGaussLegendre( + polynomial, a, b, actual); break; case 3: successful = - CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + CIntegration::sparseGaussLegendre( + polynomial, a, b, actual); break; case 4: successful = - CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + CIntegration::sparseGaussLegendre( + polynomial, a, b, actual); break; case 5: successful = - CIntegration::sparseGaussLegendre(polynomial, a, b, actual); + CIntegration::sparseGaussLegendre( + polynomial, a, b, actual); break; default: break; @@ -1134,12 +1267,14 @@ void CIntegrationTest::testMultivariateSmooth() { CppUnit::Test* CIntegrationTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CIntegrationTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CIntegrationTest::testAllSingleVariate", &CIntegrationTest::testAllSingleVariate)); - suiteOfTests->addTest(new CppUnit::TestCaller("CIntegrationTest::testAdaptive", &CIntegrationTest::testAdaptive)); - suiteOfTests->addTest(new CppUnit::TestCaller("CIntegrationTest::testSparseGrid", &CIntegrationTest::testSparseGrid)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CIntegrationTest::testMultivariateSmooth", &CIntegrationTest::testMultivariateSmooth)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CIntegrationTest::testAllSingleVariate", &CIntegrationTest::testAllSingleVariate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CIntegrationTest::testAdaptive", &CIntegrationTest::testAdaptive)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CIntegrationTest::testSparseGrid", &CIntegrationTest::testSparseGrid)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CIntegrationTest::testMultivariateSmooth", &CIntegrationTest::testMultivariateSmooth)); return suiteOfTests; } diff --git a/lib/maths/unittest/CKMeansFastTest.cc b/lib/maths/unittest/CKMeansFastTest.cc index 2937d9b3d7..64337321f8 100644 --- a/lib/maths/unittest/CKMeansFastTest.cc +++ b/lib/maths/unittest/CKMeansFastTest.cc @@ -71,7 +71,8 @@ struct SKdTreeDataInvariantsChecker { } CPPUNIT_ASSERT_EQUAL(bb.print(), node.boundingBox().print()); - CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::print(centroid), maths::CBasicStatistics::print(node.centroid())); + CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::print(centroid), + maths::CBasicStatistics::print(node.centroid())); } }; @@ -85,7 +86,8 @@ class CCentreFilterChecker { public: CCentreFilterChecker(const TPointVec& centres, std::size_t& numberAdmitted) - : m_Centres(centres), m_CentreFilter(centres), m_NumberAdmitted(numberAdmitted) {} + : m_Centres(centres), m_CentreFilter(centres), + m_NumberAdmitted(numberAdmitted) {} bool operator()(const typename maths::CKdTree::SNode& node) const { using TDoubleSizePr = std::pair; @@ -97,7 +99,8 @@ class CCentreFilterChecker { closest.add(TDoubleSizePr((m_Centres[i] - node.s_Point).euclidean(), i)); } 
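// Aside: a minimal sketch of the bounding box test this centre filter is
// exercising (hypothetical helper names, not the library's API). A candidate
// centre z can be pruned for a kd-tree node when no point of the node's
// bounding box [lo, hi] is strictly closer to z than to the current best
// centre zStar; it suffices to test the single box corner which is extremal
// in the direction z - zStar, because the difference of the squared
// distances to the two centres is linear over the box.
#include <cstddef>
#include <vector>

using TDoubleVec = std::vector<double>;

double squaredDistance(const TDoubleVec& x, const TDoubleVec& y) {
    double result = 0.0;
    for (std::size_t d = 0; d < x.size(); ++d) {
        result += (x[d] - y[d]) * (x[d] - y[d]);
    }
    return result;
}

// True if centre z cannot be the closest centre for any point in [lo, hi].
bool canPrune(const TDoubleVec& lo, const TDoubleVec& hi,
              const TDoubleVec& z, const TDoubleVec& zStar) {
    TDoubleVec corner(z.size());
    for (std::size_t d = 0; d < z.size(); ++d) {
        // Pick the corner most favourable to z.
        corner[d] = z[d] > zStar[d] ? hi[d] : lo[d];
    }
    return squaredDistance(corner, z) >= squaredDistance(corner, zStar);
}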
closest.sort(); - if (std::find(filtered.begin(), filtered.end(), closest[0].second) == filtered.end()) { + if (std::find(filtered.begin(), filtered.end(), closest[0].second) == + filtered.end()) { LOG_DEBUG(<< "filtered = " << core::CContainerPrinter::print(filtered)); LOG_DEBUG(<< "closest = " << closest.print()); CPPUNIT_ASSERT(false); @@ -247,7 +250,9 @@ void CKMeansFastTest::testFilter() { std::size_t numberAdmitted = 0; CCentreFilterChecker checker(centres, numberAdmitted); tree.preorderDepthFirst(checker); - double speedup = static_cast(points.size()) * static_cast(centres.size()) / static_cast(numberAdmitted); + double speedup = static_cast(points.size()) * + static_cast(centres.size()) / + static_cast(numberAdmitted); LOG_DEBUG(<< " speedup = " << speedup); CPPUNIT_ASSERT(speedup > 30.0); } @@ -271,7 +276,9 @@ void CKMeansFastTest::testFilter() { std::size_t numberAdmitted = 0; CCentreFilterChecker checker(centres, numberAdmitted); tree.preorderDepthFirst(checker); - double speedup = static_cast(points.size()) * static_cast(centres.size()) / static_cast(numberAdmitted); + double speedup = static_cast(points.size()) * + static_cast(centres.size()) / + static_cast(numberAdmitted); LOG_DEBUG(<< " speedup = " << speedup); CPPUNIT_ASSERT(speedup > 5.5); } @@ -319,9 +326,12 @@ void CKMeansFastTest::testCentroids() { for (std::size_t j = 0u; j < points.size(); ++j) { expectedCentroids[closest(centres, points[j]).first].add(points[j]); } - LOG_DEBUG(<< " expected centroids = " << core::CContainerPrinter::print(expectedCentroids)); - LOG_DEBUG(<< " centroids = " << core::CContainerPrinter::print(centroids)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentroids), core::CContainerPrinter::print(centroids)); + LOG_DEBUG(<< " expected centroids = " + << core::CContainerPrinter::print(expectedCentroids)); + LOG_DEBUG(<< " centroids = " + << core::CContainerPrinter::print(centroids)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentroids), + core::CContainerPrinter::print(centroids)); } { LOG_DEBUG(<< "Vector4"); @@ -346,9 +356,12 @@ void CKMeansFastTest::testCentroids() { for (std::size_t j = 0u; j < points.size(); ++j) { expectedCentroids[closest(centres, points[j]).first].add(points[j]); } - LOG_DEBUG(<< " expected centroids = " << core::CContainerPrinter::print(expectedCentroids)); - LOG_DEBUG(<< " centroids = " << core::CContainerPrinter::print(centroids)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentroids), core::CContainerPrinter::print(centroids)); + LOG_DEBUG(<< " expected centroids = " + << core::CContainerPrinter::print(expectedCentroids)); + LOG_DEBUG(<< " centroids = " + << core::CContainerPrinter::print(centroids)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentroids), + core::CContainerPrinter::print(centroids)); } } } @@ -387,7 +400,8 @@ void CKMeansFastTest::testClosestPoints() { tree.postorderDepthFirst(CKMeansFastForTest::TDataPropagator()); TVector2VecVec closestPoints; - CKMeansFastForTest::TClosestPointsCollector collector(points.size(), centres, closestPoints); + CKMeansFastForTest::TClosestPointsCollector collector( + points.size(), centres, closestPoints); tree.postorderDepthFirst(collector); for (std::size_t j = 0u; j < closestPoints.size(); ++j) { @@ -411,7 +425,8 @@ void CKMeansFastTest::testClosestPoints() { tree.postorderDepthFirst(CKMeansFastForTest::TDataPropagator()); TVector4VecVec closestPoints; - CKMeansFastForTest::TClosestPointsCollector collector(points.size(), centres, 
closestPoints); + CKMeansFastForTest::TClosestPointsCollector collector( + points.size(), centres, closestPoints); tree.postorderDepthFirst(collector); for (std::size_t j = 0u; j < closestPoints.size(); ++j) { @@ -463,9 +478,11 @@ void CKMeansFastTest::testRun() { LOG_DEBUG(<< "converged = " << converged); LOG_DEBUG(<< "fast converged = " << fastConverged); LOG_DEBUG(<< "centres = " << core::CContainerPrinter::print(centres)); - LOG_DEBUG(<< "fast centres = " << core::CContainerPrinter::print(kmeansFast.centres())); + LOG_DEBUG(<< "fast centres = " + << core::CContainerPrinter::print(kmeansFast.centres())); CPPUNIT_ASSERT_EQUAL(converged, fastConverged); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(centres), core::CContainerPrinter::print(kmeansFast.centres())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(centres), + core::CContainerPrinter::print(kmeansFast.centres())); } } } @@ -483,8 +500,9 @@ void CKMeansFastTest::testRunWithSphericalClusters() { using TSphericalCluster2Vec = std::vector; using TMeanVar2Accumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; - double means[][2] = { - {1.0, 1.0}, {2.0, 1.5}, {1.5, 1.5}, {1.9, 1.5}, {1.0, 1.5}, {10.0, 15.0}, {12.0, 13.5}, {12.0, 11.5}, {14.0, 10.5}}; + double means[][2] = {{1.0, 1.0}, {2.0, 1.5}, {1.5, 1.5}, + {1.9, 1.5}, {1.0, 1.5}, {10.0, 15.0}, + {12.0, 13.5}, {12.0, 11.5}, {14.0, 10.5}}; std::size_t counts[] = {10, 15, 5, 8, 17, 10, 11, 8, 12}; double lowerTriangle[] = {1.0, 0.0, 1.0}; @@ -534,13 +552,17 @@ void CKMeansFastTest::testRunWithSphericalClusters() { TVector2Vec kmeansPointsCentres = kmeansPoints.centres(); TSphericalCluster2Vec kmeansClustersCentres_ = kmeansClusters.centres(); - TVector2Vec kmeansClustersCentres(kmeansClustersCentres_.begin(), kmeansClustersCentres_.end()); + TVector2Vec kmeansClustersCentres(kmeansClustersCentres_.begin(), + kmeansClustersCentres_.end()); std::sort(kmeansPointsCentres.begin(), kmeansPointsCentres.end()); std::sort(kmeansClustersCentres.begin(), kmeansClustersCentres.end()); - LOG_DEBUG(<< "k-means points = " << core::CContainerPrinter::print(kmeansPointsCentres)); - LOG_DEBUG(<< "k-means clusters = " << core::CContainerPrinter::print(kmeansClustersCentres)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(kmeansPointsCentres), core::CContainerPrinter::print(kmeansClustersCentres)); + LOG_DEBUG(<< "k-means points = " + << core::CContainerPrinter::print(kmeansPointsCentres)); + LOG_DEBUG(<< "k-means clusters = " + << core::CContainerPrinter::print(kmeansClustersCentres)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(kmeansPointsCentres), + core::CContainerPrinter::print(kmeansClustersCentres)); } } @@ -597,14 +619,16 @@ void CKMeansFastTest::testPlusPlus() { TVector2Vec plusPlusCentres; maths::CPRNG::CXorOShiro128Plus rng_; - maths::CKMeansPlusPlusInitialization kmeansPlusPlus(rng_); + maths::CKMeansPlusPlusInitialization kmeansPlusPlus( + rng_); kmeansPlusPlus.run(flatPoints, k, plusPlusCentres); TSizeVec sampledClusters; for (std::size_t i = 0u; i < plusPlusCentres.size(); ++i) { std::size_t j = 0u; for (/**/; j < points.size(); ++j) { - TVector2VecCItr next = std::lower_bound(points[j].begin(), points[j].end(), plusPlusCentres[i]); + TVector2VecCItr next = std::lower_bound( + points[j].begin(), points[j].end(), plusPlusCentres[i]); if (next != points[j].end() && *next == plusPlusCentres[i]) { break; } @@ -612,7 +636,9 @@ void CKMeansFastTest::testPlusPlus() { sampledClusters.push_back(j); } std::sort(sampledClusters.begin(), 
sampledClusters.end()); - sampledClusters.erase(std::unique(sampledClusters.begin(), sampledClusters.end()), sampledClusters.end()); + sampledClusters.erase( + std::unique(sampledClusters.begin(), sampledClusters.end()), + sampledClusters.end()); CPPUNIT_ASSERT(sampledClusters.size() >= 2); numberClustersSampled.add(static_cast(sampledClusters.size())); @@ -645,7 +671,8 @@ void CKMeansFastTest::testPlusPlus() { maxSSRRatio = std::max(maxSSRRatio, ssrPlusPlus / ssrRandom); } - LOG_DEBUG(<< "# clusters sampled = " << maths::CBasicStatistics::mean(numberClustersSampled)); + LOG_DEBUG(<< "# clusters sampled = " + << maths::CBasicStatistics::mean(numberClustersSampled)); LOG_DEBUG(<< "min ratio = " << minSSRRatio); LOG_DEBUG(<< "mean ratio = " << maths::CBasicStatistics::mean(meanSSRRatio)); LOG_DEBUG(<< "max ratio = " << maxSSRRatio); @@ -653,22 +680,28 @@ void CKMeansFastTest::testPlusPlus() { CPPUNIT_ASSERT(minSSRRatio < 0.14); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanSSRRatio) < 0.9); CPPUNIT_ASSERT(maxSSRRatio < 9.0); - CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0, maths::CBasicStatistics::mean(numberClustersSampled), 0.3); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 4.0, maths::CBasicStatistics::mean(numberClustersSampled), 0.3); } CppUnit::Test* CKMeansFastTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CKMeansFastTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CKMeansFastTest::testDataPropagation", &CKMeansFastTest::testDataPropagation)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testFilter", &CKMeansFastTest::testFilter)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testCentroids", &CKMeansFastTest::testCentroids)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CKMeansFastTest::testClosestPoints", &CKMeansFastTest::testClosestPoints)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testRun", &CKMeansFastTest::testRun)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testRunWithSphericalClusters", - &CKMeansFastTest::testRunWithSphericalClusters)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansFastTest::testPlusPlus", &CKMeansFastTest::testPlusPlus)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansFastTest::testDataPropagation", &CKMeansFastTest::testDataPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansFastTest::testFilter", &CKMeansFastTest::testFilter)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansFastTest::testCentroids", &CKMeansFastTest::testCentroids)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansFastTest::testClosestPoints", &CKMeansFastTest::testClosestPoints)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansFastTest::testRun", &CKMeansFastTest::testRun)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansFastTest::testRunWithSphericalClusters", + &CKMeansFastTest::testRunWithSphericalClusters)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansFastTest::testPlusPlus", &CKMeansFastTest::testPlusPlus)); return suiteOfTests; } diff --git a/lib/maths/unittest/CKMeansOnlineTest.cc b/lib/maths/unittest/CKMeansOnlineTest.cc index 6a851632c5..6b72831127 100644 --- a/lib/maths/unittest/CKMeansOnlineTest.cc +++ b/lib/maths/unittest/CKMeansOnlineTest.cc @@ -40,16 +40,20 @@ class CKMeansOnlineTestForTest : public maths::CKMeansOnline { public: using TSphericalClusterVec = typename maths::CKMeansOnline::TSphericalClusterVec; using TDoubleMeanVarAccumulator = typename 
maths::CKMeansOnline::TDoubleMeanVarAccumulator; - using TFloatMeanAccumulatorDoublePr = typename maths::CKMeansOnline::TFloatMeanAccumulatorDoublePr; + using TFloatMeanAccumulatorDoublePr = + typename maths::CKMeansOnline::TFloatMeanAccumulatorDoublePr; public: - CKMeansOnlineTestForTest(std::size_t k, double decayRate = 0.0) : maths::CKMeansOnline(k, decayRate) {} + CKMeansOnlineTestForTest(std::size_t k, double decayRate = 0.0) + : maths::CKMeansOnline(k, decayRate) {} static void add(const POINT& x, double count, TFloatMeanAccumulatorDoublePr& cluster) { maths::CKMeansOnline::add(x, count, cluster); } - static double variance(const TDoubleMeanVarAccumulator& moments) { return maths::CKMeansOnline::variance(moments); } + static double variance(const TDoubleMeanVarAccumulator& moments) { + return maths::CKMeansOnline::variance(moments); + } }; template @@ -77,7 +81,8 @@ void CKMeansOnlineTest::testVariance() { rng.generateUniformSamples(0.0, 10.0, 50, coordinates); TVector5Vec points; for (std::size_t i = 0u; i < coordinates.size(); i += 5) { - double c[] = {coordinates[i + 0], coordinates[i + 1], coordinates[i + 2], coordinates[i + 3], coordinates[i + 4]}; + double c[] = {coordinates[i + 0], coordinates[i + 1], coordinates[i + 2], + coordinates[i + 3], coordinates[i + 4]}; points.push_back(TVector5(c)); } @@ -90,11 +95,13 @@ void CKMeansOnlineTest::testVariance() { } LOG_DEBUG(<< "actual = " << CKMeansOnlineTestForTest::variance(actual)); - LOG_DEBUG(<< "expected = " << maths::CBasicStatistics::maximumLikelihoodVariance(expected)); + LOG_DEBUG(<< "expected = " + << maths::CBasicStatistics::maximumLikelihoodVariance(expected)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::maximumLikelihoodVariance(expected), - CKMeansOnlineTestForTest::variance(actual), - 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::maximumLikelihoodVariance(expected), + CKMeansOnlineTestForTest::variance(actual), + 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected)); } } @@ -132,15 +139,21 @@ void CKMeansOnlineTest::testAdd() { TVector2 ones(1.0); - LOG_DEBUG(<< "actual = " << maths::CBasicStatistics::mean(actual.first) << "," << actual.second); - LOG_DEBUG(<< "expected = " << maths::CBasicStatistics::mean(expected) << "," - << maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / static_cast(ones.dimension())); + LOG_DEBUG(<< "actual = " << maths::CBasicStatistics::mean(actual.first) + << "," << actual.second); + LOG_DEBUG( + << "expected = " << maths::CBasicStatistics::mean(expected) << "," + << maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / + static_cast(ones.dimension())); - CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), print(maths::CBasicStatistics::mean(actual.first))); + CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), + print(maths::CBasicStatistics::mean(actual.first))); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / static_cast(ones.dimension()), + maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / + static_cast(ones.dimension()), actual.second, - 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / static_cast(ones.dimension())); + 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones) / + static_cast(ones.dimension())); } } @@ -194,11 +207,15 @@ void 
CKMeansOnlineTest::testReduce() { LOG_DEBUG(<< "expected = " << expected); LOG_DEBUG(<< "actual = " << actual); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::count(expected), maths::CBasicStatistics::count(actual), 1e-10); - CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), print(maths::CBasicStatistics::mean(actual))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones), - maths::CBasicStatistics::maximumLikelihoodVariance(actual).inner(ones), - 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::count(expected), + maths::CBasicStatistics::count(actual), 1e-10); + CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), + print(maths::CBasicStatistics::mean(actual))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones), + maths::CBasicStatistics::maximumLikelihoodVariance(actual).inner(ones), + 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected) + .inner(ones)); } } } @@ -240,7 +257,8 @@ void CKMeansOnlineTest::testClustering() { TVector2VecVec clusters; maths::CPRNG::CXorOShiro128Plus rng_; for (std::size_t i = 0u; i < 10; ++i) { - maths::CKMeansPlusPlusInitialization seedCentres(rng_); + maths::CKMeansPlusPlusInitialization seedCentres( + rng_); seedCentres.run(points, 2, centres); kmeans.setCentres(centres); kmeans.run(10); @@ -273,9 +291,11 @@ void CKMeansOnlineTest::testClustering() { LOG_DEBUG(<< "cost = " << cost); LOG_DEBUG(<< "cost online = " << costOnline); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(costOnline), maths::CBasicStatistics::mean(cost), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(costOnline), + maths::CBasicStatistics::mean(cost), 1e-10); CPPUNIT_ASSERT_DOUBLES_EQUAL( - std::sqrt(maths::CBasicStatistics::variance(costOnline)), std::sqrt(maths::CBasicStatistics::variance(cost)), 1e-10); + std::sqrt(maths::CBasicStatistics::variance(costOnline)), + std::sqrt(maths::CBasicStatistics::variance(cost)), 1e-10); } { @@ -302,7 +322,8 @@ void CKMeansOnlineTest::testClustering() { TVector2VecVec clusters; maths::CPRNG::CXorOShiro128Plus rng_; for (std::size_t i = 0u; i < 10; ++i) { - maths::CKMeansPlusPlusInitialization seedCentres(rng_); + maths::CKMeansPlusPlusInitialization seedCentres( + rng_); seedCentres.run(points, 3, centres); kmeans.setCentres(centres); kmeans.run(10); @@ -334,7 +355,8 @@ void CKMeansOnlineTest::testClustering() { LOG_DEBUG(<< "cost = " << cost); LOG_DEBUG(<< "cost online = " << costOnline); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(costOnline) <= 1.01 * maths::CBasicStatistics::mean(cost)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(costOnline) <= + 1.01 * maths::CBasicStatistics::mean(cost)); CPPUNIT_ASSERT(std::sqrt(maths::CBasicStatistics::variance(costOnline)) <= 26.0 * std::sqrt(maths::CBasicStatistics::variance(cost))); } @@ -401,7 +423,8 @@ void CKMeansOnlineTest::testSplit() { LOG_DEBUG(<< "expected clusters = " << core::CContainerPrinter::print(expected)); LOG_DEBUG(<< "actual clusters = " << core::CContainerPrinter::print(actual)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(actual)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), + core::CContainerPrinter::print(actual)); } } @@ -431,7 +454,8 @@ void CKMeansOnlineTest::testMerge() { } } - maths::CKMeansOnline kmeans[] = 
{maths::CKMeansOnline(20), maths::CKMeansOnline(25)}; + maths::CKMeansOnline kmeans[] = {maths::CKMeansOnline(20), + maths::CKMeansOnline(25)}; for (std::size_t i = 0u; i < 2; ++i) { for (std::size_t j = 0u; j < points[i].size(); ++j) { kmeans[i].add(points[i][j]); @@ -460,11 +484,14 @@ void CKMeansOnlineTest::testMerge() { LOG_DEBUG(<< "expected = " << expected); LOG_DEBUG(<< "actual = " << actual); - CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::count(expected), maths::CBasicStatistics::count(actual)); - CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), print(maths::CBasicStatistics::mean(actual))); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones), - maths::CBasicStatistics::maximumLikelihoodVariance(actual).inner(ones), - 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones)); + CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::count(expected), + maths::CBasicStatistics::count(actual)); + CPPUNIT_ASSERT_EQUAL(print(maths::CBasicStatistics::mean(expected)), + print(maths::CBasicStatistics::mean(actual))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones), + maths::CBasicStatistics::maximumLikelihoodVariance(actual).inner(ones), + 1e-10 * maths::CBasicStatistics::maximumLikelihoodVariance(expected).inner(ones)); } void CKMeansOnlineTest::testPropagateForwardsByTime() { @@ -556,7 +583,8 @@ void CKMeansOnlineTest::testSample() { kmeans.sample(i + 1, sampled); std::sort(sampled.begin(), sampled.end()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), core::CContainerPrinter::print(sampled)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), + core::CContainerPrinter::print(sampled)); } for (std::size_t i = 10u; i < samples.size(); ++i) { @@ -570,7 +598,8 @@ void CKMeansOnlineTest::testSample() { maths::CBasicStatistics::SSampleCovariances sampleCovariances[2]; for (std::size_t i = 0u; i < sampled.size(); ++i) { - if ((sampled[i] - TVector2(means[0])).euclidean() < (sampled[i] - TVector2(means[1])).euclidean()) { + if ((sampled[i] - TVector2(means[0])).euclidean() < + (sampled[i] - TVector2(means[1])).euclidean()) { sampleCovariances[0].add(sampled[i]); } else { sampleCovariances[1].add(sampled[i]); @@ -578,9 +607,11 @@ void CKMeansOnlineTest::testSample() { } TVector2 expectedMean0 = maths::CBasicStatistics::mean(expectedSampleCovariances[0]); - TMatrix2 expectedCovariance0 = maths::CBasicStatistics::covariances(expectedSampleCovariances[0]); + TMatrix2 expectedCovariance0 = + maths::CBasicStatistics::covariances(expectedSampleCovariances[0]); TVector2 expectedMean1 = maths::CBasicStatistics::mean(expectedSampleCovariances[1]); - TMatrix2 expectedCovariance1 = maths::CBasicStatistics::covariances(expectedSampleCovariances[1]); + TMatrix2 expectedCovariance1 = + maths::CBasicStatistics::covariances(expectedSampleCovariances[1]); TVector2 mean0 = maths::CBasicStatistics::mean(sampleCovariances[0]); TMatrix2 covariance0 = maths::CBasicStatistics::covariances(sampleCovariances[0]); TVector2 mean1 = maths::CBasicStatistics::mean(sampleCovariances[1]); @@ -592,13 +623,15 @@ void CKMeansOnlineTest::testSample() { LOG_DEBUG(<< "mean, variance 1 = " << mean1 << ", " << covariance1); double meanError0 = (mean0 - expectedMean0).euclidean() / expectedMean0.euclidean(); - double covarianceError0 = (covariance0 - expectedCovariance0).frobenius() / expectedCovariance0.frobenius(); + double covarianceError0 = 
(covariance0 - expectedCovariance0).frobenius() / + expectedCovariance0.frobenius(); LOG_DEBUG(<< "mean error 0 = " << meanError0 << ", covariance error 0 = " << covarianceError0); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError0, 0.01); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, covarianceError0, 0.27); double meanError1 = (mean1 - expectedMean1).euclidean() / expectedMean0.euclidean(); - double covarianceError1 = (covariance1 - expectedCovariance1).frobenius() / expectedCovariance1.frobenius(); + double covarianceError1 = (covariance1 - expectedCovariance1).frobenius() / + expectedCovariance1.frobenius(); LOG_DEBUG(<< "mean error 1 = " << meanError1 << ", covariance error 1 = " << covarianceError1); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError1, 0.01); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, covarianceError1, 0.24); @@ -639,15 +672,15 @@ void CKMeansOnlineTest::testPersist() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CKMeansOnline restoredKmeans(0); - maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, - 0.1, - maths::MINIMUM_CLUSTER_SPLIT_FRACTION, - maths::MINIMUM_CLUSTER_SPLIT_COUNT, - maths::MINIMUM_CATEGORY_COUNT); + maths::SDistributionRestoreParams params( + maths_t::E_ContinuousData, 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, + maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); CPPUNIT_ASSERT(traverser.traverseSubLevel( - boost::bind(&maths::CKMeansOnline::acceptRestoreTraverser, &restoredKmeans, boost::cref(params), _1))); + boost::bind(&maths::CKMeansOnline::acceptRestoreTraverser, + &restoredKmeans, boost::cref(params), _1))); - LOG_DEBUG(<< "orig checksum = " << origKmeans.checksum() << ", new checksum = " << restoredKmeans.checksum()); + LOG_DEBUG(<< "orig checksum = " << origKmeans.checksum() + << ", new checksum = " << restoredKmeans.checksum()); CPPUNIT_ASSERT_EQUAL(origKmeans.checksum(), restoredKmeans.checksum()); std::string newXml; @@ -662,17 +695,25 @@ void CKMeansOnlineTest::testPersist() { CppUnit::Test* CKMeansOnlineTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CKMeansOnlineTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansOnlineTest::testVariance", &CKMeansOnlineTest::testVariance)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansOnlineTest::testAdd", &CKMeansOnlineTest::testAdd)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansOnlineTest::testReduce", &CKMeansOnlineTest::testReduce)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CKMeansOnlineTest::testClustering", &CKMeansOnlineTest::testClustering)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansOnlineTest::testSplit", &CKMeansOnlineTest::testSplit)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansOnlineTest::testMerge", &CKMeansOnlineTest::testMerge)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansOnlineTest::testPropagateForwardsByTime", - &CKMeansOnlineTest::testPropagateForwardsByTime)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansOnlineTest::testSample", &CKMeansOnlineTest::testSample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMeansOnlineTest::testPersist", &CKMeansOnlineTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansOnlineTest::testVariance", &CKMeansOnlineTest::testVariance)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansOnlineTest::testAdd", &CKMeansOnlineTest::testAdd)); + suiteOfTests->addTest(new CppUnit::TestCaller( + 
"CKMeansOnlineTest::testReduce", &CKMeansOnlineTest::testReduce)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansOnlineTest::testClustering", &CKMeansOnlineTest::testClustering)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansOnlineTest::testSplit", &CKMeansOnlineTest::testSplit)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansOnlineTest::testMerge", &CKMeansOnlineTest::testMerge)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansOnlineTest::testPropagateForwardsByTime", + &CKMeansOnlineTest::testPropagateForwardsByTime)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansOnlineTest::testSample", &CKMeansOnlineTest::testSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMeansOnlineTest::testPersist", &CKMeansOnlineTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CKMostCorrelatedTest.cc b/lib/maths/unittest/CKMostCorrelatedTest.cc index 1fd9aef0f0..3f4800d49a 100644 --- a/lib/maths/unittest/CKMostCorrelatedTest.cc +++ b/lib/maths/unittest/CKMostCorrelatedTest.cc @@ -39,23 +39,35 @@ class CKMostCorrelatedForTest : public maths::CKMostCorrelated { using TCorrelation = maths::CKMostCorrelated::SCorrelation; using TCorrelationVec = maths::CKMostCorrelated::TCorrelationVec; using TSizeVectorPackedBitVectorPrUMap = maths::CKMostCorrelated::TSizeVectorPackedBitVectorPrUMap; - using TSizeVectorPackedBitVectorPrUMapCItr = maths::CKMostCorrelated::TSizeVectorPackedBitVectorPrUMapCItr; + using TSizeVectorPackedBitVectorPrUMapCItr = + maths::CKMostCorrelated::TSizeVectorPackedBitVectorPrUMapCItr; using TMeanVarAccumulatorVec = maths::CKMostCorrelated::TMeanVarAccumulatorVec; using maths::CKMostCorrelated::correlations; using maths::CKMostCorrelated::mostCorrelated; public: - CKMostCorrelatedForTest(std::size_t size, double decayRate) : maths::CKMostCorrelated(size, decayRate) {} + CKMostCorrelatedForTest(std::size_t size, double decayRate) + : maths::CKMostCorrelated(size, decayRate) {} - void mostCorrelated(TCorrelationVec& result) const { this->maths::CKMostCorrelated::mostCorrelated(result); } + void mostCorrelated(TCorrelationVec& result) const { + this->maths::CKMostCorrelated::mostCorrelated(result); + } - const TVectorVec& projections() const { return this->maths::CKMostCorrelated::projections(); } + const TVectorVec& projections() const { + return this->maths::CKMostCorrelated::projections(); + } - const TSizeVectorPackedBitVectorPrUMap& projected() const { return this->maths::CKMostCorrelated::projected(); } + const TSizeVectorPackedBitVectorPrUMap& projected() const { + return this->maths::CKMostCorrelated::projected(); + } - const TCorrelationVec& correlations() const { return this->maths::CKMostCorrelated::correlations(); } + const TCorrelationVec& correlations() const { + return this->maths::CKMostCorrelated::correlations(); + } - const TMeanVarAccumulatorVec& moments() const { return this->maths::CKMostCorrelated::moments(); } + const TMeanVarAccumulatorVec& moments() const { + return this->maths::CKMostCorrelated::moments(); + } }; double mutualInformation(const TDoubleVec& p1, const TDoubleVec& p2) { @@ -77,14 +89,17 @@ double mutualInformation(const TDoubleVec& p1, const TDoubleVec& p2) { for (std::size_t i = 0u; i < 2; ++i) { for (std::size_t j = 0u; j < 2; ++j) { if (f12[i][j] > 0.0) { - I += f12[i][j] / static_cast(n) * std::log(f12[i][j] * static_cast(n) / f1[i] / f2[j]); + I += f12[i][j] / static_cast(n) * + std::log(f12[i][j] * static_cast(n) / f1[i] / f2[j]); } } if (f1[i] > 0.0) { 
- H1 -= f1[i] / static_cast(n) * std::log(f1[i] / static_cast(n)); + H1 -= f1[i] / static_cast(n) * + std::log(f1[i] / static_cast(n)); } if (f2[i] > 0.0) { - H2 -= f2[i] / static_cast(n) * std::log(f2[i] / static_cast(n)); + H2 -= f2[i] / static_cast(n) * + std::log(f2[i] / static_cast(n)); } } @@ -131,15 +146,18 @@ void estimateCorrelation(const std::size_t trials, for (std::size_t i = 0u; i < projections.size(); ++i) { sampleMoments.add(samples[i]); if (maths::CBasicStatistics::count(sampleMoments) > 1.0) { - px += projections[i] * (samples[i](0) - maths::CBasicStatistics::mean(sampleMoments)(0)) / + px += projections[i] * + (samples[i](0) - maths::CBasicStatistics::mean(sampleMoments)(0)) / std::sqrt(maths::CBasicStatistics::variance(sampleMoments)(0)); - py += projections[i] * (samples[i](1) - maths::CBasicStatistics::mean(sampleMoments)(1)) / + py += projections[i] * + (samples[i](1) - maths::CBasicStatistics::mean(sampleMoments)(1)) / std::sqrt(maths::CBasicStatistics::variance(sampleMoments)(1)); } } maths::CPackedBitVector ix(50, true); maths::CPackedBitVector iy(50, true); - double correlation = CKMostCorrelatedForTest::TCorrelation::correlation(px, ix, py, iy); + double correlation = + CKMostCorrelatedForTest::TCorrelation::correlation(px, ix, py, iy); if (t % 10 == 0) { LOG_DEBUG(<< "correlation = " << correlation); } @@ -171,7 +189,8 @@ void CKMostCorrelatedTest::testCorrelation() { LOG_DEBUG(<< "correlationEstimate = " << correlationEstimate); double sd = std::sqrt(maths::CBasicStatistics::variance(correlationEstimate)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.2, maths::CBasicStatistics::mean(correlationEstimate), 3.0 * sd / 10.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.2, maths::CBasicStatistics::mean(correlationEstimate), 3.0 * sd / 10.0); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, sd, 0.5); } { @@ -187,7 +206,8 @@ void CKMostCorrelatedTest::testCorrelation() { LOG_DEBUG(<< "correlation = " << correlationEstimate); double sd = std::sqrt(maths::CBasicStatistics::variance(correlationEstimate)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.5, maths::CBasicStatistics::mean(correlationEstimate), 3.0 * sd / 10.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.5, maths::CBasicStatistics::mean(correlationEstimate), 3.0 * sd / 10.0); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, sd, 0.42); } { @@ -203,7 +223,8 @@ void CKMostCorrelatedTest::testCorrelation() { LOG_DEBUG(<< "correlation = " << correlationEstimate); double sd = std::sqrt(maths::CBasicStatistics::variance(correlationEstimate)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.9, maths::CBasicStatistics::mean(correlationEstimate), 3.0 * sd / 10.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.9, maths::CBasicStatistics::mean(correlationEstimate), 3.0 * sd / 10.0); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, sd, 0.2); } } @@ -258,7 +279,8 @@ void CKMostCorrelatedTest::testNextProjection() { for (std::size_t i = 0u; i < 19; ++i) { for (std::size_t j = 0u, X = 0u; j < variables; j += 2) { for (std::size_t k = 0u; k < boost::size(combinations); ++k, ++X) { - double x = combinations[k][0] * samples[i * variables + j] + combinations[k][1] * samples[i * variables + j + 1]; + double x = combinations[k][0] * samples[i * variables + j] + + combinations[k][1] * samples[i * variables + j + 1]; mostCorrelated.add(X, x); } } @@ -268,7 +290,8 @@ void CKMostCorrelatedTest::testNextProjection() { // This should trigger the next projection to be generated. 
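// Aside: the correlation estimate these tests accumulate via px and py can
// be written down compactly (my notation, not the library's API). Project
// the two standardized series onto k independent random +/-1 vectors scaled
// by 1/sqrt(T); since E[s_t * s_u] is 1 when t == u and 0 otherwise, the
// mean over projections of the products of the projection coefficients is
// an unbiased estimate of the Pearson correlation, with variance shrinking
// as k grows.
#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

double projectedCorrelation(const std::vector<double>& x, // standardized series
                            const std::vector<double>& y, // standardized series
                            std::size_t k) {
    std::mt19937 rng{42};
    std::bernoulli_distribution sign{0.5};
    double T = static_cast<double>(x.size());
    double result = 0.0;
    for (std::size_t j = 0; j < k; ++j) {
        double px = 0.0;
        double py = 0.0;
        for (std::size_t t = 0; t < x.size(); ++t) {
            double s = (sign(rng) ? 1.0 : -1.0) / std::sqrt(T);
            px += s * x[t];
            py += s * y[t];
        }
        result += px * py;
    }
    return result / static_cast<double>(k);
}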
for (std::size_t i = 0u, X = 0u; i < variables; i += 2) { for (std::size_t j = 0u; j < boost::size(combinations); ++j, ++X) { - double x = combinations[j][0] * samples[19 * variables + i] + combinations[j][1] * samples[19 * variables + i + 1]; + double x = combinations[j][0] * samples[19 * variables + i] + + combinations[j][1] * samples[19 * variables + i + 1]; mostCorrelated.add(X, x); } } @@ -313,7 +336,8 @@ void CKMostCorrelatedTest::testNextProjection() { CPPUNIT_ASSERT(maths::CBasicStatistics::mean(I12) < 0.1); for (std::size_t i = 0u; i < moments1.size(); ++i) { - CPPUNIT_ASSERT(maths::CBasicStatistics::count(moments1[i]) > maths::CBasicStatistics::count(moments2[i])); + CPPUNIT_ASSERT(maths::CBasicStatistics::count(moments1[i]) > + maths::CBasicStatistics::count(moments2[i])); } for (std::size_t i = 0u; i < correlations2.size(); ++i) { CPPUNIT_ASSERT(maths::CBasicStatistics::count(correlations2[i].s_Correlation) > 0.0); @@ -328,7 +352,8 @@ void CKMostCorrelatedTest::testMostCorrelated() { // Check the variables with the highest estimated correlation emerge. - using TMaxCorrelationAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; + using TMaxCorrelationAccumulator = + maths::CBasicStatistics::COrderStatisticsHeap; maths::CSampling::seed(); @@ -347,7 +372,8 @@ void CKMostCorrelatedTest::testMostCorrelated() { for (std::size_t i = 0u; i < 19; ++i) { for (std::size_t j = 0u, X = 0u; j < variables; j += 2) { for (std::size_t k = 0u; k < boost::size(combinations); ++k, ++X) { - double x = combinations[k][0] * samples[i * variables + j] + combinations[k][1] * samples[i * variables + j + 1]; + double x = combinations[k][0] * samples[i * variables + j] + + combinations[k][1] * samples[i * variables + j + 1]; mostCorrelated.add(X, x); } } @@ -355,14 +381,16 @@ void CKMostCorrelatedTest::testMostCorrelated() { } TMaxCorrelationAccumulator expected(200); - for (CKMostCorrelatedForTest::TSizeVectorPackedBitVectorPrUMapCItr x = mostCorrelated.projected().begin(); - x != mostCorrelated.projected().end(); - ++x) { + for (CKMostCorrelatedForTest::TSizeVectorPackedBitVectorPrUMapCItr x = + mostCorrelated.projected().begin(); + x != mostCorrelated.projected().end(); ++x) { std::size_t X = x->first; CKMostCorrelatedForTest::TSizeVectorPackedBitVectorPrUMapCItr y = x; while (++y != mostCorrelated.projected().end()) { std::size_t Y = y->first; - CKMostCorrelatedForTest::TCorrelation cxy(X, x->second.first, x->second.second, Y, y->second.first, y->second.second); + CKMostCorrelatedForTest::TCorrelation cxy(X, x->second.first, + x->second.second, Y, + y->second.first, y->second.second); expected.add(cxy); } } @@ -372,7 +400,8 @@ void CKMostCorrelatedTest::testMostCorrelated() { CKMostCorrelatedForTest::TCorrelationVec actual; mostCorrelated.mostCorrelated(actual); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(actual)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), + core::CContainerPrinter::print(actual)); } void CKMostCorrelatedTest::testRemoveVariables() { @@ -397,7 +426,8 @@ void CKMostCorrelatedTest::testRemoveVariables() { for (std::size_t i = 0u; i < samples.size(); i += 10) { for (std::size_t j = 0u; j < 10; j += 2) { - samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; + samples[i + j + 1] = combinations[0][0] * samples[i + j] + + combinations[0][1] * samples[i + j + 1]; } } @@ -422,8 +452,10 @@ void CKMostCorrelatedTest::testRemoveVariables() { LOG_DEBUG(<< 
"correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs)); for (std::size_t i = 0u; i < correlatedPairs.size(); ++i) { - CPPUNIT_ASSERT(std::find(remove.begin(), remove.end(), correlatedPairs[i].first) == remove.end()); - CPPUNIT_ASSERT(std::find(remove.begin(), remove.end(), correlatedPairs[i].second) == remove.end()); + CPPUNIT_ASSERT(std::find(remove.begin(), remove.end(), + correlatedPairs[i].first) == remove.end()); + CPPUNIT_ASSERT(std::find(remove.begin(), remove.end(), + correlatedPairs[i].second) == remove.end()); } } @@ -451,7 +483,8 @@ void CKMostCorrelatedTest::testAccuracy() { for (std::size_t i = 0u; i < samples.size(); i += 10) { for (std::size_t j = 0u; j < 10; j += 2) { - samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; + samples[i + j + 1] = combinations[0][0] * samples[i + j] + + combinations[0][1] * samples[i + j + 1]; } } @@ -469,11 +502,17 @@ void CKMostCorrelatedTest::testAccuracy() { mostCorrelated.mostCorrelated(correlatedPairs); TDoubleVec correlations; mostCorrelated.correlations(correlations); - LOG_DEBUG(<< "correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs.begin(), correlatedPairs.begin() + 5)); - LOG_DEBUG(<< "correlations = " << core::CContainerPrinter::print(correlations.begin(), correlations.begin() + 5)); + LOG_DEBUG(<< "correlatedPairs = " + << core::CContainerPrinter::print( + correlatedPairs.begin(), correlatedPairs.begin() + 5)); + LOG_DEBUG(<< "correlations = " + << core::CContainerPrinter::print( + correlations.begin(), correlations.begin() + 5)); std::sort(correlatedPairs.begin(), correlatedPairs.begin() + 5); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]"), - core::CContainerPrinter::print(correlatedPairs.begin(), correlatedPairs.begin() + 5)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]"), + core::CContainerPrinter::print(correlatedPairs.begin(), + correlatedPairs.begin() + 5)); } } } @@ -502,10 +541,12 @@ void CKMostCorrelatedTest::testStability() { for (std::size_t i = 0u; i < samples.size(); i += 20) { for (std::size_t j = 0u; j < 10; j += 2) { - samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; + samples[i + j + 1] = combinations[0][0] * samples[i + j] + + combinations[0][1] * samples[i + j + 1]; } for (std::size_t j = 10u; j < 20; j += 2) { - samples[i + j + 1] = combinations[1][0] * samples[i + j] + combinations[1][1] * samples[i + j + 1]; + samples[i + j + 1] = combinations[1][0] * samples[i + j] + + combinations[1][1] * samples[i + j + 1]; } } @@ -523,7 +564,8 @@ void CKMostCorrelatedTest::testStability() { mostCorrelated.mostCorrelated(correlatedPairs); TDoubleVec correlations; mostCorrelated.correlations(correlations); - LOG_DEBUG(<< "correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs)); + LOG_DEBUG(<< "correlatedPairs = " + << core::CContainerPrinter::print(correlatedPairs)); LOG_DEBUG(<< "correlations = " << core::CContainerPrinter::print(correlations)); std::sort(correlatedPairs.begin(), correlatedPairs.begin() + 5); std::sort(correlatedPairs.begin() + 5, correlatedPairs.begin() + 10); @@ -556,10 +598,12 @@ void CKMostCorrelatedTest::testChangingCorrelation() { for (std::size_t i = 0u; i < samples.size(); i += 10) { for (std::size_t j = 0u; j < 8; j += 2) { - samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; + samples[i + j + 1] = combinations[0][0] * samples[i + j] 
+ + combinations[0][1] * samples[i + j + 1]; } if (i >= samples.size() / 3) { - samples[i + 9] = combinations[0][0] * samples[i + 8] + combinations[0][1] * samples[i + 9]; + samples[i + 9] = combinations[0][0] * samples[i + 8] + + combinations[0][1] * samples[i + 9]; } } @@ -572,12 +616,15 @@ void CKMostCorrelatedTest::testChangingCorrelation() { } mostCorrelated.capture(); } - LOG_DEBUG(<< "correlations = " << core::CContainerPrinter::print(mostCorrelated.correlations())); + LOG_DEBUG(<< "correlations = " + << core::CContainerPrinter::print(mostCorrelated.correlations())); bool present = false; for (std::size_t i = 0u; i < mostCorrelated.correlations().size(); ++i) { - if (mostCorrelated.correlations()[i].s_X == 8 && mostCorrelated.correlations()[i].s_Y == 9) { - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(mostCorrelated.correlations()[i].s_Correlation) > 0.7); + if (mostCorrelated.correlations()[i].s_X == 8 && + mostCorrelated.correlations()[i].s_Y == 9) { + CPPUNIT_ASSERT(maths::CBasicStatistics::mean( + mostCorrelated.correlations()[i].s_Correlation) > 0.7); present = true; } } @@ -607,7 +654,8 @@ void CKMostCorrelatedTest::testMissingData() { for (std::size_t i = 0u; i < samples.size(); i += 10) { for (std::size_t j = 0u; j < 10; j += 2) { - samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; + samples[i + j + 1] = combinations[0][0] * samples[i + j] + + combinations[0][1] * samples[i + j + 1]; } } @@ -632,12 +680,18 @@ void CKMostCorrelatedTest::testMissingData() { mostCorrelated.mostCorrelated(correlatedPairs); TDoubleVec correlations; mostCorrelated.correlations(correlations); - LOG_DEBUG(<< "correlatedPairs = " << core::CContainerPrinter::print(correlatedPairs.begin(), correlatedPairs.begin() + 5)); - LOG_DEBUG(<< "correlations = " << core::CContainerPrinter::print(correlations.begin(), correlations.begin() + 5)); + LOG_DEBUG(<< "correlatedPairs = " + << core::CContainerPrinter::print(correlatedPairs.begin(), + correlatedPairs.begin() + 5)); + LOG_DEBUG(<< "correlations = " + << core::CContainerPrinter::print(correlations.begin(), + correlations.begin() + 5)); std::sort(correlatedPairs.begin(), correlatedPairs.begin() + 3); std::sort(correlatedPairs.begin() + 3, correlatedPairs.begin() + 5); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (2, 3), (8, 9), (4, 5), (6, 7)]"), - core::CContainerPrinter::print(correlatedPairs.begin(), correlatedPairs.begin() + 5)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(0, 1), (2, 3), (8, 9), (4, 5), (6, 7)]"), + core::CContainerPrinter::print(correlatedPairs.begin(), + correlatedPairs.begin() + 5)); } } } @@ -663,9 +717,10 @@ void CKMostCorrelatedTest::testScale() { for (std::size_t s = 0u; s < boost::size(n); ++s) { double proportions[] = {0.2, 0.3, 0.5}; std::size_t b = 200; - std::size_t ns[] = {static_cast(static_cast(n[s] * b) * proportions[0]), - static_cast(static_cast(n[s] * b) * proportions[1]), - static_cast(static_cast(n[s] * b) * proportions[2])}; + std::size_t ns[] = { + static_cast(static_cast(n[s] * b) * proportions[0]), + static_cast(static_cast(n[s] * b) * proportions[1]), + static_cast(static_cast(n[s] * b) * proportions[2])}; TDoubleVec scales; rng.generateUniformSamples(10.0, 40.0, n[s], scales); @@ -703,8 +758,10 @@ void CKMostCorrelatedTest::testScale() { watch.start(); for (std::size_t i = 0u; i < samples.size(); ++i) { for (std::size_t j = 0u; j < samples[i].size(); j += 2) { - double x = weights[0][0] * samples[i][j] + weights[0][1] * samples[i][j + 1]; - double y = weights[1][0] 
* samples[i][j] + weights[1][1] * samples[i][j + 1]; + double x = weights[0][0] * samples[i][j] + + weights[0][1] * samples[i][j + 1]; + double y = weights[1][0] * samples[i][j] + + weights[1][1] * samples[i][j + 1]; mostCorrelated.add(j, x); mostCorrelated.add(j + 1, y); } @@ -731,7 +788,8 @@ void CKMostCorrelatedTest::testScale() { } double exponent = std::log(maths::CBasicStatistics::mean(slope)) / std::log(2.0); LOG_DEBUG(<< "exponent = " << exponent); - double sdRatio = std::sqrt(maths::CBasicStatistics::variance(slope)) / maths::CBasicStatistics::mean(slope); + double sdRatio = std::sqrt(maths::CBasicStatistics::variance(slope)) / + maths::CBasicStatistics::mean(slope); LOG_DEBUG(<< "sdRatio = " << sdRatio); // If $ML_KEEP_GOING is set then we're probably running in CI const char* keepGoingEnvVar{std::getenv("ML_KEEP_GOING")}; @@ -765,7 +823,8 @@ void CKMostCorrelatedTest::testPersistence() { for (std::size_t i = 0u; i < samples.size(); i += 10) { for (std::size_t j = 0u; j < 10; j += 2) { - samples[i + j + 1] = combinations[0][0] * samples[i + j] + combinations[0][1] * samples[i + j + 1]; + samples[i + j + 1] = combinations[0][0] * samples[i + j] + + combinations[0][1] * samples[i + j + 1]; } } @@ -792,9 +851,11 @@ void CKMostCorrelatedTest::testPersistence() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CKMostCorrelated restoredMostCorrelated(10, 0.001); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CKMostCorrelated::acceptRestoreTraverser, &restoredMostCorrelated, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( + &maths::CKMostCorrelated::acceptRestoreTraverser, &restoredMostCorrelated, _1))); - LOG_DEBUG(<< "orig checksum = " << origMostCorrelated.checksum() << ", new checksum = " << restoredMostCorrelated.checksum()); + LOG_DEBUG(<< "orig checksum = " << origMostCorrelated.checksum() + << ", new checksum = " << restoredMostCorrelated.checksum()); CPPUNIT_ASSERT_EQUAL(origMostCorrelated.checksum(), restoredMostCorrelated.checksum()); std::string newXml; @@ -808,26 +869,27 @@ void CKMostCorrelatedTest::testPersistence() { CppUnit::Test* CKMostCorrelatedTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CKMostCorrelatedTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CKMostCorrelatedTest::testCorrelation", &CKMostCorrelatedTest::testCorrelation)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMostCorrelatedTest::testNextProjection", - &CKMostCorrelatedTest::testNextProjection)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMostCorrelatedTest::testMostCorrelated", - &CKMostCorrelatedTest::testMostCorrelated)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMostCorrelatedTest::testRemoveVariables", - &CKMostCorrelatedTest::testRemoveVariables)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CKMostCorrelatedTest::testAccuracy", &CKMostCorrelatedTest::testAccuracy)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CKMostCorrelatedTest::testStability", &CKMostCorrelatedTest::testStability)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKMostCorrelatedTest::testChangingCorrelation", - &CKMostCorrelatedTest::testChangingCorrelation)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CKMostCorrelatedTest::testMissingData", &CKMostCorrelatedTest::testMissingData)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CKMostCorrelatedTest::testScale", &CKMostCorrelatedTest::testScale)); - suiteOfTests->addTest( - new 
CppUnit::TestCaller("CKMostCorrelatedTest::testPersistence", &CKMostCorrelatedTest::testPersistence)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testCorrelation", &CKMostCorrelatedTest::testCorrelation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testNextProjection", &CKMostCorrelatedTest::testNextProjection)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testMostCorrelated", &CKMostCorrelatedTest::testMostCorrelated)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testRemoveVariables", &CKMostCorrelatedTest::testRemoveVariables)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testAccuracy", &CKMostCorrelatedTest::testAccuracy)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testStability", &CKMostCorrelatedTest::testStability)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testChangingCorrelation", + &CKMostCorrelatedTest::testChangingCorrelation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testMissingData", &CKMostCorrelatedTest::testMissingData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testScale", &CKMostCorrelatedTest::testScale)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKMostCorrelatedTest::testPersistence", &CKMostCorrelatedTest::testPersistence)); return suiteOfTests; } diff --git a/lib/maths/unittest/CKdTreeTest.cc b/lib/maths/unittest/CKdTreeTest.cc index bb7e4f4f1c..33e3a6a4d0 100644 --- a/lib/maths/unittest/CKdTreeTest.cc +++ b/lib/maths/unittest/CKdTreeTest.cc @@ -106,18 +106,22 @@ void CKdTreeTest::testNearestNeighbour() { LOG_DEBUG(<< "*** Test " << i << " ***"); } for (std::size_t j = 0u; j < tests.size(); ++j) { - using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; + using TMinAccumulator = + maths::CBasicStatistics::COrderStatisticsStack; TMinAccumulator expectedNearest; for (std::size_t k = 0u; k < points.size(); ++k) { - expectedNearest.add(TDoubleVector2Pr((tests[j] - points[k]).euclidean(), points[k])); + expectedNearest.add( + TDoubleVector2Pr((tests[j] - points[k]).euclidean(), points[k])); } const TVector2* nearest = kdTree.nearestNeighbour(tests[j]); CPPUNIT_ASSERT(nearest); if (i % 10 == 0) { - LOG_DEBUG(<< "Expected nearest = " << expectedNearest[0].second << ", expected distance = " << expectedNearest[0].first); - LOG_DEBUG(<< "Nearest = " << *nearest << ", actual distance = " << (tests[j] - *nearest).euclidean()); + LOG_DEBUG(<< "Expected nearest = " << expectedNearest[0].second + << ", expected distance = " << expectedNearest[0].first); + LOG_DEBUG(<< "Nearest = " << *nearest << ", actual distance = " + << (tests[j] - *nearest).euclidean()); } CPPUNIT_ASSERT_EQUAL(print(expectedNearest[0].second), print(*nearest)); } @@ -127,8 +131,10 @@ void CKdTreeTest::testNearestNeighbour() { CppUnit::Test* CKdTreeTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CKdTreeTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CKdTreeTest::testBuild", &CKdTreeTest::testBuild)); - suiteOfTests->addTest(new CppUnit::TestCaller("CKdTreeTest::testNearestNeighbour", &CKdTreeTest::testNearestNeighbour)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKdTreeTest::testBuild", &CKdTreeTest::testBuild)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CKdTreeTest::testNearestNeighbour", &CKdTreeTest::testNearestNeighbour)); return suiteOfTests; } 
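The nearest neighbour test above validates the kd-tree result against an exhaustive scan over all candidate points. A minimal sketch of that reference oracle, using a simplified point type rather than the library's vector classes:

#include <cmath>
#include <cstddef>
#include <vector>

struct SPoint2 {
    double x;
    double y;
};

double euclidean(const SPoint2& a, const SPoint2& b) {
    return std::hypot(a.x - b.x, a.y - b.y);
}

// Linear scan reference for a non-empty point set: the kd-tree's
// nearestNeighbour answer must match this exactly.
std::size_t nearestByLinearScan(const std::vector<SPoint2>& points, const SPoint2& query) {
    std::size_t best = 0;
    for (std::size_t i = 1; i < points.size(); ++i) {
        if (euclidean(points[i], query) < euclidean(points[best], query)) {
            best = i;
        }
    }
    return best;
}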
diff --git a/lib/maths/unittest/CLassoLogisticRegressionTest.cc b/lib/maths/unittest/CLassoLogisticRegressionTest.cc index 19e68e8100..10f8df0874 100644 --- a/lib/maths/unittest/CLassoLogisticRegressionTest.cc +++ b/lib/maths/unittest/CLassoLogisticRegressionTest.cc @@ -58,7 +58,10 @@ double inner(const TDoubleVec& x, const TDoubleVec& y) { return result; } -double logLikelihood(const TDoubleVecVec& x, const TDoubleVec& y, const TDoubleVec& lambda, const TDoubleVec& beta) { +double logLikelihood(const TDoubleVecVec& x, + const TDoubleVec& y, + const TDoubleVec& lambda, + const TDoubleVec& beta) { double result = 0.0; for (std::size_t i = 0u; i < y.size(); ++i) { double f = 0.0; @@ -95,7 +98,8 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() { maths::lasso_logistic_regression_detail::CCyclicCoordinateDescent clg(50, 0.001); TDoubleVec lambda(2, 0.25); - double x_[][2] = {{0.1, 1.0}, {0.3, 1.0}, {0.4, 1.0}, {0.0, 1.0}, {1.0, 1.0}, {0.6, 1.0}, {0.7, 1.0}, {0.45, 1.0}}; + double x_[][2] = {{0.1, 1.0}, {0.3, 1.0}, {0.4, 1.0}, {0.0, 1.0}, + {1.0, 1.0}, {0.6, 1.0}, {0.7, 1.0}, {0.45, 1.0}}; double y_[] = {-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0}; TDoubleVecVec x; @@ -107,14 +111,18 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() { TDoubleVec beta1; std::size_t numberIterations; clg.run(x, y, lambda, beta1, numberIterations); - LOG_DEBUG(<< "dense beta = " << core::CContainerPrinter::print(beta1) << ", numberIterations = " << numberIterations); + LOG_DEBUG(<< "dense beta = " << core::CContainerPrinter::print(beta1) + << ", numberIterations = " << numberIterations); TDoubleVec beta2; - maths::lasso_logistic_regression_detail::CSparseMatrix xs(boost::size(x_), boost::size(x_[0]), xs_); + maths::lasso_logistic_regression_detail::CSparseMatrix xs( + boost::size(x_), boost::size(x_[0]), xs_); clg.run(xs, y, lambda, beta2, numberIterations); - LOG_DEBUG(<< "sparse beta = " << core::CContainerPrinter::print(beta2) << ", numberIterations = " << numberIterations); + LOG_DEBUG(<< "sparse beta = " << core::CContainerPrinter::print(beta2) + << ", numberIterations = " << numberIterations); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(beta1), core::CContainerPrinter::print(beta2)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(beta1), + core::CContainerPrinter::print(beta2)); initializeMatrix(x_, x); double ll = logLikelihood(x, y, lambda, beta1); @@ -138,8 +146,10 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() { llMinusEps += logLikelihood(x, y, lambda, betaMinusEps); llPlusEps += logLikelihood(x, y, lambda, betaPlusEps); - LOG_DEBUG(<< "log-likelihood minus eps = " << llMinusEps / static_cast(i + 1)); - LOG_DEBUG(<< "log-likelihood plus eps = " << llPlusEps / static_cast(i + 1)); + LOG_DEBUG(<< "log-likelihood minus eps = " + << llMinusEps / static_cast(i + 1)); + LOG_DEBUG(<< "log-likelihood plus eps = " + << llPlusEps / static_cast(i + 1)); double slope = (llPlusEps - llMinusEps) / length; LOG_DEBUG(<< "slope = " << slope); @@ -186,7 +196,8 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() { TDoubleVec beta; std::size_t numberIterations; clg.run(x, y, lambda, beta, numberIterations); - LOG_DEBUG(<< "beta = " << core::CContainerPrinter::print(beta) << ", numberIterations = " << numberIterations); + LOG_DEBUG(<< "beta = " << core::CContainerPrinter::print(beta) + << ", numberIterations = " << numberIterations); TDoubleVec effectiveDecisionNormal; for (std::size_t j = 0u; j < decisionNormal.size(); ++j) { @@ 
-194,7 +205,8 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() { } double theta = - std::acos(inner(effectiveDecisionNormal, decisionNormal) / std::sqrt(inner(effectiveDecisionNormal, effectiveDecisionNormal))) * + std::acos(inner(effectiveDecisionNormal, decisionNormal) / + std::sqrt(inner(effectiveDecisionNormal, effectiveDecisionNormal))) * 360.0 / boost::math::double_constants::two_pi; LOG_DEBUG(<< "angular error = " << theta << " deg"); CPPUNIT_ASSERT(theta < 7.5); @@ -228,17 +240,20 @@ CppUnit::Test* CLassoLogisticRegressionTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLassoLogisticRegressionTest"); suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>( - "CLassoLogisticRegressionTest::testCyclicCoordinateDescent", &CLassoLogisticRegressionTest::testCyclicCoordinateDescent)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CLassoLogisticRegressionTest>("CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse", - &CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CLassoLogisticRegressionTest>("CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental", - &CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental)); - suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>("CLassoLogisticRegressionTest::testNormBasedLambda", - &CLassoLogisticRegressionTest::testNormBasedLambda)); - suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>("CLassoLogisticRegressionTest::testCrossValidatedLambda", - &CLassoLogisticRegressionTest::testCrossValidatedLambda)); + "CLassoLogisticRegressionTest::testCyclicCoordinateDescent", + &CLassoLogisticRegressionTest::testCyclicCoordinateDescent)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>( + "CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse", + &CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>( + "CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental", + &CLassoLogisticRegressionTest::testCyclicCoordinateDescentIncremental)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>( + "CLassoLogisticRegressionTest::testNormBasedLambda", + &CLassoLogisticRegressionTest::testNormBasedLambda)); + suiteOfTests->addTest(new CppUnit::TestCaller<CLassoLogisticRegressionTest>( + "CLassoLogisticRegressionTest::testCrossValidatedLambda", + &CLassoLogisticRegressionTest::testCrossValidatedLambda)); return suiteOfTests; } diff --git a/lib/maths/unittest/CLinearAlgebraTest.cc b/lib/maths/unittest/CLinearAlgebraTest.cc index cee1c49656..769e7141ad 100644 --- a/lib/maths/unittest/CLinearAlgebraTest.cc +++ b/lib/maths/unittest/CLinearAlgebraTest.cc @@ -130,7 +130,8 @@ void CLinearAlgebraTest::testSymmetricMatrixNxN() { LOG_DEBUG(<< "3 * m = " << ms); for (std::size_t i = 0u; i < 5; ++i) { for (std::size_t j = 0u; j < 5; ++j) { - CPPUNIT_ASSERT_EQUAL(3.0 * static_cast<double>((i + 1) * (j + 1)), ms(i, j)); + CPPUNIT_ASSERT_EQUAL(3.0 * static_cast<double>((i + 1) * (j + 1)), + ms(i, j)); } } } @@ -144,7 +145,8 @@ void CLinearAlgebraTest::testSymmetricMatrixNxN() { LOG_DEBUG(<< "m / 4.0 = " << ms); for (std::size_t i = 0u; i < 5; ++i) { for (std::size_t j = 0u; j < 5; ++j) { - CPPUNIT_ASSERT_EQUAL(static_cast<double>((i + 1) * (j + 1)) / 4.0, ms(i, j)); + CPPUNIT_ASSERT_EQUAL(static_cast<double>((i + 1) * (j + 1)) / 4.0, + ms(i, j)); } } } @@ -305,7 +307,8 @@ void CLinearAlgebraTest::testSymmetricMatrix() { CPPUNIT_ASSERT_EQUAL(10.8, matrix.trace()); } { - double m[] =
{1.1, 2.4, 3.2, 1.4, 1.8, 0.8, 3.7, 0.7, + 4.7, 4.7, 4.0, 1.0, 3.1, 1.1, 1.0}; maths::CSymmetricMatrix matrix(boost::begin(m), boost::end(m)); LOG_DEBUG(<< "matrix = " << matrix); CPPUNIT_ASSERT_EQUAL(std::size_t(5), matrix.rows()); @@ -350,7 +353,8 @@ void CLinearAlgebraTest::testSymmetricMatrix() { { LOG_DEBUG(<< "Sum"); - double m[] = {1.1, 2.4, 3.2, 1.4, 1.8, 0.8, 3.7, 0.7, 4.7, 4.7, 4.0, 1.0, 3.1, 1.1, 1.0}; + double m[] = {1.1, 2.4, 3.2, 1.4, 1.8, 0.8, 3.7, 0.7, + 4.7, 4.7, 4.0, 1.0, 3.1, 1.1, 1.0}; maths::CSymmetricMatrix matrix(boost::begin(m), boost::end(m)); maths::CSymmetricMatrix sum = matrix + matrix; LOG_DEBUG(<< "sum = " << sum); @@ -387,7 +391,8 @@ void CLinearAlgebraTest::testSymmetricMatrix() { LOG_DEBUG(<< "3 * m = " << ms); for (std::size_t i = 0u; i < 5; ++i) { for (std::size_t j = 0u; j < 5; ++j) { - CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i + 1) * (j + 1)), ms(i, j)); + CPPUNIT_ASSERT_EQUAL(3.0 * static_cast((i + 1) * (j + 1)), + ms(i, j)); } } } @@ -401,7 +406,8 @@ void CLinearAlgebraTest::testSymmetricMatrix() { LOG_DEBUG(<< "m / 4.0 = " << ms); for (std::size_t i = 0u; i < 5; ++i) { for (std::size_t j = 0u; j < 5; ++j) { - CPPUNIT_ASSERT_EQUAL(static_cast((i + 1) * (j + 1)) / 4.0, ms(i, j)); + CPPUNIT_ASSERT_EQUAL(static_cast((i + 1) * (j + 1)) / 4.0, + ms(i, j)); } } } @@ -528,7 +534,10 @@ void CLinearAlgebraTest::testNorms() { LOG_DEBUG(<< "| CLinearAlgebraTest::testNorms |"); LOG_DEBUG(<< "+---------------------------------+"); - double v[][5] = {{1.0, 2.1, 3.2, 1.7, 0.1}, {0.0, -2.1, 1.2, 1.9, 4.1}, {-1.0, 7.1, 5.2, 1.7, -0.1}, {-3.0, 1.1, -3.3, 1.8, 6.1}}; + double v[][5] = {{1.0, 2.1, 3.2, 1.7, 0.1}, + {0.0, -2.1, 1.2, 1.9, 4.1}, + {-1.0, 7.1, 5.2, 1.7, -0.1}, + {-3.0, 1.1, -3.3, 1.8, 6.1}}; double expectedEuclidean[] = {4.30697, 5.12543, 9.01942, 7.84538}; for (std::size_t i = 0u; i < boost::size(v); ++i) { @@ -536,10 +545,11 @@ void CLinearAlgebraTest::testNorms() { CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedEuclidean[i], v_.euclidean(), 5e-6); } - double m[][15] = {{1.0, 2.1, 3.2, 1.7, 0.1, 4.2, 0.3, 2.8, 4.1, 0.1, 0.4, 1.2, 5.2, 0.2, 6.3}, - {0.0, -2.1, 1.2, 1.9, 4.1, 4.5, -3.1, 0.0, 1.3, 7.5, 0.2, 1.0, 4.5, 8.1, 0.3}, - {-1.0, 7.1, 5.2, 1.7, -0.1, 3.2, 1.8, -3.2, 4.2, 9.1, 0.2, 0.4, 4.1, 7.2, 1.3}, - {-3.0, 1.1, -3.3, 1.8, 6.1, -1.3, 1.3, 4.2, 3.1, 1.9, -2.3, 3.1, 2.4, 2.3, 1.0}}; + double m[][15] = { + {1.0, 2.1, 3.2, 1.7, 0.1, 4.2, 0.3, 2.8, 4.1, 0.1, 0.4, 1.2, 5.2, 0.2, 6.3}, + {0.0, -2.1, 1.2, 1.9, 4.1, 4.5, -3.1, 0.0, 1.3, 7.5, 0.2, 1.0, 4.5, 8.1, 0.3}, + {-1.0, 7.1, 5.2, 1.7, -0.1, 3.2, 1.8, -3.2, 4.2, 9.1, 0.2, 0.4, 4.1, 7.2, 1.3}, + {-3.0, 1.1, -3.3, 1.8, 6.1, -1.3, 1.3, 4.2, 3.1, 1.9, -2.3, 3.1, 2.4, 2.3, 1.0}}; double expectedFrobenius[] = {13.78550, 18.00250, 20.72052, 14.80844}; for (std::size_t i = 0u; i < boost::size(m); ++i) { @@ -601,7 +611,8 @@ void CLinearAlgebraTest::testUtils() { } { - double expected[] = {1.0, std::sqrt(3.1), std::sqrt(2.2), std::sqrt(4.9), std::sqrt(12.0)}; + double expected[] = {1.0, std::sqrt(3.1), std::sqrt(2.2), + std::sqrt(4.9), std::sqrt(12.0)}; LOG_DEBUG(<< "sqrt(v1) = " << maths::sqrt(v1)); for (std::size_t i = 0u; i < 5; ++i) { CPPUNIT_ASSERT_EQUAL(expected[i], (maths::sqrt(v1))(i)); @@ -637,7 +648,8 @@ void CLinearAlgebraTest::testUtils() { } for (std::size_t i = 0u; i < 3; ++i) { for (std::size_t j = 0u; j < 3; ++j) { - CPPUNIT_ASSERT_EQUAL((maths::min(m1, 3.0))(i, j), (maths::min(3.0, m1))(i, j)); + CPPUNIT_ASSERT_EQUAL((maths::min(m1, 3.0))(i, j), + (maths::min(3.0, m1))(i, j)); } } { @@ -661,7 +673,8 @@ 
void CLinearAlgebraTest::testUtils() { } for (std::size_t i = 0u; i < 3; ++i) { for (std::size_t j = 0u; j < 3; ++j) { - CPPUNIT_ASSERT_EQUAL((maths::max(m1, 2.0))(i, j), (maths::max(2.0, m1))(i, j)); + CPPUNIT_ASSERT_EQUAL((maths::max(m1, 2.0))(i, j), + (maths::max(2.0, m1))(i, j)); } } { @@ -722,13 +735,15 @@ void CLinearAlgebraTest::testGaussianLogLikelihood() { {1.214063, 0.067988, -0.241846, -0.425730}, {-0.306693, -0.188497, -1.092719, 1.288093}}; - const double expected[] = { - -8.512128, -8.569778, -8.706920, -8.700537, -9.794163, -8.602336, -8.462027, -9.096402, -8.521042, -8.590054}; + const double expected[] = {-8.512128, -8.569778, -8.706920, -8.700537, + -9.794163, -8.602336, -8.462027, -9.096402, + -8.521042, -8.590054}; for (std::size_t i = 0u; i < boost::size(x_); ++i) { maths::CVectorNx1 x(x_[i]); double likelihood; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, x, likelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::gaussianLogLikelihood(covariance, x, likelihood)); LOG_DEBUG(<< "expected log(L(x)) = " << expected[i]); LOG_DEBUG(<< "got log(L(x)) = " << likelihood); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[i], likelihood, 1e-6); @@ -746,26 +761,40 @@ void CLinearAlgebraTest::testGaussianLogLikelihood() { maths::CVectorNx1 e3(e3_); maths::CVectorNx1 e4(e4_); maths::CSymmetricMatrixNxN covariance( - 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e1 / e1.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean())); + 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e1 / e1.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e2 / e2.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e3 / e3.euclidean())); double likelihood; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e1, likelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::gaussianLogLikelihood(covariance, e1, likelihood)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0) + 4.0 / 10.0), likelihood, 1e-10); + -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + + std::log(10.0 * 5.0 * 5.0) + 4.0 / 10.0), + likelihood, 1e-10); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e2, likelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::gaussianLogLikelihood(covariance, e2, likelihood)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0) + 2.0 / 5.0), likelihood, 1e-10); + -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + + std::log(10.0 * 5.0 * 5.0) + 2.0 / 5.0), + likelihood, 1e-10); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e3, likelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::gaussianLogLikelihood(covariance, e3, likelihood)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0) + 6.0 / 5.0), likelihood, 1e-10); + -0.5 * (3.0 * std::log(boost::math::double_constants::two_pi) + + std::log(10.0 * 5.0 * 5.0) + 6.0 / 5.0), + likelihood, 1e-10); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, maths::gaussianLogLikelihood(covariance, e1, likelihood, false)); + 
CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, + maths::gaussianLogLikelihood(covariance, e1, likelihood, false)); CPPUNIT_ASSERT(likelihood > 0.0); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, maths::gaussianLogLikelihood(covariance, e4, likelihood, false)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, + maths::gaussianLogLikelihood(covariance, e4, likelihood, false)); CPPUNIT_ASSERT(likelihood < 0.0); } @@ -780,32 +809,40 @@ void CLinearAlgebraTest::testGaussianLogLikelihood() { maths::CVectorNx1 e3(e3_); maths::CVectorNx1 e4(e4_); maths::CSymmetricMatrixNxN covariance( - 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e1 / e1.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean()) + - 2.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e4 / e4.euclidean())); + 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e1 / e1.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e2 / e2.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e3 / e3.euclidean()) + + 2.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e4 / e4.euclidean())); double likelihood; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e1, likelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::gaussianLogLikelihood(covariance, e1, likelihood)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0 * 2.0) + 4.0 / 10.0), - likelihood, - 1e-10); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e2, likelihood)); + -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + + std::log(10.0 * 5.0 * 5.0 * 2.0) + 4.0 / 10.0), + likelihood, 1e-10); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::gaussianLogLikelihood(covariance, e2, likelihood)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0 * 2.0) + 2.0 / 5.0), - likelihood, - 1e-10); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e3, likelihood)); + -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + + std::log(10.0 * 5.0 * 5.0 * 2.0) + 2.0 / 5.0), + likelihood, 1e-10); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::gaussianLogLikelihood(covariance, e3, likelihood)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0 * 2.0) + 6.0 / 5.0), - likelihood, - 1e-10); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::gaussianLogLikelihood(covariance, e4, likelihood)); + -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + + std::log(10.0 * 5.0 * 5.0 * 2.0) + 6.0 / 5.0), + likelihood, 1e-10); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + maths::gaussianLogLikelihood(covariance, e4, likelihood)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + std::log(10.0 * 5.0 * 5.0 * 2.0) + 12.0 / 2.0), - likelihood, - 1e-10); + -0.5 * (4.0 * std::log(boost::math::double_constants::two_pi) + + std::log(10.0 * 5.0 * 5.0 * 2.0) + 12.0 / 2.0), + likelihood, 1e-10); } } @@ -828,9 +865,12 @@ void CLinearAlgebraTest::testSampleGaussian() { maths::CVectorNx1 e3(e3_); maths::CVectorNx1 e4(e4_); maths::CSymmetricMatrixNxN covariance( - 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, 
e1 / e1.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean())); + 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e1 / e1.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e2 / e2.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e3 / e3.euclidean())); std::vector> samples; maths::sampleGaussian(100, mean, covariance, samples); @@ -846,16 +886,21 @@ void CLinearAlgebraTest::testSampleGaussian() { LOG_DEBUG(<< "mean = " << mean); LOG_DEBUG(<< "covariance = " << covariance); LOG_DEBUG(<< "sample mean = " << maths::CBasicStatistics::mean(covariances)); - LOG_DEBUG(<< "sample covariance = " << maths::CBasicStatistics::maximumLikelihoodCovariances(covariances)); + LOG_DEBUG(<< "sample covariance = " + << maths::CBasicStatistics::maximumLikelihoodCovariances(covariances)); - maths::CVectorNx1 meanError = maths::CVectorNx1(mean) - maths::CBasicStatistics::mean(covariances); + maths::CVectorNx1 meanError = + maths::CVectorNx1(mean) - maths::CBasicStatistics::mean(covariances); maths::CSymmetricMatrixNxN covarianceError = - maths::CSymmetricMatrixNxN(covariance) - maths::CBasicStatistics::maximumLikelihoodCovariances(covariances); + maths::CSymmetricMatrixNxN(covariance) - + maths::CBasicStatistics::maximumLikelihoodCovariances(covariances); LOG_DEBUG(<< "|error| / |mean| = " << meanError.euclidean() / mean.euclidean()); - LOG_DEBUG(<< "|error| / |covariance| = " << covarianceError.frobenius() / covariance.frobenius()); + LOG_DEBUG(<< "|error| / |covariance| = " + << covarianceError.frobenius() / covariance.frobenius()); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError.euclidean() / mean.euclidean(), 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, covarianceError.frobenius() / covariance.frobenius(), 0.01); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.0, covarianceError.frobenius() / covariance.frobenius(), 0.01); } // Construct a matrix whose eigenvalues and vectors are known. 
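Both testGaussianLogLikelihood and testSampleGaussian build their covariance matrices from scaled outer products of mutually orthogonal vectors, which is why every asserted value has a closed form. The identity these assertions rely on, written out in LaTeX (standard linear algebra, not code from this patch):

\[
\Sigma = \sum_i \lambda_i \, \frac{e_i e_i^{\top}}{\lVert e_i \rVert^2}, \qquad
\log L(x) = -\tfrac{1}{2}\left(d \log 2\pi + \log\lvert\Sigma\rvert + x^{\top} \Sigma^{-1} x\right),
\]

and for mutually orthogonal \(e_i\), \(\log\lvert\Sigma\rvert = \sum_i \log \lambda_i\) and \(e_i^{\top} \Sigma^{-1} e_i = \lVert e_i \rVert^2 / \lambda_i\). The value asserted earlier, \(-\tfrac{1}{2}(3 \log 2\pi + \log(10 \cdot 5 \cdot 5) + 4/10)\), is this formula for \(x = e_1\) with \(\lambda = (10, 5, 5)\), which implies \(\lVert e_1 \rVert^2 = 4\); \(d = 3\) because the likelihood is evidently evaluated on the span of the three eigenvectors, and querying a direction outside that span is what produces the E_FpOverflowed cases.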
@@ -872,10 +917,14 @@ void CLinearAlgebraTest::testSampleGaussian() { maths::CVectorNx1 e3(e3_); maths::CVectorNx1 e4(e4_); maths::CSymmetricMatrixNxN covariance( - 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e1 / e1.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean()) + - 2.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e4 / e4.euclidean())); + 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e1 / e1.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e2 / e2.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e3 / e3.euclidean()) + + 2.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e4 / e4.euclidean())); std::vector> samples; maths::sampleGaussian(100, mean, covariance, samples); @@ -891,16 +940,21 @@ void CLinearAlgebraTest::testSampleGaussian() { LOG_DEBUG(<< "mean = " << mean); LOG_DEBUG(<< "covariance = " << covariance); LOG_DEBUG(<< "sample mean = " << maths::CBasicStatistics::mean(covariances)); - LOG_DEBUG(<< "sample covariance = " << maths::CBasicStatistics::maximumLikelihoodCovariances(covariances)); + LOG_DEBUG(<< "sample covariance = " + << maths::CBasicStatistics::maximumLikelihoodCovariances(covariances)); - maths::CVectorNx1 meanError = maths::CVectorNx1(mean) - maths::CBasicStatistics::mean(covariances); + maths::CVectorNx1 meanError = + maths::CVectorNx1(mean) - maths::CBasicStatistics::mean(covariances); maths::CSymmetricMatrixNxN covarianceError = - maths::CSymmetricMatrixNxN(covariance) - maths::CBasicStatistics::maximumLikelihoodCovariances(covariances); + maths::CSymmetricMatrixNxN(covariance) - + maths::CBasicStatistics::maximumLikelihoodCovariances(covariances); LOG_DEBUG(<< "|error| / |mean| = " << meanError.euclidean() / mean.euclidean()); - LOG_DEBUG(<< "|error| / |covariance| = " << covarianceError.frobenius() / covariance.frobenius()); + LOG_DEBUG(<< "|error| / |covariance| = " + << covarianceError.frobenius() / covariance.frobenius()); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError.euclidean() / mean.euclidean(), 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, covarianceError.frobenius() / covariance.frobenius(), 0.02); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.0, covarianceError.frobenius() / covariance.frobenius(), 0.02); } } @@ -911,19 +965,21 @@ void CLinearAlgebraTest::testLogDeterminant() { // Test the determinant (expected from octave). 
{ - const double matrices[][3][3] = {{{0.25451, 0.52345, 0.61308}, {0.52345, 1.19825, 1.12804}, {0.61308, 1.12804, 1.78833}}, - {{0.83654, 0.24520, 0.80310}, {0.24520, 0.38368, 0.30554}, {0.80310, 0.30554, 0.78936}}, - {{0.73063, 0.87818, 0.85836}, {0.87818, 1.50305, 1.17931}, {0.85836, 1.17931, 1.05850}}, - {{0.38947, 0.61062, 0.34423}, {0.61062, 1.60437, 0.91664}, {0.34423, 0.91664, 0.52448}}, - {{1.79563, 1.78751, 2.17200}, {1.78751, 1.83443, 2.17340}, {2.17200, 2.17340, 2.62958}}, - {{0.57023, 0.47992, 0.71581}, {0.47992, 1.09182, 0.97989}, {0.71581, 0.97989, 1.32316}}, - {{2.31264, 0.72098, 2.38050}, {0.72098, 0.28103, 0.78025}, {2.38050, 0.78025, 2.49219}}, - {{0.83678, 0.45230, 0.74564}, {0.45230, 0.26482, 0.33491}, {0.74564, 0.33491, 1.29216}}, - {{0.84991, 0.85443, 0.36922}, {0.85443, 1.12737, 0.83074}, {0.36922, 0.83074, 1.01195}}, - {{0.27156, 0.26441, 0.29726}, {0.26441, 0.32388, 0.18895}, {0.29726, 0.18895, 0.47884}}}; + const double matrices[][3][3] = { + {{0.25451, 0.52345, 0.61308}, {0.52345, 1.19825, 1.12804}, {0.61308, 1.12804, 1.78833}}, + {{0.83654, 0.24520, 0.80310}, {0.24520, 0.38368, 0.30554}, {0.80310, 0.30554, 0.78936}}, + {{0.73063, 0.87818, 0.85836}, {0.87818, 1.50305, 1.17931}, {0.85836, 1.17931, 1.05850}}, + {{0.38947, 0.61062, 0.34423}, {0.61062, 1.60437, 0.91664}, {0.34423, 0.91664, 0.52448}}, + {{1.79563, 1.78751, 2.17200}, {1.78751, 1.83443, 2.17340}, {2.17200, 2.17340, 2.62958}}, + {{0.57023, 0.47992, 0.71581}, {0.47992, 1.09182, 0.97989}, {0.71581, 0.97989, 1.32316}}, + {{2.31264, 0.72098, 2.38050}, {0.72098, 0.28103, 0.78025}, {2.38050, 0.78025, 2.49219}}, + {{0.83678, 0.45230, 0.74564}, {0.45230, 0.26482, 0.33491}, {0.74564, 0.33491, 1.29216}}, + {{0.84991, 0.85443, 0.36922}, {0.85443, 1.12737, 0.83074}, {0.36922, 0.83074, 1.01195}}, + {{0.27156, 0.26441, 0.29726}, {0.26441, 0.32388, 0.18895}, {0.29726, 0.18895, 0.47884}}}; const double expected[] = { - 5.1523e-03, 6.7423e-04, 4.5641e-04, 1.5880e-04, 3.1654e-06, 8.5319e-02, 2.0840e-03, 6.8008e-03, 1.4755e-02, 2.6315e-05}; + 5.1523e-03, 6.7423e-04, 4.5641e-04, 1.5880e-04, 3.1654e-06, + 8.5319e-02, 2.0840e-03, 6.8008e-03, 1.4755e-02, 2.6315e-05}; for (std::size_t i = 0u; i < boost::size(matrices); ++i) { maths::CSymmetricMatrixNxN M(matrices[i]); @@ -931,7 +987,8 @@ void CLinearAlgebraTest::testLogDeterminant() { maths::logDeterminant(M, logDeterminant); LOG_DEBUG(<< "expected |M| = " << expected[i]); LOG_DEBUG(<< "got |M| = " << std::exp(logDeterminant)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[i], std::exp(logDeterminant), 1e-4 * expected[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[i], std::exp(logDeterminant), + 1e-4 * expected[i]); } } @@ -945,10 +1002,15 @@ void CLinearAlgebraTest::testLogDeterminant() { maths::CVectorNx1 e2(e2_); maths::CVectorNx1 e3(e3_); maths::CVectorNx1 e4(e4_); - maths::CSymmetricMatrixNxN M(10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e1 / e1.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e2 / e2.euclidean()) + - 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e3 / e3.euclidean()) + - 2.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, e4 / e4.euclidean())); + maths::CSymmetricMatrixNxN M( + 10.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e1 / e1.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e2 / e2.euclidean()) + + 5.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e3 / e3.euclidean()) + + 2.0 * maths::CSymmetricMatrixNxN(maths::E_OuterProduct, + e4 / e4.euclidean())); double 
logDeterminant; maths::logDeterminant(M, logDeterminant); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(10.0 * 5.0 * 5.0 * 2.0), logDeterminant, 1e-10); @@ -1012,7 +1074,8 @@ void CLinearAlgebraTest::testProjected() { Eigen::MatrixXd projectedVector = maths::projectedVector(subspace, vector); LOG_DEBUG(<< "projectedMatrix =\n" << projectedMatrix); LOG_DEBUG(<< "projectedVector =\n" << projectedVector); - CPPUNIT_ASSERT_EQUAL(std::string(" 1 2.4 3.1\n2.4 1.2 8.3\n3.1 8.3 0.9"), print(projectedMatrix)); + CPPUNIT_ASSERT_EQUAL(std::string(" 1 2.4 3.1\n2.4 1.2 8.3\n3.1 8.3 0.9"), + print(projectedMatrix)); CPPUNIT_ASSERT_EQUAL(std::string("3.4\n0.3\n5.7"), print(projectedVector)); } } @@ -1026,7 +1089,10 @@ void CLinearAlgebraTest::testPersist() { // bad input produces an error. { - double matrix_[][4] = {{1.0, 2.1, 1.5, 0.1}, {2.1, 2.2, 3.7, 0.6}, {1.5, 3.7, 0.4, 8.1}, {0.1, 0.6, 8.1, 4.3}}; + double matrix_[][4] = {{1.0, 2.1, 1.5, 0.1}, + {2.1, 2.2, 3.7, 0.6}, + {1.5, 3.7, 0.4, 8.1}, + {0.1, 0.6, 8.1, 4.3}}; maths::CSymmetricMatrixNxN matrix(matrix_); @@ -1078,24 +1144,29 @@ void CLinearAlgebraTest::testPersist() { CppUnit::Test* CLinearAlgebraTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLinearAlgebraTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CLinearAlgebraTest::testSymmetricMatrixNxN", - &CLinearAlgebraTest::testSymmetricMatrixNxN)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CLinearAlgebraTest::testVectorNx1", &CLinearAlgebraTest::testVectorNx1)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CLinearAlgebraTest::testSymmetricMatrix", &CLinearAlgebraTest::testSymmetricMatrix)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLinearAlgebraTest::testVector", &CLinearAlgebraTest::testVector)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLinearAlgebraTest::testNorms", &CLinearAlgebraTest::testNorms)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLinearAlgebraTest::testUtils", &CLinearAlgebraTest::testUtils)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLinearAlgebraTest::testGaussianLogLikelihood", - &CLinearAlgebraTest::testGaussianLogLikelihood)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CLinearAlgebraTest::testSampleGaussian", &CLinearAlgebraTest::testSampleGaussian)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CLinearAlgebraTest::testLogDeterminant", &CLinearAlgebraTest::testLogDeterminant)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CLinearAlgebraTest::testProjected", &CLinearAlgebraTest::testProjected)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLinearAlgebraTest::testPersist", &CLinearAlgebraTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testSymmetricMatrixNxN", &CLinearAlgebraTest::testSymmetricMatrixNxN)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testVectorNx1", &CLinearAlgebraTest::testVectorNx1)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testSymmetricMatrix", &CLinearAlgebraTest::testSymmetricMatrix)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testVector", &CLinearAlgebraTest::testVector)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testNorms", &CLinearAlgebraTest::testNorms)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testUtils", &CLinearAlgebraTest::testUtils)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testGaussianLogLikelihood", + 
&CLinearAlgebraTest::testGaussianLogLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testSampleGaussian", &CLinearAlgebraTest::testSampleGaussian)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testLogDeterminant", &CLinearAlgebraTest::testLogDeterminant)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testProjected", &CLinearAlgebraTest::testProjected)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLinearAlgebraTest::testPersist", &CLinearAlgebraTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc index 5b0ca3c928..79f26ab4be 100644 --- a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc +++ b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc @@ -43,8 +43,9 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumula using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CLogNormalMeanPrecConjugate = CPriorTestInterfaceMixin; -CLogNormalMeanPrecConjugate -makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& offset = 0.0, const double& decayRate = 0.0) { +CLogNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, + const double& offset = 0.0, + const double& decayRate = 0.0) { return CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, offset, decayRate, 0.0); } } @@ -104,9 +105,11 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), + TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); } - filter2.addSamples(weightStyle, scaledSamples, TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(weightStyle, scaledSamples, + TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -168,7 +171,8 @@ void CLogNormalMeanPrecConjugateTest::testPropagation() { double propagatedMean = filter.normalMean(); double propagatedPrecision = filter.normalPrecision(); - LOG_DEBUG(<< "mean = " << mean << ", precision = " << precision << ", propagatedMean = " << propagatedMean + LOG_DEBUG(<< "mean = " << mean << ", precision = " << precision + << ", propagatedMean = " << propagatedMean << ", propagatedPrecision = " << propagatedPrecision); CPPUNIT_ASSERT_DOUBLES_EQUAL(mean, propagatedMean, eps); @@ -188,7 +192,8 @@ void CLogNormalMeanPrecConjugateTest::testMeanEstimation() { const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nTests = 500u; - const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, + 85.0, 90.0, 95.0, 99.0}; for (size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; @@ -202,7 +207,8 @@ void CLogNormalMeanPrecConjugateTest::testMeanEstimation() { TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 500, samples); - CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); + CLogNormalMeanPrecConjugate filter( + makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); for (std::size_t j = 0u; j 
< samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); @@ -210,8 +216,10 @@ void CLogNormalMeanPrecConjugateTest::testMeanEstimation() { } for (size_t j = 0u; j < boost::size(testIntervals); ++j) { - TDoubleDoublePr confidenceInterval = filter.confidenceIntervalNormalMean(testIntervals[j]); - if (location < confidenceInterval.first || location > confidenceInterval.second) { + TDoubleDoublePr confidenceInterval = + filter.confidenceIntervalNormalMean(testIntervals[j]); + if (location < confidenceInterval.first || + location > confidenceInterval.second) { ++errors[j]; } } @@ -220,7 +228,8 @@ void CLogNormalMeanPrecConjugateTest::testMeanEstimation() { for (size_t j = 0; j < boost::size(testIntervals); ++j) { double interval = 100.0 * errors[j] / static_cast(nTests); - LOG_DEBUG(<< "interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j])); + LOG_DEBUG(<< "interval = " << interval + << ", expectedInterval = " << (100.0 - testIntervals[j])); // If the decay rate is zero the intervals should be accurate. // Otherwise, they should be an upper bound. @@ -246,7 +255,8 @@ void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() { const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nTests = 500u; - const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, + 85.0, 90.0, 95.0, 99.0}; for (size_t i = 0; i < boost::size(decayRates); ++i) { test::CRandomNumbers rng; @@ -261,7 +271,8 @@ void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() { TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 500, samples); - CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); + CLogNormalMeanPrecConjugate filter( + makePrior(maths_t::E_ContinuousData, 0.0, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { filter.addSamples(TDouble1Vec(1, samples[j])); @@ -269,9 +280,11 @@ void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() { } for (size_t j = 0; j < boost::size(testIntervals); ++j) { - TDoubleDoublePr confidenceInterval = filter.confidenceIntervalNormalPrecision(testIntervals[j]); + TDoubleDoublePr confidenceInterval = + filter.confidenceIntervalNormalPrecision(testIntervals[j]); - if (precision < confidenceInterval.first || precision > confidenceInterval.second) { + if (precision < confidenceInterval.first || + precision > confidenceInterval.second) { ++errors[j]; } } @@ -280,7 +293,8 @@ void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() { for (size_t j = 0; j < boost::size(testIntervals); ++j) { double interval = 100.0 * errors[j] / static_cast(nTests); - LOG_DEBUG(<< "interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j])); + LOG_DEBUG(<< "interval = " << interval + << ", expectedInterval = " << (100.0 - testIntervals[j])); // If the decay rate is zero the intervals should be accurate. // Otherwise, they should be an upper bound. 
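The variance-scale handling in the marginal likelihood hunks that follow repeatedly applies shift = log(1 + vs * (exp(squareScale) - 1)) - squareScale, with shiftedLocation = location - shift / 2 and shiftedSquareScale = squareScale + shift. Why this is the right transformation follows from the standard log-normal moments (a derivation, not code from this patch):

\[
X \sim \operatorname{LogNormal}(\mu, \sigma^2): \quad
\mathbb{E}[X] = e^{\mu + \sigma^2/2}, \qquad
\operatorname{Var}[X] = \left(e^{\sigma^2} - 1\right) e^{2\mu + \sigma^2}.
\]

Setting \(s = \log(1 + v(e^{\sigma^2} - 1)) - \sigma^2\), \(\mu' = \mu - s/2\), and \(\sigma'^2 = \sigma^2 + s\) gives \(2\mu' + \sigma'^2 = 2\mu + \sigma^2\) and \(e^{\sigma'^2} - 1 = v\,(e^{\sigma^2} - 1)\), so the mean is unchanged while the variance is scaled by exactly \(v\). testVarianceScale later folds the same update into ss = log(1 + v(e^{\sigma^2} - 1)), that is ss = \(\sigma^2 + s\), with location + (squareScale - ss) / 2, which is algebraically identical.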
@@ -313,17 +327,16 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { filter.addSamples(samples); maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight}; + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, + maths_t::E_SampleCountWeight}; double weights[] = {0.1, 1.0, 10.0}; for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]), - TDouble1Vec(1, 10000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), - lb, - ub); + filter.minusLogJointCdf( + maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 10000.0), + TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -350,7 +363,8 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { rng.generateLogNormalSamples(location, squareScale, numberSamples[i], samples); for (size_t j = 0; j < boost::size(decayRates); ++j) { - CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[j])); + CLogNormalMeanPrecConjugate filter( + makePrior(maths_t::E_ContinuousData, 0.0, decayRates[j])); for (std::size_t k = 0u; k < samples.size(); ++k) { filter.addSamples(TDouble1Vec(1, samples[k])); @@ -361,7 +375,8 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { // c.d.f. at a range of deltas from the true location. const double eps = 1e-4; - double deltas[] = {-5.0, -4.0, -3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0}; + double deltas[] = {-5.0, -4.0, -3.0, -2.0, -1.0, -0.5, 0.0, + 0.5, 1.0, 2.0, 3.0, 4.0, 5.0}; for (size_t k = 0; k < boost::size(deltas); ++k) { double x = std::exp(location + deltas[k] * std::sqrt(squareScale)); @@ -370,7 +385,8 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { LOG_DEBUG(<< "number = " << numberSamples[i] << ", sample = " << sample[0]); double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood(sample, logLikelihood)); double pdf = std::exp(logLikelihood); double lowerBound = 0.0, upperBound = 0.0; @@ -420,19 +436,23 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { TDouble1Vec sample(1, samples[i]); filter.addSamples(sample); double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood(sample, logLikelihood)); differentialEntropy -= logLikelihood; } differentialEntropy /= static_cast(samples.size()); - LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); + LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy + << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 5e-3); } { - const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0}; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, + 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, + 2.0, 2.5, 3.0, 4.0, 5.0}; 
boost::math::lognormal_distribution<> logNormal(location, std::sqrt(squareScale)); CLogNormalMeanPrecConjugate filter(makePrior()); @@ -440,7 +460,8 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { rng.generateLogNormalSamples(location, squareScale, 1000, samples); filter.addSamples(samples); - const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0}; + const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, + 50.0, 60.0, 70.0, 80.0, 95.0}; { // Test that marginal likelihood confidence intervals are @@ -449,9 +470,12 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { TMeanAccumulator error; for (std::size_t i = 0u; i < boost::size(percentages); ++i) { double q1, q2; - filter.marginalLikelihoodQuantileForTest(50.0 - percentages[i] / 2.0, 1e-3, q1); - filter.marginalLikelihoodQuantileForTest(50.0 + percentages[i] / 2.0, 1e-3, q2); - TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[i]); + filter.marginalLikelihoodQuantileForTest(50.0 - percentages[i] / 2.0, + 1e-3, q1); + filter.marginalLikelihoodQuantileForTest(50.0 + percentages[i] / 2.0, + 1e-3, q2); + TDoubleDoublePr interval = + filter.marginalLikelihoodConfidenceInterval(percentages[i]); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 1e-3); @@ -470,16 +494,25 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { double shift = std::log(1.0 + vs * (std::exp(squareScale) - 1.0)) - squareScale; double shiftedLocation = location - 0.5 * shift; double shiftedSquareScale = squareScale + shift; - boost::math::lognormal_distribution<> scaledLogNormal(shiftedLocation, std::sqrt(shiftedSquareScale)); - LOG_DEBUG(<< "*** vs = " << boost::math::variance(scaledLogNormal) / boost::math::variance(logNormal) << " ***"); + boost::math::lognormal_distribution<> scaledLogNormal( + shiftedLocation, std::sqrt(shiftedSquareScale)); + LOG_DEBUG(<< "*** vs = " + << boost::math::variance(scaledLogNormal) / + boost::math::variance(logNormal) + << " ***"); for (std::size_t j = 0u; j < boost::size(percentages); ++j) { - double q1 = boost::math::quantile(scaledLogNormal, (50.0 - percentages[j] / 2.0) / 100.0); - double q2 = boost::math::quantile(scaledLogNormal, (50.0 + percentages[j] / 2.0) / 100.0); + double q1 = boost::math::quantile( + scaledLogNormal, (50.0 - percentages[j] / 2.0) / 100.0); + double q2 = boost::math::quantile( + scaledLogNormal, (50.0 + percentages[j] / 2.0) / 100.0); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), TDouble4Vec(1, vs)); + percentages[j], + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), + TDouble4Vec(1, vs)); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, std::max(0.5, 0.2 * q1)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, + std::max(0.5, 0.2 * q1)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q2, interval.second, 0.1 * q2); error.add(std::fabs(interval.first - q1) / q1); error.add(std::fabs(interval.second - q2) / q2); @@ -509,7 +542,8 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { for (std::size_t i = 0u; i < boost::size(locations); ++i) { for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { - 
LOG_DEBUG(<< "*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); + LOG_DEBUG(<< "*** location = " << locations[i] + << ", squareScale = " << squareScales[j] << " ***"); CLogNormalMeanPrecConjugate filter(makePrior()); @@ -529,12 +563,15 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); if (k % 10 == 0) { - LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); + LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() + << ", expectedMean = " << expectedMean); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 0.35 * expectedMean); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + expectedMean, filter.marginalLikelihoodMean(), 0.35 * expectedMean); - relativeError.add(std::fabs(filter.marginalLikelihoodMean() - expectedMean) / expectedMean); + relativeError.add(std::fabs(filter.marginalLikelihoodMean() - expectedMean) / + expectedMean); } LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -553,15 +590,19 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { const double locations[] = {0.1, 1.0, 3.0}; const double squareScales[] = {0.1, 1.0, 3.0}; - const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0}; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, + 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, + 2.0, 2.5, 3.0, 4.0, 5.0}; test::CRandomNumbers rng; for (std::size_t i = 0u; i < boost::size(locations); ++i) { for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { - LOG_DEBUG(<< "*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); + LOG_DEBUG(<< "*** location = " << locations[i] + << ", squareScale = " << squareScales[j] << " ***"); - boost::math::lognormal_distribution<> logNormal(locations[i], std::sqrt(squareScales[j])); + boost::math::lognormal_distribution<> logNormal( + locations[i], std::sqrt(squareScales[j])); CLogNormalMeanPrecConjugate filter(makePrior()); TDoubleVec samples; @@ -574,17 +615,25 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; weight[0] = vs; - double shift = std::log(1.0 + vs * (std::exp(squareScales[j]) - 1.0)) - squareScales[j]; + double shift = std::log(1.0 + vs * (std::exp(squareScales[j]) - 1.0)) - + squareScales[j]; double shiftedLocation = locations[i] - 0.5 * shift; double shiftedSquareScale = squareScales[j] + shift; - boost::math::lognormal_distribution<> scaledLogNormal(shiftedLocation, std::sqrt(shiftedSquareScale)); + boost::math::lognormal_distribution<> scaledLogNormal( + shiftedLocation, std::sqrt(shiftedSquareScale)); double expectedMode = boost::math::mode(scaledLogNormal); - LOG_DEBUG(<< "dm = " << boost::math::mean(scaledLogNormal) - boost::math::mean(logNormal) - << ", vs = " << boost::math::variance(scaledLogNormal) / boost::math::variance(logNormal) - << ", marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight) + LOG_DEBUG(<< "dm = " + << boost::math::mean(scaledLogNormal) - boost::math::mean(logNormal) + << ", vs = " + << boost::math::variance(scaledLogNormal) / + boost::math::variance(logNormal) + << ", marginalLikelihoodMode = " + << filter.marginalLikelihoodMode(weightStyle, weight) << ", expectedMode = " << expectedMode); - 
CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0); - error.add(std::fabs(filter.marginalLikelihoodMode(weightStyle, weight) - expectedMode)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0); + error.add(std::fabs(filter.marginalLikelihoodMode(weightStyle, weight) - + expectedMode)); } LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.26); @@ -608,7 +657,8 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { for (std::size_t i = 0u; i < boost::size(locations); ++i) { for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { - LOG_DEBUG(<< "*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); + LOG_DEBUG(<< "*** location = " << locations[i] + << ", squareScale = " << squareScales[j] << " ***"); CLogNormalMeanPrecConjugate filter(makePrior()); @@ -628,11 +678,13 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); if (k % 10 == 0) { - LOG_DEBUG(<< "marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() + LOG_DEBUG(<< "marginalLikelihoodVariance = " + << filter.marginalLikelihoodVariance() << ", expectedVariance = " << expectedVariance); } - relativeError.add(std::fabs(filter.marginalLikelihoodVariance() - expectedVariance) / expectedVariance); + relativeError.add(std::fabs(filter.marginalLikelihoodVariance() - expectedVariance) / + expectedVariance); } LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -693,13 +745,15 @@ void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { sampledMoments = std::for_each(sampled.begin(), sampled.end(), sampledMoments); CPPUNIT_ASSERT_EQUAL(numberSampled, sampled.size()); - LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodMean() - << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); - LOG_DEBUG(<< "expectedVar = " << filter.marginalLikelihoodVariance() - << ", sampledVar = " << maths::CBasicStatistics::variance(sampledMoments)); + LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodMean() << ", sampledMean = " + << maths::CBasicStatistics::mean(sampledMoments)); + LOG_DEBUG(<< "expectedVar = " << filter.marginalLikelihoodVariance() << ", sampledVar = " + << maths::CBasicStatistics::variance(sampledMoments)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMoments), 0.8); - meanMeanError.add(std::fabs(filter.marginalLikelihoodMean() - maths::CBasicStatistics::mean(sampledMoments))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), + maths::CBasicStatistics::mean(sampledMoments), 0.8); + meanMeanError.add(std::fabs(filter.marginalLikelihoodMean() - + maths::CBasicStatistics::mean(sampledMoments))); } std::sort(sampled.begin(), sampled.end()); @@ -709,10 +763,13 @@ void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { double expectedQuantile; CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile)); - LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[j - 1] << "," - << sampled[j] << "]"); - CPPUNIT_ASSERT(expectedQuantile >= sampled[j - 1] - 0.2 * std::max(6.0 - static_cast(i), 0.0)); - CPPUNIT_ASSERT(expectedQuantile <= sampled[j] + 1.2 * std::max(6.0 - 
static_cast(i), 0.0)); + LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" + << sampled[j - 1] << "," << sampled[j] << "]"); + CPPUNIT_ASSERT(expectedQuantile >= + sampled[j - 1] - + 0.2 * std::max(6.0 - static_cast(i), 0.0)); + CPPUNIT_ASSERT(expectedQuantile <= + sampled[j] + 1.2 * std::max(6.0 - static_cast(i), 0.0)); } } @@ -753,7 +810,8 @@ void CLogNormalMeanPrecConjugateTest::testCdf() { CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, -1.0), lowerBound, upperBound)); double f = (lowerBound + upperBound) / 2.0; - CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), lowerBound, upperBound)); + CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), + lowerBound, upperBound)); double fComplement = (lowerBound + upperBound) / 2.0; LOG_DEBUG(<< "log(F(x)) = " << -f << ", log(1 - F(x)) = " << fComplement); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(std::numeric_limits::min()), -f, 1e-10); @@ -764,10 +822,11 @@ void CLogNormalMeanPrecConjugateTest::testCdf() { CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lowerBound, upperBound)); f = (lowerBound + upperBound) / 2.0; - CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lowerBound, upperBound)); + CPPUNIT_ASSERT(filter.minusLogJointCdfComplement( + TDouble1Vec(1, x), lowerBound, upperBound)); fComplement = (lowerBound + upperBound) / 2.0; - LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) - << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); + LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " + << (fComplement == 0.0 ? fComplement : -fComplement)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10); } } @@ -795,7 +854,8 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { for (size_t i = 0; i < boost::size(means); ++i) { for (size_t j = 0; j < boost::size(squareScales); ++j) { - LOG_DEBUG(<< "means = " << means[i] << ", scale = " << std::sqrt(squareScales[j])); + LOG_DEBUG(<< "means = " << means[i] + << ", scale = " << std::sqrt(squareScales[j])); TDoubleVec samples; rng.generateLogNormalSamples(means[i], squareScales[j], 1000, samples); @@ -822,15 +882,19 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { double fx; filter.jointLogMarginalLikelihood(sample, fx); - double px = static_cast(std::lower_bound(likelihoods.begin(), likelihoods.end(), fx) - likelihoods.begin()) / + double px = static_cast(std::lower_bound(likelihoods.begin(), + likelihoods.end(), fx) - + likelihoods.begin()) / static_cast(likelihoods.size()); double lb, ub; filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb, ub); - double ssd = std::sqrt(px * (1.0 - px) / static_cast(samples.size())); + double ssd = std::sqrt(px * (1.0 - px) / + static_cast(samples.size())); - LOG_DEBUG(<< "expected P(x) = " << px << ", actual P(x) = " << (lb + ub) / 2.0 << " sample sd = " << ssd); + LOG_DEBUG(<< "expected P(x) = " << px << ", actual P(x) = " + << (lb + ub) / 2.0 << " sample sd = " << ssd); CPPUNIT_ASSERT_DOUBLES_EQUAL(px, (lb + ub) / 2.0, 3.0 * ssd); @@ -840,7 +904,8 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, TDouble4Vec(1, vs[k])); + double mode = 
filter.marginalLikelihoodMode(weightStyle, + TDouble4Vec(1, vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -850,57 +915,42 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), + TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedBelow, weightStyle, + TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedAbove, weightStyle, + TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), + TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -947,15 +997,18 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() { for (size_t i = 0; i < boost::size(means); ++i) { for (size_t j = 0; j < boost::size(squareScales); ++j) { - LOG_DEBUG(<< "mean = " << means[i] << ", scale = " << std::sqrt(squareScales[j])); + LOG_DEBUG(<< "mean = " << 
means[i] + << ", scale = " << std::sqrt(squareScales[j])); - boost::math::lognormal_distribution<> logNormal(means[i], std::sqrt(squareScales[j])); + boost::math::lognormal_distribution<> logNormal( + means[i], std::sqrt(squareScales[j])); TDoubleVec samples; rng.generateLogNormalSamples(means[i], squareScales[j], 500, samples); for (size_t k = 0; k < boost::size(decayRates); ++k) { - CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, 0.0, decayRates[k])); + CLogNormalMeanPrecConjugate filter( + makePrior(maths_t::E_ContinuousData, 0.0, decayRates[k])); ++test; @@ -967,7 +1020,8 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() { TUIntVec candidateAnomalies; for (unsigned int time = 0; time < samples.size(); ++time) { double anomaly = - anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - boost::begin(anomalyTimes)] * + anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - + boost::begin(anomalyTimes)] * boost::math::standard_deviation(logNormal); double sample = samples[time] + anomaly; @@ -988,28 +1042,29 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() { x << "];\n"; scores << "];\n"; - file << x.str() << scores.str() << "plot(x" << test << ", score" << test << ");\n" + file << x.str() << scores.str() << "plot(x" << test << ", score" + << test << ");\n" << "input(\"Hit any key for next test\");\n\n"; TUIntVec falsePositives; std::set_difference(candidateAnomalies.begin(), candidateAnomalies.end(), - boost::begin(anomalyTimes), - boost::end(anomalyTimes), + boost::begin(anomalyTimes), boost::end(anomalyTimes), std::back_inserter(falsePositives)); - double falsePositiveRate = static_cast(falsePositives.size()) / static_cast(samples.size()); + double falsePositiveRate = static_cast(falsePositives.size()) / + static_cast(samples.size()); totalFalsePositiveRate += falsePositiveRate; TUIntVec positives; - std::set_intersection(candidateAnomalies.begin(), - candidateAnomalies.end(), - boost::begin(anomalyTimes), - boost::end(anomalyTimes), - std::back_inserter(positives)); + std::set_intersection( + candidateAnomalies.begin(), candidateAnomalies.end(), + boost::begin(anomalyTimes), boost::end(anomalyTimes), + std::back_inserter(positives)); - LOG_DEBUG(<< "falsePositiveRate = " << falsePositiveRate << ", positives = " << positives.size()); + LOG_DEBUG(<< "falsePositiveRate = " << falsePositiveRate + << ", positives = " << positives.size()); // False alarm rate should be less than 1%. 
CPPUNIT_ASSERT(falsePositiveRate <= 0.01); @@ -1060,8 +1115,10 @@ void CLogNormalMeanPrecConjugateTest::testOffset() { for (size_t i = 0; i < boost::size(dataTypes); ++i) { for (size_t j = 0; j < boost::size(offsets); ++j) { for (size_t k = 0; k < boost::size(decayRates); ++k) { - CLogNormalMeanPrecConjugate filter1(makePrior(dataTypes[i], offsets[j], decayRates[k])); - CLogNormalMeanPrecConjugate filter2(makePrior(dataTypes[i], 0.0, decayRates[k])); + CLogNormalMeanPrecConjugate filter1( + makePrior(dataTypes[i], offsets[j], decayRates[k])); + CLogNormalMeanPrecConjugate filter2( + makePrior(dataTypes[i], 0.0, decayRates[k])); for (std::size_t l = 0u; l < samples.size(); ++l) { double offsetSample = samples[l] - offsets[j]; @@ -1077,14 +1134,16 @@ void CLogNormalMeanPrecConjugateTest::testOffset() { double likelihood1; filter1.jointLogMarginalLikelihood(offsetSampleVec, likelihood1); double lowerBound1, upperBound1; - filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, offsetSampleVec, lowerBound1, upperBound1); + filter1.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, offsetSampleVec, lowerBound1, upperBound1); CPPUNIT_ASSERT_EQUAL(lowerBound1, upperBound1); double probability1 = (lowerBound1 + upperBound1) / 2.0; double likelihood2; filter2.jointLogMarginalLikelihood(sample, likelihood2); double lowerBound2, upperBound2; - filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound2, upperBound2); + filter2.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, lowerBound2, upperBound2); CPPUNIT_ASSERT_EQUAL(lowerBound2, upperBound2); double probability2 = (lowerBound2 + upperBound2) / 2.0; @@ -1158,10 +1217,12 @@ void CLogNormalMeanPrecConjugateTest::testIntegerData() { } LOG_DEBUG(<< "meanLogLikelihood1 = " << maths::CBasicStatistics::mean(meanLogLikelihood1) - << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); + << ", meanLogLikelihood2 = " + << maths::CBasicStatistics::mean(meanLogLikelihood2)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(meanLogLikelihood1), maths::CBasicStatistics::mean(meanLogLikelihood2), 0.05); + maths::CBasicStatistics::mean(meanLogLikelihood1), + maths::CBasicStatistics::mean(meanLogLikelihood2), 0.05); } } @@ -1194,14 +1255,16 @@ void CLogNormalMeanPrecConjugateTest::testIntegerData() { TDouble1Vec sample(1, x); double l1, u1; - CPPUNIT_ASSERT(filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, l1, u1)); + CPPUNIT_ASSERT(filter1.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, l1, u1)); CPPUNIT_ASSERT_EQUAL(l1, u1); double p1 = (l1 + u1) / 2.0; meanProbability1.add(p1); sample[0] += uniform[k]; double l2, u2; - CPPUNIT_ASSERT(filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, l2, u2)); + CPPUNIT_ASSERT(filter2.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, l2, u2)); CPPUNIT_ASSERT_EQUAL(l2, u2); double p2 = (l2 + u2) / 2.0; meanProbability2.add(p2); @@ -1247,7 +1310,8 @@ void CLogNormalMeanPrecConjugateTest::testLowVariationData() { TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 2.0; - LOG_DEBUG(<< "68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate s.t.d. = " << sigma); + LOG_DEBUG(<< "68% confidence interval " << core::CContainerPrinter::print(interval) + << ", approximate s.t.d. 
= " << sigma); CPPUNIT_ASSERT_DOUBLES_EQUAL(1e-4, sigma / 430.5, 5e-5); } } @@ -1267,8 +1331,9 @@ void CLogNormalMeanPrecConjugateTest::testPersist() { maths::CLogNormalMeanPrecConjugate origFilter(makePrior()); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), + TDouble1Vec(1, samples[i]), + TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1287,14 +1352,13 @@ void CLogNormalMeanPrecConjugateTest::testPersist() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, - decayRate + 0.1, - maths::MINIMUM_CLUSTER_SPLIT_FRACTION, - maths::MINIMUM_CLUSTER_SPLIT_COUNT, - maths::MINIMUM_CATEGORY_COUNT); + maths::SDistributionRestoreParams params( + maths_t::E_ContinuousData, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, + maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); maths::CLogNormalMeanPrecConjugate restoredFilter(params, traverser); - LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG(<< "orig checksum = " << checksum + << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1330,14 +1394,16 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { // Finally, we test update with scaled samples produces the // correct posterior. 
- maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight};
+ maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight,
+ maths_t::E_SampleCountVarianceScaleWeight};
for (std::size_t s = 0u; s < boost::size(scales); ++s) {
const double location = 2.0;
const double squareScale = 1.5;
{
boost::math::lognormal_distribution<> logNormal(location, std::sqrt(squareScale));
- LOG_DEBUG(<< "mean = " << boost::math::mean(logNormal) << ", variance = " << boost::math::variance(logNormal));
+ LOG_DEBUG(<< "mean = " << boost::math::mean(logNormal)
+ << ", variance = " << boost::math::variance(logNormal));
}
const double varianceScales[] = {0.20, 0.50, 0.75, 1.50, 2.00, 5.00};
@@ -1345,7 +1411,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
LOG_DEBUG(<< "");
LOG_DEBUG(<< "****** probabilityOfLessLikelySamples ******");
- const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0};
+ const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0,
+ 60.0, 70.0, 80.0, 90.0};
const std::size_t nSamples[] = {10u, 20u, 40u, 80u, 1000u};
const std::size_t nScaledSamples = 50000u;
@@ -1373,7 +1440,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
TDoubleVec unscaledPercentileErrors;
{
TDoubleVec unscaledSamples;
- rng.generateLogNormalSamples(location, squareScale, nScaledSamples, unscaledSamples);
+ rng.generateLogNormalSamples(location, squareScale,
+ nScaledSamples, unscaledSamples);
TDoubleVec probabilities;
probabilities.reserve(nScaledSamples);
@@ -1381,7 +1449,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
TDouble1Vec sample(1, unscaledSamples[j]);
double lowerBound, upperBound;
- CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound));
+ CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(
+ maths_t::E_TwoSided, sample, lowerBound, upperBound));
CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
double probability = (lowerBound + upperBound) / 2.0;
probabilities.push_back(probability);
@@ -1389,7 +1458,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
std::sort(probabilities.begin(), probabilities.end());
for (size_t j = 0; j < boost::size(percentiles); ++j) {
- std::size_t index = static_cast<std::size_t>(static_cast<double>(nScaledSamples) * percentiles[j] / 100.0);
+ std::size_t index = static_cast<std::size_t>(
+ static_cast<double>(nScaledSamples) * percentiles[j] / 100.0);
double error = std::fabs(probabilities[index] - percentiles[j] / 100.0);
unscaledPercentileErrors.push_back(error);
unscaledMeanPercentileError += error;
@@ -1403,8 +1473,10 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
double ss = std::log(1.0 + varianceScales[j] * (std::exp(squareScale) - 1.0));
double shiftedLocation = location + (squareScale - ss) / 2.0;
{
- boost::math::lognormal_distribution<> logNormal(shiftedLocation, std::sqrt(ss));
- LOG_DEBUG(<< "mean = " << boost::math::mean(logNormal) << ", variance = " << boost::math::variance(logNormal));
+ boost::math::lognormal_distribution<> logNormal(shiftedLocation,
+ std::sqrt(ss));
+ LOG_DEBUG(<< "mean = " << boost::math::mean(logNormal)
+ << ", variance = " << boost::math::variance(logNormal));
}
TDoubleVec scaledSamples;
@@ -1415,13 +1487,11 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
for (std::size_t k = 0; k < scaledSamples.size(); ++k) {
double lowerBound, upperBound;
maths_t::ETail tail;
- CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
- maths_t::TWeightStyleVec(1, scales[s]),
- TDouble1Vec(1, scaledSamples[k]),
- TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])),
- lowerBound,
- upperBound,
- tail));
+ CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(
+ maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, scales[s]),
+ TDouble1Vec(1, scaledSamples[k]),
+ TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])),
+ lowerBound, upperBound, tail));
CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
double probability = (lowerBound + upperBound) / 2.0;
probabilities.push_back(probability);
@@ -1430,12 +1500,15 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
double meanPercentileError = 0.0;
for (size_t k = 0; k < boost::size(percentiles); ++k) {
- std::size_t index = static_cast<std::size_t>(static_cast<double>(nScaledSamples) * percentiles[k] / 100.0);
+ std::size_t index = static_cast<std::size_t>(
+ static_cast<double>(nScaledSamples) * percentiles[k] / 100.0);
double error = std::fabs(probabilities[index] - percentiles[k] / 100.0);
meanPercentileError += error;
- double threshold = percentileErrorTolerance + unscaledPercentileErrors[k];
+ double threshold = percentileErrorTolerance +
+ unscaledPercentileErrors[k];
- LOG_DEBUG(<< "percentile = " << percentiles[k] << ", probability = " << probabilities[index] << ", error = " << error
+ LOG_DEBUG(<< "percentile = " << percentiles[k] << ", probability = "
+ << probabilities[index] << ", error = " << error
<< ", error threshold = " << threshold);
CPPUNIT_ASSERT(error < threshold);
@@ -1444,7 +1517,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
double threshold = meanPercentileErrorTolerance + unscaledMeanPercentileError;
- LOG_DEBUG(<< "mean error = " << meanPercentileError << ", mean error threshold = " << threshold);
+ LOG_DEBUG(<< "mean error = " << meanPercentileError
+ << ", mean error threshold = " << threshold);
CPPUNIT_ASSERT(meanPercentileError < threshold);
@@ -1459,7 +1533,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
{
double threshold = totalMeanPercentileErrorTolerance + totalUnscaledMeanPercentileError;
LOG_DEBUG(<< "total unscaled mean error = " << totalUnscaledMeanPercentileError);
- LOG_DEBUG(<< "total mean error = " << totalMeanPercentileError << ", total mean error threshold = " << threshold);
+ LOG_DEBUG(<< "total mean error = " << totalMeanPercentileError
+ << ", total mean error threshold = " << threshold);
CPPUNIT_ASSERT(totalMeanPercentileError < threshold);
}
@@ -1473,8 +1548,12 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
double ss = std::log(1.0 + varianceScales[i] * (std::exp(squareScale) - 1.0));
double shiftedLocation = location + (squareScale - ss) / 2.0;
- boost::math::lognormal_distribution<> logNormal(shiftedLocation, std::sqrt(ss));
- { LOG_DEBUG(<< "mean = " << boost::math::mean(logNormal) << ", variance = " << boost::math::variance(logNormal)); }
+ boost::math::lognormal_distribution<> logNormal(shiftedLocation,
+ std::sqrt(ss));
+ {
+ LOG_DEBUG(<< "mean = " << boost::math::mean(logNormal)
+ << ", variance = " << boost::math::variance(logNormal));
+ }
double expectedDifferentialEntropy = maths::CTools::differentialEntropy(logNormal);
CLogNormalMeanPrecConjugate filter(makePrior());
@@ -1490,11 +1569,12 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() {
for (std::size_t j = 0u; j < scaledSamples.size(); ++j) {
double logLikelihood = 0.0;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-
filter.jointLogMarginalLikelihood(maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), - logLikelihood)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood( + maths_t::TWeightStyleVec(1, scales[s]), + TDouble1Vec(1, scaledSamples[j]), + TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood)); differentialEntropy -= logLikelihood; } @@ -1503,7 +1583,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.5); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, + differentialEntropy, 0.5); } } @@ -1516,8 +1597,10 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { for (std::size_t s = 0u; s < boost::size(scales); ++s) { for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { - const double means[] = {0.1, 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; - const double variances[] = {0.1, 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; + const double means[] = {0.1, 1.0, 10.0, 100.0, + 1000.0, 100000.0, 1000000.0}; + const double variances[] = {0.1, 1.0, 10.0, 100.0, + 1000.0, 100000.0, 1000000.0}; const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0, 100.0}; maths_t::TWeightStyleVec weightStyle(1, scales[s]); @@ -1550,9 +1633,11 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { double location = std::log(mean) - squareScale / 2.0; double precision = 1.0 / squareScale; { - boost::math::lognormal_distribution<> logNormal(location, std::sqrt(squareScale)); + boost::math::lognormal_distribution<> logNormal( + location, std::sqrt(squareScale)); LOG_DEBUG(<< ""); - LOG_DEBUG(<< "****** mean = " << boost::math::mean(logNormal) << ", variance = " << boost::math::variance(logNormal) + LOG_DEBUG(<< "****** mean = " << boost::math::mean(logNormal) + << ", variance = " << boost::math::variance(logNormal) << " ******"); LOG_DEBUG(<< "location = " << location << ", precision = " << precision); } @@ -1564,14 +1649,18 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { } LOG_DEBUG(<< "*** scale = " << scale << " ***"); - double scaledSquareScale = std::log(1.0 + variance * scale / (mean * mean)); + double scaledSquareScale = + std::log(1.0 + variance * scale / (mean * mean)); double scaledLocation = std::log(mean) - scaledSquareScale / 2.0; double scaledPrecision = 1.0 / scaledSquareScale; { - boost::math::lognormal_distribution<> logNormal(scaledLocation, std::sqrt(scaledSquareScale)); + boost::math::lognormal_distribution<> logNormal( + scaledLocation, std::sqrt(scaledSquareScale)); LOG_DEBUG(<< "scaled mean = " << boost::math::mean(logNormal) - << ", scaled variance = " << boost::math::variance(logNormal)); - LOG_DEBUG(<< "scaled location = " << scaledLocation << ", scaled precision = " << scaledPrecision); + << ", scaled variance = " + << boost::math::variance(logNormal)); + LOG_DEBUG(<< "scaled location = " << scaledLocation + << ", scaled precision = " << scaledPrecision); } TMeanAccumulator meanError; @@ -1583,17 +1672,23 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { weights.clear(); weights.resize(samples.size(), TDouble4Vec(1, 1.0)); filter.addSamples(weightStyle, samples, weights); - rng.generateLogNormalSamples(scaledLocation, scaledSquareScale, 200, samples); + 
rng.generateLogNormalSamples( + scaledLocation, scaledSquareScale, 200, samples); weights.clear(); weights.resize(samples.size(), TDouble4Vec(1, scale)); filter.addSamples(weightStyle, samples, weights); - boost::math::lognormal_distribution<> logNormal(filter.normalMean(), std::sqrt(1.0 / filter.normalPrecision())); + boost::math::lognormal_distribution<> logNormal( + filter.normalMean(), + std::sqrt(1.0 / filter.normalPrecision())); double dm = (dataTypes[t] == maths_t::E_IntegerData ? 0.5 : 0.0); double dv = (dataTypes[t] == maths_t::E_IntegerData ? 1.0 / 12.0 : 0.0); - double trialMeanError = std::fabs(boost::math::mean(logNormal) - (mean + dm)) / std::max(1.0, mean); + double trialMeanError = + std::fabs(boost::math::mean(logNormal) - (mean + dm)) / + std::max(1.0, mean); double trialVarianceError = - std::fabs(boost::math::variance(logNormal) - (variance + dv)) / std::max(1.0, variance); + std::fabs(boost::math::variance(logNormal) - (variance + dv)) / + std::max(1.0, variance); LOG_DEBUG(<< "trial mean error = " << trialMeanError); LOG_DEBUG(<< "trial variance error = " << trialVarianceError); @@ -1602,11 +1697,15 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { varianceError.add(trialVarianceError); } - LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(meanError)); - LOG_DEBUG(<< "variance error = " << maths::CBasicStatistics::mean(varianceError)); + LOG_DEBUG(<< "mean error = " + << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG(<< "variance error = " + << maths::CBasicStatistics::mean(varianceError)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < maximumMeanError[t]); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceError) < maximumVarianceError[t]); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < + maximumMeanError[t]); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceError) < + maximumVarianceError[t]); meanMeanError += meanError; meanVarianceError += varianceError; @@ -1615,10 +1714,13 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { } LOG_DEBUG(<< "mean mean error = " << maths::CBasicStatistics::mean(meanMeanError)); - LOG_DEBUG(<< "mean variance error = " << maths::CBasicStatistics::mean(meanVarianceError)); + LOG_DEBUG(<< "mean variance error = " + << maths::CBasicStatistics::mean(meanVarianceError)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < maximumMeanMeanError[t]); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanVarianceError) < maximumMeanVarianceError[t]); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < + maximumMeanMeanError[t]); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanVarianceError) < + maximumMeanVarianceError[t]); } } } @@ -1642,8 +1744,10 @@ void CLogNormalMeanPrecConjugateTest::testNegativeSample() { TDoubleVec samples; rng.generateLogNormalSamples(location, squareScale, 100, samples); - CLogNormalMeanPrecConjugate filter1 = CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, 0.0, 0.2); - CLogNormalMeanPrecConjugate filter2 = CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.74524, 0.0, 0.2); + CLogNormalMeanPrecConjugate filter1 = CLogNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.0, 0.0, 0.2); + CLogNormalMeanPrecConjugate filter2 = CLogNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 1.74524, 0.0, 0.2); filter1.addSamples(samples); filter2.addSamples(samples); @@ -1662,44 +1766,59 @@ void 
CLogNormalMeanPrecConjugateTest::testNegativeSample() {
CppUnit::Test* CLogNormalMeanPrecConjugateTest::suite() {
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLogNormalMeanPrecConjugateTest");
- suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>("CLogNormalMeanPrecConjugateTest::testMultipleUpdate",
- &CLogNormalMeanPrecConjugateTest::testMultipleUpdate));
- suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>("CLogNormalMeanPrecConjugateTest::testPropagation",
- &CLogNormalMeanPrecConjugateTest::testPropagation));
- suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>("CLogNormalMeanPrecConjugateTest::testMeanEstimation",
- &CLogNormalMeanPrecConjugateTest::testMeanEstimation));
suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
- "CLogNormalMeanPrecConjugateTest::testPrecisionEstimation", &CLogNormalMeanPrecConjugateTest::testPrecisionEstimation));
+ "CLogNormalMeanPrecConjugateTest::testMultipleUpdate",
+ &CLogNormalMeanPrecConjugateTest::testMultipleUpdate));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testPropagation",
+ &CLogNormalMeanPrecConjugateTest::testPropagation));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testMeanEstimation",
+ &CLogNormalMeanPrecConjugateTest::testMeanEstimation));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testPrecisionEstimation",
+ &CLogNormalMeanPrecConjugateTest::testPrecisionEstimation));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testMarginalLikelihood",
+ &CLogNormalMeanPrecConjugateTest::testMarginalLikelihood));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean",
+ &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode",
+ &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance",
+ &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood",
+ &CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testCdf", &CLogNormalMeanPrecConjugateTest::testCdf));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples",
+ &CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testAnomalyScore",
+ &CLogNormalMeanPrecConjugateTest::testAnomalyScore));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testOffset",
+ &CLogNormalMeanPrecConjugateTest::testOffset));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testIntegerData",
+ &CLogNormalMeanPrecConjugateTest::testIntegerData));
suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
+ "CLogNormalMeanPrecConjugateTest::testLowVariationData",
+ &CLogNormalMeanPrecConjugateTest::testLowVariationData));
suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
- "CLogNormalMeanPrecConjugateTest::testMarginalLikelihood", &CLogNormalMeanPrecConjugateTest::testMarginalLikelihood));
suiteOfTests->addTest(new CppUnit::TestCaller<CLogNormalMeanPrecConjugateTest>(
"CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean", &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean)); + "CLogNormalMeanPrecConjugateTest::testPersist", + &CLogNormalMeanPrecConjugateTest::testPersist)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode", &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance", - &CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance)); + "CLogNormalMeanPrecConjugateTest::testVarianceScale", + &CLogNormalMeanPrecConjugateTest::testVarianceScale)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood", &CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testCdf", - &CLogNormalMeanPrecConjugateTest::testCdf)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples", - &CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testAnomalyScore", - &CLogNormalMeanPrecConjugateTest::testAnomalyScore)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testOffset", - &CLogNormalMeanPrecConjugateTest::testOffset)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testIntegerData", - &CLogNormalMeanPrecConjugateTest::testIntegerData)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testLowVariationData", - &CLogNormalMeanPrecConjugateTest::testLowVariationData)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testPersist", - &CLogNormalMeanPrecConjugateTest::testPersist)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testVarianceScale", - &CLogNormalMeanPrecConjugateTest::testVarianceScale)); - suiteOfTests->addTest(new CppUnit::TestCaller("CLogNormalMeanPrecConjugateTest::testNegativeSample", - &CLogNormalMeanPrecConjugateTest::testNegativeSample)); + "CLogNormalMeanPrecConjugateTest::testNegativeSample", + &CLogNormalMeanPrecConjugateTest::testNegativeSample)); return suiteOfTests; } diff --git a/lib/maths/unittest/CLogTDistributionTest.cc b/lib/maths/unittest/CLogTDistributionTest.cc index ca0dfc7912..335a8890ef 100644 --- a/lib/maths/unittest/CLogTDistributionTest.cc +++ b/lib/maths/unittest/CLogTDistributionTest.cc @@ -42,10 +42,12 @@ void CLogTDistributionTest::testMode() { for (size_t i = 0; i < boost::size(degreesFreedoms); ++i) { for (size_t j = 0; j < boost::size(locations); ++j) { for (size_t k = 0; k < boost::size(squareScales); ++k) { - LOG_DEBUG(<< "degrees freedom = " << degreesFreedoms[i] << ", location = " << locations[j] + LOG_DEBUG(<< "degrees freedom = " << degreesFreedoms[i] + << ", location = " << locations[j] << ", scale = " << std::sqrt(squareScales[k])); - CLogTDistribution logt(degreesFreedoms[i], locations[j], std::sqrt(squareScales[k])); + CLogTDistribution logt(degreesFreedoms[i], locations[j], + std::sqrt(squareScales[k])); double x = mode(logt); @@ -84,7 +86,8 @@ void CLogTDistributionTest::testPdf() { nTests = std::min(nTests, boost::size(squareScales)); for (size_t test = 0; test < nTests; ++test) { - CLogTDistribution 
+ CLogTDistribution logt(degreesFreedom[test], locations[test],
+ std::sqrt(squareScales[test]));
for (unsigned int p = 1; p < 100; ++p) {
double q = static_cast<double>(p) / 100.0;
@@ -124,12 +127,15 @@ void CLogTDistributionTest::testCdf() {
TDoubleVec samples;
rng.generateStudentsSamples(degreesFreedom[test], nSamples, samples);
- for (TDoubleVecItr sampleItr = samples.begin(); sampleItr != samples.end(); ++sampleItr) {
- *sampleItr = std::exp(*sampleItr * std::sqrt(squareScales[test]) + locations[test]);
+ for (TDoubleVecItr sampleItr = samples.begin();
+ sampleItr != samples.end(); ++sampleItr) {
+ *sampleItr = std::exp(*sampleItr * std::sqrt(squareScales[test]) +
+ locations[test]);
}
// Check the data percentiles.
- CLogTDistribution logt(degreesFreedom[test], locations[test], std::sqrt(squareScales[test]));
+ CLogTDistribution logt(degreesFreedom[test], locations[test],
+ std::sqrt(squareScales[test]));
std::sort(samples.begin(), samples.end());
for (unsigned int p = 1; p < 100; ++p) {
@@ -138,7 +144,8 @@ void CLogTDistributionTest::testCdf() {
double expectedCdf = static_cast<double>(p) / 100;
LOG_DEBUG(<< "percentile = " << p << "%"
- << ", actual cdf = " << actualCdf << ", expected cdf = " << expectedCdf);
+ << ", actual cdf = " << actualCdf
+ << ", expected cdf = " << expectedCdf);
// No more than a 10% error in the sample percentile.
CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedCdf, actualCdf, 0.1 * expectedCdf);
@@ -161,7 +168,8 @@ void CLogTDistributionTest::testQuantile() {
nTests = std::min(nTests, boost::size(squareScales));
for (size_t test = 0; test < nTests; ++test) {
- CLogTDistribution logt(degreesFreedom[test], locations[test], std::sqrt(squareScales[test]));
+ CLogTDistribution logt(degreesFreedom[test], locations[test],
+ std::sqrt(squareScales[test]));
for (unsigned int p = 1; p < 100; ++p) {
double q = static_cast<double>(p) / 100.0;
@@ -176,14 +184,14 @@ void CLogTDistributionTest::testQuantile() {
CppUnit::Test* CLogTDistributionTest::suite() {
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLogTDistributionTest");
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CLogTDistributionTest>("CLogTDistributionTest::testMode", &CLogTDistributionTest::testMode));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CLogTDistributionTest>("CLogTDistributionTest::testPdf", &CLogTDistributionTest::testPdf));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CLogTDistributionTest>("CLogTDistributionTest::testCdf", &CLogTDistributionTest::testCdf));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CLogTDistributionTest>("CLogTDistributionTest::testQuantile", &CLogTDistributionTest::testQuantile));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogTDistributionTest>(
+ "CLogTDistributionTest::testMode", &CLogTDistributionTest::testMode));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogTDistributionTest>(
+ "CLogTDistributionTest::testPdf", &CLogTDistributionTest::testPdf));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogTDistributionTest>(
+ "CLogTDistributionTest::testCdf", &CLogTDistributionTest::testCdf));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CLogTDistributionTest>(
+ "CLogTDistributionTest::testQuantile", &CLogTDistributionTest::testQuantile));
return suiteOfTests;
}
diff --git a/lib/maths/unittest/CMathsFuncsTest.cc b/lib/maths/unittest/CMathsFuncsTest.cc
index d34deedb4f..9d2c4ad7de 100644
--- a/lib/maths/unittest/CMathsFuncsTest.cc
+++ b/lib/maths/unittest/CMathsFuncsTest.cc
@@ -66,7 +66,8 @@ void CMathsFuncsTest::testIsFinite() {
test1.push_back(2.0);
test1.push_back(25.0);
test1.push_back(-1e6);
- CPPUNIT_ASSERT(std::equal(test1.begin(), test1.end(), maths::CMathsFuncs::beginFinite(test1)));
+ CPPUNIT_ASSERT(std::equal(test1.begin(), test1.end(),
+ maths::CMathsFuncs::beginFinite(test1)));
TDoubleVec test2;
test2.push_back(zero() / zero());
@@ -77,31 +78,40 @@ void CMathsFuncsTest::testIsFinite() {
test2.push_back(25.0);
test2.push_back(-1e6);
test2.push_back(zero() / zero());
- CPPUNIT_ASSERT(std::equal(test1.begin(), test1.end(), maths::CMathsFuncs::beginFinite(test2)));
+ CPPUNIT_ASSERT(std::equal(test1.begin(), test1.end(),
+ maths::CMathsFuncs::beginFinite(test2)));
TDoubleVec test3;
- CPPUNIT_ASSERT(maths::CMathsFuncs::beginFinite(test3) == maths::CMathsFuncs::endFinite(test3));
+ CPPUNIT_ASSERT(maths::CMathsFuncs::beginFinite(test3) ==
+ maths::CMathsFuncs::endFinite(test3));
TDoubleVec test4;
test4.push_back(zero() / zero());
test4.push_back(1.0 / zero());
test4.push_back(zero() / zero());
- CPPUNIT_ASSERT(maths::CMathsFuncs::beginFinite(test4) == maths::CMathsFuncs::endFinite(test4));
+ CPPUNIT_ASSERT(maths::CMathsFuncs::beginFinite(test4) ==
+ maths::CMathsFuncs::endFinite(test4));
}
void CMathsFuncsTest::testFpStatus() {
CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, maths::CMathsFuncs::fpStatus(3.8));
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, maths::CMathsFuncs::fpStatus(1.0 / zero()));
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed, maths::CMathsFuncs::fpStatus(zero() / zero()));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed,
+ maths::CMathsFuncs::fpStatus(1.0 / zero()));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed,
+ maths::CMathsFuncs::fpStatus(zero() / zero()));
}
CppUnit::Test* CMathsFuncsTest::suite() {
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMathsFuncsTest");
- suiteOfTests->addTest(new CppUnit::TestCaller<CMathsFuncsTest>("CMathsFuncsTest::testIsNan", &CMathsFuncsTest::testIsNan));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMathsFuncsTest>("CMathsFuncsTest::testIsInf", &CMathsFuncsTest::testIsInf));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMathsFuncsTest>("CMathsFuncsTest::testIsFinite", &CMathsFuncsTest::testIsFinite));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMathsFuncsTest>("CMathsFuncsTest::testFpStatus", &CMathsFuncsTest::testFpStatus));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMathsFuncsTest>(
+ "CMathsFuncsTest::testIsNan", &CMathsFuncsTest::testIsNan));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMathsFuncsTest>(
+ "CMathsFuncsTest::testIsInf", &CMathsFuncsTest::testIsInf));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMathsFuncsTest>(
+ "CMathsFuncsTest::testIsFinite", &CMathsFuncsTest::testIsFinite));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMathsFuncsTest>(
+ "CMathsFuncsTest::testFpStatus", &CMathsFuncsTest::testFpStatus));
return suiteOfTests;
}
diff --git a/lib/maths/unittest/CMathsMemoryTest.cc b/lib/maths/unittest/CMathsMemoryTest.cc
index af81203250..5d8f89e0c6 100644
--- a/lib/maths/unittest/CMathsMemoryTest.cc
+++ b/lib/maths/unittest/CMathsMemoryTest.cc
@@ -58,7 +58,8 @@ void CMathsMemoryTest::testPriors() {
gammaRateConjugate.addSamples(weightStyles, samples, weights);
CPPUNIT_ASSERT_EQUAL(std::size_t(0), gammaRateConjugate.memoryUsage());
- CLogNormalMeanPrecConjugate logNormalConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7, 0.2);
+ CLogNormalMeanPrecConjugate logNormalConjugate(maths_t::E_ContinuousData,
+ 0.0, 0.9, 0.8, 0.7, 0.2);
CPPUNIT_ASSERT_EQUAL(std::size_t(0), logNormalConjugate.memoryUsage());
logNormalConjugate.addSamples(weightStyles, samples, weights);
CPPUNIT_ASSERT_EQUAL(std::size_t(0), logNormalConjugate.memoryUsage());
@@ -68,7 +69,8 @@ void CMathsMemoryTest::testPriors() {
poissonConjugate.addSamples(weightStyles, samples, weights);
CPPUNIT_ASSERT_EQUAL(std::size_t(0), poissonConjugate.memoryUsage());
- CNormalMeanPrecConjugate normalConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7, 0.2);
+ CNormalMeanPrecConjugate normalConjugate(maths_t::E_ContinuousData, 0.0,
+ 0.9, 0.8, 0.7, 0.2);
CPPUNIT_ASSERT_EQUAL(std::size_t(0), normalConjugate.memoryUsage());
normalConjugate.addSamples(weightStyles, samples, weights);
CPPUNIT_ASSERT_EQUAL(std::size_t(0), normalConjugate.memoryUsage());
@@ -78,11 +80,15 @@ void CMathsMemoryTest::testPriors() {
multinomialConjugate.addSamples(weightStyles, samples, weights);
CPPUNIT_ASSERT_EQUAL(std::size_t(0), multinomialConjugate.memoryUsage());
- CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersEqualWeight);
+ CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+ maths::CAvailableModeDistributions::ALL,
+ maths_t::E_ClustersEqualWeight);
// Check that the clusterer has size at least as great as the sum of its fixed members
- std::size_t clustererSize = sizeof(maths_t::EDataType) + 4 * sizeof(double) + sizeof(maths_t::EClusterWeightCalc) +
- sizeof(CClusterer1d::CIndexGenerator) + sizeof(CXMeansOnline1d::TClusterVec);
+ std::size_t clustererSize = sizeof(maths_t::EDataType) + 4 * sizeof(double) +
+ sizeof(maths_t::EClusterWeightCalc) +
+ sizeof(CClusterer1d::CIndexGenerator) +
+ sizeof(CXMeansOnline1d::TClusterVec);
CPPUNIT_ASSERT(clusterer.memoryUsage() >= clustererSize);
@@ -158,12 +164,15 @@ void CMathsMemoryTest::testBjkstVec() {
CppUnit::Test* CMathsMemoryTest::suite() {
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMathsMemoryTest");
- suiteOfTests->addTest(new CppUnit::TestCaller<CMathsMemoryTest>("CMathsMemoryTest::testPriors", &CMathsMemoryTest::testPriors));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMathsMemoryTest>(
+ "CMathsMemoryTest::testPriors", &CMathsMemoryTest::testPriors));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMathsMemoryTest>("CMathsMemoryTest::testTimeSeriesDecompositions",
- &CMathsMemoryTest::testTimeSeriesDecompositions));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMathsMemoryTest>(
+ "CMathsMemoryTest::testTimeSeriesDecompositions",
+ &CMathsMemoryTest::testTimeSeriesDecompositions));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMathsMemoryTest>("CMathsMemoryTest::testBjkstVec", &CMathsMemoryTest::testBjkstVec));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMathsMemoryTest>(
+ "CMathsMemoryTest::testBjkstVec", &CMathsMemoryTest::testBjkstVec));
return suiteOfTests;
}
diff --git a/lib/maths/unittest/CMixtureDistributionTest.cc b/lib/maths/unittest/CMixtureDistributionTest.cc
index 5b14746de1..fbe6421246 100644
--- a/lib/maths/unittest/CMixtureDistributionTest.cc
+++ b/lib/maths/unittest/CMixtureDistributionTest.cc
@@ -39,7 +39,8 @@ void CMixtureDistributionTest::testSupport() {
modes.push_back(n1);
modes.push_back(n2);
CMixtureDistribution<boost::math::normal_distribution<>> mixture(weights, modes);
- CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(boost::math::support(n1)), core::CContainerPrinter::print(support(mixture)));
+ CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(boost::math::support(n1)),
+ core::CContainerPrinter::print(support(mixture)));
}
{
boost::math::lognormal_distribution<> l1(1.0, 0.5);
@@ -51,7 +52,8 @@ void CMixtureDistributionTest::testSupport() {
modes.push_back(l1);
modes.push_back(l2);
CMixtureDistribution<boost::math::lognormal_distribution<>> mixture(weights, modes);
- CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(boost::math::support(l1)),
+ core::CContainerPrinter::print(support(mixture)));
}
}
@@ -69,8 +71,9 @@ void CMixtureDistributionTest::testMode() {
{
LOG_DEBUG(<< "Mixture Two Normals");
- double means[][2] = {
- {0.0, 10.0}, {0.0, 9.0}, {0.0, 8.0}, {0.0, 7.0}, {0.0, 6.0}, {0.0, 5.0}, {0.0, 4.0}, {0.0, 3.0}, {0.0, 2.0}, {0.0, 1.0}};
+ double means[][2] = {{0.0, 10.0}, {0.0, 9.0}, {0.0, 8.0}, {0.0, 7.0},
+ {0.0, 6.0}, {0.0, 5.0}, {0.0, 4.0}, {0.0, 3.0},
+ {0.0, 2.0}, {0.0, 1.0}};
for (std::size_t i = 0u; i < boost::size(means); ++i) {
LOG_DEBUG(<< "means = " << core::CContainerPrinter::print(means[i]));
@@ -93,7 +96,8 @@ void CMixtureDistributionTest::testMode() {
double derivative = (pPlusEps - pMinusEps) / 2.0 / eps;
double curvature = (pPlusEps - 2.0 * p + pMinusEps) / eps / eps;
- LOG_DEBUG(<< "x = " << x << ", df/dx = " << derivative << ", d^2f/dx^2 = " << curvature);
+ LOG_DEBUG(<< "x = " << x << ", df/dx = " << derivative
+ << ", d^2f/dx^2 = " << curvature);
// Gradient zero + curvature negative => maximum.
CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, derivative, 1e-6);
@@ -172,11 +176,14 @@ void CMixtureDistributionTest::testPdf() {
{
double weights[][2] = {
- {0.5, 0.5}, {0.3, 0.7}, {0.6, 0.4}, {0.5, 0.5}, {0.1, 0.9}, {0.61, 0.39}, {0.7, 0.3}, {0.8, 0.2}, {0.15, 0.85}, {0.3, 0.7}};
- double means[][2] = {
- {0.0, 10.0}, {1.0, 9.0}, {1.4, 6.0}, {0.0, 7.0}, {3.0, 7.5}, {0.0, 5.0}, {2.0, 4.0}, {1.0, 3.0}, {1.1, 2.0}, {3.0, 3.2}};
+ {0.5, 0.5}, {0.3, 0.7}, {0.6, 0.4}, {0.5, 0.5}, {0.1, 0.9},
+ {0.61, 0.39}, {0.7, 0.3}, {0.8, 0.2}, {0.15, 0.85}, {0.3, 0.7}};
+ double means[][2] = {{0.0, 10.0}, {1.0, 9.0}, {1.4, 6.0}, {0.0, 7.0},
+ {3.0, 7.5}, {0.0, 5.0}, {2.0, 4.0}, {1.0, 3.0},
+ {1.1, 2.0}, {3.0, 3.2}};
double variances[][2] = {
- {0.3, 10.0}, {1.0, 0.4}, {1.4, 6.0}, {3.0, 1.1}, {3.0, 3.5}, {1.0, 5.0}, {2.3, 4.0}, {3.0, 1.0}, {1.1, 1.0}, {3.0, 3.2}};
+ {0.3, 10.0}, {1.0, 0.4}, {1.4, 6.0}, {3.0, 1.1}, {3.0, 3.5},
+ {1.0, 5.0}, {2.3, 4.0}, {3.0, 1.0}, {1.1, 1.0}, {3.0, 3.2}};
CPPUNIT_ASSERT_EQUAL(boost::size(weights), boost::size(means));
CPPUNIT_ASSERT_EQUAL(boost::size(means), boost::size(variances));
@@ -220,8 +227,10 @@ void CMixtureDistributionTest::testCdf() {
const std::size_t nSamples = 100000u;
- const double weights[][2] = {{0.3, 0.7}, {0.5, 0.5}, {0.6, 0.4}, {0.35, 0.65}, {0.55, 0.45}};
- const double shapes[][2] = {{10.0, 30.0}, {5.0, 25.0}, {20.0, 25.0}, {4.0, 50.0}, {11.0, 33.0}};
+ const double weights[][2] = {
+ {0.3, 0.7}, {0.5, 0.5}, {0.6, 0.4}, {0.35, 0.65}, {0.55, 0.45}};
+ const double shapes[][2] = {
+ {10.0, 30.0}, {5.0, 25.0}, {20.0, 25.0}, {4.0, 50.0}, {11.0, 33.0}};
const double scales[][2] = {{0.3, 0.2}, {1.0, 1.1}, {0.9, 0.95}, {0.4, 1.2}, {2.3, 2.1}};
CPPUNIT_ASSERT_EQUAL(boost::size(weights), boost::size(shapes));
@@ -234,10 +243,14 @@ void CMixtureDistributionTest::testCdf() {
TDoubleVec samples1;
rng.generateGammaSamples(
- shapes[i][0], scales[i][0], static_cast<std::size_t>(weights[i][0] * static_cast<double>(nSamples)), samples1);
+ shapes[i][0], scales[i][0],
+ static_cast<std::size_t>(weights[i][0] * static_cast<double>(nSamples)),
+ samples1);
TDoubleVec samples2;
rng.generateGammaSamples(
- shapes[i][1], scales[i][1], static_cast<std::size_t>(weights[i][1] * static_cast<double>(nSamples)), samples2);
+ shapes[i][1], scales[i][1],
+ static_cast<std::size_t>(weights[i][1] * static_cast<double>(nSamples)),
+ samples2);
TDoubleVec samples;
samples.insert(samples.end(), samples1.begin(), samples1.end());
@@ -261,7 +274,8 @@ void CMixtureDistributionTest::testCdf() {
double expectedCdf = static_cast<double>(p) / 100;
LOG_DEBUG(<< "percentile = " << p << "%"
- << ", actual cdf = " << actualCdf << ", expected cdf = " << expectedCdf);
+ << ", actual cdf = " << actualCdf
+ << ", expected cdf = " << expectedCdf);
// No more than a 10% error in the sample percentile.
CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedCdf, actualCdf, 0.1 * expectedCdf);
@@ -276,9 +290,12 @@ void CMixtureDistributionTest::testQuantile() {
// Check that the quantile is the inverse of the c.d.f.
- const double weights[][3] = {{0.4, 0.3, 0.3}, {0.1, 0.4, 0.5}, {0.6, 0.2, 0.2}, {0.1, 0.8, 0.1}, {0.25, 0.3, 0.45}};
- const double locations[][3] = {{1.0, 1.9, 2.2}, {0.9, 1.8, 3.0}, {2.0, 4.0, 4.5}, {0.1, 0.3, 0.4}, {0.2, 1.3, 4.8}};
- const double scales[][3] = {{0.1, 0.04, 0.5}, {0.8, 0.3, 0.6}, {0.5, 0.3, 0.4}, {0.3, 0.08, 0.9}, {0.1, 0.2, 1.0}};
+ const double weights[][3] = {
+ {0.4, 0.3, 0.3}, {0.1, 0.4, 0.5}, {0.6, 0.2, 0.2}, {0.1, 0.8, 0.1}, {0.25, 0.3, 0.45}};
+ const double locations[][3] = {
+ {1.0, 1.9, 2.2}, {0.9, 1.8, 3.0}, {2.0, 4.0, 4.5}, {0.1, 0.3, 0.4}, {0.2, 1.3, 4.8}};
+ const double scales[][3] = {
+ {0.1, 0.04, 0.5}, {0.8, 0.3, 0.6}, {0.5, 0.3, 0.4}, {0.3, 0.08, 0.9}, {0.1, 0.2, 1.0}};
CPPUNIT_ASSERT_EQUAL(boost::size(weights), boost::size(locations));
CPPUNIT_ASSERT_EQUAL(boost::size(locations), boost::size(scales));
@@ -311,16 +328,16 @@ CppUnit::Test* CMixtureDistributionTest::suite() {
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMixtureDistributionTest");
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testSupport", &CMixtureDistributionTest::testSupport));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testMode", &CMixtureDistributionTest::testMode));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testPdf", &CMixtureDistributionTest::testPdf));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testCdf", &CMixtureDistributionTest::testCdf));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMixtureDistributionTest>("CMixtureDistributionTest::testQuantile",
- &CMixtureDistributionTest::testQuantile));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMixtureDistributionTest>(
+ "CMixtureDistributionTest::testSupport", &CMixtureDistributionTest::testSupport));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMixtureDistributionTest>(
+ "CMixtureDistributionTest::testMode", &CMixtureDistributionTest::testMode));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMixtureDistributionTest>(
+ "CMixtureDistributionTest::testPdf", &CMixtureDistributionTest::testPdf));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMixtureDistributionTest>(
+ "CMixtureDistributionTest::testCdf", &CMixtureDistributionTest::testCdf));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMixtureDistributionTest>(
+ "CMixtureDistributionTest::testQuantile", &CMixtureDistributionTest::testQuantile));
return suiteOfTests;
}
diff --git a/lib/maths/unittest/CModelTest.cc b/lib/maths/unittest/CModelTest.cc
index 5d90b97096..701ecc0895 100644
--- a/lib/maths/unittest/CModelTest.cc
+++ b/lib/maths/unittest/CModelTest.cc
@@ -26,12 +26,13 @@ void CModelTest::testAll() {
double learnRate{0.5};
double decayRate{0.001};
double minimumSeasonalVarianceScale{0.3};
- maths::CModelParams params(
- bucketLength, learnRate, decayRate, minimumSeasonalVarianceScale, 6 * core::constants::HOUR, core::constants::DAY);
+ maths::CModelParams params(bucketLength, learnRate, decayRate, minimumSeasonalVarianceScale,
+ 6 * core::constants::HOUR, core::constants::DAY);
CPPUNIT_ASSERT_EQUAL(bucketLength, params.bucketLength());
CPPUNIT_ASSERT_EQUAL(learnRate, params.learnRate());
CPPUNIT_ASSERT_EQUAL(decayRate, params.decayRate());
- CPPUNIT_ASSERT_EQUAL(minimumSeasonalVarianceScale, params.minimumSeasonalVarianceScale());
+ CPPUNIT_ASSERT_EQUAL(minimumSeasonalVarianceScale,
+ params.minimumSeasonalVarianceScale());
CPPUNIT_ASSERT_EQUAL(0.0, params.probabilityBucketEmpty());
params.probabilityBucketEmpty(0.2);
CPPUNIT_ASSERT_EQUAL(0.2, params.probabilityBucketEmpty());
@@ -55,8 +56,10 @@ void CModelTest::testAll() {
CPPUNIT_ASSERT_EQUAL(1.5, params.propagationInterval());
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(maths::CConstantWeights::SEASONAL_VARIANCE),
core::CContainerPrinter::print(params.weightStyles()));
- CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(trendWeights), core::CContainerPrinter::print(params.trendWeights()));
- CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(priorWeights), core::CContainerPrinter::print(params.priorWeights()));
+ CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(trendWeights),
+ core::CContainerPrinter::print(params.trendWeights()));
+ CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(priorWeights),
+ core::CContainerPrinter::print(params.priorWeights()));
}
{
maths::CModelProbabilityParams::TDouble2Vec weight1(2, 0.4);
@@ -81,19 +84,23 @@ void CModelTest::testAll() {
CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, params.calculation(0));
CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, params.calculation(1));
CPPUNIT_ASSERT_EQUAL(50.0, params.seasonalConfidenceInterval());
- CPPUNIT_ASSERT_EQUAL(std::string("[[true, true], [false, true]]"), core::CContainerPrinter::print(params.bucketEmpty()));
+ CPPUNIT_ASSERT_EQUAL(std::string("[[true, true], [false, true]]"),
+ core::CContainerPrinter::print(params.bucketEmpty()));
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(maths::CConstantWeights::COUNT_VARIANCE),
core::CContainerPrinter::print(params.weightStyles()));
- CPPUNIT_ASSERT_EQUAL(std::string("[[[0.4, 0.4]], [[0.7, 0.7]]]"), core::CContainerPrinter::print(params.weights()));
+ CPPUNIT_ASSERT_EQUAL(std::string("[[[0.4, 0.4]], [[0.7, 0.7]]]"),
+ core::CContainerPrinter::print(params.weights()));
CPPUNIT_ASSERT_EQUAL(std::size_t(1), *params.mostAnomalousCorrelate());
- CPPUNIT_ASSERT_EQUAL(std::string("[1, 0]"), core::CContainerPrinter::print(params.coordinates()));
+ CPPUNIT_ASSERT_EQUAL(std::string("[1, 0]"),
+ core::CContainerPrinter::print(params.coordinates()));
}
}
CppUnit::Test* CModelTest::suite() {
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelTest");
- suiteOfTests->addTest(new CppUnit::TestCaller<CModelTest>("CModelTest::testAll", &CModelTest::testAll));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CModelTest>(
+ "CModelTest::testAll", &CModelTest::testAll));
return suiteOfTests;
}
diff --git a/lib/maths/unittest/CMultimodalPriorTest.cc b/lib/maths/unittest/CMultimodalPriorTest.cc
index 53bd61cf3c..9478c4180f 100644
--- a/lib/maths/unittest/CMultimodalPriorTest.cc
+++ b/lib/maths/unittest/CMultimodalPriorTest.cc
@@ -52,10 +52,12 @@ using COneOfNPrior = CPriorTestInterfaceMixin<maths::COneOfNPrior>;
//! Make the default mode prior.
COneOfNPrior makeModePrior(const double& decayRate = 0.0) {
- CGammaRateConjugate gamma(maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01, decayRate, 0.0));
- CLogNormalMeanPrecConjugate logNormal(
- maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01, decayRate, 0.0));
- CNormalMeanPrecConjugate normal(maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate));
+ CGammaRateConjugate gamma(maths::CGammaRateConjugate::nonInformativePrior(
+ maths_t::E_ContinuousData, 0.01, decayRate, 0.0));
+ CLogNormalMeanPrecConjugate logNormal(maths::CLogNormalMeanPrecConjugate::nonInformativePrior(
+ maths_t::E_ContinuousData, 0.01, decayRate, 0.0));
+ CNormalMeanPrecConjugate normal(maths::CNormalMeanPrecConjugate::nonInformativePrior(
+ maths_t::E_ContinuousData, decayRate));
COneOfNPrior::TPriorPtrVec priors;
priors.push_back(COneOfNPrior::TPriorPtr(gamma.clone()));
@@ -66,13 +68,16 @@ COneOfNPrior makeModePrior(const double& decayRate = 0.0) {
//! Make a vanilla multimodal prior.
CMultimodalPrior makePrior(const maths::CPrior* modePrior, const double& decayRate) {
- maths::CXMeansOnline1d clusterer(
- maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, decayRate);
+ maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+ maths::CAvailableModeDistributions::ALL,
+ maths_t::E_ClustersFractionWeight, decayRate);
if (modePrior) {
- return maths::CMultimodalPrior(maths_t::E_ContinuousData, clusterer, *modePrior, decayRate);
+ return maths::CMultimodalPrior(maths_t::E_ContinuousData, clusterer,
+ *modePrior, decayRate);
}
- return maths::CMultimodalPrior(maths_t::E_ContinuousData, clusterer, makeModePrior(decayRate), decayRate);
+ return maths::CMultimodalPrior(maths_t::E_ContinuousData, clusterer,
+ makeModePrior(decayRate), decayRate);
}
CMultimodalPrior makePrior(const maths::CPrior* modePrior) {
return makePrior(modePrior, 0.0);
@@ -86,19 +91,31 @@ CMultimodalPrior makePrior() {
test::CRandomNumbers RNG;
-void sample(const boost::math::normal_distribution<>& normal, std::size_t numberSamples, TDoubleVec& result) {
- RNG.generateNormalSamples(boost::math::mean(normal), boost::math::variance(normal), numberSamples, result);
+void sample(const boost::math::normal_distribution<>& normal,
+ std::size_t numberSamples,
+ TDoubleVec& result) {
+ RNG.generateNormalSamples(boost::math::mean(normal),
+ boost::math::variance(normal), numberSamples, result);
}
-void sample(const boost::math::lognormal_distribution<>& lognormal, std::size_t numberSamples, TDoubleVec& result) {
- RNG.generateLogNormalSamples(lognormal.location(), lognormal.scale() * lognormal.scale(), numberSamples, result);
+void sample(const boost::math::lognormal_distribution<>& lognormal,
+ std::size_t numberSamples,
+ TDoubleVec& result) {
+ RNG.generateLogNormalSamples(lognormal.location(),
+ lognormal.scale() * lognormal.scale(),
+ numberSamples, result);
}
-void sample(const boost::math::gamma_distribution<>& gamma, std::size_t numberSamples, TDoubleVec& result) {
+void sample(const boost::math::gamma_distribution<>& gamma,
+ std::size_t numberSamples,
+ TDoubleVec& result) {
RNG.generateGammaSamples(gamma.shape(), gamma.scale(), numberSamples, result);
}
template<typename T>
-void probabilityOfLessLikelySample(const maths::CMixtureDistribution<T>& mixture, const double& x, double& probability, double& deviation) {
+void probabilityOfLessLikelySample(const maths::CMixtureDistribution<T>& mixture,
+ const double& x,
double& x, + double& probability, + double& deviation) { using TModeVec = typename maths::CMixtureDistribution::TModeVec; static const double NUMBER_SAMPLES = 10000.0; @@ -145,10 +162,13 @@ void CMultimodalPriorTest::testMultipleUpdate() { rng.generateNormalSamples(shape, scale, 100, samples); for (size_t i = 0; i < boost::size(dataTypes); ++i) { - maths::CXMeansOnline1d clusterer(dataTypes[i], maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight); + maths::CXMeansOnline1d clusterer(dataTypes[i], + maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight); CMultimodalPrior filter1(maths::CMultimodalPrior( - dataTypes[i], clusterer, maths::CNormalMeanPrecConjugate::nonInformativePrior(dataTypes[i], decayRate))); + dataTypes[i], clusterer, + maths::CNormalMeanPrecConjugate::nonInformativePrior(dataTypes[i], decayRate))); CMultimodalPrior filter2(filter1); for (std::size_t j = 0; j < samples.size(); ++j) { @@ -194,23 +214,26 @@ void CMultimodalPriorTest::testPropagation() { } double mean = filter.marginalLikelihoodMean(); - TDoubleDoublePr percentiles[] = {filter.marginalLikelihoodConfidenceInterval(60.0), - filter.marginalLikelihoodConfidenceInterval(70.0), - filter.marginalLikelihoodConfidenceInterval(80.0), - filter.marginalLikelihoodConfidenceInterval(90.0)}; + TDoubleDoublePr percentiles[] = { + filter.marginalLikelihoodConfidenceInterval(60.0), + filter.marginalLikelihoodConfidenceInterval(70.0), + filter.marginalLikelihoodConfidenceInterval(80.0), + filter.marginalLikelihoodConfidenceInterval(90.0)}; filter.propagateForwardsByTime(40.0); CPPUNIT_ASSERT(filter.checkInvariants()); double propagatedMean = filter.marginalLikelihoodMean(); - TDoubleDoublePr propagatedPercentiles[] = {filter.marginalLikelihoodConfidenceInterval(60.0), - filter.marginalLikelihoodConfidenceInterval(70.0), - filter.marginalLikelihoodConfidenceInterval(80.0), - filter.marginalLikelihoodConfidenceInterval(90.0)}; + TDoubleDoublePr propagatedPercentiles[] = { + filter.marginalLikelihoodConfidenceInterval(60.0), + filter.marginalLikelihoodConfidenceInterval(70.0), + filter.marginalLikelihoodConfidenceInterval(80.0), + filter.marginalLikelihoodConfidenceInterval(90.0)}; LOG_DEBUG(<< "mean = " << mean << ", propagatedMean = " << propagatedMean); LOG_DEBUG(<< "percentiles = " << core::CContainerPrinter::print(percentiles)); - LOG_DEBUG(<< "propagatedPercentiles = " << core::CContainerPrinter::print(propagatedPercentiles)); + LOG_DEBUG(<< "propagatedPercentiles = " + << core::CContainerPrinter::print(propagatedPercentiles)); CPPUNIT_ASSERT_DOUBLES_EQUAL(mean, propagatedMean, eps * mean); for (std::size_t i = 0u; i < boost::size(percentiles); ++i) { @@ -261,18 +284,23 @@ void CMultimodalPriorTest::testSingleMode() { double fx = boost::math::pdf(f, samples[i]); TDouble1Vec sample(1, samples[i]); double l1; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter1.jointLogMarginalLikelihood(sample, l1)); L1G.add(std::log(fx) - l1); double l2; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter2.jointLogMarginalLikelihood(sample, l2)); L12.add(l2 - l1); differentialEntropy.add(-std::log(fx)); } - LOG_DEBUG(<< "L1G = " << maths::CBasicStatistics::mean(L1G) << ", L12 = " << maths::CBasicStatistics::mean(L12) + LOG_DEBUG(<< "L1G = " << maths::CBasicStatistics::mean(L1G) + << ", L12 = " << 
maths::CBasicStatistics::mean(L12) << ", differential entropy " << differentialEntropy); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / maths::CBasicStatistics::mean(differentialEntropy) < 0.0); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / + maths::CBasicStatistics::mean(differentialEntropy) < + 0.0); } LOG_DEBUG(<< "Log-Normal"); { @@ -303,18 +331,23 @@ void CMultimodalPriorTest::testSingleMode() { double fx = boost::math::pdf(f, samples[i]); TDouble1Vec sample(1, samples[i]); double l1; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter1.jointLogMarginalLikelihood(sample, l1)); L1G.add(std::log(fx) - l1); double l2; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter2.jointLogMarginalLikelihood(sample, l2)); L12.add(l2 - l1); differentialEntropy.add(-std::log(fx)); } - LOG_DEBUG(<< "L1G = " << maths::CBasicStatistics::mean(L1G) << ", L12 = " << maths::CBasicStatistics::mean(L12) + LOG_DEBUG(<< "L1G = " << maths::CBasicStatistics::mean(L1G) + << ", L12 = " << maths::CBasicStatistics::mean(L12) << ", differential entropy " << differentialEntropy); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / maths::CBasicStatistics::mean(differentialEntropy) < 0.0); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / + maths::CBasicStatistics::mean(differentialEntropy) < + 0.0); } LOG_DEBUG(<< "Gamma"); { @@ -345,18 +378,23 @@ void CMultimodalPriorTest::testSingleMode() { double fx = boost::math::pdf(f, samples[i]); TDouble1Vec sample(1, samples[i]); double l1; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter1.jointLogMarginalLikelihood(sample, l1)); L1G.add(std::log(fx) - l1); double l2; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter2.jointLogMarginalLikelihood(sample, l2)); L12.add(l2 - l1); differentialEntropy.add(-std::log(fx)); } - LOG_DEBUG(<< "L1G = " << maths::CBasicStatistics::mean(L1G) << ", L12 = " << maths::CBasicStatistics::mean(L12) + LOG_DEBUG(<< "L1G = " << maths::CBasicStatistics::mean(L1G) + << ", L12 = " << maths::CBasicStatistics::mean(L12) << ", differential entropy " << differentialEntropy); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / maths::CBasicStatistics::mean(differentialEntropy) < 0.1); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(L1G) / + maths::CBasicStatistics::mean(differentialEntropy) < + 0.1); } } @@ -408,7 +446,8 @@ void CMultimodalPriorTest::testMultipleModes() { double loss = 0.0; TMeanAccumulator differentialEntropy_; for (std::size_t j = 0u; j < samples.size(); ++j) { - double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]); + double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + + w2 * boost::math::pdf(mode2Distribution, samples[j]); differentialEntropy_.add(-std::log(fx)); } double differentialEntropy = maths::CBasicStatistics::mean(differentialEntropy_); @@ -433,17 +472,21 @@ void CMultimodalPriorTest::testMultipleModes() { TMeanAccumulator loss12; for (std::size_t j = 0u; j < samples.size(); ++j) { - double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]); + double fx = w1 * 
boost::math::pdf(mode1Distribution, samples[j]) +
+ w2 * boost::math::pdf(mode2Distribution, samples[j]);
differentialEntropy_.add(-std::log(fx));
}
double differentialEntropy = maths::CBasicStatistics::mean(differentialEntropy_);
@@ -433,17 +472,21 @@ void CMultimodalPriorTest::testMultipleModes() {
TMeanAccumulator loss12;
for (std::size_t j = 0u; j < samples.size(); ++j) {
- double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]);
+ double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) +
+ w2 * boost::math::pdf(mode2Distribution, samples[j]);
TDouble1Vec sample(1, samples[j]);
double l1;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+ filter1.jointLogMarginalLikelihood(sample, l1));
loss1G.add(std::log(fx) - l1);
double l2;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+ filter2.jointLogMarginalLikelihood(sample, l2));
loss12.add(l2 - l1);
}
- LOG_DEBUG(<< "loss1G = " << maths::CBasicStatistics::mean(loss1G) << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
+ LOG_DEBUG(<< "loss1G = " << maths::CBasicStatistics::mean(loss1G)
+ << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
<< ", differential entropy " << differentialEntropy);
CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss12) < 0.0);
@@ -487,14 +530,18 @@ void CMultimodalPriorTest::testMultipleModes() {
double w1 = n1 / static_cast<double>(n1 + n2 + n3);
double w2 = n2 / static_cast<double>(n1 + n2 + n3);
double w3 = n3 / static_cast<double>(n1 + n2 + n3);
- boost::math::lognormal_distribution<> mode1Distribution(location1, std::sqrt(squareScale1));
- boost::math::lognormal_distribution<> mode2Distribution(location2, std::sqrt(squareScale2));
- boost::math::lognormal_distribution<> mode3Distribution(location3, std::sqrt(squareScale3));
+ boost::math::lognormal_distribution<> mode1Distribution(
+ location1, std::sqrt(squareScale1));
+ boost::math::lognormal_distribution<> mode2Distribution(
+ location2, std::sqrt(squareScale2));
+ boost::math::lognormal_distribution<> mode3Distribution(
+ location3, std::sqrt(squareScale3));
double loss = 0.0;
TMeanAccumulator differentialEntropy_;
for (std::size_t j = 0u; j < samples.size(); ++j) {
- double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]) +
+ double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) +
+ w2 * boost::math::pdf(mode2Distribution, samples[j]) +
w3 * boost::math::pdf(mode3Distribution, samples[j]);
differentialEntropy_.add(-std::log(fx));
}
@@ -520,18 +567,22 @@ void CMultimodalPriorTest::testMultipleModes() {
TMeanAccumulator loss12;
for (std::size_t j = 0u; j < samples.size(); ++j) {
- double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]) +
+ double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) +
+ w2 * boost::math::pdf(mode2Distribution, samples[j]) +
w3 * boost::math::pdf(mode3Distribution, samples[j]);
TDouble1Vec sample(1, samples[j]);
double l1;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+ filter1.jointLogMarginalLikelihood(sample, l1));
loss1G.add(std::log(fx) - l1);
double l2;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+ filter2.jointLogMarginalLikelihood(sample, l2));
loss12.add(l2 - l1);
}
- LOG_DEBUG(<< "loss1G = " << maths::CBasicStatistics::mean(loss1G) << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
+ LOG_DEBUG(<< "loss1G = " << maths::CBasicStatistics::mean(loss1G)
+ << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
<< ", differential entropy " << differentialEntropy);
CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss12) < 0.0);
@@ -576,13 +627,15 @@ void CMultimodalPriorTest::testMultipleModes() {
CMultimodalPriorTest::testMultipleModes() {
 double w2 = n2 / static_cast<double>(n1 + n2 + n3);
 double w3 = n3 / static_cast<double>(n1 + n2 + n3);
 boost::math::normal_distribution<> mode1Distribution(mean1, std::sqrt(variance1));
- boost::math::lognormal_distribution<> mode2Distribution(location2, std::sqrt(squareScale2));
+ boost::math::lognormal_distribution<> mode2Distribution(
+ location2, std::sqrt(squareScale2));
 boost::math::gamma_distribution<> mode3Distribution(shape3, scale3);
 double loss = 0.0;
 TMeanAccumulator differentialEntropy_;
 for (std::size_t j = 0u; j < samples.size(); ++j) {
- double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]) +
+ double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) +
+ w2 * boost::math::pdf(mode2Distribution, samples[j]) +
 w3 * boost::math::pdf(mode3Distribution, samples[j]);
 differentialEntropy_.add(-std::log(fx));
 }
@@ -608,18 +661,22 @@ void CMultimodalPriorTest::testMultipleModes() {
 TMeanAccumulator loss12;
 for (std::size_t j = 0u; j < samples.size(); ++j) {
- double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) + w2 * boost::math::pdf(mode2Distribution, samples[j]) +
+ double fx = w1 * boost::math::pdf(mode1Distribution, samples[j]) +
+ w2 * boost::math::pdf(mode2Distribution, samples[j]) +
 w3 * boost::math::pdf(mode3Distribution, samples[j]);
 TDouble1Vec sample(1, samples[j]);
 double l1;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter1.jointLogMarginalLikelihood(sample, l1));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+ filter1.jointLogMarginalLikelihood(sample, l1));
 loss1G.add(std::log(fx) - l1);
 double l2;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter2.jointLogMarginalLikelihood(sample, l2));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+ filter2.jointLogMarginalLikelihood(sample, l2));
 loss12.add(l2 - l1);
 }
- LOG_DEBUG(<< "loss1G = " << maths::CBasicStatistics::mean(loss1G) << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
+ LOG_DEBUG(<< "loss1G = " << maths::CBasicStatistics::mean(loss1G)
+ << ", loss12 = " << maths::CBasicStatistics::mean(loss12)
 << ", differential entropy " << differentialEntropy);
 CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss12) < 0.0);
@@ -658,17 +715,16 @@ void CMultimodalPriorTest::testMarginalLikelihood() {
 filter.addSamples(samples);
 maths_t::ESampleWeightStyle weightStyles[] = {
- maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight};
+ maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight,
+ maths_t::E_SampleCountWeight};
 double weights[] = {0.1, 1.0, 10.0};
 for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
 for (std::size_t j = 0u; j < boost::size(weights); ++j) {
 double lb, ub;
- filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]),
- TDouble1Vec(1, 20000.0),
- TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])),
- lb,
- ub);
+ filter.minusLogJointCdf(
+ maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 20000.0),
+ TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub);
 LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0);
 CPPUNIT_ASSERT(lb >= 0.0);
 CPPUNIT_ASSERT(ub >= 0.0);
@@ -697,11 +753,14 @@ void CMultimodalPriorTest::testMarginalLikelihood() {
 const double mean3 = 25.0;
 const double variance3 = 3.0;
 TDoubleVec samples1;
- rng.generateNormalSamples(mean1, variance1, static_cast<std::size_t>(w1 * 500.0), samples1);
+ rng.generateNormalSamples(mean1, variance1,
+ static_cast<std::size_t>(w1 * 500.0), samples1);
 TDoubleVec samples2;
- 
rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(w2 * 500.0), samples2);
+ rng.generateNormalSamples(mean2, variance2,
+ static_cast<std::size_t>(w2 * 500.0), samples2);
 TDoubleVec samples3;
- rng.generateNormalSamples(mean3, variance3, static_cast<std::size_t>(w3 * 500.0), samples3);
+ rng.generateNormalSamples(mean3, variance3,
+ static_cast<std::size_t>(w3 * 500.0), samples3);
 TDoubleVec samples;
 samples.insert(samples.end(), samples1.begin(), samples1.end());
 samples.insert(samples.end(), samples2.begin(), samples2.end());
@@ -730,7 +789,8 @@ void CMultimodalPriorTest::testMarginalLikelihood() {
 LOG_DEBUG(<< "number = " << numberSamples[i] << ", sample = " << sample[0]);
 double logLikelihood = 0.0;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+ filter.jointLogMarginalLikelihood(sample, logLikelihood));
 double pdf = std::exp(logLikelihood);
 double lowerBound = 0.0, upperBound = 0.0;
@@ -767,11 +827,14 @@ void CMultimodalPriorTest::testMarginalLikelihood() {
 LOG_DEBUG(<< "# modes = " << filter.numberModes());
 TDoubleVec manySamples1;
- rng.generateNormalSamples(mean1, variance1, static_cast<std::size_t>(w1 * 100000.0), manySamples1);
+ rng.generateNormalSamples(mean1, variance1,
+ static_cast<std::size_t>(w1 * 100000.0), manySamples1);
 TDoubleVec manySamples2;
- rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(w2 * 100000.0), manySamples2);
+ rng.generateNormalSamples(mean2, variance2,
+ static_cast<std::size_t>(w2 * 100000.0), manySamples2);
 TDoubleVec manySamples3;
- rng.generateNormalSamples(mean3, variance3, static_cast<std::size_t>(w3 * 100000.0), manySamples3);
+ rng.generateNormalSamples(mean3, variance3,
+ static_cast<std::size_t>(w3 * 100000.0), manySamples3);
 TDoubleVec manySamples;
 manySamples.insert(manySamples.end(), manySamples1.begin(), manySamples1.end());
 manySamples.insert(manySamples.end(), manySamples2.begin(), manySamples2.end());
@@ -797,15 +860,18 @@ void CMultimodalPriorTest::testMarginalLikelihood() {
 TDouble1Vec sample(1, manySamples[i]);
 filter.addSamples(sample);
 double logLikelihood = 0.0;
- CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
+ CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+ filter.jointLogMarginalLikelihood(sample, logLikelihood));
 differentialEntropy -= logLikelihood;
 }
 differentialEntropy /= static_cast<double>(manySamples.size());
- LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
+ LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy
+ << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
- CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.05 * expectedDifferentialEntropy);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy,
+ 0.05 * expectedDifferentialEntropy);
 }
 }
@@ -827,15 +893,19 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode() {
 double mean2 = 8.0;
 double variance2 = 1.5;
 TDoubleVec samples1;
- rng.generateNormalSamples(mean1, variance1, static_cast<std::size_t>(w1 * 500.0), samples1);
+ rng.generateNormalSamples(mean1, variance1,
+ static_cast<std::size_t>(w1 * 500.0), samples1);
 TDoubleVec samples2;
- rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(w2 * 500.0), samples2);
+ rng.generateNormalSamples(mean2, variance2,
+ static_cast<std::size_t>(w2 * 500.0), samples2);
 TDoubleVec samples;
 samples.insert(samples.end(), samples1.begin(), samples1.end());
 samples.insert(samples.end(), samples2.begin(), samples2.end());
rng.random_shuffle(samples.begin(), samples.end());
- const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0};
+ const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
+ 0.7, 0.8, 0.9, 1.0, 1.2, 1.5,
+ 2.0, 2.5, 3.0, 4.0, 5.0};
 CMultimodalPrior filter(makePrior());
 filter.addSamples(samples);
@@ -853,19 +923,23 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode() {
 double mode = filter.marginalLikelihoodMode(weightStyle, weight);
 LOG_DEBUG(<< "marginalLikelihoodMode = " << mode);
 // Should be near 8.
- CPPUNIT_ASSERT_DOUBLES_EQUAL(8.0, filter.marginalLikelihoodMode(weightStyle, weight), 2.0);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(
+ 8.0, filter.marginalLikelihoodMode(weightStyle, weight), 2.0);
 double eps = 0.01;
 double modeMinusEps = mode - eps;
 double modePlusEps = mode + eps;
 double fMode, fModeMinusEps, fModePlusEps;
 filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode), weights, fMode);
- filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modeMinusEps), weights, fModeMinusEps);
- filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modePlusEps), weights, fModePlusEps);
+ filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modeMinusEps),
+ weights, fModeMinusEps);
+ filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modePlusEps),
+ weights, fModePlusEps);
 fMode = std::exp(fMode);
 fModeMinusEps = std::exp(fModeMinusEps);
 fModePlusEps = std::exp(fModePlusEps);
 double gradient = (fModePlusEps - fModeMinusEps) / 2.0 / eps;
- LOG_DEBUG(<< "f(mode) = " << fMode << ", f(mode-eps) = " << fModeMinusEps << ", f(mode + eps) = " << fModePlusEps);
+ LOG_DEBUG(<< "f(mode) = " << fMode << ", f(mode-eps) = " << fModeMinusEps
+ << ", f(mode + eps) = " << fModePlusEps);
 LOG_DEBUG(<< "gradient = " << gradient);
 CPPUNIT_ASSERT(std::fabs(gradient) < 0.05);
 CPPUNIT_ASSERT(fMode > 0.999 * fModeMinusEps);
@@ -876,7 +950,8 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode() {
 TDoubleVec fTrials;
 for (std::size_t j = 0u; j < trials.size(); ++j) {
 double fTrial;
- filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, trials[j]), weights, fTrial);
+ filter.jointLogMarginalLikelihood(
+ weightStyle, TDouble1Vec(1, trials[j]), weights, fTrial);
 fTrial = std::exp(fTrial);
 if (fTrial > fMode) {
 LOG_DEBUG(<< "f(" << trials[j] << ") = " << fTrial << " > " << fMode);
@@ -914,17 +989,22 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() {
 double mean2 = 8.0;
 double variance2 = 2.0;
 TDoubleVec samples1;
- rng.generateLogNormalSamples(location1, squareScale1, static_cast<std::size_t>(w1 * 2000.0), samples1);
+ rng.generateLogNormalSamples(location1, squareScale1,
+ static_cast<std::size_t>(w1 * 2000.0), samples1);
 TDoubleVec samples2;
- rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(w2 * 2000.0), samples2);
+ rng.generateNormalSamples(mean2, variance2,
+ static_cast<std::size_t>(w2 * 2000.0), samples2);
 TDoubleVec samples;
 samples.insert(samples.end(), samples1.begin(), samples1.end());
 samples.insert(samples.end(), samples2.begin(), samples2.end());
 rng.random_shuffle(samples.begin(), samples.end());
- const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0};
+ const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
+ 0.7, 0.8, 0.9, 1.0, 1.2, 1.5,
+ 2.0, 2.5, 3.0, 4.0, 5.0};
- const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0, 99.0, 99.9, 99.99};
+ const double percentages[] = {5.0, 10.0, 
20.0, 30.0, 40.0, 50.0, 60.0,
+ 70.0, 80.0, 95.0, 99.0, 99.9, 99.99};
 CMultimodalPrior filter(makePrior());
 filter.addSamples(samples);
@@ -935,9 +1015,12 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() {
 for (std::size_t j = 0u; j < boost::size(percentages); ++j) {
 LOG_DEBUG(<< "** percentage = " << percentages[j] << " **");
 double q1, q2;
- filter.marginalLikelihoodQuantileForTest(50.0 - percentages[j] / 2.0, 1e-3, q1);
- filter.marginalLikelihoodQuantileForTest(50.0 + percentages[j] / 2.0, 1e-3, q2);
- TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[j]);
+ filter.marginalLikelihoodQuantileForTest(50.0 - percentages[j] / 2.0,
+ 1e-3, q1);
+ filter.marginalLikelihoodQuantileForTest(50.0 + percentages[j] / 2.0,
+ 1e-3, q2);
+ TDoubleDoublePr interval =
+ filter.marginalLikelihoodConfidenceInterval(percentages[j]);
 LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]"
 << ", interval = " << core::CContainerPrinter::print(interval));
 CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.1);
@@ -953,11 +1036,14 @@
 TMeanAccumulator error;
 for (std::size_t i = 0u; i < boost::size(percentages); ++i) {
 LOG_DEBUG(<< "** percentage = " << percentages[i] << " **");
- std::size_t i1 = static_cast<std::size_t>(static_cast<double>(samples.size()) * (50.0 - percentages[i] / 2.0) / 100.0 + 0.5);
- std::size_t i2 = static_cast<std::size_t>(static_cast<double>(samples.size()) * (50.0 + percentages[i] / 2.0) / 100.0 + 0.5);
+ std::size_t i1 = static_cast<std::size_t>(
+ static_cast<double>(samples.size()) * (50.0 - percentages[i] / 2.0) / 100.0 + 0.5);
+ std::size_t i2 = static_cast<std::size_t>(
+ static_cast<double>(samples.size()) * (50.0 + percentages[i] / 2.0) / 100.0 + 0.5);
 double q1 = samples[i1];
 double q2 = samples[std::min(i2, samples.size() - 1)];
- TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[i]);
+ TDoubleDoublePr interval =
+ filter.marginalLikelihoodConfidenceInterval(percentages[i]);
 LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]"
 << ", interval = " << core::CContainerPrinter::print(interval));
 CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, std::max(0.1 * q1, 0.15));
@@ -978,18 +1064,16 @@
 core::CRapidXmlParser parser;
 CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(state.str()));
 core::CRapidXmlStateRestoreTraverser traverser(parser);
- maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
- 0.0,
- maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
- maths::MINIMUM_CLUSTER_SPLIT_COUNT,
- maths::MINIMUM_CATEGORY_COUNT);
+ maths::SDistributionRestoreParams params(
+ maths_t::E_ContinuousData, 0.0, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+ maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
 TPriorPtr prior;
 maths::CPriorStateSerialiser restorer;
 CPPUNIT_ASSERT(restorer(params, prior, traverser));
- TDoubleDoublePr median =
- prior->marginalLikelihoodConfidenceInterval(0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT);
- TDoubleDoublePr i90 =
- prior->marginalLikelihoodConfidenceInterval(90, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT);
+ TDoubleDoublePr median = prior->marginalLikelihoodConfidenceInterval(
+ 0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT);
+ TDoubleDoublePr i90 = prior->marginalLikelihoodConfidenceInterval(
+ 90, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT);
 LOG_DEBUG(<< "median = " << maths::CBasicStatistics::mean(median));
LOG_DEBUG(<< "confidence interval = " << core::CContainerPrinter::print(i90)); @@ -1020,7 +1104,8 @@ void CMultimodalPriorTest::testSampleMarginalLikelihood() { using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMeanVarSkewAccumulator = maths::CBasicStatistics::SSampleMeanVarSkew::TAccumulator; + using TMeanVarSkewAccumulator = + maths::CBasicStatistics::SSampleMeanVarSkew::TAccumulator; const double eps = 1e-3; @@ -1070,31 +1155,35 @@ void CMultimodalPriorTest::testSampleMarginalLikelihood() { TMeanVarAccumulator sampledMoments; sampledMoments = std::for_each(sampled.begin(), sampled.end(), sampledMoments); - LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodMean() - << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); - LOG_DEBUG(<< "expectedVariance = " << filter.marginalLikelihoodVariance() - << ", sampledVariance = " << maths::CBasicStatistics::variance(sampledMoments)); + LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodMean() << ", sampledMean = " + << maths::CBasicStatistics::mean(sampledMoments)); + LOG_DEBUG(<< "expectedVariance = " << filter.marginalLikelihoodVariance() << ", sampledVariance = " + << maths::CBasicStatistics::variance(sampledMoments)); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMoments), 0.005 * filter.marginalLikelihoodMean()); + CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), + maths::CBasicStatistics::mean(sampledMoments), + 0.005 * filter.marginalLikelihoodMean()); CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(), maths::CBasicStatistics::variance(sampledMoments), 0.2 * filter.marginalLikelihoodVariance()); - meanMeanError.add(std::fabs(filter.marginalLikelihoodMean() - maths::CBasicStatistics::mean(sampledMoments)) / + meanMeanError.add(std::fabs(filter.marginalLikelihoodMean() - + maths::CBasicStatistics::mean(sampledMoments)) / filter.marginalLikelihoodMean()); - meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - maths::CBasicStatistics::variance(sampledMoments)) / + meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - + maths::CBasicStatistics::variance(sampledMoments)) / filter.marginalLikelihoodVariance()); } std::sort(sampled.begin(), sampled.end()); for (std::size_t j = 1u; j < sampled.size(); ++j) { - double q = 100.0 * static_cast(j) / static_cast(sampled.size()); + double q = 100.0 * static_cast(j) / + static_cast(sampled.size()); double expectedQuantile; CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile)); - LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[j - 1] << "," - << sampled[j] << "]"); + LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" + << sampled[j - 1] << "," << sampled[j] << "]"); CPPUNIT_ASSERT(expectedQuantile >= 0.98 * sampled[j - 1]); CPPUNIT_ASSERT(expectedQuantile <= 1.02 * sampled[j]); @@ -1142,8 +1231,10 @@ void CMultimodalPriorTest::testCdf() { test::CRandomNumbers rng; - CGammaRateConjugate gamma(maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData)); - CLogNormalMeanPrecConjugate logNormal(maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + CGammaRateConjugate gamma( + maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData)); + 
CLogNormalMeanPrecConjugate logNormal(
+ maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData));
 COneOfNPrior::TPriorPtrVec priors;
 priors.push_back(COneOfNPrior::TPriorPtr(gamma.clone()));
 priors.push_back(COneOfNPrior::TPriorPtr(logNormal.clone()));
@@ -1163,7 +1254,8 @@ void CMultimodalPriorTest::testCdf() {
 CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, -1.0), lowerBound, upperBound));
 double f = (lowerBound + upperBound) / 2.0;
- CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0), lowerBound, upperBound));
+ CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, -1.0),
+ lowerBound, upperBound));
 double fComplement = (lowerBound + upperBound) / 2.0;
 LOG_DEBUG(<< "log(F(x)) = " << -f << ", log(1 - F(x)) = " << fComplement);
 CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(std::numeric_limits<double>::min()), -f, 1e-8);
@@ -1174,10 +1266,12 @@
 CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lowerBound, upperBound));
 f = (lowerBound + upperBound) / 2.0;
- CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lowerBound, upperBound));
+ CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x),
+ lowerBound, upperBound));
 fComplement = (lowerBound + upperBound) / 2.0;
- LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement));
+ LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = "
+ << (fComplement == 0.0 ? fComplement : -fComplement));
 CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-8);
 }
 }
@@ -1199,9 +1293,11 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 double variance1 = 1.0, variance2 = 1.0;
 TDoubleVec samples1;
- rng.generateNormalSamples(mean1, variance1, static_cast<std::size_t>(10000.0 * weight1), samples1);
+ rng.generateNormalSamples(mean1, variance1,
+ static_cast<std::size_t>(10000.0 * weight1), samples1);
 TDoubleVec samples2;
- rng.generateNormalSamples(mean2, variance2, static_cast<std::size_t>(10000.0 * weight2), samples2);
+ rng.generateNormalSamples(mean2, variance2,
+ static_cast<std::size_t>(10000.0 * weight2), samples2);
 TDoubleVec samples;
 samples.insert(samples.end(), samples1.begin(), samples1.end());
 samples.insert(samples.end(), samples2.begin(), samples2.end());
@@ -1230,17 +1326,21 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 double lowerBound;
 double upperBound;
- filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound);
+ filter.probabilityOfLessLikelySamples(
+ maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound);
 LOG_DEBUG(<< "lowerBound = " << lowerBound << ", upperBound = " << upperBound
- << ", expectedProbability = " << expectedProbability << ", deviation = " << deviation);
+ << ", expectedProbability = " << expectedProbability
+ << ", deviation = " << deviation);
 double probability = (lowerBound + upperBound) / 2.0;
- error +=
- probability < expectedProbability - 2.0 * deviation
- ? (expectedProbability - 2.0 * deviation) - probability
- : (probability > expectedProbability + 2.0 * deviation ? probability - (expectedProbability + 2.0 * deviation) : 0.0);
-
- CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, std::max(3.0 * deviation, 3e-5));
+ error += probability < expectedProbability - 2.0 * deviation
+ ? (expectedProbability - 2.0 * deviation) - probability
+ : (probability > expectedProbability + 2.0 * deviation
+ ? 
probability - (expectedProbability + 2.0 * deviation)
+ : 0.0);
+
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability,
+ std::max(3.0 * deviation, 3e-5));
 }
 error /= static_cast<double>(boost::size(x));
@@ -1249,29 +1349,20 @@
 double lb, ub;
 maths_t::ETail tail;
- filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
- maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight),
- TDouble1Vec(1, 49.0),
- TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)),
- lb,
- ub,
- tail);
+ filter.probabilityOfLessLikelySamples(
+ maths_t::E_TwoSided,
+ maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight),
+ TDouble1Vec(1, 49.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail);
 CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
- filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
- maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight),
- TDouble1Vec(1, 54.0),
- TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)),
- lb,
- ub,
- tail);
+ filter.probabilityOfLessLikelySamples(
+ maths_t::E_TwoSided,
+ maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight),
+ TDouble1Vec(1, 54.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail);
 CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
- filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
- maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight),
- TDouble1Vec(1, 59.0),
- TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)),
- lb,
- ub,
- tail);
+ filter.probabilityOfLessLikelySamples(
+ maths_t::E_TwoSided,
+ maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight),
+ TDouble1Vec(1, 59.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail);
 CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
 }
 {
@@ -1283,17 +1374,23 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 samples.reserve(20000u);
 for (std::size_t i = 0u; i < boost::size(weights); ++i) {
 TDoubleVec modeSamples;
- rng.generateLogNormalSamples(locations[i], squareScales[i], static_cast<std::size_t>(20000.0 * weights[i]), modeSamples);
+ rng.generateLogNormalSamples(
+ locations[i], squareScales[i],
+ static_cast<std::size_t>(20000.0 * weights[i]), modeSamples);
 samples.insert(samples.end(), modeSamples.begin(), modeSamples.end());
 }
 rng.random_shuffle(samples.begin(), samples.end());
 TDoubleVec mixtureWeights(boost::begin(weights), boost::end(weights));
 TLogNormalVec modes;
- modes.push_back(boost::math::lognormal_distribution<>(locations[0], std::sqrt(squareScales[0])));
- modes.push_back(boost::math::lognormal_distribution<>(locations[1], std::sqrt(squareScales[1])));
- modes.push_back(boost::math::lognormal_distribution<>(locations[2], std::sqrt(squareScales[2])));
- maths::CMixtureDistribution<boost::math::lognormal_distribution<>> mixture(mixtureWeights, modes);
+ modes.push_back(boost::math::lognormal_distribution<>(
+ locations[0], std::sqrt(squareScales[0])));
+ modes.push_back(boost::math::lognormal_distribution<>(
+ locations[1], std::sqrt(squareScales[1])));
+ modes.push_back(boost::math::lognormal_distribution<>(
+ locations[2], std::sqrt(squareScales[2])));
+ maths::CMixtureDistribution<boost::math::lognormal_distribution<>> mixture(
+ mixtureWeights, modes);
 CMultimodalPrior filter(makePrior());
 filter.addSamples(samples);
@@ -1310,18 +1407,22 @@
 double lowerBound;
 double upperBound;
- filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound);
+ filter.probabilityOfLessLikelySamples(
+ 
maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound);
 LOG_DEBUG(<< "lowerBound = " << lowerBound << ", upperBound = " << upperBound
- << ", expectedProbability = " << expectedProbability << ", deviation = " << deviation);
+ << ", expectedProbability = " << expectedProbability
+ << ", deviation = " << deviation);
 double probability = (lowerBound + upperBound) / 2.0;
- error +=
- probability < expectedProbability - 2.0 * deviation
- ? (expectedProbability - 2.0 * deviation) - probability
- : (probability > expectedProbability + 2.0 * deviation ? probability - (expectedProbability + 2.0 * deviation) : 0.0);
+ error += probability < expectedProbability - 2.0 * deviation
+ ? (expectedProbability - 2.0 * deviation) - probability
+ : (probability > expectedProbability + 2.0 * deviation
+ ? probability - (expectedProbability + 2.0 * deviation)
+ : 0.0);
 CPPUNIT_ASSERT_DOUBLES_EQUAL(
- expectedProbability, probability, std::min(0.2 * expectedProbability + std::max(3.0 * deviation, 1e-10), 0.06));
+ expectedProbability, probability,
+ std::min(0.2 * expectedProbability + std::max(3.0 * deviation, 1e-10), 0.06));
 }
 error /= static_cast<double>(boost::size(x));
@@ -1337,7 +1438,9 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 samples.reserve(20000u);
 for (std::size_t i = 0u; i < boost::size(weights); ++i) {
 TDoubleVec modeSamples;
- rng.generateGammaSamples(shapes[i], scales[i], static_cast<std::size_t>(20000.0 * weights[i]), modeSamples);
+ rng.generateGammaSamples(shapes[i], scales[i],
+ static_cast<std::size_t>(20000.0 * weights[i]),
+ modeSamples);
 samples.insert(samples.end(), modeSamples.begin(), modeSamples.end());
 }
 rng.random_shuffle(samples.begin(), samples.end());
@@ -1346,13 +1449,15 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 TGammaVec modes;
 modes.push_back(boost::math::gamma_distribution<>(shapes[0], scales[0]));
 modes.push_back(boost::math::gamma_distribution<>(shapes[1], scales[1]));
- maths::CMixtureDistribution<boost::math::gamma_distribution<>> mixture(mixtureWeights, modes);
+ maths::CMixtureDistribution<boost::math::gamma_distribution<>> mixture(
+ mixtureWeights, modes);
 CMultimodalPrior filter(makePrior());
 filter.addSamples(samples);
 LOG_DEBUG(<< "# modes = " << filter.numberModes());
- double x[] = {0.5, 1.5, 3.0, 35.0, 100.0, 320.0, 340.0, 360.0, 380.0, 410.0};
+ double x[] = {0.5, 1.5, 3.0, 35.0, 100.0,
+ 320.0, 340.0, 360.0, 380.0, 410.0};
 double error = 0.0;
@@ -1363,17 +1468,22 @@
 double lowerBound;
 double upperBound;
- filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound);
+ filter.probabilityOfLessLikelySamples(
+ maths_t::E_TwoSided, TDouble1Vec(1, x[i]), lowerBound, upperBound);
 LOG_DEBUG(<< "lowerBound = " << lowerBound << ", upperBound = " << upperBound
- << ", expectedProbability = " << expectedProbability << ", deviation = " << deviation);
+ << ", expectedProbability = " << expectedProbability
+ << ", deviation = " << deviation);
 double probability = (lowerBound + upperBound) / 2.0;
- error +=
- probability < expectedProbability - 2.0 * deviation
- ? (expectedProbability - 2.0 * deviation) - probability
- : (probability > expectedProbability + 2.0 * deviation ? probability - (expectedProbability + 2.0 * deviation) : 0.0);
-
- CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 0.18 * expectedProbability + std::max(2.5 * deviation, 1e-3));
+ error += probability < expectedProbability - 2.0 * deviation
+ ? 
(expectedProbability - 2.0 * deviation) - probability
+ : (probability > expectedProbability + 2.0 * deviation
+ ? probability - (expectedProbability + 2.0 * deviation)
+ : 0.0);
+
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability,
+ 0.18 * expectedProbability +
+ std::max(2.5 * deviation, 1e-3));
 }
 error /= static_cast<double>(boost::size(x));
@@ -1391,71 +1501,133 @@ void CMultimodalPriorTest::testLargeValues() {
 // well conditioned for very large values.
 TDoubleVec values{
- 7.324121e+10, 7.251927e+10, 7.152208e+10, 7.089604e+10, 7.018831e+10, 6.94266e+10, 6.890659e+10, 6.837292e+10,
- 6.794372e+10, 6.793463e+10, 6.785385e+10, 6.773589e+10, 6.787609e+10, 6.760049e+10, 6.709596e+10, 6.701824e+10,
- 6.672568e+10, 6.617609e+10, 6.620431e+10, 6.627069e+10, 6.617393e+10, 6.633176e+10, 6.600326e+10, 6.530363e+10,
- 6.494482e+10, 6.433443e+10, 6.362233e+10, 6.317814e+10, 6.296127e+10, 6.272491e+10, 6.243567e+10, 6.19567e+10,
- 6.13123e+10, 6.150823e+10, 6.160438e+10, 6.106396e+10, 6.128276e+10, 6.13318e+10, 6.161243e+10, 6.182719e+10,
- 6.177156e+10, 6.174539e+10, 6.216147e+10, 6.272091e+10, 6.286637e+10, 6.310137e+10, 6.315882e+10, 6.312109e+10,
- 6.312296e+10, 6.312432e+10, 6.328676e+10, 6.37708e+10, 6.421867e+10, 6.490675e+10, 6.547625e+10, 6.593425e+10,
- 6.67186e+10, 6.755033e+10, 6.754501e+10, 6.730381e+10, 6.76163e+10, 6.761007e+10, 6.745505e+10, 6.777796e+10,
- 6.783472e+10, 6.779558e+10, 6.787643e+10, 6.800003e+10, 6.840413e+10, 6.856255e+10, 6.939239e+10, 6.907512e+10,
- 6.914988e+10, 6.901868e+10, 6.884531e+10, 6.934499e+10, 6.955862e+10, 6.938019e+10, 6.942022e+10, 6.950912e+10,
- 6.979618e+10, 7.064871e+10, 7.152501e+10, 7.178129e+10, 7.2239e+10, 7.257321e+10, 7.28913e+10, 7.365193e+10,
- 7.432521e+10, 7.475098e+10, 7.553025e+10, 7.654561e+10, 7.698032e+10, 7.768267e+10, 7.826669e+10, 7.866854e+10,
- 7.924608e+10, 7.998602e+10, 8.038091e+10, 8.094976e+10, 8.145126e+10, 8.132123e+10, 8.142747e+10, 8.148276e+10,
- 8.118588e+10, 8.122279e+10, 8.078815e+10, 8.008936e+10, 7.991103e+10, 7.981722e+10, 7.932372e+10, 7.900164e+10,
- 7.881053e+10, 7.837734e+10, 7.847101e+10, 7.816575e+10, 7.789224e+10, 7.803634e+10, 7.827226e+10, 7.812112e+10,
- 7.814848e+10, 7.812407e+10, 7.779805e+10, 7.783394e+10, 7.768365e+10, 7.74484e+10, 7.740301e+10, 7.725512e+10,
- 7.666682e+10, 7.635862e+10, 7.592468e+10, 7.539656e+10, 7.529974e+10, 7.501661e+10, 7.442706e+10, 7.406878e+10,
- 7.347894e+10, 7.268775e+10, 7.23729e+10, 7.171337e+10, 7.146626e+10, 7.130693e+10, 7.066356e+10, 6.977915e+10,
- 6.915126e+10, 6.830462e+10, 6.73021e+10, 6.67686e+10, 6.600806e+10, 6.504958e+10, 6.427045e+10, 6.35093e+10,
- 6.277891e+10, 6.258429e+10, 6.184866e+10, 6.114754e+10, 6.093035e+10, 6.063859e+10, 5.999596e+10, 5.952608e+10,
- 5.927059e+10, 5.831014e+10, 5.763428e+10, 5.77239e+10, 5.82414e+10, 5.911797e+10, 5.987076e+10, 5.976584e+10,
- 6.017487e+10, 6.023042e+10, 6.029144e+10, 6.068466e+10, 6.139924e+10, 6.208432e+10, 6.259237e+10, 6.300856e+10,
- 6.342197e+10, 6.423638e+10, 6.494938e+10, 6.478293e+10, 6.444705e+10, 6.432593e+10, 6.437474e+10, 6.447832e+10,
- 6.450247e+10, 6.398122e+10, 6.399681e+10, 6.406744e+10, 6.404553e+10, 6.417746e+10, 6.39819e+10, 6.389218e+10,
- 6.453242e+10, 6.491168e+10, 6.493824e+10, 6.524365e+10, 6.537463e+10, 6.543864e+10, 6.583769e+10, 6.596521e+10,
- 6.641129e+10, 6.718787e+10, 6.741177e+10, 6.776819e+10, 6.786579e+10, 6.783788e+10, 6.790788e+10, 6.77233e+10,
- 6.738099e+10, 6.718351e+10, 6.739131e+10, 6.752051e+10, 6.747344e+10, 6.757187e+10, 6.739908e+10, 6.702725e+10,
- 
6.70474e+10, 6.708783e+10, 6.72989e+10, 6.75298e+10, 6.727323e+10, 6.677787e+10, 6.686342e+10, 6.687026e+10, - 6.714555e+10, 6.750766e+10, 6.807156e+10, 6.847816e+10, 6.915895e+10, 6.958225e+10, 6.970934e+10, 6.972807e+10, - 6.973312e+10, 6.970858e+10, 6.962325e+10, 6.968693e+10, 6.965446e+10, 6.983768e+10, 6.974386e+10, 6.992195e+10, - 7.010707e+10, 7.004337e+10, 7.006336e+10, 7.06312e+10, 7.078169e+10, 7.080609e+10, 7.107845e+10, 7.084754e+10, - 7.032667e+10, 7.052029e+10, 7.031464e+10, 7.006906e+10, 7.018558e+10, 7.022278e+10, 7.012379e+10, 7.043974e+10, - 7.016036e+10, 6.975801e+10, 6.95197e+10, 6.92444e+10, 6.85828e+10, 6.808828e+10, 6.74055e+10, 6.663602e+10, - 6.588224e+10, 6.52747e+10, 6.412303e+10, 6.315978e+10, 6.268569e+10, 6.219346e+10, 6.177174e+10, 6.101807e+10, - 6.018369e+10, 5.97554e+10, 5.924427e+10, 5.867325e+10, 5.814079e+10, 5.745633e+10, 5.641881e+10, 5.608709e+10, - 5.529503e+10, 5.450575e+10, 5.383054e+10, 5.297568e+10, 5.210389e+10, 5.139513e+10, 5.03026e+10, 4.922761e+10, - 4.839502e+10, 4.739353e+10, 4.605013e+10, 4.486422e+10, 4.369101e+10, 4.241115e+10, 4.128026e+10, 4.025775e+10, - 3.915851e+10, 3.819004e+10, 3.700971e+10, 3.581475e+10, 3.498126e+10, 3.384422e+10, 3.224959e+10, 3.108637e+10, - 2.997983e+10, 2.86439e+10, 2.774108e+10, 2.682793e+10, 2.590098e+10, 2.500665e+10, 2.368987e+10, 2.24582e+10, - 2.158596e+10, 2.062636e+10, 1.942922e+10, 1.873734e+10, 1.823214e+10, 1.726518e+10, 1.665115e+10, 1.582729e+10, - 1.477715e+10, 1.406265e+10, 1.285904e+10, 1.145722e+10, 1.038312e+10, 9.181713e+09, 8.141138e+09, 7.45358e+09, - 6.59996e+09, 5.72857e+09, 5.136189e+09, 4.51829e+09, 3.649536e+09, 2.990132e+09, 2.29392e+09, 1.390141e+09, - 5.611192e+08, -1.62469e+08, -1.041465e+09, -1.804217e+09, -2.923116e+09, -4.205691e+09, -5.09832e+09, -6.12155e+09, - -7.10503e+09, -7.957297e+09, -9.107372e+09, -1.039097e+10, -1.133152e+10, -1.221205e+10, -1.318018e+10, -1.402195e+10, - -1.512e+10, -1.634369e+10, -1.710999e+10, -1.786548e+10, -1.866482e+10, -1.938912e+10, -2.039964e+10, -2.160603e+10, - -2.259855e+10, -2.353314e+10, -2.449689e+10, -2.52005e+10, -2.627104e+10, -2.730019e+10, -2.815777e+10, -2.920027e+10, - -3.03507e+10, -3.126021e+10, -3.212383e+10, -3.329089e+10, -3.402306e+10, -3.475361e+10, -3.572698e+10, -3.644467e+10, - -3.721484e+10, -3.800023e+10, -3.865459e+10, -3.918282e+10, -3.983764e+10, -4.051065e+10, -4.119051e+10, -4.202436e+10, - -4.24868e+10, -4.340278e+10, -4.418258e+10, -4.490206e+10, -4.587365e+10, -4.697342e+10, -4.778222e+10, -4.882614e+10, - -4.984197e+10, -5.051089e+10, -5.143766e+10, -5.252824e+10, -5.353136e+10, -5.436329e+10, -5.533555e+10, -5.623246e+10, - -5.689744e+10, -5.798439e+10, -5.882786e+10, -5.96284e+10, -6.061507e+10, -6.145417e+10, -6.235327e+10, -6.335978e+10, - -6.405788e+10, -6.496648e+10, -6.600807e+10, -6.686964e+10, -6.782611e+10, -6.890904e+10, -6.941638e+10, -7.012465e+10, - -7.113145e+10, -7.186233e+10, -7.2293e+10, -7.313894e+10, -7.394114e+10, -7.475566e+10, -7.572029e+10, -7.660066e+10, - -7.738602e+10, -7.846013e+10, -7.921084e+10, -7.986093e+10, -8.07113e+10, -8.159104e+10, -8.243174e+10, -8.305353e+10, - -8.346367e+10, -8.402575e+10, -8.482895e+10, -8.536747e+10, -8.581526e+10, -8.640365e+10, -8.683093e+10, -8.724777e+10, - -8.746026e+10, -8.760338e+10, -8.809235e+10, -8.870936e+10, -8.905536e+10, -8.953669e+10, -9.031665e+10, -9.090067e+10, - -9.135409e+10, -9.185499e+10, -9.225697e+10, -9.253896e+10, -9.314785e+10, -9.354807e+10, -9.391591e+10, -9.436751e+10, - -9.471133e+10, -9.517393e+10, -9.587184e+10, 
-9.619209e+10, -9.607482e+10, -9.593427e+10, -9.604743e+10, -9.619758e+10, - -9.62449e+10, -9.61466e+10, -9.636941e+10, -9.692289e+10, -9.735416e+10, -9.774056e+10, -9.828883e+10, -9.859253e+10, + 7.324121e+10, 7.251927e+10, 7.152208e+10, 7.089604e+10, + 7.018831e+10, 6.94266e+10, 6.890659e+10, 6.837292e+10, + 6.794372e+10, 6.793463e+10, 6.785385e+10, 6.773589e+10, + 6.787609e+10, 6.760049e+10, 6.709596e+10, 6.701824e+10, + 6.672568e+10, 6.617609e+10, 6.620431e+10, 6.627069e+10, + 6.617393e+10, 6.633176e+10, 6.600326e+10, 6.530363e+10, + 6.494482e+10, 6.433443e+10, 6.362233e+10, 6.317814e+10, + 6.296127e+10, 6.272491e+10, 6.243567e+10, 6.19567e+10, + 6.13123e+10, 6.150823e+10, 6.160438e+10, 6.106396e+10, + 6.128276e+10, 6.13318e+10, 6.161243e+10, 6.182719e+10, + 6.177156e+10, 6.174539e+10, 6.216147e+10, 6.272091e+10, + 6.286637e+10, 6.310137e+10, 6.315882e+10, 6.312109e+10, + 6.312296e+10, 6.312432e+10, 6.328676e+10, 6.37708e+10, + 6.421867e+10, 6.490675e+10, 6.547625e+10, 6.593425e+10, + 6.67186e+10, 6.755033e+10, 6.754501e+10, 6.730381e+10, + 6.76163e+10, 6.761007e+10, 6.745505e+10, 6.777796e+10, + 6.783472e+10, 6.779558e+10, 6.787643e+10, 6.800003e+10, + 6.840413e+10, 6.856255e+10, 6.939239e+10, 6.907512e+10, + 6.914988e+10, 6.901868e+10, 6.884531e+10, 6.934499e+10, + 6.955862e+10, 6.938019e+10, 6.942022e+10, 6.950912e+10, + 6.979618e+10, 7.064871e+10, 7.152501e+10, 7.178129e+10, + 7.2239e+10, 7.257321e+10, 7.28913e+10, 7.365193e+10, + 7.432521e+10, 7.475098e+10, 7.553025e+10, 7.654561e+10, + 7.698032e+10, 7.768267e+10, 7.826669e+10, 7.866854e+10, + 7.924608e+10, 7.998602e+10, 8.038091e+10, 8.094976e+10, + 8.145126e+10, 8.132123e+10, 8.142747e+10, 8.148276e+10, + 8.118588e+10, 8.122279e+10, 8.078815e+10, 8.008936e+10, + 7.991103e+10, 7.981722e+10, 7.932372e+10, 7.900164e+10, + 7.881053e+10, 7.837734e+10, 7.847101e+10, 7.816575e+10, + 7.789224e+10, 7.803634e+10, 7.827226e+10, 7.812112e+10, + 7.814848e+10, 7.812407e+10, 7.779805e+10, 7.783394e+10, + 7.768365e+10, 7.74484e+10, 7.740301e+10, 7.725512e+10, + 7.666682e+10, 7.635862e+10, 7.592468e+10, 7.539656e+10, + 7.529974e+10, 7.501661e+10, 7.442706e+10, 7.406878e+10, + 7.347894e+10, 7.268775e+10, 7.23729e+10, 7.171337e+10, + 7.146626e+10, 7.130693e+10, 7.066356e+10, 6.977915e+10, + 6.915126e+10, 6.830462e+10, 6.73021e+10, 6.67686e+10, + 6.600806e+10, 6.504958e+10, 6.427045e+10, 6.35093e+10, + 6.277891e+10, 6.258429e+10, 6.184866e+10, 6.114754e+10, + 6.093035e+10, 6.063859e+10, 5.999596e+10, 5.952608e+10, + 5.927059e+10, 5.831014e+10, 5.763428e+10, 5.77239e+10, + 5.82414e+10, 5.911797e+10, 5.987076e+10, 5.976584e+10, + 6.017487e+10, 6.023042e+10, 6.029144e+10, 6.068466e+10, + 6.139924e+10, 6.208432e+10, 6.259237e+10, 6.300856e+10, + 6.342197e+10, 6.423638e+10, 6.494938e+10, 6.478293e+10, + 6.444705e+10, 6.432593e+10, 6.437474e+10, 6.447832e+10, + 6.450247e+10, 6.398122e+10, 6.399681e+10, 6.406744e+10, + 6.404553e+10, 6.417746e+10, 6.39819e+10, 6.389218e+10, + 6.453242e+10, 6.491168e+10, 6.493824e+10, 6.524365e+10, + 6.537463e+10, 6.543864e+10, 6.583769e+10, 6.596521e+10, + 6.641129e+10, 6.718787e+10, 6.741177e+10, 6.776819e+10, + 6.786579e+10, 6.783788e+10, 6.790788e+10, 6.77233e+10, + 6.738099e+10, 6.718351e+10, 6.739131e+10, 6.752051e+10, + 6.747344e+10, 6.757187e+10, 6.739908e+10, 6.702725e+10, + 6.70474e+10, 6.708783e+10, 6.72989e+10, 6.75298e+10, + 6.727323e+10, 6.677787e+10, 6.686342e+10, 6.687026e+10, + 6.714555e+10, 6.750766e+10, 6.807156e+10, 6.847816e+10, + 6.915895e+10, 6.958225e+10, 6.970934e+10, 6.972807e+10, + 6.973312e+10, 
6.970858e+10, 6.962325e+10, 6.968693e+10, + 6.965446e+10, 6.983768e+10, 6.974386e+10, 6.992195e+10, + 7.010707e+10, 7.004337e+10, 7.006336e+10, 7.06312e+10, + 7.078169e+10, 7.080609e+10, 7.107845e+10, 7.084754e+10, + 7.032667e+10, 7.052029e+10, 7.031464e+10, 7.006906e+10, + 7.018558e+10, 7.022278e+10, 7.012379e+10, 7.043974e+10, + 7.016036e+10, 6.975801e+10, 6.95197e+10, 6.92444e+10, + 6.85828e+10, 6.808828e+10, 6.74055e+10, 6.663602e+10, + 6.588224e+10, 6.52747e+10, 6.412303e+10, 6.315978e+10, + 6.268569e+10, 6.219346e+10, 6.177174e+10, 6.101807e+10, + 6.018369e+10, 5.97554e+10, 5.924427e+10, 5.867325e+10, + 5.814079e+10, 5.745633e+10, 5.641881e+10, 5.608709e+10, + 5.529503e+10, 5.450575e+10, 5.383054e+10, 5.297568e+10, + 5.210389e+10, 5.139513e+10, 5.03026e+10, 4.922761e+10, + 4.839502e+10, 4.739353e+10, 4.605013e+10, 4.486422e+10, + 4.369101e+10, 4.241115e+10, 4.128026e+10, 4.025775e+10, + 3.915851e+10, 3.819004e+10, 3.700971e+10, 3.581475e+10, + 3.498126e+10, 3.384422e+10, 3.224959e+10, 3.108637e+10, + 2.997983e+10, 2.86439e+10, 2.774108e+10, 2.682793e+10, + 2.590098e+10, 2.500665e+10, 2.368987e+10, 2.24582e+10, + 2.158596e+10, 2.062636e+10, 1.942922e+10, 1.873734e+10, + 1.823214e+10, 1.726518e+10, 1.665115e+10, 1.582729e+10, + 1.477715e+10, 1.406265e+10, 1.285904e+10, 1.145722e+10, + 1.038312e+10, 9.181713e+09, 8.141138e+09, 7.45358e+09, + 6.59996e+09, 5.72857e+09, 5.136189e+09, 4.51829e+09, + 3.649536e+09, 2.990132e+09, 2.29392e+09, 1.390141e+09, + 5.611192e+08, -1.62469e+08, -1.041465e+09, -1.804217e+09, + -2.923116e+09, -4.205691e+09, -5.09832e+09, -6.12155e+09, + -7.10503e+09, -7.957297e+09, -9.107372e+09, -1.039097e+10, + -1.133152e+10, -1.221205e+10, -1.318018e+10, -1.402195e+10, + -1.512e+10, -1.634369e+10, -1.710999e+10, -1.786548e+10, + -1.866482e+10, -1.938912e+10, -2.039964e+10, -2.160603e+10, + -2.259855e+10, -2.353314e+10, -2.449689e+10, -2.52005e+10, + -2.627104e+10, -2.730019e+10, -2.815777e+10, -2.920027e+10, + -3.03507e+10, -3.126021e+10, -3.212383e+10, -3.329089e+10, + -3.402306e+10, -3.475361e+10, -3.572698e+10, -3.644467e+10, + -3.721484e+10, -3.800023e+10, -3.865459e+10, -3.918282e+10, + -3.983764e+10, -4.051065e+10, -4.119051e+10, -4.202436e+10, + -4.24868e+10, -4.340278e+10, -4.418258e+10, -4.490206e+10, + -4.587365e+10, -4.697342e+10, -4.778222e+10, -4.882614e+10, + -4.984197e+10, -5.051089e+10, -5.143766e+10, -5.252824e+10, + -5.353136e+10, -5.436329e+10, -5.533555e+10, -5.623246e+10, + -5.689744e+10, -5.798439e+10, -5.882786e+10, -5.96284e+10, + -6.061507e+10, -6.145417e+10, -6.235327e+10, -6.335978e+10, + -6.405788e+10, -6.496648e+10, -6.600807e+10, -6.686964e+10, + -6.782611e+10, -6.890904e+10, -6.941638e+10, -7.012465e+10, + -7.113145e+10, -7.186233e+10, -7.2293e+10, -7.313894e+10, + -7.394114e+10, -7.475566e+10, -7.572029e+10, -7.660066e+10, + -7.738602e+10, -7.846013e+10, -7.921084e+10, -7.986093e+10, + -8.07113e+10, -8.159104e+10, -8.243174e+10, -8.305353e+10, + -8.346367e+10, -8.402575e+10, -8.482895e+10, -8.536747e+10, + -8.581526e+10, -8.640365e+10, -8.683093e+10, -8.724777e+10, + -8.746026e+10, -8.760338e+10, -8.809235e+10, -8.870936e+10, + -8.905536e+10, -8.953669e+10, -9.031665e+10, -9.090067e+10, + -9.135409e+10, -9.185499e+10, -9.225697e+10, -9.253896e+10, + -9.314785e+10, -9.354807e+10, -9.391591e+10, -9.436751e+10, + -9.471133e+10, -9.517393e+10, -9.587184e+10, -9.619209e+10, + -9.607482e+10, -9.593427e+10, -9.604743e+10, -9.619758e+10, + -9.62449e+10, -9.61466e+10, -9.636941e+10, -9.692289e+10, + -9.735416e+10, -9.774056e+10, -9.828883e+10, 
-9.859253e+10, -9.888183e+10, -9.95351e+10, -1.001142e+11}; - maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.2, 0.001); - maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.001); + maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.2, 0.001); + maths::CNormalMeanPrecConjugate normalPrior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.001); maths::CLogNormalMeanPrecConjugate logNormalPrior = - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.2, 0.001); + maths::CLogNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.2, 0.001); maths::COneOfNPrior::TPriorPtrVec modePriors; modePriors.reserve(3u); @@ -1464,15 +1636,18 @@ void CMultimodalPriorTest::testLargeValues() { modePriors.push_back(TPriorPtr(normalPrior.clone())); maths::COneOfNPrior modePrior(modePriors, maths_t::E_ContinuousData, 0.001); maths::CXMeansOnline1d clusterer( - maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001, 0.05, 12, 0.8 / 3.0); - maths::CMultimodalPrior multimodalPrior(maths_t::E_ContinuousData, clusterer, modePrior, 0.001); + maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, 0.001, 0.05, 12, 0.8 / 3.0); + maths::CMultimodalPrior multimodalPrior(maths_t::E_ContinuousData, + clusterer, modePrior, 0.001); for (auto value : values) { - multimodalPrior.addSamples(maths::CConstantWeights::COUNT, TDouble1Vec(1, value), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0 / 3.0))); + multimodalPrior.addSamples(maths::CConstantWeights::COUNT, TDouble1Vec(1, value), + TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0 / 3.0))); if (!multimodalPrior.isNonInformative()) { - TDoubleDoublePr interval = - multimodalPrior.marginalLikelihoodConfidenceInterval(95.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); + TDoubleDoublePr interval = multimodalPrior.marginalLikelihoodConfidenceInterval( + 95.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); if (interval.second - interval.first >= 3e11) { LOG_DEBUG(<< "interval = " << interval.second - interval.first); LOG_DEBUG(<< multimodalPrior.print()); @@ -1533,9 +1708,11 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { double points[] = {0.5, 4.0, 12.0, 20.0, 40.0, 50.0, 60.0}; double unscaledExpectationVariance; - filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 50, unscaledExpectationVariance); + filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 50, + unscaledExpectationVariance); LOG_DEBUG(<< "unscaledExpectationVariance = " << unscaledExpectationVariance); - CPPUNIT_ASSERT_DOUBLES_EQUAL(v, unscaledExpectationVariance, 1e-2 * unscaledExpectationVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(v, unscaledExpectationVariance, + 1e-2 * unscaledExpectationVariance); for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { double vs = varianceScales[i]; @@ -1550,12 +1727,14 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { LOG_DEBUG(<< "sv = " << filter.marginalLikelihoodVariance(weightStyle, weight)); double expectationVariance; - filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 50, expectationVariance, weightStyle, weight); + 
filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), + 50, expectationVariance, weightStyle, weight); LOG_DEBUG(<< "expectationVariance = " << expectationVariance); - CPPUNIT_ASSERT_DOUBLES_EQUAL(vs * unscaledExpectationVariance, expectationVariance, 1e-3 * vs * unscaledExpectationVariance); - CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(weightStyle, weight), - expectationVariance, - 1e-3 * filter.marginalLikelihoodVariance(weightStyle, weight)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(vs * unscaledExpectationVariance, expectationVariance, + 1e-3 * vs * unscaledExpectationVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + filter.marginalLikelihoodVariance(weightStyle, weight), expectationVariance, + 1e-3 * filter.marginalLikelihoodVariance(weightStyle, weight)); TDouble1Vec sample(1, 0.0); for (std::size_t j = 0u; j < boost::size(points); ++j) { @@ -1569,9 +1748,10 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { double FxPlusEps = std::exp(-(lb + ub) / 2.0); filter.minusLogJointCdf(weightStyle, xMinusEps, weights, lb, ub); double FxMinusEps = std::exp(-(lb + ub) / 2.0); - LOG_DEBUG(<< "x = " << points[j] << ", log(f(x)) = " << fx - << ", log(dF/dx)) = " << std::log((FxPlusEps - FxMinusEps) / 2e-3)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(fx, std::log((FxPlusEps - FxMinusEps) / 2e-3), 0.05 * std::fabs(fx)); + LOG_DEBUG(<< "x = " << points[j] << ", log(f(x)) = " << fx << ", log(dF/dx)) = " + << std::log((FxPlusEps - FxMinusEps) / 2e-3)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(fx, std::log((FxPlusEps - FxMinusEps) / 2e-3), + 0.05 * std::fabs(fx)); sample[0] = m + (points[j] - m) / std::sqrt(vs); weights[0][0] = 1.0; @@ -1579,14 +1759,17 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { double expectedUpperBound; maths_t::ETail expectedTail; filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, sample, weights, expectedLowerBound, expectedUpperBound, expectedTail); + maths_t::E_TwoSided, weightStyle, sample, weights, + expectedLowerBound, expectedUpperBound, expectedTail); sample[0] = points[j]; weights[0][0] = vs; double lowerBound; double upperBound; maths_t::ETail tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, weightStyle, sample, weights, lowerBound, upperBound, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + weightStyle, sample, weights, + lowerBound, upperBound, tail); LOG_DEBUG(<< "expectedLowerBound = " << expectedLowerBound); LOG_DEBUG(<< "lowerBound = " << lowerBound); @@ -1597,12 +1780,16 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { if ((expectedLowerBound + expectedUpperBound) < 0.02) { CPPUNIT_ASSERT_DOUBLES_EQUAL( - std::log(expectedLowerBound), std::log(lowerBound), 0.1 * std::fabs(std::log(expectedLowerBound))); + std::log(expectedLowerBound), std::log(lowerBound), + 0.1 * std::fabs(std::log(expectedLowerBound))); CPPUNIT_ASSERT_DOUBLES_EQUAL( - std::log(expectedUpperBound), std::log(upperBound), 0.1 * std::fabs(std::log(expectedUpperBound))); + std::log(expectedUpperBound), std::log(upperBound), + 0.1 * std::fabs(std::log(expectedUpperBound))); } else { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLowerBound, lowerBound, 0.05 * expectedLowerBound); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedUpperBound, upperBound, 0.05 * expectedUpperBound); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLowerBound, lowerBound, + 0.05 * expectedLowerBound); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedUpperBound, upperBound, + 0.05 * expectedUpperBound); } CPPUNIT_ASSERT_EQUAL(expectedTail, tail); } @@ -1648,10 
+1835,15 @@ void CMultimodalPriorTest::testPersist() {
 samples.insert(samples.end(), samples2.begin(), samples2.end());
 rng.random_shuffle(samples.begin(), samples.end());
- maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight);
- maths::CGammaRateConjugate gamma = maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01);
- maths::CLogNormalMeanPrecConjugate logNormal = maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01);
- maths::CNormalMeanPrecConjugate normal = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData);
+ maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+ maths::CAvailableModeDistributions::ALL,
+ maths_t::E_ClustersFractionWeight);
+ maths::CGammaRateConjugate gamma =
+ maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01);
+ maths::CLogNormalMeanPrecConjugate logNormal =
+ maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01);
+ maths::CNormalMeanPrecConjugate normal =
+ maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData);
 COneOfNPrior::TPriorPtrVec priors;
 priors.push_back(COneOfNPrior::TPriorPtr(gamma.clone()));
@@ -1661,8 +1853,9 @@ void CMultimodalPriorTest::testPersist() {
 maths::CMultimodalPrior origFilter(maths_t::E_ContinuousData, clusterer, modePrior);
 for (std::size_t i = 0u; i < samples.size(); ++i) {
- origFilter.addSamples(
- maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
+ origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
+ TDouble1Vec(1, samples[i]),
+ TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
 }
 double decayRate = origFilter.decayRate();
 uint64_t checksum = origFilter.checksum();
@@ -1681,14 +1874,13 @@ void CMultimodalPriorTest::testPersist() {
 CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
 core::CRapidXmlStateRestoreTraverser traverser(parser);
- maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
- decayRate + 0.1,
- maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
- maths::MINIMUM_CLUSTER_SPLIT_COUNT,
- maths::MINIMUM_CATEGORY_COUNT);
+ maths::SDistributionRestoreParams params(
+ maths_t::E_ContinuousData, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+ maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
 maths::CMultimodalPrior restoredFilter(params, traverser);
- LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
+ LOG_DEBUG(<< "orig checksum = " << checksum
+ << " restored checksum = " << restoredFilter.checksum());
 CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());
 // The XML representation of the new filter should be the same as the original
@@ -1704,31 +1896,38 @@
 CppUnit::Test* CMultimodalPriorTest::suite() {
 CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultimodalPriorTest");
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testMultipleUpdate",
- &CMultimodalPriorTest::testMultipleUpdate));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testPropagation", &CMultimodalPriorTest::testPropagation));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMultimodalPriorTest>("CMultimodalPriorTest::testSingleMode", &CMultimodalPriorTest::testSingleMode));
- suiteOfTests->addTest(
- new 
CppUnit::TestCaller("CMultimodalPriorTest::testMultipleModes", &CMultimodalPriorTest::testMultipleModes)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultimodalPriorTest::testMarginalLikelihood", - &CMultimodalPriorTest::testMarginalLikelihood)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultimodalPriorTest::testMarginalLikelihoodMode", - &CMultimodalPriorTest::testMarginalLikelihoodMode)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval", - &CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultimodalPriorTest::testSampleMarginalLikelihood", - &CMultimodalPriorTest::testSampleMarginalLikelihood)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultimodalPriorTest::testCdf", &CMultimodalPriorTest::testCdf)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultimodalPriorTest::testProbabilityOfLessLikelySamples", - &CMultimodalPriorTest::testProbabilityOfLessLikelySamples)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultimodalPriorTest::testSeasonalVarianceScale", - &CMultimodalPriorTest::testSeasonalVarianceScale)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMultimodalPriorTest::testLargeValues", &CMultimodalPriorTest::testLargeValues)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMultimodalPriorTest::testPersist", &CMultimodalPriorTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testMultipleUpdate", &CMultimodalPriorTest::testMultipleUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testPropagation", &CMultimodalPriorTest::testPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testSingleMode", &CMultimodalPriorTest::testSingleMode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testMultipleModes", &CMultimodalPriorTest::testMultipleModes)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testMarginalLikelihood", + &CMultimodalPriorTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testMarginalLikelihoodMode", + &CMultimodalPriorTest::testMarginalLikelihoodMode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval", + &CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testSampleMarginalLikelihood", + &CMultimodalPriorTest::testSampleMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testCdf", &CMultimodalPriorTest::testCdf)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testProbabilityOfLessLikelySamples", + &CMultimodalPriorTest::testProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testSeasonalVarianceScale", + &CMultimodalPriorTest::testSeasonalVarianceScale)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testLargeValues", &CMultimodalPriorTest::testLargeValues)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultimodalPriorTest::testPersist", &CMultimodalPriorTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CMultinomialConjugateTest.cc b/lib/maths/unittest/CMultinomialConjugateTest.cc index cdb84866f8..0f0668ffd0 100644 --- 
a/lib/maths/unittest/CMultinomialConjugateTest.cc
+++ b/lib/maths/unittest/CMultinomialConjugateTest.cc
@@ -51,7 +51,8 @@ void CMultinomialConjugateTest::testMultipleUpdate() {
 const double rawCategories[] = {-1.2, 5.1, 2.0, 18.0, 10.3};
 const double rawProbabilities[] = {0.17, 0.13, 0.35, 0.3, 0.05};
 const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories));
- const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities));
+ const TDoubleVec probabilities(boost::begin(rawProbabilities),
+ boost::end(rawProbabilities));
 test::CRandomNumbers rng;
@@ -82,7 +83,8 @@ void CMultinomialConjugateTest::testPropagation() {
 const double rawCategories[] = {0.0, 1.1, 2.0};
 const double rawProbabilities[] = {0.27, 0.13, 0.6};
 const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories));
- const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities));
+ const TDoubleVec probabilities(boost::begin(rawProbabilities),
+ boost::end(rawProbabilities));
 test::CRandomNumbers rng;
@@ -102,11 +104,14 @@ void CMultinomialConjugateTest::testPropagation() {
 TDoubleVec propagatedExpectedProbabilities = filter.probabilities();
 LOG_DEBUG(<< "expectedProbabilities = " << core::CContainerPrinter::print(expectedProbabilities)
- << ", propagatedExpectedProbabilities = " << core::CContainerPrinter::print(propagatedExpectedProbabilities));
+ << ", propagatedExpectedProbabilities = "
+ << core::CContainerPrinter::print(propagatedExpectedProbabilities));
 using TEqual = maths::CEqualWithTolerance<double>;
 TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-12);
- CPPUNIT_ASSERT(std::equal(expectedProbabilities.begin(), expectedProbabilities.end(), propagatedExpectedProbabilities.begin(), equal));
+ CPPUNIT_ASSERT(std::equal(expectedProbabilities.begin(),
+ expectedProbabilities.end(),
+ propagatedExpectedProbabilities.begin(), equal));
}
void CMultinomialConjugateTest::testProbabilityEstimation() {
@@ -122,24 +127,28 @@
 const double rawCategories[] = {0.0, 1.1, 2.0, 5.0, 12.0, 15.0};
 const double rawProbabilities[] = {0.1, 0.15, 0.12, 0.31, 0.03, 0.29};
 const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories));
- const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities));
+ const TDoubleVec probabilities(boost::begin(rawProbabilities),
+ boost::end(rawProbabilities));
 const double decayRates[] = {0.0, 0.001, 0.01};
 const unsigned int nTests = 5000u;
- const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0};
+ const double testIntervals[] = {50.0, 60.0, 70.0, 80.0,
+ 85.0, 90.0, 95.0, 99.0};
 for (size_t i = 0; i < boost::size(decayRates); ++i) {
 test::CRandomNumbers rng;
- TUIntVec errors[] = {
- TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0)};
+ TUIntVec errors[] = {TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0),
+ TUIntVec(6, 0), TUIntVec(6, 0), TUIntVec(6, 0),
+ TUIntVec(6, 0), TUIntVec(6, 0)};
 for (unsigned int test = 0; test < nTests; ++test) {
 TDoubleVec samples;
 rng.generateMultinomialSamples(categories, probabilities, 500, samples);
- CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6, decayRates[i]));
+ CMultinomialConjugate filter(
+ CMultinomialConjugate::nonInformativePrior(6, decayRates[i]));
 for (std::size_t j = 0u; j < samples.size(); ++j) {
 
filter.addSamples(TDouble1Vec(1, samples[j])); @@ -147,11 +156,13 @@ void CMultinomialConjugateTest::testProbabilityEstimation() { } for (size_t j = 0u; j < boost::size(testIntervals); ++j) { - TDoubleDoublePrVec confidenceIntervals = filter.confidenceIntervalProbabilities(testIntervals[j]); + TDoubleDoublePrVec confidenceIntervals = + filter.confidenceIntervalProbabilities(testIntervals[j]); CPPUNIT_ASSERT_EQUAL(confidenceIntervals.size(), probabilities.size()); for (std::size_t k = 0u; k < probabilities.size(); ++k) { - if (probabilities[k] < confidenceIntervals[k].first || probabilities[k] > confidenceIntervals[k].second) { + if (probabilities[k] < confidenceIntervals[k].first || + probabilities[k] > confidenceIntervals[k].second) { ++errors[j][k]; } } @@ -172,7 +183,9 @@ void CMultinomialConjugateTest::testProbabilityEstimation() { double meanError = 0.0; for (std::size_t k = 0u; k < intervals.size(); ++k) { if (decayRates[i] == 0.0) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(intervals[k], 100.0 - testIntervals[j], std::min(5.0, 0.4 * (100.0 - testIntervals[j]))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + intervals[k], 100.0 - testIntervals[j], + std::min(5.0, 0.4 * (100.0 - testIntervals[j]))); meanError += std::fabs(intervals[k] - (100.0 - testIntervals[j])); } else { CPPUNIT_ASSERT(intervals[k] <= (100.0 - testIntervals[j])); @@ -180,7 +193,8 @@ void CMultinomialConjugateTest::testProbabilityEstimation() { } meanError /= static_cast(intervals.size()); LOG_DEBUG(<< "meanError = " << meanError); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, meanError, std::min(2.0, 0.2 * (100.0 - testIntervals[j]))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.0, meanError, std::min(2.0, 0.2 * (100.0 - testIntervals[j]))); } } } @@ -200,7 +214,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { const double rawCategories[] = {0.0, 1.0, 2.0}; const double rawProbabilities[] = {0.15, 0.5, 0.35}; const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); - const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); + const TDoubleVec probabilities(boost::begin(rawProbabilities), + boost::end(rawProbabilities)); TDoubleVec samples; rng.generateMultinomialSamples(categories, probabilities, 50, samples); @@ -210,7 +225,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { for (size_t i = 0; i < boost::size(decayRates); ++i) { LOG_DEBUG(<< "**** Decay rate = " << decayRates[i] << " ****"); - CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(3, decayRates[i])); + CMultinomialConjugate filter( + CMultinomialConjugate::nonInformativePrior(3, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { TDouble1Vec sample(1, samples[j]); @@ -219,15 +235,19 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { filter.propagateForwardsByTime(1.0); double logp; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logp)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood(sample, logp)); const TDoubleVec& filterCategories = filter.categories(); - std::size_t k = std::lower_bound(filterCategories.begin(), filterCategories.end(), samples[j]) - filterCategories.begin(); + std::size_t k = std::lower_bound(filterCategories.begin(), + filterCategories.end(), samples[j]) - + filterCategories.begin(); TDoubleVec filterProbabilities(filter.probabilities()); CPPUNIT_ASSERT(k < filterProbabilities.size()); double p = filterProbabilities[k]; - LOG_DEBUG(<< "sample = " << samples[j] 
<< ", expected likelihood = " << p << ", likelihood = " << std::exp(logp)); + LOG_DEBUG(<< "sample = " << samples[j] << ", expected likelihood = " << p + << ", likelihood = " << std::exp(logp)); CPPUNIT_ASSERT_DOUBLES_EQUAL(p, std::exp(logp), 1e-12); } @@ -252,7 +272,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { const double rawCategories[] = {0.0, 1.0, 2.0}; const double rawProbabilities[] = {0.1, 0.6, 0.3}; const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); - const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); + const TDoubleVec probabilities(boost::begin(rawProbabilities), + boost::end(rawProbabilities)); // Compute the outer products of size 2 and 3. TDoubleVecVec o2, o3; @@ -273,7 +294,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { LOG_DEBUG(<< "o3 = " << core::CContainerPrinter::print(o3)); double rawConcentrations[] = {1000.0, 6000.0, 3000.0}; - TDoubleVec concentrations(boost::begin(rawConcentrations), boost::end(rawConcentrations)); + TDoubleVec concentrations(boost::begin(rawConcentrations), + boost::end(rawConcentrations)); CMultinomialConjugate filter(maths::CMultinomialConjugate(3, categories, concentrations)); @@ -285,12 +307,15 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { TDoubleVec p2; for (std::size_t i = 0u; i < o2.size(); ++i) { double p; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(o2[i], p)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood(o2[i], p)); p = std::exp(p); p2.push_back(p); - LOG_DEBUG(<< "categories = " << core::CContainerPrinter::print(o2[i]) << ", p = " << p); + LOG_DEBUG(<< "categories = " << core::CContainerPrinter::print(o2[i]) + << ", p = " << p); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::accumulate(p2.begin(), p2.end(), 0.0), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0, std::accumulate(p2.begin(), p2.end(), 0.0), 1e-10); TDoubleVec frequencies(o2.size(), 0.0); @@ -303,7 +328,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { sample.push_back(samples[2 * test + 1]); std::sort(sample.begin(), sample.end()); - std::size_t i = std::lower_bound(o2.begin(), o2.end(), sample) - o2.begin(); + std::size_t i = std::lower_bound(o2.begin(), o2.end(), sample) - + o2.begin(); CPPUNIT_ASSERT(i < o2.size()); frequencies[i] += 1.0; } @@ -311,7 +337,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { for (std::size_t i = 0u; i < o2.size(); ++i) { double p = frequencies[i] / static_cast(nTests); - LOG_DEBUG(<< "category = " << core::CContainerPrinter::print(o2[i]) << ", p = " << p << ", expected p = " << p2[i]); + LOG_DEBUG(<< "category = " << core::CContainerPrinter::print(o2[i]) + << ", p = " << p << ", expected p = " << p2[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(p, p2[i], 0.05 * std::max(p, p2[i])); } } @@ -321,12 +348,15 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { TDoubleVec p3; for (std::size_t i = 0u; i < o3.size(); ++i) { double p; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(o3[i], p)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood(o3[i], p)); p = std::exp(p); p3.push_back(p); - LOG_DEBUG(<< "categories = " << core::CContainerPrinter::print(o3[i]) << ", p = " << p); + LOG_DEBUG(<< "categories = " << core::CContainerPrinter::print(o3[i]) + << ", p = " << p); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::accumulate(p3.begin(), p3.end(), 0.0), 1e-10); + 
CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0, std::accumulate(p3.begin(), p3.end(), 0.0), 1e-10); TDoubleVec frequencies(o3.size(), 0.0); @@ -340,7 +370,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { sample.push_back(samples[3 * test + 2]); std::sort(sample.begin(), sample.end()); - std::size_t i = std::lower_bound(o3.begin(), o3.end(), sample) - o3.begin(); + std::size_t i = std::lower_bound(o3.begin(), o3.end(), sample) - + o3.begin(); CPPUNIT_ASSERT(i < o3.size()); frequencies[i] += 1.0; } @@ -348,7 +379,8 @@ void CMultinomialConjugateTest::testMarginalLikelihood() { for (std::size_t i = 0u; i < o3.size(); ++i) { double p = frequencies[i] / static_cast(nTests); - LOG_DEBUG(<< "category = " << core::CContainerPrinter::print(o3[i]) << ", p = " << p << ", expected p = " << p3[i]); + LOG_DEBUG(<< "category = " << core::CContainerPrinter::print(o3[i]) + << ", p = " << p << ", expected p = " << p3[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(p, p3[i], 0.05 * std::max(p, p3[i])); } } @@ -390,7 +422,8 @@ void CMultinomialConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG(<< "samples = " << core::CContainerPrinter::print(samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.1, 1.1, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2]"), core::CContainerPrinter::print(samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.1, 1.1, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2]"), + core::CContainerPrinter::print(samples)); } { @@ -412,7 +445,8 @@ void CMultinomialConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG(<< "samples = " << core::CContainerPrinter::print(samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.2, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2, 5.1]"), core::CContainerPrinter::print(samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.2, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2, 5.1]"), + core::CContainerPrinter::print(samples)); } { @@ -434,7 +468,8 @@ void CMultinomialConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG(<< "samples = " << core::CContainerPrinter::print(samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.2, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2, 3.2]"), core::CContainerPrinter::print(samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[1.1, 1.2, 1.2, 2.1, 2.1, 2.2, 2.2, 2.2, 2.2, 3.2]"), + core::CContainerPrinter::print(samples)); } } @@ -491,13 +526,17 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { // P(2.2) = 1.00 // P(3.2) = 0.04 // P(5.1) = 0.10 - double expectedProbabilities[] = {0.20, 0.32, 0.61, 1.0, 0.04, 0.10}; + double expectedProbabilities[] = {0.20, 0.32, 0.61, + 1.0, 0.04, 0.10}; for (size_t i = 0; i < boost::size(categories); ++i) { double lowerBound, upperBound; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + TDouble1Vec(1, categories[i]), + lowerBound, upperBound); - LOG_DEBUG(<< "category = " << categories[i] << ", lower bound = " << lowerBound << ", upper bound = " << upperBound + LOG_DEBUG(<< "category = " << categories[i] << ", lower bound = " << lowerBound + << ", upper bound = " << upperBound << ", expected probability = " << expectedProbabilities[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(lowerBound, expectedProbabilities[i], 1e-10); @@ -537,9 +576,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { for (size_t i = 0; i < boost::size(categories); ++i) { double lowerBound, upperBound; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound); + 
filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + TDouble1Vec(1, categories[i]), + lowerBound, upperBound); - LOG_DEBUG(<< "category = " << categories[i] << ", lower bound = " << lowerBound << ", upper bound = " << upperBound + LOG_DEBUG(<< "category = " << categories[i] << ", lower bound = " << lowerBound + << ", upper bound = " << upperBound << ", expected probability = " << expectedProbabilities[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(lowerBound, expectedProbabilities[i], 1e-10); @@ -578,9 +620,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { for (size_t i = 0; i < boost::size(categories); ++i) { double lowerBound, upperBound; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + TDouble1Vec(1, categories[i]), + lowerBound, upperBound); - LOG_DEBUG(<< "category = " << categories[i] << ", lower bound = " << lowerBound << ", upper bound = " << upperBound + LOG_DEBUG(<< "category = " << categories[i] << ", lower bound = " << lowerBound + << ", upper bound = " << upperBound << ", expected probability = " << expectedProbabilities[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(lowerBound, expectedProbabilities[i], 1e-10); @@ -601,9 +646,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { for (size_t i = 0; i < boost::size(expectedProbabilities); ++i) { double lowerBound, upperBound; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + TDouble1Vec(1, categories[i]), + lowerBound, upperBound); - LOG_DEBUG(<< "category = " << categories[i] << ", lower bound = " << lowerBound << ", upper bound = " << upperBound + LOG_DEBUG(<< "category = " << categories[i] << ", lower bound = " << lowerBound + << ", upper bound = " << upperBound << ", expected probability = " << expectedProbabilities[i]); CPPUNIT_ASSERT(lowerBound > expectedProbabilities[i]); @@ -616,32 +664,40 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { using TDoubleVecDoubleMap = std::map; using TDoubleVecDoubleMapCItr = TDoubleVecDoubleMap::const_iterator; - double categoryProbabilities[] = {0.10, 0.12, 0.29, 0.39, 0.04, 0.06}; + double categoryProbabilities[] = {0.10, 0.12, 0.29, + 0.39, 0.04, 0.06}; TDoubleDoubleVecMap categoryPairProbabilities; for (size_t i = 0u; i < boost::size(categories); ++i) { for (size_t j = i; j < boost::size(categories); ++j) { - double p = (i != j ? 2.0 : 1.0) * categoryProbabilities[i] * categoryProbabilities[j]; + double p = (i != j ? 
2.0 : 1.0) * categoryProbabilities[i] * + categoryProbabilities[j]; TDoubleVec& categoryPair = - categoryPairProbabilities.insert(TDoubleDoubleVecMap::value_type(p, TDoubleVec())).first->second; + categoryPairProbabilities + .insert(TDoubleDoubleVecMap::value_type(p, TDoubleVec())) + .first->second; categoryPair.push_back(categories[i]); categoryPair.push_back(categories[j]); } } - LOG_DEBUG(<< "category pair probabilities = " << core::CContainerPrinter::print(categoryPairProbabilities)); + LOG_DEBUG(<< "category pair probabilities = " + << core::CContainerPrinter::print(categoryPairProbabilities)); double pc = 0.0; TDoubleVecDoubleMap trueProbabilities; - for (TDoubleDoubleVecMapCItr itr = categoryPairProbabilities.begin(); itr != categoryPairProbabilities.end(); ++itr) { + for (TDoubleDoubleVecMapCItr itr = categoryPairProbabilities.begin(); + itr != categoryPairProbabilities.end(); ++itr) { pc += itr->first * static_cast(itr->second.size() / 2u); for (std::size_t i = 0u; i < itr->second.size(); i += 2u) { TDoubleVec categoryPair; categoryPair.push_back(itr->second[i]); categoryPair.push_back(itr->second[i + 1u]); - trueProbabilities.insert(TDoubleVecDoubleMap::value_type(categoryPair, pc)); + trueProbabilities.insert( + TDoubleVecDoubleMap::value_type(categoryPair, pc)); } } - LOG_DEBUG(<< "true probabilities = " << core::CContainerPrinter::print(trueProbabilities)); + LOG_DEBUG(<< "true probabilities = " + << core::CContainerPrinter::print(trueProbabilities)); CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); @@ -667,27 +723,35 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { double expectedProbabilities[] = {0.2, 0.32, 0.61, 1.0, 0.04, 0.1}; - for (TDoubleVecDoubleMapCItr itr = trueProbabilities.begin(); itr != trueProbabilities.end(); ++itr) { + for (TDoubleVecDoubleMapCItr itr = trueProbabilities.begin(); + itr != trueProbabilities.end(); ++itr) { TDoubleVec categoryPair; categoryPair.push_back(itr->first[0]); categoryPair.push_back(itr->first[1]); double lowerBound, upperBound; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, categoryPair, lowerBound, upperBound); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, categoryPair, lowerBound, upperBound); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = lowerBound; maths::CJointProbabilityOfLessLikelySamples expectedProbabilityCalculator; { - ptrdiff_t i = std::lower_bound(categories.begin(), categories.end(), itr->first[0]) - categories.begin(); - ptrdiff_t j = std::lower_bound(categories.begin(), categories.end(), itr->first[1]) - categories.begin(); + ptrdiff_t i = std::lower_bound(categories.begin(), + categories.end(), itr->first[0]) - + categories.begin(); + ptrdiff_t j = std::lower_bound(categories.begin(), + categories.end(), itr->first[1]) - + categories.begin(); expectedProbabilityCalculator.add(expectedProbabilities[i]); expectedProbabilityCalculator.add(expectedProbabilities[j]); } double expectedProbability; CPPUNIT_ASSERT(expectedProbabilityCalculator.calculate(expectedProbability)); - LOG_DEBUG(<< "category pair = " << core::CContainerPrinter::print(itr->first) << ", probability = " << probability - << ", expected probability = " << expectedProbability << ", true probability = " << itr->second); + LOG_DEBUG(<< "category pair = " << core::CContainerPrinter::print(itr->first) + << ", probability = " << probability + << ", expected probability = " << expectedProbability + << ", true probability = " << itr->second); 
CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 1e-10); } @@ -695,11 +759,14 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { } { // Test the function to compute all category probabilities. - const double rawCategories[] = {1.1, 1.2, 2.1, 2.2, 3.2, 5.1, 5.5, 6.0, 6.2, 6.6, 7.8, 8.0, 9.0, 9.9, 10.0, - 10.1, 10.2, 12.0, 12.1, 12.8, 13.1, 13.7, 15.2, 17.1, 17.5, 17.9, 18.2, 19.6, 20.0, 20.2}; - const double rawProbabilities[] = {0.02, 0.05, 0.01, 0.2, 0.001, 0.03, 0.02, 0.005, 0.1, 0.03, - 0.04, 0.01, 0.001, 0.006, 0.02, 0.05, 0.001, 0.001, 0.01, 0.01, - 0.2, 0.01, 0.02, 0.07, 0.01, 0.002, 0.01, 0.02, 0.03, 0.013}; + const double rawCategories[] = { + 1.1, 1.2, 2.1, 2.2, 3.2, 5.1, 5.5, 6.0, 6.2, 6.6, + 7.8, 8.0, 9.0, 9.9, 10.0, 10.1, 10.2, 12.0, 12.1, 12.8, + 13.1, 13.7, 15.2, 17.1, 17.5, 17.9, 18.2, 19.6, 20.0, 20.2}; + const double rawProbabilities[] = { + 0.02, 0.05, 0.01, 0.2, 0.001, 0.03, 0.02, 0.005, 0.1, 0.03, + 0.04, 0.01, 0.001, 0.006, 0.02, 0.05, 0.001, 0.001, 0.01, 0.01, + 0.2, 0.01, 0.02, 0.07, 0.01, 0.002, 0.01, 0.02, 0.03, 0.013}; CPPUNIT_ASSERT_EQUAL(boost::size(rawCategories), boost::size(rawProbabilities)); @@ -736,26 +803,32 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { } probabilities.push_back(TDoubleSizePr(1.0, probabilities.size())); for (std::size_t j = 0u; j < probabilities.size() - 1; ++j) { - expectedProbabilities[probabilities[j].second] += probabilities[j + 1].first; + expectedProbabilities[probabilities[j].second] += + probabilities[j + 1].first; } } for (std::size_t i = 0u; i < expectedProbabilities.size(); ++i) { expectedProbabilities[i] /= static_cast(numberSamples); } - LOG_DEBUG(<< "expectedProbabilities = " << core::CContainerPrinter::print(expectedProbabilities)); + LOG_DEBUG(<< "expectedProbabilities = " + << core::CContainerPrinter::print(expectedProbabilities)); TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); - CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(categories.size())); + CMultinomialConjugate filter( + CMultinomialConjugate::nonInformativePrior(categories.size())); for (std::size_t i = 0u; i < categories.size(); ++i) { - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, rawProbabilities[i] * 100.0))); + filter.addSamples( + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), + TDouble1Vec(1, categories[i]), + TDouble4Vec1Vec(1, TDouble4Vec(1, rawProbabilities[i] * 100.0))); } TDoubleVec lowerBounds, upperBounds; - filter.probabilitiesOfLessLikelyCategories(maths_t::E_TwoSided, lowerBounds, upperBounds); + filter.probabilitiesOfLessLikelyCategories(maths_t::E_TwoSided, + lowerBounds, upperBounds); LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(lowerBounds)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(lowerBounds), core::CContainerPrinter::print(upperBounds)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(lowerBounds), + core::CContainerPrinter::print(upperBounds)); double totalError = 0.0; for (std::size_t i = 0u; i < lowerBounds.size(); ++i) { @@ -767,8 +840,8 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { for (std::size_t i = 0u; i < categories.size(); ++i) { double lowerBound, upperBound; - CPPUNIT_ASSERT( - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound)); + 
CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, TDouble1Vec(1, categories[i]), lowerBound, upperBound)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); CPPUNIT_ASSERT_DOUBLES_EQUAL(lowerBounds[i], lowerBound, 1e-10); } @@ -792,10 +865,12 @@ void CMultinomialConjugateTest::testRemoveCategories() { double rawConcentrations[] = {1.0, 2.0, 1.5, 12.0, 10.0, 2.0}; TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); - TDoubleVec concentrationParameters(boost::begin(rawConcentrations), boost::end(rawConcentrations)); + TDoubleVec concentrationParameters(boost::begin(rawConcentrations), + boost::end(rawConcentrations)); { - CMultinomialConjugate prior(maths::CMultinomialConjugate(100, categories, concentrationParameters)); + CMultinomialConjugate prior( + maths::CMultinomialConjugate(100, categories, concentrationParameters)); TDoubleVec categoriesToRemove; categoriesToRemove.push_back(3.0); @@ -811,15 +886,19 @@ void CMultinomialConjugateTest::testRemoveCategories() { expectedConcentrations.push_back(1.0); expectedConcentrations.push_back(1.5); expectedConcentrations.push_back(2.0); - CMultinomialConjugate expectedPrior(maths::CMultinomialConjugate(100, expectedCategories, expectedConcentrations)); + CMultinomialConjugate expectedPrior(maths::CMultinomialConjugate( + 100, expectedCategories, expectedConcentrations)); - LOG_DEBUG(<< "expectedCategories = " << core::CContainerPrinter::print(expectedCategories)); - LOG_DEBUG(<< "expectedConcentrations = " << core::CContainerPrinter::print(expectedConcentrations)); + LOG_DEBUG(<< "expectedCategories = " + << core::CContainerPrinter::print(expectedCategories)); + LOG_DEBUG(<< "expectedConcentrations = " + << core::CContainerPrinter::print(expectedConcentrations)); CPPUNIT_ASSERT_EQUAL(expectedPrior.checksum(), prior.checksum()); } { - CMultinomialConjugate prior(maths::CMultinomialConjugate(90, categories, concentrationParameters)); + CMultinomialConjugate prior( + maths::CMultinomialConjugate(90, categories, concentrationParameters)); TDoubleVec categoriesToRemove; categoriesToRemove.push_back(1.0); @@ -835,15 +914,19 @@ void CMultinomialConjugateTest::testRemoveCategories() { expectedConcentrations.push_back(2.0); expectedConcentrations.push_back(12.0); expectedConcentrations.push_back(10.0); - CMultinomialConjugate expectedPrior(maths::CMultinomialConjugate(90, expectedCategories, expectedConcentrations)); + CMultinomialConjugate expectedPrior(maths::CMultinomialConjugate( + 90, expectedCategories, expectedConcentrations)); - LOG_DEBUG(<< "expectedCategories = " << core::CContainerPrinter::print(expectedCategories)); - LOG_DEBUG(<< "expectedConcentrations = " << core::CContainerPrinter::print(expectedConcentrations)); + LOG_DEBUG(<< "expectedCategories = " + << core::CContainerPrinter::print(expectedCategories)); + LOG_DEBUG(<< "expectedConcentrations = " + << core::CContainerPrinter::print(expectedConcentrations)); CPPUNIT_ASSERT_EQUAL(expectedPrior.checksum(), prior.checksum()); } { - CMultinomialConjugate prior(maths::CMultinomialConjugate(10, categories, concentrationParameters)); + CMultinomialConjugate prior( + maths::CMultinomialConjugate(10, categories, concentrationParameters)); prior.removeCategories(categories); @@ -861,7 +944,8 @@ void CMultinomialConjugateTest::testPersist() { const double rawCategories[] = {-1.0, 5.0, 2.1, 78.0, 15.3}; const double rawProbabilities[] = {0.1, 0.2, 0.35, 0.3, 0.05}; const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); 
- const TDoubleVec probabilities(boost::begin(rawProbabilities), boost::end(rawProbabilities)); + const TDoubleVec probabilities(boost::begin(rawProbabilities), + boost::end(rawProbabilities)); test::CRandomNumbers rng; @@ -869,8 +953,9 @@ void CMultinomialConjugateTest::testPersist() { rng.generateMultinomialSamples(categories, probabilities, 100, samples); maths::CMultinomialConjugate origFilter(CMultinomialConjugate::nonInformativePrior(5)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), + TDouble1Vec(1, samples[i]), + TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -889,14 +974,13 @@ void CMultinomialConjugateTest::testPersist() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, - decayRate + 0.1, - maths::MINIMUM_CLUSTER_SPLIT_FRACTION, - maths::MINIMUM_CLUSTER_SPLIT_COUNT, - maths::MINIMUM_CATEGORY_COUNT); + maths::SDistributionRestoreParams params( + maths_t::E_ContinuousData, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, + maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); maths::CMultinomialConjugate restoredFilter(params, traverser); - LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG(<< "orig checksum = " << checksum + << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); @@ -930,28 +1014,36 @@ void CMultinomialConjugateTest::testConcentration() { CppUnit::Test* CMultinomialConjugateTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultinomialConjugateTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testMultipleUpdate", - &CMultinomialConjugateTest::testMultipleUpdate)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testPropagation", - &CMultinomialConjugateTest::testPropagation)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testProbabilityEstimation", - &CMultinomialConjugateTest::testProbabilityEstimation)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testMarginalLikelihood", - &CMultinomialConjugateTest::testMarginalLikelihood)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testSampleMarginalLikelihood", - &CMultinomialConjugateTest::testSampleMarginalLikelihood)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CMultinomialConjugateTest::testProbabilityOfLessLikelySamples", &CMultinomialConjugateTest::testProbabilityOfLessLikelySamples)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testAnomalyScore", - &CMultinomialConjugateTest::testAnomalyScore)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testRemoveCategories", - &CMultinomialConjugateTest::testRemoveCategories)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testPersist", - &CMultinomialConjugateTest::testPersist)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testOverflow", - 
&CMultinomialConjugateTest::testOverflow)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultinomialConjugateTest::testConcentration", - &CMultinomialConjugateTest::testConcentration)); + "CMultinomialConjugateTest::testMultipleUpdate", + &CMultinomialConjugateTest::testMultipleUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testPropagation", &CMultinomialConjugateTest::testPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testProbabilityEstimation", + &CMultinomialConjugateTest::testProbabilityEstimation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testMarginalLikelihood", + &CMultinomialConjugateTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testSampleMarginalLikelihood", + &CMultinomialConjugateTest::testSampleMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testProbabilityOfLessLikelySamples", + &CMultinomialConjugateTest::testProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testAnomalyScore", + &CMultinomialConjugateTest::testAnomalyScore)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testRemoveCategories", + &CMultinomialConjugateTest::testRemoveCategories)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testPersist", &CMultinomialConjugateTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testOverflow", &CMultinomialConjugateTest::testOverflow)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultinomialConjugateTest::testConcentration", + &CMultinomialConjugateTest::testConcentration)); return suiteOfTests; } diff --git a/lib/maths/unittest/CMultivariateConstantPriorTest.cc b/lib/maths/unittest/CMultivariateConstantPriorTest.cc index 57f8dc8243..e125c3b277 100644 --- a/lib/maths/unittest/CMultivariateConstantPriorTest.cc +++ b/lib/maths/unittest/CMultivariateConstantPriorTest.cc @@ -48,18 +48,26 @@ void CMultivariateConstantPriorTest::testAddSamples() { double wrongDimension[] = {1.3, 2.1, 7.9}; - filter.addSamples( - COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(wrongDimension), boost::end(wrongDimension))), singleUnitWeight(3)); + filter.addSamples(COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(wrongDimension), + boost::end(wrongDimension))), + singleUnitWeight(3)); CPPUNIT_ASSERT(filter.isNonInformative()); double nans[] = {1.3, std::numeric_limits::quiet_NaN()}; - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(nans), boost::end(nans))), singleUnitWeight(3)); + filter.addSamples( + COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(nans), boost::end(nans))), + singleUnitWeight(3)); CPPUNIT_ASSERT(filter.isNonInformative()); double constant[] = {1.4, 1.0}; - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(2)); + filter.addSamples(COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), + boost::end(constant))), + singleUnitWeight(2)); CPPUNIT_ASSERT(!filter.isNonInformative()); } @@ -79,35 +87,42 @@ void CMultivariateConstantPriorTest::testMarginalLikelihood() { double likelihood; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed, - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(), 
singleUnitWeight(2), likelihood)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed, - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, - TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))), - singleUnitWeight(2), - likelihood)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpOverflowed, - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), - singleUnitWeight(2), - likelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed, filter.jointLogMarginalLikelihood( + COUNT_WEIGHT, TDouble10Vec1Vec(), + singleUnitWeight(2), likelihood)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpFailed, + filter.jointLogMarginalLikelihood( + COUNT_WEIGHT, + TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))), + singleUnitWeight(2), likelihood)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpOverflowed, + filter.jointLogMarginalLikelihood( + COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), + singleUnitWeight(2), likelihood)); CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds::lowest(), likelihood); - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(2)); + filter.addSamples(COUNT_WEIGHT, + TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), + boost::end(constant))), + singleUnitWeight(2)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), - singleUnitWeight(2), - likelihood)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood( + COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), + singleUnitWeight(2), likelihood)); CPPUNIT_ASSERT_EQUAL(std::log(boost::numeric::bounds::highest()), likelihood); CPPUNIT_ASSERT_EQUAL( maths_t::E_FpOverflowed, - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(different), boost::end(different))), - singleUnitWeight(2), - likelihood)); + filter.jointLogMarginalLikelihood( + COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(different), boost::end(different))), + singleUnitWeight(2), likelihood)); CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds::lowest(), likelihood); } @@ -121,12 +136,17 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodMean() { maths::CMultivariateConstantPrior filter(3); - CPPUNIT_ASSERT_EQUAL(std::string("[0, 0, 0]"), core::CContainerPrinter::print(filter.marginalLikelihoodMean())); + CPPUNIT_ASSERT_EQUAL(std::string("[0, 0, 0]"), + core::CContainerPrinter::print(filter.marginalLikelihoodMean())); double constant[] = {1.2, 6.0, 14.1}; - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(3)); + filter.addSamples(COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), + boost::end(constant))), + singleUnitWeight(3)); - CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 6, 14.1]"), core::CContainerPrinter::print(filter.marginalLikelihoodMean())); + CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 6, 14.1]"), + core::CContainerPrinter::print(filter.marginalLikelihoodMean())); } void CMultivariateConstantPriorTest::testMarginalLikelihoodMode() { @@ -140,13 +160,18 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodMode() { maths::CMultivariateConstantPrior filter(4); 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()), - core::CContainerPrinter::print(filter.marginalLikelihoodMode(COUNT_WEIGHT, unitWeight(4)))); + core::CContainerPrinter::print(filter.marginalLikelihoodMode( + COUNT_WEIGHT, unitWeight(4)))); double constant[] = {1.1, 6.5, 12.3, 14.1}; - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(4)); + filter.addSamples(COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), + boost::end(constant))), + singleUnitWeight(4)); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()), - core::CContainerPrinter::print(filter.marginalLikelihoodMode(COUNT_WEIGHT, unitWeight(4)))); + core::CContainerPrinter::print(filter.marginalLikelihoodMode( + COUNT_WEIGHT, unitWeight(4)))); } void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() { @@ -173,7 +198,10 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() { } double constant[] = {1.1, 6.5, 12.3, 14.1}; - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(4)); + filter.addSamples(COUNT_WEIGHT, + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), + boost::end(constant))), + singleUnitWeight(4)); covariance = filter.marginalLikelihoodCovariance(); CPPUNIT_ASSERT_EQUAL(std::size_t(4), covariance.size()); @@ -201,12 +229,16 @@ void CMultivariateConstantPriorTest::testSampleMarginalLikelihood() { double constant[] = {1.2, 4.1}; - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))), singleUnitWeight(2)); + filter.addSamples(COUNT_WEIGHT, + TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), + boost::end(constant))), + singleUnitWeight(2)); filter.sampleMarginalLikelihood(4, samples); CPPUNIT_ASSERT_EQUAL(std::size_t(4), samples.size()); for (std::size_t i = 0u; i < 4; ++i) { - CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 4.1]"), core::CContainerPrinter::print(samples[i])); + CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 4.1]"), + core::CContainerPrinter::print(samples[i])); } } @@ -221,13 +253,17 @@ void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() { maths::CMultivariateConstantPrior filter(2); double samples_[][2] = {{1.3, 1.4}, {1.1, 1.6}, {1.0, 5.4}}; - TDouble10Vec1Vec samples[] = {TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[0]), boost::end(samples_[0]))), - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[1]), boost::end(samples_[1]))), - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[2]), boost::end(samples_[2])))}; + TDouble10Vec1Vec samples[] = { + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[0]), boost::end(samples_[0]))), + TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(samples_[1]), boost::end(samples_[1]))), + TDouble10Vec1Vec( + 1, TDouble10Vec(boost::begin(samples_[2]), boost::end(samples_[2])))}; for (std::size_t i = 0u; i < boost::size(samples); ++i) { double lb, ub; maths::CMultivariateConstantPrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, samples[i], singleUnitWeight(2), lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, + samples[i], singleUnitWeight(2), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(1.0, lb); CPPUNIT_ASSERT_EQUAL(1.0, ub); LOG_DEBUG(<< "tail = " << core::CContainerPrinter::print(tail)); @@ -241,7 +277,9 @@ 
void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() { for (std::size_t i = 0u; i < boost::size(samples); ++i) { double lb, ub; maths::CMultivariateConstantPrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, samples[i], singleUnitWeight(2), lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, + samples[i], singleUnitWeight(2), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(i == 0 ? 1.0 : 0.0, lb); CPPUNIT_ASSERT_EQUAL(i == 0 ? 1.0 : 0.0, ub); LOG_DEBUG(<< "tail = " << core::CContainerPrinter::print(tail)); @@ -277,7 +315,8 @@ void CMultivariateConstantPriorTest::testPersist() { maths::CMultivariateConstantPrior restoredFilter(3, traverser); - LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG(<< "orig checksum = " << checksum + << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -294,7 +333,8 @@ void CMultivariateConstantPriorTest::testPersist() { { double constant[] = {1.2, 4.1, 1.0 / 3.0}; - maths::CMultivariateConstantPrior origFilter(3, TDouble10Vec(boost::begin(constant), boost::end(constant))); + maths::CMultivariateConstantPrior origFilter( + 3, TDouble10Vec(boost::begin(constant), boost::end(constant))); uint64_t checksum = origFilter.checksum(); std::string origXml; @@ -313,7 +353,8 @@ void CMultivariateConstantPriorTest::testPersist() { maths::CMultivariateConstantPrior restoredFilter(3, traverser); - LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG(<< "orig checksum = " << checksum + << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -330,24 +371,30 @@ void CMultivariateConstantPriorTest::testPersist() { CppUnit::Test* CMultivariateConstantPriorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultivariateConstantPriorTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateConstantPriorTest::testAddSamples", - &CMultivariateConstantPriorTest::testAddSamples)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateConstantPriorTest::testMarginalLikelihood", - &CMultivariateConstantPriorTest::testMarginalLikelihood)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CMultivariateConstantPriorTest::testMarginalLikelihoodMean", &CMultivariateConstantPriorTest::testMarginalLikelihoodMean)); + "CMultivariateConstantPriorTest::testAddSamples", + &CMultivariateConstantPriorTest::testAddSamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultivariateConstantPriorTest::testMarginalLikelihood", + &CMultivariateConstantPriorTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultivariateConstantPriorTest::testMarginalLikelihoodMean", + &CMultivariateConstantPriorTest::testMarginalLikelihoodMean)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultivariateConstantPriorTest::testMarginalLikelihoodMode", + &CMultivariateConstantPriorTest::testMarginalLikelihoodMode)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance", + &CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance)); + suiteOfTests->addTest(new 
CppUnit::TestCaller( + "CMultivariateConstantPriorTest::testSampleMarginalLikelihood", + &CMultivariateConstantPriorTest::testSampleMarginalLikelihood)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CMultivariateConstantPriorTest::testMarginalLikelihoodMode", &CMultivariateConstantPriorTest::testMarginalLikelihoodMode)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance", - &CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance)); + "CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples", + &CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CMultivariateConstantPriorTest::testSampleMarginalLikelihood", &CMultivariateConstantPriorTest::testSampleMarginalLikelihood)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples", - &CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMultivariateConstantPriorTest::testPersist", - &CMultivariateConstantPriorTest::testPersist)); + "CMultivariateConstantPriorTest::testPersist", + &CMultivariateConstantPriorTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc index f94642bed2..028ffa6d13 100644 --- a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc +++ b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc @@ -43,7 +43,8 @@ const TDouble10Vec UNIT_WEIGHT_2(2, 1.0); const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2)); template -class CMultivariateMultimodalPriorForTest : public maths::CMultivariateMultimodalPrior { +class CMultivariateMultimodalPriorForTest + : public maths::CMultivariateMultimodalPrior { public: using TClusterer = typename maths::CMultivariateMultimodalPrior::TClusterer; using TModeVec = typename maths::CMultivariateMultimodalPrior::TModeVec; @@ -52,14 +53,20 @@ class CMultivariateMultimodalPriorForTest : public maths::CMultivariateMultimoda CMultivariateMultimodalPriorForTest(const maths::CMultivariateMultimodalPrior& prior) : maths::CMultivariateMultimodalPrior(prior) {} - const TModeVec& modes() const { return this->maths::CMultivariateMultimodalPrior::modes(); } + const TModeVec& modes() const { + return this->maths::CMultivariateMultimodalPrior::modes(); + } }; template -maths::CMultivariateMultimodalPrior makePrior(maths_t::EDataType dataType, double decayRate = 0.0) { - maths::CXMeansOnline clusterer(dataType, maths_t::E_ClustersFractionWeight, decayRate); +maths::CMultivariateMultimodalPrior +makePrior(maths_t::EDataType dataType, double decayRate = 0.0) { + maths::CXMeansOnline clusterer( + dataType, maths_t::E_ClustersFractionWeight, decayRate); return maths::CMultivariateMultimodalPrior( - dataType, clusterer, maths::CMultivariateNormalConjugate::nonInformativePrior(dataType, decayRate), decayRate); + dataType, clusterer, + maths::CMultivariateNormalConjugate::nonInformativePrior(dataType, decayRate), + decayRate); } void gaussianSamples(test::CRandomNumbers& rng, @@ -72,7 +79,9 @@ void gaussianSamples(test::CRandomNumbers& rng, TVector2 mean(means[i], means[i] + 2); TMatrix2 covariance(covariances[i], covariances[i] + 3); TDoubleVecVec samples_; - rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n[i], samples_); + 
rng.generateMultivariateNormalSamples(mean.toVector(), + covariance.toVectors(), + n[i], samples_); samples.reserve(samples.size() + samples_.size()); for (std::size_t j = 0u; j < samples_.size(); ++j) { samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end())); @@ -82,7 +91,10 @@ void gaussianSamples(test::CRandomNumbers& rng, } template -double logLikelihood(const double w[N], const double means[N][2], const double covariances[N][3], const TDouble10Vec& x) { +double logLikelihood(const double w[N], + const double means[N][2], + const double covariances[N][3], + const TDouble10Vec& x) { double lx = 0.0; for (std::size_t i = 0u; i < N; ++i) { TVector2 mean(means[i]); @@ -94,11 +106,15 @@ double logLikelihood(const double w[N], const double means[N][2], const double c return std::log(lx); } -double logLikelihood(const TDoubleVec& w, const TDoubleVecVec& means, const TDoubleVecVecVec& covariances, const TDoubleVec& x) { +double logLikelihood(const TDoubleVec& w, + const TDoubleVecVec& means, + const TDoubleVecVecVec& covariances, + const TDoubleVec& x) { double lx = 0.0; for (std::size_t i = 0u; i < w.size(); ++i) { double ll; - maths::gaussianLogLikelihood(TMatrix2(covariances[i]), TVector2(x) - TVector2(means[i]), ll); + maths::gaussianLogLikelihood(TMatrix2(covariances[i]), + TVector2(x) - TVector2(means[i]), ll); lx += w[i] * std::exp(ll); } return std::log(lx); @@ -114,7 +130,8 @@ void empiricalProbabilityOfLessLikelySamples(const TDoubleVec& w, for (std::size_t i = 0u; i < w.size(); ++i) { TDoubleVecVec samples; - rng.generateMultivariateNormalSamples(means[i], covariances[i], static_cast(w[i] * 1000.0 * m), samples); + rng.generateMultivariateNormalSamples( + means[i], covariances[i], static_cast(w[i] * 1000.0 * m), samples); result.reserve(samples.size()); for (std::size_t j = 0u; j < samples.size(); ++j) { result.push_back(logLikelihood(w, means, covariances, samples[j])); @@ -167,10 +184,13 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() { maths::CSampling::seed(); for (std::size_t j = 0; j < samples.size(); ++j) { - filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); + filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), + SINGLE_UNIT_WEIGHT_2); } maths::CSampling::seed(); - filter2.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter2.addSamples( + COUNT_WEIGHT, samples, + TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -228,8 +248,10 @@ void CMultivariateMultimodalPriorTest::testPropagation() { const double decayRate = 0.1; - maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData, decayRate)); - filter.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + maths::CMultivariateMultimodalPrior<2> filter( + makePrior<2>(maths_t::E_ContinuousData, decayRate)); + filter.addSamples(COUNT_WEIGHT, samples, + TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); double numberSamples = filter.numberSamples(); TDouble10Vec mean = filter.marginalLikelihoodMean(); @@ -246,10 +268,12 @@ void CMultivariateMultimodalPriorTest::testPropagation() { LOG_DEBUG(<< "mean = " << core::CContainerPrinter::print(mean)); LOG_DEBUG(<< "propagatedMean = " << core::CContainerPrinter::print(propagatedMean)); LOG_DEBUG(<< "covariance 
= " << core::CContainerPrinter::print(covariance)); - LOG_DEBUG(<< "propagatedCovariance = " << core::CContainerPrinter::print(propagatedCovariance)); + LOG_DEBUG(<< "propagatedCovariance = " + << core::CContainerPrinter::print(propagatedCovariance)); CPPUNIT_ASSERT(propagatedNumberSamples < numberSamples); - CPPUNIT_ASSERT((TVector2(propagatedMean) - TVector2(mean)).euclidean() < eps * TVector2(mean).euclidean()); + CPPUNIT_ASSERT((TVector2(propagatedMean) - TVector2(mean)).euclidean() < + eps * TVector2(mean).euclidean()); Eigen::MatrixXd c(2, 2); Eigen::MatrixXd cp(2, 2); for (std::size_t i = 0u; i < 2; ++i) { @@ -320,7 +344,8 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() { TDouble10Vec1Vec samples; gaussianSamples(rng, boost::size(n), n, means, covariances, samples); - double w[] = {n[0] / static_cast(n[0] + n[1]), n[1] / static_cast(n[0] + n[1])}; + double w[] = {n[0] / static_cast(n[0] + n[1]), + n[1] / static_cast(n[0] + n[1])}; double loss = 0.0; TMeanAccumulator differentialEntropy_; @@ -332,12 +357,17 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() { for (std::size_t i = 0; i < 10; ++i) { rng.random_shuffle(samples.begin(), samples.end()); - maths::CMultivariateMultimodalPrior<2> filter1(makePrior<2>(maths_t::E_ContinuousData)); + maths::CMultivariateMultimodalPrior<2> filter1( + makePrior<2>(maths_t::E_ContinuousData)); maths::CMultivariateNormalConjugate<2> filter2 = maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData); - filter1.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); - filter2.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter1.addSamples( + COUNT_WEIGHT, samples, + TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter2.addSamples( + COUNT_WEIGHT, samples, + TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); CPPUNIT_ASSERT_EQUAL(std::size_t(2), filter1.numberModes()); @@ -350,15 +380,18 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() { TDouble10Vec1Vec sample(1, samples[j]); double l1; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter1.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l1)); + filter1.jointLogMarginalLikelihood( + COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l1)); loss1G.add(ll - l1); double l2; CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter2.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l2)); + filter2.jointLogMarginalLikelihood( + COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l2)); loss12.add(l2 - l1); } - LOG_DEBUG(<< "loss1G = " << maths::CBasicStatistics::mean(loss1G) << ", loss12 = " << maths::CBasicStatistics::mean(loss12) + LOG_DEBUG(<< "loss1G = " << maths::CBasicStatistics::mean(loss1G) + << ", loss12 = " << maths::CBasicStatistics::mean(loss12) << ", differential entropy " << differentialEntropy); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss12) < 0.0); @@ -384,14 +417,16 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() { test::CRandomNumbers rng; double means_[][2] = {{10, 15}, {30, 10}, {10, 15}, {30, 10}}; - double covariances_[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{100, 2}, {2, 15}}, {{100, 2}, {2, 15}}}; + double covariances_[][2][2] = { + {{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{100, 2}, {2, 15}}, {{100, 2}, {2, 15}}}; TDoubleVecVec means(boost::size(means_)); TDoubleVecVecVec 
covariances(boost::size(means_)); for (std::size_t i = 0u; i < boost::size(means_); ++i) { means[i].assign(&means_[i][0], &means_[i][2]); for (std::size_t j = 0u; j < 2; ++j) { - covariances[i].push_back(TDoubleVec(&covariances_[i][j][0], &covariances_[i][j][2])); + covariances[i].push_back( + TDoubleVec(&covariances_[i][j][0], &covariances_[i][j][2])); } } @@ -416,7 +451,8 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() { TDoubleVecVec samples; for (std::size_t j = 0u; j < boost::size(n[i]); ++j) { TDoubleVecVec samples_; - rng.generateMultivariateNormalSamples(means[j], covariances[j], n[i][j], samples_); + rng.generateMultivariateNormalSamples(means[j], covariances[j], + n[i][j], samples_); for (std::size_t k = 0u; k < samples_.size(); ++k) { modeCovariances[j].add(TVector2(samples_[k])); totalCovariances.add(TVector2(samples_[k])); @@ -427,8 +463,8 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() { LOG_DEBUG(<< "# samples = " << samples.size()); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples( - COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), + TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); //pointsToDate.push_back(samples[j]); //if (pointsToDate.size() == subplotCounts[subplot]) @@ -448,7 +484,8 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() { //} } - const CMultivariateMultimodalPriorForTest<2>::TModeVec& modes = filter.modes(); + const CMultivariateMultimodalPriorForTest<2>::TModeVec& modes = + filter.modes(); LOG_DEBUG(<< "# modes = " << modes.size()); LOG_DEBUG(<< "prior = " << filter.print()); @@ -457,7 +494,8 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() { maths::CBasicStatistics::COrderStatisticsStack covError; if (modes.size() == 1) { - meanError.add((TVector2(modes[j].s_Prior->marginalLikelihoodMean()) - maths::CBasicStatistics::mean(totalCovariances)) + meanError.add((TVector2(modes[j].s_Prior->marginalLikelihoodMean()) - + maths::CBasicStatistics::mean(totalCovariances)) .euclidean()); TMatrix2 mlc(modes[j].s_Prior->marginalLikelihoodCovariance()); TMatrix2 tcm = maths::CBasicStatistics::covariances(totalCovariances); @@ -465,13 +503,16 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() { } else { for (std::size_t k = 0u; k < boost::size(modeCovariances); ++k) { meanError.add( - (TVector2(modes[j].s_Prior->marginalLikelihoodMean()) - maths::CBasicStatistics::mean(modeCovariances[k])) + (TVector2(modes[j].s_Prior->marginalLikelihoodMean()) - + maths::CBasicStatistics::mean(modeCovariances[k])) .euclidean() / maths::CBasicStatistics::mean(modeCovariances[k]).euclidean()); - covError.add((TMatrix2(modes[j].s_Prior->marginalLikelihoodCovariance()) - - maths::CBasicStatistics::covariances(modeCovariances[k])) - .frobenius() / - maths::CBasicStatistics::covariances(modeCovariances[k]).frobenius()); + covError.add( + (TMatrix2(modes[j].s_Prior->marginalLikelihoodCovariance()) - + maths::CBasicStatistics::covariances(modeCovariances[k])) + .frobenius() / + maths::CBasicStatistics::covariances(modeCovariances[k]) + .frobenius()); } } @@ -528,7 +569,9 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() { rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), 
TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(COUNT_WEIGHT, samples, + TDouble10Vec4Vec1Vec(samples.size(), + TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); LOG_DEBUG(<< "# modes = " << filter.numberModes()); if (filter.numberModes() != 3) { continue; @@ -551,15 +594,16 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() { LOG_DEBUG(<< "m = " << means[i]); LOG_DEBUG(<< "v = " << trace); - double intervals[][2] = {{means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, - {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, - {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}, - {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, - {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, - {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}, - {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, - {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, - {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}}; + double intervals[][2] = { + {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, + {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}, + {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, + {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}}; CUnitKernel<2> likelihoodKernel(filter); CMeanKernel<2> meanKernel(filter); CCovarianceKernel<2> covarianceKernel(filter, expectedMean); @@ -608,7 +652,8 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() { LOG_DEBUG(<< "Mean Z = " << maths::CBasicStatistics::mean(meanZ)); LOG_DEBUG(<< "Mean mean error = " << maths::CBasicStatistics::mean(meanMeanError)); - LOG_DEBUG(<< "Mean covariance error = " << maths::CBasicStatistics::mean(meanCovarianceError)); + LOG_DEBUG(<< "Mean covariance error = " + << maths::CBasicStatistics::mean(meanCovarianceError)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, maths::CBasicStatistics::mean(meanZ), 0.1); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < 0.1); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanCovarianceError) < 0.04); @@ -647,10 +692,13 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { if (i % 10 == 0) { LOG_DEBUG(<< "sample mean = " << maths::CBasicStatistics::mean(expectedMean)); - LOG_DEBUG(<< "distribution mean = " << core::CContainerPrinter::print(filter.marginalLikelihoodMean())); + LOG_DEBUG(<< "distribution mean = " + << core::CContainerPrinter::print(filter.marginalLikelihoodMean())); } - double error = (maths::CBasicStatistics::mean(expectedMean) - TVector2(filter.marginalLikelihoodMean())).euclidean() / + double error = (maths::CBasicStatistics::mean(expectedMean) - + TVector2(filter.marginalLikelihoodMean())) + .euclidean() / maths::CBasicStatistics::mean(expectedMean).euclidean(); CPPUNIT_ASSERT(error < eps); meanError.add(error); @@ -667,7 +715,8 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() { // 
Test that the sample mode is close to the generating distribution mode.

- using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double>>;
+ using TMaxAccumulator =
+ maths::CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double>>;

 maths::CSampling::seed();
@@ -694,15 +743,19 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() {
 rng.random_shuffle(samples.begin(), samples.end());

 CMultivariateMultimodalPriorForTest<2> filter(makePrior<2>(maths_t::E_ContinuousData));
- filter.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), SINGLE_UNIT_WEIGHT_2[0]));
- TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]);
+ filter.addSamples(COUNT_WEIGHT, samples,
+ TDouble10Vec4Vec1Vec(samples.size(), SINGLE_UNIT_WEIGHT_2[0]));
+ TDouble10Vec mode =
+ filter.marginalLikelihoodMode(COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]);

 TVector2 expectedMode;
 TMaxAccumulator maxLikelihood;
 for (std::size_t i = 0u; i < filter.modes().size(); ++i) {
- TDouble10Vec mi = (filter.modes())[i].s_Prior->marginalLikelihoodMode(COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]);
+ TDouble10Vec mi = (filter.modes())[i].s_Prior->marginalLikelihoodMode(
+ COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]);
 double likelihood;
- filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, mi), SINGLE_UNIT_WEIGHT_2, likelihood);
+ filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, mi),
+ SINGLE_UNIT_WEIGHT_2, likelihood);
 if (maxLikelihood.add(likelihood)) {
 expectedMode = TVector2(mi);
 }
@@ -711,7 +764,8 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() {
 LOG_DEBUG(<< "# modes = " << filter.numberModes());
 LOG_DEBUG(<< "mode = " << core::CContainerPrinter::print(mode));
 LOG_DEBUG(<< "expected mode = " << expectedMode);
- double error = (TVector2(mode) - expectedMode).euclidean() / expectedMode.euclidean();
+ double error = (TVector2(mode) - expectedMode).euclidean() /
+ expectedMode.euclidean();
 CPPUNIT_ASSERT(error < eps);
 meanError.add(error);
 }
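A note on the selection pattern the hunk above reformats: each component mode proposes its own likelihood mode, every candidate is scored under the joint log-likelihood, and TMaxAccumulator keeps the argmax. The following is a minimal standalone sketch of that selection, with made-up candidate scores standing in for jointLogMarginalLikelihood; std::max_element plays the role of the one-element order-statistics accumulator:

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

// Hypothetical stand-ins for the per-mode candidates: each pair is
// (joint log-likelihood score, proposed mode). We keep the candidate
// whose score is largest, which is what TMaxAccumulator::add does.
int main() {
    std::vector<std::pair<double, std::vector<double>>> candidates{
        {-4.2, {10.0, 10.0}}, {-3.1, {15.0, 18.0}}, {-7.9, {10.0, 60.0}}};
    auto best = std::max_element(
        candidates.begin(), candidates.end(),
        [](const auto& a, const auto& b) { return a.first < b.first; });
    std::cout << "expected mode = (" << best->second[0] << ", "
              << best->second[1] << ")\n";
}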
@@ -750,7 +804,9 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() {
 means.push_back(mean);
 covariances.push_back(covariance);
 TDoubleVecVec samples_;
- rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n[i], samples_);
+ rng.generateMultivariateNormalSamples(mean.toVector(),
+ covariance.toVectors(),
+ n[i], samples_);
 samples.reserve(samples.size() + samples_.size());
 for (std::size_t j = 0u; j < samples_.size(); ++j) {
 samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end()));
 }
@@ -760,7 +816,8 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() {
 LOG_DEBUG(<< "# samples = " << samples.size());

 maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData));
- filter.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
+ filter.addSamples(COUNT_WEIGHT, samples,
+ TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));

 TDouble10Vec1Vec sampled;
 filter.sampleMarginalLikelihood(300, sampled);
@@ -781,8 +838,10 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() {
 LOG_DEBUG(<< "expected covariance = " << expectedCovariance);
 LOG_DEBUG(<< "sampled mean = " << sampledMean);
 LOG_DEBUG(<< "sampled covariance = " << sampledCovariance);
- CPPUNIT_ASSERT((sampledMean - expectedMean).euclidean() < 1e-3 * expectedMean.euclidean());
- CPPUNIT_ASSERT((sampledCovariance - expectedCovariance).frobenius() < 5e-3 * expectedCovariance.frobenius());
+ CPPUNIT_ASSERT((sampledMean - expectedMean).euclidean() <
+ 1e-3 * expectedMean.euclidean());
+ CPPUNIT_ASSERT((sampledCovariance - expectedCovariance).frobenius() <
+ 5e-3 * expectedCovariance.frobenius());

 TCovariances2 modeSampledCovariances[2];
 for (std::size_t i = 0u; i < sampled.size(); ++i) {
 ...
 }

 for (std::size_t i = 0u; i < 2; ++i) {
- TVector2 modeSampledMean = maths::CBasicStatistics::mean(modeSampledCovariances[i]);
- TMatrix2 modeSampledCovariance = maths::CBasicStatistics::covariances(modeSampledCovariances[i]);
+ TVector2 modeSampledMean =
+ maths::CBasicStatistics::mean(modeSampledCovariances[i]);
+ TMatrix2 modeSampledCovariance =
+ maths::CBasicStatistics::covariances(modeSampledCovariances[i]);
 LOG_DEBUG(<< "sample mean = " << means[i]);
 LOG_DEBUG(<< "sample covariance = " << covariances[i]);
 LOG_DEBUG(<< "sampled mean = " << modeSampledMean);
 LOG_DEBUG(<< "sampled covariance = " << modeSampledCovariance);
- CPPUNIT_ASSERT((modeSampledMean - means[i]).euclidean() < 0.03 * means[i].euclidean());
- CPPUNIT_ASSERT((modeSampledCovariance - covariances[i]).frobenius() < 0.2 * covariances[i].frobenius());
+ CPPUNIT_ASSERT((modeSampledMean - means[i]).euclidean() <
+ 0.03 * means[i].euclidean());
+ CPPUNIT_ASSERT((modeSampledCovariance - covariances[i]).frobenius() <
+ 0.2 * covariances[i].frobenius());
 }
 CPPUNIT_ASSERT_DOUBLES_EQUAL(
 static_cast<double>(n[0]) / static_cast<double>(n[1]),
- maths::CBasicStatistics::count(modeSampledCovariances[0]) / maths::CBasicStatistics::count(modeSampledCovariances[1]),
+ maths::CBasicStatistics::count(modeSampledCovariances[0]) /
+ maths::CBasicStatistics::count(modeSampledCovariances[1]),
 0.02);
 }
@@ -819,10 +883,14 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 maths::CSampling::seed();

 const double w_[][3] = {{0.25, 0.3, 0.45}, {0.1, 0.3, 0.6}};
- const double means_[][3][2] = {{{10, 10}, {15, 18}, {10, 60}}, {{0, 0}, {-20, -30}, {40, 15}}};
- const double covariances_[][3][2][2] = {{{{10, 0}, {0, 10}}, {{10, 9}, {9, 10}}, {{10, -9}, {-9, 10}}},
- {{{5, 0}, {0, 5}}, {{40, 9}, {9, 40}}, {{30, -27}, {-27, 30}}}};
- const double offsets[][2] = {{0.0, 0.0}, {0.0, 6.0}, {4.0, 0.0}, {6.0, 6.0}, {6.0, -6.0}, {-8.0, 8.0}, {-8.0, -8.0}};
+ const double means_[][3][2] = {{{10, 10}, {15, 18}, {10, 60}},
+ {{0, 0}, {-20, -30}, {40, 15}}};
+ const double covariances_[][3][2][2] = {
+ {{{10, 0}, {0, 10}}, {{10, 9}, {9, 10}}, {{10, -9}, {-9, 10}}},
+ {{{5, 0}, {0, 5}}, {{40, 9}, {9, 40}}, {{30, -27}, {-27, 30}}}};
+ const double offsets[][2] = {{0.0, 0.0}, {0.0, 6.0}, {4.0, 0.0},
+ {6.0, 6.0}, {6.0, -6.0}, {-8.0, 8.0},
+ {-8.0, -8.0}};

 test::CRandomNumbers rng;
@@ -846,7 +914,8 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 TDoubleVecVec samples;
 for (std::size_t j = 0u; j < w.size(); ++j) {
 TDoubleVecVec samples_;
- rng.generateMultivariateNormalSamples(means[j], covariances[j], static_cast<std::size_t>(w[j] * 1000.0), samples_);
+ rng.generateMultivariateNormalSamples(
+ means[j], covariances[j], static_cast<std::size_t>(w[j] * 1000.0), samples_);
 samples.insert(samples.end(), samples_.begin(), samples_.end());
 }
 rng.random_shuffle(samples.begin(), samples.end());
@@ -868,25 +937,32 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 TVector2 x = TVector2(means[j]) + TVector2(offsets[k]);
 double ll = logLikelihood(w, means, covariances, x.toVector());
- double px = static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) / static_cast<double>(p.size());
+ double px = static_cast<double>(
+ std::lower_bound(p.begin(), p.end(), ll) - p.begin()) /
+ static_cast<double>(p.size());

 double lb, ub;
 maths::CMultivariatePrior::TTail10Vec tail;
 filter.probabilityOfLessLikelySamples(
- maths_t::E_TwoSided, COUNT_WEIGHT, TDouble10Vec1Vec(1, x.toVector()), SINGLE_UNIT_WEIGHT_2, lb, ub, tail);
+ maths_t::E_TwoSided, COUNT_WEIGHT,
+ TDouble10Vec1Vec(1, x.toVector()),
+ SINGLE_UNIT_WEIGHT_2, lb, ub, tail);
 double pa = (lb + ub) / 2.0;

 LOG_DEBUG(<< " p(" << x << "), actual = " << pa << ", expected = " << px);
 meanAbsError.add(std::fabs(px - pa));

 if (px < 1.0 && px > 0.0) {
- meanRelError.add(std::fabs(std::log(px) - std::log(pa)) / std::fabs(std::log(px)));
+ meanRelError.add(std::fabs(std::log(px) - std::log(pa)) /
+ std::fabs(std::log(px)));
 }
 }

- LOG_DEBUG(<< "mean absolute error = " << maths::CBasicStatistics::mean(meanAbsError));
+ LOG_DEBUG(<< "mean absolute error = "
+ << maths::CBasicStatistics::mean(meanAbsError));
 CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanAbsError) < 0.25);
- LOG_DEBUG(<< "mean relative error = " << maths::CBasicStatistics::mean(meanRelError));
+ LOG_DEBUG(<< "mean relative error = "
+ << maths::CBasicStatistics::mean(meanRelError));
 CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelError) < 0.6);
 }
 }
@@ -917,26 +993,29 @@ void CMultivariateMultimodalPriorTest::testLatLongData() {
 using TTimeDoubleVecPrVec = std::vector<TTimeDoubleVecPr>;

 TTimeDoubleVecPrVec timeseries;
- CPPUNIT_ASSERT(
- test::CTimeSeriesTestData::parse("testfiles/lat_lng.csv", timeseries, test::CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX));
+ CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse(
+ "testfiles/lat_lng.csv", timeseries, test::CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX));
 CPPUNIT_ASSERT(!timeseries.empty());

- LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ...");
+ LOG_DEBUG(<< "timeseries = "
+ << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10)
+ << " ...");

 maths_t::EDataType dataType = maths_t::E_ContinuousData;
- boost::shared_ptr<maths::CMultivariatePrior> modePrior = maths::CMultivariateNormalConjugateFactory::nonInformative(2, dataType, 0.001);
+ boost::shared_ptr<maths::CMultivariatePrior> modePrior =
+ maths::CMultivariateNormalConjugateFactory::nonInformative(2, dataType, 0.001);
 boost::shared_ptr<maths::CMultivariatePrior> filter =
- maths::CMultivariateMultimodalPriorFactory::nonInformative(2, // dimension
- dataType,
- 0.0005,
- maths_t::E_ClustersFractionWeight,
- 0.02, // minimumClusterFraction
- 4, // minimumClusterCount
- 0.8, // minimumCategoryCount
- *modePrior);
+ maths::CMultivariateMultimodalPriorFactory::nonInformative(
+ 2, // dimension
+ dataType, 0.0005, maths_t::E_ClustersFractionWeight,
+ 0.02, // minimumClusterFraction
+ 4, // minimumClusterCount
+ 0.8, // minimumCategoryCount
+ *modePrior);

 for (std::size_t i = 0u; i < timeseries.size(); ++i) {
- filter->addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, timeseries[i].second), SINGLE_UNIT_WEIGHT_2);
+ filter->addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, timeseries[i].second),
+ SINGLE_UNIT_WEIGHT_2);
 filter->propagateForwardsByTime(1.0);
 }
 LOG_DEBUG(<< filter->print());
@@ -1005,14 +1084,13 @@ void CMultivariateMultimodalPriorTest::testPersist() {
 CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
 core::CRapidXmlStateRestoreTraverser traverser(parser);

- maths::SDistributionRestoreParams params(dataType,
- decayRate + 0.1,
- maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
- maths::MINIMUM_CLUSTER_SPLIT_COUNT,
- maths::MINIMUM_CATEGORY_COUNT);
+ maths::SDistributionRestoreParams params(
+ dataType, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+ maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
 maths::CMultivariateMultimodalPrior<2> restoredFilter(params, traverser);

- LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
+ LOG_DEBUG(<< "orig checksum = " << checksum
+ << " restored checksum = " << restoredFilter.checksum());
 CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());

 // The XML representation of the new filter should be the same as the original
@@ -1028,35 +1106,48 @@ CppUnit::Test* CMultivariateMultimodalPriorTest::suite() {
 CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultivariateMultimodalPriorTest");

- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testMultipleUpdate",
- &CMultivariateMultimodalPriorTest::testMultipleUpdate));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testPropagation",
- &CMultivariateMultimodalPriorTest::testPropagation));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testSingleMode",
- &CMultivariateMultimodalPriorTest::testSingleMode));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testMultipleModes",
- &CMultivariateMultimodalPriorTest::testMultipleModes));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testSplitAndMerge",
- &CMultivariateMultimodalPriorTest::testSplitAndMerge));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
- "CMultivariateMultimodalPriorTest::testMarginalLikelihood", &CMultivariateMultimodalPriorTest::testMarginalLikelihood));
+ "CMultivariateMultimodalPriorTest::testMultipleUpdate",
+ &CMultivariateMultimodalPriorTest::testMultipleUpdate));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testPropagation",
+ &CMultivariateMultimodalPriorTest::testPropagation));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testSingleMode",
+ &CMultivariateMultimodalPriorTest::testSingleMode));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testMultipleModes",
+ &CMultivariateMultimodalPriorTest::testMultipleModes));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testSplitAndMerge",
+ &CMultivariateMultimodalPriorTest::testSplitAndMerge));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testMarginalLikelihood",
+ &CMultivariateMultimodalPriorTest::testMarginalLikelihood));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean",
+ &CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode",
+ &CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood",
+ &CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
+ "CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples",
+ &CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
- "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean", &CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean));
+ "CMultivariateMultimodalPriorTest::testIntegerData",
+ &CMultivariateMultimodalPriorTest::testIntegerData));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
- "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode", &CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode));
+ "CMultivariateMultimodalPriorTest::testLowVariationData",
+ &CMultivariateMultimodalPriorTest::testLowVariationData));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
- "CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood", &CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples",
- &CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testIntegerData",
- &CMultivariateMultimodalPriorTest::testIntegerData));
+ "CMultivariateMultimodalPriorTest::testLatLongData",
+ &CMultivariateMultimodalPriorTest::testLatLongData));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>(
- "CMultivariateMultimodalPriorTest::testLowVariationData", &CMultivariateMultimodalPriorTest::testLowVariationData));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testLatLongData",
- &CMultivariateMultimodalPriorTest::testLatLongData));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateMultimodalPriorTest>("CMultivariateMultimodalPriorTest::testPersist",
- &CMultivariateMultimodalPriorTest::testPersist));
+ "CMultivariateMultimodalPriorTest::testPersist",
+ &CMultivariateMultimodalPriorTest::testPersist));

 return suiteOfTests;
}
diff --git a/lib/maths/unittest/CMultivariateNormalConjugateTest.cc b/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
index 83e1a85e34..e43cde8967 100644
--- a/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
+++ b/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
@@ -33,15 +33,19 @@ namespace {
 const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight);
 const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight);
 const TDouble10Vec4Vec UNIT_WEIGHT_2(1, TDouble10Vec(2, 1.0));
-const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0)));
+const TDouble10Vec4Vec1Vec
+ SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0)));

-void empiricalProbabilityOfLessLikelySamples(const TDoubleVec& mean, const TDoubleVecVec& covariance, TDoubleVec& result) {
+void empiricalProbabilityOfLessLikelySamples(const TDoubleVec& mean,
+ const TDoubleVecVec& covariance,
+ TDoubleVec& result) {
 test::CRandomNumbers rng;
 TDoubleVecVec samples;
 rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);
 result.resize(samples.size());
 for (std::size_t i = 0u; i < samples.size(); ++i) {
- maths::gaussianLogLikelihood(TMatrix2(covariance), TVector2(samples[i]) - TVector2(mean), result[i]);
+ maths::gaussianLogLikelihood(
+ TMatrix2(covariance), TVector2(samples[i]) - TVector2(mean), result[i]);
 }
 std::sort(result.begin(), result.end());
}
@@ -68,7 +72,8 @@ void gaussianSamples(test::CRandomNumbers& rng,
 TVector2 mean(means, means + 2);
 TMatrix2 covariance(covariances, covariances + 3);
 TDoubleVecVec samples_;
-
rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n, samples_); + rng.generateMultivariateNormalSamples( + mean.toVector(), covariance.toVectors(), n, samples_); samples.reserve(samples.size() + samples_.size()); for (std::size_t j = 0u; j < samples_.size(); ++j) { samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end())); @@ -101,23 +106,27 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() { for (std::size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG(<< "*** data type = " << print(dataTypes[i]) << " ***"); - maths::CMultivariateNormalConjugate<2> filter1(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); + maths::CMultivariateNormalConjugate<2> filter1( + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); maths::CMultivariateNormalConjugate<2> filter2(filter1); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); + filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), + SINGLE_UNIT_WEIGHT_2); } TDouble10Vec4Vec1Vec weights(samples.size(), UNIT_WEIGHT_2); filter2.addSamples(COUNT_WEIGHT, samples, weights); - CPPUNIT_ASSERT(filter1.equalTolerance(filter2, maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5)); + CPPUNIT_ASSERT(filter1.equalTolerance( + filter2, maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5)); } LOG_DEBUG(<< "****** Test with variance scale ******"); for (size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG(<< "*** data type = " << print(dataTypes[i]) << " ***"); - maths::CMultivariateNormalConjugate<2> filter1(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); + maths::CMultivariateNormalConjugate<2> filter1( + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); maths::CMultivariateNormalConjugate<2> filter2(filter1); TDouble10Vec4Vec1Vec weights; @@ -131,7 +140,8 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() { } filter2.addSamples(VARIANCE_WEIGHT, samples, weights); - CPPUNIT_ASSERT(filter1.equalTolerance(filter2, maths::CToleranceTypes::E_RelativeTolerance, 1e-5)); + CPPUNIT_ASSERT(filter1.equalTolerance( + filter2, maths::CToleranceTypes::E_RelativeTolerance, 1e-5)); } // Test the count weight is equivalent to adding repeated samples. 
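The next hunk reformats the test for the identity named in the comment above: updating with one sample carrying count weight c must match updating with the same sample c times. A tiny self-contained illustration of why that holds for a weighted running mean (a hypothetical accumulator, not the ml-cpp API):

#include <cassert>
#include <cmath>
#include <iostream>

// Weighted running mean: adding (x, weight = c) must match adding x c times.
struct SMeanAccumulator {
    double count = 0.0;
    double mean = 0.0;
    void add(double x, double weight = 1.0) {
        count += weight;
        mean += weight * (x - mean) / count;
    }
};

int main() {
    SMeanAccumulator repeated, weighted;
    for (int i = 0; i < 10; ++i) { repeated.add(3.0); }
    weighted.add(3.0, 10.0);
    assert(std::fabs(repeated.mean - weighted.mean) < 1e-12);
    assert(std::fabs(repeated.count - weighted.count) < 1e-12);
    std::cout << "count weight == repeated samples\n";
}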
@@ -140,19 +150,23 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() { for (size_t i = 0; i < boost::size(dataTypes); ++i) { LOG_DEBUG(<< "*** data type = " << print(dataTypes[i]) << " ***"); - maths::CMultivariateNormalConjugate<2> filter1(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); + maths::CMultivariateNormalConjugate<2> filter1( + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); maths::CMultivariateNormalConjugate<2> filter2(filter1); double x = 3.0; std::size_t count = 10; for (std::size_t j = 0u; j < count; ++j) { - filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, x)), SINGLE_UNIT_WEIGHT_2); + filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, x)), + SINGLE_UNIT_WEIGHT_2); } TDouble10Vec1Vec sample(1, TDouble10Vec(2, x)); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, static_cast(count)))); + TDouble10Vec4Vec1Vec weight( + 1, TDouble10Vec4Vec(1, TDouble10Vec(2, static_cast(count)))); filter2.addSamples(COUNT_WEIGHT, sample, weight); - CPPUNIT_ASSERT(filter1.equalTolerance(filter2, maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5)); + CPPUNIT_ASSERT(filter1.equalTolerance( + filter2, maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5)); } } @@ -181,7 +195,8 @@ void CMultivariateNormalConjugateTest::testPropagation() { for (std::size_t i = 0u; i < boost::size(dataTypes); ++i) { LOG_DEBUG(<< "*** data type = " << print(dataTypes[i]) << " ***"); - maths::CMultivariateNormalConjugate<2> filter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i], 0.1)); + maths::CMultivariateNormalConjugate<2> filter( + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i], 0.1)); TDouble10Vec4Vec1Vec weights(samples.size(), UNIT_WEIGHT_2); filter.addSamples(COUNT_WEIGHT, samples, weights); @@ -200,7 +215,8 @@ void CMultivariateNormalConjugateTest::testPropagation() { LOG_DEBUG(<< "propagated precision = " << propagatedPrecision); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, (propagatedMean - initialMean).euclidean(), eps); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, (propagatedPrecision - initialPrecision).frobenius(), eps); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.0, (propagatedPrecision - initialPrecision).frobenius(), eps); } } @@ -219,14 +235,16 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() { const double decayRates[] = {0.0, 0.001, 0.01}; const unsigned int nt = 500u; - const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, + 85.0, 90.0, 95.0, 99.0}; test::CRandomNumbers rng; for (std::size_t i = 0; i < boost::size(decayRates); ++i) { LOG_DEBUG(<< "decay rate = " << decayRates[i]); - unsigned int errors[][8] = {{0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}, {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}}; + unsigned int errors[][8] = {{0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}, + {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}}; for (unsigned int t = 0; t < nt; ++t) { if ((t % 50) == 0) { @@ -245,9 +263,11 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() { // Create the posterior. 
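testMeanVectorEstimation, continued in the next hunk, samples the posterior for the mean, takes the central testIntervals[j]% spread of the sorted samples per component, and counts how often the true mean falls outside; with a zero decay rate the escape frequency should track 100 - testIntervals[j] percent. A small sketch of that coverage bookkeeping, assuming an already-collected sample:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Count whether a true value falls outside the central p% of a Monte
// Carlo sample; with a calibrated posterior this should happen roughly
// (100 - p)% of the time over repeated trials.
bool outsideCentralInterval(std::vector<double> samples, double truth, double p) {
    std::sort(samples.begin(), samples.end());
    double n = static_cast<double>(samples.size());
    std::size_t l = static_cast<std::size_t>(n * (0.5 - p / 200.0));
    std::size_t u = static_cast<std::size_t>(n * (0.5 + p / 200.0));
    return truth < samples[l] || truth > samples[u];
}

int main() {
    std::vector<double> s{9.2, 9.7, 9.9, 10.0, 10.1, 10.4, 10.8, 11.5};
    std::cout << outsideCentralInterval(s, 10.05, 80.0) << '\n'; // 0: covered
    std::cout << outsideCentralInterval(s, 12.00, 80.0) << '\n'; // 1: outside
}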
@@ -245,9 +263,11 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() {
 // Create the posterior.
 maths::CMultivariateNormalConjugate<2> filter(
- maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, decayRates[i]));
+ maths::CMultivariateNormalConjugate<2>::nonInformativePrior(
+ maths_t::E_ContinuousData, decayRates[i]));
 for (std::size_t j = 0u; j < samples.size(); ++j) {
- filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2);
+ filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]),
+ SINGLE_UNIT_WEIGHT_2);
 filter.propagateForwardsByTime(1.0);
 }
@@ -264,8 +284,10 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() {
 std::sort(componentSamples[1].begin(), componentSamples[1].end());

 for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
- std::size_t l = static_cast<std::size_t>(static_cast<double>(n) * (0.5 - testIntervals[j] / 200.0));
- std::size_t u = static_cast<std::size_t>(static_cast<double>(n) * (0.5 + testIntervals[j] / 200.0));
+ std::size_t l = static_cast<std::size_t>(
+ static_cast<double>(n) * (0.5 - testIntervals[j] / 200.0));
+ std::size_t u = static_cast<std::size_t>(
+ static_cast<double>(n) * (0.5 + testIntervals[j] / 200.0));
 for (std::size_t k = 0u; k < 2; ++k) {
 double a = componentSamples[k][l];
 double b = componentSamples[k][u];
@@ -280,7 +302,8 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() {
 for (std::size_t k = 0u; k < 2; ++k) {
 double interval = 100.0 * errors[k][j] / static_cast<double>(nt);

- LOG_DEBUG(<< "interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j]));
+ LOG_DEBUG(<< "interval = " << interval
+ << ", expectedInterval = " << (100.0 - testIntervals[j]));

 // If the decay rate is zero the intervals should be accurate.
 // Otherwise, they should be an upper bound.
@@ -309,14 +332,17 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() {
 const double decayRates[] = {0.0, 0.004, 0.04};

 const unsigned int nt = 500u;
- const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0};
+ const double testIntervals[] = {50.0, 60.0, 70.0, 80.0,
+ 85.0, 90.0, 95.0, 99.0};

 test::CRandomNumbers rng;

 for (std::size_t i = 0; i < boost::size(decayRates); ++i) {
 LOG_DEBUG(<< "decay rate = " << decayRates[i]);

- unsigned int errors[][8] = {{0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}, {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}, {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}};
+ unsigned int errors[][8] = {{0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u},
+ {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u},
+ {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}};

 std::size_t ij[][2] = {{0, 0}, {0, 1}, {1, 1}};
 TDoubleVec covariancesii;
@@ -331,7 +357,8 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() {
 // Generate the samples.
 double mean_[] = {10.0, 10.0};
- double covariances_[] = {covariancesii[2 * t], covariancesij[t], covariancesii[2 * t + 1]};
+ double covariances_[] = {covariancesii[2 * t], covariancesij[t],
+ covariancesii[2 * t + 1]};
 TDoubleVec mean(mean_, mean_ + 2);
 TDoubleVecVec covariances;
 covariances.push_back(TDoubleVec(covariances_, covariances_ + 2));
@@ -341,9 +368,11 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() {
 // Create the posterior.
 maths::CMultivariateNormalConjugate<2> filter(
- maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, decayRates[i]));
+ maths::CMultivariateNormalConjugate<2>::nonInformativePrior(
+ maths_t::E_ContinuousData, decayRates[i]));
 for (std::size_t j = 0u; j < samples.size(); ++j) {
- filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2);
+ filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]),
+ SINGLE_UNIT_WEIGHT_2);
 filter.propagateForwardsByTime(1.0);
 }
@@ -362,15 +391,19 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() {
 std::sort(elementSamples[2].begin(), elementSamples[2].end());

 TMatrix2 covarianceMatrix(covariances_, covariances_ + 3);
- TMatrix2 precisionMatrix(maths::fromDenseMatrix(maths::toDenseMatrix(covarianceMatrix).inverse()));
+ TMatrix2 precisionMatrix(maths::fromDenseMatrix(
+ maths::toDenseMatrix(covarianceMatrix).inverse()));

 for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
- std::size_t l = static_cast<std::size_t>(static_cast<double>(n) * (0.5 - testIntervals[j] / 200.0));
- std::size_t u = static_cast<std::size_t>(static_cast<double>(n) * (0.5 + testIntervals[j] / 200.0));
+ std::size_t l = static_cast<std::size_t>(
+ static_cast<double>(n) * (0.5 - testIntervals[j] / 200.0));
+ std::size_t u = static_cast<std::size_t>(
+ static_cast<double>(n) * (0.5 + testIntervals[j] / 200.0));
 for (std::size_t k = 0u; k < elementSamples.size(); ++k) {
 double a = elementSamples[k][l];
 double b = elementSamples[k][u];
- if (precisionMatrix(ij[k][0], ij[k][1]) < a || precisionMatrix(ij[k][0], ij[k][1]) > b) {
+ if (precisionMatrix(ij[k][0], ij[k][1]) < a ||
+ precisionMatrix(ij[k][0], ij[k][1]) > b) {
 ++errors[k][j];
 }
 }
@@ -381,7 +414,8 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() {
 for (std::size_t k = 0u; k < boost::size(errors); ++k) {
 double interval = 100.0 * errors[k][j] / static_cast<double>(nt);

- LOG_DEBUG(<< "interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j]));
+ LOG_DEBUG(<< "interval = " << interval
+ << ", expectedInterval = " << (100.0 - testIntervals[j]));

 // If the decay rate is zero the intervals should be accurate.
 // Otherwise, they should be an upper bound.
@@ -424,7 +458,8 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihood() {
 // Generate the samples.
double mean_[] = {meani[2 * t], meani[2 * t + 1]}; - double covariances_[] = {covariancesii[2 * t], covariancesij[t], covariancesii[2 * t + 1]}; + double covariances_[] = {covariancesii[2 * t], covariancesij[t], + covariancesii[2 * t + 1]}; TDoubleVec mean(mean_, mean_ + 2); TDoubleVecVec covariances; covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); @@ -450,15 +485,16 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihood() { for (std::size_t j = 0u; j < v.size(); ++j) { trace += v[j][j]; } - double intervals[][2] = {{m[0] - 3.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, - {m[0] - 3.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, - {m[0] - 3.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, - {m[0] - 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, - {m[0] - 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, - {m[0] - 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, - {m[0] + 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, - {m[0] + 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, - {m[0] + 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}}; + double intervals[][2] = { + {m[0] - 3.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] - 3.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] - 3.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}}; TVector2 expectedMean(m.begin(), m.end()); double elements[] = {v[0][0], v[0][1], v[1][1]}; @@ -502,12 +538,14 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihood() { CPPUNIT_ASSERT(covarianceError.frobenius() < expectedCovariance.frobenius()); meanMeanError.add(meanError.euclidean() / expectedMean.euclidean()); - meanCovarianceError.add(covarianceError.frobenius() / expectedCovariance.frobenius()); + meanCovarianceError.add(covarianceError.frobenius() / + expectedCovariance.frobenius()); } } LOG_DEBUG(<< "Mean mean error = " << maths::CBasicStatistics::mean(meanMeanError)); - LOG_DEBUG(<< "Mean covariance error = " << maths::CBasicStatistics::mean(meanCovarianceError)); + LOG_DEBUG(<< "Mean covariance error = " + << maths::CBasicStatistics::mean(meanCovarianceError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < 0.12); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanCovarianceError) < 0.07); } @@ -529,7 +567,8 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { TDouble10Vec1Vec samples; gaussianSamples(rng, 100, mean, covariance, samples); - maths::CMultivariateNormalConjugate<2> filter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); + maths::CMultivariateNormalConjugate<2> filter( + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); } @@ -554,9 +593,12 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { norm = std::sqrt(norm); double llm, ll, llp; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, modeMinusEps, SINGLE_UNIT_WEIGHT_2, llm); - 
filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, mode), SINGLE_UNIT_WEIGHT_2, ll);
- filter.jointLogMarginalLikelihood(COUNT_WEIGHT, modePlusEps, SINGLE_UNIT_WEIGHT_2, llp);
+ filter.jointLogMarginalLikelihood(COUNT_WEIGHT, modeMinusEps,
+ SINGLE_UNIT_WEIGHT_2, llm);
+ filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, mode),
+ SINGLE_UNIT_WEIGHT_2, ll);
+ filter.jointLogMarginalLikelihood(COUNT_WEIGHT, modePlusEps,
+ SINGLE_UNIT_WEIGHT_2, llp);
 double gradient = std::fabs(std::exp(llp) - std::exp(llm)) / norm;
 LOG_DEBUG(<< "gradient = " << gradient);
 CPPUNIT_ASSERT(gradient < 1e-6);
@@ -586,7 +628,8 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() {
 TDouble10Vec1Vec samples;
 gaussianSamples(rng, 50, mean_, covariance_, samples);

- maths::CMultivariateNormalConjugate<2> filter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData));
+ maths::CMultivariateNormalConjugate<2> filter(
+ maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData));

 std::size_t i = 0u;
 for (/**/; i < samples.size(); ++i) {
@@ -600,15 +643,17 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() {
 CPPUNIT_ASSERT(resamples.empty());
 } else {
 CPPUNIT_ASSERT(resamples.size() == 1);
- CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()),
- core::CContainerPrinter::print(resamples[0]));
+ CPPUNIT_ASSERT_EQUAL(
+ core::CContainerPrinter::print(filter.marginalLikelihoodMean()),
+ core::CContainerPrinter::print(resamples[0]));
 }

 filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2);
 }

 TDoubleVec p;
- empiricalProbabilityOfLessLikelySamples(mean.toVector(), covariance.toVectors(), p);
+ empiricalProbabilityOfLessLikelySamples(
+ mean.toVector(), covariance.toVectors(), p);

 TMeanAccumulator pAbsError;
 TMeanAccumulator pRelError;
@@ -633,21 +678,26 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() {
 LOG_DEBUG(<< "likelihood cov = " << likelihoodCov);
 LOG_DEBUG(<< "sample cov = " << sampleCov);

- CPPUNIT_ASSERT((sampleMean - likelihoodMean).euclidean() / likelihoodMean.euclidean() < 1e-6);
+ CPPUNIT_ASSERT(
+ (sampleMean - likelihoodMean).euclidean() / likelihoodMean.euclidean() < 1e-6);
 CPPUNIT_ASSERT((sampleCov - likelihoodCov).frobenius() / likelihoodCov.frobenius() < 0.01);

 TDoubleVec sampleProbabilities;
 for (std::size_t j = 0u; j < resamples.size(); ++j) {
 double ll;
- filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, resamples[j]), SINGLE_UNIT_WEIGHT_2, ll);
- sampleProbabilities.push_back(static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) /
- static_cast<double>(p.size()));
+ filter.jointLogMarginalLikelihood(COUNT_WEIGHT,
+ TDouble10Vec1Vec(1, resamples[j]),
+ SINGLE_UNIT_WEIGHT_2, ll);
+ sampleProbabilities.push_back(
+ static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) /
+ static_cast<double>(p.size()));
 }
 std::sort(sampleProbabilities.begin(), sampleProbabilities.end());

 LOG_DEBUG(<< "sample p = " << core::CContainerPrinter::print(sampleProbabilities));
 for (std::size_t j = 0u; j < sampleProbabilities.size(); ++j) {
- double expectedProbability = static_cast<double>(j + 1) / static_cast<double>(sampleProbabilities.size());
+ double expectedProbability = static_cast<double>(j + 1) /
+ static_cast<double>(sampleProbabilities.size());
 double error = std::fabs(sampleProbabilities[j] - expectedProbability);
 pAbsError.add(error);
 pRelError.add(error / expectedProbability);
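The ranking idiom reformatted above recurs throughout these tests: sort Monte Carlo log-likelihoods once, then turn any log-likelihood into an empirical "probability of a less likely sample" by its rank, found with std::lower_bound. A minimal standalone sketch of that computation:

#include <algorithm>
#include <iostream>
#include <vector>

// Empirical probability of a less likely sample: the fraction of a
// sorted Monte Carlo log-likelihood sample lying below ll, computed
// by rank via std::lower_bound.
double empiricalProbability(const std::vector<double>& sortedLogLikelihoods, double ll) {
    auto rank = std::lower_bound(sortedLogLikelihoods.begin(),
                                 sortedLogLikelihoods.end(), ll) -
                sortedLogLikelihoods.begin();
    return static_cast<double>(rank) /
           static_cast<double>(sortedLogLikelihoods.size());
}

int main() {
    std::vector<double> p{-9.0, -7.5, -6.0, -5.2, -4.8, -4.1, -3.9, -3.5};
    std::cout << empiricalProbability(p, -6.5) << '\n'; // prints 0.25
}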
@@ -676,8 +726,11 @@ void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() {
 {0.0, 0.0},
 {100.0, 50.0},
 };
- const double covariances[][3] = {{10.0, 0.0, 10.0}, {10.0, 9.0, 10.0}, {10.0, -9.0, 10.0}};
- const double offsets[][2] = {{0.0, 0.0}, {0.0, 6.0}, {4.0, 0.0}, {6.0, 6.0}, {6.0, -6.0}, {-8.0, 8.0}, {-8.0, -8.0}};
+ const double covariances[][3] = {
+ {10.0, 0.0, 10.0}, {10.0, 9.0, 10.0}, {10.0, -9.0, 10.0}};
+ const double offsets[][2] = {{0.0, 0.0}, {0.0, 6.0}, {4.0, 0.0},
+ {6.0, 6.0}, {6.0, -6.0}, {-8.0, 8.0},
+ {-8.0, -8.0}};

 test::CRandomNumbers rng;
@@ -695,9 +748,11 @@ void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() {
 rng.generateMultivariateNormalSamples(mean, covariance, 500, samples);

 maths::CMultivariateNormalConjugate<2> filter(
- maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData));
+ maths::CMultivariateNormalConjugate<2>::nonInformativePrior(
+ maths_t::E_ContinuousData));
 for (std::size_t k = 0u; k < samples.size(); ++k) {
- filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), SINGLE_UNIT_WEIGHT_2);
+ filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]),
+ SINGLE_UNIT_WEIGHT_2);
 }

 TDoubleVec p;
@@ -710,26 +765,34 @@ void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() {
 TVector2 x = TVector2(mean) + TVector2(offsets[k]);

 double ll;
- maths::gaussianLogLikelihood(TMatrix2(covariance), TVector2(offsets[k]), ll);
- double px = static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) / static_cast<double>(p.size());
+ maths::gaussianLogLikelihood(TMatrix2(covariance),
+ TVector2(offsets[k]), ll);
+ double px = static_cast<double>(
+ std::lower_bound(p.begin(), p.end(), ll) - p.begin()) /
+ static_cast<double>(p.size());

 double lb, ub;
 maths::CMultivariatePrior::TTail10Vec tail;
 filter.probabilityOfLessLikelySamples(
- maths_t::E_TwoSided, COUNT_WEIGHT, TDouble10Vec1Vec(1, x.toVector()), SINGLE_UNIT_WEIGHT_2, lb, ub, tail);
+ maths_t::E_TwoSided, COUNT_WEIGHT,
+ TDouble10Vec1Vec(1, x.toVector()),
+ SINGLE_UNIT_WEIGHT_2, lb, ub, tail);
 double pa = (lb + ub) / 2.0;

 LOG_DEBUG(<< " p(" << x << "), actual = " << pa << ", expected = " << px);
 meanAbsError.add(std::fabs(px - pa));

 if (px < 1.0 && px > 0.0) {
- meanRelError.add(std::fabs(std::log(px) - std::log(pa)) / std::fabs(std::log(px)));
+ meanRelError.add(std::fabs(std::log(px) - std::log(pa)) /
+ std::fabs(std::log(px)));
 }
 }

- LOG_DEBUG(<< "mean absolute error = " << maths::CBasicStatistics::mean(meanAbsError));
+ LOG_DEBUG(<< "mean absolute error = "
+ << maths::CBasicStatistics::mean(meanAbsError));
 CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanAbsError) < 0.018);
- LOG_DEBUG(<< "mean relative error = " << maths::CBasicStatistics::mean(meanRelError));
+ LOG_DEBUG(<< "mean relative error = "
+ << maths::CBasicStatistics::mean(meanRelError));
 CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanRelError) < 0.15);
 }
}
@@ -761,7 +824,9 @@ void CMultivariateNormalConjugateTest::testIntegerData() {
 TMatrix2 covariance(covariances[j], covariances[j] + 3);

 TDoubleVecVec samples;
- rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n, samples);
+ rng.generateMultivariateNormalSamples(
+ mean.toVector(),
+ covariance.toVectors(), n, samples);

 TDoubleVecVec uniform;
 TDoubleVec uniform_;
@@ -773,7 +838,8 @@ void CMultivariateNormalConjugateTest::testIntegerData() {
 maths::CMultivariateNormalConjugate<2> filter1(
 maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData));
maths::CMultivariateNormalConjugate<2> filter2( - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData)); for (std::size_t k = 0u; k < n; ++k) { TVector2 x(samples[k]); @@ -784,7 +850,8 @@ void CMultivariateNormalConjugateTest::testIntegerData() { } CPPUNIT_ASSERT(filter1.equalTolerance( - filter2, maths::CToleranceTypes::E_RelativeTolerance | maths::CToleranceTypes::E_AbsoluteTolerance, 0.005)); + filter2, maths::CToleranceTypes::E_RelativeTolerance | maths::CToleranceTypes::E_AbsoluteTolerance, + 0.005)); TMeanAccumulator meanLogLikelihood1; TMeanAccumulator meanLogLikelihood2; @@ -793,20 +860,24 @@ void CMultivariateNormalConjugateTest::testIntegerData() { TDouble10Vec1Vec sample(1, x.toVector()); double ll1; - filter1.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, ll1); + filter1.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, + SINGLE_UNIT_WEIGHT_2, ll1); meanLogLikelihood1.add(-ll1); sample[0] = (x + TVector2(uniform[k])).toVector(); double ll2; - filter2.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, ll2); + filter2.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, + SINGLE_UNIT_WEIGHT_2, ll2); meanLogLikelihood2.add(-ll2); } LOG_DEBUG(<< "meanLogLikelihood1 = " << maths::CBasicStatistics::mean(meanLogLikelihood1) - << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); + << ", meanLogLikelihood2 = " + << maths::CBasicStatistics::mean(meanLogLikelihood2)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(meanLogLikelihood1), maths::CBasicStatistics::mean(meanLogLikelihood2), 0.03); + maths::CBasicStatistics::mean(meanLogLikelihood1), + maths::CBasicStatistics::mean(meanLogLikelihood2), 0.03); } } } @@ -817,27 +888,31 @@ void CMultivariateNormalConjugateTest::testLowVariationData() { LOG_DEBUG(<< "+----------------------------------------------------------+"); { - maths::CMultivariateNormalConjugate<2> filter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData)); + maths::CMultivariateNormalConjugate<2> filter( + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData)); for (std::size_t i = 0u; i < 100; ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), SINGLE_UNIT_WEIGHT_2); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), + SINGLE_UNIT_WEIGHT_2); } TDouble10Vec10Vec covariances = filter.marginalLikelihoodCovariance(); LOG_DEBUG(<< "covariance matrix " << core::CContainerPrinter::print(covariances)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, 2.0 / (covariances[0][0] + covariances[1][1]), 0.3); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 12.0, 2.0 / (covariances[0][0] + covariances[1][1]), 0.3); } { maths::CMultivariateNormalConjugate<2> filter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < 100; ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), SINGLE_UNIT_WEIGHT_2); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), + SINGLE_UNIT_WEIGHT_2); } TDouble10Vec10Vec covariances = filter.marginalLikelihoodCovariance(); LOG_DEBUG(<< "covariance matrix " << core::CContainerPrinter::print(covariances)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / maths::MINIMUM_COEFFICIENT_OF_VARIATION / std::sqrt(2.0) / 430.5, - std::sqrt(2.0 / 
(covariances[0][0] + covariances[1][1])),
- 0.4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(
+ 1.0 / maths::MINIMUM_COEFFICIENT_OF_VARIATION / std::sqrt(2.0) / 430.5,
+ std::sqrt(2.0 / (covariances[0][0] + covariances[1][1])), 0.4);
 }
}
@@ -858,7 +933,8 @@ void CMultivariateNormalConjugateTest::testPersist() {
 maths_t::EDataType dataType = maths_t::E_ContinuousData;

- maths::CMultivariateNormalConjugate<2> origFilter(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataType));
+ maths::CMultivariateNormalConjugate<2> origFilter(
+ maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataType));
 for (std::size_t i = 0u; i < samples.size(); ++i) {
 origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2);
 }
@@ -879,14 +955,13 @@ void CMultivariateNormalConjugateTest::testPersist() {
 CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
 core::CRapidXmlStateRestoreTraverser traverser(parser);

- maths::SDistributionRestoreParams params(dataType,
- decayRate + 0.1,
- maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
- maths::MINIMUM_CLUSTER_SPLIT_COUNT,
- maths::MINIMUM_CATEGORY_COUNT);
+ maths::SDistributionRestoreParams params(
+ dataType, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+ maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
 maths::CMultivariateNormalConjugate<2> restoredFilter(params, traverser);

- LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
+ LOG_DEBUG(<< "orig checksum = " << checksum
+ << " restored checksum = " << restoredFilter.checksum());
 CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());

 // The XML representation of the new filter should be the same as the original
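Both testPersist variants follow the same round-trip discipline: persist to XML, restore through a traverser, assert equal checksums, then assert the re-persisted representation matches the original. A compact sketch of that pattern with a hypothetical persistable struct (the real XML inserter/traverser machinery is elided):

#include <cassert>
#include <cstddef>
#include <functional>
#include <sstream>
#include <string>

// Hypothetical persistable state: the round-trip test pattern is
// persist -> restore -> compare checksums -> re-persist -> compare text.
struct SState {
    double mean = 0.0;
    double variance = 1.0;
    std::string persist() const {
        std::ostringstream s;
        s << mean << ' ' << variance;
        return s.str();
    }
    void restore(const std::string& text) {
        std::istringstream s(text);
        s >> mean >> variance;
    }
    std::size_t checksum() const {
        return std::hash<std::string>{}(this->persist());
    }
};

int main() {
    SState orig{42.0, 3.5};
    std::string state = orig.persist();
    SState restored;
    restored.restore(state);
    assert(orig.checksum() == restored.checksum());
    assert(restored.persist() == state); // same state => same representation
}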
@@ -907,16 +982,22 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() {
 using TVector10 = maths::CVectorNx1<double, 10>;
 using TMatrix10 = maths::CSymmetricMatrixNxN<double, 10>;

- double means[] = {10.0, 10.0, 20.0, 20.0, 30.0, 20.0, 10.0, 40.0, 30.0, 20.0};
- double covariances[] = {10.0, 9.0, 10.0, -5.0, 1.0, 6.0, -8.0, 9.0, 4.0, 20.0, 8.0, 3.0, 1.0, 12.0, 12.0, -4.0, 2.0, 1.0, 1.0,
- 4.0, 4.0, 5.0, 1.0, 3.0, 8.0, 10.0, 3.0, 10.0, 9.0, 9.0, 5.0, 19.0, 11.0, 3.0, 9.0, 25.0, 5.0, 0.0,
- 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+ double means[] = {10.0, 10.0, 20.0, 20.0, 30.0,
+ 20.0, 10.0, 40.0, 30.0, 20.0};
+ double covariances[] = {
+ 10.0, 9.0, 10.0, -5.0, 1.0, 6.0, -8.0, 9.0, 4.0, 20.0, 8.0,
+ 3.0, 1.0, 12.0, 12.0, -4.0, 2.0, 1.0, 1.0, 4.0, 4.0, 5.0,
+ 1.0, 3.0, 8.0, 10.0, 3.0, 10.0, 9.0, 9.0, 5.0, 19.0, 11.0,
+ 3.0, 9.0, 25.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 20.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
 TVector10 mean(means, means + boost::size(means));
 TMatrix10 covariance(covariances, covariances + boost::size(covariances));

 test::CRandomNumbers rng;

 TDoubleVecVec samples_;
- rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), 2000, samples_);
+ rng.generateMultivariateNormalSamples(mean.toVector(),
+ covariance.toVectors(),
+ 2000, samples_);

 TDouble10Vec1Vec samples;
 samples.reserve(samples.size() + samples_.size());
@@ -934,7 +1015,8 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() {
 maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData),
 maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData),
 maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)};
- std::size_t indices[][2] = {{0, 1}, {0, 2}, {0, 3}, {0, 4}, {0, 5}, {0, 6}, {0, 7}, {0, 8}, {0, 9}};
+ std::size_t indices[][2] = {{0, 1}, {0, 2}, {0, 3}, {0, 4}, {0, 5},
+ {0, 6}, {0, 7}, {0, 8}, {0, 9}};

 for (std::size_t i = 0u; i < 200; ++i) {
 for (std::size_t j = 0u; j < boost::size(filters); ++j) {
@@ -957,7 +1039,9 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() {
 sample[0][1] = samples[i][indices[j][1]];
 double lb, ub;
 maths::CMultivariatePrior::TTail10Vec tail;
- filters[j].probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, lb, ub, tail);
+ filters[j].probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT,
+ sample, SINGLE_UNIT_WEIGHT_2,
+ lb, ub, tail);
 p[j].push_back((lb + ub) / 2.0);
 mpi = std::min(mpi, (lb + ub) / 2.0);
 epi.add((lb + ub) / 2.0, 0.5);
@@ -978,13 +1062,19 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() {
 for (std::size_t i = 0u; i < boost::size(test); ++i) {
 for (std::size_t j = 0u; j < p.size(); ++j) {
 LOG_DEBUG(<< j << ") " << test[i] << " "
- << static_cast<double>(std::lower_bound(p[j].begin(), p[j].end(), test[i]) - p[j].begin()) /
+ << static_cast<double>(
+ std::lower_bound(p[j].begin(), p[j].end(), test[i]) -
+ p[j].begin()) /
 static_cast<double>(p[j].size()));
 }
 LOG_DEBUG(<< "min " << test[i] << " "
- << static_cast<double>(std::lower_bound(mp.begin(), mp.end(), test[i]) - mp.begin()) / static_cast<double>(mp.size()));
+ << static_cast<double>(
+ std::lower_bound(mp.begin(), mp.end(), test[i]) - mp.begin()) /
+ static_cast<double>(mp.size()));
 LOG_DEBUG(<< "corrected min " << test[i] << " "
- << static_cast<double>(std::lower_bound(ep.begin(), ep.end(), test[i]) - ep.begin()) / static_cast<double>(ep.size()));
+ << static_cast<double>(
+ std::lower_bound(ep.begin(), ep.end(), test[i]) - ep.begin()) /
+ static_cast<double>(ep.size()));
 }
}
@@ -994,18 +1084,14 @@ void CMultivariateNormalConjugateTest::dataGenerator() {
 const double means[][2] = {{10.0, 20.0}, {30.0, 25.0}, {50.0, 5.0}, {100.0, 50.0}};
- const double covariances[][3] = {{3.0, 2.0, 2.0}, {6.0, -4.0, 5.0}, {4.0, 1.0, 3.0}, {20.0, -12.0, 12.0}};
-
- double anomalies[][4] = {{7000.0, 0.0, 2.8, -2.8},
- {7001.0, 0.0, 2.8, -2.8},
- {7002.0, 0.0, 2.8, -2.8},
- {7003.0, 0.0, 2.8, -2.8},
- {8000.0, 3.0, 3.5, 4.9},
- {8001.0, 3.0, 3.5, 4.9},
- {8002.0, 3.0, 3.5, 4.9},
- {8003.0, 3.0, 3.5, 4.9},
- {8004.0, 3.0, 3.5, 4.9},
- {8005.0, 3.0, 3.5, 4.9}};
+ const double covariances[][3] = {
+ {3.0, 2.0, 2.0}, {6.0, -4.0, 5.0}, {4.0, 1.0, 3.0}, {20.0, -12.0, 12.0}};
+
+ double anomalies[][4] = {{7000.0, 0.0, 2.8, -2.8}, {7001.0, 0.0, 2.8, -2.8},
+ {7002.0, 0.0, 2.8, -2.8}, {7003.0, 0.0, 2.8, -2.8},
+ {8000.0, 3.0, 3.5, 4.9}, {8001.0, 3.0, 3.5, 4.9},
+ {8002.0, 3.0, 3.5, 4.9}, {8003.0, 3.0, 3.5, 4.9},
+ {8004.0, 3.0, 3.5, 4.9}, {8005.0, 3.0, 3.5, 4.9}};

 test::CRandomNumbers rng;
@@ -1035,30 +1121,39 @@ CppUnit::Test* CMultivariateNormalConjugateTest::suite() {
 CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultivariateNormalConjugateTest");

- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testMultipleUpdate",
- &CMultivariateNormalConjugateTest::testMultipleUpdate));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testPropagation",
- &CMultivariateNormalConjugateTest::testPropagation));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
- "CMultivariateNormalConjugateTest::testMeanVectorEstimation",
- &CMultivariateNormalConjugateTest::testMeanVectorEstimation));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation",
- &CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation));
+ "CMultivariateNormalConjugateTest::testMultipleUpdate",
+ &CMultivariateNormalConjugateTest::testMultipleUpdate));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
+ "CMultivariateNormalConjugateTest::testPropagation",
+ &CMultivariateNormalConjugateTest::testPropagation));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
+ "CMultivariateNormalConjugateTest::testMeanVectorEstimation",
+ &CMultivariateNormalConjugateTest::testMeanVectorEstimation));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
+ "CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation",
+ &CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
+ "CMultivariateNormalConjugateTest::testMarginalLikelihood",
+ &CMultivariateNormalConjugateTest::testMarginalLikelihood));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
+ "CMultivariateNormalConjugateTest::testMarginalLikelihoodMode",
+ &CMultivariateNormalConjugateTest::testMarginalLikelihoodMode));
+ suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
+ "CMultivariateNormalConjugateTest::testSampleMarginalLikelihood",
+ &CMultivariateNormalConjugateTest::testSampleMarginalLikelihood));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
- "CMultivariateNormalConjugateTest::testMarginalLikelihood", &CMultivariateNormalConjugateTest::testMarginalLikelihood));
+ "CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples",
+ &CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
- "CMultivariateNormalConjugateTest::testMarginalLikelihoodMode", &CMultivariateNormalConjugateTest::testMarginalLikelihoodMode));
+ "CMultivariateNormalConjugateTest::testIntegerData",
+ &CMultivariateNormalConjugateTest::testIntegerData));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
- "CMultivariateNormalConjugateTest::testSampleMarginalLikelihood", &CMultivariateNormalConjugateTest::testSampleMarginalLikelihood));
- suiteOfTests->addTest(
- new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples",
- &CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testIntegerData",
- &CMultivariateNormalConjugateTest::testIntegerData));
+ "CMultivariateNormalConjugateTest::testLowVariationData",
+ &CMultivariateNormalConjugateTest::testLowVariationData));
 suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
- "CMultivariateNormalConjugateTest::testLowVariationData", &CMultivariateNormalConjugateTest::testLowVariationData));
- suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateNormalConjugateTest>("CMultivariateNormalConjugateTest::testPersist",
- &CMultivariateNormalConjugateTest::testPersist));
+ "CMultivariateNormalConjugateTest::testPersist",
+ &CMultivariateNormalConjugateTest::testPersist));
 //suiteOfTests->addTest( new CppUnit::TestCaller<CMultivariateNormalConjugateTest>(
 // "CMultivariateNormalConjugateTest::calibrationExperiment",
 // &CMultivariateNormalConjugateTest::calibrationExperiment) );
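The next file exercises CMultivariateOneOfNPrior; its CMinusLogLikelihood helper simply negates the joint log-likelihood so a minimizer can locate the mode. A toy sketch of the same trick on a 1-d Gaussian, using finite-difference gradient descent (illustrative only; the real test uses maths::CGradientDescent and the prior's jointLogMarginalLikelihood):

#include <cmath>
#include <iostream>

// Minimizing the negative log-likelihood is equivalent to maximizing
// the likelihood; this sign flip is what CMinusLogLikelihood does.
double minusLogLikelihood(double x, double mean, double variance) {
    const double pi = 3.141592653589793;
    double r = x - mean;
    return 0.5 * (std::log(2.0 * pi * variance) + r * r / variance);
}

int main() {
    // Crude gradient descent on -log L for N(3, 4); converges to the mode x = 3.
    double x = 0.0, step = 0.5, eps = 1e-4;
    for (int i = 0; i < 1000; ++i) {
        double g = (minusLogLikelihood(x + eps, 3.0, 4.0) -
                    minusLogLikelihood(x - eps, 3.0, 4.0)) / (2.0 * eps);
        x -= step * g;
    }
    std::cout << "mode ~= " << x << '\n';
}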
diff --git a/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc b/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc
index 230ac69bfa..acbea2ca37 100644
--- a/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc
+++ b/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc
@@ -45,12 +45,14 @@ const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, UNIT_WEIGHT_2);

 class CMinusLogLikelihood : public maths::CGradientDescent::CFunction {
 public:
- CMinusLogLikelihood(const maths::CMultivariateOneOfNPrior& prior) : m_Prior(&prior) {}
+ CMinusLogLikelihood(const maths::CMultivariateOneOfNPrior& prior)
+ : m_Prior(&prior) {}

 bool operator()(const maths::CGradientDescent::TVector& x, double& result) const {
 if (m_Prior->jointLogMarginalLikelihood(
- COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(x.toVector())), SINGLE_UNIT_WEIGHT_2, result) ==
- maths_t::E_FpNoErrors) {
+ COUNT_WEIGHT,
+ TDouble10Vec1Vec(1, TDouble10Vec(x.toVector())),
+ SINGLE_UNIT_WEIGHT_2, result) == maths_t::E_FpNoErrors) {
 result = -result;
 return true;
 }
@@ -62,16 +64,23 @@ class CMinusLogLikelihood : public maths::CGradientDescent::CFunction {
 };

 template<std::size_t N>
-maths::CMultivariateMultimodalPrior<N> makeMultimodal(maths_t::EDataType dataType, double decayRate = 0.0) {
- maths::CXMeansOnline<maths::CFloatStorage, N> clusterer(dataType, maths_t::E_ClustersFractionWeight, decayRate);
+maths::CMultivariateMultimodalPrior<N>
+makeMultimodal(maths_t::EDataType dataType, double decayRate = 0.0) {
+ maths::CXMeansOnline<maths::CFloatStorage, N> clusterer(
+ dataType, maths_t::E_ClustersFractionWeight, decayRate);
 return maths::CMultivariateMultimodalPrior<N>(
- dataType, clusterer, maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate), decayRate);
+ dataType, clusterer,
+ maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate),
+ decayRate);
 }

 template<std::size_t N>
-maths::CMultivariateOneOfNPrior makeOneOfN(maths_t::EDataType dataType, double decayRate = 0.0) {
+maths::CMultivariateOneOfNPrior
+makeOneOfN(maths_t::EDataType dataType, double decayRate = 0.0) {
 TPriorPtrVec priors;
- priors.push_back(TPriorPtr(maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate).clone()));
+ priors.push_back(TPriorPtr(
+ maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate)
+ .clone()));
 priors.push_back(TPriorPtr(makeMultimodal<N>(dataType, decayRate).clone()));
 return maths::CMultivariateOneOfNPrior(N, priors, dataType, decayRate);
 }
@@ -86,7 +95,9 @@ void gaussianSamples(test::CRandomNumbers& rng,
 TVector2 mean(means[i], means[i] + 2);
 TMatrix2 covariance(covariances[i], covariances[i] + 3);
 TDoubleVecVec samples_;
- rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), n[i], samples_);
+ rng.generateMultivariateNormalSamples(mean.toVector(),
+ covariance.toVectors(),
+ n[i], samples_);
 samples.reserve(samples.size() + samples_.size());
 for (std::size_t j = 0u; j < samples_.size(); ++j) {
 samples.push_back(TDouble10Vec(samples_[j].begin(), samples_[j].end()));
 }
@@ -138,7 +149,9 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() {
 TDouble10Vec1Vec samples;
 {
 TDoubleVecVec samples_;
- rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), 100, samples_);
+ rng.generateMultivariateNormalSamples(mean.toVector(),
+ covariance.toVectors(),
+ 100, samples_);
 seedSamples.reserve(10);
 for (std::size_t i = 0u; i < 10; ++i) {
 seedSamples.push_back(TDouble10Vec(samples_[i].begin(), samples_[i].end()));
 }
@@ -167,7 +180,8 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() {
 TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0)));
 filter1.addSamples(COUNT_WEIGHT, sample, weight);
 }
-
TDouble10Vec4Vec1Vec weights(samples.size(), + TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); filter2.addSamples(COUNT_WEIGHT, samples, weights); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); @@ -232,7 +246,8 @@ void CMultivariateOneOfNPriorTest::testPropagation() { maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData, decayRate)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), TDouble10Vec4Vec1Vec(1, UNIT_WEIGHT_2)); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), + TDouble10Vec4Vec1Vec(1, UNIT_WEIGHT_2)); } double numberSamples = filter.numberSamples(); @@ -245,19 +260,22 @@ void CMultivariateOneOfNPriorTest::testPropagation() { double propagatedNumberSamples = filter.numberSamples(); TDouble10Vec propagatedMean = filter.marginalLikelihoodMean(); TDouble10Vec10Vec propagatedCovariance = filter.marginalLikelihoodCovariance(); - double propagatedLogWeightRatio = std::fabs(filter.logWeights()[0] - filter.logWeights()[1]); + double propagatedLogWeightRatio = + std::fabs(filter.logWeights()[0] - filter.logWeights()[1]); LOG_DEBUG(<< "numberSamples = " << numberSamples); LOG_DEBUG(<< "propagatedNumberSamples = " << propagatedNumberSamples); LOG_DEBUG(<< "mean = " << core::CContainerPrinter::print(mean)); LOG_DEBUG(<< "propagatedMean = " << core::CContainerPrinter::print(propagatedMean)); LOG_DEBUG(<< "covariance = " << core::CContainerPrinter::print(covariance)); - LOG_DEBUG(<< "propagatedCovariance = " << core::CContainerPrinter::print(propagatedCovariance)); + LOG_DEBUG(<< "propagatedCovariance = " + << core::CContainerPrinter::print(propagatedCovariance)); LOG_DEBUG(<< "logWeightRatio = " << logWeightRatio); LOG_DEBUG(<< "propagatedLogWeightRatio = " << propagatedLogWeightRatio); CPPUNIT_ASSERT(propagatedNumberSamples < numberSamples); - CPPUNIT_ASSERT((TVector2(propagatedMean) - TVector2(mean)).euclidean() < eps * TVector2(mean).euclidean()); + CPPUNIT_ASSERT((TVector2(propagatedMean) - TVector2(mean)).euclidean() < + eps * TVector2(mean).euclidean()); Eigen::MatrixXd c(2, 2); Eigen::MatrixXd cp(2, 2); for (std::size_t i = 0u; i < 2; ++i) { @@ -300,9 +318,11 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { const double decayRates[] = {0.0, 0.004, 0.04}; for (std::size_t i = 0; i < boost::size(decayRates); ++i) { - maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); + maths::CMultivariateOneOfNPrior filter( + makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), + SINGLE_UNIT_WEIGHT_2); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, sum(filter.weights()), 1e-6); filter.propagateForwardsByTime(1.0); CPPUNIT_ASSERT(equal(sum(filter.weights()), 1.0)); @@ -326,10 +346,12 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { double previousLogWeightRatio = -6700; for (std::size_t i = 0u; i < boost::size(decayRates); ++i) { - maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); + maths::CMultivariateOneOfNPrior filter( + makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, 
samples[j]), + SINGLE_UNIT_WEIGHT_2); filter.propagateForwardsByTime(1.0); } @@ -365,13 +387,18 @@ void CMultivariateOneOfNPriorTest::testModelUpdate() { const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; for (std::size_t i = 0u; i < boost::size(dataTypes); ++i) { - maths::CMultivariateNormalConjugate<2> normal = maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i]); - maths::CMultivariateMultimodalPrior<2> multimodal = makeMultimodal<2>(dataTypes[i]); + maths::CMultivariateNormalConjugate<2> normal = + maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i]); + maths::CMultivariateMultimodalPrior<2> multimodal = + makeMultimodal<2>(dataTypes[i]); maths::CMultivariateOneOfNPrior oneOfN(makeOneOfN<2>(dataTypes[i])); - normal.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); - multimodal.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); - oneOfN.addSamples(COUNT_WEIGHT, samples, TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); + normal.addSamples(COUNT_WEIGHT, samples, + TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); + multimodal.addSamples(COUNT_WEIGHT, samples, + TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); + oneOfN.addSamples(COUNT_WEIGHT, samples, + TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); CPPUNIT_ASSERT_EQUAL(normal.checksum(), oneOfN.models()[0]->checksum()); CPPUNIT_ASSERT_EQUAL(multimodal.checksum(), oneOfN.models()[1]->checksum()); @@ -419,7 +446,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { // Generate the samples. double mean_[] = {meani[2 * t], meani[2 * t + 1]}; - double covariances_[] = {covariancesii[2 * t], covariancesij[t], covariancesii[2 * t + 1]}; + double covariances_[] = {covariancesii[2 * t], covariancesij[t], + covariancesii[2 * t + 1]}; TDoubleVec mean(mean_, mean_ + 2); TDoubleVecVec covariances; covariances.push_back(TDoubleVec(covariances_, covariances_ + 2)); @@ -433,7 +461,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { TMeanAccumulator meanCovarianceError; for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), + SINGLE_UNIT_WEIGHT_2); if (!filter.isNonInformative()) { TDouble10Vec m = filter.marginalLikelihoodMean(); @@ -444,15 +473,16 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { for (std::size_t j = 0u; j < v.size(); ++j) { trace += v[j][j]; } - double intervals[][2] = {{m[0] - 3.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, - {m[0] - 3.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, - {m[0] - 3.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, - {m[0] - 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, - {m[0] - 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, - {m[0] - 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, - {m[0] + 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, - {m[0] + 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, - {m[0] + 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}}; + double intervals[][2] = { + {m[0] - 3.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] - 3.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] - 3.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] - 1.0 * 
std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] - 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] - 3.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] - 1.0 * std::sqrt(trace)}, + {m[0] + 1.0 * std::sqrt(trace), m[1] + 1.0 * std::sqrt(trace)}}; TVector2 expectedMean(m.begin(), m.end()); double elements[] = {v[0][0], v[0][1], v[1][1]}; @@ -493,15 +523,18 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { TVector2 meanError = actualMean - expectedMean; TMatrix2 covarianceError = actualCovariance - expectedCovariance; CPPUNIT_ASSERT(meanError.euclidean() < expectedMean.euclidean()); - CPPUNIT_ASSERT(covarianceError.frobenius() < expectedCovariance.frobenius()); + CPPUNIT_ASSERT(covarianceError.frobenius() < + expectedCovariance.frobenius()); meanMeanError.add(meanError.euclidean() / expectedMean.euclidean()); - meanCovarianceError.add(covarianceError.frobenius() / expectedCovariance.frobenius()); + meanCovarianceError.add(covarianceError.frobenius() / + expectedCovariance.frobenius()); } } LOG_DEBUG(<< "Mean mean error = " << maths::CBasicStatistics::mean(meanMeanError)); - LOG_DEBUG(<< "Mean covariance error = " << maths::CBasicStatistics::mean(meanCovarianceError)); + LOG_DEBUG(<< "Mean covariance error = " + << maths::CBasicStatistics::mean(meanCovarianceError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < 0.16); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanCovarianceError) < 0.09); } @@ -529,14 +562,16 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { } for (std::size_t i = 0u; i < samples_.size(); ++i) { for (std::size_t j = 0u; j < samples_[i].size(); ++j) { - samples.push_back((TVector2(20.0) + samples_[i][j]).toVector()); + samples.push_back( + (TVector2(20.0) + samples_[i][j]).toVector()); } } rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), + SINGLE_UNIT_WEIGHT_2); } TDouble10Vec m = filter.marginalLikelihoodMean(); @@ -554,15 +589,24 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { LOG_DEBUG(<< "m = " << means[i]); LOG_DEBUG(<< "v = " << trace); - double intervals[][2] = {{means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, - {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, - {means[i](0) - 3.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}, - {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, - {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, - {means[i](0) - 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}, - {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 3.0 * std::sqrt(trace)}, - {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) - 1.0 * std::sqrt(trace)}, - {means[i](0) + 1.0 * std::sqrt(trace), means[i](1) + 1.0 * std::sqrt(trace)}}; + double intervals[][2] = {{means[i](0) - 3.0 * std::sqrt(trace), + means[i](1) - 3.0 * std::sqrt(trace)}, + {means[i](0) - 3.0 * std::sqrt(trace), + means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) - 3.0 * std::sqrt(trace), + means[i](1) + 1.0 * std::sqrt(trace)}, + {means[i](0) - 1.0 * std::sqrt(trace), + means[i](1) - 3.0 * std::sqrt(trace)}, + {means[i](0) - 1.0 * 
std::sqrt(trace), + means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) - 1.0 * std::sqrt(trace), + means[i](1) + 1.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), + means[i](1) - 3.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), + means[i](1) - 1.0 * std::sqrt(trace)}, + {means[i](0) + 1.0 * std::sqrt(trace), + means[i](1) + 1.0 * std::sqrt(trace)}}; CUnitKernel<2> likelihoodKernel(filter); CMeanKernel<2> meanKernel(filter); CCovarianceKernel<2> covarianceKernel(filter, expectedMean); @@ -602,16 +646,19 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { TMatrix2 covarianceError = actualCovariance - expectedCovariance; CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, z, 0.7); CPPUNIT_ASSERT(meanError.euclidean() < 0.3 * expectedMean.euclidean()); - CPPUNIT_ASSERT(covarianceError.frobenius() < 0.25 * expectedCovariance.frobenius()); + CPPUNIT_ASSERT(covarianceError.frobenius() < + 0.25 * expectedCovariance.frobenius()); meanZ.add(z); meanMeanError.add(meanError.euclidean() / expectedMean.euclidean()); - meanCovarianceError.add(covarianceError.frobenius() / expectedCovariance.frobenius()); + meanCovarianceError.add(covarianceError.frobenius() / + expectedCovariance.frobenius()); } LOG_DEBUG(<< "Mean Z = " << maths::CBasicStatistics::mean(meanZ)); LOG_DEBUG(<< "Mean mean error = " << maths::CBasicStatistics::mean(meanMeanError)); - LOG_DEBUG(<< "Mean covariance error = " << maths::CBasicStatistics::mean(meanCovarianceError)); + LOG_DEBUG(<< "Mean covariance error = " + << maths::CBasicStatistics::mean(meanCovarianceError)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, maths::CBasicStatistics::mean(meanZ), 0.3); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanMeanError) < 0.1); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanCovarianceError) < 0.16); @@ -670,9 +717,12 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() { if (!filter.isNonInformative()) { if (j % 10 == 0) { LOG_DEBUG(<< "expected = " << maths::CBasicStatistics::mean(expectedMean) - << " actual = " << core::CContainerPrinter::print(filter.marginalLikelihoodMean())); + << " actual = " + << core::CContainerPrinter::print(filter.marginalLikelihoodMean())); } - double error = (TVector2(filter.marginalLikelihoodMean()) - maths::CBasicStatistics::mean(expectedMean)).euclidean() / + double error = (TVector2(filter.marginalLikelihoodMean()) - + maths::CBasicStatistics::mean(expectedMean)) + .euclidean() / maths::CBasicStatistics::mean(expectedMean).euclidean(); meanError.add(error); CPPUNIT_ASSERT(error < 0.2); @@ -713,36 +763,39 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { for (std::size_t j = 0u; j < boost::size(covariances); ++j) { std::size_t n[] = {100}; const double mean[][2] = {{means[i][0], means[i][1]}}; - const double covariance[][3] = {{covariances[i][0], covariances[i][1], covariances[i][2]}}; - LOG_DEBUG(<< "*** mean = " << core::CContainerPrinter::print(mean[0], mean[0] + 2) << ", variance = " << covariance[0][0] - << " ***"); + const double covariance[][3] = { + {covariances[i][0], covariances[i][1], covariances[i][2]}}; + LOG_DEBUG(<< "*** mean = " + << core::CContainerPrinter::print(mean[0], mean[0] + 2) + << ", variance = " << covariance[0][0] << " ***"); TDouble10Vec1Vec samples; gaussianSamples(rng, 1, n, mean, covariance, samples); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t k = 0u; k < samples.size(); ++k) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), SINGLE_UNIT_WEIGHT_2); + 
filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]),
+                                  SINGLE_UNIT_WEIGHT_2);
             }
 
             CMinusLogLikelihood likelihood(filter);
-            maths::CGradientDescent::CEmpiricalCentralGradient gradientOfLikelihood(likelihood, 1e-3);
+            maths::CGradientDescent::CEmpiricalCentralGradient gradientOfLikelihood(
+                likelihood, 1e-3);
             maths::CGradientDescent gd(learnRates[j], 0.75);
             maths::CVector<double> expectedMode;
             TDoubleVec likelihoods;
             gd.run(20, // iterations
-                   maths::CVector<double>(mean[0], mean[0] + 2),
-                   likelihood,
-                   gradientOfLikelihood,
-                   expectedMode,
-                   likelihoods);
+                   maths::CVector<double>(mean[0], mean[0] + 2), likelihood,
+                   gradientOfLikelihood, expectedMode, likelihoods);
 
             TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2);
-            LOG_DEBUG(<< "marginalLikelihoodMode = " << core::CContainerPrinter::print(mode) << ", expectedMode = " << expectedMode);
+            LOG_DEBUG(<< "marginalLikelihoodMode = " << core::CContainerPrinter::print(mode)
+                      << ", expectedMode = " << expectedMode);
             for (std::size_t k = 0u; k < 2; ++k) {
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode(k), mode[k], 0.01 * expectedMode(k));
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode(k), mode[k],
+                                             0.01 * expectedMode(k));
             }
         }
     }
@@ -770,20 +823,19 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() {
         }
 
         CMinusLogLikelihood likelihood(filter);
-        maths::CGradientDescent::CEmpiricalCentralGradient gradientOfLikelihood(likelihood, 1e-3);
+        maths::CGradientDescent::CEmpiricalCentralGradient gradientOfLikelihood(
+            likelihood, 1e-3);
         maths::CGradientDescent gd(0.2, 0.75);
         maths::CVector<double> expectedMode;
         TDoubleVec likelihoods;
         gd.run(20, // iterations
-               maths::CVector<double>(means[0], means[0] + 2),
-               likelihood,
-               gradientOfLikelihood,
-               expectedMode,
-               likelihoods);
+               maths::CVector<double>(means[0], means[0] + 2), likelihood,
+               gradientOfLikelihood, expectedMode, likelihoods);
 
         TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2);
-        LOG_DEBUG(<< "marginalLikelihoodMode = " << core::CContainerPrinter::print(mode) << ", expectedMode = " << expectedMode);
+        LOG_DEBUG(<< "marginalLikelihoodMode = " << core::CContainerPrinter::print(mode)
+                  << ", expectedMode = " << expectedMode);
 
         for (std::size_t i = 0u; i < 2; ++i) {
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode(i), mode[i], 0.2 * expectedMode(i));
@@ -834,18 +886,22 @@ void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood() {
         maths::CSampling::weightedSample(20, weights, counts);
         LOG_DEBUG(<< "counts = " << core::CContainerPrinter::print(counts));
 
-        maths::CMultivariateOneOfNPrior::TPriorCPtr3Vec posteriorModels = filter.models();
+        maths::CMultivariateOneOfNPrior::TPriorCPtr3Vec posteriorModels =
+            filter.models();
         TDouble10Vec1Vec normalSamples;
         posteriorModels[0]->sampleMarginalLikelihood(counts[0], normalSamples);
         TDouble10Vec1Vec multimodalSamples;
         posteriorModels[1]->sampleMarginalLikelihood(counts[1], multimodalSamples);
 
         TDouble10Vec1Vec expectedSampled(normalSamples);
-        expectedSampled.insert(expectedSampled.end(), multimodalSamples.begin(), multimodalSamples.end());
+        expectedSampled.insert(expectedSampled.end(), multimodalSamples.begin(),
+                               multimodalSamples.end());
         std::sort(expectedSampled.begin(), expectedSampled.end());
-        LOG_DEBUG(<< "expected samples = " << core::CContainerPrinter::print(expectedSampled));
+        LOG_DEBUG(<< "expected samples = "
+                  << core::CContainerPrinter::print(expectedSampled));
         LOG_DEBUG(<< "samples = " << core::CContainerPrinter::print(sampled));
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), 
core::CContainerPrinter::print(sampled));
+            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled),
+                                 core::CContainerPrinter::print(sampled));
         }
     }
 }
@@ -887,7 +943,8 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() {
         double lowerBound, upperBound;
         maths::CMultivariatePrior::TTail10Vec tail;
         CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(
-            maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, lowerBound, upperBound, tail));
+            maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2,
+            lowerBound, upperBound, tail));
         CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound);
 
         double probability = (lowerBound + upperBound) / 2.0;
@@ -900,15 +957,18 @@
             double modelLowerBound, modelUpperBound;
             double weight = weights[j];
             CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples(
-                maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, modelLowerBound, modelUpperBound, tail));
+                maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2,
+                modelLowerBound, modelUpperBound, tail));
             CPPUNIT_ASSERT_EQUAL(modelLowerBound, modelUpperBound);
 
             double modelProbability = (modelLowerBound + modelUpperBound) / 2.0;
             expectedProbability += weight * modelProbability;
         }
 
-        LOG_DEBUG(<< "weights = " << core::CContainerPrinter::print(weights) << ", expectedProbability = " << expectedProbability
+        LOG_DEBUG(<< "weights = " << core::CContainerPrinter::print(weights)
+                  << ", expectedProbability = " << expectedProbability
                   << ", probability = " << probability);
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 0.3 * std::max(expectedProbability, probability));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability,
+                                     0.3 * std::max(expectedProbability, probability));
 
         error.add(std::fabs(probability - expectedProbability));
     }
@@ -956,14 +1016,13 @@ void CMultivariateOneOfNPriorTest::testPersist() {
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
 
-    maths::SDistributionRestoreParams params(dataType,
-                                             decayRate + 0.1,
-                                             maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
-                                             maths::MINIMUM_CLUSTER_SPLIT_COUNT,
-                                             maths::MINIMUM_CATEGORY_COUNT);
+    maths::SDistributionRestoreParams params(
+        dataType, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+        maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
     maths::CMultivariateOneOfNPrior restoredFilter(dimension, params, traverser);
 
-    LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
+    LOG_DEBUG(<< "orig checksum = " << checksum
+              << " restored checksum = " << restoredFilter.checksum());
     CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());
 
     // The XML representation of the new filter should be the same as the original
@@ -979,29 +1038,38 @@ CppUnit::Test* CMultivariateOneOfNPriorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMultivariateOneOfNPriorTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testMultipleUpdate",
-                                                                                &CMultivariateOneOfNPriorTest::testMultipleUpdate));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testPropagation",
-                                                                                &CMultivariateOneOfNPriorTest::testPropagation));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testWeightUpdate",
-                                                                                &CMultivariateOneOfNPriorTest::testWeightUpdate));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testModelUpdate",
-                                                                                &CMultivariateOneOfNPriorTest::testModelUpdate));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testModelSelection",
-                                                                                &CMultivariateOneOfNPriorTest::testModelSelection));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testMarginalLikelihood",
-                                                                                &CMultivariateOneOfNPriorTest::testMarginalLikelihood));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean",
-                                                                                &CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode",
-                                                                                &CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
-        "CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood", &CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples",
-                                                              &CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>("CMultivariateOneOfNPriorTest::testPersist",
-                                                                                &CMultivariateOneOfNPriorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testMultipleUpdate",
+        &CMultivariateOneOfNPriorTest::testMultipleUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testPropagation",
+        &CMultivariateOneOfNPriorTest::testPropagation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testWeightUpdate",
+        &CMultivariateOneOfNPriorTest::testWeightUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testModelUpdate",
+        &CMultivariateOneOfNPriorTest::testModelUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testModelSelection",
+        &CMultivariateOneOfNPriorTest::testModelSelection));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testMarginalLikelihood",
+        &CMultivariateOneOfNPriorTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean",
+        &CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode",
+        &CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood",
+        &CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples",
+        &CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMultivariateOneOfNPriorTest>(
+        "CMultivariateOneOfNPriorTest::testPersist", &CMultivariateOneOfNPriorTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CNaiveBayesTest.cc b/lib/maths/unittest/CNaiveBayesTest.cc
index 18a43c8ed3..0786eec880 100644
--- a/lib/maths/unittest/CNaiveBayesTest.cc
+++ b/lib/maths/unittest/CNaiveBayesTest.cc
@@ -62,7 +62,8 @@ void CNaiveBayesTest::testClassification() {
     TMeanAccumulator meanMeanError;
 
     for (auto initialCount : {0.0, 100.0}) {
-        maths::CNormalMeanPrecConjugate 
normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; + maths::CNormalMeanPrecConjugate normal{ + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; maths::CNaiveBayes nb{maths::CNaiveBayesFeatureDensityFromPrior(normal)}; if (initialCount > 0) { @@ -103,11 +104,15 @@ void CNaiveBayesTest::testClassification() { // ratios for those feature values. boost::math::normal class1[]{ - boost::math::normal{maths::CBasicStatistics::mean(moments[0]), std::sqrt(maths::CBasicStatistics::variance(moments[0]))}, - boost::math::normal{maths::CBasicStatistics::mean(moments[1]), std::sqrt(maths::CBasicStatistics::variance(moments[1]))}}; + boost::math::normal{maths::CBasicStatistics::mean(moments[0]), + std::sqrt(maths::CBasicStatistics::variance(moments[0]))}, + boost::math::normal{maths::CBasicStatistics::mean(moments[1]), + std::sqrt(maths::CBasicStatistics::variance(moments[1]))}}; boost::math::normal class2[]{ - boost::math::normal{maths::CBasicStatistics::mean(moments[2]), std::sqrt(maths::CBasicStatistics::variance(moments[2]))}, - boost::math::normal{maths::CBasicStatistics::mean(moments[3]), std::sqrt(maths::CBasicStatistics::variance(moments[3]))}}; + boost::math::normal{maths::CBasicStatistics::mean(moments[2]), + std::sqrt(maths::CBasicStatistics::variance(moments[2]))}, + boost::math::normal{maths::CBasicStatistics::mean(moments[3]), + std::sqrt(maths::CBasicStatistics::variance(moments[3]))}}; TDoubleVec xtest; rng.generateNormalSamples(0.0, 64.0, 40, xtest); @@ -115,7 +120,8 @@ void CNaiveBayesTest::testClassification() { TMeanAccumulator meanErrors[3]; for (std::size_t i = 0u; i < xtest.size(); i += 2) { - auto test = [i](double p1, double p2, const TDoubleSizePrVec& p, TMeanAccumulator& meanError) { + auto test = [i](double p1, double p2, const TDoubleSizePrVec& p, + TMeanAccumulator& meanError) { double Z{p1 + p2}; p1 /= Z; p2 /= Z; @@ -123,7 +129,8 @@ void CNaiveBayesTest::testClassification() { double p2_{p[0].second == 1 ? p[1].first : p[0].first}; if (i % 10 == 0) { - LOG_DEBUG(i << ") expected P(1) = " << p1 << ", P(2) = " << p2 << " got P(1) = " << p1_ << ", P(2) = " << p2_); + LOG_DEBUG(i << ") expected P(1) = " << p1 << ", P(2) = " << p2 + << " got P(1) = " << p1_ << ", P(2) = " << p2_); } CPPUNIT_ASSERT_EQUAL(std::size_t(2), p.size()); @@ -138,8 +145,10 @@ void CNaiveBayesTest::testClassification() { }; // Supply both feature values. 
- double p1{P1 * maths::CTools::safePdf(class1[0], xtest[i]) * maths::CTools::safePdf(class1[1], xtest[i + 1])}; - double p2{P2 * maths::CTools::safePdf(class2[0], xtest[i]) * maths::CTools::safePdf(class2[1], xtest[i + 1])}; + double p1{P1 * maths::CTools::safePdf(class1[0], xtest[i]) * + maths::CTools::safePdf(class1[1], xtest[i + 1])}; + double p2{P2 * maths::CTools::safePdf(class2[0], xtest[i]) * + maths::CTools::safePdf(class2[1], xtest[i + 1])}; probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {xtest[i + 1]}}); test(p1, p2, probabilities, meanErrors[0]); @@ -157,7 +166,8 @@ void CNaiveBayesTest::testClassification() { } for (std::size_t i = 0u; i < 3; ++i) { - LOG_DEBUG("Mean relative error = " << maths::CBasicStatistics::mean(meanErrors[i])); + LOG_DEBUG("Mean relative error = " + << maths::CBasicStatistics::mean(meanErrors[i])); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanErrors[i]) < 0.05); meanMeanError += meanErrors[i]; } @@ -174,9 +184,11 @@ void CNaiveBayesTest::testPropagationByTime() { test::CRandomNumbers rng; - maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.05)}; - maths::CNaiveBayes nb[]{maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}, - maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}}; + maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.05)}; + maths::CNaiveBayes nb[]{ + maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}, + maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}}; TDoubleVec trainingData[4]; for (std::size_t i = 0u; i < 1000; ++i) { @@ -200,20 +212,26 @@ void CNaiveBayesTest::testPropagationByTime() { // for the aged classifier and vice versa. 
{ - TDoubleSizePrVec probabilities[]{nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}), - nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}})}; - LOG_DEBUG("Aged class probabilities = " << core::CContainerPrinter::print(probabilities[0])); - LOG_DEBUG("Class probabilities = " << core::CContainerPrinter::print(probabilities[1])); + TDoubleSizePrVec probabilities[]{ + nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}), + nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}})}; + LOG_DEBUG("Aged class probabilities = " + << core::CContainerPrinter::print(probabilities[0])); + LOG_DEBUG("Class probabilities = " + << core::CContainerPrinter::print(probabilities[1])); CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities[0][0].second); CPPUNIT_ASSERT(probabilities[0][0].first > 0.99); CPPUNIT_ASSERT_EQUAL(std::size_t(1), probabilities[1][0].second); CPPUNIT_ASSERT(probabilities[1][0].first > 0.95); } { - TDoubleSizePrVec probabilities[]{nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}), - nb[1].highestClassProbabilities(2, {{10.0}, {10.0}})}; - LOG_DEBUG("Aged class probabilities = " << core::CContainerPrinter::print(probabilities[0])); - LOG_DEBUG("Class probabilities = " << core::CContainerPrinter::print(probabilities[1])); + TDoubleSizePrVec probabilities[]{ + nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}), + nb[1].highestClassProbabilities(2, {{10.0}, {10.0}})}; + LOG_DEBUG("Aged class probabilities = " + << core::CContainerPrinter::print(probabilities[0])); + LOG_DEBUG("Class probabilities = " + << core::CContainerPrinter::print(probabilities[1])); CPPUNIT_ASSERT_EQUAL(std::size_t(1), probabilities[0][0].second); CPPUNIT_ASSERT(probabilities[0][0].first > 0.99); CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities[1][0].second); @@ -241,8 +259,10 @@ void CNaiveBayesTest::testMemoryUsage() { TMeanAccumulator meanMeanError; - maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1)}; - TNaiveBayesPtr nb{new maths::CNaiveBayes{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.1}}; + maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.1)}; + TNaiveBayesPtr nb{new maths::CNaiveBayes{ + maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.1}}; for (std::size_t i = 0u; i < 100; ++i) { nb->addTrainingDataPoint(1, {{trainingData[0][i]}, {trainingData[1][i]}}); @@ -259,7 +279,8 @@ void CNaiveBayesTest::testMemoryUsage() { CPPUNIT_ASSERT_EQUAL(memoryUsage, mem->usage()); LOG_DEBUG("Memory = " << core::CMemory::dynamicSize(nb)); - CPPUNIT_ASSERT_EQUAL(memoryUsage + sizeof(maths::CNaiveBayes), core::CMemory::dynamicSize(nb)); + CPPUNIT_ASSERT_EQUAL(memoryUsage + sizeof(maths::CNaiveBayes), + core::CMemory::dynamicSize(nb)); } void CNaiveBayesTest::testPersist() { @@ -277,7 +298,8 @@ void CNaiveBayesTest::testPersist() { TMeanAccumulator meanMeanError; - maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1)}; + maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.1)}; maths::CNaiveBayes origNb{maths::CNaiveBayesFeatureDensityFromPrior(normal), 0.1}; for (std::size_t i = 0u; i < 100; ++i) { @@ -317,12 +339,14 @@ void CNaiveBayesTest::testPersist() { CppUnit::Test* CNaiveBayesTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CNaiveBayesTest"); - suiteOfTests->addTest( - new 
CppUnit::TestCaller<CNaiveBayesTest>("CNaiveBayesTest::testClassification", &CNaiveBayesTest::testClassification));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CNaiveBayesTest>("CNaiveBayesTest::testPropagationByTime", &CNaiveBayesTest::testPropagationByTime));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CNaiveBayesTest>("CNaiveBayesTest::testMemoryUsage", &CNaiveBayesTest::testMemoryUsage));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CNaiveBayesTest>("CNaiveBayesTest::testPersist", &CNaiveBayesTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaiveBayesTest>(
+        "CNaiveBayesTest::testClassification", &CNaiveBayesTest::testClassification));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaiveBayesTest>(
+        "CNaiveBayesTest::testPropagationByTime", &CNaiveBayesTest::testPropagationByTime));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaiveBayesTest>(
+        "CNaiveBayesTest::testMemoryUsage", &CNaiveBayesTest::testMemoryUsage));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaiveBayesTest>(
+        "CNaiveBayesTest::testPersist", &CNaiveBayesTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CNaturalBreaksClassifierTest.cc b/lib/maths/unittest/CNaturalBreaksClassifierTest.cc
index 991161f6a5..ec693a7534 100644
--- a/lib/maths/unittest/CNaturalBreaksClassifierTest.cc
+++ b/lib/maths/unittest/CNaturalBreaksClassifierTest.cc
@@ -39,7 +39,10 @@ bool computeDeviation(const TTuple& category, std::size_t p, double& result) {
 }
 
 //! Branch and bound exhaustive search for the optimum split.
-bool naturalBreaksBranchAndBound(const TTupleVec& categories, std::size_t n, std::size_t p, TTupleVec& result) {
+bool naturalBreaksBranchAndBound(const TTupleVec& categories,
+                                 std::size_t n,
+                                 std::size_t p,
+                                 TTupleVec& result) {
     using TSizeVec = std::vector<std::size_t>;
 
     // Find the minimum variance partition.
@@ -86,7 +89,8 @@ bool naturalBreaksBranchAndBound(const TTupleVec& categories, std::size_t n, std
             }
 
             double categoryDeviation;
-            if (!computeDeviation(category, p, categoryDeviation) || (deviation >= deviationMin && i < m - 1)) {
+            if (!computeDeviation(category, p, categoryDeviation) ||
+                (deviation >= deviationMin && i < m - 1)) {
                 // We can prune all possible solutions which have
                 // sub-split (split[0], ... 
split[i]) since their // deviation is necessarily larger than the minimum @@ -97,7 +101,8 @@ bool naturalBreaksBranchAndBound(const TTupleVec& categories, std::size_t n, std split[i] = N - (m - i); } deviation = INF; - LOG_TRACE(<< "Pruning solutions variation = " << deviation << ", deviationMin = " << deviationMin + LOG_TRACE(<< "Pruning solutions variation = " << deviation + << ", deviationMin = " << deviationMin << ", split = " << core::CContainerPrinter::print(split)); } else { deviation += categoryDeviation; @@ -107,7 +112,8 @@ bool naturalBreaksBranchAndBound(const TTupleVec& categories, std::size_t n, std if (deviation < deviationMin) { bestSplit = split; deviationMin = deviation; - LOG_TRACE(<< "splitMin = " << core::CContainerPrinter::print(result) << ", deviationMin = " << deviationMin); + LOG_TRACE(<< "splitMin = " << core::CContainerPrinter::print(result) + << ", deviationMin = " << deviationMin); } if (split == end) { @@ -170,10 +176,12 @@ void CNaturalBreaksClassifierTest::testCategories() { TTupleVec expectedSplit; naturalBreaksBranchAndBound(all, j, 0, expectedSplit); - LOG_DEBUG(<< "expected = " << core::CContainerPrinter::print(expectedSplit)); + LOG_DEBUG(<< "expected = " + << core::CContainerPrinter::print(expectedSplit)); LOG_DEBUG(<< "actual = " << core::CContainerPrinter::print(split)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSplit), core::CContainerPrinter::print(split)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSplit), + core::CContainerPrinter::print(split)); } } } @@ -197,7 +205,8 @@ void CNaturalBreaksClassifierTest::testCategories() { do { k *= 2; - LOG_DEBUG(<< "# samples = " << i << ", # splits = " << j << ", minimum cluster size = " << k); + LOG_DEBUG(<< "# samples = " << i << ", # splits = " << j + << ", minimum cluster size = " << k); TTupleVec split; bool haveSplit = classifier.categories(j, k, split); @@ -210,10 +219,13 @@ void CNaturalBreaksClassifierTest::testCategories() { CPPUNIT_ASSERT_EQUAL(expectSplit, haveSplit); if (expectSplit && haveSplit) { - LOG_DEBUG(<< "expected = " << core::CContainerPrinter::print(expectedSplit)); - LOG_DEBUG(<< "actual = " << core::CContainerPrinter::print(split)); + LOG_DEBUG(<< "expected = " + << core::CContainerPrinter::print(expectedSplit)); + LOG_DEBUG(<< "actual = " + << core::CContainerPrinter::print(split)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSplit), core::CContainerPrinter::print(split)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSplit), + core::CContainerPrinter::print(split)); } } while (k < i / j); } @@ -251,8 +263,10 @@ void CNaturalBreaksClassifierTest::testCategories() { double c2 = CBasicStatistics::count(twoSplit[1]); LOG_DEBUG(<< "count ratio = " << c1 / c2); CPPUNIT_ASSERT(std::fabs(c1 / c2 - 1.0) < 0.8); - double separation = std::fabs(CBasicStatistics::mean(twoSplit[0]) - CBasicStatistics::mean(twoSplit[1])) / - (std::sqrt(CBasicStatistics::variance(twoSplit[0])) + std::sqrt(CBasicStatistics::variance(twoSplit[1]))); + double separation = std::fabs(CBasicStatistics::mean(twoSplit[0]) - + CBasicStatistics::mean(twoSplit[1])) / + (std::sqrt(CBasicStatistics::variance(twoSplit[0])) + + std::sqrt(CBasicStatistics::variance(twoSplit[1]))); LOG_DEBUG(<< "separation = " << separation); CPPUNIT_ASSERT(std::fabs(separation - 1.0) < 0.4); } @@ -284,16 +298,23 @@ void CNaturalBreaksClassifierTest::testCategories() { TTupleVec twoSplit; classifier.categories(2u, 0, twoSplit); - LOG_DEBUG(<< "split 1 = " << 
CBasicStatistics::print(twoSplit[0]) << ", split 2 = " << CBasicStatistics::print(twoSplit[1])
+        LOG_DEBUG(<< "split 1 = " << CBasicStatistics::print(twoSplit[0])
+                  << ", split 2 = " << CBasicStatistics::print(twoSplit[1])
                   << ", (mean1,var1) = (" << mean1 << "," << var1 << ")"
                   << ", (mean2,var2) = (" << mean2 << "," << var2 << ")");
 
         CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1) < 0.5);
         CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1) < 0.6);
-        CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1) < 0.33);
+        CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) -
+                                 static_cast<double>(n1)) /
+                           static_cast<double>(n1) <
+                       0.33);
         CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2) < 0.4);
         CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2) < 0.63);
-        CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2) < 0.11);
+        CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) -
+                                 static_cast<double>(n2)) /
+                           static_cast<double>(n2) <
+                       0.11);
     }
 }
@@ -317,21 +338,32 @@ void CNaturalBreaksClassifierTest::testCategories() {
             TTupleVec twoSplit;
             classifier.categories(2u, 0, twoSplit);
-            LOG_DEBUG(<< "split 1 = " << CBasicStatistics::print(twoSplit[0]) << ", split 2 = " << CBasicStatistics::print(twoSplit[1]));
+            LOG_DEBUG(<< "split 1 = " << CBasicStatistics::print(twoSplit[0])
+                      << ", split 2 = " << CBasicStatistics::print(twoSplit[1]));
 
             CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1) < 0.7);
             CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1) < 0.4);
-            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1) < 0.7);
+            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) -
+                                     static_cast<double>(n1)) /
+                               static_cast<double>(n1) <
+                           0.7);
             CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2) < 0.6);
             CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2) < 1.0);
-            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2) < 0.3);
+            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) -
+                                     static_cast<double>(n2)) /
+                               static_cast<double>(n2) <
+                           0.3);
 
             totalMeanError1 += std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1);
             totalVarError1 += std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1);
-            totalCountError1 += std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1);
+            totalCountError1 += std::fabs(CBasicStatistics::count(twoSplit[0]) -
+                                          static_cast<double>(n1)) /
+                                static_cast<double>(n1);
             totalMeanError2 += std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2);
             totalVarError2 += std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2);
-            totalCountError2 += std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2);
+            totalCountError2 += std::fabs(CBasicStatistics::count(twoSplit[1]) -
+                                          static_cast<double>(n2)) /
+                                static_cast<double>(n2);
         }
 
         totalMeanError1 /= 500.0;
@@ -341,13 +373,15 @@
         totalVarError2 /= 500.0;
         totalCountError2 /= 500.0;
 
-        LOG_DEBUG(<< "mean mean error 1 = " << totalMeanError1 << ", mean variance error 1 = " << totalVarError1
+        LOG_DEBUG(<< "mean mean error 1 = " << totalMeanError1
+                  << ", mean variance error 1 = " << totalVarError1
                   << ", mean count error 1 = " << totalCountError1);
         CPPUNIT_ASSERT(totalMeanError1 < 0.21);
         CPPUNIT_ASSERT(totalVarError1 < 0.2);
         CPPUNIT_ASSERT(totalCountError1 < 0.3);
 
-        LOG_DEBUG(<< "mean mean error 2 = " << totalMeanError2 << ", mean variance error 2 = " << totalVarError2
+        LOG_DEBUG(<< "mean mean error 2 = " << totalMeanError2
+                  << ", mean variance error 2 = " << totalVarError2
                   << ", mean count error 2 = " << totalCountError2);
         CPPUNIT_ASSERT(totalMeanError2 < 0.3);
         CPPUNIT_ASSERT(totalVarError2 < 0.56);
@@ -403,22 +437,33 @@ void CNaturalBreaksClassifierTest::testCategories() {
             TTupleVec twoSplit;
             classifier.categories(3u, 0, twoSplit);
-            LOG_DEBUG(<< "split 1 = " << CBasicStatistics::print(twoSplit[0]) << ", split 2 = " << CBasicStatistics::print(twoSplit[1])
+            LOG_DEBUG(<< "split 1 = " << CBasicStatistics::print(twoSplit[0])
+                      << ", split 2 = " << CBasicStatistics::print(twoSplit[1])
                       << ", split 3 = " << CBasicStatistics::print(twoSplit[2]));
 
             CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1) < 0.15);
             CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1) < 0.4);
-            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1) < 0.05);
+            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[0]) -
+                                     static_cast<double>(n1)) /
+                               static_cast<double>(n1) <
+                           0.05);
             CPPUNIT_ASSERT(std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2) < 0.5);
             CPPUNIT_ASSERT(std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2) < 2.5);
-            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2) < 0.15);
+            CPPUNIT_ASSERT(std::fabs(CBasicStatistics::count(twoSplit[1]) -
+                                     static_cast<double>(n2)) /
+                               static_cast<double>(n2) <
+                           0.15);
 
             totalMeanError1 += std::fabs(CBasicStatistics::mean(twoSplit[0]) - mean1);
             totalVarError1 += std::fabs(CBasicStatistics::variance(twoSplit[0]) - var1);
-            totalCountError1 += std::fabs(CBasicStatistics::count(twoSplit[0]) - static_cast<double>(n1)) / static_cast<double>(n1);
+            totalCountError1 += std::fabs(CBasicStatistics::count(twoSplit[0]) -
+                                          static_cast<double>(n1)) /
+                                static_cast<double>(n1);
             totalMeanError2 += std::fabs(CBasicStatistics::mean(twoSplit[1]) - mean2);
             totalVarError2 += std::fabs(CBasicStatistics::variance(twoSplit[1]) - var2);
-            totalCountError2 += std::fabs(CBasicStatistics::count(twoSplit[1]) - static_cast<double>(n2)) / static_cast<double>(n2);
+            totalCountError2 += std::fabs(CBasicStatistics::count(twoSplit[1]) -
+                                          static_cast<double>(n2)) /
+                                static_cast<double>(n2);
         }
 
         totalMeanError1 /= 500.0;
@@ -428,13 +473,15 @@
         totalVarError2 /= 500.0;
         totalCountError2 /= 500.0;
 
-        LOG_DEBUG(<< "mean mean error 1 = " << totalMeanError1 << ", mean variance error 1 = " << totalVarError1
+        LOG_DEBUG(<< "mean mean error 1 = " << totalMeanError1
+                  << ", mean variance error 1 = " << totalVarError1
                   << ", mean count error 1 = " << totalCountError1);
         CPPUNIT_ASSERT(totalMeanError1 < 0.05);
         CPPUNIT_ASSERT(totalVarError1 < 0.1);
         CPPUNIT_ASSERT(totalCountError1 < 0.01);
 
-        LOG_DEBUG(<< "mean mean error 2 = " << totalMeanError2 << ", mean variance error 2 = " << totalVarError2
+        LOG_DEBUG(<< "mean mean error 2 = " << totalMeanError2
+                  << ", mean variance error 2 = " << totalVarError2
                   << ", mean count error 2 = " << totalCountError2);
         CPPUNIT_ASSERT(totalMeanError2 < 0.15);
         CPPUNIT_ASSERT(totalVarError2 < 1.0);
@@ -561,10 +608,14 @@ void CNaturalBreaksClassifierTest::testSample() {
     LOG_DEBUG(<< "mean, variance 1 = " << meanVar1);
     LOG_DEBUG(<< "expected mean, variance 2 = " << expectedMeanVar2);
     LOG_DEBUG(<< "mean, variance 2 = " << meanVar2);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::mean(expectedMeanVar1), CBasicStatistics::mean(meanVar1), 0.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::variance(expectedMeanVar1), CBasicStatistics::variance(meanVar1), 0.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::mean(expectedMeanVar2), CBasicStatistics::mean(meanVar2), 0.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::variance(expectedMeanVar2), CBasicStatistics::variance(meanVar2), 0.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::mean(expectedMeanVar1),
+                                 CBasicStatistics::mean(meanVar1), 0.01);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::variance(expectedMeanVar1),
+                                 CBasicStatistics::variance(meanVar1), 0.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::mean(expectedMeanVar2),
+                                 CBasicStatistics::mean(meanVar2), 0.01);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(CBasicStatistics::variance(expectedMeanVar2),
+                                 CBasicStatistics::variance(meanVar2), 0.1);
 }
 
 void CNaturalBreaksClassifierTest::testPersist() {
@@ -607,15 +658,15 @@ void CNaturalBreaksClassifierTest::testPersist() {
     // Restore the XML into a new classifier.
     CNaturalBreaksClassifier restoredClassifier(8);
-    maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
-                                             0.2,
-                                             maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
-                                             maths::MINIMUM_CLUSTER_SPLIT_COUNT,
-                                             maths::MINIMUM_CATEGORY_COUNT);
+    maths::SDistributionRestoreParams params(
+        maths_t::E_ContinuousData, 0.2, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+        maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
     CPPUNIT_ASSERT(traverser.traverseSubLevel(
-        boost::bind(&CNaturalBreaksClassifier::acceptRestoreTraverser, &restoredClassifier, boost::cref(params), _1)));
+        boost::bind(&CNaturalBreaksClassifier::acceptRestoreTraverser,
+                    &restoredClassifier, boost::cref(params), _1)));
 
-    LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredClassifier.checksum());
+    LOG_DEBUG(<< "orig checksum = " << checksum
+              << " restored checksum = " << restoredClassifier.checksum());
     CPPUNIT_ASSERT_EQUAL(checksum, restoredClassifier.checksum());
 
     // The XML representation of the new filter should be the same
@@ -632,14 +683,16 @@ CppUnit::Test* CNaturalBreaksClassifierTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CNaturalBreaksClassifierTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>("CNaturalBreaksClassifierTest::testCategories",
-                                                                                &CNaturalBreaksClassifierTest::testCategories));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
-        "CNaturalBreaksClassifierTest::testPropagateForwardsByTime", &CNaturalBreaksClassifierTest::testPropagateForwardsByTime));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>("CNaturalBreaksClassifierTest::testSample",
-                                                                                &CNaturalBreaksClassifierTest::testSample));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>("CNaturalBreaksClassifierTest::testPersist",
-                                                                                &CNaturalBreaksClassifierTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
+        "CNaturalBreaksClassifierTest::testCategories",
+        &CNaturalBreaksClassifierTest::testCategories));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
+        "CNaturalBreaksClassifierTest::testPropagateForwardsByTime",
+        &CNaturalBreaksClassifierTest::testPropagateForwardsByTime));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
+        "CNaturalBreaksClassifierTest::testSample", &CNaturalBreaksClassifierTest::testSample));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CNaturalBreaksClassifierTest>(
+        "CNaturalBreaksClassifierTest::testPersist", &CNaturalBreaksClassifierTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc b/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc
index f5b059d31b..8c8448d4bb 100644
--- a/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc
+++ b/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc
@@ -43,7 +43,8 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumula
 using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
 using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin<maths::CNormalMeanPrecConjugate>;
 
-CNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& decayRate = 0.0) {
+CNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData,
+                                   const double& decayRate = 0.0) {
     return CNormalMeanPrecConjugate::nonInformativePrior(dataType, decayRate);
 }
 }
@@ -92,9 +93,11 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate() {
     maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);
     for (std::size_t j = 0u; j < samples.size(); ++j) {
-        filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]), TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0)));
+        filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]),
+                           TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0)));
     }
-    filter2.addSamples(weightStyle, samples, TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0)));
+    filter2.addSamples(weightStyle, samples,
+                       TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0)));
 
     LOG_DEBUG(<< filter1.print());
     LOG_DEBUG(<< "vs");
@@ -152,7 +155,8 @@ void CNormalMeanPrecConjugateTest::testPropagation() {
     double propagatedMean = filter.mean();
     double propagatedPrecision = filter.precision();
 
-    LOG_DEBUG(<< "mean = " << mean << ", precision = " << precision << ", propagatedMean = " << propagatedMean
+    LOG_DEBUG(<< "mean = " << mean << ", precision = " << precision
+              << ", propagatedMean = " << propagatedMean
               << ", propagatedPrecision = " << propagatedPrecision);
 
     CPPUNIT_ASSERT_DOUBLES_EQUAL(mean, propagatedMean, eps);
@@ -172,7 +176,8 @@ void CNormalMeanPrecConjugateTest::testMeanEstimation() {
     const double decayRates[] = {0.0, 0.001, 0.01};
 
     const unsigned int nTests = 500u;
-    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0};
+    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0,
+                                    85.0, 90.0, 95.0, 99.0};
 
     for (std::size_t i = 0; i < boost::size(decayRates); ++i) {
         test::CRandomNumbers rng;
@@ -186,7 +191,8 @@
             TDoubleVec samples;
             rng.generateNormalSamples(mean, variance, 500, samples);
 
-            CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[i]));
+            CNormalMeanPrecConjugate filter(
+                makePrior(maths_t::E_ContinuousData, decayRates[i]));
 
             for (std::size_t j = 0u; j < samples.size(); ++j) {
                 filter.addSamples(TDouble1Vec(1, samples[j]));
@@ -194,7 +200,8 @@
             for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
-                TDoubleDoublePr confidenceInterval = filter.confidenceIntervalMean(testIntervals[j]);
+                TDoubleDoublePr confidenceInterval =
+                    filter.confidenceIntervalMean(testIntervals[j]);
 
                 if (mean < confidenceInterval.first || mean > confidenceInterval.second) {
                     ++errors[j];
                 }
             }
@@ -205,7 +212,8 @@
         for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
             double interval = 100.0 * errors[j] / static_cast<double>(nTests);
 
-            LOG_DEBUG(<< "interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j]));
+            LOG_DEBUG(<< "interval = " << interval
+                      << ", expectedInterval = " << (100.0 - testIntervals[j]));
 
             // If the decay rate is zero the intervals should be accurate.
             // Otherwise, they should be an upper bound.
@@ -231,7 +239,8 @@ void CNormalMeanPrecConjugateTest::testPrecisionEstimation() {
     const double decayRates[] = {0.0, 0.001, 0.01};
 
     const unsigned int nTests = 1000u;
-    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0};
+    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0,
+                                    85.0, 90.0, 95.0, 99.0};
 
     for (std::size_t i = 0; i < boost::size(decayRates); ++i) {
         test::CRandomNumbers rng;
@@ -246,7 +255,8 @@
             TDoubleVec samples;
             rng.generateNormalSamples(mean, variance, 1000, samples);
 
-            CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[i]));
+            CNormalMeanPrecConjugate filter(
+                makePrior(maths_t::E_ContinuousData, decayRates[i]));
 
             for (std::size_t j = 0u; j < samples.size(); ++j) {
                 filter.addSamples(TDouble1Vec(1, samples[j]));
@@ -254,9 +264,11 @@
             for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
-                TDoubleDoublePr confidenceInterval = filter.confidenceIntervalPrecision(testIntervals[j]);
+                TDoubleDoublePr confidenceInterval =
+                    filter.confidenceIntervalPrecision(testIntervals[j]);
 
-                if (precision < confidenceInterval.first || precision > confidenceInterval.second) {
+                if (precision < confidenceInterval.first ||
+                    precision > confidenceInterval.second) {
                     ++errors[j];
                 }
             }
@@ -265,7 +277,8 @@
        for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
            double interval = 100.0 * errors[j] / static_cast<double>(nTests);
 
-            LOG_DEBUG(<< "interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j]));
+            LOG_DEBUG(<< "interval = " << interval
+                      << ", expectedInterval = " << (100.0 - testIntervals[j]));
 
             // If the decay rate is zero the intervals should be accurate.
             // Otherwise, they should be an upper bound.
@@ -298,17 +311,16 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() {
         filter.addSamples(samples);
 
         maths_t::ESampleWeightStyle weightStyles[] = {
-            maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight};
+            maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight,
+            maths_t::E_SampleCountWeight};
         double weights[] = {0.1, 1.0, 10.0};
 
         for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
             for (std::size_t j = 0u; j < boost::size(weights); ++j) {
                 double lb, ub;
-                filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]),
-                                        TDouble1Vec(1, 1000.0),
-                                        TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])),
-                                        lb,
-                                        ub);
+                filter.minusLogJointCdf(
+                    maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 1000.0),
+                    TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub);
                 LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0);
                 CPPUNIT_ASSERT(lb >= 0.0);
                 CPPUNIT_ASSERT(ub >= 0.0);
@@ -333,7 +345,8 @@
             rng.generateNormalSamples(mean, variance, numberSamples[i], samples);
 
             for (std::size_t j = 0; j < boost::size(decayRates); ++j) {
-                CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[j]));
+                CNormalMeanPrecConjugate filter(
+                    makePrior(maths_t::E_ContinuousData, decayRates[j]));
 
                 for (std::size_t k = 0u; k < samples.size(); ++k) {
                     filter.addSamples(TDouble1Vec(1, samples[k]));
@@ -344,7 +357,8 @@
                 // c.d.f. at a range of deltas from the true mean.
 
                 const double eps = 1e-4;
-                double deltas[] = {-5.0, -4.0, -3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0};
+                double deltas[] = {-5.0, -4.0, -3.0, -2.0, -1.0, -0.5, 0.0,
+                                   0.5,  1.0,  2.0,  3.0,  4.0,  5.0};
 
                 for (std::size_t k = 0; k < boost::size(deltas); ++k) {
                     double x = mean + deltas[k] * std::sqrt(variance);
@@ -353,7 +367,8 @@
                     LOG_DEBUG(<< "number = " << numberSamples[i] << ", sample = " << sample[0]);
 
                     double logLikelihood = 0.0;
-                    CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
+                    CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+                                         filter.jointLogMarginalLikelihood(sample, logLikelihood));
                     double pdf = std::exp(logLikelihood);
 
                     double lowerBound = 0.0, upperBound = 0.0;
@@ -403,27 +418,32 @@
             TDouble1Vec sample(1, samples[i]);
             filter.addSamples(sample);
             double logLikelihood = 0.0;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+                                 filter.jointLogMarginalLikelihood(sample, logLikelihood));
             differentialEntropy -= logLikelihood;
         }
 
         differentialEntropy /= static_cast<double>(samples.size());
 
-        LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
+        LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy
+                  << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
 
         CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 2e-3);
     }
 
     {
         boost::math::normal_distribution<> normal(mean, std::sqrt(variance));
 
-        const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0};
+        const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
+                                         0.7, 0.8, 0.9, 1.0, 1.2, 1.5,
+                                         2.0, 2.5, 3.0, 4.0, 5.0};
CNormalMeanPrecConjugate filter(makePrior()); TDoubleVec samples; rng.generateNormalSamples(mean, variance, 1000, samples); filter.addSamples(samples); - const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 95.0}; + const double percentages[] = {5.0, 10.0, 20.0, 30.0, 40.0, + 50.0, 60.0, 70.0, 80.0, 95.0}; { // Test that marginal likelihood confidence intervals are @@ -432,9 +452,12 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() { TMeanAccumulator error; for (std::size_t i = 0u; i < boost::size(percentages); ++i) { double q1, q2; - filter.marginalLikelihoodQuantileForTest(50.0 - percentages[i] / 2.0, 1e-3, q1); - filter.marginalLikelihoodQuantileForTest(50.0 + percentages[i] / 2.0, 1e-3, q2); - TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(percentages[i]); + filter.marginalLikelihoodQuantileForTest(50.0 - percentages[i] / 2.0, + 1e-3, q1); + filter.marginalLikelihoodQuantileForTest(50.0 + percentages[i] / 2.0, + 1e-3, q2); + TDoubleDoublePr interval = + filter.marginalLikelihoodConfidenceInterval(percentages[i]); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.005); @@ -453,10 +476,14 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() { boost::math::normal_distribution<> scaledNormal(mean, std::sqrt(vs * variance)); LOG_DEBUG(<< "*** vs = " << vs << " ***"); for (std::size_t j = 0u; j < boost::size(percentages); ++j) { - double q1 = boost::math::quantile(scaledNormal, (50.0 - percentages[j] / 2.0) / 100.0); - double q2 = boost::math::quantile(scaledNormal, (50.0 + percentages[j] / 2.0) / 100.0); + double q1 = boost::math::quantile( + scaledNormal, (50.0 - percentages[j] / 2.0) / 100.0); + double q2 = boost::math::quantile( + scaledNormal, (50.0 + percentages[j] / 2.0) / 100.0); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), TDouble4Vec(1, vs)); + percentages[j], + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), + TDouble4Vec(1, vs)); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.3); @@ -489,7 +516,8 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { for (std::size_t i = 0u; i < boost::size(means); ++i) { for (std::size_t j = 0u; j < boost::size(variances); ++j) { - LOG_DEBUG(<< "*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); + LOG_DEBUG(<< "*** mean = " << means[i] + << ", variance = " << variances[j] << " ***"); CNormalMeanPrecConjugate filter(makePrior()); @@ -508,13 +536,16 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); if (k % 10 == 0) { - LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); + LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() + << ", expectedMean = " << expectedMean); } // The error is at the precision of the numerical integration. 
- CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 0.01); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, + filter.marginalLikelihoodMean(), 0.01); - relativeError.add(std::fabs(expectedMean - filter.marginalLikelihoodMean()) / expectedMean); + relativeError.add( + std::fabs(expectedMean - filter.marginalLikelihoodMean()) / expectedMean); } LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -533,13 +564,16 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { const double means[] = {1.0, 5.0, 100.0}; const double variances[] = {2.0, 5.0, 20.0}; - const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0}; + const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, + 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, + 2.0, 2.5, 3.0, 4.0, 5.0}; test::CRandomNumbers rng; for (std::size_t i = 0u; i < boost::size(means); ++i) { for (std::size_t j = 0u; j < boost::size(variances); ++j) { - LOG_DEBUG(<< "*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); + LOG_DEBUG(<< "*** mean = " << means[i] + << ", variance = " << variances[j] << " ***"); CNormalMeanPrecConjugate filter(makePrior()); @@ -553,12 +587,15 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[i]; weight[0] = vs; - boost::math::normal_distribution<> scaledNormal(means[i], std::sqrt(vs * variances[j])); + boost::math::normal_distribution<> scaledNormal( + means[i], std::sqrt(vs * variances[j])); double expectedMode = boost::math::mode(scaledNormal); - LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight) + LOG_DEBUG(<< "marginalLikelihoodMode = " + << filter.marginalLikelihoodMode(weightStyle, weight) << ", expectedMode = " << expectedMode); CPPUNIT_ASSERT_DOUBLES_EQUAL( - expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 0.12 * std::sqrt(variances[j])); + expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), + 0.12 * std::sqrt(variances[j])); } } } @@ -580,7 +617,8 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { for (std::size_t i = 0u; i < boost::size(means); ++i) { for (std::size_t j = 0u; j < boost::size(variances); ++j) { - LOG_DEBUG(<< "*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); + LOG_DEBUG(<< "*** mean = " << means[i] + << ", variance = " << variances[j] << " ***"); CNormalMeanPrecConjugate filter(makePrior()); @@ -597,14 +635,18 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { double expectedVariance; CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance)); if (k % 10 == 0) { - LOG_DEBUG(<< "marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance() + LOG_DEBUG(<< "marginalLikelihoodVariance = " + << filter.marginalLikelihoodVariance() << ", expectedVariance = " << expectedVariance); } // The error is at the precision of the numerical integration. 
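The mode assertions above lean on a property of the count variance scale weight: scaling a normal's variance by vs spreads the density but does not move its peak, so the mode of N(m, vs * sigma^2) stays at m while the variance becomes vs * sigma^2. A small sketch of that fact with boost::math (the mean, variance, and scales are illustrative):

    #include <boost/math/distributions/normal.hpp>
    #include <cassert>
    #include <cmath>

    int main() {
        double mean = 5.0, variance = 2.0;
        const double varianceScales[] = {0.1, 1.0, 5.0};
        for (double vs : varianceScales) {
            // The scale multiplies the variance; the mode stays at the mean.
            boost::math::normal_distribution<> scaled(mean, std::sqrt(vs * variance));
            assert(std::fabs(boost::math::mode(scaled) - mean) < 1e-12);
            assert(std::fabs(boost::math::variance(scaled) - vs * variance) < 1e-9);
        }
        return 0;
    }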
- CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.2); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + expectedVariance, filter.marginalLikelihoodVariance(), 0.2); - relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance); + relativeError.add(std::fabs(expectedVariance - + filter.marginalLikelihoodVariance()) / + expectedVariance); } LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -666,14 +708,16 @@ void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodMean() << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMoments)); - LOG_DEBUG(<< "expectedVariance = " << filter.marginalLikelihoodVariance() - << ", sampledVariance = " << maths::CBasicStatistics::variance(sampledMoments)); + LOG_DEBUG(<< "expectedVariance = " << filter.marginalLikelihoodVariance() << ", sampledVariance = " + << maths::CBasicStatistics::variance(sampledMoments)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMoments), 1e-8); + CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), + maths::CBasicStatistics::mean(sampledMoments), 1e-8); CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(), maths::CBasicStatistics::variance(sampledMoments), 0.2 * filter.marginalLikelihoodVariance()); - meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - maths::CBasicStatistics::variance(sampledMoments)) / + meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - + maths::CBasicStatistics::variance(sampledMoments)) / filter.marginalLikelihoodVariance()); std::sort(sampled.begin(), sampled.end()); @@ -683,8 +727,8 @@ void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { double expectedQuantile; CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile)); - LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[j - 1] << "," - << sampled[j] << "]"); + LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" + << sampled[j - 1] << "," << sampled[j] << "]"); CPPUNIT_ASSERT(expectedQuantile >= sampled[j - 1]); CPPUNIT_ASSERT(expectedQuantile <= sampled[j]); @@ -729,11 +773,12 @@ void CNormalMeanPrecConjugateTest::testCdf() { CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, x), lowerBound, upperBound)); double f = (lowerBound + upperBound) / 2.0; - CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lowerBound, upperBound)); + CPPUNIT_ASSERT(filter.minusLogJointCdfComplement( + TDouble1Vec(1, x), lowerBound, upperBound)); double fComplement = (lowerBound + upperBound) / 2.0; - LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) - << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement)); + LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = " + << (fComplement == 0.0 ? 
fComplement : -fComplement)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10); } } @@ -788,15 +833,19 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { double fx; filter.jointLogMarginalLikelihood(sample, fx); - double px = static_cast<double>(std::lower_bound(likelihoods.begin(), likelihoods.end(), fx) - likelihoods.begin()) / + double px = static_cast<double>(std::lower_bound(likelihoods.begin(), + likelihoods.end(), fx) - + likelihoods.begin()) / static_cast<double>(likelihoods.size()); double lb, ub; filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb, ub); - double ssd = std::sqrt(px * (1.0 - px) / static_cast<double>(samples.size())); + double ssd = std::sqrt(px * (1.0 - px) / + static_cast<double>(samples.size())); - LOG_DEBUG(<< "expected P(x) = " << px << ", actual P(x) = " << (lb + ub) / 2.0 << " sample sd = " << ssd); + LOG_DEBUG(<< "expected P(x) = " << px << ", actual P(x) = " + << (lb + ub) / 2.0 << " sample sd = " << ssd); CPPUNIT_ASSERT_DOUBLES_EQUAL(px, (lb + ub) / 2.0, 3.0 * ssd); @@ -806,7 +855,8 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, TDouble4Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode(weightStyle, + TDouble4Vec(1, vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -816,57 +866,42 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), + TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedBelow, weightStyle, + TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedAbove, weightStyle, + TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), + TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove, - weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), - lb, - ub, - tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), + TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -921,7 +956,8 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore() { rng.generateNormalSamples(means[i], variances[j], 500, samples); for (std::size_t k = 0; k < boost::size(decayRates); ++k) { - CNormalMeanPrecConjugate filter(makePrior(maths_t::E_ContinuousData, decayRates[k])); + CNormalMeanPrecConjugate filter( + makePrior(maths_t::E_ContinuousData, decayRates[k])); ++test; @@ -934,7 +970,8 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore() { for (unsigned int time = 0; time < samples.size(); ++time) { double sample = samples[time] + - (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - boost::begin(anomalyTimes)] * + (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - + boost::begin(anomalyTimes)] * boost::math::standard_deviation(normal)); TDouble1Vec sampleVec(1, sample); @@ -954,28 +991,29 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore() { x << "];\n"; scores << "];\n"; - file << x.str() << scores.str() << "plot(x" << test << ", score" << test << ");\n" + file << x.str() << scores.str() << "plot(x" << test << ", score" + << test << ");\n" << "input(\"Hit any key for next test\");\n\n"; TUIntVec falsePositives; std::set_difference(candidateAnomalies.begin(), candidateAnomalies.end(), - boost::begin(anomalyTimes), - boost::end(anomalyTimes), + boost::begin(anomalyTimes), boost::end(anomalyTimes), std::back_inserter(falsePositives)); - double falsePositiveRate = static_cast<double>(falsePositives.size()) / static_cast<double>(samples.size()); + double falsePositiveRate = static_cast<double>(falsePositives.size()) / + static_cast<double>(samples.size()); totalFalsePositiveRate += falsePositiveRate; TUIntVec positives; - std::set_intersection(candidateAnomalies.begin(), - candidateAnomalies.end(), - boost::begin(anomalyTimes), - boost::end(anomalyTimes), - std::back_inserter(positives)); + std::set_intersection( + candidateAnomalies.begin(), candidateAnomalies.end(), + boost::begin(anomalyTimes), boost::end(anomalyTimes), + std::back_inserter(positives)); - LOG_DEBUG(<< "falsePositiveRate = " << falsePositiveRate << ", positives = " << positives.size()); + LOG_DEBUG(<< "falsePositiveRate = " << falsePositiveRate + << ", positives = " << positives.size()); // False alarm rate should be less than 0.6%.
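The false-alarm bookkeeping above treats any flagged bucket whose time is not in the injected anomaly set as a false positive; std::set_difference over the two sorted ranges extracts exactly those. A reduced sketch of the computation (the flagged and injected times here are made up for illustration; 500 matches the test's sample count):

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <vector>

    int main() {
        // Both ranges must be sorted, as they are in the test.
        std::vector<unsigned int> candidateAnomalies = {40, 100, 190, 400};
        const unsigned int anomalyTimes[] = {100, 190, 400};
        std::vector<unsigned int> falsePositives;
        std::set_difference(candidateAnomalies.begin(), candidateAnomalies.end(),
                            std::begin(anomalyTimes), std::end(anomalyTimes),
                            std::back_inserter(falsePositives));
        // Only time 40 was flagged without a corresponding injected anomaly.
        assert(falsePositives.size() == 1 && falsePositives[0] == 40);
        double falsePositiveRate =
            static_cast<double>(falsePositives.size()) / 500.0;
        assert(falsePositiveRate <= 0.006); // the 0.6% budget checked next
        return 0;
    }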
CPPUNIT_ASSERT(falsePositiveRate <= 0.006); @@ -1060,10 +1098,12 @@ void CNormalMeanPrecConjugateTest::testIntegerData() { } LOG_DEBUG(<< "meanLogLikelihood1 = " << maths::CBasicStatistics::mean(meanLogLikelihood1) - << ", meanLogLikelihood2 = " << maths::CBasicStatistics::mean(meanLogLikelihood2)); + << ", meanLogLikelihood2 = " + << maths::CBasicStatistics::mean(meanLogLikelihood2)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(meanLogLikelihood1), maths::CBasicStatistics::mean(meanLogLikelihood2), 0.02); + maths::CBasicStatistics::mean(meanLogLikelihood1), + maths::CBasicStatistics::mean(meanLogLikelihood2), 0.02); } { @@ -1084,14 +1124,16 @@ void CNormalMeanPrecConjugateTest::testIntegerData() { TDouble1Vec sample(1, x); double l1, u1; - CPPUNIT_ASSERT(filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, l1, u1)); + CPPUNIT_ASSERT(filter1.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, l1, u1)); CPPUNIT_ASSERT_EQUAL(l1, u1); double p1 = (l1 + u1) / 2.0; meanProbability1.add(p1); sample[0] += uniform[i]; double l2, u2; - CPPUNIT_ASSERT(filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, l2, u2)); + CPPUNIT_ASSERT(filter2.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, l2, u2)); CPPUNIT_ASSERT_EQUAL(l2, u2); double p2 = (l2 + u2) / 2.0; meanProbability2.add(p2); @@ -1130,8 +1172,10 @@ void CNormalMeanPrecConjugateTest::testLowVariationData() { TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval(68.0); double sigma = (interval.second - interval.first) / 2.0; - LOG_DEBUG(<< "68% confidence interval " << core::CContainerPrinter::print(interval) << ", approximate s.t.d. = " << sigma); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / maths::MINIMUM_COEFFICIENT_OF_VARIATION / 430.5, 1.0 / sigma, 7.0); + LOG_DEBUG(<< "68% confidence interval " << core::CContainerPrinter::print(interval) + << ", approximate s.t.d. 
= " << sigma); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0 / maths::MINIMUM_COEFFICIENT_OF_VARIATION / 430.5, 1.0 / sigma, 7.0); } } @@ -1152,8 +1196,9 @@ void CNormalMeanPrecConjugateTest::testPersist() { maths::CNormalMeanPrecConjugate origFilter(makePrior()); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), + TDouble1Vec(1, samples[i]), + TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1172,14 +1217,13 @@ void CNormalMeanPrecConjugateTest::testPersist() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, - decayRate + 0.1, - maths::MINIMUM_CLUSTER_SPLIT_FRACTION, - maths::MINIMUM_CLUSTER_SPLIT_COUNT, - maths::MINIMUM_CATEGORY_COUNT); + maths::SDistributionRestoreParams params( + maths_t::E_ContinuousData, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, + maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); maths::CNormalMeanPrecConjugate restoredFilter(params, traverser); - LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum()); + LOG_DEBUG(<< "orig checksum = " << checksum + << " restored checksum = " << restoredFilter.checksum()); CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum()); // The XML representation of the new filter should be the same as the original @@ -1237,7 +1281,8 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { double points[] = {m - 3.0 * s, m - s, m, m + s, m + 3.0 * s}; double unscaledExpectationVariance; - filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 100, unscaledExpectationVariance); + filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), + 100, unscaledExpectationVariance); LOG_DEBUG(<< "unscaledExpectationVariance = " << unscaledExpectationVariance); for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { @@ -1251,27 +1296,34 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { LOG_DEBUG(<< "Z = " << Z); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, Z, 1e-3); - LOG_DEBUG(<< "sv = " << filter.marginalLikelihoodVariance(weightStyle, weight)); + LOG_DEBUG(<< "sv = " + << filter.marginalLikelihoodVariance(weightStyle, weight)); double expectationVariance; - filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), 100, expectationVariance, weightStyle, weight); + filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), + 100, expectationVariance, weightStyle, weight); LOG_DEBUG(<< "expectationVariance = " << expectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL( - vs * unscaledExpectationVariance, expectationVariance, 0.01 * vs * unscaledExpectationVariance); - CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(weightStyle, weight), - expectationVariance, - 0.01 * filter.marginalLikelihoodVariance(weightStyle, weight)); + vs * unscaledExpectationVariance, expectationVariance, + 0.01 * vs * unscaledExpectationVariance); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + filter.marginalLikelihoodVariance(weightStyle, weight), expectationVariance, + 0.01 * filter.marginalLikelihoodVariance(weightStyle, weight)); double mode = 
filter.marginalLikelihoodMode(weightStyle, weight); double fm; double fmMinusEps, fmPlusEps; - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode - 1e-3), weights, fmMinusEps); - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode), weights, fm); - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode + 1e-3), weights, fmPlusEps); + filter.jointLogMarginalLikelihood( + weightStyle, TDouble1Vec(1, mode - 1e-3), weights, fmMinusEps); + filter.jointLogMarginalLikelihood( + weightStyle, TDouble1Vec(1, mode), weights, fm); + filter.jointLogMarginalLikelihood( + weightStyle, TDouble1Vec(1, mode + 1e-3), weights, fmPlusEps); LOG_DEBUG(<< "log(f(mode)) = " << fm << ", log(f(mode - eps)) = " << fmMinusEps << ", log(f(mode + eps)) = " << fmPlusEps); CPPUNIT_ASSERT(fm > fmMinusEps); CPPUNIT_ASSERT(fm > fmPlusEps); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, (std::exp(fmPlusEps) - std::exp(fmMinusEps)) / 2e-3, 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.0, (std::exp(fmPlusEps) - std::exp(fmMinusEps)) / 2e-3, 1e-6); TDouble1Vec sample(1, 0.0); for (std::size_t l = 0u; l < boost::size(points); ++l) { TDouble1Vec x(1, points[l]); @@ -1284,9 +1336,13 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { double FxPlusEps = std::exp(-(lb + ub) / 2.0); filter.minusLogJointCdf(weightStyle, xMinusEps, weights, lb, ub); double FxMinusEps = std::exp(-(lb + ub) / 2.0); - LOG_DEBUG(<< "x = " << points[l] << ", log(f(x)) = " << fx << ", F(x - eps) = " << FxMinusEps - << ", F(x + eps) = " << FxPlusEps << ", log(dF/dx)) = " << std::log((FxPlusEps - FxMinusEps) / 2e-3)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(fx, std::log((FxPlusEps - FxMinusEps) / 2e-3), 0.05 * std::fabs(fx)); + LOG_DEBUG(<< "x = " << points[l] << ", log(f(x)) = " << fx + << ", F(x - eps) = " << FxMinusEps + << ", F(x + eps) = " << FxPlusEps << ", log(dF/dx)) = " + << std::log((FxPlusEps - FxMinusEps) / 2e-3)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + fx, std::log((FxPlusEps - FxMinusEps) / 2e-3), + 0.05 * std::fabs(fx)); sample[0] = m + (points[l] - m) / std::sqrt(vs); weights[0][0] = 1.0; @@ -1294,7 +1350,8 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { double expectedUpperBound; maths_t::ETail expectedTail; filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, sample, weights, expectedLowerBound, expectedUpperBound, expectedTail); + maths_t::E_TwoSided, weightStyle, sample, weights, + expectedLowerBound, expectedUpperBound, expectedTail); sample[0] = points[l]; weights[0][0] = vs; @@ -1302,7 +1359,8 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { double upperBound; maths_t::ETail tail; filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, sample, weights, lowerBound, upperBound, tail); + maths_t::E_TwoSided, weightStyle, sample, weights, + lowerBound, upperBound, tail); LOG_DEBUG(<< "expectedLowerBound = " << expectedLowerBound); LOG_DEBUG(<< "lowerBound = " << lowerBound); @@ -1313,12 +1371,16 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { if ((expectedLowerBound + expectedUpperBound) < 0.02) { CPPUNIT_ASSERT_DOUBLES_EQUAL( - std::log(expectedLowerBound), std::log(lowerBound), 0.1 * std::fabs(std::log(expectedLowerBound))); + std::log(expectedLowerBound), std::log(lowerBound), + 0.1 * std::fabs(std::log(expectedLowerBound))); CPPUNIT_ASSERT_DOUBLES_EQUAL( - std::log(expectedUpperBound), std::log(upperBound), 0.1 * std::fabs(std::log(expectedUpperBound))); + std::log(expectedUpperBound), std::log(upperBound), + 
0.1 * std::fabs(std::log(expectedUpperBound))); } else { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLowerBound, lowerBound, 0.01 * expectedLowerBound); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedUpperBound, upperBound, 0.01 * expectedUpperBound); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLowerBound, lowerBound, + 0.01 * expectedLowerBound); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedUpperBound, upperBound, + 0.01 * expectedUpperBound); } CPPUNIT_ASSERT_EQUAL(expectedTail, tail); } @@ -1378,7 +1440,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { LOG_DEBUG(<< ""); LOG_DEBUG(<< "****** probabilityOfLessLikelySamples ******"); - const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0}; + const double percentiles[] = {10.0, 20.0, 30.0, 40.0, 50.0, + 60.0, 70.0, 80.0, 90.0}; const std::size_t nSamples[] = {30u, 1000u}; const std::size_t nScaledSamples = 10000u; @@ -1409,7 +1472,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { TDouble1Vec sample(1, unscaledSamples[j]); double lowerBound, upperBound; - CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)); + CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, lowerBound, upperBound)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); @@ -1417,7 +1481,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { std::sort(probabilities.begin(), probabilities.end()); for (std::size_t j = 0; j < boost::size(percentiles); ++j) { - std::size_t index = static_cast<std::size_t>(static_cast<double>(nScaledSamples) * percentiles[j] / 100.0); + std::size_t index = static_cast<std::size_t>( + static_cast<double>(nScaledSamples) * percentiles[j] / 100.0); double error = std::fabs(probabilities[index] - percentiles[j] / 100.0); expectedPercentileErrors.push_back(error); expectedTotalError += error; @@ -1428,7 +1493,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { LOG_DEBUG(<< "**** variance scale = " << varianceScales[j] << " ****"); TDoubleVec scaledSamples; - rng.generateNormalSamples(mean, varianceScales[j] * variance, nScaledSamples, scaledSamples); + rng.generateNormalSamples(mean, varianceScales[j] * variance, + nScaledSamples, scaledSamples); TDoubleVec probabilities; probabilities.reserve(nScaledSamples); @@ -1436,13 +1502,12 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { double lowerBound, upperBound; maths_t::ETail tail; - CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, scaledSamples[k]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), - lowerBound, - upperBound, - tail)); + CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), + TDouble1Vec(1, scaledSamples[k]), + TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), + lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); @@ -1451,12 +1516,15 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { double totalError = 0.0; for (std::size_t k = 0; k < boost::size(percentiles); ++k) { - std::size_t index = static_cast<std::size_t>(static_cast<double>(nScaledSamples) * percentiles[k] / 100.0); + std::size_t index = static_cast<std::size_t>( + static_cast<double>(nScaledSamples) * percentiles[k] / 100.0); double error = fabs(probabilities[index] - percentiles[k] / 100.0); totalError += error; - double errorThreshold = percentileErrorTolerances[i] + expectedPercentileErrors[k]; + double errorThreshold = percentileErrorTolerances[i] + + expectedPercentileErrors[k]; - LOG_DEBUG(<< "percentile = " << percentiles[k] << ", probability = " << probabilities[index] << ", error = " << error + LOG_DEBUG(<< "percentile = " << percentiles[k] << ", probability = " + << probabilities[index] << ", error = " << error << ", error threshold = " << errorThreshold); CPPUNIT_ASSERT(error < errorThreshold); @@ -1464,7 +1532,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { double totalErrorThreshold = totalErrorTolerances[i] + expectedTotalError; - LOG_DEBUG(<< "totalError = " << totalError << ", totalError threshold = " << totalErrorThreshold); + LOG_DEBUG(<< "totalError = " << totalError + << ", totalError threshold = " << totalErrorThreshold); CPPUNIT_ASSERT(totalError < totalErrorThreshold); totalTotalError += totalError; @@ -1482,7 +1551,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { for (std::size_t i = 0; i < boost::size(varianceScales); ++i) { LOG_DEBUG(<< "**** variance scale = " << varianceScales[i] << " ****"); - boost::math::normal_distribution<> normal(mean, std::sqrt(varianceScales[i] * variance)); + boost::math::normal_distribution<> normal( + mean, std::sqrt(varianceScales[i] * variance)); double expectedDifferentialEntropy = maths::CTools::differentialEntropy(normal); CNormalMeanPrecConjugate filter(makePrior()); @@ -1497,17 +1567,19 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { rng.generateNormalSamples(mean, varianceScales[i] * variance, 10000, scaledSamples); for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), - logLikelihood)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood( + maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), + TDouble1Vec(1, scaledSamples[j]), + TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood)); differentialEntropy -= logLikelihood; } differentialEntropy /= static_cast<double>(scaledSamples.size()); - LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); + LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy + << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedDifferentialEntropy, differentialEntropy, 0.03); } @@ -1520,7 +1592,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { // the variance is correctly estimated if we compensate using a // variance scale.
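The percentile bookkeeping above is a calibration check: when the model is correct, the probability-of-less-likely-samples statistic is approximately Uniform(0,1), so after sorting, the value at index n * q / 100 should sit near q / 100. A self-contained sketch of the check with genuinely uniform draws (the seed and tolerance are illustrative):

    #include <algorithm>
    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <random>
    #include <vector>

    int main() {
        std::mt19937 rng(42);
        std::uniform_real_distribution<double> u01(0.0, 1.0);
        const std::size_t n = 10000;
        std::vector<double> probabilities(n);
        for (auto& p : probabilities) {
            p = u01(rng);
        }
        std::sort(probabilities.begin(), probabilities.end());
        const double percentiles[] = {10.0, 50.0, 90.0};
        for (double q : percentiles) {
            std::size_t index =
                static_cast<std::size_t>(static_cast<double>(n) * q / 100.0);
            // The empirical q-th percentile should be close to q / 100.
            double error = std::fabs(probabilities[index] - q / 100.0);
            assert(error < 0.02);
        }
        return 0;
    }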
- const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0}; + const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, + 85.0, 90.0, 95.0, 99.0}; unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); @@ -1538,8 +1611,10 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { } for (std::size_t i = 0; i < boost::size(testIntervals); ++i) { - TDoubleDoublePr confidenceInterval = filter.confidenceIntervalPrecision(testIntervals[i]); - if (precision < confidenceInterval.first || precision > confidenceInterval.second) { + TDoubleDoublePr confidenceInterval = + filter.confidenceIntervalPrecision(testIntervals[i]); + if (precision < confidenceInterval.first || + precision > confidenceInterval.second) { ++errors[i]; } } @@ -1547,7 +1622,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { for (std::size_t i = 0; i < boost::size(testIntervals); ++i) { double interval = 100.0 * errors[i] / 1000.0; - LOG_DEBUG(<< "interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[i])); + LOG_DEBUG(<< "interval = " << interval + << ", expectedInterval = " << (100.0 - testIntervals[i])); CPPUNIT_ASSERT_DOUBLES_EQUAL(interval, (100.0 - testIntervals[i]), 4.0); } } @@ -1555,41 +1631,55 @@ CppUnit::Test* CNormalMeanPrecConjugateTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CNormalMeanPrecConjugateTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMultipleUpdate", - &CNormalMeanPrecConjugateTest::testMultipleUpdate)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testPropagation", - &CNormalMeanPrecConjugateTest::testPropagation)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMeanEstimation", - &CNormalMeanPrecConjugateTest::testMeanEstimation)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testPrecisionEstimation", - &CNormalMeanPrecConjugateTest::testPrecisionEstimation)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMarginalLikelihood", - &CNormalMeanPrecConjugateTest::testMarginalLikelihood)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean", - &CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode", - &CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode)); suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( - "CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance", &CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance)); + "CNormalMeanPrecConjugateTest::testMultipleUpdate", + &CNormalMeanPrecConjugateTest::testMultipleUpdate)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testPropagation", + &CNormalMeanPrecConjugateTest::testPropagation)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testMeanEstimation", + &CNormalMeanPrecConjugateTest::testMeanEstimation)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testPrecisionEstimation", + &CNormalMeanPrecConjugateTest::testPrecisionEstimation)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testMarginalLikelihood", + &CNormalMeanPrecConjugateTest::testMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean", + &CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode", + &CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance", + &CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood", + &CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testCdf", &CNormalMeanPrecConjugateTest::testCdf)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples", + &CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testAnomalyScore", + &CNormalMeanPrecConjugateTest::testAnomalyScore)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testIntegerData", + &CNormalMeanPrecConjugateTest::testIntegerData)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testLowVariationData", + &CNormalMeanPrecConjugateTest::testLowVariationData)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testPersist", &CNormalMeanPrecConjugateTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( + "CNormalMeanPrecConjugateTest::testSeasonalVarianceScale", + &CNormalMeanPrecConjugateTest::testSeasonalVarianceScale)); suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>( - "CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood", &CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testCdf", - &CNormalMeanPrecConjugateTest::testCdf)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples", - &CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testAnomalyScore", - &CNormalMeanPrecConjugateTest::testAnomalyScore)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testIntegerData", - &CNormalMeanPrecConjugateTest::testIntegerData)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testLowVariationData", - &CNormalMeanPrecConjugateTest::testLowVariationData)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testPersist", - &CNormalMeanPrecConjugateTest::testPersist)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testSeasonalVarianceScale", - &CNormalMeanPrecConjugateTest::testSeasonalVarianceScale)); - suiteOfTests->addTest(new CppUnit::TestCaller<CNormalMeanPrecConjugateTest>("CNormalMeanPrecConjugateTest::testCountVarianceScale", - &CNormalMeanPrecConjugateTest::testCountVarianceScale)); + "CNormalMeanPrecConjugateTest::testCountVarianceScale", + &CNormalMeanPrecConjugateTest::testCountVarianceScale)); return suiteOfTests; } diff --git a/lib/maths/unittest/COneOfNPriorTest.cc b/lib/maths/unittest/COneOfNPriorTest.cc index
82ecf0ab5b..cb944b2888 100644 --- a/lib/maths/unittest/COneOfNPriorTest.cc +++ b/lib/maths/unittest/COneOfNPriorTest.cc @@ -58,7 +58,8 @@ using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin<maths::CNormalMeanPrecConjugate>; using CPoissonMeanConjugate = CPriorTestInterfaceMixin<maths::CPoissonMeanConjugate>; -COneOfNPrior::TPriorPtrVec clone(const TPriorPtrVec& models, const TOptionalDouble& decayRate = TOptionalDouble()) { +COneOfNPrior::TPriorPtrVec clone(const TPriorPtrVec& models, + const TOptionalDouble& decayRate = TOptionalDouble()) { COneOfNPrior::TPriorPtrVec result; result.reserve(models.size()); for (std::size_t i = 0u; i < models.size(); ++i) { @@ -90,10 +91,14 @@ void COneOfNPriorTest::testFilter() { LOG_DEBUG(<< "+--------------------------------+"); TPriorPtrVec models; - models.push_back(TPriorPtr(maths::CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone())); - models.push_back(TPriorPtr(maths::CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - models.push_back(TPriorPtr(maths::CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - models.push_back(TPriorPtr(maths::CPoissonMeanConjugate::nonInformativePrior().clone())); + models.push_back(TPriorPtr( + maths::CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr(maths::CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); + models.push_back(TPriorPtr( + maths::CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back( + TPriorPtr(maths::CPoissonMeanConjugate::nonInformativePrior().clone())); COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData)); @@ -113,13 +118,15 @@ void COneOfNPriorTest::testFilter() { CPPUNIT_ASSERT_EQUAL(std::size_t(4), filter.models().size()); - filter.removeModels(maths::CPrior::CModelFilter().remove(maths::CPrior::E_Poisson).remove(maths::CPrior::E_Gamma)); + filter.removeModels( + maths::CPrior::CModelFilter().remove(maths::CPrior::E_Poisson).remove(maths::CPrior::E_Gamma)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), filter.models().size()); CPPUNIT_ASSERT_EQUAL(maths::CPrior::E_LogNormal, filter.models()[0]->type()); CPPUNIT_ASSERT_EQUAL(maths::CPrior::E_Normal, filter.models()[1]->type()); TDoubleVec weights = filter.weights(); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::accumulate(weights.begin(), weights.end(), 0.0), 1e-6); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0, std::accumulate(weights.begin(), weights.end(), 0.0), 1e-6); } void COneOfNPriorTest::testMultipleUpdate() { @@ -133,8 +140,10 @@ void COneOfNPriorTest::testMultipleUpdate() { using TEqual = maths::CEqualWithTolerance<double>; TPriorPtrVec models; - models.push_back(TPriorPtr(maths::CPoissonMeanConjugate::nonInformativePrior().clone())); - models.push_back(TPriorPtr(maths::CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back( + TPriorPtr(maths::CPoissonMeanConjugate::nonInformativePrior().clone())); + models.push_back(TPriorPtr( + maths::CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); const double mean = 10.0; const double variance = 3.0; @@ -180,13 +189,17 @@ void COneOfNPriorTest::testMultipleUpdate() { COneOfNPrior::TPriorCPtrVec models2 = filter2.models(); CPPUNIT_ASSERT(models1.size() == models2.size()); - const maths::CPoissonMeanConjugate* poisson1 = dynamic_cast<const maths::CPoissonMeanConjugate*>(models1[0]); - const maths::CPoissonMeanConjugate* poisson2 = dynamic_cast<const maths::CPoissonMeanConjugate*>(models2[0]); + const maths::CPoissonMeanConjugate* poisson1 = + dynamic_cast<const maths::CPoissonMeanConjugate*>(models1[0]); + const maths::CPoissonMeanConjugate* poisson2 = + dynamic_cast<const maths::CPoissonMeanConjugate*>(models2[0]); CPPUNIT_ASSERT(poisson1 && poisson2); CPPUNIT_ASSERT(poisson1->equalTolerance(*poisson2, equal)); - const maths::CNormalMeanPrecConjugate* normal1 = dynamic_cast<const maths::CNormalMeanPrecConjugate*>(models1[1]); - const maths::CNormalMeanPrecConjugate* normal2 = dynamic_cast<const maths::CNormalMeanPrecConjugate*>(models2[1]); + const maths::CNormalMeanPrecConjugate* normal1 = + dynamic_cast<const maths::CNormalMeanPrecConjugate*>(models1[1]); + const maths::CNormalMeanPrecConjugate* normal2 = + dynamic_cast<const maths::CNormalMeanPrecConjugate*>(models2[1]); CPPUNIT_ASSERT(normal1 && normal2); CPPUNIT_ASSERT(normal1->equalTolerance(*normal2, equal)); @@ -215,7 +228,8 @@ void COneOfNPriorTest::testWeights() { { TPriorPtrVec models; models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); using TEqual = maths::CEqualWithTolerance<double>; TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-10); @@ -223,7 +237,8 @@ void COneOfNPriorTest::testWeights() { for (std::size_t rate = 0; rate < boost::size(decayRates); ++rate) { // Test that the filter weights stay normalized. - COneOfNPrior filter(maths::COneOfNPrior(clone(models, decayRates[rate]), E_ContinuousData, decayRates[rate])); + COneOfNPrior filter(maths::COneOfNPrior( + clone(models, decayRates[rate]), E_ContinuousData, decayRates[rate])); const double mean = 20.0; const double variance = 3.0; @@ -254,9 +269,12 @@ void COneOfNPriorTest::testWeights() { for (std::size_t decayRate = 0; decayRate < boost::size(decayRates); ++decayRate) { TPriorPtrVec models; - models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_IntegerData).clone())); - COneOfNPrior filter(maths::COneOfNPrior(clone(models, decayRates[decayRate]), E_IntegerData, decayRates[decayRate])); + models.push_back( + TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(E_IntegerData).clone())); + COneOfNPrior filter(maths::COneOfNPrior( + clone(models, decayRates[decayRate]), E_IntegerData, decayRates[decayRate])); TUIntVec samples; rng.generatePoissonSamples(rate, 10000, samples); @@ -268,7 +286,8 @@ void COneOfNPriorTest::testWeights() { TDoubleVec logWeights = filter.logWeights(); - LOG_DEBUG(<< "log weights ratio = " << (logWeights[1] - logWeights[0]) / previousLogWeightRatio); + LOG_DEBUG(<< "log weights ratio = " + << (logWeights[1] - logWeights[0]) / previousLogWeightRatio); // Should be approximately 0.2: we reduce the filter memory // by a factor of 5 each iteration. @@ -295,7 +314,8 @@ void COneOfNPriorTest::testModels() { { TPriorPtrVec models; models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); // The mean of the Poisson model and the mean and variance of the // Normal model are all close to the rate r.
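The comment above is the crux of this test: a Poisson(r) distribution has mean r and variance r, so when the data are Poisson both candidate models end up describing much the same density, and the fitted normal should report mean close to r and precision close to 1/r. The moment identities themselves, checked with boost::math (the rate is chosen arbitrarily):

    #include <boost/math/distributions/poisson.hpp>
    #include <cassert>
    #include <cmath>

    int main() {
        double rate = 10.0;
        boost::math::poisson_distribution<> poisson(rate);
        // Mean and variance of a Poisson both equal its rate...
        assert(std::fabs(boost::math::mean(poisson) - rate) < 1e-12);
        assert(std::fabs(boost::math::variance(poisson) - rate) < 1e-12);
        // ...so a moment-matched normal has precision 1 / rate.
        double expectedPrecision = 1.0 / boost::math::variance(poisson);
        assert(std::fabs(expectedPrecision - 1.0 / rate) < 1e-12);
        return 0;
    }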
@@ -313,12 +333,16 @@ void COneOfNPriorTest::testModels() { } COneOfNPrior::TPriorCPtrVec posteriorModels = filter.models(); - const maths::CPoissonMeanConjugate* poissonModel = dynamic_cast<const maths::CPoissonMeanConjugate*>(posteriorModels[0]); - const maths::CNormalMeanPrecConjugate* normalModel = dynamic_cast<const maths::CNormalMeanPrecConjugate*>(posteriorModels[1]); + const maths::CPoissonMeanConjugate* poissonModel = + dynamic_cast<const maths::CPoissonMeanConjugate*>(posteriorModels[0]); + const maths::CNormalMeanPrecConjugate* normalModel = + dynamic_cast<const maths::CNormalMeanPrecConjugate*>(posteriorModels[1]); CPPUNIT_ASSERT(poissonModel && normalModel); - LOG_DEBUG(<< "Poisson mean = " << poissonModel->priorMean() << ", expectedMean = " << rate); - LOG_DEBUG(<< "Normal mean = " << normalModel->mean() << ", expectedMean = " << mean << ", precision = " << normalModel->precision() + LOG_DEBUG(<< "Poisson mean = " << poissonModel->priorMean() + << ", expectedMean = " << rate); + LOG_DEBUG(<< "Normal mean = " << normalModel->mean() << ", expectedMean = " << mean + << ", precision = " << normalModel->precision() << ", expectedPrecision " << (1.0 / variance)); CPPUNIT_ASSERT(std::fabs(poissonModel->priorMean() - rate) / rate < 0.01); @@ -329,7 +353,8 @@ void COneOfNPriorTest::testModels() { { TPriorPtrVec models; models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); const double mean = 10.0; const double variance = 2.0; @@ -347,12 +372,16 @@ void COneOfNPriorTest::testModels() { } COneOfNPrior::TPriorCPtrVec posteriorModels = filter.models(); - const maths::CPoissonMeanConjugate* poissonModel = dynamic_cast<const maths::CPoissonMeanConjugate*>(posteriorModels[0]); - const maths::CNormalMeanPrecConjugate* normalModel = dynamic_cast<const maths::CNormalMeanPrecConjugate*>(posteriorModels[1]); + const maths::CPoissonMeanConjugate* poissonModel = + dynamic_cast<const maths::CPoissonMeanConjugate*>(posteriorModels[0]); + const maths::CNormalMeanPrecConjugate* normalModel = + dynamic_cast<const maths::CNormalMeanPrecConjugate*>(posteriorModels[1]); CPPUNIT_ASSERT(poissonModel && normalModel); - LOG_DEBUG(<< "Poisson mean = " << poissonModel->priorMean() << ", expectedMean = " << rate); - LOG_DEBUG(<< "Normal mean = " << normalModel->mean() << ", expectedMean = " << mean << ", precision = " << normalModel->precision() + LOG_DEBUG(<< "Poisson mean = " << poissonModel->priorMean() + << ", expectedMean = " << rate); + LOG_DEBUG(<< "Normal mean = " << normalModel->mean() << ", expectedMean = " << mean + << ", precision = " << normalModel->precision() << ", expectedPrecision " << (1.0 / variance)); CPPUNIT_ASSERT(std::fabs(poissonModel->priorMean() - rate) / rate < 0.01); @@ -380,7 +409,8 @@ void COneOfNPriorTest::testModelSelection() { TPriorPtrVec models; models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); const unsigned int nSamples = 10000u; const double rate = 2.0; @@ -402,14 +432,18 @@ void COneOfNPriorTest::testModelSelection() { filter.addSamples(TDouble1Vec(1, static_cast<double>(samples[i]))); } - double expectedLogWeightRatio = (normalExpectedLogWeight - poissonExpectedLogWeight) * static_cast<double>(nSamples); + double expectedLogWeightRatio = (normalExpectedLogWeight - poissonExpectedLogWeight) * + static_cast<double>(nSamples); TDoubleVec logWeights = filter.logWeights(); double logWeightRatio = logWeights[1] - logWeights[0]; - LOG_DEBUG(<< "expectedLogWeightRatio = " << expectedLogWeightRatio << ", logWeightRatio = " << logWeightRatio); + LOG_DEBUG(<< "expectedLogWeightRatio = " << expectedLogWeightRatio + << ", logWeightRatio = " << logWeightRatio); - CPPUNIT_ASSERT(std::fabs(logWeightRatio - expectedLogWeightRatio) / std::fabs(expectedLogWeightRatio) < 0.05); + CPPUNIT_ASSERT(std::fabs(logWeightRatio - expectedLogWeightRatio) / + std::fabs(expectedLogWeightRatio) < + 0.05); } { @@ -431,7 +465,8 @@ void COneOfNPriorTest::testModelSelection() { TPriorPtrVec models; models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); const unsigned int nSamples[] = {1000u, 2000u, 3000u}; const double mean = 100.0; @@ -456,14 +491,18 @@ void COneOfNPriorTest::testModelSelection() { filter.addSamples(TDouble1Vec(1, samples[i])); } - double expectedLogWeightRatio = (poissonExpectedLogWeight - normalExpectedLogWeight) * static_cast<double>(nSamples[n]); + double expectedLogWeightRatio = (poissonExpectedLogWeight - normalExpectedLogWeight) * + static_cast<double>(nSamples[n]); TDoubleVec logWeights = filter.logWeights(); double logWeightRatio = logWeights[0] - logWeights[1]; - LOG_DEBUG(<< "expectedLogWeightRatio = " << expectedLogWeightRatio << ", logWeightRatio = " << logWeightRatio); + LOG_DEBUG(<< "expectedLogWeightRatio = " << expectedLogWeightRatio + << ", logWeightRatio = " << logWeightRatio); - CPPUNIT_ASSERT(std::fabs(logWeightRatio - expectedLogWeightRatio) / std::fabs(expectedLogWeightRatio) < 0.35); + CPPUNIT_ASSERT(std::fabs(logWeightRatio - expectedLogWeightRatio) / + std::fabs(expectedLogWeightRatio) < + 0.35); } } { @@ -479,14 +518,18 @@ void COneOfNPriorTest::testModelSelection() { rng.random_shuffle(samples.begin(), samples.end()); TPriorPtrVec models; - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - maths::CXMeansOnline1d clusterer( - maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight); - maths::CNormalMeanPrecConjugate normal = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, + maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight); + maths::CNormalMeanPrecConjugate normal = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::COneOfNPrior::TPriorPtrVec mode; mode.push_back(COneOfNPrior::TPriorPtr(normal.clone())); - models.push_back(TPriorPtr( - new maths::CMultimodalPrior(maths_t::E_ContinuousData, clusterer, maths::COneOfNPrior(mode, maths_t::E_ContinuousData)))); + models.push_back(TPriorPtr(new maths::CMultimodalPrior( + maths_t::E_ContinuousData, clusterer, + maths::COneOfNPrior(mode, maths_t::E_ContinuousData)))); COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { @@ -512,9 +555,12 @@ void COneOfNPriorTest::testMarginalLikelihood() { for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { TPriorPtrVec models; models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone())); -
models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(dataTypes[t]).clone())); - models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(dataTypes[t]).clone())); - models.push_back(TPriorPtr(CGammaRateConjugate::nonInformativePrior(dataTypes[t]).clone())); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(dataTypes[t]).clone())); + models.push_back(TPriorPtr( + CLogNormalMeanPrecConjugate::nonInformativePrior(dataTypes[t]).clone())); + models.push_back(TPriorPtr( + CGammaRateConjugate::nonInformativePrior(dataTypes[t]).clone())); COneOfNPrior filter(maths::COneOfNPrior(clone(models), dataTypes[t])); const double location = 1.0; @@ -527,17 +573,16 @@ void COneOfNPriorTest::testMarginalLikelihood() { filter.addSamples(samples); maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight}; + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, + maths_t::E_SampleCountWeight}; double weights[] = {0.1, 1.0, 10.0}; for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf(maths_t::TWeightStyleVec(1, weightStyles[i]), - TDouble1Vec(1, 10000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), - lb, - ub); + filter.minusLogJointCdf( + maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 10000.0), + TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -553,8 +598,10 @@ void COneOfNPriorTest::testMarginalLikelihood() { test::CRandomNumbers rng; TPriorPtrVec models; - models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr( + CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr( + CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); COneOfNPrior filter(maths::COneOfNPrior(models, E_ContinuousData)); @@ -574,7 +621,8 @@ void COneOfNPriorTest::testMarginalLikelihood() { double dx = (interval.second - interval.first) / 20.0; for (std::size_t j = 0u; j < 20; ++j, x += dx) { double fx; - CPPUNIT_ASSERT(filter.jointLogMarginalLikelihood(TDouble1Vec(1, x), fx) == maths_t::E_FpNoErrors); + CPPUNIT_ASSERT(filter.jointLogMarginalLikelihood(TDouble1Vec(1, x), fx) == + maths_t::E_FpNoErrors); fx = std::exp(fx); double lb; @@ -620,11 +668,14 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() { for (std::size_t i = 0u; i < boost::size(means); ++i) { for (std::size_t j = 0u; j < boost::size(variances); ++j) { - LOG_DEBUG(<< "*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); + LOG_DEBUG(<< "*** mean = " << means[i] + << ", variance = " << variances[j] << " ***"); TPriorPtrVec models; - models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); + models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); COneOfNPrior filter(maths::COneOfNPrior(models, 
E_ContinuousData)); @@ -642,11 +693,14 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() { CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); if (k % 10 == 0) { - LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); + LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() + << ", expectedMean = " << expectedMean); } // The error is at the precision of the numerical integration. - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 0.01 * expectedMean); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, + filter.marginalLikelihoodMean(), + 0.01 * expectedMean); } } } @@ -660,11 +714,14 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() { for (std::size_t i = 0u; i < boost::size(locations); ++i) { for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { - LOG_DEBUG(<< "*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); + LOG_DEBUG(<< "*** location = " << locations[i] + << ", squareScale = " << squareScales[j] << " ***"); TPriorPtrVec models; - models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); + models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); COneOfNPrior filter(maths::COneOfNPrior(models, E_ContinuousData)); @@ -684,15 +741,20 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() { CPPUNIT_ASSERT(filter.marginalLikelihoodMeanForTest(expectedMean)); if (k % 10 == 0) { - LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() << ", expectedMean = " << expectedMean); + LOG_DEBUG(<< "marginalLikelihoodMean = " << filter.marginalLikelihoodMean() + << ", expectedMean = " << expectedMean); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, filter.marginalLikelihoodMean(), 0.2 * expectedMean); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, + filter.marginalLikelihoodMean(), + 0.2 * expectedMean); - relativeError.add(std::fabs(filter.marginalLikelihoodMean() - expectedMean) / expectedMean); + relativeError.add(std::fabs(filter.marginalLikelihoodMean() - expectedMean) / + expectedMean); } - LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError)); + LOG_DEBUG(<< "relativeError = " + << maths::CBasicStatistics::mean(relativeError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(relativeError) < 0.02); } } @@ -717,11 +779,14 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() { for (std::size_t i = 0u; i < boost::size(means); ++i) { for (std::size_t j = 0u; j < boost::size(variances); ++j) { - LOG_DEBUG(<< "*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); + LOG_DEBUG(<< "*** mean = " << means[i] + << ", variance = " << variances[j] << " ***"); TPriorPtrVec models; - models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); + models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); COneOfNPrior filter(maths::COneOfNPrior(models, E_ContinuousData)); @@ 
-735,14 +800,18 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() { std::size_t iterations = 12; double mode; double fmode; - maths::CCompositeFunctions::CExp<maths::CPrior::CLogMarginalLikelihood> likelihood(filter); + maths::CCompositeFunctions::CExp<maths::CPrior::CLogMarginalLikelihood> likelihood( + filter); double a = means[i] - 2.0 * std::sqrt(variances[j]); double b = means[i] + 2.0 * std::sqrt(variances[j]); - maths::CSolvers::maximize(a, b, likelihood(a), likelihood(b), likelihood, 0.0, iterations, mode, fmode); + maths::CSolvers::maximize(a, b, likelihood(a), likelihood(b), + likelihood, 0.0, iterations, mode, fmode); - LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode() << ", expectedMode = " << mode); + LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode() + << ", expectedMode = " << mode); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mode, filter.marginalLikelihoodMode(), 0.01 * mode); + CPPUNIT_ASSERT_DOUBLES_EQUAL(mode, filter.marginalLikelihoodMode(), + 0.01 * mode); } } } @@ -755,11 +824,14 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() { for (std::size_t i = 0u; i < boost::size(locations); ++i) { for (std::size_t j = 0u; j < boost::size(squareScales); ++j) { - LOG_DEBUG(<< "*** location = " << locations[i] << ", squareScale = " << squareScales[j] << " ***"); + LOG_DEBUG(<< "*** location = " << locations[i] + << ", squareScale = " << squareScales[j] << " ***"); TPriorPtrVec models; - models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); - models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone())); + models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); + models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData) + .clone())); COneOfNPrior filter(maths::COneOfNPrior(models, E_ContinuousData)); @@ -773,15 +845,21 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() { std::size_t iterations = 12; double mode; double fmode; - maths::CCompositeFunctions::CExp<maths::CPrior::CLogMarginalLikelihood> likelihood(filter); - boost::math::lognormal_distribution<> logNormal(locations[i], std::sqrt(squareScales[j])); + maths::CCompositeFunctions::CExp<maths::CPrior::CLogMarginalLikelihood> likelihood( + filter); + boost::math::lognormal_distribution<> logNormal( + locations[i], std::sqrt(squareScales[j])); double a = 0.01; - double b = boost::math::mode(logNormal) + 1.0 * boost::math::standard_deviation(logNormal); - maths::CSolvers::maximize(a, b, likelihood(a), likelihood(b), likelihood, 0.0, iterations, mode, fmode); + double b = boost::math::mode(logNormal) + + 1.0 * boost::math::standard_deviation(logNormal); + maths::CSolvers::maximize(a, b, likelihood(a), likelihood(b), + likelihood, 0.0, iterations, mode, fmode); - LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode() << ", expectedMode = " << mode); + LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode() + << ", expectedMode = " << mode); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mode, filter.marginalLikelihoodMode(), 0.05 * mode); + CPPUNIT_ASSERT_DOUBLES_EQUAL(mode, filter.marginalLikelihoodMode(), + 0.05 * mode); } } } @@ -806,11 +884,14 @@ void COneOfNPriorTest::testMarginalLikelihoodVariance() { for (std::size_t i = 0u; i < boost::size(means); ++i) { for (std::size_t j = 0u; j < boost::size(variances); ++j) { - LOG_DEBUG(<< "*** mean = " << means[i] << ", variance = " << variances[j] << " ***"); + LOG_DEBUG(<< "*** mean = " << means[i] + << ", variance = " << variances[j] << " ***"); TPriorPtrVec models; -
-            models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
-            models.push_back(TPriorPtr(CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone()));
+            models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData)
+                                           .clone()));
+            models.push_back(TPriorPtr(
+                CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone()));
 
             COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData));
@@ -827,17 +908,23 @@
 
                 double expectedVariance;
                 CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance));
                 if (k % 10 == 0) {
-                    LOG_DEBUG(<< "marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance()
+                    LOG_DEBUG(<< "marginalLikelihoodVariance = "
+                              << filter.marginalLikelihoodVariance()
                               << ", expectedVariance = " << expectedVariance);
                 }
 
                 // The error is at the precision of the numerical integration.
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.02 * expectedVariance);
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance,
+                                             filter.marginalLikelihoodVariance(),
+                                             0.02 * expectedVariance);
 
-                relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance);
+                relativeError.add(std::fabs(expectedVariance -
+                                            filter.marginalLikelihoodVariance()) /
+                                  expectedVariance);
             }
 
-            LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError));
+            LOG_DEBUG(<< "relativeError = "
+                      << maths::CBasicStatistics::mean(relativeError));
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(relativeError) < 2e-3);
         }
     }
@@ -851,11 +938,14 @@
 
     for (std::size_t i = 0u; i < boost::size(shapes); ++i) {
         for (std::size_t j = 0u; j < boost::size(scales); ++j) {
-            LOG_DEBUG(<< "*** shape = " << shapes[i] << ", scale = " << scales[j] << " ***");
+            LOG_DEBUG(<< "*** shape = " << shapes[i]
+                      << ", scale = " << scales[j] << " ***");
 
             TPriorPtrVec models;
-            models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
-            models.push_back(TPriorPtr(CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone()));
+            models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData)
+                                           .clone()));
+            models.push_back(TPriorPtr(
+                CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone()));
 
             COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData));
@@ -875,18 +965,24 @@
 
                 CPPUNIT_ASSERT(filter.marginalLikelihoodVarianceForTest(expectedVariance));
                 if (k % 10 == 0) {
-                    LOG_DEBUG(<< "marginalLikelihoodVariance = " << filter.marginalLikelihoodVariance()
+                    LOG_DEBUG(<< "marginalLikelihoodVariance = "
+                              << filter.marginalLikelihoodVariance()
                               << ", expectedVariance = " << expectedVariance);
                 }
 
                 // The error is mainly due to the truncation in the
                 // integration range used to compute the expected mean.
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.01 * expectedVariance);
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance,
+                                             filter.marginalLikelihoodVariance(),
+                                             0.01 * expectedVariance);
 
-                relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance);
+                relativeError.add(std::fabs(expectedVariance -
+                                            filter.marginalLikelihoodVariance()) /
+                                  expectedVariance);
             }
 
-            LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError));
+            LOG_DEBUG(<< "relativeError = "
+                      << maths::CBasicStatistics::mean(relativeError));
             CPPUNIT_ASSERT(maths::CBasicStatistics::mean(relativeError) < 3e-3);
         }
     }
@@ -904,8 +1000,10 @@ void COneOfNPriorTest::testSampleMarginalLikelihood() {
     const double variance = 2.0;
 
     TPriorPtrVec models;
-    models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
-    models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
+    models.push_back(TPriorPtr(
+        CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
+    models.push_back(TPriorPtr(
+        CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
 
     COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData));
@@ -932,12 +1030,14 @@
     posteriorModels[1]->sampleMarginalLikelihood(5, logNormalSamples);
 
     TDoubleVec expectedSampled(normalSamples);
-    expectedSampled.insert(expectedSampled.end(), logNormalSamples.begin(), logNormalSamples.end());
+    expectedSampled.insert(expectedSampled.end(), logNormalSamples.begin(),
+                           logNormalSamples.end());
 
     LOG_DEBUG(<< "expected samples = " << core::CContainerPrinter::print(expectedSampled)
               << ", samples = " << core::CContainerPrinter::print(sampled));
 
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), core::CContainerPrinter::print(sampled));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled),
+                         core::CContainerPrinter::print(sampled));
 
     rng.generateNormalSamples(mean, variance, 80, samples);
@@ -956,12 +1056,14 @@
     posteriorModels[1]->sampleMarginalLikelihood(0, logNormalSamples);
 
     expectedSampled = normalSamples;
-    expectedSampled.insert(expectedSampled.end(), logNormalSamples.begin(), logNormalSamples.end());
+    expectedSampled.insert(expectedSampled.end(), logNormalSamples.begin(),
+                           logNormalSamples.end());
 
     LOG_DEBUG(<< "expected samples = " << core::CContainerPrinter::print(expectedSampled)
               << ", samples = " << core::CContainerPrinter::print(sampled));
 
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled), core::CContainerPrinter::print(sampled));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampled),
+                         core::CContainerPrinter::print(sampled));
 }
 
 void COneOfNPriorTest::testCdf() {
@@ -978,9 +1080,12 @@ void COneOfNPriorTest::testCdf() {
     test::CRandomNumbers rng;
 
     TPriorPtrVec models;
-    models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
-    models.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
-    models.push_back(TPriorPtr(CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone()));
+    models.push_back(TPriorPtr(
+        CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
+    models.push_back(TPriorPtr(
+        CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
+    models.push_back(TPriorPtr(
+        CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone()));
 
     COneOfNPrior filter(maths::COneOfNPrior(clone(models), E_ContinuousData));
 
     for (std::size_t i = 0u; i < boost::size(n); ++i) {
@@ -1003,8 +1108,8 @@ void COneOfNPriorTest::testCdf() {
             CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lb, ub));
             double fComplement = (lb + ub) / 2.0;
 
-            LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f)
-                      << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement));
+            LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = "
+                      << (fComplement == 0.0 ? fComplement : -fComplement));
             CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10);
         }
     }
@@ -1023,8 +1128,10 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() {
     const double vs[] = {0.5, 1.0, 2.0};
 
     TPriorPtrVec initialModels;
-    initialModels.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
-    initialModels.push_back(TPriorPtr(CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
+    initialModels.push_back(TPriorPtr(
+        CNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
+    initialModels.push_back(TPriorPtr(
+        CLogNormalMeanPrecConjugate::nonInformativePrior(E_ContinuousData).clone()));
 
     COneOfNPrior filter(maths::COneOfNPrior(clone(initialModels), E_ContinuousData));
@@ -1040,7 +1147,8 @@
 
         double lb, ub;
         maths_t::ETail tail;
-        CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb, ub));
+        CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
+                                                             sample, lb, ub));
         CPPUNIT_ASSERT_EQUAL(lb, ub);
         double probability = (lb + ub) / 2.0;
@@ -1051,78 +1159,66 @@
 
         COneOfNPrior::TPriorCPtrVec models(filter.models());
         for (std::size_t j = 0u; j < weights.size(); ++j) {
             double weight = weights[j];
-            CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                                     maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
-                                                                     TDouble1Vec(1, sample[0]),
-                                                                     TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)),
-                                                                     lb,
-                                                                     ub,
-                                                                     tail));
+            CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples(
+                maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
+                TDouble1Vec(1, sample[0]),
+                TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail));
             CPPUNIT_ASSERT_EQUAL(lb, ub);
             double modelProbability = (lb + ub) / 2.0;
             expectedProbability += weight * modelProbability;
         }
 
-        LOG_DEBUG(<< "weights = " << core::CContainerPrinter::print(weights) << ", expectedProbability = " << expectedProbability
+        LOG_DEBUG(<< "weights = " << core::CContainerPrinter::print(weights)
+                  << ", expectedProbability = " << expectedProbability
                   << ", probability = " << probability);
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 1e-3 * std::max(expectedProbability, probability));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability,
+                                     1e-3 * std::max(expectedProbability, probability));
 
         maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);
         for (std::size_t k = 0u; ((i + 1) % 11 == 0) && k < boost::size(vs); ++k) {
-            double mode = filter.marginalLikelihoodMode(weightStyle, TDouble4Vec(1, vs[k]));
+            double mode = filter.marginalLikelihoodMode(weightStyle,
+                                                        TDouble4Vec(1, vs[k]));
             double ss[] = {0.9 * mode, 1.1 * mode};
             LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode);
 
             if (mode > 0.0) {
                 filter.probabilityOfLessLikelySamples(
-                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
+                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]),
+                    TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
                 if (mode > 0.0) {
                     filter.probabilityOfLessLikelySamples(
-                        maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
+                        maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2),
+                        TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
-                    filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow,
-                                                          weightStyle,
-                                                          TDouble1Vec(ss, ss + 2),
-                                                          TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                          lb,
-                                                          ub,
-                                                          tail);
+                    filter.probabilityOfLessLikelySamples(
+                        maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2),
+                        TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
-                    filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove,
-                                                          weightStyle,
-                                                          TDouble1Vec(ss, ss + 2),
-                                                          TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                          lb,
-                                                          ub,
-                                                          tail);
+                    filter.probabilityOfLessLikelySamples(
+                        maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2),
+                        TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
                 }
             }
             if (mode > 0.0) {
                 filter.probabilityOfLessLikelySamples(
-                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
+                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]),
+                    TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
                 filter.probabilityOfLessLikelySamples(
-                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
+                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2),
+                    TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
-                filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow,
-                                                      weightStyle,
-                                                      TDouble1Vec(ss, ss + 2),
-                                                      TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                      lb,
-                                                      ub,
-                                                      tail);
+                filter.probabilityOfLessLikelySamples(
+                    maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2),
+                    TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
-                filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove,
-                                                      weightStyle,
-                                                      TDouble1Vec(ss, ss + 2),
-                                                      TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                      lb,
-                                                      ub,
-                                                      tail);
+                filter.probabilityOfLessLikelySamples(
+                    maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2),
+                    TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
             }
         }
@@ -1138,7 +1234,8 @@ void COneOfNPriorTest::testPersist() {
 
     TPriorPtrVec models;
     models.push_back(TPriorPtr(CPoissonMeanConjugate::nonInformativePrior().clone()));
-    models.push_back(TPriorPtr(CNormalMeanPrecConjugate::nonInformativePrior(E_IntegerData).clone()));
+    models.push_back(TPriorPtr(
+        CNormalMeanPrecConjugate::nonInformativePrior(E_IntegerData).clone()));
 
     const double mean = 10.0;
     const double variance = 3.0;
@@ -1153,8 +1250,9 @@
     maths::COneOfNPrior origFilter(clone(models), E_IntegerData);
     for (std::size_t i = 0u; i < samples.size(); ++i) {
-        origFilter.addSamples(
-            maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
+        origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
+                              TDouble1Vec(1, samples[i]),
+                              TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
     }
     double decayRate = origFilter.decayRate();
     uint64_t checksum = origFilter.checksum();
@@ -1173,14 +1271,13 @@ void COneOfNPriorTest::testPersist() {
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
 
-    maths::SDistributionRestoreParams params(E_IntegerData,
-                                             decayRate + 0.1,
-                                             maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
-                                             maths::MINIMUM_CLUSTER_SPLIT_COUNT,
-                                             maths::MINIMUM_CATEGORY_COUNT);
+    maths::SDistributionRestoreParams params(
+        E_IntegerData, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+        maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
     maths::COneOfNPrior restoredFilter(params, traverser);
 
-    LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
+    LOG_DEBUG(<< "orig checksum = " << checksum
+              << " restored checksum = " << restoredFilter.checksum());
     CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());
 
     // The XML representation of the new filter should be the same as the original
@@ -1196,27 +1293,37 @@ CppUnit::Test* COneOfNPriorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COneOfNPriorTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testFilter", &COneOfNPriorTest::testFilter));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testMultipleUpdate", &COneOfNPriorTest::testMultipleUpdate));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testWeights", &COneOfNPriorTest::testWeights));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testModels", &COneOfNPriorTest::testModels));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testModelSelection", &COneOfNPriorTest::testModelSelection));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testMarginalLikelihood", &COneOfNPriorTest::testMarginalLikelihood));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testSampleMarginalLikelihood",
-                                                                    &COneOfNPriorTest::testSampleMarginalLikelihood));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testMarginalLikelihoodMean",
-                                                                    &COneOfNPriorTest::testMarginalLikelihoodMean));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testMarginalLikelihoodMode",
-                                                                    &COneOfNPriorTest::testMarginalLikelihoodMode));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testMarginalLikelihoodVariance",
-                                                                    &COneOfNPriorTest::testMarginalLikelihoodVariance));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testCdf", &COneOfNPriorTest::testCdf));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testProbabilityOfLessLikelySamples",
-                                                                    &COneOfNPriorTest::testProbabilityOfLessLikelySamples));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>("COneOfNPriorTest::testPersist", &COneOfNPriorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testFilter", &COneOfNPriorTest::testFilter));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testMultipleUpdate", &COneOfNPriorTest::testMultipleUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testWeights", &COneOfNPriorTest::testWeights));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testModels", &COneOfNPriorTest::testModels));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testModelSelection", &COneOfNPriorTest::testModelSelection));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testMarginalLikelihood", &COneOfNPriorTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testSampleMarginalLikelihood",
+        &COneOfNPriorTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testMarginalLikelihoodMean",
+        &COneOfNPriorTest::testMarginalLikelihoodMean));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testMarginalLikelihoodMode",
+        &COneOfNPriorTest::testMarginalLikelihoodMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testMarginalLikelihoodVariance",
+        &COneOfNPriorTest::testMarginalLikelihoodVariance));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testCdf", &COneOfNPriorTest::testCdf));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testProbabilityOfLessLikelySamples",
+        &COneOfNPriorTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COneOfNPriorTest>(
+        "COneOfNPriorTest::testPersist", &COneOfNPriorTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/COrderingsTest.cc b/lib/maths/unittest/COrderingsTest.cc
index 0f312ce219..639bb2c19a 100644
--- a/lib/maths/unittest/COrderingsTest.cc
+++ b/lib/maths/unittest/COrderingsTest.cc
@@ -43,7 +45,9 @@ class CDictionary {
 
     void swap(CDictionary& other) { m_Words.swap(other.m_Words); }
 
-    std::string print() const { return core::CContainerPrinter::print(m_Words); }
+    std::string print() const {
+        return core::CContainerPrinter::print(m_Words);
+    }
 
 private:
     TStrVec m_Words;
@@ -183,22 +185,36 @@ void COrderingsTest::testLess() {
     double three(3.0);
 
     {
-        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, three), three), std::make_pair(std::make_pair(two, two), two)));
-        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, two), three), std::make_pair(std::make_pair(one, three), two)));
-        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, two), two), std::make_pair(std::make_pair(one, two), three)));
-        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), std::make_pair(std::make_pair(one, two), three)));
-        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(two, two), two), std::make_pair(std::make_pair(one, three), three)));
-        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, three), two), std::make_pair(std::make_pair(one, two), three)));
-        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), std::make_pair(std::make_pair(one, two), two)));
+        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, three), three),
+                            std::make_pair(std::make_pair(two, two), two)));
+        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, two), three),
+                            std::make_pair(std::make_pair(one, three), two)));
+        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, two), two),
+                            std::make_pair(std::make_pair(one, two), three)));
+        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three),
+                             std::make_pair(std::make_pair(one, two), three)));
+        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(two, two), two),
+                             std::make_pair(std::make_pair(one, three), three)));
+        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, three), two),
+                             std::make_pair(std::make_pair(one, two), three)));
+        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three),
+                             std::make_pair(std::make_pair(one, two), two)));
     }
     {
-        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(&one, three), three), std::make_pair(std::make_pair(&two, two), two)));
-        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(&one, two), three), std::make_pair(std::make_pair(&one, three), two)));
-        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, &two), two), std::make_pair(std::make_pair(one, &two), three)));
-        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, &two), three), std::make_pair(std::make_pair(one, &two), three)));
-        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(two, two), &two), std::make_pair(std::make_pair(one, three), &three)));
-        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(&one, &three), &two), std::make_pair(std::make_pair(&one, &two), &three)));
-        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three), std::make_pair(std::make_pair(one, two), two)));
+        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(&one, three), three),
+                            std::make_pair(std::make_pair(&two, two), two)));
+        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(&one, two), three),
+                            std::make_pair(std::make_pair(&one, three), two)));
+        CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(one, &two), two),
+                            std::make_pair(std::make_pair(one, &two), three)));
+        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, &two), three),
+                             std::make_pair(std::make_pair(one, &two), three)));
+        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(two, two), &two),
+                             std::make_pair(std::make_pair(one, three), &three)));
+        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(&one, &three), &two),
+                             std::make_pair(std::make_pair(&one, &two), &three)));
+        CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(one, two), three),
+                             std::make_pair(std::make_pair(one, two), two)));
     }
 }
@@ -219,9 +235,12 @@ void COrderingsTest::testFirstLess() {
     CPPUNIT_ASSERT(!less(2.0, std::make_pair(1.0, 1.0)));
     CPPUNIT_ASSERT(!less(std::make_pair(2.0, 2.0), 1.0));
 
-    CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 2.0), 1.0)));
-    CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0)));
-    CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(1.0, 2.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0)));
+    CPPUNIT_ASSERT(less(std::make_pair(std::make_pair(1.0, 1.0), 1.0),
+                        std::make_pair(std::make_pair(1.0, 2.0), 1.0)));
+    CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(1.0, 1.0), 1.0),
+                         std::make_pair(std::make_pair(1.0, 1.0), 1.0)));
+    CPPUNIT_ASSERT(!less(std::make_pair(std::make_pair(1.0, 2.0), 1.0),
+                         std::make_pair(std::make_pair(1.0, 1.0), 1.0)));
 
     double one(1.0);
     double two(2.0);
@@ -255,9 +274,12 @@ void COrderingsTest::testFirstGreater() {
     CPPUNIT_ASSERT(greater(2.0, std::make_pair(1.0, 1.0)));
     CPPUNIT_ASSERT(greater(std::make_pair(2.0, 2.0), 1.0));
 
-    CPPUNIT_ASSERT(!greater(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 2.0), 1.0)));
-    CPPUNIT_ASSERT(!greater(std::make_pair(std::make_pair(1.0, 1.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0)));
-    CPPUNIT_ASSERT(greater(std::make_pair(std::make_pair(1.0, 2.0), 1.0), std::make_pair(std::make_pair(1.0, 1.0), 1.0)));
+    CPPUNIT_ASSERT(!greater(std::make_pair(std::make_pair(1.0, 1.0), 1.0),
+                            std::make_pair(std::make_pair(1.0, 2.0), 1.0)));
+    CPPUNIT_ASSERT(!greater(std::make_pair(std::make_pair(1.0, 1.0), 1.0),
+                            std::make_pair(std::make_pair(1.0, 1.0), 1.0)));
+    CPPUNIT_ASSERT(greater(std::make_pair(std::make_pair(1.0, 2.0), 1.0),
+                           std::make_pair(std::make_pair(1.0, 1.0), 1.0)));
 
     double one(1.0);
     double two(2.0);
@@ -291,11 +313,16 @@ void COrderingsTest::testSecondLess() {
     CPPUNIT_ASSERT(less(2.0, std::make_pair(1.0, 3.0)));
     CPPUNIT_ASSERT(less(std::make_pair(2.0, 1.0), 2.0));
 
-    CPPUNIT_ASSERT(less(std::make_pair(1.0, std::make_pair(1.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 1.0))));
-    CPPUNIT_ASSERT(!less(std::make_pair(1.0, std::make_pair(1.0, 2.0)), std::make_pair(2.0, std::make_pair(1.0, 2.0))));
-    CPPUNIT_ASSERT(!less(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 1.0))));
-    CPPUNIT_ASSERT(less(std::make_pair(1.0, 1.0), std::make_pair(3.0, std::make_pair(1.0, 2.0))));
-    CPPUNIT_ASSERT(less(std::make_pair(1.0, std::make_pair(3.0, 1.0)), std::make_pair(3.0, 2.0)));
+    CPPUNIT_ASSERT(less(std::make_pair(1.0, std::make_pair(1.0, 2.0)),
+                        std::make_pair(2.0, std::make_pair(2.0, 1.0))));
+    CPPUNIT_ASSERT(!less(std::make_pair(1.0, std::make_pair(1.0, 2.0)),
+                         std::make_pair(2.0, std::make_pair(1.0, 2.0))));
+    CPPUNIT_ASSERT(!less(std::make_pair(1.0, std::make_pair(2.0, 2.0)),
+                         std::make_pair(2.0, std::make_pair(2.0, 1.0))));
+    CPPUNIT_ASSERT(less(std::make_pair(1.0, 1.0),
+                        std::make_pair(3.0, std::make_pair(1.0, 2.0))));
+    CPPUNIT_ASSERT(less(std::make_pair(1.0, std::make_pair(3.0, 1.0)),
+                        std::make_pair(3.0, 2.0)));
 
     double one(1.0);
     double two(2.0);
@@ -329,11 +356,16 @@ void COrderingsTest::testSecondGreater() {
     CPPUNIT_ASSERT(!greater(2.0, std::make_pair(1.0, 3.0)));
     CPPUNIT_ASSERT(!greater(std::make_pair(2.0, 1.0), 2.0));
 
-    CPPUNIT_ASSERT(greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 1.0))));
-    CPPUNIT_ASSERT(!greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 2.0))));
-    CPPUNIT_ASSERT(!greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)), std::make_pair(2.0, std::make_pair(2.0, 3.0))));
-    CPPUNIT_ASSERT(greater(std::make_pair(2.0, 2.0), std::make_pair(3.0, std::make_pair(1.0, 2.0))));
-    CPPUNIT_ASSERT(greater(std::make_pair(1.0, std::make_pair(3.0, 3.0)), std::make_pair(3.0, 2.0)));
+    CPPUNIT_ASSERT(greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)),
+                           std::make_pair(2.0, std::make_pair(2.0, 1.0))));
+    CPPUNIT_ASSERT(!greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)),
+                            std::make_pair(2.0, std::make_pair(2.0, 2.0))));
+    CPPUNIT_ASSERT(!greater(std::make_pair(1.0, std::make_pair(2.0, 2.0)),
+                            std::make_pair(2.0, std::make_pair(2.0, 3.0))));
+    CPPUNIT_ASSERT(greater(std::make_pair(2.0, 2.0),
+                           std::make_pair(3.0, std::make_pair(1.0, 2.0))));
+    CPPUNIT_ASSERT(greater(std::make_pair(1.0, std::make_pair(3.0, 3.0)),
+                           std::make_pair(3.0, 2.0)));
 
     double one(1.0);
     double two(2.0);
@@ -366,7 +398,8 @@ void COrderingsTest::testDereference() {
         iterators.push_back(i);
     }
 
-    std::sort(iterators.begin(), iterators.end(), core::CFunctional::SDereference());
+    std::sort(iterators.begin(), iterators.end(),
+              core::CFunctional::SDereference());
     std::sort(boost::begin(values_), boost::end(values_));
     for (std::size_t i = 0u; i < boost::size(values); ++i) {
         LOG_DEBUG(<< "expected " << values_[i] << ", got " << *iterators[i]);
@@ -441,47 +474,87 @@ void COrderingsTest::testLexicographicalCompare() {
 
     CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p2, d1, i1, p1, d1, greater));
     CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d2, i1, p1, d1, greater));
 
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i2, p1, d1, v1));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p2, d1, v1));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d2, v1));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v2));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, p1, d1, v1, i1, p1, d1, v1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p2, d1, v1, i1, p1, d1, v1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d2, v1, i1, p1, d1, v1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v2, i1, p1, d1, v1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i2, p1, d1, v1, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p2, d1, v1, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d2, v1, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v2, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, i1, p1, d1, v1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i2, p1, d1, v1, i1, p1, d1, v1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p2, d1, v1, i1, p1, d1, v1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d2, v1, i1, p1, d1, v1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v2, i1, p1, d1, v1, greater));
-
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i2, p1, d1, v1, s1));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p2, d1, v1, s1));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d2, v1, s1));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v2, s1));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s2));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, p1, d1, v1, s1, i1, p1, d1, v1, s1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p2, d1, v1, s1, i1, p1, d1, v1, s1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d2, v1, s1, i1, p1, d1, v1, s1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v2, s1, i1, p1, d1, v1, s1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s2, i1, p1, d1, v1, s1));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i2, p1, d1, v1, s1, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p2, d1, v1, s1, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d2, v1, s1, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v2, s1, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s2, greater));
-    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1, i1, p1, d1, v1, s1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i2, p1, d1, v1, s1, i1, p1, d1, v1, s1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p2, d1, v1, s1, i1, p1, d1, v1, s1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d2, v1, s1, i1, p1, d1, v1, s1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v2, s1, i1, p1, d1, v1, s1, greater));
-    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s2, i1, p1, d1, v1, s1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1,
+                                                              i2, p1, d1, v1));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1,
+                                                              i1, p2, d1, v1));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1,
+                                                              i1, p1, d2, v1));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1,
+                                                              i1, p1, d1, v2));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1,
+                                                               i1, p1, d1, v1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, p1, d1, v1,
+                                                               i1, p1, d1, v1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p2, d1, v1,
+                                                               i1, p1, d1, v1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d2, v1,
+                                                               i1, p1, d1, v1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v2,
+                                                               i1, p1, d1, v1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, i2, p1, d1, v1, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, i1, p2, d1, v1, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, i1, p1, d2, v1, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, i1, p1, d1, v2, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, i1, p1, d1, v1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i2, p1, d1, v1, i1, p1,
+                                                              d1, v1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p2, d1, v1, i1, p1,
+                                                              d1, v1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d2, v1, i1, p1,
+                                                              d1, v1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v2, i1, p1,
+                                                              d1, v1, greater));
+
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1,
+                                                              i2, p1, d1, v1, s1));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1,
+                                                              i1, p2, d1, v1, s1));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1,
+                                                              i1, p1, d2, v1, s1));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1,
+                                                              i1, p1, d1, v2, s1));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1,
+                                                              i1, p1, d1, v1, s2));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s1,
+                                                               i1, p1, d1, v1, s1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i2, p1, d1, v1, s1,
+                                                               i1, p1, d1, v1, s1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p2, d1, v1, s1,
+                                                               i1, p1, d1, v1, s1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d2, v1, s1,
+                                                               i1, p1, d1, v1, s1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v2, s1,
+                                                               i1, p1, d1, v1, s1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(i1, p1, d1, v1, s2,
+                                                               i1, p1, d1, v1, s1));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, s1, i2, p1, d1, v1, s1, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, s1, i1, p2, d1, v1, s1, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, s1, i1, p1, d2, v1, s1, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, s1, i1, p1, d1, v2, s1, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, s1, i1, p1, d1, v1, s2, greater));
+    CPPUNIT_ASSERT(!maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, s1, i1, p1, d1, v1, s1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(
+        i2, p1, d1, v1, s1, i1, p1, d1, v1, s1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(
+        i1, p2, d1, v1, s1, i1, p1, d1, v1, s1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(
+        i1, p1, d2, v1, s1, i1, p1, d1, v1, s1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v2, s1, i1, p1, d1, v1, s1, greater));
+    CPPUNIT_ASSERT(maths::COrderings::lexicographical_compare(
+        i1, p1, d1, v1, s2, i1, p1, d1, v1, s1, greater));
 }
 
 void COrderingsTest::testSimultaneousSort() {
@@ -501,8 +574,9 @@
     {
         TDoubleVec keys{0.0, 1.0, 0.2, 1.1, 0.7, 5.0};
-        TStrVec values{
-            std::string(1, 'c'), std::string(1, 'q'), std::string(1, '!'), std::string(1, 'a'), std::string(1, 'z'), std::string(1, 'p')};
+        TStrVec values{std::string(1, 'c'), std::string(1, 'q'),
+                       std::string(1, '!'), std::string(1, 'a'),
+                       std::string(1, 'z'), std::string(1, 'p')};
 
         std::string expectedKeys("[0, 0.2, 0.7, 1, 1.1, 5]");
         std::string expectedValues("[c, !, z, q, a, p]");
@@ -519,9 +593,11 @@
     }
     {
         TDouble1Vec keys{7.0, 1.0, 0.2, 1.1, 0.7, 5.0};
-        TStrVec values1{
-            std::string(1, 'w'), std::string(1, 'q'), std::string(1, '~'), std::string(1, 'e'), std::string(1, ';'), std::string(1, 'y')};
-        TDoubleDoublePrVec values2{{2.0, 1.0}, {2.1, 1.1}, {1.3, 1.9}, {3.2, 12.9}, {1.2, 10.1}, {1.3, 6.2}};
+        TStrVec values1{std::string(1, 'w'), std::string(1, 'q'),
+                        std::string(1, '~'), std::string(1, 'e'),
+                        std::string(1, ';'), std::string(1, 'y')};
+        TDoubleDoublePrVec values2{{2.0, 1.0}, {2.1, 1.1}, {1.3, 1.9},
+                                   {3.2, 12.9}, {1.2, 10.1}, {1.3, 6.2}};
 
         std::string expectedKeys("[0.2, 0.7, 1, 1.1, 5, 7]");
         std::string expectedValues1("[~, ;, q, e, y, w]");
@@ -538,15 +614,13 @@
 
     test::CRandomNumbers rng;
     {
        TDoubleVec keys{7.1, 0.1, 0.9, 1.4, 0.7, 5.1, 80.0, 4.0};
-        TStrVec values1{std::string("a1"),
-                        std::string("23"),
-                        std::string("~1"),
-                        std::string("b4"),
-                        std::string(";;"),
-                        std::string("zz"),
-                        std::string("sss"),
-                        std::string("pq")};
-        TDoubleDoublePrVec values2{{1.0, 1.0}, {4.1, 1.1}, {5.3, 3.9}, {7.2, 22.9}, {2.2, 1.1}, {0.3, 16.2}, {21.2, 11.1}, {10.3, 13.2}};
+        TStrVec values1{std::string("a1"), std::string("23"),
+                        std::string("~1"), std::string("b4"),
+                        std::string(";;"), std::string("zz"),
+                        std::string("sss"), std::string("pq")};
+        TDoubleDoublePrVec values2{{1.0, 1.0}, {4.1, 1.1}, {5.3, 3.9},
+                                   {7.2, 22.9}, {2.2, 1.1}, {0.3, 16.2},
+                                   {21.2, 11.1}, {10.3, 13.2}};
         TStrVec rawWords;
         rng.generateWords(5, keys.size() * 5, rawWords);
         TDictionaryVec values3;
@@ -597,17 +671,22 @@ void COrderingsTest::testSimultaneousSort() {
 
         LOG_DEBUG(<< "values1 = " << core::CContainerPrinter::print(values1));
         LOG_DEBUG(<< "values2 = " << core::CContainerPrinter::print(values2));
-        CPPUNIT_ASSERT_EQUAL(std::string("[5, 2, 3, 4, 1]"), core::CContainerPrinter::print(values1));
-        CPPUNIT_ASSERT_EQUAL(std::string("[1, 5, 2, 3, 4]"), core::CContainerPrinter::print(values2));
+        CPPUNIT_ASSERT_EQUAL(std::string("[5, 2, 3, 4, 1]"),
+                             core::CContainerPrinter::print(values1));
+        CPPUNIT_ASSERT_EQUAL(std::string("[1, 5, 2, 3, 4]"),
+                             core::CContainerPrinter::print(values2));
 
         maths::COrderings::simultaneousSort(range2, range1, range3);
 
         LOG_DEBUG(<< "values1 = " << core::CContainerPrinter::print(values1));
         LOG_DEBUG(<< "values2 = " << core::CContainerPrinter::print(values2));
         LOG_DEBUG(<< "values3 = " << core::CContainerPrinter::print(values3));
-        CPPUNIT_ASSERT_EQUAL(std::string("[5, 3, 4, 2, 1]"), core::CContainerPrinter::print(values1));
-        CPPUNIT_ASSERT_EQUAL(std::string("[1, 2, 3, 5, 4]"), core::CContainerPrinter::print(values2));
-        CPPUNIT_ASSERT_EQUAL(std::string("[4, 3, 3, 2, 5]"), core::CContainerPrinter::print(values3));
+        CPPUNIT_ASSERT_EQUAL(std::string("[5, 3, 4, 2, 1]"),
+                             core::CContainerPrinter::print(values1));
+        CPPUNIT_ASSERT_EQUAL(std::string("[1, 2, 3, 5, 4]"),
+                             core::CContainerPrinter::print(values2));
+        CPPUNIT_ASSERT_EQUAL(std::string("[4, 3, 3, 2, 5]"),
+                             core::CContainerPrinter::print(values3));
 
         maths::COrderings::simultaneousSort(range4, range1, range2, range3);
@@ -615,10 +694,14 @@
 
         LOG_DEBUG(<< "values1 = " << core::CContainerPrinter::print(values1));
         LOG_DEBUG(<< "values2 = " << core::CContainerPrinter::print(values2));
        LOG_DEBUG(<< "values3 = " << core::CContainerPrinter::print(values3));
         LOG_DEBUG(<< "values4 = " << core::CContainerPrinter::print(values4));
-        CPPUNIT_ASSERT_EQUAL(std::string("[5, 3, 2, 4, 1]"), core::CContainerPrinter::print(values1));
-        CPPUNIT_ASSERT_EQUAL(std::string("[1, 2, 5, 3, 4]"), core::CContainerPrinter::print(values2));
-        CPPUNIT_ASSERT_EQUAL(std::string("[4, 3, 2, 3, 5]"), core::CContainerPrinter::print(values3));
-        CPPUNIT_ASSERT_EQUAL(std::string("[2, 1, 4, 5, 1]"), core::CContainerPrinter::print(values4));
+        CPPUNIT_ASSERT_EQUAL(std::string("[5, 3, 2, 4, 1]"),
+                             core::CContainerPrinter::print(values1));
+        CPPUNIT_ASSERT_EQUAL(std::string("[1, 2, 5, 3, 4]"),
+                             core::CContainerPrinter::print(values2));
+        CPPUNIT_ASSERT_EQUAL(std::string("[4, 3, 2, 3, 5]"),
+                             core::CContainerPrinter::print(values3));
+        CPPUNIT_ASSERT_EQUAL(std::string("[2, 1, 4, 5, 1]"),
+                             core::CContainerPrinter::print(values4));
     }
     {
         for (std::size_t i = 0u; i < 50; ++i) {
@@ -633,7 +716,8 @@
 
             TDoubleDoubleTupleMap expected;
             for (std::size_t j = 0u; j < 10; ++j) {
-                expected[keys[j]] = TDoubleTuple(values1[j], values2[j], values3[j], values4[j]);
+                expected[keys[j]] =
+                    TDoubleTuple(values1[j], values2[j], values3[j], values4[j]);
             }
 
             maths::COrderings::simultaneousSort(keys, values1, values2, values3, values4);
@@ -654,19 +738,26 @@ CppUnit::Test* COrderingsTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COrderingsTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testOptionalOrdering", &COrderingsTest::testOptionalOrdering));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testPtrOrdering", &COrderingsTest::testPtrOrdering));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testLess", &COrderingsTest::testLess));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testFirstLess", &COrderingsTest::testFirstLess));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testFirstGreater", &COrderingsTest::testFirstGreater));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testSecondLess", &COrderingsTest::testSecondLess));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testSecondGreater", &COrderingsTest::testSecondGreater));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testDereference", &COrderingsTest::testDereference));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testLexicographicalCompare", &COrderingsTest::testLexicographicalCompare));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<COrderingsTest>("COrderingsTest::testSimultaneousSort", &COrderingsTest::testSimultaneousSort));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testOptionalOrdering", &COrderingsTest::testOptionalOrdering));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testPtrOrdering", &COrderingsTest::testPtrOrdering));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testLess", &COrderingsTest::testLess));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testFirstLess", &COrderingsTest::testFirstLess));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testFirstGreater", &COrderingsTest::testFirstGreater));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testSecondLess", &COrderingsTest::testSecondLess));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testSecondGreater", &COrderingsTest::testSecondGreater));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testDereference", &COrderingsTest::testDereference));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testLexicographicalCompare", &COrderingsTest::testLexicographicalCompare));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrderingsTest>(
+        "COrderingsTest::testSimultaneousSort", &COrderingsTest::testSimultaneousSort));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/COrdinalTest.cc b/lib/maths/unittest/COrdinalTest.cc
index cbdebccfe1..3952e5276d 100644
--- a/lib/maths/unittest/COrdinalTest.cc
+++ b/lib/maths/unittest/COrdinalTest.cc
@@ -45,25 +45,33 @@ void COrdinalTest::testEqual() {
     for (std::size_t i = 0u; i < 1000; ++i) {
         TDoubleVec sample;
         rng.generateUniformSamples(-10000.0, 10000.0, 1, sample);
-        bool equal = maths::COrdinal(static_cast(sample[0])) == maths::COrdinal(static_cast(sample[0]));
+        bool equal = maths::COrdinal(static_cast(sample[0])) ==
+                     maths::COrdinal(static_cast(sample[0]));
         CPPUNIT_ASSERT_EQUAL(true, equal);
-        equal = maths::COrdinal(static_cast(sample[0])) == maths::COrdinal(static_cast(sample[0]));
+        equal = maths::COrdinal(static_cast(sample[0])) ==
+                maths::COrdinal(static_cast(sample[0]));
         CPPUNIT_ASSERT_EQUAL(true, equal);
         equal = maths::COrdinal(sample[0]) == maths::COrdinal(sample[0]);
         CPPUNIT_ASSERT_EQUAL(true, equal);
         if (sample[0] >= 0.0) {
-            equal = maths::COrdinal(static_cast(sample[0])) == maths::COrdinal(std::floor(sample[0]));
+            equal = maths::COrdinal(static_cast(sample[0])) ==
+                    maths::COrdinal(std::floor(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
-            equal = maths::COrdinal(std::floor(sample[0])) == maths::COrdinal(static_cast(sample[0]));
+            equal = maths::COrdinal(std::floor(sample[0])) ==
+                    maths::COrdinal(static_cast(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
-            equal = maths::COrdinal(static_cast(sample[0])) == maths::COrdinal(std::floor(sample[0]));
+            equal = maths::COrdinal(static_cast(sample[0])) ==
+                    maths::COrdinal(std::floor(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
-            equal = maths::COrdinal(std::floor(sample[0])) == maths::COrdinal(static_cast(sample[0]));
+            equal = maths::COrdinal(std::floor(sample[0])) ==
+                    maths::COrdinal(static_cast(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
         } else {
-            equal = maths::COrdinal(static_cast(sample[0])) == maths::COrdinal(std::ceil(sample[0]));
+            equal = maths::COrdinal(static_cast(sample[0])) ==
+                    maths::COrdinal(std::ceil(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
-            equal = maths::COrdinal(std::ceil(sample[0])) == maths::COrdinal(static_cast(sample[0]));
+            equal = maths::COrdinal(std::ceil(sample[0])) ==
+                    maths::COrdinal(static_cast(sample[0]));
             CPPUNIT_ASSERT_EQUAL(true, equal);
         }
     }
@@ -71,17 +79,22 @@ void COrdinalTest::testEqual() {
     // Test doubles outside the integer range.
     double small = -1e37;
     double large = 1e23;
-    CPPUNIT_ASSERT(maths::COrdinal(small) != maths::COrdinal(boost::numeric::bounds::lowest()));
-    CPPUNIT_ASSERT(maths::COrdinal(large) != maths::COrdinal(boost::numeric::bounds::highest()));
-    CPPUNIT_ASSERT(maths::COrdinal(boost::numeric::bounds::lowest()) != maths::COrdinal(small));
-    CPPUNIT_ASSERT(maths::COrdinal(boost::numeric::bounds::highest()) != maths::COrdinal(large));
+    CPPUNIT_ASSERT(maths::COrdinal(small) !=
+                   maths::COrdinal(boost::numeric::bounds::lowest()));
+    CPPUNIT_ASSERT(maths::COrdinal(large) !=
+                   maths::COrdinal(boost::numeric::bounds::highest()));
+    CPPUNIT_ASSERT(maths::COrdinal(boost::numeric::bounds::lowest()) !=
+                   maths::COrdinal(small));
+    CPPUNIT_ASSERT(maths::COrdinal(boost::numeric::bounds::highest()) !=
+                   maths::COrdinal(large));
 
     // Check some integer values which can't be represented as doubles.
     maths::COrdinal s1[] = {maths::COrdinal(int64_t(-179809067369808278)),
                             maths::COrdinal(int64_t(-179809067369808277)),
                             maths::COrdinal(int64_t(569817345679111267)),
                             maths::COrdinal(int64_t(569817345679111268))};
-    maths::COrdinal s2[] = {maths::COrdinal(uint64_t(569817345679111267)), maths::COrdinal(uint64_t(569817345679111268))};
+    maths::COrdinal s2[] = {maths::COrdinal(uint64_t(569817345679111267)),
+                            maths::COrdinal(uint64_t(569817345679111268))};
 
     for (std::size_t i = 0u; i < boost::size(s1); ++i) {
         LOG_DEBUG(<< s1[i] << " (as double " << precisePrint(s1[i].asDouble()) << ")");
         for (std::size_t j = 0u; j < i; ++j) {
@@ -113,32 +126,40 @@ void COrdinalTest::testLess() {
 
         TDoubleVec samples;
         rng.generateUniformSamples(-10000.0, 10000.0, 2, samples);
         bool less = static_cast(samples[0]) < static_cast(samples[1]);
-        bool ordinalLess = maths::COrdinal(static_cast(samples[0])) < maths::COrdinal(static_cast(samples[1]));
+        bool ordinalLess = maths::COrdinal(static_cast(samples[0])) <
+                           maths::COrdinal(static_cast(samples[1]));
         CPPUNIT_ASSERT_EQUAL(less, ordinalLess);
         if (samples[0] >= 0.0) {
             less = static_cast(samples[0]) < static_cast(samples[1]);
-            ordinalLess = maths::COrdinal(static_cast(samples[0])) < maths::COrdinal(static_cast(samples[1]));
+            ordinalLess = maths::COrdinal(static_cast(samples[0])) <
+                          maths::COrdinal(static_cast(samples[1]));
         }
         if (samples[1] >= 0.0) {
             less = static_cast(samples[0]) < static_cast(samples[1]);
-            ordinalLess = maths::COrdinal(static_cast(samples[0])) < maths::COrdinal(static_cast(samples[1]));
+            ordinalLess = maths::COrdinal(static_cast(samples[0])) <
+                          maths::COrdinal(static_cast(samples[1]));
         }
         if (samples[0] >= 0.0 && samples[1] >= 0.0) {
             less = static_cast(samples[0]) < static_cast(samples[1]);
-            ordinalLess = maths::COrdinal(static_cast(samples[0])) < maths::COrdinal(static_cast(samples[1]));
+            ordinalLess = maths::COrdinal(static_cast(samples[0])) <
+                          maths::COrdinal(static_cast(samples[1]));
         }
         less = static_cast(static_cast(samples[0])) < samples[1];
-        ordinalLess = maths::COrdinal(static_cast(samples[0])) < maths::COrdinal(samples[1]);
+        ordinalLess = maths::COrdinal(static_cast(samples[0])) <
+                      maths::COrdinal(samples[1]);
         less = samples[0] < static_cast(static_cast(samples[1]));
-        ordinalLess = maths::COrdinal(samples[0]) < maths::COrdinal(static_cast(samples[1]));
+        ordinalLess = maths::COrdinal(samples[0]) <
+                      maths::COrdinal(static_cast(samples[1]));
         less = samples[0] < samples[1];
         if (samples[0] >= 0.0) {
             less = static_cast(static_cast(samples[0])) < samples[1];
-            ordinalLess = maths::COrdinal(static_cast(samples[0])) < maths::COrdinal(samples[1]);
+            ordinalLess = maths::COrdinal(static_cast(samples[0])) <
+                          maths::COrdinal(samples[1]);
         }
         if (samples[1] >= 0.0) {
             less = samples[0] < static_cast(static_cast(samples[1]));
-            ordinalLess = maths::COrdinal(samples[0]) < maths::COrdinal(static_cast(samples[1]));
+            ordinalLess = maths::COrdinal(samples[0]) <
+                          maths::COrdinal(static_cast(samples[1]));
         }
         ordinalLess = maths::COrdinal(samples[0]) < maths::COrdinal(samples[1]);
         CPPUNIT_ASSERT_EQUAL(less, ordinalLess);
     }
 
     // Test doubles outside the integer range.
     double small = -1e37;
     double large = 1e23;
-    CPPUNIT_ASSERT(maths::COrdinal(small) < maths::COrdinal(boost::numeric::bounds::lowest()));
-    CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds::lowest()) < maths::COrdinal(small)));
-    CPPUNIT_ASSERT(maths::COrdinal(large) > maths::COrdinal(boost::numeric::bounds::highest()));
-    CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds::highest()) > maths::COrdinal(large)));
-    CPPUNIT_ASSERT(maths::COrdinal(large) > maths::COrdinal(boost::numeric::bounds::highest()));
-    CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds::highest()) > maths::COrdinal(large)));
+    CPPUNIT_ASSERT(maths::COrdinal(small) <
+                   maths::COrdinal(boost::numeric::bounds::lowest()));
+    CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds::lowest()) <
+                     maths::COrdinal(small)));
+    CPPUNIT_ASSERT(maths::COrdinal(large) >
+                   maths::COrdinal(boost::numeric::bounds::highest()));
+    CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds::highest()) >
+                     maths::COrdinal(large)));
+    CPPUNIT_ASSERT(maths::COrdinal(large) >
+                   maths::COrdinal(boost::numeric::bounds::highest()));
+    CPPUNIT_ASSERT(!(maths::COrdinal(boost::numeric::bounds::highest()) >
+                     maths::COrdinal(large)));
 
     // Check some integer values which can't be represented as doubles.
     maths::COrdinal s1[] = {maths::COrdinal(int64_t(-179809067369808278)),
                             maths::COrdinal(int64_t(-179809067369808277)),
                             maths::COrdinal(int64_t(569817345679111267)),
                             maths::COrdinal(int64_t(569817345679111268))};
-    maths::COrdinal s2[] = {maths::COrdinal(uint64_t(569817345679111267)), maths::COrdinal(uint64_t(569817345679111268))};
+    maths::COrdinal s2[] = {maths::COrdinal(uint64_t(569817345679111267)),
+                            maths::COrdinal(uint64_t(569817345679111268))};
 
     for (std::size_t i = 0u; i < boost::size(s1); ++i) {
         LOG_DEBUG(<< s1[i] << " (as double " << precisePrint(s1[i].asDouble()) << ")");
         for (std::size_t j = 0u; j < i; ++j) {
@@ -248,7 +276,8 @@ void COrdinalTest::testAsDouble() {
 
     // Check some integer values which can't be represented as doubles.
-    int64_t s[] = {-179809067369808278, -179809067369808277, 569817345679111267, 569817345679111268};
+    int64_t s[] = {-179809067369808278, -179809067369808277, 569817345679111267,
+                   569817345679111268};
 
     for (std::size_t i = 0u; i < boost::size(s); ++i) {
         maths::COrdinal o(s[i]);
@@ -299,12 +328,18 @@ CppUnit::Test* COrdinalTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("COrdinalTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testEqual", &COrdinalTest::testEqual));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testLess", &COrdinalTest::testLess));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testLess", &COrdinalTest::testLess));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testIsNan", &COrdinalTest::testIsNan));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testAsDouble", &COrdinalTest::testAsDouble));
-    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>("COrdinalTest::testHash", &COrdinalTest::testHash));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>(
+        "COrdinalTest::testEqual", &COrdinalTest::testEqual));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>(
+        "COrdinalTest::testLess", &COrdinalTest::testLess));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>(
+        "COrdinalTest::testLess", &COrdinalTest::testLess));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>(
+        "COrdinalTest::testIsNan", &COrdinalTest::testIsNan));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>(
+        "COrdinalTest::testAsDouble", &COrdinalTest::testAsDouble));
+    suiteOfTests->addTest(new CppUnit::TestCaller<COrdinalTest>(
+        "COrdinalTest::testHash", &COrdinalTest::testHash));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CPRNGTest.cc b/lib/maths/unittest/CPRNGTest.cc
index 2b28a14f10..4ebe063231 100644
--- a/lib/maths/unittest/CPRNGTest.cc
+++ b/lib/maths/unittest/CPRNGTest.cc
@@ -38,9 +38,12 @@ void CPRNGTest::testSplitMix64() {
         max.add(x);
     }
     LOG_DEBUG(<< "min = " << min[0] << ", max = " << max[0]);
-    CPPUNIT_ASSERT(min[0] < (maths::CPRNG::CSplitMix64::max() - maths::CPRNG::CSplitMix64::min()) / 2000);
-    CPPUNIT_ASSERT(max[0] >
-                   maths::CPRNG::CSplitMix64::max() - (maths::CPRNG::CSplitMix64::max() - maths::CPRNG::CSplitMix64::min()) / 2000);
+    CPPUNIT_ASSERT(
+        min[0] < (maths::CPRNG::CSplitMix64::max() - maths::CPRNG::CSplitMix64::min()) / 2000);
+    CPPUNIT_ASSERT(max[0] > maths::CPRNG::CSplitMix64::max() -
+                                (maths::CPRNG::CSplitMix64::max() -
+                                 maths::CPRNG::CSplitMix64::min()) /
+                                    2000);
 
     // Test generate.
     maths::CPRNG::CSplitMix64 rng2 = rng1;
@@ -75,7 +78,8 @@ void CPRNGTest::testSplitMix64() {
         LOG_DEBUG(<< "p2 = " << core::CContainerPrinter::print(p2));
         LOG_DEBUG(<< "m1 = " << maths::CBasicStatistics::mean(m1));
         LOG_DEBUG(<< "m2 = " << maths::CBasicStatistics::mean(m2));
-        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) > 0.95 * maths::CBasicStatistics::mean(m2));
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) >
+                       0.95 * maths::CBasicStatistics::mean(m2));
     }
     {
         boost::random::mt11213b mt;
@@ -100,7 +104,8 @@ void CPRNGTest::testSplitMix64() {
         LOG_DEBUG(<< "p2 = " << core::CContainerPrinter::print(p2));
         LOG_DEBUG(<< "m1 = " << maths::CBasicStatistics::mean(m1));
         LOG_DEBUG(<< "m2 = " << maths::CBasicStatistics::mean(m2));
-        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) > 0.95 * maths::CBasicStatistics::mean(m2));
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) >
+                       0.95 * maths::CBasicStatistics::mean(m2));
     }
 
     // Test discard.
@@ -142,9 +147,13 @@ void CPRNGTest::testXorOShiro128Plus() {
         max.add(x);
     }
     LOG_DEBUG(<< "min = " << min[0] << ", max = " << max[0]);
-    CPPUNIT_ASSERT(min[0] < (maths::CPRNG::CXorOShiro128Plus::max() - maths::CPRNG::CXorOShiro128Plus::min()) / 2000);
+    CPPUNIT_ASSERT(min[0] < (maths::CPRNG::CXorOShiro128Plus::max() -
+                             maths::CPRNG::CXorOShiro128Plus::min()) /
+                                2000);
     CPPUNIT_ASSERT(max[0] > maths::CPRNG::CXorOShiro128Plus::max() -
-                                (maths::CPRNG::CXorOShiro128Plus::max() - maths::CPRNG::CXorOShiro128Plus::min()) / 2000);
+                                (maths::CPRNG::CXorOShiro128Plus::max() -
+                                 maths::CPRNG::CXorOShiro128Plus::min()) /
+                                    2000);
 
     // Test generate.
     maths::CPRNG::CXorOShiro128Plus rng2 = rng1;
@@ -179,7 +188,8 @@ void CPRNGTest::testXorOShiro128Plus() {
         LOG_DEBUG(<< "p2 = " << core::CContainerPrinter::print(p2));
         LOG_DEBUG(<< "m1 = " << maths::CBasicStatistics::mean(m1));
         LOG_DEBUG(<< "m2 = " << maths::CBasicStatistics::mean(m2));
-        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) > 0.95 * maths::CBasicStatistics::mean(m2));
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) >
+                       0.95 * maths::CBasicStatistics::mean(m2));
     }
     {
         boost::random::mt19937_64 mt;
@@ -204,7 +214,8 @@ void CPRNGTest::testXorOShiro128Plus() {
         LOG_DEBUG(<< "p2 = " << core::CContainerPrinter::print(p2));
         LOG_DEBUG(<< "m1 = " << maths::CBasicStatistics::mean(m1));
         LOG_DEBUG(<< "m2 = " << maths::CBasicStatistics::mean(m2));
-        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) > 0.95 * maths::CBasicStatistics::mean(m2));
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) >
+                       0.95 * maths::CBasicStatistics::mean(m2));
     }
 
     // Test discard.
@@ -262,9 +273,13 @@ void CPRNGTest::testXorShift1024Mult() {
         max.add(x);
     }
     LOG_DEBUG(<< "min = " << min[0] << ", max = " << max[0]);
-    CPPUNIT_ASSERT(min[0] < (maths::CPRNG::CXorShift1024Mult::max() - maths::CPRNG::CXorShift1024Mult::min()) / 2000);
+    CPPUNIT_ASSERT(min[0] < (maths::CPRNG::CXorShift1024Mult::max() -
+                             maths::CPRNG::CXorShift1024Mult::min()) /
+                                2000);
     CPPUNIT_ASSERT(max[0] > maths::CPRNG::CXorShift1024Mult::max() -
-                                (maths::CPRNG::CXorShift1024Mult::max() - maths::CPRNG::CXorShift1024Mult::min()) / 2000);
+                                (maths::CPRNG::CXorShift1024Mult::max() -
+                                 maths::CPRNG::CXorShift1024Mult::min()) /
+                                    2000);
 
     // Test generate.
     maths::CPRNG::CXorShift1024Mult rng2 = rng1;
@@ -299,7 +314,8 @@ void CPRNGTest::testXorShift1024Mult() {
         LOG_DEBUG(<< "p2 = " << core::CContainerPrinter::print(p2));
         LOG_DEBUG(<< "m1 = " << maths::CBasicStatistics::mean(m1));
         LOG_DEBUG(<< "m2 = " << maths::CBasicStatistics::mean(m2));
-        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) > 0.95 * maths::CBasicStatistics::mean(m2));
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) >
+                       0.95 * maths::CBasicStatistics::mean(m2));
     }
     {
         boost::random::mt11213b mt;
@@ -324,7 +340,8 @@ void CPRNGTest::testXorShift1024Mult() {
         LOG_DEBUG(<< "p2 = " << core::CContainerPrinter::print(p2));
         LOG_DEBUG(<< "m1 = " << maths::CBasicStatistics::mean(m1));
         LOG_DEBUG(<< "m2 = " << maths::CBasicStatistics::mean(m2));
-        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) > 0.95 * maths::CBasicStatistics::mean(m2));
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(m1) >
+                       0.95 * maths::CBasicStatistics::mean(m2));
     }
 
     // Test discard.
@@ -367,9 +384,12 @@ void CPRNGTest::testXorShift1024Mult() {
 CppUnit::Test* CPRNGTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPRNGTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>("CPRNGTest::testSplitMix64", &CPRNGTest::testSplitMix64));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>("CPRNGTest::testXorOShiro128Plus", &CPRNGTest::testXorOShiro128Plus));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>("CPRNGTest::testXorShift1024Mult", &CPRNGTest::testXorShift1024Mult));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>(
+        "CPRNGTest::testSplitMix64", &CPRNGTest::testSplitMix64));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>(
+        "CPRNGTest::testXorOShiro128Plus", &CPRNGTest::testXorOShiro128Plus));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPRNGTest>(
+        "CPRNGTest::testXorShift1024Mult", &CPRNGTest::testXorShift1024Mult));
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CPackedBitVectorTest.cc b/lib/maths/unittest/CPackedBitVectorTest.cc
index 143238e244..a398de0fbb 100644
--- a/lib/maths/unittest/CPackedBitVectorTest.cc
+++ b/lib/maths/unittest/CPackedBitVectorTest.cc
@@ -50,46 +50,55 @@ void CPackedBitVectorTest::testCreation() {
     maths::CPackedBitVector test1(3, true);
     LOG_DEBUG(<< "test1 = " << test1);
     CPPUNIT_ASSERT_EQUAL(std::size_t(3), test1.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(3, true)), core::CContainerPrinter::print(test1.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(3, true)),
+                         core::CContainerPrinter::print(test1.toBitVector()));
     maths::CPackedBitVector test2(5, false);
     LOG_DEBUG(<< "test2 = " << test2);
     CPPUNIT_ASSERT_EQUAL(std::size_t(5), test2.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(5, false)), core::CContainerPrinter::print(test2.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(5, false)),
+                         core::CContainerPrinter::print(test2.toBitVector()));
     maths::CPackedBitVector test3(255, true);
     LOG_DEBUG(<< "test3 = " << test3);
     CPPUNIT_ASSERT_EQUAL(std::size_t(255), test3.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(255, true)), core::CContainerPrinter::print(test3.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(255, true)),
+                         core::CContainerPrinter::print(test3.toBitVector()));
     maths::CPackedBitVector test4(279, true);
     LOG_DEBUG(<< "test4 = " << test4);
     CPPUNIT_ASSERT_EQUAL(std::size_t(279), test4.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(279, true)), core::CContainerPrinter::print(test4.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(279, true)),
+                         core::CContainerPrinter::print(test4.toBitVector()));
     maths::CPackedBitVector test5(512, false);
     LOG_DEBUG(<< "test5 = " << test5);
     CPPUNIT_ASSERT_EQUAL(std::size_t(512), test5.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(512, false)), core::CContainerPrinter::print(test5.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(TBoolVec(512, false)),
+                         core::CContainerPrinter::print(test5.toBitVector()));
     maths::CPackedBitVector test6((TBoolVec()));
     LOG_DEBUG(<< "test6 = " << test6);
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), test6.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print((TBoolVec())), core::CContainerPrinter::print(test6.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print((TBoolVec())),
+                         core::CContainerPrinter::print(test6.toBitVector()));
     bool bits1_[] = {true, true};
     TBoolVec bits1(boost::begin(bits1_), boost::end(bits1_));
     maths::CPackedBitVector test7(bits1);
     LOG_DEBUG(<< "test7 = " << test7);
     CPPUNIT_ASSERT_EQUAL(bits1.size(), test7.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test7.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1),
+                         core::CContainerPrinter::print(test7.toBitVector()));
-    bool bits2_[] = {true, false, false, true, true, false, false, false, false, true, true, true, true, false};
+    bool bits2_[] = {true, false, false, true, true, false, false,
+                     false, false, true, true, true, true, false};
     TBoolVec bits2(boost::begin(bits2_), boost::end(bits2_));
     maths::CPackedBitVector test8(bits2);
     LOG_DEBUG(<< "test8 = " << test8);
     CPPUNIT_ASSERT_EQUAL(bits2.size(), test8.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), core::CContainerPrinter::print(test8.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2),
+                         core::CContainerPrinter::print(test8.toBitVector()));
     test::CRandomNumbers rng;
@@ -102,7 +111,8 @@ void CPackedBitVectorTest::testCreation() {
             LOG_DEBUG(<< "test9 = " << test9);
         }
         CPPUNIT_ASSERT_EQUAL(bits3.size(), test9.dimension());
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits3), core::CContainerPrinter::print(test9.toBitVector()));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits3),
+                             core::CContainerPrinter::print(test9.toBitVector()));
     }
 }
@@ -142,18 +152,21 @@ void CPackedBitVectorTest::testExtend() {
     LOG_DEBUG(<< "test2 = " << test2);
     CPPUNIT_ASSERT_EQUAL(std::size_t(255), test2.dimension());
     TBoolVec bits1(255, true);
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test2.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1),
+                         core::CContainerPrinter::print(test2.toBitVector()));
     test2.extend(false);
     bits1.push_back(false);
     LOG_DEBUG(<< "test2 = " << test2);
     CPPUNIT_ASSERT_EQUAL(std::size_t(256), test2.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test2.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1),
+                         core::CContainerPrinter::print(test2.toBitVector()));
     maths::CPackedBitVector test3(255, true);
     test3.extend(false);
     LOG_DEBUG(<< "test3 = " << test2);
     CPPUNIT_ASSERT_EQUAL(std::size_t(256), test3.dimension());
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test3.toBitVector()));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1),
+                         core::CContainerPrinter::print(test3.toBitVector()));
     test::CRandomNumbers rng;
@@ -166,7 +179,8 @@ void CPackedBitVectorTest::testExtend() {
     for (std::size_t i = 0u; i < components.size(); ++i) {
         bits2.push_back(components[i] > 0);
         test4.extend(components[i] > 0);
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), core::CContainerPrinter::print(test4.toBitVector()));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2),
+                             core::CContainerPrinter::print(test4.toBitVector()));
     }
 }
@@ -196,7 +210,8 @@ void CPackedBitVectorTest::testContract() {
         bits1.erase(bits1.begin());
         test2.contract();
         LOG_DEBUG(<< "test2 = " << test2);
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1), core::CContainerPrinter::print(test2.toBitVector()));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits1),
+                             core::CContainerPrinter::print(test2.toBitVector()));
     }
     TBoolVec bits2(1024, true);
@@ -207,7 +222,8 @@ void CPackedBitVectorTest::testContract() {
         bits2.erase(bits2.begin());
         test3.contract();
         LOG_DEBUG(<< "test3 = " << test3);
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2), core::CContainerPrinter::print(test3.toBitVector()));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(bits2),
+                             core::CContainerPrinter::print(test3.toBitVector()));
     }
 }
@@ -244,9 +260,11 @@ void CPackedBitVectorTest::testInner() {
     maths::CPackedBitVector test1(10, true);
     maths::CPackedBitVector test2(10, false);
-    bool bits1[] = {true, true, false, false, true, false, false, false, true, true};
+    bool bits1[] = {true, true, false, false, true,
+                    false, false, false, true, true};
     maths::CPackedBitVector test3(TBoolVec(boost::begin(bits1), boost::end(bits1)));
-    bool bits2[] = {false, false, true, false, true, false, false, false, false, false};
+    bool bits2[] = {false, false, true, false, true,
+                    false, false, false, false, false};
     maths::CPackedBitVector test4(TBoolVec(boost::begin(bits2), boost::end(bits2)));
     CPPUNIT_ASSERT_EQUAL(10.0, test1.inner(test1));
@@ -292,7 +310,8 @@ void CPackedBitVectorTest::testInner() {
     for (std::size_t i = 0u; i < test7.size(); ++i) {
         LOG_DEBUG(<< "Testing " << test7[i]);
         for (std::size_t j = 0u; j < test7.size(); ++j) {
-            CPPUNIT_ASSERT_EQUAL(comparison[i].inner(comparison[j]), test7[i].inner(test7[j]));
+            CPPUNIT_ASSERT_EQUAL(comparison[i].inner(comparison[j]),
+                                 test7[i].inner(test7[j]));
         }
     }
 }
@@ -329,7 +348,8 @@ void CPackedBitVectorTest::testBitwiseOr() {
                 if (j % 10 == 0) {
                     LOG_DEBUG(<< "or = " << expected);
                 }
-                CPPUNIT_ASSERT_EQUAL(expected, test[i].inner(test[j], maths::CPackedBitVector::E_OR));
+                CPPUNIT_ASSERT_EQUAL(
+                    expected, test[i].inner(test[j], maths::CPackedBitVector::E_OR));
             }
             {
                 double expected = 0.0;
@@ -340,7 +360,8 @@ void CPackedBitVectorTest::testBitwiseOr() {
                 if (j % 10 == 0) {
                     LOG_DEBUG(<< "xor = " << expected);
                 }
-                CPPUNIT_ASSERT_EQUAL(expected, test[i].inner(test[j], maths::CPackedBitVector::E_XOR));
+                CPPUNIT_ASSERT_EQUAL(
+                    expected, test[i].inner(test[j], maths::CPackedBitVector::E_XOR));
             }
         }
     }
@@ -351,10 +372,12 @@ void CPackedBitVectorTest::testPersist() {
     LOG_DEBUG(<< "| CPackedBitVectorTest::testPersist |");
     LOG_DEBUG(<< "+-------------------------------------+");
-    bool bits[] = {true, true, false, false, true, false, false, false, true, true};
+    bool bits[] = {true, true, false, false, true,
+                   false, false, false, true, true};
     for (std::size_t t = 0u; t < boost::size(bits); ++t) {
-        maths::CPackedBitVector origVector(TBoolVec(boost::begin(bits), boost::begin(bits) + t));
+        maths::CPackedBitVector origVector(
+            TBoolVec(boost::begin(bits), boost::begin(bits) + t));
         std::string origXml = origVector.toDelimited();
         LOG_DEBUG(<< "xml = " << origXml);
@@ -369,20 +392,20 @@ CppUnit::Test* CPackedBitVectorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPackedBitVectorTest");
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testCreation", &CPackedBitVectorTest::testCreation));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testExtend", &CPackedBitVectorTest::testExtend));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testContract", &CPackedBitVectorTest::testContract));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testOperators", &CPackedBitVectorTest::testOperators));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testInner", &CPackedBitVectorTest::testInner));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testBitwiseOr", &CPackedBitVectorTest::testBitwiseOr));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CPackedBitVectorTest>("CPackedBitVectorTest::testPersist", &CPackedBitVectorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPackedBitVectorTest>(
+        "CPackedBitVectorTest::testCreation", &CPackedBitVectorTest::testCreation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPackedBitVectorTest>(
+        "CPackedBitVectorTest::testExtend", &CPackedBitVectorTest::testExtend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPackedBitVectorTest>(
+        "CPackedBitVectorTest::testContract", &CPackedBitVectorTest::testContract));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPackedBitVectorTest>(
+        "CPackedBitVectorTest::testOperators", &CPackedBitVectorTest::testOperators));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPackedBitVectorTest>(
+        "CPackedBitVectorTest::testInner", &CPackedBitVectorTest::testInner));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPackedBitVectorTest>(
+        "CPackedBitVectorTest::testBitwiseOr", &CPackedBitVectorTest::testBitwiseOr));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPackedBitVectorTest>(
+        "CPackedBitVectorTest::testPersist", &CPackedBitVectorTest::testPersist));
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc
index f871d36a66..fe0f60a0b3 100644
--- a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc
+++ b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc
@@ -82,10 +82,12 @@ void CPeriodicityHypothesisTestsTest::testNonPeriodic() {
         rng.generateUniformSamples(3, 20, 1, repeats);
         maths::CPeriodicityHypothesisTests hypotheses;
-        hypotheses.initialize(bucketLength, window, window / static_cast<core_t::TTime>(repeats[0]));
+        hypotheses.initialize(bucketLength, window,
+                              window / static_cast<core_t::TTime>(repeats[0]));
         for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) {
-            hypotheses.add(time, generators[index[0]](time) + noise[(time - 10000) / bucketLength]);
+            hypotheses.add(time, generators[index[0]](time) +
+                                     noise[(time - 10000) / bucketLength]);
         }
         maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()};
@@ -116,9 +118,7 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() {
     TTimeVec bucketLengths{TEN_MINS, HALF_HOUR};
     TSizeVec permittedGenerators{2, 4, 4, 5};
     TGeneratorVec generators{smoothDaily, spikeyDaily, smoothWeekly, weekends, spikeyWeekly};
-    TStrVec expected{"{ 'daily' }",
-                     "{ 'daily' }",
-                     "{ 'weekly' }",
+    TStrVec expected{"{ 'daily' }", "{ 'daily' }", "{ 'weekly' }",
                      "{ 'weekend daily' 'weekday daily' 'weekend weekly' 'weekday weekly' }",
                      "{ 'daily' 'weekly' }"};
@@ -154,15 +154,18 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() {
         rng.generateUniformSamples(3, 20, 1, repeats);
         maths::CPeriodicityHypothesisTests hypotheses;
-        hypotheses.initialize(bucketLength, window, window / static_cast<core_t::TTime>(repeats[0]));
+        hypotheses.initialize(bucketLength, window,
+                              window / static_cast<core_t::TTime>(repeats[0]));
         for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) {
-            hypotheses.add(time, 20.0 * generators[index[0]](time) + noise[(time - 10000) / bucketLength]);
+            hypotheses.add(time, 20.0 * generators[index[0]](time) +
+                                     noise[(time - 10000) / bucketLength]);
         }
         maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()};
         if (result.print() != expected[index[0]]) {
-            LOG_DEBUG(<< "result = " << result.print() << " expected " << expected[index[0]]);
+            LOG_DEBUG(<< "result = " << result.print()
+                      << " expected " << expected[index[0]]);
         }
         TP += result.print() == expected[index[0]] ? 1.0 : 0.0;
         FN += result.print() == expected[index[0]] ? 0.0 : 1.0;
@@ -181,10 +184,14 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() {
         core_t::TTime startTime;
         core_t::TTime endTime;
         CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse(
-            "testfiles/spikey_data.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_UNIX_REGEX));
+            "testfiles/spikey_data.csv", timeseries, startTime, endTime,
+            test::CTimeSeriesTestData::CSV_UNIX_REGEX));
         CPPUNIT_ASSERT(!timeseries.empty());
-        LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ...");
+        LOG_DEBUG(<< "timeseries = "
+                  << core::CContainerPrinter::print(timeseries.begin(),
+                                                    timeseries.begin() + 10)
+                  << " ...");
         TTimeVec lastTests{timeseries[0].first, timeseries[0].first};
         TTimeVec windows{4 * DAY, 14 * DAY};
@@ -197,7 +204,8 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() {
             core_t::TTime time{timeseries[i].first};
             for (std::size_t j = 0u; j < 2; ++j) {
                 if (time > lastTests[j] + windows[j]) {
-                    maths::CPeriodicityHypothesisTestsResult result{hypotheses[j].test()};
+                    maths::CPeriodicityHypothesisTestsResult result{
+                        hypotheses[j].test()};
                     CPPUNIT_ASSERT_EQUAL(std::string("{ 'daily' }"), result.print());
                     hypotheses[j] = maths::CPeriodicityHypothesisTests();
                     hypotheses[j].initialize(HOUR, windows[j], DAY);
@@ -215,10 +223,14 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() {
         core_t::TTime startTime;
         core_t::TTime endTime;
         CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse(
-            "testfiles/diurnal.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_UNIX_REGEX));
+            "testfiles/diurnal.csv", timeseries, startTime, endTime,
+            test::CTimeSeriesTestData::CSV_UNIX_REGEX));
         CPPUNIT_ASSERT(!timeseries.empty());
-        LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ...");
+        LOG_DEBUG(<< "timeseries = "
+                  << core::CContainerPrinter::print(timeseries.begin(),
+                                                    timeseries.begin() + 10)
+                  << " ...");
         core_t::TTime lastTest{timeseries[0].first};
         core_t::TTime window{14 * DAY};
@@ -230,7 +242,8 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() {
             core_t::TTime time{timeseries[i].first};
             if (time > lastTest + window) {
                 maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()};
-                CPPUNIT_ASSERT_EQUAL(std::string("{ 'weekend daily' 'weekday daily' }"), result.print());
+                CPPUNIT_ASSERT_EQUAL(std::string("{ 'weekend daily' 'weekday daily' }"),
+                                     result.print());
                 hypotheses = maths::CPeriodicityHypothesisTests();
                 hypotheses.initialize(HOUR, window, DAY);
                 lastTest += window;
@@ -245,15 +258,16 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() {
         TTimeDoublePrVec timeseries;
         core_t::TTime startTime;
         core_t::TTime endTime;
-        CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/no_periods.csv",
-                                                        timeseries,
-                                                        startTime,
-                                                        endTime,
-                                                        test::CTimeSeriesTestData::CSV_ISO8601_REGEX,
-                                                        test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT));
+        CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse(
+            "testfiles/no_periods.csv", timeseries, startTime, endTime,
+            test::CTimeSeriesTestData::CSV_ISO8601_REGEX,
+            test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT));
         CPPUNIT_ASSERT(!timeseries.empty());
-        LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ...");
+        LOG_DEBUG(<< "timeseries = "
+                  << core::CContainerPrinter::print(timeseries.begin(),
+                                                    timeseries.begin() + 10)
+                  << " ...");
         core_t::TTime lastTest{timeseries[0].first};
         core_t::TTime window{14 * DAY};
@@ -280,15 +294,16 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() {
         TTimeDoublePrVec timeseries;
         core_t::TTime startTime;
         core_t::TTime endTime;
-        CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/thirty_minute_samples.csv",
-                                                        timeseries,
-                                                        startTime,
-                                                        endTime,
-                                                        test::CTimeSeriesTestData::CSV_ISO8601_REGEX,
-                                                        test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT));
+        CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse(
+            "testfiles/thirty_minute_samples.csv", timeseries, startTime,
+            endTime, test::CTimeSeriesTestData::CSV_ISO8601_REGEX,
+            test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT));
         CPPUNIT_ASSERT(!timeseries.empty());
-        LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ...");
+        LOG_DEBUG(<< "timeseries = "
+                  << core::CContainerPrinter::print(timeseries.begin(),
+                                                    timeseries.begin() + 10)
+                  << " ...");
         core_t::TTime lastTest{timeseries[0].first};
         core_t::TTime window{14 * DAY};
@@ -345,15 +360,16 @@ void CPeriodicityHypothesisTestsTest::testNonDiurnal() {
         for (std::size_t j = 0u; j < bucketLengths.size(); ++j) {
             core_t::TTime bucketLength{bucketLengths[j]};
-            core_t::TTime period{
-                maths::CIntegerTools::floor(static_cast<core_t::TTime>(static_cast<double>(DAY) / scaling), bucketLength)};
+            core_t::TTime period{maths::CIntegerTools::floor(
+                static_cast<core_t::TTime>(static_cast<double>(DAY) / scaling), bucketLength)};
             scaling = static_cast<double>(DAY) / static_cast<double>(period);
             if (scaling == 1.0 || window < 3 * period) {
                 continue;
             }
             maths::CPeriodicityHypothesisTestsResult expected;
-            expected.add(core::CStringUtils::typeToString(period), false, 0, period, {0, period});
+            expected.add(core::CStringUtils::typeToString(period), false, 0,
+                         period, {0, period});
             switch (test % 3) {
             case 0:
@@ -373,12 +389,14 @@ void CPeriodicityHypothesisTestsTest::testNonDiurnal() {
             hypotheses.initialize(bucketLength, window, period);
             for (core_t::TTime time = 10000; time < 10000 + window; time += bucketLength) {
-                hypotheses.add(time, 20.0 * scale(scaling, time, generators[index[0]]) + noise[(time - 10000) / bucketLength]);
+                hypotheses.add(time, 20.0 * scale(scaling, time, generators[index[0]]) +
+                                         noise[(time - 10000) / bucketLength]);
             }
             maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()};
             if (result.print() != expected.print()) {
-                LOG_DEBUG(<< "result = " << result.print() << " expected " << expected.print());
+                LOG_DEBUG(<< "result = " << result.print() << " expected "
+                          << expected.print());
             }
             TP += result.print() == expected.print() ? 1.0 : 0.0;
             FN += result.print() == expected.print() ?
0.0 : 1.0;
@@ -403,9 +421,12 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() {
         core_t::TTime time = 0;
         for (std::size_t t = 0u; t < 7; ++t) {
-            for (auto value : {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0,
-                               4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0,
-                               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) {
+            for (auto value :
+                 {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0,
+                  6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) {
                 if (value > 0.0) {
                     hypotheses.add(time, value);
                 }
@@ -425,9 +446,12 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() {
         core_t::TTime time = 0;
         for (std::size_t t = 0u; t < 7; ++t) {
-            for (auto value : {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0,
-                               4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0,
-                               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) {
+            for (auto value :
+                 {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0,
+                  6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) {
                 if (value > 0.0) {
                     TDoubleVec rand;
                     rng.generateUniformSamples(-1.0, 1.0, 1, rand);
@@ -449,14 +473,23 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() {
         core_t::TTime time = 0;
         for (std::size_t t = 0u; t < 4; ++t) {
             for (auto value :
-                 {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0,
-                  0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0,
-                  4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0,
-                  8.0, 9.0, 9.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0,
-                  1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0,
-                  4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) {
+                 {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0,
+                  0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0,
+                  4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0,
+                  9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0,
+                  4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 20.0, 18.0,
+                  10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0,
+                  10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0,
+                  0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0,
+                  6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0,
+                  1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) {
                 if (value > 0.0) {
                     hypotheses.add(time, value);
                 }
@@ -478,14 +511,23 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() {
         core_t::TTime time = 0;
         for (std::size_t t = 0u; t < 4; ++t) {
             for (auto value :
-                 {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0,
-                  0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0,
-                  4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0,
-                  8.0, 9.0, 9.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0,
-                  1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0,
-                  4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) {
+                 {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0,
+                  0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0,
+                  4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0,
+                  9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0,
+                  4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 20.0, 18.0,
+                  10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0,
+                  10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0,
+                  0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0,
+                  6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0,
+                  1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) {
                 if (value > 0.0) {
                     TDoubleVec rand;
                     rng.generateUniformSamples(-1.0, 1.0, 1, rand);
@@ -537,15 +579,16 @@ void CPeriodicityHypothesisTestsTest::testTestForPeriods() {
        for (std::size_t j = 0u; j < bucketLengths.size(); ++j) {
             core_t::TTime bucketLength{bucketLengths[j]};
-            core_t::TTime period{
-                maths::CIntegerTools::floor(static_cast<core_t::TTime>(static_cast<double>(DAY) / scaling), bucketLength)};
+            core_t::TTime period{maths::CIntegerTools::floor(
+                static_cast<core_t::TTime>(static_cast<double>(DAY) / scaling), bucketLength)};
             scaling = static_cast<double>(DAY) / static_cast<double>(period);
             if (scaling == 1.0 || window < 3 * period) {
                 continue;
             }
             maths::CPeriodicityHypothesisTestsResult expected;
-            expected.add(core::CStringUtils::typeToString(period), false, 0, period, {0, period});
+            expected.add(core::CStringUtils::typeToString(period), false, 0,
+                         period, {0, period});
             switch (test % 3) {
             case 0:
@@ -565,23 +608,28 @@ void CPeriodicityHypothesisTestsTest::testTestForPeriods() {
             hypotheses.initialize(bucketLength, window, period);
             maths::TFloatMeanAccumulatorVec values(window / bucketLength);
-            for (core_t::TTime time = startTime; time < startTime + window; time += bucketLength) {
+            for (core_t::TTime time = startTime; time < startTime + window;
+                 time += bucketLength) {
                 std::size_t bucket((time - startTime) / bucketLength);
-                double value{20.0 * scale(scaling, time, generators[index[0]]) + noise[bucket]};
+                double value{20.0 * scale(scaling, time, generators[index[0]]) +
+                             noise[bucket]};
                 values[bucket].add(value);
             }
             maths::CPeriodicityHypothesisTestsConfig config;
-            maths::CPeriodicityHypothesisTestsResult result{maths::testForPeriods(config, startTime, bucketLength, values)};
+            maths::CPeriodicityHypothesisTestsResult result{
+                maths::testForPeriods(config, startTime, bucketLength, values)};
             if (result.print() != expected.print()) {
-                LOG_DEBUG(<< "result = " << result.print() << " expected " << expected.print());
+                LOG_DEBUG(<< "result = " << result.print() << " expected "
+                          << expected.print());
             }
             TP[0] += result.print() == expected.print() ? 1.0 : 0.0;
             FN[0] += result.print() == expected.print() ? 0.0 : 1.0;
             if (result.components().size() == 1) {
                 core_t::TTime modp{result.components()[0].s_Period % period};
-                double error{static_cast<double>(std::min(modp, std::abs(period - modp))) / static_cast<double>(period)};
+                double error{static_cast<double>(std::min(modp, std::abs(period - modp))) /
+                             static_cast<double>(period)};
                 TP[1] += error < 0.01 ? 1.0 : 0.0;
                 FN[1] += error < 0.01 ? 0.0 : 1.0;
                 TP[2] += error < 0.05 ? 1.0 : 0.0;
@@ -606,16 +654,21 @@
 CppUnit::Test* CPeriodicityHypothesisTestsTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPeriodicityHypothesisTestsTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>("CPeriodicityHypothesisTestsTest::testNonPeriodic",
-                                                                                   &CPeriodicityHypothesisTestsTest::testNonPeriodic));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>("CPeriodicityHypothesisTestsTest::testDiurnal",
-                                                                                   &CPeriodicityHypothesisTestsTest::testDiurnal));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>("CPeriodicityHypothesisTestsTest::testNonDiurnal",
-                                                                                   &CPeriodicityHypothesisTestsTest::testNonDiurnal));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>("CPeriodicityHypothesisTestsTest::testWithSparseData",
-                                                                                   &CPeriodicityHypothesisTestsTest::testWithSparseData));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>("CPeriodicityHypothesisTestsTest::testTestForPeriods",
-                                                                                   &CPeriodicityHypothesisTestsTest::testTestForPeriods));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>(
+        "CPeriodicityHypothesisTestsTest::testNonPeriodic",
+        &CPeriodicityHypothesisTestsTest::testNonPeriodic));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>(
+        "CPeriodicityHypothesisTestsTest::testDiurnal",
+        &CPeriodicityHypothesisTestsTest::testDiurnal));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>(
+        "CPeriodicityHypothesisTestsTest::testNonDiurnal",
+        &CPeriodicityHypothesisTestsTest::testNonDiurnal));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>(
+        "CPeriodicityHypothesisTestsTest::testWithSparseData",
+        &CPeriodicityHypothesisTestsTest::testWithSparseData));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPeriodicityHypothesisTestsTest>(
+        "CPeriodicityHypothesisTestsTest::testTestForPeriods",
+        &CPeriodicityHypothesisTestsTest::testTestForPeriods));
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CPoissonMeanConjugateTest.cc b/lib/maths/unittest/CPoissonMeanConjugateTest.cc
index 01fab355bb..e16f8f2a34 100644
--- a/lib/maths/unittest/CPoissonMeanConjugateTest.cc
+++ b/lib/maths/unittest/CPoissonMeanConjugateTest.cc
@@ -85,9 +85,11 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() {
     maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);
     for (std::size_t j = 0u; j < samples.size(); ++j) {
-        filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]), TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0)));
+        filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]),
+                           TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0)));
     }
-    filter2.addSamples(weightStyle, samples, TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0)));
+    filter2.addSamples(weightStyle, samples,
+                       TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0)));
     LOG_DEBUG(<< filter1.print());
     LOG_DEBUG(<< "vs");
@@ -107,8 +109,8 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() {
         for (std::size_t j = 0u; j < count; ++j) {
             filter1.addSamples(TDouble1Vec(1, x));
         }
-        filter2.addSamples(
-            maths::CConstantWeights::COUNT, TDouble1Vec(1, x), TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast<double>(count))));
+        filter2.addSamples(maths::CConstantWeights::COUNT, TDouble1Vec(1, x),
+                           TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast<double>(count))));
         LOG_DEBUG(<< filter1.print());
         LOG_DEBUG(<< "vs");
@@ -163,7 +165,8 @@ void CPoissonMeanConjugateTest::testMeanEstimation() {
     const double decayRates[] = {0.0, 0.001, 0.01};
     const unsigned int nTests = 500u;
-    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0, 85.0, 90.0, 95.0, 99.0};
+    const double testIntervals[] = {50.0, 60.0, 70.0, 80.0,
+                                    85.0, 90.0, 95.0, 99.0};
     for (std::size_t i = 0; i < boost::size(decayRates); ++i) {
         test::CRandomNumbers rng;
@@ -175,7 +178,8 @@ void CPoissonMeanConjugateTest::testMeanEstimation() {
             TUIntVec samples;
             rng.generatePoissonSamples(rate, 500, samples);
-            CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[i]));
+            CPoissonMeanConjugate filter(
+                CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[i]));
             for (std::size_t j = 0u; j < samples.size(); ++j) {
                 filter.addSamples(TDouble1Vec(1, static_cast<double>(samples[j])));
@@ -183,7 +187,8 @@ void CPoissonMeanConjugateTest::testMeanEstimation() {
             }
             for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
-                TDoubleDoublePr confidenceInterval = filter.meanConfidenceInterval(testIntervals[j]);
+                TDoubleDoublePr confidenceInterval =
+                    filter.meanConfidenceInterval(testIntervals[j]);
                 if (rate < confidenceInterval.first || rate > confidenceInterval.second) {
                     errors[j] += 1.0;
@@ -194,7 +199,8 @@ void CPoissonMeanConjugateTest::testMeanEstimation() {
         for (std::size_t j = 0; j < boost::size(testIntervals); ++j) {
             double interval = 100.0 * errors[j] / static_cast<double>(nTests);
-            LOG_DEBUG(<< "interval = " << interval << ", expectedInterval = " << (100.0 - testIntervals[j]));
+            LOG_DEBUG(<< "interval = " << interval
+                      << ", expectedInterval = " << (100.0 - testIntervals[j]));
             // If the decay rate is zero the intervals should be accurate.
             // Otherwise, they should be an upper bound.
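[Aside: the testMeanEstimation hunks above exercise a coverage calibration: for each nominal confidence level, the fraction of trials in which the true rate escapes the estimated interval should be close to 100% minus that level (exactly so with zero decay rate, an upper bound otherwise). A stripped-down analogue of the same idea, using a plain z-interval for a normal mean with known standard deviation (all names and numbers here are illustrative, not from this patch):

    #include <cmath>
    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 rng(1);
        const double trueMean = 5.0, sd = 2.0;
        std::normal_distribution<double> normal(trueMean, sd);
        const double z95 = 1.96; // two-sided 95% normal quantile
        const int trials = 2000, n = 50;
        int misses = 0;
        for (int t = 0; t < trials; ++t) {
            double sum = 0.0;
            for (int i = 0; i < n; ++i) {
                sum += normal(rng);
            }
            double mean = sum / n;
            double halfWidth = z95 * sd / std::sqrt(static_cast<double>(n));
            if (trueMean < mean - halfWidth || trueMean > mean + halfWidth) {
                ++misses;
            }
        }
        // Expect roughly 5% misses for a 95% interval.
        std::cout << 100.0 * misses / trials << "% misses\n";
        return 0;
    }
]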
@@ -226,7 +232,8 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() {
     const double decayRates[] = {0.0, 0.001, 0.01};
     for (std::size_t i = 0u; i < boost::size(decayRates); ++i) {
-        CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[i]));
+        CPoissonMeanConjugate filter(
+            CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[i]));
         for (std::size_t j = 0u; j < samples.size(); ++j) {
             filter.addSamples(TDouble1Vec(1, static_cast<double>(samples[j])));
@@ -237,7 +244,8 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() {
         for (unsigned int x = 0; x < 20; ++x) {
             double logLikelihood = 0.0;
             TDouble1Vec sample(1, static_cast<double>(x));
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
+            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+                                 filter.jointLogMarginalLikelihood(sample, logLikelihood));
             cdf += std::exp(logLikelihood);
             double lb, ub;
@@ -245,7 +253,8 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() {
             CPPUNIT_ASSERT_EQUAL(lb, ub);
             double minusLogCdf = (lb + ub) / 2.0;
-            LOG_DEBUG(<< "sample = " << x << ", -log(cdf) = " << (-std::log(cdf)) << ", minusLogCdf = " << minusLogCdf);
+            LOG_DEBUG(<< "sample = " << x << ", -log(cdf) = " << (-std::log(cdf))
+                      << ", minusLogCdf = " << minusLogCdf);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(minusLogCdf, -std::log(cdf), epsilon);
             CPPUNIT_ASSERT(minusLogCdf >= 0.0);
@@ -264,14 +273,17 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() {
     const double sampleStds[] = {-2.0, -1.0, 0.0, 1.0, 2.0};
     for (std::size_t i = 0; i < boost::size(shapes); ++i) {
-        CPoissonMeanConjugate filter(maths::CPoissonMeanConjugate(0.0, shapes[i], rates[i]));
+        CPoissonMeanConjugate filter(
+            maths::CPoissonMeanConjugate(0.0, shapes[i], rates[i]));
         for (std::size_t j = 0; j < boost::size(sampleStds); ++j) {
             double mean = filter.marginalLikelihoodMean();
-            unsigned int sample = static_cast<unsigned int>(mean + sampleStds[j] * std::sqrt(mean));
+            unsigned int sample =
+                static_cast<unsigned int>(mean + sampleStds[j] * std::sqrt(mean));
             double lb = 0.0, ub = 0.0;
-            CPPUNIT_ASSERT(filter.minusLogJointCdf(TDouble1Vec(1, static_cast<double>(sample)), lb, ub));
+            CPPUNIT_ASSERT(filter.minusLogJointCdf(
+                TDouble1Vec(1, static_cast<double>(sample)), lb, ub));
             CPPUNIT_ASSERT_EQUAL(lb, ub);
             double minusLogCdf = (lb + ub) / 2.0;
             CPPUNIT_ASSERT(minusLogCdf >= 0.0);
@@ -279,17 +291,21 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() {
             double cdf = 0.0;
             for (unsigned int x = 0; x <= sample; ++x) {
                 double logLikelihood = 0.0;
-                CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                     filter.jointLogMarginalLikelihood(TDouble1Vec(1, static_cast<double>(x)), logLikelihood));
+                CPPUNIT_ASSERT_EQUAL(
+                    maths_t::E_FpNoErrors,
+                    filter.jointLogMarginalLikelihood(
+                        TDouble1Vec(1, static_cast<double>(x)), logLikelihood));
                 cdf += std::exp(logLikelihood);
                 cdf = std::min(cdf, 1.0);
             }
-            LOG_DEBUG(<< "-log(cdf) = " << -std::log(cdf) << ", minusLogCdf = " << minusLogCdf);
+            LOG_DEBUG(<< "-log(cdf) = " << -std::log(cdf)
+                      << ", minusLogCdf = " << minusLogCdf);
             // We'll tolerate a 5% error in the -log(c.d.f.) since
             // we're approximating for large mean.
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(minusLogCdf, -std::log(cdf), -0.05 * std::log(cdf));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(minusLogCdf, -std::log(cdf),
+                                         -0.05 * std::log(cdf));
         }
     }
 }
@@ -323,13 +339,15 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() {
         TDouble1Vec sample(1, static_cast<double>(samples[i]));
         filter.addSamples(sample);
         double logLikelihood = 0.0;
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood(sample, logLikelihood));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
+                             filter.jointLogMarginalLikelihood(sample, logLikelihood));
         differentialEntropy -= logLikelihood;
     }
     differentialEntropy /= static_cast<double>(samples.size());
-    LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
+    LOG_DEBUG(<< "differentialEntropy = " << differentialEntropy
+              << ", expectedDifferentialEntropy = " << expectedDifferentialEntropy);
     CPPUNIT_ASSERT(std::fabs(differentialEntropy - expectedDifferentialEntropy) < 0.01);
 }
@@ -344,7 +362,9 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodMode() {
     // with variances variance scales.
     const double rates[] = {0.1, 5.0, 100.0};
-    const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0};
+    const double varianceScales[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
+                                     0.7, 0.8, 0.9, 1.0, 1.2, 1.5,
+                                     2.0, 2.5, 3.0, 4.0, 5.0};
     test::CRandomNumbers rng;
@@ -367,9 +387,11 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodMode() {
             double vs = varianceScales[j];
             weight[0] = vs;
             double expectedMode = boost::math::mode(poisson);
-            LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weightStyle, weight)
+            LOG_DEBUG(<< "marginalLikelihoodMode = "
+                      << filter.marginalLikelihoodMode(weightStyle, weight)
                       << ", expectedMode = " << expectedMode);
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0);
         }
     }
 }
@@ -407,9 +429,12 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodVariance() {
         }
         // The error is at the precision of the numerical integration.
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, filter.marginalLikelihoodVariance(), 0.3 * expectedVariance);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance,
+                                     filter.marginalLikelihoodVariance(),
+                                     0.3 * expectedVariance);
-        relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) / expectedVariance);
+        relativeError.add(std::fabs(expectedVariance - filter.marginalLikelihoodVariance()) /
+                          expectedVariance);
     }
     LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError));
@@ -458,16 +483,18 @@ void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() {
         TMeanVarAccumulator sampledMomemts;
         sampledMomemts = std::for_each(sampled.begin(), sampled.end(), sampledMomemts);
-        LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodMean()
-                  << ", sampledMean = " << maths::CBasicStatistics::mean(sampledMomemts));
-        LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodVariance()
-                  << ", sampledVariance = " << maths::CBasicStatistics::variance(sampledMomemts));
+        LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodMean() << ", sampledMean = "
+                  << maths::CBasicStatistics::mean(sampledMomemts));
+        LOG_DEBUG(<< "expectedMean = " << filter.marginalLikelihoodVariance() << ", sampledVariance = "
+                  << maths::CBasicStatistics::variance(sampledMomemts));
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(), maths::CBasicStatistics::mean(sampledMomemts), 1e-8);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodMean(),
+                                     maths::CBasicStatistics::mean(sampledMomemts), 1e-8);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(filter.marginalLikelihoodVariance(),
                                      maths::CBasicStatistics::variance(sampledMomemts),
                                      0.15 * filter.marginalLikelihoodVariance());
-        meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() - maths::CBasicStatistics::variance(sampledMomemts)) /
+        meanVarError.add(std::fabs(filter.marginalLikelihoodVariance() -
+                                   maths::CBasicStatistics::variance(sampledMomemts)) /
                          filter.marginalLikelihoodVariance());
         std::sort(sampled.begin(), sampled.end());
@@ -477,7 +504,8 @@ void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() {
             double expectedQuantile;
             CPPUNIT_ASSERT(filter.marginalLikelihoodQuantileForTest(q, eps, expectedQuantile));
-            LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile << ", quantile range = [" << sampled[k - 3] << ","
+            LOG_DEBUG(<< "quantile = " << q << ", x_quantile = " << expectedQuantile
+                      << ", quantile range = [" << sampled[k - 3] << ","
                       << sampled[k] << "]");
             // Because the c.d.f. function for discrete R.V.s includes
@@ -545,8 +573,8 @@ void CPoissonMeanConjugateTest::testCdf() {
         CPPUNIT_ASSERT(filter.minusLogJointCdfComplement(TDouble1Vec(1, x), lb, ub));
         fComplement = (lb + ub) / 2.0;
-        LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f)
-                  << ", log(1 - F(x)) = " << (fComplement == 0.0 ? fComplement : -fComplement));
+        LOG_DEBUG(<< "log(F(x)) = " << (f == 0.0 ? f : -f) << ", log(1 - F(x)) = "
+                  << (fComplement == 0.0 ? fComplement : -fComplement));
         CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, std::exp(-f) + std::exp(-fComplement), 1e-10);
     }
 }
@@ -600,7 +628,9 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() {
             double fx;
             filter.jointLogMarginalLikelihood(sample, fx);
-            double px = static_cast<double>(std::upper_bound(likelihoods.begin(), likelihoods.end(), fx) - likelihoods.begin()) /
+            double px = static_cast<double>(std::upper_bound(likelihoods.begin(),
                                                             likelihoods.end(), fx) -
+                                            likelihoods.begin()) /
                        static_cast<double>(likelihoods.size());
             double lb, ub;
@@ -608,7 +638,8 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() {
             double ssd = std::sqrt(px * (1.0 - px) / static_cast<double>(samples.size()));
-            LOG_DEBUG(<< "x = " << x << ", expected P(x) = " << px << ", actual P(x) = " << (lb + ub) / 2.0 << " sample sd = " << ssd);
+            LOG_DEBUG(<< "x = " << x << ", expected P(x) = " << px << ", actual P(x) = "
+                      << (lb + ub) / 2.0 << " sample sd = " << ssd);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(px, (lb + ub) / 2.0, 8.0 * ssd);
@@ -618,7 +649,8 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() {
         maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);
         for (std::size_t k = 0u; k < boost::size(vs); ++k) {
-            double mode = filter.marginalLikelihoodMode(weightStyle, TDouble4Vec(1, vs[k]));
+            double mode = filter.marginalLikelihoodMode(weightStyle,
+                                                        TDouble4Vec(1, vs[k]));
             double ss[] = {0.9 * mode, 1.1 * mode};
             LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode);
@@ -628,52 +660,40 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() {
             if (mode > 0.0) {
                 filter.probabilityOfLessLikelySamples(
-                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
+                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]),
+                    TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
                 if (mode > 0.0) {
                     filter.probabilityOfLessLikelySamples(
-                        maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
+                        maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2),
+                        TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
-                    filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow,
-                                                          weightStyle,
-                                                          TDouble1Vec(ss, ss + 2),
-                                                          TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                          lb,
-                                                          ub,
-                                                          tail);
+                    filter.probabilityOfLessLikelySamples(
+                        maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2),
+                        TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
-                    filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove,
-                                                          weightStyle,
-                                                          TDouble1Vec(ss, ss + 2),
-                                                          TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                          lb,
-                                                          ub,
-                                                          tail);
+                    filter.probabilityOfLessLikelySamples(
+                        maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2),
+                        TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
                 }
             }
             if (mode > 0.0) {
                 filter.probabilityOfLessLikelySamples(
-                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
+                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]),
+                    TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
                 filter.probabilityOfLessLikelySamples(
-                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
+                    maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2),
+                    TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
-                filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedBelow,
-                                                      weightStyle,
-                                                      TDouble1Vec(ss, ss + 2),
-                                                      TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                      lb,
-                                                      ub,
-                                                      tail);
+                filter.probabilityOfLessLikelySamples(
+                    maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2),
+                    TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
-                filter.probabilityOfLessLikelySamples(maths_t::E_OneSidedAbove,
-                                                      weightStyle,
-                                                      TDouble1Vec(ss, ss + 2),
-                                                      TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])),
-                                                      lb,
-                                                      ub,
-                                                      tail);
+                filter.probabilityOfLessLikelySamples(
+                    maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2),
+                    TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
             }
         }
@@ -723,7 +743,8 @@ void CPoissonMeanConjugateTest::testAnomalyScore() {
         rng.generatePoissonSamples(processRates[i], 500, samples);
         for (std::size_t j = 0; j < boost::size(decayRates); ++j) {
-            CPoissonMeanConjugate filter(CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[j]));
+            CPoissonMeanConjugate filter(
+                CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[j]));
             ++test;
@@ -736,7 +757,8 @@ void CPoissonMeanConjugateTest::testAnomalyScore() {
             for (unsigned int time = 0; time < samples.size(); ++time) {
                 double sample = samples[time] +
-                                (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) - boost::begin(anomalyTimes)] *
+                                (anomalies[std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), time) -
+                                           boost::begin(anomalyTimes)] *
                                  boost::math::standard_deviation(poisson));
                 TDouble1Vec sampleVec(1, sample);
@@ -756,28 +778,27 @@ void CPoissonMeanConjugateTest::testAnomalyScore() {
             x << "];\n";
             scores << "];\n";
-            file << x.str() << scores.str() << "plot(x" << test << ", score" << test << ");\n"
+            file << x.str() << scores.str() << "plot(x" << test << ", score"
+                 << test << ");\n"
                  << "input(\"Hit any key for next test\");\n\n";
             TUIntVec falsePositives;
-            std::set_difference(candidateAnomalies.begin(),
-                                candidateAnomalies.end(),
-                                boost::begin(anomalyTimes),
-                                boost::end(anomalyTimes),
+            std::set_difference(candidateAnomalies.begin(), candidateAnomalies.end(),
+                                boost::begin(anomalyTimes), boost::end(anomalyTimes),
                                 std::back_inserter(falsePositives));
-            double falsePositiveRate = static_cast<double>(falsePositives.size()) / static_cast<double>(samples.size());
+            double falsePositiveRate = static_cast<double>(falsePositives.size()) /
+                                       static_cast<double>(samples.size());
             totalFalsePositiveRate += falsePositiveRate;
             TUIntVec positives;
-            std::set_intersection(candidateAnomalies.begin(),
-                                  candidateAnomalies.end(),
-                                  boost::begin(anomalyTimes),
-                                  boost::end(anomalyTimes),
+            std::set_intersection(candidateAnomalies.begin(), candidateAnomalies.end(),
                                  boost::begin(anomalyTimes), boost::end(anomalyTimes),
                                   std::back_inserter(positives));
-            LOG_DEBUG(<< "falsePositiveRate = " << falsePositiveRate << ", positives = " << positives.size());
+            LOG_DEBUG(<< "falsePositiveRate = " << falsePositiveRate
+                      << ", positives = " << positives.size());
             // False alarm rate should be less than 0.4%.
             CPPUNIT_ASSERT(falsePositiveRate <= 0.02);
@@ -825,8 +846,10 @@ void CPoissonMeanConjugateTest::testOffset() {
     for (std::size_t i = 0; i < boost::size(offsets); ++i) {
         for (std::size_t j = 0; j < boost::size(decayRates); ++j) {
-            CPoissonMeanConjugate filter1(CPoissonMeanConjugate::nonInformativePrior(offsets[i], decayRates[j]));
-            CPoissonMeanConjugate filter2(CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[j]));
+            CPoissonMeanConjugate filter1(CPoissonMeanConjugate::nonInformativePrior(
+                offsets[i], decayRates[j]));
+            CPoissonMeanConjugate filter2(
+                CPoissonMeanConjugate::nonInformativePrior(0.0, decayRates[j]));
             for (std::size_t k = 0u; k < samples.size(); ++k) {
                 TDouble1Vec offsetSample(1, samples[k] - offsets[i]);
@@ -841,14 +864,16 @@ void CPoissonMeanConjugateTest::testOffset() {
                 double likelihood1;
                 filter1.jointLogMarginalLikelihood(offsetSample, likelihood1);
                 double lb1, ub1;
-                filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided, offsetSample, lb1, ub1);
+                filter1.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
+                                                       offsetSample, lb1, ub1);
                 CPPUNIT_ASSERT_EQUAL(lb1, ub1);
                 double probability1 = (lb1 + ub1) / 2.0;
                 double likelihood2;
                 filter2.jointLogMarginalLikelihood(sample, likelihood2);
                 double lb2, ub2;
-                filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lb2, ub2);
+                filter2.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
+                                                       sample, lb2, ub2);
                 CPPUNIT_ASSERT_EQUAL(lb2, ub2);
                 double probability2 = (lb2 + ub2) / 2.0;
@@ -877,8 +902,9 @@ void CPoissonMeanConjugateTest::testPersist() {
     maths::CPoissonMeanConjugate origFilter(CPoissonMeanConjugate::nonInformativePrior());
     for (std::size_t i = 0u; i < samples.size(); ++i) {
-        origFilter.addSamples(
-            maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), TDouble1Vec(1, samples[i]), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
+        origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
+                              TDouble1Vec(1, samples[i]),
+                              TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
     }
     double decayRate = origFilter.decayRate();
     uint64_t checksum = origFilter.checksum();
@@ -897,14 +923,13 @@ void CPoissonMeanConjugateTest::testPersist() {
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
-    maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
-                                             decayRate + 0.1,
-                                             maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
-                                             maths::MINIMUM_CLUSTER_SPLIT_COUNT,
-                                             maths::MINIMUM_CATEGORY_COUNT);
+    maths::SDistributionRestoreParams params(
+        maths_t::E_ContinuousData, decayRate + 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+        maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
     maths::CPoissonMeanConjugate restoredFilter(params, traverser);
-    LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredFilter.checksum());
+    LOG_DEBUG(<< "orig checksum = " << checksum
+              << " restored checksum = " << restoredFilter.checksum());
     CPPUNIT_ASSERT_EQUAL(checksum, restoredFilter.checksum());
     // The XML representation of the new filter should be the same
@@ -961,32 +986,41 @@ void CPoissonMeanConjugateTest::testNegativeSample() {
 CppUnit::Test* CPoissonMeanConjugateTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPoissonMeanConjugateTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testMultipleUpdate",
-                                                                             &CPoissonMeanConjugateTest::testMultipleUpdate));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testPropagation",
-                                                                             &CPoissonMeanConjugateTest::testPropagation));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testMeanEstimation",
-                                                                             &CPoissonMeanConjugateTest::testMeanEstimation));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testMarginalLikelihood",
-                                                                             &CPoissonMeanConjugateTest::testMarginalLikelihood));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testMarginalLikelihoodMode",
-                                                                             &CPoissonMeanConjugateTest::testMarginalLikelihoodMode));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testMarginalLikelihoodVariance",
-                                                                             &CPoissonMeanConjugateTest::testMarginalLikelihoodVariance));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testSampleMarginalLikelihood",
-                                                                             &CPoissonMeanConjugateTest::testSampleMarginalLikelihood));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testCdf", &CPoissonMeanConjugateTest::testCdf));
     suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
-        "CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples", &CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testAnomalyScore",
-                                                                             &CPoissonMeanConjugateTest::testAnomalyScore));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testOffset",
-                                                                             &CPoissonMeanConjugateTest::testOffset));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testPersist",
-                                                                             &CPoissonMeanConjugateTest::testPersist));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>("CPoissonMeanConjugateTest::testNegativeSample",
-                                                                             &CPoissonMeanConjugateTest::testNegativeSample));
+        "CPoissonMeanConjugateTest::testMultipleUpdate",
+        &CPoissonMeanConjugateTest::testMultipleUpdate));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testPropagation", &CPoissonMeanConjugateTest::testPropagation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testMeanEstimation",
+        &CPoissonMeanConjugateTest::testMeanEstimation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testMarginalLikelihood",
+        &CPoissonMeanConjugateTest::testMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testMarginalLikelihoodMode",
+        &CPoissonMeanConjugateTest::testMarginalLikelihoodMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testMarginalLikelihoodVariance",
+        &CPoissonMeanConjugateTest::testMarginalLikelihoodVariance));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testSampleMarginalLikelihood",
+        &CPoissonMeanConjugateTest::testSampleMarginalLikelihood));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testCdf", &CPoissonMeanConjugateTest::testCdf));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples",
+        &CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testAnomalyScore",
+        &CPoissonMeanConjugateTest::testAnomalyScore));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testOffset", &CPoissonMeanConjugateTest::testOffset));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testPersist", &CPoissonMeanConjugateTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CPoissonMeanConjugateTest>(
+        "CPoissonMeanConjugateTest::testNegativeSample",
+        &CPoissonMeanConjugateTest::testNegativeSample));
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CPriorTest.cc b/lib/maths/unittest/CPriorTest.cc
index 48ec879426..b5014760a8 100644
--- a/lib/maths/unittest/CPriorTest.cc
+++ b/lib/maths/unittest/CPriorTest.cc
@@ -57,11 +57,13 @@ class CMinusLogLikelihood {
 public:
     CMinusLogLikelihood(const maths::CPrior& prior)
-        : m_Prior(&prior), m_WeightStyle(1, maths_t::E_SampleCountWeight), m_X(1, 0.0), m_Weight(1, TDoubleVec(1, 1.0)) {}
+        : m_Prior(&prior), m_WeightStyle(1, maths_t::E_SampleCountWeight),
+          m_X(1, 0.0), m_Weight(1, TDoubleVec(1, 1.0)) {}
     bool operator()(const double& x, double& result) const {
         m_X[0] = x;
-        maths_t::EFloatingPointErrorStatus status = m_Prior->jointLogMarginalLikelihood(m_WeightStyle, m_X, m_Weight, result);
+        maths_t::EFloatingPointErrorStatus status =
+            m_Prior->jointLogMarginalLikelihood(m_WeightStyle, m_X, m_Weight, result);
         result = -result;
         return !(status & maths_t::E_FpFailed);
     }
@@ -84,7 +86,8 @@ void CPriorTest::testExpectation() {
     test::CRandomNumbers rng;
-    CNormalMeanPrecConjugate prior(maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData));
+    CNormalMeanPrecConjugate prior(
+        maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData));
     TDoubleVec samples;
     rng.generateNormalSamples(1.0, 1.5, 10000u, samples);
@@ -98,28 +101,33 @@ void CPriorTest::testExpectation() {
     for (std::size_t n = 1; n < 10; ++n) {
         double mean;
         CPPUNIT_ASSERT(prior.expectation(CX(), n, mean));
-        LOG_DEBUG(<< "n = " << n << ", mean = " << mean << ", error = " << std::fabs(mean - trueMean));
+        LOG_DEBUG(<< "n = " << n << ", mean = " << mean
+                  << ", error = " << std::fabs(mean - trueMean));
         CPPUNIT_ASSERT_DOUBLES_EQUAL(trueMean, mean, 1e-10);
     }
-    double varianceErrors[] = {1.4, 0.1, 0.05, 0.01, 0.005, 0.0008, 0.0008, 0.0007, 0.0005};
+    double varianceErrors[] = {1.4, 0.1, 0.05, 0.01, 0.005,
+                               0.0008, 0.0008, 0.0007, 0.0005};
     double trueVariance = maths::CBasicStatistics::variance(moments);
     LOG_DEBUG(<< "true variance = " << trueVariance);
     for (std::size_t n = 1; n < 10; ++n) {
         double variance;
         CPPUNIT_ASSERT(prior.expectation(CVariance(prior.mean()), n, variance));
-        LOG_DEBUG(<< "n = " << n << ", variance = " << variance << ", error = " << std::fabs(variance - trueVariance));
+        LOG_DEBUG(<< "n = " << n << ", variance = " << variance
+                  << ", error = " << std::fabs(variance - trueVariance));
         CPPUNIT_ASSERT_DOUBLES_EQUAL(trueVariance, variance, varianceErrors[n - 1]);
     }
-    double entropyErrors[] = {0.5, 0.05, 0.01, 0.005, 0.001, 0.0003, 0.0003, 0.0002, 0.0002};
+    double entropyErrors[] = {0.5, 0.05, 0.01, 0.005, 0.001,
+                              0.0003, 0.0003, 0.0002, 0.0002};
     boost::math::normal_distribution<> normal(trueMean, std::sqrt(trueVariance));
     double trueEntropy = maths::CTools::differentialEntropy(normal);
     LOG_DEBUG(<< "true differential entropy = " << trueEntropy);
     for (std::size_t n = 1; n < 10; ++n) {
         double entropy;
         CPPUNIT_ASSERT(prior.expectation(CMinusLogLikelihood(prior), n, entropy));
-        LOG_DEBUG(<< "n = " << n << ", differential entropy = " << entropy << ", error = " << std::fabs(entropy - trueEntropy));
+        LOG_DEBUG(<< "n = " << n << ", differential entropy = " << entropy
+                  << ", error = " << std::fabs(entropy - trueEntropy));
         CPPUNIT_ASSERT_DOUBLES_EQUAL(trueEntropy, entropy, entropyErrors[n - 1]);
     }
 }
@@ -127,7 +135,8 @@
 CppUnit::Test* CPriorTest::suite() {
CPriorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CPriorTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CPriorTest>("CPriorTest::testExpectation", &CPriorTest::testExpectation)); + suiteOfTests->addTest(new CppUnit::TestCaller<CPriorTest>( + "CPriorTest::testExpectation", &CPriorTest::testExpectation)); return suiteOfTests; } diff --git a/lib/maths/unittest/CProbabilityAggregatorsTest.cc b/lib/maths/unittest/CProbabilityAggregatorsTest.cc index af1599bb0a..934c81c2dd 100644 --- a/lib/maths/unittest/CProbabilityAggregatorsTest.cc +++ b/lib/maths/unittest/CProbabilityAggregatorsTest.cc @@ -85,7 +85,8 @@ class CExpectedLogProbabilityOfMFromNExtremeSamples { class CLogIntegrand { public: - CLogIntegrand(const TDoubleVec& limits, std::size_t n, std::size_t m, std::size_t i) : m_Limits(limits), m_N(n), m_M(m), m_I(i) {} + CLogIntegrand(const TDoubleVec& limits, std::size_t n, std::size_t m, std::size_t i) + : m_Limits(limits), m_N(n), m_M(m), m_I(i) {} bool operator()(double x, double& result) const { result = this->evaluate(x); @@ -110,7 +111,8 @@ class CExpectedLogProbabilityOfMFromNExtremeSamples { }; public: - CExpectedLogProbabilityOfMFromNExtremeSamples(std::size_t m) : m_P(m), m_N(0u) {} + CExpectedLogProbabilityOfMFromNExtremeSamples(std::size_t m) + : m_P(m), m_N(0u) {} void add(const double& probability) { m_P.add(probability); @@ -123,7 +125,8 @@ class CExpectedLogProbabilityOfMFromNExtremeSamples { TDoubleVec p(m_P.begin(), m_P.end()); CLogIntegrand f(p, m_N, p.size(), 1u); CIntegration::logGaussLegendre(f, 0, p[0], result); - result += boost::math::lgamma(static_cast<double>(m_N) + 1.0) - boost::math::lgamma(static_cast<double>(m_N - p.size()) + 1.0); + result += boost::math::lgamma(static_cast<double>(m_N) + 1.0) - + boost::math::lgamma(static_cast<double>(m_N - p.size()) + 1.0); return result; } @@ -180,10 +183,12 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { for (size_t i = 0; i < boost::size(percentiles); ++i) { for (size_t j = 0; j < boost::size(percentiles); ++j) { for (size_t k = 0; k < boost::size(percentiles); ++k) { - LOG_DEBUG(<< "percentile1 = " << percentiles[i] << ", percentile2 = " << percentiles[j] + LOG_DEBUG(<< "percentile1 = " << percentiles[i] + << ", percentile2 = " << percentiles[j] << ", percentile3 = " << percentiles[k]); - double probabilities[] = {2.0 * percentiles[i], 2.0 * percentiles[j], 2.0 * percentiles[k]}; + double probabilities[] = {2.0 * percentiles[i], 2.0 * percentiles[j], + 2.0 * percentiles[k]}; CJointProbabilityOfLessLikelySamples jointProbability; for (size_t l = 0; l < boost::size(probabilities); ++l) { @@ -200,12 +205,15 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { double quantile1 = boost::math::quantile(normal1, percentiles[i]); double quantile2 = boost::math::quantile(normal2, percentiles[j]); double quantile3 = boost::math::quantile(normal3, percentiles[k]); - double likelihood = - CTools::safePdf(normal1, quantile1) * CTools::safePdf(normal2, quantile2) * CTools::safePdf(normal3, quantile3); + double likelihood = CTools::safePdf(normal1, quantile1) * + CTools::safePdf(normal2, quantile2) * + CTools::safePdf(normal3, quantile3); for (unsigned int sample = 0; sample < numberSamples; ++sample) { - double sampleLikelihood = CTools::safePdf(normal1, samples1[sample]) * CTools::safePdf(normal2, samples2[sample]) * - CTools::safePdf(normal3, samples3[sample]); + double sampleLikelihood = + CTools::safePdf(normal1, samples1[sample]) * + CTools::safePdf(normal2, samples2[sample]) * +
CTools::safePdf(normal3, samples3[sample]); if (sampleLikelihood < likelihood) { count += 1.0; } @@ -213,7 +221,8 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { LOG_DEBUG(<< "count = " << count << ", expectedCount = " << expectedCount); - double error = std::fabs(count - expectedCount) / std::max(count, expectedCount); + double error = std::fabs(count - expectedCount) / + std::max(count, expectedCount); CPPUNIT_ASSERT(error < 0.2); totalExpectedCount += expectedCount; @@ -222,7 +231,8 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { } } - double totalError = std::fabs(totalCount - totalExpectedCount) / std::max(totalCount, totalExpectedCount); + double totalError = std::fabs(totalCount - totalExpectedCount) / + std::max(totalCount, totalExpectedCount); LOG_DEBUG(<< "totalError = " << totalError); CPPUNIT_ASSERT(totalError < 0.01); } @@ -247,9 +257,11 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { double probability; CPPUNIT_ASSERT(jointProbability.calculate(probability)); - LOG_DEBUG(<< "probability = " << probability << ", expectedProbability = " << expectedProbability); + LOG_DEBUG(<< "probability = " << probability + << ", expectedProbability = " << expectedProbability); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 1e-5 * expectedProbability); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, + 1e-5 * expectedProbability); } } } @@ -284,7 +296,8 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() { double lowerBound, upperBound; CPPUNIT_ASSERT(logJointProbability.calculateLowerBound(lowerBound)); CPPUNIT_ASSERT(logJointProbability.calculateUpperBound(upperBound)); - LOG_DEBUG(<< "log(pu) - log(p) = " << upperBound - logP << ", log(p) - log(pl) " << logP - lowerBound); + LOG_DEBUG(<< "log(pu) - log(p) = " << upperBound - logP + << ", log(p) - log(pl) " << logP - lowerBound); CPPUNIT_ASSERT(logP < upperBound); CPPUNIT_ASSERT(logP > lowerBound); @@ -329,12 +342,14 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() { double lowerBound, upperBound; CPPUNIT_ASSERT(logJointProbability.calculateLowerBound(lowerBound)); CPPUNIT_ASSERT(logJointProbability.calculateUpperBound(upperBound)); - LOG_DEBUG(<< "log(pu) - log(p) = " << upperBound - logP << ", log(p) - log(pl) " << logP - lowerBound); + LOG_DEBUG(<< "log(pu) - log(p) = " << upperBound - logP + << ", log(p) - log(pl) " << logP - lowerBound); CPPUNIT_ASSERT(logP < upperBound); CPPUNIT_ASSERT(logP > lowerBound); - CPPUNIT_ASSERT_DOUBLES_EQUAL(upperBound, lowerBound, std::fabs(8e-4 * upperBound)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(upperBound, lowerBound, + std::fabs(8e-4 * upperBound)); error += (upperBound - lowerBound) / std::fabs(upperBound); } else if (jointProbability.numberSamples() > 1.0) { @@ -392,8 +407,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() { double probability; CPPUNIT_ASSERT(probabilityCalculator.calculate(probability)); - LOG_DEBUG(<< "sample size = " << sampleSizes[i] << ", extreme sample probability = " << probabilities[j] - << ", probability = " << probability); + LOG_DEBUG(<< "sample size = " << sampleSizes[i] << ", extreme sample probability = " + << probabilities[j] << ", probability = " << probability); unsigned int nTrials = 10000u; unsigned int count = 0; @@ -416,7 +431,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() { } } - double expectedProbability = static_cast<double>(count) /
static_cast<double>(nTrials); + double expectedProbability = static_cast<double>(count) / + static_cast<double>(nTrials); LOG_DEBUG(<< "count = " << count << ", expectedProbability = " << expectedProbability << ", error = " << std::fabs(probability - expectedProbability)); @@ -451,7 +467,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { // 10) Underflow of numerical integration. { - double probabilities[] = {0.5, 0.5, 0.4, 0.02, 0.7, 0.9, 0.4, 0.2, 0.03, 0.5, 0.6}; + double probabilities[] = {0.5, 0.5, 0.4, 0.02, 0.7, 0.9, + 0.4, 0.2, 0.03, 0.5, 0.6}; for (std::size_t i = 1u; i < 6u; ++i) { CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(i); @@ -466,7 +483,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { double p2; CPPUNIT_ASSERT(probabilityCalculator.calculate(p2)); - LOG_DEBUG(<< "log(probability) = " << p2 << ", expected log(probability) = " << p1); + LOG_DEBUG(<< "log(probability) = " << p2 + << ", expected log(probability) = " << p1); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-8 * std::fabs(std::max(p1, p2))); } @@ -503,7 +521,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { for (std::size_t j = 0u; j < index.size(); ++j) { extremeSampleProbabilities.push_back(probabilities[index[j]]); } - LOG_DEBUG(<< "extreme samples probabilities = " << core::CContainerPrinter::print(extremeSampleProbabilities)); + LOG_DEBUG(<< "extreme samples probabilities = " + << core::CContainerPrinter::print(extremeSampleProbabilities)); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(i); @@ -545,13 +564,14 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { } } - double expectedProbability = static_cast<double>(count) / static_cast<double>(nTrials); + double expectedProbability = static_cast<double>(count) / + static_cast<double>(nTrials); double error = std::fabs(p - expectedProbability); double relativeError = error / std::max(p, expectedProbability); - LOG_DEBUG(<< "probability = " << p << ", expectedProbability = " << expectedProbability << ", error = " << error - << ", relative error = " << relativeError); + LOG_DEBUG(<< "probability = " << p << ", expectedProbability = " << expectedProbability + << ", error = " << error << ", relative error = " << relativeError); CPPUNIT_ASSERT(relativeError < 0.33); @@ -574,32 +594,19 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { } } - LOG_DEBUG(<< "totalError = " << totalError << ", totalRelativeError = " << (totalError / totalProbability)); + LOG_DEBUG(<< "totalError = " << totalError + << ", totalRelativeError = " << (totalError / totalProbability)); CPPUNIT_ASSERT(totalError < 0.01 * totalProbability); } } { - double probabilities[] = {1.90005e-6, - 2.09343e-5, - 2.36102e-5, - 2.36102e-4, - 3.21197e-4, - 0.104481, - 0.311476, - 0.46037, - 0.958691, - 0.144973, - 0.345924, - 0.111316, - 0.346185, - 0.993074, - 0.0902145, - 0.0902145, - 0.673371, - 0.346075, - 0.346025}; + double probabilities[] = { + 1.90005e-6, 2.09343e-5, 2.36102e-5, 2.36102e-4, 3.21197e-4, + 0.104481, 0.311476, 0.46037, 0.958691, 0.144973, + 0.345924, 0.111316, 0.346185, 0.993074, 0.0902145, + 0.0902145, 0.673371, 0.346075, 0.346025}; std::size_t n = boost::size(probabilities); std::size_t numberSamples[] = {n, 10 * n, 1000 * n}; @@ -621,7 +628,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { double p2; CPPUNIT_ASSERT(probabilityCalculator.calculate(p2)); - LOG_DEBUG(<< "log(probability) = " << p2 << ", expected log(probability) =
" << p1); + LOG_DEBUG(<< "log(probability) = " << p2 + << ", expected log(probability) = " << p1); CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-4 * std::fabs(std::max(p1, p2))); } } @@ -629,21 +637,32 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { { double probabilities[] = { - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, - 0.9917012}; + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 
0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012, + 0.9917012, 0.9917012, 0.9917012, 0.9917012, 0.9917012}; CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { @@ -690,7 +709,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(5); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); - double pmin[] = {0.004703117, 0.05059556, 1.0 - std::numeric_limits<double>::epsilon(), 1.0, 1.0}; + double pmin[] = {0.004703117, 0.05059556, + 1.0 - std::numeric_limits<double>::epsilon(), 1.0, 1.0}; for (std::size_t i = 0; i < boost::size(pmin); ++i) { probabilityCalculator.add(pmin[i]); expectedProbabilityCalculator.add(pmin[i]); } @@ -733,7 +753,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { } { - double probabilities[] = {0.08528782661735056, 0.3246988524001009, 0.5428693993904167, 0.9999999999999999, 0.9999999999999999}; + double probabilities[] = {0.08528782661735056, 0.3246988524001009, 0.5428693993904167, + 0.9999999999999999, 0.9999999999999999}; CExpectedLogProbabilityOfMFromNExtremeSamples expectedProbabilityCalculator(5); CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); @@ -755,7 +776,8 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { } { - double probabilities[] = {3.622684004911715e-76, 3.622684004911715e-76, 0.1534837115755979, 0.1608058997234747, 0.5143979767475618}; + double probabilities[] = {3.622684004911715e-76, 3.622684004911715e-76, 0.1534837115755979, + 0.1608058997234747, 0.5143979767475618}; CLogProbabilityOfMFromNExtremeSamples probabilityCalculator(5); for (std::size_t i = 0; i < 21402; ++i) { @@ -775,17 +797,18 @@ void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { CppUnit::Test* CProbabilityAggregatorsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProbabilityAggregatorsTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CProbabilityAggregatorsTest>("CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples", - &CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CProbabilityAggregatorsTest>("CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples", - &CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples)); suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAggregatorsTest>( - "CProbabilityAggregatorsTest::testProbabilityOfExtremeSample", &CProbabilityAggregatorsTest::testProbabilityOfExtremeSample)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CProbabilityAggregatorsTest>("CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples", - &CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples)); + "CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples", + &CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAggregatorsTest>( + "CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples", + &CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples)); + suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAggregatorsTest>( + "CProbabilityAggregatorsTest::testProbabilityOfExtremeSample", + &CProbabilityAggregatorsTest::testProbabilityOfExtremeSample)); + suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAggregatorsTest>( +
"CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples", + &CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples)); return suiteOfTests; } diff --git a/lib/maths/unittest/CProbabilityCalibratorTest.cc b/lib/maths/unittest/CProbabilityCalibratorTest.cc index 331310b3bb..6412dba110 100644 --- a/lib/maths/unittest/CProbabilityCalibratorTest.cc +++ b/lib/maths/unittest/CProbabilityCalibratorTest.cc @@ -29,15 +29,17 @@ void CProbabilityCalibratorTest::testCalibration() { LOG_DEBUG(<< "+-----------------------------------------------+"); using TDoubleVec = std::vector; - using CLogNormalMeanPrecConjugate = CPriorTestInterfaceMixin; + using CLogNormalMeanPrecConjugate = + CPriorTestInterfaceMixin; using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin; // Test some things which we know will give poorly calibrated // probabilities, i.e. fitting a normal a log-normal and multi- // modal distributions. - maths::CProbabilityCalibrator::EStyle styles[] = {maths::CProbabilityCalibrator::E_PartialCalibration, - maths::CProbabilityCalibrator::E_FullCalibration}; + maths::CProbabilityCalibrator::EStyle styles[] = { + maths::CProbabilityCalibrator::E_PartialCalibration, + maths::CProbabilityCalibrator::E_FullCalibration}; test::CRandomNumbers rng; @@ -52,8 +54,10 @@ void CProbabilityCalibratorTest::testCalibration() { for (std::size_t i = 0u; i < boost::size(styles); ++i) { maths::CProbabilityCalibrator calibrator(styles[i], 0.99); - CNormalMeanPrecConjugate normal = CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); - CLogNormalMeanPrecConjugate lognormal = CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CNormalMeanPrecConjugate normal = + CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CLogNormalMeanPrecConjugate lognormal = + CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); double rawError = 0.0; double calibratedError = 0.0; @@ -69,7 +73,8 @@ void CProbabilityCalibratorTest::testCalibration() { double upperBound; double rawProbability = 1.0; - if (normal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)) { + if (normal.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, lowerBound, upperBound)) { rawProbability = (lowerBound + upperBound) / 2.0; } @@ -77,12 +82,14 @@ void CProbabilityCalibratorTest::testCalibration() { double calibratedProbability = calibrator.calibrate(rawProbability); double trueProbability = 1.0; - if (lognormal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)) { + if (lognormal.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, lowerBound, upperBound)) { trueProbability = (lowerBound + upperBound) / 2.0; } double raw = std::fabs(std::log(rawProbability) - std::log(trueProbability)); - double calibrated = std::fabs(std::log(calibratedProbability) - std::log(trueProbability)); + double calibrated = std::fabs(std::log(calibratedProbability) - + std::log(trueProbability)); rawError += raw; calibratedError += calibrated; @@ -90,10 +97,13 @@ void CProbabilityCalibratorTest::testCalibration() { maxCalibratedError = std::max(maxCalibratedError, calibrated); } - LOG_DEBUG(<< "totalRawError = " << rawError << ", maxRawError = " << maxRawError); - LOG_DEBUG(<< "totalCalibratedError = " << calibratedError << ", maxCalibratedError = " << maxCalibratedError); + LOG_DEBUG(<< "totalRawError = " << rawError + << ", maxRawError = " << maxRawError); + LOG_DEBUG(<< 
"totalCalibratedError = " << calibratedError + << ", maxCalibratedError = " << maxCalibratedError); CPPUNIT_ASSERT((rawError - calibratedError) / rawError > improvements[i]); - CPPUNIT_ASSERT((maxRawError - maxCalibratedError) / maxRawError > maxImprovements[i]); + CPPUNIT_ASSERT((maxRawError - maxCalibratedError) / maxRawError > + maxImprovements[i]); } } @@ -115,9 +125,12 @@ void CProbabilityCalibratorTest::testCalibration() { for (std::size_t i = 0u; i < boost::size(styles); ++i) { maths::CProbabilityCalibrator calibrator(styles[i], 0.99); - CNormalMeanPrecConjugate normal = CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); - CNormalMeanPrecConjugate normal1 = CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); - CNormalMeanPrecConjugate normal2 = CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CNormalMeanPrecConjugate normal = + CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CNormalMeanPrecConjugate normal1 = + CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + CNormalMeanPrecConjugate normal2 = + CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); double rawError = 0.0; double calibratedError = 0.0; @@ -134,7 +147,8 @@ void CProbabilityCalibratorTest::testCalibration() { double upperBound; double rawProbability = 1.0; - if (normal.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)) { + if (normal.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, lowerBound, upperBound)) { rawProbability = (lowerBound + upperBound) / 2.0; } @@ -142,12 +156,14 @@ void CProbabilityCalibratorTest::testCalibration() { double calibratedProbability = calibrator.calibrate(rawProbability); double trueProbability = 1.0; - if (mode.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, lowerBound, upperBound)) { + if (mode.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, + lowerBound, upperBound)) { trueProbability = (lowerBound + upperBound) / 2.0; } double raw = std::fabs(std::log(rawProbability) - std::log(trueProbability)); - double calibrated = std::fabs(std::log(calibratedProbability) - std::log(trueProbability)); + double calibrated = std::fabs(std::log(calibratedProbability) - + std::log(trueProbability)); rawError += raw; calibratedError += calibrated; @@ -155,10 +171,13 @@ void CProbabilityCalibratorTest::testCalibration() { maxCalibratedError = std::max(maxCalibratedError, calibrated); } - LOG_DEBUG(<< "totalRawError = " << rawError << ", maxRawError = " << maxRawError); - LOG_DEBUG(<< "totalCalibratedError = " << calibratedError << ", maxCalibratedError = " << maxCalibratedError); + LOG_DEBUG(<< "totalRawError = " << rawError + << ", maxRawError = " << maxRawError); + LOG_DEBUG(<< "totalCalibratedError = " << calibratedError + << ", maxCalibratedError = " << maxCalibratedError); CPPUNIT_ASSERT((rawError - calibratedError) / rawError >= improvements[i]); - CPPUNIT_ASSERT((maxRawError - maxCalibratedError) / maxRawError >= maxImprovements[i]); + CPPUNIT_ASSERT((maxRawError - maxCalibratedError) / maxRawError >= + maxImprovements[i]); } } } @@ -166,8 +185,9 @@ void CProbabilityCalibratorTest::testCalibration() { CppUnit::Test* CProbabilityCalibratorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProbabilityCalibratorTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CProbabilityCalibratorTest::testCalibration", - &CProbabilityCalibratorTest::testCalibration)); + 
suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityCalibratorTest>( + "CProbabilityCalibratorTest::testCalibration", + &CProbabilityCalibratorTest::testCalibration)); return suiteOfTests; } diff --git a/lib/maths/unittest/CQDigestTest.cc b/lib/maths/unittest/CQDigestTest.cc index fe247cdaef..a5fda450a5 100644 --- a/lib/maths/unittest/CQDigestTest.cc +++ b/lib/maths/unittest/CQDigestTest.cc @@ -50,7 +50,8 @@ void CQDigestTest::testAdd() { CPPUNIT_ASSERT(qDigest.checkInvariants()); - CPPUNIT_ASSERT_EQUAL(std::string("50 | 10 | { \"[5,5],50,50\" \"[0,7],0,50\" }"), qDigest.print()); + CPPUNIT_ASSERT_EQUAL(std::string("50 | 10 | { \"[5,5],50,50\" \"[0,7],0,50\" }"), + qDigest.print()); } { @@ -80,7 +81,8 @@ void CQDigestTest::testAdd() { { using TUInt64Set = std::multiset<uint64_t>; - const double expectedMaxErrors[] = {0.007, 0.01, 0.12, 0.011, 0.016, 0.018, 0.023, 0.025, 0.02}; + const double expectedMaxErrors[] = {0.007, 0.01, 0.12, 0.011, 0.016, + 0.018, 0.023, 0.025, 0.02}; CRandomNumbers generator; @@ -109,13 +111,15 @@ void CQDigestTest::testAdd() { uint32_t quantile; qDigest.quantile(q, quantile); - std::size_t rank = std::distance(orderedSamples.begin(), orderedSamples.lower_bound(quantile)); + std::size_t rank = std::distance( + orderedSamples.begin(), orderedSamples.lower_bound(quantile)); double error = (static_cast<double>(rank) - q * n) / n; if ((i + 1) % 1000 == 0) { - LOG_DEBUG(<< "q = " << q << ", quantile = " << quantile << ", rank = " << rank << ", n = " << n << ", error " - << error); + LOG_DEBUG(<< "q = " << q << ", quantile = " << quantile + << ", rank = " << rank << ", n = " << n + << ", error " << error); } CPPUNIT_ASSERT(std::fabs(error) < 0.06); @@ -173,7 +177,8 @@ void CQDigestTest::testCdf() { double upperBound; qDigest.cdf(summary[i].first, 0.0, lowerBound, upperBound); - LOG_DEBUG(<< "x = " << summary[i].first << ", F(x) >= " << lowerBound << ", F(x) <= " << upperBound); + LOG_DEBUG(<< "x = " << summary[i].first << ", F(x) >= " << lowerBound + << ", F(x) <= " << upperBound); double fx = static_cast<double>(summary[i].second) / 100.0; @@ -199,7 +204,8 @@ void CQDigestTest::testCdf() { // Get the true c.d.f. value. double ft = std::min(static_cast<double>(summary[i].first) / 500.0, 1.0); - LOG_DEBUG(<< "x = " << summary[i].first << ", F(x) = " << ft << ", F(x) >= " << lowerBound << ", F(x) <= " << upperBound); + LOG_DEBUG(<< "x = " << summary[i].first << ", F(x) = " << ft + << ", F(x) >= " << lowerBound << ", F(x) <= " << upperBound); CPPUNIT_ASSERT(fx >= lowerBound && fx <= upperBound); CPPUNIT_ASSERT(ft >= lowerBound - 0.01 && ft <= upperBound + 0.01); @@ -236,7 +242,8 @@ void CQDigestTest::testSummary() { uint32_t xq; qDigest.quantile(q, xq); - LOG_DEBUG(<< "q = " << q << ", x(q) = " << summary[i].first << ", expected x(q) = " << xq); + LOG_DEBUG(<< "q = " << q << ", x(q) = " << summary[i].first + << ", expected x(q) = " << xq); CPPUNIT_ASSERT_EQUAL(xq, summary[i].first); } @@ -251,7 +258,8 @@ void CQDigestTest::testSummary() { TUInt32UInt64PrVec summary; qDigest.summary(summary); - CPPUNIT_ASSERT_EQUAL(std::string("[(3, 1)]"), core::CContainerPrinter::print(summary)); + CPPUNIT_ASSERT_EQUAL(std::string("[(3, 1)]"), + core::CContainerPrinter::print(summary)); } // Edge case: non-zero count at the root.
@@ -266,7 +274,8 @@ void CQDigestTest::testSummary() { TUInt32UInt64PrVec summary; qDigest.summary(summary); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (7, 4)]"), core::CContainerPrinter::print(summary)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (7, 4)]"), + core::CContainerPrinter::print(summary)); } } @@ -362,8 +371,10 @@ void CQDigestTest::testPropagateForwardByTime() { TMeanAccumlator diff; for (std::size_t i = 0; i < cdfLower.size(); ++i) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(cdfLower[i], cdfLowerAged[i], std::min(5e-5, 2e-3 * cdfLower[i])); - CPPUNIT_ASSERT_DOUBLES_EQUAL(cdfUpper[i], cdfUpperAged[i], std::min(5e-5, 2e-3 * cdfUpper[i])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(cdfLower[i], cdfLowerAged[i], + std::min(5e-5, 2e-3 * cdfLower[i])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(cdfUpper[i], cdfUpperAged[i], + std::min(5e-5, 2e-3 * cdfUpper[i])); diff.add(std::fabs(cdfLower[i] - cdfLowerAged[i])); diff.add(std::fabs(cdfUpper[i] - cdfUpperAged[i])); } @@ -497,7 +508,10 @@ void CQDigestTest::testScale() { double maxType2 = 0.0; double totalType2 = 0.0; - uint32_t end = static_cast<uint32_t>(scales[i] * *std::max_element(samples.begin(), samples.end())) + 1; + uint32_t end = + static_cast<uint32_t>( + scales[i] * *std::max_element(samples.begin(), samples.end())) + + 1; for (uint32_t j = 0; j < end; ++j) { double expectedLowerBound; double expectedUpperBound; @@ -506,8 +520,10 @@ void CQDigestTest::testScale() { double lowerBound; double upperBound; qDigest.cdf(j, 0.0, lowerBound, upperBound); - double type1 = std::fabs(expectedLowerBound - lowerBound) + std::fabs(expectedUpperBound - upperBound); - double type2 = std::max(lowerBound - expectedLowerBound, 0.0) + std::max(expectedUpperBound - upperBound, 0.0); + double type1 = std::fabs(expectedLowerBound - lowerBound) + + std::fabs(expectedUpperBound - upperBound); + double type2 = std::max(lowerBound - expectedLowerBound, 0.0) + + std::max(expectedUpperBound - upperBound, 0.0); maxType1 = std::max(maxType1, type1); totalType1 += type1; maxType2 = std::max(maxType2, type2); @@ -559,7 +575,8 @@ void CQDigestTest::testPersist() { core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CQDigest::acceptRestoreTraverser, &restoredQDigest, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(&CQDigest::acceptRestoreTraverser, &restoredQDigest, _1))); } CPPUNIT_ASSERT(restoredQDigest.checkInvariants()); @@ -578,14 +595,20 @@ CppUnit::Test* CQDigestTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CQDigestTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>("CQDigestTest::testAdd", &CQDigestTest::testAdd)); - suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>("CQDigestTest::testMerge", &CQDigestTest::testMerge)); - suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>("CQDigestTest::testCdf", &CQDigestTest::testCdf)); - suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>("CQDigestTest::testSummary", &CQDigestTest::testSummary)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CQDigestTest>("CQDigestTest::testPropagateForwardByTime", &CQDigestTest::testPropagateForwardByTime)); - suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>("CQDigestTest::testScale", &CQDigestTest::testScale)); - suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>("CQDigestTest::testPersist", &CQDigestTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>( + "CQDigestTest::testAdd", &CQDigestTest::testAdd)); +
suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>( + "CQDigestTest::testMerge", &CQDigestTest::testMerge)); + suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>( + "CQDigestTest::testCdf", &CQDigestTest::testCdf)); + suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>( + "CQDigestTest::testSummary", &CQDigestTest::testSummary)); + suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>( + "CQDigestTest::testPropagateForwardByTime", &CQDigestTest::testPropagateForwardByTime)); + suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>( + "CQDigestTest::testScale", &CQDigestTest::testScale)); + suiteOfTests->addTest(new CppUnit::TestCaller<CQDigestTest>( + "CQDigestTest::testPersist", &CQDigestTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CQuantileSketchTest.cc b/lib/maths/unittest/CQuantileSketchTest.cc index d3117e16b0..69b96e2d00 100644 --- a/lib/maths/unittest/CQuantileSketchTest.cc +++ b/lib/maths/unittest/CQuantileSketchTest.cc @@ -59,12 +59,15 @@ void testSketch(maths::CQuantileSketch::EInterpolation interpolation, sketch.quantile(100.0, max); double scale = max - min; - LOG_DEBUG(<< "bias = " << maths::CBasicStatistics::mean(bias) << ", error " << maths::CBasicStatistics::mean(error)); + LOG_DEBUG(<< "bias = " << maths::CBasicStatistics::mean(bias) << ", error " + << maths::CBasicStatistics::mean(error)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(bias)) < maxBias); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < maxError); - meanBias += maths::CBasicStatistics::accumulator(maths::CBasicStatistics::count(bias), maths::CBasicStatistics::mean(bias) / scale); - meanError += maths::CBasicStatistics::accumulator(maths::CBasicStatistics::count(error), maths::CBasicStatistics::mean(error) / scale); + meanBias += maths::CBasicStatistics::accumulator( + maths::CBasicStatistics::count(bias), maths::CBasicStatistics::mean(bias) / scale); + meanError += maths::CBasicStatistics::accumulator( + maths::CBasicStatistics::count(error), maths::CBasicStatistics::mean(error) / scale); } } @@ -90,7 +93,8 @@ void CQuantileSketchTest::testAdd() { LOG_DEBUG(<< "sketch = " << core::CContainerPrinter::print(sketch.knots())); CPPUNIT_ASSERT_EQUAL(6.0, sketch.count()); - CPPUNIT_ASSERT_EQUAL(std::string("[(1.2, 1), (0.9, 3), (1.8, 1), (2.1, 1)]"), core::CContainerPrinter::print(sketch.knots())); + CPPUNIT_ASSERT_EQUAL(std::string("[(1.2, 1), (0.9, 3), (1.8, 1), (2.1, 1)]"), + core::CContainerPrinter::print(sketch.knots())); } void CQuantileSketchTest::testReduce() { @@ -104,14 +108,16 @@ void CQuantileSketchTest::testReduce() { // Test duplicate points. - double points[][2] = {{5.0, 1.0}, {0.4, 2.0}, {0.4, 1.0}, {1.0, 1.0}, {1.2, 2.0}, {1.2, 1.5}, {5.0, 1.0}}; + double points[][2] = {{5.0, 1.0}, {0.4, 2.0}, {0.4, 1.0}, {1.0, 1.0}, + {1.2, 2.0}, {1.2, 1.5}, {5.0, 1.0}}; for (std::size_t i = 0u; i < boost::size(points); ++i) { sketch.add(points[i][0], points[i][1]); CPPUNIT_ASSERT(sketch.checkInvariants()); } LOG_DEBUG(<< "sketch = " << core::CContainerPrinter::print(sketch.knots())); - CPPUNIT_ASSERT_EQUAL(std::string("[(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"), core::CContainerPrinter::print(sketch.knots())); + CPPUNIT_ASSERT_EQUAL(std::string("[(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"), + core::CContainerPrinter::print(sketch.knots())); // Regular compress (merging two point). @@ -143,16 +149,20 @@ { // Test the quantiles are reasonable at a compression ratio of 2:1.
- double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0}; - double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, - 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0}; + double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, + 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, + 46.0, 48.0, 50.0, 21.0, 23.0, 52.0}; + double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, + 40.0, 45.0, 50.0, 55.0, 60.0, 65.0, 70.0, + 75.0, 80.0, 85.0, 90.0, 95.0, 100.0}; maths::CQuantileSketch sketch(maths::CQuantileSketch::E_Linear, 10); for (std::size_t i = 0u; i < boost::size(points); ++i) { sketch.add(points[i]); CPPUNIT_ASSERT(sketch.checkInvariants()); if ((i + 1) % 5 == 0) { - LOG_DEBUG(<< "sketch = " << core::CContainerPrinter::print(sketch.knots())); + LOG_DEBUG(<< "sketch = " + << core::CContainerPrinter::print(sketch.knots())); } } @@ -175,14 +185,16 @@ void CQuantileSketchTest::testReduce() { // Test duplicate points. - double points[][2] = {{5.0, 1.0}, {0.4, 2.0}, {0.4, 1.0}, {1.0, 1.0}, {1.2, 2.0}, {1.2, 1.5}, {5.0, 1.0}}; + double points[][2] = {{5.0, 1.0}, {0.4, 2.0}, {0.4, 1.0}, {1.0, 1.0}, + {1.2, 2.0}, {1.2, 1.5}, {5.0, 1.0}}; for (std::size_t i = 0u; i < boost::size(points); ++i) { sketch.add(points[i][0], points[i][1]); CPPUNIT_ASSERT(sketch.checkInvariants()); } LOG_DEBUG(<< "sketch = " << core::CContainerPrinter::print(sketch.knots())); - CPPUNIT_ASSERT_EQUAL(std::string("[(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"), core::CContainerPrinter::print(sketch.knots())); + CPPUNIT_ASSERT_EQUAL(std::string("[(0.4, 3), (1, 1), (1.2, 3.5), (5, 2)]"), + core::CContainerPrinter::print(sketch.knots())); // Regular compress (merging two point). @@ -214,16 +226,20 @@ void CQuantileSketchTest::testReduce() { { // Test the quantiles are reasonable at a compression ratio of 2:1. - double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0}; - double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, - 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0}; + double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, + 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, + 46.0, 48.0, 50.0, 21.0, 23.0, 52.0}; + double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, + 40.0, 45.0, 50.0, 55.0, 60.0, 65.0, 70.0, + 75.0, 80.0, 85.0, 90.0, 95.0, 100.0}; maths::CQuantileSketch sketch(maths::CQuantileSketch::E_PiecewiseConstant, 10); for (std::size_t i = 0u; i < boost::size(points); ++i) { sketch.add(points[i]); CPPUNIT_ASSERT(sketch.checkInvariants()); if ((i + 1) % 5 == 0) { - LOG_DEBUG(<< "sketch = " << core::CContainerPrinter::print(sketch.knots())); + LOG_DEBUG(<< "sketch = " + << core::CContainerPrinter::print(sketch.knots())); } } @@ -263,7 +279,8 @@ void CQuantileSketchTest::testMerge() { sketch2.add(5.1); sketch1 += sketch2; - LOG_DEBUG(<< "merged sketch = " << core::CContainerPrinter::print(sketch1.knots())); + LOG_DEBUG(<< "merged sketch = " + << core::CContainerPrinter::print(sketch1.knots())); CPPUNIT_ASSERT_EQUAL(std::string("[(1, 3.6), (1.1, 1), (2, 1), (3, 1), (3.1, 2), (5.1, 2)]"), core::CContainerPrinter::print(sketch1.knots())); } @@ -271,9 +288,12 @@ void CQuantileSketchTest::testMerge() { { // Test the quantiles are reasonable at a compression ratio of 2:1. 
- double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, 46.0, 48.0, 50.0, 21.0, 23.0, 52.0}; - double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, - 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0}; + double points[] = {1.0, 2.0, 40.0, 13.0, 5.0, 6.0, 4.0, + 7.0, 15.0, 17.0, 19.0, 44.0, 42.0, 3.0, + 46.0, 48.0, 50.0, 21.0, 23.0, 52.0}; + double cdf[] = {5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, + 40.0, 45.0, 50.0, 55.0, 60.0, 65.0, 70.0, + 75.0, 80.0, 85.0, 90.0, 95.0, 100.0}; maths::CQuantileSketch sketch1(maths::CQuantileSketch::E_Linear, 10); maths::CQuantileSketch sketch2(maths::CQuantileSketch::E_Linear, 10); @@ -285,7 +305,8 @@ void CQuantileSketchTest::testMerge() { LOG_DEBUG(<< "sketch 2 = " << core::CContainerPrinter::print(sketch2.knots())); maths::CQuantileSketch sketch3 = sketch1 + sketch2; - LOG_DEBUG(<< "merged sketch = " << core::CContainerPrinter::print(sketch3.knots())); + LOG_DEBUG(<< "merged sketch = " + << core::CContainerPrinter::print(sketch3.knots())); std::sort(boost::begin(points), boost::end(points)); TMeanAccumulator error; @@ -401,10 +422,11 @@ void CQuantileSketchTest::testQuantileAccuracy() { for (std::size_t t = 0u; t < 5; ++t) { TDoubleVec samples; rng.generateUniformSamples(0.0, 20.0 * static_cast<double>(t + 1), 1000, samples); - testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.15, 0.3, meanBias, meanError); + testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.15, 0.3, + meanBias, meanError); } - LOG_DEBUG(<< "mean bias = " << std::fabs(maths::CBasicStatistics::mean(meanBias)) << ", mean error " - << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG(<< "mean bias = " << std::fabs(maths::CBasicStatistics::mean(meanBias)) + << ", mean error " << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBias)) < 0.0007); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.003); } @@ -415,11 +437,13 @@ void CQuantileSketchTest::testQuantileAccuracy() { TMeanAccumulator meanError; for (std::size_t t = 0u; t < 5; ++t) { TDoubleVec samples; - rng.generateNormalSamples(20.0 * static_cast<double>(t), 20.0 * static_cast<double>(t + 1), 1000, samples); - testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.16, 0.2, meanBias, meanError); + rng.generateNormalSamples(20.0 * static_cast<double>(t), + 20.0 * static_cast<double>(t + 1), 1000, samples); + testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.16, 0.2, + meanBias, meanError); } - LOG_DEBUG(<< "mean bias = " << maths::CBasicStatistics::mean(meanBias) << ", mean error " - << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG(<< "mean bias = " << maths::CBasicStatistics::mean(meanBias) + << ", mean error " << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBias)) < 0.002); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.003); } @@ -430,11 +454,13 @@ void CQuantileSketchTest::testQuantileAccuracy() { TMeanAccumulator meanError; for (std::size_t t = 0u; t < 5; ++t) { TDoubleVec samples; - rng.generateLogNormalSamples(0.1 * static_cast<double>(t), 0.4 * static_cast<double>(t + 1), 1000, samples); - testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.11, 0.12, meanBias, meanError); + rng.generateLogNormalSamples(0.1 * static_cast<double>(t), + 0.4 * static_cast<double>(t + 1), 1000, samples); + testSketch(maths::CQuantileSketch::E_Linear, 20, samples, 0.11, + 0.12, meanBias, meanError); } - LOG_DEBUG(<< "mean bias = " <<
maths::CBasicStatistics::mean(meanBias) << ", mean error " - << maths::CBasicStatistics::mean(meanError)); + LOG_DEBUG(<< "mean bias = " << maths::CBasicStatistics::mean(meanBias) + << ", mean error " << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBias)) < 0.0006); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.0009); } @@ -446,24 +472,34 @@ void CQuantileSketchTest::testQuantileAccuracy() { TMeanAccumulator meanErrorPiecewise; for (std::size_t t = 0u; t < 5; ++t) { TDoubleVec samples_[4] = {}; - rng.generateNormalSamples(10.0 * static_cast<double>(t), 20.0 * static_cast<double>(t + 1), 400, samples_[0]); - rng.generateNormalSamples(20.0 * static_cast<double>(t), 20.0 * static_cast<double>(t + 1), 600, samples_[1]); - rng.generateNormalSamples(100.0 * static_cast<double>(t), 40.0 * static_cast<double>(t + 1), 400, samples_[2]); - rng.generateUniformSamples(500.0 * static_cast<double>(t), 550.0 * static_cast<double>(t + 1), 600, samples_[3]); + rng.generateNormalSamples(10.0 * static_cast<double>(t), + 20.0 * static_cast<double>(t + 1), 400, + samples_[0]); + rng.generateNormalSamples(20.0 * static_cast<double>(t), + 20.0 * static_cast<double>(t + 1), 600, + samples_[1]); + rng.generateNormalSamples(100.0 * static_cast<double>(t), + 40.0 * static_cast<double>(t + 1), 400, + samples_[2]); + rng.generateUniformSamples(500.0 * static_cast<double>(t), + 550.0 * static_cast<double>(t + 1), 600, + samples_[3]); TDoubleVec samples; for (std::size_t i = 0u; i < 4; ++i) { samples.insert(samples.end(), samples_[i].begin(), samples_[i].end()); } rng.random_shuffle(samples.begin(), samples.end()); - testSketch(maths::CQuantileSketch::E_Linear, 40, samples, 49, 50, meanBiasLinear, meanErrorLinear); - testSketch(maths::CQuantileSketch::E_PiecewiseConstant, 40, samples, 55, 56, meanBiasPiecewise, meanErrorPiecewise); + testSketch(maths::CQuantileSketch::E_Linear, 40, samples, 49, 50, + meanBiasLinear, meanErrorLinear); + testSketch(maths::CQuantileSketch::E_PiecewiseConstant, 40, samples, + 55, 56, meanBiasPiecewise, meanErrorPiecewise); } - LOG_DEBUG(<< "linear mean bias = " << maths::CBasicStatistics::mean(meanBiasLinear) << ", mean error " - << maths::CBasicStatistics::mean(meanErrorLinear)); + LOG_DEBUG(<< "linear mean bias = " << maths::CBasicStatistics::mean(meanBiasLinear) + << ", mean error " << maths::CBasicStatistics::mean(meanErrorLinear)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBiasLinear)) < 0.012); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanErrorLinear) < 0.013); - LOG_DEBUG(<< "piecewise mean bias = " << maths::CBasicStatistics::mean(meanBiasPiecewise) << ", mean error " - << maths::CBasicStatistics::mean(meanErrorPiecewise)); + LOG_DEBUG(<< "piecewise mean bias = " << maths::CBasicStatistics::mean(meanBiasPiecewise) + << ", mean error " << maths::CBasicStatistics::mean(meanErrorPiecewise)); CPPUNIT_ASSERT(std::fabs(maths::CBasicStatistics::mean(meanBiasPiecewise)) < 0.015); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanErrorPiecewise) < 0.015); } @@ -490,7 +526,9 @@ void CQuantileSketchTest::testCdf() { sketch.quantile(10.0 * static_cast<double>(i) + 5.0, x); double f; sketch.cdf(x, f); - LOG_DEBUG(<< "x = " << x << ", f(exact) = " << static_cast<double>(i) / 10.0 + 0.05 << ", f(actual) = " << f); + LOG_DEBUG(<< "x = " << x + << ", f(exact) = " << static_cast<double>(i) / 10.0 + 0.05 + << ", f(actual) = " << f); CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(i) / 10.0 + 0.05, f, 1e-6); } } @@ -502,7 +540,9 @@ void CQuantileSketchTest::testCdf() { sketch.quantile(10.0 * static_cast<double>(i) + 5.0, x); double f; sketch.cdf(x, f); - LOG_DEBUG(<< "x = " <<
x << ", f(exact) = " << static_cast(i) / 10.0 + 0.05 << ", f(actual) = " << f); + LOG_DEBUG(<< "x = " << x + << ", f(exact) = " << static_cast(i) / 10.0 + 0.05 + << ", f(actual) = " << f); CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast(i) / 10.0 + 0.05, f, 1e-6); } @@ -532,7 +572,9 @@ void CQuantileSketchTest::testCdf() { double f; sketch.cdf(x, f); if (i % 10 == 0) { - LOG_DEBUG(<< " x = " << x << ", f(exact) = " << static_cast(i) / 100.0 << ", f(actual) = " << f); + LOG_DEBUG(<< " x = " << x + << ", f(exact) = " << static_cast(i) / 100.0 + << ", f(actual) = " << f); } CPPUNIT_ASSERT_DOUBLES_EQUAL(static_cast(i) / 100.0, f, 1e-6); } @@ -569,7 +611,8 @@ void CQuantileSketchTest::testPersist() { core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CQuantileSketch::acceptRestoreTraverser, &restoredSketch, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( + &maths::CQuantileSketch::acceptRestoreTraverser, &restoredSketch, _1))); } // Checksums should agree. @@ -588,19 +631,23 @@ void CQuantileSketchTest::testPersist() { CppUnit::Test* CQuantileSketchTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CQuantileSketchTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testAdd", &CQuantileSketchTest::testAdd)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CQuantileSketchTest::testReduce", &CQuantileSketchTest::testReduce)); - suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testMerge", &CQuantileSketchTest::testMerge)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CQuantileSketchTest::testMedian", &CQuantileSketchTest::testMedian)); - suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testPropagateForwardByTime", - &CQuantileSketchTest::testPropagateForwardByTime)); - suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testQuantileAccuracy", - &CQuantileSketchTest::testQuantileAccuracy)); - suiteOfTests->addTest(new CppUnit::TestCaller("CQuantileSketchTest::testCdf", &CQuantileSketchTest::testCdf)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CQuantileSketchTest::testPersist", &CQuantileSketchTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CQuantileSketchTest::testAdd", &CQuantileSketchTest::testAdd)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CQuantileSketchTest::testReduce", &CQuantileSketchTest::testReduce)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CQuantileSketchTest::testMerge", &CQuantileSketchTest::testMerge)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CQuantileSketchTest::testMedian", &CQuantileSketchTest::testMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CQuantileSketchTest::testPropagateForwardByTime", + &CQuantileSketchTest::testPropagateForwardByTime)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CQuantileSketchTest::testQuantileAccuracy", &CQuantileSketchTest::testQuantileAccuracy)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CQuantileSketchTest::testCdf", &CQuantileSketchTest::testCdf)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CQuantileSketchTest::testPersist", &CQuantileSketchTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CRadialBasisFunctionTest.cc b/lib/maths/unittest/CRadialBasisFunctionTest.cc index 3c7de55241..fc302848c1 100644 --- 
a/lib/maths/unittest/CRadialBasisFunctionTest.cc +++ b/lib/maths/unittest/CRadialBasisFunctionTest.cc @@ -77,7 +77,9 @@ void CRadialBasisFunctionTest::testDerivative() { for (std::size_t k = 0u; k < 10; ++k) { double x = a + static_cast<double>(k) / 10.0 * (b - a); double d = gaussian.derivative(x, centres[i], scales[j]); - double e = (gaussian.value(x + eps, centres[i], scales[j]) - gaussian.value(x - eps, centres[i], scales[j])) / 2.0 / eps; + double e = (gaussian.value(x + eps, centres[i], scales[j]) - + gaussian.value(x - eps, centres[i], scales[j])) / + 2.0 / eps; // Centred difference nuemrical derivative should // be accurate to o(eps^2). @@ -96,9 +98,9 @@ void CRadialBasisFunctionTest::testDerivative() { for (std::size_t k = 0u; k < 10; ++k) { double x = a + static_cast<double>(k) / 10.0 * (b - a); double d = inverseQuadratic.derivative(x, centres[i], scales[j]); - double e = - (inverseQuadratic.value(x + eps, centres[i], scales[j]) - inverseQuadratic.value(x - eps, centres[i], scales[j])) / - 2.0 / eps; + double e = (inverseQuadratic.value(x + eps, centres[i], scales[j]) - + inverseQuadratic.value(x - eps, centres[i], scales[j])) / + 2.0 / eps; // Centred difference nuemrical derivative should // be accurate to o(eps^2). @@ -133,7 +135,8 @@ void CRadialBasisFunctionTest::testMean() { double aa = a + static_cast<double>(k) / 20.0 * (b - a); double bb = a + static_cast<double>(k + 1) / 20.0 * (b - a); double interval; - maths::CIntegration::gaussLegendre(f, aa, bb, interval); + maths::CIntegration::gaussLegendre( + f, aa, bb, interval); expectedMean += interval; } expectedMean /= (b - a); @@ -157,7 +160,8 @@ void CRadialBasisFunctionTest::testMean() { double aa = a + static_cast<double>(k) / 20.0 * (b - a); double bb = a + static_cast<double>(k + 1) / 20.0 * (b - a); double interval; - maths::CIntegration::gaussLegendre(f, aa, bb, interval); + maths::CIntegration::gaussLegendre( + f, aa, bb, interval); expectedMean += interval; } expectedMean /= (b - a); @@ -194,7 +198,8 @@ void CRadialBasisFunctionTest::testMeanSquareDerivative() { double aa = a + static_cast<double>(k) / 50.0 * (b - a); double bb = a + static_cast<double>(k + 1) / 50.0 * (b - a); double interval; - maths::CIntegration::gaussLegendre(f, aa, bb, interval); + maths::CIntegration::gaussLegendre( + f, aa, bb, interval); expectedMean += interval; } expectedMean /= (b - a); @@ -218,12 +223,14 @@ void CRadialBasisFunctionTest::testMeanSquareDerivative() { double aa = a + static_cast<double>(k) / 50.0 * (b - a); double bb = a + static_cast<double>(k + 1) / 50.0 * (b - a); double interval; - maths::CIntegration::gaussLegendre(f, aa, bb, interval); + maths::CIntegration::gaussLegendre( + f, aa, bb, interval); expectedMean += interval; } expectedMean /= (b - a); - double mean = inverseQuadratic.meanSquareDerivative(a, b, centres[i], scales[j]); + double mean = inverseQuadratic.meanSquareDerivative(a, b, centres[i], + scales[j]); LOG_DEBUG(<< "expectedMean = " << expectedMean << ", mean = " << mean); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, mean, eps * mean); } @@ -248,25 +255,30 @@ void CRadialBasisFunctionTest::testProduct() { for (std::size_t j = 0u; j < boost::size(centres); ++j) { for (std::size_t k = 0u; k < boost::size(scales); ++k) { for (std::size_t l = 0u; l < boost::size(scales); ++l) { - LOG_DEBUG(<< "centre1 = " << centres[i] << ", centre2 = " << centres[j] << ", scale1 = " << scales[k] + LOG_DEBUG(<< "centre1 = " << centres[i] << ", centre2 = " + << centres[j] << ", scale1 = " << scales[k] << ", scale2 = " << scales[l]); maths::CGaussianBasisFunction gaussian; CValueAdaptor f1(gaussian,
centres[i], scales[k]); CValueAdaptor f2(gaussian, centres[j], scales[l]); - maths::CCompositeFunctions::CProduct f(f1, f2); + maths::CCompositeFunctions::CProduct f( + f1, f2); double expectedProduct = 0.0; for (std::size_t m = 0u; m < 50; ++m) { double aa = a + static_cast<double>(m) / 50.0 * (b - a); double bb = a + static_cast<double>(m + 1) / 50.0 * (b - a); double interval; - maths::CIntegration::gaussLegendre(f, aa, bb, interval); + maths::CIntegration::gaussLegendre( + f, aa, bb, interval); expectedProduct += interval; } expectedProduct /= (b - a); - double product = gaussian.product(a, b, centres[i], centres[j], scales[k], scales[l]); - LOG_DEBUG(<< "expectedMean = " << expectedProduct << ", mean = " << product); + double product = gaussian.product(a, b, centres[i], centres[j], + scales[k], scales[l]); + LOG_DEBUG(<< "expectedMean = " << expectedProduct + << ", mean = " << product); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProduct, product, eps * product); } } @@ -279,25 +291,30 @@ void CRadialBasisFunctionTest::testProduct() { for (std::size_t j = 0u; j < boost::size(centres); ++j) { for (std::size_t k = 0u; k < boost::size(scales); ++k) { for (std::size_t l = 0u; l < boost::size(scales); ++l) { - LOG_DEBUG(<< "centre1 = " << centres[i] << ", centre2 = " << centres[j] << ", scale1 = " << scales[k] + LOG_DEBUG(<< "centre1 = " << centres[i] << ", centre2 = " + << centres[j] << ", scale1 = " << scales[k] + << ", scale2 = " << scales[l]); maths::CInverseQuadraticBasisFunction inverseQuadratic; CValueAdaptor f1(inverseQuadratic, centres[i], scales[k]); CValueAdaptor f2(inverseQuadratic, centres[j], scales[l]); double expectedProduct = 0.0; - maths::CCompositeFunctions::CProduct f(f1, f2); + maths::CCompositeFunctions::CProduct f( + f1, f2); for (std::size_t m = 0u; m < 50; ++m) { double aa = a + static_cast<double>(m) / 50.0 * (b - a); double bb = a + static_cast<double>(m + 1) / 50.0 * (b - a); double interval; - maths::CIntegration::gaussLegendre(f, aa, bb, interval); + maths::CIntegration::gaussLegendre( + f, aa, bb, interval); expectedProduct += interval; } expectedProduct /= (b - a); - double product = inverseQuadratic.product(a, b, centres[i], centres[j], scales[k], scales[l]); - LOG_DEBUG(<< "expectedProduct = " << expectedProduct << ", product = " << product); + double product = inverseQuadratic.product( + a, b, centres[i], centres[j], scales[k], scales[l]); + LOG_DEBUG(<< "expectedProduct = " << expectedProduct + << ", product = " << product); CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProduct, product, eps * product); } } @@ -308,14 +325,15 @@ CppUnit::Test* CRadialBasisFunctionTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRadialBasisFunctionTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CRadialBasisFunctionTest>("CRadialBasisFunctionTest::testDerivative", - &CRadialBasisFunctionTest::testDerivative)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CRadialBasisFunctionTest>("CRadialBasisFunctionTest::testMean", &CRadialBasisFunctionTest::testMean)); - suiteOfTests->addTest(new CppUnit::TestCaller<CRadialBasisFunctionTest>("CRadialBasisFunctionTest::testMeanSquareDerivative", - &CRadialBasisFunctionTest::testMeanSquareDerivative)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CRadialBasisFunctionTest>("CRadialBasisFunctionTest::testProduct", &CRadialBasisFunctionTest::testProduct)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRadialBasisFunctionTest>( + "CRadialBasisFunctionTest::testDerivative", &CRadialBasisFunctionTest::testDerivative)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRadialBasisFunctionTest>( + "CRadialBasisFunctionTest::testMean",
&CRadialBasisFunctionTest::testMean)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRadialBasisFunctionTest>( + "CRadialBasisFunctionTest::testMeanSquareDerivative", + &CRadialBasisFunctionTest::testMeanSquareDerivative)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRadialBasisFunctionTest>( + "CRadialBasisFunctionTest::testProduct", &CRadialBasisFunctionTest::testProduct)); return suiteOfTests; } diff --git a/lib/maths/unittest/CRandomProjectionClustererTest.cc b/lib/maths/unittest/CRandomProjectionClustererTest.cc index 6392ce5433..c76e63e679 100644 --- a/lib/maths/unittest/CRandomProjectionClustererTest.cc +++ b/lib/maths/unittest/CRandomProjectionClustererTest.cc @@ -25,37 +25,54 @@ using TVector5 = maths::CVectorNx1<double, 5>; using TCovariances = maths::CBasicStatistics::SSampleCovariances<double, 5>; struct SFirstLess { - bool operator()(const TSizeVec& lhs, const TSizeVec& rhs) const { return lhs[0] < rhs[0]; } + bool operator()(const TSizeVec& lhs, const TSizeVec& rhs) const { + return lhs[0] < rhs[0]; + } }; template<std::size_t N> -class CRandomProjectionClustererForTest : public maths::CRandomProjectionClustererBatch<N> { +class CRandomProjectionClustererForTest + : public maths::CRandomProjectionClustererBatch<N> { public: using TVectorArrayVec = typename maths::CRandomProjectionClustererBatch<N>::TVectorArrayVec; using TDoubleVecVec = typename maths::CRandomProjectionClustererBatch<N>::TDoubleVecVec; using TVectorNx1VecVec = typename maths::CRandomProjectionClustererBatch<N>::TVectorNx1VecVec; using TSvdNxNVecVec = typename maths::CRandomProjectionClustererBatch<N>::TSvdNxNVecVec; using TSizeUSet = typename maths::CRandomProjectionClustererBatch<N>::TSizeUSet; - using TMeanAccumulatorVecVec = typename maths::CRandomProjectionClustererBatch<N>::TMeanAccumulatorVecVec; + using TMeanAccumulatorVecVec = + typename maths::CRandomProjectionClustererBatch<N>::TMeanAccumulatorVecVec; public: - CRandomProjectionClustererForTest(double compression = 1.0) : maths::CRandomProjectionClustererBatch<N>(compression) {} + CRandomProjectionClustererForTest(double compression = 1.0) + : maths::CRandomProjectionClustererBatch<N>(compression) {} - const TVectorArrayVec& projections() const { return this->maths::CRandomProjectionClustererBatch<N>::projections(); } + const TVectorArrayVec& projections() const { + return this->maths::CRandomProjectionClustererBatch<N>::projections(); + } template<typename CLUSTERER> - void clusterProjections(CLUSTERER clusterer, TDoubleVecVec& W, TVectorNx1VecVec& M, TSvdNxNVecVec& C, TSizeUSet& I) const { + void clusterProjections(CLUSTERER clusterer, + TDoubleVecVec& W, + TVectorNx1VecVec& M, + TSvdNxNVecVec& C, + TSizeUSet& I) const { std::size_t b = this->projectedData().size(); W.resize(b); M.resize(b); C.resize(b); - this->maths::CRandomProjectionClustererBatch<N>::clusterProjections(clusterer, W, M, C, I); + this->maths::CRandomProjectionClustererBatch<N>::clusterProjections( + clusterer, W, M, C, I); } - void neighbourhoods(const TSizeUSet& I, TSizeVecVec& H) const { this->maths::CRandomProjectionClustererBatch<N>::neighbourhoods(I, H); } + void neighbourhoods(const TSizeUSet& I, TSizeVecVec& H) const { + this->maths::CRandomProjectionClustererBatch<N>::neighbourhoods(I, H); + } - void - similarities(const TDoubleVecVec& W, const TVectorNx1VecVec& M, const TSvdNxNVecVec& C, const TSizeVecVec& H, TDoubleVecVec& S) const { + void similarities(const TDoubleVecVec& W, + const TVectorNx1VecVec& M, + const TSvdNxNVecVec& C, + const TSizeVecVec& H, + TDoubleVecVec& S) const { this->maths::CRandomProjectionClustererBatch<N>::similarities(W, M, C, H, S); } @@ -85,7 +102,8 @@ void
CRandomProjectionClustererTest::testGenerateProjections() { const TVectorArrayVec& projections = clusterer.projections(); LOG_DEBUG(<< "projections = " << core::CContainerPrinter::print(projections)); - CPPUNIT_ASSERT_EQUAL(std::string("[[[1 0 0], [0 1 0], [0 0 1], [0 0 0], [0 0 0]]]"), core::CContainerPrinter::print(projections)); + CPPUNIT_ASSERT_EQUAL(std::string("[[[1 0 0], [0 1 0], [0 0 1], [0 0 0], [0 0 0]]]"), + core::CContainerPrinter::print(projections)); } // Test that the projections are mutually orthonormal and @@ -105,10 +123,12 @@ void CRandomProjectionClustererTest::testGenerateProjections() { for (std::size_t i = 0u; i < projections.size(); ++i) { for (std::size_t j = 0u; j < 5; ++j) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, projections[i][j].inner(projections[i][j]), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.0, projections[i][j].inner(projections[i][j]), 1e-10); for (std::size_t k = j + 1; k < 5; ++k) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, projections[i][j].inner(projections[i][k]), 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 0.0, projections[i][j].inner(projections[i][k]), 1e-10); } } } @@ -128,12 +148,16 @@ void CRandomProjectionClustererTest::testGenerateProjections() { LOG_DEBUG(<< "Expected variance = " << 1.0 / static_cast(t)); LOG_DEBUG(<< "Actual variance = " << maths::CBasicStatistics::variance(moments)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, maths::CBasicStatistics::mean(moments), 1.0 / static_cast(t)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, maths::CBasicStatistics::mean(moments), + 1.0 / static_cast(t)); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - 1.0 / static_cast(t), maths::CBasicStatistics::variance(moments), 0.2 / static_cast(t)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / static_cast(t), + maths::CBasicStatistics::variance(moments), + 0.2 / static_cast(t)); - error.add(static_cast(t) * std::fabs(maths::CBasicStatistics::variance(moments) - 1.0 / static_cast(t))); + error.add(static_cast(t) * + std::fabs(maths::CBasicStatistics::variance(moments) - + 1.0 / static_cast(t))); } LOG_DEBUG(<< "Relative error = " << 100.0 * maths::CBasicStatistics::mean(error) << "%"); @@ -176,8 +200,10 @@ void CRandomProjectionClustererTest::testClusterProjections() { TDoubleVec expectedWeights; CRandomProjectionClustererForTest<5>::TVectorNx1VecVec expectedMeans; - expectedWeights.push_back(static_cast(samples1.size()) / static_cast(samples1.size() + samples2.size())); - expectedWeights.push_back(static_cast(samples2.size()) / static_cast(samples1.size() + samples2.size())); + expectedWeights.push_back(static_cast(samples1.size()) / + static_cast(samples1.size() + samples2.size())); + expectedWeights.push_back(static_cast(samples2.size()) / + static_cast(samples1.size() + samples2.size())); std::sort(expectedWeights.begin(), expectedWeights.end()); for (std::size_t i = 0u; i < clusterer.projections().size(); ++i) { CRandomProjectionClustererForTest<5>::TVectorNx1Vec means; @@ -216,7 +242,8 @@ void CRandomProjectionClustererTest::testClusterProjections() { CRandomProjectionClustererForTest<5>::TSvdNxNVecVec covariances; CRandomProjectionClustererForTest<5>::TSizeUSet samples; clusterer.clusterProjections( - maths::forRandomProjectionClusterer(maths::CKMeansFast(), 2, 5), weights_, means, covariances, samples); + maths::forRandomProjectionClusterer(maths::CKMeansFast(), 2, 5), + weights_, means, covariances, samples); CPPUNIT_ASSERT_EQUAL(std::size_t(4), weights_.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(4), means.size()); @@ -233,8 +260,10 @@ void CRandomProjectionClustererTest::testClusterProjections() 
{ LOG_DEBUG(<< "weights = " << core::CContainerPrinter::print(weights)); LOG_DEBUG(<< "means = " << core::CContainerPrinter::print(means)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedWeights), core::CContainerPrinter::print(weights)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), core::CContainerPrinter::print(means)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedWeights), + core::CContainerPrinter::print(weights)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), + core::CContainerPrinter::print(means)); } void CRandomProjectionClustererTest::testNeighbourhoods() { @@ -258,9 +287,10 @@ void CRandomProjectionClustererTest::testNeighbourhoods() { rng.generateUniformSamples(0.0, 10.0, dimension, means[i]); LOG_DEBUG(<< "mean = " << core::CContainerPrinter::print(means[i])); } - TDoubleVecVec covariances[] = {TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0))}; + TDoubleVecVec covariances[] = { + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0))}; for (std::size_t i = 0u; i < boost::size(covariances); ++i) { for (std::size_t j = 0u; j < 30; ++j) { covariances[i][j][j] = 1.0 + static_cast(i); @@ -285,7 +315,8 @@ void CRandomProjectionClustererTest::testNeighbourhoods() { CRandomProjectionClustererForTest<5>::TSvdNxNVecVec clusterCovariances; CRandomProjectionClustererForTest<5>::TSizeUSet examples; clusterer.clusterProjections( - maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), weights, clusterMeans, clusterCovariances, examples); + maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), + weights, clusterMeans, clusterCovariances, examples); LOG_DEBUG(<< "examples = " << core::CContainerPrinter::print(examples)); TSizeVecVec neighbourhoods(examples.size()); @@ -312,12 +343,14 @@ void CRandomProjectionClustererTest::testNeighbourhoods() { } LOG_DEBUG(<< "neighbours = " << core::CContainerPrinter::print(neighbourhoods)); - LOG_DEBUG(<< "expected neighbours = " << core::CContainerPrinter::print(expectedNeighbourhoods)); + LOG_DEBUG(<< "expected neighbours = " + << core::CContainerPrinter::print(expectedNeighbourhoods)); maths::CBasicStatistics::SSampleMean::TAccumulator meanJaccard; for (std::size_t i = 0u; i < neighbourhoods.size(); ++i) { double jaccard = maths::CSetTools::jaccard( - neighbourhoods[i].begin(), neighbourhoods[i].end(), expectedNeighbourhoods[i].begin(), expectedNeighbourhoods[i].end()); + neighbourhoods[i].begin(), neighbourhoods[i].end(), + expectedNeighbourhoods[i].begin(), expectedNeighbourhoods[i].end()); LOG_DEBUG(<< "jaccard = " << jaccard); meanJaccard.add(jaccard, static_cast(expectedNeighbourhoods[i].size())); CPPUNIT_ASSERT(jaccard > 0.1); @@ -341,9 +374,10 @@ void CRandomProjectionClustererTest::testSimilarities() { rng.generateUniformSamples(0.0, 10.0, dimension, means[i]); LOG_DEBUG(<< "mean = " << core::CContainerPrinter::print(means[i])); } - TDoubleVecVec covariances[] = {TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0))}; + TDoubleVecVec covariances[] = { + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 
0.0))}; for (std::size_t i = 0u; i < boost::size(covariances); ++i) { for (std::size_t j = 0u; j < 30; ++j) { covariances[i][j][j] = 1.0 + static_cast(i); @@ -368,14 +402,16 @@ void CRandomProjectionClustererTest::testSimilarities() { CRandomProjectionClustererForTest<5>::TSvdNxNVecVec clusterCovariances; CRandomProjectionClustererForTest<5>::TSizeUSet examples; clusterer.clusterProjections( - maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), weights, clusterMeans, clusterCovariances, examples); + maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), + weights, clusterMeans, clusterCovariances, examples); LOG_DEBUG(<< "examples = " << core::CContainerPrinter::print(examples)); TSizeVecVec expectedConnectivity(examples.size(), TSizeVec(examples.size())); TSizeVec examples_(examples.begin(), examples.end()); for (std::size_t i = 0u; i < examples_.size(); ++i) { for (std::size_t j = 0u; j <= i; ++j) { - expectedConnectivity[i][j] = expectedConnectivity[j][i] = clusters[examples_[i]] == clusters[examples_[j]] ? 1 : 0; + expectedConnectivity[i][j] = expectedConnectivity[j][i] = + clusters[examples_[i]] == clusters[examples_[j]] ? 1 : 0; } } LOG_DEBUG(<< "expected connectivity ="); @@ -387,7 +423,8 @@ void CRandomProjectionClustererTest::testSimilarities() { clusterer.neighbourhoods(examples, neighbourhoods); TDoubleVecVec similarities(examples.size()); - clusterer.similarities(weights, clusterMeans, clusterCovariances, neighbourhoods, similarities); + clusterer.similarities(weights, clusterMeans, clusterCovariances, + neighbourhoods, similarities); TSizeVecVec connectivity(examples.size(), TSizeVec(examples.size())); for (std::size_t i = 0u; i < similarities.size(); ++i) { @@ -426,9 +463,10 @@ void CRandomProjectionClustererTest::testClusterNeighbourhoods() { rng.generateUniformSamples(0.0, 10.0, dimension, means[i]); LOG_DEBUG(<< "mean = " << core::CContainerPrinter::print(means[i])); } - TDoubleVecVec covariances[] = {TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), - TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0))}; + TDoubleVecVec covariances[] = { + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0)), + TDoubleVecVec(dimension, TDoubleVec(dimension, 0.0))}; for (std::size_t i = 0u; i < boost::size(covariances); ++i) { for (std::size_t j = 0u; j < 30; ++j) { covariances[i][j][j] = 1.0 + static_cast(i); @@ -453,14 +491,16 @@ void CRandomProjectionClustererTest::testClusterNeighbourhoods() { CRandomProjectionClustererForTest<5>::TSvdNxNVecVec clusterCovariances; CRandomProjectionClustererForTest<5>::TSizeUSet examples; clusterer.clusterProjections( - maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), weights, clusterMeans, clusterCovariances, examples); + maths::forRandomProjectionClusterer(maths::CKMeansFast(), 3, 5), + weights, clusterMeans, clusterCovariances, examples); LOG_DEBUG(<< "examples = " << core::CContainerPrinter::print(examples)); TSizeVecVec neighbourhoods(examples.size()); clusterer.neighbourhoods(examples, neighbourhoods); TDoubleVecVec similarities(examples.size()); - clusterer.similarities(weights, clusterMeans, clusterCovariances, neighbourhoods, similarities); + clusterer.similarities(weights, clusterMeans, clusterCovariances, + neighbourhoods, similarities); TSizeVecVec expectedClustering(boost::size(n)); LOG_DEBUG(<< "expected clustering ="); @@ -485,7 +525,8 @@ void 
CRandomProjectionClustererTest::testClusterNeighbourhoods() { } for (std::size_t i = 0u; i < expectedClustering.size(); ++i) { - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedClustering[i]), core::CContainerPrinter::print(clustering[i])); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedClustering[i]), + core::CContainerPrinter::print(clustering[i])); } } @@ -499,17 +540,23 @@ CppUnit::Test* CRandomProjectionClustererTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRandomProjectionClustererTest"); suiteOfTests->addTest(new CppUnit::TestCaller( - "CRandomProjectionClustererTest::testGenerateProjections", &CRandomProjectionClustererTest::testGenerateProjections)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRandomProjectionClustererTest::testClusterProjections", - &CRandomProjectionClustererTest::testClusterProjections)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRandomProjectionClustererTest::testNeighbourhoods", - &CRandomProjectionClustererTest::testNeighbourhoods)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRandomProjectionClustererTest::testSimilarities", - &CRandomProjectionClustererTest::testSimilarities)); + "CRandomProjectionClustererTest::testGenerateProjections", + &CRandomProjectionClustererTest::testGenerateProjections)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRandomProjectionClustererTest::testClusterProjections", + &CRandomProjectionClustererTest::testClusterProjections)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRandomProjectionClustererTest::testNeighbourhoods", + &CRandomProjectionClustererTest::testNeighbourhoods)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRandomProjectionClustererTest::testSimilarities", + &CRandomProjectionClustererTest::testSimilarities)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRandomProjectionClustererTest::testClusterNeighbourhoods", + &CRandomProjectionClustererTest::testClusterNeighbourhoods)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CRandomProjectionClustererTest::testClusterNeighbourhoods", &CRandomProjectionClustererTest::testClusterNeighbourhoods)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRandomProjectionClustererTest::testAccuracy", - &CRandomProjectionClustererTest::testAccuracy)); + "CRandomProjectionClustererTest::testAccuracy", + &CRandomProjectionClustererTest::testAccuracy)); return suiteOfTests; } diff --git a/lib/maths/unittest/CRegressionTest.cc b/lib/maths/unittest/CRegressionTest.cc index edf3c250af..dafeec6549 100644 --- a/lib/maths/unittest/CRegressionTest.cc +++ b/lib/maths/unittest/CRegressionTest.cc @@ -304,7 +304,8 @@ void CRegressionTest::testShiftOrdinate() { LOG_DEBUG(<< "parameters 1 = " << core::CContainerPrinter::print(params1)); LOG_DEBUG(<< "parameters 2 = " << core::CContainerPrinter::print(params2)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1000.0 + params1[0], params2[0], 1e-6 * std::fabs(params1[0])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1000.0 + params1[0], params2[0], + 1e-6 * std::fabs(params1[0])); CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[1], params2[1], 1e-6 * std::fabs(params1[1])); CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[2], params2[2], 1e-6 * std::fabs(params1[2])); CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[3], params2[3], 1e-6 * std::fabs(params1[3])); @@ -336,7 +337,8 @@ void CRegressionTest::testShiftGradient() { LOG_DEBUG(<< "parameters 2 = " << core::CContainerPrinter::print(params2)); CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[0], params2[0], 1e-6 * std::fabs(params1[0])); - 
CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0 + params1[1], params2[1], 1e-6 * std::fabs(params1[1])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(10.0 + params1[1], params2[1], + 1e-6 * std::fabs(params1[1])); CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[2], params2[2], 1e-6 * std::fabs(params1[2])); CPPUNIT_ASSERT_DOUBLES_EQUAL(params1[3], params2[3], 1e-6 * std::fabs(params1[3])); } @@ -558,24 +560,32 @@ void CRegressionTest::testPrediction() { TDoubleArray4 params4; ls3.parameters(params4); - double y4 = params4[3] * (x - x0) * (x - x0) * (x - x0) + params4[2] * (x - x0) * (x - x0) + params4[1] * (x - x0) + params4[0]; + double y4 = params4[3] * (x - x0) * (x - x0) * (x - x0) + + params4[2] * (x - x0) * (x - x0) + params4[1] * (x - x0) + + params4[0]; if (i % 10 == 0) { - LOG_DEBUG(<< "y = " << y << ", m = " << maths::CBasicStatistics::mean(m) << ", y2 = " << y2 << ", y3 = " << y3 - << ", y4 = " << y4); + LOG_DEBUG(<< "y = " << y << ", m = " << maths::CBasicStatistics::mean(m) + << ", y2 = " << y2 << ", y3 = " << y3 << ", y4 = " << y4); } - em.add((y - maths::CBasicStatistics::mean(m)) * (y - maths::CBasicStatistics::mean(m))); + em.add((y - maths::CBasicStatistics::mean(m)) * + (y - maths::CBasicStatistics::mean(m))); e2.add((y - y2) * (y - y2)); e3.add((y - y3) * (y - y3)); e4.add((y - y4) * (y - y4)); } - LOG_DEBUG(<< "em = " << maths::CBasicStatistics::mean(em) << ", e2 = " << maths::CBasicStatistics::mean(e2) - << ", e3 = " << maths::CBasicStatistics::mean(e3) << ", e4 = " << maths::CBasicStatistics::mean(e4)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e2) < 0.27 * maths::CBasicStatistics::mean(em)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e3) < 0.08 * maths::CBasicStatistics::mean(em)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e4) < 0.025 * maths::CBasicStatistics::mean(em)); + LOG_DEBUG(<< "em = " << maths::CBasicStatistics::mean(em) + << ", e2 = " << maths::CBasicStatistics::mean(e2) + << ", e3 = " << maths::CBasicStatistics::mean(e3) + << ", e4 = " << maths::CBasicStatistics::mean(e4)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e2) < + 0.27 * maths::CBasicStatistics::mean(em)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e3) < + 0.08 * maths::CBasicStatistics::mean(em)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(e4) < + 0.025 * maths::CBasicStatistics::mean(em)); } void CRegressionTest::testCombination() { @@ -632,7 +642,8 @@ void CRegressionTest::testCombination() { LOG_DEBUG(<< "params A + B = " << core::CContainerPrinter::print(paramsAPlusB)); for (std::size_t i = 0u; i < params.size(); ++i) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(params[i], paramsAPlusB[i], 5e-3 * std::fabs(params[i])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(params[i], paramsAPlusB[i], + 5e-3 * std::fabs(params[i])); } } @@ -781,14 +792,16 @@ void CRegressionTest::testScale() { LOG_DEBUG(<< "statistic = " << regression2.statistic()); TDoubleArray2 params2; regression2.parameters(params2); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params1), core::CContainerPrinter::print(params2)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params1), + core::CContainerPrinter::print(params2)); CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::count(regression2.statistic()), 10.0); maths::CRegression::CLeastSquaresOnline<1, double> regression3 = regression2.scaled(0.5); LOG_DEBUG(<< "statistic = " << regression3.statistic()); TDoubleArray2 params3; regression3.parameters(params3); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params1), core::CContainerPrinter::print(params3)); + 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params1), + core::CContainerPrinter::print(params3)); CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::count(regression3.statistic()), 5.0); } @@ -798,7 +811,8 @@ class CRegressionPrediction { using result_type = double; public: - CRegressionPrediction(const maths::CRegression::CLeastSquaresOnline& regression) : m_Regression(regression) {} + CRegressionPrediction(const maths::CRegression::CLeastSquaresOnline& regression) + : m_Regression(regression) {} bool operator()(double x, double& result) const { result = m_Regression.predict(x); @@ -823,11 +837,13 @@ void CRegressionTest::testMean() { rng.generateUniformSamples(-1.0, 1.0, 4, coeffs); maths::CRegression::CLeastSquaresOnline<3, double> regression; for (double x = 0.0; x < 10.0; x += 1.0) { - regression.add(x, 0.2 * coeffs[0] * x * x * x + 0.4 * coeffs[1] * x * x + coeffs[2] * x + 2.0 * coeffs[3]); + regression.add(x, 0.2 * coeffs[0] * x * x * x + 0.4 * coeffs[1] * x * x + + coeffs[2] * x + 2.0 * coeffs[3]); } double expected; - maths::CIntegration::gaussLegendre(CRegressionPrediction<3>(regression), 10.0, 15.0, expected); + maths::CIntegration::gaussLegendre( + CRegressionPrediction<3>(regression), 10.0, 15.0, expected); expected /= 5.0; double actual = regression.mean(10.0, 15.0); LOG_DEBUG(<< "expected = " << expected); @@ -835,7 +851,8 @@ void CRegressionTest::testMean() { CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-6); // Test interval spanning 0.0. - maths::CIntegration::gaussLegendre(CRegressionPrediction<3>(regression), -3.0, 0.0, expected); + maths::CIntegration::gaussLegendre( + CRegressionPrediction<3>(regression), -3.0, 0.0, expected); expected /= 3.0; actual = regression.mean(-3.0, 0.0); LOG_DEBUG(<< "expected = " << expected); @@ -911,7 +928,8 @@ void CRegressionTest::testCovariances() { rng.generateNormalSamples(0.0, variance, static_cast(n), noise); maths::CRegression::CLeastSquaresOnline<2, double> regression; for (double x = 0.0; x < n; x += 1.0) { - regression.add(x, 0.25 * x * x + 1.5 * x + noise[static_cast(x)]); + regression.add(x, 0.25 * x * x + 1.5 * x + + noise[static_cast(x)]); } TDoubleArray3 params; regression.parameters(params); @@ -954,7 +972,8 @@ void CRegressionTest::testParameters() { LOG_DEBUG(<< "params 1 = " << core::CContainerPrinter::print(params1)); LOG_DEBUG(<< "params 2 = " << core::CContainerPrinter::print(params2)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params2), core::CContainerPrinter::print(params1)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(params2), + core::CContainerPrinter::print(params1)); } } @@ -987,8 +1006,9 @@ void CRegressionTest::testPersist() { core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CRegression::CLeastSquaresOnline<2, double> restoredRegression; - CPPUNIT_ASSERT(traverser.traverseSubLevel( - boost::bind(&maths::CRegression::CLeastSquaresOnline<2, double>::acceptRestoreTraverser, &restoredRegression, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( + &maths::CRegression::CLeastSquaresOnline<2, double>::acceptRestoreTraverser, + &restoredRegression, _1))); CPPUNIT_ASSERT_EQUAL(origRegression.checksum(), restoredRegression.checksum()); @@ -1022,9 +1042,11 @@ void CRegressionTest::testParameterProcess() { test::CRandomNumbers rng; double variances[] = {1.0, 0.5, 0.1, 5.0, 10.0}; - double intervals[] = {0.4, 0.4, 0.8, 0.6, 0.7, 0.5, 0.6, 1.3, 0.3, 1.7, 0.3, 0.5, 1.0, 0.2, 0.3, 0.1, 0.5, - 1.4, 0.7, 0.9, 0.1, 0.4, 0.8, 1.0, 0.6, 0.5, 0.8, 1.3, 0.3, 1.7, 0.3, 1.2, 
0.3, 1.2, - 0.3, 0.1, 0.5, 0.4, 0.7, 0.9, 0.8, 0.6, 0.8, 1.1, 0.6, 0.5, 0.5, 1.3, 0.3, 0.7}; + double intervals[] = {0.4, 0.4, 0.8, 0.6, 0.7, 0.5, 0.6, 1.3, 0.3, 1.7, + 0.3, 0.5, 1.0, 0.2, 0.3, 0.1, 0.5, 1.4, 0.7, 0.9, + 0.1, 0.4, 0.8, 1.0, 0.6, 0.5, 0.8, 1.3, 0.3, 1.7, + 0.3, 1.2, 0.3, 1.2, 0.3, 0.1, 0.5, 0.4, 0.7, 0.9, + 0.8, 0.6, 0.8, 1.1, 0.6, 0.5, 0.5, 1.3, 0.3, 0.7}; TMeanAccumulator error; @@ -1045,7 +1067,8 @@ void CRegressionTest::testParameterProcess() { for (std::size_t i = 0u; i < boost::size(intervals); t += intervals[i], ++i) { double dt = intervals[i]; TDoubleVec da; - rng.generateNormalSamples(0.0, variances[test], static_cast(dt / 0.05), da); + rng.generateNormalSamples(0.0, variances[test], + static_cast(dt / 0.05), da); for (auto da_ : da) { x += (v + 0.5 * a * 0.05) * 0.05; v += a * 0.05; @@ -1070,7 +1093,8 @@ void CRegressionTest::testParameterProcess() { for (std::size_t i = 0u; i < 5; ++i) { double dt = intervals[i]; TDoubleVec da; - rng.generateNormalSamples(0.0, variances[test], static_cast(dt / 0.05), da); + rng.generateNormalSamples(0.0, variances[test], + static_cast(dt / 0.05), da); for (auto da_ : da) { xt += (vt + 0.5 * at * 0.05) * 0.05; vt += at * 0.05; @@ -1082,7 +1106,8 @@ void CRegressionTest::testParameterProcess() { double interval = std::accumulate(intervals, intervals + 5, 0.0); if (run % 5 == 0) { - LOG_DEBUG(<< " " << maths::CBasicStatistics::variance(moments) << " vs " << parameterProcess.predictionVariance(interval)); + LOG_DEBUG(<< " " << maths::CBasicStatistics::variance(moments) << " vs " + << parameterProcess.predictionVariance(interval)); } actual.add(maths::CBasicStatistics::variance(moments)); estimate.add(parameterProcess.predictionVariance(interval)); @@ -1090,10 +1115,12 @@ void CRegressionTest::testParameterProcess() { LOG_DEBUG(<< "actual = " << maths::CBasicStatistics::mean(actual)); LOG_DEBUG(<< "estimate = " << maths::CBasicStatistics::mean(estimate)); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(actual), maths::CBasicStatistics::mean(estimate), 0.25 * maths::CBasicStatistics::mean(actual)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(actual), + maths::CBasicStatistics::mean(estimate), + 0.25 * maths::CBasicStatistics::mean(actual)); - error.add((maths::CBasicStatistics::mean(actual) - maths::CBasicStatistics::mean(estimate)) / + error.add((maths::CBasicStatistics::mean(actual) - + maths::CBasicStatistics::mean(estimate)) / maths::CBasicStatistics::mean(actual)); } @@ -1104,26 +1131,38 @@ void CRegressionTest::testParameterProcess() { CppUnit::Test* CRegressionTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRegressionTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testInvariants", &CRegressionTest::testInvariants)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testFit", &CRegressionTest::testFit)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRegressionTest::testShiftAbscissa", &CRegressionTest::testShiftAbscissa)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRegressionTest::testShiftOrdinate", &CRegressionTest::testShiftOrdinate)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRegressionTest::testShiftGradient", &CRegressionTest::testShiftGradient)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testLinearScale", &CRegressionTest::testLinearScale)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testAge", &CRegressionTest::testAge)); - 
suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testPrediction", &CRegressionTest::testPrediction)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testCombination", &CRegressionTest::testCombination)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testSingular", &CRegressionTest::testSingular)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testScale", &CRegressionTest::testScale)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testMean", &CRegressionTest::testMean)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testCovariances", &CRegressionTest::testCovariances)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testParameters", &CRegressionTest::testParameters)); - suiteOfTests->addTest(new CppUnit::TestCaller("CRegressionTest::testPersist", &CRegressionTest::testPersist)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CRegressionTest::testParameterProcess", &CRegressionTest::testParameterProcess)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testInvariants", &CRegressionTest::testInvariants)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testFit", &CRegressionTest::testFit)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testShiftAbscissa", &CRegressionTest::testShiftAbscissa)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testShiftOrdinate", &CRegressionTest::testShiftOrdinate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testShiftGradient", &CRegressionTest::testShiftGradient)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testLinearScale", &CRegressionTest::testLinearScale)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testAge", &CRegressionTest::testAge)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testPrediction", &CRegressionTest::testPrediction)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testCombination", &CRegressionTest::testCombination)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testSingular", &CRegressionTest::testSingular)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testScale", &CRegressionTest::testScale)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testMean", &CRegressionTest::testMean)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testCovariances", &CRegressionTest::testCovariances)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testParameters", &CRegressionTest::testParameters)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testPersist", &CRegressionTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CRegressionTest::testParameterProcess", &CRegressionTest::testParameterProcess)); return suiteOfTests; } diff --git a/lib/maths/unittest/CSamplingTest.cc b/lib/maths/unittest/CSamplingTest.cc index 8ff65c1d3d..d5fb6f745b 100644 --- a/lib/maths/unittest/CSamplingTest.cc +++ b/lib/maths/unittest/CSamplingTest.cc @@ -139,9 +139,11 @@ void CSamplingTest::testMultinomialSample() { double error = 0.0; double pTotal = 0.0; - for (TSizeVecDoubleMapCItr pItr = empiricalProbabilities.begin(); pItr != empiricalProbabilities.end(); ++pItr) { + for (TSizeVecDoubleMapCItr pItr = empiricalProbabilities.begin(); + pItr != empiricalProbabilities.end(); ++pItr) 
{ LOG_DEBUG(<< "counts = " << core::CContainerPrinter::print(pItr->first)); - CPPUNIT_ASSERT_EQUAL(size_t(20), std::accumulate(pItr->first.begin(), pItr->first.end(), size_t(0))); + CPPUNIT_ASSERT_EQUAL(size_t(20), std::accumulate(pItr->first.begin(), + pItr->first.end(), size_t(0))); double p = multinomialProbability(probabilities, pItr->first); double pe = pItr->second; @@ -193,13 +195,16 @@ void CSamplingTest::testMultivariateNormalSample() { TDoubleVec error = test_detail::minus(mean_, m_); LOG_DEBUG(<< "||error|| = " << test_detail::euclidean(error)); LOG_DEBUG(<< "||m|| = " << test_detail::euclidean(m_)); - CPPUNIT_ASSERT(test_detail::euclidean(error) < 0.02 * test_detail::euclidean(m_)); + CPPUNIT_ASSERT(test_detail::euclidean(error) < + 0.02 * test_detail::euclidean(m_)); } // Get the sample covariance matrix. TDoubleVecVec covariance(3, TDoubleVec(3, 0.0)); for (std::size_t i = 0u; i < samples.size(); ++i) { - test_detail::add(test_detail::outer(test_detail::minus(samples[i], mean_), test_detail::minus(samples[i], mean_)), covariance); + test_detail::add(test_detail::outer(test_detail::minus(samples[i], mean_), + test_detail::minus(samples[i], mean_)), + covariance); } test_detail::divide(covariance, static_cast(samples.size() - 1)); LOG_DEBUG(<< "actual covariance = " << core::CContainerPrinter::print(covariance)); @@ -219,10 +224,11 @@ void CSamplingTest::testMultivariateNormalSample() { CppUnit::Test* CSamplingTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSamplingTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSamplingTest::testMultinomialSample", &CSamplingTest::testMultinomialSample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSamplingTest::testMultivariateNormalSample", - &CSamplingTest::testMultivariateNormalSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSamplingTest::testMultinomialSample", &CSamplingTest::testMultinomialSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSamplingTest::testMultivariateNormalSample", + &CSamplingTest::testMultivariateNormalSample)); return suiteOfTests; } diff --git a/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc b/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc index ee30219f6b..518fdeebad 100644 --- a/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc +++ b/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc @@ -82,7 +82,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testSwap() { for (std::size_t i = 0u; i < 100; ++i) { core_t::TTime x = static_cast(100 * p + i); - double y = 0.02 * (static_cast(i) - 50.0) * (static_cast(i) - 50.0); + double y = 0.02 * (static_cast(i) - 50.0) * + (static_cast(i) - 50.0); bucketing1.add(x, y + noise[i], y); } bucketing1.refine(static_cast(100 * (p + 1))); @@ -114,8 +115,11 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() { { // Test that refine reduces the function approximation error. 
-        core_t::TTime times[] = {-1, 3600, 10800, 18000, 25200, 32400, 39600, 46800, 54000, 61200, 68400, 75600, 82800, 86400};
-        double function[] = {10, 10, 10, 10, 100, 90, 80, 90, 100, 20, 10, 10, 10, 10};
+        core_t::TTime times[] = {-1, 3600, 10800, 18000, 25200,
+                                 32400, 39600, 46800, 54000, 61200,
+                                 68400, 75600, 82800, 86400};
+        double function[] = {10, 10, 10, 10, 100, 90, 80,
+                             90, 100, 20, 10, 10, 10, 10};

         maths::CDiurnalTime time(0, 0, 86400, 86400);
         maths::CSeasonalComponentAdaptiveBucketing bucketing1(time);
@@ -132,7 +136,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() {

         for (core_t::TTime t = 0; t < 86400; t += 1800) {
             core_t::TTime x = start + t;
-            ptrdiff_t i = std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times);
+            ptrdiff_t i = std::lower_bound(boost::begin(times), boost::end(times), t) -
+                          boost::begin(times);

             double x0 = static_cast<double>(times[i - 1]);
             double x1 = static_cast<double>(times[i]);
@@ -151,8 +156,10 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() {
         const TFloatVec& endpoints1 = bucketing1.endpoints();
         TDoubleVec values1 = bucketing1.values(20 * 86400);
         for (std::size_t i = 1; i < endpoints1.size(); ++i) {
-            core_t::TTime t = static_cast<core_t::TTime>(0.5 * (endpoints1[i] + endpoints1[i - 1] + 1.0));
-            ptrdiff_t j = std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times);
+            core_t::TTime t = static_cast<core_t::TTime>(
+                0.5 * (endpoints1[i] + endpoints1[i - 1] + 1.0));
+            ptrdiff_t j = std::lower_bound(boost::begin(times), boost::end(times), t) -
+                          boost::begin(times);
             double x0 = static_cast<double>(times[j - 1]);
             double x1 = static_cast<double>(times[j]);
             double y0 = function[j - 1];
@@ -167,8 +174,10 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() {
         const TFloatVec& endpoints2 = bucketing2.endpoints();
         TDoubleVec values2 = bucketing2.values(20 * 86400);
         for (std::size_t i = 1; i < endpoints1.size(); ++i) {
-            core_t::TTime t = static_cast<core_t::TTime>(0.5 * (endpoints2[i] + endpoints2[i - 1] + 1.0));
-            ptrdiff_t j = std::lower_bound(boost::begin(times), boost::end(times), t) - boost::begin(times);
+            core_t::TTime t = static_cast<core_t::TTime>(
+                0.5 * (endpoints2[i] + endpoints2[i - 1] + 1.0));
+            ptrdiff_t j = std::lower_bound(boost::begin(times), boost::end(times), t) -
+                          boost::begin(times);
             double x0 = static_cast<double>(times[j - 1]);
             double x1 = static_cast<double>(times[j]);
             double y0 = function[j - 1];
@@ -182,7 +191,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() {
         LOG_DEBUG(<< "max error = " << maxError1[0]);
         LOG_DEBUG(<< "refined mean error = " << maths::CBasicStatistics::mean(meanError2));
         LOG_DEBUG(<< "refined max error = " << maxError2[0]);
-        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError2) < 0.85 * maths::CBasicStatistics::mean(meanError1));
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError2) <
+                       0.85 * maths::CBasicStatistics::mean(meanError1));
         CPPUNIT_ASSERT(maxError2[0] < 0.7 * maxError1[0]);
     }

@@ -200,7 +210,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() {

         for (std::size_t i = 0u; i < 100; ++i) {
             core_t::TTime x = static_cast<core_t::TTime>(100 * p + i);
-            double y = 0.02 * (static_cast<double>(i) - 50.0) * (static_cast<double>(i) - 50.0);
+            double y = 0.02 * (static_cast<double>(i) - 50.0) *
+                       (static_cast<double>(i) - 50.0);
             bucketing.add(x, y + noise[i], y);
         }
         bucketing.refine(static_cast<core_t::TTime>(100 * (p + 1)));
@@ -230,11 +241,16 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() {
            double v = variances[i - 1];

             // Function mean and variance.
-            double m_ = std::fabs(a) < std::fabs(b) ? 0.02 / 3.0 * std::pow(b, 3.0) * (1.0 - std::pow(a / b, 3.0)) / (b - a)
-                                                    : 0.02 / 3.0 * std::pow(a, 3.0) * (std::pow(b / a, 3.0) - 1.0) / (b - a);
+            double m_ = std::fabs(a) < std::fabs(b)
+                            ? 0.02 / 3.0 * std::pow(b, 3.0) *
+                                  (1.0 - std::pow(a / b, 3.0)) / (b - a)
+                            : 0.02 / 3.0 * std::pow(a, 3.0) *
+                                  (std::pow(b / a, 3.0) - 1.0) / (b - a);
             double v_ = 9.0;
-            LOG_DEBUG(<< "m = " << m << ", m_ = " << m_ << ", absolute error = " << std::fabs(m - m_));
-            LOG_DEBUG(<< "v = " << v << ", v_ = " << v_ << ", relative error = " << std::fabs(v - v_) / v_);
+            LOG_DEBUG(<< "m = " << m << ", m_ = " << m_
+                      << ", absolute error = " << std::fabs(m - m_));
+            LOG_DEBUG(<< "v = " << v << ", v_ = " << v_
+                      << ", relative error = " << std::fabs(v - v_) / v_);

             CPPUNIT_ASSERT_DOUBLES_EQUAL(m_, m, 0.7);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(v_, v, 0.4 * v_);
@@ -248,11 +264,22 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() {
                 // Root.
                 double c = b < 0.0 ? -std::sqrt(50.0 * m) : +::sqrt(50.0 * m);
                 // Left and right partial averaging errors.
-                double l = std::fabs(c) < std::fabs(a) ? 0.02 / 3.0 * a * a * a * ((c / a) * (c / a) * (c / a) - 1.0) - m * (c - a)
-                                                       : 0.02 / 3.0 * c * c * c * (1.0 - (a / c) * (a / c) * (a / c)) - m * (c - a);
-                double r = std::fabs(c) < std::fabs(b) ? 0.02 / 3.0 * b * b * b * (1.0 - (c / b) * (c / b) * (c / b)) - m * (b - c)
-                                                       : 0.02 / 3.0 * c * c * c * ((b / c) * (b / c) * (b / c) - 1.0) - m * (b - c);
-                LOG_DEBUG(<< "c = " << c << ", l = " << l << " r = " << r << ", error = " << std::fabs(l) + std::fabs(r));
+                double l = std::fabs(c) < std::fabs(a)
+                               ? 0.02 / 3.0 * a * a * a *
+                                         ((c / a) * (c / a) * (c / a) - 1.0) -
+                                     m * (c - a)
+                               : 0.02 / 3.0 * c * c * c *
+                                         (1.0 - (a / c) * (a / c) * (a / c)) -
+                                     m * (c - a);
+                double r = std::fabs(c) < std::fabs(b)
+                               ? 0.02 / 3.0 * b * b * b *
+                                         (1.0 - (c / b) * (c / b) * (c / b)) -
+                                     m * (b - c)
+                               : 0.02 / 3.0 * c * c * c *
+                                         ((b / c) * (b / c) * (b / c) - 1.0) -
+                                     m * (b - c);
+                LOG_DEBUG(<< "c = " << c << ", l = " << l << " r = " << r
+                          << ", error = " << std::fabs(l) + std::fabs(r));
                 avgError.add(std::fabs(l) + std::fabs(r));
             } else {
                 avgError.add(std::fabs((m_ - m) * (b - a)));
@@ -290,7 +317,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime() {
     for (std::size_t p = 0; p < 10; ++p) {
         for (std::size_t i = 0u; i < 100; ++i) {
             core_t::TTime x = static_cast<core_t::TTime>(100 * p + i);
-            double y = 0.02 * (static_cast<double>(i) - 50.0) * (static_cast<double>(i) - 50.0);
+            double y = 0.02 * (static_cast<double>(i) - 50.0) *
+                       (static_cast<double>(i) - 50.0);
             bucketing.add(x, y, y);
         }
         bucketing.refine(static_cast<core_t::TTime>(100 * (p + 1)));
@@ -301,7 +329,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime() {
     for (std::size_t i = 0u; i < 20; ++i) {
         bucketing.propagateForwardsByTime(1.0);
         double count = bucketing.count();
-        LOG_DEBUG(<< "count = " << count << ", lastCount = " << lastCount << " count/lastCount = " << count / lastCount);
+        LOG_DEBUG(<< "count = " << count << ", lastCount = " << lastCount
+                  << " count/lastCount = " << count / lastCount);
         CPPUNIT_ASSERT(count < lastCount);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.81873, count / lastCount, 5e-6);
         lastCount = count;
@@ -314,12 +343,14 @@ void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() {
    LOG_DEBUG(<< "+--------------------------------------------------------------------+");

     const double bucketLength = 3600.0;
-    const double function[] = {0.0, 0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0};
+    const double function[] = {0.0, 0.0, 10.0, 12.0, 11.0, 16.0,
+                               15.0, 1.0, 0.0, 0.0, 0.0, 0.0};
     std::size_t n = boost::size(function);

     test::CRandomNumbers rng;

-    core_t::TTime period = static_cast<core_t::TTime>(n) * static_cast<core_t::TTime>(bucketLength);
+    core_t::TTime period = static_cast<core_t::TTime>(n) *
+                           static_cast<core_t::TTime>(bucketLength);
     maths::CDiurnalTime time(0, 0, period, period);
     maths::CSeasonalComponentAdaptiveBucketing bucketing1(time, 0.0, 0.0);
     maths::CSeasonalComponentAdaptiveBucketing bucketing2(time, 0.0, 3000.0);
@@ -337,7 +368,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() {
             for (std::size_t k = 0u; k < times.size(); ++k) {
                 core_t::TTime t = static_cast<core_t::TTime>(i) * period +
-                                  static_cast<core_t::TTime>(static_cast<double>(j) * bucketLength) + static_cast<core_t::TTime>(times[k]);
+                                  static_cast<core_t::TTime>(static_cast<double>(j) * bucketLength) +
+                                  static_cast<core_t::TTime>(times[k]);
                 bucketing1.add(t, values[k], function[j]);
                 bucketing2.add(t, values[k], function[j]);
             }
@@ -355,7 +387,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() {
     for (std::size_t j = 1u; j < endpoints1.size(); ++j) {
         minimumBucketLength1.add(endpoints1[j] - endpoints1[j - 1]);
         minimumBucketLength2.add(endpoints2[j] - endpoints2[j - 1]);
-        double minimumShift = std::max(3000.0 - (endpoints1[j] - endpoints1[j - 1]), 0.0) / 2.0;
+        double minimumShift =
+            std::max(3000.0 - (endpoints1[j] - endpoints1[j - 1]), 0.0) / 2.0;
         minimumTotalError += minimumShift;
     }
     LOG_DEBUG(<< "minimumBucketLength1 = " << minimumBucketLength1);
@@ -441,7 +474,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() {

             for (std::size_t i = 0u; i < 100; ++i) {
                 core_t::TTime x = static_cast<core_t::TTime>(p * 86400 + 864 * i);
-                double y = 0.02 * (static_cast<double>(i) - 50.0) * (static_cast<double>(i) - 50.0);
+                double y = 0.02 * (static_cast<double>(i) - 50.0) *
+                           (static_cast<double>(i) - 50.0);
                 bucketing.add(x, y + noise[i], y);
             }
             bucketing.refine(static_cast<core_t::TTime>(86400 * (p + 1)));
@@ -449,7 +483,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() {
             TDoubleVec knots;
             TDoubleVec values;
             TDoubleVec variances;
-            bucketing.knots(static_cast<core_t::TTime>(86400 * (p + 1)), maths::CSplineTypes::E_Periodic, knots, values, variances);
+            bucketing.knots(static_cast<core_t::TTime>(86400 * (p + 1)),
+                            maths::CSplineTypes::E_Periodic, knots, values, variances);
             LOG_DEBUG(<< "knots = " << core::CContainerPrinter::print(knots));
             LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values));
@@ -465,7 +500,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() {
             }
             LOG_DEBUG(<< "meanError = " << maths::CBasicStatistics::mean(meanError));
             LOG_DEBUG(<< "meanValue = " << maths::CBasicStatistics::mean(meanValue));
-            CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanValue) <
+            CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) /
+                               maths::CBasicStatistics::mean(meanValue) <
                            0.1 / std::sqrt(static_cast<double>(p + 1)));
         }
     }
@@ -480,7 +516,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() {

             for (std::size_t i = 0u; i < 100; ++i) {
                 core_t::TTime x = static_cast<core_t::TTime>(p * 86400 + 864 * i);
-                double v = 0.01 * (static_cast<double>(i) - 50.0) * (static_cast<double>(i) - 50.0);
+                double v = 0.01 * (static_cast<double>(i) - 50.0) *
+                           (static_cast<double>(i) - 50.0);
                 rng.generateNormalSamples(0.0, v, 1, noise);
                 bucketing.add(x, noise[0], 0.0);
             }
@@ -490,7 +527,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() {
             TDoubleVec knots;
             TDoubleVec values;
             TDoubleVec variances;
-            bucketing.knots(static_cast<core_t::TTime>(86400 * (p + 1)), maths::CSplineTypes::E_Periodic, knots, values, variances);
+            bucketing.knots(static_cast<core_t::TTime>(86400 * (p + 1)),
+                            maths::CSplineTypes::E_Periodic, knots, values, variances);
             LOG_DEBUG(<< "knots = " << core::CContainerPrinter::print(knots));
             LOG_DEBUG(<< "variances = " << core::CContainerPrinter::print(variances));
@@ -499,14 +537,17 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() {
             for (std::size_t i = 0u; i < knots.size(); ++i) {
                 double x = knots[i] / 864.0;
                 double expectedVariance = 0.01 * (x - 50.0) * (x - 50.0);
-                LOG_DEBUG(<< "expected = " << expectedVariance << ", variance = " << variances[i]);
+                LOG_DEBUG(<< "expected = " << expectedVariance
+                          << ", variance = " << variances[i]);
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVariance, variances[i], 15.0);
                 meanError.add(std::fabs(variances[i] - expectedVariance));
                 meanVariance.add(std::fabs(expectedVariance));
             }
             LOG_DEBUG(<< "meanError = " << maths::CBasicStatistics::mean(meanError));
             LOG_DEBUG(<< "meanVariance = " << maths::CBasicStatistics::mean(meanVariance));
-            CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanVariance) < 0.2);
+            CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) /
+                               maths::CBasicStatistics::mean(meanVariance) <
+                           0.2);
         }
     }
 }
@@ -534,9 +575,11 @@ void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() {

         for (std::size_t i = 0u; i < 144; ++i) {
             double x = static_cast<double>(i) / 144.0;
-            double y = 10.0 * (std::min(static_cast<double>(p + 1) + x, 50.0) - std::max(static_cast<double>(p + 1) + x - 50.0, 0.0) +
+            double y = 10.0 * (std::min(static_cast<double>(p + 1) + x, 50.0) -
+                               std::max(static_cast<double>(p + 1) + x - 50.0, 0.0) +
                                10.0 * std::sin(boost::math::double_constants::two_pi * x));
-            bucketing.add(static_cast<core_t::TTime>(86400 * p + 600 * i), y + noise[i], y);
+            bucketing.add(static_cast<core_t::TTime>(86400 * p + 600 * i),
+                          y + noise[i], y);
         }
         bucketing.refine(static_cast<core_t::TTime>(86400 * (p + 1)));
         bucketing.propagateForwardsByTime(1.0);
@@ -545,7 +588,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() {
         TDoubleVec knots;
         TDoubleVec values;
         TDoubleVec variances;
-        bucketing.knots(static_cast<core_t::TTime>(86400 * (p + 1)), maths::CSplineTypes::E_Periodic, knots, values, variances);
+        bucketing.knots(static_cast<core_t::TTime>(86400 * (p + 1)),
+                        maths::CSplineTypes::E_Periodic, knots, values, variances);
         LOG_DEBUG(<< "knots = " << core::CContainerPrinter::print(knots));
         LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values));
         LOG_DEBUG(<< "variances = " << core::CContainerPrinter::print(variances));
@@ -555,7 +599,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() {
         for (std::size_t i = 0u; i < knots.size(); ++i) {
             double x = knots[i] / 86400.0;
             double expectedValue =
-                10.0 * (std::min(static_cast<double>(p + 1) + x, 50.0) - std::max(static_cast<double>(p + 1) + x - 50.0, 0.0) +
+                10.0 * (std::min(static_cast<double>(p + 1) + x, 50.0) -
+                        std::max(static_cast<double>(p + 1) + x - 50.0, 0.0) +
                         10.0 * std::sin(boost::math::double_constants::two_pi * x));
             LOG_DEBUG(<< "expected = " << expectedValue << ", value = " << values[i]);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue, values[i], 70.0);
@@ -564,7 +609,9 @@ void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() {
         }
         LOG_DEBUG(<< "meanError = " << maths::CBasicStatistics::mean(meanError));
         LOG_DEBUG(<< "meanValue = " << maths::CBasicStatistics::mean(meanValue));
-        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) / maths::CBasicStatistics::mean(meanValue) < 0.15);
+        CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) /
+                           maths::CBasicStatistics::mean(meanValue) <
+                       0.15);
     }
 }
@@ -607,8 +654,10 @@ void CSeasonalComponentAdaptiveBucketingTest::testShiftValue() {
     TDoubleVec variances2;
     bucketing.knots(t + 7 * 86400, maths::CSplineTypes::E_Natural, knots2, values2, variances2);

-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(knots1), core::CContainerPrinter::print(knots2));
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(variances1), core::CContainerPrinter::print(variances2));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(knots1),
+                         core::CContainerPrinter::print(knots2));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(variances1),
+                         core::CContainerPrinter::print(variances2));

     for (std::size_t i = 0u; i < values1.size(); ++i) {
         LOG_DEBUG(<< "values = " << values1[i] << " vs " << values2[i]);
@@ -671,7 +720,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testPersist() {
     for (std::size_t p = 0; p < 10; ++p) {
         for (std::size_t i = 0u; i < 100; ++i) {
             core_t::TTime x = static_cast<core_t::TTime>(p * 86400 + 864 * i);
-            double y = 0.02 * (static_cast<double>(i) - 50.0) * (static_cast<double>(i) - 50.0);
+            double y = 0.02 * (static_cast<double>(i) - 50.0) *
+                       (static_cast<double>(i) - 50.0);
             origBucketing.add(x, y, y);
         }
         origBucketing.refine(static_cast<core_t::TTime>(86400 * (p + 1)));
@@ -693,9 +743,11 @@ void CSeasonalComponentAdaptiveBucketingTest::testPersist() {
     core::CRapidXmlStateRestoreTraverser traverser(parser);

     // Restore the XML into a new bucketing.
-    maths::CSeasonalComponentAdaptiveBucketing restoredBucketing(decayRate + 0.1, minimumBucketLength, traverser);
+    maths::CSeasonalComponentAdaptiveBucketing restoredBucketing(
+        decayRate + 0.1, minimumBucketLength, traverser);

-    LOG_DEBUG(<< "orig checksum = " << checksum << " restored checksum = " << restoredBucketing.checksum());
+    LOG_DEBUG(<< "orig checksum = " << checksum
+              << " restored checksum = " << restoredBucketing.checksum());
     CPPUNIT_ASSERT_EQUAL(checksum, restoredBucketing.checksum());

     // The XML representation of the new bucketing should be the
@@ -726,7 +778,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() {
     for (std::size_t p = 0; p < 10; ++p) {
         for (std::size_t i = 0u; i < 100; ++i) {
             core_t::TTime x = static_cast<core_t::TTime>(p * 86400 + 864 * i);
-            double y = 0.02 * (static_cast<double>(i) - 50.0) * (static_cast<double>(i) - 50.0);
+            double y = 0.02 * (static_cast<double>(i) - 50.0) *
+                       (static_cast<double>(i) - 50.0);
             expectedBucketing.add(x, y, y);
         }
         expectedBucketing.refine(static_cast<core_t::TTime>(86400 * (p + 1)));
@@ -744,7 +797,8 @@ void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() {
     core::CRapidXmlStateRestoreTraverser traverser(parser);

     // Restore the XML into a new bucketing.
-    maths::CSeasonalComponentAdaptiveBucketing restoredBucketing(decayRate + 0.1, minimumBucketLength, traverser);
+    maths::CSeasonalComponentAdaptiveBucketing restoredBucketing(
+        decayRate + 0.1, minimumBucketLength, traverser);

     // Check that the knots points we get back are very nearly
     // those we expect.
@@ -752,37 +806,46 @@ void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() {
     TDoubleVec expectedKnots;
     TDoubleVec expectedValues;
     TDoubleVec expectedVariances;
-    expectedBucketing.knots(863136, maths::CSplineTypes::E_Periodic, expectedKnots, expectedValues, expectedVariances);
+    expectedBucketing.knots(863136, maths::CSplineTypes::E_Periodic,
+                            expectedKnots, expectedValues, expectedVariances);

     TDoubleVec restoredKnots;
     TDoubleVec restoredValues;
     TDoubleVec restoredVariances;
-    expectedBucketing.knots(863136, maths::CSplineTypes::E_Periodic, restoredKnots, restoredValues, restoredVariances);
+    expectedBucketing.knots(863136, maths::CSplineTypes::E_Periodic,
+                            restoredKnots, restoredValues, restoredVariances);

     CPPUNIT_ASSERT_EQUAL(expectedBucketing.decayRate(), restoredBucketing.decayRate());

     LOG_DEBUG(<< "expected knots = " << core::CContainerPrinter::print(expectedKnots));
     LOG_DEBUG(<< "restored knots = " << core::CContainerPrinter::print(restoredKnots));
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedKnots), core::CContainerPrinter::print(restoredKnots));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedKnots),
+                         core::CContainerPrinter::print(restoredKnots));

     LOG_DEBUG(<< "expected values = " << core::CContainerPrinter::print(expectedValues));
     LOG_DEBUG(<< "restored values = " << core::CContainerPrinter::print(restoredValues));
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedValues), core::CContainerPrinter::print(restoredValues));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedValues),
+                         core::CContainerPrinter::print(restoredValues));

     LOG_DEBUG(<< "expected variances = " << core::CContainerPrinter::print(expectedVariances));
     LOG_DEBUG(<< "restored variances = " << core::CContainerPrinter::print(restoredVariances));
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedVariances), core::CContainerPrinter::print(restoredVariances));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedVariances),
+                         core::CContainerPrinter::print(restoredVariances));
 }

 CppUnit::Test* CSeasonalComponentAdaptiveBucketingTest::suite() {
-    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSeasonalComponentAdaptiveBucketingTest");
+    CppUnit::TestSuite* suiteOfTests =
+        new CppUnit::TestSuite("CSeasonalComponentAdaptiveBucketingTest");

     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testInitialize", &CSeasonalComponentAdaptiveBucketingTest::testInitialize));
+        "CSeasonalComponentAdaptiveBucketingTest::testInitialize",
+        &CSeasonalComponentAdaptiveBucketingTest::testInitialize));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testSwap", &CSeasonalComponentAdaptiveBucketingTest::testSwap));
+        "CSeasonalComponentAdaptiveBucketingTest::testSwap",
+        &CSeasonalComponentAdaptiveBucketingTest::testSwap));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testRefine", &CSeasonalComponentAdaptiveBucketingTest::testRefine));
+        "CSeasonalComponentAdaptiveBucketingTest::testRefine",
+        &CSeasonalComponentAdaptiveBucketingTest::testRefine));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
         "CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime",
         &CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime));
@@ -790,20 +853,26 @@ CppUnit::Test* CSeasonalComponentAdaptiveBucketingTest::suite() {
         "CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength",
         &CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testUnintialized", &CSeasonalComponentAdaptiveBucketingTest::testUnintialized));
+        "CSeasonalComponentAdaptiveBucketingTest::testUnintialized",
+        &CSeasonalComponentAdaptiveBucketingTest::testUnintialized));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testKnots", &CSeasonalComponentAdaptiveBucketingTest::testKnots));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>("CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots",
-                                                                         &CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots));
+        "CSeasonalComponentAdaptiveBucketingTest::testKnots",
+        &CSeasonalComponentAdaptiveBucketingTest::testKnots));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testShiftValue", &CSeasonalComponentAdaptiveBucketingTest::testShiftValue));
+        "CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots",
+        &CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testSlope", &CSeasonalComponentAdaptiveBucketingTest::testSlope));
+        "CSeasonalComponentAdaptiveBucketingTest::testShiftValue",
+        &CSeasonalComponentAdaptiveBucketingTest::testShiftValue));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testPersist", &CSeasonalComponentAdaptiveBucketingTest::testPersist));
+        "CSeasonalComponentAdaptiveBucketingTest::testSlope",
+        &CSeasonalComponentAdaptiveBucketingTest::testSlope));
     suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
-        "CSeasonalComponentAdaptiveBucketingTest::testUpgrade", &CSeasonalComponentAdaptiveBucketingTest::testUpgrade));
+        "CSeasonalComponentAdaptiveBucketingTest::testPersist",
+        &CSeasonalComponentAdaptiveBucketingTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentAdaptiveBucketingTest>(
+        "CSeasonalComponentAdaptiveBucketingTest::testUpgrade",
+        &CSeasonalComponentAdaptiveBucketingTest::testUpgrade));

     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CSeasonalComponentTest.cc b/lib/maths/unittest/CSeasonalComponentTest.cc
index 4422edd538..63f79730b2 100644
--- a/lib/maths/unittest/CSeasonalComponentTest.cc
+++ b/lib/maths/unittest/CSeasonalComponentTest.cc
@@ -41,15 +41,16 @@ class CTestSeasonalComponent : public maths::CSeasonalComponent {
     using maths::CSeasonalComponent::initialize;

 public:
-    CTestSeasonalComponent(core_t::TTime startTime,
-                           core_t::TTime window,
-                           core_t::TTime period,
-                           std::size_t space,
-                           double decayRate = 0.0,
-                           double minimumBucketLength = 0.0,
-                           maths::CSplineTypes::EBoundaryCondition boundaryCondition = maths::CSplineTypes::E_Periodic,
-                           maths::CSplineTypes::EType valueInterpolationType = maths::CSplineTypes::E_Cubic,
-                           maths::CSplineTypes::EType varianceInterpolationType = maths::CSplineTypes::E_Linear)
+    CTestSeasonalComponent(
+        core_t::TTime startTime,
+        core_t::TTime window,
+        core_t::TTime period,
+        std::size_t space,
+        double decayRate = 0.0,
+        double minimumBucketLength = 0.0,
+        maths::CSplineTypes::EBoundaryCondition boundaryCondition = maths::CSplineTypes::E_Periodic,
+        maths::CSplineTypes::EType valueInterpolationType = maths::CSplineTypes::E_Cubic,
+        maths::CSplineTypes::EType varianceInterpolationType = maths::CSplineTypes::E_Linear)
         : maths::CSeasonalComponent(maths::CDiurnalTime(0, 0, window, period),
                                     space,
                                     decayRate,
@@ -91,15 +92,21 @@ void generateSeasonalValues(test::CRandomNumbers& rng,
     core_t::TTime period = function[function.size() - 1].first;

     TSizeVec times;
-    rng.generateUniformSamples(static_cast<std::size_t>(startTime), static_cast<std::size_t>(endTime), numberSamples, times);
+    rng.generateUniformSamples(static_cast<std::size_t>(startTime),
+                               static_cast<std::size_t>(endTime), numberSamples, times);
     std::sort(times.begin(), times.end());

     for (std::size_t i = 0u; i < times.size(); ++i) {
         core_t::TTime offset = static_cast<core_t::TTime>(times[i] % period);
-        std::size_t b = std::lower_bound(function.begin(), function.end(), offset, maths::COrderings::SFirstLess()) - function.begin();
+        std::size_t b = std::lower_bound(function.begin(), function.end(), offset,
+                                         maths::COrderings::SFirstLess()) -
+                        function.begin();
         b = maths::CTools::truncate(b, std::size_t(1), std::size_t(function.size() - 1));
         std::size_t a = b - 1;
-        double m = (function[b].second - function[a].second) / static_cast<double>(function[b].first - function[a].first);
-        samples.push_back(TTimeDoublePr(times[i], function[a].second + m * static_cast<double>(offset - function[a].first)));
+        double m = (function[b].second - function[a].second) /
+                   static_cast<double>(function[b].first - function[a].first);
+        samples.push_back(TTimeDoublePr(
+            times[i], function[a].second +
+                          m * static_cast<double>(offset - function[a].first)));
     }
 }

@@ -127,13 +134,15 @@ void CSeasonalComponentTest::testNoPeriodicity() {
     std::size_t n = 5000u;
     TTimeDoublePrVec samples;
-    generateSeasonalValues(rng, function, startTime, startTime + 31 * core::constants::DAY, n, samples);
+    generateSeasonalValues(rng, function, startTime,
+                           startTime + 31 * core::constants::DAY, n, samples);

     TDoubleVec residuals;
     rng.generateGammaSamples(10.0, 1.2, n, residuals);
     double residualMean = maths::CBasicStatistics::mean(residuals);

-    CTestSeasonalComponent seasonal(startTime, core::constants::DAY, core::constants::DAY, 24);
+    CTestSeasonalComponent seasonal(startTime, core::constants::DAY,
+                                    core::constants::DAY, 24);
     seasonal.initialize();

     //std::ofstream file;
@@ -171,10 +180,11 @@ void CSeasonalComponentTest::testNoPeriodicity() {
             //ft << "];\n";

             if (d > 1) {
-                LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0))
-                          << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0)));
+                LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = "
+                          << mean(seasonal.value(time + core::constants::DAY - 1, 0.0)));
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(
-                    mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1);
+                    mean(seasonal.value(time, 0.0)),
+                    mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1);
             }
             error1 /= static_cast<double>(function.size());
             error2 /= static_cast<double>(function.size());
@@ -217,20 +227,23 @@ void CSeasonalComponentTest::testConstantPeriodic() {
         TTimeDoublePrVec function;
         for (core_t::TTime i = 0u; i < 49; ++i) {
             core_t::TTime t = (i * core::constants::DAY) / 48;
-            double ft = 100.0 + 40.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 48.0);
+            double ft = 100.0 + 40.0 * std::sin(boost::math::double_constants::two_pi *
+                                                static_cast<double>(i) / 48.0);
             function.push_back(TTimeDoublePr(t, ft));
         }

         std::size_t n = 5000u;
         TTimeDoublePrVec samples;
-        generateSeasonalValues(rng, function, startTime, startTime + 31 * core::constants::DAY, n, samples);
+        generateSeasonalValues(rng, function, startTime,
+                               startTime + 31 * core::constants::DAY, n, samples);

         TDoubleVec residuals;
         rng.generateGammaSamples(10.0, 1.2, n, residuals);
         double residualMean = maths::CBasicStatistics::mean(residuals);

-        CTestSeasonalComponent seasonal(startTime, core::constants::DAY, core::constants::DAY, 24, 0.01);
+        CTestSeasonalComponent seasonal(startTime, core::constants::DAY,
+                                        core::constants::DAY, 24, 0.01);
         seasonal.initialize();

         //std::ofstream file;
@@ -267,10 +280,11 @@ void CSeasonalComponentTest::testConstantPeriodic() {
             //ft << "];\n";

             if (d > 1) {
-                LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0))
-                          << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0)));
+                LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = "
+                          << mean(seasonal.value(time + core::constants::DAY - 1, 0.0)));
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(
-                    mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1);
+                    mean(seasonal.value(time, 0.0)),
+                    mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1);
             }

             error1 /= static_cast<double>(function.size());
@@ -305,18 +319,30 @@ void CSeasonalComponentTest::testConstantPeriodic() {
         LOG_DEBUG(<< "*** piecewise linear ***");

         TTimeDoublePr knotPoints[] = {
-            TTimeDoublePr(0, 1.0), TTimeDoublePr(1800, 1.0), TTimeDoublePr(3600, 2.0), TTimeDoublePr(5400, 3.0),
-            TTimeDoublePr(7200, 5.0), TTimeDoublePr(9000, 5.0), TTimeDoublePr(10800, 10.0), TTimeDoublePr(12600, 10.0),
-            TTimeDoublePr(14400, 12.0), TTimeDoublePr(16200, 12.0), TTimeDoublePr(18000, 14.0), TTimeDoublePr(19800, 12.0),
-            TTimeDoublePr(21600, 10.0), TTimeDoublePr(23400, 14.0), TTimeDoublePr(25200, 16.0), TTimeDoublePr(27000, 50.0),
-            TTimeDoublePr(28800, 300.0), TTimeDoublePr(30600, 330.0), TTimeDoublePr(32400, 310.0), TTimeDoublePr(34200, 290.0),
-            TTimeDoublePr(36000, 280.0), TTimeDoublePr(37800, 260.0), TTimeDoublePr(39600, 250.0), TTimeDoublePr(41400, 230.0),
-            TTimeDoublePr(43200, 230.0), TTimeDoublePr(45000, 220.0), TTimeDoublePr(46800, 240.0), TTimeDoublePr(48600, 220.0),
-            TTimeDoublePr(50400, 260.0), TTimeDoublePr(52200, 250.0), TTimeDoublePr(54000, 260.0), TTimeDoublePr(55800, 270.0),
-            TTimeDoublePr(57600, 280.0), TTimeDoublePr(59400, 290.0), TTimeDoublePr(61200, 290.0), TTimeDoublePr(63000, 60.0),
-            TTimeDoublePr(64800, 20.0), TTimeDoublePr(66600, 18.0), TTimeDoublePr(68400, 19.0), TTimeDoublePr(70200, 10.0),
-            TTimeDoublePr(72000, 10.0), TTimeDoublePr(73800, 5.0), TTimeDoublePr(75600, 5.0), TTimeDoublePr(77400, 10.0),
-            TTimeDoublePr(79200, 5.0), TTimeDoublePr(81000, 3.0), TTimeDoublePr(82800, 1.0), TTimeDoublePr(84600, 1.0),
+            TTimeDoublePr(0, 1.0), TTimeDoublePr(1800, 1.0),
+            TTimeDoublePr(3600, 2.0), TTimeDoublePr(5400, 3.0),
+            TTimeDoublePr(7200, 5.0), TTimeDoublePr(9000, 5.0),
+            TTimeDoublePr(10800, 10.0), TTimeDoublePr(12600, 10.0),
+            TTimeDoublePr(14400, 12.0), TTimeDoublePr(16200, 12.0),
+            TTimeDoublePr(18000, 14.0), TTimeDoublePr(19800, 12.0),
+            TTimeDoublePr(21600, 10.0), TTimeDoublePr(23400, 14.0),
+            TTimeDoublePr(25200, 16.0), TTimeDoublePr(27000, 50.0),
+            TTimeDoublePr(28800, 300.0), TTimeDoublePr(30600, 330.0),
+            TTimeDoublePr(32400, 310.0), TTimeDoublePr(34200, 290.0),
+            TTimeDoublePr(36000, 280.0), TTimeDoublePr(37800, 260.0),
+            TTimeDoublePr(39600, 250.0), TTimeDoublePr(41400, 230.0),
+            TTimeDoublePr(43200, 230.0), TTimeDoublePr(45000, 220.0),
+            TTimeDoublePr(46800, 240.0), TTimeDoublePr(48600, 220.0),
+            TTimeDoublePr(50400, 260.0), TTimeDoublePr(52200, 250.0),
+            TTimeDoublePr(54000, 260.0), TTimeDoublePr(55800, 270.0),
+            TTimeDoublePr(57600, 280.0), TTimeDoublePr(59400, 290.0),
+            TTimeDoublePr(61200, 290.0), TTimeDoublePr(63000, 60.0),
+            TTimeDoublePr(64800, 20.0), TTimeDoublePr(66600, 18.0),
+            TTimeDoublePr(68400, 19.0), TTimeDoublePr(70200, 10.0),
+            TTimeDoublePr(72000, 10.0), TTimeDoublePr(73800, 5.0),
+            TTimeDoublePr(75600, 5.0), TTimeDoublePr(77400, 10.0),
+            TTimeDoublePr(79200, 5.0), TTimeDoublePr(81000, 3.0),
+            TTimeDoublePr(82800, 1.0), TTimeDoublePr(84600, 1.0),
             TTimeDoublePr(86400, 1.0)};

         TTimeDoublePrVec function(boost::begin(knotPoints), boost::end(knotPoints));
@@ -324,13 +350,15 @@ void CSeasonalComponentTest::testConstantPeriodic() {
         std::size_t n = 6000u;
         TTimeDoublePrVec samples;
-        generateSeasonalValues(rng, function, startTime, startTime + 41 * core::constants::DAY, n, samples);
+        generateSeasonalValues(rng, function, startTime,
+                               startTime + 41 * core::constants::DAY, n, samples);

         TDoubleVec residuals;
         rng.generateGammaSamples(10.0, 1.2, n, residuals);
         double residualMean = maths::CBasicStatistics::mean(residuals);

-        CTestSeasonalComponent seasonal(startTime, core::constants::DAY, core::constants::DAY, 24, 0.01);
+        CTestSeasonalComponent seasonal(startTime, core::constants::DAY,
+                                        core::constants::DAY, 24, 0.01);
         seasonal.initialize();

         //std::ofstream file;
@@ -368,10 +396,11 @@ void CSeasonalComponentTest::testConstantPeriodic() {
             //ft << "];\n";

             if (d > 1) {
-                LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0))
-                          << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0)));
+                LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = "
+                          << mean(seasonal.value(time + core::constants::DAY - 1, 0.0)));
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(
-                    mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1);
+                    mean(seasonal.value(time, 0.0)),
+                    mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1);
             }

             error1 /= static_cast<double>(function.size());
@@ -413,25 +442,38 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() {
     core_t::TTime startTime = 0;

     TTimeDoublePr knotPoints[] = {
-        TTimeDoublePr(0, 1.0), TTimeDoublePr(1800, 1.0), TTimeDoublePr(3600, 2.0), TTimeDoublePr(5400, 3.0),
-        TTimeDoublePr(7200, 5.0),
TTimeDoublePr(9000, 5.0), TTimeDoublePr(10800, 10.0), TTimeDoublePr(12600, 10.0), - TTimeDoublePr(14400, 12.0), TTimeDoublePr(16200, 12.0), TTimeDoublePr(18000, 14.0), TTimeDoublePr(19800, 12.0), - TTimeDoublePr(21600, 10.0), TTimeDoublePr(23400, 14.0), TTimeDoublePr(25200, 16.0), TTimeDoublePr(27000, 50.0), - TTimeDoublePr(28800, 300.0), TTimeDoublePr(30600, 330.0), TTimeDoublePr(32400, 310.0), TTimeDoublePr(34200, 290.0), - TTimeDoublePr(36000, 280.0), TTimeDoublePr(37800, 260.0), TTimeDoublePr(39600, 250.0), TTimeDoublePr(41400, 230.0), - TTimeDoublePr(43200, 230.0), TTimeDoublePr(45000, 220.0), TTimeDoublePr(46800, 240.0), TTimeDoublePr(48600, 220.0), - TTimeDoublePr(50400, 260.0), TTimeDoublePr(52200, 250.0), TTimeDoublePr(54000, 260.0), TTimeDoublePr(55800, 270.0), - TTimeDoublePr(57600, 280.0), TTimeDoublePr(59400, 290.0), TTimeDoublePr(61200, 290.0), TTimeDoublePr(63000, 60.0), - TTimeDoublePr(64800, 20.0), TTimeDoublePr(66600, 18.0), TTimeDoublePr(68400, 19.0), TTimeDoublePr(70200, 10.0), - TTimeDoublePr(72000, 10.0), TTimeDoublePr(73800, 5.0), TTimeDoublePr(75600, 5.0), TTimeDoublePr(77400, 10.0), - TTimeDoublePr(79200, 5.0), TTimeDoublePr(81000, 3.0), TTimeDoublePr(82800, 1.0), TTimeDoublePr(84600, 1.0), + TTimeDoublePr(0, 1.0), TTimeDoublePr(1800, 1.0), + TTimeDoublePr(3600, 2.0), TTimeDoublePr(5400, 3.0), + TTimeDoublePr(7200, 5.0), TTimeDoublePr(9000, 5.0), + TTimeDoublePr(10800, 10.0), TTimeDoublePr(12600, 10.0), + TTimeDoublePr(14400, 12.0), TTimeDoublePr(16200, 12.0), + TTimeDoublePr(18000, 14.0), TTimeDoublePr(19800, 12.0), + TTimeDoublePr(21600, 10.0), TTimeDoublePr(23400, 14.0), + TTimeDoublePr(25200, 16.0), TTimeDoublePr(27000, 50.0), + TTimeDoublePr(28800, 300.0), TTimeDoublePr(30600, 330.0), + TTimeDoublePr(32400, 310.0), TTimeDoublePr(34200, 290.0), + TTimeDoublePr(36000, 280.0), TTimeDoublePr(37800, 260.0), + TTimeDoublePr(39600, 250.0), TTimeDoublePr(41400, 230.0), + TTimeDoublePr(43200, 230.0), TTimeDoublePr(45000, 220.0), + TTimeDoublePr(46800, 240.0), TTimeDoublePr(48600, 220.0), + TTimeDoublePr(50400, 260.0), TTimeDoublePr(52200, 250.0), + TTimeDoublePr(54000, 260.0), TTimeDoublePr(55800, 270.0), + TTimeDoublePr(57600, 280.0), TTimeDoublePr(59400, 290.0), + TTimeDoublePr(61200, 290.0), TTimeDoublePr(63000, 60.0), + TTimeDoublePr(64800, 20.0), TTimeDoublePr(66600, 18.0), + TTimeDoublePr(68400, 19.0), TTimeDoublePr(70200, 10.0), + TTimeDoublePr(72000, 10.0), TTimeDoublePr(73800, 5.0), + TTimeDoublePr(75600, 5.0), TTimeDoublePr(77400, 10.0), + TTimeDoublePr(79200, 5.0), TTimeDoublePr(81000, 3.0), + TTimeDoublePr(82800, 1.0), TTimeDoublePr(84600, 1.0), TTimeDoublePr(86400, 1.0)}; TTimeDoublePrVec function(boost::begin(knotPoints), boost::end(knotPoints)); test::CRandomNumbers rng; - CTestSeasonalComponent seasonal(startTime, core::constants::DAY, core::constants::DAY, 24, 0.048); + CTestSeasonalComponent seasonal(startTime, core::constants::DAY, + core::constants::DAY, 24, 0.048); seasonal.initialize(); core_t::TTime time = startTime; @@ -481,10 +523,11 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() { //ft << "];\n"; if (d > 1) { - LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0)) - << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = " + << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); CPPUNIT_ASSERT_DOUBLES_EQUAL( - mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + 
mean(seasonal.value(time, 0.0)), + mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); } error1 /= static_cast(function.size()); @@ -532,7 +575,8 @@ void CSeasonalComponentTest::testVeryLowVariation() { std::size_t n = 5000u; TTimeDoublePrVec samples; - generateSeasonalValues(rng, function, startTime, startTime + 31 * core::constants::DAY, n, samples); + generateSeasonalValues(rng, function, startTime, + startTime + 31 * core::constants::DAY, n, samples); TDoubleVec residuals; rng.generateNormalSamples(0.0, 1e-3, n, residuals); @@ -540,7 +584,8 @@ void CSeasonalComponentTest::testVeryLowVariation() { double deviation = std::sqrt(1e-3); - CTestSeasonalComponent seasonal(startTime, core::constants::DAY, core::constants::DAY, 24); + CTestSeasonalComponent seasonal(startTime, core::constants::DAY, + core::constants::DAY, 24); seasonal.initialize(startTime); //std::ofstream file; @@ -578,10 +623,11 @@ void CSeasonalComponentTest::testVeryLowVariation() { //ft << "];\n"; if (d > 1) { - LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0)) - << ", f(T) = " << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + LOG_DEBUG(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = " + << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); CPPUNIT_ASSERT_DOUBLES_EQUAL( - mean(seasonal.value(time, 0.0)), mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + mean(seasonal.value(time, 0.0)), + mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); } error1 /= static_cast(function.size()); error2 /= static_cast(function.size()); @@ -622,7 +668,8 @@ void CSeasonalComponentTest::testVariance() { TTimeDoublePrVec function; for (core_t::TTime i = 0u; i < 481; ++i) { core_t::TTime t = (i * core::constants::DAY) / 48; - double vt = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast(i % 48) / 48.0); + double vt = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * + static_cast(i % 48) / 48.0); TDoubleVec sample; rng.generateNormalSamples(0.0, vt, 10, sample); for (std::size_t j = 0u; j < sample.size(); ++j) { @@ -640,10 +687,12 @@ void CSeasonalComponentTest::testVariance() { TMeanAccumulator error; for (core_t::TTime i = 0u; i < 48; ++i) { core_t::TTime t = (i * core::constants::DAY) / 48; - double v_ = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast(i) / 48.0); + double v_ = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * + static_cast(i) / 48.0); TDoubleDoublePr vv = seasonal.variance(t, 98.0); double v = (vv.first + vv.second) / 2.0; - LOG_DEBUG(<< "v_ = " << v_ << ", v = " << core::CContainerPrinter::print(vv) << ", relative error = " << std::fabs(v - v_) / v_); + LOG_DEBUG(<< "v_ = " << v_ << ", v = " << core::CContainerPrinter::print(vv) + << ", relative error = " << std::fabs(v - v_) / v_); CPPUNIT_ASSERT_DOUBLES_EQUAL(v_, v, 0.4 * v_); CPPUNIT_ASSERT(v_ > vv.first && v_ < vv.second); @@ -671,19 +720,22 @@ void CSeasonalComponentTest::testPersist() { TTimeDoublePrVec function; for (core_t::TTime i = 0u; i < 49; ++i) { core_t::TTime t = (i * core::constants::DAY) / 48; - double ft = 100.0 + 40.0 * std::sin(boost::math::double_constants::two_pi * static_cast(i) / 48.0); + double ft = 100.0 + 40.0 * std::sin(boost::math::double_constants::two_pi * + static_cast(i) / 48.0); function.push_back(TTimeDoublePr(t, ft)); } std::size_t n = 3300u; TTimeDoublePrVec samples; - generateSeasonalValues(rng, function, startTime, startTime + 31 * core::constants::DAY, n, samples); 
+    generateSeasonalValues(rng, function, startTime,
+                           startTime + 31 * core::constants::DAY, n, samples);
 
     TDoubleVec residuals;
     rng.generateGammaSamples(10.0, 1.2, n, residuals);
 
-    CTestSeasonalComponent origSeasonal(startTime, core::constants::DAY, core::constants::DAY, 24, decayRate);
+    CTestSeasonalComponent origSeasonal(startTime, core::constants::DAY,
+                                        core::constants::DAY, 24, decayRate);
     origSeasonal.initialize(startTime);
 
     for (std::size_t i = 0u; i < n; ++i) {
@@ -720,14 +772,16 @@ void CSeasonalComponentTest::testPersist() {
         TDoubleDoublePr xo = origSeasonal.value(time, 80.0);
         TDoubleDoublePr xn = restoredSeasonal.value(time, 80.0);
         if (time % (15 * minute) == 0) {
-            LOG_DEBUG(<< "xo = " << core::CContainerPrinter::print(xo) << ", xn = " << core::CContainerPrinter::print(xn));
+            LOG_DEBUG(<< "xo = " << core::CContainerPrinter::print(xo)
+                      << ", xn = " << core::CContainerPrinter::print(xn));
         }
         CPPUNIT_ASSERT_DOUBLES_EQUAL(xo.first, xn.first, 0.3);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(xo.second, xn.second, 0.3);
         TDoubleDoublePr vo = origSeasonal.variance(time, 80.0);
         TDoubleDoublePr vn = origSeasonal.variance(time, 80.0);
         if (time % (15 * minute) == 0) {
-            LOG_DEBUG(<< "vo = " << core::CContainerPrinter::print(vo) << ", vn = " << core::CContainerPrinter::print(vn));
+            LOG_DEBUG(<< "vo = " << core::CContainerPrinter::print(vo)
+                      << ", vn = " << core::CContainerPrinter::print(vn));
         }
         CPPUNIT_ASSERT_DOUBLES_EQUAL(vo.first, vn.first, 1e-3);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(vo.second, vn.second, 1e-3);
@@ -737,18 +791,21 @@ void CSeasonalComponentTest::testPersist() {
 
 CppUnit::Test* CSeasonalComponentTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSeasonalComponentTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>("CSeasonalComponentTest::testNoPeriodicity",
-                                                                          &CSeasonalComponentTest::testNoPeriodicity));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>("CSeasonalComponentTest::testConstantPeriodic",
-                                                                          &CSeasonalComponentTest::testConstantPeriodic));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>("CSeasonalComponentTest::testTimeVaryingPeriodic",
-                                                                          &CSeasonalComponentTest::testTimeVaryingPeriodic));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>("CSeasonalComponentTest::testVeryLowVariation",
-                                                                          &CSeasonalComponentTest::testVeryLowVariation));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CSeasonalComponentTest>("CSeasonalComponentTest::testVariance", &CSeasonalComponentTest::testVariance));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CSeasonalComponentTest>("CSeasonalComponentTest::testPersist", &CSeasonalComponentTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>(
+        "CSeasonalComponentTest::testNoPeriodicity", &CSeasonalComponentTest::testNoPeriodicity));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>(
+        "CSeasonalComponentTest::testConstantPeriodic",
+        &CSeasonalComponentTest::testConstantPeriodic));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>(
+        "CSeasonalComponentTest::testTimeVaryingPeriodic",
+        &CSeasonalComponentTest::testTimeVaryingPeriodic));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>(
+        "CSeasonalComponentTest::testVeryLowVariation",
+        &CSeasonalComponentTest::testVeryLowVariation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>(
+        "CSeasonalComponentTest::testVariance", &CSeasonalComponentTest::testVariance));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSeasonalComponentTest>(
+        "CSeasonalComponentTest::testPersist", &CSeasonalComponentTest::testPersist));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CSetToolsTest.cc b/lib/maths/unittest/CSetToolsTest.cc
index 083a10e1dc..9c4120ba0f 100644
--- a/lib/maths/unittest/CSetToolsTest.cc
+++ b/lib/maths/unittest/CSetToolsTest.cc
@@ -42,24 +42,30 @@ void CSetToolsTest::testInplaceSetDifference() {
                 left.push_back(a[j]);
             }
             TDoubleVec expected;
-            std::set_difference(A.begin(), A.end(), left.begin(), left.end(), std::back_inserter(expected));
+            std::set_difference(A.begin(), A.end(), left.begin(), left.end(),
+                                std::back_inserter(expected));
             TDoubleVec test = A;
             maths::CSetTools::inplace_set_difference(test, left.begin(), left.end());
-            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A) << ", B = " << core::CContainerPrinter::print(left)
+            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A)
+                      << ", B = " << core::CContainerPrinter::print(left)
                       << ", A - B = " << core::CContainerPrinter::print(test));
-            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(test));
+            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected),
+                                 core::CContainerPrinter::print(test));
 
             TDoubleVec right;
             for (std::size_t j = i; j < boost::size(a); ++j) {
                 right.push_back(a[j]);
             }
             expected.clear();
-            std::set_difference(A.begin(), A.end(), right.begin(), right.end(), std::back_inserter(expected));
+            std::set_difference(A.begin(), A.end(), right.begin(), right.end(),
+                                std::back_inserter(expected));
             test = A;
             maths::CSetTools::inplace_set_difference(test, right.begin(), right.end());
-            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A) << ", B = " << core::CContainerPrinter::print(right)
+            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A)
+                      << ", B = " << core::CContainerPrinter::print(right)
                       << ", A - B = " << core::CContainerPrinter::print(test));
-            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(test));
+            CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected),
+                                 core::CContainerPrinter::print(test));
         }
     }
 
@@ -82,7 +88,8 @@ void CSetToolsTest::testInplaceSetDifference() {
         }
 
         TDoubleVec expected;
-        std::set_difference(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(expected));
+        std::set_difference(A.begin(), A.end(), B.begin(), B.end(),
+                            std::back_inserter(expected));
 
         if ((t + 1) % 10 == 0) {
             LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A));
@@ -95,7 +102,8 @@ void CSetToolsTest::testInplaceSetDifference() {
             LOG_DEBUG(<< "A - B = " << core::CContainerPrinter::print(A));
         }
 
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(A));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected),
+                             core::CContainerPrinter::print(A));
     }
 }
 
@@ -116,9 +124,12 @@ void CSetToolsTest::testSetSizes() {
                 left.push_back(a[j]);
             }
             TDoubleVec expected;
-            std::set_intersection(A.begin(), A.end(), left.begin(), left.end(), std::back_inserter(expected));
-            std::size_t test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), left.begin(), left.end());
-            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A) << ", B = " << core::CContainerPrinter::print(left)
+            std::set_intersection(A.begin(), A.end(), left.begin(), left.end(),
+                                  std::back_inserter(expected));
+            std::size_t test = maths::CSetTools::setIntersectSize(
+                A.begin(), A.end(), left.begin(), left.end());
+            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A)
+                      << ", B = " << core::CContainerPrinter::print(left)
                       << ", |A ^ B| = " << test);
             CPPUNIT_ASSERT_EQUAL(expected.size(), test);
 
@@ -127,16 +138,22 @@ void CSetToolsTest::testSetSizes() {
                 right.push_back(a[j]);
             }
             expected.clear();
-            std::set_intersection(A.begin(), A.end(), right.begin(), right.end(), std::back_inserter(expected));
-            test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), right.begin(), right.end());
-            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A) << ", B = " << core::CContainerPrinter::print(right)
+            std::set_intersection(A.begin(), A.end(), right.begin(),
+                                  right.end(), std::back_inserter(expected));
+            test = maths::CSetTools::setIntersectSize(A.begin(), A.end(),
+                                                      right.begin(), right.end());
+            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A)
+                      << ", B = " << core::CContainerPrinter::print(right)
                       << ", |A ^ B| = " << test);
             CPPUNIT_ASSERT_EQUAL(expected.size(), test);
 
             expected.clear();
-            std::set_union(left.begin(), left.end(), right.begin(), right.end(), std::back_inserter(expected));
-            test = maths::CSetTools::setUnionSize(left.begin(), left.end(), right.begin(), right.end());
-            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(left) << ", B = " << core::CContainerPrinter::print(right)
+            std::set_union(left.begin(), left.end(), right.begin(), right.end(),
+                           std::back_inserter(expected));
+            test = maths::CSetTools::setUnionSize(left.begin(), left.end(),
+                                                  right.begin(), right.end());
+            LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(left)
+                      << ", B = " << core::CContainerPrinter::print(right)
                       << ", |A U B| = " << test);
             CPPUNIT_ASSERT_EQUAL(expected.size(), test);
         }
@@ -161,14 +178,16 @@ void CSetToolsTest::testSetSizes() {
         }
 
         TDoubleVec expected;
-        std::set_intersection(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(expected));
+        std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
+                              std::back_inserter(expected));
 
         if ((t + 1) % 10 == 0) {
             LOG_DEBUG(<< "A = " << core::CContainerPrinter::print(A));
             LOG_DEBUG(<< "B = " << core::CContainerPrinter::print(B));
         }
 
-        std::size_t test = maths::CSetTools::setIntersectSize(A.begin(), A.end(), B.begin(), B.end());
+        std::size_t test = maths::CSetTools::setIntersectSize(A.begin(), A.end(),
+                                                              B.begin(), B.end());
 
         if ((t + 1) % 10 == 0) {
             LOG_DEBUG(<< "|A ^ B| = " << test);
@@ -225,12 +244,14 @@ void CSetToolsTest::testJaccard() {
         B.erase(std::unique(B.begin(), B.end()), B.end());
 
         TSizeVec AIntersectB;
-        std::set_intersection(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(AIntersectB));
+        std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
+                              std::back_inserter(AIntersectB));
 
         TSizeVec AUnionB;
         std::set_union(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(AUnionB));
 
-        double expected = static_cast<double>(AIntersectB.size()) / static_cast<double>(AUnionB.size());
+        double expected = static_cast<double>(AIntersectB.size()) /
+                          static_cast<double>(AUnionB.size());
         double actual = maths::CSetTools::jaccard(A.begin(), A.end(), B.begin(), B.end());
 
         if ((t + 1) % 10 == 0) {
@@ -277,11 +298,13 @@ void CSetToolsTest::testOverlap() {
         B.erase(std::unique(B.begin(), B.end()), B.end());
 
         TSizeVec AIntersectB;
-        std::set_intersection(A.begin(), A.end(), B.begin(), B.end(), std::back_inserter(AIntersectB));
+        std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
+                              std::back_inserter(AIntersectB));
 
         std::size_t min = std::min(A.size(), B.size());
 
-        double expected = static_cast<double>(AIntersectB.size()) / static_cast<double>(min);
+        double expected = static_cast<double>(AIntersectB.size()) /
+                          static_cast<double>(min);
         double actual = maths::CSetTools::overlap(A.begin(), A.end(), B.begin(), B.end());
 
         if ((t + 1) % 10 == 0) {
@@ -295,11 +318,14 @@ void CSetToolsTest::testOverlap() {
 
 CppUnit::Test* CSetToolsTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSetToolsTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CSetToolsTest>("CSetToolsTest::testInplaceSetDifference", &CSetToolsTest::testInplaceSetDifference));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSetToolsTest>("CSetToolsTest::testSetSizes", &CSetToolsTest::testSetSizes));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSetToolsTest>("CSetToolsTest::testJaccard", &CSetToolsTest::testJaccard));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSetToolsTest>("CSetToolsTest::testOverlap", &CSetToolsTest::testOverlap));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSetToolsTest>(
+        "CSetToolsTest::testInplaceSetDifference", &CSetToolsTest::testInplaceSetDifference));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSetToolsTest>(
+        "CSetToolsTest::testSetSizes", &CSetToolsTest::testSetSizes));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSetToolsTest>(
+        "CSetToolsTest::testJaccard", &CSetToolsTest::testJaccard));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSetToolsTest>(
+        "CSetToolsTest::testOverlap", &CSetToolsTest::testOverlap));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CSignalTest.cc b/lib/maths/unittest/CSignalTest.cc
index 05160f3969..d0c16fef3c 100644
--- a/lib/maths/unittest/CSignalTest.cc
+++ b/lib/maths/unittest/CSignalTest.cc
@@ -34,7 +34,8 @@ void bruteForceDft(maths::CSignal::TComplexVec& f, double sign) {
     maths::CSignal::TComplexVec result(f.size(), maths::CSignal::TComplex(0.0, 0.0));
     for (std::size_t k = 0u; k < f.size(); ++k) {
         for (std::size_t n = 0u; n < f.size(); ++n) {
-            double t = -sign * boost::math::double_constants::two_pi * static_cast<double>(k * n) / static_cast<double>(f.size());
+            double t = -sign * boost::math::double_constants::two_pi *
+                       static_cast<double>(k * n) / static_cast<double>(f.size());
             result[k] += maths::CSignal::TComplex(std::cos(t), std::sin(t)) * f[n];
         }
         if (sign < 0.0) {
@@ -52,10 +53,13 @@ void CSignalTest::testFFTVersusOctave() {
 
     // Test versus values calculated using octave fft.
-    double x[][20] = {{2555.33, 1451.79, 465.60, 4394.83, -1553.24, -2772.07, -3977.73, 2249.31, -2006.04, 3540.84,
-                       4271.63, 4648.81, -727.90, 2285.24, 3129.56, -3596.79, -1968.66, 3795.18, 1627.84, 228.40},
-                      {4473.77, -4815.63, -818.38, -1953.72, -2323.39, -3007.25, 4444.24, 435.21, 3613.32, 3471.37,
-                       -1735.72, 2560.82, -2383.29, -2370.23, -4921.04, -541.25, 1516.69, -2028.42, 3981.02, 3156.88}};
+    double x[][20] = {
+        {2555.33, 1451.79, 465.60, 4394.83, -1553.24, -2772.07, -3977.73,
+         2249.31, -2006.04, 3540.84, 4271.63, 4648.81, -727.90, 2285.24,
+         3129.56, -3596.79, -1968.66, 3795.18, 1627.84, 228.40},
+        {4473.77, -4815.63, -818.38, -1953.72, -2323.39, -3007.25, 4444.24,
+         435.21, 3613.32, 3471.37, -1735.72, 2560.82, -2383.29, -2370.23,
+         -4921.04, -541.25, 1516.69, -2028.42, 3981.02, 3156.88}};
 
     maths::CSignal::TComplexVec fx;
     for (std::size_t i = 0u; i < 20; ++i) {
@@ -108,7 +112,9 @@ void CSignalTest::testFFTVersusOctave() {
 
             double error = 0.0;
             for (std::size_t j = 0u; j < l; ++j) {
-                error += std::abs(actual[j] - maths::CSignal::TComplex(expected[i + j][0], expected[i + j][1]));
+                error += std::abs(actual[j] -
+                                  maths::CSignal::TComplex(expected[i + j][0],
+                                                           expected[i + j][1]));
             }
             error /= static_cast<double>(l);
             LOG_DEBUG(<< "error = " << error);
@@ -118,16 +124,21 @@ void CSignalTest::testFFTVersusOctave() {
 
     LOG_DEBUG(<< "*** Arbitrary Length ***");
     {
-        double expected[][2] = {{18042.0, 755.0}, {961.0, 5635.6}, {-5261.8, 7542.2}, {-12814.0, 2250.2}, {-8248.5, 6620.5},
-                                {-21626.0, 3570.6}, {6551.5, -12732.0}, {6009.5, 10622.0}, {9954.0, -1224.2}, {-2871.5, 7073.6},
-                                {-14409.0, 10939.0}, {13682.0, 25304.0}, {-10468.0, -6338.5}, {6506.0, 6283.3}, {32665.0, 5127.7},
-                                {3190.7, 4323.4}, {-6988.7, -3865.0}, {-3881.4, 4360.8}, {46434.0, 20556.0}, {-6319.6, -7329.0}};
+        double expected[][2] = {
+            {18042.0, 755.0},    {961.0, 5635.6},     {-5261.8, 7542.2},
+            {-12814.0, 2250.2},  {-8248.5, 6620.5},   {-21626.0, 3570.6},
+            {6551.5, -12732.0},  {6009.5, 10622.0},   {9954.0, -1224.2},
+            {-2871.5, 7073.6},   {-14409.0, 10939.0}, {13682.0, 25304.0},
+            {-10468.0, -6338.5}, {6506.0, 6283.3},    {32665.0, 5127.7},
+            {3190.7, 4323.4},    {-6988.7, -3865.0},  {-3881.4, 4360.8},
+            {46434.0, 20556.0},  {-6319.6, -7329.0}};
 
         maths::CSignal::TComplexVec actual(fx.begin(), fx.end());
         maths::CSignal::fft(actual);
         double error = 0.0;
         for (std::size_t j = 0u; j < actual.size(); ++j) {
-            error += std::abs(actual[j] - maths::CSignal::TComplex(expected[j][0], expected[j][1]));
+            error += std::abs(actual[j] - maths::CSignal::TComplex(expected[j][0],
+                                                                   expected[j][1]));
         }
         error /= static_cast<double>(actual.size());
         LOG_DEBUG(<< "error = " << error);
@@ -142,10 +153,13 @@ void CSignalTest::testIFFTVersusOctave() {
 
     // Test versus values calculated using octave ifft.
-    double x[][20] = {{2555.33, 1451.79, 465.60, 4394.83, -1553.24, -2772.07, -3977.73, 2249.31, -2006.04, 3540.84,
-                       4271.63, 4648.81, -727.90, 2285.24, 3129.56, -3596.79, -1968.66, 3795.18, 1627.84, 228.40},
-                      {4473.77, -4815.63, -818.38, -1953.72, -2323.39, -3007.25, 4444.24, 435.21, 3613.32, 3471.37,
-                       -1735.72, 2560.82, -2383.29, -2370.23, -4921.04, -541.25, 1516.69, -2028.42, 3981.02, 3156.88}};
+    double x[][20] = {
+        {2555.33, 1451.79, 465.60, 4394.83, -1553.24, -2772.07, -3977.73,
+         2249.31, -2006.04, 3540.84, 4271.63, 4648.81, -727.90, 2285.24,
+         3129.56, -3596.79, -1968.66, 3795.18, 1627.84, 228.40},
+        {4473.77, -4815.63, -818.38, -1953.72, -2323.39, -3007.25, 4444.24,
+         435.21, 3613.32, 3471.37, -1735.72, 2560.82, -2383.29, -2370.23,
+         -4921.04, -541.25, 1516.69, -2028.42, 3981.02, 3156.88}};
 
     maths::CSignal::TComplexVec fx;
     for (std::size_t i = 0u; i < 20; ++i) {
@@ -198,7 +212,9 @@ void CSignalTest::testIFFTVersusOctave() {
 
             double error = 0.0;
             for (std::size_t j = 0u; j < l; ++j) {
-                error += std::abs(actual[j] - maths::CSignal::TComplex(expected[i + j][0], expected[i + j][1]));
+                error += std::abs(actual[j] -
+                                  maths::CSignal::TComplex(expected[i + j][0],
+                                                           expected[i + j][1]));
             }
             error /= static_cast<double>(l);
             LOG_DEBUG(<< "error = " << error);
@@ -222,10 +238,13 @@ void CSignalTest::testFFTRandomized() {
     TSizeVec lengths;
     rng.generateUniformSamples(2, 100, 1000, lengths);
 
-    for (std::size_t i = 0u, j = 0u; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) {
+    for (std::size_t i = 0u, j = 0u;
+         i < lengths.size() && j + 2 * lengths[i] < components.size();
+         ++i, j += 2 * lengths[i]) {
         maths::CSignal::TComplexVec expected;
         for (std::size_t k = 0u; k < lengths[i]; ++k) {
-            expected.push_back(maths::CSignal::TComplex(components[j + 2 * k], components[j + 2 * k + 1]));
+            expected.push_back(maths::CSignal::TComplex(components[j + 2 * k],
+                                                        components[j + 2 * k + 1]));
        }
 
         maths::CSignal::TComplexVec actual(expected);
@@ -259,10 +278,13 @@ void CSignalTest::testIFFTRandomized() {
     TSizeVec lengths;
     rng.generateUniformSamples(2, 100, 1000, lengths);
 
-    for (std::size_t i = 0u, j = 0u; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) {
+    for (std::size_t i = 0u, j = 0u;
+         i < lengths.size() && j + 2 * lengths[i] < components.size();
+         ++i, j += 2 * lengths[i]) {
         maths::CSignal::TComplexVec expected;
         for (std::size_t k = 0u; k < lengths[i]; ++k) {
-            expected.push_back(maths::CSignal::TComplex(components[j + 2 * k], components[j + 2 * k + 1]));
+            expected.push_back(maths::CSignal::TComplex(components[j + 2 * k],
+                                                        components[j + 2 * k + 1]));
         }
 
         maths::CSignal::TComplexVec actual(expected);
@@ -296,10 +318,13 @@ void CSignalTest::testFFTIFFTIdempotency() {
     TSizeVec lengths;
     rng.generateUniformSamples(2, 100, 1000, lengths);
 
-    for (std::size_t i = 0u, j = 0u; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) {
+    for (std::size_t i = 0u, j = 0u;
+         i < lengths.size() && j + 2 * lengths[i] < components.size();
+         ++i, j += 2 * lengths[i]) {
         maths::CSignal::TComplexVec expected;
         for (std::size_t k = 0u; k < lengths[i]; ++k) {
-            expected.push_back(maths::CSignal::TComplex(components[j + 2 * k], components[j + 2 * k + 1]));
+            expected.push_back(maths::CSignal::TComplex(components[j + 2 * k],
+                                                        components[j + 2 * k + 1]));
         }
 
         maths::CSignal::TComplexVec actual(expected);
@@ -349,20 +374,26 @@ void CSignalTest::testAutocorrelations() {
             LOG_DEBUG(<< "expected = " << core::CContainerPrinter::print(expected));
             LOG_DEBUG(<< "actual = " << core::CContainerPrinter::print(actual));
         }
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(actual));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expected),
+                             core::CContainerPrinter::print(actual));
     }
 }
 
 CppUnit::Test* CSignalTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSignalTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>("CSignalTest::testFFTVersusOctave", &CSignalTest::testFFTVersusOctave));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>("CSignalTest::testIFFTVersusOctave", &CSignalTest::testIFFTVersusOctave));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>("CSignalTest::testFFTRandomized", &CSignalTest::testFFTRandomized));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>("CSignalTest::testIFFTRandomized", &CSignalTest::testIFFTRandomized));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CSignalTest>("CSignalTest::testFFTIFFTIdempotency", &CSignalTest::testFFTIFFTIdempotency));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>("CSignalTest::testAutocorrelations", &CSignalTest::testAutocorrelations));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>(
+        "CSignalTest::testFFTVersusOctave", &CSignalTest::testFFTVersusOctave));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>(
+        "CSignalTest::testIFFTVersusOctave", &CSignalTest::testIFFTVersusOctave));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>(
+        "CSignalTest::testFFTRandomized", &CSignalTest::testFFTRandomized));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>(
+        "CSignalTest::testIFFTRandomized", &CSignalTest::testIFFTRandomized));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>(
+        "CSignalTest::testFFTIFFTIdempotency", &CSignalTest::testFFTIFFTIdempotency));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSignalTest>(
+        "CSignalTest::testAutocorrelations", &CSignalTest::testAutocorrelations));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CSolversTest.cc b/lib/maths/unittest/CSolversTest.cc
index 16c7611281..110cf8be48 100644
--- a/lib/maths/unittest/CSolversTest.cc
+++ b/lib/maths/unittest/CSolversTest.cc
@@ -41,17 +41,20 @@ double f3(const double& x) {
 
 //! Root at x = 2/3.
 double f4(const double& x) {
-    return x <= 2.0 / 3.0 ? std::pow(std::fabs(x - 2.0 / 3.0), 0.2) : -std::pow(std::fabs(x - 2.0 / 3.0), 0.2);
+    return x <= 2.0 / 3.0 ? std::pow(std::fabs(x - 2.0 / 3.0), 0.2)
+                          : -std::pow(std::fabs(x - 2.0 / 3.0), 0.2);
 }
 
 //! This has local maxima at 4 and 10.
 double f5(const double& x) {
-    return 1.1 * std::exp(-(x - 4.0) * (x - 4.0)) + 0.4 * std::exp(-(x - 10.0) * (x - 10.0) / 4.0);
+    return 1.1 * std::exp(-(x - 4.0) * (x - 4.0)) +
+           0.4 * std::exp(-(x - 10.0) * (x - 10.0) / 4.0);
 }
 
 //! This has local maxima at 4, 6 and 10.
 double f6(const double& x) {
-    return 1.1 * std::exp(-2.0 * (x - 4.0) * (x - 4.0)) + 0.1 * std::exp(-(x - 6.0) * (x - 6.0)) +
+    return 1.1 * std::exp(-2.0 * (x - 4.0) * (x - 4.0)) +
+           0.1 * std::exp(-(x - 6.0) * (x - 6.0)) +
            0.4 * std::exp(-(x - 10.0) * (x - 10.0) / 2.0);
 }
 
@@ -62,7 +65,8 @@ class CLog {
 public:
     double operator()(const double& x) const {
         if (x <= 0.0) {
-            throw std::range_error("Bad value to log " + core::CStringUtils::typeToString(x));
+            throw std::range_error("Bad value to log " +
+                                   core::CStringUtils::typeToString(x));
         }
         return std::log(x);
     }
@@ -80,7 +84,8 @@ void CSolversTest::testBracket() {
         double a = 0.5, b = 0.5;
         double fa = f(a), fb = f(b);
         CPPUNIT_ASSERT(CSolvers::rightBracket(a, b, fa, fb, f, maxIterations));
-        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << fa << ", f(b) = " << fb << ", maxIterations = " << maxIterations);
+        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << fa
+                  << ", f(b) = " << fb << ", maxIterations = " << maxIterations);
         CPPUNIT_ASSERT_EQUAL(f(a), fa);
         CPPUNIT_ASSERT_EQUAL(f(b), fb);
         CPPUNIT_ASSERT(fa * fb <= 0.0);
@@ -92,7 +97,8 @@ void CSolversTest::testBracket() {
         double a = 0.5, b = 0.6;
         double fa = f(a), fb = f(b);
         CPPUNIT_ASSERT(CSolvers::rightBracket(a, b, fa, fb, f, maxIterations));
-        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << fa << ", f(b) = " << fb << ", maxIterations = " << maxIterations);
+        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << fa
+                  << ", f(b) = " << fb << ", maxIterations = " << maxIterations);
         CPPUNIT_ASSERT_EQUAL(f(a), fa);
         CPPUNIT_ASSERT_EQUAL(f(b), fb);
         CPPUNIT_ASSERT(fa * fb <= 0.0);
@@ -104,7 +110,8 @@ void CSolversTest::testBracket() {
         double a = 0.5, b = 5.0;
         double fa = f(a), fb = f(b);
         CPPUNIT_ASSERT(CSolvers::rightBracket(a, b, fa, fb, f, maxIterations));
-        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << fa << ", f(b) = " << fb << ", maxIterations = " << maxIterations);
+        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << fa
+                  << ", f(b) = " << fb << ", maxIterations = " << maxIterations);
         CPPUNIT_ASSERT_EQUAL(f(a), fa);
         CPPUNIT_ASSERT_EQUAL(f(b), fb);
         CPPUNIT_ASSERT(fa * fb <= 0.0);
@@ -115,8 +122,10 @@ void CSolversTest::testBracket() {
         std::size_t maxIterations = 10u;
         double a = 100.0, b = 100.0;
         double fa = f(a), fb = f(b);
-        CPPUNIT_ASSERT(CSolvers::leftBracket(a, b, fa, fb, f, maxIterations, std::numeric_limits<double>::min()));
-        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << fa << ", f(b) = " << fb << ", maxIterations = " << maxIterations);
+        CPPUNIT_ASSERT(CSolvers::leftBracket(a, b, fa, fb, f, maxIterations,
+                                             std::numeric_limits<double>::min()));
+        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << fa
+                  << ", f(b) = " << fb << ", maxIterations = " << maxIterations);
         CPPUNIT_ASSERT_EQUAL(f(a), fa);
         CPPUNIT_ASSERT_EQUAL(f(b), fb);
         CPPUNIT_ASSERT(fa * fb <= 0.0);
@@ -162,7 +171,8 @@ void CSolversTest::testBisection() {
         iterations = 10;
         CEqualWithTolerance<double> equal(CToleranceTypes::E_AbsoluteTolerance, 0.1);
         CPPUNIT_ASSERT(CSolvers::bisection(a, b, -5.0, 5.0, &f1, iterations, equal, bestGuess));
-        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f1(a) << ", f(b) = " << f1(b) << ", iterations = " << iterations
+        LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f1(a)
+                  << ", f(b) = " << f1(b) << ", iterations = " << iterations
                   << ", bestGuess = " << bestGuess);
         CPPUNIT_ASSERT_EQUAL(5.0, bestGuess);
     }
@@ -182,11 +192,13 @@ void CSolversTest::testBisection() {
             LOG_DEBUG(<< "iterations = " << iterations);
             CPPUNIT_ASSERT_EQUAL(i, iterations);
 
-            LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f3(a) << ", f(b) = " << f3(b));
+            LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f3(a)
+                      << ", f(b) = " << f3(b));
             CPPUNIT_ASSERT(f3(a) * f3(b) <= 0.0);
 
             double error = std::fabs(bestGuess - 0.7390851332151607);
-            LOG_DEBUG(<< "bestGuess = " << bestGuess << ", f(bestGuess) = " << f3(bestGuess) << ", error = " << error);
+            LOG_DEBUG(<< "bestGuess = " << bestGuess
+                      << ", f(bestGuess) = " << f3(bestGuess) << ", error = " << error);
             CPPUNIT_ASSERT(error < std::fabs((a + b) / 2.0 - 0.7390851332151607));
             double convergenceFactor = error / lastError;
             lastError = error;
@@ -224,13 +236,15 @@ void CSolversTest::testBisection() {
             LOG_DEBUG(<< "iterations = " << iterations);
             CPPUNIT_ASSERT_EQUAL(i, iterations);
 
-            LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f4(a) << ", f(b) = " << f4(b));
+            LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f4(a)
+                      << ", f(b) = " << f4(b));
             CPPUNIT_ASSERT(f4(a) * f4(b) <= 0.0);
             CPPUNIT_ASSERT_DOUBLES_EQUAL(0.5 * lastInterval, b - a, 1e-5);
             lastInterval = b - a;
 
             double error = std::fabs(bestGuess - 2.0 / 3.0);
-            LOG_DEBUG(<< "bestGuess = " << bestGuess << ", f(bestGuess) = " << f4(bestGuess) << ", error = " << error);
+            LOG_DEBUG(<< "bestGuess = " << bestGuess
+                      << ", f(bestGuess) = " << f4(bestGuess) << ", error = " << error);
             CPPUNIT_ASSERT(error < std::fabs((a + b) / 2.0 - 2.0 / 3.0));
             convergenceFactor *= (error / lastError);
             lastError = error;
@@ -311,11 +325,13 @@ void CSolversTest::testBrent() {
             LOG_DEBUG(<< "iterations = " << iterations);
             CPPUNIT_ASSERT_EQUAL(i, iterations);
 
-            LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f3(a) << ", f(b) = " << f3(b));
+            LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f3(a)
+                      << ", f(b) = " << f3(b));
             CPPUNIT_ASSERT(f3(a) * f3(b) <= 0.0);
 
             double error = std::fabs(bestGuess - 0.7390851332151607);
-            LOG_DEBUG(<< "bestGuess = " << bestGuess << ", f(bestGuess) = " << f3(bestGuess) << ", error = " << error);
+            LOG_DEBUG(<< "bestGuess = " << bestGuess
+                      << ", f(bestGuess) = " << f3(bestGuess) << ", error = " << error);
             CPPUNIT_ASSERT(error < std::fabs((a + b) / 2.0 - 0.7390851332151607));
             double convergenceFactor = error / lastError;
             lastError = error;
@@ -346,11 +362,13 @@ void CSolversTest::testBrent() {
             LOG_DEBUG(<< "iterations = " << iterations);
             CPPUNIT_ASSERT_EQUAL(i, iterations);
 
-            LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f4(a) << ", f(b) = " << f4(b));
+            LOG_DEBUG(<< "a = " << a << ", b = " << b << ", f(a) = " << f4(a)
+                      << ", f(b) = " << f4(b));
             CPPUNIT_ASSERT(f4(a) * f4(b) <= 0.0);
 
             double error = std::fabs(bestGuess - 2.0 / 3.0);
-            LOG_DEBUG(<< "bestGuess = " << bestGuess << ", f(bestGuess) = " << f4(bestGuess) << ", error = " << error);
+            LOG_DEBUG(<< "bestGuess = " << bestGuess
+                      << ", f(bestGuess) = " << f4(bestGuess) << ", error = " << error);
             CPPUNIT_ASSERT(error < std::fabs((a + b) / 2.0 - 2.0 / 3.0));
             double convergenceFactor = error / lastError;
             lastError = error;
@@ -384,7 +402,8 @@ void CSolversTest::testSublevelSet() {
             CPPUNIT_ASSERT(sublevelSet.second - sublevelSet.first < 1e-4);
         }
         LOG_DEBUG(<< "sublevelSet = " << core::CContainerPrinter::print(sublevelSet));
-        LOG_DEBUG(<< "f(a) = " << f5(sublevelSet.first) << ", f(b) = " << f5(sublevelSet.second));
+        LOG_DEBUG(<< "f(a) = " << f5(sublevelSet.first)
+                  << ", f(b) = " << f5(sublevelSet.second));
     }
 
     LOG_DEBUG(<< "*** f(x) = 1.1 * exp(-2.0*(x-4)^2) + 0.1 * exp(-(x-6)^2) + 0.4 * exp(-(x-10)^2/2) ***");
@@ -393,10 +412,12 @@ void CSolversTest::testSublevelSet() {
     for (std::size_t i = 0u; i < 15u; ++i, fmax *= 0.9) {
         LOG_DEBUG(<< "fmax = " << fmax);
 
-        bool found = CSolvers::sublevelSet(4.0, 10.0, f6(4.0), f6(10.0), &f6, fmax, 15, sublevelSet);
+        bool found = CSolvers::sublevelSet(4.0, 10.0, f6(4.0), f6(10.0), &f6,
+                                           fmax, 15, sublevelSet);
 
         LOG_DEBUG(<< "sublevelSet = " << core::CContainerPrinter::print(sublevelSet));
-        LOG_DEBUG(<< "f(a) = " << f6(sublevelSet.first) << ", f(b) = " << f6(sublevelSet.second));
+        LOG_DEBUG(<< "f(a) = " << f6(sublevelSet.first)
+                  << ", f(b) = " << f6(sublevelSet.second));
 
         if (found) {
             CPPUNIT_ASSERT_DOUBLES_EQUAL(fmax, f6(sublevelSet.first), 1e-4);
@@ -410,10 +431,14 @@ void CSolversTest::testSublevelSet() {
 
 CppUnit::Test* CSolversTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSolversTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSolversTest>("CSolversTest::testBracket", &CSolversTest::testBracket));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSolversTest>("CSolversTest::testBisection", &CSolversTest::testBisection));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSolversTest>("CSolversTest::testBrent", &CSolversTest::testBrent));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CSolversTest>("CSolversTest::testSublevelSet", &CSolversTest::testSublevelSet));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSolversTest>(
+        "CSolversTest::testBracket", &CSolversTest::testBracket));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSolversTest>(
+        "CSolversTest::testBisection", &CSolversTest::testBisection));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSolversTest>(
+        "CSolversTest::testBrent", &CSolversTest::testBrent));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CSolversTest>(
+        "CSolversTest::testSublevelSet", &CSolversTest::testSublevelSet));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CSplineTest.cc b/lib/maths/unittest/CSplineTest.cc
index a9bfc36f2d..ceed2e0e49 100644
--- a/lib/maths/unittest/CSplineTest.cc
+++ b/lib/maths/unittest/CSplineTest.cc
@@ -83,7 +83,8 @@ void CSplineTest::testNatural() {
         const TDoubleVec& curvatures = spline.curvatures();
         std::size_t n = curvatures.size();
-        LOG_DEBUG(<< "curvatures[0] = " << curvatures[0] << ", curvatures[n] = " << curvatures[n - 1]);
+        LOG_DEBUG(<< "curvatures[0] = " << curvatures[0]
+                  << ", curvatures[n] = " << curvatures[n - 1]);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[0], 1e-10);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[n - 1], 1e-10);
     }
@@ -103,15 +104,18 @@ void CSplineTest::testNatural() {
         spline.interpolate(x, y, maths::CSplineTypes::E_Natural);
 
         for (std::size_t i = 0u; i < 21; ++i) {
-            double xx = boost::math::double_constants::two_pi * static_cast<double>(i) / 20.0;
+            double xx = boost::math::double_constants::two_pi *
+                        static_cast<double>(i) / 20.0;
             double yy = spline.value(xx);
-            LOG_DEBUG(<< "spline(" << xx << ") = " << yy << ", f(" << xx << ") = " << std::sin(xx));
+            LOG_DEBUG(<< "spline(" << xx << ") = " << yy << ", f(" << xx
+                      << ") = " << std::sin(xx));
             CPPUNIT_ASSERT(std::fabs(std::sin(xx) - yy) < 0.02);
         }
 
         const TDoubleVec& curvatures = spline.curvatures();
         std::size_t n = curvatures.size();
-        LOG_DEBUG(<< "curvatures[0] = " << curvatures[0] << ", curvatures[n] = " << curvatures[n - 1]);
+        LOG_DEBUG(<< "curvatures[0] = " << curvatures[0]
+                  << ", curvatures[n] = " << curvatures[n - 1]);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[0], 1e-10);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, curvatures[n - 1], 1e-10);
     }
@@ -148,9 +152,11 @@ void CSplineTest::testParabolicRunout() {
         const TDoubleVec& curvatures = spline.curvatures();
         std::size_t n = curvatures.size();
-        LOG_DEBUG(<< "curvatures[0] = " << curvatures[0] << ", curvatures[1] = " << curvatures[1]);
+        LOG_DEBUG(<< "curvatures[0] = " << curvatures[0]
+                  << ", curvatures[1] = " << curvatures[1]);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[0], curvatures[1], 1e-10);
-        LOG_DEBUG(<< "curvatures[n-1] = " << curvatures[n - 2] << ", curvatures[n] = " << curvatures[n - 1]);
+        LOG_DEBUG(<< "curvatures[n-1] = " << curvatures[n - 2]
+                  << ", curvatures[n] = " << curvatures[n - 1]);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[n - 2], curvatures[n - 1], 1e-10);
     }
 
@@ -169,17 +175,21 @@ void CSplineTest::testParabolicRunout() {
         spline.interpolate(x, y, maths::CSplineTypes::E_ParabolicRunout);
 
         for (std::size_t i = 0u; i < 21; ++i) {
-            double xx = boost::math::double_constants::two_pi * static_cast<double>(i) / 20.0;
+            double xx = boost::math::double_constants::two_pi *
+                        static_cast<double>(i) / 20.0;
             double yy = spline.value(xx);
-            LOG_DEBUG(<< "spline(" << xx << ") = " << yy << ", f(" << xx << ") = " << std::sin(xx));
+            LOG_DEBUG(<< "spline(" << xx << ") = " << yy << ", f(" << xx
+                      << ") = " << std::sin(xx));
             CPPUNIT_ASSERT(std::fabs(std::sin(xx) - yy) < 0.04);
         }
 
         const TDoubleVec& curvatures = spline.curvatures();
         std::size_t n = curvatures.size();
-        LOG_DEBUG(<< "curvatures[0] = " << curvatures[0] << ", curvatures[1] = " << curvatures[1]);
+        LOG_DEBUG(<< "curvatures[0] = " << curvatures[0]
+                  << ", curvatures[1] = " << curvatures[1]);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[0], curvatures[1], 1e-10);
-        LOG_DEBUG(<< "curvatures[n-1] = " << curvatures[n - 2] << ", curvatures[n] = " << curvatures[n - 1]);
+        LOG_DEBUG(<< "curvatures[n-1] = " << curvatures[n - 2]
+                  << ", curvatures[n] = " << curvatures[n - 1]);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(curvatures[n - 2], curvatures[n - 1], 1e-10);
     }
 }
@@ -204,9 +214,11 @@ void CSplineTest::testPeriodic() {
         spline.interpolate(x, y, maths::CSplineTypes::E_Periodic);
 
         for (std::size_t i = 0u; i < 21; ++i) {
-            double xx = boost::math::double_constants::two_pi * static_cast<double>(i) / 20.0;
+            double xx = boost::math::double_constants::two_pi *
+                        static_cast<double>(i) / 20.0;
             double yy = spline.value(xx);
-            LOG_DEBUG(<< "spline(" << xx << ") = " << yy << ", f(" << xx << ") = " << std::cos(xx));
+            LOG_DEBUG(<< "spline(" << xx << ") = " << yy << ", f(" << xx
+                      << ") = " << std::cos(xx));
             CPPUNIT_ASSERT(std::fabs(std::cos(xx) - yy) < 0.02);
         }
     }
@@ -216,7 +228,9 @@ void CSplineTest::testPeriodic() {
         for (std::size_t i = 0u; i < 40; ++i) {
             x.push_back(static_cast<double>(i) * 5.0);
         }
-        double y_[] = {10.0, 7.0, 5.0, 3.0, 1.5, 3.5, 7.5, 15.5, 15.6, 15.5, 15.0, 14.0, 13.0, 12.0, 10.0, 8.0, 4.0, 4.1, 10.0, 10.0};
+        double y_[] = {10.0, 7.0,  5.0,  3.0,  1.5,  3.5,  7.5,
+                       15.5, 15.6, 15.5, 15.0, 14.0, 13.0, 12.0,
+                       10.0, 8.0,  4.0,  4.1,  10.0, 10.0};
         TDoubleVec y(boost::begin(y_), boost::end(y_));
         y.insert(y.end(), boost::begin(y_), boost::end(y_));
 
@@ -246,7 +260,8 @@ void CSplineTest::testMean() {
     // (numerical) integral and the expected mean of the cosine
     // over a whole number of periods.
-    maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear, maths::CSplineTypes::E_Cubic};
+    maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear,
+                                          maths::CSplineTypes::E_Cubic};
 
     {
         double x_[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
@@ -267,13 +282,16 @@ void CSplineTest::testMean() {
                 double a = x[i - 1];
                 double b = x[i];
                 double integral;
-                maths::CIntegration::gaussLegendre<maths::CIntegration::OrderThree>(f, a, b, integral);
+                maths::CIntegration::gaussLegendre<maths::CIntegration::OrderThree>(
+                    f, a, b, integral);
                 expectedMean += integral;
             }
             expectedMean /= (x[n] - x[0]);
 
-            LOG_DEBUG(<< "expectedMean = " << expectedMean << ", mean = " << spline.mean());
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, spline.mean(), std::numeric_limits<double>::epsilon() * expectedMean);
+            LOG_DEBUG(<< "expectedMean = " << expectedMean
+                      << ", mean = " << spline.mean());
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, spline.mean(),
+                                         std::numeric_limits<double>::epsilon() * expectedMean);
         }
     }
 
@@ -303,12 +321,14 @@ void CSplineTest::testMean() {
                 double a = x[j - 1];
                 double b = x[j];
                 double integral;
-                maths::CIntegration::gaussLegendre<maths::CIntegration::OrderThree>(f, a, b, integral);
+                maths::CIntegration::gaussLegendre<maths::CIntegration::OrderThree>(
+                    f, a, b, integral);
                 expectedMean += integral;
             }
             expectedMean /= (x[n[0] - 1] - x[0]);
 
-            LOG_DEBUG(<< "expectedMean = " << expectedMean << ", mean = " << spline.mean());
+            LOG_DEBUG(<< "expectedMean = " << expectedMean
+                      << ", mean = " << spline.mean());
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMean, spline.mean(), 1e-4);
         }
 
@@ -320,7 +340,8 @@ void CSplineTest::testMean() {
         TDoubleVec y;
         for (std::size_t i = 0u; i < 21; ++i) {
             x.push_back(static_cast<double>(20 * i));
-            y.push_back(std::cos(boost::math::double_constants::two_pi * static_cast<double>(i) / 10.0));
+            y.push_back(std::cos(boost::math::double_constants::two_pi *
+                                 static_cast<double>(i) / 10.0));
         }
 
         for (std::size_t t = 0u; t < boost::size(types); ++t) {
@@ -342,12 +363,14 @@ void CSplineTest::testIllposed() {
 
     // Test a case where some of the knot points are colocated.
 
-    double x_[] = {0.0, 0.0, 10.0, 10.0, 15.0, 15.5, 20.0, 20.0, 20.0, 28.0, 30.0, 30.0};
+    double x_[] = {0.0,  0.0,  10.0, 10.0, 15.0, 15.5,
+                   20.0, 20.0, 20.0, 28.0, 30.0, 30.0};
     TDoubleVec x(boost::begin(x_), boost::end(x_));
     double y_[] = {0.0, 0.0, 1.9, 2.1, 3.0, 3.1, 4.0, 4.0, 4.0, 5.6, 5.9, 6.1};
     TDoubleVec y(boost::begin(y_), boost::end(y_));
 
-    maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear, maths::CSplineTypes::E_Cubic};
+    maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear,
+                                          maths::CSplineTypes::E_Cubic};
 
     for (std::size_t t = 0u; t < boost::size(types); ++t) {
         LOG_DEBUG(<< "*** Interpolation '" << print(types[t]) << "' ***");
@@ -365,8 +388,10 @@ void CSplineTest::testIllposed() {
         }
 
         for (std::size_t i = 0u; i <= 30; ++i) {
-            LOG_DEBUG(<< "expected = " << 0.2 * static_cast<double>(i) << ", actual = " << spline.value(static_cast<double>(i)));
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(0.2 * static_cast<double>(i), spline.value(static_cast<double>(i)), 5e-7);
+            LOG_DEBUG(<< "expected = " << 0.2 * static_cast<double>(i)
+                      << ", actual = " << spline.value(static_cast<double>(i)));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(0.2 * static_cast<double>(i),
+                                         spline.value(static_cast<double>(i)), 5e-7);
         }
     }
 }
@@ -379,7 +404,8 @@ void CSplineTest::testSlope() {
     // Test that the slope and absolute slope agree with the
     // numerical derivatives of the value.
- maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear, maths::CSplineTypes::E_Cubic}; + maths::CSplineTypes::EType types[] = {maths::CSplineTypes::E_Linear, + maths::CSplineTypes::E_Cubic}; double eps = 1e-4; { @@ -402,8 +428,10 @@ void CSplineTest::testSlope() { double xiPlusEps = xi + eps; double xiMinusEps = xi - eps; double slope = spline.slope(xi); - double numericalSlope = (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); - LOG_DEBUG(<< "x = " << xi << ", slope = " << slope << ", numerical slope = " << numericalSlope); + double numericalSlope = + (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); + LOG_DEBUG(<< "x = " << xi << ", slope = " << slope + << ", numerical slope = " << numericalSlope); CPPUNIT_ASSERT_DOUBLES_EQUAL(numericalSlope, slope, 6.0 * eps * eps); } } @@ -437,11 +465,14 @@ void CSplineTest::testSlope() { double xiPlusEps = xj + eps; double xiMinusEps = xj - eps; double slope = spline.slope(xj); - double numericalSlope = (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); + double numericalSlope = + (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); if (i % 10 == 0) { - LOG_DEBUG(<< "x = " << xj << ", slope = " << slope << ", numerical slope = " << numericalSlope); + LOG_DEBUG(<< "x = " << xj << ", slope = " << slope + << ", numerical slope = " << numericalSlope); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(numericalSlope, slope, 1e-3 * std::fabs(numericalSlope)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(numericalSlope, slope, + 1e-3 * std::fabs(numericalSlope)); } } } @@ -452,7 +483,8 @@ void CSplineTest::testSlope() { TDoubleVec y; for (std::size_t i = 0u; i < 21; ++i) { x.push_back(static_cast(20 * i)); - y.push_back(std::cos(boost::math::double_constants::two_pi * static_cast(i) / 10.0)); + y.push_back(std::cos(boost::math::double_constants::two_pi * + static_cast(i) / 10.0)); } double range = x[x.size() - 1] - x[0]; @@ -467,8 +499,10 @@ void CSplineTest::testSlope() { double xiPlusEps = xi + eps; double xiMinusEps = xi - eps; double slope = spline.slope(xi); - double numericalSlope = (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); - LOG_DEBUG(<< "x = " << xi << ", slope = " << slope << ", numerical slope = " << numericalSlope); + double numericalSlope = + (spline.value(xiPlusEps) - spline.value(xiMinusEps)) / (2 * eps); + LOG_DEBUG(<< "x = " << xi << ", slope = " << slope + << ", numerical slope = " << numericalSlope); CPPUNIT_ASSERT_DOUBLES_EQUAL(numericalSlope, slope, eps * eps); } } @@ -501,27 +535,35 @@ void CSplineTest::testSplineReference() { TFloatVec knotsStorage; TFloatVec valuesStorage; TDoubleVec curvaturesStorage; - TSplineRef splineRef(maths::CSplineTypes::E_Cubic, boost::ref(knotsStorage), boost::ref(valuesStorage), boost::ref(curvaturesStorage)); + TSplineRef splineRef(maths::CSplineTypes::E_Cubic, boost::ref(knotsStorage), + boost::ref(valuesStorage), boost::ref(curvaturesStorage)); splineRef.interpolate(x, y, maths::CSplineTypes::E_Natural); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.knots()), core::CContainerPrinter::print(splineRef.knots())); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.values()), core::CContainerPrinter::print(splineRef.values())); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.curvatures()), core::CContainerPrinter::print(splineRef.curvatures())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.knots()), + core::CContainerPrinter::print(splineRef.knots())); + 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.values()), + core::CContainerPrinter::print(splineRef.values())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(spline.curvatures()), + core::CContainerPrinter::print(splineRef.curvatures())); for (std::size_t i = 0u; i < 21; ++i) { double xx = boost::math::double_constants::two_pi * static_cast(i) / 20.0; - LOG_DEBUG(<< "spline.value(" << xx << ") = " << spline.value(xx) << ", splineRef.value(" << xx << ") = " << splineRef.value(xx)); + LOG_DEBUG(<< "spline.value(" << xx << ") = " << spline.value(xx) + << ", splineRef.value(" << xx << ") = " << splineRef.value(xx)); CPPUNIT_ASSERT_EQUAL(spline.value(xx), splineRef.value(xx)); - LOG_DEBUG(<< "spline.slope(" << xx << ") = " << spline.slope(xx) << ", splineRef.slope(" << xx << ") = " << splineRef.slope(xx)); + LOG_DEBUG(<< "spline.slope(" << xx << ") = " << spline.slope(xx) + << ", splineRef.slope(" << xx << ") = " << splineRef.slope(xx)); CPPUNIT_ASSERT_EQUAL(spline.slope(xx), splineRef.slope(xx)); } - LOG_DEBUG(<< "spline.mean() = " << spline.mean() << ", splineRef.mean() = " << splineRef.mean()); + LOG_DEBUG(<< "spline.mean() = " << spline.mean() + << ", splineRef.mean() = " << splineRef.mean()); CPPUNIT_ASSERT_EQUAL(spline.mean(), splineRef.mean()); - LOG_DEBUG(<< "spline.absSlope() = " << spline.absSlope() << ", splineRef.absSlope() = " << splineRef.absSlope()); + LOG_DEBUG(<< "spline.absSlope() = " << spline.absSlope() + << ", splineRef.absSlope() = " << splineRef.absSlope()); CPPUNIT_ASSERT_EQUAL(spline.absSlope(), splineRef.absSlope()); LOG_DEBUG(<< "splineRef.memoryUsage = " << splineRef.memoryUsage()); @@ -531,13 +573,20 @@ void CSplineTest::testSplineReference() { CppUnit::Test* CSplineTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSplineTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testNatural", &CSplineTest::testNatural)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testParabolicRunout", &CSplineTest::testParabolicRunout)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testPeriodic", &CSplineTest::testPeriodic)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testMean", &CSplineTest::testMean)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testIllposed", &CSplineTest::testIllposed)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testSlope", &CSplineTest::testSlope)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSplineTest::testSplineReference", &CSplineTest::testSplineReference)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSplineTest::testNatural", &CSplineTest::testNatural)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSplineTest::testParabolicRunout", &CSplineTest::testParabolicRunout)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSplineTest::testPeriodic", &CSplineTest::testPeriodic)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSplineTest::testMean", &CSplineTest::testMean)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSplineTest::testIllposed", &CSplineTest::testIllposed)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSplineTest::testSlope", &CSplineTest::testSlope)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSplineTest::testSplineReference", &CSplineTest::testSplineReference)); return suiteOfTests; } diff --git a/lib/maths/unittest/CStatisticalTestsTest.cc b/lib/maths/unittest/CStatisticalTestsTest.cc index 1102cb5994..78a101a872 100644 --- 
a/lib/maths/unittest/CStatisticalTestsTest.cc
+++ b/lib/maths/unittest/CStatisticalTestsTest.cc
@@ -40,7 +40,8 @@ void CStatisticalTestsTest::testCramerVonMises() {
        // are correct if the random variable and the distribution
        // function are perfectly matched.

-        const std::size_t n[] = {2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40, 50, 100, 200, 500};
+        const std::size_t n[] = {2,  3,  4,  5,  6,  7,   8,   9, 10,
+                                 15, 20, 30, 40, 50, 100, 200, 500};

        test::CRandomNumbers rng;

@@ -67,8 +68,9 @@ void CStatisticalTestsTest::testCramerVonMises() {
            double meanError = 0.0;
            for (std::size_t j = 0; j < 21; ++j) {
                double percentile = static_cast<double>(j) / 20.0;
-                double pp =
-                    static_cast<double>(std::lower_bound(p.begin(), p.end(), percentile) - p.begin()) / static_cast<double>(p.size());
+                double pp = static_cast<double>(std::lower_bound(p.begin(), p.end(), percentile) -
+                                                p.begin()) /
+                            static_cast<double>(p.size());
                LOG_DEBUG(<< "percentile = " << percentile << ", p value percentile = " << pp
                          << ", error = " << std::fabs(pp - percentile));
                meanError += std::fabs(pp - percentile);
@@ -98,8 +100,9 @@ void CStatisticalTestsTest::testCramerVonMises() {
            double meanError = 0.0;
            for (std::size_t j = 0; j < 21; ++j) {
                double percentile = static_cast<double>(j) / 20.0;
-                double pp =
-                    static_cast<double>(std::lower_bound(p.begin(), p.end(), percentile) - p.begin()) / static_cast<double>(p.size());
+                double pp = static_cast<double>(std::lower_bound(p.begin(), p.end(), percentile) -
+                                                p.begin()) /
+                            static_cast<double>(p.size());
                LOG_DEBUG(<< "percentile = " << percentile << ", p value percentile = " << pp
                          << ", error = " << std::fabs(pp - percentile));
                meanError += std::fabs(pp - percentile);
@@ -167,10 +170,10 @@ void CStatisticalTestsTest::testPersist() {
CppUnit::Test* CStatisticalTestsTest::suite() {
    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStatisticalTestsTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CStatisticalTestsTest>("CStatisticalTestsTest::testCramerVonMises",
-                                                                         &CStatisticalTestsTest::testCramerVonMises));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CStatisticalTestsTest>("CStatisticalTestsTest::testPersist", &CStatisticalTestsTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStatisticalTestsTest>(
+        "CStatisticalTestsTest::testCramerVonMises", &CStatisticalTestsTest::testCramerVonMises));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CStatisticalTestsTest>(
+        "CStatisticalTestsTest::testPersist", &CStatisticalTestsTest::testPersist));

    return suiteOfTests;
}
diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc
index 9bc3b4441e..ce88c4d4ee 100644
--- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc
+++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc
@@ -48,10 +48,12 @@ core_t::TTime BUCKET_LENGTH{1800};
const double DECAY_RATE{0.0002};

TPriorPtr makeResidualModel() {
-    maths::CGammaRateConjugate gamma{maths::CGammaRateConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.1, DECAY_RATE)};
-    maths::CLogNormalMeanPrecConjugate lognormal{
-        maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 1.0, DECAY_RATE)};
-    maths::CNormalMeanPrecConjugate normal{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE)};
+    maths::CGammaRateConjugate gamma{maths::CGammaRateConjugate::nonInformativePrior(
+        maths_t::E_ContinuousData, 0.1, DECAY_RATE)};
+    maths::CLogNormalMeanPrecConjugate lognormal{maths::CLogNormalMeanPrecConjugate::nonInformativePrior(
+        maths_t::E_ContinuousData, 1.0, DECAY_RATE)};
+    maths::CNormalMeanPrecConjugate 
normal{maths::CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, DECAY_RATE)}; TPriorPtrVec mode; mode.reserve(3u); @@ -59,9 +61,15 @@ TPriorPtr makeResidualModel() { mode.emplace_back(lognormal.clone()); mode.emplace_back(normal.clone()); maths::COneOfNPrior modePrior{mode, maths_t::E_ContinuousData, DECAY_RATE}; - maths::CXMeansOnline1d clusterer{ - maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, DECAY_RATE, 0.05, 12.0, 1.0}; - maths::CMultimodalPrior multimodal{maths_t::E_ContinuousData, clusterer, modePrior, DECAY_RATE}; + maths::CXMeansOnline1d clusterer{maths_t::E_ContinuousData, + maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, + DECAY_RATE, + 0.05, + 12.0, + 1.0}; + maths::CMultimodalPrior multimodal{maths_t::E_ContinuousData, clusterer, + modePrior, DECAY_RATE}; TPriorPtrVec models; mode.emplace_back(gamma.clone()); @@ -69,7 +77,8 @@ TPriorPtr makeResidualModel() { mode.emplace_back(normal.clone()); mode.emplace_back(multimodal.clone()); - return TPriorPtr{maths::COneOfNPrior{mode, maths_t::E_ContinuousData, DECAY_RATE}.clone()}; + return TPriorPtr{ + maths::COneOfNPrior{mode, maths_t::E_ContinuousData, DECAY_RATE}.clone()}; } } @@ -91,23 +100,27 @@ void CTimeSeriesChangeDetectorTest::testNoChange() { switch (t % 3) { case 0: - rng.generateNormalSamples(10.0, variances[(t / 3) % variances.size()], 1000, samples); + rng.generateNormalSamples(10.0, variances[(t / 3) % variances.size()], + 1000, samples); break; case 1: rng.generateLogNormalSamples(1.0, scales[(t / 3) % scales.size()], 1000, samples); break; case 2: - rng.generateGammaSamples(10.0, 10.0 * scales[(t / 3) % scales.size()], 1000, samples); + rng.generateGammaSamples(10.0, 10.0 * scales[(t / 3) % scales.size()], + 1000, samples); break; } - TDecompositionPtr trendModel(new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH}); + TDecompositionPtr trendModel( + new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH}); TPriorPtr residualModel(makeResidualModel()); auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) { trendModel->addPoint(time, x); double detrended{trendModel->detrend(time, x, 0.0)}; - residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{1.0}}); + residualModel->addSamples(maths::CConstantWeights::COUNT, + {detrended}, {{1.0}}); residualModel->propagateForwardsByTime(1.0); }; @@ -118,10 +131,12 @@ void CTimeSeriesChangeDetectorTest::testNoChange() { } maths::CUnivariateTimeSeriesChangeDetector detector{ - trendModel, residualModel, 6 * core::constants::HOUR, 24 * core::constants::HOUR, 14.0}; + trendModel, residualModel, 6 * core::constants::HOUR, + 24 * core::constants::HOUR, 14.0}; for (std::size_t i = 950u; i < samples.size(); ++i) { addSampleToModel(time, samples[i]); - detector.addSamples(maths::CConstantWeights::COUNT, {{time, samples[i]}}, {{1.0}}); + detector.addSamples(maths::CConstantWeights::COUNT, + {{time, samples[i]}}, {{1.0}}); if (detector.stopTesting()) { break; } @@ -141,7 +156,8 @@ void CTimeSeriesChangeDetectorTest::testLevelShift() { TGeneratorVec trends{constant, ramp, smoothDaily, weekends, spikeyDaily}; this->testChange( - trends, maths::SChangeDescription::E_LevelShift, [](TGenerator trend, core_t::TTime time) { return trend(time) + 0.5; }, 5.0, 15.0); + trends, maths::SChangeDescription::E_LevelShift, + [](TGenerator trend, core_t::TTime time) { return trend(time) + 0.5; }, 5.0, 15.0); } void 
CTimeSeriesChangeDetectorTest::testLinearScale() {
@@ -151,11 +167,9 @@ void CTimeSeriesChangeDetectorTest::testLinearScale() {

    TGeneratorVec trends{smoothDaily, spikeyDaily};

-    this->testChange(trends,
-                     maths::SChangeDescription::E_LinearScale,
-                     [](TGenerator trend, core_t::TTime time) { return 3.0 * trend(time); },
-                     3.0,
-                     15.0);
+    this->testChange(
+        trends, maths::SChangeDescription::E_LinearScale,
+        [](TGenerator trend, core_t::TTime time) { return 3.0 * trend(time); }, 3.0, 15.0);
}

void CTimeSeriesChangeDetectorTest::testTimeShift() {
@@ -165,17 +179,17 @@ void CTimeSeriesChangeDetectorTest::testTimeShift() {

    TGeneratorVec trends{smoothDaily, spikeyDaily};

-    this->testChange(trends,
-                     maths::SChangeDescription::E_TimeShift,
-                     [](TGenerator trend, core_t::TTime time) { return trend(time - core::constants::HOUR); },
-                     -static_cast<double>(core::constants::HOUR),
-                     24.0);
-
-    this->testChange(trends,
-                     maths::SChangeDescription::E_TimeShift,
-                     [](TGenerator trend, core_t::TTime time) { return trend(time + core::constants::HOUR); },
-                     +static_cast<double>(core::constants::HOUR),
-                     24.0);
+    this->testChange(trends, maths::SChangeDescription::E_TimeShift,
+                     [](TGenerator trend, core_t::TTime time) {
+                         return trend(time - core::constants::HOUR);
+                     },
+                     -static_cast<double>(core::constants::HOUR), 24.0);
+
+    this->testChange(trends, maths::SChangeDescription::E_TimeShift,
+                     [](TGenerator trend, core_t::TTime time) {
+                         return trend(time + core::constants::HOUR);
+                     },
+                     +static_cast<double>(core::constants::HOUR), 24.0);
}

void CTimeSeriesChangeDetectorTest::testPersist() {
@@ -194,7 +208,8 @@ void CTimeSeriesChangeDetectorTest::testPersist() {
    auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) {
        trendModel->addPoint(time, x);
        double detrended{trendModel->detrend(time, x, 0.0)};
-        residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, maths::CConstantWeights::SINGLE_UNIT);
+        residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended},
+                                  maths::CConstantWeights::SINGLE_UNIT);
        residualModel->propagateForwardsByTime(1.0);
    };

@@ -205,11 +220,14 @@ void CTimeSeriesChangeDetectorTest::testPersist() {
    }

    maths::CUnivariateTimeSeriesChangeDetector origDetector{
-        trendModel, residualModel, 6 * core::constants::HOUR, 24 * core::constants::HOUR, 12.0};
+        trendModel, residualModel, 6 * core::constants::HOUR,
+        24 * core::constants::HOUR, 12.0};

-    maths::CModelParams modelParams{BUCKET_LENGTH, 1.0, 0.0, 1.0, 6 * core::constants::HOUR, 24 * core::constants::HOUR};
+    maths::CModelParams modelParams{
+        BUCKET_LENGTH, 1.0, 0.0, 1.0, 6 * core::constants::HOUR, 24 * core::constants::HOUR};
    maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-    maths::STimeSeriesDecompositionRestoreParams decompositionParams{DECAY_RATE, BUCKET_LENGTH, distributionParams};
+    maths::STimeSeriesDecompositionRestoreParams decompositionParams{
+        DECAY_RATE, BUCKET_LENGTH, distributionParams};
    maths::SModelRestoreParams params{modelParams, decompositionParams, distributionParams};

    for (std::size_t i = 990u; i < samples.size(); ++i) {
@@ -222,14 +240,17 @@ void CTimeSeriesChangeDetectorTest::testPersist() {
    }

    maths::CUnivariateTimeSeriesChangeDetector restoredDetector{
-        trendModel, residualModel, 6 * core::constants::HOUR, 24 * core::constants::HOUR, 12.0};
+        trendModel, residualModel, 6 * core::constants::HOUR,
+        24 * core::constants::HOUR, 12.0};

    core::CRapidXmlParser parser;
    CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
core::CRapidXmlStateRestoreTraverser traverser(parser);
-        traverser.traverseSubLevel(
-            boost::bind(&maths::CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, &restoredDetector, boost::cref(params), _1));
+        traverser.traverseSubLevel(boost::bind(
+            &maths::CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser,
+            &restoredDetector, boost::cref(params), _1));

-        LOG_DEBUG("expected " << origDetector.checksum() << " got " << restoredDetector.checksum());
+        LOG_DEBUG("expected " << origDetector.checksum() << " got "
+                              << restoredDetector.checksum());
        CPPUNIT_ASSERT_EQUAL(origDetector.checksum(), restoredDetector.checksum());
    }
}
@@ -237,16 +258,21 @@ void CTimeSeriesChangeDetectorTest::testPersist() {
CppUnit::Test* CTimeSeriesChangeDetectorTest::suite() {
    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeSeriesChangeDetectorTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>("CTimeSeriesChangeDetectorTest::testNoChange",
-                                                                                 &CTimeSeriesChangeDetectorTest::testNoChange));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>("CTimeSeriesChangeDetectorTest::testLevelShift",
-                                                                                 &CTimeSeriesChangeDetectorTest::testLevelShift));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>("CTimeSeriesChangeDetectorTest::testLinearScale",
-                                                                                 &CTimeSeriesChangeDetectorTest::testLinearScale));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>("CTimeSeriesChangeDetectorTest::testTimeShift",
-                                                                                 &CTimeSeriesChangeDetectorTest::testTimeShift));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>("CTimeSeriesChangeDetectorTest::testPersist",
-                                                                                 &CTimeSeriesChangeDetectorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>(
+        "CTimeSeriesChangeDetectorTest::testNoChange",
+        &CTimeSeriesChangeDetectorTest::testNoChange));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>(
+        "CTimeSeriesChangeDetectorTest::testLevelShift",
+        &CTimeSeriesChangeDetectorTest::testLevelShift));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>(
+        "CTimeSeriesChangeDetectorTest::testLinearScale",
+        &CTimeSeriesChangeDetectorTest::testLinearScale));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>(
+        "CTimeSeriesChangeDetectorTest::testTimeShift",
+        &CTimeSeriesChangeDetectorTest::testTimeShift));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesChangeDetectorTest>(
+        "CTimeSeriesChangeDetectorTest::testPersist",
+        &CTimeSeriesChangeDetectorTest::testPersist));

    return suiteOfTests;
}
@@ -271,13 +297,16 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends,

    rng.generateNormalSamples(0.0, 1.0, 1000, samples);

-    TDecompositionPtr trendModel(new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH});
+    TDecompositionPtr trendModel(
+        new maths::CTimeSeriesDecomposition{DECAY_RATE, BUCKET_LENGTH});
    TPriorPtr residualModel(makeResidualModel());

-    auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x, double weight) {
+    auto addSampleToModel = [&trendModel, &residualModel](
+                                core_t::TTime time, double x, double weight) {
        trendModel->addPoint(time, x, maths::CConstantWeights::COUNT, {weight});
        double detrended{trendModel->detrend(time, x, 0.0)};
-        residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, {{weight}});
+        residualModel->addSamples(maths::CConstantWeights::COUNT,
+                                  {detrended}, {{weight}});
        residualModel->propagateForwardsByTime(1.0);
    };

@@ -289,7 +318,8 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends,
    }

    maths::CUnivariateTimeSeriesChangeDetector detector{
-        trendModel, residualModel, 6 * core::constants::HOUR, 24 * 
core::constants::HOUR, 14.0};
+        trendModel, residualModel, 6 * core::constants::HOUR,
+        24 * core::constants::HOUR, 14.0};

    TOptionalSize bucketsToDetect;
    for (std::size_t i = 950u; i < samples.size(); ++i) {
@@ -304,7 +334,8 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends,
            bucketsToDetect.reset(i - 949);
        }
        CPPUNIT_ASSERT_EQUAL(change->s_Description, description);
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedChange, change->s_Value[0], 0.5 * std::fabs(expectedChange));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedChange, change->s_Value[0],
+                                     0.5 * std::fabs(expectedChange));
        break;
    }
    if (detector.stopTesting()) {
@@ -318,5 +349,6 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends,
    }

    LOG_DEBUG("buckets to detect = " << maths::CBasicStatistics::mean(meanBucketsToDetect));
-    CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanBucketsToDetect) < expectedMeanBucketsToDetectChange);
+    CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanBucketsToDetect) <
+                   expectedMeanBucketsToDetectChange);
}
diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc
index 2d12b043b6..a538d965b8 100644
--- a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc
+++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc
@@ -64,9 +64,12 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() {
    TTimeVec times;
    TDoubleVec trend;
    for (core_t::TTime time = 0; time < 100 * WEEK + 1; time += HALF_HOUR) {
-        double weekly =
-            1200.0 + 1000.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(WEEK));
-        double daily = 5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
+        double weekly = 1200.0 + 1000.0 * std::sin(boost::math::double_constants::two_pi *
+                                                   static_cast<double>(time) /
+                                                   static_cast<double>(WEEK));
+        double daily = 5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                            static_cast<double>(time) /
+                                            static_cast<double>(DAY));
        times.push_back(time);
        trend.push_back(weekly * daily);
    }
@@ -116,7 +119,9 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() {
            sumValue += std::fabs(trend[t / HALF_HOUR]);
            maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
            percentileError +=
-                std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0);
+                std::max(std::max(prediction.first - trend[t / HALF_HOUR],
+                                  trend[t / HALF_HOUR] - prediction.second),
+                         0.0);
            //f.push_back(mean(value));
            //r.push_back(mean(value) - trend[t / HALF_HOUR]);
        }
@@ -162,59 +167,99 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() {
    const core_t::TTime bucketLength = HOUR;
    const core_t::TTime startTime = 0;
    const TDoubleVec timeseries{
-        323444, 960510, 880176, 844190, 823993, 814251, 857187, 856791, 862060, 919632, 1083704, 2904437, 4601750, 5447896,
-        5827498, 5924161, 5851895, 5768661, 5927840, 5326236, 4037245, 1958521, 1360753, 1005194, 901930, 856605, 838370, 810396,
-        776815, 751163, 793055, 823974, 820458, 840647, 878594, 1192154, 2321550, 2646460, 2760957, 2838611, 2784696, 2798327,
-        2643123, 2028970, 1331199, 1098105, 930971, 907562, 903603, 873554, 879375, 852853, 828554, 819726, 872418, 856365,
-        860880, 867119, 873912, 885405, 1053530, 1487664, 1555301, 1637137, 1672030, 1659346, 1514673, 1228543, 1011740, 928749,
-        809702, 838931, 847904, 829188, 822558, 798517, 767446, 750486, 783165, 815612, 825365, 873486, 1165250, 2977382,
-        4868975, 6050263, 6470794, 6271899, 6449326, 6352992, 6162712, 6257295, 
4570133, 1781374, 1182546, 665858, 522585, 481588, - 395139, 380770, 379182, 356068, 353498, 347707, 350931, 417253, 989129, 2884728, 4640841, 5423474, 6246182, 6432793, - 6338419, 6312346, 6294323, 6102676, 4505021, 2168289, 1411233, 1055797, 954338, 918498, 904236, 870193, 843259, 682538, - 895407, 883550, 897026, 918838, 1262303, 3208919, 5193013, 5787263, 6255837, 6337684, 6335017, 6278740, 6191046, 6183259, - 4455055, 2004058, 1425910, 1069949, 942839, 899157, 895133, 858268, 837338, 820983, 870863, 871873, 881182, 918795, - 1237336, 3069272, 4708229, 5672066, 6291124, 6407806, 6479889, 6533138, 3473382, 6534838, 4800911, 2668073, 1644350, 1282450, - 1131734, 1009042, 891099, 857339, 842849, 816513, 879200, 848292, 858014, 906642, 1208147, 2964568, 5215885, 5777105, - 6332104, 6130733, 6284960, 6157055, 6165520, 5771121, 4309930, 2150044, 1475275, 1065030, 967267, 890413, 887174, 835741, - 814749, 817443, 853085, 851040, 866029, 867612, 917833, 1225383, 2326451, 2837337, 2975288, 3034415, 3056379, 3181951, - 2938511, 2400202, 1444952, 1058781, 845703, 810419, 805781, 789438, 799674, 775703, 756145, 727587, 756489, 789886, - 784948, 788247, 802013, 832272, 845033, 873396, 1018788, 1013089, 1095001, 1022910, 798183, 519186, 320507, 247320, - 139372, 129477, 145576, 122348, 120286, 89370, 95583, 88985, 89009, 97425, 103628, 153229, 675828, 2807240, - 4652249, 5170466, 5642965, 5608709, 5697374, 5546758, 5368913, 5161602, 3793675, 1375703, 593920, 340764, 197075, 174981, - 158274, 130148, 125235, 122526, 113896, 116249, 126881, 213814, 816723, 2690434, 4827493, 5723621, 6219650, 6492638, - 6570160, 6493706, 6495303, 6301872, 4300612, 1543551, 785562, 390012, 234939, 202190, 142855, 135218, 124238, 111981, - 104807, 107687, 129438, 190294, 779698, 2864053, 5079395, 5912629, 6481437, 6284107, 6451007, 6177724, 5993932, 6075918, - 4140658, 1481179, 682711, 328387, 233915, 182721, 170860, 139540, 137613, 121669, 116906, 121780, 127887, 199762, - 783099, 2890355, 4658524, 5535842, 6117719, 6322938, 6570422, 6396874, 6586615, 6332100, 4715160, 2604366, 1525620, 906137, - 499019, 358856, 225543, 171388, 153826, 149910, 141092, 136459, 161202, 240704, 766755, 3011958, 5024254, 5901640, - 6244757, 6257553, 6380236, 6394732, 6385424, 5876960, 4182127, 1868461, 883771, 377159, 264435, 196674, 181845, 138307, - 136055, 133143, 129791, 133694, 127502, 136351, 212305, 777873, 2219051, 2732315, 2965287, 2895288, 2829988, 2818268, - 2513817, 1866217, 985099, 561287, 205195, 173997, 166428, 165294, 130072, 113917, 113282, 112466, 103406, 115687, - 159863, 158310, 225454, 516925, 1268760, 1523357, 1607510, 1560200, 1483823, 1401526, 999236, 495292, 299905, 286900, - 209697, 169881, 157560, 139030, 132342, 187941, 126162, 106587, 108759, 109495, 116386, 208504, 676794, 1549362, - 2080332, 2488707, 2699237, 2862970, 2602994, 2554047, 2364456, 1997686, 1192434, 891293, 697769, 391385, 234311, 231839, - 160520, 155870, 142220, 139360, 142885, 141589, 166792, 443202, 2019645, 4558828, 5982111, 6408009, 6514598, 6567566, - 6686935, 6532886, 6473927, 5475257, 2889913, 1524673, 938262, 557410, 325965, 186484, 174831, 211765, 145477, 148318, - 130425, 136431, 182002, 442272, 2078908, 4628945, 5767034, 6212302, 6566196, 6527687, 6365204, 6226173, 6401203, 5629733, - 3004625, 1555528, 1025549, 492910, 347948, 298725, 272955, 238279, 209290, 188551, 175447, 173960, 190875, 468340, - 1885268, 4133457, 5350137, 5885807, 6331254, 6420279, 6589448, 6483637, 6557769, 5543938, 3482732, 2010293, 1278681, 735111, - 406042, 
283694, 181213, 160207, 136347, 113484, 118521, 127725, 151408, 396552, 1900747, 4400918, 5546984, 6213423, - 6464686, 6442904, 6385002, 6248314, 5880523, 4816342, 2597450, 1374071, 751391, 362615, 215644, 175158, 116896, 127935, - 110407, 113054, 105841, 113717, 177240, 206515, 616005, 1718878, 2391747, 2450915, 2653897, 2922320, 2808467, 2490078, - 1829760, 1219997, 643936, 400743, 208976, 119623, 110170, 99338, 93661, 100187, 90803, 83980, 75950, 78805, - 95664, 108467, 128293, 294080, 720811, 965705, 1048021, 1125912, 1194746, 1114704, 799721, 512542, 353694, 291046, - 229723, 206109, 183482, 192225, 191906, 176942, 148163, 145405, 145728, 159016, 181991, 436297, 1983374, 4688246, - 5853284, 6243628, 6730707, 6660743, 6476024, 6422004, 6335113, 5386230, 2761698, 1230646, 763506, 359071, 223956, 189020, - 158090, 145730, 135338, 114941, 108313, 120023, 167161, 440103, 1781778, 4428615, 5701824, 6296598, 6541586, 6809286, - 6716690, 6488941, 6567385, 5633685, 2760255, 1316495, 732572, 316496, 225013, 202664, 171295, 143195, 123555, 125327, - 123357, 135419, 194933, 428197, 2181096, 4672692, 5854393, 6553263, 6653127, 6772664, 6899086, 6794041, 6900871, 6087645, - 2814928, 1393906, 894417, 413459, 280839, 237468, 184947, 214658, 180059, 145215, 134793, 133423, 191388, 417885, - 2081899, 4836758, 5803495, 6451696, 7270708, 7628500, 7208066, 7403079, 7548585, 6323024, 3763029, 2197174, 1359687, 857604, - 471729, 338888, 177156, 150619, 145775, 132845, 110888, 121863, 141321, 440528, 2020529, 4615833, 5772372, 6318037, - 6481658, 6454979, 6489447, 6558612, 6114653, 5009113, 2541519, 1329520, 663124, 311088, 200332, 141768, 120845, 120603, - 114688, 111340, 95757, 91444, 103287, 130905, 551108, 1988083, 2885196, 2962413, 3070689, 3061746, 2999362, 2993871, - 2287683, 1539262, 763592, 393769, 193094, 126535, 131721, 125761, 105550, 89077, 90295, 93853, 84496, 77731, - 89389, 101269, 153379, 443022, 1114121, 1556021, 1607693, 1589743, 1746231, 1432261, 1022052}; + 323444, 960510, 880176, 844190, 823993, 814251, 857187, 856791, + 862060, 919632, 1083704, 2904437, 4601750, 5447896, 5827498, 5924161, + 5851895, 5768661, 5927840, 5326236, 4037245, 1958521, 1360753, 1005194, + 901930, 856605, 838370, 810396, 776815, 751163, 793055, 823974, + 820458, 840647, 878594, 1192154, 2321550, 2646460, 2760957, 2838611, + 2784696, 2798327, 2643123, 2028970, 1331199, 1098105, 930971, 907562, + 903603, 873554, 879375, 852853, 828554, 819726, 872418, 856365, + 860880, 867119, 873912, 885405, 1053530, 1487664, 1555301, 1637137, + 1672030, 1659346, 1514673, 1228543, 1011740, 928749, 809702, 838931, + 847904, 829188, 822558, 798517, 767446, 750486, 783165, 815612, + 825365, 873486, 1165250, 2977382, 4868975, 6050263, 6470794, 6271899, + 6449326, 6352992, 6162712, 6257295, 4570133, 1781374, 1182546, 665858, + 522585, 481588, 395139, 380770, 379182, 356068, 353498, 347707, + 350931, 417253, 989129, 2884728, 4640841, 5423474, 6246182, 6432793, + 6338419, 6312346, 6294323, 6102676, 4505021, 2168289, 1411233, 1055797, + 954338, 918498, 904236, 870193, 843259, 682538, 895407, 883550, + 897026, 918838, 1262303, 3208919, 5193013, 5787263, 6255837, 6337684, + 6335017, 6278740, 6191046, 6183259, 4455055, 2004058, 1425910, 1069949, + 942839, 899157, 895133, 858268, 837338, 820983, 870863, 871873, + 881182, 918795, 1237336, 3069272, 4708229, 5672066, 6291124, 6407806, + 6479889, 6533138, 3473382, 6534838, 4800911, 2668073, 1644350, 1282450, + 1131734, 1009042, 891099, 857339, 842849, 816513, 879200, 848292, + 858014, 906642, 
1208147, 2964568, 5215885, 5777105, 6332104, 6130733, + 6284960, 6157055, 6165520, 5771121, 4309930, 2150044, 1475275, 1065030, + 967267, 890413, 887174, 835741, 814749, 817443, 853085, 851040, + 866029, 867612, 917833, 1225383, 2326451, 2837337, 2975288, 3034415, + 3056379, 3181951, 2938511, 2400202, 1444952, 1058781, 845703, 810419, + 805781, 789438, 799674, 775703, 756145, 727587, 756489, 789886, + 784948, 788247, 802013, 832272, 845033, 873396, 1018788, 1013089, + 1095001, 1022910, 798183, 519186, 320507, 247320, 139372, 129477, + 145576, 122348, 120286, 89370, 95583, 88985, 89009, 97425, + 103628, 153229, 675828, 2807240, 4652249, 5170466, 5642965, 5608709, + 5697374, 5546758, 5368913, 5161602, 3793675, 1375703, 593920, 340764, + 197075, 174981, 158274, 130148, 125235, 122526, 113896, 116249, + 126881, 213814, 816723, 2690434, 4827493, 5723621, 6219650, 6492638, + 6570160, 6493706, 6495303, 6301872, 4300612, 1543551, 785562, 390012, + 234939, 202190, 142855, 135218, 124238, 111981, 104807, 107687, + 129438, 190294, 779698, 2864053, 5079395, 5912629, 6481437, 6284107, + 6451007, 6177724, 5993932, 6075918, 4140658, 1481179, 682711, 328387, + 233915, 182721, 170860, 139540, 137613, 121669, 116906, 121780, + 127887, 199762, 783099, 2890355, 4658524, 5535842, 6117719, 6322938, + 6570422, 6396874, 6586615, 6332100, 4715160, 2604366, 1525620, 906137, + 499019, 358856, 225543, 171388, 153826, 149910, 141092, 136459, + 161202, 240704, 766755, 3011958, 5024254, 5901640, 6244757, 6257553, + 6380236, 6394732, 6385424, 5876960, 4182127, 1868461, 883771, 377159, + 264435, 196674, 181845, 138307, 136055, 133143, 129791, 133694, + 127502, 136351, 212305, 777873, 2219051, 2732315, 2965287, 2895288, + 2829988, 2818268, 2513817, 1866217, 985099, 561287, 205195, 173997, + 166428, 165294, 130072, 113917, 113282, 112466, 103406, 115687, + 159863, 158310, 225454, 516925, 1268760, 1523357, 1607510, 1560200, + 1483823, 1401526, 999236, 495292, 299905, 286900, 209697, 169881, + 157560, 139030, 132342, 187941, 126162, 106587, 108759, 109495, + 116386, 208504, 676794, 1549362, 2080332, 2488707, 2699237, 2862970, + 2602994, 2554047, 2364456, 1997686, 1192434, 891293, 697769, 391385, + 234311, 231839, 160520, 155870, 142220, 139360, 142885, 141589, + 166792, 443202, 2019645, 4558828, 5982111, 6408009, 6514598, 6567566, + 6686935, 6532886, 6473927, 5475257, 2889913, 1524673, 938262, 557410, + 325965, 186484, 174831, 211765, 145477, 148318, 130425, 136431, + 182002, 442272, 2078908, 4628945, 5767034, 6212302, 6566196, 6527687, + 6365204, 6226173, 6401203, 5629733, 3004625, 1555528, 1025549, 492910, + 347948, 298725, 272955, 238279, 209290, 188551, 175447, 173960, + 190875, 468340, 1885268, 4133457, 5350137, 5885807, 6331254, 6420279, + 6589448, 6483637, 6557769, 5543938, 3482732, 2010293, 1278681, 735111, + 406042, 283694, 181213, 160207, 136347, 113484, 118521, 127725, + 151408, 396552, 1900747, 4400918, 5546984, 6213423, 6464686, 6442904, + 6385002, 6248314, 5880523, 4816342, 2597450, 1374071, 751391, 362615, + 215644, 175158, 116896, 127935, 110407, 113054, 105841, 113717, + 177240, 206515, 616005, 1718878, 2391747, 2450915, 2653897, 2922320, + 2808467, 2490078, 1829760, 1219997, 643936, 400743, 208976, 119623, + 110170, 99338, 93661, 100187, 90803, 83980, 75950, 78805, + 95664, 108467, 128293, 294080, 720811, 965705, 1048021, 1125912, + 1194746, 1114704, 799721, 512542, 353694, 291046, 229723, 206109, + 183482, 192225, 191906, 176942, 148163, 145405, 145728, 159016, + 181991, 436297, 1983374, 4688246, 5853284, 
6243628, 6730707, 6660743,
+        6476024, 6422004, 6335113, 5386230, 2761698, 1230646, 763506, 359071,
+        223956, 189020, 158090, 145730, 135338, 114941, 108313, 120023,
+        167161, 440103, 1781778, 4428615, 5701824, 6296598, 6541586, 6809286,
+        6716690, 6488941, 6567385, 5633685, 2760255, 1316495, 732572, 316496,
+        225013, 202664, 171295, 143195, 123555, 125327, 123357, 135419,
+        194933, 428197, 2181096, 4672692, 5854393, 6553263, 6653127, 6772664,
+        6899086, 6794041, 6900871, 6087645, 2814928, 1393906, 894417, 413459,
+        280839, 237468, 184947, 214658, 180059, 145215, 134793, 133423,
+        191388, 417885, 2081899, 4836758, 5803495, 6451696, 7270708, 7628500,
+        7208066, 7403079, 7548585, 6323024, 3763029, 2197174, 1359687, 857604,
+        471729, 338888, 177156, 150619, 145775, 132845, 110888, 121863,
+        141321, 440528, 2020529, 4615833, 5772372, 6318037, 6481658, 6454979,
+        6489447, 6558612, 6114653, 5009113, 2541519, 1329520, 663124, 311088,
+        200332, 141768, 120845, 120603, 114688, 111340, 95757, 91444,
+        103287, 130905, 551108, 1988083, 2885196, 2962413, 3070689, 3061746,
+        2999362, 2993871, 2287683, 1539262, 763592, 393769, 193094, 126535,
+        131721, 125761, 105550, 89077, 90295, 93853, 84496, 77731,
+        89389, 101269, 153379, 443022, 1114121, 1556021, 1607693, 1589743,
+        1746231, 1432261, 1022052};

    core_t::TTime time = startTime;
    core_t::TTime lastWeek = startTime;
@@ -246,7 +291,9 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() {
        double maxValue = 0.0;
        double percentileError = 0.0;

-        for (core_t::TTime tt = lastWeek; tt < lastWeek + WEEK && static_cast<std::size_t>(tt / HOUR) < boost::size(timeseries);
+        for (core_t::TTime tt = lastWeek;
+             tt < lastWeek + WEEK &&
+             static_cast<std::size_t>(tt / HOUR) < boost::size(timeseries);
             tt += HOUR) {
            TDoubleDoublePr prediction = decomposition.value(tt, 70.0);
            double residual = std::fabs(timeseries[tt / HOUR] - mean(prediction));
@@ -255,7 +302,9 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() {
            sumValue += std::fabs(timeseries[tt / HOUR]);
            maxValue = std::max(maxValue, std::fabs(timeseries[tt / HOUR]));
            percentileError +=
-                std::max(std::max(prediction.first - timeseries[tt / HOUR], timeseries[tt / HOUR] - prediction.second), 0.0);
+                std::max(std::max(prediction.first - timeseries[tt / HOUR],
+                                  timeseries[tt / HOUR] - prediction.second),
+                         0.0);
            //t.push_back(tt);
            //f.push_back(timeseries[tt / HOUR]);
            //fe.push_back(mean(value));
@@ -307,7 +356,9 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() {
    TDoubleVec trend;
    for (core_t::TTime time = 0; time < 100 * WEEK; time += HALF_HOUR) {
        double weight = weights[(time / DAY) % 7];
-        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
+        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi *
+                                        static_cast<double>(time) /
+                                        static_cast<double>(DAY));
        times.push_back(time);
        trend.push_back(weight * daily);
    }
@@ -359,7 +410,9 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() {
            sumValue += std::fabs(trend[t / HALF_HOUR]);
            maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
            percentileError +=
-                std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0);
+                std::max(std::max(prediction.first - trend[t / HALF_HOUR],
+                                  trend[t / HALF_HOUR] - prediction.second),
+                         0.0);
            //f.push_back(mean(value));
            //r.push_back(residual);
        }
@@ -424,7 +477,9 @@ void CTimeSeriesDecompositionTest::testWeekend() {
    TDoubleVec trend;
    for (core_t::TTime time = 0; time < 100 * WEEK; time += HALF_HOUR) 
{
        double weight = weights[(time / DAY) % 7];
-        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
+        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi *
+                                        static_cast<double>(time) /
+                                        static_cast<double>(DAY));
        times.push_back(time);
        trend.push_back(weight * daily);
    }
@@ -474,7 +529,9 @@ void CTimeSeriesDecompositionTest::testWeekend() {
            sumValue += std::fabs(trend[t / HALF_HOUR]);
            maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
            percentileError +=
-                std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0);
+                std::max(std::max(prediction.first - trend[t / HALF_HOUR],
+                                  trend[t / HALF_HOUR] - prediction.second),
+                         0.0);
            //f.push_back(mean(value));
            //r.push_back(residual);
        }
@@ -521,8 +578,9 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() {
    TTimeVec times;
    TDoubleVec trend;
    for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) {
-        double daily =
-            100.0 + 100.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
+        double daily = 100.0 + 100.0 * std::sin(boost::math::double_constants::two_pi *
+                                                static_cast<double>(time) /
+                                                static_cast<double>(DAY));
        times.push_back(time);
        trend.push_back(daily);
    }
@@ -568,13 +626,15 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() {
        for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) {
            TDoubleDoublePr prediction = decomposition.value(t, 70.0);
-            double residual = std::fabs(trend[t / HALF_HOUR] + noiseMean - mean(prediction));
+            double residual =
+                std::fabs(trend[t / HALF_HOUR] + noiseMean - mean(prediction));
            sumResidual += residual;
            maxResidual = std::max(maxResidual, residual);
            sumValue += std::fabs(trend[t / HALF_HOUR]);
            maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
            percentileError += std::max(
-                std::max(prediction.first - (trend[t / HALF_HOUR] + noiseMean), (trend[t / HALF_HOUR] + noiseMean) - prediction.second),
+                std::max(prediction.first - (trend[t / HALF_HOUR] + noiseMean),
+                         (trend[t / HALF_HOUR] + noiseMean) - prediction.second),
                0.0);
            //f.push_back(mean(value));
            //r.push_back(residual);
@@ -630,8 +690,9 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() {
    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testSeasonalOnset |");
    LOG_DEBUG(<< "+---------------------------------------------------+");

-    const double daily[] = {0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 40.0, 40.0, 40.0, 30.0, 30.0,
-                            35.0, 35.0, 40.0, 50.0, 60.0, 80.0, 80.0, 10.0, 5.0, 0.0, 0.0, 0.0};
+    const double daily[] = {0.0,  0.0,  0.0,  0.0,  5.0,  5.0,  5.0,  40.0,
+                            40.0, 40.0, 30.0, 30.0, 35.0, 35.0, 40.0, 50.0,
+                            60.0, 80.0, 80.0, 10.0, 5.0,  0.0,  0.0,  0.0};
    const double weekly[] = {0.1, 0.1, 1.2, 1.0, 1.0, 0.9, 1.5};

    TTimeVec times;
@@ -689,14 +750,20 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() {
            maxResidual = std::max(maxResidual, residual);
            sumValue += std::fabs(trend[t / HOUR]);
            maxValue = std::max(maxValue, std::fabs(trend[t / HOUR]));
-            percentileError += std::max(std::max(prediction.first - trend[t / HOUR], trend[t / HOUR] - prediction.second), 0.0);
+            percentileError +=
+                std::max(std::max(prediction.first - trend[t / HOUR],
+                                  trend[t / HOUR] - prediction.second),
+                         0.0);
            //f.push_back(mean(value));
            //r.push_back(residual);
        }

-        LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
-        LOG_DEBUG(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue));
-        LOG_DEBUG(<< "70% error = " << (percentileError == 0.0 ? 0.0 : percentileError / sumValue));
+        LOG_DEBUG(<< "'sum residual' / 'sum value' = "
+                  << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
+        LOG_DEBUG(<< "'max residual' / 'max value' = "
+                  << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));
+        LOG_DEBUG(<< "70% error = "
+                  << (percentileError == 0.0 ? 0.0 : percentileError / sumValue));

        totalSumResidual += sumResidual;
        totalMaxResidual += maxResidual;
@@ -782,7 +849,9 @@ void CTimeSeriesDecompositionTest::testVarianceScale() {
            double scale = (interval.first + interval.second) / 2.0;
            error.add(std::fabs(scale - expectedScale));
            meanScale.add(scale);
-            percentileError.add(std::max(std::max(interval.first - expectedScale, expectedScale - interval.second), 0.0));
+            percentileError.add(std::max(std::max(interval.first - expectedScale,
+                                                  expectedScale - interval.second),
+                                         0.0));
        }

        LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(error));
@@ -799,7 +868,9 @@ void CTimeSeriesDecompositionTest::testVarianceScale() {
        for (std::size_t i = 0u; i < 50; ++i) {
            for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
-                double value = 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / static_cast<double>(DAY));
+                double value = 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                              static_cast<double>(t) /
+                                              static_cast<double>(DAY));
                double variance = 1.0;
                if (t >= 3600 && t < 7200) {
                    variance = 10.0;
@@ -828,7 +899,9 @@ void CTimeSeriesDecompositionTest::testVarianceScale() {
            double scale = (interval.first + interval.second) / 2.0;
            error.add(std::fabs(scale - expectedScale));
            meanScale.add(scale);
-            percentileError.add(std::max(std::max(interval.first - expectedScale, expectedScale - interval.second), 0.0));
+            percentileError.add(std::max(std::max(interval.first - expectedScale,
+                                                  expectedScale - interval.second),
+                                         0.0));
        }

        LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(error));
@@ -848,9 +921,11 @@ void CTimeSeriesDecompositionTest::testVarianceScale() {
        times.push_back(time);
        double x = static_cast<double>(time);
        trend.push_back(150.0 +
-                        100.0 * std::sin(boost::math::double_constants::two_pi * x / static_cast<double>(240 * DAY) /
+                        100.0 * std::sin(boost::math::double_constants::two_pi *
+                                         x / static_cast<double>(240 * DAY) /
                                         (1.0 - x / static_cast<double>(2 * length))) +
-                        10.0 * std::sin(boost::math::double_constants::two_pi * x / static_cast<double>(DAY)));
+                        10.0 * std::sin(boost::math::double_constants::two_pi *
+                                        x / static_cast<double>(DAY)));
    }

    TDoubleVec noise;
@@ -864,8 +939,10 @@ void CTimeSeriesDecompositionTest::testVarianceScale() {
    TMeanAccumulator meanScale;
    double meanVariance = decomposition.meanVariance();
    for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
-        TDoubleDoublePr interval = decomposition.scale(times.back() + t, meanVariance, 70.0);
-        LOG_DEBUG(<< "time = " << t << ", scale = " << core::CContainerPrinter::print(interval));
+        TDoubleDoublePr interval =
+            decomposition.scale(times.back() + t, meanVariance, 70.0);
+        LOG_DEBUG(<< "time = " << t
+                  << ", scale = " << core::CContainerPrinter::print(interval));
        double scale = (interval.first + interval.second) / 2.0;
        meanScale.add(scale);
    }
@@ -883,10 +960,14 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() {
    TTimeDoublePrVec timeseries;
    core_t::TTime startTime;
    core_t::TTime endTime;
-    CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/spikey_data.csv", timeseries, startTime, endTime, "^([0-9]+),([0-9\\.]+)"));
+    
CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/spikey_data.csv", + timeseries, startTime, endTime, + "^([0-9]+),([0-9\\.]+)")); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); + LOG_DEBUG(<< "timeseries = " + << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) + << " ..."); double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -895,7 +976,8 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { double totalPercentileError = 0.0; maths::CTimeSeriesDecomposition decomposition(0.01, FIVE_MINS); - maths::CNormalMeanPrecConjugate model = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.01); + maths::CNormalMeanPrecConjugate model = maths::CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, 0.01); core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK; TTimeDoublePrVec lastWeekTimeseries; @@ -913,18 +995,23 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { double percentileError = 0.0; for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); + TDoubleDoublePr prediction = + decomposition.value(lastWeekTimeseries[j].first, 70.0); double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); percentileError += std::max( - std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); + std::max(prediction.first - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction.second), + 0.0); } - LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG(<< "'sum residual' / 'sum value' = " + << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG(<< "'max residual' / 'max value' = " + << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue)); LOG_DEBUG(<< "70% error = " << percentileError / sumValue); if (time >= startTime + WEEK) { @@ -975,13 +1062,13 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { double lb, ub; maths_t::ETail tail; - model.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - maths_t::TWeightStyleVec{maths_t::E_SampleSeasonalVarianceScaleWeight}, - TDoubleVec{decomposition.detrend(time, value, 70.0)}, - TDoubleVecVec{TDoubleVec{std::max(decomposition.scale(time, variance, 70.0).second, 0.25)}}, - lb, - ub, - tail); + model.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, + maths_t::TWeightStyleVec{maths_t::E_SampleSeasonalVarianceScaleWeight}, + TDoubleVec{decomposition.detrend(time, value, 70.0)}, + TDoubleVecVec{TDoubleVec{ + std::max(decomposition.scale(time, variance, 70.0).second, 0.25)}}, + lb, ub, tail); double pScaled = (lb + ub) / 2.0; pMinScaled = std::min(pMinScaled, pScaled); @@ -991,13 +1078,11 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { //scales.push_back(mean(decomposition.scale(time, variance, 70.0))); //probs.push_back(-std::log(pScaled)); - model.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleSeasonalVarianceScaleWeight), - TDoubleVec(1, decomposition.detrend(time, value, 70.0)), - TDoubleVecVec(1, TDoubleVec(1, 1.0)), - lb, - ub, - tail); + model.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, + maths_t::TWeightStyleVec(1, maths_t::E_SampleSeasonalVarianceScaleWeight), + TDoubleVec(1, decomposition.detrend(time, value, 70.0)), + TDoubleVecVec(1, TDoubleVec(1, 1.0)), lb, ub, tail); double pUnscaled = (lb + ub) / 2.0; pMinUnscaled = std::min(pMinUnscaled, pUnscaled); } @@ -1025,10 +1110,13 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() { TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; - CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/diurnal.csv", timeseries, startTime, endTime, "^([0-9]+),([0-9\\.]+)")); + CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse( + "testfiles/diurnal.csv", timeseries, startTime, endTime, "^([0-9]+),([0-9\\.]+)")); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); + LOG_DEBUG(<< "timeseries = " + << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) + << " ..."); //std::ofstream file; //file.open("results.m"); @@ -1061,14 +1149,17 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() { double percentileError = 0.0; for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); + TDoubleDoublePr prediction = + decomposition.value(lastWeekTimeseries[j].first, 70.0); double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); percentileError += std::max( - std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); + std::max(prediction.first - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction.second), + 0.0); //times.push_back(lastWeekTimeseries[j].first); //values.push_back(lastWeekTimeseries[j].second); //f.push_back(mean(value)); @@ -1133,15 +1224,15 @@ void 
CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() { TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; - CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse("testfiles/thirty_minute_samples.csv", - timeseries, - startTime, - endTime, - test::CTimeSeriesTestData::CSV_ISO8601_REGEX, - test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT)); + CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse( + "testfiles/thirty_minute_samples.csv", timeseries, startTime, endTime, + test::CTimeSeriesTestData::CSV_ISO8601_REGEX, + test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); + LOG_DEBUG(<< "timeseries = " + << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) + << " ..."); //std::ofstream file; //file.open("results.m"); @@ -1174,22 +1265,27 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() { double percentileError = 0.0; for (std::size_t j = 0u; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0); + TDoubleDoublePr prediction = + decomposition.value(lastWeekTimeseries[j].first, 70.0); double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); percentileError += std::max( - std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); + std::max(prediction.first - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction.second), + 0.0); //times.push_back(lastWeekTimeseries[j].first); //values.push_back(lastWeekTimeseries[j].second); //f.push_back(mean(value)); //r.push_back(residual); } - LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG(<< "'sum residual' / 'sum value' = " + << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG(<< "'max residual' / 'max value' = " + << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue)); LOG_DEBUG(<< "70% error = " << percentileError / sumValue); if (time >= startTime + 2 * WEEK) { @@ -1246,15 +1342,20 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() { maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); core_t::TTime time = 0; for (std::size_t t = 0u; t < 50; ++t) { - for (auto value : {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, - 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) { + for (auto value : + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, + 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) { if (value > 0.0) { TDoubleVec noise; rng.generateNormalSamples(10.0, 2.0, 1, noise); decomposition.addPoint(time, value + noise[0]); if (decomposition.initialized()) { - error.add(std::fabs((value + noise[0] - maths::CBasicStatistics::mean(decomposition.value(time, 0.0)))) / + error.add(std::fabs((value + noise[0] - + maths::CBasicStatistics::mean( + decomposition.value(time, 0.0)))) / std::fabs(value + noise[0])); } //times.push_back(time); @@ -1288,20 +1389,31 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() { core_t::TTime time = 0; for (std::size_t t = 0u; t < 10; ++t) { for (auto value : - {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, - 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, - 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, - 8.0, 9.0, 9.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, - 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, - 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) { + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, + 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, + 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, + 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, + 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 20.0, 18.0, + 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, + 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, + 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, + 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, + 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) { if (value > 0.0) { TDoubleVec noise; rng.generateNormalSamples(10.0, 2.0, 1, noise); decomposition.addPoint(time, value + noise[0]); if (decomposition.initialized()) { - error.add(std::fabs((value + noise[0] - 
maths::CBasicStatistics::mean(decomposition.value(time, 0.0)))) /
+                    error.add(std::fabs((value + noise[0] -
+                                         maths::CBasicStatistics::mean(
+                                             decomposition.value(time, 0.0)))) /
                              std::fabs(value + noise[0]));
                }
                //times.push_back(time);
@@ -1378,8 +1490,10 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() {
                maxValue = std::max(maxValue, std::fabs(trend[j]));
            }

-            LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
-            LOG_DEBUG(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));
+            LOG_DEBUG(<< "'sum residual' / 'sum value' = "
+                      << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
+            LOG_DEBUG(<< "'max residual' / 'max value' = "
+                      << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));

            totalSumResidual += sumResidual;
            totalMaxResidual += maxResidual;
@@ -1411,16 +1525,19 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() {
    LOG_DEBUG(<< "Saw Tooth Not Periodic");
    {
-        core_t::TTime drops[] = {0, 30 * DAY, 50 * DAY, 60 * DAY, 85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY};
+        core_t::TTime drops[] = {0,        30 * DAY,  50 * DAY,  60 * DAY,
+                                 85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY};

        times.clear();
        trend.clear();

        {
            std::size_t i = 0u;
-            for (core_t::TTime time = 0; time < length; time += HALF_HOUR, (time > drops[i] ? ++i : i)) {
+            for (core_t::TTime time = 0; time < length;
+                 time += HALF_HOUR, (time > drops[i] ? ++i : i)) {
                times.push_back(time);
-                trend.push_back(25.0 * static_cast<double>(time - drops[i - 1]) / static_cast<double>(drops[i] - drops[i - 1] + 1));
+                trend.push_back(25.0 * static_cast<double>(time - drops[i - 1]) /
+                                static_cast<double>(drops[i] - drops[i - 1] + 1));
            }
        }

@@ -1453,8 +1570,10 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() {
                maxValue = std::max(maxValue, std::fabs(trend[j]));
            }

-            LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
-            LOG_DEBUG(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));
+            LOG_DEBUG(<< "'sum residual' / 'sum value' = "
+                      << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
+            LOG_DEBUG(<< "'max residual' / 'max value' = "
+                      << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));

            totalSumResidual += sumResidual;
            totalMaxResidual += maxResidual;
@@ -1496,9 +1615,11 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() {
        times.push_back(time);
        double x = static_cast<double>(time);
        trend.push_back(150.0 +
-                        100.0 * std::sin(boost::math::double_constants::two_pi * x / static_cast<double>(240 * DAY) /
+                        100.0 * std::sin(boost::math::double_constants::two_pi *
+                                         x / static_cast<double>(240 * DAY) /
                                         (1.0 - x / static_cast<double>(2 * length))) +
-                        10.0 * std::sin(boost::math::double_constants::two_pi * x / static_cast<double>(DAY)));
+                        10.0 * std::sin(boost::math::double_constants::two_pi *
+                                        x / static_cast<double>(DAY)));
    }

    test::CRandomNumbers rng;
@@ -1539,8 +1660,10 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() {
            maxValue = std::max(maxValue, std::fabs(trend[j]));
        }

-        LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
-        LOG_DEBUG(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 
0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -1579,7 +1702,8 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { LOG_DEBUG(<< "Hourly") { const core_t::TTime length = 21 * DAY; - double periodic[]{10.0, 1.0, 0.5, 0.5, 1.0, 5.0, 2.0, 1.0, 0.5, 0.5, 1.0, 3.0}; + double periodic[]{10.0, 1.0, 0.5, 0.5, 1.0, 5.0, + 2.0, 1.0, 0.5, 0.5, 1.0, 3.0}; TTimeVec times; TDoubleVec trends[2]{TDoubleVec(), TDoubleVec(8 * DAY / FIVE_MINS)}; @@ -1622,7 +1746,8 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { double maxValue = 0.0; for (std::size_t j = i - 12; j < i; ++j) { - TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); + TDoubleDoublePr prediction = + decomposition.value(times[j], 70.0); double residual = std::fabs(trends[t][j] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -1630,8 +1755,10 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { maxValue = std::max(maxValue, std::fabs(trends[t][j])); } - LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG(<< "'sum residual' / 'sum value' = " + << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG(<< "'max residual' / 'max value' = " + << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -1647,8 +1774,10 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } - LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); - LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); + LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " + << totalSumResidual / totalSumValue); + LOG_DEBUG(<< "total 'max residual' / 'max value' = " + << totalMaxResidual / totalMaxValue); //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; @@ -1665,7 +1794,8 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { { const core_t::TTime length = 20 * DAY; - double periodic[] = {10.0, 8.0, 5.5, 2.5, 2.0, 5.0, 2.0, 1.0, 1.5, 3.5, 4.0, 7.0}; + double periodic[] = {10.0, 8.0, 5.5, 2.5, 2.0, 5.0, + 2.0, 1.0, 1.5, 3.5, 4.0, 7.0}; TTimeVec times; TDoubleVec trend; @@ -1712,8 +1842,10 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { maxValue = std::max(maxValue, std::fabs(trend[j])); } - LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); - LOG_DEBUG(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); + LOG_DEBUG(<< "'sum residual' / 'sum value' = " + << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); + LOG_DEBUG(<< "'max residual' / 'max value' = " + << (maxResidual == 0.0 ? 
@@ -1753,20 +1885,24 @@ void CTimeSeriesDecompositionTest::testYearly() {
     test::CRandomNumbers rng;

     maths::CTimeSeriesDecomposition decomposition(0.012, 4 * HOUR);
-    maths::CDecayRateController controller(
-        maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1);
+    maths::CDecayRateController controller(maths::CDecayRateController::E_PredictionBias |
+                                           maths::CDecayRateController::E_PredictionErrorIncrease,
+                                           1);
     TDoubleVec noise;
     core_t::TTime time = 0;
     for (/**/; time < 4 * YEAR; time += 4 * HOUR) {
         double trend =
-            15.0 * (2.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(YEAR))) +
-            7.5 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
+            15.0 * (2.0 + std::sin(boost::math::double_constants::two_pi *
+                                   static_cast<double>(time) / static_cast<double>(YEAR))) +
+            7.5 * std::sin(boost::math::double_constants::two_pi *
+                           static_cast<double>(time) / static_cast<double>(DAY));
         rng.generateNormalSamples(0.0, 1.0, 1, noise);
         decomposition.addPoint(time, trend + noise[0]);
         if (decomposition.initialized()) {
             TDouble1Vec prediction{decomposition.meanValue(time)};
             TDouble1Vec predictionError{decomposition.detrend(time, trend, 0.0)};
-            double multiplier{controller.multiplier(prediction, {predictionError}, 4 * HOUR, 1.0, 0.0005)};
+            double multiplier{controller.multiplier(prediction, {predictionError},
+                                                    4 * HOUR, 1.0, 0.0005)};
             decomposition.decayRate(multiplier * decomposition.decayRate());
         }
     }
@@ -1781,8 +1917,10 @@ void CTimeSeriesDecompositionTest::testYearly() {
     TMeanAccumulator meanError;
     for (/**/; time < 5 * YEAR; time += 4 * HOUR) {
         double trend =
-            15.0 * (2.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(YEAR))) +
-            7.5 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
+            15.0 * (2.0 + std::sin(boost::math::double_constants::two_pi *
+                                   static_cast<double>(time) / static_cast<double>(YEAR))) +
+            7.5 * std::sin(boost::math::double_constants::two_pi *
+                           static_cast<double>(time) / static_cast<double>(DAY));
         double prediction = maths::CBasicStatistics::mean(decomposition.value(time, 0.0));
         double error = std::fabs((prediction - trend) / trend);
         meanError.add(error);
@@ -1825,9 +1963,12 @@ void CTimeSeriesDecompositionTest::testCalendar() {
     TDoubleVec errors{5.0, 15.0, 35.0, 32.0, 25.0, 36.0, 22.0, 12.0, 3.0};

     auto trend = [&months, &errors](core_t::TTime t) {
-        double result = 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / static_cast<double>(DAY));
+        double result = 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi *
+                                               static_cast<double>(t) /
+                                               static_cast<double>(DAY));
         auto i = std::lower_bound(months.begin(), months.end(), t - DAY);
-        if (t >= *i + 7200 && t < *i + 7200 + static_cast<core_t::TTime>(errors.size()) * HALF_HOUR) {
+        if (t >= *i + 7200 &&
+            t < *i + 7200 + static_cast<core_t::TTime>(errors.size()) * HALF_HOUR) {
             result += errors[(t - (*i + 7200)) / HALF_HOUR];
         }
         return result;
     };
@@ -1855,8 +1996,10 @@ void CTimeSeriesDecompositionTest::testCalendar() {
             std::size_t largeErrorCount = 0u;

             for (core_t::TTime time_ = time - DAY; time_ < time; time_ += TEN_MINS) {
-                double prediction = maths::CBasicStatistics::mean(decomposition.value(time_));
-                double variance = 4.0 * maths::CBasicStatistics::mean(decomposition.scale(time_, 4.0, 0.0));
+                double prediction =
+                    maths::CBasicStatistics::mean(decomposition.value(time_));
+                double variance = 4.0 * maths::CBasicStatistics::mean(
+                                            decomposition.scale(time_, 4.0, 0.0));
                 double actual = trend(time_);
                 if (std::fabs(prediction - actual) / std::sqrt(variance) > 3.0) {
                     LOG_DEBUG(<< "  prediction = " << prediction);
@@ -1888,7 +2031,9 @@ void CTimeSeriesDecompositionTest::testConditionOfTrend() {
     LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testConditionOfTrend |");
     LOG_DEBUG(<< "+------------------------------------------------------+");

-    auto trend = [](core_t::TTime time) { return std::pow(static_cast<double>(time) / static_cast<double>(WEEK), 2.0); };
+    auto trend = [](core_t::TTime time) {
+        return std::pow(static_cast<double>(time) / static_cast<double>(WEEK), 2.0);
+    };

     const core_t::TTime bucketLength = 6 * HOUR;
@@ -1917,7 +2062,9 @@ void CTimeSeriesDecompositionTest::testSwap() {
     TDoubleVec trend1;
     TDoubleVec trend2;
     for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) {
-        double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
+        double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi *
+                                              static_cast<double>(time) /
+                                              static_cast<double>(DAY));
         times.push_back(time);
         trend1.push_back(daily);
         trend2.push_back(2.0 * daily);
@@ -1958,7 +2105,9 @@ void CTimeSeriesDecompositionTest::testPersist() {
     TTimeVec times;
     TDoubleVec trend;
     for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) {
-        double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY));
+        double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi *
+                                              static_cast<double>(time) /
+                                              static_cast<double>(DAY));
         times.push_back(time);
         trend.push_back(daily);
     }
@@ -1987,7 +2136,8 @@ void CTimeSeriesDecompositionTest::testPersist() {
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
     maths::STimeSeriesDecompositionRestoreParams params{
-        decayRate + 0.1, bucketLength, maths::SDistributionRestoreParams{maths_t::E_ContinuousData, decayRate + 0.1}};
+        decayRate + 0.1, bucketLength,
+        maths::SDistributionRestoreParams{maths_t::E_ContinuousData, decayRate + 0.1}};

     maths::CTimeSeriesDecomposition restoredDecomposition(params, traverser);
@@ -2025,7 +2175,8 @@ void CTimeSeriesDecompositionTest::testUpgrade() {
         return TDoubleDoublePr{first, second};
     };

-    maths::STimeSeriesDecompositionRestoreParams params{0.1, HALF_HOUR, maths::SDistributionRestoreParams{maths_t::E_ContinuousData, 0.1}};
+    maths::STimeSeriesDecompositionRestoreParams params{
+        0.1, HALF_HOUR, maths::SDistributionRestoreParams{maths_t::E_ContinuousData, 0.1}};
     std::string empty;

     LOG_DEBUG(<< "*** Seasonal and Calendar Components ***");
@@ -2066,15 +2217,21 @@ void CTimeSeriesDecompositionTest::testUpgrade() {
         CPPUNIT_ASSERT_DOUBLES_EQUAL(5994.36, meanValue, 0.005);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(286374.0, meanVariance, 0.5);

-        for (core_t::TTime time = 60480000, i = 0; i < static_cast<core_t::TTime>(expectedValues.size()); time += HALF_HOUR, ++i) {
+        for (core_t::TTime time = 60480000, i = 0;
+             i < static_cast<core_t::TTime>(expectedValues.size());
+             time += HALF_HOUR, ++i) {
             TDoubleDoublePr expectedValue{stringToPair(expectedValues[i])};
             TDoubleDoublePr expectedScale{stringToPair(expectedScales[i])};
             TDoubleDoublePr value{decomposition.value(time, 10.0)};
             TDoubleDoublePr scale{decomposition.scale(time, 286374.0, 10.0)};
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, value.first, 0.005 * std::fabs(expectedValue.first));
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.second, value.second, 0.005 * std::fabs(expectedValue.second));
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.first, scale.first, 0.005 * expectedScale.first);
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.second, scale.second, 0.005 * std::max(expectedScale.second, 0.4));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, value.first,
+                                         0.005 * std::fabs(expectedValue.first));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.second, value.second,
+                                         0.005 * std::fabs(expectedValue.second));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.first, scale.first,
+                                         0.005 * expectedScale.first);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.second, scale.second,
+                                         0.005 * std::max(expectedScale.second, 0.4));
         }
     }
@@ -2085,13 +2242,15 @@ void CTimeSeriesDecompositionTest::testUpgrade() {
         LOG_DEBUG(<< "Saved state size = " << xml.size());

         std::string values;
-        load("testfiles/CTimeSeriesDecomposition.6.2.trend_and_seasonal.expected_values.txt", values);
+        load("testfiles/CTimeSeriesDecomposition.6.2.trend_and_seasonal.expected_values.txt",
+             values);
         LOG_DEBUG(<< "Expected values size = " << values.size());
         TStrVec expectedValues;
         core::CStringUtils::tokenise(";", values, expectedValues, empty);

         std::string scales;
-        load("testfiles/CTimeSeriesDecomposition.6.2.trend_and_seasonal.expected_scales.txt", scales);
+        load("testfiles/CTimeSeriesDecomposition.6.2.trend_and_seasonal.expected_scales.txt",
+             scales);
         LOG_DEBUG(<< "Expected scales size = " << scales.size());
         TStrVec expectedScales;
         core::CStringUtils::tokenise(";", scales, expectedScales, empty);
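Note: the testYearly hunks earlier in this file exercise the decay rate feedback loop: each bucket the controller is given the current prediction and the detrended prediction error, and the multiplier it returns rescales how quickly the decomposition forgets. One iteration, sketched with the same calls the test makes (hypothetical helper name; TDouble1Vec and HOUR are the test file's aliases, and 4 * HOUR and 0.0005 are the test's bucket length and minimum multiplier):

    // One feedback step: biased or growing prediction errors drive the
    // multiplier above one, so the decomposition forgets faster and can
    // re-learn a changed trend.
    void controlDecayRate(maths::CTimeSeriesDecomposition& decomposition,
                          maths::CDecayRateController& controller,
                          core_t::TTime time,
                          double value) {
        if (decomposition.initialized()) {
            TDouble1Vec prediction{decomposition.meanValue(time)};
            TDouble1Vec predictionError{decomposition.detrend(time, value, 0.0)};
            double multiplier{controller.multiplier(prediction, {predictionError},
                                                    4 * HOUR, 1.0, 0.0005)};
            decomposition.decayRate(multiplier * decomposition.decayRate());
        }
    }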
@@ -2120,19 +2279,29 @@ void CTimeSeriesDecompositionTest::testUpgrade() {
         TMeanAccumulator meanValueError;
         TMeanAccumulator meanScaleError;
-        for (core_t::TTime time = 10366200, i = 0; i < static_cast<core_t::TTime>(expectedValues.size()); time += HALF_HOUR, ++i) {
+        for (core_t::TTime time = 10366200, i = 0;
+             i < static_cast<core_t::TTime>(expectedValues.size());
+             time += HALF_HOUR, ++i) {
             TDoubleDoublePr expectedValue{stringToPair(expectedValues[i])};
             TDoubleDoublePr expectedScale{stringToPair(expectedScales[i])};
             TDoubleDoublePr value{decomposition.value(time, 10.0)};
             TDoubleDoublePr scale{decomposition.scale(time, 96.1654, 10.0)};
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, value.first, 0.1 * std::fabs(expectedValue.first));
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.second, value.second, 0.1 * std::fabs(expectedValue.second));
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.first, scale.first, 0.3 * expectedScale.first);
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.second, scale.second, 0.3 * expectedScale.second);
-            meanValueError.add(std::fabs(expectedValue.first - value.first) / std::fabs(expectedValue.first));
-            meanValueError.add(std::fabs(expectedValue.second - value.second) / std::fabs(expectedValue.second));
-            meanScaleError.add(std::fabs(expectedScale.first - scale.first) / expectedScale.first);
-            meanScaleError.add(std::fabs(expectedScale.second - scale.second) / expectedScale.second);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.first, value.first,
+                                         0.1 * std::fabs(expectedValue.first));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedValue.second, value.second,
+                                         0.1 * std::fabs(expectedValue.second));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.first, scale.first,
+                                         0.3 * expectedScale.first);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedScale.second, scale.second,
+                                         0.3 * expectedScale.second);
+            meanValueError.add(std::fabs(expectedValue.first - value.first) /
+                               std::fabs(expectedValue.first));
+            meanValueError.add(std::fabs(expectedValue.second - value.second) /
+                               std::fabs(expectedValue.second));
+            meanScaleError.add(std::fabs(expectedScale.first - scale.first) /
+                               expectedScale.first);
+            meanScaleError.add(std::fabs(expectedScale.second - scale.second) /
+                               expectedScale.second);
         }

         LOG_DEBUG(<< "Mean value error = " << maths::CBasicStatistics::mean(meanValueError));
@@ -2145,47 +2314,61 @@ CppUnit::Test* CTimeSeriesDecompositionTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeSeriesDecompositionTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSuperpositionOfSines",
-                                                                                &CTimeSeriesDecompositionTest::testSuperpositionOfSines));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testDistortedPeriodic",
-                                                                                &CTimeSeriesDecompositionTest::testDistortedPeriodic));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testMinimizeLongComponents",
-                                                                                &CTimeSeriesDecompositionTest::testMinimizeLongComponents));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testWeekend",
-                                                                                &CTimeSeriesDecompositionTest::testWeekend));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSinglePeriodicity",
-                                                                                &CTimeSeriesDecompositionTest::testSinglePeriodicity));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSeasonalOnset",
-                                                                                &CTimeSeriesDecompositionTest::testSeasonalOnset));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testVarianceScale",
-                                                                                &CTimeSeriesDecompositionTest::testVarianceScale));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSpikeyDataProblemCase",
-                                                                                &CTimeSeriesDecompositionTest::testSpikeyDataProblemCase));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testDiurnalProblemCase",
-                                                                                &CTimeSeriesDecompositionTest::testDiurnalProblemCase));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
-        "CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase", &CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues",
-                                                              &CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testLongTermTrend",
-                                                                                &CTimeSeriesDecompositionTest::testLongTermTrend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testSuperpositionOfSines",
+        &CTimeSeriesDecompositionTest::testSuperpositionOfSines));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testDistortedPeriodic",
+        &CTimeSeriesDecompositionTest::testDistortedPeriodic));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testMinimizeLongComponents",
+        &CTimeSeriesDecompositionTest::testMinimizeLongComponents));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testWeekend", &CTimeSeriesDecompositionTest::testWeekend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testSinglePeriodicity",
+        &CTimeSeriesDecompositionTest::testSinglePeriodicity));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testSeasonalOnset",
+        &CTimeSeriesDecompositionTest::testSeasonalOnset));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testVarianceScale",
+        &CTimeSeriesDecompositionTest::testVarianceScale));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testSpikeyDataProblemCase",
+        &CTimeSeriesDecompositionTest::testSpikeyDataProblemCase));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testDiurnalProblemCase",
+        &CTimeSeriesDecompositionTest::testDiurnalProblemCase));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase",
+        &CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues",
+        &CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testLongTermTrend",
+        &CTimeSeriesDecompositionTest::testLongTermTrend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity",
+        &CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testNonDiurnal",
+        &CTimeSeriesDecompositionTest::testNonDiurnal));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testYearly", &CTimeSeriesDecompositionTest::testYearly));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testCalendar",
+        &CTimeSeriesDecompositionTest::testCalendar));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testConditionOfTrend",
+        &CTimeSeriesDecompositionTest::testConditionOfTrend));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testSwap", &CTimeSeriesDecompositionTest::testSwap));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
+        "CTimeSeriesDecompositionTest::testPersist", &CTimeSeriesDecompositionTest::testPersist));
     suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
-        "CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity", &CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testNonDiurnal",
-                                                                                &CTimeSeriesDecompositionTest::testNonDiurnal));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testYearly",
-                                                                                &CTimeSeriesDecompositionTest::testYearly));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testCalendar",
-                                                                                &CTimeSeriesDecompositionTest::testCalendar));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testConditionOfTrend",
-                                                                                &CTimeSeriesDecompositionTest::testConditionOfTrend));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testSwap",
-                                                                                &CTimeSeriesDecompositionTest::testSwap));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testPersist",
-                                                                                &CTimeSeriesDecompositionTest::testPersist));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>("CTimeSeriesDecompositionTest::testUpgrade",
-                                                                                &CTimeSeriesDecompositionTest::testUpgrade));
+        "CTimeSeriesDecompositionTest::testUpgrade", &CTimeSeriesDecompositionTest::testUpgrade));

     return suiteOfTests;
 }
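Note: the suite() hunk above is mechanical reformatting; every entry follows the standard CppUnit registration pattern, in which the TestCaller template argument names the fixture class whose member function the runner invokes. Reduced to a single entry for reference (illustrative sketch only):

    CppUnit::Test* CTimeSeriesDecompositionTest::suite() {
        CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeSeriesDecompositionTest");
        // One TestCaller per test method: the string names the test in reports,
        // the member function pointer is what actually runs.
        suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
            "CTimeSeriesDecompositionTest::testWeekend",
            &CTimeSeriesDecompositionTest::testWeekend));
        return suiteOfTests;
    }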
diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc
index d010265d4f..2218b502c8 100644
--- a/lib/maths/unittest/CTimeSeriesModelTest.cc
+++ b/lib/maths/unittest/CTimeSeriesModelTest.cc
@@ -69,7 +69,9 @@ class CTimeSeriesCorrelateModelAllocator : public maths::CTimeSeriesCorrelateMod
     virtual bool areAllocationsAllowed() const { return true; }

     //! Check if \p correlations exceeds the memory limit.
-    virtual bool exceedsLimit(std::size_t /*correlations*/) const { return false; }
+    virtual bool exceedsLimit(std::size_t /*correlations*/) const {
+        return false;
+    }

     //! Get the maximum number of correlations we should model.
     virtual std::size_t maxNumberCorrelations() const { return 5000; }
@@ -79,8 +81,9 @@ class CTimeSeriesCorrelateModelAllocator : public maths::CTimeSeriesCorrelateMod
     //! Create a new prior for a correlation model.
     virtual TMultivariatePriorPtr newPrior() const {
-        return TMultivariatePriorPtr(
-            maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE).clone());
+        return TMultivariatePriorPtr(maths::CMultivariateNormalConjugate<2>::nonInformativePrior(
+                                         maths_t::E_ContinuousData, DECAY_RATE)
+                                         .clone());
     }
 };
@@ -89,15 +92,23 @@ maths::CModelParams modelParams(core_t::TTime bucketLength) {
     static TTimeDoubleMap learnRates;
     learnRates[bucketLength] = static_cast<double>(bucketLength) / 1800.0;
     double minimumSeasonalVarianceScale{MINIMUM_SEASONAL_SCALE};
-    return maths::CModelParams{
-        bucketLength, learnRates[bucketLength], DECAY_RATE, minimumSeasonalVarianceScale, 12 * core::constants::HOUR, core::constants::DAY};
+    return maths::CModelParams{bucketLength,
+                               learnRates[bucketLength],
+                               DECAY_RATE,
+                               minimumSeasonalVarianceScale,
+                               12 * core::constants::HOUR,
+                               core::constants::DAY};
 }

-maths::CModelAddSamplesParams
-addSampleParams(double interval, const maths_t::TWeightStyleVec& weightStyles, const TDouble2Vec4VecVec& weights)
-{
+maths::CModelAddSamplesParams addSampleParams(double interval,
+                                              const maths_t::TWeightStyleVec& weightStyles,
+                                              const TDouble2Vec4VecVec& weights) {
     maths::CModelAddSamplesParams params;
-    params.integer(false).propagationInterval(interval).weightStyles(weightStyles).trendWeights(weights).priorWeights(weights);
+    params.integer(false)
+        .propagationInterval(interval)
+        .weightStyles(weightStyles)
+        .trendWeights(weights)
+        .priorWeights(weights);
     return params;
 }
@@ -116,35 +127,43 @@ maths::CModelProbabilityParams computeProbabilityParams(const TDouble2Vec4Vec& w
 }

 maths::CNormalMeanPrecConjugate univariateNormal(double decayRate = DECAY_RATE) {
-    return maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, decayRate);
+    return maths::CNormalMeanPrecConjugate::nonInformativePrior(
+        maths_t::E_ContinuousData, decayRate);
 }

 maths::CLogNormalMeanPrecConjugate univariateLogNormal(double decayRate = DECAY_RATE) {
-    return maths::CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, 0.0, decayRate);
+    return maths::CLogNormalMeanPrecConjugate::nonInformativePrior(
+        maths_t::E_ContinuousData, 0.0, decayRate);
 }

 maths::CMultimodalPrior univariateMultimodal(double decayRate = DECAY_RATE) {
-    maths::CXMeansOnline1d clusterer{
-        maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, decayRate};
-    return maths::CMultimodalPrior{maths_t::E_ContinuousData, clusterer, univariateNormal(), decayRate};
+    maths::CXMeansOnline1d clusterer{maths_t::E_ContinuousData,
+                                     maths::CAvailableModeDistributions::ALL,
+                                     maths_t::E_ClustersFractionWeight, decayRate};
+    return maths::CMultimodalPrior{maths_t::E_ContinuousData, clusterer,
+                                   univariateNormal(), decayRate};
 }

 maths::CMultivariateNormalConjugate<3> multivariateNormal(double decayRate = DECAY_RATE) {
-    return maths::CMultivariateNormalConjugate<3>::nonInformativePrior(maths_t::E_ContinuousData, decayRate);
+    return maths::CMultivariateNormalConjugate<3>::nonInformativePrior(
+        maths_t::E_ContinuousData, decayRate);
 }

 maths::CMultivariateMultimodalPrior<3> multivariateMultimodal(double decayRate = DECAY_RATE) {
-    maths::CXMeansOnline clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, decayRate);
+    maths::CXMeansOnline clusterer(
+        maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, decayRate);
     return maths::CMultivariateMultimodalPrior<3>(
-        maths_t::E_ContinuousData,
-        clusterer,
-        maths::CMultivariateNormalConjugate<3>::nonInformativePrior(maths_t::E_ContinuousData, decayRate),
+        maths_t::E_ContinuousData, clusterer,
+        maths::CMultivariateNormalConjugate<3>::nonInformativePrior(
+            maths_t::E_ContinuousData, decayRate),
         decayRate);
 }

-maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary decayRateControllers(std::size_t dimension) {
-    return {{maths::CDecayRateController(
-                 maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, dimension),
+maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary
+decayRateControllers(std::size_t dimension) {
+    return {{maths::CDecayRateController(maths::CDecayRateController::E_PredictionBias |
+                                             maths::CDecayRateController::E_PredictionErrorIncrease,
+                                         dimension),
              maths::CDecayRateController(maths::CDecayRateController::E_PredictionBias |
                                              maths::CDecayRateController::E_PredictionErrorIncrease |
                                              maths::CDecayRateController::E_PredictionErrorDecrease,
@@ -162,7 +181,8 @@ void reinitializePrior(double learnRate,
         for (std::size_t i = 0u; i < value.second.size(); ++i) {
             detrended_[0][i] = trends[i]->detrend(value.first, value.second[i], 0.0);
         }
-        prior.addSamples(maths::CConstantWeights::COUNT, detrended_, {{TDouble10Vec(value.second.size(), learnRate)}});
+        prior.addSamples(maths::CConstantWeights::COUNT, detrended_,
+                         {{TDouble10Vec(value.second.size(), learnRate)}});
     }
     if (controllers) {
         for (auto& trend : trends) {
@@ -190,7 +210,8 @@ void CTimeSeriesModelTest::testClone() {
     maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
     auto controllers = decayRateControllers(1);
     maths::CTimeSeriesCorrelations correlations{MINIMUM_SIGNIFICANT_CORRELATION, DECAY_RATE};
-    maths::CUnivariateTimeSeriesModel model(modelParams(bucketLength), 1, trend, univariateNormal(), &controllers);
+    maths::CUnivariateTimeSeriesModel model(modelParams(bucketLength), 1, trend,
+                                            univariateNormal(), &controllers);
     model.modelCorrelations(correlations);

     TDoubleVec samples;
@@ -198,7 +219,8 @@ void CTimeSeriesModelTest::testClone() {
     TDouble2Vec4VecVec weights{{{1.0}}};
     core_t::TTime time{0};
     for (auto sample : samples) {
-        model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+        model.addSamples(addSampleParams(weights),
+                         {core::make_triple(time, TDouble2Vec{sample}, TAG)});
         time += bucketLength;
     }
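Note: every addSamples call in the test hunks that follow is driven through the addSampleParams builder above, which fills a CModelAddSamplesParams via chained setters. The resulting per-bucket feed loop, in a minimal sketch (hypothetical helper name; TAG, the typedefs and the unit count weight all come from the test file, and the loop body mirrors the test code verbatim):

    // Drive a univariate model with one sample per bucket using the
    // addSampleParams builder defined above.
    void addOneSamplePerBucket(maths::CUnivariateTimeSeriesModel& model,
                               const TDoubleVec& samples,
                               core_t::TTime bucketLength) {
        TDouble2Vec4VecVec weights{{{1.0}}};
        core_t::TTime time{0};
        for (auto sample : samples) {
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }
    }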
@@ -212,7 +234,8 @@ void CTimeSeriesModelTest::testClone() {
     {
         maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
         auto controllers = decayRateControllers(3);
-        maths::CMultivariateTimeSeriesModel model(modelParams(bucketLength), trend, multivariateNormal(), &controllers);
+        maths::CMultivariateTimeSeriesModel model(
+            modelParams(bucketLength), trend, multivariateNormal(), &controllers);

         TDoubleVec mean{13.0, 9.0, 10.0};
         TDoubleVecVec covariance{{3.5, 2.9, 0.5}, {2.9, 3.6, 0.1}, {0.5, 0.1, 2.1}};
@@ -222,7 +245,8 @@ void CTimeSeriesModelTest::testClone() {
         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
         core_t::TTime time{0};
         for (const auto& sample : samples) {
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
             time += bucketLength;
         }
@@ -262,7 +286,8 @@ void CTimeSeriesModelTest::testMode() {
         for (auto sample : samples) {
             trend.addPoint(time, sample);
             TDouble1Vec sample_{trend.detrend(time, sample, 0.0)};
-            prior.addSamples(maths::CConstantWeights::COUNT, sample_, maths::CConstantWeights::SINGLE_UNIT);
+            prior.addSamples(maths::CConstantWeights::COUNT, sample_,
+                             maths::CConstantWeights::SINGLE_UNIT);
             prior.propagateForwardsByTime(1.0);
             time += bucketLength;
         }
@@ -271,10 +296,12 @@ void CTimeSeriesModelTest::testMode() {
         TDouble2Vec4VecVec weights{weight};
         time = 0;
         for (auto sample : samples) {
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
             time += bucketLength;
         }
-        double expectedMode{maths::CBasicStatistics::mean(trend.value(time)) + prior.marginalLikelihoodMode()};
+        double expectedMode{maths::CBasicStatistics::mean(trend.value(time)) +
+                            prior.marginalLikelihoodMode()};
         TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight));

         LOG_DEBUG(<< "expected mode = " << expectedMode);
@@ -295,8 +322,8 @@ void CTimeSeriesModelTest::testMode() {
         core_t::TTime time{0};
         for (auto& sample : samples) {
-            sample += 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi
-                                             * static_cast<double>(time) / 86400.0);
+            sample += 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi *
+                                             static_cast<double>(time) / 86400.0);
             time += bucketLength;
         }
@@ -304,20 +331,25 @@ void CTimeSeriesModelTest::testMode() {
         TDouble2Vec4VecVec weights{weight};
         time = 0;
         for (auto sample : samples) {
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
             if (trend.addPoint(time, sample)) {
                 prior.setToNonInformative(0.0, DECAY_RATE);
                 for (const auto& value : model.slidingWindow()) {
-                    prior.addSamples(maths::CConstantWeights::COUNT, {trend.detrend(value.first, value.second, 0.0)}, {{learnRate}});
+                    prior.addSamples(maths::CConstantWeights::COUNT,
+                                     {trend.detrend(value.first, value.second, 0.0)},
+                                     {{learnRate}});
                 }
             }
             TDouble1Vec sample_{trend.detrend(time, sample, 0.0)};
-            prior.addSamples(maths::CConstantWeights::COUNT, sample_, maths::CConstantWeights::SINGLE_UNIT);
+            prior.addSamples(maths::CConstantWeights::COUNT, sample_,
+                             maths::CConstantWeights::SINGLE_UNIT);
             prior.propagateForwardsByTime(1.0);
             time += bucketLength;
         }
-        double expectedMode{maths::CBasicStatistics::mean(trend.value(time)) + prior.marginalLikelihoodMode()};
+        double expectedMode{maths::CBasicStatistics::mean(trend.value(time)) +
+                            prior.marginalLikelihoodMode()};
         TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight));

         LOG_DEBUG(<< "expected mode = " << expectedMode);
@@ -333,11 +365,13 @@ void CTimeSeriesModelTest::testMode() {
         TDoubleVecVec samples;
         rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);

-        TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}};
+        TDecompositionPtr10Vec trends{
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
-        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior};
+        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength),
+                                                  *trends[0], prior};

         core_t::TTime time{0};
         for (const auto& sample : samples) {
@@ -346,22 +380,25 @@ void CTimeSeriesModelTest::testMode() {
                 trends[i]->addPoint(time, sample[i]);
                 detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0);
             }
-            prior.addSamples(maths::CConstantWeights::COUNT, detrended, maths::CConstantWeights::singleUnit(3));
+            prior.addSamples(maths::CConstantWeights::COUNT, detrended,
+                             maths::CConstantWeights::singleUnit(3));
             prior.propagateForwardsByTime(1.0);
         }

         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
         time = 0;
         for (const auto& sample : samples) {
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
             time += bucketLength;
         }

-        TDouble2Vec expectedMode(
-            prior.marginalLikelihoodMode(maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3)));
+        TDouble2Vec expectedMode(prior.marginalLikelihoodMode(
+            maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3)));
         for (std::size_t i = 0u; i < trends.size(); ++i) {
             expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->value(time));
         }
-        TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3)));
+        TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT,
+                                    maths::CConstantWeights::unit(3)));

         LOG_DEBUG(<< "expected mode = " << expectedMode);
         LOG_DEBUG(<< "mode = " << mode);
@@ -379,18 +416,21 @@ void CTimeSeriesModelTest::testMode() {
         rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);

         double learnRate{modelParams(bucketLength).learnRate()};
-        TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}};
+        TDecompositionPtr10Vec trends{
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
-        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior};
+        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength),
+                                                  *trends[0], prior};

         core_t::TTime time{0};
         for (auto& sample : samples) {
             double amplitude{10.0};
             for (std::size_t i = 0u; i < sample.size(); ++i) {
-                sample[i] += 30.0 + amplitude * std::sin(boost::math::double_constants::two_pi
-                                                         * static_cast<double>(time) / 86400.0);
+                sample[i] += 30.0 + amplitude *
+                                        std::sin(boost::math::double_constants::two_pi *
+                                                 static_cast<double>(time) / 86400.0);
                 amplitude += 4.0;
             }
             time += bucketLength;
@@ -399,7 +439,8 @@ void CTimeSeriesModelTest::testMode() {
         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
         time = 0;
         for (const auto& sample : samples) {
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});

             bool reinitialize{false};
             TDouble10Vec1Vec detrended{TDouble10Vec(3)};
@@ -410,17 +451,19 @@ void CTimeSeriesModelTest::testMode() {
             if (reinitialize) {
                 reinitializePrior(learnRate, model, trends, prior);
             }
-            prior.addSamples(maths::CConstantWeights::COUNT, detrended, maths::CConstantWeights::singleUnit(3));
+            prior.addSamples(maths::CConstantWeights::COUNT, detrended,
+                             maths::CConstantWeights::singleUnit(3));
             prior.propagateForwardsByTime(1.0);
             time += bucketLength;
         }

-        TDouble2Vec expectedMode(
-            prior.marginalLikelihoodMode(maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3)));
+        TDouble2Vec expectedMode(prior.marginalLikelihoodMode(
+            maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3)));
         for (std::size_t i = 0u; i < trends.size(); ++i) {
             expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->value(time));
         }
-        TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3)));
+        TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT,
+                                    maths::CConstantWeights::unit(3)));

         LOG_DEBUG(<< "expected mode = " << expectedMode);
         LOG_DEBUG(<< "mode = " << mode);
@@ -453,10 +496,12 @@ void CTimeSeriesModelTest::testAddBucketValue() {
     TDouble2Vec4VecVec weights{{{1.0}}, {{1.5}}, {{0.9}}, {{1.9}}};

     for (std::size_t i = 0u; i < samples.size(); ++i) {
-        prior.addSamples(maths::CConstantWeights::COUNT, {samples[i].second[0]}, {{weights[i][0][0]}});
+        prior.addSamples(maths::CConstantWeights::COUNT, {samples[i].second[0]},
+                         {{weights[i][0][0]}});
     }
     prior.propagateForwardsByTime(1.0);
-    prior.adjustOffset(maths::CConstantWeights::COUNT, {-1.0}, maths::CConstantWeights::SINGLE_UNIT);
+    prior.adjustOffset(maths::CConstantWeights::COUNT, {-1.0},
+                       maths::CConstantWeights::SINGLE_UNIT);

     model.addSamples(addSampleParams(weights), samples);
     model.addBucketValue({core::make_triple(core_t::TTime{20}, TDouble2Vec{-1.0}, TAG)});
@@ -483,17 +528,22 @@ void CTimeSeriesModelTest::testAddSamples() {
     maths::CNormalMeanPrecConjugate prior{univariateNormal()};
     maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior};

-    TTimeDouble2VecSizeTrVec samples{core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG),
-                                     core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG),
-                                     core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG)};
+    TTimeDouble2VecSizeTrVec samples{
+        core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG),
+        core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG),
+        core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG)};
     TDouble2Vec4VecVec weights{{{1.0}}, {{1.5}}, {{0.9}}};

     model.addSamples(addSampleParams(weights), samples);

-    trend.addPoint(samples[1].first, samples[1].second[0], maths::CConstantWeights::COUNT, weights[1][0]);
-    trend.addPoint(samples[2].first, samples[2].second[0], maths::CConstantWeights::COUNT, weights[2][0]);
-    trend.addPoint(samples[0].first, samples[0].second[0], maths::CConstantWeights::COUNT, weights[0][0]);
-    TDouble1Vec samples_{samples[2].second[0], samples[0].second[0], samples[1].second[0]};
+    trend.addPoint(samples[1].first, samples[1].second[0],
+                   maths::CConstantWeights::COUNT, weights[1][0]);
+    trend.addPoint(samples[2].first, samples[2].second[0],
+                   maths::CConstantWeights::COUNT, weights[2][0]);
+    trend.addPoint(samples[0].first, samples[0].second[0],
+                   maths::CConstantWeights::COUNT, weights[0][0]);
+    TDouble1Vec samples_{samples[2].second[0], samples[0].second[0],
+                         samples[1].second[0]};
     TDouble4Vec1Vec weights_{weights[2][0], weights[0][0], weights[1][0]};
     prior.addSamples(maths::CConstantWeights::COUNT, samples_, weights_);
     prior.propagateForwardsByTime(1.0);
@@ -510,25 +560,35 @@ void CTimeSeriesModelTest::testAddSamples() {
     LOG_DEBUG(<< "Multiple samples multivariate");
     {
-        TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}};
+        TDecompositionPtr10Vec trends{
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
-        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior};
+        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength),
+                                                  *trends[0], prior};

-        TTimeDouble2VecSizeTrVec samples{core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5, 3.4, 3.3}, TAG),
-                                         core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9, 3.8, 3.7}, TAG),
-                                         core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1, 2.0, 1.9}, TAG)};
+        TTimeDouble2VecSizeTrVec samples{
+            core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5, 3.4, 3.3}, TAG),
+            core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9, 3.8, 3.7}, TAG),
+            core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1, 2.0, 1.9}, TAG)};
         TDouble2Vec4VecVec weights{{{1.0, 1.1, 1.2}}, {{1.5, 1.6, 1.7}}, {{0.9, 1.0, 1.1}}};

         model.addSamples(addSampleParams(weights), samples);

         for (std::size_t i = 0u; i < trends.size(); ++i) {
-            trends[i]->addPoint(samples[1].first, samples[1].second[i], maths::CConstantWeights::COUNT, TDouble4Vec{weights[1][0][i]});
-            trends[i]->addPoint(samples[2].first, samples[2].second[i], maths::CConstantWeights::COUNT, TDouble4Vec{weights[2][0][i]});
-            trends[i]->addPoint(samples[0].first, samples[0].second[i], maths::CConstantWeights::COUNT, TDouble4Vec{weights[0][0][i]});
+            trends[i]->addPoint(samples[1].first, samples[1].second[i],
+                                maths::CConstantWeights::COUNT,
+                                TDouble4Vec{weights[1][0][i]});
+            trends[i]->addPoint(samples[2].first, samples[2].second[i],
+                                maths::CConstantWeights::COUNT,
+                                TDouble4Vec{weights[2][0][i]});
+            trends[i]->addPoint(samples[0].first, samples[0].second[i],
+                                maths::CConstantWeights::COUNT,
+                                TDouble4Vec{weights[0][0][i]});
         }
-        TDouble10Vec1Vec samples_{samples[2].second, samples[0].second, samples[1].second};
+        TDouble10Vec1Vec samples_{samples[2].second, samples[0].second,
+                                  samples[1].second};
         TDouble10Vec4Vec1Vec weights_{{weights[2][0]}, {weights[0][0]}, {weights[1][0]}};
         prior.addSamples(maths::CConstantWeights::COUNT, samples_, weights_);
         prior.propagateForwardsByTime(1.0);
@@ -545,8 +605,9 @@ void CTimeSeriesModelTest::testAddSamples() {
         CPPUNIT_ASSERT_EQUAL(checksum1, checksum2);
     }

-    maths_t::TWeightStyleVec weightStyles{
-        maths_t::E_SampleWinsorisationWeight, maths_t::E_SampleCountWeight, maths_t::E_SampleCountVarianceScaleWeight};
+    maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleWinsorisationWeight,
+                                          maths_t::E_SampleCountWeight,
+                                          maths_t::E_SampleCountVarianceScaleWeight};

     LOG_DEBUG(<< "Propagation interval univariate");
     {
@@ -578,24 +639,29 @@ void CTimeSeriesModelTest::testAddSamples() {
     LOG_DEBUG(<< "Propagation interval multivariate");
     {
-        TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}};
+        TDecompositionPtr10Vec trends{
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
-        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior};
+        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength),
+                                                  *trends[0], prior};

         double interval[]{1.0, 1.1, 0.4};
         TDouble2Vec samples[]{{13.5, 13.4, 13.3}, {13.9, 13.8, 13.7}, {20.1, 20.0, 10.9}};
-        TDouble2Vec4VecVec weights{{{0.1, 0.1, 0.2}, {1.0, 1.1, 1.2}, {2.0, 2.1, 2.2}},
-                                   {{0.5, 0.6, 0.7}, {2.0, 2.1, 2.2}, {1.0, 1.1, 1.2}},
-                                   {{0.9, 1.0, 1.0}, {0.9, 1.0, 1.0}, {1.9, 2.0, 2.0}}};
+        TDouble2Vec4VecVec weights{
+            {{0.1, 0.1, 0.2}, {1.0, 1.1, 1.2}, {2.0, 2.1, 2.2}},
+            {{0.5, 0.6, 0.7}, {2.0, 2.1, 2.2}, {1.0, 1.1, 1.2}},
+            {{0.9, 1.0, 1.0}, {0.9, 1.0, 1.0}, {1.9, 2.0, 2.0}}};

         core_t::TTime time{0};
         for (std::size_t i = 0u; i < 3; ++i) {
             TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)};
             model.addSamples(addSampleParams(interval[i], weightStyles, weights), sample);

-            TDouble10Vec4Vec weight{TDouble10Vec(weights[0][0]), TDouble10Vec(weights[0][1]), TDouble10Vec(weights[0][2])};
+            TDouble10Vec4Vec weight{TDouble10Vec(weights[0][0]),
+                                    TDouble10Vec(weights[0][1]),
+                                    TDouble10Vec(weights[0][2])};
             prior.addSamples(weightStyles, {TDouble10Vec(samples[i])}, {weight});
             prior.propagateForwardsByTime(interval[i]);
@@ -614,7 +680,8 @@ void CTimeSeriesModelTest::testAddSamples() {
         maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
         maths::CNormalMeanPrecConjugate prior{univariateNormal()};
         auto controllers = decayRateControllers(1);
-        maths::CUnivariateTimeSeriesModel model(modelParams(bucketLength), 1, trend, prior, &controllers);
+        maths::CUnivariateTimeSeriesModel model(modelParams(bucketLength), 1,
+                                                trend, prior, &controllers);

         TDoubleVec samples;
         rng.generateNormalSamples(1.0, 4.0, 2000, samples);
@@ -625,10 +692,11 @@ void CTimeSeriesModelTest::testAddSamples() {
         core_t::TTime time{0};
         for (auto noise : samples) {
             double sample{20.0 +
-                          4.0 * std::sin(boost::math::double_constants::two_pi
-                                         * static_cast<double>(time) / 86400.0)
-                          + (time / bucketLength > 1800 ? 10.0 : 0.0) + noise};
-            TTimeDouble2VecSizeTrVec sample_{core::make_triple(time, TDouble2Vec{sample}, TAG)};
+                          4.0 * std::sin(boost::math::double_constants::two_pi *
+                                         static_cast<double>(time) / 86400.0) +
+                          (time / bucketLength > 1800 ? 10.0 : 0.0) + noise};
+            TTimeDouble2VecSizeTrVec sample_{
+                core::make_triple(time, TDouble2Vec{sample}, TAG)};

             model.addSamples(addSampleParams(weights), sample_);
@@ -636,7 +704,9 @@ void CTimeSeriesModelTest::testAddSamples() {
                 trend.decayRate(trend.decayRate() / controllers[0].multiplier());
                 prior.setToNonInformative(0.0, prior.decayRate());
                 for (const auto& value : model.slidingWindow()) {
-                    prior.addSamples(maths::CConstantWeights::COUNT, {trend.detrend(value.first, value.second, 0.0)}, {{learnRate}});
+                    prior.addSamples(maths::CConstantWeights::COUNT,
+                                     {trend.detrend(value.first, value.second, 0.0)},
+                                     {{learnRate}});
                 }
                 prior.decayRate(prior.decayRate() / controllers[1].multiplier());
                 controllers[0].reset();
@@ -648,15 +718,15 @@ void CTimeSeriesModelTest::testAddSamples() {

             if (trend.initialized()) {
                 double multiplier{controllers[0].multiplier(
-                    {trend.meanValue(time)}, {{detrended}}, bucketLength, model.params().learnRate(), DECAY_RATE)};
+                    {trend.meanValue(time)}, {{detrended}}, bucketLength,
+                    model.params().learnRate(), DECAY_RATE)};
                 trend.decayRate(multiplier * trend.decayRate());
             }
             if (prior.numberSamples() > 20.0) {
-                double multiplier{controllers[1].multiplier({prior.marginalLikelihoodMean()},
-                                                            {{detrended - prior.marginalLikelihoodMean()}},
-                                                            bucketLength,
-                                                            model.params().learnRate(),
-                                                            DECAY_RATE)};
+                double multiplier{controllers[1].multiplier(
+                    {prior.marginalLikelihoodMean()},
+                    {{detrended - prior.marginalLikelihoodMean()}},
+                    bucketLength, model.params().learnRate(), DECAY_RATE)};
                 prior.decayRate(multiplier * prior.decayRate());
             }
@@ -674,12 +744,14 @@ void CTimeSeriesModelTest::testAddSamples() {
     LOG_DEBUG(<< "Decay rate control multivariate");
     {
         double learnRate{modelParams(bucketLength).learnRate()};
-        TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}};
+        TDecompositionPtr10Vec trends{
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
         auto controllers = decayRateControllers(3);
-        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior, &controllers};
+        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength),
+                                                  *trends[0], prior, &controllers};

         TDoubleVecVec samples;
         {
@@ -701,9 +773,9 @@ void CTimeSeriesModelTest::testAddSamples() {
             double amplitude{10.0};
             for (std::size_t i = 0u; i < sample.size(); ++i) {
                 sample[i] = 30.0 +
-                            amplitude * std::sin(boost::math::double_constants::two_pi
-                                                 * static_cast<double>(time) / 86400.0)
-                            + (time / bucketLength > 1800 ? 10.0 : 0.0) + sample[i];
+                            amplitude * std::sin(boost::math::double_constants::two_pi *
+                                                 static_cast<double>(time) / 86400.0) +
+                            (time / bucketLength > 1800 ? 10.0 : 0.0) + sample[i];
                 reinitialize |= trends[i]->addPoint(time, sample[i]);
                 detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0);
                 mean[i] = trends[i]->meanValue(time);
@@ -711,7 +783,8 @@ void CTimeSeriesModelTest::testAddSamples() {
                 amplitude += 4.0;
             }

-            TTimeDouble2VecSizeTrVec sample_{core::make_triple(time, TDouble2Vec(sample), TAG)};
+            TTimeDouble2VecSizeTrVec sample_{
+                core::make_triple(time, TDouble2Vec(sample), TAG)};

             model.addSamples(addSampleParams(weights), sample_);
@@ -722,7 +795,8 @@ void CTimeSeriesModelTest::testAddSamples() {
             prior.propagateForwardsByTime(1.0);

             if (hasTrend) {
-                double multiplier{controllers[0].multiplier(mean, {detrended[0]}, bucketLength, model.params().learnRate(), DECAY_RATE)};
+                double multiplier{controllers[0].multiplier(
+                    mean, {detrended[0]}, bucketLength, model.params().learnRate(), DECAY_RATE)};
                 for (const auto& trend : trends) {
                     trend->decayRate(multiplier * trend->decayRate());
                 }
@@ -733,8 +807,9 @@ void CTimeSeriesModelTest::testAddSamples() {
                 for (std::size_t d = 0u; d < 3; ++d) {
                     predictionError[d] = detrended[0][d] - prediction[d];
                 }
-                double multiplier{
-                    controllers[1].multiplier(prediction, {predictionError}, bucketLength, model.params().learnRate(), DECAY_RATE)};
+                double multiplier{controllers[1].multiplier(
+                    prediction, {predictionError}, bucketLength,
+                    model.params().learnRate(), DECAY_RATE)};
                 prior.decayRate(multiplier * prior.decayRate());
             }
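Note: a pattern recurring in the hunks above and below: CTimeSeriesDecomposition::addPoint returns true when it adds a new periodic component, at which point the residual prior is stale, so the tests reset it and replay the model's sliding window of recent (time, value) pairs, detrended against the updated decomposition. Sketched for the univariate case, following the test code (hypothetical helper name):

    // On a new trend component, rebuild the residual prior from the recent
    // window so it reflects the freshly detrended values.
    void maybeReinitializePrior(maths::CTimeSeriesDecomposition& trend,
                                maths::CNormalMeanPrecConjugate& prior,
                                const maths::CUnivariateTimeSeriesModel& model,
                                double learnRate,
                                core_t::TTime time,
                                double sample) {
        if (trend.addPoint(time, sample)) {
            prior.setToNonInformative(0.0, DECAY_RATE);
            for (const auto& value : model.slidingWindow()) {
                prior.addSamples(maths::CConstantWeights::COUNT,
                                 {trend.detrend(value.first, value.second, 0.0)},
                                 {{learnRate}});
            }
        }
    }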
@@ -769,25 +844,31 @@ void CTimeSeriesModelTest::testPredict() {
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
         maths::CNormalMeanPrecConjugate prior{univariateNormal()};
         auto controllers = decayRateControllers(1);
-        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior, &controllers};
+        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0,
+                                                trend, prior, &controllers};
         TDoubleVec samples;
         rng.generateNormalSamples(0.0, 4.0, 1008, samples);

         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)};
         core_t::TTime time{0};
         for (auto sample : samples) {
-            sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
-                                            * static_cast<double>(time) / 86400.0);
+            sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                            static_cast<double>(time) / 86400.0);

-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});

             if (trend.addPoint(time, sample)) {
                 prior.setToNonInformative(0.0, DECAY_RATE);
                 for (const auto& value : model.slidingWindow()) {
-                    prior.addSamples(maths::CConstantWeights::COUNT, {trend.detrend(value.first, value.second, 0.0)}, {{learnRate}});
+                    prior.addSamples(maths::CConstantWeights::COUNT,
+                                     {trend.detrend(value.first, value.second, 0.0)},
+                                     {{learnRate}});
                 }
             }
-            prior.addSamples(maths::CConstantWeights::COUNT, {trend.detrend(time, sample, 0.0)}, maths::CConstantWeights::SINGLE_UNIT);
+            prior.addSamples(maths::CConstantWeights::COUNT,
+                             {trend.detrend(time, sample, 0.0)},
+                             maths::CConstantWeights::SINGLE_UNIT);
             prior.propagateForwardsByTime(1.0);

             time += bucketLength;
@@ -795,12 +876,14 @@ void CTimeSeriesModelTest::testPredict() {
         TMeanAccumulator meanError;
         for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
-            double trend_{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
-                                                * static_cast<double>(time_) / 86400.0)};
-            double expected{maths::CBasicStatistics::mean(trend.value(time_))
-                            + maths::CBasicStatistics::mean(prior.marginalLikelihoodConfidenceInterval(0.0))};
+            double trend_{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                                static_cast<double>(time_) / 86400.0)};
+            double expected{maths::CBasicStatistics::mean(trend.value(time_)) +
+                            maths::CBasicStatistics::mean(
+                                prior.marginalLikelihoodConfidenceInterval(0.0))};
             double predicted{model.predict(time_)[0]};
-            LOG_DEBUG(<< "expected = " << expected << " predicted = " << predicted << " (trend = " << trend_ << ")");
+            LOG_DEBUG(<< "expected = " << expected << " predicted = " << predicted
+                      << " (trend = " << trend_ << ")");
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, predicted, 1e-3 * expected);
             CPPUNIT_ASSERT(std::fabs(trend_ - predicted) / trend_ < 0.3);
             meanError.add(std::fabs(trend_ - predicted) / trend_);
@@ -828,38 +911,49 @@ void CTimeSeriesModelTest::testPredict() {
         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)};
         core_t::TTime time{0};
         for (auto sample : samples) {
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
             time += bucketLength;
         }

         maths::CModel::TSizeDoublePr1Vec empty;
-        double predicted[]{model.predict(time, empty, {-2.0})[0], model.predict(time, empty, {12.0})[0]};
-
-        LOG_DEBUG(<< "expected(0) = " << maths::CBasicStatistics::mean(modes[0]) << " actual(0) = " << predicted[0]);
-        LOG_DEBUG(<< "expected(1) = " << maths::CBasicStatistics::mean(modes[1]) << " actual(1) = " << predicted[1]);
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(modes[0]), predicted[0], 0.1 * maths::CBasicStatistics::mean(modes[0]));
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(modes[1]), predicted[1], 0.01 * maths::CBasicStatistics::mean(modes[1]));
+        double predicted[]{model.predict(time, empty, {-2.0})[0],
+                           model.predict(time, empty, {12.0})[0]};
+
+        LOG_DEBUG(<< "expected(0) = " << maths::CBasicStatistics::mean(modes[0])
+                  << " actual(0) = " << predicted[0]);
+        LOG_DEBUG(<< "expected(1) = " << maths::CBasicStatistics::mean(modes[1])
+                  << " actual(1) = " << predicted[1]);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(modes[0]),
+                                     predicted[0],
+                                     0.1 * maths::CBasicStatistics::mean(modes[0]));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(modes[1]),
+                                     predicted[1],
+                                     0.01 * maths::CBasicStatistics::mean(modes[1]));
     }

     LOG_DEBUG(<< "Multivariate Seasonal");
     {
         double learnRate{modelParams(bucketLength).learnRate()};
-        TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}};
+        TDecompositionPtr10Vec trends{
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}},
+            TDecompositionPtr{new maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}}};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
-        maths::CMultivariateTimeSeriesModel model{maths::CMultivariateTimeSeriesModel{modelParams(bucketLength), *trends[0], prior}};
+        maths::CMultivariateTimeSeriesModel model{maths::CMultivariateTimeSeriesModel{
+            modelParams(bucketLength), *trends[0], prior}};

         TDoubleVecVec samples;
         TDoubleVec mean{0.0, 2.0, 1.0};
-        rng.generateMultivariateNormalSamples(mean, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);
+        rng.generateMultivariateNormalSamples(
+            mean, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);

         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
         core_t::TTime time{0};
         for (auto& sample : samples) {
             for (auto& coordinate : sample) {
-                coordinate += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
-                                                    * static_cast<double>(time) / 86400.0);
+                coordinate += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                                    static_cast<double>(time) / 86400.0);
             }
             bool reinitialize{false};
             TDouble10Vec detrended;
@@ -870,10 +964,12 @@ void CTimeSeriesModelTest::testPredict() {
             if (reinitialize) {
                 reinitializePrior(learnRate, model, trends, prior);
             }
-            prior.addSamples(maths::CConstantWeights::COUNT, {detrended}, maths::CConstantWeights::singleUnit(3));
+            prior.addSamples(maths::CConstantWeights::COUNT, {detrended},
+                             maths::CConstantWeights::singleUnit(3));
             prior.propagateForwardsByTime(1.0);

-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
             time += bucketLength;
         }
@@ -883,14 +979,17 @@ void CTimeSeriesModelTest::testPredict() {
         maths::CMultivariatePrior::TSizeDoublePr10Vec condition;
         for (std::size_t i = 0u; i < mean.size(); ++i) {
             double trend_{mean[i] + 10.0 +
-                          5.0 * std::sin(boost::math::double_constants::two_pi
-                                         * static_cast<double>(time_) / 86400.0)};
-            maths::CMultivariatePrior::TUnivariatePriorPtr margin{prior.univariate(marginalize, condition).first};
+                          5.0 * std::sin(boost::math::double_constants::two_pi *
+                                         static_cast<double>(time_) / 86400.0)};
+            maths::CMultivariatePrior::TUnivariatePriorPtr margin{
+                prior.univariate(marginalize, condition).first};
             double expected{maths::CBasicStatistics::mean(trends[i]->value(time_)) +
-                            maths::CBasicStatistics::mean(margin->marginalLikelihoodConfidenceInterval(0.0))};
+                            maths::CBasicStatistics::mean(
+                                margin->marginalLikelihoodConfidenceInterval(0.0))};
             double predicted{model.predict(time_)[i]};
             --marginalize[std::min(i, marginalize.size() - 1)];
-            LOG_DEBUG(<< "expected = " << expected << " predicted = " << predicted << " (trend = " << trend_ << ")");
+            LOG_DEBUG(<< "expected = " << expected << " predicted = " << predicted
+                      << " (trend = " << trend_ << ")");
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, predicted, 1e-3 * expected);
             CPPUNIT_ASSERT(std::fabs(trend_ - predicted) / trend_ < 0.3);
         }
@@ -899,11 +998,13 @@ void CTimeSeriesModelTest::testPredict() {
     LOG_DEBUG(<< "Multivariate nearest mode");
     {
-        TDecompositionPtr10Vec trends{TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
-                                      TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}};
+        TDecompositionPtr10Vec trends{
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}},
+            TDecompositionPtr{new maths::CTimeSeriesDecompositionStub{}}};
         maths::CMultivariateMultimodalPrior<3> prior{multivariateMultimodal()};
-        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), *trends[0], prior};
+        maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength),
+                                                  *trends[0], prior};

         TMeanAccumulator2Vec modes[2]{TMeanAccumulator2Vec(3), TMeanAccumulator2Vec(3)};
         TDoubleVecVec samples;
@@ -930,18 +1031,25 @@ void CTimeSeriesModelTest::testPredict() {
         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
         core_t::TTime time{0};
         for (const auto& sample : samples) {
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
             time += bucketLength;
         }

         maths::CModel::TSizeDoublePr1Vec empty;
-        TDouble2Vec expected[]{maths::CBasicStatistics::mean(modes[0]), maths::CBasicStatistics::mean(modes[1])};
-        TDouble2Vec predicted[]{model.predict(time, empty, {0.0, 0.0, 0.0}), model.predict(time, empty, {10.0, 10.0, 10.0})};
+        TDouble2Vec expected[]{maths::CBasicStatistics::mean(modes[0]),
+                               maths::CBasicStatistics::mean(modes[1])};
+        TDouble2Vec predicted[]{model.predict(time, empty, {0.0, 0.0, 0.0}),
+                                model.predict(time, empty, {10.0, 10.0, 10.0})};
         for (std::size_t i = 0u; i < 3; ++i) {
-            LOG_DEBUG(<< "expected(0) = " << expected[0][i] << " actual(0) = " << predicted[0][i]);
-            LOG_DEBUG(<< "expected(1) = " << expected[1][i] << " actual(1) = " << predicted[1][i]);
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[0][i], predicted[0][i], std::fabs(0.2 * expected[0][i]));
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[1][i], predicted[1][i], std::fabs(0.01 * expected[1][i]));
+            LOG_DEBUG(<< "expected(0) = " << expected[0][i]
+                      << " actual(0) = " << predicted[0][i]);
+            LOG_DEBUG(<< "expected(1) = " << expected[1][i]
+                      << " actual(1) = " << predicted[1][i]);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[0][i], predicted[0][i],
+                                         std::fabs(0.2 * expected[0][i]));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[1][i], predicted[1][i],
+                                         std::fabs(0.01 * expected[1][i]));
         }
     }
 }
@@ -967,14 +1075,13 @@ void CTimeSeriesModelTest::testProbability() {
     LOG_DEBUG(<< "Univariate");
     {
         maths::CUnivariateTimeSeriesModel models[]{
+            maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 1,
+                                              maths::CTimeSeriesDecompositionStub{},
+                                              univariateNormal(), 0, false},
             maths::CUnivariateTimeSeriesModel{
-                modelParams(bucketLength), 1, maths::CTimeSeriesDecompositionStub{}, univariateNormal(), 0, false},
-            maths::CUnivariateTimeSeriesModel{modelParams(bucketLength),
-                                              1,
-                                              maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
-                                              univariateNormal(),
-                                              nullptr,
-                                              false}};
+                modelParams(bucketLength), 1,
+                maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
+                univariateNormal(), nullptr, false}};

         TDoubleVec samples;
         rng.generateNormalSamples(10.0, 4.0, 1000, samples);
@@ -982,20 +1089,27 @@ void CTimeSeriesModelTest::testProbability() {
         core_t::TTime time{0};
         const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(1)};
         for (auto sample : samples) {
-            double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 86400.0)};
-            models[0].addSamples(addSampleParams(weight), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
-            models[1].addSamples(addSampleParams(weight), {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
+            double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                              static_cast<double>(time) / 86400.0)};
+            models[0].addSamples(addSampleParams(weight),
+                                 {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            models[1].addSamples(
+                addSampleParams(weight),
+                {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
             time += bucketLength;
         }

         TTime2Vec1Vec time_{{time}};
         TDouble2Vec sample{15.0};

-        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided, maths_t::E_OneSidedAbove};
+        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided,
+                                                        maths_t::E_OneSidedAbove};
         double confidences[]{0.0, 20.0, 50.0};
         bool empties[]{true, false};
-        maths_t::TWeightStyleVec weightStyles[]{{maths_t::E_SampleCountVarianceScaleWeight},
-                                                {maths_t::E_SampleCountVarianceScaleWeight, maths_t::E_SampleSeasonalVarianceScaleWeight}};
+        maths_t::TWeightStyleVec weightStyles[]{
+            {maths_t::E_SampleCountVarianceScaleWeight},
+            {maths_t::E_SampleCountVarianceScaleWeight,
+             maths_t::E_SampleSeasonalVarianceScaleWeight}};
         TDouble2Vec4Vec weights[]{{{0.9}}, {{1.1}, {1.8}}};
 
         for (auto calculation : calculations) {
@@ -1005,7 +1119,8 @@ void CTimeSeriesModelTest::testProbability() {
                 for (auto empty : empties) {
                     LOG_DEBUG(<< " empty = " << empty);
                     for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
-                        LOG_DEBUG(<< " weights = " << core::CContainerPrinter::print(weights[i]));
+                        LOG_DEBUG(<< " weights = "
+                                  << core::CContainerPrinter::print(weights[i]));
                         double expectedProbability[2];
                         maths_t::ETail expectedTail[2];
                         {
@@ -1015,15 +1130,12 @@ void CTimeSeriesModelTest::testProbability() {
                             }
                             double lb[2], ub[2];
                             models[0].residualModel().probabilityOfLessLikelySamples(
-                                calculation, weightStyles[i], sample, {weights_}, lb[0], ub[0], expectedTail[0]);
+                                calculation, weightStyles[i], sample,
+                                {weights_}, lb[0], ub[0], expectedTail[0]);
                             models[1].residualModel().probabilityOfLessLikelySamples(
-                                calculation,
-                                weightStyles[i],
+                                calculation, weightStyles[i],
                                 {models[1].trendModel().detrend(time, sample[0], confidence)},
-                                {weights_},
-                                lb[1],
-                                ub[1],
-                                expectedTail[1]);
+                                {weights_}, lb[1], ub[1], expectedTail[1]);
                             expectedProbability[0] = (lb[0] + ub[0]) / 2.0;
                             expectedProbability[1] = (lb[1] + ub[1]) / 2.0;
                         }
@@ -1039,8 +1151,12 @@ void CTimeSeriesModelTest::testProbability() {
                                     .addWeights(weights[i]);
                             bool conditional;
                             TSize1Vec mostAnomalousCorrelate;
-                            models[0].probability(params, time_, {sample}, probability[0], tail[0], conditional, mostAnomalousCorrelate);
-                            models[1].probability(params, time_, {sample}, probability[1], tail[1], conditional, mostAnomalousCorrelate);
+                            models[0].probability(params, time_, {sample},
+                                                  probability[0], tail[0], conditional,
+                                                  mostAnomalousCorrelate);
+                            models[1].probability(params, time_, {sample},
+                                                  probability[1], tail[1], conditional,
+                                                  mostAnomalousCorrelate);
                         }
 
                         CPPUNIT_ASSERT_EQUAL(expectedProbability[0], probability[0]);
@@ -1056,31 +1172,34 @@ void CTimeSeriesModelTest::testProbability() {
     LOG_DEBUG(<< "Multivariate");
     {
         maths::CMultivariateTimeSeriesModel models[]{
-            maths::CMultivariateTimeSeriesModel{
-                modelParams(bucketLength), maths::CTimeSeriesDecompositionStub{}, multivariateNormal(), 0, false},
             maths::CMultivariateTimeSeriesModel{modelParams(bucketLength),
-                                                maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
-                                                multivariateNormal(),
-                                                nullptr,
-                                                false}};
+                                                maths::CTimeSeriesDecompositionStub{},
+                                                multivariateNormal(), 0, false},
+            maths::CMultivariateTimeSeriesModel{
+                modelParams(bucketLength),
+                maths::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
+                multivariateNormal(), nullptr, false}};
 
         TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({10.0, 15.0, 11.0}, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);
+        rng.generateMultivariateNormalSamples(
+            {10.0, 15.0, 11.0},
+            {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);
 
         core_t::TTime time{0};
         const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(3)};
-        for (const auto& sample : samples)
-        {
+        for (const auto& sample : samples) {
             TDouble2Vec sample_(sample);
-            models[0].addSamples(addSampleParams(weight), {core::make_triple(time, sample_, TAG)});
+            models[0].addSamples(addSampleParams(weight),
+                                 {core::make_triple(time, sample_, TAG)});
 
-            double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
-                                              * static_cast<double>(time) / 86400.0)};
+            double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                              static_cast<double>(time) / 86400.0)};
             for (auto& component : sample_) {
                 component += trend;
             }
-            models[1].addSamples(addSampleParams(weight), {core::make_triple(time, sample_, TAG)});
+            models[1].addSamples(addSampleParams(weight),
+                                 {core::make_triple(time, sample_, TAG)});
 
             time += bucketLength;
         }
@@ -1088,11 +1207,14 @@ void CTimeSeriesModelTest::testProbability() {
         TTime2Vec1Vec time_{{time}};
         TDouble2Vec sample{15.0, 14.0, 16.0};
-        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided, maths_t::E_OneSidedAbove};
+        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided,
+                                                        maths_t::E_OneSidedAbove};
         double confidences[]{0.0, 20.0, 50.0};
         bool empties[]{true, false};
-        maths_t::TWeightStyleVec weightStyles[]{{maths_t::E_SampleCountVarianceScaleWeight},
-                                                {maths_t::E_SampleCountVarianceScaleWeight, maths_t::E_SampleSeasonalVarianceScaleWeight}};
+        maths_t::TWeightStyleVec weightStyles[]{
+            {maths_t::E_SampleCountVarianceScaleWeight},
+            {maths_t::E_SampleCountVarianceScaleWeight,
+             maths_t::E_SampleSeasonalVarianceScaleWeight}};
         TDouble2Vec4Vec weights[]{{{0.9, 0.9, 0.8}}, {{1.1, 1.0, 1.2}, {1.8, 1.7, 1.6}}};
 
         for (auto calculation : calculations) {
@@ -1102,7 +1224,8 @@ void CTimeSeriesModelTest::testProbability() {
                 for (auto empty : empties) {
                     LOG_DEBUG(<< " empty = " << empty);
                     for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
-                        LOG_DEBUG(<< " weights = " << core::CContainerPrinter::print(weights[i]));
+                        LOG_DEBUG(<< " weights = "
+                                  << core::CContainerPrinter::print(weights[i]));
                         double expectedProbability[2];
                         TTail10Vec expectedTail[2];
                         {
@@ -1112,13 +1235,16 @@ void CTimeSeriesModelTest::testProbability() {
                            }
                            double lb[2], ub[2];
                            models[0].residualModel().probabilityOfLessLikelySamples(
-                                calculation, weightStyles[i], {TDouble10Vec(sample)}, {weights_}, lb[0], ub[0], expectedTail[0]);
+                                calculation, weightStyles[i], {TDouble10Vec(sample)},
+                                {weights_}, lb[0], ub[0], expectedTail[0]);
                            TDouble10Vec detrended;
                            for (std::size_t j = 0u; j < sample.size(); ++j) {
-                                detrended.push_back(models[1].trendModel()[j]->detrend(time, sample[j], confidence));
+                                detrended.push_back(models[1].trendModel()[j]->detrend(
+                                    time, sample[j], confidence));
                            }
                            models[1].residualModel().probabilityOfLessLikelySamples(
-                                calculation, weightStyles[i], {detrended}, {weights_}, lb[1], ub[1], expectedTail[1]);
+                                calculation, weightStyles[i], {detrended},
+                                {weights_}, lb[1], ub[1], expectedTail[1]);
                            expectedProbability[0] = (lb[0] + ub[0]) / 2.0;
                            expectedProbability[1] = (lb[1] + ub[1]) / 2.0;
                         }
@@ -1134,8 +1260,12 @@ void CTimeSeriesModelTest::testProbability() {
                                     .addWeights(weights[i]);
                             bool conditional;
                             TSize1Vec mostAnomalousCorrelate;
-                            models[0].probability(params, time_, {sample}, probability[0], tail[0], conditional, mostAnomalousCorrelate);
-                            models[1].probability(params, time_, {sample}, probability[1], tail[1], conditional, mostAnomalousCorrelate);
+                            models[0].probability(params, time_, {sample},
+                                                  probability[0], tail[0], conditional,
+                                                  mostAnomalousCorrelate);
+                            models[1].probability(params, time_, {sample},
+                                                  probability[1], tail[1], conditional,
+                                                  mostAnomalousCorrelate);
                         }
                         CPPUNIT_ASSERT_EQUAL(expectedProbability[0], probability[0]);
@@ -1153,7 +1283,8 @@ void CTimeSeriesModelTest::testProbability() {
     LOG_DEBUG(<< "Anomalies");
     {
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
-        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 1, trend, univariateNormal()};
+        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 1,
+                                                trend, univariateNormal()};
 
         TSizeVec anomalies;
         rng.generateUniformSamples(100, 1000, 10, anomalies);
@@ -1167,32 +1298,33 @@ void CTimeSeriesModelTest::testProbability() {
         TDouble2Vec4VecVec weights{weight};
         std::size_t bucket{0};
         core_t::TTime time{0};
-        for (auto sample : samples)
-        {
+        for (auto sample : samples) {
             if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++)) {
                 sample += 10.0;
             }
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
 
             TTail2Vec tail;
             double probability;
             bool conditional;
             TSize1Vec mostAnomalousCorrelate;
-            model.probability(
-                computeProbabilityParams(weight), {{time}}, {{sample}}, probability, tail, conditional, mostAnomalousCorrelate);
+            model.probability(computeProbabilityParams(weight), {{time}}, {{sample}},
+                              probability, tail, conditional, mostAnomalousCorrelate);
             smallest.add({probability, bucket - 1});
 
             time += bucketLength;
         }
 
         TSizeVec anomalies_;
-        std::transform(
-            smallest.begin(), smallest.end(), std::back_inserter(anomalies_), [](const TDoubleSizePr& value) { return value.second; });
+        std::transform(smallest.begin(), smallest.end(), std::back_inserter(anomalies_),
+                       [](const TDoubleSizePr& value) { return value.second; });
         std::sort(anomalies_.begin(), anomalies_.end());
         LOG_DEBUG(<< "expected anomalies = " << core::CContainerPrinter::print(anomalies));
         LOG_DEBUG(<< "actual anomalies = " << core::CContainerPrinter::print(anomalies_));
-        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(anomalies), core::CContainerPrinter::print(anomalies_));
+        CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(anomalies),
+                             core::CContainerPrinter::print(anomalies_));
     }
 }
 
@@ -1223,24 +1355,30 @@ void CTimeSeriesModelTest::testWeights() {
         TDouble2Vec4VecVec weights{{{1.0}}};
         core_t::TTime time{0};
         for (auto sample : samples) {
-            double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
-                                               * static_cast<double>(time) / 86400.0)};
+            double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                               static_cast<double>(time) / 86400.0)};
             sample = scale * (1.0 + 0.1 * sample);
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
             time += bucketLength;
         }
 
         LOG_DEBUG(<< "Seasonal");
         TMeanAccumulator error;
         for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
-            double dataScale{std::pow(1.0 + 0.5 * std::sin(boost::math::double_constants::two_pi
-                                                           * static_cast<double>(time_) / 86400.0),
-                                      2.0)};
-
-            double expectedScale{model.trendModel().scale(time_, model.residualModel().marginalLikelihoodVariance(), 0.0).second};
+            double dataScale{std::pow(
+                1.0 + 0.5 * std::sin(boost::math::double_constants::two_pi *
+                                     static_cast<double>(time_) / 86400.0),
+                2.0)};
+
+            double expectedScale{
+                model.trendModel()
+                    .scale(time_, model.residualModel().marginalLikelihoodVariance(), 0.0)
+                    .second};
             double scale{model.seasonalWeight(0.0, time_)[0]};
-            LOG_DEBUG(<< "expected weight = " << expectedScale << ", weight = " << scale << " (data weight = " << dataScale << ")");
+            LOG_DEBUG(<< "expected weight = " << expectedScale << ", weight = " << scale
+                      << " (data weight = " << dataScale << ")");
             CPPUNIT_ASSERT_EQUAL(std::max(expectedScale, MINIMUM_SEASONAL_SCALE), scale);
 
             error.add(std::fabs(scale - dataScale) / dataScale);
@@ -1267,33 +1405,39 @@ void CTimeSeriesModelTest::testWeights() {
         maths::CMultivariateTimeSeriesModel model{modelParams(bucketLength), trend, prior};
 
         TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({10.0, 15.0, 11.0}, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1008, samples);
+        rng.generateMultivariateNormalSamples(
+            {10.0, 15.0, 11.0},
+            {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1008, samples);
 
         TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}};
         core_t::TTime time{0};
         for (auto& sample : samples) {
-            double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi
-                                               * static_cast<double>(time) / 86400.0)};
-            for (auto& component : sample)
-            {
+            double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                               static_cast<double>(time) / 86400.0)};
+            for (auto& component : sample) {
                 component = scale * (1.0 + 0.1 * component);
             }
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
             time += bucketLength;
         }
 
         LOG_DEBUG(<< "Seasonal");
         TMeanAccumulator error;
         for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
-            double dataScale{std::pow(1.0 + 0.5 * std::sin(boost::math::double_constants::two_pi
-                                                           * static_cast<double>(time_) / 86400.0),
-                                      2.0)};
+            double dataScale{std::pow(
+                1.0 + 0.5 * std::sin(boost::math::double_constants::two_pi *
+                                     static_cast<double>(time_) / 86400.0),
+                2.0)};
 
             for (std::size_t i = 0u; i < 3; ++i) {
                 double expectedScale{
-                    model.trendModel()[i]->scale(time_, model.residualModel().marginalLikelihoodVariances()[i], 0.0).second};
+                    model.trendModel()[i]
+                        ->scale(time_, model.residualModel().marginalLikelihoodVariances()[i], 0.0)
+                        .second};
                 double scale{model.seasonalWeight(0.0, time_)[i]};
-                LOG_DEBUG(<< "expected weight = " << expectedScale << ", weight = " << scale << " (data weight = " << dataScale << ")");
+                LOG_DEBUG(<< "expected weight = " << expectedScale << ", weight = " << scale
+                          << " (data weight = " << dataScale << ")");
                 CPPUNIT_ASSERT_EQUAL(std::max(expectedScale, MINIMUM_SEASONAL_SCALE), scale);
                 error.add(std::fabs(scale - dataScale) / dataScale);
             }
@@ -1329,22 +1473,27 @@ void CTimeSeriesModelTest::testMemoryUsage() {
     {
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
         auto controllers = decayRateControllers(1);
-        boost::scoped_ptr<maths::CUnivariateTimeSeriesModel> model{
-            new maths::CUnivariateTimeSeriesModel{modelParams(bucketLength), 0, trend, univariateNormal(), &controllers}};
+        boost::scoped_ptr<maths::CUnivariateTimeSeriesModel> model{new maths::CUnivariateTimeSeriesModel{
+            modelParams(bucketLength), 0, trend, univariateNormal(), &controllers}};
 
         TDoubleVec samples;
         rng.generateNormalSamples(1.0, 4.0, 1000, samples);
 
         TDouble2Vec4VecVec weights{{{1.0}}};
         core_t::TTime time{0};
         for (auto sample : samples) {
-            sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 86400.0);
+            sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                            static_cast<double>(time) / 86400.0);
             trend.addPoint(time, sample);
-            model->addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model->addSamples(addSampleParams(weights),
+                              {core::make_triple(time, TDouble2Vec{sample}, TAG)});
             time += bucketLength;
         }
 
-        std::size_t expectedSize{sizeof(maths::CTimeSeriesDecomposition) + trend.memoryUsage() + sizeof(maths::CNormalMeanPrecConjugate) +
-                                 sizeof(maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary) + 2 * controllers[0].memoryUsage()};
+        std::size_t expectedSize{
+            sizeof(maths::CTimeSeriesDecomposition) + trend.memoryUsage() +
+            sizeof(maths::CNormalMeanPrecConjugate) +
+            sizeof(maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary) +
+            2 * controllers[0].memoryUsage()};
         std::size_t size = model->memoryUsage();
         LOG_DEBUG(<< "size " << size << " expected " << expectedSize);
         CPPUNIT_ASSERT(size < 1.1 * expectedSize);
@@ -1360,23 +1509,27 @@ void CTimeSeriesModelTest::testMemoryUsage() {
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
         auto controllers = decayRateControllers(3);
-        boost::scoped_ptr<maths::CMultivariateTimeSeriesModel> model{
-            new maths::CMultivariateTimeSeriesModel{modelParams(bucketLength), trend, prior, &controllers}};
+        boost::scoped_ptr<maths::CMultivariateTimeSeriesModel> model{new maths::CMultivariateTimeSeriesModel{
+            modelParams(bucketLength), trend, prior, &controllers}};
 
         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
         core_t::TTime time{0};
         for (auto& sample : samples) {
             for (auto& coordinate : sample) {
-                coordinate += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 86400.0);
+                coordinate += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+                                                    static_cast<double>(time) / 86400.0);
             }
             trend.addPoint(time, sample[0]);
-            model->addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model->addSamples(addSampleParams(weights),
+                              {core::make_triple(time, TDouble2Vec(sample), TAG)});
             time += bucketLength;
         }
 
-        std::size_t expectedSize{3 * sizeof(maths::CTimeSeriesDecomposition) + 3 * trend.memoryUsage() +
-                                 sizeof(maths::CMultivariateNormalConjugate<3>) +
-                                 sizeof(maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary) + 2 * controllers[0].memoryUsage()};
+        std::size_t expectedSize{
+            3 * sizeof(maths::CTimeSeriesDecomposition) + 3 * trend.memoryUsage() +
+            sizeof(maths::CMultivariateNormalConjugate<3>) +
+            sizeof(maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary) +
+            2 * controllers[0].memoryUsage()};
         std::size_t size = model->memoryUsage();
         LOG_DEBUG(<< "size " << size << " expected " << expectedSize);
         CPPUNIT_ASSERT(size < 1.1 * expectedSize);
@@ -1401,14 +1554,16 @@ void CTimeSeriesModelTest::testPersist() {
     {
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
         auto controllers = decayRateControllers(1);
-        maths::CUnivariateTimeSeriesModel origModel{params, 1, trend, univariateNormal(), &controllers};
+        maths::CUnivariateTimeSeriesModel origModel{
+            params, 1, trend, univariateNormal(), &controllers};
 
         TDoubleVec samples;
         rng.generateNormalSamples(1.0, 4.0, 1000, samples);
 
         TDouble2Vec4VecVec weights{{{1.0}}};
         core_t::TTime time{0};
         for (auto sample : samples) {
-            origModel.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            origModel.addSamples(addSampleParams(weights),
+                                 {core::make_triple(time, TDouble2Vec{sample}, TAG)});
             time += bucketLength;
         }
 
@@ -1427,8 +1582,10 @@ void CTimeSeriesModelTest::testPersist() {
         CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
         core::CRapidXmlStateRestoreTraverser traverser(parser);
 
-        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, distributionParams};
+        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData,
+                                                             DECAY_RATE};
+        maths::STimeSeriesDecompositionRestoreParams decompositionParams{
+            24.0 * DECAY_RATE, bucketLength, distributionParams};
         maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};
         maths::CUnivariateTimeSeriesModel restoredModel{restoreParams, traverser};
 
@@ -1438,17 +1595,21 @@ void CTimeSeriesModelTest::testPersist() {
     LOG_DEBUG(<< "Multivariate");
     {
         TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({11.0, 10.0, 12.0}, {{4.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);
+        rng.generateMultivariateNormalSamples(
+            {11.0, 10.0, 12.0},
+            {{4.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);
 
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
         auto controllers = decayRateControllers(3);
-        maths::CMultivariateTimeSeriesModel origModel{modelParams(bucketLength), trend, prior, &controllers};
+        maths::CMultivariateTimeSeriesModel origModel{modelParams(bucketLength),
+                                                      trend, prior, &controllers};
 
         TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
         core_t::TTime time{0};
         for (const auto& sample : samples) {
-            origModel.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            origModel.addSamples(addSampleParams(weights),
+                                 {core::make_triple(time, TDouble2Vec(sample), TAG)});
             time += bucketLength;
         }
 
@@ -1467,8 +1628,10 @@ void CTimeSeriesModelTest::testPersist() {
         CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
         core::CRapidXmlStateRestoreTraverser traverser(parser);
 
-        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, distributionParams};
+        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData,
+                                                             DECAY_RATE};
+        maths::STimeSeriesDecompositionRestoreParams decompositionParams{
+            24.0 * DECAY_RATE, bucketLength, distributionParams};
         maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};
         maths::CMultivariateTimeSeriesModel restoredModel{restoreParams, traverser};
 
@@ -1519,20 +1682,25 @@ void CTimeSeriesModelTest::testUpgrade() {
         CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml));
         core::CRapidXmlStateRestoreTraverser traverser(parser);
 
-        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, distributionParams};
+        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData,
+                                                             DECAY_RATE};
+        maths::STimeSeriesDecompositionRestoreParams decompositionParams{
+            24.0 * DECAY_RATE, bucketLength, distributionParams};
         maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};
         maths::CUnivariateTimeSeriesModel restoredModel{restoreParams, traverser};
 
         TStrVec expectedInterval;
         TStrVec interval;
-        for (core_t::TTime time = 600000, i = 0; i < static_cast<core_t::TTime>(expectedIntervals.size()); time += halfHour, ++i) {
+        for (core_t::TTime time = 600000, i = 0;
+             i < static_cast<core_t::TTime>(expectedIntervals.size());
+             time += halfHour, ++i) {
             expectedInterval.clear();
             interval.clear();
 
             core::CStringUtils::tokenise(",", expectedIntervals[i], expectedInterval, empty);
             std::string interval_{core::CContainerPrinter::print(restoredModel.confidenceInterval(
-                time, 90.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(1)))};
+                time, 90.0, maths::CConstantWeights::COUNT,
+                maths::CConstantWeights::unit(1)))};
             core::CStringUtils::replace("[", "", interval_);
             core::CStringUtils::replace("]", "", interval_);
             core::CStringUtils::replace(" ", "", interval_);
@@ -1542,7 +1710,8 @@ void CTimeSeriesModelTest::testUpgrade() {
             CPPUNIT_ASSERT_EQUAL(expectedInterval.size(), interval.size());
             for (std::size_t j = 0u; j < expectedInterval.size(); ++j) {
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(
-                    boost::lexical_cast<double>(expectedInterval[j]), boost::lexical_cast<double>(interval[j]), 0.0001);
+                    boost::lexical_cast<double>(expectedInterval[j]),
+                    boost::lexical_cast<double>(interval[j]), 0.0001);
             }
         }
     }
@@ -1563,20 +1732,25 @@ void CTimeSeriesModelTest::testUpgrade() {
         CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(xml));
         core::CRapidXmlStateRestoreTraverser traverser(parser);
 
-        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData, DECAY_RATE};
-        maths::STimeSeriesDecompositionRestoreParams decompositionParams{24.0 * DECAY_RATE, bucketLength, distributionParams};
+        maths::SDistributionRestoreParams distributionParams{maths_t::E_ContinuousData,
+                                                             DECAY_RATE};
+        maths::STimeSeriesDecompositionRestoreParams decompositionParams{
+            24.0 * DECAY_RATE, bucketLength, distributionParams};
         maths::SModelRestoreParams restoreParams{params, decompositionParams, distributionParams};
         maths::CMultivariateTimeSeriesModel restoredModel{restoreParams, traverser};
 
         TStrVec expectedInterval;
         TStrVec interval;
-        for (core_t::TTime time = 600000, i = 0; i < static_cast<core_t::TTime>(expectedIntervals.size()); time += halfHour, ++i) {
+        for (core_t::TTime time = 600000, i = 0;
+             i < static_cast<core_t::TTime>(expectedIntervals.size());
+             time += halfHour, ++i) {
             expectedInterval.clear();
             interval.clear();
 
             core::CStringUtils::tokenise(",", expectedIntervals[i], expectedInterval, empty);
             std::string interval_{core::CContainerPrinter::print(restoredModel.confidenceInterval(
-                time, 90.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3)))};
+                time, 90.0, maths::CConstantWeights::COUNT,
+                maths::CConstantWeights::unit(3)))};
             core::CStringUtils::replace("[", "", interval_);
             core::CStringUtils::replace("]", "", interval_);
             core::CStringUtils::replace(" ", "", interval_);
@@ -1586,7 +1760,8 @@ void CTimeSeriesModelTest::testUpgrade() {
             CPPUNIT_ASSERT_EQUAL(expectedInterval.size(), interval.size());
             for (std::size_t j = 0u; j < expectedInterval.size(); ++j) {
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(
-                    boost::lexical_cast<double>(expectedInterval[j]), boost::lexical_cast<double>(interval[j]), 0.0001);
+                    boost::lexical_cast<double>(expectedInterval[j]),
+                    boost::lexical_cast<double>(interval[j]), 0.0001);
             }
         }
     }
@@ -1605,13 +1780,15 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations() {
     {
         TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({10.0, 15.0}, {{3.0, 2.9}, {2.9, 2.6}}, 1000, samples);
+        rng.generateMultivariateNormalSamples({10.0, 15.0}, {{3.0, 2.9}, {2.9, 2.6}},
+                                              1000, samples);
 
         maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
         maths::CTimeSeriesCorrelations correlations{MINIMUM_SIGNIFICANT_CORRELATION, DECAY_RATE};
         maths::CNormalMeanPrecConjugate prior{univariateNormal()};
-        maths::CUnivariateTimeSeriesModel models[]{{modelParams(bucketLength), 0, trend, prior, nullptr},
-                                                   {modelParams(bucketLength), 1, trend, prior, nullptr}};
+        maths::CUnivariateTimeSeriesModel models[]{
+            {modelParams(bucketLength), 0, trend, prior, nullptr},
+            {modelParams(bucketLength), 1, trend, prior, nullptr}};
         models[0].modelCorrelations(correlations);
         models[1].modelCorrelations(correlations);
         CTimeSeriesCorrelateModelAllocator allocator;
@@ -1620,8 +1797,10 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations() {
         core_t::TTime time{0};
         for (auto sample : samples) {
             correlations.refresh(allocator);
-            models[0].addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)});
-            models[1].addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)});
+            models[0].addSamples(addSampleParams(weights),
+                                 {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)});
+            models[1].addSamples(addSampleParams(weights),
+                                 {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)});
             correlations.processSamples(maths::CConstantWeights::COUNT);
             time += bucketLength;
         }
@@ -1662,7 +1841,8 @@ void CTimeSeriesModelTest::testAnomalyModel() {
         core_t::TTime bucketLength{600};
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
-        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 1, trend, univariateNormal()};
+        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 1,
+                                                trend, univariateNormal()};
 
         //std::ofstream file;
         //file.open("results.m");
@@ -1680,14 +1860,15 @@ void CTimeSeriesModelTest::testAnomalyModel() {
             if (bucket >= length - 100 && bucket < length - 92) {
                 sample += 8.0;
             }
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
 
             TTail2Vec tail;
             double probability;
             bool conditional;
             TSize1Vec mostAnomalousCorrelate;
-            model.probability(
-                computeProbabilityParams(weight), {{time}}, {{sample}}, probability, tail, conditional, mostAnomalousCorrelate);
+            model.probability(computeProbabilityParams(weight), {{time}}, {{sample}},
+                              probability, tail, conditional, mostAnomalousCorrelate);
             mostAnomalous.add({std::log(probability), bucket});
 
             //scores.push_back(maths::CTools::deviation(probability));
@@ -1703,10 +1884,14 @@ void CTimeSeriesModelTest::testAnomalyModel() {
             anomalyProbabilities.push_back(std::exp(anomaly.first));
         }
         LOG_DEBUG(<< "anomalies = " << core::CContainerPrinter::print(anomalyBuckets));
-        LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(anomalyProbabilities));
+        LOG_DEBUG(<< "probabilities = "
+                  << core::CContainerPrinter::print(anomalyProbabilities));
 
-        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(), 1905) != anomalyBuckets.end());
-        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(), 1906) != anomalyBuckets.end());
-        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(), 1907) != anomalyBuckets.end());
+        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
+                                 1905) != anomalyBuckets.end());
+        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
+                                 1906) != anomalyBuckets.end());
+        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
+                                 1907) != anomalyBuckets.end());
 
         //file << "v = " << core::CContainerPrinter::print(samples) << ";\n";
         //file << "s = " << core::CContainerPrinter::print(scores) << ";\n";
@@ -1724,7 +1909,9 @@ void CTimeSeriesModelTest::testAnomalyModel() {
         std::sort(anomalies.begin(), anomalies.end());
         core_t::TTime bucketLength{600};
         TDoubleVecVec samples;
-        rng.generateMultivariateNormalSamples({10.0, 10.0, 10.0}, {{4.0, 0.9, 0.5}, {0.9, 2.6, 0.1}, {0.5, 0.1, 3.0}}, length, samples);
+        rng.generateMultivariateNormalSamples(
+            {10.0, 10.0, 10.0},
+            {{4.0, 0.9, 0.5}, {0.9, 2.6, 0.1}, {0.5, 0.1, 3.0}}, length, samples);
 
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
         maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
@@ -1749,14 +1936,15 @@ void CTimeSeriesModelTest::testAnomalyModel() {
                 }
             }
             ++bucket;
-            model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)});
+            model.addSamples(addSampleParams(weights),
+                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
 
             TTail2Vec tail;
             double probability;
             bool conditional;
             TSize1Vec mostAnomalousCorrelate;
-            model.probability(
-                computeProbabilityParams(weight), {{time}}, {(sample)}, probability, tail, conditional, mostAnomalousCorrelate);
+            model.probability(computeProbabilityParams(weight), {{time}}, {(sample)},
+                              probability, tail, conditional, mostAnomalousCorrelate);
             mostAnomalous.add({std::log(probability), bucket});
 
             //scores.push_back(maths::CTools::deviation(probability));
@@ -1772,8 +1960,10 @@ void CTimeSeriesModelTest::testAnomalyModel() {
             anomalyProbabilities.push_back(std::exp(anomaly.first));
         }
         LOG_DEBUG(<< "anomalies = " << core::CContainerPrinter::print(anomalyBuckets));
-        LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(anomalyProbabilities));
+        LOG_DEBUG(<< "probabilities = "
+                  << core::CContainerPrinter::print(anomalyProbabilities));
 
-        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(), 1908) != anomalyBuckets.end());
+        CPPUNIT_ASSERT(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
+                                 1908) != anomalyBuckets.end());
 
         //file << "v = [";
         //for (const auto &sample : samples)
         //{
@@ -1807,7 +1997,8 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() {
     using TDouble3VecVec = std::vector<TDouble3Vec>;
 
     TDouble2Vec4VecVec weight{{{1.0}}};
-    auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) {
+    auto updateModel = [&](core_t::TTime time, double value,
+                           maths::CUnivariateTimeSeriesModel& model) {
         weight[0][0] = model.winsorisationWeight(0.0, time, {value});
         model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight),
                          {core::make_triple(time, TDouble2Vec{value}, TAG)});
@@ -1834,7 +2025,9 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() {
         core_t::TTime bucketLength{600};
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
         auto controllers = decayRateControllers(1);
-        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers};
+        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend,
+                                                univariateNormal(DECAY_RATE / 3.0),
+                                                &controllers};
 
         // Add some data to the model.
@@ -1843,7 +2036,8 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() {
         double level{20.0};
         for (auto dl : {10.0, 20.0, 15.0, 50.0, 30.0, 40.0, 15.0, 40.0, 25.0}) {
             level += dl;
-            rng.generateNormalSamples(level, 2.0, 300 + static_cast<std::size_t>(2.0 * dl), samples);
+            rng.generateNormalSamples(
+                level, 2.0, 300 + static_cast<std::size_t>(2.0 * dl), samples);
             for (auto sample : samples) {
                 updateModel(time, sample, model);
                 //updateTestDebug(time, sample, model);
@@ -1864,7 +2058,8 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() {
         rng.generateNormalSamples(level, 2.0, 260, expected);
         for (auto dl : {25.0, 40.0}) {
             level += dl;
-            rng.generateNormalSamples(level, 2.0, 300 + static_cast<std::size_t>(2.0 * dl), samples);
+            rng.generateNormalSamples(
+                level, 2.0, 300 + static_cast<std::size_t>(2.0 * dl), samples);
             expected.insert(expected.end(), samples.begin(), samples.end());
         }
         //std::for_each(expected.begin(), expected.end(),
@@ -1877,23 +2072,27 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() {
         //file << "y = [";
         TDouble3VecVec forecast;
         auto pushErrorBar = [&](const maths::SErrorBar& errorBar) {
-            forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted, errorBar.s_UpperBound});
+            forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted,
+                                errorBar.s_UpperBound});
             //file << errorBar.s_LowerBound << ","
             //     << errorBar.s_Predicted << ","
             //     << errorBar.s_UpperBound << std::endl;
         };
 
         std::string m;
-        model.forecast(time, time + 800 * bucketLength, 90.0, {-1000.0}, {1000.0}, pushErrorBar, m);
+        model.forecast(time, time + 800 * bucketLength, 90.0, {-1000.0},
+                       {1000.0}, pushErrorBar, m);
         //file << "];";
 
         double outOfBounds{0.0};
         for (std::size_t i = 0u; i < forecast.size(); ++i) {
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[i], forecast[i][1], 0.1 * expected[i]);
-            outOfBounds += static_cast<double>(expected[i] < forecast[i][0] || expected[i] > forecast[i][2]);
+            outOfBounds += static_cast<double>(expected[i] < forecast[i][0] ||
+                                               expected[i] > forecast[i][2]);
         }
-        double percentageOutOfBounds{100.0 * outOfBounds / static_cast<double>(forecast.size())};
+        double percentageOutOfBounds{100.0 * outOfBounds /
+                                     static_cast<double>(forecast.size())};
         LOG_DEBUG("% out-of-bounds = " << percentageOutOfBounds);
         CPPUNIT_ASSERT(percentageOutOfBounds < 1.0);
     }
@@ -1903,7 +2102,8 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() {
         core_t::TTime bucketLength{1800};
         maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
         auto controllers = decayRateControllers(1);
-        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(), &controllers};
+        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend,
+                                                univariateNormal(), &controllers};
 
         // Add some data to the model.
@@ -1953,22 +2153,26 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() {
         //file << "y = [";
         TDouble3VecVec forecast;
         auto pushErrorBar = [&](const maths::SErrorBar& errorBar) {
-            forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted, errorBar.s_UpperBound});
+            forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted,
+                                errorBar.s_UpperBound});
             //file << errorBar.s_LowerBound << ","
             //     << errorBar.s_Predicted << ","
             //     << errorBar.s_UpperBound << std::endl;
         };
 
         std::string m;
-        model.forecast(time, time + 2000 * bucketLength, 90.0, {-1000.0}, {1000.0}, pushErrorBar, m);
+        model.forecast(time, time + 2000 * bucketLength, 90.0, {-1000.0},
+                       {1000.0}, pushErrorBar, m);
         //file << "];";
 
         double outOfBounds{0.0};
         for (std::size_t i = 0u; i < forecast.size(); ++i) {
-            outOfBounds += static_cast<double>(expected[i] < forecast[i][0] || expected[i] > forecast[i][2]);
+            outOfBounds += static_cast<double>(expected[i] < forecast[i][0] ||
+                                               expected[i] > forecast[i][2]);
         }
-        double percentageOutOfBounds{100.0 * outOfBounds / static_cast<double>(forecast.size())};
+        double percentageOutOfBounds{100.0 * outOfBounds /
+                                     static_cast<double>(forecast.size())};
         LOG_DEBUG("% out-of-bounds = " << percentageOutOfBounds);
         CPPUNIT_ASSERT(percentageOutOfBounds < 5.0);
     }
@@ -1985,7 +2189,8 @@ void CTimeSeriesModelTest::testLinearScaling() {
     //   2) linearly scale up the same periodic pattern.
 
     TDouble2Vec4VecVec weight{{{1.0}}};
-    auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) {
+    auto updateModel = [&](core_t::TTime time, double value,
+                           maths::CUnivariateTimeSeriesModel& model) {
        weight[0][0] = model.winsorisationWeight(0.0, time, {value});
        model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight),
                         {core::make_triple(time, TDouble2Vec{value}, TAG)});
@@ -2012,7 +2217,8 @@ void CTimeSeriesModelTest::testLinearScaling() {
     core_t::TTime bucketLength{600};
     maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
     auto controllers = decayRateControllers(1);
-    maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers};
+    maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend,
+                                            univariateNormal(DECAY_RATE / 3.0), &controllers};
 
     core_t::TTime time{0};
     TDoubleVec samples;
@@ -2038,7 +2244,8 @@ void CTimeSeriesModelTest::testLinearScaling() {
         sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample);
         updateModel(time, sample, model);
         //updateTestDebug(time, sample, model);
-        auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}});
+        auto x = model.confidenceInterval(time, 90.0,
+                                          {maths_t::E_SampleCountWeight}, {{1.0}});
         CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 1.2 * std::sqrt(noiseVariance));
         CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance));
         time += bucketLength;
@@ -2058,7 +2265,8 @@ void CTimeSeriesModelTest::testLinearScaling() {
         sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample;
         updateModel(time, sample, model);
         //updateTestDebug(time, sample, model);
-        auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}});
+        auto x = model.confidenceInterval(time, 90.0,
+                                          {maths_t::E_SampleCountWeight}, {{1.0}});
         CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.1 * std::sqrt(noiseVariance));
         CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance));
         time += bucketLength;
@@ -2076,7 +2284,8 @@ void CTimeSeriesModelTest::testDaylightSaving() {
     LOG_DEBUG("+--------------------------------------------+");
 
     TDouble2Vec4VecVec weight{{{1.0}}};
-    auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) {
+    auto updateModel = [&](core_t::TTime time, double value,
+                           maths::CUnivariateTimeSeriesModel& model) {
         weight[0][0] = model.winsorisationWeight(0.0, time, {value});
         model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight),
                          {core::make_triple(time, TDouble2Vec{value}, TAG)});
@@ -2104,7 +2313,8 @@ void CTimeSeriesModelTest::testDaylightSaving() {
     core_t::TTime bucketLength{600};
     maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
     auto controllers = decayRateControllers(1);
-    maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers};
+    maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend,
+                                            univariateNormal(DECAY_RATE / 3.0), &controllers};
 
     core_t::TTime time{0};
     TDoubleVec samples;
@@ -2131,7 +2341,8 @@ void CTimeSeriesModelTest::testDaylightSaving() {
         updateModel(time, sample, model);
         //updateTestDebug(time, sample, model);
         CPPUNIT_ASSERT_EQUAL(hour, model.trendModel().timeShift());
-        auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}});
+        auto x = model.confidenceInterval(time, 90.0,
+                                          {maths_t::E_SampleCountWeight}, {{1.0}});
         CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.6 * std::sqrt(noiseVariance));
         CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.6 * std::sqrt(noiseVariance));
         time += bucketLength;
@@ -2152,7 +2363,8 @@ void CTimeSeriesModelTest::testDaylightSaving() {
         updateModel(time, sample, model);
         //updateTestDebug(time, sample, model);
         CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), model.trendModel().timeShift());
-        auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}});
+        auto x = model.confidenceInterval(time, 90.0,
+                                          {maths_t::E_SampleCountWeight}, {{1.0}});
         CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 4.1 * std::sqrt(noiseVariance));
         CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.8 * std::sqrt(noiseVariance));
         time += bucketLength;
@@ -2167,37 +2379,41 @@ CppUnit::Test* CTimeSeriesModelTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTimeSeriesModelTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testClone", &CTimeSeriesModelTest::testClone));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testMode", &CTimeSeriesModelTest::testMode));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testAddBucketValue",
-                                                                        &CTimeSeriesModelTest::testAddBucketValue));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testAddSamples", &CTimeSeriesModelTest::testAddSamples));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testPredict", &CTimeSeriesModelTest::testPredict));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testProbability", &CTimeSeriesModelTest::testProbability));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testWeights", &CTimeSeriesModelTest::testWeights));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testMemoryUsage", &CTimeSeriesModelTest::testMemoryUsage));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testPersist", &CTimeSeriesModelTest::testPersist));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testUpgrade", &CTimeSeriesModelTest::testUpgrade));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testAddSamplesWithCorrelations",
-                                                                        &CTimeSeriesModelTest::testAddSamplesWithCorrelations));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testProbabilityWithCorrelations",
-                                                                        &CTimeSeriesModelTest::testProbabilityWithCorrelations));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testAnomalyModel", &CTimeSeriesModelTest::testAnomalyModel));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testStepChangeDiscontinuities",
-                                                                        &CTimeSeriesModelTest::testStepChangeDiscontinuities));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testLinearScaling", &CTimeSeriesModelTest::testLinearScaling));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>("CTimeSeriesModelTest::testDaylightSaving",
-                                                                        &CTimeSeriesModelTest::testDaylightSaving));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testClone", &CTimeSeriesModelTest::testClone));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testMode", &CTimeSeriesModelTest::testMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testAddBucketValue", &CTimeSeriesModelTest::testAddBucketValue));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testAddSamples", &CTimeSeriesModelTest::testAddSamples));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testPredict", &CTimeSeriesModelTest::testPredict));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testProbability", &CTimeSeriesModelTest::testProbability));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testWeights", &CTimeSeriesModelTest::testWeights));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testMemoryUsage", &CTimeSeriesModelTest::testMemoryUsage));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testPersist", &CTimeSeriesModelTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testUpgrade", &CTimeSeriesModelTest::testUpgrade));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testAddSamplesWithCorrelations",
+        &CTimeSeriesModelTest::testAddSamplesWithCorrelations));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testProbabilityWithCorrelations",
+        &CTimeSeriesModelTest::testProbabilityWithCorrelations));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testAnomalyModel", &CTimeSeriesModelTest::testAnomalyModel));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testStepChangeDiscontinuities",
+        &CTimeSeriesModelTest::testStepChangeDiscontinuities));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testLinearScaling", &CTimeSeriesModelTest::testLinearScaling));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesModelTest>(
+        "CTimeSeriesModelTest::testDaylightSaving", &CTimeSeriesModelTest::testDaylightSaving));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CToolsTest.cc b/lib/maths/unittest/CToolsTest.cc
index bc782879ff..b6f2dba0ea 100644
--- a/lib/maths/unittest/CToolsTest.cc
+++ b/lib/maths/unittest/CToolsTest.cc
@@ -52,7 +52,8 @@ TDoubleDoublePr support(const DISTRIBUTION& distribution) {
 }
 
 TDoubleDoublePr support(const CLogTDistribution& logt) {
     CLogTDistribution::TOptionalDouble minimum = localMinimum(logt);
-    return TDoubleDoublePr(minimum ? *minimum : 0.0, boost::math::tools::max_value<double>());
+    return TDoubleDoublePr(minimum ? *minimum : 0.0,
+                           boost::math::tools::max_value<double>());
 }
 
 template<typename DISTRIBUTION>
@@ -102,14 +103,17 @@ double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION& distributi
     double eps = 1e-8;
 
     double pdf = adapters::pdf(distribution, x);
-    LOG_TRACE(<< "x = " << x << ", f(x) = " << pdf << ", stationaryPoint = " << stationaryPoint.first);
+    LOG_TRACE(<< "x = " << x << ", f(x) = " << pdf
+              << ", stationaryPoint = " << stationaryPoint.first);
 
     double x1 = stationaryPoint.first;
     if (x > stationaryPoint.first) {
         // Search for lower bound.
         double minX = adapters::support(distribution).first + eps;
-        for (double increment = std::max(x1 / 2.0, 1.0); x1 > minX && ((stationaryPoint.second && adapters::pdf(distribution, x1) > pdf) ||
-                                                                       (!stationaryPoint.second && adapters::pdf(distribution, x1) < pdf));
+        for (double increment = std::max(x1 / 2.0, 1.0);
+             x1 > minX &&
+             ((stationaryPoint.second && adapters::pdf(distribution, x1) > pdf) ||
+              (!stationaryPoint.second && adapters::pdf(distribution, x1) < pdf));
              x1 = std::max(x1 - increment, minX), increment *= 2.0) {
             // Empty.
         }
@@ -119,8 +123,10 @@ double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION& distributi
     if (x < stationaryPoint.first) {
         // Search for upper bound.
         double maxX = adapters::support(distribution).second - eps;
-        for (double increment = std::max(x2 / 2.0, 1.0); x2 < maxX && ((stationaryPoint.second && adapters::pdf(distribution, x2) > pdf) ||
-                                                                       (!stationaryPoint.second && adapters::pdf(distribution, x2) < pdf));
+        for (double increment = std::max(x2 / 2.0, 1.0);
+             x2 < maxX &&
+             ((stationaryPoint.second && adapters::pdf(distribution, x2) > pdf) ||
+              (!stationaryPoint.second && adapters::pdf(distribution, x2) < pdf));
              x2 = std::min(x2 + increment, maxX), increment *= 2.0) {
             // Empty.
         }
@@ -148,12 +154,14 @@ double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION& distributi
         std::swap(x, y);
     }
 
-    LOG_TRACE(<< "x = " << x << ", y = " << y << ", f(x) = " << adapters::pdf(distribution, x)
+    LOG_TRACE(<< "x = " << x << ", y = " << y
+              << ", f(x) = " << adapters::pdf(distribution, x)
               << ", f(y) = " << adapters::pdf(distribution, y));
 
     if (stationaryPoint.second) {
         double cdfy =
-            adapters::cdfComplement(distribution, y) + (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0);
+            adapters::cdfComplement(distribution, y) +
+            (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0);
         double cdfx = adapters::cdf(distribution, x);
         LOG_TRACE(<< "F(x) = " << cdfx << ", 1 - F(y) = " << cdfy);
 
@@ -161,7 +169,8 @@ double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION& distributi
         return cdfx + cdfy;
     }
 
-    double cdfy = adapters::cdf(distribution, y) + (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0);
+    double cdfy = adapters::cdf(distribution, y) +
+                  (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0);
     double cdfx = adapters::cdf(distribution, x);
     LOG_TRACE(<< "F(x) = " << cdfx << ", F(y) = " << cdfy);
 
@@ -174,7 +183,8 @@ double numericalProbabilityOfLessLikelySample(const DISTRIBUTION& distribution,
     return numericalProbabilityOfLessLikelySampleImpl(distribution, x);
 }
 
-double numericalProbabilityOfLessLikelySample(const boost::math::negative_binomial_distribution<>& negativeBinomial, double x) {
+double numericalProbabilityOfLessLikelySample(const boost::math::negative_binomial_distribution<>& negativeBinomial,
+                                              double x) {
     double fx = CTools::safePdf(negativeBinomial, x);
 
     double m = boost::math::mode(negativeBinomial);
@@ -184,7 +194,8 @@ double numericalProbabilityOfLessLikelySample(const boost::math::negative_binomi
     }
     double f0 = CTools::safePdf(negativeBinomial, 0.0);
     if (x > m && fx < f0) {
-        return CTools::safeCdfComplement(negativeBinomial, x) + CTools::safePdf(negativeBinomial, x);
+        return CTools::safeCdfComplement(negativeBinomial, x) +
+               CTools::safePdf(negativeBinomial, x);
     }
     return numericalProbabilityOfLessLikelySampleImpl(negativeBinomial, x);
 }
@@ -212,7 +223,8 @@ double numericalProbabilityOfLessLikelySample(const CLogTDistribution& logt, dou
     return numericalProbabilityOfLessLikelySampleImpl(logt, x);
 }
 
-double numericalProbabilityOfLessLikelySample(const boost::math::beta_distribution<>& beta, double x) {
+double numericalProbabilityOfLessLikelySample(const boost::math::beta_distribution<>& beta,
+                                              double x) {
     // We need special handling of the case that the equal p.d.f.
     // point is very close to 0 or 1.
@@ -223,7 +235,8 @@ double numericalProbabilityOfLessLikelySample(const boost::math::beta_distributi
     double xmin = 1000.0 * std::numeric_limits<double>::min();
     if (a >= 1.0 && fx < CTools::safePdf(beta, xmin)) {
-        return std::exp(a * std::log(xmin) - std::log(a) + boost::math::lgamma(a + b) - boost::math::lgamma(a) - boost::math::lgamma(b)) +
+        return std::exp(a * std::log(xmin) - std::log(a) + boost::math::lgamma(a + b) -
+                        boost::math::lgamma(a) - boost::math::lgamma(b)) +
                CTools::safeCdfComplement(beta, x);
     }
 
@@ -274,9 +287,11 @@ double numericalIntervalExpectation(const DISTRIBUTION& distribution, double a,
     double dx = (b - a) / 10.0;
     for (std::size_t i = 0u; i < 10; ++i, a += dx) {
         double fxi;
-        CPPUNIT_ASSERT(maths::CIntegration::gaussLegendre(fx, a, a + dx, fxi));
+        CPPUNIT_ASSERT(maths::CIntegration::gaussLegendre(
+            fx, a, a + dx, fxi));
         double xfxi;
-        CPPUNIT_ASSERT(maths::CIntegration::gaussLegendre(xfx, a, a + dx, xfxi));
+        CPPUNIT_ASSERT(maths::CIntegration::gaussLegendre(
+            xfx, a, a + dx, xfxi));
         numerator += xfxi;
         denominator += fxi;
     }
@@ -287,7 +302,8 @@ double numericalIntervalExpectation(const DISTRIBUTION& distribution, double a,
 template<typename DISTRIBUTION>
 class CTruncatedPdf {
 public:
-    CTruncatedPdf(const maths::CMixtureDistribution<DISTRIBUTION>& mixture, double cutoff) : m_Mixture(mixture), m_Cutoff(cutoff) {}
+    CTruncatedPdf(const maths::CMixtureDistribution<DISTRIBUTION>& mixture, double cutoff)
+        : m_Mixture(mixture), m_Cutoff(cutoff) {}
 
     bool operator()(double x, double& fx) const {
         fx = maths::pdf(m_Mixture, x);
@@ -305,9 +321,12 @@ class CTruncatedPdf {
 template<typename DISTRIBUTION>
 class CLogPdf {
 public:
-    CLogPdf(const maths::CMixtureDistribution<DISTRIBUTION>& mixture) : m_Mixture(mixture) {}
+    CLogPdf(const maths::CMixtureDistribution<DISTRIBUTION>& mixture)
+        : m_Mixture(mixture) {}
 
-    double operator()(double x) const { return std::log(maths::pdf(m_Mixture, x)); }
+    double operator()(double x) const {
+        return std::log(maths::pdf(m_Mixture, x));
+    }
 
     bool operator()(double x, double& fx) const {
         fx = std::log(maths::pdf(m_Mixture, x));
@@ -402,9 +421,11 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
 
     for (size_t i = 0; i < boost::size(successFraction); ++i) {
         for (size_t j = 0; j < boost::size(successProbability); ++j) {
-            LOG_DEBUG(<< "**** r = " << successFraction[i] << ", p = " << successProbability[j] << " ****");
+            LOG_DEBUG(<< "**** r = " << successFraction[i]
+                      << ", p = " << successProbability[j] << " ****");
 
-            boost::math::negative_binomial_distribution<> negativeBinomial(successFraction[i], successProbability[j]);
+            boost::math::negative_binomial_distribution<> negativeBinomial(
+                successFraction[i], successProbability[j]);
 
             if (successFraction[i] <= 1.0) {
                 // Monotone decreasing.
@@ -412,7 +433,8 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                 for (int l = 0; l < 10; ++l) {
                     tail = maths_t::E_UndeterminedTail;
                     x = std::floor(2.0 * x + 0.5);
-                    p1 = CTools::safeCdfComplement(negativeBinomial, x) + CTools::safePdf(negativeBinomial, x);
+                    p1 = CTools::safeCdfComplement(negativeBinomial, x) +
+                         CTools::safePdf(negativeBinomial, x);
                     p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
                     LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2);
                     CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-3 * std::max(p1, p2));
@@ -423,7 +445,8 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
 
                 double m1 = boost::math::mode(negativeBinomial);
 
-                CPPUNIT_ASSERT_EQUAL(1.0, probabilityOfLessLikelySample(negativeBinomial, m1, tail));
+                CPPUNIT_ASSERT_EQUAL(
+                    1.0, probabilityOfLessLikelySample(negativeBinomial, m1, tail));
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
 
                 double offset = m1;
@@ -434,10 +457,13 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                     tail = maths_t::E_UndeterminedTail;
                     p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x);
                     p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
-                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
+                              << ", log(p1) = " << std::log(p1)
                               << ", log(p2) = " << std::log(p2));
-                    CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.02 * std::max(p1, p2) ||
-                                   std::fabs(std::log(p1) - std::log(p2)) <= 0.02 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                    CPPUNIT_ASSERT(
+                        std::fabs(p1 - p2) <= 0.02 * std::max(p1, p2) ||
+                        std::fabs(std::log(p1) - std::log(p2)) <=
+                            0.02 * std::fabs(std::min(std::log(p1), std::log(p2))));
                     if (offset > 0.0)
                         CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
                     if (offset == 0.0)
@@ -463,10 +489,13 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                     tail = maths_t::E_UndeterminedTail;
                     p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x);
                     p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
-                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
+                              << ", log(p1) = " << std::log(p1)
                               << ", log(p2) = " << std::log(p2));
-                    CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
-                                   std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                    CPPUNIT_ASSERT(
+                        std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
+                        std::fabs(std::log(p1) - std::log(p2)) <=
+                            0.05 * std::fabs(std::min(std::log(p1), std::log(p2))));
                     if (x != m1)
                         CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
                     if (x == m1)
@@ -476,10 +505,13 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                    tail = maths_t::E_UndeterminedTail;
                    p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x);
                    p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
-                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
+                              << ", log(p1) = " << std::log(p1)
                               << ", log(p2) = " << std::log(p2));
-                    CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
-                                   std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                    CPPUNIT_ASSERT(
+                        std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
+                        std::fabs(std::log(p1) - std::log(p2)) <=
+                            0.05 * std::fabs(std::min(std::log(p1), std::log(p2))));
                    if (x != m1)
                        CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
                    if (x == m1)
@@ -528,7 +560,8 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
     for (size_t i = 0; i < boost::size(degreesFreedom); ++i) {
         for (size_t j = 0; j < boost::size(locations); ++j) {
             for (size_t k = 0; k < boost::size(scales); ++k) {
-                LOG_DEBUG(<< "**** v = " << degreesFreedom[i] << ", l = " << locations[j] << ", s = " << scales[k] << " ****");
+                LOG_DEBUG(<< "**** v = " << degreesFreedom[i] << ", l = "
+                          << locations[j] << ", s = " << scales[k] << " ****");
 
                 CLogTDistribution logt(degreesFreedom[i], locations[j], scales[k]);
@@ -577,21 +610,27 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                     tail = maths_t::E_UndeterminedTail;
                     p1 = numericalProbabilityOfLessLikelySample(logt, x);
                     p2 = probabilityOfLessLikelySample(logt, x, tail);
-                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
+                              << ", log(p1) = " << std::log(p1)
                               << ", log(p2) = " << std::log(p2));
-                    CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
-                                   std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                    CPPUNIT_ASSERT(
+                        std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
+                        std::fabs(std::log(p1) - std::log(p2)) <=
+                            0.05 * std::fabs(std::min(std::log(p1), std::log(p2))));
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
 
                     x = m1 * factor;
                     tail = maths_t::E_UndeterminedTail;
                     p1 = numericalProbabilityOfLessLikelySample(logt, x);
                     p2 = probabilityOfLessLikelySample(logt, x, tail);
-                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                    LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
+                              << ", log(p1) = " << std::log(p1)
                               << ", log(p2) = " << std::log(p2));
-                    CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
-                                   std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                    CPPUNIT_ASSERT(
+                        std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
+                        std::fabs(std::log(p1) - std::log(p2)) <=
+                            0.05 * std::fabs(std::min(std::log(p1), std::log(p2))));
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
                 }
             }
@@ -607,7 +646,8 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
     for (size_t i = 0; i < boost::size(shapes); ++i) {
         for (size_t j = 0; j < boost::size(scales); ++j) {
-            LOG_DEBUG(<< "***** shape = " << shapes[i] << ", scale = " << scales[j] << " *****");
+            LOG_DEBUG(<< "***** shape = " << shapes[i]
+                      << ", scale = " << scales[j] << " *****");
 
             boost::math::gamma_distribution<> gamma(shapes[i], scales[j]);
@@ -636,20 +676,26 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                 tail = maths_t::E_UndeterminedTail;
                 p1 = numericalProbabilityOfLessLikelySample(gamma, x);
                 p2 = probabilityOfLessLikelySample(gamma, x, tail);
-                LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
+                          << ", log(p1) = " << std::log(p1)
                           << ", log(p2) = " << std::log(p2));
-                CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) ||
-                               std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                CPPUNIT_ASSERT(
+                    std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) ||
+                    std::fabs(std::log(p1) - std::log(p2)) <=
+                        0.01 * std::fabs(std::min(std::log(p1), std::log(p2))));
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
 
                 double y = (1.0 + offset) * m1;
                 tail = maths_t::E_UndeterminedTail;
                 p1 = numericalProbabilityOfLessLikelySample(gamma, y);
                 p2 = probabilityOfLessLikelySample(gamma, y, tail);
-                LOG_DEBUG(<< "y = " << y << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                LOG_DEBUG(<< "y = " << y << ", p1 = " << p1 << ", p2 = " << p2
+                          << ", log(p1) = " << std::log(p1)
                           << ", log(p2) = " << std::log(p2));
-                CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) ||
-                               std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                CPPUNIT_ASSERT(
+                    std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) ||
+                    std::fabs(std::log(p1) - std::log(p2)) <=
+                        0.01 * std::fabs(std::min(std::log(p1), std::log(p2))));
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
             }
@@ -661,20 +707,26 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                 tail = maths_t::E_UndeterminedTail;
                 p1 = numericalProbabilityOfLessLikelySample(gamma, x);
                 p2 = probabilityOfLessLikelySample(gamma, x, tail);
-                LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                LOG_DEBUG(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
+                          << ", log(p1) = " << std::log(p1)
                           << ", log(p2) = " << std::log(p2));
-                CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) ||
-                               std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                CPPUNIT_ASSERT(
+                    std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) ||
+                    std::fabs(std::log(p1) - std::log(p2)) <=
+                        0.01 * std::fabs(std::min(std::log(p1), std::log(p2))));
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
 
                 double y = factor * m1;
                 tail = maths_t::E_UndeterminedTail;
                 p1 = numericalProbabilityOfLessLikelySample(gamma, y);
                 p2 = probabilityOfLessLikelySample(gamma, y, tail);
-                LOG_DEBUG(<< "y = " << y << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1)
+                LOG_DEBUG(<< "y = " << y << ", p1 = " << p1 << ", p2 = " << p2
+                          << ", log(p1) = " << std::log(p1)
                           << ", log(p2) = " << std::log(p2));
-                CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) ||
-                               std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                CPPUNIT_ASSERT(
+                    std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) ||
+                    std::fabs(std::log(p1) - std::log(p2)) <=
+                        0.01 * std::fabs(std::min(std::log(p1), std::log(p2))));
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
             }
         }
@@ -689,7 +741,8 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
     for (size_t i = 0; i < boost::size(alphas); ++i) {
         for (size_t j = 0; j < boost::size(betas); ++j) {
-            LOG_DEBUG(<< "**** alpha = " << alphas[i] << ", beta = " << betas[j] << " ****");
+            LOG_DEBUG(<< "**** alpha = " << alphas[i]
+                      << ", beta = " << betas[j] << " ****");
 
             boost::math::beta_distribution<> beta(alphas[i], betas[j]);
@@ -700,7 +753,8 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                 tail = maths_t::E_UndeterminedTail;
                 p1 = 1.0;
                 p2 = probabilityOfLessLikelySample(beta, x, tail);
-                LOG_DEBUG(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2);
+                LOG_DEBUG(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x)
+                          << ", p1 = " << p1 << ", p2 = " << p2);
                 CPPUNIT_ASSERT_EQUAL(p1, p2);
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
             }
@@ -711,7 +765,8 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                 tail = maths_t::E_UndeterminedTail;
                 p1 = CTools::safeCdfComplement(beta, x);
                 p2 = probabilityOfLessLikelySample(beta, x, tail);
-                LOG_DEBUG(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2);
+                LOG_DEBUG(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x)
+                          << ", p1 = " << p1 << ", p2 = " << p2);
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-3 * std::max(p1, p2));
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail);
             }
@@ -722,7 +777,8 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                 tail = maths_t::E_UndeterminedTail;
                 p1 = CTools::safeCdf(beta, x);
                 p2 = probabilityOfLessLikelySample(beta, x, tail);
-                LOG_DEBUG(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2);
+                LOG_DEBUG(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x)
+                          << ", p1 = " << p1 << ", p2 = " << p2);
                 CPPUNIT_ASSERT_DOUBLES_EQUAL(p1, p2, 1e-3 * std::max(p1, p2));
                 CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
             }
@@ -739,10 +795,13 @@ void CToolsTest::testProbabilityOfLessLikelySample() {
                 tail = maths_t::E_UndeterminedTail;
                 p1 = numericalProbabilityOfLessLikelySample(beta, xMinus);
                 p2 = probabilityOfLessLikelySample(beta, xMinus, tail);
-                LOG_DEBUG(<< "x- = " << xMinus << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << log(p1)
+                LOG_DEBUG(<< "x- = " << xMinus << ", p1 = " << p1
+                          << ", p2 = " << p2 << ", log(p1) = " << log(p1)
                           << ", log(p2) = " << std::log(p2));
-                CPPUNIT_ASSERT(std::fabs(p1 - p2) <= 0.05 * std::max(p1, p2) ||
-                               std::fabs(std::log(p1) - std::log(p2)) < 0.25 * std::fabs(std::min(std::log(p1), std::log(p2))));
+                CPPUNIT_ASSERT(
+                    std::fabs(p1 - p2) <= 0.05 * std::max(p1, p2) ||
+                    std::fabs(std::log(p1) - std::log(p2)) <
+                        0.25 * std::fabs(std::min(std::log(p1), std::log(p2))));
                 if (maximum)
                     CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail);
                 if (!maximum)
@@ -823,7 +882,8 @@ void CToolsTest::testIntervalExpectation() {
         CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected);
 
         expected = 8.1;
-        actual = expectation(normal, 8.1, 8.1 * (1.0 + std::numeric_limits<double>::epsilon()));
+        actual = expectation(normal, 8.1,
+                             8.1 * (1.0 + std::numeric_limits<double>::epsilon()));
         LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected);
     }
@@ -847,7 +907,8 @@ void CToolsTest::testIntervalExpectation() {
         CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected);
 
         expected = 8.1;
-        actual = expectation(logNormal, 8.1, 8.1 * (1.0 + std::numeric_limits<double>::epsilon()));
+        actual = expectation(logNormal, 8.1,
+                             8.1 * (1.0 + std::numeric_limits<double>::epsilon()));
         LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected);
     }
@@ -871,7 +932,8 @@ void CToolsTest::testIntervalExpectation() {
         CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected);
 
         expected = 8.1;
-        actual = expectation(gamma, 8.1, 8.1
* (1.0 + std::numeric_limits::epsilon())); + actual = expectation(gamma, 8.1, + 8.1 * (1.0 + std::numeric_limits::epsilon())); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-12 * expected); } @@ -923,15 +985,19 @@ void CToolsTest::testMixtureProbabilityOfLessLikelySample() { logFx = std::log(logFx); } - maths::CTools::CMixtureProbabilityOfLessLikelySample calculator(i, x[k], logFx, a, b); + maths::CTools::CMixtureProbabilityOfLessLikelySample calculator( + i, x[k], logFx, a, b); for (std::size_t l = 0u; l < modeWeights.size(); ++l) { - calculator.addMode((mixture.weights())[l], boost::math::mean(modes[l]), boost::math::standard_deviation(modes[l])); + calculator.addMode((mixture.weights())[l], + boost::math::mean(modes[l]), + boost::math::standard_deviation(modes[l])); } double pTails = 0.0; CLogPdf logPdf(mixture); - maths::CEqualWithTolerance equal(maths::CToleranceTypes::E_AbsoluteTolerance, 0.5); + maths::CEqualWithTolerance equal( + maths::CToleranceTypes::E_AbsoluteTolerance, 0.5); double xleft; CPPUNIT_ASSERT(calculator.leftTail(logPdf, 10, equal, xleft)); pTails += maths::cdf(mixture, xleft); @@ -943,29 +1009,34 @@ void CToolsTest::testMixtureProbabilityOfLessLikelySample() { double pExpected = pTails; CTruncatedPdf pdf(mixture, std::exp(logFx)); - for (double xi = a, l = 0, step = 0.5 * (b - a) / std::floor(b - a); l < 2 * static_cast(b - a); - xi += step, ++l) { + for (double xi = a, l = 0, step = 0.5 * (b - a) / std::floor(b - a); + l < 2 * static_cast(b - a); xi += step, ++l) { double pi; - maths::CIntegration::gaussLegendre(pdf, xi, xi + step, pi); + maths::CIntegration::gaussLegendre( + pdf, xi, xi + step, pi); pExpected += pi; } if (j % 50 == 0) { LOG_DEBUG(<< "pTails = " << pTails); - LOG_DEBUG(<< "x = " << x[k] << ", log(f(x)) = " << logFx << ", P(x) = " << p << ", expected P(x) = " << pExpected); + LOG_DEBUG(<< "x = " << x[k] << ", log(f(x)) = " << logFx + << ", P(x) = " << p << ", expected P(x) = " << pExpected); } CPPUNIT_ASSERT(pExpected > 0.0); if (pExpected > 0.1) { CPPUNIT_ASSERT_DOUBLES_EQUAL(pExpected, p, 0.12); } else if (pExpected > 1e-10) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(pExpected), std::log(p), 0.15 * std::fabs(std::log(pExpected))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(pExpected), std::log(p), + 0.15 * std::fabs(std::log(pExpected))); } else { - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(pExpected), std::log(p), 0.015 * std::fabs(std::log(pExpected))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(pExpected), std::log(p), + 0.015 * std::fabs(std::log(pExpected))); } meanError.add(std::fabs(p - pExpected)); meanLogError.add(std::fabs(std::log(p) - std::log(pExpected)) / - std::max(std::fabs(std::log(pExpected)), std::fabs(std::log(p)))); + std::max(std::fabs(std::log(pExpected)), + std::fabs(std::log(p)))); } } @@ -998,7 +1069,8 @@ void CToolsTest::testSpread() { double period = 86400.0; { - double raw[] = {15.0, 120.0, 4500.0, 9000.0, 25700.0, 43100.0, 73000.0, 74000.0, 84300.0}; + double raw[] = {15.0, 120.0, 4500.0, 9000.0, 25700.0, + 43100.0, 73000.0, 74000.0, 84300.0}; double separation = 20.0; TDoubleVec points(boost::begin(raw), boost::end(raw)); std::string expected = core::CContainerPrinter::print(points); @@ -1011,7 +1083,8 @@ void CToolsTest::testSpread() { CPPUNIT_ASSERT_EQUAL(expected, core::CContainerPrinter::print(points)); } { - double raw[] = {150.0, 170.0, 4500.0, 4650.0, 4700.0, 4800.0, 73000.0, 73150.0, 73500.0, 73600.0, 73800.0, 74000.0}; + double raw[] = {150.0, 
170.0, 4500.0, 4650.0, 4700.0, 4800.0, + 73000.0, 73150.0, 73500.0, 73600.0, 73800.0, 74000.0}; double separation = 126.0; std::string expected = "[97, 223, 4473.5, 4599.5, 4725.5, 4851.5, 73000, 73150, 73487, 73613, 73800, 74000]"; TDoubleVec points(boost::begin(raw), boost::end(raw)); @@ -1023,18 +1096,23 @@ void CToolsTest::testSpread() { CRandomNumbers rng; for (std::size_t i = 0u; i < 100; ++i) { TDoubleVec origSamples; - rng.generateUniformSamples(1000.0, static_cast(period) - 1000.0, 100, origSamples); + rng.generateUniformSamples(1000.0, static_cast(period) - 1000.0, + 100, origSamples); TDoubleVec samples(origSamples); CTools::spread(0.0, period, 150.0, samples); std::sort(origSamples.begin(), origSamples.end()); double eps = 1e-3; - double dcost = (samples[0] + eps - origSamples[0]) * (samples[0] + eps - origSamples[0]) - - (samples[0] - eps - origSamples[0]) * (samples[0] - eps - origSamples[0]); + double dcost = (samples[0] + eps - origSamples[0]) * + (samples[0] + eps - origSamples[0]) - + (samples[0] - eps - origSamples[0]) * + (samples[0] - eps - origSamples[0]); for (std::size_t j = 1u; j < samples.size(); ++j) { CPPUNIT_ASSERT(samples[j] - samples[j - 1] >= 150.0 - eps); - dcost += (samples[j] + eps - origSamples[j]) * (samples[j] + eps - origSamples[j]) - - (samples[j] - eps - origSamples[j]) * (samples[j] - eps - origSamples[j]); + dcost += (samples[j] + eps - origSamples[j]) * + (samples[j] + eps - origSamples[j]) - + (samples[j] - eps - origSamples[j]) * + (samples[j] - eps - origSamples[j]); } dcost /= 2.0 * eps; LOG_DEBUG(<< "d(cost)/dx = " << dcost); @@ -1056,8 +1134,8 @@ void CToolsTest::testFastLog() { rng.generateUniformSamples(-100.0, 0.0, 10000, x); for (std::size_t i = 0u; i < x.size(); ++i) { if (i % 100 == 0) { - LOG_DEBUG(<< "x = " << std::exp(x[i]) << ", log(x) = " << x[i] - << ", fast log(x) = " << maths::CTools::fastLog(std::exp(x[i]))); + LOG_DEBUG(<< "x = " << std::exp(x[i]) << ", log(x) = " << x[i] << ", fast log(x) = " + << maths::CTools::fastLog(std::exp(x[i]))); } CPPUNIT_ASSERT_DOUBLES_EQUAL(x[i], maths::CTools::fastLog(std::exp(x[i])), 5e-5); } @@ -1068,9 +1146,11 @@ void CToolsTest::testFastLog() { rng.generateUniformSamples(1.0, 1e6, 10000, x); for (std::size_t i = 0u; i < x.size(); ++i) { if (i % 100 == 0) { - LOG_DEBUG(<< "x = " << x[i] << ", log(x) = " << std::log(x[i]) << ", fast log(x) = " << maths::CTools::fastLog(x[i])); + LOG_DEBUG(<< "x = " << x[i] << ", log(x) = " << std::log(x[i]) + << ", fast log(x) = " << maths::CTools::fastLog(x[i])); } - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(x[i]), maths::CTools::fastLog(x[i]), 5e-5); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(x[i]), + maths::CTools::fastLog(x[i]), 5e-5); } } // Large @@ -1079,8 +1159,8 @@ void CToolsTest::testFastLog() { rng.generateUniformSamples(20.0, 80.0, 10000, x); for (std::size_t i = 0u; i < x.size(); ++i) { if (i % 100 == 0) { - LOG_DEBUG(<< "x = " << std::exp(x[i]) << ", log(x) = " << x[i] - << ", fast log(x) = " << maths::CTools::fastLog(std::exp(x[i]))); + LOG_DEBUG(<< "x = " << std::exp(x[i]) << ", log(x) = " << x[i] << ", fast log(x) = " + << maths::CTools::fastLog(std::exp(x[i]))); } CPPUNIT_ASSERT_DOUBLES_EQUAL(x[i], maths::CTools::fastLog(std::exp(x[i])), 5e-5); } @@ -1098,14 +1178,11 @@ void CToolsTest::testMiscellaneous() { maths::CVectorNx1 a(-2.0); maths::CVectorNx1 b(5.0); - double expected[][5] = {{0.0, 3.2, 2.1, -1.8, 4.5}, - {0.0, 3.2, 2.1, -1.5, 4.5}, - {0.0, 3.2, 2.1, -1.0, 4.0}, - {0.0, 3.2, 2.1, -0.5, 3.5}, - {0.0, 3.0, 2.1, 0.0, 3.0}, - {0.5, 2.5, 2.1, 
0.5, 2.5}, - {1.0, 2.0, 2.0, 1.0, 2.0}, - {1.5, 1.5, 1.5, 1.5, 1.5}}; + double expected[][5] = { + {0.0, 3.2, 2.1, -1.8, 4.5}, {0.0, 3.2, 2.1, -1.5, 4.5}, + {0.0, 3.2, 2.1, -1.0, 4.0}, {0.0, 3.2, 2.1, -0.5, 3.5}, + {0.0, 3.0, 2.1, 0.0, 3.0}, {0.5, 2.5, 2.1, 0.5, 2.5}, + {1.0, 2.0, 2.0, 1.0, 2.0}, {1.5, 1.5, 1.5, 1.5, 1.5}}; for (std::size_t i = 0u; a <= b; ++i) { maths::CVectorNx1 expect(expected[i]); @@ -1122,15 +1199,22 @@ void CToolsTest::testMiscellaneous() { CppUnit::Test* CToolsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CToolsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testProbabilityOfLessLikelySample", - &CToolsTest::testProbabilityOfLessLikelySample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testIntervalExpectation", &CToolsTest::testIntervalExpectation)); - suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testMixtureProbabilityOfLessLikelySample", - &CToolsTest::testMixtureProbabilityOfLessLikelySample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testAnomalyScore", &CToolsTest::testAnomalyScore)); - suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testSpread", &CToolsTest::testSpread)); - suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testFastLog", &CToolsTest::testFastLog)); - suiteOfTests->addTest(new CppUnit::TestCaller("CToolsTest::testMiscellaneous", &CToolsTest::testMiscellaneous)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CToolsTest::testProbabilityOfLessLikelySample", + &CToolsTest::testProbabilityOfLessLikelySample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CToolsTest::testIntervalExpectation", &CToolsTest::testIntervalExpectation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CToolsTest::testMixtureProbabilityOfLessLikelySample", + &CToolsTest::testMixtureProbabilityOfLessLikelySample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CToolsTest::testAnomalyScore", &CToolsTest::testAnomalyScore)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CToolsTest::testSpread", &CToolsTest::testSpread)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CToolsTest::testFastLog", &CToolsTest::testFastLog)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CToolsTest::testMiscellaneous", &CToolsTest::testMiscellaneous)); return suiteOfTests; } diff --git a/lib/maths/unittest/CTrendComponentTest.cc b/lib/maths/unittest/CTrendComponentTest.cc index 263df9d123..81511c3f8b 100644 --- a/lib/maths/unittest/CTrendComponentTest.cc +++ b/lib/maths/unittest/CTrendComponentTest.cc @@ -37,7 +37,10 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumula using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using TRegression = maths::CRegression::CLeastSquaresOnline<2, double>; -TDoubleVec multiscaleRandomWalk(test::CRandomNumbers& rng, core_t::TTime bucketLength, core_t::TTime start, core_t::TTime end) { +TDoubleVec multiscaleRandomWalk(test::CRandomNumbers& rng, + core_t::TTime bucketLength, + core_t::TTime start, + core_t::TTime end) { TDoubleVecVec noise(4); core_t::TTime buckets{(end - start) / bucketLength + 1}; @@ -66,7 +69,10 @@ TDoubleVec multiscaleRandomWalk(test::CRandomNumbers& rng, core_t::TTime bucketL return result; } -TDoubleVec piecewiseLinear(test::CRandomNumbers& rng, core_t::TTime bucketLength, core_t::TTime start, core_t::TTime end) { +TDoubleVec piecewiseLinear(test::CRandomNumbers& rng, + core_t::TTime bucketLength, + core_t::TTime start, + core_t::TTime 
end) { core_t::TTime buckets{(end - start) / bucketLength + 1}; TDoubleVec knots; @@ -96,7 +102,10 @@ TDoubleVec piecewiseLinear(test::CRandomNumbers& rng, core_t::TTime bucketLength return result; } -TDoubleVec staircase(test::CRandomNumbers& rng, core_t::TTime bucketLength, core_t::TTime start, core_t::TTime end) { +TDoubleVec staircase(test::CRandomNumbers& rng, + core_t::TTime bucketLength, + core_t::TTime start, + core_t::TTime end) { core_t::TTime buckets{(end - start) / bucketLength + 1}; TDoubleVec knots; @@ -126,7 +135,10 @@ TDoubleVec staircase(test::CRandomNumbers& rng, core_t::TTime bucketLength, core return result; } -TDoubleVec switching(test::CRandomNumbers& rng, core_t::TTime bucketLength, core_t::TTime start, core_t::TTime end) { +TDoubleVec switching(test::CRandomNumbers& rng, + core_t::TTime bucketLength, + core_t::TTime start, + core_t::TTime end) { core_t::TTime buckets{(end - start) / bucketLength + 1}; TDoubleVec knots; @@ -175,8 +187,9 @@ void CTrendComponentTest::testValueAndVariance() { TDoubleVec values(multiscaleRandomWalk(rng, bucketLength, start, end)); maths::CTrendComponent component{0.012}; - maths::CDecayRateController controller( - maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1); + maths::CDecayRateController controller(maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease, + 1); TMeanVarAccumulator normalisedResiduals; for (core_t::TTime time = start; time < end; time += bucketLength) { @@ -189,7 +202,9 @@ void CTrendComponentTest::testValueAndVariance() { } component.add(time, value); - controller.multiplier({prediction}, {{values[(time - start) / bucketLength] - prediction}}, bucketLength, 1.0, 0.012); + controller.multiplier({prediction}, + {{values[(time - start) / bucketLength] - prediction}}, + bucketLength, 1.0, 0.012); component.decayRate(0.012 * controller.multiplier()); component.propagateForwardsByTime(bucketLength); } @@ -222,8 +237,9 @@ void CTrendComponentTest::testDecayRate() { maths::CTrendComponent component{0.012}; TRegression regression; - maths::CDecayRateController controller( - maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1); + maths::CDecayRateController controller(maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease, + 1); TMeanAccumulator error; TMeanAccumulator level; @@ -237,7 +253,9 @@ void CTrendComponentTest::testDecayRate() { error.add(std::fabs(prediction - expectedPrediction)); level.add(value); - controller.multiplier({prediction}, {{values[(time - start) / bucketLength] - prediction}}, bucketLength, 1.0, 0.012); + controller.multiplier({prediction}, + {{values[(time - start) / bucketLength] - prediction}}, + bucketLength, 1.0, 0.012); component.decayRate(0.012 * controller.multiplier()); component.propagateForwardsByTime(bucketLength); regression.age(std::exp(-0.012 * controller.multiplier() * 600.0 / 86400.0)); @@ -246,7 +264,8 @@ void CTrendComponentTest::testDecayRate() { //expectedPredictions.push_back(expectedPrediction); } - double relativeError{maths::CBasicStatistics::mean(error) / std::fabs(maths::CBasicStatistics::mean(level))}; + double relativeError{maths::CBasicStatistics::mean(error) / + std::fabs(maths::CBasicStatistics::mean(level))}; LOG_DEBUG(<< "relative error = " << relativeError); //file << "f = " << core::CContainerPrinter::print(values) << ";" << std::endl; @@ -276,7 +295,9 @@ 
void CTrendComponentTest::testForecast() { maths::CTrendComponent component{0.012}; maths::CDecayRateController controller( - maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1); + maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease, + 1); core_t::TTime time{0}; for (/**/; time < end; time += bucketLength) { @@ -284,7 +305,9 @@ void CTrendComponentTest::testForecast() { component.propagateForwardsByTime(bucketLength); double prediction{maths::CBasicStatistics::mean(component.value(time, 0.0))}; - controller.multiplier({prediction}, {{values[time / bucketLength] - prediction}}, bucketLength, 0.3, 0.012); + controller.multiplier({prediction}, + {{values[time / bucketLength] - prediction}}, + bucketLength, 0.3, 0.012); component.decayRate(0.012 * controller.multiplier()); //predictions.push_back(prediction); } @@ -292,19 +315,21 @@ void CTrendComponentTest::testForecast() { component.shiftOrigin(time); TDouble3VecVec forecast; - component.forecast(time, - time + 1000 * bucketLength, - 3600, - 95.0, + component.forecast(time, time + 1000 * bucketLength, 3600, 95.0, [](core_t::TTime) { return TDouble3Vec(3, 0.0); }, - [&forecast](core_t::TTime, const TDouble3Vec& value) { forecast.push_back(value); }); + [&forecast](core_t::TTime, const TDouble3Vec& value) { + forecast.push_back(value); + }); TMeanAccumulator meanError; TMeanAccumulator meanErrorAt95; for (auto& errorbar : forecast) { core_t::TTime bucket{(time - start) / bucketLength}; - meanError.add(std::fabs((values[bucket] - errorbar[1]) / std::fabs(values[bucket]))); - meanErrorAt95.add(std::max(std::max(values[bucket] - errorbar[2], errorbar[0] - values[bucket]), 0.0) / + meanError.add(std::fabs((values[bucket] - errorbar[1]) / + std::fabs(values[bucket]))); + meanErrorAt95.add(std::max(std::max(values[bucket] - errorbar[2], + errorbar[0] - values[bucket]), + 0.0) / std::fabs(values[bucket])); //forecastLower.push_back(errorbar[0]); //forecastPredictions.push_back(errorbar[1]); @@ -320,7 +345,8 @@ void CTrendComponentTest::testForecast() { LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(meanError)); LOG_DEBUG(<< "error @ 95% = " << maths::CBasicStatistics::mean(meanErrorAt95)); - return std::make_pair(maths::CBasicStatistics::mean(meanError), maths::CBasicStatistics::mean(meanErrorAt95)); + return std::make_pair(maths::CBasicStatistics::mean(meanError), + maths::CBasicStatistics::mean(meanErrorAt95)); }; double error; @@ -394,7 +420,8 @@ void CTrendComponentTest::testPersist() { maths::SDistributionRestoreParams params{maths_t::E_ContinuousData, 0.1}; maths::CTrendComponent restoredComponent{0.1}; - traverser.traverseSubLevel(boost::bind(&maths::CTrendComponent::acceptRestoreTraverser, &restoredComponent, boost::cref(params), _1)); + traverser.traverseSubLevel(boost::bind(&maths::CTrendComponent::acceptRestoreTraverser, + &restoredComponent, boost::cref(params), _1)); CPPUNIT_ASSERT_EQUAL(origComponent.checksum(), restoredComponent.checksum()); @@ -410,14 +437,14 @@ void CTrendComponentTest::testPersist() { CppUnit::Test* CTrendComponentTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTrendComponentTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CTrendComponentTest::testValueAndVariance", - &CTrendComponentTest::testValueAndVariance)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTrendComponentTest::testDecayRate", &CTrendComponentTest::testDecayRate)); - suiteOfTests->addTest( - 
new CppUnit::TestCaller("CTrendComponentTest::testForecast", &CTrendComponentTest::testForecast)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTrendComponentTest::testPersist", &CTrendComponentTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTrendComponentTest::testValueAndVariance", &CTrendComponentTest::testValueAndVariance)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTrendComponentTest::testDecayRate", &CTrendComponentTest::testDecayRate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTrendComponentTest::testForecast", &CTrendComponentTest::testForecast)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTrendComponentTest::testPersist", &CTrendComponentTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CTrendTestsTest.cc b/lib/maths/unittest/CTrendTestsTest.cc index 1ee8db982f..54d8b0f530 100644 --- a/lib/maths/unittest/CTrendTestsTest.cc +++ b/lib/maths/unittest/CTrendTestsTest.cc @@ -48,7 +48,8 @@ void CTrendTestsTest::testRandomizedPeriodicity() { using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; + using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsStack>; using TFunction = double (*)(core_t::TTime); test::CRandomNumbers rng; @@ -74,7 +75,9 @@ void CTrendTestsTest::testRandomizedPeriodicity() { TMeanVarAccumulator timeToDetectionMoments[5]; TMaxAccumulator timeToDetectionMax[5]; core_t::TTime lastTruePositive[5] = {time, time, time, time, time}; - TFunction functions[] = {&constant, &ramp, &markov, &smoothDaily, &smoothWeekly, &spikeyDaily, &spikeyWeekly, &weekends}; + TFunction functions[] = {&constant, &ramp, &markov, + &smoothDaily, &smoothWeekly, &spikeyDaily, + &spikeyWeekly, &weekends}; for (std::size_t i = 0u; i < samples.size(); ++i) { for (std::size_t j = 0u; j < boost::size(functions); ++j) { @@ -85,10 +88,13 @@ void CTrendTestsTest::testRandomizedPeriodicity() { if (j < 3) { (rtests[j].test() ? falsePositives[j] : trueNegatives[j]) += 1.0; } else { - (rtests[j].test() ? truePositives[j - 3] : falseNegatives[j - 3]) += 1.0; + (rtests[j].test() ? 
truePositives[j - 3] + : falseNegatives[j - 3]) += 1.0; if (rtests[j].test()) { - timeToDetectionMoments[j - 3].add(time - lastTruePositive[j - 3]); - timeToDetectionMax[j - 3].add(static_cast(time - lastTruePositive[j - 3])); + timeToDetectionMoments[j - 3].add( + time - lastTruePositive[j - 3]); + timeToDetectionMax[j - 3].add( + static_cast(time - lastTruePositive[j - 3])); lastTruePositive[j - 3] = time; } } @@ -114,8 +120,10 @@ void CTrendTestsTest::testRandomizedPeriodicity() { for (std::size_t i = 0u; i < boost::size(timeToDetectionMoments); ++i) { LOG_DEBUG(<< "time to detect moments = " << timeToDetectionMoments[i]); LOG_DEBUG(<< "maximum time to detect = " << timeToDetectionMax[i][0]); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(timeToDetectionMoments[i]) < 1.5 * DAY); - CPPUNIT_ASSERT(std::sqrt(maths::CBasicStatistics::variance(timeToDetectionMoments[i])) < 5 * DAY); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(timeToDetectionMoments[i]) < + 1.5 * DAY); + CPPUNIT_ASSERT(std::sqrt(maths::CBasicStatistics::variance( + timeToDetectionMoments[i])) < 5 * DAY); CPPUNIT_ASSERT(timeToDetectionMax[i][0] <= 27 * WEEK); } } @@ -153,9 +161,10 @@ void CTrendTestsTest::testCalendarCyclic() { TDoubleVec error; for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) { - ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), boost::end(months), time) - boost::begin(months), - ptrdiff_t(1), - ptrdiff_t(boost::size(months))); + ptrdiff_t i = maths::CTools::truncate( + std::lower_bound(boost::begin(months), boost::end(months), time) - + boost::begin(months), + ptrdiff_t(1), ptrdiff_t(boost::size(months))); rng.generateNormalSamples(0.0, 10.0, 1, error); if (time >= months[i - 1] + 30000 && time < months[i - 1] + 50000) { @@ -165,7 +174,8 @@ void CTrendTestsTest::testCalendarCyclic() { if (time > 121 * DAY && time % DAY == 0) { TOptionalFeature feature = cyclic.test(); - CPPUNIT_ASSERT_EQUAL(std::string("2nd day of month"), core::CContainerPrinter::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("2nd day of month"), + core::CContainerPrinter::print(feature)); } } } @@ -187,9 +197,10 @@ void CTrendTestsTest::testCalendarCyclic() { TDoubleVec error; for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) { - ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), boost::end(months), time) - boost::begin(months), - ptrdiff_t(1), - ptrdiff_t(boost::size(months))); + ptrdiff_t i = maths::CTools::truncate( + std::lower_bound(boost::begin(months), boost::end(months), time) - + boost::begin(months), + ptrdiff_t(1), ptrdiff_t(boost::size(months))); rng.generateNormalSamples(0.0, 10.0, 1, error); if (time >= months[i - 1] + 10000 && time < months[i - 1] + 20000) { @@ -199,7 +210,8 @@ void CTrendTestsTest::testCalendarCyclic() { if (time > 121 * DAY && time % DAY == 0) { TOptionalFeature feature = cyclic.test(); - CPPUNIT_ASSERT_EQUAL(std::string("0 days before end of month"), core::CContainerPrinter::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("0 days before end of month"), + core::CContainerPrinter::print(feature)); } } } @@ -221,9 +233,10 @@ void CTrendTestsTest::testCalendarCyclic() { TDoubleVec error; for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) { - ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), boost::end(months), time) - boost::begin(months), - ptrdiff_t(1), - ptrdiff_t(boost::size(months))); + ptrdiff_t i = maths::CTools::truncate( + std::lower_bound(boost::begin(months), 
boost::end(months), time) - + boost::begin(months), + ptrdiff_t(1), ptrdiff_t(boost::size(months))); rng.generateNormalSamples(0.0, 10.0, 1, error); if (time >= months[i - 1] + 45000 && time < months[i - 1] + 60000) { @@ -233,7 +246,8 @@ void CTrendTestsTest::testCalendarCyclic() { if (time > 121 * DAY && time % DAY == 0) { TOptionalFeature feature = cyclic.test(); - CPPUNIT_ASSERT_EQUAL(std::string("1st Monday of month"), core::CContainerPrinter::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("1st Monday of month"), + core::CContainerPrinter::print(feature)); } } } @@ -254,9 +268,10 @@ void CTrendTestsTest::testCalendarCyclic() { TDoubleVec error; for (core_t::TTime time = 0; time <= end; time += HALF_HOUR) { - ptrdiff_t i = maths::CTools::truncate(std::lower_bound(boost::begin(months), boost::end(months), time) - boost::begin(months), - ptrdiff_t(1), - ptrdiff_t(boost::size(months))); + ptrdiff_t i = maths::CTools::truncate( + std::lower_bound(boost::begin(months), boost::end(months), time) - + boost::begin(months), + ptrdiff_t(1), ptrdiff_t(boost::size(months))); rng.generateNormalSamples(0.0, 10.0, 1, error); if (time >= months[i - 1] + 45000 && time < months[i - 1] + 60000) { @@ -266,7 +281,8 @@ void CTrendTestsTest::testCalendarCyclic() { if (time > 121 * DAY && time % DAY == 0) { TOptionalFeature feature = cyclic.test(); - CPPUNIT_ASSERT_EQUAL(std::string("0 Fridays before end of month"), core::CContainerPrinter::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("0 Fridays before end of month"), + core::CContainerPrinter::print(feature)); } } } @@ -311,7 +327,8 @@ void CTrendTestsTest::testPersist() { core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CRandomizedPeriodicityTest::acceptRestoreTraverser, &test2, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( + &maths::CRandomizedPeriodicityTest::acceptRestoreTraverser, &test2, _1))); } std::string newXml; { @@ -325,7 +342,8 @@ void CTrendTestsTest::testPersist() { core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origStaticsXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(&maths::CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser)); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + &maths::CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser)); } std::string newStaticsXml; { @@ -365,7 +383,8 @@ void CTrendTestsTest::testPersist() { core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&maths::CCalendarCyclicTest::acceptRestoreTraverser, &restored, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( + &maths::CCalendarCyclicTest::acceptRestoreTraverser, &restored, _1))); } CPPUNIT_ASSERT_EQUAL(orig.checksum(), restored.checksum()); @@ -382,11 +401,12 @@ void CTrendTestsTest::testPersist() { CppUnit::Test* CTrendTestsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CTrendTestsTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CTrendTestsTest::testRandomizedPeriodicity", - &CTrendTestsTest::testRandomizedPeriodicity)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CTrendTestsTest::testCalendarCyclic", &CTrendTestsTest::testCalendarCyclic)); - suiteOfTests->addTest(new 
CppUnit::TestCaller("CTrendTestsTest::testPersist", &CTrendTestsTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTrendTestsTest::testRandomizedPeriodicity", &CTrendTestsTest::testRandomizedPeriodicity)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTrendTestsTest::testCalendarCyclic", &CTrendTestsTest::testCalendarCyclic)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTrendTestsTest::testPersist", &CTrendTestsTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CXMeansOnline1dTest.cc b/lib/maths/unittest/CXMeansOnline1dTest.cc index d1982aec55..7f3ced0e37 100644 --- a/lib/maths/unittest/CXMeansOnline1dTest.cc +++ b/lib/maths/unittest/CXMeansOnline1dTest.cc @@ -36,15 +36,16 @@ bool restore(const maths::SDistributionRestoreParams& params, core::CRapidXmlStateRestoreTraverser& traverser, maths::CXMeansOnline1d::CCluster& result) { return traverser.traverseSubLevel( - boost::bind(&maths::CXMeansOnline1d::CCluster::acceptRestoreTraverser, &result, boost::cref(params), _1)); + boost::bind(&maths::CXMeansOnline1d::CCluster::acceptRestoreTraverser, + &result, boost::cref(params), _1)); } void debug(const TClusterVec& clusters) { std::ostringstream c; c << "["; for (std::size_t j = 0u; j < clusters.size(); ++j) { - c << " (" << clusters[j].weight(maths_t::E_ClustersFractionWeight) << ", " << clusters[j].centre() << ", " << clusters[j].spread() - << ")"; + c << " (" << clusters[j].weight(maths_t::E_ClustersFractionWeight) + << ", " << clusters[j].centre() << ", " << clusters[j].spread() << ")"; } c << " ]"; LOG_DEBUG(<< "clusters = " << c.str()); @@ -56,8 +57,9 @@ void CXMeansOnline1dTest::testCluster() { LOG_DEBUG(<< "| CXMeansOnline1dTest::testCluster |"); LOG_DEBUG(<< "+------------------------------------+"); - maths::CXMeansOnline1d clusterer( - maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.1); + maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, + maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, 0.1); maths::CXMeansOnline1d::CCluster cluster(clusterer); double x1[] = {1.1, 2.3, 1.5, 0.9, 4.7, 3.2, 2.8, 2.3, 1.9, 2.6}; @@ -104,7 +106,8 @@ void CXMeansOnline1dTest::testCluster() { std::sort(values.begin(), values.end()); for (std::size_t i = 0u; i < 10; ++i) { double p = static_cast(10 * i) + 5.0; - double expectedPercentile = values[static_cast(p / 100.0 * static_cast(values.size()) + 0.5)]; + double expectedPercentile = values[static_cast( + p / 100.0 * static_cast(values.size()) + 0.5)]; LOG_DEBUG(<< p << " percentile = " << cluster.percentile(p)); LOG_DEBUG(<< p << " expected percentile = " << expectedPercentile); double error = std::fabs(cluster.percentile(p) - expectedPercentile); @@ -129,10 +132,11 @@ void CXMeansOnline1dTest::testCluster() { CPPUNIT_ASSERT_DOUBLES_EQUAL(cluster.centre(), sampleCentre, 0.02); CPPUNIT_ASSERT_DOUBLES_EQUAL(cluster.spread(), sampleSpread, 0.2); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(cluster.count()), - -cluster.logLikelihoodFromCluster(maths_t::E_ClustersEqualWeight, 1.5) + - cluster.logLikelihoodFromCluster(maths_t::E_ClustersFractionWeight, 1.5), - 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::log(cluster.count()), + -cluster.logLikelihoodFromCluster(maths_t::E_ClustersEqualWeight, 1.5) + + cluster.logLikelihoodFromCluster(maths_t::E_ClustersFractionWeight, 1.5), + 1e-10); uint64_t origChecksum = cluster.checksum(0); std::string origXml; @@ -150,11 +154,9 @@ void 
CXMeansOnline1dTest::testCluster() { core::CRapidXmlStateRestoreTraverser traverser(parser); maths::CXMeansOnline1d::CCluster restoredCluster(clusterer); - maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, - 0.1, - maths::MINIMUM_CLUSTER_SPLIT_FRACTION, - maths::MINIMUM_CLUSTER_SPLIT_COUNT, - maths::MINIMUM_CATEGORY_COUNT); + maths::SDistributionRestoreParams params( + maths_t::E_ContinuousData, 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, + maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); restore(params, traverser, restoredCluster); uint64_t restoredChecksum = restoredCluster.checksum(0); CPPUNIT_ASSERT_EQUAL(origChecksum, restoredChecksum); @@ -165,7 +167,8 @@ void CXMeansOnline1dTest::testCluster() { cluster.add(x2[i], c2[i]); } maths::CXMeansOnline1d::TOptionalClusterClusterPr split = - cluster.split(maths::CAvailableModeDistributions::ALL, 5.0, 0.0, std::make_pair(0.0, 15.0), clusterer.indexGenerator()); + cluster.split(maths::CAvailableModeDistributions::ALL, 5.0, 0.0, + std::make_pair(0.0, 15.0), clusterer.indexGenerator()); CPPUNIT_ASSERT(split); LOG_DEBUG(<< "left centre = " << split->first.centre()); LOG_DEBUG(<< "left spread = " << split->first.spread()); @@ -199,7 +202,8 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() { TDoubleVec mode3; rng.generateNormalSamples(35.0, 2.25, 150u, mode3); - TMeanVarAccumulator expectedClusters[] = {TMeanVarAccumulator(), TMeanVarAccumulator(), TMeanVarAccumulator()}; + TMeanVarAccumulator expectedClusters[] = { + TMeanVarAccumulator(), TMeanVarAccumulator(), TMeanVarAccumulator()}; expectedClusters[0].add(mode1); expectedClusters[1].add(mode2); expectedClusters[2].add(mode3); @@ -217,8 +221,9 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() { // Randomize the input order. rng.random_shuffle(samples.begin(), samples.end()); - maths::CXMeansOnline1d clusterer( - maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001); + maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, + maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, 0.001); //std::ostringstream name; //name << "results.m." 
<< i; @@ -258,10 +263,17 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() { CPPUNIT_ASSERT_EQUAL(std::size_t(3), clusters.size()); for (std::size_t j = 0u; j < clusters.size(); ++j) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(), 0.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread(), 0.4); - meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j])); - spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + maths::CBasicStatistics::mean(expectedClusters[j]), + clusters[j].centre(), 0.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), + clusters[j].spread(), 0.4); + meanError += std::fabs(clusters[j].centre() - + maths::CBasicStatistics::mean(expectedClusters[j])); + spreadError += std::fabs( + clusters[j].spread() - + std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j]))); } } @@ -292,8 +304,9 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() { std::copy(mode1.begin(), mode1.end(), std::back_inserter(samples)); std::copy(mode2.begin(), mode2.end(), std::back_inserter(samples)); - maths::CXMeansOnline1d clusterer( - maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001); + maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, + maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, 0.001); for (std::size_t j = 0u; j < samples.size(); ++j) { if (j % 50 == 0) { @@ -313,8 +326,11 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() { LOG_DEBUG(<< "expected = " << expectedClusters); CPPUNIT_ASSERT_EQUAL(std::size_t(1), clusters.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters), clusters[0].centre(), 0.05); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters)), clusters[0].spread(), 0.3); + CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters), + clusters[0].centre(), 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + std::sqrt(maths::CBasicStatistics::variance(expectedClusters)), + clusters[0].spread(), 0.3); } // Test 3: @@ -328,7 +344,8 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() { TDoubleVec mode2; rng.generateNormalSamples(11.0, 1.0, 200u, mode2); - TMeanVarAccumulator expectedClusters[] = {TMeanVarAccumulator(), TMeanVarAccumulator()}; + TMeanVarAccumulator expectedClusters[] = {TMeanVarAccumulator(), + TMeanVarAccumulator()}; expectedClusters[0].add(mode1); expectedClusters[1].add(mode2); @@ -344,8 +361,9 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() { // Randomize the input order. 
         rng.random_shuffle(samples.begin(), samples.end());
 
-        maths::CXMeansOnline1d clusterer(
-            maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001);
+        maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+                                         maths::CAvailableModeDistributions::ALL,
+                                         maths_t::E_ClustersFractionWeight, 0.001);
 
         for (std::size_t j = 0u; j < samples.size(); ++j) {
             if (j % 50 == 0) {
@@ -364,10 +382,17 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() {
         CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size());
 
         for (std::size_t j = 0u; j < clusters.size(); ++j) {
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(), 0.4);
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread(), 0.3);
-            meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j]));
-            spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                maths::CBasicStatistics::mean(expectedClusters[j]),
+                clusters[j].centre(), 0.4);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])),
+                clusters[j].spread(), 0.3);
+            meanError += std::fabs(clusters[j].centre() -
+                                   maths::CBasicStatistics::mean(expectedClusters[j]));
+            spreadError += std::fabs(
+                clusters[j].spread() -
+                std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])));
         }
     }
@@ -411,8 +436,9 @@ void CXMeansOnline1dTest::testMixtureOfUniforms() {
         // Randomize the input order.
         rng.random_shuffle(samples.begin(), samples.end());
 
-        maths::CXMeansOnline1d clusterer(
-            maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001);
+        maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+                                         maths::CAvailableModeDistributions::ALL,
+                                         maths_t::E_ClustersFractionWeight, 0.001);
 
         for (std::size_t j = 0u; j < samples.size(); ++j) {
             if (j % 50 == 0) {
@@ -432,10 +458,16 @@ void CXMeansOnline1dTest::testMixtureOfUniforms() {
         CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size());
 
         for (std::size_t j = 0u; j < clusters.size(); ++j) {
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(), 0.01);
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread(), 0.02);
-            meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j]));
-            spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]),
+                                         clusters[j].centre(), 0.01);
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])),
+                clusters[j].spread(), 0.02);
+            meanError += std::fabs(clusters[j].centre() -
+                                   maths::CBasicStatistics::mean(expectedClusters[j]));
+            spreadError += std::fabs(
+                clusters[j].spread() -
+                std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])));
         }
     }
@@ -478,8 +510,9 @@ void CXMeansOnline1dTest::testMixtureOfLogNormals() {
         // Randomize the input order.
         rng.random_shuffle(samples.begin(), samples.end());
 
-        maths::CXMeansOnline1d clusterer(
-            maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.001);
+        maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+                                         maths::CAvailableModeDistributions::ALL,
+                                         maths_t::E_ClustersFractionWeight, 0.001);
 
         //std::ostringstream name;
         //name << "results.m." << i;
@@ -517,15 +550,20 @@ void CXMeansOnline1dTest::testMixtureOfLogNormals() {
         CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size());
 
         for (std::size_t j = 0u; j < clusters.size(); ++j) {
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]),
-                                         clusters[j].centre(),
-                                         0.03 * std::max(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre()));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(),
+                0.03 * std::max(maths::CBasicStatistics::mean(expectedClusters[j]),
+                                clusters[j].centre()));
             CPPUNIT_ASSERT_DOUBLES_EQUAL(
                 std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])),
                 clusters[j].spread(),
-                0.5 * std::max(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread()));
-            meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j]));
-            spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])));
+                0.5 * std::max(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])),
+                               clusters[j].spread()));
+            meanError += std::fabs(clusters[j].centre() -
+                                   maths::CBasicStatistics::mean(expectedClusters[j]));
+            spreadError += std::fabs(
+                clusters[j].spread() -
+                std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])));
         }
     }
@@ -598,22 +636,28 @@ void CXMeansOnline1dTest::testOutliers() {
         CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size());
 
         for (std::size_t j = 0u; j < clusters.size(); ++j) {
-            CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CBasicStatistics::mean(expectedClusters[j]),
-                                         clusters[j].centre(),
-                                         0.01 * std::max(maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre()));
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(
+                maths::CBasicStatistics::mean(expectedClusters[j]), clusters[j].centre(),
+                0.01 * std::max(maths::CBasicStatistics::mean(expectedClusters[j]),
+                                clusters[j].centre()));
             CPPUNIT_ASSERT_DOUBLES_EQUAL(
                 std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])),
                 clusters[j].spread(),
-                0.03 * std::max(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])), clusters[j].spread()));
-            meanError += std::fabs(clusters[j].centre() - maths::CBasicStatistics::mean(expectedClusters[j]));
-            spreadError += std::fabs(clusters[j].spread() - std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])));
+                0.03 * std::max(std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])),
+                                clusters[j].spread()));
+            meanError += std::fabs(clusters[j].centre() -
+                                   maths::CBasicStatistics::mean(expectedClusters[j]));
+            spreadError += std::fabs(
+                clusters[j].spread() -
+                std::sqrt(maths::CBasicStatistics::variance(expectedClusters[j])));
         }
     }
 
     meanError /= n;
     spreadError /= n;
-    LOG_DEBUG(<< "meanError = " << meanError << ", spreadError = " << spreadError << ", n = " << n);
+    LOG_DEBUG(<< "meanError = " << meanError
+              << ", spreadError = " << spreadError << ", n = " << n);
 
     CPPUNIT_ASSERT(meanError < 0.15);
     CPPUNIT_ASSERT(spreadError < 1.0);
@@ -630,11 +674,14 @@ void CXMeansOnline1dTest::testManyClusters() {
 
     TTimeDoublePrVec timeseries;
     core_t::TTime startTime;
     core_t::TTime endTime;
-    CPPUNIT_ASSERT(
-        test::CTimeSeriesTestData::parse("testfiles/times.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_UNIX_REGEX));
+    CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse(
+        "testfiles/times.csv", timeseries, startTime, endTime,
+        test::CTimeSeriesTestData::CSV_UNIX_REGEX));
     CPPUNIT_ASSERT(!timeseries.empty());
 
-    LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ...");
+    LOG_DEBUG(<< "timeseries = "
+              << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10)
+              << " ...");
 
     maths::CXMeansOnline1d clusterer(maths_t::E_IntegerData,
                                      maths::CAvailableModeDistributions::ALL,
@@ -661,7 +708,9 @@ void CXMeansOnline1dTest::testLowVariation() {
     LOG_DEBUG(<< "| CXMeansOnline1dTest::testLowVariation |");
     LOG_DEBUG(<< "+-----------------------------------------+");
 
-    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight);
+    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+                                     maths::CAvailableModeDistributions::ALL,
+                                     maths_t::E_ClustersFractionWeight);
 
     maths::CXMeansOnline1d::TSizeDoublePr2Vec dummy;
     for (std::size_t i = 0u; i < 200; ++i) {
@@ -751,8 +800,9 @@ void CXMeansOnline1dTest::testPersist() {
     std::copy(mode2.begin(), mode2.end(), std::back_inserter(samples));
     std::copy(mode3.begin(), mode3.end(), std::back_inserter(samples));
 
-    maths::CXMeansOnline1d clusterer(
-        maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersEqualWeight, 0.05);
+    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+                                     maths::CAvailableModeDistributions::ALL,
+                                     maths_t::E_ClustersEqualWeight, 0.05);
 
     maths::CXMeansOnline1d::TSizeDoublePr2Vec dummy;
     for (std::size_t j = 0u; j < samples.size(); ++j) {
@@ -770,11 +820,9 @@ void CXMeansOnline1dTest::testPersist() {
     LOG_DEBUG(<< "Clusterer XML representation:\n" << origXml);
 
     // Restore the XML into a new clusterer.
-    maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
-                                             0.15,
-                                             maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
-                                             maths::MINIMUM_CLUSTER_SPLIT_COUNT,
-                                             maths::MINIMUM_CATEGORY_COUNT);
+    maths::SDistributionRestoreParams params(
+        maths_t::E_ContinuousData, 0.15, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+        maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
     core::CRapidXmlParser parser;
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
@@ -796,7 +844,9 @@ void CXMeansOnline1dTest::testPruneEmptyCluster() {
     LOG_DEBUG(<< "| CXMeansOnline1dTest::testPruneEmptyCluster |");
     LOG_DEBUG(<< "+----------------------------------------------+");
 
-    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight);
+    maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData,
+                                     maths::CAvailableModeDistributions::ALL,
+                                     maths_t::E_ClustersFractionWeight);
 
     maths::CXMeansOnline1d::CCluster cluster1(clusterer);
     cluster1.add(1.0, 12.0);
@@ -829,28 +879,30 @@ CppUnit::Test* CXMeansOnline1dTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXMeansOnline1dTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testCluster", &CXMeansOnline1dTest::testCluster));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testMixtureOfGaussians",
-                                                                       &CXMeansOnline1dTest::testMixtureOfGaussians));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testMixtureOfUniforms",
-                                                                       &CXMeansOnline1dTest::testMixtureOfUniforms));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testMixtureOfLogNormals",
-                                                                       &CXMeansOnline1dTest::testMixtureOfLogNormals));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testOutliers", &CXMeansOnline1dTest::testOutliers));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testManyClusters", &CXMeansOnline1dTest::testManyClusters));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testLowVariation", &CXMeansOnline1dTest::testLowVariation));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testAdaption", &CXMeansOnline1dTest::testAdaption));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testLargeHistory", &CXMeansOnline1dTest::testLargeHistory));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testPersist", &CXMeansOnline1dTest::testPersist));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>("CXMeansOnline1dTest::testPruneEmptyCluster",
-                                                                       &CXMeansOnline1dTest::testPruneEmptyCluster));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testCluster", &CXMeansOnline1dTest::testCluster));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testMixtureOfGaussians",
+        &CXMeansOnline1dTest::testMixtureOfGaussians));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testMixtureOfUniforms", &CXMeansOnline1dTest::testMixtureOfUniforms));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testMixtureOfLogNormals",
+        &CXMeansOnline1dTest::testMixtureOfLogNormals));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testOutliers", &CXMeansOnline1dTest::testOutliers));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testManyClusters", &CXMeansOnline1dTest::testManyClusters));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testLowVariation", &CXMeansOnline1dTest::testLowVariation));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testAdaption", &CXMeansOnline1dTest::testAdaption));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testLargeHistory", &CXMeansOnline1dTest::testLargeHistory));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testPersist", &CXMeansOnline1dTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansOnline1dTest>(
+        "CXMeansOnline1dTest::testPruneEmptyCluster", &CXMeansOnline1dTest::testPruneEmptyCluster));
 
     return suiteOfTests;
 }
diff --git a/lib/maths/unittest/CXMeansOnlineTest.cc b/lib/maths/unittest/CXMeansOnlineTest.cc
index 8be2334aa3..a71c498699 100644
--- a/lib/maths/unittest/CXMeansOnlineTest.cc
+++ b/lib/maths/unittest/CXMeansOnlineTest.cc
@@ -48,21 +48,27 @@ class CXMeansOnlineForTest : public maths::CXMeansOnline<T, N> {
                          maths_t::EClusterWeightCalc weightCalc,
                          double decayRate = 0.0,
                          double minimumClusterFraction = 0.0)
-        : maths::CXMeansOnline<T, N>(dataType, weightCalc, decayRate, minimumClusterFraction) {}
+        : maths::CXMeansOnline<T, N>(dataType, weightCalc, decayRate, minimumClusterFraction) {
+    }
 
     void add(const TPoint& x, double count = 1.0) {
         TSizeDoublePr2Vec dummy;
         this->maths::CXMeansOnline<T, N>::add(x, dummy, count);
     }
 
-    const TClusterVec& clusters() const { return this->maths::CXMeansOnline<T, N>::clusters(); }
+    const TClusterVec& clusters() const {
+        return this->maths::CXMeansOnline<T, N>::clusters();
+    }
 };
 
 using TXMeans2ForTest = CXMeansOnlineForTest<double, 2>;
 using TXMeans2FloatForTest = CXMeansOnlineForTest<maths::CFloatStorage, 2>;
 
-bool restore(const maths::SDistributionRestoreParams& params, core::CRapidXmlStateRestoreTraverser& traverser, TXMeans2::CCluster& result) {
-    return traverser.traverseSubLevel(boost::bind(&TXMeans2::CCluster::acceptRestoreTraverser, &result, boost::cref(params), _1));
+bool restore(const maths::SDistributionRestoreParams& params,
+             core::CRapidXmlStateRestoreTraverser& traverser,
+             TXMeans2::CCluster& result) {
+    return traverser.traverseSubLevel(boost::bind(&TXMeans2::CCluster::acceptRestoreTraverser,
+                                                  &result, boost::cref(params), _1));
 }
 }
@@ -76,21 +82,12 @@ void CXMeansOnlineTest::testCluster() {
 
     TXMeans2 clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.1);
     TXMeans2::CCluster cluster(clusterer);
-    double x1[][2] = {{1.1, 2.0},
-                      {2.3, 2.1},
-                      {1.5, 1.4},
-                      {0.9, 0.8},
-                      {4.7, 3.9},
-                      {3.2, 3.2},
-                      {2.8, 2.7},
-                      {2.3, 1.5},
-                      {1.9, 1.6},
-                      {2.6, 2.1},
-                      {2.0, 2.2},
-                      {1.7, 1.9},
-                      {1.8, 1.7},
-                      {2.1, 1.9}};
-    double c1[] = {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    double x1[][2] = {{1.1, 2.0}, {2.3, 2.1}, {1.5, 1.4}, {0.9, 0.8},
+                      {4.7, 3.9}, {3.2, 3.2}, {2.8, 2.7}, {2.3, 1.5},
+                      {1.9, 1.6}, {2.6, 2.1}, {2.0, 2.2}, {1.7, 1.9},
+                      {1.8, 1.7}, {2.1, 1.9}};
+    double c1[] = {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0,
+                   1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
 
     TCovariances2 moments;
     for (std::size_t i = 0u; i < boost::size(x1); ++i) {
@@ -104,7 +101,8 @@ void CXMeansOnlineTest::testCluster() {
 
     double expectedCount = maths::CBasicStatistics::count(moments);
     TPoint expectedCentre = maths::CBasicStatistics::mean(moments);
-    double expectedSpread = std::sqrt(maths::CBasicStatistics::maximumLikelihoodCovariances(moments).trace() / 2.0);
+    double expectedSpread = std::sqrt(
+        maths::CBasicStatistics::maximumLikelihoodCovariances(moments).trace() / 2.0);
     LOG_DEBUG(<< "expected count = " << expectedCount);
     LOG_DEBUG(<< "expected centre = " << expectedCentre);
     LOG_DEBUG(<< "expected spread = " << expectedSpread);
@@ -134,16 +132,18 @@ void CXMeansOnlineTest::testCluster() {
         sampleMoments.add(samples[i]);
     }
     TPoint sampleCentre = maths::CBasicStatistics::mean(sampleMoments);
-    double sampleSpread = std::sqrt(maths::CBasicStatistics::covariances(sampleMoments).trace() / 2.0);
+    double sampleSpread =
+        std::sqrt(maths::CBasicStatistics::covariances(sampleMoments).trace() / 2.0);
     LOG_DEBUG(<< "sample centre = " << sampleCentre);
     LOG_DEBUG(<< "sample spread = " << sampleSpread);
     CPPUNIT_ASSERT((sampleCentre - cluster.centre()).euclidean() < 1e-10);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(cluster.spread(), sampleSpread, 0.1);
 
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::log(cluster.count()),
-                                 -cluster.logLikelihoodFromCluster(maths_t::E_ClustersEqualWeight, TPoint(1.5)) +
-                                     cluster.logLikelihoodFromCluster(maths_t::E_ClustersFractionWeight, TPoint(1.5)),
-                                 1e-10);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(
+        std::log(cluster.count()),
+        -cluster.logLikelihoodFromCluster(maths_t::E_ClustersEqualWeight, TPoint(1.5)) +
+            cluster.logLikelihoodFromCluster(maths_t::E_ClustersFractionWeight, TPoint(1.5)),
+        1e-10);
 
     uint64_t origChecksum = cluster.checksum(0);
     std::string origXml;
@@ -161,22 +161,22 @@ void CXMeansOnlineTest::testCluster() {
     core::CRapidXmlStateRestoreTraverser traverser(parser);
 
     TXMeans2::CCluster restoredCluster(clusterer);
-    maths::SDistributionRestoreParams params(maths_t::E_ContinuousData,
-                                             0.1,
-                                             maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
-                                             maths::MINIMUM_CLUSTER_SPLIT_COUNT,
-                                             maths::MINIMUM_CATEGORY_COUNT);
+    maths::SDistributionRestoreParams params(
+        maths_t::E_ContinuousData, 0.1, maths::MINIMUM_CLUSTER_SPLIT_FRACTION,
+        maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT);
     restore(params, traverser, restoredCluster);
     uint64_t restoredChecksum = restoredCluster.checksum(0);
     CPPUNIT_ASSERT_EQUAL(origChecksum, restoredChecksum);
 
-    double x2[][2] = {{10.3, 10.4}, {10.6, 10.5}, {10.7, 11.0}, {9.8, 10.2}, {11.2, 11.4}, {11.0, 10.7}, {11.5, 11.3}};
+    double x2[][2] = {{10.3, 10.4}, {10.6, 10.5}, {10.7, 11.0}, {9.8, 10.2},
+                      {11.2, 11.4}, {11.0, 10.7}, {11.5, 11.3}};
     double c2[] = {2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0};
     for (std::size_t i = 0u; i < boost::size(x2); ++i) {
         cluster.add(TPoint(x2[i]), c2[i]);
     }
     maths::CPRNG::CXorOShiro128Plus rng;
-    TXMeans2::TOptionalClusterClusterPr split = cluster.split(rng, 5.0, clusterer.indexGenerator());
+    TXMeans2::TOptionalClusterClusterPr split =
+        cluster.split(rng, 5.0, clusterer.indexGenerator());
     CPPUNIT_ASSERT(split);
     TPointVec centres;
     centres.push_back(split->first.centre());
@@ -232,7 +232,8 @@ void CXMeansOnlineTest::testClusteringVanilla() {
     test::CRandomNumbers rng;
 
     double means[][2] = {{10, 15}, {40, 10}, {12, 35}};
-    double covariances[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{20, -11}, {-11, 25}}};
+    double covariances[][2][2] = {
+        {{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{20, -11}, {-11, 25}}};
 
     for (std::size_t t = 0u; t < 10; ++t) {
         LOG_DEBUG(<< "*** test " << t << " ***");
@@ -274,7 +275,8 @@ void CXMeansOnlineTest::testClusteringVanilla() {
             TDoubleVec mean(&means[i][0], &means[i][2]);
             TDoubleVecVec covariance;
             for (std::size_t j = 0u; j < 2; ++j) {
-                covariance.push_back(TDoubleVec(&covariances[i][j][0], &covariances[i][j][2]));
+                covariance.push_back(
+                    TDoubleVec(&covariances[i][j][0], &covariances[i][j][2]));
             }
             TDoubleVecVec samples_;
             rng.generateMultivariateNormalSamples(mean, covariance, 200, samples_);
@@ -295,18 +297,22 @@ void CXMeansOnlineTest::testClusteringVanilla() {
         CPPUNIT_ASSERT_EQUAL(std::size_t(3), clusters.size());
         for (std::size_t i = 0u; i < clusters.size(); ++i) {
-            LOG_DEBUG(<< "moments = " << maths::CBasicStatistics::print(clusters[i].covariances()));
+            LOG_DEBUG(<< "moments = "
+                      << maths::CBasicStatistics::print(clusters[i].covariances()));
 
             maths::CBasicStatistics::COrderStatisticsStack<double, 1> meanError;
             maths::CBasicStatistics::COrderStatisticsStack<double, 1> covError;
             for (std::size_t j = 0u; j < expectedMoments.size(); ++j) {
-                meanError.add((maths::CBasicStatistics::mean(clusters[i].covariances()) - maths::CBasicStatistics::mean(expectedMoments[j]))
-                                  .euclidean() /
-                              maths::CBasicStatistics::mean(expectedMoments[j]).euclidean());
-                covError.add((maths::CBasicStatistics::covariances(clusters[i].covariances()) -
-                              maths::CBasicStatistics::covariances(expectedMoments[j]))
-                                 .frobenius() /
-                             maths::CBasicStatistics::covariances(expectedMoments[j]).frobenius());
+                meanError.add(
+                    (maths::CBasicStatistics::mean(clusters[i].covariances()) -
+                     maths::CBasicStatistics::mean(expectedMoments[j]))
+                        .euclidean() /
+                    maths::CBasicStatistics::mean(expectedMoments[j]).euclidean());
+                covError.add(
+                    (maths::CBasicStatistics::covariances(clusters[i].covariances()) -
+                     maths::CBasicStatistics::covariances(expectedMoments[j]))
+                        .frobenius() /
+                    maths::CBasicStatistics::covariances(expectedMoments[j]).frobenius());
             }
             LOG_DEBUG(<< "mean error = " << meanError[0]);
             LOG_DEBUG(<< "covariance error = " << covError[0]);
@@ -341,7 +347,8 @@ void CXMeansOnlineTest::testClusteringWithOutliers() {
     double outliers_[][2] = {{600, 10}, {650, 11}, {610, 12}, {700, 16}, {690, 14}};
     TDoubleVecVec outliers;
     for (std::size_t i = 0u; i < boost::size(outliers_); ++i) {
-        outliers.push_back(TDoubleVec(boost::begin(outliers_[i]), boost::end(outliers_[i])));
+        outliers.push_back(
+            TDoubleVec(boost::begin(outliers_[i]), boost::end(outliers_[i])));
     }
 
     // We use the cluster moments to indirectly measure the purity
@@ -361,7 +368,8 @@ void CXMeansOnlineTest::testClusteringWithOutliers() {
             TDoubleVec mean(&means[i][0], &means[i][2]);
             TDoubleVecVec covariance;
             for (std::size_t j = 0u; j < 2; ++j) {
-                covariance.push_back(TDoubleVec(&covariances[i][j][0], &covariances[i][j][2]));
+                covariance.push_back(
+                    TDoubleVec(&covariances[i][j][0], &covariances[i][j][2]));
             }
             TDoubleVecVec samples_;
             rng.generateMultivariateNormalSamples(mean, covariance, 200, samples_);
@@ -375,7 +383,8 @@ void CXMeansOnlineTest::testClusteringWithOutliers() {
         }
         rng.random_shuffle(samples.begin(), samples.end());
 
-        TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.0, 0.01);
+        TXMeans2ForTest clusterer(maths_t::E_ContinuousData,
+                                  maths_t::E_ClustersFractionWeight, 0.0, 0.01);
 
         for (std::size_t i = 0u; i < outliers.size(); ++i) {
             clusterer.add(TPoint(outliers[i]));
@@ -389,18 +398,22 @@ void CXMeansOnlineTest::testClusteringWithOutliers() {
         CPPUNIT_ASSERT_EQUAL(std::size_t(2), clusters.size());
 
         for (std::size_t i = 0u; i < clusters.size(); ++i) {
-            LOG_DEBUG(<< "moments = " << maths::CBasicStatistics::print(clusters[i].covariances()));
+            LOG_DEBUG(<< "moments = "
+                      << maths::CBasicStatistics::print(clusters[i].covariances()));
 
             maths::CBasicStatistics::COrderStatisticsStack<double, 1> meanError;
             maths::CBasicStatistics::COrderStatisticsStack<double, 1> covError;
             for (std::size_t j = 0u; j < expectedMoments.size(); ++j) {
-                meanError.add((maths::CBasicStatistics::mean(clusters[i].covariances()) -
maths::CBasicStatistics::mean(expectedMoments[j])) - .euclidean() / - maths::CBasicStatistics::mean(expectedMoments[j]).euclidean()); - covError.add((maths::CBasicStatistics::covariances(clusters[i].covariances()) - - maths::CBasicStatistics::covariances(expectedMoments[j])) - .frobenius() / - maths::CBasicStatistics::covariances(expectedMoments[j]).frobenius()); + meanError.add( + (maths::CBasicStatistics::mean(clusters[i].covariances()) - + maths::CBasicStatistics::mean(expectedMoments[j])) + .euclidean() / + maths::CBasicStatistics::mean(expectedMoments[j]).euclidean()); + covError.add( + (maths::CBasicStatistics::covariances(clusters[i].covariances()) - + maths::CBasicStatistics::covariances(expectedMoments[j])) + .frobenius() / + maths::CBasicStatistics::covariances(expectedMoments[j]).frobenius()); } LOG_DEBUG(<< "meanError = " << meanError[0]); @@ -433,7 +446,9 @@ void CXMeansOnlineTest::testManyClusters() { // close on the order of the data's differential entropy given the // generating distribution. - const std::size_t sizes_[] = {1800, 800, 1100, 400, 600, 400, 600, 1300, 400, 900, 500, 700, 400, 800, 1500, 1200, 500, 300, 1200, 800}; + const std::size_t sizes_[] = {1800, 800, 1100, 400, 600, 400, 600, + 1300, 400, 900, 500, 700, 400, 800, + 1500, 1200, 500, 300, 1200, 800}; TSizeVec sizes(boost::begin(sizes_), boost::end(sizes_)); double Z = static_cast(std::accumulate(sizes.begin(), sizes.end(), 0)); @@ -463,7 +478,8 @@ void CXMeansOnlineTest::testManyClusters() { lgenerating[i] /= Z; differentialEntropy.add(-std::log(lgenerating[i])); } - LOG_DEBUG(<< "differentialEntropy = " << maths::CBasicStatistics::mean(differentialEntropy)); + LOG_DEBUG(<< "differentialEntropy = " + << maths::CBasicStatistics::mean(differentialEntropy)); for (std::size_t t = 0u; t < 5; ++t) { LOG_DEBUG(<< "*** test " << t << " ***"); @@ -484,8 +500,10 @@ void CXMeansOnlineTest::testManyClusters() { double l = 0.0; for (std::size_t j = 0u; j < clusters.size(); ++j) { double n = maths::CBasicStatistics::count(clusters[j].covariances()); - const TPoint& mean = maths::CBasicStatistics::mean(clusters[j].covariances()); - const TMatrix& covariance = maths::CBasicStatistics::maximumLikelihoodCovariances(clusters[j].covariances()); + const TPoint& mean = + maths::CBasicStatistics::mean(clusters[j].covariances()); + const TMatrix& covariance = maths::CBasicStatistics::maximumLikelihoodCovariances( + clusters[j].covariances()); double lj; maths::gaussianLogLikelihood(covariance, samples[i] - mean, lj); l += n * std::exp(lj); @@ -494,7 +512,8 @@ void CXMeansOnlineTest::testManyClusters() { loss.add(std::log(lgenerating[i]) - std::log(l)); } LOG_DEBUG(<< "loss = " << maths::CBasicStatistics::mean(loss)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss) < 0.02 * maths::CBasicStatistics::mean(differentialEntropy)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(loss) < + 0.02 * maths::CBasicStatistics::mean(differentialEntropy)); } } @@ -514,14 +533,16 @@ void CXMeansOnlineTest::testAdaption() { test::CRandomNumbers rng; double means_[][2] = {{10, 15}, {30, 10}, {10, 15}, {30, 10}}; - double covariances_[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{100, 2}, {2, 15}}, {{100, 2}, {2, 15}}}; + double covariances_[][2][2] = { + {{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{100, 2}, {2, 15}}, {{100, 2}, {2, 15}}}; TDoubleVecVec means(boost::size(means_)); TDoubleVecVecVec covariances(boost::size(means_)); for (std::size_t i = 0u; i < boost::size(means_); ++i) { means[i].assign(&means_[i][0], &means_[i][2]); for 
(std::size_t j = 0u; j < 2; ++j) { - covariances[i].push_back(TDoubleVec(&covariances_[i][j][0], &covariances_[i][j][2])); + covariances[i].push_back( + TDoubleVec(&covariances_[i][j][0], &covariances_[i][j][2])); } } @@ -540,7 +561,8 @@ void CXMeansOnlineTest::testAdaption() { TDoubleVecVec samples; for (std::size_t j = 0u; j < boost::size(n[i]); ++j) { TDoubleVecVec samples_; - rng.generateMultivariateNormalSamples(means[j], covariances[j], n[i][j], samples_); + rng.generateMultivariateNormalSamples(means[j], covariances[j], + n[i][j], samples_); for (std::size_t k = 0u; k < samples_.size(); ++k) { modeCovariances[j].add(TPoint(samples_[k])); totalCovariances.add(TPoint(samples_[k])); @@ -562,23 +584,27 @@ void CXMeansOnlineTest::testAdaption() { maths::CBasicStatistics::COrderStatisticsStack covError; if (clusters.size() == 1) { - meanError.add( - (maths::CBasicStatistics::mean(clusters[j].covariances()) - maths::CBasicStatistics::mean(totalCovariances)) - .euclidean()); - covError.add((maths::CBasicStatistics::covariances(clusters[j].covariances()) - - maths::CBasicStatistics::covariances(totalCovariances)) - .frobenius() / - maths::CBasicStatistics::covariances(totalCovariances).frobenius()); + meanError.add((maths::CBasicStatistics::mean(clusters[j].covariances()) - + maths::CBasicStatistics::mean(totalCovariances)) + .euclidean()); + covError.add( + (maths::CBasicStatistics::covariances(clusters[j].covariances()) - + maths::CBasicStatistics::covariances(totalCovariances)) + .frobenius() / + maths::CBasicStatistics::covariances(totalCovariances).frobenius()); } else { for (std::size_t k = 0u; k < boost::size(modeCovariances); ++k) { meanError.add( - (maths::CBasicStatistics::mean(clusters[j].covariances()) - maths::CBasicStatistics::mean(modeCovariances[k])) + (maths::CBasicStatistics::mean(clusters[j].covariances()) - + maths::CBasicStatistics::mean(modeCovariances[k])) .euclidean() / maths::CBasicStatistics::mean(modeCovariances[k]).euclidean()); - covError.add((maths::CBasicStatistics::covariances(clusters[j].covariances()) - - maths::CBasicStatistics::covariances(modeCovariances[k])) - .frobenius() / - maths::CBasicStatistics::covariances(modeCovariances[k]).frobenius()); + covError.add( + (maths::CBasicStatistics::covariances(clusters[j].covariances()) - + maths::CBasicStatistics::covariances(modeCovariances[k])) + .frobenius() / + maths::CBasicStatistics::covariances(modeCovariances[k]) + .frobenius()); } } @@ -608,12 +634,10 @@ void CXMeansOnlineTest::testLargeHistory() { // is stable and reduce the decay rate then we should also reduce // the fraction of points required to create a cluster. 
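The trade-off described in the comment above can be made concrete: exponential down-weighting at decay rate lambda retains an effective history of roughly 1/lambda points, so a minimum cluster fraction f demands about f/lambda recent points before a new cluster can form. A standalone back-of-envelope sketch of that arithmetic (the variable names here are illustrative only, not library API):

    // Sketch: how decay rate and minimum cluster fraction interact.
    #include <iostream>

    int main() {
        double decayRate = 0.001;             // as passed to the clusterers below
        double minimumClusterFraction = 0.05; // ditto
        // Exponential down-weighting keeps an effective history of ~1/decayRate points.
        double effectiveHistory = 1.0 / decayRate;
        // Absolute number of recent points a candidate cluster must attract.
        double pointsToSeed = minimumClusterFraction * effectiveHistory;
        std::cout << "effective history ~ " << effectiveHistory << " points, "
                  << "new cluster needs ~ " << pointsToSeed << " points\n";
        return 0;
    }

Halving the decay rate doubles the effective history, so holding the fraction fixed doubles the absolute count needed to seed a cluster; that is why the test pairs the small decay rate 0.001 with the reduced fraction 0.05.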
- TXMeans2ForTest reference(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight, + TXMeans2ForTest reference(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.001, // decay rate 0.05); // minimum cluster fraction - TXMeans2ForTest clusterer(maths_t::E_ContinuousData, - maths_t::E_ClustersFractionWeight, + TXMeans2ForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.001, // decay rate 0.05); // minimum cluster fraction @@ -655,16 +679,19 @@ void CXMeansOnlineTest::testLatLongData() { using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; TTimeDoubleVecPrVec timeseries; - CPPUNIT_ASSERT( - test::CTimeSeriesTestData::parse("testfiles/lat_lng.csv", timeseries, test::CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX)); + CPPUNIT_ASSERT(test::CTimeSeriesTestData::parse( + "testfiles/lat_lng.csv", timeseries, test::CTimeSeriesTestData::CSV_UNIX_BIVALUED_REGEX)); CPPUNIT_ASSERT(!timeseries.empty()); - LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); + LOG_DEBUG(<< "timeseries = " + << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) + << " ..."); std::size_t n = timeseries.size(); TCovariances2 reference; - TXMeans2FloatForTest clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.0005); + TXMeans2FloatForTest clusterer(maths_t::E_ContinuousData, + maths_t::E_ClustersFractionWeight, 0.0005); for (std::size_t i = 0u; i < n; ++i) { TPoint x(timeseries[i].second); @@ -694,7 +721,8 @@ void CXMeansOnlineTest::testLatLongData() { for (std::size_t j = 0u; j < clusters.size(); ++j) { double w = maths::CBasicStatistics::count(clusters[j].covariances()); TPoint mean = maths::CBasicStatistics::mean(clusters[j].covariances()); - TMatrix covariance = maths::CBasicStatistics::covariances(clusters[j].covariances()); + TMatrix covariance = + maths::CBasicStatistics::covariances(clusters[j].covariances()); double llj; maths::gaussianLogLikelihood(covariance, x - mean, llj); ll += w * std::exp(llj); @@ -707,7 +735,8 @@ void CXMeansOnlineTest::testLatLongData() { LOG_DEBUG(<< "gaussian log(L) = " << maths::CBasicStatistics::mean(LLR)); LOG_DEBUG(<< "clustered log(L) = " << maths::CBasicStatistics::mean(LLC)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(LLC) < 0.6 * maths::CBasicStatistics::mean(LLR)); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(LLC) < + 0.6 * maths::CBasicStatistics::mean(LLR)); } void CXMeansOnlineTest::testPersist() { @@ -720,7 +749,8 @@ void CXMeansOnlineTest::testPersist() { test::CRandomNumbers rng; double means[][2] = {{10, 15}, {40, 10}, {12, 35}}; - double covariances[][2][2] = {{{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{20, -11}, {-11, 25}}}; + double covariances[][2][2] = { + {{10, 2}, {2, 15}}, {{30, 8}, {8, 15}}, {{20, -11}, {-11, 25}}}; TDoubleVecVec samples; TPointVec centres; @@ -752,11 +782,9 @@ void CXMeansOnlineTest::testPersist() { LOG_DEBUG(<< "Clusterer XML representation:\n" << origXml); // Restore the XML into a new clusterer. 
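The restore step that follows is the second half of a round trip exercised by every persistence test in this patch: serialise to XML, re-parse, restore into a fresh object, and compare checksums. A condensed sketch of the full cycle for the cluster case, reusing the restore() helper and the parser/traverser calls visible above; the persist half (core::CRapidXmlStatePersistInserter and its toXml) is an assumption, following the pattern these tests use elsewhere:

    // Persist the cluster to an XML string (persist half assumed).
    std::string origXml;
    {
        core::CRapidXmlStatePersistInserter inserter("root");
        cluster.acceptPersistInserter(inserter);
        inserter.toXml(origXml);
    }
    uint64_t origChecksum = cluster.checksum(0);

    // Re-parse and restore into a fresh cluster, then verify equality.
    core::CRapidXmlParser parser;
    CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
    core::CRapidXmlStateRestoreTraverser traverser(parser);
    TXMeans2::CCluster restoredCluster(clusterer);
    restore(params, traverser, restoredCluster); // helper defined at the top of this file
    CPPUNIT_ASSERT_EQUAL(origChecksum, restoredCluster.checksum(0));

Comparing checksums rather than individual fields keeps these assertions robust as the persisted state format evolves.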
- maths::SDistributionRestoreParams params(maths_t::E_ContinuousData, - 0.15, - maths::MINIMUM_CLUSTER_SPLIT_FRACTION, - maths::MINIMUM_CLUSTER_SPLIT_COUNT, - maths::MINIMUM_CATEGORY_COUNT); + maths::SDistributionRestoreParams params( + maths_t::E_ContinuousData, 0.15, maths::MINIMUM_CLUSTER_SPLIT_FRACTION, + maths::MINIMUM_CLUSTER_SPLIT_COUNT, maths::MINIMUM_CATEGORY_COUNT); core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); @@ -774,19 +802,23 @@ void CXMeansOnlineTest::testPersist() { CppUnit::Test* CXMeansOnlineTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXMeansOnlineTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CXMeansOnlineTest::testCluster", &CXMeansOnlineTest::testCluster)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CXMeansOnlineTest::testClusteringVanilla", &CXMeansOnlineTest::testClusteringVanilla)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXMeansOnlineTest::testClusteringWithOutliers", - &CXMeansOnlineTest::testClusteringWithOutliers)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CXMeansOnlineTest::testManyClusters", &CXMeansOnlineTest::testManyClusters)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXMeansOnlineTest::testAdaption", &CXMeansOnlineTest::testAdaption)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CXMeansOnlineTest::testLargeHistory", &CXMeansOnlineTest::testLargeHistory)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CXMeansOnlineTest::testLatLongData", &CXMeansOnlineTest::testLatLongData)); - suiteOfTests->addTest(new CppUnit::TestCaller("CXMeansOnlineTest::testPersist", &CXMeansOnlineTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXMeansOnlineTest::testCluster", &CXMeansOnlineTest::testCluster)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXMeansOnlineTest::testClusteringVanilla", &CXMeansOnlineTest::testClusteringVanilla)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXMeansOnlineTest::testClusteringWithOutliers", + &CXMeansOnlineTest::testClusteringWithOutliers)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXMeansOnlineTest::testManyClusters", &CXMeansOnlineTest::testManyClusters)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXMeansOnlineTest::testAdaption", &CXMeansOnlineTest::testAdaption)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXMeansOnlineTest::testLargeHistory", &CXMeansOnlineTest::testLargeHistory)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXMeansOnlineTest::testLatLongData", &CXMeansOnlineTest::testLatLongData)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CXMeansOnlineTest::testPersist", &CXMeansOnlineTest::testPersist)); return suiteOfTests; } diff --git a/lib/maths/unittest/CXMeansTest.cc b/lib/maths/unittest/CXMeansTest.cc index e1ae8e2ae4..56c418eb6d 100644 --- a/lib/maths/unittest/CXMeansTest.cc +++ b/lib/maths/unittest/CXMeansTest.cc @@ -53,25 +53,33 @@ class CXMeansForTest : public maths::CXMeans { public: CXMeansForTest(std::size_t kmax) : maths::CXMeans(kmax) {} - void improveParams(std::size_t kmeansIterations) { this->maths::CXMeans::improveParams(kmeansIterations); } + void improveParams(std::size_t kmeansIterations) { + this->maths::CXMeans::improveParams(kmeansIterations); + } bool improveStructure(std::size_t clusterSeeds, std::size_t kmeansIterations) { return this->maths::CXMeans::improveStructure(clusterSeeds, kmeansIterations); } - const TUInt64USet& inactive() 
const { return this->maths::CXMeans::inactive(); } + const TUInt64USet& inactive() const { + return this->maths::CXMeans::inactive(); + } }; template double logfSphericalGaussian(const POINT& mean, double variance, const POINT& x) { double d = static_cast(x.dimension()); double r = (x - mean).euclidean(); - return -0.5 * d * std::log(boost::math::double_constants::two_pi * variance) - 0.5 * r * r / variance; + return -0.5 * d * std::log(boost::math::double_constants::two_pi * variance) - + 0.5 * r * r / variance; } class CEmpiricalKullbackLeibler { public: - double value() const { return maths::CBasicStatistics::mean(m_Divergence) - std::log(maths::CBasicStatistics::count(m_Divergence)); } + double value() const { + return maths::CBasicStatistics::mean(m_Divergence) - + std::log(maths::CBasicStatistics::count(m_Divergence)); + } template void add(const std::vector& points) { @@ -105,7 +113,9 @@ void computePurities(const TSizeVecVec& clusters, TDoubleVec& purities) { counts.resize(std::max(counts.size(), clusters[i][j] + 1)); ++counts[clusters[i][j]]; } - purities[i] = static_cast(*std::max_element(counts.begin(), counts.end())) / static_cast(clusters[i].size()); + purities[i] = + static_cast(*std::max_element(counts.begin(), counts.end())) / + static_cast(clusters[i].size()); } } } @@ -233,8 +243,9 @@ void CXMeansTest::testImproveStructure() { std::sort(newChecksums.begin(), newChecksums.end()); TUInt64Vec inactive; - std::set_intersection( - oldChecksums.begin(), oldChecksums.end(), newChecksums.begin(), newChecksums.end(), std::back_inserter(inactive)); + std::set_intersection(oldChecksums.begin(), oldChecksums.end(), + newChecksums.begin(), newChecksums.end(), + std::back_inserter(inactive)); LOG_DEBUG(<< "inactive = " << core::CContainerPrinter::print(inactive)); for (std::size_t i = 0u; i < inactive.size(); ++i) { CPPUNIT_ASSERT(xmeans.inactive().count(inactive[i]) > 0); @@ -300,7 +311,8 @@ void CXMeansTest::testImproveParams() { LOG_DEBUG(<< "expected centres = " << core::CContainerPrinter::print(expectedCentres)); LOG_DEBUG(<< "centres = " << core::CContainerPrinter::print(centres)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentres), core::CContainerPrinter::print(centres)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCentres), + core::CContainerPrinter::print(centres)); } } @@ -345,7 +357,8 @@ void CXMeansTest::testOneCluster() { klc.add(xmeans.clusters()[i].points()); } - LOG_DEBUG(<< " centres = " << core::CContainerPrinter::print(xmeans.centres())); + LOG_DEBUG(<< " centres = " + << core::CContainerPrinter::print(xmeans.centres())); LOG_DEBUG(<< " points empirical KL = " << kl.value()); LOG_DEBUG(<< " clusters empirical KL = " << klc.value()); @@ -355,7 +368,8 @@ void CXMeansTest::testOneCluster() { } } - LOG_DEBUG(<< "mean number clusters = " << maths::CBasicStatistics::mean(meanNumberClusters)); + LOG_DEBUG(<< "mean number clusters = " + << maths::CBasicStatistics::mean(meanNumberClusters)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanNumberClusters) < 1.15); } @@ -423,10 +437,12 @@ void CXMeansTest::testFiveClusters() { std::size_t k = 0u; for (/**/; k < points.size(); ++k) { - for (TVector2VecCItr itr = std::lower_bound(points[k].begin(), points[k].end(), clusterPoints[j]), - end = std::upper_bound(points[k].begin(), points[k].end(), clusterPoints[j]); - itr != end; - ++itr) { + for (TVector2VecCItr + itr = std::lower_bound(points[k].begin(), points[k].end(), + clusterPoints[j]), + end = std::upper_bound(points[k].begin(), 
points[k].end(), + clusterPoints[j]); + itr != end; ++itr) { if (clusterPoints[j] == *itr) { goto FoundPoint; } @@ -451,15 +467,19 @@ void CXMeansTest::testFiveClusters() { TMeanAccumulator totalPurity; for (std::size_t i = 0u; i < purities.size(); ++i) { minPurity = std::min(minPurity, purities[i]); - totalPurity.add(purities[i], static_cast(xmeans.clusters()[i].size())); + totalPurity.add(purities[i], + static_cast(xmeans.clusters()[i].size())); } - LOG_DEBUG(<< " centres = " << core::CContainerPrinter::print(xmeans.centres())); - LOG_DEBUG(<< " purities = " << core::CContainerPrinter::print(purities)); + LOG_DEBUG(<< " centres = " + << core::CContainerPrinter::print(xmeans.centres())); + LOG_DEBUG(<< " purities = " + << core::CContainerPrinter::print(purities)); LOG_DEBUG(<< " points empirical KL = " << kl.value()); LOG_DEBUG(<< " clusters empirical KL = " << klc.value()); LOG_DEBUG(<< " minPurity = " << minPurity); - LOG_DEBUG(<< " totalPurity = " << maths::CBasicStatistics::mean(totalPurity)); + LOG_DEBUG(<< " totalPurity = " + << maths::CBasicStatistics::mean(totalPurity)); CPPUNIT_ASSERT(minPurity > 0.39); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(totalPurity) > 0.54); @@ -468,8 +488,10 @@ void CXMeansTest::testFiveClusters() { meanTotalPurity.add(maths::CBasicStatistics::mean(totalPurity)); } - LOG_DEBUG(<< "mean number clusters = " << maths::CBasicStatistics::mean(meanNumberClusters)); - LOG_DEBUG(<< "sd number clusters = " << std::sqrt(maths::CBasicStatistics::variance(meanNumberClusters))); + LOG_DEBUG(<< "mean number clusters = " + << maths::CBasicStatistics::mean(meanNumberClusters)); + LOG_DEBUG(<< "sd number clusters = " + << std::sqrt(maths::CBasicStatistics::variance(meanNumberClusters))); LOG_DEBUG(<< "KL gain = " << maths::CBasicStatistics::mean(klgain)); LOG_DEBUG(<< "mean total purity = " << maths::CBasicStatistics::mean(meanTotalPurity)); @@ -488,7 +510,9 @@ void CXMeansTest::testTwentyClusters() { maths::CSampling::seed(); - const std::size_t sizes_[] = {1800, 800, 1100, 400, 600, 400, 600, 1300, 400, 900, 500, 700, 400, 800, 1500, 1200, 500, 300, 1200, 800}; + const std::size_t sizes_[] = {1800, 800, 1100, 400, 600, 400, 600, + 1300, 400, 900, 500, 700, 400, 800, + 1500, 1200, 500, 300, 1200, 800}; TSizeVec sizes(boost::begin(sizes_), boost::end(sizes_)); test::CRandomNumbers rng; @@ -535,10 +559,12 @@ void CXMeansTest::testTwentyClusters() { std::size_t k = 0u; for (/**/; k < points.size(); ++k) { - for (TVector2VecCItr itr = std::lower_bound(points[k].begin(), points[k].end(), clusterPoints[j]), - end = std::upper_bound(points[k].begin(), points[k].end(), clusterPoints[j]); - itr != end; - ++itr) { + for (TVector2VecCItr + itr = std::lower_bound(points[k].begin(), + points[k].end(), clusterPoints[j]), + end = std::upper_bound(points[k].begin(), + points[k].end(), clusterPoints[j]); + itr != end; ++itr) { if (clusterPoints[j] == *itr) { goto FoundPoint; } @@ -573,7 +599,8 @@ void CXMeansTest::testTwentyClusters() { LOG_DEBUG(<< "totalPurity = " << maths::CBasicStatistics::mean(totalPurity)); CPPUNIT_ASSERT_DOUBLES_EQUAL(20.0, static_cast(xmeans.clusters().size()), 6.0); - CPPUNIT_ASSERT(klc.value() < kl.value() + 0.05 * std::max(std::fabs(klc.value()), std::fabs(kl.value()))); + CPPUNIT_ASSERT(klc.value() < kl.value() + 0.05 * std::max(std::fabs(klc.value()), + std::fabs(kl.value()))); CPPUNIT_ASSERT(minPurity > 0.4); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(totalPurity) > 0.8); } @@ -587,11 +614,16 @@ void CXMeansTest::testPoorlyConditioned() { 
maths::CSampling::seed(); - double points_[][2] = {{0.0, 0.0}, {1.0, 0.5}, {2.0, 1.0}, {3.0, 1.5}, {4.0, 2.0}, {5.0, 2.5}, {6.0, 3.0}, - {7.0, 3.5}, {8.0, 4.0}, {9.0, 4.5}, {101.0, 21.9}, {102.0, 21.2}, {101.5, 22.0}, {104.0, 23.0}, - {102.6, 21.4}, {101.3, 22.0}, {101.2, 21.0}, {101.1, 22.1}, {101.7, 23.0}, {101.0, 24.0}, {50.0, 50.0}, - {51.0, 51.0}, {50.0, 51.0}, {54.0, 53.0}, {52.0, 51.0}, {51.0, 52.0}, {51.0, 52.0}, {53.0, 53.0}, - {53.0, 52.0}, {52.0, 54.0}, {52.0, 52.0}, {52.0, 52.0}, {53.0, 52.0}, {51.0, 52.0}}; + double points_[][2] = { + {0.0, 0.0}, {1.0, 0.5}, {2.0, 1.0}, {3.0, 1.5}, + {4.0, 2.0}, {5.0, 2.5}, {6.0, 3.0}, {7.0, 3.5}, + {8.0, 4.0}, {9.0, 4.5}, {101.0, 21.9}, {102.0, 21.2}, + {101.5, 22.0}, {104.0, 23.0}, {102.6, 21.4}, {101.3, 22.0}, + {101.2, 21.0}, {101.1, 22.1}, {101.7, 23.0}, {101.0, 24.0}, + {50.0, 50.0}, {51.0, 51.0}, {50.0, 51.0}, {54.0, 53.0}, + {52.0, 51.0}, {51.0, 52.0}, {51.0, 52.0}, {53.0, 53.0}, + {53.0, 52.0}, {52.0, 54.0}, {52.0, 52.0}, {52.0, 52.0}, + {53.0, 52.0}, {51.0, 52.0}}; TVector2Vec cluster1; for (std::size_t i = 0u; i < 10; ++i) { @@ -626,7 +658,8 @@ TVector2Vec clusterPoints = xmeans.clusters()[i].points(); std::sort(clusterPoints.begin(), clusterPoints.end()); LOG_DEBUG(<< "points = " << core::CContainerPrinter::print(clusterPoints)); - CPPUNIT_ASSERT(clusterPoints == cluster1 || clusterPoints == cluster2 || clusterPoints == cluster3); + CPPUNIT_ASSERT(clusterPoints == cluster1 || clusterPoints == cluster2 || + clusterPoints == cluster3); } } } @@ -634,13 +667,20 @@ CppUnit::Test* CXMeansTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CXMeansTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testCluster", &CXMeansTest::testCluster)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testImproveStructure", &CXMeansTest::testImproveStructure)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testImproveParams", &CXMeansTest::testImproveParams)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testOneCluster", &CXMeansTest::testOneCluster)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testFiveClusters", &CXMeansTest::testFiveClusters)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testTwentyClusters", &CXMeansTest::testTwentyClusters)); - suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>("CXMeansTest::testPoorlyConditioned", &CXMeansTest::testPoorlyConditioned)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>( + "CXMeansTest::testCluster", &CXMeansTest::testCluster)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>( + "CXMeansTest::testImproveStructure", &CXMeansTest::testImproveStructure)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>( + "CXMeansTest::testImproveParams", &CXMeansTest::testImproveParams)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>( + "CXMeansTest::testOneCluster", &CXMeansTest::testOneCluster)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>( + "CXMeansTest::testFiveClusters", &CXMeansTest::testFiveClusters)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>( + "CXMeansTest::testTwentyClusters", &CXMeansTest::testTwentyClusters)); + suiteOfTests->addTest(new CppUnit::TestCaller<CXMeansTest>( + "CXMeansTest::testPoorlyConditioned", &CXMeansTest::testPoorlyConditioned)); return suiteOfTests; } diff --git a/lib/maths/unittest/TestUtils.cc b/lib/maths/unittest/TestUtils.cc index 8124891ba4..907469cc50 100644 ---
a/lib/maths/unittest/TestUtils.cc +++ b/lib/maths/unittest/TestUtils.cc @@ -30,13 +30,15 @@ class CCdf : public std::unary_function { enum EStyle { E_Lower, E_Upper, E_GeometricMean }; public: - CCdf(EStyle style, const CPrior& prior, double target) : m_Style(style), m_Prior(&prior), m_Target(target), m_X(1u) {} + CCdf(EStyle style, const CPrior& prior, double target) + : m_Style(style), m_Prior(&prior), m_Target(target), m_X(1u) {} double operator()(double x) const { double lowerBound, upperBound; m_X[0] = x; - if (!m_Prior->minusLogJointCdf(CConstantWeights::COUNT_VARIANCE, m_X, CConstantWeights::SINGLE_UNIT, lowerBound, upperBound)) { + if (!m_Prior->minusLogJointCdf(CConstantWeights::COUNT_VARIANCE, m_X, + CConstantWeights::SINGLE_UNIT, lowerBound, upperBound)) { // We have no choice but to throw because this is // invoked inside a boost root finding function. @@ -94,19 +96,26 @@ void CPriorTestInterface::addSamples(const TDouble1Vec& samples) { m_Prior->addSamples(TWeights::COUNT, samples, weights); } -maths_t::EFloatingPointErrorStatus CPriorTestInterface::jointLogMarginalLikelihood(const TDouble1Vec& samples, double& result) const { +maths_t::EFloatingPointErrorStatus +CPriorTestInterface::jointLogMarginalLikelihood(const TDouble1Vec& samples, + double& result) const { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); return m_Prior->jointLogMarginalLikelihood(TWeights::COUNT, samples, weights, result); } -bool CPriorTestInterface::minusLogJointCdf(const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { +bool CPriorTestInterface::minusLogJointCdf(const TDouble1Vec& samples, + double& lowerBound, + double& upperBound) const { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); return m_Prior->minusLogJointCdf(TWeights::COUNT, samples, weights, lowerBound, upperBound); } -bool CPriorTestInterface::minusLogJointCdfComplement(const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { +bool CPriorTestInterface::minusLogJointCdfComplement(const TDouble1Vec& samples, + double& lowerBound, + double& upperBound) const { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->minusLogJointCdfComplement(TWeights::COUNT, samples, weights, lowerBound, upperBound); + return m_Prior->minusLogJointCdfComplement(TWeights::COUNT, samples, + weights, lowerBound, upperBound); } bool CPriorTestInterface::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, @@ -115,16 +124,20 @@ bool CPriorTestInterface::probabilityOfLessLikelySamples(maths_t::EProbabilityCa double& upperBound) const { TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); maths_t::ETail tail; - return m_Prior->probabilityOfLessLikelySamples(calculation, TWeights::COUNT, samples, weights, lowerBound, upperBound, tail); + return m_Prior->probabilityOfLessLikelySamples( + calculation, TWeights::COUNT, samples, weights, lowerBound, upperBound, tail); } -bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, const TDouble1Vec& samples, double& result) const { +bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, + const TDouble1Vec& samples, + double& result) const { TDoubleDoublePr1Vec weightedSamples; weightedSamples.reserve(samples.size()); for (std::size_t i = 0u; i < samples.size(); ++i) { weightedSamples.push_back(std::make_pair(samples[i], 1.0)); } - return this->anomalyScore(calculation, maths_t::E_SampleCountWeight, weightedSamples, result); + return 
this->anomalyScore(calculation, maths_t::E_SampleCountWeight, + weightedSamples, result); } bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, @@ -143,7 +156,8 @@ bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculat double lowerBound, upperBound; maths_t::ETail tail; - if (!m_Prior->probabilityOfLessLikelySamples(calculation, weightStyles, samples_, weights, lowerBound, upperBound, tail)) { + if (!m_Prior->probabilityOfLessLikelySamples(calculation, weightStyles, samples_, weights, + lowerBound, upperBound, tail)) { LOG_ERROR(<< "Failed computing probability of less likely samples"); return false; } @@ -153,7 +167,9 @@ bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculat return true; } -bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, double eps, double& result) const { +bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, + double eps, + double& result) const { result = 0.0; percentage /= 100.0; @@ -178,9 +194,11 @@ bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, d CEqualWithTolerance equal(CToleranceTypes::E_AbsoluteTolerance, 2.0 * eps); - CSolvers::solve(bracket.first, bracket.second, fBracket.first, fBracket.second, cdf, maxIterations, equal, result); + CSolvers::solve(bracket.first, bracket.second, fBracket.first, + fBracket.second, cdf, maxIterations, equal, result); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute quantile: " << e.what() << ", quantile = " << percentage); + LOG_ERROR(<< "Failed to compute quantile: " << e.what() + << ", quantile = " << percentage); return false; } @@ -188,8 +206,10 @@ bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, d } bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { - using TMarginalLikelihood = CCompositeFunctions::CExp; - using TFunctionTimesMarginalLikelihood = CCompositeFunctions::CProduct; + using TMarginalLikelihood = + CCompositeFunctions::CExp; + using TFunctionTimesMarginalLikelihood = + CCompositeFunctions::CProduct; const double eps = 1e-3; unsigned int steps = 100u; @@ -197,7 +217,8 @@ bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { result = 0.0; double a, b; - if (!this->marginalLikelihoodQuantileForTest(0.001, eps, a) || !this->marginalLikelihoodQuantileForTest(99.999, eps, b)) { + if (!this->marginalLikelihoodQuantileForTest(0.001, eps, a) || + !this->marginalLikelihoodQuantileForTest(99.999, eps, b)) { LOG_ERROR(<< "Unable to compute mean likelihood"); return false; } @@ -209,14 +230,16 @@ bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { } CPrior::CLogMarginalLikelihood logLikelihood(*m_Prior); - TFunctionTimesMarginalLikelihood xTimesLikelihood(identity, TMarginalLikelihood(logLikelihood)); + TFunctionTimesMarginalLikelihood xTimesLikelihood( + identity, TMarginalLikelihood(logLikelihood)); double x = a; double step = (b - a) / static_cast(steps); for (unsigned int i = 0; i < steps; ++i, x += step) { double integral; - if (!CIntegration::gaussLegendre(xTimesLikelihood, x, x + step, integral)) { + if (!CIntegration::gaussLegendre( + xTimesLikelihood, x, x + step, integral)) { return false; } result += integral; @@ -226,8 +249,10 @@ bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { } bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double& result) const { - using 
TMarginalLikelihood = CCompositeFunctions::CExp; - using TResidualTimesMarginalLikelihood = CCompositeFunctions::CProduct; + using TMarginalLikelihood = + CCompositeFunctions::CExp; + using TResidualTimesMarginalLikelihood = + CCompositeFunctions::CProduct; const double eps = 1e-3; unsigned int steps = 100u; @@ -235,7 +260,8 @@ bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double& result) cons result = 0.0; double a, b; - if (!this->marginalLikelihoodQuantileForTest(0.001, eps, a) || !this->marginalLikelihoodQuantileForTest(99.999, eps, b)) { + if (!this->marginalLikelihoodQuantileForTest(0.001, eps, a) || + !this->marginalLikelihoodQuantileForTest(99.999, eps, b)) { LOG_ERROR(<< "Unable to compute mean likelihood"); return false; } @@ -247,14 +273,15 @@ bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double& result) cons } CPrior::CLogMarginalLikelihood logLikelihood(*m_Prior); - TResidualTimesMarginalLikelihood residualTimesLikelihood(CResidual(m_Prior->marginalLikelihoodMean()), - TMarginalLikelihood(logLikelihood)); + TResidualTimesMarginalLikelihood residualTimesLikelihood( + CResidual(m_Prior->marginalLikelihoodMean()), TMarginalLikelihood(logLikelihood)); double x = a; double step = (b - a) / static_cast(steps); for (unsigned int i = 0; i < steps; ++i, x += step) { double integral; - if (!CIntegration::gaussLegendre(residualTimesLikelihood, x, x + step, integral)) { + if (!CIntegration::gaussLegendre( + residualTimesLikelihood, x, x + step, integral)) { return false; } result += integral; @@ -275,48 +302,65 @@ double markov(core_t::TTime time) { static double state{0.2}; if (time % WEEK == 0) { core::CHashing::CMurmurHash2BT hasher; - state = 2.0 * static_cast(hasher(time)) / static_cast(std::numeric_limits::max()); + state = 2.0 * static_cast(hasher(time)) / + static_cast(std::numeric_limits::max()); } return state; } double smoothDaily(core_t::TTime time) { - return std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(DAY)); + return std::sin(boost::math::double_constants::two_pi * + static_cast(time) / static_cast(DAY)); } double smoothWeekly(core_t::TTime time) { - return std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(WEEK)); + return std::sin(boost::math::double_constants::two_pi * + static_cast(time) / static_cast(WEEK)); } double spikeyDaily(core_t::TTime time) { - double pattern[]{1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, - 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, - 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1}; + double pattern[]{1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, + 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1}; return pattern[(time % DAY) / HALF_HOUR]; } double spikeyWeekly(core_t::TTime time) { double pattern[]{ - 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, - 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, - 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, - 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 
0.2, 0.1, 0.1, - 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, - 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, - 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, - 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, - 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, - 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, - 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, - 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, - 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1}; + 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, + 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, + 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, + 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, + 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.0, 0.1, 0.1, 0.1, 0.1, 0.2, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, + 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, + 0.1, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1}; return pattern[(time % WEEK) / HALF_HOUR]; } double weekends(core_t::TTime time) { double amplitude[] = {1.0, 0.9, 0.8, 0.9, 1.1, 0.2, 0.05}; return amplitude[(time % WEEK) / DAY] * - std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(DAY)); + std::sin(boost::math::double_constants::two_pi * + static_cast(time) / static_cast(DAY)); } double scale(double scale, core_t::TTime time, TGenerator generator) { diff --git a/lib/maths/unittest/TestUtils.h b/lib/maths/unittest/TestUtils.h index 9a77f2bd91..c95f3d6910 100644 --- a/lib/maths/unittest/TestUtils.h +++ b/lib/maths/unittest/TestUtils.h @@ 
-57,13 +57,18 @@ class CPriorTestInterface { void addSamples(const handy_typedefs::TDouble1Vec& samples); //! Wrapper which takes care of weights. - maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const handy_typedefs::TDouble1Vec& samples, double& result) const; + maths_t::EFloatingPointErrorStatus + jointLogMarginalLikelihood(const handy_typedefs::TDouble1Vec& samples, double& result) const; //! Wrapper which takes care of weights. - bool minusLogJointCdf(const handy_typedefs::TDouble1Vec& samples, double& lowerBound, double& upperBound) const; + bool minusLogJointCdf(const handy_typedefs::TDouble1Vec& samples, + double& lowerBound, + double& upperBound) const; //! Wrapper which takes care of weights. - bool minusLogJointCdfComplement(const handy_typedefs::TDouble1Vec& samples, double& lowerBound, double& upperBound) const; + bool minusLogJointCdfComplement(const handy_typedefs::TDouble1Vec& samples, + double& lowerBound, + double& upperBound) const; //! Wrapper which takes care of weights. bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, @@ -73,7 +78,9 @@ class CPriorTestInterface { //! A wrapper around weighted compute anomaly scores which uses unit //! weights for all samples. - bool anomalyScore(maths_t::EProbabilityCalculation calculation, const handy_typedefs::TDouble1Vec& samples, double& result) const; + bool anomalyScore(maths_t::EProbabilityCalculation calculation, + const handy_typedefs::TDouble1Vec& samples, + double& result) const; //! Calculate an anomaly score for a collection of independent samples //! from the variable. @@ -152,10 +159,12 @@ class CPriorTestInterfaceMixin : public PRIOR, public CPriorTestInterface { using PRIOR::probabilityOfLessLikelySamples; public: - CPriorTestInterfaceMixin(const PRIOR& prior) : PRIOR(prior), CPriorTestInterface(static_cast(*this)) {} + CPriorTestInterfaceMixin(const PRIOR& prior) + : PRIOR(prior), CPriorTestInterface(static_cast(*this)) {} CPriorTestInterfaceMixin(const CPriorTestInterfaceMixin& other) - : PRIOR(static_cast(other)), CPriorTestInterface(static_cast(*this)) {} + : PRIOR(static_cast(other)), + CPriorTestInterface(static_cast(*this)) {} virtual ~CPriorTestInterfaceMixin() {} @@ -163,7 +172,9 @@ class CPriorTestInterfaceMixin : public PRIOR, public CPriorTestInterface { void swap(CPriorTestInterfaceMixin& other) { this->PRIOR::swap(other); } //! Clone the object. - virtual CPriorTestInterfaceMixin* clone() const { return new CPriorTestInterfaceMixin(*this); } + virtual CPriorTestInterfaceMixin* clone() const { + return new CPriorTestInterfaceMixin(*this); + } }; //! \brief Kernel for checking normalization with CPrior::expectation. 
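The kernels declared here all share one recipe: evaluate the prior's joint log marginal likelihood at a point, exponentiate, and weight by 1, x, or a squared residual, so that panelled Gauss-Legendre integration yields the zeroth, first, or second moment of the density. The normalization check the \brief above refers to therefore reduces to integrating the bare likelihood and expecting a total mass near one. A sketch using the same panelled loop as marginalLikelihoodMeanForTest in TestUtils.cc; the quadrature order template argument of gaussLegendre is elided exactly as it appears in the calls in this file, and [a, b] is assumed to span the 0.001%-99.999% quantile range computed above:

    // Sketch: normalization check by summing quadrature panels of the
    // exponentiated log marginal likelihood over [a, b].
    CPrior::CLogMarginalLikelihood logLikelihood(*m_Prior);
    TMarginalLikelihood likelihood(logLikelihood);
    double mass = 0.0;
    double x = a;
    double step = (b - a) / static_cast<double>(steps);
    for (unsigned int i = 0; i < steps; ++i, x += step) {
        double integral;
        if (!CIntegration::gaussLegendre(likelihood, x, x + step, integral)) {
            return false;
        }
        mass += integral; // accumulate probability mass panel by panel
    }
    // Expect mass ~= 1.0, up to quadrature error and the tail mass outside [a, b].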
@@ -193,11 +204,13 @@ class CVarianceKernel { template class CUnitKernel { public: - CUnitKernel(const maths::CMultivariatePrior& prior) : m_Prior(&prior), m_X(1) {} + CUnitKernel(const maths::CMultivariatePrior& prior) + : m_Prior(&prior), m_X(1) {} bool operator()(const maths::CVectorNx1& x, double& result) const { m_X[0].assign(x.begin(), x.end()); - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, result); + m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, + SINGLE_UNIT, result); result = std::exp(result); return true; } @@ -211,19 +224,23 @@ class CUnitKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec CUnitKernel::SINGLE_UNIT(1, - handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +handy_typedefs::TDouble10Vec4Vec1Vec CUnitKernel::SINGLE_UNIT( + 1, + handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); //! \brief The kernel for computing the mean of a multivariate prior. template class CMeanKernel { public: - CMeanKernel(const maths::CMultivariatePrior& prior) : m_Prior(&prior), m_X(1) {} + CMeanKernel(const maths::CMultivariatePrior& prior) + : m_Prior(&prior), m_X(1) {} - bool operator()(const maths::CVectorNx1& x, maths::CVectorNx1& result) const { + bool operator()(const maths::CVectorNx1& x, + maths::CVectorNx1& result) const { m_X[0].assign(x.begin(), x.end()); double likelihood; - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, likelihood); + m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, + SINGLE_UNIT, likelihood); likelihood = std::exp(likelihood); result = x * likelihood; return true; @@ -238,20 +255,24 @@ class CMeanKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec CMeanKernel::SINGLE_UNIT(1, - handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +handy_typedefs::TDouble10Vec4Vec1Vec CMeanKernel::SINGLE_UNIT( + 1, + handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); //! \brief The kernel for computing the variance of a multivariate prior. template class CCovarianceKernel { public: - CCovarianceKernel(const maths::CMultivariatePrior& prior, const maths::CVectorNx1& mean) + CCovarianceKernel(const maths::CMultivariatePrior& prior, + const maths::CVectorNx1& mean) : m_Prior(&prior), m_Mean(mean), m_X(1) {} - bool operator()(const maths::CVectorNx1& x, maths::CSymmetricMatrixNxN& result) const { + bool operator()(const maths::CVectorNx1& x, + maths::CSymmetricMatrixNxN& result) const { m_X[0].assign(x.begin(), x.end()); double likelihood; - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, SINGLE_UNIT, likelihood); + m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, + SINGLE_UNIT, likelihood); likelihood = std::exp(likelihood); result = (x - m_Mean).outer() * likelihood; return true; @@ -267,8 +288,9 @@ class CCovarianceKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec - CCovarianceKernel::SINGLE_UNIT(1, handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +handy_typedefs::TDouble10Vec4Vec1Vec CCovarianceKernel::SINGLE_UNIT( + 1, + handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); //! A constant function. 
double constant(core_t::TTime time); diff --git a/lib/model/CAnnotatedProbability.cc b/lib/model/CAnnotatedProbability.cc index a8d5fd6c21..85317f9840 100644 --- a/lib/model/CAnnotatedProbability.cc +++ b/lib/model/CAnnotatedProbability.cc @@ -33,7 +33,8 @@ const std::string CORRELATED_ATTRIBUTE_TAG("n"); } SAttributeProbability::SAttributeProbability() - : s_Cid(0), s_Probability(1.0), s_Type(model_t::CResultType::E_Unconditional), s_Feature(model_t::E_IndividualCountByBucketAndPerson) { + : s_Cid(0), s_Probability(1.0), s_Type(model_t::CResultType::E_Unconditional), + s_Feature(model_t::E_IndividualCountByBucketAndPerson) { } SAttributeProbability::SAttributeProbability(std::size_t cid, @@ -43,26 +44,16 @@ SAttributeProbability::SAttributeProbability(std::size_t cid, model_t::EFeature feature, const TStoredStringPtr1Vec& correlatedAttributes, const TSizeDoublePr1Vec& correlated) - : s_Cid(cid), - s_Attribute(attribute), - s_Probability(probability), - s_Type(type), - s_Feature(feature), - s_CorrelatedAttributes(correlatedAttributes), - s_Correlated(correlated) { + : s_Cid(cid), s_Attribute(attribute), s_Probability(probability), + s_Type(type), s_Feature(feature), + s_CorrelatedAttributes(correlatedAttributes), s_Correlated(correlated) { } bool SAttributeProbability::operator<(const SAttributeProbability& other) const { - return maths::COrderings::lexicographical_compare(s_Probability, - *s_Attribute, - s_Feature, - s_Type.asUint(), - s_Correlated, - other.s_Probability, - *other.s_Attribute, - other.s_Feature, - other.s_Type.asUint(), - other.s_Correlated); + return maths::COrderings::lexicographical_compare( + s_Probability, *s_Attribute, s_Feature, s_Type.asUint(), s_Correlated, + other.s_Probability, *other.s_Attribute, other.s_Feature, + other.s_Type.asUint(), other.s_Correlated); } void SAttributeProbability::acceptPersistInserter(core::CStatePersistInserter& inserter) const { @@ -88,7 +79,8 @@ bool SAttributeProbability::acceptRestoreTraverser(core::CStateRestoreTraverser& } else if (name == ANOMALY_TYPE_TAG) { unsigned int type; if (!core::CStringUtils::stringToType(traverser.value(), type)) { - LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " << traverser.value()); + LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " + << traverser.value()); return false; } s_Type = model_t::CResultType(type); @@ -96,13 +88,15 @@ bool SAttributeProbability::acceptRestoreTraverser(core::CStateRestoreTraverser& s_CorrelatedAttributes.push_back(CStringStore::names().get(traverser.value())); } else if (name == PROBABILITY_TAG) { if (!core::CPersistUtils::restore(PROBABILITY_TAG, s_Probability, traverser)) { - LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " << traverser.value()); + LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " + << traverser.value()); return false; } } else if (name == FEATURE_TAG) { std::size_t feature; if (!core::CPersistUtils::restore(FEATURE_TAG, feature, traverser)) { - LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " << traverser.value()); + LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " + << traverser.value()); return false; } s_Feature = model_t::EFeature(feature); @@ -110,21 +104,27 @@ bool SAttributeProbability::acceptRestoreTraverser(core::CStateRestoreTraverser& using TSizeDoublePrVec = std::vector; TSizeDoublePrVec data; if (!core::CPersistUtils::restore(DESCRIPTIVE_DATA_TAG, data, traverser)) { - LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " << traverser.value()); + 
LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " + << traverser.value()); return false; } s_DescriptiveData.reserve(data.size()); for (const auto& data_ : data) { - s_DescriptiveData.emplace_back(annotated_probability::EDescriptiveData(data_.first), data_.second); + s_DescriptiveData.emplace_back( + annotated_probability::EDescriptiveData(data_.first), data_.second); } } else if (name == CURRENT_BUCKET_VALUE_TAG) { - if (!core::CPersistUtils::restore(CURRENT_BUCKET_VALUE_TAG, s_CurrentBucketValue, traverser)) { - LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " << traverser.value()); + if (!core::CPersistUtils::restore(CURRENT_BUCKET_VALUE_TAG, + s_CurrentBucketValue, traverser)) { + LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " + << traverser.value()); return false; } } else if (name == BASELINE_BUCKET_MEAN_TAG) { - if (!core::CPersistUtils::restore(BASELINE_BUCKET_MEAN_TAG, s_BaselineBucketMean, traverser)) { - LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " << traverser.value()); + if (!core::CPersistUtils::restore(BASELINE_BUCKET_MEAN_TAG, + s_BaselineBucketMean, traverser)) { + LOG_ERROR(<< "Failed to restore " << traverser.name() << " / " + << traverser.value()); return false; } } @@ -132,17 +132,21 @@ bool SAttributeProbability::acceptRestoreTraverser(core::CStateRestoreTraverser& return true; } -void SAttributeProbability::addDescriptiveData(annotated_probability::EDescriptiveData key, double value) { +void SAttributeProbability::addDescriptiveData(annotated_probability::EDescriptiveData key, + double value) { s_DescriptiveData.emplace_back(key, value); } -SAnnotatedProbability::SAnnotatedProbability() : s_Probability(1.0), s_ResultType(model_t::CResultType::E_Final) { +SAnnotatedProbability::SAnnotatedProbability() + : s_Probability(1.0), s_ResultType(model_t::CResultType::E_Final) { } -SAnnotatedProbability::SAnnotatedProbability(double p) : s_Probability(p), s_ResultType(model_t::CResultType::E_Final) { +SAnnotatedProbability::SAnnotatedProbability(double p) + : s_Probability(p), s_ResultType(model_t::CResultType::E_Final) { } -void SAnnotatedProbability::addDescriptiveData(annotated_probability::EDescriptiveData key, double value) { +void SAnnotatedProbability::addDescriptiveData(annotated_probability::EDescriptiveData key, + double value) { s_DescriptiveData.emplace_back(key, value); } @@ -158,7 +162,8 @@ void SAnnotatedProbability::swap(SAnnotatedProbability& other) { void SAnnotatedProbability::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CPersistUtils::persist(PROBABILITY_TAG, s_Probability, inserter); - core::CPersistUtils::persist(ATTRIBUTE_PROBABILITIES_TAG, s_AttributeProbabilities, inserter); + core::CPersistUtils::persist(ATTRIBUTE_PROBABILITIES_TAG, + s_AttributeProbabilities, inserter); for (const auto& influence : s_Influences) { inserter.insertValue(INFLUENCE_NAME_TAG, *influence.first.first); @@ -170,7 +175,8 @@ void SAnnotatedProbability::acceptPersistInserter(core::CStatePersistInserter& i core::CPersistUtils::persist(CURRENT_BUCKET_COUNT_TAG, *s_CurrentBucketCount, inserter); } if (s_BaselineBucketCount) { - core::CPersistUtils::persist(BASELINE_BUCKET_COUNT_TAG, *s_BaselineBucketCount, inserter); + core::CPersistUtils::persist(BASELINE_BUCKET_COUNT_TAG, + *s_BaselineBucketCount, inserter); } } @@ -187,12 +193,15 @@ bool SAnnotatedProbability::acceptRestoreTraverser(core::CStateRestoreTraverser& if (name == PROBABILITY_TAG) { if 
(!core::CPersistUtils::restore(PROBABILITY_TAG, s_Probability, traverser)) { - LOG_ERROR(<< "Restore error for " << traverser.name() << " / " << traverser.value()); + LOG_ERROR(<< "Restore error for " << traverser.name() << " / " + << traverser.value()); return false; } } else if (name == ATTRIBUTE_PROBABILITIES_TAG) { - if (!core::CPersistUtils::restore(ATTRIBUTE_PROBABILITIES_TAG, s_AttributeProbabilities, traverser)) { - LOG_ERROR(<< "Restore error for " << traverser.name() << " / " << traverser.value()); + if (!core::CPersistUtils::restore(ATTRIBUTE_PROBABILITIES_TAG, + s_AttributeProbabilities, traverser)) { + LOG_ERROR(<< "Restore error for " << traverser.name() << " / " + << traverser.value()); return false; } } else if (name == INFLUENCE_NAME_TAG) { @@ -201,20 +210,24 @@ bool SAnnotatedProbability::acceptRestoreTraverser(core::CStateRestoreTraverser& influencerValue = CStringStore::influencers().get(traverser.value()); } else if (name == INFLUENCE_TAG) { if (!core::CStringUtils::stringToType(traverser.value(), d)) { - LOG_ERROR(<< "Restore error for " << traverser.name() << " / " << traverser.value()); + LOG_ERROR(<< "Restore error for " << traverser.name() << " / " + << traverser.value()); return false; } - s_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue), d); + s_Influences.emplace_back( + TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue), d); } else if (name == CURRENT_BUCKET_COUNT_TAG) { uint64_t i; if (!core::CPersistUtils::restore(CURRENT_BUCKET_COUNT_TAG, i, traverser)) { - LOG_ERROR(<< "Restore error for " << traverser.name() << " / " << traverser.value()); + LOG_ERROR(<< "Restore error for " << traverser.name() << " / " + << traverser.value()); return false; } s_CurrentBucketCount.reset(i); } else if (name == BASELINE_BUCKET_COUNT_TAG) { if (!core::CPersistUtils::restore(BASELINE_BUCKET_COUNT_TAG, d, traverser)) { - LOG_ERROR(<< "Restore error for " << traverser.name() << " / " << traverser.value()); + LOG_ERROR(<< "Restore error for " << traverser.name() << " / " + << traverser.value()); return false; } s_BaselineBucketCount.reset(d); diff --git a/lib/model/CAnnotatedProbabilityBuilder.cc b/lib/model/CAnnotatedProbabilityBuilder.cc index 7feabe7d06..23c1621500 100644 --- a/lib/model/CAnnotatedProbabilityBuilder.cc +++ b/lib/model/CAnnotatedProbabilityBuilder.cc @@ -15,18 +15,11 @@ namespace ml { namespace model { CAnnotatedProbabilityBuilder::CAnnotatedProbabilityBuilder(SAnnotatedProbability& annotatedProbability) - : m_Result(annotatedProbability), - m_NumberAttributeProbabilities(1), - m_NumberOfPeople(0), - m_AttributeProbabilityPrior(nullptr), - m_PersonAttributeProbabilityPrior(nullptr), - m_MinAttributeProbabilities(1), - m_DistinctTotalAttributes(0), - m_DistinctRareAttributes(0), - m_RareAttributes(0), - m_IsPopulation(false), - m_IsRare(false), - m_IsFreqRare(false) { + : m_Result(annotatedProbability), m_NumberAttributeProbabilities(1), + m_NumberOfPeople(0), m_AttributeProbabilityPrior(nullptr), + m_PersonAttributeProbabilityPrior(nullptr), m_MinAttributeProbabilities(1), + m_DistinctTotalAttributes(0), m_DistinctRareAttributes(0), m_RareAttributes(0), + m_IsPopulation(false), m_IsRare(false), m_IsFreqRare(false) { m_Result.s_AttributeProbabilities.clear(); m_Result.s_Influences.clear(); } @@ -37,16 +30,12 @@ CAnnotatedProbabilityBuilder::CAnnotatedProbabilityBuilder(SAnnotatedProbability std::size_t numberOfPeople) : m_Result(annotatedProbability), 
m_NumberAttributeProbabilities(numberAttributeProbabilities), - m_NumberOfPeople(numberOfPeople), - m_AttributeProbabilityPrior(nullptr), + m_NumberOfPeople(numberOfPeople), m_AttributeProbabilityPrior(nullptr), m_PersonAttributeProbabilityPrior(nullptr), m_MinAttributeProbabilities(numberAttributeProbabilities), - m_DistinctTotalAttributes(0), - m_DistinctRareAttributes(0), - m_RareAttributes(0), - m_IsPopulation(function_t::isPopulation(function)), - m_IsRare(false), - m_IsFreqRare(false) { + m_DistinctTotalAttributes(0), m_DistinctRareAttributes(0), + m_RareAttributes(0), m_IsPopulation(function_t::isPopulation(function)), + m_IsRare(false), m_IsFreqRare(false) { m_Result.s_AttributeProbabilities.clear(); m_Result.s_Influences.clear(); @@ -80,16 +69,18 @@ void CAnnotatedProbabilityBuilder::probability(double p) { m_Result.s_Probability = p; } -void CAnnotatedProbabilityBuilder::addAttributeProbability(std::size_t cid, - const core::CStoredStringPtr& attribute, - double pAttribute, - double pGivenAttribute_, - model_t::CResultType type, - model_t::EFeature feature, - const TStoredStringPtr1Vec& correlatedAttributes, - const TSizeDoublePr1Vec& correlated) { +void CAnnotatedProbabilityBuilder::addAttributeProbability( + std::size_t cid, + const core::CStoredStringPtr& attribute, + double pAttribute, + double pGivenAttribute_, + model_t::CResultType type, + model_t::EFeature feature, + const TStoredStringPtr1Vec& correlatedAttributes, + const TSizeDoublePr1Vec& correlated) { type.set(m_Result.s_ResultType.asInterimOrFinal()); - SAttributeProbability pGivenAttribute(cid, attribute, pGivenAttribute_, type, feature, correlatedAttributes, correlated); + SAttributeProbability pGivenAttribute(cid, attribute, pGivenAttribute_, type, + feature, correlatedAttributes, correlated); this->addAttributeDescriptiveData(cid, pAttribute, pGivenAttribute); m_MinAttributeProbabilities.add(pGivenAttribute); ++m_DistinctTotalAttributes; @@ -101,11 +92,14 @@ void CAnnotatedProbabilityBuilder::addAttributeDescriptiveData(std::size_t cid, if (m_IsPopulation && (m_IsRare || m_IsFreqRare)) { double concentration; m_AttributeProbabilityPrior->concentration(static_cast<double>(cid), concentration); - attributeProbability.addDescriptiveData(annotated_probability::E_ATTRIBUTE_CONCENTRATION, concentration); + attributeProbability.addDescriptiveData( + annotated_probability::E_ATTRIBUTE_CONCENTRATION, concentration); double activityConcentration; - m_PersonAttributeProbabilityPrior->concentration(static_cast<double>(cid), activityConcentration); - attributeProbability.addDescriptiveData(annotated_probability::E_ACTIVITY_CONCENTRATION, activityConcentration); + m_PersonAttributeProbabilityPrior->concentration(static_cast<double>(cid), + activityConcentration); + attributeProbability.addDescriptiveData( + annotated_probability::E_ACTIVITY_CONCENTRATION, activityConcentration); if (pAttribute < maths::LARGEST_SIGNIFICANT_PROBABILITY) { m_DistinctRareAttributes++; @@ -120,9 +114,12 @@ void CAnnotatedProbabilityBuilder::build() { if (m_NumberAttributeProbabilities > 0 && m_MinAttributeProbabilities.count() > 0) { m_MinAttributeProbabilities.sort(); m_Result.s_AttributeProbabilities.reserve(m_MinAttributeProbabilities.count()); - double cutoff = std::max(1.1 * m_MinAttributeProbabilities[0].s_Probability, maths::LARGEST_SIGNIFICANT_PROBABILITY); + double cutoff = std::max(1.1 * m_MinAttributeProbabilities[0].s_Probability, + maths::LARGEST_SIGNIFICANT_PROBABILITY);
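Note on the cutoff above: build() keeps every attribute probability that is within 10% of the smallest one, and never discards anything at or below maths::LARGEST_SIGNIFICANT_PROBABILITY. A minimal self-contained sketch of that selection rule (the helper name, the 0.05 floor, and the input values are illustrative assumptions, not the library's constants):

    #include <algorithm>
    #include <vector>

    // Keep the sorted probabilities within 10% of the smallest, but never
    // drop values at or below the "significant" floor. Assumes non-empty input.
    std::vector<double> selectSignificant(std::vector<double> probabilities,
                                          double significantFloor = 0.05) {
        std::sort(probabilities.begin(), probabilities.end());
        double cutoff = std::max(1.1 * probabilities.front(), significantFloor);
        auto end = std::upper_bound(probabilities.begin(), probabilities.end(), cutoff);
        return std::vector<double>(probabilities.begin(), end);
    }

For example, the input {0.001, 0.0011, 0.02, 0.3} gives cutoff = max(1.1 * 0.001, 0.05) = 0.05, so the first three values survive and 0.3 is dropped.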
- for (std::size_t i = 0u; i < m_MinAttributeProbabilities.count() && m_MinAttributeProbabilities[i].s_Probability <= cutoff; ++i) { + for (std::size_t i = 0u; i < m_MinAttributeProbabilities.count() && + m_MinAttributeProbabilities[i].s_Probability <= cutoff; + ++i) { m_Result.s_AttributeProbabilities.push_back(m_MinAttributeProbabilities[i]); } } @@ -130,16 +127,20 @@ void CAnnotatedProbabilityBuilder::addDescriptiveData() { if (m_IsPopulation && (m_IsRare || m_IsFreqRare)) { - m_Result.addDescriptiveData(annotated_probability::E_PERSON_COUNT, static_cast<double>(m_NumberOfPeople)); + m_Result.addDescriptiveData(annotated_probability::E_PERSON_COUNT, + static_cast<double>(m_NumberOfPeople)); if (m_IsRare) { m_Result.addDescriptiveData(annotated_probability::E_DISTINCT_RARE_ATTRIBUTES_COUNT, static_cast<double>(m_DistinctRareAttributes)); m_Result.addDescriptiveData(annotated_probability::E_DISTINCT_TOTAL_ATTRIBUTES_COUNT, static_cast<double>(m_DistinctTotalAttributes)); } else if (m_IsFreqRare) { - double totalConcentration = m_PersonAttributeProbabilityPrior->totalConcentration(); - m_Result.addDescriptiveData(annotated_probability::E_RARE_ATTRIBUTES_COUNT, m_RareAttributes); - m_Result.addDescriptiveData(annotated_probability::E_TOTAL_ATTRIBUTES_COUNT, totalConcentration); + double totalConcentration = + m_PersonAttributeProbabilityPrior->totalConcentration(); + m_Result.addDescriptiveData(annotated_probability::E_RARE_ATTRIBUTES_COUNT, + m_RareAttributes); + m_Result.addDescriptiveData(annotated_probability::E_TOTAL_ATTRIBUTES_COUNT, + totalConcentration); } } } diff --git a/lib/model/CAnomalyDetector.cc b/lib/model/CAnomalyDetector.cc index 3cfcbc89f5..8173b6df39 100644 --- a/lib/model/CAnomalyDetector.cc +++ b/lib/model/CAnomalyDetector.cc @@ -60,7 +60,9 @@ const std::string MODELS_TAG("b"); const std::string MODEL_TAG("d"); CAnomalyDetector::TDataGathererPtr -makeDataGatherer(const CAnomalyDetector::TModelFactoryCPtr& factory, core_t::TTime startTime, const std::string& partitionFieldValue) { +makeDataGatherer(const CAnomalyDetector::TModelFactoryCPtr& factory, + core_t::TTime startTime, + const std::string& partitionFieldValue) { CModelFactory::SGathererInitializationData initData(startTime, partitionFieldValue); return CAnomalyDetector::TDataGathererPtr(factory->makeDataGatherer(initData)); } @@ -96,29 +98,28 @@ CAnomalyDetector::CAnomalyDetector(int detectorIndex, const std::string& partitionFieldValue, core_t::TTime firstTime, const TModelFactoryCPtr& modelFactory) - : m_Limits(limits), - m_DetectorIndex(detectorIndex), - m_ModelConfig(modelConfig), + : m_Limits(limits), m_DetectorIndex(detectorIndex), m_ModelConfig(modelConfig), m_LastBucketEndTime(maths::CIntegerTools::ceil(firstTime, modelConfig.bucketLength())), m_DataGatherer(makeDataGatherer(modelFactory, m_LastBucketEndTime, partitionFieldValue)), m_ModelFactory(modelFactory), - m_Model(makeModel(modelFactory, m_DataGatherer)), - m_IsForPersistence(false) { + m_Model(makeModel(modelFactory, m_DataGatherer)), m_IsForPersistence(false) { if (m_DataGatherer == nullptr) { - LOG_ABORT(<< "Failed to construct data gatherer for detector: " << this->description()); + LOG_ABORT(<< "Failed to construct data gatherer for detector: " + << this->description()); } if (m_Model == nullptr) { LOG_ABORT(<< "Failed to construct model for detector: " << this->description()); } limits.resourceMonitor().registerComponent(*this); - LOG_DEBUG(<< "CAnomalyDetector(): " << this->description() << " for '" << m_DataGatherer->partitionFieldValue() << "'" - << ", first time = " << firstTime << ",
bucketLength = " << modelConfig.bucketLength() + LOG_DEBUG(<< "CAnomalyDetector(): " << this->description() << " for '" + << m_DataGatherer->partitionFieldValue() << "'" + << ", first time = " << firstTime + << ", bucketLength = " << modelConfig.bucketLength() << ", m_LastBucketEndTime = " << m_LastBucketEndTime); } CAnomalyDetector::CAnomalyDetector(bool isForPersistence, const CAnomalyDetector& other) - : m_Limits(other.m_Limits), - m_DetectorIndex(other.m_DetectorIndex), + : m_Limits(other.m_Limits), m_DetectorIndex(other.m_DetectorIndex), m_ModelConfig(other.m_ModelConfig), // Empty result function is fine in this case // Empty result count function is fine in this case @@ -162,7 +163,8 @@ void CAnomalyDetector::zeroModelsToTime(core_t::TTime time) { core_t::TTime bucketStartTime = m_LastBucketEndTime; m_LastBucketEndTime += bucketLength; - LOG_TRACE(<< "sample: m_DetectorKey = '" << this->description() << "', bucketStartTime = " << bucketStartTime + LOG_TRACE(<< "sample: m_DetectorKey = '" << this->description() + << "', bucketStartTime = " << bucketStartTime << ", m_LastBucketEndTime = " << m_LastBucketEndTime); // Update the statistical models. @@ -170,7 +172,8 @@ void CAnomalyDetector::zeroModelsToTime(core_t::TTime time) { } } -bool CAnomalyDetector::acceptRestoreTraverser(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) { +bool CAnomalyDetector::acceptRestoreTraverser(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) { // As the model pointer will change during restore, we unregister // the detector from the resource monitor. We can register it // again at the end of restore. @@ -186,12 +189,14 @@ bool CAnomalyDetector::acceptRestoreTraverser(const std::string& partitionFieldV const std::string& name = traverser.name(); if (name == MODEL_AND_GATHERER_TAG) { if (traverser.traverseSubLevel(boost::bind( - &CAnomalyDetector::legacyModelEnsembleAcceptRestoreTraverser, this, boost::cref(partitionFieldValue), _1)) == false) { + &CAnomalyDetector::legacyModelEnsembleAcceptRestoreTraverser, + this, boost::cref(partitionFieldValue), _1)) == false) { LOG_ERROR(<< "Invalid model ensemble section in " << traverser.value()); return false; } } else if (name == SIMPLE_COUNT_STATICS) { - if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::staticsAcceptRestoreTraverser, this, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::staticsAcceptRestoreTraverser, + this, _1)) == false) { LOG_ERROR(<< "Invalid simple count statics in " << traverser.value()); return false; } @@ -208,13 +213,16 @@ bool CAnomalyDetector::legacyModelEnsembleAcceptRestoreTraverser(const std::stri do { const std::string& name = traverser.name(); if (name == DATA_GATHERER_TAG) { - m_DataGatherer.reset(m_ModelFactory->makeDataGatherer(partitionFieldValue, traverser)); + m_DataGatherer.reset( + m_ModelFactory->makeDataGatherer(partitionFieldValue, traverser)); if (!m_DataGatherer) { - LOG_ERROR(<< "Failed to restore the data gatherer from " << traverser.value()); + LOG_ERROR(<< "Failed to restore the data gatherer from " + << traverser.value()); return false; } } else if (name == MODELS_TAG) { - if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::legacyModelsAcceptRestoreTraverser, this, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind(&CAnomalyDetector::legacyModelsAcceptRestoreTraverser, + this, _1)) == false) { LOG_ERROR(<< "Failed to restore live models from " << traverser.value()); return false; } @@ 
-244,17 +252,20 @@ bool CAnomalyDetector::staticsAcceptRestoreTraverser(core::CStateRestoreTraverse do { const std::string& name = traverser.name(); if (name == RANDOMIZED_PERIODIC_TAG) { - if (traverser.traverseSubLevel(&maths::CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser) == false) { + if (traverser.traverseSubLevel(&maths::CRandomizedPeriodicityTest::staticsAcceptRestoreTraverser) == + false) { LOG_ERROR(<< "Failed to restore randomized periodic test state"); return false; } } else if (name == STATISTICS_TAG) { - if (traverser.traverseSubLevel(&core::CStatistics::staticsAcceptRestoreTraverser) == false) { + if (traverser.traverseSubLevel( + &core::CStatistics::staticsAcceptRestoreTraverser) == false) { LOG_ERROR(<< "Failed to restore statistics"); return false; } } else if (name == SAMPLING_TAG) { - if (traverser.traverseSubLevel(&maths::CSampling::staticsAcceptRestoreTraverser) == false) { + if (traverser.traverseSubLevel( + &maths::CSampling::staticsAcceptRestoreTraverser) == false) { LOG_ERROR(<< "Failed to restore sampling state"); return false; } @@ -264,7 +275,8 @@ bool CAnomalyDetector::staticsAcceptRestoreTraverser(core::CStateRestoreTraverse return true; } -bool CAnomalyDetector::partitionFieldAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, std::string& partitionFieldValue) { +bool CAnomalyDetector::partitionFieldAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, + std::string& partitionFieldValue) { do { const std::string& name = traverser.name(); if (name == PARTITION_FIELD_VALUE_TAG) { @@ -276,7 +288,8 @@ bool CAnomalyDetector::partitionFieldAcceptRestoreTraverser(core::CStateRestoreT return false; } -bool CAnomalyDetector::keyAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, CSearchKey& key) { +bool CAnomalyDetector::keyAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser, + CSearchKey& key) { do { const std::string& name = traverser.name(); if (name == KEY_TAG) { @@ -294,7 +307,8 @@ bool CAnomalyDetector::keyAcceptRestoreTraverser(core::CStateRestoreTraverser& t } void CAnomalyDetector::keyAcceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(KEY_TAG, boost::bind(&CSearchKey::acceptPersistInserter, &m_DataGatherer->searchKey(), _1)); + inserter.insertLevel(KEY_TAG, boost::bind(&CSearchKey::acceptPersistInserter, + &m_DataGatherer->searchKey(), _1)); } void CAnomalyDetector::partitionFieldAcceptPersistInserter(core::CStatePersistInserter& inserter) const { @@ -306,28 +320,36 @@ void CAnomalyDetector::acceptPersistInserter(core::CStatePersistInserter& insert // and do this first so that other model components can use // static strings if (this->isSimpleCount()) { - inserter.insertLevel(SIMPLE_COUNT_STATICS, boost::bind(&CAnomalyDetector::staticsAcceptPersistInserter, this, _1)); + inserter.insertLevel( + SIMPLE_COUNT_STATICS, + boost::bind(&CAnomalyDetector::staticsAcceptPersistInserter, this, _1)); } // Persist what used to belong in model ensemble at a separate level to ensure BWC - inserter.insertLevel(MODEL_AND_GATHERER_TAG, boost::bind(&CAnomalyDetector::legacyModelEnsembleAcceptPersistInserter, this, _1)); + inserter.insertLevel(MODEL_AND_GATHERER_TAG, boost::bind(&CAnomalyDetector::legacyModelEnsembleAcceptPersistInserter, + this, _1)); } void CAnomalyDetector::staticsAcceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(RANDOMIZED_PERIODIC_TAG, &maths::CRandomizedPeriodicityTest::staticsAcceptPersistInserter); + 
inserter.insertLevel(RANDOMIZED_PERIODIC_TAG, + &maths::CRandomizedPeriodicityTest::staticsAcceptPersistInserter); inserter.insertLevel(STATISTICS_TAG, &core::CStatistics::staticsAcceptPersistInserter); inserter.insertLevel(SAMPLING_TAG, &maths::CSampling::staticsAcceptPersistInserter); } void CAnomalyDetector::legacyModelEnsembleAcceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(DATA_GATHERER_TAG, boost::bind(&CDataGatherer::acceptPersistInserter, boost::cref(*m_DataGatherer), _1)); + inserter.insertLevel(DATA_GATHERER_TAG, + boost::bind(&CDataGatherer::acceptPersistInserter, + boost::cref(*m_DataGatherer), _1)); // This level seems redundant but it is simulating state as it was when CModelEnsemble // was around. - inserter.insertLevel(MODELS_TAG, boost::bind(&CAnomalyDetector::legacyModelsAcceptPersistInserter, this, _1)); + inserter.insertLevel(MODELS_TAG, boost::bind(&CAnomalyDetector::legacyModelsAcceptPersistInserter, + this, _1)); } void CAnomalyDetector::legacyModelsAcceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(MODEL_TAG, boost::bind(&CAnomalyDetectorModel::acceptPersistInserter, m_Model.get(), _1)); + inserter.insertLevel(MODEL_TAG, boost::bind(&CAnomalyDetectorModel::acceptPersistInserter, + m_Model.get(), _1)); } const CAnomalyDetector::TStrVec& CAnomalyDetector::fieldsOfInterest() const { @@ -343,11 +365,14 @@ void CAnomalyDetector::addRecord(core_t::TTime time, const TStrCPtrVec& fieldVal m_DataGatherer->addArrival(processedFieldValues, eventData, m_Limits.resourceMonitor()); } -const CAnomalyDetector::TStrCPtrVec& CAnomalyDetector::preprocessFieldValues(const TStrCPtrVec& fieldValues) { +const CAnomalyDetector::TStrCPtrVec& +CAnomalyDetector::preprocessFieldValues(const TStrCPtrVec& fieldValues) { return fieldValues; } -void CAnomalyDetector::buildResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results) { +void CAnomalyDetector::buildResults(core_t::TTime bucketStartTime, + core_t::TTime bucketEndTime, + CHierarchicalResults& results) { core_t::TTime bucketLength = m_ModelConfig.bucketLength(); if (m_ModelConfig.bucketResultsDelay()) { bucketLength /= 2; @@ -360,14 +385,16 @@ void CAnomalyDetector::buildResults(core_t::TTime bucketStartTime, core_t::TTime m_Limits.resourceMonitor().clearExtraMemory(); - this->buildResultsHelper(bucketStartTime, - bucketEndTime, - boost::bind(&CAnomalyDetector::sample, this, _1, _2, boost::ref(m_Limits.resourceMonitor())), - boost::bind(&CAnomalyDetector::updateLastSampledBucket, this, _1), - results); + this->buildResultsHelper( + bucketStartTime, bucketEndTime, + boost::bind(&CAnomalyDetector::sample, this, _1, _2, + boost::ref(m_Limits.resourceMonitor())), + boost::bind(&CAnomalyDetector::updateLastSampledBucket, this, _1), results); } -void CAnomalyDetector::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CAnomalyDetector::sample(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { if (endTime <= startTime) { // Nothing to sample. 
return; @@ -401,7 +428,9 @@ void CAnomalyDetector::sample(core_t::TTime startTime, core_t::TTime endTime, CR } } -void CAnomalyDetector::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CAnomalyDetector::sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { if (endTime <= startTime) { // Nothing to sample. return; @@ -428,21 +457,20 @@ void CAnomalyDetector::generateModelPlot(core_t::TTime bucketStartTime, TModelDetailsViewPtr view = m_Model.get()->details(); if (view.get()) { core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - for (core_t::TTime time = bucketStartTime; time < bucketEndTime; time += bucketLength) { - modelPlots.emplace_back(time, - key.partitionFieldName(), + for (core_t::TTime time = bucketStartTime; time < bucketEndTime; + time += bucketLength) { + modelPlots.emplace_back(time, key.partitionFieldName(), m_DataGatherer->partitionFieldValue(), - key.overFieldName(), - key.byFieldName(), - bucketLength, - m_DetectorIndex); + key.overFieldName(), key.byFieldName(), + bucketLength, m_DetectorIndex); view->modelPlot(time, boundsPercentile, terms, modelPlots.back()); } } } } -CForecastDataSink::SForecastModelPrerequisites CAnomalyDetector::getForecastPrerequisites() const { +CForecastDataSink::SForecastModelPrerequisites +CAnomalyDetector::getForecastPrerequisites() const { CForecastDataSink::SForecastModelPrerequisites prerequisites{0, 0, 0, true, false}; TModelDetailsViewPtr view = m_Model->details(); @@ -513,7 +541,9 @@ CForecastDataSink::SForecastResultSeries CAnomalyDetector::getForecastModels() c const maths::CModel* model = view->model(feature, pid); if (model != nullptr && model->isForecastPossible()) { series.s_ToForecast.emplace_back( - feature, CForecastDataSink::TMathsModelPtr(model->cloneForForecast()), m_DataGatherer->personName(pid)); + feature, + CForecastDataSink::TMathsModelPtr(model->cloneForForecast()), + m_DataGatherer->personName(pid)); } } } @@ -522,12 +552,14 @@ CForecastDataSink::SForecastResultSeries CAnomalyDetector::getForecastModels() c return series; } -void CAnomalyDetector::buildInterimResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results) { - this->buildResultsHelper(bucketStartTime, - bucketEndTime, - boost::bind(&CAnomalyDetector::sampleBucketStatistics, this, _1, _2, boost::ref(m_Limits.resourceMonitor())), - boost::bind(&CAnomalyDetector::noUpdateLastSampledBucket, this, _1), - results); +void CAnomalyDetector::buildInterimResults(core_t::TTime bucketStartTime, + core_t::TTime bucketEndTime, + CHierarchicalResults& results) { + this->buildResultsHelper( + bucketStartTime, bucketEndTime, + boost::bind(&CAnomalyDetector::sampleBucketStatistics, this, _1, _2, + boost::ref(m_Limits.resourceMonitor())), + boost::bind(&CAnomalyDetector::noUpdateLastSampledBucket, this, _1), results); } void CAnomalyDetector::pruneModels() { @@ -549,7 +581,8 @@ void CAnomalyDetector::showMemoryUsage(std::ostream& stream) const { mem.compress(); mem.print(stream); if (mem.usage() != this->memoryUsage()) { - LOG_ERROR(<< "Discrepancy in memory report: " << mem.usage() << " from debug, but " << this->memoryUsage() << " from normal"); + LOG_ERROR(<< "Discrepancy in memory report: " << mem.usage() + << " from debug, but " << this->memoryUsage() << " from normal"); } } @@ -580,9 +613,12 @@ core_t::TTime CAnomalyDetector::modelBucketLength() const { std::string CAnomalyDetector::description() const 
{ auto beginInfluencers = m_DataGatherer->beginInfluencers(); auto endInfluencers = m_DataGatherer->endInfluencers(); - return m_DataGatherer->description() + (m_DataGatherer->partitionFieldValue().empty() ? "" : "/") + + return m_DataGatherer->description() + + (m_DataGatherer->partitionFieldValue().empty() ? "" : "/") + m_DataGatherer->partitionFieldValue() + - (beginInfluencers != endInfluencers ? (" " + core::CContainerPrinter::print(beginInfluencers, endInfluencers)) : ""); + (beginInfluencers != endInfluencers + ? (" " + core::CContainerPrinter::print(beginInfluencers, endInfluencers)) + : ""); } void CAnomalyDetector::timeNow(core_t::TTime time) { @@ -602,8 +638,8 @@ void CAnomalyDetector::buildResultsHelper(core_t::TTime bucketStartTime, CHierarchicalResults& results) { core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - LOG_TRACE(<< "sample: m_DetectorKey = '" << this->description() << "', bucketStartTime = " << bucketStartTime - << ", bucketEndTime = " << bucketEndTime); + LOG_TRACE(<< "sample: m_DetectorKey = '" << this->description() << "', bucketStartTime = " + << bucketStartTime << ", bucketEndTime = " << bucketEndTime); // Update the statistical models. sampleFunc(bucketStartTime, bucketEndTime); @@ -613,9 +649,7 @@ void CAnomalyDetector::buildResultsHelper(core_t::TTime bucketStartTime, CSearchKey key = m_DataGatherer->searchKey(); LOG_TRACE(<< "OutputResults, for " << key.toCue()); - if (m_Model->addResults(m_DetectorIndex, - bucketStartTime, - bucketEndTime, + if (m_Model->addResults(m_DetectorIndex, bucketStartTime, bucketEndTime, 10, // TODO max number of attributes results)) { if (bucketEndTime % bucketLength == 0) { @@ -633,7 +667,8 @@ void CAnomalyDetector::noUpdateLastSampledBucket(core_t::TTime /*bucketEndTime*/ } std::string CAnomalyDetector::toCue() const { - return m_DataGatherer->searchKey().toCue() + m_DataGatherer->searchKey().CUE_DELIMITER + m_DataGatherer->partitionFieldValue(); + return m_DataGatherer->searchKey().toCue() + m_DataGatherer->searchKey().CUE_DELIMITER + + m_DataGatherer->partitionFieldValue(); } std::string CAnomalyDetector::debug() const { @@ -658,7 +693,8 @@ CAnomalyDetector::TModelPtr& CAnomalyDetector::model() { } std::ostream& operator<<(std::ostream& strm, const CAnomalyDetector& detector) { - strm << detector.m_DataGatherer->searchKey() << '/' << detector.m_DataGatherer->partitionFieldValue(); + strm << detector.m_DataGatherer->searchKey() << '/' + << detector.m_DataGatherer->partitionFieldValue(); return strm; } } diff --git a/lib/model/CAnomalyDetectorModel.cc b/lib/model/CAnomalyDetectorModel.cc index 525a9fc92d..69fa682dad 100644 --- a/lib/model/CAnomalyDetectorModel.cc +++ b/lib/model/CAnomalyDetectorModel.cc @@ -54,7 +54,8 @@ bool checkRules(const SModelParams::TDetectionRuleVec& detectionRules, core_t::TTime time) { bool isIgnored{false}; for (auto& rule : detectionRules) { - isIgnored = isIgnored || rule.apply(action, model, feature, resultType, pid, cid, time); + isIgnored = isIgnored || + rule.apply(action, model, feature, resultType, pid, cid, time); } return isIgnored; } @@ -69,7 +70,8 @@ bool checkScheduledEvents(const SModelParams::TStrDetectionRulePrVec& scheduledE core_t::TTime time) { bool isIgnored{false}; for (auto& event : scheduledEvents) { - isIgnored = isIgnored || event.second.apply(action, model, feature, resultType, pid, cid, time); + isIgnored = isIgnored || event.second.apply(action, model, feature, + resultType, pid, cid, time); } return isIgnored; } @@ -78,9 +80,7 @@ bool 
checkScheduledEvents(const SModelParams::TStrDetectionRulePrVec& scheduledE CAnomalyDetectorModel::CAnomalyDetectorModel(const SModelParams& params, const TDataGathererPtr& dataGatherer, const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) - : m_Params(params), - m_DataGatherer(dataGatherer), - m_BucketCount(0.0), + : m_Params(params), m_DataGatherer(dataGatherer), m_BucketCount(0.0), m_InfluenceCalculators(influenceCalculators), m_InterimBucketCorrector(new CInterimBucketCorrector(dataGatherer->bucketLength())) { if (!m_DataGatherer) { @@ -91,18 +91,17 @@ CAnomalyDetectorModel::CAnomalyDetectorModel(const SModelParams& params, } } -CAnomalyDetectorModel::CAnomalyDetectorModel(bool isForPersistence, const CAnomalyDetectorModel& other) +CAnomalyDetectorModel::CAnomalyDetectorModel(bool isForPersistence, + const CAnomalyDetectorModel& other) : // The copy of m_DataGatherer is a shallow copy. This would be unacceptable // if we were going to persist the data gatherer from within this class. // We don't, so that's OK, but the next issue is that another thread will be // modifying the data gatherer m_DataGatherer points to whilst this object // is being persisted. Therefore, persistence must only call methods on the // data gatherer that are invariant. - m_Params(other.m_Params), - m_DataGatherer(other.m_DataGatherer), + m_Params(other.m_Params), m_DataGatherer(other.m_DataGatherer), m_PersonBucketCounts(other.m_PersonBucketCounts), - m_BucketCount(other.m_BucketCount), - m_InfluenceCalculators(), + m_BucketCount(other.m_BucketCount), m_InfluenceCalculators(), m_InterimBucketCorrector(new CInterimBucketCorrector(*other.m_InterimBucketCorrector)) { if (!isForPersistence) { LOG_ABORT(<< "This constructor only creates clones for persistence"); @@ -117,7 +116,8 @@ const std::string& CAnomalyDetectorModel::personName(std::size_t pid) const { return m_DataGatherer->personName(pid, core::CStringUtils::typeToString(pid)); } -const std::string& CAnomalyDetectorModel::personName(std::size_t pid, const std::string& fallback) const { +const std::string& CAnomalyDetectorModel::personName(std::size_t pid, + const std::string& fallback) const { return m_DataGatherer->personName(pid, fallback); } @@ -149,11 +149,13 @@ const std::string& CAnomalyDetectorModel::attributeName(std::size_t cid) const { return m_DataGatherer->attributeName(cid, core::CStringUtils::typeToString(cid)); } -const std::string& CAnomalyDetectorModel::attributeName(std::size_t cid, const std::string& fallback) const { +const std::string& CAnomalyDetectorModel::attributeName(std::size_t cid, + const std::string& fallback) const { return m_DataGatherer->attributeName(cid, fallback); } -std::string CAnomalyDetectorModel::printAttributes(const TSizeVec& cids, std::size_t limit) const { +std::string CAnomalyDetectorModel::printAttributes(const TSizeVec& cids, + std::size_t limit) const { if (cids.empty()) { return std::string(); } @@ -173,7 +175,9 @@ std::string CAnomalyDetectorModel::printAttributes(const TSizeVec& cids, std::si return result; } -void CAnomalyDetectorModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& /*resourceMonitor*/) { +void CAnomalyDetectorModel::sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& /*resourceMonitor*/) { const CDataGatherer& gatherer{this->dataGatherer()}; core_t::TTime bucketLength{this->bucketLength()}; for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { @@ -186,7 +190,9 @@ void 
CAnomalyDetectorModel::sampleBucketStatistics(core_t::TTime startTime, core } } -void CAnomalyDetectorModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& /*resourceMonitor*/) { +void CAnomalyDetectorModel::sample(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& /*resourceMonitor*/) { using TSizeUSet = boost::unordered_set<std::size_t>; const CDataGatherer& gatherer{this->dataGatherer()}; @@ -243,36 +249,35 @@ bool CAnomalyDetectorModel::addResults(int detector, this->currentBucketPersonIds(startTime, personIds); LOG_TRACE(<< "Outputting results for " << personIds.size() << " people"); - CPartitioningFields partitioningFields(m_DataGatherer->partitionFieldName(), m_DataGatherer->partitionFieldValue()); + CPartitioningFields partitioningFields(m_DataGatherer->partitionFieldName(), + m_DataGatherer->partitionFieldValue()); partitioningFields.add(m_DataGatherer->personFieldName(), EMPTY); for (auto pid : personIds) { if (this->category() == model_t::E_Counting) { SAnnotatedProbability annotatedProbability; - this->computeProbability(pid, startTime, endTime, partitioningFields, numberAttributeProbabilities, annotatedProbability); + this->computeProbability(pid, startTime, endTime, partitioningFields, + numberAttributeProbabilities, annotatedProbability); results.addSimpleCountResult(annotatedProbability, this, startTime); } else { LOG_TRACE(<< "AddResult, for time [" << startTime << "," << endTime << ")"); partitioningFields.back().second = boost::cref(this->personName(pid)); - std::for_each(m_DataGatherer->beginInfluencers(), m_DataGatherer->endInfluencers(), [&results](const std::string& influencer) { - results.addInfluencer(influencer); - }); + std::for_each(m_DataGatherer->beginInfluencers(), + m_DataGatherer->endInfluencers(), + [&results](const std::string& influencer) { + results.addInfluencer(influencer); + }); SAnnotatedProbability annotatedProbability; annotatedProbability.s_ResultType = results.resultType(); - if (this->computeProbability(pid, startTime, endTime, partitioningFields, numberAttributeProbabilities, annotatedProbability)) { + if (this->computeProbability(pid, startTime, endTime, partitioningFields, + numberAttributeProbabilities, annotatedProbability)) { function_t::EFunction function{m_DataGatherer->function()}; - results.addModelResult(detector, - this->isPopulation(), - function_t::name(function), - function, - m_DataGatherer->partitionFieldName(), - m_DataGatherer->partitionFieldValue(), - m_DataGatherer->personFieldName(), - this->personName(pid), - m_DataGatherer->valueFieldName(), - annotatedProbability, - this, - startTime); + results.addModelResult( + detector, this->isPopulation(), function_t::name(function), + function, m_DataGatherer->partitionFieldName(), + m_DataGatherer->partitionFieldValue(), + m_DataGatherer->personFieldName(), this->personName(pid), + m_DataGatherer->valueFieldName(), annotatedProbability, this, startTime); } } } @@ -284,13 +289,17 @@ std::size_t CAnomalyDetectorModel::defaultPruneWindow() const { // The longest we'll consider keeping priors for is 1M buckets. double decayRate{this->params().s_DecayRate}; double factor{this->params().s_PruneWindowScaleMaximum}; - return (decayRate == 0.0) ? MAXIMUM_PERMITTED_AGE : std::min(static_cast<std::size_t>(factor / decayRate), MAXIMUM_PERMITTED_AGE); + return (decayRate == 0.0) + ? MAXIMUM_PERMITTED_AGE + : std::min(static_cast<std::size_t>(factor / decayRate), MAXIMUM_PERMITTED_AGE); }
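To make the prune window above concrete, a small worked sketch (the decay rate and scale factor below are illustrative assumptions; only MAXIMUM_PERMITTED_AGE's value of one million buckets comes from this file):

    #include <algorithm>
    #include <cstddef>

    const std::size_t MAXIMUM_PERMITTED_AGE = 1000000;

    // Mirrors defaultPruneWindow() above: a zero decay rate keeps priors for
    // the maximum permitted age; otherwise the window shrinks as decay grows.
    std::size_t pruneWindow(double decayRate, double factor) {
        return decayRate == 0.0
                   ? MAXIMUM_PERMITTED_AGE
                   : std::min(static_cast<std::size_t>(factor / decayRate),
                              MAXIMUM_PERMITTED_AGE);
    }

    // pruneWindow(0.0005, 4.0) == 8000 buckets; pruneWindow(0.0, 4.0) == 1000000.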
std::size_t CAnomalyDetectorModel::minimumPruneWindow() const { double decayRate{this->params().s_DecayRate}; double factor{this->params().s_PruneWindowScaleMinimum}; - return (decayRate == 0.0) ? MAXIMUM_PERMITTED_AGE : std::min(static_cast<std::size_t>(factor / decayRate), MAXIMUM_PERMITTED_AGE); + return (decayRate == 0.0) + ? MAXIMUM_PERMITTED_AGE + : std::min(static_cast<std::size_t>(factor / decayRate), MAXIMUM_PERMITTED_AGE); } void CAnomalyDetectorModel::prune() { @@ -320,7 +329,8 @@ void CAnomalyDetectorModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr core::CMemoryDebug::dynamicSize("m_Params", m_Params, mem); core::CMemoryDebug::dynamicSize("m_PersonBucketCounts", m_PersonBucketCounts, mem); core::CMemoryDebug::dynamicSize("m_InfluenceCalculators", m_InfluenceCalculators, mem); - core::CMemoryDebug::dynamicSize("m_InterimBucketCorrector", m_InterimBucketCorrector, mem); + core::CMemoryDebug::dynamicSize("m_InterimBucketCorrector", + m_InterimBucketCorrector, mem); } std::size_t CAnomalyDetectorModel::memoryUsage() const { @@ -333,7 +343,9 @@ std::size_t CAnomalyDetectorModel::memoryUsage() const { } CAnomalyDetectorModel::TOptionalSize -CAnomalyDetectorModel::estimateMemoryUsage(std::size_t numberPeople, std::size_t numberAttributes, std::size_t numberCorrelations) const { +CAnomalyDetectorModel::estimateMemoryUsage(std::size_t numberPeople, + std::size_t numberAttributes, + std::size_t numberCorrelations) const { CMemoryUsageEstimator::TSizeArray predictors; predictors[CMemoryUsageEstimator::E_People] = numberPeople; predictors[CMemoryUsageEstimator::E_Attributes] = numberAttributes; @@ -341,10 +353,12 @@ CAnomalyDetectorModel::estimateMemoryUsage(std::size_t numberPeople, std::size_t return this->memoryUsageEstimator()->estimate(predictors); } -std::size_t CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate(std::size_t numberPeople, - std::size_t numberAttributes, - std::size_t numberCorrelations) { - TOptionalSize estimate{this->estimateMemoryUsage(numberPeople, numberAttributes, numberCorrelations)}; +std::size_t +CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate(std::size_t numberPeople, + std::size_t numberAttributes, + std::size_t numberCorrelations) { + TOptionalSize estimate{this->estimateMemoryUsage(numberPeople, numberAttributes, + numberCorrelations)}; if (estimate) { return estimate.get(); } @@ -394,14 +408,18 @@ double CAnomalyDetectorModel::learnRate(model_t::EFeature feature) const { return model_t::learnRate(feature, m_Params); } -const CInfluenceCalculator* CAnomalyDetectorModel::influenceCalculator(model_t::EFeature feature, std::size_t iid) const { +const CInfluenceCalculator* +CAnomalyDetectorModel::influenceCalculator(model_t::EFeature feature, std::size_t iid) const { if (iid >= m_InfluenceCalculators.size()) { LOG_ERROR(<< "Influencer identifier " << iid << " out of range"); return nullptr; } const TFeatureInfluenceCalculatorCPtrPrVec& calculators{m_InfluenceCalculators[iid]}; - auto result = std::lower_bound(calculators.begin(), calculators.end(), feature, maths::COrderings::SFirstLess()); - return result != calculators.end() && result->first == feature ? result->second.get() : nullptr; + auto result = std::lower_bound(calculators.begin(), calculators.end(), + feature, maths::COrderings::SFirstLess()); + return result != calculators.end() && result->first == feature + ?
result->second.get() + : nullptr; } const CAnomalyDetectorModel::TDoubleVec& CAnomalyDetectorModel::personBucketCounts() const { @@ -444,62 +462,49 @@ bool CAnomalyDetectorModel::shouldIgnoreResult(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { - bool shouldIgnore = checkScheduledEvents(this->params().s_ScheduledEvents.get(), - boost::cref(*this), - feature, - CDetectionRule::E_FilterResults, - resultType, - pid, - cid, - time) || - checkRules(this->params().s_DetectionRules.get(), - boost::cref(*this), - feature, - CDetectionRule::E_FilterResults, - resultType, - pid, - cid, - time); + bool shouldIgnore = + checkScheduledEvents(this->params().s_ScheduledEvents.get(), + boost::cref(*this), feature, CDetectionRule::E_FilterResults, + resultType, pid, cid, time) || + checkRules(this->params().s_DetectionRules.get(), boost::cref(*this), feature, + CDetectionRule::E_FilterResults, resultType, pid, cid, time); return shouldIgnore; } -bool CAnomalyDetectorModel::shouldIgnoreSample(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { - bool shouldIgnore = checkScheduledEvents(this->params().s_ScheduledEvents.get(), - boost::cref(*this), - feature, - CDetectionRule::E_SkipSampling, - SKIP_SAMPLING_RESULT_TYPE, - pid, - cid, - time) || - checkRules(this->params().s_DetectionRules.get(), - boost::cref(*this), - feature, - CDetectionRule::E_SkipSampling, - SKIP_SAMPLING_RESULT_TYPE, - pid, - cid, - time); +bool CAnomalyDetectorModel::shouldIgnoreSample(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const { + bool shouldIgnore = + checkScheduledEvents(this->params().s_ScheduledEvents.get(), + boost::cref(*this), feature, CDetectionRule::E_SkipSampling, + SKIP_SAMPLING_RESULT_TYPE, pid, cid, time) || + checkRules(this->params().s_DetectionRules.get(), boost::cref(*this), + feature, CDetectionRule::E_SkipSampling, + SKIP_SAMPLING_RESULT_TYPE, pid, cid, time); return shouldIgnore; } bool CAnomalyDetectorModel::interimBucketCorrectorAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - if (traverser.traverseSubLevel(boost::bind(&CInterimBucketCorrector::acceptRestoreTraverser, m_InterimBucketCorrector.get(), _1)) == - false) { + if (traverser.traverseSubLevel(boost::bind(&CInterimBucketCorrector::acceptRestoreTraverser, + m_InterimBucketCorrector.get(), _1)) == false) { LOG_ERROR(<< "Invalid interim bucket corrector"); return false; } return true; } -void CAnomalyDetectorModel::interimBucketCorrectorAcceptPersistInserter(const std::string& tag, - core::CStatePersistInserter& inserter) const { - inserter.insertLevel(tag, boost::bind(&CInterimBucketCorrector::acceptPersistInserter, m_InterimBucketCorrector.get(), _1)); +void CAnomalyDetectorModel::interimBucketCorrectorAcceptPersistInserter( + const std::string& tag, + core::CStatePersistInserter& inserter) const { + inserter.insertLevel(tag, boost::bind(&CInterimBucketCorrector::acceptPersistInserter, + m_InterimBucketCorrector.get(), _1)); } -const CAnomalyDetectorModel::TStr1Vec& CAnomalyDetectorModel::scheduledEventDescriptions(core_t::TTime /*time*/) const { +const CAnomalyDetectorModel::TStr1Vec& +CAnomalyDetectorModel::scheduledEventDescriptions(core_t::TTime /*time*/) const { return EMPTY_STRING_LIST; } @@ -511,19 +516,23 @@ const std::size_t CAnomalyDetectorModel::MAXIMUM_PERMITTED_AGE(1000000); const core_t::TTime CAnomalyDetectorModel::TIME_UNSET(-1); const std::string CAnomalyDetectorModel::EMPTY_STRING; 
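Many hunks in this patch, including the SFeatureModels persistence callbacks below, only reflow boost::bind call sites. Each bound expression is equivalent to a small lambda capturing the object; a self-contained sketch with hypothetical types (not this project's classes):

    #include <boost/bind.hpp>
    #include <functional>

    struct SPersister {
        void persist(int level, double value) { /* write state somewhere */ }
    };

    int main() {
        SPersister p;
        // Fixes &p as the object and forwards the two placeholder arguments...
        std::function<void(int, double)> viaBind =
            boost::bind(&SPersister::persist, &p, _1, _2);
        // ...which behaves like this equivalent lambda:
        auto viaLambda = [&p](int level, double value) { p.persist(level, value); };
        viaBind(1, 2.0);
        viaLambda(1, 2.0);
        return 0;
    }

The bare _1 and _2 placeholders come with <boost/bind.hpp>, which injects them at global scope, matching their use throughout this patch.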
-CAnomalyDetectorModel::SFeatureModels::SFeatureModels(model_t::EFeature feature, TMathsModelPtr newModel) +CAnomalyDetectorModel::SFeatureModels::SFeatureModels(model_t::EFeature feature, + TMathsModelPtr newModel) : s_Feature(feature), s_NewModel(newModel) { } -bool CAnomalyDetectorModel::SFeatureModels::acceptRestoreTraverser(const SModelParams& params_, core::CStateRestoreTraverser& traverser) { +bool CAnomalyDetectorModel::SFeatureModels::acceptRestoreTraverser(const SModelParams& params_, + core::CStateRestoreTraverser& traverser) { maths_t::EDataType dataType{s_NewModel->dataType()}; - maths::SModelRestoreParams params{ - s_NewModel->params(), params_.decompositionRestoreParams(dataType), params_.distributionRestoreParams(dataType)}; + maths::SModelRestoreParams params{s_NewModel->params(), + params_.decompositionRestoreParams(dataType), + params_.distributionRestoreParams(dataType)}; do { if (traverser.name() == MODEL_TAG) { TMathsModelPtr prior; if (!traverser.traverseSubLevel( - boost::bind(maths::CModelStateSerialiser(), boost::cref(params), boost::ref(prior), _1))) { + boost::bind(maths::CModelStateSerialiser(), + boost::cref(params), boost::ref(prior), _1))) { return false; } s_Models.push_back(prior); @@ -534,7 +543,8 @@ bool CAnomalyDetectorModel::SFeatureModels::acceptRestoreTraverser(const SModelP void CAnomalyDetectorModel::SFeatureModels::acceptPersistInserter(core::CStatePersistInserter& inserter) const { for (const auto& model : s_Models) { - inserter.insertLevel(MODEL_TAG, boost::bind(maths::CModelStateSerialiser(), boost::cref(*model), _1)); + inserter.insertLevel(MODEL_TAG, boost::bind(maths::CModelStateSerialiser(), + boost::cref(*model), _1)); } } @@ -554,15 +564,17 @@ CAnomalyDetectorModel::SFeatureCorrelateModels::SFeatureCorrelateModels(model_t: : s_Feature(feature), s_ModelPrior(modelPrior), s_Models(model->clone()) { } -bool CAnomalyDetectorModel::SFeatureCorrelateModels::acceptRestoreTraverser(const SModelParams& params_, - core::CStateRestoreTraverser& traverser) { +bool CAnomalyDetectorModel::SFeatureCorrelateModels::acceptRestoreTraverser( + const SModelParams& params_, + core::CStateRestoreTraverser& traverser) { maths_t::EDataType dataType{s_ModelPrior->dataType()}; maths::SDistributionRestoreParams params{params_.distributionRestoreParams(dataType)}; std::size_t count{0u}; do { if (traverser.name() == MODEL_TAG) { if (!traverser.traverseSubLevel( - boost::bind(&maths::CTimeSeriesCorrelations::acceptRestoreTraverser, s_Models.get(), boost::cref(params), _1)) || + boost::bind(&maths::CTimeSeriesCorrelations::acceptRestoreTraverser, + s_Models.get(), boost::cref(params), _1)) || count++ > 0) { return false; } @@ -571,11 +583,14 @@ bool CAnomalyDetectorModel::SFeatureCorrelateModels::acceptRestoreTraverser(cons return true; } -void CAnomalyDetectorModel::SFeatureCorrelateModels::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(MODEL_TAG, boost::bind(&maths::CTimeSeriesCorrelations::acceptPersistInserter, s_Models.get(), _1)); +void CAnomalyDetectorModel::SFeatureCorrelateModels::acceptPersistInserter( + core::CStatePersistInserter& inserter) const { + inserter.insertLevel(MODEL_TAG, boost::bind(&maths::CTimeSeriesCorrelations::acceptPersistInserter, + s_Models.get(), _1)); } -void CAnomalyDetectorModel::SFeatureCorrelateModels::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { +void CAnomalyDetectorModel::SFeatureCorrelateModels::debugMemoryUsage( + core::CMemoryUsage::TMemoryUsagePtr mem) const { 
mem->setName("SFeatureCorrelateModels"); core::CMemoryDebug::dynamicSize("s_ModelPrior", s_ModelPrior, mem); core::CMemoryDebug::dynamicSize("s_Models", s_Models, mem); @@ -585,14 +600,13 @@ std::size_t CAnomalyDetectorModel::SFeatureCorrelateModels::memoryUsage() const return core::CMemory::dynamicSize(s_ModelPrior) + core::CMemory::dynamicSize(s_Models); } -CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::CTimeSeriesCorrelateModelAllocator(CResourceMonitor& resourceMonitor, - TMemoryUsage memoryUsage, - std::size_t resourceLimit, - std::size_t maxNumberCorrelations) - : m_ResourceMonitor(&resourceMonitor), - m_MemoryUsage(memoryUsage), - m_ResourceLimit(resourceLimit), - m_MaxNumberCorrelations(maxNumberCorrelations) { +CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::CTimeSeriesCorrelateModelAllocator( + CResourceMonitor& resourceMonitor, + TMemoryUsage memoryUsage, + std::size_t resourceLimit, + std::size_t maxNumberCorrelations) + : m_ResourceMonitor(&resourceMonitor), m_MemoryUsage(memoryUsage), + m_ResourceLimit(resourceLimit), m_MaxNumberCorrelations(maxNumberCorrelations) { } bool CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::areAllocationsAllowed() const { @@ -611,11 +625,13 @@ std::size_t CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::chunkSize return 500; } -CAnomalyDetectorModel::TMultivariatePriorPtr CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::newPrior() const { +CAnomalyDetectorModel::TMultivariatePriorPtr +CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::newPrior() const { return TMultivariatePriorPtr(m_PrototypePrior->clone()); } -void CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::prototypePrior(const TMultivariatePriorPtr& prior) { +void CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::prototypePrior( + const TMultivariatePriorPtr& prior) { m_PrototypePrior = prior; } } diff --git a/lib/model/CAnomalyDetectorModelConfig.cc b/lib/model/CAnomalyDetectorModelConfig.cc index 100e4fc8ca..a095bb0e61 100644 --- a/lib/model/CAnomalyDetectorModelConfig.cc +++ b/lib/model/CAnomalyDetectorModelConfig.cc @@ -66,17 +66,18 @@ const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT(12 const double CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION(0.8); const double CAnomalyDetectorModelConfig::DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS(0.2); const std::size_t CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE(36u); -const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE(12 * core::constants::HOUR); -const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE(core::constants::DAY); +const core_t::TTime + CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE(12 * core::constants::HOUR); +const core_t::TTime + CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE(core::constants::DAY); const double CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET(1.0); const double CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF(0.5); const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM(0.25); const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM(4.0); const double CAnomalyDetectorModelConfig::DEFAULT_CORRELATION_MODELS_OVERHEAD(3.0); const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION(0.3); -const double 
CAnomalyDetectorModelConfig::DEFAULT_AGGREGATION_STYLE_PARAMS[][model_t::NUMBER_AGGREGATION_PARAMS] = {{0.0, 1.0, 1.0, 1.0}, - {0.5, 0.5, 1.0, 5.0}, - {0.5, 0.5, 1.0, 1.0}}; +const double CAnomalyDetectorModelConfig::DEFAULT_AGGREGATION_STYLE_PARAMS[][model_t::NUMBER_AGGREGATION_PARAMS] = + {{0.0, 1.0, 1.0, 1.0}, {0.5, 0.5, 1.0, 5.0}, {0.5, 0.5, 1.0, 1.0}}; // The default for maximumanomalousprobability now matches the default // for unusualprobabilitythreshold in mllimits.conf - this avoids // inconsistencies in output @@ -94,13 +95,14 @@ const CAnomalyDetectorModelConfig::TDoubleDoublePr CAnomalyDetectorModelConfig:: CAnomalyDetectorModelConfig::TDoubleDoublePr(99.9, 90.0), CAnomalyDetectorModelConfig::TDoubleDoublePr(100.0, 100.0)}; -CAnomalyDetectorModelConfig CAnomalyDetectorModelConfig::defaultConfig(core_t::TTime bucketLength, - model_t::ESummaryMode summaryMode, - const std::string& summaryCountFieldName, - core_t::TTime latency, - std::size_t bucketResultsDelay, - bool multivariateByFields, - const std::string& multipleBucketLengths) { +CAnomalyDetectorModelConfig +CAnomalyDetectorModelConfig::defaultConfig(core_t::TTime bucketLength, + model_t::ESummaryMode summaryMode, + const std::string& summaryCountFieldName, + core_t::TTime latency, + std::size_t bucketResultsDelay, + bool multivariateByFields, + const std::string& multipleBucketLengths) { bucketLength = detail::validateBucketLength(bucketLength); double learnRate = DEFAULT_LEARN_RATE * bucketNormalizationFactor(bucketLength); @@ -112,17 +114,23 @@ CAnomalyDetectorModelConfig CAnomalyDetectorModelConfig::defaultConfig(core_t::T params.s_ExcludeFrequent = model_t::E_XF_None; params.configureLatency(latency, bucketLength); params.s_BucketResultsDelay = bucketResultsDelay; - params.s_MultipleBucketLengths = CAnomalyDetectorModelConfig::multipleBucketLengths(bucketLength, multipleBucketLengths); + params.s_MultipleBucketLengths = CAnomalyDetectorModelConfig::multipleBucketLengths( + bucketLength, multipleBucketLengths); TFactoryTypeFactoryPtrMap factories; params.s_MinimumModeFraction = DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION; - factories[E_EventRateFactory].reset(new CEventRateModelFactory(params, summaryMode, summaryCountFieldName)); - factories[E_MetricFactory].reset(new CMetricModelFactory(params, summaryMode, summaryCountFieldName)); - factories[E_EventRatePopulationFactory].reset(new CEventRatePopulationModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_EventRateFactory].reset( + new CEventRateModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_MetricFactory].reset( + new CMetricModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_EventRatePopulationFactory].reset(new CEventRatePopulationModelFactory( + params, summaryMode, summaryCountFieldName)); params.s_MinimumModeFraction = DEFAULT_POPULATION_MINIMUM_MODE_FRACTION; - factories[E_MetricPopulationFactory].reset(new CMetricPopulationModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_MetricPopulationFactory].reset(new CMetricPopulationModelFactory( + params, summaryMode, summaryCountFieldName)); params.s_MinimumModeFraction = 1.0; - factories[E_CountingFactory].reset(new CCountingModelFactory(params, summaryMode, summaryCountFieldName)); + factories[E_CountingFactory].reset( + new CCountingModelFactory(params, summaryMode, summaryCountFieldName)); CAnomalyDetectorModelConfig result; result.bucketLength(bucketLength); @@ -135,27 +143,30 @@ CAnomalyDetectorModelConfig 
CAnomalyDetectorModelConfig::defaultConfig(core_t::T // De-rates the decay and learn rate to account for differences from the // standard bucket length. double CAnomalyDetectorModelConfig::bucketNormalizationFactor(core_t::TTime bucketLength) { - return std::min(1.0, static_cast<double>(bucketLength) / static_cast<double>(STANDARD_BUCKET_LENGTH)); + return std::min(1.0, static_cast<double>(bucketLength) / + static_cast<double>(STANDARD_BUCKET_LENGTH)); } // Standard decay rate for time series decompositions given the specified // model decay rate and bucket length. -double CAnomalyDetectorModelConfig::trendDecayRate(double modelDecayRate, core_t::TTime bucketLength) { +double CAnomalyDetectorModelConfig::trendDecayRate(double modelDecayRate, + core_t::TTime bucketLength) { double scale = static_cast<double>(bucketLength / 24 / STANDARD_BUCKET_LENGTH); - return std::min(24.0 * modelDecayRate / bucketNormalizationFactor(bucketLength) / std::max(scale, 1.0), 0.1); + return std::min(24.0 * modelDecayRate / bucketNormalizationFactor(bucketLength) / + std::max(scale, 1.0), + 0.1); } CAnomalyDetectorModelConfig::CAnomalyDetectorModelConfig() : m_BucketLength(STANDARD_BUCKET_LENGTH), m_BucketResultsDelay(DEFAULT_BUCKET_RESULTS_DELAY), - m_MultivariateByFields(false), - m_ModelPlotBoundsPercentile(-1.0), + m_MultivariateByFields(false), m_ModelPlotBoundsPercentile(-1.0), m_MaximumAnomalousProbability(DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY), m_NoisePercentile(DEFAULT_NOISE_PERCENTILE), m_NoiseMultiplier(DEFAULT_NOISE_MULTIPLIER), - m_NormalizedScoreKnotPoints(boost::begin(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS), boost::end(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS)), - m_PerPartitionNormalisation(false), - m_DetectionRules(EMPTY_RULES_MAP), + m_NormalizedScoreKnotPoints(boost::begin(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS), + boost::end(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS)), + m_PerPartitionNormalisation(false), m_DetectionRules(EMPTY_RULES_MAP), m_ScheduledEvents(EMPTY_EVENTS) { for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) { for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) { @@ -175,18 +186,21 @@ void CAnomalyDetectorModelConfig::bucketResultsDelay(std::size_t delay) { m_BucketResultsDelay = delay; } -CAnomalyDetectorModelConfig::TTimeVec CAnomalyDetectorModelConfig::multipleBucketLengths(core_t::TTime bucketLength, - const std::string& multipleBucketLengths) { +CAnomalyDetectorModelConfig::TTimeVec +CAnomalyDetectorModelConfig::multipleBucketLengths(core_t::TTime bucketLength, + const std::string& multipleBucketLengths) { TStrVec multiBucketTokens; core::CRegex regex; regex.init(","); regex.split(multipleBucketLengths, multiBucketTokens); TTimeVec multiBuckets; - for (TStrVecCItr itr = multiBucketTokens.begin(); itr != multiBucketTokens.end(); ++itr) { + for (TStrVecCItr itr = multiBucketTokens.begin(); + itr != multiBucketTokens.end(); ++itr) { core_t::TTime t = 0; if (core::CStringUtils::stringToType(*itr, t)) { if ((t <= bucketLength) || (t % bucketLength != 0)) { - LOG_ERROR(<< "MultipleBucketLength " << t << " must be a multiple of " << bucketLength); + LOG_ERROR(<< "MultipleBucketLength " << t + << " must be a multiple of " << bucketLength); return TTimeVec(); } multiBuckets.push_back(t); @@ -204,7 +218,9 @@ void CAnomalyDetectorModelConfig::factories(const TFactoryTypeFactoryPtrMap& fac m_Factories = factories; } -bool
CAnomalyDetectorModelConfig::aggregationStyleParams(model_t::EAggregationStyle style, + model_t::EAggregationParam param, + double value) { switch (param) { case model_t::E_JointProbabilityWeight: if (value < 0.0 || value > 1.0) { @@ -226,8 +242,8 @@ bool CAnomalyDetectorModelConfig::aggregationStyleParams(model_t::EAggregationSt return false; } m_AggregationStyleParams[style][model_t::E_MinExtremeSamples] = value; - m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples] = - std::max(value, m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples]); + m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples] = std::max( + value, m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples]); break; case model_t::E_MaxExtremeSamples: if (value < 1.0 || value > 10.0) { @@ -235,8 +251,8 @@ bool CAnomalyDetectorModelConfig::aggregationStyleParams(model_t::EAggregationSt return false; } m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples] = value; - m_AggregationStyleParams[style][model_t::E_MinExtremeSamples] = - std::min(value, m_AggregationStyleParams[style][model_t::E_MinExtremeSamples]); + m_AggregationStyleParams[style][model_t::E_MinExtremeSamples] = std::min( + value, m_AggregationStyleParams[style][model_t::E_MinExtremeSamples]); break; } return true; @@ -245,7 +261,8 @@ bool CAnomalyDetectorModelConfig::aggregationStyleParams(model_t::EAggregationSt void CAnomalyDetectorModelConfig::maximumAnomalousProbability(double probability) { double minimum = 100 * maths::MINUSCULE_PROBABILITY; if (probability < minimum || probability > 1.0) { - LOG_INFO(<< "Maximum anomalous probability " << probability << " out of range [" << minimum << "," << 1.0 << "] truncating"); + LOG_INFO(<< "Maximum anomalous probability " << probability + << " out of range [" << minimum << "," << 1.0 << "] truncating"); } m_MaximumAnomalousProbability = maths::CTools::truncate(probability, minimum, 1.0); } @@ -291,17 +308,22 @@ bool CAnomalyDetectorModelConfig::normalizedScoreKnotPoints(const TDoubleDoubleP return false; } } - if (!boost::algorithm::is_sorted(points.begin(), points.end(), maths::COrderings::SFirstLess())) { - LOG_ERROR(<< "Percentiles must be monotonic increasing " << core::CContainerPrinter::print(points)); + if (!boost::algorithm::is_sorted(points.begin(), points.end(), + maths::COrderings::SFirstLess())) { + LOG_ERROR(<< "Percentiles must be monotonic increasing " + << core::CContainerPrinter::print(points)); return false; } - if (!boost::algorithm::is_sorted(points.begin(), points.end(), maths::COrderings::SSecondLess())) { - LOG_ERROR(<< "Scores must be monotonic increasing " << core::CContainerPrinter::print(points)); + if (!boost::algorithm::is_sorted(points.begin(), points.end(), + maths::COrderings::SSecondLess())) { + LOG_ERROR(<< "Scores must be monotonic increasing " + << core::CContainerPrinter::print(points)); return false; } m_NormalizedScoreKnotPoints = points; - m_NormalizedScoreKnotPoints.erase(std::unique(m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end()), + m_NormalizedScoreKnotPoints.erase(std::unique(m_NormalizedScoreKnotPoints.begin(), + m_NormalizedScoreKnotPoints.end()), m_NormalizedScoreKnotPoints.end()); return true; } @@ -311,7 +333,8 @@ bool CAnomalyDetectorModelConfig::init(const std::string& configFile) { return this->init(configFile, propTree); } -bool CAnomalyDetectorModelConfig::init(const std::string& configFile, boost::property_tree::ptree& propTree) { +bool CAnomalyDetectorModelConfig::init(const std::string& configFile, 
+ boost::property_tree::ptree& propTree) { LOG_DEBUG(<< "Reading config file " << configFile); try { @@ -342,7 +365,8 @@ bool CAnomalyDetectorModelConfig::init(const boost::property_tree::ptree& propTr bool result = true; - for (boost::property_tree::ptree::const_iterator i = propTree.begin(); i != propTree.end(); ++i) { + for (boost::property_tree::ptree::const_iterator i = propTree.begin(); + i != propTree.end(); ++i) { const std::string& stanzaName = i->first; const boost::property_tree::ptree& propertyTree = i->second; @@ -378,7 +402,8 @@ bool CAnomalyDetectorModelConfig::configureModelPlot(const std::string& modelPlo boost::property_tree::ini_parser::read_ini(strm, propTree); } catch (boost::property_tree::ptree_error& e) { - LOG_ERROR(<< "Error reading model plot config file " << modelPlotConfigFile << " : " << e.what()); + LOG_ERROR(<< "Error reading model plot config file " + << modelPlotConfigFile << " : " << e.what()); return false; } @@ -404,7 +429,8 @@ bool CAnomalyDetectorModelConfig::configureModelPlot(const boost::property_tree: return false; } } catch (boost::property_tree::ptree_error&) { - LOG_ERROR(<< "Error reading model debug config. Property '" << BOUNDS_PERCENTILE_PROPERTY << "' is missing"); + LOG_ERROR(<< "Error reading model debug config. Property '" + << BOUNDS_PERCENTILE_PROPERTY << "' is missing"); return false; } @@ -422,34 +448,27 @@ bool CAnomalyDetectorModelConfig::configureModelPlot(const boost::property_tree: m_ModelPlotTerms.insert(tokens[i]); } } catch (boost::property_tree::ptree_error&) { - LOG_ERROR(<< "Error reading model debug config. Property '" << TERMS_PROPERTY << "' is missing"); + LOG_ERROR(<< "Error reading model debug config. Property '" + << TERMS_PROPERTY << "' is missing"); return false; } return true; } -CAnomalyDetectorModelConfig::TModelFactoryCPtr CAnomalyDetectorModelConfig::factory(const CSearchKey& key) const { +CAnomalyDetectorModelConfig::TModelFactoryCPtr +CAnomalyDetectorModelConfig::factory(const CSearchKey& key) const { TModelFactoryCPtr result = m_FactoryCache[key]; if (!result) { - result = key.isSimpleCount() ? this->factory(key.identifier(), - key.function(), - true, - key.excludeFrequent(), - key.partitionFieldName(), - key.overFieldName(), - key.byFieldName(), - key.fieldName(), - key.influenceFieldNames()) - : this->factory(key.identifier(), - key.function(), - key.useNull(), - key.excludeFrequent(), - key.partitionFieldName(), - key.overFieldName(), - key.byFieldName(), - key.fieldName(), - key.influenceFieldNames()); + result = key.isSimpleCount() + ? this->factory(key.identifier(), key.function(), true, + key.excludeFrequent(), key.partitionFieldName(), + key.overFieldName(), key.byFieldName(), + key.fieldName(), key.influenceFieldNames()) + : this->factory(key.identifier(), key.function(), key.useNull(), + key.excludeFrequent(), key.partitionFieldName(), + key.overFieldName(), key.byFieldName(), + key.fieldName(), key.influenceFieldNames()); } return result; } @@ -566,7 +585,9 @@ CAnomalyDetectorModelConfig::factory(int identifier, case E_UnknownFactory: switch (model_t::analysisCategory(features[i])) { case model_t::E_EventRate: - factory = CSearchKey::isSimpleCount(function, byFieldName) ? E_CountingFactory : E_EventRateFactory; + factory = CSearchKey::isSimpleCount(function, byFieldName) + ? 
E_CountingFactory + : E_EventRateFactory; break; case model_t::E_Metric: factory = E_MetricFactory; @@ -594,7 +615,8 @@ CAnomalyDetectorModelConfig::factory(int identifier, TFactoryTypeFactoryPtrMapCItr prototype = m_Factories.find(factory); if (prototype == m_Factories.end()) { - LOG_ABORT(<< "No factory for features = " << core::CContainerPrinter::print(features)); + LOG_ABORT(<< "No factory for features = " + << core::CContainerPrinter::print(features)); } TModelFactoryPtr result(prototype->second->clone()); @@ -604,7 +626,8 @@ CAnomalyDetectorModelConfig::factory(int identifier, for (const auto& influenceFieldName : influenceFieldNames) { influences.push_back(*influenceFieldName); } - result->fieldNames(partitionFieldName, overFieldName, byFieldName, valueFieldName, influences); + result->fieldNames(partitionFieldName, overFieldName, byFieldName, + valueFieldName, influences); result->useNull(useNull); result->excludeFrequent(excludeFrequent); result->features(features); @@ -669,7 +692,8 @@ const CAnomalyDetectorModelConfig::TStrSet& CAnomalyDetectorModelConfig::modelPl return m_ModelPlotTerms; } -double CAnomalyDetectorModelConfig::aggregationStyleParam(model_t::EAggregationStyle style, model_t::EAggregationParam param) const { +double CAnomalyDetectorModelConfig::aggregationStyleParam(model_t::EAggregationStyle style, + model_t::EAggregationParam param) const { return m_AggregationStyleParams[style][param]; } @@ -685,7 +709,8 @@ double CAnomalyDetectorModelConfig::noiseMultiplier() const { return m_NoiseMultiplier; } -const CAnomalyDetectorModelConfig::TDoubleDoublePrVec& CAnomalyDetectorModelConfig::normalizedScoreKnotPoints() const { +const CAnomalyDetectorModelConfig::TDoubleDoublePrVec& +CAnomalyDetectorModelConfig::normalizedScoreKnotPoints() const { return m_NormalizedScoreKnotPoints; } @@ -739,7 +764,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre if (propName == ONLINE_LEARN_RATE_PROPERTY) { double learnRate = DEFAULT_LEARN_RATE; - if (core::CStringUtils::stringToType(propValue, learnRate) == false || learnRate <= 0.0) { + if (core::CStringUtils::stringToType(propValue, learnRate) == false || + learnRate <= 0.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; @@ -751,7 +777,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } } else if (propName == DECAY_RATE_PROPERTY) { double decayRate = DEFAULT_DECAY_RATE; - if (core::CStringUtils::stringToType(propValue, decayRate) == false || decayRate <= 0.0) { + if (core::CStringUtils::stringToType(propValue, decayRate) == false || + decayRate <= 0.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; @@ -763,7 +790,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } } else if (propName == INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY) { double multiplier = DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER; - if (core::CStringUtils::stringToType(propValue, multiplier) == false || multiplier < 1.0) { + if (core::CStringUtils::stringToType(propValue, multiplier) == false || + multiplier < 1.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; @@ -774,7 +802,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } } else if (propName == MAXIMUM_UPDATES_PER_BUCKET_PROPERTY) { double maximumUpdatesPerBucket; - if 
(core::CStringUtils::stringToType(propValue, maximumUpdatesPerBucket) == false || maximumUpdatesPerBucket < 0.0) { + if (core::CStringUtils::stringToType(propValue, maximumUpdatesPerBucket) == false || + maximumUpdatesPerBucket < 0.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; @@ -785,7 +814,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } } else if (propName == INDIVIDUAL_MODE_FRACTION_PROPERTY) { double fraction; - if (core::CStringUtils::stringToType(propValue, fraction) == false || fraction < 0.0 || fraction > 1.0) { + if (core::CStringUtils::stringToType(propValue, fraction) == false || + fraction < 0.0 || fraction > 1.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; @@ -799,7 +829,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } } else if (propName == POPULATION_MODE_FRACTION_PROPERTY) { double fraction; - if (core::CStringUtils::stringToType(propValue, fraction) == false || fraction < 0.0 || fraction > 1.0) { + if (core::CStringUtils::stringToType(propValue, fraction) == false || + fraction < 0.0 || fraction > 1.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; @@ -813,7 +844,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } } else if (propName == PEERS_MODE_FRACTION_PROPERTY) { double fraction; - if (core::CStringUtils::stringToType(propValue, fraction) == false || fraction < 0.0 || fraction > 1.0) { + if (core::CStringUtils::stringToType(propValue, fraction) == false || + fraction < 0.0 || fraction > 1.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; @@ -824,7 +856,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } } else if (propName == COMPONENT_SIZE_PROPERTY) { int componentSize; - if (core::CStringUtils::stringToType(propValue, componentSize) == false || componentSize < 0) { + if (core::CStringUtils::stringToType(propValue, componentSize) == false || + componentSize < 0) { LOG_ERROR(<< "Invalid value of property " << propName << " : " << propValue); result = false; continue; @@ -882,13 +915,15 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre for (std::size_t k = 0u; k < model_t::NUMBER_AGGREGATION_PARAMS; ++k, ++l) { double value; if (core::CStringUtils::stringToType(strings[l], value) == false) { - LOG_ERROR(<< "Unexpected value " << strings[l] << " in property " << propName); + LOG_ERROR(<< "Unexpected value " << strings[l] + << " in property " << propName); result = false; continue; } this->aggregationStyleParams( - static_cast<model_t::EAggregationStyle>(j), static_cast<model_t::EAggregationParam>(k), value); + static_cast<model_t::EAggregationStyle>(j), + static_cast<model_t::EAggregationParam>(k), value); } } } else if (propName == MAXIMUM_ANOMALOUS_PROBABILITY_PROPERTY) { @@ -901,14 +936,16 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre this->maximumAnomalousProbability(probability); } else if (propName == NOISE_PERCENTILE_PROPERTY) { double percentile; - if (core::CStringUtils::stringToType(propValue, percentile) == false || this->noisePercentile(percentile) == false) { + if (core::CStringUtils::stringToType(propValue, percentile) == false || + this->noisePercentile(percentile) == false) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } } else if (propName == 
NOISE_MULTIPLIER_PROPERTY) { double multiplier; - if (core::CStringUtils::stringToType(propValue, multiplier) == false || this->noiseMultiplier(multiplier) == false) { + if (core::CStringUtils::stringToType(propValue, multiplier) == false || + this->noiseMultiplier(multiplier) == false) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; @@ -924,7 +961,8 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre strings.push_back(remainder); } if (strings.empty() || (strings.size() % 2) != 0) { - LOG_ERROR(<< "Expected even number of values for property " << propName << " " << core::CContainerPrinter::print(strings)); + LOG_ERROR(<< "Expected even number of values for property " << propName + << " " << core::CContainerPrinter::print(strings)); result = false; continue; } @@ -936,12 +974,14 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre double rate; double score; if (core::CStringUtils::stringToType(strings[j], rate) == false) { - LOG_ERROR(<< "Unexpected value " << strings[j] << " for rate in property " << propName); + LOG_ERROR(<< "Unexpected value " << strings[j] + << " for rate in property " << propName); result = false; continue; } if (core::CStringUtils::stringToType(strings[j + 1], score) == false) { - LOG_ERROR(<< "Unexpected value " << strings[j + 1] << " for score in property " << propName); + LOG_ERROR(<< "Unexpected value " << strings[j + 1] + << " for score in property " << propName); result = false; continue; } diff --git a/lib/model/CAnomalyScore.cc b/lib/model/CAnomalyScore.cc index b66e0c4e91..11ef2cd6ea 100644 --- a/lib/model/CAnomalyScore.cc +++ b/lib/model/CAnomalyScore.cc @@ -111,7 +111,8 @@ bool CAnomalyScore::compute(double jointProbabilityWeight, } maths::CLogJointProbabilityOfLessLikelySamples logPJointCalculator; - std::size_t n = std::min(addProbabilities(probabilities, logPJointCalculator), maxExtremeSamples); + std::size_t n = std::min(addProbabilities(probabilities, logPJointCalculator), + maxExtremeSamples); // Note the upper bound is significantly tighter, so we just // use that in the following calculation. @@ -124,13 +125,14 @@ bool CAnomalyScore::compute(double jointProbabilityWeight, // Sanity check the probability not greater than 1.0. if (logPJoint > 0.0) { - LOG_ERROR(<< "Invalid log joint probability " << logPJoint - << ", probabilities = " << core::CContainerPrinter::print(probabilities)); + LOG_ERROR(<< "Invalid log joint probability " << logPJoint << ", probabilities = " + << core::CContainerPrinter::print(probabilities)); return false; } double logPExtreme = 0.0; - for (std::size_t m = 1u, i = maths::CTools::truncate(minExtremeSamples, m, n); i <= n; ++i) { + for (std::size_t m = 1u, i = maths::CTools::truncate(minExtremeSamples, m, n); + i <= n; ++i) { maths::CLogProbabilityOfMFromNExtremeSamples logPExtremeCalculator(i); addProbabilities(probabilities, logPExtremeCalculator); double logPi; @@ -146,14 +148,15 @@ bool CAnomalyScore::compute(double jointProbabilityWeight, // Sanity check the probability in the range [0, 1]. 
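Aside: after the sanity checks here, the two log probabilities are blended as a weighted product in log space, which is a weighted geometric mean, as the reflowed code below shows. A minimal standalone sketch, with illustrative weights rather than the configured defaults:

    #include <cmath>

    // Weighted geometric mean of two tail probabilities, computed in log
    // space so that tiny probabilities do not underflow on the way in.
    double blend(double logPJoint, double logPExtreme,
                 double jointWeight, double extremeWeight) {
        return std::exp(jointWeight * logPJoint) *
               std::exp(extremeWeight * logPExtreme);
    }

For example, blend(std::log(1e-8), std::log(1e-5), 0.5, 0.5) is sqrt(1e-8 * 1e-5), roughly 3.2e-7.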
if (logPExtreme > 0.0) { - LOG_ERROR(<< "Invalid log extreme probability " << logPExtreme - << ", probabilities = " << core::CContainerPrinter::print(probabilities)); + LOG_ERROR(<< "Invalid log extreme probability " << logPExtreme << ", probabilities = " + << core::CContainerPrinter::print(probabilities)); return false; } double logMaximumAnomalousProbability = std::log(maximumAnomalousProbability); if (logPJoint > logMaximumAnomalousProbability && logPExtreme > logMaximumAnomalousProbability) { - overallProbability = std::exp(jointProbabilityWeight * logPJoint) * std::exp(extremeProbabilityWeight * logPExtreme); + overallProbability = std::exp(jointProbabilityWeight * logPJoint) * + std::exp(extremeProbabilityWeight * logPExtreme); return true; } @@ -161,40 +164,53 @@ bool CAnomalyScore::compute(double jointProbabilityWeight, // [e^-100000, 1]. static const double NORMAL_RANGE_SCORE_FRACTION = 0.8; - static const double LOG_SMALLEST_PROBABILITY = std::log(maths::CTools::smallestProbability()); - static const double SMALLEST_PROBABILITY_DEVIATION = probabilityToScore(maths::CTools::smallestProbability()); + static const double LOG_SMALLEST_PROBABILITY = + std::log(maths::CTools::smallestProbability()); + static const double SMALLEST_PROBABILITY_DEVIATION = + probabilityToScore(maths::CTools::smallestProbability()); static const double SMALLEST_LOG_JOINT_PROBABILTY = -100000.0; static const double SMALLEST_LOG_EXTREME_PROBABILTY = -1500.0; if (logPJoint < LOG_SMALLEST_PROBABILITY) { - double interpolate = - std::min((logPJoint - LOG_SMALLEST_PROBABILITY) / (SMALLEST_LOG_JOINT_PROBABILTY - LOG_SMALLEST_PROBABILITY), 1.0); - overallAnomalyScore = (NORMAL_RANGE_SCORE_FRACTION + (1.0 - NORMAL_RANGE_SCORE_FRACTION) * interpolate) * jointProbabilityWeight * - SMALLEST_PROBABILITY_DEVIATION; + double interpolate = std::min((logPJoint - LOG_SMALLEST_PROBABILITY) / + (SMALLEST_LOG_JOINT_PROBABILTY - LOG_SMALLEST_PROBABILITY), + 1.0); + overallAnomalyScore = (NORMAL_RANGE_SCORE_FRACTION + + (1.0 - NORMAL_RANGE_SCORE_FRACTION) * interpolate) * + jointProbabilityWeight * SMALLEST_PROBABILITY_DEVIATION; } else { - overallAnomalyScore = NORMAL_RANGE_SCORE_FRACTION * jointProbabilityWeight * probabilityToScore(std::exp(logPJoint)); + overallAnomalyScore = NORMAL_RANGE_SCORE_FRACTION * jointProbabilityWeight * + probabilityToScore(std::exp(logPJoint)); } if (logPExtreme < LOG_SMALLEST_PROBABILITY) { - double interpolate = - std::min((logPExtreme - LOG_SMALLEST_PROBABILITY) / (SMALLEST_LOG_EXTREME_PROBABILTY - LOG_SMALLEST_PROBABILITY), 1.0); - overallAnomalyScore += (NORMAL_RANGE_SCORE_FRACTION + (1.0 - NORMAL_RANGE_SCORE_FRACTION) * interpolate) * + double interpolate = std::min((logPExtreme - LOG_SMALLEST_PROBABILITY) / + (SMALLEST_LOG_EXTREME_PROBABILTY - LOG_SMALLEST_PROBABILITY), + 1.0); + overallAnomalyScore += (NORMAL_RANGE_SCORE_FRACTION + + (1.0 - NORMAL_RANGE_SCORE_FRACTION) * interpolate) * extremeProbabilityWeight * SMALLEST_PROBABILITY_DEVIATION; } else { - overallAnomalyScore += NORMAL_RANGE_SCORE_FRACTION * extremeProbabilityWeight * probabilityToScore(std::exp(logPExtreme)); + overallAnomalyScore += NORMAL_RANGE_SCORE_FRACTION * extremeProbabilityWeight * + probabilityToScore(std::exp(logPExtreme)); } // Invert the deviation in the region it is 1-to-1 otherwise // use the weighted harmonic mean. overallProbability = overallAnomalyScore > 0.0 - ? 
scoreToProbability(std::min(overallAnomalyScore / NORMAL_RANGE_SCORE_FRACTION, SMALLEST_PROBABILITY_DEVIATION)) - : std::exp(jointProbabilityWeight * logPJoint) * std::exp(extremeProbabilityWeight * logPExtreme); - - LOG_TRACE(<< "logJointProbability = " << logPJoint << ", jointProbabilityWeight = " << jointProbabilityWeight - << ", logExtremeProbability = " << logPExtreme << ", extremeProbabilityWeight = " << extremeProbabilityWeight - << ", overallProbability = " << overallProbability << ", overallAnomalyScore = " << overallAnomalyScore - << ", # probabilities = " << probabilities.size() << ", probabilities = " << core::CContainerPrinter::print(probabilities)); + ? scoreToProbability(std::min(overallAnomalyScore / NORMAL_RANGE_SCORE_FRACTION, + SMALLEST_PROBABILITY_DEVIATION)) + : std::exp(jointProbabilityWeight * logPJoint) * + std::exp(extremeProbabilityWeight * logPExtreme); + + LOG_TRACE(<< "logJointProbability = " << logPJoint << ", jointProbabilityWeight = " + << jointProbabilityWeight << ", logExtremeProbability = " << logPExtreme + << ", extremeProbabilityWeight = " << extremeProbabilityWeight + << ", overallProbability = " << overallProbability + << ", overallAnomalyScore = " << overallAnomalyScore + << ", # probabilities = " << probabilities.size() + << ", probabilities = " << core::CContainerPrinter::print(probabilities)); return true; } @@ -211,15 +227,13 @@ CAnomalyScore::CComputer::CComputer(double jointProbabilityWeight, m_MaximumAnomalousProbability(maximumAnomalousProbability) { } -bool CAnomalyScore::CComputer::operator()(const TDoubleVec& probabilities, double& overallAnomalyScore, double& overallProbability) const { - return CAnomalyScore::compute(m_JointProbabilityWeight, - m_ExtremeProbabilityWeight, - m_MinExtremeSamples, - m_MaxExtremeSamples, - m_MaximumAnomalousProbability, - probabilities, - overallAnomalyScore, - overallProbability); +bool CAnomalyScore::CComputer::operator()(const TDoubleVec& probabilities, + double& overallAnomalyScore, + double& overallProbability) const { + return CAnomalyScore::compute(m_JointProbabilityWeight, m_ExtremeProbabilityWeight, + m_MinExtremeSamples, m_MaxExtremeSamples, + m_MaximumAnomalousProbability, probabilities, + overallAnomalyScore, overallProbability); } CAnomalyScore::CNormalizer::CNormalizer(const CAnomalyDetectorModelConfig& config) @@ -232,9 +246,10 @@ CAnomalyScore::CNormalizer::CNormalizer(const CAnomalyDetectorModelConfi m_BucketNormalizationFactor(config.bucketNormalizationFactor()), m_RawScoreQuantileSummary(201, config.decayRate()), m_RawScoreHighQuantileSummary(201, config.decayRate()), - m_DecayRate(config.decayRate() * std::max(static_cast<double>(config.bucketLength()) / - static_cast<double>(CAnomalyDetectorModelConfig::STANDARD_BUCKET_LENGTH), - 1.0)), + m_DecayRate(config.decayRate() * + std::max(static_cast<double>(config.bucketLength()) / + static_cast<double>(CAnomalyDetectorModelConfig::STANDARD_BUCKET_LENGTH), + 1.0)), m_TimeToQuantileDecay(QUANTILE_DECAY_TIME) { } @@ -279,7 +294,8 @@ bool CAnomalyScore::CNormalizer::normalize(double& score) const { static const double CONFIDENCE_INTERVAL = 70.0; - double normalizedScores[] = {m_MaximumNormalizedScore, m_MaximumNormalizedScore, m_MaximumNormalizedScore, m_MaximumNormalizedScore}; + double normalizedScores[] = {m_MaximumNormalizedScore, m_MaximumNormalizedScore, + m_MaximumNormalizedScore, m_MaximumNormalizedScore}; uint32_t discreteScore = this->discreteScore(score); @@ -322,18 +338,23 @@ bool CAnomalyScore::CNormalizer::normalize(double& score) const { // to the score. 
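Aside: the code below compares the discretized raw score against the score at the configured noise percentile, damping or amplifying the knot-point score accordingly. A minimal sketch of the shape of that adjustment; the 0.1 multiplier is a stand-in for the noise multiplier and discretization constants, not the configured values:

    #include <algorithm>

    // Scores below the noise level pull the normalized score towards zero,
    // scores above it push it up; the factor is floored at zero.
    double noiseAdjusted(double knotScore, double discreteScore, double noiseScore) {
        double signalStrength = 0.1 * (discreteScore - noiseScore); // stand-in scale
        return knotScore * std::max(1.0 + signalStrength, 0.0);
    }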
uint32_t noiseScore; m_RawScoreQuantileSummary.quantile(m_NoisePercentile / 100.0, noiseScore); - TDoubleDoublePrVecCItr knotPoint = - std::lower_bound(m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end(), TDoubleDoublePr(m_NoisePercentile, 0.0)); + TDoubleDoublePrVecCItr knotPoint = std::lower_bound( + m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end(), + TDoubleDoublePr(m_NoisePercentile, 0.0)); double signalStrength = - m_NoiseMultiplier * 10.0 / DISCRETIZATION_FACTOR * (static_cast<double>(discreteScore) - static_cast<double>(noiseScore)); + m_NoiseMultiplier * 10.0 / DISCRETIZATION_FACTOR * + (static_cast<double>(discreteScore) - static_cast<double>(noiseScore)); double l0; double u0; m_RawScoreQuantileSummary.cdf(0, 0.0, l0, u0); - normalizedScores[0] = knotPoint->second * std::max(1.0 + signalStrength, 0.0) + - m_MaximumNormalizedScore * std::max(2.0 * std::min(50.0 * (l0 + u0) / m_NoisePercentile, 1.0) - 1.0, 0.0); - LOG_TRACE(<< "normalizedScores[0] = " << normalizedScores[0] << ", knotPoint = " << knotPoint->second - << ", discreteScore = " << discreteScore << ", noiseScore = " << noiseScore << ", l(0) = " << l0 << ", u(0) = " << u0 - << ", signalStrength = " << signalStrength); + normalizedScores[0] = + knotPoint->second * std::max(1.0 + signalStrength, 0.0) + + m_MaximumNormalizedScore * + std::max(2.0 * std::min(50.0 * (l0 + u0) / m_NoisePercentile, 1.0) - 1.0, 0.0); + LOG_TRACE(<< "normalizedScores[0] = " << normalizedScores[0] + << ", knotPoint = " << knotPoint->second << ", discreteScore = " << discreteScore + << ", noiseScore = " << noiseScore << ", l(0) = " << l0 + << ", u(0) = " << u0 << ", signalStrength = " << signalStrength); // Compute the raw normalized score. Note we compute the probability // of seeing a lower score on the normal bucket length and convert @@ -351,21 +372,25 @@ bool CAnomalyScore::CNormalizer::normalize(double& score) const { lowerPercentile = maths::CTools::truncate(lowerPercentile, 0.0, 100.0); upperPercentile = maths::CTools::truncate(upperPercentile, 0.0, 100.0); - std::size_t lowerKnotPoint = std::max( - std::lower_bound( - m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end(), lowerPercentile, maths::COrderings::SFirstLess()) - - m_NormalizedScoreKnotPoints.begin(), - ptrdiff_t(1)); - std::size_t upperKnotPoint = std::max( - std::lower_bound( - m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end(), upperPercentile, maths::COrderings::SFirstLess()) - - m_NormalizedScoreKnotPoints.begin(), - ptrdiff_t(1)); + std::size_t lowerKnotPoint = + std::max(std::lower_bound(m_NormalizedScoreKnotPoints.begin(), + m_NormalizedScoreKnotPoints.end(), lowerPercentile, + maths::COrderings::SFirstLess()) - + m_NormalizedScoreKnotPoints.begin(), + ptrdiff_t(1)); + std::size_t upperKnotPoint = + std::max(std::lower_bound(m_NormalizedScoreKnotPoints.begin(), + m_NormalizedScoreKnotPoints.end(), upperPercentile, + maths::COrderings::SFirstLess()) - + m_NormalizedScoreKnotPoints.begin(), + ptrdiff_t(1)); if (lowerKnotPoint < m_NormalizedScoreKnotPoints.size()) { const TDoubleDoublePr& left = m_NormalizedScoreKnotPoints[lowerKnotPoint - 1]; const TDoubleDoublePr& right = m_NormalizedScoreKnotPoints[lowerKnotPoint]; // Linearly interpolate between the two knot points. 
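Aside: the interpolation reflowed below maps a raw score percentile to a normalized score along the piecewise-linear curve through the (percentile, score) knot points. A self-contained sketch, assuming at least two knots with strictly increasing percentiles:

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Piecewise-linear interpolation through sorted (percentile, score) knots.
    double interpolate(const std::vector<std::pair<double, double>>& knots,
                       double percentile) {
        auto right = std::lower_bound(knots.begin(), knots.end(),
                                      std::make_pair(percentile, 0.0));
        if (right == knots.begin()) {
            return knots.front().second;
        }
        if (right == knots.end()) {
            return knots.back().second;
        }
        auto left = right - 1;
        return left->second + (right->second - left->second) *
                                  (percentile - left->first) /
                                  (right->first - left->first);
    }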
- normalizedScores[1] = left.second + (right.second - left.second) * (lowerPercentile - left.first) / (right.first - left.first); + normalizedScores[1] = left.second + (right.second - left.second) * + (lowerPercentile - left.first) / + (right.first - left.first); } else { normalizedScores[1] = m_MaximumNormalizedScore; } @@ -374,36 +399,48 @@ bool CAnomalyScore::CNormalizer::normalize(double& score) const { const TDoubleDoublePr& right = m_NormalizedScoreKnotPoints[upperKnotPoint]; // Linearly interpolate between the two knot points. normalizedScores[1] = (normalizedScores[1] + left.second + - (right.second - left.second) * (upperPercentile - left.first) / (right.first - left.first)) / + (right.second - left.second) * (upperPercentile - left.first) / + (right.first - left.first)) / 2.0; } else { normalizedScores[1] = (normalizedScores[1] + m_MaximumNormalizedScore) / 2.0; } - LOG_TRACE(<< "normalizedScores[1] = " << normalizedScores[1] << ", lowerBound = " << lowerBound << ", upperBound = " << upperBound - << ", lowerPercentile = " << lowerPercentile << ", upperPercentile = " << upperPercentile); + LOG_TRACE(<< "normalizedScores[1] = " << normalizedScores[1] << ", lowerBound = " << lowerBound + << ", upperBound = " << upperBound << ", lowerPercentile = " << lowerPercentile + << ", upperPercentile = " << upperPercentile); // Compute the maximum score ceiling. double ratio = score / m_MaxScore[0]; double curves[] = {0.0 + 1.5 * ratio, 0.5 + 0.5 * ratio}; - normalizedScores[2] = m_MaximumNormalizedScore * (*std::min_element(curves, curves + 2)); - LOG_TRACE(<< "normalizedScores[2] = " << normalizedScores[2] << ", score = " << score << ", maxScore = " << m_MaxScore[0]); + normalizedScores[2] = m_MaximumNormalizedScore * + (*std::min_element(curves, curves + 2)); + LOG_TRACE(<< "normalizedScores[2] = " << normalizedScores[2] + << ", score = " << score << ", maxScore = " << m_MaxScore[0]); // Logarithmically interpolate the maximum score between the // largest significant and small probability. 
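Aside: the constants reflowed below implement the interpolation this comment describes: the score ceiling is linear in log(probability) between two anchor probabilities. A sketch with stand-in anchors and a stand-in probability-to-score map (the real constants and mapping live elsewhere in CAnomalyScore.cc):

    #include <cmath>

    // Score ceiling, linear in log(probability) between two anchors.
    double scoreCeiling(double probability, double maxScore) {
        const double pSignificant = 0.05; // stand-in anchor
        const double pSmall = 1e-4;       // stand-in anchor
        auto toScore = [&](double p) {    // stand-in map: 0 at pSignificant, 1 at pSmall
            return (std::log(pSignificant) - std::log(p)) /
                   (std::log(pSignificant) - std::log(pSmall));
        };
        double m = (toScore(pSmall) - toScore(pSignificant)) /
                   (std::log(pSmall) - std::log(pSignificant));
        double c = std::log(pSignificant);
        return maxScore * (0.95 * m * (std::log(probability) - c) + 0.05);
    }

With these stand-ins the ceiling is 5% of maxScore at p = 0.05 and 100% of it at p = 1e-4, rising linearly in log(p) in between.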
- static const double M = (probabilityToScore(maths::SMALL_PROBABILITY) - probabilityToScore(maths::LARGEST_SIGNIFICANT_PROBABILITY)) / - (std::log(maths::SMALL_PROBABILITY) - std::log(maths::LARGEST_SIGNIFICANT_PROBABILITY)); + static const double M = (probabilityToScore(maths::SMALL_PROBABILITY) - + probabilityToScore(maths::LARGEST_SIGNIFICANT_PROBABILITY)) / + (std::log(maths::SMALL_PROBABILITY) - + std::log(maths::LARGEST_SIGNIFICANT_PROBABILITY)); static const double C = std::log(maths::LARGEST_SIGNIFICANT_PROBABILITY); - normalizedScores[3] = m_MaximumNormalizedScore * (0.95 * M * (std::log(scoreToProbability(score)) - C) + 0.05); + normalizedScores[3] = m_MaximumNormalizedScore * + (0.95 * M * (std::log(scoreToProbability(score)) - C) + 0.05); LOG_TRACE(<< "normalizedScores[3] = " << normalizedScores[3] << ", score = " << score << ", probability = " << scoreToProbability(score)); - score = std::min(*std::min_element(boost::begin(normalizedScores), boost::end(normalizedScores)), m_MaximumNormalizedScore); + score = std::min(*std::min_element(boost::begin(normalizedScores), + boost::end(normalizedScores)), + m_MaximumNormalizedScore); LOG_TRACE(<< "normalizedScore = " << score); return true; } -void CAnomalyScore::CNormalizer::quantile(double score, double confidence, double& lowerBound, double& upperBound) const { +void CAnomalyScore::CNormalizer::quantile(double score, + double confidence, + double& lowerBound, + double& upperBound) const { uint32_t discreteScore = this->discreteScore(score); double n = static_cast<double>(m_RawScoreQuantileSummary.n()); double lowerQuantile = (100.0 - confidence) / 200.0; @@ -425,8 +462,10 @@ void CAnomalyScore::CNormalizer::quantile(double score, double confidence, doubl m_RawScoreQuantileSummary.pdf(discreteScore, 0.0, pdfLowerBound, pdfUpperBound); lowerBound = maths::CTools::truncate(lowerBound - pdfUpperBound, 0.0, fl); upperBound = maths::CTools::truncate(upperBound - pdfLowerBound, 0.0, fu); - if (!(lowerBound >= 0.0 && lowerBound <= 1.0) || !(upperBound >= 0.0 && upperBound <= 1.0)) { - LOG_ERROR(<< "score = " << score << ", cdf = [" << lowerBound << "," << upperBound << "]" + if (!(lowerBound >= 0.0 && lowerBound <= 1.0) || + !(upperBound >= 0.0 && upperBound <= 1.0)) { + LOG_ERROR(<< "score = " << score << ", cdf = [" << lowerBound << "," + << upperBound << "]" << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]"); } lowerBound = maths::CQDigest::cdfQuantile(n, lowerBound, lowerQuantile); @@ -448,16 +487,22 @@ void CAnomalyScore::CNormalizer::quantile(double score, double confidence, doubl double cutoffCdfLowerBound; double cutoffCdfUpperBound; - m_RawScoreHighQuantileSummary.cdf(m_HighPercentileScore, 0.0, cutoffCdfLowerBound, cutoffCdfUpperBound); + m_RawScoreHighQuantileSummary.cdf(m_HighPercentileScore, 0.0, + cutoffCdfLowerBound, cutoffCdfUpperBound); double pdfLowerBound; double pdfUpperBound; m_RawScoreHighQuantileSummary.pdf(discreteScore, 0.0, pdfLowerBound, pdfUpperBound); - lowerBound = fl + (1.0 - fl) * std::max(lowerBound - cutoffCdfUpperBound - pdfUpperBound, 0.0) / - std::max(1.0 - cutoffCdfUpperBound, std::numeric_limits<double>::epsilon()); - upperBound = fu + (1.0 - fu) * std::max(upperBound - cutoffCdfLowerBound - pdfLowerBound, 0.0) / - std::max(1.0 - cutoffCdfLowerBound, std::numeric_limits<double>::epsilon()); - if (!(lowerBound >= 0.0 && lowerBound <= 1.0) || !(upperBound >= 0.0 && upperBound <= 1.0)) { + lowerBound = fl + (1.0 - fl) * + std::max(lowerBound - cutoffCdfUpperBound - pdfUpperBound, 0.0) / + std::max(1.0 - 
cutoffCdfUpperBound, + std::numeric_limits<double>::epsilon()); + upperBound = fu + (1.0 - fu) * + std::max(upperBound - cutoffCdfLowerBound - pdfLowerBound, 0.0) / + std::max(1.0 - cutoffCdfLowerBound, + std::numeric_limits<double>::epsilon()); + if (!(lowerBound >= 0.0 && lowerBound <= 1.0) || + !(upperBound >= 0.0 && upperBound <= 1.0)) { LOG_ERROR(<< "score = " << score << ", cdf = [" << lowerBound << "," << upperBound << "]" << ", cutoff = [" << cutoffCdfLowerBound << "," << cutoffCdfUpperBound << "]" << ", pdf = [" << pdfLowerBound << "," << pdfUpperBound << "]" @@ -485,10 +530,12 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score) { m_MaxScore.add(score); if (m_MaxScore[0] > BIG_CHANGE_FACTOR * oldMaxScore) { bigChange = true; - LOG_DEBUG(<< "Big change in normalizer - max score updated from " << oldMaxScore << " to " << m_MaxScore[0]); + LOG_DEBUG(<< "Big change in normalizer - max score updated from " + << oldMaxScore << " to " << m_MaxScore[0]); } uint32_t discreteScore = this->discreteScore(score); - LOG_TRACE(<< "score = " << score << ", discreteScore = " << discreteScore << ", maxScore = " << m_MaxScore[0]); + LOG_TRACE(<< "score = " << score << ", discreteScore = " << discreteScore + << ", maxScore = " << m_MaxScore[0]); uint64_t n = m_RawScoreQuantileSummary.n(); uint64_t k = m_RawScoreQuantileSummary.k(); @@ -504,9 +551,11 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score) { TUInt32UInt64PrVec L; m_RawScoreQuantileSummary.summary(L); if (L.empty()) { - LOG_ERROR(<< "High quantile summary is empty: " << m_RawScoreQuantileSummary.print()); + LOG_ERROR(<< "High quantile summary is empty: " + << m_RawScoreQuantileSummary.print()); } else { - uint64_t highPercentileCount = static_cast<uint64_t>((HIGH_PERCENTILE / 100.0) * static_cast<double>(n) + 0.5); + uint64_t highPercentileCount = static_cast<uint64_t>( + (HIGH_PERCENTILE / 100.0) * static_cast<double>(n) + 0.5); // Estimate the high percentile score and update the count. std::size_t i = 1u; @@ -525,8 +574,11 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score) { LOG_ERROR(<< "L " << core::CContainerPrinter::print(L)); m_HighPercentileCount = n; } - LOG_TRACE(<< "s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount - << ", percentile = " << 100.0 * static_cast<double>(m_HighPercentileCount) / static_cast<double>(n) << "%" + LOG_TRACE(<< "s(H) = " << m_HighPercentileScore + << ", c(H) = " << m_HighPercentileCount << ", percentile = " + << 100.0 * static_cast<double>(m_HighPercentileCount) / + static_cast<double>(n) + << "%" << ", desired c(H) = " << highPercentileCount); // Populate the high quantile summary. @@ -547,13 +599,15 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score) { } else { m_RawScoreHighQuantileSummary.add(discreteScore); } - LOG_TRACE(<< "percentile = " << static_cast<double>(m_HighPercentileCount) / static_cast<double>(n + 1)); + LOG_TRACE(<< "percentile = " + << static_cast<double>(m_HighPercentileCount) / static_cast<double>(n + 1)); // Periodically refresh the high percentile score. 
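Aside: the refresh below re-derives the count of samples at or below the high percentile cutoff as a rounded fraction of the total sample count, the same arithmetic as the casts just above:

    #include <cstdint>

    // Rounded count of samples at or below the given percentile of n samples.
    uint64_t highPercentileCount(double highPercentile, uint64_t n) {
        return static_cast<uint64_t>(highPercentile / 100.0 *
                                     static_cast<double>(n) + 0.5);
    }

For example, highPercentileCount(90.0, 1001) is 901.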
if ((n + 1) > k && (n + 1) % k == 0) { LOG_TRACE(<< "Refreshing high quantile summary"); - uint64_t highPercentileCount = static_cast<uint64_t>((HIGH_PERCENTILE / 100.0) * static_cast<double>(n + 1) + 0.5); + uint64_t highPercentileCount = static_cast<uint64_t>( + (HIGH_PERCENTILE / 100.0) * static_cast<double>(n + 1) + 0.5); LOG_TRACE(<< "s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount << ", desired c(H) = " << highPercentileCount); @@ -564,22 +618,29 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score) { TUInt32UInt64PrVec H; m_RawScoreHighQuantileSummary.summary(H); - std::size_t i0 = - std::min(static_cast<std::size_t>( - std::lower_bound(L.begin(), L.end(), highPercentileCount, maths::COrderings::SSecondLess()) - L.begin()), - L.size() - 1); - std::size_t j = - std::min(static_cast<std::size_t>(std::upper_bound(H.begin(), H.end(), L[i0], maths::COrderings::SFirstLess()) - H.begin()), - H.size() - 1); + std::size_t i0 = std::min( + static_cast<std::size_t>(std::lower_bound(L.begin(), L.end(), highPercentileCount, + maths::COrderings::SSecondLess()) - + L.begin()), + L.size() - 1); + std::size_t j = std::min( + static_cast<std::size_t>(std::upper_bound(H.begin(), H.end(), L[i0], + maths::COrderings::SFirstLess()) - + H.begin()), + H.size() - 1); uint64_t r = L[i0].second; - for (std::size_t i = i0 + 1; i < L.size() && L[i0].second + m_RawScoreHighQuantileSummary.n() < n + 1; ++i) { + for (std::size_t i = i0 + 1; + i < L.size() && L[i0].second + m_RawScoreHighQuantileSummary.n() < n + 1; + ++i) { for (/**/; j < H.size() && H[j].first <= L[i].first; ++j) { - r += (H[j].second - (j == 0 ? static_cast<uint64_t>(0) : H[j - 1].second)); + r += (H[j].second - + (j == 0 ? static_cast<uint64_t>(0) : H[j - 1].second)); } uint32_t x = L[i].first; - uint64_t m = r < L[i].second ? L[i].second - r : static_cast<uint64_t>(0); + uint64_t m = r < L[i].second ? 
L[i].second - r + : static_cast<uint64_t>(0); r += m; if (m > 0) { LOG_TRACE(<< "Adding (" << x << ',' << m << ") to H"); @@ -596,13 +657,17 @@ bool CAnomalyScore::CNormalizer::updateQuantiles(double score) { m_HighPercentileCount = n; } - LOG_TRACE(<< "s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount - << ", percentile = " << 100.0 * static_cast<double>(m_HighPercentileCount) / static_cast<double>(n + 1) << "%"); + LOG_TRACE(<< "s(H) = " << m_HighPercentileScore + << ", c(H) = " << m_HighPercentileCount << ", percentile = " + << 100.0 * static_cast<double>(m_HighPercentileCount) / + static_cast<double>(n + 1) + << "%"); } else { m_RawScoreQuantileSummary.quantile(HIGH_PERCENTILE / 100.0, m_HighPercentileScore); double lowerBound, upperBound; m_RawScoreQuantileSummary.cdf(m_HighPercentileScore, 0.0, lowerBound, upperBound); - m_HighPercentileCount = static_cast<uint64_t>(static_cast<double>(n + 1) * lowerBound + 0.5); + m_HighPercentileCount = + static_cast<uint64_t>(static_cast<double>(n + 1) * lowerBound + 0.5); LOG_TRACE(<< "s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount << ", percentile = " << 100.0 * lowerBound << "%"); @@ -632,22 +697,28 @@ void CAnomalyScore::CNormalizer::propagateForwardByTime(double time) { m_RawScoreQuantileSummary.propagateForwardsByTime(time); m_RawScoreHighQuantileSummary.propagateForwardsByTime(time); if (n > 0) { - m_HighPercentileCount = static_cast<uint64_t>(static_cast<double>(m_RawScoreQuantileSummary.n()) / static_cast<double>(n) * - static_cast<double>(m_HighPercentileCount) + - 0.5); + m_HighPercentileCount = static_cast<uint64_t>( + static_cast<double>(m_RawScoreQuantileSummary.n()) / + static_cast<double>(n) * static_cast<double>(m_HighPercentileCount) + + 0.5); } - m_TimeToQuantileDecay += QUANTILE_DECAY_TIME + std::floor(-m_TimeToQuantileDecay / QUANTILE_DECAY_TIME); + m_TimeToQuantileDecay += QUANTILE_DECAY_TIME + + std::floor(-m_TimeToQuantileDecay / QUANTILE_DECAY_TIME); } } -bool CAnomalyScore::CNormalizer::isUpgradable(const std::string& fromVersion, const std::string& toVersion) { +bool CAnomalyScore::CNormalizer::isUpgradable(const std::string& fromVersion, + const std::string& toVersion) { // Any changes to this method need to be reflected in the upgrade() method // below to prevent an inconsistency where this method says an upgrade is // possible but the upgrade() method can't do it. - return (fromVersion == "1" && toVersion == "2") || (fromVersion == "1" && toVersion == "3") || (fromVersion == "2" && toVersion == "3"); + return (fromVersion == "1" && toVersion == "2") || + (fromVersion == "1" && toVersion == "3") || + (fromVersion == "2" && toVersion == "3"); } -bool CAnomalyScore::CNormalizer::upgrade(const std::string& loadedVersion, const std::string& currentVersion) { +bool CAnomalyScore::CNormalizer::upgrade(const std::string& loadedVersion, + const std::string& currentVersion) { if (loadedVersion == currentVersion) { // No upgrade required. 
return true; @@ -659,20 +730,25 @@ bool CAnomalyScore::CNormalizer::upgrade(const std::string& loadedVersion, const {1.0 / 0.3, 1.0, 1.0}, {1.0 / 0.3, 1.0, 1.0}, }; - static const double Q_DIGEST_UPGRADE_FACTOR[][3] = {{1.0, 3.0, 30.0}, {1.0 / 3.0, 1.0, 10.0}, {1.0 / 30.0, 1.0 / 10.0, 1.0}}; + static const double Q_DIGEST_UPGRADE_FACTOR[][3] = { + {1.0, 3.0, 30.0}, {1.0 / 3.0, 1.0, 10.0}, {1.0 / 30.0, 1.0 / 10.0, 1.0}}; std::size_t i, j; - if (!core::CStringUtils::stringToType(loadedVersion, i) || !core::CStringUtils::stringToType(currentVersion, j) || - i - 1 >= boost::size(HIGH_SCORE_UPGRADE_FACTOR) || j - 1 >= boost::size(HIGH_SCORE_UPGRADE_FACTOR[0])) { - LOG_ERROR(<< "Don't know how to upgrade quantiles from version " << loadedVersion << " to version " << currentVersion); + if (!core::CStringUtils::stringToType(loadedVersion, i) || + !core::CStringUtils::stringToType(currentVersion, j) || + i - 1 >= boost::size(HIGH_SCORE_UPGRADE_FACTOR) || + j - 1 >= boost::size(HIGH_SCORE_UPGRADE_FACTOR[0])) { + LOG_ERROR(<< "Don't know how to upgrade quantiles from version " + << loadedVersion << " to version " << currentVersion); return false; } double highScoreUpgradeFactor = HIGH_SCORE_UPGRADE_FACTOR[i - 1][j - 1]; double qDigestUpgradeFactor = Q_DIGEST_UPGRADE_FACTOR[i - 1][j - 1]; - LOG_INFO(<< "Upgrading quantiles from version " << loadedVersion << " to version " << currentVersion - << " - will scale highest score by " << highScoreUpgradeFactor << " and Q digest min/max values by " << qDigestUpgradeFactor); + LOG_INFO(<< "Upgrading quantiles from version " << loadedVersion << " to version " + << currentVersion << " - will scale highest score by " << highScoreUpgradeFactor + << " and Q digest min/max values by " << qDigestUpgradeFactor); // For the maximum score aging is equivalent to scaling. 
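Aside: the upgrade path below scales the persisted state by factors looked up from tables indexed by the loaded and current version numbers. A sketch of that lookup using the Q digest factors quoted in this hunk; bounds checking is omitted:

    #include <cstddef>

    double qDigestUpgradeFactor(std::size_t fromVersion, std::size_t toVersion) {
        static const double FACTOR[3][3] = {{1.0, 3.0, 30.0},
                                            {1.0 / 3.0, 1.0, 10.0},
                                            {1.0 / 30.0, 1.0 / 10.0, 1.0}};
        return FACTOR[fromVersion - 1][toVersion - 1];
    }

For example, qDigestUpgradeFactor(1, 3) is 30.0: upgrading quantile state from version 1 to version 3 scales the Q digest min/max values by 30.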
m_MaxScore.age(highScoreUpgradeFactor); @@ -703,9 +779,12 @@ void CAnomalyScore::CNormalizer::acceptPersistInserter(core::CStatePersistInsert inserter.insertValue(HIGH_PERCENTILE_SCORE_TAG, m_HighPercentileScore); inserter.insertValue(HIGH_PERCENTILE_COUNT_TAG, m_HighPercentileCount); inserter.insertValue(MAX_SCORE_TAG, m_MaxScore.toDelimited()); - inserter.insertLevel(RAW_SCORE_QUANTILE_SUMMARY, boost::bind(&maths::CQDigest::acceptPersistInserter, &m_RawScoreQuantileSummary, _1)); + inserter.insertLevel(RAW_SCORE_QUANTILE_SUMMARY, + boost::bind(&maths::CQDigest::acceptPersistInserter, + &m_RawScoreQuantileSummary, _1)); inserter.insertLevel(RAW_SCORE_HIGH_QUANTILE_SUMMARY, - boost::bind(&maths::CQDigest::acceptPersistInserter, &m_RawScoreHighQuantileSummary, _1)); + boost::bind(&maths::CQDigest::acceptPersistInserter, + &m_RawScoreHighQuantileSummary, _1)); inserter.insertValue(TIME_TO_QUANTILE_DECAY_TAG, m_TimeToQuantileDecay); } @@ -717,14 +796,17 @@ bool CAnomalyScore::CNormalizer::acceptRestoreTraverser(core::CStateRestoreTrave // This used to be 64 bit but is now 32 bit, so may need adjusting // on restoration uint64_t highPercentileScore64(0); - if (core::CStringUtils::stringToType(traverser.value(), highPercentileScore64) == false) { + if (core::CStringUtils::stringToType(traverser.value(), + highPercentileScore64) == false) { LOG_ERROR(<< "Invalid high percentile score in " << traverser.value()); return false; } - m_HighPercentileScore = - static_cast<uint32_t>(std::min(highPercentileScore64, static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))); + m_HighPercentileScore = static_cast<uint32_t>(std::min( + highPercentileScore64, + static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))); } else if (name == HIGH_PERCENTILE_COUNT_TAG) { - if (core::CStringUtils::stringToType(traverser.value(), m_HighPercentileCount) == false) { + if (core::CStringUtils::stringToType(traverser.value(), + m_HighPercentileCount) == false) { LOG_ERROR(<< "Invalid high percentile count in " << traverser.value()); return false; } @@ -734,15 +816,19 @@ bool CAnomalyScore::CNormalizer::acceptRestoreTraverser(core::CStateRestoreTrave return false; } } else if (name == RAW_SCORE_QUANTILE_SUMMARY) { - if (traverser.traverseSubLevel(boost::bind(&maths::CQDigest::acceptRestoreTraverser, &m_RawScoreQuantileSummary, _1)) == - false) { - LOG_ERROR(<< "Invalid raw score quantile summary in " << traverser.value()); + if (traverser.traverseSubLevel( + boost::bind(&maths::CQDigest::acceptRestoreTraverser, + &m_RawScoreQuantileSummary, _1)) == false) { + LOG_ERROR(<< "Invalid raw score quantile summary in " + << traverser.value()); return false; } } else if (name == RAW_SCORE_HIGH_QUANTILE_SUMMARY) { - if (traverser.traverseSubLevel(boost::bind(&maths::CQDigest::acceptRestoreTraverser, &m_RawScoreHighQuantileSummary, _1)) == - false) { - LOG_ERROR(<< "Invalid raw score high quantile summary in " << traverser.value()); + if (traverser.traverseSubLevel( + boost::bind(&maths::CQDigest::acceptRestoreTraverser, + &m_RawScoreHighQuantileSummary, _1)) == false) { + LOG_ERROR(<< "Invalid raw score high quantile summary in " + << traverser.value()); return false; } } @@ -807,7 +893,8 @@ bool CAnomalyScore::normalizerFromJson(const std::string& json, CNormalizer& nor return normalizerFromJson(traverser, normalizer); } -bool CAnomalyScore::normalizerFromJson(core::CStateRestoreTraverser& traverser, CNormalizer& normalizer) { +bool CAnomalyScore::normalizerFromJson(core::CStateRestoreTraverser& traverser, + CNormalizer& normalizer) { bool restoredNormalizer(false); std::string 
restoredVersion(MISSING_VERSION_FORMAT_VERSION); @@ -818,20 +905,22 @@ bool CAnomalyScore::normalizerFromJson(core::CStateRestoreTraverser& traverser, restoredVersion = traverser.value(); if (restoredVersion != CURRENT_FORMAT_VERSION) { if (normalizer.isUpgradable(restoredVersion, CURRENT_FORMAT_VERSION)) { - LOG_DEBUG(<< "Restored quantiles JSON version is " << restoredVersion << "; current JSON version is " - << CURRENT_FORMAT_VERSION << " - will upgrade quantiles"); + LOG_DEBUG(<< "Restored quantiles JSON version is " << restoredVersion + << "; current JSON version is " << CURRENT_FORMAT_VERSION + << " - will upgrade quantiles"); } else { // If the version has changed and the format is too different to // even upgrade then start again from scratch - this counts as a // successful load - LOG_INFO(<< "Restored quantiles JSON version is " << restoredVersion << "; current JSON version is " - << CURRENT_FORMAT_VERSION << " - will restart quantiles from scratch"); + LOG_INFO(<< "Restored quantiles JSON version is " << restoredVersion + << "; current JSON version is " << CURRENT_FORMAT_VERSION + << " - will restart quantiles from scratch"); return true; } } } else if (name == NORMALIZER_TAG) { - restoredNormalizer = - traverser.traverseSubLevel(boost::bind(&CAnomalyScore::CNormalizer::acceptRestoreTraverser, &normalizer, _1)); + restoredNormalizer = traverser.traverseSubLevel(boost::bind( + &CAnomalyScore::CNormalizer::acceptRestoreTraverser, &normalizer, _1)); if (!restoredNormalizer) { LOG_ERROR(<< "Unable to restore quantiles to the normaliser"); } @@ -839,11 +928,12 @@ bool CAnomalyScore::normalizerFromJson(core::CStateRestoreTraverser& traverser, if (restoredNormalizer && restoredVersion != CURRENT_FORMAT_VERSION) { - LOG_INFO(<< "Restored quantiles JSON version is " << restoredVersion << "; current JSON version is " << CURRENT_FORMAT_VERSION - << " - will attempt upgrade"); + LOG_INFO(<< "Restored quantiles JSON version is " << restoredVersion << "; current JSON version is " + << CURRENT_FORMAT_VERSION << " - will attempt upgrade"); if (normalizer.upgrade(restoredVersion, CURRENT_FORMAT_VERSION) == false) { - LOG_ERROR(<< "Failed to upgrade quantiles from version " << restoredVersion << " to version " << CURRENT_FORMAT_VERSION); + LOG_ERROR(<< "Failed to upgrade quantiles from version " << restoredVersion + << " to version " << CURRENT_FORMAT_VERSION); return false; } } @@ -872,7 +962,8 @@ void CAnomalyScore::normalizerToJson(const CNormalizer& normalizer, inserter.insertValue(MLVERSION_ATTRIBUTE, CURRENT_FORMAT_VERSION); inserter.insertValue(TIME_ATTRIBUTE, core::CStringUtils::typeToString(time)); - inserter.insertLevel(NORMALIZER_TAG, boost::bind(&CNormalizer::acceptPersistInserter, &normalizer, _1)); + inserter.insertLevel(NORMALIZER_TAG, boost::bind(&CNormalizer::acceptPersistInserter, + &normalizer, _1)); } json = ss.str(); diff --git a/lib/model/CBucketGatherer.cc b/lib/model/CBucketGatherer.cc index 2f5a2ce0b3..dd7d0ccf0e 100644 --- a/lib/model/CBucketGatherer.cc +++ b/lib/model/CBucketGatherer.cc @@ -41,7 +41,8 @@ namespace detail { using TSizeSizePr = std::pair<std::size_t, std::size_t>; using TSizeSizePrUInt64Pr = std::pair<TSizeSizePr, uint64_t>; using TSizeSizePrStoredStringPtrPrUInt64UMap = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMap; -using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapCItr; +using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = + CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapCItr; const std::string PERSON_ATTRIBUTE_COUNT_TAG("f"); 
const std::string PERSON_UID_TAG("a"); @@ -52,14 +53,17 @@ const std::string INFLUENCE_ITEM_TAG("a"); const std::string INFLUENCE_COUNT_TAG("b"); //! Persist a person, attribute and count tuple. -void insertPersonAttributeCounts(const TSizeSizePrUInt64Pr& tuple, core::CStatePersistInserter& inserter) { +void insertPersonAttributeCounts(const TSizeSizePrUInt64Pr& tuple, + core::CStatePersistInserter& inserter) { inserter.insertValue(PERSON_UID_TAG, CDataGatherer::extractPersonId(tuple)); inserter.insertValue(ATTRIBUTE_UID_TAG, CDataGatherer::extractAttributeId(tuple)); inserter.insertValue(COUNT_TAG, CDataGatherer::extractData(tuple)); } //! Restore a person, attribute and count. -bool restorePersonAttributeCounts(core::CStateRestoreTraverser& traverser, TSizeSizePr& key, uint64_t& count) { +bool restorePersonAttributeCounts(core::CStateRestoreTraverser& traverser, + TSizeSizePr& key, + uint64_t& count) { do { const std::string& name = traverser.name(); RESTORE_BUILT_IN(PERSON_UID_TAG, key.first) @@ -70,28 +74,35 @@ bool restorePersonAttributeCounts(core::CStateRestoreTraverser& traverser, TSize } //! Persist a collection of influencer person and attribute counts. -void insertInfluencerPersonAttributeCounts(const TSizeSizePrStoredStringPtrPrUInt64UMap& map, core::CStatePersistInserter& inserter) { +void insertInfluencerPersonAttributeCounts(const TSizeSizePrStoredStringPtrPrUInt64UMap& map, + core::CStatePersistInserter& inserter) { std::vector<TSizeSizePrStoredStringPtrPrUInt64UMapCItr> ordered; ordered.reserve(map.size()); for (auto i = map.begin(); i != map.end(); ++i) { ordered.push_back(i); } - std::sort( - ordered.begin(), ordered.end(), [](TSizeSizePrStoredStringPtrPrUInt64UMapCItr lhs, TSizeSizePrStoredStringPtrPrUInt64UMapCItr rhs) { - return maths::COrderings::lexicographical_compare( - lhs->first.first, *lhs->first.second, lhs->second, rhs->first.first, *rhs->first.second, rhs->second); - }); + std::sort(ordered.begin(), ordered.end(), + [](TSizeSizePrStoredStringPtrPrUInt64UMapCItr lhs, + TSizeSizePrStoredStringPtrPrUInt64UMapCItr rhs) { + return maths::COrderings::lexicographical_compare( + lhs->first.first, *lhs->first.second, lhs->second, + rhs->first.first, *rhs->first.second, rhs->second); + }); for (std::size_t i = 0u; i < ordered.size(); ++i) { - inserter.insertValue(PERSON_UID_TAG, CDataGatherer::extractPersonId(ordered[i]->first)); - inserter.insertValue(ATTRIBUTE_UID_TAG, CDataGatherer::extractAttributeId(ordered[i]->first)); - inserter.insertValue(INFLUENCER_TAG, *CDataGatherer::extractData(ordered[i]->first)); + inserter.insertValue(PERSON_UID_TAG, + CDataGatherer::extractPersonId(ordered[i]->first)); + inserter.insertValue(ATTRIBUTE_UID_TAG, + CDataGatherer::extractAttributeId(ordered[i]->first)); + inserter.insertValue(INFLUENCER_TAG, + *CDataGatherer::extractData(ordered[i]->first)); inserter.insertValue(COUNT_TAG, ordered[i]->second); } } //! Restore a collection of influencer person and attribute counts. 
-bool restoreInfluencerPersonAttributeCounts(core::CStateRestoreTraverser& traverser, TSizeSizePrStoredStringPtrPrUInt64UMap& map) { +bool restoreInfluencerPersonAttributeCounts(core::CStateRestoreTraverser& traverser, + TSizeSizePrStoredStringPtrPrUInt64UMap& map) { std::size_t person = 0; std::size_t attribute = 0; std::string influence = ""; @@ -116,25 +127,30 @@ bool restoreInfluencerPersonAttributeCounts(core::CStateRestoreTraverser& traver struct SBucketCountsPersister { using TSizeSizePrUInt64UMap = CBucketGatherer::TSizeSizePrUInt64UMap; - void operator()(const TSizeSizePrUInt64UMap& bucketCounts, core::CStatePersistInserter& inserter) { + void operator()(const TSizeSizePrUInt64UMap& bucketCounts, + core::CStatePersistInserter& inserter) { CBucketGatherer::TSizeSizePrUInt64PrVec personAttributeCounts; personAttributeCounts.reserve(bucketCounts.size()); personAttributeCounts.assign(bucketCounts.begin(), bucketCounts.end()); std::sort(personAttributeCounts.begin(), personAttributeCounts.end()); for (std::size_t i = 0; i < personAttributeCounts.size(); ++i) { inserter.insertLevel(PERSON_ATTRIBUTE_COUNT_TAG, - boost::bind(&insertPersonAttributeCounts, boost::cref(personAttributeCounts[i]), _1)); + boost::bind(&insertPersonAttributeCounts, + boost::cref(personAttributeCounts[i]), _1)); } } - bool operator()(TSizeSizePrUInt64UMap& bucketCounts, core::CStateRestoreTraverser& traverser) { + bool operator()(TSizeSizePrUInt64UMap& bucketCounts, + core::CStateRestoreTraverser& traverser) { do { TSizeSizePr key; uint64_t count; if (!traverser.hasSubLevel()) { continue; } - if (traverser.traverseSubLevel(boost::bind(&restorePersonAttributeCounts, _1, boost::ref(key), boost::ref(count))) == false) { + if (traverser.traverseSubLevel( + boost::bind(&restorePersonAttributeCounts, _1, + boost::ref(key), boost::ref(count))) == false) { LOG_ERROR(<< "Invalid person attribute count"); continue; } @@ -146,24 +162,29 @@ struct SBucketCountsPersister { //! \brief Manages persistence influencer bucket counts. 
struct SInfluencerCountsPersister { - using TSizeSizePrStoredStringPtrPrUInt64UMapVec = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec; + using TSizeSizePrStoredStringPtrPrUInt64UMapVec = + CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec; - void operator()(const TSizeSizePrStoredStringPtrPrUInt64UMapVec& data, core::CStatePersistInserter& inserter) { + void operator()(const TSizeSizePrStoredStringPtrPrUInt64UMapVec& data, + core::CStatePersistInserter& inserter) { for (std::size_t i = 0; i < data.size(); ++i) { inserter.insertValue(INFLUENCE_COUNT_TAG, i); - inserter.insertLevel(INFLUENCE_ITEM_TAG, boost::bind(&insertInfluencerPersonAttributeCounts, boost::cref(data[i]), _1)); + inserter.insertLevel(INFLUENCE_ITEM_TAG, + boost::bind(&insertInfluencerPersonAttributeCounts, + boost::cref(data[i]), _1)); } } - bool operator()(TSizeSizePrStoredStringPtrPrUInt64UMapVec& data, core::CStateRestoreTraverser& traverser) const { + bool operator()(TSizeSizePrStoredStringPtrPrUInt64UMapVec& data, + core::CStateRestoreTraverser& traverser) const { std::size_t i = 0; do { const std::string name = traverser.name(); RESTORE_BUILT_IN(INFLUENCE_COUNT_TAG, i) RESTORE_SETUP_TEARDOWN( - INFLUENCE_ITEM_TAG, - data.resize(std::max(data.size(), i + 1)), - traverser.traverseSubLevel(boost::bind(&restoreInfluencerPersonAttributeCounts, _1, boost::ref(data[i]))), + INFLUENCE_ITEM_TAG, data.resize(std::max(data.size(), i + 1)), + traverser.traverseSubLevel(boost::bind(&restoreInfluencerPersonAttributeCounts, + _1, boost::ref(data[i]))), /**/) } while (traverser.next()); return true; @@ -177,9 +198,7 @@ const std::string CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG("a"); const std::string CBucketGatherer::METRIC_BUCKET_GATHERER_TAG("b"); CBucketGatherer::CBucketGatherer(CDataGatherer& dataGatherer, core_t::TTime startTime) - : m_DataGatherer(dataGatherer), - m_EarliestTime(startTime), - m_BucketStart(startTime), + : m_DataGatherer(dataGatherer), m_EarliestTime(startTime), m_BucketStart(startTime), m_PersonAttributeCounts(dataGatherer.params().s_LatencyBuckets, dataGatherer.params().s_BucketLength, startTime, @@ -188,13 +207,14 @@ CBucketGatherer::CBucketGatherer(CDataGatherer& dataGatherer, core_t::TTime star dataGatherer.params().s_BucketLength, startTime, TSizeSizePrUSet(1)), - m_InfluencerCounts(dataGatherer.params().s_LatencyBuckets + 3, dataGatherer.params().s_BucketLength, startTime) { + m_InfluencerCounts(dataGatherer.params().s_LatencyBuckets + 3, + dataGatherer.params().s_BucketLength, + startTime) { } CBucketGatherer::CBucketGatherer(bool isForPersistence, const CBucketGatherer& other) : m_DataGatherer(other.m_DataGatherer), - m_EarliestTime(other.m_EarliestTime), - m_BucketStart(other.m_BucketStart), + m_EarliestTime(other.m_EarliestTime), m_BucketStart(other.m_BucketStart), m_PersonAttributeCounts(other.m_PersonAttributeCounts), m_MultiBucketPersonAttributeCounts(other.m_MultiBucketPersonAttributeCounts), m_PersonAttributeExplicitNulls(other.m_PersonAttributeExplicitNulls), @@ -226,7 +246,8 @@ bool CBucketGatherer::addEventData(CEventData& data) { std::size_t pid = *data.personId(); std::size_t cid = *data.attributeId(); std::size_t count = *data.count(); - if ((pid != CDynamicStringIdRegistry::INVALID_ID) && (cid != CDynamicStringIdRegistry::INVALID_ID)) { + if ((pid != CDynamicStringIdRegistry::INVALID_ID) && + (cid != CDynamicStringIdRegistry::INVALID_ID)) { // Has the person/attribute been deleted from the gatherer? 
if (!m_DataGatherer.isPersonActive(pid)) { LOG_DEBUG(<< "Not adding value for deleted person " << pid); @@ -242,7 +263,8 @@ bool CBucketGatherer::addEventData(CEventData& data) { // If record is explicit null just note that a null record has been seen // for the given (pid, cid) pair. if (data.isExplicitNull()) { - TSizeSizePrUSet& bucketExplicitNulls = m_PersonAttributeExplicitNulls.get(time); + TSizeSizePrUSet& bucketExplicitNulls = + m_PersonAttributeExplicitNulls.get(time); bucketExplicitNulls.insert(pidCid); return true; } @@ -253,24 +275,29 @@ bool CBucketGatherer::addEventData(CEventData& data) { } const CEventData::TOptionalStrVec influences = data.influences(); - TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencerCounts = m_InfluencerCounts.get(time); + TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencerCounts = + m_InfluencerCounts.get(time); influencerCounts.resize(influences.size()); TStoredStringPtrVec canonicalInfluences(influences.size()); for (std::size_t i = 0u; i < influences.size(); ++i) { const CEventData::TOptionalStr& influence = influences[i]; if (influence) { - const core::CStoredStringPtr& inf = CStringStore::influencers().get(*influence); + const core::CStoredStringPtr& inf = + CStringStore::influencers().get(*influence); canonicalInfluences[i] = inf; if (count > 0) { influencerCounts[i] - .emplace(boost::unordered::piecewise_construct, boost::make_tuple(pidCid, inf), boost::make_tuple(uint64_t(0))) + .emplace(boost::unordered::piecewise_construct, + boost::make_tuple(pidCid, inf), + boost::make_tuple(uint64_t(0))) .first->second += count; } } } - this->addValue(pid, cid, time, data.values(), count, data.stringValue(), canonicalInfluences); + this->addValue(pid, cid, time, data.values(), count, data.stringValue(), + canonicalInfluences); } return true; } @@ -310,30 +337,37 @@ void CBucketGatherer::hiddenTimeNow(core_t::TTime time, bool skipUpdates) { } void CBucketGatherer::sampleNow(core_t::TTime sampleBucketStart) { - core_t::TTime timeNow = sampleBucketStart + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; + core_t::TTime timeNow = + sampleBucketStart + + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; this->timeNow(timeNow); this->sample(sampleBucketStart); } void CBucketGatherer::skipSampleNow(core_t::TTime sampleBucketStart) { - core_t::TTime timeNow = sampleBucketStart + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; + core_t::TTime timeNow = + sampleBucketStart + + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; this->hiddenTimeNow(timeNow, true); } void CBucketGatherer::sample(core_t::TTime time) { // Merge the current bucket's statistics into multiple bucket statistics. 
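Aside: the loop below folds the finished bucket's per (person, attribute) counts into one accumulator per configured longer bucket length. A sketch with plain std::map standing in for the library's unordered containers and circular buffers:

    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    using TKey = std::pair<std::size_t, std::size_t>; // (person id, attribute id)

    // Accumulate the current bucket's counts into each longer bucket's map.
    void mergeCounts(const std::map<TKey, uint64_t>& currentBucket,
                     const std::vector<long>& bucketLengths,
                     std::map<long, std::map<TKey, uint64_t>>& accumulators) {
        for (long length : bucketLengths) {
            auto& accumulator = accumulators[length];
            for (const auto& count : currentBucket) {
                accumulator[count.first] += count.second;
            }
        }
    }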
for (auto bucketLength : m_DataGatherer.params().s_MultipleBucketLengths) { - auto& multipleBucketPersonAttributeCounts = m_MultiBucketPersonAttributeCounts[bucketLength]; + auto& multipleBucketPersonAttributeCounts = + m_MultiBucketPersonAttributeCounts[bucketLength]; for (const auto& count : m_PersonAttributeCounts.get(time)) { multipleBucketPersonAttributeCounts[count.first] += count.second; } - auto& multipleBucketPersonAttributeExplicitNulls = m_MultiBucketPersonAttributeExplicitNulls[bucketLength]; + auto& multipleBucketPersonAttributeExplicitNulls = + m_MultiBucketPersonAttributeExplicitNulls[bucketLength]; for (const auto& nulls : m_PersonAttributeExplicitNulls.get(time)) { multipleBucketPersonAttributeExplicitNulls.insert(nulls); } - const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencerCounts = m_InfluencerCounts.get(time); + const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencerCounts = + m_InfluencerCounts.get(time); auto& multiBucketInfluencerCounts = m_MultiBucketInfluencerCounts[bucketLength]; multiBucketInfluencerCounts.resize(influencerCounts.size()); for (std::size_t i = 0u; i < influencerCounts.size(); ++i) { @@ -350,13 +384,15 @@ void CBucketGatherer::personNonZeroCounts(core_t::TTime time, TSizeUInt64PrVec& result.clear(); if (!this->dataAvailable(time)) { - LOG_ERROR(<< "No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); + LOG_ERROR(<< "No statistics at " << time + << ", current bucket = " << this->printCurrentBucket()); return; } TSizeUInt64Map personCounts; for (const auto& count : this->bucketCounts(time)) { - personCounts[CDataGatherer::extractPersonId(count)] += CDataGatherer::extractData(count); + personCounts[CDataGatherer::extractPersonId(count)] += + CDataGatherer::extractData(count); } result.reserve(personCounts.size()); result.assign(personCounts.begin(), personCounts.end()); @@ -387,7 +423,8 @@ void CBucketGatherer::removePeople(std::size_t lowestPersonToRemove) { void CBucketGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { if (!attributesToRemove.empty()) { remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_PersonAttributeCounts); - remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_PersonAttributeExplicitNulls); + remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), + m_PersonAttributeExplicitNulls); remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_InfluencerCounts); } } @@ -401,7 +438,8 @@ void CBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { attributesToRemove.push_back(cid); } remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_PersonAttributeCounts); - remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_PersonAttributeExplicitNulls); + remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), + m_PersonAttributeExplicitNulls); remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), m_InfluencerCounts); } } @@ -415,7 +453,8 @@ void CBucketGatherer::currentBucketStartTime(core_t::TTime time) { } core_t::TTime CBucketGatherer::earliestBucketStartTime() const { - return this->currentBucketStartTime() - (m_DataGatherer.params().s_LatencyBuckets * this->bucketLength()); + return this->currentBucketStartTime() - + (m_DataGatherer.params().s_LatencyBuckets * this->bucketLength()); } core_t::TTime CBucketGatherer::bucketLength() const { @@ -445,12 +484,14 @@ bool CBucketGatherer::validateSampleTimes(core_t::TTime& startTime, core_t::TTim return false; } if (endTime <= 
startTime) { - LOG_ERROR(<< "End time " << endTime << " is not greater than the start time " << startTime); + LOG_ERROR(<< "End time " << endTime + << " is not greater than the start time " << startTime); return false; } for (/**/; startTime < endTime; startTime += this->bucketLength()) { if (!this->dataAvailable(startTime)) { - LOG_ERROR(<< "No counts available at " << startTime << ", current bucket = " << this->printCurrentBucket()); + LOG_ERROR(<< "No counts available at " << startTime + << ", current bucket = " << this->printCurrentBucket()); continue; } return true; @@ -469,11 +510,13 @@ std::string CBucketGatherer::printCurrentBucket() const { return result.str(); } -const CBucketGatherer::TSizeSizePrUInt64UMap& CBucketGatherer::bucketCounts(core_t::TTime time) const { +const CBucketGatherer::TSizeSizePrUInt64UMap& +CBucketGatherer::bucketCounts(core_t::TTime time) const { return m_PersonAttributeCounts.get(time); } -const CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec& CBucketGatherer::influencerCounts(core_t::TTime time) const { +const CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec& +CBucketGatherer::influencerCounts(core_t::TTime time) const { return m_InfluencerCounts.get(time); } @@ -484,7 +527,8 @@ bool CBucketGatherer::hasExplicitNullsOnly(core_t::TTime time, std::size_t pid, } const TSizeSizePrUInt64UMap& bucketCounts = m_PersonAttributeCounts.get(time); TSizeSizePr pidCid = std::make_pair(pid, cid); - return bucketExplicitNulls.find(pidCid) != bucketExplicitNulls.end() && bucketCounts.find(pidCid) == bucketCounts.end(); + return bucketExplicitNulls.find(pidCid) != bucketExplicitNulls.end() && + bucketCounts.find(pidCid) == bucketCounts.end(); } uint64_t CBucketGatherer::checksum() const { @@ -503,24 +547,30 @@ uint64_t CBucketGatherer::checksum() const { for (const auto& count : bucketCounts) { std::size_t pid = CDataGatherer::extractPersonId(count); std::size_t cid = CDataGatherer::extractAttributeId(count); - TStrCRefStrCRefPr key(TStrCRef(m_DataGatherer.personName(pid)), TStrCRef(m_DataGatherer.attributeName(cid))); + TStrCRefStrCRefPr key(TStrCRef(m_DataGatherer.personName(pid)), + TStrCRef(m_DataGatherer.attributeName(cid))); personAttributeCounts.emplace_back(key, CDataGatherer::extractData(count)); } - std::sort(personAttributeCounts.begin(), personAttributeCounts.end(), maths::COrderings::SLexicographicalCompare()); + std::sort(personAttributeCounts.begin(), personAttributeCounts.end(), + maths::COrderings::SLexicographicalCompare()); result = maths::CChecksum::calculate(result, personAttributeCounts); } - result = maths::CChecksum::calculate(result, m_PersonAttributeExplicitNulls.latestBucketEnd()); + result = maths::CChecksum::calculate( + result, m_PersonAttributeExplicitNulls.latestBucketEnd()); for (const auto& bucketExplicitNulls : m_PersonAttributeExplicitNulls) { TStrCRefStrCRefPrVec personAttributeExplicitNulls; personAttributeExplicitNulls.reserve(bucketExplicitNulls.size()); for (const auto& nulls : bucketExplicitNulls) { std::size_t pid = CDataGatherer::extractPersonId(nulls); std::size_t cid = CDataGatherer::extractAttributeId(nulls); - TStrCRefStrCRefPr key(TStrCRef(m_DataGatherer.personName(pid)), TStrCRef(m_DataGatherer.attributeName(cid))); + TStrCRefStrCRefPr key(TStrCRef(m_DataGatherer.personName(pid)), + TStrCRef(m_DataGatherer.attributeName(cid))); personAttributeExplicitNulls.push_back(key); } - std::sort(personAttributeExplicitNulls.begin(), personAttributeExplicitNulls.end(), 
maths::COrderings::SLexicographicalCompare()); + std::sort(personAttributeExplicitNulls.begin(), + personAttributeExplicitNulls.end(), + maths::COrderings::SLexicographicalCompare()); result = maths::CChecksum::calculate(result, personAttributeExplicitNulls); } @@ -532,7 +582,8 @@ uint64_t CBucketGatherer::checksum() const { void CBucketGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CBucketGatherer"); core::CMemoryDebug::dynamicSize("m_PersonAttributeCounts", m_PersonAttributeCounts, mem); - core::CMemoryDebug::dynamicSize("m_PersonAttributeExplicitNulls", m_PersonAttributeExplicitNulls, mem); + core::CMemoryDebug::dynamicSize("m_PersonAttributeExplicitNulls", + m_PersonAttributeExplicitNulls, mem); core::CMemoryDebug::dynamicSize("m_Influencers", m_InfluencerCounts, mem); } @@ -555,8 +606,10 @@ bool CBucketGatherer::resetBucket(core_t::TTime bucketStart) { return false; } - if (!this->dataAvailable(bucketStart) || bucketStart >= this->currentBucketStartTime() + this->bucketLength()) { - LOG_WARN(<< "No data available at " << bucketStart << ", current bucket = " << this->printCurrentBucket()); + if (!this->dataAvailable(bucketStart) || + bucketStart >= this->currentBucketStartTime() + this->bucketLength()) { + LOG_WARN(<< "No data available at " << bucketStart + << ", current bucket = " << this->printCurrentBucket()); return false; } @@ -569,16 +622,17 @@ bool CBucketGatherer::resetBucket(core_t::TTime bucketStart) { void CBucketGatherer::baseAcceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(BUCKET_START_TAG, m_BucketStart); - inserter.insertLevel(BUCKET_COUNT_TAG, - boost::bind(TSizeSizePrUInt64UMapQueue::CSerializer(), - boost::cref(m_PersonAttributeCounts), - _1)); + inserter.insertLevel( + BUCKET_COUNT_TAG, + boost::bind(TSizeSizePrUInt64UMapQueue::CSerializer(), + boost::cref(m_PersonAttributeCounts), _1)); inserter.insertLevel( INFLUENCERS_COUNT_TAG, - boost::bind(TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue::CSerializer(), - boost::cref(m_InfluencerCounts), - _1)); - core::CPersistUtils::persist(BUCKET_EXPLICIT_NULLS_TAG, m_PersonAttributeExplicitNulls, inserter); + boost::bind( + TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue::CSerializer(), + boost::cref(m_InfluencerCounts), _1)); + core::CPersistUtils::persist(BUCKET_EXPLICIT_NULLS_TAG, + m_PersonAttributeExplicitNulls, inserter); } bool CBucketGatherer::baseAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -586,27 +640,32 @@ bool CBucketGatherer::baseAcceptRestoreTraverser(core::CStateRestoreTraverser& t do { const std::string& name = traverser.name(); RESTORE_BUILT_IN(BUCKET_START_TAG, m_BucketStart) - RESTORE_SETUP_TEARDOWN(BUCKET_COUNT_TAG, - m_PersonAttributeCounts = TSizeSizePrUInt64UMapQueue( - m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), m_BucketStart, TSizeSizePrUInt64UMap(1)), - traverser.traverseSubLevel(boost::bind( - TSizeSizePrUInt64UMapQueue::CSerializer(TSizeSizePrUInt64UMap(1)), - boost::ref(m_PersonAttributeCounts), - _1)), - /**/) - RESTORE_SETUP_TEARDOWN(INFLUENCERS_COUNT_TAG, - m_InfluencerCounts = TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue( - m_DataGatherer.params().s_LatencyBuckets + 3, this->bucketLength(), m_BucketStart), - traverser.traverseSubLevel(boost::bind( - TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue::CSerializer(), - boost::ref(m_InfluencerCounts), - _1)), - /**/) - RESTORE_SETUP_TEARDOWN(BUCKET_EXPLICIT_NULLS_TAG, - m_PersonAttributeExplicitNulls = 
TSizeSizePrUSetQueue( - m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), m_BucketStart, TSizeSizePrUSet(1)), - core::CPersistUtils::restore(BUCKET_EXPLICIT_NULLS_TAG, m_PersonAttributeExplicitNulls, traverser), - /**/) + RESTORE_SETUP_TEARDOWN( + BUCKET_COUNT_TAG, + m_PersonAttributeCounts = TSizeSizePrUInt64UMapQueue( + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), + m_BucketStart, TSizeSizePrUInt64UMap(1)), + traverser.traverseSubLevel(boost::bind( + TSizeSizePrUInt64UMapQueue::CSerializer( + TSizeSizePrUInt64UMap(1)), + boost::ref(m_PersonAttributeCounts), _1)), + /**/) + RESTORE_SETUP_TEARDOWN( + INFLUENCERS_COUNT_TAG, + m_InfluencerCounts = TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue( + m_DataGatherer.params().s_LatencyBuckets + 3, this->bucketLength(), m_BucketStart), + traverser.traverseSubLevel(boost::bind( + TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue::CSerializer(), + boost::ref(m_InfluencerCounts), _1)), + /**/) + RESTORE_SETUP_TEARDOWN( + BUCKET_EXPLICIT_NULLS_TAG, + m_PersonAttributeExplicitNulls = TSizeSizePrUSetQueue( + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), + m_BucketStart, TSizeSizePrUSet(1)), + core::CPersistUtils::restore(BUCKET_EXPLICIT_NULLS_TAG, + m_PersonAttributeExplicitNulls, traverser), + /**/) } while (traverser.next()); return true; } diff --git a/lib/model/CCountingModel.cc b/lib/model/CCountingModel.cc index c10b82ad05..bffdc9546e 100644 --- a/lib/model/CCountingModel.cc +++ b/lib/model/CCountingModel.cc @@ -35,22 +35,28 @@ CCountingModel::CCountingModel(const SModelParams& params, const TDataGathererPt m_StartTime(CAnomalyDetectorModel::TIME_UNSET) { } -CCountingModel::CCountingModel(const SModelParams& params, const TDataGathererPtr& dataGatherer, core::CStateRestoreTraverser& traverser) +CCountingModel::CCountingModel(const SModelParams& params, + const TDataGathererPtr& dataGatherer, + core::CStateRestoreTraverser& traverser) : CAnomalyDetectorModel(params, dataGatherer, TFeatureInfluenceCalculatorCPtrPrVecVec()), m_StartTime(CAnomalyDetectorModel::TIME_UNSET) { - traverser.traverseSubLevel(boost::bind(&CCountingModel::acceptRestoreTraverser, this, _1)); + traverser.traverseSubLevel( + boost::bind(&CCountingModel::acceptRestoreTraverser, this, _1)); } CCountingModel::CCountingModel(bool isForPersistence, const CCountingModel& other) - : CAnomalyDetectorModel(isForPersistence, other), m_StartTime(0), m_MeanCounts(other.m_MeanCounts) { + : CAnomalyDetectorModel(isForPersistence, other), m_StartTime(0), + m_MeanCounts(other.m_MeanCounts) { if (!isForPersistence) { LOG_ABORT(<< "This constructor only creates clones for persistence"); } } void CCountingModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(), core::CIEEE754::E_SinglePrecision); - core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, this->personBucketCounts(), inserter); + inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(), + core::CIEEE754::E_SinglePrecision); + core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, + this->personBucketCounts(), inserter); core::CPersistUtils::persist(MEAN_COUNT_TAG, m_MeanCounts, inserter); this->interimBucketCorrectorAcceptPersistInserter(INTERIM_BUCKET_CORRECTOR_TAG, inserter); } @@ -66,7 +72,8 @@ bool CCountingModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traver } this->windowBucketCount(count); } else if (name == PERSON_BUCKET_COUNT_TAG) { - if 
(core::CPersistUtils::restore(name, this->personBucketCounts(), traverser) == false) { + if (core::CPersistUtils::restore(name, this->personBucketCounts(), + traverser) == false) { LOG_ERROR(<< "Invalid bucket counts in " << traverser.value()); return false; } @@ -105,33 +112,41 @@ bool CCountingModel::isMetric() const { return false; } -CCountingModel::TOptionalUInt64 CCountingModel::currentBucketCount(std::size_t pid, core_t::TTime time) const { +CCountingModel::TOptionalUInt64 +CCountingModel::currentBucketCount(std::size_t pid, core_t::TTime time) const { if (!this->bucketStatsAvailable(time)) { - LOG_ERROR(<< "No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); + LOG_ERROR(<< "No statistics at " << time + << ", current bucket = " << this->printCurrentBucket()); return TOptionalUInt64(); } - auto result = std::lower_bound(m_Counts.begin(), m_Counts.end(), pid, maths::COrderings::SFirstLess()); + auto result = std::lower_bound(m_Counts.begin(), m_Counts.end(), pid, + maths::COrderings::SFirstLess()); - return result != m_Counts.end() && result->first == pid ? result->second : static_cast(0); + return result != m_Counts.end() && result->first == pid + ? result->second + : static_cast(0); } CCountingModel::TOptionalDouble CCountingModel::baselineBucketCount(std::size_t pid) const { return pid < m_MeanCounts.size() ? maths::CBasicStatistics::mean(m_MeanCounts[pid]) : 0.0; } -CCountingModel::TDouble1Vec -CCountingModel::currentBucketValue(model_t::EFeature /*feature*/, std::size_t pid, std::size_t /*cid*/, core_t::TTime time) const { +CCountingModel::TDouble1Vec CCountingModel::currentBucketValue(model_t::EFeature /*feature*/, + std::size_t pid, + std::size_t /*cid*/, + core_t::TTime time) const { TOptionalUInt64 count = this->currentBucketCount(pid, time); return count ? TDouble1Vec(1, static_cast(*count)) : TDouble1Vec(); } -CCountingModel::TDouble1Vec CCountingModel::baselineBucketMean(model_t::EFeature /*feature*/, - std::size_t pid, - std::size_t /*cid*/, - model_t::CResultType /*type*/, - const TSizeDoublePr1Vec& /*correlated*/, - core_t::TTime /*time*/) const { +CCountingModel::TDouble1Vec +CCountingModel::baselineBucketMean(model_t::EFeature /*feature*/, + std::size_t pid, + std::size_t /*cid*/, + model_t::CResultType /*type*/, + const TSizeDoublePr1Vec& /*correlated*/, + core_t::TTime /*time*/) const { TOptionalDouble count = this->baselineBucketCount(pid); return count ? 
TDouble1Vec(1, *count) : TDouble1Vec(); } @@ -142,7 +157,8 @@ void CCountingModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result result.clear(); if (!this->bucketStatsAvailable(time)) { - LOG_ERROR(<< "No statistics at " << time << ", current bucket = " << this->printCurrentBucket()); + LOG_ERROR(<< "No statistics at " << time + << ", current bucket = " << this->printCurrentBucket()); return; } @@ -154,11 +170,15 @@ void CCountingModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result result.assign(people.begin(), people.end()); } -void CCountingModel::sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CCountingModel::sampleOutOfPhase(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { this->sampleBucketStatistics(startTime, endTime, resourceMonitor); } -void CCountingModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CCountingModel::sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { CDataGatherer& gatherer = this->dataGatherer(); m_ScheduledEventDescriptions.clear(); @@ -169,17 +189,21 @@ void CCountingModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTi core_t::TTime bucketLength = gatherer.bucketLength(); for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { - this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor); + this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, + resourceMonitor); gatherer.timeNow(time); this->updateCurrentBucketsStats(time); // Check for scheduled events - core_t::TTime sampleTime = model_t::sampleTime(model_t::E_IndividualCountByBucketAndPerson, time, bucketLength); + core_t::TTime sampleTime = model_t::sampleTime( + model_t::E_IndividualCountByBucketAndPerson, time, bucketLength); setMatchedEventsDescriptions(sampleTime, time); } } -void CCountingModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CCountingModel::sample(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { CDataGatherer& gatherer = this->dataGatherer(); m_ScheduledEventDescriptions.clear(); @@ -200,12 +224,14 @@ void CCountingModel::sample(core_t::TTime startTime, core_t::TTime endTime, CRes } // Check for scheduled events - core_t::TTime sampleTime = model_t::sampleTime(model_t::E_IndividualCountByBucketAndPerson, time, bucketLength); + core_t::TTime sampleTime = model_t::sampleTime( + model_t::E_IndividualCountByBucketAndPerson, time, bucketLength); setMatchedEventsDescriptions(sampleTime, time); } } -void CCountingModel::setMatchedEventsDescriptions(core_t::TTime sampleTime, core_t::TTime bucketStartTime) { +void CCountingModel::setMatchedEventsDescriptions(core_t::TTime sampleTime, + core_t::TTime bucketStartTime) { SModelParams::TStrDetectionRulePrVec matchedEvents = this->checkScheduledEvents(sampleTime); if (matchedEvents.empty() == false) { @@ -217,20 +243,19 @@ void CCountingModel::setMatchedEventsDescriptions(core_t::TTime sampleTime, core } } -SModelParams::TStrDetectionRulePrVec CCountingModel::checkScheduledEvents(core_t::TTime sampleTime) const { - const SModelParams::TStrDetectionRulePrVec& events = this->params().s_ScheduledEvents.get(); +SModelParams::TStrDetectionRulePrVec +CCountingModel::checkScheduledEvents(core_t::TTime sampleTime) const 
{ + const SModelParams::TStrDetectionRulePrVec& events = + this->params().s_ScheduledEvents.get(); SModelParams::TStrDetectionRulePrVec matchedEvents; for (auto& event : events) { // Note that as the counting model is not aware of partitions // scheduled events cannot support partitions as the code stands. - if (event.second.apply(CDetectionRule::E_SkipSampling, - boost::cref(*this), + if (event.second.apply(CDetectionRule::E_SkipSampling, boost::cref(*this), model_t::E_IndividualCountByBucketAndPerson, - model_t::CResultType(), - model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, - model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, - sampleTime)) { + model_t::CResultType(), model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, + model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, sampleTime)) { matchedEvents.push_back(event); } } @@ -256,7 +281,8 @@ bool CCountingModel::computeProbability(std::size_t pid, std::size_t /*numberAttributeProbabilities*/, SAnnotatedProbability& result) const { result = SAnnotatedProbability(1.0); - result.s_CurrentBucketCount = this->currentBucketCount(pid, (startTime + endTime) / 2 - 1); + result.s_CurrentBucketCount = + this->currentBucketCount(pid, (startTime + endTime) / 2 - 1); result.s_BaselineBucketCount = this->baselineBucketCount(pid); return true; } @@ -288,7 +314,8 @@ void CCountingModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) c } std::size_t CCountingModel::memoryUsage() const { - return this->CAnomalyDetectorModel::memoryUsage() + core::CMemory::dynamicSize(m_Counts) + core::CMemory::dynamicSize(m_MeanCounts); + return this->CAnomalyDetectorModel::memoryUsage() + + core::CMemory::dynamicSize(m_Counts) + core::CMemory::dynamicSize(m_MeanCounts); } std::size_t CCountingModel::computeMemoryUsage() const { @@ -311,7 +338,8 @@ void CCountingModel::currentBucketStartTime(core_t::TTime time) { m_StartTime = time; } -const CCountingModel::TStr1Vec& CCountingModel::scheduledEventDescriptions(core_t::TTime time) const { +const CCountingModel::TStr1Vec& +CCountingModel::scheduledEventDescriptions(core_t::TTime time) const { auto it = m_ScheduledEventDescriptions.find(time); if (it == m_ScheduledEventDescriptions.end()) { return EMPTY_STRING_LIST; @@ -323,12 +351,14 @@ double CCountingModel::attributeFrequency(std::size_t /*cid*/) const { return 1.0; } -void CCountingModel::createUpdateNewModels(core_t::TTime /*time*/, CResourceMonitor& /*resourceMonitor*/) { +void CCountingModel::createUpdateNewModels(core_t::TTime /*time*/, + CResourceMonitor& /*resourceMonitor*/) { this->updateRecycledModels(); CDataGatherer& gatherer = this->dataGatherer(); std::size_t numberNewPeople = gatherer.numberPeople(); std::size_t numberExistingPeople = m_MeanCounts.size(); - numberNewPeople = numberNewPeople > numberExistingPeople ? numberNewPeople - numberExistingPeople : 0; + numberNewPeople = numberNewPeople > numberExistingPeople ? 
numberNewPeople - numberExistingPeople + : 0; if (numberNewPeople > 0) { LOG_TRACE(<< "Creating " << numberNewPeople << " new people"); this->createNewModels(numberNewPeople, 0); @@ -364,7 +394,8 @@ void CCountingModel::updateRecycledModels() { this->CAnomalyDetectorModel::updateRecycledModels(); } -void CCountingModel::clearPrunedResources(const TSizeVec& /*people*/, const TSizeVec& /*attributes*/) { +void CCountingModel::clearPrunedResources(const TSizeVec& /*people*/, + const TSizeVec& /*attributes*/) { // Nothing to prune } diff --git a/lib/model/CCountingModelFactory.cc b/lib/model/CCountingModelFactory.cc index b6cff67ff8..53c8634ede 100644 --- a/lib/model/CCountingModelFactory.cc +++ b/lib/model/CCountingModelFactory.cc @@ -23,11 +23,8 @@ namespace model { CCountingModelFactory::CCountingModelFactory(const SModelParams& params, model_t::ESummaryMode summaryMode, const std::string& summaryCountFieldName) - : CModelFactory(params), - m_Identifier(), - m_SummaryMode(summaryMode), - m_SummaryCountFieldName(summaryCountFieldName), - m_UseNull(false), + : CModelFactory(params), m_Identifier(), m_SummaryMode(summaryMode), + m_SummaryCountFieldName(summaryCountFieldName), m_UseNull(false), m_BucketResultsDelay(0) { } @@ -35,7 +32,8 @@ CCountingModelFactory* CCountingModelFactory::clone() const { return new CCountingModelFactory(*this); } -CAnomalyDetectorModel* CCountingModelFactory::makeModel(const SModelInitializationData& initData) const { +CAnomalyDetectorModel* +CCountingModelFactory::makeModel(const SModelInitializationData& initData) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; if (!dataGatherer) { LOG_ERROR(<< "NULL data gatherer"); @@ -44,8 +42,9 @@ CAnomalyDetectorModel* CCountingModelFactory::makeModel(const SModelInitializati return new CCountingModel(this->modelParams(), dataGatherer); } -CAnomalyDetectorModel* CCountingModelFactory::makeModel(const SModelInitializationData& initData, - core::CStateRestoreTraverser& traverser) const { +CAnomalyDetectorModel* +CCountingModelFactory::makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; if (!dataGatherer) { LOG_ERROR(<< "NULL data gatherer"); @@ -54,65 +53,49 @@ CAnomalyDetectorModel* CCountingModelFactory::makeModel(const SModelInitializati return new CCountingModel(this->modelParams(), dataGatherer, traverser); } -CDataGatherer* CCountingModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer(model_t::E_EventRate, - m_SummaryMode, - this->modelParams(), - m_SummaryCountFieldName, - m_PartitionFieldName, - initData.s_PartitionFieldValue, - m_PersonFieldName, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - m_UseNull, - this->searchKey(), - m_Features, - initData.s_StartTime, - 0); -} - -CDataGatherer* CCountingModelFactory::makeDataGatherer(const std::string& partitionFieldValue, - core::CStateRestoreTraverser& traverser) const { - return new CDataGatherer(model_t::E_EventRate, - m_SummaryMode, - this->modelParams(), - m_SummaryCountFieldName, - m_PartitionFieldName, - partitionFieldValue, - m_PersonFieldName, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - m_UseNull, - this->searchKey(), - traverser); -} - -CCountingModelFactory::TPriorPtr CCountingModelFactory::defaultPrior(model_t::EFeature /*feature*/, const SModelParams& /*params*/) const { +CDataGatherer* +CCountingModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { 
+ return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(), + m_SummaryCountFieldName, m_PartitionFieldName, + initData.s_PartitionFieldValue, m_PersonFieldName, + EMPTY_STRING, EMPTY_STRING, TStrVec(), m_UseNull, + this->searchKey(), m_Features, initData.s_StartTime, 0); +} + +CDataGatherer* +CCountingModelFactory::makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const { + return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, + this->modelParams(), m_SummaryCountFieldName, + m_PartitionFieldName, partitionFieldValue, + m_PersonFieldName, EMPTY_STRING, EMPTY_STRING, + TStrVec(), m_UseNull, this->searchKey(), traverser); +} + +CCountingModelFactory::TPriorPtr +CCountingModelFactory::defaultPrior(model_t::EFeature /*feature*/, + const SModelParams& /*params*/) const { return boost::make_shared(); } -CCountingModelFactory::TMultivariatePriorPtr CCountingModelFactory::defaultMultivariatePrior(model_t::EFeature feature, - const SModelParams& /*params*/) const { +CCountingModelFactory::TMultivariatePriorPtr +CCountingModelFactory::defaultMultivariatePrior(model_t::EFeature feature, + const SModelParams& /*params*/) const { return boost::make_shared(model_t::dimension(feature)); } -CCountingModelFactory::TMultivariatePriorPtr CCountingModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, - const SModelParams& /*params*/) const { +CCountingModelFactory::TMultivariatePriorPtr +CCountingModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, + const SModelParams& /*params*/) const { return boost::make_shared(2); } const CSearchKey& CCountingModelFactory::searchKey() const { if (!m_SearchKeyCache) { - m_SearchKeyCache.reset(CSearchKey(m_Identifier, - function_t::function(m_Features), - m_UseNull, - this->modelParams().s_ExcludeFrequent, - "", - m_PersonFieldName, - "", - m_PartitionFieldName)); + m_SearchKeyCache.reset( + CSearchKey(m_Identifier, function_t::function(m_Features), + m_UseNull, this->modelParams().s_ExcludeFrequent, "", + m_PersonFieldName, "", m_PartitionFieldName)); } return *m_SearchKeyCache; } diff --git a/lib/model/CDataGatherer.cc b/lib/model/CDataGatherer.cc index 8dea0afd80..fd1178992a 100644 --- a/lib/model/CDataGatherer.cc +++ b/lib/model/CDataGatherer.cc @@ -50,7 +50,8 @@ namespace detail { //! Make sure \p features only includes supported features, doesn't //! contain any duplicates, etc. -const CDataGatherer::TFeatureVec& sanitize(CDataGatherer::TFeatureVec& features, model_t::EAnalysisCategory gathererType) { +const CDataGatherer::TFeatureVec& sanitize(CDataGatherer::TFeatureVec& features, + model_t::EAnalysisCategory gathererType) { std::size_t j = 0u; for (std::size_t i = 0u; i < features.size(); ++i) { @@ -132,7 +133,8 @@ const CDataGatherer::TFeatureVec& sanitize(CDataGatherer::TFeatureVec& features, } //! Wrapper which copies \p features. 
-CDataGatherer::TFeatureVec sanitize(const CDataGatherer::TFeatureVec& features, model_t::EAnalysisCategory gathererType) { +CDataGatherer::TFeatureVec sanitize(const CDataGatherer::TFeatureVec& features, + model_t::EAnalysisCategory gathererType) { CDataGatherer::TFeatureVec result(features); return sanitize(result, gathererType); } @@ -157,7 +159,8 @@ bool isPopulation(model_t::EAnalysisCategory gathererType) { } // unnamed:: const std::string CDataGatherer::EXPLICIT_NULL("null"); -const std::size_t CDataGatherer::EXPLICIT_NULL_SUMMARY_COUNT(std::numeric_limits::max()); +const std::size_t + CDataGatherer::EXPLICIT_NULL_SUMMARY_COUNT(std::numeric_limits::max()); const std::size_t CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD(10000); const std::size_t CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD(1000); @@ -177,43 +180,32 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, core_t::TTime startTime, int sampleCountOverride) : m_GathererType(gathererType), - m_Features(detail::sanitize(features, gathererType)), - m_SummaryMode(summaryMode), - m_Params(modelParams), - m_PartitionFieldName(partitionFieldName), + m_Features(detail::sanitize(features, gathererType)), m_SummaryMode(summaryMode), + m_Params(modelParams), m_PartitionFieldName(partitionFieldName), m_PartitionFieldValue(CStringStore::names().get(partitionFieldValue)), - m_SearchKey(key), - m_PeopleRegistry(PERSON, stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled), + m_SearchKey(key), m_PeopleRegistry(PERSON, + stat_t::E_NumberNewPeople, + stat_t::E_NumberNewPeopleNotAllowed, + stat_t::E_NumberNewPeopleRecycled), m_AttributesRegistry(ATTRIBUTE, stat_t::E_NumberNewAttributes, stat_t::E_NumberNewAttributesNotAllowed, stat_t::E_NumberNewAttributesRecycled), - m_Population(detail::isPopulation(gathererType)), - m_UseNull(useNull) { + m_Population(detail::isPopulation(gathererType)), m_UseNull(useNull) { // Constructor needs to create 1 bucket gatherer at the startTime // and possibly 1 bucket gatherer at (startTime + bucketLength / 2). 
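[Editor's note] The constructor comment above is worth unpacking: when s_BucketResultsDelay is non-zero a second gatherer is created offset by half a bucket, giving two interleaved bucketings of the same stream. A rough sketch of the resulting bucket boundaries, using assumed values not taken from this patch:

    #include <iostream>
    #include <vector>

    int main() {
        // Assumed values for illustration.
        long bucketLength = 600; // 10 minute buckets
        long startTime = 0;
        bool bucketResultsDelay = true; // stands in for s_BucketResultsDelay > 0

        std::vector<long> gathererStarts{startTime};
        if (bucketResultsDelay) {
            gathererStarts.push_back(startTime + bucketLength / 2);
        }

        // First gatherer buckets:  [0, 600), [600, 1200), ...
        // Second (out-of-phase):   [300, 900), [900, 1500), ...
        for (long start : gathererStarts) {
            std::cout << "buckets [" << start << ", " << start + bucketLength
                      << "), [" << start + bucketLength << ", "
                      << start + 2 * bucketLength << "), ...\n";
        }
        return 0;
    }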
std::sort(m_Features.begin(), m_Features.end()); core_t::TTime bucketLength = modelParams.s_BucketLength; - this->createBucketGatherer(gathererType, - summaryCountFieldName, - personFieldName, - attributeFieldName, - valueFieldName, - influenceFieldNames, - startTime, - sampleCountOverride); + this->createBucketGatherer(gathererType, summaryCountFieldName, + personFieldName, attributeFieldName, valueFieldName, + influenceFieldNames, startTime, sampleCountOverride); if (modelParams.s_BucketResultsDelay > 0) { - this->createBucketGatherer(gathererType, - summaryCountFieldName, - personFieldName, - attributeFieldName, - valueFieldName, - influenceFieldNames, - startTime + (bucketLength / 2), - sampleCountOverride); + this->createBucketGatherer(gathererType, summaryCountFieldName, personFieldName, + attributeFieldName, valueFieldName, influenceFieldNames, + startTime + (bucketLength / 2), sampleCountOverride); } } @@ -230,47 +222,40 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, bool useNull, const CSearchKey& key, core::CStateRestoreTraverser& traverser) - : m_GathererType(gathererType), - m_SummaryMode(summaryMode), - m_Params(modelParams), - m_PartitionFieldName(partitionFieldName), + : m_GathererType(gathererType), m_SummaryMode(summaryMode), + m_Params(modelParams), m_PartitionFieldName(partitionFieldName), m_PartitionFieldValue(CStringStore::names().get(partitionFieldValue)), - m_SearchKey(key), - m_PeopleRegistry(PERSON, stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled), + m_SearchKey(key), m_PeopleRegistry(PERSON, + stat_t::E_NumberNewPeople, + stat_t::E_NumberNewPeopleNotAllowed, + stat_t::E_NumberNewPeopleRecycled), m_AttributesRegistry(ATTRIBUTE, stat_t::E_NumberNewAttributes, stat_t::E_NumberNewAttributesNotAllowed, stat_t::E_NumberNewAttributesRecycled), - m_Population(detail::isPopulation(gathererType)), - m_UseNull(useNull) { - if (traverser.traverseSubLevel(boost::bind(&CDataGatherer::acceptRestoreTraverser, - this, - boost::cref(summaryCountFieldName), - boost::cref(personFieldName), - boost::cref(attributeFieldName), - boost::cref(valueFieldName), - boost::cref(influenceFieldNames), - _1)) == false) { + m_Population(detail::isPopulation(gathererType)), m_UseNull(useNull) { + if (traverser.traverseSubLevel(boost::bind( + &CDataGatherer::acceptRestoreTraverser, this, boost::cref(summaryCountFieldName), + boost::cref(personFieldName), boost::cref(attributeFieldName), + boost::cref(valueFieldName), boost::cref(influenceFieldNames), _1)) == false) { LOG_ERROR(<< "Failed to correctly restore data gatherer"); } } CDataGatherer::CDataGatherer(bool isForPersistence, const CDataGatherer& other) - : m_GathererType(other.m_GathererType), - m_Features(other.m_Features), - m_SummaryMode(other.m_SummaryMode), - m_Params(other.m_Params), + : m_GathererType(other.m_GathererType), m_Features(other.m_Features), + m_SummaryMode(other.m_SummaryMode), m_Params(other.m_Params), m_PartitionFieldName(other.m_PartitionFieldName), m_PartitionFieldValue(other.m_PartitionFieldValue), m_SearchKey(other.m_SearchKey), m_PeopleRegistry(isForPersistence, other.m_PeopleRegistry), m_AttributesRegistry(isForPersistence, other.m_AttributesRegistry), - m_Population(other.m_Population), - m_UseNull(other.m_UseNull) { + m_Population(other.m_Population), m_UseNull(other.m_UseNull) { if (!isForPersistence) { LOG_ABORT(<< "This constructor only creates clones for persistence"); } - for (TBucketGathererPVecCItr i = other.m_Gatherers.begin(); i != 
other.m_Gatherers.end(); ++i) { + for (TBucketGathererPVecCItr i = other.m_Gatherers.begin(); + i != other.m_Gatherers.end(); ++i) { m_Gatherers.push_back((*i)->cloneForPersistence()); } if (other.m_SampleCounts) { @@ -345,18 +330,23 @@ const CDataGatherer::TStrVec& CDataGatherer::fieldsOfInterest() const { } std::size_t CDataGatherer::numberByFieldValues() const { - return this->isPopulation() ? this->numberActiveAttributes() : this->numberActivePeople(); + return this->isPopulation() ? this->numberActiveAttributes() + : this->numberActivePeople(); } std::size_t CDataGatherer::numberOverFieldValues() const { return this->isPopulation() ? this->numberActivePeople() : 0; } -bool CDataGatherer::processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) { +bool CDataGatherer::processFields(const TStrCPtrVec& fieldValues, + CEventData& result, + CResourceMonitor& resourceMonitor) { return m_Gatherers.front()->processFields(fieldValues, result, resourceMonitor); } -bool CDataGatherer::addArrival(const TStrCPtrVec& fieldValues, CEventData& data, CResourceMonitor& resourceMonitor) { +bool CDataGatherer::addArrival(const TStrCPtrVec& fieldValues, + CEventData& data, + CResourceMonitor& resourceMonitor) { // We process fields even if we are in the first partial bucket so that // we add enough extra memory to the resource monitor in order to control // the number of partitions created. @@ -449,7 +439,9 @@ void CDataGatherer::recyclePeople(const TSizeVec& peopleToRemove) { } m_PeopleRegistry.recycleNames(peopleToRemove, DEFAULT_PERSON_NAME); - core::CStatistics::instance().stat(stat_t::E_NumberPrunedItems).increment(peopleToRemove.size()); + core::CStatistics::instance() + .stat(stat_t::E_NumberPrunedItems) + .increment(peopleToRemove.size()); } void CDataGatherer::removePeople(std::size_t lowestPersonToRemove) { @@ -476,8 +468,11 @@ bool CDataGatherer::isPersonActive(std::size_t pid) const { return m_PeopleRegistry.isIdActive(pid); } -std::size_t CDataGatherer::addPerson(const std::string& person, CResourceMonitor& resourceMonitor, bool& addedPerson) { - return m_PeopleRegistry.addName(person, this->chooseBucketGatherer(0).currentBucketStartTime(), resourceMonitor, addedPerson); +std::size_t CDataGatherer::addPerson(const std::string& person, + CResourceMonitor& resourceMonitor, + bool& addedPerson) { + return m_PeopleRegistry.addName(person, this->chooseBucketGatherer(0).currentBucketStartTime(), + resourceMonitor, addedPerson); } std::size_t CDataGatherer::numberActiveAttributes() const { @@ -496,7 +491,8 @@ const std::string& CDataGatherer::attributeName(std::size_t cid) const { return this->attributeName(cid, DEFAULT_ATTRIBUTE_NAME); } -const std::string& CDataGatherer::attributeName(std::size_t cid, const std::string& fallback) const { +const std::string& CDataGatherer::attributeName(std::size_t cid, + const std::string& fallback) const { return m_AttributesRegistry.name(cid, fallback); } @@ -518,7 +514,9 @@ void CDataGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { } m_AttributesRegistry.recycleNames(attributesToRemove, DEFAULT_ATTRIBUTE_NAME); - core::CStatistics::instance().stat(stat_t::E_NumberPrunedItems).increment(attributesToRemove.size()); + core::CStatistics::instance() + .stat(stat_t::E_NumberPrunedItems) + .increment(attributesToRemove.size()); } void CDataGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { @@ -545,8 +543,12 @@ bool CDataGatherer::isAttributeActive(std::size_t cid) const { return 
m_AttributesRegistry.isIdActive(cid); } -std::size_t CDataGatherer::addAttribute(const std::string& attribute, CResourceMonitor& resourceMonitor, bool& addedAttribute) { - return m_AttributesRegistry.addName(attribute, this->chooseBucketGatherer(0).currentBucketStartTime(), resourceMonitor, addedAttribute); +std::size_t CDataGatherer::addAttribute(const std::string& attribute, + CResourceMonitor& resourceMonitor, + bool& addedAttribute) { + return m_AttributesRegistry.addName( + attribute, this->chooseBucketGatherer(0).currentBucketStartTime(), + resourceMonitor, addedAttribute); } double CDataGatherer::sampleCount(std::size_t id) const { @@ -586,7 +588,8 @@ core_t::TTime CDataGatherer::currentBucketStartTime() const { void CDataGatherer::currentBucketStartTime(core_t::TTime bucketStart) { m_Gatherers[0]->currentBucketStartTime(bucketStart); if (m_Gatherers.size() > 1) { - m_Gatherers[1]->currentBucketStartTime(bucketStart - (m_Gatherers[1]->bucketLength() / 2)); + m_Gatherers[1]->currentBucketStartTime( + bucketStart - (m_Gatherers[1]->bucketLength() / 2)); } } @@ -616,7 +619,8 @@ const CDataGatherer::TSizeSizePrUInt64UMap& CDataGatherer::bucketCounts(core_t:: return this->chooseBucketGatherer(time).bucketCounts(time); } -const CDataGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec& CDataGatherer::influencerCounts(core_t::TTime time) const { +const CDataGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec& +CDataGatherer::influencerCounts(core_t::TTime time) const { return this->chooseBucketGatherer(time).influencerCounts(time); } @@ -700,14 +704,20 @@ void CDataGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) for (std::size_t i = 0u; i < m_Features.size(); ++i) { inserter.insertValue(FEATURE_TAG, static_cast(m_Features[i])); } - inserter.insertLevel(PEOPLE_REGISTRY_TAG, boost::bind(&CDynamicStringIdRegistry::acceptPersistInserter, m_PeopleRegistry, _1)); - inserter.insertLevel(ATTRIBUTES_REGISTRY_TAG, boost::bind(&CDynamicStringIdRegistry::acceptPersistInserter, m_AttributesRegistry, _1)); + inserter.insertLevel(PEOPLE_REGISTRY_TAG, boost::bind(&CDynamicStringIdRegistry::acceptPersistInserter, + m_PeopleRegistry, _1)); + inserter.insertLevel(ATTRIBUTES_REGISTRY_TAG, + boost::bind(&CDynamicStringIdRegistry::acceptPersistInserter, + m_AttributesRegistry, _1)); if (m_SampleCounts) { - inserter.insertLevel(SAMPLE_COUNTS_TAG, boost::bind(&CSampleCounts::acceptPersistInserter, m_SampleCounts.get(), _1)); + inserter.insertLevel(SAMPLE_COUNTS_TAG, + boost::bind(&CSampleCounts::acceptPersistInserter, + m_SampleCounts.get(), _1)); } - inserter.insertLevel(BUCKET_GATHERER_TAG, boost::bind(&CDataGatherer::persistBucketGatherers, this, _1)); + inserter.insertLevel(BUCKET_GATHERER_TAG, + boost::bind(&CDataGatherer::persistBucketGatherers, this, _1)); } bool CDataGatherer::determineMetricCategory(TMetricCategoryVec& fieldMetricCategories) const { @@ -724,7 +734,8 @@ bool CDataGatherer::determineMetricCategory(TMetricCategoryVec& fieldMetricCateg model_t::EMetricCategory result; if (model_t::metricCategory(m_Features.front(), result) == false) { - LOG_ERROR(<< "Unable to map feature " << model_t::print(m_Features.front()) << " to a metric category"); + LOG_ERROR(<< "Unable to map feature " << model_t::print(m_Features.front()) + << " to a metric category"); return false; } @@ -733,7 +744,9 @@ bool CDataGatherer::determineMetricCategory(TMetricCategoryVec& fieldMetricCateg return true; } -bool CDataGatherer::extractCountFromField(const std::string& fieldName, const std::string* 
fieldValue, std::size_t& count) const { +bool CDataGatherer::extractCountFromField(const std::string& fieldName, + const std::string* fieldValue, + std::size_t& count) const { if (fieldValue == nullptr) { // Treat not present as explicit null count = EXPLICIT_NULL_SUMMARY_COUNT; @@ -758,7 +771,9 @@ bool CDataGatherer::extractCountFromField(const std::string& fieldName, const st return count > 0; } -bool CDataGatherer::extractMetricFromField(const std::string& fieldName, std::string fieldValue, TDouble1Vec& result) const { +bool CDataGatherer::extractMetricFromField(const std::string& fieldName, + std::string fieldValue, + TDouble1Vec& result) const { result.clear(); core::CStringUtils::trimWhitespace(fieldValue); @@ -777,7 +792,8 @@ bool CDataGatherer::extractMetricFromField(const std::string& fieldName, std::st // Avoid a string duplication in the (common) case of only one value bool convertedOk = (first == 0 && last == std::string::npos) ? core::CStringUtils::stringToType(fieldValue, value) - : core::CStringUtils::stringToType(fieldValue.substr(first, last - first), value); + : core::CStringUtils::stringToType( + fieldValue.substr(first, last - first), value); if (!convertedOk) { LOG_ERROR(<< "Unable to extract " << fieldName << " from " << fieldValue); result.clear(); @@ -833,7 +849,8 @@ bool CDataGatherer::acceptRestoreTraverser(const std::string& summaryCountFieldN const std::string& name = traverser.name(); if (name == FEATURE_TAG) { int feature(-1); - if (core::CStringUtils::stringToType(traverser.value(), feature) == false || feature < 0) { + if (core::CStringUtils::stringToType(traverser.value(), feature) == false || + feature < 0) { LOG_ERROR(<< "Invalid feature in " << traverser.value()); return false; } @@ -841,22 +858,21 @@ bool CDataGatherer::acceptRestoreTraverser(const std::string& summaryCountFieldN continue; } RESTORE(PEOPLE_REGISTRY_TAG, - traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, &m_PeopleRegistry, _1))) + traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, + &m_PeopleRegistry, _1))) RESTORE(ATTRIBUTES_REGISTRY_TAG, - traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, &m_AttributesRegistry, _1))) - RESTORE_SETUP_TEARDOWN(SAMPLE_COUNTS_TAG, - m_SampleCounts.reset(new CSampleCounts(0)), - traverser.traverseSubLevel(boost::bind(&CSampleCounts::acceptRestoreTraverser, m_SampleCounts.get(), _1)), - /**/) + traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, + &m_AttributesRegistry, _1))) + RESTORE_SETUP_TEARDOWN( + SAMPLE_COUNTS_TAG, m_SampleCounts.reset(new CSampleCounts(0)), + traverser.traverseSubLevel(boost::bind(&CSampleCounts::acceptRestoreTraverser, + m_SampleCounts.get(), _1)), + /**/) RESTORE(BUCKET_GATHERER_TAG, - traverser.traverseSubLevel(boost::bind(&CDataGatherer::restoreBucketGatherer, - this, - boost::cref(summaryCountFieldName), - boost::cref(personFieldName), - boost::cref(attributeFieldName), - boost::cref(valueFieldName), - boost::cref(influenceFieldNames), - _1))) + traverser.traverseSubLevel(boost::bind( + &CDataGatherer::restoreBucketGatherer, this, boost::cref(summaryCountFieldName), + boost::cref(personFieldName), boost::cref(attributeFieldName), + boost::cref(valueFieldName), boost::cref(influenceFieldNames), _1))) } while (traverser.next()); return true; @@ -872,7 +888,8 @@ bool CDataGatherer::restoreBucketGatherer(const std::string& summaryCountFieldNa const std::string& name = 
traverser.name(); if (name == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG) { CEventRateBucketGatherer* gatherer = new CEventRateBucketGatherer( - *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, traverser); + *this, summaryCountFieldName, personFieldName, attributeFieldName, + valueFieldName, influenceFieldNames, traverser); if (gatherer == nullptr) { LOG_ERROR(<< "Failed to create gatherer"); @@ -881,7 +898,8 @@ bool CDataGatherer::restoreBucketGatherer(const std::string& summaryCountFieldNa m_Gatherers.push_back(gatherer); } else if (name == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG) { CMetricBucketGatherer* gatherer = new CMetricBucketGatherer( - *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, traverser); + *this, summaryCountFieldName, personFieldName, attributeFieldName, + valueFieldName, influenceFieldNames, traverser); if (gatherer == nullptr) { LOG_ERROR(<< "Failed to create gatherer"); return false; @@ -898,11 +916,15 @@ void CDataGatherer::persistBucketGatherers(core::CStatePersistInserter& inserter const std::string& tag = (*i)->persistenceTag(); if (tag == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG) { - CEventRateBucketGatherer* const gatherer = dynamic_cast(*i); - inserter.insertLevel(tag, boost::bind(&CEventRateBucketGatherer::acceptPersistInserter, boost::cref(gatherer), _1)); + CEventRateBucketGatherer* const gatherer = + dynamic_cast(*i); + inserter.insertLevel(tag, boost::bind(&CEventRateBucketGatherer::acceptPersistInserter, + boost::cref(gatherer), _1)); } else if (tag == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG) { - CMetricBucketGatherer* const gatherer = dynamic_cast(*i); - inserter.insertLevel(tag, boost::bind(&CMetricBucketGatherer::acceptPersistInserter, boost::cref(gatherer), _1)); + CMetricBucketGatherer* const gatherer = + dynamic_cast(*i); + inserter.insertLevel(tag, boost::bind(&CMetricBucketGatherer::acceptPersistInserter, + boost::cref(gatherer), _1)); } } } @@ -920,14 +942,16 @@ void CDataGatherer::createBucketGatherer(model_t::EAnalysisCategory gathererType case model_t::E_PopulationEventRate: case model_t::E_PeersEventRate: m_Gatherers.push_back(new CEventRateBucketGatherer( - *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, startTime)); + *this, summaryCountFieldName, personFieldName, attributeFieldName, + valueFieldName, influenceFieldNames, startTime)); break; case model_t::E_Metric: case model_t::E_PopulationMetric: case model_t::E_PeersMetric: m_SampleCounts.reset(new CSampleCounts(sampleCountOverride)); m_Gatherers.push_back(new CMetricBucketGatherer( - *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, startTime)); + *this, summaryCountFieldName, personFieldName, attributeFieldName, + valueFieldName, influenceFieldNames, startTime)); break; } } diff --git a/lib/model/CDetectionRule.cc b/lib/model/CDetectionRule.cc index 35cbcc09cf..a0b34789ec 100644 --- a/lib/model/CDetectionRule.cc +++ b/lib/model/CDetectionRule.cc @@ -13,7 +13,8 @@ namespace ml { namespace model { CDetectionRule::CDetectionRule() - : m_Action(E_FilterResults), m_Conditions(), m_ConditionsConnective(E_Or), m_TargetFieldName(), m_TargetFieldValue() { + : m_Action(E_FilterResults), m_Conditions(), m_ConditionsConnective(E_Or), + m_TargetFieldName(), m_TargetFieldValue() { m_Conditions.reserve(1); } @@ -53,7 +54,8 @@ bool CDetectionRule::apply(ERuleAction 
action, } for (std::size_t i = 0; i < m_Conditions.size(); ++i) { - bool conditionResult = m_Conditions[i].test(model, feature, resultType, !m_TargetFieldName.empty(), pid, cid, time); + bool conditionResult = m_Conditions[i].test( + model, feature, resultType, !m_TargetFieldName.empty(), pid, cid, time); switch (m_ConditionsConnective) { case E_Or: if (conditionResult == true) { @@ -77,7 +79,9 @@ bool CDetectionRule::apply(ERuleAction action, return false; } -bool CDetectionRule::isInScope(const CAnomalyDetectorModel& model, std::size_t pid, std::size_t cid) const { +bool CDetectionRule::isInScope(const CAnomalyDetectorModel& model, + std::size_t pid, + std::size_t cid) const { if (m_TargetFieldName.empty() || m_TargetFieldValue.empty()) { return true; } diff --git a/lib/model/CDetectorEqualizer.cc b/lib/model/CDetectorEqualizer.cc index 2ee5c60dfd..eec018412b 100644 --- a/lib/model/CDetectorEqualizer.cc +++ b/lib/model/CDetectorEqualizer.cc @@ -31,7 +31,8 @@ void CDetectorEqualizer::acceptPersistInserter(core::CStatePersistInserter& inse } for (const auto& sketch : m_Sketches) { inserter.insertValue(DETECTOR_TAG, sketch.first); - inserter.insertLevel(SKETCH_TAG, boost::bind(&maths::CQuantileSketch::acceptPersistInserter, boost::cref(sketch.second), _1)); + inserter.insertLevel(SKETCH_TAG, boost::bind(&maths::CQuantileSketch::acceptPersistInserter, + boost::cref(sketch.second), _1)); } } @@ -39,16 +40,19 @@ bool CDetectorEqualizer::acceptRestoreTraverser(core::CStateRestoreTraverser& tr boost::optional detector; do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN(DETECTOR_TAG, detector.reset(0), core::CStringUtils::stringToType(traverser.value(), *detector), + RESTORE_SETUP_TEARDOWN(DETECTOR_TAG, detector.reset(0), + core::CStringUtils::stringToType(traverser.value(), *detector), /**/) if (name == SKETCH_TAG) { if (!detector) { LOG_ERROR(<< "Expected the detector label first"); return false; } - m_Sketches.emplace_back(*detector, maths::CQuantileSketch(SKETCH_INTERPOLATION, SKETCH_SIZE)); + m_Sketches.emplace_back( + *detector, maths::CQuantileSketch(SKETCH_INTERPOLATION, SKETCH_SIZE)); if (traverser.traverseSubLevel( - boost::bind(&maths::CQuantileSketch::acceptRestoreTraverser, boost::ref(m_Sketches.back().second), _1)) == false) { + boost::bind(&maths::CQuantileSketch::acceptRestoreTraverser, + boost::ref(m_Sketches.back().second), _1)) == false) { LOG_ERROR(<< "Failed to restore SKETCH_TAG, got " << traverser.value()); m_Sketches.pop_back(); return false; @@ -101,7 +105,8 @@ double CDetectorEqualizer::correct(int detector, double probability) { LOG_TRACE(<< "quantiles = " << core::CContainerPrinter::print(logps)); std::size_t n = logps.size(); - double logpc = n % 2 == 0 ? (logps[n / 2 - 1] + logps[n / 2]) / 2.0 : logps[n / 2]; + double logpc = n % 2 == 0 ? 
(logps[n / 2 - 1] + logps[n / 2]) / 2.0 + : logps[n / 2]; double alpha = maths::CTools::truncate((logp - A) / (B - A), 0.0, 1.0); LOG_TRACE(<< "Corrected log(p) = " << -alpha * logpc - (1.0 - alpha) * logp); @@ -130,14 +135,17 @@ double CDetectorEqualizer::largestProbabilityToCorrect() { } maths::CQuantileSketch& CDetectorEqualizer::sketch(int detector) { - auto i = std::lower_bound(m_Sketches.begin(), m_Sketches.end(), detector, maths::COrderings::SFirstLess()); + auto i = std::lower_bound(m_Sketches.begin(), m_Sketches.end(), detector, + maths::COrderings::SFirstLess()); if (i == m_Sketches.end() || i->first != detector) { - i = m_Sketches.insert(i, {detector, maths::CQuantileSketch(SKETCH_INTERPOLATION, SKETCH_SIZE)}); + i = m_Sketches.insert( + i, {detector, maths::CQuantileSketch(SKETCH_INTERPOLATION, SKETCH_SIZE)}); } return i->second; } -const maths::CQuantileSketch::EInterpolation CDetectorEqualizer::SKETCH_INTERPOLATION(maths::CQuantileSketch::E_Linear); +const maths::CQuantileSketch::EInterpolation + CDetectorEqualizer::SKETCH_INTERPOLATION(maths::CQuantileSketch::E_Linear); const std::size_t CDetectorEqualizer::SKETCH_SIZE(100); const double CDetectorEqualizer::MINIMUM_COUNT_FOR_CORRECTION(1.5); } diff --git a/lib/model/CDynamicStringIdRegistry.cc b/lib/model/CDynamicStringIdRegistry.cc index 6a30d69d1f..18d6fdbf2f 100644 --- a/lib/model/CDynamicStringIdRegistry.cc +++ b/lib/model/CDynamicStringIdRegistry.cc @@ -29,25 +29,24 @@ CDynamicStringIdRegistry::CDynamicStringIdRegistry(const std::string& nameType, stat_t::EStatTypes addedStat, stat_t::EStatTypes addNotAllowedStat, stat_t::EStatTypes recycledStat) - : m_NameType(nameType), m_AddedStat(addedStat), m_AddNotAllowedStat(addNotAllowedStat), m_RecycledStat(recycledStat), m_Uids(1) { + : m_NameType(nameType), m_AddedStat(addedStat), + m_AddNotAllowedStat(addNotAllowedStat), m_RecycledStat(recycledStat), m_Uids(1) { } -CDynamicStringIdRegistry::CDynamicStringIdRegistry(bool isForPersistence, const CDynamicStringIdRegistry& other) - : m_NameType(other.m_NameType), - m_AddedStat(other.m_AddedStat), +CDynamicStringIdRegistry::CDynamicStringIdRegistry(bool isForPersistence, + const CDynamicStringIdRegistry& other) + : m_NameType(other.m_NameType), m_AddedStat(other.m_AddedStat), m_AddNotAllowedStat(other.m_AddNotAllowedStat), - m_RecycledStat(other.m_RecycledStat), - m_Dictionary(other.m_Dictionary), - m_Uids(other.m_Uids), - m_Names(other.m_Names), - m_FreeUids(other.m_FreeUids), - m_RecycledUids(other.m_RecycledUids) { + m_RecycledStat(other.m_RecycledStat), m_Dictionary(other.m_Dictionary), + m_Uids(other.m_Uids), m_Names(other.m_Names), + m_FreeUids(other.m_FreeUids), m_RecycledUids(other.m_RecycledUids) { if (!isForPersistence) { LOG_ABORT(<< "This constructor only creates clones for persistence"); } } -const std::string& CDynamicStringIdRegistry::name(std::size_t id, const std::string& fallback) const { +const std::string& CDynamicStringIdRegistry::name(std::size_t id, + const std::string& fallback) const { return id >= m_Names.size() ? 
fallback : *m_Names[id]; } @@ -84,11 +83,15 @@ std::size_t CDynamicStringIdRegistry::numberNames() const { } bool CDynamicStringIdRegistry::isIdActive(std::size_t id) const { - return id < m_Names.size() && !std::binary_search(m_FreeUids.begin(), m_FreeUids.end(), id, std::greater()); + return id < m_Names.size() && + !std::binary_search(m_FreeUids.begin(), m_FreeUids.end(), id, + std::greater()); } -std::size_t -CDynamicStringIdRegistry::addName(const std::string& name, core_t::TTime time, CResourceMonitor& resourceMonitor, bool& addedPerson) { +std::size_t CDynamicStringIdRegistry::addName(const std::string& name, + core_t::TTime time, + CResourceMonitor& resourceMonitor, + bool& addedPerson) { // Get the identifier or create one if this is the // first time we've seen them. (Use emplace to avoid copying // the string if it is already in the collection.) @@ -143,7 +146,8 @@ void CDynamicStringIdRegistry::removeNames(std::size_t lowestNameToRemove) { m_Names.erase(m_Names.begin() + lowestNameToRemove, m_Names.end()); } -void CDynamicStringIdRegistry::recycleNames(const TSizeVec& namesToRemove, const std::string& defaultName) { +void CDynamicStringIdRegistry::recycleNames(const TSizeVec& namesToRemove, + const std::string& defaultName) { for (std::size_t i = 0u; i < namesToRemove.size(); ++i) { std::size_t id = namesToRemove[i]; if (id >= m_Names.size()) { @@ -156,7 +160,8 @@ void CDynamicStringIdRegistry::recycleNames(const TSizeVec& namesToRemove, const m_Names[id] = CStringStore::names().get(defaultName); } std::sort(m_FreeUids.begin(), m_FreeUids.end(), std::greater()); - m_FreeUids.erase(std::unique(m_FreeUids.begin(), m_FreeUids.end()), m_FreeUids.end()); + m_FreeUids.erase(std::unique(m_FreeUids.begin(), m_FreeUids.end()), + m_FreeUids.end()); } CDynamicStringIdRegistry::TSizeVec& CDynamicStringIdRegistry::recycledIds() { @@ -168,7 +173,8 @@ bool CDynamicStringIdRegistry::checkInvariants() const { bool result = true; if (m_Uids.size() > m_Names.size()) { - LOG_ERROR(<< "Unexpected extra " << (m_Uids.size() - m_Names.size()) << " " << m_NameType << " uids"); + LOG_ERROR(<< "Unexpected extra " << (m_Uids.size() - m_Names.size()) + << " " << m_NameType << " uids"); result = false; } @@ -179,7 +185,8 @@ bool CDynamicStringIdRegistry::checkInvariants() const { result = false; } if (i->second > m_Names.size()) { - LOG_ERROR(<< m_NameType << " id " << i->second << " out of range [0, " << m_Names.size() << ")"); + LOG_ERROR(<< m_NameType << " id " << i->second + << " out of range [0, " << m_Names.size() << ")"); result = false; } } @@ -258,8 +265,10 @@ bool CDynamicStringIdRegistry::acceptRestoreTraverser(core::CStateRestoreTravers // reuse. We mustn't add these to the ID maps. 
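[Editor's note] A note on the free-ID bookkeeping here: m_FreeUids is kept sorted in descending order (std::sort with std::greater, then std::unique), so isIdActive() and the restore path can test membership with std::binary_search using that same comparator. A self-contained sketch of the invariant; the container and values are illustrative, not the registry's actual state:

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <vector>

    int main() {
        // Free (recycled) IDs, maintained in descending order as in m_FreeUids.
        std::vector<std::size_t> freeIds{7, 4, 2};
        std::size_t numberNames = 9;

        auto isIdActive = [&](std::size_t id) {
            // binary_search must use the comparator the range was sorted with.
            return id < numberNames &&
                   !std::binary_search(freeIds.begin(), freeIds.end(), id,
                                       std::greater<std::size_t>());
        };

        std::cout << isIdActive(4) << ' ' << isIdActive(5) << '\n'; // prints "0 1"
        return 0;
    }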
    for (std::size_t id = 0; id < m_Names.size(); ++id) {
-        if (std::binary_search(m_FreeUids.begin(), m_FreeUids.end(), id, std::greater<std::size_t>())) {
-            LOG_TRACE(<< "Restore ignoring free " << m_NameType << " name " << *m_Names[id] << " = id " << id);
+        if (std::binary_search(m_FreeUids.begin(), m_FreeUids.end(), id,
+                               std::greater<std::size_t>())) {
+            LOG_TRACE(<< "Restore ignoring free " << m_NameType << " name "
+                      << *m_Names[id] << " = id " << id);
         } else {
             m_Uids[m_Dictionary.word(*m_Names[id])] = id;
         }
@@ -268,6 +277,7 @@ bool CDynamicStringIdRegistry::acceptRestoreTraverser(core::CStateRestoreTravers
     return true;
}

-const std::size_t CDynamicStringIdRegistry::INVALID_ID(std::numeric_limits<std::size_t>::max());
+const std::size_t
+    CDynamicStringIdRegistry::INVALID_ID(std::numeric_limits<std::size_t>::max());
}
}
diff --git a/lib/model/CEventData.cc b/lib/model/CEventData.cc
index b929233f63..d9a3c92310 100644
--- a/lib/model/CEventData.cc
+++ b/lib/model/CEventData.cc
@@ -19,7 +19,8 @@ const CEventData::TDouble1VecArray DUMMY_ARRAY = CEventData::TDouble1VecArray();
const std::string DASH("-");
}

-CEventData::CEventData() : m_Time(0), m_Pid(), m_Cids(), m_Values(), m_IsExplicitNull(false) {
+CEventData::CEventData()
+    : m_Time(0), m_Pid(), m_Cids(), m_Values(), m_IsExplicitNull(false) {
}

void CEventData::swap(CEventData& other) {
@@ -98,7 +99,8 @@ CEventData::TOptionalSize CEventData::personId() const {

CEventData::TOptionalSize CEventData::attributeId() const {
    if (m_Cids.size() != 1) {
-        LOG_ERROR(<< "Call to attribute identifier ambiguous: " << core::CContainerPrinter::print(m_Cids));
+        LOG_ERROR(<< "Call to attribute identifier ambiguous: "
+                  << core::CContainerPrinter::print(m_Cids));
         return TOptionalSize();
     }
     return m_Cids[0];
@@ -129,8 +131,10 @@ CEventData::TOptionalSize CEventData::count() const {
}

std::string CEventData::print() const {
-    return core::CStringUtils::typeToString(m_Time) + ' ' + (m_Pid ? core::CStringUtils::typeToString(*m_Pid) : DASH) + ' ' +
-           core::CContainerPrinter::print(m_Cids) + ' ' + core::CContainerPrinter::print(m_Values);
+    return core::CStringUtils::typeToString(m_Time) + ' ' +
+           (m_Pid ? core::CStringUtils::typeToString(*m_Pid) : DASH) + ' ' +
+           core::CContainerPrinter::print(m_Cids) + ' ' +
+           core::CContainerPrinter::print(m_Values);
}

CEventData::TOptionalSize CEventData::attributeId(std::size_t i) const {
diff --git a/lib/model/CEventRateBucketGatherer.cc b/lib/model/CEventRateBucketGatherer.cc
index 30da65e587..6ff2e28cb4 100644
--- a/lib/model/CEventRateBucketGatherer.cc
+++ b/lib/model/CEventRateBucketGatherer.cc
@@ -79,28 +79,34 @@ const std::string UNIQUE_STRINGS_TAG("b");

//! \brief Manages persistence of time-of-day feature data maps.
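// A minimal sketch of the pattern the serializers below rely on: an
// unordered_map has no stable iteration order, so before persisting we
// collect iterators, sort them by the dereferenced key, and write in that
// order, which keeps persisted state byte-for-byte reproducible. The
// std::ostream here is a hypothetical stand-in for core::CStatePersistInserter,
// and the lambda plays the role of core::CFunctional::SDereference.
#include <algorithm>
#include <iostream>
#include <unordered_map>
#include <vector>

void persistOrdered(const std::unordered_map<int, double>& map, std::ostream& inserter) {
    std::vector<std::unordered_map<int, double>::const_iterator> ordered;
    ordered.reserve(map.size());
    for (auto i = map.begin(); i != map.end(); ++i) {
        ordered.push_back(i);
    }
    // Compare the *pointed-to* entries, not the iterators themselves.
    std::sort(ordered.begin(), ordered.end(),
              [](const auto& lhs, const auto& rhs) { return lhs->first < rhs->first; });
    for (const auto& i : ordered) {
        inserter << i->first << ' ' << i->second << '\n';
    }
}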
struct STimesBucketSerializer { - void operator()(const TSizeSizePrMeanAccumulatorUMap& times, core::CStatePersistInserter& inserter) { + void operator()(const TSizeSizePrMeanAccumulatorUMap& times, + core::CStatePersistInserter& inserter) { std::vector ordered; ordered.reserve(times.size()); for (auto i = times.begin(); i != times.end(); ++i) { ordered.push_back(i); } - std::sort(ordered.begin(), ordered.end(), core::CFunctional::SDereference()); + std::sort(ordered.begin(), ordered.end(), + core::CFunctional::SDereference()); for (std::size_t i = 0u; i < ordered.size(); ++i) { inserter.insertValue(PERSON_TAG, CDataGatherer::extractPersonId(*ordered[i])); - inserter.insertValue(ATTRIBUTE_TAG, CDataGatherer::extractAttributeId(*ordered[i])); - inserter.insertValue(MEAN_TIMES_TAG, CDataGatherer::extractData(*ordered[i]).toDelimited()); + inserter.insertValue(ATTRIBUTE_TAG, + CDataGatherer::extractAttributeId(*ordered[i])); + inserter.insertValue(MEAN_TIMES_TAG, + CDataGatherer::extractData(*ordered[i]).toDelimited()); } } - bool operator()(TSizeSizePrMeanAccumulatorUMap& times, core::CStateRestoreTraverser& traverser) const { + bool operator()(TSizeSizePrMeanAccumulatorUMap& times, + core::CStateRestoreTraverser& traverser) const { std::size_t pid = 0; std::size_t cid = 0; do { const std::string& name = traverser.name(); RESTORE_BUILT_IN(PERSON_TAG, pid) RESTORE_BUILT_IN(ATTRIBUTE_TAG, cid) - RESTORE(MEAN_TIMES_TAG, times[TSizeSizePr(pid, cid)].fromDelimited(traverser.value())) + RESTORE(MEAN_TIMES_TAG, + times[TSizeSizePr(pid, cid)].fromDelimited(traverser.value())) } while (traverser.next()); return true; @@ -109,19 +115,23 @@ struct STimesBucketSerializer { //! \brief Manages persistence of unique string feature data maps. struct SStrDataBucketSerializer { - void operator()(const TSizeSizePrStrDataUMap& strings, core::CStatePersistInserter& inserter) { + void operator()(const TSizeSizePrStrDataUMap& strings, + core::CStatePersistInserter& inserter) { std::vector ordered; ordered.reserve(strings.size()); for (auto i = strings.begin(); i != strings.end(); ++i) { ordered.push_back(i); } - std::sort(ordered.begin(), ordered.end(), core::CFunctional::SDereference()); + std::sort(ordered.begin(), ordered.end(), + core::CFunctional::SDereference()); for (std::size_t i = 0u; i != ordered.size(); ++i) { inserter.insertValue(PERSON_TAG, CDataGatherer::extractPersonId(*ordered[i])); - inserter.insertValue(ATTRIBUTE_TAG, CDataGatherer::extractAttributeId(*ordered[i])); + inserter.insertValue(ATTRIBUTE_TAG, + CDataGatherer::extractAttributeId(*ordered[i])); inserter.insertLevel( STRING_ITEM_TAG, - boost::bind(&CUniqueStringFeatureData::acceptPersistInserter, boost::cref(CDataGatherer::extractData(*ordered[i])), _1)); + boost::bind(&CUniqueStringFeatureData::acceptPersistInserter, + boost::cref(CDataGatherer::extractData(*ordered[i])), _1)); } } bool operator()(TSizeSizePrStrDataUMap& map, core::CStateRestoreTraverser& traverser) const { @@ -131,9 +141,9 @@ struct SStrDataBucketSerializer { const std::string& name = traverser.name(); RESTORE_BUILT_IN(PERSON_TAG, pid) RESTORE_BUILT_IN(ATTRIBUTE_TAG, cid) - RESTORE(STRING_ITEM_TAG, - traverser.traverseSubLevel( - boost::bind(&CUniqueStringFeatureData::acceptRestoreTraverser, boost::ref(map[TSizeSizePr(pid, cid)]), _1))) + RESTORE(STRING_ITEM_TAG, traverser.traverseSubLevel(boost::bind( + &CUniqueStringFeatureData::acceptRestoreTraverser, + boost::ref(map[TSizeSizePr(pid, cid)]), _1))) } while (traverser.next()); return true; @@ -141,7 +151,8 @@ struct 
SStrDataBucketSerializer { }; //! Serialize \p data. -void persistAttributePeopleData(const TSizeUSetVec& data, core::CStatePersistInserter& inserter) { +void persistAttributePeopleData(const TSizeUSetVec& data, + core::CStatePersistInserter& inserter) { // Persist the vector in reverse order, because it means we'll // find out the correct size more efficiently on restore. std::size_t index = data.size(); @@ -161,33 +172,42 @@ void persistAttributePeopleData(const TSizeUSetVec& data, core::CStatePersistIns } //! Serialize \p featureData. -void persistFeatureData(const TCategoryAnyMap& featureData, core::CStatePersistInserter& inserter) { +void persistFeatureData(const TCategoryAnyMap& featureData, + core::CStatePersistInserter& inserter) { for (const auto& data_ : featureData) { model_t::EEventRateCategory category = data_.first; const boost::any& data = data_.second; try { switch (category) { case model_t::E_DiurnalTimes: - inserter.insertLevel(TIMES_OF_DAY_TAG, - boost::bind(TSizeSizePrMeanAccumulatorUMapQueue::CSerializer(), - boost::cref(boost::any_cast(data)), - _1)); + inserter.insertLevel( + TIMES_OF_DAY_TAG, + boost::bind( + TSizeSizePrMeanAccumulatorUMapQueue::CSerializer(), + boost::cref(boost::any_cast(data)), + _1)); break; case model_t::E_MeanArrivalTimes: // TODO break; case model_t::E_AttributePeople: - inserter.insertLevel(ATTRIBUTE_PEOPLE_TAG, - boost::bind(&persistAttributePeopleData, boost::cref(boost::any_cast(data)), _1)); + inserter.insertLevel( + ATTRIBUTE_PEOPLE_TAG, + boost::bind(&persistAttributePeopleData, + boost::cref(boost::any_cast(data)), _1)); break; case model_t::E_UniqueValues: - inserter.insertLevel(UNIQUE_VALUES_TAG, - boost::bind(TSizeSizePrStrDataUMapQueue::CSerializer(), - boost::cref(boost::any_cast(data)), - _1)); + inserter.insertLevel( + UNIQUE_VALUES_TAG, + boost::bind( + TSizeSizePrStrDataUMapQueue::CSerializer(), + boost::cref(boost::any_cast(data)), + _1)); break; } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to serialize data for " << category << ": " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to serialize data for " << category << ": " << e.what()); + } } } @@ -209,7 +229,8 @@ bool restoreAttributePeopleData(core::CStateRestoreTraverser& traverser, TSizeUS } } else if (name == PERSON_TAG) { if (!seenCid) { - LOG_ERROR(<< "Incorrect format - person ID before attribute ID in " << traverser.value()); + LOG_ERROR(<< "Incorrect format - person ID before attribute ID in " + << traverser.value()); return false; } std::size_t pid = 0; @@ -232,9 +253,11 @@ bool restoreFeatureData(core::CStateRestoreTraverser& traverser, core_t::TTime currentBucketStartTime) { const std::string& name = traverser.name(); if (name == ATTRIBUTE_PEOPLE_TAG) { - TSizeUSetVec* data{ - boost::unsafe_any_cast(&featureData.emplace(model_t::E_AttributePeople, TSizeUSetVec()).first->second)}; - if (traverser.traverseSubLevel(boost::bind(&restoreAttributePeopleData, _1, boost::ref(*data))) == false) { + TSizeUSetVec* data{boost::unsafe_any_cast( + &featureData.emplace(model_t::E_AttributePeople, TSizeUSetVec()) + .first->second)}; + if (traverser.traverseSubLevel(boost::bind(&restoreAttributePeopleData, _1, + boost::ref(*data))) == false) { LOG_ERROR(<< "Invalid attribute/people mapping in " << traverser.value()); return false; } @@ -245,11 +268,13 @@ bool restoreFeatureData(core::CStateRestoreTraverser& traverser, TSizeSizePrStrDataUMapQueue* data{boost::unsafe_any_cast( &featureData .emplace(model_t::E_UniqueValues, - 
TSizeSizePrStrDataUMapQueue(latencyBuckets, bucketLength, currentBucketStartTime, TSizeSizePrStrDataUMap(1))) + TSizeSizePrStrDataUMapQueue(latencyBuckets, bucketLength, currentBucketStartTime, + TSizeSizePrStrDataUMap(1))) .first->second)}; if (traverser.traverseSubLevel(boost::bind( - TSizeSizePrStrDataUMapQueue::CSerializer(TSizeSizePrStrDataUMap(1)), boost::ref(*data), _1)) == - false) { + TSizeSizePrStrDataUMapQueue::CSerializer( + TSizeSizePrStrDataUMap(1)), + boost::ref(*data), _1)) == false) { LOG_ERROR(<< "Invalid unique value mapping in " << traverser.value()); return false; } @@ -263,7 +288,8 @@ bool restoreFeatureData(core::CStateRestoreTraverser& traverser, TSizeSizePrMeanAccumulatorUMapQueue(latencyBuckets, bucketLength, currentBucketStartTime)) .first->second)}; if (traverser.traverseSubLevel(boost::bind( - TSizeSizePrMeanAccumulatorUMapQueue::CSerializer(), boost::ref(*data), _1)) == false) { + TSizeSizePrMeanAccumulatorUMapQueue::CSerializer(), + boost::ref(*data), _1)) == false) { LOG_ERROR(<< "Invalid times mapping in " << traverser.value()); return false; } @@ -300,7 +326,8 @@ void apply(ITR begin, ITR end, const F& f) { try { switch (category) { case model_t::E_DiurnalTimes: { - f(boost::any_cast::TRef>(itr->second)); + f(boost::any_cast::TRef>( + itr->second)); break; } case model_t::E_MeanArrivalTimes: { @@ -312,10 +339,13 @@ void apply(ITR begin, ITR end, const F& f) { break; } case model_t::E_UniqueValues: - f(boost::any_cast::TRef>(itr->second)); + f(boost::any_cast::TRef>( + itr->second)); break; } - } catch (const std::exception& e) { LOG_ERROR(<< "Apply failed for " << category << ": " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Apply failed for " << category << ": " << e.what()); + } } } @@ -327,7 +357,9 @@ void apply(T& featureData, const F& f) { //! \brief Removes people from the feature data. 
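// Illustrative sketch of the type-erased dispatch used by apply() above:
// feature data of different concrete types share one map as boost::any,
// keyed by category, and each access any_casts back to the type implied by
// the key. The enum and payload types here are hypothetical simplifications.
#include <boost/any.hpp>

#include <iostream>
#include <map>
#include <string>
#include <vector>

enum ECategory { E_Counts, E_Strings };

int main() {
    std::map<ECategory, boost::any> featureData;
    featureData[E_Counts] = std::vector<int>{1, 2, 3};
    featureData[E_Strings] = std::string("abc");

    for (auto& entry : featureData) {
        try {
            switch (entry.first) {
            case E_Counts: {
                auto& counts = boost::any_cast<std::vector<int>&>(entry.second);
                std::cout << counts.size() << " counts\n";
                break;
            }
            case E_Strings: {
                auto& s = boost::any_cast<std::string&>(entry.second);
                std::cout << "string '" << s << "'\n";
                break;
            }
            }
        } catch (const boost::bad_any_cast& e) {
            // A mismatched cast throws rather than corrupting memory; the
            // code above likewise logs the error and carries on.
            std::cerr << e.what() << '\n';
        }
    }
}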
struct SRemovePeople { - void operator()(TSizeUSetVec& attributePeople, std::size_t lowestPersonToRemove, std::size_t endPeople) const { + void operator()(TSizeUSetVec& attributePeople, + std::size_t lowestPersonToRemove, + std::size_t endPeople) const { for (std::size_t cid = 0u; cid < attributePeople.size(); ++cid) { for (std::size_t pid = lowestPersonToRemove; pid < endPeople; ++pid) { attributePeople[cid].erase(pid); @@ -341,11 +373,13 @@ struct SRemovePeople { } } } - void - operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, std::size_t lowestPersonToRemove, std::size_t endPeople) const { + void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, + std::size_t lowestPersonToRemove, + std::size_t endPeople) const { for (auto& bucket : peopleAttributeUniqueValues) { for (auto i = bucket.begin(); i != bucket.end(); /**/) { - if (CDataGatherer::extractPersonId(*i) >= lowestPersonToRemove && CDataGatherer::extractPersonId(*i) < endPeople) { + if (CDataGatherer::extractPersonId(*i) >= lowestPersonToRemove && + CDataGatherer::extractPersonId(*i) < endPeople) { i = bucket.erase(i); } else { ++i; @@ -353,13 +387,18 @@ struct SRemovePeople { } } } - void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, const TSizeVec& peopleToRemove) const { - CBucketGatherer::remove(peopleToRemove, CDataGatherer::SExtractPersonId(), peopleAttributeUniqueValues); + void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, + const TSizeVec& peopleToRemove) const { + CBucketGatherer::remove(peopleToRemove, CDataGatherer::SExtractPersonId(), + peopleAttributeUniqueValues); } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, std::size_t lowestPersonToRemove, std::size_t endPeople) const { + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, + std::size_t lowestPersonToRemove, + std::size_t endPeople) const { for (auto& bucket : arrivalTimes) { for (auto i = bucket.begin(); i != bucket.end(); /**/) { - if (CDataGatherer::extractPersonId(*i) >= lowestPersonToRemove && CDataGatherer::extractPersonId(*i) < endPeople) { + if (CDataGatherer::extractPersonId(*i) >= lowestPersonToRemove && + CDataGatherer::extractPersonId(*i) < endPeople) { i = bucket.erase(i); } else { ++i; @@ -367,7 +406,8 @@ struct SRemovePeople { } } } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, const TSizeVec& peopleToRemove) const { + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, + const TSizeVec& peopleToRemove) const { CBucketGatherer::remove(peopleToRemove, CDataGatherer::SExtractPersonId(), arrivalTimes); } }; @@ -376,7 +416,8 @@ struct SRemovePeople { struct SRemoveAttributes { void operator()(TSizeUSetVec& attributePeople, std::size_t lowestAttributeToRemove) const { if (lowestAttributeToRemove < attributePeople.size()) { - attributePeople.erase(attributePeople.begin() + lowestAttributeToRemove, attributePeople.end()); + attributePeople.erase(attributePeople.begin() + lowestAttributeToRemove, + attributePeople.end()); } } void operator()(TSizeUSetVec& attributePeople, const TSizeVec& attributesToRemove) const { @@ -384,7 +425,8 @@ struct SRemoveAttributes { attributePeople[attributesToRemove[i]].clear(); } } - void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, std::size_t lowestAttributeToRemove) const { + void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, + std::size_t lowestAttributeToRemove) const { for (auto& bucket : peopleAttributeUniqueValues) 
{ for (auto i = bucket.begin(); i != bucket.end(); /**/) { if (CDataGatherer::extractAttributeId(*i) >= lowestAttributeToRemove) { @@ -395,10 +437,13 @@ struct SRemoveAttributes { } } } - void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, const TSizeVec& attributesToRemove) const { - CBucketGatherer::remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), peopleAttributeUniqueValues); + void operator()(TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, + const TSizeVec& attributesToRemove) const { + CBucketGatherer::remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), + peopleAttributeUniqueValues); } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, std::size_t lowestAttributeToRemove) const { + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, + std::size_t lowestAttributeToRemove) const { for (auto& bucket : arrivalTimes) { for (auto i = bucket.begin(); i != bucket.end(); /**/) { if (CDataGatherer::extractAttributeId(*i) >= lowestAttributeToRemove) { @@ -409,14 +454,18 @@ struct SRemoveAttributes { } } } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, const TSizeVec& attributesToRemove) const { - CBucketGatherer::remove(attributesToRemove, CDataGatherer::SExtractAttributeId(), arrivalTimes); + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, + const TSizeVec& attributesToRemove) const { + CBucketGatherer::remove(attributesToRemove, + CDataGatherer::SExtractAttributeId(), arrivalTimes); } }; //! \brief Computes a checksum for the feature data. struct SChecksum { - void operator()(const TSizeUSetVec& attributePeople, const CDataGatherer& gatherer, TStrUInt64Map& hashes) const { + void operator()(const TSizeUSetVec& attributePeople, + const CDataGatherer& gatherer, + TStrUInt64Map& hashes) const { using TStrCRef = boost::reference_wrapper; using TStrCRefVec = std::vector; @@ -435,19 +484,24 @@ struct SChecksum { } } } - void - operator()(const TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, const CDataGatherer& gatherer, TStrUInt64Map& hashes) const { + void operator()(const TSizeSizePrStrDataUMapQueue& peopleAttributeUniqueValues, + const CDataGatherer& gatherer, + TStrUInt64Map& hashes) const { for (const auto& uniques : peopleAttributeUniqueValues) { this->checksum(uniques, gatherer, hashes); } } - void operator()(const TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, const CDataGatherer& gatherer, TStrUInt64Map& hashes) const { + void operator()(const TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, + const CDataGatherer& gatherer, + TStrUInt64Map& hashes) const { for (const auto& time : arrivalTimes) { this->checksum(time, gatherer, hashes); } } template - void checksum(const boost::unordered_map& bucket, const CDataGatherer& gatherer, TStrUInt64Map& hashes) const { + void checksum(const boost::unordered_map& bucket, + const CDataGatherer& gatherer, + TStrUInt64Map& hashes) const { using TSizeUInt64VecUMap = boost::unordered_map; TSizeUInt64VecUMap attributeHashes; @@ -456,7 +510,8 @@ struct SChecksum { std::size_t pid = CDataGatherer::extractPersonId(value); std::size_t cid = CDataGatherer::extractAttributeId(value); if (gatherer.isPersonActive(pid) && gatherer.isAttributeActive(cid)) { - attributeHashes[cid].push_back(maths::CChecksum::calculate(0, value.second)); + attributeHashes[cid].push_back( + maths::CChecksum::calculate(0, value.second)); } } @@ -476,10 +531,14 @@ struct SResize { attributePeople.resize(cid + 1); } } - void 
operator()(TSizeSizePrStrDataUMapQueue& /*data*/, std::size_t /*pid*/, std::size_t /*cid*/) const { + void operator()(TSizeSizePrStrDataUMapQueue& /*data*/, + std::size_t /*pid*/, + std::size_t /*cid*/) const { // Not needed } - void operator()(const TSizeSizePrMeanAccumulatorUMapQueue& /*arrivalTimes*/, std::size_t /*pid*/, std::size_t /*cid*/) const { + void operator()(const TSizeSizePrMeanAccumulatorUMapQueue& /*arrivalTimes*/, + std::size_t /*pid*/, + std::size_t /*cid*/) const { // Not needed } }; @@ -536,14 +595,16 @@ struct SAddValue { //! \brief Updates the feature data for the start of a new bucket. struct SNewBucket { void operator()(TSizeUSetVec& /*attributePeople*/, core_t::TTime /*time*/) const {} - void operator()(TSizeSizePrStrDataUMapQueue& personAttributeUniqueCounts, core_t::TTime time) const { + void operator()(TSizeSizePrStrDataUMapQueue& personAttributeUniqueCounts, + core_t::TTime time) const { if (time > personAttributeUniqueCounts.latestBucketEnd()) { personAttributeUniqueCounts.push(TSizeSizePrStrDataUMap(1), time); } else { personAttributeUniqueCounts.get(time).clear(); } } - void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, core_t::TTime time) const { + void operator()(TSizeSizePrMeanAccumulatorUMapQueue& arrivalTimes, + core_t::TTime time) const { if (time > arrivalTimes.latestBucketEnd()) { arrivalTimes.push(TSizeSizePrMeanAccumulatorUMap(1), time); } else { @@ -557,7 +618,8 @@ const std::string DICTIONARY_WORD_TAG("a"); const std::string UNIQUE_WORD_TAG("b"); //! Persist a collection of unique strings. -void persistUniqueStrings(const CUniqueStringFeatureData::TWordStringUMap& map, core::CStatePersistInserter& inserter) { +void persistUniqueStrings(const CUniqueStringFeatureData::TWordStringUMap& map, + core::CStatePersistInserter& inserter) { using TWordVec = std::vector; if (!map.empty()) { @@ -577,7 +639,8 @@ void persistUniqueStrings(const CUniqueStringFeatureData::TWordStringUMap& map, } //! Restore a collection of unique strings. -bool restoreUniqueStrings(core::CStateRestoreTraverser& traverser, CUniqueStringFeatureData::TWordStringUMap& map) { +bool restoreUniqueStrings(core::CStateRestoreTraverser& traverser, + CUniqueStringFeatureData::TWordStringUMap& map) { CUniqueStringFeatureData::TWord word; do { const std::string& name = traverser.name(); @@ -609,7 +672,8 @@ void persistInfluencerUniqueStrings(const CUniqueStringFeatureData::TStoredStrin } //! Restore influencer collections of unique strings. 
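// A toy sketch of the "start new bucket" decision in SNewBucket above: the
// latency window is a queue of per-bucket maps; moving past the latest bucket
// pushes a fresh map, while revisiting a bucket still inside the window just
// clears it. This queue is a hypothetical stand-in for the real bucket queue,
// not its implementation.
#include <cstdint>
#include <deque>
#include <map>
#include <utility>

using TTime = std::int64_t;
using TBucketMap = std::map<int, double>;

struct SToyBucketQueue {
    TTime bucketLength = 0;
    std::deque<std::pair<TTime, TBucketMap>> buckets; // newest at the front

    TTime latestBucketEnd() const {
        return buckets.empty() ? -1 : buckets.front().first + bucketLength - 1;
    }
    void push(TTime startTime) { buckets.emplace_front(startTime, TBucketMap{}); }
    TBucketMap& get(TTime time) {
        for (auto& bucket : buckets) {
            if (time >= bucket.first && time < bucket.first + bucketLength) {
                return bucket.second;
            }
        }
        return buckets.front().second; // simplification: fall back to newest
    }
    void newBucket(TTime time) {
        if (time > this->latestBucketEnd()) {
            this->push(time);
        } else {
            this->get(time).clear();
        }
    }
};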
-bool restoreInfluencerUniqueStrings(core::CStateRestoreTraverser& traverser, CUniqueStringFeatureData::TStoredStringPtrWordSetUMap& data) { +bool restoreInfluencerUniqueStrings(core::CStateRestoreTraverser& traverser, + CUniqueStringFeatureData::TStoredStringPtrWordSetUMap& data) { std::string key; do { const std::string& name = traverser.name(); @@ -664,8 +728,10 @@ CEventRateBucketGatherer::CEventRateBucketGatherer(CDataGatherer& dataGatherer, const std::string& valueFieldName, const TStrVec& influenceFieldNames, core_t::TTime startTime) - : CBucketGatherer(dataGatherer, startTime), m_BeginInfluencingFields(0), m_BeginValueField(0), m_BeginSummaryFields(0) { - this->initializeFieldNames(personFieldName, attributeFieldName, valueFieldName, summaryCountFieldName, influenceFieldNames); + : CBucketGatherer(dataGatherer, startTime), m_BeginInfluencingFields(0), + m_BeginValueField(0), m_BeginSummaryFields(0) { + this->initializeFieldNames(personFieldName, attributeFieldName, valueFieldName, + summaryCountFieldName, influenceFieldNames); this->initializeFeatureData(); } @@ -676,14 +742,17 @@ CEventRateBucketGatherer::CEventRateBucketGatherer(CDataGatherer& dataGatherer, const std::string& valueFieldName, const TStrVec& influenceFieldNames, core::CStateRestoreTraverser& traverser) - : CBucketGatherer(dataGatherer, 0), m_BeginInfluencingFields(0), m_BeginValueField(0), m_BeginSummaryFields(0) { - this->initializeFieldNames(personFieldName, attributeFieldName, valueFieldName, summaryCountFieldName, influenceFieldNames); - traverser.traverseSubLevel(boost::bind(&CEventRateBucketGatherer::acceptRestoreTraverser, this, _1)); + : CBucketGatherer(dataGatherer, 0), m_BeginInfluencingFields(0), + m_BeginValueField(0), m_BeginSummaryFields(0) { + this->initializeFieldNames(personFieldName, attributeFieldName, valueFieldName, + summaryCountFieldName, influenceFieldNames); + traverser.traverseSubLevel( + boost::bind(&CEventRateBucketGatherer::acceptRestoreTraverser, this, _1)); } -CEventRateBucketGatherer::CEventRateBucketGatherer(bool isForPersistence, const CEventRateBucketGatherer& other) - : CBucketGatherer(isForPersistence, other), - m_FieldNames(other.m_FieldNames), +CEventRateBucketGatherer::CEventRateBucketGatherer(bool isForPersistence, + const CEventRateBucketGatherer& other) + : CBucketGatherer(isForPersistence, other), m_FieldNames(other.m_FieldNames), m_BeginInfluencingFields(other.m_BeginInfluencingFields), m_BeginValueField(other.m_BeginValueField), m_BeginSummaryFields(other.m_BeginSummaryFields), @@ -697,10 +766,10 @@ bool CEventRateBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTravers this->clear(); do { const std::string& name = traverser.name(); - RESTORE(BASE_TAG, traverser.traverseSubLevel(boost::bind(&CBucketGatherer::baseAcceptRestoreTraverser, this, _1))) - if (restoreFeatureData( - traverser, m_FeatureData, m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime()) == - false) { + RESTORE(BASE_TAG, traverser.traverseSubLevel(boost::bind( + &CBucketGatherer::baseAcceptRestoreTraverser, this, _1))) + if (restoreFeatureData(traverser, m_FeatureData, m_DataGatherer.params().s_LatencyBuckets, + this->bucketLength(), this->currentBucketStartTime()) == false) { LOG_ERROR(<< "Invalid feature data in " << traverser.value()); return false; } @@ -710,7 +779,8 @@ bool CEventRateBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTravers } void CEventRateBucketGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const { 
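// A compact sketch of the restore-loop shape used throughout these files:
// visit each (name, value) pair at the current level, dispatch on the tag
// name, and stop when next() is exhausted. TToyTraverser and the tag values
// "a"/"b" are hypothetical stand-ins for core::CStateRestoreTraverser and the
// real tag constants.
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

struct TToyTraverser {
    std::vector<std::pair<std::string, std::string>> state;
    std::size_t pos = 0;
    const std::string& name() const { return state[pos].first; }
    const std::string& value() const { return state[pos].second; }
    bool next() { return ++pos < state.size(); }
};

bool restorePerson(TToyTraverser& traverser, std::string& personName, std::string& personId) {
    do {
        const std::string& tag = traverser.name();
        if (tag == "a") {        // e.g. a person tag
            personName = traverser.value();
        } else if (tag == "b") { // e.g. an id tag
            personId = traverser.value();
        }
        // Unrecognised tags are skipped, which is what makes the format
        // forwards compatible.
    } while (traverser.next());
    return true;
}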
- inserter.insertLevel(BASE_TAG, boost::bind(&CBucketGatherer::baseAcceptPersistInserter, this, _1)); + inserter.insertLevel( + BASE_TAG, boost::bind(&CBucketGatherer::baseAcceptPersistInserter, this, _1)); persistFeatureData(m_FeatureData, inserter); } @@ -731,7 +801,8 @@ const std::string& CEventRateBucketGatherer::attributeFieldName() const { } const std::string& CEventRateBucketGatherer::valueFieldName() const { - return m_BeginValueField != m_BeginSummaryFields ? m_FieldNames[m_BeginValueField] : EMPTY_STRING; + return m_BeginValueField != m_BeginSummaryFields ? m_FieldNames[m_BeginValueField] + : EMPTY_STRING; } CEventRateBucketGatherer::TStrVecCItr CEventRateBucketGatherer::beginInfluencers() const { @@ -748,15 +819,20 @@ const CEventRateBucketGatherer::TStrVec& CEventRateBucketGatherer::fieldsOfInter std::string CEventRateBucketGatherer::description() const { return function_t::name(function_t::function(m_DataGatherer.features())) + - (m_BeginValueField == m_BeginSummaryFields ? "" : (" " + m_FieldNames[m_BeginValueField])) + + (m_BeginValueField == m_BeginSummaryFields + ? "" + : (" " + m_FieldNames[m_BeginValueField])) + (byField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " by ") + byField(m_DataGatherer.isPopulation(), m_FieldNames) + (overField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " over ") + - overField(m_DataGatherer.isPopulation(), m_FieldNames) + (m_DataGatherer.partitionFieldName().empty() ? "" : " partition=") + + overField(m_DataGatherer.isPopulation(), m_FieldNames) + + (m_DataGatherer.partitionFieldName().empty() ? "" : " partition=") + m_DataGatherer.partitionFieldName(); } -bool CEventRateBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) { +bool CEventRateBucketGatherer::processFields(const TStrCPtrVec& fieldValues, + CEventData& result, + CResourceMonitor& resourceMonitor) { using TOptionalSize = boost::optional; using TOptionalStr = boost::optional; @@ -766,7 +842,9 @@ bool CEventRateBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEv return false; } - const std::string* person = (fieldValues[0] == nullptr && m_DataGatherer.useNull()) ? &EMPTY_STRING : fieldValues[0]; + const std::string* person = (fieldValues[0] == nullptr && m_DataGatherer.useNull()) + ? &EMPTY_STRING + : fieldValues[0]; if (person == nullptr) { // Just ignore: the "person" field wasn't present in the // record. Note that we don't warn here since we'll permit @@ -787,7 +865,9 @@ bool CEventRateBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEv std::size_t count = 1; if (m_DataGatherer.summaryMode() != model_t::E_None) { - if (m_DataGatherer.extractCountFromField(m_FieldNames[m_BeginSummaryFields], fieldValues[m_BeginSummaryFields], count) == false) { + if (m_DataGatherer.extractCountFromField(m_FieldNames[m_BeginSummaryFields], + fieldValues[m_BeginSummaryFields], + count) == false) { result.addValue(); return true; } @@ -825,10 +905,12 @@ bool CEventRateBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEv return false; } if (addedPerson) { - resourceMonitor.addExtraMemory(m_DataGatherer.isPopulation() ? CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD - : CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); - (m_DataGatherer.isPopulation() ? core::CStatistics::stat(stat_t::E_NumberOverFields) - : core::CStatistics::stat(stat_t::E_NumberByFields)) + resourceMonitor.addExtraMemory(m_DataGatherer.isPopulation() + ? 
CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD + : CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); + (m_DataGatherer.isPopulation() + ? core::CStatistics::stat(stat_t::E_NumberOverFields) + : core::CStatistics::stat(stat_t::E_NumberByFields)) .increment(); } if (!result.person(personId)) { @@ -837,7 +919,9 @@ bool CEventRateBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEv } if (m_DataGatherer.isPopulation()) { - const std::string* attribute = (fieldValues[1] == nullptr && m_DataGatherer.useNull()) ? &EMPTY_STRING : fieldValues[1]; + const std::string* attribute = + (fieldValues[1] == nullptr && m_DataGatherer.useNull()) ? &EMPTY_STRING + : fieldValues[1]; if (attribute == nullptr) { // Just ignore: the "by" field wasn't present in the @@ -855,7 +939,8 @@ bool CEventRateBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEv if (result.isExplicitNull()) { m_DataGatherer.attributeId(*attribute, newAttribute); } else { - newAttribute = m_DataGatherer.addAttribute(*attribute, resourceMonitor, addedAttribute); + newAttribute = m_DataGatherer.addAttribute(*attribute, resourceMonitor, + addedAttribute); } result.addAttribute(TOptionalSize(newAttribute)); @@ -875,13 +960,15 @@ void CEventRateBucketGatherer::recyclePeople(const TSizeVec& peopleToRemove) { return; } - apply(m_FeatureData, boost::bind(SRemovePeople(), _1, boost::cref(peopleToRemove))); + apply(m_FeatureData, + boost::bind(SRemovePeople(), _1, boost::cref(peopleToRemove))); this->CBucketGatherer::recyclePeople(peopleToRemove); } void CEventRateBucketGatherer::removePeople(std::size_t lowestPersonToRemove) { - apply(m_FeatureData, boost::bind(SRemovePeople(), _1, lowestPersonToRemove, m_DataGatherer.numberPeople())); + apply(m_FeatureData, boost::bind(SRemovePeople(), _1, lowestPersonToRemove, + m_DataGatherer.numberPeople())); this->CBucketGatherer::removePeople(lowestPersonToRemove); } @@ -890,7 +977,8 @@ void CEventRateBucketGatherer::recycleAttributes(const TSizeVec& attributesToRem return; } - apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, boost::cref(attributesToRemove))); + apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, + boost::cref(attributesToRemove))); this->CBucketGatherer::recycleAttributes(attributesToRemove); } @@ -904,7 +992,8 @@ uint64_t CEventRateBucketGatherer::checksum() const { uint64_t seed = this->CBucketGatherer::checksum(); TStrUInt64Map hashes; - apply(m_FeatureData, boost::bind(SChecksum(), _1, boost::cref(m_DataGatherer), boost::ref(hashes))); + apply(m_FeatureData, boost::bind(SChecksum(), _1, boost::cref(m_DataGatherer), + boost::ref(hashes))); LOG_TRACE(<< "seed = " << seed); LOG_TRACE(<< "hashes = " << core::CContainerPrinter::print(hashes)); core::CHashing::CSafeMurmurHash2String64 hasher; @@ -950,11 +1039,15 @@ void CEventRateBucketGatherer::sample(core_t::TTime time) { this->CBucketGatherer::sample(time); } -void CEventRateBucketGatherer::featureData(core_t::TTime time, core_t::TTime /*bucketLength*/, TFeatureAnyPrVec& result) const { +void CEventRateBucketGatherer::featureData(core_t::TTime time, + core_t::TTime /*bucketLength*/, + TFeatureAnyPrVec& result) const { result.clear(); - if (!this->dataAvailable(time) || time >= this->currentBucketStartTime() + this->bucketLength()) { - LOG_DEBUG(<< "No data available at " << time << ", current bucket = " << this->printCurrentBucket()); + if (!this->dataAvailable(time) || + time >= this->currentBucketStartTime() + this->bucketLength()) { + LOG_DEBUG(<< "No data available at " << time + << ", current 
bucket = " << this->printCurrentBucket()); return; } @@ -1067,50 +1160,62 @@ void CEventRateBucketGatherer::featureData(core_t::TTime time, core_t::TTime /*b } } -void CEventRateBucketGatherer::personCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::personCounts(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result_) const { if (m_DataGatherer.isPopulation()) { LOG_ERROR(<< "Function does not support population analysis."); return; } result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); result.reserve(m_DataGatherer.numberActivePeople()); for (std::size_t pid = 0u, n = m_DataGatherer.numberPeople(); pid < n; ++pid) { - if (!m_DataGatherer.isPersonActive(pid) || this->hasExplicitNullsOnly(time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)) { + if (!m_DataGatherer.isPersonActive(pid) || + this->hasExplicitNullsOnly(time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)) { continue; } result.emplace_back(pid, 0); } for (const auto& count_ : this->bucketCounts(time)) { - uint64_t& count = - std::lower_bound(result.begin(), result.end(), CDataGatherer::extractPersonId(count_), maths::COrderings::SFirstLess()) - ->second.s_Count; + uint64_t& count = std::lower_bound(result.begin(), result.end(), + CDataGatherer::extractPersonId(count_), + maths::COrderings::SFirstLess()) + ->second.s_Count; count += CDataGatherer::extractData(count_); } this->addInfluencerCounts(time, result); } -void CEventRateBucketGatherer::nonZeroPersonCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::nonZeroPersonCounts(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); const TSizeSizePrUInt64UMap& personAttributeCounts = this->bucketCounts(time); result.reserve(personAttributeCounts.size()); for (const auto& count : personAttributeCounts) { - result.emplace_back(CDataGatherer::extractPersonId(count), CDataGatherer::extractData(count)); + result.emplace_back(CDataGatherer::extractPersonId(count), + CDataGatherer::extractData(count)); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); this->addInfluencerCounts(time, result); } -void CEventRateBucketGatherer::personIndicator(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::personIndicator(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); const TSizeSizePrUInt64UMap& personAttributeCounts = this->bucketCounts(time); result.reserve(personAttributeCounts.size()); @@ -1122,14 +1227,19 @@ void CEventRateBucketGatherer::personIndicator(model_t::EFeature feature, core_t this->addInfluencerCounts(time, result); } -void CEventRateBucketGatherer::personArrivalTimes(model_t::EFeature feature, core_t::TTime /*time*/, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::personArrivalTimes(model_t::EFeature feature, + core_t::TTime /*time*/, + 
TFeatureAnyPrVec& result_) const { // TODO result_.emplace_back(feature, TSizeFeatureDataPrVec()); } -void CEventRateBucketGatherer::nonZeroAttributeCounts(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::nonZeroAttributeCounts(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); const TSizeSizePrUInt64UMap& personAttributeCounts = this->bucketCounts(time); result.reserve(personAttributeCounts.size()); @@ -1143,9 +1253,11 @@ void CEventRateBucketGatherer::nonZeroAttributeCounts(model_t::EFeature feature, this->addInfluencerCounts(time, result); } -void CEventRateBucketGatherer::peoplePerAttribute(model_t::EFeature feature, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::peoplePerAttribute(model_t::EFeature feature, + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_AttributePeople); if (i == m_FeatureData.end()) { @@ -1161,13 +1273,18 @@ void CEventRateBucketGatherer::peoplePerAttribute(model_t::EFeature feature, TFe } } } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to extract " << model_t::print(model_t::E_PopulationUniquePersonCountByAttribute) << ": " << e.what()); + LOG_ERROR(<< "Failed to extract " + << model_t::print(model_t::E_PopulationUniquePersonCountByAttribute) + << ": " << e.what()); } } -void CEventRateBucketGatherer::attributeIndicator(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::attributeIndicator(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); const TSizeSizePrUInt64UMap& counts = this->bucketCounts(time); result.reserve(counts.size()); @@ -1189,9 +1306,12 @@ void CEventRateBucketGatherer::attributeIndicator(model_t::EFeature feature, cor } } -void CEventRateBucketGatherer::bucketUniqueValuesPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::bucketUniqueValuesPerPerson(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_UniqueValues); if (i == m_FeatureData.end()) { @@ -1199,15 +1319,19 @@ void CEventRateBucketGatherer::bucketUniqueValuesPerPerson(model_t::EFeature fea } try { - const auto& personAttributeUniqueValues = boost::any_cast(i->second).get(time); + const auto& personAttributeUniqueValues = + boost::any_cast(i->second).get(time); result.reserve(personAttributeUniqueValues.size()); for (const auto& uniques : personAttributeUniqueValues) { result.emplace_back(CDataGatherer::extractPersonId(uniques), 0); - CDataGatherer::extractData(uniques).populateDistinctCountFeatureData(result.back().second); + 
CDataGatherer::extractData(uniques).populateDistinctCountFeatureData( + result.back().second); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to extract " << model_t::print(model_t::E_IndividualUniqueCountByBucketAndPerson) << ": " << e.what()); + LOG_ERROR(<< "Failed to extract " + << model_t::print(model_t::E_IndividualUniqueCountByBucketAndPerson) + << ": " << e.what()); } } @@ -1215,7 +1339,8 @@ void CEventRateBucketGatherer::bucketUniqueValuesPerPersonAttribute(model_t::EFe core_t::TTime time, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_UniqueValues); if (i == m_FeatureData.end()) { @@ -1223,16 +1348,19 @@ void CEventRateBucketGatherer::bucketUniqueValuesPerPersonAttribute(model_t::EFe } try { - const auto& personAttributeUniqueValues = boost::any_cast(i->second).get(time); + const auto& personAttributeUniqueValues = + boost::any_cast(i->second).get(time); result.reserve(personAttributeUniqueValues.size()); for (const auto& uniques : personAttributeUniqueValues) { result.emplace_back(uniques.first, 0); - CDataGatherer::extractData(uniques).populateDistinctCountFeatureData(result.back().second); + CDataGatherer::extractData(uniques).populateDistinctCountFeatureData( + result.back().second); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to extract " << model_t::print(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute) << ": " - << e.what()); + LOG_ERROR(<< "Failed to extract " + << model_t::print(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute) + << ": " << e.what()); } } @@ -1240,7 +1368,8 @@ void CEventRateBucketGatherer::bucketCompressedLengthPerPerson(model_t::EFeature core_t::TTime time, TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_UniqueValues); if (i == m_FeatureData.end()) { @@ -1248,23 +1377,29 @@ void CEventRateBucketGatherer::bucketCompressedLengthPerPerson(model_t::EFeature } try { - const auto& personAttributeUniqueValues = boost::any_cast(i->second).get(time); + const auto& personAttributeUniqueValues = + boost::any_cast(i->second).get(time); result.reserve(personAttributeUniqueValues.size()); for (const auto& uniques : personAttributeUniqueValues) { result.emplace_back(CDataGatherer::extractPersonId(uniques), 0); - CDataGatherer::extractData(uniques).populateInfoContentFeatureData(result.back().second); + CDataGatherer::extractData(uniques).populateInfoContentFeatureData( + result.back().second); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to extract " << model_t::print(model_t::E_IndividualInfoContentByBucketAndPerson) << ": " << e.what()); + LOG_ERROR(<< "Failed to extract " + << model_t::print(model_t::E_IndividualInfoContentByBucketAndPerson) + << ": " << e.what()); } } -void CEventRateBucketGatherer::bucketCompressedLengthPerPersonAttribute(model_t::EFeature feature, - core_t::TTime time, - TFeatureAnyPrVec& result_) const { +void 
CEventRateBucketGatherer::bucketCompressedLengthPerPersonAttribute( + model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_UniqueValues); if (i == m_FeatureData.end()) { @@ -1272,22 +1407,28 @@ void CEventRateBucketGatherer::bucketCompressedLengthPerPersonAttribute(model_t: } try { - const auto& personAttributeUniqueValues = boost::any_cast(i->second).get(time); + const auto& personAttributeUniqueValues = + boost::any_cast(i->second).get(time); result.reserve(personAttributeUniqueValues.size()); for (const auto& uniques : personAttributeUniqueValues) { result.emplace_back(uniques.first, 0); - CDataGatherer::extractData(uniques).populateInfoContentFeatureData(result.back().second); + CDataGatherer::extractData(uniques).populateInfoContentFeatureData( + result.back().second); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to extract " << model_t::print(model_t::E_PopulationInfoContentByBucketPersonAndAttribute) << ": " - << e.what()); + LOG_ERROR(<< "Failed to extract " + << model_t::print(model_t::E_PopulationInfoContentByBucketPersonAndAttribute) + << ": " << e.what()); } } -void CEventRateBucketGatherer::bucketMeanTimesPerPerson(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { +void CEventRateBucketGatherer::bucketMeanTimesPerPerson(model_t::EFeature feature, + core_t::TTime time, + TFeatureAnyPrVec& result_) const { result_.emplace_back(feature, TSizeFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_DiurnalTimes); if (i == m_FeatureData.end()) { @@ -1295,11 +1436,14 @@ void CEventRateBucketGatherer::bucketMeanTimesPerPerson(model_t::EFeature featur } try { - const auto& arrivalTimes = boost::any_cast(i->second).get(time); + const auto& arrivalTimes = + boost::any_cast(i->second) + .get(time); result.reserve(arrivalTimes.size()); for (const auto& time_ : arrivalTimes) { result.emplace_back(CDataGatherer::extractPersonId(time_), - static_cast(maths::CBasicStatistics::mean(CDataGatherer::extractData(time_)))); + static_cast(maths::CBasicStatistics::mean( + CDataGatherer::extractData(time_)))); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); @@ -1311,18 +1455,23 @@ void CEventRateBucketGatherer::bucketMeanTimesPerPerson(model_t::EFeature featur SEventRateFeatureData& data = result[j].second; for (std::size_t k = 0u; k < data.s_InfluenceValues.size(); ++k) { for (std::size_t l = 0u; l < data.s_InfluenceValues[k].size(); ++l) { - data.s_InfluenceValues[k][l].second.first = TDouble1Vec{static_cast(data.s_Count)}; + data.s_InfluenceValues[k][l].second.first = + TDouble1Vec{static_cast(data.s_Count)}; } } } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to extract " << model_t::print(model_t::E_DiurnalTimes) << ": " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to extract " + << model_t::print(model_t::E_DiurnalTimes) << ": " << e.what()); + } } void CEventRateBucketGatherer::bucketMeanTimesPerPersonAttribute(model_t::EFeature feature, core_t::TTime time, TFeatureAnyPrVec& result_) const { 
result_.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - auto& result = *boost::unsafe_any_cast(&result_.back().second); + auto& result = + *boost::unsafe_any_cast(&result_.back().second); auto i = m_FeatureData.find(model_t::E_DiurnalTimes); if (i == m_FeatureData.end()) { @@ -1330,10 +1479,14 @@ void CEventRateBucketGatherer::bucketMeanTimesPerPersonAttribute(model_t::EFeatu } try { - const auto& arrivalTimes = boost::any_cast(i->second).get(time); + const auto& arrivalTimes = + boost::any_cast(i->second) + .get(time); result.reserve(arrivalTimes.size()); for (const auto& time_ : arrivalTimes) { - result.emplace_back(time_.first, static_cast(maths::CBasicStatistics::mean(CDataGatherer::extractData(time_)))); + result.emplace_back(time_.first, + static_cast(maths::CBasicStatistics::mean( + CDataGatherer::extractData(time_)))); } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); @@ -1345,11 +1498,15 @@ void CEventRateBucketGatherer::bucketMeanTimesPerPersonAttribute(model_t::EFeatu SEventRateFeatureData& data = result[j].second; for (std::size_t k = 0u; k < data.s_InfluenceValues.size(); ++k) { for (std::size_t l = 0u; l < data.s_InfluenceValues[k].size(); ++l) { - data.s_InfluenceValues[k][l].second.first = TDouble1Vec{static_cast(data.s_Count)}; + data.s_InfluenceValues[k][l].second.first = + TDouble1Vec{static_cast(data.s_Count)}; } } } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to extract " << model_t::print(model_t::E_DiurnalTimes) << ": " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to extract " + << model_t::print(model_t::E_DiurnalTimes) << ": " << e.what()); + } } void CEventRateBucketGatherer::resize(std::size_t pid, std::size_t cid) { @@ -1365,9 +1522,9 @@ void CEventRateBucketGatherer::addValue(std::size_t pid, const TStoredStringPtrVec& influences) { // Check that we are correctly sized - a person/attribute might have been added this->resize(pid, cid); - apply( - m_FeatureData, - boost::bind(SAddValue(), _1, pid, cid, time, count, boost::cref(values), boost::cref(stringValue), boost::cref(influences))); + apply(m_FeatureData, + boost::bind(SAddValue(), _1, pid, cid, time, count, boost::cref(values), + boost::cref(stringValue), boost::cref(influences))); } void CEventRateBucketGatherer::startNewBucket(core_t::TTime time, bool /*skipUpdates*/) { @@ -1385,7 +1542,8 @@ void CEventRateBucketGatherer::initializeFieldNames(const std::string& personFie } m_BeginInfluencingFields = m_FieldNames.size(); - m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), influenceFieldNames.end()); + m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), + influenceFieldNames.end()); m_BeginValueField = m_FieldNames.size(); if (!valueFieldName.empty()) { @@ -1424,7 +1582,8 @@ void CEventRateBucketGatherer::initializeFeatureData() { case model_t::E_IndividualTimeOfDayByBucketAndPerson: case model_t::E_IndividualTimeOfWeekByBucketAndPerson: m_FeatureData[model_t::E_DiurnalTimes] = TSizeSizePrMeanAccumulatorUMapQueue( - m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime()); + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), + this->currentBucketStartTime()); break; case model_t::E_IndividualLowNonZeroCountByBucketAndPerson: @@ -1438,7 +1597,8 @@ void CEventRateBucketGatherer::initializeFeatureData() { case model_t::E_IndividualHighInfoContentByBucketAndPerson: case model_t::E_IndividualLowInfoContentByBucketAndPerson: 
m_FeatureData[model_t::E_UniqueValues] = TSizeSizePrStrDataUMapQueue( - m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), + this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); break; case model_t::E_PopulationAttributeTotalCountByPerson: @@ -1458,12 +1618,14 @@ void CEventRateBucketGatherer::initializeFeatureData() { case model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute: case model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute: m_FeatureData[model_t::E_UniqueValues] = TSizeSizePrStrDataUMapQueue( - m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), + this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); break; case model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute: case model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute: m_FeatureData[model_t::E_DiurnalTimes] = TSizeSizePrMeanAccumulatorUMapQueue( - m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime()); + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), + this->currentBucketStartTime()); break; case model_t::E_PeersAttributeTotalCountByPerson: @@ -1479,25 +1641,30 @@ void CEventRateBucketGatherer::initializeFeatureData() { case model_t::E_PeersLowInfoContentByBucketPersonAndAttribute: case model_t::E_PeersHighInfoContentByBucketPersonAndAttribute: m_FeatureData[model_t::E_UniqueValues] = TSizeSizePrStrDataUMapQueue( - m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), + this->currentBucketStartTime(), TSizeSizePrStrDataUMap(1)); break; case model_t::E_PeersTimeOfDayByBucketPersonAndAttribute: case model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute: m_FeatureData[model_t::E_DiurnalTimes] = TSizeSizePrMeanAccumulatorUMapQueue( - m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), this->currentBucketStartTime()); + m_DataGatherer.params().s_LatencyBuckets, this->bucketLength(), + this->currentBucketStartTime()); break; CASE_INDIVIDUAL_METRIC: CASE_POPULATION_METRIC: CASE_PEERS_METRIC: - LOG_ERROR(<< "Unexpected feature = " << model_t::print(m_DataGatherer.feature(i))) + LOG_ERROR(<< "Unexpected feature = " + << model_t::print(m_DataGatherer.feature(i))) break; } } } -void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, TSizeFeatureDataPrVec& result) const { - const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencers = this->influencerCounts(time); +void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, + TSizeFeatureDataPrVec& result) const { + const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencers = + this->influencerCounts(time); if (influencers.empty()) { return; } @@ -1509,19 +1676,24 @@ void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, TSizeFeat for (std::size_t i = 0u; i < influencers.size(); ++i) { for (const auto& influence : influencers[i]) { std::size_t pid = CDataGatherer::extractPersonId(influence.first); - auto k = std::lower_bound(result.begin(), result.end(), pid, maths::COrderings::SFirstLess()); + auto k = std::lower_bound(result.begin(), result.end(), pid, + maths::COrderings::SFirstLess()); if (k == result.end() || k->first != 
pid) { - LOG_ERROR(<< "Missing feature data for person " << m_DataGatherer.personName(pid)); + LOG_ERROR(<< "Missing feature data for person " + << m_DataGatherer.personName(pid)); continue; } - k->second.s_InfluenceValues[i].emplace_back(TStrCRef(*CDataGatherer::extractData(influence.first)), - TDouble1VecDoublePr(TDouble1Vec{static_cast(influence.second)}, 1.0)); + k->second.s_InfluenceValues[i].emplace_back( + TStrCRef(*CDataGatherer::extractData(influence.first)), + TDouble1VecDoublePr(TDouble1Vec{static_cast(influence.second)}, 1.0)); } } } -void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, TSizeSizePrFeatureDataPrVec& result) const { - const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencers = this->influencerCounts(time); +void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, + TSizeSizePrFeatureDataPrVec& result) const { + const TSizeSizePrStoredStringPtrPrUInt64UMapVec& influencers = + this->influencerCounts(time); if (influencers.empty()) { return; } @@ -1532,23 +1704,28 @@ void CEventRateBucketGatherer::addInfluencerCounts(core_t::TTime time, TSizeSize for (std::size_t i = 0u; i < influencers.size(); ++i) { for (const auto& influence : influencers[i]) { - auto k = std::lower_bound(result.begin(), result.end(), influence.first.first, maths::COrderings::SFirstLess()); + auto k = std::lower_bound(result.begin(), result.end(), + influence.first.first, + maths::COrderings::SFirstLess()); if (k == result.end() || k->first != influence.first.first) { std::size_t pid = CDataGatherer::extractPersonId(influence.first); std::size_t cid = CDataGatherer::extractAttributeId(influence.first); - LOG_ERROR(<< "Missing feature data for person " << m_DataGatherer.personName(pid) << " and attribute " + LOG_ERROR(<< "Missing feature data for person " + << m_DataGatherer.personName(pid) << " and attribute " << m_DataGatherer.attributeName(cid)); continue; } - k->second.s_InfluenceValues[i].emplace_back(TStrCRef(*CDataGatherer::extractData(influence.first)), - TDouble1VecDoublePr(TDouble1Vec{static_cast(influence.second)}, 1.0)); + k->second.s_InfluenceValues[i].emplace_back( + TStrCRef(*CDataGatherer::extractData(influence.first)), + TDouble1VecDoublePr(TDouble1Vec{static_cast(influence.second)}, 1.0)); } } } ////// CUniqueStringFeatureData ////// -void CUniqueStringFeatureData::insert(const std::string& value, const TStoredStringPtrVec& influences) { +void CUniqueStringFeatureData::insert(const std::string& value, + const TStoredStringPtrVec& influences) { TWord valueHash = m_Dictionary1.word(value); m_UniqueStrings.emplace(valueHash, value); if (influences.size() > m_InfluencerUniqueStrings.size()) { @@ -1570,8 +1747,10 @@ void CUniqueStringFeatureData::populateDistinctCountFeatureData(SEventRateFeatur TStrCRefDouble1VecDoublePrPrVec& data = featureData.s_InfluenceValues[i]; data.reserve(m_InfluencerUniqueStrings[i].size()); for (const auto& influence : m_InfluencerUniqueStrings[i]) { - data.emplace_back(TStrCRef(*influence.first), - TDouble1VecDoublePr(TDouble1Vec{static_cast(influence.second.size())}, 1.0)); + data.emplace_back( + TStrCRef(*influence.first), + TDouble1VecDoublePr( + TDouble1Vec{static_cast(influence.second.size())}, 1.0)); } } } @@ -1590,7 +1769,9 @@ void CUniqueStringFeatureData::populateInfoContentFeatureData(SEventRateFeatureD strings.emplace_back(string.second); } std::sort(strings.begin(), strings.end(), maths::COrderings::SLess()); - std::for_each(strings.begin(), strings.end(), [&compressor](const std::string& string) { 
compressor.addString(string); }); + std::for_each(strings.begin(), strings.end(), [&compressor](const std::string& string) { + compressor.addString(string); + }); std::size_t length = 0u; if (compressor.compressedLength(true, length) == false) { @@ -1602,7 +1783,8 @@ void CUniqueStringFeatureData::populateInfoContentFeatureData(SEventRateFeatureD featureData.s_InfluenceValues.reserve(m_InfluencerUniqueStrings.size()); for (std::size_t i = 0u; i < m_InfluencerUniqueStrings.size(); ++i) { featureData.s_InfluenceValues.push_back(TStrCRefDouble1VecDoublePrPrVec()); - TStrCRefDouble1VecDoublePrPrVec& data = featureData.s_InfluenceValues.back(); + TStrCRefDouble1VecDoublePrPrVec& data = + featureData.s_InfluenceValues.back(); for (const auto& influence : m_InfluencerUniqueStrings[i]) { strings.clear(); strings.reserve(influence.second.size()); @@ -1610,34 +1792,48 @@ void CUniqueStringFeatureData::populateInfoContentFeatureData(SEventRateFeatureD strings.emplace_back(m_UniqueStrings.at(word)); } std::sort(strings.begin(), strings.end(), maths::COrderings::SLess()); - std::for_each(strings.begin(), strings.end(), [&compressor](const std::string& string) { compressor.addString(string); }); + std::for_each(strings.begin(), strings.end(), + [&compressor](const std::string& string) { + compressor.addString(string); + }); length = 0u; if (compressor.compressedLength(true, length) == false) { LOG_ERROR(<< "Failed to get compressed length"); compressor.reset(); } - data.emplace_back(TStrCRef(*influence.first), TDouble1VecDoublePr(TDouble1Vec{static_cast(length)}, 1.0)); + data.emplace_back( + TStrCRef(*influence.first), + TDouble1VecDoublePr(TDouble1Vec{static_cast(length)}, 1.0)); } } - } catch (const std::exception& e) { LOG_ERROR(<< "Failed to get info content: " << e.what()); } + } catch (const std::exception& e) { + LOG_ERROR(<< "Failed to get info content: " << e.what()); + } } void CUniqueStringFeatureData::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(UNIQUE_STRINGS_TAG, boost::bind(&persistUniqueStrings, boost::cref(m_UniqueStrings), _1)); + inserter.insertLevel( + UNIQUE_STRINGS_TAG, + boost::bind(&persistUniqueStrings, boost::cref(m_UniqueStrings), _1)); for (std::size_t i = 0u; i < m_InfluencerUniqueStrings.size(); ++i) { inserter.insertLevel(INFLUENCER_UNIQUE_STRINGS_TAG, - boost::bind(&persistInfluencerUniqueStrings, boost::cref(m_InfluencerUniqueStrings[i]), _1)); + boost::bind(&persistInfluencerUniqueStrings, + boost::cref(m_InfluencerUniqueStrings[i]), _1)); } } bool CUniqueStringFeatureData::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE(UNIQUE_STRINGS_TAG, traverser.traverseSubLevel(boost::bind(&restoreUniqueStrings, _1, boost::ref(m_UniqueStrings)))) + RESTORE(UNIQUE_STRINGS_TAG, + traverser.traverseSubLevel(boost::bind( + &restoreUniqueStrings, _1, boost::ref(m_UniqueStrings)))) RESTORE_SETUP_TEARDOWN( INFLUENCER_UNIQUE_STRINGS_TAG, m_InfluencerUniqueStrings.push_back(TStoredStringPtrWordSetUMap()), - traverser.traverseSubLevel(boost::bind(&restoreInfluencerUniqueStrings, _1, boost::ref(m_InfluencerUniqueStrings.back()))), + traverser.traverseSubLevel( + boost::bind(&restoreInfluencerUniqueStrings, _1, + boost::ref(m_InfluencerUniqueStrings.back()))), /**/) } while (traverser.next()); @@ -1652,7 +1848,8 @@ uint64_t CUniqueStringFeatureData::checksum() const { void CUniqueStringFeatureData::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { 
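// Sketch of the "info content" idea above: the information content of a set
// of unique strings is approximated by the size of their compressed
// concatenation, so many near-duplicate strings score lower than the same
// number of diverse ones. This uses raw zlib in place of core::CCompressUtils,
// purely for illustration.
#include <zlib.h>

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

std::size_t infoContent(std::vector<std::string> strings) {
    // Sort first so the measure doesn't depend on insertion order.
    std::sort(strings.begin(), strings.end());
    std::string buffer;
    for (const auto& s : strings) {
        buffer += s;
    }
    uLongf length = compressBound(buffer.size());
    std::vector<Bytef> compressed(length);
    if (compress2(compressed.data(), &length,
                  reinterpret_cast<const Bytef*>(buffer.data()), buffer.size(),
                  Z_DEFAULT_COMPRESSION) != Z_OK) {
        return 0; // mirrors the "log and reset" handling above
    }
    return length;
}

int main() {
    // Similar strings compress well and so carry less information.
    std::cout << infoContent({"aaaa", "aaab", "aaac"}) << " vs "
              << infoContent({"q7#x", "zP!2", "m@9k"}) << '\n';
}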
mem->setName("CUniqueStringFeatureData", sizeof(*this)); core::CMemoryDebug::dynamicSize("s_NoInfluenceUniqueStrings", m_UniqueStrings, mem); - core::CMemoryDebug::dynamicSize("s_InfluenceUniqueStrings", m_InfluencerUniqueStrings, mem); + core::CMemoryDebug::dynamicSize("s_InfluenceUniqueStrings", + m_InfluencerUniqueStrings, mem); } std::size_t CUniqueStringFeatureData::memoryUsage() const { @@ -1663,7 +1860,8 @@ std::size_t CUniqueStringFeatureData::memoryUsage() const { } std::string CUniqueStringFeatureData::print() const { - return "(" + core::CContainerPrinter::print(m_UniqueStrings) + ", " + core::CContainerPrinter::print(m_InfluencerUniqueStrings) + ")"; + return "(" + core::CContainerPrinter::print(m_UniqueStrings) + ", " + + core::CContainerPrinter::print(m_InfluencerUniqueStrings) + ")"; } } } diff --git a/lib/model/CEventRateModel.cc b/lib/model/CEventRateModel.cc index 2fa5c73dc0..8e42026142 100644 --- a/lib/model/CEventRateModel.cc +++ b/lib/model/CEventRateModel.cc @@ -58,8 +58,10 @@ using TTime2Vec = core::CSmallVector; const std::string INDIVIDUAL_STATE_TAG("a"); const std::string PROBABILITY_PRIOR_TAG("b"); -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; -const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight); +const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{ + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; +const maths_t::TWeightStyleVec + PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight); } CEventRateModel::CEventRateModel(const SModelParams& params, @@ -93,7 +95,8 @@ CEventRateModel::CEventRateModel(const SModelParams& params, featureCorrelatesModels, influenceCalculators), m_CurrentBucketStats(CAnomalyDetectorModel::TIME_UNSET) { - traverser.traverseSubLevel(boost::bind(&CEventRateModel::acceptRestoreTraverser, this, _1)); + traverser.traverseSubLevel( + boost::bind(&CEventRateModel::acceptRestoreTraverser, this, _1)); } CEventRateModel::CEventRateModel(bool isForPersistence, const CEventRateModel& other) @@ -106,20 +109,25 @@ CEventRateModel::CEventRateModel(bool isForPersistence, const CEventRateModel& o } void CEventRateModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(INDIVIDUAL_STATE_TAG, boost::bind(&CEventRateModel::doAcceptPersistInserter, this, _1)); - inserter.insertLevel(PROBABILITY_PRIOR_TAG, boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, &m_ProbabilityPrior, _1)); + inserter.insertLevel(INDIVIDUAL_STATE_TAG, + boost::bind(&CEventRateModel::doAcceptPersistInserter, this, _1)); + inserter.insertLevel(PROBABILITY_PRIOR_TAG, + boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, + &m_ProbabilityPrior, _1)); } bool CEventRateModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); if (name == INDIVIDUAL_STATE_TAG) { - if (traverser.traverseSubLevel(boost::bind(&CEventRateModel::doAcceptRestoreTraverser, this, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &CEventRateModel::doAcceptRestoreTraverser, this, _1)) == false) { // Logging handled already. 
                 return false;
             }
         } else if (name == PROBABILITY_PRIOR_TAG) {
-            maths::CMultinomialConjugate prior(this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser);
+            maths::CMultinomialConjugate prior(
+                this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser);
             m_ProbabilityPrior.swap(prior);
         }
     } while (traverser.next());
@@ -144,15 +152,18 @@ bool CEventRateModel::isMetric() const {
 }
 
 void CEventRateModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const {
-    this->CIndividualModel::currentBucketPersonIds(time, m_CurrentBucketStats.s_FeatureData, result);
+    this->CIndividualModel::currentBucketPersonIds(
+        time, m_CurrentBucketStats.s_FeatureData, result);
 }
 
 CEventRateModel::TOptionalDouble CEventRateModel::baselineBucketCount(std::size_t /*pid*/) const {
     return TOptionalDouble();
 }
 
-CEventRateModel::TDouble1Vec
-CEventRateModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t /*cid*/, core_t::TTime time) const {
+CEventRateModel::TDouble1Vec CEventRateModel::currentBucketValue(model_t::EFeature feature,
+                                                                 std::size_t pid,
+                                                                 std::size_t /*cid*/,
+                                                                 core_t::TTime time) const {
     const TFeatureData* data = this->featureData(feature, pid, time);
     if (data) {
         return TDouble1Vec(1, static_cast<double>(data->s_Count));
     }
@@ -160,12 +171,13 @@ CEventRateModel::currentBucketValue(model_t::EFeature feature, std::size_t pid,
     return TDouble1Vec();
 }
 
-CEventRateModel::TDouble1Vec CEventRateModel::baselineBucketMean(model_t::EFeature feature,
-                                                                 std::size_t pid,
-                                                                 std::size_t cid,
-                                                                 model_t::CResultType type,
-                                                                 const TSizeDoublePr1Vec& correlated,
-                                                                 core_t::TTime time) const {
+CEventRateModel::TDouble1Vec
+CEventRateModel::baselineBucketMean(model_t::EFeature feature,
+                                    std::size_t pid,
+                                    std::size_t cid,
+                                    model_t::CResultType type,
+                                    const TSizeDoublePr1Vec& correlated,
+                                    core_t::TTime time) const {
     const maths::CModel* model{this->model(feature, pid)};
     if (!model) {
         return TDouble1Vec();
     }
@@ -176,7 +188,8 @@ CEventRateModel::TDouble1Vec CEventRateModel::baselineBucketMean(model_t::EFeatu
     if (model_t::isDiurnal(feature)) {
         hint = this->currentBucketValue(feature, pid, cid, time);
     }
-    TDouble1Vec result(model->predict(time, type.isUnconditional() ? NO_CORRELATED : correlated, hint));
+    TDouble1Vec result(model->predict(
+        time, type.isUnconditional() ? NO_CORRELATED : correlated, hint));
 
     double probability = 1.0;
     if (model_t::isConstant(feature) && !m_Probabilities.lookup(pid, probability)) {
@@ -185,20 +198,26 @@ CEventRateModel::TDouble1Vec CEventRateModel::baselineBucketMean(model_t::EFeatu
     for (auto& coord : result) {
         coord = probability * model_t::inverseOffsetCountToZero(feature, coord);
     }
-    this->correctBaselineForInterim(feature, pid, type, correlated, this->currentBucketInterimCorrections(), result);
+    this->correctBaselineForInterim(feature, pid, type, correlated,
+                                    this->currentBucketInterimCorrections(), result);
     TDouble1VecDouble1VecPr support{model_t::support(feature)};
     return maths::CTools::truncate(result, support.first, support.second);
 }
 
-void CEventRateModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CEventRateModel::sampleBucketStatistics(core_t::TTime startTime,
+                                             core_t::TTime endTime,
+                                             CResourceMonitor& resourceMonitor) {
     this->createUpdateNewModels(startTime, resourceMonitor);
     this->currentBucketInterimCorrections().clear();
     this->CIndividualModel::sampleBucketStatistics(
-        startTime, endTime, this->personFilter(), m_CurrentBucketStats.s_FeatureData, resourceMonitor);
+        startTime, endTime, this->personFilter(),
+        m_CurrentBucketStats.s_FeatureData, resourceMonitor);
 }
 
-void CEventRateModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CEventRateModel::sample(core_t::TTime startTime,
+                             core_t::TTime endTime,
+                             CResourceMonitor& resourceMonitor) {
     CDataGatherer& gatherer = this->dataGatherer();
     core_t::TTime bucketLength = gatherer.bucketLength();
@@ -234,15 +253,17 @@ void CEventRateModel::sample(core_t::TTime startTime, core_t::TTime endTime, CRe
         model_t::EFeature feature = featureData.first;
         TSizeFeatureDataPrVec& data = featureData.second;
         std::size_t dimension = model_t::dimension(feature);
-        LOG_TRACE(<< model_t::print(feature) << ": " << core::CContainerPrinter::print(data));
+        LOG_TRACE(<< model_t::print(feature) << ": "
+                  << core::CContainerPrinter::print(data));
 
         if (feature == model_t::E_IndividualTotalBucketCountByPerson) {
             for (const auto& data_ : data) {
                 if (data_.second.s_Count > 0) {
                     LOG_TRACE(<< "person = " << this->personName(data_.first));
-                    m_ProbabilityPrior.addSamples(maths::CConstantWeights::COUNT,
-                                                  TDouble1Vec{static_cast<double>(data_.first)},
-                                                  maths::CConstantWeights::SINGLE_UNIT);
+                    m_ProbabilityPrior.addSamples(
+                        maths::CConstantWeights::COUNT,
+                        TDouble1Vec{static_cast<double>(data_.first)},
+                        maths::CConstantWeights::SINGLE_UNIT);
                 }
             }
             if (!data.empty()) {
@@ -266,7 +287,8 @@ void CEventRateModel::sample(core_t::TTime startTime, core_t::TTime endTime, CRe
             }
 
             core_t::TTime sampleTime = model_t::sampleTime(feature, time, bucketLength);
-            if (this->shouldIgnoreSample(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, sampleTime)) {
+            if (this->shouldIgnoreSample(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID,
+                                         sampleTime)) {
                 model->skipTime(sampleTime - lastBucketTimesMap[pid]);
                 continue;
             }
@@ -276,15 +298,21 @@ void CEventRateModel::sample(core_t::TTime startTime, core_t::TTime endTime, CRe
                 continue;
             }
 
-            double count = model_t::offsetCountToZero(feature, static_cast<double>(data_.second.s_Count));
+            double count = model_t::offsetCountToZero(
+                feature, static_cast<double>(data_.second.s_Count));
             double derate = this->derate(pid, sampleTime);
-            double interval = (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) * emptyBucketWeight;
-
-            LOG_TRACE(<< "Bucket = " << this->printCurrentBucket() << ", feature = " << model_t::print(feature) << ", count = " << count
-                      << ", person = " << this->personName(pid) << ", empty bucket weight = " << emptyBucketWeight
+            double interval =
+                (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) *
+                emptyBucketWeight;
+
+            LOG_TRACE(<< "Bucket = " << this->printCurrentBucket()
+                      << ", feature = " << model_t::print(feature) << ", count = "
+                      << count << ", person = " << this->personName(pid)
+                      << ", empty bucket weight = " << emptyBucketWeight
                       << ", derate = " << derate << ", interval = " << interval);
 
-            model->params().probabilityBucketEmpty(this->probabilityBucketEmpty(feature, pid));
+            model->params().probabilityBucketEmpty(
+                this->probabilityBucketEmpty(feature, pid));
 
             TDouble2Vec value(1, count);
             values.assign(1, core::make_triple(sampleTime, value, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID));
@@ -332,10 +360,10 @@ bool CEventRateModel::computeProbability(std::size_t pid,
         return false;
     }
 
-    CAnnotatedProbabilityBuilder resultBuilder(result,
-                                               1, // # attribute probabilities
-                                               function_t::function(gatherer.features()),
-                                               gatherer.numberActivePeople());
+    CAnnotatedProbabilityBuilder resultBuilder(
+        result,
+        1, // # attribute probabilities
+        function_t::function(gatherer.features()), gatherer.numberActivePeople());
 
     CProbabilityAndInfluenceCalculator pJoint(this->params().s_InfluenceCutoff);
     pJoint.addAggregator(maths::CJointProbabilityOfLessLikelySamples());
@@ -355,11 +383,9 @@ bool CEventRateModel::computeProbability(std::size_t pid,
         if (!data) {
             continue;
         }
-        if (this->shouldIgnoreResult(feature,
-                                     result.s_ResultType,
-                                     pid,
-                                     model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID,
-                                     model_t::sampleTime(feature, startTime, bucketLength))) {
+        if (this->shouldIgnoreResult(
+                feature, result.s_ResultType, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID,
+                model_t::sampleTime(feature, startTime, bucketLength))) {
             continue;
         }
 
@@ -371,11 +397,13 @@ bool CEventRateModel::computeProbability(std::size_t pid,
             CProbabilityAndInfluenceCalculator::SCorrelateParams params(partitioningFields);
             TStrCRefDouble1VecDouble1VecPrPrVecVecVec influenceValues;
             this->fill(feature, pid, startTime, result.isInterim(), params, influenceValues);
-            this->addProbabilityAndInfluences(pid, params, influenceValues, pFeatures, resultBuilder);
+            this->addProbabilityAndInfluences(pid, params, influenceValues,
+                                              pFeatures, resultBuilder);
         } else {
             CProbabilityAndInfluenceCalculator::SParams params(partitioningFields);
             this->fill(feature, pid, startTime, result.isInterim(), params);
-            this->addProbabilityAndInfluences(pid, params, data->s_InfluenceValues, pFeatures, resultBuilder);
+            this->addProbabilityAndInfluences(pid, params, data->s_InfluenceValues,
+                                              pFeatures, resultBuilder);
         }
     }
 
@@ -419,7 +447,8 @@ uint64_t CEventRateModel::checksum(bool includeCurrentBucketStats) const {
     const TDoubleVec& categories = m_ProbabilityPrior.categories();
     const TDoubleVec& concentrations = m_ProbabilityPrior.concentrations();
     for (std::size_t i = 0u; i < categories.size(); ++i) {
-        uint64_t& hash = hashes[boost::cref(this->personName(static_cast<std::size_t>(categories[i])))];
+        uint64_t& hash =
+            hashes[boost::cref(this->personName(static_cast<std::size_t>(categories[i])))];
         hash = maths::CChecksum::calculate(hash, concentrations[i]);
     }
     if (includeCurrentBucketStats) {
@@ -440,9 +469,12 @@ uint64_t CEventRateModel::checksum(bool includeCurrentBucketStats) const {
 
 void CEventRateModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
     mem->setName("CEventRateModel");
     this->CIndividualModel::debugMemoryUsage(mem->addChild());
-    core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", m_CurrentBucketStats.s_PersonCounts, mem);
-    core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", m_CurrentBucketStats.s_FeatureData, mem);
-    core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", m_CurrentBucketStats.s_InterimCorrections, mem);
+    core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts",
+                                    m_CurrentBucketStats.s_PersonCounts, mem);
+    core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData",
+                                    m_CurrentBucketStats.s_FeatureData, mem);
+    core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections",
+                                    m_CurrentBucketStats.s_InterimCorrections, mem);
     core::CMemoryDebug::dynamicSize("s_Probabilities", m_Probabilities, mem);
     core::CMemoryDebug::dynamicSize("m_ProbabilityPrior", m_ProbabilityPrior, mem);
 }
@@ -469,8 +501,10 @@ CEventRateModel::CModelDetailsViewPtr CEventRateModel::details() const {
     return CModelDetailsViewPtr(new CEventRateModelDetailsView(*this));
 }
 
-const CEventRateModel::TFeatureData* CEventRateModel::featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const {
-    return this->CIndividualModel::featureData(feature, pid, time, m_CurrentBucketStats.s_FeatureData);
+const CEventRateModel::TFeatureData*
+CEventRateModel::featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const {
+    return this->CIndividualModel::featureData(feature, pid, time,
+                                               m_CurrentBucketStats.s_FeatureData);
 }
 
 core_t::TTime CEventRateModel::currentBucketStartTime() const {
@@ -497,7 +531,8 @@ uint64_t CEventRateModel::currentBucketTotalCount() const {
     return m_CurrentBucketStats.s_TotalCount;
 }
 
-CIndividualModel::TFeatureSizeSizeTripleDouble1VecUMap& CEventRateModel::currentBucketInterimCorrections() const {
+CIndividualModel::TFeatureSizeSizeTripleDouble1VecUMap&
+CEventRateModel::currentBucketInterimCorrections() const {
     return m_CurrentBucketStats.s_InterimCorrections;
 }
 
@@ -515,7 +550,8 @@ void CEventRateModel::clearPrunedResources(const TSizeVec& people, const TSizeVe
     // Stop collecting for these people and add them to the free list.
     gatherer.recyclePeople(people);
     if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) {
-        gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), m_CurrentBucketStats.s_FeatureData);
+        gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(),
+                             m_CurrentBucketStats.s_FeatureData);
     }
 
     TDoubleVec categoriesToRemove;
@@ -536,7 +572,8 @@ bool CEventRateModel::correlates(model_t::EFeature feature, std::size_t pid, cor
     const maths::CModel* model{this->model(feature, pid)};
 
     for (const auto& correlate : model->correlates()) {
-        if (this->featureData(feature, pid == correlate[0] ? correlate[1] : correlate[0], time)) {
+        if (this->featureData(
+                feature, pid == correlate[0] ? correlate[1] : correlate[0], time)) {
             return true;
         }
     }
@@ -553,7 +590,8 @@ void CEventRateModel::fill(model_t::EFeature feature,
     core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength())};
     TOptionalUInt64 count{this->currentBucketCount(pid, bucketTime)};
     double value{model_t::offsetCountToZero(feature, static_cast<double>(data->s_Count))};
-    TDouble2Vec4Vec weight{model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)};
+    TDouble2Vec4Vec weight{
+        model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)};
 
     params.s_Feature = feature;
     params.s_Model = model;
@@ -562,12 +600,15 @@ void CEventRateModel::fill(model_t::EFeature feature,
     params.s_Value.assign(1, TDouble2Vec{value});
     if (interim && model_t::requiresInterimResultAdjustment(feature)) {
         double mode{params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weight)[0]};
-        TDouble2Vec correction{this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), mode, value)};
+        TDouble2Vec correction{this->interimValueCorrector().corrections(
+            time, this->currentBucketTotalCount(), mode, value)};
         params.s_Value[0] += correction;
-        this->currentBucketInterimCorrections().emplace(core::make_triple(feature, pid, pid), correction);
+        this->currentBucketInterimCorrections().emplace(
+            core::make_triple(feature, pid, pid), correction);
     }
     params.s_Count = 1.0;
-    params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature))
+    params.s_ComputeProbabilityParams
+        .addCalculation(model_t::probabilityCalculation(feature))
         .weightStyles(PROBABILITY_WEIGHT_STYLES)
         .addBucketEmpty(TBool2Vec(1, !count || *count == 0))
         .addWeights(weight);
@@ -596,54 +637,69 @@ void CEventRateModel::fill(model_t::EFeature feature,
     params.s_Variables.resize(correlates.size());
     params.s_CorrelatedLabels.resize(correlates.size());
     params.s_Correlated.resize(correlates.size());
-    params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)).weightStyles(PROBABILITY_WEIGHT_STYLES);
+    params.s_ComputeProbabilityParams
+        .addCalculation(model_t::probabilityCalculation(feature))
+        .weightStyles(PROBABILITY_WEIGHT_STYLES);
 
     // These are indexed as follows:
     //   influenceValues["influencer name"]["correlate"]["influence value"]
     // This is because we aren't guaranteed that each influence is present for
     // each feature.
-    influenceValues.resize(this->featureData(feature, pid, bucketTime)->s_InfluenceValues.size(),
-                           TStrCRefDouble1VecDouble1VecPrPrVecVec(correlates.size()));
+    influenceValues.resize(
+        this->featureData(feature, pid, bucketTime)->s_InfluenceValues.size(),
+        TStrCRefDouble1VecDouble1VecPrPrVecVec(correlates.size()));
 
     // Declared outside the loop to minimize the number of times it is created.
     TDouble1VecDouble1VecPr value;
 
     for (std::size_t i = 0u; i < correlates.size(); ++i) {
-        TSize2Vec variables = pid == correlates[i][0] ? TSize2Vec{0, 1} : TSize2Vec{1, 0};
-        params.s_CorrelatedLabels[i] = gatherer.personNamePtr(correlates[i][variables[1]]);
+        TSize2Vec variables = pid == correlates[i][0] ? TSize2Vec{0, 1}
+                                                      : TSize2Vec{1, 0};
+        params.s_CorrelatedLabels[i] =
+            gatherer.personNamePtr(correlates[i][variables[1]]);
         params.s_Correlated[i] = correlates[i][variables[1]];
         params.s_Variables[i] = variables;
-        const maths::CModel* models[]{model, this->model(feature, correlates[i][variables[1]])};
+        const maths::CModel* models[]{
+            model, this->model(feature, correlates[i][variables[1]])};
         TDouble2Vec4Vec weight(1, TDouble2Vec(2));
-        weight[0][variables[0]] = models[0]->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0];
-        weight[0][variables[1]] = models[1]->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0];
+        weight[0][variables[0]] = models[0]->seasonalWeight(
+            maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0];
+        weight[0][variables[1]] = models[1]->seasonalWeight(
+            maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0];
 
         TOptionalUInt64 count[2];
         count[0] = this->currentBucketCount(correlates[i][0], bucketTime);
         count[1] = this->currentBucketCount(correlates[i][1], bucketTime);
-        params.s_ComputeProbabilityParams.addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, !count[1] || *count[1] == 0})
+        params.s_ComputeProbabilityParams
+            .addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0,
+                                      !count[1] || *count[1] == 0})
             .addWeights(weight);
 
         const TFeatureData* data[2];
         data[0] = this->featureData(feature, correlates[i][0], bucketTime);
         data[1] = this->featureData(feature, correlates[i][1], bucketTime);
         if (data[0] && data[1]) {
-            params.s_ElapsedTime = std::min(params.s_ElapsedTime, bucketTime - firstBucketTimes[correlates[i][0]]);
-            params.s_ElapsedTime = std::min(params.s_ElapsedTime, bucketTime - firstBucketTimes[correlates[i][1]]);
+            params.s_ElapsedTime = std::min(
+                params.s_ElapsedTime, bucketTime - firstBucketTimes[correlates[i][0]]);
+            params.s_ElapsedTime = std::min(
+                params.s_ElapsedTime, bucketTime - firstBucketTimes[correlates[i][1]]);
             params.s_Times[i] = TTime2Vec(2, time);
-            params.s_Values[i] = TDouble2Vec{model_t::offsetCountToZero(feature, static_cast<double>(data[0]->s_Count)),
-                                             model_t::offsetCountToZero(feature, static_cast<double>(data[1]->s_Count))};
+            params.s_Values[i] = TDouble2Vec{
+                model_t::offsetCountToZero(feature, static_cast<double>(data[0]->s_Count)),
+                model_t::offsetCountToZero(feature, static_cast<double>(data[1]->s_Count))};
             for (std::size_t j = 0u; j < data[0]->s_InfluenceValues.size(); ++j) {
                 for (const auto& influenceValue : data[0]->s_InfluenceValues[j]) {
                     TStrCRef influence = influenceValue.first;
-                    std::size_t match = static_cast<std::size_t>(std::find_if(data[1]->s_InfluenceValues[j].begin(),
-                                                                              data[1]->s_InfluenceValues[j].end(),
-                                                                              [influence](const TStrCRefDouble1VecDoublePrPr& value_) {
-                                                                                  return value_.first.get() == influence.get();
-                                                                              }) -
-                                                                 data[1]->s_InfluenceValues[j].begin());
+                    std::size_t match = static_cast<std::size_t>(
+                        std::find_if(data[1]->s_InfluenceValues[j].begin(),
+                                     data[1]->s_InfluenceValues[j].end(),
+                                     [influence](const TStrCRefDouble1VecDoublePrPr& value_) {
+                                         return value_.first.get() == influence.get();
+                                     }) -
+                        data[1]->s_InfluenceValues[j].begin());
                     if (match < data[1]->s_InfluenceValues[j].size()) {
                         const TDouble1VecDoublePr& value0 = influenceValue.second;
-                        const TDouble1VecDoublePr& value1 = data[1]->s_InfluenceValues[j][match].second;
+                        const TDouble1VecDoublePr& value1 =
+                            data[1]->s_InfluenceValues[j][match].second;
                         value.first = TDouble1Vec{value0.first[0], value1.first[0]};
                         value.second = TDouble1Vec{value0.second, value1.second};
                         influenceValues[j][i].emplace_back(influence, value);
@@ -653,15 +709,17 @@ void CEventRateModel::fill(model_t::EFeature feature,
             }
         }
     }
     if (interim && model_t::requiresInterimResultAdjustment(feature)) {
-        TDouble2Vec1Vec modes =
-            params.s_Model->correlateModes(time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights());
+        TDouble2Vec1Vec modes = params.s_Model->correlateModes(
+            time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights());
         for (std::size_t i = 0u; i < modes.size(); ++i) {
             TDouble2Vec& value_ = params.s_Values[i];
             if (!value_.empty()) {
-                TDouble2Vec correction(this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), modes[i], value_));
+                TDouble2Vec correction(this->interimValueCorrector().corrections(
+                    time, this->currentBucketTotalCount(), modes[i], value_));
                 value_ += correction;
-                this->currentBucketInterimCorrections().emplace(core::make_triple(feature, pid, params.s_Correlated[i]),
-                                                                TDouble1Vec{correction[params.s_Variables[i][0]]});
+                this->currentBucketInterimCorrections().emplace(
+                    core::make_triple(feature, pid, params.s_Correlated[i]),
+                    TDouble1Vec{correction[params.s_Variables[i][0]]});
             }
         }
     }
@@ -669,7 +727,8 @@ void CEventRateModel::fill(model_t::EFeature feature,
 
 ////////// CEventRateModel::SBucketStats Implementation //////////
 
-CEventRateModel::SBucketStats::SBucketStats(core_t::TTime startTime) : s_StartTime(startTime), s_TotalCount(0), s_InterimCorrections(1) {
+CEventRateModel::SBucketStats::SBucketStats(core_t::TTime startTime)
+    : s_StartTime(startTime), s_TotalCount(0), s_InterimCorrections(1) {
 }
 }
 }
diff --git a/lib/model/CEventRateModelFactory.cc b/lib/model/CEventRateModelFactory.cc
index 31b915d477..3edae11a34 100644
--- a/lib/model/CEventRateModelFactory.cc
+++ b/lib/model/CEventRateModelFactory.cc
@@ -31,11 +31,8 @@ namespace model {
 CEventRateModelFactory::CEventRateModelFactory(const SModelParams& params,
                                                model_t::ESummaryMode summaryMode,
                                                const std::string& summaryCountFieldName)
-    : CModelFactory(params),
-      m_Identifier(),
-      m_SummaryMode(summaryMode),
-      m_SummaryCountFieldName(summaryCountFieldName),
-      m_UseNull(false),
+    : CModelFactory(params), m_Identifier(), m_SummaryMode(summaryMode),
+      m_SummaryCountFieldName(summaryCountFieldName), m_UseNull(false),
       m_BucketResultsDelay(0) {
 }
 
@@ -43,7 +40,8 @@ CEventRateModelFactory* CEventRateModelFactory::clone() const {
     return new CEventRateModelFactory(*this);
 }
 
-CAnomalyDetectorModel* CEventRateModelFactory::makeModel(const SModelInitializationData& initData) const {
+CAnomalyDetectorModel*
+CEventRateModelFactory::makeModel(const SModelInitializationData& initData) const {
     TDataGathererPtr dataGatherer = initData.s_DataGatherer;
     if (!dataGatherer) {
         LOG_ERROR(<< "NULL data gatherer");
@@ -57,17 +55,16 @@ CAnomalyDetectorModel* CEventRateModelFactory::makeModel(const SModelInitializat
         influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features));
     }
 
-    return new CEventRateModel(this->modelParams(),
-                               dataGatherer,
-                               this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true),
-                               this->defaultCorrelatePriors(features),
-                               this->defaultCorrelates(features),
-                               this->defaultCategoricalPrior(),
-                               influenceCalculators);
+    return new CEventRateModel(
+        this->modelParams(), dataGatherer,
+        this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true),
+        this->defaultCorrelatePriors(features), this->defaultCorrelates(features),
+        this->defaultCategoricalPrior(), influenceCalculators);
 }
 
-CAnomalyDetectorModel* CEventRateModelFactory::makeModel(const SModelInitializationData& initData,
-                                                         core::CStateRestoreTraverser& traverser) const {
+CAnomalyDetectorModel*
+CEventRateModelFactory::makeModel(const SModelInitializationData& initData,
+                                  core::CStateRestoreTraverser& traverser) const {
     TDataGathererPtr dataGatherer = initData.s_DataGatherer;
     if (!dataGatherer) {
         LOG_ERROR(<< "NULL data gatherer");
@@ -81,51 +78,37 @@ CAnomalyDetectorModel* CEventRateModelFactory::makeModel(const SModelInitializat
         influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features));
     }
 
-    return new CEventRateModel(this->modelParams(),
-                               dataGatherer,
-                               this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true),
-                               this->defaultCorrelatePriors(features),
-                               this->defaultCorrelates(features),
-                               influenceCalculators,
-                               traverser);
+    return new CEventRateModel(
+        this->modelParams(), dataGatherer,
+        this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true),
+        this->defaultCorrelatePriors(features),
+        this->defaultCorrelates(features), influenceCalculators, traverser);
 }
 
-CDataGatherer* CEventRateModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const {
-    return new CDataGatherer(model_t::E_EventRate,
-                             m_SummaryMode,
-                             this->modelParams(),
-                             m_SummaryCountFieldName,
-                             m_PartitionFieldName,
-                             initData.s_PartitionFieldValue,
-                             m_PersonFieldName,
-                             EMPTY_STRING, // AttributeFieldName
-                             m_ValueFieldName,
-                             m_InfluenceFieldNames,
-                             m_UseNull,
-                             this->searchKey(),
-                             m_Features,
-                             initData.s_StartTime,
-                             initData.s_SampleOverrideCount);
+CDataGatherer*
+CEventRateModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const {
+    return new CDataGatherer(
+        model_t::E_EventRate, m_SummaryMode, this->modelParams(), m_SummaryCountFieldName,
+        m_PartitionFieldName, initData.s_PartitionFieldValue, m_PersonFieldName,
+        EMPTY_STRING, // AttributeFieldName
+        m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, this->searchKey(),
+        m_Features, initData.s_StartTime, initData.s_SampleOverrideCount);
 }
 
-CDataGatherer* CEventRateModelFactory::makeDataGatherer(const std::string& partitionFieldValue,
-                                                        core::CStateRestoreTraverser& traverser) const {
-    return new CDataGatherer(model_t::E_EventRate,
-                             m_SummaryMode,
-                             this->modelParams(),
-                             m_SummaryCountFieldName,
-                             m_PartitionFieldName,
-                             partitionFieldValue,
-                             m_PersonFieldName,
+CDataGatherer*
+CEventRateModelFactory::makeDataGatherer(const std::string& partitionFieldValue,
+                                         core::CStateRestoreTraverser& traverser) const {
+    return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(),
+                             m_SummaryCountFieldName, m_PartitionFieldName,
+                             partitionFieldValue, m_PersonFieldName,
                              EMPTY_STRING, // AttributeFieldName
-                             m_ValueFieldName,
-                             m_InfluenceFieldNames,
-                             m_UseNull,
-                             this->searchKey(),
-                             traverser);
+                             m_ValueFieldName, m_InfluenceFieldNames, m_UseNull,
+                             this->searchKey(), traverser);
 }
 
-CEventRateModelFactory::TPriorPtr CEventRateModelFactory::defaultPrior(model_t::EFeature feature, const SModelParams& params) const {
+CEventRateModelFactory::TPriorPtr
+CEventRateModelFactory::defaultPrior(model_t::EFeature feature,
+                                     const SModelParams& params) const {
     // Categorical data all use the multinomial prior. The creation
     // of these priors is managed by defaultCategoricalPrior.
     if (model_t::isCategorical(feature)) {
@@ -154,14 +137,18 @@ CEventRateModelFactory::TPriorPtr CEventRateModelFactory::defaultPrior(model_t::
 
     maths_t::EDataType dataType = this->dataType();
 
-    maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
+    maths::CGammaRateConjugate gammaPrior =
+        maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
 
     maths::CLogNormalMeanPrecConjugate logNormalPrior =
-        maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate);
+        maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0,
+                                                                params.s_DecayRate);
 
-    maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate);
+    maths::CNormalMeanPrecConjugate normalPrior =
+        maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate);
 
-    maths::CPoissonMeanConjugate poissonPrior = maths::CPoissonMeanConjugate::nonInformativePrior(0.0, params.s_DecayRate);
+    maths::CPoissonMeanConjugate poissonPrior =
+        maths::CPoissonMeanConjugate::nonInformativePrior(0.0, params.s_DecayRate);
 
     // Create the component priors.
     TPriorPtrVec priors;
@@ -178,22 +165,21 @@ CEventRateModelFactory::TPriorPtr CEventRateModelFactory::defaultPrior(model_t::
         modePriors.emplace_back(logNormalPrior.clone());
         modePriors.emplace_back(normalPrior.clone());
         maths::COneOfNPrior modePrior(modePriors, dataType, params.s_DecayRate);
-        maths::CXMeansOnline1d clusterer(dataType,
-                                         maths::CAvailableModeDistributions::ALL,
-                                         maths_t::E_ClustersFractionWeight,
-                                         params.s_DecayRate,
-                                         params.s_MinimumModeFraction,
-                                         params.s_MinimumModeCount,
-                                         params.minimumCategoryCount());
-        maths::CMultimodalPrior multimodalPrior(dataType, clusterer, modePrior, params.s_DecayRate);
+        maths::CXMeansOnline1d clusterer(
+            dataType, maths::CAvailableModeDistributions::ALL,
+            maths_t::E_ClustersFractionWeight, params.s_DecayRate, params.s_MinimumModeFraction,
+            params.s_MinimumModeCount, params.minimumCategoryCount());
+        maths::CMultimodalPrior multimodalPrior(dataType, clusterer, modePrior,
+                                                params.s_DecayRate);
         priors.emplace_back(multimodalPrior.clone());
     }
 
     return boost::make_shared<maths::COneOfNPrior>(priors, dataType, params.s_DecayRate);
 }
 
-CEventRateModelFactory::TMultivariatePriorPtr CEventRateModelFactory::defaultMultivariatePrior(model_t::EFeature feature,
-                                                                                               const SModelParams& params) const {
+CEventRateModelFactory::TMultivariatePriorPtr
+CEventRateModelFactory::defaultMultivariatePrior(model_t::EFeature feature,
+                                                 const SModelParams& params) const {
     std::size_t dimension = model_t::dimension(feature);
 
     TMultivariatePriorPtrVec priors;
@@ -207,8 +193,9 @@ CEventRateModelFactory::TMultivariatePriorPtr CEventRateModelFactory::defaultMul
     return this->multivariateOneOfNPrior(dimension, params, priors);
 }
 
-CEventRateModelFactory::TMultivariatePriorPtr CEventRateModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/,
-                                                                                            const SModelParams& params) const {
+CEventRateModelFactory::TMultivariatePriorPtr
+CEventRateModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/,
+                                              const SModelParams& params) const {
     TMultivariatePriorPtrVec priors;
     priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u);
     TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(2, params);
@@ -221,15 +208,10 @@ CEventRateModelFactory::TMultivariatePriorPtr CEventRateModelFactory::defaultCor
 
 const CSearchKey& CEventRateModelFactory::searchKey() const {
     if (!m_SearchKeyCache) {
-        m_SearchKeyCache.reset(CSearchKey(m_Identifier,
-                                          function_t::function(m_Features),
-                                          m_UseNull,
-                                          this->modelParams().s_ExcludeFrequent,
-                                          m_ValueFieldName,
-                                          m_PersonFieldName,
-                                          "",
-                                          m_PartitionFieldName,
-                                          m_InfluenceFieldNames));
+        m_SearchKeyCache.reset(CSearchKey(
+            m_Identifier, function_t::function(m_Features), m_UseNull,
+            this->modelParams().s_ExcludeFrequent, m_ValueFieldName,
+            m_PersonFieldName, "", m_PartitionFieldName, m_InfluenceFieldNames));
     }
     return *m_SearchKeyCache;
 }
diff --git a/lib/model/CEventRatePopulationModel.cc b/lib/model/CEventRatePopulationModel.cc
index 85cd8543d4..ce2a0e17a2 100644
--- a/lib/model/CEventRatePopulationModel.cc
+++ b/lib/model/CEventRatePopulationModel.cc
@@ -47,9 +47,11 @@ using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>;
 using TBool2Vec = core::CSmallVector<bool, 2>;
 using TTime2Vec = core::CSmallVector<core_t::TTime, 2>;
 using TSizeSizePrFeatureDataPrVec = CEventRatePopulationModel::TSizeSizePrFeatureDataPrVec;
-using TFeatureSizeSizePrFeatureDataPrVecPr = std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>;
+using TFeatureSizeSizePrFeatureDataPrVecPr =
+    std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>;
 using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector<TFeatureSizeSizePrFeatureDataPrVecPr>;
-using TSizeFuzzyDeduplicateUMap = boost::unordered_map<std::size_t, CModelTools::CFuzzyDeduplicate>;
+using TSizeFuzzyDeduplicateUMap =
+    boost::unordered_map<std::size_t, CModelTools::CFuzzyDeduplicate>;
 
 //! \brief The values and weights for an attribute.
 struct SValuesAndWeights {
@@ -67,64 +69,79 @@ const std::string FEATURE_CORRELATE_MODELS_TAG("e");
 const std::string MEMORY_ESTIMATOR_TAG("f");
 const std::string EMPTY_STRING("");
 
-const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight};
-const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight);
+const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{
+    maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight};
+const maths_t::TWeightStyleVec
+    PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight);
 }
 
-CEventRatePopulationModel::CEventRatePopulationModel(const SModelParams& params,
-                                                     const TDataGathererPtr& dataGatherer,
-                                                     const TFeatureMathsModelPtrPrVec& newFeatureModels,
-                                                     const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
-                                                     const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels,
-                                                     const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators)
+CEventRatePopulationModel::CEventRatePopulationModel(
+    const SModelParams& params,
+    const TDataGathererPtr& dataGatherer,
+    const TFeatureMathsModelPtrPrVec& newFeatureModels,
+    const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+    const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels,
+    const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators)
     : CPopulationModel(params, dataGatherer, influenceCalculators),
-      m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - dataGatherer->bucketLength()),
-      m_NewAttributeProbabilityPrior(
-          maths::CMultinomialConjugate::nonInformativePrior(boost::numeric::bounds<double>::highest(), params.s_DecayRate)),
-      m_AttributeProbabilityPrior(
-          maths::CMultinomialConjugate::nonInformativePrior(boost::numeric::bounds<double>::highest(), params.s_DecayRate)),
+      m_CurrentBucketStats(dataGatherer->currentBucketStartTime() -
+                           dataGatherer->bucketLength()),
+      m_NewAttributeProbabilityPrior(maths::CMultinomialConjugate::nonInformativePrior(
+          boost::numeric::bounds<double>::highest(),
+          params.s_DecayRate)),
+      m_AttributeProbabilityPrior(maths::CMultinomialConjugate::nonInformativePrior(
+          boost::numeric::bounds<double>::highest(),
+          params.s_DecayRate)),
       m_Probabilities(0.05) {
     this->initialize(newFeatureModels, newFeatureCorrelateModelPriors, featureCorrelatesModels);
 }
 
-CEventRatePopulationModel::CEventRatePopulationModel(const SModelParams& params,
-                                                     const TDataGathererPtr& dataGatherer,
-                                                     const TFeatureMathsModelPtrPrVec& newFeatureModels,
-                                                     const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
-                                                     const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels,
-                                                     const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators,
-                                                     core::CStateRestoreTraverser& traverser)
+CEventRatePopulationModel::CEventRatePopulationModel(
+    const SModelParams& params,
+    const TDataGathererPtr& dataGatherer,
+    const TFeatureMathsModelPtrPrVec& newFeatureModels,
+    const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+    const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels,
+    const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators,
+    core::CStateRestoreTraverser& traverser)
     : CPopulationModel(params, dataGatherer, influenceCalculators),
-      m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - dataGatherer->bucketLength()),
+      m_CurrentBucketStats(dataGatherer->currentBucketStartTime() -
+                           dataGatherer->bucketLength()),
       m_Probabilities(0.05) {
     this->initialize(newFeatureModels, newFeatureCorrelateModelPriors, featureCorrelatesModels);
-    traverser.traverseSubLevel(boost::bind(&CEventRatePopulationModel::acceptRestoreTraverser, this, _1));
+    traverser.traverseSubLevel(
+        boost::bind(&CEventRatePopulationModel::acceptRestoreTraverser, this, _1));
 }
 
-void CEventRatePopulationModel::initialize(const TFeatureMathsModelPtrPrVec& newFeatureModels,
-                                           const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
-                                           const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels) {
+void CEventRatePopulationModel::initialize(
+    const TFeatureMathsModelPtrPrVec& newFeatureModels,
+    const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors,
+    const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels) {
     m_FeatureModels.reserve(newFeatureModels.size());
     for (const auto& model : newFeatureModels) {
         m_FeatureModels.emplace_back(model.first, model.second);
     }
-    std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), [](const SFeatureModels& lhs, const SFeatureModels& rhs) {
-        return lhs.s_Feature < rhs.s_Feature;
-    });
+    std::sort(m_FeatureModels.begin(), m_FeatureModels.end(),
+              [](const SFeatureModels& lhs, const SFeatureModels& rhs) {
+                  return lhs.s_Feature < rhs.s_Feature;
+              });
 
     if (this->params().s_MultivariateByFields) {
         m_FeatureCorrelatesModels.reserve(featureCorrelatesModels.size());
         for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i) {
             m_FeatureCorrelatesModels.emplace_back(
-                featureCorrelatesModels[i].first, newFeatureCorrelateModelPriors[i].second, featureCorrelatesModels[i].second);
+                featureCorrelatesModels[i].first,
+                newFeatureCorrelateModelPriors[i].second,
+                featureCorrelatesModels[i].second);
         }
-        std::sort(m_FeatureCorrelatesModels.begin(),
-                  m_FeatureCorrelatesModels.end(),
-                  [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) { return lhs.s_Feature < rhs.s_Feature; });
+        std::sort(m_FeatureCorrelatesModels.begin(), m_FeatureCorrelatesModels.end(),
+                  [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) {
+                      return lhs.s_Feature < rhs.s_Feature;
+                  });
     }
 }
 
-CEventRatePopulationModel::CEventRatePopulationModel(bool isForPersistence, const CEventRatePopulationModel& other)
+CEventRatePopulationModel::CEventRatePopulationModel(bool isForPersistence,
+                                                     const CEventRatePopulationModel& other)
     : CPopulationModel(isForPersistence, other),
       m_CurrentBucketStats(0), // Not needed for persistence so minimally constructed
       m_NewAttributeProbabilityPrior(other.m_NewAttributeProbabilityPrior),
@@ -147,21 +164,29 @@ CEventRatePopulationModel::CEventRatePopulationModel(bool isForPersistence, cons
     m_FeatureCorrelatesModels.reserve(other.m_FeatureCorrelatesModels.size());
     for (const auto& feature : other.m_FeatureCorrelatesModels) {
         m_FeatureCorrelatesModels.emplace_back(
-            feature.s_Feature, feature.s_ModelPrior, TCorrelationsPtr(feature.s_Models->cloneForPersistence()));
+            feature.s_Feature, feature.s_ModelPrior,
+            TCorrelationsPtr(feature.s_Models->cloneForPersistence()));
     }
 }
 
 void CEventRatePopulationModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
-    inserter.insertLevel(POPULATION_STATE_TAG, boost::bind(&CEventRatePopulationModel::doAcceptPersistInserter, this, _1));
+    inserter.insertLevel(
+        POPULATION_STATE_TAG,
+        boost::bind(&CEventRatePopulationModel::doAcceptPersistInserter, this, _1));
     inserter.insertLevel(NEW_ATTRIBUTE_PROBABILITY_PRIOR_TAG,
-                         boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, &m_NewAttributeProbabilityPrior, _1));
+                         boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter,
+                                     &m_NewAttributeProbabilityPrior, _1));
     inserter.insertLevel(ATTRIBUTE_PROBABILITY_PRIOR_TAG,
-                         boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter, &m_AttributeProbabilityPrior, _1));
+                         boost::bind(&maths::CMultinomialConjugate::acceptPersistInserter,
+                                     &m_AttributeProbabilityPrior, _1));
     for (const auto& feature : m_FeatureModels) {
-        inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1));
+        inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter,
+                                                             &feature, _1));
     }
     for (const auto& feature : m_FeatureCorrelatesModels) {
-        inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1));
+        inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG,
+                             boost::bind(&SFeatureCorrelateModels::acceptPersistInserter,
+                                         &feature, _1));
     }
     core::CPersistUtils::persist(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, inserter);
 }
@@ -171,25 +196,30 @@ bool CEventRatePopulationModel::acceptRestoreTraverser(core::CStateRestoreTraver
     do {
         const std::string& name = traverser.name();
         RESTORE(POPULATION_STATE_TAG,
-                traverser.traverseSubLevel(boost::bind(&CEventRatePopulationModel::doAcceptRestoreTraverser, this, _1)))
+                traverser.traverseSubLevel(boost::bind(
+                    &CEventRatePopulationModel::doAcceptRestoreTraverser, this, _1)))
         RESTORE_NO_ERROR(
             NEW_ATTRIBUTE_PROBABILITY_PRIOR_TAG,
-            maths::CMultinomialConjugate restored(this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser);
+            maths::CMultinomialConjugate restored(
+                this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser);
             m_NewAttributeProbabilityPrior.swap(restored))
         RESTORE_NO_ERROR(
             ATTRIBUTE_PROBABILITY_PRIOR_TAG,
+            maths::CMultinomialConjugate restored(
+                this->params().distributionRestoreParams(maths_t::E_DiscreteData), traverser);
             m_AttributeProbabilityPrior.swap(restored))
         RESTORE(FEATURE_MODELS_TAG,
                 i == m_FeatureModels.size() ||
-                    traverser.traverseSubLevel(
-                        boost::bind(&SFeatureModels::acceptRestoreTraverser, &m_FeatureModels[i++], boost::cref(this->params()), _1)))
-        RESTORE(
-            FEATURE_CORRELATE_MODELS_TAG,
-            j == m_FeatureCorrelatesModels.size() ||
-                traverser.traverseSubLevel(boost::bind(
-                    &SFeatureCorrelateModels::acceptRestoreTraverser, &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1)))
-        RESTORE(MEMORY_ESTIMATOR_TAG, core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser))
+                    traverser.traverseSubLevel(boost::bind(
+                        &SFeatureModels::acceptRestoreTraverser,
+                        &m_FeatureModels[i++], boost::cref(this->params()), _1)))
+        RESTORE(FEATURE_CORRELATE_MODELS_TAG,
+                j == m_FeatureCorrelatesModels.size() ||
+                    traverser.traverseSubLevel(boost::bind(
+                        &SFeatureCorrelateModels::acceptRestoreTraverser,
+                        &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1)))
+        RESTORE(MEMORY_ESTIMATOR_TAG,
+                core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser))
     } while (traverser.next());
 
     for (auto& feature : m_FeatureModels) {
@@ -222,18 +252,22 @@ bool CEventRatePopulationModel::isMetric() const {
 }
 
 CEventRatePopulationModel::TDouble1Vec
-CEventRatePopulationModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const {
+CEventRatePopulationModel::currentBucketValue(model_t::EFeature feature,
+                                              std::size_t pid,
+                                              std::size_t cid,
+                                              core_t::TTime time) const {
     const TSizeSizePrFeatureDataPrVec& featureData = this->featureData(feature, time);
     auto i = find(featureData, pid, cid);
     return i != featureData.end() ? extractValue(feature, *i) : TDouble1Vec(1, 0.0);
 }
 
-CEventRatePopulationModel::TDouble1Vec CEventRatePopulationModel::baselineBucketMean(model_t::EFeature feature,
-                                                                                     std::size_t pid,
-                                                                                     std::size_t cid,
-                                                                                     model_t::CResultType type,
-                                                                                     const TSizeDoublePr1Vec& correlated,
-                                                                                     core_t::TTime time) const {
+CEventRatePopulationModel::TDouble1Vec
+CEventRatePopulationModel::baselineBucketMean(model_t::EFeature feature,
+                                              std::size_t pid,
+                                              std::size_t cid,
+                                              model_t::CResultType type,
+                                              const TSizeDoublePr1Vec& correlated,
+                                              core_t::TTime time) const {
     const maths::CModel* model{this->model(feature, cid)};
     if (!model) {
         return TDouble1Vec();
     }
@@ -244,7 +278,8 @@ CEventRatePopulationModel::TDouble1Vec CEventRatePopulationModel::baselineBucket
     if (model_t::isDiurnal(feature)) {
         hint = this->currentBucketValue(feature, pid, cid, time);
     }
-    TDouble1Vec result(model->predict(time, type.isUnconditional() ? NO_CORRELATED : correlated, hint));
+    TDouble1Vec result(model->predict(
+        time, type.isUnconditional() ? NO_CORRELATED : correlated, hint));
 
     double probability = 1.0;
     if (model_t::isConstant(feature) && !m_AttributeProbabilities.lookup(pid, probability)) {
@@ -253,17 +288,21 @@ CEventRatePopulationModel::TDouble1Vec CEventRatePopulationModel::baselineBucket
     for (auto& coord : result) {
         coord = probability * model_t::inverseOffsetCountToZero(feature, coord);
     }
-    this->correctBaselineForInterim(feature, pid, cid, type, correlated, this->currentBucketInterimCorrections(), result);
+    this->correctBaselineForInterim(feature, pid, cid, type, correlated,
+                                    this->currentBucketInterimCorrections(), result);
     TDouble1VecDouble1VecPr support{model_t::support(feature)};
     return maths::CTools::truncate(result, support.first, support.second);
 }
 
 bool CEventRatePopulationModel::bucketStatsAvailable(core_t::TTime time) const {
-    return time >= m_CurrentBucketStats.s_StartTime && time < m_CurrentBucketStats.s_StartTime + this->bucketLength();
+    return time >= m_CurrentBucketStats.s_StartTime &&
+           time < m_CurrentBucketStats.s_StartTime + this->bucketLength();
 }
 
-void CEventRatePopulationModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CEventRatePopulationModel::sampleBucketStatistics(core_t::TTime startTime,
+                                                       core_t::TTime endTime,
+                                                       CResourceMonitor& resourceMonitor) {
     CDataGatherer& gatherer = this->dataGatherer();
     core_t::TTime bucketLength = gatherer.bucketLength();
     if (!gatherer.dataAvailable(startTime)) {
@@ -274,7 +313,8 @@ void CEventRatePopulationModel::sampleBucketStatistics(core_t::TTime startTime,
     this->currentBucketInterimCorrections().clear();
 
     for (core_t::TTime time = startTime; time < endTime; time += bucketLength) {
-        this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor);
+        this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength,
+                                                            resourceMonitor);
 
         // Currently, we only remember one bucket.
         m_CurrentBucketStats.s_StartTime = time;
@@ -288,13 +328,16 @@ void CEventRatePopulationModel::sampleBucketStatistics(core_t::TTime startTime,
             model_t::EFeature feature = featureData_.first;
             TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature];
             data.swap(featureData_.second);
-            LOG_TRACE(<< model_t::print(feature) << ": " << core::CContainerPrinter::print(data));
+            LOG_TRACE(<< model_t::print(feature) << ": "
+                      << core::CContainerPrinter::print(data));
             this->applyFilters(false, this->personFilter(), this->attributeFilter(), data);
         }
     }
 }
 
-void CEventRatePopulationModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CEventRatePopulationModel::sample(core_t::TTime startTime,
+                                       core_t::TTime endTime,
+                                       CResourceMonitor& resourceMonitor) {
     CDataGatherer& gatherer = this->dataGatherer();
     core_t::TTime bucketLength = gatherer.bucketLength();
     if (!gatherer.validateSampleTimes(startTime, endTime)) {
@@ -315,7 +358,8 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, core_t::TTime en
         const TTimeVec& preSampleAttributeLastBucketTimes = this->attributeLastBucketTimes();
         TSizeTimeUMap attributeLastBucketTimesMap;
         for (const auto& featureData_ : featureData) {
-            TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[featureData_.first];
+            TSizeSizePrFeatureDataPrVec& data =
+                m_CurrentBucketStats.s_FeatureData[featureData_.first];
             for (const auto& data_ : data) {
                 std::size_t cid = CDataGatherer::extractAttributeId(data_);
                 attributeLastBucketTimesMap[cid] = preSampleAttributeLastBucketTimes[cid];
@@ -332,7 +376,8 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, core_t::TTime en
             model_t::EFeature feature = featureData_.first;
             TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature];
             data.swap(featureData_.second);
-            LOG_TRACE(<< model_t::print(feature) << ": " << core::CContainerPrinter::print(data));
+            LOG_TRACE(<< model_t::print(feature) << ": "
+                      << core::CContainerPrinter::print(data));
 
             if (feature == model_t::E_PopulationUniquePersonCountByAttribute) {
                 TDoubleVec categories;
@@ -340,10 +385,13 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, core_t::TTime en
                 categories.reserve(data.size());
                 concentrations.reserve(data.size());
                 for (const auto& tuple : data) {
-                    categories.push_back(static_cast<double>(CDataGatherer::extractAttributeId(tuple)));
-                    concentrations.push_back(static_cast<double>(CDataGatherer::extractData(tuple).s_Count));
+                    categories.push_back(static_cast<double>(
+                        CDataGatherer::extractAttributeId(tuple)));
+                    concentrations.push_back(static_cast<double>(
+                        CDataGatherer::extractData(tuple).s_Count));
                 }
-                maths::CMultinomialConjugate prior(boost::numeric::bounds<double>::highest(), categories, concentrations);
+                maths::CMultinomialConjugate prior(boost::numeric::bounds<double>::highest(),
+                                                   categories, concentrations);
                 m_AttributeProbabilityPrior.swap(prior);
                 continue;
             }
@@ -374,7 +422,8 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, core_t::TTime en
                 std::size_t pid = CDataGatherer::extractPersonId(data_);
                 std::size_t cid = CDataGatherer::extractAttributeId(data_);
                 uint64_t count = CDataGatherer::extractData(data_).s_Count;
-                double value = model_t::offsetCountToZero(feature, static_cast<double>(count));
+                double value =
+                    model_t::offsetCountToZero(feature, static_cast<double>(count));
 
                 maths::CModel* model{this->model(feature, cid)};
                 if (!model) {
@@ -394,19 +443,23 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, core_t::TTime en
                     continue;
                 }
- LOG_TRACE(<< "Adding " << value << " for person = " << gatherer.personName(pid) + LOG_TRACE(<< "Adding " << value + << " for person = " << gatherer.personName(pid) << " and attribute = " << gatherer.attributeName(cid)); SValuesAndWeights& attribute = attributes[cid]; - std::size_t duplicate = data.size() >= this->params().s_MinimumToDeduplicate ? fuzzy[cid].duplicate(sampleTime, {value}) - : attribute.s_Values.size(); + std::size_t duplicate = data.size() >= this->params().s_MinimumToDeduplicate + ? fuzzy[cid].duplicate(sampleTime, {value}) + : attribute.s_Values.size(); if (duplicate < attribute.s_Values.size()) { - attribute.s_Weights[duplicate][0][0] += this->sampleRateWeight(pid, cid) * this->learnRate(feature); + attribute.s_Weights[duplicate][0][0] += + this->sampleRateWeight(pid, cid) * this->learnRate(feature); } else { attribute.s_Values.emplace_back(sampleTime, TDouble2Vec{value}, pid); - attribute.s_Weights.emplace_back(TDouble2Vec4Vec{{this->sampleRateWeight(pid, cid) * this->learnRate(feature)}, - model->winsorisationWeight(1.0, sampleTime, {value})}); + attribute.s_Weights.emplace_back(TDouble2Vec4Vec{ + {this->sampleRateWeight(pid, cid) * this->learnRate(feature)}, + model->winsorisationWeight(1.0, sampleTime, {value})}); } } @@ -420,7 +473,8 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, core_t::TTime en .trendWeights(attribute.second.s_Weights) .priorWeights(attribute.second.s_Weights); maths::CModel* model{this->model(feature, cid)}; - if (model->addSamples(params, attribute.second.s_Values) == maths::CModel::E_Reset) { + if (model->addSamples(params, attribute.second.s_Values) == + maths::CModel::E_Reset) { gatherer.resetSampleCount(cid); } } @@ -440,7 +494,8 @@ void CEventRatePopulationModel::prune(std::size_t maximumAge) { TSizeVec peopleToRemove; TSizeVec attributesToRemove; - this->peopleAndAttributesToRemove(m_CurrentBucketStats.s_StartTime, maximumAge, peopleToRemove, attributesToRemove); + this->peopleAndAttributesToRemove(m_CurrentBucketStats.s_StartTime, maximumAge, + peopleToRemove, attributesToRemove); if (peopleToRemove.empty() && attributesToRemove.empty()) { return; @@ -449,7 +504,8 @@ void CEventRatePopulationModel::prune(std::size_t maximumAge) { std::sort(peopleToRemove.begin(), peopleToRemove.end()); std::sort(attributesToRemove.begin(), attributesToRemove.end()); LOG_DEBUG(<< "Removing people {" << this->printPeople(peopleToRemove, 20) << '}'); - LOG_DEBUG(<< "Removing attributes {" << this->printAttributes(attributesToRemove, 20) << '}'); + LOG_DEBUG(<< "Removing attributes {" + << this->printAttributes(attributesToRemove, 20) << '}'); // Stop collecting for these people/attributes and add them // to the free list. 
@@ -458,7 +514,8 @@ void CEventRatePopulationModel::prune(std::size_t maximumAge) {
 
     if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) {
         TFeatureSizeSizePrFeatureDataPrVecPrVec featureData;
-        gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), featureData);
+        gatherer.featureData(m_CurrentBucketStats.s_StartTime,
+                             gatherer.bucketLength(), featureData);
         for (auto& feature : featureData) {
             m_CurrentBucketStats.s_FeatureData[feature.first].swap(feature.second);
         }
@@ -498,10 +555,13 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
     LOG_TRACE(<< "computeProbability(" << gatherer.personName(pid) << ")");
 
     using TStoredStringPtr1Vec = core::CSmallVector<core::CStoredStringPtr, 1>;
-    using TSizeProbabilityAndInfluenceUMap = boost::unordered_map<std::size_t, CProbabilityAndInfluenceCalculator>;
+    using TSizeProbabilityAndInfluenceUMap =
+        boost::unordered_map<std::size_t, CProbabilityAndInfluenceCalculator>;
     using TDoubleFeaturePr = std::pair<double, model_t::EFeature>;
-    using TDoubleFeaturePrMinAccumulator = maths::CBasicStatistics::SMin<TDoubleFeaturePr>::TAccumulator;
-    using TSizeDoubleFeaturePrMinAccumulatorUMap = boost::unordered_map<std::size_t, TDoubleFeaturePrMinAccumulator>;
+    using TDoubleFeaturePrMinAccumulator =
+        maths::CBasicStatistics::SMin<TDoubleFeaturePr>::TAccumulator;
+    using TSizeDoubleFeaturePrMinAccumulatorUMap =
+        boost::unordered_map<std::size_t, TDoubleFeaturePrMinAccumulator>;
 
     static const TStoredStringPtr1Vec NO_CORRELATED_ATTRIBUTES;
     static const TSizeDoublePr1Vec NO_CORRELATES;
@@ -520,10 +580,9 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
 
     maths::CMultinomialConjugate personAttributeProbabilityPrior(m_NewAttributeProbabilityPrior);
 
-    CAnnotatedProbabilityBuilder resultBuilder(result,
-                                               std::max(numberAttributeProbabilities, std::size_t(1)),
-                                               function_t::function(gatherer.features()),
-                                               gatherer.numberActivePeople());
+    CAnnotatedProbabilityBuilder resultBuilder(
+        result, std::max(numberAttributeProbabilities, std::size_t(1)),
+        function_t::function(gatherer.features()), gatherer.numberActivePeople());
     resultBuilder.attributeProbabilityPrior(&m_AttributeProbabilityPrior);
     resultBuilder.personAttributeProbabilityPrior(&personAttributeProbabilityPrior);
@@ -535,9 +594,13 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
             const TSizeSizePrFeatureDataPrVec& data = this->featureData(feature, startTime);
             TSizeSizePr range = personRange(data, pid);
             for (std::size_t j = range.first; j < range.second; ++j) {
-                TDouble1Vec category(1, static_cast<double>(CDataGatherer::extractAttributeId(data[j])));
-                TDouble4Vec1Vec weights(1, TDouble4Vec(1, static_cast<double>(CDataGatherer::extractData(data[j]).s_Count)));
-                personAttributeProbabilityPrior.addSamples(maths::CConstantWeights::COUNT, category, weights);
+                TDouble1Vec category(
+                    1, static_cast<double>(CDataGatherer::extractAttributeId(data[j])));
+                TDouble4Vec1Vec weights(
+                    1, TDouble4Vec(1, static_cast<double>(
+                                          CDataGatherer::extractData(data[j]).s_Count)));
+                personAttributeProbabilityPrior.addSamples(
+                    maths::CConstantWeights::COUNT, category, weights);
             }
             continue;
         }
@@ -551,7 +614,8 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
         for (std::size_t j = range.first; j < range.second; ++j) {
             std::size_t cid = CDataGatherer::extractAttributeId(featureData[j]);
 
-            if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, cid, model_t::sampleTime(feature, startTime, bucketLength))) {
+            if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, cid,
+                                         model_t::sampleTime(feature, startTime, bucketLength))) {
                 continue;
             }
 
@@ -565,30 +629,31 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
                 model_t::CResultType type;
                 TSize1Vec mostAnomalousCorrelate;
                 if (pConditional.emplace(cid, pConditionalTemplate)
(pConditional.emplace(cid, pConditionalTemplate) - .first->second.addProbability(feature, - cid, - *params.s_Model, - params.s_ElapsedTime, - params.s_ComputeProbabilityParams, - params.s_Time, - params.s_Value, - params.s_Probability, - params.s_Tail, - type, - mostAnomalousCorrelate)) { - LOG_TRACE(<< "P(" << params.describe() << ", attribute = " << gatherer.attributeName(cid) - << ", person = " << gatherer.personName(pid) << ") = " << params.s_Probability); - CProbabilityAndInfluenceCalculator& calculator = pConditional.emplace(cid, pConditionalTemplate).first->second; - const auto& influenceValues = CDataGatherer::extractData(featureData[j]).s_InfluenceValues; + .first->second.addProbability( + feature, cid, *params.s_Model, params.s_ElapsedTime, + params.s_ComputeProbabilityParams, params.s_Time, + params.s_Value, params.s_Probability, params.s_Tail, + type, mostAnomalousCorrelate)) { + LOG_TRACE(<< "P(" << params.describe() + << ", attribute = " << gatherer.attributeName(cid) + << ", person = " << gatherer.personName(pid) + << ") = " << params.s_Probability); + CProbabilityAndInfluenceCalculator& calculator = + pConditional.emplace(cid, pConditionalTemplate).first->second; + const auto& influenceValues = + CDataGatherer::extractData(featureData[j]).s_InfluenceValues; for (std::size_t k = 0u; k < influenceValues.size(); ++k) { - if (const CInfluenceCalculator* influenceCalculator = this->influenceCalculator(feature, k)) { + if (const CInfluenceCalculator* influenceCalculator = + this->influenceCalculator(feature, k)) { calculator.plugin(*influenceCalculator); - calculator.addInfluences(*(gatherer.beginInfluencers() + k), influenceValues[k], params); + calculator.addInfluences(*(gatherer.beginInfluencers() + k), + influenceValues[k], params); } } minimumProbabilityFeatures[cid].add({params.s_Probability, feature}); } else { - LOG_ERROR(<< "Unable to compute P(" << params.describe() << ", attribute = " << gatherer.attributeName(cid) + LOG_ERROR(<< "Unable to compute P(" << params.describe() + << ", attribute = " << gatherer.attributeName(cid) << ", person = " << gatherer.personName(pid) << ")"); } } @@ -616,8 +681,10 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid, // multinomial distribution. 
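            // For example, with hypothetical numbers: if this person's
            // multinomial prior says they use attribute A with probability 0.9
            // and attribute B with probability 0.1, an equally improbable
            // bucket is reported differently for the two, because seeing B
            // from this person at all is itself unlikely; the weight w
            // computed below folds that category probability into the result.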
            double w = 1.0;
            double pAttributeGivenPerson;
-            if (personAttributeProbabilityPrior.probability(static_cast<double>(cid), pAttributeGivenPerson)) {
-                w = maths::CCategoricalTools::probabilityOfCategory(pConditional.size(), pAttributeGivenPerson);
+            if (personAttributeProbabilityPrior.probability(static_cast<double>(cid),
+                                                            pAttributeGivenPerson)) {
+                w = maths::CCategoricalTools::probabilityOfCategory(
+                    pConditional.size(), pAttributeGivenPerson);
             }
             LOG_TRACE(<< "w = " << w);

@@ -629,14 +696,10 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
        } else {
            double p;
            pPersonAndAttribute.calculate(p);
-            resultBuilder.addAttributeProbability(cid,
-                                                  gatherer.attributeNamePtr(cid),
-                                                  pAttribute,
-                                                  p,
-                                                  model_t::CResultType::E_Unconditional,
-                                                  (feature->second)[0].second,
-                                                  NO_CORRELATED_ATTRIBUTES,
-                                                  NO_CORRELATES);
+            resultBuilder.addAttributeProbability(
+                cid, gatherer.attributeNamePtr(cid), pAttribute, p,
+                model_t::CResultType::E_Unconditional, (feature->second)[0].second,
+                NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
        }
    }

@@ -657,10 +720,11 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
    return true;
}

-bool CEventRatePopulationModel::computeTotalProbability(const std::string& /*person*/,
-                                                        std::size_t /*numberAttributeProbabilities*/,
-                                                        TOptionalDouble& probability,
-                                                        TAttributeProbability1Vec& attributeProbabilities) const {
+bool CEventRatePopulationModel::computeTotalProbability(
+    const std::string& /*person*/,
+    std::size_t /*numberAttributeProbabilities*/,
+    TOptionalDouble& probability,
+    TAttributeProbability1Vec& attributeProbabilities) const {
    probability = TOptionalDouble();
    attributeProbabilities.clear();
    return true;
@@ -674,7 +738,8 @@ uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) con
    }

    using TStrCRefStrCRefPr = std::pair<TStrCRef, TStrCRef>;
-    using TStrCRefStrCRefPrUInt64Map = std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;
+    using TStrCRefStrCRefPrUInt64Map =
+        std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;

    const CDataGatherer& gatherer = this->dataGatherer();

@@ -684,14 +749,16 @@ uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) con
    const TDoubleVec& concentrations = m_AttributeProbabilityPrior.concentrations();
    for (std::size_t i = 0u; i < categories.size(); ++i) {
        std::size_t cid = static_cast<std::size_t>(categories[i]);
-        uint64_t& hash = hashes[{boost::cref(EMPTY_STRING), boost::cref(this->attributeName(cid))}];
+        uint64_t& hash =
+            hashes[{boost::cref(EMPTY_STRING), boost::cref(this->attributeName(cid))}];
        hash = maths::CChecksum::calculate(hash, concentrations[i]);
    }

    for (const auto& feature : m_FeatureModels) {
        for (std::size_t cid = 0u; cid < feature.s_Models.size(); ++cid) {
            if (gatherer.isAttributeActive(cid)) {
-                uint64_t& hash = hashes[{boost::cref(EMPTY_STRING), boost::cref(gatherer.attributeName(cid))}];
+                uint64_t& hash =
+                    hashes[{boost::cref(EMPTY_STRING), boost::cref(gatherer.attributeName(cid))}];
                hash = maths::CChecksum::calculate(hash, feature.s_Models[cid]);
            }
        }
@@ -700,8 +767,11 @@ uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) con
    for (const auto& feature : m_FeatureCorrelatesModels) {
        for (const auto& model : feature.s_Models->correlationModels()) {
            std::size_t cids[]{model.first.first, model.first.second};
-            if (gatherer.isAttributeActive(cids[0]) && gatherer.isAttributeActive(cids[1])) {
-                uint64_t& hash = hashes[{boost::cref(gatherer.attributeName(cids[0])), boost::cref(gatherer.attributeName(cids[1]))}];
+            if (gatherer.isAttributeActive(cids[0]) &&
+                gatherer.isAttributeActive(cids[1])) {
+                uint64_t& hash =
hashes[{boost::cref(gatherer.attributeName(cids[0])), + boost::cref(gatherer.attributeName(cids[1]))}]; hash = maths::CChecksum::calculate(hash, model.second); } } @@ -709,15 +779,18 @@ uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) con if (includeCurrentBucketStats) { for (const auto& personCount : this->personCounts()) { - uint64_t& hash = hashes[{boost::cref(gatherer.personName(personCount.first)), boost::cref(EMPTY_STRING)}]; + uint64_t& hash = + hashes[{boost::cref(gatherer.personName(personCount.first)), boost::cref(EMPTY_STRING)}]; hash = maths::CChecksum::calculate(hash, personCount.second); } for (const auto& feature : m_CurrentBucketStats.s_FeatureData) { for (const auto& data : feature.second) { std::size_t pid = CDataGatherer::extractPersonId(data); std::size_t cid = CDataGatherer::extractAttributeId(data); - uint64_t& hash = hashes[{boost::cref(this->personName(pid)), boost::cref(this->attributeName(cid))}]; - hash = maths::CChecksum::calculate(hash, CDataGatherer::extractData(data).s_Count); + uint64_t& hash = + hashes[{boost::cref(this->personName(pid)), boost::cref(this->attributeName(cid))}]; + hash = maths::CChecksum::calculate( + hash, CDataGatherer::extractData(data).s_Count); } } } @@ -731,22 +804,29 @@ uint64_t CEventRatePopulationModel::checksum(bool includeCurrentBucketStats) con void CEventRatePopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CEventRatePopulationModel"); this->CPopulationModel::debugMemoryUsage(mem->addChild()); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", m_CurrentBucketStats.s_PersonCounts, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", m_CurrentBucketStats.s_FeatureData, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", m_CurrentBucketStats.s_InterimCorrections, mem); - core::CMemoryDebug::dynamicSize("m_AttributeProbabilities", m_AttributeProbabilities, mem); - core::CMemoryDebug::dynamicSize("m_NewPersonAttributePrior", m_NewAttributeProbabilityPrior, mem); - core::CMemoryDebug::dynamicSize("m_AttributeProbabilityPrior", m_AttributeProbabilityPrior, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", + m_CurrentBucketStats.s_PersonCounts, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", + m_CurrentBucketStats.s_FeatureData, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", + m_CurrentBucketStats.s_InterimCorrections, mem); + core::CMemoryDebug::dynamicSize("m_AttributeProbabilities", + m_AttributeProbabilities, mem); + core::CMemoryDebug::dynamicSize("m_NewPersonAttributePrior", + m_NewAttributeProbabilityPrior, mem); + core::CMemoryDebug::dynamicSize("m_AttributeProbabilityPrior", + m_AttributeProbabilityPrior, mem); core::CMemoryDebug::dynamicSize("m_FeatureModels", m_FeatureModels, mem); - core::CMemoryDebug::dynamicSize("m_FeatureCorrelatesModels", m_FeatureCorrelatesModels, mem); + core::CMemoryDebug::dynamicSize("m_FeatureCorrelatesModels", + m_FeatureCorrelatesModels, mem); core::CMemoryDebug::dynamicSize("m_MemoryEstimator", m_MemoryEstimator, mem); } std::size_t CEventRatePopulationModel::memoryUsage() const { const CDataGatherer& gatherer = this->dataGatherer(); - TOptionalSize estimate = this->estimateMemoryUsage(gatherer.numberActivePeople(), - gatherer.numberActiveAttributes(), - 0); // # correlations + TOptionalSize estimate = this->estimateMemoryUsage( + 
        gatherer.numberActivePeople(), gatherer.numberActiveAttributes(),
+        0); // # correlations
    return estimate ? estimate.get() : this->computeMemoryUsage();
}

@@ -776,11 +856,12 @@ CEventRatePopulationModel::CModelDetailsViewPtr CEventRatePopulationModel::detai
    return CModelDetailsViewPtr(new CEventRatePopulationModelDetailsView(*this));
}

-const CEventRatePopulationModel::TSizeSizePrFeatureDataPrVec& CEventRatePopulationModel::featureData(model_t::EFeature feature,
-                                                                                                     core_t::TTime time) const {
+const CEventRatePopulationModel::TSizeSizePrFeatureDataPrVec&
+CEventRatePopulationModel::featureData(model_t::EFeature feature, core_t::TTime time) const {
    static const TSizeSizePrFeatureDataPrVec EMPTY;
    if (!this->bucketStatsAvailable(time)) {
-        LOG_ERROR(<< "No statistics at " << time << ", current bucket = [" << m_CurrentBucketStats.s_StartTime << ","
+        LOG_ERROR(<< "No statistics at " << time << ", current bucket = ["
+                  << m_CurrentBucketStats.s_StartTime << ","
                  << m_CurrentBucketStats.s_StartTime + this->bucketLength() << ")");
        return EMPTY;
    }
@@ -804,11 +885,13 @@ uint64_t CEventRatePopulationModel::currentBucketTotalCount() const {
    return m_CurrentBucketStats.s_TotalCount;
}

-const CEventRatePopulationModel::TSizeUInt64PrVec& CEventRatePopulationModel::personCounts() const {
+const CEventRatePopulationModel::TSizeUInt64PrVec&
+CEventRatePopulationModel::personCounts() const {
    return m_CurrentBucketStats.s_PersonCounts;
}

-CEventRatePopulationModel::TCorrectionKeyDouble1VecUMap& CEventRatePopulationModel::currentBucketInterimCorrections() const {
+CEventRatePopulationModel::TCorrectionKeyDouble1VecUMap&
+CEventRatePopulationModel::currentBucketInterimCorrections() const {
    return m_CurrentBucketStats.s_InterimCorrections;
}

@@ -845,19 +928,24 @@ void CEventRatePopulationModel::updateRecycledModels() {
    this->CPopulationModel::updateRecycledModels();
}

-void CEventRatePopulationModel::refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor) {
+void CEventRatePopulationModel::refreshCorrelationModels(std::size_t resourceLimit,
+                                                         CResourceMonitor& resourceMonitor) {
    std::size_t n = this->numberOfPeople();
-    double maxNumberCorrelations = this->params().s_CorrelationModelsOverhead * static_cast<double>(n);
-    auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate, this, n, 0, _1);
+    double maxNumberCorrelations = this->params().s_CorrelationModelsOverhead *
+                                   static_cast<double>(n);
+    auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate,
+                                   this, n, 0, _1);
    CTimeSeriesCorrelateModelAllocator allocator(
-        resourceMonitor, memoryUsage, resourceLimit, static_cast<std::size_t>(maxNumberCorrelations + 0.5));
+        resourceMonitor, memoryUsage, resourceLimit,
+        static_cast<std::size_t>(maxNumberCorrelations + 0.5));
    for (auto& feature : m_FeatureCorrelatesModels) {
        allocator.prototypePrior(feature.s_ModelPrior);
        feature.s_Models->refresh(allocator);
    }
}

-void CEventRatePopulationModel::clearPrunedResources(const TSizeVec& /*people*/, const TSizeVec& attributes) {
+void CEventRatePopulationModel::clearPrunedResources(const TSizeVec& /*people*/,
+                                                     const TSizeVec& attributes) {
    for (auto cid : attributes) {
        for (auto& feature : m_FeatureModels) {
            feature.s_Models[cid].reset(this->tinyModel());
@@ -875,17 +963,25 @@ void CEventRatePopulationModel::doSkipSampling(core_t::TTime startTime, core_t::
    this->CPopulationModel::doSkipSampling(startTime, endTime);
}

-const maths::CModel* CEventRatePopulationModel::model(model_t::EFeature feature, std::size_t cid) const {
+const maths::CModel* CEventRatePopulationModel::model(model_t::EFeature feature,
+                                                      std::size_t cid) const {
    return const_cast<CEventRatePopulationModel*>(this)->model(feature, cid);
}

maths::CModel* CEventRatePopulationModel::model(model_t::EFeature feature, std::size_t cid) {
-    auto i = std::find_if(
-        m_FeatureModels.begin(), m_FeatureModels.end(), [feature](const SFeatureModels& model) { return model.s_Feature == feature; });
-    return i != m_FeatureModels.end() && cid < i->s_Models.size() ? i->s_Models[cid].get() : nullptr;
+    auto i = std::find_if(m_FeatureModels.begin(), m_FeatureModels.end(),
+                          [feature](const SFeatureModels& model) {
+                              return model.s_Feature == feature;
+                          });
+    return i != m_FeatureModels.end() && cid < i->s_Models.size()
+               ? i->s_Models[cid].get()
+               : nullptr;
}

-bool CEventRatePopulationModel::correlates(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const {
+bool CEventRatePopulationModel::correlates(model_t::EFeature feature,
+                                           std::size_t pid,
+                                           std::size_t cid,
+                                           core_t::TTime time) const {
    if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields) {
        return false;
    }
@@ -897,7 +993,8 @@ bool CEventRatePopulationModel::correlates(model_t::EFeature feature, std::size_
    for (std::size_t j = range.first; j < range.second; ++j) {
        std::size_t cids[]{cid, CDataGatherer::extractAttributeId(data[j])};
        for (const auto& correlate : model->correlates()) {
-            if ((cids[0] == correlate[0] && cids[1] == correlate[1]) || (cids[1] == correlate[0] && cids[0] == correlate[1])) {
+            if ((cids[0] == correlate[0] && cids[1] == correlate[1]) ||
+                (cids[1] == correlate[0] && cids[0] == correlate[1])) {
                return true;
            }
        }
@@ -914,8 +1011,10 @@ void CEventRatePopulationModel::fill(model_t::EFeature feature,
    auto data = find(this->featureData(feature, bucketTime), pid, cid);
    const maths::CModel* model{this->model(feature, cid)};
    core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength())};
-    TDouble2Vec4Vec weight{model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)};
-    double value{model_t::offsetCountToZero(feature, static_cast<double>(CDataGatherer::extractData(*data).s_Count))};
+    TDouble2Vec4Vec weight{
+        model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)};
+    double value{model_t::offsetCountToZero(
+        feature, static_cast<double>(CDataGatherer::extractData(*data).s_Count))};

    params.s_Feature = feature;
    params.s_Model = model;
@@ -924,9 +1023,11 @@ void CEventRatePopulationModel::fill(model_t::EFeature feature,
    params.s_Value.assign(1, TDouble2Vec{value});
    if (interim && model_t::requiresInterimResultAdjustment(feature)) {
        double mode{params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weight)[0]};
-        TDouble2Vec correction{this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), mode, value)};
+        TDouble2Vec correction{this->interimValueCorrector().corrections(
+            time, this->currentBucketTotalCount(), mode, value)};
        params.s_Value[0] += correction;
-        this->currentBucketInterimCorrections().emplace(CCorrectionKey(feature, pid, cid), correction);
+        this->currentBucketInterimCorrections().emplace(
+            CCorrectionKey(feature, pid, cid), correction);
    }
    params.s_Count = 1.0;
    params.s_ComputeProbabilityParams.tag(pid)
diff --git a/lib/model/CEventRatePopulationModelFactory.cc b/lib/model/CEventRatePopulationModelFactory.cc
index 6989cace0b..ce22985467 100644
--- a/lib/model/CEventRatePopulationModelFactory.cc
+++ b/lib/model/CEventRatePopulationModelFactory.cc
@@ -31,11 +31,8 @@
namespace model { CEventRatePopulationModelFactory::CEventRatePopulationModelFactory(const SModelParams& params, model_t::ESummaryMode summaryMode, const std::string& summaryCountFieldName) - : CModelFactory(params), - m_Identifier(), - m_SummaryMode(summaryMode), - m_SummaryCountFieldName(summaryCountFieldName), - m_UseNull(false), + : CModelFactory(params), m_Identifier(), m_SummaryMode(summaryMode), + m_SummaryCountFieldName(summaryCountFieldName), m_UseNull(false), m_BucketResultsDelay(0) { } @@ -43,7 +40,8 @@ CEventRatePopulationModelFactory* CEventRatePopulationModelFactory::clone() cons return new CEventRatePopulationModelFactory(*this); } -CAnomalyDetectorModel* CEventRatePopulationModelFactory::makeModel(const SModelInitializationData& initData) const { +CAnomalyDetectorModel* +CEventRatePopulationModelFactory::makeModel(const SModelInitializationData& initData) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; if (!dataGatherer) { LOG_ERROR(<< "NULL data gatherer"); @@ -57,16 +55,16 @@ CAnomalyDetectorModel* CEventRatePopulationModelFactory::makeModel(const SModelI influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } - return new CEventRatePopulationModel(this->modelParams(), - dataGatherer, - this->defaultFeatureModels(features, dataGatherer->bucketLength(), 1.0, false), - this->defaultCorrelatePriors(features), - this->defaultCorrelates(features), - influenceCalculators); + return new CEventRatePopulationModel( + this->modelParams(), dataGatherer, + this->defaultFeatureModels(features, dataGatherer->bucketLength(), 1.0, false), + this->defaultCorrelatePriors(features), + this->defaultCorrelates(features), influenceCalculators); } -CAnomalyDetectorModel* CEventRatePopulationModelFactory::makeModel(const SModelInitializationData& initData, - core::CStateRestoreTraverser& traverser) const { +CAnomalyDetectorModel* +CEventRatePopulationModelFactory::makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; if (!dataGatherer) { LOG_ERROR(<< "NULL data gatherer"); @@ -80,52 +78,36 @@ CAnomalyDetectorModel* CEventRatePopulationModelFactory::makeModel(const SModelI influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } - return new CEventRatePopulationModel(this->modelParams(), - dataGatherer, - this->defaultFeatureModels(features, dataGatherer->bucketLength(), 1.0, false), - this->defaultCorrelatePriors(features), - this->defaultCorrelates(features), - influenceCalculators, - traverser); + return new CEventRatePopulationModel( + this->modelParams(), dataGatherer, + this->defaultFeatureModels(features, dataGatherer->bucketLength(), 1.0, false), + this->defaultCorrelatePriors(features), + this->defaultCorrelates(features), influenceCalculators, traverser); } -CDataGatherer* CEventRatePopulationModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer(model_t::E_PopulationEventRate, - m_SummaryMode, - this->modelParams(), - m_SummaryCountFieldName, - m_PartitionFieldName, - initData.s_PartitionFieldValue, - m_PersonFieldName, - m_AttributeFieldName, - m_ValueFieldName, - m_InfluenceFieldNames, - m_UseNull, - this->searchKey(), - m_Features, - initData.s_StartTime, - 0); +CDataGatherer* +CEventRatePopulationModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { + return new CDataGatherer(model_t::E_PopulationEventRate, 
m_SummaryMode, + this->modelParams(), m_SummaryCountFieldName, + m_PartitionFieldName, initData.s_PartitionFieldValue, + m_PersonFieldName, m_AttributeFieldName, + m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, + this->searchKey(), m_Features, initData.s_StartTime, 0); } -CDataGatherer* CEventRatePopulationModelFactory::makeDataGatherer(const std::string& partitionFieldValue, - core::CStateRestoreTraverser& traverser) const { - return new CDataGatherer(model_t::E_PopulationEventRate, - m_SummaryMode, - this->modelParams(), - m_SummaryCountFieldName, - m_PartitionFieldName, - partitionFieldValue, - m_PersonFieldName, - m_AttributeFieldName, - m_ValueFieldName, - m_InfluenceFieldNames, - m_UseNull, - this->searchKey(), - traverser); +CDataGatherer* +CEventRatePopulationModelFactory::makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const { + return new CDataGatherer( + model_t::E_PopulationEventRate, m_SummaryMode, this->modelParams(), + m_SummaryCountFieldName, m_PartitionFieldName, partitionFieldValue, + m_PersonFieldName, m_AttributeFieldName, m_ValueFieldName, + m_InfluenceFieldNames, m_UseNull, this->searchKey(), traverser); } -CEventRatePopulationModelFactory::TPriorPtr CEventRatePopulationModelFactory::defaultPrior(model_t::EFeature feature, - const SModelParams& params) const { +CEventRatePopulationModelFactory::TPriorPtr +CEventRatePopulationModelFactory::defaultPrior(model_t::EFeature feature, + const SModelParams& params) const { // Categorical data all use the multinomial prior. The creation // of these priors is managed by defaultCategoricalPrior. if (model_t::isCategorical(feature)) { @@ -153,14 +135,18 @@ CEventRatePopulationModelFactory::TPriorPtr CEventRatePopulationModelFactory::de maths_t::EDataType dataType = this->dataType(); - maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CGammaRateConjugate gammaPrior = + maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); maths::CLogNormalMeanPrecConjugate logNormalPrior = - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, + params.s_DecayRate); - maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); + maths::CNormalMeanPrecConjugate normalPrior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); - maths::CPoissonMeanConjugate poissonPrior = maths::CPoissonMeanConjugate::nonInformativePrior(0.0, params.s_DecayRate); + maths::CPoissonMeanConjugate poissonPrior = + maths::CPoissonMeanConjugate::nonInformativePrior(0.0, params.s_DecayRate); // Create the component priors. 
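    // In effect the vector assembled below backs one-of-n model selection:
    // each candidate prior (Poisson, gamma, log-normal, normal and, when the
    // mode fraction permits clustering, a multimodal wrapper around the
    // continuous candidates) is updated in parallel and weighted by how well
    // it explains the data, so predictions are a likelihood-weighted average
    // of the candidates.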
TPriorPtrVec priors; @@ -177,14 +163,12 @@ CEventRatePopulationModelFactory::TPriorPtr CEventRatePopulationModelFactory::de modePriors.emplace_back(logNormalPrior.clone()); modePriors.emplace_back(normalPrior.clone()); maths::COneOfNPrior modePrior(modePriors, dataType, params.s_DecayRate); - maths::CXMeansOnline1d clusterer(dataType, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - params.s_DecayRate, - params.s_MinimumModeFraction, - params.s_MinimumModeCount, - params.minimumCategoryCount()); - maths::CMultimodalPrior multimodalPrior(dataType, clusterer, modePrior, params.s_DecayRate); + maths::CXMeansOnline1d clusterer( + dataType, maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, params.s_DecayRate, params.s_MinimumModeFraction, + params.s_MinimumModeCount, params.minimumCategoryCount()); + maths::CMultimodalPrior multimodalPrior(dataType, clusterer, modePrior, + params.s_DecayRate); priors.emplace_back(multimodalPrior.clone()); } @@ -192,12 +176,14 @@ CEventRatePopulationModelFactory::TPriorPtr CEventRatePopulationModelFactory::de } CEventRatePopulationModelFactory::TMultivariatePriorPtr -CEventRatePopulationModelFactory::defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const { +CEventRatePopulationModelFactory::defaultMultivariatePrior(model_t::EFeature feature, + const SModelParams& params) const { std::size_t dimension = model_t::dimension(feature); TMultivariatePriorPtrVec priors; priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u); - TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(dimension, params); + TMultivariatePriorPtr multivariateNormal = + this->multivariateNormalPrior(dimension, params); priors.push_back(multivariateNormal); if (params.s_MinimumModeFraction <= 0.5) { priors.push_back(this->multivariateMultimodalPrior(dimension, params, *multivariateNormal)); @@ -207,7 +193,8 @@ CEventRatePopulationModelFactory::defaultMultivariatePrior(model_t::EFeature fea } CEventRatePopulationModelFactory::TMultivariatePriorPtr -CEventRatePopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, const SModelParams& params) const { +CEventRatePopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, + const SModelParams& params) const { TMultivariatePriorPtrVec priors; priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 
2u : 1u);

    TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(2, params);
@@ -220,15 +207,10 @@ CEventRatePopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feat

const CSearchKey& CEventRatePopulationModelFactory::searchKey() const {
    if (!m_SearchKeyCache) {
-        m_SearchKeyCache.reset(CSearchKey(m_Identifier,
-                                          function_t::function(m_Features),
-                                          m_UseNull,
-                                          this->modelParams().s_ExcludeFrequent,
-                                          m_ValueFieldName,
-                                          m_AttributeFieldName,
-                                          m_PersonFieldName,
-                                          m_PartitionFieldName,
-                                          m_InfluenceFieldNames));
+        m_SearchKeyCache.reset(CSearchKey(
+            m_Identifier, function_t::function(m_Features), m_UseNull,
+            this->modelParams().s_ExcludeFrequent, m_ValueFieldName, m_AttributeFieldName,
+            m_PersonFieldName, m_PartitionFieldName, m_InfluenceFieldNames));
    }
    return *m_SearchKeyCache;
}
@@ -277,7 +259,8 @@ void CEventRatePopulationModelFactory::bucketResultsDelay(std::size_t bucketResu
    m_BucketResultsDelay = bucketResultsDelay;
}

-CEventRatePopulationModelFactory::TStrCRefVec CEventRatePopulationModelFactory::partitioningFields() const {
+CEventRatePopulationModelFactory::TStrCRefVec
+CEventRatePopulationModelFactory::partitioningFields() const {
    TStrCRefVec result;
    result.reserve(3);
    if (!m_PartitionFieldName.empty()) {
diff --git a/lib/model/CFeatureData.cc b/lib/model/CFeatureData.cc
index 31454defb9..a6e95d899a 100644
--- a/lib/model/CFeatureData.cc
+++ b/lib/model/CFeatureData.cc
@@ -25,7 +25,8 @@ using TSizeVec = std::vector<std::size_t>;

//! Get the sequence [0, N).
template<std::size_t N>
const TSizeVec& sequence() {
-    static const TSizeVec result(boost::counting_iterator<std::size_t>(0), boost::counting_iterator<std::size_t>(N));
+    static const TSizeVec result(boost::counting_iterator<std::size_t>(0),
+                                 boost::counting_iterator<std::size_t>(N));
    return result;
}
}
@@ -101,9 +102,7 @@ SMetricFeatureData::SMetricFeatureData(core_t::TTime bucketTime,
                                       bool isNonNegative,
                                       const TSampleVec& samples)
    : s_BucketValue(boost::in_place(bucketTime, bucketValue, bucketVarianceScale, bucketCount)),
-      s_IsInteger(isInteger),
-      s_IsNonNegative(isNonNegative),
-      s_Samples(samples) {
+      s_IsInteger(isInteger), s_IsNonNegative(isNonNegative), s_Samples(samples) {
    s_InfluenceValues.swap(influenceValues);
}

@@ -113,8 +112,9 @@ SMetricFeatureData::SMetricFeatureData(bool isInteger,
                                       bool isNonNegative,
                                       const TSampleVec& samples)
std::string SMetricFeatureData::print() const {
    std::ostringstream result;
-    result << "value = " << core::CContainerPrinter::print(s_BucketValue) << ", is integer " << s_IsInteger << ", is non-negative "
-           << s_IsNonNegative << ", samples = " << core::CContainerPrinter::print(s_Samples);
+    result << "value = " << core::CContainerPrinter::print(s_BucketValue)
+           << ", is integer " << s_IsInteger << ", is non-negative " << s_IsNonNegative
+           << ", samples = " << core::CContainerPrinter::print(s_Samples);
    return result.str();
}
diff --git a/lib/model/CForecastDataSink.cc b/lib/model/CForecastDataSink.cc
index 6fbc4f1ceb..a4e803ec22 100644
--- a/lib/model/CForecastDataSink.cc
+++ b/lib/model/CForecastDataSink.cc
@@ -58,11 +58,13 @@ using TScopedAllocator = core::CScopedRapidJsonPoolAllocator;
    void operator()(const TSampleVec& sample, core::CStatePersistInserter& inserter) const {
-        inserter.insertValue(SUM_SAMPLE_TAG, core::CPersistUtils::toString(sample, CSample::SToString()));
+        inserter.insertValue(SUM_SAMPLE_TAG, core::CPersistUtils::toString(
+                                                 sample, CSample::SToString()));
    }

    bool operator()(TSampleVec& sample, core::CStateRestoreTraverser& traverser) const {
        if (traverser.name() != SUM_SAMPLE_TAG ||
-            core::CPersistUtils::fromString(traverser.value(), CSample::SFromString(), sample) == false) {
+            core::CPersistUtils::fromString(
+                traverser.value(), CSample::SFromString(), sample) == false) {
            LOG_ERROR(<< "Invalid sample in: " << traverser.value());
            return false;
        }
@@ -65,7 +67,8 @@ struct SInfluencerSumSerializer {
    using TStrCRefDoublePr = std::pair<TStrCRef, double>;
    using TStrCRefDoublePrVec = std::vector<TStrCRefDoublePr>;

-    void operator()(const TStoredStringPtrDoubleUMap& map, core::CStatePersistInserter& inserter) const {
+    void operator()(const TStoredStringPtrDoubleUMap& map,
+                    core::CStatePersistInserter& inserter) const {
        TStrCRefDoublePrVec ordered;
        ordered.reserve(map.size());
        for (TStoredStringPtrDoubleUMapCItr i = map.begin(); i != map.end(); ++i) {
@@ -74,18 +77,21 @@ struct SInfluencerSumSerializer {
        std::sort(ordered.begin(), ordered.end(), maths::COrderings::SFirstLess());
        for (std::size_t i = 0u; i < ordered.size(); ++i) {
            inserter.insertValue(SUM_MAP_KEY_TAG, ordered[i].first);
-            inserter.insertValue(SUM_MAP_VALUE_TAG, ordered[i].second, core::CIEEE754::E_SinglePrecision);
+            inserter.insertValue(SUM_MAP_VALUE_TAG, ordered[i].second,
+                                 core::CIEEE754::E_SinglePrecision);
        }
    }

-    bool operator()(TStoredStringPtrDoubleUMap& map, core::CStateRestoreTraverser& traverser) const {
+    bool operator()(TStoredStringPtrDoubleUMap& map,
+                    core::CStateRestoreTraverser& traverser) const {
        std::string key;
        do {
            const std::string& name = traverser.name();
            if (name == SUM_MAP_KEY_TAG) {
                key = traverser.value();
            } else if (name == SUM_MAP_VALUE_TAG) {
-                if (core::CStringUtils::stringToType(traverser.value(), map[CStringStore::influencers().get(key)]) == false) {
+                if (core::CStringUtils::stringToType(
+                        traverser.value(), map[CStringStore::influencers().get(key)]) == false) {
                    LOG_ERROR(<< "Invalid sum in " << traverser.value());
                    return false;
                }
@@ -97,11 +103,14 @@ struct SInfluencerSumSerializer {
}
// unnamed::

-CGathererTools::CArrivalTimeGatherer::CArrivalTimeGatherer() : m_LastTime(FIRST_TIME) {
+CGathererTools::CArrivalTimeGatherer::CArrivalTimeGatherer()
+    : m_LastTime(FIRST_TIME) {
}

CGathererTools::TOptionalDouble CGathererTools::CArrivalTimeGatherer::featureData() const {
-    return maths::CBasicStatistics::count(m_Value) > 0.0 ? TOptionalDouble(maths::CBasicStatistics::mean(m_Value)) : TOptionalDouble();
+    return maths::CBasicStatistics::count(m_Value) > 0.0
+               ? TOptionalDouble(maths::CBasicStatistics::mean(m_Value))
+               : TOptionalDouble();
}

void CGathererTools::CArrivalTimeGatherer::startNewBucket() {
@@ -144,7 +153,8 @@ std::string CGathererTools::CArrivalTimeGatherer::print() const {
    return o.str();
}

-const core_t::TTime CGathererTools::CArrivalTimeGatherer::FIRST_TIME(std::numeric_limits<core_t::TTime>::min());
+const core_t::TTime CGathererTools::CArrivalTimeGatherer::FIRST_TIME(
+    std::numeric_limits<core_t::TTime>::min());

CGathererTools::CSumGatherer::CSumGatherer(const SModelParams& params,
                                           std::size_t /*dimension*/,
@@ -152,11 +162,13 @@ CGathererTools::CSumGatherer::CSumGatherer(const SModelParams& params,
                                           core_t::TTime bucketLength,
                                           TStrVecCItr beginInfluencers,
                                           TStrVecCItr endInfluencers)
-    : m_Classifier(),
-      m_BucketSums(params.s_LatencyBuckets, bucketLength, startTime),
+    : m_Classifier(), m_BucketSums(params.s_LatencyBuckets, bucketLength, startTime),
      m_InfluencerBucketSums(
          std::distance(beginInfluencers, endInfluencers),
-          TStoredStringPtrDoubleUMapQueue(params.s_LatencyBuckets + 3, bucketLength, startTime, TStoredStringPtrDoubleUMap(1))) {
+          TStoredStringPtrDoubleUMapQueue(params.s_LatencyBuckets + 3,
+                                          bucketLength,
+                                          startTime,
+                                          TStoredStringPtrDoubleUMap(1))) {
}

std::size_t CGathererTools::CSumGatherer::dimension() const {
@@ -164,7 +176,9 @@ std::size_t CGathererTools::CSumGatherer::dimension() const {
}

SMetricFeatureData
-CGathererTools::CSumGatherer::featureData(core_t::TTime time, core_t::TTime /*bucketLength*/, const TSampleVec& emptySample) const {
+CGathererTools::CSumGatherer::featureData(core_t::TTime time,
+                                          core_t::TTime /*bucketLength*/,
+                                          const TSampleVec& emptySample) const {
    using TStrCRef = boost::reference_wrapper<const std::string>;
    using TDouble1VecDoublePr = std::pair<TDouble1Vec, double>;
    using TStrCRefDouble1VecDoublePrPr = std::pair<TStrCRef, TDouble1VecDoublePr>;
@@ -177,10 +191,12 @@ CGathererTools::CSumGatherer::featureData(core_t::TTime time, core_t::TTime /*bu
    }
    TStrCRefDouble1VecDoublePrPrVecVec influenceValues(m_InfluencerBucketSums.size());
    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i) {
-        const TStoredStringPtrDoubleUMap& influencerStats = m_InfluencerBucketSums[i].get(time);
+        const TStoredStringPtrDoubleUMap& influencerStats =
+            m_InfluencerBucketSums[i].get(time);
        influenceValues[i].reserve(influencerStats.size());
        for (const auto& stat : influencerStats) {
-            influenceValues[i].emplace_back(TStrCRef(*stat.first), TDouble1VecDoublePr(TDouble1Vec{stat.second}, 1.0));
+            influenceValues[i].emplace_back(
+                TStrCRef(*stat.first), TDouble1VecDoublePr(TDouble1Vec{stat.second}, 1.0));
        }
    }

@@ -190,7 +206,8 @@ CGathererTools::CSumGatherer::featureData(core_t::TTime time, core_t::TTime /*bu
            (*sum)[0].varianceScale(),
            (*sum)[0].count(),
            influenceValues,
-            m_Classifier.isInteger() && maths::CIntegerTools::isInteger(((*sum)[0].value())[0]),
+            m_Classifier.isInteger() &&
+                maths::CIntegerTools::isInteger(((*sum)[0].value())[0]),
            m_Classifier.isNonNegative(),
            *sum};
}
@@ -220,16 +237,18 @@ void CGathererTools::CSumGatherer::resetBucket(core_t::TTime bucketStart) {
}

void CGathererTools::CSumGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
-    inserter.insertLevel(CLASSIFIER_TAG, boost::bind(&CDataClassifier::acceptPersistInserter, &m_Classifier, _1));
+    inserter.insertLevel(CLASSIFIER_TAG, boost::bind(&CDataClassifier::acceptPersistInserter,
+                                                     &m_Classifier, _1));
    if (m_BucketSums.size() > 0) {
        inserter.insertLevel(BUCKET_SUM_QUEUE_TAG,
-                             boost::bind(TSampleVecQueue::CSerializer<SSumSerializer>(), boost::cref(m_BucketSums), _1));
+                             boost::bind(TSampleVecQueue::CSerializer<SSumSerializer>(),
+                                         boost::cref(m_BucketSums), _1));
    }
    for (std::size_t i = 0u; i < m_InfluencerBucketSums.size(); ++i) {
-        inserter.insertLevel(INFLUENCER_BUCKET_SUM_QUEUE_TAG,
-                             boost::bind(TStoredStringPtrDoubleUMapQueue::CSerializer<SInfluencerSumSerializer>(),
-                                         boost::cref(m_InfluencerBucketSums[i]),
-                                         _1));
+        inserter.insertLevel(
+            INFLUENCER_BUCKET_SUM_QUEUE_TAG,
+            boost::bind(TStoredStringPtrDoubleUMapQueue::CSerializer<SInfluencerSumSerializer>(),
+                        boost::cref(m_InfluencerBucketSums[i]), _1));
    }
}

@@ -238,22 +257,24 @@ bool CGathererTools::CSumGatherer::acceptRestoreTraverser(core::CStateRestoreTra
    do {
        const std::string& name = traverser.name();
        if (name == CLASSIFIER_TAG) {
-            if (traverser.traverseSubLevel(boost::bind(&CDataClassifier::acceptRestoreTraverser, &m_Classifier, _1)) == false) {
+            if (traverser.traverseSubLevel(boost::bind(&CDataClassifier::acceptRestoreTraverser,
+                                                       &m_Classifier, _1)) == false) {
                LOG_ERROR(<< "Invalid classifier in " << traverser.value());
                continue;
            }
        } else if (name == BUCKET_SUM_QUEUE_TAG) {
            if (traverser.traverseSubLevel(
-                    boost::bind(TSampleVecQueue::CSerializer<SSumSerializer>(), boost::ref(m_BucketSums), _1)) == false) {
+                    boost::bind(TSampleVecQueue::CSerializer<SSumSerializer>(),
+                                boost::ref(m_BucketSums), _1)) == false) {
                LOG_ERROR(<< "Invalid bucket queue in " << traverser.value());
                return false;
            }
        } else if (name == INFLUENCER_BUCKET_SUM_QUEUE_TAG) {
            if (i < m_InfluencerBucketSums.size() &&
-                traverser.traverseSubLevel(
-                    boost::bind(TStoredStringPtrDoubleUMapQueue::CSerializer<SInfluencerSumSerializer>(TStoredStringPtrDoubleUMap(1)),
-                                boost::ref(m_InfluencerBucketSums[i++]),
-                                _1)) == false) {
+                traverser.traverseSubLevel(boost::bind(
+                    TStoredStringPtrDoubleUMapQueue::CSerializer<SInfluencerSumSerializer>(
+                        TStoredStringPtrDoubleUMap(1)),
+                    boost::ref(m_InfluencerBucketSums[i++]), _1)) == false) {
                LOG_ERROR(<< "Invalid bucket queue in " << traverser.value());
                return false;
            }
@@ -277,12 +298,14 @@ void CGathererTools::CSumGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryU
}

std::size_t CGathererTools::CSumGatherer::memoryUsage() const {
-    return core::CMemory::dynamicSize(m_BucketSums) + core::CMemory::dynamicSize(m_InfluencerBucketSums);
+    return core::CMemory::dynamicSize(m_BucketSums) +
+           core::CMemory::dynamicSize(m_InfluencerBucketSums);
}

std::string CGathererTools::CSumGatherer::print() const {
    std::ostringstream result;
-    result << m_Classifier.isInteger() << ' ' << m_BucketSums.print() << ' ' << core::CContainerPrinter::print(m_InfluencerBucketSums);
+    result << m_Classifier.isInteger() << ' ' << m_BucketSums.print() << ' '
+           << core::CContainerPrinter::print(m_InfluencerBucketSums);
    return result.str();
}
diff --git a/lib/model/CHierarchicalResults.cc b/lib/model/CHierarchicalResults.cc
index 6eee761814..099110a33a 100644
--- a/lib/model/CHierarchicalResults.cc
+++ b/lib/model/CHierarchicalResults.cc
@@ -99,46 +99,40 @@ bool equal(const core::CStoredStringPtr& lhs, const core::CStoredStringPtr& rhs)
}

//! Check if both underlying strings are equal.
-bool equal(const TStoredStringPtrStoredStringPtrPr& lhs, const TStoredStringPtrStoredStringPtrPr& rhs) {
-    return unset(lhs.first) == unset(rhs.first) && *lhs.first == *rhs.first && unset(lhs.second) == unset(rhs.second) &&
-           *lhs.second == *rhs.second;
+bool equal(const TStoredStringPtrStoredStringPtrPr& lhs,
+           const TStoredStringPtrStoredStringPtrPr& rhs) {
+    return unset(lhs.first) == unset(rhs.first) && *lhs.first == *rhs.first &&
+           unset(lhs.second) == unset(rhs.second) && *lhs.second == *rhs.second;
}

//! Orders nodes by the value of their person field.
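//! For example (hypothetical field values), a node for
//! (partition = 'region':'EMEA', person = 'airline':'AAL') orders before
//! (partition = 'region':'EMEA', person = 'airline':'BAW'): the comparison
//! works through the name/value pairs left to right and only consults a
//! later field to break a tie in the earlier ones.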
struct SPersonValueLess { bool operator()(const TNodeCPtr& lhs, const TNodeCPtr& rhs) const { - return maths::COrderings::lexicographical_compare(*lhs->s_Spec.s_PartitionFieldName, - *lhs->s_Spec.s_PartitionFieldValue, - *lhs->s_Spec.s_PersonFieldName, - *lhs->s_Spec.s_PersonFieldValue, - lhs->s_Spec.s_IsPopulation, - *rhs->s_Spec.s_PartitionFieldName, - *rhs->s_Spec.s_PartitionFieldValue, - *rhs->s_Spec.s_PersonFieldName, - *rhs->s_Spec.s_PersonFieldValue, - rhs->s_Spec.s_IsPopulation); + return maths::COrderings::lexicographical_compare( + *lhs->s_Spec.s_PartitionFieldName, *lhs->s_Spec.s_PartitionFieldValue, + *lhs->s_Spec.s_PersonFieldName, *lhs->s_Spec.s_PersonFieldValue, + lhs->s_Spec.s_IsPopulation, *rhs->s_Spec.s_PartitionFieldName, + *rhs->s_Spec.s_PartitionFieldValue, *rhs->s_Spec.s_PersonFieldName, + *rhs->s_Spec.s_PersonFieldValue, rhs->s_Spec.s_IsPopulation); } }; //! Orders nodes by the name of their person field. struct SPersonNameLess { bool operator()(const TNodeCPtr& lhs, const TNodeCPtr& rhs) const { - return maths::COrderings::lexicographical_compare(*lhs->s_Spec.s_PartitionFieldName, - *lhs->s_Spec.s_PartitionFieldValue, - *lhs->s_Spec.s_PersonFieldName, - *rhs->s_Spec.s_PartitionFieldName, - *rhs->s_Spec.s_PartitionFieldValue, - *rhs->s_Spec.s_PersonFieldName); + return maths::COrderings::lexicographical_compare( + *lhs->s_Spec.s_PartitionFieldName, *lhs->s_Spec.s_PartitionFieldValue, + *lhs->s_Spec.s_PersonFieldName, *rhs->s_Spec.s_PartitionFieldName, + *rhs->s_Spec.s_PartitionFieldValue, *rhs->s_Spec.s_PersonFieldName); } }; //! Orders nodes by the value of their partition field. struct SPartitionValueLess { bool operator()(const TNodeCPtr& lhs, const TNodeCPtr& rhs) const { - return maths::COrderings::lexicographical_compare(*lhs->s_Spec.s_PartitionFieldName, - *lhs->s_Spec.s_PartitionFieldValue, - *rhs->s_Spec.s_PartitionFieldName, - *rhs->s_Spec.s_PartitionFieldValue); + return maths::COrderings::lexicographical_compare( + *lhs->s_Spec.s_PartitionFieldName, *lhs->s_Spec.s_PartitionFieldValue, + *rhs->s_Spec.s_PartitionFieldName, *rhs->s_Spec.s_PartitionFieldValue); } }; @@ -161,7 +155,11 @@ SNode* address(SNode& value) { //! Aggregate the nodes in a layer. 
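//! Nodes which compare equal under the layer's ordering are given a common,
//! newly created parent and the parents form the next, coarser layer; see
//! the standalone sketch following buildHierarchy() below for the idea in
//! miniature.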
template<typename ITR, typename FACTORY>
-void aggregateLayer(ITR beginLayer, ITR endLayer, CHierarchicalResults& results, FACTORY newNode, std::vector<SNode*>& newLayer) {
+void aggregateLayer(ITR beginLayer,
+                    ITR endLayer,
+                    CHierarchicalResults& results,
+                    FACTORY newNode,
+                    std::vector<SNode*>& newLayer) {
    using TNodePtrVec = std::vector<SNode*>;
    using TNodeCPtrNodePtrVecMap = std::map;

@@ -212,12 +210,14 @@ class CCommonInfluencePropagator : public CHierarchicalResultsVisitor {
        } else {
            for (const auto& child : node.s_Children) {
                for (const auto& influence : child->s_AnnotatedProbability.s_Influences) {
-                    if (equal({node.s_Spec.s_PartitionFieldName, node.s_Spec.s_PartitionFieldValue}, influence.first) ||
-                        equal({node.s_Spec.s_PersonFieldName, node.s_Spec.s_PersonFieldValue}, influence.first)) {
-                        auto i = std::lower_bound(node.s_AnnotatedProbability.s_Influences.begin(),
-                                                  node.s_AnnotatedProbability.s_Influences.end(),
-                                                  influence.first,
-                                                  maths::COrderings::SFirstLess());
+                    if (equal({node.s_Spec.s_PartitionFieldName, node.s_Spec.s_PartitionFieldValue},
+                              influence.first) ||
+                        equal({node.s_Spec.s_PersonFieldName, node.s_Spec.s_PersonFieldValue},
+                              influence.first)) {
+                        auto i = std::lower_bound(
+                            node.s_AnnotatedProbability.s_Influences.begin(),
+                            node.s_AnnotatedProbability.s_Influences.end(),
+                            influence.first, maths::COrderings::SFirstLess());
                        if (i == node.s_AnnotatedProbability.s_Influences.end()) {
                            node.s_AnnotatedProbability.s_Influences.push_back(influence);
                        } else if (!equal(i->first, influence.first)) {
@@ -233,23 +233,18 @@ class CCommonInfluencePropagator : public CHierarchicalResultsVisitor {
}
// unnamed::

SResultSpec::SResultSpec()
-    : s_Detector(0),
-      s_IsSimpleCount(false),
-      s_IsPopulation(false),
-      s_UseNull(false),
-      s_PartitionFieldName(UNSET_STRING),
-      s_PartitionFieldValue(UNSET_STRING),
-      s_PersonFieldName(UNSET_STRING),
-      s_PersonFieldValue(UNSET_STRING),
-      s_ValueFieldName(UNSET_STRING),
-      s_FunctionName(UNSET_STRING),
-      s_ByFieldName(UNSET_STRING),
-      s_Function(function_t::E_IndividualCount) {
+    : s_Detector(0), s_IsSimpleCount(false), s_IsPopulation(false), s_UseNull(false),
+      s_PartitionFieldName(UNSET_STRING), s_PartitionFieldValue(UNSET_STRING),
+      s_PersonFieldName(UNSET_STRING), s_PersonFieldValue(UNSET_STRING),
+      s_ValueFieldName(UNSET_STRING), s_FunctionName(UNSET_STRING),
+      s_ByFieldName(UNSET_STRING), s_Function(function_t::E_IndividualCount) {
}

std::string SResultSpec::print() const {
-    return '\'' + core::CStringUtils::typeToStringPretty(s_IsSimpleCount) + '/' + core::CStringUtils::typeToStringPretty(s_IsPopulation) +
-           '/' + *s_FunctionName + '/' + *s_PartitionFieldName + '/' + *s_PartitionFieldValue + '/' + *s_PersonFieldName + '/' +
+    return '\'' + core::CStringUtils::typeToStringPretty(s_IsSimpleCount) +
+           '/' + core::CStringUtils::typeToStringPretty(s_IsPopulation) + '/' +
+           *s_FunctionName + '/' + *s_PartitionFieldName + '/' +
+           *s_PartitionFieldValue + '/' + *s_PersonFieldName + '/' +
s_PartitionFieldValue = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(PERSON_FIELD_NAME_TAG, s_PersonFieldName = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(PERSON_FIELD_VALUE_TAG, s_PersonFieldValue = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(VALUE_FIELD_NAME_TAG, s_ValueFieldName = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(FUNCTION_NAME_TAG, s_FunctionName = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(BY_FIELD_NAME_TAG, s_ByFieldName = CStringStore::names().get(traverser.value())) + RESTORE_SETUP_TEARDOWN(FUNCTION_TAG, int f = 0, + core::CPersistUtils::restore(FUNCTION_TAG, f, traverser), + s_Function = function_t::EFunction(f)) + RESTORE_NO_ERROR( + PARTITION_FIELD_NAME_TAG, + s_PartitionFieldName = CStringStore::names().get(traverser.value())) + RESTORE_NO_ERROR( + PARTITION_FIELD_VALUE_TAG, + s_PartitionFieldValue = CStringStore::names().get(traverser.value())) + RESTORE_NO_ERROR(PERSON_FIELD_NAME_TAG, + s_PersonFieldName = CStringStore::names().get(traverser.value())) + RESTORE_NO_ERROR( + PERSON_FIELD_VALUE_TAG, + s_PersonFieldValue = CStringStore::names().get(traverser.value())) + RESTORE_NO_ERROR(VALUE_FIELD_NAME_TAG, + s_ValueFieldName = CStringStore::names().get(traverser.value())) + RESTORE_NO_ERROR(FUNCTION_NAME_TAG, + s_FunctionName = CStringStore::names().get(traverser.value())) + RESTORE_NO_ERROR(BY_FIELD_NAME_TAG, + s_ByFieldName = CStringStore::names().get(traverser.value())) } while (traverser.next()); return true; } SNode::SNode() - : s_Parent(nullptr), - s_AnnotatedProbability(1.0), - s_Detector(-3), - s_AggregationStyle(-1), - s_SmallestChildProbability(1.0), - s_SmallestDescendantProbability(1.0), - s_RawAnomalyScore(0.0), - s_NormalizedAnomalyScore(0.0), - s_Model(nullptr), - s_BucketStartTime(0), + : s_Parent(nullptr), s_AnnotatedProbability(1.0), s_Detector(-3), + s_AggregationStyle(-1), s_SmallestChildProbability(1.0), + s_SmallestDescendantProbability(1.0), s_RawAnomalyScore(0.0), + s_NormalizedAnomalyScore(0.0), s_Model(nullptr), s_BucketStartTime(0), s_BucketLength(0) { } SNode::SNode(const SResultSpec& simpleSearch, SAnnotatedProbability& annotatedProbability) - : s_Parent(nullptr), - s_Spec(simpleSearch), - s_Detector(simpleSearch.s_Detector), - s_AggregationStyle(-1), + : s_Parent(nullptr), s_Spec(simpleSearch), + s_Detector(simpleSearch.s_Detector), s_AggregationStyle(-1), s_SmallestChildProbability(annotatedProbability.s_Probability), - s_SmallestDescendantProbability(1.0), - s_RawAnomalyScore(0.0), - s_NormalizedAnomalyScore(0.0), - s_Model(nullptr), - s_BucketStartTime(0), + s_SmallestDescendantProbability(1.0), s_RawAnomalyScore(0.0), + s_NormalizedAnomalyScore(0.0), s_Model(nullptr), s_BucketStartTime(0), s_BucketLength(0) { s_AnnotatedProbability.swap(annotatedProbability); } @@ -346,30 +341,37 @@ void SNode::propagateFields() { s_Spec.s_PersonFieldValue = s_Children[0]->s_Spec.s_PersonFieldValue; s_BucketStartTime = s_Children[0]->s_BucketStartTime; for (std::size_t i = 1u; i < s_Children.size(); ++i) { - if (!unset(s_Spec.s_PartitionFieldName) && !equal(s_Spec.s_PartitionFieldName, s_Children[i]->s_Spec.s_PartitionFieldName)) { + if (!unset(s_Spec.s_PartitionFieldName) && + !equal(s_Spec.s_PartitionFieldName, s_Children[i]->s_Spec.s_PartitionFieldName)) { s_Spec.s_PartitionFieldName = UNSET_STRING; s_Spec.s_PartitionFieldValue = UNSET_STRING; s_Spec.s_PersonFieldName = UNSET_STRING; s_Spec.s_PersonFieldValue = UNSET_STRING; } - if 
(!unset(s_Spec.s_PartitionFieldValue) && !equal(s_Spec.s_PartitionFieldValue, s_Children[i]->s_Spec.s_PartitionFieldValue)) { + if (!unset(s_Spec.s_PartitionFieldValue) && + !equal(s_Spec.s_PartitionFieldValue, s_Children[i]->s_Spec.s_PartitionFieldValue)) { s_Spec.s_PartitionFieldValue = UNSET_STRING; s_Spec.s_PersonFieldName = UNSET_STRING; s_Spec.s_PersonFieldValue = UNSET_STRING; } - if (!unset(s_Spec.s_PersonFieldName) && !equal(s_Spec.s_PersonFieldName, s_Children[i]->s_Spec.s_PersonFieldName)) { + if (!unset(s_Spec.s_PersonFieldName) && + !equal(s_Spec.s_PersonFieldName, s_Children[i]->s_Spec.s_PersonFieldName)) { s_Spec.s_PersonFieldName = UNSET_STRING; } - if (!unset(s_Spec.s_PersonFieldValue) && !equal(s_Spec.s_PersonFieldValue, s_Children[i]->s_Spec.s_PersonFieldValue)) { + if (!unset(s_Spec.s_PersonFieldValue) && + !equal(s_Spec.s_PersonFieldValue, s_Children[i]->s_Spec.s_PersonFieldValue)) { s_Spec.s_PersonFieldValue = UNSET_STRING; } } } std::string SNode::print() const { - return s_Spec.print() + ": " + core::CStringUtils::typeToStringPretty(this->probability()) + ", " + + return s_Spec.print() + ": " + + core::CStringUtils::typeToStringPretty(this->probability()) + ", " + core::CStringUtils::typeToStringPretty(s_RawAnomalyScore) + - (s_AnnotatedProbability.s_Influences.empty() ? "" : ", " + core::CContainerPrinter::print(s_AnnotatedProbability.s_Influences)); + (s_AnnotatedProbability.s_Influences.empty() + ? "" + : ", " + core::CContainerPrinter::print(s_AnnotatedProbability.s_Influences)); } void SNode::swap(SNode& other) { @@ -388,7 +390,8 @@ void SNode::swap(SNode& other) { std::swap(s_BucketLength, other.s_BucketLength); } -void SNode::acceptPersistInserter1(core::CStatePersistInserter& inserter, TNodePtrSizeUMap& nodePointers) const { +void SNode::acceptPersistInserter1(core::CStatePersistInserter& inserter, + TNodePtrSizeUMap& nodePointers) const { std::size_t index = nodePointers.emplace(this, nodePointers.size()).first->second; inserter.insertValue(SELF_TAG, index); core::CPersistUtils::persist(SPEC_TAG, s_Spec, inserter); @@ -403,7 +406,8 @@ void SNode::acceptPersistInserter1(core::CStatePersistInserter& inserter, TNodeP inserter.insertValue(BUCKET_LENGTH_TAG, s_BucketLength); } -void SNode::acceptPersistInserter2(core::CStatePersistInserter& inserter, const TNodePtrSizeUMap& nodePointers) const { +void SNode::acceptPersistInserter2(core::CStatePersistInserter& inserter, + const TNodePtrSizeUMap& nodePointers) const { if (s_Parent != nullptr) { auto found = nodePointers.find(s_Parent); if (found == nodePointers.end()) { @@ -423,15 +427,17 @@ void SNode::acceptPersistInserter2(core::CStatePersistInserter& inserter, const } } -bool SNode::acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser, TSizeNodePtrUMap& nodePointers) { +bool SNode::acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser, + TSizeNodePtrUMap& nodePointers) { do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN(SELF_TAG, - std::size_t index = 0, + RESTORE_SETUP_TEARDOWN(SELF_TAG, std::size_t index = 0, core::CStringUtils::stringToType(traverser.value(), index), nodePointers.insert(std::make_pair(index, this))) RESTORE(SPEC_TAG, core::CPersistUtils::restore(SPEC_TAG, s_Spec, traverser)) - RESTORE(ANNOTATED_PROBABILITY_TAG, core::CPersistUtils::restore(ANNOTATED_PROBABILITY_TAG, s_AnnotatedProbability, traverser)) + RESTORE(ANNOTATED_PROBABILITY_TAG, + core::CPersistUtils::restore(ANNOTATED_PROBABILITY_TAG, + s_AnnotatedProbability, traverser)) 
        RESTORE_BUILT_IN(DETECTOR_TAG, s_Detector);
        RESTORE_BUILT_IN(AGGREGATION_STYLE_TAG, s_AggregationStyle);
        RESTORE_BUILT_IN(SMALLEST_CHILD_TAG, s_SmallestChildProbability)
@@ -444,13 +450,15 @@ bool SNode::acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser, TSi
    return true;
}

-bool SNode::acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser, const TSizeNodePtrUMap& nodePointers) {
+bool SNode::acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser,
+                                    const TSizeNodePtrUMap& nodePointers) {
    do {
        const std::string& name = traverser.name();
        std::size_t index = 0;
        if (name == PARENT_TAG) {
            if (!core::CPersistUtils::restore(PARENT_TAG, index, traverser)) {
-                LOG_ERROR(<< "Restore error for " << traverser.name() << " / " << traverser.value());
+                LOG_ERROR(<< "Restore error for " << traverser.name() << " / "
+                          << traverser.value());
                return false;
            }
            auto found = nodePointers.find(index);
@@ -461,7 +469,8 @@ bool SNode::acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser, con
            s_Parent = found->second;
        } else if (name == CHILD_TAG) {
            if (!core::CPersistUtils::restore(CHILD_TAG, index, traverser)) {
-                LOG_ERROR(<< "Restore error for " << traverser.name() << " / " << traverser.value());
+                LOG_ERROR(<< "Restore error for " << traverser.name() << " / "
+                          << traverser.value());
                return false;
            }
            auto found = nodePointers.find(index);
@@ -483,7 +492,8 @@ void swap(SNode& node1, SNode& node2) {

using namespace hierarchical_results_detail;

-CHierarchicalResults::CHierarchicalResults() : m_ResultType(model_t::CResultType::E_Final) {
+CHierarchicalResults::CHierarchicalResults()
+    : m_ResultType(model_t::CResultType::E_Final) {
}

void CHierarchicalResults::addSimpleCountResult(SAnnotatedProbability& annotatedProbability,
@@ -532,7 +542,9 @@ void CHierarchicalResults::addModelResult(int detector,
    spec.s_PersonFieldName = CStringStore::names().get(personFieldName);
    spec.s_PersonFieldValue = CStringStore::names().get(personFieldValue);
    spec.s_ValueFieldName = CStringStore::names().get(valueFieldName);
-    spec.s_ByFieldName = (model ? CStringStore::names().get(model->dataGatherer().searchKey().byFieldName()) : UNSET_STRING);
+    spec.s_ByFieldName =
+        (model ? CStringStore::names().get(model->dataGatherer().searchKey().byFieldName())
+               : UNSET_STRING);
    TNode& leaf = this->newLeaf(spec, annotatedProbability);
    leaf.s_Model = model;
    leaf.s_BucketStartTime = bucketStartTime;
@@ -546,7 +558,8 @@ void CHierarchicalResults::addInfluencer(const std::string& name) {
}

void CHierarchicalResults::buildHierarchy() {
    using TNodePtrVec = std::vector<TNode*>;

-    m_Nodes.erase(std::remove_if(m_Nodes.begin(), m_Nodes.end(), isAggregate), m_Nodes.end());
+    m_Nodes.erase(std::remove_if(m_Nodes.begin(), m_Nodes.end(), isAggregate),
+                  m_Nodes.end());

    // To make life easier for downstream code, bring a simple count node
    // to the front of the deque (if there is one).
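// A self-contained sketch (hypothetical names, much simplified) of the
// layer-by-layer aggregation buildHierarchy() performs above: group a layer
// of nodes by a key, allocate one parent per group and let the parents form
// the next, coarser layer. A deque gives the parents stable addresses across
// passes, which is also why the real code keeps its nodes in a deque.
#include <deque>
#include <map>
#include <string>
#include <vector>

struct Node {
    std::string key; // grouping key for the current layer
    Node* parent = nullptr;
    std::vector<Node*> children;
};

std::vector<Node*> aggregate(const std::vector<Node*>& layer, std::deque<Node>& storage) {
    std::map<std::string, std::vector<Node*>> groups;
    for (Node* node : layer) {
        groups[node->key].push_back(node);
    }
    std::vector<Node*> nextLayer;
    for (auto& group : groups) {
        storage.emplace_back(); // stable address in the deque
        Node* parent = &storage.back();
        parent->key = group.first;
        for (Node* child : group.second) {
            child->parent = parent;
            parent->children.push_back(child);
        }
        nextLayer.push_back(parent);
    }
    return nextLayer;
}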
@@ -569,14 +582,16 @@ void CHierarchicalResults::buildHierarchy() { LOG_TRACE(<< "Distinct values of the person field"); { - aggregateLayer(m_Nodes.begin(), m_Nodes.end(), *this, &CHierarchicalResults::newNode, layer); + aggregateLayer(m_Nodes.begin(), m_Nodes.end(), *this, + &CHierarchicalResults::newNode, layer); LOG_TRACE(<< "layer = " << core::CContainerPrinter::print(layer)); } LOG_TRACE(<< "Distinct person field names"); { newLayer.reserve(layer.size()); - aggregateLayer(layer.begin(), layer.end(), *this, &CHierarchicalResults::newNode, newLayer); + aggregateLayer(layer.begin(), layer.end(), *this, + &CHierarchicalResults::newNode, newLayer); newLayer.swap(layer); LOG_TRACE(<< "layer = " << core::CContainerPrinter::print(layer)); } @@ -584,7 +599,8 @@ void CHierarchicalResults::buildHierarchy() { LOG_TRACE(<< "Distinct partition field values"); { newLayer.reserve(layer.size()); - aggregateLayer(layer.begin(), layer.end(), *this, &CHierarchicalResults::newNode, newLayer); + aggregateLayer(layer.begin(), layer.end(), *this, + &CHierarchicalResults::newNode, newLayer); newLayer.swap(layer); LOG_TRACE(<< "layer = " << core::CContainerPrinter::print(layer)); } @@ -592,7 +608,8 @@ void CHierarchicalResults::buildHierarchy() { LOG_TRACE(<< "Distinct partition field names"); { newLayer.reserve(layer.size()); - aggregateLayer(layer.begin(), layer.end(), *this, &CHierarchicalResults::newNode, newLayer); + aggregateLayer(layer.begin(), layer.end(), *this, + &CHierarchicalResults::newNode, newLayer); newLayer.swap(layer); LOG_TRACE(<< "layer = " << core::CContainerPrinter::print(layer)); } @@ -622,7 +639,8 @@ void CHierarchicalResults::createPivots() { const auto& parentInfluences = node.s_Parent->s_AnnotatedProbability.s_Influences; for (const auto& influence : node.s_AnnotatedProbability.s_Influences) { if (node.s_Parent && - std::binary_search(parentInfluences.begin(), parentInfluences.end(), influence, maths::COrderings::SFirstLess())) { + std::binary_search(parentInfluences.begin(), parentInfluences.end(), + influence, maths::COrderings::SFirstLess())) { continue; } this->newPivot(influence.first).s_Children.push_back(&node); @@ -650,8 +668,9 @@ const CHierarchicalResults::TNode* CHierarchicalResults::root() const { return &result; } -const CHierarchicalResults::TNode* CHierarchicalResults::influencer(const TStoredStringPtr& influencerName, - const TStoredStringPtr& influencerValue) const { +const CHierarchicalResults::TNode* +CHierarchicalResults::influencer(const TStoredStringPtr& influencerName, + const TStoredStringPtr& influencerValue) const { auto i = m_PivotNodes.find({influencerName, influencerValue}); return i != m_PivotNodes.end() ? 
&i->second : nullptr;
}
@@ -717,13 +736,17 @@ model_t::CResultType CHierarchicalResults::resultType() const {
void CHierarchicalResults::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
    using TStoredStringPtrNodeMapCItr = TStoredStringPtrNodeMap::const_iterator;
    using TStoredStringPtrNodeMapCItrVec = std::vector<TStoredStringPtrNodeMapCItr>;
-    using TStoredStringPtrStoredStringPtrPrNodeMapCItr = TStoredStringPtrStoredStringPtrPrNodeMap::const_iterator;
-    using TStoredStringPtrStoredStringPtrPrNodeMapCItrVec = std::vector<TStoredStringPtrStoredStringPtrPrNodeMapCItr>;
+    using TStoredStringPtrStoredStringPtrPrNodeMapCItr =
+        TStoredStringPtrStoredStringPtrPrNodeMap::const_iterator;
+    using TStoredStringPtrStoredStringPtrPrNodeMapCItrVec =
+        std::vector<TStoredStringPtrStoredStringPtrPrNodeMapCItr>;

    TNodePtrSizeUMap nodePointers;

    for (const auto& node : m_Nodes) {
-        inserter.insertLevel(NODES_1_TAG, boost::bind(&SNode::acceptPersistInserter1, boost::cref(node), _1, boost::ref(nodePointers)));
+        inserter.insertLevel(NODES_1_TAG, boost::bind(&SNode::acceptPersistInserter1,
+                                                      boost::cref(node), _1,
+                                                      boost::ref(nodePointers)));
    }

    // Sort the keys by *value* order to ensure consistent persist state.
@@ -732,12 +755,15 @@ void CHierarchicalResults::acceptPersistInserter(core::CStatePersistInserter& in
    for (auto i = m_PivotNodes.begin(); i != m_PivotNodes.end(); ++i) {
        pivotIterators.push_back(i);
    }
-    std::sort(pivotIterators.begin(), pivotIterators.end(), core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
+    std::sort(pivotIterators.begin(), pivotIterators.end(),
+              core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
    for (auto i : pivotIterators) {
        core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first.first, inserter);
        core::CPersistUtils::persist(PIVOT_VALUE_TAG, *i->first.second, inserter);
        inserter.insertLevel(PIVOT_NODES_1_TAG,
-                             boost::bind(&SNode::acceptPersistInserter1, boost::cref(i->second), _1, boost::ref(nodePointers)));
+                             boost::bind(&SNode::acceptPersistInserter1,
+                                         boost::cref(i->second), _1,
+                                         boost::ref(nodePointers)));
    }

    // Sort the keys by *value* order to ensure consistent persist state.
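// The sorts above rely on an idiom worth spelling out: an unordered map has
// no stable iteration order, so persistence collects iterators and orders
// them by the *pointed-to* keys before writing anything, making the persisted
// state reproducible run to run. A small illustration with hypothetical data:
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
    std::unordered_map<std::string, int> state{{"b", 2}, {"a", 1}, {"c", 3}};

    using TItr = std::unordered_map<std::string, int>::const_iterator;
    std::vector<TItr> ordered;
    ordered.reserve(state.size());
    for (auto i = state.begin(); i != state.end(); ++i) {
        ordered.push_back(i);
    }
    // Compare what the iterators point at, not the iterators themselves.
    std::sort(ordered.begin(), ordered.end(),
              [](TItr lhs, TItr rhs) { return lhs->first < rhs->first; });

    for (auto i : ordered) {
        std::cout << i->first << ' ' << i->second << '\n'; // a 1, b 2, c 3
    }
    return 0;
}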
@@ -746,28 +772,37 @@ void CHierarchicalResults::acceptPersistInserter(core::CStatePersistInserter& in
for (auto i = m_PivotRootNodes.begin(); i != m_PivotRootNodes.end(); ++i) {
pivotRootIterators.push_back(i);
}
- std::sort(pivotRootIterators.begin(), pivotRootIterators.end(), core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
+ std::sort(pivotRootIterators.begin(), pivotRootIterators.end(),
+ core::CFunctional::SDereference<maths::COrderings::SFirstLess>());
for (auto i : pivotRootIterators) {
core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first, inserter);
inserter.insertLevel(PIVOT_ROOT_NODES_1_TAG,
- boost::bind(&SNode::acceptPersistInserter1, boost::cref(i->second), _1, boost::ref(nodePointers)));
+ boost::bind(&SNode::acceptPersistInserter1,
+ boost::cref(i->second), _1,
+ boost::ref(nodePointers)));
}
for (const auto& node : m_Nodes) {
- inserter.insertLevel(NODES_2_TAG, boost::bind(&SNode::acceptPersistInserter2, boost::cref(node), _1, boost::cref(nodePointers)));
+ inserter.insertLevel(NODES_2_TAG, boost::bind(&SNode::acceptPersistInserter2,
+ boost::cref(node), _1,
+ boost::cref(nodePointers)));
}
for (auto i : pivotIterators) {
core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first.first, inserter);
core::CPersistUtils::persist(PIVOT_VALUE_TAG, *i->first.second, inserter);
inserter.insertLevel(PIVOT_NODES_2_TAG,
- boost::bind(&SNode::acceptPersistInserter2, boost::cref(i->second), _1, boost::cref(nodePointers)));
+ boost::bind(&SNode::acceptPersistInserter2,
+ boost::cref(i->second), _1,
+ boost::cref(nodePointers)));
}
for (auto i : pivotRootIterators) {
core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first, inserter);
inserter.insertLevel(PIVOT_ROOT_NODES_2_TAG,
- boost::bind(&SNode::acceptPersistInserter2, boost::cref(i->second), _1, boost::cref(nodePointers)));
+ boost::bind(&SNode::acceptPersistInserter2,
+ boost::cref(i->second), _1,
+ boost::cref(nodePointers)));
}
}
@@ -779,33 +814,40 @@ bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser&
do {
const std::string& name = traverser.name();
- RESTORE_SETUP_TEARDOWN(NODES_1_TAG,
- m_Nodes.push_back(SNode()),
- traverser.traverseSubLevel(
- boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(m_Nodes.back()), _1, boost::ref(nodePointers))),
- /**/)
+ RESTORE_SETUP_TEARDOWN(
+ NODES_1_TAG, m_Nodes.push_back(SNode()),
+ traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1,
+ boost::ref(m_Nodes.back()),
+ _1, boost::ref(nodePointers))),
+ /**/)
if (name == NODES_2_TAG) {
if (nodesFullyRestored > m_Nodes.size()) {
LOG_ERROR(<< "Invalid restore index for node: " << nodesFullyRestored);
}
if (traverser.traverseSubLevel(boost::bind(
- &SNode::acceptRestoreTraverser2, boost::ref(m_Nodes[nodesFullyRestored]), _1, boost::cref(nodePointers))) == false) {
+ &SNode::acceptRestoreTraverser2, boost::ref(m_Nodes[nodesFullyRestored]),
+ _1, boost::cref(nodePointers))) == false) {
LOG_ERROR(<< "Failed to restore node");
return false;
}
++nodesFullyRestored;
continue;
}
- RESTORE_NO_ERROR(PIVOT_NAME_TAG, influencerName = CStringStore::influencers().get(traverser.value()))
- RESTORE_NO_ERROR(PIVOT_VALUE_TAG, influencerValue = CStringStore::influencers().get(traverser.value()))
+ RESTORE_NO_ERROR(
+ PIVOT_NAME_TAG,
+ influencerName = CStringStore::influencers().get(traverser.value()))
+ RESTORE_NO_ERROR(
+ PIVOT_VALUE_TAG,
+ influencerValue = CStringStore::influencers().get(traverser.value()))
if (name == PIVOT_NODES_1_TAG) {
if (!influencerName || !influencerValue) {
LOG_ERROR(<< "Invalid influencers for node");
return false;
}
SNode&
node = m_PivotNodes[TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue)]; - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(node), _1, boost::ref(nodePointers))) == - false) { + if (traverser.traverseSubLevel( + boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(node), + _1, boost::ref(nodePointers))) == false) { LOG_ERROR(<< "Failed to restore pivot node"); return false; } @@ -818,8 +860,9 @@ bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser& return false; } SNode& node = m_PivotNodes[TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue)]; - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser2, boost::ref(node), _1, boost::cref(nodePointers))) == - false) { + if (traverser.traverseSubLevel( + boost::bind(&SNode::acceptRestoreTraverser2, boost::ref(node), + _1, boost::cref(nodePointers))) == false) { LOG_ERROR(<< "Failed to restore pivot node"); return false; } @@ -833,8 +876,9 @@ bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser& return false; } SNode& node = m_PivotRootNodes[influencerName]; - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(node), _1, boost::ref(nodePointers))) == - false) { + if (traverser.traverseSubLevel( + boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(node), + _1, boost::ref(nodePointers))) == false) { LOG_ERROR(<< "Failed to restore pivot node"); return false; } @@ -847,8 +891,9 @@ bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser& return false; } SNode& node = m_PivotRootNodes[influencerName]; - if (traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser2, boost::ref(node), _1, boost::cref(nodePointers))) == - false) { + if (traverser.traverseSubLevel( + boost::bind(&SNode::acceptRestoreTraverser2, boost::ref(node), + _1, boost::cref(nodePointers))) == false) { LOG_ERROR(<< "Failed to restore pivot node"); return false; } @@ -872,12 +917,15 @@ CHierarchicalResults::TNode& CHierarchicalResults::newNode() { return m_Nodes.back(); } -CHierarchicalResults::TNode& CHierarchicalResults::newLeaf(const TResultSpec& simpleSearch, SAnnotatedProbability& annotatedProbability) { +CHierarchicalResults::TNode& +CHierarchicalResults::newLeaf(const TResultSpec& simpleSearch, + SAnnotatedProbability& annotatedProbability) { m_Nodes.emplace_back(simpleSearch, annotatedProbability); return m_Nodes.back(); } -CHierarchicalResults::TNode& CHierarchicalResults::newPivot(TStoredStringPtrStoredStringPtrPr key) { +CHierarchicalResults::TNode& +CHierarchicalResults::newPivot(TStoredStringPtrStoredStringPtrPr key) { TNode& result = m_PivotNodes[key]; result.s_Spec.s_PersonFieldName = key.first; result.s_Spec.s_PersonFieldValue = key.second; @@ -891,7 +939,8 @@ CHierarchicalResults::TNode& CHierarchicalResults::newPivotRoot(const TStoredStr return result; } -void CHierarchicalResults::postorderDepthFirst(const TNode* node, CHierarchicalResultsVisitor& visitor) const { +void CHierarchicalResults::postorderDepthFirst(const TNode* node, + CHierarchicalResultsVisitor& visitor) const { for (const auto& child : node->s_Children) { this->postorderDepthFirst(child, visitor); } @@ -910,12 +959,15 @@ bool CHierarchicalResultsVisitor::isLeaf(const TNode& node) { } bool CHierarchicalResultsVisitor::isPartitioned(const TNode& node) { - return !((*node.s_Spec.s_PartitionFieldName).empty()) && unset(node.s_Spec.s_PartitionFieldValue); + return 
!((*node.s_Spec.s_PartitionFieldName).empty()) && + unset(node.s_Spec.s_PartitionFieldValue); } bool CHierarchicalResultsVisitor::isPartition(const TNode& node) { - return !((*node.s_Spec.s_PartitionFieldName).empty()) && !unset(node.s_Spec.s_PartitionFieldValue) && - (CHierarchicalResultsVisitor::isRoot(node) || unset(node.s_Parent->s_Spec.s_PartitionFieldValue)); + return !((*node.s_Spec.s_PartitionFieldName).empty()) && + !unset(node.s_Spec.s_PartitionFieldValue) && + (CHierarchicalResultsVisitor::isRoot(node) || + unset(node.s_Parent->s_Spec.s_PartitionFieldValue)); } bool CHierarchicalResultsVisitor::isPerson(const TNode& node) { @@ -923,11 +975,13 @@ bool CHierarchicalResultsVisitor::isPerson(const TNode& node) { return false; } if (!isPopulation(node)) { - return unset(node.s_Spec.s_PersonFieldValue) || CHierarchicalResultsVisitor::isRoot(node) || + return unset(node.s_Spec.s_PersonFieldValue) || + CHierarchicalResultsVisitor::isRoot(node) || unset(node.s_Parent->s_Spec.s_PersonFieldName); } return !unset(node.s_Spec.s_PersonFieldValue) && - (CHierarchicalResultsVisitor::isRoot(node) || (unset(node.s_Parent->s_Spec.s_PersonFieldValue))); + (CHierarchicalResultsVisitor::isRoot(node) || + (unset(node.s_Parent->s_Spec.s_PersonFieldValue))); } bool CHierarchicalResultsVisitor::isAttribute(const TNode& node) { @@ -948,9 +1002,11 @@ bool CHierarchicalResultsVisitor::isPopulation(const TNode& node) { return node.s_Spec.s_IsPopulation; } -const CHierarchicalResultsVisitor::TNode* CHierarchicalResultsVisitor::nearestAncestorForWhichWeWriteResults(const TNode& node) { +const CHierarchicalResultsVisitor::TNode* +CHierarchicalResultsVisitor::nearestAncestorForWhichWeWriteResults(const TNode& node) { const TNode* result = &node; - for (result = result->s_Parent; result && !isTypeForWhichWeWriteResults(*result, false); result = result->s_Parent) { + for (result = result->s_Parent; result && !isTypeForWhichWeWriteResults(*result, false); + result = result->s_Parent) { } return result; } @@ -970,7 +1026,8 @@ bool CHierarchicalResultsVisitor::shouldWriteResult(const CLimits& limits, // condition the UI can be very confusing, as it's not necessarily possible // to find anything when searching upwards from lowest level anomalies to // the aggregated levels above. - if (p < limits.unusualProbabilityThreshold() && isTypeForWhichWeWriteResults(node, pivot)) { + if (p < limits.unusualProbabilityThreshold() && + isTypeForWhichWeWriteResults(node, pivot)) { return true; } @@ -1004,7 +1061,8 @@ bool CHierarchicalResultsVisitor::shouldWriteResult(const CLimits& limits, // a low probability themselves or be in branch of the results tree which // contains low probability results. 
for (const auto& influence : node.s_AnnotatedProbability.s_Influences) {
- const TNode* influencer = results.influencer(influence.first.first, influence.first.second);
+ const TNode* influencer =
+ results.influencer(influence.first.first, influence.first.second);
if (influencer && p <= OUTPUT_TOLERANCE * influencer->s_SmallestDescendantProbability &&
shouldWriteResult(limits, results, *influencer, /*pivot = */ true)) {
return true;
diff --git a/lib/model/CHierarchicalResultsAggregator.cc b/lib/model/CHierarchicalResultsAggregator.cc
index 2ee0e1ed71..c49524f153 100644
--- a/lib/model/CHierarchicalResultsAggregator.cc
+++ b/lib/model/CHierarchicalResultsAggregator.cc
@@ -40,24 +40,33 @@ namespace {
using TStoredStringPtr = CHierarchicalResults::TStoredStringPtr;
using TStoredStringPtrStoredStringPtrPr = CHierarchicalResults::TStoredStringPtrStoredStringPtrPr;
-using TStoredStringPtrStoredStringPtrPrDoublePr = CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePr;
-using TStoredStringPtrStoredStringPtrPrDoublePrVec = CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePrVec;
+using TStoredStringPtrStoredStringPtrPrDoublePr =
+ CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePr;
+using TStoredStringPtrStoredStringPtrPrDoublePrVec =
+ CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePrVec;
//! \brief Creates new detector equalizers.
class CDetectorEqualizerFactory {
public:
- CDetectorEqualizer
- make(const std::string& /*name1*/, const std::string& /*name2*/, const std::string& /*name3*/, const std::string& /*name4*/) const {
+ CDetectorEqualizer make(const std::string& /*name1*/,
+ const std::string& /*name2*/,
+ const std::string& /*name3*/,
+ const std::string& /*name4*/) const {
return CDetectorEqualizer();
}
- CDetectorEqualizer make(const std::string& /*name1*/, const std::string& /*name2*/) const { return CDetectorEqualizer(); }
+ CDetectorEqualizer make(const std::string& /*name1*/, const std::string& /*name2*/) const {
+ return CDetectorEqualizer();
+ }
- CDetectorEqualizer make(const std::string& /*name*/) const { return CDetectorEqualizer(); }
+ CDetectorEqualizer make(const std::string& /*name*/) const {
+ return CDetectorEqualizer();
+ }
};
//! Check if the underlying strings are equal.
-bool equal(const TStoredStringPtrStoredStringPtrPr& lhs, const TStoredStringPtrStoredStringPtrPr& rhs) {
+bool equal(const TStoredStringPtrStoredStringPtrPr& lhs,
+ const TStoredStringPtrStoredStringPtrPr& rhs) {
return *lhs.first == *rhs.first && *lhs.second == *rhs.second;
}
@@ -70,10 +79,14 @@ bool influenceProbability(const TStoredStringPtrStoredStringPtrPrDoublePrVec& in
TStoredStringPtrStoredStringPtrPr influence(influencerName, influencerValue);
std::size_t k{static_cast<std::size_t>(
- std::lower_bound(influences.begin(), influences.end(), influence, maths::COrderings::SFirstLess()) - influences.begin())};
+ std::lower_bound(influences.begin(), influences.end(), influence,
+ maths::COrderings::SFirstLess()) -
+ influences.begin())};
if (k < influences.size() && equal(influences[k].first, influence)) {
- result = influences[k].second == 1.0 ? p : std::exp(influences[k].second * maths::CTools::fastLog(p));
+ result = influences[k].second == 1.0
+ ?
p
+ : std::exp(influences[k].second * maths::CTools::fastLog(p));
return true;
}
@@ -91,8 +104,7 @@ const std::string LEAF_TAG("f");
} // unnamed::
CHierarchicalResultsAggregator::CHierarchicalResultsAggregator(const CAnomalyDetectorModelConfig& modelConfig)
- : TBase(TDetectorEqualizer()),
- m_Job(E_NoOp),
+ : TBase(TDetectorEqualizer()), m_Job(E_NoOp),
m_DecayRate(modelConfig.decayRate()),
m_MaximumAnomalousProbability(modelConfig.maximumAnomalousProbability()) {
this->refresh(modelConfig);
@@ -107,8 +119,9 @@ void CHierarchicalResultsAggregator::refresh(const CAnomalyDetectorModelConfig&
m_MaximumAnomalousProbability = modelConfig.maximumAnomalousProbability();
for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) {
for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) {
- m_Parameters[i][j] =
- modelConfig.aggregationStyleParam(static_cast<model_t::EAggregationStyle>(i), static_cast<model_t::EAggregationParam>(j));
+ m_Parameters[i][j] = modelConfig.aggregationStyleParam(
+ static_cast<model_t::EAggregationStyle>(i),
+ static_cast<model_t::EAggregationParam>(j));
}
}
}
@@ -117,7 +130,9 @@ void CHierarchicalResultsAggregator::clear() {
this->TBase::clear();
}
-void CHierarchicalResultsAggregator::visit(const CHierarchicalResults& /*results*/, const TNode& node, bool pivot) {
+void CHierarchicalResultsAggregator::visit(const CHierarchicalResults& /*results*/,
+ const TNode& node,
+ bool pivot) {
if (isLeaf(node)) {
this->aggregateLeaf(node);
} else {
@@ -130,12 +145,15 @@ void CHierarchicalResultsAggregator::propagateForwardByTime(double time) {
LOG_ERROR(<< "Can't propagate normalizer backwards in time");
return;
}
- double factor{std::exp(-m_DecayRate * CDetectorEqualizer::largestProbabilityToCorrect() * time)};
+ double factor{std::exp(
+ -m_DecayRate * CDetectorEqualizer::largestProbabilityToCorrect() * time)};
this->age(boost::bind(&TDetectorEqualizer::age, _1, factor));
}
void CHierarchicalResultsAggregator::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
- inserter.insertLevel(BUCKET_TAG, boost::bind(&TDetectorEqualizer::acceptPersistInserter, boost::cref(this->bucketElement()), _1));
+ inserter.insertLevel(BUCKET_TAG,
+ boost::bind(&TDetectorEqualizer::acceptPersistInserter,
+ boost::cref(this->bucketElement()), _1));
core::CPersistUtils::persist(INFLUENCER_BUCKET_TAG, this->influencerBucketSet(), inserter);
core::CPersistUtils::persist(INFLUENCER_TAG, this->influencerSet(), inserter);
core::CPersistUtils::persist(PARTITION_TAG, this->partitionSet(), inserter);
@@ -146,12 +164,18 @@ bool CHierarchicalResultsAggregator::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
do {
const std::string& name = traverser.name();
- RESTORE(BUCKET_TAG,
- traverser.traverseSubLevel(boost::bind(&TDetectorEqualizer::acceptRestoreTraverser, boost::ref(this->bucketElement()), _1)))
- RESTORE(INFLUENCER_BUCKET_TAG, core::CPersistUtils::restore(INFLUENCER_BUCKET_TAG, this->influencerBucketSet(), traverser));
- RESTORE(INFLUENCER_TAG, core::CPersistUtils::restore(INFLUENCER_TAG, this->influencerSet(), traverser));
- RESTORE(PARTITION_TAG, core::CPersistUtils::restore(PARTITION_TAG, this->partitionSet(), traverser));
- RESTORE(PERSON_TAG, core::CPersistUtils::restore(PERSON_TAG, this->personSet(), traverser));
+ RESTORE(BUCKET_TAG, traverser.traverseSubLevel(boost::bind(
+ &TDetectorEqualizer::acceptRestoreTraverser,
+ boost::ref(this->bucketElement()), _1)))
+ RESTORE(INFLUENCER_BUCKET_TAG,
+ core::CPersistUtils::restore(INFLUENCER_BUCKET_TAG,
this->influencerBucketSet(), traverser));
+ RESTORE(INFLUENCER_TAG, core::CPersistUtils::restore(
+ INFLUENCER_TAG, this->influencerSet(), traverser));
+ RESTORE(PARTITION_TAG, core::CPersistUtils::restore(
+ PARTITION_TAG, this->partitionSet(), traverser));
+ RESTORE(PERSON_TAG,
+ core::CPersistUtils::restore(PERSON_TAG, this->personSet(), traverser));
RESTORE(LEAF_TAG, core::CPersistUtils::restore(LEAF_TAG, this->leafSet(), traverser));
} while (traverser.next());
return true;
@@ -174,9 +198,11 @@ void CHierarchicalResultsAggregator::aggregateLeaf(const TNode& node) {
if (!maths::CMathsFuncs::isFinite(probability)) {
probability = 1.0;
}
- probability = maths::CTools::truncate(probability, maths::CTools::smallestProbability(), 1.0);
+ probability = maths::CTools::truncate(probability,
+ maths::CTools::smallestProbability(), 1.0);
this->correctProbability(node, false, detector, probability);
- model_t::EAggregationStyle style{isAttribute(node) ? model_t::E_AggregateAttributes : model_t::E_AggregatePeople};
+ model_t::EAggregationStyle style{isAttribute(node) ? model_t::E_AggregateAttributes
+ : model_t::E_AggregatePeople};
node.s_AnnotatedProbability.s_Probability = probability;
node.s_AggregationStyle = style;
@@ -198,27 +224,29 @@ void CHierarchicalResultsAggregator::aggregateNode(const TNode& node, bool pivot
int detector;
int aggregation;
TDouble1Vec detectorProbabilities;
- this->detectorProbabilities(node, pivot, numberDetectors, partition, detector, aggregation, detectorProbabilities);
- LOG_TRACE(<< "detector = " << detector << ", aggregation = " << aggregation << ", detector probabilities = " << detectorProbabilities);
+ this->detectorProbabilities(node, pivot, numberDetectors, partition,
+ detector, aggregation, detectorProbabilities);
+ LOG_TRACE(<< "detector = " << detector << ", aggregation = " << aggregation
+ << ", detector probabilities = " << detectorProbabilities);
const double* params{m_Parameters[model_t::E_AggregateDetectors]};
- CAnomalyScore::compute(params[model_t::E_JointProbabilityWeight],
- params[model_t::E_ExtremeProbabilityWeight],
- static_cast<std::size_t>(params[model_t::E_MinExtremeSamples]),
- static_cast<std::size_t>(params[model_t::E_MaxExtremeSamples]),
- m_MaximumAnomalousProbability,
- detectorProbabilities,
- node.s_RawAnomalyScore,
- node.s_AnnotatedProbability.s_Probability);
+ CAnomalyScore::compute(
+ params[model_t::E_JointProbabilityWeight],
+ params[model_t::E_ExtremeProbabilityWeight],
+ static_cast<std::size_t>(params[model_t::E_MinExtremeSamples]),
+ static_cast<std::size_t>(params[model_t::E_MaxExtremeSamples]),
+ m_MaximumAnomalousProbability, detectorProbabilities,
+ node.s_RawAnomalyScore, node.s_AnnotatedProbability.s_Probability);
node.s_Detector = detector;
node.s_AggregationStyle = aggregation;
LOG_TRACE(<< "probability = " << node.probability());
}
-bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode& node,
- bool pivot,
- std::size_t& numberDetectors,
- TIntSizePrDouble1VecUMap (&partition)[N]) {
+bool CHierarchicalResultsAggregator::partitionChildProbabilities(
+ const TNode& node,
+ bool pivot,
+ std::size_t& numberDetectors,
+ TIntSizePrDouble1VecUMap (&partition)[N]) {
using TSizeFSet = boost::container::flat_set<std::size_t>;
using TMinAccumulator = maths::CBasicStatistics::SMin<double>::TAccumulator;
@@ -240,10 +268,8 @@ bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode& no
std::size_t key{0};
if (pivot && !isRoot(node) &&
!influenceProbability(child->s_AnnotatedProbability.s_Influences,
- node.s_Spec.s_PersonFieldName,
-
node.s_Spec.s_PersonFieldValue,
- probability,
- probability)) {
+ node.s_Spec.s_PersonFieldName, node.s_Spec.s_PersonFieldValue,
+ probability, probability)) {
LOG_ERROR(<< "Couldn't find influence for " << child->print());
continue;
} else {
@@ -257,7 +283,8 @@ bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode& no
}
pMinDescendent.add(child->s_SmallestDescendantProbability);
- model_t::EAggregationStyle style{static_cast<model_t::EAggregationStyle>(child->s_AggregationStyle)};
+ model_t::EAggregationStyle style{
+ static_cast<model_t::EAggregationStyle>(child->s_AggregationStyle)};
switch (style) {
case model_t::E_AggregatePeople:
case model_t::E_AggregateAttributes:
@@ -271,8 +298,10 @@ bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode& no
}
if (haveResult) {
- node.s_SmallestChildProbability = maths::CTools::truncate(pMinChild[0], maths::CTools::smallestProbability(), 1.0);
- node.s_SmallestDescendantProbability = maths::CTools::truncate(pMinDescendent[0], maths::CTools::smallestProbability(), 1.0);
+ node.s_SmallestChildProbability = maths::CTools::truncate(
+ pMinChild[0], maths::CTools::smallestProbability(), 1.0);
+ node.s_SmallestDescendantProbability = maths::CTools::truncate(
+ pMinDescendent[0], maths::CTools::smallestProbability(), 1.0);
}
numberDetectors = detectors.size();
LOG_TRACE(<< "detector = " << core::CContainerPrinter::print(detectors));
@@ -280,18 +309,21 @@ bool CHierarchicalResultsAggregator::partitionChildProbabilities(const TNode& no
return haveResult;
}
-void CHierarchicalResultsAggregator::detectorProbabilities(const TNode& node,
- bool pivot,
- std::size_t numberDetectors,
- const TIntSizePrDouble1VecUMap (&partition)[N],
- int& detector,
- int& aggregation,
- TDouble1Vec& probabilities) {
+void CHierarchicalResultsAggregator::detectorProbabilities(
+ const TNode& node,
+ bool pivot,
+ std::size_t numberDetectors,
+ const TIntSizePrDouble1VecUMap (&partition)[N],
+ int& detector,
+ int& aggregation,
+ TDouble1Vec& probabilities) {
using TIntDouble1VecFMap = boost::container::flat_map<int, TDouble1Vec>;
int fallback{static_cast<int>(model_t::E_AggregatePeople)};
detector = -3;
- aggregation = (pivot || isPartition(node) || (isPopulation(node) && isPerson(node))) ? fallback : -1;
+ aggregation = (pivot || isPartition(node) || (isPopulation(node) && isPerson(node)))
+ ?
fallback
+ : -1;
TIntDouble1VecFMap detectorProbabilities;
detectorProbabilities.reserve(numberDetectors);
@@ -304,14 +336,13 @@ void CHierarchicalResultsAggregator::detectorProbabilities(const TNode& node,
probability = subset.second[0];
} else {
double rawAnomalyScore;
- CAnomalyScore::compute(params[model_t::E_JointProbabilityWeight],
- params[model_t::E_ExtremeProbabilityWeight],
- static_cast<std::size_t>(params[model_t::E_MinExtremeSamples]),
- static_cast<std::size_t>(params[model_t::E_MaxExtremeSamples]),
- m_MaximumAnomalousProbability,
- subset.second,
- rawAnomalyScore,
- probability);
+ CAnomalyScore::compute(
+ params[model_t::E_JointProbabilityWeight],
+ params[model_t::E_ExtremeProbabilityWeight],
+ static_cast<std::size_t>(params[model_t::E_MinExtremeSamples]),
+ static_cast<std::size_t>(params[model_t::E_MaxExtremeSamples]),
+ m_MaximumAnomalousProbability, subset.second,
+ rawAnomalyScore, probability);
}
if (!maths::CMathsFuncs::isFinite(probability)) {
probability = 1.0;
}
@@ -346,14 +377,12 @@ void CHierarchicalResultsAggregator::detectorProbabilities(const TNode& node,
if (dp.second.size() > 1) {
const double* params{m_Parameters[model_t::E_AggregatePeople]};
double rawAnomalyScore;
- CAnomalyScore::compute(params[model_t::E_JointProbabilityWeight],
- params[model_t::E_ExtremeProbabilityWeight],
- static_cast<std::size_t>(params[model_t::E_MinExtremeSamples]),
- static_cast<std::size_t>(params[model_t::E_MaxExtremeSamples]),
- m_MaximumAnomalousProbability,
- dp.second,
- rawAnomalyScore,
- probability);
+ CAnomalyScore::compute(
+ params[model_t::E_JointProbabilityWeight],
+ params[model_t::E_ExtremeProbabilityWeight],
+ static_cast<std::size_t>(params[model_t::E_MinExtremeSamples]),
+ static_cast<std::size_t>(params[model_t::E_MaxExtremeSamples]),
+ m_MaximumAnomalousProbability, dp.second, rawAnomalyScore, probability);
}
probabilities.push_back(this->correctProbability(node, pivot, dp.first, probability));
}
@@ -367,7 +396,10 @@ std::size_t CHierarchicalResultsAggregator::hash(const TNode& node) const {
return result;
}
-double CHierarchicalResultsAggregator::correctProbability(const TNode& node, bool pivot, int detector, double probability) {
+double CHierarchicalResultsAggregator::correctProbability(const TNode& node,
+ bool pivot,
+ int detector,
+ double probability) {
using TMaxAccumulator = maths::CBasicStatistics::SMax<double>::TAccumulator;
if (probability < CDetectorEqualizer::largestProbabilityToCorrect()) {
diff --git a/lib/model/CHierarchicalResultsNormalizer.cc b/lib/model/CHierarchicalResultsNormalizer.cc
index db2beb114f..db1705b024 100644
--- a/lib/model/CHierarchicalResultsNormalizer.cc
+++ b/lib/model/CHierarchicalResultsNormalizer.cc
@@ -32,13 +32,19 @@ class CNormalizerFactory {
public:
using TNormalizer = CHierarchicalResultsNormalizer::TNormalizer;
- CNormalizerFactory(const CAnomalyDetectorModelConfig& modelConfig) : m_ModelConfig(modelConfig) {}
+ CNormalizerFactory(const CAnomalyDetectorModelConfig& modelConfig)
+ : m_ModelConfig(modelConfig) {}
- TNormalizer make(const std::string& name1, const std::string& name2, const std::string& name3, const std::string& name4) const {
+ TNormalizer make(const std::string& name1,
+ const std::string& name2,
+ const std::string& name3,
+ const std::string& name4) const {
return make(name1 + ' ' + name2 + ' ' + name3 + ' ' + name4);
}
- TNormalizer make(const std::string& name1, const std::string& name2) const { return make(name1 + ' ' + name2); }
+ TNormalizer make(const std::string& name1, const std::string& name2) const {
+ return make(name1 + ' ' + name2);
+ }
TNormalizer make(const std::string& name) const {
return TNormalizer(name, boost::make_shared<CAnomalyScore::CNormalizer>(m_ModelConfig));
@@ -82,10 +88,9 @@ uint64_t SNormalizer::checksum() const {
}
CHierarchicalResultsNormalizer::CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig& modelConfig)
- : TBase(TNormalizer(std::string(), boost::make_shared<CAnomalyScore::CNormalizer>(modelConfig))),
- m_Job(E_NoOp),
- m_ModelConfig(modelConfig),
- m_HasLastUpdateCausedBigChange(false) {
+ : TBase(TNormalizer(std::string(),
+ boost::make_shared<CAnomalyScore::CNormalizer>(modelConfig))),
+ m_Job(E_NoOp), m_ModelConfig(modelConfig), m_HasLastUpdateCausedBigChange(false) {
}
void CHierarchicalResultsNormalizer::setJob(EJob job) {
@@ -101,10 +106,13 @@ void CHierarchicalResultsNormalizer::resetBigChange() {
m_HasLastUpdateCausedBigChange = false;
}
-void CHierarchicalResultsNormalizer::visit(const CHierarchicalResults& /*results*/, const TNode& node, bool pivot) {
+void CHierarchicalResultsNormalizer::visit(const CHierarchicalResults& /*results*/,
+ const TNode& node,
+ bool pivot) {
CNormalizerFactory factory(m_ModelConfig);
TNormalizerPtrVec normalizers;
- this->elements(node, pivot, factory, normalizers, m_ModelConfig.perPartitionNormalization());
+ this->elements(node, pivot, factory, normalizers,
+ m_ModelConfig.perPartitionNormalization());
if (normalizers.empty()) {
return;
@@ -113,18 +121,22 @@ void CHierarchicalResultsNormalizer::visit(const CHierarchicalResults& /*results
// This has to use the deviation of the probability rather than
// the anomaly score stored on the bucket because the later is
// scaled so that it sums to the bucket anomaly score.
- double score = node.probability() > m_ModelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(node.probability());
+ double score = node.probability() > m_ModelConfig.maximumAnomalousProbability()
+ ? 0.0
+ : maths::CTools::anomalyScore(node.probability());
switch (m_Job) {
case E_Update:
for (std::size_t i = 0u; i < normalizers.size(); ++i) {
- m_HasLastUpdateCausedBigChange |= normalizers[i]->s_Normalizer->updateQuantiles(score);
+ m_HasLastUpdateCausedBigChange |=
+ normalizers[i]->s_Normalizer->updateQuantiles(score);
}
break;
case E_Normalize:
// Normalize with the lowest suitable normalizer.
if (!normalizers[0]->s_Normalizer->normalize(score)) { - LOG_ERROR(<< "Failed to normalize " << score << " for " << node.s_Spec.print()); + LOG_ERROR(<< "Failed to normalize " << score << " for " + << node.s_Spec.print()); } node.s_NormalizedAnomalyScore = score; break; @@ -146,48 +158,60 @@ bool CHierarchicalResultsNormalizer::hasLastUpdateCausedBigChange() const { return m_HasLastUpdateCausedBigChange; } -void CHierarchicalResultsNormalizer::toJson(core_t::TTime time, const std::string& key, std::string& json, bool makeArray) const { +void CHierarchicalResultsNormalizer::toJson(core_t::TTime time, + const std::string& key, + std::string& json, + bool makeArray) const { TStrVec jsonVec(1 // m_RootNormalizer - + this->influencerBucketSet().size() + this->influencerSet().size() + this->partitionSet().size() + + + this->influencerBucketSet().size() + + this->influencerSet().size() + this->partitionSet().size() + this->personSet().size() + this->leafSet().size()); std::size_t index = 0; for (std::size_t i = 0; i < this->leafSet().size(); ++i) { const TWord& word = this->leafSet()[i].first; const TNormalizer& normalizer = this->leafSet()[i].second; - CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, leafCue(word), normalizer.s_Description, time, jsonVec[index++]); + CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, + leafCue(word), normalizer.s_Description, + time, jsonVec[index++]); } for (std::size_t i = 0; i < this->personSet().size(); ++i) { const TWord& word = this->personSet()[i].first; const TNormalizer& normalizer = this->personSet()[i].second; - CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, personCue(word), normalizer.s_Description, time, jsonVec[index++]); + CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, + personCue(word), normalizer.s_Description, + time, jsonVec[index++]); } for (std::size_t i = 0; i < this->partitionSet().size(); ++i) { const TWord& word = this->partitionSet()[i].first; const TNormalizer& normalizer = this->partitionSet()[i].second; - CAnomalyScore::normalizerToJson( - *normalizer.s_Normalizer, key, partitionCue(word), normalizer.s_Description, time, jsonVec[index++]); + CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, + partitionCue(word), normalizer.s_Description, + time, jsonVec[index++]); } for (std::size_t i = 0; i < this->influencerSet().size(); ++i) { const TWord& word = this->influencerSet()[i].first; const TNormalizer& normalizer = this->influencerSet()[i].second; - CAnomalyScore::normalizerToJson( - *normalizer.s_Normalizer, key, influencerCue(word), normalizer.s_Description, time, jsonVec[index++]); + CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, + influencerCue(word), normalizer.s_Description, + time, jsonVec[index++]); } for (std::size_t i = 0; i < this->influencerBucketSet().size(); ++i) { const TWord& word = this->influencerBucketSet()[i].first; const TNormalizer& normalizer = this->influencerBucketSet()[i].second; - CAnomalyScore::normalizerToJson( - *normalizer.s_Normalizer, key, influencerBucketCue(word), normalizer.s_Description, time, jsonVec[index++]); + CAnomalyScore::normalizerToJson(*normalizer.s_Normalizer, key, + influencerBucketCue(word), normalizer.s_Description, + time, jsonVec[index++]); } // Put the bucket normalizer last so that incomplete restorations can be // detected by checking whether the bucket normalizer is restored - CAnomalyScore::normalizerToJson(*this->bucketElement().s_Normalizer, key, bucketCue(), "root", time, jsonVec[index++]); 
+ CAnomalyScore::normalizerToJson(*this->bucketElement().s_Normalizer, key,
+ bucketCue(), "root", time, jsonVec[index++]);
json = core::CStringUtils::join(jsonVec, ",");
if (makeArray) {
@@ -196,7 +220,8 @@ void CHierarchicalResultsNormalizer::toJson(core_t::TTime time, const std::strin
}
}
-CHierarchicalResultsNormalizer::ERestoreOutcome CHierarchicalResultsNormalizer::fromJsonStream(std::istream& inputStream) {
+CHierarchicalResultsNormalizer::ERestoreOutcome
+CHierarchicalResultsNormalizer::fromJsonStream(std::istream& inputStream) {
bool isBucketNormalizerRestored = false;
this->TBase::clear();
@@ -220,15 +245,16 @@ CHierarchicalResultsNormalizer::ERestoreOutcome CHierarchicalResultsNormalizer::
return E_Ok;
}
- LOG_ERROR(<< "Expected " << CAnomalyScore::MLCUE_ATTRIBUTE << " field in quantiles JSON got " << traverser.name() << " = "
- << traverser.value());
+ LOG_ERROR(<< "Expected " << CAnomalyScore::MLCUE_ATTRIBUTE << " field in quantiles JSON got "
+ << traverser.name() << " = " << traverser.value());
return E_Corrupt;
}
const std::string cue(traverser.value());
if (cue == BUCKET_CUE) {
- if (CAnomalyScore::normalizerFromJson(traverser, *this->bucketElement().s_Normalizer) == false) {
+ if (CAnomalyScore::normalizerFromJson(
+ traverser, *this->bucketElement().s_Normalizer) == false) {
LOG_ERROR(<< "Unable to restore bucket normalizer");
return E_Corrupt;
}
@@ -243,27 +269,31 @@ CHierarchicalResultsNormalizer::ERestoreOutcome CHierarchicalResultsNormalizer::
if (normalizerVec != nullptr) {
if (!traverser.next()) {
- LOG_ERROR(<< "Cannot restore hierarchical normalizer - end of object reached when " << CAnomalyScore::MLKEY_ATTRIBUTE
- << " was expected");
+ LOG_ERROR(<< "Cannot restore hierarchical normalizer - end of object reached when "
+ << CAnomalyScore::MLKEY_ATTRIBUTE << " was expected");
return E_Corrupt;
}
if (!traverser.next()) {
LOG_ERROR(<< "Cannot restore hierarchical normalizer - end of object reached when "
- << CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE << " was expected");
+ << CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE
+ << " was expected");
return E_Corrupt;
}
if (traverser.name() != CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE) {
- LOG_ERROR(<< "Cannot restore hierarchical normalizer - " << CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE
- << " element expected but found " << traverser.name() << '=' << traverser.value());
+ LOG_ERROR(<< "Cannot restore hierarchical normalizer - "
+ << CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE << " element expected but found "
+ << traverser.name() << '=' << traverser.value());
return E_Corrupt;
}
std::string quantileDesc(traverser.value());
- boost::shared_ptr<CAnomalyScore::CNormalizer> normalizer = boost::make_shared<CAnomalyScore::CNormalizer>(m_ModelConfig);
- normalizerVec->emplace_back(TWord(hashArray), TNormalizer(quantileDesc, normalizer));
+ boost::shared_ptr<CAnomalyScore::CNormalizer> normalizer =
+ boost::make_shared<CAnomalyScore::CNormalizer>(m_ModelConfig);
+ normalizerVec->emplace_back(TWord(hashArray),
+ TNormalizer(quantileDesc, normalizer));
if (CAnomalyScore::normalizerFromJson(traverser, *normalizer) == false) {
LOG_ERROR(<< "Unable to restore normalizer with cue " << cue);
return E_Corrupt;
}
@@ -274,9 +304,11 @@ CHierarchicalResultsNormalizer::ERestoreOutcome CHierarchicalResultsNormalizer::
this->sort();
- LOG_DEBUG(<< this->influencerBucketSet().size() << " influencer bucket normalizers, " << this->influencerSet().size()
- << " influencer normalizers, " << this->partitionSet().size() << " partition normalizers, " << this->personSet().size()
- << " person normalizers and " << this->leafSet().size()
<< " leaf normalizers restored from JSON stream"); + LOG_DEBUG(<< this->influencerBucketSet().size() << " influencer bucket normalizers, " + << this->influencerSet().size() << " influencer normalizers, " + << this->partitionSet().size() << " partition normalizers, " + << this->personSet().size() << " person normalizers and " + << this->leafSet().size() << " leaf normalizers restored from JSON stream"); return isBucketNormalizerRestored ? E_Ok : E_Incomplete; } @@ -285,32 +317,38 @@ const CAnomalyScore::CNormalizer& CHierarchicalResultsNormalizer::bucketNormaliz return *this->bucketElement().s_Normalizer; } -const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::influencerBucketNormalizer(const std::string& influencerFieldName) const { +const CAnomalyScore::CNormalizer* +CHierarchicalResultsNormalizer::influencerBucketNormalizer(const std::string& influencerFieldName) const { const TNormalizer* normalizer = this->influencerBucketElement(influencerFieldName); return normalizer ? normalizer->s_Normalizer.get() : nullptr; } -const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::influencerNormalizer(const std::string& influencerFieldName) const { +const CAnomalyScore::CNormalizer* +CHierarchicalResultsNormalizer::influencerNormalizer(const std::string& influencerFieldName) const { const TNormalizer* normalizer = this->influencerElement(influencerFieldName); return normalizer ? normalizer->s_Normalizer.get() : nullptr; } -const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::partitionNormalizer(const std::string& partitionFieldName) const { +const CAnomalyScore::CNormalizer* +CHierarchicalResultsNormalizer::partitionNormalizer(const std::string& partitionFieldName) const { const TNormalizer* normalizer = this->partitionElement(partitionFieldName); return normalizer ? normalizer->s_Normalizer.get() : nullptr; } -const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::personNormalizer(const std::string& partitionFieldName, - const std::string& personFieldName) const { +const CAnomalyScore::CNormalizer* +CHierarchicalResultsNormalizer::personNormalizer(const std::string& partitionFieldName, + const std::string& personFieldName) const { const TNormalizer* normalizer = this->personElement(partitionFieldName, personFieldName); return normalizer ? normalizer->s_Normalizer.get() : nullptr; } -const CAnomalyScore::CNormalizer* CHierarchicalResultsNormalizer::leafNormalizer(const std::string& partitionFieldName, - const std::string& personFieldName, - const std::string& functionName, - const std::string& valueFieldName) const { - const TNormalizer* normalizer = this->leafElement(partitionFieldName, personFieldName, functionName, valueFieldName); +const CAnomalyScore::CNormalizer* +CHierarchicalResultsNormalizer::leafNormalizer(const std::string& partitionFieldName, + const std::string& personFieldName, + const std::string& functionName, + const std::string& valueFieldName) const { + const TNormalizer* normalizer = this->leafElement( + partitionFieldName, personFieldName, functionName, valueFieldName); return normalizer ? 
normalizer->s_Normalizer.get() : nullptr; } @@ -320,7 +358,8 @@ bool CHierarchicalResultsNormalizer::parseCue(const std::string& cue, normalizers = nullptr; std::size_t hashStartPos = 0; - if (cue.compare(0, INFLUENCER_BUCKET_CUE_PREFIX.length(), INFLUENCER_BUCKET_CUE_PREFIX) == 0) { + if (cue.compare(0, INFLUENCER_BUCKET_CUE_PREFIX.length(), + INFLUENCER_BUCKET_CUE_PREFIX) == 0) { normalizers = &this->influencerBucketSet(); hashStartPos = INFLUENCER_BUCKET_CUE_PREFIX.length(); } else if (cue.compare(0, INFLUENCER_CUE_PREFIX.length(), INFLUENCER_CUE_PREFIX) == 0) { @@ -343,7 +382,8 @@ bool CHierarchicalResultsNormalizer::parseCue(const std::string& cue, LOG_TRACE(<< "cue = " << cue << ", hash = " << cue.substr(hashStartPos)); if (core::CStringUtils::stringToType(cue.substr(hashStartPos), hashArray[0]) == false) { - LOG_ERROR(<< "Unable to parse normalizer hash from cue " << cue << " starting at position " << hashStartPos); + LOG_ERROR(<< "Unable to parse normalizer hash from cue " << cue + << " starting at position " << hashStartPos); return false; } diff --git a/lib/model/CHierarchicalResultsPopulator.cc b/lib/model/CHierarchicalResultsPopulator.cc index d989c3fa17..ba7aafbc1c 100644 --- a/lib/model/CHierarchicalResultsPopulator.cc +++ b/lib/model/CHierarchicalResultsPopulator.cc @@ -15,10 +15,13 @@ namespace ml { namespace model { -CHierarchicalResultsPopulator::CHierarchicalResultsPopulator(const CLimits& limits) : m_Limits(limits) { +CHierarchicalResultsPopulator::CHierarchicalResultsPopulator(const CLimits& limits) + : m_Limits(limits) { } -void CHierarchicalResultsPopulator::visit(const CHierarchicalResults& results, const TNode& node, bool pivot) { +void CHierarchicalResultsPopulator::visit(const CHierarchicalResults& results, + const TNode& node, + bool pivot) { if (!this->isLeaf(node) || !this->shouldWriteResult(m_Limits, results, node, pivot)) { return; } @@ -39,17 +42,16 @@ void CHierarchicalResultsPopulator::visit(const CHierarchicalResults& results, c SAnnotatedProbability& probability = node.s_AnnotatedProbability; for (std::size_t i = 0; i < probability.s_AttributeProbabilities.size(); ++i) { const SAttributeProbability& attribute = probability.s_AttributeProbabilities[i]; - attribute.s_CurrentBucketValue = - node.s_Model->currentBucketValue(attribute.s_Feature, pid, attribute.s_Cid, node.s_BucketStartTime + node.s_BucketLength / 2); - attribute.s_BaselineBucketMean = node.s_Model->baselineBucketMean(attribute.s_Feature, - pid, - attribute.s_Cid, - attribute.s_Type, - attribute.s_Correlated, - node.s_BucketStartTime + node.s_BucketLength / 2); + attribute.s_CurrentBucketValue = node.s_Model->currentBucketValue( + attribute.s_Feature, pid, attribute.s_Cid, + node.s_BucketStartTime + node.s_BucketLength / 2); + attribute.s_BaselineBucketMean = node.s_Model->baselineBucketMean( + attribute.s_Feature, pid, attribute.s_Cid, attribute.s_Type, + attribute.s_Correlated, node.s_BucketStartTime + node.s_BucketLength / 2); } - probability.s_CurrentBucketCount = node.s_Model->currentBucketCount(pid, node.s_BucketStartTime); + probability.s_CurrentBucketCount = + node.s_Model->currentBucketCount(pid, node.s_BucketStartTime); probability.s_BaselineBucketCount = node.s_Model->baselineBucketCount(pid); } } diff --git a/lib/model/CHierarchicalResultsProbabilityFinalizer.cc b/lib/model/CHierarchicalResultsProbabilityFinalizer.cc index 2223d536e3..77bf74f6ce 100644 --- a/lib/model/CHierarchicalResultsProbabilityFinalizer.cc +++ b/lib/model/CHierarchicalResultsProbabilityFinalizer.cc @@ -11,9 
+11,12 @@ namespace ml {
namespace model {
-void CHierarchicalResultsProbabilityFinalizer::visit(const CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+void CHierarchicalResultsProbabilityFinalizer::visit(const CHierarchicalResults& /*results*/,
+ const TNode& node,
+ bool /*pivot*/) {
if (node.s_RawAnomalyScore > 0.0) {
- node.s_AnnotatedProbability.s_Probability = maths::CTools::inverseAnomalyScore(node.s_RawAnomalyScore);
+ node.s_AnnotatedProbability.s_Probability =
+ maths::CTools::inverseAnomalyScore(node.s_RawAnomalyScore);
}
}
}
diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc
index 2d7f7dea32..b46e5a03c7 100644
--- a/lib/model/CIndividualModel.cc
+++ b/lib/model/CIndividualModel.cc
@@ -38,11 +38,14 @@ namespace {
using TStrCRef = boost::reference_wrapper<const std::string>;
using TStrCRefUInt64Map = std::map<TStrCRef, uint64_t, maths::COrderings::SLess>;
using TStrCRefStrCRefPr = std::pair<TStrCRef, TStrCRef>;
-using TStrCRefStrCRefPrUInt64Map = std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;
+using TStrCRefStrCRefPrUInt64Map =
+ std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;
//! Update \p hashes with the hashes of the active people in \p values.
template<typename T>
-void hashActive(const CDataGatherer& gatherer, const std::vector<T>& values, TStrCRefUInt64Map& hashes) {
+void hashActive(const CDataGatherer& gatherer,
+ const std::vector<T>& values,
+ TStrCRefUInt64Map& hashes) {
for (std::size_t pid = 0u; pid < values.size(); ++pid) {
if (gatherer.isPersonActive(pid)) {
uint64_t& hash = hashes[boost::cref(gatherer.personName(pid))];
@@ -80,19 +83,23 @@ CIndividualModel::CIndividualModel(const SModelParams& params,
for (const auto& model : newFeatureModels) {
m_FeatureModels.emplace_back(model.first, model.second);
}
- std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), [](const SFeatureModels& lhs, const SFeatureModels& rhs) {
return lhs.s_Feature < rhs.s_Feature;
});
+ std::sort(m_FeatureModels.begin(), m_FeatureModels.end(),
+ [](const SFeatureModels& lhs, const SFeatureModels& rhs) {
+ return lhs.s_Feature < rhs.s_Feature;
+ });
if (this->params().s_MultivariateByFields) {
m_FeatureCorrelatesModels.reserve(featureCorrelatesModels.size());
for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i) {
m_FeatureCorrelatesModels.emplace_back(
- featureCorrelatesModels[i].first, newFeatureCorrelateModelPriors[i].second, featureCorrelatesModels[i].second);
+ featureCorrelatesModels[i].first,
+ newFeatureCorrelateModelPriors[i].second,
+ featureCorrelatesModels[i].second);
}
- std::sort(m_FeatureCorrelatesModels.begin(),
- m_FeatureCorrelatesModels.end(),
- [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) { return lhs.s_Feature < rhs.s_Feature; });
+ std::sort(m_FeatureCorrelatesModels.begin(), m_FeatureCorrelatesModels.end(),
+ [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) {
+ return lhs.s_Feature < rhs.s_Feature;
+ });
}
}
@@ -117,7 +124,8 @@ CIndividualModel::CIndividualModel(bool isForPersistence, const CIndividualModel
m_FeatureCorrelatesModels.reserve(other.m_FeatureCorrelatesModels.size());
for (const auto& feature : other.m_FeatureCorrelatesModels) {
m_FeatureCorrelatesModels.emplace_back(
- feature.s_Feature, feature.s_ModelPrior, TCorrelationsPtr(feature.s_Models->cloneForPersistence()));
+ feature.s_Feature, feature.s_ModelPrior,
+ TCorrelationsPtr(feature.s_Models->cloneForPersistence()));
}
}
@@ -125,31 +133,41 @@ bool CIndividualModel::isPopulation() const {
return false;
}
-CIndividualModel::TOptionalUInt64 CIndividualModel::currentBucketCount(std::size_t pid, core_t::TTime time) const {
+CIndividualModel::TOptionalUInt64
+CIndividualModel::currentBucketCount(std::size_t pid, core_t::TTime time) const {
if (!this->bucketStatsAvailable(time)) {
- LOG_ERROR(<< "No statistics at " << time << ", current bucket = " << this->printCurrentBucket());
+ LOG_ERROR(<< "No statistics at " << time
+ << ", current bucket = " << this->printCurrentBucket());
return TOptionalUInt64();
}
- auto result = std::lower_bound(
- this->currentBucketPersonCounts().begin(), this->currentBucketPersonCounts().end(), pid, maths::COrderings::SFirstLess());
+ auto result = std::lower_bound(this->currentBucketPersonCounts().begin(),
+ this->currentBucketPersonCounts().end(), pid,
+ maths::COrderings::SFirstLess());
- return result != this->currentBucketPersonCounts().end() && result->first == pid ? result->second : static_cast<uint64_t>(0);
+ return result != this->currentBucketPersonCounts().end() && result->first == pid
+ ? result->second
+ : static_cast<uint64_t>(0);
}
bool CIndividualModel::bucketStatsAvailable(core_t::TTime time) const {
- return time >= this->currentBucketStartTime() && time < this->currentBucketStartTime() + this->bucketLength();
+ return time >= this->currentBucketStartTime() &&
+ time < this->currentBucketStartTime() + this->bucketLength();
}
-void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime,
+ core_t::TTime endTime,
+ CResourceMonitor& resourceMonitor) {
CDataGatherer& gatherer = this->dataGatherer();
if (!gatherer.dataAvailable(startTime)) {
return;
}
- for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) {
- this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor);
+ for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength();
+ time < endTime; time += bucketLength) {
+ this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength,
+ resourceMonitor);
// Currently, we only remember one bucket.
this->currentBucketStartTime(time);
@@ -159,22 +177,28 @@ void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime, core_t::T
}
}
-void CIndividualModel::sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CIndividualModel::sampleOutOfPhase(core_t::TTime startTime,
+ core_t::TTime endTime,
+ CResourceMonitor& resourceMonitor) {
CDataGatherer& gatherer = this->dataGatherer();
if (!gatherer.dataAvailable(startTime)) {
return;
}
- for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) {
+ for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength();
+ time < endTime; time += bucketLength) {
gatherer.sampleNow(time);
this->sampleBucketStatistics(time, time + bucketLength, resourceMonitor);
}
}
-void CIndividualModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CIndividualModel::sample(core_t::TTime startTime,
+ core_t::TTime endTime,
+ CResourceMonitor& resourceMonitor) {
const CDataGatherer& gatherer = this->dataGatherer();
- for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) {
+ for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength();
+ time < endTime; time += bucketLength) {
this->CAnomalyDetectorModel::sample(time, time + bucketLength, resourceMonitor);
this->currentBucketStartTime(time);
@@ -202,8 +226,10 @@ void CIndividualModel::prune(std::size_t maximumAge) {
TSizeVec peopleToRemove;
for (std::size_t pid = 0u; pid < m_LastBucketTimes.size(); ++pid) {
- if (gatherer.isPersonActive(pid) && !CAnomalyDetectorModel::isTimeUnset(m_LastBucketTimes[pid])) {
- std::size_t bucketsSinceLastEvent = static_cast<std::size_t>((time - m_LastBucketTimes[pid]) / gatherer.bucketLength());
+ if (gatherer.isPersonActive(pid) &&
+ !CAnomalyDetectorModel::isTimeUnset(m_LastBucketTimes[pid])) {
+ std::size_t bucketsSinceLastEvent = static_cast<std::size_t>(
+ (time - m_LastBucketTimes[pid]) / gatherer.bucketLength());
if (bucketsSinceLastEvent > maximumAge) {
LOG_TRACE(<< gatherer.personName(pid)
<< ", bucketsSinceLastEvent = " << bucketsSinceLastEvent
<< ", maximumAge = " << maximumAge);
@@ -251,7 +277,8 @@ uint64_t CIndividualModel::checksum(bool includeCurrentBucketStats) const {
for (const auto& model : feature.s_Models->correlationModels()) {
std::size_t pids[]{model.first.first, model.first.second};
if (gatherer.isPersonActive(pids[0]) && gatherer.isPersonActive(pids[1])) {
- uint64_t& hash = hashes2[{boost::cref(this->personName(pids[0])), boost::cref(this->personName(pids[1]))}];
+ uint64_t& hash = hashes2[{boost::cref(this->personName(pids[0])),
+ boost::cref(this->personName(pids[1]))}];
hash = maths::CChecksum::calculate(hash, model.second);
}
}
@@ -280,14 +307,16 @@ void CIndividualModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem)
core::CMemoryDebug::dynamicSize("m_FirstBucketTimes", m_FirstBucketTimes, mem);
core::CMemoryDebug::dynamicSize("m_LastBucketTimes", m_LastBucketTimes, mem);
core::CMemoryDebug::dynamicSize("m_FeatureModels", m_FeatureModels, mem);
- core::CMemoryDebug::dynamicSize("m_FeatureCorrelatesModels", m_FeatureCorrelatesModels, mem);
+ core::CMemoryDebug::dynamicSize("m_FeatureCorrelatesModels",
+ m_FeatureCorrelatesModels, mem);
core::CMemoryDebug::dynamicSize("m_MemoryEstimator", m_MemoryEstimator, mem);
}
std::size_t CIndividualModel::memoryUsage() const {
const CDataGatherer& gatherer = this->dataGatherer();
- TOptionalSize estimate = - this->estimateMemoryUsage(gatherer.numberActivePeople(), gatherer.numberActiveAttributes(), this->numberCorrelations()); + TOptionalSize estimate = this->estimateMemoryUsage( + gatherer.numberActivePeople(), gatherer.numberActiveAttributes(), + this->numberCorrelations()); return estimate ? estimate.get() : this->computeMemoryUsage(); } @@ -310,15 +339,20 @@ std::size_t CIndividualModel::staticSize() const { } void CIndividualModel::doAcceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(), core::CIEEE754::E_SinglePrecision); - core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, this->personBucketCounts(), inserter); + inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(), + core::CIEEE754::E_SinglePrecision); + core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, + this->personBucketCounts(), inserter); core::CPersistUtils::persist(FIRST_BUCKET_TIME_TAG, m_FirstBucketTimes, inserter); core::CPersistUtils::persist(LAST_BUCKET_TIME_TAG, m_LastBucketTimes, inserter); for (const auto& feature : m_FeatureModels) { - inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1)); + inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter, + &feature, _1)); } for (const auto& feature : m_FeatureCorrelatesModels) { - inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1)); + inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, + boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, + &feature, _1)); } this->interimBucketCorrectorAcceptPersistInserter(INTERIM_BUCKET_CORRECTOR_TAG, inserter); core::CPersistUtils::persist(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, inserter); @@ -328,24 +362,29 @@ bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& tr std::size_t i = 0u, j = 0u; do { const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN(WINDOW_BUCKET_COUNT_TAG, - double count, + RESTORE_SETUP_TEARDOWN(WINDOW_BUCKET_COUNT_TAG, double count, core::CStringUtils::stringToType(traverser.value(), count), this->windowBucketCount(count)) - RESTORE(PERSON_BUCKET_COUNT_TAG, core::CPersistUtils::restore(name, this->personBucketCounts(), traverser)) - RESTORE(FIRST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_FirstBucketTimes, traverser)) - RESTORE(LAST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_LastBucketTimes, traverser)) + RESTORE(PERSON_BUCKET_COUNT_TAG, + core::CPersistUtils::restore(name, this->personBucketCounts(), traverser)) + RESTORE(FIRST_BUCKET_TIME_TAG, + core::CPersistUtils::restore(name, m_FirstBucketTimes, traverser)) + RESTORE(LAST_BUCKET_TIME_TAG, + core::CPersistUtils::restore(name, m_LastBucketTimes, traverser)) RESTORE(FEATURE_MODELS_TAG, i == m_FeatureModels.size() || - traverser.traverseSubLevel( - boost::bind(&SFeatureModels::acceptRestoreTraverser, &m_FeatureModels[i++], boost::cref(this->params()), _1))) - RESTORE( - FEATURE_CORRELATE_MODELS_TAG, - j == m_FeatureCorrelatesModels.size() || - traverser.traverseSubLevel(boost::bind( - &SFeatureCorrelateModels::acceptRestoreTraverser, &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) - RESTORE(INTERIM_BUCKET_CORRECTOR_TAG, this->interimBucketCorrectorAcceptRestoreTraverser(traverser)) - RESTORE(MEMORY_ESTIMATOR_TAG, core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, 
m_MemoryEstimator, traverser)) + traverser.traverseSubLevel(boost::bind( + &SFeatureModels::acceptRestoreTraverser, + &m_FeatureModels[i++], boost::cref(this->params()), _1))) + RESTORE(FEATURE_CORRELATE_MODELS_TAG, + j == m_FeatureCorrelatesModels.size() || + traverser.traverseSubLevel(boost::bind( + &SFeatureCorrelateModels::acceptRestoreTraverser, + &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) + RESTORE(INTERIM_BUCKET_CORRECTOR_TAG, + this->interimBucketCorrectorAcceptRestoreTraverser(traverser)) + RESTORE(MEMORY_ESTIMATOR_TAG, + core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) } while (traverser.next()); for (auto& feature : m_FeatureModels) { @@ -361,7 +400,8 @@ bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& tr return true; } -void CIndividualModel::createUpdateNewModels(core_t::TTime time, CResourceMonitor& resourceMonitor) { +void CIndividualModel::createUpdateNewModels(core_t::TTime time, + CResourceMonitor& resourceMonitor) { this->updateRecycledModels(); CDataGatherer& gatherer = this->dataGatherer(); @@ -369,32 +409,40 @@ void CIndividualModel::createUpdateNewModels(core_t::TTime time, CResourceMonito std::size_t numberExistingPeople = m_FirstBucketTimes.size(); std::size_t numberCorrelations = this->numberCorrelations(); - TOptionalSize usageEstimate = this->estimateMemoryUsage(std::min(numberExistingPeople, gatherer.numberActivePeople()), - 0, // # attributes - numberCorrelations); - std::size_t ourUsage = usageEstimate ? usageEstimate.get() : this->computeMemoryUsage(); + TOptionalSize usageEstimate = this->estimateMemoryUsage( + std::min(numberExistingPeople, gatherer.numberActivePeople()), + 0, // # attributes + numberCorrelations); + std::size_t ourUsage = usageEstimate ? usageEstimate.get() + : this->computeMemoryUsage(); std::size_t resourceLimit = ourUsage + resourceMonitor.allocationLimit(); std::size_t numberNewPeople = gatherer.numberPeople(); - numberNewPeople = numberNewPeople > numberExistingPeople ? numberNewPeople - numberExistingPeople : 0; + numberNewPeople = numberNewPeople > numberExistingPeople ? numberNewPeople - numberExistingPeople + : 0; - while (numberNewPeople > 0 && resourceMonitor.areAllocationsAllowed() && (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) { + while (numberNewPeople > 0 && resourceMonitor.areAllocationsAllowed() && + (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) { // We batch people in CHUNK_SIZE (500) and create models in chunks // and test usage after each chunk. std::size_t numberToCreate = std::min(numberNewPeople, CHUNK_SIZE); - LOG_TRACE(<< "Creating batch of " << numberToCreate << " people of remaining " << numberNewPeople << ". " + LOG_TRACE(<< "Creating batch of " << numberToCreate + << " people of remaining " << numberNewPeople << ". 
" << resourceLimit - ourUsage << " free bytes remaining"); this->createNewModels(numberToCreate, 0); numberExistingPeople += numberToCreate; numberNewPeople -= numberToCreate; if (numberNewPeople > 0 && resourceMonitor.haveNoLimit() == false) { - ourUsage = this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, 0, numberCorrelations); + ourUsage = this->estimateMemoryUsageOrComputeAndUpdate( + numberExistingPeople, 0, numberCorrelations); } } if (numberNewPeople > 0) { resourceMonitor.acceptAllocationFailureResult(time); LOG_DEBUG(<< "Not enough memory to create models"); - core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures).increment(numberNewPeople); + core::CStatistics::instance() + .stat(stat_t::E_NumberMemoryLimitModelCreationFailures) + .increment(numberNewPeople); std::size_t toRemove = gatherer.numberPeople() - numberNewPeople; gatherer.removePeople(toRemove); } @@ -405,8 +453,10 @@ void CIndividualModel::createUpdateNewModels(core_t::TTime time, CResourceMonito void CIndividualModel::createNewModels(std::size_t n, std::size_t m) { if (n > 0) { std::size_t newN = m_FirstBucketTimes.size() + n; - core::CAllocationStrategy::resize(m_FirstBucketTimes, newN, CAnomalyDetectorModel::TIME_UNSET); - core::CAllocationStrategy::resize(m_LastBucketTimes, newN, CAnomalyDetectorModel::TIME_UNSET); + core::CAllocationStrategy::resize(m_FirstBucketTimes, newN, + CAnomalyDetectorModel::TIME_UNSET); + core::CAllocationStrategy::resize(m_LastBucketTimes, newN, + CAnomalyDetectorModel::TIME_UNSET); for (auto& feature : m_FeatureModels) { core::CAllocationStrategy::reserve(feature.s_Models, newN); for (std::size_t pid = feature.s_Models.size(); pid < newN; ++pid) { @@ -438,19 +488,24 @@ void CIndividualModel::updateRecycledModels() { this->CAnomalyDetectorModel::updateRecycledModels(); } -void CIndividualModel::refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor) { +void CIndividualModel::refreshCorrelationModels(std::size_t resourceLimit, + CResourceMonitor& resourceMonitor) { std::size_t n = this->numberOfPeople(); - double maxNumberCorrelations = this->params().s_CorrelationModelsOverhead * static_cast(n); - auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate, this, n, 0, _1); + double maxNumberCorrelations = this->params().s_CorrelationModelsOverhead * + static_cast(n); + auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate, + this, n, 0, _1); CTimeSeriesCorrelateModelAllocator allocator( - resourceMonitor, memoryUsage, resourceLimit, static_cast(maxNumberCorrelations)); + resourceMonitor, memoryUsage, resourceLimit, + static_cast(maxNumberCorrelations)); for (auto& feature : m_FeatureCorrelatesModels) { allocator.prototypePrior(feature.s_ModelPrior); feature.s_Models->refresh(allocator); } } -void CIndividualModel::clearPrunedResources(const TSizeVec& people, const TSizeVec& /*attributes*/) { +void CIndividualModel::clearPrunedResources(const TSizeVec& people, + const TSizeVec& /*attributes*/) { for (auto pid : people) { for (auto& feature : m_FeatureModels) { feature.s_Models[pid].reset(this->tinyModel()); @@ -458,23 +513,28 @@ void CIndividualModel::clearPrunedResources(const TSizeVec& people, const TSizeV } } -double CIndividualModel::emptyBucketWeight(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const { +double CIndividualModel::emptyBucketWeight(model_t::EFeature feature, + std::size_t pid, + core_t::TTime time) 
-double CIndividualModel::emptyBucketWeight(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const {
+double CIndividualModel::emptyBucketWeight(model_t::EFeature feature,
+                                           std::size_t pid,
+                                           core_t::TTime time) const {
     double result = 1.0;
     if (model_t::countsEmptyBuckets(feature)) {
         TOptionalUInt64 count = this->currentBucketCount(pid, time);
         if (!count || *count == 0) {
             double frequency = this->personFrequency(pid);
-            result = model_t::emptyBucketCountWeight(feature, frequency, this->params().s_CutoffToModelEmptyBuckets);
+            result = model_t::emptyBucketCountWeight(
+                feature, frequency, this->params().s_CutoffToModelEmptyBuckets);
         }
     }
     return result;
 }

-double CIndividualModel::probabilityBucketEmpty(model_t::EFeature feature, std::size_t pid) const {
+double CIndividualModel::probabilityBucketEmpty(model_t::EFeature feature,
+                                                std::size_t pid) const {
     double result = 0.0;
     if (model_t::countsEmptyBuckets(feature)) {
         double frequency = this->personFrequency(pid);
-        double emptyBucketWeight = model_t::emptyBucketCountWeight(feature, frequency, this->params().s_CutoffToModelEmptyBuckets);
+        double emptyBucketWeight = model_t::emptyBucketCountWeight(
+            feature, frequency, this->params().s_CutoffToModelEmptyBuckets);
         result = (1.0 - frequency) * (1.0 - emptyBucketWeight);
     }
     return result;
@@ -485,9 +545,13 @@ const maths::CModel* CIndividualModel::model(model_t::EFeature feature, std::siz
 }

 maths::CModel* CIndividualModel::model(model_t::EFeature feature, std::size_t pid) {
-    auto i = std::find_if(
-        m_FeatureModels.begin(), m_FeatureModels.end(), [feature](const SFeatureModels& model) { return model.s_Feature == feature; });
-    return i != m_FeatureModels.end() && pid < i->s_Models.size() ? i->s_Models[pid].get() : nullptr;
+    auto i = std::find_if(m_FeatureModels.begin(), m_FeatureModels.end(),
+                          [feature](const SFeatureModels& model) {
+                              return model.s_Feature == feature;
+                          });
+    return i != m_FeatureModels.end() && pid < i->s_Models.size()
+               ? i->s_Models[pid].get()
+               : nullptr;
 }

 void CIndividualModel::sampleCorrelateModels(const maths_t::TWeightStyleVec& weightStyles) {
@@ -529,12 +593,15 @@ const CIndividualModel::TTimeVec& CIndividualModel::lastBucketTimes() const {
 }

 double CIndividualModel::derate(std::size_t pid, core_t::TTime time) const {
-    return std::max(1.0 - static_cast<double>(time - m_FirstBucketTimes[pid]) / static_cast<double>(3 * core::constants::WEEK), 0.0);
+    return std::max(1.0 - static_cast<double>(time - m_FirstBucketTimes[pid]) /
+                              static_cast<double>(3 * core::constants::WEEK),
+                    0.0);
 }

 std::string CIndividualModel::printCurrentBucket() const {
     std::ostringstream result;
-    result << "[" << this->currentBucketStartTime() << "," << this->currentBucketStartTime() + this->bucketLength() << ")";
+    result << "[" << this->currentBucketStartTime() << ","
+           << this->currentBucketStartTime() + this->bucketLength() << ")";
     return result.str();
 }

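// For reference, the two per-person quantities above combine simply: with
// person frequency f and empty-bucket count weight w, probabilityBucketEmpty()
// returns (1 - f) * (1 - w), e.g. f = 0.25 and w = 0.6 give 0.75 * 0.4 = 0.3.
// derate() is a linear ramp max(1 - age / (3 * WEEK), 0): it is 1 for a brand
// new person and reaches 0 once three weeks of history have been observed.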
diff --git a/lib/model/CInterimBucketCorrector.cc b/lib/model/CInterimBucketCorrector.cc
index afa4702905..d4020e6b4f 100644
--- a/lib/model/CInterimBucketCorrector.cc
+++ b/lib/model/CInterimBucketCorrector.cc
@@ -26,7 +26,8 @@ const std::string COUNT_TREND_TAG("a");
 const std::string COUNT_MEAN_TAG("b");

 double decayRate(core_t::TTime bucketLength) {
-    return CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE * CAnomalyDetectorModelConfig::bucketNormalizationFactor(bucketLength);
+    return CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE *
+           CAnomalyDetectorModelConfig::bucketNormalizationFactor(bucketLength);
 }

 double trendDecayRate(core_t::TTime bucketLength) {
@@ -35,11 +36,13 @@ double trendDecayRate(core_t::TTime bucketLength) {
 }

 CInterimBucketCorrector::CInterimBucketCorrector(core_t::TTime bucketLength)
-    : m_BucketLength(bucketLength), m_CountTrend(trendDecayRate(bucketLength), bucketLength, COMPONENT_SIZE) {
+    : m_BucketLength(bucketLength),
+      m_CountTrend(trendDecayRate(bucketLength), bucketLength, COMPONENT_SIZE) {
 }

 CInterimBucketCorrector::CInterimBucketCorrector(const CInterimBucketCorrector& other)
-    : m_BucketLength(other.m_BucketLength), m_CountTrend(other.m_CountTrend), m_CountMean(other.m_CountMean) {
+    : m_BucketLength(other.m_BucketLength), m_CountTrend(other.m_CountTrend),
+      m_CountMean(other.m_CountMean) {
 }

 core_t::TTime CInterimBucketCorrector::calcBucketMidPoint(core_t::TTime time) const {
@@ -56,28 +59,39 @@ void CInterimBucketCorrector::update(core_t::TTime time, std::size_t bucketCount
     m_CountMean.add(bucketCount);
 }

-double CInterimBucketCorrector::estimateBucketCompleteness(core_t::TTime time, std::size_t currentCount) const {
+double CInterimBucketCorrector::estimateBucketCompleteness(core_t::TTime time,
+                                                           std::size_t currentCount) const {
     core_t::TTime bucketMidPoint = this->calcBucketMidPoint(time);
-    double bucketCount = m_CountTrend.initialized() ? maths::CBasicStatistics::mean(m_CountTrend.value(bucketMidPoint))
-                                                    : maths::CBasicStatistics::mean(m_CountMean);
-    return bucketCount > 0.0 ? maths::CTools::truncate(static_cast<double>(currentCount) / bucketCount, 0.0, 1.0) : 1.0;
-}
-
-double CInterimBucketCorrector::corrections(core_t::TTime time, std::size_t currentCount, double mode, double value) const {
+    double bucketCount = m_CountTrend.initialized()
+                             ? maths::CBasicStatistics::mean(m_CountTrend.value(bucketMidPoint))
+                             : maths::CBasicStatistics::mean(m_CountMean);
+    return bucketCount > 0.0
+               ? maths::CTools::truncate(
+                     static_cast<double>(currentCount) / bucketCount, 0.0, 1.0)
+               : 1.0;
+}
+
+double CInterimBucketCorrector::corrections(core_t::TTime time,
+                                            std::size_t currentCount,
+                                            double mode,
+                                            double value) const {
     double correction = (1.0 - this->estimateBucketCompleteness(time, currentCount)) * mode;
-    return maths::CTools::truncate(mode - value, std::min(0.0, correction), std::max(0.0, correction));
+    return maths::CTools::truncate(mode - value, std::min(0.0, correction),
+                                   std::max(0.0, correction));
 }

-CInterimBucketCorrector::TDouble10Vec CInterimBucketCorrector::corrections(core_t::TTime time,
-                                                                           std::size_t currentCount,
-                                                                           const TDouble10Vec& modes,
-                                                                           const TDouble10Vec& values) const {
+CInterimBucketCorrector::TDouble10Vec
+CInterimBucketCorrector::corrections(core_t::TTime time,
+                                     std::size_t currentCount,
+                                     const TDouble10Vec& modes,
+                                     const TDouble10Vec& values) const {
     TDouble10Vec corrections(values.size(), 0.0);
     double incompleteBucketFraction = 1.0 - this->estimateBucketCompleteness(time, currentCount);
     double correction = 0.0;
     for (std::size_t i = 0; i < corrections.size(); ++i) {
         correction = incompleteBucketFraction * modes[i];
-        corrections[i] = maths::CTools::truncate(modes[i] - values[i], std::min(0.0, correction), std::max(0.0, correction));
+        corrections[i] = maths::CTools::truncate(
+            modes[i] - values[i], std::min(0.0, correction), std::max(0.0, correction));
     }
     return corrections;
 }
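// Worked example for the corrections above (illustrative numbers): if the
// count trend predicts 100 events for a complete bucket and 40 have arrived
// so far, completeness = truncate(40 / 100, 0, 1) = 0.4. For a mode of 50
// and an observed value of 10 the candidate correction is
// (1 - 0.4) * 50 = 30, and truncate(50 - 10, min(0, 30), max(0, 30)) = 30,
// i.e. the interim value is nudged towards the mode by at most the fraction
// of the bucket that is still missing.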
@@ -100,7 +114,8 @@ bool CInterimBucketCorrector::acceptRestoreTraverser(core::CStateRestoreTraverse
     do {
         const std::string& name = traverser.name();
         if (name == COUNT_TREND_TAG) {
-            maths::SDistributionRestoreParams changeModelParams{maths_t::E_ContinuousData, decayRate(m_BucketLength)};
+            maths::SDistributionRestoreParams changeModelParams{
+                maths_t::E_ContinuousData, decayRate(m_BucketLength)};
             maths::STimeSeriesDecompositionRestoreParams params{
                 trendDecayRate(m_BucketLength), m_BucketLength, COMPONENT_SIZE, changeModelParams};
             maths::CTimeSeriesDecomposition restored(params, traverser);
diff --git a/lib/model/CLimits.cc b/lib/model/CLimits.cc
index 9aa9ed6e9b..4e6f4f516a 100644
--- a/lib/model/CLimits.cc
+++ b/lib/model/CLimits.cc
@@ -26,8 +26,7 @@ CLimits::CLimits()
       m_AnomalyMaxTimeBuckets(DEFAULT_ANOMALY_MAX_TIME_BUCKETS),
       m_MaxExamples(DEFAULT_RESULTS_MAX_EXAMPLES),
       m_UnusualProbabilityThreshold(DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD),
-      m_MemoryLimitMB(CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB),
-      m_ResourceMonitor() {
+      m_MemoryLimitMB(CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB), m_ResourceMonitor() {
 }

 CLimits::~CLimits() {
@@ -49,14 +48,17 @@ bool CLimits::init(const std::string& configFile) {
         return false;
     }

-    if (this->processSetting(propTree, "autoconfig.events", DEFAULT_AUTOCONFIG_EVENTS, m_AutoConfigEvents) == false ||
-        this->processSetting(propTree, "anomaly.maxtimebuckets", DEFAULT_ANOMALY_MAX_TIME_BUCKETS, m_AnomalyMaxTimeBuckets) == false ||
-        this->processSetting(propTree, "results.maxexamples", DEFAULT_RESULTS_MAX_EXAMPLES, m_MaxExamples) == false ||
-        this->processSetting(propTree,
-                             "results.unusualprobabilitythreshold",
+    if (this->processSetting(propTree, "autoconfig.events",
+                             DEFAULT_AUTOCONFIG_EVENTS, m_AutoConfigEvents) == false ||
+        this->processSetting(propTree, "anomaly.maxtimebuckets", DEFAULT_ANOMALY_MAX_TIME_BUCKETS,
+                             m_AnomalyMaxTimeBuckets) == false ||
+        this->processSetting(propTree, "results.maxexamples",
+                             DEFAULT_RESULTS_MAX_EXAMPLES, m_MaxExamples) == false ||
+        this->processSetting(propTree, "results.unusualprobabilitythreshold",
                              DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD,
                              m_UnusualProbabilityThreshold) == false ||
-        this->processSetting(propTree, "memory.modelmemorylimit", CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB, m_MemoryLimitMB) == false) {
+        this->processSetting(propTree, "memory.modelmemorylimit", CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB,
+                             m_MemoryLimitMB) == false) {
         LOG_ERROR(<< "Error processing config file " << configFile);
         return false;
     }
diff --git a/lib/model/CMemoryUsageEstimator.cc b/lib/model/CMemoryUsageEstimator.cc
index d6a83daca6..4c6c0ece87 100644
--- a/lib/model/CMemoryUsageEstimator.cc
+++ b/lib/model/CMemoryUsageEstimator.cc
@@ -33,7 +33,8 @@ CMemoryUsageEstimator::CMemoryUsageEstimator()
       m_NumEstimatesSinceValue(MAXIMUM_ESTIMATES_BEFORE_NEW_VALUE - 1) {
 }

-CMemoryUsageEstimator::TOptionalSize CMemoryUsageEstimator::estimate(const TSizeArray& predictors) {
+CMemoryUsageEstimator::TOptionalSize
+CMemoryUsageEstimator::estimate(const TSizeArray& predictors) {
     using TDoubleArray = boost::array<double, E_NumberPredictors>;

     if (m_Values.size() < E_NumberPredictors) {
@@ -54,8 +55,10 @@ CMemoryUsageEstimator::TOptionalSize CMemoryUsageEstimator::estimate(const TSize
     bool origin = true;
     for (std::size_t i = 0u; i < predictors.size(); ++i) {
         origin &= (predictors[i] == 0);
-        if (predictors[i] - static_cast<std::size_t>(x0[i]) > this->maximumExtrapolation(static_cast<EComponent>(i))) {
-            LOG_TRACE(<< "Sample too big for variance of predictor(" << i << "): " << predictors[i] << " > "
+        if (predictors[i] - static_cast<std::size_t>(x0[i]) >
+            this->maximumExtrapolation(static_cast<EComponent>(i))) {
+            LOG_TRACE(<< "Sample too big for variance of predictor(" << i
+                      << "): " << predictors[i] << " > "
                       << this->maximumExtrapolation(static_cast<EComponent>(i)));
             return TOptionalSize();
         }
@@ -75,11 +78,13 @@ CMemoryUsageEstimator::TOptionalSize CMemoryUsageEstimator::estimate(const TSize
         }
         y(i) = static_cast<double>(m_Values[i].second) - c0;
     }
-    Eigen::MatrixXd theta = X.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(y);
+    Eigen::MatrixXd theta =
+        X.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(y);

     double predicted = c0;
     for (std::size_t i = 0u; i < E_NumberPredictors; ++i) {
-        predicted += std::max(theta(i), 0.0) * (static_cast<double>(predictors[i]) - x0[i]);
+        predicted += std::max(theta(i), 0.0) *
+                     (static_cast<double>(predictors[i]) - x0[i]);
     }
     std::size_t mem = static_cast<std::size_t>(predicted + 0.5);
     ++m_NumEstimatesSinceValue;
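// estimate() above fits a non-negative linear model of memory usage in the
// predictors (people, attributes, correlations): it solves the least squares
// problem X * theta = y with a thin SVD, then extrapolates from the centred
// predictors (c0, x0), clamping each coefficient at zero. A self-contained
// sketch of the same numerics (Eigen only; the data is illustrative, not
// values from the estimator):

#include <Eigen/Dense>
#include <algorithm>
#include <iostream>

int main() {
    Eigen::MatrixXd X(3, 2); // rows: observations, columns: predictors
    X << 1.0, 0.0,
         0.0, 1.0,
         1.0, 1.0;
    Eigen::VectorXd y(3);    // observed memory deltas
    y << 10.0, 4.0, 14.0;
    // Thin-SVD least squares solve, exactly as in estimate().
    Eigen::VectorXd theta =
        X.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(y);
    // Extrapolate, clamping negative coefficients at zero.
    Eigen::Vector2d x(2.0, 3.0);
    double predicted = 0.0;
    for (int i = 0; i < theta.size(); ++i) {
        predicted += std::max(theta(i), 0.0) * x(i);
    }
    std::cout << predicted << '\n'; // 10 * 2 + 4 * 3 = 32 for this system
    return 0;
}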
@@ -89,7 +94,8 @@ CMemoryUsageEstimator::TOptionalSize CMemoryUsageEstimator::estimate(const TSize
 }

 void CMemoryUsageEstimator::addValue(const TSizeArray& predictors, std::size_t memory) {
-    LOG_TRACE(<< "Add Value for " << core::CContainerPrinter::print(predictors) << ": " << memory);
+    LOG_TRACE(<< "Add Value for " << core::CContainerPrinter::print(predictors)
+              << ": " << memory);

     m_NumEstimatesSinceValue = 0;

@@ -100,7 +106,8 @@ void CMemoryUsageEstimator::addValue(const TSizeArray& predictors, std::size_t m
     for (std::size_t i = 0u; closestDistance > 0 && i < m_Values.size(); ++i) {
         std::size_t distance = 0u;
         for (std::size_t j = 0u; j < predictors.size(); ++j) {
-            distance += std::max(m_Values[i].first[j], predictors[j]) - std::min(m_Values[i].first[j], predictors[j]);
+            distance += std::max(m_Values[i].first[j], predictors[j]) -
+                        std::min(m_Values[i].first[j], predictors[j]);
         }
         if (distance < closestDistance) {
             closest = i;
diff --git a/lib/model/CMetricBucketGatherer.cc b/lib/model/CMetricBucketGatherer.cc
index fd4d92a4f1..4c2a49f040 100644
--- a/lib/model/CMetricBucketGatherer.cc
+++ b/lib/model/CMetricBucketGatherer.cc
@@ -43,26 +43,36 @@
 using TSizeVec = std::vector<std::size_t>;
 using TStrVec = std::vector<std::string>;
 using TStrCRef = boost::reference_wrapper<const std::string>;
 using TStrCRefStrCRefPr = std::pair<TStrCRef, TStrCRef>;
-using TStrCRefStrCRefPrUInt64Map = std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;
+using TStrCRefStrCRefPrUInt64Map =
+    std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>;
 using TSampleVec = std::vector<CSample>;
 using TSizeMeanGathererUMap = boost::unordered_map<std::size_t, CGathererTools::TMeanGatherer>;
 using TSizeSizeMeanGathererUMapUMap = boost::unordered_map<std::size_t, TSizeMeanGathererUMap>;
-using TSizeMedianGathererUMap = boost::unordered_map<std::size_t, CGathererTools::TMedianGatherer>;
+using TSizeMedianGathererUMap =
+    boost::unordered_map<std::size_t, CGathererTools::TMedianGatherer>;
 using TSizeSizeMedianGathererUMapUMap = boost::unordered_map<std::size_t, TSizeMedianGathererUMap>;
 using TSizeMinGathererUMap = boost::unordered_map<std::size_t, CGathererTools::TMinGatherer>;
 using TSizeSizeMinGathererUMapUMap = boost::unordered_map<std::size_t, TSizeMinGathererUMap>;
 using TSizeMaxGathererUMap = boost::unordered_map<std::size_t, CGathererTools::TMaxGatherer>;
 using TSizeSizeMaxGathererUMapUMap = boost::unordered_map<std::size_t, TSizeMaxGathererUMap>;
-using TSizeVarianceGathererUMap = boost::unordered_map<std::size_t, CGathererTools::TVarianceGatherer>;
-using TSizeSizeVarianceGathererUMapUMap = boost::unordered_map<std::size_t, TSizeVarianceGathererUMap>;
+using TSizeVarianceGathererUMap =
+    boost::unordered_map<std::size_t, CGathererTools::TVarianceGatherer>;
+using TSizeSizeVarianceGathererUMapUMap =
+    boost::unordered_map<std::size_t, TSizeVarianceGathererUMap>;
 using TSizeSumGathererUMap = boost::unordered_map<std::size_t, CGathererTools::CSumGatherer>;
 using TSizeSizeSumGathererUMapUMap = boost::unordered_map<std::size_t, TSizeSumGathererUMap>;
-using TSizeMultivariateMeanGathererUMap = boost::unordered_map<std::size_t, CGathererTools::TMultivariateMeanGatherer>;
-using TSizeSizeMultivariateMeanGathererUMapUMap = boost::unordered_map<std::size_t, TSizeMultivariateMeanGathererUMap>;
-using TSizeMultivariateMinGathererUMap = boost::unordered_map<std::size_t, CGathererTools::TMultivariateMinGatherer>;
-using TSizeSizeMultivariateMinGathererUMapUMap = boost::unordered_map<std::size_t, TSizeMultivariateMinGathererUMap>;
-using TSizeMultivariateMaxGathererUMap = boost::unordered_map<std::size_t, CGathererTools::TMultivariateMaxGatherer>;
-using TSizeSizeMultivariateMaxGathererUMapUMap = boost::unordered_map<std::size_t, TSizeMultivariateMaxGathererUMap>;
+using TSizeMultivariateMeanGathererUMap =
+    boost::unordered_map<std::size_t, CGathererTools::TMultivariateMeanGatherer>;
+using TSizeSizeMultivariateMeanGathererUMapUMap =
+    boost::unordered_map<std::size_t, TSizeMultivariateMeanGathererUMap>;
+using TSizeMultivariateMinGathererUMap =
+    boost::unordered_map<std::size_t, CGathererTools::TMultivariateMinGatherer>;
+using TSizeSizeMultivariateMinGathererUMapUMap =
+    boost::unordered_map<std::size_t, TSizeMultivariateMinGathererUMap>;
+using TSizeMultivariateMaxGathererUMap =
+    boost::unordered_map<std::size_t, CGathererTools::TMultivariateMaxGatherer>;
+using TSizeSizeMultivariateMaxGathererUMapUMap =
+    boost::unordered_map<std::size_t, TSizeMultivariateMaxGathererUMap>;
 using TSizeFeatureDataPr = std::pair<std::size_t, SMetricFeatureData>;
 using TSizeFeatureDataPrVec = std::vector<TSizeFeatureDataPr>;
 using TSizeSizePrFeatureDataPr = std::pair<TSizeSizePr, SMetricFeatureData>;
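// All of the *UMapUMap aliases above share one shape: an outer hash map keyed
// by attribute id wrapping an inner hash map keyed by person id, i.e. roughly
//
//     boost::unordered_map<std::size_t,            // cid (attribute)
//         boost::unordered_map<std::size_t,        // pid (person)
//             CGathererTools::TMeanGatherer>>      // per-series gatherer
//
// so adding a value, pruning a person and erasing an attribute each cost only
// one or two hash probes, which the functors further down exploit.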
@@ -253,13 +263,16 @@ void initializeFeatureDataInstance(std::size_t dimension, TCategorySizePrAnyMap&
 class CPersistFeatureData {
 public:
     template<typename TSizeSizeTUMapUMap>
-    void operator()(const TCategorySizePr& category, const TSizeSizeTUMapUMap& data, core::CStatePersistInserter& inserter) const {
+    void operator()(const TCategorySizePr& category,
+                    const TSizeSizeTUMapUMap& data,
+                    core::CStatePersistInserter& inserter) const {
         if (data.empty()) {
             inserter.insertValue(this->tagName(category), EMPTY_STRING);
             return;
         }
-        inserter.insertLevel(this->tagName(category), boost::bind(SDoPersist(), boost::cref(data), _1));
+        inserter.insertLevel(this->tagName(category),
+                             boost::bind(SDoPersist(), boost::cref(data), _1));
     }

 private:
@@ -278,35 +291,44 @@ class CPersistFeatureData {
         case model_t::E_Sum:
             return SUM_TAG;
         case model_t::E_MultivariateMean:
-            return MULTIVARIATE_MEAN_TAG + core::CStringUtils::typeToString(category.second);
+            return MULTIVARIATE_MEAN_TAG +
+                   core::CStringUtils::typeToString(category.second);
         case model_t::E_MultivariateMin:
-            return MULTIVARIATE_MIN_TAG + core::CStringUtils::typeToString(category.second);
+            return MULTIVARIATE_MIN_TAG +
+                   core::CStringUtils::typeToString(category.second);
         case model_t::E_MultivariateMax:
-            return MULTIVARIATE_MAX_TAG + core::CStringUtils::typeToString(category.second);
+            return MULTIVARIATE_MAX_TAG +
+                   core::CStringUtils::typeToString(category.second);
         }
         return EMPTY_STRING;
     }

     struct SDoPersist {
         template<typename TSizeSizeTUMapUMap>
-        void operator()(const TSizeSizeTUMapUMap& data, core::CStatePersistInserter& inserter) const {
+        void operator()(const TSizeSizeTUMapUMap& data,
+                        core::CStatePersistInserter& inserter) const {
             using TSizeSizeTUMapUMapCItr = typename TSizeSizeTUMapUMap::const_iterator;
             std::vector<TSizeSizeTUMapUMapCItr> dataItrs;
             dataItrs.reserve(data.size());
             for (auto i = data.cbegin(); i != data.cend(); ++i) {
                 dataItrs.push_back(i);
             }
-            std::sort(dataItrs.begin(), dataItrs.end(), [](TSizeSizeTUMapUMapCItr lhs, TSizeSizeTUMapUMapCItr rhs) {
-                return lhs->first < rhs->first;
-            });
+            std::sort(dataItrs.begin(), dataItrs.end(),
+                      [](TSizeSizeTUMapUMapCItr lhs, TSizeSizeTUMapUMapCItr rhs) {
+                          return lhs->first < rhs->first;
+                      });

             for (auto itr : dataItrs) {
-                inserter.insertLevel(ATTRIBUTE_TAG, boost::bind(SDoPersist(), itr->first, boost::cref(itr->second), _1));
+                inserter.insertLevel(ATTRIBUTE_TAG,
+                                     boost::bind(SDoPersist(), itr->first,
boost::cref(itr->second), _1)); } } template void operator()(std::size_t pid, const T& data, core::CStatePersistInserter& inserter) const { inserter.insertValue(PERSON_TAG, pid); - inserter.insertLevel(DATA_TAG, boost::bind(&T::acceptPersistInserter, &data, _1)); + inserter.insertLevel( + DATA_TAG, boost::bind(&T::acceptPersistInserter, &data, _1)); } }; }; @@ -362,9 +390,11 @@ class CRestoreFeatureData { } if (isNewVersion) { - return traverser.traverseSubLevel(boost::bind(CDoNewRestore(dimension), _1, boost::cref(gatherer), boost::ref(data))); + return traverser.traverseSubLevel(boost::bind( + CDoNewRestore(dimension), _1, boost::cref(gatherer), boost::ref(data))); } else { - return traverser.traverseSubLevel(boost::bind(CDoOldRestore(dimension), _1, boost::cref(gatherer), boost::ref(data))); + return traverser.traverseSubLevel(boost::bind( + CDoOldRestore(dimension), _1, boost::cref(gatherer), boost::ref(data))); } } @@ -374,13 +404,15 @@ class CRestoreFeatureData { CDoNewRestore(std::size_t dimension) : m_Dimension(dimension) {} template - bool - operator()(core::CStateRestoreTraverser& traverser, const CMetricBucketGatherer& gatherer, TSizeSizeTUMapUMap& result) const { + bool operator()(core::CStateRestoreTraverser& traverser, + const CMetricBucketGatherer& gatherer, + TSizeSizeTUMapUMap& result) const { do { const std::string& name = traverser.name(); if (name == ATTRIBUTE_TAG) { if (traverser.traverseSubLevel(boost::bind( - &CDoNewRestore::restoreAttributes, this, _1, boost::cref(gatherer), boost::ref(result))) == false) { + &CDoNewRestore::restoreAttributes, this, _1, + boost::cref(gatherer), boost::ref(result))) == false) { LOG_ERROR(<< "Invalid data in " << traverser.value()); return false; } @@ -408,11 +440,13 @@ class CRestoreFeatureData { result[lastCid] = TSizeTUMap(1); } else if (name == PERSON_TAG) { if (!seenCid) { - LOG_ERROR(<< "Incorrect format - person before attribute ID in " << traverser.value()); + LOG_ERROR(<< "Incorrect format - person before attribute ID in " + << traverser.value()); return false; } if (traverser.traverseSubLevel(boost::bind( - &CDoNewRestore::restorePeople, this, _1, boost::cref(gatherer), boost::ref(result[lastCid]))) == false) { + &CDoNewRestore::restorePeople, this, _1, + boost::cref(gatherer), boost::ref(result[lastCid]))) == false) { LOG_ERROR(<< "Invalid data in " << traverser.value()); return false; } @@ -423,7 +457,9 @@ class CRestoreFeatureData { } template - bool restorePeople(core::CStateRestoreTraverser& traverser, const CMetricBucketGatherer& gatherer, TSizeTUMap& result) const { + bool restorePeople(core::CStateRestoreTraverser& traverser, + const CMetricBucketGatherer& gatherer, + TSizeTUMap& result) const { std::size_t lastPid(0); bool seenPid(false); @@ -437,16 +473,16 @@ class CRestoreFeatureData { seenPid = true; } else if (name == DATA_TAG) { if (!seenPid) { - LOG_ERROR(<< "Incorrect format - data before person ID in " << traverser.value()); + LOG_ERROR(<< "Incorrect format - data before person ID in " + << traverser.value()); return false; } - T initial(gatherer.dataGatherer().params(), - m_Dimension, + T initial(gatherer.dataGatherer().params(), m_Dimension, gatherer.currentBucketStartTime(), - gatherer.bucketLength(), - gatherer.beginInfluencers(), + gatherer.bucketLength(), gatherer.beginInfluencers(), gatherer.endInfluencers()); - if (traverser.traverseSubLevel(boost::bind(&T::acceptRestoreTraverser, &initial, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &T::acceptRestoreTraverser, &initial, _1)) == 
false) { LOG_ERROR(<< "Invalid data in " << traverser.value()); return false; } @@ -467,8 +503,9 @@ class CRestoreFeatureData { CDoOldRestore(std::size_t dimension) : m_Dimension(dimension) {} template - bool - operator()(core::CStateRestoreTraverser& traverser, const CMetricBucketGatherer& gatherer, TSizeSizeTUMapUMap& result) const { + bool operator()(core::CStateRestoreTraverser& traverser, + const CMetricBucketGatherer& gatherer, + TSizeSizeTUMapUMap& result) const { bool isPopulation = gatherer.dataGatherer().isPopulation(); if (isPopulation) { this->restorePopulation(traverser, gatherer, result); @@ -486,13 +523,12 @@ class CRestoreFeatureData { do { const std::string& name = traverser.name(); if (name == DATA_TAG) { - T initial(gatherer.dataGatherer().params(), - m_Dimension, + T initial(gatherer.dataGatherer().params(), m_Dimension, gatherer.currentBucketStartTime(), - gatherer.bucketLength(), - gatherer.beginInfluencers(), + gatherer.bucketLength(), gatherer.beginInfluencers(), gatherer.endInfluencers()); - if (traverser.traverseSubLevel(boost::bind(&T::acceptRestoreTraverser, &initial, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &T::acceptRestoreTraverser, &initial, _1)) == false) { LOG_ERROR(<< "Invalid data in " << traverser.value()); return false; } @@ -523,17 +559,17 @@ class CRestoreFeatureData { seenCid = true; } else if (name == DATA_TAG) { if (!seenCid) { - LOG_ERROR(<< "Incorrect format - data before attribute ID in " << traverser.value()); + LOG_ERROR(<< "Incorrect format - data before attribute ID in " + << traverser.value()); return false; } - T initial(gatherer.dataGatherer().params(), - m_Dimension, + T initial(gatherer.dataGatherer().params(), m_Dimension, gatherer.currentBucketStartTime(), - gatherer.bucketLength(), - gatherer.beginInfluencers(), + gatherer.bucketLength(), gatherer.beginInfluencers(), gatherer.endInfluencers()); - if (traverser.traverseSubLevel(boost::bind(&T::acceptRestoreTraverser, &initial, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &T::acceptRestoreTraverser, &initial, _1)) == false) { LOG_ERROR(<< "Invalid data in " << traverser.value()); return false; } @@ -556,7 +592,10 @@ class CRestoreFeatureData { struct SRemovePeople { public: template - void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, std::size_t begin, std::size_t end) const { + void operator()(const TCategorySizePr& /*category*/, + TSizeSizeTUMapUMap& data, + std::size_t begin, + std::size_t end) const { for (auto& cidEntry : data) { for (std::size_t pid = begin; pid < end; ++pid) { cidEntry.second.erase(pid); @@ -565,7 +604,9 @@ struct SRemovePeople { } template - void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, const TSizeVec& peopleToRemove) const { + void operator()(const TCategorySizePr& /*category*/, + TSizeSizeTUMapUMap& data, + const TSizeVec& peopleToRemove) const { for (auto& cidEntry : data) { for (auto pid : peopleToRemove) { cidEntry.second.erase(pid); @@ -577,14 +618,19 @@ struct SRemovePeople { //! Removes attributes from the data gatherers. 
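// SRemovePeople above and SRemoveAttributes below are small function objects
// that apply(...) fans out over every (category, container) entry of the
// heterogeneous feature-data map; _1 binds the category key and _2 the
// matching concretely-typed container. The calling shape, as used later in
// recyclePeople():
//
//     apply(m_FeatureData,
//           boost::bind(SRemovePeople(), _1, _2, boost::cref(peopleToRemove)));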
struct SRemoveAttributes { template - void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, const TSizeVec& attributesToRemove) const { + void operator()(const TCategorySizePr& /*category*/, + TSizeSizeTUMapUMap& data, + const TSizeVec& attributesToRemove) const { for (auto cid : attributesToRemove) { data.erase(cid); } } template - void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, std::size_t begin, std::size_t end) const { + void operator()(const TCategorySizePr& /*category*/, + TSizeSizeTUMapUMap& data, + std::size_t begin, + std::size_t end) const { for (std::size_t cid = begin; cid < end; ++cid) { data.erase(cid); } @@ -606,12 +652,14 @@ struct SDoSample { std::size_t activeId = gatherer.dataGatherer().isPopulation() ? cid : pid; auto cidEntry = data.find(cid); if (cidEntry == data.end()) { - LOG_ERROR(<< "No gatherer for attribute " << gatherer.dataGatherer().attributeName(cid) << " of person " + LOG_ERROR(<< "No gatherer for attribute " + << gatherer.dataGatherer().attributeName(cid) << " of person " << gatherer.dataGatherer().personName(pid)); } else { auto pidEntry = cidEntry->second.find(pid); if (pidEntry == cidEntry->second.end()) { - LOG_ERROR(<< "No gatherer for attribute " << gatherer.dataGatherer().attributeName(cid) << " of person " + LOG_ERROR(<< "No gatherer for attribute " + << gatherer.dataGatherer().attributeName(cid) << " of person " << gatherer.dataGatherer().personName(pid)); } else if (pidEntry->second.sample(time, sampleCounts->count(activeId))) { sampleCounts->updateSampleVariance(activeId); @@ -636,7 +684,8 @@ struct SHash { for (const auto& pidEntry : cidEntry.second) { std::size_t pid = pidEntry.first; if (gatherer.dataGatherer().isPersonActive(pid)) { - TStrCRef pidName = TStrCRef(gatherer.dataGatherer().personName(pid)); + TStrCRef pidName = + TStrCRef(gatherer.dataGatherer().personName(pid)); hashes.emplace(std::piecewise_construct, std::forward_as_tuple(cidName, pidName), std::forward_as_tuple(pidEntry.second.checksum())); @@ -664,20 +713,14 @@ struct SExtractFeatureData { TFeatureAnyPrVec& result) const { if (gatherer.dataGatherer().isPopulation()) { result.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); - this->featureData(data, - gatherer, - time, - bucketLength, - this->isSum(feature), - *boost::unsafe_any_cast(&result.back().second)); + this->featureData(data, gatherer, time, bucketLength, this->isSum(feature), + *boost::unsafe_any_cast( + &result.back().second)); } else { result.emplace_back(feature, TSizeFeatureDataPrVec()); - this->featureData(data, - gatherer, - time, - bucketLength, - this->isSum(feature), - *boost::unsafe_any_cast(&result.back().second)); + this->featureData( + data, gatherer, time, bucketLength, this->isSum(feature), + *boost::unsafe_any_cast(&result.back().second)); } } @@ -686,7 +729,8 @@ struct SExtractFeatureData { private: bool isSum(model_t::EFeature feature) const { - return feature == model_t::E_IndividualSumByBucketAndPerson || feature == model_t::E_IndividualLowSumByBucketAndPerson || + return feature == model_t::E_IndividualSumByBucketAndPerson || + feature == model_t::E_IndividualLowSumByBucketAndPerson || feature == model_t::E_IndividualHighSumByBucketAndPerson; } @@ -704,9 +748,11 @@ struct SExtractFeatureData { result.reserve(pidMap.size()); for (auto& pidEntry : pidMap) { std::size_t pid = pidEntry.first; - if (gatherer.hasExplicitNullsOnly(time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID) == false) { - this->featureData( - pidEntry.second, 
gatherer, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, time, bucketLength, result); + if (gatherer.hasExplicitNullsOnly( + time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID) == false) { + this->featureData(pidEntry.second, gatherer, pid, + model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, + time, bucketLength, result); } } } @@ -717,17 +763,20 @@ struct SExtractFeatureData { std::size_t cid = CDataGatherer::extractAttributeId(count); auto cidEntry = data.find(cid); if (cidEntry == data.end()) { - LOG_ERROR(<< "No gatherers for attribute " << gatherer.dataGatherer().attributeName(cid)); + LOG_ERROR(<< "No gatherers for attribute " + << gatherer.dataGatherer().attributeName(cid)); continue; } std::size_t pid = CDataGatherer::extractPersonId(count); auto pidEntry = cidEntry->second.find(pid); if (pidEntry == cidEntry->second.end()) { - LOG_ERROR(<< "No gatherers for person " << gatherer.dataGatherer().personName(pid)); + LOG_ERROR(<< "No gatherers for person " + << gatherer.dataGatherer().personName(pid)); continue; } - this->featureData(pidEntry->second, gatherer, pid, cid, time, bucketLength, result); + this->featureData(pidEntry->second, gatherer, pid, cid, time, + bucketLength, result); } } std::sort(result.begin(), result.end(), maths::COrderings::SFirstLess()); @@ -742,7 +791,9 @@ struct SExtractFeatureData { core_t::TTime time, core_t::TTime bucketLength, TSizeFeatureDataPrVec& result) const { - result.emplace_back(pid, this->featureData(data, time, bucketLength, gatherer.dataGatherer().effectiveSampleCount(pid))); + result.emplace_back( + pid, this->featureData(data, time, bucketLength, + gatherer.dataGatherer().effectiveSampleCount(pid))); } //! Population model specialization @@ -754,8 +805,10 @@ struct SExtractFeatureData { core_t::TTime time, core_t::TTime bucketLength, TSizeSizePrFeatureDataPrVec& result) const { - result.emplace_back(TSizeSizePr(pid, cid), - this->featureData(data, time, bucketLength, gatherer.dataGatherer().effectiveSampleCount(cid))); + result.emplace_back( + TSizeSizePr(pid, cid), + this->featureData(data, time, bucketLength, + gatherer.dataGatherer().effectiveSampleCount(cid))); } SMetricFeatureData featureData(const CGathererTools::CSumGatherer& data, @@ -766,13 +819,16 @@ struct SExtractFeatureData { } template - inline SMetricFeatureData - featureData(const T& data, core_t::TTime time, core_t::TTime bucketLength, double effectiveSampleCount) const { + inline SMetricFeatureData featureData(const T& data, + core_t::TTime time, + core_t::TTime bucketLength, + double effectiveSampleCount) const { return data.featureData(time, bucketLength, effectiveSampleCount); } }; -const TSampleVec SExtractFeatureData::ZERO_SAMPLE(1, CSample(0, TDoubleVec(1, 0.0), 1.0, 1.0)); +const TSampleVec + SExtractFeatureData::ZERO_SAMPLE(1, CSample(0, TDoubleVec(1, 0.0), 1.0, 1.0)); //! Adds a value to the specified data gatherers. 
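// SAddValue below creates the per-(cid, pid) gatherer lazily on first use via
// emplace with piecewise construction, so the gatherer is built in place from
// its constructor arguments and never copied. Roughly:
//
//     auto& entry = data[cid]
//                       .emplace(boost::unordered::piecewise_construct,
//                                boost::make_tuple(pid),
//                                boost::make_tuple(/* gatherer ctor args */))
//                       .first->second;
//     entry.add(time, value, count, sampleCount, influences);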
struct SAddValue { @@ -791,17 +847,17 @@ struct SAddValue { std::size_t cid, const CMetricBucketGatherer& gatherer, const SStatistic& stat) const { - auto& entry = data[cid] - .emplace(boost::unordered::piecewise_construct, - boost::make_tuple(pid), - boost::make_tuple(boost::cref(gatherer.dataGatherer().params()), - category.second, - gatherer.currentBucketStartTime(), - gatherer.bucketLength(), - gatherer.beginInfluencers(), - gatherer.endInfluencers())) - .first->second; - entry.add(stat.s_Time, (*stat.s_Values)[category.first], stat.s_Count, stat.s_SampleCount, *stat.s_Influences); + auto& entry = + data[cid] + .emplace(boost::unordered::piecewise_construct, boost::make_tuple(pid), + boost::make_tuple( + boost::cref(gatherer.dataGatherer().params()), + category.second, gatherer.currentBucketStartTime(), + gatherer.bucketLength(), gatherer.beginInfluencers(), + gatherer.endInfluencers())) + .first->second; + entry.add(stat.s_Time, (*stat.s_Values)[category.first], stat.s_Count, + stat.s_SampleCount, *stat.s_Influences); } }; @@ -809,7 +865,9 @@ struct SAddValue { struct SStartNewBucket { public: template - void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, core_t::TTime time) const { + void operator()(const TCategorySizePr& /*category*/, + TSizeSizeTUMapUMap& data, + core_t::TTime time) const { for (auto& cidEntry : data) { for (auto& pidEntry : cidEntry.second) { pidEntry.second.startNewBucket(time); @@ -822,7 +880,9 @@ struct SStartNewBucket { struct SResetBucket { public: template - void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, core_t::TTime bucketStart) const { + void operator()(const TCategorySizePr& /*category*/, + TSizeSizeTUMapUMap& data, + core_t::TTime bucketStart) const { for (auto& cidEntry : data) { for (auto& pidEntry : cidEntry.second) { pidEntry.second.resetBucket(bucketStart); @@ -835,7 +895,9 @@ struct SResetBucket { struct SReleaseMemory { public: template - void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data, core_t::TTime samplingCutoffTime) const { + void operator()(const TCategorySizePr& /*category*/, + TSizeSizeTUMapUMap& data, + core_t::TTime samplingCutoffTime) const { for (auto& cidEntry : data) { auto& pidMap = cidEntry.second; for (auto i = pidMap.begin(); i != pidMap.end(); /**/) { @@ -858,7 +920,8 @@ CMetricBucketGatherer::CMetricBucketGatherer(CDataGatherer& dataGatherer, const std::string& valueFieldName, const TStrVec& influenceFieldNames, core_t::TTime startTime) - : CBucketGatherer(dataGatherer, startTime), m_ValueFieldName(valueFieldName), m_BeginInfluencingFields(0), m_BeginValueFields(0) { + : CBucketGatherer(dataGatherer, startTime), m_ValueFieldName(valueFieldName), + m_BeginInfluencingFields(0), m_BeginValueFields(0) { this->initializeFieldNamesPart1(personFieldName, attributeFieldName, influenceFieldNames); this->initializeFieldNamesPart2(valueFieldName, summaryCountFieldName); this->initializeFeatureData(); @@ -871,28 +934,31 @@ CMetricBucketGatherer::CMetricBucketGatherer(CDataGatherer& dataGatherer, const std::string& valueFieldName, const TStrVec& influenceFieldNames, core::CStateRestoreTraverser& traverser) - : CBucketGatherer(dataGatherer, 0), m_ValueFieldName(valueFieldName), m_BeginValueFields(0) { + : CBucketGatherer(dataGatherer, 0), m_ValueFieldName(valueFieldName), + m_BeginValueFields(0) { this->initializeFieldNamesPart1(personFieldName, attributeFieldName, influenceFieldNames); - 
traverser.traverseSubLevel(boost::bind(&CMetricBucketGatherer::acceptRestoreTraverser, this, _1)); + traverser.traverseSubLevel( + boost::bind(&CMetricBucketGatherer::acceptRestoreTraverser, this, _1)); this->initializeFieldNamesPart2(valueFieldName, summaryCountFieldName); } -CMetricBucketGatherer::CMetricBucketGatherer(bool isForPersistence, const CMetricBucketGatherer& other) +CMetricBucketGatherer::CMetricBucketGatherer(bool isForPersistence, + const CMetricBucketGatherer& other) : CBucketGatherer(isForPersistence, other), m_ValueFieldName(other.m_ValueFieldName), - m_FieldNames(other.m_FieldNames), - m_BeginInfluencingFields(0), - m_BeginValueFields(0), - m_FeatureData(other.m_FeatureData) { + m_FieldNames(other.m_FieldNames), m_BeginInfluencingFields(0), + m_BeginValueFields(0), m_FeatureData(other.m_FeatureData) { if (!isForPersistence) { LOG_ABORT(<< "This constructor only creates clones for persistence"); } } void CMetricBucketGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(BASE_TAG, boost::bind(&CBucketGatherer::baseAcceptPersistInserter, this, _1)); + inserter.insertLevel( + BASE_TAG, boost::bind(&CBucketGatherer::baseAcceptPersistInserter, this, _1)); inserter.insertValue(VERSION_TAG, CURRENT_VERSION); - apply(m_FeatureData, boost::bind(CPersistFeatureData(), _1, _2, boost::ref(inserter))); + apply(m_FeatureData, + boost::bind(CPersistFeatureData(), _1, _2, boost::ref(inserter))); } bool CMetricBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -902,7 +968,8 @@ bool CMetricBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& do { const std::string& name = traverser.name(); if (name == BASE_TAG) { - if (traverser.traverseSubLevel(boost::bind(&CBucketGatherer::baseAcceptRestoreTraverser, this, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &CBucketGatherer::baseAcceptRestoreTraverser, this, _1)) == false) { LOG_ERROR(<< "Invalid data gatherer in " << traverser.value()); return false; } @@ -920,7 +987,8 @@ bool CMetricBucketGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& return true; } -bool CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTraverser& traverser, bool isCurrentVersion) { +bool CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTraverser& traverser, + bool isCurrentVersion) { const std::string& name = traverser.name(); if (name == MEAN_TAG) { CRestoreFeatureData restore; @@ -960,7 +1028,8 @@ bool CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTr } } else if (name.find(MULTIVARIATE_MEAN_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MEAN_TAG.length()), dimension) == false) { + if (core::CStringUtils::stringToType( + name.substr(MULTIVARIATE_MEAN_TAG.length()), dimension) == false) { LOG_ERROR(<< "Invalid dimension in " << name); return false; } @@ -971,7 +1040,8 @@ bool CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTr } } else if (name.find(MULTIVARIATE_MIN_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MIN_TAG.length()), dimension) == false) { + if (core::CStringUtils::stringToType( + name.substr(MULTIVARIATE_MIN_TAG.length()), dimension) == false) { LOG_ERROR(<< "Invalid dimension in " << name); return false; } @@ -982,7 +1052,8 @@ bool 
CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTr } } else if (name.find(MULTIVARIATE_MAX_TAG) != std::string::npos) { std::size_t dimension; - if (core::CStringUtils::stringToType(name.substr(MULTIVARIATE_MAX_TAG.length()), dimension) == false) { + if (core::CStringUtils::stringToType( + name.substr(MULTIVARIATE_MAX_TAG.length()), dimension) == false) { LOG_ERROR(<< "Invalid dimension in " << name); return false; } @@ -1029,15 +1100,19 @@ const TStrVec& CMetricBucketGatherer::fieldsOfInterest() const { } std::string CMetricBucketGatherer::description() const { - return function_t::name(function_t::function(m_DataGatherer.features())) + (m_ValueFieldName.empty() ? "" : " ") + m_ValueFieldName + + return function_t::name(function_t::function(m_DataGatherer.features())) + + (m_ValueFieldName.empty() ? "" : " ") + m_ValueFieldName + +(byField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " by ") + byField(m_DataGatherer.isPopulation(), m_FieldNames) + (overField(m_DataGatherer.isPopulation(), m_FieldNames).empty() ? "" : " over ") + - overField(m_DataGatherer.isPopulation(), m_FieldNames) + (m_DataGatherer.partitionFieldName().empty() ? "" : " partition=") + + overField(m_DataGatherer.isPopulation(), m_FieldNames) + + (m_DataGatherer.partitionFieldName().empty() ? "" : " partition=") + m_DataGatherer.partitionFieldName(); } -bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) { +bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, + CEventData& result, + CResourceMonitor& resourceMonitor) { using TOptionalStr = boost::optional; if (fieldValues.size() != m_FieldNames.size()) { @@ -1046,7 +1121,9 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEvent return false; } - const std::string* person = (fieldValues[0] == nullptr && m_DataGatherer.useNull()) ? &EMPTY_STRING : fieldValues[0]; + const std::string* person = (fieldValues[0] == nullptr && m_DataGatherer.useNull()) + ? &EMPTY_STRING + : fieldValues[0]; if (person == nullptr) { // Just ignore: the "person" field wasn't present in the // record. 
Since all models in an aggregate share this @@ -1067,7 +1144,8 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEvent if (m_DataGatherer.summaryMode() != model_t::E_None) { CEventData::TDouble1VecArraySizePr statistics; statistics.first.fill(TDouble1Vec(1, 0.0)); - if (m_DataGatherer.extractCountFromField(m_FieldNames[i], fieldValues[i], statistics.second) == false) { + if (m_DataGatherer.extractCountFromField(m_FieldNames[i], fieldValues[i], + statistics.second) == false) { result.addValue(); return true; } @@ -1075,19 +1153,21 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEvent bool allOk = true; if (m_FieldNames.size() > statistics.first.size() + i) { - LOG_ERROR(<< "Inconsistency - more statistic field names than allowed " << m_FieldNames.size() - i << " > " - << statistics.first.size()); + LOG_ERROR(<< "Inconsistency - more statistic field names than allowed " + << m_FieldNames.size() - i << " > " << statistics.first.size()); allOk = false; } if (m_FieldNames.size() > m_FieldMetricCategories.size() + i) { - LOG_ERROR(<< "Inconsistency - more statistic field names than metric categories " << m_FieldNames.size() - i << " > " + LOG_ERROR(<< "Inconsistency - more statistic field names than metric categories " + << m_FieldNames.size() - i << " > " << m_FieldMetricCategories.size()); allOk = false; } for (std::size_t j = 0u; allOk && i < m_FieldNames.size(); ++i, ++j) { model_t::EMetricCategory category = m_FieldMetricCategories[j]; if (fieldValues[i] == nullptr || - m_DataGatherer.extractMetricFromField(m_FieldNames[i], *fieldValues[i], statistics.first[category]) == false) { + m_DataGatherer.extractMetricFromField( + m_FieldNames[i], *fieldValues[i], statistics.first[category]) == false) { allOk = false; } } @@ -1102,7 +1182,9 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEvent } } else { TDouble1Vec value; - if (fieldValues[i] != nullptr && m_DataGatherer.extractMetricFromField(m_FieldNames[i], *fieldValues[i], value) == true) { + if (fieldValues[i] != nullptr && + m_DataGatherer.extractMetricFromField(m_FieldNames[i], *fieldValues[i], + value) == true) { result.addValue(value); } else { result.addValue(); @@ -1124,10 +1206,12 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEvent return false; } if (addedPerson) { - resourceMonitor.addExtraMemory(m_DataGatherer.isPopulation() ? CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD - : CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); - (m_DataGatherer.isPopulation() ? core::CStatistics::stat(stat_t::E_NumberOverFields) - : core::CStatistics::stat(stat_t::E_NumberByFields)) + resourceMonitor.addExtraMemory(m_DataGatherer.isPopulation() + ? CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD + : CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD); + (m_DataGatherer.isPopulation() + ? core::CStatistics::stat(stat_t::E_NumberOverFields) + : core::CStatistics::stat(stat_t::E_NumberByFields)) .increment(); } @@ -1136,7 +1220,9 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, CEvent return false; } - const std::string* attribute = (fieldValues[1] == nullptr && m_DataGatherer.useNull()) ? &EMPTY_STRING : fieldValues[1]; + const std::string* attribute = + (fieldValues[1] == nullptr && m_DataGatherer.useNull()) ? 
&EMPTY_STRING + : fieldValues[1]; if (m_DataGatherer.isPopulation()) { if (attribute == nullptr) { @@ -1177,13 +1263,15 @@ void CMetricBucketGatherer::recyclePeople(const TSizeVec& peopleToRemove) { return; } - apply(m_FeatureData, boost::bind(SRemovePeople(), _1, _2, boost::cref(peopleToRemove))); + apply(m_FeatureData, + boost::bind(SRemovePeople(), _1, _2, boost::cref(peopleToRemove))); this->CBucketGatherer::recyclePeople(peopleToRemove); } void CMetricBucketGatherer::removePeople(std::size_t lowestPersonToRemove) { - apply(m_FeatureData, boost::bind(SRemovePeople(), _1, _2, lowestPersonToRemove, m_DataGatherer.numberPeople())); + apply(m_FeatureData, boost::bind(SRemovePeople(), _1, _2, lowestPersonToRemove, + m_DataGatherer.numberPeople())); this->CBucketGatherer::removePeople(lowestPersonToRemove); } @@ -1194,7 +1282,8 @@ void CMetricBucketGatherer::recycleAttributes(const TSizeVec& attributesToRemove } if (m_DataGatherer.isPopulation()) { - apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, _2, boost::cref(attributesToRemove))); + apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, _2, + boost::cref(attributesToRemove))); } this->CBucketGatherer::recycleAttributes(attributesToRemove); @@ -1202,7 +1291,8 @@ void CMetricBucketGatherer::recycleAttributes(const TSizeVec& attributesToRemove void CMetricBucketGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { if (m_DataGatherer.isPopulation()) { - apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, _2, lowestAttributeToRemove, m_DataGatherer.numberAttributes())); + apply(m_FeatureData, boost::bind(SRemoveAttributes(), _1, _2, lowestAttributeToRemove, + m_DataGatherer.numberAttributes())); } this->CBucketGatherer::removeAttributes(lowestAttributeToRemove); @@ -1212,7 +1302,8 @@ uint64_t CMetricBucketGatherer::checksum() const { uint64_t seed = this->CBucketGatherer::checksum(); seed = maths::CChecksum::calculate(seed, m_DataGatherer.params().s_DecayRate); TStrCRefStrCRefPrUInt64Map hashes; - apply(m_FeatureData, boost::bind(SHash(), _1, _2, boost::cref(*this), boost::ref(hashes))); + apply(m_FeatureData, + boost::bind(SHash(), _1, _2, boost::cref(*this), boost::ref(hashes))); LOG_TRACE(<< "seed = " << seed); LOG_TRACE(<< "hashes = " << core::CContainerPrinter::print(hashes)); return maths::CChecksum::calculate(seed, hashes); @@ -1262,16 +1353,20 @@ void CMetricBucketGatherer::releaseMemory(core_t::TTime samplingCutoffTime) { void CMetricBucketGatherer::sample(core_t::TTime time) { if (m_DataGatherer.sampleCounts()) { - apply(m_FeatureData, boost::bind(SDoSample(), _1, _2, time, boost::cref(*this), m_DataGatherer.sampleCounts())); + apply(m_FeatureData, boost::bind(SDoSample(), _1, _2, time, boost::cref(*this), + m_DataGatherer.sampleCounts())); } // Merge smallest bucket into longer buckets, if they exist this->CBucketGatherer::sample(time); } -void CMetricBucketGatherer::featureData(core_t::TTime time, core_t::TTime bucketLength, TFeatureAnyPrVec& result) const { +void CMetricBucketGatherer::featureData(core_t::TTime time, + core_t::TTime bucketLength, + TFeatureAnyPrVec& result) const { result.clear(); - if (!this->dataAvailable(time) || time >= this->currentBucketStartTime() + this->bucketLength()) { + if (!this->dataAvailable(time) || + time >= this->currentBucketStartTime() + this->bucketLength()) { LOG_DEBUG(<< "No data available at " << time); return; } @@ -1285,10 +1380,10 @@ void CMetricBucketGatherer::featureData(core_t::TTime time, core_t::TTime bucket if (begin != m_FeatureData.end()) { auto 
end = begin; ++end; - apply( - begin, - end, - boost::bind(SExtractFeatureData(), _1, _2, boost::cref(*this), feature, time, bucketLength, boost::ref(result))); + apply(begin, end, + boost::bind(SExtractFeatureData(), _1, _2, + boost::cref(*this), feature, time, + bucketLength, boost::ref(result))); } else { LOG_ERROR(<< "No data for category " << model_t::print(category)); } @@ -1321,14 +1416,16 @@ void CMetricBucketGatherer::addValue(std::size_t pid, stat.s_Values = &values; stat.s_Count = static_cast(count); if (m_DataGatherer.sampleCounts()) { - stat.s_SampleCount = m_DataGatherer.sampleCounts()->count(m_DataGatherer.isPopulation() ? cid : pid); + stat.s_SampleCount = m_DataGatherer.sampleCounts()->count( + m_DataGatherer.isPopulation() ? cid : pid); } else { LOG_ERROR(<< "Invalid sample counts for gatherer"); stat.s_SampleCount = 0.0; } stat.s_Influences = &influences; - apply(m_FeatureData, boost::bind(SAddValue(), _1, _2, pid, cid, boost::cref(*this), boost::ref(stat))); + apply(m_FeatureData, boost::bind(SAddValue(), _1, _2, pid, cid, + boost::cref(*this), boost::ref(stat))); } void CMetricBucketGatherer::startNewBucket(core_t::TTime time, bool skipUpdates) { @@ -1343,13 +1440,16 @@ void CMetricBucketGatherer::startNewBucket(core_t::TTime time, bool skipUpdates) core_t::TTime earliestAvailableBucketStartTime = this->earliestBucketStartTime(); if (this->dataAvailable(earliestAvailableBucketStartTime)) { TSizeUInt64VecUMap counts; - const TSizeSizePrUInt64UMap& counts_ = this->bucketCounts(earliestAvailableBucketStartTime); + const TSizeSizePrUInt64UMap& counts_ = + this->bucketCounts(earliestAvailableBucketStartTime); for (const auto& count : counts_) { if (m_DataGatherer.isPopulation()) { - counts[CDataGatherer::extractAttributeId(count)].push_back(CDataGatherer::extractData(count)); + counts[CDataGatherer::extractAttributeId(count)].push_back( + CDataGatherer::extractData(count)); } else { - counts.emplace(CDataGatherer::extractPersonId(count), TUInt64Vec{0}).first->second[0] += - CDataGatherer::extractData(count); + counts + .emplace(CDataGatherer::extractPersonId(count), TUInt64Vec{0}) + .first->second[0] += CDataGatherer::extractData(count); } } double alpha = std::exp(-m_DataGatherer.params().s_DecayRate); @@ -1357,9 +1457,12 @@ void CMetricBucketGatherer::startNewBucket(core_t::TTime time, bool skipUpdates) for (auto& count : counts) { std::sort(count.second.begin(), count.second.end()); std::size_t n = count.second.size() / 2; - double median = count.second.size() % 2 == 0 ? static_cast(count.second[n - 1] + count.second[n]) / 2.0 - : static_cast(count.second[n]); - m_DataGatherer.sampleCounts()->updateMeanNonZeroBucketCount(count.first, median, alpha); + double median = + count.second.size() % 2 == 0 + ? 
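// Median of the sorted non-zero bucket counts, computed at this step: for an
// even number of counts it is the mean of the two middle elements, for an odd
// number the middle element itself. E.g. counts {3, 5, 8, 10} give
// (5 + 8) / 2 = 6.5 and counts {3, 5, 8} give 5.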
static_cast(count.second[n - 1] + count.second[n]) / 2.0 + : static_cast(count.second[n]); + m_DataGatherer.sampleCounts()->updateMeanNonZeroBucketCount( + count.first, median, alpha); } m_DataGatherer.sampleCounts()->refresh(m_DataGatherer); } @@ -1372,27 +1475,32 @@ void CMetricBucketGatherer::initializeFieldNamesPart1(const std::string& personF const TStrVec& influenceFieldNames) { switch (m_DataGatherer.summaryMode()) { case model_t::E_None: - m_FieldNames.reserve(2 + static_cast(m_DataGatherer.isPopulation()) + influenceFieldNames.size()); + m_FieldNames.reserve(2 + static_cast(m_DataGatherer.isPopulation()) + + influenceFieldNames.size()); m_FieldNames.push_back(personFieldName); if (m_DataGatherer.isPopulation()) m_FieldNames.push_back(attributeFieldName); m_BeginInfluencingFields = m_FieldNames.size(); - m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), influenceFieldNames.end()); + m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), + influenceFieldNames.end()); m_BeginValueFields = m_FieldNames.size(); break; case model_t::E_Manual: - m_FieldNames.reserve(3 + static_cast(m_DataGatherer.isPopulation()) + influenceFieldNames.size()); + m_FieldNames.reserve(3 + static_cast(m_DataGatherer.isPopulation()) + + influenceFieldNames.size()); m_FieldNames.push_back(personFieldName); if (m_DataGatherer.isPopulation()) m_FieldNames.push_back(attributeFieldName); m_BeginInfluencingFields = m_FieldNames.size(); - m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), influenceFieldNames.end()); + m_FieldNames.insert(m_FieldNames.end(), influenceFieldNames.begin(), + influenceFieldNames.end()); m_BeginValueFields = m_FieldNames.size(); break; }; } -void CMetricBucketGatherer::initializeFieldNamesPart2(const std::string& valueFieldName, const std::string& summaryCountFieldName) { +void CMetricBucketGatherer::initializeFieldNamesPart2(const std::string& valueFieldName, + const std::string& summaryCountFieldName) { switch (m_DataGatherer.summaryMode()) { case model_t::E_None: m_FieldNames.push_back(valueFieldName); @@ -1441,7 +1549,8 @@ void CMetricBucketGatherer::initializeFeatureData() { break; } } else { - LOG_ERROR(<< "Unexpected feature = " << model_t::print(m_DataGatherer.feature(i))); + LOG_ERROR(<< "Unexpected feature = " + << model_t::print(m_DataGatherer.feature(i))); } } } diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc index c9b81e1c2b..abb9a7690d 100644 --- a/lib/model/CMetricModel.cc +++ b/lib/model/CMetricModel.cc @@ -59,11 +59,11 @@ using TBool2Vec = core::CSmallVector; // We use short field names to reduce the state size const std::string INDIVIDUAL_STATE_TAG("a"); -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight, - maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountVarianceScaleWeight}; -const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight}; +const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{ + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, + maths_t::E_SampleCountVarianceScaleWeight}; +const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{ + maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight}; } CMetricModel::CMetricModel(const SModelParams& params, @@ -108,14 +108,16 @@ CMetricModel::CMetricModel(bool isForPersistence, const CMetricModel& other) } void 
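// For reference, initializeFieldNamesPart1/2 above lay m_FieldNames out as
//     [ person, attribute (population only), influencer fields...,
//       the value field(s), plus the summary count field when summaries
//       are pre-computed (E_Manual) ]
// recording m_BeginInfluencingFields and m_BeginValueFields as the offsets of
// the variable-length sections, which processFields() later indexes against.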
CMetricModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(INDIVIDUAL_STATE_TAG, boost::bind(&CMetricModel::doAcceptPersistInserter, this, _1)); + inserter.insertLevel(INDIVIDUAL_STATE_TAG, + boost::bind(&CMetricModel::doAcceptPersistInserter, this, _1)); } bool CMetricModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); if (name == INDIVIDUAL_STATE_TAG) { - if (traverser.traverseSubLevel(boost::bind(&CMetricModel::doAcceptRestoreTraverser, this, _1)) == false) { + if (traverser.traverseSubLevel(boost::bind( + &CMetricModel::doAcceptRestoreTraverser, this, _1)) == false) { // Logging handled already. return false; } @@ -142,15 +144,18 @@ bool CMetricModel::isMetric() const { } void CMetricModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const { - this->CIndividualModel::currentBucketPersonIds(time, m_CurrentBucketStats.s_FeatureData, result); + this->CIndividualModel::currentBucketPersonIds( + time, m_CurrentBucketStats.s_FeatureData, result); } CMetricModel::TOptionalDouble CMetricModel::baselineBucketCount(const std::size_t /*pid*/) const { return TOptionalDouble(); } -CMetricModel::TDouble1Vec -CMetricModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t /*cid*/, core_t::TTime time) const { +CMetricModel::TDouble1Vec CMetricModel::currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t /*cid*/, + core_t::TTime time) const { const TFeatureData* data = this->featureData(feature, pid, time); if (data) { const TOptionalSample& value = data->s_BucketValue; @@ -171,19 +176,25 @@ CMetricModel::TDouble1Vec CMetricModel::baselineBucketMean(model_t::EFeature fea } static const TSizeDoublePr1Vec NO_CORRELATED; TDouble1Vec result(model->predict(time, type.isUnconditional() ? 
NO_CORRELATED : correlated)); - this->correctBaselineForInterim(feature, pid, type, correlated, this->currentBucketInterimCorrections(), result); + this->correctBaselineForInterim(feature, pid, type, correlated, + this->currentBucketInterimCorrections(), result); TDouble1VecDouble1VecPr support = model_t::support(feature); return maths::CTools::truncate(result, support.first, support.second); } -void CMetricModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CMetricModel::sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { this->createUpdateNewModels(startTime, resourceMonitor); m_CurrentBucketStats.s_InterimCorrections.clear(); this->CIndividualModel::sampleBucketStatistics( - startTime, endTime, this->personFilter(), m_CurrentBucketStats.s_FeatureData, resourceMonitor); + startTime, endTime, this->personFilter(), + m_CurrentBucketStats.s_FeatureData, resourceMonitor); } -void CMetricModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CMetricModel::sample(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); @@ -220,7 +231,8 @@ void CMetricModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResou model_t::EFeature feature = featureData.first; TSizeFeatureDataPrVec& data = featureData.second; std::size_t dimension = model_t::dimension(feature); - LOG_TRACE(<< model_t::print(feature) << " data = " << core::CContainerPrinter::print(data)); + LOG_TRACE(<< model_t::print(feature) + << " data = " << core::CContainerPrinter::print(data)); this->applyFilter(model_t::E_XF_By, true, this->personFilter(), data); for (const auto& data_ : data) { @@ -234,16 +246,17 @@ void CMetricModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResou } core_t::TTime sampleTime = model_t::sampleTime(feature, time, bucketLength); - if (this->shouldIgnoreSample(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, sampleTime)) { + if (this->shouldIgnoreSample(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, + sampleTime)) { model->skipTime(time - lastBucketTimesMap[pid]); continue; } const TOptionalSample& bucket = data_.second.s_BucketValue; if (model_t::isSampled(feature) && bucket) { - values.assign(1, - core::make_triple( - bucket->time(), TDouble2Vec(bucket->value(dimension)), model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)); + values.assign(1, core::make_triple( + bucket->time(), TDouble2Vec(bucket->value(dimension)), + model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)); model->addBucketValue(values); } @@ -253,17 +266,25 @@ void CMetricModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResou } double derate = this->derate(pid, sampleTime); - double interval = (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) * emptyBucketWeight; - double count = this->params().s_MaximumUpdatesPerBucket > 0.0 && samples.size() > 0 - ? this->params().s_MaximumUpdatesPerBucket / static_cast<double>(samples.size()) + double interval = + (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) * + emptyBucketWeight; + double count = this->params().s_MaximumUpdatesPerBucket > 0.0 && + samples.size() > 0 + ? 
this->params().s_MaximumUpdatesPerBucket / + static_cast<double>(samples.size()) : 1.0; - LOG_TRACE(<< "Bucket = " << gatherer.printCurrentBucket(time) << ", feature = " << model_t::print(feature) - << ", samples = " << core::CContainerPrinter::print(samples) << ", isInteger = " << data_.second.s_IsInteger - << ", person = " << this->personName(pid) << ", count weight = " << count << ", dimension = " << dimension + LOG_TRACE(<< "Bucket = " << gatherer.printCurrentBucket(time) + << ", feature = " << model_t::print(feature) + << ", samples = " << core::CContainerPrinter::print(samples) + << ", isInteger = " << data_.second.s_IsInteger + << ", person = " << this->personName(pid) + << ", count weight = " << count << ", dimension = " << dimension << ", empty bucket weight = " << emptyBucketWeight); - model->params().probabilityBucketEmpty(this->probabilityBucketEmpty(feature, pid)); + model->params().probabilityBucketEmpty( + this->probabilityBucketEmpty(feature, pid)); values.resize(samples.size()); trendWeights.resize(samples.size(), TDouble2Vec4Vec(3)); @@ -273,11 +294,14 @@ void CMetricModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResou TDouble2Vec vi(samples[i].value(dimension)); double vs = samples[i].varianceScale(); values[i] = core::make_triple( - model_t::sampleTime(feature, time, bucketLength, ti), vi, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID); - trendWeights[i][0].assign(dimension, emptyBucketWeight * count * this->learnRate(feature) / vs); + model_t::sampleTime(feature, time, bucketLength, ti), + vi, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID); + trendWeights[i][0].assign(dimension, emptyBucketWeight * count * + this->learnRate(feature) / vs); trendWeights[i][1] = model->winsorisationWeight(derate, ti, vi); trendWeights[i][2].assign(dimension, vs); - priorWeights[i][0].assign(dimension, emptyBucketWeight * count * this->learnRate(feature)); + priorWeights[i][0].assign(dimension, emptyBucketWeight * count * + this->learnRate(feature)); priorWeights[i][1] = trendWeights[i][1]; priorWeights[i][2].assign(dimension, vs); } @@ -337,11 +361,9 @@ bool CMetricModel::computeProbability(const std::size_t pid, continue; } const TOptionalSample& bucket = data->s_BucketValue; - if (this->shouldIgnoreResult(feature, - result.s_ResultType, - pid, - model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, - model_t::sampleTime(feature, startTime, bucketLength, bucket->time()))) { + if (this->shouldIgnoreResult( + feature, result.s_ResultType, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, + model_t::sampleTime(feature, startTime, bucketLength, bucket->time()))) { continue; } @@ -351,11 +373,13 @@ bool CMetricModel::computeProbability(const std::size_t pid, CProbabilityAndInfluenceCalculator::SCorrelateParams params(partitioningFields); TStrCRefDouble1VecDouble1VecPrPrVecVecVec influenceValues; this->fill(feature, pid, startTime, result.isInterim(), params, influenceValues); - this->addProbabilityAndInfluences(pid, params, influenceValues, pJoint, resultBuilder); + this->addProbabilityAndInfluences(pid, params, influenceValues, + pJoint, resultBuilder); } else { CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); this->fill(feature, pid, startTime, result.isInterim(), params); - this->addProbabilityAndInfluences(pid, params, data->s_InfluenceValues, pJoint, resultBuilder); + this->addProbabilityAndInfluences(pid, params, data->s_InfluenceValues, + pJoint, resultBuilder); } } @@ -386,13 +410,15 @@ uint64_t CMetricModel::checksum(bool includeCurrentBucketStats) const { 
TStrCRefUInt64Map hashes; if (includeCurrentBucketStats) { - const TFeatureSizeFeatureDataPrVecPrVec& featureData = m_CurrentBucketStats.s_FeatureData; + const TFeatureSizeFeatureDataPrVecPrVec& featureData = + m_CurrentBucketStats.s_FeatureData; for (std::size_t i = 0u; i < featureData.size(); ++i) { for (std::size_t j = 0u; j < featureData[i].second.size(); ++j) { uint64_t& hash = hashes[KEY(featureData[i].second[j].first)]; const TFeatureData& data = featureData[i].second[j].second; hash = maths::CChecksum::calculate(hash, data.s_BucketValue); - hash = core::CHashing::hashCombine(hash, static_cast<uint64_t>(data.s_IsInteger)); + hash = core::CHashing::hashCombine( + hash, static_cast<uint64_t>(data.s_IsInteger)); hash = maths::CChecksum::calculate(hash, data.s_Samples); } } @@ -409,9 +435,12 @@ uint64_t CMetricModel::checksum(bool includeCurrentBucketStats) const { void CMetricModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMetricModel"); this->CIndividualModel::debugMemoryUsage(mem->addChild()); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", m_CurrentBucketStats.s_PersonCounts, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", m_CurrentBucketStats.s_FeatureData, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", m_CurrentBucketStats.s_InterimCorrections, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", + m_CurrentBucketStats.s_PersonCounts, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", + m_CurrentBucketStats.s_FeatureData, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", + m_CurrentBucketStats.s_InterimCorrections, mem); } std::size_t CMetricModel::memoryUsage() const { @@ -434,8 +463,10 @@ CMetricModel::CModelDetailsViewPtr CMetricModel::details() const { return CModelDetailsViewPtr(new CMetricModelDetailsView(*this)); } -const CMetricModel::TFeatureData* CMetricModel::featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const { - return this->CIndividualModel::featureData(feature, pid, time, m_CurrentBucketStats.s_FeatureData); +const CMetricModel::TFeatureData* +CMetricModel::featureData(model_t::EFeature feature, std::size_t pid, core_t::TTime time) const { + return this->CIndividualModel::featureData(feature, pid, time, + m_CurrentBucketStats.s_FeatureData); } void CMetricModel::createNewModels(std::size_t n, std::size_t m) { @@ -452,7 +483,8 @@ void CMetricModel::clearPrunedResources(const TSizeVec& people, const TSizeVec& // Stop collecting for these people and add them to the free list. 
gatherer.recyclePeople(people); if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { - gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), m_CurrentBucketStats.s_FeatureData); + gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), + m_CurrentBucketStats.s_FeatureData); } this->CIndividualModel::clearPrunedResources(people, attributes); @@ -470,7 +502,8 @@ uint64_t CMetricModel::currentBucketTotalCount() const { return m_CurrentBucketStats.s_TotalCount; } -CIndividualModel::TFeatureSizeSizeTripleDouble1VecUMap& CMetricModel::currentBucketInterimCorrections() const { +CIndividualModel::TFeatureSizeSizeTripleDouble1VecUMap& +CMetricModel::currentBucketInterimCorrections() const { return m_CurrentBucketStats.s_InterimCorrections; } @@ -493,7 +526,8 @@ bool CMetricModel::correlates(model_t::EFeature feature, std::size_t pid, core_t const maths::CModel* model{this->model(feature, pid)}; for (const auto& correlate : model->correlates()) { - if (this->featureData(feature, pid == correlate[0] ? correlate[1] : correlate[0], time)) { + if (this->featureData( + feature, pid == correlate[0] ? correlate[1] : correlate[0], time)) { return true; } } @@ -509,7 +543,8 @@ void CMetricModel::fill(model_t::EFeature feature, const TFeatureData* data{this->featureData(feature, pid, bucketTime)}; const TOptionalSample& bucket{data->s_BucketValue}; const maths::CModel* model{this->model(feature, pid)}; - core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength(), bucket->time())}; + core_t::TTime time{model_t::sampleTime(feature, bucketTime, + this->bucketLength(), bucket->time())}; TDouble2Vec4Vec weights(2); weights[0] = model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time); weights[1].assign(dimension, bucket->varianceScale()); @@ -522,13 +557,15 @@ void CMetricModel::fill(model_t::EFeature feature, params.s_Value.assign(1, bucket->value()); if (interim && model_t::requiresInterimResultAdjustment(feature)) { TDouble2Vec mode(params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weights)); - TDouble2Vec correction( - this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), mode, bucket->value(dimension))); + TDouble2Vec correction(this->interimValueCorrector().corrections( + time, this->currentBucketTotalCount(), mode, bucket->value(dimension))); params.s_Value[0] += correction; - this->currentBucketInterimCorrections().emplace(core::make_triple(feature, pid, pid), correction); + this->currentBucketInterimCorrections().emplace( + core::make_triple(feature, pid, pid), correction); } params.s_Count = bucket->count(); - params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)) + params.s_ComputeProbabilityParams + .addCalculation(model_t::probabilityCalculation(feature)) .weightStyles(PROBABILITY_WEIGHT_STYLES) .addBucketEmpty(TBool2Vec(1, !count || *count == 0)) .addWeights(weights); @@ -557,27 +594,34 @@ void CMetricModel::fill(model_t::EFeature feature, params.s_Variables.resize(correlates.size()); params.s_CorrelatedLabels.resize(correlates.size()); params.s_Correlated.resize(correlates.size()); - params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)).weightStyles(PROBABILITY_WEIGHT_STYLES); + params.s_ComputeProbabilityParams + .addCalculation(model_t::probabilityCalculation(feature)) + .weightStyles(PROBABILITY_WEIGHT_STYLES); // These are indexed as follows: // influenceValues["influencer 
name"]["correlate"]["influence value"] // This is because we aren't guaranteed that each influence is present for // each feature. - influenceValues.resize(this->featureData(feature, pid, bucketTime)->s_InfluenceValues.size(), - TStrCRefDouble1VecDouble1VecPrPrVecVec(correlates.size())); + influenceValues.resize( + this->featureData(feature, pid, bucketTime)->s_InfluenceValues.size(), + TStrCRefDouble1VecDouble1VecPrPrVecVec(correlates.size())); // Declared outside the loop to minimize the number of times it is created. TDouble1VecDouble1VecPr value; for (std::size_t i = 0u; i < correlates.size(); ++i) { TSize2Vec variables(pid == correlates[i][0] ? TSize2Vec{0, 1} : TSize2Vec{1, 0}); - params.s_CorrelatedLabels[i] = gatherer.personNamePtr(correlates[i][variables[1]]); + params.s_CorrelatedLabels[i] = + gatherer.personNamePtr(correlates[i][variables[1]]); params.s_Correlated[i] = correlates[i][variables[1]]; params.s_Variables[i] = variables; - const maths::CModel* models[]{model, this->model(feature, correlates[i][variables[1]])}; + const maths::CModel* models[]{ + model, this->model(feature, correlates[i][variables[1]])}; TDouble2Vec4Vec weight(2, TDouble2Vec(2, 1.0)); - weight[0][variables[0]] = models[0]->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0]; - weight[0][variables[1]] = models[1]->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0]; + weight[0][variables[0]] = models[0]->seasonalWeight( + maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0]; + weight[0][variables[1]] = models[1]->seasonalWeight( + maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0]; const TFeatureData* data[2]; data[0] = this->featureData(feature, correlates[i][0], bucketTime); @@ -585,10 +629,13 @@ void CMetricModel::fill(model_t::EFeature feature, if (data[0] && data[1] && data[0]->s_BucketValue && data[1]->s_BucketValue) { const TOptionalSample& bucket0{data[0]->s_BucketValue}; const TOptionalSample& bucket1{data[1]->s_BucketValue}; - core_t::TTime times[] = {model_t::sampleTime(feature, bucketTime, bucketLength, bucket0->time()), - model_t::sampleTime(feature, bucketTime, bucketLength, bucket1->time())}; - params.s_ElapsedTime = std::min(params.s_ElapsedTime, times[0] - firstBucketTimes[correlates[i][0]]); - params.s_ElapsedTime = std::min(params.s_ElapsedTime, times[1] - firstBucketTimes[correlates[i][1]]); + core_t::TTime times[] = { + model_t::sampleTime(feature, bucketTime, bucketLength, bucket0->time()), + model_t::sampleTime(feature, bucketTime, bucketLength, bucket1->time())}; + params.s_ElapsedTime = std::min( + params.s_ElapsedTime, times[0] - firstBucketTimes[correlates[i][0]]); + params.s_ElapsedTime = std::min( + params.s_ElapsedTime, times[1] - firstBucketTimes[correlates[i][1]]); params.s_Times[i] = TTime2Vec{times[0], times[1]}; params.s_Values[i].resize(2 * bucket0->value().size()); for (std::size_t j = 0u; j < bucket0->value().size(); ++j) { @@ -600,15 +647,17 @@ void CMetricModel::fill(model_t::EFeature feature, for (std::size_t j = 0u; j < data[0]->s_InfluenceValues.size(); ++j) { for (const auto& influenceValue : data[0]->s_InfluenceValues[j]) { TStrCRef influence = influenceValue.first; - std::size_t match = static_cast(std::find_if(data[1]->s_InfluenceValues[j].begin(), - data[1]->s_InfluenceValues[j].end(), - [influence](const TStrCRefDouble1VecDoublePrPr& value_) { - return value_.first.get() == influence.get(); - }) - - data[1]->s_InfluenceValues[j].begin()); + std::size_t match = static_cast( + 
std::find_if(data[1]->s_InfluenceValues[j].begin(), + data[1]->s_InfluenceValues[j].end(), + [influence](const TStrCRefDouble1VecDoublePrPr& value_) { + return value_.first.get() == influence.get(); + }) - + data[1]->s_InfluenceValues[j].begin()); if (match < data[1]->s_InfluenceValues[j].size()) { const TDouble1VecDoublePr& value0 = influenceValue.second; - const TDouble1VecDoublePr& value1 = data[1]->s_InfluenceValues[j][match].second; + const TDouble1VecDoublePr& value1 = + data[1]->s_InfluenceValues[j][match].second; value.first.resize(2 * value0.first.size()); for (std::size_t k = 0u; k < value0.first.size(); ++k) { value.first[2 * k + 0] = value0.first[k]; @@ -623,21 +672,26 @@ void CMetricModel::fill(model_t::EFeature feature, TOptionalUInt64 count[2]; count[0] = this->currentBucketCount(correlates[i][0], bucketTime); count[1] = this->currentBucketCount(correlates[i][1], bucketTime); - params.s_ComputeProbabilityParams.addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, !count[1] || *count[1] == 0}) + params.s_ComputeProbabilityParams + .addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, + !count[1] || *count[1] == 0}) .addWeights(weight); } if (interim && model_t::requiresInterimResultAdjustment(feature)) { core_t::TTime time{bucketTime + bucketLength / 2}; - TDouble2Vec1Vec modes(params.s_Model->correlateModes(time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights())); + TDouble2Vec1Vec modes(params.s_Model->correlateModes( + time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights())); for (std::size_t i = 0u; i < modes.size(); ++i) { if (!params.s_Values.empty()) { TDouble2Vec value_{params.s_Values[i][0], params.s_Values[i][1]}; - TDouble2Vec correction(this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), modes[i], value_)); + TDouble2Vec correction(this->interimValueCorrector().corrections( + time, this->currentBucketTotalCount(), modes[i], value_)); for (std::size_t j = 0u; j < 2; ++j) { params.s_Values[i][j] += correction[j]; } - this->currentBucketInterimCorrections().emplace(core::make_triple(feature, pid, params.s_Correlated[i]), - TDouble1Vec(1, correction[params.s_Variables[i][0]])); + this->currentBucketInterimCorrections().emplace( + core::make_triple(feature, pid, params.s_Correlated[i]), + TDouble1Vec(1, correction[params.s_Variables[i][0]])); } } } @@ -646,7 +700,8 @@ void CMetricModel::fill(model_t::EFeature feature, ////////// CMetricModel::SBucketStats Implementation ////////// CMetricModel::SBucketStats::SBucketStats(core_t::TTime startTime) - : s_StartTime(startTime), s_PersonCounts(), s_TotalCount(0), s_FeatureData(), s_InterimCorrections(1) { + : s_StartTime(startTime), s_PersonCounts(), s_TotalCount(0), + s_FeatureData(), s_InterimCorrections(1) { } } } diff --git a/lib/model/CMetricModelFactory.cc b/lib/model/CMetricModelFactory.cc index d52d96c3eb..6e18939479 100644 --- a/lib/model/CMetricModelFactory.cc +++ b/lib/model/CMetricModelFactory.cc @@ -29,11 +29,8 @@ namespace model { CMetricModelFactory::CMetricModelFactory(const SModelParams& params, model_t::ESummaryMode summaryMode, const std::string& summaryCountFieldName) - : CModelFactory(params), - m_Identifier(), - m_SummaryMode(summaryMode), - m_SummaryCountFieldName(summaryCountFieldName), - m_UseNull(false), + : CModelFactory(params), m_Identifier(), m_SummaryMode(summaryMode), + m_SummaryCountFieldName(summaryCountFieldName), m_UseNull(false), m_BucketLength(CAnomalyDetectorModelConfig::DEFAULT_BUCKET_LENGTH), 
m_BucketResultsDelay(0) { } @@ -42,7 +39,8 @@ CMetricModelFactory* CMetricModelFactory::clone() const { return new CMetricModelFactory(*this); } -CAnomalyDetectorModel* CMetricModelFactory::makeModel(const SModelInitializationData& initData) const { +CAnomalyDetectorModel* +CMetricModelFactory::makeModel(const SModelInitializationData& initData) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; if (!dataGatherer) { LOG_ERROR(<< "NULL data gatherer"); @@ -56,16 +54,16 @@ CAnomalyDetectorModel* CMetricModelFactory::makeModel(const SModelInitialization influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } - return new CMetricModel(this->modelParams(), - dataGatherer, - this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true), - this->defaultCorrelatePriors(features), - this->defaultCorrelates(features), - influenceCalculators); + return new CMetricModel( + this->modelParams(), dataGatherer, + this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true), + this->defaultCorrelatePriors(features), + this->defaultCorrelates(features), influenceCalculators); } -CAnomalyDetectorModel* CMetricModelFactory::makeModel(const SModelInitializationData& initData, - core::CStateRestoreTraverser& traverser) const { +CAnomalyDetectorModel* +CMetricModelFactory::makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; if (!dataGatherer) { LOG_ERROR(<< "NULL data gatherer"); @@ -79,51 +77,36 @@ CAnomalyDetectorModel* CMetricModelFactory::makeModel(const SModelInitialization influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } - return new CMetricModel(this->modelParams(), - dataGatherer, - this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true), - this->defaultCorrelatePriors(features), - this->defaultCorrelates(features), - influenceCalculators, - traverser); + return new CMetricModel( + this->modelParams(), dataGatherer, + this->defaultFeatureModels(features, dataGatherer->bucketLength(), 0.4, true), + this->defaultCorrelatePriors(features), + this->defaultCorrelates(features), influenceCalculators, traverser); } -CDataGatherer* CMetricModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer(model_t::E_Metric, - m_SummaryMode, - this->modelParams(), - m_SummaryCountFieldName, - m_PartitionFieldName, - initData.s_PartitionFieldValue, - m_PersonFieldName, - EMPTY_STRING, // AttributeFieldName - m_ValueFieldName, - m_InfluenceFieldNames, - m_UseNull, - this->searchKey(), - m_Features, - initData.s_StartTime, - initData.s_SampleOverrideCount); +CDataGatherer* +CMetricModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { + return new CDataGatherer( + model_t::E_Metric, m_SummaryMode, this->modelParams(), m_SummaryCountFieldName, + m_PartitionFieldName, initData.s_PartitionFieldValue, m_PersonFieldName, + EMPTY_STRING, // AttributeFieldName + m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, this->searchKey(), + m_Features, initData.s_StartTime, initData.s_SampleOverrideCount); } -CDataGatherer* CMetricModelFactory::makeDataGatherer(const std::string& partitionFieldValue, - core::CStateRestoreTraverser& traverser) const { - return new CDataGatherer(model_t::E_Metric, - m_SummaryMode, - this->modelParams(), - m_SummaryCountFieldName, - m_PartitionFieldName, - partitionFieldValue, - 
m_PersonFieldName, +CDataGatherer* +CMetricModelFactory::makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const { + return new CDataGatherer(model_t::E_Metric, m_SummaryMode, this->modelParams(), + m_SummaryCountFieldName, m_PartitionFieldName, + partitionFieldValue, m_PersonFieldName, EMPTY_STRING, // AttributeFieldName - m_ValueFieldName, - m_InfluenceFieldNames, - m_UseNull, - this->searchKey(), - traverser); + m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, + this->searchKey(), traverser); } -CMetricModelFactory::TPriorPtr CMetricModelFactory::defaultPrior(model_t::EFeature feature, const SModelParams& params) const { +CMetricModelFactory::TPriorPtr +CMetricModelFactory::defaultPrior(model_t::EFeature feature, const SModelParams& params) const { // Categorical data all use the multinomial prior. The creation // of these priors is managed by defaultCategoricalPrior. if (model_t::isCategorical(feature)) { @@ -149,12 +132,15 @@ CMetricModelFactory::TPriorPtr CMetricModelFactory::defaultPrior(model_t::EFeatu maths_t::EDataType dataType = this->dataType(); - maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CGammaRateConjugate gammaPrior = + maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); maths::CLogNormalMeanPrecConjugate logNormalPrior = - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, + params.s_DecayRate); - maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); + maths::CNormalMeanPrecConjugate normalPrior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); // Create the component priors. TPriorPtrVec priors; @@ -170,22 +156,21 @@ CMetricModelFactory::TPriorPtr CMetricModelFactory::defaultPrior(model_t::EFeatu modePriors.emplace_back(logNormalPrior.clone()); modePriors.emplace_back(normalPrior.clone()); maths::COneOfNPrior modePrior(modePriors, dataType, params.s_DecayRate); - maths::CXMeansOnline1d clusterer(dataType, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - params.s_DecayRate, - params.s_MinimumModeFraction, - params.s_MinimumModeCount, - params.minimumCategoryCount()); - maths::CMultimodalPrior multimodalPrior(dataType, clusterer, modePrior, params.s_DecayRate); + maths::CXMeansOnline1d clusterer( + dataType, maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, params.s_DecayRate, params.s_MinimumModeFraction, + params.s_MinimumModeCount, params.minimumCategoryCount()); + maths::CMultimodalPrior multimodalPrior(dataType, clusterer, modePrior, + params.s_DecayRate); priors.emplace_back(multimodalPrior.clone()); } return boost::make_shared<maths::COneOfNPrior>(priors, dataType, params.s_DecayRate); } -CMetricModelFactory::TMultivariatePriorPtr CMetricModelFactory::defaultMultivariatePrior(model_t::EFeature feature, - const SModelParams& params) const { +CMetricModelFactory::TMultivariatePriorPtr +CMetricModelFactory::defaultMultivariatePrior(model_t::EFeature feature, + const SModelParams& params) const { std::size_t dimension = model_t::dimension(feature); // Gaussian mixture for modeling (latitude, longitude). 
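// A standalone sketch, not ml-cpp code: it mirrors the model-selection
// structure that defaultPrior builds above -- gamma, log-normal and normal
// candidates, plus a multimodal candidate when params.s_MinimumModeFraction
// permits small modes, all wrapped in a one-of-n selection prior. The types
// below are simplified stand-ins for the maths:: classes in the diff.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

namespace sketch {
struct Prior {
    explicit Prior(std::string name_) : name(std::move(name_)) {}
    std::string name;
};
using PriorPtr = std::shared_ptr<Prior>;

// Stand-in for maths::COneOfNPrior: keeps every candidate; the real class
// additionally weights each one by how well it explains the data seen so far.
PriorPtr oneOfN(const std::vector<PriorPtr>& components) {
    std::string name{"one-of-n("};
    for (const auto& component : components) {
        name += component->name + ",";
    }
    name.back() = ')';
    return std::make_shared<Prior>(name);
}
}

int main() {
    using namespace sketch;
    double minimumModeFraction{0.3}; // stand-in for params.s_MinimumModeFraction

    std::vector<PriorPtr> priors{std::make_shared<Prior>("gamma"),
                                 std::make_shared<Prior>("log-normal"),
                                 std::make_shared<Prior>("normal")};
    if (minimumModeFraction <= 0.5) {
        // The (more expensive) multimodal candidate is only added when modes
        // are permitted to hold a small fraction of the data, as in the diff.
        priors.push_back(std::make_shared<Prior>(
            "multimodal(one-of-n(gamma,log-normal,normal))"));
    }
    std::cout << oneOfN(priors)->name << '\n';
    return 0;
}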
@@ -195,7 +180,8 @@ CMetricModelFactory::TMultivariatePriorPtr CMetricModelFactory::defaultMultivari TMultivariatePriorPtrVec priors; priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u); - TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(dimension, params); + TMultivariatePriorPtr multivariateNormal = + this->multivariateNormalPrior(dimension, params); priors.push_back(multivariateNormal); if (params.s_MinimumModeFraction <= 0.5) { priors.push_back(this->multivariateMultimodalPrior(dimension, params, *multivariateNormal)); @@ -204,8 +190,9 @@ CMetricModelFactory::TMultivariatePriorPtr CMetricModelFactory::defaultMultivari return this->multivariateOneOfNPrior(dimension, params, priors); } -CMetricModelFactory::TMultivariatePriorPtr CMetricModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, - const SModelParams& params) const { +CMetricModelFactory::TMultivariatePriorPtr +CMetricModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, + const SModelParams& params) const { TMultivariatePriorPtrVec priors; priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u); TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(2, params); @@ -218,15 +205,10 @@ CMetricModelFactory::TMultivariatePriorPtr CMetricModelFactory::defaultCorrelate const CSearchKey& CMetricModelFactory::searchKey() const { if (!m_SearchKeyCache) { - m_SearchKeyCache.reset(CSearchKey(m_Identifier, - function_t::function(m_Features), - m_UseNull, - this->modelParams().s_ExcludeFrequent, - m_ValueFieldName, - m_PersonFieldName, - "", - m_PartitionFieldName, - m_InfluenceFieldNames)); + m_SearchKeyCache.reset(CSearchKey( + m_Identifier, function_t::function(m_Features), m_UseNull, + this->modelParams().s_ExcludeFrequent, m_ValueFieldName, + m_PersonFieldName, "", m_PartitionFieldName, m_InfluenceFieldNames)); } return *m_SearchKeyCache; } diff --git a/lib/model/CMetricPopulationModel.cc b/lib/model/CMetricPopulationModel.cc index 10f9e80c12..e34c4d87a3 100644 --- a/lib/model/CMetricPopulationModel.cc +++ b/lib/model/CMetricPopulationModel.cc @@ -52,9 +52,11 @@ using TBool2Vec = core::CSmallVector<bool, 2>; using TTime2Vec = core::CSmallVector<core_t::TTime, 2>; using TOptionalSample = boost::optional<CSample>; using TSizeSizePrFeatureDataPrVec = CMetricPopulationModel::TSizeSizePrFeatureDataPrVec; -using TFeatureSizeSizePrFeatureDataPrVecPr = std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>; +using TFeatureSizeSizePrFeatureDataPrVecPr = + std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>; using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector<TFeatureSizeSizePrFeatureDataPrVecPr>; -using TSizeFuzzyDeduplicateUMap = boost::unordered_map<std::size_t, CModelTools::CFuzzyDeduplicate>; +using TSizeFuzzyDeduplicateUMap = + boost::unordered_map<std::size_t, CModelTools::CFuzzyDeduplicate>; //! \brief The values and weights for an attribute. 
struct SValuesAndWeights { @@ -73,38 +75,43 @@ const std::string FEATURE_MODELS_TAG("b"); const std::string FEATURE_CORRELATE_MODELS_TAG("c"); const std::string MEMORY_ESTIMATOR_TAG("d"); -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{maths_t::E_SampleCountWeight, - maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountVarianceScaleWeight}; -const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight}; +const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{ + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, + maths_t::E_SampleCountVarianceScaleWeight}; +const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{ + maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight}; } // unnamed:: -CMetricPopulationModel::CMetricPopulationModel(const SModelParams& params, - const TDataGathererPtr& dataGatherer, - const TFeatureMathsModelPtrPrVec& newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) +CMetricPopulationModel::CMetricPopulationModel( + const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) : CPopulationModel(params, dataGatherer, influenceCalculators), - m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - dataGatherer->bucketLength()), + m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - + dataGatherer->bucketLength()), m_Probabilities(0.05) { this->initialize(newFeatureModels, newFeatureCorrelateModelPriors, featureCorrelatesModels); } -CMetricPopulationModel::CMetricPopulationModel(const SModelParams& params, - const TDataGathererPtr& dataGatherer, - const TFeatureMathsModelPtrPrVec& newFeatureModels, - const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, - const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, - const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, - core::CStateRestoreTraverser& traverser) +CMetricPopulationModel::CMetricPopulationModel( + const SModelParams& params, + const TDataGathererPtr& dataGatherer, + const TFeatureMathsModelPtrPrVec& newFeatureModels, + const TFeatureMultivariatePriorPtrPrVec& newFeatureCorrelateModelPriors, + const TFeatureCorrelationsPtrPrVec& featureCorrelatesModels, + const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators, + core::CStateRestoreTraverser& traverser) : CPopulationModel(params, dataGatherer, influenceCalculators), - m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - dataGatherer->bucketLength()), + m_CurrentBucketStats(dataGatherer->currentBucketStartTime() - + dataGatherer->bucketLength()), m_Probabilities(0.05) { this->initialize(newFeatureModels, newFeatureCorrelateModelPriors, featureCorrelatesModels); - traverser.traverseSubLevel(boost::bind(&CMetricPopulationModel::acceptRestoreTraverser, this, _1)); + traverser.traverseSubLevel( + boost::bind(&CMetricPopulationModel::acceptRestoreTraverser, this, _1)); } void CMetricPopulationModel::initialize(const TFeatureMathsModelPtrPrVec& newFeatureModels, @@ -114,26 +121,31 @@ void 
CMetricPopulationModel::initialize(const TFeatureMathsModelPtrPrVec& newFea for (const auto& model : newFeatureModels) { m_FeatureModels.emplace_back(model.first, model.second); } - std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), [](const SFeatureModels& lhs, const SFeatureModels& rhs) { - return lhs.s_Feature < rhs.s_Feature; - }); + std::sort(m_FeatureModels.begin(), m_FeatureModels.end(), + [](const SFeatureModels& lhs, const SFeatureModels& rhs) { + return lhs.s_Feature < rhs.s_Feature; + }); if (this->params().s_MultivariateByFields) { m_FeatureCorrelatesModels.reserve(featureCorrelatesModels.size()); for (std::size_t i = 0u; i < featureCorrelatesModels.size(); ++i) { m_FeatureCorrelatesModels.emplace_back( - featureCorrelatesModels[i].first, newFeatureCorrelateModelPriors[i].second, featureCorrelatesModels[i].second); + featureCorrelatesModels[i].first, + newFeatureCorrelateModelPriors[i].second, + featureCorrelatesModels[i].second); } - std::sort(m_FeatureCorrelatesModels.begin(), - m_FeatureCorrelatesModels.end(), - [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) { return lhs.s_Feature < rhs.s_Feature; }); + std::sort(m_FeatureCorrelatesModels.begin(), m_FeatureCorrelatesModels.end(), + [](const SFeatureCorrelateModels& lhs, const SFeatureCorrelateModels& rhs) { + return lhs.s_Feature < rhs.s_Feature; + }); } } -CMetricPopulationModel::CMetricPopulationModel(bool isForPersistence, const CMetricPopulationModel& other) +CMetricPopulationModel::CMetricPopulationModel(bool isForPersistence, + const CMetricPopulationModel& other) : CPopulationModel(isForPersistence, other), m_CurrentBucketStats(0), // Not needed for persistence so minimally constructed - m_Probabilities(0.05), // Not needed for persistence so minimally construct + m_Probabilities(0.05), // Not needed for persistence so minimally construct m_MemoryEstimator(other.m_MemoryEstimator) { if (!isForPersistence) { LOG_ABORT(<< "This constructor only creates clones for persistence"); @@ -151,17 +163,23 @@ CMetricPopulationModel::CMetricPopulationModel(bool isForPersistence, const CMet m_FeatureCorrelatesModels.reserve(other.m_FeatureCorrelatesModels.size()); for (const auto& feature : other.m_FeatureCorrelatesModels) { m_FeatureCorrelatesModels.emplace_back( - feature.s_Feature, feature.s_ModelPrior, TCorrelationsPtr(feature.s_Models->cloneForPersistence())); + feature.s_Feature, feature.s_ModelPrior, + TCorrelationsPtr(feature.s_Models->cloneForPersistence())); } } void CMetricPopulationModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(POPULATION_STATE_TAG, boost::bind(&CMetricPopulationModel::doAcceptPersistInserter, this, _1)); + inserter.insertLevel( + POPULATION_STATE_TAG, + boost::bind(&CMetricPopulationModel::doAcceptPersistInserter, this, _1)); for (const auto& feature : m_FeatureModels) { - inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter, &feature, _1)); + inserter.insertLevel(FEATURE_MODELS_TAG, boost::bind(&SFeatureModels::acceptPersistInserter, + &feature, _1)); } for (const auto& feature : m_FeatureCorrelatesModels) { - inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, &feature, _1)); + inserter.insertLevel(FEATURE_CORRELATE_MODELS_TAG, + boost::bind(&SFeatureCorrelateModels::acceptPersistInserter, + &feature, _1)); } core::CPersistUtils::persist(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, inserter); } @@ -170,17 +188,21 @@ bool 
CMetricPopulationModel::acceptRestoreTraverser(core::CStateRestoreTraverser std::size_t i = 0u, j = 0u; do { const std::string& name = traverser.name(); - RESTORE(POPULATION_STATE_TAG, traverser.traverseSubLevel(boost::bind(&CMetricPopulationModel::doAcceptRestoreTraverser, this, _1))) + RESTORE(POPULATION_STATE_TAG, + traverser.traverseSubLevel(boost::bind( + &CMetricPopulationModel::doAcceptRestoreTraverser, this, _1))) RESTORE(FEATURE_MODELS_TAG, i == m_FeatureModels.size() || - traverser.traverseSubLevel( - boost::bind(&SFeatureModels::acceptRestoreTraverser, &m_FeatureModels[i++], boost::cref(this->params()), _1))) - RESTORE( - FEATURE_CORRELATE_MODELS_TAG, - j == m_FeatureCorrelatesModels.size() || - traverser.traverseSubLevel(boost::bind( - &SFeatureCorrelateModels::acceptRestoreTraverser, &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) - RESTORE(MEMORY_ESTIMATOR_TAG, core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) + traverser.traverseSubLevel(boost::bind( + &SFeatureModels::acceptRestoreTraverser, + &m_FeatureModels[i++], boost::cref(this->params()), _1))) + RESTORE(FEATURE_CORRELATE_MODELS_TAG, + j == m_FeatureCorrelatesModels.size() || + traverser.traverseSubLevel(boost::bind( + &SFeatureCorrelateModels::acceptRestoreTraverser, + &m_FeatureCorrelatesModels[j++], boost::cref(this->params()), _1))) + RESTORE(MEMORY_ESTIMATOR_TAG, + core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) } while (traverser.next()); for (auto& feature : m_FeatureModels) { @@ -213,34 +235,42 @@ bool CMetricPopulationModel::isMetric() const { } CMetricPopulationModel::TDouble1Vec -CMetricPopulationModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { +CMetricPopulationModel::currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const { const TSizeSizePrFeatureDataPrVec& featureData = this->featureData(feature, time); auto i = find(featureData, pid, cid); return i != featureData.end() ? extractValue(feature, *i) : TDouble1Vec(); } -CMetricPopulationModel::TDouble1Vec CMetricPopulationModel::baselineBucketMean(model_t::EFeature feature, - std::size_t pid, - std::size_t cid, - model_t::CResultType type, - const TSizeDoublePr1Vec& correlated, - core_t::TTime time) const { +CMetricPopulationModel::TDouble1Vec +CMetricPopulationModel::baselineBucketMean(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + model_t::CResultType type, + const TSizeDoublePr1Vec& correlated, + core_t::TTime time) const { const maths::CModel* model{this->model(feature, cid)}; if (!model) { return TDouble1Vec(); } static const TSizeDoublePr1Vec NO_CORRELATED; TDouble1Vec result(model->predict(time, type.isUnconditional() ? 
NO_CORRELATED : correlated)); - this->correctBaselineForInterim(feature, pid, cid, type, correlated, this->currentBucketInterimCorrections(), result); + this->correctBaselineForInterim(feature, pid, cid, type, correlated, + this->currentBucketInterimCorrections(), result); TDouble1VecDouble1VecPr support = model_t::support(feature); return maths::CTools::truncate(result, support.first, support.second); } bool CMetricPopulationModel::bucketStatsAvailable(core_t::TTime time) const { - return time >= m_CurrentBucketStats.s_StartTime && time < m_CurrentBucketStats.s_StartTime + this->bucketLength(); + return time >= m_CurrentBucketStats.s_StartTime && + time < m_CurrentBucketStats.s_StartTime + this->bucketLength(); } -void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); if (!gatherer.dataAvailable(startTime)) { @@ -251,7 +281,8 @@ void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, cor this->currentBucketInterimCorrections().clear(); for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { - this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor); + this->CAnomalyDetectorModel::sampleBucketStatistics(time, time + bucketLength, + resourceMonitor); // Currently, we only remember one bucket. m_CurrentBucketStats.s_StartTime = time; @@ -265,13 +296,16 @@ void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, cor model_t::EFeature feature = featureData_.first; TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature]; data.swap(featureData_.second); - LOG_TRACE(<< model_t::print(feature) << ": " << core::CContainerPrinter::print(data)); + LOG_TRACE(<< model_t::print(feature) << ": " + << core::CContainerPrinter::print(data)); this->applyFilters(false, this->personFilter(), this->attributeFilter(), data); } } } -void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { +void CMetricPopulationModel::sample(core_t::TTime startTime, + core_t::TTime endTime, + CResourceMonitor& resourceMonitor) { CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); if (!gatherer.validateSampleTimes(startTime, endTime)) { @@ -292,7 +326,8 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTi const TTimeVec& preSampleAttributeLastBucketTimes = this->attributeLastBucketTimes(); TSizeTimeUMap attributeLastBucketTimesMap; for (const auto& featureData_ : featureData) { - TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[featureData_.first]; + TSizeSizePrFeatureDataPrVec& data = + m_CurrentBucketStats.s_FeatureData[featureData_.first]; for (const auto& data_ : data) { std::size_t cid = CDataGatherer::extractAttributeId(data_); attributeLastBucketTimesMap[cid] = preSampleAttributeLastBucketTimes[cid]; @@ -314,7 +349,8 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTi std::size_t dimension = model_t::dimension(feature); TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature]; data.swap(featureData_.second); - LOG_TRACE(<< model_t::print(feature) << ": " << 
core::CContainerPrinter::print(data)); + LOG_TRACE(<< model_t::print(feature) << ": " + << core::CContainerPrinter::print(data)); this->applyFilters(true, this->personFilter(), this->attributeFilter(), data); TSizeValuesAndWeightsUMap attributes; @@ -324,7 +360,8 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTi if (data.size() >= this->params().s_MinimumToDeduplicate) { for (const auto& data_ : data) { std::size_t cid = CDataGatherer::extractAttributeId(data_); - const CGathererTools::TSampleVec& samples = CDataGatherer::extractData(data_).s_Samples; + const CGathererTools::TSampleVec& samples = + CDataGatherer::extractData(data_).s_Samples; for (const auto& sample : samples) { fuzzy[cid].add(TDouble2Vec(sample.value(dimension))); } } @@ -337,11 +374,14 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTi for (const auto& data_ : data) { std::size_t pid = CDataGatherer::extractPersonId(data_); std::size_t cid = CDataGatherer::extractAttributeId(data_); - const TOptionalSample& bucket = CDataGatherer::extractData(data_).s_BucketValue; - const CGathererTools::TSampleVec& samples = CDataGatherer::extractData(data_).s_Samples; + const TOptionalSample& bucket = + CDataGatherer::extractData(data_).s_BucketValue; + const CGathererTools::TSampleVec& samples = + CDataGatherer::extractData(data_).s_Samples; bool isInteger = CDataGatherer::extractData(data_).s_IsInteger; bool isNonNegative = CDataGatherer::extractData(data_).s_IsNonNegative; - core_t::TTime cutoff = attributeLastBucketTimes[cid] - this->params().s_SamplingAgeCutoff; + core_t::TTime cutoff = attributeLastBucketTimes[cid] - + this->params().s_SamplingAgeCutoff; maths::CModel* model{this->model(feature, cid)}; if (!model) { @@ -361,7 +401,8 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTi continue; } - LOG_TRACE(<< "Adding " << CDataGatherer::extractData(data_) << " for person = " << gatherer.personName(pid) + LOG_TRACE(<< "Adding " << CDataGatherer::extractData(data_) + << " for person = " << gatherer.personName(pid) << " and attribute = " << gatherer.attributeName(cid)); SValuesAndWeights& attribute = attributes[cid]; @@ -369,14 +410,20 @@ attribute.s_IsInteger &= isInteger; attribute.s_IsNonNegative &= isNonNegative; if (model_t::isSampled(feature) && bucket) { - attribute.s_BucketValues.emplace_back(bucket->time(), TDouble2Vec(bucket->value(dimension)), pid); + attribute.s_BucketValues.emplace_back( + bucket->time(), TDouble2Vec(bucket->value(dimension)), pid); } - std::size_t n = - std::count_if(samples.begin(), samples.end(), [cutoff](const CSample& sample) { return sample.time() >= cutoff; }); + std::size_t n = std::count_if(samples.begin(), samples.end(), + [cutoff](const CSample& sample) { + return sample.time() >= cutoff; + }); double updatesPerBucket = this->params().s_MaximumUpdatesPerBucket; - double countWeight = this->sampleRateWeight(pid, cid) * this->learnRate(feature) * - (updatesPerBucket > 0.0 && n > 0 ? updatesPerBucket / static_cast<double>(n) : 1.0); + double countWeight = this->sampleRateWeight(pid, cid) * + this->learnRate(feature) * + (updatesPerBucket > 0.0 && n > 0 + ? 
updatesPerBucket / static_cast<double>(n) : 1.0); LOG_TRACE(<< "countWeight = " << countWeight); for (const auto& sample : samples) { @@ -393,18 +440,24 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTi if (duplicate < attribute.s_Values.size()) { std::for_each(attribute.s_TrendWeights[duplicate][0].begin(), attribute.s_TrendWeights[duplicate][0].end(), - [countWeight, vs](double& weight) { weight += countWeight / vs; }); + [countWeight, vs](double& weight) { + weight += countWeight / vs; + }); std::for_each(attribute.s_PriorWeights[duplicate][0].begin(), attribute.s_PriorWeights[duplicate][0].end(), - [countWeight](double& weight) { weight += countWeight; }); + [countWeight](double& weight) { + weight += countWeight; + }); } else { attribute.s_Values.emplace_back(sample.time(), value, pid); - attribute.s_TrendWeights.push_back({TDouble2Vec(dimension, countWeight / vs), - model->winsorisationWeight(1.0, sample.time(), value), - TDouble2Vec(dimension, vs)}); - attribute.s_PriorWeights.push_back({TDouble2Vec(dimension, countWeight), - model->winsorisationWeight(1.0, sample.time(), value), - TDouble2Vec(dimension, vs)}); + attribute.s_TrendWeights.push_back( + {TDouble2Vec(dimension, countWeight / vs), + model->winsorisationWeight(1.0, sample.time(), value), + TDouble2Vec(dimension, vs)}); + attribute.s_PriorWeights.push_back( + {TDouble2Vec(dimension, countWeight), + model->winsorisationWeight(1.0, sample.time(), value), + TDouble2Vec(dimension, vs)}); } } } @@ -425,7 +478,8 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTi .priorWeights(attribute.second.s_PriorWeights); maths::CModel* model{this->model(feature, cid)}; - if (model->addSamples(params, attribute.second.s_Values) == maths::CModel::E_Reset) { + if (model->addSamples(params, attribute.second.s_Values) == + maths::CModel::E_Reset) { gatherer.resetSampleCount(cid); } } @@ -444,7 +498,8 @@ void CMetricPopulationModel::prune(std::size_t maximumAge) { TSizeVec peopleToRemove; TSizeVec attributesToRemove; - this->peopleAndAttributesToRemove(m_CurrentBucketStats.s_StartTime, maximumAge, peopleToRemove, attributesToRemove); + this->peopleAndAttributesToRemove(m_CurrentBucketStats.s_StartTime, maximumAge, + peopleToRemove, attributesToRemove); if (peopleToRemove.empty() && attributesToRemove.empty()) { return; @@ -453,7 +508,8 @@ void CMetricPopulationModel::prune(std::size_t maximumAge) { std::sort(attributesToRemove.begin(), attributesToRemove.end()); LOG_DEBUG(<< "Removing people {" << this->printPeople(peopleToRemove, 20) << '}'); - LOG_DEBUG(<< "Removing attributes {" << this->printAttributes(attributesToRemove, 20) << '}'); + LOG_DEBUG(<< "Removing attributes {" + << this->printAttributes(attributesToRemove, 20) << '}'); // Stop collecting for these people/attributes and add them // to the free list. 
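// A standalone sketch, not ml-cpp code: the de-duplication pattern used in
// sample() above. When a value matches one already collected for the
// attribute, its count weight is folded into the existing sample's weights
// instead of growing the sample set. The duplicate() lambda here is a
// hypothetical exact-match stand-in for the fuzzy lookup that the real code
// performs with CModelTools::CFuzzyDeduplicate.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    std::vector<double> values;       // distinct samples
    std::vector<double> countWeights; // one weight per distinct sample

    auto duplicate = [&](double v) -> std::size_t {
        for (std::size_t i = 0; i < values.size(); ++i) {
            if (values[i] == v) {
                return i;
            }
        }
        return values.size(); // "no duplicate" sentinel, as in the diff
    };

    double countWeight{0.5}; // stand-in for sampleRateWeight * learnRate * ...
    for (double sample : {1.0, 2.0, 1.0, 1.0}) {
        std::size_t dup = duplicate(sample);
        if (dup < values.size()) {
            countWeights[dup] += countWeight; // accumulate onto the duplicate
        } else {
            values.push_back(sample);
            countWeights.push_back(countWeight);
        }
    }
    for (std::size_t i = 0; i < values.size(); ++i) {
        std::cout << values[i] << " -> " << countWeights[i] << '\n';
    }
    return 0;
}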
@@ -462,7 +518,8 @@ void CMetricPopulationModel::prune(std::size_t maximumAge) { if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), featureData); + gatherer.featureData(m_CurrentBucketStats.s_StartTime, + gatherer.bucketLength(), featureData); for (auto& feature : featureData) { m_CurrentBucketStats.s_FeatureData[feature.first].swap(feature.second); } @@ -497,10 +554,9 @@ bool CMetricPopulationModel::computeProbability(std::size_t pid, partitioningFields.add(gatherer.attributeFieldName(), EMPTY_STRING); - CAnnotatedProbabilityBuilder resultBuilder(result, - std::max(numberAttributeProbabilities, std::size_t(1)), - function_t::function(gatherer.features()), - gatherer.numberActivePeople()); + CAnnotatedProbabilityBuilder resultBuilder( + result, std::max(numberAttributeProbabilities, std::size_t(1)), + function_t::function(gatherer.features()), gatherer.numberActivePeople()); LOG_TRACE(<< "computeProbability(" << gatherer.personName(pid) << ")"); @@ -526,15 +582,18 @@ bool CMetricPopulationModel::computeProbability(std::size_t pid, partitioningFields.back().second = TStrCRef(gatherer.attributeName(cid)); - const TOptionalSample& bucket = CDataGatherer::extractData(featureData[j]).s_BucketValue; + const TOptionalSample& bucket = + CDataGatherer::extractData(featureData[j]).s_BucketValue; if (!bucket) { - LOG_ERROR(<< "Expected a value for feature = " << model_t::print(feature) << ", person = " << gatherer.personName(pid) + LOG_ERROR(<< "Expected a value for feature = " << model_t::print(feature) + << ", person = " << gatherer.personName(pid) << ", attribute = " << gatherer.attributeName(cid)); continue; } - if (this->shouldIgnoreResult( - feature, result.s_ResultType, pid, cid, model_t::sampleTime(feature, startTime, bucketLength, bucket->time()))) { + if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, cid, + model_t::sampleTime(feature, startTime, bucketLength, + bucket->time()))) { continue; } @@ -545,36 +604,31 @@ bool CMetricPopulationModel::computeProbability(std::size_t pid, this->fill(feature, pid, cid, startTime, result.isInterim(), params); model_t::CResultType type; TSize1Vec mostAnomalousCorrelate; - if (pJoint.addProbability(feature, - cid, - *params.s_Model, - params.s_ElapsedTime, + if (pJoint.addProbability(feature, cid, *params.s_Model, params.s_ElapsedTime, params.s_ComputeProbabilityParams, - params.s_Time, - params.s_Value, - params.s_Probability, - params.s_Tail, - type, - mostAnomalousCorrelate)) { - LOG_TRACE(<< "P(" << params.describe() << ", attribute = " << gatherer.attributeName(cid) - << ", person = " << this->personName(pid) << ") = " << params.s_Probability); - const auto& influenceValues = CDataGatherer::extractData(featureData[j]).s_InfluenceValues; + params.s_Time, params.s_Value, params.s_Probability, + params.s_Tail, type, mostAnomalousCorrelate)) { + LOG_TRACE(<< "P(" << params.describe() + << ", attribute = " << gatherer.attributeName(cid) + << ", person = " << this->personName(pid) + << ") = " << params.s_Probability); + const auto& influenceValues = + CDataGatherer::extractData(featureData[j]).s_InfluenceValues; for (std::size_t k = 0u; k < influenceValues.size(); ++k) { - if (const CInfluenceCalculator* influenceCalculator = this->influenceCalculator(feature, k)) { + if (const CInfluenceCalculator* influenceCalculator = + this->influenceCalculator(feature, k)) { 
pJoint.plugin(*influenceCalculator); - pJoint.addInfluences(*(gatherer.beginInfluencers() + k), influenceValues[k], params); + pJoint.addInfluences(*(gatherer.beginInfluencers() + k), + influenceValues[k], params); } } - resultBuilder.addAttributeProbability(cid, - gatherer.attributeNamePtr(cid), - 1.0, - params.s_Probability, - model_t::CResultType::E_Unconditional, - feature, - NO_CORRELATED_ATTRIBUTES, - NO_CORRELATES); + resultBuilder.addAttributeProbability( + cid, gatherer.attributeNamePtr(cid), 1.0, + params.s_Probability, model_t::CResultType::E_Unconditional, + feature, NO_CORRELATED_ATTRIBUTES, NO_CORRELATES); } else { - LOG_ERROR(<< "Failed to compute P(" << params.describe() << ", attribute = " << gatherer.attributeName(cid) + LOG_ERROR(<< "Failed to compute P(" << params.describe() + << ", attribute = " << gatherer.attributeName(cid) << ", person = " << this->personName(pid) << ")"); } } @@ -614,7 +668,8 @@ uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const } using TStrCRefStrCRefPr = std::pair<TStrCRef, TStrCRef>; - using TStrCRefStrCRefPrUInt64Map = std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>; + using TStrCRefStrCRefPrUInt64Map = + std::map<TStrCRefStrCRefPr, uint64_t, maths::COrderings::SLess>; const CDataGatherer& gatherer = this->dataGatherer(); @@ -623,7 +678,8 @@ uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const for (const auto& feature : m_FeatureModels) { for (std::size_t cid = 0u; cid < feature.s_Models.size(); ++cid) { if (gatherer.isAttributeActive(cid)) { - uint64_t& hash = hashes[{boost::cref(EMPTY_STRING), boost::cref(gatherer.attributeName(cid))}]; + uint64_t& hash = + hashes[{boost::cref(EMPTY_STRING), boost::cref(gatherer.attributeName(cid))}]; hash = maths::CChecksum::calculate(hash, feature.s_Models[cid]); } } @@ -632,8 +688,11 @@ uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const for (const auto& feature : m_FeatureCorrelatesModels) { for (const auto& model : feature.s_Models->correlationModels()) { std::size_t cids[]{model.first.first, model.first.second}; - if (gatherer.isAttributeActive(cids[0]) && gatherer.isAttributeActive(cids[1])) { + if (gatherer.isAttributeActive(cids[0]) && + gatherer.isAttributeActive(cids[1])) { + uint64_t& hash = + hashes[{boost::cref(gatherer.attributeName(cids[0])), + boost::cref(gatherer.attributeName(cids[1]))}]; hash = maths::CChecksum::calculate(hash, model.second); } } @@ -641,7 +700,8 @@ uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const if (includeCurrentBucketStats) { for (const auto& personCount : this->personCounts()) { - uint64_t& hash = hashes[{boost::cref(gatherer.personName(personCount.first)), boost::cref(EMPTY_STRING)}]; + uint64_t& hash = + hashes[{boost::cref(gatherer.personName(personCount.first)), boost::cref(EMPTY_STRING)}]; hash = maths::CChecksum::calculate(hash, personCount.second); } for (const auto& feature : m_CurrentBucketStats.s_FeatureData) { @@ -649,7 +709,8 @@ uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const std::size_t pid = CDataGatherer::extractPersonId(data_); std::size_t cid = CDataGatherer::extractAttributeId(data_); const TFeatureData& data = CDataGatherer::extractData(data_); - uint64_t& hash = hashes[{boost::cref(this->personName(pid)), boost::cref(this->attributeName(cid))}]; + uint64_t& hash = + hashes[{boost::cref(this->personName(pid)), boost::cref(this->attributeName(cid))}]; hash = maths::CChecksum::calculate(hash, 
data.s_BucketValue); hash = maths::CChecksum::calculate(hash, data.s_IsInteger); hash = maths::CChecksum::calculate(hash, data.s_Samples); @@ -666,19 +727,23 @@ uint64_t CMetricPopulationModel::checksum(bool includeCurrentBucketStats) const void CMetricPopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CMetricPopulationModel"); this->CPopulationModel::debugMemoryUsage(mem->addChild()); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", m_CurrentBucketStats.s_PersonCounts, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", m_CurrentBucketStats.s_FeatureData, mem); - core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", m_CurrentBucketStats.s_InterimCorrections, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_PersonCounts", + m_CurrentBucketStats.s_PersonCounts, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_FeatureData", + m_CurrentBucketStats.s_FeatureData, mem); + core::CMemoryDebug::dynamicSize("m_CurrentBucketStats.s_InterimCorrections", + m_CurrentBucketStats.s_InterimCorrections, mem); core::CMemoryDebug::dynamicSize("m_FeatureModels", m_FeatureModels, mem); - core::CMemoryDebug::dynamicSize("m_FeatureCorrelatesModels", m_FeatureCorrelatesModels, mem); + core::CMemoryDebug::dynamicSize("m_FeatureCorrelatesModels", + m_FeatureCorrelatesModels, mem); core::CMemoryDebug::dynamicSize("m_MemoryEstimator", m_MemoryEstimator, mem); } std::size_t CMetricPopulationModel::memoryUsage() const { const CDataGatherer& gatherer = this->dataGatherer(); - TOptionalSize estimate = this->estimateMemoryUsage(gatherer.numberActivePeople(), - gatherer.numberActiveAttributes(), - 0); // # correlations + TOptionalSize estimate = this->estimateMemoryUsage( + gatherer.numberActivePeople(), gatherer.numberActiveAttributes(), + 0); // # correlations return estimate ? 
estimate.get() : this->computeMemoryUsage();
}

@@ -705,10 +770,12 @@ CMetricPopulationModel::CModelDetailsViewPtr CMetricPopulationModel::details() c
    return CModelDetailsViewPtr(new CMetricPopulationModelDetailsView(*this));
}

-const TSizeSizePrFeatureDataPrVec& CMetricPopulationModel::featureData(model_t::EFeature feature, core_t::TTime time) const {
+const TSizeSizePrFeatureDataPrVec&
+CMetricPopulationModel::featureData(model_t::EFeature feature, core_t::TTime time) const {
    static const TSizeSizePrFeatureDataPrVec EMPTY;
    if (!this->bucketStatsAvailable(time)) {
-        LOG_ERROR(<< "No statistics at " << time << ", current bucket = [" << m_CurrentBucketStats.s_StartTime << ","
+        LOG_ERROR(<< "No statistics at " << time << ", current bucket = ["
+                  << m_CurrentBucketStats.s_StartTime << ","
                  << m_CurrentBucketStats.s_StartTime + this->bucketLength() << ")");
        return EMPTY;
    }
@@ -736,7 +803,8 @@ const CMetricPopulationModel::TSizeUInt64PrVec& CMetricPopulationModel::personCo
    return m_CurrentBucketStats.s_PersonCounts;
}

-CPopulationModel::TCorrectionKeyDouble1VecUMap& CMetricPopulationModel::currentBucketInterimCorrections() const {
+CPopulationModel::TCorrectionKeyDouble1VecUMap&
+CMetricPopulationModel::currentBucketInterimCorrections() const {
    return m_CurrentBucketStats.s_InterimCorrections;
}

@@ -773,19 +841,24 @@ void CMetricPopulationModel::updateRecycledModels() {
    this->CPopulationModel::updateRecycledModels();
}

-void CMetricPopulationModel::refreshCorrelationModels(std::size_t resourceLimit, CResourceMonitor& resourceMonitor) {
+void CMetricPopulationModel::refreshCorrelationModels(std::size_t resourceLimit,
+                                                      CResourceMonitor& resourceMonitor) {
    std::size_t n = this->numberOfPeople();
-    double maxNumberCorrelations = this->params().s_CorrelationModelsOverhead * static_cast<double>(n);
-    auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate, this, n, 0, _1);
+    double maxNumberCorrelations = this->params().s_CorrelationModelsOverhead *
+                                   static_cast<double>(n);
+    auto memoryUsage = boost::bind(&CAnomalyDetectorModel::estimateMemoryUsageOrComputeAndUpdate,
+                                   this, n, 0, _1);
    CTimeSeriesCorrelateModelAllocator allocator(
-        resourceMonitor, memoryUsage, resourceLimit, static_cast<std::size_t>(maxNumberCorrelations + 0.5));
+        resourceMonitor, memoryUsage, resourceLimit,
+        static_cast<std::size_t>(maxNumberCorrelations + 0.5));
    for (auto& feature : m_FeatureCorrelatesModels) {
        allocator.prototypePrior(feature.s_ModelPrior);
        feature.s_Models->refresh(allocator);
    }
}

-void CMetricPopulationModel::clearPrunedResources(const TSizeVec& /*people*/, const TSizeVec& /*attributes*/) {
+void CMetricPopulationModel::clearPrunedResources(const TSizeVec& /*people*/,
+                                                  const TSizeVec& /*attributes*/) {
    CDataGatherer& gatherer = this->dataGatherer();
    for (auto cid : gatherer.recycledAttributeIds()) {
        for (auto& feature : m_FeatureModels) {
@@ -809,17 +882,25 @@ void CMetricPopulationModel::doSkipSampling(core_t::TTime startTime, core_t::TTi
    this->CPopulationModel::doSkipSampling(startTime, endTime);
}

-const maths::CModel* CMetricPopulationModel::model(model_t::EFeature feature, std::size_t cid) const {
+const maths::CModel* CMetricPopulationModel::model(model_t::EFeature feature,
+                                                   std::size_t cid) const {
    return const_cast<CMetricPopulationModel*>(this)->model(feature, cid);
}

maths::CModel* CMetricPopulationModel::model(model_t::EFeature feature, std::size_t cid) {
-    auto i = std::find_if(
-        m_FeatureModels.begin(), m_FeatureModels.end(), [feature](const SFeatureModels& model) { return model.s_Feature == feature; });
-    return i !=
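refreshCorrelationModels above caps the number of pairwise correlate models in proportion to the number of people before handing the budget to the allocator. The arithmetic, with illustrative names:

    #include <cstddef>

    // The allowance grows linearly with the population; adding 0.5
    // before the narrowing cast rounds to the nearest whole model count.
    inline std::size_t correlationBudget(double overheadPerPerson, std::size_t numberOfPeople) {
        double allowance = overheadPerPerson * static_cast<double>(numberOfPeople);
        return static_cast<std::size_t>(allowance + 0.5);
    }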
m_FeatureModels.end() && cid < i->s_Models.size() ? i->s_Models[cid].get() : nullptr; -} - -bool CMetricPopulationModel::correlates(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { + auto i = std::find_if(m_FeatureModels.begin(), m_FeatureModels.end(), + [feature](const SFeatureModels& model) { + return model.s_Feature == feature; + }); + return i != m_FeatureModels.end() && cid < i->s_Models.size() + ? i->s_Models[cid].get() + : nullptr; +} + +bool CMetricPopulationModel::correlates(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const { if (model_t::dimension(feature) > 1 || !this->params().s_MultivariateByFields) { return false; } @@ -831,7 +912,8 @@ bool CMetricPopulationModel::correlates(model_t::EFeature feature, std::size_t p for (std::size_t j = range.first; j < range.second; ++j) { std::size_t cids[]{cid, CDataGatherer::extractAttributeId(data[j])}; for (const auto& correlate : model->correlates()) { - if ((cids[0] == correlate[0] && cids[1] == correlate[1]) || (cids[1] == correlate[0] && cids[0] == correlate[1])) { + if ((cids[0] == correlate[0] && cids[1] == correlate[1]) || + (cids[1] == correlate[0] && cids[0] == correlate[1])) { return true; } } @@ -849,9 +931,11 @@ void CMetricPopulationModel::fill(model_t::EFeature feature, auto data = find(this->featureData(feature, bucketTime), pid, cid); const maths::CModel* model{this->model(feature, cid)}; const TOptionalSample& bucket{CDataGatherer::extractData(*data).s_BucketValue}; - core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength(), bucket->time())}; - TDouble2Vec4Vec weights{model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), - TDouble2Vec(dimension, bucket->varianceScale())}; + core_t::TTime time{model_t::sampleTime(feature, bucketTime, + this->bucketLength(), bucket->time())}; + TDouble2Vec4Vec weights{ + model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), + TDouble2Vec(dimension, bucket->varianceScale())}; params.s_Feature = feature; params.s_Model = model; @@ -860,10 +944,11 @@ void CMetricPopulationModel::fill(model_t::EFeature feature, params.s_Value.assign(1, bucket->value()); if (interim && model_t::requiresInterimResultAdjustment(feature)) { TDouble2Vec mode(params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weights)); - TDouble2Vec correction( - this->interimValueCorrector().corrections(time, this->currentBucketTotalCount(), mode, bucket->value(dimension))); + TDouble2Vec correction(this->interimValueCorrector().corrections( + time, this->currentBucketTotalCount(), mode, bucket->value(dimension))); params.s_Value[0] += correction; - this->currentBucketInterimCorrections().emplace(CCorrectionKey(feature, pid, cid), correction); + this->currentBucketInterimCorrections().emplace( + CCorrectionKey(feature, pid, cid), correction); } params.s_Count = 1.0; params.s_ComputeProbabilityParams.tag(pid) diff --git a/lib/model/CMetricPopulationModelFactory.cc b/lib/model/CMetricPopulationModelFactory.cc index 1d75fdb716..2d78e9d6e5 100644 --- a/lib/model/CMetricPopulationModelFactory.cc +++ b/lib/model/CMetricPopulationModelFactory.cc @@ -29,11 +29,8 @@ namespace model { CMetricPopulationModelFactory::CMetricPopulationModelFactory(const SModelParams& params, model_t::ESummaryMode summaryMode, const std::string& summaryCountFieldName) - : CModelFactory(params), - m_Identifier(), - m_SummaryMode(summaryMode), - m_SummaryCountFieldName(summaryCountFieldName), - m_UseNull(false), + : 
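In fill() above, interim (incomplete) buckets have their value adjusted before the probability calculation, and the correction is cached in currentBucketInterimCorrections() so the result builder can reuse it. A hypothetical sketch of the general idea only; CInterimBucketCorrector's actual formula, which conditions on the observed bucket count, is not shown in this patch:

    // Hypothetical: completeness in [0, 1] is the estimated fraction of
    // the bucket's data that has arrived; the correction shifts the
    // partial value toward the model's mode and vanishes as the bucket
    // completes.
    inline double interimCorrection(double partialValue, double mode, double completeness) {
        return (1.0 - completeness) * (mode - partialValue);
    }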
CModelFactory(params), m_Identifier(), m_SummaryMode(summaryMode), + m_SummaryCountFieldName(summaryCountFieldName), m_UseNull(false), m_BucketResultsDelay(0) { } @@ -41,7 +38,8 @@ CMetricPopulationModelFactory* CMetricPopulationModelFactory::clone() const { return new CMetricPopulationModelFactory(*this); } -CAnomalyDetectorModel* CMetricPopulationModelFactory::makeModel(const SModelInitializationData& initData) const { +CAnomalyDetectorModel* +CMetricPopulationModelFactory::makeModel(const SModelInitializationData& initData) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; if (!dataGatherer) { LOG_ERROR(<< "NULL data gatherer"); @@ -55,16 +53,16 @@ CAnomalyDetectorModel* CMetricPopulationModelFactory::makeModel(const SModelInit influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } - return new CMetricPopulationModel(this->modelParams(), - dataGatherer, - this->defaultFeatureModels(features, dataGatherer->bucketLength(), 1.0, false), - this->defaultCorrelatePriors(features), - this->defaultCorrelates(features), - influenceCalculators); + return new CMetricPopulationModel( + this->modelParams(), dataGatherer, + this->defaultFeatureModels(features, dataGatherer->bucketLength(), 1.0, false), + this->defaultCorrelatePriors(features), + this->defaultCorrelates(features), influenceCalculators); } -CAnomalyDetectorModel* CMetricPopulationModelFactory::makeModel(const SModelInitializationData& initData, - core::CStateRestoreTraverser& traverser) const { +CAnomalyDetectorModel* +CMetricPopulationModelFactory::makeModel(const SModelInitializationData& initData, + core::CStateRestoreTraverser& traverser) const { TDataGathererPtr dataGatherer = initData.s_DataGatherer; if (!dataGatherer) { LOG_ERROR(<< "NULL data gatherer"); @@ -78,52 +76,36 @@ CAnomalyDetectorModel* CMetricPopulationModelFactory::makeModel(const SModelInit influenceCalculators.push_back(this->defaultInfluenceCalculators(name, features)); } - return new CMetricPopulationModel(this->modelParams(), - dataGatherer, - this->defaultFeatureModels(features, dataGatherer->bucketLength(), 1.0, false), - this->defaultCorrelatePriors(features), - this->defaultCorrelates(features), - influenceCalculators, - traverser); + return new CMetricPopulationModel( + this->modelParams(), dataGatherer, + this->defaultFeatureModels(features, dataGatherer->bucketLength(), 1.0, false), + this->defaultCorrelatePriors(features), + this->defaultCorrelates(features), influenceCalculators, traverser); } -CDataGatherer* CMetricPopulationModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer(model_t::E_PopulationMetric, - m_SummaryMode, - this->modelParams(), - m_SummaryCountFieldName, - m_PartitionFieldName, - initData.s_PartitionFieldValue, - m_PersonFieldName, - m_AttributeFieldName, - m_ValueFieldName, - m_InfluenceFieldNames, - m_UseNull, - this->searchKey(), - m_Features, - initData.s_StartTime, - initData.s_SampleOverrideCount); +CDataGatherer* +CMetricPopulationModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { + return new CDataGatherer( + model_t::E_PopulationMetric, m_SummaryMode, this->modelParams(), + m_SummaryCountFieldName, m_PartitionFieldName, + initData.s_PartitionFieldValue, m_PersonFieldName, m_AttributeFieldName, + m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, this->searchKey(), + m_Features, initData.s_StartTime, initData.s_SampleOverrideCount); } -CDataGatherer* 
CMetricPopulationModelFactory::makeDataGatherer(const std::string& partitionFieldValue, - core::CStateRestoreTraverser& traverser) const { - return new CDataGatherer(model_t::E_PopulationMetric, - m_SummaryMode, - this->modelParams(), - m_SummaryCountFieldName, - m_PartitionFieldName, - partitionFieldValue, - m_PersonFieldName, - m_AttributeFieldName, - m_ValueFieldName, - m_InfluenceFieldNames, - m_UseNull, - this->searchKey(), - traverser); +CDataGatherer* +CMetricPopulationModelFactory::makeDataGatherer(const std::string& partitionFieldValue, + core::CStateRestoreTraverser& traverser) const { + return new CDataGatherer( + model_t::E_PopulationMetric, m_SummaryMode, this->modelParams(), + m_SummaryCountFieldName, m_PartitionFieldName, partitionFieldValue, + m_PersonFieldName, m_AttributeFieldName, m_ValueFieldName, + m_InfluenceFieldNames, m_UseNull, this->searchKey(), traverser); } -CMetricPopulationModelFactory::TPriorPtr CMetricPopulationModelFactory::defaultPrior(model_t::EFeature feature, - const SModelParams& params) const { +CMetricPopulationModelFactory::TPriorPtr +CMetricPopulationModelFactory::defaultPrior(model_t::EFeature feature, + const SModelParams& params) const { // Categorical data all use the multinomial prior. The creation // of these priors is managed by defaultCategoricalPrior. if (model_t::isCategorical(feature)) { @@ -149,12 +131,15 @@ CMetricPopulationModelFactory::TPriorPtr CMetricPopulationModelFactory::defaultP maths_t::EDataType dataType = this->dataType(); - maths::CGammaRateConjugate gammaPrior = maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CGammaRateConjugate gammaPrior = + maths::CGammaRateConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); maths::CLogNormalMeanPrecConjugate logNormalPrior = - maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, params.s_DecayRate); + maths::CLogNormalMeanPrecConjugate::nonInformativePrior(dataType, 0.0, + params.s_DecayRate); - maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); + maths::CNormalMeanPrecConjugate normalPrior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); // Create the component priors. 
TPriorPtrVec priors; @@ -170,14 +155,12 @@ CMetricPopulationModelFactory::TPriorPtr CMetricPopulationModelFactory::defaultP modePriors.emplace_back(logNormalPrior.clone()); modePriors.emplace_back(normalPrior.clone()); maths::COneOfNPrior modePrior(modePriors, dataType, params.s_DecayRate); - maths::CXMeansOnline1d clusterer(dataType, - maths::CAvailableModeDistributions::ALL, - maths_t::E_ClustersFractionWeight, - params.s_DecayRate, - params.s_MinimumModeFraction, - params.s_MinimumModeCount, - params.minimumCategoryCount()); - maths::CMultimodalPrior multimodalPrior(dataType, clusterer, modePrior, params.s_DecayRate); + maths::CXMeansOnline1d clusterer( + dataType, maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, params.s_DecayRate, params.s_MinimumModeFraction, + params.s_MinimumModeCount, params.minimumCategoryCount()); + maths::CMultimodalPrior multimodalPrior(dataType, clusterer, modePrior, + params.s_DecayRate); priors.emplace_back(multimodalPrior.clone()); } @@ -185,7 +168,8 @@ CMetricPopulationModelFactory::TPriorPtr CMetricPopulationModelFactory::defaultP } CMetricPopulationModelFactory::TMultivariatePriorPtr -CMetricPopulationModelFactory::defaultMultivariatePrior(model_t::EFeature feature, const SModelParams& params) const { +CMetricPopulationModelFactory::defaultMultivariatePrior(model_t::EFeature feature, + const SModelParams& params) const { std::size_t dimension = model_t::dimension(feature); // Gaussian mixture for modeling (latitude, longitude). @@ -195,7 +179,8 @@ CMetricPopulationModelFactory::defaultMultivariatePrior(model_t::EFeature featur TMultivariatePriorPtrVec priors; priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 2u : 1u); - TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(dimension, params); + TMultivariatePriorPtr multivariateNormal = + this->multivariateNormalPrior(dimension, params); priors.push_back(multivariateNormal); if (params.s_MinimumModeFraction <= 0.5) { priors.push_back(this->multivariateMultimodalPrior(dimension, params, *multivariateNormal)); @@ -205,7 +190,8 @@ CMetricPopulationModelFactory::defaultMultivariatePrior(model_t::EFeature featur } CMetricPopulationModelFactory::TMultivariatePriorPtr -CMetricPopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, const SModelParams& params) const { +CMetricPopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature*/, + const SModelParams& params) const { TMultivariatePriorPtrVec priors; priors.reserve(params.s_MinimumModeFraction <= 0.5 ? 
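defaultPrior above offers gamma, log-normal and normal candidates through a COneOfNPrior and, when small modes are permitted, wraps that in a multimodal prior driven by one-dimensional x-means clustering. The one-of-N scheme is essentially Bayesian model averaging; a toy sketch of the weighting, with a made-up candidate type (the real conjugate priors also update their hyperparameters per sample):

    #include <cmath>
    #include <vector>

    // Each candidate accumulates its log-likelihood; exponentiating and
    // normalising the weights gives each candidate's posterior
    // probability of having generated the data.
    struct SCandidate {
        double s_LogWeight = 0.0;
        double (*s_LogDensity)(double) = nullptr; // must be set before use
    };

    inline void addSample(std::vector<SCandidate>& candidates, double x) {
        for (auto& candidate : candidates) {
            candidate.s_LogWeight += candidate.s_LogDensity(x);
        }
    }

    // Example candidate density: a standard normal.
    inline double logStandardNormal(double x) {
        return -0.5 * x * x - 0.5 * std::log(6.283185307179586);
    }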
2u : 1u); TMultivariatePriorPtr multivariateNormal = this->multivariateNormalPrior(2, params); @@ -218,15 +204,10 @@ CMetricPopulationModelFactory::defaultCorrelatePrior(model_t::EFeature /*feature const CSearchKey& CMetricPopulationModelFactory::searchKey() const { if (!m_SearchKeyCache) { - m_SearchKeyCache.reset(CSearchKey(m_Identifier, - function_t::function(m_Features), - m_UseNull, - this->modelParams().s_ExcludeFrequent, - m_ValueFieldName, - m_AttributeFieldName, - m_PersonFieldName, - m_PartitionFieldName, - m_InfluenceFieldNames)); + m_SearchKeyCache.reset(CSearchKey( + m_Identifier, function_t::function(m_Features), m_UseNull, + this->modelParams().s_ExcludeFrequent, m_ValueFieldName, m_AttributeFieldName, + m_PersonFieldName, m_PartitionFieldName, m_InfluenceFieldNames)); } return *m_SearchKeyCache; } @@ -275,7 +256,8 @@ void CMetricPopulationModelFactory::bucketResultsDelay(std::size_t bucketResults m_BucketResultsDelay = bucketResultsDelay; } -CMetricPopulationModelFactory::TStrCRefVec CMetricPopulationModelFactory::partitioningFields() const { +CMetricPopulationModelFactory::TStrCRefVec +CMetricPopulationModelFactory::partitioningFields() const { TStrCRefVec result; result.reserve(3); if (!m_PartitionFieldName.empty()) { diff --git a/lib/model/CModelDetailsView.cc b/lib/model/CModelDetailsView.cc index 447e482f73..a1bb7238d5 100644 --- a/lib/model/CModelDetailsView.cc +++ b/lib/model/CModelDetailsView.cc @@ -20,7 +20,8 @@ namespace ml { namespace model { namespace { -const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight}; +const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight, + maths_t::E_SampleCountVarianceScaleWeight}; const std::string EMPTY_STRING(""); } @@ -42,18 +43,23 @@ const CModelDetailsView::TFeatureVec& CModelDetailsView::features() const { return this->base().dataGatherer().features(); } -void CModelDetailsView::modelPlot(core_t::TTime time, double boundsPercentile, const TStrSet& terms, CModelPlotData& modelPlotData) const { +void CModelDetailsView::modelPlot(core_t::TTime time, + double boundsPercentile, + const TStrSet& terms, + CModelPlotData& modelPlotData) const { for (auto feature : this->features()) { if (!model_t::isConstant(feature) && !model_t::isCategorical(feature)) { if (terms.empty() || !this->hasByField()) { for (std::size_t byFieldId = 0; byFieldId < this->maxByFieldId(); ++byFieldId) { - this->modelPlotForByFieldId(time, boundsPercentile, feature, byFieldId, modelPlotData); + this->modelPlotForByFieldId(time, boundsPercentile, feature, + byFieldId, modelPlotData); } } else { for (const auto& term : terms) { std::size_t byFieldId(0); if (this->byFieldId(term, byFieldId)) { - this->modelPlotForByFieldId(time, boundsPercentile, feature, byFieldId, modelPlotData); + this->modelPlotForByFieldId(time, boundsPercentile, feature, + byFieldId, modelPlotData); } } } @@ -88,7 +94,8 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time, TDouble2Vec supportLower(support.first); TDouble2Vec supportUpper(support.second); - TDouble2Vec3Vec interval(model->confidenceInterval(time, boundsPercentile, WEIGHT_STYLES, weights)); + TDouble2Vec3Vec interval(model->confidenceInterval(time, boundsPercentile, + WEIGHT_STYLES, weights)); if (interval.size() == 3) { TDouble2Vec lower = maths::CTools::truncate(interval[0], supportLower, supportUpper); @@ -96,7 +103,8 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time, TDouble2Vec 
median = maths::CTools::truncate(interval[1], lower, upper); // TODO This data structure should support multivariate features. - modelPlotData.get(feature, this->byFieldValue(byFieldId)) = CModelPlotData::SByFieldData(lower[0], upper[0], median[0]); + modelPlotData.get(feature, this->byFieldValue(byFieldId)) = + CModelPlotData::SByFieldData(lower[0], upper[0], median[0]); } } } @@ -117,7 +125,8 @@ void CModelDetailsView::addCurrentBucketValues(core_t::TTime time, if (this->contains(terms, byFieldValue)) { TDouble1Vec value(this->base().currentBucketValue(feature, pid, cid, time)); if (!value.empty()) { - const std::string& overFieldValue{isPopulation ? this->base().personName(pid) : EMPTY_STRING}; + const std::string& overFieldValue{ + isPopulation ? this->base().personName(pid) : EMPTY_STRING}; modelPlotData.get(feature, byFieldValue).addValue(overFieldValue, value[0]); } } @@ -151,38 +160,48 @@ bool CModelDetailsView::contains(const TStrSet& terms, const std::string& key) { } bool CModelDetailsView::hasByField() const { - return (this->base().isPopulation() ? this->base().dataGatherer().attributeFieldName() : this->base().dataGatherer().personFieldName()) + return (this->base().isPopulation() + ? this->base().dataGatherer().attributeFieldName() + : this->base().dataGatherer().personFieldName()) .empty(); } std::size_t CModelDetailsView::maxByFieldId() const { - return this->base().isPopulation() ? this->base().dataGatherer().numberAttributes() : this->base().dataGatherer().numberPeople(); + return this->base().isPopulation() + ? this->base().dataGatherer().numberAttributes() + : this->base().dataGatherer().numberPeople(); } bool CModelDetailsView::byFieldId(const std::string& byFieldValue, std::size_t& result) const { - return this->base().isPopulation() ? this->base().dataGatherer().attributeId(byFieldValue, result) - : this->base().dataGatherer().personId(byFieldValue, result); + return this->base().isPopulation() + ? this->base().dataGatherer().attributeId(byFieldValue, result) + : this->base().dataGatherer().personId(byFieldValue, result); } const std::string& CModelDetailsView::byFieldValue(std::size_t byFieldId) const { - return this->base().isPopulation() ? this->base().attributeName(byFieldId) : this->base().personName(byFieldId); + return this->base().isPopulation() ? this->base().attributeName(byFieldId) + : this->base().personName(byFieldId); } const std::string& CModelDetailsView::byFieldValue(std::size_t pid, std::size_t cid) const { - return this->base().isPopulation() ? this->base().attributeName(cid) : this->base().personName(pid); + return this->base().isPopulation() ? this->base().attributeName(cid) + : this->base().personName(pid); } bool CModelDetailsView::isByFieldIdActive(std::size_t byFieldId) const { - return this->base().isPopulation() ? this->base().dataGatherer().isAttributeActive(byFieldId) - : this->base().dataGatherer().isPersonActive(byFieldId); + return this->base().isPopulation() + ? 
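The model plot hunks above clamp the confidence interval to the feature's support and then the median to the clamped bounds, so the plotted band is always ordered and in range. The scalar form of that clamping:

    #include <algorithm>

    // Scalar equivalent of the maths::CTools::truncate calls above.
    inline double truncate(double x, double lo, double hi) {
        return std::min(std::max(x, lo), hi);
    }

    inline void clampBand(double supportLo, double supportHi,
                          double& lower, double& median, double& upper) {
        lower = truncate(lower, supportLo, supportHi);
        upper = truncate(upper, supportLo, supportHi);
        median = truncate(median, lower, upper); // keeps lower <= median <= upper
    }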
this->base().dataGatherer().isAttributeActive(byFieldId) + : this->base().dataGatherer().isPersonActive(byFieldId); } ////////// CEventRateModelDetailsView Implementation ////////// -CEventRateModelDetailsView::CEventRateModelDetailsView(const CEventRateModel& model) : m_Model(&model) { +CEventRateModelDetailsView::CEventRateModelDetailsView(const CEventRateModel& model) + : m_Model(&model) { } -const maths::CModel* CEventRateModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const { +const maths::CModel* CEventRateModelDetailsView::model(model_t::EFeature feature, + std::size_t byFieldId) const { return m_Model->model(feature, byFieldId); } @@ -190,17 +209,20 @@ const CAnomalyDetectorModel& CEventRateModelDetailsView::base() const { return *m_Model; } -double -CEventRateModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, std::size_t /*byFieldId*/, core_t::TTime /*time*/) const { +double CEventRateModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, + std::size_t /*byFieldId*/, + core_t::TTime /*time*/) const { return 1.0; } ////////// CEventRatePopulationModelDetailsView Implementation ////////// -CEventRatePopulationModelDetailsView::CEventRatePopulationModelDetailsView(const CEventRatePopulationModel& model) : m_Model(&model) { +CEventRatePopulationModelDetailsView::CEventRatePopulationModelDetailsView(const CEventRatePopulationModel& model) + : m_Model(&model) { } -const maths::CModel* CEventRatePopulationModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const { +const maths::CModel* CEventRatePopulationModelDetailsView::model(model_t::EFeature feature, + std::size_t byFieldId) const { return m_Model->model(feature, byFieldId); } @@ -216,10 +238,12 @@ double CEventRatePopulationModelDetailsView::countVarianceScale(model_t::EFeatur ////////// CMetricModelDetailsView Implementation ////////// -CMetricModelDetailsView::CMetricModelDetailsView(const CMetricModel& model) : m_Model(&model) { +CMetricModelDetailsView::CMetricModelDetailsView(const CMetricModel& model) + : m_Model(&model) { } -const maths::CModel* CMetricModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const { +const maths::CModel* CMetricModelDetailsView::model(model_t::EFeature feature, + std::size_t byFieldId) const { return m_Model->model(feature, byFieldId); } @@ -227,20 +251,26 @@ const CAnomalyDetectorModel& CMetricModelDetailsView::base() const { return *m_Model; } -double CMetricModelDetailsView::countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const { +double CMetricModelDetailsView::countVarianceScale(model_t::EFeature feature, + std::size_t byFieldId, + core_t::TTime time) const { TOptionalUInt64 count = m_Model->currentBucketCount(byFieldId, time); if (!count) { return 1.0; } - return model_t::varianceScale(feature, m_Model->dataGatherer().effectiveSampleCount(byFieldId), static_cast(*count)); + return model_t::varianceScale(feature, + m_Model->dataGatherer().effectiveSampleCount(byFieldId), + static_cast(*count)); } ////////// CMetricPopulationModelDetailsView Implementation ////////// -CMetricPopulationModelDetailsView::CMetricPopulationModelDetailsView(const CMetricPopulationModel& model) : m_Model(&model) { +CMetricPopulationModelDetailsView::CMetricPopulationModelDetailsView(const CMetricPopulationModel& model) + : m_Model(&model) { } -const maths::CModel* CMetricPopulationModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const { +const 
maths::CModel* CMetricPopulationModelDetailsView::model(model_t::EFeature feature, + std::size_t byFieldId) const { return m_Model->model(feature, byFieldId); } @@ -248,12 +278,16 @@ const CAnomalyDetectorModel& CMetricPopulationModelDetailsView::base() const { return *m_Model; } -double CMetricPopulationModelDetailsView::countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const { +double CMetricPopulationModelDetailsView::countVarianceScale(model_t::EFeature feature, + std::size_t byFieldId, + core_t::TTime time) const { TOptionalUInt64 count = m_Model->currentBucketCount(byFieldId, time); if (!count) { return 1.0; } - return model_t::varianceScale(feature, m_Model->dataGatherer().effectiveSampleCount(byFieldId), static_cast(*count)); + return model_t::varianceScale(feature, + m_Model->dataGatherer().effectiveSampleCount(byFieldId), + static_cast(*count)); } } } diff --git a/lib/model/CModelFactory.cc b/lib/model/CModelFactory.cc index 4790dafef8..97018e3831 100644 --- a/lib/model/CModelFactory.cc +++ b/lib/model/CModelFactory.cc @@ -37,13 +37,15 @@ namespace model { const std::string CModelFactory::EMPTY_STRING(""); -CModelFactory::CModelFactory(const SModelParams& params) : m_ModelParams(params) { +CModelFactory::CModelFactory(const SModelParams& params) + : m_ModelParams(params) { } -const CModelFactory::TFeatureMathsModelPtrPrVec& CModelFactory::defaultFeatureModels(const TFeatureVec& features, - core_t::TTime bucketLength, - double minimumSeasonalVarianceScale, - bool modelAnomalies) const { +const CModelFactory::TFeatureMathsModelPtrPrVec& +CModelFactory::defaultFeatureModels(const TFeatureVec& features, + core_t::TTime bucketLength, + double minimumSeasonalVarianceScale, + bool modelAnomalies) const { auto result = m_MathsModelCache.insert({features, TFeatureMathsModelPtrPrVec()}); if (result.second) { result.first->second.reserve(features.size()); @@ -52,16 +54,18 @@ const CModelFactory::TFeatureMathsModelPtrPrVec& CModelFactory::defaultFeatureMo continue; } result.first->second.emplace_back( - feature, this->defaultFeatureModel(feature, bucketLength, minimumSeasonalVarianceScale, modelAnomalies)); + feature, this->defaultFeatureModel(feature, bucketLength, minimumSeasonalVarianceScale, + modelAnomalies)); } } return result.first->second; } -CModelFactory::TMathsModelPtr CModelFactory::defaultFeatureModel(model_t::EFeature feature, - core_t::TTime bucketLength, - double minimumSeasonalVarianceScale, - bool modelAnomalies) const { +CModelFactory::TMathsModelPtr +CModelFactory::defaultFeatureModel(model_t::EFeature feature, + core_t::TTime bucketLength, + double minimumSeasonalVarianceScale, + bool modelAnomalies) const { if (model_t::isCategorical(feature)) { return TMathsModelPtr(); } @@ -79,7 +83,8 @@ CModelFactory::TMathsModelPtr CModelFactory::defaultFeatureModel(model_t::EFeatu bool controlDecayRate{m_ModelParams.s_ControlDecayRate && !model_t::isConstant(feature)}; TDecayRateController2Ary controllers{ - {maths::CDecayRateController{maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, + {maths::CDecayRateController{maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease, dimension}, maths::CDecayRateController{maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease | @@ -89,21 +94,23 @@ CModelFactory::TMathsModelPtr CModelFactory::defaultFeatureModel(model_t::EFeatu if (dimension == 1) { 
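Both metric details views above derive a count variance scale from the per-entity effective sample count and the observed bucket count. For a bucket-mean feature the natural form is their ratio, since the variance of a mean of n values scales as 1/n; a sketch under that assumption (model_t::varianceScale itself is feature-dependent):

    // More samples in the bucket than usual means a tighter bucket mean,
    // so the model variance is scaled down, and vice versa; an empty
    // bucket gets no scaling.
    inline double meanVarianceScale(double effectiveSampleCount, double bucketCount) {
        return bucketCount > 0.0 ? effectiveSampleCount / bucketCount : 1.0;
    }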
TPriorPtr prior{this->defaultPrior(feature)}; - return boost::make_shared(params, - 0, // identifier (unused). - *trend, - *prior, - controlDecayRate ? &controllers : nullptr, - modelAnomalies && !model_t::isConstant(feature)); + return boost::make_shared( + params, + 0, // identifier (unused). + *trend, *prior, controlDecayRate ? &controllers : nullptr, + modelAnomalies && !model_t::isConstant(feature)); } TMultivariatePriorPtr prior{this->defaultMultivariatePrior(feature)}; return boost::make_shared( - params, *trend, *prior, controlDecayRate ? &controllers : nullptr, modelAnomalies && !model_t::isConstant(feature)); + params, *trend, *prior, controlDecayRate ? &controllers : nullptr, + modelAnomalies && !model_t::isConstant(feature)); } -const CModelFactory::TFeatureMultivariatePriorPtrPrVec& CModelFactory::defaultCorrelatePriors(const TFeatureVec& features) const { - auto result = m_CorrelatePriorCache.insert({features, TFeatureMultivariatePriorPtrPrVec()}); +const CModelFactory::TFeatureMultivariatePriorPtrPrVec& +CModelFactory::defaultCorrelatePriors(const TFeatureVec& features) const { + auto result = m_CorrelatePriorCache.insert( + {features, TFeatureMultivariatePriorPtrPrVec()}); if (result.second) { result.first->second.reserve(features.size()); for (auto feature : features) { @@ -116,15 +123,17 @@ const CModelFactory::TFeatureMultivariatePriorPtrPrVec& CModelFactory::defaultCo return result.first->second; } -const CModelFactory::TFeatureCorrelationsPtrPrVec& CModelFactory::defaultCorrelates(const TFeatureVec& features) const { +const CModelFactory::TFeatureCorrelationsPtrPrVec& +CModelFactory::defaultCorrelates(const TFeatureVec& features) const { auto result = m_CorrelationsCache.insert({features, TFeatureCorrelationsPtrPrVec()}); if (result.second) { result.first->second.reserve(features.size()); for (auto feature : features) { if (!model_t::isCategorical(feature) && model_t::dimension(feature) == 1) { - result.first->second.emplace_back(feature, - TCorrelationsPtr(new maths::CTimeSeriesCorrelations( - m_ModelParams.s_MinimumSignificantCorrelation, m_ModelParams.s_DecayRate))); + result.first->second.emplace_back( + feature, TCorrelationsPtr(new maths::CTimeSeriesCorrelations( + m_ModelParams.s_MinimumSignificantCorrelation, + m_ModelParams.s_DecayRate))); } } } @@ -135,45 +144,56 @@ CModelFactory::TPriorPtr CModelFactory::defaultPrior(model_t::EFeature feature) return this->defaultPrior(feature, m_ModelParams); } -CModelFactory::TMultivariatePriorPtr CModelFactory::defaultMultivariatePrior(model_t::EFeature feature) const { +CModelFactory::TMultivariatePriorPtr +CModelFactory::defaultMultivariatePrior(model_t::EFeature feature) const { return this->defaultMultivariatePrior(feature, m_ModelParams); } -CModelFactory::TMultivariatePriorPtr CModelFactory::defaultCorrelatePrior(model_t::EFeature feature) const { +CModelFactory::TMultivariatePriorPtr +CModelFactory::defaultCorrelatePrior(model_t::EFeature feature) const { return this->defaultCorrelatePrior(feature, m_ModelParams); } maths::CMultinomialConjugate CModelFactory::defaultCategoricalPrior() const { - return maths::CMultinomialConjugate::nonInformativePrior(boost::numeric::bounds::highest(), m_ModelParams.s_DecayRate); + return maths::CMultinomialConjugate::nonInformativePrior( + boost::numeric::bounds::highest(), m_ModelParams.s_DecayRate); } -CModelFactory::TDecompositionCPtr CModelFactory::defaultDecomposition(model_t::EFeature feature, core_t::TTime bucketLength) const { +CModelFactory::TDecompositionCPtr 
+CModelFactory::defaultDecomposition(model_t::EFeature feature, core_t::TTime bucketLength) const { if (model_t::isCategorical(feature)) { return TDecompositionCPtr(); } else if (model_t::isDiurnal(feature) || model_t::isConstant(feature)) { return boost::make_shared(); } - double decayRate = CAnomalyDetectorModelConfig::trendDecayRate(m_ModelParams.s_DecayRate, bucketLength); - return boost::make_shared(decayRate, bucketLength, m_ModelParams.s_ComponentSize); + double decayRate = CAnomalyDetectorModelConfig::trendDecayRate( + m_ModelParams.s_DecayRate, bucketLength); + return boost::make_shared( + decayRate, bucketLength, m_ModelParams.s_ComponentSize); } -const CModelFactory::TFeatureInfluenceCalculatorCPtrPrVec& CModelFactory::defaultInfluenceCalculators(const std::string& influencerName, - const TFeatureVec& features) const { - TFeatureInfluenceCalculatorCPtrPrVec& result = m_InfluenceCalculatorCache[TStrFeatureVecPr(influencerName, features)]; +const CModelFactory::TFeatureInfluenceCalculatorCPtrPrVec& +CModelFactory::defaultInfluenceCalculators(const std::string& influencerName, + const TFeatureVec& features) const { + TFeatureInfluenceCalculatorCPtrPrVec& result = + m_InfluenceCalculatorCache[TStrFeatureVecPr(influencerName, features)]; if (result.empty()) { result.reserve(features.size()); TStrCRefVec partitioningFields = this->partitioningFields(); - std::sort(partitioningFields.begin(), partitioningFields.end(), maths::COrderings::SReferenceLess()); + std::sort(partitioningFields.begin(), partitioningFields.end(), + maths::COrderings::SReferenceLess()); for (auto feature : features) { if (model_t::isCategorical(feature)) { continue; } - if (std::binary_search( - partitioningFields.begin(), partitioningFields.end(), influencerName, maths::COrderings::SReferenceLess())) { - result.emplace_back(feature, boost::make_shared()); + if (std::binary_search(partitioningFields.begin(), + partitioningFields.end(), influencerName, + maths::COrderings::SReferenceLess())) { + result.emplace_back( + feature, boost::make_shared()); } else { result.emplace_back(feature, model_t::influenceCalculator(feature)); } @@ -261,33 +281,36 @@ void CModelFactory::swap(CModelFactory& other) { m_InfluenceCalculatorCache.swap(other.m_InfluenceCalculatorCache); } -CModelFactory::TMultivariatePriorPtr CModelFactory::multivariateNormalPrior(std::size_t dimension, const SModelParams& params) const { - return maths::CMultivariateNormalConjugateFactory::nonInformative(dimension, this->dataType(), params.s_DecayRate); +CModelFactory::TMultivariatePriorPtr +CModelFactory::multivariateNormalPrior(std::size_t dimension, const SModelParams& params) const { + return maths::CMultivariateNormalConjugateFactory::nonInformative( + dimension, this->dataType(), params.s_DecayRate); } -CModelFactory::TMultivariatePriorPtr CModelFactory::multivariateMultimodalPrior(std::size_t dimension, - const SModelParams& params, - const maths::CMultivariatePrior& modePrior) const { - return maths::CMultivariateMultimodalPriorFactory::nonInformative(dimension, - this->dataType(), - params.s_DecayRate, - maths_t::E_ClustersFractionWeight, - params.s_MinimumModeFraction, - params.s_MinimumModeCount, - params.minimumCategoryCount(), - modePrior); +CModelFactory::TMultivariatePriorPtr +CModelFactory::multivariateMultimodalPrior(std::size_t dimension, + const SModelParams& params, + const maths::CMultivariatePrior& modePrior) const { + return maths::CMultivariateMultimodalPriorFactory::nonInformative( + dimension, this->dataType(), 
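defaultDecomposition above returns a stub for categorical, diurnal or constant features and otherwise builds a full decomposition whose decay rate has been rescaled for the bucket length by CAnomalyDetectorModelConfig::trendDecayRate. A hypothetical sketch of why such a rescaling exists: decay is applied once per bucket, so longer buckets need a proportionally larger per-update decay for components to age at the same wall-clock rate. The reference length here is made up and the real formula differs:

    // Hypothetical constants, for illustration only.
    inline double scaledDecayRate(double decayRatePerBucket, double bucketLengthSecs) {
        const double referenceBucketLengthSecs = 1800.0; // illustrative
        return decayRatePerBucket * (bucketLengthSecs / referenceBucketLengthSecs);
    }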
params.s_DecayRate, + maths_t::E_ClustersFractionWeight, params.s_MinimumModeFraction, + params.s_MinimumModeCount, params.minimumCategoryCount(), modePrior); } CModelFactory::TMultivariatePriorPtr -CModelFactory::multivariateOneOfNPrior(std::size_t dimension, const SModelParams& params, const TMultivariatePriorPtrVec& models) const { - return maths::CMultivariateOneOfNPriorFactory::nonInformative(dimension, this->dataType(), params.s_DecayRate, models); +CModelFactory::multivariateOneOfNPrior(std::size_t dimension, + const SModelParams& params, + const TMultivariatePriorPtrVec& models) const { + return maths::CMultivariateOneOfNPriorFactory::nonInformative( + dimension, this->dataType(), params.s_DecayRate, models); } CModelFactory::TPriorPtr CModelFactory::timeOfDayPrior(const SModelParams& params) const { using TPriorPtrVec = std::vector; maths_t::EDataType dataType = this->dataType(); - maths::CNormalMeanPrecConjugate normalPrior = maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); + maths::CNormalMeanPrecConjugate normalPrior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(dataType, params.s_DecayRate); // Create a multimodal prior with purely normal distributions // - don't bother with long-tail distributions @@ -296,45 +319,49 @@ CModelFactory::TPriorPtr CModelFactory::timeOfDayPrior(const SModelParams& param modePriors.reserve(1u); modePriors.emplace_back(normalPrior.clone()); maths::COneOfNPrior modePrior(modePriors, dataType, params.s_DecayRate); - maths::CXMeansOnline1d clusterer(dataType, - maths::CAvailableModeDistributions::NORMAL, - maths_t::E_ClustersFractionWeight, - params.s_DecayRate, - 0.03, // minimumClusterFraction - 4, // minimumClusterCount - CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION); + maths::CXMeansOnline1d clusterer( + dataType, maths::CAvailableModeDistributions::NORMAL, + maths_t::E_ClustersFractionWeight, params.s_DecayRate, + 0.03, // minimumClusterFraction + 4, // minimumClusterCount + CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION); - return boost::make_shared(dataType, clusterer, modePrior, params.s_DecayRate); + return boost::make_shared(dataType, clusterer, modePrior, + params.s_DecayRate); } -CModelFactory::TMultivariatePriorPtr CModelFactory::latLongPrior(const SModelParams& params) const { +CModelFactory::TMultivariatePriorPtr +CModelFactory::latLongPrior(const SModelParams& params) const { maths_t::EDataType dataType = this->dataType(); - TMultivariatePriorPtr modePrior = maths::CMultivariateNormalConjugateFactory::nonInformative(2, dataType, params.s_DecayRate); - return maths::CMultivariateMultimodalPriorFactory::nonInformative(2, // dimension - dataType, - params.s_DecayRate, - maths_t::E_ClustersFractionWeight, - 0.03, // minimumClusterFraction - 4, // minimumClusterCount - CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION, - *modePrior); + TMultivariatePriorPtr modePrior = maths::CMultivariateNormalConjugateFactory::nonInformative( + 2, dataType, params.s_DecayRate); + return maths::CMultivariateMultimodalPriorFactory::nonInformative( + 2, // dimension + dataType, params.s_DecayRate, maths_t::E_ClustersFractionWeight, + 0.03, // minimumClusterFraction + 4, // minimumClusterCount + CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION, *modePrior); } const SModelParams& CModelFactory::modelParams() const { return m_ModelParams; } -CModelFactory::SModelInitializationData::SModelInitializationData(const TDataGathererPtr& dataGatherer) : 
s_DataGatherer(dataGatherer) { +CModelFactory::SModelInitializationData::SModelInitializationData(const TDataGathererPtr& dataGatherer) + : s_DataGatherer(dataGatherer) { } -CModelFactory::SGathererInitializationData::SGathererInitializationData(core_t::TTime startTime, - const std::string& partitionFieldValue, - unsigned int sampleOverrideCount) - : s_StartTime(startTime), s_PartitionFieldValue(partitionFieldValue), s_SampleOverrideCount(sampleOverrideCount) { +CModelFactory::SGathererInitializationData::SGathererInitializationData( + core_t::TTime startTime, + const std::string& partitionFieldValue, + unsigned int sampleOverrideCount) + : s_StartTime(startTime), s_PartitionFieldValue(partitionFieldValue), + s_SampleOverrideCount(sampleOverrideCount) { } CModelFactory::SGathererInitializationData::SGathererInitializationData(core_t::TTime startTime) - : s_StartTime(startTime), s_PartitionFieldValue(EMPTY_STRING), s_SampleOverrideCount(0u) { + : s_StartTime(startTime), s_PartitionFieldValue(EMPTY_STRING), + s_SampleOverrideCount(0u) { } } } diff --git a/lib/model/CModelParams.cc b/lib/model/CModelParams.cc index e11b0c8b1e..bad077c2da 100644 --- a/lib/model/CModelParams.cc +++ b/lib/model/CModelParams.cc @@ -27,19 +27,17 @@ const core_t::TTime SAMPLING_AGE_CUTOFF_DEFAULT(2 * core::constants::DAY); SModelParams::SModelParams(core_t::TTime bucketLength) : s_BucketLength(bucketLength), - s_MultivariateComponentDelimiter(CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER), - s_LearnRate(1.0), - s_DecayRate(0.0), + s_MultivariateComponentDelimiter( + CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER), + s_LearnRate(1.0), s_DecayRate(0.0), s_InitialDecayRateMultiplier(CAnomalyDetectorModelConfig::DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER), - s_ControlDecayRate(true), - s_MinimumModeFraction(0.0), + s_ControlDecayRate(true), s_MinimumModeFraction(0.0), s_MinimumModeCount(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT), s_CutoffToModelEmptyBuckets(CAnomalyDetectorModelConfig::DEFAULT_CUTOFF_TO_MODEL_EMPTY_BUCKETS), s_ComponentSize(CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE), s_MinimumTimeToDetectChange(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE), s_MaximumTimeToTestForChange(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE), - s_ExcludeFrequent(model_t::E_XF_None), - s_ExcludePersonFrequency(0.1), + s_ExcludeFrequent(model_t::E_XF_None), s_ExcludePersonFrequency(0.1), s_ExcludeAttributeFrequency(0.1), s_MaximumUpdatesPerBucket(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET), s_InfluenceCutoff(CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF), @@ -50,13 +48,11 @@ SModelParams::SModelParams(core_t::TTime bucketLength) s_PruneWindowScaleMaximum(CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM), s_CorrelationModelsOverhead(CAnomalyDetectorModelConfig::DEFAULT_CORRELATION_MODELS_OVERHEAD), s_MultivariateByFields(false), - s_MinimumSignificantCorrelation(CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION), - s_DetectionRules(EMPTY_RULES), - s_ScheduledEvents(EMPTY_SCHEDULED_EVENTS), - s_BucketResultsDelay(0), - s_MinimumToDeduplicate(10000), - s_CacheProbabilities(true), - s_SamplingAgeCutoff(SAMPLING_AGE_CUTOFF_DEFAULT) { + s_MinimumSignificantCorrelation( + CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION), + s_DetectionRules(EMPTY_RULES), s_ScheduledEvents(EMPTY_SCHEDULED_EVENTS), + s_BucketResultsDelay(0), 
s_MinimumToDeduplicate(10000), + s_CacheProbabilities(true), s_SamplingAgeCutoff(SAMPLING_AGE_CUTOFF_DEFAULT) { } void SModelParams::configureLatency(core_t::TTime latency, core_t::TTime bucketLength) { @@ -74,13 +70,17 @@ double SModelParams::minimumCategoryCount() const { return s_LearnRate * CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION; } -maths::STimeSeriesDecompositionRestoreParams SModelParams::decompositionRestoreParams(maths_t::EDataType dataType) const { +maths::STimeSeriesDecompositionRestoreParams +SModelParams::decompositionRestoreParams(maths_t::EDataType dataType) const { double decayRate{CAnomalyDetectorModelConfig::trendDecayRate(s_DecayRate, s_BucketLength)}; - return {decayRate, s_BucketLength, s_ComponentSize, this->distributionRestoreParams(dataType)}; + return {decayRate, s_BucketLength, s_ComponentSize, + this->distributionRestoreParams(dataType)}; } -maths::SDistributionRestoreParams SModelParams::distributionRestoreParams(maths_t::EDataType dataType) const { - return {dataType, s_DecayRate, s_MinimumModeFraction, s_MinimumModeCount, this->minimumCategoryCount()}; +maths::SDistributionRestoreParams +SModelParams::distributionRestoreParams(maths_t::EDataType dataType) const { + return {dataType, s_DecayRate, s_MinimumModeFraction, s_MinimumModeCount, + this->minimumCategoryCount()}; } uint64_t SModelParams::checksum(uint64_t seed) const { diff --git a/lib/model/CModelPlotData.cc b/lib/model/CModelPlotData.cc index bba33315bd..277652d603 100644 --- a/lib/model/CModelPlotData.cc +++ b/lib/model/CModelPlotData.cc @@ -38,20 +38,19 @@ CModelPlotData::CModelPlotData(core_t::TTime time, const std::string& byFieldName, core_t::TTime bucketSpan, int detectorIndex) - : m_Time(time), - m_PartitionFieldName(partitionFieldName), + : m_Time(time), m_PartitionFieldName(partitionFieldName), m_PartitionFieldValue(partitionFieldValue), - m_OverFieldName(overFieldName), - m_ByFieldName(byFieldName), - m_BucketSpan(bucketSpan), - m_DetectorIndex(detectorIndex) { + m_OverFieldName(overFieldName), m_ByFieldName(byFieldName), + m_BucketSpan(bucketSpan), m_DetectorIndex(detectorIndex) { } -CModelPlotData::SByFieldData::SByFieldData() : s_LowerBound(0.0), s_UpperBound(0.0), s_Median(0.0), s_ValuesPerOverField() { +CModelPlotData::SByFieldData::SByFieldData() + : s_LowerBound(0.0), s_UpperBound(0.0), s_Median(0.0), s_ValuesPerOverField() { } CModelPlotData::SByFieldData::SByFieldData(double lowerBound, double upperBound, double median) - : s_LowerBound(lowerBound), s_UpperBound(upperBound), s_Median(median), s_ValuesPerOverField() { + : s_LowerBound(lowerBound), s_UpperBound(upperBound), s_Median(median), + s_ValuesPerOverField() { } void CModelPlotData::SByFieldData::acceptPersistInserter(core::CStatePersistInserter& inserter) const { @@ -77,7 +76,8 @@ bool CModelPlotData::SByFieldData::acceptRestoreTraverser(core::CStateRestoreTra return false; } } else if (name == VALUES_PER_OVERFIELD_TAG) { - if (!core::CPersistUtils::restore(VALUES_PER_OVERFIELD_TAG, s_ValuesPerOverField, traverser)) { + if (!core::CPersistUtils::restore(VALUES_PER_OVERFIELD_TAG, + s_ValuesPerOverField, traverser)) { return false; } } @@ -106,19 +106,23 @@ bool CModelPlotData::acceptRestoreTraverser(core::CStateRestoreTraverser& traver } m_DataPerFeature.clear(); - for (TIntStrByFieldDataUMapUMap::const_iterator i = data.begin(); i != data.end(); ++i) { - m_DataPerFeature.insert(TFeatureStrByFieldDataUMapPr(model_t::EFeature(i->first), i->second)); + for (TIntStrByFieldDataUMapUMap::const_iterator i = 
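configureLatency above converts the accepted out-of-order window into a number of latency buckets of retained state. Its body is elided from this hunk; the natural computation is a ceiling division, sketched here under that assumption:

    #include <cstdint>

    // ceil(latency / bucketLength) without floating point; assumes both
    // inputs are positive.
    inline std::int64_t latencyBuckets(std::int64_t latencySecs, std::int64_t bucketLengthSecs) {
        return (latencySecs + bucketLengthSecs - 1) / bucketLengthSecs;
    }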
data.begin();
+         i != data.end(); ++i) {
+        m_DataPerFeature.insert(TFeatureStrByFieldDataUMapPr(
+            model_t::EFeature(i->first), i->second));
    }
} else if (name == TIME_TAG) {
    if (!core::CPersistUtils::restore(TIME_TAG, m_Time, traverser)) {
        return false;
    }
} else if (name == PARTITION_FIELD_NAME_TAG) {
-    if (!core::CPersistUtils::restore(PARTITION_FIELD_NAME_TAG, m_PartitionFieldName, traverser)) {
+    if (!core::CPersistUtils::restore(PARTITION_FIELD_NAME_TAG,
+                                      m_PartitionFieldName, traverser)) {
        return false;
    }
} else if (name == PARTITION_FIELD_VALUE_TAG) {
-    if (!core::CPersistUtils::restore(PARTITION_FIELD_VALUE_TAG, m_PartitionFieldValue, traverser)) {
+    if (!core::CPersistUtils::restore(PARTITION_FIELD_VALUE_TAG,
+                                      m_PartitionFieldValue, traverser)) {
        return false;
    }
} else if (name == OVER_FIELD_NAME_TAG) {
@@ -175,7 +179,8 @@ CModelPlotData::TFeatureStrByFieldDataUMapUMapCItr CModelPlotData::end() const {
    return m_DataPerFeature.end();
}

-CModelPlotData::SByFieldData& CModelPlotData::get(const model_t::EFeature& feature, const std::string& byFieldValue) {
+CModelPlotData::SByFieldData& CModelPlotData::get(const model_t::EFeature& feature,
+                                                  const std::string& byFieldValue) {
    // note: This creates/inserts! elements and returns a reference for writing
    // data insert happens here
    return m_DataPerFeature[feature][byFieldValue];
diff --git a/lib/model/CModelTools.cc b/lib/model/CModelTools.cc
index f6aed87618..e4525826e7 100644
--- a/lib/model/CModelTools.cc
+++ b/lib/model/CModelTools.cc
@@ -31,10 +31,14 @@
using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack<double, 1u>;

struct SAddProbability : public boost::static_visitor<void> {
-    void operator()(double probability, double weight, maths::CJointProbabilityOfLessLikelySamples& aggregator) const {
+    void operator()(double probability,
+                    double weight,
+                    maths::CJointProbabilityOfLessLikelySamples& aggregator) const {
        aggregator.add(probability, weight);
    }
-    void operator()(double probability, double /*weight*/, maths::CProbabilityOfExtremeSample& aggregator) const {
+    void operator()(double probability,
+                    double /*weight*/,
+                    maths::CProbabilityOfExtremeSample& aggregator) const {
        aggregator.add(probability);
    }
};
@@ -110,13 +114,16 @@ void CModelTools::CFuzzyDeduplicate::add(TDouble2Vec value) {
    ++m_Count;
    if (m_RandomSample.size() < 100) {
        m_RandomSample.push_back(std::move(value));
-    } else if (maths::CSampling::uniformSample(m_Rng, 0.0, 1.0) < 100.0 / static_cast<double>(m_Count)) {
-        std::size_t evict{maths::CSampling::uniformSample(m_Rng, 0, m_RandomSample.size())};
+    } else if (maths::CSampling::uniformSample(m_Rng, 0.0, 1.0) <
+               100.0 / static_cast<double>(m_Count)) {
+        std::size_t evict{
+            maths::CSampling::uniformSample(m_Rng, 0, m_RandomSample.size())};
        m_RandomSample[evict].swap(value);
    }
}

-void CModelTools::CFuzzyDeduplicate::computeEpsilons(core_t::TTime bucketLength, std::size_t desiredNumberSamples) {
+void CModelTools::CFuzzyDeduplicate::computeEpsilons(core_t::TTime bucketLength,
+                                                     std::size_t desiredNumberSamples) {
    m_Quantize = m_Count > 0;
    if (m_Quantize) {
        m_QuantizedValues.reserve(std::min(m_Count, desiredNumberSamples));
@@ -131,8 +138,10 @@ void CModelTools::CFuzzyDeduplicate::computeEpsilons(core_t::TTime bucketLength,
            std::size_t p10{values.size() / 10};
            std::size_t p90{(9 * values.size()) / 10};
            std::nth_element(values.begin(), values.begin() + p10, values.end());
-            std::nth_element(values.begin() + p10 + 1, values.begin() + p90, values.end());
-            m_ValueEps[i] = (values[p90] - values[p10]) / static_cast<double>(desiredNumberSamples);
+            std::nth_element(values.begin() + p10 + 1,
values.begin() + p90, + values.end()); + m_ValueEps[i] = (values[p90] - values[p10]) / + static_cast(desiredNumberSamples); } } m_Count = 0; @@ -140,17 +149,21 @@ void CModelTools::CFuzzyDeduplicate::computeEpsilons(core_t::TTime bucketLength, } std::size_t CModelTools::CFuzzyDeduplicate::duplicate(core_t::TTime time, TDouble2Vec value) { - return !m_Quantize ? m_Count++ - : m_QuantizedValues - .emplace(boost::unordered::piecewise_construct, - std::forward_as_tuple(this->quantize(time), this->quantize(value)), - std::forward_as_tuple(m_QuantizedValues.size())) - .first->second; + return !m_Quantize + ? m_Count++ + : m_QuantizedValues + .emplace(boost::unordered::piecewise_construct, + std::forward_as_tuple(this->quantize(time), + this->quantize(value)), + std::forward_as_tuple(m_QuantizedValues.size())) + .first->second; } CModelTools::TDouble2Vec CModelTools::CFuzzyDeduplicate::quantize(TDouble2Vec value) const { for (std::size_t i = 0u; i < value.size(); ++i) { - value[i] = m_ValueEps[i] > 0.0 ? m_ValueEps[i] * std::floor(value[i] / m_ValueEps[i]) : value[i]; + value[i] = m_ValueEps[i] > 0.0 + ? m_ValueEps[i] * std::floor(value[i] / m_ValueEps[i]) + : value[i]; } return value; } @@ -159,14 +172,17 @@ core_t::TTime CModelTools::CFuzzyDeduplicate::quantize(core_t::TTime time) const return maths::CIntegerTools::floor(time, m_TimeEps); } -std::size_t CModelTools::CFuzzyDeduplicate::SDuplicateValueHash::operator()(const TTimeDouble2VecPr& value) const { - return static_cast( - std::accumulate(value.second.begin(), value.second.end(), static_cast(value.first), [](uint64_t seed, double v) { +std::size_t CModelTools::CFuzzyDeduplicate::SDuplicateValueHash:: +operator()(const TTimeDouble2VecPr& value) const { + return static_cast(std::accumulate( + value.second.begin(), value.second.end(), + static_cast(value.first), [](uint64_t seed, double v) { return core::CHashing::hashCombine(seed, static_cast(v)); })); } -CModelTools::CProbabilityAggregator::CProbabilityAggregator(EStyle style) : m_Style(style), m_TotalWeight(0.0) { +CModelTools::CProbabilityAggregator::CProbabilityAggregator(EStyle style) + : m_Style(style), m_TotalWeight(0.0) { } bool CModelTools::CProbabilityAggregator::empty() const { @@ -190,7 +206,8 @@ void CModelTools::CProbabilityAggregator::add(const TAggregator& aggregator, dou void CModelTools::CProbabilityAggregator::add(double probability, double weight) { m_TotalWeight += weight; for (auto& aggregator : m_Aggregators) { - boost::apply_visitor(boost::bind(SAddProbability(), probability, weight, _1), aggregator.first); + boost::apply_visitor(boost::bind(SAddProbability(), probability, weight, _1), + aggregator.first); } } @@ -216,7 +233,10 @@ bool CModelTools::CProbabilityAggregator::calculate(double& result) const { n += aggregator.second; } for (const auto& aggregator : m_Aggregators) { - if (!boost::apply_visitor(boost::bind(SReadProbability(), aggregator.second / n, boost::ref(p), _1), aggregator.first)) { + if (!boost::apply_visitor(boost::bind(SReadProbability(), + aggregator.second / n, + boost::ref(p), _1), + aggregator.first)) { return false; } } @@ -225,7 +245,9 @@ bool CModelTools::CProbabilityAggregator::calculate(double& result) const { case E_Min: { TMinAccumulator p_; for (const auto& aggregator : m_Aggregators) { - if (!boost::apply_visitor(boost::bind(SReadProbability(), boost::ref(p_), _1), aggregator.first)) { + if (!boost::apply_visitor( + boost::bind(SReadProbability(), boost::ref(p_), _1), + aggregator.first)) { return false; } } @@ -244,7 +266,8 @@ bool 
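CFuzzyDeduplicate above works in two phases: add() keeps a uniform random sample of at most 100 values by reservoir sampling, computeEpsilons() derives a quantisation step from the sample's 10th to 90th percentile spread, and duplicate() then assigns the same id to values that land in the same quantisation bucket. A one-dimensional standalone sketch using only the standard library:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <random>
    #include <unordered_map>
    #include <vector>

    class CFuzzyDedupSketch {
    public:
        void add(double value) {
            ++m_Count;
            if (m_Sample.size() < 100) {
                m_Sample.push_back(value);
            } else if (m_U01(m_Rng) < 100.0 / static_cast<double>(m_Count)) {
                // Evict a random element so every value seen so far had
                // an equal chance of remaining in the sample.
                std::size_t evict{std::uniform_int_distribution<std::size_t>{
                    0, m_Sample.size() - 1}(m_Rng)};
                m_Sample[evict] = value;
            }
        }

        void computeEps(std::size_t desiredNumberSamples) {
            if (m_Sample.empty() || desiredNumberSamples == 0) {
                return;
            }
            std::sort(m_Sample.begin(), m_Sample.end());
            std::size_t p10{m_Sample.size() / 10};
            std::size_t p90{(9 * m_Sample.size()) / 10};
            // Use the central spread so outliers don't inflate the step.
            m_Eps = (m_Sample[p90] - m_Sample[p10]) /
                    static_cast<double>(desiredNumberSamples);
        }

        std::size_t duplicate(double value) {
            double quantized{m_Eps > 0.0 ? m_Eps * std::floor(value / m_Eps) : value};
            // Values in the same quantisation bucket share an id.
            return m_Ids.emplace(quantized, m_Ids.size()).first->second;
        }

    private:
        std::mt19937_64 m_Rng;
        std::uniform_real_distribution<double> m_U01{0.0, 1.0};
        std::size_t m_Count{0};
        std::vector<double> m_Sample;
        double m_Eps{0.0};
        std::unordered_map<double, std::size_t> m_Ids;
    };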
CModelTools::CProbabilityAggregator::calculate(double& result) const { return true; } -CModelTools::CCategoryProbabilityCache::CCategoryProbabilityCache() : m_Prior(nullptr), m_SmallestProbability(1.0) { +CModelTools::CCategoryProbabilityCache::CCategoryProbabilityCache() + : m_Prior(nullptr), m_SmallestProbability(1.0) { } CModelTools::CCategoryProbabilityCache::CCategoryProbabilityCache(const maths::CMultinomialConjugate& prior) @@ -272,7 +295,10 @@ bool CModelTools::CCategoryProbabilityCache::lookup(std::size_t attribute, doubl } std::size_t index; - result = (!m_Prior->index(static_cast(attribute), index) || index >= m_Cache.size()) ? m_SmallestProbability : m_Cache[index]; + result = (!m_Prior->index(static_cast(attribute), index) || + index >= m_Cache.size()) + ? m_SmallestProbability + : m_Cache[index]; return true; } @@ -292,19 +318,23 @@ std::size_t CModelTools::CCategoryProbabilityCache::memoryUsage() const { return mem; } -CModelTools::CProbabilityCache::CProbabilityCache(double maximumError) : m_MaximumError(maximumError) { +CModelTools::CProbabilityCache::CProbabilityCache(double maximumError) + : m_MaximumError(maximumError) { } void CModelTools::CProbabilityCache::clear() { m_Caches.clear(); } -void CModelTools::CProbabilityCache::addModes(model_t::EFeature feature, std::size_t id, const maths::CModel& model) { +void CModelTools::CProbabilityCache::addModes(model_t::EFeature feature, + std::size_t id, + const maths::CModel& model) { if (model_t::dimension(feature) == 1) { TDouble1Vec& modes{m_Caches[{feature, id}].s_Modes}; if (modes.empty()) { TDouble2Vec1Vec modes_( - model.residualModes(maths::CConstantWeights::COUNT_VARIANCE, maths::CConstantWeights::unit(1))); + model.residualModes(maths::CConstantWeights::COUNT_VARIANCE, + maths::CConstantWeights::unit(1))); for (const auto& mode : modes_) { modes.push_back(mode[0]); } @@ -321,7 +351,8 @@ void CModelTools::CProbabilityCache::addProbability(model_t::EFeature feature, bool conditional, const TSize1Vec& mostAnomalousCorrelate) { if (m_MaximumError > 0.0 && value.size() == 1 && value[0].size() == 1) { - m_Caches[{feature, id}].s_Probabilities.emplace(value[0][0], SProbability{probability, tail, conditional, mostAnomalousCorrelate}); + m_Caches[{feature, id}].s_Probabilities.emplace( + value[0][0], SProbability{probability, tail, conditional, mostAnomalousCorrelate}); } } @@ -358,28 +389,35 @@ bool CModelTools::CProbabilityCache::lookup(model_t::EFeature feature, conditional = right->second.s_Conditional; mostAnomalousCorrelate = right->second.s_MostAnomalousCorrelate; return true; - } else if (right != probabilities.end() && right + 1 != probabilities.end() && right != probabilities.begin() && - right - 1 != probabilities.begin() && right - 2 != probabilities.begin()) { + } else if (right != probabilities.end() && + right + 1 != probabilities.end() && + right != probabilities.begin() && + right - 1 != probabilities.begin() && + right - 2 != probabilities.begin()) { auto left = right - 1; - double v[]{(left - 1)->first, left->first, right->first, (right + 1)->first}; + double v[]{(left - 1)->first, left->first, right->first, + (right + 1)->first}; auto beginModes = std::lower_bound(modes.begin(), modes.end(), v[0]); auto endModes = std::lower_bound(modes.begin(), modes.end(), v[3]); LOG_TRACE(<< "v = " << core::CContainerPrinter::print(v)); - if (beginModes == endModes && left->second.s_Tail == right->second.s_Tail) { + if (beginModes == endModes && + left->second.s_Tail == right->second.s_Tail) { double p[]{(left - 
diff --git a/lib/model/CPartitioningFields.cc b/lib/model/CPartitioningFields.cc
index 7c552f7fd9..089b4f2a1a 100644
--- a/lib/model/CPartitioningFields.cc
+++ b/lib/model/CPartitioningFields.cc
@@ -9,7 +9,8 @@
 namespace ml {
 namespace model {
 
-CPartitioningFields::CPartitioningFields(const std::string& partitionFieldName, const std::string& partitionFieldValue) {
+CPartitioningFields::CPartitioningFields(const std::string& partitionFieldName,
+                                         const std::string& partitionFieldValue) {
     m_PartitioningFields.reserve(3);
     this->add(partitionFieldName, partitionFieldValue);
 }
@@ -22,7 +23,8 @@ std::size_t CPartitioningFields::size() const {
     return m_PartitioningFields.size();
 }
 
-const CPartitioningFields::TStrCRefStrCRefPr& CPartitioningFields::operator[](std::size_t i) const {
+const CPartitioningFields::TStrCRefStrCRefPr& CPartitioningFields::
+operator[](std::size_t i) const {
     return m_PartitioningFields[i];
 }
diff --git a/lib/model/CPopulationModel.cc b/lib/model/CPopulationModel.cc
index 2b162433bd..3474594747 100644
--- a/lib/model/CPopulationModel.cc
+++ b/lib/model/CPopulationModel.cc
@@ -65,7 +65,10 @@ const std::string& name(EEntity entity, const CDataGatherer& gatherer, std::size
 
 //! Update \p hashes with the hash of the active entities in \p values.
 template<typename T>
-void hashActive(EEntity entity, const CDataGatherer& gatherer, const std::vector<T>& values, TStrCRefUInt64Map& hashes) {
+void hashActive(EEntity entity,
+                const CDataGatherer& gatherer,
+                const std::vector<T>& values,
+                TStrCRefUInt64Map& hashes) {
     for (std::size_t id = 0u; id < values.size(); ++id) {
         if (isActive(entity, gatherer, id)) {
             uint64_t& hash = hashes[boost::cref(name(entity, gatherer, id))];
@@ -110,11 +113,13 @@ const std::string INTERIM_BUCKET_CORRECTOR_TAG("i");
 CPopulationModel::CPopulationModel(const SModelParams& params,
                                    const TDataGathererPtr& dataGatherer,
                                    const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators)
-    : CAnomalyDetectorModel(params, dataGatherer, influenceCalculators), m_NewDistinctPersonCounts(BJKST_HASHES, BJKST_MAX_SIZE) {
+    : CAnomalyDetectorModel(params, dataGatherer, influenceCalculators),
+      m_NewDistinctPersonCounts(BJKST_HASHES, BJKST_MAX_SIZE) {
     const model_t::TFeatureVec& features = dataGatherer->features();
     for (std::size_t i = 0u; i < features.size(); ++i) {
         if (!model_t::isCategorical(features[i]) && !model_t::isConstant(features[i])) {
-            m_NewPersonBucketCounts.reset(maths::CCountMinSketch(COUNT_MIN_SKETCH_ROWS, COUNT_MIN_SKETCH_COLUMNS));
+            m_NewPersonBucketCounts.reset(maths::CCountMinSketch(
+                COUNT_MIN_SKETCH_ROWS, COUNT_MIN_SKETCH_COLUMNS));
             break;
         }
     }
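The constructor above seeds two probabilistic data structures: a BJKST sketch for approximate distinct person counts and a count-min sketch for approximate per-person bucket counts, both trading exactness for fixed memory. A toy count-min sketch, included only to illustrate the role played by maths::CCountMinSketch; it is a sketch of the idea, not the library implementation:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <vector>

class ToyCountMinSketch {
public:
    ToyCountMinSketch(std::size_t rows, std::size_t cols)
        : m_Cols(cols), m_Counts(rows, std::vector<std::uint64_t>(cols, 0)) {}

    void add(std::uint32_t category) {
        for (std::size_t r = 0; r < m_Counts.size(); ++r) {
            m_Counts[r][this->hash(r, category)] += 1;
        }
    }

    // Never under-counts; hash collisions can only inflate the estimate,
    // which is why callers subtract an error margin before using it.
    std::uint64_t count(std::uint32_t category) const {
        std::uint64_t result = UINT64_MAX;
        for (std::size_t r = 0; r < m_Counts.size(); ++r) {
            result = std::min(result, m_Counts[r][this->hash(r, category)]);
        }
        return result;
    }

private:
    std::size_t hash(std::size_t row, std::uint32_t category) const {
        return std::hash<std::uint64_t>()(
                   (static_cast<std::uint64_t>(row) << 32) | category) % m_Cols;
    }

private:
    std::size_t m_Cols;
    std::vector<std::vector<std::uint64_t>> m_Counts;
};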
@@ -137,15 +142,18 @@ bool CPopulationModel::isPopulation() const {
     return true;
 }
 
-CPopulationModel::TOptionalUInt64 CPopulationModel::currentBucketCount(std::size_t pid, core_t::TTime time) const {
+CPopulationModel::TOptionalUInt64
+CPopulationModel::currentBucketCount(std::size_t pid, core_t::TTime time) const {
     if (!this->bucketStatsAvailable(time)) {
         LOG_ERROR(<< "No statistics at " << time);
         return TOptionalUInt64();
     }
 
     const TSizeUInt64PrVec& personCounts = this->personCounts();
-    auto i = std::lower_bound(personCounts.begin(), personCounts.end(), pid, maths::COrderings::SFirstLess());
-    return (i != personCounts.end() && i->first == pid) ? TOptionalUInt64(i->second) : TOptionalUInt64();
+    auto i = std::lower_bound(personCounts.begin(), personCounts.end(), pid,
+                              maths::COrderings::SFirstLess());
+    return (i != personCounts.end() && i->first == pid) ? TOptionalUInt64(i->second)
+                                                        : TOptionalUInt64();
 }
 
 CPopulationModel::TOptionalDouble CPopulationModel::baselineBucketCount(std::size_t /*pid*/) const {
@@ -166,20 +174,25 @@ void CPopulationModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& resu
     }
 }
 
-void CPopulationModel::sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CPopulationModel::sampleOutOfPhase(core_t::TTime startTime,
+                                        core_t::TTime endTime,
+                                        CResourceMonitor& resourceMonitor) {
     CDataGatherer& gatherer = this->dataGatherer();
     if (!gatherer.dataAvailable(startTime)) {
         return;
     }
 
-    for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); time < endTime; time += bucketLength) {
+    for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength();
+         time < endTime; time += bucketLength) {
         gatherer.sampleNow(time);
         this->sampleBucketStatistics(time, time + bucketLength, resourceMonitor);
     }
 }
 
-void CPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) {
+void CPopulationModel::sample(core_t::TTime startTime,
+                              core_t::TTime endTime,
+                              CResourceMonitor& resourceMonitor) {
     this->CAnomalyDetectorModel::sample(startTime, endTime, resourceMonitor);
 
     const CDataGatherer& gatherer = this->dataGatherer();
@@ -223,12 +236,16 @@ void CPopulationModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem)
     mem->setName("CPopulationModel");
     this->CAnomalyDetectorModel::debugMemoryUsage(mem->addChild());
     core::CMemoryDebug::dynamicSize("m_PersonLastBucketTimes", m_PersonLastBucketTimes, mem);
-    core::CMemoryDebug::dynamicSize("m_AttributeFirstBucketTimes", m_AttributeFirstBucketTimes, mem);
-    core::CMemoryDebug::dynamicSize("m_AttributeLastBucketTimes", m_AttributeLastBucketTimes, mem);
-    core::CMemoryDebug::dynamicSize("m_NewDistinctPersonCounts", m_NewDistinctPersonCounts, mem);
+    core::CMemoryDebug::dynamicSize("m_AttributeFirstBucketTimes",
+                                    m_AttributeFirstBucketTimes, mem);
+    core::CMemoryDebug::dynamicSize("m_AttributeLastBucketTimes",
+                                    m_AttributeLastBucketTimes, mem);
+    core::CMemoryDebug::dynamicSize("m_NewDistinctPersonCounts",
+                                    m_NewDistinctPersonCounts, mem);
     core::CMemoryDebug::dynamicSize("m_DistinctPersonCounts", m_DistinctPersonCounts, mem);
     core::CMemoryDebug::dynamicSize("m_NewPersonBucketCounts", m_NewPersonBucketCounts, mem);
-    core::CMemoryDebug::dynamicSize("m_PersonAttributeBucketCounts", m_PersonAttributeBucketCounts, mem);
+    core::CMemoryDebug::dynamicSize("m_PersonAttributeBucketCounts",
+                                    m_PersonAttributeBucketCounts, mem);
 }
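The attributeFrequency computed in the next hunk is simply the fraction of currently active people that have exhibited the attribute, falling back to an uninformative 0.5 when no people are active yet. A minimal restatement:

double attributeFrequency(std::size_t distinctPeople, std::size_t activePeople) {
    return activePeople == 0
               ? 0.5 // no evidence either way yet
               : static_cast<double>(distinctPeople) /
                     static_cast<double>(activePeople);
}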
@@ -245,18 +262,22 @@ std::size_t CPopulationModel::memoryUsage() const {
 
 double CPopulationModel::attributeFrequency(std::size_t cid) const {
     std::size_t active = this->dataGatherer().numberActivePeople();
-    return active == 0 ? 0.5 : static_cast<double>(m_DistinctPersonCounts[cid].number()) / static_cast<double>(active);
+    return active == 0 ? 0.5
+                       : static_cast<double>(m_DistinctPersonCounts[cid].number()) /
+                             static_cast<double>(active);
 }
 
 double CPopulationModel::sampleRateWeight(std::size_t pid, std::size_t cid) const {
-    if (cid >= m_PersonAttributeBucketCounts.size() || cid >= m_DistinctPersonCounts.size()) {
+    if (cid >= m_PersonAttributeBucketCounts.size() ||
+        cid >= m_DistinctPersonCounts.size()) {
         return 1.0;
     }
 
     const maths::CCountMinSketch& counts = m_PersonAttributeBucketCounts[cid];
     const maths::CBjkstUniqueValues& distinctPeople = m_DistinctPersonCounts[cid];
 
-    double personCount = counts.count(static_cast<uint32_t>(pid)) - counts.oneMinusDeltaError();
+    double personCount = counts.count(static_cast<uint32_t>(pid)) -
+                         counts.oneMinusDeltaError();
     if (personCount <= 0.0) {
         return 1.0;
     }
@@ -264,7 +285,8 @@ double CPopulationModel::sampleRateWeight(std::size_t pid, std::size_t cid) cons
 
     double totalCount = counts.totalCount();
     double distinctPeopleCount =
-        std::min(static_cast<double>(distinctPeople.number()), static_cast<double>(this->dataGatherer().numberActivePeople()));
+        std::min(static_cast<double>(distinctPeople.number()),
+                 static_cast<double>(this->dataGatherer().numberActivePeople()));
     double meanPersonCount = totalCount / distinctPeopleCount;
     LOG_TRACE(<< "meanPersonCount = " << meanPersonCount);
 
@@ -272,18 +294,25 @@ double CPopulationModel::sampleRateWeight(std::size_t pid, std::size_t cid) cons
 }
 
 void CPopulationModel::doAcceptPersistInserter(core::CStatePersistInserter& inserter) const {
-    inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(), core::CIEEE754::E_SinglePrecision);
-    core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, this->personBucketCounts(), inserter);
-    core::CPersistUtils::persist(PERSON_LAST_BUCKET_TIME_TAG, m_PersonLastBucketTimes, inserter);
-    core::CPersistUtils::persist(ATTRIBUTE_FIRST_BUCKET_TIME_TAG, m_AttributeFirstBucketTimes, inserter);
-    core::CPersistUtils::persist(ATTRIBUTE_LAST_BUCKET_TIME_TAG, m_AttributeLastBucketTimes, inserter);
+    inserter.insertValue(WINDOW_BUCKET_COUNT_TAG, this->windowBucketCount(),
+                         core::CIEEE754::E_SinglePrecision);
+    core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG,
+                                 this->personBucketCounts(), inserter);
+    core::CPersistUtils::persist(PERSON_LAST_BUCKET_TIME_TAG,
+                                 m_PersonLastBucketTimes, inserter);
+    core::CPersistUtils::persist(ATTRIBUTE_FIRST_BUCKET_TIME_TAG,
+                                 m_AttributeFirstBucketTimes, inserter);
+    core::CPersistUtils::persist(ATTRIBUTE_LAST_BUCKET_TIME_TAG,
+                                 m_AttributeLastBucketTimes, inserter);
     for (std::size_t cid = 0; cid < m_PersonAttributeBucketCounts.size(); ++cid) {
         inserter.insertLevel(PERSON_ATTRIBUTE_BUCKET_COUNT_TAG,
-                             boost::bind(&maths::CCountMinSketch::acceptPersistInserter, &m_PersonAttributeBucketCounts[cid], _1));
+                             boost::bind(&maths::CCountMinSketch::acceptPersistInserter,
+                                         &m_PersonAttributeBucketCounts[cid], _1));
     }
     for (std::size_t cid = 0; cid < m_DistinctPersonCounts.size(); ++cid) {
         inserter.insertLevel(DISTINCT_PERSON_COUNT_TAG,
-                             boost::bind(&maths::CBjkstUniqueValues::acceptPersistInserter, &m_DistinctPersonCounts[cid], _1));
+                             boost::bind(&maths::CBjkstUniqueValues::acceptPersistInserter,
+                                         &m_DistinctPersonCounts[cid], _1));
     }
     this->interimBucketCorrectorAcceptPersistInserter(INTERIM_BUCKET_CORRECTOR_TAG, inserter);
 }
@@ -291,14 +320,17 @@ void CPopulationModel::doAcceptPersistInserter(core::CStatePersistInse
 bool CPopulationModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
     do {
         const std::string& name = traverser.name();
-        RESTORE_SETUP_TEARDOWN(WINDOW_BUCKET_COUNT_TAG,
-                               double count,
+        RESTORE_SETUP_TEARDOWN(WINDOW_BUCKET_COUNT_TAG, double count,
                                core::CStringUtils::stringToType(traverser.value(), count),
                                this->windowBucketCount(count));
-        RESTORE(PERSON_BUCKET_COUNT_TAG, core::CPersistUtils::restore(name, this->personBucketCounts(), traverser))
-        RESTORE(PERSON_LAST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_PersonLastBucketTimes, traverser))
-        RESTORE(ATTRIBUTE_FIRST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_AttributeFirstBucketTimes, traverser))
-        RESTORE(ATTRIBUTE_LAST_BUCKET_TIME_TAG, core::CPersistUtils::restore(name, m_AttributeLastBucketTimes, traverser))
+        RESTORE(PERSON_BUCKET_COUNT_TAG,
+                core::CPersistUtils::restore(name, this->personBucketCounts(), traverser))
+        RESTORE(PERSON_LAST_BUCKET_TIME_TAG,
+                core::CPersistUtils::restore(name, m_PersonLastBucketTimes, traverser))
+        RESTORE(ATTRIBUTE_FIRST_BUCKET_TIME_TAG,
+                core::CPersistUtils::restore(name, m_AttributeFirstBucketTimes, traverser))
+        RESTORE(ATTRIBUTE_LAST_BUCKET_TIME_TAG,
+                core::CPersistUtils::restore(name, m_AttributeLastBucketTimes, traverser))
         if (name == PERSON_ATTRIBUTE_BUCKET_COUNT_TAG) {
             maths::CCountMinSketch sketch(traverser);
             m_PersonAttributeBucketCounts.push_back(maths::CCountMinSketch(0, 0));
@@ -311,40 +343,51 @@ bool CPopulationModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& tr
             m_DistinctPersonCounts.back().swap(sketch);
             continue;
         }
-        RESTORE(INTERIM_BUCKET_CORRECTOR_TAG, this->interimBucketCorrectorAcceptRestoreTraverser(traverser))
+        RESTORE(INTERIM_BUCKET_CORRECTOR_TAG,
+                this->interimBucketCorrectorAcceptRestoreTraverser(traverser))
     } while (traverser.next());
 
     return true;
 }
 
-void CPopulationModel::createUpdateNewModels(core_t::TTime time, CResourceMonitor& resourceMonitor) {
+void CPopulationModel::createUpdateNewModels(core_t::TTime time,
+                                             CResourceMonitor& resourceMonitor) {
     this->updateRecycledModels();
 
     CDataGatherer& gatherer = this->dataGatherer();
 
     std::size_t numberExistingPeople = m_PersonLastBucketTimes.size();
     std::size_t numberExistingAttributes = m_AttributeLastBucketTimes.size();
-    TOptionalSize usageEstimate = this->estimateMemoryUsage(std::min(numberExistingPeople, gatherer.numberActivePeople()),
-                                                            std::min(numberExistingAttributes, gatherer.numberActiveAttributes()),
-                                                            0); // # correlations
-    std::size_t ourUsage = usageEstimate ? usageEstimate.get() : this->computeMemoryUsage();
+    TOptionalSize usageEstimate = this->estimateMemoryUsage(
+        std::min(numberExistingPeople, gatherer.numberActivePeople()),
+        std::min(numberExistingAttributes, gatherer.numberActiveAttributes()),
+        0); // # correlations
+    std::size_t ourUsage = usageEstimate ? usageEstimate.get()
+                                         : this->computeMemoryUsage();
     std::size_t resourceLimit = ourUsage + resourceMonitor.allocationLimit();
     std::size_t numberNewPeople = gatherer.numberPeople();
-    numberNewPeople = numberNewPeople > numberExistingPeople ? numberNewPeople - numberExistingPeople : 0;
+    numberNewPeople = numberNewPeople > numberExistingPeople ? numberNewPeople - numberExistingPeople
+                                                             : 0;
     std::size_t numberNewAttributes = gatherer.numberAttributes();
-    numberNewAttributes = numberNewAttributes > numberExistingAttributes ? numberNewAttributes - numberExistingAttributes : 0;
+    numberNewAttributes = numberNewAttributes > numberExistingAttributes
+                              ? numberNewAttributes - numberExistingAttributes
+                              : 0;
 
-    while (numberNewPeople > 0 && resourceMonitor.areAllocationsAllowed() && (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) {
+    while (numberNewPeople > 0 && resourceMonitor.areAllocationsAllowed() &&
+           (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) {
         // We batch people in CHUNK_SIZE (500) and create models in chunks
         // and test usage after each chunk.
         std::size_t numberToCreate = std::min(numberNewPeople, CHUNK_SIZE);
-        LOG_TRACE(<< "Creating batch of " << numberToCreate << " people of remaining " << numberNewPeople << ". "
+        LOG_TRACE(<< "Creating batch of " << numberToCreate
+                  << " people of remaining " << numberNewPeople << ". "
                   << resourceLimit - ourUsage << " free bytes remaining");
         this->createNewModels(numberToCreate, 0);
         numberExistingPeople += numberToCreate;
         numberNewPeople -= numberToCreate;
-        if ((numberNewPeople > 0 || numberNewAttributes > 0) && resourceMonitor.haveNoLimit() == false) {
-            ourUsage = this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, numberExistingAttributes, 0);
+        if ((numberNewPeople > 0 || numberNewAttributes > 0) &&
+            resourceMonitor.haveNoLimit() == false) {
+            ourUsage = this->estimateMemoryUsageOrComputeAndUpdate(
+                numberExistingPeople, numberExistingAttributes, 0);
         }
     }
 
@@ -353,27 +396,33 @@ void CPopulationModel::createUpdateNewModels(core_t::TTime time, CResourceMonito
         // We batch attributes in CHUNK_SIZE (500) and create models in chunks
        // and test usage after each chunk.
         std::size_t numberToCreate = std::min(numberNewAttributes, CHUNK_SIZE);
-        LOG_TRACE(<< "Creating batch of " << numberToCreate << " attributes of remaining " << numberNewAttributes << ". "
+        LOG_TRACE(<< "Creating batch of " << numberToCreate
+                  << " attributes of remaining " << numberNewAttributes << ". "
                   << resourceLimit - ourUsage << " free bytes remaining");
         this->createNewModels(0, numberToCreate);
         numberExistingAttributes += numberToCreate;
         numberNewAttributes -= numberToCreate;
         if (numberNewAttributes > 0 && resourceMonitor.haveNoLimit() == false) {
-            ourUsage = this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, numberExistingAttributes, 0);
+            ourUsage = this->estimateMemoryUsageOrComputeAndUpdate(
+                numberExistingPeople, numberExistingAttributes, 0);
         }
     }
 
     if (numberNewPeople > 0) {
         resourceMonitor.acceptAllocationFailureResult(time);
         LOG_DEBUG(<< "Not enough memory to create person models");
-        core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures).increment(numberNewPeople);
+        core::CStatistics::instance()
+            .stat(stat_t::E_NumberMemoryLimitModelCreationFailures)
+            .increment(numberNewPeople);
         std::size_t toRemove = gatherer.numberPeople() - numberNewPeople;
         gatherer.removePeople(toRemove);
     }
     if (numberNewAttributes > 0) {
         resourceMonitor.acceptAllocationFailureResult(time);
         LOG_DEBUG(<< "Not enough memory to create attribute models");
-        core::CStatistics::instance().stat(stat_t::E_NumberMemoryLimitModelCreationFailures).increment(numberNewAttributes);
+        core::CStatistics::instance()
+            .stat(stat_t::E_NumberMemoryLimitModelCreationFailures)
+            .increment(numberNewAttributes);
         std::size_t toRemove = gatherer.numberAttributes() - numberNewAttributes;
         gatherer.removeAttributes(toRemove);
     }
@@ -383,16 +432,21 @@ void CPopulationModel::createUpdateNewModels(core_t::TTime time, CResourceMonito
 
 void CPopulationModel::createNewModels(std::size_t n, std::size_t m) {
     if (n > 0) {
-        core::CAllocationStrategy::resize(m_PersonLastBucketTimes, n + m_PersonLastBucketTimes.size(), CAnomalyDetectorModel::TIME_UNSET);
+        core::CAllocationStrategy::resize(m_PersonLastBucketTimes,
+                                          n + m_PersonLastBucketTimes.size(),
+                                          CAnomalyDetectorModel::TIME_UNSET);
     }
     if (m > 0) {
         std::size_t newM = m + m_AttributeFirstBucketTimes.size();
-        core::CAllocationStrategy::resize(m_AttributeFirstBucketTimes, newM, CAnomalyDetectorModel::TIME_UNSET);
-        core::CAllocationStrategy::resize(m_AttributeLastBucketTimes, newM, CAnomalyDetectorModel::TIME_UNSET);
+        core::CAllocationStrategy::resize(m_AttributeFirstBucketTimes, newM,
+                                          CAnomalyDetectorModel::TIME_UNSET);
+        core::CAllocationStrategy::resize(m_AttributeLastBucketTimes, newM,
+                                          CAnomalyDetectorModel::TIME_UNSET);
         core::CAllocationStrategy::resize(m_DistinctPersonCounts, newM, m_NewDistinctPersonCounts);
         if (m_NewPersonBucketCounts) {
-            core::CAllocationStrategy::resize(m_PersonAttributeBucketCounts, newM, *m_NewPersonBucketCounts);
+            core::CAllocationStrategy::resize(m_PersonAttributeBucketCounts,
+                                              newM, *m_NewPersonBucketCounts);
         }
     }
 
@@ -446,10 +500,10 @@ void CPopulationModel::correctBaselineForInterim(model_t::EFeature feature,
 
 double CPopulationModel::propagationTime(std::size_t cid, core_t::TTime time) const {
     return 1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) *
-                     maths::CTools::truncate(1.0 - static_cast<double>(time - m_AttributeFirstBucketTimes[cid]) /
-                                                       static_cast<double>(3 * core::constants::WEEK),
-                                             0.0,
-                                             1.0);
+                     maths::CTools::truncate(
+                         1.0 - static_cast<double>(time - m_AttributeFirstBucketTimes[cid]) /
+                                   static_cast<double>(3 * core::constants::WEEK),
+                         0.0, 1.0);
 }
 
 const CPopulationModel::TTimeVec& CPopulationModel::attributeFirstBucketTimes() const {
@@ -471,8 +525,10 @@ void CPopulationModel::peopleAndAttributesToRemove(core_t::TTime time,
     const CDataGatherer& gatherer = this->dataGatherer();
 
     for (std::size_t pid = 0u; pid < m_PersonLastBucketTimes.size(); ++pid) {
-        if ((gatherer.isPersonActive(pid)) && (!CAnomalyDetectorModel::isTimeUnset(m_PersonLastBucketTimes[pid]))) {
-            std::size_t bucketsSinceLastEvent = static_cast<std::size_t>((time - m_PersonLastBucketTimes[pid]) / gatherer.bucketLength());
+        if ((gatherer.isPersonActive(pid)) &&
+            (!CAnomalyDetectorModel::isTimeUnset(m_PersonLastBucketTimes[pid]))) {
+            std::size_t bucketsSinceLastEvent = static_cast<std::size_t>(
+                (time - m_PersonLastBucketTimes[pid]) / gatherer.bucketLength());
             if (bucketsSinceLastEvent > maximumAge) {
                 LOG_TRACE(<< gatherer.personName(pid) << ", bucketsSinceLastEvent = " << bucketsSinceLastEvent
                           << ", maximumAge = " << maximumAge);
@@ -482,11 +538,13 @@ void CPopulationModel::peopleAndAttributesToRemove(core_t::TTime time,
     }
 
     for (std::size_t cid = 0u; cid < m_AttributeLastBucketTimes.size(); ++cid) {
-        if ((gatherer.isAttributeActive(cid)) && (!CAnomalyDetectorModel::isTimeUnset(m_AttributeLastBucketTimes[cid]))) {
-            std::size_t bucketsSinceLastEvent =
-                static_cast<std::size_t>((time - m_AttributeLastBucketTimes[cid]) / gatherer.bucketLength());
+        if ((gatherer.isAttributeActive(cid)) &&
+            (!CAnomalyDetectorModel::isTimeUnset(m_AttributeLastBucketTimes[cid]))) {
+            std::size_t bucketsSinceLastEvent = static_cast<std::size_t>(
+                (time - m_AttributeLastBucketTimes[cid]) / gatherer.bucketLength());
             if (bucketsSinceLastEvent > maximumAge) {
-                LOG_TRACE(<< gatherer.attributeName(cid) << ", bucketsSinceLastEvent = " << bucketsSinceLastEvent
+                LOG_TRACE(<< gatherer.attributeName(cid)
+                          << ", bucketsSinceLastEvent = " << bucketsSinceLastEvent
                           << ", maximumAge = " << maximumAge);
                 attributesToRemove.push_back(cid);
             }
@@ -511,24 +569,30 @@ void CPopulationModel::doSkipSampling(core_t::TTime startTime, core_t::TTime end
     core_t::TTime gapDuration = endTime - startTime;
     for (std::size_t pid = 0u; pid < m_PersonLastBucketTimes.size(); ++pid) {
-        if (gatherer.isPersonActive(pid) && !CAnomalyDetectorModel::isTimeUnset(m_PersonLastBucketTimes[pid])) {
+        if (gatherer.isPersonActive(pid) &&
+            !CAnomalyDetectorModel::isTimeUnset(m_PersonLastBucketTimes[pid])) {
             m_PersonLastBucketTimes[pid] = m_PersonLastBucketTimes[pid] + gapDuration;
         }
     }
 
     for (std::size_t cid = 0u; cid < m_AttributeLastBucketTimes.size(); ++cid) {
-        if (gatherer.isAttributeActive(cid) && !CAnomalyDetectorModel::isTimeUnset(m_AttributeLastBucketTimes[cid])) {
+        if (gatherer.isAttributeActive(cid) &&
+            !CAnomalyDetectorModel::isTimeUnset(m_AttributeLastBucketTimes[cid])) {
             m_AttributeLastBucketTimes[cid] = m_AttributeLastBucketTimes[cid] + gapDuration;
         }
     }
 }
 
-CPopulationModel::CCorrectionKey::CCorrectionKey(model_t::EFeature feature, std::size_t pid, std::size_t cid, std::size_t correlated)
+CPopulationModel::CCorrectionKey::CCorrectionKey(model_t::EFeature feature,
+                                                 std::size_t pid,
+                                                 std::size_t cid,
+                                                 std::size_t correlated)
     : m_Feature(feature), m_Pid(pid), m_Cid(cid), m_Correlate(correlated) {
 }
 
 bool CPopulationModel::CCorrectionKey::operator==(const CCorrectionKey& rhs) const {
-    return m_Feature == rhs.m_Feature && m_Pid == rhs.m_Pid && m_Cid == rhs.m_Cid && m_Correlate == rhs.m_Correlate;
+    return m_Feature == rhs.m_Feature && m_Pid == rhs.m_Pid &&
+           m_Cid == rhs.m_Cid && m_Correlate == rhs.m_Correlate;
 }
 
 std::size_t CPopulationModel::CCorrectionKey::hash() const {
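In the influence calculators that follow, commitInfluences recovers an influencer's probability as exp(influence * log(p)), which means influence plays the role of log(p_i) / log(p): it is 1.0 when conditioning on the influencer explains all of the anomalousness and falls towards 0.0 as it explains less. A minimal sketch of that reading, not the library's computeInfluence implementation:

#include <cmath>

// Sketch only: p is the overall probability, pi the probability with the
// influencer's contribution removed; both assumed to lie in (0, 1).
double influence(double p, double pi) {
    return std::log(pi) / std::log(p);
}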
diff --git a/lib/model/CProbabilityAndInfluenceCalculator.cc b/lib/model/CProbabilityAndInfluenceCalculator.cc
index 08e61272da..c039b8fe25 100644
--- a/lib/model/CProbabilityAndInfluenceCalculator.cc
+++ b/lib/model/CProbabilityAndInfluenceCalculator.cc
@@ -36,11 +36,16 @@ using TTime2Vec = CProbabilityAndInfluenceCalculator::TTime2Vec;
 using TTime2Vec1Vec = CProbabilityAndInfluenceCalculator::TTime2Vec1Vec;
 using TStrCRefDouble1VecDoublePrPr = CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDoublePrPr;
 using TStrCRefDouble1VecDoublePrPrVec = CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDoublePrPrVec;
-using TStrCRefDouble1VecDouble1VecPrPr = CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPr;
-using TStrCRefDouble1VecDouble1VecPrPrVec = CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPrVec;
-using TStoredStringPtrStoredStringPtrPr = CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPr;
-using TStoredStringPtrStoredStringPtrPrDoublePr = CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePr;
-using TStoredStringPtrStoredStringPtrPrDoublePrVec = CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePrVec;
+using TStrCRefDouble1VecDouble1VecPrPr =
+    CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPr;
+using TStrCRefDouble1VecDouble1VecPrPrVec =
+    CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPrVec;
+using TStoredStringPtrStoredStringPtrPr =
+    CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPr;
+using TStoredStringPtrStoredStringPtrPrDoublePr =
+    CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePr;
+using TStoredStringPtrStoredStringPtrPrDoublePrVec =
+    CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePrVec;
 using TTail2Vec = core::CSmallVector<maths_t::ETail, 2>;
 using TProbabilityCalculation2Vec = core::CSmallVector<maths_t::EProbabilityCalculation, 2>;
 using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
@@ -58,8 +63,10 @@ class CDecreasingValueInfluence {
 public:
     CDecreasingValueInfluence(maths_t::ETail tail) : m_Tail(tail) {}
 
-    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs, const TStrCRefDouble1VecDoublePrPr& rhs) const {
-        return m_Tail == maths_t::E_LeftTail ? lhs.second.first < rhs.second.first : lhs.second.first > rhs.second.first;
+    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs,
+                    const TStrCRefDouble1VecDoublePrPr& rhs) const {
+        return m_Tail == maths_t::E_LeftTail ? lhs.second.first < rhs.second.first
+                                             : lhs.second.first > rhs.second.first;
     }
 
 private:
@@ -73,17 +80,22 @@ class CDecreasingMeanInfluence {
 public:
     CDecreasingMeanInfluence(maths_t::ETail tail, const TDouble2Vec& value, double count)
-        : m_Tail(tail), m_Mean(maths::CBasicStatistics::accumulator(count, value[0])) {}
-
-    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs, const TStrCRefDouble1VecDoublePrPr& rhs) const {
-        TMeanAccumulator l = m_Mean - maths::CBasicStatistics::accumulator(lhs.second.second, lhs.second.first[0]);
-        TMeanAccumulator r = m_Mean - maths::CBasicStatistics::accumulator(rhs.second.second, rhs.second.first[0]);
+        : m_Tail(tail),
+          m_Mean(maths::CBasicStatistics::accumulator(count, value[0])) {}
+
+    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs,
+                    const TStrCRefDouble1VecDoublePrPr& rhs) const {
+        TMeanAccumulator l = m_Mean - maths::CBasicStatistics::accumulator(
+                                          lhs.second.second, lhs.second.first[0]);
+        TMeanAccumulator r = m_Mean - maths::CBasicStatistics::accumulator(
+                                          rhs.second.second, rhs.second.first[0]);
         double ml = maths::CBasicStatistics::mean(l);
         double nl = maths::CBasicStatistics::count(l);
         double mr = maths::CBasicStatistics::mean(r);
         double nr = maths::CBasicStatistics::count(r);
-        return m_Tail == maths_t::E_LeftTail ? maths::COrderings::lexicographical_compare(mr, nl, ml, nr)
-                                             : maths::COrderings::lexicographical_compare(ml, nl, mr, nr);
+        return m_Tail == maths_t::E_LeftTail
+                   ? maths::COrderings::lexicographical_compare(mr, nl, ml, nr)
+                   : maths::COrderings::lexicographical_compare(ml, nl, mr, nr);
     }
 
 private:
@@ -98,19 +110,24 @@ class CDecreasingVarianceInfluence {
 public:
     CDecreasingVarianceInfluence(maths_t::ETail tail, const TDouble2Vec& value, double count)
-        : m_Tail(tail), m_Variance(maths::CBasicStatistics::accumulator(count, value[1], value[0])) {}
-
-    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs, const TStrCRefDouble1VecDoublePrPr& rhs) const {
-        TMeanVarAccumulator l =
-            m_Variance - maths::CBasicStatistics::accumulator(lhs.second.second, lhs.second.first[1], lhs.second.first[0]);
-        TMeanVarAccumulator r =
-            m_Variance - maths::CBasicStatistics::accumulator(rhs.second.second, rhs.second.first[1], rhs.second.first[0]);
+        : m_Tail(tail),
+          m_Variance(maths::CBasicStatistics::accumulator(count, value[1], value[0])) {}
+
+    bool operator()(const TStrCRefDouble1VecDoublePrPr& lhs,
+                    const TStrCRefDouble1VecDoublePrPr& rhs) const {
+        TMeanVarAccumulator l = m_Variance - maths::CBasicStatistics::accumulator(
+                                                 lhs.second.second, lhs.second.first[1],
+                                                 lhs.second.first[0]);
+        TMeanVarAccumulator r = m_Variance - maths::CBasicStatistics::accumulator(
+                                                 rhs.second.second, rhs.second.first[1],
+                                                 rhs.second.first[0]);
         double vl = maths::CBasicStatistics::maximumLikelihoodVariance(l);
         double nl = maths::CBasicStatistics::count(l);
         double vr = maths::CBasicStatistics::maximumLikelihoodVariance(r);
         double nr = maths::CBasicStatistics::count(r);
-        return m_Tail == maths_t::E_LeftTail ? maths::COrderings::lexicographical_compare(vr, nl, vl, nr)
-                                             : maths::COrderings::lexicographical_compare(vl, nl, vr, nr);
+        return m_Tail == maths_t::E_LeftTail
+                   ? maths::COrderings::lexicographical_compare(vr, nl, vl, nr)
+                   : maths::COrderings::lexicographical_compare(vl, nl, vr, nr);
     }
 
 private:
@@ -125,7 +142,8 @@ double ratio(double numerator, double denominator, double zeroDividedByZero) {
         if (numerator == 0.0) {
             return zeroDividedByZero;
         }
-        return numerator < 0.0 ? -std::numeric_limits<double>::max() : std::numeric_limits<double>::max();
+        return numerator < 0.0 ? -std::numeric_limits<double>::max()
+                               : std::numeric_limits<double>::max();
     }
     return numerator / denominator;
 }
@@ -212,8 +230,9 @@ class CMeanDifference {
                     break;
                 }
             }
-            difference[d] = maths::CBasicStatistics::mean(maths::CBasicStatistics::accumulator(n, v[d]) -
-                                                          maths::CBasicStatistics::accumulator(ni, vi[d]));
+            difference[d] = maths::CBasicStatistics::mean(
+                maths::CBasicStatistics::accumulator(n, v[d]) -
+                maths::CBasicStatistics::accumulator(ni, vi[d]));
         }
     }
 
@@ -233,8 +252,9 @@ class CMeanDifference {
                     break;
                 }
             }
-            difference[d] = maths::CBasicStatistics::mean(maths::CBasicStatistics::accumulator(n[d], v[d]) -
-                                                          maths::CBasicStatistics::accumulator(ni[d], vi[d]));
+            difference[d] = maths::CBasicStatistics::mean(
+                maths::CBasicStatistics::accumulator(n[d], v[d]) -
+                maths::CBasicStatistics::accumulator(ni[d], vi[d]));
         }
         params.addBucketEmpty(bucketEmpty);
     }
@@ -259,9 +279,9 @@ class CVarianceDifference {
                     break;
                 }
             }
-            difference[d] =
-                maths::CBasicStatistics::maximumLikelihoodVariance(maths::CBasicStatistics::accumulator(n, v[dimension + d], v[d]) -
-                                                                   maths::CBasicStatistics::accumulator(ni, vi[dimension + d], vi[d]));
+            difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance(
+                maths::CBasicStatistics::accumulator(n, v[dimension + d], v[d]) -
+                maths::CBasicStatistics::accumulator(ni, vi[dimension + d], vi[d]));
         }
     }
 
@@ -282,7 +302,8 @@ class CVarianceDifference {
                 }
             }
             difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance(
-                maths::CBasicStatistics::accumulator(n[d], v[2 + d], v[d]) - maths::CBasicStatistics::accumulator(ni[d], vi[2 + d], vi[d]));
+                maths::CBasicStatistics::accumulator(n[d], v[2 + d], v[d]) -
+                maths::CBasicStatistics::accumulator(ni[d], vi[2 + d], vi[d]));
         }
         params.addBucketEmpty(bucketEmpty);
     }
@@ -300,7 +321,9 @@ void doComputeIndicatorInfluences(const core::CStoredStringPtr& influencerName,
     result.reserve(influencerValues.size());
     for (const auto& influencerValue : influencerValues) {
-        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(influencerValue.first)), 1.0);
+        result.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                influencerName, canonical(influencerValue.first)),
+                            1.0);
     }
 }
 
@@ -342,7 +365,9 @@ void doComputeInfluences(model_t::EFeature feature,
                          bool includeCutoff,
                          TStoredStringPtrStoredStringPtrPrDoublePrVec& result) {
     if (influencerValues.size() == 1) {
-        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(influencerValues[0].first)), 1.0);
+        result.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                influencerName, canonical(influencerValues[0].first)),
+                            1.0);
         return;
     }
     if (probability == 1.0) {
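The CMeanDifference and CVarianceDifference helpers above subtract an influencer's contribution from the bucket statistic before the model is re-queried for the remainder. For the mean, with overall count n and mean v and influencer count ni and mean vi, subtracting the accumulators leaves:

// Sketch only: the mean of the bucket with the influencer removed,
// assuming ni < n.
double meanWithoutInfluencer(double n, double v, double ni, double vi) {
    return (n * v - ni * vi) / (n - ni);
}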
@@ -363,12 +388,15 @@ void doComputeInfluences(model_t::EFeature feature,
 
     for (auto i = influencerValues.begin(); i != influencerValues.end(); ++i) {
         params.weights(weights).updateAnomalyModel(false);
 
-        computeInfluencedValue(value, count, i->second.first, i->second.second, params, influencedValue[0]);
+        computeInfluencedValue(value, count, i->second.first, i->second.second,
+                               params, influencedValue[0]);
 
         double pi;
         bool conditional;
-        if (!model.probability(params, time, influencedValue, pi, tail, conditional, mostAnomalousCorrelate)) {
-            LOG_ERROR(<< "Failed to compute P(" << influencedValue[0] << " | influencer = " << core::CContainerPrinter::print(*i) << ")");
+        if (!model.probability(params, time, influencedValue, pi, tail,
+                               conditional, mostAnomalousCorrelate)) {
+            LOG_ERROR(<< "Failed to compute P(" << influencedValue[0]
+                      << " | influencer = " << core::CContainerPrinter::print(*i) << ")");
             continue;
         }
         pi = maths::CTools::truncate(pi, maths::CTools::smallestProbability(), 1.0);
@@ -376,25 +404,36 @@ void doComputeInfluences(model_t::EFeature feature,
 
         double influence = computeInfluence(logp, maths::CTools::fastLog(pi));
 
-        LOG_TRACE(<< "log(p) = " << logp << ", tail = " << core::CContainerPrinter::print(tail)
-                  << ", v(i) = " << core::CContainerPrinter::print(influencedValue) << ", log(p(i)) = " << std::log(pi)
-                  << ", weight = " << core::CContainerPrinter::print(params.weights()) << ", influence = " << influence
+        LOG_TRACE(<< "log(p) = " << logp
+                  << ", tail = " << core::CContainerPrinter::print(tail)
+                  << ", v(i) = " << core::CContainerPrinter::print(influencedValue)
+                  << ", log(p(i)) = " << std::log(pi)
+                  << ", weight = " << core::CContainerPrinter::print(params.weights())
+                  << ", influence = " << influence
                   << ", influencer field value = " << i->first.get());
 
         if (dimension == 1 && influence >= cutoff) {
-            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), influence);
+            result.emplace_back(
+                TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), influence);
         } else if (dimension == 1) {
             if (includeCutoff) {
-                result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), influence);
+                result.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                        influencerName, canonical(i->first)),
+                                    influence);
                 for (++i; i != influencerValues.end(); ++i) {
-                    result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), 0.5 * influence);
+                    result.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                            influencerName, canonical(i->first)),
+                                        0.5 * influence);
                 }
             }
             break;
         } else if (influence >= cutoff) {
-            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), influence);
+            result.emplace_back(
+                TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), influence);
         } else if (includeCutoff) {
-            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(i->first)), 0.5 * influence);
+            result.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                    influencerName, canonical(i->first)),
+                                0.5 * influence);
         }
     }
 }
@@ -420,7 +459,9 @@ void doComputeCorrelateInfluences(model_t::EFeature feature,
                                   bool includeCutoff,
                                   TStoredStringPtrStoredStringPtrPrDoublePrVec& result) {
     if (influencerValues.size() == 1) {
-        result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(influencerValues[0].first)), 1.0);
+        result.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                influencerName, canonical(influencerValues[0].first)),
+                            1.0);
         return;
     }
     if (probability == 1.0) {
@@ -440,13 +481,16 @@ void doComputeCorrelateInfluences(model_t::EFeature feature,
 
     for (const auto& influence_ : influencerValues) {
         params.weights(weights).updateAnomalyModel(false);
 
-        computeInfluencedValue(value, count, influence_.second.first, influence_.second.second, params, influencedValue[0]);
+        computeInfluencedValue(value, count, influence_.second.first,
+                               influence_.second.second, params, influencedValue[0]);
 
         double pi;
         bool conditional;
-        if (!model.probability(params, TTime2Vec1Vec{time}, influencedValue, pi, tail, conditional, mostAnomalousCorrelate)) {
-            LOG_ERROR(<< "Failed to compute P(" << core::CContainerPrinter::print(influencedValue)
-                      << " | influencer = " << core::CContainerPrinter::print(influence_) << ")");
+        if (!model.probability(params, TTime2Vec1Vec{time}, influencedValue, pi,
+                               tail, conditional, mostAnomalousCorrelate)) {
+            LOG_ERROR(<< "Failed to compute P("
+                      << core::CContainerPrinter::print(influencedValue) << " | influencer = "
+                      << core::CContainerPrinter::print(influence_) << ")");
             continue;
         }
         pi = maths::CTools::truncate(pi, maths::CTools::smallestProbability(), 1.0);
@@ -454,20 +498,24 @@ void doComputeCorrelateInfluences(model_t::EFeature feature,
 
         double influence = computeInfluence(logp, std::log(pi));
 
-        LOG_TRACE(<< "log(p) = " << logp << ", v(i) = " << core::CContainerPrinter::print(influencedValue)
-                  << ", log(p(i)) = " << std::log(pi) << ", weight(i) = " << core::CContainerPrinter::print(params.weights())
-                  << ", influence = " << influence << ", influencer field value = " << influence_.first.get());
+        LOG_TRACE(<< "log(p) = " << logp
+                  << ", v(i) = " << core::CContainerPrinter::print(influencedValue)
+                  << ", log(p(i)) = " << std::log(pi) << ", weight(i) = "
+                  << core::CContainerPrinter::print(params.weights())
+                  << ", influence = " << influence
+                  << ", influencer field value = " << influence_.first.get());
 
         if (includeCutoff || influence >= cutoff) {
-            result.emplace_back(TStoredStringPtrStoredStringPtrPr(influencerName, canonical(influence_.first)), influence);
+            result.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                    influencerName, canonical(influence_.first)),
+                                influence);
         }
     }
 }
 }
 
 CProbabilityAndInfluenceCalculator::CProbabilityAndInfluenceCalculator(double cutoff)
-    : m_Cutoff(cutoff),
-      m_InfluenceCalculator(nullptr),
+    : m_Cutoff(cutoff), m_InfluenceCalculator(nullptr),
       m_ProbabilityTemplate(CModelTools::CProbabilityAggregator::E_Min),
       m_Probability(CModelTools::CProbabilityAggregator::E_Min),
       m_ProbabilityCache(nullptr) {
@@ -485,7 +533,8 @@ void CProbabilityAndInfluenceCalculator::plugin(const CInfluenceCalculator& infl
     m_InfluenceCalculator = &influenceCalculator;
 }
 
-void CProbabilityAndInfluenceCalculator::addAggregator(const maths::CJointProbabilityOfLessLikelySamples& aggregator) {
+void CProbabilityAndInfluenceCalculator::addAggregator(
+    const maths::CJointProbabilityOfLessLikelySamples& aggregator) {
     m_ProbabilityTemplate.add(aggregator);
     m_Probability.add(aggregator);
 }
@@ -499,7 +548,8 @@ void CProbabilityAndInfluenceCalculator::addCache(CModelTools::CProbabilityCache
     m_ProbabilityCache = &cache;
 }
 
-void CProbabilityAndInfluenceCalculator::add(const CProbabilityAndInfluenceCalculator& other, double weight) {
+void CProbabilityAndInfluenceCalculator::add(const CProbabilityAndInfluenceCalculator& other,
+                                             double weight) {
     double p = 0.0;
     if (!other.m_Probability.calculate(p)) {
         return;
@@ -509,7 +559,9 @@ void CProbabilityAndInfluenceCalculator::add(const CProbabilityAndInfluenceCalcu
     }
     for (const auto& aggregator : other.m_InfluencerProbabilities) {
         if (aggregator.second.calculate(p)) {
-            auto& aggregator_ = m_InfluencerProbabilities.emplace(aggregator.first, other.m_ProbabilityTemplate).first->second;
+            auto& aggregator_ = m_InfluencerProbabilities
+                                    .emplace(aggregator.first, other.m_ProbabilityTemplate)
+                                    .first->second;
             if (!aggregator.second.empty()) {
                 aggregator_.add(p, weight);
             }
@@ -517,70 +569,54 @@ void CProbabilityAndInfluenceCalculator::add(const CProbabilityAndInfluenceCalcu
     }
 }
 
-bool CProbabilityAndInfluenceCalculator::addAttributeProbability(const core::CStoredStringPtr& attribute,
-                                                                 std::size_t cid,
-                                                                 double pAttribute,
-                                                                 SParams& params,
-                                                                 CAnnotatedProbabilityBuilder& builder,
-                                                                 double weight) {
+bool CProbabilityAndInfluenceCalculator::addAttributeProbability(
+    const core::CStoredStringPtr& attribute,
+    std::size_t cid,
+    double pAttribute,
+    SParams& params,
+    CAnnotatedProbabilityBuilder& builder,
+    double weight) {
     model_t::CResultType type;
     TSize1Vec mostAnomalousCorrelate;
-    if (this->addProbability(params.s_Feature,
-                             cid,
-                             *params.s_Model,
-                             params.s_ElapsedTime,
-                             params.s_ComputeProbabilityParams,
-                             params.s_Time,
-                             params.s_Value,
-                             params.s_Probability,
-                             params.s_Tail,
-                             type,
-                             mostAnomalousCorrelate,
-                             weight)) {
+    if (this->addProbability(params.s_Feature, cid, *params.s_Model,
+                             params.s_ElapsedTime, params.s_ComputeProbabilityParams,
+                             params.s_Time, params.s_Value, params.s_Probability,
+                             params.s_Tail, type, mostAnomalousCorrelate, weight)) {
         static const TStoredStringPtr1Vec NO_CORRELATED_ATTRIBUTES;
         static const TSizeDoublePr1Vec NO_CORRELATES;
-        builder.addAttributeProbability(cid,
-                                        attribute,
-                                        pAttribute,
-                                        params.s_Probability,
+        builder.addAttributeProbability(cid, attribute, pAttribute, params.s_Probability,
                                         model_t::CResultType::E_Unconditional,
                                         params.s_Feature,
-                                        NO_CORRELATED_ATTRIBUTES,
-                                        NO_CORRELATES);
+                                        NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
         return true;
     }
     return false;
 }
 
-bool CProbabilityAndInfluenceCalculator::addAttributeProbability(const core::CStoredStringPtr& attribute,
-                                                                 std::size_t cid,
-                                                                 double pAttribute,
-                                                                 SCorrelateParams& params,
-                                                                 CAnnotatedProbabilityBuilder& builder,
-                                                                 double weight) {
+bool CProbabilityAndInfluenceCalculator::addAttributeProbability(
+    const core::CStoredStringPtr& attribute,
+    std::size_t cid,
+    double pAttribute,
+    SCorrelateParams& params,
+    CAnnotatedProbabilityBuilder& builder,
+    double weight) {
     model_t::CResultType type;
     params.s_MostAnomalousCorrelate.clear();
-    if (this->addProbability(params.s_Feature,
-                             cid,
-                             *params.s_Model,
-                             params.s_ElapsedTime,
-                             params.s_ComputeProbabilityParams,
-                             params.s_Times,
-                             params.s_Values,
-                             params.s_Probability,
-                             params.s_Tail,
-                             type,
-                             params.s_MostAnomalousCorrelate,
-                             weight)) {
+    if (this->addProbability(params.s_Feature, cid, *params.s_Model, params.s_ElapsedTime,
+                             params.s_ComputeProbabilityParams, params.s_Times,
+                             params.s_Values, params.s_Probability, params.s_Tail,
+                             type, params.s_MostAnomalousCorrelate, weight)) {
         TStoredStringPtr1Vec correlatedLabels_;
         TSizeDoublePr1Vec correlated_;
         if (!params.s_MostAnomalousCorrelate.empty()) {
             std::size_t i = params.s_MostAnomalousCorrelate[0];
             correlatedLabels_.push_back(params.s_CorrelatedLabels[i]);
-            correlated_.emplace_back(params.s_Correlated[i], params.s_Values[i][params.s_Variables[i][1]]);
+            correlated_.emplace_back(params.s_Correlated[i],
+                                     params.s_Values[i][params.s_Variables[i][1]]);
         }
-        builder.addAttributeProbability(
-            cid, attribute, pAttribute, params.s_Probability, type, params.s_Feature, correlatedLabels_, correlated_);
+        builder.addAttributeProbability(cid, attribute, pAttribute,
                                        params.s_Probability, type, params.s_Feature,
+                                        correlatedLabels_, correlated_);
         return true;
     }
     return false;
@@ -607,9 +643,11 @@ bool CProbabilityAndInfluenceCalculator::addProbability(model_t::EFeature featur
         TDouble2Vec1Vec values(model_t::stripExtraStatistics(feature, values_));
         model.detrend(time, params.seasonalConfidenceInterval(), values);
         bool conditional;
-        if (m_ProbabilityCache->lookup(feature, id, values, probability, tail, conditional, mostAnomalousCorrelate)) {
+        if (m_ProbabilityCache->lookup(feature, id, values, probability, tail,
+                                       conditional, mostAnomalousCorrelate)) {
             m_Probability.add(probability, weight);
-            type.set(conditional ? model_t::CResultType::E_Conditional : model_t::CResultType::E_Unconditional);
+            type.set(conditional ? model_t::CResultType::E_Conditional
+                                 : model_t::CResultType::E_Unconditional);
             return true;
         }
     }
@@ -618,14 +656,18 @@ bool CProbabilityAndInfluenceCalculator::addProbability(model_t::EFeature featur
     // to calculating.
     TDouble2Vec1Vec values(model_t::stripExtraStatistics(feature, values_));
     bool conditional;
-    if (model.probability(params, time, values, probability, tail, conditional, mostAnomalousCorrelate)) {
+    if (model.probability(params, time, values, probability, tail, conditional,
+                          mostAnomalousCorrelate)) {
         if (!model_t::isConstant(feature)) {
             probability = model_t::adjustProbability(feature, elapsedTime, probability);
             m_Probability.add(probability, weight);
-            type.set(conditional ? model_t::CResultType::E_Conditional : model_t::CResultType::E_Unconditional);
+            type.set(conditional ? model_t::CResultType::E_Conditional
+                                 : model_t::CResultType::E_Unconditional);
             if (m_ProbabilityCache) {
                 m_ProbabilityCache->addModes(feature, id, model);
-                m_ProbabilityCache->addProbability(feature, id, values, probability, tail, conditional, mostAnomalousCorrelate);
+                m_ProbabilityCache->addProbability(feature, id, values,
+                                                   probability, tail, conditional,
+                                                   mostAnomalousCorrelate);
             }
         } else {
             type.set(model_t::CResultType::E_Unconditional);
@@ -666,7 +708,8 @@ void CProbabilityAndInfluenceCalculator::addInfluences(const std::string& influe
         }
     }
 
-    double logp = std::log(std::max(params.s_Probability, maths::CTools::smallestProbability()));
+    double logp = std::log(
+        std::max(params.s_Probability, maths::CTools::smallestProbability()));
 
     params.s_InfluencerName = canonical(influencerName);
     params.s_InfluencerValues = influencerValues;
@@ -676,7 +719,9 @@ void CProbabilityAndInfluenceCalculator::addInfluences(const std::string& influe
     m_InfluenceCalculator->computeInfluences(params);
     m_Influences.swap(params.s_Influences);
     if (m_Influences.empty() && influencerValue) {
-        m_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(params.s_InfluencerName, canonical(*influencerValue)), 1.0);
+        m_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                      params.s_InfluencerName, canonical(*influencerValue)),
+                                  1.0);
     }
     this->commitInfluences(params.s_Feature, logp, weight);
 }
@@ -703,7 +748,8 @@ void CProbabilityAndInfluenceCalculator::addInfluences(const std::string& influe
         }
     }
 
-    double logp = std::log(std::max(params.s_Probability, maths::CTools::smallestProbability()));
+    double logp = std::log(
+        std::max(params.s_Probability, maths::CTools::smallestProbability()));
 
     params.s_InfluencerName = canonical(influencerName);
     params.s_InfluencerValues = influencerValues[params.s_MostAnomalousCorrelate[0]];
@@ -713,7 +759,9 @@ void CProbabilityAndInfluenceCalculator::addInfluences(const std::string& influe
     m_InfluenceCalculator->computeInfluences(params);
     m_Influences.swap(params.s_Influences);
     if (m_Influences.empty() && influencerValue) {
-        m_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(params.s_InfluencerName, canonical(*influencerValue)), 1.0);
+        m_Influences.emplace_back(TStoredStringPtrStoredStringPtrPr(
+                                      params.s_InfluencerName, canonical(*influencerValue)),
+                                  1.0);
     }
     this->commitInfluences(params.s_Feature, logp, weight);
 }
@@ -722,7 +770,9 @@ bool CProbabilityAndInfluenceCalculator::calculate(double& probability) const {
     return m_Probability.calculate(probability);
 }
 
-bool CProbabilityAndInfluenceCalculator::calculate(double& probability, TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) const {
+bool CProbabilityAndInfluenceCalculator::calculate(
+    double& probability,
+    TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) const {
     if (!m_Probability.calculate(probability)) {
         return false;
     }
@@ -735,10 +785,12 @@ bool CProbabilityAndInfluenceCalculator::calculate(double& probability, TStoredS
     for (const auto& aggregator : m_InfluencerProbabilities) {
         double probability_;
         if (!aggregator.second.calculate(probability_)) {
-            LOG_ERROR(<< "Couldn't calculate probability for influencer " << core::CContainerPrinter::print(aggregator.first));
+            LOG_ERROR(<< "Couldn't calculate probability for influencer "
+                      << core::CContainerPrinter::print(aggregator.first));
         }
         LOG_TRACE(<< "influence probability = " << probability_);
-        double influence = CInfluenceCalculator::intersectionInfluence(logp, std::log(probability_));
+        double influence = CInfluenceCalculator::intersectionInfluence(
+            logp, std::log(probability_));
         if (influence >= m_Cutoff) {
             influences.emplace_back(aggregator.first, influence);
         }
@@ -748,12 +800,16 @@ bool CProbabilityAndInfluenceCalculator::calculate(double& probability, TStoredS
     return true;
 }
 
-void CProbabilityAndInfluenceCalculator::commitInfluences(model_t::EFeature feature, double logp, double weight) {
+void CProbabilityAndInfluenceCalculator::commitInfluences(model_t::EFeature feature,
+                                                          double logp,
+                                                          double weight) {
     LOG_TRACE(<< "influences = " << core::CContainerPrinter::print(m_Influences));
 
     for (const auto& influence : m_Influences) {
         CModelTools::CProbabilityAggregator& aggregator =
-            m_InfluencerProbabilities.emplace(influence.first, m_ProbabilityTemplate).first->second;
+            m_InfluencerProbabilities
+                .emplace(influence.first, m_ProbabilityTemplate)
+                .first->second;
         if (!model_t::isConstant(feature)) {
             double probability = std::exp(influence.second * logp);
             LOG_TRACE(<< "Adding = " << influence.first.second.get() << " " << probability);
@@ -763,34 +819,28 @@ void CProbabilityAndInfluenceCalculator::commitInfluences(model_t::EFeat
 
 CProbabilityAndInfluenceCalculator::SParams::SParams(const CPartitioningFields& partitioningFields)
-    : s_Feature(),
-      s_Model(nullptr),
-      s_ElapsedTime(0),
-      s_Count(0.0),
-      s_Probability(1.0),
-      s_PartitioningFields(partitioningFields),
-      s_Cutoff(1.0),
-      s_IncludeCutoff(false) {
+    : s_Feature(), s_Model(nullptr), s_ElapsedTime(0), s_Count(0.0),
+      s_Probability(1.0), s_PartitioningFields(partitioningFields),
+      s_Cutoff(1.0), s_IncludeCutoff(false) {
 }
 
 std::string CProbabilityAndInfluenceCalculator::SParams::describe() const {
-    return core::CContainerPrinter::print(s_Value) + " | feature = " + model_t::print(s_Feature) + ", @ " +
-           core::CContainerPrinter::print(s_Time) + ", elapsedTime = " + core::CStringUtils::typeToString(s_ElapsedTime);
+    return core::CContainerPrinter::print(s_Value) +
+           " | feature = " + model_t::print(s_Feature) + ", @ " +
+           core::CContainerPrinter::print(s_Time) +
+           ", elapsedTime = " + core::CStringUtils::typeToString(s_ElapsedTime);
 }
 
 CProbabilityAndInfluenceCalculator::SCorrelateParams::SCorrelateParams(const CPartitioningFields& partitioningFields)
-    : s_Feature(),
-      s_Model(nullptr),
-      s_ElapsedTime(0),
-      s_Probability(1.0),
-      s_PartitioningFields(partitioningFields),
-      s_Cutoff(1.0),
-      s_IncludeCutoff(false) {
+    : s_Feature(), s_Model(nullptr), s_ElapsedTime(0), s_Probability(1.0),
+      s_PartitioningFields(partitioningFields), s_Cutoff(1.0), s_IncludeCutoff(false) {
 }
 
 std::string CProbabilityAndInfluenceCalculator::SCorrelateParams::describe() const {
-    return core::CContainerPrinter::print(s_Values) + " | feature = " + model_t::print(s_Feature) + ", @ " +
-           core::CContainerPrinter::print(s_Times) + ", elapsedTime = " + core::CStringUtils::typeToString(s_ElapsedTime);
+    return core::CContainerPrinter::print(s_Values) +
+           " | feature = " + model_t::print(s_Feature) + ", @ " +
+           core::CContainerPrinter::print(s_Times) +
+           ", elapsedTime = " + core::CStringUtils::typeToString(s_ElapsedTime);
 }
 
 ////// CInfluenceCalculator //////
@@ -820,12 +870,14 @@ void CInfluenceUnavailableCalculator::computeInfluences(TCorrelateParams& params
 
 void CIndicatorInfluenceCalculator::computeInfluences(TParams& params) const {
     params.s_Influences.clear();
-    doComputeIndicatorInfluences(params.s_InfluencerName, params.s_InfluencerValues, params.s_Influences);
+    doComputeIndicatorInfluences(params.s_InfluencerName,
+                                 params.s_InfluencerValues, params.s_Influences);
 }
 
 void CIndicatorInfluenceCalculator::computeInfluences(TCorrelateParams& params) const {
     params.s_Influences.clear();
-    doComputeIndicatorInfluences(params.s_InfluencerName, params.s_InfluencerValues, params.s_Influences);
+    doComputeIndicatorInfluences(params.s_InfluencerName,
+                                 params.s_InfluencerValues, params.s_Influences);
 }
 
 ////// CLogProbabilityComplementInfluenceCalculator //////
@@ -841,31 +893,26 @@ void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TParams& pa
     }
 
     if (params_.calculations() > 0) {
-        params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
+        params_
+            .seasonalConfidenceInterval(
+                params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
             .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
            .addWeights(params.s_ComputeProbabilityParams.weights()[0]);
 
         TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues;
         if (model_t::dimension(params.s_Feature) == 1) {
-            std::sort(influencerValues.begin(), influencerValues.end(), CDecreasingValueInfluence(params.s_Tail[0]));
+            std::sort(influencerValues.begin(), influencerValues.end(),
+                      CDecreasingValueInfluence(params.s_Tail[0]));
         }
 
-        LOG_TRACE(<< "influencerValues = " << core::CContainerPrinter::print(influencerValues));
-
-        doComputeInfluences(params.s_Feature,
-                            CValueDifference(),
-                            complementInfluence,
-                            *params.s_Model,
-                            params.s_ElapsedTime,
-                            params_,
-                            params.s_Time,
-                            params.s_Value[0],
-                            params.s_Count,
-                            params.s_Probability,
-                            params.s_InfluencerName,
-                            params.s_InfluencerValues,
-                            params.s_Cutoff,
-                            params.s_IncludeCutoff,
-                            params.s_Influences);
+        LOG_TRACE(<< "influencerValues = "
+                  << core::CContainerPrinter::print(influencerValues));
+
+        doComputeInfluences(params.s_Feature, CValueDifference(), complementInfluence,
+                            *params.s_Model, params.s_ElapsedTime, params_,
+                            params.s_Time, params.s_Value[0], params.s_Count,
+                            params.s_Probability, params.s_InfluencerName,
+                            params.s_InfluencerValues, params.s_Cutoff,
+                            params.s_IncludeCutoff, params.s_Influences);
     }
 }
 
@@ -876,26 +923,19 @@ void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TCorrelateP
         std::size_t correlate = params.s_MostAnomalousCorrelate[0];
         maths::CModelProbabilityParams params_;
         params_.addCalculation(maths_t::E_OneSidedAbove)
-            .seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
+            .seasonalConfidenceInterval(
+                params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
             .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[correlate])
             .mostAnomalousCorrelate(correlate);
-        LOG_TRACE(<< "influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues));
-        doComputeCorrelateInfluences(params.s_Feature,
-                                     CValueDifference(),
-                                     complementInfluence,
-                                     *params.s_Model,
-                                     params.s_ElapsedTime,
-                                     params_,
-                                     params.s_Times[correlate],
-                                     params.s_Values[correlate],
-                                     params.s_Counts[correlate],
-                                     params.s_Probability,
-                                     params.s_InfluencerName,
-                                     params.s_InfluencerValues,
-                                     params.s_Cutoff,
-                                     params.s_IncludeCutoff,
-                                     params.s_Influences);
+        LOG_TRACE(<< "influencerValues = "
+                  << core::CContainerPrinter::print(params.s_InfluencerValues));
+        doComputeCorrelateInfluences(
+            params.s_Feature, CValueDifference(), complementInfluence, *params.s_Model,
+            params.s_ElapsedTime, params_, params.s_Times[correlate],
+            params.s_Values[correlate], params.s_Counts[correlate],
+            params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues,
+            params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences);
     }
 }
 
@@ -904,7 +944,9 @@ void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TCorrelateP
 namespace {
 
 //! Maybe add \p coordinate and the appropriate calculation to \p params.
-void addCoordinate(maths_t::ETail tail, std::size_t coordinate, maths::CModelProbabilityParams& params) {
+void addCoordinate(maths_t::ETail tail,
+                   std::size_t coordinate,
+                   maths::CModelProbabilityParams& params) {
     switch (tail) {
     case maths_t::E_LeftTail: {
         params.addCalculation(maths_t::E_OneSidedBelow).addCoordinate(coordinate);
@@ -930,31 +972,26 @@ void CLogProbabilityInfluenceCalculator::computeInfluences(TParams& params) cons
     }
 
     if (params_.calculations() > 0) {
-        params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
+        params_
+            .seasonalConfidenceInterval(
+                params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
             .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[0]);
 
         TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues;
         if (model_t::dimension(params.s_Feature) == 1) {
-            std::sort(influencerValues.begin(), influencerValues.end(), CDecreasingValueInfluence(params.s_Tail[0]));
+            std::sort(influencerValues.begin(), influencerValues.end(),
+                      CDecreasingValueInfluence(params.s_Tail[0]));
        }
 
-        LOG_TRACE(<< "influencerValues = " << core::CContainerPrinter::print(influencerValues));
-
-        doComputeInfluences(params.s_Feature,
-                            CValueIntersection(),
-                            intersectionInfluence,
-                            *params.s_Model,
-                            params.s_ElapsedTime,
-                            params_,
-                            params.s_Time,
-                            params.s_Value[0],
-                            params.s_Count,
-                            params.s_Probability,
-                            params.s_InfluencerName,
-                            params.s_InfluencerValues,
-                            params.s_Cutoff,
-                            params.s_IncludeCutoff,
-                            params.s_Influences);
+        LOG_TRACE(<< "influencerValues = "
+                  << core::CContainerPrinter::print(influencerValues));
+
+        doComputeInfluences(params.s_Feature, CValueIntersection(), intersectionInfluence,
+                            *params.s_Model, params.s_ElapsedTime, params_,
+                            params.s_Time, params.s_Value[0], params.s_Count,
+                            params.s_Probability, params.s_InfluencerName,
+                            params.s_InfluencerValues, params.s_Cutoff,
+                            params.s_IncludeCutoff, params.s_Influences);
     }
 }
 
@@ -966,26 +1003,20 @@ void CLogProbabilityInfluenceCalculator::computeInfluences(TCorrelateParams& par
 
     if (params_.calculations() > 0) {
         std::size_t correlate = params.s_MostAnomalousCorrelate[0];
-        params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
+        params_
+            .seasonalConfidenceInterval(
+                params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
            .addWeights(params.s_ComputeProbabilityParams.weights()[correlate])
            .mostAnomalousCorrelate(correlate);
-        LOG_TRACE(<< "influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues));
-        doComputeCorrelateInfluences(params.s_Feature,
-                                     CValueDifference(),
-                                     intersectionInfluence,
-                                     *params.s_Model,
-                                     params.s_ElapsedTime,
-                                     params_,
-                                     params.s_Times[correlate],
-                                     params.s_Values[correlate],
-                                     params.s_Counts[correlate],
-                                     params.s_Probability,
-                                     params.s_InfluencerName,
-                                     params.s_InfluencerValues,
-                                     params.s_Cutoff,
-                                     params.s_IncludeCutoff,
-                                     params.s_Influences);
+        LOG_TRACE(<< "influencerValues = "
+                  << core::CContainerPrinter::print(params.s_InfluencerValues));
+        doComputeCorrelateInfluences(
+            params.s_Feature, CValueDifference(), intersectionInfluence,
+            *params.s_Model, params.s_ElapsedTime, params_, params.s_Times[correlate],
+            params.s_Values[correlate], params.s_Counts[correlate],
+            params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues,
+            params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences);
     }
 }
 
@@ -1000,32 +1031,26 @@ void CMeanInfluenceCalculator::computeInfluences(TParams& params) const {
     }
 
     if (params_.calculations() > 0) {
-        params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
+        params_
+            .seasonalConfidenceInterval(
+                params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
            .addWeights(params.s_ComputeProbabilityParams.weights()[0]);
 
        TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues;
        if (model_t::dimension(params.s_Feature) == 1) {
-            std::sort(influencerValues.begin(),
-                      influencerValues.end(),
-                      CDecreasingMeanInfluence(params.s_Tail[0], params.s_Value[0], params.s_Count));
+            std::sort(influencerValues.begin(), influencerValues.end(),
+                      CDecreasingMeanInfluence(params.s_Tail[0],
+                                               params.s_Value[0], params.s_Count));
        }
 
-        LOG_TRACE(<< "influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues));
-        doComputeInfluences(params.s_Feature,
-                            CMeanDifference(),
-                            complementInfluence,
-                            *params.s_Model,
-                            params.s_ElapsedTime,
-                            params_,
-                            params.s_Time,
-                            params.s_Value[0],
-                            params.s_Count,
-                            params.s_Probability,
-                            params.s_InfluencerName,
-                            params.s_InfluencerValues,
-                            params.s_Cutoff,
-                            params.s_IncludeCutoff,
-                            params.s_Influences);
+        LOG_TRACE(<< "influencerValues = "
+                  << core::CContainerPrinter::print(params.s_InfluencerValues));
+        doComputeInfluences(params.s_Feature, CMeanDifference(), complementInfluence,
+                            *params.s_Model, params.s_ElapsedTime, params_,
+                            params.s_Time, params.s_Value[0], params.s_Count,
+                            params.s_Probability, params.s_InfluencerName,
+                            params.s_InfluencerValues, params.s_Cutoff,
+                            params.s_IncludeCutoff, params.s_Influences);
    }
}
 
@@ -1037,26 +1062,20 @@ void CMeanInfluenceCalculator::computeInfluences(TCorrelateParams& params) const
 
    if (params_.calculations() > 0) {
        std::size_t correlate = params.s_MostAnomalousCorrelate[0];
-        params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
+        params_
+            .seasonalConfidenceInterval(
+                params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
            .addWeights(params.s_ComputeProbabilityParams.weights()[correlate])
            .mostAnomalousCorrelate(correlate);
-        LOG_TRACE(<< "influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues));
-        doComputeCorrelateInfluences(params.s_Feature,
-                                     CMeanDifference(),
-                                     complementInfluence,
-                                     *params.s_Model,
-                                     params.s_ElapsedTime,
-                                     params_,
-                                     params.s_Times[correlate],
-                                     params.s_Values[correlate],
-                                     params.s_Counts[correlate],
-                                     params.s_Probability,
-                                     params.s_InfluencerName,
-                                     params.s_InfluencerValues,
-                                     params.s_Cutoff,
-                                     params.s_IncludeCutoff,
-                                     params.s_Influences);
+        LOG_TRACE(<< "influencerValues = "
+                  << core::CContainerPrinter::print(params.s_InfluencerValues));
+        doComputeCorrelateInfluences(
+            params.s_Feature, CMeanDifference(), complementInfluence, *params.s_Model,
+            params.s_ElapsedTime, params_, params.s_Times[correlate],
+            params.s_Values[correlate], params.s_Counts[correlate],
+            params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues,
+            params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences);
    }
}
 
@@ -1071,33 +1090,27 @@ void CVarianceInfluenceCalculator::computeInfluences(TParams& params) const {
    }
 
    if (params_.calculations() > 0) {
-        params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
+        params_
+            .seasonalConfidenceInterval(
+                params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
            .addWeights(params.s_ComputeProbabilityParams.weights()[0]);
 
        TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues;
        if (model_t::dimension(params.s_Feature) == 1) {
-            std::sort(influencerValues.begin(),
-                      influencerValues.end(),
-                      CDecreasingVarianceInfluence(params.s_Tail[0], params.s_Value[0], params.s_Count));
+            std::sort(influencerValues.begin(), influencerValues.end(),
+                      CDecreasingVarianceInfluence(
+                          params.s_Tail[0], params.s_Value[0], params.s_Count));
        }
 
-        LOG_TRACE(<< "influencerValues = " << core::CContainerPrinter::print(influencerValues));
-
-        doComputeInfluences(params.s_Feature,
-                            CVarianceDifference(),
-                            complementInfluence,
-                            *params.s_Model,
-                            params.s_ElapsedTime,
-                            params_,
-                            params.s_Time,
-                            params.s_Value[0],
-                            params.s_Count,
-                            params.s_Probability,
-                            params.s_InfluencerName,
-                            params.s_InfluencerValues,
-                            params.s_Cutoff,
-                            params.s_IncludeCutoff,
-                            params.s_Influences);
+        LOG_TRACE(<< "influencerValues = "
+                  << core::CContainerPrinter::print(influencerValues));
+
+        doComputeInfluences(params.s_Feature, CVarianceDifference(), complementInfluence,
+                            *params.s_Model, params.s_ElapsedTime, params_,
+                            params.s_Time, params.s_Value[0], params.s_Count,
+                            params.s_Probability, params.s_InfluencerName,
+                            params.s_InfluencerValues, params.s_Cutoff,
+                            params.s_IncludeCutoff, params.s_Influences);
    }
}
 
@@ -1109,26 +1122,20 @@ void CVarianceInfluenceCalculator::computeInfluences(TCorrelateParams& params) c
 
    if (params_.calculations() > 0) {
        std::size_t correlate = params.s_MostAnomalousCorrelate[0];
-        params_.seasonalConfidenceInterval(params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
+        params_
+            .seasonalConfidenceInterval(
+                params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
            .addWeights(params.s_ComputeProbabilityParams.weights()[correlate])
            .mostAnomalousCorrelate(correlate);
-        LOG_TRACE(<< "influencerValues = " << core::CContainerPrinter::print(params.s_InfluencerValues));
-        doComputeCorrelateInfluences(params.s_Feature,
-                                     CVarianceDifference(),
-                                     complementInfluence,
-                                     *params.s_Model,
-                                     params.s_ElapsedTime,
-                                     params_,
-
params.s_Times[correlate], - params.s_Values[correlate], - params.s_Counts[correlate], - params.s_Probability, - params.s_InfluencerName, - params.s_InfluencerValues, - params.s_Cutoff, - params.s_IncludeCutoff, - params.s_Influences); + LOG_TRACE(<< "influencerValues = " + << core::CContainerPrinter::print(params.s_InfluencerValues)); + doComputeCorrelateInfluences( + params.s_Feature, CVarianceDifference(), complementInfluence, + *params.s_Model, params.s_ElapsedTime, params_, params.s_Times[correlate], + params.s_Values[correlate], params.s_Counts[correlate], + params.s_Probability, params.s_InfluencerName, params.s_InfluencerValues, + params.s_Cutoff, params.s_IncludeCutoff, params.s_Influences); } } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index ae600d6fed..6e1fd65a25 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -26,22 +26,14 @@ const core_t::TTime CResourceMonitor::MINIMUM_PRUNE_FREQUENCY(60 * 60); const std::size_t CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB(4096); CResourceMonitor::CResourceMonitor() - : m_AllowAllocations(true), - m_ByteLimitHigh(0), - m_ByteLimitLow(0), - m_CurrentAnomalyDetectorMemory(0), - m_ExtraMemory(0), - m_PreviousTotal(this->totalMemory()), - m_Peak(m_PreviousTotal), - m_LastAllocationFailureReport(0), - m_MemoryStatus(model_t::E_MemoryStatusOk), - m_HasPruningStarted(false), - m_PruneThreshold(0), - m_LastPruneTime(0), + : m_AllowAllocations(true), m_ByteLimitHigh(0), m_ByteLimitLow(0), + m_CurrentAnomalyDetectorMemory(0), m_ExtraMemory(0), + m_PreviousTotal(this->totalMemory()), m_Peak(m_PreviousTotal), + m_LastAllocationFailureReport(0), m_MemoryStatus(model_t::E_MemoryStatusOk), + m_HasPruningStarted(false), m_PruneThreshold(0), m_LastPruneTime(0), m_PruneWindow(std::numeric_limits<std::size_t>::max()), m_PruneWindowMaximum(std::numeric_limits<std::size_t>::max()), - m_PruneWindowMinimum(std::numeric_limits<std::size_t>::max()), - m_NoLimit(false) { + m_PruneWindowMinimum(std::numeric_limits<std::size_t>::max()), m_NoLimit(false) { this->updateMemoryLimitsAndPruneThreshold(DEFAULT_MEMORY_LIMIT_MB); } @@ -57,7 +49,8 @@ void CResourceMonitor::registerComponent(CAnomalyDetector& detector) { void CResourceMonitor::unRegisterComponent(CAnomalyDetector& detector) { auto iter = m_Models.find(detector.model().get()); if (iter == m_Models.end()) { - LOG_ERROR(<< "Inconsistency - component has not been registered: " << detector.model()); + LOG_ERROR(<< "Inconsistency - component has not been registered: " + << detector.model()); return; } @@ -123,7 +116,8 @@ void CResourceMonitor::updateAllowAllocations() { std::size_t total{this->totalMemory()}; if (m_AllowAllocations) { if (total > m_ByteLimitHigh) { - LOG_INFO(<< "Over allocation limit. " << total << " bytes used, the limit is " << m_ByteLimitHigh); + LOG_INFO(<< "Over allocation limit. " << total + << " bytes used, the limit is " << m_ByteLimitHigh); m_AllowAllocations = false; } } else { @@ -184,16 +178,20 @@ bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) { this->updateAllowAllocations(); } - LOG_TRACE(<< "Pruning models. Usage: " << total << ". Current window: " << m_PruneWindow << " buckets"); + LOG_TRACE(<< "Pruning models. Usage: " << total + << ". Current window: " << m_PruneWindow << " buckets"); if (total < m_PruneThreshold) { // Expand the window - m_PruneWindow = std::min(m_PruneWindow + std::size_t((endTime - m_LastPruneTime) / m_Models.begin()->first->bucketLength()), - m_PruneWindowMaximum); + m_PruneWindow = std::min( + m_PruneWindow + std::size_t((endTime - m_LastPruneTime) / + m_Models.begin()->first->bucketLength()), + m_PruneWindowMaximum); LOG_TRACE(<< "Expanding window, to " << m_PruneWindow); } else { // Shrink the window - m_PruneWindow = std::max(static_cast<std::size_t>(m_PruneWindow * 99 / 100), m_PruneWindowMinimum); + m_PruneWindow = std::max(static_cast<std::size_t>(m_PruneWindow * 99 / 100), + m_PruneWindowMinimum); LOG_TRACE(<< "Shrinking window, to " << m_PruneWindow); } @@ -237,7 +235,8 @@ void CResourceMonitor::sendMemoryUsageReportIfSignificantlyChanged(core_t::TTime bool CResourceMonitor::needToSendReport() { // Has the usage changed by more than 1% ? std::size_t total{this->totalMemory()}; - if ((std::max(total, m_PreviousTotal) - std::min(total, m_PreviousTotal)) > m_PreviousTotal / 100) { + if ((std::max(total, m_PreviousTotal) - std::min(total, m_PreviousTotal)) > + m_PreviousTotal / 100) { return true; } @@ -308,7 +307,9 @@ void CResourceMonitor::clearExtraMemory() { } std::size_t CResourceMonitor::totalMemory() const { - return m_CurrentAnomalyDetectorMemory + m_ExtraMemory + CStringStore::names().memoryUsage() + CStringStore::influencers().memoryUsage(); + return m_CurrentAnomalyDetectorMemory + m_ExtraMemory + + CStringStore::names().memoryUsage() + + CStringStore::influencers().memoryUsage(); } } // model diff --git a/lib/model/CResultsQueue.cc b/lib/model/CResultsQueue.cc index d43c839293..99e945e387 100644 --- a/lib/model/CResultsQueue.cc +++ b/lib/model/CResultsQueue.cc @@ -26,7 +26,8 @@ CResultsQueue::CResultsQueue(std::size_t delayBuckets, core_t::TTime bucketLengt void CResultsQueue::push(const CHierarchicalResults& result, core_t::TTime time) { if (m_Results.latestBucketEnd() + 1 - m_Results.bucketLength() == 0) { m_Results.reset(time - m_Results.bucketLength()); - LOG_TRACE(<< "Resetting results queue. Queue's latestBucketEnd is " << m_Results.latestBucketEnd()); + LOG_TRACE(<< "Resetting results queue.
Queue's latestBucketEnd is " + << m_Results.latestBucketEnd()); } m_Results.push(result, time); } @@ -64,17 +65,20 @@ bool CResultsQueue::hasInterimResults() const { return m_Results.size() > 2 && m_LastResultsIndex == 0; } -core_t::TTime -CResultsQueue::chooseResultTime(core_t::TTime bucketStartTime, core_t::TTime bucketLength, model::CHierarchicalResults& results) { +core_t::TTime CResultsQueue::chooseResultTime(core_t::TTime bucketStartTime, + core_t::TTime bucketLength, + model::CHierarchicalResults& results) { if (m_Results.size() == 1) { return bucketStartTime; } // Select the correct bucket to use - LOG_TRACE(<< "Asking for queue items at " << (bucketStartTime - bucketLength) << " and " << (bucketStartTime - (bucketLength / 2))); + LOG_TRACE(<< "Asking for queue items at " << (bucketStartTime - bucketLength) + << " and " << (bucketStartTime - (bucketLength / 2))); core_t::TTime resultsTime = 0; - const model::CHierarchicalResults::TNode* node = m_Results.get(bucketStartTime - bucketLength).root(); + const model::CHierarchicalResults::TNode* node = + m_Results.get(bucketStartTime - bucketLength).root(); double r1 = 0.0; if (node) { r1 = node->s_NormalizedAnomalyScore; @@ -113,7 +117,8 @@ CResultsQueue::chooseResultTime(core_t::TTime bucketStartTime, core_t::TTime buc } void CResultsQueue::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - core_t::TTime initialisationTime = m_Results.latestBucketEnd() + 1 - m_Results.bucketLength(); + core_t::TTime initialisationTime = m_Results.latestBucketEnd() + 1 - + m_Results.bucketLength(); core::CPersistUtils::persist(INITIALISATION_TIME_TAG, initialisationTime, inserter); core::CPersistUtils::persist(RESULTS_TAG, m_Results, inserter); core::CPersistUtils::persist(LAST_RESULTS_INDEX_TAG, m_LastResultsIndex, inserter); @@ -127,12 +132,14 @@ bool CResultsQueue::acceptRestoreTraverser(core::CStateRestoreTraverser& travers return false; } } else if (name == LAST_RESULTS_INDEX_TAG) { - if (!core::CPersistUtils::restore(LAST_RESULTS_INDEX_TAG, m_LastResultsIndex, traverser)) { + if (!core::CPersistUtils::restore(LAST_RESULTS_INDEX_TAG, + m_LastResultsIndex, traverser)) { return false; } } else if (name == INITIALISATION_TIME_TAG) { core_t::TTime initialisationTime = 0; - if (!core::CPersistUtils::restore(INITIALISATION_TIME_TAG, initialisationTime, traverser)) { + if (!core::CPersistUtils::restore(INITIALISATION_TIME_TAG, + initialisationTime, traverser)) { return false; } m_Results.reset(initialisationTime); diff --git a/lib/model/CRuleCondition.cc b/lib/model/CRuleCondition.cc index 4820707e0b..83904c02c5 100644 --- a/lib/model/CRuleCondition.cc +++ b/lib/model/CRuleCondition.cc @@ -22,7 +22,8 @@ const core::CPatternSet EMPTY_FILTER; using TDouble1Vec = CAnomalyDetectorModel::TDouble1Vec; -CRuleCondition::SCondition::SCondition(EConditionOperator op, double threshold) : s_Op(op), s_Threshold(threshold) { +CRuleCondition::SCondition::SCondition(EConditionOperator op, double threshold) + : s_Op(op), s_Threshold(threshold) { } bool CRuleCondition::SCondition::test(double value) const { @@ -40,7 +41,8 @@ bool CRuleCondition::SCondition::test(double value) const { } CRuleCondition::CRuleCondition() - : m_Type(E_NumericalActual), m_Condition(E_LT, 0.0), m_FieldName(), m_FieldValue(), m_ValueFilter(EMPTY_FILTER) { + : m_Type(E_NumericalActual), m_Condition(E_LT, 0.0), m_FieldName(), + m_FieldValue(), m_ValueFilter(EMPTY_FILTER) { } void CRuleCondition::type(ERuleConditionType ruleType) { @@ -103,7 +105,8 @@ bool 
CRuleCondition::test(const CAnomalyDetectorModel& model, // Thus we ignore the supplied pid/cid and instead look up // the time series identifier that matches the condition's m_FieldValue. bool successfullyResolvedId = - model.isPopulation() ? gatherer.attributeId(m_FieldValue, cid) : gatherer.personId(m_FieldValue, pid); + model.isPopulation() ? gatherer.attributeId(m_FieldValue, cid) + : gatherer.personId(m_FieldValue, pid); if (successfullyResolvedId == false) { return false; } @@ -112,9 +115,10 @@ bool CRuleCondition::test(const CAnomalyDetectorModel& model, // - empty // - the person field name if the detector has only an over field or only a by field // - the attribute field name if the detector has both over and by fields - const std::string& fieldValue = model.isPopulation() && m_FieldName == gatherer.attributeFieldName() - ? gatherer.attributeName(cid) - : gatherer.personName(pid); + const std::string& fieldValue = + model.isPopulation() && m_FieldName == gatherer.attributeFieldName() + ? gatherer.attributeName(cid) + : gatherer.personName(pid); if (m_FieldValue != fieldValue) { return false; } @@ -142,7 +146,8 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model, break; } case E_NumericalTypical: { - value = model.baselineBucketMean(feature, pid, cid, resultType, EMPTY_CORRELATED, time); + value = model.baselineBucketMean(feature, pid, cid, resultType, + EMPTY_CORRELATED, time); if (value.empty()) { // Means prior is non-informative return false; @@ -151,7 +156,8 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model, } case E_NumericalDiffAbs: { value = model.currentBucketValue(feature, pid, cid, time); - TDouble1Vec typical = model.baselineBucketMean(feature, pid, cid, resultType, EMPTY_CORRELATED, time); + TDouble1Vec typical = model.baselineBucketMean(feature, pid, cid, resultType, + EMPTY_CORRELATED, time); if (typical.empty()) { // Means prior is non-informative return false; @@ -200,7 +206,8 @@ std::string CRuleCondition::print() const { } result += "IN FILTER"; } else { - result += this->print(m_Condition.s_Op) + " " + core::CStringUtils::typeToString(m_Condition.s_Threshold); + result += this->print(m_Condition.s_Op) + " " + + core::CStringUtils::typeToString(m_Condition.s_Threshold); } return result; } diff --git a/lib/model/CSample.cc b/lib/model/CSample.cc index 544735092a..49baa7d1ae 100644 --- a/lib/model/CSample.cc +++ b/lib/model/CSample.cc @@ -20,13 +20,17 @@ namespace ml { namespace model { std::string CSample::SToString::operator()(const CSample& sample) const { - std::string result = core::CStringUtils::typeToString(sample.m_Time) + core::CPersistUtils::PAIR_DELIMITER + - core::CStringUtils::typeToStringPrecise(sample.m_VarianceScale, core::CIEEE754::E_SinglePrecision) + + std::string result = core::CStringUtils::typeToString(sample.m_Time) + core::CPersistUtils::PAIR_DELIMITER + - core::CStringUtils::typeToStringPrecise(sample.m_Count, core::CIEEE754::E_SinglePrecision); + core::CStringUtils::typeToStringPrecise( + sample.m_VarianceScale, core::CIEEE754::E_SinglePrecision) + + core::CPersistUtils::PAIR_DELIMITER + + core::CStringUtils::typeToStringPrecise( + sample.m_Count, core::CIEEE754::E_SinglePrecision); for (std::size_t i = 0u; i < sample.m_Value.size(); ++i) { result += core::CPersistUtils::PAIR_DELIMITER + - core::CStringUtils::typeToStringPrecise(sample.m_Value[i], core::CIEEE754::E_SinglePrecision); + core::CStringUtils::typeToStringPrecise( + sample.m_Value[i], core::CIEEE754::E_SinglePrecision); } return 
result; } @@ -34,12 +38,14 @@ std::string CSample::SToString::operator()(const CSample& sample) const { bool CSample::SFromString::operator()(const std::string& token, CSample& value) const { core::CStringUtils::TStrVec tokens; std::string remainder; - core::CStringUtils::tokenise(std::string(1, core::CPersistUtils::PAIR_DELIMITER), token, tokens, remainder); + core::CStringUtils::tokenise(std::string(1, core::CPersistUtils::PAIR_DELIMITER), + token, tokens, remainder); if (!remainder.empty()) { tokens.push_back(remainder); } - if (!core::CStringUtils::stringToType(tokens[0], value.m_Time) || !core::CStringUtils::stringToType(tokens[1], value.m_VarianceScale) || + if (!core::CStringUtils::stringToType(tokens[0], value.m_Time) || + !core::CStringUtils::stringToType(tokens[1], value.m_VarianceScale) || !core::CStringUtils::stringToType(tokens[2], value.m_Count)) { LOG_ERROR(<< "Cannot parse as sample: " << token); return false; } @@ -83,7 +89,8 @@ uint64_t CSample::checksum() const { std::string CSample::print() const { std::ostringstream result; - result << '(' << m_Time << ' ' << core::CContainerPrinter::print(m_Value) << ' ' << m_VarianceScale << ' ' << m_Count << ')'; + result << '(' << m_Time << ' ' << core::CContainerPrinter::print(m_Value) + << ' ' << m_VarianceScale << ' ' << m_Count << ')'; return result.str(); } diff --git a/lib/model/CSampleCounts.cc b/lib/model/CSampleCounts.cc index 68cfeac867..fb669750bb 100644 --- a/lib/model/CSampleCounts.cc +++ b/lib/model/CSampleCounts.cc @@ -35,7 +35,8 @@ using TStrCRef = boost::reference_wrapper<const std::string>; using TStrCRefUInt64Map = std::map<TStrCRef, uint64_t>; } -CSampleCounts::CSampleCounts(unsigned int sampleCountOverride) : m_SampleCountOverride(sampleCountOverride) { +CSampleCounts::CSampleCounts(unsigned int sampleCountOverride) + : m_SampleCountOverride(sampleCountOverride) { } CSampleCounts::CSampleCounts(bool isForPersistence, const CSampleCounts& other) @@ -57,8 +58,10 @@ void CSampleCounts::acceptPersistInserter(core::CStatePersistInserter& inserter) // hence not persisted or restored. core::CPersistUtils::persist(SAMPLE_COUNT_TAG, m_SampleCounts, inserter); - core::CPersistUtils::persist(MEAN_NON_ZERO_BUCKET_COUNT_TAG, m_MeanNonZeroBucketCounts, inserter); - core::CPersistUtils::persist(EFFECTIVE_SAMPLE_VARIANCE_TAG, m_EffectiveSampleVariances, inserter); + core::CPersistUtils::persist(MEAN_NON_ZERO_BUCKET_COUNT_TAG, + m_MeanNonZeroBucketCounts, inserter); + core::CPersistUtils::persist(EFFECTIVE_SAMPLE_VARIANCE_TAG, + m_EffectiveSampleVariances, inserter); } bool CSampleCounts::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -70,12 +73,14 @@ bool CSampleCounts::acceptRestoreTraverser(core::CStateRestoreTraverser& travers return false; } } else if (name == MEAN_NON_ZERO_BUCKET_COUNT_TAG) { - if (core::CPersistUtils::restore(name, m_MeanNonZeroBucketCounts, traverser) == false) { + if (core::CPersistUtils::restore(name, m_MeanNonZeroBucketCounts, + traverser) == false) { LOG_ERROR(<< "Invalid non-zero bucket count means"); return false; } } else if (name == EFFECTIVE_SAMPLE_VARIANCE_TAG) { - if (core::CPersistUtils::restore(name, m_EffectiveSampleVariances, traverser) == false) { + if (core::CPersistUtils::restore(name, m_EffectiveSampleVariances, + traverser) == false) { LOG_ERROR(<< "Invalid effective sample variances"); return false; } @@ -85,7 +90,9 @@ bool CSampleCounts::acceptRestoreTraverser(core::CStateRestoreTraverser& travers } unsigned int CSampleCounts::count(std::size_t id) const { - return m_SampleCountOverride > 0 ?
m_SampleCountOverride : id < m_SampleCounts.size() ? m_SampleCounts[id] : 0; + return m_SampleCountOverride > 0 + ? m_SampleCountOverride + : id < m_SampleCounts.size() ? m_SampleCounts[id] : 0; } double CSampleCounts::effectiveSampleCount(std::size_t id) const { @@ -112,12 +119,16 @@ void CSampleCounts::resetSampleCount(const CDataGatherer& gatherer, std::size_t if (maths::CBasicStatistics::count(count_) >= NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT) { unsigned sampleCountThreshold = 0; const CDataGatherer::TFeatureVec& features = gatherer.features(); - for (CDataGatherer::TFeatureVecCItr i = features.begin(); i != features.end(); ++i) { - sampleCountThreshold = std::max(sampleCountThreshold, model_t::minimumSampleCount(*i)); + for (CDataGatherer::TFeatureVecCItr i = features.begin(); + i != features.end(); ++i) { + sampleCountThreshold = + std::max(sampleCountThreshold, model_t::minimumSampleCount(*i)); } double count = maths::CBasicStatistics::mean(count_); - m_SampleCounts[id] = std::max(sampleCountThreshold, static_cast<unsigned int>(count + 0.5)); - LOG_DEBUG(<< "Setting sample count to " << m_SampleCounts[id] << " for " << this->name(gatherer, id)); + m_SampleCounts[id] = std::max(sampleCountThreshold, + static_cast<unsigned int>(count + 0.5)); + LOG_DEBUG(<< "Setting sample count to " << m_SampleCounts[id] << " for " + << this->name(gatherer, id)); } } @@ -129,7 +140,8 @@ void CSampleCounts::refresh(const CDataGatherer& gatherer) { unsigned sampleCountThreshold = 0; const CDataGatherer::TFeatureVec& features = gatherer.features(); for (CDataGatherer::TFeatureVecCItr i = features.begin(); i != features.end(); ++i) { - sampleCountThreshold = std::max(sampleCountThreshold, model_t::minimumSampleCount(*i)); + sampleCountThreshold = + std::max(sampleCountThreshold, model_t::minimumSampleCount(*i)); } for (std::size_t id = 0u; id < m_MeanNonZeroBucketCounts.size(); ++id) { @@ -138,12 +150,15 @@ void CSampleCounts::refresh(const CDataGatherer& gatherer) { if (maths::CBasicStatistics::count(count_) >= NUMBER_BUCKETS_TO_REFRESH_SAMPLE_COUNT) { double count = maths::CBasicStatistics::mean(count_); double scale = count / static_cast<double>(m_SampleCounts[id]); - if (scale < maths::MINIMUM_ACCURATE_VARIANCE_SCALE || scale > maths::MAXIMUM_ACCURATE_VARIANCE_SCALE) { + if (scale < maths::MINIMUM_ACCURATE_VARIANCE_SCALE || + scale > maths::MAXIMUM_ACCURATE_VARIANCE_SCALE) { unsigned int oldCount = m_SampleCounts[id]; - unsigned int newCount = std::max(sampleCountThreshold, static_cast<unsigned int>(count + 0.5)); - LOG_TRACE(<< "Sample count " << oldCount << " is too far from the bucket mean " << count << " count, resetting to " - << newCount << ". This may cause temporary instability" - << " for " << this->name(gatherer, id) << " (" << id << "). (Mean count " << count_ << ")"); + unsigned int newCount = std::max( + sampleCountThreshold, static_cast<unsigned int>(count + 0.5)); + LOG_TRACE(<< "Sample count " << oldCount << " is too far from the bucket mean " + << count << " count, resetting to " << newCount << ". This may cause temporary instability" + << " for " << this->name(gatherer, id) << " (" + << id << "). (Mean count " << count_ << ")"); m_SampleCounts[id] = newCount; // Avoid compiler warning in the case of LOG_TRACE being compiled out static_cast<void>(oldCount); @@ -151,8 +166,10 @@ void CSampleCounts::refresh(const CDataGatherer& gatherer) { } } else if (maths::CBasicStatistics::count(count_) >= NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT) { double count = maths::CBasicStatistics::mean(count_); - m_SampleCounts[id] = std::max(sampleCountThreshold, static_cast<unsigned int>(count + 0.5)); - LOG_TRACE(<< "Setting sample count to " << m_SampleCounts[id] << " for " << this->name(gatherer, id) << " (" << id + m_SampleCounts[id] = std::max(sampleCountThreshold, + static_cast<unsigned int>(count + 0.5)); + LOG_TRACE(<< "Setting sample count to " << m_SampleCounts[id] + << " for " << this->name(gatherer, id) << " (" << id << "). (Mean count " << count_ << ")"); } } @@ -179,18 +196,25 @@ void CSampleCounts::recycle(const TSizeVec& idsToRemove) { m_EffectiveSampleVariances[id] = TMeanAccumulator(); } LOG_TRACE(<< "m_SampleCounts = " << core::CContainerPrinter::print(m_SampleCounts)); - LOG_TRACE(<< "m_MeanNonZeroBucketCounts = " << core::CContainerPrinter::print(m_MeanNonZeroBucketCounts)); - LOG_TRACE(<< "m_EffectiveSampleVariances = " << core::CContainerPrinter::print(m_EffectiveSampleVariances)); + LOG_TRACE(<< "m_MeanNonZeroBucketCounts = " + << core::CContainerPrinter::print(m_MeanNonZeroBucketCounts)); + LOG_TRACE(<< "m_EffectiveSampleVariances = " + << core::CContainerPrinter::print(m_EffectiveSampleVariances)); } void CSampleCounts::remove(std::size_t lowestIdToRemove) { if (lowestIdToRemove < m_SampleCounts.size()) { - m_SampleCounts.erase(m_SampleCounts.begin() + lowestIdToRemove, m_SampleCounts.end()); - m_MeanNonZeroBucketCounts.erase(m_MeanNonZeroBucketCounts.begin() + lowestIdToRemove, m_MeanNonZeroBucketCounts.end()); - m_EffectiveSampleVariances.erase(m_EffectiveSampleVariances.begin() + lowestIdToRemove, m_EffectiveSampleVariances.end()); + m_SampleCounts.erase(m_SampleCounts.begin() + lowestIdToRemove, + m_SampleCounts.end()); + m_MeanNonZeroBucketCounts.erase(m_MeanNonZeroBucketCounts.begin() + lowestIdToRemove, + m_MeanNonZeroBucketCounts.end()); + m_EffectiveSampleVariances.erase(m_EffectiveSampleVariances.begin() + lowestIdToRemove, + m_EffectiveSampleVariances.end()); LOG_TRACE(<< "m_SampleCounts = " << core::CContainerPrinter::print(m_SampleCounts)); - LOG_TRACE(<< "m_MeanNonZeroBucketCounts = " << core::CContainerPrinter::print(m_MeanNonZeroBucketCounts)); - LOG_TRACE(<< "m_EffectiveSampleVariances = " << core::CContainerPrinter::print(m_EffectiveSampleVariances)); + LOG_TRACE(<< "m_MeanNonZeroBucketCounts = " + << core::CContainerPrinter::print(m_MeanNonZeroBucketCounts)); + LOG_TRACE(<< "m_EffectiveSampleVariances = " + << core::CContainerPrinter::print(m_EffectiveSampleVariances)); } } @@ -205,7 +229,8 @@ void CSampleCounts::resize(std::size_t id) { uint64_t CSampleCounts::checksum(const CDataGatherer& gatherer) const { TStrCRefUInt64Map hashes; for (std::size_t id = 0u; id < m_SampleCounts.size(); ++id) { - if (gatherer.isPopulation() ?
gatherer.isAttributeActive(id) + : gatherer.isPersonActive(id)) { uint64_t& hash = hashes[TStrCRef(this->name(gatherer, id))]; hash = maths::CChecksum::calculate(hash, m_SampleCounts[id]); hash = maths::CChecksum::calculate(hash, m_MeanNonZeroBucketCounts[id]); @@ -219,8 +244,10 @@ uint64_t CSampleCounts::checksum(const CDataGatherer& gatherer) const { void CSampleCounts::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("CSampleCounts"); core::CMemoryDebug::dynamicSize("m_SampleCounts", m_SampleCounts, mem); - core::CMemoryDebug::dynamicSize("m_MeanNonZeroBucketCounts", m_MeanNonZeroBucketCounts, mem); - core::CMemoryDebug::dynamicSize("m_EffectiveSampleVariances", m_EffectiveSampleVariances, mem); + core::CMemoryDebug::dynamicSize("m_MeanNonZeroBucketCounts", + m_MeanNonZeroBucketCounts, mem); + core::CMemoryDebug::dynamicSize("m_EffectiveSampleVariances", + m_EffectiveSampleVariances, mem); } std::size_t CSampleCounts::memoryUsage() const { diff --git a/lib/model/CSearchKey.cc b/lib/model/CSearchKey.cc index 6bfe8c765e..7de4d28ba2 100644 --- a/lib/model/CSearchKey.cc +++ b/lib/model/CSearchKey.cc @@ -61,19 +61,23 @@ CSearchKey::CSearchKey(int identifier, std::string overFieldName, std::string partitionFieldName, const TStrVec& influenceFieldNames) - : m_Identifier(identifier), m_Function(function), m_UseNull(useNull), m_ExcludeFrequent(excludeFrequent), m_Hash(0) { + : m_Identifier(identifier), m_Function(function), m_UseNull(useNull), + m_ExcludeFrequent(excludeFrequent), m_Hash(0) { m_FieldName = CStringStore::names().get(fieldName); m_ByFieldName = CStringStore::names().get(byFieldName); m_OverFieldName = CStringStore::names().get(overFieldName); m_PartitionFieldName = CStringStore::names().get(partitionFieldName); - for (TStrVec::const_iterator i = influenceFieldNames.begin(); i != influenceFieldNames.end(); ++i) { + for (TStrVec::const_iterator i = influenceFieldNames.begin(); + i != influenceFieldNames.end(); ++i) { m_InfluenceFieldNames.push_back(CStringStore::influencers().get(*i)); } } CSearchKey::CSearchKey(core::CStateRestoreTraverser& traverser, bool& successful) - : m_Identifier(0), m_Function(function_t::E_IndividualCount), m_UseNull(false), m_ExcludeFrequent(model_t::E_XF_None), m_Hash(0) { - successful = traverser.traverseSubLevel(boost::bind(&CSearchKey::acceptRestoreTraverser, this, _1)); + : m_Identifier(0), m_Function(function_t::E_IndividualCount), + m_UseNull(false), m_ExcludeFrequent(model_t::E_XF_None), m_Hash(0) { + successful = traverser.traverseSubLevel( + boost::bind(&CSearchKey::acceptRestoreTraverser, this, _1)); } bool CSearchKey::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -86,7 +90,8 @@ bool CSearchKey::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) } } else if (name == FUNCTION_NAME_TAG) { int function(-1); - if (core::CStringUtils::stringToType(traverser.value(), function) == false || function < 0) { + if (core::CStringUtils::stringToType(traverser.value(), function) == false || + function < 0) { LOG_ERROR(<< "Invalid function in " << traverser.value()); return false; } @@ -100,7 +105,8 @@ bool CSearchKey::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) m_UseNull = (useNull != 0); } else if (name == EXCLUDE_FREQUENT_TAG) { int excludeFrequent(-1); - if ((core::CStringUtils::stringToType(traverser.value(), excludeFrequent) == false) || (excludeFrequent < 0)) { + if ((core::CStringUtils::stringToType(traverser.value(), excludeFrequent) == false) || + (excludeFrequent < 
0)) { LOG_ERROR(<< "Invalid excludeFrequent flag in " << traverser.value()); return false; } @@ -114,7 +120,8 @@ bool CSearchKey::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) } else if (name == PARTITION_FIELD_NAME_TAG) { m_PartitionFieldName = CStringStore::names().get(traverser.value()); } else if (name == INFLUENCE_FIELD_NAME_TAG) { - m_InfluenceFieldNames.push_back(CStringStore::influencers().get(traverser.value())); + m_InfluenceFieldNames.push_back( + CStringStore::influencers().get(traverser.value())); } } while (traverser.next()); @@ -151,15 +158,17 @@ void CSearchKey::swap(CSearchKey& other) { bool CSearchKey::operator==(const CSearchKey& rhs) const { using TStrEqualTo = std::equal_to<std::string>; - return this->hash() == rhs.hash() && m_Identifier == rhs.m_Identifier && m_Function == rhs.m_Function && m_UseNull == rhs.m_UseNull && - m_ExcludeFrequent == rhs.m_ExcludeFrequent && m_FieldName == rhs.m_FieldName && m_ByFieldName == rhs.m_ByFieldName && - m_OverFieldName == rhs.m_OverFieldName && m_PartitionFieldName == rhs.m_PartitionFieldName && + return this->hash() == rhs.hash() && m_Identifier == rhs.m_Identifier && + m_Function == rhs.m_Function && m_UseNull == rhs.m_UseNull && + m_ExcludeFrequent == rhs.m_ExcludeFrequent && + m_FieldName == rhs.m_FieldName && m_ByFieldName == rhs.m_ByFieldName && + m_OverFieldName == rhs.m_OverFieldName && + m_PartitionFieldName == rhs.m_PartitionFieldName && m_InfluenceFieldNames.size() == rhs.m_InfluenceFieldNames.size() // Compare dereferenced strings rather than pointers as there's a // (small) possibility that the string store will not always return // the same pointer for the same string - && std::equal(m_InfluenceFieldNames.begin(), - m_InfluenceFieldNames.end(), + && std::equal(m_InfluenceFieldNames.begin(), m_InfluenceFieldNames.end(), rhs.m_InfluenceFieldNames.begin(), core::CFunctional::SDereference()); } @@ -186,14 +195,17 @@ bool CSearchKey::operator<(const CSearchKey& rhs) const { return comp < 0; } - if (m_InfluenceFieldNames.size() < rhs.m_InfluenceFieldNames.size()) { + if (m_InfluenceFieldNames.size() < + rhs.m_InfluenceFieldNames.size()) { return true; } - if (m_InfluenceFieldNames.size() > rhs.m_InfluenceFieldNames.size()) { + if (m_InfluenceFieldNames.size() > + rhs.m_InfluenceFieldNames.size()) { return false; } for (std::size_t i = 0u; i < m_InfluenceFieldNames.size(); ++i) { - comp = m_InfluenceFieldNames[i]->compare(*rhs.m_InfluenceFieldNames[i]); + comp = m_InfluenceFieldNames[i]->compare( + *rhs.m_InfluenceFieldNames[i]); if (comp != 0) { return comp < 0; } @@ -252,7 +264,8 @@ bool CSearchKey::isPopulation() const { std::string CSearchKey::toCue() const { std::string cue; cue.reserve(64 + // hopefully covers function description and slashes - m_FieldName->length() + m_ByFieldName->length() + m_OverFieldName->length() + m_PartitionFieldName->length()); + m_FieldName->length() + m_ByFieldName->length() + + m_OverFieldName->length() + m_PartitionFieldName->length()); cue += function_t::print(m_Function); cue += CUE_DELIMITER; cue += m_UseNull ? '1' : '0'; @@ -296,7 +309,8 @@ model_t::EExcludeFrequent CSearchKey::excludeFrequent() const { } bool CSearchKey::hasField(const std::string& name) const { - return *m_PartitionFieldName == name || *m_OverFieldName == name || *m_ByFieldName == name || *m_FieldName == name; + return *m_PartitionFieldName == name || *m_OverFieldName == name || + *m_ByFieldName == name || *m_FieldName == name; } const std::string& CSearchKey::fieldName() const { @@ -340,9 +354,10 @@ std::ostream& operator<<(std::ostream& strm, const CSearchKey& key) { // The format for this is very similar to the format used by toCue() at the // time of writing. However, do NOT combine the code because the intention // is to simplify toCue() in the future. - strm << key.m_Identifier << "==" << function_t::print(key.m_Function) << '/' << (key.m_UseNull ? '1' : '0') << '/' - << static_cast<int>(key.m_ExcludeFrequent) << '/' << *key.m_FieldName << '/' << *key.m_ByFieldName << '/' << *key.m_OverFieldName - << '/' << *key.m_PartitionFieldName << '/'; + strm << key.m_Identifier << "==" << function_t::print(key.m_Function) << '/' + << (key.m_UseNull ? '1' : '0') << '/' << static_cast<int>(key.m_ExcludeFrequent) + << '/' << *key.m_FieldName << '/' << *key.m_ByFieldName << '/' + << *key.m_OverFieldName << '/' << *key.m_PartitionFieldName << '/'; for (size_t i = 0; i < key.m_InfluenceFieldNames.size(); ++i) { if (i > 0) { diff --git a/lib/model/CSimpleCountDetector.cc b/lib/model/CSimpleCountDetector.cc index 8d35f7b7c0..bbcb47954d 100644 --- a/lib/model/CSimpleCountDetector.cc +++ b/lib/model/CSimpleCountDetector.cc @@ -40,7 +40,8 @@ void CSimpleCountDetector::pruneModels() { return; } -const CAnomalyDetector::TStrCPtrVec& CSimpleCountDetector::preprocessFieldValues(const TStrCPtrVec& fieldValues) { +const CAnomalyDetector::TStrCPtrVec& +CSimpleCountDetector::preprocessFieldValues(const TStrCPtrVec& fieldValues) { // The first field value is always the magic word "count", but for // summarised input we need to pass on the true value of the second field if (m_FieldValues.size() > 1) { diff --git a/lib/model/CStringStore.cc b/lib/model/CStringStore.cc index 9edd640deb..f8deea3d98 100644 --- a/lib/model/CStringStore.cc +++ b/lib/model/CStringStore.cc @@ -29,7 +29,9 @@ struct SStrHash { //! \brief Helper class to compare a std::string and a CStoredStringPtr. struct SStrStoredStringPtrEqual { - bool operator()(const std::string& lhs, const core::CStoredStringPtr& rhs) const { return lhs == *rhs; } + bool operator()(const std::string& lhs, const core::CStoredStringPtr& rhs) const { + return lhs == *rhs; + } } STR_EQUAL; // To ensure the singletons are constructed before multiple threads may @@ -150,7 +152,8 @@ void CStringStore::pruneNotThreadSafe() { void CStringStore::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName(this == &CStringStore::names() ? "names StringStore" - : (this == &CStringStore::influencers() ?
"influencers StringStore" + : "unknown StringStore")); mem->addItem("empty string ptr", m_EmptyString.actualMemoryUsage()); core::CScopedFastLock lock(m_Mutex); core::CMemoryDebug::dynamicSize("stored strings", m_Strings, mem); @@ -176,7 +179,9 @@ std::size_t CStringStore::memoryUsage() const { } CStringStore::CStringStore() - : m_Reading(0), m_Writing(0), m_EmptyString(core::CStoredStringPtr::makeStoredString(std::string())), m_StoredStringsMemUse(0) { + : m_Reading(0), m_Writing(0), + m_EmptyString(core::CStoredStringPtr::makeStoredString(std::string())), + m_StoredStringsMemUse(0) { } void CStringStore::clearEverythingTestOnly() { diff --git a/lib/model/FrequencyPredicates.cc b/lib/model/FrequencyPredicates.cc index de665a7056..6aa7e33723 100644 --- a/lib/model/FrequencyPredicates.cc +++ b/lib/model/FrequencyPredicates.cc @@ -9,11 +9,13 @@ namespace ml { namespace model { -CPersonFrequencyGreaterThan::CPersonFrequencyGreaterThan(const CAnomalyDetectorModel& model, double threshold) +CPersonFrequencyGreaterThan::CPersonFrequencyGreaterThan(const CAnomalyDetectorModel& model, + double threshold) : m_Model(&model), m_Threshold(threshold) { } -CAttributeFrequencyGreaterThan::CAttributeFrequencyGreaterThan(const CAnomalyDetectorModel& model, double threshold) +CAttributeFrequencyGreaterThan::CAttributeFrequencyGreaterThan(const CAnomalyDetectorModel& model, + double threshold) : m_Model(&model), m_Threshold(threshold) { } } diff --git a/lib/model/FunctionTypes.cc b/lib/model/FunctionTypes.cc index 5cbf0b2dd4..7568ddd06b 100644 --- a/lib/model/FunctionTypes.cc +++ b/lib/model/FunctionTypes.cc @@ -534,106 +534,174 @@ using TFeatureFunctionVecMapCItr = TFeatureFunctionVecMap::const_iterator; namespace detail { const model_t::EFeature INDIVIDUAL_COUNT_FEATURES[] = {model_t::E_IndividualCountByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_NON_ZERO_COUNT_FEATURES[] = {model_t::E_IndividualNonZeroCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_NON_ZERO_COUNT_FEATURES[] = { + model_t::E_IndividualNonZeroCountByBucketAndPerson}; const model_t::EFeature INDIVIDUAL_RARE_COUNT_FEATURES[] = { model_t::E_IndividualCountByBucketAndPerson, model_t::E_IndividualTotalBucketCountByPerson, }; -const model_t::EFeature INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES[] = {model_t::E_IndividualNonZeroCountByBucketAndPerson, - model_t::E_IndividualTotalBucketCountByPerson}; -const model_t::EFeature INDIVIDUAL_RARE_FEATURES[] = {model_t::E_IndividualTotalBucketCountByPerson, - model_t::E_IndividualIndicatorOfBucketPerson}; -const model_t::EFeature INDIVIDUAL_LOW_COUNTS_FEATURES[] = {model_t::E_IndividualLowCountsByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_HIGH_COUNTS_FEATURES[] = {model_t::E_IndividualHighCountsByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES[] = {model_t::E_IndividualLowNonZeroCountByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES[] = {model_t::E_IndividualHighNonZeroCountByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_DISTINCT_COUNT_FEATURES[] = {model_t::E_IndividualUniqueCountByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES[] = {model_t::E_IndividualLowUniqueCountByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES[] = {model_t::E_IndividualHighUniqueCountByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_INFO_CONTENT_FEATURES[] = {model_t::E_IndividualInfoContentByBucketAndPerson}; -const model_t::EFeature 
INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES[] = {model_t::E_IndividualHighInfoContentByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_LOW_INFO_CONTENT_FEATURES[] = {model_t::E_IndividualLowInfoContentByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_TIME_OF_DAY_FEATURES[] = {model_t::E_IndividualTimeOfDayByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_TIME_OF_WEEK_FEATURES[] = {model_t::E_IndividualTimeOfWeekByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_FEATURES[] = {model_t::E_IndividualMeanByPerson, - model_t::E_IndividualMinByPerson, - model_t::E_IndividualMaxByPerson}; +const model_t::EFeature INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES[] = { + model_t::E_IndividualNonZeroCountByBucketAndPerson, + model_t::E_IndividualTotalBucketCountByPerson}; +const model_t::EFeature INDIVIDUAL_RARE_FEATURES[] = { + model_t::E_IndividualTotalBucketCountByPerson, model_t::E_IndividualIndicatorOfBucketPerson}; +const model_t::EFeature INDIVIDUAL_LOW_COUNTS_FEATURES[] = { + model_t::E_IndividualLowCountsByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_HIGH_COUNTS_FEATURES[] = { + model_t::E_IndividualHighCountsByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES[] = { + model_t::E_IndividualLowNonZeroCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES[] = { + model_t::E_IndividualHighNonZeroCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_DISTINCT_COUNT_FEATURES[] = { + model_t::E_IndividualUniqueCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES[] = { + model_t::E_IndividualLowUniqueCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES[] = { + model_t::E_IndividualHighUniqueCountByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_INFO_CONTENT_FEATURES[] = { + model_t::E_IndividualInfoContentByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES[] = { + model_t::E_IndividualHighInfoContentByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_LOW_INFO_CONTENT_FEATURES[] = { + model_t::E_IndividualLowInfoContentByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_TIME_OF_DAY_FEATURES[] = { + model_t::E_IndividualTimeOfDayByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_TIME_OF_WEEK_FEATURES[] = { + model_t::E_IndividualTimeOfWeekByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_FEATURES[] = { + model_t::E_IndividualMeanByPerson, model_t::E_IndividualMinByPerson, + model_t::E_IndividualMaxByPerson}; const model_t::EFeature INDIVIDUAL_METRIC_MEAN_FEATURES[] = {model_t::E_IndividualMeanByPerson}; const model_t::EFeature INDIVIDUAL_METRIC_LOW_MEAN_FEATURES[] = {model_t::E_IndividualLowMeanByPerson}; const model_t::EFeature INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES[] = {model_t::E_IndividualHighMeanByPerson}; const model_t::EFeature INDIVIDUAL_METRIC_MEDIAN_FEATURES[] = {model_t::E_IndividualMedianByPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES[] = {model_t::E_IndividualLowMedianByPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES[] = {model_t::E_IndividualHighMedianByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES[] = { + model_t::E_IndividualLowMedianByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES[] = { + model_t::E_IndividualHighMedianByPerson}; const model_t::EFeature INDIVIDUAL_METRIC_MIN_FEATURES[] = {model_t::E_IndividualMinByPerson}; const model_t::EFeature 
INDIVIDUAL_METRIC_MAX_FEATURES[] = {model_t::E_IndividualMaxByPerson}; const model_t::EFeature INDIVIDUAL_METRIC_VARIANCE_FEATURES[] = {model_t::E_IndividualVarianceByPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES[] = {model_t::E_IndividualLowVarianceByPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES[] = {model_t::E_IndividualHighVarianceByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES[] = { + model_t::E_IndividualLowVarianceByPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES[] = { + model_t::E_IndividualHighVarianceByPerson}; const model_t::EFeature INDIVIDUAL_METRIC_SUM_FEATURES[] = {model_t::E_IndividualSumByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_SUM_FEATURES[] = {model_t::E_IndividualLowSumByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_SUM_FEATURES[] = {model_t::E_IndividualHighSumByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES[] = {model_t::E_IndividualNonNullSumByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES[] = {model_t::E_IndividualLowNonNullSumByBucketAndPerson}; -const model_t::EFeature INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES[] = {model_t::E_IndividualHighNonNullSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_SUM_FEATURES[] = { + model_t::E_IndividualLowSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_SUM_FEATURES[] = { + model_t::E_IndividualHighSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES[] = { + model_t::E_IndividualNonNullSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES[] = { + model_t::E_IndividualLowNonNullSumByBucketAndPerson}; +const model_t::EFeature INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES[] = { + model_t::E_IndividualHighNonNullSumByBucketAndPerson}; const model_t::EFeature INDIVIDUAL_LAT_LONG_FEATURES[] = {model_t::E_IndividualMeanLatLongByPerson}; const model_t::EFeature INDIVIDUAL_MAX_VELOCITY_FEATURES[] = {model_t::E_IndividualMaxVelocityByPerson}; const model_t::EFeature INDIVIDUAL_MIN_VELOCITY_FEATURES[] = {model_t::E_IndividualMinVelocityByPerson}; -const model_t::EFeature INDIVIDUAL_MEAN_VELOCITY_FEATURES[] = {model_t::E_IndividualMeanVelocityByPerson}; +const model_t::EFeature INDIVIDUAL_MEAN_VELOCITY_FEATURES[] = { + model_t::E_IndividualMeanVelocityByPerson}; const model_t::EFeature INDIVIDUAL_SUM_VELOCITY_FEATURES[] = {model_t::E_IndividualSumVelocityByPerson}; -const model_t::EFeature POPULATION_COUNT_FEATURES[] = {model_t::E_PopulationCountByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_DISTINCT_COUNT_FEATURES[] = {model_t::E_PopulationUniqueCountByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_LOW_DISTINCT_COUNT_FEATURES[] = {model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_HIGH_DISTINCT_COUNT_FEATURES[] = {model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_RARE_FEATURES[] = {model_t::E_PopulationIndicatorOfBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}; -const model_t::EFeature POPULATION_RARE_COUNT_FEATURES[] = {model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}; -const model_t::EFeature POPULATION_FREQ_RARE_FEATURES[] = {model_t::E_PopulationAttributeTotalCountByPerson, - 
model_t::E_PopulationIndicatorOfBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}; -const model_t::EFeature POPULATION_FREQ_RARE_COUNT_FEATURES[] = {model_t::E_PopulationAttributeTotalCountByPerson, - model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}; -const model_t::EFeature POPULATION_LOW_COUNTS_FEATURES[] = {model_t::E_PopulationLowCountsByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_HIGH_COUNTS_FEATURES[] = {model_t::E_PopulationHighCountsByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_INFO_CONTENT_FEATURES[] = {model_t::E_PopulationInfoContentByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_LOW_INFO_CONTENT_FEATURES[] = {model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_HIGH_INFO_CONTENT_FEATURES[] = {model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_TIME_OF_DAY_FEATURES[] = {model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_TIME_OF_WEEK_FEATURES[] = {model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_FEATURES[] = {model_t::E_PopulationMeanByPersonAndAttribute, - model_t::E_PopulationMinByPersonAndAttribute, - model_t::E_PopulationMaxByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_MEAN_FEATURES[] = {model_t::E_PopulationMeanByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_LOW_MEAN_FEATURES[] = {model_t::E_PopulationLowMeanByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_HIGH_MEAN_FEATURES[] = {model_t::E_PopulationHighMeanByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_MEDIAN_FEATURES[] = {model_t::E_PopulationMedianByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_LOW_MEDIAN_FEATURES[] = {model_t::E_PopulationLowMedianByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_HIGH_MEDIAN_FEATURES[] = {model_t::E_PopulationHighMedianByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_MIN_FEATURES[] = {model_t::E_PopulationMinByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_MAX_FEATURES[] = {model_t::E_PopulationMaxByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_VARIANCE_FEATURES[] = {model_t::E_PopulationVarianceByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_LOW_VARIANCE_FEATURES[] = {model_t::E_PopulationLowVarianceByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_HIGH_VARIANCE_FEATURES[] = {model_t::E_PopulationHighVarianceByPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_SUM_FEATURES[] = {model_t::E_PopulationSumByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_LOW_SUM_FEATURES[] = {model_t::E_PopulationLowSumByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_METRIC_HIGH_SUM_FEATURES[] = {model_t::E_PopulationHighSumByBucketPersonAndAttribute}; -const model_t::EFeature POPULATION_LAT_LONG_FEATURES[] = {model_t::E_PopulationMeanLatLongByPersonAndAttribute}; -const model_t::EFeature POPULATION_MAX_VELOCITY_FEATURES[] = {model_t::E_PopulationMaxVelocityByPersonAndAttribute}; -const model_t::EFeature POPULATION_MIN_VELOCITY_FEATURES[] = {model_t::E_PopulationMinVelocityByPersonAndAttribute}; -const model_t::EFeature POPULATION_MEAN_VELOCITY_FEATURES[] = {model_t::E_PopulationMeanVelocityByPersonAndAttribute}; -const 
model_t::EFeature POPULATION_SUM_VELOCITY_FEATURES[] = {model_t::E_PopulationSumVelocityByPersonAndAttribute};
+const model_t::EFeature POPULATION_COUNT_FEATURES[] = {
+    model_t::E_PopulationCountByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_DISTINCT_COUNT_FEATURES[] = {
+    model_t::E_PopulationUniqueCountByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_LOW_DISTINCT_COUNT_FEATURES[] = {
+    model_t::E_PopulationLowUniqueCountByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_HIGH_DISTINCT_COUNT_FEATURES[] = {
+    model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_RARE_FEATURES[] = {
+    model_t::E_PopulationIndicatorOfBucketPersonAndAttribute,
+    model_t::E_PopulationUniquePersonCountByAttribute};
+const model_t::EFeature POPULATION_RARE_COUNT_FEATURES[] = {
+    model_t::E_PopulationCountByBucketPersonAndAttribute,
+    model_t::E_PopulationUniquePersonCountByAttribute};
+const model_t::EFeature POPULATION_FREQ_RARE_FEATURES[] = {
+    model_t::E_PopulationAttributeTotalCountByPerson,
+    model_t::E_PopulationIndicatorOfBucketPersonAndAttribute,
+    model_t::E_PopulationUniquePersonCountByAttribute};
+const model_t::EFeature POPULATION_FREQ_RARE_COUNT_FEATURES[] = {
+    model_t::E_PopulationAttributeTotalCountByPerson,
+    model_t::E_PopulationCountByBucketPersonAndAttribute,
+    model_t::E_PopulationUniquePersonCountByAttribute};
+const model_t::EFeature POPULATION_LOW_COUNTS_FEATURES[] = {
+    model_t::E_PopulationLowCountsByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_HIGH_COUNTS_FEATURES[] = {
+    model_t::E_PopulationHighCountsByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_INFO_CONTENT_FEATURES[] = {
+    model_t::E_PopulationInfoContentByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_LOW_INFO_CONTENT_FEATURES[] = {
+    model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_HIGH_INFO_CONTENT_FEATURES[] = {
+    model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_TIME_OF_DAY_FEATURES[] = {
+    model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_TIME_OF_WEEK_FEATURES[] = {
+    model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_FEATURES[] = {
+    model_t::E_PopulationMeanByPersonAndAttribute, model_t::E_PopulationMinByPersonAndAttribute,
+    model_t::E_PopulationMaxByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_MEAN_FEATURES[] = {
+    model_t::E_PopulationMeanByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_LOW_MEAN_FEATURES[] = {
+    model_t::E_PopulationLowMeanByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_HIGH_MEAN_FEATURES[] = {
+    model_t::E_PopulationHighMeanByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_MEDIAN_FEATURES[] = {
+    model_t::E_PopulationMedianByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_LOW_MEDIAN_FEATURES[] = {
+    model_t::E_PopulationLowMedianByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_HIGH_MEDIAN_FEATURES[] = {
+    model_t::E_PopulationHighMedianByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_MIN_FEATURES[] = {
+    model_t::E_PopulationMinByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_MAX_FEATURES[] = {
+    model_t::E_PopulationMaxByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_VARIANCE_FEATURES[] = {
+    model_t::E_PopulationVarianceByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_LOW_VARIANCE_FEATURES[] = {
+    model_t::E_PopulationLowVarianceByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_HIGH_VARIANCE_FEATURES[] = {
+    model_t::E_PopulationHighVarianceByPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_SUM_FEATURES[] = {
+    model_t::E_PopulationSumByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_LOW_SUM_FEATURES[] = {
+    model_t::E_PopulationLowSumByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_METRIC_HIGH_SUM_FEATURES[] = {
+    model_t::E_PopulationHighSumByBucketPersonAndAttribute};
+const model_t::EFeature POPULATION_LAT_LONG_FEATURES[] = {
+    model_t::E_PopulationMeanLatLongByPersonAndAttribute};
+const model_t::EFeature POPULATION_MAX_VELOCITY_FEATURES[] = {
+    model_t::E_PopulationMaxVelocityByPersonAndAttribute};
+const model_t::EFeature POPULATION_MIN_VELOCITY_FEATURES[] = {
+    model_t::E_PopulationMinVelocityByPersonAndAttribute};
+const model_t::EFeature POPULATION_MEAN_VELOCITY_FEATURES[] = {
+    model_t::E_PopulationMeanVelocityByPersonAndAttribute};
+const model_t::EFeature POPULATION_SUM_VELOCITY_FEATURES[] = {
+    model_t::E_PopulationSumVelocityByPersonAndAttribute};
 const model_t::EFeature PEERS_COUNT_FEATURES[] = {model_t::E_PeersCountByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_DISTINCT_COUNT_FEATURES[] = {model_t::E_PeersUniqueCountByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_LOW_DISTINCT_COUNT_FEATURES[] = {model_t::E_PeersLowUniqueCountByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_HIGH_DISTINCT_COUNT_FEATURES[] = {model_t::E_PeersHighUniqueCountByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_LOW_COUNTS_FEATURES[] = {model_t::E_PeersLowCountsByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_HIGH_COUNTS_FEATURES[] = {model_t::E_PeersHighCountsByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_INFO_CONTENT_FEATURES[] = {model_t::E_PeersInfoContentByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_LOW_INFO_CONTENT_FEATURES[] = {model_t::E_PeersLowInfoContentByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_HIGH_INFO_CONTENT_FEATURES[] = {model_t::E_PeersHighInfoContentByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_TIME_OF_DAY_FEATURES[] = {model_t::E_PeersTimeOfDayByBucketPersonAndAttribute};
-const model_t::EFeature PEERS_TIME_OF_WEEK_FEATURES[] = {model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_DISTINCT_COUNT_FEATURES[] = {
+    model_t::E_PeersUniqueCountByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_LOW_DISTINCT_COUNT_FEATURES[] = {
+    model_t::E_PeersLowUniqueCountByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_HIGH_DISTINCT_COUNT_FEATURES[] = {
+    model_t::E_PeersHighUniqueCountByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_LOW_COUNTS_FEATURES[] = {
+    model_t::E_PeersLowCountsByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_HIGH_COUNTS_FEATURES[] = {
+    model_t::E_PeersHighCountsByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_INFO_CONTENT_FEATURES[] = {
+    model_t::E_PeersInfoContentByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_LOW_INFO_CONTENT_FEATURES[] = {
+    model_t::E_PeersLowInfoContentByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_HIGH_INFO_CONTENT_FEATURES[] = {
+    model_t::E_PeersHighInfoContentByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_TIME_OF_DAY_FEATURES[] = {
+    model_t::E_PeersTimeOfDayByBucketPersonAndAttribute};
+const model_t::EFeature PEERS_TIME_OF_WEEK_FEATURES[] = {
+    model_t::E_PeersTimeOfWeekByBucketPersonAndAttribute};

 // Function names
 const std::string COUNT("count");
@@ -685,331 +753,419 @@ const std::string UNEXPECTED_FUNCTION("-");

 #define END(x) x + sizeof(x) / sizeof(x[0])

 //! The features for the count by function.
-const TFeatureVec INDIVIDUAL_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_COUNT_FEATURES), END(detail::INDIVIDUAL_COUNT_FEATURES));
+const TFeatureVec INDIVIDUAL_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_COUNT_FEATURES),
+                                            END(detail::INDIVIDUAL_COUNT_FEATURES));

 //! The features for the non-zero count by function.
-const TFeatureVec INDIVIDUAL_NON_ZERO_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_NON_ZERO_COUNT_FEATURES),
-                                                     END(detail::INDIVIDUAL_NON_ZERO_COUNT_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_NON_ZERO_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_NON_ZERO_COUNT_FEATURES),
+                                       END(detail::INDIVIDUAL_NON_ZERO_COUNT_FEATURES));

 //! The features for the rare count by function.
-const TFeatureVec INDIVIDUAL_RARE_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_RARE_COUNT_FEATURES),
-                                                 END(detail::INDIVIDUAL_RARE_COUNT_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_RARE_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_RARE_COUNT_FEATURES),
+                                   END(detail::INDIVIDUAL_RARE_COUNT_FEATURES));

 //! The features for the rare non-zero count by function.
-const TFeatureVec INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES),
-                                                          END(detail::INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES));
+const TFeatureVec INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES(
+    BEGIN(detail::INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES),
+    END(detail::INDIVIDUAL_RARE_NON_ZERO_COUNT_FEATURES));

 //! The features for the rare in time by function.
-const TFeatureVec INDIVIDUAL_RARE_FEATURES(BEGIN(detail::INDIVIDUAL_RARE_FEATURES), END(detail::INDIVIDUAL_RARE_FEATURES));
+const TFeatureVec INDIVIDUAL_RARE_FEATURES(BEGIN(detail::INDIVIDUAL_RARE_FEATURES),
+                                           END(detail::INDIVIDUAL_RARE_FEATURES));

 //! The features for the low count by function.
-const TFeatureVec INDIVIDUAL_LOW_COUNTS_FEATURES(BEGIN(detail::INDIVIDUAL_LOW_COUNTS_FEATURES),
-                                                 END(detail::INDIVIDUAL_LOW_COUNTS_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_LOW_COUNTS_FEATURES(BEGIN(detail::INDIVIDUAL_LOW_COUNTS_FEATURES),
+                                   END(detail::INDIVIDUAL_LOW_COUNTS_FEATURES));

 //! The features for the high count by function.
-const TFeatureVec INDIVIDUAL_HIGH_COUNTS_FEATURES(BEGIN(detail::INDIVIDUAL_HIGH_COUNTS_FEATURES),
-                                                  END(detail::INDIVIDUAL_HIGH_COUNTS_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_HIGH_COUNTS_FEATURES(BEGIN(detail::INDIVIDUAL_HIGH_COUNTS_FEATURES),
+                                    END(detail::INDIVIDUAL_HIGH_COUNTS_FEATURES));

 //! The features for the low non zero count by function.
-const TFeatureVec INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES),
-                                                         END(detail::INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES));
+const TFeatureVec INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES(
+    BEGIN(detail::INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES),
+    END(detail::INDIVIDUAL_LOW_NON_ZERO_COUNT_FEATURES));

 //! The features for the high non zero count by function.
-const TFeatureVec INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES),
-                                                          END(detail::INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES));
+const TFeatureVec INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES(
+    BEGIN(detail::INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES),
+    END(detail::INDIVIDUAL_HIGH_NON_ZERO_COUNT_FEATURES));

 //! The features for the distinct count function.
-const TFeatureVec INDIVIDUAL_DISTINCT_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_DISTINCT_COUNT_FEATURES),
-                                                     END(detail::INDIVIDUAL_DISTINCT_COUNT_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_DISTINCT_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_DISTINCT_COUNT_FEATURES),
+                                       END(detail::INDIVIDUAL_DISTINCT_COUNT_FEATURES));

 //! The features for the low distinct count function.
-const TFeatureVec INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES),
-                                                         END(detail::INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES));
+const TFeatureVec INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES(
+    BEGIN(detail::INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES),
+    END(detail::INDIVIDUAL_LOW_DISTINCT_COUNT_FEATURES));

 //! The features for the high distinct count function.
-const TFeatureVec INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES(BEGIN(detail::INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES),
-                                                          END(detail::INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES));
+const TFeatureVec INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES(
+    BEGIN(detail::INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES),
+    END(detail::INDIVIDUAL_HIGH_DISTINCT_COUNT_FEATURES));

 //! The features for the individual info_content function
-const TFeatureVec INDIVIDUAL_INFO_CONTENT_FEATURES(BEGIN(detail::INDIVIDUAL_INFO_CONTENT_FEATURES),
-                                                   END(detail::INDIVIDUAL_INFO_CONTENT_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_INFO_CONTENT_FEATURES(BEGIN(detail::INDIVIDUAL_INFO_CONTENT_FEATURES),
+                                     END(detail::INDIVIDUAL_INFO_CONTENT_FEATURES));

 //! The features for the individual high_info_content function
-const TFeatureVec INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES(BEGIN(detail::INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES),
-                                                        END(detail::INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES));
+const TFeatureVec INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES(
+    BEGIN(detail::INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES),
+    END(detail::INDIVIDUAL_HIGH_INFO_CONTENT_FEATURES));

 //! The features for the individual low_info_content function
-const TFeatureVec INDIVIDUAL_LOW_INFO_CONTENT_FEATURES(BEGIN(detail::INDIVIDUAL_LOW_INFO_CONTENT_FEATURES),
-                                                       END(detail::INDIVIDUAL_LOW_INFO_CONTENT_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_LOW_INFO_CONTENT_FEATURES(BEGIN(detail::INDIVIDUAL_LOW_INFO_CONTENT_FEATURES),
+                                         END(detail::INDIVIDUAL_LOW_INFO_CONTENT_FEATURES));

 //! The features for the time-of-day function.
-const TFeatureVec INDIVIDUAL_TIME_OF_DAY_FEATURES(BEGIN(detail::INDIVIDUAL_TIME_OF_DAY_FEATURES),
-                                                  END(detail::INDIVIDUAL_TIME_OF_DAY_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_TIME_OF_DAY_FEATURES(BEGIN(detail::INDIVIDUAL_TIME_OF_DAY_FEATURES),
+                                    END(detail::INDIVIDUAL_TIME_OF_DAY_FEATURES));

 //! The features for the time-of-week function.
-const TFeatureVec INDIVIDUAL_TIME_OF_WEEK_FEATURES(BEGIN(detail::INDIVIDUAL_TIME_OF_WEEK_FEATURES),
-                                                   END(detail::INDIVIDUAL_TIME_OF_WEEK_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_TIME_OF_WEEK_FEATURES(BEGIN(detail::INDIVIDUAL_TIME_OF_WEEK_FEATURES),
+                                     END(detail::INDIVIDUAL_TIME_OF_WEEK_FEATURES));

 //! The features for the metric by function.
-const TFeatureVec INDIVIDUAL_METRIC_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_FEATURES), END(detail::INDIVIDUAL_METRIC_FEATURES));
+const TFeatureVec INDIVIDUAL_METRIC_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_FEATURES),
+                                             END(detail::INDIVIDUAL_METRIC_FEATURES));

 //! The features for the metric mean by function.
-const TFeatureVec INDIVIDUAL_METRIC_MEAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MEAN_FEATURES),
-                                                  END(detail::INDIVIDUAL_METRIC_MEAN_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_MEAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MEAN_FEATURES),
+                                    END(detail::INDIVIDUAL_METRIC_MEAN_FEATURES));

 //! The features for the metric low mean by function.
-const TFeatureVec INDIVIDUAL_METRIC_LOW_MEAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_LOW_MEAN_FEATURES),
-                                                      END(detail::INDIVIDUAL_METRIC_LOW_MEAN_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_LOW_MEAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_LOW_MEAN_FEATURES),
+                                        END(detail::INDIVIDUAL_METRIC_LOW_MEAN_FEATURES));

 //! The features for the metric high mean by function.
-const TFeatureVec INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES),
-                                                       END(detail::INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES),
+                                         END(detail::INDIVIDUAL_METRIC_HIGH_MEAN_FEATURES));

 //! The features for the metric median by function.
-const TFeatureVec INDIVIDUAL_METRIC_MEDIAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MEDIAN_FEATURES),
-                                                    END(detail::INDIVIDUAL_METRIC_MEDIAN_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_MEDIAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MEDIAN_FEATURES),
+                                      END(detail::INDIVIDUAL_METRIC_MEDIAN_FEATURES));

 //! The features for the metric low median by function.
-const TFeatureVec INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES),
-                                                        END(detail::INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES));
+const TFeatureVec INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES(
+    BEGIN(detail::INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES),
+    END(detail::INDIVIDUAL_METRIC_LOW_MEDIAN_FEATURES));

 //! The features for the metric high median by function.
-const TFeatureVec INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES),
-                                                         END(detail::INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES));
+const TFeatureVec INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES(
+    BEGIN(detail::INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES),
+    END(detail::INDIVIDUAL_METRIC_HIGH_MEDIAN_FEATURES));

 //! The features for the metric min by function.
-const TFeatureVec INDIVIDUAL_METRIC_MIN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MIN_FEATURES),
-                                                 END(detail::INDIVIDUAL_METRIC_MIN_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_MIN_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MIN_FEATURES),
+                                   END(detail::INDIVIDUAL_METRIC_MIN_FEATURES));

 //! The features for the metric max by function.
-const TFeatureVec INDIVIDUAL_METRIC_MAX_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MAX_FEATURES),
-                                                 END(detail::INDIVIDUAL_METRIC_MAX_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_MAX_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_MAX_FEATURES),
+                                   END(detail::INDIVIDUAL_METRIC_MAX_FEATURES));

 //! The features for the metric variance by function.
-const TFeatureVec INDIVIDUAL_METRIC_VARIANCE_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_VARIANCE_FEATURES),
-                                                      END(detail::INDIVIDUAL_METRIC_VARIANCE_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_VARIANCE_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_VARIANCE_FEATURES),
+                                        END(detail::INDIVIDUAL_METRIC_VARIANCE_FEATURES));

 //! The features for the metric low variance by function.
-const TFeatureVec INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES),
-                                                          END(detail::INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES));
+const TFeatureVec INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES(
+    BEGIN(detail::INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES),
+    END(detail::INDIVIDUAL_METRIC_LOW_VARIANCE_FEATURES));

 //! The features for the metric high variance by function.
-const TFeatureVec INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES),
-                                                           END(detail::INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES));
+const TFeatureVec INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES(
+    BEGIN(detail::INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES),
+    END(detail::INDIVIDUAL_METRIC_HIGH_VARIANCE_FEATURES));

 //! The features for the metric sum by function.
-const TFeatureVec INDIVIDUAL_METRIC_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_SUM_FEATURES),
-                                                 END(detail::INDIVIDUAL_METRIC_SUM_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_SUM_FEATURES),
+                                   END(detail::INDIVIDUAL_METRIC_SUM_FEATURES));

 //! The features for the metric low sum by function.
-const TFeatureVec INDIVIDUAL_METRIC_LOW_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_LOW_SUM_FEATURES),
-                                                     END(detail::INDIVIDUAL_METRIC_LOW_SUM_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_LOW_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_LOW_SUM_FEATURES),
+                                       END(detail::INDIVIDUAL_METRIC_LOW_SUM_FEATURES));

 //! The features for the metric high sum by function.
-const TFeatureVec INDIVIDUAL_METRIC_HIGH_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_HIGH_SUM_FEATURES),
-                                                      END(detail::INDIVIDUAL_METRIC_HIGH_SUM_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_METRIC_HIGH_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_HIGH_SUM_FEATURES),
+                                        END(detail::INDIVIDUAL_METRIC_HIGH_SUM_FEATURES));

 //! The features for the metric non-null sum by function.
-const TFeatureVec INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES),
-                                                          END(detail::INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES));
+const TFeatureVec INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES(
+    BEGIN(detail::INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES),
+    END(detail::INDIVIDUAL_METRIC_NON_NULL_SUM_FEATURES));

 //! The features for the metric low non-null sum by function.
-const TFeatureVec INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES),
-                                                              END(detail::INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES));
+const TFeatureVec INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES(
+    BEGIN(detail::INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES),
+    END(detail::INDIVIDUAL_METRIC_LOW_NON_NULL_SUM_FEATURES));

 //! The features for the metric high non-null sum by function.
-const TFeatureVec INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES(BEGIN(detail::INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES),
-                                                               END(detail::INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES));
+const TFeatureVec INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES(
+    BEGIN(detail::INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES),
+    END(detail::INDIVIDUAL_METRIC_HIGH_NON_NULL_SUM_FEATURES));

 //! The features for the metric latitude and longitude by function.
-const TFeatureVec INDIVIDUAL_LAT_LONG_FEATURES(BEGIN(detail::INDIVIDUAL_LAT_LONG_FEATURES), END(detail::INDIVIDUAL_LAT_LONG_FEATURES));
+const TFeatureVec INDIVIDUAL_LAT_LONG_FEATURES(BEGIN(detail::INDIVIDUAL_LAT_LONG_FEATURES),
+                                               END(detail::INDIVIDUAL_LAT_LONG_FEATURES));

 //! The features for the metric max velocity by function.
-const TFeatureVec INDIVIDUAL_MAX_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_MAX_VELOCITY_FEATURES),
-                                                   END(detail::INDIVIDUAL_MAX_VELOCITY_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_MAX_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_MAX_VELOCITY_FEATURES),
+                                     END(detail::INDIVIDUAL_MAX_VELOCITY_FEATURES));

 //! The features for the metric min velocity by function.
-const TFeatureVec INDIVIDUAL_MIN_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_MIN_VELOCITY_FEATURES),
-                                                   END(detail::INDIVIDUAL_MIN_VELOCITY_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_MIN_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_MIN_VELOCITY_FEATURES),
+                                     END(detail::INDIVIDUAL_MIN_VELOCITY_FEATURES));

 //! The features for the metric mean velocity by function.
-const TFeatureVec INDIVIDUAL_MEAN_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_MEAN_VELOCITY_FEATURES),
-                                                    END(detail::INDIVIDUAL_MEAN_VELOCITY_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_MEAN_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_MEAN_VELOCITY_FEATURES),
+                                      END(detail::INDIVIDUAL_MEAN_VELOCITY_FEATURES));

 //! The features for the metric sum velocity by function.
-const TFeatureVec INDIVIDUAL_SUM_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_SUM_VELOCITY_FEATURES),
-                                                   END(detail::INDIVIDUAL_SUM_VELOCITY_FEATURES));
+const TFeatureVec
+    INDIVIDUAL_SUM_VELOCITY_FEATURES(BEGIN(detail::INDIVIDUAL_SUM_VELOCITY_FEATURES),
+                                     END(detail::INDIVIDUAL_SUM_VELOCITY_FEATURES));

 //! The features for the count over function.
-const TFeatureVec POPULATION_COUNT_FEATURES(BEGIN(detail::POPULATION_COUNT_FEATURES), END(detail::POPULATION_COUNT_FEATURES));
+const TFeatureVec POPULATION_COUNT_FEATURES(BEGIN(detail::POPULATION_COUNT_FEATURES),
+                                            END(detail::POPULATION_COUNT_FEATURES));

 //! The features for the distinct count over function.
-const TFeatureVec POPULATION_DISTINCT_COUNT_FEATURES(BEGIN(detail::POPULATION_DISTINCT_COUNT_FEATURES),
-                                                     END(detail::POPULATION_DISTINCT_COUNT_FEATURES));
+const TFeatureVec
+    POPULATION_DISTINCT_COUNT_FEATURES(BEGIN(detail::POPULATION_DISTINCT_COUNT_FEATURES),
+                                       END(detail::POPULATION_DISTINCT_COUNT_FEATURES));

 //! The features for the low distinct count over function.
-const TFeatureVec POPULATION_LOW_DISTINCT_COUNT_FEATURES(BEGIN(detail::POPULATION_LOW_DISTINCT_COUNT_FEATURES),
-                                                         END(detail::POPULATION_LOW_DISTINCT_COUNT_FEATURES));
+const TFeatureVec POPULATION_LOW_DISTINCT_COUNT_FEATURES(
+    BEGIN(detail::POPULATION_LOW_DISTINCT_COUNT_FEATURES),
+    END(detail::POPULATION_LOW_DISTINCT_COUNT_FEATURES));

 //! The features for the high distinct count over function.
-const TFeatureVec POPULATION_HIGH_DISTINCT_COUNT_FEATURES(BEGIN(detail::POPULATION_HIGH_DISTINCT_COUNT_FEATURES),
-                                                          END(detail::POPULATION_HIGH_DISTINCT_COUNT_FEATURES));
+const TFeatureVec POPULATION_HIGH_DISTINCT_COUNT_FEATURES(
+    BEGIN(detail::POPULATION_HIGH_DISTINCT_COUNT_FEATURES),
+    END(detail::POPULATION_HIGH_DISTINCT_COUNT_FEATURES));

 //! The features for the rare over function.
-const TFeatureVec POPULATION_RARE_FEATURES(BEGIN(detail::POPULATION_RARE_FEATURES), END(detail::POPULATION_RARE_FEATURES));
+const TFeatureVec POPULATION_RARE_FEATURES(BEGIN(detail::POPULATION_RARE_FEATURES),
+                                           END(detail::POPULATION_RARE_FEATURES));

 //! The features for the rare count over function.
-const TFeatureVec POPULATION_RARE_COUNT_FEATURES(BEGIN(detail::POPULATION_RARE_COUNT_FEATURES),
-                                                 END(detail::POPULATION_RARE_COUNT_FEATURES));
+const TFeatureVec
+    POPULATION_RARE_COUNT_FEATURES(BEGIN(detail::POPULATION_RARE_COUNT_FEATURES),
+                                   END(detail::POPULATION_RARE_COUNT_FEATURES));

 //! The features for the rare in population over function.
-const TFeatureVec POPULATION_FREQ_RARE_FEATURES(BEGIN(detail::POPULATION_FREQ_RARE_FEATURES), END(detail::POPULATION_FREQ_RARE_FEATURES));
+const TFeatureVec
+    POPULATION_FREQ_RARE_FEATURES(BEGIN(detail::POPULATION_FREQ_RARE_FEATURES),
+                                  END(detail::POPULATION_FREQ_RARE_FEATURES));

 //! The features for the frequent rare count over function.
-const TFeatureVec POPULATION_FREQ_RARE_COUNT_FEATURES(BEGIN(detail::POPULATION_FREQ_RARE_COUNT_FEATURES),
-                                                      END(detail::POPULATION_FREQ_RARE_COUNT_FEATURES));
+const TFeatureVec
+    POPULATION_FREQ_RARE_COUNT_FEATURES(BEGIN(detail::POPULATION_FREQ_RARE_COUNT_FEATURES),
+                                        END(detail::POPULATION_FREQ_RARE_COUNT_FEATURES));

 //! The features for the low count over function.
-const TFeatureVec POPULATION_LOW_COUNTS_FEATURES(BEGIN(detail::POPULATION_LOW_COUNTS_FEATURES),
-                                                 END(detail::POPULATION_LOW_COUNTS_FEATURES));
+const TFeatureVec
+    POPULATION_LOW_COUNTS_FEATURES(BEGIN(detail::POPULATION_LOW_COUNTS_FEATURES),
+                                   END(detail::POPULATION_LOW_COUNTS_FEATURES));

 //! The features for the high count over function.
-const TFeatureVec POPULATION_HIGH_COUNTS_FEATURES(BEGIN(detail::POPULATION_HIGH_COUNTS_FEATURES),
-                                                  END(detail::POPULATION_HIGH_COUNTS_FEATURES));
+const TFeatureVec
+    POPULATION_HIGH_COUNTS_FEATURES(BEGIN(detail::POPULATION_HIGH_COUNTS_FEATURES),
+                                    END(detail::POPULATION_HIGH_COUNTS_FEATURES));

 //! The features for the information content over function.
-const TFeatureVec POPULATION_INFO_CONTENT_FEATURES(BEGIN(detail::POPULATION_INFO_CONTENT_FEATURES),
-                                                   END(detail::POPULATION_INFO_CONTENT_FEATURES));
+const TFeatureVec
+    POPULATION_INFO_CONTENT_FEATURES(BEGIN(detail::POPULATION_INFO_CONTENT_FEATURES),
+                                     END(detail::POPULATION_INFO_CONTENT_FEATURES));

 //! The features for the low information content over function.
-const TFeatureVec POPULATION_LOW_INFO_CONTENT_FEATURES(BEGIN(detail::POPULATION_LOW_INFO_CONTENT_FEATURES),
-                                                       END(detail::POPULATION_LOW_INFO_CONTENT_FEATURES));
+const TFeatureVec
+    POPULATION_LOW_INFO_CONTENT_FEATURES(BEGIN(detail::POPULATION_LOW_INFO_CONTENT_FEATURES),
+                                         END(detail::POPULATION_LOW_INFO_CONTENT_FEATURES));

 //! The features for the high information content over function.
-const TFeatureVec POPULATION_HIGH_INFO_CONTENT_FEATURES(BEGIN(detail::POPULATION_HIGH_INFO_CONTENT_FEATURES),
-                                                        END(detail::POPULATION_HIGH_INFO_CONTENT_FEATURES));
+const TFeatureVec POPULATION_HIGH_INFO_CONTENT_FEATURES(
+    BEGIN(detail::POPULATION_HIGH_INFO_CONTENT_FEATURES),
+    END(detail::POPULATION_HIGH_INFO_CONTENT_FEATURES));

 //! The features for the time_of_day over function.
-const TFeatureVec POPULATION_TIME_OF_DAY_FEATURES(BEGIN(detail::POPULATION_TIME_OF_DAY_FEATURES),
-                                                  END(detail::POPULATION_TIME_OF_DAY_FEATURES));
+const TFeatureVec
+    POPULATION_TIME_OF_DAY_FEATURES(BEGIN(detail::POPULATION_TIME_OF_DAY_FEATURES),
+                                    END(detail::POPULATION_TIME_OF_DAY_FEATURES));

 //! The features for the time_of_week over function.
-const TFeatureVec POPULATION_TIME_OF_WEEK_FEATURES(BEGIN(detail::POPULATION_TIME_OF_WEEK_FEATURES),
-                                                   END(detail::POPULATION_TIME_OF_WEEK_FEATURES));
+const TFeatureVec
+    POPULATION_TIME_OF_WEEK_FEATURES(BEGIN(detail::POPULATION_TIME_OF_WEEK_FEATURES),
+                                     END(detail::POPULATION_TIME_OF_WEEK_FEATURES));

 //! The features for the metric over function.
-const TFeatureVec POPULATION_METRIC_FEATURES(BEGIN(detail::POPULATION_METRIC_FEATURES), END(detail::POPULATION_METRIC_FEATURES));
+const TFeatureVec POPULATION_METRIC_FEATURES(BEGIN(detail::POPULATION_METRIC_FEATURES),
+                                             END(detail::POPULATION_METRIC_FEATURES));

 //! The features for the metric mean over function.
-const TFeatureVec POPULATION_METRIC_MEAN_FEATURES(BEGIN(detail::POPULATION_METRIC_MEAN_FEATURES),
-                                                  END(detail::POPULATION_METRIC_MEAN_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_MEAN_FEATURES(BEGIN(detail::POPULATION_METRIC_MEAN_FEATURES),
+                                    END(detail::POPULATION_METRIC_MEAN_FEATURES));

 //! The features for the metric low mean over function.
-const TFeatureVec POPULATION_METRIC_LOW_MEAN_FEATURES(BEGIN(detail::POPULATION_METRIC_LOW_MEAN_FEATURES),
-                                                      END(detail::POPULATION_METRIC_LOW_MEAN_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_LOW_MEAN_FEATURES(BEGIN(detail::POPULATION_METRIC_LOW_MEAN_FEATURES),
+                                        END(detail::POPULATION_METRIC_LOW_MEAN_FEATURES));

 //! The features for the metric high mean over function.
-const TFeatureVec POPULATION_METRIC_HIGH_MEAN_FEATURES(BEGIN(detail::POPULATION_METRIC_HIGH_MEAN_FEATURES),
-                                                       END(detail::POPULATION_METRIC_HIGH_MEAN_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_HIGH_MEAN_FEATURES(BEGIN(detail::POPULATION_METRIC_HIGH_MEAN_FEATURES),
+                                         END(detail::POPULATION_METRIC_HIGH_MEAN_FEATURES));

 //! The features for the metric median over function.
-const TFeatureVec POPULATION_METRIC_MEDIAN_FEATURES(BEGIN(detail::POPULATION_METRIC_MEDIAN_FEATURES),
-                                                    END(detail::POPULATION_METRIC_MEDIAN_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_MEDIAN_FEATURES(BEGIN(detail::POPULATION_METRIC_MEDIAN_FEATURES),
+                                      END(detail::POPULATION_METRIC_MEDIAN_FEATURES));

 //! The features for the metric low median over function.
-const TFeatureVec POPULATION_METRIC_LOW_MEDIAN_FEATURES(BEGIN(detail::POPULATION_METRIC_LOW_MEDIAN_FEATURES),
-                                                        END(detail::POPULATION_METRIC_LOW_MEDIAN_FEATURES));
+const TFeatureVec POPULATION_METRIC_LOW_MEDIAN_FEATURES(
+    BEGIN(detail::POPULATION_METRIC_LOW_MEDIAN_FEATURES),
+    END(detail::POPULATION_METRIC_LOW_MEDIAN_FEATURES));

 //! The features for the metric high median over function.
-const TFeatureVec POPULATION_METRIC_HIGH_MEDIAN_FEATURES(BEGIN(detail::POPULATION_METRIC_HIGH_MEDIAN_FEATURES),
-                                                         END(detail::POPULATION_METRIC_HIGH_MEDIAN_FEATURES));
+const TFeatureVec POPULATION_METRIC_HIGH_MEDIAN_FEATURES(
+    BEGIN(detail::POPULATION_METRIC_HIGH_MEDIAN_FEATURES),
+    END(detail::POPULATION_METRIC_HIGH_MEDIAN_FEATURES));

 //! The features for the metric min over function.
-const TFeatureVec POPULATION_METRIC_MIN_FEATURES(BEGIN(detail::POPULATION_METRIC_MIN_FEATURES),
-                                                 END(detail::POPULATION_METRIC_MIN_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_MIN_FEATURES(BEGIN(detail::POPULATION_METRIC_MIN_FEATURES),
+                                   END(detail::POPULATION_METRIC_MIN_FEATURES));

 //! The features for the metric max over function.
-const TFeatureVec POPULATION_METRIC_MAX_FEATURES(BEGIN(detail::POPULATION_METRIC_MAX_FEATURES),
-                                                 END(detail::POPULATION_METRIC_MAX_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_MAX_FEATURES(BEGIN(detail::POPULATION_METRIC_MAX_FEATURES),
+                                   END(detail::POPULATION_METRIC_MAX_FEATURES));

 //! The features for the metric variance over function.
-const TFeatureVec POPULATION_METRIC_VARIANCE_FEATURES(BEGIN(detail::POPULATION_METRIC_VARIANCE_FEATURES),
-                                                      END(detail::POPULATION_METRIC_VARIANCE_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_VARIANCE_FEATURES(BEGIN(detail::POPULATION_METRIC_VARIANCE_FEATURES),
+                                        END(detail::POPULATION_METRIC_VARIANCE_FEATURES));

 //! The features for the metric low variance over function.
-const TFeatureVec POPULATION_METRIC_LOW_VARIANCE_FEATURES(BEGIN(detail::POPULATION_METRIC_LOW_VARIANCE_FEATURES),
-                                                          END(detail::POPULATION_METRIC_LOW_VARIANCE_FEATURES));
+const TFeatureVec POPULATION_METRIC_LOW_VARIANCE_FEATURES(
+    BEGIN(detail::POPULATION_METRIC_LOW_VARIANCE_FEATURES),
+    END(detail::POPULATION_METRIC_LOW_VARIANCE_FEATURES));

 //! The features for the metric high variance over function.
-const TFeatureVec POPULATION_METRIC_HIGH_VARIANCE_FEATURES(BEGIN(detail::POPULATION_METRIC_HIGH_VARIANCE_FEATURES),
-                                                           END(detail::POPULATION_METRIC_HIGH_VARIANCE_FEATURES));
+const TFeatureVec POPULATION_METRIC_HIGH_VARIANCE_FEATURES(
+    BEGIN(detail::POPULATION_METRIC_HIGH_VARIANCE_FEATURES),
+    END(detail::POPULATION_METRIC_HIGH_VARIANCE_FEATURES));

 //! The features for the metric sum over function.
-const TFeatureVec POPULATION_METRIC_SUM_FEATURES(BEGIN(detail::POPULATION_METRIC_SUM_FEATURES),
-                                                 END(detail::POPULATION_METRIC_SUM_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_SUM_FEATURES(BEGIN(detail::POPULATION_METRIC_SUM_FEATURES),
+                                   END(detail::POPULATION_METRIC_SUM_FEATURES));

 //! The features for the metric low sum over function.
-const TFeatureVec POPULATION_METRIC_LOW_SUM_FEATURES(BEGIN(detail::POPULATION_METRIC_LOW_SUM_FEATURES),
-                                                     END(detail::POPULATION_METRIC_LOW_SUM_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_LOW_SUM_FEATURES(BEGIN(detail::POPULATION_METRIC_LOW_SUM_FEATURES),
+                                       END(detail::POPULATION_METRIC_LOW_SUM_FEATURES));

 //! The features for the metric high sum over function.
-const TFeatureVec POPULATION_METRIC_HIGH_SUM_FEATURES(BEGIN(detail::POPULATION_METRIC_HIGH_SUM_FEATURES),
-                                                      END(detail::POPULATION_METRIC_HIGH_SUM_FEATURES));
+const TFeatureVec
+    POPULATION_METRIC_HIGH_SUM_FEATURES(BEGIN(detail::POPULATION_METRIC_HIGH_SUM_FEATURES),
+                                        END(detail::POPULATION_METRIC_HIGH_SUM_FEATURES));

 //! The features for the metric lat/long over function.
-const TFeatureVec POPULATION_LAT_LONG_FEATURES(BEGIN(detail::POPULATION_LAT_LONG_FEATURES), END(detail::POPULATION_LAT_LONG_FEATURES));
+const TFeatureVec POPULATION_LAT_LONG_FEATURES(BEGIN(detail::POPULATION_LAT_LONG_FEATURES),
+                                               END(detail::POPULATION_LAT_LONG_FEATURES));

 //! The features for the metric max velocity over function.
-const TFeatureVec POPULATION_MAX_VELOCITY_FEATURES(BEGIN(detail::POPULATION_MAX_VELOCITY_FEATURES),
-                                                   END(detail::POPULATION_MAX_VELOCITY_FEATURES));
+const TFeatureVec
+    POPULATION_MAX_VELOCITY_FEATURES(BEGIN(detail::POPULATION_MAX_VELOCITY_FEATURES),
+                                     END(detail::POPULATION_MAX_VELOCITY_FEATURES));

 //! The features for the metric min velocity over function.
-const TFeatureVec POPULATION_MIN_VELOCITY_FEATURES(BEGIN(detail::POPULATION_MIN_VELOCITY_FEATURES),
-                                                   END(detail::POPULATION_MIN_VELOCITY_FEATURES));
+const TFeatureVec
+    POPULATION_MIN_VELOCITY_FEATURES(BEGIN(detail::POPULATION_MIN_VELOCITY_FEATURES),
+                                     END(detail::POPULATION_MIN_VELOCITY_FEATURES));

 //! The features for the metric mean velocity over function.
-const TFeatureVec POPULATION_MEAN_VELOCITY_FEATURES(BEGIN(detail::POPULATION_MEAN_VELOCITY_FEATURES),
-                                                    END(detail::POPULATION_MEAN_VELOCITY_FEATURES));
+const TFeatureVec
+    POPULATION_MEAN_VELOCITY_FEATURES(BEGIN(detail::POPULATION_MEAN_VELOCITY_FEATURES),
+                                      END(detail::POPULATION_MEAN_VELOCITY_FEATURES));

 //! The features for the metric sum velocity over function.
-const TFeatureVec POPULATION_SUM_VELOCITY_FEATURES(BEGIN(detail::POPULATION_SUM_VELOCITY_FEATURES),
-                                                   END(detail::POPULATION_SUM_VELOCITY_FEATURES));
+const TFeatureVec
+    POPULATION_SUM_VELOCITY_FEATURES(BEGIN(detail::POPULATION_SUM_VELOCITY_FEATURES),
+                                     END(detail::POPULATION_SUM_VELOCITY_FEATURES));

 //! The features for the count over function.
-const TFeatureVec PEERS_COUNT_FEATURES(BEGIN(detail::PEERS_COUNT_FEATURES), END(detail::PEERS_COUNT_FEATURES));
+const TFeatureVec PEERS_COUNT_FEATURES(BEGIN(detail::PEERS_COUNT_FEATURES),
+                                       END(detail::PEERS_COUNT_FEATURES));

 //! The features for the low count over function.
-const TFeatureVec PEERS_LOW_COUNTS_FEATURES(BEGIN(detail::PEERS_LOW_COUNTS_FEATURES), END(detail::PEERS_LOW_COUNTS_FEATURES));
+const TFeatureVec PEERS_LOW_COUNTS_FEATURES(BEGIN(detail::PEERS_LOW_COUNTS_FEATURES),
+                                            END(detail::PEERS_LOW_COUNTS_FEATURES));

 //! The features for the high count over function.
-const TFeatureVec PEERS_HIGH_COUNTS_FEATURES(BEGIN(detail::PEERS_HIGH_COUNTS_FEATURES), END(detail::PEERS_HIGH_COUNTS_FEATURES));
+const TFeatureVec PEERS_HIGH_COUNTS_FEATURES(BEGIN(detail::PEERS_HIGH_COUNTS_FEATURES),
+                                             END(detail::PEERS_HIGH_COUNTS_FEATURES));

 //! The features for the distinct count over function.
-const TFeatureVec PEERS_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_DISTINCT_COUNT_FEATURES), END(detail::PEERS_DISTINCT_COUNT_FEATURES));
+const TFeatureVec
+    PEERS_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_DISTINCT_COUNT_FEATURES),
+                                  END(detail::PEERS_DISTINCT_COUNT_FEATURES));

 //! The features for the low distinct count over function.
-const TFeatureVec PEERS_LOW_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_LOW_DISTINCT_COUNT_FEATURES),
-                                                    END(detail::PEERS_LOW_DISTINCT_COUNT_FEATURES));
+const TFeatureVec
+    PEERS_LOW_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_LOW_DISTINCT_COUNT_FEATURES),
+                                      END(detail::PEERS_LOW_DISTINCT_COUNT_FEATURES));

 //! The features for the high distinct count over function.
-const TFeatureVec PEERS_HIGH_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_HIGH_DISTINCT_COUNT_FEATURES),
-                                                     END(detail::PEERS_HIGH_DISTINCT_COUNT_FEATURES));
+const TFeatureVec
+    PEERS_HIGH_DISTINCT_COUNT_FEATURES(BEGIN(detail::PEERS_HIGH_DISTINCT_COUNT_FEATURES),
+                                       END(detail::PEERS_HIGH_DISTINCT_COUNT_FEATURES));

 //! The features for the information content over function.
-const TFeatureVec PEERS_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_INFO_CONTENT_FEATURES), END(detail::PEERS_INFO_CONTENT_FEATURES));
+const TFeatureVec PEERS_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_INFO_CONTENT_FEATURES),
+                                              END(detail::PEERS_INFO_CONTENT_FEATURES));

 //! The features for the low information content over function.
-const TFeatureVec PEERS_LOW_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_LOW_INFO_CONTENT_FEATURES),
-                                                  END(detail::PEERS_LOW_INFO_CONTENT_FEATURES));
+const TFeatureVec
+    PEERS_LOW_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_LOW_INFO_CONTENT_FEATURES),
+                                    END(detail::PEERS_LOW_INFO_CONTENT_FEATURES));

 //! The features for the high information content over function.
-const TFeatureVec PEERS_HIGH_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_HIGH_INFO_CONTENT_FEATURES),
-                                                   END(detail::PEERS_HIGH_INFO_CONTENT_FEATURES));
+const TFeatureVec
+    PEERS_HIGH_INFO_CONTENT_FEATURES(BEGIN(detail::PEERS_HIGH_INFO_CONTENT_FEATURES),
+                                     END(detail::PEERS_HIGH_INFO_CONTENT_FEATURES));

 //! The features for the time_of_day over function.
-const TFeatureVec PEERS_TIME_OF_DAY_FEATURES(BEGIN(detail::PEERS_TIME_OF_DAY_FEATURES), END(detail::PEERS_TIME_OF_DAY_FEATURES));
+const TFeatureVec PEERS_TIME_OF_DAY_FEATURES(BEGIN(detail::PEERS_TIME_OF_DAY_FEATURES),
+                                             END(detail::PEERS_TIME_OF_DAY_FEATURES));

 //! The features for the time_of_week over function.
-const TFeatureVec PEERS_TIME_OF_WEEK_FEATURES(BEGIN(detail::PEERS_TIME_OF_WEEK_FEATURES), END(detail::PEERS_TIME_OF_WEEK_FEATURES));
+const TFeatureVec PEERS_TIME_OF_WEEK_FEATURES(BEGIN(detail::PEERS_TIME_OF_WEEK_FEATURES),
+                                              END(detail::PEERS_TIME_OF_WEEK_FEATURES));

 const TFeatureVec EMPTY_FEATURES;
 const TFunctionVec EMPTY_FUNCTIONS;
@@ -1534,16 +1690,19 @@ EFunction function(const TFeatureVec& features) {
             continue;
         }

-        LOG_TRACE(<< "candidate = " << core::CContainerPrinter::print(functionsItr->second));
-        std::set_intersection(
-            candidates.begin(), candidates.end(), functionsItr->second.begin(), functionsItr->second.end(), std::back_inserter(tmp));
+        LOG_TRACE(<< "candidate = "
+                  << core::CContainerPrinter::print(functionsItr->second));
+        std::set_intersection(candidates.begin(), candidates.end(),
+                              functionsItr->second.begin(),
+                              functionsItr->second.end(), std::back_inserter(tmp));
         candidates.swap(tmp);
         tmp.clear();
     }

     if (candidates.empty()) {
         EFunction result = mostSpecific(fallback);
-        LOG_ERROR(<< "Inconsistent features " << core::CContainerPrinter::print(features) << " defaulting to '" << print(result) << "'");
+        LOG_ERROR(<< "Inconsistent features " << core::CContainerPrinter::print(features)
+                  << " defaulting to '" << print(result) << "'");
         return result;
     }

diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc
index 8611b44b12..9ef30172b5 100644
--- a/lib/model/ModelTypes.cc
+++ b/lib/model/ModelTypes.cc
@@ -421,7 +421,8 @@ bool isConstant(EFeature feature) {

 bool isMeanFeature(EFeature feature) {
     EMetricCategory category;
-    return metricCategory(feature, category) && (category == E_Mean || category == E_MultivariateMean);
+    return metricCategory(feature, category) &&
+           (category == E_Mean || category == E_MultivariateMean);
 }

 bool isMedianFeature(EFeature feature) {
@@ -431,12 +432,14 @@ bool isMedianFeature(EFeature feature) {

 bool isMinFeature(EFeature feature) {
     EMetricCategory category;
-    return metricCategory(feature, category) && (category == E_Min || category == E_MultivariateMin);
+    return metricCategory(feature, category) &&
+           (category == E_Min || category == E_MultivariateMin);
 }

 bool isMaxFeature(EFeature feature) {
     EMetricCategory category;
-    return metricCategory(feature, category) && (category == E_Max || category == E_MultivariateMax);
+    return metricCategory(feature, category) &&
+           (category == E_Max || category == E_MultivariateMax);
 }

 bool isVarianceFeature(EFeature feature) {
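Before the next ModelTypes.cc hunk, it is worth unpacking the lookup that the FunctionTypes.cc hunk further above merely re-wraps: function() starts from a full candidate set and repeatedly intersects it with the sorted list of functions each feature can belong to, falling back to mostSpecific(fallback) when the intersection comes up empty. A minimal sketch of that narrowing idiom, using plain int ids and hypothetical names rather than the real model_t/function_t types:

#include <algorithm>
#include <iostream>
#include <iterator>
#include <map>
#include <vector>

// Toy stand-ins for model_t::EFeature / function_t::EFunction.
using Feature = int;
using Function = int;

// Narrow an initial candidate set by intersecting, feature by feature,
// the sorted list of functions each feature maps to.
std::vector<Function> narrow(const std::vector<Feature>& features,
                             const std::map<Feature, std::vector<Function>>& functionsByFeature,
                             std::vector<Function> candidates) {
    std::vector<Function> tmp;
    for (Feature feature : features) {
        auto itr = functionsByFeature.find(feature);
        if (itr == functionsByFeature.end()) {
            continue;
        }
        // Both input ranges must be sorted for std::set_intersection.
        std::set_intersection(candidates.begin(), candidates.end(),
                              itr->second.begin(), itr->second.end(),
                              std::back_inserter(tmp));
        candidates.swap(tmp);
        tmp.clear();
    }
    return candidates; // empty => inconsistent features, caller picks a fallback
}

int main() {
    std::map<Feature, std::vector<Function>> functionsByFeature{
        {1, {10, 11, 12}}, {2, {11, 12}}, {3, {12, 13}}};
    for (Function f : narrow({1, 2, 3}, functionsByFeature, {10, 11, 12, 13})) {
        std::cout << f << '\n'; // prints 12: the only function consistent with all features
    }
    return 0;
}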
@@ -896,7 +899,10 @@ maths_t::EProbabilityCalculation
 probabilityCalculation(EFeature feature) {
     return maths_t::E_TwoSided;
 }

-core_t::TTime sampleTime(EFeature feature, core_t::TTime bucketStartTime, core_t::TTime bucketLength, core_t::TTime time) {
+core_t::TTime sampleTime(EFeature feature,
+                         core_t::TTime bucketStartTime,
+                         core_t::TTime bucketLength,
+                         core_t::TTime time) {
     switch (feature) {
     CASE_INDIVIDUAL_COUNT:
         return bucketStartTime + bucketLength / 2;
@@ -996,9 +1002,11 @@ TDouble1VecDouble1VecPr support(EFeature feature) {
     case E_IndividualLowInfoContentByBucketAndPerson:
         return {TDouble1Vec(d, 0.0), TDouble1Vec(d, MAX_DOUBLE)};
     case E_IndividualTimeOfDayByBucketAndPerson:
-        return {TDouble1Vec(d, 0.0), TDouble1Vec(d, static_cast<double>(core::constants::DAY))};
+        return {TDouble1Vec(d, 0.0),
+                TDouble1Vec(d, static_cast<double>(core::constants::DAY))};
     case E_IndividualTimeOfWeekByBucketAndPerson:
-        return {TDouble1Vec(d, 0.0), TDouble1Vec(d, static_cast<double>(core::constants::WEEK))};
+        return {TDouble1Vec(d, 0.0),
+                TDouble1Vec(d, static_cast<double>(core::constants::WEEK))};

     case E_IndividualMeanByPerson:
     case E_IndividualLowMeanByPerson:
@@ -1040,9 +1048,11 @@ TDouble1VecDouble1VecPr support(EFeature feature) {
     case E_PopulationHighInfoContentByBucketPersonAndAttribute:
         return {TDouble1Vec(d, 0.0), TDouble1Vec(d, MAX_DOUBLE)};
     case E_PopulationTimeOfDayByBucketPersonAndAttribute:
-        return {TDouble1Vec(d, 0.0), TDouble1Vec(d, static_cast<double>(core::constants::DAY))};
+        return {TDouble1Vec(d, 0.0),
+                TDouble1Vec(d, static_cast<double>(core::constants::DAY))};
     case E_PopulationTimeOfWeekByBucketPersonAndAttribute:
-        return {TDouble1Vec(d, 0.0), TDouble1Vec(d, static_cast<double>(core::constants::WEEK))};
+        return {TDouble1Vec(d, 0.0),
+                TDouble1Vec(d, static_cast<double>(core::constants::WEEK))};

     case E_PopulationMeanByPersonAndAttribute:
     case E_PopulationMedianByPersonAndAttribute:
@@ -1119,10 +1129,12 @@ double adjustProbability(EFeature feature, core_t::TTime elapsedTime, double pro
     case E_IndividualHighInfoContentByBucketAndPerson:
         break;
     case E_IndividualTimeOfDayByBucketAndPerson:
-        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::DAY)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) /
+                                     static_cast<double>(core::constants::DAY)));
         break;
     case E_IndividualTimeOfWeekByBucketAndPerson:
-        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::WEEK)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) /
+                                     static_cast<double>(core::constants::WEEK)));
         break;

     CASE_INDIVIDUAL_METRIC:
@@ -1142,10 +1154,12 @@ double adjustProbability(EFeature feature, core_t::TTime elapsedTime, double pro
     case E_PopulationHighInfoContentByBucketPersonAndAttribute:
         break;
     case E_PopulationTimeOfDayByBucketPersonAndAttribute:
-        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::DAY)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) /
+                                     static_cast<double>(core::constants::DAY)));
         break;
     case E_PopulationTimeOfWeekByBucketPersonAndAttribute:
-        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::WEEK)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) /
+                                     static_cast<double>(core::constants::WEEK)));
        break;

     CASE_POPULATION_METRIC:
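All of the time-of-day and time-of-week cases in adjustProbability apply the same correction: the weight given to the possibility that a value simply opened a new cluster decays as exp(-(elapsedTime/period)^4), with the period equal to a day or a week, so it stays close to 1 for a fresh bucket and only shuts off sharply as the elapsed time approaches one full period. A self-contained sketch of that decay, with a pow4 helper assumed to match the one ModelTypes.cc uses:

#include <cmath>
#include <cstdio>
#include <initializer_list>

namespace {
const double DAY = 86400.0; // seconds, mirrors core::constants::DAY

double pow4(double x) {
    double x2 = x * x;
    return x2 * x2;
}

// New-cluster probability weight used by the time-of-day adjustment:
// ~1.0 for small elapsed times, dropping steeply near one full period.
double pNewCluster(double elapsedTime, double period) {
    return std::exp(-pow4(elapsedTime / period));
}
}

int main() {
    // 0.25 day -> 0.9961, 0.5 day -> 0.9394, 0.75 day -> 0.7288, 1 day -> 0.3679
    for (double t : {0.25 * DAY, 0.5 * DAY, 0.75 * DAY, 1.0 * DAY}) {
        std::printf("%.2f day(s): %.4f\n", t / DAY, pNewCluster(t, DAY));
    }
    return 0;
}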
@@ -1163,10 +1177,12 @@ double adjustProbability(EFeature feature, core_t::TTime elapsedTime, double pro
     case E_PeersHighInfoContentByBucketPersonAndAttribute:
         break;
     case E_PeersTimeOfDayByBucketPersonAndAttribute:
-        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::DAY)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) /
+                                     static_cast<double>(core::constants::DAY)));
         break;
     case E_PeersTimeOfWeekByBucketPersonAndAttribute:
-        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) / static_cast<double>(core::constants::WEEK)));
+        pNewCluster = std::exp(-pow4(static_cast<double>(elapsedTime) /
+                                     static_cast<double>(core::constants::WEEK)));
         break;

     CASE_PEERS_METRIC:
diff --git a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc
index b9d5f098f3..2fda9a9ca0 100644
--- a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc
+++ b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc
@@ -41,13 +41,17 @@ const TSizeDoublePr1Vec NO_CORRELATES;

 class CAnnotatedProbabilityBuilderForTest : public CAnnotatedProbabilityBuilder {
 public:
-    CAnnotatedProbabilityBuilderForTest(SAnnotatedProbability& annotatedProbability) : CAnnotatedProbabilityBuilder(annotatedProbability) {}
+    CAnnotatedProbabilityBuilderForTest(SAnnotatedProbability& annotatedProbability)
+        : CAnnotatedProbabilityBuilder(annotatedProbability) {}

     CAnnotatedProbabilityBuilderForTest(SAnnotatedProbability& annotatedProbability,
                                         std::size_t numberAttributeProbabilities,
                                         function_t::EFunction function,
                                         std::size_t numberOfPeople)
-        : CAnnotatedProbabilityBuilder(annotatedProbability, numberAttributeProbabilities, function, numberOfPeople) {}
+        : CAnnotatedProbabilityBuilder(annotatedProbability,
+                                       numberAttributeProbabilities,
+                                       function,
+                                       numberOfPeople) {}
 };
 }

@@ -66,20 +70,17 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividua
     SAnnotatedProbability result;
     CAnnotatedProbabilityBuilderForTest builder(result, 1, function_t::E_IndividualCount, 42);

-    builder.addAttributeProbability(0,
-                                    EMPTY_STRING_PTR,
-                                    1.0,
-                                    0.68,
+    builder.addAttributeProbability(0, EMPTY_STRING_PTR, 1.0, 0.68,
                                     model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualCountByBucketAndPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
     builder.build();

     CPPUNIT_ASSERT_EQUAL(std::size_t(1), result.s_AttributeProbabilities.size());
     CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, *result.s_AttributeProbabilities[0].s_Attribute);
     CPPUNIT_ASSERT_EQUAL(0.68, result.s_AttributeProbabilities[0].s_Probability);
-    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, result.s_AttributeProbabilities[0].s_Feature);
+    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson,
+                         result.s_AttributeProbabilities[0].s_Feature);
     CPPUNIT_ASSERT(result.s_AttributeProbabilities[0].s_DescriptiveData.empty());
 }

@@ -87,50 +88,33 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio
     SAnnotatedProbability result;
     CAnnotatedProbabilityBuilderForTest builder(result, 3, function_t::E_PopulationCount, 42);

-    builder.addAttributeProbability(0,
-                                    EMPTY_STRING_PTR,
-                                    1.0,
-                                    0.09,
+    builder.addAttributeProbability(0, EMPTY_STRING_PTR, 1.0, 0.09,
                                     model_t::CResultType::E_Unconditional,
                                     model_t::E_PopulationCountByBucketPersonAndAttribute,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
-    builder.addAttributeProbability(1,
-                                    C1_PTR,
-                                    1.0,
-                                    0.05,
-                                    model_t::CResultType::E_Unconditional,
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
+    builder.addAttributeProbability(1, C1_PTR, 1.0, 0.05, model_t::CResultType::E_Unconditional,
                                     model_t::E_PopulationCountByBucketPersonAndAttribute,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
-    builder.addAttributeProbability(2,
-                                    C2_PTR,
-                                    1.0,
-                                    0.04,
-                                    model_t::CResultType::E_Unconditional,
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
+    builder.addAttributeProbability(2, C2_PTR, 1.0, 0.04, model_t::CResultType::E_Unconditional,
                                     model_t::E_PopulationCountByBucketPersonAndAttribute,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
-    builder.addAttributeProbability(3,
-                                    C3_PTR,
-                                    1.0,
-                                    0.06,
-                                    model_t::CResultType::E_Unconditional,
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
+    builder.addAttributeProbability(3, C3_PTR, 1.0, 0.06, model_t::CResultType::E_Unconditional,
                                     model_t::E_PopulationCountByBucketPersonAndAttribute,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
     builder.build();

     CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities.size());

     CPPUNIT_ASSERT_EQUAL(C2, *result.s_AttributeProbabilities[0].s_Attribute);
     CPPUNIT_ASSERT_EQUAL(0.04, result.s_AttributeProbabilities[0].s_Probability);
-    CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationCountByBucketPersonAndAttribute, result.s_AttributeProbabilities[0].s_Feature);
+    CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationCountByBucketPersonAndAttribute,
+                         result.s_AttributeProbabilities[0].s_Feature);
     CPPUNIT_ASSERT(result.s_AttributeProbabilities[0].s_DescriptiveData.empty());

     CPPUNIT_ASSERT_EQUAL(C1, *result.s_AttributeProbabilities[1].s_Attribute);
     CPPUNIT_ASSERT_EQUAL(0.05, result.s_AttributeProbabilities[1].s_Probability);
-    CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationCountByBucketPersonAndAttribute, result.s_AttributeProbabilities[1].s_Feature);
+    CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationCountByBucketPersonAndAttribute,
+                         result.s_AttributeProbabilities[1].s_Feature);
     CPPUNIT_ASSERT(result.s_AttributeProbabilities[1].s_DescriptiveData.empty());
 }

@@ -138,14 +122,10 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividua
     SAnnotatedProbability result;
     CAnnotatedProbabilityBuilderForTest builder(result, 1, function_t::E_IndividualRare, 42);

-    builder.addAttributeProbability(0,
-                                    EMPTY_STRING_PTR,
-                                    1.0,
-                                    0.68,
+    builder.addAttributeProbability(0, EMPTY_STRING_PTR, 1.0, 0.68,
                                     model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
     builder.build();

     CPPUNIT_ASSERT_EQUAL(std::size_t(1), result.s_AttributeProbabilities.size());
 }

 void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationRare() {
-    maths::CMultinomialConjugate attributePrior(maths::CMultinomialConjugate::nonInformativePrior(4u));
+    maths::CMultinomialConjugate attributePrior(
+        maths::CMultinomialConjugate::nonInformativePrior(4u));
     for (std::size_t i = 1u; i <= 4u; ++i) {
         TDouble1Vec samples(i, static_cast<double>(i));
         TDouble4Vec1Vec weights(i, maths::CConstantWeights::UNIT);
         attributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights);
     }

-    maths::CMultinomialConjugate personAttributePrior(maths::CMultinomialConjugate::nonInformativePrior(4u));
+    maths::CMultinomialConjugate personAttributePrior(
+        maths::CMultinomialConjugate::nonInformativePrior(4u));
     for (std::size_t i = 1u; i <= 4u; ++i) {
         TDouble1Vec samples(2 * i, static_cast<double>(i));
         TDouble4Vec1Vec weights(2 * i, maths::CConstantWeights::UNIT);
@@ -172,78 +154,75 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio
     builder.attributeProbabilityPrior(&attributePrior);
     builder.personAttributeProbabilityPrior(&personAttributePrior);

-    builder.addAttributeProbability(1,
-                                    C1_PTR,
-                                    0.051,
-                                    0.02,
-                                    model_t::CResultType::E_Unconditional,
+    builder.addAttributeProbability(1, C1_PTR, 0.051, 0.02, model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
-    builder.addAttributeProbability(2,
-                                    C2_PTR,
-                                    0.06,
-                                    0.06,
-                                    model_t::CResultType::E_Unconditional,
+    builder.addAttributeProbability(2, C2_PTR, 0.06, 0.06, model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
-    builder.addAttributeProbability(3,
-                                    C3_PTR,
-                                    0.07,
-                                    0.01,
-                                    model_t::CResultType::E_Unconditional,
+    builder.addAttributeProbability(3, C3_PTR, 0.07, 0.01, model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
-    builder.addAttributeProbability(4,
-                                    C4_PTR,
-                                    0.03,
-                                    0.03,
-                                    model_t::CResultType::E_Unconditional,
+    builder.addAttributeProbability(4, C4_PTR, 0.03, 0.03, model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
     builder.build();

     CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities.size());

     CPPUNIT_ASSERT_EQUAL(std::size_t(3), result.s_DescriptiveData.size());
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_PERSON_COUNT, result.s_DescriptiveData[0].first);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_PERSON_COUNT,
+                         result.s_DescriptiveData[0].first);
     CPPUNIT_ASSERT_EQUAL(42.0, result.s_DescriptiveData[0].second);
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_DISTINCT_RARE_ATTRIBUTES_COUNT, result.s_DescriptiveData[1].first);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_DISTINCT_RARE_ATTRIBUTES_COUNT,
+                         result.s_DescriptiveData[1].first);
     CPPUNIT_ASSERT_EQUAL(1.0, result.s_DescriptiveData[1].second);
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_DISTINCT_TOTAL_ATTRIBUTES_COUNT, result.s_DescriptiveData[2].first);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_DISTINCT_TOTAL_ATTRIBUTES_COUNT,
+                         result.s_DescriptiveData[2].first);
     CPPUNIT_ASSERT_EQUAL(4.0, result.s_DescriptiveData[2].second);

     CPPUNIT_ASSERT_EQUAL(C3, *result.s_AttributeProbabilities[0].s_Attribute);
     CPPUNIT_ASSERT_EQUAL(0.01, result.s_AttributeProbabilities[0].s_Probability);
-    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, result.s_AttributeProbabilities[0].s_Feature);
-    CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities[0].s_DescriptiveData.size());
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ATTRIBUTE_CONCENTRATION, result.s_AttributeProbabilities[0].s_DescriptiveData[0].first);
-    CPPUNIT_ASSERT_EQUAL(3.0, result.s_AttributeProbabilities[0].s_DescriptiveData[0].second);
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ACTIVITY_CONCENTRATION, result.s_AttributeProbabilities[0].s_DescriptiveData[1].first);
-    CPPUNIT_ASSERT_EQUAL(6.0, result.s_AttributeProbabilities[0].s_DescriptiveData[1].second);
+    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson,
+                         result.s_AttributeProbabilities[0].s_Feature);
+    CPPUNIT_ASSERT_EQUAL(std::size_t(2),
+                         result.s_AttributeProbabilities[0].s_DescriptiveData.size());
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ATTRIBUTE_CONCENTRATION,
+                         result.s_AttributeProbabilities[0].s_DescriptiveData[0].first);
+    CPPUNIT_ASSERT_EQUAL(
+        3.0, result.s_AttributeProbabilities[0].s_DescriptiveData[0].second);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ACTIVITY_CONCENTRATION,
+                         result.s_AttributeProbabilities[0].s_DescriptiveData[1].first);
+    CPPUNIT_ASSERT_EQUAL(
+        6.0, result.s_AttributeProbabilities[0].s_DescriptiveData[1].second);

     CPPUNIT_ASSERT_EQUAL(C1, *result.s_AttributeProbabilities[1].s_Attribute);
     CPPUNIT_ASSERT_EQUAL(0.02, result.s_AttributeProbabilities[1].s_Probability);
-    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, result.s_AttributeProbabilities[1].s_Feature);
-    CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities[1].s_DescriptiveData.size());
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ATTRIBUTE_CONCENTRATION, result.s_AttributeProbabilities[1].s_DescriptiveData[0].first);
-    CPPUNIT_ASSERT_EQUAL(1.0, result.s_AttributeProbabilities[1].s_DescriptiveData[0].second);
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ACTIVITY_CONCENTRATION, result.s_AttributeProbabilities[1].s_DescriptiveData[1].first);
-    CPPUNIT_ASSERT_EQUAL(2.0, result.s_AttributeProbabilities[1].s_DescriptiveData[1].second);
+    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson,
+                         result.s_AttributeProbabilities[1].s_Feature);
+    CPPUNIT_ASSERT_EQUAL(std::size_t(2),
+                         result.s_AttributeProbabilities[1].s_DescriptiveData.size());
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ATTRIBUTE_CONCENTRATION,
+                         result.s_AttributeProbabilities[1].s_DescriptiveData[0].first);
+    CPPUNIT_ASSERT_EQUAL(
+        1.0, result.s_AttributeProbabilities[1].s_DescriptiveData[0].second);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ACTIVITY_CONCENTRATION,
+                         result.s_AttributeProbabilities[1].s_DescriptiveData[1].first);
+    CPPUNIT_ASSERT_EQUAL(
+        2.0, result.s_AttributeProbabilities[1].s_DescriptiveData[1].second);
 }

 void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulationFreqRare() {
-    maths::CMultinomialConjugate attributePrior(maths::CMultinomialConjugate::nonInformativePrior(4u));
+    maths::CMultinomialConjugate attributePrior(
+        maths::CMultinomialConjugate::nonInformativePrior(4u));
     for (std::size_t i = 1u; i <= 4u; ++i) {
         TDouble1Vec samples(i, static_cast<double>(i));
         TDouble4Vec1Vec weights(i, maths::CConstantWeights::UNIT);
         attributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights);
     }

-    maths::CMultinomialConjugate personAttributePrior(maths::CMultinomialConjugate::nonInformativePrior(4u));
+    maths::CMultinomialConjugate personAttributePrior(
+        maths::CMultinomialConjugate::nonInformativePrior(4u));
     for (std::size_t i = 1u; i <= 4u; ++i) {
         TDouble1Vec samples(2 * i, static_cast<double>(i));
         TDouble4Vec1Vec weights(2 * i, maths::CConstantWeights::UNIT);
@@ -251,71 +230,67 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio
     }

     SAnnotatedProbability result;
-    CAnnotatedProbabilityBuilderForTest builder(result, 2, function_t::E_PopulationFreqRare, 70);
+    CAnnotatedProbabilityBuilderForTest builder(
+        result, 2, function_t::E_PopulationFreqRare, 70);
     builder.attributeProbabilityPrior(&attributePrior);
     builder.personAttributeProbabilityPrior(&personAttributePrior);

-    builder.addAttributeProbability(1,
-                                    C1_PTR,
-                                    0.051,
-                                    0.02,
-                                    model_t::CResultType::E_Unconditional,
+    builder.addAttributeProbability(1, C1_PTR, 0.051, 0.02, model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
-    builder.addAttributeProbability(2,
-                                    C2_PTR,
-                                    0.06,
-                                    0.06,
-                                    model_t::CResultType::E_Unconditional,
+    builder.addAttributeProbability(2, C2_PTR, 0.06, 0.06, model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
-    builder.addAttributeProbability(3,
-                                    C3_PTR,
-                                    0.07,
-                                    0.01,
-                                    model_t::CResultType::E_Unconditional,
+    builder.addAttributeProbability(3, C3_PTR, 0.07, 0.01, model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
-    builder.addAttributeProbability(4,
-                                    C4_PTR,
-                                    0.03,
-                                    0.03,
-                                    model_t::CResultType::E_Unconditional,
+    builder.addAttributeProbability(4, C4_PTR, 0.03, 0.03, model_t::CResultType::E_Unconditional,
                                     model_t::E_IndividualIndicatorOfBucketPerson,
-                                    NO_CORRELATED_ATTRIBUTES,
-                                    NO_CORRELATES);
+                                    NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
     builder.build();

     CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities.size());

     CPPUNIT_ASSERT_EQUAL(std::size_t(3), result.s_DescriptiveData.size());
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_PERSON_COUNT, result.s_DescriptiveData[0].first);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_PERSON_COUNT,
+                         result.s_DescriptiveData[0].first);
     CPPUNIT_ASSERT_EQUAL(70.0, result.s_DescriptiveData[0].second);
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_RARE_ATTRIBUTES_COUNT, result.s_DescriptiveData[1].first);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_RARE_ATTRIBUTES_COUNT,
+                         result.s_DescriptiveData[1].first);
     CPPUNIT_ASSERT_EQUAL(8.0, result.s_DescriptiveData[1].second);
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_TOTAL_ATTRIBUTES_COUNT, result.s_DescriptiveData[2].first);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_TOTAL_ATTRIBUTES_COUNT,
+                         result.s_DescriptiveData[2].first);
     CPPUNIT_ASSERT_EQUAL(20.0, result.s_DescriptiveData[2].second);

     CPPUNIT_ASSERT_EQUAL(C3, *result.s_AttributeProbabilities[0].s_Attribute);
     CPPUNIT_ASSERT_EQUAL(0.01, result.s_AttributeProbabilities[0].s_Probability);
-    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, result.s_AttributeProbabilities[0].s_Feature);
-    CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities[0].s_DescriptiveData.size());
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ATTRIBUTE_CONCENTRATION, result.s_AttributeProbabilities[0].s_DescriptiveData[0].first);
-    CPPUNIT_ASSERT_EQUAL(3.0, result.s_AttributeProbabilities[0].s_DescriptiveData[0].second);
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ACTIVITY_CONCENTRATION, result.s_AttributeProbabilities[0].s_DescriptiveData[1].first);
-    CPPUNIT_ASSERT_EQUAL(6.0, result.s_AttributeProbabilities[0].s_DescriptiveData[1].second);
+    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson,
+                         result.s_AttributeProbabilities[0].s_Feature);
+    CPPUNIT_ASSERT_EQUAL(std::size_t(2),
+                         result.s_AttributeProbabilities[0].s_DescriptiveData.size());
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ATTRIBUTE_CONCENTRATION,
+                         result.s_AttributeProbabilities[0].s_DescriptiveData[0].first);
+    CPPUNIT_ASSERT_EQUAL(
+        3.0, result.s_AttributeProbabilities[0].s_DescriptiveData[0].second);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ACTIVITY_CONCENTRATION,
+                         result.s_AttributeProbabilities[0].s_DescriptiveData[1].first);
+    CPPUNIT_ASSERT_EQUAL(
+        6.0, result.s_AttributeProbabilities[0].s_DescriptiveData[1].second);

     CPPUNIT_ASSERT_EQUAL(C1, *result.s_AttributeProbabilities[1].s_Attribute);
     CPPUNIT_ASSERT_EQUAL(0.02, result.s_AttributeProbabilities[1].s_Probability);
-    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, result.s_AttributeProbabilities[1].s_Feature);
-    CPPUNIT_ASSERT_EQUAL(std::size_t(2), result.s_AttributeProbabilities[1].s_DescriptiveData.size());
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ATTRIBUTE_CONCENTRATION, result.s_AttributeProbabilities[1].s_DescriptiveData[0].first);
-    CPPUNIT_ASSERT_EQUAL(1.0, result.s_AttributeProbabilities[1].s_DescriptiveData[0].second);
-    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ACTIVITY_CONCENTRATION, result.s_AttributeProbabilities[1].s_DescriptiveData[1].first);
-    CPPUNIT_ASSERT_EQUAL(2.0, result.s_AttributeProbabilities[1].s_DescriptiveData[1].second);
+    CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson,
+                         result.s_AttributeProbabilities[1].s_Feature);
+    CPPUNIT_ASSERT_EQUAL(std::size_t(2),
+                         result.s_AttributeProbabilities[1].s_DescriptiveData.size());
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ATTRIBUTE_CONCENTRATION,
+                         result.s_AttributeProbabilities[1].s_DescriptiveData[0].first);
+    CPPUNIT_ASSERT_EQUAL(
+        1.0, result.s_AttributeProbabilities[1].s_DescriptiveData[0].second);
+    CPPUNIT_ASSERT_EQUAL(annotated_probability::E_ACTIVITY_CONCENTRATION,
+                         result.s_AttributeProbabilities[1].s_DescriptiveData[1].first);
+    CPPUNIT_ASSERT_EQUAL(
+        2.0, result.s_AttributeProbabilities[1].s_DescriptiveData[1].second);
 }

 void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualCount() {
@@ -330,22 +305,26 @@ void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenIndividualRare() {
     {
         SAnnotatedProbability result;
-        CAnnotatedProbabilityBuilderForTest builder(result, 1, function_t::E_IndividualRare, 42);
+        CAnnotatedProbabilityBuilderForTest builder(
+            result, 1, function_t::E_IndividualRare, 42);

         builder.personFrequency(0.3, false);

         CPPUNIT_ASSERT_EQUAL(std::size_t(1), result.s_DescriptiveData.size());
-        CPPUNIT_ASSERT_EQUAL(annotated_probability::E_PERSON_NEVER_SEEN_BEFORE, result.s_DescriptiveData[0].first);
+        CPPUNIT_ASSERT_EQUAL(annotated_probability::E_PERSON_NEVER_SEEN_BEFORE,
+                             result.s_DescriptiveData[0].first);
         CPPUNIT_ASSERT_EQUAL(1.0, result.s_DescriptiveData[0].second);
     }
     {
         SAnnotatedProbability result;
-        CAnnotatedProbabilityBuilderForTest builder(result, 1, function_t::E_IndividualRare, 42);
+        CAnnotatedProbabilityBuilderForTest builder(
+            result, 1, function_t::E_IndividualRare, 42);

         builder.personFrequency(0.2, true);

         CPPUNIT_ASSERT_EQUAL(std::size_t(1), result.s_DescriptiveData.size());
-        CPPUNIT_ASSERT_EQUAL(annotated_probability::E_PERSON_PERIOD, result.s_DescriptiveData[0].first);
+        CPPUNIT_ASSERT_EQUAL(annotated_probability::E_PERSON_PERIOD,
+                             result.s_DescriptiveData[0].first);
         CPPUNIT_ASSERT_EQUAL(5.0, result.s_DescriptiveData[0].second);
     }
 }
@@ -362,8 +341,9 @@ void CAnnotatedProbabilityBuilderTest::testPersonFrequencyGivenPopulationRare()

 CppUnit::Test* CAnnotatedProbabilityBuilderTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnnotatedProbabilityBuilderTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>("CAnnotatedProbabilityBuilderTest::testProbability",
-                                                                                    &CAnnotatedProbabilityBuilderTest::testProbability));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
+        "CAnnotatedProbabilityBuilderTest::testProbability",
+        &CAnnotatedProbabilityBuilderTest::testProbability));
     suiteOfTests->addTest(new CppUnit::TestCaller<CAnnotatedProbabilityBuilderTest>(
         "CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualCount",
         &CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenIndividualCount));
diff --git a/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc b/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc
index 1c32030d3a..1534258147 100644
--- a/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc
+++ b/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc
@@ -31,174 +31,255 @@ void CAnomalyDetectorModelConfigTest::testNormal() {
         CAnomalyDetectorModelConfig config = CAnomalyDetectorModelConfig::defaultConfig(1800);
         CPPUNIT_ASSERT(config.init("testfiles/mlmodel.conf"));

-        CPPUNIT_ASSERT_EQUAL(0.5, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_LearnRate);
-        CPPUNIT_ASSERT_EQUAL(0.5, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_LearnRate);
-        CPPUNIT_ASSERT_EQUAL(0.5, config.factory(1, POPULATION_COUNT)->modelParams().s_LearnRate);
-        CPPUNIT_ASSERT_EQUAL(0.5, config.factory(1, POPULATION_METRIC)->modelParams().s_LearnRate);
+        CPPUNIT_ASSERT_EQUAL(
+            0.5, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_LearnRate);
+        CPPUNIT_ASSERT_EQUAL(
+            0.5, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_LearnRate);
+        CPPUNIT_ASSERT_EQUAL(
+            0.5, config.factory(1, POPULATION_COUNT)->modelParams().s_LearnRate);
+        CPPUNIT_ASSERT_EQUAL(
+            0.5, config.factory(1, POPULATION_METRIC)->modelParams().s_LearnRate);
         CPPUNIT_ASSERT_EQUAL(0.01, config.decayRate());
-        CPPUNIT_ASSERT_EQUAL(0.01, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_DecayRate);
-        CPPUNIT_ASSERT_EQUAL(0.01, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_DecayRate);
-        CPPUNIT_ASSERT_EQUAL(0.01, config.factory(1, POPULATION_COUNT)->modelParams().s_DecayRate);
-        CPPUNIT_ASSERT_EQUAL(0.01, config.factory(1, POPULATION_METRIC)->modelParams().s_DecayRate);
-        CPPUNIT_ASSERT_EQUAL(2.0, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_InitialDecayRateMultiplier);
-        CPPUNIT_ASSERT_EQUAL(2.0, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_InitialDecayRateMultiplier);
-        CPPUNIT_ASSERT_EQUAL(2.0, config.factory(1, POPULATION_COUNT)->modelParams().s_InitialDecayRateMultiplier);
-        CPPUNIT_ASSERT_EQUAL(2.0, config.factory(1, POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier);
-        CPPUNIT_ASSERT_EQUAL(0.0, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket);
-        CPPUNIT_ASSERT_EQUAL(0.0, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket);
-        CPPUNIT_ASSERT_EQUAL(0.0, config.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket);
-        CPPUNIT_ASSERT_EQUAL(0.0, config.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket);
+        CPPUNIT_ASSERT_EQUAL(
+            0.01, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_DecayRate);
+        CPPUNIT_ASSERT_EQUAL(
+            0.01, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_DecayRate);
+        CPPUNIT_ASSERT_EQUAL(
+            0.01, config.factory(1, POPULATION_COUNT)->modelParams().s_DecayRate);
+        CPPUNIT_ASSERT_EQUAL(
+            0.01, config.factory(1, POPULATION_METRIC)->modelParams().s_DecayRate);
+        CPPUNIT_ASSERT_EQUAL(
+            2.0, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_InitialDecayRateMultiplier);
+        CPPUNIT_ASSERT_EQUAL(
+            2.0, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_InitialDecayRateMultiplier);
+        CPPUNIT_ASSERT_EQUAL(
+            2.0, config.factory(1, POPULATION_COUNT)->modelParams().s_InitialDecayRateMultiplier);
+        CPPUNIT_ASSERT_EQUAL(
+            2.0, config.factory(1, POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier);
+        CPPUNIT_ASSERT_EQUAL(
+            0.0, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket);
+        CPPUNIT_ASSERT_EQUAL(
+            0.0, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket);
+        CPPUNIT_ASSERT_EQUAL(
+            0.0, config.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket);
+        CPPUNIT_ASSERT_EQUAL(
+            0.0, config.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket);
         CPPUNIT_ASSERT_EQUAL(0.1, config.factory(1, INDIVIDUAL_COUNT)->minimumModeFraction());
         CPPUNIT_ASSERT_EQUAL(0.1, config.factory(1, INDIVIDUAL_METRIC)->minimumModeFraction());
         CPPUNIT_ASSERT_EQUAL(0.01, config.factory(1, POPULATION_COUNT)->minimumModeFraction());
         CPPUNIT_ASSERT_EQUAL(0.01, config.factory(1, POPULATION_METRIC)->minimumModeFraction());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(10), config.factory(1, INDIVIDUAL_COUNT)->componentSize());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(10), config.factory(1, INDIVIDUAL_METRIC)->componentSize());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(10), config.factory(1, POPULATION_COUNT)->componentSize());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(10), config.factory(1, POPULATION_METRIC)->componentSize());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(20), config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor);
-        CPPUNIT_ASSERT_EQUAL(std::size_t(20), config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor);
-        CPPUNIT_ASSERT_EQUAL(std::size_t(20), config.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor);
-        CPPUNIT_ASSERT_EQUAL(std::size_t(20), config.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor);
+        CPPUNIT_ASSERT_EQUAL(std::size_t(10),
+                             config.factory(1, INDIVIDUAL_COUNT)->componentSize());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(10),
+                             config.factory(1, INDIVIDUAL_METRIC)->componentSize());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(10),
+                             config.factory(1, POPULATION_COUNT)->componentSize());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(10),
+                             config.factory(1, POPULATION_METRIC)->componentSize());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(20),
+                             config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor);
+        CPPUNIT_ASSERT_EQUAL(std::size_t(20),
+                             config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor);
+        CPPUNIT_ASSERT_EQUAL(std::size_t(20),
+                             config.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor);
+        CPPUNIT_ASSERT_EQUAL(std::size_t(20),
+                             config.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor);

         TDoubleVec params;
         for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) {
             for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) {
-                params.push_back(
-                    config.aggregationStyleParam(static_cast<model_t::EAggregationStyle>(i), static_cast<model_t::EAggregationParam>(j)));
+                params.push_back(config.aggregationStyleParam(
+                    static_cast<model_t::EAggregationStyle>(i),
+                    static_cast<model_t::EAggregationParam>(j)));
             }
         }
-        CPPUNIT_ASSERT_EQUAL(std::string("[0.9, 0.1, 2, 4, 0.3, 0.7, 3, 8, 0.6, 0.4, 2, 10]"), core::CContainerPrinter::print(params));
+        CPPUNIT_ASSERT_EQUAL(std::string("[0.9, 0.1, 2, 4, 0.3, 0.7, 3, 8, 0.6, 0.4, 2, 10]"),
+                             core::CContainerPrinter::print(params));
         CPPUNIT_ASSERT_EQUAL(0.01, config.maximumAnomalousProbability());
         CPPUNIT_ASSERT_EQUAL(60.0, config.noisePercentile());
         CPPUNIT_ASSERT_EQUAL(1.2, config.noiseMultiplier());
-        CPPUNIT_ASSERT_EQUAL(4.0, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMaximum);
-
CPPUNIT_ASSERT_EQUAL(0.5, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMinimum); - CPPUNIT_ASSERT_EQUAL(4.0, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMaximum); - CPPUNIT_ASSERT_EQUAL(0.5, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMinimum); - CPPUNIT_ASSERT_EQUAL(4.0, config.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMaximum); - CPPUNIT_ASSERT_EQUAL(0.5, config.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMinimum); - CPPUNIT_ASSERT_EQUAL(4.0, config.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMaximum); - CPPUNIT_ASSERT_EQUAL(0.5, config.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMinimum); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0), (70, 1.5), (85, 1.6), (90, 1.7), (95, 2), (97, 10), (98, 20), (99.5, 50), (100, 100)]"), - core::CContainerPrinter::print(config.normalizedScoreKnotPoints())); + CPPUNIT_ASSERT_EQUAL( + 4.0, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMaximum); + CPPUNIT_ASSERT_EQUAL( + 0.5, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMinimum); + CPPUNIT_ASSERT_EQUAL( + 4.0, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMaximum); + CPPUNIT_ASSERT_EQUAL( + 0.5, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMinimum); + CPPUNIT_ASSERT_EQUAL( + 4.0, config.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMaximum); + CPPUNIT_ASSERT_EQUAL( + 0.5, config.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMinimum); + CPPUNIT_ASSERT_EQUAL( + 4.0, config.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMaximum); + CPPUNIT_ASSERT_EQUAL( + 0.5, config.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMinimum); + CPPUNIT_ASSERT_EQUAL( + std::string("[(0, 0), (70, 1.5), (85, 1.6), (90, 1.7), (95, 2), (97, 10), (98, 20), (99.5, 50), (100, 100)]"), + core::CContainerPrinter::print(config.normalizedScoreKnotPoints())); CPPUNIT_ASSERT_EQUAL(false, config.perPartitionNormalization()); } { CAnomalyDetectorModelConfig config = CAnomalyDetectorModelConfig::defaultConfig(); - CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualCount).get())); - CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualNonZeroCount).get())); - CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualRareCount).get())); - CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualMetricMean).get())); - CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualMetricMin).get())); - CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualMetricMax).get())); - CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_IndividualMetric).get())); - CPPUNIT_ASSERT( - dynamic_cast(config.factory(1, function_t::E_PopulationDistinctCount).get())); - CPPUNIT_ASSERT(dynamic_cast(config.factory(1, function_t::E_PopulationRare).get())); - CPPUNIT_ASSERT(dynamic_cast(config.factory(CSearchKey::simpleCountKey()).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, function_t::E_IndividualCount).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, function_t::E_IndividualNonZeroCount).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, function_t::E_IndividualRareCount).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, function_t::E_IndividualMetricMean).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, 
function_t::E_IndividualMetricMin).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, function_t::E_IndividualMetricMax).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, function_t::E_IndividualMetric).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, function_t::E_PopulationDistinctCount).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(1, function_t::E_PopulationRare).get())); + CPPUNIT_ASSERT(dynamic_cast( + config.factory(CSearchKey::simpleCountKey()).get())); CPPUNIT_ASSERT_EQUAL(false, config.perPartitionNormalization()); } } void CAnomalyDetectorModelConfigTest::testErrors() { { - CAnomalyDetectorModelConfig config1 = CAnomalyDetectorModelConfig::defaultConfig(1800); + CAnomalyDetectorModelConfig config1 = + CAnomalyDetectorModelConfig::defaultConfig(1800); CPPUNIT_ASSERT(!config1.init("testfiles/invalidmlmodel.conf")); - CAnomalyDetectorModelConfig config2 = CAnomalyDetectorModelConfig::defaultConfig(1800); + CAnomalyDetectorModelConfig config2 = + CAnomalyDetectorModelConfig::defaultConfig(1800); CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_LearnRate, config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_LearnRate); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_LearnRate, - config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_LearnRate); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_LearnRate, + config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_LearnRate); CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->modelParams().s_LearnRate, config1.factory(1, POPULATION_COUNT)->modelParams().s_LearnRate); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->modelParams().s_LearnRate, - config1.factory(1, POPULATION_METRIC)->modelParams().s_LearnRate); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_METRIC)->modelParams().s_LearnRate, + config1.factory(1, POPULATION_METRIC)->modelParams().s_LearnRate); CPPUNIT_ASSERT_EQUAL(config2.decayRate(), config1.decayRate()); CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_DecayRate, config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_DecayRate); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_DecayRate, - config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_DecayRate); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_DecayRate, + config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_DecayRate); CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->modelParams().s_DecayRate, config1.factory(1, POPULATION_COUNT)->modelParams().s_DecayRate); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->modelParams().s_DecayRate, - config1.factory(1, POPULATION_METRIC)->modelParams().s_DecayRate); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_InitialDecayRateMultiplier, - config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_InitialDecayRateMultiplier); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_InitialDecayRateMultiplier, - config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_InitialDecayRateMultiplier); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->modelParams().s_InitialDecayRateMultiplier, - config1.factory(1, POPULATION_COUNT)->modelParams().s_InitialDecayRateMultiplier); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier, - config1.factory(1, 
POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket, - config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket, - config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket, - config1.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket, - config1.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_METRIC)->modelParams().s_DecayRate, + config1.factory(1, POPULATION_METRIC)->modelParams().s_DecayRate); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_InitialDecayRateMultiplier, + config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_InitialDecayRateMultiplier); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_InitialDecayRateMultiplier, + config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_InitialDecayRateMultiplier); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_COUNT)->modelParams().s_InitialDecayRateMultiplier, + config1.factory(1, POPULATION_COUNT)->modelParams().s_InitialDecayRateMultiplier); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier, + config1.factory(1, POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket, + config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket, + config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket, + config1.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket, + config1.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket); CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->minimumModeFraction(), config1.factory(1, INDIVIDUAL_COUNT)->minimumModeFraction()); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->minimumModeFraction(), - config1.factory(1, INDIVIDUAL_METRIC)->minimumModeFraction()); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_METRIC)->minimumModeFraction(), + config1.factory(1, INDIVIDUAL_METRIC)->minimumModeFraction()); CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->minimumModeFraction(), config1.factory(1, POPULATION_COUNT)->minimumModeFraction()); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->minimumModeFraction(), - config1.factory(1, POPULATION_METRIC)->minimumModeFraction()); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->componentSize(), config1.factory(1, INDIVIDUAL_COUNT)->componentSize()); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_METRIC)->minimumModeFraction(), + config1.factory(1, POPULATION_METRIC)->minimumModeFraction()); + 
CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->componentSize(), + config1.factory(1, INDIVIDUAL_COUNT)->componentSize()); CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->componentSize(), config1.factory(1, INDIVIDUAL_METRIC)->componentSize()); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->componentSize(), config1.factory(1, POPULATION_COUNT)->componentSize()); + CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->componentSize(), + config1.factory(1, POPULATION_COUNT)->componentSize()); CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->componentSize(), config1.factory(1, POPULATION_METRIC)->componentSize()); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor, - config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor, - config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor, - config1.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor, - config1.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor, + config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor, + config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor, + config1.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor, + config1.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor); for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) { for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) { - CPPUNIT_ASSERT_EQUAL( - config2.aggregationStyleParam(static_cast(i), static_cast(j)), - config1.aggregationStyleParam(static_cast(i), static_cast(j))); + CPPUNIT_ASSERT_EQUAL(config2.aggregationStyleParam( + static_cast(i), + static_cast(j)), + config1.aggregationStyleParam( + static_cast(i), + static_cast(j))); } } - CPPUNIT_ASSERT_EQUAL(config2.maximumAnomalousProbability(), config1.maximumAnomalousProbability()); + CPPUNIT_ASSERT_EQUAL(config2.maximumAnomalousProbability(), + config1.maximumAnomalousProbability()); CPPUNIT_ASSERT_EQUAL(config2.noisePercentile(), config1.noisePercentile()); CPPUNIT_ASSERT_EQUAL(config2.noiseMultiplier(), config1.noiseMultiplier()); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMaximum, - config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMaximum); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMinimum, - config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMinimum); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMaximum, - config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMaximum); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMinimum, - 
config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMinimum); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMaximum, - config1.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMaximum); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMinimum, - config1.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMinimum); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMaximum, - config1.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMaximum); - CPPUNIT_ASSERT_EQUAL(config2.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMinimum, - config1.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMinimum); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(config2.normalizedScoreKnotPoints()), - core::CContainerPrinter::print(config1.normalizedScoreKnotPoints())); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMaximum, + config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMaximum); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMinimum, + config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_PruneWindowScaleMinimum); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMaximum, + config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMaximum); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMinimum, + config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_PruneWindowScaleMinimum); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMaximum, + config1.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMaximum); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMinimum, + config1.factory(1, POPULATION_COUNT)->modelParams().s_PruneWindowScaleMinimum); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMaximum, + config1.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMaximum); + CPPUNIT_ASSERT_EQUAL( + config2.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMinimum, + config1.factory(1, POPULATION_METRIC)->modelParams().s_PruneWindowScaleMinimum); + CPPUNIT_ASSERT_EQUAL( + core::CContainerPrinter::print(config2.normalizedScoreKnotPoints()), + core::CContainerPrinter::print(config1.normalizedScoreKnotPoints())); } } CppUnit::Test* CAnomalyDetectorModelConfigTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnomalyDetectorModelConfigTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyDetectorModelConfigTest::testNormal", - &CAnomalyDetectorModelConfigTest::testNormal)); - suiteOfTests->addTest(new CppUnit::TestCaller("CAnomalyDetectorModelConfigTest::testErrors", - &CAnomalyDetectorModelConfigTest::testErrors)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyDetectorModelConfigTest::testNormal", + &CAnomalyDetectorModelConfigTest::testNormal)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CAnomalyDetectorModelConfigTest::testErrors", + &CAnomalyDetectorModelConfigTest::testErrors)); return suiteOfTests; } diff --git a/lib/model/unittest/CAnomalyScoreTest.cc b/lib/model/unittest/CAnomalyScoreTest.cc index 87da69fcd8..902e97413b 100644 
--- a/lib/model/unittest/CAnomalyScoreTest.cc
+++ b/lib/model/unittest/CAnomalyScoreTest.cc
@@ -56,38 +56,23 @@ void CAnomalyScoreTest::testComputeScores() {
     // Expect deviation of the probability.
     {
         TDoubleVec p1(1u, 0.001);
-        TScores::compute(jointProbabilityWeight,
-                         extremeProbabilityWeight,
-                         minExtremeSamples,
-                         maxExtremeSamples,
-                         maximumAnomalousProbability,
-                         p1,
-                         overallScore,
-                         overallProbability);
+        TScores::compute(jointProbabilityWeight, extremeProbabilityWeight,
+                         minExtremeSamples, maxExtremeSamples, maximumAnomalousProbability,
+                         p1, overallScore, overallProbability);
         LOG_DEBUG(<< "1) score 1 = " << overallScore);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.078557, overallScore, 5e-7);
 
         TDoubleVec p2(1u, 0.02);
-        TScores::compute(jointProbabilityWeight,
-                         extremeProbabilityWeight,
-                         minExtremeSamples,
-                         maxExtremeSamples,
-                         maximumAnomalousProbability,
-                         p2,
-                         overallScore,
-                         overallProbability);
+        TScores::compute(jointProbabilityWeight, extremeProbabilityWeight,
+                         minExtremeSamples, maxExtremeSamples, maximumAnomalousProbability,
+                         p2, overallScore, overallProbability);
         LOG_DEBUG(<< "1) score 2 = " << overallScore);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.002405, overallScore, 5e-7);
 
         TDoubleVec p3(1u, 0.1);
-        TScores::compute(jointProbabilityWeight,
-                         extremeProbabilityWeight,
-                         minExtremeSamples,
-                         maxExtremeSamples,
-                         maximumAnomalousProbability,
-                         p3,
-                         overallScore,
-                         overallProbability);
+        TScores::compute(jointProbabilityWeight, extremeProbabilityWeight,
+                         minExtremeSamples, maxExtremeSamples, maximumAnomalousProbability,
+                         p3, overallScore, overallProbability);
         LOG_DEBUG(<< "1) score 3 = " << overallScore);
         CPPUNIT_ASSERT_EQUAL(0.0, overallScore);
     }
@@ -95,16 +80,12 @@ void CAnomalyScoreTest::testComputeScores() {
     // Test 2: low anomalousness.
     // Expect scores of zero.
     {
-        double p[] = {0.21, 0.52, 0.13, 0.67, 0.89, 0.32, 0.46, 0.222, 0.35, 0.93};
+        double p[] = {0.21, 0.52, 0.13, 0.67, 0.89,
+                      0.32, 0.46, 0.222, 0.35, 0.93};
         TDoubleVec probabilities(boost::begin(p), boost::end(p));
-        TScores::compute(jointProbabilityWeight,
-                         extremeProbabilityWeight,
-                         minExtremeSamples,
-                         maxExtremeSamples,
-                         maximumAnomalousProbability,
-                         probabilities,
-                         overallScore,
-                         overallProbability);
+        TScores::compute(jointProbabilityWeight, extremeProbabilityWeight,
+                         minExtremeSamples, maxExtremeSamples, maximumAnomalousProbability,
+                         probabilities, overallScore, overallProbability);
         LOG_DEBUG(<< "2) score = " << overallScore);
         CPPUNIT_ASSERT_EQUAL(0.0, overallScore);
     }
@@ -113,17 +94,13 @@ void CAnomalyScoreTest::testComputeScores() {
     // Expect a high anomaly score which is generated by the
    // joint probability of less likely samples.
     {
-        double p[] = {0.11, 0.13, 0.12, 0.22, 0.14, 0.09, 0.01, 0.13, 0.15, 0.14, 0.11, 0.13, 0.12, 0.22, 0.09, 0.01};
+        double p[] = {0.11, 0.13, 0.12, 0.22, 0.14, 0.09, 0.01, 0.13,
+                      0.15, 0.14, 0.11, 0.13, 0.12, 0.22, 0.09, 0.01};
         TDoubleVec probabilities(boost::begin(p), boost::end(p));
 
-        TScores::compute(jointProbabilityWeight,
-                         extremeProbabilityWeight,
-                         minExtremeSamples,
-                         maxExtremeSamples,
-                         maximumAnomalousProbability,
-                         probabilities,
-                         overallScore,
-                         overallProbability);
+        TScores::compute(jointProbabilityWeight, extremeProbabilityWeight,
+                         minExtremeSamples, maxExtremeSamples, maximumAnomalousProbability,
+                         probabilities, overallScore, overallProbability);
 
         TJointProbabilityCalculator jointProbabilityCalculator;
         TLogExtremeProbabilityCalculator extremeProbabilityCalculator(2);
@@ -139,8 +116,8 @@ void CAnomalyScoreTest::testComputeScores() {
         extremeProbability = std::exp(extremeProbability);
 
         LOG_DEBUG(<< "3) probabilities = " << core::CContainerPrinter::print(p));
-        LOG_DEBUG(<< " joint probability = " << jointProbability << ", extreme probability = " << extremeProbability
-                  << ", overallScore = " << overallScore);
+        LOG_DEBUG(<< " joint probability = " << jointProbability << ", extreme probability = "
+                  << extremeProbability << ", overallScore = " << overallScore);
 
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.318231, overallScore, 5e-7);
     }
@@ -149,17 +126,13 @@ void CAnomalyScoreTest::testComputeScores() {
     // Expect a high anomaly score which is generated by the
     // extreme samples probability.
     {
-        double p[] = {0.21, 0.52, 0.13, 0.67, 0.89, 0.32, 0.46, 0.222, 0.35, 0.93, 0.89, 0.32, 0.46, 0.000021};
+        double p[] = {0.21, 0.52, 0.13, 0.67, 0.89, 0.32, 0.46,
+                      0.222, 0.35, 0.93, 0.89, 0.32, 0.46, 0.000021};
         TDoubleVec probabilities(boost::begin(p), boost::end(p));
 
-        TScores::compute(jointProbabilityWeight,
-                         extremeProbabilityWeight,
-                         minExtremeSamples,
-                         maxExtremeSamples,
-                         maximumAnomalousProbability,
-                         probabilities,
-                         overallScore,
-                         overallProbability);
+        TScores::compute(jointProbabilityWeight, extremeProbabilityWeight,
+                         minExtremeSamples, maxExtremeSamples, maximumAnomalousProbability,
+                         probabilities, overallScore, overallProbability);
 
         TJointProbabilityCalculator jointProbabilityCalculator;
         TLogExtremeProbabilityCalculator extremeProbabilityCalculator(1);
@@ -176,8 +149,8 @@ void CAnomalyScoreTest::testComputeScores() {
         extremeProbability = std::exp(extremeProbability);
 
         LOG_DEBUG(<< "4) probabilities = " << core::CContainerPrinter::print(probabilities));
-        LOG_DEBUG(<< " joint probability = " << jointProbability << ", extreme probability = " << extremeProbability
-                  << ", overallScore = " << overallScore);
+        LOG_DEBUG(<< " joint probability = " << jointProbability << ", extreme probability = "
+                  << extremeProbability << ", overallScore = " << overallScore);
 
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.137591, overallScore, 5e-7);
     }
@@ -186,17 +159,13 @@ void CAnomalyScoreTest::testComputeScores() {
     // Expect a high anomaly score which is generated by the
     // extreme samples probability.
     {
-        double p[] = {0.21, 0.52, 0.0058, 0.13, 0.67, 0.89, 0.32, 0.03, 0.46, 0.222, 0.35, 0.93, 0.01, 0.89, 0.32, 0.46, 0.0021};
+        double p[] = {0.21, 0.52, 0.0058, 0.13, 0.67, 0.89, 0.32, 0.03, 0.46,
+                      0.222, 0.35, 0.93, 0.01, 0.89, 0.32, 0.46, 0.0021};
         TDoubleVec probabilities(boost::begin(p), boost::end(p));
 
-        TScores::compute(jointProbabilityWeight,
-                         extremeProbabilityWeight,
-                         minExtremeSamples,
-                         maxExtremeSamples,
-                         maximumAnomalousProbability,
-                         probabilities,
-                         overallScore,
-                         overallProbability);
+        TScores::compute(jointProbabilityWeight, extremeProbabilityWeight,
+                         minExtremeSamples, maxExtremeSamples, maximumAnomalousProbability,
+                         probabilities, overallScore, overallProbability);
 
         TJointProbabilityCalculator jointProbabilityCalculator;
         TLogExtremeProbabilityCalculator extremeProbabilityCalculator(4);
@@ -213,8 +182,8 @@ void CAnomalyScoreTest::testComputeScores() {
         extremeProbability = std::exp(extremeProbability);
 
         LOG_DEBUG(<< "5) probabilities = " << core::CContainerPrinter::print(probabilities));
-        LOG_DEBUG(<< " joint probability = " << jointProbability << ", extreme probability = " << extremeProbability
-                  << ", overallScore = " << overallScore);
+        LOG_DEBUG(<< " joint probability = " << jointProbability << ", extreme probability = "
+                  << extremeProbability << ", overallScore = " << overallScore);
 
         CPPUNIT_ASSERT_DOUBLES_EQUAL(0.029413, overallScore, 5e-7);
     }
@@ -222,17 +191,13 @@ void CAnomalyScoreTest::testComputeScores() {
     {
         // Test underflow.
-        double p[] = {1e-100, 1.7e-20, 1.6e-150, 2.2e-150, 1.3e-180, 1.35e-95, 1.7e-180, 1.21e-300};
+        double p[] = {1e-100, 1.7e-20, 1.6e-150, 2.2e-150,
+                      1.3e-180, 1.35e-95, 1.7e-180, 1.21e-300};
         TDoubleVec probabilities(boost::begin(p), boost::end(p));
 
-        TScores::compute(jointProbabilityWeight,
-                         extremeProbabilityWeight,
-                         minExtremeSamples,
-                         maxExtremeSamples,
-                         maximumAnomalousProbability,
-                         probabilities,
-                         overallScore,
-                         overallProbability);
+        TScores::compute(jointProbabilityWeight, extremeProbabilityWeight,
+                         minExtremeSamples, maxExtremeSamples, maximumAnomalousProbability,
+                         probabilities, overallScore, overallProbability);
 
         LOG_DEBUG(<< "6) probabilities = " << core::CContainerPrinter::print(probabilities));
         LOG_DEBUG(<< " overallScore = " << overallScore);
@@ -254,7 +219,8 @@ void CAnomalyScoreTest::testNormalizeScoresQuantiles() {
         }
     }
 
-    model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(1800);
+    model::CAnomalyDetectorModelConfig config =
+        model::CAnomalyDetectorModelConfig::defaultConfig(1800);
     model::CAnomalyScore::CNormalizer normalizer(config);
 
     double totalError = 0.0;
@@ -267,7 +233,8 @@ void CAnomalyScoreTest::testNormalizeScoresQuantiles() {
         normalizer.updateQuantiles(sample);
 
         TDoubleMSetItr itr = scores.upper_bound(samples[i]);
-        double trueQuantile = static_cast<double>(std::distance(scores.begin(), itr)) / static_cast<double>(scores.size());
+        double trueQuantile = static_cast<double>(std::distance(scores.begin(), itr)) /
+                              static_cast<double>(scores.size());
 
         if (trueQuantile > 0.9) {
             double lowerBound;
@@ -280,7 +247,8 @@ void CAnomalyScoreTest::testNormalizeScoresQuantiles() {
             totalError += error;
             numberSamples += 1.0;
 
-            LOG_DEBUG(<< "trueQuantile = " << trueQuantile << ", lowerBound = " << lowerBound << ", upperBound = " << upperBound);
+            LOG_DEBUG(<< "trueQuantile = " << trueQuantile << ", lowerBound = " << lowerBound
+                      << ", upperBound = " << upperBound);
             CPPUNIT_ASSERT(error < 0.02);
         }
     }
@@ -305,16 +273,19 @@ void CAnomalyScoreTest::testNormalizeScoresNoisy() {
         }
     }
 
-    std::size_t largeAnomalyTimes[] = {50, 110, 190, 220, 290, 310, 600, 620, 790, 900, 1100, 1400, 1600, 1900};
+    std::size_t largeAnomalyTimes[] = {50, 110, 190, 220, 290, 310, 600,
+                                       620, 790, 900, 1100, 1400, 1600, 1900};
 
-    double largeAnomalies[] = {50.0, 350.0, 30.0, 100.0, 30.0, 45.0, 100.0, 120.0, 60.0, 130.0, 100.0, 90.0, 45.0, 30.0};
+    double largeAnomalies[] = {50.0, 350.0, 30.0, 100.0, 30.0, 45.0, 100.0,
+                               120.0, 60.0, 130.0, 100.0, 90.0, 45.0, 30.0};
 
     // Add in the big anomalies.
     for (size_t i = 0; i < boost::size(largeAnomalyTimes); ++i) {
         samples[largeAnomalyTimes[i]] += largeAnomalies[i];
     }
 
-    model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(1800);
+    model::CAnomalyDetectorModelConfig config =
+        model::CAnomalyDetectorModelConfig::defaultConfig(1800);
     model::CAnomalyScore::CNormalizer normalizer(config);
 
     //std::ostringstream raw;
@@ -338,7 +309,8 @@ void CAnomalyScoreTest::testNormalizeScoresNoisy() {
         if (maxScores.size() < boost::size(largeAnomalyTimes)) {
             maxScores.insert(TDoubleSizeMap::value_type(sample, i));
         } else if (sample > maxScores.begin()->first) {
-            LOG_DEBUG(<< "normalized = " << sample << " removing " << maxScores.begin()->first);
+            LOG_DEBUG(<< "normalized = " << sample << " removing "
+                      << maxScores.begin()->first);
             maxScores.erase(maxScores.begin());
             maxScores.insert(TDoubleSizeMap::value_type(sample, i));
         }
@@ -361,7 +333,8 @@ void CAnomalyScoreTest::testNormalizeScoresNoisy() {
     LOG_DEBUG(<< "times = " << core::CContainerPrinter::print(times));
 
-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(largeAnomalyTimes), core::CContainerPrinter::print(times));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(largeAnomalyTimes),
+                         core::CContainerPrinter::print(times));
 }
 
 void CAnomalyScoreTest::testNormalizeScoresLargeScore() {
@@ -380,7 +353,8 @@ void CAnomalyScoreTest::testNormalizeScoresLargeScore() {
         samples[anomalyTimes[i]] += anomalies[i];
     }
 
-    model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(1800);
+    model::CAnomalyDetectorModelConfig config =
+        model::CAnomalyDetectorModelConfig::defaultConfig(1800);
     model::CAnomalyScore::CNormalizer normalizer(config);
 
     for (std::size_t i = 0u; i < samples.size(); ++i) {
@@ -397,7 +371,8 @@ void CAnomalyScoreTest::testNormalizeScoresLargeScore() {
     LOG_DEBUG(<< "scores = " << core::CContainerPrinter::print(scores));
 
     for (std::size_t i = 0u; i + 1 < boost::size(anomalies); ++i) {
-        double uplift = scores[i] - 100.0 * anomalies[i] / anomalies[boost::size(anomalies) - 1];
+        double uplift = scores[i] - 100.0 * anomalies[i] /
+                                        anomalies[boost::size(anomalies) - 1];
         LOG_DEBUG(<< "uplift = " << uplift);
         CPPUNIT_ASSERT(uplift > 5.0);
         CPPUNIT_ASSERT(uplift < 13.0);
@@ -412,21 +387,23 @@ void CAnomalyScoreTest::testNormalizeScoresNearZero() {
 
     std::size_t nonZeroCounts[] = {0, 100, 200, 249, 251, 300, 400, 450};
 
-    std::string expectedScores[] = {std::string("[41.62776, 32.36435, 26.16873, 32.36435, 37.68726]"),
-                                    std::string("[41.62776, 32.36435, 26.16873, 32.36435, 37.68726]"),
-                                    std::string("[41.62776, 32.36435, 17.74216, 32.36435, 37.68726]"),
-                                    std::string("[41.62776, 32.36435, 11.1645, 32.36435, 37.68726]"),
-                                    std::string("[41.62776, 32.36435, 11.05937, 32.36435, 37.68726]"),
-                                    std::string("[41.62776, 32.36435, 8.523397, 32.36435, 37.68726]"),
-                                    std::string("[1.14, 1.04, 1, 1.04, 1.09]"),
-                                    std::string("[1.14, 1.04, 1, 1.04, 1.09]")};
+    std::string expectedScores[] = {
+        std::string("[41.62776, 32.36435, 26.16873, 32.36435, 37.68726]"),
+        std::string("[41.62776, 32.36435, 26.16873, 32.36435, 37.68726]"),
+        std::string("[41.62776, 32.36435, 17.74216, 32.36435, 37.68726]"),
+        std::string("[41.62776, 32.36435, 11.1645, 32.36435, 37.68726]"),
+        std::string("[41.62776, 32.36435, 11.05937, 32.36435, 37.68726]"),
+        std::string("[41.62776, 32.36435, 8.523397, 32.36435, 37.68726]"),
+        std::string("[1.14, 1.04, 1, 1.04, 1.09]"),
+        std::string("[1.14, 1.04, 1, 1.04, 1.09]")};
 
     for (std::size_t i = 0u; i < boost::size(nonZeroCounts); ++i) {
         LOG_DEBUG(<< "non-zero count = " << nonZeroCounts[i]);
 
         TDoubleVec samples(500u, 0.0);
         for (std::size_t j = 0u; j < nonZeroCounts[i]; ++j) {
-            if (std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), j) == boost::end(anomalyTimes)) {
+            if (std::find(boost::begin(anomalyTimes), boost::end(anomalyTimes), j) ==
+                boost::end(anomalyTimes)) {
                 samples[j] += 0.0055;
             }
         }
@@ -434,7 +411,8 @@ void CAnomalyScoreTest::testNormalizeScoresNearZero() {
             samples[anomalyTimes[j]] += anomalies[j];
         }
 
-        model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(1800);
+        model::CAnomalyDetectorModelConfig config =
+            model::CAnomalyDetectorModelConfig::defaultConfig(1800);
         model::CAnomalyScore::CNormalizer normalizer(config);
 
         for (std::size_t j = 0u; j < samples.size(); ++j) {
@@ -469,7 +447,8 @@ void CAnomalyScoreTest::testNormalizeScoresOrdering() {
 
         TDoubleVec scores(&allScores[0], &allScores[i]);
 
-        model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig(300);
+        model::CAnomalyDetectorModelConfig config =
+            model::CAnomalyDetectorModelConfig::defaultConfig(300);
         model::CAnomalyScore::CNormalizer normalizer(config);
         for (std::size_t j = 0u; j < i; ++j) {
             normalizer.updateQuantiles(scores[j]);
@@ -500,7 +479,8 @@ void CAnomalyScoreTest::testJsonConversion() {
     samples.push_back(222.2);
     samples.push_back(77.7);
 
-    model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig config =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
     model::CAnomalyScore::CNormalizer origNormalizer(config);
 
     origNormalizer.updateQuantiles(samples);
@@ -521,7 +501,8 @@ void CAnomalyScoreTest::testJsonConversion() {
     model::CAnomalyScore::CNormalizer restoredNormalizer(config);
     {
         core::CJsonStateRestoreTraverser traverser(iss);
-        traverser.traverseSubLevel(boost::bind(&model::CAnomalyScore::CNormalizer::acceptRestoreTraverser, &restoredNormalizer, _1));
+        traverser.traverseSubLevel(boost::bind(&model::CAnomalyScore::CNormalizer::acceptRestoreTraverser,
+                                               &restoredNormalizer, _1));
     }
 
     // The new JSON representation of the new filter should be the same as the original
@@ -537,14 +518,16 @@ void CAnomalyScoreTest::testJsonConversion() {
     // representation and extra fields that are used for indexing
     // in a database
     std::string toJson;
-    model::CAnomalyScore::normalizerToJson(origNormalizer, "dummy", "sysChange", "my normalizer", 1234567890, toJson);
+    model::CAnomalyScore::normalizerToJson(origNormalizer, "dummy", "sysChange",
+                                           "my normalizer", 1234567890, toJson);
 
     rapidjson::Document doc;
     doc.Parse(toJson.c_str());
 
     CPPUNIT_ASSERT(doc.HasMember(model::CAnomalyScore::MLCUE_ATTRIBUTE.c_str()));
     CPPUNIT_ASSERT(doc.HasMember(model::CAnomalyScore::MLKEY_ATTRIBUTE.c_str()));
-    CPPUNIT_ASSERT(doc.HasMember(model::CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE.c_str()));
+    CPPUNIT_ASSERT(doc.HasMember(
+        model::CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE.c_str()));
     CPPUNIT_ASSERT(doc.HasMember(model::CAnomalyScore::MLVERSION_ATTRIBUTE.c_str()));
     CPPUNIT_ASSERT(doc.HasMember(model::CAnomalyScore::TIME_ATTRIBUTE.c_str()));
     CPPUNIT_ASSERT(doc.HasMember("a"));
@@ -569,7 +552,8 @@ void CAnomalyScoreTest::testJsonConversion() {
     CPPUNIT_ASSERT(model::CAnomalyScore::normalizerFromJson(toJson, fromJsonNormalizer));
 
     std::string restoredJson;
-    model::CAnomalyScore::normalizerToJson(fromJsonNormalizer, "dummy", "sysChange", "my normalizer", 1234567890, restoredJson);
+    model::CAnomalyScore::normalizerToJson(fromJsonNormalizer, "dummy", "sysChange",
+                                           "my normalizer", 1234567890, restoredJson);
 
     CPPUNIT_ASSERT_EQUAL(toJson, restoredJson);
 }
@@ -578,14 +562,16 @@ void CAnomalyScoreTest::testPersistEmpty() {
     // This tests what happens when we persist and restore quantiles that have
     // never had any data added - see bug 761 in Bugzilla
 
-    model::CAnomalyDetectorModelConfig config = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig config =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
 
     model::CAnomalyScore::CNormalizer origNormalizer(config);
 
     CPPUNIT_ASSERT(!origNormalizer.canNormalize());
 
     std::string origJson;
-    model::CAnomalyScore::normalizerToJson(origNormalizer, "test", "test", "test", 1234567890, origJson);
+    model::CAnomalyScore::normalizerToJson(origNormalizer, "test", "test",
+                                           "test", 1234567890, origJson);
 
     model::CAnomalyScore::CNormalizer newNormalizer(config);
@@ -594,7 +580,8 @@ void CAnomalyScoreTest::testPersistEmpty() {
     CPPUNIT_ASSERT(!newNormalizer.canNormalize());
 
     std::string newJson;
-    model::CAnomalyScore::normalizerToJson(newNormalizer, "test", "test", "test", 1234567890, newJson);
+    model::CAnomalyScore::normalizerToJson(newNormalizer, "test", "test",
+                                           "test", 1234567890, newJson);
 
     CPPUNIT_ASSERT_EQUAL(origJson, newJson);
 }
@@ -602,22 +589,27 @@ CppUnit::Test* CAnomalyScoreTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CAnomalyScoreTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CAnomalyScoreTest>("CAnomalyScoreTest::testComputeScores", &CAnomalyScoreTest::testComputeScores));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>("CAnomalyScoreTest::testNormalizeScoresQuantiles",
-                                                                     &CAnomalyScoreTest::testNormalizeScoresQuantiles));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>("CAnomalyScoreTest::testNormalizeScoresNoisy",
-                                                                     &CAnomalyScoreTest::testNormalizeScoresNoisy));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>("CAnomalyScoreTest::testNormalizeScoresLargeScore",
-                                                                     &CAnomalyScoreTest::testNormalizeScoresLargeScore));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>("CAnomalyScoreTest::testNormalizeScoresNearZero",
-                                                                     &CAnomalyScoreTest::testNormalizeScoresNearZero));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>("CAnomalyScoreTest::testNormalizeScoresOrdering",
-                                                                     &CAnomalyScoreTest::testNormalizeScoresOrdering));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CAnomalyScoreTest>("CAnomalyScoreTest::testJsonConversion", &CAnomalyScoreTest::testJsonConversion));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CAnomalyScoreTest>("CAnomalyScoreTest::testPersistEmpty", &CAnomalyScoreTest::testPersistEmpty));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>(
+        "CAnomalyScoreTest::testComputeScores", &CAnomalyScoreTest::testComputeScores));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>(
+        "CAnomalyScoreTest::testNormalizeScoresQuantiles",
+        &CAnomalyScoreTest::testNormalizeScoresQuantiles));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>(
+        "CAnomalyScoreTest::testNormalizeScoresNoisy",
+        &CAnomalyScoreTest::testNormalizeScoresNoisy));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>(
+        "CAnomalyScoreTest::testNormalizeScoresLargeScore",
+        &CAnomalyScoreTest::testNormalizeScoresLargeScore));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>(
+        "CAnomalyScoreTest::testNormalizeScoresNearZero",
+        &CAnomalyScoreTest::testNormalizeScoresNearZero));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>(
+        "CAnomalyScoreTest::testNormalizeScoresOrdering",
+        &CAnomalyScoreTest::testNormalizeScoresOrdering));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>(
+        "CAnomalyScoreTest::testJsonConversion", &CAnomalyScoreTest::testJsonConversion));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CAnomalyScoreTest>(
+        "CAnomalyScoreTest::testPersistEmpty", &CAnomalyScoreTest::testPersistEmpty));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CBucketQueueTest.cc b/lib/model/unittest/CBucketQueueTest.cc
index cb426cca06..29c7dd708d 100644
--- a/lib/model/unittest/CBucketQueueTest.cc
+++ b/lib/model/unittest/CBucketQueueTest.cc
@@ -174,20 +174,25 @@ void CBucketQueueTest::testBucketQueueUMap() {
 CppUnit::Test* CBucketQueueTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBucketQueueTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>("CBucketQueueTest::testConstructorFillsQueue",
-                                                                    &CBucketQueueTest::testConstructorFillsQueue));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>("CBucketQueueTest::testPushGivenEarlierTime",
-                                                                    &CBucketQueueTest::testPushGivenEarlierTime));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>("CBucketQueueTest::testGetGivenFullQueueWithNoPop",
-                                                                    &CBucketQueueTest::testGetGivenFullQueueWithNoPop));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>("CBucketQueueTest::testGetGivenFullQueueAfterPop",
-                                                                    &CBucketQueueTest::testGetGivenFullQueueAfterPop));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>("CBucketQueueTest::testClear", &CBucketQueueTest::testClear));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>("CBucketQueueTest::testIterators", &CBucketQueueTest::testIterators));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CBucketQueueTest>("CBucketQueueTest::testReverseIterators", &CBucketQueueTest::testReverseIterators));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CBucketQueueTest>("CBucketQueueTest::testBucketQueueUMap", &CBucketQueueTest::testBucketQueueUMap));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>(
+        "CBucketQueueTest::testConstructorFillsQueue",
+        &CBucketQueueTest::testConstructorFillsQueue));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>(
+        "CBucketQueueTest::testPushGivenEarlierTime", &CBucketQueueTest::testPushGivenEarlierTime));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>(
+        "CBucketQueueTest::testGetGivenFullQueueWithNoPop",
+        &CBucketQueueTest::testGetGivenFullQueueWithNoPop));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>(
+        "CBucketQueueTest::testGetGivenFullQueueAfterPop",
+        &CBucketQueueTest::testGetGivenFullQueueAfterPop));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>(
+        "CBucketQueueTest::testClear", &CBucketQueueTest::testClear));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>(
+        "CBucketQueueTest::testIterators", &CBucketQueueTest::testIterators));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>(
+        "CBucketQueueTest::testReverseIterators", &CBucketQueueTest::testReverseIterators));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CBucketQueueTest>(
+        "CBucketQueueTest::testBucketQueueUMap", &CBucketQueueTest::testBucketQueueUMap));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CCountingModelTest.cc b/lib/model/unittest/CCountingModelTest.cc
index 9872bc6eee..03dddadd26 100644
--- a/lib/model/unittest/CCountingModelTest.cc
+++ b/lib/model/unittest/CCountingModelTest.cc
@@ -24,7 +24,9 @@ using namespace ml;
 using namespace model;
 
 namespace {
-std::size_t addPerson(const std::string& p, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) {
+std::size_t addPerson(const std::string& p,
+                      const CModelFactory::TDataGathererPtr& gatherer,
+                      CResourceMonitor& resourceMonitor) {
     CDataGatherer::TStrCPtrVec person;
     person.push_back(&p);
     CEventData result;
@@ -32,7 +34,10 @@ std::size_t addPerson(const std::string& p, const CModelFactory::TDataGathererPt
     return *result.personId();
 }
 
-void addArrival(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, core_t::TTime time, const std::string& person) {
+void addArrival(CDataGatherer& gatherer,
+                CResourceMonitor& resourceMonitor,
+                core_t::TTime time,
+                const std::string& person) {
     CDataGatherer::TStrCPtrVec fieldValues;
     fieldValues.push_back(&person);
 
@@ -41,7 +46,8 @@ void addArrival(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, core
     gatherer.addArrival(fieldValues, eventData, resourceMonitor);
 }
 
-SModelParams::TStrDetectionRulePr makeScheduledEvent(const std::string& description, double start, double end) {
+SModelParams::TStrDetectionRulePr
+makeScheduledEvent(const std::string& description, double start, double end) {
     CRuleCondition conditionGte;
     conditionGte.type(CRuleCondition::E_Time);
     conditionGte.condition().s_Op = CRuleCondition::E_GTE;
@@ -80,11 +86,13 @@ void CCountingModelTest::testSkipSampling() {
     // Model where gap is not skipped
     {
         CModelFactory::SGathererInitializationData gathererNoGapInitData(startTime);
-        CModelFactory::TDataGathererPtr gathererNoGap(factory.makeDataGatherer(gathererNoGapInitData));
+        CModelFactory::TDataGathererPtr gathererNoGap(
+            factory.makeDataGatherer(gathererNoGapInitData));
         CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gathererNoGap, m_ResourceMonitor));
         CModelFactory::SModelInitializationData modelNoGapInitData(gathererNoGap);
         CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData));
-        CCountingModel* modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
+        CCountingModel* modelNoGap =
+            dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
 
         // |2|2|0|0|1| -> 1.0 mean count
         addArrival(*gathererNoGap, m_ResourceMonitor, 100, "p");
@@ -102,11 +110,15 @@ void CCountingModelTest::testSkipSampling() {
     // Model where gap is skipped
     {
         CModelFactory::SGathererInitializationData gathererWithGapInitData(startTime);
-        CModelFactory::TDataGathererPtr gathererWithGap(factory.makeDataGatherer(gathererWithGapInitData));
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gathererWithGap, m_ResourceMonitor));
+        CModelFactory::TDataGathererPtr gathererWithGap(
+            factory.makeDataGatherer(gathererWithGapInitData));
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             addPerson("p", gathererWithGap, m_ResourceMonitor));
         CModelFactory::SModelInitializationData modelWithGapInitData(gathererWithGap);
-        CAnomalyDetectorModel::TModelPtr modelHolderWithGap(factory.makeModel(modelWithGapInitData));
-        CCountingModel* modelWithGap = dynamic_cast<CCountingModel*>(modelHolderWithGap.get());
+        CAnomalyDetectorModel::TModelPtr modelHolderWithGap(
+            factory.makeModel(modelWithGapInitData));
+        CCountingModel* modelWithGap =
+            dynamic_cast<CCountingModel*>(modelHolderWithGap.get());
 
         // |2|2|0|0|1|
         // |2|X|X|X|1| -> 1.5 mean count where X means skipped bucket
@@ -151,9 +163,11 @@ void CCountingModelTest::testCheckScheduledEvents() {
         addArrival(*gatherer, m_ResourceMonitor, 200, "p");
         CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData));
-        CCountingModel* modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
+        CCountingModel* modelNoGap =
+            dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
 
-        SModelParams::TStrDetectionRulePrVec matchedEvents = modelNoGap->checkScheduledEvents(50);
+        SModelParams::TStrDetectionRulePrVec matchedEvents =
+            modelNoGap->checkScheduledEvents(50);
         CPPUNIT_ASSERT_EQUAL(std::size_t{0}, matchedEvents.size());
 
         matchedEvents = modelNoGap->checkScheduledEvents(200);
@@ -174,7 +188,8 @@ void CCountingModelTest::testCheckScheduledEvents() {
 
         // Test event descriptions are set
         modelNoGap->sample(200, 800, m_ResourceMonitor);
-        std::vector<std::string> eventDescriptions = modelNoGap->scheduledEventDescriptions(200);
+        std::vector<std::string> eventDescriptions =
+            modelNoGap->scheduledEventDescriptions(200);
         CPPUNIT_ASSERT_EQUAL(std::size_t{1}, eventDescriptions.size());
         CPPUNIT_ASSERT_EQUAL(std::string("first event"), eventDescriptions[0]);
 
@@ -197,11 +212,13 @@ void CCountingModelTest::testCheckScheduledEvents() {
         addArrival(*gatherer, m_ResourceMonitor, 100, "p");
         CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData));
-        CCountingModel* modelNoGap = dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
+        CCountingModel* modelNoGap =
+            dynamic_cast<CCountingModel*>(modelHolderNoGap.get());
 
         // There are no events at this time
         modelNoGap->sampleBucketStatistics(0, 100, m_ResourceMonitor);
-        std::vector<std::string> eventDescriptions = modelNoGap->scheduledEventDescriptions(0);
+        std::vector<std::string> eventDescriptions =
+            modelNoGap->scheduledEventDescriptions(0);
         CPPUNIT_ASSERT(eventDescriptions.empty());
 
         // Test event descriptions are set
@@ -224,9 +241,10 @@ CppUnit::Test* CCountingModelTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CCountingModelTest");
 
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CCountingModelTest>("CCountingModelTest::testSkipSampling", &CCountingModelTest::testSkipSampling));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CCountingModelTest>("CCountingModelTest::testCheckScheduledEvents",
-                                                                      &CCountingModelTest::testCheckScheduledEvents));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCountingModelTest>(
+        "CCountingModelTest::testSkipSampling", &CCountingModelTest::testSkipSampling));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CCountingModelTest>(
+        "CCountingModelTest::testCheckScheduledEvents",
+        &CCountingModelTest::testCheckScheduledEvents));
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc
index 5619d3c403..b1269b4d34 100644
--- a/lib/model/unittest/CDetectionRuleTest.cc
+++ b/lib/model/unittest/CDetectionRuleTest.cc
@@ -35,36 +35,44 @@ const std::string EMPTY_STRING;
 
 CppUnit::Test* CDetectionRuleTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetectionRuleTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenCategoricalCondition",
-                                                                      &CDetectionRuleTest::testApplyGivenCategoricalCondition));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenNumericalActualCondition",
-                                                                      &CDetectionRuleTest::testApplyGivenNumericalActualCondition));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenNumericalTypicalCondition",
-                                                                      &CDetectionRuleTest::testApplyGivenNumericalTypicalCondition));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition",
-                                                                      &CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField",
-                                                    &CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenNoActualValueAvailable",
-                                                                      &CDetectionRuleTest::testApplyGivenNoActualValueAvailable));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel",
-                                                    &CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel",
-                                                    &CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr",
-                                                                      &CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd",
-                                                                      &CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel",
-                                                    &CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testApplyGivenTimeCondition",
-                                                                      &CDetectionRuleTest::testApplyGivenTimeCondition));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CDetectionRuleTest>("CDetectionRuleTest::testRuleActions", &CDetectionRuleTest::testRuleActions));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenCategoricalCondition",
+        &CDetectionRuleTest::testApplyGivenCategoricalCondition));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenNumericalActualCondition",
+        &CDetectionRuleTest::testApplyGivenNumericalActualCondition));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenNumericalTypicalCondition",
+        &CDetectionRuleTest::testApplyGivenNumericalTypicalCondition));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition",
+        &CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField",
+        &CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenNoActualValueAvailable",
+        &CDetectionRuleTest::testApplyGivenNoActualValueAvailable));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel",
+        &CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel",
+        &CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr",
+        &CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd",
+        &CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel",
+        &CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testApplyGivenTimeCondition",
+        &CDetectionRuleTest::testApplyGivenTimeCondition));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRuleTest>(
+        "CDetectionRuleTest::testRuleActions", &CDetectionRuleTest::testRuleActions));
 
     return suiteOfTests;
 }
@@ -84,21 +92,10 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() {
     std::string partitionFieldValue("par_1");
     std::string personFieldName("over");
     std::string attributeFieldName("by");
-    CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_PopulationMetric,
-                                                                          model_t::E_None,
-                                                                          params,
-                                                                          EMPTY_STRING,
-                                                                          partitionFieldName,
-                                                                          partitionFieldValue,
-                                                                          personFieldName,
-                                                                          attributeFieldName,
-                                                                          EMPTY_STRING,
-                                                                          TStrVec(),
-                                                                          false,
-                                                                          key,
-                                                                          features,
-                                                                          startTime,
-                                                                          0));
+    CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(
+        model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING,
+        partitionFieldName, partitionFieldValue, personFieldName, attributeFieldName,
+        EMPTY_STRING, TStrVec(), false, key, features, startTime, 0));
 
     std::string person1("p1");
     bool added = false;
@@ -122,7 +119,8 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() {
     model.mockAddBucketValue(model_t::E_PopulationMeanByPersonAndAttribute, 1, 2, 100, actual);
     model.mockAddBucketValue(model_t::E_PopulationMeanByPersonAndAttribute, 1, 3, 100, actual);
 
-    for (auto conditionType : {CRuleCondition::E_CategoricalMatch, CRuleCondition::E_CategoricalComplement}) {
+    for (auto conditionType : {CRuleCondition::E_CategoricalMatch,
+                               CRuleCondition::E_CategoricalComplement}) {
         std::string filterJson("[\"a1_1\",\"a2_2\"]");
         core::CPatternSet valueFilter;
         valueFilter.initFromJson(filterJson);
@@ -137,21 +135,22 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() {
         bool isCategoricalMatch = CRuleCondition::E_CategoricalMatch == conditionType;
         model_t::CResultType resultType(model_t::CResultType::E_Final);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) ==
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) !=
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) !=
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) ==
-            isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 0, 0, 100) == isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 0, 1, 100) != isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 1, 2, 100) != isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 1, 3, 100) == isCategoricalMatch);
     }
 
-    for (auto conditionType : {CRuleCondition::E_CategoricalMatch, CRuleCondition::E_CategoricalComplement}) {
+    for (auto conditionType : {CRuleCondition::E_CategoricalMatch,
+                               CRuleCondition::E_CategoricalComplement}) {
         std::string filterJson("[\"a1*\"]");
         core::CPatternSet valueFilter;
         valueFilter.initFromJson(filterJson);
@@ -166,21 +165,22 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() {
         bool isCategoricalMatch = CRuleCondition::E_CategoricalMatch == conditionType;
         model_t::CResultType resultType(model_t::CResultType::E_Final);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) ==
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) ==
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) !=
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) !=
-            isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 0, 0, 100) == isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 0, 1, 100) == isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 1, 2, 100) != isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 1, 3, 100) != isCategoricalMatch);
     }
 
-    for (auto conditionType : {CRuleCondition::E_CategoricalMatch, CRuleCondition::E_CategoricalComplement}) {
+    for (auto conditionType : {CRuleCondition::E_CategoricalMatch,
+                               CRuleCondition::E_CategoricalComplement}) {
         std::string filterJson("[\"*2\"]");
         core::CPatternSet valueFilter;
         valueFilter.initFromJson(filterJson);
@@ -195,21 +195,22 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() {
         bool isCategoricalMatch = CRuleCondition::E_CategoricalMatch == conditionType;
         model_t::CResultType resultType(model_t::CResultType::E_Final);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) !=
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) ==
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) !=
-            isCategoricalMatch);
-        CPPUNIT_ASSERT(
-            rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) ==
-            isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 0, 0, 100) != isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 0, 1, 100) == isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                                  model_t::E_PopulationMeanByPersonAndAttribute,
+                                  resultType, 1, 2, 100) != isCategoricalMatch);
+        CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults,
model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 2, 100) != isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 3, 100) == isCategoricalMatch); } - for (auto conditionType : {CRuleCondition::E_CategoricalMatch, CRuleCondition::E_CategoricalComplement}) { + for (auto conditionType : {CRuleCondition::E_CategoricalMatch, + CRuleCondition::E_CategoricalComplement}) { std::string filterJson("[\"*1*\"]"); core::CPatternSet valueFilter; valueFilter.initFromJson(filterJson); @@ -224,21 +225,22 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() { bool isCategoricalMatch = CRuleCondition::E_CategoricalMatch == conditionType; model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) == - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) != - isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 0, 100) == isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 1, 100) == isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 2, 100) == isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 3, 100) != isCategoricalMatch); } - for (auto conditionType : {CRuleCondition::E_CategoricalMatch, CRuleCondition::E_CategoricalComplement}) { + for (auto conditionType : {CRuleCondition::E_CategoricalMatch, + CRuleCondition::E_CategoricalComplement}) { std::string filterJson("[\"p2\"]"); core::CPatternSet valueFilter; valueFilter.initFromJson(filterJson); @@ -253,21 +255,22 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() { bool isCategoricalMatch = CRuleCondition::E_CategoricalMatch == conditionType; model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) != - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) != - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == - isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 0, 100) != isCategoricalMatch); + 
CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 1, 100) != isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 2, 100) == isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 3, 100) == isCategoricalMatch); } - for (auto conditionType : {CRuleCondition::E_CategoricalMatch, CRuleCondition::E_CategoricalComplement}) { + for (auto conditionType : {CRuleCondition::E_CategoricalMatch, + CRuleCondition::E_CategoricalComplement}) { std::string filterJson("[\"par_1\"]"); core::CPatternSet valueFilter; valueFilter.initFromJson(filterJson); @@ -282,21 +285,22 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() { bool isCategoricalMatch = CRuleCondition::E_CategoricalMatch == conditionType; model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) == - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == - isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 0, 100) == isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 1, 100) == isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 2, 100) == isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 3, 100) == isCategoricalMatch); } - for (auto conditionType : {CRuleCondition::E_CategoricalMatch, CRuleCondition::E_CategoricalComplement}) { + for (auto conditionType : {CRuleCondition::E_CategoricalMatch, + CRuleCondition::E_CategoricalComplement}) { std::string filterJson("[\"par_2\"]"); core::CPatternSet valueFilter; valueFilter.initFromJson(filterJson); @@ -311,18 +315,18 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() { bool isCategoricalMatch = CRuleCondition::E_CategoricalMatch == conditionType; model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) != - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100) != - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) != - isCategoricalMatch); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, 
model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) != - isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 0, 100) != isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 1, 100) != isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 2, 100) != isCategoricalMatch); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 3, 100) != isCategoricalMatch); } } @@ -337,21 +341,10 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -377,11 +370,12 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300) == false); } { @@ -396,10 +390,12 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300) == false); } { // Test rule with condition with operator GT @@ -413,11 +409,12 @@ void 
CDetectionRuleTest::testApplyGivenNumericalActualCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300)); } { // Test rule with condition with operator GT @@ -431,10 +428,12 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300)); } } @@ -449,21 +448,10 @@ void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -495,11 +483,12 @@ void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200) == false); + 
CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300) == false); } { @@ -514,11 +503,12 @@ void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300)); } } @@ -533,21 +523,10 @@ void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -591,16 +570,18 @@ void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 500) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 600) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 400)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 500) == false); + 
CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 600) == false); } { @@ -615,16 +596,18 @@ void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 500) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 600)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 400) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 500) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 600)); } } @@ -639,21 +622,10 @@ void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -680,9 +652,12 @@ void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField() model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 300) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200) == false); + 
CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 300) == false); } void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() { @@ -696,21 +671,10 @@ void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -733,7 +697,8 @@ void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 400) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 400) == false); } void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() { @@ -748,21 +713,10 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - personFieldName, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -787,8 +741,10 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 1, 0, 100) == false); } void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() { @@ -804,21 +760,10 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() { features.push_back(model_t::E_PopulationMeanByPersonAndAttribute); std::string personFieldName("over"); std::string attributeFieldName("by"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_PopulationMetric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - personFieldName, - attributeFieldName, - EMPTY_STRING, - TStrVec(), - false, - key, - 
features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, personFieldName, attributeFieldName, + EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); std::string person1("p1"); bool added = false; @@ -853,14 +798,18 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 0, 1, 100)); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 2, 100) == false); - CPPUNIT_ASSERT( - rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_PopulationMeanByPersonAndAttribute, resultType, 1, 3, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 0, 1, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 2, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_PopulationMeanByPersonAndAttribute, + resultType, 1, 3, 100) == false); } void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() { @@ -875,21 +824,10 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - personFieldName, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -919,8 +857,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); } { // First applies only @@ -942,7 +880,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); } { // Second applies only @@ -964,7 +903,8 @@ void 
CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); } { // Both apply @@ -986,7 +926,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); } } @@ -1002,21 +943,10 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - personFieldName, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -1047,8 +977,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); } { // First applies only @@ -1071,8 +1001,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); } { // Second applies only @@ -1095,8 +1025,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); } { // Both apply @@ -1119,7 +1049,8 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); } } @@ -1137,21 +1068,10 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( std::string partitionFieldName("partition"); 
std::string partitionFieldValue("partition_1"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - partitionFieldName, - partitionFieldValue, - personFieldName, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + partitionFieldName, partitionFieldValue, personFieldName, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -1179,8 +1099,10 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 1, 0, 100)); } { // Matching targetFieldValue @@ -1197,8 +1119,10 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 1, 0, 100)); } { // Non-matching targetFieldValue @@ -1216,10 +1140,10 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == - false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 1, 0, 100) == - false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 1, 0, 100) == false); } } @@ -1236,21 +1160,10 @@ void CDetectionRuleTest::testApplyGivenTimeCondition() { features.push_back(model_t::E_IndividualMeanByPerson); std::string partitionFieldName("partition"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - partitionFieldName, - EMPTY_STRING, - personFieldName, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + partitionFieldName, EMPTY_STRING, 
personFieldName, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); CMockModel model(params, gathererPtr, influenceCalculators); CRuleCondition conditionGte; @@ -1269,10 +1182,14 @@ void CDetectionRuleTest::testApplyGivenTimeCondition() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 99) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 150)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 200) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 99) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_IndividualMeanByPerson, resultType, 0, 0, 150)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 200) == false); } void CDetectionRuleTest::testRuleActions() { @@ -1288,21 +1205,10 @@ void CDetectionRuleTest::testRuleActions() { features.push_back(model_t::E_IndividualMeanByPerson); std::string partitionFieldName("partition"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - partitionFieldName, - EMPTY_STRING, - personFieldName, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + partitionFieldName, EMPTY_STRING, personFieldName, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); CMockModel model(params, gathererPtr, influenceCalculators); CRuleCondition conditionGte; @@ -1316,14 +1222,20 @@ void CDetectionRuleTest::testRuleActions() { model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, + model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); rule.action(CDetectionRule::E_SkipSampling); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100) == false); - CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, + resultType, 0, 0, 100) == false); + CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, + model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100)); 
rule.action(static_cast<CDetectionRule::ERuleAction>(3));
-    CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100));
-    CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model, model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100));
+    CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_FilterResults, model,
+                              model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100));
+    CPPUNIT_ASSERT(rule.apply(CDetectionRule::E_SkipSampling, model,
+                              model_t::E_IndividualMeanByPerson, resultType, 0, 0, 100));
 }
diff --git a/lib/model/unittest/CDetectorEqualizerTest.cc b/lib/model/unittest/CDetectorEqualizerTest.cc
index 1e4b4cbe9f..34691016e9 100644
--- a/lib/model/unittest/CDetectorEqualizerTest.cc
+++ b/lib/model/unittest/CDetectorEqualizerTest.cc
@@ -71,13 +71,15 @@ void CDetectorEqualizerTest::testCorrect() {
     for (std::size_t i = 1u, k = 0u; i < 3; ++i) {
         for (std::size_t j = 0u; j < i; ++j, ++k) {
             double increase =
-                maths::CStatisticalTests::twoSampleKS(corrected[i], corrected[j]) / maths::CStatisticalTests::twoSampleKS(raw[i], raw[j]);
+                maths::CStatisticalTests::twoSampleKS(corrected[i], corrected[j]) /
+                maths::CStatisticalTests::twoSampleKS(raw[i], raw[j]);
             similarityIncrease.add(std::log(increase));
             LOG_DEBUG(<< "similarity increase = " << increase);
             CPPUNIT_ASSERT(increase > 3.0);
         }
     }
-    LOG_DEBUG(<< "mean similarity increase = " << std::exp(maths::CBasicStatistics::mean(similarityIncrease)));
+    LOG_DEBUG(<< "mean similarity increase = "
+              << std::exp(maths::CBasicStatistics::mean(similarityIncrease)));
     CPPUNIT_ASSERT(std::exp(maths::CBasicStatistics::mean(similarityIncrease)) > 40.0);
 }
@@ -164,7 +166,8 @@ void CDetectorEqualizerTest::testPersist() {
     core::CRapidXmlParser parser;
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
-    CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&model::CDetectorEqualizer::acceptRestoreTraverser, &restoredEqualizer, _1)));
+    CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(
+        &model::CDetectorEqualizer::acceptRestoreTraverser, &restoredEqualizer, _1)));
 }
 // Checksums should agree.
@@ -183,12 +186,12 @@ void CDetectorEqualizerTest::testPersist() {
 CppUnit::Test* CDetectorEqualizerTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDetectorEqualizerTest");
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CDetectorEqualizerTest>("CDetectorEqualizerTest::testCorrect", &CDetectorEqualizerTest::testCorrect));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CDetectorEqualizerTest>("CDetectorEqualizerTest::testAge", &CDetectorEqualizerTest::testAge));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CDetectorEqualizerTest>("CDetectorEqualizerTest::testPersist", &CDetectorEqualizerTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectorEqualizerTest>(
+        "CDetectorEqualizerTest::testCorrect", &CDetectorEqualizerTest::testCorrect));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectorEqualizerTest>(
+        "CDetectorEqualizerTest::testAge", &CDetectorEqualizerTest::testAge));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDetectorEqualizerTest>(
+        "CDetectorEqualizerTest::testPersist", &CDetectorEqualizerTest::testPersist));
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CDynamicStringIdRegistryTest.cc b/lib/model/unittest/CDynamicStringIdRegistryTest.cc
index 69672c45c8..e0dbd1079b 100644
--- a/lib/model/unittest/CDynamicStringIdRegistryTest.cc
+++ b/lib/model/unittest/CDynamicStringIdRegistryTest.cc
@@ -24,10 +24,10 @@ using namespace model;
 CppUnit::Test* CDynamicStringIdRegistryTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDynamicStringIdRegistryTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDynamicStringIdRegistryTest>("CDynamicStringIdRegistryTest::testAddName",
-                                                                                &CDynamicStringIdRegistryTest::testAddName));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CDynamicStringIdRegistryTest>("CDynamicStringIdRegistryTest::testPersist",
-                                                                                &CDynamicStringIdRegistryTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDynamicStringIdRegistryTest>(
+        "CDynamicStringIdRegistryTest::testAddName", &CDynamicStringIdRegistryTest::testAddName));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CDynamicStringIdRegistryTest>(
+        "CDynamicStringIdRegistryTest::testPersist", &CDynamicStringIdRegistryTest::testPersist));
     return suiteOfTests;
 }
@@ -36,25 +36,30 @@ void CDynamicStringIdRegistryTest::testAddName() {
     LOG_DEBUG(<< "*** testAddName ***");
     CResourceMonitor resourceMonitor;
-    CDynamicStringIdRegistry registry(
-        "person", stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled);
+    CDynamicStringIdRegistry registry("person", stat_t::E_NumberNewPeople,
+                                      stat_t::E_NumberNewPeopleNotAllowed,
+                                      stat_t::E_NumberNewPeopleRecycled);
     bool personAdded = false;
     std::string person1("foo");
     std::string person2("bar");
-    CPPUNIT_ASSERT_EQUAL(std::size_t(0), registry.addName(person1, 100, resourceMonitor, personAdded));
+    CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                         registry.addName(person1, 100, resourceMonitor, personAdded));
     CPPUNIT_ASSERT(personAdded);
     personAdded = false;
-    CPPUNIT_ASSERT_EQUAL(std::size_t(1), registry.addName(person2, 200, resourceMonitor, personAdded));
+    CPPUNIT_ASSERT_EQUAL(std::size_t(1),
+                         registry.addName(person2, 200, resourceMonitor, personAdded));
     CPPUNIT_ASSERT(personAdded);
     personAdded = false;
-    CPPUNIT_ASSERT_EQUAL(std::size_t(0), registry.addName(person1, 300, resourceMonitor, personAdded));
+    CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                         registry.addName(person1, 300, resourceMonitor, personAdded));
     CPPUNIT_ASSERT(personAdded == false);
     std::string person3("noot");
-    CPPUNIT_ASSERT_EQUAL(std::size_t(2), registry.addName(person3, 400, resourceMonitor, personAdded));
+    CPPUNIT_ASSERT_EQUAL(std::size_t(2),
+                         registry.addName(person3, 400, resourceMonitor, personAdded));
CPPUNIT_ASSERT(personAdded); personAdded = false; @@ -73,7 +78,8 @@ void CDynamicStringIdRegistryTest::testAddName() { CPPUNIT_ASSERT(registry.isIdActive(2)); std::string person4("recycled"); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), registry.addName(person4, 500, resourceMonitor, personAdded)); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + registry.addName(person4, 500, resourceMonitor, personAdded)); CPPUNIT_ASSERT_EQUAL(std::size_t(3), registry.numberNames()); CPPUNIT_ASSERT_EQUAL(std::size_t(3), registry.numberActiveNames()); CPPUNIT_ASSERT(registry.isIdActive(0)); @@ -85,8 +91,9 @@ void CDynamicStringIdRegistryTest::testPersist() { LOG_DEBUG(<< "*** testPersist ***"); CResourceMonitor resourceMonitor; - CDynamicStringIdRegistry registry( - "person", stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled); + CDynamicStringIdRegistry registry("person", stat_t::E_NumberNewPeople, + stat_t::E_NumberNewPeopleNotAllowed, + stat_t::E_NumberNewPeopleRecycled); bool addedPerson = false; std::string person1("foo"); @@ -105,9 +112,11 @@ void CDynamicStringIdRegistryTest::testPersist() { core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDynamicStringIdRegistry restoredRegistry( - "person", stat_t::E_NumberNewPeople, stat_t::E_NumberNewPeopleNotAllowed, stat_t::E_NumberNewPeopleRecycled); - traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, &restoredRegistry, _1)); + CDynamicStringIdRegistry restoredRegistry("person", stat_t::E_NumberNewPeople, + stat_t::E_NumberNewPeopleNotAllowed, + stat_t::E_NumberNewPeopleRecycled); + traverser.traverseSubLevel(boost::bind( + &CDynamicStringIdRegistry::acceptRestoreTraverser, &restoredRegistry, _1)); std::string restoredXml; { diff --git a/lib/model/unittest/CEventRateAnomalyDetectorTest.cc b/lib/model/unittest/CEventRateAnomalyDetectorTest.cc index ac28910e92..aa95f76975 100644 --- a/lib/model/unittest/CEventRateAnomalyDetectorTest.cc +++ b/lib/model/unittest/CEventRateAnomalyDetectorTest.cc @@ -42,10 +42,13 @@ const std::string EMPTY_STRING; class CResultWriter : public ml::model::CHierarchicalResultsVisitor { public: - CResultWriter(const ml::model::CAnomalyDetectorModelConfig& modelConfig, const ml::model::CLimits& limits) + CResultWriter(const ml::model::CAnomalyDetectorModelConfig& modelConfig, + const ml::model::CLimits& limits) : m_ModelConfig(modelConfig), m_Limits(limits), m_Calls(0) {} - void operator()(ml::model::CAnomalyDetector& detector, ml::core_t::TTime start, ml::core_t::TTime end) { + void operator()(ml::model::CAnomalyDetector& detector, + ml::core_t::TTime start, + ml::core_t::TTime end) { ml::model::CHierarchicalResults results; detector.buildResults(start, end, results); results.buildHierarchy(); @@ -56,7 +59,9 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor { results.bottomUpBreadthFirst(*this); } - virtual void visit(const ml::model::CHierarchicalResults& results, const ml::model::CHierarchicalResults::TNode& node, bool pivot) { + virtual void visit(const ml::model::CHierarchicalResults& results, + const ml::model::CHierarchicalResults::TNode& node, + bool pivot) { if (pivot) { return; } @@ -75,14 +80,18 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor { const std::string analysisFieldValue = *node.s_Spec.s_PersonFieldValue; ml::core_t::TTime bucketTime = node.s_BucketStartTime; double anomalyFactor = node.s_RawAnomalyScore; 
- LOG_DEBUG(<< analysisFieldValue << " bucket time " << bucketTime << " anomalyFactor " << anomalyFactor); + LOG_DEBUG(<< analysisFieldValue << " bucket time " << bucketTime + << " anomalyFactor " << anomalyFactor); ++m_Calls; m_AllAnomalies.insert(TTimeStrPr(bucketTime, analysisFieldValue)); m_AnomalyScores[bucketTime] += anomalyFactor; } - bool operator()(ml::core_t::TTime time, const ml::model::CHierarchicalResults::TNode& node, bool isBucketInfluencer) { - LOG_DEBUG(<< (isBucketInfluencer ? "BucketInfluencer" : "Influencer ") << node.s_Spec.print() << " initial score " + bool operator()(ml::core_t::TTime time, + const ml::model::CHierarchicalResults::TNode& node, + bool isBucketInfluencer) { + LOG_DEBUG(<< (isBucketInfluencer ? "BucketInfluencer" : "Influencer ") + << node.s_Spec.print() << " initial score " << node.probability() << ", time: " << time); return true; @@ -170,21 +179,16 @@ void CEventRateAnomalyDetectorTest::testAnomalies() { static const ml::core_t::TTime BUCKET_SIZE(1800); static const double HIGH_ANOMALY_SCORE(0.003); - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); ml::model::CLimits limits; ml::model::CSearchKey key(1, // identifier - ml::model::function_t::E_IndividualRareCount, - false, - ml::model_t::E_XF_None, - EMPTY_STRING, - "status"); + ml::model::function_t::E_IndividualRareCount, false, + ml::model_t::E_XF_None, EMPTY_STRING, "status"); ml::model::CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - EMPTY_STRING, - FIRST_TIME, - modelConfig.factory(key)); + limits, modelConfig, EMPTY_STRING, + FIRST_TIME, modelConfig.factory(key)); CResultWriter writer(modelConfig, limits); TStrVec files; files.push_back("testfiles/status200.txt"); @@ -228,22 +232,17 @@ void CEventRateAnomalyDetectorTest::testPersist() { static const ml::core_t::TTime LAST_TIME(1347317974); static const ml::core_t::TTime BUCKET_SIZE(3600); - ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + ml::model::CAnomalyDetectorModelConfig modelConfig = + ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); ml::model::CLimits limits; ml::model::CSearchKey key(1, // identifier - ml::model::function_t::E_IndividualCount, - false, - ml::model_t::E_XF_None, - EMPTY_STRING, - "status"); + ml::model::function_t::E_IndividualCount, false, + ml::model_t::E_XF_None, EMPTY_STRING, "status"); ml::model::CAnomalyDetector origDetector(1, // identifier - limits, - modelConfig, - EMPTY_STRING, - FIRST_TIME, - modelConfig.factory(key)); + limits, modelConfig, EMPTY_STRING, + FIRST_TIME, modelConfig.factory(key)); CResultWriter writer(modelConfig, limits); TStrVec files; files.push_back("testfiles/status503.txt"); @@ -261,17 +260,15 @@ void CEventRateAnomalyDetectorTest::testPersist() { // Restore the XML into a new detector ml::model::CAnomalyDetector restoredDetector(1, // identifier - limits, - modelConfig, - "", - 0, + limits, modelConfig, "", 0, modelConfig.factory(key)); { ml::core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); ml::core::CRapidXmlStateRestoreTraverser traverser(parser); CPPUNIT_ASSERT(traverser.traverseSubLevel( - boost::bind(&ml::model::CAnomalyDetector::acceptRestoreTraverser, &restoredDetector, EMPTY_STRING, _1))); + 
boost::bind(&ml::model::CAnomalyDetector::acceptRestoreTraverser,
+                        &restoredDetector, EMPTY_STRING, _1)));
     }
 
     // The XML representation of the new typer should be the same as the original
@@ -287,10 +284,12 @@ void CEventRateAnomalyDetectorTest::testPersist() {
 CppUnit::Test* CEventRateAnomalyDetectorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRateAnomalyDetectorTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateAnomalyDetectorTest>("CEventRateAnomalyDetectorTest::testAnomalies",
-                                                                                 &CEventRateAnomalyDetectorTest::testAnomalies));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateAnomalyDetectorTest>("CEventRateAnomalyDetectorTest::testPersist",
-                                                                                 &CEventRateAnomalyDetectorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateAnomalyDetectorTest>(
+        "CEventRateAnomalyDetectorTest::testAnomalies",
+        &CEventRateAnomalyDetectorTest::testAnomalies));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateAnomalyDetectorTest>(
+        "CEventRateAnomalyDetectorTest::testPersist",
+        &CEventRateAnomalyDetectorTest::testPersist));
 
     return suiteOfTests;
 }
 
diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc b/lib/model/unittest/CEventRateDataGathererTest.cc
index aa1341d588..4a6161d1f4 100644
--- a/lib/model/unittest/CEventRateDataGathererTest.cc
+++ b/lib/model/unittest/CEventRateDataGathererTest.cc
@@ -48,10 +48,12 @@ using TFeatureSizeFeatureDataPrVecPrVec = std::vector<TFeatureSizeFeatureDataPrVecPr>;
 using TSizeSizePrFeatureDataPr = std::pair<TSizeSizePr, TFeatureData>;
 using TSizeSizePrFeatureDataPrVec = std::vector<TSizeSizePrFeatureDataPr>;
-using TFeatureSizeSizePrFeatureDataPrVecPr = std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>;
+using TFeatureSizeSizePrFeatureDataPrVecPr =
+    std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>;
 using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector<TFeatureSizeSizePrFeatureDataPrVecPr>;
 using TSizeSizePrStoredStringPtrPr = CBucketGatherer::TSizeSizePrStoredStringPtrPr;
-using TSizeSizePrStoredStringPtrPrUInt64UMapVec = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec;
+using TSizeSizePrStoredStringPtrPrUInt64UMapVec =
+    CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec;
 using TTimeVec = std::vector<core_t::TTime>;
 using TStrCPtrVec = CBucketGatherer::TStrCPtrVec;
 
@@ -79,7 +81,10 @@ std::size_t addPerson(CDataGatherer& gatherer,
     return *result.personId();
 }
 
-void addArrival(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, core_t::TTime time, const std::string& person) {
+void addArrival(CDataGatherer& gatherer,
+                CResourceMonitor& resourceMonitor,
+                core_t::TTime time,
+                const std::string& person) {
     CDataGatherer::TStrCPtrVec fieldValues;
     fieldValues.push_back(&person);
 
@@ -160,19 +165,10 @@ void testPersistence(const SModelParams& params, const CDataGatherer& gatherer)
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);
 
-    CDataGatherer restoredGatherer(model_t::E_EventRate,
-                                   model_t::E_None,
-                                   params,
-                                   EMPTY_STRING,
-                                   EMPTY_STRING,
-                                   EMPTY_STRING,
-                                   EMPTY_STRING,
-                                   EMPTY_STRING,
-                                   EMPTY_STRING,
-                                   TStrVec(),
-                                   false,
-                                   key,
-                                   traverser);
+    CDataGatherer restoredGatherer(model_t::E_EventRate, model_t::E_None, params,
+                                   EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                                   EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                                   TStrVec(), false, key, traverser);
 
     // The XML representation of the new filter should be the
     // same as the original
@@ -202,23 +198,13 @@ void testInfluencerPerFeature(model_t::EFeature feature,
     features.push_back(feature);
     TStrVec influencerFieldNames;
     influencerFieldNames.push_back("IF1");
-    CDataGatherer gatherer(model_t::E_EventRate,
-                           model_t::E_None,
-                           params,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           valueField,
-                           influencerFieldNames,
-                           false,
-                           key,
-
features,
-                           startTime,
-                           0);
+    CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params,
+                           EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                           EMPTY_STRING, valueField, influencerFieldNames,
+                           false, key, features, startTime, 0);
     CPPUNIT_ASSERT(!gatherer.isPopulation());
-    CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, resourceMonitor, "p", valueField, 1));
+    CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                         addPerson(gatherer, resourceMonitor, "p", valueField, 1));
 
     CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures());
     for (std::size_t i = 0u; i < features.size(); ++i) {
@@ -246,7 +232,8 @@ void testInfluencerPerFeature(model_t::EFeature feature,
     core_t::TTime time = startTime;
     for (std::size_t i = 0, j = 0u; i < data.size(); ++i) {
-        for (/**/; j < 5 && data[i] >= time + bucketLength; time += bucketLength, ++j, gatherer.timeNow(time)) {
+        for (/**/; j < 5 && data[i] >= time + bucketLength;
+             time += bucketLength, ++j, gatherer.timeNow(time)) {
             LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")");
 
             TFeatureSizeFeatureDataPrVecPrVec featureData;
@@ -255,18 +242,23 @@ void testInfluencerPerFeature(model_t::EFeature feature,
             CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size());
             CPPUNIT_ASSERT_EQUAL(feature, featureData[0].first);
-            CPPUNIT_ASSERT_EQUAL(expected[j], core::CContainerPrinter::print(featureData[0].second));
+            CPPUNIT_ASSERT_EQUAL(
+                expected[j], core::CContainerPrinter::print(featureData[0].second));
 
             testPersistence(params, gatherer);
         }
 
         if (j < 5) {
-            addArrival(gatherer, resourceMonitor, data[i], "p", influencers[i], valueField.empty() ? EMPTY_STRING : "value");
+            addArrival(gatherer, resourceMonitor, data[i], "p", influencers[i],
+                       valueField.empty() ? EMPTY_STRING : "value");
         }
     }
 }
 
-void importCsvData(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, const std::string& filename, const TSizeVec& fields) {
+void importCsvData(CDataGatherer& gatherer,
+                   CResourceMonitor& resourceMonitor,
+                   const std::string& filename,
+                   const TSizeVec& fields) {
     using TifstreamPtr = boost::shared_ptr<std::ifstream>;
     TifstreamPtr ifs(new std::ifstream(filename.c_str()));
     CPPUNIT_ASSERT(ifs->is_open());
@@ -312,26 +304,16 @@ void CEventRateDataGathererTest::testLatencyPersist() {
         // Create a gatherer, no influences
         TFeatureVec features;
         features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson);
-        CDataGatherer gatherer(model_t::E_EventRate,
-                               model_t::E_None,
-                               params,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               "program",
-                               EMPTY_STRING,
-                               "file",
-                               TStrVec(),
-                               false,
-                               key,
-                               features,
-                               startTime,
-                               0);
+        CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params,
+                               EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                               "program", EMPTY_STRING, "file", TStrVec(),
+                               false, key, features, startTime, 0);
         TSizeVec fields;
         fields.push_back(2);
         fields.push_back(1);
 
-        importCsvData(gatherer, m_ResourceMonitor, "testfiles/files_users_programs.csv", fields);
+        importCsvData(gatherer, m_ResourceMonitor,
+                      "testfiles/files_users_programs.csv", fields);
 
         testPersistence(params, gatherer);
     }
@@ -341,27 +323,17 @@ void CEventRateDataGathererTest::testLatencyPersist() {
         TStrVec influencers;
         influencers.push_back("user");
         features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson);
-        CDataGatherer gatherer(model_t::E_EventRate,
-                               model_t::E_None,
-                               params,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               "program",
-                               EMPTY_STRING,
-                               "file",
-                               influencers,
-                               false,
-                               key,
-                               features,
-                               startTime,
-                               0);
+        CDataGatherer
gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + "program", EMPTY_STRING, "file", influencers, + false, key, features, startTime, 0); TSizeVec fields; fields.push_back(2); fields.push_back(3); fields.push_back(1); - importCsvData(gatherer, m_ResourceMonitor, "testfiles/files_users_programs.csv", fields); + importCsvData(gatherer, m_ResourceMonitor, + "testfiles/files_users_programs.csv", fields); testPersistence(params, gatherer); } @@ -369,25 +341,15 @@ void CEventRateDataGathererTest::testLatencyPersist() { // Create a gatherer, no influences TFeatureVec features; features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "program", - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + "program", EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); TSizeVec fields; fields.push_back(2); - importCsvData(gatherer, m_ResourceMonitor, "testfiles/files_users_programs.csv", fields); + importCsvData(gatherer, m_ResourceMonitor, + "testfiles/files_users_programs.csv", fields); testPersistence(params, gatherer); } @@ -397,26 +359,16 @@ void CEventRateDataGathererTest::testLatencyPersist() { TStrVec influencers; influencers.push_back("user"); features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "program", - EMPTY_STRING, - EMPTY_STRING, - influencers, - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + "program", EMPTY_STRING, EMPTY_STRING, + influencers, false, key, features, startTime, 0); TSizeVec fields; fields.push_back(2); fields.push_back(3); - importCsvData(gatherer, m_ResourceMonitor, "testfiles/files_users_programs.csv", fields); + importCsvData(gatherer, m_ResourceMonitor, + "testfiles/files_users_programs.csv", fields); testPersistence(params, gatherer); } @@ -432,32 +384,29 @@ void CEventRateDataGathererTest::singleSeriesTests() { SModelParams params(bucketLength); core_t::TTime data[] = { - 1, - 15, - 180, - 190, - 400, + 1, 15, 180, 190, 400, 550, // bucket 1 - 600, - 799, + 600, 799, 1199, // bucket 2 1200, 1250, // bucket 3 // bucket 4 - 2420, - 2480, + 2420, 2480, 2490, // bucket 5 10000 // sentinel }; std::string expectedPersonCounts[] = { - std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), std::string("[(0, 0)]"), std::string("[(0, 3)]")}; + std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), + std::string("[(0, 0)]"), std::string("[(0, 3)]")}; std::string expectedPersonNonZeroCounts[] = { - std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), std::string("[]"), std::string("[(0, 3)]")}; + std::string("[(0, 6)]"), std::string("[(0, 3)]"), + std::string("[(0, 2)]"), std::string("[]"), std::string("[(0, 3)]")}; std::string expectedPersonIndicator[] = { - std::string("[(0, 1)]"), std::string("[(0, 1)]"), std::string("[(0, 1)]"), std::string("[]"), std::string("[(0, 1)]")}; + std::string("[(0, 1)]"), std::string("[(0, 1)]"), + std::string("[(0, 1)]"), std::string("[]"), std::string("[(0, 1)]")}; 
// Test the count by bucket and person and bad feature
     // (which should be ignored).
@@ -465,21 +414,10 @@ void CEventRateDataGathererTest::singleSeriesTests() {
         TFeatureVec features;
         features.push_back(model_t::E_IndividualCountByBucketAndPerson);
         features.push_back(model_t::E_IndividualMinByPerson);
-        CDataGatherer gatherer(model_t::E_EventRate,
-                               model_t::E_None,
-                               params,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               TStrVec(),
-                               false,
-                               key,
-                               features,
-                               startTime,
-                               0);
+        CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params,
+                               EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                               EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                               TStrVec(), false, key, features, startTime, 0);
         CPPUNIT_ASSERT(!gatherer.isPopulation());
         CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p"));
 
@@ -504,22 +442,28 @@ void CEventRateDataGathererTest::singleSeriesTests() {
         CPPUNIT_ASSERT_EQUAL(startTime, gatherer.currentBucketStartTime());
 
         gatherer.currentBucketStartTime(200);
-        CPPUNIT_ASSERT_EQUAL(static_cast<core_t::TTime>(200), gatherer.currentBucketStartTime());
+        CPPUNIT_ASSERT_EQUAL(static_cast<core_t::TTime>(200),
+                             gatherer.currentBucketStartTime());
         gatherer.currentBucketStartTime(startTime);
 
         CPPUNIT_ASSERT_EQUAL(bucketLength, gatherer.bucketLength());
 
         core_t::TTime time = startTime;
         for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) {
-            for (/**/; j < 5 && data[i] >= time + bucketLength; time += bucketLength, ++j, gatherer.timeNow(time)) {
-                LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")");
+            for (/**/; j < 5 && data[i] >= time + bucketLength;
+                 time += bucketLength, ++j, gatherer.timeNow(time)) {
+                LOG_DEBUG(<< "Processing bucket [" << time << ", "
+                          << time + bucketLength << ")");
 
                 TFeatureSizeFeatureDataPrVecPrVec featureData;
                 gatherer.featureData(time, bucketLength, featureData);
                 LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData));
                 CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size());
-                CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first);
-                CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], core::CContainerPrinter::print(featureData[0].second));
+                CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson,
+                                     featureData[0].first);
+                CPPUNIT_ASSERT_EQUAL(
+                    expectedPersonCounts[j],
+                    core::CContainerPrinter::print(featureData[0].second));
 
                 testPersistence(params, gatherer);
             }
 
@@ -535,36 +479,33 @@ void CEventRateDataGathererTest::singleSeriesTests() {
         TFeatureVec features;
         features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson);
         features.push_back(model_t::E_IndividualTotalBucketCountByPerson);
-        CDataGatherer gatherer(model_t::E_EventRate,
-                               model_t::E_None,
-                               params,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               EMPTY_STRING,
-                               TStrVec(),
-                               false,
-                               key,
-                               features,
-                               startTime,
-                               0);
+        CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params,
+                               EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                               EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                               TStrVec(), false, key, features, startTime, 0);
         CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p"));
 
         core_t::TTime time = startTime;
         for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) {
-            for (/**/; j < 5 && data[i] >=
time + bucketLength; + time += bucketLength, ++j, gatherer.timeNow(time)) { + LOG_DEBUG(<< "Processing bucket [" << time << ", " + << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], core::CContainerPrinter::print(featureData[0].second)); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualTotalBucketCountByPerson, featureData[1].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], core::CContainerPrinter::print(featureData[1].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonNonZeroCounts[j], + core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualTotalBucketCountByPerson, + featureData[1].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonNonZeroCounts[j], + core::CContainerPrinter::print(featureData[1].second)); testPersistence(params, gatherer); } @@ -579,34 +520,28 @@ void CEventRateDataGathererTest::singleSeriesTests() { { TFeatureVec features; features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { - for (/**/; j < 5 && data[i] >= time + bucketLength; time += bucketLength, ++j, gatherer.timeNow(time)) { - LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")"); + for (/**/; j < 5 && data[i] >= time + bucketLength; + time += bucketLength, ++j, gatherer.timeNow(time)) { + LOG_DEBUG(<< "Processing bucket [" << time << ", " + << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonIndicator[j], core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonIndicator[j], + core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } @@ -628,20 +563,14 @@ void CEventRateDataGathererTest::multipleSeriesTests() { const core_t::TTime bucketLength = 600; core_t::TTime data1[] = { - 1, - 15, - 180, - 190, - 400, + 1, 15, 180, 190, 400, 550, // bucket 1 - 600, - 799, + 600, 799, 1199, // bucket 2 1200, 1250, // bucket 3 1900, // bucket 4 - 2420, - 2480, + 2420, 2480, 2490, // bucket 5 10000 // sentinel }; @@ -654,47 +583,40 @@ void 
CEventRateDataGathererTest::multipleSeriesTests() { 10000 // sentinel }; - std::string expectedPersonCounts[] = {std::string("[(0, 6), (1, 8)]"), - std::string("[(0, 3), (1, 5)]"), - std::string("[(0, 2), (1, 6)]"), - std::string("[(0, 1), (1, 2)]"), - std::string("[(0, 3), (1, 6)]")}; + std::string expectedPersonCounts[] = { + std::string("[(0, 6), (1, 8)]"), std::string("[(0, 3), (1, 5)]"), + std::string("[(0, 2), (1, 6)]"), std::string("[(0, 1), (1, 2)]"), + std::string("[(0, 3), (1, 6)]")}; SModelParams params(bucketLength); { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); core_t::TTime time = startTime; std::size_t i1 = 0u, i2 = 0u, j = 0u; for (;;) { - for (/**/; j < 5 && std::min(data1[i1], data2[i2]) >= time + bucketLength; time += bucketLength, ++j) { - LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")"); + for (/**/; j < 5 && std::min(data1[i1], data2[i2]) >= time + bucketLength; + time += bucketLength, ++j) { + LOG_DEBUG(<< "Processing bucket [" << time << ", " + << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonCounts[j], + core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } @@ -731,28 +653,19 @@ void CEventRateDataGathererTest::multipleSeriesTests() { gatherer.featureData(startTime + 4 * bucketLength, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), + core::CContainerPrinter::print(featureData[0].second)); } { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + 
TStrVec(), false, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -772,7 +685,8 @@ void CEventRateDataGathererTest::multipleSeriesTests() { gatherer.featureData(startTime, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 2), (3, 1), (4, 3)]"), core::CContainerPrinter::print(featureData[0].second)); @@ -798,8 +712,10 @@ void CEventRateDataGathererTest::multipleSeriesTests() { gatherer.featureData(startTime, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("[(2, 2), (4, 3)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL(std::string("[(2, 2), (4, 3)]"), + core::CContainerPrinter::print(featureData[0].second)); } } @@ -819,21 +735,10 @@ void CEventRateDataGathererTest::testRemovePeople() { features.push_back(model_t::E_IndividualLowCountsByBucketAndPerson); features.push_back(model_t::E_IndividualHighCountsByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -846,7 +751,8 @@ void CEventRateDataGathererTest::testRemovePeople() { core_t::TTime counts[] = {0, 3, 5, 2, 0, 5, 7, 10}; for (std::size_t i = 0u; i < boost::size(counts); ++i) { for (core_t::TTime time = 0; time < counts[i]; ++time) { - addArrival(gatherer, m_ResourceMonitor, startTime + time, gatherer.personName(i)); + addArrival(gatherer, m_ResourceMonitor, startTime + time, + gatherer.personName(i)); } } @@ -856,32 +762,28 @@ void CEventRateDataGathererTest::testRemovePeople() { peopleToRemove.push_back(1); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(expectedGatherer, m_ResourceMonitor, "p3")); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(expectedGatherer, m_ResourceMonitor, "p4")); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), 
addPerson(expectedGatherer, m_ResourceMonitor, "p5")); - CPPUNIT_ASSERT_EQUAL(std::size_t(3), addPerson(expectedGatherer, m_ResourceMonitor, "p6")); - CPPUNIT_ASSERT_EQUAL(std::size_t(4), addPerson(expectedGatherer, m_ResourceMonitor, "p7")); - CPPUNIT_ASSERT_EQUAL(std::size_t(5), addPerson(expectedGatherer, m_ResourceMonitor, "p8")); + CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPerson(expectedGatherer, m_ResourceMonitor, "p3")); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + addPerson(expectedGatherer, m_ResourceMonitor, "p4")); + CPPUNIT_ASSERT_EQUAL(std::size_t(2), + addPerson(expectedGatherer, m_ResourceMonitor, "p5")); + CPPUNIT_ASSERT_EQUAL(std::size_t(3), + addPerson(expectedGatherer, m_ResourceMonitor, "p6")); + CPPUNIT_ASSERT_EQUAL(std::size_t(4), + addPerson(expectedGatherer, m_ResourceMonitor, "p7")); + CPPUNIT_ASSERT_EQUAL(std::size_t(5), + addPerson(expectedGatherer, m_ResourceMonitor, "p8")); core_t::TTime expectedCounts[] = {5, 2, 0, 5, 7, 10}; for (std::size_t i = 0u; i < boost::size(expectedCounts); ++i) { for (core_t::TTime time = 0; time < expectedCounts[i]; ++time) { - addArrival(expectedGatherer, m_ResourceMonitor, startTime + time, expectedGatherer.personName(i)); + addArrival(expectedGatherer, m_ResourceMonitor, + startTime + time, expectedGatherer.personName(i)); } } @@ -896,29 +798,22 @@ void CEventRateDataGathererTest::testRemovePeople() { peopleToRemove.push_back(7); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(expectedGatherer, m_ResourceMonitor, "p3")); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(expectedGatherer, m_ResourceMonitor, "p6")); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(expectedGatherer, m_ResourceMonitor, "p7")); + CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPerson(expectedGatherer, m_ResourceMonitor, "p3")); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + addPerson(expectedGatherer, m_ResourceMonitor, "p6")); + CPPUNIT_ASSERT_EQUAL(std::size_t(2), + addPerson(expectedGatherer, m_ResourceMonitor, "p7")); core_t::TTime expectedCounts[] = {5, 5, 7}; for (std::size_t i = 0u; i < boost::size(expectedCounts); ++i) { for (core_t::TTime time = 0; time < expectedCounts[i]; ++time) { - addArrival(expectedGatherer, m_ResourceMonitor, startTime + time, expectedGatherer.personName(i)); + addArrival(expectedGatherer, m_ResourceMonitor, + startTime + time, expectedGatherer.personName(i)); } } @@ -933,21 +828,10 @@ void CEventRateDataGathererTest::testRemovePeople() { peopleToRemove.push_back(6); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, + 
EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); LOG_DEBUG(<< "checksum = " << gatherer.checksum()); LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum()); @@ -958,9 +842,11 @@ void CEventRateDataGathererTest::testRemovePeople() { expectedRecycled.push_back(addPerson(gatherer, m_ResourceMonitor, "p1")); expectedRecycled.push_back(addPerson(gatherer, m_ResourceMonitor, "p7")); - LOG_DEBUG(<< "recycled = " << core::CContainerPrinter::print(gatherer.recycledPersonIds())); + LOG_DEBUG(<< "recycled = " + << core::CContainerPrinter::print(gatherer.recycledPersonIds())); LOG_DEBUG(<< "expected recycled = " << core::CContainerPrinter::print(expectedRecycled)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedRecycled), core::CContainerPrinter::print(gatherer.recycledPersonIds())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedRecycled), + core::CContainerPrinter::print(gatherer.recycledPersonIds())); } void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { @@ -976,66 +862,56 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { params.s_LatencyBuckets = latencyBuckets; core_t::TTime data[] = { - 1, - 180, - 1200, - 190, - 400, + 1, 180, 1200, 190, 400, 600, // bucket 1, 2 & 3 - 550, - 799, - 1199, + 550, 799, 1199, 15, // bucket 1 & 2 2490, // bucket 5 // bucket 4 is empty - 2420, - 2480, + 2420, 2480, 1250, // bucket 3 & 5 10000 // sentinel }; std::string expectedPersonCounts[] = { - std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), std::string("[(0, 0)]"), std::string("[(0, 3)]")}; + std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), + std::string("[(0, 0)]"), std::string("[(0, 3)]")}; std::string expectedPersonNonZeroCounts[] = { - std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"), std::string("[]"), std::string("[(0, 3)]")}; + std::string("[(0, 6)]"), std::string("[(0, 3)]"), + std::string("[(0, 2)]"), std::string("[]"), std::string("[(0, 3)]")}; std::string expectedPersonIndicator[] = { - std::string("[(0, 1)]"), std::string("[(0, 1)]"), std::string("[(0, 1)]"), std::string("[]"), std::string("[(0, 1)]")}; + std::string("[(0, 1)]"), std::string("[(0, 1)]"), + std::string("[(0, 1)]"), std::string("[]"), std::string("[(0, 1)]")}; // Test the count by bucket and person and bad feature // (which should be ignored). 
{ TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); core_t::TTime time = startTime; for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { - for (/**/; j < 5 && data[i] >= time + latencyTime; time += bucketLength, ++j, gatherer.timeNow(time)) { - LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")"); + for (/**/; j < 5 && data[i] >= time + latencyTime; + time += bucketLength, ++j, gatherer.timeNow(time)) { + LOG_DEBUG(<< "Processing bucket [" << time << ", " + << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonCounts[j], + core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } @@ -1052,36 +928,33 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { TFeatureVec features; features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); features.push_back(model_t::E_IndividualTotalBucketCountByPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { - for (/**/; j < 5 && data[i] >= time + latencyTime; time += bucketLength, ++j, gatherer.timeNow(time)) { - LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")"); + for (/**/; j < 5 && data[i] >= time + latencyTime; + time += bucketLength, ++j, gatherer.timeNow(time)) { + LOG_DEBUG(<< "Processing bucket [" << time << ", " + << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], core::CContainerPrinter::print(featureData[0].second)); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualTotalBucketCountByPerson, featureData[1].first); - 
CPPUNIT_ASSERT_EQUAL(expectedPersonNonZeroCounts[j], core::CContainerPrinter::print(featureData[1].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonNonZeroCounts[j], + core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualTotalBucketCountByPerson, + featureData[1].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonNonZeroCounts[j], + core::CContainerPrinter::print(featureData[1].second)); testPersistence(params, gatherer); } @@ -1096,34 +969,28 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { { TFeatureVec features; features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; for (std::size_t i = 0, j = 0u; i < boost::size(data); ++i) { - for (/**/; j < 5 && data[i] >= time + latencyTime; time += bucketLength, ++j, gatherer.timeNow(time)) { - LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")"); + for (/**/; j < 5 && data[i] >= time + latencyTime; + time += bucketLength, ++j, gatherer.timeNow(time)) { + LOG_DEBUG(<< "Processing bucket [" << time << ", " + << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonIndicator[j], core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonIndicator[j], + core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } @@ -1145,8 +1012,7 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests() { params.s_LatencyBuckets = latencyBuckets; core_t::TTime data[] = { - 1, - 1200, + 1, 1200, 600, // bucket 1, 3 & 2 1199, 15, // bucket 2 & 1 @@ -1158,21 +1024,10 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1180,77 +1035,99 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests() { addArrival(gatherer, m_ResourceMonitor, 
data[0], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[1], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[2], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[3], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[4], "p"); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - 
CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[5], "p"); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1800, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(2400, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[6], "p"); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1800, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(2400, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[7], "p"); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1800, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(2400, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); } void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { @@ -1267,20 +1144,14 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { params.s_LatencyBuckets = latencyBuckets; core_t::TTime data1[] = { - 1, - 15, - 1200, - 190, - 400, + 1, 15, 1200, 190, 400, 550, // bucket 1, 2 & 3 - 600, - 1250, + 600, 1250, 1199, // bucket 2 & 3 180, 799, // bucket 1 & 2 2480, // bucket 5 - 2420, - 1900, + 2420, 1900, 2490, // bucket 4 & 5 10000 // sentinel }; @@ -1293,45 +1164,38 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { 10000 // sentinel }; - std::string expectedPersonCounts[] = {std::string("[(0, 6), (1, 8)]"), - std::string("[(0, 3), (1, 5)]"), - 
std::string("[(0, 2), (1, 6)]"), - std::string("[(0, 1), (1, 2)]"), - std::string("[(0, 3), (1, 6)]")}; + std::string expectedPersonCounts[] = { + std::string("[(0, 6), (1, 8)]"), std::string("[(0, 3), (1, 5)]"), + std::string("[(0, 2), (1, 6)]"), std::string("[(0, 1), (1, 2)]"), + std::string("[(0, 3), (1, 6)]")}; { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); core_t::TTime time = startTime; std::size_t i1 = 0u, i2 = 0u, j = 0u; for (;;) { - for (/**/; j < 5 && std::min(data1[i1], data2[i2]) >= time + latencyTime; time += bucketLength, ++j) { - LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")"); + for (/**/; j < 5 && std::min(data1[i1], data2[i2]) >= time + latencyTime; + time += bucketLength, ++j) { + LOG_DEBUG(<< "Processing bucket [" << time << ", " + << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(time, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(expectedPersonCounts[j], core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL( + expectedPersonCounts[j], + core::CContainerPrinter::print(featureData[0].second)); testPersistence(params, gatherer); } @@ -1367,28 +1231,19 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { gatherer.featureData(startTime + 4 * bucketLength, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), + core::CContainerPrinter::print(featureData[0].second)); } { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, 
m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -1408,7 +1263,8 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { gatherer.featureData(startTime, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 2), (3, 1), (4, 3)]"), core::CContainerPrinter::print(featureData[0].second)); @@ -1433,8 +1289,10 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { gatherer.featureData(startTime, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, featureData[0].first); - CPPUNIT_ASSERT_EQUAL(std::string("[(2, 2), (4, 3)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualCountByBucketAndPerson, + featureData[0].first); + CPPUNIT_ASSERT_EQUAL(std::string("[(2, 2), (4, 3)]"), + core::CContainerPrinter::print(featureData[0].second)); } } @@ -1454,21 +1312,10 @@ void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); addArrival(gatherer, m_ResourceMonitor, data[0], "p"); @@ -1480,13 +1327,16 @@ void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() { CPPUNIT_ASSERT_EQUAL(std::size_t(0), featureData.size()); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1800, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); } void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() { @@ -1501,29 +1351,17 @@ void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() { core_t::TTime data[] = { 100, 300, // Bucket 1 - 600, - 800, + 600, 800, 850, // Bucket 2 1200 // Bucket 3 }; TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - 
params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); for (std::size_t i = 0; i < boost::size(data); ++i) { @@ -1533,24 +1371,30 @@ void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.resetBucket(600); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); } void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() { @@ -1565,29 +1409,17 @@ void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() { core_t::TTime data[] = { 100, 300, // Bucket 1 - 600, - 800, + 600, 800, 850, // Bucket 2 1200 // Bucket 3 }; TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p1"); addPerson(gatherer, m_ResourceMonitor, "p2"); addPerson(gatherer, m_ResourceMonitor, "p3"); @@ -1601,24 +1433,30 @@ void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), + 
core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (1, 3), (2, 3)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (1, 3), (2, 3)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.resetBucket(600); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0), (1, 0), (2, 0)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 0), (1, 0), (2, 0)]"), + core::CContainerPrinter::print(featureData[0].second)); gatherer.featureData(1200, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), core::CContainerPrinter::print(featureData[0].second)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), + core::CContainerPrinter::print(featureData[0].second)); } void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() { @@ -1632,21 +1470,10 @@ void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); addArrival(gatherer, m_ResourceMonitor, 1200, "p"); @@ -1659,20 +1486,14 @@ void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() { void CEventRateDataGathererTest::testInfluencerBucketStatistics() { core_t::TTime data[] = { - 1, - 15, - 180, - 190, - 400, + 1, 15, 180, 190, 400, 550, // bucket 1 - 600, - 799, + 600, 799, 1199, // bucket 2 1200, 1250, // bucket 3 // bucket 4 - 2420, - 2480, + 2420, 2480, 2490, // bucket 5 10000 // sentinel }; @@ -1680,56 +1501,62 @@ void CEventRateDataGathererTest::testInfluencerBucketStatistics() { TStrVecVec influencers(14, TStrVec(1, "i")); - std::string expectedPersonCounts[] = {std::string("[(0, 6, [[(i, ([6], 1))]])]"), - std::string("[(0, 3, [[(i, ([3], 1))]])]"), - std::string("[(0, 2, [[(i, ([2], 1))]])]"), - std::string("[(0, 0)]"), - std::string("[(0, 3, [[(i, ([3], 1))]])]")}; + std::string expectedPersonCounts[] = { + std::string("[(0, 6, [[(i, ([6], 1))]])]"), + std::string("[(0, 3, [[(i, ([3], 1))]])]"), + std::string("[(0, 2, [[(i, ([2], 1))]])]"), std::string("[(0, 0)]"), + std::string("[(0, 3, [[(i, ([3], 1))]])]")}; TStrVec expectedPersonCountsVec(&expectedPersonCounts[0], &expectedPersonCounts[5]); - std::string expectedPersonNonZeroCounts[] = 
{std::string("[(0, 6, [[(i, ([6], 1))]])]"), - std::string("[(0, 3, [[(i, ([3], 1))]])]"), - std::string("[(0, 2, [[(i, ([2], 1))]])]"), - std::string("[]"), - std::string("[(0, 3, [[(i, ([3], 1))]])]")}; - TStrVec expectedPersonNonZeroCountsVec(&expectedPersonNonZeroCounts[0], &expectedPersonNonZeroCounts[5]); + std::string expectedPersonNonZeroCounts[] = { + std::string("[(0, 6, [[(i, ([6], 1))]])]"), + std::string("[(0, 3, [[(i, ([3], 1))]])]"), + std::string("[(0, 2, [[(i, ([2], 1))]])]"), std::string("[]"), + std::string("[(0, 3, [[(i, ([3], 1))]])]")}; + TStrVec expectedPersonNonZeroCountsVec(&expectedPersonNonZeroCounts[0], + &expectedPersonNonZeroCounts[5]); - std::string expectedPersonIndicator[] = {std::string("[(0, 1, [[(i, ([1], 1))]])]"), - std::string("[(0, 1, [[(i, ([1], 1))]])]"), - std::string("[(0, 1, [[(i, ([1], 1))]])]"), - std::string("[]"), - std::string("[(0, 1, [[(i, ([1], 1))]])]")}; - TStrVec expectedPersonIndicatorVec(&expectedPersonIndicator[0], &expectedPersonIndicator[5]); + std::string expectedPersonIndicator[] = { + std::string("[(0, 1, [[(i, ([1], 1))]])]"), + std::string("[(0, 1, [[(i, ([1], 1))]])]"), + std::string("[(0, 1, [[(i, ([1], 1))]])]"), std::string("[]"), + std::string("[(0, 1, [[(i, ([1], 1))]])]")}; + TStrVec expectedPersonIndicatorVec(&expectedPersonIndicator[0], + &expectedPersonIndicator[5]); TStrVec expectedArrivalTimeVec(6, std::string("[]")); - std::string expectedInfoContent[] = {std::string("[(0, 13, [[(i, ([13], 1))]])]"), - std::string("[(0, 13, [[(i, ([13], 1))]])]"), - std::string("[(0, 13, [[(i, ([13], 1))]])]"), - std::string("[]"), - std::string("[(0, 13, [[(i, ([13], 1))]])]")}; + std::string expectedInfoContent[] = { + std::string("[(0, 13, [[(i, ([13], 1))]])]"), + std::string("[(0, 13, [[(i, ([13], 1))]])]"), + std::string("[(0, 13, [[(i, ([13], 1))]])]"), std::string("[]"), + std::string("[(0, 13, [[(i, ([13], 1))]])]")}; TStrVec expectedInfoContentVec(&expectedInfoContent[0], &expectedInfoContent[5]); - testInfluencerPerFeature( - model_t::E_IndividualCountByBucketAndPerson, dataVec, influencers, expectedPersonCountsVec, "", m_ResourceMonitor); + testInfluencerPerFeature(model_t::E_IndividualCountByBucketAndPerson, dataVec, + influencers, expectedPersonCountsVec, "", m_ResourceMonitor); - testInfluencerPerFeature( - model_t::E_IndividualNonZeroCountByBucketAndPerson, dataVec, influencers, expectedPersonNonZeroCountsVec, "", m_ResourceMonitor); + testInfluencerPerFeature(model_t::E_IndividualNonZeroCountByBucketAndPerson, + dataVec, influencers, expectedPersonNonZeroCountsVec, + "", m_ResourceMonitor); - testInfluencerPerFeature( - model_t::E_IndividualLowCountsByBucketAndPerson, dataVec, influencers, expectedPersonCountsVec, "", m_ResourceMonitor); + testInfluencerPerFeature(model_t::E_IndividualLowCountsByBucketAndPerson, dataVec, + influencers, expectedPersonCountsVec, "", m_ResourceMonitor); - testInfluencerPerFeature( - model_t::E_IndividualArrivalTimesByPerson, dataVec, influencers, expectedArrivalTimeVec, "", m_ResourceMonitor); + testInfluencerPerFeature(model_t::E_IndividualArrivalTimesByPerson, dataVec, + influencers, expectedArrivalTimeVec, "", m_ResourceMonitor); - testInfluencerPerFeature( - model_t::E_IndividualLowNonZeroCountByBucketAndPerson, dataVec, influencers, expectedPersonNonZeroCountsVec, "", m_ResourceMonitor); + testInfluencerPerFeature(model_t::E_IndividualLowNonZeroCountByBucketAndPerson, + dataVec, influencers, expectedPersonNonZeroCountsVec, + "", m_ResourceMonitor); - testInfluencerPerFeature( 
- model_t::E_IndividualUniqueCountByBucketAndPerson, dataVec, influencers, expectedPersonIndicatorVec, "value", m_ResourceMonitor); + testInfluencerPerFeature(model_t::E_IndividualUniqueCountByBucketAndPerson, + dataVec, influencers, expectedPersonIndicatorVec, + "value", m_ResourceMonitor); - testInfluencerPerFeature( - model_t::E_IndividualInfoContentByBucketAndPerson, dataVec, influencers, expectedInfoContentVec, "value", m_ResourceMonitor); + testInfluencerPerFeature(model_t::E_IndividualInfoContentByBucketAndPerson, + dataVec, influencers, expectedInfoContentVec, + "value", m_ResourceMonitor); } void CEventRateDataGathererTest::testDistinctStrings() { @@ -1790,7 +1617,8 @@ void CEventRateDataGathererTest::testDistinctStrings() { { SEventRateFeatureData featureData(0); data.populateDistinctCountFeatureData(featureData); - CPPUNIT_ASSERT_EQUAL(std::string("1, [[(inf1, ([1], 1))]]"), featureData.print()); + CPPUNIT_ASSERT_EQUAL(std::string("1, [[(inf1, ([1], 1))]]"), + featureData.print()); } data.insert("str2", influencers); @@ -1806,9 +1634,12 @@ void CEventRateDataGathererTest::testDistinctStrings() { SEventRateFeatureData featureData(0); data.populateDistinctCountFeatureData(featureData); - std::sort(featureData.s_InfluenceValues[0].begin(), featureData.s_InfluenceValues[0].end(), maths::COrderings::SFirstLess()); + std::sort(featureData.s_InfluenceValues[0].begin(), + featureData.s_InfluenceValues[0].end(), + maths::COrderings::SFirstLess()); - CPPUNIT_ASSERT_EQUAL(std::string("3, [[(inf1, ([2], 1)), (inf2, ([2], 1)), (inf3, ([1], 1))]]"), featureData.print()); + CPPUNIT_ASSERT_EQUAL(std::string("3, [[(inf1, ([2], 1)), (inf2, ([2], 1)), (inf3, ([1], 1))]]"), + featureData.print()); } } @@ -1834,7 +1665,8 @@ void CEventRateDataGathererTest::testDistinctStrings() { { SEventRateFeatureData featureData(0); data.populateDistinctCountFeatureData(featureData); - CPPUNIT_ASSERT_EQUAL(std::string("2, [[(inf1, ([2], 1))], []]"), featureData.print()); + CPPUNIT_ASSERT_EQUAL(std::string("2, [[(inf1, ([2], 1))], []]"), + featureData.print()); } influencers[1] = CStringStore::influencers().get("inf_v2"); @@ -1848,8 +1680,9 @@ void CEventRateDataGathererTest::testDistinctStrings() { SEventRateFeatureData featureData(0); data.populateDistinctCountFeatureData(featureData); for (std::size_t i = 0; i < 2; i++) { - std::sort( - featureData.s_InfluenceValues[i].begin(), featureData.s_InfluenceValues[i].end(), maths::COrderings::SFirstLess()); + std::sort(featureData.s_InfluenceValues[i].begin(), + featureData.s_InfluenceValues[i].end(), + maths::COrderings::SFirstLess()); } CPPUNIT_ASSERT_EQUAL(std::string("3, [[(inf1, ([2], 1)), (inf2, ([2], 1))], [(inf_v2, ([1], 1)), (inf_v3, ([2], 1))]]"), featureData.print()); @@ -1888,8 +1721,10 @@ void CEventRateDataGathererTest::testDistinctStrings() { data.insert(ss.str(), influencers); SEventRateFeatureData featureData(0); data.populateInfoContentFeatureData(featureData); - CPPUNIT_ASSERT((featureData.s_Count - 12) >= std::max(uint64_t(3), uint64_t(i))); - CPPUNIT_ASSERT((featureData.s_Count - 12) <= std::max(uint64_t(3), uint64_t(i)) * 3); + CPPUNIT_ASSERT((featureData.s_Count - 12) >= + std::max(uint64_t(3), uint64_t(i))); + CPPUNIT_ASSERT((featureData.s_Count - 12) <= + std::max(uint64_t(3), uint64_t(i)) * 3); } } { @@ -1911,7 +1746,8 @@ void CEventRateDataGathererTest::testDistinctStrings() { { SEventRateFeatureData featureData(0); data.populateInfoContentFeatureData(featureData); - CPPUNIT_ASSERT_EQUAL(std::string("12, [[(inf1, ([12], 1))]]"), 
featureData.print()); + CPPUNIT_ASSERT_EQUAL(std::string("12, [[(inf1, ([12], 1))]]"), + featureData.print()); } data.insert("str2", influencers); @@ -1927,9 +1763,12 @@ void CEventRateDataGathererTest::testDistinctStrings() { SEventRateFeatureData featureData(0); data.populateInfoContentFeatureData(featureData); - std::sort(featureData.s_InfluenceValues[0].begin(), featureData.s_InfluenceValues[0].end(), maths::COrderings::SFirstLess()); + std::sort(featureData.s_InfluenceValues[0].begin(), + featureData.s_InfluenceValues[0].end(), + maths::COrderings::SFirstLess()); - CPPUNIT_ASSERT_EQUAL(std::string("18, [[(inf1, ([16], 1)), (inf2, ([16], 1)), (inf3, ([12], 1))]]"), featureData.print()); + CPPUNIT_ASSERT_EQUAL(std::string("18, [[(inf1, ([16], 1)), (inf2, ([16], 1)), (inf3, ([12], 1))]]"), + featureData.print()); } } { @@ -1954,7 +1793,8 @@ void CEventRateDataGathererTest::testDistinctStrings() { { SEventRateFeatureData featureData(0); data.populateInfoContentFeatureData(featureData); - CPPUNIT_ASSERT_EQUAL(std::string("16, [[(inf1, ([16], 1))], []]"), featureData.print()); + CPPUNIT_ASSERT_EQUAL(std::string("16, [[(inf1, ([16], 1))], []]"), + featureData.print()); } influencers[1] = CStringStore::influencers().get("inf_v2"); @@ -1968,8 +1808,9 @@ void CEventRateDataGathererTest::testDistinctStrings() { SEventRateFeatureData featureData(0); data.populateInfoContentFeatureData(featureData); for (std::size_t i = 0; i < 2; i++) { - std::sort( - featureData.s_InfluenceValues[i].begin(), featureData.s_InfluenceValues[i].end(), maths::COrderings::SFirstLess()); + std::sort(featureData.s_InfluenceValues[i].begin(), + featureData.s_InfluenceValues[i].end(), + maths::COrderings::SFirstLess()); } CPPUNIT_ASSERT_EQUAL(std::string("18, [[(inf1, ([16], 1)), (inf2, ([16], 1))], [(inf_v2, ([12], 1)), (inf_v3, ([16], 1))]]"), featureData.print()); @@ -1985,24 +1826,14 @@ void CEventRateDataGathererTest::testDistinctStrings() { TFeatureVec features; features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - "P", - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "V", - TStrVec(1, "INF"), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, "P", EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, "V", TStrVec(1, "INF"), false, key, + features, startTime, 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p", "v", 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPerson(gatherer, m_ResourceMonitor, "p", "v", 1)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); for (std::size_t i = 0u; i < 1; ++i) { @@ -2019,10 +1850,14 @@ void CEventRateDataGathererTest::testDistinctStrings() { testPersistence(params, gatherer); // Add data, some of which will be out of order - addArrival(gatherer, m_ResourceMonitor, time - (2 * bucketLength), "p", "stringOne", "inf1"); - addArrival(gatherer, m_ResourceMonitor, time - (2 * bucketLength), "p", "stringTwo", "inf2"); - addArrival(gatherer, m_ResourceMonitor, time - (1 * bucketLength), "p", "stringThree", "inf3"); - addArrival(gatherer, m_ResourceMonitor, time - (1 * bucketLength), "p", "stringFour", "inf1"); + addArrival(gatherer, m_ResourceMonitor, time - (2 * bucketLength), "p", + "stringOne", "inf1"); + addArrival(gatherer, m_ResourceMonitor, time - (2 * bucketLength), "p", + "stringTwo", "inf2"); + 
addArrival(gatherer, m_ResourceMonitor, time - (1 * bucketLength), "p", + "stringThree", "inf3"); + addArrival(gatherer, m_ResourceMonitor, time - (1 * bucketLength), "p", + "stringFour", "inf1"); addArrival(gatherer, m_ResourceMonitor, time, "p", "stringFive", "inf2"); addArrival(gatherer, m_ResourceMonitor, time, "p", "stringSix", "inf3"); testPersistence(params, gatherer); @@ -2046,21 +1881,10 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { TFeatureVec features; features.push_back(model_t::E_IndividualTimeOfDayByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "person", - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + "person", EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); @@ -2083,7 +1907,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 100, person); @@ -2092,7 +1917,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 50), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 50), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2102,7 +1928,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 200, person); @@ -2111,7 +1938,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 100), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 100), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2121,7 +1949,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 300, person); @@ -2130,7 +1959,8 @@ void 
CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 150), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 150), + featureData[0].second[0].second.s_Count); } // Check latency by going backwards in time @@ -2142,7 +1972,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 100), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 100), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2152,7 +1983,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 200), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 200), + featureData[0].second[0].second.s_Count); } CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberActivePeople()); @@ -2172,21 +2004,10 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { TFeatureVec features; features.push_back(model_t::E_IndividualTimeOfWeekByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "person", - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + "person", EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); @@ -2209,7 +2030,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 100, person); @@ -2218,7 +2040,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 50), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 50), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2228,7 +2051,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), + 
featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 200, person); @@ -2237,7 +2061,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 100), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 100), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2247,7 +2072,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 300, person); @@ -2256,7 +2082,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 150), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 150), + featureData[0].second[0].second.s_Count); } // Check latency by going backwards in time @@ -2268,7 +2095,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 100), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 100), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2278,7 +2106,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 200), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 200), + featureData[0].second[0].second.s_Count); } CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberActivePeople()); @@ -2298,21 +2127,10 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { TFeatureVec features; features.push_back(model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute); - CDataGatherer gatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "att", - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, "att", EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); CPPUNIT_ASSERT(gatherer.isPopulation()); @@ -2335,7 +2153,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - 
CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 100, person, attribute); @@ -2344,7 +2163,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 50), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 50), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2354,7 +2174,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 200, person, attribute); @@ -2363,7 +2184,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 100), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 100), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2373,7 +2195,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 604800), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 300, person, attribute); @@ -2382,7 +2205,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 150), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 150), + featureData[0].second[0].second.s_Count); } // Check latency by going backwards in time @@ -2394,7 +2218,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 100), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 100), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2404,7 +2229,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) 
+ 200), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 604800) + 200), + featureData[0].second[0].second.s_Count); } CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberActivePeople()); @@ -2425,21 +2251,10 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { TFeatureVec features; features.push_back(model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute); - CDataGatherer gatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "att", - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, "att", EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0); CPPUNIT_ASSERT(gatherer.isPopulation()); @@ -2462,7 +2277,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 100, person, attribute); @@ -2471,7 +2287,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 50), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 50), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2481,7 +2298,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 200, person, attribute); @@ -2490,7 +2308,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 100), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 100), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2500,7 +2319,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t(time % 86400), + featureData[0].second[0].second.s_Count); } { addArrival(gatherer, m_ResourceMonitor, time + 300, person, attribute); @@ -2509,7 +2329,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); 
CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 150), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 150), + featureData[0].second[0].second.s_Count); } // Check latency by going backwards in time @@ -2521,7 +2342,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 100), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 100), + featureData[0].second[0].second.s_Count); } time += bucketLength; { @@ -2531,7 +2353,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { gatherer.featureData(time, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[0].second.size()); - CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 200), featureData[0].second[0].second.s_Count); + CPPUNIT_ASSERT_EQUAL(uint64_t((time % 86400) + 200), + featureData[0].second[0].second.s_Count); } CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberActivePeople()); @@ -2545,38 +2368,47 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { CppUnit::Test* CEventRateDataGathererTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRateDataGathererTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateDataGathererTest::singleSeriesTests", - &CEventRateDataGathererTest::singleSeriesTests)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateDataGathererTest::multipleSeriesTests", - &CEventRateDataGathererTest::multipleSeriesTests)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateDataGathererTest::testRemovePeople", - &CEventRateDataGathererTest::testRemovePeople)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests", - &CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests", - &CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests", - &CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored", - &CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRateDataGathererTest::testResetBucketGivenSingleSeries", &CEventRateDataGathererTest::testResetBucketGivenSingleSeries)); + "CEventRateDataGathererTest::singleSeriesTests", + &CEventRateDataGathererTest::singleSeriesTests)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::multipleSeriesTests", + &CEventRateDataGathererTest::multipleSeriesTests)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::testRemovePeople", + &CEventRateDataGathererTest::testRemovePeople)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests", + 
&CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests", + &CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests", + &CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored", + &CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::testResetBucketGivenSingleSeries", + &CEventRateDataGathererTest::testResetBucketGivenSingleSeries)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::testResetBucketGivenMultipleSeries", + &CEventRateDataGathererTest::testResetBucketGivenMultipleSeries)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable", + &CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::testInfluencerBucketStatistics", + &CEventRateDataGathererTest::testInfluencerBucketStatistics)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::testDistinctStrings", + &CEventRateDataGathererTest::testDistinctStrings)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateDataGathererTest::testLatencyPersist", + &CEventRateDataGathererTest::testLatencyPersist)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRateDataGathererTest::testResetBucketGivenMultipleSeries", &CEventRateDataGathererTest::testResetBucketGivenMultipleSeries)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable", - &CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateDataGathererTest::testInfluencerBucketStatistics", - &CEventRateDataGathererTest::testInfluencerBucketStatistics)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateDataGathererTest::testDistinctStrings", - &CEventRateDataGathererTest::testDistinctStrings)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateDataGathererTest::testLatencyPersist", - &CEventRateDataGathererTest::testLatencyPersist)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateDataGathererTest::testDiurnalFeatures", - &CEventRateDataGathererTest::testDiurnalFeatures)); + "CEventRateDataGathererTest::testDiurnalFeatures", + &CEventRateDataGathererTest::testDiurnalFeatures)); return suiteOfTests; } diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index 1f1a4c509c..f3cace95d0 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -76,7 +76,8 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumula const std::string EMPTY_STRING; TUInt64Vec rawEventCounts(std::size_t copies = 1) { - uint64_t counts[] = {54, 67, 39, 58, 46, 50, 42, 48, 53, 51, 50, 57, 53, 49}; + uint64_t counts[] = {54, 67, 39, 58, 46, 50, 42, + 48, 53, 51, 50, 57, 53, 49}; TUInt64Vec result; for (std::size_t i = 0u; i < copies; ++i) { result.insert(result.end(), boost::begin(counts), boost::end(counts)); 
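[Note: the hunks around here reformat the synthetic-event helpers in CEventRateModelTest.cc. For orientation, this is a minimal standalone sketch of the per-bucket event generation that generateEvents() performs: for each bucket, draw `count` arrival times uniformly in [bucketStart, bucketEnd - 1], sort them, then clamp back into the bucket after truncation to integer seconds. std::mt19937 stands in for the project's test RNG wrapper (rng.generateUniformSamples); bucketArrivals is a hypothetical name, not part of the patch.]

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

using TTime = std::int64_t;

std::vector<TTime> bucketArrivals(TTime bucketStart, TTime bucketLength,
                                  std::size_t count, std::mt19937& rng) {
    double begin = static_cast<double>(bucketStart);
    double end = begin + static_cast<double>(bucketLength);
    std::uniform_real_distribution<double> uniform(begin, end - 1.0);

    // Draw the arrival times and put them in chronological order, as the
    // test helper does before pushing onto eventArrivalTimes.
    std::vector<double> raw(count);
    for (auto& t : raw) { t = uniform(rng); }
    std::sort(raw.begin(), raw.end());

    std::vector<TTime> arrivals;
    arrivals.reserve(count);
    for (double t : raw) {
        TTime time = static_cast<TTime>(t);
        // Truncation can only move the time down, but clamp both ends anyway,
        // mirroring the std::min/std::max pair in the helper.
        time = std::min(static_cast<TTime>(end - 1.0),
                        std::max(static_cast<TTime>(begin), time));
        arrivals.push_back(time);
    }
    return arrivals;
}

int main() {
    std::mt19937 rng(42);
    auto arrivals = bucketArrivals(0, 600, 54, rng);
    return arrivals.size() == 54 ? 0 : 1;
}

[generateSporadicEvents() follows the same pattern, differing only in which buckets receive a non-zero count.]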
@@ -95,13 +96,15 @@ void generateEvents(const core_t::TTime& startTime, double bucketEndTime = bucketStartTime + static_cast(bucketLength); TDoubleVec bucketEventTimes; - rng.generateUniformSamples(bucketStartTime, bucketEndTime - 1.0, static_cast(count), bucketEventTimes); + rng.generateUniformSamples(bucketStartTime, bucketEndTime - 1.0, + static_cast(count), bucketEventTimes); std::sort(bucketEventTimes.begin(), bucketEventTimes.end()); for (auto time_ : bucketEventTimes) { core_t::TTime time = static_cast(time_); - time = std::min(static_cast(bucketEndTime - 1.0), std::max(static_cast(bucketStartTime), time)); + time = std::min(static_cast(bucketEndTime - 1.0), + std::max(static_cast(bucketStartTime), time)); eventArrivalTimes.push_back(time); } @@ -120,13 +123,15 @@ void generateSporadicEvents(const core_t::TTime& startTime, double bucketEndTime = bucketStartTime + static_cast(bucketLength); TDoubleVec bucketEventTimes; - rng.generateUniformSamples(bucketStartTime, bucketEndTime - 1.0, static_cast(count), bucketEventTimes); + rng.generateUniformSamples(bucketStartTime, bucketEndTime - 1.0, + static_cast(count), bucketEventTimes); std::sort(bucketEventTimes.begin(), bucketEventTimes.end()); for (auto time_ : bucketEventTimes) { core_t::TTime time = static_cast(time_); - time = std::min(static_cast(bucketEndTime - 1.0), std::max(static_cast(bucketStartTime), time)); + time = std::min(static_cast(bucketEndTime - 1.0), + std::max(static_cast(bucketStartTime), time)); eventArrivalTimes.push_back(time); } @@ -136,7 +141,9 @@ void generateSporadicEvents(const core_t::TTime& startTime, } } -std::size_t addPerson(const std::string& p, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) { +std::size_t addPerson(const std::string& p, + const CModelFactory::TDataGathererPtr& gatherer, + CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec person; person.push_back(&p); CEventData result; @@ -179,7 +186,9 @@ void makeModel(CEventRateModelFactory& factory, CPPUNIT_ASSERT(model); CPPUNIT_ASSERT_EQUAL(bucketLength, model->bucketLength()); for (std::size_t i = 0u; i < numberPeople; ++i) { - CPPUNIT_ASSERT_EQUAL(std::size_t(i), addPerson("p" + core::CStringUtils::typeToString(i + 1), gatherer, resourceMonitor)); + CPPUNIT_ASSERT_EQUAL(std::size_t(i), + addPerson("p" + core::CStringUtils::typeToString(i + 1), + gatherer, resourceMonitor)); } } @@ -237,7 +246,10 @@ void handleEvent(const CDataGatherer::TStrCPtrVec& fields, gatherer->addArrival(fields, eventResult, resourceMonitor); } -void testModelWithValueField(model_t::EFeature feature, TSizeVecVecVec& fields, TStrVec& strings, CResourceMonitor& resourceMonitor) { +void testModelWithValueField(model_t::EFeature feature, + TSizeVecVecVec& fields, + TStrVec& strings, + CResourceMonitor& resourceMonitor) { LOG_DEBUG(<< " *** testing feature " << model_t::print(feature)); const core_t::TTime startTime = 1346968800; @@ -260,7 +272,8 @@ void testModelWithValueField(model_t::EFeature feature, TSizeVecVecVec& fields, const core_t::TTime endTime = startTime + (numberBuckets * bucketLength); std::size_t i = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, i++) { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; + bucketStartTime += bucketLength, i++) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; for (std::size_t j = 0; j < fields[i].size(); ++j) { @@ -275,7 +288,8 @@ void 
testModelWithValueField(model_t::EFeature feature, TSizeVecVecVec& fields, SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability); + model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, + partitioningFields, 1, annotatedProbability); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); if (i == anomalousBucket) { CPPUNIT_ASSERT(annotatedProbability.s_Probability < 0.001); @@ -302,22 +316,27 @@ void CEventRateModelTest::testOnlineCountSample() { model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 1); CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); - TMathsModelPtr timeseriesModel{factory.defaultFeatureModel(model_t::E_IndividualCountByBucketAndPerson, bucketLength, 0.4, true)}; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; + TMathsModelPtr timeseriesModel{factory.defaultFeatureModel( + model_t::E_IndividualCountByBucketAndPerson, bucketLength, 0.4, true)}; + maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{ + maths::CConstantWeights::unit(1)}; // Generate some events. TTimeVec eventTimes; TUInt64Vec expectedEventCounts(rawEventCounts()); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; + bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; @@ -339,15 +358,19 @@ void CEventRateModelTest::testOnlineCountSample() { .priorWeights(weights); double sample{static_cast(expectedEventCounts[j])}; maths::CModel::TTimeDouble2VecSizeTrVec expectedSamples{ - core::make_triple((bucketStartTime + bucketEndTime) / 2, maths::CModel::TDouble2Vec{sample}, std::size_t{0})}; + core::make_triple((bucketStartTime + bucketEndTime) / 2, + maths::CModel::TDouble2Vec{sample}, std::size_t{0})}; timeseriesModel->addSamples(params_, expectedSamples); // Test we sample the data correctly. - CPPUNIT_ASSERT_EQUAL( - expectedEventCounts[j], - static_cast(model->currentBucketValue(model_t::E_IndividualCountByBucketAndPerson, 0, 0, bucketStartTime)[0])); + CPPUNIT_ASSERT_EQUAL(expectedEventCounts[j], + static_cast(model->currentBucketValue( + model_t::E_IndividualCountByBucketAndPerson, 0, + 0, bucketStartTime)[0])); CPPUNIT_ASSERT_EQUAL(timeseriesModel->checksum(), - model->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)->checksum()); + model->details() + ->model(model_t::E_IndividualCountByBucketAndPerson, 0) + ->checksum()); } // Test persistence. (We check for idempotency.) 
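[Note: testOnlineCountSample() above cross-checks the event rate model against an independently driven reference: it builds a second maths::CModel via factory.defaultFeatureModel(), feeds it the expected per-bucket counts at each bucket midpoint, and asserts the two models' checksums stay equal. This is a toy sketch of that pattern under stated assumptions — the Model struct and its update rule are hypothetical stand-ins, not the project's maths::CModel API.]

#include <cassert>
#include <cstdint>
#include <vector>

struct Model {
    std::uint64_t state = 0;
    void addSample(std::int64_t time, double value) {
        // Any deterministic update suffices for the illustration; equal
        // inputs must yield equal checksums.
        state = state * 1000003ULL +
                static_cast<std::uint64_t>(time) * 31ULL +
                static_cast<std::uint64_t>(value);
    }
    std::uint64_t checksum() const { return state; }
};

int main() {
    const std::int64_t bucketLength = 600;
    const std::vector<double> counts{54, 67, 39, 58};

    Model modelUnderTest;  // in the test, driven through the data gatherer
    Model referenceModel;  // driven directly with the expected counts

    std::int64_t bucketStart = 0;
    for (double count : counts) {
        std::int64_t bucketEnd = bucketStart + bucketLength;
        // Both models receive the bucket count at the bucket midpoint,
        // matching core::make_triple((bucketStartTime + bucketEndTime) / 2, ...)
        // in the test.
        modelUnderTest.addSample((bucketStart + bucketEnd) / 2, count);
        referenceModel.addSample((bucketStart + bucketEnd) / 2, count);
        assert(modelUnderTest.checksum() == referenceModel.checksum());
        bucketStart = bucketEnd;
    }
    return 0;
}

[Any divergence between the gatherer-driven model and the reference model then fails at the first bucket where their states differ, which is what the CPPUNIT_ASSERT_EQUAL on checksums achieves.]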
@@ -396,23 +419,27 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() { model_t::TFeatureVec features(1u, model_t::E_IndividualNonZeroCountByBucketAndPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 1); CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); - TMathsModelPtr timeseriesModel{ - factory.defaultFeatureModel(model_t::E_IndividualNonZeroCountByBucketAndPerson, bucketLength, 0.4, true)}; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; + TMathsModelPtr timeseriesModel{factory.defaultFeatureModel( + model_t::E_IndividualNonZeroCountByBucketAndPerson, bucketLength, 0.4, true)}; + maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{ + maths::CConstantWeights::unit(1)}; // Generate some events. TTimeVec eventTimes; TUInt64Vec expectedEventCounts = rawEventCounts(); generateSporadicEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength) { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; + bucketStartTime += bucketLength) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; @@ -433,18 +460,23 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() { .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); - double sample{static_cast(model_t::offsetCountToZero(model_t::E_IndividualNonZeroCountByBucketAndPerson, - static_cast(expectedEventCounts[j])))}; - maths::CModel::TTimeDouble2VecSizeTrVec expectedSamples{ - core::make_triple((bucketStartTime + bucketEndTime) / 2, maths::CModel::TDouble2Vec{sample}, std::size_t{0})}; + double sample{static_cast(model_t::offsetCountToZero( + model_t::E_IndividualNonZeroCountByBucketAndPerson, + static_cast(expectedEventCounts[j])))}; + maths::CModel::TTimeDouble2VecSizeTrVec expectedSamples{core::make_triple( + (bucketStartTime + bucketEndTime) / 2, + maths::CModel::TDouble2Vec{sample}, std::size_t{0})}; timeseriesModel->addSamples(params_, expectedSamples); // Test we sample the data correctly. 
CPPUNIT_ASSERT_EQUAL(expectedEventCounts[j], static_cast(model->currentBucketValue( - model_t::E_IndividualNonZeroCountByBucketAndPerson, 0, 0, bucketStartTime)[0])); + model_t::E_IndividualNonZeroCountByBucketAndPerson, + 0, 0, bucketStartTime)[0])); CPPUNIT_ASSERT_EQUAL(timeseriesModel->checksum(), - model->details()->model(model_t::E_IndividualNonZeroCountByBucketAndPerson, 0)->checksum()); + model->details() + ->model(model_t::E_IndividualNonZeroCountByBucketAndPerson, 0) + ->checksum()); ++j; } @@ -463,7 +495,8 @@ void CEventRateModelTest::testOnlineRare() { features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 5); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 5); CEventRateModel* model = dynamic_cast(model_.get()); core_t::TTime time = startTime; @@ -491,7 +524,8 @@ void CEventRateModelTest::testOnlineRare() { for (std::size_t pid = 0u; pid < 5; ++pid) { SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 0, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, + 0, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); } @@ -549,7 +583,8 @@ void CEventRateModelTest::testOnlineProbabilityCalculation() { model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 1); CEventRateModel* model = dynamic_cast(model_.get()); TMinAccumulator minProbabilities(2u); @@ -560,10 +595,12 @@ void CEventRateModelTest::testOnlineProbabilityCalculation() { expectedEventCounts[anomalousBucket] *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; + bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; @@ -578,7 +615,8 @@ void CEventRateModelTest::testOnlineProbabilityCalculation() { SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, p)); + CPPUNIT_ASSERT(model->computeProbability( + 0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, p)); LOG_DEBUG(<< "probability = " << p.s_Probability); minProbabilities.add(TDoubleSizePr(p.s_Probability, j)); } @@ -597,7 +635,8 @@ void 
CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() { std::size_t lowNonZeroCountBucket = 6u; std::size_t highNonZeroCountBucket = 8u; - std::size_t bucketCounts[] = {50, 50, 50, 50, 50, 0, 0, 0, 50, 1, 50, 100, 50, 50}; + std::size_t bucketCounts[] = {50, 50, 50, 50, 50, 0, 0, + 0, 50, 1, 50, 100, 50, 50}; SModelParams params(bucketLength); params.s_DecayRate = 0.001; @@ -605,7 +644,8 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() { model_t::TFeatureVec features(1u, model_t::E_IndividualLowNonZeroCountByBucketAndPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 1); CEventRateModel* model = dynamic_cast(model_.get()); TDoubleVec probabilities; @@ -615,13 +655,15 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() { LOG_DEBUG(<< "Writing " << bucketCounts[i] << " values"); for (std::size_t j = 0u; j < bucketCounts[i]; ++j) { - addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p1"); + addArrival(*gatherer, m_ResourceMonitor, + time + static_cast(j), "p1"); } model->sample(time, time + bucketLength, m_ResourceMonitor); SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - if (model->computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 0, p) == false) { + if (model->computeProbability(0 /*pid*/, time, time + bucketLength, + partitioningFields, 0, p) == false) { continue; } LOG_DEBUG(<< "probability = " << p.s_Probability); @@ -645,7 +687,8 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() std::size_t lowNonZeroCountBucket = 6u; std::size_t highNonZeroCountBucket = 8u; - std::size_t bucketCounts[] = {50, 50, 50, 50, 50, 0, 0, 0, 50, 100, 50, 1, 50, 50}; + std::size_t bucketCounts[] = {50, 50, 50, 50, 50, 0, 0, + 0, 50, 100, 50, 1, 50, 50}; SModelParams params(bucketLength); params.s_DecayRate = 0.001; @@ -653,7 +696,8 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() model_t::TFeatureVec features(1u, model_t::E_IndividualHighNonZeroCountByBucketAndPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 1); CEventRateModel* model = dynamic_cast(model_.get()); TDoubleVec probabilities; @@ -663,13 +707,15 @@ void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() LOG_DEBUG(<< "Writing " << bucketCounts[i] << " values"); for (std::size_t j = 0u; j < bucketCounts[i]; ++j) { - addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p1"); + addArrival(*gatherer, m_ResourceMonitor, + time + static_cast(j), "p1"); } model->sample(time, time + bucketLength, m_ResourceMonitor); SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - if (model->computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, p) == false) { + if (model->computeProbability(0 /*pid*/, time, time + bucketLength, + partitioningFields, 1, p) == false) { continue; } LOG_DEBUG(<< "probability = " << p.s_Probability); @@ -701,7 +747,10 @@ void 
CEventRateModelTest::testOnlineCorrelatedNoTrend() { const std::size_t b = 200; const double means_[] = {20.0, 25.0, 100.0, 800.0}; - const double covariances_[][4] = {{3.0, 2.5, 0.0, 0.0}, {2.5, 4.0, 0.0, 0.0}, {0.0, 0.0, 100.0, -500.0}, {0.0, 0.0, -500.0, 3000.0}}; + const double covariances_[][4] = {{3.0, 2.5, 0.0, 0.0}, + {2.5, 4.0, 0.0, 0.0}, + {0.0, 0.0, 100.0, -500.0}, + {0.0, 0.0, -500.0, 3000.0}}; TDoubleVec means(&means_[0], &means_[4]); TDoubleVecVec covariances; @@ -722,27 +771,33 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() { model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 4); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 4); CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); LOG_DEBUG(<< "Test correlation anomalies"); std::size_t anomalyBuckets[] = {100, 160, 190, b}; - double anomalies[][4] = {{-5.73, 4.29, 0.0, 0.0}, {0.0, 0.0, 89.99, 15.38}, {-7.73, 5.59, 52.99, 9.03}}; + double anomalies[][4] = {{-5.73, 4.29, 0.0, 0.0}, + {0.0, 0.0, 89.99, 15.38}, + {-7.73, 5.59, 52.99, 9.03}}; - TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2)}; + TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), + TMinAccumulator(2), TMinAccumulator(2)}; core_t::TTime time = startTime; for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) { for (std::size_t j = 0u; j < samples[i].size(); ++j) { - std::string person = std::string("p") + core::CStringUtils::typeToString(j + 1); + std::string person = std::string("p") + + core::CStringUtils::typeToString(j + 1); double n = samples[i][j]; if (i == anomalyBuckets[anomaly]) { n += anomalies[anomaly][j]; } for (std::size_t k = 0u; k < static_cast(n); ++k) { - addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), person); + addArrival(*gatherer, m_ResourceMonitor, + time + static_cast(j), person); } } if (i == anomalyBuckets[anomaly]) { @@ -753,7 +808,8 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() { for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) { SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, p)); + CPPUNIT_ASSERT(model->computeProbability( + pid, time, time + bucketLength, partitioningFields, 1, p)); std::string correlated; if (p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 && p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != nullptr && @@ -765,12 +821,14 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() { time += bucketLength; } - std::string expected[] = {"[(100,p2), (190,p2)]", "[(100,p1), (190,p1)]", "[(160,p4), (190,p4)]", "[(160,p3), (190,p3)]"}; + std::string expected[] = {"[(100,p2), (190,p2)]", "[(100,p1), (190,p1)]", + "[(160,p4), (190,p4)]", "[(160,p3), (190,p3)]"}; for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { std::string actual[2]; for (std::size_t j = 0u; j < 2; ++j) { - actual[j] = - std::string("(") + core::CStringUtils::typeToString(probabilities[i][j].second) + "," + probabilities[i][j].third + ")"; + actual[j] = std::string("(") + + core::CStringUtils::typeToString(probabilities[i][j].second) + + "," + probabilities[i][j].third + ")"; 
} std::sort(actual, actual + 2); CPPUNIT_ASSERT_EQUAL(expected[i], core::CContainerPrinter::print(actual)); @@ -816,26 +874,32 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() { model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 4); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 4); CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); std::size_t anomalyBuckets[] = {100, 160, 190, b}; - double anomalies[][4] = {{11.07, 14.19, 0.0, 0.0}, {0.0, 0.0, -66.9, 399.95}, {11.07, 14.19, -48.15, 329.95}}; + double anomalies[][4] = {{11.07, 14.19, 0.0, 0.0}, + {0.0, 0.0, -66.9, 399.95}, + {11.07, 14.19, -48.15, 329.95}}; - TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2)}; + TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), + TMinAccumulator(2), TMinAccumulator(2)}; core_t::TTime time = startTime; for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) { for (std::size_t j = 0u; j < samples[i].size(); ++j) { - std::string person = std::string("p") + core::CStringUtils::typeToString(j + 1); + std::string person = std::string("p") + + core::CStringUtils::typeToString(j + 1); double n = samples[i][j]; if (i == anomalyBuckets[anomaly]) { n += anomalies[anomaly][j]; } n = std::max(n, 0.0); for (std::size_t k = 0u; k < static_cast(n); ++k) { - addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), person); + addArrival(*gatherer, m_ResourceMonitor, + time + static_cast(j), person); } } if (i == anomalyBuckets[anomaly]) { @@ -846,7 +910,8 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() { for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) { SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, p)); + CPPUNIT_ASSERT(model->computeProbability( + pid, time, time + bucketLength, partitioningFields, 1, p)); std::string correlated; if (p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 && p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != nullptr && @@ -858,12 +923,14 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() { time += bucketLength; } - std::string expected[] = {"[(100,), (190,)]", "[(100,), (190,)]", "[(160,), (190,)]", "[(160,), (190,)]"}; + std::string expected[] = {"[(100,), (190,)]", "[(100,), (190,)]", + "[(160,), (190,)]", "[(160,), (190,)]"}; for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { std::string actual[2]; for (std::size_t j = 0u; j < 2; ++j) { - actual[j] = - std::string("(") + core::CStringUtils::typeToString(probabilities[i][j].second) + "," + probabilities[i][j].third + ")"; + actual[j] = std::string("(") + + core::CStringUtils::typeToString(probabilities[i][j].second) + + "," + probabilities[i][j].third + ")"; } std::sort(actual, actual + 2); CPPUNIT_ASSERT_EQUAL(expected[i], core::CContainerPrinter::print(actual)); @@ -891,14 +958,21 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() { const std::size_t b = 2880; const double means_[] = {20.0, 25.0, 50.0, 100.0}; - const double covariances_[][4] = {{30.0, 20.0, 0.0, 0.0}, {20.0, 40.0, 0.0, 0.0}, {0.0, 0.0, 60.0, -50.0}, {0.0, 0.0, -50.0, 60.0}}; + const 
double covariances_[][4] = {{30.0, 20.0, 0.0, 0.0}, + {20.0, 40.0, 0.0, 0.0}, + {0.0, 0.0, 60.0, -50.0}, + {0.0, 0.0, -50.0, 60.0}}; double trends[][24] = { - {0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 10.0, 11.0, 10.0, 8.0, 8.0, 7.0, 9.0, 12.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - {0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 8.0, 15.0, 18.0, 14.0, 12.0, 12.0, 11.0, 10.0, 16.0, 7.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - {4.0, 3.0, 5.0, 20.0, 20.0, 40.0, 80.0, 150.0, 180.0, 140.0, 120.0, 120.0, - 110.0, 100.0, 160.0, 70.0, 40.0, 20.0, 10.0, 3.0, 5.0, 2.0, 1.0, 3.0}, - {0.0, 0.0, 0.0, 20.0, 20.0, 40.0, 80.0, 150.0, 180.0, 140.0, 120.0, 120.0, - 110.0, 100.0, 160.0, 70.0, 40.0, 40.0, 30.0, 20.0, 10.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 10.0, 11.0, 10.0, 8.0, 8.0, + 7.0, 9.0, 12.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 8.0, 15.0, 18.0, 14.0, 12.0, 12.0, + 11.0, 10.0, 16.0, 7.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {4.0, 3.0, 5.0, 20.0, 20.0, 40.0, 80.0, 150.0, + 180.0, 140.0, 120.0, 120.0, 110.0, 100.0, 160.0, 70.0, + 40.0, 20.0, 10.0, 3.0, 5.0, 2.0, 1.0, 3.0}, + {0.0, 0.0, 0.0, 20.0, 20.0, 40.0, 80.0, 150.0, + 180.0, 140.0, 120.0, 120.0, 110.0, 100.0, 160.0, 70.0, + 40.0, 40.0, 30.0, 20.0, 10.0, 0.0, 0.0, 0.0}, }; TDoubleVec means(&means_[0], &means_[4]); @@ -910,8 +984,10 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() { rng.generateMultivariateNormalSamples(means, covariances, b, samples); std::size_t anomalyBuckets[] = {1950, 2400, 2700, b}; - double anomalies[][4] = {{-23.9, 19.7, 0.0, 0.0}, {0.0, 0.0, 36.4, 36.4}, {-28.7, 30.4, 36.4, 36.4}}; - TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2), TMinAccumulator(2)}; + double anomalies[][4] = { + {-23.9, 19.7, 0.0, 0.0}, {0.0, 0.0, 36.4, 36.4}, {-28.7, 30.4, 36.4, 36.4}}; + TMinAccumulator probabilities[4] = {TMinAccumulator(2), TMinAccumulator(2), + TMinAccumulator(2), TMinAccumulator(2)}; SModelParams params(bucketLength); params.s_DecayRate = 0.0002; @@ -923,28 +999,33 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() { model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 4); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 4); CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); core_t::TTime time = startTime; for (std::size_t i = 0u, anomaly = 0u; i < b; ++i) { - LOG_DEBUG(<< i << ") processing bucket [" << time << ", " << time + bucketLength << ")"); + LOG_DEBUG(<< i << ") processing bucket [" << time << ", " + << time + bucketLength << ")"); std::size_t hour1 = static_cast((time / 3600) % 24); std::size_t hour2 = (hour1 + 1) % 24; double dt = static_cast(time % 3600) / 3600.0; for (std::size_t j = 0u; j < samples[i].size(); ++j) { - std::string person = std::string("p") + core::CStringUtils::typeToString(j + 1); + std::string person = std::string("p") + + core::CStringUtils::typeToString(j + 1); - double n = (1.0 - dt) * trends[j][hour1] + dt * trends[j][hour2] + samples[i][j]; + double n = (1.0 - dt) * trends[j][hour1] + dt * trends[j][hour2] + + samples[i][j]; if (i == anomalyBuckets[anomaly]) { n += anomalies[anomaly][j]; } n = std::max(n / 3.0, 0.0); for (std::size_t k = 0u; k < static_cast(n); ++k) { - addArrival(*gatherer, m_ResourceMonitor, 
time + static_cast(j), person); + addArrival(*gatherer, m_ResourceMonitor, + time + static_cast(j), person); } } if (i == anomalyBuckets[anomaly]) { @@ -955,7 +1036,8 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() { for (std::size_t pid = 0u; pid < samples[i].size(); ++pid) { SAnnotatedProbability p; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, p)); + CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, + partitioningFields, 1, p)); std::string correlated; if (p.s_AttributeProbabilities[0].s_CorrelatedAttributes.size() > 0 && p.s_AttributeProbabilities[0].s_CorrelatedAttributes[0] != nullptr && @@ -967,13 +1049,15 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() { time += bucketLength; } - std::string expected[] = {"[(1950,p2), (2700,p2)]", "[(1950,p1), (2700,p1)]", "[(2400,p4), (2700,p4)]", "[(2400,p3), (2700,p3)]"}; + std::string expected[] = {"[(1950,p2), (2700,p2)]", "[(1950,p1), (2700,p1)]", + "[(2400,p4), (2700,p4)]", "[(2400,p3), (2700,p3)]"}; for (std::size_t i = 0u; i < boost::size(probabilities); ++i) { LOG_DEBUG(<< probabilities[i].print()); std::string actual[2]; for (std::size_t j = 0u; j < 2; ++j) { - actual[j] = - std::string("(") + core::CStringUtils::typeToString(probabilities[i][j].second) + "," + probabilities[i][j].third + ")"; + actual[j] = std::string("(") + + core::CStringUtils::typeToString(probabilities[i][j].second) + + "," + probabilities[i][j].third + ")"; } std::sort(actual, actual + 2); CPPUNIT_ASSERT_EQUAL(expected[i], core::CContainerPrinter::print(actual)); @@ -990,8 +1074,9 @@ void CEventRateModelTest::testPrune() { const core_t::TTime startTime = 1346968800; const core_t::TTime bucketLength = 3600; - const std::string people[] = { - std::string("p1"), std::string("p2"), std::string("p3"), std::string("p4"), std::string("p5"), std::string("p6")}; + const std::string people[] = {std::string("p1"), std::string("p2"), + std::string("p3"), std::string("p4"), + std::string("p5"), std::string("p6")}; TUInt64VecVec eventCounts; eventCounts.push_back(TUInt64Vec(1000u, 0)); @@ -1022,13 +1107,16 @@ void CEventRateModelTest::testPrune() { features.push_back(model_t::E_IndividualTotalBucketCountByPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 0); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gatherer, model_, 0); CEventRateModel* model = dynamic_cast(model_.get()); CPPUNIT_ASSERT(model); CModelFactory::TDataGathererPtr expectedGatherer; CAnomalyDetectorModel::TModelPtr expectedModel_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, expectedGatherer, expectedModel_, 0); - CEventRateModel* expectedModel = dynamic_cast(expectedModel_.get()); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + expectedGatherer, expectedModel_, 0); + CEventRateModel* expectedModel = + dynamic_cast(expectedModel_.get()); CPPUNIT_ASSERT(expectedModel); TEventDataVec events; @@ -1043,7 +1131,9 @@ void CEventRateModelTest::testPrune() { } } } - std::sort(events.begin(), events.end(), [](const CEventData& lhs, const CEventData& rhs) { return lhs.time() < rhs.time(); }); + std::sort(events.begin(), events.end(), [](const CEventData& lhs, const CEventData& rhs) { + return lhs.time() < rhs.time(); + }); TEventDataVec 
expectedEvents; expectedEvents.reserve(events.size()); @@ -1052,8 +1142,10 @@ void CEventRateModelTest::testPrune() { mapping[person] = addPerson(people[person], expectedGatherer, m_ResourceMonitor); } for (const auto& event : events) { - if (std::binary_search(expectedPeople.begin(), expectedPeople.end(), event.personId())) { - expectedEvents.push_back(makeEventData(event.time(), mapping[*event.personId()])); + if (std::binary_search(expectedPeople.begin(), expectedPeople.end(), + event.personId())) { + expectedEvents.push_back( + makeEventData(event.time(), mapping[*event.personId()])); } } for (auto person : expectedPeople) { @@ -1066,7 +1158,8 @@ void CEventRateModelTest::testPrune() { model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } - addArrival(*gatherer, m_ResourceMonitor, event.time(), gatherer->personName(event.personId().get())); + addArrival(*gatherer, m_ResourceMonitor, event.time(), + gatherer->personName(event.personId().get())); } model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); size_t maxDimensionBeforePrune(model->dataGatherer().maxDimension()); @@ -1080,7 +1173,8 @@ void CEventRateModelTest::testPrune() { expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } - addArrival(*expectedGatherer, m_ResourceMonitor, event.time(), expectedGatherer->personName(event.personId().get())); + addArrival(*expectedGatherer, m_ResourceMonitor, event.time(), + expectedGatherer->personName(event.personId().get())); } expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); @@ -1097,10 +1191,14 @@ void CEventRateModelTest::testPrune() { CPPUNIT_ASSERT(newPid < 6); std::size_t expectedNewPid = addPerson(person, expectedGatherer, m_ResourceMonitor); - addArrival(*gatherer, m_ResourceMonitor, bucketStart + 1, gatherer->personName(newPid)); - addArrival(*gatherer, m_ResourceMonitor, bucketStart + 2000, gatherer->personName(newPid)); - addArrival(*expectedGatherer, m_ResourceMonitor, bucketStart + 1, expectedGatherer->personName(expectedNewPid)); - addArrival(*expectedGatherer, m_ResourceMonitor, bucketStart + 2000, expectedGatherer->personName(expectedNewPid)); + addArrival(*gatherer, m_ResourceMonitor, bucketStart + 1, + gatherer->personName(newPid)); + addArrival(*gatherer, m_ResourceMonitor, bucketStart + 2000, + gatherer->personName(newPid)); + addArrival(*expectedGatherer, m_ResourceMonitor, bucketStart + 1, + expectedGatherer->personName(expectedNewPid)); + addArrival(*expectedGatherer, m_ResourceMonitor, bucketStart + 2000, + expectedGatherer->personName(expectedNewPid)); } model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); @@ -1114,7 +1212,8 @@ void CEventRateModelTest::testPrune() { std::size_t numberOfPeopleBeforePrune(clonedModel->dataGatherer().numberActivePeople()); CPPUNIT_ASSERT(numberOfPeopleBeforePrune > 0); clonedModel->prune(clonedModel->defaultPruneWindow()); - CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, clonedModel->dataGatherer().numberActivePeople()); + CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, + clonedModel->dataGatherer().numberActivePeople()); } void CEventRateModelTest::testKey() { @@ -1136,9 +1235,12 @@ void CEventRateModelTest::testKey() { for (std::size_t j = 0u; j < boost::size(useNull); ++j) { for (std::size_t k = 0u; k < boost::size(byField); ++k) { for (std::size_t l = 0u; l < 
boost::size(partitionField); ++l) { - CSearchKey key(++identifier, countFunctions[i], useNull[j], model_t::E_XF_None, "", byField[k], "", partitionField[l]); + CSearchKey key(++identifier, countFunctions[i], useNull[j], + model_t::E_XF_None, "", byField[k], "", + partitionField[l]); - CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = config.factory(key); + CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = + config.factory(key); LOG_DEBUG(<< "expected key = " << key); LOG_DEBUG(<< "actual key = " << factory->searchKey()); @@ -1176,7 +1278,8 @@ void CEventRateModelTest::testModelsWithValueFields() { attribute2Strings = 15; } - for (std::size_t j = 0; j < std::max(attribute1Strings, attribute2Strings); j++) { + for (std::size_t j = 0; + j < std::max(attribute1Strings, attribute2Strings); j++) { std::ostringstream ss1; std::ostringstream ss2; ss1 << "one_plus_" << i << "_" << j; @@ -1202,7 +1305,8 @@ void CEventRateModelTest::testModelsWithValueFields() { } fieldsPerBucket.push_back(fields); } - testModelWithValueField(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute, fieldsPerBucket, strings, m_ResourceMonitor); + testModelWithValueField(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute, + fieldsPerBucket, strings, m_ResourceMonitor); } { // Check E_PopulationInfoContentByBucketPersonAndAttribute @@ -1268,7 +1372,8 @@ void CEventRateModelTest::testModelsWithValueFields() { fieldsPerBucket.push_back(fields); } - testModelWithValueField(model_t::E_PopulationInfoContentByBucketPersonAndAttribute, fieldsPerBucket, strings, m_ResourceMonitor); + testModelWithValueField(model_t::E_PopulationInfoContentByBucketPersonAndAttribute, + fieldsPerBucket, strings, m_ResourceMonitor); } } @@ -1290,7 +1395,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1302,16 +1408,19 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { expectedEventCounts.back() *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1")); + addArrival(*gatherer, 
m_ResourceMonitor, eventTimes[i], "p", + TOptionalStr("inf1")); count += 1.0; } @@ -1321,15 +1430,18 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } // All the influence should be assigned to our one influencer - CPPUNIT_ASSERT_EQUAL(std::string("[((IF1, inf1), 1)]"), core::CContainerPrinter::print(lastInfluencersResult)); + CPPUNIT_ASSERT_EQUAL(std::string("[((IF1, inf1), 1)]"), + core::CContainerPrinter::print(lastInfluencersResult)); } { // Test single influence name, two influence values @@ -1343,7 +1455,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1355,11 +1468,13 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { expectedEventCounts.back() *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; @@ -1377,10 +1492,12 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << 
core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } @@ -1388,7 +1505,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { // is about twice the regular count, each influencer contributes a lot to // the anomaly CPPUNIT_ASSERT_EQUAL(std::size_t(2), lastInfluencersResult.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, + lastInfluencersResult[1].second, 0.05); CPPUNIT_ASSERT(lastInfluencersResult[0].second > 0.8); } { @@ -1403,7 +1521,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1415,11 +1534,13 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { expectedEventCounts.back() *= 6; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; @@ -1437,10 +1558,12 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } @@ -1449,7 +1572,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { // the anomaly, but less than the previous test as each the results would // be anomalous even without the contribution from the 
influencer CPPUNIT_ASSERT_EQUAL(std::size_t(2), lastInfluencersResult.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, + lastInfluencersResult[1].second, 0.05); CPPUNIT_ASSERT(lastInfluencersResult[0].second > 0.5); CPPUNIT_ASSERT(lastInfluencersResult[0].second < 0.6); } @@ -1465,7 +1589,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1477,11 +1602,13 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { expectedEventCounts.back() *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; @@ -1502,10 +1629,12 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } @@ -1527,7 +1656,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 2)); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 2)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); 
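Every influence test in this file repeats the same probe: run computeProbability for
one person and bucket, then inspect the influences attached to the annotated result.
A hypothetical helper capturing just that pattern (influencesFor is not part of the
test suite; the computeProbability signature matches the calls above):

    // Returns a printable summary of the influences assigned to a person's
    // bucket probability, or "<failed>" if it could not be computed.
    std::string influencesFor(CEventRateModel& model,
                              std::size_t pid,
                              core_t::TTime bucketStart,
                              core_t::TTime bucketEnd) {
        SAnnotatedProbability annotatedProbability;
        CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
        if (!model.computeProbability(pid, bucketStart, bucketEnd,
                                      partitioningFields, 1, annotatedProbability)) {
            return "<failed>";
        }
        return core::CContainerPrinter::print(annotatedProbability.s_Influences);
    }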
CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1539,11 +1669,13 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { expectedEventCounts.back() *= 3; generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; @@ -1567,10 +1699,12 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } @@ -1580,7 +1714,8 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { CPPUNIT_ASSERT_EQUAL(std::string("IF1"), *lastInfluencersResult[0].first.first); CPPUNIT_ASSERT_EQUAL(std::string("inf"), *lastInfluencersResult[0].first.second); CPPUNIT_ASSERT_EQUAL(std::string("IF2"), *lastInfluencersResult[1].first.first); - CPPUNIT_ASSERT_EQUAL(std::string("inf_another"), *lastInfluencersResult[1].first.second); + CPPUNIT_ASSERT_EQUAL(std::string("inf_another"), + *lastInfluencersResult[1].first.second); CPPUNIT_ASSERT(lastInfluencersResult[0].second > 0.99); CPPUNIT_ASSERT(lastInfluencersResult[1].second > 0.99); @@ -1605,7 +1740,9 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1, TOptionalStr("v"))); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, + 1, TOptionalStr("v"))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1617,16 +1754,19 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() TUInt64Vec expectedEventCounts = rawEventCounts(); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * 
bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1"), TOptionalStr(uniqueValue)); + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", + TOptionalStr("inf1"), TOptionalStr(uniqueValue)); count += 1.0; } if (i == eventTimes.size()) { @@ -1635,7 +1775,8 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() for (std::size_t k = 0; k < 20; k++) { std::stringstream ss; ss << uniqueValue << "_" << k; - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf1"), TOptionalStr(ss.str())); + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], + "p", TOptionalStr("inf1"), TOptionalStr(ss.str())); } } @@ -1645,15 +1786,18 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } // All the influence should be assigned to our one influencer - CPPUNIT_ASSERT_EQUAL(std::string("[((IF1, inf1), 1)]"), core::CContainerPrinter::print(lastInfluencersResult)); + CPPUNIT_ASSERT_EQUAL(std::string("[((IF1, inf1), 1)]"), + core::CContainerPrinter::print(lastInfluencersResult)); } { // Test single influence name, two influence values @@ -1667,7 +1811,9 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1, TOptionalStr("v"))); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, + 1, TOptionalStr("v"))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1679,16 +1825,19 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() TUInt64Vec expectedEventCounts 
= rawEventCounts(); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1"), TOptionalStr(uniqueValue)); + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", + TOptionalStr("inf1"), TOptionalStr(uniqueValue)); count += 1.0; } if (i == eventTimes.size()) { @@ -1699,9 +1848,11 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() ss << uniqueValue << "_" << k; CEventData d = makeEventData(eventTimes[i - 1], 0, ss.str()); if (k % 2 == 0) { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf1"), TOptionalStr(ss.str())); + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", + TOptionalStr("inf1"), TOptionalStr(ss.str())); } else { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf2"), TOptionalStr(ss.str())); + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", + TOptionalStr("inf2"), TOptionalStr(ss.str())); } } } @@ -1712,10 +1863,12 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } @@ -1723,7 +1876,8 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() // is about twice the regular count, each influencer contributes a lot to // the anomaly CPPUNIT_ASSERT_EQUAL(std::size_t(2), lastInfluencersResult.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); + CPPUNIT_ASSERT_DOUBLES_EQUAL(lastInfluencersResult[0].second, + lastInfluencersResult[1].second, 0.05); CPPUNIT_ASSERT(lastInfluencersResult[0].second > 0.6); } { @@ -1738,7 +1892,9 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr 
gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 1, TOptionalStr("v"))); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, + 1, TOptionalStr("v"))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1750,16 +1906,19 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() TUInt64Vec expectedEventCounts = rawEventCounts(); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", TOptionalStr("inf1"), TOptionalStr(uniqueValue)); + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", + TOptionalStr("inf1"), TOptionalStr(uniqueValue)); count += 1.0; } if (i == eventTimes.size()) { @@ -1769,9 +1928,11 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() std::stringstream ss; ss << uniqueValue << "_" << k; if (k == 1) { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf2"), TOptionalStr(ss.str())); + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", + TOptionalStr("inf2"), TOptionalStr(ss.str())); } else { - addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", TOptionalStr("inf1"), TOptionalStr(ss.str())); + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], "p", + TOptionalStr("inf1"), TOptionalStr(ss.str())); } } } @@ -1782,10 +1943,12 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } @@ -1807,7 +1970,9 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); 
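The next block configures two influence fields, so each addArrival call carries one
optional influence value per configured field followed by the value field. For
illustration, a single event assigning "inf" to IF1 and "inf_another" to IF2
(argument roles inferred from the calls in this test):

    addArrival(*gatherer, m_ResourceMonitor, time, "p",
               TOptionalStr("inf"),         // influence value for field IF1
               TOptionalStr("inf_another"), // influence value for field IF2
               TOptionalStr("v"));          // value field driving the unique count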
CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p", gatherer, m_ResourceMonitor, 2, TOptionalStr("v"))); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p", gatherer, m_ResourceMonitor, + 2, TOptionalStr("v"))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1819,21 +1984,19 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() TUInt64Vec expectedEventCounts = rawEventCounts(); generateEvents(startTime, bucketLength, expectedEventCounts, eventTimes); core_t::TTime endTime = (eventTimes.back() / bucketLength + 1) * bucketLength; - LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); + LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime + << ", # events = " << eventTimes.size()); SAnnotatedProbability::TStoredStringPtrStoredStringPtrPrDoublePrVec lastInfluencersResult; std::size_t i = 0u, j = 0u; - for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { + for (core_t::TTime bucketStartTime = startTime; + bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; double count = 0.0; for (; i < eventTimes.size() && eventTimes[i] < bucketEndTime; ++i) { - addArrival(*gatherer, - m_ResourceMonitor, - eventTimes[i], - "p", - TOptionalStr("inf1"), - TOptionalStr("inf1"), + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i], "p", + TOptionalStr("inf1"), TOptionalStr("inf1"), TOptionalStr(uniqueValue)); count += 1.0; } @@ -1854,12 +2017,8 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() LOG_DEBUG(<< "Inf1 = " << inf1); LOG_DEBUG(<< "Inf2 = " << inf2); LOG_DEBUG(<< "Value = " << ss1.str()); - addArrival(*gatherer, - m_ResourceMonitor, - eventTimes[i - 1], - "p", - TOptionalStr(inf1), - TOptionalStr(inf2), + addArrival(*gatherer, m_ResourceMonitor, eventTimes[i - 1], + "p", TOptionalStr(inf1), TOptionalStr(inf2), TOptionalStr(ss1.str())); } } @@ -1870,10 +2029,12 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() SAnnotatedProbability annotatedProbability; CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - CPPUNIT_ASSERT( - model->computeProbability(0 /*pid*/, bucketStartTime, bucketEndTime, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, bucketStartTime, + bucketEndTime, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); - LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences)); + LOG_DEBUG(<< "influencers = " + << core::CContainerPrinter::print(annotatedProbability.s_Influences)); CPPUNIT_ASSERT(annotatedProbability.s_Probability); lastInfluencersResult = annotatedProbability.s_Influences; } @@ -1883,7 +2044,8 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() CPPUNIT_ASSERT_EQUAL(std::string("IF1"), *lastInfluencersResult[0].first.first); CPPUNIT_ASSERT_EQUAL(std::string("inf"), *lastInfluencersResult[0].first.second); CPPUNIT_ASSERT_EQUAL(std::string("IF2"), *lastInfluencersResult[1].first.first); - 
CPPUNIT_ASSERT_EQUAL(std::string("inf_another"), *lastInfluencersResult[1].first.second); + CPPUNIT_ASSERT_EQUAL(std::string("inf_another"), + *lastInfluencersResult[1].first.second); CPPUNIT_ASSERT(lastInfluencersResult[0].second > 0.8); CPPUNIT_ASSERT(lastInfluencersResult[1].second > 0.8); @@ -1904,11 +2066,16 @@ void CEventRateModelTest::testOnlineRareWithInfluence() { factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPersonWithInfluence("p1", gatherer, m_ResourceMonitor, 1)); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPersonWithInfluence("p2", gatherer, m_ResourceMonitor, 1)); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPersonWithInfluence("p3", gatherer, m_ResourceMonitor, 1)); - CPPUNIT_ASSERT_EQUAL(std::size_t(3), addPersonWithInfluence("p4", gatherer, m_ResourceMonitor, 1)); - CPPUNIT_ASSERT_EQUAL(std::size_t(4), addPersonWithInfluence("p5", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPersonWithInfluence("p1", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + addPersonWithInfluence("p2", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(2), + addPersonWithInfluence("p3", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(3), + addPersonWithInfluence("p4", gatherer, m_ResourceMonitor, 1)); + CPPUNIT_ASSERT_EQUAL(std::size_t(4), + addPersonWithInfluence("p5", gatherer, m_ResourceMonitor, 1)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); CEventRateModel* model = dynamic_cast(modelHolder.get()); @@ -1919,19 +2086,28 @@ void CEventRateModelTest::testOnlineRareWithInfluence() { core_t::TTime time = startTime; for (/**/; time < startTime + 50 * bucketLength; time += bucketLength) { - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p3", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p4", TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1", + TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2", + TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p3", + TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p4", + TOptionalStr("inf1")); model->sample(time, time + bucketLength, m_ResourceMonitor); } { - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p3", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p4", TOptionalStr("inf1")); - addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p5", TOptionalStr("inf2")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p1", + TOptionalStr("inf1")); + addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p2", + TOptionalStr("inf1")); + addArrival(*gatherer, 
m_ResourceMonitor, time + bucketLength / 2, "p3",
+                   TOptionalStr("inf1"));
+        addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p4",
+                   TOptionalStr("inf1"));
+        addArrival(*gatherer, m_ResourceMonitor, time + bucketLength / 2, "p5",
+                   TOptionalStr("inf2"));
     }
     model->sample(time, time + bucketLength, m_ResourceMonitor);
@@ -1939,9 +2115,11 @@ void CEventRateModelTest::testOnlineRareWithInfluence() {
     for (std::size_t pid = 0u; pid < 5; ++pid) {
         SAnnotatedProbability annotatedProbability;
         CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
-        CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, annotatedProbability));
+        CPPUNIT_ASSERT(model->computeProbability(pid, time, time + bucketLength, partitioningFields,
+                                                 1, annotatedProbability));
         LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability);
-        LOG_DEBUG(<< "influencers = " << core::CContainerPrinter::print(annotatedProbability.s_Influences));
+        LOG_DEBUG(<< "influencers = "
+                  << core::CContainerPrinter::print(annotatedProbability.s_Influences));
         lastInfluencersResult = annotatedProbability.s_Influences;
         probabilities.push_back(annotatedProbability.s_Probability);
     }
@@ -1972,7 +2150,8 @@ void CEventRateModelTest::testOnlineRareWithInfluence() {
     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
     core::CRapidXmlStateRestoreTraverser traverser(parser);

-    CAnomalyDetectorModel::TModelPtr restoredModelPtr(factory.makeModel(modelInitData, traverser));
+    CAnomalyDetectorModel::TModelPtr restoredModelPtr(
+        factory.makeModel(modelInitData, traverser));

     // The XML representation of the new filter should be the same as the original
     std::string newXml;
@@ -1998,7 +2177,8 @@ void CEventRateModelTest::testSkipSampling() {
     model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson);
     CModelFactory::TDataGathererPtr gathererNoGap;
     CAnomalyDetectorModel::TModelPtr modelNoGap_;
-    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererNoGap, modelNoGap_, 2);
+    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+              gathererNoGap, modelNoGap_, 2);
     CEventRateModel* modelNoGap = dynamic_cast<CEventRateModel*>(modelNoGap_.get());

     // p1: |1|1|1|
@@ -2013,7 +2193,8 @@ void CEventRateModelTest::testSkipSampling() {
     CAnomalyDetectorModel::TModelPtr modelWithGap_;
     CModelFactory::TDataGathererPtr gathererWithGap;
-    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererWithGap, modelWithGap_, 2);
+    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+              gathererWithGap, modelWithGap_, 2);
     CEventRateModel* modelWithGap = dynamic_cast<CEventRateModel*>(modelWithGap_.get());

     // p1: |1|1|0|0|0|0|0|0|0|0|1|1|
@@ -2046,7 +2227,8 @@ void CEventRateModelTest::testSkipSampling() {
             modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))
             ->residualModel()
             .checksum(),
-        static_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0))
             ->residualModel()
             .checksum());
     CPPUNIT_ASSERT_EQUAL(
         static_cast<const maths::CUnivariateTimeSeriesModel*>(
             modelWithGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))
             ->residualModel()
             .checksum(),
-        static_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelNoGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))
             ->residualModel()
.checksum()); @@ -2082,7 +2265,8 @@ void CEventRateModelTest::testExplicitNulls() { model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson); CModelFactory::TDataGathererPtr gathererSkipGap; CAnomalyDetectorModel::TModelPtr modelSkipGap_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererSkipGap, modelSkipGap_, 0); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gathererSkipGap, modelSkipGap_, 0); CEventRateModel* modelSkipGap = dynamic_cast(modelSkipGap_.get()); // The idea here is to compare a model that has a gap skipped against a model @@ -2090,62 +2274,85 @@ void CEventRateModelTest::testExplicitNulls() { // p1: |1|1|1|X|X|1| // p2: |1|1|0|X|X|0| - addArrival(*gathererSkipGap, m_ResourceMonitor, 100, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); - addArrival(*gathererSkipGap, m_ResourceMonitor, 100, "p2", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererSkipGap, m_ResourceMonitor, 100, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererSkipGap, m_ResourceMonitor, 100, "p2", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); modelSkipGap->sample(100, 200, m_ResourceMonitor); - addArrival(*gathererSkipGap, m_ResourceMonitor, 200, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); - addArrival(*gathererSkipGap, m_ResourceMonitor, 200, "p2", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererSkipGap, m_ResourceMonitor, 200, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererSkipGap, m_ResourceMonitor, 200, "p2", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); modelSkipGap->sample(200, 300, m_ResourceMonitor); - addArrival(*gathererSkipGap, m_ResourceMonitor, 300, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererSkipGap, m_ResourceMonitor, 300, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); modelSkipGap->sample(300, 400, m_ResourceMonitor); modelSkipGap->skipSampling(600); - addArrival(*gathererSkipGap, m_ResourceMonitor, 600, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererSkipGap, m_ResourceMonitor, 600, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); modelSkipGap->sample(600, 700, m_ResourceMonitor); CModelFactory::TDataGathererPtr gathererExNull; CAnomalyDetectorModel::TModelPtr modelExNullGap_; - makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererExNull, modelExNullGap_, 0); - CEventRateModel* modelExNullGap = dynamic_cast(modelExNullGap_.get()); + makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, + gathererExNull, modelExNullGap_, 0); + CEventRateModel* modelExNullGap = + dynamic_cast(modelExNullGap_.get()); // p1: |1,"",null|1|1|null|null|1| // p2: |1,""|1|0|null|null|0| - addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); - addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("")); - addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("null")); - addArrival(*gathererExNull, m_ResourceMonitor, 100, "p2", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); - addArrival(*gathererExNull, m_ResourceMonitor, 100, "p2", TOptionalStr(), TOptionalStr(), TOptionalStr("")); + addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", TOptionalStr(), + TOptionalStr(), 
TOptionalStr("1")); + addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("")); + addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("null")); + addArrival(*gathererExNull, m_ResourceMonitor, 100, "p2", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererExNull, m_ResourceMonitor, 100, "p2", TOptionalStr(), + TOptionalStr(), TOptionalStr("")); modelExNullGap->sample(100, 200, m_ResourceMonitor); - addArrival(*gathererExNull, m_ResourceMonitor, 200, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); - addArrival(*gathererExNull, m_ResourceMonitor, 200, "p2", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererExNull, m_ResourceMonitor, 200, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererExNull, m_ResourceMonitor, 200, "p2", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); modelExNullGap->sample(200, 300, m_ResourceMonitor); - addArrival(*gathererExNull, m_ResourceMonitor, 300, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererExNull, m_ResourceMonitor, 300, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); modelExNullGap->sample(300, 400, m_ResourceMonitor); - addArrival(*gathererExNull, m_ResourceMonitor, 400, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("null")); - addArrival(*gathererExNull, m_ResourceMonitor, 400, "p2", TOptionalStr(), TOptionalStr(), TOptionalStr("null")); + addArrival(*gathererExNull, m_ResourceMonitor, 400, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("null")); + addArrival(*gathererExNull, m_ResourceMonitor, 400, "p2", TOptionalStr(), + TOptionalStr(), TOptionalStr("null")); modelExNullGap->sample(400, 500, m_ResourceMonitor); - addArrival(*gathererExNull, m_ResourceMonitor, 500, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("null")); - addArrival(*gathererExNull, m_ResourceMonitor, 500, "p2", TOptionalStr(), TOptionalStr(), TOptionalStr("null")); + addArrival(*gathererExNull, m_ResourceMonitor, 500, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("null")); + addArrival(*gathererExNull, m_ResourceMonitor, 500, "p2", TOptionalStr(), + TOptionalStr(), TOptionalStr("null")); modelExNullGap->sample(500, 600, m_ResourceMonitor); - addArrival(*gathererExNull, m_ResourceMonitor, 600, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr("1")); + addArrival(*gathererExNull, m_ResourceMonitor, 600, "p1", TOptionalStr(), + TOptionalStr(), TOptionalStr("1")); modelExNullGap->sample(600, 700, m_ResourceMonitor); // Check priors are the same - CPPUNIT_ASSERT_EQUAL(static_cast( - modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)) - ->residualModel() - .checksum(), - static_cast( - modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)) - ->residualModel() - .checksum()); - CPPUNIT_ASSERT_EQUAL(static_cast( - modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1)) - ->residualModel() - .checksum(), - static_cast( - modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1)) - ->residualModel() - .checksum()); + CPPUNIT_ASSERT_EQUAL( + static_cast( + modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)) + ->residualModel() + .checksum(), + static_cast( + modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 0)) + ->residualModel() + .checksum()); + CPPUNIT_ASSERT_EQUAL( + 
static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelExNullGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))
+            ->residualModel()
+            .checksum(),
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelSkipGap->details()->model(model_t::E_IndividualCountByBucketAndPerson, 1))
+            ->residualModel()
+            .checksum());
 }

 void CEventRateModelTest::testInterimCorrections() {
@@ -2160,7 +2367,8 @@ void CEventRateModelTest::testInterimCorrections() {
     model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson);
     CModelFactory::TDataGathererPtr gatherer;
     CAnomalyDetectorModel::TModelPtr model_;
-    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 3);
+    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+              gatherer, model_, 3);
     CEventRateModel* model = dynamic_cast<CEventRateModel*>(model_.get());

     test::CRandomNumbers rng;
@@ -2193,20 +2401,27 @@ void CEventRateModelTest::testInterimCorrections() {

     CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
-    model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim);
+    model_t::CResultType type(model_t::CResultType::E_Unconditional |
+                              model_t::CResultType::E_Interim);
     SAnnotatedProbability annotatedProbability1;
     annotatedProbability1.s_ResultType = type;
-    CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability1));
+    CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             1, annotatedProbability1));
     SAnnotatedProbability annotatedProbability2;
     annotatedProbability2.s_ResultType = type;
-    CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability2));
+    CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             1, annotatedProbability2));
     SAnnotatedProbability annotatedProbability3;
     annotatedProbability3.s_ResultType = type;
-    CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability3));
+    CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             1, annotatedProbability3));

-    TDouble1Vec p1Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 0, 0, type, NO_CORRELATES, now);
-    TDouble1Vec p2Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 1, 0, type, NO_CORRELATES, now);
-    TDouble1Vec p3Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 2, 0, type, NO_CORRELATES, now);
+    TDouble1Vec p1Baseline = model->baselineBucketMean(
+        model_t::E_IndividualCountByBucketAndPerson, 0, 0, type, NO_CORRELATES, now);
+    TDouble1Vec p2Baseline = model->baselineBucketMean(
+        model_t::E_IndividualCountByBucketAndPerson, 1, 0, type, NO_CORRELATES, now);
+    TDouble1Vec p3Baseline = model->baselineBucketMean(
+        model_t::E_IndividualCountByBucketAndPerson, 2, 0, type, NO_CORRELATES, now);

     LOG_DEBUG(<< "p1 probability = " << annotatedProbability1.s_Probability);
     LOG_DEBUG(<< "p2 probability = " << annotatedProbability2.s_Probability);
@@ -2233,13 +2448,19 @@ void CEventRateModelTest::testInterimCorrections() {
     }
     model->sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor);

-    CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 0, annotatedProbability1));
-    CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, 0,
-    CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, 0, annotatedProbability3));
+    CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             0, annotatedProbability1));
+    CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             0, annotatedProbability2));
+    CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             0, annotatedProbability3));

-    p1Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 0, 0, type, NO_CORRELATES, now);
-    p2Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 1, 0, type, NO_CORRELATES, now);
-    p3Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson, 2, 0, type, NO_CORRELATES, now);
+    p1Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson,
+                                           0, 0, type, NO_CORRELATES, now);
+    p2Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson,
+                                           1, 0, type, NO_CORRELATES, now);
+    p3Baseline = model->baselineBucketMean(model_t::E_IndividualCountByBucketAndPerson,
+                                           2, 0, type, NO_CORRELATES, now);

     LOG_DEBUG(<< "p1 probability = " << annotatedProbability1.s_Probability);
     LOG_DEBUG(<< "p2 probability = " << annotatedProbability2.s_Probability);
@@ -2268,7 +2489,8 @@ void CEventRateModelTest::testInterimCorrectionsWithCorrelations() {
     model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson);
     CModelFactory::TDataGathererPtr gatherer;
     CAnomalyDetectorModel::TModelPtr model_;
-    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 3);
+    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+              gatherer, model_, 3);
     CEventRateModel* model = dynamic_cast<CEventRateModel*>(model_.get());

     core_t::TTime now = startTime;
@@ -2301,23 +2523,30 @@ void CEventRateModelTest::testInterimCorrectionsWithCorrelations() {
     model->sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor);

     CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
-    model_t::CResultType type(model_t::CResultType::E_Conditional | model_t::CResultType::E_Interim);
+    model_t::CResultType type(model_t::CResultType::E_Conditional |
+                              model_t::CResultType::E_Interim);
     SAnnotatedProbability annotatedProbability1;
     annotatedProbability1.s_ResultType = type;
-    CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability1));
+    CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             1, annotatedProbability1));
     SAnnotatedProbability annotatedProbability2;
     annotatedProbability2.s_ResultType = type;
-    CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability2));
+    CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             1, annotatedProbability2));
     SAnnotatedProbability annotatedProbability3;
     annotatedProbability3.s_ResultType = type;
-    CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability3));
+    CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             1, annotatedProbability3));

     TDouble1Vec p1Baseline = model->baselineBucketMean(
-        model_t::E_IndividualCountByBucketAndPerson, 0, 0, type, annotatedProbability1.s_AttributeProbabilities[0].s_Correlated, now);
+        model_t::E_IndividualCountByBucketAndPerson, 0, 0, type,
+        annotatedProbability1.s_AttributeProbabilities[0].s_Correlated, now);
     TDouble1Vec p2Baseline = model->baselineBucketMean(
-        model_t::E_IndividualCountByBucketAndPerson, 1, 0, type, annotatedProbability2.s_AttributeProbabilities[0].s_Correlated, now);
+        model_t::E_IndividualCountByBucketAndPerson, 1, 0, type,
+        annotatedProbability2.s_AttributeProbabilities[0].s_Correlated, now);
     TDouble1Vec p3Baseline = model->baselineBucketMean(
-        model_t::E_IndividualCountByBucketAndPerson, 2, 0, type, annotatedProbability3.s_AttributeProbabilities[0].s_Correlated, now);
+        model_t::E_IndividualCountByBucketAndPerson, 2, 0, type,
+        annotatedProbability3.s_AttributeProbabilities[0].s_Correlated, now);

     LOG_DEBUG(<< "p1 probability = " << annotatedProbability1.s_Probability);
     LOG_DEBUG(<< "p2 probability = " << annotatedProbability2.s_Probability);
@@ -2348,20 +2577,24 @@ void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() {
     factory.features(features);

     CModelFactory::SGathererInitializationData gathererWithZerosInitData(startTime);
-    CModelFactory::TDataGathererPtr gathererWithZeros(factory.makeDataGatherer(gathererWithZerosInitData));
+    CModelFactory::TDataGathererPtr gathererWithZeros(
+        factory.makeDataGatherer(gathererWithZerosInitData));
     CModelFactory::SModelInitializationData initDataWithZeros(gathererWithZeros);
     CAnomalyDetectorModel::TModelPtr modelWithZerosPtr(factory.makeModel(initDataWithZeros));
     CPPUNIT_ASSERT(modelWithZerosPtr);
     CPPUNIT_ASSERT_EQUAL(model_t::E_EventRateOnline, modelWithZerosPtr->category());
-    CEventRateModel& modelWithZeros = static_cast<CEventRateModel&>(*modelWithZerosPtr.get());
+    CEventRateModel& modelWithZeros =
+        static_cast<CEventRateModel&>(*modelWithZerosPtr.get());

     CModelFactory::SGathererInitializationData gathererNoZerosInitData(startTime);
-    CModelFactory::TDataGathererPtr gathererNoZeros(factory.makeDataGatherer(gathererNoZerosInitData));
+    CModelFactory::TDataGathererPtr gathererNoZeros(
+        factory.makeDataGatherer(gathererNoZerosInitData));
     CModelFactory::SModelInitializationData initDataNoZeros(gathererNoZeros);
     CAnomalyDetectorModel::TModelPtr modelNoZerosPtr(factory.makeModel(initDataNoZeros));
     CPPUNIT_ASSERT(modelNoZerosPtr);
     CPPUNIT_ASSERT_EQUAL(model_t::E_EventRateOnline, modelNoZerosPtr->category());
-    CEventRateModel& modelNoZeros = static_cast<CEventRateModel&>(*modelNoZerosPtr.get());
+    CEventRateModel& modelNoZeros =
+        static_cast<CEventRateModel&>(*modelNoZerosPtr.get());

     // The idea here is to compare a model that has records with summary count of zero
     // against a model that has no records at all where the first model had the zero-count records.
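
The interim-correction tests above all rest on one idea: part-way through a bucket the
observed count is necessarily low, so an interim result should be judged against a value
scaled up by the bucket's completeness. What follows is only a minimal sketch of that
correction, not the library's CInterimBucketCorrector API; the names correctedCount and
completeness are illustrative assumptions.

    #include <algorithm>

    // Extrapolate a partially elapsed bucket's count to the full bucket,
    // assuming the current arrival rate is sustained.
    double correctedCount(double observedCount,
                          double elapsedSeconds,
                          double bucketLengthSeconds) {
        // Fraction of the bucket seen so far, clamped to avoid division by zero.
        double completeness = std::max(elapsedSeconds / bucketLengthSeconds, 1e-3);
        return observedCount / completeness;
    }

For example, 40 events seen 60% of the way through a bucket extrapolate to roughly 67
events for the full bucket, which is why the interim probabilities computed before
sampleBucketStatistics differ from the final ones.
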
@@ -2378,11 +2611,13 @@ void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() {
         rng.generateUniformSamples(0.0, 1.0, 1, zeroCountProbability);
         for (std::size_t i = 0; i < samples[0]; ++i) {
             if (zeroCountProbability[0] < 0.2) {
-                addArrival(
-                    *gathererWithZeros, m_ResourceMonitor, now, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountZero));
+                addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1",
+                           TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountZero));
             } else {
-                addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountOne));
-                addArrival(*gathererNoZeros, m_ResourceMonitor, now, "p1", TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountOne));
+                addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1",
+                           TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountOne));
+                addArrival(*gathererNoZeros, m_ResourceMonitor, now, "p1",
+                           TOptionalStr(), TOptionalStr(), TOptionalStr(summaryCountOne));
             }
         }
         modelWithZeros.sample(now, now + bucketLength, m_ResourceMonitor);
@@ -2415,7 +2650,8 @@ void CEventRateModelTest::testComputeProbabilityGivenDetectionRule() {
     model_t::TFeatureVec features(1u, model_t::E_IndividualCountByBucketAndPerson);
     CModelFactory::TDataGathererPtr gatherer;
     CAnomalyDetectorModel::TModelPtr model_;
-    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model_, 1);
+    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+              gatherer, model_, 1);
     CEventRateModel* model = dynamic_cast<CEventRateModel*>(model_.get());

     test::CRandomNumbers rng;
@@ -2437,7 +2673,8 @@ void CEventRateModelTest::testComputeProbabilityGivenDetectionRule() {
     CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
     SAnnotatedProbability annotatedProbability;
-    CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability) == false);
+    CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields,
+                                             1, annotatedProbability) == false);
 }

 void CEventRateModelTest::testDecayRateControl() {
@@ -2466,18 +2703,21 @@ void CEventRateModelTest::testDecayRateControl() {
         CEventRateModelFactory factory(params);
         CModelFactory::TDataGathererPtr gatherer;
         CAnomalyDetectorModel::TModelPtr model;
-        makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model, 1);
+        makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+                  gatherer, model, 1);

         params.s_ControlDecayRate = false;
         params.s_DecayRate = 0.0001;
         CEventRateModelFactory referenceFactory(params);
         CModelFactory::TDataGathererPtr referenceGatherer;
         CAnomalyDetectorModel::TModelPtr referenceModel;
-        makeModel(referenceFactory, features, m_ResourceMonitor, startTime, bucketLength, referenceGatherer, referenceModel, 1);
+        makeModel(referenceFactory, features, m_ResourceMonitor, startTime,
+                  bucketLength, referenceGatherer, referenceModel, 1);

         TMeanAccumulator meanPredictionError;
         TMeanAccumulator meanReferencePredictionError;
-        model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim);
+        model_t::CResultType type(model_t::CResultType::E_Unconditional |
+                                  model_t::CResultType::E_Interim);
         for (core_t::TTime t = 0; t < 4 * core::constants::WEEK; t += bucketLength) {
             if (t % core::constants::WEEK == 0) {
                 LOG_DEBUG(<< "week " << t / core::constants::WEEK + 1);
@@ -2485,23 +2725,32 @@
             TDoubleVec rate;
             rng.generateUniformSamples(0.0, 10.0, 1, rate);
-            rate[0] += 20.0 * (t > 3 * core::constants::WEEK && t < core::constants::WEEK + 4 * 3600 ? 1.0 : 0.0);
+            rate[0] += 20.0 * (t > 3 * core::constants::WEEK &&
+                                       t < core::constants::WEEK + 4 * 3600
+                                   ? 1.0
+                                   : 0.0);
             for (std::size_t i = 0u; i < static_cast<std::size_t>(rate[0]); ++i) {
                 addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1");
-                addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1");
+                addArrival(*referenceGatherer, m_ResourceMonitor,
+                           t + bucketLength / 2, "p1");
             }
             model->sample(t, t + bucketLength, m_ResourceMonitor);
             referenceModel->sample(t, t + bucketLength, m_ResourceMonitor);
-            meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
-                                              model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0]));
-            meanReferencePredictionError.add(
-                std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
-                          referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0]));
+            meanPredictionError.add(std::fabs(
+                model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
+                model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES,
+                                          t + bucketLength / 2)[0]));
+            meanReferencePredictionError.add(std::fabs(
+                referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
+                referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES,
+                                                   t + bucketLength / 2)[0]));
         }
         LOG_DEBUG(<< "mean = " << maths::CBasicStatistics::mean(meanPredictionError));
-        LOG_DEBUG(<< "reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError));
+        LOG_DEBUG(<< "reference = "
+                  << maths::CBasicStatistics::mean(meanReferencePredictionError));
         CPPUNIT_ASSERT_DOUBLES_EQUAL(
-            maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), 0.05);
+            maths::CBasicStatistics::mean(meanReferencePredictionError),
+            maths::CBasicStatistics::mean(meanPredictionError), 0.05);
     }

     LOG_DEBUG(<< "*** Test linear scaling ***");
@@ -2515,45 +2764,55 @@ void CEventRateModelTest::testDecayRateControl() {
     {
         CEventRateModelFactory factory(params);
         CModelFactory::TDataGathererPtr gatherer;
         CAnomalyDetectorModel::TModelPtr model;
-        makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model, 1);
+        makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+                  gatherer, model, 1);

         params.s_ControlDecayRate = false;
         params.s_DecayRate = 0.001;
         CEventRateModelFactory referenceFactory(params);
         CModelFactory::TDataGathererPtr referenceGatherer;
         CAnomalyDetectorModel::TModelPtr referenceModel;
-        makeModel(referenceFactory, features, m_ResourceMonitor, startTime, bucketLength, referenceGatherer, referenceModel, 1);
+        makeModel(referenceFactory, features, m_ResourceMonitor, startTime,
+                  bucketLength, referenceGatherer, referenceModel, 1);

         TMeanAccumulator meanPredictionError;
         TMeanAccumulator meanReferencePredictionError;
-        model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim);
+        model_t::CResultType type(model_t::CResultType::E_Unconditional |
+                                  model_t::CResultType::E_Interim);
         for (core_t::TTime t = 0; t < 10 * core::constants::WEEK; t += bucketLength) {
             if (t % core::constants::WEEK == 0) {
                 LOG_DEBUG(<< "week " << t / core::constants::WEEK + 1);
             }

             double rate = 10.0 *
-                          (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) /
+                          (1.0 +
+                           std::sin(boost::math::double_constants::two_pi *
+                                    static_cast<double>(t) /
                                     static_cast<double>(core::constants::DAY))) *
                           (t < 5 * core::constants::WEEK ? 1.0 : 2.0);
             TDoubleVec noise;
             rng.generateUniformSamples(0.0, 3.0, 1, noise);
             for (std::size_t i = 0u; i < static_cast<std::size_t>(rate + noise[0]); ++i) {
                 addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1");
-                addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1");
+                addArrival(*referenceGatherer, m_ResourceMonitor,
+                           t + bucketLength / 2, "p1");
             }
             model->sample(t, t + bucketLength, m_ResourceMonitor);
             referenceModel->sample(t, t + bucketLength, m_ResourceMonitor);
-            meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
-                                              model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0]));
-            meanReferencePredictionError.add(
-                std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
-                          referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0]));
+            meanPredictionError.add(std::fabs(
+                model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
+                model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES,
+                                          t + bucketLength / 2)[0]));
+            meanReferencePredictionError.add(std::fabs(
+                referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
+                referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES,
+                                                   t + bucketLength / 2)[0]));
         }
         LOG_DEBUG(<< "mean = " << maths::CBasicStatistics::mean(meanPredictionError));
-        LOG_DEBUG(<< "reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError));
+        LOG_DEBUG(<< "reference = "
+                  << maths::CBasicStatistics::mean(meanReferencePredictionError));
         CPPUNIT_ASSERT_DOUBLES_EQUAL(
-            maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), 0.05);
+            maths::CBasicStatistics::mean(meanReferencePredictionError),
+            maths::CBasicStatistics::mean(meanPredictionError), 0.05);
     }

     LOG_DEBUG(<< "*** Test unmodelled cyclic component ***");
@@ -2569,44 +2828,54 @@ void CEventRateModelTest::testDecayRateControl() {
     {
         CEventRateModelFactory factory(params);
         CModelFactory::TDataGathererPtr gatherer;
         CAnomalyDetectorModel::TModelPtr model;
-        makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gatherer, model, 1);
+        makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+                  gatherer, model, 1);

         params.s_ControlDecayRate = false;
         params.s_DecayRate = 0.001;
         CEventRateModelFactory referenceFactory(params);
         CModelFactory::TDataGathererPtr referenceGatherer;
         CAnomalyDetectorModel::TModelPtr referenceModel;
-        makeModel(referenceFactory, features, m_ResourceMonitor, startTime, bucketLength, referenceGatherer, referenceModel, 1);
+        makeModel(referenceFactory, features, m_ResourceMonitor, startTime,
+                  bucketLength, referenceGatherer, referenceModel, 1);

         TMeanAccumulator meanPredictionError;
         TMeanAccumulator meanReferencePredictionError;
-        model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim);
+        model_t::CResultType type(model_t::CResultType::E_Unconditional |
+                                  model_t::CResultType::E_Interim);
         for (core_t::TTime t = 0; t < 20 * core::constants::WEEK; t += bucketLength) {
             if (t % core::constants::WEEK == 0) {
                 LOG_DEBUG(<< "week " << t / core::constants::WEEK + 1);
             }

             double rate = 10.0 *
-                          (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) /
+                          (1.0 +
+                           std::sin(boost::math::double_constants::two_pi *
+                                    static_cast<double>(t) /
                                     static_cast<double>(core::constants::DAY))) *
-                          (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / 10.0 /
+                          (1.0 + std::sin(boost::math::double_constants::two_pi *
+                                          static_cast<double>(t) / 10.0 /
                                           static_cast<double>(core::constants::WEEK)));
             TDoubleVec noise;
             rng.generateUniformSamples(0.0, 3.0, 1, noise);
             for (std::size_t i = 0u; i < static_cast<std::size_t>(rate + noise[0]); ++i) {
                 addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1");
-                addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1");
+                addArrival(*referenceGatherer, m_ResourceMonitor,
+                           t + bucketLength / 2, "p1");
             }
             model->sample(t, t + bucketLength, m_ResourceMonitor);
             referenceModel->sample(t, t + bucketLength, m_ResourceMonitor);
-            meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
-                                              model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0]));
-            meanReferencePredictionError.add(
-                std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
-                          referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0]));
+            meanPredictionError.add(std::fabs(
+                model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
+                model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES,
+                                          t + bucketLength / 2)[0]));
+            meanReferencePredictionError.add(std::fabs(
+                referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] -
+                referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES,
+                                                   t + bucketLength / 2)[0]));
         }
         LOG_DEBUG(<< "mean = " << maths::CBasicStatistics::mean(meanPredictionError));
-        LOG_DEBUG(<< "reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError));
+        LOG_DEBUG(<< "reference = "
+                  << maths::CBasicStatistics::mean(meanReferencePredictionError));
         CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanPredictionError) <
                        0.7 * maths::CBasicStatistics::mean(meanReferencePredictionError));
     }
@@ -2639,7 +2908,8 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() {
     model_t::TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
     CModelFactory::TDataGathererPtr gathererNoSkip;
     CAnomalyDetectorModel::TModelPtr modelPtrNoSkip;
-    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength, gathererNoSkip, modelPtrNoSkip, 1);
+    makeModel(factory, features, m_ResourceMonitor, startTime, bucketLength,
+              gathererNoSkip, modelPtrNoSkip, 1);
     CEventRateModel* modelNoSkip = dynamic_cast<CEventRateModel*>(modelPtrNoSkip.get());

     // Model with the skip sampling rule
@@ -2650,8 +2920,10 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() {
     CEventRateModelFactory factoryWithSkip(paramsWithRules);
     CModelFactory::TDataGathererPtr gathererWithSkip;
     CAnomalyDetectorModel::TModelPtr modelPtrWithSkip;
-    makeModel(factoryWithSkip, features, m_ResourceMonitor, startTime, bucketLength, gathererWithSkip, modelPtrWithSkip, 1);
-    CEventRateModel* modelWithSkip = dynamic_cast<CEventRateModel*>(modelPtrWithSkip.get());
+    makeModel(factoryWithSkip, features, m_ResourceMonitor, startTime,
+              bucketLength, gathererWithSkip, modelPtrWithSkip, 1);
+    CEventRateModel* modelWithSkip =
+        dynamic_cast<CEventRateModel*>(modelPtrWithSkip.get());

     std::size_t endTime = startTime + bucketLength;

@@ -2700,80 +2972,100 @@ void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() {
     CPPUNIT_ASSERT(modelWithSkip->checksum() != modelNoSkip->checksum());
     // but the underlying models should be the same
-    CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = modelWithSkip->details();
+    CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView =
+        modelWithSkip->details();
     CAnomalyDetectorModel::CModelDetailsViewPtr modelNoSkipView = modelNoSkip->details();

     uint64_t withSkipChecksum =
-        static_cast<const maths::CUnivariateTimeSeriesModel*>(modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))
             ->residualModel()
             .checksum();
     uint64_t noSkipChecksum =
-        static_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0))
             ->residualModel()
             .checksum();
     CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum);

     // Check the last value times of the underlying models are the same
     const maths::CUnivariateTimeSeriesModel* timeSeriesModel =
-        dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0));
+        dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelNoSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0));
     CPPUNIT_ASSERT(timeSeriesModel);

     core_t::TTime time = timeSeriesModel->trendModel().lastValueTime();
-    CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_IndividualCountByBucketAndPerson, startTime, bucketLength), time);
+    CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_IndividualCountByBucketAndPerson,
+                                             startTime, bucketLength),
+                         time);

     // The last times of model with a skip should be the same
-    timeSeriesModel =
-        dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0));
+    timeSeriesModel = dynamic_cast<const maths::CUnivariateTimeSeriesModel*>(
+        modelWithSkipView->model(model_t::E_IndividualCountByBucketAndPerson, 0));
     CPPUNIT_ASSERT_EQUAL(time, timeSeriesModel->trendModel().lastValueTime());
 }

 CppUnit::Test* CEventRateModelTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRateModelTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testOnlineCountSample",
-                                                                       &CEventRateModelTest::testOnlineCountSample));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testOnlineNonZeroCountSample",
-                                                                       &CEventRateModelTest::testOnlineNonZeroCountSample));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testOnlineRare", &CEventRateModelTest::testOnlineRare));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testOnlineProbabilityCalculation",
-                                                                       &CEventRateModelTest::testOnlineProbabilityCalculation));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount",
-                                                     &CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount",
-                                                     &CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testOnlineCorrelatedNoTrend",
-                                                                       &CEventRateModelTest::testOnlineCorrelatedNoTrend));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testOnlineCorrelatedTrend",
-                                                                       &CEventRateModelTest::testOnlineCorrelatedTrend));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testPrune", &CEventRateModelTest::testPrune));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>("CEventRateModelTest::testKey", &CEventRateModelTest::testKey));
CppUnit::TestCaller("CEventRateModelTest::testModelsWithValueFields", - &CEventRateModelTest::testModelsWithValueFields)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testCountProbabilityCalculationWithInfluence", - &CEventRateModelTest::testCountProbabilityCalculationWithInfluence)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence", - &CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testOnlineRareWithInfluence", - &CEventRateModelTest::testOnlineRareWithInfluence)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRateModelTest::testSkipSampling", &CEventRateModelTest::testSkipSampling)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRateModelTest::testExplicitNulls", &CEventRateModelTest::testExplicitNulls)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testInterimCorrections", - &CEventRateModelTest::testInterimCorrections)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testInterimCorrectionsWithCorrelations", - &CEventRateModelTest::testInterimCorrectionsWithCorrelations)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored", - &CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testComputeProbabilityGivenDetectionRule", - &CEventRateModelTest::testComputeProbabilityGivenDetectionRule)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testDecayRateControl", - &CEventRateModelTest::testDecayRateControl)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRateModelTest::testIgnoreSamplingGivenDetectionRules", - &CEventRateModelTest::testIgnoreSamplingGivenDetectionRules)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineCountSample", &CEventRateModelTest::testOnlineCountSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineNonZeroCountSample", + &CEventRateModelTest::testOnlineNonZeroCountSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineRare", &CEventRateModelTest::testOnlineRare)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineProbabilityCalculation", + &CEventRateModelTest::testOnlineProbabilityCalculation)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount", + &CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount", + &CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineCorrelatedNoTrend", + &CEventRateModelTest::testOnlineCorrelatedNoTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineCorrelatedTrend", + &CEventRateModelTest::testOnlineCorrelatedTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testPrune", &CEventRateModelTest::testPrune)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testKey", &CEventRateModelTest::testKey)); + suiteOfTests->addTest(new CppUnit::TestCaller( + 
"CEventRateModelTest::testModelsWithValueFields", + &CEventRateModelTest::testModelsWithValueFields)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testCountProbabilityCalculationWithInfluence", + &CEventRateModelTest::testCountProbabilityCalculationWithInfluence)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence", + &CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testOnlineRareWithInfluence", + &CEventRateModelTest::testOnlineRareWithInfluence)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testSkipSampling", &CEventRateModelTest::testSkipSampling)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testExplicitNulls", &CEventRateModelTest::testExplicitNulls)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testInterimCorrections", + &CEventRateModelTest::testInterimCorrections)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testInterimCorrectionsWithCorrelations", + &CEventRateModelTest::testInterimCorrectionsWithCorrelations)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored", + &CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testComputeProbabilityGivenDetectionRule", + &CEventRateModelTest::testComputeProbabilityGivenDetectionRule)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testDecayRateControl", &CEventRateModelTest::testDecayRateControl)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRateModelTest::testIgnoreSamplingGivenDetectionRules", + &CEventRateModelTest::testIgnoreSamplingGivenDetectionRules)); return suiteOfTests; } diff --git a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc index 7f457ef730..80f5395692 100644 --- a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc +++ b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc @@ -39,7 +39,9 @@ struct SMessage { SMessage(core_t::TTime time, const std::string& attribute, const std::string& person) : s_Time(time), s_Attribute(attribute), s_Person(person) {} - bool operator<(const SMessage& other) const { return s_Time < other.s_Time; } + bool operator<(const SMessage& other) const { + return s_Time < other.s_Time; + } core_t::TTime s_Time; std::string s_Attribute; @@ -63,7 +65,8 @@ using TStrFeatureDataPr = std::pair; using TStrFeatureDataPrVec = std::vector; using TSizeSizePrFeatureDataPr = std::pair; using TSizeSizePrFeatureDataPrVec = std::vector; -using TFeatureSizeSizePrFeatureDataPrVecPr = std::pair; +using TFeatureSizeSizePrFeatureDataPrVecPr = + std::pair; using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector; TStrVec allCategories() { @@ -84,7 +87,10 @@ TStrVec allPeople() { return people; } -void generateTestMessages(test::CRandomNumbers& rng, core_t::TTime time, core_t::TTime bucketLength, TMessageVec& messages) { +void generateTestMessages(test::CRandomNumbers& rng, + core_t::TTime time, + core_t::TTime bucketLength, + TMessageVec& messages) { using TUIntVec = std::vector; using TDoubleVec = std::vector; @@ -116,11 +122,13 @@ void generateTestMessages(test::CRandomNumbers& rng, core_t::TTime time, core_t: for (std::size_t i = 
     for (std::size_t i = 0u; i < categories.size(); ++i) {
         TDoubleVec offsets;
-        rng.generateUniformSamples(0.0, static_cast<double>(bucketLength) - 1.0, bucketCounts[i], offsets);
+        rng.generateUniformSamples(0.0, static_cast<double>(bucketLength) - 1.0,
+                                   bucketCounts[i], offsets);

         for (std::size_t j = 0u; j < offsets.size(); ++j) {
-            messages.push_back(
-                SMessage(time + static_cast<core_t::TTime>(offsets[j]), categories[i], people[bucketPeople[j % bucketPeople.size()]]));
+            messages.push_back(SMessage(
+                time + static_cast<core_t::TTime>(offsets[j]), categories[i],
+                people[bucketPeople[j % bucketPeople.size()]]));
         }
     }

@@ -128,7 +136,8 @@ void generateTestMessages(test::CRandomNumbers& rng, core_t::TTime time, core_t:
     LOG_DEBUG(<< "Generated " << messages.size() << " messages");
 }

-const TSizeSizePrFeatureDataPrVec& extract(const TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData, model_t::EFeature feature) {
+const TSizeSizePrFeatureDataPrVec&
+extract(const TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData, model_t::EFeature feature) {
     for (std::size_t i = 0u; i < featureData.size(); ++i) {
         if (featureData[i].first == feature) {
             return featureData[i].second;
@@ -139,28 +148,36 @@
     return EMPTY;
 }

-const TSizeSizePrFeatureDataPrVec& extractPeoplePerAttribute(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
+const TSizeSizePrFeatureDataPrVec&
+extractPeoplePerAttribute(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
     return extract(featureData, model_t::E_PopulationUniquePersonCountByAttribute);
 }

-const TSizeSizePrFeatureDataPrVec& extractNonZeroAttributeCounts(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
+const TSizeSizePrFeatureDataPrVec&
+extractNonZeroAttributeCounts(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
     return extract(featureData, model_t::E_PopulationCountByBucketPersonAndAttribute);
 }

-const TSizeSizePrFeatureDataPrVec& extractAttributeIndicator(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
+const TSizeSizePrFeatureDataPrVec&
+extractAttributeIndicator(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
     return extract(featureData, model_t::E_PopulationIndicatorOfBucketPersonAndAttribute);
 }

-const TSizeSizePrFeatureDataPrVec& extractBucketAttributesPerPerson(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
+const TSizeSizePrFeatureDataPrVec&
+extractBucketAttributesPerPerson(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
     return extract(featureData, model_t::E_PopulationUniqueCountByBucketPersonAndAttribute);
 }

-const TSizeSizePrFeatureDataPrVec& extractCompressedLengthPerPerson(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
+const TSizeSizePrFeatureDataPrVec&
+extractCompressedLengthPerPerson(TFeatureSizeSizePrFeatureDataPrVecPrVec& featureData) {
     return extract(featureData, model_t::E_PopulationInfoContentByBucketPersonAndAttribute);
 }

-CEventData
-addArrival(core_t::TTime time, const std::string& p, const std::string& a, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) {
+CEventData addArrival(core_t::TTime time,
+                      const std::string& p,
+                      const std::string& a,
+                      CDataGatherer& gatherer,
+                      CResourceMonitor& resourceMonitor) {
     CDataGatherer::TStrCPtrVec fields;
     fields.push_back(&p);
     fields.push_back(&a);
@@ -212,21 +229,10 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() {
     features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute);
     features.push_back(model_t::E_PopulationUniquePersonCountByAttribute);
     SModelParams params(bucketLength);
-    
CDataGatherer dataGatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, searchKey, features, startTime, 0); CPPUNIT_ASSERT(dataGatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(startTime, dataGatherer.currentBucketStartTime()); @@ -244,7 +250,8 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() { TSizeSizePrUInt64Map expectedAttributeCounts; for (std::size_t j = 0u; j < messages.size(); ++j) { - addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); + addArrival(messages[j].s_Time, messages[j].s_Person, + messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); std::size_t cid; dataGatherer.attributeId(messages[j].s_Attribute, cid); @@ -254,10 +261,14 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() { ++expectedAttributeCounts[std::make_pair(pid, cid)]; expectedAttributePeople[cid].insert(pid); - if (expectedAttributeOrder.insert(TStrSizeMap::value_type(messages[j].s_Attribute, attributeOrder)).second) { + if (expectedAttributeOrder + .insert(TStrSizeMap::value_type(messages[j].s_Attribute, attributeOrder)) + .second) { ++attributeOrder; } - if (expectedPeopleOrder.insert(TStrSizeMap::value_type(messages[j].s_Person, personOrder)).second) { + if (expectedPeopleOrder + .insert(TStrSizeMap::value_type(messages[j].s_Person, personOrder)) + .second) { ++personOrder; } } @@ -268,23 +279,28 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; dataGatherer.featureData(time, bucketLength, featureData); - const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = + extractPeoplePerAttribute(featureData); CPPUNIT_ASSERT_EQUAL(expectedAttributePeople.size(), peoplePerAttribute.size()); TSizeSizePrFeatureDataPrVec expectedPeoplePerAttribute; for (std::size_t j = 0u; j < peoplePerAttribute.size(); ++j) { - expectedPeoplePerAttribute.push_back(TSizeSizePrFeatureDataPr(std::make_pair(size_t(0), j), expectedAttributePeople[j].size())); + expectedPeoplePerAttribute.push_back(TSizeSizePrFeatureDataPr( + std::make_pair(size_t(0), j), expectedAttributePeople[j].size())); } CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedPeoplePerAttribute), core::CContainerPrinter::print(peoplePerAttribute)); - const TSizeSizePrFeatureDataPrVec& personAttributeCounts = extractNonZeroAttributeCounts(featureData); + const TSizeSizePrFeatureDataPrVec& personAttributeCounts = + extractNonZeroAttributeCounts(featureData); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedAttributeCounts), core::CContainerPrinter::print(personAttributeCounts)); - const TSizeSizePrFeatureDataPrVec& attributeIndicator = extractAttributeIndicator(featureData); + const TSizeSizePrFeatureDataPrVec& attributeIndicator = + extractAttributeIndicator(featureData); CPPUNIT_ASSERT(attributeIndicator.empty()); - const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = extractBucketAttributesPerPerson(featureData); + const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = + 
extractBucketAttributesPerPerson(featureData); CPPUNIT_ASSERT(bucketAttributesPerPerson.empty()); dataGatherer.timeNow(time + bucketLength); @@ -299,7 +315,8 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() { CPPUNIT_ASSERT_EQUAL(expectedAttributeOrder[categories[i]], cid); } LOG_DEBUG(<< "attribute ids = " << core::CContainerPrinter::print(attributeIds)); - LOG_DEBUG(<< "expected attribute ids = " << core::CContainerPrinter::print(expectedAttributeOrder)); + LOG_DEBUG(<< "expected attribute ids = " + << core::CContainerPrinter::print(expectedAttributeOrder)); TStrVec people = allPeople(); TSizeVec peopleIds; @@ -310,7 +327,8 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() { CPPUNIT_ASSERT_EQUAL(expectedPeopleOrder[people[i]], pid); } LOG_DEBUG(<< "people ids = " << core::CContainerPrinter::print(peopleIds)); - LOG_DEBUG(<< "expected people ids = " << core::CContainerPrinter::print(expectedPeopleOrder)); + LOG_DEBUG(<< "expected people ids = " + << core::CContainerPrinter::print(expectedPeopleOrder)); } void CEventRatePopulationDataGathererTest::testAttributeIndicator() { @@ -328,21 +346,10 @@ void CEventRatePopulationDataGathererTest::testAttributeIndicator() { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationIndicatorOfBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, searchKey, features, startTime, 0); core_t::TTime time = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { @@ -351,7 +358,8 @@ void CEventRatePopulationDataGathererTest::testAttributeIndicator() { TSizeSizePrUInt64Map expectedAttributeIndicator; for (std::size_t j = 0u; j < messages.size(); ++j) { - addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); + addArrival(messages[j].s_Time, messages[j].s_Person, + messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); std::size_t cid; dataGatherer.attributeId(messages[j].s_Attribute, cid); @@ -368,14 +376,17 @@ void CEventRatePopulationDataGathererTest::testAttributeIndicator() { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; dataGatherer.featureData(time, bucketLength, featureData); - const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = + extractPeoplePerAttribute(featureData); CPPUNIT_ASSERT(peoplePerAttribute.empty()); - const TSizeSizePrFeatureDataPrVec& attributeIndicator = extractAttributeIndicator(featureData); + const TSizeSizePrFeatureDataPrVec& attributeIndicator = + extractAttributeIndicator(featureData); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedAttributeIndicator), core::CContainerPrinter::print(attributeIndicator)); - const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = extractBucketAttributesPerPerson(featureData); + const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = + extractBucketAttributesPerPerson(featureData); CPPUNIT_ASSERT(bucketAttributesPerPerson.empty()); dataGatherer.timeNow(time + bucketLength); @@ 
-397,21 +408,10 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "value", - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, "value", TStrVec(), + false, searchKey, features, startTime, 0); core_t::TTime time = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { @@ -426,7 +426,8 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() { ss << "thing" << "_" << time << "_" << i; std::string value(ss.str()); - addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, value, dataGatherer, m_ResourceMonitor); + addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, + value, dataGatherer, m_ResourceMonitor); std::size_t cid; dataGatherer.attributeId(messages[j].s_Attribute, cid); @@ -445,13 +446,16 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; dataGatherer.featureData(time, bucketLength, featureData); - const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = + extractPeoplePerAttribute(featureData); CPPUNIT_ASSERT(peoplePerAttribute.empty()); - const TSizeSizePrFeatureDataPrVec& attributeIndicator = extractAttributeIndicator(featureData); + const TSizeSizePrFeatureDataPrVec& attributeIndicator = + extractAttributeIndicator(featureData); CPPUNIT_ASSERT(attributeIndicator.empty()); - const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = extractBucketAttributesPerPerson(featureData); + const TSizeSizePrFeatureDataPrVec& bucketAttributesPerPerson = + extractBucketAttributesPerPerson(featureData); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedUniqueCounts), core::CContainerPrinter::print(bucketAttributesPerPerson)); @@ -475,21 +479,10 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationInfoContentByBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "value", - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, "value", TStrVec(), + false, searchKey, features, startTime, 0); core_t::TTime time = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { @@ -498,7 +491,8 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() { TSizeStrSetMap bucketPeopleCategories; for (std::size_t j = 0u; j < messages.size(); ++j) { - addArrival(messages[j].s_Time, messages[j].s_Person, "attribute", messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); + addArrival(messages[j].s_Time, messages[j].s_Person, "attribute", + 
messages[j].s_Attribute, dataGatherer, m_ResourceMonitor);

             std::size_t cid;
             dataGatherer.attributeId(messages[j].s_Attribute, cid);
@@ -515,17 +509,22 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() {
         TFeatureSizeSizePrFeatureDataPrVecPrVec featureData;
         dataGatherer.featureData(time, bucketLength, featureData);

-        const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData);
+        const TSizeSizePrFeatureDataPrVec& peoplePerAttribute =
+            extractPeoplePerAttribute(featureData);
         CPPUNIT_ASSERT(peoplePerAttribute.empty());

-        const TSizeSizePrFeatureDataPrVec& attributeIndicator = extractAttributeIndicator(featureData);
+        const TSizeSizePrFeatureDataPrVec& attributeIndicator =
+            extractAttributeIndicator(featureData);
         CPPUNIT_ASSERT(attributeIndicator.empty());

-        const TSizeSizePrFeatureDataPrVec& bucketCompressedLengthPerPerson = extractCompressedLengthPerPerson(featureData);
-        CPPUNIT_ASSERT_EQUAL(bucketPeopleCategories.size(), bucketCompressedLengthPerPerson.size());
+        const TSizeSizePrFeatureDataPrVec& bucketCompressedLengthPerPerson =
+            extractCompressedLengthPerPerson(featureData);
+        CPPUNIT_ASSERT_EQUAL(bucketPeopleCategories.size(),
+                             bucketCompressedLengthPerPerson.size());

         TSizeSizePrUInt64Map expectedBucketCompressedLengthPerPerson;
-        for (TSizeStrSetMapItr iter = bucketPeopleCategories.begin(); iter != bucketPeopleCategories.end(); ++iter) {
+        for (TSizeStrSetMapItr iter = bucketPeopleCategories.begin();
+             iter != bucketPeopleCategories.end(); ++iter) {
             TSizeSizePr key(iter->first, 0);
             const TStrSet& uniqueValues = iter->second;

@@ -533,17 +532,19 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() {
             CPPUNIT_ASSERT_EQUAL(
                 uniqueValues.size(),
                 static_cast<std::size_t>(std::count_if(
-                    uniqueValues.begin(), uniqueValues.end(), boost::bind(&core::CCompressUtils::addString, &compressor, _1))));
+                    uniqueValues.begin(), uniqueValues.end(),
+                    boost::bind(&core::CCompressUtils::addString, &compressor, _1))));
             size_t length(0);
             CPPUNIT_ASSERT(compressor.compressedLength(true, length));
             expectedBucketCompressedLengthPerPerson[key] = length;
         }
         LOG_DEBUG(<< "Time " << time << " bucketCompressedLengthPerPerson "
                   << core::CContainerPrinter::print(bucketCompressedLengthPerPerson));
-        CPPUNIT_ASSERT_EQUAL(expectedBucketCompressedLengthPerPerson.size(), bucketCompressedLengthPerPerson.size());
-        for (TSizeSizePrFeatureDataPrVec::const_iterator j = bucketCompressedLengthPerPerson.begin();
-             j != bucketCompressedLengthPerPerson.end();
-             ++j) {
+        CPPUNIT_ASSERT_EQUAL(expectedBucketCompressedLengthPerPerson.size(),
+                             bucketCompressedLengthPerPerson.size());
+        for (TSizeSizePrFeatureDataPrVec::const_iterator j =
+                 bucketCompressedLengthPerPerson.begin();
+             j != bucketCompressedLengthPerPerson.end(); ++j) {
             double expectedLength = expectedBucketCompressedLengthPerPerson[j->first];
             double actual = j->second.s_Count;
             CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedLength, actual, expectedLength * 0.1);
@@ -569,27 +570,17 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() {
     CDataGatherer::TFeatureVec features;
     features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute);
     SModelParams params(bucketLength);
-    CDataGatherer gatherer(model_t::E_PopulationEventRate,
-                           model_t::E_None,
-                           params,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           EMPTY_STRING,
-                           TStrVec(),
-                           false,
-                           searchKey,
-                           features,
-                           startTime,
-                           0);
+    CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None,
+                           params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
+                           EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(),
+                           false, searchKey, features, startTime, 0);

     core_t::TTime bucketStart = startTime;
     for (std::size_t i = 0u; i < numberBuckets; ++i, bucketStart += bucketLength) {
         TMessageVec messages;
         generateTestMessages(rng, bucketStart, bucketLength, messages);
         for (std::size_t j = 0u; j < messages.size(); ++j) {
-            addArrival(messages[j].s_Time, messages[j].s_Person, messages[j].s_Attribute, gatherer, m_ResourceMonitor);
+            addArrival(messages[j].s_Time, messages[j].s_Person,
+                       messages[j].s_Attribute, gatherer, m_ResourceMonitor);
         }
     }

@@ -617,13 +608,16 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() {
         TSizeUInt64PrVec nonZeroCounts;
         gatherer.personNonZeroCounts(bucketStart - bucketLength, nonZeroCounts);
         for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) {
-            if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), nonZeroCounts[i].first)) {
+            if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(),
+                                    nonZeroCounts[i].first)) {
                 const std::string& name = gatherer.personName(nonZeroCounts[i].first);
-                expectedNonZeroCounts[name] = static_cast<std::size_t>(nonZeroCounts[i].second);
+                expectedNonZeroCounts[name] =
+                    static_cast<std::size_t>(nonZeroCounts[i].second);
             }
         }
     }
-    LOG_DEBUG(<< "expectedNonZeroCounts = " << core::CContainerPrinter::print(expectedNonZeroCounts));
+    LOG_DEBUG(<< "expectedNonZeroCounts = "
+              << core::CContainerPrinter::print(expectedNonZeroCounts));

     std::string expectedFeatureData;
     {
@@ -634,8 +628,10 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() {
         for (std::size_t i = 0u; i < featureData.size(); ++i) {
             const TSizeSizePrFeatureDataPrVec& data = featureData[i].second;
             for (std::size_t j = 0u; j < data.size(); ++j) {
-                if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), data[j].first.first)) {
-                    std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " +
+                if (!std::binary_search(peopleToRemove.begin(),
+                                        peopleToRemove.end(), data[j].first.first)) {
+                    std::string key = model_t::print(featureData[i].first) + " " +
+                                      gatherer.personName(data[j].first.first) + " " +
                                       gatherer.attributeName(data[j].first.second);
                     expected.push_back(TStrFeatureDataPr(key, data[j].second));
                     LOG_DEBUG(<< "  " << key << " = " << data[j].second.s_Count);
@@ -647,7 +643,8 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() {

     gatherer.recyclePeople(peopleToRemove);

-    CPPUNIT_ASSERT_EQUAL(numberPeople - peopleToRemove.size(), gatherer.numberActivePeople());
+    CPPUNIT_ASSERT_EQUAL(numberPeople - peopleToRemove.size(),
+                         gatherer.numberActivePeople());
     for (std::size_t i = 0u; i < expectedPersonNames.size(); ++i) {
         std::size_t pid;
         CPPUNIT_ASSERT(gatherer.personId(expectedPersonNames[i], pid));
@@ -661,9 +658,11 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() {
         const std::string& name = gatherer.personName(nonZeroCounts[i].first);
         actualNonZeroCounts[name] = static_cast<std::size_t>(nonZeroCounts[i].second);
     }
-    LOG_DEBUG(<< "actualNonZeroCounts = " << core::CContainerPrinter::print(actualNonZeroCounts));
+    LOG_DEBUG(<< "actualNonZeroCounts = "
+              << core::CContainerPrinter::print(actualNonZeroCounts));

-    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), core::CContainerPrinter::print(actualNonZeroCounts));
+    CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts),
+                         core::CContainerPrinter::print(actualNonZeroCounts));

     std::string actualFeatureData;
     {
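
The assertion that numberActivePeople() drops by exactly peopleToRemove.size() reflects
how recycling works: removed person IDs are not erased but parked for reuse, keeping the
ID space dense. This is only a rough sketch of that bookkeeping, not CDataGatherer's
actual implementation; CPersonRegistry and its members are hypothetical names.

    #include <cstddef>
    #include <string>
    #include <vector>

    class CPersonRegistry {
    public:
        // Register a person, preferring to reuse a recycled slot.
        std::size_t add(const std::string& name) {
            if (!m_FreeIds.empty()) {
                std::size_t id = m_FreeIds.back();
                m_FreeIds.pop_back();
                m_Names[id] = name;
                return id;
            }
            m_Names.push_back(name);
            return m_Names.size() - 1;
        }
        // Park the given (currently active) IDs for later reuse.
        void recycle(const std::vector<std::size_t>& ids) {
            m_FreeIds.insert(m_FreeIds.end(), ids.begin(), ids.end());
        }
        std::size_t numberActive() const {
            return m_Names.size() - m_FreeIds.size();
        }

    private:
        std::vector<std::string> m_Names;
        std::vector<std::size_t> m_FreeIds;
    };
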
@@ -674,7 +673,8 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() { for (std::size_t i = 0u; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0u; j < data.size(); ++j) { - std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + std::string key = model_t::print(featureData[i].first) + " " + + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); actual.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(<< " " << key << " = " << data[j].second.s_Count); @@ -698,28 +698,18 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() { features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute); features.push_back(model_t::E_PopulationUniquePersonCountByAttribute); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(rng, startTime, bucketLength, messages); core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { - addArrival(messages[i].s_Time, messages[i].s_Person, messages[i].s_Attribute, gatherer, m_ResourceMonitor); + addArrival(messages[i].s_Time, messages[i].s_Person, + messages[i].s_Attribute, gatherer, m_ResourceMonitor); } // Remove attributes 1, 2, 3 and 15. 
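
Both removal tests build their expected output with the same idiom seen in the hunks
above: the removal list is kept sorted so std::binary_search can test membership while
the expected results are assembled. Shown standalone below; the function and variable
names are illustrative only, not part of the test code.

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Return the IDs that are not in toRemove.
    std::vector<std::size_t> filterRemoved(const std::vector<std::size_t>& ids,
                                           std::vector<std::size_t> toRemove) {
        // std::binary_search requires a sorted range.
        std::sort(toRemove.begin(), toRemove.end());
        std::vector<std::size_t> kept;
        for (std::size_t id : ids) {
            if (!std::binary_search(toRemove.begin(), toRemove.end(), id)) {
                kept.push_back(id);
            }
        }
        return kept;
    }
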
@@ -752,8 +742,10 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() { for (std::size_t i = 0u; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0u; j < data.size(); ++j) { - if (!std::binary_search(attributesToRemove.begin(), attributesToRemove.end(), data[j].first.second)) { - std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + if (!std::binary_search(attributesToRemove.begin(), + attributesToRemove.end(), data[j].first.second)) { + std::string key = model_t::print(featureData[i].first) + " " + + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); expected.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(<< " " << key << " = " << data[j].second.s_Count); @@ -765,7 +757,8 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() { gatherer.recycleAttributes(attributesToRemove); - CPPUNIT_ASSERT_EQUAL(numberAttributes - attributesToRemove.size(), gatherer.numberActiveAttributes()); + CPPUNIT_ASSERT_EQUAL(numberAttributes - attributesToRemove.size(), + gatherer.numberActiveAttributes()); for (std::size_t i = 0u; i < expectedAttributeNames.size(); ++i) { std::size_t cid; CPPUNIT_ASSERT(gatherer.attributeId(expectedAttributeNames[i], cid)); @@ -781,7 +774,8 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() { for (std::size_t i = 0u; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0u; j < data.size(); ++j) { - std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + std::string key = model_t::print(featureData[i].first) + " " + + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); actual.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(<< " " << key << " = " << data[j].second.s_Count); @@ -812,27 +806,17 @@ void CEventRatePopulationDataGathererTest::testPersistence() { features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute); features.push_back(model_t::E_PopulationUniquePersonCountByAttribute); SModelParams params(bucketLength); - CDataGatherer origDataGatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer origDataGatherer( + model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(rng, startTime, bucketLength, messages); for (std::size_t i = 0u; i < messages.size(); ++i) { - addArrival(messages[i].s_Time, messages[i].s_Person, messages[i].s_Attribute, origDataGatherer, m_ResourceMonitor); + addArrival(messages[i].s_Time, messages[i].s_Person, + messages[i].s_Attribute, origDataGatherer, m_ResourceMonitor); } std::string origXml; @@ -843,26 +827,18 @@ void CEventRatePopulationDataGathererTest::testPersistence() { } LOG_DEBUG(<< "origXml = " << origXml); - LOG_DEBUG(<< "length = " << origXml.length() << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace)); + LOG_DEBUG(<< "length = " << origXml.length() << ", # tabs " + << std::count_if(origXml.begin(), origXml.end(), isSpace)); // 
Restore the XML into a new data gatherer core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredDataGatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - traverser); + CDataGatherer restoredDataGatherer( + model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, TStrVec(), false, searchKey, traverser); // The XML representation of the new data gatherer should be the same as the // original @@ -882,21 +858,10 @@ void CEventRatePopulationDataGathererTest::testPersistence() { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationInfoContentByBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "value", - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, "value", TStrVec(), + false, searchKey, features, startTime, 0); core_t::TTime time = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { @@ -904,7 +869,8 @@ void CEventRatePopulationDataGathererTest::testPersistence() { generateTestMessages(rng, time, bucketLength, messages); for (std::size_t j = 0u; j < messages.size(); ++j) { - addArrival(messages[j].s_Time, messages[j].s_Person, "attribute", messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); + addArrival(messages[j].s_Time, messages[j].s_Person, "attribute", + messages[j].s_Attribute, dataGatherer, m_ResourceMonitor); std::size_t cid; dataGatherer.attributeId(messages[j].s_Attribute, cid); @@ -927,26 +893,18 @@ void CEventRatePopulationDataGathererTest::testPersistence() { } LOG_DEBUG(<< "origXml = " << origXml); - LOG_DEBUG(<< "length = " << origXml.length() << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace)); + LOG_DEBUG(<< "length = " << origXml.length() << ", # tabs " + << std::count_if(origXml.begin(), origXml.end(), isSpace)); // Restore the XML into a new data gatherer core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredDataGatherer(model_t::E_PopulationEventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - traverser); + CDataGatherer restoredDataGatherer( + model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, TStrVec(), false, searchKey, traverser); // The XML representation of the new data gatherer should be the same as the // original @@ -962,22 +920,30 @@ void CEventRatePopulationDataGathererTest::testPersistence() { } CppUnit::Test* CEventRatePopulationDataGathererTest::suite() { - CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRatePopulationDataGathererTest"); + CppUnit::TestSuite* suiteOfTests = + new CppUnit::TestSuite("CEventRatePopulationDataGathererTest"); 
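    // Registration idiom used below (illustrative sketch): CppUnit's
    // TestCaller is a class template instantiated for the test fixture, so
    // each registration has the shape
    //
    //     suiteOfTests->addTest(new CppUnit::TestCaller<CEventRatePopulationDataGathererTest>(
    //         "CEventRatePopulationDataGathererTest::testAttributeCounts",
    //         &CEventRatePopulationDataGathererTest::testAttributeCounts));
    //
    // naming the test for reporting and binding the member function to run.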
suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRatePopulationDataGathererTest::testAttributeCounts", &CEventRatePopulationDataGathererTest::testAttributeCounts)); + "CEventRatePopulationDataGathererTest::testAttributeCounts", + &CEventRatePopulationDataGathererTest::testAttributeCounts)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRatePopulationDataGathererTest::testAttributeIndicator", &CEventRatePopulationDataGathererTest::testAttributeIndicator)); + "CEventRatePopulationDataGathererTest::testAttributeIndicator", + &CEventRatePopulationDataGathererTest::testAttributeIndicator)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRatePopulationDataGathererTest::testUniqueValueCounts", &CEventRatePopulationDataGathererTest::testUniqueValueCounts)); + "CEventRatePopulationDataGathererTest::testUniqueValueCounts", + &CEventRatePopulationDataGathererTest::testUniqueValueCounts)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRatePopulationDataGathererTest::testCompressedLength", &CEventRatePopulationDataGathererTest::testCompressedLength)); + "CEventRatePopulationDataGathererTest::testCompressedLength", + &CEventRatePopulationDataGathererTest::testCompressedLength)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRatePopulationDataGathererTest::testRemovePeople", &CEventRatePopulationDataGathererTest::testRemovePeople)); + "CEventRatePopulationDataGathererTest::testRemovePeople", + &CEventRatePopulationDataGathererTest::testRemovePeople)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRatePopulationDataGathererTest::testRemoveAttributes", &CEventRatePopulationDataGathererTest::testRemoveAttributes)); + "CEventRatePopulationDataGathererTest::testRemoveAttributes", + &CEventRatePopulationDataGathererTest::testRemoveAttributes)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRatePopulationDataGathererTest::testPersistence", &CEventRatePopulationDataGathererTest::testPersistence)); + "CEventRatePopulationDataGathererTest::testPersistence", + &CEventRatePopulationDataGathererTest::testPersistence)); return suiteOfTests; } diff --git a/lib/model/unittest/CEventRatePopulationModelTest.cc b/lib/model/unittest/CEventRatePopulationModelTest.cc index 62c1f8de25..a874dc9917 100644 --- a/lib/model/unittest/CEventRatePopulationModelTest.cc +++ b/lib/model/unittest/CEventRatePopulationModelTest.cc @@ -73,7 +73,8 @@ struct SMessage { : s_Time(time), s_Person(person), s_Attribute(attribute) {} bool operator<(const SMessage& other) const { - return maths::COrderings::lexicographical_compare(s_Time, s_Person, s_Attribute, other.s_Time, other.s_Person, other.s_Attribute); + return maths::COrderings::lexicographical_compare( + s_Time, s_Person, s_Attribute, other.s_Time, other.s_Person, other.s_Attribute); } core_t::TTime s_Time; @@ -90,7 +91,9 @@ struct SAnomaly { std::string s_Person; TDoubleStrPrVec s_Attributes; - bool operator<(const SAnomaly& other) const { return s_Bucket < other.s_Bucket; } + bool operator<(const SAnomaly& other) const { + return s_Bucket < other.s_Bucket; + } std::string print() const { std::ostringstream result; @@ -138,9 +141,12 @@ void generateTestMessages(core_t::TTime startTime, core_t::TTime bucketLength, T people.push_back("p" + boost::lexical_cast(i)); } - std::size_t c0People[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; - std::size_t c1People[] = {0, 1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; - std::size_t c2People[] = {0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16, 17, 18, 19}; + std::size_t c0People[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + std::size_t c1People[] = {0, 1, 2, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19}; + std::size_t c2People[] = {0, 1, 2, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19}; std::size_t c3People[] = {3, 4}; std::size_t c4People[] = {3}; @@ -153,12 +159,10 @@ void generateTestMessages(core_t::TTime startTime, core_t::TTime bucketLength, T double attributeRates[] = {10.0, 0.02, 15.0, 2.0, 1.0}; - TSizeSizeSizeTr anomaliesAttributePerson[] = {TSizeSizeSizeTr(10u, 0u, 1u), - TSizeSizeSizeTr(15u, 0u, 11u), - TSizeSizeSizeTr(30u, 2u, 4u), - TSizeSizeSizeTr(35u, 2u, 5u), - TSizeSizeSizeTr(50u, 0u, 11u), - TSizeSizeSizeTr(75u, 2u, 5u)}; + TSizeSizeSizeTr anomaliesAttributePerson[] = { + TSizeSizeSizeTr(10u, 0u, 1u), TSizeSizeSizeTr(15u, 0u, 11u), + TSizeSizeSizeTr(30u, 2u, 4u), TSizeSizeSizeTr(35u, 2u, 5u), + TSizeSizeSizeTr(50u, 0u, 11u), TSizeSizeSizeTr(75u, 2u, 5u)}; test::CRandomNumbers rng; @@ -177,11 +181,13 @@ void generateTestMessages(core_t::TTime startTime, core_t::TTime bucketLength, T } TDoubleVec times; - rng.generateUniformSamples(0.0, static_cast(bucketLength - 1), n, times); + rng.generateUniformSamples( + 0.0, static_cast(bucketLength - 1), n, times); for (std::size_t l = 0u; l < times.size(); ++l) { core_t::TTime time = startTime + static_cast(times[l]); - messages.push_back(SMessage(time, people[attributePeople[j][k]], attributes[j])); + messages.push_back(SMessage(time, people[attributePeople[j][k]], + attributes[j])); } } } @@ -190,7 +196,9 @@ void generateTestMessages(core_t::TTime startTime, core_t::TTime bucketLength, T std::sort(messages.begin(), messages.end()); } -void addArrival(const SMessage& message, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) { +void addArrival(const SMessage& message, + const CModelFactory::TDataGathererPtr& gatherer, + CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields; fields.push_back(&message.s_Person); fields.push_back(&message.s_Attribute); @@ -229,7 +237,8 @@ void CEventRatePopulationModelTest::testBasicAccessors() { features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute); factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(dynamic_cast(factory.makeDataGatherer(gathererInitData))); + CModelFactory::TDataGathererPtr gatherer( + dynamic_cast(factory.makeDataGatherer(gathererInitData))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); @@ -242,7 +251,8 @@ void CEventRatePopulationModelTest::testBasicAccessors() { if (messages[i].s_Time >= startTime + bucketLength) { model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); - LOG_DEBUG(<< "Testing bucket = [" << startTime << "," << startTime + bucketLength << ")"); + LOG_DEBUG(<< "Testing bucket = [" << startTime << "," + << startTime + bucketLength << ")"); // Test the person and attribute invariants. for (std::size_t j = 0u; j < gatherer->numberActivePeople(); ++j) { @@ -261,7 +271,8 @@ void CEventRatePopulationModelTest::testBasicAccessors() { TSizeVec expectedCurrentBucketPersonIds; // Test the person counts. 
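                // (Each person expected in this bucket must resolve to an id
                //  via gatherer->personId, and the model's reported count for
                //  that id is checked against the expectation accumulated
                //  while replaying the messages.)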
- for (TStrUInt64MapCItr j = expectedBucketPersonCounts.begin(); j != expectedBucketPersonCounts.end(); ++j) { + for (TStrUInt64MapCItr j = expectedBucketPersonCounts.begin(); + j != expectedBucketPersonCounts.end(); ++j) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(j->first, pid)); @@ -273,20 +284,22 @@ void CEventRatePopulationModelTest::testBasicAccessors() { } // Test the person attribute counts. - for (TStrStrPrDoubleMapCItr j = expectedBucketPersonAttributeCounts.begin(); j != expectedBucketPersonAttributeCounts.end(); - ++j) { + for (TStrStrPrDoubleMapCItr j = expectedBucketPersonAttributeCounts.begin(); + j != expectedBucketPersonAttributeCounts.end(); ++j) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(j->first.first, pid)); std::size_t cid; CPPUNIT_ASSERT(gatherer->attributeId(j->first.second, cid)); - TDouble1Vec count = model->currentBucketValue(model_t::E_PopulationCountByBucketPersonAndAttribute, pid, cid, startTime); + TDouble1Vec count = model->currentBucketValue( + model_t::E_PopulationCountByBucketPersonAndAttribute, pid, cid, startTime); CPPUNIT_ASSERT(!count.empty()); CPPUNIT_ASSERT_EQUAL(j->second, count[0]); } // Test the current bucket people. - std::sort(expectedCurrentBucketPersonIds.begin(), expectedCurrentBucketPersonIds.end()); + std::sort(expectedCurrentBucketPersonIds.begin(), + expectedCurrentBucketPersonIds.end()); TSizeVec bucketPersonIds; model->currentBucketPersonIds(startTime, bucketPersonIds); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedCurrentBucketPersonIds), @@ -325,9 +338,11 @@ void CEventRatePopulationModelTest::testFeatures() { using TMathsModelPtr = boost::shared_ptr; using TSizeMathsModelPtrMap = std::map; using TDouble2VecVecDouble2Vec4VecVecPr = std::pair; - using TSizeDouble2VecVecDouble2Vec4VecVecPrMap = std::map; + using TSizeDouble2VecVecDouble2Vec4VecVecPrMap = + std::map; - static const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; + static const maths_t::TWeightStyleVec WEIGHT_STYLES{ + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -341,18 +356,23 @@ void CEventRatePopulationModelTest::testFeatures() { SModelParams params(bucketLength); params.s_InitialDecayRateMultiplier = 1.0; CEventRatePopulationModelFactory factory(params); - CModelFactory::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}; + CModelFactory::TFeatureVec features{ + model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(dynamic_cast(factory.makeDataGatherer(gathererInitData))); + CModelFactory::TDataGathererPtr gatherer( + dynamic_cast(factory.makeDataGatherer(gathererInitData))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRatePopulationModel* model = dynamic_cast(modelHolder.get()); + CEventRatePopulationModel* model = + dynamic_cast(modelHolder.get()); - model::CModelFactory::TFeatureMathsModelPtrPrVec models{factory.defaultFeatureModels(features, bucketLength, 1.0, false)}; + model::CModelFactory::TFeatureMathsModelPtrPrVec models{ + factory.defaultFeatureModels(features, bucketLength, 1.0, 
false)}; CPPUNIT_ASSERT_EQUAL(std::size_t(1), models.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationCountByBucketPersonAndAttribute, models[0].first); + CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationCountByBucketPersonAndAttribute, + models[0].first); std::size_t numberAttributes = 0u; std::size_t numberPeople = 0u; @@ -382,15 +402,17 @@ void CEventRatePopulationModelTest::testFeatures() { std::size_t pid = count_.first.first; std::size_t cid = count_.first.second; core_t::TTime time = startTime + bucketLength / 2; - double count = model_t::offsetCountToZero(model_t::E_PopulationCountByBucketPersonAndAttribute, - static_cast(count_.second)); + double count = model_t::offsetCountToZero( + model_t::E_PopulationCountByBucketPersonAndAttribute, + static_cast(count_.second)); TMathsModelPtr& model_ = expectedPopulationModels[cid]; if (model_ == nullptr) { model_.reset(models[0].second->clone(cid)); } TDoubleVec sample(1, count); - TDouble2Vec4Vec weight{{model->sampleRateWeight(pid, cid)}, model_->winsorisationWeight(1.0, time, sample)}; + TDouble2Vec4Vec weight{{model->sampleRateWeight(pid, cid)}, + model_->winsorisationWeight(1.0, time, sample)}; populationSamples[cid].first.push_back({sample[0]}); populationSamples[cid].second.push_back(weight); } @@ -415,28 +437,33 @@ void CEventRatePopulationModelTest::testFeatures() { TSizeSizePrFeatureDataPrVec expectedPeoplePerAttribute; expectedPeoplePerAttribute.reserve(numberAttributes); for (std::size_t j = 0u; j < numberAttributes; ++j) { - expectedPeoplePerAttribute.emplace_back(std::make_pair(size_t(0), j), TFeatureData(j)); + expectedPeoplePerAttribute.emplace_back( + std::make_pair(size_t(0), j), TFeatureData(j)); } for (const auto& attribute : attributePeople) { - expectedPeoplePerAttribute[attribute.first].second = attribute.second.size(); + expectedPeoplePerAttribute[attribute.first].second = + attribute.second.size(); } // Check the number of people per attribute. - const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = - model->featureData(model_t::E_PopulationUniquePersonCountByAttribute, startTime); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedPeoplePerAttribute), - core::CContainerPrinter::print(peoplePerAttribute)); + const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = model->featureData( + model_t::E_PopulationUniquePersonCountByAttribute, startTime); + CPPUNIT_ASSERT_EQUAL( + core::CContainerPrinter::print(expectedPeoplePerAttribute), + core::CContainerPrinter::print(peoplePerAttribute)); // Check the non-zero (person, attribute) counts. 
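        // (Comparison idiom: expected and actual feature data are rendered
        //  with core::CContainerPrinter::print and compared as strings, so a
        //  failed assertion prints the full contents of both containers.)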
- const TSizeSizePrFeatureDataPrVec& nonZeroCounts = - model->featureData(model_t::E_PopulationCountByBucketPersonAndAttribute, startTime); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), core::CContainerPrinter::print(nonZeroCounts)); + const TSizeSizePrFeatureDataPrVec& nonZeroCounts = model->featureData( + model_t::E_PopulationCountByBucketPersonAndAttribute, startTime); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), + core::CContainerPrinter::print(nonZeroCounts)); for (std::size_t cid = 0u; cid < numberAttributes; ++cid) { - const maths::CModel* populationModel = - model->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, cid); + const maths::CModel* populationModel = model->details()->model( + model_t::E_PopulationCountByBucketPersonAndAttribute, cid); CPPUNIT_ASSERT(populationModel); - CPPUNIT_ASSERT_EQUAL(expectedPopulationModels[cid]->checksum(), populationModel->checksum()); + CPPUNIT_ASSERT_EQUAL(expectedPopulationModels[cid]->checksum(), + populationModel->checksum()); } startTime += bucketLength; @@ -456,7 +483,8 @@ void CEventRatePopulationModelTest::testComputeProbability() { using TAnomalyVec = std::vector; using TDoubleAnomalyPr = std::pair; - using TAnomalyAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; + using TAnomalyAccumulator = + maths::CBasicStatistics::COrderStatisticsHeap; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -475,28 +503,34 @@ void CEventRatePopulationModelTest::testComputeProbability() { CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRatePopulationModel* model = dynamic_cast(modelHolder.get()); + CEventRatePopulationModel* model = + dynamic_cast(modelHolder.get()); TAnomalyAccumulator anomalies(6u); for (std::size_t i = 0u, bucket = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= startTime + bucketLength) { - LOG_DEBUG(<< "Updating and testing bucket = [" << startTime << "," << startTime + bucketLength << ")"); + LOG_DEBUG(<< "Updating and testing bucket = [" << startTime << "," + << startTime + bucketLength << ")"); model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); SAnnotatedProbability annotatedProbability; for (std::size_t pid = 0u; pid < gatherer->numberActivePeople(); ++pid) { CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model->computeProbability(pid, startTime, startTime + bucketLength, partitioningFields, 2, annotatedProbability); + model->computeProbability(pid, startTime, startTime + bucketLength, + partitioningFields, 2, annotatedProbability); std::string person = model->personName(pid); TDoubleStrPrVec attributes; - for (std::size_t j = 0u; j < annotatedProbability.s_AttributeProbabilities.size(); ++j) { - attributes.emplace_back(annotatedProbability.s_AttributeProbabilities[j].s_Probability, - *annotatedProbability.s_AttributeProbabilities[j].s_Attribute); + for (std::size_t j = 0u; + j < annotatedProbability.s_AttributeProbabilities.size(); ++j) { + attributes.emplace_back( + annotatedProbability.s_AttributeProbabilities[j].s_Probability, + *annotatedProbability.s_AttributeProbabilities[j].s_Attribute); } - anomalies.add({annotatedProbability.s_Probability, SAnomaly(bucket, person, attributes)}); + anomalies.add({annotatedProbability.s_Probability, + SAnomaly(bucket, 
person, attributes)}); } startTime += bucketLength; @@ -518,12 +552,10 @@ void CEventRatePopulationModelTest::testComputeProbability() { LOG_DEBUG(<< "orderedAnomalies = " << core::CContainerPrinter::print(orderedAnomalies)); - std::string expectedAnomalies[] = {std::string("[10, p1, c0]"), - std::string("[15, p11, c0]"), - std::string("[30, p4, c2]"), - std::string("[35, p5, c2]"), - std::string("[50, p11, c0]"), - std::string("[75, p5, c2]")}; + std::string expectedAnomalies[] = { + std::string("[10, p1, c0]"), std::string("[15, p11, c0]"), + std::string("[30, p4, c2]"), std::string("[35, p5, c2]"), + std::string("[50, p11, c0]"), std::string("[75, p5, c2]")}; CPPUNIT_ASSERT_EQUAL(boost::size(expectedAnomalies), orderedAnomalies.size()); for (std::size_t i = 0u; i < orderedAnomalies.size(); ++i) { @@ -545,10 +577,13 @@ void CEventRatePopulationModelTest::testPrune() { const core_t::TTime bucketLength = 3600; const std::size_t numberBuckets = 1000u; - std::string people[] = {std::string("p1"), std::string("p2"), std::string("p3"), std::string("p4")}; - std::string attributes[] = {std::string("c1"), std::string("c2"), std::string("c3"), std::string("c4"), std::string("c5")}; + std::string people[] = {std::string("p1"), std::string("p2"), + std::string("p3"), std::string("p4")}; + std::string attributes[] = {std::string("c1"), std::string("c2"), std::string("c3"), + std::string("c4"), std::string("c5")}; - TStrSizePrVecVec eventCounts[] = {TStrSizePrVecVec(), TStrSizePrVecVec(), TStrSizePrVecVec(), TStrSizePrVecVec()}; + TStrSizePrVecVec eventCounts[] = {TStrSizePrVecVec(), TStrSizePrVecVec(), + TStrSizePrVecVec(), TStrSizePrVecVec()}; { TStrSizePrVec attributeCounts; attributeCounts.push_back(TStrSizePr(attributes[0], 0)); @@ -628,7 +663,8 @@ void CEventRatePopulationModelTest::testPrune() { core_t::TTime time = bucketStart; core_t::TTime dt = bucketLength / static_cast(n); for (std::size_t l = 0u; l < n; ++l, time += dt) { - messages.push_back(SMessage(time, people[i], attributeEventCounts[k].first)); + messages.push_back(SMessage(time, people[i], + attributeEventCounts[k].first)); } } } @@ -638,8 +674,10 @@ void CEventRatePopulationModelTest::testPrune() { TMessageVec expectedMessages; expectedMessages.reserve(messages.size()); for (std::size_t i = 0u; i < messages.size(); ++i) { - if (std::binary_search(boost::begin(expectedPeople), boost::end(expectedPeople), messages[i].s_Person) && - std::binary_search(boost::begin(expectedAttributes), boost::end(expectedAttributes), messages[i].s_Attribute)) { + if (std::binary_search(boost::begin(expectedPeople), + boost::end(expectedPeople), messages[i].s_Person) && + std::binary_search(boost::begin(expectedAttributes), + boost::end(expectedAttributes), messages[i].s_Attribute)) { expectedMessages.push_back(messages[i]); } } @@ -676,8 +714,9 @@ void CEventRatePopulationModelTest::testPrune() { bucketStart = gatherer->currentBucketStartTime() + bucketLength; - SMessage newMessages[] = { - SMessage(bucketStart + 10, "p1", "c2"), SMessage(bucketStart + 200, "p5", "c6"), SMessage(bucketStart + 2100, "p5", "c6")}; + SMessage newMessages[] = {SMessage(bucketStart + 10, "p1", "c2"), + SMessage(bucketStart + 200, "p5", "c6"), + SMessage(bucketStart + 2100, "p5", "c6")}; for (std::size_t i = 0u; i < boost::size(newMessages); ++i) { addArrival(newMessages[i], gatherer, m_ResourceMonitor); @@ -692,10 +731,12 @@ void CEventRatePopulationModelTest::testPrune() { // Test that calling prune on a cloned model which has seen no new data does nothing 
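    // (Rationale, inferred from the assertions that follow: prune evicts
    //  people and attributes idle for longer than the prune window; a clone
    //  made for persistence inherits its source's last-seen times, so an
    //  immediate prune finds nothing to evict and numberActivePeople() must
    //  be unchanged.)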
CAnomalyDetectorModel::TModelPtr clonedModelHolder(model->cloneForPersistence()); - std::size_t numberOfPeopleBeforePrune(clonedModelHolder->dataGatherer().numberActivePeople()); + std::size_t numberOfPeopleBeforePrune( + clonedModelHolder->dataGatherer().numberActivePeople()); CPPUNIT_ASSERT(numberOfPeopleBeforePrune > 0); clonedModelHolder->prune(clonedModelHolder->defaultPruneWindow()); - CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, clonedModelHolder->dataGatherer().numberActivePeople()); + CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, + clonedModelHolder->dataGatherer().numberActivePeople()); } void CEventRatePopulationModelTest::testKey() { @@ -721,10 +762,12 @@ void CEventRatePopulationModelTest::testKey() { for (std::size_t j = 0u; j < boost::size(useNull); ++j) { for (std::size_t k = 0u; k < boost::size(byField); ++k) { for (std::size_t l = 0u; l < boost::size(partitionField); ++l) { - CSearchKey key( - ++identifier, countFunctions[i], useNull[j], model_t::E_XF_None, "", byField[k], "over", partitionField[l]); + CSearchKey key(++identifier, countFunctions[i], + useNull[j], model_t::E_XF_None, "", + byField[k], "over", partitionField[l]); - CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = config.factory(key); + CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = + config.factory(key); LOG_DEBUG(<< "expected key = " << key); LOG_DEBUG(<< "actual key = " << factory->searchKey()); @@ -744,19 +787,23 @@ void CEventRatePopulationModelTest::testFrequency() { // Test we correctly compute frequencies for people and attributes. const core_t::TTime bucketLength = 600; - const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10"}; - const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}; + const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", + "a6", "a7", "a8", "a9", "a10"}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", + "p6", "p7", "p8", "p9", "p10"}; std::size_t period[] = {1u, 1u, 10u, 3u, 4u, 5u, 2u, 1u, 3u, 7u}; core_t::TTime startTime = 0; TMessageVec messages; std::size_t bucket = 0u; - for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; bucketStart += bucketLength, ++bucket) { + for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; + bucketStart += bucketLength, ++bucket) { for (std::size_t i = 0u; i < boost::size(people); ++i) { if (bucket % period[i] == 0) { for (std::size_t j = 0u; j < i + 1; ++j) { - messages.push_back(SMessage(bucketStart + bucketLength / 2, people[i], attributes[j])); + messages.push_back(SMessage(bucketStart + bucketLength / 2, + people[i], attributes[j])); } } } @@ -774,12 +821,14 @@ void CEventRatePopulationModelTest::testFrequency() { factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - const model::CDataGatherer& populationGatherer(dynamic_cast(*gatherer)); + const model::CDataGatherer& populationGatherer( + dynamic_cast(*gatherer)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CEventRatePopulationModel* populationModel = dynamic_cast(model.get()); + CEventRatePopulationModel* populationModel = + dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel); core_t::TTime time = startTime; @@ -799,9 +848,11 @@ void CEventRatePopulationModelTest::testFrequency() { 
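        // (Invariant checked by this hunk: a person who sends messages every
        //  period[i] buckets should converge to personFrequency close to
        //  1.0 / period[i]; the assertion allows 10% relative error, e.g. a
        //  period of 10 gives an expected frequency of 0.1 +/- 0.01.)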
CPPUNIT_ASSERT(gatherer->personId(people[i], pid)); LOG_DEBUG(<< "frequency = " << populationModel->personFrequency(pid)); LOG_DEBUG(<< "expected frequency = " << 1.0 / static_cast(period[i])); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - 1.0 / static_cast(period[i]), populationModel->personFrequency(pid), 0.1 / static_cast(period[i])); - meanError.add(std::fabs(populationModel->personFrequency(pid) - 1.0 / static_cast(period[i]))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / static_cast(period[i]), + populationModel->personFrequency(pid), + 0.1 / static_cast(period[i])); + meanError.add(std::fabs(populationModel->personFrequency(pid) - + 1.0 / static_cast(period[i]))); } LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.002); @@ -813,7 +864,8 @@ void CEventRatePopulationModelTest::testFrequency() { CPPUNIT_ASSERT(populationGatherer.attributeId(attributes[i], cid)); LOG_DEBUG(<< "frequency = " << populationModel->attributeFrequency(cid)); LOG_DEBUG(<< "expected frequency = " << (10.0 - static_cast(i)) / 10.0); - CPPUNIT_ASSERT_EQUAL((10.0 - static_cast(i)) / 10.0, populationModel->attributeFrequency(cid)); + CPPUNIT_ASSERT_EQUAL((10.0 - static_cast(i)) / 10.0, + populationModel->attributeFrequency(cid)); } } } @@ -830,35 +882,45 @@ void CEventRatePopulationModelTest::testSampleRateWeight() { // one message per attribute per 10 buckets. const core_t::TTime bucketLength = 600; - const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10"}; - const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", - "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20"}; + const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", + "a6", "a7", "a8", "a9", "a10"}; + const std::string people[] = { + "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", + "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20"}; std::size_t heavyHitters[] = {0u, 4u}; - std::size_t normal[] = {1u, 2u, 3u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u}; + std::size_t normal[] = {1u, 2u, 3u, 5u, 6u, 7u, 8u, 9u, 10u, + 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u}; - std::size_t messagesPerBucket = boost::size(heavyHitters) * boost::size(attributes) + boost::size(normal); + std::size_t messagesPerBucket = + boost::size(heavyHitters) * boost::size(attributes) + boost::size(normal); test::CRandomNumbers rng; core_t::TTime startTime = 0; TMessageVec messages; - for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; bucketStart += bucketLength) { + for (core_t::TTime bucketStart = startTime; + bucketStart < 100 * bucketLength; bucketStart += bucketLength) { TSizeVec times; - rng.generateUniformSamples( - static_cast(bucketStart), static_cast(bucketStart + bucketLength), messagesPerBucket, times); + rng.generateUniformSamples(static_cast(bucketStart), + static_cast(bucketStart + bucketLength), + messagesPerBucket, times); std::size_t m = 0u; for (std::size_t i = 0u; i < boost::size(attributes); ++i) { for (std::size_t j = 0u; j < boost::size(heavyHitters); ++j) { - messages.push_back(SMessage(static_cast(times[m++]), people[heavyHitters[j]], attributes[i])); + messages.push_back(SMessage(static_cast(times[m++]), + people[heavyHitters[j]], attributes[i])); } } TSizeVec attributeIndexes; - rng.generateUniformSamples(0, boost::size(attributes), boost::size(normal), attributeIndexes); + rng.generateUniformSamples(0, 
boost::size(attributes), + boost::size(normal), attributeIndexes); for (std::size_t i = 0u; i < boost::size(normal); ++i) { - messages.push_back(SMessage(static_cast(times[m++]), people[normal[i]], attributes[attributeIndexes[i]])); + messages.push_back(SMessage(static_cast(times[m++]), + people[normal[i]], + attributes[attributeIndexes[i]])); } } @@ -877,7 +939,8 @@ void CEventRatePopulationModelTest::testSampleRateWeight() { CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CEventRatePopulationModel* populationModel = dynamic_cast(model.get()); + CEventRatePopulationModel* populationModel = + dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel); core_t::TTime time = startTime; @@ -896,7 +959,8 @@ void CEventRatePopulationModelTest::testSampleRateWeight() { // + ("# heavy hitters")) // / "# people" - double expectedRateWeight = (static_cast(boost::size(normal)) / static_cast(boost::size(attributes)) + + double expectedRateWeight = (static_cast(boost::size(normal)) / + static_cast(boost::size(attributes)) + static_cast(boost::size(heavyHitters))) / static_cast(boost::size(people)); LOG_DEBUG(<< "expectedRateWeight = " << expectedRateWeight); @@ -907,8 +971,10 @@ void CEventRatePopulationModelTest::testSampleRateWeight() { CPPUNIT_ASSERT(gatherer->personId(people[heavyHitters[i]], pid)); for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); - LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) << ", sampleRateWeight = " << sampleRateWeight); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedRateWeight, sampleRateWeight, 0.15 * expectedRateWeight); + LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) + << ", sampleRateWeight = " << sampleRateWeight); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedRateWeight, sampleRateWeight, + 0.15 * expectedRateWeight); } } @@ -918,7 +984,8 @@ void CEventRatePopulationModelTest::testSampleRateWeight() { CPPUNIT_ASSERT(gatherer->personId(people[normal[i]], pid)); for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); - LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) << ", sampleRateWeight = " << sampleRateWeight); + LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) + << ", sampleRateWeight = " << sampleRateWeight); CPPUNIT_ASSERT_EQUAL(1.0, sampleRateWeight); } } @@ -937,10 +1004,12 @@ void CEventRatePopulationModelTest::testPeriodicity() { static const core_t::TTime DAY = 86400; const core_t::TTime bucketLength = 3600; - double rate[] = {1, 1, 2, 2, 3, 5, 6, 6, 20, 21, 4, 3, 4, 4, 8, 25, 7, 6, 5, 1, 1, 4, 1, 1}; + double rate[] = {1, 1, 2, 2, 3, 5, 6, 6, 20, 21, 4, 3, + 4, 4, 8, 25, 7, 6, 5, 1, 1, 4, 1, 1}; const std::string attributes[] = {"a1", "a2"}; double scales[] = {1.0, 1.5}; - const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", + "p6", "p7", "p8", "p9", "p10"}; test::CRandomNumbers rng; @@ -951,11 +1020,13 @@ void CEventRatePopulationModelTest::testPeriodicity() { for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { for (std::size_t i = 0u; i < boost::size(attributes); ++i) { TUIntVec rates; - rng.generatePoissonSamples(scales[i] * rate[(time % DAY) / HOUR], boost::size(people), rates); + 
rng.generatePoissonSamples(scales[i] * rate[(time % DAY) / HOUR], + boost::size(people), rates); for (std::size_t j = 0u; j < rates.size(); ++j) { for (unsigned int t = 0; t < rates[j]; ++t) { - messages.push_back(SMessage(time + (t * bucketLength) / (rates[j] + 1), people[j], attributes[i])); + messages.push_back(SMessage(time + (t * bucketLength) / (rates[j] + 1), + people[j], attributes[i])); } } } @@ -976,7 +1047,8 @@ void CEventRatePopulationModelTest::testPeriodicity() { CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CEventRatePopulationModel* populationModel = dynamic_cast(model.get()); + CEventRatePopulationModel* populationModel = + dynamic_cast(model.get()); CPPUNIT_ASSERT(populationModel); TStrDoubleMap personProbabilitiesWithoutPeriodicity; @@ -995,19 +1067,26 @@ void CEventRatePopulationModelTest::testPeriodicity() { CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - if (populationModel->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, annotatedProbability) == - false) { + if (populationModel->computeProbability( + pid, time, time + bucketLength, partitioningFields, 1, + annotatedProbability) == false) { continue; } if (time < startTime + 3 * DAY) { double& minimumProbability = - personProbabilitiesWithoutPeriodicity.insert(TStrDoubleMap::value_type(people[j], 1.0)).first->second; - minimumProbability = std::min(minimumProbability, annotatedProbability.s_Probability); + personProbabilitiesWithoutPeriodicity + .insert(TStrDoubleMap::value_type(people[j], 1.0)) + .first->second; + minimumProbability = std::min( + minimumProbability, annotatedProbability.s_Probability); } else if (time > startTime + 5 * DAY) { double& minimumProbability = - personProbabilitiesWithPeriodicity.insert(TStrDoubleMap::value_type(people[j], 1.0)).first->second; - minimumProbability = std::min(minimumProbability, annotatedProbability.s_Probability); + personProbabilitiesWithPeriodicity + .insert(TStrDoubleMap::value_type(people[j], 1.0)) + .first->second; + minimumProbability = std::min( + minimumProbability, annotatedProbability.s_Probability); } } time += bucketLength; @@ -1050,7 +1129,8 @@ void CEventRatePopulationModelTest::testSkipSampling() { CModelFactory::TDataGathererPtr gathererNoGap(factory.makeDataGatherer(gathererNoGapInitData)); CModelFactory::SModelInitializationData modelNoGapInitData(gathererNoGap); CAnomalyDetectorModel::TModelPtr modelNoGapHolder(factory.makeModel(modelNoGapInitData)); - CEventRatePopulationModel* modelNoGap = dynamic_cast(modelNoGapHolder.get()); + CEventRatePopulationModel* modelNoGap = + dynamic_cast(modelNoGapHolder.get()); addArrival(SMessage(100, "p1", "a1"), gathererNoGap, m_ResourceMonitor); addArrival(SMessage(100, "p1", "a2"), gathererNoGap, m_ResourceMonitor); @@ -1062,10 +1142,12 @@ void CEventRatePopulationModelTest::testSkipSampling() { modelNoGap->sample(300, 400, m_ResourceMonitor); CModelFactory::SGathererInitializationData gathererWithGapInitData(startTime); - CModelFactory::TDataGathererPtr gathererWithGap(factory.makeDataGatherer(gathererWithGapInitData)); + CModelFactory::TDataGathererPtr gathererWithGap( + factory.makeDataGatherer(gathererWithGapInitData)); CModelFactory::SModelInitializationData modelWithGapInitData(gathererWithGap); CAnomalyDetectorModel::TModelPtr modelWithGapHolder(factory.makeModel(modelWithGapInitData)); - CEventRatePopulationModel* modelWithGap 
= dynamic_cast(modelWithGapHolder.get()); + CEventRatePopulationModel* modelWithGap = + dynamic_cast(modelWithGapHolder.get()); addArrival(SMessage(100, "p1", "a1"), gathererWithGap, m_ResourceMonitor); addArrival(SMessage(100, "p1", "a2"), gathererWithGap, m_ResourceMonitor); @@ -1087,22 +1169,24 @@ void CEventRatePopulationModelTest::testSkipSampling() { modelWithGap->sample(1100, 1200, m_ResourceMonitor); // Check priors are the same - CPPUNIT_ASSERT_EQUAL(static_cast( - modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)) - ->residualModel() - .checksum(), - static_cast( - modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)) - ->residualModel() - .checksum()); - CPPUNIT_ASSERT_EQUAL(static_cast( - modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)) - ->residualModel() - .checksum(), - static_cast( - modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)) - ->residualModel() - .checksum()); + CPPUNIT_ASSERT_EQUAL( + static_cast( + modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)) + ->residualModel() + .checksum(), + static_cast( + modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)) + ->residualModel() + .checksum()); + CPPUNIT_ASSERT_EQUAL( + static_cast( + modelWithGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)) + ->residualModel() + .checksum(), + static_cast( + modelNoGap->details()->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)) + ->residualModel() + .checksum()); // Confirm last seen times are only updated by gap duration by forcing p2 and a2 to be pruned modelWithGap->sample(1200, 1500, m_ResourceMonitor); @@ -1130,7 +1214,8 @@ void CEventRatePopulationModelTest::testInterimCorrections() { CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CEventRatePopulationModel* model = dynamic_cast(modelHolder.get()); + CEventRatePopulationModel* model = + dynamic_cast(modelHolder.get()); test::CRandomNumbers rng; core_t::TTime now = startTime; @@ -1164,21 +1249,25 @@ void CEventRatePopulationModelTest::testInterimCorrections() { CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability1; annotatedProbability1.s_ResultType.set(model_t::CResultType::E_Interim); - CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability1)); + CPPUNIT_ASSERT(model->computeProbability(0 /*pid*/, now, now + bucketLength, partitioningFields, + 1, annotatedProbability1)); SAnnotatedProbability annotatedProbability2; annotatedProbability2.s_ResultType.set(model_t::CResultType::E_Interim); - CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability2)); + CPPUNIT_ASSERT(model->computeProbability(1 /*pid*/, now, now + bucketLength, partitioningFields, + 1, annotatedProbability2)); SAnnotatedProbability annotatedProbability3; annotatedProbability3.s_ResultType.set(model_t::CResultType::E_Interim); - CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, 1, annotatedProbability3)); - - model_t::CResultType type(model_t::CResultType::E_Unconditional | 
model_t::CResultType::E_Interim); - TDouble1Vec p1a1Baseline = - model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, 0, 0, type, NO_CORRELATES, now); - TDouble1Vec p2a1Baseline = - model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, 0, 0, type, NO_CORRELATES, now); - TDouble1Vec p3a2Baseline = - model->baselineBucketMean(model_t::E_PopulationCountByBucketPersonAndAttribute, 2, 1, type, NO_CORRELATES, now); + CPPUNIT_ASSERT(model->computeProbability(2 /*pid*/, now, now + bucketLength, partitioningFields, + 1, annotatedProbability3)); + + model_t::CResultType type(model_t::CResultType::E_Unconditional | + model_t::CResultType::E_Interim); + TDouble1Vec p1a1Baseline = model->baselineBucketMean( + model_t::E_PopulationCountByBucketPersonAndAttribute, 0, 0, type, NO_CORRELATES, now); + TDouble1Vec p2a1Baseline = model->baselineBucketMean( + model_t::E_PopulationCountByBucketPersonAndAttribute, 0, 0, type, NO_CORRELATES, now); + TDouble1Vec p3a2Baseline = model->baselineBucketMean( + model_t::E_PopulationCountByBucketPersonAndAttribute, 2, 1, type, NO_CORRELATES, now); LOG_DEBUG(<< "p1 probability = " << annotatedProbability1.s_Probability); LOG_DEBUG(<< "p2 probability = " << annotatedProbability2.s_Probability); @@ -1217,7 +1306,8 @@ void CEventRatePopulationModelTest::testPersistence() { CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr origModel(factory.makeModel(modelInitData)); - CEventRatePopulationModel* populationModel = dynamic_cast(origModel.get()); + CEventRatePopulationModel* populationModel = + dynamic_cast(origModel.get()); CPPUNIT_ASSERT(populationModel); for (const auto& message : messages) { @@ -1293,7 +1383,8 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() { factory.features(features); CModelFactory::SGathererInitializationData gathererNoSkipInitData(startTime); - CModelFactory::TDataGathererPtr gathererNoSkip(factory.makeDataGatherer(gathererNoSkipInitData)); + CModelFactory::TDataGathererPtr gathererNoSkip( + factory.makeDataGatherer(gathererNoSkipInitData)); CModelFactory::SModelInitializationData modelNoSkipInitData(gathererNoSkip); CAnomalyDetectorModel::TModelPtr modelNoSkip(factory.makeModel(modelNoSkipInitData)); @@ -1305,9 +1396,11 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() { factoryWithSkipRule.features(features); CModelFactory::SGathererInitializationData gathererWithSkipInitData(startTime); - CModelFactory::TDataGathererPtr gathererWithSkip(factoryWithSkipRule.makeDataGatherer(gathererWithSkipInitData)); + CModelFactory::TDataGathererPtr gathererWithSkip( + factoryWithSkipRule.makeDataGatherer(gathererWithSkipInitData)); CModelFactory::SModelInitializationData modelWithSkipInitData(gathererWithSkip); - CAnomalyDetectorModel::TModelPtr modelWithSkip(factoryWithSkipRule.makeModel(modelWithSkipInitData)); + CAnomalyDetectorModel::TModelPtr modelWithSkip( + factoryWithSkipRule.makeModel(modelWithSkipInitData)); addArrival(SMessage(100, "p1", "a1"), gathererNoSkip, m_ResourceMonitor); addArrival(SMessage(100, "p1", "a1"), gathererWithSkip, m_ResourceMonitor); @@ -1343,31 +1436,49 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() { // Checksums will be different because a model is created for attribute a3 CPPUNIT_ASSERT(modelWithSkip->checksum() != modelNoSkip->checksum()); - CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = modelWithSkip->details(); + 
CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = + modelWithSkip->details(); CAnomalyDetectorModel::CModelDetailsViewPtr modelNoSkipView = modelNoSkip->details(); // but the underlying models for attributes a1 and a2 are the same - uint64_t withSkipChecksum = modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)->checksum(); - uint64_t noSkipChecksum = modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)->checksum(); + uint64_t withSkipChecksum = + modelWithSkipView + ->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0) + ->checksum(); + uint64_t noSkipChecksum = + modelNoSkipView + ->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0) + ->checksum(); CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); - withSkipChecksum = modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)->checksum(); - noSkipChecksum = modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1)->checksum(); + withSkipChecksum = modelWithSkipView + ->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1) + ->checksum(); + noSkipChecksum = modelNoSkipView + ->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 1) + ->checksum(); CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); // The no skip model didn't see the a3 attribute only a1, a2 and a4. // The a4 models should be the same. - withSkipChecksum = modelWithSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 3)->checksum(); - noSkipChecksum = modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2)->checksum(); + withSkipChecksum = modelWithSkipView + ->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 3) + ->checksum(); + noSkipChecksum = modelNoSkipView + ->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 2) + ->checksum(); CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); // Check the last value times of all the underlying models are the same - const maths::CUnivariateTimeSeriesModel* timeSeriesModel = dynamic_cast( - modelNoSkipView->model(model_t::E_PopulationCountByBucketPersonAndAttribute, 0)); + const maths::CUnivariateTimeSeriesModel* timeSeriesModel = + dynamic_cast(modelNoSkipView->model( + model_t::E_PopulationCountByBucketPersonAndAttribute, 0)); CPPUNIT_ASSERT(timeSeriesModel); core_t::TTime time = timeSeriesModel->trendModel().lastValueTime(); - CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_PopulationCountByBucketPersonAndAttribute, 200, bucketLength), time); + CPPUNIT_ASSERT_EQUAL(model_t::sampleTime(model_t::E_PopulationCountByBucketPersonAndAttribute, + 200, bucketLength), + time); // The last times of the underlying time series models should all be the same timeSeriesModel = dynamic_cast( @@ -1394,31 +1505,40 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() { CppUnit::Test* CEventRatePopulationModelTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CEventRatePopulationModelTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testBasicAccessors", - &CEventRatePopulationModelTest::testBasicAccessors)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testFeatures", - &CEventRatePopulationModelTest::testFeatures)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testComputeProbability", - &CEventRatePopulationModelTest::testComputeProbability)); - 
suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testPrune", - &CEventRatePopulationModelTest::testPrune)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testKey", - &CEventRatePopulationModelTest::testKey)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testFrequency", - &CEventRatePopulationModelTest::testFrequency)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testSampleRateWeight", - &CEventRatePopulationModelTest::testSampleRateWeight)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testSkipSampling", - &CEventRatePopulationModelTest::testSkipSampling)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testInterimCorrections", - &CEventRatePopulationModelTest::testInterimCorrections)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testPeriodicity", - &CEventRatePopulationModelTest::testPeriodicity)); - suiteOfTests->addTest(new CppUnit::TestCaller("CEventRatePopulationModelTest::testPersistence", - &CEventRatePopulationModelTest::testPersistence)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules", - &CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testBasicAccessors", + &CEventRatePopulationModelTest::testBasicAccessors)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testFeatures", + &CEventRatePopulationModelTest::testFeatures)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testComputeProbability", + &CEventRatePopulationModelTest::testComputeProbability)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testPrune", &CEventRatePopulationModelTest::testPrune)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testKey", &CEventRatePopulationModelTest::testKey)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testFrequency", + &CEventRatePopulationModelTest::testFrequency)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testSampleRateWeight", + &CEventRatePopulationModelTest::testSampleRateWeight)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testSkipSampling", + &CEventRatePopulationModelTest::testSkipSampling)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testInterimCorrections", + &CEventRatePopulationModelTest::testInterimCorrections)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testPeriodicity", + &CEventRatePopulationModelTest::testPeriodicity)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testPersistence", + &CEventRatePopulationModelTest::testPersistence)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules", + &CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules)); return suiteOfTests; } diff --git a/lib/model/unittest/CFunctionTypesTest.cc b/lib/model/unittest/CFunctionTypesTest.cc index 9c28df7efd..71cedc7b41 100644 --- a/lib/model/unittest/CFunctionTypesTest.cc +++ b/lib/model/unittest/CFunctionTypesTest.cc @@ -20,120 
+20,156 @@ void CFunctionTypesTest::testFeaturesToFunction() { { // Count. features.push_back(model_t::E_IndividualCountByBucketAndPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("count"), + function_t::name(function_t::function(features))); } { // (Rare) Count. features.clear(); features.push_back(model_t::E_IndividualCountByBucketAndPerson); features.push_back(model_t::E_IndividualTotalBucketCountByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("count"), + function_t::name(function_t::function(features))); } { // Non-Zero Count. features.clear(); features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("non_zero_count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("non_zero_count"), + function_t::name(function_t::function(features))); } { // Non-Zero Rare Count. features.clear(); features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); features.push_back(model_t::E_IndividualTotalBucketCountByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("rare_non_zero_count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("rare_non_zero_count"), + function_t::name(function_t::function(features))); } { // Low Count. features.clear(); features.push_back(model_t::E_IndividualLowCountsByBucketAndPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("low_count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("low_count"), + function_t::name(function_t::function(features))); } { // High Count. features.clear(); features.push_back(model_t::E_IndividualHighCountsByBucketAndPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("high_count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("high_count"), + function_t::name(function_t::function(features))); } { // Rare Count. 
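        // (An indicator feature combined with the per-person total bucket
        //  count resolves to the plain "rare" function, as asserted below.)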
features.clear(); features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); features.push_back(model_t::E_IndividualTotalBucketCountByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("rare"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("rare"), + function_t::name(function_t::function(features))); } { // Min. features.clear(); features.push_back(model_t::E_IndividualMinByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("min"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("min"), + function_t::name(function_t::function(features))); } { // Mean. features.clear(); features.push_back(model_t::E_IndividualMeanByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("mean"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("mean"), + function_t::name(function_t::function(features))); features.clear(); features.push_back(model_t::E_IndividualLowMeanByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("low_mean"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("low_mean"), + function_t::name(function_t::function(features))); features.clear(); features.push_back(model_t::E_IndividualHighMeanByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("high_mean"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("high_mean"), + function_t::name(function_t::function(features))); } { // Median. features.clear(); features.push_back(model_t::E_IndividualMedianByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("median"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("median"), + function_t::name(function_t::function(features))); features.clear(); } { // Max. features.clear(); features.push_back(model_t::E_IndividualMaxByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("max"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("max"), + function_t::name(function_t::function(features))); } { // Sum. 
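        // (The three sum features map to "sum", "low_sum" and "high_sum"
        //  respectively, as the assertions below verify one at a time.)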
features.clear(); features.push_back(model_t::E_IndividualSumByBucketAndPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("sum"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("sum"), + function_t::name(function_t::function(features))); features.clear(); features.push_back(model_t::E_IndividualLowSumByBucketAndPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("low_sum"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("low_sum"), + function_t::name(function_t::function(features))); features.clear(); features.push_back(model_t::E_IndividualHighSumByBucketAndPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("high_sum"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("high_sum"), + function_t::name(function_t::function(features))); } { // Non-Zero Sum. features.clear(); features.push_back(model_t::E_IndividualNonNullSumByBucketAndPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("non_null_sum"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("non_null_sum"), + function_t::name(function_t::function(features))); } { // Metric. features.clear(); features.push_back(model_t::E_IndividualMeanByPerson); features.push_back(model_t::E_IndividualMaxByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("metric"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("metric"), + function_t::name(function_t::function(features))); } { // Metric. @@ -141,87 +177,111 @@ void CFunctionTypesTest::testFeaturesToFunction() { features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMeanByPerson); features.push_back(model_t::E_IndividualMaxByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("metric"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("metric"), + function_t::name(function_t::function(features))); } { // Lat-long. features.clear(); features.push_back(model_t::E_IndividualMeanLatLongByPerson); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("lat_long"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("lat_long"), + function_t::name(function_t::function(features))); } { // Count. 
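         // (Illustrative note, not part of this change: the feature sets
         // from here on are population ones; a bucket count combined with
         // E_PopulationUniquePersonCountByAttribute is reported as
         // "rare_count", as the assertion below expects.)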
features.clear(); features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute); features.push_back(model_t::E_PopulationUniquePersonCountByAttribute); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("rare_count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("rare_count"), + function_t::name(function_t::function(features))); } { // Low Count. features.clear(); features.push_back(model_t::E_PopulationLowCountsByBucketPersonAndAttribute); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("low_count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("low_count"), + function_t::name(function_t::function(features))); } { // High Count. features.clear(); features.push_back(model_t::E_PopulationHighCountsByBucketPersonAndAttribute); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("high_count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("high_count"), + function_t::name(function_t::function(features))); } { // Distinct count. features.clear(); features.push_back(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), + function_t::name(function_t::function(features))); } { // Min. features.clear(); features.push_back(model_t::E_PopulationMinByPersonAndAttribute); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("min"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("min"), + function_t::name(function_t::function(features))); } { // Mean. features.clear(); features.push_back(model_t::E_PopulationMeanByPersonAndAttribute); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("mean"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("mean"), + function_t::name(function_t::function(features))); } { // Median. features.clear(); features.push_back(model_t::E_PopulationMedianByPersonAndAttribute); - LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'"); - CPPUNIT_ASSERT_EQUAL(std::string("median"), function_t::name(function_t::function(features))); + LOG_DEBUG(<< "function = '" + << function_t::name(function_t::function(features)) << "'"); + CPPUNIT_ASSERT_EQUAL(std::string("median"), + function_t::name(function_t::function(features))); } { // Max. 
         features.clear();
         features.push_back(model_t::E_PopulationMaxByPersonAndAttribute);
-        LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'");
-        CPPUNIT_ASSERT_EQUAL(std::string("max"), function_t::name(function_t::function(features)));
+        LOG_DEBUG(<< "function = '"
+                  << function_t::name(function_t::function(features)) << "'");
+        CPPUNIT_ASSERT_EQUAL(std::string("max"),
+                             function_t::name(function_t::function(features)));
     }
     {
         // Sum.
         features.clear();
         features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute);
-        LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'");
-        CPPUNIT_ASSERT_EQUAL(std::string("sum"), function_t::name(function_t::function(features)));
+        LOG_DEBUG(<< "function = '"
+                  << function_t::name(function_t::function(features)) << "'");
+        CPPUNIT_ASSERT_EQUAL(std::string("sum"),
+                             function_t::name(function_t::function(features)));
     }
     {
         // Metric.
         features.clear();
         features.push_back(model_t::E_PopulationMinByPersonAndAttribute);
         features.push_back(model_t::E_PopulationMeanByPersonAndAttribute);
-        LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'");
-        CPPUNIT_ASSERT_EQUAL(std::string("metric"), function_t::name(function_t::function(features)));
+        LOG_DEBUG(<< "function = '"
+                  << function_t::name(function_t::function(features)) << "'");
+        CPPUNIT_ASSERT_EQUAL(std::string("metric"),
+                             function_t::name(function_t::function(features)));
     }
     {
         // Metric.
@@ -229,16 +289,18 @@ void CFunctionTypesTest::testFeaturesToFunction() {
         features.push_back(model_t::E_PopulationMinByPersonAndAttribute);
         features.push_back(model_t::E_PopulationMeanByPersonAndAttribute);
         features.push_back(model_t::E_PopulationMaxByPersonAndAttribute);
-        LOG_DEBUG(<< "function = '" << function_t::name(function_t::function(features)) << "'");
-        CPPUNIT_ASSERT_EQUAL(std::string("metric"), function_t::name(function_t::function(features)));
+        LOG_DEBUG(<< "function = '"
+                  << function_t::name(function_t::function(features)) << "'");
+        CPPUNIT_ASSERT_EQUAL(std::string("metric"),
+                             function_t::name(function_t::function(features)));
     }
 }
 
 CppUnit::Test* CFunctionTypesTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CFunctionTypesTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CFunctionTypesTest>("CFunctionTypesTest::testFeaturesToFunction",
-                                                                      &CFunctionTypesTest::testFeaturesToFunction));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CFunctionTypesTest>(
+        "CFunctionTypesTest::testFeaturesToFunction", &CFunctionTypesTest::testFeaturesToFunction));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CGathererToolsTest.cc b/lib/model/unittest/CGathererToolsTest.cc
index 35ab0a1c82..b9ad415dae 100644
--- a/lib/model/unittest/CGathererToolsTest.cc
+++ b/lib/model/unittest/CGathererToolsTest.cc
@@ -19,8 +19,9 @@ const CGathererTools::CSumGatherer::TStoredStringPtrVec EMPTY_STR_PTR_VEC;
 
 CppUnit::Test* CGathererToolsTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CGathererToolsTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CGathererToolsTest>("CGathererToolsTest::testSumGathererIsRedundant",
-                                                                      &CGathererToolsTest::testSumGathererIsRedundant));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CGathererToolsTest>(
+        "CGathererToolsTest::testSumGathererIsRedundant",
+        &CGathererToolsTest::testSumGathererIsRedundant));
 
     return suiteOfTests;
 }
@@ -31,7 +32,8 @@ void CGathererToolsTest::testSumGathererIsRedundant() {
     core_t::TTime bucketLength(100);
     SModelParams modelParams(bucketLength);
     modelParams.s_LatencyBuckets = 3;
-    CGathererTools::CSumGatherer sumGatherer(modelParams, 0, 100, bucketLength, EMPTY_STR_VEC.begin(), EMPTY_STR_VEC.end());
+    CGathererTools::CSumGatherer sumGatherer(
+        modelParams, 0, 100, bucketLength, EMPTY_STR_VEC.begin(), EMPTY_STR_VEC.end());
 
     sumGatherer.add(100, TDouble1Vec{1.0}, 1, 0, EMPTY_STR_PTR_VEC);
     sumGatherer.startNewBucket(100);
diff --git a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc
index 582dc685b1..766089ff48 100644
--- a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc
+++ b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc
@@ -33,21 +33,30 @@ class CTestNodeFactory {
 public:
     CTestNodeFactory() {}
 
-    TestNode make(const std::string& name1, const std::string& name2, const std::string& name3, const std::string& name4) const {
+    TestNode make(const std::string& name1,
+                  const std::string& name2,
+                  const std::string& name3,
+                  const std::string& name4) const {
         return make(name1 + ' ' + name2 + ' ' + name3 + ' ' + name4);
     }
 
-    TestNode make(const std::string& name1, const std::string& name2) const { return make(name1 + ' ' + name2); }
+    TestNode make(const std::string& name1, const std::string& name2) const {
+        return make(name1 + ' ' + name2);
+    }
 
     TestNode make(const std::string& name) const { return TestNode(name); }
 };
 
-class CConcreteHierarchicalResultsLevelSet : public ml::model::CHierarchicalResultsLevelSet<TestNode> {
+class CConcreteHierarchicalResultsLevelSet
+    : public ml::model::CHierarchicalResultsLevelSet<TestNode> {
 public:
-    CConcreteHierarchicalResultsLevelSet(const TestNode& root) : ml::model::CHierarchicalResultsLevelSet<TestNode>(root) {}
+    CConcreteHierarchicalResultsLevelSet(const TestNode& root)
+        : ml::model::CHierarchicalResultsLevelSet<TestNode>(root) {}
 
     //! Visit a node.
-    virtual void visit(const ml::model::CHierarchicalResults& /*results*/, const TNode& /*node*/, bool /*pivot*/) {}
+    virtual void visit(const ml::model::CHierarchicalResults& /*results*/,
+                       const TNode& /*node*/,
+                       bool /*pivot*/) {}
 
     // make public
     using ml::model::CHierarchicalResultsLevelSet<TestNode>::elements;
@@ -63,14 +72,18 @@ void CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation
     // different to other empty string pointers. (In general, if you need
     // a pointer to an empty string call CStringStore::getEmpty() instead of
     // doing this.)
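     // A minimal sketch of the distinction (illustrative only, not part of
     // this change; it assumes CStoredStringPtr dereferences to the stored
     // std::string):
     //
     //     ml::core::CStoredStringPtr a =
     //         ml::core::CStoredStringPtr::makeStoredString(std::string());
     //     ml::core::CStoredStringPtr b =
     //         ml::core::CStoredStringPtr::makeStoredString(std::string());
     //     // *a == *b compares equal by value, but a and b do not share
     //     // storage, so identity-based bookkeeping treats them as two
     //     // different keys; CStringStore::getEmpty() would instead hand
     //     // back the one shared empty string.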
-    ml::core::CStoredStringPtr UNSET = ml::core::CStoredStringPtr::makeStoredString(std::string());
+    ml::core::CStoredStringPtr UNSET =
+        ml::core::CStoredStringPtr::makeStoredString(std::string());
 
     ml::core::CStoredStringPtr PARTITION_A = ml::model::CStringStore::names().get("pA");
     ml::core::CStoredStringPtr PARTITION_B = ml::model::CStringStore::names().get("pB");
     ml::core::CStoredStringPtr PARTITION_C = ml::model::CStringStore::names().get("pC");
-    ml::core::CStoredStringPtr PARTITION_VALUE_1 = ml::model::CStringStore::names().get("v1");
-    ml::core::CStoredStringPtr PARTITION_VALUE_2 = ml::model::CStringStore::names().get("v2");
-    ml::core::CStoredStringPtr PARTITION_VALUE_3 = ml::model::CStringStore::names().get("v3");
+    ml::core::CStoredStringPtr PARTITION_VALUE_1 =
+        ml::model::CStringStore::names().get("v1");
+    ml::core::CStoredStringPtr PARTITION_VALUE_2 =
+        ml::model::CStringStore::names().get("v2");
+    ml::core::CStoredStringPtr PARTITION_VALUE_3 =
+        ml::model::CStringStore::names().get("v3");
 
     TestNode root("root");
diff --git a/lib/model/unittest/CHierarchicalResultsTest.cc b/lib/model/unittest/CHierarchicalResultsTest.cc
index 764717991f..24380e85e1 100644
--- a/lib/model/unittest/CHierarchicalResultsTest.cc
+++ b/lib/model/unittest/CHierarchicalResultsTest.cc
@@ -48,8 +48,10 @@ namespace {
 using TDoubleVec = std::vector<double>;
 using TAttributeProbabilityVec = model::CHierarchicalResults::TAttributeProbabilityVec;
 using TStoredStringPtrStoredStringPtrPr = model::CHierarchicalResults::TStoredStringPtrStoredStringPtrPr;
-using TStoredStringPtrStoredStringPtrPrDoublePr = model::CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePr;
-using TStoredStringPtrStoredStringPtrPrDoublePrVec = model::CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePrVec;
+using TStoredStringPtrStoredStringPtrPrDoublePr =
+    model::CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePr;
+using TStoredStringPtrStoredStringPtrPrDoublePrVec =
+    model::CHierarchicalResults::TStoredStringPtrStoredStringPtrPrDoublePrVec;
 using TStrVec = std::vector<std::string>;
 
 const std::string EMPTY_STRING;
@@ -63,7 +65,9 @@ class CBreadthFirstCheck : public model::CHierarchicalResultsVisitor {
 public:
     CBreadthFirstCheck() : m_Layer(0), m_Layers(1, TNodeCPtrSet()) {}
 
-    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+    virtual void visit(const model::CHierarchicalResults& /*results*/,
+                       const TNode& node,
+                       bool /*pivot*/) {
         LOG_DEBUG(<< "Visiting " << node.print());
 
         if (node.s_Children.empty()) {
@@ -102,7 +106,8 @@ class CBreadthFirstCheck : public model::CHierarchicalResultsVisitor {
         for (std::size_t i = 0u; i < m_Layers.size(); ++i) {
             LOG_DEBUG(<< "Checking layer " << core::CContainerPrinter::print(m_Layers[i]));
-            for (TNodeCPtrSetCItr itr = m_Layers[i].begin(); itr != m_Layers[i].end(); ++itr) {
+            for (TNodeCPtrSetCItr itr = m_Layers[i].begin();
+                 itr != m_Layers[i].end(); ++itr) {
                 if ((*itr)->s_Parent) {
                     std::size_t p = this->layer((*itr)->s_Parent);
                     LOG_DEBUG(<< "layer = " << i << ", parent layer = " << p);
@@ -139,7 +144,9 @@ class CDepthFirstCheck : public model::CHierarchicalResultsVisitor {
     using TNodeCPtrVec = std::vector<const TNode*>;
 
 public:
-    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+    virtual void visit(const model::CHierarchicalResults& /*results*/,
+                       const TNode& node,
+                       bool /*pivot*/) {
         LOG_DEBUG(<< "Visiting " << node.print());
         for (std::size_t i = node.s_Children.size(); i > 0; --i) {
             CPPUNIT_ASSERT(!m_Children.empty());
@@ -158,12 +165,14 @@ class CPrinter : public model::CHierarchicalResultsVisitor {
 public:
     CPrinter() : m_ShouldPrintWrittenNodesOnly(false) {}
 
-    CPrinter(bool shouldOnlyPrintWrittenNodes) : m_ShouldPrintWrittenNodesOnly(shouldOnlyPrintWrittenNodes) {}
+    CPrinter(bool shouldOnlyPrintWrittenNodes)
+        : m_ShouldPrintWrittenNodesOnly(shouldOnlyPrintWrittenNodes) {}
 
     virtual void visit(const model::CHierarchicalResults& results, const TNode& node, bool pivot) {
-        if (m_ShouldPrintWrittenNodesOnly == false || shouldWriteResult(m_Limits, results, node, pivot)) {
-            m_Result =
-                std::string(2 * depth(&node), ' ') + node.print() + (pivot ? " pivot" : "") + (m_Result.empty() ? "" : "\n") + m_Result;
+        if (m_ShouldPrintWrittenNodesOnly == false ||
+            shouldWriteResult(m_Limits, results, node, pivot)) {
+            m_Result = std::string(2 * depth(&node), ' ') + node.print() +
+                       (pivot ? " pivot" : "") + (m_Result.empty() ? "" : "\n") + m_Result;
         }
     }
 
@@ -190,7 +199,9 @@ class CNodeExtractor : public model::CHierarchicalResultsVisitor {
     using TNodeCPtrVec = std::vector<const TNode*>;
 
 public:
-    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
+    virtual void visit(const model::CHierarchicalResults& /*results*/,
+                       const TNode& node,
+                       bool /*pivot*/) {
         if (this->isPartitioned(node)) {
             m_PartitionedNodes.push_back(&node);
         }
@@ -220,10 +231,13 @@ class CNodeExtractor : public model::CHierarchicalResultsVisitor {
 //! \brief Checks our anomaly scores are correct post scoring.
 class CCheckScores : public model::CHierarchicalResultsVisitor {
 public:
-    virtual void visit(const model::CHierarchicalResults& /*results*/, const TNode& node, bool /*pivot*/) {
-        LOG_DEBUG(<< node.s_Spec.print() << " score = " << node.s_RawAnomalyScore
-                  << ", expected score = " << maths::CTools::anomalyScore(node.probability()));
-        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CTools::anomalyScore(node.probability()), node.s_RawAnomalyScore, 1e-10);
+    virtual void visit(const model::CHierarchicalResults& /*results*/,
+                       const TNode& node,
+                       bool /*pivot*/) {
+        LOG_DEBUG(<< node.s_Spec.print() << " score = " << node.s_RawAnomalyScore << ", expected score = "
+                  << maths::CTools::anomalyScore(node.probability()));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(maths::CTools::anomalyScore(node.probability()),
+                                     node.s_RawAnomalyScore, 1e-10);
     }
 };
 
@@ -242,7 +256,8 @@ class CWriteConsistencyChecker : public model::CHierarchicalResultsVisitor {
         bool willWriteAChild(false);
         for (size_t i = 0; i < node.s_Children.size(); ++i) {
             CPPUNIT_ASSERT(node.s_Children[i] != nullptr);
-            willWriteAChild = this->shouldWriteResult(m_Limits, results, *node.s_Children[i], pivot);
+            willWriteAChild = this->shouldWriteResult(
+                m_Limits, results, *node.s_Children[i], pivot);
             if (willWriteAChild) {
                 break;
             }
@@ -253,7 +268,8 @@ class CWriteConsistencyChecker : public model::CHierarchicalResultsVisitor {
         if (!this->isRoot(node)) {
             CPPUNIT_ASSERT(node.s_Parent != nullptr);
             if (isTypeForWhichWeWriteResults(*node.s_Parent, pivot)) {
-                CPPUNIT_ASSERT(this->shouldWriteResult(m_Limits, results, *node.s_Parent, pivot));
+                CPPUNIT_ASSERT(this->shouldWriteResult(m_Limits, results,
+                                                       *node.s_Parent, pivot));
             }
         }
     }
@@ -281,26 +297,34 @@ class CProbabilityGatherer : public model::CHierarchicalResultsLevelSet
         this->elements(node, pivot, factory, probabilities);
 
         for (std::size_t i = 0u; i < probabilities.size(); ++i) {
-            if (node.probability() < model::CDetectorEqualizer::largestProbabilityToCorrect()) {
+            if (node.probability() <
+                model::CDetectorEqualizer::largestProbabilityToCorrect()) {
                 (*probabilities[i]).s_Probabilities[node.s_Detector].push_back(node.probability());
             }
         }
@@ -315,15 +339,18 @@ class CProbabilityGatherer : public model::CHierarchicalResultsLevelSet
 
 template<typename ITR>
-void addAggregateValues(double w1, double w2, std::size_t n, ITR begin, ITR end, TDoubleVec& scores, TDoubleVec& probabilities) {
+void addAggregateValues(double w1,
+                        double w2,
+                        std::size_t n,
+                        ITR begin,
+                        ITR end,
+                        TDoubleVec& scores,
+                        TDoubleVec& probabilities) {
     double score, probability;
     TDoubleVec probs(begin, end);
     model::CAnomalyScore::compute(w1, w2, 1, n, 0.05, probs, score, probability);
@@ -366,16 +402,9 @@ void addResult(int detector,
                double p,
                ml::model::CHierarchicalResults& results) {
     ml::model::SAnnotatedProbability annotatedProbability(p);
-    results.addModelResult(detector,
-                           isPopulation,
-                           functionName,
-                           function,
-                           partitionFieldName,
-                           partitionFieldValue,
-                           personFieldName,
-                           personFieldValue,
-                           valueFieldName,
-                           annotatedProbability);
+    results.addModelResult(detector, isPopulation, functionName, function,
+                           partitionFieldName, partitionFieldValue, personFieldName,
+                           personFieldValue, valueFieldName, annotatedProbability);
 }
 
 void addResult(int detector,
@@ -391,17 +420,9 @@ void addResult(int detector,
                const ml::model::CAnomalyDetectorModel* model,
                ml::model::CHierarchicalResults& results) {
     ml::model::SAnnotatedProbability annotatedProbability(p);
-    results.addModelResult(detector,
-                           isPopulation,
-                           functionName,
-                           function,
-                           partitionFieldName,
-                           partitionFieldValue,
-                           personFieldName,
-                           personFieldValue,
-                           valueFieldName,
-                           annotatedProbability,
-                           model);
+    results.addModelResult(detector, isPopulation, functionName, function, partitionFieldName,
+                           partitionFieldValue, personFieldName, personFieldValue,
+                           valueFieldName, annotatedProbability, model);
 }
 } // unnamed::
 
@@ -581,22 +602,27 @@ void CHierarchicalResultsTest::testBuildHierarchy() {
     LOG_DEBUG(<< "*** testBuildHierarchy ***");
 
     static const std::string FUNC("mean");
-    static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMean);
+    static const ml::model::function_t::EFunction function(
+        ml::model::function_t::E_IndividualMetricMean);
 
     // Test vanilla by / over.
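     // (For orientation, not part of this change: each addResult call below
     // wraps its probability argument in an SAnnotatedProbability and
     // forwards the remaining fields verbatim, so, for example,
     //
     //     addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING,
     //               PF1, p11, EMPTY_STRING, 0.01, results);
     //
     // is shorthand for results.addModelResult(1, false, FUNC, function,
     // EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING,
     // SAnnotatedProbability(0.01)).)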
{ model::CHierarchicalResults results; - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); LOG_DEBUG(<< "\nby:\n" << printer.result()); - CPPUNIT_ASSERT_EQUAL(std::string("'false/false/mean/////': 1, 0"), printer.result()); + CPPUNIT_ASSERT_EQUAL(std::string("'false/false/mean/////': 1, 0"), + printer.result()); } { model::CHierarchicalResults results; - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + p11, EMPTY_STRING, 0.01, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + p12, EMPTY_STRING, 0.03, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -608,10 +634,14 @@ void CHierarchicalResultsTest::testBuildHierarchy() { } { model::CHierarchicalResults results; - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.3, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); - addResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p22, EMPTY_STRING, 0.03, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.3, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.01, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, + EMPTY_STRING, 0.03, results); + addResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, + p22, EMPTY_STRING, 0.03, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -628,9 +658,12 @@ void CHierarchicalResultsTest::testBuildHierarchy() { // Test vanilla partition { model::CHierarchicalResults results; - addResult(1, false, FUNC, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(1, false, FUNC, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(1, false, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(1, false, FUNC, function, PNF1, pn11, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(1, false, FUNC, function, PNF1, pn12, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(1, false, FUNC, function, PNF1, pn13, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.05, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -645,14 +678,22 @@ void CHierarchicalResultsTest::testBuildHierarchy() { // Test complex. 
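     // (Reading the expected strings below: CPrinter, defined earlier in
     // this file, indents each node by two spaces per level of depth and
     // appends node.print(), which renders the node's spec fields,
     // including the population flag, the function name and the partition
     // and person field names/values, separated by '/' and followed by
     // ": <probability>, <score>"; the exact field order is inferred here
     // from the fixtures rather than stated by this patch.)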
{ model::CHierarchicalResults results; - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p14, EMPTY_STRING, 0.01, results); - addResult(3, false, FUNC, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(3, false, FUNC, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(3, false, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); - addResult(4, true, FUNC, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(4, true, FUNC, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + p11, EMPTY_STRING, 0.01, results); + addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + p14, EMPTY_STRING, 0.01, results); + addResult(3, false, FUNC, function, PNF1, pn11, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(3, false, FUNC, function, PNF1, pn12, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(3, false, FUNC, function, PNF1, pn13, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(4, true, FUNC, function, PNF2, pn22, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(4, true, FUNC, function, PNF2, pn23, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.05, results); addResult(5, true, FUNC, function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.2, results); addResult(5, true, FUNC, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.2, results); addResult(5, true, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.1, results); @@ -664,30 +705,31 @@ void CHierarchicalResultsTest::testBuildHierarchy() { CPrinter printer; results.postorderDepthFirst(printer); LOG_DEBUG(<< "\ncomplex:\n" << printer.result()); - CPPUNIT_ASSERT_EQUAL(std::string("'false/true//////': 1, 0\n" - " 'false/true//PNF2////': 1, 0\n" - " 'false/true/mean/PNF2/pn23///': 0.05, 0\n" - " 'false/true//PNF2/pn22///': 1, 0\n" - " 'false/true//PNF2/pn22/PF2//': 1, 0\n" - " 'false/true/mean/PNF2/pn22/PF2/p23/': 0.12, 0\n" - " 'false/false/mean/PNF2/pn22/PF2/p23/': 0.82, 0\n" - " 'false/true/mean/PNF2/pn22/PF2/p21/': 0.15, 0\n" - " 'false/false/mean/PNF2/pn22/PF2/p21/': 0.12, 0\n" - " 'false/true//PNF2/pn22/PF1//': 1, 0\n" - " 'false/true/mean/PNF2/pn22/PF1/p12/': 0.1, 0\n" - " 'false/true/mean/PNF2/pn22/PF1/p11/': 0.2, 0\n" - " 'false/true/mean/PNF2/pn22///': 0.01, 0\n" - " 'false/true/mean/PNF2/pn21/PF1/p11/': 0.2, 0\n" - " 'false/false//PNF1////': 1, 0\n" - " 'false/false/mean/PNF1/pn13///': 0.05, 0\n" - " 'false/false/mean/PNF1/pn12///': 0.01, 0\n" - " 'false/false/mean/PNF1/pn11///': 0.01, 0\n" - " 'false/false//////': 1, 0\n" - " 'false/false////PF1//': 1, 0\n" - " 'false/false/mean///PF1/p14/': 0.01, 0\n" - " 'false/false/mean///PF1/p11/': 0.01, 0\n" - " 'false/false/mean/////': 0.01, 0"), - printer.result()); + CPPUNIT_ASSERT_EQUAL( + std::string("'false/true//////': 1, 0\n" + " 'false/true//PNF2////': 1, 0\n" + " 'false/true/mean/PNF2/pn23///': 0.05, 0\n" + " 'false/true//PNF2/pn22///': 1, 0\n" + " 
'false/true//PNF2/pn22/PF2//': 1, 0\n" + " 'false/true/mean/PNF2/pn22/PF2/p23/': 0.12, 0\n" + " 'false/false/mean/PNF2/pn22/PF2/p23/': 0.82, 0\n" + " 'false/true/mean/PNF2/pn22/PF2/p21/': 0.15, 0\n" + " 'false/false/mean/PNF2/pn22/PF2/p21/': 0.12, 0\n" + " 'false/true//PNF2/pn22/PF1//': 1, 0\n" + " 'false/true/mean/PNF2/pn22/PF1/p12/': 0.1, 0\n" + " 'false/true/mean/PNF2/pn22/PF1/p11/': 0.2, 0\n" + " 'false/true/mean/PNF2/pn22///': 0.01, 0\n" + " 'false/true/mean/PNF2/pn21/PF1/p11/': 0.2, 0\n" + " 'false/false//PNF1////': 1, 0\n" + " 'false/false/mean/PNF1/pn13///': 0.05, 0\n" + " 'false/false/mean/PNF1/pn12///': 0.01, 0\n" + " 'false/false/mean/PNF1/pn11///': 0.01, 0\n" + " 'false/false//////': 1, 0\n" + " 'false/false////PF1//': 1, 0\n" + " 'false/false/mean///PF1/p14/': 0.01, 0\n" + " 'false/false/mean///PF1/p11/': 0.01, 0\n" + " 'false/false/mean/////': 0.01, 0"), + printer.result()); } } @@ -695,7 +737,8 @@ void CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePerson LOG_DEBUG(<< "*** testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue ***"); static const std::string FUNC("mean"); - static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMean); + static const ml::model::function_t::EFunction function( + ml::model::function_t::E_IndividualMetricMean); std::string partition("par"); std::string partition1("par_1"); @@ -705,8 +748,10 @@ void CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePerson std::string valueField("value"); model::CHierarchicalResults results; - addResult(1, false, FUNC, function, partition, partition1, person, person1, valueField, 0.01, results); - addResult(1, false, FUNC, function, partition, partition2, person, person1, valueField, 0.01, results); + addResult(1, false, FUNC, function, partition, partition1, person, person1, + valueField, 0.01, results); + addResult(1, false, FUNC, function, partition, partition2, person, person1, + valueField, 0.01, results); results.buildHierarchy(); CNodeExtractor extract; @@ -755,7 +800,8 @@ void CHierarchicalResultsTest::testBasicVisitor() { // Test by and over { model::CHierarchicalResults results; - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -768,7 +814,8 @@ void CHierarchicalResultsTest::testBasicVisitor() { } { model::CHierarchicalResults results; - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + EMPTY_STRING, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -780,13 +827,17 @@ void CHierarchicalResultsTest::testBasicVisitor() { CPPUNIT_ASSERT_EQUAL(std::size_t(1), extract.personNodes().size()); CPPUNIT_ASSERT_EQUAL(PF1, *extract.personNodes()[0]->s_Spec.s_PersonFieldName); CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, *extract.personNodes()[0]->s_Spec.s_PersonFieldValue); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[0]->s_Children.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + extract.personNodes()[0]->s_Children.size()); } { model::CHierarchicalResults results; - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, EMPTY_STRING, 
EMPTY_STRING, 1.0, results); - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 1.0, results); - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 1.0, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + p11, EMPTY_STRING, 1.0, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + p12, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -813,14 +864,18 @@ void CHierarchicalResultsTest::testBasicVisitor() { CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, *extract.personNodes()[0]->s_Spec.s_FunctionName); CPPUNIT_ASSERT_EQUAL(PF1, *extract.personNodes()[0]->s_Spec.s_PersonFieldName); CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, *extract.personNodes()[0]->s_Spec.s_PersonFieldValue); - CPPUNIT_ASSERT_EQUAL(std::size_t(3), extract.personNodes()[0]->s_Children.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(3), + extract.personNodes()[0]->s_Children.size()); } { model::CHierarchicalResults results; - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, EMPTY_STRING, EMPTY_STRING, 1.0, results); - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 1.0, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p23, EMPTY_STRING, 1.0, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 1.0, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p23, + EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -839,17 +894,24 @@ void CHierarchicalResultsTest::testBasicVisitor() { CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, *extract.personNodes()[0]->s_Spec.s_PersonFieldValue); CPPUNIT_ASSERT_EQUAL(p11, *extract.personNodes()[1]->s_Spec.s_PersonFieldValue); CPPUNIT_ASSERT_EQUAL(p23, *extract.personNodes()[2]->s_Spec.s_PersonFieldValue); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[0]->s_Children.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[1]->s_Children.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[2]->s_Children.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + extract.personNodes()[0]->s_Children.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + extract.personNodes()[1]->s_Children.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + extract.personNodes()[2]->s_Children.size()); } { LOG_DEBUG(<< "Clear..."); model::CHierarchicalResults results; - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results); - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.3, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(3, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.2, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.3, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, 
PF2, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(3, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -865,21 +927,30 @@ void CHierarchicalResultsTest::testBasicVisitor() { CPPUNIT_ASSERT_EQUAL(PF1, *extract.personNodes()[1]->s_Spec.s_PersonFieldName); CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, *extract.personNodes()[0]->s_Spec.s_PersonFieldValue); CPPUNIT_ASSERT_EQUAL(p11, *extract.personNodes()[1]->s_Spec.s_PersonFieldValue); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[0]->s_Children.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), extract.personNodes()[1]->s_Children.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + extract.personNodes()[0]->s_Children.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(2), + extract.personNodes()[1]->s_Children.size()); } // Test partition { model::CHierarchicalResults results; - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results); - addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.3, results); - addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(3, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); - addResult(4, true, FUNC, function, PNF1, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.2, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.2, results); + addResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.3, results); + addResult(2, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(3, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(4, true, FUNC, function, PNF1, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.2, results); addResult(4, true, FUNC, function, PNF1, pn11, PF1, p11, EMPTY_STRING, 0.3, results); - addResult(5, true, FUNC, function, PNF1, pn12, PF2, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(6, true, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(5, true, FUNC, function, PNF1, pn12, PF2, EMPTY_STRING, + EMPTY_STRING, 0.01, results); + addResult(6, true, FUNC, function, PNF1, pn13, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 1.0, results); results.buildHierarchy(); CPrinter printer; results.postorderDepthFirst(printer); @@ -889,8 +960,10 @@ void CHierarchicalResultsTest::testBasicVisitor() { CPPUNIT_ASSERT_EQUAL(std::size_t(1), extract.partitionedNodes().size()); CPPUNIT_ASSERT_EQUAL(PNF1, *extract.partitionedNodes()[0]->s_Spec.s_PartitionFieldName); - CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, *extract.partitionedNodes()[0]->s_Spec.s_PartitionFieldValue); - CPPUNIT_ASSERT_EQUAL(std::size_t(4), extract.partitionedNodes()[0]->s_Children.size()); + CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, + *extract.partitionedNodes()[0]->s_Spec.s_PartitionFieldValue); + CPPUNIT_ASSERT_EQUAL(std::size_t(4), + extract.partitionedNodes()[0]->s_Children.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(4), extract.partitionNodes().size()); CPPUNIT_ASSERT_EQUAL(PNF1, *extract.partitionNodes()[0]->s_Spec.s_PartitionFieldName); @@ -901,10 +974,14 @@ void CHierarchicalResultsTest::testBasicVisitor() { CPPUNIT_ASSERT_EQUAL(pn11, 
                              *extract.partitionNodes()[1]->s_Spec.s_PartitionFieldValue);
         CPPUNIT_ASSERT_EQUAL(pn12, *extract.partitionNodes()[2]->s_Spec.s_PartitionFieldValue);
         CPPUNIT_ASSERT_EQUAL(pn13, *extract.partitionNodes()[3]->s_Spec.s_PartitionFieldValue);
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.partitionNodes()[0]->s_Children.size());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.partitionNodes()[1]->s_Children.size());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.partitionNodes()[2]->s_Children.size());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.partitionNodes()[3]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             extract.partitionNodes()[0]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             extract.partitionNodes()[1]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             extract.partitionNodes()[2]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             extract.partitionNodes()[3]->s_Children.size());
 
         CPPUNIT_ASSERT_EQUAL(std::size_t(5), extract.personNodes().size());
         CPPUNIT_ASSERT_EQUAL(PF2, *extract.personNodes()[0]->s_Spec.s_PersonFieldName);
@@ -917,11 +994,16 @@ void CHierarchicalResultsTest::testBasicVisitor() {
         CPPUNIT_ASSERT_EQUAL(p11, *extract.personNodes()[2]->s_Spec.s_PersonFieldValue);
         CPPUNIT_ASSERT_EQUAL(EMPTY_STRING, *extract.personNodes()[3]->s_Spec.s_PersonFieldValue);
         CPPUNIT_ASSERT_EQUAL(p11, *extract.personNodes()[4]->s_Spec.s_PersonFieldValue);
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[0]->s_Children.size());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[1]->s_Children.size());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[2]->s_Children.size());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.personNodes()[3]->s_Children.size());
-        CPPUNIT_ASSERT_EQUAL(std::size_t(2), extract.personNodes()[4]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             extract.personNodes()[0]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             extract.personNodes()[1]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             extract.personNodes()[2]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(0),
+                             extract.personNodes()[3]->s_Children.size());
+        CPPUNIT_ASSERT_EQUAL(std::size_t(2),
+                             extract.personNodes()[4]->s_Children.size());
     }
 }
 
@@ -930,11 +1012,15 @@ void CHierarchicalResultsTest::testAggregator() {
 
     using TAnnotatedProbabilityVec = std::vector<model::SAnnotatedProbability>;
 
-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
     model::CHierarchicalResultsAggregator aggregator(modelConfig);
-    model::CAnomalyScore::CComputer attributeComputer(0.5, 0.5, 1, 5, modelConfig.maximumAnomalousProbability());
-    model::CAnomalyScore::CComputer personComputer(0.0, 1.0, 1, 1, modelConfig.maximumAnomalousProbability());
-    model::CAnomalyScore::CComputer partitionComputer(0.0, 1.0, 1, 1, modelConfig.maximumAnomalousProbability());
+    model::CAnomalyScore::CComputer attributeComputer(
+        0.5, 0.5, 1, 5, modelConfig.maximumAnomalousProbability());
+    model::CAnomalyScore::CComputer personComputer(
+        0.0, 1.0, 1, 1, modelConfig.maximumAnomalousProbability());
+    model::CAnomalyScore::CComputer partitionComputer(
+        0.0, 1.0, 1, 1, modelConfig.maximumAnomalousProbability());
     double score = 0.0;
     double probability = 1.0;
     static const std::string FUNC("max");
@@ -949,9 +1035,12 @@ void CHierarchicalResultsTest::testAggregator() {
         }
 
         model::CHierarchicalResults results;
-
results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbabilities[0]); - results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbabilities[1]); - results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p13, EMPTY_STRING, annotatedProbabilities[2]); + results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p11, EMPTY_STRING, annotatedProbabilities[0]); + results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p12, EMPTY_STRING, annotatedProbabilities[1]); + results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p13, EMPTY_STRING, annotatedProbabilities[2]); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); CPrinter printer; @@ -973,9 +1062,12 @@ void CHierarchicalResultsTest::testAggregator() { } model::CHierarchicalResults results; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbabilities[0]); - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbabilities[1]); - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p13, EMPTY_STRING, annotatedProbabilities[2]); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p11, EMPTY_STRING, annotatedProbabilities[0]); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p12, EMPTY_STRING, annotatedProbabilities[1]); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p13, EMPTY_STRING, annotatedProbabilities[2]); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); CPrinter printer; @@ -1001,23 +1093,32 @@ void CHierarchicalResultsTest::testAggregator() { model::CHierarchicalResults results; annotatedProbability.s_Probability = p11_[0]; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p11, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p11_[1]; - results.addModelResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability); + results.addModelResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p11, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p11_[2]; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p11, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p12_[0]; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbability); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p12, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p12_[1]; - results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbability); + results.addModelResult(1, true, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p12, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p21_[0]; - results.addModelResult(3, false, FUNC, 
function, EMPTY_STRING, EMPTY_STRING, PF2, p21, EMPTY_STRING, annotatedProbability); + results.addModelResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF2, p21, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p21_[1]; - results.addModelResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p21, EMPTY_STRING, annotatedProbability); + results.addModelResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF2, p21, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p22_[0]; - results.addModelResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p22, EMPTY_STRING, annotatedProbability); + results.addModelResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF2, p22, EMPTY_STRING, annotatedProbability); annotatedProbability.s_Probability = p22_[1]; - results.addModelResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p22, EMPTY_STRING, annotatedProbability); + results.addModelResult(3, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF2, p22, EMPTY_STRING, annotatedProbability); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); CPrinter printer; @@ -1034,16 +1135,23 @@ void CHierarchicalResultsTest::testAggregator() { maths::COrderings::simultaneousSort(probabilities, scores); TDoubleVec expectedScores; TDoubleVec expectedProbabilities; - addAggregateValues(0.5, 0.5, 5, boost::begin(rp1), boost::end(rp1), expectedScores, expectedProbabilities); - addAggregateValues(0.5, 0.5, 5, boost::begin(rp2), boost::end(rp2), expectedScores, expectedProbabilities); - addAggregateValues(0.5, 0.5, 5, boost::begin(rp3), boost::end(rp3), expectedScores, expectedProbabilities); + addAggregateValues(0.5, 0.5, 5, boost::begin(rp1), boost::end(rp1), + expectedScores, expectedProbabilities); + addAggregateValues(0.5, 0.5, 5, boost::begin(rp2), boost::end(rp2), + expectedScores, expectedProbabilities); + addAggregateValues(0.5, 0.5, 5, boost::begin(rp3), boost::end(rp3), + expectedScores, expectedProbabilities); maths::COrderings::simultaneousSort(expectedProbabilities, expectedScores); LOG_DEBUG(<< "expectedScores = " << core::CContainerPrinter::print(expectedScores)); LOG_DEBUG(<< "scores = " << core::CContainerPrinter::print(scores)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedScores), core::CContainerPrinter::print(scores)); - LOG_DEBUG(<< "expectedProbabilities = " << core::CContainerPrinter::print(expectedProbabilities)); - LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedProbabilities), core::CContainerPrinter::print(probabilities)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedScores), + core::CContainerPrinter::print(scores)); + LOG_DEBUG(<< "expectedProbabilities = " + << core::CContainerPrinter::print(expectedProbabilities)); + LOG_DEBUG(<< "probabilities = " + << core::CContainerPrinter::print(probabilities)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedProbabilities), + core::CContainerPrinter::print(probabilities)); } // Test partition @@ -1054,9 +1162,12 @@ void CHierarchicalResultsTest::testAggregator() { annotatedProbabilities.push_back(model::SAnnotatedProbability(p_[i])); } model::CHierarchicalResults results; - results.addModelResult(1, false, FUNC, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, annotatedProbabilities[0]); - results.addModelResult(1, false, FUNC, function, PNF1, pn12, 
EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, annotatedProbabilities[1]); - results.addModelResult(1, false, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, annotatedProbabilities[2]); + results.addModelResult(1, false, FUNC, function, PNF1, pn11, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, annotatedProbabilities[0]); + results.addModelResult(1, false, FUNC, function, PNF1, pn12, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, annotatedProbabilities[1]); + results.addModelResult(1, false, FUNC, function, PNF1, pn13, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, annotatedProbabilities[2]); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); CPrinter printer; @@ -1073,7 +1184,8 @@ void CHierarchicalResultsTest::testAggregator() { void CHierarchicalResultsTest::testInfluence() { LOG_DEBUG(<< "*** testInfluence ***"); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(); model::CHierarchicalResultsAggregator aggregator(modelConfig); std::string FUNC("max"); static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMax); @@ -1085,21 +1197,24 @@ void CHierarchicalResultsTest::testInfluence() { // Test by. { model::SAnnotatedProbability annotatedProbability1(0.22); - annotatedProbability1.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); + annotatedProbability1.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); model::SAnnotatedProbability annotatedProbability2(0.003); - annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.9)); - annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); + annotatedProbability2.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 0.9)); + annotatedProbability2.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); model::SAnnotatedProbability annotatedProbability3(0.01); - annotatedProbability3.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); + annotatedProbability3.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); model::CHierarchicalResults results; - results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, annotatedProbability1); - results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, annotatedProbability2); - results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p13, EMPTY_STRING, annotatedProbability3); + results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p11, EMPTY_STRING, annotatedProbability1); + results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p12, EMPTY_STRING, annotatedProbability2); + results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF1, p13, EMPTY_STRING, annotatedProbability3); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.createPivots(); @@ -1108,44 
+1223,50 @@ void CHierarchicalResultsTest::testInfluence() { results.postorderDepthFirst(printer); results.pivotsBottomUpBreadthFirst(printer); LOG_DEBUG(<< "\nby:\n" << printer.result()); - CPPUNIT_ASSERT_EQUAL(std::string("'false/false////I//': 0.003600205, 0.02066228 pivot\n" - " 'false/false////I/i2/': 0.003, 0.0251169 pivot\n" - " 'false/false////I/i1/': 0.001801726, 0.04288765 pivot\n" - "'false/false////PF1//': 0.000885378, 0.08893476\n" - " 'false/false/max///PF1/p13/': 0.01, 0.008016032, [((I, i1), 1)]\n" - " 'false/false/max///PF1/p12/': 0.003, 0.03139613, [((I, i1), 0.9), ((I, i2), 1)]\n" - " 'false/false/max///PF1/p11/': 0.22, 0, [((I, i1), 0.6)]"), - printer.result()); + CPPUNIT_ASSERT_EQUAL( + std::string("'false/false////I//': 0.003600205, 0.02066228 pivot\n" + " 'false/false////I/i2/': 0.003, 0.0251169 pivot\n" + " 'false/false////I/i1/': 0.001801726, 0.04288765 pivot\n" + "'false/false////PF1//': 0.000885378, 0.08893476\n" + " 'false/false/max///PF1/p13/': 0.01, 0.008016032, [((I, i1), 1)]\n" + " 'false/false/max///PF1/p12/': 0.003, 0.03139613, [((I, i1), 0.9), ((I, i2), 1)]\n" + " 'false/false/max///PF1/p11/': 0.22, 0, [((I, i1), 0.6)]"), + printer.result()); } // Test complex. { model::SAnnotatedProbability annotatedProbability1(0.22); - annotatedProbability1.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); + annotatedProbability1.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); model::SAnnotatedProbability annotatedProbability2(0.003); - annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.9)); - annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); + annotatedProbability2.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 0.9)); + annotatedProbability2.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); model::SAnnotatedProbability annotatedProbability3(0.01); - annotatedProbability3.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); + annotatedProbability3.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); model::SAnnotatedProbability annotatedProbability4(0.03); - annotatedProbability4.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); - annotatedProbability4.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 0.8)); + annotatedProbability4.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 0.6)); + annotatedProbability4.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i2), 0.8)); model::SAnnotatedProbability annotatedProbability5(0.56); - annotatedProbability5.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 0.8)); + annotatedProbability5.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 0.8)); model::CHierarchicalResults results; - results.addModelResult(1, true, 
FUNC, function, PNF1, pn11, PF1, p11, EMPTY_STRING, annotatedProbability1); - results.addModelResult(1, true, FUNC, function, PNF1, pn12, PF1, p12, EMPTY_STRING, annotatedProbability2); - results.addModelResult(2, false, FUNC, function, PNF2, pn21, PF1, p13, EMPTY_STRING, annotatedProbability3); - results.addModelResult(2, false, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING, annotatedProbability4); - results.addModelResult(2, false, FUNC, function, PNF2, pn23, PF1, p12, EMPTY_STRING, annotatedProbability5); + results.addModelResult(1, true, FUNC, function, PNF1, pn11, PF1, p11, + EMPTY_STRING, annotatedProbability1); + results.addModelResult(1, true, FUNC, function, PNF1, pn12, PF1, p12, + EMPTY_STRING, annotatedProbability2); + results.addModelResult(2, false, FUNC, function, PNF2, pn21, PF1, p13, + EMPTY_STRING, annotatedProbability3); + results.addModelResult(2, false, FUNC, function, PNF2, pn22, PF1, p12, + EMPTY_STRING, annotatedProbability4); + results.addModelResult(2, false, FUNC, function, PNF2, pn23, PF1, p12, + EMPTY_STRING, annotatedProbability5); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.createPivots(); @@ -1154,25 +1275,26 @@ void CHierarchicalResultsTest::testInfluence() { results.postorderDepthFirst(printer); results.pivotsBottomUpBreadthFirst(printer); LOG_DEBUG(<< "\ncomplex:\n" << printer.result()); - CPPUNIT_ASSERT_EQUAL(std::string("'false/false////I//': 0.006210884, 0.01130322 pivot\n" - " 'false/false////I/i2/': 0.003110279, 0.0241695 pivot\n" - " 'false/false////I/i1/': 0.00619034, 0.01134605 pivot\n" - "'false/true//////': 0.003651953, 0.02034678\n" - " 'false/false//PNF2////': 0.029701, 0.001095703\n" - " 'false/false/max/PNF2/pn23/PF1/p12/': 0.56, 0, [((I, i1), 0.8)]\n" - " 'false/false/max/PNF2/pn22/PF1/p12/': 0.03, 0.001336005, [((I, i1), 0.6), ((I, i2), 0.8)]\n" - " 'false/false/max/PNF2/pn21/PF1/p13/': 0.01, 0.008016032, [((I, i1), 1)]\n" - " 'false/true//PNF1////': 0.005991, 0.01177692\n" - " 'false/true/max/PNF1/pn12/PF1/p12/': 0.003, 0.03139613, [((I, i1), 0.9), ((I, i2), 1)]\n" - " 'false/true/max/PNF1/pn11/PF1/p11/': 0.22, 0, [((I, i1), 0.6)]"), - printer.result()); + CPPUNIT_ASSERT_EQUAL( + std::string("'false/false////I//': 0.006210884, 0.01130322 pivot\n" + " 'false/false////I/i2/': 0.003110279, 0.0241695 pivot\n" + " 'false/false////I/i1/': 0.00619034, 0.01134605 pivot\n" + "'false/true//////': 0.003651953, 0.02034678\n" + " 'false/false//PNF2////': 0.029701, 0.001095703\n" + " 'false/false/max/PNF2/pn23/PF1/p12/': 0.56, 0, [((I, i1), 0.8)]\n" + " 'false/false/max/PNF2/pn22/PF1/p12/': 0.03, 0.001336005, [((I, i1), 0.6), ((I, i2), 0.8)]\n" + " 'false/false/max/PNF2/pn21/PF1/p13/': 0.01, 0.008016032, [((I, i1), 1)]\n" + " 'false/true//PNF1////': 0.005991, 0.01177692\n" + " 'false/true/max/PNF1/pn12/PF1/p12/': 0.003, 0.03139613, [((I, i1), 0.9), ((I, i2), 1)]\n" + " 'false/true/max/PNF1/pn11/PF1/p11/': 0.22, 0, [((I, i1), 0.6)]"), + printer.result()); } // Test high probability records are written due to low probability influencer { model::SAnnotatedProbability annotatedProbability1Low(0.06); - annotatedProbability1Low.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); + annotatedProbability1Low.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i1), 1.0)); model::SAnnotatedProbability annotatedProbability1High(0.8); model::SAnnotatedProbability annotatedProbability11 = annotatedProbability1Low; 
model::SAnnotatedProbability annotatedProbability12 = annotatedProbability1High; @@ -1181,18 +1303,25 @@ void CHierarchicalResultsTest::testInfluence() { model::SAnnotatedProbability annotatedProbability15 = annotatedProbability1High; model::SAnnotatedProbability annotatedProbability16 = annotatedProbability1High; model::SAnnotatedProbability annotatedProbability2(0.001); - annotatedProbability2.s_Influences.push_back( - TStoredStringPtrStoredStringPtrPrDoublePr(TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); + annotatedProbability2.s_Influences.push_back(TStoredStringPtrStoredStringPtrPrDoublePr( + TStoredStringPtrStoredStringPtrPr(I, i2), 1.0)); model::CHierarchicalResults results; results.addInfluencer(*I); - results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p11, EMPTY_STRING, annotatedProbability11); - results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p12, EMPTY_STRING, annotatedProbability12); - results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p13, EMPTY_STRING, annotatedProbability13); - results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p14, EMPTY_STRING, annotatedProbability14); - results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p15, EMPTY_STRING, annotatedProbability15); - results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p16, EMPTY_STRING, annotatedProbability16); - results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF2, p21, EMPTY_STRING, annotatedProbability2); + results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p11, + EMPTY_STRING, annotatedProbability11); + results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p12, + EMPTY_STRING, annotatedProbability12); + results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p13, + EMPTY_STRING, annotatedProbability13); + results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p14, + EMPTY_STRING, annotatedProbability14); + results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p15, + EMPTY_STRING, annotatedProbability15); + results.addModelResult(1, false, FUNC, function, PNF1, pn11, PF1, p16, + EMPTY_STRING, annotatedProbability16); + results.addModelResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + PF2, p21, EMPTY_STRING, annotatedProbability2); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.createPivots(); @@ -1201,22 +1330,25 @@ void CHierarchicalResultsTest::testInfluence() { CPrinter writtenNodesOnlyPrinter(true); results.postorderDepthFirst(writtenNodesOnlyPrinter); results.pivotsBottomUpBreadthFirst(writtenNodesOnlyPrinter); - LOG_DEBUG(<< "\nhigh p records with low p influencer:\n" << writtenNodesOnlyPrinter.result()); - CPPUNIT_ASSERT_EQUAL(std::string("'false/false////I//': 0.001999, 0.038497 pivot\n" - " 'false/false////I/i2/': 0.001, 0.07855711 pivot\n" - " 'false/false////I/i1/': 0.01939367, 0.002530117 pivot\n" - "'false/false//////': 0.001999, 0.038497\n" - " 'false/false/max/PNF1/pn11/PF1/p13/': 0.06, 0, [((I, i1), 1)]\n" - " 'false/false/max/PNF1/pn11/PF1/p11/': 0.06, 0, [((I, i1), 1)]\n" - " 'false/false/max///PF2/p21/': 0.001, 0.09819639, [((I, i2), 1)]"), - writtenNodesOnlyPrinter.result()); + LOG_DEBUG(<< "\nhigh p records with low p influencer:\n" + << writtenNodesOnlyPrinter.result()); + CPPUNIT_ASSERT_EQUAL( + std::string("'false/false////I//': 0.001999, 0.038497 pivot\n" + " 'false/false////I/i2/': 0.001, 0.07855711 pivot\n" + " 'false/false////I/i1/': 0.01939367, 0.002530117 pivot\n" + 
"'false/false//////': 0.001999, 0.038497\n" + " 'false/false/max/PNF1/pn11/PF1/p13/': 0.06, 0, [((I, i1), 1)]\n" + " 'false/false/max/PNF1/pn11/PF1/p11/': 0.06, 0, [((I, i1), 1)]\n" + " 'false/false/max///PF2/p21/': 0.001, 0.09819639, [((I, i2), 1)]"), + writtenNodesOnlyPrinter.result()); } } void CHierarchicalResultsTest::testScores() { LOG_DEBUG(<< "*** testScores ***"); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(); model::CLimits limits; model::CHierarchicalResultsAggregator aggregator(modelConfig); model::CHierarchicalResultsProbabilityFinalizer finalizer; @@ -1228,7 +1360,8 @@ void CHierarchicalResultsTest::testScores() { // Test vanilla by / over. { model::CHierarchicalResults results; - addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); + addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 1.0, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.bottomUpBreadthFirst(finalizer); @@ -1239,8 +1372,10 @@ void CHierarchicalResultsTest::testScores() { } { model::CHierarchicalResults results; - addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.6, results); - addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.7, results); + addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.6, results); + addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, + EMPTY_STRING, 0.7, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.bottomUpBreadthFirst(finalizer); @@ -1251,10 +1386,14 @@ void CHierarchicalResultsTest::testScores() { } { model::CHierarchicalResults results; - addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.3, results); - addResult(2, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(2, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); - addResult(3, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF2, p22, EMPTY_STRING, 0.03, results); + addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.3, results); + addResult(2, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.01, results); + addResult(2, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, + EMPTY_STRING, 0.03, results); + addResult(3, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF2, p22, + EMPTY_STRING, 0.03, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.bottomUpBreadthFirst(finalizer); @@ -1265,10 +1404,14 @@ void CHierarchicalResultsTest::testScores() { } { model::CHierarchicalResults results; - addResult(1, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(1, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.03, results); - addResult(2, true, RARE, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.07, results); - addResult(2, true, RARE, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, EMPTY_STRING, 0.3, results); + addResult(1, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, 
p11, + EMPTY_STRING, 0.01, results); + addResult(1, true, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, + EMPTY_STRING, 0.03, results); + addResult(2, true, RARE, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.07, results); + addResult(2, true, RARE, function, EMPTY_STRING, EMPTY_STRING, PF1, p12, + EMPTY_STRING, 0.3, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.bottomUpBreadthFirst(finalizer); @@ -1281,9 +1424,12 @@ void CHierarchicalResultsTest::testScores() { // Test vanilla partition { model::CHierarchicalResults results; - addResult(1, false, MAX, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(1, false, MAX, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(1, false, MAX, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(1, false, MAX, function, PNF1, pn11, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(1, false, MAX, function, PNF1, pn12, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(1, false, MAX, function, PNF1, pn13, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.05, results); results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); results.bottomUpBreadthFirst(finalizer); @@ -1296,14 +1442,22 @@ void CHierarchicalResultsTest::testScores() { // Test complex. { model::CHierarchicalResults results; - addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(2, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.01, results); - addResult(2, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p14, EMPTY_STRING, 0.01, results); - addResult(3, false, MAX, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(3, false, MAX, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(3, false, MAX, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); - addResult(4, true, MAX, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); - addResult(4, true, MAX, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(1, false, MAX, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(2, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, + EMPTY_STRING, 0.01, results); + addResult(2, false, MAX, function, EMPTY_STRING, EMPTY_STRING, PF1, p14, + EMPTY_STRING, 0.01, results); + addResult(3, false, MAX, function, PNF1, pn11, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(3, false, MAX, function, PNF1, pn12, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(3, false, MAX, function, PNF1, pn13, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.05, results); + addResult(4, true, MAX, function, PNF2, pn22, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.01, results); + addResult(4, true, MAX, function, PNF2, pn23, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, 0.05, results); addResult(5, true, MAX, function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.2, results); addResult(5, true, MAX, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.2, results); addResult(5, true, MAX, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.1, results); @@ -1324,7 +1478,8 @@ void CHierarchicalResultsTest::testScores() { 
void CHierarchicalResultsTest::testWriter() { LOG_DEBUG(<< "*** testWriter ***"); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(); model::CLimits limits; model::CResourceMonitor resourceMonitor; model::CHierarchicalResultsAggregator aggregator(modelConfig); @@ -1338,22 +1493,11 @@ void CHierarchicalResultsTest::testWriter() { using TStrCPtrVec = model::CDataGatherer::TStrCPtrVec; model::SModelParams params(modelConfig.bucketLength()); model::CSearchKey key; - model::CAnomalyDetectorModel::TDataGathererPtr dataGatherer( - new model::CDataGatherer(model_t::E_EventRate, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - model_t::TFeatureVec(1, model_t::E_IndividualCountByBucketAndPerson), - modelConfig.bucketLength(), - 0)); + model::CAnomalyDetectorModel::TDataGathererPtr dataGatherer(new model::CDataGatherer( + model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), false, + key, model_t::TFeatureVec(1, model_t::E_IndividualCountByBucketAndPerson), + modelConfig.bucketLength(), 0)); model::CEventData dummy; dataGatherer->addArrival(TStrCPtrVec(1, &EMPTY_STRING), dummy, resourceMonitor); dummy.clear(); @@ -1368,21 +1512,36 @@ void CHierarchicalResultsTest::testWriter() { dataGatherer->addArrival(TStrCPtrVec(1, &p23), dummy, resourceMonitor); model::CCountingModel model(params, dataGatherer); model::CHierarchicalResults results; - addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); - addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p11, EMPTY_STRING, 0.001, &model, results); - addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, p14, EMPTY_STRING, 0.001, &model, results); - addResult(3, false, FUNC, function, PNF1, pn11, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); - addResult(3, false, FUNC, function, PNF1, pn12, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); - addResult(3, false, FUNC, function, PNF1, pn13, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.005, &model, results); - addResult(4, true, FUNC, function, PNF2, pn22, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); - addResult(4, true, FUNC, function, PNF2, pn23, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.005, &model, results); - addResult(5, true, FUNC, function, PNF2, pn21, PF1, p11, EMPTY_STRING, 0.008, &model, results); - addResult(5, true, FUNC, function, PNF2, pn22, PF1, p11, EMPTY_STRING, 0.009, &model, results); - addResult(5, true, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING, 0.01, &model, results); - addResult(6, true, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.007, &model, results); - addResult(7, false, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING, 0.006, &model, results); - addResult(6, true, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.004, &model, results); - addResult(7, false, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING, 0.003, &model, results); + addResult(1, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 0.001, &model, results); + addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1, + p11, 
EMPTY_STRING, 0.001, &model, results);
+    addResult(2, false, FUNC, function, EMPTY_STRING, EMPTY_STRING, PF1,
+              p14, EMPTY_STRING, 0.001, &model, results);
+    addResult(3, false, FUNC, function, PNF1, pn11, EMPTY_STRING,
+              EMPTY_STRING, EMPTY_STRING, 0.001, &model, results);
+    addResult(3, false, FUNC, function, PNF1, pn12, EMPTY_STRING,
+              EMPTY_STRING, EMPTY_STRING, 0.001, &model, results);
+    addResult(3, false, FUNC, function, PNF1, pn13, EMPTY_STRING,
+              EMPTY_STRING, EMPTY_STRING, 0.005, &model, results);
+    addResult(4, true, FUNC, function, PNF2, pn22, EMPTY_STRING,
+              EMPTY_STRING, EMPTY_STRING, 0.001, &model, results);
+    addResult(4, true, FUNC, function, PNF2, pn23, EMPTY_STRING,
+              EMPTY_STRING, EMPTY_STRING, 0.005, &model, results);
+    addResult(5, true, FUNC, function, PNF2, pn21, PF1, p11, EMPTY_STRING,
+              0.008, &model, results);
+    addResult(5, true, FUNC, function, PNF2, pn22, PF1, p11, EMPTY_STRING,
+              0.009, &model, results);
+    addResult(5, true, FUNC, function, PNF2, pn22, PF1, p12, EMPTY_STRING,
+              0.01, &model, results);
+    addResult(6, true, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING,
+              0.007, &model, results);
+    addResult(7, false, FUNC, function, PNF2, pn22, PF2, p21, EMPTY_STRING,
+              0.006, &model, results);
+    addResult(6, true, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING,
+              0.004, &model, results);
+    addResult(7, false, FUNC, function, PNF2, pn22, PF2, p23, EMPTY_STRING,
+              0.003, &model, results);
     results.buildHierarchy();
     results.bottomUpBreadthFirst(aggregator);
     CPrinter printer;
@@ -1400,7 +1559,8 @@ void CHierarchicalResultsTest::testNormalizer() {
    using TStrNormalizerPtrMapItr = TStrNormalizerPtrMap::iterator;
    using TNodeCPtrSet = std::set<const model::CHierarchicalResults::TNode*>;

-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
     model::CHierarchicalResultsAggregator aggregator(modelConfig);
     model::CHierarchicalResultsProbabilityFinalizer finalizer;
     model::CHierarchicalResultsNormalizer normalizer(modelConfig);
@@ -1409,21 +1569,22 @@ void CHierarchicalResultsTest::testNormalizer() {

     // Not using TRUE and FALSE as they clash with Windows macros

-    const std::string fields[][7] = {{"1", FALSE_STR, PNF1, pn11, PF2, p21, EMPTY_STRING},
-                                     {"1", FALSE_STR, PNF1, pn11, PF2, p22, EMPTY_STRING},
-                                     {"1", FALSE_STR, PNF1, pn11, PF2, p23, EMPTY_STRING},
-                                     {"2", FALSE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING},
-                                     {"2", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING},
-                                     {"2", FALSE_STR, PNF1, pn12, PF1, p13, EMPTY_STRING},
-                                     {"3", TRUE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING},
-                                     {"3", TRUE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING},
-                                     {"3", TRUE_STR, PNF1, pn12, PF1, p13, EMPTY_STRING},
-                                     {"4", FALSE_STR, PNF2, pn21, PF1, p11, EMPTY_STRING},
-                                     {"4", FALSE_STR, PNF2, pn22, PF1, p12, EMPTY_STRING},
-                                     {"4", FALSE_STR, PNF2, pn23, PF1, p13, EMPTY_STRING}};
+    const std::string fields[][7] = {
+        {"1", FALSE_STR, PNF1, pn11, PF2, p21, EMPTY_STRING},
+        {"1", FALSE_STR, PNF1, pn11, PF2, p22, EMPTY_STRING},
+        {"1", FALSE_STR, PNF1, pn11, PF2, p23, EMPTY_STRING},
+        {"2", FALSE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING},
+        {"2", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING},
+        {"2", FALSE_STR, PNF1, pn12, PF1, p13, EMPTY_STRING},
+        {"3", TRUE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING},
+        {"3", TRUE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING},
+        {"3", TRUE_STR, PNF1, pn12, PF1, p13, EMPTY_STRING},
+        {"4", FALSE_STR, PNF2, pn21, PF1, p11, EMPTY_STRING},
+        {"4", FALSE_STR, PNF2, 
pn22, PF1, p12, EMPTY_STRING},
+        {"4", FALSE_STR, PNF2, pn23, PF1, p13, EMPTY_STRING}};

     TStrNormalizerPtrMap expectedNormalizers;
-    expectedNormalizers.insert(
-        TStrNormalizerPtrMap::value_type(std::string("r"), TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig))));
+    expectedNormalizers.insert(TStrNormalizerPtrMap::value_type(
+        std::string("r"), TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig))));

     test::CRandomNumbers rng;
     for (std::size_t i = 0u; i < 300; ++i) {
@@ -1432,17 +1593,9 @@ void CHierarchicalResultsTest::testNormalizer() {
         rng.generateUniformSamples(0.0, 1.0, boost::size(fields), p);
         TAttributeProbabilityVec empty;
         for (std::size_t j = 0u; j < boost::size(fields); ++j) {
-            addResult(boost::lexical_cast<int>(fields[j][0]),
-                      fields[j][1] == TRUE_STR,
-                      FUNC,
-                      function,
-                      fields[j][2],
-                      fields[j][3],
-                      fields[j][4],
-                      fields[j][5],
-                      fields[j][6],
-                      p[j],
-                      results);
+            addResult(boost::lexical_cast<int>(fields[j][0]), fields[j][1] == TRUE_STR,
+                      FUNC, function, fields[j][2], fields[j][3], fields[j][4],
+                      fields[j][5], fields[j][6], p[j], results);
         }
         results.buildHierarchy();
         results.bottomUpBreadthFirst(aggregator);
@@ -1462,70 +1615,84 @@ void CHierarchicalResultsTest::testNormalizer() {

         TDoubleVec expectedNormalized;

         for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) {
-            std::string key =
-                'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName;
+            std::string key = 'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName +
+                              ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName;
             TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key);
             if (itr == expectedNormalizers.end()) {
                 itr = expectedNormalizers
-                          .insert(TStrNormalizerPtrMap::value_type(key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig))))
+                          .insert(TStrNormalizerPtrMap::value_type(
+                              key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig))))
                           .first;
             }
             double probability = extract.leafNodes()[j]->probability();
             // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit()
-            double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability);
+            double score = probability > modelConfig.maximumAnomalousProbability()
+                               ? 0.0
+                               : maths::CTools::anomalyScore(probability);
             itr->second->updateQuantiles(score);
         }
         for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) {
-            std::string key =
-                'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName;
+            std::string key = 'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName +
+                              ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName;
             TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key);
             if (nodes.insert(extract.leafNodes()[j]).second) {
                 double probability = extract.leafNodes()[j]->probability();
                 // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit()
-                double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability);
+                double score = probability > modelConfig.maximumAnomalousProbability()
+                                   ? 
0.0 + : maths::CTools::anomalyScore(probability); normalized.push_back(extract.leafNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); } } LOG_DEBUG(<< "* leaf *") - LOG_DEBUG(<< "expectedNormalized = " << core::CContainerPrinter::print(expectedNormalized)); + LOG_DEBUG(<< "expectedNormalized = " + << core::CContainerPrinter::print(expectedNormalized)); LOG_DEBUG(<< "normalized = " << core::CContainerPrinter::print(normalized)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), core::CContainerPrinter::print(normalized)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), + core::CContainerPrinter::print(normalized)); normalized.clear(); expectedNormalized.clear(); for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) { - std::string key = - 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; + std::string key = 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName + + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); if (itr == expectedNormalizers.end()) { itr = expectedNormalizers - .insert(TStrNormalizerPtrMap::value_type(key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) + .insert(TStrNormalizerPtrMap::value_type( + key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) .first; } double probability = extract.personNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() + ? 0.0 + : maths::CTools::anomalyScore(probability); itr->second->updateQuantiles(score); } for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) { - std::string key = - 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; + std::string key = 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName + + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); if (nodes.insert(extract.personNodes()[j]).second) { double probability = extract.personNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() + ? 
0.0 + : maths::CTools::anomalyScore(probability); normalized.push_back(extract.personNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); } } LOG_DEBUG(<< "* person *") - LOG_DEBUG(<< "expectedNormalized = " << core::CContainerPrinter::print(expectedNormalized)); + LOG_DEBUG(<< "expectedNormalized = " + << core::CContainerPrinter::print(expectedNormalized)); LOG_DEBUG(<< "normalized = " << core::CContainerPrinter::print(normalized)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), core::CContainerPrinter::print(normalized)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), + core::CContainerPrinter::print(normalized)); normalized.clear(); expectedNormalized.clear(); @@ -1534,12 +1701,15 @@ void CHierarchicalResultsTest::testNormalizer() { TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); if (itr == expectedNormalizers.end()) { itr = expectedNormalizers - .insert(TStrNormalizerPtrMap::value_type(key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) + .insert(TStrNormalizerPtrMap::value_type( + key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) .first; } double probability = extract.partitionNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() + ? 0.0 + : maths::CTools::anomalyScore(probability); itr->second->updateQuantiles(score); } for (std::size_t j = 0u; j < extract.partitionNodes().size(); ++j) { @@ -1548,20 +1718,26 @@ void CHierarchicalResultsTest::testNormalizer() { if (nodes.insert(extract.partitionNodes()[j]).second) { double probability = extract.partitionNodes()[j]->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() + ? 0.0 + : maths::CTools::anomalyScore(probability); normalized.push_back(extract.partitionNodes()[j]->s_NormalizedAnomalyScore); CPPUNIT_ASSERT(itr->second->normalize(score)); expectedNormalized.push_back(score); } } LOG_DEBUG(<< "* partition *") - LOG_DEBUG(<< "expectedNormalized = " << core::CContainerPrinter::print(expectedNormalized)); + LOG_DEBUG(<< "expectedNormalized = " + << core::CContainerPrinter::print(expectedNormalized)); LOG_DEBUG(<< "normalized = " << core::CContainerPrinter::print(normalized)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), core::CContainerPrinter::print(normalized)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNormalized), + core::CContainerPrinter::print(normalized)); double probability = results.root()->probability(); // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() - double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); + double score = probability > modelConfig.maximumAnomalousProbability() + ? 
0.0 + : maths::CTools::anomalyScore(probability); expectedNormalizers.find(std::string("r"))->second->updateQuantiles(score); expectedNormalizers.find(std::string("r"))->second->normalize(score); @@ -1580,7 +1756,8 @@ void CHierarchicalResultsTest::testNormalizer() { model::CHierarchicalResultsNormalizer newNormalizerJson(modelConfig); std::stringstream stream(origJson); - CPPUNIT_ASSERT_EQUAL(model::CHierarchicalResultsNormalizer::E_Ok, newNormalizerJson.fromJsonStream(stream)); + CPPUNIT_ASSERT_EQUAL(model::CHierarchicalResultsNormalizer::E_Ok, + newNormalizerJson.fromJsonStream(stream)); std::string newJson; newNormalizerJson.toJson(123, "mykey", newJson, true); @@ -1590,7 +1767,8 @@ void CHierarchicalResultsTest::testNormalizer() { void CHierarchicalResultsTest::testDetectorEqualizing() { LOG_DEBUG(<< "*** testDetectorEqualizing ***"); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(); test::CRandomNumbers rng; { @@ -1598,20 +1776,22 @@ void CHierarchicalResultsTest::testDetectorEqualizing() { aggregator.setJob(model::CHierarchicalResultsAggregator::E_UpdateAndCorrect); CProbabilityGatherer probabilityGatherer; static const std::string FUNC("max"); - static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMax); - - const std::string fields[][7] = {{"0", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, - {"0", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, - {"0", FALSE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, - {"1", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, - {"1", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, - {"1", FALSE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, - {"2", TRUE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING}, - {"2", TRUE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, - {"2", TRUE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, - {"3", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, - {"3", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, - {"3", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}}; + static const ml::model::function_t::EFunction function( + ml::model::function_t::E_IndividualMetricMax); + + const std::string fields[][7] = { + {"0", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, + {"0", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"0", FALSE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, + {"2", TRUE_STR, PNF1, pn12, PF1, p11, EMPTY_STRING}, + {"2", TRUE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"2", TRUE_STR, PNF1, pn11, PF1, p12, EMPTY_STRING}, + {"3", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, + {"3", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}, + {"3", FALSE_STR, PNF1, pn12, PF1, p12, EMPTY_STRING}}; double scales[] = {1.9, 2.5, 1.7, 2.9}; for (std::size_t i = 0u; i < 300; ++i) { @@ -1622,17 +1802,9 @@ void CHierarchicalResultsTest::testDetectorEqualizing() { TDoubleVec p; rng.generateGammaSamples(1.0, scales[detector], 1, p); p[0] = std::exp(-p[0]); - addResult(detector, - fields[j][1] == TRUE_STR, - FUNC, - function, - fields[j][2], - fields[j][3], - fields[j][4], - fields[j][5], - fields[j][6], - p[0], - results); + addResult(detector, fields[j][1] == TRUE_STR, FUNC, function, + fields[j][2], fields[j][3], fields[j][4], + fields[j][5], fields[j][6], p[0], results); } 
results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); @@ -1646,17 +1818,9 @@ void CHierarchicalResultsTest::testDetectorEqualizing() { TDoubleVec p; rng.generateGammaSamples(1.0, scales[detector], 1, p); p[0] = std::exp(-p[0]); - addResult(detector, - fields[j][1] == TRUE_STR, - FUNC, - function, - fields[j][2], - fields[j][3], - fields[j][4], - fields[j][5], - fields[j][6], - p[0], - results); + addResult(detector, fields[j][1] == TRUE_STR, FUNC, function, + fields[j][2], fields[j][3], fields[j][4], + fields[j][5], fields[j][6], p[0], results); } results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); @@ -1682,8 +1846,9 @@ void CHierarchicalResultsTest::testDetectorEqualizing() { core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT(traverser.traverseSubLevel( - boost::bind(&model::CHierarchicalResultsAggregator::acceptRestoreTraverser, &restoredAggregator, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind( + &model::CHierarchicalResultsAggregator::acceptRestoreTraverser, + &restoredAggregator, _1))); } // Checksums should agree. @@ -1702,10 +1867,12 @@ void CHierarchicalResultsTest::testDetectorEqualizing() { model::CHierarchicalResultsAggregator aggregator(modelConfig); aggregator.setJob(model::CHierarchicalResultsAggregator::E_UpdateAndCorrect); static const std::string FUNC("max"); - static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMax); + static const ml::model::function_t::EFunction function( + ml::model::function_t::E_IndividualMetricMax); - const std::string fields[][7] = {{"0", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, - {"1", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}}; + const std::string fields[][7] = { + {"0", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}, + {"1", FALSE_STR, PNF1, pn11, PF1, p11, EMPTY_STRING}}; double scales[] = {1.0, 3.5}; for (std::size_t i = 0u; i < 500; ++i) { @@ -1716,17 +1883,9 @@ void CHierarchicalResultsTest::testDetectorEqualizing() { TDoubleVec p; rng.generateGammaSamples(1.0, scales[detector], 1, p); p[0] = std::exp(-p[0]); - addResult(detector, - fields[j][1] == TRUE_STR, - FUNC, - function, - fields[j][2], - fields[j][3], - fields[j][4], - fields[j][5], - fields[j][6], - p[0], - results); + addResult(detector, fields[j][1] == TRUE_STR, FUNC, function, + fields[j][2], fields[j][3], fields[j][4], + fields[j][5], fields[j][6], p[0], results); } results.buildHierarchy(); results.bottomUpBreadthFirst(aggregator); @@ -1743,22 +1902,15 @@ void CHierarchicalResultsTest::testDetectorEqualizing() { TDoubleVec p; rng.generateGammaSamples(1.0, scales[detector], 1, p); p[0] = detector == 0 && i == 70 ? 
2.1e-5 : std::exp(-p[0]);
-                    addResult(detector,
-                              fields[j][1] == TRUE_STR,
-                              FUNC,
-                              function,
-                              fields[j][2],
-                              fields[j][3],
-                              fields[j][4],
-                              fields[j][5],
-                              fields[j][6],
-                              p[0],
-                              results);
+                    addResult(detector, fields[j][1] == TRUE_STR, FUNC, function,
+                              fields[j][2], fields[j][3], fields[j][4],
+                              fields[j][5], fields[j][6], p[0], results);
                 }
                 results.buildHierarchy();
                 results.bottomUpBreadthFirst(aggregator);
-                mostAnomalous.add(std::make_pair(results.root()->s_AnnotatedProbability.s_Probability, i));
+                mostAnomalous.add(std::make_pair(
+                    results.root()->s_AnnotatedProbability.s_Probability, i));
             }

             mostAnomalous.sort();
@@ -1778,7 +1930,8 @@ void CHierarchicalResultsTest::testShouldWritePartition() {
     std::string partition2("par_2");

     static const std::string FUNC("mean");
-    static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMean);
+    static const ml::model::function_t::EFunction function(
+        ml::model::function_t::E_IndividualMetricMean);

     model::CHierarchicalResults results;
     addResult(1, false, FUNC, function, PART1, partition1, PERS, pers1, VAL1, 0.001, results);
@@ -1801,49 +1954,59 @@ void CHierarchicalResultsTest::testShouldWritePartition() {
     CPPUNIT_ASSERT_EQUAL(std::size_t(2), extract.personNodes().size());
     CPPUNIT_ASSERT_EQUAL(std::size_t(3), extract.leafNodes().size());

-    LOG_DEBUG(<< "Partition 1 child count " << extract.partitionNodes()[0]->s_Children.size());
-    LOG_DEBUG(<< "Partition 2 child count " << extract.partitionNodes()[1]->s_Children.size());
+    LOG_DEBUG(<< "Partition 1 child count "
+              << extract.partitionNodes()[0]->s_Children.size());
+    LOG_DEBUG(<< "Partition 2 child count "
+              << extract.partitionNodes()[1]->s_Children.size());
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), extract.partitionNodes()[0]->s_Children.size());
     CPPUNIT_ASSERT_EQUAL(std::size_t(2), extract.partitionNodes()[1]->s_Children.size());

-    model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig();
+    model::CAnomalyDetectorModelConfig modelConfig =
+        model::CAnomalyDetectorModelConfig::defaultConfig();
     ml::model::CHierarchicalResultsAggregator aggregator(modelConfig);
     results.bottomUpBreadthFirst(aggregator);

     model::CLimits limits;
-    CPPUNIT_ASSERT(ml::model::CHierarchicalResultsVisitor::shouldWriteResult(limits, results, *extract.partitionNodes()[0], false));
-    CPPUNIT_ASSERT(ml::model::CHierarchicalResultsVisitor::shouldWriteResult(limits, results, *extract.partitionNodes()[1], false));
+    CPPUNIT_ASSERT(ml::model::CHierarchicalResultsVisitor::shouldWriteResult(
+        limits, results, *extract.partitionNodes()[0], false));
+    CPPUNIT_ASSERT(ml::model::CHierarchicalResultsVisitor::shouldWriteResult(
+        limits, results, *extract.partitionNodes()[1], false));
 }

 CppUnit::Test* CHierarchicalResultsTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CHierarchicalResultsTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testBreadthFirstVisit",
-                                                                            &CHierarchicalResultsTest::testBreadthFirstVisit));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testpostorderDepthFirstVisit",
-                                                                            &CHierarchicalResultsTest::testDepthFirstVisit));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testBuildHierarchy",
-                                                                            &CHierarchicalResultsTest::testBuildHierarchy));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testBreadthFirstVisit",
+        &CHierarchicalResultsTest::testBreadthFirstVisit));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testpostorderDepthFirstVisit",
+        &CHierarchicalResultsTest::testDepthFirstVisit));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testBuildHierarchy",
+        &CHierarchicalResultsTest::testBuildHierarchy));
     suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
         "CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue",
         &CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testBasicVisitor",
-                                                                            &CHierarchicalResultsTest::testBasicVisitor));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testAggregator",
-                                                                            &CHierarchicalResultsTest::testAggregator));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testInfluence",
-                                                                            &CHierarchicalResultsTest::testInfluence));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testScores", &CHierarchicalResultsTest::testScores));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testWriter", &CHierarchicalResultsTest::testWriter));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testNormalizer",
-                                                                            &CHierarchicalResultsTest::testNormalizer));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testDetectorEqualizing",
-                                                                            &CHierarchicalResultsTest::testDetectorEqualizing));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>("CHierarchicalResultsTest::testShouldWritePartition",
-                                                                            &CHierarchicalResultsTest::testShouldWritePartition));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testBasicVisitor", &CHierarchicalResultsTest::testBasicVisitor));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testAggregator", &CHierarchicalResultsTest::testAggregator));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testInfluence", &CHierarchicalResultsTest::testInfluence));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testScores", &CHierarchicalResultsTest::testScores));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testWriter", &CHierarchicalResultsTest::testWriter));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testNormalizer", &CHierarchicalResultsTest::testNormalizer));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testDetectorEqualizing",
+        &CHierarchicalResultsTest::testDetectorEqualizing));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CHierarchicalResultsTest>(
+        "CHierarchicalResultsTest::testShouldWritePartition",
+        &CHierarchicalResultsTest::testShouldWritePartition));

     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CInterimBucketCorrectorTest.cc b/lib/model/unittest/CInterimBucketCorrectorTest.cc
index 03fda45b4e..3b9b4909c8 100644
--- a/lib/model/unittest/CInterimBucketCorrectorTest.cc
+++ b/lib/model/unittest/CInterimBucketCorrectorTest.cc
@@ -30,15 +30,16 @@ CppUnit::Test* CInterimBucketCorrectorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CInterimBucketCorrectorTest");

     suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>(
-        "CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue", &CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CInterimBucketCorrectorTest>("CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline",
-                                                             &CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline));
-    suiteOfTests->addTest(
-        new CppUnit::TestCaller<CInterimBucketCorrectorTest>("CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode",
-                                                             &CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>("CInterimBucketCorrectorTest::testPersist",
-                                                                               &CInterimBucketCorrectorTest::testPersist));
+        "CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue",
+        &CInterimBucketCorrectorTest::testCorrectionsGivenSingleValue));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>(
+        "CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline",
+        &CInterimBucketCorrectorTest::testCorrectionsGivenSingleValueAndNoBaseline));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>(
+        "CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode",
+        &CInterimBucketCorrectorTest::testCorrectionsGivenMultiValueAndMultiMode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CInterimBucketCorrectorTest>(
+        "CInterimBucketCorrectorTest::testPersist", &CInterimBucketCorrectorTest::testPersist));

     return suiteOfTests;
 }
@@ -198,7 +199,8 @@ void CInterimBucketCorrectorTest::testPersist() {
         CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
         core::CRapidXmlStateRestoreTraverser traverser(parser);
         CInterimBucketCorrector restoredCorrector(bucketLength);
-        traverser.traverseSubLevel(boost::bind(&CInterimBucketCorrector::acceptRestoreTraverser, &restoredCorrector, _1));
+        traverser.traverseSubLevel(boost::bind(
+            &CInterimBucketCorrector::acceptRestoreTraverser, &restoredCorrector, _1));

         correction = restoredCorrector.corrections(now, 50, 1000, value);
         CPPUNIT_ASSERT_DOUBLES_EQUAL(500.0, correction, EPSILON);
diff --git a/lib/model/unittest/CLimitsTest.cc b/lib/model/unittest/CLimitsTest.cc
index 31af2c416a..aaa7f3c198 100644
--- a/lib/model/unittest/CLimitsTest.cc
+++ b/lib/model/unittest/CLimitsTest.cc
@@ -10,9 +10,12 @@ CppUnit::Test* CLimitsTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLimitsTest");

-    suiteOfTests->addTest(new CppUnit::TestCaller<CLimitsTest>("CLimitsTest::testTrivial", &CLimitsTest::testTrivial));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CLimitsTest>("CLimitsTest::testValid", &CLimitsTest::testValid));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CLimitsTest>("CLimitsTest::testInvalid", &CLimitsTest::testInvalid));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLimitsTest>(
+        "CLimitsTest::testTrivial", &CLimitsTest::testTrivial));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLimitsTest>(
+        "CLimitsTest::testValid", &CLimitsTest::testValid));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CLimitsTest>(
+        "CLimitsTest::testInvalid", &CLimitsTest::testInvalid));

     return suiteOfTests;
 }
@@ -20,11 +23,16 @@ CppUnit::Test* CLimitsTest::suite() {
 void CLimitsTest::testTrivial() {
     ml::model::CLimits config;

-    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_AUTOCONFIG_EVENTS, config.autoConfigEvents());
-    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_ANOMALY_MAX_TIME_BUCKETS, config.anomalyMaxTimeBuckets());
-    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_RESULTS_MAX_EXAMPLES, config.maxExamples());
-    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD / 100.0, config.unusualProbabilityThreshold());
-    CPPUNIT_ASSERT_EQUAL(ml::model::CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB, config.memoryLimitMB());
+    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_AUTOCONFIG_EVENTS,
+                         config.autoConfigEvents());
+    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_ANOMALY_MAX_TIME_BUCKETS,
+                         
config.anomalyMaxTimeBuckets());
+    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_RESULTS_MAX_EXAMPLES,
+                         config.maxExamples());
+    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD / 100.0,
+                         config.unusualProbabilityThreshold());
+    CPPUNIT_ASSERT_EQUAL(ml::model::CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB,
+                         config.memoryLimitMB());
 }

 void CLimitsTest::testValid() {
@@ -32,7 +40,8 @@ void CLimitsTest::testValid() {
     CPPUNIT_ASSERT(config.init("testfiles/mllimits.conf"));

     // This one isn't present in the config file so should be defaulted
-    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_ANOMALY_MAX_TIME_BUCKETS, config.anomalyMaxTimeBuckets());
+    CPPUNIT_ASSERT_EQUAL(ml::model::CLimits::DEFAULT_ANOMALY_MAX_TIME_BUCKETS,
+                         config.anomalyMaxTimeBuckets());

     CPPUNIT_ASSERT_EQUAL(size_t(8), config.maxExamples());

diff --git a/lib/model/unittest/CMemoryUsageEstimatorTest.cc b/lib/model/unittest/CMemoryUsageEstimatorTest.cc
index 19180a4633..0ff6c33934 100644
--- a/lib/model/unittest/CMemoryUsageEstimatorTest.cc
+++ b/lib/model/unittest/CMemoryUsageEstimatorTest.cc
@@ -32,8 +32,10 @@ void addValue(CMemoryUsageEstimator& estimator,
     estimator.addValue(predictors, memory);
 }

-CMemoryUsageEstimator::TOptionalSize
-estimate(CMemoryUsageEstimator& estimator, std::size_t people, std::size_t attributes, std::size_t correlations = 0) {
+CMemoryUsageEstimator::TOptionalSize estimate(CMemoryUsageEstimator& estimator,
+                                              std::size_t people,
+                                              std::size_t attributes,
+                                              std::size_t correlations = 0) {
     CMemoryUsageEstimator::TSizeArray predictors;
     predictors[CMemoryUsageEstimator::E_People] = people;
     predictors[CMemoryUsageEstimator::E_Attributes] = attributes;
@@ -130,18 +132,26 @@ void CMemoryUsageEstimatorTest::testEstimateNonlinear() {
         int cScale = 30;

         CMemoryUsageEstimator estimator;
-        addValue(estimator, pScale * 10 * 10 + aScale * 9 * 9 + cScale * 15 * 15, 10, 9, 15);
-        addValue(estimator, pScale * 11 * 11 + aScale * 11 * 11 + cScale * 20 * 20, 11, 11, 20);
-        addValue(estimator, pScale * 12 * 12 + aScale * 13 * 13 + cScale * 25 * 25, 12, 13, 25);
-        addValue(estimator, pScale * 13 * 13 + aScale * 15 * 15 + cScale * 26 * 26, 13, 15, 26);
-        addValue(estimator, pScale * 17 * 17 + aScale * 19 * 19 + cScale * 27 * 27, 17, 19, 27);
-        addValue(estimator, pScale * 20 * 20 + aScale * 19 * 19 + cScale * 30 * 30, 20, 19, 30);
-        addValue(estimator, pScale * 20 * 20 + aScale * 25 * 25 + cScale * 40 * 40, 20, 25, 40);
+        addValue(estimator, pScale * 10 * 10 + aScale * 9 * 9 + cScale * 15 * 15,
+                 10, 9, 15);
+        addValue(estimator, pScale * 11 * 11 + aScale * 11 * 11 + cScale * 20 * 20,
+                 11, 11, 20);
+        addValue(estimator, pScale * 12 * 12 + aScale * 13 * 13 + cScale * 25 * 25,
+                 12, 13, 25);
+        addValue(estimator, pScale * 13 * 13 + aScale * 15 * 15 + cScale * 26 * 26,
+                 13, 15, 26);
+        addValue(estimator, pScale * 17 * 17 + aScale * 19 * 19 + cScale * 27 * 27,
+                 17, 19, 27);
+        addValue(estimator, pScale * 20 * 20 + aScale * 19 * 19 + cScale * 30 * 30,
+                 20, 19, 30);
+        addValue(estimator, pScale * 20 * 20 + aScale * 25 * 25 + cScale * 40 * 40,
+                 20, 25, 40);

         CMemoryUsageEstimator::TOptionalSize mem = estimate(estimator, 25, 35, 45);
         std::size_t actual = pScale * 25 * 25 + aScale * 35 * 35 + cScale * 45 * 45;
         LOG_DEBUG(<< "actual = " << actual << ", estimated = " << mem.get());
-        CPPUNIT_ASSERT(static_cast<double>(actual - mem.get()) / static_cast<double>(actual) < 0.15);
+        CPPUNIT_ASSERT(
+            static_cast<double>(actual - mem.get()) / static_cast<double>(actual) < 0.15);
     }
 }

@@ -164,7 +174,8 @@ void CMemoryUsageEstimatorTest::testPersist() {
         core::CRapidXmlStateRestoreTraverser traverser(parser);

         CMemoryUsageEstimator restoredEstimator;
-        CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CMemoryUsageEstimator::acceptRestoreTraverser, &restoredEstimator, _1)));
+        CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(
+            &CMemoryUsageEstimator::acceptRestoreTraverser, &restoredEstimator, _1)));

         // The XML representation of the new data gatherer should be the same
         // as the original.
@@ -201,7 +212,8 @@ void CMemoryUsageEstimatorTest::testPersist() {
         core::CRapidXmlStateRestoreTraverser traverser(parser);

         CMemoryUsageEstimator restoredEstimator;
-        CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(&CMemoryUsageEstimator::acceptRestoreTraverser, &restoredEstimator, _1)));
+        CPPUNIT_ASSERT(traverser.traverseSubLevel(boost::bind(
+            &CMemoryUsageEstimator::acceptRestoreTraverser, &restoredEstimator, _1)));

         // The XML representation of the new data gatherer should be the same
         // as the original.
@@ -218,12 +230,14 @@ void CMemoryUsageEstimatorTest::testPersist() {

 CppUnit::Test* CMemoryUsageEstimatorTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageEstimatorTest");
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageEstimatorTest>("CMemoryUsageEstimatorTest::testEstimateLinear",
-                                                                             &CMemoryUsageEstimatorTest::testEstimateLinear));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageEstimatorTest>("CMemoryUsageEstimatorTest::testEstimateNonlinear",
-                                                                             &CMemoryUsageEstimatorTest::testEstimateNonlinear));
-    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageEstimatorTest>("CMemoryUsageEstimatorTest::testPersist",
-                                                                             &CMemoryUsageEstimatorTest::testPersist));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageEstimatorTest>(
+        "CMemoryUsageEstimatorTest::testEstimateLinear",
+        &CMemoryUsageEstimatorTest::testEstimateLinear));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageEstimatorTest>(
+        "CMemoryUsageEstimatorTest::testEstimateNonlinear",
+        &CMemoryUsageEstimatorTest::testEstimateNonlinear));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CMemoryUsageEstimatorTest>(
+        "CMemoryUsageEstimatorTest::testPersist", &CMemoryUsageEstimatorTest::testPersist));
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CMetricAnomalyDetectorTest.cc b/lib/model/unittest/CMetricAnomalyDetectorTest.cc
index 70581d26bd..98bac04401 100644
--- a/lib/model/unittest/CMetricAnomalyDetectorTest.cc
+++ b/lib/model/unittest/CMetricAnomalyDetectorTest.cc
@@ -51,10 +51,14 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor {
     static const double HIGH_ANOMALY_SCORE;

 public:
-    CResultWriter(const model::CAnomalyDetectorModelConfig& modelConfig, const model::CLimits& limits, core_t::TTime bucketLength)
+    CResultWriter(const model::CAnomalyDetectorModelConfig& modelConfig,
+                  const model::CLimits& limits,
+                  core_t::TTime bucketLength)
         : m_ModelConfig(modelConfig), m_Limits(limits), m_BucketLength(bucketLength) {}

-    void operator()(ml::model::CAnomalyDetector& detector, ml::core_t::TTime start, ml::core_t::TTime end) {
+    void operator()(ml::model::CAnomalyDetector& detector,
+                    ml::core_t::TTime start,
+                    ml::core_t::TTime end) {
         ml::model::CHierarchicalResults results;
         detector.buildResults(start, end, results);
         results.buildHierarchy();
@@ -68,7 +72,9 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor {
     }

     //! Visit a node.
- virtual void visit(const ml::model::CHierarchicalResults& results, const ml::model::CHierarchicalResults::TNode& node, bool pivot) { + virtual void visit(const ml::model::CHierarchicalResults& results, + const ml::model::CHierarchicalResults::TNode& node, + bool pivot) { if (pivot) { return; } @@ -99,16 +105,23 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor { } } - bool operator()(ml::core_t::TTime time, const ml::model::CHierarchicalResults::TNode& node, bool isBucketInfluencer) { - LOG_DEBUG(<< (isBucketInfluencer ? "BucketInfluencer" : "Influencer ") << node.s_Spec.print() << " initial score " + bool operator()(ml::core_t::TTime time, + const ml::model::CHierarchicalResults::TNode& node, + bool isBucketInfluencer) { + LOG_DEBUG(<< (isBucketInfluencer ? "BucketInfluencer" : "Influencer ") + << node.s_Spec.print() << " initial score " << node.probability() << ", time: " << time); return true; } - const TTimeTimePrVec& highAnomalyTimes() const { return m_HighAnomalyTimes; } + const TTimeTimePrVec& highAnomalyTimes() const { + return m_HighAnomalyTimes; + } - const TDoubleVec& highAnomalyFactors() const { return m_HighAnomalyFactors; } + const TDoubleVec& highAnomalyFactors() const { + return m_HighAnomalyFactors; + } const TDoubleVec& anomalyFactors() const { return m_AnomalyFactors; } @@ -239,30 +252,28 @@ void CMetricAnomalyDetectorTest::testAnomalies() { static const core_t::TTime FIRST_TIME(1360540800); static const core_t::TTime LAST_TIME(FIRST_TIME + 86400); - static const core_t::TTime BUCKET_LENGTHS[] = {120, 150, 180, 210, 240, 300, 450, 600, 900, 1200}; - static const TTimeTimePr ANOMALOUS_INTERVALS[] = {TTimeTimePr(1360576852, 1360578629), TTimeTimePr(1360617335, 1360617481)}; + static const core_t::TTime BUCKET_LENGTHS[] = {120, 150, 180, 210, 240, + 300, 450, 600, 900, 1200}; + static const TTimeTimePr ANOMALOUS_INTERVALS[] = { + TTimeTimePr(1360576852, 1360578629), TTimeTimePr(1360617335, 1360617481)}; double highRateNoise = 0.0; double lowRateNoise = 0.0; for (size_t i = 0; i < boost::size(BUCKET_LENGTHS); ++i) { - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTHS[i]); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTHS[i]); model::CLimits limits; model::CSearchKey key(1, // identifier - model::function_t::E_IndividualMetric, - false, - model_t::E_XF_None, - "n/a", - "n/a"); + model::function_t::E_IndividualMetric, false, + model_t::E_XF_None, "n/a", "n/a"); model::CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - "", - FIRST_TIME, + limits, modelConfig, "", FIRST_TIME, modelConfig.factory(key)); CResultWriter writer(modelConfig, limits, BUCKET_LENGTHS[i]); - importData(FIRST_TIME, LAST_TIME, BUCKET_LENGTHS[i], writer, "testfiles/variable_rate_metric.data", detector); + importData(FIRST_TIME, LAST_TIME, BUCKET_LENGTHS[i], writer, + "testfiles/variable_rate_metric.data", detector); TTimeTimePrVec highAnomalyTimes(writer.highAnomalyTimes()); TDoubleVec highAnomalyFactors(writer.highAnomalyFactors()); @@ -270,20 +281,25 @@ void CMetricAnomalyDetectorTest::testAnomalies() { TDoubleVec anomalyRates(writer.anomalyRates()); LOG_DEBUG(<< "bucket length = " << BUCKET_LENGTHS[i]); - LOG_DEBUG(<< "high anomalies in = " << core::CContainerPrinter::print(highAnomalyTimes)); - LOG_DEBUG(<< "high anomaly factors = " << core::CContainerPrinter::print(highAnomalyFactors)); + LOG_DEBUG(<< "high anomalies in = " + << 
core::CContainerPrinter::print(highAnomalyTimes)); + LOG_DEBUG(<< "high anomaly factors = " + << core::CContainerPrinter::print(highAnomalyFactors)); LOG_DEBUG(<< "anomaly factors = " << core::CContainerPrinter::print(anomalyFactors)); LOG_DEBUG(<< "anomaly rates = " << core::CContainerPrinter::print(anomalyRates)); for (std::size_t j = 0u; j < highAnomalyTimes.size(); ++j) { - LOG_DEBUG(<< "Testing " << core::CContainerPrinter::print(highAnomalyTimes[j]) << ' ' << highAnomalyFactors[j]); + LOG_DEBUG(<< "Testing " << core::CContainerPrinter::print(highAnomalyTimes[j]) + << ' ' << highAnomalyFactors[j]); CPPUNIT_ASSERT(doIntersect(highAnomalyTimes[j], ANOMALOUS_INTERVALS[0]) || doIntersect(highAnomalyTimes[j], ANOMALOUS_INTERVALS[1])); } if (!anomalyFactors.empty()) { - double signal = std::accumulate(highAnomalyFactors.begin(), highAnomalyFactors.end(), 0.0); - double noise = std::accumulate(anomalyFactors.begin(), anomalyFactors.end(), 0.0); + double signal = std::accumulate(highAnomalyFactors.begin(), + highAnomalyFactors.end(), 0.0); + double noise = std::accumulate(anomalyFactors.begin(), + anomalyFactors.end(), 0.0); LOG_DEBUG(<< "S/N = " << (signal / noise)); CPPUNIT_ASSERT(signal / noise > 90.0); } @@ -293,19 +309,22 @@ void CMetricAnomalyDetectorTest::testAnomalies() { std::sort(orderedAnomalyRates.begin(), orderedAnomalyRates.end()); std::size_t maxStep = 1; for (std::size_t j = 2; j < orderedAnomalyRates.size(); ++j) { - if (orderedAnomalyRates[j] - orderedAnomalyRates[j - 1] > orderedAnomalyRates[maxStep] - orderedAnomalyRates[maxStep - 1]) { + if (orderedAnomalyRates[j] - orderedAnomalyRates[j - 1] > + orderedAnomalyRates[maxStep] - orderedAnomalyRates[maxStep - 1]) { maxStep = j; } } double partitionRate = 0.0; if (maxStep < orderedAnomalyRates.size()) { - partitionRate = 0.5 * (orderedAnomalyRates[maxStep] + orderedAnomalyRates[maxStep - 1]); + partitionRate = 0.5 * (orderedAnomalyRates[maxStep] + + orderedAnomalyRates[maxStep - 1]); } LOG_DEBUG(<< "partition rate = " << partitionRate); // Compute the ratio of noise in the two rate channels. for (std::size_t j = 0u; j < anomalyFactors.size(); ++j) { - (anomalyRates[j] > partitionRate ? highRateNoise : lowRateNoise) += anomalyFactors[j]; + (anomalyRates[j] > partitionRate ? 
highRateNoise : lowRateNoise) += + anomalyFactors[j]; } } @@ -320,23 +339,19 @@ void CMetricAnomalyDetectorTest::testPersist() { static const core_t::TTime LAST_TIME(FIRST_TIME + 86400); static const core_t::TTime BUCKET_LENGTH(300); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); model::CLimits limits; model::CSearchKey key(1, // identifier - model::function_t::E_IndividualMetric, - false, - model_t::E_XF_None, - "responsetime", - "Airline"); + model::function_t::E_IndividualMetric, false, + model_t::E_XF_None, "responsetime", "Airline"); model::CAnomalyDetector origDetector(1, // identifier - limits, - modelConfig, - EMPTY_STRING, - FIRST_TIME, - modelConfig.factory(key)); + limits, modelConfig, EMPTY_STRING, + FIRST_TIME, modelConfig.factory(key)); CResultWriter writer(modelConfig, limits, BUCKET_LENGTH); - importData(FIRST_TIME, LAST_TIME, BUCKET_LENGTH, writer, "testfiles/variable_rate_metric.data", origDetector); + importData(FIRST_TIME, LAST_TIME, BUCKET_LENGTH, writer, + "testfiles/variable_rate_metric.data", origDetector); std::string origXml; { @@ -349,17 +364,15 @@ void CMetricAnomalyDetectorTest::testPersist() { // Restore the XML into a new detector model::CAnomalyDetector restoredDetector(1, // identifier - limits, - modelConfig, - EMPTY_STRING, - 0, - modelConfig.factory(key)); + limits, modelConfig, EMPTY_STRING, + 0, modelConfig.factory(key)); { core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CPPUNIT_ASSERT( - traverser.traverseSubLevel(boost::bind(&model::CAnomalyDetector::acceptRestoreTraverser, &restoredDetector, EMPTY_STRING, _1))); + CPPUNIT_ASSERT(traverser.traverseSubLevel( + boost::bind(&model::CAnomalyDetector::acceptRestoreTraverser, + &restoredDetector, EMPTY_STRING, _1))); } // The XML representation of the new typer should be the same as the original @@ -377,58 +390,54 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent() { static const core_t::TTime BUCKET_LENGTH(3600); { - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); model::CLimits limits; model::CSearchKey key(1, // identifier - model::function_t::E_IndividualMetric, - false, - model_t::E_XF_None, - "bytes", - "host"); + model::function_t::E_IndividualMetric, false, + model_t::E_XF_None, "bytes", "host"); model::CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - "", - FIRST_TIME, + limits, modelConfig, "", FIRST_TIME, modelConfig.factory(key)); CResultWriter writer(modelConfig, limits, BUCKET_LENGTH); - importCsvData(FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/excludefrequent_two_series.txt", detector); + importCsvData(FIRST_TIME, BUCKET_LENGTH, writer, + "testfiles/excludefrequent_two_series.txt", detector); TTimeTimePrVec highAnomalyTimes(writer.highAnomalyTimes()); TDoubleVec highAnomalyFactors(writer.highAnomalyFactors()); - LOG_DEBUG(<< "high anomalies in = " << core::CContainerPrinter::print(highAnomalyTimes)); - LOG_DEBUG(<< "high anomaly factors = " << core::CContainerPrinter::print(highAnomalyFactors)); + LOG_DEBUG(<< "high anomalies in = " + << core::CContainerPrinter::print(highAnomalyTimes)); + LOG_DEBUG(<< 
"high anomaly factors = " + << core::CContainerPrinter::print(highAnomalyFactors)); // expect there to be 2 anomalies CPPUNIT_ASSERT_EQUAL(std::size_t(2), highAnomalyTimes.size()); CPPUNIT_ASSERT_DOUBLES_EQUAL(99.0, highAnomalyFactors[1], 0.5); } { - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); model::CLimits limits; model::CSearchKey key(1, // identifier - model::function_t::E_IndividualMetric, - false, - model_t::E_XF_By, - "bytes", - "host"); + model::function_t::E_IndividualMetric, false, + model_t::E_XF_By, "bytes", "host"); model::CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - "", - FIRST_TIME, + limits, modelConfig, "", FIRST_TIME, modelConfig.factory(key)); CResultWriter writer(modelConfig, limits, BUCKET_LENGTH); - importCsvData(FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/excludefrequent_two_series.txt", detector); + importCsvData(FIRST_TIME, BUCKET_LENGTH, writer, + "testfiles/excludefrequent_two_series.txt", detector); TTimeTimePrVec highAnomalyTimes(writer.highAnomalyTimes()); TDoubleVec highAnomalyFactors(writer.highAnomalyFactors()); - LOG_DEBUG(<< "high anomalies in = " << core::CContainerPrinter::print(highAnomalyTimes)); - LOG_DEBUG(<< "high anomaly factors = " << core::CContainerPrinter::print(highAnomalyFactors)); + LOG_DEBUG(<< "high anomalies in = " + << core::CContainerPrinter::print(highAnomalyTimes)); + LOG_DEBUG(<< "high anomaly factors = " + << core::CContainerPrinter::print(highAnomalyFactors)); // expect there to be 1 anomaly CPPUNIT_ASSERT_EQUAL(std::size_t(1), highAnomalyTimes.size()); @@ -439,12 +448,13 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent() { CppUnit::Test* CMetricAnomalyDetectorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricAnomalyDetectorTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CMetricAnomalyDetectorTest::testAnomalies", - &CMetricAnomalyDetectorTest::testAnomalies)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMetricAnomalyDetectorTest::testPersist", - &CMetricAnomalyDetectorTest::testPersist)); - suiteOfTests->addTest(new CppUnit::TestCaller("CMetricAnomalyDetectorTest::testExcludeFrequent", - &CMetricAnomalyDetectorTest::testExcludeFrequent)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricAnomalyDetectorTest::testAnomalies", &CMetricAnomalyDetectorTest::testAnomalies)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricAnomalyDetectorTest::testPersist", &CMetricAnomalyDetectorTest::testPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricAnomalyDetectorTest::testExcludeFrequent", + &CMetricAnomalyDetectorTest::testExcludeFrequent)); return suiteOfTests; } diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 67c09927e9..1272ec28cf 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -49,7 +49,10 @@ using TTimeDoublePrVec = std::vector; using TTimeDoublePrVecVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; -std::size_t addPerson(const std::string& p, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, std::size_t numInfluencers = 0) { +std::size_t addPerson(const std::string& p, + CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, + std::size_t 
numInfluencers = 0) { CDataGatherer::TStrCPtrVec person; person.push_back(&p); std::string i("i"); @@ -62,10 +65,15 @@ std::size_t addPerson(const std::string& p, CDataGatherer& gatherer, CResourceMo return *result.personId(); } -void addArrival(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, core_t::TTime time, const std::string& person, double value) { +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, + core_t::TTime time, + const std::string& person, + double value) { CDataGatherer::TStrCPtrVec fieldValues; fieldValues.push_back(&person); - std::string valueAsString(core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise( + value, core::CIEEE754::E_DoublePrecision)); fieldValues.push_back(&valueAsString); CEventData eventData; @@ -116,7 +124,8 @@ void addArrival(CDataGatherer& gatherer, } double doubleToStringToDouble(double value) { - std::string valueAsString(core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise( + value, core::CIEEE754::E_DoublePrecision)); double result(0.0); core::CStringUtils::stringToType(valueAsString, result); return result; @@ -161,18 +170,18 @@ void CMetricDataGathererTest::singleSeriesTests() { const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; - TTimeDoublePr bucket1[] = {TTimeDoublePr(1, 1.0), - TTimeDoublePr(15, 2.1), - TTimeDoublePr(180, 0.9), - TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), - TTimeDoublePr(550, 2.0)}; - TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; + TTimeDoublePr bucket1[] = { + TTimeDoublePr(1, 1.0), TTimeDoublePr(15, 2.1), + TTimeDoublePr(180, 0.9), TTimeDoublePr(190, 1.5), + TTimeDoublePr(400, 1.5), TTimeDoublePr(550, 2.0)}; + TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), + TTimeDoublePr(1199, 1.8)}; TTimeDoublePr bucket3[] = {TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5)}; TTimeDoublePr bucket4[] = { TTimeDoublePr(1900, 3.5), }; - TTimeDoublePr bucket5[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), TTimeDoublePr(2490, 3.8)}; + TTimeDoublePr bucket5[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), + TTimeDoublePr(2490, 3.8)}; { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); @@ -181,21 +190,10 @@ void CMetricDataGathererTest::singleSeriesTests() { features.push_back(model_t::E_IndividualSumByBucketAndPerson); features.push_back(model_t::E_IndividualCountByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); @@ -214,14 +212,19 @@ void CMetricDataGathererTest::singleSeriesTests() { CPPUNIT_ASSERT(!gatherer.personId("a.n.other p", pid)); { - addArrival(gatherer, m_ResourceMonitor, bucket1[0].first, "p", bucket1[0].second); + addArrival(gatherer, m_ResourceMonitor, bucket1[0].first, "p", 
+ bucket1[0].second); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(startTime, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[2].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[3].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(true, featureData[0].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(true, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(true, featureData[2].second[0].second.s_IsInteger); @@ -229,29 +232,40 @@ void CMetricDataGathererTest::singleSeriesTests() { } for (size_t i = 1; i < boost::size(bucket1); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", bucket1[i].second); + addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", + bucket1[i].second); } { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.sampleNow(startTime); - gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), bucketLength, featureData); + gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), + bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_EQUAL(1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(0.9, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(2.1, featureData[2].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(9.0, featureData[3].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 0.9, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 2.1, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 9.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(false, featureData[0].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(true, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(8 [1.55] 1 2), (185 [1.2] 1 2), (475 [1.75] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(8 [1] 1 2), (185 [0.9] 1 2), (475 [1.5] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(8 [2.1] 1 2), (185 [1.5] 1 2), (475 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(8 [1.55] 1 2), (185 [1.2] 1 2), (475 
[1.75] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(8 [1] 1 2), (185 [0.9] 1 2), (475 [1.5] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(8 [2.1] 1 2), (185 [1.5] 1 2), (475 [2] 1 2)]"), + core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), + core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) std::string origXml; @@ -268,19 +282,10 @@ void CMetricDataGathererTest::singleSeriesTests() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, traverser); // The XML representation of the new filter should be the // same as the original @@ -295,24 +300,35 @@ void CMetricDataGathererTest::singleSeriesTests() { gatherer.timeNow(startTime + bucketLength); for (size_t i = 0; i < boost::size(bucket2); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket2[i].first, "p", bucket2[i].second); + addArrival(gatherer, m_ResourceMonitor, bucket2[i].first, "p", + bucket2[i].second); } { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.sampleNow(startTime + bucketLength); gatherer.featureData(startTime + bucketLength, bucketLength, featureData); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_EQUAL(2.0, featureData[0].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(1.8, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(2.2, featureData[2].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 2.0, featureData[0].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.8, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 2.2, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 6.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(true, featureData[3].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2.1] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + core::CContainerPrinter::print( + featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2] 1 2)]"), + core::CContainerPrinter::print( + featureData[1].second[0].second.s_Samples)); CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2.2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [6] 1 3)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + core::CContainerPrinter::print( + featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(600 [6] 1 3)]"), + 
core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) std::string origXml; @@ -329,19 +345,10 @@ void CMetricDataGathererTest::singleSeriesTests() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, traverser); // The XML representation of the new filter should be the // same as the original @@ -363,21 +370,10 @@ void CMetricDataGathererTest::singleSeriesTests() { features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TTimeDoublePrVecVec buckets; @@ -392,7 +388,8 @@ void CMetricDataGathererTest::singleSeriesTests() { gatherer.timeNow(startTime + i * bucketLength); const TTimeDoublePrVec& bucket = buckets[i]; for (std::size_t j = 0u; j < bucket.size(); ++j) { - addArrival(gatherer, m_ResourceMonitor, bucket[j].first, "p", bucket[j].second); + addArrival(gatherer, m_ResourceMonitor, bucket[j].first, "p", + bucket[j].second); } } @@ -402,19 +399,31 @@ void CMetricDataGathererTest::singleSeriesTests() { gatherer.sampleNow(featureBucketStart); gatherer.featureData(featureBucketStart, bucketLength, featureData); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - CPPUNIT_ASSERT_EQUAL(3.2, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(3.8, featureData[2].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(10.5, featureData[3].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + CPPUNIT_ASSERT_EQUAL( + 3.2, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 3.8, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 10.5, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(false, featureData[0].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.5] 1 4)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.2] 1 4)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - 
CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.8] 1 4)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [10.5] 1 3)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2323 [3.5] 1 4)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2323 [3.2] 1 4)]"), + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2323 [3.8] 1 4)]"), + core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2400 [10.5] 1 3)]"), + core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); } } @@ -433,36 +442,24 @@ void CMetricDataGathererTest::multipleSeriesTests() { features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p2", gatherer, m_ResourceMonitor)); - TTimeDoublePr bucket11[] = {TTimeDoublePr(1, 1.0), - TTimeDoublePr(15, 2.1), - TTimeDoublePr(180, 0.9), - TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), - TTimeDoublePr(550, 2.0)}; - TTimeDoublePr bucket12[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; + TTimeDoublePr bucket11[] = { + TTimeDoublePr(1, 1.0), TTimeDoublePr(15, 2.1), + TTimeDoublePr(180, 0.9), TTimeDoublePr(190, 1.5), + TTimeDoublePr(400, 1.5), TTimeDoublePr(550, 2.0)}; + TTimeDoublePr bucket12[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), + TTimeDoublePr(1199, 1.8)}; TTimeDoublePr bucket13[] = {TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5)}; TTimeDoublePr bucket14[] = { TTimeDoublePr(1900, 3.5), }; - TTimeDoublePr bucket15[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), TTimeDoublePr(2490, 3.8)}; + TTimeDoublePr bucket15[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), + TTimeDoublePr(2490, 3.8)}; TTimeDoublePrVecVec buckets1; buckets1.push_back(TTimeDoublePrVec(boost::begin(bucket11), boost::end(bucket11))); buckets1.push_back(TTimeDoublePrVec(boost::begin(bucket12), boost::end(bucket12))); @@ -470,29 +467,23 @@ void CMetricDataGathererTest::multipleSeriesTests() { buckets1.push_back(TTimeDoublePrVec(boost::begin(bucket14), boost::end(bucket14))); buckets1.push_back(TTimeDoublePrVec(boost::begin(bucket15), boost::end(bucket15))); - TTimeDoublePr bucket21[] = {TTimeDoublePr(1, 1.0), - TTimeDoublePr(5, 1.0), - TTimeDoublePr(15, 2.1), - TTimeDoublePr(25, 2.0), - TTimeDoublePr(180, 0.9), - TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), - TTimeDoublePr(550, 2.0)}; - TTimeDoublePr bucket22[] = { - TTimeDoublePr(600, 2.0), TTimeDoublePr(605, 2.0), TTimeDoublePr(609, 2.0), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; - TTimeDoublePr bucket23[] = {TTimeDoublePr(1200, 2.1), - TTimeDoublePr(1250, 2.5), - TTimeDoublePr(1255, 
2.2), - TTimeDoublePr(1256, 2.4), - TTimeDoublePr(1300, 2.2), - TTimeDoublePr(1400, 2.5)}; + TTimeDoublePr bucket21[] = { + TTimeDoublePr(1, 1.0), TTimeDoublePr(5, 1.0), + TTimeDoublePr(15, 2.1), TTimeDoublePr(25, 2.0), + TTimeDoublePr(180, 0.9), TTimeDoublePr(190, 1.5), + TTimeDoublePr(400, 1.5), TTimeDoublePr(550, 2.0)}; + TTimeDoublePr bucket22[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(605, 2.0), + TTimeDoublePr(609, 2.0), TTimeDoublePr(799, 2.2), + TTimeDoublePr(1199, 1.8)}; + TTimeDoublePr bucket23[] = { + TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5), + TTimeDoublePr(1255, 2.2), TTimeDoublePr(1256, 2.4), + TTimeDoublePr(1300, 2.2), TTimeDoublePr(1400, 2.5)}; TTimeDoublePr bucket24[] = {TTimeDoublePr(1900, 3.5), TTimeDoublePr(1950, 3.5)}; - TTimeDoublePr bucket25[] = {TTimeDoublePr(2420, 3.5), - TTimeDoublePr(2480, 2.9), - TTimeDoublePr(2490, 3.9), - TTimeDoublePr(2500, 3.4), - TTimeDoublePr(2550, 4.1), - TTimeDoublePr(2600, 3.8)}; + TTimeDoublePr bucket25[] = { + TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 2.9), + TTimeDoublePr(2490, 3.9), TTimeDoublePr(2500, 3.4), + TTimeDoublePr(2550, 4.1), TTimeDoublePr(2600, 3.8)}; TTimeDoublePrVecVec buckets2; buckets2.push_back(TTimeDoublePrVec(boost::begin(bucket21), boost::end(bucket21))); buckets2.push_back(TTimeDoublePrVec(boost::begin(bucket22), boost::end(bucket22))); @@ -506,13 +497,15 @@ void CMetricDataGathererTest::multipleSeriesTests() { const TTimeDoublePrVec& bucket1 = buckets1[i]; for (std::size_t j = 0u; j < bucket1.size(); ++j) { - addArrival(gatherer, m_ResourceMonitor, bucket1[j].first, "p1", bucket1[j].second); + addArrival(gatherer, m_ResourceMonitor, bucket1[j].first, "p1", + bucket1[j].second); } const TTimeDoublePrVec& bucket2 = buckets2[i]; TMeanAccumulator a; for (std::size_t j = 0u; j < bucket2.size(); ++j) { - addArrival(gatherer, m_ResourceMonitor, bucket2[j].first, "p2", bucket2[j].second); + addArrival(gatherer, m_ResourceMonitor, bucket2[j].first, "p2", + bucket2[j].second); a.add(bucket2[j].second); } } @@ -522,7 +515,8 @@ void CMetricDataGathererTest::multipleSeriesTests() { TSizeUInt64PrVec nonZeroCounts; gatherer.personNonZeroCounts(startTime + 4 * bucketLength, nonZeroCounts); - CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (1, 6)]"), core::CContainerPrinter::print(nonZeroCounts)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0, 3), (1, 6)]"), + core::CContainerPrinter::print(nonZeroCounts)); TFeatureSizeFeatureDataPrVecPrVec featureData; core_t::TTime featureBucketStart = core_t::TTime(startTime + 4 * bucketLength); @@ -535,7 +529,8 @@ void CMetricDataGathererTest::multipleSeriesTests() { CPPUNIT_ASSERT_EQUAL(std::size_t(2), featureData[2].second.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(2), featureData[3].second.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); CPPUNIT_ASSERT_EQUAL(3.2, featureData[1].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(3.8, featureData[2].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(10.5, featureData[3].second[0].second.s_BucketValue->value()[0]); @@ -544,12 +539,21 @@ void CMetricDataGathererTest::multipleSeriesTests() { CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.5] 1 4)]"), 
core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.2] 1 4)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.8] 1 4)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [10.5] 1 3)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.6, featureData[0].second[1].second.s_BucketValue->value()[0], 1e-10); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2323 [3.5] 1 4)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2323 [3.2] 1 4)]"), + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2323 [3.8] 1 4)]"), + core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2400 [10.5] 1 3)]"), + core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 3.6, featureData[0].second[1].second.s_BucketValue->value()[0], 1e-10); CPPUNIT_ASSERT_EQUAL(2.9, featureData[1].second[1].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(4.1, featureData[2].second[1].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(21.6, featureData[3].second[1].second.s_BucketValue->value()[0]); @@ -558,10 +562,18 @@ void CMetricDataGathererTest::multipleSeriesTests() { CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[1].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[1].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.45] 1 6)]"), core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [2.9] 1 6)]"), core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.9] 1 6)]"), core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [21.6] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2290 [3.45] 1 6)]"), + core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2290 [2.9] 1 6)]"), + core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2290 [3.9] 1 6)]"), + core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2400 [21.6] 1 6)]"), + core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); // Test persistence. (We check for idempotency.) 
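        // Idempotency here means: persist the gatherer's state to XML,
        // restore that XML into a freshly constructed gatherer, persist the
        // copy again, and require the two XML strings to match exactly
        // (a persist -> restore -> persist round trip, as in the block below).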
std::string origXml; @@ -578,19 +590,10 @@ void CMetricDataGathererTest::multipleSeriesTests() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, traverser); // The XML representation of the new filter should be the // same as the original @@ -620,7 +623,8 @@ void CMetricDataGathererTest::multipleSeriesTests() { CPPUNIT_ASSERT_EQUAL(std::size_t(0), gatherer.numberOverFieldValues()); gatherer.personNonZeroCounts(startTime + 4 * bucketLength, nonZeroCounts); - CPPUNIT_ASSERT_EQUAL(std::string("[(1, 6)]"), core::CContainerPrinter::print(nonZeroCounts)); + CPPUNIT_ASSERT_EQUAL(std::string("[(1, 6)]"), + core::CContainerPrinter::print(nonZeroCounts)); CPPUNIT_ASSERT_DOUBLES_EQUAL(6.0, gatherer.effectiveSampleCount(1), 1e-10); @@ -631,7 +635,8 @@ void CMetricDataGathererTest::multipleSeriesTests() { CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[1].second.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData[2].second.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.6, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 3.6, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); CPPUNIT_ASSERT_EQUAL(2.9, featureData[1].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(4.1, featureData[2].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(21.6, featureData[3].second[0].second.s_BucketValue->value()[0]); @@ -640,10 +645,18 @@ void CMetricDataGathererTest::multipleSeriesTests() { CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.45] 1 6)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [2.9] 1 6)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2290 [3.9] 1 6)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2400 [21.6] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2290 [3.45] 1 6)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2290 [2.9] 1 6)]"), + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2290 [3.9] 1 6)]"), + core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2400 [21.6] 1 6)]"), + core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); } void CMetricDataGathererTest::testSampleCount() { @@ -663,21 +676,9 @@ void CMetricDataGathererTest::testSampleCount() { features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - 
EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 0); std::size_t pid1 = addPerson("p1", gatherer, m_ResourceMonitor); std::size_t pid2 = addPerson("p2", gatherer, m_ResourceMonitor); @@ -690,19 +691,26 @@ void CMetricDataGathererTest::testSampleCount() { { LOG_DEBUG(<< "count p1 = 6"); - addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 20, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 40, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 60, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 80, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 100, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 120, "p1", 1.0); + addArrival(gatherer, m_ResourceMonitor, + startTime + i * bucketLength + 20, "p1", 1.0); + addArrival(gatherer, m_ResourceMonitor, + startTime + i * bucketLength + 40, "p1", 1.0); + addArrival(gatherer, m_ResourceMonitor, + startTime + i * bucketLength + 60, "p1", 1.0); + addArrival(gatherer, m_ResourceMonitor, + startTime + i * bucketLength + 80, "p1", 1.0); + addArrival(gatherer, m_ResourceMonitor, + startTime + i * bucketLength + 100, "p1", 1.0); + addArrival(gatherer, m_ResourceMonitor, + startTime + i * bucketLength + 120, "p1", 1.0); } { TDoubleVec count; rng.generateUniformSamples(1.0, 5.0, 1, count); LOG_DEBUG(<< "count p2 = " << std::floor(count[0])); for (std::size_t j = 0u; j < static_cast(count[0]); ++j) { - addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 100 * (j + 1), "p2", 1.0); + addArrival(gatherer, m_ResourceMonitor, + startTime + i * bucketLength + 100 * (j + 1), "p2", 1.0); } } } @@ -716,7 +724,8 @@ void CMetricDataGathererTest::testSampleCount() { for (std::size_t i = numberBuckets; i < 100; ++i) { LOG_DEBUG(<< "Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); - addArrival(gatherer, m_ResourceMonitor, startTime + i * bucketLength + 10, "p1", 1.0); + addArrival(gatherer, m_ResourceMonitor, + startTime + i * bucketLength + 10, "p1", 1.0); } LOG_DEBUG(<< "p1 sample count = " << gatherer.effectiveSampleCount(pid1)); CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, gatherer.effectiveSampleCount(pid1), 0.5); @@ -736,21 +745,9 @@ void CMetricDataGathererTest::testRemovePeople() { features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p2", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson("p3", gatherer, m_ResourceMonitor)); @@ -783,7 +780,8 @@ void 
CMetricDataGathererTest::testRemovePeople() { for (std::size_t i = 0u; i < boost::size(values); ++i) { for (std::size_t j = 0u; j < boost::size(values[i]); ++j) { if (values[i][j] > 0.0) { - addArrival(gatherer, m_ResourceMonitor, startTime + times[i][j], gatherer.personName(i), values[i][j]); + addArrival(gatherer, m_ResourceMonitor, startTime + times[i][j], + gatherer.personName(i), values[i][j]); } } } @@ -794,37 +792,30 @@ void CMetricDataGathererTest::testRemovePeople() { peopleToRemove.push_back(1); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 0); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p3", expectedGatherer, m_ResourceMonitor)); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p4", expectedGatherer, m_ResourceMonitor)); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson("p5", expectedGatherer, m_ResourceMonitor)); - CPPUNIT_ASSERT_EQUAL(std::size_t(3), addPerson("p6", expectedGatherer, m_ResourceMonitor)); - CPPUNIT_ASSERT_EQUAL(std::size_t(4), addPerson("p7", expectedGatherer, m_ResourceMonitor)); - CPPUNIT_ASSERT_EQUAL(std::size_t(5), addPerson("p8", expectedGatherer, m_ResourceMonitor)); + CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, features, startTime, 0); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPerson("p3", expectedGatherer, m_ResourceMonitor)); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + addPerson("p4", expectedGatherer, m_ResourceMonitor)); + CPPUNIT_ASSERT_EQUAL(std::size_t(2), + addPerson("p5", expectedGatherer, m_ResourceMonitor)); + CPPUNIT_ASSERT_EQUAL(std::size_t(3), + addPerson("p6", expectedGatherer, m_ResourceMonitor)); + CPPUNIT_ASSERT_EQUAL(std::size_t(4), + addPerson("p7", expectedGatherer, m_ResourceMonitor)); + CPPUNIT_ASSERT_EQUAL(std::size_t(5), + addPerson("p8", expectedGatherer, m_ResourceMonitor)); std::size_t people[] = {2, 3, 4, 5, 6, 7}; for (std::size_t i = 0u; i < boost::size(people); ++i) { for (std::size_t j = 0u; j < boost::size(values[people[i]]); ++j) { if (values[people[i]][j] > 0.0) { - addArrival(expectedGatherer, - m_ResourceMonitor, + addArrival(expectedGatherer, m_ResourceMonitor, startTime + times[people[i]][j], - expectedGatherer.personName(i), - values[people[i]][j]); + expectedGatherer.personName(i), values[people[i]][j]); } } } @@ -840,34 +831,24 @@ void CMetricDataGathererTest::testRemovePeople() { peopleToRemove.push_back(7); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 0); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p3", expectedGatherer, m_ResourceMonitor)); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p6", expectedGatherer, m_ResourceMonitor)); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson("p7", expectedGatherer, m_ResourceMonitor)); + CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, features, startTime, 0); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + addPerson("p3", expectedGatherer, 
m_ResourceMonitor)); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + addPerson("p6", expectedGatherer, m_ResourceMonitor)); + CPPUNIT_ASSERT_EQUAL(std::size_t(2), + addPerson("p7", expectedGatherer, m_ResourceMonitor)); std::size_t people[] = {2, 5, 6}; for (std::size_t i = 0u; i < boost::size(people); ++i) { for (std::size_t j = 0u; j < boost::size(values[people[i]]); ++j) { if (values[people[i]][j] > 0.0) { - addArrival(expectedGatherer, - m_ResourceMonitor, + addArrival(expectedGatherer, m_ResourceMonitor, startTime + times[people[i]][j], - expectedGatherer.personName(i), - values[people[i]][j]); + expectedGatherer.personName(i), values[people[i]][j]); } } } @@ -883,21 +864,10 @@ void CMetricDataGathererTest::testRemovePeople() { peopleToRemove.push_back(6); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 0); + CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, features, startTime, 0); LOG_DEBUG(<< "checksum = " << gatherer.checksum()); LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum()); @@ -908,9 +878,11 @@ void CMetricDataGathererTest::testRemovePeople() { expectedRecycled.push_back(addPerson("p1", gatherer, m_ResourceMonitor)); expectedRecycled.push_back(addPerson("p7", gatherer, m_ResourceMonitor)); - LOG_DEBUG(<< "recycled = " << core::CContainerPrinter::print(gatherer.recycledPersonIds())); + LOG_DEBUG(<< "recycled = " + << core::CContainerPrinter::print(gatherer.recycledPersonIds())); LOG_DEBUG(<< "expected recycled = " << core::CContainerPrinter::print(expectedRecycled)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedRecycled), core::CContainerPrinter::print(gatherer.recycledPersonIds())); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedRecycled), + core::CContainerPrinter::print(gatherer.recycledPersonIds())); } void CMetricDataGathererTest::testSum() { @@ -927,41 +899,18 @@ void CMetricDataGathererTest::testSum() { TFeatureVec sumFeatures; sumFeatures.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer sum(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - sumFeatures, - startTime, - 0); + CDataGatherer sum(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, KEY, sumFeatures, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", sum, m_ResourceMonitor)); TFeatureVec nonZeroSumFeatures; nonZeroSumFeatures.push_back(model_t::E_IndividualNonNullSumByBucketAndPerson); - CDataGatherer nonZeroSum(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - nonZeroSumFeatures, - startTime, - 0); + CDataGatherer nonZeroSum(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, KEY, nonZeroSumFeatures, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", nonZeroSum, m_ResourceMonitor)); 
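    // The two gatherers receive identical arrivals below; they differ only in
    // how an empty bucket is reported. The plain sum feature still yields a
    // bucket value (of zero) for a bucket with no data, whereas the non-null
    // sum feature yields no feature data at all for that bucket, which is
    // what the assertions on featureData.size() later in this test check.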
core_t::TTime bucketStart = startTime; @@ -977,8 +926,12 @@ void CMetricDataGathererTest::testSum() { double expected = 0.0; for (std::size_t j = 0u; j < times.size(); ++j) { - addArrival(sum, m_ResourceMonitor, bucketStart + static_cast(times[j]), "p1", values[j]); - addArrival(nonZeroSum, m_ResourceMonitor, bucketStart + static_cast(times[j]), "p1", values[j]); + addArrival(sum, m_ResourceMonitor, + bucketStart + static_cast(times[j]), "p1", + values[j]); + addArrival(nonZeroSum, m_ResourceMonitor, + bucketStart + static_cast(times[j]), "p1", + values[j]); expected += doubleToStringToDouble(values[j]); } @@ -990,9 +943,14 @@ void CMetricDataGathererTest::testSum() { for (std::size_t j = 0u; j < data.size(); ++j) { const TSizeFeatureDataPrVec& featureData = data[j].second; CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(expected, featureData[j].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), boost::unwrap_ref(featureData[j].second.s_Samples).size()); - CPPUNIT_ASSERT_EQUAL(expected, boost::unwrap_ref(featureData[j].second.s_Samples)[0].value()[0]); + CPPUNIT_ASSERT_EQUAL( + expected, featureData[j].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + std::size_t(1), + boost::unwrap_ref(featureData[j].second.s_Samples).size()); + CPPUNIT_ASSERT_EQUAL( + expected, + boost::unwrap_ref(featureData[j].second.s_Samples)[0].value()[0]); } } { @@ -1005,9 +963,14 @@ void CMetricDataGathererTest::testSum() { CPPUNIT_ASSERT_EQUAL(std::size_t(0), featureData.size()); } else { CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); - CPPUNIT_ASSERT_EQUAL(expected, featureData[j].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), boost::unwrap_ref(featureData[j].second.s_Samples).size()); - CPPUNIT_ASSERT_EQUAL(expected, boost::unwrap_ref(featureData[j].second.s_Samples)[0].value()[0]); + CPPUNIT_ASSERT_EQUAL( + expected, featureData[j].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + std::size_t(1), + boost::unwrap_ref(featureData[j].second.s_Samples).size()); + CPPUNIT_ASSERT_EQUAL( + expected, + boost::unwrap_ref(featureData[j].second.s_Samples)[0].value()[0]); } } } @@ -1030,9 +993,11 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { params.s_SampleCountFactor = 1; params.s_SampleQueueGrowthFactor = 0.1; - TTimeDoublePr bucket1[] = { - TTimeDoublePr(1, 1.0), TTimeDoublePr(15, 2.1), TTimeDoublePr(180, 0.9), TTimeDoublePr(400, 1.5), TTimeDoublePr(550, 2.0)}; - TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(190, 1.5), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; + TTimeDoublePr bucket1[] = {TTimeDoublePr(1, 1.0), TTimeDoublePr(15, 2.1), + TTimeDoublePr(180, 0.9), TTimeDoublePr(400, 1.5), + TTimeDoublePr(550, 2.0)}; + TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(190, 1.5), + TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; { TFeatureVec features; @@ -1041,21 +1006,10 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); features.push_back(model_t::E_IndividualCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, 
EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); @@ -1074,13 +1028,18 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { CPPUNIT_ASSERT(!gatherer.personId("a.n.other p", pid)); { - addArrival(gatherer, m_ResourceMonitor, bucket1[0].first, "p", bucket1[0].second); + addArrival(gatherer, m_ResourceMonitor, bucket1[0].first, "p", + bucket1[0].second); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(startTime, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[2].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[3].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(true, featureData[0].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(true, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(true, featureData[2].second[0].second.s_IsInteger); @@ -1088,25 +1047,39 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { } for (size_t i = 1; i < boost::size(bucket1); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", bucket1[i].second); + addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", + bucket1[i].second); } { TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), bucketLength, featureData); + gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), + bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_EQUAL(1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(0.9, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(2.1, featureData[2].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(7.5, featureData[3].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 0.9, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 2.1, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 7.5, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[1].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[2].second[0].second.s_IsInteger); CPPUNIT_ASSERT_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - 
CPPUNIT_ASSERT_EQUAL(std::string("[]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [7.5] 1 5)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[]"), + core::CContainerPrinter::print( + featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[]"), + core::CContainerPrinter::print( + featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[]"), + core::CContainerPrinter::print( + featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [7.5] 1 5)]"), + core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) std::string origXml; @@ -1123,19 +1096,10 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, traverser); // The XML representation of the new filter should be the // same as the original @@ -1150,25 +1114,35 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { gatherer.timeNow(startTime + bucketLength); for (size_t i = 0; i < boost::size(bucket2); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket2[i].first, "p", bucket2[i].second); + addArrival(gatherer, m_ResourceMonitor, bucket2[i].first, "p", + bucket2[i].second); } { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.sampleNow(startTime); gatherer.featureData(startTime, bucketLength, featureData); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_EQUAL(1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(0.9, featureData[1].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(2.1, featureData[2].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(9.0, featureData[3].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 0.9, featureData[1].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 2.1, featureData[2].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 9.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(true, featureData[3].second[0].second.s_IsInteger); - CPPUNIT_ASSERT_EQUAL(std::string("[(8 [1.55] 1 2), (257 [1.3] 0.666667 3)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(8 [1.55] 1 2), (257 [1.3] 0.666667 3)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); CPPUNIT_ASSERT_EQUAL(std::string("[(8 [1] 1 2), (257 [0.9] 1 3)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + core::CContainerPrinter::print( + featureData[1].second[0].second.s_Samples)); CPPUNIT_ASSERT_EQUAL(std::string("[(8 [2.1] 1 2), (257 [1.5] 1 3)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - 
CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + core::CContainerPrinter::print( + featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), + core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) std::string origXml; @@ -1185,19 +1159,10 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, traverser); // The XML representation of the new filter should be the // same as the original @@ -1223,11 +1188,9 @@ void CMetricDataGathererTest::testResetBucketGivenSingleSeries() { params.s_SampleQueueGrowthFactor = 0.1; TTimeDoublePr data[] = { - TTimeDoublePr(1, 1.0), // Bucket 1 - TTimeDoublePr(550, 2.0), - TTimeDoublePr(600, 3.0), // Bucket 2 - TTimeDoublePr(700, 4.0), - TTimeDoublePr(1000, 5.0), + TTimeDoublePr(1, 1.0), // Bucket 1 + TTimeDoublePr(550, 2.0), TTimeDoublePr(600, 3.0), // Bucket 2 + TTimeDoublePr(700, 4.0), TTimeDoublePr(1000, 5.0), TTimeDoublePr(1200, 6.0) // Bucket 3 }; @@ -1236,21 +1199,9 @@ void CMetricDataGathererTest::testResetBucketGivenSingleSeries() { features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 2u); addPerson("p", gatherer, m_ResourceMonitor); for (std::size_t i = 0; i < boost::size(data); ++i) { @@ -1279,7 +1230,8 @@ void CMetricDataGathererTest::testResetBucketGivenSingleSeries() { CPPUNIT_ASSERT_EQUAL(6.0, featureData[1].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(6.0, featureData[2].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), + gatherer.bucketCounts(1200).find(pidCidPr)->second); gatherer.resetBucket(600); addArrival(gatherer, m_ResourceMonitor, 610, "p", 2.0); @@ -1304,23 +1256,40 @@ void CMetricDataGathererTest::testResetBucketGivenSingleSeries() { CPPUNIT_ASSERT_EQUAL(6.0, featureData[1].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(6.0, featureData[2].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), + 
gatherer.bucketCounts(1200).find(pidCidPr)->second); gatherer.sampleNow(0); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [1.5] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [1] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [2] 1 2)]"), + core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(0 [3] 1 2)]"), + core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); gatherer.sampleNow(600); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [2.5] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [2] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [3] 1 2)]"), + core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(600 [5] 1 2)]"), + core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); } void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { @@ -1334,11 +1303,9 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { params.s_SampleQueueGrowthFactor = 0.1; TTimeDoublePr data[] = { - TTimeDoublePr(1, 1.0), // Bucket 1 - TTimeDoublePr(550, 2.0), - TTimeDoublePr(600, 3.0), // Bucket 2 - TTimeDoublePr(700, 4.0), - TTimeDoublePr(1000, 5.0), + TTimeDoublePr(1, 1.0), // Bucket 1 + TTimeDoublePr(550, 2.0), TTimeDoublePr(600, 3.0), // Bucket 2 + TTimeDoublePr(700, 4.0), TTimeDoublePr(1000, 5.0), TTimeDoublePr(1200, 6.0) // Bucket 3 }; @@ -1347,28 +1314,17 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 
EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 2u); addPerson("p1", gatherer, m_ResourceMonitor); addPerson("p2", gatherer, m_ResourceMonitor); addPerson("p3", gatherer, m_ResourceMonitor); for (std::size_t i = 0; i < boost::size(data); ++i) { for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { - addArrival(gatherer, m_ResourceMonitor, data[i].first, gatherer.personName(pid), data[i].second); + addArrival(gatherer, m_ResourceMonitor, data[i].first, + gatherer.personName(pid), data[i].second); } } @@ -1407,9 +1363,12 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { CPPUNIT_ASSERT_EQUAL(12.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(12.0, featureData[3].second[1].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(12.0, featureData[3].second[2].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(uint64_t(3), gatherer.bucketCounts(600).find(pidCidPr0)->second); - CPPUNIT_ASSERT_EQUAL(uint64_t(3), gatherer.bucketCounts(600).find(pidCidPr1)->second); - CPPUNIT_ASSERT_EQUAL(uint64_t(3), gatherer.bucketCounts(600).find(pidCidPr2)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(3), + gatherer.bucketCounts(600).find(pidCidPr0)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(3), + gatherer.bucketCounts(600).find(pidCidPr1)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(3), + gatherer.bucketCounts(600).find(pidCidPr2)->second); gatherer.featureData(1200, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(6.0, featureData[0].second[0].second.s_BucketValue->value()[0]); @@ -1424,9 +1383,12 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[1].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[2].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr0)->second); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr1)->second); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr2)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), + gatherer.bucketCounts(1200).find(pidCidPr0)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), + gatherer.bucketCounts(1200).find(pidCidPr1)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), + gatherer.bucketCounts(1200).find(pidCidPr2)->second); gatherer.resetBucket(600); for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { @@ -1464,9 +1426,12 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { CPPUNIT_ASSERT_EQUAL(5.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(5.0, featureData[3].second[1].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(5.0, featureData[3].second[2].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(uint64_t(2), gatherer.bucketCounts(600).find(pidCidPr0)->second); - CPPUNIT_ASSERT_EQUAL(uint64_t(2), gatherer.bucketCounts(600).find(pidCidPr1)->second); - CPPUNIT_ASSERT_EQUAL(uint64_t(2), gatherer.bucketCounts(600).find(pidCidPr2)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(2), + gatherer.bucketCounts(600).find(pidCidPr0)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(2), + gatherer.bucketCounts(600).find(pidCidPr1)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(2), + gatherer.bucketCounts(600).find(pidCidPr2)->second); gatherer.featureData(1200, bucketLength, featureData); CPPUNIT_ASSERT_EQUAL(6.0,
featureData[0].second[0].second.s_BucketValue->value()[0]); @@ -1481,47 +1446,99 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[0].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[1].second.s_BucketValue->value()[0]); CPPUNIT_ASSERT_EQUAL(6.0, featureData[3].second[2].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr0)->second); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr1)->second); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), gatherer.bucketCounts(1200).find(pidCidPr2)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), + gatherer.bucketCounts(1200).find(pidCidPr0)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), + gatherer.bucketCounts(1200).find(pidCidPr1)->second); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), + gatherer.bucketCounts(1200).find(pidCidPr2)->second); gatherer.sampleNow(0); gatherer.featureData(0, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [1] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(276 [2] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(0 [3] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [1.5] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [1.5] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [1.5] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [1] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [1] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [1] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [2] 1 2)]"), +
core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [2] 1 2)]"), + core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(276 [2] 1 2)]"), + core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(0 [3] 1 2)]"), + core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(0 [3] 1 2)]"), + core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(0 [3] 1 2)]"), + core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); gatherer.sampleNow(600); gatherer.featureData(600, bucketLength, featureData); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2.5] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [2] 1 2)]"), core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(615 [3] 1 2)]"), core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(600 [5] 1 2)]"), core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [2.5] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [2.5] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [2.5] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [2] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [2] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [2] 1 2)]"), + core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [3] 1 2)]"), + core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [3] 1 2)]"), + core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(615 [3] 1 
2)]"), + core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(600 [5] 1 2)]"), + core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(600 [5] 1 2)]"), + core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(600 [5] 1 2)]"), + core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); } void CMetricDataGathererTest::testInfluenceStatistics() { LOG_DEBUG(<< "*** CMetricDataGathererTest::testInfluenceStatistics ***"); - using TTimeDoubleStrStrTuple = boost::tuple; + using TTimeDoubleStrStrTuple = + boost::tuple; using TDoubleDoublePr = std::pair; using TStrDoubleDoublePrPr = std::pair; using TStrDoubleDoublePrPrVec = std::vector; @@ -1591,21 +1608,9 @@ void CMetricDataGathererTest::testInfluenceStatistics() { features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); TStrVec influencerNames(boost::begin(influencerNames_), boost::end(influencerNames_)); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - influencerNames, - false, - KEY, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + influencerNames, false, KEY, features, startTime, 2u); addPerson("p1", gatherer, m_ResourceMonitor, influencerNames.size()); addPerson("p2", gatherer, m_ResourceMonitor, influencerNames.size()); @@ -1623,19 +1628,29 @@ void CMetricDataGathererTest::testInfluenceStatistics() { const TSizeFeatureDataPrVec& data_ = featureData[j].second; for (std::size_t k = 0u; k < data_.size(); ++k) { TStrDoubleDoublePrPrVec statistics; - for (std::size_t m = 0u; m < data_[k].second.s_InfluenceValues.size(); ++m) { - for (std::size_t n = 0u; n < data_[k].second.s_InfluenceValues[m].size(); ++n) { - statistics.push_back( - TStrDoubleDoublePrPr(data_[k].second.s_InfluenceValues[m][n].first, - TDoubleDoublePr(data_[k].second.s_InfluenceValues[m][n].second.first[0], - data_[k].second.s_InfluenceValues[m][n].second.second))); + for (std::size_t m = 0u; + m < data_[k].second.s_InfluenceValues.size(); ++m) { + for (std::size_t n = 0u; + n < data_[k].second.s_InfluenceValues[m].size(); ++n) { + statistics.push_back(TStrDoubleDoublePrPr( + data_[k].second.s_InfluenceValues[m][n].first, + TDoubleDoublePr( + data_[k] + .second.s_InfluenceValues[m][n] + .second.first[0], + data_[k] + .second.s_InfluenceValues[m][n] + .second.second))); } } - std::sort(statistics.begin(), statistics.end(), maths::COrderings::SFirstLess()); + std::sort(statistics.begin(), statistics.end(), + maths::COrderings::SFirstLess()); - LOG_DEBUG(<< "statistics = " << core::CContainerPrinter::print(statistics)); + LOG_DEBUG(<< "statistics = " + << core::CContainerPrinter::print(statistics)); LOG_DEBUG(<< "expected = " << *expected); - CPPUNIT_ASSERT_EQUAL((*expected++), core::CContainerPrinter::print(statistics)); + CPPUNIT_ASSERT_EQUAL((*expected++), + core::CContainerPrinter::print(statistics)); } } @@ -1643,13 +1658,9 @@ void CMetricDataGathererTest::testInfluenceStatistics() { ++b; } for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { - addArrival(gatherer, - m_ResourceMonitor, - data[i].get<0>(), - gatherer.personName(pid), - 
data[i].get<1>(), - data[i].get<2>(), - data[i].get<3>()); + addArrival(gatherer, m_ResourceMonitor, data[i].get<0>(), + gatherer.personName(pid), data[i].get<1>(), + data[i].get<2>(), data[i].get<3>()); } } } @@ -1673,34 +1684,26 @@ void CMetricDataGathererTest::testMultivariate() { TTimeDoubleDoubleTuple(190, 1.5, 1.4), TTimeDoubleDoubleTuple(400, 1.5, 1.4), TTimeDoubleDoubleTuple(550, 2.0, 1.8)}; - TTimeDoubleDoubleTuple bucket2[] = { - TTimeDoubleDoubleTuple(600, 2.0, 1.8), TTimeDoubleDoubleTuple(799, 2.2, 2.0), TTimeDoubleDoubleTuple(1199, 1.8, 1.6)}; - TTimeDoubleDoubleTuple bucket3[] = {TTimeDoubleDoubleTuple(1200, 2.1, 2.0), TTimeDoubleDoubleTuple(1250, 2.5, 2.4)}; + TTimeDoubleDoubleTuple bucket2[] = {TTimeDoubleDoubleTuple(600, 2.0, 1.8), + TTimeDoubleDoubleTuple(799, 2.2, 2.0), + TTimeDoubleDoubleTuple(1199, 1.8, 1.6)}; + TTimeDoubleDoubleTuple bucket3[] = {TTimeDoubleDoubleTuple(1200, 2.1, 2.0), + TTimeDoubleDoubleTuple(1250, 2.5, 2.4)}; TTimeDoubleDoubleTuple bucket4[] = { TTimeDoubleDoubleTuple(1900, 3.5, 3.2), }; - TTimeDoubleDoubleTuple bucket5[] = { - TTimeDoubleDoubleTuple(2420, 3.5, 3.2), TTimeDoubleDoubleTuple(2480, 3.2, 3.0), TTimeDoubleDoubleTuple(2490, 3.8, 3.8)}; + TTimeDoubleDoubleTuple bucket5[] = {TTimeDoubleDoubleTuple(2420, 3.5, 3.2), + TTimeDoubleDoubleTuple(2480, 3.2, 3.0), + TTimeDoubleDoubleTuple(2490, 3.8, 3.8)}; { TFeatureVec features; features.push_back(model_t::E_IndividualMeanLatLongByPerson); TStrVec influencerNames; - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - influencerNames, - false, - KEY, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, influencerNames, + false, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); @@ -1716,30 +1719,39 @@ void CMetricDataGathererTest::testMultivariate() { CPPUNIT_ASSERT(!gatherer.personId("a.n.other p", pid)); { - addArrival(gatherer, m_ResourceMonitor, bucket1[0].get<0>(), "p", bucket1[0].get<1>(), bucket1[0].get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, bucket1[0].get<0>(), "p", + bucket1[0].get<1>(), bucket1[0].get<2>(), DELIMITER); TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.featureData(startTime, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); - CPPUNIT_ASSERT_EQUAL(1.0, featureData[0].second[0].second.s_BucketValue->value()[1]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); + CPPUNIT_ASSERT_EQUAL( + 1.0, featureData[0].second[0].second.s_BucketValue->value()[1]); CPPUNIT_ASSERT_EQUAL(true, featureData[0].second[0].second.s_IsInteger); } for (size_t i = 1; i < boost::size(bucket1); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket1[i].get<0>(), "p", bucket1[i].get<1>(), bucket1[i].get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, bucket1[i].get<0>(), "p", + bucket1[i].get<1>(), bucket1[i].get<2>(), DELIMITER); } { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.sampleNow(startTime); - gatherer.featureData(core_t::TTime(startTime + 
bucketLength - 1), bucketLength, featureData); + gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), + bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.4, featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.4, featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); CPPUNIT_ASSERT_EQUAL(false, featureData[0].second[0].second.s_IsInteger); - LOG_DEBUG(<< core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - CPPUNIT_ASSERT_EQUAL(std::string("[(8 [1.55, 1.5] 1 2), (185 [1.2, 1.1] 1 2), (475 [1.75, 1.6] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + LOG_DEBUG(<< core::CContainerPrinter::print( + featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(8 [1.55, 1.5] 1 2), (185 [1.2, 1.1] 1 2), (475 [1.75, 1.6] 1 2)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) std::string origXml; @@ -1756,19 +1768,10 @@ void CMetricDataGathererTest::testMultivariate() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, traverser); // The XML representation of the new filter should be the // same as the original @@ -1783,7 +1786,8 @@ void CMetricDataGathererTest::testMultivariate() { gatherer.timeNow(startTime + bucketLength); for (size_t i = 0; i < boost::size(bucket2); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket2[i].get<0>(), "p", bucket2[i].get<1>(), bucket2[i].get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, bucket2[i].get<0>(), "p", + bucket2[i].get<1>(), bucket2[i].get<2>(), DELIMITER); } { TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1791,10 +1795,13 @@ void CMetricDataGathererTest::testMultivariate() { gatherer.featureData(startTime + bucketLength, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.8, featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 2.0, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 1.8, featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2.1, 1.9] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + core::CContainerPrinter::print( + featureData[0].second[0].second.s_Samples)); // Test persistence. (We check for idempotency.) 
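// A note on this recurring pattern: persistence is verified by idempotency
// rather than by inspecting the serialised form. A minimal sketch of the
// round trip, assuming a persist side symmetric to the CRapidXml restore
// API used in this file (the inserter calls below are an assumption, they
// are not shown in this diff):
//
//     std::string origXml;
//     {
//         core::CRapidXmlStatePersistInserter inserter("root"); // assumed
//         gatherer.acceptPersistInserter(inserter);             // assumed
//         inserter.toXml(origXml);
//     }
//     core::CRapidXmlParser parser;
//     CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
//     core::CRapidXmlStateRestoreTraverser traverser(parser);
//     CDataGatherer restoredGatherer(/* same ctor args, */ traverser);
//     std::string newXml; // serialise restoredGatherer the same way
//     CPPUNIT_ASSERT_EQUAL(origXml, newXml); // restored state re-persists identically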
std::string origXml; @@ -1811,19 +1818,10 @@ void CMetricDataGathererTest::testMultivariate() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, traverser); // The XML representation of the new filter should be the // same as the original @@ -1838,7 +1836,8 @@ void CMetricDataGathererTest::testMultivariate() { gatherer.timeNow(startTime + 2 * bucketLength); for (size_t i = 0; i < boost::size(bucket3); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket3[i].get<0>(), "p", bucket3[i].get<1>(), bucket3[i].get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, bucket3[i].get<0>(), "p", + bucket3[i].get<1>(), bucket3[i].get<2>(), DELIMITER); } { TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1846,10 +1845,13 @@ void CMetricDataGathererTest::testMultivariate() { gatherer.featureData(startTime + 2 * bucketLength, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.3, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.2, featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 2.3, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 2.2, featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); CPPUNIT_ASSERT_EQUAL(std::string("[(1200 [1.95, 1.8] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + core::CContainerPrinter::print( + featureData[0].second[0].second.s_Samples)); } } @@ -1857,36 +1859,31 @@ void CMetricDataGathererTest::testMultivariate() { { TFeatureVec features; features.push_back(model_t::E_IndividualMeanLatLongByPerson); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TTimeDoubleDoubleTupleVecVec buckets; - buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket1), boost::end(bucket1))); - buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket2), boost::end(bucket2))); - buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket3), boost::end(bucket3))); - buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket4), boost::end(bucket4))); - buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket5), boost::end(bucket5))); + buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket1), + boost::end(bucket1))); + buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket2), + boost::end(bucket2))); + buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket3), + 
boost::end(bucket3))); + buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket4), + boost::end(bucket4))); + buckets.push_back(TTimeDoubleDoubleTupleVec(boost::begin(bucket5), + boost::end(bucket5))); for (std::size_t i = 0u; i < buckets.size(); ++i) { LOG_DEBUG(<< "Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); const TTimeDoubleDoubleTupleVec& bucket = buckets[i]; for (std::size_t j = 0u; j < bucket.size(); ++j) { - addArrival(gatherer, m_ResourceMonitor, bucket[j].get<0>(), "p", bucket[j].get<1>(), bucket[j].get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, bucket[j].get<0>(), "p", + bucket[j].get<1>(), bucket[j].get<2>(), DELIMITER); } } @@ -1896,11 +1893,13 @@ void CMetricDataGathererTest::testMultivariate() { gatherer.sampleNow(featureBucketStart); gatherer.featureData(featureBucketStart, bucketLength, featureData); CPPUNIT_ASSERT(!featureData.empty()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + CPPUNIT_ASSERT_DOUBLES_EQUAL( + 3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); CPPUNIT_ASSERT_EQUAL(false, featureData[0].second[0].second.s_IsInteger); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); - CPPUNIT_ASSERT_EQUAL(std::string("[(2323 [3.5, 3.3] 1 4)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + CPPUNIT_ASSERT_EQUAL( + std::string("[(2323 [3.5, 3.3] 1 4)]"), + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); } } @@ -1926,7 +1925,9 @@ void CMetricDataGathererTest::testStatisticsPersist() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); CGathererTools::TMeanGatherer::TMetricPartialStatistic restored(1); - traverser.traverseSubLevel(boost::bind(&CGathererTools::TMeanGatherer::TMetricPartialStatistic::restore, boost::ref(restored), _1)); + traverser.traverseSubLevel(boost::bind( + &CGathererTools::TMeanGatherer::TMetricPartialStatistic::restore, + boost::ref(restored), _1)); restoredTime = restored.time(); { @@ -1954,21 +1955,10 @@ void CMetricDataGathererTest::testVarp() { { TFeatureVec features; features.push_back(model_t::E_IndividualVarianceByPerson); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - KEY, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(person, gatherer, m_ResourceMonitor)); @@ -1988,7 +1978,8 @@ void CMetricDataGathererTest::testVarp() { CPPUNIT_ASSERT_EQUAL(std::size_t(1), featureData.size()); TFeatureSizeFeatureDataPrVecPr fsfd = featureData[0]; CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualVarianceByPerson, fsfd.first); - CSample::TDouble1Vec v = featureData[0].second[0].second.s_BucketValue->value(); + CSample::TDouble1Vec v = + featureData[0].second[0].second.s_BucketValue->value(); double expectedMean = 0; double expectedVariance = ::variance(values, expectedMean); CPPUNIT_ASSERT_DOUBLES_EQUAL(v[0], expectedVariance, 0.0001); @@ -2010,7 +2001,8 @@ void CMetricDataGathererTest::testVarp() { TFeatureSizeFeatureDataPrVecPrVec featureData; 
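// For reference, the expectations in this test are computed with the
// file-local ::variance helper. A minimal sketch of its assumed contract,
// namely the maximum-likelihood (population) variance with the mean returned
// through the second argument; this is a hypothetical reimplementation for
// illustration only, the real helper may instead apply the unbiased N - 1
// correction:
//
//     double variance(const TDoubleVec& values, double& mean) {
//         mean = std::accumulate(values.begin(), values.end(), 0.0) /
//                static_cast<double>(values.size());
//         double ss = 0.0;
//         for (double x : values) {
//             ss += (x - mean) * (x - mean);
//         }
//         return ss / static_cast<double>(values.size());
//     }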
gatherer.featureData(startTime, bucketLength, featureData); LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData)); - CSample::TDouble1Vec v = featureData[0].second[0].second.s_BucketValue->value(); + CSample::TDouble1Vec v = + featureData[0].second[0].second.s_BucketValue->value(); double expectedMean = 0; double expectedVariance = ::variance(values, expectedMean); CPPUNIT_ASSERT_DOUBLES_EQUAL(v[0], expectedVariance, 0.0001); @@ -2039,28 +2031,19 @@ void CMetricDataGathererTest::testVarp() { TStrVec influencerFieldNames; influencerFieldNames.push_back("i"); influencerFieldNames.push_back("j"); - CDataGatherer gatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - influencerFieldNames, - false, - KEY, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, influencerFieldNames, + false, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(person, gatherer, m_ResourceMonitor, influencerFieldNames.size())); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(person, gatherer, m_ResourceMonitor, + influencerFieldNames.size())); TStrVec testInf(gatherer.beginInfluencers(), gatherer.endInfluencers()); LOG_DEBUG(<< "Influencer fields: " << core::CContainerPrinter::print(testInf)); - LOG_DEBUG(<< "FOI: " << core::CContainerPrinter::print(gatherer.fieldsOfInterest())); + LOG_DEBUG(<< "FOI: " + << core::CContainerPrinter::print(gatherer.fieldsOfInterest())); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); { @@ -2074,7 +2057,8 @@ void CMetricDataGathererTest::testVarp() { addArrival(gatherer, m_ResourceMonitor, startTime + 600, person, 5.1, inf1, ""); addArrival(gatherer, m_ResourceMonitor, startTime + 650, person, 1.0, "", ""); addArrival(gatherer, m_ResourceMonitor, startTime + 700, person, 5.0, inf1, ""); - addArrival(gatherer, m_ResourceMonitor, startTime + 800, person, 12.12, inf1, inf2); + addArrival(gatherer, m_ResourceMonitor, startTime + 800, person, + 12.12, inf1, inf2); addArrival(gatherer, m_ResourceMonitor, startTime + 900, person, 5.2, inf1, ""); addArrival(gatherer, m_ResourceMonitor, startTime + 950, person, 5.0, inf1, inf3); @@ -2084,7 +2068,8 @@ void CMetricDataGathererTest::testVarp() { TFeatureSizeFeatureDataPrVecPr fsfd = featureData[0]; CPPUNIT_ASSERT_EQUAL(model_t::E_IndividualVarianceByPerson, fsfd.first); - CSample::TDouble1Vec v = featureData[0].second[0].second.s_BucketValue->value(); + CSample::TDouble1Vec v = + featureData[0].second[0].second.s_BucketValue->value(); values.clear(); values.push_back(5.0); values.push_back(5.5); @@ -2155,29 +2140,35 @@ void CMetricDataGathererTest::testVarp() { CppUnit::Test* CMetricDataGathererTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricDataGathererTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::singleSeriesTests", - &CMetricDataGathererTest::singleSeriesTests)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::multipleSeriesTests", - &CMetricDataGathererTest::multipleSeriesTests)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testSampleCount", - &CMetricDataGathererTest::testSampleCount)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testRemovePeople",
- &CMetricDataGathererTest::testRemovePeople)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testSum", &CMetricDataGathererTest::testSum)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::singleSeriesOutOfOrderTests", - &CMetricDataGathererTest::singleSeriesOutOfOrderTests)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testResetBucketGivenSingleSeries", - &CMetricDataGathererTest::testResetBucketGivenSingleSeries)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testResetBucketGivenMultipleSeries", - &CMetricDataGathererTest::testResetBucketGivenMultipleSeries)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testInfluenceStatistics", - &CMetricDataGathererTest::testInfluenceStatistics)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testMultivariate", - &CMetricDataGathererTest::testMultivariate)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testStatisticsPersist", - &CMetricDataGathererTest::testStatisticsPersist)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricDataGathererTest>("CMetricDataGathererTest::testVarp", &CMetricDataGathererTest::testVarp)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::singleSeriesTests", &CMetricDataGathererTest::singleSeriesTests)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::multipleSeriesTests", + &CMetricDataGathererTest::multipleSeriesTests)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testSampleCount", &CMetricDataGathererTest::testSampleCount)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testRemovePeople", &CMetricDataGathererTest::testRemovePeople)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testSum", &CMetricDataGathererTest::testSum)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::singleSeriesOutOfOrderTests", + &CMetricDataGathererTest::singleSeriesOutOfOrderTests)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testResetBucketGivenSingleSeries", + &CMetricDataGathererTest::testResetBucketGivenSingleSeries)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testResetBucketGivenMultipleSeries", + &CMetricDataGathererTest::testResetBucketGivenMultipleSeries)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testInfluenceStatistics", + &CMetricDataGathererTest::testInfluenceStatistics)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testMultivariate", &CMetricDataGathererTest::testMultivariate)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testStatisticsPersist", + &CMetricDataGathererTest::testStatisticsPersist)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricDataGathererTest>( + "CMetricDataGathererTest::testVarp", &CMetricDataGathererTest::testVarp)); return suiteOfTests; } diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index 513e489beb..641fae4f7d 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -69,7 +69,8 @@ using TTimeDoublePr = std::pair<core_t::TTime, double>; using TOptionalTimeDoublePr = boost::optional<TTimeDoublePr>; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator; using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack<double, 1u>;
-using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>>; +using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>>; using TMathsModelPtr = boost::shared_ptr<maths::CModel>; using TPriorPtr = boost::shared_ptr<maths::CPrior>; using TMultivariatePriorPtr = boost::shared_ptr<maths::CMultivariatePrior>; @@ -88,7 +89,9 @@ const std::string EMPTY_STRING; class CTimeLess { public: - bool operator()(const CEventData& lhs, const CEventData& rhs) const { return lhs.time() < rhs.time(); } + bool operator()(const CEventData& lhs, const CEventData& rhs) const { + return lhs.time() < rhs.time(); + } }; void makeModel(CMetricModelFactory& factory, @@ -112,7 +115,9 @@ void makeModel(CMetricModelFactory& factory, CPPUNIT_ASSERT_EQUAL(bucketLength, model->bucketLength()); } -std::size_t addPerson(const std::string& p, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) { +std::size_t addPerson(const std::string& p, + const CModelFactory::TDataGathererPtr& gatherer, + CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec person; person.push_back(&p); person.resize(gatherer->fieldsOfInterest().size(), nullptr); @@ -140,7 +145,8 @@ void addArrival(CDataGatherer& gatherer, if (count) { fieldValues.push_back(&(count.get())); } - std::string valueAsString(core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise( + value, core::CIEEE754::E_DoublePrecision)); fieldValues.push_back(&valueAsString); CEventData eventData; @@ -177,7 +183,10 @@ void addArrival(CDataGatherer& gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -CEventData makeEventData(core_t::TTime time, std::size_t pid, double value, const TOptionalStr& influence = TOptionalStr()) { +CEventData makeEventData(core_t::TTime time, + std::size_t pid, + double value, + const TOptionalStr& influence = TOptionalStr()) { CEventData result; result.time(time); result.person(pid); @@ -187,7 +196,10 @@ CEventData makeEventData(core_t::TTime time, std::size_t pid, double value, cons return result; } -TDouble1Vec featureData(const CMetricModel& model, model_t::EFeature feature, std::size_t pid, core_t::TTime time) { +TDouble1Vec featureData(const CMetricModel& model, + model_t::EFeature feature, + std::size_t pid, + core_t::TTime time) { const CMetricModel::TFeatureData* data = model.featureData(feature, pid, time); if (!data) { return TDouble1Vec(); } @@ -195,7 +207,10 @@ TDouble1Vec featureData(const CMetricModel& model, model_t::EFeature feature, st return data->s_BucketValue ?
data->s_BucketValue->value() : TDouble1Vec(); } -TDouble1Vec multivariateFeatureData(const CMetricModel& model, model_t::EFeature feature, std::size_t pid, core_t::TTime time) { +TDouble1Vec multivariateFeatureData(const CMetricModel& model, + model_t::EFeature feature, + std::size_t pid, + core_t::TTime time) { const CMetricModel::TFeatureData* data = model.featureData(feature, pid, time); if (!data) { return TDouble1Vec(); @@ -212,11 +227,13 @@ void processBucket(core_t::TTime time, CMetricModel& model, SAnnotatedProbability& probability) { for (std::size_t i = 0u; i < bucket.size(); ++i) { - addArrival(gatherer, resourceMonitor, time, "p", bucket[i], TOptionalStr(influencerValues[i])); + addArrival(gatherer, resourceMonitor, time, "p", bucket[i], + TOptionalStr(influencerValues[i])); } model.sample(time, time + bucketLength, resourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, probability); + model.computeProbability(0 /*pid*/, time, time + bucketLength, + partitioningFields, 1, probability); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(probability.s_Influences)); } @@ -238,7 +255,8 @@ void processBucket(core_t::TTime time, fieldValues.push_back(&person2); } - std::string valueAsString(core::CStringUtils::typeToStringPrecise(bucket[i], core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise( + bucket[i], core::CIEEE754::E_DoublePrecision)); fieldValues.push_back(&valueAsString); CEventData eventData; @@ -248,8 +266,10 @@ void processBucket(core_t::TTime time, } model.sample(time, time + bucketLength, resourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, probability); - model.computeProbability(1 /*pid*/, time, time + bucketLength, partitioningFields, 1, probability2); + model.computeProbability(0 /*pid*/, time, time + bucketLength, + partitioningFields, 1, probability); + model.computeProbability(1 /*pid*/, time, time + bucketLength, + partitioningFields, 1, probability2); } const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); @@ -269,20 +289,14 @@ void CMetricModelTest::testSample() { // Check basic sampling. 
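// The loop below replays the model's sampling by hand: one metric sample is
// cut for every expectedSampleCounts[i] raw arrivals. The expected sample
// strings asserted throughout these tests, e.g. "(8 [1.55] 1 2)", decode as
// (sample time [statistic value] variance scale count); that reading assumes
// CSample's print format and is given here for orientation only. Sketch of
// the per-arrival bookkeeping for the mean feature, using the names of the
// loop that follows:
//
//     expectedMeanSample.add(data[j].second);       // accumulate raw value
//     if (++j % expectedSampleCounts[i] == 0) {     // a full sample gathered
//         expectedMeanSamples.push_back(
//             maths::CBasicStatistics::mean(expectedMeanSample));
//         expectedMeanSample = TMeanAccumulator();  // begin the next sample
//     }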
{ - TTimeDoublePr data[] = {TTimeDoublePr(49, 1.5), - TTimeDoublePr(60, 1.3), - TTimeDoublePr(61, 1.3), - TTimeDoublePr(62, 1.6), - TTimeDoublePr(65, 1.7), - TTimeDoublePr(66, 1.33), - TTimeDoublePr(68, 1.5), - TTimeDoublePr(84, 1.58), - TTimeDoublePr(87, 1.69), - TTimeDoublePr(157, 1.6), - TTimeDoublePr(164, 1.66), - TTimeDoublePr(199, 1.28), - TTimeDoublePr(202, 1.2), - TTimeDoublePr(204, 1.5)}; + TTimeDoublePr data[] = { + TTimeDoublePr(49, 1.5), TTimeDoublePr(60, 1.3), + TTimeDoublePr(61, 1.3), TTimeDoublePr(62, 1.6), + TTimeDoublePr(65, 1.7), TTimeDoublePr(66, 1.33), + TTimeDoublePr(68, 1.5), TTimeDoublePr(84, 1.58), + TTimeDoublePr(87, 1.69), TTimeDoublePr(157, 1.6), + TTimeDoublePr(164, 1.66), TTimeDoublePr(199, 1.28), + TTimeDoublePr(202, 1.2), TTimeDoublePr(204, 1.5)}; unsigned int sampleCounts[] = {2, 1}; unsigned int expectedSampleCounts[] = {2, 1}; @@ -294,7 +308,8 @@ void CMetricModelTest::testSample() { features.push_back(model_t::E_IndividualMaxByPerson); CModelFactory::TDataGathererPtr gatherer; CAnomalyDetectorModel::TModelPtr model_; - makeModel(factory, features, startTime, bucketLength, gatherer, model_, &sampleCounts[i]); + makeModel(factory, features, startTime, bucketLength, gatherer, + model_, &sampleCounts[i]); CMetricModel& model = static_cast<CMetricModel&>(*model_.get()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); @@ -317,17 +332,22 @@ void CMetricModelTest::testSample() { TDouble1Vec expectedMaxSamples; std::size_t numberSamples = 0; - TMathsModelPtr expectedMeanModel = factory.defaultFeatureModel(model_t::E_IndividualMeanByPerson, bucketLength, 0.4, true); - TMathsModelPtr expectedMinModel = factory.defaultFeatureModel(model_t::E_IndividualMinByPerson, bucketLength, 0.4, true); - TMathsModelPtr expectedMaxModel = factory.defaultFeatureModel(model_t::E_IndividualMaxByPerson, bucketLength, 0.4, true); + TMathsModelPtr expectedMeanModel = factory.defaultFeatureModel( + model_t::E_IndividualMeanByPerson, bucketLength, 0.4, true); + TMathsModelPtr expectedMinModel = factory.defaultFeatureModel( + model_t::E_IndividualMinByPerson, bucketLength, 0.4, true); + TMathsModelPtr expectedMaxModel = factory.defaultFeatureModel( + model_t::E_IndividualMaxByPerson, bucketLength, 0.4, true); std::size_t j = 0; core_t::TTime time = startTime; for (;;) { if (j < boost::size(data) && data[j].first < time + bucketLength) { - LOG_DEBUG(<< "Adding " << data[j].second << " at " << data[j].first); + LOG_DEBUG(<< "Adding " << data[j].second << " at " + << data[j].first); - addArrival(*gatherer, m_ResourceMonitor, data[j].first, "p", data[j].second); + addArrival(*gatherer, m_ResourceMonitor, data[j].first, "p", + data[j].second); ++expectedCount; expectedMean.add(data[j].second); @@ -343,8 +363,10 @@ void CMetricModelTest::testSample() { if (j % expectedSampleCounts[i] == 0) { ++numberSamples; - expectedSampleTimes.push_back(maths::CBasicStatistics::mean(expectedSampleTime)); - expectedMeanSamples.push_back(maths::CBasicStatistics::mean(expectedMeanSample)); + expectedSampleTimes.push_back( + maths::CBasicStatistics::mean(expectedSampleTime)); + expectedMeanSamples.push_back( + maths::CBasicStatistics::mean(expectedMeanSample)); expectedMinSamples.push_back(expectedMinSample[0]); expectedMaxSamples.push_back(expectedMaxSample[0]); expectedSampleTime = TMeanAccumulator(); @@ -360,12 +382,13 @@ void CMetricModelTest::testSample() { expectedBaselineMean.add(maths::CBasicStatistics::mean(expectedMean)); } if (numberSamples > 0) { - LOG_DEBUG(<< "Adding mean samples
= " << core::CContainerPrinter::print(expectedMeanSamples) - << ", min samples = " << core::CContainerPrinter::print(expectedMinSamples) - << ", max samples = " << core::CContainerPrinter::print(expectedMaxSamples)); + LOG_DEBUG(<< "Adding mean samples = " + << core::CContainerPrinter::print(expectedMeanSamples) << ", min samples = " + << core::CContainerPrinter::print(expectedMinSamples) << ", max samples = " + << core::CContainerPrinter::print(expectedMaxSamples)); - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights(numberSamples, - maths::CConstantWeights::unit<TDouble2Vec>(1)); + maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights( + numberSamples, maths::CConstantWeights::unit<TDouble2Vec>(1)); maths::CModelAddSamplesParams params_; params_.integer(false) .nonNegative(true) @@ -380,10 +403,17 @@ void CMetricModelTest::testSample() { for (std::size_t k = 0u; k < numberSamples; ++k) { // We round to the nearest integer time (note this has to match // the behaviour of CMetricPartialStatistic::time). - core_t::TTime sampleTime{static_cast<core_t::TTime>(expectedSampleTimes[k] + 0.5)}; - expectedMeanSamples_.emplace_back(sampleTime, TDouble2Vec{expectedMeanSamples[k]}, std::size_t(0)); - expectedMinSamples_.emplace_back(sampleTime, TDouble2Vec{expectedMinSamples[k]}, std::size_t(0)); - expectedMaxSamples_.emplace_back(sampleTime, TDouble2Vec{expectedMaxSamples[k]}, std::size_t(0)); + core_t::TTime sampleTime{static_cast<core_t::TTime>( + expectedSampleTimes[k] + 0.5)}; + expectedMeanSamples_.emplace_back( + sampleTime, TDouble2Vec{expectedMeanSamples[k]}, + std::size_t(0)); + expectedMinSamples_.emplace_back( + sampleTime, TDouble2Vec{expectedMinSamples[k]}, + std::size_t(0)); + expectedMaxSamples_.emplace_back( + sampleTime, TDouble2Vec{expectedMaxSamples[k]}, + std::size_t(0)); } expectedMeanModel->addSamples(params_, expectedMeanSamples_); expectedMinModel->addSamples(params_, expectedMinSamples_); @@ -395,40 +425,60 @@ void CMetricModelTest::testSample() { expectedMaxSamples.clear(); } - model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Final); + model_t::CResultType type(model_t::CResultType::E_Unconditional | + model_t::CResultType::E_Final); TOptionalUInt64 currentCount = model.currentBucketCount(0, time); - TDouble1Vec bucketMean = model.currentBucketValue(model_t::E_IndividualMeanByPerson, 0, 0, time); - TDouble1Vec baselineMean = model.baselineBucketMean(model_t::E_IndividualMeanByPerson, 0, 0, type, NO_CORRELATES, time); - - LOG_DEBUG(<< "bucket count = " << core::CContainerPrinter::print(currentCount)); - LOG_DEBUG(<< "current bucket mean = " << core::CContainerPrinter::print(bucketMean) - << ", expected baseline bucket mean = " << maths::CBasicStatistics::mean(expectedBaselineMean) - << ", baseline bucket mean = " << core::CContainerPrinter::print(baselineMean)); + TDouble1Vec bucketMean = model.currentBucketValue( + model_t::E_IndividualMeanByPerson, 0, 0, time); + TDouble1Vec baselineMean = model.baselineBucketMean( + model_t::E_IndividualMeanByPerson, 0, 0, type, NO_CORRELATES, time); + + LOG_DEBUG(<< "bucket count = " + << core::CContainerPrinter::print(currentCount)); + LOG_DEBUG(<< "current bucket mean = " + << core::CContainerPrinter::print(bucketMean) << ", expected baseline bucket mean = " + << maths::CBasicStatistics::mean(expectedBaselineMean) << ", baseline bucket mean = " + << core::CContainerPrinter::print(baselineMean)); CPPUNIT_ASSERT(currentCount); CPPUNIT_ASSERT_EQUAL(expectedCount, *currentCount); - TDouble1Vec mean =
maths::CBasicStatistics::count(expectedMean) > 0.0 - ? TDouble1Vec(1, maths::CBasicStatistics::mean(expectedMean)) - : TDouble1Vec(); - TDouble1Vec min = expectedMin.count() > 0 ? TDouble1Vec(1, expectedMin[0]) : TDouble1Vec(); - TDouble1Vec max = expectedMax.count() > 0 ? TDouble1Vec(1, expectedMax[0]) : TDouble1Vec(); + TDouble1Vec mean = + maths::CBasicStatistics::count(expectedMean) > 0.0 + ? TDouble1Vec(1, maths::CBasicStatistics::mean(expectedMean)) + : TDouble1Vec(); + TDouble1Vec min = expectedMin.count() > 0 + ? TDouble1Vec(1, expectedMin[0]) + : TDouble1Vec(); + TDouble1Vec max = expectedMax.count() > 0 + ? TDouble1Vec(1, expectedMax[0]) + : TDouble1Vec(); CPPUNIT_ASSERT(mean == bucketMean); if (!baselineMean.empty()) { - baselineMeanError.add(std::fabs(baselineMean[0] - maths::CBasicStatistics::mean(expectedBaselineMean))); + baselineMeanError.add(std::fabs( + baselineMean[0] - maths::CBasicStatistics::mean(expectedBaselineMean))); } - CPPUNIT_ASSERT(mean == featureData(model, model_t::E_IndividualMeanByPerson, 0, time)); - CPPUNIT_ASSERT(min == featureData(model, model_t::E_IndividualMinByPerson, 0, time)); - CPPUNIT_ASSERT(max == featureData(model, model_t::E_IndividualMaxByPerson, 0, time)); + CPPUNIT_ASSERT(mean == featureData(model, model_t::E_IndividualMeanByPerson, + 0, time)); + CPPUNIT_ASSERT(min == featureData(model, model_t::E_IndividualMinByPerson, + 0, time)); + CPPUNIT_ASSERT(max == featureData(model, model_t::E_IndividualMaxByPerson, + 0, time)); CPPUNIT_ASSERT_EQUAL(expectedMeanModel->checksum(), - model.details()->model(model_t::E_IndividualMeanByPerson, 0)->checksum()); + model.details() + ->model(model_t::E_IndividualMeanByPerson, 0) + ->checksum()); CPPUNIT_ASSERT_EQUAL(expectedMinModel->checksum(), - model.details()->model(model_t::E_IndividualMinByPerson, 0)->checksum()); + model.details() + ->model(model_t::E_IndividualMinByPerson, 0) + ->checksum()); CPPUNIT_ASSERT_EQUAL(expectedMaxModel->checksum(), - model.details()->model(model_t::E_IndividualMaxByPerson, 0)->checksum()); + model.details() + ->model(model_t::E_IndividualMaxByPerson, 0) + ->checksum()); // Test persistence. (We check for idempotency.) 
         std::string origXml;
@@ -444,7 +494,8 @@ void CMetricModelTest::testSample() {
         core::CRapidXmlStateRestoreTraverser traverser(parser);

         CModelFactory::SModelInitializationData initData(gatherer);
-        CAnomalyDetectorModel::TModelPtr restoredModel(factory.makeModel(initData, traverser));
+        CAnomalyDetectorModel::TModelPtr restoredModel(
+            factory.makeModel(initData, traverser));

         // The XML representation of the new filter should be the same as the original
         std::string newXml;
@@ -473,7 +524,8 @@ void CMetricModelTest::testSample() {
                 time += bucketLength;
             }
         }
-        LOG_DEBUG(<< "baseline mean error = " << maths::CBasicStatistics::mean(baselineMeanError));
+        LOG_DEBUG(<< "baseline mean error = "
+                  << maths::CBasicStatistics::mean(baselineMeanError));
         CPPUNIT_ASSERT(maths::CBasicStatistics::mean(baselineMeanError) < 0.25);
     }
 }
@@ -494,20 +546,11 @@ void CMetricModelTest::testMultivariateSample() {
     params.s_MaximumUpdatesPerBucket = 0.0;
     CMetricModelFactory factory(params);

-    double data_[][3] = {{49, 1.5, 1.1},
-                         {60, 1.3, 1.2},
-                         {61, 1.3, 2.1},
-                         {62, 1.6, 1.5},
-                         {65, 1.7, 1.4},
-                         {66, 1.33, 1.6},
-                         {68, 1.5, 1.37},
-                         {84, 1.58, 1.42},
-                         {87, 1.6, 1.6},
-                         {157, 1.6, 1.6},
-                         {164, 1.66, 1.55},
-                         {199, 1.28, 1.4},
-                         {202, 1.3, 1.1},
-                         {204, 1.5, 1.8}};
+    double data_[][3] = {{49, 1.5, 1.1},  {60, 1.3, 1.2},    {61, 1.3, 2.1},
+                         {62, 1.6, 1.5},  {65, 1.7, 1.4},    {66, 1.33, 1.6},
+                         {68, 1.5, 1.37}, {84, 1.58, 1.42},  {87, 1.6, 1.6},
+                         {157, 1.6, 1.6}, {164, 1.66, 1.55}, {199, 1.28, 1.4},
+                         {202, 1.3, 1.1}, {204, 1.5, 1.8}};
     TTimeDouble2AryPrVec data;
     for (std::size_t i = 0u; i < boost::size(data_); ++i) {
         boost::array<double, 2> values = {{data_[i][1], data_[i][2]}};
@@ -523,7 +566,8 @@ void CMetricModelTest::testMultivariateSample() {
         CDataGatherer::TFeatureVec features(1, model_t::E_IndividualMeanLatLongByPerson);
         CModelFactory::TDataGathererPtr gatherer;
         CAnomalyDetectorModel::TModelPtr model_;
-        makeModel(factory, features, startTime, bucketLength, gatherer, model_, &sampleCounts[i]);
+        makeModel(factory, features, startTime, bucketLength, gatherer, model_,
+                  &sampleCounts[i]);
         CMetricModel& model = static_cast<CMetricModel&>(*model_.get());
         CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor));

@@ -537,15 +581,18 @@ void CMetricModelTest::testMultivariateSample() {
         TMean2Accumulator expectedLatLongSample;
         std::size_t numberSamples = 0;
         TDoubleVecVec expectedLatLongSamples;
-        TMultivariatePriorPtr expectedMeanPrior = factory.defaultMultivariatePrior(model_t::E_IndividualMeanLatLongByPerson);
+        TMultivariatePriorPtr expectedMeanPrior =
+            factory.defaultMultivariatePrior(model_t::E_IndividualMeanLatLongByPerson);

         std::size_t j = 0;
         core_t::TTime time = startTime;
         for (;;) {
             if (j < data.size() && data[j].first < time + bucketLength) {
-                LOG_DEBUG(<< "Adding " << data[j].second[0] << "," << data[j].second[1] << " at " << data[j].first);
+                LOG_DEBUG(<< "Adding " << data[j].second[0] << ","
+                          << data[j].second[1] << " at " << data[j].first);

-                addArrival(*gatherer, m_ResourceMonitor, data[j].first, "p", data[j].second[0], data[j].second[1]);
+                addArrival(*gatherer, m_ResourceMonitor, data[j].first, "p",
+                           data[j].second[0], data[j].second[1]);

                 ++expectedCount;
                 expectedLatLong.add(TVector2(data[j].second));

                 if (++j % expectedSampleCounts[i] == 0) {
                     ++numberSamples;
-                    expectedLatLongSamples.push_back(TDoubleVec(maths::CBasicStatistics::mean(expectedLatLongSample).begin(),
-                                                                maths::CBasicStatistics::mean(expectedLatLongSample).end()));
+                    expectedLatLongSamples.push_back(TDoubleVec(
+                        maths::CBasicStatistics::mean(expectedLatLongSample).begin(),
+                        maths::CBasicStatistics::mean(expectedLatLongSample).end()));
                     expectedLatLongSample = TMean2Accumulator();
                 }
             } else {
@@ -565,26 +613,34 @@ void CMetricModelTest::testMultivariateSample() {
                 expectedBaselineLatLong.add(maths::CBasicStatistics::mean(expectedLatLong));
             }
             if (numberSamples > 0) {
-                std::sort(expectedLatLongSamples.begin(), expectedLatLongSamples.end());
-                LOG_DEBUG(<< "Adding mean samples = " << core::CContainerPrinter::print(expectedLatLongSamples));
+                std::sort(expectedLatLongSamples.begin(),
+                          expectedLatLongSamples.end());
+                LOG_DEBUG(<< "Adding mean samples = "
+                          << core::CContainerPrinter::print(expectedLatLongSamples));
                 expectedMeanPrior->dataType(maths_t::E_ContinuousData);
-                expectedMeanPrior->addSamples(COUNT_WEIGHT,
-                                              expectedLatLongSamples,
-                                              TDoubleVecVecVec(expectedLatLongSamples.size(), TDoubleVecVec(1, TDoubleVec(2, 1.0))));
+                expectedMeanPrior->addSamples(
+                    COUNT_WEIGHT, expectedLatLongSamples,
+                    TDoubleVecVecVec(expectedLatLongSamples.size(),
+                                     TDoubleVecVec(1, TDoubleVec(2, 1.0))));
                 numberSamples = 0u;
                 expectedLatLongSamples.clear();
             }

-            model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Final);
+            model_t::CResultType type(model_t::CResultType::E_Unconditional |
+                                      model_t::CResultType::E_Final);
             TOptionalUInt64 currentCount = model.currentBucketCount(0, time);
-            TDouble1Vec bucketLatLong = model.currentBucketValue(model_t::E_IndividualMeanLatLongByPerson, 0, 0, time);
+            TDouble1Vec bucketLatLong = model.currentBucketValue(
+                model_t::E_IndividualMeanLatLongByPerson, 0, 0, time);
             TDouble1Vec baselineLatLong =
-                model.baselineBucketMean(model_t::E_IndividualMeanLatLongByPerson, 0, 0, type, NO_CORRELATES, time);
+                model.baselineBucketMean(model_t::E_IndividualMeanLatLongByPerson,
+                                         0, 0, type, NO_CORRELATES, time);

-            LOG_DEBUG(<< "bucket count = " << core::CContainerPrinter::print(currentCount));
-            LOG_DEBUG(<< "current bucket mean = " << core::CContainerPrinter::print(bucketLatLong)
-                      << ", expected baseline bucket mean = " << maths::CBasicStatistics::mean(expectedBaselineLatLong)
-                      << ", baseline bucket mean = " << core::CContainerPrinter::print(baselineLatLong));
+            LOG_DEBUG(<< "bucket count = "
+                      << core::CContainerPrinter::print(currentCount));
+            LOG_DEBUG(<< "current bucket mean = "
+                      << core::CContainerPrinter::print(bucketLatLong) << ", expected baseline bucket mean = "
+                      << maths::CBasicStatistics::mean(expectedBaselineLatLong) << ", baseline bucket mean = "
+                      << core::CContainerPrinter::print(baselineLatLong));

             CPPUNIT_ASSERT(currentCount);
             CPPUNIT_ASSERT_EQUAL(expectedCount, *currentCount);
@@ -596,15 +652,18 @@ void CMetricModelTest::testMultivariateSample() {
             }
             CPPUNIT_ASSERT(latLong == bucketLatLong);
             if (!baselineLatLong.empty()) {
-                baselineLatLongError.add(
-                    maths::fabs(TVector2(baselineLatLong) - maths::CBasicStatistics::mean(expectedBaselineLatLong)));
+                baselineLatLongError.add(maths::fabs(
+                    TVector2(baselineLatLong) -
+                    maths::CBasicStatistics::mean(expectedBaselineLatLong)));
             }

-            CPPUNIT_ASSERT(latLong == multivariateFeatureData(model, model_t::E_IndividualMeanLatLongByPerson, 0, time));
-            CPPUNIT_ASSERT_EQUAL(expectedMeanPrior->checksum(),
-                                 dynamic_cast<const maths::CMultivariateTimeSeriesModel*>(
-                                     model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0))
-                                     ->residualModel()
-                                     .checksum());
+            CPPUNIT_ASSERT(latLong == multivariateFeatureData(model, model_t::E_IndividualMeanLatLongByPerson,
+                                                              0, time));
+            CPPUNIT_ASSERT_EQUAL(
+                expectedMeanPrior->checksum(),
+                dynamic_cast<const maths::CMultivariateTimeSeriesModel*>(
+                    model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0))
+                    ->residualModel()
+                    .checksum());

             // Test persistence. (We check for idempotency.)
             std::string origXml;
@@ -620,7 +679,8 @@ void CMetricModelTest::testMultivariateSample() {
             core::CRapidXmlStateRestoreTraverser traverser(parser);

             CModelFactory::SModelInitializationData initData(gatherer);
-            CAnomalyDetectorModel::TModelPtr restoredModel(factory.makeModel(initData, traverser));
+            CAnomalyDetectorModel::TModelPtr restoredModel(
+                factory.makeModel(initData, traverser));

             // The XML representation of the new filter should be the same as the original
             std::string newXml;
@@ -647,7 +707,8 @@ void CMetricModelTest::testMultivariateSample() {
             time += bucketLength;
         }
     }
-    LOG_DEBUG(<< "baseline mean error = " << maths::CBasicStatistics::mean(baselineLatLongError));
+    LOG_DEBUG(<< "baseline mean error = "
+              << maths::CBasicStatistics::mean(baselineLatLongError));
     CPPUNIT_ASSERT(maths::CBasicStatistics::mean(baselineLatLongError)(0) < 0.25);
     CPPUNIT_ASSERT(maths::CBasicStatistics::mean(baselineLatLongError)(1) < 0.25);
 }
@@ -686,20 +747,20 @@ void CMetricModelTest::testProbabilityCalculationForMetric() {
         TDoubleVec values;
         rng.generateNormalSamples(mean, variance, bucketCounts[i], values);
         LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values));
-        LOG_DEBUG(<< "i = " << i << ", anomalousBucket = " << anomalousBucket << ", offset = " << (i == anomalousBucket ? anomaly : 0.0));
+        LOG_DEBUG(<< "i = " << i << ", anomalousBucket = " << anomalousBucket
+                  << ", offset = " << (i == anomalousBucket ? anomaly : 0.0));

         for (std::size_t j = 0u; j < values.size(); ++j) {
-            addArrival(*gatherer,
-                       m_ResourceMonitor,
-                       time + static_cast<core_t::TTime>(j),
-                       "p",
+            addArrival(*gatherer, m_ResourceMonitor,
+                       time + static_cast<core_t::TTime>(j), "p",
                        values[j] + (i == anomalousBucket ? anomaly : 0.0));
         }
         model.sample(time, time + bucketLength, m_ResourceMonitor);

         CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
         SAnnotatedProbability annotatedProbability;
-        if (model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability) == false) {
+        if (model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields,
+                                     1, annotatedProbability) == false) {
             continue;
         }
         LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability);
@@ -710,7 +771,9 @@ void CMetricModelTest::testProbabilityCalculationForMetric() {
     }

     minProbabilities.sort();
-    LOG_DEBUG(<< "minProbabilities = " << core::CContainerPrinter::print(minProbabilities.begin(), minProbabilities.end()));
+    LOG_DEBUG(<< "minProbabilities = "
+              << core::CContainerPrinter::print(minProbabilities.begin(),
+                                                minProbabilities.end()));
     CPPUNIT_ASSERT_EQUAL(anomalousBucket, minProbabilities[0].second);
     CPPUNIT_ASSERT(minProbabilities[0].first / minProbabilities[1].first < 0.05);
 }
@@ -755,14 +818,16 @@ void CMetricModelTest::testProbabilityCalculationForMedian() {
         LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values));

         for (std::size_t j = 0u; j < values.size(); ++j) {
-            addArrival(*gatherer, m_ResourceMonitor, time + static_cast<core_t::TTime>(j), "p", values[j]);
+            addArrival(*gatherer, m_ResourceMonitor,
+                       time + static_cast<core_t::TTime>(j), "p", values[j]);
         }
         model.sample(time, time + bucketLength, m_ResourceMonitor);

         CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
         SAnnotatedProbability annotatedProbability;
-        if (model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability) == false) {
+        if (model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields,
+                                     1, annotatedProbability) == false) {
             continue;
         }

@@ -774,12 +839,15 @@ void CMetricModelTest::testProbabilityCalculationForMedian() {
     }

     minProbabilities.sort();
-    LOG_DEBUG(<< "minProbabilities = " << core::CContainerPrinter::print(minProbabilities.begin(), minProbabilities.end()));
+    LOG_DEBUG(<< "minProbabilities = "
+              << core::CContainerPrinter::print(minProbabilities.begin(),
+                                                minProbabilities.end()));
     CPPUNIT_ASSERT_EQUAL(anomalousBucket, minProbabilities[0].second);
     CPPUNIT_ASSERT(minProbabilities[0].first / minProbabilities[1].first < 0.05);

     std::size_t pid(0);
-    const CMetricModel::TFeatureData* fd = model.featureData(ml::model_t::E_IndividualMedianByPerson, pid, time - bucketLength);
+    const CMetricModel::TFeatureData* fd = model.featureData(
+        ml::model_t::E_IndividualMedianByPerson, pid, time - bucketLength);

     // assert there is only 1 value in the last bucket and its the median
     CPPUNIT_ASSERT_EQUAL(fd->s_BucketValue->value()[0], mean * 3.0);
@@ -827,20 +895,24 @@ void CMetricModelTest::testProbabilityCalculationForLowMean() {
         LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values));

         for (std::size_t j = 0u; j < values.size(); ++j) {
-            addArrival(*gatherer, m_ResourceMonitor, time + static_cast<core_t::TTime>(j), "p", values[j]);
+            addArrival(*gatherer, m_ResourceMonitor,
+                       time + static_cast<core_t::TTime>(j), "p", values[j]);
         }
         model.sample(time, time + bucketLength, m_ResourceMonitor);

         CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
         SAnnotatedProbability annotatedProbability;
-        CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability));
+        CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields,
+                                                1, annotatedProbability));
         LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability);
         probabilities.push_back(annotatedProbability.s_Probability);

         time += bucketLength;
     }

-    LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities.begin(), probabilities.end()));
+    LOG_DEBUG(<< "probabilities = "
+              << core::CContainerPrinter::print(probabilities.begin(),
+                                                probabilities.end()));

     CPPUNIT_ASSERT(probabilities[lowMeanBucket] < 0.01);
     CPPUNIT_ASSERT(probabilities[highMeanBucket] > 0.1);
@@ -887,13 +959,15 @@ void CMetricModelTest::testProbabilityCalculationForHighMean() {
         LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values));

         for (std::size_t j = 0u; j < values.size(); ++j) {
-            addArrival(*gatherer, m_ResourceMonitor, time + static_cast<core_t::TTime>(j), "p", values[j]);
+            addArrival(*gatherer, m_ResourceMonitor,
+                       time + static_cast<core_t::TTime>(j), "p", values[j]);
         }
         model.sample(time, time + bucketLength, m_ResourceMonitor);

         CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
         SAnnotatedProbability annotatedProbability;
-        CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability));
+        CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields,
+                                                1, annotatedProbability));
         LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability);
         probabilities.push_back(annotatedProbability.s_Probability);
@@ -947,13 +1021,15 @@ void CMetricModelTest::testProbabilityCalculationForLowSum() {
         LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values));

         for (std::size_t j = 0u; j < values.size(); ++j) {
-            addArrival(*gatherer, m_ResourceMonitor, time + static_cast<core_t::TTime>(j), "p", values[j]);
+            addArrival(*gatherer, m_ResourceMonitor,
+                       time + static_cast<core_t::TTime>(j), "p", values[j]);
         }
         model.sample(time, time + bucketLength, m_ResourceMonitor);

         CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
         SAnnotatedProbability annotatedProbability;
-        CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability));
+        CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields,
+                                                1, annotatedProbability));
         LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability);
         probabilities.push_back(annotatedProbability.s_Probability);
@@ -1006,13 +1082,15 @@ void CMetricModelTest::testProbabilityCalculationForHighSum() {
         LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values));

         for (std::size_t j = 0u; j < values.size(); ++j) {
-            addArrival(*gatherer, m_ResourceMonitor, time + static_cast<core_t::TTime>(j), "p", values[j]);
+            addArrival(*gatherer, m_ResourceMonitor,
+                       time + static_cast<core_t::TTime>(j), "p", values[j]);
         }
         model.sample(time, time + bucketLength, m_ResourceMonitor);

         CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
         SAnnotatedProbability annotatedProbability;
-        CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability));
+        CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields,
+                                                1, annotatedProbability));
         LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability);
         probabilities.push_back(annotatedProbability.s_Probability);
@@ -1073,7 +1151,8 @@ void CMetricModelTest::testInfluence() {
         maths::CBasicStatistics::SMin<TDoubleStrPr>::TAccumulator min;
         maths::CBasicStatistics::SMax<TDoubleStrPr>::TAccumulator max;
         for (std::size_t j = 0u; j < samples.size(); ++j) {
-            addArrival(*gatherer, m_ResourceMonitor, time, "p", samples[j], TOptionalStr(influencerValues[j]));
+            addArrival(*gatherer, m_ResourceMonitor, time, "p", samples[j],
+                       TOptionalStr(influencerValues[j]));
             min.add(TDoubleStrPr(samples[j], influencerValues[j]));
             max.add(TDoubleStrPr(samples[j], influencerValues[j]));
         }
@@ -1082,19 +1161,23 @@ void CMetricModelTest::testInfluence() {

         CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
         SAnnotatedProbability annotatedProbability;
-        model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability);
+        model.computeProbability(0 /*pid*/, time, time + bucketLength,
+                                 partitioningFields, 1, annotatedProbability);

-        LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(annotatedProbability.s_Influences));
+        LOG_DEBUG(<< "influences = "
+                  << core::CContainerPrinter::print(annotatedProbability.s_Influences));
         if (!annotatedProbability.s_Influences.empty()) {
             std::size_t j = 0u;
             for (/**/; j < annotatedProbability.s_Influences.size(); ++j) {
                 if (feature == model_t::E_IndividualMinByPerson &&
-                    *annotatedProbability.s_Influences[j].first.second == min[0].second &&
+                    *annotatedProbability.s_Influences[j].first.second ==
+                        min[0].second &&
                     std::fabs(annotatedProbability.s_Influences[j].second - 1.0) < 1e-10) {
                     break;
                 }
                 if (feature == model_t::E_IndividualMaxByPerson &&
-                    *annotatedProbability.s_Influences[j].first.second == max[0].second &&
+                    *annotatedProbability.s_Influences[j].first.second ==
+                        max[0].second &&
                     std::fabs(annotatedProbability.s_Influences[j].second - 1.0) < 1e-10) {
                     break;
                 }
@@ -1104,8 +1187,7 @@ void CMetricModelTest::testInfluence() {
         }
     }

-    auto testFeature = [this](model_t::EFeature feature,
-                              const TDoubleVecVec& values,
+    auto testFeature = [this](model_t::EFeature feature, const TDoubleVecVec& values,
                               const TStrVecVec& influencers,
                               const TStrDoubleDoubleTrVecVec& influences) {
         core_t::TTime startTime{0};
@@ -1130,14 +1212,17 @@ void CMetricModelTest::testInfluence() {

         core_t::TTime time{startTime};
         for (std::size_t i = 0u; i < values.size(); ++i) {
-            processBucket(time, bucketLength, values[i], influencers[i], *gatherer, m_ResourceMonitor, model, annotatedProbability);
-            CPPUNIT_ASSERT_EQUAL(influences[i].size(), annotatedProbability.s_Influences.size());
+            processBucket(time, bucketLength, values[i], influencers[i], *gatherer,
+                          m_ResourceMonitor, model, annotatedProbability);
+            CPPUNIT_ASSERT_EQUAL(influences[i].size(),
+                                 annotatedProbability.s_Influences.size());
             if (influences[i].size() > 0) {
                 for (const auto& expected : influences[i]) {
                     bool found{false};
                     for (const auto& actual : annotatedProbability.s_Influences) {
                         if (expected.first == *actual.first.second) {
-                            CPPUNIT_ASSERT(actual.second >= expected.second && actual.second <= expected.third);
+                            CPPUNIT_ASSERT(actual.second >= expected.second &&
+                                           actual.second <= expected.third);
                             found = true;
                             break;
                         }
                     }
@@ -1172,16 +1257,17 @@ void CMetricModelTest::testInfluence() {
                              {"i1", "i2"},
                              {"i1", "i2", "i3", "i4", "i5", "i6"},
                              {"i2"}};
-        TStrDoubleDoubleTrVecVec influences{{},
-                                            {},
-                                            {},
-                                            {},
-                                            {},
-                                            {},
-                                            {core::make_triple(std::string{"i1"}, 0.9, 1.0)},
-                                            {core::make_triple(std::string{"i1"}, 0.8, 0.9)},
-                                            {},
-                                            {core::make_triple(std::string{"i2"}, 1.0, 1.0)}};
+        TStrDoubleDoubleTrVecVec influences{
+            {},
+            {},
+            {},
+            {},
+            {},
+            {},
+            {core::make_triple(std::string{"i1"}, 0.9, 1.0)},
+            {core::make_triple(std::string{"i1"}, 0.8, 0.9)},
+            {},
+            {core::make_triple(std::string{"i2"}, 1.0, 1.0)}};
         testFeature(model_t::E_IndividualMeanByPerson, values, influencers, influences);
     }
@@ -1212,8 +1298,10 @@ void CMetricModelTest::testInfluence() {
                                             {},
                                             {},
                                             {},
-                                            {core::make_triple(std::string{"i1"}, 0.6, 0.7), core::make_triple(std::string{"i2"}, 0.9, 1.0)},
-                                            {core::make_triple(std::string{"i1"}, 0.9, 1.0), core::make_triple(std::string{"i2"}, 0.9, 1.0)},
+                                            {core::make_triple(std::string{"i1"}, 0.6, 0.7),
+                                             core::make_triple(std::string{"i2"}, 0.9, 1.0)},
+                                            {core::make_triple(std::string{"i1"}, 0.9, 1.0),
+                                             core::make_triple(std::string{"i2"}, 0.9, 1.0)},
                                             {},
                                             {core::make_triple(std::string{"i1"}, 1.0, 1.0)},
                                             {},
@@ -1256,7 +1344,8 @@ void CMetricModelTest::testInfluence() {
                                             {},
                                             {},
                                             {},
-                                            {core::make_triple(std::string{"i1"}, 0.9, 1.0), core::make_triple(std::string{"i3"}, 0.9, 1.0)},
+                                            {core::make_triple(std::string{"i1"}, 0.9, 1.0),
+                                             core::make_triple(std::string{"i3"}, 0.9, 1.0)},
                                             {core::make_triple(std::string{"i1"}, 0.9, 1.0)},
                                             {core::make_triple(std::string{"i5"}, 0.9, 1.0)},
                                             {}};
@@ -1283,14 +1372,10 @@ void CMetricModelTest::testPrune() {
     const core_t::TTime startTime = 1346968800;
     const core_t::TTime bucketLength = 3600;

-    const std::string people[] = {std::string("p1"),
-                                  std::string("p2"),
-                                  std::string("p3"),
-                                  std::string("p4"),
-                                  std::string("p5"),
-                                  std::string("p6"),
-                                  std::string("p7"),
-                                  std::string("p8")};
+    const std::string people[] = {std::string("p1"), std::string("p2"),
+                                  std::string("p3"), std::string("p4"),
+                                  std::string("p5"), std::string("p6"),
+                                  std::string("p7"), std::string("p8")};

     TSizeVecVec eventCounts;
     eventCounts.push_back(TSizeVec(1000u, 0));
@@ -1339,7 +1424,8 @@ void CMetricModelTest::testPrune() {
     CModelFactory::TDataGathererPtr expectedGatherer(factory.makeDataGatherer(gathererInitData));
     CModelFactory::SModelInitializationData expectedModelInitData(expectedGatherer);
     CAnomalyDetectorModel::TModelPtr expectedModelHolder(factory.makeModel(expectedModelInitData));
-    CMetricModel* expectedModel = dynamic_cast<CMetricModel*>(expectedModelHolder.get());
+    CMetricModel* expectedModel =
+        dynamic_cast<CMetricModel*>(expectedModelHolder.get());
     CPPUNIT_ASSERT(expectedModel);

     test::CRandomNumbers rng;
@@ -1353,9 +1439,11 @@ void CMetricModelTest::testPrune() {
                 TDoubleVec samples;
                 rng.generateUniformSamples(0.0, 5.0, static_cast<std::size_t>(n), samples);
-                for (core_t::TTime k = 0, time = bucketStart, dt = bucketLength / n; k < n; ++k, time += dt) {
+                for (core_t::TTime k = 0, time = bucketStart, dt = bucketLength / n;
+                     k < n; ++k, time += dt) {
                     std::size_t pid = addPerson(people[i], gatherer, m_ResourceMonitor);
-                    events.push_back(makeEventData(time, pid, samples[static_cast<std::size_t>(k)]));
+                    events.push_back(
+                        makeEventData(time, pid, samples[static_cast<std::size_t>(k)]));
                 }
             }
         }
@@ -1366,12 +1454,16 @@ void CMetricModelTest::testPrune() {
     expectedEvents.reserve(events.size());
     TSizeSizeMap mapping;
     for (std::size_t i = 0u; i < boost::size(expectedPeople); ++i) {
-        std::size_t pid = addPerson(people[expectedPeople[i]], expectedGatherer, m_ResourceMonitor);
+        std::size_t pid = addPerson(people[expectedPeople[i]], expectedGatherer,
+                                    m_ResourceMonitor);
         mapping[expectedPeople[i]] = pid;
     }
     for (std::size_t i = 0u; i < events.size(); ++i) {
-        if (std::binary_search(boost::begin(expectedPeople), boost::end(expectedPeople), events[i].personId())) {
-            expectedEvents.push_back(makeEventData(events[i].time(), mapping[*events[i].personId()], events[i].values()[0][0]));
+        if (std::binary_search(boost::begin(expectedPeople),
+                               boost::end(expectedPeople), events[i].personId())) {
+            expectedEvents.push_back(makeEventData(events[i].time(),
+                                                   mapping[*events[i].personId()],
+                                                   events[i].values()[0][0]));
         }
     }

@@ -1381,8 +1473,9 @@ void CMetricModelTest::testPrune() {
             model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor);
             bucketStart += bucketLength;
         }
-        addArrival(
-            *gatherer, m_ResourceMonitor, events[i].time(), gatherer->personName(events[i].personId().get()), events[i].values()[0][0]);
+        addArrival(*gatherer, m_ResourceMonitor, events[i].time(),
+                   gatherer->personName(events[i].personId().get()),
+                   events[i].values()[0][0]);
     }
     model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor);
     size_t maxDimensionBeforePrune(model->dataGatherer().maxDimension());
@@ -1397,9 +1490,7 @@ void CMetricModelTest::testPrune() {
             bucketStart += bucketLength;
         }
-        addArrival(*expectedGatherer,
-                   m_ResourceMonitor,
-                   expectedEvents[i].time(),
+        addArrival(*expectedGatherer, m_ResourceMonitor, expectedEvents[i].time(),
                    expectedGatherer->personName(expectedEvents[i].personId().get()),
                    expectedEvents[i].values()[0][0]);
     }
@@ -1419,10 +1510,14 @@ void CMetricModelTest::testPrune() {
         std::size_t expectedNewPid = addPerson(newPersons[i], expectedGatherer, m_ResourceMonitor);

-        addArrival(*gatherer, m_ResourceMonitor, bucketStart + 1, gatherer->personName(newPid), 10.0);
-        addArrival(*gatherer, m_ResourceMonitor, bucketStart + 2000, gatherer->personName(newPid), 15.0);
-        addArrival(*expectedGatherer, m_ResourceMonitor, bucketStart + 1, expectedGatherer->personName(expectedNewPid), 10.0);
-        addArrival(*expectedGatherer, m_ResourceMonitor, bucketStart + 2000, expectedGatherer->personName(expectedNewPid), 15.0);
+        addArrival(*gatherer, m_ResourceMonitor, bucketStart + 1,
+                   gatherer->personName(newPid), 10.0);
+        addArrival(*gatherer, m_ResourceMonitor, bucketStart + 2000,
+                   gatherer->personName(newPid), 15.0);
+        addArrival(*expectedGatherer, m_ResourceMonitor, bucketStart + 1,
+                   expectedGatherer->personName(expectedNewPid), 10.0);
+        addArrival(*expectedGatherer, m_ResourceMonitor, bucketStart + 2000,
+                   expectedGatherer->personName(expectedNewPid), 15.0);
     }
     model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor);
     expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor);
@@ -1433,18 +1528,19 @@ void CMetricModelTest::testPrune() {

     // Test that calling prune on a cloned model which has seen no new data does nothing
     CAnomalyDetectorModel::TModelPtr clonedModelHolder(model->cloneForPersistence());
-    std::size_t numberOfPeopleBeforePrune(clonedModelHolder->dataGatherer().numberActivePeople());
+    std::size_t numberOfPeopleBeforePrune(
+        clonedModelHolder->dataGatherer().numberActivePeople());
     CPPUNIT_ASSERT(numberOfPeopleBeforePrune > 0);
     clonedModelHolder->prune(clonedModelHolder->defaultPruneWindow());
-    CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, clonedModelHolder->dataGatherer().numberActivePeople());
+    CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune,
+                         clonedModelHolder->dataGatherer().numberActivePeople());
 }

 void CMetricModelTest::testKey() {
-    function_t::EFunction countFunctions[] = {function_t::E_IndividualMetric,
-                                              function_t::E_IndividualMetricMean,
-                                              function_t::E_IndividualMetricMin,
-                                              function_t::E_IndividualMetricMax,
-                                              function_t::E_IndividualMetricSum};
+    function_t::EFunction countFunctions[] = {
+        function_t::E_IndividualMetric, function_t::E_IndividualMetricMean,
+        function_t::E_IndividualMetricMin, function_t::E_IndividualMetricMax,
+        function_t::E_IndividualMetricSum};
     bool useNull[] = {true, false};
     std::string byField[] = {"", "by"};
     std::string partitionField[] = {"", "partition"};
@@ -1456,10 +1552,12 @@ void CMetricModelTest::testKey() {
         for (std::size_t j = 0u; j < boost::size(useNull); ++j) {
             for (std::size_t k = 0u; k < boost::size(byField); ++k) {
                 for (std::size_t l = 0u; l < boost::size(partitionField); ++l) {
-                    CSearchKey key(
-                        ++identifier, countFunctions[i], useNull[j], model_t::E_XF_None, "value", byField[k], "", partitionField[l]);
+                    CSearchKey key(++identifier, countFunctions[i], useNull[j],
+                                   model_t::E_XF_None, "value", byField[k], "",
+                                   partitionField[l]);

-                    CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = config.factory(key);
+                    CAnomalyDetectorModelConfig::TModelFactoryCPtr factory =
+                        config.factory(key);

                     LOG_DEBUG(<< "expected key = " << key);
                     LOG_DEBUG(<< "actual key = " << factory->searchKey());
@@ -1501,17 +1599,21 @@ void CMetricModelTest::testSkipSampling() {
         SAnnotatedProbability annotatedProbability;

         core_t::TTime time = startTime;
-        processBucket(time, bucketLength, bucket1, influencerValues1, *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability);
+        processBucket(time, bucketLength, bucket1, influencerValues1, *gathererNoGap,
+                      m_ResourceMonitor, modelNoGap, annotatedProbability);

         time += bucketLength;
-        processBucket(time, bucketLength, bucket2, influencerValues1, *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability);
+        processBucket(time, bucketLength, bucket2, influencerValues1, *gathererNoGap,
+                      m_ResourceMonitor, modelNoGap, annotatedProbability);

         time += bucketLength;
-        processBucket(time, bucketLength, bucket3, influencerValues1, *gathererNoGap, m_ResourceMonitor, modelNoGap, annotatedProbability);
+        processBucket(time, bucketLength, bucket3, influencerValues1, *gathererNoGap,
+                      m_ResourceMonitor, modelNoGap, annotatedProbability);
     }

     CModelFactory::SGathererInitializationData gathererWithGapInitData(startTime);
-    CModelFactory::TDataGathererPtr gathererWithGap(factory.makeDataGatherer(gathererWithGapInitData));
+    CModelFactory::TDataGathererPtr gathererWithGap(
+        factory.makeDataGatherer(gathererWithGapInitData));
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gathererWithGap, m_ResourceMonitor));
     CModelFactory::SModelInitializationData initDataWithGap(gathererWithGap);
     CAnomalyDetectorModel::TModelPtr modelWithGapPtr(factory.makeModel(initDataWithGap));
@@ -1529,27 +1631,29 @@ void CMetricModelTest::testSkipSampling() {
         SAnnotatedProbability annotatedProbability;

         core_t::TTime time = startTime;
-        processBucket(
-            time, bucketLength, bucket1, influencerValues1, *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability);
+        processBucket(time, bucketLength, bucket1, influencerValues1, *gathererWithGap,
+                      m_ResourceMonitor, modelWithGap, annotatedProbability);

         time += gap;
         modelWithGap.skipSampling(time);
         LOG_DEBUG(<< "Calling sample over skipped interval should do nothing except print some ERRORs");
         modelWithGap.sample(startTime + bucketLength, time, m_ResourceMonitor);

-        processBucket(
-            time, bucketLength, bucket2, influencerValues1, *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability);
+        processBucket(time, bucketLength, bucket2, influencerValues1, *gathererWithGap,
+                      m_ResourceMonitor, modelWithGap, annotatedProbability);

         time += bucketLength;
-        processBucket(
-            time, bucketLength, bucket3, influencerValues1, *gathererWithGap, m_ResourceMonitor, modelWithGap, annotatedProbability);
+        processBucket(time, bucketLength, bucket3, influencerValues1, *gathererWithGap,
+                      m_ResourceMonitor, modelWithGap, annotatedProbability);
     }

     CPPUNIT_ASSERT_EQUAL(
-        static_cast<const maths::CUnivariateTimeSeriesModel*>(modelNoGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelNoGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))
             ->residualModel()
             .checksum(),
-        static_cast<const maths::CUnivariateTimeSeriesModel*>(modelWithGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelWithGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))
             ->residualModel()
             .checksum());
 }
@@ -1569,7 +1673,8 @@ void CMetricModelTest::testExplicitNulls() {
     factory.fieldNames("", "", "P", "V", TStrVec(1, "I"));

     CModelFactory::SGathererInitializationData gathererSkipGapInitData(startTime);
-    CModelFactory::TDataGathererPtr gathererSkipGap(factory.makeDataGatherer(gathererSkipGapInitData));
+    CModelFactory::TDataGathererPtr gathererSkipGap(
+        factory.makeDataGatherer(gathererSkipGapInitData));
     CModelFactory::SModelInitializationData initDataSkipGap(gathererSkipGap);
     CAnomalyDetectorModel::TModelPtr modelSkipGapPtr(factory.makeModel(initDataSkipGap));
     CPPUNIT_ASSERT(modelSkipGapPtr);
@@ -1581,19 +1686,25 @@ void CMetricModelTest::testExplicitNulls() {
     // p1: |(1, 42.0)|(1, 1.0)|(1, 1.0)|X|X|(1, 42.0)|
     // p2: |(1, 42.)|(0, 0.0)|(0, 0.0)|X|X|(0, 0.0)|
-    addArrival(*gathererSkipGap, m_ResourceMonitor, 100, "p1", 42.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
-    addArrival(*gathererSkipGap, m_ResourceMonitor, 100, "p2", 42.0, TOptionalStr("i2"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererSkipGap, m_ResourceMonitor, 100, "p1", 42.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererSkipGap, m_ResourceMonitor, 100, "p2", 42.0,
+               TOptionalStr("i2"), TOptionalStr(), TOptionalStr("1"));
     modelSkipGap.sample(100, 200, m_ResourceMonitor);
-    addArrival(*gathererSkipGap, m_ResourceMonitor, 200, "p1", 1.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererSkipGap, m_ResourceMonitor, 200, "p1", 1.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
     modelSkipGap.sample(200, 300, m_ResourceMonitor);
-    addArrival(*gathererSkipGap, m_ResourceMonitor, 300, "p1", 1.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererSkipGap, m_ResourceMonitor, 300, "p1", 1.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
     modelSkipGap.sample(300, 400, m_ResourceMonitor);
     modelSkipGap.skipSampling(600);
-    addArrival(*gathererSkipGap, m_ResourceMonitor, 600, "p1", 42.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererSkipGap, m_ResourceMonitor, 600, "p1", 42.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
     modelSkipGap.sample(600, 700, m_ResourceMonitor);

     CModelFactory::SGathererInitializationData gathererExNullInitData(startTime);
-    CModelFactory::TDataGathererPtr gathererExNull(factory.makeDataGatherer(gathererExNullInitData));
+    CModelFactory::TDataGathererPtr gathererExNull(
+        factory.makeDataGatherer(gathererExNullInitData));
     CModelFactory::SModelInitializationData initDataExNull(gathererExNull);
     CAnomalyDetectorModel::TModelPtr modelExNullPtr(factory.makeModel(initDataExNull));
     CPPUNIT_ASSERT(modelExNullPtr);
@@ -1602,30 +1713,44 @@ void CMetricModelTest::testExplicitNulls() {
     // p1: |(1, 42.0), ("", 42.0), (null, 42.0)|(1, 1.0)|(1, 1.0)|(null, 100.0)|(null, 100.0)|(1, 42.0)|
     // p2: |(1, 42.0), ("", 42.0)|(0, 0.0)|(0, 0.0)|(null, 100.0)|(null, 100.0)|(0, 0.0)|
-    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", 42.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
-    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", 42.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr(""));
-    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", 42.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("null"));
-    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p2", 42.0, TOptionalStr("i2"), TOptionalStr(), TOptionalStr("1"));
-    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p2", 42.0, TOptionalStr("i2"), TOptionalStr(), TOptionalStr(""));
+    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", 42.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", 42.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr(""));
+    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p1", 42.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("null"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p2", 42.0,
+               TOptionalStr("i2"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 100, "p2", 42.0,
+               TOptionalStr("i2"), TOptionalStr(), TOptionalStr(""));
     modelExNullGap.sample(100, 200, m_ResourceMonitor);
-    addArrival(*gathererExNull, m_ResourceMonitor, 200, "p1", 1.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 200, "p1", 1.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
     modelExNullGap.sample(200, 300, m_ResourceMonitor);
-    addArrival(*gathererExNull, m_ResourceMonitor, 300, "p1", 1.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 300, "p1", 1.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
     modelExNullGap.sample(300, 400, m_ResourceMonitor);
-    addArrival(*gathererExNull, m_ResourceMonitor, 400, "p1", 100.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("null"));
-    addArrival(*gathererExNull, m_ResourceMonitor, 400, "p2", 100.0, TOptionalStr("i2"), TOptionalStr(), TOptionalStr("null"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 400, "p1", 100.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("null"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 400, "p2", 100.0,
+               TOptionalStr("i2"), TOptionalStr(), TOptionalStr("null"));
     modelExNullGap.sample(400, 500, m_ResourceMonitor);
-    addArrival(*gathererExNull, m_ResourceMonitor, 500, "p1", 100.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("null"));
-    addArrival(*gathererExNull, m_ResourceMonitor, 500, "p2", 100.0, TOptionalStr("i2"), TOptionalStr(), TOptionalStr("null"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 500, "p1", 100.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("null"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 500, "p2", 100.0,
+               TOptionalStr("i2"), TOptionalStr(), TOptionalStr("null"));
     modelExNullGap.sample(500, 600, m_ResourceMonitor);
-    addArrival(*gathererExNull, m_ResourceMonitor, 600, "p1", 42.0, TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
+    addArrival(*gathererExNull, m_ResourceMonitor, 600, "p1", 42.0,
+               TOptionalStr("i1"), TOptionalStr(), TOptionalStr("1"));
     modelExNullGap.sample(600, 700, m_ResourceMonitor);

     CPPUNIT_ASSERT_EQUAL(
-        static_cast<const maths::CUnivariateTimeSeriesModel*>(modelSkipGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelSkipGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))
            ->residualModel()
            .checksum(),
-        static_cast<const maths::CUnivariateTimeSeriesModel*>(modelExNullGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))
+        static_cast<const maths::CUnivariateTimeSeriesModel*>(
+            modelExNullGap.details()->model(model_t::E_IndividualSumByBucketAndPerson, 0))
            ->residualModel()
            .checksum());
 }
@@ -1670,68 +1795,90 @@ void CMetricModelTest::testVarp() {
     SAnnotatedProbability annotatedProbability2;

     core_t::TTime time = startTime;
-    processBucket(time, bucketLength, bucket1, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket1, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket2, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket2, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket3, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket3, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket4, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket4, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket5, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket5, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket6, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket6, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket7, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket7, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.8);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket8, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket8, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.5);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.5);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket9, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket9, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.5);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.5);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket10, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket10, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.5);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.5);

     time += bucketLength;
-    processBucket(time, bucketLength, bucket11, *gatherer, m_ResourceMonitor, model, annotatedProbability, annotatedProbability2);
-    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 " << annotatedProbability2.s_Probability);
+    processBucket(time, bucketLength, bucket11, *gatherer, m_ResourceMonitor,
+                  model, annotatedProbability, annotatedProbability2);
+    LOG_DEBUG(<< "P1 " << annotatedProbability.s_Probability << ", P2 "
+              << annotatedProbability2.s_Probability);
     CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.5);
     CPPUNIT_ASSERT(annotatedProbability2.s_Probability > 0.5);
 }
@@ -1791,20 +1938,27 @@ void CMetricModelTest::testInterimCorrections() {
     model.sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor);

     CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
-    model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim);
+    model_t::CResultType type(model_t::CResultType::E_Unconditional |
+                              model_t::CResultType::E_Interim);
     SAnnotatedProbability annotatedProbability1;
     annotatedProbability1.s_ResultType = type;
-    CPPUNIT_ASSERT(model.computeProbability(pid1, now, now + bucketLength, partitioningFields, 1, annotatedProbability1));
+    CPPUNIT_ASSERT(model.computeProbability(pid1, now, now + bucketLength, partitioningFields,
+                                            1, annotatedProbability1));
     SAnnotatedProbability annotatedProbability2;
     annotatedProbability2.s_ResultType = type;
-    CPPUNIT_ASSERT(model.computeProbability(pid2, now, now + bucketLength, partitioningFields, 1, annotatedProbability2));
+    CPPUNIT_ASSERT(model.computeProbability(pid2, now, now + bucketLength, partitioningFields,
+                                            1, annotatedProbability2));
     SAnnotatedProbability annotatedProbability3;
     annotatedProbability3.s_ResultType = type;
-    CPPUNIT_ASSERT(model.computeProbability(pid3, now, now + bucketLength, partitioningFields, 1, annotatedProbability3));
+    CPPUNIT_ASSERT(model.computeProbability(pid3, now, now + bucketLength, partitioningFields,
+                                            1, annotatedProbability3));

-    TDouble1Vec p1Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, pid1, 0, type, NO_CORRELATES, now);
-    TDouble1Vec p2Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, pid2, 0, type, NO_CORRELATES, now);
-    TDouble1Vec p3Baseline = model.baselineBucketMean(model_t::E_IndividualSumByBucketAndPerson, pid3, 0, type, NO_CORRELATES, now);
+    TDouble1Vec p1Baseline = model.baselineBucketMean(
+        model_t::E_IndividualSumByBucketAndPerson, pid1, 0, type, NO_CORRELATES, now);
+    TDouble1Vec p2Baseline = model.baselineBucketMean(
+        model_t::E_IndividualSumByBucketAndPerson, pid2, 0, type, NO_CORRELATES, now);
+    TDouble1Vec p3Baseline = model.baselineBucketMean(
+        model_t::E_IndividualSumByBucketAndPerson, pid3, 0, type, NO_CORRELATES, now);

     LOG_DEBUG(<< "p1 probability = " << annotatedProbability1.s_Probability);
     LOG_DEBUG(<< "p2 probability = " << annotatedProbability2.s_Probability);
@@ -1877,23 +2031,30 @@ void CMetricModelTest::testInterimCorrectionsWithCorrelations() {
     model.sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor);

     CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
-    model_t::CResultType type(model_t::CResultType::E_Conditional | model_t::CResultType::E_Interim);
+    model_t::CResultType type(model_t::CResultType::E_Conditional |
+                              model_t::CResultType::E_Interim);
     SAnnotatedProbability annotatedProbability1;
     annotatedProbability1.s_ResultType = type;
-    CPPUNIT_ASSERT(model.computeProbability(pid1, now, now + bucketLength, partitioningFields, 1, annotatedProbability1));
+    CPPUNIT_ASSERT(model.computeProbability(pid1, now, now + bucketLength, partitioningFields,
+                                            1, annotatedProbability1));
     SAnnotatedProbability annotatedProbability2;
     annotatedProbability2.s_ResultType = type;
-    CPPUNIT_ASSERT(model.computeProbability(pid2, now, now + bucketLength, partitioningFields, 1, annotatedProbability2));
+    CPPUNIT_ASSERT(model.computeProbability(pid2, now, now + bucketLength, partitioningFields,
+                                            1, annotatedProbability2));
     SAnnotatedProbability annotatedProbability3;
     annotatedProbability3.s_ResultType = type;
- CPPUNIT_ASSERT(model.computeProbability(pid3, now, now + bucketLength, partitioningFields, 1, annotatedProbability3)); + CPPUNIT_ASSERT(model.computeProbability(pid3, now, now + bucketLength, partitioningFields, + 1, annotatedProbability3)); TDouble1Vec p1Baseline = model.baselineBucketMean( - model_t::E_IndividualSumByBucketAndPerson, pid1, 0, type, annotatedProbability1.s_AttributeProbabilities[0].s_Correlated, now); + model_t::E_IndividualSumByBucketAndPerson, pid1, 0, type, + annotatedProbability1.s_AttributeProbabilities[0].s_Correlated, now); TDouble1Vec p2Baseline = model.baselineBucketMean( - model_t::E_IndividualSumByBucketAndPerson, pid2, 0, type, annotatedProbability2.s_AttributeProbabilities[0].s_Correlated, now); + model_t::E_IndividualSumByBucketAndPerson, pid2, 0, type, + annotatedProbability2.s_AttributeProbabilities[0].s_Correlated, now); TDouble1Vec p3Baseline = model.baselineBucketMean( - model_t::E_IndividualSumByBucketAndPerson, pid3, 0, type, annotatedProbability3.s_AttributeProbabilities[0].s_Correlated, now); + model_t::E_IndividualSumByBucketAndPerson, pid3, 0, type, + annotatedProbability3.s_AttributeProbabilities[0].s_Correlated, now); LOG_DEBUG(<< "p1 probability = " << annotatedProbability1.s_Probability); LOG_DEBUG(<< "p2 probability = " << annotatedProbability2.s_Probability); @@ -1926,7 +2087,9 @@ void CMetricModelTest::testCorrelatePersist() { test::CRandomNumbers rng; TDoubleVecVec samples; - rng.generateMultivariateNormalSamples(mean.toVector(), covariance.toVectors(), 10000, samples); + rng.generateMultivariateNormalSamples(mean.toVector(), + covariance.toVectors(), + 10000, samples); SModelParams params(bucketLength); params.s_DecayRate = 0.001; @@ -1965,7 +2128,8 @@ void CMetricModelTest::testCorrelatePersist() { core::CRapidXmlStateRestoreTraverser traverser(parser); CModelFactory::SModelInitializationData initData(gatherer); - CAnomalyDetectorModel::TModelPtr restoredModel(factory.makeModel(initData, traverser)); + CAnomalyDetectorModel::TModelPtr restoredModel( + factory.makeModel(initData, traverser)); // The XML representation of the new filter should be the same as the original std::string newXml; @@ -2001,7 +2165,8 @@ void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() { factory.fieldNames("", "", "P", "V", TStrVec(1, "I")); CModelFactory::SGathererInitializationData gathererWithZerosInitData(startTime); - CModelFactory::TDataGathererPtr gathererWithZeros(factory.makeDataGatherer(gathererWithZerosInitData)); + CModelFactory::TDataGathererPtr gathererWithZeros( + factory.makeDataGatherer(gathererWithZerosInitData)); CModelFactory::SModelInitializationData initDataWithZeros(gathererWithZeros); CAnomalyDetectorModel::TModelPtr modelWithZerosPtr(factory.makeModel(initDataWithZeros)); CPPUNIT_ASSERT(modelWithZerosPtr); @@ -2009,7 +2174,8 @@ void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() { CMetricModel& modelWithZeros = static_cast(*modelWithZerosPtr.get()); CModelFactory::SGathererInitializationData gathererNoZerosInitData(startTime); - CModelFactory::TDataGathererPtr gathererNoZeros(factory.makeDataGatherer(gathererNoZerosInitData)); + CModelFactory::TDataGathererPtr gathererNoZeros( + factory.makeDataGatherer(gathererNoZerosInitData)); CModelFactory::SModelInitializationData initDataNoZeros(gathererNoZeros); CAnomalyDetectorModel::TModelPtr modelNoZerosPtr(factory.makeModel(initDataNoZeros)); CPPUNIT_ASSERT(modelNoZerosPtr); @@ -2033,30 +2199,15 @@ void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() { double 
value = values[0]; rng.generateUniformSamples(0.0, 1.0, 1, values); if (values[0] < 0.05) { - addArrival(*gathererWithZeros, - m_ResourceMonitor, - now, - "p1", - value, - TOptionalStr("i1"), - TOptionalStr(), + addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1", + value, TOptionalStr("i1"), TOptionalStr(), TOptionalStr(summaryCountZero)); } else { - addArrival(*gathererWithZeros, - m_ResourceMonitor, - now, - "p1", - value, - TOptionalStr("i1"), - TOptionalStr(), + addArrival(*gathererWithZeros, m_ResourceMonitor, now, "p1", + value, TOptionalStr("i1"), TOptionalStr(), TOptionalStr(summaryCountOne)); - addArrival(*gathererNoZeros, - m_ResourceMonitor, - now, - "p1", - value, - TOptionalStr("i1"), - TOptionalStr(), + addArrival(*gathererNoZeros, m_ResourceMonitor, now, "p1", + value, TOptionalStr("i1"), TOptionalStr(), TOptionalStr(summaryCountOne)); } } @@ -2101,11 +2252,13 @@ void CMetricModelTest::testDecayRateControl() { CMetricModelFactory referenceFactory(params); CModelFactory::TDataGathererPtr referenceGatherer; CAnomalyDetectorModel::TModelPtr referenceModel; - makeModel(referenceFactory, features, startTime, bucketLength, referenceGatherer, referenceModel); + makeModel(referenceFactory, features, startTime, bucketLength, + referenceGatherer, referenceModel); TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; - model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); + model_t::CResultType type(model_t::CResultType::E_Unconditional | + model_t::CResultType::E_Interim); for (core_t::TTime t = 0; t < 4 * core::constants::WEEK; t += bucketLength) { if (t % core::constants::WEEK == 0) { LOG_DEBUG(<< "week " << t / core::constants::WEEK + 1); @@ -2113,21 +2266,30 @@ void CMetricModelTest::testDecayRateControl() { TDoubleVec value; rng.generateUniformSamples(0.0, 10.0, 1, value); - value[0] += 20.0 * (t > 3 * core::constants::WEEK && t < core::constants::WEEK + 4 * 3600 ? 1.0 : 0.0); + value[0] += 20.0 * (t > 3 * core::constants::WEEK && + t < core::constants::WEEK + 4 * 3600 + ? 
1.0 + : 0.0); addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value[0]); - addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value[0]); + addArrival(*referenceGatherer, m_ResourceMonitor, + t + bucketLength / 2, "p1", value[0]); model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add( - std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs( + model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, + t + bucketLength / 2)[0])); + meanReferencePredictionError.add(std::fabs( + referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, + t + bucketLength / 2)[0])); } LOG_DEBUG(<< "mean = " << maths::CBasicStatistics::mean(meanPredictionError)); - LOG_DEBUG(<< "reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); + LOG_DEBUG(<< "reference = " + << maths::CBasicStatistics::mean(meanReferencePredictionError)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), 0.05); + maths::CBasicStatistics::mean(meanReferencePredictionError), + maths::CBasicStatistics::mean(meanPredictionError), 0.05); } LOG_DEBUG(<< "*** Test step change ***"); @@ -2148,36 +2310,45 @@ void CMetricModelTest::testDecayRateControl() { CMetricModelFactory referenceFactory(params); CModelFactory::TDataGathererPtr referenceGatherer; CAnomalyDetectorModel::TModelPtr referenceModel; - makeModel(referenceFactory, features, startTime, bucketLength, referenceGatherer, referenceModel); + makeModel(referenceFactory, features, startTime, bucketLength, + referenceGatherer, referenceModel); TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; - model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); + model_t::CResultType type(model_t::CResultType::E_Unconditional | + model_t::CResultType::E_Interim); for (core_t::TTime t = 0; t < 10 * core::constants::WEEK; t += bucketLength) { if (t % core::constants::WEEK == 0) { LOG_DEBUG(<< "week " << t / core::constants::WEEK + 1); } double value = 10.0 * - (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast(t) / + (1.0 + std::sin(boost::math::double_constants::two_pi * + static_cast(t) / static_cast(core::constants::DAY))) * (t < 5 * core::constants::WEEK ? 
1.0 : 2.0); TDoubleVec noise; rng.generateUniformSamples(0.0, 3.0, 1, noise); - addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value + noise[0]); - addArrival(*referenceGatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value + noise[0]); + addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", + value + noise[0]); + addArrival(*referenceGatherer, m_ResourceMonitor, + t + bucketLength / 2, "p1", value + noise[0]); model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add( - std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs( + model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, + t + bucketLength / 2)[0])); + meanReferencePredictionError.add(std::fabs( + referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, + t + bucketLength / 2)[0])); } LOG_DEBUG("mean = " << maths::CBasicStatistics::mean(meanPredictionError)); LOG_DEBUG("reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); CPPUNIT_ASSERT_DOUBLES_EQUAL( - maths::CBasicStatistics::mean(meanReferencePredictionError), maths::CBasicStatistics::mean(meanPredictionError), 0.05); + maths::CBasicStatistics::mean(meanReferencePredictionError), + maths::CBasicStatistics::mean(meanPredictionError), 0.05); } LOG_DEBUG(<< "*** Test unmodelled cyclic component ***"); @@ -2200,35 +2371,46 @@ void CMetricModelTest::testDecayRateControl() { CMetricModelFactory referenceFactory(params); CModelFactory::TDataGathererPtr referenceGatherer; CAnomalyDetectorModel::TModelPtr referenceModel; - makeModel(referenceFactory, features, startTime, bucketLength, referenceGatherer, referenceModel); + makeModel(referenceFactory, features, startTime, bucketLength, + referenceGatherer, referenceModel); TMeanAccumulator meanPredictionError; TMeanAccumulator meanReferencePredictionError; - model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Interim); + model_t::CResultType type(model_t::CResultType::E_Unconditional | + model_t::CResultType::E_Interim); for (core_t::TTime t = 0; t < 20 * core::constants::WEEK; t += bucketLength) { if (t % core::constants::WEEK == 0) { LOG_DEBUG(<< "week " << t / core::constants::WEEK + 1); } - double value = 10.0 * - (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast(t) / - static_cast(core::constants::DAY))) * - (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast(t) / 10.0 / - static_cast(core::constants::WEEK))); + double value = + 10.0 * + (1.0 + std::sin(boost::math::double_constants::two_pi * + static_cast(t) / + static_cast(core::constants::DAY))) * + (1.0 + std::sin(boost::math::double_constants::two_pi * + static_cast(t) / 10.0 / + static_cast(core::constants::WEEK))); TDoubleVec noise; rng.generateUniformSamples(0.0, 3.0, 1, noise); - addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", value + noise[0]); - addArrival(*referenceGatherer, 
m_ResourceMonitor, t + bucketLength / 2, "p1", value + noise[0]); + addArrival(*gatherer, m_ResourceMonitor, t + bucketLength / 2, "p1", + value + noise[0]); + addArrival(*referenceGatherer, m_ResourceMonitor, + t + bucketLength / 2, "p1", value + noise[0]); model->sample(t, t + bucketLength, m_ResourceMonitor); referenceModel->sample(t, t + bucketLength, m_ResourceMonitor); - meanPredictionError.add(std::fabs(model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); - meanReferencePredictionError.add( - std::fabs(referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - - referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, t + bucketLength / 2)[0])); + meanPredictionError.add(std::fabs( + model->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + model->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, + t + bucketLength / 2)[0])); + meanReferencePredictionError.add(std::fabs( + referenceModel->currentBucketValue(feature, 0, 0, t + bucketLength / 2)[0] - + referenceModel->baselineBucketMean(feature, 0, 0, type, NO_CORRELATES, + t + bucketLength / 2)[0])); } LOG_DEBUG(<< "mean = " << maths::CBasicStatistics::mean(meanPredictionError)); - LOG_DEBUG(<< "reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); + LOG_DEBUG(<< "reference = " + << maths::CBasicStatistics::mean(meanReferencePredictionError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanPredictionError) < 0.7 * maths::CBasicStatistics::mean(meanReferencePredictionError)); } @@ -2275,20 +2457,24 @@ void CMetricModelTest::testProbabilityCalculationForLowMedian() { LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values)); for (std::size_t j = 0u; j < values.size(); ++j) { - addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p", values[j]); + addArrival(*gatherer, m_ResourceMonitor, + time + static_cast(j), "p", values[j]); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); time += bucketLength; } - LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities.begin(), probabilities.end())); + LOG_DEBUG(<< "probabilities = " + << core::CContainerPrinter::print(probabilities.begin(), + probabilities.end())); CPPUNIT_ASSERT(probabilities[lowMedianBucket] < 0.01); CPPUNIT_ASSERT(probabilities[highMedianBucket] > 0.1); @@ -2335,13 +2521,15 @@ void CMetricModelTest::testProbabilityCalculationForHighMedian() { LOG_DEBUG(<< "values = " << core::CContainerPrinter::print(values)); for (std::size_t j = 0u; j < values.size(); ++j) { - addArrival(*gatherer, m_ResourceMonitor, time + static_cast(j), "p", values[j]); + addArrival(*gatherer, m_ResourceMonitor, + time + static_cast(j), "p", values[j]); } model.sample(time, time + bucketLength, m_ResourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, 
time, time + bucketLength, partitioningFields, 1, annotatedProbability)); + CPPUNIT_ASSERT(model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, + 1, annotatedProbability)); LOG_DEBUG(<< "probability = " << annotatedProbability.s_Probability); probabilities.push_back(annotatedProbability.s_Probability); @@ -2392,7 +2580,8 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() { CMetricModelFactory factoryWithSkip(paramsWithRules); CModelFactory::TDataGathererPtr gathererWithSkip; CAnomalyDetectorModel::TModelPtr modelPtrWithSkip; - makeModel(factoryWithSkip, features, startTime, bucketLength, gathererWithSkip, modelPtrWithSkip); + makeModel(factoryWithSkip, features, startTime, bucketLength, + gathererWithSkip, modelPtrWithSkip); CMetricModel* modelWithSkip = dynamic_cast<CMetricModel*>(modelPtrWithSkip.get()); std::size_t endTime = startTime + bucketLength; @@ -2442,7 +2631,8 @@ void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() { CPPUNIT_ASSERT(modelWithSkip->checksum() != modelNoSkip->checksum()); // but the underlying models should be the same - CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = modelWithSkip->details(); + CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = + modelWithSkip->details(); CAnomalyDetectorModel::CModelDetailsViewPtr modelNoSkipView = modelNoSkip->details(); // TODO this test fails due a different checksums for the decay rate and prior @@ -2467,51 +2657,69 @@ CppUnit::Test* CMetricModelTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricModelTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testSample", &CMetricModelTest::testSample)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testMultivariateSample", &CMetricModelTest::testMultivariateSample)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForMetric", - &CMetricModelTest::testProbabilityCalculationForMetric)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForMedian", - &CMetricModelTest::testProbabilityCalculationForMedian)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForLowMean", - &CMetricModelTest::testProbabilityCalculationForLowMean)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForHighMean", - &CMetricModelTest::testProbabilityCalculationForHighMean)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForLowSum", - &CMetricModelTest::testProbabilityCalculationForLowSum)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForHighSum", - &CMetricModelTest::testProbabilityCalculationForHighSum)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForLatLong", - &CMetricModelTest::testProbabilityCalculationForLatLong)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testInfluence", &CMetricModelTest::testInfluence)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testLatLongInfluence", &CMetricModelTest::testLatLongInfluence)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testPrune", &CMetricModelTest::testPrune)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testKey", &CMetricModelTest::testKey));
- suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testSkipSampling", &CMetricModelTest::testSkipSampling)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testExplicitNulls", &CMetricModelTest::testExplicitNulls)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testVarp", &CMetricModelTest::testVarp)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testInterimCorrections", &CMetricModelTest::testInterimCorrections)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testInterimCorrectionsWithCorrelations", - &CMetricModelTest::testInterimCorrectionsWithCorrelations)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testCorrelatePersist", &CMetricModelTest::testCorrelatePersist)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testSummaryCountZeroRecordsAreIgnored", - &CMetricModelTest::testSummaryCountZeroRecordsAreIgnored)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testSummaryCountZeroRecordsAreIgnored", - &CMetricModelTest::testSummaryCountZeroRecordsAreIgnored)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testDecayRateControl", &CMetricModelTest::testDecayRateControl)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForLowMedian", - &CMetricModelTest::testProbabilityCalculationForLowMedian)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testProbabilityCalculationForHighMedian", - &CMetricModelTest::testProbabilityCalculationForHighMedian)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>("CMetricModelTest::testIgnoreSamplingGivenDetectionRules", - &CMetricModelTest::testIgnoreSamplingGivenDetectionRules)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testSample", &CMetricModelTest::testSample)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testMultivariateSample", &CMetricModelTest::testMultivariateSample)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testProbabilityCalculationForMetric", + &CMetricModelTest::testProbabilityCalculationForMetric)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testProbabilityCalculationForMedian", + &CMetricModelTest::testProbabilityCalculationForMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testProbabilityCalculationForLowMean", + &CMetricModelTest::testProbabilityCalculationForLowMean)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testProbabilityCalculationForHighMean", + &CMetricModelTest::testProbabilityCalculationForHighMean)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testProbabilityCalculationForLowSum", + &CMetricModelTest::testProbabilityCalculationForLowSum)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testProbabilityCalculationForHighSum", + &CMetricModelTest::testProbabilityCalculationForHighSum)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testProbabilityCalculationForLatLong", + &CMetricModelTest::testProbabilityCalculationForLatLong)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testInfluence", &CMetricModelTest::testInfluence)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>( + "CMetricModelTest::testLatLongInfluence", &CMetricModelTest::testLatLongInfluence)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricModelTest>(
"CMetricModelTest::testPrune", &CMetricModelTest::testPrune)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testKey", &CMetricModelTest::testKey)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testSkipSampling", &CMetricModelTest::testSkipSampling)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testExplicitNulls", &CMetricModelTest::testExplicitNulls)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testVarp", &CMetricModelTest::testVarp)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testInterimCorrections", &CMetricModelTest::testInterimCorrections)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testInterimCorrectionsWithCorrelations", + &CMetricModelTest::testInterimCorrectionsWithCorrelations)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testCorrelatePersist", &CMetricModelTest::testCorrelatePersist)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testSummaryCountZeroRecordsAreIgnored", + &CMetricModelTest::testSummaryCountZeroRecordsAreIgnored)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testSummaryCountZeroRecordsAreIgnored", + &CMetricModelTest::testSummaryCountZeroRecordsAreIgnored)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testDecayRateControl", &CMetricModelTest::testDecayRateControl)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testProbabilityCalculationForLowMedian", + &CMetricModelTest::testProbabilityCalculationForLowMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testProbabilityCalculationForHighMedian", + &CMetricModelTest::testProbabilityCalculationForHighMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMetricModelTest::testIgnoreSamplingGivenDetectionRules", + &CMetricModelTest::testIgnoreSamplingGivenDetectionRules)); return suiteOfTests; } diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.cc b/lib/model/unittest/CMetricPopulationDataGathererTest.cc index e358228765..647b6d608b 100644 --- a/lib/model/unittest/CMetricPopulationDataGathererTest.cc +++ b/lib/model/unittest/CMetricPopulationDataGathererTest.cc @@ -42,7 +42,8 @@ using TOptionalStr = boost::optional; using TSizeSizePr = std::pair; using TSizeSizePrFeatureDataPr = std::pair; using TSizeSizePrFeatureDataPrVec = std::vector; -using TFeatureSizeSizePrFeatureDataPrVecPr = std::pair; +using TFeatureSizeSizePrFeatureDataPrVecPr = + std::pair; using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector; struct SMessage { @@ -51,7 +52,8 @@ struct SMessage { const std::string& attribute, const double& value, const TStrVec& influences = TStrVec()) - : s_Time(time), s_Person(person), s_Attribute(attribute), s_Value(value), s_Influences(influences) {} + : s_Time(time), s_Person(person), s_Attribute(attribute), + s_Value(value), s_Influences(influences) {} core_t::TTime s_Time; std::string s_Person; @@ -71,7 +73,8 @@ void generateTestMessages(const core_t::TTime& startTime, TMessageVec& result) { const std::size_t numberMessages = 100000; const std::size_t numberPeople = 40; const std::size_t numberCategories = 10; - const double locations[] = {1.0, 2.0, 5.0, 15.0, 3.0, 0.5, 10.0, 17.0, 8.5, 1.5}; + const double locations[] = {1.0, 2.0, 5.0, 15.0, 3.0, + 0.5, 10.0, 17.0, 8.5, 1.5}; const double scales[] = {1.0, 1.0, 3.0, 2.0, 0.5, 0.5, 2.0, 3.0, 4.0, 1.0}; result.clear(); @@ -84,10 
+87,12 @@ void generateTestMessages(const core_t::TTime& startTime, TMessageVec& result) { std::sort(times.begin(), times.end()); TDoubleVec people; - rng.generateUniformSamples(0.0, static_cast<double>(numberPeople) - 0.01, numberMessages, people); + rng.generateUniformSamples(0.0, static_cast<double>(numberPeople) - 0.01, + numberMessages, people); TDoubleVec categories; - rng.generateUniformSamples(0.0, static_cast<double>(numberCategories) - 0.01, numberMessages, categories); + rng.generateUniformSamples(0.0, static_cast<double>(numberCategories) - 0.01, + numberMessages, categories); for (std::size_t i = 0u; i < numberMessages; ++i) { core_t::TTime time = startTime + static_cast<core_t::TTime>(times[i]); @@ -95,10 +100,9 @@ void generateTestMessages(const core_t::TTime& startTime, TMessageVec& result) { std::size_t attribute = static_cast<std::size_t>(categories[i]); TDoubleVec value; rng.generateNormalSamples(locations[attribute], scales[attribute], 1u, value); - result.push_back(SMessage(time, - std::string("p") + boost::lexical_cast<std::string>(person), - std::string("c") + boost::lexical_cast<std::string>(attribute), - value[0])); + result.push_back(SMessage( + time, std::string("p") + boost::lexical_cast<std::string>(person), + std::string("c") + boost::lexical_cast<std::string>(attribute), value[0])); } } @@ -113,7 +117,8 @@ void addArrival(const SMessage& message, CDataGatherer& gatherer, CResourceMonit fields.push_back(&message.s_Influences[i]); } } - std::string value = core::CStringUtils::typeToStringPrecise(message.s_Value, core::CIEEE754::E_DoublePrecision); + std::string value = core::CStringUtils::typeToStringPrecise( + message.s_Value, core::CIEEE754::E_DoublePrecision); fields.push_back(&value); CEventData result; result.time(message.s_Time); @@ -158,7 +163,8 @@ void CMetricPopulationDataGathererTest::testMean() { core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { - LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); + LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " + << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; gatherer.featureData(bucketStart, bucketLength, tmp); @@ -170,24 +176,28 @@ void CMetricPopulationDataGathererTest::testMean() { TStrStrPrDoubleMap means; for (std::size_t j = 0u; j < data.size(); ++j) { if (data[j].second.s_BucketValue) { - means[TStrStrPr(gatherer.personName(data[j].first.first), gatherer.attributeName(data[j].first.second))] = + means[TStrStrPr(gatherer.personName(data[j].first.first), + gatherer.attributeName(data[j].first.second))] = data[j].second.s_BucketValue->value()[0]; } } TStrStrPrDoubleMap expectedMeans; - for (TStrStrPrMeanAccumulatorMapCItr itr = accumulators.begin(); itr != accumulators.end(); ++itr) { + for (TStrStrPrMeanAccumulatorMapCItr itr = accumulators.begin(); + itr != accumulators.end(); ++itr) { expectedMeans[itr->first] = maths::CBasicStatistics::mean(itr->second); } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), core::CContainerPrinter::print(means)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), + core::CContainerPrinter::print(means)); bucketStart += bucketLength; accumulators.clear(); } addArrival(messages[i], gatherer, m_ResourceMonitor); - accumulators[TStrStrPr(messages[i].s_Person, messages[i].s_Attribute)].add(messages[i].s_Value); + accumulators[TStrStrPr(messages[i].s_Person, messages[i].s_Attribute)].add( + messages[i].s_Value); } } @@ -219,7 +229,8 @@ void
CMetricPopulationDataGathererTest::testMin() { core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { - LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); + LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " + << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; gatherer.featureData(bucketStart, bucketLength, tmp); @@ -231,24 +242,28 @@ void CMetricPopulationDataGathererTest::testMin() { TStrStrPrDoubleMap mins; for (std::size_t j = 0u; j < data.size(); ++j) { if (data[j].second.s_BucketValue) { - mins[TStrStrPr(gatherer.personName(data[j].first.first), gatherer.attributeName(data[j].first.second))] = + mins[TStrStrPr(gatherer.personName(data[j].first.first), + gatherer.attributeName(data[j].first.second))] = data[j].second.s_BucketValue->value()[0]; } } TStrStrPrDoubleMap expectedMins; - for (TStrStrPrMinAccumulatorMapCItr itr = accumulators.begin(); itr != accumulators.end(); ++itr) { + for (TStrStrPrMinAccumulatorMapCItr itr = accumulators.begin(); + itr != accumulators.end(); ++itr) { expectedMins[itr->first] = itr->second[0]; } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMins), core::CContainerPrinter::print(mins)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMins), + core::CContainerPrinter::print(mins)); bucketStart += bucketLength; accumulators.clear(); } addArrival(messages[i], gatherer, m_ResourceMonitor); - accumulators[TStrStrPr(messages[i].s_Person, messages[i].s_Attribute)].add(messages[i].s_Value); + accumulators[TStrStrPr(messages[i].s_Person, messages[i].s_Attribute)].add( + messages[i].s_Value); } } @@ -257,7 +272,8 @@ void CMetricPopulationDataGathererTest::testMax() { // Test that we correctly sample the bucket maximums. 
- using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>>; + using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>>; using TStrStrPrMaxAccumulatorMap = std::map<TStrStrPr, TMaxAccumulator>; using TStrStrPrMaxAccumulatorMapCItr = TStrStrPrMaxAccumulatorMap::const_iterator; @@ -280,7 +296,8 @@ void CMetricPopulationDataGathererTest::testMax() { core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { - LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); + LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " + << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; gatherer.featureData(bucketStart, bucketLength, tmp); @@ -292,24 +309,28 @@ void CMetricPopulationDataGathererTest::testMax() { TStrStrPrDoubleMap maxs; for (std::size_t j = 0u; j < data.size(); ++j) { if (data[j].second.s_BucketValue) { - maxs[TStrStrPr(gatherer.personName(data[j].first.first), gatherer.attributeName(data[j].first.second))] = + maxs[TStrStrPr(gatherer.personName(data[j].first.first), + gatherer.attributeName(data[j].first.second))] = data[j].second.s_BucketValue->value()[0]; } } TStrStrPrDoubleMap expectedMaxs; - for (TStrStrPrMaxAccumulatorMapCItr itr = accumulators.begin(); itr != accumulators.end(); ++itr) { + for (TStrStrPrMaxAccumulatorMapCItr itr = accumulators.begin(); + itr != accumulators.end(); ++itr) { expectedMaxs[itr->first] = itr->second[0]; } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxs), core::CContainerPrinter::print(maxs)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxs), + core::CContainerPrinter::print(maxs)); bucketStart += bucketLength; accumulators.clear(); } addArrival(messages[i], gatherer, m_ResourceMonitor); - accumulators[TStrStrPr(messages[i].s_Person, messages[i].s_Attribute)].add(messages[i].s_Value); + accumulators[TStrStrPr(messages[i].s_Person, messages[i].s_Attribute)].add( + messages[i].s_Value); } } @@ -337,7 +358,8 @@ void CMetricPopulationDataGathererTest::testSum() { core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { - LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); + LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " + << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; gatherer.featureData(bucketStart, bucketLength, tmp); @@ -349,19 +371,22 @@ void CMetricPopulationDataGathererTest::testSum() { TStrStrPrDoubleMap sums; for (std::size_t j = 0u; j < data.size(); ++j) { if (data[j].second.s_BucketValue) { - sums[TStrStrPr(gatherer.personName(data[j].first.first), gatherer.attributeName(data[j].first.second))] = + sums[TStrStrPr(gatherer.personName(data[j].first.first), + gatherer.attributeName(data[j].first.second))] = data[j].second.s_BucketValue->value()[0]; } } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSums), core::CContainerPrinter::print(sums)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSums), + core::CContainerPrinter::print(sums)); bucketStart += bucketLength; expectedSums.clear(); } addArrival(messages[i], gatherer, m_ResourceMonitor); - expectedSums[TStrStrPr(messages[i].s_Person, messages[i].s_Attribute)] += messages[i].s_Value; + expectedSums[TStrStrPr(messages[i].s_Person, messages[i].s_Attribute)] += + messages[i].s_Value; } }
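The four gatherer statistic tests above (mean, min, max, sum) share one verification pattern: stream time-ordered messages, and once a message's timestamp reaches the end of the current bucket, compare the gatherer's per-(person, attribute) feature data against an independently maintained accumulator map, then advance the bucket and reset. The following self-contained C++ sketch distills just that control flow; it uses only the standard library, so every name in it (SMsg, TKey, the std::map standing in for the gatherer) is illustrative and not part of the patch:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    using TTime = std::int64_t;
    using TKey = std::pair<std::string, std::string>; // (person, attribute)

    struct SMsg {
        TTime s_Time;
        std::string s_Person;
        std::string s_Attribute;
        double s_Value;
    };

    int main() {
        const TTime bucketLength = 3600;
        TTime bucketStart = 0;
        // Messages must be sorted by time for the bucket-close check to work.
        std::vector<SMsg> messages{
            {10, "p1", "c1", 1.0}, {20, "p2", "c1", 3.0}, {3700, "p1", "c1", 5.0}};
        std::map<TKey, double> expectedSums;
        for (const SMsg& m : messages) {
            if (m.s_Time >= bucketStart + bucketLength) {
                // Bucket complete: this is the point at which the real tests
                // call sampleNow() and compare featureData() with the
                // expected accumulators before clearing them.
                for (const auto& e : expectedSums) {
                    std::cout << e.first.first << '/' << e.first.second
                              << " -> " << e.second << '\n';
                }
                bucketStart += bucketLength;
                expectedSums.clear();
            }
            expectedSums[{m.s_Person, m.s_Attribute}] += m.s_Value;
        }
        return 0;
    }

Note that the boundary check runs before the message is added, so a message landing exactly on a bucket boundary closes the previous bucket first, mirroring the `messages[i].s_Time >= bucketStart + bucketLength` condition in the tests above.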
@@ -376,13 +401,16 @@ void CMetricPopulationDataGathererTest::testSampleCount() { const std::string attribute("c1"); const std::string person("p1"); const std::size_t numberBuckets = 40; - const std::size_t personMessageCount[numberBuckets] = {11, 11, 11, 11, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 97, 97, 97, 97, 97, 97, 97, 97, 97}; - const double expectedSampleCounts[] = {0.0, 0.0, 0.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.3597, 11.7164, 12.0701, 12.421, 12.7689, 13.114, 13.4562, 13.7957, 14.1325, 14.4665}; + const std::size_t personMessageCount[numberBuckets] = { + 11, 11, 11, 11, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 97, 97, 97, 97, 97, 97, 97, 97, 97}; + const double expectedSampleCounts[] = { + 0.0, 0.0, 0.0, 11.0, 11.0, 11.0, 11.0, 11.0, + 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, + 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, + 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.3597, 11.7164, + 12.0701, 12.421, 12.7689, 13.114, 13.4562, 13.7957, 14.1325, 14.4665}; const double tolerance = 5e-4; TMessageVec messages; @@ -391,7 +419,9 @@ void CMetricPopulationDataGathererTest::testSampleCount() { std::size_t n = personMessageCount[bucket]; for (std::size_t i = 0u; i < n; ++i) { - core_t::TTime time = bucketStart + bucketLength * static_cast<core_t::TTime>(i) / static_cast<core_t::TTime>(n); + core_t::TTime time = bucketStart + bucketLength * + static_cast<core_t::TTime>(i) / + static_cast<core_t::TTime>(n); messages.push_back(SMessage(time, person, attribute, 1.0)); } } @@ -411,7 +441,8 @@ void CMetricPopulationDataGathererTest::testSampleCount() { if (messages[i].s_Time >= bucketStart + bucketLength) { gatherer.sampleNow(bucketStart); LOG_DEBUG(<< gatherer.effectiveSampleCount(0)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedSampleCounts[bucket], gatherer.effectiveSampleCount(0), tolerance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedSampleCounts[bucket], + gatherer.effectiveSampleCount(0), tolerance); ++bucket; } @@ -420,7 +451,8 @@ void CMetricPopulationDataGathererTest::testSampleCount() { core_t::TTime bucketStart = startTime + static_cast<core_t::TTime>(bucket) * bucketLength; gatherer.sampleNow(bucketStart); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedSampleCounts[bucket], gatherer.effectiveSampleCount(0), tolerance); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedSampleCounts[bucket], + gatherer.effectiveSampleCount(0), tolerance); } void CMetricPopulationDataGathererTest::testFeatureData() { @@ -434,7 +466,8 @@ void CMetricPopulationDataGathererTest::testFeatureData() { using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack<double, 1u>; using TStrStrPrMinAccumulatorMap = std::map<TStrStrPr, TMinAccumulator>; using TStrStrPrMinAccumulatorMapCItr = TStrStrPrMinAccumulatorMap::const_iterator; - using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>>; + using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>>; using TStrStrPrMaxAccumulatorMap = std::map<TStrStrPr, TMaxAccumulator>; using TStrStrPrMaxAccumulatorMapCItr = TStrStrPrMaxAccumulatorMap::const_iterator; using TStrStrPrDoubleVecMap = std::map<TStrStrPr, TDoubleVec>; @@ -469,7 +502,8 @@ void CMetricPopulationDataGathererTest::testFeatureData() { core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { - LOG_DEBUG(<< "Processing bucket [" <<
bucketStart << ", " << bucketStart + bucketLength << ")"); + LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " + << bucketStart + bucketLength << ")"); gatherer.sampleNow(bucketStart); @@ -477,73 +511,94 @@ void CMetricPopulationDataGathererTest::testFeatureData() { gatherer.featureData(bucketStart, bucketLength, tmp); CPPUNIT_ASSERT_EQUAL(static_cast(3), tmp.size()); - CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMeanByPersonAndAttribute, tmp[0].first); + CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMeanByPersonAndAttribute, + tmp[0].first); TStrStrPrDoubleMap means; TStrStrPrDoubleVecMap meanSamples; for (std::size_t j = 0u; j < tmp[0].second.size(); ++j) { const TSizeSizePrFeatureDataPr& data = tmp[0].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), gatherer.attributeName(data.first.second)); + TStrStrPr key(gatherer.personName(data.first.first), + gatherer.attributeName(data.first.second)); if (data.second.s_BucketValue) { means[key] = data.second.s_BucketValue->value()[0]; } TDoubleVec& samples = meanSamples[key]; - for (std::size_t k = 0u; k < boost::unwrap_ref(data.second.s_Samples).size(); ++k) { - samples.push_back(boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); + for (std::size_t k = 0u; + k < boost::unwrap_ref(data.second.s_Samples).size(); ++k) { + samples.push_back( + boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); } } - CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMinByPersonAndAttribute, tmp[1].first); + CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMinByPersonAndAttribute, + tmp[1].first); TStrStrPrDoubleMap mins; TStrStrPrDoubleVecMap minSamples; for (std::size_t j = 0u; j < tmp[1].second.size(); ++j) { const TSizeSizePrFeatureDataPr& data = tmp[1].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), gatherer.attributeName(data.first.second)); + TStrStrPr key(gatherer.personName(data.first.first), + gatherer.attributeName(data.first.second)); if (data.second.s_BucketValue) { mins[key] = data.second.s_BucketValue->value()[0]; } TDoubleVec& samples = minSamples[key]; - for (std::size_t k = 0u; k < boost::unwrap_ref(data.second.s_Samples).size(); ++k) { - samples.push_back(boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); + for (std::size_t k = 0u; + k < boost::unwrap_ref(data.second.s_Samples).size(); ++k) { + samples.push_back( + boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); } } - CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMaxByPersonAndAttribute, tmp[2].first); + CPPUNIT_ASSERT_EQUAL(model_t::E_PopulationMaxByPersonAndAttribute, + tmp[2].first); TStrStrPrDoubleMap maxs; TStrStrPrDoubleVecMap maxSamples; for (std::size_t j = 0u; j < tmp[2].second.size(); ++j) { const TSizeSizePrFeatureDataPr& data = tmp[2].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), gatherer.attributeName(data.first.second)); + TStrStrPr key(gatherer.personName(data.first.first), + gatherer.attributeName(data.first.second)); if (data.second.s_BucketValue) { maxs[key] = data.second.s_BucketValue->value()[0]; } TDoubleVec& samples = maxSamples[key]; - for (std::size_t k = 0u; k < boost::unwrap_ref(data.second.s_Samples).size(); ++k) { - samples.push_back(boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); + for (std::size_t k = 0u; + k < boost::unwrap_ref(data.second.s_Samples).size(); ++k) { + samples.push_back( + boost::unwrap_ref(data.second.s_Samples)[k].value()[0]); } } TStrStrPrDoubleMap expectedMeans; - for (TStrStrPrMeanAccumulatorMapCItr itr = bucketMeanAccumulators.begin(); itr != bucketMeanAccumulators.end(); 
++itr) { + for (TStrStrPrMeanAccumulatorMapCItr itr = bucketMeanAccumulators.begin(); + itr != bucketMeanAccumulators.end(); ++itr) { expectedMeans[itr->first] = maths::CBasicStatistics::mean(itr->second); } TStrStrPrDoubleMap expectedMins; - for (TStrStrPrMinAccumulatorMapCItr itr = bucketMinAccumulators.begin(); itr != bucketMinAccumulators.end(); ++itr) { + for (TStrStrPrMinAccumulatorMapCItr itr = bucketMinAccumulators.begin(); + itr != bucketMinAccumulators.end(); ++itr) { expectedMins[itr->first] = itr->second[0]; } TStrStrPrDoubleMap expectedMaxs; - for (TStrStrPrMaxAccumulatorMapCItr itr = bucketMaxAccumulators.begin(); itr != bucketMaxAccumulators.end(); ++itr) { + for (TStrStrPrMaxAccumulatorMapCItr itr = bucketMaxAccumulators.begin(); + itr != bucketMaxAccumulators.end(); ++itr) { expectedMaxs[itr->first] = itr->second[0]; } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), core::CContainerPrinter::print(means)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMins), core::CContainerPrinter::print(mins)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxs), core::CContainerPrinter::print(maxs)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeans), + core::CContainerPrinter::print(means)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMins), + core::CContainerPrinter::print(mins)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxs), + core::CContainerPrinter::print(maxs)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeanSamples), core::CContainerPrinter::print(meanSamples)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMinSamples), core::CContainerPrinter::print(minSamples)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxSamples), core::CContainerPrinter::print(maxSamples)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMeanSamples), + core::CContainerPrinter::print(meanSamples)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMinSamples), + core::CContainerPrinter::print(minSamples)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedMaxSamples), + core::CContainerPrinter::print(maxSamples)); bucketStart += bucketLength; bucketMeanAccumulators.clear(); @@ -572,8 +627,10 @@ void CMetricPopulationDataGathererTest::testFeatureData() { sampleMeanAccumulators[key].add(messages[i].s_Value); sampleMinAccumulators[key].add(messages[i].s_Value); sampleMaxAccumulators[key].add(messages[i].s_Value); - if (maths::CBasicStatistics::count(sampleMeanAccumulators[key]) == std::floor(sampleCount + 0.5)) { - expectedMeanSamples[key].push_back(maths::CBasicStatistics::mean(sampleMeanAccumulators[key])); + if (maths::CBasicStatistics::count(sampleMeanAccumulators[key]) == + std::floor(sampleCount + 0.5)) { + expectedMeanSamples[key].push_back( + maths::CBasicStatistics::mean(sampleMeanAccumulators[key])); expectedMinSamples[key].push_back(sampleMinAccumulators[key][0]); expectedMaxSamples[key].push_back(sampleMaxAccumulators[key][0]); sampleMeanAccumulators[key] = TMeanAccumulator(); @@ -606,21 +663,10 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { features.push_back(model_t::E_PopulationMinByPersonAndAttribute); features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute); - CDataGatherer gatherer(model_t::E_PopulationMetric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - 
EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(startTime, messages); @@ -628,7 +674,8 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { - LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); + LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " + << bucketStart + bucketLength << ")"); gatherer.sampleNow(bucketStart); bucketStart += bucketLength; } @@ -662,13 +709,15 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { TSizeUInt64PrVec nonZeroCounts; gatherer.personNonZeroCounts(bucketStart, nonZeroCounts); for (std::size_t i = 0u; i < nonZeroCounts.size(); ++i) { - if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), nonZeroCounts[i].first)) { + if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), + nonZeroCounts[i].first)) { const std::string& name = gatherer.personName(nonZeroCounts[i].first); expectedNonZeroCounts[name] = nonZeroCounts[i].second; } } } - LOG_DEBUG(<< "expectedNonZeroCounts = " << core::CContainerPrinter::print(expectedNonZeroCounts)); + LOG_DEBUG(<< "expectedNonZeroCounts = " + << core::CContainerPrinter::print(expectedNonZeroCounts)); LOG_DEBUG(<< "Expected"); TStrFeatureDataPrVec expectedFeatureData; @@ -678,8 +727,10 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { for (std::size_t i = 0u; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0u; j < data.size(); ++j) { - if (!std::binary_search(peopleToRemove.begin(), peopleToRemove.end(), data[j].first.first)) { - std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + if (!std::binary_search(peopleToRemove.begin(), + peopleToRemove.end(), data[j].first.first)) { + std::string key = model_t::print(featureData[i].first) + " " + + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); expectedFeatureData.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(<< " " << key); @@ -691,7 +742,8 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { gatherer.recyclePeople(peopleToRemove); - CPPUNIT_ASSERT_EQUAL(numberPeople - peopleToRemove.size(), gatherer.numberActivePeople()); + CPPUNIT_ASSERT_EQUAL(numberPeople - peopleToRemove.size(), + gatherer.numberActivePeople()); for (std::size_t i = 0u; i < expectedPersonNames.size(); ++i) { std::size_t pid; CPPUNIT_ASSERT(gatherer.personId(expectedPersonNames[i], pid)); @@ -705,9 +757,11 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { const std::string& name = gatherer.personName(nonZeroCounts[i].first); actualNonZeroCounts[name] = nonZeroCounts[i].second; } - LOG_DEBUG(<< "actualNonZeroCounts = " << core::CContainerPrinter::print(actualNonZeroCounts)); + LOG_DEBUG(<< "actualNonZeroCounts = " + << core::CContainerPrinter::print(actualNonZeroCounts)); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), core::CContainerPrinter::print(actualNonZeroCounts)); + 
CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedNonZeroCounts), + core::CContainerPrinter::print(actualNonZeroCounts)); LOG_DEBUG(<< "Actual"); TStrFeatureDataPrVec actualFeatureData; @@ -717,7 +771,8 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { for (std::size_t i = 0u; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0u; j < data.size(); ++j) { - std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + std::string key = model_t::print(featureData[i].first) + " " + + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); actualFeatureData.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(<< " " << key); @@ -726,7 +781,8 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { } } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedFeatureData), core::CContainerPrinter::print(actualFeatureData)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedFeatureData), + core::CContainerPrinter::print(actualFeatureData)); } void CMetricPopulationDataGathererTest::testRemoveAttributes() { @@ -748,21 +804,10 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() { features.push_back(model_t::E_PopulationMinByPersonAndAttribute); features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute); - CDataGatherer gatherer(model_t::E_PopulationMetric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(startTime, messages); @@ -770,7 +815,8 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() { core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { - LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); + LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " + << bucketStart + bucketLength << ")"); gatherer.sampleNow(bucketStart); bucketStart += bucketLength; } @@ -808,8 +854,10 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() { for (std::size_t i = 0u; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0u; j < data.size(); ++j) { - if (!std::binary_search(attributesToRemove.begin(), attributesToRemove.end(), data[j].first.second)) { - std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + if (!std::binary_search(attributesToRemove.begin(), + attributesToRemove.end(), data[j].first.second)) { + std::string key = model_t::print(featureData[i].first) + " " + + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); expected.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(<< " " << key); @@ -822,7 +870,8 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() { gatherer.recycleAttributes(attributesToRemove); - CPPUNIT_ASSERT_EQUAL(numberAttributes - 
attributesToRemove.size(), gatherer.numberActiveAttributes()); + CPPUNIT_ASSERT_EQUAL(numberAttributes - attributesToRemove.size(), + gatherer.numberActiveAttributes()); for (std::size_t i = 0u; i < expectedAttributeNames.size(); ++i) { std::size_t cid; CPPUNIT_ASSERT(gatherer.attributeId(expectedAttributeNames[i], cid)); @@ -834,7 +883,8 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() { for (std::size_t i = 0u; i < numberAttributes; ++i) { actualSampleCounts.push_back(gatherer.effectiveSampleCount(expectedAttributeIds[i])); } - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampleCounts), core::CContainerPrinter::print(actualSampleCounts)); + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(expectedSampleCounts), + core::CContainerPrinter::print(actualSampleCounts)); std::string actualFeatureData; { @@ -845,7 +895,8 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() { for (std::size_t i = 0u; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0u; j < data.size(); ++j) { - std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + + std::string key = model_t::print(featureData[i].first) + " " + + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); actual.push_back(TStrFeatureDataPr(key, data[j].second)); LOG_DEBUG(<< " " << key); @@ -924,21 +975,10 @@ void CMetricPopulationDataGathererTest::testInfluenceStatistics() { features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationHighSumByBucketPersonAndAttribute); TStrVec influencerNames(boost::begin(influencerNames_), boost::end(influencerNames_)); - CDataGatherer gatherer(model_t::E_PopulationMetric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - influencerNames, - false, - searchKey, - features, - startTime, - 2u); + CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, influencerNames, false, + searchKey, features, startTime, 2u); core_t::TTime bucketStart = startTime; for (std::size_t i = 0u, b = 0u; i < boost::size(data); ++i) { @@ -953,19 +993,29 @@ void CMetricPopulationDataGathererTest::testInfluenceStatistics() { const TSizeSizePrFeatureDataPrVec& data_ = featureData[j].second; for (std::size_t k = 0u; k < data_.size(); ++k) { TStrDoubleDoublePrPrVec statistics; - for (std::size_t m = 0u; m < data_[k].second.s_InfluenceValues.size(); ++m) { - for (std::size_t n = 0u; n < data_[k].second.s_InfluenceValues[m].size(); ++n) { - statistics.push_back( - TStrDoubleDoublePrPr(data_[k].second.s_InfluenceValues[m][n].first, - TDoubleDoublePr(data_[k].second.s_InfluenceValues[m][n].second.first[0], - data_[k].second.s_InfluenceValues[m][n].second.second))); + for (std::size_t m = 0u; + m < data_[k].second.s_InfluenceValues.size(); ++m) { + for (std::size_t n = 0u; + n < data_[k].second.s_InfluenceValues[m].size(); ++n) { + statistics.push_back(TStrDoubleDoublePrPr( + data_[k].second.s_InfluenceValues[m][n].first, + TDoubleDoublePr( + data_[k] + .second.s_InfluenceValues[m][n] + .second.first[0], + data_[k] + .second.s_InfluenceValues[m][n] + .second.second))); } } - std::sort(statistics.begin(), statistics.end(), maths::COrderings::SFirstLess()); + std::sort(statistics.begin(), statistics.end(), + 
maths::COrderings::SFirstLess()); - LOG_DEBUG(<< "statistics = " << core::CContainerPrinter::print(statistics)); + LOG_DEBUG(<< "statistics = " + << core::CContainerPrinter::print(statistics)); LOG_DEBUG(<< "expected = " << *expected); - CPPUNIT_ASSERT_EQUAL(*(expected++), core::CContainerPrinter::print(statistics)); + CPPUNIT_ASSERT_EQUAL(*(expected++), + core::CContainerPrinter::print(statistics)); } } @@ -989,21 +1039,10 @@ void CMetricPopulationDataGathererTest::testPersistence() { features.push_back(model_t::E_PopulationMinByPersonAndAttribute); features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationHighSumByBucketPersonAndAttribute); - CDataGatherer origDataGatherer(model_t::E_PopulationMetric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - features, - startTime, - 0); + CDataGatherer origDataGatherer(model_t::E_PopulationMetric, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(startTime, messages); @@ -1011,7 +1050,8 @@ void CMetricPopulationDataGathererTest::testPersistence() { core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { - LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " << bucketStart + bucketLength << ")"); + LOG_DEBUG(<< "Processing bucket [" << bucketStart << ", " + << bucketStart + bucketLength << ")"); origDataGatherer.sampleNow(bucketStart); bucketStart += bucketLength; } @@ -1026,9 +1066,11 @@ void CMetricPopulationDataGathererTest::testPersistence() { inserter.toXml(origXml); } //LOG_DEBUG(<< "origXml = " << origXml); - LOG_DEBUG(<< "origXml length = " << origXml.length() << ", # tabs " << std::count_if(origXml.begin(), origXml.end(), isSpace)); + LOG_DEBUG(<< "origXml length = " << origXml.length() << ", # tabs " + << std::count_if(origXml.begin(), origXml.end(), isSpace)); - std::size_t length = origXml.length() - std::count_if(origXml.begin(), origXml.end(), isSpace); + std::size_t length = origXml.length() - + std::count_if(origXml.begin(), origXml.end(), isSpace); CPPUNIT_ASSERT(length < 645000); // Restore the XML into a new data gatherer @@ -1036,19 +1078,10 @@ void CMetricPopulationDataGathererTest::testPersistence() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredDataGatherer(model_t::E_PopulationMetric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - searchKey, - traverser); + CDataGatherer restoredDataGatherer(model_t::E_PopulationMetric, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + TStrVec(), false, searchKey, traverser); // The XML representation of the new data gatherer should be the same as the // original @@ -1059,7 +1092,8 @@ void CMetricPopulationDataGathererTest::testPersistence() { inserter.toXml(newXml); } //LOG_DEBUG(<< "newXml = " << newXml); - LOG_DEBUG(<< "newXml length = " << newXml.length() << ", # tabs " << std::count_if(newXml.begin(), newXml.end(), isSpace)); + LOG_DEBUG(<< "newXml length = " << newXml.length() << ", # tabs " + << 
std::count_if(newXml.begin(), newXml.end(), isSpace)); CPPUNIT_ASSERT_EQUAL(origXml, newXml); } @@ -1120,28 +1154,39 @@ CppUnit::Test* CMetricPopulationDataGathererTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricPopulationDataGathererTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testMean", - &CMetricPopulationDataGathererTest::testMean)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testMin", - &CMetricPopulationDataGathererTest::testMin)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testMax", - &CMetricPopulationDataGathererTest::testMax)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testSum", - &CMetricPopulationDataGathererTest::testSum)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testSampleCount", - &CMetricPopulationDataGathererTest::testSampleCount)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testFeatureData", - &CMetricPopulationDataGathererTest::testFeatureData)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testRemovePeople", - &CMetricPopulationDataGathererTest::testRemovePeople)); suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( - "CMetricPopulationDataGathererTest::testRemoveAttributes", &CMetricPopulationDataGathererTest::testRemoveAttributes)); + "CMetricPopulationDataGathererTest::testMean", + &CMetricPopulationDataGathererTest::testMean)); suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( - "CMetricPopulationDataGathererTest::testInfluenceStatistics", &CMetricPopulationDataGathererTest::testInfluenceStatistics)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>("CMetricPopulationDataGathererTest::testPersistence", - &CMetricPopulationDataGathererTest::testPersistence)); + "CMetricPopulationDataGathererTest::testMin", + &CMetricPopulationDataGathererTest::testMin)); suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( - "CMetricPopulationDataGathererTest::testReleaseMemory", &CMetricPopulationDataGathererTest::testReleaseMemory)); + "CMetricPopulationDataGathererTest::testMax", + &CMetricPopulationDataGathererTest::testMax)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( + "CMetricPopulationDataGathererTest::testSum", + &CMetricPopulationDataGathererTest::testSum)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( + "CMetricPopulationDataGathererTest::testSampleCount", + &CMetricPopulationDataGathererTest::testSampleCount)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( + "CMetricPopulationDataGathererTest::testFeatureData", + &CMetricPopulationDataGathererTest::testFeatureData)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( + "CMetricPopulationDataGathererTest::testRemovePeople", + &CMetricPopulationDataGathererTest::testRemovePeople)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( + "CMetricPopulationDataGathererTest::testRemoveAttributes", + &CMetricPopulationDataGathererTest::testRemoveAttributes)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( + "CMetricPopulationDataGathererTest::testInfluenceStatistics", + &CMetricPopulationDataGathererTest::testInfluenceStatistics)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>( + "CMetricPopulationDataGathererTest::testPersistence", + &CMetricPopulationDataGathererTest::testPersistence)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationDataGathererTest>(
"CMetricPopulationDataGathererTest::testReleaseMemory", + &CMetricPopulationDataGathererTest::testReleaseMemory)); return suiteOfTests; } diff --git a/lib/model/unittest/CMetricPopulationModelTest.cc b/lib/model/unittest/CMetricPopulationModelTest.cc index b8e9a4ccee..b37074a4b3 100644 --- a/lib/model/unittest/CMetricPopulationModelTest.cc +++ b/lib/model/unittest/CMetricPopulationModelTest.cc @@ -63,7 +63,8 @@ using TSizeVec = std::vector; using TSizeVecVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; -using TMaxAccumulator = maths::CBasicStatistics::COrderStatisticsStack>; +using TMaxAccumulator = + maths::CBasicStatistics::COrderStatisticsStack>; using TDouble1Vec = core::CSmallVector; using TDouble2Vec = core::CSmallVector; @@ -78,7 +79,9 @@ struct SAnomaly { std::string s_Person; TDoubleStrPrVec s_Attributes; - bool operator<(const SAnomaly& other) const { return s_Bucket < other.s_Bucket; } + bool operator<(const SAnomaly& other) const { + return s_Bucket < other.s_Bucket; + } std::string print() const { std::ostringstream result; @@ -94,11 +97,15 @@ struct SAnomaly { }; struct SMessage { - SMessage(core_t::TTime time, const std::string& person, const std::string& attribute, const TDouble1Vec& value) + SMessage(core_t::TTime time, + const std::string& person, + const std::string& attribute, + const TDouble1Vec& value) : s_Time(time), s_Person(person), s_Attribute(attribute), s_Value(value) {} bool operator<(const SMessage& other) const { - return maths::COrderings::lexicographical_compare(s_Time, s_Person, s_Attribute, other.s_Time, other.s_Person, other.s_Attribute); + return maths::COrderings::lexicographical_compare( + s_Time, s_Person, s_Attribute, other.s_Time, other.s_Person, other.s_Attribute); } core_t::TTime s_Time; @@ -113,13 +120,17 @@ const std::size_t numberAttributes = 5u; const std::size_t numberPeople = 10u; double roundToNearestPersisted(double value) { - std::string valueAsString(core::CStringUtils::typeToStringPrecise(value, core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise( + value, core::CIEEE754::E_DoublePrecision)); double result = 0.0; core::CStringUtils::stringToType(valueAsString, result); return result; } -void generateTestMessages(std::size_t dimension, core_t::TTime startTime, core_t::TTime bucketLength, TMessageVec& messages) { +void generateTestMessages(std::size_t dimension, + core_t::TTime startTime, + core_t::TTime bucketLength, + TMessageVec& messages) { // The test case is as follows: // // attribute | 0 | 1 | 2 | 3 | 4 @@ -154,17 +165,24 @@ void generateTestMessages(std::size_t dimension, core_t::TTime startTime, core_t double means[] = {5.0, 10.0, 7.0, 3.0, 15.0}; double variances[] = {1.0, 0.5, 2.0, 0.1, 4.0}; - TSizeSizePr attribute0AnomalyBucketPerson[] = {TSizeSizePr(40u, 6u), TSizeSizePr(15u, 3u), TSizeSizePr(12u, 2u)}; - TSizeSizePr attribute2AnomalyBucketPerson[] = {TSizeSizePr(44u, 9u), TSizeSizePr(30u, 5u)}; - TSizeSizePr attribute3AnomalyBucketPerson[] = {TSizeSizePr(80u, 1u), TSizeSizePr(12u, 2u)}; + TSizeSizePr attribute0AnomalyBucketPerson[] = { + TSizeSizePr(40u, 6u), TSizeSizePr(15u, 3u), TSizeSizePr(12u, 2u)}; + TSizeSizePr attribute2AnomalyBucketPerson[] = {TSizeSizePr(44u, 9u), + TSizeSizePr(30u, 5u)}; + TSizeSizePr attribute3AnomalyBucketPerson[] = {TSizeSizePr(80u, 1u), + TSizeSizePr(12u, 2u)}; TSizeSizePr attribute4AnomalyBucketPerson[] = 
{TSizeSizePr(60u, 2u)}; TSizeSizePrVecVec anomalies; - anomalies.push_back(TSizeSizePrVec(boost::begin(attribute0AnomalyBucketPerson), boost::end(attribute0AnomalyBucketPerson))); + anomalies.push_back(TSizeSizePrVec(boost::begin(attribute0AnomalyBucketPerson), + boost::end(attribute0AnomalyBucketPerson))); anomalies.push_back(TSizeSizePrVec()); - anomalies.push_back(TSizeSizePrVec(boost::begin(attribute2AnomalyBucketPerson), boost::end(attribute2AnomalyBucketPerson))); - anomalies.push_back(TSizeSizePrVec(boost::begin(attribute3AnomalyBucketPerson), boost::end(attribute3AnomalyBucketPerson))); - anomalies.push_back(TSizeSizePrVec(boost::begin(attribute4AnomalyBucketPerson), boost::end(attribute4AnomalyBucketPerson))); + anomalies.push_back(TSizeSizePrVec(boost::begin(attribute2AnomalyBucketPerson), + boost::end(attribute2AnomalyBucketPerson))); + anomalies.push_back(TSizeSizePrVec(boost::begin(attribute3AnomalyBucketPerson), + boost::end(attribute3AnomalyBucketPerson))); + anomalies.push_back(TSizeSizePrVec(boost::begin(attribute4AnomalyBucketPerson), + boost::end(attribute4AnomalyBucketPerson))); test::CRandomNumbers rng; @@ -174,7 +192,8 @@ void generateTestMessages(std::size_t dimension, core_t::TTime startTime, core_t rng.generatePoissonSamples(attributeRates[j], numberPeople, samples); for (std::size_t k = 0u; k < numberPeople; ++k) { - bool anomaly = !anomalies[j].empty() && anomalies[j].back().first == i && anomalies[j].back().second == k; + bool anomaly = !anomalies[j].empty() && anomalies[j].back().first == i && + anomalies[j].back().second == k; if (anomaly) { samples[k] += 4; anomalies[j].pop_back(); @@ -185,7 +204,8 @@ void generateTestMessages(std::size_t dimension, core_t::TTime startTime, core_t } TDoubleVec values; - rng.generateNormalSamples(means[j], variances[j], dimension * samples[k], values); + rng.generateNormalSamples(means[j], variances[j], + dimension * samples[k], values); for (std::size_t l = 0u; l < values.size(); l += dimension) { TDouble1Vec value(dimension); @@ -196,8 +216,10 @@ void generateTestMessages(std::size_t dimension, core_t::TTime startTime, core_t } value[d] = roundToNearestPersisted(vd); } - core_t::TTime dt = (static_cast<core_t::TTime>(l) * bucketLength) / static_cast<core_t::TTime>(values.size()); - messages.push_back(SMessage(startTime + dt, people[k], attributes[j], value)); + core_t::TTime dt = (static_cast<core_t::TTime>(l) * bucketLength) / + static_cast<core_t::TTime>(values.size()); + messages.push_back(SMessage(startTime + dt, people[k], + attributes[j], value)); } } } @@ -208,15 +230,19 @@ void generateTestMessages(std::size_t dimension, core_t::TTime startTime, core_t } std::string valueAsString(const TDouble1Vec& value) { - std::string result = core::CStringUtils::typeToStringPrecise(value[0], core::CIEEE754::E_DoublePrecision); + std::string result = core::CStringUtils::typeToStringPrecise( + value[0], core::CIEEE754::E_DoublePrecision); for (std::size_t i = 1u; i < value.size(); ++i) { result += CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER + - core::CStringUtils::typeToStringPrecise(value[i], core::CIEEE754::E_DoublePrecision); + core::CStringUtils::typeToStringPrecise( + value[i], core::CIEEE754::E_DoublePrecision); } return result; } -CEventData addArrival(const SMessage& message, const CModelFactory::TDataGathererPtr& gatherer, CResourceMonitor& resourceMonitor) { +CEventData addArrival(const SMessage& message, + const CModelFactory::TDataGathererPtr& gatherer, + CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields;
fields.push_back(&message.s_Person); fields.push_back(&message.s_Attribute); @@ -244,7 +270,8 @@ void processBucket(core_t::TTime time, fieldValues.push_back(&person); fieldValues.push_back(&attribute); fieldValues.push_back(&influencerValues[i]); - std::string valueAsString(core::CStringUtils::typeToStringPrecise(bucket[i], core::CIEEE754::E_DoublePrecision)); + std::string valueAsString(core::CStringUtils::typeToStringPrecise( + bucket[i], core::CIEEE754::E_DoublePrecision)); fieldValues.push_back(&valueAsString); CEventData eventData; @@ -254,7 +281,8 @@ } model.sample(time, time + bucketLength, resourceMonitor); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - model.computeProbability(0 /*pid*/, time, time + bucketLength, partitioningFields, 1, probability); + model.computeProbability(0 /*pid*/, time, time + bucketLength, + partitioningFields, 1, probability); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(probability.s_Influences)); } } @@ -286,7 +314,8 @@ void CMetricPopulationModelTest::testBasicAccessors() { features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData))); + CModelFactory::TDataGathererPtr gatherer( + dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); @@ -301,7 +330,8 @@ void CMetricPopulationModelTest::testBasicAccessors() { if (message.s_Time >= startTime + bucketLength) { model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); - LOG_DEBUG(<< "Testing bucket = [" << startTime << "," << startTime + bucketLength << ")"); + LOG_DEBUG(<< "Testing bucket = [" << startTime << "," + << startTime + bucketLength << ")"); CPPUNIT_ASSERT_EQUAL(numberPeople, gatherer->numberActivePeople()); CPPUNIT_ASSERT_EQUAL(numberAttributes, gatherer->numberActiveAttributes()); @@ -320,7 +350,8 @@ void CMetricPopulationModelTest::testBasicAccessors() { CPPUNIT_ASSERT_EQUAL(j, cid); } - LOG_DEBUG(<< "expected counts = " << core::CContainerPrinter::print(expectedBucketPersonCounts)); + LOG_DEBUG(<< "expected counts = " + << core::CContainerPrinter::print(expectedBucketPersonCounts)); TSizeVec expectedCurrentBucketPersonIds; @@ -334,7 +365,8 @@ void CMetricPopulationModelTest::testBasicAccessors() { CPPUNIT_ASSERT_EQUAL(count_.second, *count); } - std::sort(expectedCurrentBucketPersonIds.begin(), expectedCurrentBucketPersonIds.end()); + std::sort(expectedCurrentBucketPersonIds.begin(), + expectedCurrentBucketPersonIds.end()); TSizeVec bucketPersonIds; model->currentBucketPersonIds(startTime, bucketPersonIds); @@ -343,30 +375,43 @@ core::CContainerPrinter::print(bucketPersonIds)); if ((startTime / bucketLength) % 10 == 0) { - LOG_DEBUG(<< "expected means = " << core::CContainerPrinter::print(expectedBucketMeans)); - LOG_DEBUG(<< "expected mins = " << core::CContainerPrinter::print(expectedBucketMins)); - LOG_DEBUG(<< "expected maxs = " << core::CContainerPrinter::print(expectedBucketMaxs)); + LOG_DEBUG(<< "expected means = " + << core::CContainerPrinter::print(expectedBucketMeans)); + LOG_DEBUG(<< "expected mins = " + << core::CContainerPrinter::print(expectedBucketMins)); + LOG_DEBUG(<< "expected maxs = 
" + << core::CContainerPrinter::print(expectedBucketMaxs)); } for (std::size_t cid = 0u; cid < numberAttributes; ++cid) { for (std::size_t pid = 0u; pid < numberPeople; ++pid) { - const TMeanAccumulator& expectedMean = expectedBucketMeans[pid * numberAttributes + cid]; - const TMinAccumulator& expectedMin = expectedBucketMins[pid * numberAttributes + cid]; - const TMaxAccumulator& expectedMax = expectedBucketMaxs[pid * numberAttributes + cid]; - - TDouble1Vec mean = model->currentBucketValue(model_t::E_PopulationMeanByPersonAndAttribute, pid, cid, startTime); - TDouble1Vec min = model->currentBucketValue(model_t::E_PopulationMinByPersonAndAttribute, pid, cid, startTime); - TDouble1Vec max = model->currentBucketValue(model_t::E_PopulationMaxByPersonAndAttribute, pid, cid, startTime); - - CPPUNIT_ASSERT((!mean.empty() && maths::CBasicStatistics::count(expectedMean) > 0.0) || - (mean.empty() && maths::CBasicStatistics::count(expectedMean) == 0.0)); + const TMeanAccumulator& expectedMean = + expectedBucketMeans[pid * numberAttributes + cid]; + const TMinAccumulator& expectedMin = + expectedBucketMins[pid * numberAttributes + cid]; + const TMaxAccumulator& expectedMax = + expectedBucketMaxs[pid * numberAttributes + cid]; + + TDouble1Vec mean = model->currentBucketValue( + model_t::E_PopulationMeanByPersonAndAttribute, pid, cid, startTime); + TDouble1Vec min = model->currentBucketValue( + model_t::E_PopulationMinByPersonAndAttribute, pid, cid, startTime); + TDouble1Vec max = model->currentBucketValue( + model_t::E_PopulationMaxByPersonAndAttribute, pid, cid, startTime); + + CPPUNIT_ASSERT( + (!mean.empty() && maths::CBasicStatistics::count(expectedMean) > 0.0) || + (mean.empty() && maths::CBasicStatistics::count(expectedMean) == 0.0)); if (!mean.empty()) { - CPPUNIT_ASSERT_EQUAL(maths::CBasicStatistics::mean(expectedMean), mean[0]); + CPPUNIT_ASSERT_EQUAL( + maths::CBasicStatistics::mean(expectedMean), mean[0]); } - CPPUNIT_ASSERT((!min.empty() && expectedMin.count() > 0u) || (min.empty() && expectedMin.count() == 0u)); + CPPUNIT_ASSERT((!min.empty() && expectedMin.count() > 0u) || + (min.empty() && expectedMin.count() == 0u)); if (!min.empty()) { CPPUNIT_ASSERT_EQUAL(expectedMin[0], min[0]); } - CPPUNIT_ASSERT((!max.empty() && expectedMax.count() > 0u) || (max.empty() && expectedMax.count() == 0u)); + CPPUNIT_ASSERT((!max.empty() && expectedMax.count() > 0u) || + (max.empty() && expectedMax.count() == 0u)); if (!max.empty()) { CPPUNIT_ASSERT_EQUAL(expectedMax[0], max[0]); } @@ -406,12 +451,15 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { using TSizeSizePrMaxAccumulatorMap = std::map; using TMathsModelPtr = boost::shared_ptr; using TSizeMathsModelPtrMap = std::map; - using TTimeDouble2VecSizeTrVecDouble2Vec4VecVecPr = std::pair; - using TSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMap = std::map; + using TTimeDouble2VecSizeTrVecDouble2Vec4VecVecPr = + std::pair; + using TSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMap = + std::map; using TSizeSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMapMap = std::map; - static const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; + static const maths_t::TWeightStyleVec WEIGHT_STYLES{ + maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -428,12 +476,15 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { model_t::E_PopulationMaxByPersonAndAttribute}; factory.features(features); 
CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData))); + CModelFactory::TDataGathererPtr gatherer( + dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData))); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CMetricPopulationModel* model = dynamic_cast<CMetricPopulationModel*>(modelHolder.get()); + CMetricPopulationModel* model = + dynamic_cast<CMetricPopulationModel*>(modelHolder.get()); - CModelFactory::TFeatureMathsModelPtrPrVec models{factory.defaultFeatureModels(features, bucketLength, 1.0, false)}; + CModelFactory::TFeatureMathsModelPtrPrVec models{ + factory.defaultFeatureModels(features, bucketLength, 1.0, false)}; CPPUNIT_ASSERT_EQUAL(features.size(), models.size()); CPPUNIT_ASSERT_EQUAL(features[0], models[0].first); CPPUNIT_ASSERT_EQUAL(features[1], models[1].first); @@ -458,19 +509,24 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { std::size_t pid = samples_.first.first; std::size_t cid = samples_.first.second; double weight = model->sampleRateWeight(pid, cid); - TTimeDouble2VecSizeTrVec& samples = populationWeightedSamples[feature][cid].first; - TDouble2Vec4VecVec& weights = populationWeightedSamples[feature][cid].second; + TTimeDouble2VecSizeTrVec& samples = + populationWeightedSamples[feature][cid].first; + TDouble2Vec4VecVec& weights = + populationWeightedSamples[feature][cid].second; TMathsModelPtr& model_ = expectedPopulationModels[feature][cid]; if (!model_) { - model_ = factory.defaultFeatureModel(features[feature], bucketLength, 1.0, false); + model_ = factory.defaultFeatureModel( + features[feature], bucketLength, 1.0, false); } for (std::size_t j = 0u; j < samples_.second.size(); ++j) { // We round to the nearest integer time (note this has to match // the behaviour of CMetricPartialStatistic::time).
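// For instance, a mean sample time of 10.6 seconds is recorded as
// core_t::TTime(11): adding 0.5 before the truncating cast below turns
// truncation into round-to-nearest for the non-negative times used here.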
- core_t::TTime time_ = static_cast<core_t::TTime>(expectedSampleTimes[{pid, cid}][j] + 0.5); + core_t::TTime time_ = static_cast<core_t::TTime>( + expectedSampleTimes[{pid, cid}][j] + 0.5); TDouble2Vec sample{samples_.second[j]}; samples.emplace_back(time_, sample, pid); - weights.push_back({{weight}, model_->winsorisationWeight(1.0, time_, sample)}); + weights.push_back( + {{weight}, model_->winsorisationWeight(1.0, time_, sample)}); } } } @@ -493,12 +549,14 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { for (std::size_t feature = 0u; feature < features.size(); ++feature) { if ((startTime / bucketLength) % 10 == 0) { - LOG_DEBUG(<< "Testing priors for feature " << model_t::print(features[feature])); + LOG_DEBUG(<< "Testing priors for feature " + << model_t::print(features[feature])); } for (std::size_t cid = 0u; cid < numberAttributes; ++cid) { if (expectedPopulationModels[feature].count(cid) > 0) { - CPPUNIT_ASSERT_EQUAL(expectedPopulationModels[feature][cid]->checksum(), - model->details()->model(features[feature], cid)->checksum()); + CPPUNIT_ASSERT_EQUAL( + expectedPopulationModels[feature][cid]->checksum(), + model->details()->model(features[feature], cid)->checksum()); } } } @@ -523,8 +581,10 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { sampleMins[key].add(message.s_Value[0]); sampleMaxs[key].add(message.s_Value[0]); if (maths::CBasicStatistics::count(sampleTimes[key]) == sampleCount) { - expectedSampleTimes[key].push_back(maths::CBasicStatistics::mean(sampleTimes[key])); - expectedSamples[0][key].push_back(maths::CBasicStatistics::mean(sampleMeans[key])); + expectedSampleTimes[key].push_back( + maths::CBasicStatistics::mean(sampleTimes[key])); + expectedSamples[0][key].push_back( + maths::CBasicStatistics::mean(sampleMeans[key])); expectedSamples[1][key].push_back(sampleMins[key][0]); expectedSamples[2][key].push_back(sampleMaxs[key][0]); sampleTimes[key] = TMeanAccumulator(); @@ -576,62 +636,65 @@ void CMetricPopulationModelTest::testVarp() { std::string influencerValues9[] = {"i1", "i2", "i3", "i4", "i5", "i6"}; // This last bucket is much more improbable, with influencer i2 being responsible double bucket10[] = {0.3, 15.4, 77.62, 112.999, 5.1, 5.1, 5.1, 5.1, 5.1}; - std::string influencerValues10[] = {"i2", "i2", "i2", "i2", "i1", "i1", "i1", "i1", "i1"}; + std::string influencerValues10[] = {"i2", "i2", "i2", "i2", "i1", + "i1", "i1", "i1", "i1"}; SAnnotatedProbability annotatedProbability; core_t::TTime time = startTime; - processBucket( - time, bucketLength, boost::size(bucket1), bucket1, influencerValues1, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket1), bucket1, influencerValues1, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket2), bucket2, influencerValues2, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket2), bucket2, influencerValues2, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket3), bucket3, influencerValues3, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket3), bucket3, influencerValues3, + *gatherer, m_ResourceMonitor, model, annotatedProbability);
CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket4), bucket4, influencerValues4, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket4), bucket4, influencerValues4, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket5), bucket5, influencerValues5, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket5), bucket5, influencerValues5, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket6), bucket6, influencerValues6, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket6), bucket6, influencerValues6, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket7), bucket7, influencerValues7, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket7), bucket7, influencerValues7, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket8), bucket8, influencerValues8, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket8), bucket8, influencerValues8, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket9), bucket9, influencerValues9, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket9), bucket9, influencerValues9, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability < 0.85); time += bucketLength; - processBucket( - time, bucketLength, boost::size(bucket10), bucket10, influencerValues10, *gatherer, m_ResourceMonitor, model, annotatedProbability); + processBucket(time, bucketLength, boost::size(bucket10), bucket10, influencerValues10, + *gatherer, m_ResourceMonitor, model, annotatedProbability); CPPUNIT_ASSERT(annotatedProbability.s_Probability < 0.1); CPPUNIT_ASSERT_EQUAL(std::size_t(1), annotatedProbability.s_Influences.size()); - CPPUNIT_ASSERT_EQUAL(std::string("I"), *annotatedProbability.s_Influences[0].first.first); - CPPUNIT_ASSERT_EQUAL(std::string("i2"), *annotatedProbability.s_Influences[0].first.second); + CPPUNIT_ASSERT_EQUAL(std::string("I"), + *annotatedProbability.s_Influences[0].first.first); + CPPUNIT_ASSERT_EQUAL(std::string("i2"), + *annotatedProbability.s_Influences[0].first.second); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, annotatedProbability.s_Influences[0].second, 0.00001); } @@ -645,18 +708,21 @@ void CMetricPopulationModelTest::testComputeProbability() { using TAnomalyVec = std::vector<SAnomaly>; using TDoubleAnomalyPr = std::pair<double, SAnomaly>; - using TAnomalyAccumulator = maths::CBasicStatistics::COrderStatisticsHeap<TDoubleAnomalyPr, maths::COrderings::SFirstLess>; + using TAnomalyAccumulator = + 
maths::CBasicStatistics::COrderStatisticsHeap<TDoubleAnomalyPr, maths::COrderings::SFirstLess>; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; - model_t::EFeature features_[] = {model_t::E_PopulationMaxByPersonAndAttribute, model_t::E_PopulationMeanLatLongByPersonAndAttribute}; + model_t::EFeature features_[] = {model_t::E_PopulationMaxByPersonAndAttribute, + model_t::E_PopulationMeanLatLongByPersonAndAttribute}; for (std::size_t i = 0u; i < boost::size(features_); ++i) { LOG_DEBUG(<< "Testing " << model_t::print(features_[i])); TMessageVec messages; - generateTestMessages(model_t::dimension(features_[i]), startTime, bucketLength, messages); + generateTestMessages(model_t::dimension(features_[i]), startTime, + bucketLength, messages); SModelParams params(bucketLength); CMetricPopulationModelFactory factory(params); @@ -666,7 +732,8 @@ CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CMetricPopulationModel* model = dynamic_cast<CMetricPopulationModel*>(modelHolder.get()); + CMetricPopulationModel* model = + dynamic_cast<CMetricPopulationModel*>(modelHolder.get()); TAnomalyAccumulator anomalies(7); @@ -675,22 +742,27 @@ if (message.s_Time >= startTime + bucketLength) { model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); - LOG_DEBUG(<< "Testing bucket " << bucket << " = [" << startTime << "," << startTime + bucketLength << ")"); + LOG_DEBUG(<< "Testing bucket " << bucket << " = [" << startTime + << "," << startTime + bucketLength << ")"); CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; for (std::size_t pid = 0u; pid < numberPeople; ++pid) { - model->computeProbability(pid, startTime, startTime + bucketLength, partitioningFields, 2, annotatedProbability); + model->computeProbability(pid, startTime, startTime + bucketLength, + partitioningFields, 2, annotatedProbability); if ((startTime / bucketLength) % 10 == 0) { - LOG_DEBUG(<< "person = " << model->personName(pid) << ", probability = " << annotatedProbability.s_Probability); + LOG_DEBUG(<< "person = " << model->personName(pid) << ", probability = " + << annotatedProbability.s_Probability); } std::string person = model->personName(pid); TDoubleStrPrVec attributes; for (const auto& probability : annotatedProbability.s_AttributeProbabilities) { - attributes.emplace_back(probability.s_Probability, *probability.s_Attribute); + attributes.emplace_back(probability.s_Probability, + *probability.s_Attribute); } - anomalies.add({annotatedProbability.s_Probability, SAnomaly(bucket, person, attributes)}); + anomalies.add({annotatedProbability.s_Probability, + SAnomaly(bucket, person, attributes)}); } startTime += bucketLength; @@ -709,15 +781,14 @@ } std::sort(orderedAnomalies.begin(), orderedAnomalies.end()); - LOG_DEBUG(<< "orderedAnomalies = " << core::CContainerPrinter::print(orderedAnomalies)); + LOG_DEBUG(<< "orderedAnomalies = " + << core::CContainerPrinter::print(orderedAnomalies)); - std::string expectedAnomalies[] = {std::string("[12, p2, c0 c3]"), - std::string("[15, p3, c0]"), - std::string("[30, p5, c2]"), - std::string("[40, p6, c0]"), - std::string("[44, p9, c2]"), - std::string("[60, p2, c4]"), - std::string("[80, p1, c3]")}; + std::string expectedAnomalies[] = { + 
std::string("[12, p2, c0 c3]"), std::string("[15, p3, c0]"), + std::string("[30, p5, c2]"), std::string("[40, p6, c0]"), + std::string("[44, p9, c2]"), std::string("[60, p2, c4]"), + std::string("[80, p1, c3]")}; CPPUNIT_ASSERT_EQUAL(boost::size(expectedAnomalies), orderedAnomalies.size()); for (std::size_t j = 0u; j < orderedAnomalies.size(); ++j) { @@ -740,10 +811,13 @@ void CMetricPopulationModelTest::testPrune() { const core_t::TTime bucketLength = 3600; const std::size_t numberBuckets = 1000u; - std::string people[] = {std::string("p1"), std::string("p2"), std::string("p3"), std::string("p4")}; - std::string attributes[] = {std::string("c1"), std::string("c2"), std::string("c3"), std::string("c4"), std::string("c5")}; + std::string people[] = {std::string("p1"), std::string("p2"), + std::string("p3"), std::string("p4")}; + std::string attributes[] = {std::string("c1"), std::string("c2"), std::string("c3"), + std::string("c4"), std::string("c5")}; - TStrSizePrVecVec eventCounts[] = {TStrSizePrVecVec(), TStrSizePrVecVec(), TStrSizePrVecVec(), TStrSizePrVecVec()}; + TStrSizePrVecVec eventCounts[] = {TStrSizePrVecVec(), TStrSizePrVecVec(), + TStrSizePrVecVec(), TStrSizePrVecVec()}; { TStrSizePrVec attributeCounts; attributeCounts.push_back(TStrSizePr(attributes[0], 0)); @@ -849,7 +923,9 @@ void CMetricPopulationModelTest::testPrune() { core_t::TTime dt = bucketLength / static_cast(n); for (std::size_t l = 0u; l < n; ++l, time += dt) { - messages.push_back(SMessage(time, people[i], attributeEventCounts[k].first, TDouble1Vec(1, samples[l]))); + messages.push_back(SMessage(time, people[i], + attributeEventCounts[k].first, + TDouble1Vec(1, samples[l]))); } } } @@ -859,8 +935,10 @@ void CMetricPopulationModelTest::testPrune() { TMessageVec expectedMessages; expectedMessages.reserve(messages.size()); for (std::size_t i = 0u; i < messages.size(); ++i) { - if (std::binary_search(boost::begin(expectedPeople), boost::end(expectedPeople), messages[i].s_Person) && - std::binary_search(boost::begin(expectedAttributes), boost::end(expectedAttributes), messages[i].s_Attribute)) { + if (std::binary_search(boost::begin(expectedPeople), + boost::end(expectedPeople), messages[i].s_Person) && + std::binary_search(boost::begin(expectedAttributes), + boost::end(expectedAttributes), messages[i].s_Attribute)) { expectedMessages.push_back(messages[i]); } } @@ -897,9 +975,10 @@ void CMetricPopulationModelTest::testPrune() { bucketStart = gatherer->currentBucketStartTime() + bucketLength; - SMessage newMessages[] = {SMessage(bucketStart + 10, "p1", "c2", TDouble1Vec(1, 20.0)), - SMessage(bucketStart + 200, "p5", "c6", TDouble1Vec(1, 10.0)), - SMessage(bucketStart + 2100, "p5", "c6", TDouble1Vec(1, 15.0))}; + SMessage newMessages[] = { + SMessage(bucketStart + 10, "p1", "c2", TDouble1Vec(1, 20.0)), + SMessage(bucketStart + 200, "p5", "c6", TDouble1Vec(1, 10.0)), + SMessage(bucketStart + 2100, "p5", "c6", TDouble1Vec(1, 15.0))}; for (std::size_t i = 0u; i < boost::size(newMessages); ++i) { addArrival(newMessages[i], gatherer, m_ResourceMonitor); @@ -914,20 +993,21 @@ void CMetricPopulationModelTest::testPrune() { // Test that calling prune on a cloned model which has seen no new data does nothing CAnomalyDetectorModel::TModelPtr clonedModelHolder(model->cloneForPersistence()); - std::size_t numberOfPeopleBeforePrune(clonedModelHolder->dataGatherer().numberActivePeople()); + std::size_t numberOfPeopleBeforePrune( + clonedModelHolder->dataGatherer().numberActivePeople()); CPPUNIT_ASSERT(numberOfPeopleBeforePrune > 0); 
clonedModelHolder->prune(clonedModelHolder->defaultPruneWindow()); - CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, clonedModelHolder->dataGatherer().numberActivePeople()); + CPPUNIT_ASSERT_EQUAL(numberOfPeopleBeforePrune, + clonedModelHolder->dataGatherer().numberActivePeople()); } void CMetricPopulationModelTest::testKey() { LOG_DEBUG(<< "*** testKey ***"); - function_t::EFunction countFunctions[] = {function_t::E_PopulationMetric, - function_t::E_PopulationMetricMean, - function_t::E_PopulationMetricMin, - function_t::E_PopulationMetricMax, - function_t::E_PopulationMetricSum}; + function_t::EFunction countFunctions[] = { + function_t::E_PopulationMetric, function_t::E_PopulationMetricMean, + function_t::E_PopulationMetricMin, function_t::E_PopulationMetricMax, + function_t::E_PopulationMetricSum}; bool useNull[] = {true, false}; std::string byField[] = {"", "by"}; std::string partitionField[] = {"", "partition"}; @@ -940,16 +1020,12 @@ void CMetricPopulationModelTest::testKey() { for (std::size_t j = 0u; j < boost::size(useNull); ++j) { for (std::size_t k = 0u; k < boost::size(byField); ++k) { for (std::size_t l = 0u; l < boost::size(partitionField); ++l) { - CSearchKey key(++identifier, - countFunctions[i], - useNull[j], - model_t::E_XF_None, - "value", - byField[k], - "over", - partitionField[l]); + CSearchKey key(++identifier, countFunctions[i], + useNull[j], model_t::E_XF_None, "value", + byField[k], "over", partitionField[l]); - CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = config.factory(key); + CAnomalyDetectorModelConfig::TModelFactoryCPtr factory = + config.factory(key); LOG_DEBUG(<< "expected key = " << key); LOG_DEBUG(<< "actual key = " << factory->searchKey()); @@ -967,19 +1043,23 @@ void CMetricPopulationModelTest::testFrequency() { // Test we correctly compute frequencies for people and attributes. 
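The expectation this test encodes follows directly from the generator below: person i sends data every period[i] buckets, so over the 100 simulated buckets its estimated frequency should approach 1 / period[i]. A minimal self-contained sketch of that relationship (the helper is hypothetical, not part of the test):

#include <cstddef>

// Long-run frequency of a person that appears in every `period`-th bucket.
constexpr double expectedFrequency(std::size_t period) {
    return 1.0 / static_cast<double>(period);
}
static_assert(expectedFrequency(10) == 0.1, "every 10th bucket -> frequency 0.1");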
const core_t::TTime bucketLength = 600; - const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10"}; - const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}; + const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", + "a6", "a7", "a8", "a9", "a10"}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", + "p6", "p7", "p8", "p9", "p10"}; std::size_t period[] = {1u, 1u, 10u, 3u, 4u, 5u, 2u, 1u, 3u, 7u}; core_t::TTime startTime = 0; TMessageVec messages; std::size_t bucket = 0u; - for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; bucketStart += bucketLength, ++bucket) { + for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; + bucketStart += bucketLength, ++bucket) { for (std::size_t i = 0u; i < boost::size(people); ++i) { if (bucket % period[i] == 0) { for (std::size_t j = 0u; j < i + 1; ++j) { - messages.push_back(SMessage(bucketStart + bucketLength / 2, people[i], attributes[j], TDouble1Vec(1, 0.0))); + messages.push_back(SMessage(bucketStart + bucketLength / 2, people[i], + attributes[j], TDouble1Vec(1, 0.0))); } } } @@ -996,12 +1076,14 @@ factory.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - const model::CDataGatherer& populationGatherer(dynamic_cast<const model::CDataGatherer&>(*gatherer)); + const model::CDataGatherer& populationGatherer( + dynamic_cast<const model::CDataGatherer&>(*gatherer)); CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CMetricPopulationModel* populationModel = dynamic_cast<CMetricPopulationModel*>(model.get()); + CMetricPopulationModel* populationModel = + dynamic_cast<CMetricPopulationModel*>(model.get()); CPPUNIT_ASSERT(populationModel); core_t::TTime time = startTime; @@ -1021,9 +1103,11 @@ CPPUNIT_ASSERT(gatherer->personId(people[i], pid)); LOG_DEBUG(<< "frequency = " << populationModel->personFrequency(pid)); LOG_DEBUG(<< "expected frequency = " << 1.0 / static_cast<double>(period[i])); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - 1.0 / static_cast<double>(period[i]), populationModel->personFrequency(pid), 0.1 / static_cast<double>(period[i])); - meanError.add(std::fabs(populationModel->personFrequency(pid) - 1.0 / static_cast<double>(period[i]))); + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0 / static_cast<double>(period[i]), + populationModel->personFrequency(pid), + 0.1 / static_cast<double>(period[i])); + meanError.add(std::fabs(populationModel->personFrequency(pid) - + 1.0 / static_cast<double>(period[i]))); } LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(meanError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.002); @@ -1035,7 +1119,8 @@ CPPUNIT_ASSERT(populationGatherer.attributeId(attributes[i], cid)); LOG_DEBUG(<< "frequency = " << populationModel->attributeFrequency(cid)); LOG_DEBUG(<< "expected frequency = " << (10.0 - static_cast<double>(i)) / 10.0); - CPPUNIT_ASSERT_EQUAL((10.0 - static_cast<double>(i)) / 10.0, populationModel->attributeFrequency(cid)); + CPPUNIT_ASSERT_EQUAL((10.0 - static_cast<double>(i)) / 10.0, + populationModel->attributeFrequency(cid)); } } } @@ -1052,37 +1137,46 @@ void CMetricPopulationModelTest::testSampleRateWeight() { // one message per attribute per 10 buckets.
const core_t::TTime bucketLength = 600; - const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10"}; - const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", - "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20"}; + const std::string attributes[] = {"a1", "a2", "a3", "a4", "a5", + "a6", "a7", "a8", "a9", "a10"}; + const std::string people[] = { + "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", + "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20"}; std::size_t heavyHitters[] = {0u, 4u}; - std::size_t normal[] = {1u, 2u, 3u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u}; + std::size_t normal[] = {1u, 2u, 3u, 5u, 6u, 7u, 8u, 9u, 10u, + 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u}; - std::size_t messagesPerBucket = boost::size(heavyHitters) * boost::size(attributes) + boost::size(normal); + std::size_t messagesPerBucket = + boost::size(heavyHitters) * boost::size(attributes) + boost::size(normal); test::CRandomNumbers rng; core_t::TTime startTime = 0; TMessageVec messages; - for (core_t::TTime bucketStart = startTime; bucketStart < 100 * bucketLength; bucketStart += bucketLength) { + for (core_t::TTime bucketStart = startTime; + bucketStart < 100 * bucketLength; bucketStart += bucketLength) { TSizeVec times; - rng.generateUniformSamples( - static_cast<std::size_t>(bucketStart), static_cast<std::size_t>(bucketStart + bucketLength), messagesPerBucket, times); + rng.generateUniformSamples(static_cast<std::size_t>(bucketStart), + static_cast<std::size_t>(bucketStart + bucketLength), + messagesPerBucket, times); std::size_t m = 0u; for (std::size_t i = 0u; i < boost::size(attributes); ++i) { for (std::size_t j = 0u; j < boost::size(heavyHitters); ++j) { - messages.push_back( - SMessage(static_cast<core_t::TTime>(times[m++]), people[heavyHitters[j]], attributes[i], TDouble1Vec(1, 0.0))); + messages.push_back(SMessage(static_cast<core_t::TTime>(times[m++]), + people[heavyHitters[j]], + attributes[i], TDouble1Vec(1, 0.0))); } } TSizeVec attributeIndexes; - rng.generateUniformSamples(0, boost::size(attributes), boost::size(normal), attributeIndexes); + rng.generateUniformSamples(0, boost::size(attributes), + boost::size(normal), attributeIndexes); for (std::size_t i = 0u; i < boost::size(normal); ++i) { messages.push_back( - SMessage(static_cast<core_t::TTime>(times[m++]), people[normal[i]], attributes[attributeIndexes[i]], TDouble1Vec(1, 0.0))); + SMessage(static_cast<core_t::TTime>(times[m++]), people[normal[i]], + attributes[attributeIndexes[i]], TDouble1Vec(1, 0.0))); } } @@ -1100,7 +1194,8 @@ CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CMetricPopulationModel* populationModel = dynamic_cast<CMetricPopulationModel*>(model.get()); + CMetricPopulationModel* populationModel = + dynamic_cast<CMetricPopulationModel*>(model.get()); CPPUNIT_ASSERT(populationModel); core_t::TTime time = startTime; @@ -1119,7 +1214,8 @@ // + ("# heavy hitters")) // / "# people" - double expectedRateWeight = (static_cast<double>(boost::size(normal)) / static_cast<double>(boost::size(attributes)) + + double expectedRateWeight = (static_cast<double>(boost::size(normal)) / + static_cast<double>(boost::size(attributes)) + static_cast<double>(boost::size(heavyHitters))) / static_cast<double>(boost::size(people)); LOG_DEBUG(<< "expectedRateWeight = " << expectedRateWeight); @@ -1130,8 +1226,10 @@
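Working the commented formula through with the sizes defined above makes the expected weight concrete: the 18 "normal" people spread over 10 attributes contribute 18 / 10 = 1.8, the 2 heavy hitters contribute 2, and dividing by the 20 people gives 0.19, the value the heavy-hitter assertions below check to within 15%. As a compile-time sketch (the variable name is hypothetical):

// ("# normal" / "# attributes" + "# heavy hitters") / "# people"
constexpr double expectedRateWeightSketch = (18.0 / 10.0 + 2.0) / 20.0; // == 0.19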
CPPUNIT_ASSERT(gatherer->personId(people[heavyHitters[i]], pid)); for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); - LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) << ", sampleRateWeight = " << sampleRateWeight); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedRateWeight, sampleRateWeight, 0.15 * expectedRateWeight); + LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) + << ", sampleRateWeight = " << sampleRateWeight); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedRateWeight, sampleRateWeight, + 0.15 * expectedRateWeight); } } @@ -1141,7 +1239,8 @@ CPPUNIT_ASSERT(gatherer->personId(people[normal[i]], pid)); for (std::size_t cid = 0u; cid < boost::size(attributes); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); - LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) << ", sampleRateWeight = " << sampleRateWeight); + LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) + << ", sampleRateWeight = " << sampleRateWeight); CPPUNIT_ASSERT_EQUAL(1.0, sampleRateWeight); } } @@ -1159,10 +1258,12 @@ static const core_t::TTime DAY = 86400; const core_t::TTime bucketLength = 3600; - double baseline[] = {1, 1, 2, 2, 3, 5, 6, 6, 20, 21, 4, 3, 4, 4, 8, 25, 7, 6, 5, 1, 1, 4, 1, 1}; + double baseline[] = {1, 1, 2, 2, 3, 5, 6, 6, 20, 21, 4, 3, + 4, 4, 8, 25, 7, 6, 5, 1, 1, 4, 1, 1}; const std::string attributes[] = {"a1", "a2"}; double scales[] = {2.0, 3.0}; - const std::string people[] = {"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"}; + const std::string people[] = {"p1", "p2", "p3", "p4", "p5", + "p6", "p7", "p8", "p9", "p10"}; test::CRandomNumbers rng; @@ -1173,11 +1274,14 @@ for (core_t::TTime time = startTime; time < endTime; time += bucketLength) { for (std::size_t i = 0u; i < boost::size(attributes); ++i) { TDoubleVec values; - rng.generateNormalSamples(baseline[(time % DAY) / HOUR], scales[i] * scales[i], boost::size(people), values); + rng.generateNormalSamples(baseline[(time % DAY) / HOUR], + scales[i] * scales[i], boost::size(people), values); for (std::size_t j = 0u; j < values.size(); ++j) { for (unsigned int t = 0; t < 4; ++t) { - messages.push_back(SMessage(time + (t * bucketLength) / 4, people[j], attributes[i], TDouble1Vec(1, values[j]))); + messages.push_back(SMessage(time + (t * bucketLength) / 4, + people[j], attributes[i], + TDouble1Vec(1, values[j]))); } } } @@ -1196,7 +1300,8 @@ CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CMetricPopulationModel* populationModel = dynamic_cast<CMetricPopulationModel*>(model.get()); + CMetricPopulationModel* populationModel = + dynamic_cast<CMetricPopulationModel*>(model.get()); CPPUNIT_ASSERT(populationModel); TStrDoubleMap personProbabilitiesWithoutPeriodicity; @@ -1215,17 +1320,24 @@ CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - if (populationModel->computeProbability(pid, time, time + bucketLength, partitioningFields, 1, annotatedProbability) == - false) { + if (populationModel->computeProbability( + pid, time, time + bucketLength, partitioningFields, 1, + annotatedProbability) == false) {
continue; } if (time < startTime + 3 * DAY) { - double& minimumProbability = personProbabilitiesWithoutPeriodicity.insert({people[j], 1.0}).first->second; - minimumProbability = std::min(minimumProbability, annotatedProbability.s_Probability); + double& minimumProbability = personProbabilitiesWithoutPeriodicity + .insert({people[j], 1.0}) + .first->second; + minimumProbability = std::min( + minimumProbability, annotatedProbability.s_Probability); } else if (time > startTime + 5 * DAY) { - double& minimumProbability = personProbabilitiesWithPeriodicity.insert({people[j], 1.0}).first->second; - minimumProbability = std::min(minimumProbability, annotatedProbability.s_Probability); + double& minimumProbability = personProbabilitiesWithPeriodicity + .insert({people[j], 1.0}) + .first->second; + minimumProbability = std::min( + minimumProbability, annotatedProbability.s_Probability); } } time += bucketLength; @@ -1273,7 +1385,8 @@ void CMetricPopulationModelTest::testPersistence() { CModelFactory::SModelInitializationData modelInitData(gatherer); CAnomalyDetectorModel::TModelPtr origModel(factory.makeModel(modelInitData)); - CMetricPopulationModel* populationModel = dynamic_cast<CMetricPopulationModel*>(origModel.get()); + CMetricPopulationModel* populationModel = + dynamic_cast<CMetricPopulationModel*>(origModel.get()); CPPUNIT_ASSERT(populationModel); for (std::size_t i = 0u; i < messages.size(); ++i) { @@ -1350,7 +1463,8 @@ void CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() { CMetricPopulationModelFactory factoryNoSkip(paramsNoRules); factoryNoSkip.features(features); CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gathererNoSkip(factoryNoSkip.makeDataGatherer(gathererInitData)); + CModelFactory::TDataGathererPtr gathererNoSkip( + factoryNoSkip.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelNoSkipInitData(gathererNoSkip); CAnomalyDetectorModel::TModelPtr modelNoSkip(factoryNoSkip.makeModel(modelNoSkipInitData)); @@ -1360,9 +1474,11 @@ CMetricPopulationModelFactory factoryWithSkip(paramsWithRules); factoryWithSkip.features(features); - CModelFactory::TDataGathererPtr gathererWithSkip(factoryWithSkip.makeDataGatherer(gathererInitData)); + CModelFactory::TDataGathererPtr gathererWithSkip( + factoryWithSkip.makeDataGatherer(gathererInitData)); CModelFactory::SModelInitializationData modelWithSkipInitData(gathererWithSkip); - CAnomalyDetectorModel::TModelPtr modelWithSkip(factoryWithSkip.makeModel(modelWithSkipInitData)); + CAnomalyDetectorModel::TModelPtr modelWithSkip( + factoryWithSkip.makeModel(modelWithSkipInitData)); std::vector<SMessage> messages; messages.push_back(SMessage(startTime + 10, "p1", "c1", TDouble1Vec(1, 20.0))); @@ -1394,8 +1510,10 @@ } // This should be filtered out - addArrival(SMessage(startTime + 10, "p1", "c3", TDouble1Vec(1, 21.0)), gathererWithSkip, m_ResourceMonitor); - addArrival(SMessage(startTime + 10, "p2", "c3", TDouble1Vec(1, 21.0)), gathererWithSkip, m_ResourceMonitor); + addArrival(SMessage(startTime + 10, "p1", "c3", TDouble1Vec(1, 21.0)), + gathererWithSkip, m_ResourceMonitor); + addArrival(SMessage(startTime + 10, "p2", "c3", TDouble1Vec(1, 21.0)), + gathererWithSkip, m_ResourceMonitor); modelNoSkip->sample(startTime, endTime, m_ResourceMonitor); modelWithSkip->sample(startTime, endTime, m_ResourceMonitor); @@ -1403,16 +1521,23 @@ void
CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() { // Checksums will be different because a 3rd model is created for attribute c3 CPPUNIT_ASSERT(modelWithSkip->checksum() != modelNoSkip->checksum()); - CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = modelWithSkip->details(); + CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = + modelWithSkip->details(); CAnomalyDetectorModel::CModelDetailsViewPtr modelNoSkipView = modelNoSkip->details(); // but the underlying models for people p1 and p2 are the same - uint64_t withSkipChecksum = modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 0)->checksum(); - uint64_t noSkipChecksum = modelNoSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 0)->checksum(); + uint64_t withSkipChecksum = modelWithSkipView + ->model(model_t::E_PopulationMeanByPersonAndAttribute, 0) + ->checksum(); + uint64_t noSkipChecksum = + modelNoSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 0)->checksum(); CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); - withSkipChecksum = modelWithSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)->checksum(); - noSkipChecksum = modelNoSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)->checksum(); + withSkipChecksum = modelWithSkipView + ->model(model_t::E_PopulationMeanByPersonAndAttribute, 1) + ->checksum(); + noSkipChecksum = + modelNoSkipView->model(model_t::E_PopulationMeanByPersonAndAttribute, 1)->checksum(); CPPUNIT_ASSERT_EQUAL(withSkipChecksum, noSkipChecksum); // TODO These checks fail see elastic/machine-learning-cpp/issues/485 @@ -1439,27 +1564,33 @@ CppUnit::Test* CMetricPopulationModelTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMetricPopulationModelTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testBasicAccessors", - &CMetricPopulationModelTest::testBasicAccessors)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testMinMaxAndMean", - &CMetricPopulationModelTest::testMinMaxAndMean)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testComputeProbability", - &CMetricPopulationModelTest::testComputeProbability)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testPrune", - &CMetricPopulationModelTest::testPrune)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testKey", &CMetricPopulationModelTest::testKey)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testFrequency", - &CMetricPopulationModelTest::testFrequency)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testSampleRateWeight", - &CMetricPopulationModelTest::testSampleRateWeight)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testPeriodicity", - &CMetricPopulationModelTest::testPeriodicity)); - suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testPersistence", - &CMetricPopulationModelTest::testPersistence)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CMetricPopulationModelTest>("CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules", - &CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testBasicAccessors", + &CMetricPopulationModelTest::testBasicAccessors)); + 
suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testMinMaxAndMean", + &CMetricPopulationModelTest::testMinMaxAndMean)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testComputeProbability", + &CMetricPopulationModelTest::testComputeProbability)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testPrune", &CMetricPopulationModelTest::testPrune)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testKey", &CMetricPopulationModelTest::testKey)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testFrequency", &CMetricPopulationModelTest::testFrequency)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testSampleRateWeight", + &CMetricPopulationModelTest::testSampleRateWeight)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testPeriodicity", + &CMetricPopulationModelTest::testPeriodicity)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testPersistence", + &CMetricPopulationModelTest::testPersistence)); + suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>( + "CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules", + &CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules)); return suiteOfTests; } diff --git a/lib/model/unittest/CModelDetailsViewTest.cc b/lib/model/unittest/CModelDetailsViewTest.cc index ee7c64cc93..d8f0e3a23e 100644 --- a/lib/model/unittest/CModelDetailsViewTest.cc +++ b/lib/model/unittest/CModelDetailsViewTest.cc @@ -47,21 +47,10 @@ void CModelDetailsViewTest::testModelPlot() { TMockModelPtr model; auto setupTest = [&]() { - gatherer.reset(new model::CDataGatherer{model_t::analysisCategory(features[0]), - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - "p", - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - 0, - 0}); + gatherer.reset(new model::CDataGatherer{ + model_t::analysisCategory(features[0]), model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "p", EMPTY_STRING, + EMPTY_STRING, TStrVec(), false, key, features, 0, 0}); std::string person11{"p11"}; std::string person12{"p12"}; std::string person21{"p21"}; @@ -75,13 +64,17 @@ void CModelDetailsViewTest::testModelPlot() { model.reset(new model::CMockModel{params, gatherer, {/*we don't care about influence*/}}); maths::CTimeSeriesDecomposition trend; - maths::CNormalMeanPrecConjugate prior{maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; - maths::CModelParams timeSeriesModelParams{bucketLength, 1.0, 0.001, 0.2, 6 * core::constants::HOUR, 24 * core::constants::HOUR}; - maths::CUnivariateTimeSeriesModel timeSeriesModel{timeSeriesModelParams, 0, trend, prior}; - model->mockTimeSeriesModels({model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(0)), - model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(1)), - model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(2)), - model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(3))}); + maths::CNormalMeanPrecConjugate prior{ + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; + maths::CModelParams timeSeriesModelParams{ + bucketLength, 1.0, 0.001, 0.2, 6 * core::constants::HOUR, 24 * core::constants::HOUR}; + maths::CUnivariateTimeSeriesModel timeSeriesModel{timeSeriesModelParams, + 0, trend, prior}; + model->mockTimeSeriesModels( + 
{model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(0)), + model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(1)), + model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(2)), + model::CMockModel::TMathsModelPtr(timeSeriesModel.clone(3))}); }; LOG_DEBUG(<< "Individual sum"); @@ -93,7 +86,8 @@ { std::size_t pid{0}; for (auto value : values) { - model->mockAddBucketValue(model_t::E_IndividualSumByBucketAndPerson, pid++, 0, 0, {value}); + model->mockAddBucketValue(model_t::E_IndividualSumByBucketAndPerson, + pid++, 0, 0, {value}); } } @@ -105,7 +99,8 @@ for (const auto& byFieldData : featureByFieldData.second) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(byFieldData.first, pid)); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), byFieldData.second.s_ValuesPerOverField.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + byFieldData.second.s_ValuesPerOverField.size()); for (const auto& currentBucketValue : byFieldData.second.s_ValuesPerOverField) { CPPUNIT_ASSERT_EQUAL(values[pid], currentBucketValue.second); } @@ -122,7 +117,8 @@ { std::size_t pid{0}; for (auto value : values) { - model->mockAddBucketValue(model_t::E_IndividualCountByBucketAndPerson, pid++, 0, 0, {value}); + model->mockAddBucketValue(model_t::E_IndividualCountByBucketAndPerson, + pid++, 0, 0, {value}); } } @@ -134,7 +130,8 @@ for (const auto& byFieldData : featureByFieldData.second) { std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(byFieldData.first, pid)); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), byFieldData.second.s_ValuesPerOverField.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), + byFieldData.second.s_ValuesPerOverField.size()); for (const auto& currentBucketValue : byFieldData.second.s_ValuesPerOverField) { CPPUNIT_ASSERT_EQUAL(values[pid], currentBucketValue.second); } @@ -146,8 +143,8 @@ CppUnit::Test* CModelDetailsViewTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelDetailsViewTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CModelDetailsViewTest>("CModelDetailsViewTest::testModelPlot", &CModelDetailsViewTest::testModelPlot)); + suiteOfTests->addTest(new CppUnit::TestCaller<CModelDetailsViewTest>( + "CModelDetailsViewTest::testModelPlot", &CModelDetailsViewTest::testModelPlot)); return suiteOfTests; } diff --git a/lib/model/unittest/CModelMemoryTest.cc b/lib/model/unittest/CModelMemoryTest.cc index 41379cf1bf..ffde207141 100644 --- a/lib/model/unittest/CModelMemoryTest.cc +++ b/lib/model/unittest/CModelMemoryTest.cc @@ -142,7 +142,8 @@ void CModelMemoryTest::testOnlineMetricModel() { std::size_t startMemoryUsage = model.memoryUsage(); CResourceMonitor resourceMonitor; - LOG_DEBUG(<< "Memory used by model: " << model.memoryUsage() << " / " << core::CMemory::dynamicSize(model)); + LOG_DEBUG(<< "Memory used by model: " << model.memoryUsage() << " / " + << core::CMemory::dynamicSize(model)); test::CRandomNumbers rng; @@ -152,7 +153,8 @@ rng.generateNormalSamples(mean, variance, bucketCounts[i], values); for (std::size_t j = 0u; j < values.size(); ++j) { - addArrival(*gatherer, time + static_cast<core_t::TTime>(j), "p", values[j] + (i == anomalousBucket ? anomaly : 0.0)); + addArrival(*gatherer, time + static_cast<core_t::TTime>(j), "p", + values[j] + (i == anomalousBucket ? 
anomaly : 0.0)); } model.sample(time, time + bucketLength, resourceMonitor); @@ -170,10 +172,10 @@ CppUnit::Test* CModelMemoryTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelMemoryTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CModelMemoryTest>("CModelMemoryTest::testOnlineEventRateModel", - &CModelMemoryTest::testOnlineEventRateModel)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CModelMemoryTest>("CModelMemoryTest::testOnlineMetricModel", &CModelMemoryTest::testOnlineMetricModel)); + suiteOfTests->addTest(new CppUnit::TestCaller<CModelMemoryTest>( + "CModelMemoryTest::testOnlineEventRateModel", &CModelMemoryTest::testOnlineEventRateModel)); + suiteOfTests->addTest(new CppUnit::TestCaller<CModelMemoryTest>( + "CModelMemoryTest::testOnlineMetricModel", &CModelMemoryTest::testOnlineMetricModel)); return suiteOfTests; } diff --git a/lib/model/unittest/CModelToolsTest.cc b/lib/model/unittest/CModelToolsTest.cc index 294ebf4143..fac6206087 100644 --- a/lib/model/unittest/CModelToolsTest.cc +++ b/lib/model/unittest/CModelToolsTest.cc @@ -49,12 +49,14 @@ maths::CModelParams params(core_t::TTime bucketLength) { } maths::CNormalMeanPrecConjugate normal() { - return maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE); + return maths::CNormalMeanPrecConjugate::nonInformativePrior( + maths_t::E_ContinuousData, DECAY_RATE); } maths::CMultimodalPrior multimodal() { - maths::CXMeansOnline1d clusterer{ - maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, DECAY_RATE}; + maths::CXMeansOnline1d clusterer{maths_t::E_ContinuousData, + maths::CAvailableModeDistributions::ALL, + maths_t::E_ClustersFractionWeight, DECAY_RATE}; return maths::CMultimodalPrior{maths_t::E_ContinuousData, clusterer, normal(), DECAY_RATE}; } } @@ -80,7 +82,8 @@ void CModelToolsTest::testFuzzyDeduplicate() { fuzzy.computeEpsilons(600, 10000); boost::math::normal normal{variance, std::sqrt(variance)}; - double eps{(boost::math::quantile(normal, 0.9) - boost::math::quantile(normal, 0.1)) / 10000.0}; + double eps{(boost::math::quantile(normal, 0.9) - boost::math::quantile(normal, 0.1)) / + 10000.0}; LOG_DEBUG(<< "eps = " << eps); uniques.clear(); @@ -163,7 +166,9 @@ fuzzy.computeEpsilons(600, 10000); boost::math::lognormal lognormal{variance, std::sqrt(variance)}; - double eps{(boost::math::quantile(lognormal, 0.9) - boost::math::quantile(lognormal, 0.1)) / 10000.0}; + double eps{(boost::math::quantile(lognormal, 0.9) - + boost::math::quantile(lognormal, 0.1)) / + 10000.0}; LOG_DEBUG(<< "eps = " << eps); uniques.clear(); @@ -197,7 +202,8 @@ void CModelToolsTest::testProbabilityCache() { core_t::TTime bucketLength{1800}; maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; - maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, multimodal(), nullptr, false}; + maths::CUnivariateTimeSeriesModel model{ + params(bucketLength), 0, trend, multimodal(), nullptr, false}; test::CRandomNumbers rng; core_t::TTime time_{0}; @@ -221,7 +227,8 @@ .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); - model.addSamples(params, {core::make_triple(time_, TDouble2Vec(1, sample), TAG)}); + model.addSamples( + params, {core::make_triple(time_, TDouble2Vec(1, sample), TAG)}); } } @@ -261,20 +268,24 @@ TTail2Vec expectedTail; bool
conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(params, time, sample, expectedProbability, expectedTail, conditional, mostAnomalousCorrelate); + model.probability(params, time, sample, expectedProbability, + expectedTail, conditional, mostAnomalousCorrelate); double probability; TTail2Vec tail; - if (cache.lookup(feature, id, sample, probability, tail, conditional, mostAnomalousCorrelate)) { + if (cache.lookup(feature, id, sample, probability, tail, + conditional, mostAnomalousCorrelate)) { ++hits; error.add(std::fabs(probability - expectedProbability) / expectedProbability); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 0.05 * expectedProbability); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, + 0.05 * expectedProbability); CPPUNIT_ASSERT_EQUAL(expectedTail[0], tail[0]); CPPUNIT_ASSERT_EQUAL(false, conditional); CPPUNIT_ASSERT(mostAnomalousCorrelate.empty()); } else { cache.addModes(feature, id, model); - cache.addProbability(feature, id, sample, expectedProbability, expectedTail, false, mostAnomalousCorrelate); + cache.addProbability(feature, id, sample, expectedProbability, + expectedTail, false, mostAnomalousCorrelate); } } @@ -300,17 +311,20 @@ void CModelToolsTest::testProbabilityCache() { TTail2Vec expectedTail; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(params, time, sample, expectedProbability, expectedTail, conditional, mostAnomalousCorrelate); + model.probability(params, time, sample, expectedProbability, + expectedTail, conditional, mostAnomalousCorrelate); LOG_DEBUG(<< "probability = " << expectedProbability << ", tail = " << expectedTail); double probability; TTail2Vec tail; - if (cache.lookup(feature, id, sample, probability, tail, conditional, mostAnomalousCorrelate)) { + if (cache.lookup(feature, id, sample, probability, tail, + conditional, mostAnomalousCorrelate)) { // Shouldn't have any cache hits. 
CPPUNIT_ASSERT(false); } else { cache.addModes(feature, id, model); - cache.addProbability(feature, id, sample, expectedProbability, expectedTail, false, mostAnomalousCorrelate); + cache.addProbability(feature, id, sample, expectedProbability, + expectedTail, false, mostAnomalousCorrelate); } } } @@ -319,10 +333,10 @@ CppUnit::Test* CModelToolsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelToolsTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CModelToolsTest>("CModelToolsTest::testFuzzyDeduplicate", &CModelToolsTest::testFuzzyDeduplicate)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CModelToolsTest>("CModelToolsTest::testProbabilityCache", &CModelToolsTest::testProbabilityCache)); + suiteOfTests->addTest(new CppUnit::TestCaller<CModelToolsTest>( + "CModelToolsTest::testFuzzyDeduplicate", &CModelToolsTest::testFuzzyDeduplicate)); + suiteOfTests->addTest(new CppUnit::TestCaller<CModelToolsTest>( + "CModelToolsTest::testProbabilityCache", &CModelToolsTest::testProbabilityCache)); return suiteOfTests; } diff --git a/lib/model/unittest/CModelTypesTest.cc b/lib/model/unittest/CModelTypesTest.cc index d6d3fe98bc..13be2adf10 100644 --- a/lib/model/unittest/CModelTypesTest.cc +++ b/lib/model/unittest/CModelTypesTest.cc @@ -14,7 +14,8 @@ void CModelTypesTest::testAll() { { // test print categories CPPUNIT_ASSERT_EQUAL(std::string("'counting'"), model_t::print(model_t::E_Counting)); - CPPUNIT_ASSERT_EQUAL(std::string("'event rate'"), model_t::print(model_t::E_EventRate)); + CPPUNIT_ASSERT_EQUAL(std::string("'event rate'"), + model_t::print(model_t::E_EventRate)); CPPUNIT_ASSERT_EQUAL(std::string("'metric'"), model_t::print(model_t::E_Metric)); } @@ -38,9 +39,12 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'count per bucket by person'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'count per bucket by person'"), + model_t::print(feature)); feature = model_t::E_IndividualNonZeroCountByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -55,9 +59,12 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'non-zero count per bucket by person'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'non-zero count per bucket by person'"), + model_t::print(feature)); feature = model_t::E_IndividualTotalBucketCountByPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -72,9 +79,12
@@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'bucket count by person'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'bucket count by person'"), + model_t::print(feature)); feature = model_t::E_IndividualIndicatorOfBucketPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -89,9 +99,12 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("rare"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'indicator per bucket of person'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'indicator per bucket of person'"), + model_t::print(feature)); feature = model_t::E_IndividualLowCountsByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -105,10 +118,14 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'low values of count per bucket by person'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'low values of count per bucket by person'"), + model_t::print(feature)); feature = model_t::E_IndividualHighCountsByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -122,10 +139,14 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + 
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high values of count per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high values of count per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualArrivalTimesByPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -140,9 +161,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'mean arrival time by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'mean arrival time by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualLongArrivalTimesByPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -156,10 +180,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'long mean arrival time by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'long mean arrival time by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualShortArrivalTimesByPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -173,10 +201,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'short mean arrival time by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'short mean arrival time by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualLowNonZeroCountByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -190,10 +222,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'low non-zero count per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'low non-zero count per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualHighNonZeroCountByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -207,10 +243,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high non-zero count per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high non-zero count per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualUniqueCountByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -225,9 +265,13 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'unique count per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"),
+                             model_t::outputFunctionName(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'unique count per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualLowUniqueCountByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -241,10 +285,15 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'low unique count per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"),
+                             model_t::outputFunctionName(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'low unique count per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualHighUniqueCountByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -258,10 +307,15 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high unique count per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"),
+                             model_t::outputFunctionName(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high unique count per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualInfoContentByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -276,9 +330,13 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("info_content"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'information content of value per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("info_content"),
+                             model_t::outputFunctionName(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'information content of value per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualLowInfoContentByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -292,10 +350,15 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("info_content"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'low information content of value per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("info_content"),
+                             model_t::outputFunctionName(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'low information content of value per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualHighInfoContentByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -309,10 +372,15 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("info_content"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high information content of value per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("info_content"),
+                             model_t::outputFunctionName(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high information content of value per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualTimeOfDayByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -327,9 +395,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("time"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'time-of-day per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'time-of-day per bucket by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualTimeOfWeekByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -344,9 +415,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
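
The per-feature blocks in this test all check the same handful of properties, so a table-driven form is a natural alternative. A sketch only, placed inside the test body and reusing expected values visible in the hunks; the SFeatureExpectation struct is hypothetical:

    struct SFeatureExpectation {
        model_t::EFeature s_Feature;
        maths_t::EProbabilityCalculation s_Calculation;
        std::string s_OutputFunctionName;
        std::string s_Printed;
    };

    const SFeatureExpectation EXPECTATIONS[]{
        {model_t::E_IndividualLowInfoContentByBucketAndPerson, maths_t::E_OneSidedBelow,
         "info_content", "'low information content of value per bucket by person'"},
        {model_t::E_IndividualTimeOfDayByBucketAndPerson, maths_t::E_TwoSided,
         "time", "'time-of-day per bucket by person'"}};

    for (const auto& expectation : EXPECTATIONS) {
        // One row per feature instead of one block per feature.
        CPPUNIT_ASSERT_EQUAL(expectation.s_Calculation,
                             model_t::probabilityCalculation(expectation.s_Feature));
        CPPUNIT_ASSERT_EQUAL(expectation.s_OutputFunctionName,
                             model_t::outputFunctionName(expectation.s_Feature));
        CPPUNIT_ASSERT_EQUAL(expectation.s_Printed, model_t::print(expectation.s_Feature));
    }
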
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("time"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'time-of-week per bucket by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'time-of-week per bucket by person'"),
+                             model_t::print(feature));
 
     // Individual metric features
 
@@ -364,9 +438,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("mean"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'arithmetic mean value by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'arithmetic mean value by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualMedianByPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -382,9 +459,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("median"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'median value by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'median value by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualMinByPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -399,10 +479,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("min"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'minimum value by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'minimum value by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualMaxByPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -417,10 +501,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("max"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'maximum value by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'maximum value by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualSumByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -436,7 +524,9 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature));
         CPPUNIT_ASSERT_EQUAL(std::string("'bucket sum by person'"), model_t::print(feature));
 
@@ -453,10 +543,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("mean"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'low mean value by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'low mean value by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualHighMeanByPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -471,10 +565,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("mean"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high mean value by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high mean value by person'"),
person'"), + model_t::print(feature)); feature = model_t::E_IndividualLowSumByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -489,10 +587,14 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'low bucket sum by person'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'low bucket sum by person'"), + model_t::print(feature)); feature = model_t::E_IndividualHighSumByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -507,10 +609,14 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'high bucket sum by person'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'high bucket sum by person'"), + model_t::print(feature)); feature = model_t::E_IndividualNonNullSumByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -526,9 +632,12 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'bucket non-null sum by person'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'bucket non-null sum by person'"), + model_t::print(feature)); feature = model_t::E_IndividualLowNonNullSumByBucketAndPerson; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -543,10 +652,14 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - 
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'low bucket non-null sum by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'low bucket non-null sum by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualHighNonNullSumByBucketAndPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -561,10 +674,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high bucket non-null sum by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high bucket non-null sum by person'"),
+                             model_t::print(feature));
 
         feature = model_t::E_IndividualMeanLatLongByPerson;
         CPPUNIT_ASSERT_EQUAL(std::size_t(2), model_t::dimension(feature));
@@ -580,9 +697,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("lat_long"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'mean lat/long by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'mean lat/long by person'"),
+                             model_t::print(feature));
 
     // Population event rate features
 
@@ -600,8 +720,11 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("'attribute counts by person'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("'attribute counts by person'"),
+                             model_t::print(feature));
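
The paired offsetCountToZero / inverseOffsetCountToZero expectations in these hunks (1.0 and 3.0 for the non-zero-count features, 2.0 and 2.0 otherwise) are consistent with shifting counts down by one so that a count of one maps to zero. A sketch of that relationship only, not the library's actual implementation:

    // Assumed relationship, inferred from the expected values above.
    double offsetCountToZero(bool nonZeroCountFeature, double count) {
        return nonZeroCountFeature ? count - 1.0 : count;
    }
    double inverseOffsetCountToZero(bool nonZeroCountFeature, double count) {
        return nonZeroCountFeature ? count + 1.0 : count;
    }
    // offsetCountToZero(true, 2.0) == 1.0 and inverseOffsetCountToZero(true, 2.0) == 3.0,
    // while both are the identity (2.0) for plain count features, matching the assertions.
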
 
         feature = model_t::E_PopulationCountByBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -617,9 +740,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'non-zero count per bucket by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'non-zero count per bucket by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationIndicatorOfBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -635,9 +761,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("rare"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'indicator per bucket of person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'indicator per bucket of person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationUniquePersonCountByAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -653,8 +782,11 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("'unique person count by attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("'unique person count by attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationUniqueCountByBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -670,9 +802,13 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'unique count per bucket by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), + model_t::outputFunctionName(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'unique count per bucket by person and attribute'"), + model_t::print(feature)); feature = model_t::E_PopulationLowCountsByBucketPersonAndAttribute; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -687,10 +823,14 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'low values of non-zero count per bucket by person and attribute'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'low values of non-zero count per bucket by person and attribute'"), + model_t::print(feature)); feature = model_t::E_PopulationHighCountsByBucketPersonAndAttribute; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -705,10 +845,14 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); CPPUNIT_ASSERT_EQUAL(std::string("count"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'high values of non-zero count per bucket by person and attribute'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'high values of non-zero count per bucket by person and attribute'"), + model_t::print(feature)); feature = model_t::E_PopulationInfoContentByBucketPersonAndAttribute; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -724,9 +868,13 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); - CPPUNIT_ASSERT_EQUAL(std::string("info_content"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'information content of value per bucket by person and attribute'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(std::string("info_content"), + model_t::outputFunctionName(feature)); + 
CPPUNIT_ASSERT_EQUAL(std::string("'information content of value per bucket by person and attribute'"), + model_t::print(feature)); feature = model_t::E_PopulationLowInfoContentByBucketPersonAndAttribute; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -741,10 +889,15 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); - CPPUNIT_ASSERT_EQUAL(std::string("info_content"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'low information content of value per bucket by person and attribute'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(std::string("info_content"), + model_t::outputFunctionName(feature)); + CPPUNIT_ASSERT_EQUAL(std::string("'low information content of value per bucket by person and attribute'"), + model_t::print(feature)); feature = model_t::E_PopulationHighInfoContentByBucketPersonAndAttribute; CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature)); @@ -759,9 +912,13 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); - CPPUNIT_ASSERT_EQUAL(std::string("info_content"), model_t::outputFunctionName(feature)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(std::string("info_content"), + model_t::outputFunctionName(feature)); CPPUNIT_ASSERT_EQUAL(std::string("'high information content of value per bucket by person and attribute'"), model_t::print(feature)); @@ -778,10 +935,15 @@ void CModelTypesTest::testAll() { CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0)); CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0)); CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0)); - CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); - CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), model_t::outputFunctionName(feature)); - CPPUNIT_ASSERT_EQUAL(std::string("'low unique count per bucket by person and attribute'"), model_t::print(feature)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, + model_t::probabilityCalculation(feature)); + CPPUNIT_ASSERT_EQUAL( + core_t::TTime(10050), + model_t::sampleTime(feature, bucketStartTime, bucketLength, time)); + CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), + 
+                             model_t::outputFunctionName(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'low unique count per bucket by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationHighUniqueCountByBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -796,10 +958,15 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
-        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high unique count per bucket by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(std::string("distinct_count"),
+                             model_t::outputFunctionName(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high unique count per bucket by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -815,9 +982,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("time"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'time-of-day per bucket by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'time-of-day per bucket by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -833,9 +1003,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(3.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("time"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'time-of-week per bucket by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'time-of-week per bucket by person and attribute'"),
+                             model_t::print(feature));
 
     // Population metric features
 
@@ -853,9 +1026,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("mean"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'mean value by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'mean value by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationMedianByPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -871,9 +1047,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("median"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'median value by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'median value by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationMinByPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -888,10 +1067,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("min"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'minimum value by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'minimum value by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationMaxByPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -906,10 +1089,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("max"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'maximum value by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'maximum value by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationSumByBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -925,9 +1112,12 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(maths_t::E_TwoSided, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'bucket sum by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'bucket sum by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationLowMeanByPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -942,10 +1132,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("mean"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'low mean by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'low mean by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationHighMeanByPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -960,10 +1154,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("mean"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high mean by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high mean by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationLowSumByBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
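
The varianceScale(feature, 4.0, 2.0) expectations split by statistic: 2.0 for the mean-like features and 1.0 for min, max and sum. That is consistent with scaling a mean's variance by the ratio of the effective sample rate to the bucket count; again a sketch under that assumption, not the library's actual formula:

    // Assumed relationship, inferred from the expected values in the hunks.
    double varianceScale(bool meanLikeFeature, double sampleRate, double count) {
        // 4.0 / 2.0 == 2.0 for mean features; no scaling otherwise.
        return meanLikeFeature ? sampleRate / count : 1.0;
    }
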
@@ -978,10 +1176,14 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedBelow,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'low bucket sum by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'low bucket sum by person and attribute'"),
+                             model_t::print(feature));
 
         feature = model_t::E_PopulationHighSumByBucketPersonAndAttribute;
         CPPUNIT_ASSERT_EQUAL(std::size_t(1), model_t::dimension(feature));
@@ -996,17 +1198,22 @@ void CModelTypesTest::testAll() {
         CPPUNIT_ASSERT_EQUAL(1.0, model_t::varianceScale(feature, 4.0, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::offsetCountToZero(feature, 2.0));
         CPPUNIT_ASSERT_EQUAL(2.0, model_t::inverseOffsetCountToZero(feature, 2.0));
-        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove, model_t::probabilityCalculation(feature));
-        CPPUNIT_ASSERT_EQUAL(core_t::TTime(10050), model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
+        CPPUNIT_ASSERT_EQUAL(maths_t::E_OneSidedAbove,
+                             model_t::probabilityCalculation(feature));
+        CPPUNIT_ASSERT_EQUAL(
+            core_t::TTime(10050),
+            model_t::sampleTime(feature, bucketStartTime, bucketLength, time));
         CPPUNIT_ASSERT_EQUAL(std::string("sum"), model_t::outputFunctionName(feature));
-        CPPUNIT_ASSERT_EQUAL(std::string("'high bucket sum by person and attribute'"), model_t::print(feature));
+        CPPUNIT_ASSERT_EQUAL(std::string("'high bucket sum by person and attribute'"),
+                             model_t::print(feature));
     }
 }
 
 CppUnit::Test* CModelTypesTest::suite() {
     CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CModelTypesTest");
 
-    suiteOfTests->addTest(new CppUnit::TestCaller<CModelTypesTest>("CModelTypesTest::testAll", &CModelTypesTest::testAll));
+    suiteOfTests->addTest(new CppUnit::TestCaller<CModelTypesTest>(
+        "CModelTypesTest::testAll", &CModelTypesTest::testAll));
 
     return suiteOfTests;
 }
diff --git a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc
index 4dc77659c9..c033b089c5 100644
--- a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc
+++ b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc
@@ -55,11 +55,15 @@
 using TTime2Vec = model::CProbabilityAndInfluenceCalculator::TTime2Vec;
 using TTime2Vec1Vec = model::CProbabilityAndInfluenceCalculator::TTime2Vec1Vec;
 using TDouble1VecDoublePr = model::CProbabilityAndInfluenceCalculator::TDouble1VecDoublePr;
 using TDouble1VecDouble1VecPr = model::CProbabilityAndInfluenceCalculator::TDouble1VecDouble1VecPr;
-using TStrCRefDouble1VecDoublePrPr = model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDoublePrPr;
-using TStrCRefDouble1VecDoublePrPrVec = model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDoublePrPrVec;
+using TStrCRefDouble1VecDoublePrPr =
+    model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDoublePrPr;
+using TStrCRefDouble1VecDoublePrPrVec =
+    model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDoublePrPrVec;
 using TStrCRefDouble1VecDoublePrPrVecVec = std::vector<TStrCRefDouble1VecDoublePrPrVec>;
-using TStrCRefDouble1VecDouble1VecPrPr = model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPr;
-using TStrCRefDouble1VecDouble1VecPrPrVec = model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPrVec;
+using TStrCRefDouble1VecDouble1VecPrPr =
+    model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPr;
+using TStrCRefDouble1VecDouble1VecPrPrVec =
+    model::CProbabilityAndInfluenceCalculator::TStrCRefDouble1VecDouble1VecPrPrVec;
 using TStoredStringPtrStoredStringPtrPrDoublePrVec = model::CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePrVec;
 using TInfluenceCalculatorCPtr = boost::shared_ptr<const model::CInfluenceCalculator>;
 
@@ -80,8 +84,12 @@ TDouble1VecDoublePr make_pair(double first1, double first2, double second) {
 maths::CModelParams params(core_t::TTime bucketLength) {
     double learnRate{static_cast<double>(bucketLength) / 1800.0};
     double minimumSeasonalVarianceScale{0.4};
-    return maths::CModelParams{
-        bucketLength, learnRate, 0.0, minimumSeasonalVarianceScale, 6 * core::constants::HOUR, 24 * core::constants::HOUR};
+    return maths::CModelParams{bucketLength,
+                               learnRate,
+                               0.0,
+                               minimumSeasonalVarianceScale,
+                               6 * core::constants::HOUR,
+                               24 * core::constants::HOUR};
 }
 
 std::size_t dimension(double) {
@@ -99,10 +107,16 @@ TTimeDouble2VecSizeTr sample(core_t::TTime time, const TDoubleVec& sample) {
 }
 
 template<typename SAMPLES>
-core_t::TTime addSamples(core_t::TTime bucketLength, const SAMPLES& samples, maths::CModel& model) {
-    TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(dimension(samples[0]))};
+core_t::TTime
+addSamples(core_t::TTime bucketLength, const SAMPLES& samples, maths::CModel& model) {
+    TDouble2Vec4VecVec weights{
+        maths::CConstantWeights::unit(dimension(samples[0]))};
     maths::CModelAddSamplesParams params;
-    params.integer(false).propagationInterval(1.0).weightStyles(maths::CConstantWeights::COUNT).trendWeights(weights).priorWeights(weights);
+    params.integer(false)
+        .propagationInterval(1.0)
+        .weightStyles(maths::CConstantWeights::COUNT)
+        .trendWeights(weights)
+        .priorWeights(weights);
     core_t::TTime time{0};
     for (const auto& sample_ : samples) {
         model.addSamples(params, TTimeDouble2VecSizeTrVec{sample(time, sample_)});
@@ -125,8 +139,8 @@ void computeProbability(core_t::TTime time,
         .addWeights(weight);
     bool conditional;
     TSize1Vec mostAnomalousCorrelate;
-    model.probability(
-        params, TTime2Vec1Vec{TTime2Vec{time}}, TDouble2Vec1Vec{sample}, probablity, tail, conditional, mostAnomalousCorrelate);
+    model.probability(params, TTime2Vec1Vec{TTime2Vec{time}}, TDouble2Vec1Vec{sample},
+                      probablity, tail, conditional, mostAnomalousCorrelate);
 }
 
 const std::string I("I");
@@ -251,7 +265,8 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature,
         TTail2Vec tail;
         model_t::CResultType type;
         TSize1Vec mostAnomalousCorrelate;
-        calculator.addProbability(feature, 0, model, 0 /*elapsedTime*/, params_, time, value, p, tail, type, mostAnomalousCorrelate);
+        calculator.addProbability(feature, 0, model, 0 /*elapsedTime*/, params_, time,
+                                  value, p, tail, type, mostAnomalousCorrelate);
         LOG_DEBUG(<< "  p = " << p);
 
         pJoint.add(p);
@@ -276,7 +291,8 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature,
     CPPUNIT_ASSERT(pJoint.calculate(pj));
     CPPUNIT_ASSERT(pExtreme.calculate(pe));
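
The reformatted addSamples helper above chains the CModelAddSamplesParams setters one per line. The pattern works because each setter returns a reference to the object being configured; a minimal self-contained sketch of that style (CParamsSketch is hypothetical, not the real class):

    class CParamsSketch {
    public:
        CParamsSketch& integer(bool value) {
            m_Integer = value;
            return *this; // returning *this is what allows the chain
        }
        CParamsSketch& propagationInterval(double value) {
            m_PropagationInterval = value;
            return *this;
        }

    private:
        bool m_Integer = false;
        double m_PropagationInterval = 1.0;
    };

    // Usage mirrors the patch: one chained expression, one setter per line.
    CParamsSketch params;
    params.integer(false)
        .propagationInterval(1.0);
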
probability = " << probability + << ", expected probability = " << std::min(pj, pe)); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::min(pe, pj), probability, 1e-10); } } @@ -293,7 +309,8 @@ void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator( model::CInfluenceUnavailableCalculator calculator; maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -301,20 +318,15 @@ void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator( addSamples(bucketLength, samples, model); TStrCRefDouble1VecDoublePrPrVec influencerValues{ - {TStrCRef(i1), make_pair(11.0, 1.0)}, {TStrCRef(i2), make_pair(11.0, 1.0)}, {TStrCRef(i3), make_pair(15.0, 1.0)}}; + {TStrCRef(i1), make_pair(11.0, 1.0)}, + {TStrCRef(i2), make_pair(11.0, 1.0)}, + {TStrCRef(i3), make_pair(15.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualLowCountsByBucketAndPerson, - model, - 0 /*time*/, - 15.0 /*value*/, - 1.0 /*count*/, - 0.001 /*probability*/, - TTail2Vec{maths_t::E_RightTail}, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualLowCountsByBucketAndPerson, + model, 0 /*time*/, 15.0 /*value*/, 1.0 /*count*/, + 0.001 /*probability*/, TTail2Vec{maths_t::E_RightTail}, + I, influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); @@ -375,7 +387,8 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen LOG_DEBUG(<< "One influencer value"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -386,29 +399,24 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen TTail2Vec tail; computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{20.0}, model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(10.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(10.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, - model, - 0 /*time*/, - 20.0 /*value*/, - 1.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualCountByBucketAndPerson, + model, 0 /*time*/, 20.0 /*value*/, 1.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), + core::CContainerPrinter::print(influences)); } { LOG_DEBUG(<< "No trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = 
maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -420,29 +428,25 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{20.0}, model, p, tail); TStrCRefDouble1VecDoublePrPrVec influencerValues{ - {TStrCRef(i1), make_pair(1.0, 1.0)}, {TStrCRef(i2), make_pair(1.0, 1.0)}, {TStrCRef(i3), make_pair(18.0, 1.0)}}; + {TStrCRef(i1), make_pair(1.0, 1.0)}, + {TStrCRef(i2), make_pair(1.0, 1.0)}, + {TStrCRef(i3), make_pair(18.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, - model, - 0 /*time*/, - 20.0 /*value*/, - 1.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualCountByBucketAndPerson, + model, 0 /*time*/, 20.0 /*value*/, 1.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i3), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i3), 1)]"), + core::CContainerPrinter::print(influences)); } { LOG_DEBUG(<< "Trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -450,14 +454,17 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen rng.generateNormalSamples(0.0, 100.0, 10 * 86400 / 600, samples); core_t::TTime time{0}; for (auto& sample : samples) { - sample += 100.0 + 100.0 * std::sin(2.0 * 3.1416 * static_cast<double>(time) / 86400.0); + sample += 100.0 + 100.0 * std::sin(2.0 * 3.1416 * + static_cast<double>(time) / 86400.0); time += bucketLength; } } addSamples(bucketLength, samples, model); TTimeVec testTimes{0, 86400 / 4, 86400 / 2, (3 * 86400) / 4}; - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(70.0, 1.0)}, {TStrCRef(i2), make_pair(50.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(70.0, 1.0)}, + {TStrCRef(i2), make_pair(50.0, 1.0)}}; std::string expectedInfluencerValues[]{"i1", "i2"}; TDoubleVecVec expectedInfluences{{1.0, 1.0}, {0.0, 0.0}, {1.0, 1.0}, {0.8, 0.6}}; @@ -468,26 +475,21 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen double p; TTail2Vec tail; - computeProbability(time, maths_t::E_TwoSided, TDouble2Vec{120.0}, model, p, tail); + computeProbability(time, maths_t::E_TwoSided, + TDouble2Vec{120.0}, model, p, tail); LOG_DEBUG(<< " p = " << p << ", tail = " << tail); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, - model, - time, - 120.0 /*value*/, - 1.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualCountByBucketAndPerson, + model, time, 120.0 /*value*/, 1.0 /*count*/, + p, tail, I, 
influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); for (std::size_t j = 0u; j < influences.size(); ++j) { - CPPUNIT_ASSERT_EQUAL(expectedInfluencerValues[j], *influences[j].first.second); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], influences[j].second, 0.06); + CPPUNIT_ASSERT_EQUAL(expectedInfluencerValues[j], + *influences[j].first.second); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], + influences[j].second, 0.06); } } } @@ -708,7 +710,8 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { LOG_DEBUG(<< "One influencer value"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -719,29 +722,24 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { TTail2Vec tail; computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{20.0}, model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(5.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(5.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, - model, - 0 /*time*/, - 5.0 /*value*/, - 1.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualMeanByPerson, + model, 0 /*time*/, 5.0 /*value*/, 1.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), + core::CContainerPrinter::print(influences)); } { LOG_DEBUG(<< "No trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -753,49 +751,39 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { double p; TTail2Vec tail; - computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{12.5}, model, p, tail); + computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{12.5}, + model, p, tail); TStrCRefDouble1VecDoublePrPrVec influencerValues{ - {TStrCRef(i1), make_pair(20.0, 5.0)}, {TStrCRef(i2), make_pair(10.0, 7.0)}, {TStrCRef(i3), make_pair(10.0, 8.0)}}; + {TStrCRef(i1), make_pair(20.0, 5.0)}, + {TStrCRef(i2), make_pair(10.0, 7.0)}, + {TStrCRef(i3), make_pair(10.0, 8.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, - model, - 0 /*time*/, - 12.5 /*value*/, - 20.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualMeanByPerson, + model, 0 /*time*/, 12.5 /*value*/, 20.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " 
<< core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), + core::CContainerPrinter::print(influences)); } { LOG_DEBUG(<< "Right tail, no clear influences"); double p; TTail2Vec tail; - computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{15.0}, model, p, tail); + computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{15.0}, + model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(15.0, 5.0)}, - {TStrCRef(i2), make_pair(15.0, 6.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(15.0, 5.0)}, + {TStrCRef(i2), make_pair(15.0, 6.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, - model, - 0 /*time*/, - 15.0 /*value*/, - 11.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualMeanByPerson, + model, 0 /*time*/, 15.0 /*value*/, 11.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); @@ -805,22 +793,17 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { double p; TTail2Vec tail; - computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{5.0}, model, p, tail); + computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{5.0}, + model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(5.0, 5.0)}, {TStrCRef(i2), make_pair(5.0, 6.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(5.0, 5.0)}, + {TStrCRef(i2), make_pair(5.0, 6.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, - model, - 0 /*time*/, - 5.0 /*value*/, - 11.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualMeanByPerson, + model, 0 /*time*/, 5.0 /*value*/, 11.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); @@ -830,23 +813,18 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { double p; TTail2Vec tail; - computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{8.0}, model, p, tail); + computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{8.0}, + model, p, tail); TStrCRefDouble1VecDoublePrPrVec influencerValues{ - {TStrCRef(i1), make_pair(5.0, 9.0)}, {TStrCRef(i2), make_pair(11.0, 20.0)}, {TStrCRef(i3), make_pair(5.0, 11.0)}}; + {TStrCRef(i1), make_pair(5.0, 9.0)}, + {TStrCRef(i2), make_pair(11.0, 20.0)}, + {TStrCRef(i3), make_pair(5.0, 11.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, - model, - 0 /*time*/, - 8.0 /*value*/, - 40.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualMeanByPerson, + model, 0 /*time*/, 8.0 /*value*/, 40.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), influences.size()); @@ -1083,7 +1061,8 @@ void 
CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat LOG_DEBUG(<< "One influencer value"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -1094,29 +1073,24 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat TTail2Vec tail; computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{5.0}, model, p, tail); - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(5.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(5.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualUniqueCountByBucketAndPerson, - model, - now /*time*/, - 5.0 /*value*/, - 1.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualUniqueCountByBucketAndPerson, + model, now /*time*/, 5.0 /*value*/, 1.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), + core::CContainerPrinter::print(influences)); } { LOG_DEBUG(<< "No trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -1128,29 +1102,25 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat computeProbability(now, maths_t::E_TwoSided, TDouble1Vec{6.0}, model, p, tail); TStrCRefDouble1VecDoublePrPrVec influencerValues{ - {TStrCRef(i1), make_pair(9.0, 1.0)}, {TStrCRef(i2), make_pair(6.0, 1.0)}, {TStrCRef(i3), make_pair(6.0, 1.0)}}; + {TStrCRef(i1), make_pair(9.0, 1.0)}, + {TStrCRef(i2), make_pair(6.0, 1.0)}, + {TStrCRef(i3), make_pair(6.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualUniqueCountByBucketAndPerson, - model, - now /*time*/, - 6.0 /*value*/, - 1.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualUniqueCountByBucketAndPerson, + model, now /*time*/, 6.0 /*value*/, 1.0 /*count*/, + p, tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i2), 1), ((I, i3), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i2), 1), ((I, i3), 1)]"), + core::CContainerPrinter::print(influences)); } { LOG_DEBUG(<< "Trend"); maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + 
maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(bucketLength), 0, trend, prior); TDoubleVec samples; @@ -1158,7 +1128,8 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat rng.generateNormalSamples(0.0, 100.0, 10 * 86400 / 600, samples); core_t::TTime time{0}; for (auto& sample : samples) { - sample += 100.0 + 100.0 * std::sin(2.0 * 3.1416 * static_cast<double>(time) / 86400.0); + sample += 100.0 + 100.0 * std::sin(2.0 * 3.1416 * + static_cast<double>(time) / 86400.0); time += bucketLength; } } @@ -1166,7 +1137,9 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat addSamples(bucketLength, samples, model); TTimeVec testTimes{0, 86400 / 4, 86400 / 2, (3 * 86400) / 4}; - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i1), make_pair(60.0, 1.0)}, {TStrCRef(i2), make_pair(50.0, 1.0)}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i1), make_pair(60.0, 1.0)}, + {TStrCRef(i2), make_pair(50.0, 1.0)}}; std::string expectedInfluencerValues[] = {"i1", "i2"}; TDoubleVecVec expectedInfluences{{1.0, 1.0}, {1.0, 1.0}, {1.0, 1.0}, {1.0, 0.7}}; @@ -1176,27 +1149,23 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat double p; TTail2Vec tail; - computeProbability(time, maths_t::E_TwoSided, TDouble2Vec{60.0}, model, p, tail); + computeProbability(time, maths_t::E_TwoSided, TDouble2Vec{60.0}, + model, p, tail); LOG_DEBUG(<< " p = " << p << ", tail = " << tail); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualHighUniqueCountByBucketAndPerson, - model, - time, - 60.0 /*value*/, - 1.0 /*count*/, - p, - tail, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualHighUniqueCountByBucketAndPerson, + model, time, 60.0 /*value*/, 1.0 /*count*/, p, + tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - std::sort(influences.begin(), influences.end(), maths::COrderings::SFirstLess()); + std::sort(influences.begin(), influences.end(), + maths::COrderings::SFirstLess()); for (std::size_t j = 0u; j < influences.size(); ++j) { - CPPUNIT_ASSERT_EQUAL(expectedInfluencerValues[j], *influences[j].first.second); - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], influences[j].second, 0.03); + CPPUNIT_ASSERT_EQUAL(expectedInfluencerValues[j], + *influences[j].first.second); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], + influences[j].second, 0.03); } } } @@ -1419,27 +1388,24 @@ void CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator() model::CIndicatorInfluenceCalculator calculator; maths::CTimeSeriesDecomposition trend{0.0, 600}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel model(params(600), 0, trend, prior); TStrCRefDouble1VecDoublePrPrVec influencerValues{ - {TStrCRef(i1), make_pair(1.0, 1.0)}, {TStrCRef(i2), make_pair(1.0, 1.0)}, {TStrCRef(i3), make_pair(1.0, 1.0)}}; + {TStrCRef(i1), make_pair(1.0, 1.0)}, + {TStrCRef(i2), make_pair(1.0, 1.0)}, + {TStrCRef(i3), make_pair(1.0, 1.0)}}; TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - 
model_t::E_IndividualIndicatorOfBucketPerson, - model, - 0 /*time*/, - 1.0 /*value*/, - 1.0 /*count*/, - 0.1 /*probability*/, - TTail2Vec{maths_t::E_RightTail}, - I, - influencerValues, - influences); + computeInfluences(calculator, model_t::E_IndividualIndicatorOfBucketPerson, + model, 0 /*time*/, 1.0 /*value*/, 1.0 /*count*/, + 0.1 /*probability*/, TTail2Vec{maths_t::E_RightTail}, + I, influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1), ((I, i2), 1), ((I, i3), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1), ((I, i2), 1), ((I, i3), 1)]"), + core::CContainerPrinter::print(influences)); } /*{ LOG_DEBUG(<< "Test correlated"); @@ -1478,11 +1444,13 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat core_t::TTime bucketLength{600}; maths::CTimeSeriesDecomposition trend{0.0, bucketLength}; - maths::CNormalMeanPrecConjugate prior = maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); + maths::CNormalMeanPrecConjugate prior = + maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData); maths::CMultivariateNormalConjugate<2> multivariatePrior = maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData); maths::CUnivariateTimeSeriesModel univariateModel(params(bucketLength), 0, trend, prior); - maths::CMultivariateTimeSeriesModel multivariateModel(params(bucketLength), trend, multivariatePrior); + maths::CMultivariateTimeSeriesModel multivariateModel(params(bucketLength), + trend, multivariatePrior); TDoubleVec samples; rng.generateNormalSamples(10.0, 1.0, 50, samples); @@ -1494,7 +1462,8 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat rng.generateMultivariateNormalSamples(mean, covariances, 50, multivariateSamples); core_t::TTime now{addSamples(bucketLength, multivariateSamples, multivariateModel)}; - model_t::TFeatureVec features{model_t::E_IndividualSumByBucketAndPerson, model_t::E_IndividualMeanLatLongByPerson}; + model_t::TFeatureVec features{model_t::E_IndividualSumByBucketAndPerson, + model_t::E_IndividualMeanLatLongByPerson}; const maths::CModel* models[]{&univariateModel, &multivariateModel}; maths_t::TWeightStyleVec weightStyles; @@ -1509,34 +1478,32 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat calculator.addAggregator(maths::CJointProbabilityOfLessLikelySamples()); calculator.addAggregator(maths::CProbabilityOfExtremeSample()); - TDoubleVecVec values{{12.0, 1.0}, - {15.0, 1.0}, - {7.0, 1.5}, - {9.0, 1.0}, - {17.0, 2.0}, - {12.0, 17.0, 1.0}, - {15.0, 20.0, 1.0}, - {7.0, 12.0, 1.5}, - {15.0, 10.0, 1.0}, - {17.0, 22.0, 2.0}}; - TStrCRefDouble1VecDoublePrPrVec influencerValues{{TStrCRef(i2), make_pair(12.0, 1.0)}, - {TStrCRef(i1), make_pair(15.0, 1.0)}, - {TStrCRef(i2), make_pair(7.0, 1.0)}, - {TStrCRef(i2), make_pair(9.0, 1.0)}, - {TStrCRef(i1), make_pair(17.0, 1.0)}, - {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}, - {TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}, - {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}, - {TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}, - {TStrCRef(i1), make_pair(17.0, 22.0, 1.0)}}; + TDoubleVecVec values{{12.0, 1.0}, {15.0, 1.0}, + {7.0, 1.5}, {9.0, 1.0}, + {17.0, 2.0}, {12.0, 17.0, 1.0}, + {15.0, 20.0, 1.0}, {7.0, 12.0, 1.5}, + {15.0, 10.0, 1.0}, {17.0, 22.0, 2.0}}; + TStrCRefDouble1VecDoublePrPrVec influencerValues{ + {TStrCRef(i2), 
make_pair(12.0, 1.0)}, + {TStrCRef(i1), make_pair(15.0, 1.0)}, + {TStrCRef(i2), make_pair(7.0, 1.0)}, + {TStrCRef(i2), make_pair(9.0, 1.0)}, + {TStrCRef(i1), make_pair(17.0, 1.0)}, + {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}, + {TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}, + {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}, + {TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}, + {TStrCRef(i1), make_pair(17.0, 22.0, 1.0)}}; maths::CJointProbabilityOfLessLikelySamples pJoint; maths::CProbabilityOfExtremeSample pExtreme; for (std::size_t i = 0u; i < 5; ++i) { for (std::size_t j = 0u; j < features.size(); ++j) { - TDouble2Vec1Vec value{TDouble2Vec(&values[i + 5 * j][0], &values[i + 5 * j][1 + j])}; - TDouble2Vec4Vec weights{TDouble2Vec(1 + j, values[i + 5 * j][1 + j]), TDouble2Vec(1 + j, 1.0)}; + TDouble2Vec1Vec value{TDouble2Vec(&values[i + 5 * j][0], + &values[i + 5 * j][1 + j])}; + TDouble2Vec4Vec weights{TDouble2Vec(1 + j, values[i + 5 * j][1 + j]), + TDouble2Vec(1 + j, 1.0)}; maths::CModelProbabilityParams params_; params_.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(0.0) @@ -1547,17 +1514,9 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat TTail2Vec tail; model_t::CResultType type; TSize1Vec mostAnomalousCorrelate; - calculator.addProbability(features[j], - 0, - *models[j], - 0 /*elapsedTime*/, - params_, - TTime2Vec1Vec{TTime2Vec{now}}, - value, - p, - tail, - type, - mostAnomalousCorrelate); + calculator.addProbability(features[j], 0, *models[j], 0 /*elapsedTime*/, + params_, TTime2Vec1Vec{TTime2Vec{now}}, value, + p, tail, type, mostAnomalousCorrelate); pJoint.add(p); pExtreme.add(p); model::CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); @@ -1570,7 +1529,8 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat params.s_ComputeProbabilityParams = params_; params.s_Probability = p; params.s_Tail = tail; - calculator.addInfluences(I, TStrCRefDouble1VecDoublePrPrVec{influencerValues[i]}, params); + calculator.addInfluences( + I, TStrCRefDouble1VecDoublePrPrVec{influencerValues[i]}, params); } } @@ -1586,28 +1546,37 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat CPPUNIT_ASSERT(pJoint.calculate(pj)); CPPUNIT_ASSERT(pExtreme.calculate(pe)); - LOG_DEBUG(<< " probability = " << probability << ", expected probability = " << std::min(pj, pe)); + LOG_DEBUG(<< " probability = " << probability + << ", expected probability = " << std::min(pj, pe)); CPPUNIT_ASSERT_DOUBLES_EQUAL(std::min(pe, pj), probability, 1e-10); } { LOG_DEBUG(<< "influencing joint probability"); - TDoubleVecVec values[]{TDoubleVecVec{{12.0, 1.0}, {15.0, 1.0}, {7.0, 1.5}, {9.0, 1.0}, {17.0, 2.0}}, - TDoubleVecVec{{12.0, 17.0, 1.0}, {15.0, 20.0, 1.0}, {7.0, 12.0, 1.5}, {9.0, 14.0, 1.0}, {17.0, 22.0, 2.0}}}; + TDoubleVecVec values[]{ + TDoubleVecVec{{12.0, 1.0}, {15.0, 1.0}, {7.0, 1.5}, {9.0, 1.0}, {17.0, 2.0}}, + TDoubleVecVec{{12.0, 17.0, 1.0}, + {15.0, 20.0, 1.0}, + {7.0, 12.0, 1.5}, + {9.0, 14.0, 1.0}, + {17.0, 22.0, 2.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 1.5)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 2.0)}}}, - 
TStrCRefDouble1VecDoublePrPrVecVec{TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 12.0, 1.5)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 22.0, 2.0)}}}}; + TStrCRefDouble1VecDoublePrPrVecVec{ + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 1.5)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 2.0)}}}, + TStrCRefDouble1VecDoublePrPrVecVec{ + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 12.0, 1.5)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 22.0, 2.0)}}}}; for (std::size_t i = 0u; i < features.size(); ++i) { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], influencerValues[i], influences); + testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], + influencerValues[i], influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), influences.size()); CPPUNIT_ASSERT_EQUAL(i1, *influences[0].first.second); @@ -1617,23 +1586,31 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat { LOG_DEBUG(<< "influencing extreme probability"); - TDoubleVecVec values[]{TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.5, 1.5}, {10.8, 1.5}, {19.0, 1.0}}, - TDoubleVecVec{{11.0, 16.0, 1.0}, {10.5, 15.5, 1.0}, {8.5, 13.5, 1.5}, {10.8, 15.8, 1.5}, {19.0, 24.0, 1.0}}}; + TDoubleVecVec values[]{ + TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.5, 1.5}, {10.8, 1.5}, {19.0, 1.0}}, + TDoubleVecVec{{11.0, 16.0, 1.0}, + {10.5, 15.5, 1.0}, + {8.5, 13.5, 1.5}, + {10.8, 15.8, 1.5}, + {19.0, 24.0, 1.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 1.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 13.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 15.8, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 24.0, 1.0)}}}}; + TStrCRefDouble1VecDoublePrPrVecVec{ + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 1.0)}}, + 
TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 1.0)}}}, + TStrCRefDouble1VecDoublePrPrVecVec{ + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 13.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 15.8, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 24.0, 1.0)}}}}; for (std::size_t i = 0u; i < features.size(); ++i) { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], influencerValues[i], influences); + testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], + influencerValues[i], influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), influences.size()); CPPUNIT_ASSERT_EQUAL(i2, *influences[0].first.second); @@ -1643,26 +1620,43 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat { LOG_DEBUG(<< "marginal influence"); - TDoubleVecVec values[]{TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.0, 1.0}, {10.8, 1.0}, {14.0, 1.0}}, - TDoubleVecVec{{11.0, 16.0, 1.0}, {10.5, 15.5, 1.0}, {8.0, 13.0, 1.0}, {10.8, 15.8, 1.0}, {14.0, 19.0, 1.0}}}; + TDoubleVecVec values[]{ + TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.0, 1.0}, {10.8, 1.0}, {14.0, 1.0}}, + TDoubleVecVec{{11.0, 16.0, 1.0}, + {10.5, 15.5, 1.0}, + {8.0, 13.0, 1.0}, + {10.8, 15.8, 1.0}, + {14.0, 19.0, 1.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 1.0)}, {TStrCRef(i2), make_pair(10.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}, {TStrCRef(i2), make_pair(10.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 1.0)}, {TStrCRef(i2), make_pair(7.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}, {TStrCRef(i2), make_pair(10.6, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(16.0, 1.0)}, {TStrCRef(i2), make_pair(12.0, 1.0)}}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 1.0)}, + {TStrCRef(i2), make_pair(10.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}, + {TStrCRef(i2), make_pair(10.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 1.0)}, + {TStrCRef(i2), make_pair(7.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}, + {TStrCRef(i2), make_pair(10.6, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(16.0, 1.0)}, + {TStrCRef(i2), make_pair(12.0, 1.0)}}}, TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 17.0, 1.0)}, {TStrCRef(i2), make_pair(10.0, 15.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}, {TStrCRef(i2), make_pair(10.5, 15.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 14.0, 1.0)}, {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}, {TStrCRef(i2), make_pair(10.6, 15.6, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(16.0, 21.0, 1.0)}, {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}}}; + 
TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 17.0, 1.0)}, + {TStrCRef(i2), make_pair(10.0, 15.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}, + {TStrCRef(i2), make_pair(10.5, 15.5, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 14.0, 1.0)}, + {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}, + {TStrCRef(i2), make_pair(10.6, 15.6, 1.0)}}, + TStrCRefDouble1VecDoublePrPrVec{ + {TStrCRef(i1), make_pair(16.0, 21.0, 1.0)}, + {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}}}; { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - testProbabilityAndGetInfluences( - model_t::E_IndividualMeanByPerson, univariateModel, now, values[0], influencerValues[0], influences); + testProbabilityAndGetInfluences(model_t::E_IndividualMeanByPerson, + univariateModel, now, values[0], + influencerValues[0], influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), influences.size()); CPPUNIT_ASSERT_EQUAL(i1, *influences[0].first.second); @@ -1670,8 +1664,9 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat } { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - testProbabilityAndGetInfluences( - model_t::E_IndividualMeanLatLongByPerson, multivariateModel, now, values[1], influencerValues[1], influences); + testProbabilityAndGetInfluences(model_t::E_IndividualMeanLatLongByPerson, + multivariateModel, now, values[1], + influencerValues[1], influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), influences.size()); CPPUNIT_ASSERT_EQUAL(i2, *influences[0].first.second); @@ -1683,7 +1678,8 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat } CppUnit::Test* CProbabilityAndInfluenceCalculatorTest::suite() { - CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CProbabilityAndInfluenceCalculatorTest"); + CppUnit::TestSuite* suiteOfTests = + new CppUnit::TestSuite("CProbabilityAndInfluenceCalculatorTest"); suiteOfTests->addTest(new CppUnit::TestCaller<CProbabilityAndInfluenceCalculatorTest>( "CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator", diff --git a/lib/model/unittest/CResourceLimitTest.cc b/lib/model/unittest/CResourceLimitTest.cc index e72de0c8e2..b8a80635e1 100644 --- a/lib/model/unittest/CResourceLimitTest.cc +++ b/lib/model/unittest/CResourceLimitTest.cc @@ -37,15 +37,13 @@ using TStrVec = std::vector<std::string>; class CResultWriter : public ml::model::CHierarchicalResultsVisitor { public: - using TResultsTp = boost::tuple<core_t::TTime, double, std::string, std::string, std::string>; + using TResultsTp = + boost::tuple<core_t::TTime, double, std::string, std::string, std::string>; using TResultsVec = std::vector<TResultsTp>; public: - CResultWriter(const CAnomalyDetectorModelConfig& modelConfig, const CLimits& limits) : m_ModelConfig(modelConfig), m_Limits(limits) {} + CResultWriter(const CAnomalyDetectorModelConfig& modelConfig, const CLimits& limits) + : m_ModelConfig(modelConfig), m_Limits(limits) {} void operator()(CAnomalyDetector& detector, core_t::TTime start, core_t::TTime end) { CHierarchicalResults results; @@ -60,7 +58,9 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor { results.bottomUpBreadthFirst(*this); } - virtual void visit(const ml::model::CHierarchicalResults& results, const ml::model::CHierarchicalResults::TNode& node, bool pivot) { + virtual void visit(const ml::model::CHierarchicalResults& results, + const ml::model::CHierarchicalResults::TNode& node, + 
bool pivot) { if (pivot) { return; } @@ -74,19 +74,23 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor { return; } - LOG_DEBUG(<< "Got anomaly @ " << node.s_BucketStartTime << ": " << node.probability()); + LOG_DEBUG(<< "Got anomaly @ " << node.s_BucketStartTime << ": " + << node.probability()); - ml::model::SAnnotatedProbability::TAttributeProbability1Vec& attributes = node.s_AnnotatedProbability.s_AttributeProbabilities; + ml::model::SAnnotatedProbability::TAttributeProbability1Vec& attributes = + node.s_AnnotatedProbability.s_AttributeProbabilities; - m_Results.push_back(TResultsTp(node.s_BucketStartTime, - node.probability(), - (attributes.empty() ? "" : *attributes[0].s_Attribute), - *node.s_Spec.s_PersonFieldValue, - *node.s_Spec.s_PartitionFieldValue)); + m_Results.push_back(TResultsTp( + node.s_BucketStartTime, node.probability(), + (attributes.empty() ? "" : *attributes[0].s_Attribute), + *node.s_Spec.s_PersonFieldValue, *node.s_Spec.s_PartitionFieldValue)); } - bool operator()(ml::core_t::TTime time, const ml::model::CHierarchicalResults::TNode& node, bool isBucketInfluencer) { - LOG_DEBUG(<< (isBucketInfluencer ? "BucketInfluencer" : "Influencer ") << node.s_Spec.print() << " initial score " + bool operator()(ml::core_t::TTime time, + const ml::model::CHierarchicalResults::TNode& node, + bool isBucketInfluencer) { + LOG_DEBUG(<< (isBucketInfluencer ? "BucketInfluencer" : "Influencer ") + << node.s_Spec.print() << " initial score " << node.probability() << ", time: " << time); return true; @@ -103,11 +107,12 @@ CppUnit::Test* CResourceLimitTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CResourceLimitTest"); - suiteOfTests->addTest(new CppUnit::TestCaller<CResourceLimitTest>("CResourceLimitTest::testLimitBy", &CResourceLimitTest::testLimitBy)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CResourceLimitTest>("CResourceLimitTest::testLimitByOver", &CResourceLimitTest::testLimitByOver)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CResourceLimitTest>("CResourceLimitTest::testLargeAllocations", &CResourceLimitTest::testLargeAllocations)); + suiteOfTests->addTest(new CppUnit::TestCaller<CResourceLimitTest>( + "CResourceLimitTest::testLimitBy", &CResourceLimitTest::testLimitBy)); + suiteOfTests->addTest(new CppUnit::TestCaller<CResourceLimitTest>( + "CResourceLimitTest::testLimitByOver", &CResourceLimitTest::testLimitByOver)); + suiteOfTests->addTest(new CppUnit::TestCaller<CResourceLimitTest>( + "CResourceLimitTest::testLargeAllocations", &CResourceLimitTest::testLargeAllocations)); return suiteOfTests; } @@ -116,32 +121,25 @@ void CResourceLimitTest::testLimitBy() { // turn on resource limiting and still get the same results static const core_t::TTime BUCKET_LENGTH(3600); - static const core_t::TTime FIRST_TIME(maths::CIntegerTools::ceil(core_t::TTime(1407428000), BUCKET_LENGTH)); + static const core_t::TTime FIRST_TIME( + maths::CIntegerTools::ceil(core_t::TTime(1407428000), BUCKET_LENGTH)); ::CResultWriter::TResultsVec results; { - CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = + CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier - function_t::E_IndividualMetric, - false, - model_t::E_XF_None, - "value", - "colour"); + function_t::E_IndividualMetric, false, + model_t::E_XF_None, "value", "colour"); CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - "", - FIRST_TIME, + limits, modelConfig, "", 
FIRST_TIME, modelConfig.factory(key)); ::CResultWriter writer(modelConfig, limits); - importCsvDataWithLimiter(FIRST_TIME, - BUCKET_LENGTH, - writer, + importCsvDataWithLimiter(FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/resource_limits_8_series.csv", - detector, - std::numeric_limits<std::size_t>::max(), + detector, std::numeric_limits<std::size_t>::max(), limits.resourceMonitor()); results = writer.results(); @@ -155,24 +153,20 @@ void CResourceLimitTest::testLimitBy() { { // This time, repeat the test but set a resource limit to prevent // any models from being created. - CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = + CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier - function_t::E_IndividualMetric, - false, - model_t::E_XF_None, - "value", - "colour"); + function_t::E_IndividualMetric, false, + model_t::E_XF_None, "value", "colour"); CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - "", - FIRST_TIME, + limits, modelConfig, "", FIRST_TIME, modelConfig.factory(key)); ::CResultWriter writer(modelConfig, limits); - importCsvDataWithLimiter( - FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/resource_limits_8_series.csv", detector, 1, limits.resourceMonitor()); + importCsvDataWithLimiter(FIRST_TIME, BUCKET_LENGTH, writer, + "testfiles/resource_limits_8_series.csv", + detector, 1, limits.resourceMonitor()); const ::CResultWriter::TResultsVec& secondResults = writer.results(); @@ -186,32 +180,24 @@ void CResourceLimitTest::testLimitByOver() { // non-limited data, but not results from limited data static const core_t::TTime BUCKET_LENGTH(3600); - static const core_t::TTime FIRST_TIME(maths::CIntegerTools::ceil(core_t::TTime(1407441600), BUCKET_LENGTH)); + static const core_t::TTime FIRST_TIME( + maths::CIntegerTools::ceil(core_t::TTime(1407441600), BUCKET_LENGTH)); ::CResultWriter::TResultsVec results; { - CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = + CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier - function_t::E_PopulationMetric, - false, - model_t::E_XF_None, - "value", - "colour", - "species"); + function_t::E_PopulationMetric, false, + model_t::E_XF_None, "value", "colour", "species"); CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - "", - FIRST_TIME, + limits, modelConfig, "", FIRST_TIME, modelConfig.factory(key)); ::CResultWriter writer(modelConfig, limits); - importCsvDataWithLimiter(FIRST_TIME, - BUCKET_LENGTH, - writer, - "testfiles/resource_limits_8_2over.csv", - detector, + importCsvDataWithLimiter(FIRST_TIME, BUCKET_LENGTH, writer, + "testfiles/resource_limits_8_2over.csv", detector, std::numeric_limits<std::size_t>::max(), limits.resourceMonitor()); @@ -224,25 +210,20 @@ void CResourceLimitTest::testLimitByOver() { } // Now limit after 1 sample, so only expect no results - CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = + CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier - function_t::E_PopulationMetric, - false, - model_t::E_XF_None, - "value", - "colour", - "species"); + function_t::E_PopulationMetric, false, model_t::E_XF_None, + "value", "colour", "species"); CAnomalyDetector detector(1, // identifier - limits, 
- modelConfig, - "", - FIRST_TIME, + limits, modelConfig, "", FIRST_TIME, modelConfig.factory(key)); ::CResultWriter writer(modelConfig, limits); - importCsvDataWithLimiter( - FIRST_TIME, BUCKET_LENGTH, writer, "testfiles/resource_limits_8_2over.csv", detector, 1, limits.resourceMonitor()); + importCsvDataWithLimiter(FIRST_TIME, BUCKET_LENGTH, writer, + "testfiles/resource_limits_8_2over.csv", detector, + 1, limits.resourceMonitor()); const ::CResultWriter::TResultsVec& secondResults = writer.results(); @@ -269,9 +250,7 @@ class CMockEventRateModel : public ml::model::CEventRateModel { TFeatureCorrelationsPtrPrVec(), personProbabilityPrior, influenceCalculators), - m_ResourceMonitor(resourceMonitor), - m_NewPeople(0), - m_NewAttributes(0) {} + m_ResourceMonitor(resourceMonitor), m_NewPeople(0), m_NewAttributes(0) {} virtual void updateRecycledModels() { // Do nothing @@ -283,7 +262,9 @@ class CMockEventRateModel : public ml::model::CEventRateModel { this->CEventRateModel::createNewModels(n, m); } - void test(core_t::TTime time) { this->createUpdateNewModels(time, m_ResourceMonitor); } + void test(core_t::TTime time) { + this->createUpdateNewModels(time, m_ResourceMonitor); + } std::size_t getNewPeople() const { return m_NewPeople; } @@ -310,9 +291,7 @@ class CMockMetricModel : public ml::model::CMetricModel { TFeatureMultivariatePriorPtrPrVec(), TFeatureCorrelationsPtrPrVec(), influenceCalculators), - m_ResourceMonitor(resourceMonitor), - m_NewPeople(0), - m_NewAttributes(0) {} + m_ResourceMonitor(resourceMonitor), m_NewPeople(0), m_NewAttributes(0) {} virtual void updateRecycledModels() { // Do nothing @@ -324,7 +303,9 @@ class CMockMetricModel : public ml::model::CMetricModel { this->CMetricModel::createNewModels(n, m); } - void test(core_t::TTime time) { this->createUpdateNewModels(time, m_ResourceMonitor); } + void test(core_t::TTime time) { + this->createUpdateNewModels(time, m_ResourceMonitor); + } std::size_t getNewPeople() const { return m_NewPeople; } @@ -336,7 +317,10 @@ class CMockMetricModel : public ml::model::CMetricModel { std::size_t m_NewAttributes; }; -void addArrival(core_t::TTime time, const std::string& p, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) { +void addArrival(core_t::TTime time, + const std::string& p, + CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields; fields.push_back(&p); CEventData result; @@ -344,7 +328,11 @@ void addArrival(core_t::TTime time, const std::string& p, CDataGatherer& gathere gatherer.addArrival(fields, result, resourceMonitor); } -void addPersonData(std::size_t start, std::size_t end, core_t::TTime time, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) { +void addPersonData(std::size_t start, + std::size_t end, + core_t::TTime time, + CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor) { for (std::size_t i = start; i < end; i++) { std::ostringstream ssA; ssA << "person" << i; @@ -354,7 +342,10 @@ void addPersonData(std::size_t start, std::size_t end, core_t::TTime time, CData const std::string VALUE("23"); -void addMetricArrival(core_t::TTime time, const std::string& p, CDataGatherer& gatherer, CResourceMonitor& resourceMonitor) { +void addMetricArrival(core_t::TTime time, + const std::string& p, + CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor) { CDataGatherer::TStrCPtrVec fields; fields.push_back(&p); fields.push_back(&VALUE); @@ -393,17 +384,17 @@ void CResourceLimitTest::testLargeAllocations() { factory.features(features); 
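// Editorial sketch (not part of the patch; names are hypothetical): the mock
// models defined above override createNewModels() so the resource-limit tests
// can count exactly how many people/attributes the CResourceMonitor allowed.
// The counting hook they both use boils down to:
//
//     class CCountingModel : public CBaseModel {
//     public:
//         virtual void createNewModels(std::size_t n, std::size_t m) {
//             m_NewPeople += n;      // people permitted by the monitor
//             m_NewAttributes += m;  // attributes permitted by the monitor
//             this->CBaseModel::createNewModels(n, m);
//         }
//     private:
//         std::size_t m_NewPeople = 0;
//         std::size_t m_NewAttributes = 0;
//     };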
CModelFactory::SGathererInitializationData gathererInitData(FIRST_TIME); - CModelFactory::TDataGathererPtr gatherer(dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData))); + CModelFactory::TDataGathererPtr gatherer( + dynamic_cast<CDataGatherer*>(factory.makeDataGatherer(gathererInitData))); CResourceMonitor resourceMonitor; resourceMonitor.memoryLimit(std::size_t(70)); const maths::CMultinomialConjugate conjugate; - ::CMockEventRateModel model(factory.modelParams(), - gatherer, - factory.defaultFeatureModels(features, BUCKET_LENGTH, 0.4, true), - conjugate, - CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec(), - resourceMonitor); + ::CMockEventRateModel model( + factory.modelParams(), gatherer, + factory.defaultFeatureModels(features, BUCKET_LENGTH, 0.4, true), conjugate, + CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec(), + resourceMonitor); CPPUNIT_ASSERT_EQUAL(model_t::E_EventRateOnline, model.category()); CPPUNIT_ASSERT(model.isPopulation() == false); @@ -472,11 +463,11 @@ void CResourceLimitTest::testLargeAllocations() { CResourceMonitor resourceMonitor; resourceMonitor.memoryLimit(std::size_t(100)); - ::CMockMetricModel model(factory.modelParams(), - gatherer, - factory.defaultFeatureModels(features, BUCKET_LENGTH, 0.4, true), - CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec(), - resourceMonitor); + ::CMockMetricModel model( + factory.modelParams(), gatherer, + factory.defaultFeatureModels(features, BUCKET_LENGTH, 0.4, true), + CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec(), + resourceMonitor); CPPUNIT_ASSERT_EQUAL(model_t::E_MetricOnline, model.category()); CPPUNIT_ASSERT(model.isPopulation() == false); diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index ddbbed7de8..e4d65f4f51 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -21,12 +21,12 @@ using namespace model; CppUnit::Test* CResourceMonitorTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CResourceMonitorTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CResourceMonitorTest>("CResourceMonitorTest::testMonitor", &CResourceMonitorTest::testMonitor)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CResourceMonitorTest>("CResourceMonitorTest::testPruning", &CResourceMonitorTest::testPruning)); - suiteOfTests->addTest( - new CppUnit::TestCaller<CResourceMonitorTest>("CResourceMonitorTest::testExtraMemory", &CResourceMonitorTest::testExtraMemory)); + suiteOfTests->addTest(new CppUnit::TestCaller<CResourceMonitorTest>( + "CResourceMonitorTest::testMonitor", &CResourceMonitorTest::testMonitor)); + suiteOfTests->addTest(new CppUnit::TestCaller<CResourceMonitorTest>( + "CResourceMonitorTest::testPruning", &CResourceMonitorTest::testPruning)); + suiteOfTests->addTest(new CppUnit::TestCaller<CResourceMonitorTest>( + "CResourceMonitorTest::testExtraMemory", &CResourceMonitorTest::testExtraMemory)); return suiteOfTests; } @@ -42,32 +42,25 @@ void CResourceMonitorTest::testMonitor() { const core_t::TTime FIRST_TIME(358556400); const core_t::TTime BUCKET_LENGTH(3600); - CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = + CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier - function_t::E_IndividualMetric, - false, - model_t::E_XF_None, - "value", - "colour"); + function_t::E_IndividualMetric, false, model_t::E_XF_None, + "value", "colour"); CAnomalyDetector detector1(1, // identifier - limits, - modelConfig, - EMPTY_STRING, - 
FIRST_TIME, + limits, modelConfig, EMPTY_STRING, FIRST_TIME, modelConfig.factory(key)); CAnomalyDetector detector2(2, // identifier - limits, - modelConfig, - EMPTY_STRING, - FIRST_TIME, + limits, modelConfig, EMPTY_STRING, FIRST_TIME, modelConfig.factory(key)); - std::size_t mem = - detector1.memoryUsage() + detector2.memoryUsage() + CStringStore::names().memoryUsage() + CStringStore::influencers().memoryUsage(); + std::size_t mem = detector1.memoryUsage() + detector2.memoryUsage() + + CStringStore::names().memoryUsage() + + CStringStore::influencers().memoryUsage(); { // Test default constructor @@ -280,7 +273,8 @@ void CResourceMonitorTest::testMonitor() { mon.m_CurrentAnomalyDetectorMemory += 1 + (origTotalMemory + 9) / 10; CPPUNIT_ASSERT(mon.needToSendReport()); mon.sendMemoryUsageReport(0); - CPPUNIT_ASSERT_EQUAL(origTotalMemory + 11 + (origTotalMemory + 9) / 10, m_CallbackResults.s_Usage); + CPPUNIT_ASSERT_EQUAL(origTotalMemory + 11 + (origTotalMemory + 9) / 10, + m_CallbackResults.s_Usage); // Huge increase should trigger a need mon.m_CurrentAnomalyDetectorMemory = 1000; @@ -309,24 +303,19 @@ void CResourceMonitorTest::testPruning() { const core_t::TTime FIRST_TIME(358556400); const core_t::TTime BUCKET_LENGTH(3600); - CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = + CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier - function_t::E_IndividualMetric, - false, - model_t::E_XF_None, - "value", - "colour"); + function_t::E_IndividualMetric, false, model_t::E_XF_None, + "value", "colour"); CResourceMonitor& monitor = limits.resourceMonitor(); monitor.memoryLimit(140); CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - EMPTY_STRING, - FIRST_TIME, + limits, modelConfig, EMPTY_STRING, FIRST_TIME, modelConfig.factory(key)); core_t::TTime bucket = FIRST_TIME; @@ -381,25 +370,20 @@ void CResourceMonitorTest::testExtraMemory() { const core_t::TTime FIRST_TIME(358556400); const core_t::TTime BUCKET_LENGTH(3600); - CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); + CAnomalyDetectorModelConfig modelConfig = + CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); CLimits limits; CSearchKey key(1, // identifier - function_t::E_IndividualMetric, - false, - model_t::E_XF_None, - "value", - "colour"); + function_t::E_IndividualMetric, false, model_t::E_XF_None, + "value", "colour"); CResourceMonitor& monitor = limits.resourceMonitor(); // set the limit to 1 MB monitor.memoryLimit(1); CAnomalyDetector detector(1, // identifier - limits, - modelConfig, - EMPTY_STRING, - FIRST_TIME, + limits, modelConfig, EMPTY_STRING, FIRST_TIME, modelConfig.factory(key)); monitor.forceRefresh(detector); @@ -439,7 +423,8 @@ void CResourceMonitorTest::addTestData(core_t::TTime& firstTime, std::size_t numBuckets = 0; - for (core_t::TTime time = firstTime; time < static_cast<core_t::TTime>(firstTime + bucketLength * buckets); + for (core_t::TTime time = firstTime; + time < static_cast<core_t::TTime>(firstTime + bucketLength * buckets); time += (bucketLength / std::max(std::size_t(1), newPeoplePerBucket))) { bool newBucket = false; for (; bucketStart + bucketLength <= time; bucketStart += bucketLength) { diff --git a/lib/model/unittest/CRuleConditionTest.cc b/lib/model/unittest/CRuleConditionTest.cc index 31f998bdfc..0489763142 100644 --- a/lib/model/unittest/CRuleConditionTest.cc +++ b/lib/model/unittest/CRuleConditionTest.cc 
@@ -33,8 +33,8 @@ const std::string EMPTY_STRING; CppUnit::Test* CRuleConditionTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CRuleConditionTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller<CRuleConditionTest>("CRuleConditionTest::testTimeContition", &CRuleConditionTest::testTimeContition)); + suiteOfTests->addTest(new CppUnit::TestCaller<CRuleConditionTest>( + "CRuleConditionTest::testTimeContition", &CRuleConditionTest::testTimeContition)); return suiteOfTests; } @@ -48,21 +48,10 @@ void CRuleConditionTest::testTimeContition() { model_t::TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer(model_t::E_Metric, - model_t::E_None, - params, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - EMPTY_STRING, - TStrVec(), - false, - key, - features, - startTime, - 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), + false, key, features, startTime, 0)); CMockModel model(params, gathererPtr, influenceCalculators); @@ -76,15 +65,12 @@ void CRuleConditionTest::testTimeContition() { CPPUNIT_ASSERT(condition.isCategorical() == false); model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(condition.test(model, - model_t::E_IndividualCountByBucketAndPerson, - resultType, - false, - std::size_t(0), - std::size_t(1), - core_t::TTime(450)) == false); - CPPUNIT_ASSERT(condition.test( - model, model_t::E_IndividualCountByBucketAndPerson, resultType, false, std::size_t(0), std::size_t(1), core_t::TTime(550))); + CPPUNIT_ASSERT(condition.test(model, model_t::E_IndividualCountByBucketAndPerson, + resultType, false, std::size_t(0), + std::size_t(1), core_t::TTime(450)) == false); + CPPUNIT_ASSERT(condition.test(model, model_t::E_IndividualCountByBucketAndPerson, + resultType, false, std::size_t(0), + std::size_t(1), core_t::TTime(550))); } { @@ -97,14 +83,11 @@ void CRuleConditionTest::testTimeContition() { CPPUNIT_ASSERT(condition.isCategorical() == false); model_t::CResultType resultType(model_t::CResultType::E_Final); - CPPUNIT_ASSERT(condition.test(model, - model_t::E_IndividualCountByBucketAndPerson, - resultType, - false, - std::size_t(0), - std::size_t(1), - core_t::TTime(600)) == false); - CPPUNIT_ASSERT(condition.test( - model, model_t::E_IndividualCountByBucketAndPerson, resultType, false, std::size_t(0), std::size_t(1), core_t::TTime(599))); + CPPUNIT_ASSERT(condition.test(model, model_t::E_IndividualCountByBucketAndPerson, + resultType, false, std::size_t(0), + std::size_t(1), core_t::TTime(600)) == false); + CPPUNIT_ASSERT(condition.test(model, model_t::E_IndividualCountByBucketAndPerson, + resultType, false, std::size_t(0), + std::size_t(1), core_t::TTime(599))); } } diff --git a/lib/model/unittest/CSampleQueueTest.cc b/lib/model/unittest/CSampleQueueTest.cc index 460bc44781..edf5551dfb 100644 --- a/lib/model/unittest/CSampleQueueTest.cc +++ b/lib/model/unittest/CSampleQueueTest.cc @@ -926,9 +926,12 @@ void CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets() { TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { TDoubleVec testData; - rng.generateUniformSamples(static_cast<double>(latestTime - latency), static_cast<double>(latestTime), 1, testData); - 
latestTime += 60 + static_cast<core_t::TTime>(40.0 * std::sin(boost::math::constants::two_pi<double>() * - static_cast<double>(latestTime % 86400) / 86400.0)); + rng.generateUniformSamples(static_cast<double>(latestTime - latency), + static_cast<double>(latestTime), 1, testData); + latestTime += + 60 + static_cast<core_t::TTime>( + 40.0 * std::sin(boost::math::constants::two_pi<double>() * + static_cast<double>(latestTime % 86400) / 86400.0)); core_t::TTime measurementTime = static_cast<core_t::TTime>(testData[0]); queue.add(measurementTime, {1.0}, 1u, sampleCount); } @@ -963,8 +966,10 @@ void CSampleQueueTest::testPersistence() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - TTestSampleQueue restoredQueue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - traverser.traverseSubLevel(boost::bind(&TTestSampleQueue::acceptRestoreTraverser, &restoredQueue, _1)); + TTestSampleQueue restoredQueue(1, sampleCountFactor, latencyBuckets, + growthFactor, bucketLength); + traverser.traverseSubLevel( + boost::bind(&TTestSampleQueue::acceptRestoreTraverser, &restoredQueue, _1)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), restoredQueue.size()); @@ -1002,9 +1007,11 @@ void CSampleQueueTest::testQualityOfSamplesGivenConstantRate() { TSampleVec samples; core_t::TTime latestTime = bucketLength * (latencyBuckets + 1); TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { + for (std::size_t measurementId = 0; + measurementId < numberOfMeasurements; ++measurementId) { TDoubleVec testData; - rng.generateUniformSamples(static_cast<double>(latestTime - latency), static_cast<double>(latestTime), 1, testData); + rng.generateUniformSamples(static_cast<double>(latestTime - latency), + static_cast<double>(latestTime), 1, testData); latestTime += 60; core_t::TTime measurementTime = static_cast<core_t::TTime>(testData[0]); queue.add(measurementTime, {1.0}, 1u, sampleCount); @@ -1027,7 +1034,8 @@ void CSampleQueueTest::testQualityOfSamplesGivenConstantRate() { LOG_DEBUG(<< "Results for run: " << runId); LOG_DEBUG(<< "Mean variance scale = " << maths::CBasicStatistics::mean(varianceStat)); - LOG_DEBUG(<< "Variance of variance scale = " << maths::CBasicStatistics::variance(varianceStat)); + LOG_DEBUG(<< "Variance of variance scale = " + << maths::CBasicStatistics::variance(varianceStat)); LOG_DEBUG(<< "Top min variance scale = " << varianceMin.print()); LOG_DEBUG(<< "Top max variance scale = " << varianceMax.print()); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceStat) > 0.98); @@ -1064,11 +1072,15 @@ void CSampleQueueTest::testQualityOfSamplesGivenVariableRate() { TSampleVec samples; core_t::TTime latestTime = bucketLength * (latencyBuckets + 1); TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { + for (std::size_t measurementId = 0; + measurementId < numberOfMeasurements; ++measurementId) { TDoubleVec testData; - rng.generateUniformSamples(static_cast<double>(latestTime - latency), static_cast<double>(latestTime), 1, testData); - latestTime += 60 + static_cast<core_t::TTime>(40.0 * std::sin(boost::math::constants::two_pi<double>() * - static_cast<double>(latestTime % 86400) / 86400.0)); + rng.generateUniformSamples(static_cast<double>(latestTime - latency), + static_cast<double>(latestTime), 1, testData); + latestTime += + 60 + static_cast<core_t::TTime>( + 40.0 * std::sin(boost::math::constants::two_pi<double>() * + static_cast<double>(latestTime % 86400) / 86400.0)); core_t::TTime measurementTime = 
static_cast(testData[0]); queue.add(measurementTime, {1.0}, 1u, sampleCount); } @@ -1090,7 +1102,8 @@ void CSampleQueueTest::testQualityOfSamplesGivenVariableRate() { LOG_DEBUG(<< "Results for run: " << runId); LOG_DEBUG(<< "Mean variance scale = " << maths::CBasicStatistics::mean(varianceStat)); - LOG_DEBUG(<< "Variance of variance scale = " << maths::CBasicStatistics::variance(varianceStat)); + LOG_DEBUG(<< "Variance of variance scale = " + << maths::CBasicStatistics::variance(varianceStat)); LOG_DEBUG(<< "Top min variance scale = " << varianceMin.print()); LOG_DEBUG(<< "Top max variance scale = " << varianceMax.print()); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceStat) > 0.97); @@ -1138,7 +1151,8 @@ void CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder } LOG_DEBUG(<< "Mean variance scale = " << maths::CBasicStatistics::mean(varianceStat)); - LOG_DEBUG(<< "Variance of variance scale = " << maths::CBasicStatistics::variance(varianceStat)); + LOG_DEBUG(<< "Variance of variance scale = " + << maths::CBasicStatistics::variance(varianceStat)); LOG_DEBUG(<< "Min variance scale = " << varianceMin[0]); LOG_DEBUG(<< "Max variance scale = " << varianceMax[0]); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(varianceStat) >= 0.999); @@ -1151,121 +1165,142 @@ void CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder CppUnit::Test* CSampleQueueTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CSampleQueueTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testSampleToString", &CSampleQueueTest::testSampleToString)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testSampleFromString", &CSampleQueueTest::testSampleFromString)); - - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample", - &CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenQueueIsFullShouldResize", - &CSampleQueueTest::testAddGivenQueueIsFullShouldResize)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample", - &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSampleToString", &CSampleQueueTest::testSampleToString)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSampleFromString", &CSampleQueueTest::testSampleFromString)); + + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample", + &CSampleQueueTest::testAddGivenQueueIsEmptyShouldCreateNewSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenQueueIsFullShouldResize", + &CSampleQueueTest::testAddGivenQueueIsFullShouldResize)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample", + &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample)); suiteOfTests->addTest(new CppUnit::TestCaller( "CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket", &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket)); - suiteOfTests->addTest( - new 
CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample", - &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample", - &CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample", - &CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample)); - - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample", + &CSampleQueueTest::testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample", + &CSampleQueueTest::testAddGivenTimeIsInOrderAndFarFromLatestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample", + &CSampleQueueTest::testAddGivenTimeIsWithinFullLatestSubSample)); + + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample)); suiteOfTests->addTest(new CppUnit::TestCaller( "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket", &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndWithinSomeSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest", + 
&CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest)); suiteOfTests->addTest(new CppUnit::TestCaller( "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket", &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples)); suiteOfTests->addTest(new CppUnit::TestCaller( "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace", &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace)); suiteOfTests->addTest(new CppUnit::TestCaller( "CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace", &CSampleQueueTest::testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap", - &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap)); - - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testCanSampleGivenEmptyQueue", - &CSampleQueueTest::testCanSampleGivenEmptyQueue)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testCanSample", &CSampleQueueTest::testCanSample)); - - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated", - &CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated)); - suiteOfTests->addTest( - new 
CppUnit::TestCaller("CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated", - &CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder", - &CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder", - &CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenNoSampleToBeCreated", - &CSampleQueueTest::testSampleGivenNoSampleToBeCreated)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess", - &CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess)); - - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenEmptyQueue", - &CSampleQueueTest::testResetBucketGivenEmptyQueue)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample", - &CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample", - &CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples", - &CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample", - &CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample", - &CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample", - &CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample)); - - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets", - &CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets)); - - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testPersistence", &CSampleQueueTest::testPersistence)); - - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testQualityOfSamplesGivenConstantRate", - &CSampleQueueTest::testQualityOfSamplesGivenConstantRate)); - suiteOfTests->addTest(new CppUnit::TestCaller("CSampleQueueTest::testQualityOfSamplesGivenVariableRate", - &CSampleQueueTest::testQualityOfSamplesGivenVariableRate)); - suiteOfTests->addTest( - new CppUnit::TestCaller("CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder", - &CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap", + &CSampleQueueTest::testAddGivenTimeIsHistoricalAndFallsInTooSmallGap)); + + suiteOfTests->addTest(new CppUnit::TestCaller( 
+ "CSampleQueueTest::testCanSampleGivenEmptyQueue", + &CSampleQueueTest::testCanSampleGivenEmptyQueue)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testCanSample", &CSampleQueueTest::testCanSample)); + + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated", + &CSampleQueueTest::testSampleGivenExactlyOneSampleOfExactCountToBeCreated)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated", + &CSampleQueueTest::testSampleGivenExactlyOneSampleOfOverCountToBeCreated)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder", + &CSampleQueueTest::testSampleGivenOneSampleToBeCreatedAndRemainder)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder", + &CSampleQueueTest::testSampleGivenTwoSamplesToBeCreatedAndRemainder)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSampleGivenNoSampleToBeCreated", + &CSampleQueueTest::testSampleGivenNoSampleToBeCreated)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess", + &CSampleQueueTest::testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess)); + + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testResetBucketGivenEmptyQueue", + &CSampleQueueTest::testResetBucketGivenEmptyQueue)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample", + &CSampleQueueTest::testResetBucketGivenBucketBeforeEarliestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample", + &CSampleQueueTest::testResetBucketGivenBucketAtEarliestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples", + &CSampleQueueTest::testResetBucketGivenBucketInBetweenWithoutAnySubSamples)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample", + &CSampleQueueTest::testResetBucketGivenBucketAtInBetweenSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample", + &CSampleQueueTest::testResetBucketGivenBucketAtLatestSubSample)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample", + &CSampleQueueTest::testResetBucketGivenBucketAfterLatestSubSample)); + + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets", + &CSampleQueueTest::testSubSamplesNeverSpanOverDifferentBuckets)); + + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testPersistence", &CSampleQueueTest::testPersistence)); + + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testQualityOfSamplesGivenConstantRate", + &CSampleQueueTest::testQualityOfSamplesGivenConstantRate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testQualityOfSamplesGivenVariableRate", + &CSampleQueueTest::testQualityOfSamplesGivenVariableRate)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder", + 
&CSampleQueueTest::testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder)); return suiteOfTests; } diff --git a/lib/model/unittest/CStringStoreTest.cc b/lib/model/unittest/CStringStoreTest.cc index 950776bd1d..25add384b7 100644 --- a/lib/model/unittest/CStringStoreTest.cc +++ b/lib/model/unittest/CStringStoreTest.cc @@ -30,13 +30,16 @@ class CStringThread : public core::CThread { using TCppUnitExceptionP = boost::shared_ptr; public: - CStringThread(std::size_t i, const TStrVec& strings) : m_I(i), m_Strings(strings) {} + CStringThread(std::size_t i, const TStrVec& strings) + : m_I(i), m_Strings(strings) {} - void uniques(TStrCPtrUSet& result) const { result.insert(m_UniquePtrs.begin(), m_UniquePtrs.end()); } + void uniques(TStrCPtrUSet& result) const { + result.insert(m_UniquePtrs.begin(), m_UniquePtrs.end()); + } void propagateLastThreadAssert() { if (m_LastException) { - throw *m_LastException; + throw * m_LastException; } } @@ -143,7 +146,8 @@ void CStringStoreTest::testStringStore() { CPPUNIT_ASSERT_EQUAL(strings.size(), CStringStore::names().m_Strings.size()); CStringStore::names().pruneNotThreadSafe(); CPPUNIT_ASSERT_EQUAL(strings.size(), CStringStore::names().m_Strings.size()); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), CStringStore::influencers().m_Strings.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), + CStringStore::influencers().m_Strings.size()); for (std::size_t i = 0; i < threads.size(); ++i) { // CppUnit won't automatically catch the exceptions thrown by @@ -242,9 +246,10 @@ void CStringStoreTest::testMemUsage() { CppUnit::Test* CStringStoreTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CStringStoreTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CStringStoreTest::testStringStore", &CStringStoreTest::testStringStore)); - suiteOfTests->addTest(new CppUnit::TestCaller("CStringStoreTest::testMemUsage", &CStringStoreTest::testMemUsage)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringStoreTest::testStringStore", &CStringStoreTest::testStringStore)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CStringStoreTest::testMemUsage", &CStringStoreTest::testMemUsage)); return suiteOfTests; } diff --git a/lib/model/unittest/CToolsTest.cc b/lib/model/unittest/CToolsTest.cc index ab0018cb52..4ef5fd24c5 100644 --- a/lib/model/unittest/CToolsTest.cc +++ b/lib/model/unittest/CToolsTest.cc @@ -150,8 +150,8 @@ void CToolsTest::testProbabilityAggregator() { CppUnit::Test* CToolsTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CToolsTest"); - suiteOfTests->addTest( - new CppUnit::TestCaller("CToolsTest::testProbabilityAggregator", &CToolsTest::testProbabilityAggregator)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CToolsTest::testProbabilityAggregator", &CToolsTest::testProbabilityAggregator)); return suiteOfTests; } diff --git a/lib/model/unittest/Mocks.cc b/lib/model/unittest/Mocks.cc index 7cba82b375..08e36aec7f 100644 --- a/lib/model/unittest/Mocks.cc +++ b/lib/model/unittest/Mocks.cc @@ -14,7 +14,8 @@ namespace model { CMockModel::CMockModel(const SModelParams& params, const TDataGathererPtr& dataGatherer, const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators) - : CAnomalyDetectorModel(params, dataGatherer, influenceCalculators), m_IsPopulation(false) { + : CAnomalyDetectorModel(params, dataGatherer, influenceCalculators), + m_IsPopulation(false) { } void CMockModel::acceptPersistInserter(core::CStatePersistInserter& /*inserter*/) const { @@ -44,7 +45,8 @@ bool CMockModel::isMetric() 
const { return false; } -CMockModel::TOptionalUInt64 CMockModel::currentBucketCount(std::size_t /*pid*/, core_t::TTime /*time*/) const { +CMockModel::TOptionalUInt64 +CMockModel::currentBucketCount(std::size_t /*pid*/, core_t::TTime /*time*/) const { CAnomalyDetectorModel::TOptionalUInt64 count; return count; } @@ -54,8 +56,10 @@ CMockModel::TOptionalDouble CMockModel::baselineBucketCount(std::size_t /*pid*/) return count; } -CMockModel::TDouble1Vec -CMockModel::currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const { +CMockModel::TDouble1Vec CMockModel::currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const { auto i = m_BucketValues.find({feature, core::make_triple(pid, cid, time)}); return i != m_BucketValues.end() ? i->second : TDouble1Vec(); } @@ -77,13 +81,19 @@ bool CMockModel::bucketStatsAvailable(core_t::TTime /*time*/) const { void CMockModel::currentBucketPersonIds(core_t::TTime /*time*/, TSizeVec& /*result*/) const { } -void CMockModel::sampleBucketStatistics(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/, CResourceMonitor& /*resourceMonitor*/) { +void CMockModel::sampleBucketStatistics(core_t::TTime /*startTime*/, + core_t::TTime /*endTime*/, + CResourceMonitor& /*resourceMonitor*/) { } -void CMockModel::sample(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/, CResourceMonitor& /*resourceMonitor*/) { +void CMockModel::sample(core_t::TTime /*startTime*/, + core_t::TTime /*endTime*/, + CResourceMonitor& /*resourceMonitor*/) { } -void CMockModel::sampleOutOfPhase(core_t::TTime /*startTime*/, core_t::TTime /*endTime*/, CResourceMonitor& /*resourceMonitor*/) { +void CMockModel::sampleOutOfPhase(core_t::TTime /*startTime*/, + core_t::TTime /*endTime*/, + CResourceMonitor& /*resourceMonitor*/) { } void CMockModel::prune(std::size_t /*maximumAge*/) { @@ -187,10 +197,12 @@ CMemoryUsageEstimator* CMockModel::memoryUsageEstimator() const { return nullptr; } -CMockModelDetailsView::CMockModelDetailsView(const CMockModel& model) : m_Model{&model} { +CMockModelDetailsView::CMockModelDetailsView(const CMockModel& model) + : m_Model{&model} { } -const maths::CModel* CMockModelDetailsView::model(model_t::EFeature /*feature*/, std::size_t byFieldId) const { +const maths::CModel* CMockModelDetailsView::model(model_t::EFeature /*feature*/, + std::size_t byFieldId) const { return m_Model->model(byFieldId); } @@ -198,7 +210,9 @@ const CAnomalyDetectorModel& CMockModelDetailsView::base() const { return *m_Model; } -double CMockModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, std::size_t /*byFieldId*/, core_t::TTime /*time*/) const { +double CMockModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, + std::size_t /*byFieldId*/, + core_t::TTime /*time*/) const { return 1.0; } } diff --git a/lib/model/unittest/Mocks.h b/lib/model/unittest/Mocks.h index 540fea1fb8..7e5caf72fd 100644 --- a/lib/model/unittest/Mocks.h +++ b/lib/model/unittest/Mocks.h @@ -45,7 +45,10 @@ class CMockModel : public CAnomalyDetectorModel { virtual TOptionalDouble baselineBucketCount(std::size_t pid) const; - virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; + virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, + std::size_t pid, + std::size_t cid, + core_t::TTime time) const; virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, std::size_t pid, @@ -58,11 +61,15 @@ class 
CMockModel : public CAnomalyDetectorModel {
     virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const;
 
-    virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+    virtual void sampleBucketStatistics(core_t::TTime startTime,
+                                        core_t::TTime endTime,
+                                        CResourceMonitor& resourceMonitor);
 
     virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
 
-    virtual void sampleOutOfPhase(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor);
+    virtual void sampleOutOfPhase(core_t::TTime startTime,
+                                  core_t::TTime endTime,
+                                  CResourceMonitor& resourceMonitor);
 
     virtual void prune(std::size_t maximumAge);
 
@@ -98,10 +105,17 @@ class CMockModel : public CAnomalyDetectorModel {
 
     void mockPopulation(bool isPopulation);
 
-    void mockAddBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time, const TDouble1Vec& value);
+    void mockAddBucketValue(model_t::EFeature feature,
+                            std::size_t pid,
+                            std::size_t cid,
+                            core_t::TTime time,
+                            const TDouble1Vec& value);
 
-    void
-    mockAddBucketBaselineMean(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time, const TDouble1Vec& value);
+    void mockAddBucketBaselineMean(model_t::EFeature feature,
+                                   std::size_t pid,
+                                   std::size_t cid,
+                                   core_t::TTime time,
+                                   const TDouble1Vec& value);
 
     void mockTimeSeriesModels(const TMathsModelPtrVec& model);
 
@@ -116,7 +130,8 @@ class CMockModel : public CAnomalyDetectorModel {
     using TDouble1Vec = CAnomalyDetectorModel::TDouble1Vec;
     using TSizeSizeTimeTriple = core::CTriple<std::size_t, std::size_t, core_t::TTime>;
     using TFeatureSizeSizeTimeTriplePr = std::pair<model_t::EFeature, TSizeSizeTimeTriple>;
-    using TFeatureSizeSizeTimeTriplePrDouble1VecUMap = boost::unordered_map<TFeatureSizeSizeTimeTriplePr, TDouble1Vec>;
+    using TFeatureSizeSizeTimeTriplePrDouble1VecUMap =
+        boost::unordered_map<TFeatureSizeSizeTimeTriplePr, TDouble1Vec>;
 
 private:
     virtual void currentBucketTotalCount(uint64_t totalCount);
@@ -138,7 +153,9 @@ class CMockModelDetailsView : public CModelDetailsView {
 private:
     virtual const maths::CModel* model(model_t::EFeature feature, std::size_t byFieldId) const;
     virtual const CAnomalyDetectorModel& base() const;
-    virtual double countVarianceScale(model_t::EFeature feature, std::size_t byFieldId, core_t::TTime time) const;
+    virtual double countVarianceScale(model_t::EFeature feature,
+                                      std::size_t byFieldId,
+                                      core_t::TTime time) const;
 
 private:
     //! The model.
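// Aside, not part of the patch: every suite() function touched by this commit
// registers its test methods the same way, so the reformatting above is highly
// repetitive. A minimal, self-contained sketch of the pattern; the fixture
// CExampleTest and its method are hypothetical, while the real files name the
// test class in the template argument, e.g. CppUnit::TestCaller<CSampleQueueTest>.
#include <cppunit/TestCaller.h>
#include <cppunit/TestFixture.h>
#include <cppunit/TestSuite.h>

class CExampleTest : public CppUnit::TestFixture {
public:
    void testSomething() { /* assertions go here */ }
    static CppUnit::Test* suite();
};

CppUnit::Test* CExampleTest::suite() {
    CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CExampleTest");
    // clang-format breaks after the opening parenthesis of the TestCaller
    // constructor, producing the wrapped layout seen in the hunks above.
    suiteOfTests->addTest(new CppUnit::TestCaller<CExampleTest>(
        "CExampleTest::testSomething", &CExampleTest::testSomething));
    return suiteOfTests;
}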
diff --git a/lib/test/CMultiFileDataAdder.cc b/lib/test/CMultiFileDataAdder.cc index ec13079f5a..14628e7ad4 100644 --- a/lib/test/CMultiFileDataAdder.cc +++ b/lib/test/CMultiFileDataAdder.cc @@ -24,7 +24,8 @@ CMultiFileDataAdder::CMultiFileDataAdder(std::string baseFilename, std::string f m_FileExtension.swap(fileExtension); } -CMultiFileDataAdder::TOStreamP CMultiFileDataAdder::addStreamed(const std::string& index, const std::string& id) { +CMultiFileDataAdder::TOStreamP +CMultiFileDataAdder::addStreamed(const std::string& index, const std::string& id) { const std::string& filename = this->makeFilename(index, id); TOStreamP strm(boost::make_shared(filename.c_str())); @@ -47,7 +48,8 @@ bool CMultiFileDataAdder::streamComplete(TOStreamP& strm, bool /*force*/) { return !ofs->bad(); } -std::string CMultiFileDataAdder::makeFilename(const std::string& index, const std::string& id) const { +std::string CMultiFileDataAdder::makeFilename(const std::string& index, + const std::string& id) const { // NB: The logic in here must mirror that of CMultiFileSearcher::search() std::string filename(m_BaseFilename); @@ -61,7 +63,9 @@ std::string CMultiFileDataAdder::makeFilename(const std::string& index, const st // boost::filesystem, and this is what we want boost::filesystem::path directoryPath(filename); boost::filesystem::create_directories(directoryPath); - } catch (std::exception& e) { LOG_ERROR(<< "Failed to create directory " << filename << " - " << e.what()); } + } catch (std::exception& e) { + LOG_ERROR(<< "Failed to create directory " << filename << " - " << e.what()); + } filename += '/'; filename += id; diff --git a/lib/test/CMultiFileSearcher.cc b/lib/test/CMultiFileSearcher.cc index 067479c26e..230edc1621 100644 --- a/lib/test/CMultiFileSearcher.cc +++ b/lib/test/CMultiFileSearcher.cc @@ -18,8 +18,11 @@ namespace test { const std::string CMultiFileSearcher::JSON_FILE_EXT(".json"); -CMultiFileSearcher::CMultiFileSearcher(std::string baseFilename, std::string baseDocId, std::string fileExtension) - : m_BaseFilename(std::move(baseFilename)), m_BaseDocId(std::move(baseDocId)), m_FileExtension(std::move(fileExtension)) { +CMultiFileSearcher::CMultiFileSearcher(std::string baseFilename, + std::string baseDocId, + std::string fileExtension) + : m_BaseFilename(std::move(baseFilename)), m_BaseDocId(std::move(baseDocId)), + m_FileExtension(std::move(fileExtension)) { } CMultiFileSearcher::TIStreamP CMultiFileSearcher::search(size_t currentDocNum, size_t limit) { diff --git a/lib/test/CRandomNumbers.cc b/lib/test/CRandomNumbers.cc index 02b26a4176..e02fb74054 100644 --- a/lib/test/CRandomNumbers.cc +++ b/lib/test/CRandomNumbers.cc @@ -25,7 +25,10 @@ namespace ml { namespace test { -void CRandomNumbers::generateNormalSamples(double mean, double variance, std::size_t numberSamples, TDoubleVec& samples) { +void CRandomNumbers::generateNormalSamples(double mean, + double variance, + std::size_t numberSamples, + TDoubleVec& samples) { boost::random::normal_distribution<> normal(mean, std::sqrt(variance)); generateSamples(m_Generator, normal, numberSamples, samples); } @@ -51,7 +54,8 @@ void CRandomNumbers::generateMultivariateNormalSamples(const TDoubleVec& mean, TDoubleVecVec residuals(r); for (std::size_t i = 0u; i < r; ++i) { - this->generateNormalSamples(0.0, svd.singularValues()(i), numberSamples, residuals[i]); + this->generateNormalSamples(0.0, svd.singularValues()(i), numberSamples, + residuals[i]); } Eigen::VectorXd ri(d); @@ -74,27 +78,41 @@ void CRandomNumbers::generatePoissonSamples(double rate, 
std::size_t numberSampl generateSamples(m_Generator, poisson, numberSamples, samples); } -void CRandomNumbers::generateStudentsSamples(double degreesFreedom, std::size_t numberSamples, TDoubleVec& samples) { +void CRandomNumbers::generateStudentsSamples(double degreesFreedom, + std::size_t numberSamples, + TDoubleVec& samples) { boost::random::student_t_distribution<> students(degreesFreedom); generateSamples(m_Generator, students, numberSamples, samples); } -void CRandomNumbers::generateLogNormalSamples(double location, double squareScale, std::size_t numberSamples, TDoubleVec& samples) { +void CRandomNumbers::generateLogNormalSamples(double location, + double squareScale, + std::size_t numberSamples, + TDoubleVec& samples) { boost::random::lognormal_distribution<> logNormal(location, std::sqrt(squareScale)); generateSamples(m_Generator, logNormal, numberSamples, samples); } -void CRandomNumbers::generateUniformSamples(double a, double b, std::size_t numberSamples, TDoubleVec& samples) { +void CRandomNumbers::generateUniformSamples(double a, + double b, + std::size_t numberSamples, + TDoubleVec& samples) { boost::random::uniform_real_distribution<> uniform(a, b); generateSamples(m_Generator, uniform, numberSamples, samples); } -void CRandomNumbers::generateUniformSamples(std::size_t a, std::size_t b, std::size_t numberSamples, TSizeVec& samples) { +void CRandomNumbers::generateUniformSamples(std::size_t a, + std::size_t b, + std::size_t numberSamples, + TSizeVec& samples) { boost::random::uniform_int_distribution uniform(a, b - 1); generateSamples(m_Generator, uniform, numberSamples, samples); } -void CRandomNumbers::generateGammaSamples(double shape, double scale, std::size_t numberSamples, TDoubleVec& samples) { +void CRandomNumbers::generateGammaSamples(double shape, + double scale, + std::size_t numberSamples, + TDoubleVec& samples) { boost::random::gamma_distribution<> gamma(shape, scale); generateSamples(m_Generator, gamma, numberSamples, samples); } @@ -116,11 +134,13 @@ void CRandomNumbers::generateMultinomialSamples(const TDoubleVec& categories, // Construct the transform function. TDoubleVec transform; transform.reserve(probabilities.size()); - std::partial_sum(probabilities.begin(), probabilities.end(), std::back_inserter(transform)); + std::partial_sum(probabilities.begin(), probabilities.end(), + std::back_inserter(transform)); // Map the samples to categories. 
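// Aside, not part of the patch: the generateMultinomialSamples() hunk here
// implements inverse-CDF sampling. std::partial_sum turns the category
// probabilities into a cumulative distribution, and std::lower_bound maps a
// uniform draw to the first category whose cumulative weight reaches it. A
// self-contained sketch of the same technique, with illustrative names and
// the standard <random> generator standing in for the library's own:
#include <algorithm>
#include <cstddef>
#include <numeric>
#include <random>
#include <vector>

std::size_t drawCategory(const std::vector<double>& probabilities, std::mt19937& rng) {
    std::vector<double> cdf;
    cdf.reserve(probabilities.size());
    // cdf[j] = p[0] + ... + p[j]; the last entry is the total weight.
    std::partial_sum(probabilities.begin(), probabilities.end(), std::back_inserter(cdf));
    double u = std::uniform_real_distribution<>(0.0, cdf.back())(rng);
    // The first category whose cumulative weight is >= u is the sample.
    return static_cast<std::size_t>(
        std::lower_bound(cdf.begin(), cdf.end(), u) - cdf.begin());
}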
for (std::size_t i = 0u; i < samples.size(); ++i) { - std::size_t j = std::lower_bound(transform.begin(), transform.end(), samples[i]) - transform.begin(); + std::size_t j = std::lower_bound(transform.begin(), transform.end(), samples[i]) - + transform.begin(); if (j == transform.size()) { LOG_ERROR(<< "Expected sample " << samples[i] << " to be less than largest value in " << core::CContainerPrinter::print(transform)); @@ -130,7 +150,9 @@ void CRandomNumbers::generateMultinomialSamples(const TDoubleVec& categories, } } -void CRandomNumbers::generateDirichletSamples(const TDoubleVec& concentrations, std::size_t numberSamples, TDoubleVecVec& samples) { +void CRandomNumbers::generateDirichletSamples(const TDoubleVec& concentrations, + std::size_t numberSamples, + TDoubleVecVec& samples) { samples.resize(numberSamples); for (std::size_t i = 0; i < concentrations.size(); ++i) { TDoubleVec raw; @@ -153,8 +175,10 @@ void CRandomNumbers::generateDirichletSamples(const TDoubleVec& concentrations, } void CRandomNumbers::generateWords(std::size_t length, std::size_t numberSamples, TStrVec& samples) { - const char characterSet[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'x', 'y', 'z', '-', '_', ' ', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'}; + const char characterSet[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '-', + '_', ' ', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'}; boost::random::uniform_int_distribution uniform(0u, boost::size(characterSet) - 1); @@ -176,7 +200,8 @@ void CRandomNumbers::discard(std::size_t n) { m_Generator.discard(n); } -CRandomNumbers::CUniform0nGenerator::CUniform0nGenerator(const TGenerator& generator) : m_Generator(new TGenerator(generator)) { +CRandomNumbers::CUniform0nGenerator::CUniform0nGenerator(const TGenerator& generator) + : m_Generator(new TGenerator(generator)) { } std::size_t CRandomNumbers::CUniform0nGenerator::operator()(std::size_t n) const { diff --git a/lib/test/CTestRunner.cc b/lib/test/CTestRunner.cc index fc032c22b5..84f2b69a5d 100644 --- a/lib/test/CTestRunner.cc +++ b/lib/test/CTestRunner.cc @@ -81,29 +81,36 @@ void CTestRunner::processCmdLine(int argc, const char** argv) { for (int i = 1; i < argc; ++i) { m_TestCases.push_back(argv[i]); std::string& testName = m_TestCases.back(); - if (testName.length() > SRC_EXT.length() && testName.rfind(SRC_EXT) == testName.length() - SRC_EXT.length()) { + if (testName.length() > SRC_EXT.length() && + testName.rfind(SRC_EXT) == testName.length() - SRC_EXT.length()) { testName.erase(testName.length() - SRC_EXT.length()); ++numSrcStrips; lastSrcIndex = i; - } else if (testName.length() > HDR_EXT.length() && testName.rfind(HDR_EXT) == testName.length() - HDR_EXT.length()) { + } else if (testName.length() > HDR_EXT.length() && + testName.rfind(HDR_EXT) == testName.length() - HDR_EXT.length()) { testName.erase(testName.length() - HDR_EXT.length()); ++numHdrStrips; lastHdrIndex = i; } } if (numSrcStrips == 1) { - LOG_INFO(<< "Source file extension " << SRC_EXT << " stripped from supplied test name " << argv[lastSrcIndex]); + LOG_INFO(<< "Source file extension " << SRC_EXT + << " stripped from supplied test name " << argv[lastSrcIndex]); } else if (numSrcStrips > 0) { - LOG_INFO(<< "Source file extension " << SRC_EXT << " stripped from " << numSrcStrips << " supplied test names"); + LOG_INFO(<< "Source file extension " << SRC_EXT << " stripped 
from " + << numSrcStrips << " supplied test names"); } if (numHdrStrips == 1) { - LOG_INFO(<< "Header file extension " << HDR_EXT << " stripped from supplied test name " << argv[lastHdrIndex]); + LOG_INFO(<< "Header file extension " << HDR_EXT + << " stripped from supplied test name " << argv[lastHdrIndex]); } else if (numHdrStrips > 0) { - LOG_INFO(<< "Header file extension " << HDR_EXT << " stripped from " << numHdrStrips << " supplied test names"); + LOG_INFO(<< "Header file extension " << HDR_EXT << " stripped from " + << numHdrStrips << " supplied test names"); } std::sort(m_TestCases.begin(), m_TestCases.end()); size_t numDuplicates(m_TestCases.size()); - m_TestCases.erase(std::unique(m_TestCases.begin(), m_TestCases.end()), m_TestCases.end()); + m_TestCases.erase(std::unique(m_TestCases.begin(), m_TestCases.end()), + m_TestCases.end()); numDuplicates -= m_TestCases.size(); if (numDuplicates > 0) { LOG_WARN(<< numDuplicates @@ -124,7 +131,8 @@ bool CTestRunner::runTests() { bool passed(false); if (this->checkSkipFile(cwd.string(), passed) == true) { - LOG_WARN(<< "Skipping tests for directory " << cwd << " and using previous test result " << std::boolalpha << passed); + LOG_WARN(<< "Skipping tests for directory " << cwd + << " and using previous test result " << std::boolalpha << passed); return passed; } @@ -152,7 +160,8 @@ bool CTestRunner::runTests() { passed = this->timeTests(topPath, testPath); if (this->updateSkipFile(cwd.string(), passed) == true) { - LOG_INFO(<< "Added directory " << cwd << " to skip file with result " << std::boolalpha << passed); + LOG_INFO(<< "Added directory " << cwd << " to skip file with result " + << std::boolalpha << passed); } return passed; @@ -175,10 +184,13 @@ bool CTestRunner::timeTests(const std::string& topPath, const std::string& testP if (m_TestCases.empty()) { allPassed = this->run(); } else { - for (TStrVecItr itr = m_TestCases.begin(); itr != m_TestCases.end() && allPassed; ++itr) { + for (TStrVecItr itr = m_TestCases.begin(); + itr != m_TestCases.end() && allPassed; ++itr) { try { allPassed = this->run(*itr); - } catch (std::invalid_argument&) { LOG_ERROR(<< "No Test called " << *itr << " in testsuite"); } + } catch (std::invalid_argument&) { + LOG_ERROR(<< "No Test called " << *itr << " in testsuite"); + } } } @@ -220,7 +232,8 @@ bool CTestRunner::updateSkipFile(const std::string& cwd, bool passed) const { // Don't create the file if it doesn't already exist, and don't write to it // if it's not writable - if (core::COsFileFuncs::access(fullPath.c_str(), core::COsFileFuncs::READABLE | core::COsFileFuncs::WRITABLE) == -1) { + if (core::COsFileFuncs::access(fullPath.c_str(), core::COsFileFuncs::READABLE | + core::COsFileFuncs::WRITABLE) == -1) { LOG_TRACE(<< "Will not update skip file " << fullPath << " : " << ::strerror(errno)); return false; } diff --git a/lib/test/CTimeSeriesTestData.cc b/lib/test/CTimeSeriesTestData.cc index c3820e8598..d4419e3d94 100644 --- a/lib/test/CTimeSeriesTestData.cc +++ b/lib/test/CTimeSeriesTestData.cc @@ -81,7 +81,8 @@ bool CTimeSeriesTestData::parseCounter(const std::string& fileName, TTimeDoubleP } else { result.second = value - last; if (result.second < 0) { - LOG_WARN(<< "Negative value " << value << "<" << last << "@" << result.first << " setting counter to 0 "); + LOG_WARN(<< "Negative value " << value << "<" << last << "@" + << result.first << " setting counter to 0 "); result.second = 0; } } @@ -122,7 +123,10 @@ void CTimeSeriesTestData::derive(const TTimeDoublePrVec& data, TTimeDoublePrVec& } } 
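// Aside, not part of the patch: CTimeSeriesTestData::parseCounter() above
// converts monotonic counter readings to per-interval deltas and clamps a
// negative delta (a counter reset) to zero after logging a warning. A hedged,
// self-contained sketch of that rule; the helper name is illustrative only.
#include <cstddef>
#include <vector>

std::vector<double> counterToDeltas(const std::vector<double>& counters) {
    std::vector<double> deltas;
    if (counters.size() > 1) {
        deltas.reserve(counters.size() - 1);
    }
    for (std::size_t i = 1; i < counters.size(); ++i) {
        double delta = counters[i] - counters[i - 1];
        // A reading smaller than its predecessor means the counter was reset.
        deltas.push_back(delta < 0.0 ? 0.0 : delta);
    }
    return deltas;
}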
-bool CTimeSeriesTestData::pad(const TTimeDoublePrVec& data, core_t::TTime minTime, core_t::TTime maxTime, TTimeDoublePrVec& results) { +bool CTimeSeriesTestData::pad(const TTimeDoublePrVec& data, + core_t::TTime minTime, + core_t::TTime maxTime, + TTimeDoublePrVec& results) { results.clear(); if (minTime > maxTime) { @@ -215,7 +219,8 @@ bool CTimeSeriesTestData::parseLine(const core::CRegex& tokenRegex, const std::string& dateFormat, const std::string& line, std::vector>& results) { - if (line.empty() || line.find_first_not_of(core::CStringUtils::WHITESPACE_CHARS) == std::string::npos) { + if (line.empty() || line.find_first_not_of(core::CStringUtils::WHITESPACE_CHARS) == + std::string::npos) { LOG_DEBUG(<< "Ignoring blank line"); return true; } diff --git a/lib/test/CTimingXmlOutputterHook.cc b/lib/test/CTimingXmlOutputterHook.cc index e4285df65a..40ca7c4152 100644 --- a/lib/test/CTimingXmlOutputterHook.cc +++ b/lib/test/CTimingXmlOutputterHook.cc @@ -24,7 +24,9 @@ const std::string TOTAL_ELAPSED_TIME_TAG("TotalElapsedTime"); const std::string AVERAGE_TEST_CASE_TIME_TAG("AverageTestCaseTime"); } -CTimingXmlOutputterHook::CTimingXmlOutputterHook(const CTestTimer& testTimer, const std::string& topPath, const std::string& testPath) +CTimingXmlOutputterHook::CTimingXmlOutputterHook(const CTestTimer& testTimer, + const std::string& topPath, + const std::string& testPath) : m_TestTimer(testTimer), m_TopPath(topPath), m_TestPath(testPath) { } @@ -53,16 +55,20 @@ void CTimingXmlOutputterHook::successfulTestAdded(CppUnit::XmlDocument* /*docume testElement->elementFor(NAME_TAG)->setContent(m_TopPath + '.' + m_TestPath + '.' + testName); testElement->addElement(new CppUnit::XmlElement(TEST_PATH_TAG, m_TestPath + '/' + testName)); - testElement->addElement(new CppUnit::XmlElement(TIME_TAG, this->toSecondsStr(m_TestTimer.timeForTest(testName)))); + testElement->addElement(new CppUnit::XmlElement( + TIME_TAG, this->toSecondsStr(m_TestTimer.timeForTest(testName)))); } -void CTimingXmlOutputterHook::statisticsAdded(CppUnit::XmlDocument* /*document*/, CppUnit::XmlElement* statisticsElement) { +void CTimingXmlOutputterHook::statisticsAdded(CppUnit::XmlDocument* /*document*/, + CppUnit::XmlElement* statisticsElement) { if (statisticsElement == nullptr) { return; } - statisticsElement->addElement(new CppUnit::XmlElement(TOTAL_ELAPSED_TIME_TAG, this->toSecondsStr(m_TestTimer.totalTime()))); - statisticsElement->addElement(new CppUnit::XmlElement(AVERAGE_TEST_CASE_TIME_TAG, this->toSecondsStr(m_TestTimer.averageTime()))); + statisticsElement->addElement(new CppUnit::XmlElement( + TOTAL_ELAPSED_TIME_TAG, this->toSecondsStr(m_TestTimer.totalTime()))); + statisticsElement->addElement(new CppUnit::XmlElement( + AVERAGE_TEST_CASE_TIME_TAG, this->toSecondsStr(m_TestTimer.averageTime()))); } std::string CTimingXmlOutputterHook::toSecondsStr(uint64_t ms) { diff --git a/lib/ver/unittest/CBuildInfoTest.cc b/lib/ver/unittest/CBuildInfoTest.cc index 1c471f0154..b41c110296 100644 --- a/lib/ver/unittest/CBuildInfoTest.cc +++ b/lib/ver/unittest/CBuildInfoTest.cc @@ -15,7 +15,8 @@ CppUnit::Test* CBuildInfoTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CBuildInfoTest"); - suiteOfTests->addTest(new CppUnit::TestCaller("CBuildInfoTest::testFullInfo", &CBuildInfoTest::testFullInfo)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBuildInfoTest::testFullInfo", &CBuildInfoTest::testFullInfo)); return suiteOfTests; } @@ -24,7 +25,8 @@ void CBuildInfoTest::testFullInfo(void) { std::string 
fullInfo(ml::ver::CBuildInfo::fullInfo()); LOG_DEBUG(<< fullInfo); - std::string currentYear(ml::core::CTimeUtils::toIso8601(ml::core::CTimeUtils::now()), 0, 4); + std::string currentYear( + ml::core::CTimeUtils::toIso8601(ml::core::CTimeUtils::now()), 0, 4); LOG_DEBUG(<< "Current year is " << currentYear); CPPUNIT_ASSERT(fullInfo.find("ml_test") != std::string::npos); From 77c6fd8a042f653f3dc056a689b21658261c5574 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 13 Apr 2018 23:29:37 +0100 Subject: [PATCH 17/29] Fixing format merge --- bin/autoconfig/CCmdLineParser.cc | 51 ++++--- bin/autodetect/CCmdLineParser.cc | 127 +++++++++--------- bin/categorize/CCmdLineParser.cc | 64 ++++----- bin/controller/CCmdLineParser.cc | 20 +-- .../CBlockingCallCancellerThreadTest.cc | 2 +- bin/normalize/CCmdLineParser.cc | 52 ++++--- devbin/unixtime_to_string/CCmdLineParser.cc | 6 +- include/core/CCondition.h | 2 +- include/core/CMaskIterator.h | 13 +- include/core/CMemory.h | 12 +- include/core/COsFileFuncs.h | 10 +- include/core/CProcess.h | 2 +- include/core/CThread.h | 2 +- include/maths/CBasicStatistics.h | 5 +- include/maths/CBootstrapClusterer.h | 5 +- include/maths/CLinearAlgebra.h | 78 ++++++----- include/maths/COrderings.h | 30 ++--- include/maths/CPeriodicityHypothesisTests.h | 6 +- include/maths/CSampling.h | 8 +- 19 files changed, 267 insertions(+), 228 deletions(-) diff --git a/bin/autoconfig/CCmdLineParser.cc b/bin/autoconfig/CCmdLineParser.cc index b647f44695..736bc5a54b 100644 --- a/bin/autoconfig/CCmdLineParser.cc +++ b/bin/autoconfig/CCmdLineParser.cc @@ -35,28 +35,35 @@ bool CCmdLineParser::parse(int argc, bool& writeDetectorConfigs) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")( - "version", "Display version information and exit")( - "logProperties", boost::program_options::value(), - "Optional logger properties file")( - "logPipe", boost::program_options::value(), "Optional log to named pipe")( - "delimiter", boost::program_options::value(), - "Optional delimiter character for delimited data formats - default is ',' (comma separated)")( - "lengthEncodedInput", - "Take input in length encoded binary format - default is delimited")( - "timefield", boost::program_options::value(), - "Optional name of the field containing the timestamp - default is 'time'")( - "timeformat", boost::program_options::value(), - "Optional format of the date in the time field in strptime code - default is the epoch time in seconds")( - "config", boost::program_options::value(), "Optional configuration file")( - "input", boost::program_options::value(), - "Optional file to read input from - not present means read from STDIN")( - "inputIsPipe", "Specified input file is a named pipe")( - "output", boost::program_options::value(), - "Optional file to write output to - not present means write to STDOUT")( - "outputIsPipe", "Specified output file is a named pipe")( - "verbose", "Output information about all detectors including those that have been discarded")( - "writeDetectorConfigs", "Output the detector configurations in JSON format"); + // clang-format off + desc.add_options() + ("help", "Display this information and exit") + ("version", "Display version information and exit") + ("logProperties", boost::program_options::value(), + "Optional logger properties file") + ("logPipe", boost::program_options::value(), + "Optional log to named pipe") + ("delimiter", boost::program_options::value(), + "Optional delimiter 
character for delimited data formats - default is ',' (comma separated)") + ("lengthEncodedInput", + "Take input in length encoded binary format - default is delimited") + ("timefield", boost::program_options::value(), + "Optional name of the field containing the timestamp - default is 'time'") + ("timeformat", boost::program_options::value(), + "Optional format of the date in the time field in strptime code - default is the epoch time in seconds") + ("config", boost::program_options::value(), + "Optional configuration file") + ("input", boost::program_options::value(), + "Optional file to read input from - not present means read from STDIN") + ("inputIsPipe", "Specified input file is a named pipe") + ("output", boost::program_options::value(), + "Optional file to write output to - not present means write to STDOUT") + ("outputIsPipe", "Specified output file is a named pipe") + ("verbose", "Output information about all detectors including those that have been discarded") + ("writeDetectorConfigs", + "Output the detector configurations in JSON format") + ; + // clang-format on boost::program_options::variables_map vm; boost::program_options::store( diff --git a/bin/autodetect/CCmdLineParser.cc b/bin/autodetect/CCmdLineParser.cc index 39a5c3c01e..2f48cbbeb7 100644 --- a/bin/autodetect/CCmdLineParser.cc +++ b/bin/autodetect/CCmdLineParser.cc @@ -57,67 +57,72 @@ bool CCmdLineParser::parse(int argc, TStrVec& clauseTokens) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")( - "version", "Display version information and exit")( - "limitconfig", boost::program_options::value(), - "Optional limit config file")("modelconfig", - boost::program_options::value(), - "Optional model config file")( - "fieldconfig", boost::program_options::value(), - "Optional field config file")("modelplotconfig", - boost::program_options::value(), - "Optional model plot config file")( - "jobid", boost::program_options::value(), - "ID of the job this process is associated with")( - "logProperties", boost::program_options::value(), - "Optional logger properties file")( - "logPipe", boost::program_options::value(), "Optional log to named pipe")( - "bucketspan", boost::program_options::value(), - "Optional aggregation bucket span (in seconds) - default is 300")( - "latency", boost::program_options::value(), - "Optional maximum delay for out-of-order records (in seconds) - default is 0")( - "summarycountfield", boost::program_options::value(), - "Optional field to that contains counts for pre-summarized input - default is none")( - "delimiter", boost::program_options::value(), - "Optional delimiter character for delimited data formats - default is '\t' (tab separated)")( - "lengthEncodedInput", - "Take input in length encoded binary format - default is delimited")( - "timefield", boost::program_options::value(), - "Optional name of the field containing the timestamp - default is 'time'")( - "timeformat", boost::program_options::value(), - "Optional format of the date in the time field in strptime code - default is the epoch time in seconds")( - "quantilesState", boost::program_options::value(), - "Optional file to quantiles for normalization")( - "deleteStateFiles", - "If the 'quantilesState' option is used and this flag is set then delete the model state files once they have been read")( - "input", boost::program_options::value(), - "Optional file to read input from - not present means read from STDIN")( - "inputIsPipe", "Specified input 
file is a named pipe")( - "output", boost::program_options::value(), - "Optional file to write output to - not present means write to STDOUT")( - "outputIsPipe", "Specified output file is a named pipe")( - "restore", boost::program_options::value(), - "Optional file to restore state from - not present means no state restoration")( - "restoreIsPipe", "Specified restore file is a named pipe")( - "persist", boost::program_options::value(), - "Optional file to persist state to - not present means no state persistence")( - "persistIsPipe", "Specified persist file is a named pipe")( - "persistInterval", boost::program_options::value(), - "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at " - "program exit")( - "maxQuantileInterval", boost::program_options::value(), - "Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - " - "if not specified then quantiles will only be output following a big anomaly")( - "maxAnomalyRecords", boost::program_options::value(), - "The maximum number of records to be outputted for each bucket. Defaults to 100, a value 0 removes the limit.")( - "memoryUsage", "Log the model memory usage at the end of the job")( - "resultFinalizationWindow", boost::program_options::value(), - "The numer of half buckets to store before choosing which overlapping bucket has the biggest anomaly")( - "multivariateByFields", - "Optional flag to enable multi-variate analysis of correlated by fields")( - "multipleBucketspans", boost::program_options::value(), - "Optional comma-separated list of additional bucketspans - must be direct multiples of the main bucketspan")( - "perPartitionNormalization", "Optional flag to enable per partition normalization"); + // clang-format off + desc.add_options() + ("help", "Display this information and exit") + ("version", "Display version information and exit") + ("limitconfig", boost::program_options::value(), + "Optional limit config file") + ("modelconfig", boost::program_options::value(), + "Optional model config file") + ("fieldconfig", boost::program_options::value(), + "Optional field config file") + ("modelplotconfig", boost::program_options::value(), + "Optional model plot config file") + ("jobid", boost::program_options::value(), + "ID of the job this process is associated with") + ("logProperties", boost::program_options::value(), + "Optional logger properties file") + ("logPipe", boost::program_options::value(), + "Optional log to named pipe") + ("bucketspan", boost::program_options::value(), + "Optional aggregation bucket span (in seconds) - default is 300") + ("latency", boost::program_options::value(), + "Optional maximum delay for out-of-order records (in seconds) - default is 0") + ("summarycountfield", boost::program_options::value(), + "Optional field to that contains counts for pre-summarized input - default is none") + ("delimiter", boost::program_options::value(), + "Optional delimiter character for delimited data formats - default is '\t' (tab separated)") + ("lengthEncodedInput", + "Take input in length encoded binary format - default is delimited") + ("timefield", boost::program_options::value(), + "Optional name of the field containing the timestamp - default is 'time'") + ("timeformat", boost::program_options::value(), + "Optional format of the date in the time field in strptime code - default is the epoch time in seconds") + ("quantilesState", boost::program_options::value(), + "Optional file to quantiles 
for normalization") + ("deleteStateFiles", + "If the 'quantilesState' option is used and this flag is set then delete the model state files once they have been read") + ("input", boost::program_options::value(), + "Optional file to read input from - not present means read from STDIN") + ("inputIsPipe", "Specified input file is a named pipe") + ("output", boost::program_options::value(), + "Optional file to write output to - not present means write to STDOUT") + ("outputIsPipe", "Specified output file is a named pipe") + ("restore", boost::program_options::value(), + "Optional file to restore state from - not present means no state restoration") + ("restoreIsPipe", "Specified restore file is a named pipe") + ("persist", boost::program_options::value(), + "Optional file to persist state to - not present means no state persistence") + ("persistIsPipe", "Specified persist file is a named pipe") + ("persistInterval", boost::program_options::value(), + "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at program exit") + ("maxQuantileInterval", boost::program_options::value(), + "Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - if not specified then quantiles will only be output following a big anomaly") + ("maxAnomalyRecords", boost::program_options::value(), + "The maximum number of records to be outputted for each bucket. Defaults to 100, a value 0 removes the limit.") + ("memoryUsage", + "Log the model memory usage at the end of the job") + ("resultFinalizationWindow", boost::program_options::value(), + "The numer of half buckets to store before choosing which overlapping bucket has the biggest anomaly") + ("multivariateByFields", + "Optional flag to enable multi-variate analysis of correlated by fields") + ("multipleBucketspans", boost::program_options::value(), + "Optional comma-separated list of additional bucketspans - must be direct multiples of the main bucketspan") + ("perPartitionNormalization", + "Optional flag to enable per partition normalization") + ; + // clang-format on boost::program_options::variables_map vm; boost::program_options::parsed_options parsed = diff --git a/bin/categorize/CCmdLineParser.cc b/bin/categorize/CCmdLineParser.cc index dd993c4715..6ee05a7a5e 100644 --- a/bin/categorize/CCmdLineParser.cc +++ b/bin/categorize/CCmdLineParser.cc @@ -37,36 +37,40 @@ bool CCmdLineParser::parse(int argc, std::string& categorizationFieldName) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")( - "version", "Display version information and exit")( - "limitconfig", boost::program_options::value(), - "Optional limit config file")( - "jobid", boost::program_options::value(), - "ID of the job this process is associated with")( - "logProperties", boost::program_options::value(), - "Optional logger properties file")( - "logPipe", boost::program_options::value(), "Optional log to named pipe")( - "delimiter", boost::program_options::value(), - "Optional delimiter character for delimited data formats - default is '\t' (tab separated)")( - "lengthEncodedInput", - "Take input in length encoded binary format - default is delimited")( - "input", boost::program_options::value(), - "Optional file to read input from - not present means read from STDIN")( - "inputIsPipe", "Specified input file is a named pipe")( - "output", boost::program_options::value(), - "Optional file to write 
output to - not present means write to STDOUT")( - "outputIsPipe", "Specified output file is a named pipe")( - "restore", boost::program_options::value(), - "Optional file to restore state from - not present means no state restoration")( - "restoreIsPipe", "Specified restore file is a named pipe")( - "persist", boost::program_options::value(), - "Optional file to persist state to - not present means no state persistence")( - "persistIsPipe", "Specified persist file is a named pipe")( - "persistInterval", boost::program_options::value(), - "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at " - "program exit")("categorizationfield", - boost::program_options::value(), - "Field to compute mlcategory from"); + // clang-format off + desc.add_options() + ("help", "Display this information and exit") + ("version", "Display version information and exit") + ("limitconfig", boost::program_options::value(), + "Optional limit config file") + ("jobid", boost::program_options::value(), + "ID of the job this process is associated with") + ("logProperties", boost::program_options::value(), + "Optional logger properties file") + ("logPipe", boost::program_options::value(), + "Optional log to named pipe") + ("delimiter", boost::program_options::value(), + "Optional delimiter character for delimited data formats - default is '\t' (tab separated)") + ("lengthEncodedInput", + "Take input in length encoded binary format - default is delimited") + ("input", boost::program_options::value(), + "Optional file to read input from - not present means read from STDIN") + ("inputIsPipe", "Specified input file is a named pipe") + ("output", boost::program_options::value(), + "Optional file to write output to - not present means write to STDOUT") + ("outputIsPipe", "Specified output file is a named pipe") + ("restore", boost::program_options::value(), + "Optional file to restore state from - not present means no state restoration") + ("restoreIsPipe", "Specified restore file is a named pipe") + ("persist", boost::program_options::value(), + "Optional file to persist state to - not present means no state persistence") + ("persistIsPipe", "Specified persist file is a named pipe") + ("persistInterval", boost::program_options::value(), + "Optional interval at which to periodically persist model state - if not specified then models will only be persisted at program exit") + ("categorizationfield", boost::program_options::value(), + "Field to compute mlcategory from") + ; + // clang-format on boost::program_options::variables_map vm; boost::program_options::store( diff --git a/bin/controller/CCmdLineParser.cc b/bin/controller/CCmdLineParser.cc index 3420df8e79..9361598b22 100644 --- a/bin/controller/CCmdLineParser.cc +++ b/bin/controller/CCmdLineParser.cc @@ -24,14 +24,18 @@ bool CCmdLineParser::parse(int argc, std::string& commandPipe) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")( - "version", "Display version information and exit")( - "jvmPid", boost::program_options::value(), - "Process ID of the JVM to communicate with - default is parent process PID")( - "logPipe", boost::program_options::value(), - "Named pipe to log to - default is controller_log_")( - "commandPipe", boost::program_options::value(), - "Named pipe to accept commands from - default is controller_command_"); + // clang-format off + desc.add_options() + ("help", "Display this information and exit") + 
("version", "Display version information and exit") + ("jvmPid", boost::program_options::value(), + "Process ID of the JVM to communicate with - default is parent process PID") + ("logPipe", boost::program_options::value(), + "Named pipe to log to - default is controller_log_") + ("commandPipe", boost::program_options::value(), + "Named pipe to accept commands from - default is controller_command_") + ; + // clang-format on boost::program_options::variables_map vm; boost::program_options::store( diff --git a/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc b/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc index 1b98895156..ceb3f09201 100644 --- a/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc +++ b/bin/controller/unittest/CBlockingCallCancellerThreadTest.cc @@ -64,7 +64,7 @@ void CBlockingCallCancellerThreadTest::testCancelBlock() { ml::core::CNamedPipeFactory::TIStreamP pipeStrm = ml::core::CNamedPipeFactory::openPipeStreamRead( ml::core::CNamedPipeFactory::defaultPath() + "test_pipe"); - CPPUNIT_ASSERT(pipeStrm == 0); + CPPUNIT_ASSERT(pipeStrm == nullptr); CPPUNIT_ASSERT(cancellerThread.stop()); diff --git a/bin/normalize/CCmdLineParser.cc b/bin/normalize/CCmdLineParser.cc index e3c28ba6df..4bb00e6ea7 100644 --- a/bin/normalize/CCmdLineParser.cc +++ b/bin/normalize/CCmdLineParser.cc @@ -34,28 +34,36 @@ bool CCmdLineParser::parse(int argc, bool& perPartitionNormalization) { try { boost::program_options::options_description desc(DESCRIPTION); - desc.add_options()("help", "Display this information and exit")( - "version", "Display version information and exit")( - "modelconfig", boost::program_options::value(), - "Optional model config file")("logProperties", - boost::program_options::value(), - "Optional logger properties file")( - "logPipe", boost::program_options::value(), "Optional log to named pipe")( - "bucketspan", boost::program_options::value(), - "Optional aggregation bucket span (in seconds) - default is 300")( - "lengthEncodedInput", "Take input in length encoded binary format - default is CSV")( - "input", boost::program_options::value(), - "Optional file to read input from - not present means read from STDIN")( - "inputIsPipe", "Specified input file is a named pipe")( - "output", boost::program_options::value(), - "Optional file to write output to - not present means write to STDOUT")( - "outputIsPipe", "Specified output file is a named pipe")( - "quantilesState", boost::program_options::value(), - "Optional file to initialization data for normalization (in JSON)")( - "deleteStateFiles", - "If this flag is set then delete the normalizer state files once they have been read")( - "writeCsv", "Write the results in CSV format (default is lineified JSON)")( - "perPartitionNormalization", "Optional flag to enable per partition normalization"); + // clang-format off + desc.add_options() + ("help", "Display this information and exit") + ("version", "Display version information and exit") + ("modelconfig", boost::program_options::value(), + "Optional model config file") + ("logProperties", boost::program_options::value(), + "Optional logger properties file") + ("logPipe", boost::program_options::value(), + "Optional log to named pipe") + ("bucketspan", boost::program_options::value(), + "Optional aggregation bucket span (in seconds) - default is 300") + ("lengthEncodedInput", + "Take input in length encoded binary format - default is CSV") + ("input", boost::program_options::value(), + "Optional file to read input from - not present means read from 
STDIN") + ("inputIsPipe", "Specified input file is a named pipe") + ("output", boost::program_options::value(), + "Optional file to write output to - not present means write to STDOUT") + ("outputIsPipe", "Specified output file is a named pipe") + ("quantilesState", boost::program_options::value(), + "Optional file to initialization data for normalization (in JSON)") + ("deleteStateFiles", + "If this flag is set then delete the normalizer state files once they have been read") + ("writeCsv", + "Write the results in CSV format (default is lineified JSON)") + ("perPartitionNormalization", + "Optional flag to enable per partition normalization") + ; + // clang-format on boost::program_options::variables_map vm; boost::program_options::store( diff --git a/devbin/unixtime_to_string/CCmdLineParser.cc b/devbin/unixtime_to_string/CCmdLineParser.cc index 3a96a40720..d5b9a4ef81 100644 --- a/devbin/unixtime_to_string/CCmdLineParser.cc +++ b/devbin/unixtime_to_string/CCmdLineParser.cc @@ -17,8 +17,10 @@ namespace syslogparsertester { const std::string CCmdLineParser::DESCRIPTION = "Usage: syslog_parser_tester [options]\n" "Development tool to verify format of syslog parsing config XML files\n" - "E.g. ./syslog_parser_tester --config syslog_parser.xml --syslogline 'ml1234.log: " - "Transport node error on node 0x9876 '\n" + "E.g. ./syslog_parser_tester --config syslog_parser.xml --syslogline " + "'ml1234.log: Transport node error on node 0x9876 " + " '\n" "Options:"; bool CCmdLineParser::parse(int argc, diff --git a/include/core/CCondition.h b/include/core/CCondition.h index 6d61a19aff..34186a97d7 100644 --- a/include/core/CCondition.h +++ b/include/core/CCondition.h @@ -63,7 +63,7 @@ class CORE_EXPORT CCondition : private CNonCopyable { //! Reference to associated mutex CMutex& m_Mutex; -//! The condition variable + //! The condition variable #ifdef Windows CONDITION_VARIABLE m_Condition; #else diff --git a/include/core/CMaskIterator.h b/include/core/CMaskIterator.h index dfc15cb8e2..74f546b0c2 100644 --- a/include/core/CMaskIterator.h +++ b/include/core/CMaskIterator.h @@ -30,14 +30,13 @@ namespace core { //! comparable if both the underlying container and underlying mask //! are the same, although the relevant comparison operators work for //! both const and non-const versions of the underlying iterator. +// clang-format off template -class CMaskIterator - : private boost::incrementable< - CMaskIterator, - boost::decrementable, - boost::addable2, - typename std::iterator_traits::difference_type, - boost::subtractable2, typename std::iterator_traits::difference_type>>>> { +class CMaskIterator : private boost::incrementable< CMaskIterator, + boost::decrementable< CMaskIterator, + boost::addable2< CMaskIterator, typename std::iterator_traits::difference_type, + boost::subtractable2< CMaskIterator, typename std::iterator_traits::difference_type > > > > { + // clang-format on public: using difference_type = typename std::iterator_traits::difference_type; using value_type = typename std::iterator_traits::value_type; diff --git a/include/core/CMemory.h b/include/core/CMemory.h index d25f7a2410..0fc0f92661 100644 --- a/include/core/CMemory.h +++ b/include/core/CMemory.h @@ -343,12 +343,12 @@ class CORE_EXPORT CMemory : private CNonInstantiatable { //! Overload for std::string. 
static std::size_t dynamicSize(const std::string& t) { std::size_t capacity = t.capacity(); -// The different STLs we use on various platforms all have different -// allocation strategies for strings -// These are hard-coded here, on the assumption that they will not -// change frequently - but checked by unittests that do runtime -// verification -// See http://linux/wiki/index.php/Technical_design_issues#std::string + // The different STLs we use on various platforms all have different + // allocation strategies for strings + // These are hard-coded here, on the assumption that they will not + // change frequently - but checked by unittests that do runtime + // verification + // See http://linux/wiki/index.php/Technical_design_issues#std::string #ifdef MacOSX if (capacity <= 22) { // For lengths up to 22 bytes there is no allocation diff --git a/include/core/COsFileFuncs.h b/include/core/COsFileFuncs.h index 013d53f446..c440b848ee 100644 --- a/include/core/COsFileFuncs.h +++ b/include/core/COsFileFuncs.h @@ -73,35 +73,35 @@ class CORE_EXPORT COsFileFuncs : private CNonInstantiatable { static const char* NULL_FILENAME; public: -//! Signed size type (to be used instead of ssize_t) + //! Signed size type (to be used instead of ssize_t) #ifdef Windows using TSignedSize = int; #else using TSignedSize = ssize_t; #endif -//! Offset type (to be used instead of off_t) + //! Offset type (to be used instead of off_t) #ifdef Windows using TOffset = __int64; #else using TOffset = off_t; #endif -//! Mode type (to be used instead of mode_t) + //! Mode type (to be used instead of mode_t) #ifdef Windows using TMode = int; #else using TMode = mode_t; #endif -//! Inode type (to be used instead of ino_t) + //! Inode type (to be used instead of ino_t) #ifdef Windows using TIno = uint64_t; #else using TIno = ino_t; #endif -//! Stat buffer struct (to be used instead of struct stat) + //! Stat buffer struct (to be used instead of struct stat) #ifdef Windows struct SStat { // Member names don't conform to the coding standards because they diff --git a/include/core/CProcess.h b/include/core/CProcess.h index 9d7a2ea3cb..ae405a3893 100644 --- a/include/core/CProcess.h +++ b/include/core/CProcess.h @@ -56,7 +56,7 @@ class CORE_EXPORT CProcess : private CNonCopyable { //! The shutdown function using TShutdownFunc = std::function; -//! Process ID type + //! Process ID type #ifdef Windows using TPid = DWORD; #else diff --git a/include/core/CThread.h b/include/core/CThread.h index d7ec1507c2..edfbbbb54c 100644 --- a/include/core/CThread.h +++ b/include/core/CThread.h @@ -31,7 +31,7 @@ namespace core { //! class CORE_EXPORT CThread : private CNonCopyable { public: -//! Thread ID type + //! Thread ID type #ifdef Windows using TThreadId = DWORD; using TThreadRet = unsigned int; diff --git a/include/maths/CBasicStatistics.h b/include/maths/CBasicStatistics.h index b02fa0e152..37271bc647 100644 --- a/include/maths/CBasicStatistics.h +++ b/include/maths/CBasicStatistics.h @@ -1072,12 +1072,11 @@ class MATHS_EXPORT CBasicStatistics { return result; } -//! Update the statistics with \p x. + //! Update the statistics with \p x. 
#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) __attribute__((__noinline__)) #endif // defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) - bool - add(const T& x) { + bool add(const T& x) { if (m_UnusedCount > 0) { m_Statistics[--m_UnusedCount] = x; diff --git a/include/maths/CBootstrapClusterer.h b/include/maths/CBootstrapClusterer.h index a54420129d..4ae947cf04 100644 --- a/include/maths/CBootstrapClusterer.h +++ b/include/maths/CBootstrapClusterer.h @@ -173,12 +173,11 @@ class CBootstrapClusterer { } } -//! Initialize the priority queue of vertices to visit. + //! Initialize the priority queue of vertices to visit. #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) __attribute__((__noinline__)) #endif // defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) - void - initializeQueue() { + void initializeQueue() { s_Queue.clear(); s_Queue.reserve(s_ToVisit.size()); for (std::size_t i = 0u; i < s_ToVisit.size(); ++i) { diff --git a/include/maths/CLinearAlgebra.h b/include/maths/CLinearAlgebra.h index e5a265049f..52da4ca065 100644 --- a/include/maths/CLinearAlgebra.h +++ b/include/maths/CLinearAlgebra.h @@ -229,17 +229,18 @@ struct SSymmetricMatrix { //! //! \tparam T The floating point type. //! \tparam N The matrix dimension. +// clang-format off template -class CSymmetricMatrixNxN - : private boost::equality_comparable< - CSymmetricMatrixNxN, - boost::partially_ordered< - CSymmetricMatrixNxN, - boost::addable, - boost::subtractable, - boost::multipliable, boost::multipliable2, T, boost::dividable2, T>>>>>>>, - private linear_algebra_detail::SSymmetricMatrix>, - private linear_algebra_detail::CBoundsCheck::InRange { +class CSymmetricMatrixNxN : private boost::equality_comparable< CSymmetricMatrixNxN, + boost::partially_ordered< CSymmetricMatrixNxN, + boost::addable< CSymmetricMatrixNxN, + boost::subtractable< CSymmetricMatrixNxN, + boost::multipliable< CSymmetricMatrixNxN, + boost::multipliable2< CSymmetricMatrixNxN, T, + boost::dividable2< CSymmetricMatrixNxN, T > > > > > > >, + private linear_algebra_detail::SSymmetricMatrix >, + private linear_algebra_detail::CBoundsCheck::InRange { + // clang-format on private: using TBase = linear_algebra_detail::SSymmetricMatrix>; template @@ -494,15 +495,17 @@ struct SZero> { //! premium. //! //! \tparam T The floating point type. +// clang-format off template -class CSymmetricMatrix - : private boost::equality_comparable< - CSymmetricMatrix, - boost::partially_ordered< - CSymmetricMatrix, - boost::addable, - boost::subtractable, boost::multipliable, boost::multipliable2, T, boost::dividable2, T>>>>>>>, - private linear_algebra_detail::SSymmetricMatrix> { +class CSymmetricMatrix : private boost::equality_comparable< CSymmetricMatrix, + boost::partially_ordered< CSymmetricMatrix, + boost::addable< CSymmetricMatrix, + boost::subtractable< CSymmetricMatrix, + boost::multipliable< CSymmetricMatrix, + boost::multipliable2< CSymmetricMatrix, T, + boost::dividable2< CSymmetricMatrix, T > > > > > > >, + private linear_algebra_detail::SSymmetricMatrix > { + // clang-format on private: using TBase = linear_algebra_detail::SSymmetricMatrix>; template @@ -905,16 +908,19 @@ struct SVector { //! //! \tparam T The floating point type. //! \tparam N The vector dimension. 
+// clang-format off template -class CVectorNx1 - : private boost::equality_comparable< - CVectorNx1, - boost::partially_ordered< - CVectorNx1, - boost::addable, - boost::subtractable, boost::multipliable, boost::multipliable2, T, boost::dividable, boost::dividable2, T>>>>>>>>, - private linear_algebra_detail::SVector>, - private linear_algebra_detail::CBoundsCheck::InRange { +class CVectorNx1 : private boost::equality_comparable< CVectorNx1, + boost::partially_ordered< CVectorNx1, + boost::addable< CVectorNx1, + boost::subtractable< CVectorNx1, + boost::multipliable< CVectorNx1, + boost::multipliable2< CVectorNx1, T, + boost::dividable< CVectorNx1, + boost::dividable2< CVectorNx1, T > > > > > > > >, + private linear_algebra_detail::SVector >, + private linear_algebra_detail::CBoundsCheck::InRange { + // clang-format on private: using TBase = linear_algebra_detail::SVector>; template @@ -1200,14 +1206,18 @@ struct SZero> { //! so that one can use float when space is really at a premium. //! //! \tparam T The floating point type. +// clang-format off template -class CVector - : private boost::equality_comparable< - CVector, - boost::partially_ordered< - CVector, - boost::addable, boost::subtractable, boost::multipliable, boost::multipliable2, T, boost::dividable, boost::dividable2, T>>>>>>>>, - private linear_algebra_detail::SVector> { +class CVector : private boost::equality_comparable< CVector, + boost::partially_ordered< CVector, + boost::addable< CVector, + boost::subtractable< CVector, + boost::multipliable< CVector, + boost::multipliable2< CVector, T, + boost::dividable< CVector, + boost::dividable2< CVector, T > > > > > > > >, + private linear_algebra_detail::SVector > { + // clang-format on private: using TBase = linear_algebra_detail::SVector>; template diff --git a/include/maths/COrderings.h b/include/maths/COrderings.h index df5ba0d26a..eae1edccb9 100644 --- a/include/maths/COrderings.h +++ b/include/maths/COrderings.h @@ -614,21 +614,21 @@ class COrderings : private core::CNonInstantiatable { }; public: -// The logic in this function is rather subtle because we want to -// sort the collections in place. In particular, we create a sorted -// collection of indices where each index tells us where to get the -// element from at that location and we want to re-order all the -// collections by that ordering in place. If an index matches its -// position then we can move to the next position. Otherwise, we -// need to swap the items at the index in to its position. To work -// in place we need to do something with the items which are displaced. -// If these are the items required at the swapped in position then -// we are done. Otherwise, we just repeat until we find this position. -// It is easy to verify that this process finds a closed cycle with -// at most N steps. Each time a swap is made at least one more item -// is in its correct place, and we update the ordering accordingly. -// So the containers are sorted in at most O(N) additional steps to -// the N * log(N) taken to sort the indices. + // The logic in this function is rather subtle because we want to + // sort the collections in place. In particular, we create a sorted + // collection of indices where each index tells us where to get the + // element from at that location and we want to re-order all the + // collections by that ordering in place. If an index matches its + // position then we can move to the next position. Otherwise, we + // need to swap the items at the index in to its position. 
To work + // in place we need to do something with the items which are displaced. + // If these are the items required at the swapped in position then + // we are done. Otherwise, we just repeat until we find this position. + // It is easy to verify that this process finds a closed cycle with + // at most N steps. Each time a swap is made at least one more item + // is in its correct place, and we update the ordering accordingly. + // So the containers are sorted in at most O(N) additional steps to + // the N * log(N) taken to sort the indices. #define SIMULTANEOUS_SORT_IMPL \ if (boost::algorithm::is_sorted(keys.begin(), keys.end(), comp)) { \ return true; \ diff --git a/include/maths/CPeriodicityHypothesisTests.h b/include/maths/CPeriodicityHypothesisTests.h index dcc0ea5106..fe63c58a31 100644 --- a/include/maths/CPeriodicityHypothesisTests.h +++ b/include/maths/CPeriodicityHypothesisTests.h @@ -27,8 +27,10 @@ class CSeasonalTime; //! \brief Represents the result of running the periodicity //! hypothesis tests. -class MATHS_EXPORT CPeriodicityHypothesisTestsResult - : boost::equality_comparable> { +// clang-format off +class MATHS_EXPORT CPeriodicityHypothesisTestsResult : boost::equality_comparable > { + // clang-format on public: using TTimeTimePr = std::pair; diff --git a/include/maths/CSampling.h b/include/maths/CSampling.h index cd372c7cf4..3f15dea61a 100644 --- a/include/maths/CSampling.h +++ b/include/maths/CSampling.h @@ -123,10 +123,10 @@ class MATHS_EXPORT CSampling : private core::CNonInstantiatable { //! Reinitialize the random number generator. static void seed(); -//! \name Uniform Sampling -//! -//! Sample uniformly from a specified range -//@{ + //! \name Uniform Sampling + //! + //! Sample uniformly from a specified range + //@{ #define UNIFORM_SAMPLE(TYPE) \ static TYPE uniformSample(TYPE a, TYPE b); \ static TYPE uniformSample(CPRNG::CXorOShiro128Plus& rng, TYPE a, TYPE b); \ From 8665f500273ac11595254369d21290b93ecbc8c9 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 13 Apr 2018 23:39:50 +0100 Subject: [PATCH 18/29] More fixing of the format merge --- lib/core/CMonotonicTime.cc | 8 +- lib/maths/CBjkstUniqueValues.cc | 9 +- lib/maths/CPeriodicityHypothesisTests.cc | 192 +++++++++++++---------- lib/model/unittest/CStringStoreTest.cc | 2 +- 4 files changed, 118 insertions(+), 93 deletions(-) diff --git a/lib/core/CMonotonicTime.cc b/lib/core/CMonotonicTime.cc index 45c170ca5f..29dbff9ae3 100644 --- a/lib/core/CMonotonicTime.cc +++ b/lib/core/CMonotonicTime.cc @@ -22,8 +22,8 @@ uint64_t CMonotonicTime::milliseconds() const { int rc(-1); -// For milliseconds, use the coarse timers if available, as millisecond -// granularity is good enough + // For milliseconds, use the coarse timers if available, as millisecond + // granularity is good enough #if defined(CLOCK_MONOTONIC_COARSE) rc = ::clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); #elif defined(CLOCK_MONOTONIC) @@ -54,8 +54,8 @@ uint64_t CMonotonicTime::nanoseconds() const { int rc(-1); -// Don't use the coarse timers here, as they only provide around millisecond -// granularity + // Don't use the coarse timers here, as they only provide around millisecond + // granularity #if defined(CLOCK_MONOTONIC) rc = ::clock_gettime(CLOCK_MONOTONIC, &ts); #else diff --git a/lib/maths/CBjkstUniqueValues.cc b/lib/maths/CBjkstUniqueValues.cc index f310acca5b..19d8e50ab2 100644 --- a/lib/maths/CBjkstUniqueValues.cc +++ b/lib/maths/CBjkstUniqueValues.cc @@ -50,9 +50,12 @@ using TUInt8UInt8Pr = std::pair; //! 
|<-----8 bits---->|<-----8 bits---->|<-----8 bits---->| //! |(g(x) >> 8) % 256| g(x) % 256 | zeros(x) | //! \endcode -class CHashIterator - : public std::iterator, - private boost::less_than_comparable>> { +// clang-format off +class CHashIterator : public std::iterator, + private boost::less_than_comparable> > { + // clang-format on public: //! The STL that comes with g++ requires a default constructor - this //! will create an object that's suitable only to be assigned to, which diff --git a/lib/maths/CPeriodicityHypothesisTests.cc b/lib/maths/CPeriodicityHypothesisTests.cc index 8e85e1327e..d180cfa9cd 100644 --- a/lib/maths/CPeriodicityHypothesisTests.cc +++ b/lib/maths/CPeriodicityHypothesisTests.cc @@ -658,42 +658,44 @@ void CPeriodicityHypothesisTests::hypothesesForWeekly( hypotheses.resize(1); if (DAY % m_Period == 0) { - hypotheses[0] - .null(testForNull) - .addNested(testForPeriod) - .addNested(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly) - .finishedNested() - .finishedNested() - .addAlternative(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly) - .finishedNested() - .addAlternative(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly); + // clang-format off + hypotheses[0].null(testForNull) + .addNested(testForPeriod) + .addNested(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly) + .finishedNested() + .finishedNested() + .addAlternative(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly) + .finishedNested() + .addAlternative(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly); + // clang-format on } else { - hypotheses[0] - .null(testForNull) - .addNested(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly) - .finishedNested() - .addAlternative(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForPeriod) - .addNested(testForWeekly) - .finishedNested() - .addAlternative(testForWeekly); + // clang-format off + hypotheses[0].null(testForNull) + .addNested(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly) + .finishedNested() + .addAlternative(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForPeriod) + .addNested(testForWeekly) + .finishedNested() + .addAlternative(testForWeekly); + // clang-format on } } else if (m_Period % WEEK == 0) { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, @@ -717,27 +719,28 @@ void CPeriodicityHypothesisTests::hypothesesForWeekly( boost::cref(bucketsForTestingPeriod), _1); hypotheses.resize(1); - hypotheses[0] - .null(testForNull) - .addNested(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .addNested(testForPeriod) - .finishedNested() - .finishedNested() - .addAlternative(testForWeekly) - .addNested(testForPeriod) - .finishedNested() - .finishedNested() - 
.addAlternative(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .addNested(testForPeriod) - .finishedNested() - .finishedNested() - .addAlternative(testForWeekly) - .addNested(testForPeriod) - .finishedNested() - .addAlternative(testForPeriod); + // clang-format off + hypotheses[0].null(testForNull) + .addNested(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .addNested(testForPeriod) + .finishedNested() + .finishedNested() + .addAlternative(testForWeekly) + .addNested(testForPeriod) + .finishedNested() + .finishedNested() + .addAlternative(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .addNested(testForPeriod) + .finishedNested() + .finishedNested() + .addAlternative(testForWeekly) + .addNested(testForPeriod) + .finishedNested() + .addAlternative(testForPeriod); + // clang-format on } else { { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, @@ -759,18 +762,19 @@ void CPeriodicityHypothesisTests::hypothesesForWeekly( boost::cref(bucketsForTestingWeekly), _1); hypotheses.resize(2); - hypotheses[0] - .null(testForNull) - .addNested(testForDaily) - .addNested(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly) - .finishedNested() - .addAlternative(testForDailyWithWeekend) - .addNested(testForWeeklyGivenWeekend) - .finishedNested() - .addAlternative(testForWeekly); + // clang-format off + hypotheses[0].null(testForNull) + .addNested(testForDaily) + .addNested(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly) + .finishedNested() + .addAlternative(testForDailyWithWeekend) + .addNested(testForWeeklyGivenWeekend) + .finishedNested() + .addAlternative(testForWeekly); + // clang-format on } if (m_Period % DAY == 0) { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, @@ -784,12 +788,13 @@ void CPeriodicityHypothesisTests::hypothesesForWeekly( boost::cref(windowForTestingPeriod), boost::cref(bucketsForTestingPeriod), _1); - hypotheses[1] - .null(testForNull) - .addNested(testForDaily) - .addNested(testForPeriod) - .finishedNested() - .addAlternative(testForPeriod); + // clang-format off + hypotheses[1].null(testForNull) + .addNested(testForDaily) + .addNested(testForPeriod) + .finishedNested() + .addAlternative(testForPeriod); + // clang-format on } else { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingPeriod), @@ -799,7 +804,10 @@ void CPeriodicityHypothesisTests::hypothesesForWeekly( boost::cref(windowForTestingPeriod), boost::cref(bucketsForTestingPeriod), _1); - hypotheses[1].null(testForNull).addNested(testForPeriod); + // clang-format off + hypotheses[1].null(testForNull) + .addNested(testForPeriod); + // clang-format on } } } @@ -822,12 +830,13 @@ void CPeriodicityHypothesisTests::hypothesesForDaily( boost::cref(bucketsForTestingDaily), _1); hypotheses.resize(1); - hypotheses[0] - .null(testForNull) - .addNested(testForPeriod) - .addNested(testForDaily) - .finishedNested() - .addAlternative(testForDaily); + // clang-format off + hypotheses[0].null(testForNull) + .addNested(testForPeriod) + .addNested(testForDaily) + .finishedNested() + .addAlternative(testForDaily); + // clang-format on } else if (m_Period % DAY == 0) { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, this, boost::cref(windowForTestingPeriod), @@ -840,7 +849,11 @@ void 
CPeriodicityHypothesisTests::hypothesesForDaily( boost::cref(bucketsForTestingPeriod), _1); hypotheses.resize(1); - hypotheses[0].null(testForNull).addNested(testForDaily).addNested(testForPeriod); + // clang-format off + hypotheses[0].null(testForNull) + .addNested(testForDaily) + .addNested(testForPeriod); + // clang-format on } else { { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, @@ -851,7 +864,10 @@ void CPeriodicityHypothesisTests::hypothesesForDaily( boost::cref(bucketsForTestingDaily), _1); hypotheses.resize(2); - hypotheses[0].null(testForNull).addNested(testForDaily); + // clang-format off + hypotheses[0].null(testForNull) + .addNested(testForDaily); + // clang-format on } { auto testForNull = boost::bind(&CPeriodicityHypothesisTests::testForNull, @@ -861,7 +877,10 @@ void CPeriodicityHypothesisTests::hypothesesForDaily( boost::bind(&CPeriodicityHypothesisTests::testForPeriod, this, boost::cref(windowForTestingPeriod), boost::cref(bucketsForTestingPeriod), _1); - hypotheses[1].null(testForNull).addNested(testForPeriod); + // clang-format off + hypotheses[1].null(testForNull) + .addNested(testForPeriod); + // clang-format on } } } @@ -875,7 +894,10 @@ void CPeriodicityHypothesisTests::hypothesesForPeriod(const TTimeTimePr2Vec& win boost::cref(windows), boost::cref(buckets), _1); hypotheses.resize(1); - hypotheses[0].null(testForNull).addNested(testForPeriod); + // clang-format off + hypotheses[0].null(testForNull) + .addNested(testForPeriod); + // clang-format on } CPeriodicityHypothesisTestsResult diff --git a/lib/model/unittest/CStringStoreTest.cc b/lib/model/unittest/CStringStoreTest.cc index d2763301e0..e8d39cce60 100644 --- a/lib/model/unittest/CStringStoreTest.cc +++ b/lib/model/unittest/CStringStoreTest.cc @@ -39,7 +39,7 @@ class CStringThread : public core::CThread { void propagateLastThreadAssert() { if (m_LastException) { - throw * m_LastException; + throw *m_LastException; } } From 9a6e471c6337cc9619341ea203e4589eb17f42f2 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 13 Apr 2018 23:48:24 +0100 Subject: [PATCH 19/29] Switch to std shared pointers --- include/core/CContainerPrinter.h | 2 +- include/maths/CNaiveBayes.h | 5 +++-- include/maths/CTimeSeriesChangeDetector.h | 15 ++++++++------- include/maths/CTimeSeriesModel.h | 4 ++-- lib/maths/CTimeSeriesChangeDetector.cc | 19 +++++++++---------- lib/maths/CTimeSeriesDecompositionDetail.cc | 6 +++--- lib/maths/CTimeSeriesModel.cc | 8 ++++---- 7 files changed, 30 insertions(+), 29 deletions(-) diff --git a/include/core/CContainerPrinter.h b/include/core/CContainerPrinter.h index fb8331c59a..9b533bd346 100644 --- a/include/core/CContainerPrinter.h +++ b/include/core/CContainerPrinter.h @@ -274,7 +274,7 @@ class CORE_EXPORT CContainerPrinter : private CNonInstantiatable { return *value; } - //! Print a boost::shared_pointer. + //! Print a std::shared_ptr. template static std::string printElement(const std::shared_ptr& value) { if (value == std::shared_ptr()) { diff --git a/include/maths/CNaiveBayes.h b/include/maths/CNaiveBayes.h index fbef5ccbbb..c5a7cfb5a0 100644 --- a/include/maths/CNaiveBayes.h +++ b/include/maths/CNaiveBayes.h @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -128,7 +129,7 @@ class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayes virtual std::string print() const; private: - using TPriorPtr = boost::shared_ptr; + using TPriorPtr = std::shared_ptr; private: //! The density model.
@@ -221,7 +222,7 @@ class MATHS_EXPORT CNaiveBayes { std::string print() const; private: - using TFeatureDensityPtr = boost::shared_ptr; + using TFeatureDensityPtr = std::shared_ptr; using TFeatureDensityPtrVec = std::vector; //! \brief The data associated with a class. diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h index fcb7ac4344..68c59bc0c5 100644 --- a/include/maths/CTimeSeriesChangeDetector.h +++ b/include/maths/CTimeSeriesChangeDetector.h @@ -18,7 +18,8 @@ #include #include -#include + +#include namespace ml { namespace core { @@ -39,7 +40,7 @@ class CUnivariateChangeModel; //! \brief A description of a time series change. struct MATHS_EXPORT SChangeDescription { using TDouble2Vec = core::CSmallVector; - using TPriorPtr = boost::shared_ptr; + using TPriorPtr = std::shared_ptr; //! The types of change we can detect. enum EDescription { E_LevelShift, E_LinearScale, E_TimeShift }; @@ -69,8 +70,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector { using TTimeDoublePr = std::pair; using TTimeDoublePr1Vec = core::CSmallVector; using TWeightStyleVec = maths_t::TWeightStyleVec; - using TDecompositionPtr = boost::shared_ptr; - using TPriorPtr = boost::shared_ptr; + using TDecompositionPtr = std::shared_ptr; + using TPriorPtr = std::shared_ptr; using TOptionalChangeDescription = boost::optional; public: @@ -117,7 +118,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector { private: using TChangeModel = time_series_change_detector_detail::CUnivariateChangeModel; - using TChangeModelPtr = boost::shared_ptr; + using TChangeModelPtr = std::shared_ptr; using TChangeModelPtr5Vec = core::CSmallVector; using TMinMaxAccumulator = CBasicStatistics::CMinMax; @@ -157,8 +158,8 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable { using TTimeDoublePr = std::pair; using TTimeDoublePr1Vec = core::CSmallVector; using TWeightStyleVec = maths_t::TWeightStyleVec; - using TDecompositionPtr = boost::shared_ptr; - using TPriorPtr = boost::shared_ptr; + using TDecompositionPtr = std::shared_ptr; + using TPriorPtr = std::shared_ptr; using TOptionalChangeDescription = boost::optional; public: diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h index 9529f85444..4f80bbbc30 100644 --- a/include/maths/CTimeSeriesModel.h +++ b/include/maths/CTimeSeriesModel.h @@ -50,7 +50,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { using TDouble4Vec = core::CSmallVector; using TTimeDoublePr = std::pair; using TTimeDoublePrCBuf = boost::circular_buffer; - using TDecompositionPtr = boost::shared_ptr; + using TDecompositionPtr = std::shared_ptr; using TDecayRateController2Ary = boost::array; public: @@ -221,7 +221,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { using TMultivariatePriorCPtrSizePr1Vec = core::CSmallVector; using TModelCPtr1Vec = core::CSmallVector; - using TChangeDetectorPtr = boost::shared_ptr; + using TChangeDetectorPtr = std::shared_ptr; private: CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel& other, diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index b9a28e39c6..e5415b823b 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -26,7 +26,6 @@ #include #include -#include #include #include #include @@ -90,16 +89,16 @@ CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector( : m_MinimumTimeToDetect{minimumTimeToDetect}, 
m_MaximumTimeToDetect{maximumTimeToDetect}, m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, m_SampleCount{0}, m_CurrentEvidenceOfChange{0.0}, m_ChangeModels{ - boost::make_shared(trendModel, residualModel), - boost::make_shared(trendModel, residualModel), - boost::make_shared(trendModel, - residualModel, - -core::constants::HOUR), - boost::make_shared(trendModel, - residualModel, - +core::constants::HOUR)} { + std::make_shared(trendModel, residualModel), + std::make_shared(trendModel, residualModel), + std::make_shared(trendModel, + residualModel, + -core::constants::HOUR), + std::make_shared(trendModel, + residualModel, + +core::constants::HOUR)} { if (trendModel->seasonalComponents().size() > 0) { - m_ChangeModels.push_back(boost::make_shared( + m_ChangeModels.push_back(std::make_shared( trendModel, residualModel)); } } diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index c4b536de50..f77669a945 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -739,7 +739,7 @@ CTimeSeriesDecompositionDetail::CCalendarTest::CCalendarTest(const CCalendarTest bool isForForecast) : m_Machine{other.m_Machine}, m_DecayRate{other.m_DecayRate}, m_LastMonth{other.m_LastMonth}, m_Test{!isForForecast && other.m_Test - ? boost::make_shared( + ? std::make_shared( *other.m_Test) : 0} { } @@ -753,7 +753,7 @@ bool CTimeSeriesDecompositionDetail::CCalendarTest::acceptRestoreTraverser(core: RESTORE_BUILT_IN(LAST_MONTH_6_3_TAG, m_LastMonth); RESTORE_SETUP_TEARDOWN( CALENDAR_TEST_6_3_TAG, - m_Test = boost::make_shared(m_DecayRate), + m_Test = std::make_shared(m_DecayRate), traverser.traverseSubLevel(boost::bind( &CCalendarCyclicTest::acceptRestoreTraverser, m_Test.get(), _1)), /**/) @@ -893,7 +893,7 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::apply(std::size_t symbol, switch (state) { case CC_TEST: if (!m_Test) { - m_Test = boost::make_shared(m_DecayRate); + m_Test = std::make_shared(m_DecayRate); m_LastMonth = this->month(time) + 2; } break; diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index d0a6c13738..0006a5d5ad 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -57,7 +57,7 @@ using TSizeDoublePr10Vec = core::CSmallVector; using TTail10Vec = core::CSmallVector; using TOptionalSize = boost::optional; using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; -using TChangeDetectorPtr = boost::shared_ptr; +using TChangeDetectorPtr = std::shared_ptr; using TMultivariatePriorCPtrSizePr1Vec = CTimeSeriesCorrelations::TMultivariatePriorCPtrSizePr1Vec; //! The decay rate controllers we maintain. 
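The shared-pointer replacements in this patch follow a single rule: construct shared state with std::make_shared rather than wrapping a raw new in a shared_ptr. A minimal sketch of the difference is below; the SStandIn type is illustrative only, not one of the classes touched by this patch.

    #include <memory>

    struct SStandIn {
        SStandIn(double decayRate, int tag) : s_DecayRate{decayRate}, s_Tag{tag} {}
        double s_DecayRate;
        int s_Tag;
    };

    int main() {
        // Two heap allocations: one for the object and one for the shared_ptr
        // control block; the raw new can also leak if another argument
        // expression throws before the shared_ptr takes ownership (pre-C++17).
        std::shared_ptr<SStandIn> a{new SStandIn{0.05, 1}};

        // One allocation holding the object and control block together, and
        // no explicit new at the call site.
        auto b = std::make_shared<SStandIn>(0.05, 2);

        return a->s_Tag + b->s_Tag == 3 ? 0 : 1;
    }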
@@ -269,7 +269,7 @@ double tailWinsorisationWeight(const CMultivariatePrior& prior, condition[j++] = std::make_pair(i, value[i]); } } - boost::shared_ptr conditional( + std::shared_ptr conditional( prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); return tailWinsorisationWeight(*conditional, derate, scale, value[dimension]); } @@ -1230,7 +1230,7 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam RESTORE_BUILT_IN(CURRENT_CHANGE_INTERVAL_6_3_TAG, m_CurrentChangeInterval) RESTORE_SETUP_TEARDOWN( CHANGE_DETECTOR_6_3_TAG, - m_ChangeDetector = boost::make_shared( + m_ChangeDetector = std::make_shared( m_TrendModel, m_ResidualModel), traverser.traverseSubLevel(boost::bind( &CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, @@ -1386,7 +1386,7 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par pValueFromTailWinsorisationWeight(weight) <= 1e-5) { m_CurrentChangeInterval += this->params().bucketLength(); if (this->params().testForChange(m_CurrentChangeInterval)) { - m_ChangeDetector = boost::make_shared( + m_ChangeDetector = std::make_shared( m_TrendModel, m_ResidualModel, minimumTimeToDetect, maximumTimeToTest); m_CurrentChangeInterval = 0; } From 3fb0911c21c3a036ebcabca238ea5b02734c079f Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 16 Apr 2018 12:38:54 +0100 Subject: [PATCH 20/29] Fix fallout from merge --- lib/maths/unittest/CNaiveBayesTest.cc | 44 ++++++++--------- .../unittest/CTimeSeriesChangeDetectorTest.cc | 47 +++++++++---------- 2 files changed, 45 insertions(+), 46 deletions(-) diff --git a/lib/maths/unittest/CNaiveBayesTest.cc b/lib/maths/unittest/CNaiveBayesTest.cc index 0786eec880..df14117fb0 100644 --- a/lib/maths/unittest/CNaiveBayesTest.cc +++ b/lib/maths/unittest/CNaiveBayesTest.cc @@ -35,9 +35,9 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumula using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; void CNaiveBayesTest::testClassification() { - LOG_DEBUG("+---------------------------------------+"); - LOG_DEBUG("| CNaiveBayesTest::testClassification |"); - LOG_DEBUG("+---------------------------------------+"); + LOG_DEBUG(<< "+---------------------------------------+"); + LOG_DEBUG(<< "| CNaiveBayesTest::testClassification |"); + LOG_DEBUG(<< "+---------------------------------------+"); // We'll test classification using Gaussian naive Bayes. We // test: @@ -129,8 +129,8 @@ void CNaiveBayesTest::testClassification() { double p2_{p[0].second == 1 ? 
p[1].first : p[0].first}; if (i % 10 == 0) { - LOG_DEBUG(i << ") expected P(1) = " << p1 << ", P(2) = " << p2 - << " got P(1) = " << p1_ << ", P(2) = " << p2_); + LOG_DEBUG(<< i << ") expected P(1) = " << p1 << ", P(2) = " << p2 + << " got P(1) = " << p1_ << ", P(2) = " << p2_); } CPPUNIT_ASSERT_EQUAL(std::size_t(2), p.size()); @@ -166,7 +166,7 @@ void CNaiveBayesTest::testClassification() { } for (std::size_t i = 0u; i < 3; ++i) { - LOG_DEBUG("Mean relative error = " + LOG_DEBUG(<< "Mean relative error = " << maths::CBasicStatistics::mean(meanErrors[i])); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanErrors[i]) < 0.05); meanMeanError += meanErrors[i]; @@ -175,9 +175,9 @@ void CNaiveBayesTest::testClassification() { } void CNaiveBayesTest::testPropagationByTime() { - LOG_DEBUG("+------------------------------------------+"); - LOG_DEBUG("| CNaiveBayesTest::testPropagationByTime |"); - LOG_DEBUG("+------------------------------------------+"); + LOG_DEBUG(<< "+------------------------------------------+"); + LOG_DEBUG(<< "| CNaiveBayesTest::testPropagationByTime |"); + LOG_DEBUG(<< "+------------------------------------------+"); // Make feature distributions drift over time and verify that // the classifier adapts. @@ -215,9 +215,9 @@ void CNaiveBayesTest::testPropagationByTime() { TDoubleSizePrVec probabilities[]{ nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}), nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}})}; - LOG_DEBUG("Aged class probabilities = " + LOG_DEBUG(<< "Aged class probabilities = " << core::CContainerPrinter::print(probabilities[0])); - LOG_DEBUG("Class probabilities = " + LOG_DEBUG(<< "Class probabilities = " << core::CContainerPrinter::print(probabilities[1])); CPPUNIT_ASSERT_EQUAL(std::size_t(2), probabilities[0][0].second); CPPUNIT_ASSERT(probabilities[0][0].first > 0.99); @@ -228,9 +228,9 @@ void CNaiveBayesTest::testPropagationByTime() { TDoubleSizePrVec probabilities[]{ nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}), nb[1].highestClassProbabilities(2, {{10.0}, {10.0}})}; - LOG_DEBUG("Aged class probabilities = " + LOG_DEBUG(<< "Aged class probabilities = " << core::CContainerPrinter::print(probabilities[0])); - LOG_DEBUG("Class probabilities = " + LOG_DEBUG(<< "Class probabilities = " << core::CContainerPrinter::print(probabilities[1])); CPPUNIT_ASSERT_EQUAL(std::size_t(1), probabilities[0][0].second); CPPUNIT_ASSERT(probabilities[0][0].first > 0.99); @@ -240,9 +240,9 @@ void CNaiveBayesTest::testPropagationByTime() { } void CNaiveBayesTest::testMemoryUsage() { - LOG_DEBUG("+------------------------------------+"); - LOG_DEBUG("| CNaiveBayesTest::testMemoryUsage |"); - LOG_DEBUG("+------------------------------------+"); + LOG_DEBUG(<< "+------------------------------------+"); + LOG_DEBUG(<< "| CNaiveBayesTest::testMemoryUsage |"); + LOG_DEBUG(<< "+------------------------------------+"); // Check invariants. 
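Every logging call in these test hunks changes from LOG_DEBUG("...") to LOG_DEBUG(<< "..."). The leading << is not a typo: the macro pastes its argument directly after a stream object, so the insertion operator has to come from the call site. The following sketch shows the shape of such a macro; it is illustrative and not the actual ml::core logger definition.

    #include <iostream>
    #include <sstream>

    // The macro argument, e.g. '<< "x = " << x', is pasted after 'ss', so the
    // whole statement expands to 'ss << "x = " << x;'.
    #define LOG_DEBUG(message)                                                 \
        do {                                                                   \
            std::ostringstream ss;                                             \
            ss message;                                                        \
            std::cout << "DEBUG " << ss.str() << '\n';                         \
        } while (false)

    int main() {
        int sampleCount{42};
        LOG_DEBUG(<< "sample count = " << sampleCount);
        return 0;
    }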
@@ -275,18 +275,18 @@ void CNaiveBayesTest::testMemoryUsage() { TMemoryUsagePtr mem{new core::CMemoryUsage}; nb->debugMemoryUsage(mem.get()); - LOG_DEBUG("Memory = " << memoryUsage); + LOG_DEBUG(<< "Memory = " << memoryUsage); CPPUNIT_ASSERT_EQUAL(memoryUsage, mem->usage()); - LOG_DEBUG("Memory = " << core::CMemory::dynamicSize(nb)); + LOG_DEBUG(<< "Memory = " << core::CMemory::dynamicSize(nb)); CPPUNIT_ASSERT_EQUAL(memoryUsage + sizeof(maths::CNaiveBayes), core::CMemory::dynamicSize(nb)); } void CNaiveBayesTest::testPersist() { - LOG_DEBUG("+--------------------------------+"); - LOG_DEBUG("| CNaiveBayesTest::testPersist |"); - LOG_DEBUG("+--------------------------------+"); + LOG_DEBUG(<< "+--------------------------------+"); + LOG_DEBUG(<< "| CNaiveBayesTest::testPersist |"); + LOG_DEBUG(<< "+--------------------------------+"); test::CRandomNumbers rng; @@ -316,7 +316,7 @@ void CNaiveBayesTest::testPersist() { inserter.toXml(origXml); } - LOG_DEBUG("Naive Bayes XML representation:\n" << origXml); + LOG_DEBUG(<< "Naive Bayes XML representation:\n" << origXml); core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc index ce88c4d4ee..3b167a441d 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc @@ -27,9 +27,8 @@ #include "TestUtils.h" -#include - #include +#include #include using namespace ml; @@ -40,8 +39,8 @@ using TDoubleVec = std::vector; using TDouble2Vec = core::CSmallVector; using TTimeDoublePr = std::pair; using TTimeDoublePrCBuf = boost::circular_buffer; -using TDecompositionPtr = boost::shared_ptr; -using TPriorPtr = boost::shared_ptr; +using TDecompositionPtr = std::shared_ptr; +using TPriorPtr = std::shared_ptr; using TPriorPtrVec = std::vector; core_t::TTime BUCKET_LENGTH{1800}; @@ -83,9 +82,9 @@ TPriorPtr makeResidualModel() { } void CTimeSeriesChangeDetectorTest::testNoChange() { - LOG_DEBUG("+-----------------------------------------------+"); - LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testNoChange |"); - LOG_DEBUG("+-----------------------------------------------+"); + LOG_DEBUG(<< "+-----------------------------------------------+"); + LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testNoChange |"); + LOG_DEBUG(<< "+-----------------------------------------------+"); test::CRandomNumbers rng; @@ -95,7 +94,7 @@ void CTimeSeriesChangeDetectorTest::testNoChange() { TDoubleVec samples; for (std::size_t t = 0u; t < 100; ++t) { if (t % 10 == 0) { - LOG_DEBUG(t << "%"); + LOG_DEBUG(<< t << "%"); } switch (t % 3) { @@ -149,9 +148,9 @@ void CTimeSeriesChangeDetectorTest::testNoChange() { } void CTimeSeriesChangeDetectorTest::testLevelShift() { - LOG_DEBUG("+-------------------------------------------------+"); - LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testLevelShift |"); - LOG_DEBUG("+-------------------------------------------------+"); + LOG_DEBUG(<< "+-------------------------------------------------+"); + LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testLevelShift |"); + LOG_DEBUG(<< "+-------------------------------------------------+"); TGeneratorVec trends{constant, ramp, smoothDaily, weekends, spikeyDaily}; @@ -161,9 +160,9 @@ void CTimeSeriesChangeDetectorTest::testLevelShift() { } void CTimeSeriesChangeDetectorTest::testLinearScale() { - LOG_DEBUG("+--------------------------------------------------+"); - LOG_DEBUG("| 
CTimeSeriesChangeDetectorTest::testLinearScale |"); - LOG_DEBUG("+--------------------------------------------------+"); + LOG_DEBUG(<< "+--------------------------------------------------+"); + LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testLinearScale |"); + LOG_DEBUG(<< "+--------------------------------------------------+"); TGeneratorVec trends{smoothDaily, spikeyDaily}; @@ -173,9 +172,9 @@ void CTimeSeriesChangeDetectorTest::testLinearScale() { } void CTimeSeriesChangeDetectorTest::testTimeShift() { - LOG_DEBUG("+------------------------------------------------+"); - LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testTimeShift |"); - LOG_DEBUG("+------------------------------------------------+"); + LOG_DEBUG(<< "+------------------------------------------------+"); + LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testTimeShift |"); + LOG_DEBUG(<< "+------------------------------------------------+"); TGeneratorVec trends{smoothDaily, spikeyDaily}; @@ -193,9 +192,9 @@ void CTimeSeriesChangeDetectorTest::testTimeShift() { } void CTimeSeriesChangeDetectorTest::testPersist() { - LOG_DEBUG("+----------------------------------------------+"); - LOG_DEBUG("| CTimeSeriesChangeDetectorTest::testPersist |"); - LOG_DEBUG("+----------------------------------------------+"); + LOG_DEBUG(<< "+----------------------------------------------+"); + LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testPersist |"); + LOG_DEBUG(<< "+----------------------------------------------+"); test::CRandomNumbers rng; @@ -249,8 +248,8 @@ void CTimeSeriesChangeDetectorTest::testPersist() { &maths::CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser, &restoredDetector, boost::cref(params), _1)); - LOG_DEBUG("expected " << origDetector.checksum() << " got " - << restoredDetector.checksum()); + LOG_DEBUG(<< "expected " << origDetector.checksum() << " got " + << restoredDetector.checksum()); CPPUNIT_ASSERT_EQUAL(origDetector.checksum(), restoredDetector.checksum()); } } @@ -292,7 +291,7 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends, TDoubleVec samples; for (std::size_t t = 0u; t < 100; ++t) { if (t % 10 == 0) { - LOG_DEBUG(t << "%"); + LOG_DEBUG(<< t << "%"); } rng.generateNormalSamples(0.0, 1.0, 1000, samples); @@ -348,7 +347,7 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends, meanBucketsToDetect.add(static_cast(*bucketsToDetect)); } - LOG_DEBUG("buckets to detect = " << maths::CBasicStatistics::mean(meanBucketsToDetect)); + LOG_DEBUG(<< "buckets to detect = " << maths::CBasicStatistics::mean(meanBucketsToDetect)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanBucketsToDetect) < expectedMeanBucketsToDetectChange); } From 5a250d12b5bdcabb8c740578496b1feb393d7657 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 23 Apr 2018 18:12:51 +0100 Subject: [PATCH 21/29] Merge master --- bin/autoconfig/Main.cc | 9 +- bin/autodetect/Main.cc | 44 +- bin/categorize/Main.cc | 45 +- bin/normalize/Main.cc | 23 +- dev-tools/clang-format.sh | 23 +- include/maths/CBasicStatistics.h | 3 - include/maths/CBootstrapClusterer.h | 3 - include/maths/CConstantPrior.h | 41 +- include/maths/CGammaRateConjugate.h | 58 +-- include/maths/CLogNormalMeanPrecConjugate.h | 58 +-- include/maths/CModel.h | 78 +-- include/maths/CMultimodalPrior.h | 60 +-- include/maths/CMultimodalPriorUtils.h | 236 ++++----- include/maths/CMultinomialConjugate.h | 54 +- include/maths/CMultivariateConstantPrior.h | 18 +- include/maths/CMultivariateMultimodalPrior.h | 183 +++---- 
include/maths/CMultivariateNormalConjugate.h | 79 ++- include/maths/CMultivariateOneOfNPrior.h | 24 +- include/maths/CMultivariatePrior.h | 51 +- include/maths/CNormalMeanPrecConjugate.h | 54 +- include/maths/COneOfNPrior.h | 57 +-- include/maths/CPoissonMeanConjugate.h | 57 +-- include/maths/CPrior.h | 94 ++-- include/maths/CPriorDetail.h | 10 +- include/maths/CTimeSeriesChangeDetector.h | 34 +- include/maths/CTimeSeriesDecomposition.h | 6 +- .../maths/CTimeSeriesDecompositionDetail.h | 7 +- .../maths/CTimeSeriesDecompositionInterface.h | 10 +- include/maths/CTimeSeriesDecompositionStub.h | 3 +- include/maths/CTimeSeriesModel.h | 59 +-- include/maths/Constants.h | 34 -- include/maths/MathsTypes.h | 318 +++++++++--- include/model/CAnomalyDetectorModel.h | 4 - include/model/CIndividualModel.h | 2 +- lib/api/dump_state/Main.cc | 8 +- lib/api/unittest/CAnomalyJobTest.cc | 4 - .../unittest/CDetectionRulesJsonParserTest.cc | 40 -- lib/api/unittest/CMultiFileDataAdderTest.cc | 8 +- .../unittest/CSingleStreamDataAdderTest.cc | 8 +- .../unittest/CAutoconfigurerParamsTest.cc | 10 - lib/config/unittest/CDataSemanticsTest.cc | 25 - .../unittest/CDataSummaryStatisticsTest.cc | 25 - .../unittest/CDetectorEnumeratorTest.cc | 5 - lib/config/unittest/CReportWriterTest.cc | 9 - lib/core/unittest/CJsonLogLayoutTest.cc | 1 - lib/core/unittest/CMemoryUsageTest.cc | 4 - lib/core/unittest/CStatisticsTest.cc | 4 - lib/core/unittest/CVectorRangeTest.cc | 12 - lib/maths/CConstantPrior.cc | 58 +-- lib/maths/CGammaRateConjugate.cc | 208 ++++---- lib/maths/CKMeansOnline1d.cc | 16 +- lib/maths/CLogNormalMeanPrecConjugate.cc | 361 ++++++-------- lib/maths/CModel.cc | 56 +-- lib/maths/CMultimodalPrior.cc | 209 ++++---- lib/maths/CMultinomialConjugate.cc | 75 ++- lib/maths/CMultivariateConstantPrior.cc | 21 +- lib/maths/CMultivariateMultimodalPrior.cc | 63 ++- lib/maths/CMultivariateOneOfNPrior.cc | 70 ++- lib/maths/CMultivariatePrior.cc | 66 ++- lib/maths/CNaiveBayes.cc | 11 +- lib/maths/CNormalMeanPrecConjugate.cc | 224 ++++----- lib/maths/COneOfNPrior.cc | 106 ++-- lib/maths/CPoissonMeanConjugate.cc | 134 +++-- lib/maths/CPrior.cc | 70 ++- lib/maths/CTimeSeriesChangeDetector.cc | 107 ++-- lib/maths/CTimeSeriesDecomposition.cc | 4 +- lib/maths/CTimeSeriesDecompositionDetail.cc | 55 ++- lib/maths/CTimeSeriesDecompositionStub.cc | 3 +- lib/maths/CTimeSeriesModel.cc | 238 ++++----- lib/maths/CTools.cc | 1 + lib/maths/CTrendComponent.cc | 3 +- lib/maths/CXMeansOnline1d.cc | 51 +- lib/maths/Constants.cc | 8 - lib/maths/MathsTypes.cc | 408 +++------------ .../unittest/CAgglomerativeClustererTest.cc | 16 - lib/maths/unittest/CAssignmentTest.cc | 4 - lib/maths/unittest/CBasicStatisticsTest.cc | 32 -- lib/maths/unittest/CBjkstUniqueValuesTest.cc | 24 - lib/maths/unittest/CBootstrapClustererTest.cc | 28 -- lib/maths/unittest/CBoundingBoxTest.cc | 12 +- ...CCalendarComponentAdaptiveBucketingTest.cc | 32 -- lib/maths/unittest/CCalendarFeatureTest.cc | 16 - lib/maths/unittest/CCategoricalToolsTest.cc | 18 - lib/maths/unittest/CChecksumTest.cc | 28 -- lib/maths/unittest/CClustererTest.cc | 4 - lib/maths/unittest/CCountMinSketchTest.cc | 12 - .../unittest/CDecayRateControllerTest.cc | 12 - lib/maths/unittest/CEntropySketchTest.cc | 4 - lib/maths/unittest/CEqualWithToleranceTest.cc | 12 - lib/maths/unittest/CForecastTest.cc | 55 +-- lib/maths/unittest/CGammaRateConjugateTest.cc | 225 +++------ lib/maths/unittest/CGramSchmidtTest.cc | 16 - .../unittest/CInformationCriteriaTest.cc | 16 - lib/maths/unittest/CIntegerToolsTest.cc | 16 - 
lib/maths/unittest/CIntegrationTest.cc | 16 - lib/maths/unittest/CKMeansFastTest.cc | 28 -- lib/maths/unittest/CKMeansOnlineTest.cc | 36 -- lib/maths/unittest/CKMostCorrelatedTest.cc | 40 -- lib/maths/unittest/CKdTreeTest.cc | 8 - .../unittest/CLassoLogisticRegressionTest.cc | 7 - lib/maths/unittest/CLinearAlgebraTest.cc | 44 -- .../CLogNormalMeanPrecConjugateTest.cc | 224 +++------ lib/maths/unittest/CLogTDistributionTest.cc | 16 - lib/maths/unittest/CMathsMemoryTest.cc | 21 +- .../unittest/CMixtureDistributionTest.cc | 20 - lib/maths/unittest/CModelTest.cc | 49 +- lib/maths/unittest/CMultimodalPriorTest.cc | 169 ++----- .../unittest/CMultinomialConjugateTest.cc | 138 +----- .../CMultivariateConstantPriorTest.cc | 139 ++---- .../CMultivariateMultimodalPriorTest.cc | 201 +++----- .../CMultivariateNormalConjugateTest.cc | 185 +++---- .../unittest/CMultivariateOneOfNPriorTest.cc | 200 +++----- lib/maths/unittest/CNaiveBayesTest.cc | 23 +- .../unittest/CNaturalBreaksClassifierTest.cc | 16 - .../unittest/CNormalMeanPrecConjugateTest.cc | 241 +++------ lib/maths/unittest/COneOfNPriorTest.cc | 129 ++--- lib/maths/unittest/COrderingsTest.cc | 47 +- lib/maths/unittest/COrdinalTest.cc | 25 - lib/maths/unittest/CPRNGTest.cc | 12 - lib/maths/unittest/CPackedBitVectorTest.cc | 28 -- .../CPeriodicityHypothesisTestsTest.cc | 23 +- .../unittest/CPoissonMeanConjugateTest.cc | 130 ++--- lib/maths/unittest/CPriorTest.cc | 19 +- .../unittest/CProbabilityAggregatorsTest.cc | 16 - .../unittest/CProbabilityCalibratorTest.cc | 4 - lib/maths/unittest/CQDigestTest.cc | 24 - lib/maths/unittest/CQuantileSketchTest.cc | 32 -- .../unittest/CRadialBasisFunctionTest.cc | 16 - .../CRandomProjectionClustererTest.cc | 23 - lib/maths/unittest/CRegressionTest.cc | 60 --- lib/maths/unittest/CSamplingTest.cc | 8 - ...CSeasonalComponentAdaptiveBucketingTest.cc | 48 -- lib/maths/unittest/CSeasonalComponentTest.cc | 24 - lib/maths/unittest/CSetToolsTest.cc | 16 - lib/maths/unittest/CSignalTest.cc | 24 - lib/maths/unittest/CSolversTest.cc | 12 - lib/maths/unittest/CSplineTest.cc | 28 -- lib/maths/unittest/CStatisticalTestsTest.cc | 8 - .../unittest/CTimeSeriesChangeDetectorTest.cc | 40 +- .../unittest/CTimeSeriesDecompositionTest.cc | 104 +--- lib/maths/unittest/CTimeSeriesModelTest.cc | 466 +++++++----------- lib/maths/unittest/CToolsTest.cc | 30 +- lib/maths/unittest/CTrendComponentTest.cc | 16 - lib/maths/unittest/CTrendTestsTest.cc | 12 - lib/maths/unittest/CXMeansOnline1dTest.cc | 44 -- lib/maths/unittest/CXMeansOnlineTest.cc | 32 -- lib/maths/unittest/CXMeansTest.cc | 28 -- lib/maths/unittest/TestUtils.cc | 53 +- lib/maths/unittest/TestUtils.h | 37 +- lib/model/CEventRateModel.cc | 56 +-- lib/model/CEventRatePopulationModel.cc | 53 +- lib/model/CIndividualModel.cc | 4 +- lib/model/CMetricModel.cc | 90 ++-- lib/model/CMetricPopulationModel.cc | 65 ++- lib/model/CModelDetailsView.cc | 15 +- lib/model/CModelTools.cc | 3 +- .../CProbabilityAndInfluenceCalculator.cc | 60 +-- lib/model/CRuleCondition.cc | 6 +- lib/model/ModelTypes.cc | 33 +- .../CAnnotatedProbabilityBuilderTest.cc | 16 +- lib/model/unittest/CCountingModelTest.cc | 4 - lib/model/unittest/CDetectionRuleTest.cc | 26 - lib/model/unittest/CDetectorEqualizerTest.cc | 6 - .../unittest/CDynamicStringIdRegistryTest.cc | 4 - .../unittest/CEventRateDataGathererTest.cc | 13 - lib/model/unittest/CEventRateModelTest.cc | 63 +-- .../CEventRatePopulationDataGathererTest.cc | 14 - .../unittest/CEventRatePopulationModelTest.cc | 55 +-- .../unittest/CHierarchicalResultsTest.cc | 22 
- .../unittest/CMemoryUsageEstimatorTest.cc | 6 - lib/model/unittest/CMetricDataGathererTest.cc | 18 - lib/model/unittest/CMetricModelTest.cc | 110 ++-- .../CMetricPopulationDataGathererTest.cc | 22 - .../unittest/CMetricPopulationModelTest.cc | 55 +-- lib/model/unittest/CModelDetailsViewTest.cc | 2 - lib/model/unittest/CModelToolsTest.cc | 22 +- .../CProbabilityAndInfluenceCalculatorTest.cc | 437 +++++++--------- lib/model/unittest/CToolsTest.cc | 2 - lib/test/CTestTimer.cc | 8 +- lib/ver/CBuildInfo.cc.dev_template | 31 +- lib/ver/CBuildInfo.cc.tagged_template | 31 +- 181 files changed, 3172 insertions(+), 6829 deletions(-) diff --git a/bin/autoconfig/Main.cc b/bin/autoconfig/Main.cc index d6d1e8eec2..1672576de0 100644 --- a/bin/autoconfig/Main.cc +++ b/bin/autoconfig/Main.cc @@ -81,13 +81,12 @@ int main(int argc, char** argv) { return EXIT_FAILURE; } - using TInputParserCUPtr = const std::unique_ptr<ml::api::CInputParser>; - TInputParserCUPtr inputParser{[lengthEncodedInput, &ioMgr, delimiter]() -> ml::api::CInputParser* { + using TInputParserUPtr = std::unique_ptr<ml::api::CInputParser>; + const TInputParserUPtr inputParser{[lengthEncodedInput, &ioMgr, delimiter]() -> TInputParserUPtr { if (lengthEncodedInput) { - return new ml::api::CLengthEncodedInputParser(ioMgr.inputStream()); + return std::make_unique<ml::api::CLengthEncodedInputParser>(ioMgr.inputStream()); } - return new ml::api::CCsvInputParser(ioMgr.inputStream(), delimiter); + return std::make_unique<ml::api::CCsvInputParser>(ioMgr.inputStream(), delimiter); }()}; // This manages the full parameterization of the autoconfigurer. diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 5d90b93977..eab9f3223b 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -162,9 +162,8 @@ int main(int argc, char** argv) { return EXIT_FAILURE; } - using TDataSearcherCUPtr = const std::unique_ptr<ml::core::CDataSearcher>; - TDataSearcherCUPtr restoreSearcher{[isRestoreFileNamedPipe, - &ioMgr]() -> ml::core::CDataSearcher* { + using TDataSearcherUPtr = std::unique_ptr<ml::core::CDataSearcher>; + const TDataSearcherUPtr restoreSearcher{[isRestoreFileNamedPipe, &ioMgr]() -> TDataSearcherUPtr { if (ioMgr.restoreStream()) { // Check whether state is restored from a file, if so we assume that this is a debugging case // and therefore does not originate from X-Pack.
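The same refactoring repeats in all four Main.cc files touched by this series: each factory lambda stops returning an owning raw pointer that a const unique_ptr alias then adopts, and instead returns std::unique_ptr directly via std::make_unique. A minimal self-contained sketch of the idiom, with toy types standing in for the real ml::api parsers:

    #include <iostream>
    #include <memory>

    struct Parser {
        virtual ~Parser() = default;
        virtual const char* name() const = 0;
    };
    struct CsvParser : Parser {
        const char* name() const override { return "csv"; }
    };
    struct LengthEncodedParser : Parser {
        const char* name() const override { return "length-encoded"; }
    };

    int main() {
        bool lengthEncodedInput = false;
        using TParserUPtr = std::unique_ptr<Parser>;
        // Immediately-invoked lambda: the branch picks the concrete type,
        // ownership is never held by a raw pointer, and the resulting
        // unique_ptr can itself be const.
        const TParserUPtr parser{[lengthEncodedInput]() -> TParserUPtr {
            if (lengthEncodedInput) {
                return std::make_unique<LengthEncodedParser>();
            }
            return std::make_unique<CsvParser>();
        }()};
        std::cout << parser->name() << '\n'; // prints "csv"
        return 0;
    }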
@@ -173,44 +172,41 @@ int main(int argc, char** argv) { auto strm = std::make_shared<boost::iostreams::filtering_istream>(); strm->push(ml::api::CStateRestoreStreamFilter()); strm->push(*ioMgr.restoreStream()); - return new ml::api::CSingleStreamSearcher(strm); + return std::make_unique<ml::api::CSingleStreamSearcher>(strm); } - return new ml::api::CSingleStreamSearcher(ioMgr.restoreStream()); + return std::make_unique<ml::api::CSingleStreamSearcher>(ioMgr.restoreStream()); } return nullptr; }()}; - using TDataAdderCUPtr = const std::unique_ptr<ml::core::CDataAdder>; - TDataAdderCUPtr persister{[&ioMgr]() -> ml::core::CDataAdder* { + using TDataAdderUPtr = std::unique_ptr<ml::core::CDataAdder>; + const TDataAdderUPtr persister{[&ioMgr]() -> TDataAdderUPtr { if (ioMgr.persistStream()) { - return new ml::api::CSingleStreamDataAdder(ioMgr.persistStream()); + return std::make_unique<ml::api::CSingleStreamDataAdder>(ioMgr.persistStream()); } return nullptr; }()}; if (persistInterval >= 0 && persister == nullptr) { - LOG_FATAL(<< "Periodic persistence cannot be enabled using the " - "'persistInterval' argument " - "unless a place to persist to has been specified " - "using the 'persist' argument"); + LOG_FATAL(<< "Periodic persistence cannot be enabled using the 'persistInterval' argument " + "unless a place to persist to has been specified using the 'persist' argument"); return EXIT_FAILURE; } - using TBackgroundPersisterCUPtr = const std::unique_ptr<ml::api::CBackgroundPersister>; - TBackgroundPersisterCUPtr periodicPersister{ - [persistInterval, &persister]() -> ml::api::CBackgroundPersister* { - if (persistInterval >= 0) { - return new ml::api::CBackgroundPersister(persistInterval, *persister); - } - return nullptr; - }()}; + using TBackgroundPersisterUPtr = std::unique_ptr<ml::api::CBackgroundPersister>; + const TBackgroundPersisterUPtr periodicPersister{[persistInterval, &persister]() -> TBackgroundPersisterUPtr { + if (persistInterval >= 0) { + return std::make_unique<ml::api::CBackgroundPersister>(persistInterval, *persister); + } + return nullptr; + }()}; - using InputParserCUPtr = const std::unique_ptr<ml::api::CInputParser>; - InputParserCUPtr inputParser{[lengthEncodedInput, &ioMgr, delimiter]() -> ml::api::CInputParser* { + using InputParserCUPtr = std::unique_ptr<ml::api::CInputParser>; + const InputParserCUPtr inputParser{[lengthEncodedInput, &ioMgr, delimiter]() -> InputParserCUPtr { if (lengthEncodedInput) { - return new ml::api::CLengthEncodedInputParser(ioMgr.inputStream()); + return std::make_unique<ml::api::CLengthEncodedInputParser>(ioMgr.inputStream()); } - return new ml::api::CCsvInputParser(ioMgr.inputStream(), delimiter); + return std::make_unique<ml::api::CCsvInputParser>(ioMgr.inputStream(), delimiter); }()}; ml::core::CJsonOutputStreamWrapper wrappedOutputStream(ioMgr.outputStream()); diff --git a/bin/categorize/Main.cc b/bin/categorize/Main.cc index d8d281853b..e20b63668b 100644 --- a/bin/categorize/Main.cc +++ b/bin/categorize/Main.cc @@ -113,9 +113,8 @@ int main(int argc, char** argv) { } ml::api::CFieldConfig fieldConfig(categorizationFieldName); - using TDataSearcherCUPtr = const std::unique_ptr<ml::core::CDataSearcher>; - TDataSearcherCUPtr restoreSearcher{[isRestoreFileNamedPipe, - &ioMgr]() -> ml::core::CDataSearcher* { + using TDataSearcherUPtr = std::unique_ptr<ml::core::CDataSearcher>; + const TDataSearcherUPtr restoreSearcher{[isRestoreFileNamedPipe, &ioMgr]() -> TDataSearcherUPtr { if (ioMgr.restoreStream()) { // Check whether state is restored from a file, if so we assume that this is a debugging case // and therefore does not originate from X-Pack.
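The restore path above composes the state-restore filter and the raw restore stream with boost::iostreams before the searcher takes ownership. A sketch of just the stream chaining, with a std::stringstream standing in for ioMgr.restoreStream() and the ml::api::CStateRestoreStreamFilter omitted:

    #include <boost/iostreams/filtering_stream.hpp>

    #include <iostream>
    #include <memory>
    #include <sstream>

    int main() {
        auto raw = std::make_shared<std::stringstream>("persisted state tokens");
        auto strm = std::make_shared<boost::iostreams::filtering_istream>();
        // In the patch a CStateRestoreStreamFilter is pushed first, so it
        // rewrites the bytes before any downstream reader sees them; the
        // terminal device (the raw stream) is pushed last.
        strm->push(*raw);
        std::string token;
        while (*strm >> token) {
            std::cout << token << '\n';
        }
        return 0;
    }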
@@ -124,44 +123,40 @@ int main(int argc, char** argv) { auto strm = std::make_shared<boost::iostreams::filtering_istream>(); strm->push(ml::api::CStateRestoreStreamFilter()); strm->push(*ioMgr.restoreStream()); - return new ml::api::CSingleStreamSearcher(strm); + return std::make_unique<ml::api::CSingleStreamSearcher>(strm); } - return new ml::api::CSingleStreamSearcher(ioMgr.restoreStream()); + return std::make_unique<ml::api::CSingleStreamSearcher>(ioMgr.restoreStream()); } return nullptr; }()}; - using TDataAdderCUPtr = const std::unique_ptr<ml::core::CDataAdder>; - TDataAdderCUPtr persister{[&ioMgr]() -> ml::core::CDataAdder* { + using TDataAdderUPtr = std::unique_ptr<ml::core::CDataAdder>; + const TDataAdderUPtr persister{[&ioMgr]() -> TDataAdderUPtr { if (ioMgr.persistStream()) { - return new ml::api::CSingleStreamDataAdder(ioMgr.persistStream()); + return std::make_unique<ml::api::CSingleStreamDataAdder>(ioMgr.persistStream()); } return nullptr; }()}; if (persistInterval >= 0 && persister == nullptr) { - LOG_FATAL(<< "Periodic persistence cannot be enabled using the " - "'persistInterval' argument " - "unless a place to persist to has been specified " - "using the 'persist' argument"); + LOG_FATAL(<< "Periodic persistence cannot be enabled using the 'persistInterval' argument " + "unless a place to persist to has been specified using the 'persist' argument"); return EXIT_FAILURE; } - using TBackgroundPersisterCUPtr = const std::unique_ptr<ml::api::CBackgroundPersister>; - TBackgroundPersisterCUPtr periodicPersister{ - [persistInterval, &persister]() -> ml::api::CBackgroundPersister* { - if (persistInterval >= 0) { - return new ml::api::CBackgroundPersister(persistInterval, *persister); - } - return nullptr; - }()}; + using TBackgroundPersisterUPtr = std::unique_ptr<ml::api::CBackgroundPersister>; + const TBackgroundPersisterUPtr periodicPersister{[persistInterval, &persister]() -> TBackgroundPersisterUPtr { + if (persistInterval >= 0) { + return std::make_unique<ml::api::CBackgroundPersister>(persistInterval, *persister); + } + return nullptr; + }()}; - using TInputParserCUPtr = const std::unique_ptr<ml::api::CInputParser>; - TInputParserCUPtr inputParser{[lengthEncodedInput, &ioMgr, - delimiter]() -> ml::api::CInputParser* { + using TInputParserUPtr = std::unique_ptr<ml::api::CInputParser>; + const TInputParserUPtr inputParser{[lengthEncodedInput, &ioMgr, delimiter]() -> TInputParserUPtr { if (lengthEncodedInput) { - return new ml::api::CLengthEncodedInputParser(ioMgr.inputStream()); + return std::make_unique<ml::api::CLengthEncodedInputParser>(ioMgr.inputStream()); } - return new ml::api::CCsvInputParser(ioMgr.inputStream(), delimiter); + return std::make_unique<ml::api::CCsvInputParser>(ioMgr.inputStream(), delimiter); }()}; ml::core::CJsonOutputStreamWrapper wrappedOutputStream(ioMgr.outputStream()); diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index 7773cb97a0..bb62253b28 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -92,23 +92,24 @@ int main(int argc, char** argv) { modelConfig.perPartitionNormalization(perPartitionNormalization); // There's a choice of input and output formats for the numbers to be normalised - using TInputParserCUPtr = const std::unique_ptr<ml::api::CInputParser>; - TInputParserCUPtr inputParser{[lengthEncodedInput, &ioMgr]() -> ml::api::CInputParser* { + using TInputParserUPtr = std::unique_ptr<ml::api::CInputParser>; + const TInputParserUPtr inputParser{[lengthEncodedInput, &ioMgr]() -> TInputParserUPtr { if (lengthEncodedInput) { - return new ml::api::CLengthEncodedInputParser(ioMgr.inputStream()); + return std::make_unique<ml::api::CLengthEncodedInputParser>(ioMgr.inputStream()); } - return new ml::api::CCsvInputParser(ioMgr.inputStream(), - ml::api::CCsvInputParser::COMMA); + return std::make_unique<ml::api::CCsvInputParser>( ioMgr.inputStream(), ml::api::CCsvInputParser::COMMA); }()}; - using TOutputHandlerCUPtr = const std::unique_ptr<ml::api::COutputHandler>; - TOutputHandlerCUPtr outputWriter{[writeCsv, &ioMgr]() ->
ml::api::COutputHandler* { + using TOutputHandlerUPtr = std::unique_ptr<ml::api::COutputHandler>; + const TOutputHandlerUPtr outputWriter{[writeCsv, &ioMgr]() -> TOutputHandlerUPtr { if (writeCsv) { - return new ml::api::CCsvOutputWriter(ioMgr.outputStream()); + return std::make_unique<ml::api::CCsvOutputWriter>(ioMgr.outputStream()); } - return new ml::api::CLineifiedJsonOutputWriter( - {ml::api::CResultNormalizer::PROBABILITY_NAME, - ml::api::CResultNormalizer::NORMALIZED_SCORE_NAME}, + return std::make_unique<ml::api::CLineifiedJsonOutputWriter>( + ml::api::CLineifiedJsonOutputWriter::TStrSet{ + ml::api::CResultNormalizer::PROBABILITY_NAME, + ml::api::CResultNormalizer::NORMALIZED_SCORE_NAME}, ioMgr.outputStream()); }()}; diff --git a/dev-tools/clang-format.sh b/dev-tools/clang-format.sh index 94b2e6bde2..6b28844a1a 100755 --- a/dev-tools/clang-format.sh +++ b/dev-tools/clang-format.sh @@ -8,25 +8,28 @@ # Reformats Ml native source code, using clang-format, to ensure consistency. # Ensure $CPP_SRC_HOME is set -if [ -z "$CPP_SRC_HOME" ] ; then - echo '$CPP_SRC_HOME is not set' - exit 1 -fi +CPP_SRC_HOME=${CPP_SRC_HOME:-`git rev-parse --show-toplevel`} # Ensure clang-format is available which clang-format > /dev/null 2>&1 - if [ $? != 0 ] ; then echo "ERROR: The clang-format code formatter is not available. Exiting." - exit 1; + exit 1 fi -CLANG_FORMAT_MAJOR_VERSION=5 -CLANG_FORMAT_VERSION=$(expr "`clang-format --version`" : ".* \(${CLANG_FORMAT_MAJOR_VERSION}.[0-9].[0-9]\) ") +REQUIRED_CLANG_FORMAT_VERSION=5.0.1 +FOUND_CLANG_FORMAT_VERSION=$(expr "`clang-format --version`" : ".* \([0-9].[0-9].[0-9]\)") -if [ -z ${CLANG_FORMAT_VERSION} ]; then - echo "ERROR: Require clang-format major version ${CLANG_FORMAT_MAJOR_VERSION}" +if [ -z "${FOUND_CLANG_FORMAT_VERSION}" ] ; then + echo "ERROR: Required clang-format major version ${REQUIRED_CLANG_FORMAT_VERSION} not found." + echo " Could not determine clang-format version." exit 2 fi +if [ "${REQUIRED_CLANG_FORMAT_VERSION}" != "${FOUND_CLANG_FORMAT_VERSION}" ] ; then + echo "ERROR: Required clang-format major version ${REQUIRED_CLANG_FORMAT_VERSION} not found." + echo " Detected clang-format version ${FOUND_CLANG_FORMAT_VERSION}" + exit 3 +fi + find $CPP_SRC_HOME \( -name 3rd_party -o -name build-setup \) -prune -o \( -name \*.cc -o -name \*.h \) -exec clang-format -i {} \; diff --git a/include/maths/CBasicStatistics.h b/include/maths/CBasicStatistics.h index 37271bc647..11d6ae0676 100644 --- a/include/maths/CBasicStatistics.h +++ b/include/maths/CBasicStatistics.h @@ -1073,9 +1073,6 @@ class MATHS_EXPORT CBasicStatistics { } //! Update the statistics with \p x. -#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) - __attribute__((__noinline__)) -#endif // defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) bool add(const T& x) { if (m_UnusedCount > 0) { m_Statistics[--m_UnusedCount] = x; diff --git a/include/maths/CBootstrapClusterer.h b/include/maths/CBootstrapClusterer.h index 4ae947cf04..2c18cf1dfd 100644 --- a/include/maths/CBootstrapClusterer.h +++ b/include/maths/CBootstrapClusterer.h @@ -174,9 +174,6 @@ class CBootstrapClusterer { } //! Initialize the priority queue of vertices to visit.
-#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) - __attribute__((__noinline__)) -#endif // defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 3) void initializeQueue() { s_Queue.clear(); s_Queue.reserve(s_ToVisit.size()); diff --git a/include/maths/CConstantPrior.h b/include/maths/CConstantPrior.h index c8462aac2c..fc93615fdf 100644 --- a/include/maths/CConstantPrior.h +++ b/include/maths/CConstantPrior.h @@ -65,17 +65,14 @@ class MATHS_EXPORT CConstantPrior : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyle, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Returns zero. virtual double offset() const; //! Set the constant if it hasn't been set. - virtual void addSamples(const TWeightStyleVec& weightStyle, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! No-op. virtual void propagateForwardsByTime(double time); @@ -87,27 +84,22 @@ class MATHS_EXPORT CConstantPrior : public CPrior { virtual double marginalLikelihoodMean() const; //! Returns constant or zero if unset (by equidistribution). - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! All confidence intervals are the point [constant, constant]. - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Returns a large value if all samples are equal to the constant //! and zero otherwise. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Get \p numberSamples times the constant. @@ -115,25 +107,22 @@ class MATHS_EXPORT CConstantPrior : public CPrior { //! A large number if any sample is less than the constant and //! zero otherwise. - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; //! A large number if any sample is larger than the constant and //! zero otherwise. 
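//! (Equivalently: below the constant the joint c.d.f. complement is one, so
//! minus its log is zero; above it the complement is zero, so minus its log
//! is reported as a large number rather than infinity.)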
- virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; //! Returns one if all samples equal the constant and one otherwise. virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CGammaRateConjugate.h b/include/maths/CGammaRateConjugate.h index 31fc0e90cc..8f9125ec2b 100644 --- a/include/maths/CGammaRateConjugate.h +++ b/include/maths/CGammaRateConjugate.h @@ -13,6 +13,7 @@ #include #include #include +#include #include namespace ml { @@ -133,15 +134,11 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! This samples the current marginal likelihood and uses these samples //! to reconstruct the prior with adjusted offset. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -149,14 +146,9 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! Update the prior with a collection of independent samples from the //! gamma variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -175,14 +167,11 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -194,29 +183,23 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! 
the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the gamma rate. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -231,9 +214,6 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. For the //! count variance scale weight style the weight is interpreted as a scale @@ -260,9 +240,8 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and //! a value of infinity is not well handled. (Very large values are //! handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -272,9 +251,8 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -283,9 +261,6 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see CTools::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! 
\param[in] weights The weights. See minusLogJointCdf for discussion. //! \param[out] lowerBound Filled in with the probability of the set @@ -299,9 +274,8 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! i.e. a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CLogNormalMeanPrecConjugate.h b/include/maths/CLogNormalMeanPrecConjugate.h index 56e36b9f9d..68c0b8dda0 100644 --- a/include/maths/CLogNormalMeanPrecConjugate.h +++ b/include/maths/CLogNormalMeanPrecConjugate.h @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -136,15 +137,11 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! This samples the current marginal likelihood and uses these samples //! to reconstruct the prior with adjusted offset. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -152,14 +149,9 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! Update the prior with a collection of independent samples from //! the log-normal variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -178,14 +170,11 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. 
the values \f$a\f$ and \f$b\f$ such that: @@ -197,30 +186,24 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the exponentiated normal mean //! and precision. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -235,9 +218,6 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. For the //! count variance scale weight style the weight is interpreted as a scale @@ -271,9 +251,8 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and //! a value of infinity is not well handled. The approximations we //! make are less good for \f$\gamma_i\f$ a long way from one. - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -283,9 +262,8 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -294,9 +272,6 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). 
- //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights. See minusLogJointCdf for discussion. //! \param[out] lowerBound Filled in with the probability of the set @@ -310,9 +285,8 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! i.e. a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CModel.h b/include/maths/CModel.h index a72e394d69..46cc4a243d 100644 --- a/include/maths/CModel.h +++ b/include/maths/CModel.h @@ -101,8 +101,7 @@ class MATHS_EXPORT CModelParams { class MATHS_EXPORT CModelAddSamplesParams { public: using TDouble2Vec = core::CSmallVector<double, 2>; - using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>; - using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>; + using TDouble2VecWeightsAryVec = std::vector<maths_t::TDouble2VecWeightsAry>; public: CModelAddSamplesParams(); @@ -122,20 +121,15 @@ class MATHS_EXPORT CModelAddSamplesParams { //! Get the model propagation interval. double propagationInterval() const; - //! Set the weight styles. - CModelAddSamplesParams& weightStyles(const maths_t::TWeightStyleVec& styles); - //! Get the weight styles. - const maths_t::TWeightStyleVec& weightStyles() const; - //! Set the trend samples weights. - CModelAddSamplesParams& trendWeights(const TDouble2Vec4VecVec& weights); + CModelAddSamplesParams& trendWeights(const TDouble2VecWeightsAryVec& weights); //! Get the trend sample weights. - const TDouble2Vec4VecVec& trendWeights() const; + const TDouble2VecWeightsAryVec& trendWeights() const; //! Set the prior samples weights. - CModelAddSamplesParams& priorWeights(const TDouble2Vec4VecVec& weights); + CModelAddSamplesParams& priorWeights(const TDouble2VecWeightsAryVec& weights); //! Get the prior sample weights. - const TDouble2Vec4VecVec& priorWeights() const; + const TDouble2VecWeightsAryVec& priorWeights() const; private: //! The data type. @@ -144,12 +138,10 @@ class MATHS_EXPORT CModelAddSamplesParams { bool m_IsNonNegative; //! The propagation interval. double m_PropagationInterval; - //! Controls the interpretation of the weights. - const maths_t::TWeightStyleVec* m_WeightStyles; //! The trend sample weights. - const TDouble2Vec4VecVec* m_TrendWeights; + const TDouble2VecWeightsAryVec* m_TrendWeights; //! The prior sample weights. - const TDouble2Vec4VecVec* m_PriorWeights; + const TDouble2VecWeightsAryVec* m_PriorWeights; }; //! \brief The extra parameters needed by CModel::probability.
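The representational change driving this series is visible in the hunk above: a sample's weights stop being a (weight styles, values vector) pair and become a fixed-size array indexed by weight kind. A fragment sketching the intended call-site shape, using only helpers that appear in these hunks (countVarianceScaleWeight, setSeasonalVarianceScale); it assumes the ml::maths headers, and prior and samples stand in for any CPrior subclass and its TDouble1Vec input:

    // One weights array per sample; each slot holds one weight kind.
    maths_t::TDoubleWeightsAry1Vec weights{
        maths_t::countVarianceScaleWeight(2.0)}; // sample 0: count variance scale 2
    maths_t::setSeasonalVarianceScale(1.5, weights[0]); // set another kind in place
    prior.addSamples(samples, weights); // no separate weight-styles argument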
@@ -158,10 +150,9 @@ class MATHS_EXPORT CModelProbabilityParams { using TOptionalSize = boost::optional<std::size_t>; using TBool2Vec = core::CSmallVector<bool, 2>; using TBool2Vec1Vec = core::CSmallVector<TBool2Vec, 1>; - using TDouble2Vec = core::CSmallVector<double, 2>; - using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>; - using TDouble2Vec4Vec1Vec = core::CSmallVector<TDouble2Vec4Vec, 1>; using TSize2Vec = core::CSmallVector<std::size_t, 2>; + using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; + using TDouble2VecWeightsAry1Vec = maths_t::TDouble2VecWeightsAry1Vec; using TProbabilityCalculation2Vec = core::CSmallVector<maths_t::EProbabilityCalculation, 2>; @@ -190,19 +181,14 @@ class MATHS_EXPORT CModelProbabilityParams { //! Get whether the values' bucket is empty. const TBool2Vec1Vec& bucketEmpty() const; - //! Set the weight styles. - CModelProbabilityParams& weightStyles(const maths_t::TWeightStyleVec& styles); - //! Get the weight styles. - const maths_t::TWeightStyleVec& weightStyles() const; - //! Add a value's weights. - CModelProbabilityParams& addWeights(const TDouble2Vec4Vec& weights); + CModelProbabilityParams& addWeights(const TDouble2VecWeightsAry& weights); //! Set the values' weights. - CModelProbabilityParams& weights(const TDouble2Vec4Vec1Vec& weights); + CModelProbabilityParams& weights(const TDouble2VecWeightsAry1Vec& weights); //! Get the values' weights. - const TDouble2Vec4Vec1Vec& weights() const; + const TDouble2VecWeightsAry1Vec& weights() const; //! Get writable values' weights. - TDouble2Vec4Vec1Vec& weights(); + TDouble2VecWeightsAry1Vec& weights(); //! Add a coordinate for which to compute probability. CModelProbabilityParams& addCoordinate(std::size_t coordinate); @@ -228,10 +214,8 @@ double m_SeasonalConfidenceInterval; //! True if the bucket is empty and false otherwise. TBool2Vec1Vec m_BucketEmpty; - //! Controls the interpretation of the weights. - const maths_t::TWeightStyleVec* m_WeightStyles; //! The sample weights. - TDouble2Vec4Vec1Vec m_Weights; + TDouble2VecWeightsAry1Vec m_Weights; //! The coordinates for which to compute the probability. TSize2Vec m_Coordinates; //! The most anomalous coordinate (if there is one). @@ -261,8 +245,6 @@ class MATHS_EXPORT CModel { using TDouble10Vec = core::CSmallVector<double, 10>; using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>; using TDouble2Vec3Vec = core::CSmallVector<TDouble2Vec, 3>; - using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>; - using TDouble2Vec4Vec1Vec = core::CSmallVector<TDouble2Vec4Vec, 1>; using TSize1Vec = core::CSmallVector<std::size_t, 1>; using TSize2Vec = core::CSmallVector<std::size_t, 2>; using TSize2Vec1Vec = core::CSmallVector<TSize2Vec, 1>; @@ -272,6 +254,8 @@ using TSizeDoublePr1Vec = core::CSmallVector<TSizeDoublePr, 1>; using TTimeDouble2VecSizeTr = core::CTriple<core_t::TTime, TDouble2Vec, std::size_t>; using TTimeDouble2VecSizeTrVec = std::vector<TTimeDouble2VecSizeTr>; + using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; + using TDouble2VecWeightsAry1Vec = maths_t::TDouble2VecWeightsAry1Vec; using TTail2Vec = core::CSmallVector<maths_t::ETail, 2>; //! Possible statuses for updating a model. @@ -327,18 +311,15 @@ //! Get the most likely value for the time series at \p time. virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const = 0; + const TDouble2VecWeightsAry& weights) const = 0; //! Get the most likely value for each correlate time series at //! \p time, if there are any.
- virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const = 0; + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const TDouble2VecWeightsAry1Vec& weights) const = 0; //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const = 0; + virtual TDouble2Vec1Vec residualModes(const TDouble2VecWeightsAry& weights) const = 0; //! Remove any trend components from \p value. virtual void detrend(const TTime2Vec1Vec& time, @@ -354,8 +335,7 @@ class MATHS_EXPORT CModel { //! confidence interval for the time series at \p time. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const = 0; + const TDouble2VecWeightsAry& weights) const = 0; //! Forecast the time series and get its \p confidenceInterval //! percentage confidence interval between \p startTime and @@ -486,18 +466,14 @@ class MATHS_EXPORT CModelStub : public CModel { virtual void skipTime(core_t::TTime gap); //! Returns empty. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec mode(core_t::TTime time, const TDouble2VecWeightsAry& weights) const; //! Returns empty. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const; + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const TDouble2VecWeightsAry1Vec& weights) const; //! Returns empty. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec1Vec residualModes(const TDouble2VecWeightsAry& weights) const; //! No-op. virtual void detrend(const TTime2Vec1Vec& time, @@ -512,8 +488,8 @@ class MATHS_EXPORT CModelStub : public CModel { //! Returns empty. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + const TDouble2VecWeightsAry& weights) const; + //! Returns empty. virtual bool forecast(core_t::TTime startTime, core_t::TTime endTime, diff --git a/include/maths/CMultimodalPrior.h b/include/maths/CMultimodalPrior.h index 6536c8bb33..b34d4e7096 100644 --- a/include/maths/CMultimodalPrior.h +++ b/include/maths/CMultimodalPrior.h @@ -125,9 +125,8 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! Forward the offset to the mode priors. //! //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -135,14 +134,9 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! Update the prior with a collection of independent samples from //! the variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! 
\param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -163,19 +157,15 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { virtual double nearestMarginalLikelihoodMean(double value) const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the local maxima of the marginal likelihood function. virtual TDouble1Vec - marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodModes(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -187,30 +177,24 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the mode parameters and summing //! over modes. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -225,9 +209,6 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. 
See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with the probability of the set @@ -277,9 +253,8 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! i.e. a value of zero is not well defined and a value of infinity is //! not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; @@ -321,9 +296,6 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { bool checkInvariants(const std::string& tag = std::string()) const; private: - using TDouble1VecVec = std::vector<TDouble1Vec>; - using TDouble4Vec1VecVec = std::vector<TDouble4Vec1Vec>; - //! The callback invoked when a mode is split. class MATHS_EXPORT CModeSplitCallback { public: diff --git a/include/maths/CMultimodalPriorUtils.h b/include/maths/CMultimodalPriorUtils.h index 63caee87b2..76a907f64b 100644 --- a/include/maths/CMultimodalPriorUtils.h +++ b/include/maths/CMultimodalPriorUtils.h @@ -42,29 +42,29 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { using TDoubleDoublePr = std::pair<double, double>; using TDoubleVec = std::vector<double>; using TDouble1Vec = core::CSmallVector<double, 1>; - using TDouble4Vec = core::CSmallVector<double, 4>; - using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>; + using TDoubleWeightsAry = maths_t::TDoubleWeightsAry; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator; - using TWeights = CConstantWeights; + using TWeights = maths_t::CUnitWeights; //! Get the support of the marginal likelihood function.
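//! (Taken to be the union of the mode supports, i.e. the interval
//! \f$[\min_i a_i, \max_i b_i]\f$ for component supports \f$[a_i, b_i]\f$.)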
template<typename T> static TDoubleDoublePr marginalLikelihoodSupport(const std::vector<SMultimodalPriorMode<T>>& modes) { if (modes.size() == 0) { - return std::make_pair(boost::numeric::bounds<double>::lowest(), - boost::numeric::bounds<double>::highest()); + return {boost::numeric::bounds<double>::lowest(), + boost::numeric::bounds<double>::highest()}; } if (modes.size() == 1) { return modes[0].s_Prior->marginalLikelihoodSupport(); } - TDoubleDoublePr result(boost::numeric::bounds<double>::highest(), - boost::numeric::bounds<double>::lowest()); + TDoubleDoublePr result{boost::numeric::bounds<double>::highest(), + boost::numeric::bounds<double>::lowest()}; // We define this as the union of the mode supports. - for (std::size_t i = 0u; i < modes.size(); ++i) { - TDoubleDoublePr s = modes[i].s_Prior->marginalLikelihoodSupport(); + for (const auto& mode : modes) { + TDoubleDoublePr s = mode.s_Prior->marginalLikelihoodSupport(); result.first = std::min(result.first, s.first); result.second = std::max(result.second, s.second); } @@ -99,43 +99,34 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! Get the mode of the marginal likelihood function. template<typename T> static double marginalLikelihoodMode(const std::vector<SMultimodalPriorMode<T>>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) { + const TDoubleWeightsAry& weights) { if (modes.size() == 0) { return 0.0; } if (modes.size() == 1) { - return modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weights); + return modes[0].s_Prior->marginalLikelihoodMode(weights); } - using TMaxAccumulator = - CBasicStatistics::COrderStatisticsStack<double, 1, std::greater<double>>; + using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator; // We'll approximate this as the maximum likelihood mode (mode). double result = 0.0; - double seasonalScale = 1.0; - double countVarianceScale = 1.0; - try { - seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); - countVarianceScale = maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale " << e.what()); - } + double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weights)); + double countVarianceScale = maths_t::countVarianceScale(weights); // Declared outside the loop to minimize number of times they // are created.
TDouble1Vec mode(1); - TDouble4Vec1Vec weight(1, TDouble4Vec(1, countVarianceScale)); + TDoubleWeightsAry1Vec weight{maths_t::countVarianceScaleWeight(countVarianceScale)}; TMaxAccumulator maxLikelihood; for (std::size_t i = 0u; i < modes.size(); ++i) { double w = modes[i].weight(); const T& prior = modes[i].s_Prior; - mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight[0]); + mode[0] = prior->marginalLikelihoodMode(weight[0]); double likelihood; - if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, - mode, weight, likelihood) & + if (prior->jointLogMarginalLikelihood(mode, weight, likelihood) & (maths_t::E_FpFailed | maths_t::E_FpOverflowed)) { continue; } @@ -144,7 +135,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } } - if (maths_t::hasSeasonalVarianceScale(weightStyles, weights)) { + if (maths_t::hasSeasonalVarianceScale(weights)) { double mean = marginalLikelihoodMean(modes); result = mean + seasonalScale * (result - mean); } @@ -156,13 +147,12 @@ template<typename T> static double marginalLikelihoodVariance(const std::vector<SMultimodalPriorMode<T>>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) { + const TDoubleWeightsAry& weights) { if (modes.size() == 0) { return boost::numeric::bounds<double>::highest(); } if (modes.size() == 1) { - return modes[0].s_Prior->marginalLikelihoodVariance(weightStyles, weights); + return modes[0].s_Prior->marginalLikelihoodVariance(weights); } // By linearity we have that: // = Sum_i{ w(i) * (Integral{ x^2 * f(x | i) } - m^2) } // = Sum_i{ w(i) * ((mi^2 + vi) - m^2) } - double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale " << e.what()); - } - + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double mean = marginalLikelihoodMean(modes); TMeanAccumulator result; @@ -205,8 +189,7 @@ marginalLikelihoodConfidenceInterval(const PRIOR& prior, const std::vector<MODE>& modes, double percentage, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) { + const TDoubleWeightsAry& weights) { TDoubleDoublePr support = marginalLikelihoodSupport(modes); if (isNonInformative(modes)) { @@ -214,8 +197,7 @@ } if (modes.size() == 1) { - return modes[0].s_Prior->marginalLikelihoodConfidenceInterval( - percentage, weightStyles, weights); + return modes[0].s_Prior->marginalLikelihoodConfidenceInterval(percentage, weights); } percentage /= 100.0; @@ -227,8 +209,8 @@ double p1 = std::log((1.0 - percentage) / 2.0); double p2 = std::log((1.0 + percentage) / 2.0); - CLogCdf<PRIOR> fl(CLogCdf<PRIOR>::E_Lower, prior, weightStyles, weights); - CLogCdf<PRIOR> fu(CLogCdf<PRIOR>::E_Upper, prior, weightStyles, weights); + CLogCdf<PRIOR> fl(CLogCdf<PRIOR>::E_Lower, prior, weights); + CLogCdf<PRIOR> fu(CLogCdf<PRIOR>::E_Upper, prior, weights); CCompositeFunctions::CMinusConstant<const CLogCdf<PRIOR>&> f1(fl, p1); CCompositeFunctions::CMinusConstant<const CLogCdf<PRIOR>&> f2(fu, p2); @@ -238,7 +220,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private
core::CNonInstantiatable { TDoubleDoublePr result; - double x0 = marginalLikelihoodMode(modes, weightStyles, weights); + double x0 = marginalLikelihoodMode(modes, weights); try { double f10 = f1(x0); @@ -309,9 +291,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { template<typename T> static maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const std::vector<SMultimodalPriorMode<T>>& modes, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& result) { // The likelihood can be computed from the conditional likelihood // that a sample is from each mode. In particular, the likelihood // ... @@ -350,20 +331,21 @@ TSizeDoublePr5Vec modeLogLikelihoods; modeLogLikelihoods.reserve(modes.size()); - double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) + double mean = maths_t::hasSeasonalVarianceScale(weights) ? marginalLikelihoodMean(modes) : 0.0; - TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); + TDoubleWeightsAry1Vec weight{TWeights::UNIT}; try { for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double seasonalScale = std::sqrt( - maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double n = maths_t::countForUpdate(weights[i]); + double seasonalScale = + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); double logSeasonalScale = seasonalScale != 1.0 ? std::log(seasonalScale) : 0.0; sample[0] = mean + (samples[i] - mean) / seasonalScale; - weight[0][0] = maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::setCountVarianceScale( + maths_t::countVarianceScale(weights[i]), weight[0]); // We re-normalize so that the maximum log likelihood is one // to avoid underflow. @@ -374,13 +356,13 @@ double modeLogLikelihood; maths_t::EFloatingPointErrorStatus status = modes[j].s_Prior->jointLogMarginalLikelihood( - TWeights::COUNT_VARIANCE, sample, weight, modeLogLikelihood); + sample, weight, modeLogLikelihood); if (status & maths_t::E_FpFailed) { // Logging handled at a lower level. return status; } if (!(status & maths_t::E_FpOverflowed)) { - modeLogLikelihoods.push_back(std::make_pair(j, modeLogLikelihood)); + modeLogLikelihoods.emplace_back(j, modeLogLikelihood); maxLogLikelihood = std::max(maxLogLikelihood, modeLogLikelihood); } } @@ -489,13 +471,12 @@ //! variable. template<typename T> static bool minusLogJointCdf(const std::vector<SMultimodalPriorMode<T>>& modes, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) { - return minusLogJointCdf(modes, CMinusLogJointCdf(), weightStyles, - samples, weights, lowerBound, upperBound); + return minusLogJointCdf(modes, CMinusLogJointCdf(), samples, weights, lowerBound, upperBound); } //! Compute minus the log of the one minus the joint c.d.f. of the //! marginal likelihood at \p samples. This is susceptible //! to cancellation errors at one, i.e. the smallest non-zero value //! this can return is the minimum double rather than epsilon.
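//! For example, if \f$F(x) = 1 - 10^{-20}\f$ then \f$1 - F(x)\f$ computed
//! via the c.d.f. rounds to zero in double precision and minus its log is
//! infinite, whereas the complement implementation can still return
//! \f$-\log(10^{-20}) \approx 46\f$.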
template<typename T> - static bool - minusLogJointCdfComplement(const std::vector<SMultimodalPriorMode<T>>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& lowerBound, - double& upperBound) { - return minusLogJointCdf(modes, CMinusLogJointCdfComplement(), weightStyles, - samples, weights, lowerBound, upperBound); + static bool minusLogJointCdfComplement(const std::vector<SMultimodalPriorMode<T>>& modes, + const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, + double& lowerBound, + double& upperBound) { + return minusLogJointCdf(modes, CMinusLogJointCdfComplement(), samples, + weights, lowerBound, upperBound); } //! Calculate the joint probability of seeing a lower likelihood @@ -521,9 +500,8 @@ static bool probabilityOfLessLikelySamples(const PRIOR& prior, const std::vector<MODE>& modes, maths_t::EProbabilityCalculation calculation, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) { @@ -534,14 +512,13 @@ LOG_ERROR(<< "Can't compute distribution for empty sample set"); return false; } - if (isNonInformative(modes)) { return true; } if (modes.size() == 1) { return modes[0].s_Prior->probabilityOfLessLikelySamples( - calculation, weightStyles, samples, weights, lowerBound, upperBound, tail); + calculation, samples, weights, lowerBound, upperBound, tail); } // Ideally we'd find the probability of the set of samples whose @@ -586,7 +563,7 @@ switch (calculation) { case maths_t::E_OneSidedBelow: - if (!minusLogJointCdf(modes, weightStyles, samples, weights, upperBound, lowerBound)) { + if (!minusLogJointCdf(modes, samples, weights, upperBound, lowerBound)) { LOG_ERROR(<< "Failed computing probability of less likely samples: " << core::CContainerPrinter::print(samples)); return false; @@ -607,7 +584,8 @@ support.first = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.first; support.second = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.second; - double mean = marginalLikelihoodMean(modes); + bool hasSeasonalScale = maths_t::hasSeasonalVarianceScale(weights); + double mean = hasSeasonalScale ? marginalLikelihoodMean(modes) : 0.0; double a = boost::numeric::bounds<double>::highest(); double b = boost::numeric::bounds<double>::lowest(); @@ -622,29 +600,23 @@ b = CTools::truncate(b, support.first, support.second); LOG_TRACE(<< "a = " << a << ", b = " << b << ", Z = " << Z); - std::size_t svi = static_cast<std::size_t>( - std::find(weightStyles.begin(), weightStyles.end(), - maths_t::E_SampleSeasonalVarianceScaleWeight) - - weightStyles.begin()); - // Declared outside the loop to minimize the number of times - // they are created. - TDouble4Vec1Vec weight(1); - TDouble1Vec wt(1); + // it is created.
+ TDoubleWeightsAry1Vec weight(1); int tail_ = 0; for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; weight[0] = weights[i]; - - if (svi < weight.size()) { - x = mean + (x - mean) / std::sqrt(weights[i][svi]); - weight[0][svi] = 1.0; + if (hasSeasonalScale) { + x = mean + (x - mean) / + std::sqrt(maths_t::seasonalVarianceScale(weight[0])); + maths_t::setSeasonalVarianceScale(1.0, weight[0]); } double fx; maths_t::EFloatingPointErrorStatus status = - jointLogMarginalLikelihood(modes, weightStyles, {x}, weight, fx); + jointLogMarginalLikelihood(modes, {x}, weight, fx); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute likelihood for " << x); return false; @@ -655,16 +627,15 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } LOG_TRACE(<< "x = " << x << ", f(x) = " << fx); - CPrior::CLogMarginalLikelihood logLikelihood(prior, weightStyles, weight); + CPrior::CLogMarginalLikelihood logLikelihood(prior, weight); CTools::CMixtureProbabilityOfLessLikelySample calculator( modes.size(), x, fx, a, b); for (const auto& mode : modes) { double w = mode.weight() / Z; - double centre = mode.s_Prior->marginalLikelihoodMode( - weightStyles, weight[0]); - double spread = std::sqrt(mode.s_Prior->marginalLikelihoodVariance( - weightStyles, weight[0])); + double centre = mode.s_Prior->marginalLikelihoodMode(weight[0]); + double spread = std::sqrt( + mode.s_Prior->marginalLikelihoodVariance(weight[0])); calculator.addMode(w, centre, spread); tail_ = tail_ | (x < centre ? maths_t::E_LeftTail : maths_t::E_RightTail); } @@ -674,31 +645,27 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { double lb, ub; - double l; + double xl; CEqualWithTolerance lequal( CToleranceTypes::E_AbsoluteTolerance, EPS * a); - if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, l)) { - wt[0] = l; - minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); + if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, xl)) { + minusLogJointCdf(modes, {xl}, weight, lb, ub); sampleLowerBound += std::exp(std::min(-lb, -ub)); sampleUpperBound += std::exp(std::max(-lb, -ub)); } else { - wt[0] = l; - minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); + minusLogJointCdf(modes, {xl}, weight, lb, ub); sampleUpperBound += std::exp(std::max(-lb, -ub)); } - double r; + double xr; CEqualWithTolerance requal( CToleranceTypes::E_AbsoluteTolerance, EPS * b); - if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, r)) { - wt[0] = r; - minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); + if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, xr)) { + minusLogJointCdfComplement(modes, {xr}, weight, lb, ub); sampleLowerBound += std::exp(std::min(-lb, -ub)); sampleUpperBound += std::exp(std::max(-lb, -ub)); } else { - wt[0] = r; - minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); + minusLogJointCdfComplement(modes, {xr}, weight, lb, ub); sampleUpperBound += std::exp(std::max(-lb, -ub)); } @@ -724,8 +691,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } break; case maths_t::E_OneSidedAbove: - if (!minusLogJointCdfComplement(modes, weightStyles, samples, - weights, upperBound, lowerBound)) { + if (!minusLogJointCdfComplement(modes, samples, weights, upperBound, lowerBound)) { LOG_ERROR(<< "Failed computing probability of less likely samples: " << core::CContainerPrinter::print(samples)); return false; @@ -776,13 +742,11 @@ class MATHS_EXPORT 
CMultimodalPriorUtils : private core::CNonInstantiatable { public: template bool operator()(const T& prior, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return prior->minusLogJointCdf(weightStyles, samples, weights, - lowerBound, upperBound); + return prior->minusLogJointCdf(samples, weights, lowerBound, upperBound); } }; @@ -791,13 +755,11 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { public: template bool operator()(const T& prior, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return prior->minusLogJointCdfComplement(weightStyles, samples, weights, - lowerBound, upperBound); + return prior->minusLogJointCdfComplement(samples, weights, lowerBound, upperBound); } }; @@ -811,18 +773,13 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { enum EStyle { E_Lower, E_Upper, E_Mean }; public: - CLogCdf(EStyle style, - const PRIOR& prior, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) - : m_Style(style), m_Prior(&prior), m_WeightStyles(&weightStyles), - m_Weights(1, weights), m_X(1u, 0.0) {} + CLogCdf(EStyle style, const PRIOR& prior, const TDoubleWeightsAry& weights) + : m_Style(style), m_Prior(&prior), m_Weights(1, weights), m_X(1u, 0.0) {} double operator()(double x) const { m_X[0] = x; double lowerBound, upperBound; - if (!m_Prior->minusLogJointCdf(*m_WeightStyles, m_X, m_Weights, - lowerBound, upperBound)) { + if (!m_Prior->minusLogJointCdf(m_X, m_Weights, lowerBound, upperBound)) { throw std::runtime_error("Unable to compute c.d.f. at " + core::CStringUtils::typeToString(x)); } @@ -840,8 +797,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { private: EStyle m_Style; const PRIOR* m_Prior; - const maths_t::TWeightStyleVec* m_WeightStyles; - TDouble4Vec1Vec m_Weights; + TDoubleWeightsAry1Vec m_Weights; //! Avoids creating the vector argument to minusLogJointCdf //! more than once. mutable TDouble1Vec m_X; @@ -853,9 +809,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { template static bool minusLogJointCdf(const std::vector>& modes, CDF minusLogCdf, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) { lowerBound = upperBound = 0.0; @@ -866,8 +821,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } if (modes.size() == 1) { - return minusLogCdf(modes[0].s_Prior, weightStyles, samples, weights, - lowerBound, upperBound); + return minusLogCdf(modes[0].s_Prior, samples, weights, lowerBound, upperBound); } using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; @@ -881,23 +835,21 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { // Declared outside the loop to minimize the number of times // they are created. 
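CMinusLogJointCdf and CMinusLogJointCdfComplement above are policy functors: the shared combiner minusLogJointCdf is written once against a callable and instantiated for either tail. A stripped-down sketch of the pattern; the prior type and its member names here are placeholders, not the library's:

    #include <vector>

    struct MinusLogCdf {
        template<typename PRIOR>
        bool operator()(const PRIOR& prior, double x, double& result) const {
            return prior.minusLogCdf(x, result); // F(x), in minus-log space
        }
    };

    struct MinusLogCdfComplement {
        template<typename PRIOR>
        bool operator()(const PRIOR& prior, double x, double& result) const {
            return prior.minusLogCdfComplement(x, result); // 1 - F(x)
        }
    };

    // One implementation then serves both tails.
    template<typename PRIOR, typename CDF>
    bool combineModes(const std::vector<PRIOR>& modes, CDF minusLogCdf,
                      double x, double& result);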
TDouble1Vec sample(1); - TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); - TDouble4Vec modeLowerBounds; - TDouble4Vec modeUpperBounds; + TDoubleWeightsAry1Vec weight{TWeights::UNIT}; + TDoubleVec modeLowerBounds; + TDoubleVec modeUpperBounds; modeLowerBounds.reserve(modes.size()); modeUpperBounds.reserve(modes.size()); try { - double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) + double mean = maths_t::hasSeasonalVarianceScale(weights) ? marginalLikelihoodMean(modes) : 0.0; for (std::size_t i = 0; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); - double seasonalScale = std::sqrt( - maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double seasonalScale = + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); if (isNonInformative(modes)) { lowerBound -= n * std::log(CTools::IMPROPER_CDF); @@ -905,9 +857,9 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { continue; } - sample[0] = seasonalScale != 1.0 ? mean + (samples[i] - mean) / seasonalScale - : samples[i]; - weight[0][0] = countVarianceScale; + sample[0] = mean + (samples[i] - mean) / seasonalScale; + maths_t::setCountVarianceScale( + maths_t::countVarianceScale(weights[i]), weight[0]); // We re-normalize so that the maximum log c.d.f. is one // to avoid underflow. @@ -919,8 +871,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { for (std::size_t j = 0u; j < modes.size(); ++j) { double modeLowerBound; double modeUpperBound; - if (!minusLogCdf(modes[j].s_Prior, TWeights::COUNT_VARIANCE, sample, - weight, modeLowerBound, modeUpperBound)) { + if (!minusLogCdf(modes[j].s_Prior, sample, weight, + modeLowerBound, modeUpperBound)) { LOG_ERROR(<< "Unable to compute c.d.f. for " << core::CContainerPrinter::print(samples)); return false; diff --git a/include/maths/CMultinomialConjugate.h b/include/maths/CMultinomialConjugate.h index 86c1451fb5..92b857a137 100644 --- a/include/maths/CMultinomialConjugate.h +++ b/include/maths/CMultinomialConjugate.h @@ -98,9 +98,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Returns zero. virtual double offset() const; @@ -108,14 +107,9 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! Update the prior with a collection of independent samples from the //! multinomial variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -134,14 +128,11 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. 
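The substance of the patch in this region is the weight representation itself: the old parallel (weightStyles, TDouble4Vec) encoding becomes a fixed-slot array, one slot per weight kind, read and written through named helpers such as maths_t::countVarianceScale and maths_t::setCountVarianceScale. A mock of that shape; the slot names are illustrative, not the library's actual enum:

    #include <array>

    enum EWeightSlot { E_Count, E_SeasonalVarianceScale, E_CountVarianceScale, E_NumberSlots };
    using TWeightsAry = std::array<double, E_NumberSlots>;

    // 1.0 is neutral for every slot, so a unit weight is just all ones.
    inline TWeightsAry unitWeight() {
        TWeightsAry w;
        w.fill(1.0);
        return w;
    }
    inline double countVarianceScale(const TWeightsAry& w) {
        return w[E_CountVarianceScale];
    }
    inline void setCountVarianceScale(double s, TWeightsAry& w) {
        w[E_CountVarianceScale] = s;
    }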
- virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -154,20 +145,15 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! distribution is discrete we can only approximate the probability. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Ignored. //! \param[in] weights Ignored. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the category probability parameters. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the log likelihood of \p samples. @@ -177,9 +163,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -196,9 +181,6 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! Compute minus the log of the joint cumulative density function //! of the marginal likelihood at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. Note variance scales are ignored. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound If the model has not overflowed this is @@ -210,9 +192,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! the c.d.f. and \f$\{x_i\}\f$ are the samples. Otherwise, it is //! filled in with a sharp upper bound. //! \note The samples are assumed to be independent. 
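The confidence interval documented above asks for a and b with F(a) = (1 - p/100)/2 and 1 - F(b) = (1 - p/100)/2. Given any monotone c.d.f. both endpoints reduce to quantile look-ups, which bisection recovers; a generic sketch, with an illustrative bracket and iteration count:

    #include <functional>

    // Solve cdf(x) == p by bisection; [lo, hi] must bracket the quantile.
    double quantile(const std::function<double(double)>& cdf,
                    double p, double lo, double hi) {
        for (int i = 0; i < 100; ++i) {
            double mid = 0.5 * (lo + hi);
            (cdf(mid) < p ? lo : hi) = mid;
        }
        return 0.5 * (lo + hi);
    }

    // For a percentage% interval: a = quantile(F, (1 - percentage/100) / 2, ...)
    // and b = quantile(F, 1 - (1 - percentage/100) / 2, ...).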
- virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -223,9 +204,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -234,9 +214,6 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. Note variance scales are ignored. //! \param[in] samples The samples of interest. //! \param[in] weights The weights. See minusLogJointCdf for discussion. //! \param[out] lowerBound If the model has not overflowed this is filled @@ -251,9 +228,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! are in or neither. //! \note The samples are assumed to be independent. virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CMultivariateConstantPrior.h b/include/maths/CMultivariateConstantPrior.h index da9f024ead..2ad0850573 100644 --- a/include/maths/CMultivariateConstantPrior.h +++ b/include/maths/CMultivariateConstantPrior.h @@ -64,14 +64,12 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); //! No-op. - virtual void adjustOffset(const TWeightStyleVec& weightStyle, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights); + virtual void adjustOffset(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights); //! Set the constant if it hasn't been set. - virtual void addSamples(const TWeightStyleVec& weightStyle, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights); + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights); //! No-op. virtual void propagateForwardsByTime(double time); @@ -91,8 +89,7 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { virtual TDouble10Vec marginalLikelihoodMean() const; //! Returns constant or zero if unset (by equidistribution). - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec marginalLikelihoodMode(const TDouble10VecWeightsAry& weights) const; //! Get the covariance matrix of the marginal likelihood. virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const; @@ -103,9 +100,8 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { //! Returns a large value if all samples are equal to the constant //! and zero otherwise. 
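The reason minusLogJointCdfComplement is a separate virtual, rather than being derived from minusLogJointCdf, is the cancellation the doc comment mentions: once F(x) rounds to 1, a complement computed by subtraction is exactly zero. Evaluating the survival function directly keeps precision far into the tail, e.g. for a standard normal:

    #include <cmath>

    // Naive form: returns +inf once 1 - F(x) drops below machine epsilon,
    // which happens beyond roughly x = 9 for a standard normal.
    double naiveMinusLogCdfComplement(double x) {
        return -std::log(1.0 - 0.5 * std::erfc(-x / std::sqrt(2.0)));
    }

    // Stable form: evaluate the complement itself via erfc.
    double minusLogCdfComplement(double x) {
        return -std::log(0.5 * std::erfc(x / std::sqrt(2.0)));
    }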
virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const; //! Get \p numberSamples times the constant. diff --git a/include/maths/CMultivariateMultimodalPrior.h b/include/maths/CMultivariateMultimodalPrior.h index 2cb8dcceba..af509bbab3 100644 --- a/include/maths/CMultivariateMultimodalPrior.h +++ b/include/maths/CMultivariateMultimodalPrior.h @@ -52,7 +52,7 @@ using TSizeDoublePr = std::pair; using TSizeDoublePr3Vec = core::CSmallVector; using TPriorPtr = std::shared_ptr; using TDouble10Vec1Vec = CMultivariatePrior::TDouble10Vec1Vec; -using TDouble10Vec4Vec1Vec = CMultivariatePrior::TDouble10Vec4Vec1Vec; +using TDouble10VecWeightsAry1Vec = CMultivariatePrior::TDouble10VecWeightsAry1Vec; using TMode = SMultimodalPriorMode>; using TModeVec = std::vector; @@ -60,9 +60,8 @@ using TModeVec = std::vector; MATHS_EXPORT maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& modes, - const maths_t::TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& sample, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, TSizeDoublePr3Vec& modeLogLikelihoods, double& result); @@ -137,7 +136,6 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { using TClusterer = CClusterer; using TClustererPtr = std::shared_ptr; using TPriorPtrVec = std::vector; - using TWeights = CConstantWeights; // Lift all overloads of into scope. //{ @@ -274,34 +272,26 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! For priors with non-negative support this adjusts the offset used //! to extend the support to handle negative samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. - virtual void adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) { + virtual void adjustOffset(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) { // This has to adjust offsets for its modes because it must be // possible to call jointLogMarginalLikelihood before the samples // have been added to the prior in order for model selection to // work. for (const auto& mode : m_Modes) { - mode.s_Prior->adjustOffset(weightStyles, samples, weights); + mode.s_Prior->adjustOffset(samples, weights); } } //! Update the prior with a collection of independent samples from the //! process. //! - //! \param[in] weightStyles_ Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. 
- virtual void addSamples(const TWeightStyleVec& weightStyles_, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) { + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) { if (samples.empty()) { return; } @@ -315,27 +305,13 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { // Declared outside the loop to minimize the number of times it // is initialized. - TWeightStyleVec weightStyles(weightStyles_); TDouble10Vec1Vec sample(1); - TDouble10Vec4Vec1Vec weight(1); + TDouble10VecWeightsAry1Vec weight{TWeights::unit(N)}; TSizeDoublePr2Vec clusters; - std::size_t indices[maths_t::NUMBER_WEIGHT_STYLES]; - std::size_t missing = weightStyles.size() + 1; - std::fill_n(indices, maths_t::NUMBER_WEIGHT_STYLES, missing); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - indices[weightStyles[i]] = i; - } - std::size_t seasonal = indices[maths_t::E_SampleSeasonalVarianceScaleWeight]; - std::size_t count = indices[maths_t::E_SampleCountWeight]; - std::size_t winsorisation = indices[maths_t::E_SampleWinsorisationWeight]; - if (count == missing) { - count = weightStyles.size(); - weightStyles.push_back(maths_t::E_SampleCountWeight); - } - try { - bool hasSeasonalScale = !this->isNonInformative() && seasonal != missing; + bool hasSeasonalScale = !this->isNonInformative() && + maths_t::hasSeasonalVarianceScale(weights); TPoint mean = hasSeasonalScale ? this->mean() : TPoint(0.0); @@ -347,19 +323,16 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } if (hasSeasonalScale) { - TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale( - N, weightStyles_, weights[i]))); + TPoint seasonalScale = + sqrt(TPoint(maths_t::seasonalVarianceScale(weights[i]))); x = mean + (x - mean) / seasonalScale; } sample[0] = x.template toVector(); weight[0] = weights[i]; - weight[0].resize(weightStyles.size(), TDouble10Vec(N, 1.0)); - if (seasonal != missing) { - weight[0][seasonal].assign(N, 1.0); - } + maths_t::setSeasonalVarianceScale(1.0, N, weight[0]); - double smallestCountWeight = this->smallest(weight[0][count]); + double smallestCountWeight = this->smallest(maths_t::count(weight[0])); clusters.clear(); m_Clusterer->add(x, clusters, smallestCountWeight); @@ -377,16 +350,17 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { m_Modes.emplace_back(cluster.first, m_SeedPrior); k = m_Modes.end() - 1; } - weight[0][count].assign(N, cluster.second); - if (winsorisation != missing) { - TDouble10Vec& ww = weight[0][winsorisation]; + maths_t::setCount(cluster.second, N, weight[0]); + if (maths_t::isWinsorised(weight)) { + TDouble10Vec ww = maths_t::winsorisationWeight(weight[0]); double f = (k->weight() + cluster.second) / Z; for (auto& w : ww) { w = std::max(1.0 - (1.0 - w) / f, w * f); } + maths_t::setWinsorisationWeight(ww, weight[0]); } - k->s_Prior->addSamples(weightStyles, sample, weight); - n += this->smallest(maths_t::countForUpdate(N, weightStyles, weight[0])); + k->s_Prior->addSamples(sample, weight); + n += this->smallest(maths_t::countForUpdate(weight[0])); } this->addSamples(n); } @@ -401,7 +375,6 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // Nothing to be done. return; @@ -440,6 +413,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! is univariate. 
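The addSamples above lets the clusterer assign one sample to several clusters with fractional counts, then updates each touched mode with that fraction as its count weight (and rescales any winsorisation weight by the mode's share). A single-sample sketch against a hypothetical mode type; the real flow also creates a new mode when the clusterer reports a new cluster:

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct Mode {
        // Stands in for s_Prior->addSamples with a count weight.
        void add(double x, double countWeight);
    };

    // clusters holds (mode index, fractional count) pairs for the sample x.
    void softUpdate(std::vector<Mode>& modes,
                    const std::vector<std::pair<std::size_t, double>>& clusters,
                    double x) {
        for (const auto& cluster : clusters) {
            modes[cluster.first].add(x, cluster.second);
        }
    }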
virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + std::size_t n = m_Modes.size(); CMultimodalPrior::TPriorPtrVec modes; @@ -489,6 +463,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! is univariate. virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + if (N == 2) { return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0); } @@ -529,6 +504,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! Get the support for the marginal likelihood function. virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const { + if (m_Modes.size() == 0) { return {TPoint::smallest().template toVector(), TPoint::largest().template toVector()}; @@ -559,13 +535,13 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { if (m_Modes.size() == 1) { return m_Modes[0].s_Prior->marginalLikelihoodMean(); } - return this->mean().template toVector(); } //! Get the nearest mean of the multimodal prior marginal likelihood, //! otherwise the marginal likelihood mean. virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec& value_) const { + if (m_Modes.empty()) { return TDouble10Vec(N, 0.0); } @@ -590,13 +566,13 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weight) const { + virtual TDouble10Vec marginalLikelihoodMode(const TDouble10VecWeightsAry& weight) const { + if (m_Modes.size() == 0) { return TDouble10Vec(N, 0.0); } if (m_Modes.size() == 1) { - return m_Modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weight); + return m_Modes[0].s_Prior->marginalLikelihoodMode(weight); } using TMaxAccumulator = @@ -605,15 +581,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { // We'll approximate this as the mode with the maximum likelihood. TPoint result(0.0); - TPoint seasonalScale(1.0); - TDouble10Vec4Vec1Vec weight_(1, TDouble10Vec4Vec(1)); - try { - seasonalScale = - sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weight))); - weight_[0][0] = maths_t::countVarianceScale(N, weightStyles, weight); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale " << e.what()); - } + TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(weight))); + TDouble10VecWeightsAry1Vec weight_{TWeights::unit(N)}; + maths_t::setCountVarianceScale(maths_t::countVarianceScale(weight), weight_[0]); // Declared outside the loop to minimize number of times it is created. TDouble10Vec1Vec mode(1); @@ -622,10 +592,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { for (const auto& mode_ : m_Modes) { double w = mode_.weight(); const TPriorPtr& prior = mode_.s_Prior; - mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight_[0]); + mode[0] = prior->marginalLikelihoodMode(weight_[0]); double likelihood; - if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, - mode, weight_, likelihood) & + if (prior->jointLogMarginalLikelihood(mode, weight_, likelihood) & maths_t::E_FpAllErrors) { continue; } @@ -640,12 +609,11 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Get the local maxima of the marginal likelihood functions. 
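The marginalLikelihoodMode above settles for the best of the per-component modes rather than a true global optimisation: each component's own mode is a candidate, and the best-scoring candidate wins. A one-dimensional sketch of that scheme which scores each candidate under the whole mixture; the code above scores a candidate with its owning mode's prior and weight, a close variant:

    #include <cmath>
    #include <vector>

    struct Component { double weight, mean, variance; };

    double pdf(const Component& c, double x) {
        static const double PI = 3.141592653589793;
        double d = x - c.mean;
        return std::exp(-0.5 * d * d / c.variance) / std::sqrt(2.0 * PI * c.variance);
    }

    // Candidates are the component means, i.e. each component's own mode.
    double approximateMixtureMode(const std::vector<Component>& mixture) {
        double best = 0.0;
        double bestDensity = -1.0;
        for (const auto& candidate : mixture) {
            double density = 0.0;
            for (const auto& c : mixture) {
                density += c.weight * pdf(c, candidate.mean);
            }
            if (density > bestDensity) {
                bestDensity = density;
                best = candidate.mean;
            }
        }
        return best;
    }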
- TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const { + TDouble10Vec1Vec marginalLikelihoodModes(const TDouble10VecWeightsAry& weights) const { TDouble10Vec1Vec result; result.reserve(m_Modes.size()); for (const auto& mode : m_Modes) { - result.push_back(mode.s_Prior->marginalLikelihoodMode(weightStyles, weights)); + result.push_back(mode.s_Prior->marginalLikelihoodMode(weights)); } return result; } @@ -675,17 +643,14 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! Calculate the log marginal likelihood function, integrating over the //! prior density function. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { + result = 0.0; if (samples.empty()) { @@ -695,7 +660,6 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { if (!this->check(samples, weights)) { return maths_t::E_FpFailed; } - if (this->isNonInformative()) { // The non-informative likelihood is improper and effectively // zero everywhere. We use minus max double because @@ -712,8 +676,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { if (m_Modes.size() == 1) { // Apply a small penalty to kill off this model if the data are // single mode. - maths_t::EFloatingPointErrorStatus status = m_Modes[0].s_Prior->jointLogMarginalLikelihood( - weightStyles, samples, weights, result); + maths_t::EFloatingPointErrorStatus status = + m_Modes[0].s_Prior->jointLogMarginalLikelihood(samples, weights, result); result -= 10.0 * this->decayRate(); return status; } @@ -727,32 +691,29 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { detail::TSizeDoublePr3Vec modeLogLikelihoods; modeLogLikelihoods.reserve(m_Modes.size()); - bool hasSeasonalScale = maths_t::hasSeasonalVarianceScale(weightStyles, weights); + TPoint mean = maths_t::hasSeasonalVarianceScale(weights) ? this->mean() + : TPoint(0.0); - TPoint mean = hasSeasonalScale ? 
this->mean() : TPoint(0.0); - TDouble10Vec4Vec1Vec weights_(1, TDouble10Vec4Vec(1, TDouble10Vec(N, 1.0))); + TDouble10VecWeightsAry1Vec weight{TWeights::unit(N)}; try { for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = this->smallest( - maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint seasonalScale = sqrt(TPoint( - maths_t::seasonalVarianceScale(N, weightStyles, weights[i]))); + double n = this->smallest(maths_t::countForUpdate(weights[i])); + TPoint seasonalScale = + sqrt(TPoint(maths_t::seasonalVarianceScale(weights[i]))); double logSeasonalScale = 0.0; for (std::size_t j = 0u; j < seasonalScale.dimension(); ++j) { logSeasonalScale += std::log(seasonalScale(j)); } TPoint x(samples[i]); - if (hasSeasonalScale) { - x = mean + (x - mean) / seasonalScale; - } + x = mean + (x - mean) / seasonalScale; sample[0] = x.template toVector(); - weights_[0][0] = maths_t::countVarianceScale(N, weightStyles, weights[i]); + maths_t::setCountVarianceScale( + maths_t::countVarianceScale(weights[i]), weight[0]); double sampleLogLikelihood; maths_t::EFloatingPointErrorStatus status = detail::jointLogMarginalLikelihood( - m_Modes, TWeights::COUNT_VARIANCE, sample, weights_, - modeLogLikelihoods, sampleLogLikelihood); + m_Modes, sample, weight, modeLogLikelihoods, sampleLogLikelihood); if (status & maths_t::E_FpOverflowed) { result = boost::numeric::bounds::lowest(); return status; @@ -921,6 +882,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { void operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const { + LOG_TRACE(<< "Splitting mode with index " << sourceIndex); TModeVec& modes = m_Prior->m_Modes; @@ -952,24 +914,22 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); - double nl = pLeft * numberSamples; - double ns = std::min(nl, static_cast(N + 2)); - double s = static_cast(samples.size()); - LOG_TRACE(<< "# left = " << nl); + double wl = pLeft * numberSamples; + double ws = std::min(wl, static_cast(N + 2)); + double n = static_cast(samples.size()); + LOG_TRACE(<< "# left = " << wl); TDouble10Vec1Vec samples_; samples_.reserve(samples.size()); for (const auto& sample : samples) { samples_.push_back(sample.template toVector()); } - TDouble10Vec seedWeight(N, ns / s); - TDouble10Vec4Vec1Vec weights(samples_.size(), TDouble10Vec4Vec(1, seedWeight)); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); - double weight = (nl - ns) / s; - if (weight > 0.0) { - weights.assign(weights.size(), - TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); + TDouble10VecWeightsAry1Vec weights(samples_.size(), + maths_t::countWeight(ws / n, N)); + modes.back().s_Prior->addSamples(samples_, weights); + if (wl > ws) { + weights.assign(weights.size(), maths_t::countWeight((wl - ws) / n, N)); + modes.back().s_Prior->addSamples(samples_, weights); LOG_TRACE(<< modes.back().s_Prior->print()); } } @@ -984,24 +944,22 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); - double nr = pRight * numberSamples; - double ns = std::min(nr, static_cast(N + 2)); - double s = static_cast(samples.size()); - LOG_TRACE(<< "# right = " << nr); + double wr = pRight * numberSamples; + double ws = std::min(wr, static_cast(N + 2)); + double n = static_cast(samples.size()); + LOG_TRACE(<< "# right = " 
<< wr);

             TDouble10Vec1Vec samples_;
             samples_.reserve(samples.size());
             for (const auto& sample : samples) {
                 samples_.push_back(sample.template toVector<TDouble10Vec>());
             }
-            TDouble10Vec seedWeight(N, ns / s);
-            TDouble10Vec4Vec1Vec weights(samples_.size(), TDouble10Vec4Vec(1, seedWeight));
-            modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights);
-            double weight = (nr - ns) / s;
-            if (weight > 0.0) {
-                weights.assign(weights.size(),
-                               TDouble10Vec4Vec(1, TDouble10Vec(N, weight)));
-                modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights);
+            TDouble10VecWeightsAry1Vec weights(samples_.size(),
+                                               maths_t::countWeight(ws / n, N));
+            modes.back().s_Prior->addSamples(samples_, weights);
+            if (wr > ws) {
+                weights.assign(weights.size(), maths_t::countWeight((wr - ws) / n, N));
+                modes.back().s_Prior->addSamples(samples_, weights);
                 LOG_TRACE(<< modes.back().s_Prior->print());
             }
         }
@@ -1097,6 +1055,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior {

     //! Get the covariance matrix for the marginal likelihood.
     TMatrix covarianceMatrix() const {
+
         // By linearity we have that:
         // Integral{ (x - m)' * (x - m) * Sum_i{ w(i) * f(x | i) } }
         // = Sum_i{ w(i) * (Integral{ x' * x * f(x | i) } - m' * m) }
diff --git a/include/maths/CMultivariateNormalConjugate.h b/include/maths/CMultivariateNormalConjugate.h
index 7bbf2c4e9a..e85ad90ff6 100644
--- a/include/maths/CMultivariateNormalConjugate.h
+++ b/include/maths/CMultivariateNormalConjugate.h
@@ -178,21 +178,16 @@ class CMultivariateNormalConjugate : public CMultivariatePrior {
     }

     //! No-op.
-    virtual void adjustOffset(const TWeightStyleVec& /*weightStyles*/,
-                              const TDouble10Vec1Vec& /*samples*/,
-                              const TDouble10Vec4Vec1Vec& /*weights*/) {}
+    virtual void adjustOffset(const TDouble10Vec1Vec& /*samples*/,
+                              const TDouble10VecWeightsAry1Vec& /*weights*/) {}

     //! Update the prior with a collection of independent samples from the
     //! process.
     //!
-    //! \param[in] weightStyles Controls the interpretation of the weight(s)
-    //! that are associated with each sample. See maths_t::ESampleWeightStyle
-    //! for more details.
     //! \param[in] samples A collection of samples of the process.
     //! \param[in] weights The weights of each sample in \p samples.
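Stepping back to the mode-split handlers earlier in this class: the weight destined for each half is delivered in two passes, a seed pass whose total count is capped at N + 2 and a remainder pass for whatever is left, so that together they add exactly the intended pseudo-count. (Presumably N + 2 because that is roughly the fewest points giving a well-conditioned N-dimensional covariance estimate; the code above only shows the cap itself.) The arithmetic in isolation:

    #include <algorithm>
    #include <cstddef>
    #include <utility>

    // Per-sample count weights for the seed and remainder passes when
    // spreading wTotal pseudo-counts over n samples in N dimensions.
    std::pair<double, double> splitSeedWeights(double wTotal, std::size_t n, std::size_t N) {
        double ws = std::min(wTotal, static_cast<double>(N + 2));
        double n_ = static_cast<double>(n);
        // The second pass only runs when the remainder is positive; across
        // both passes each sample contributes wTotal / n in total.
        return {ws / n_, (wTotal - ws) / n_};
    }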
- virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) { + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) { if (samples.empty()) { return; } @@ -200,7 +195,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { return; } - this->CMultivariatePrior::addSamples(weightStyles, samples, weights); + this->CMultivariatePrior::addSamples(samples, weights); // Note that if either count weight or Winsorisation weights are supplied // the weight of the sample x(i) is interpreted as its count, so for example @@ -233,19 +228,13 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { TPoint numberSamples(0.0); TCovariance covariancePost; - try { - for (std::size_t i = 0u; i < samples.size(); ++i) { - TPoint x(samples[i]); - TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint varianceScale = - TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])) * - TPoint(maths_t::countVarianceScale(N, weightStyles, weights[i])); - numberSamples += n; - covariancePost.add(x, n / varianceScale); - } - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to update likelihood: " << e.what()); - return; + for (std::size_t i = 0u; i < samples.size(); ++i) { + TPoint x(samples[i]); + TPoint n(maths_t::countForUpdate(weights[i])); + TPoint varianceScale = TPoint(maths_t::seasonalVarianceScale(weights[i])) * + TPoint(maths_t::countVarianceScale(weights[i])); + numberSamples += n; + covariancePost.add(x, n / varianceScale); } TPoint scaledNumberSamples = covariancePost.s_Count; @@ -305,7 +294,6 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // Nothing to be done. return; @@ -358,6 +346,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! is univariate. virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + if (!this->check(marginalize, condition)) { return TUnivariatePriorPtrDoublePr(); } @@ -446,7 +435,6 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { if (N == 2) { return TPriorPtrDoublePr(std::shared_ptr(this->clone()), 0.0); } - if (!this->check(marginalize, condition)) { return TPriorPtrDoublePr(); } @@ -539,8 +527,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { } //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble10Vec4Vec& /*weights*/) const { + virtual TDouble10Vec marginalLikelihoodMode(const TDouble10VecWeightsAry& /*weights*/) const { return this->marginalLikelihoodMean(); } @@ -557,16 +544,12 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! Calculate the log marginal likelihood function, integrating over the //! prior density function. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. 
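The conjugate update above folds every sample into a weighted mean-and-covariance accumulator, with the effective count n divided by the variance scale. Its one-dimensional analogue is the weighted form of Welford's algorithm:

    struct WeightedMoments {
        double count = 0.0;
        double mean = 0.0;
        double m2 = 0.0; // sum of weighted squared deviations

        // Add x with effective count n; the code above passes n / varianceScale.
        void add(double x, double n) {
            count += n;
            double delta = x - mean;
            mean += n * delta / count;
            m2 += n * delta * (x - mean);
        }
        double variance() const { return count > 0.0 ? m2 / count : 0.0; }
    };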
virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -599,8 +582,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { if (this->isInteger()) { double logLikelihood; - status = this->jointLogMarginalLikelihood( - weightStyles, samples, TPoint(0.5), weights, logLikelihood); + status = this->jointLogMarginalLikelihood(samples, TPoint(0.5), + weights, logLikelihood); if (status != maths_t::E_FpNoErrors) { return status; } @@ -613,7 +596,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { CSampling::uniformSample(0.0, 1.0, 3 * N, z); for (std::size_t i = 0u; i < z.size(); i += N) { status = this->jointLogMarginalLikelihood( - weightStyles, samples, TPoint(&z[i], &z[i + N]), weights, logLikelihood); + samples, TPoint(&z[i], &z[i + N]), weights, logLikelihood); if (status & maths_t::E_FpFailed) { return maths_t::E_FpFailed; } @@ -631,8 +614,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { result = maxLogLikelihood + std::log(sum / n); } else { - status = this->jointLogMarginalLikelihood(weightStyles, samples, - TPoint(0.0), weights, result); + status = this->jointLogMarginalLikelihood(samples, TPoint(0.0), weights, result); } if (status & maths_t::E_FpFailed) { @@ -945,6 +927,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! Get the covariance matrix for the marginal likelihood. TMatrix covarianceMatrix() const { + // This can be found by change of variables from the prior on the // precision matrix. In particular, if X ~ W_d(V, n) and Y = X^(-1), // then Y ~ W_d^(-1)(V^(-1), n), i.e. the inverse Wishart with the @@ -968,7 +951,6 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { if (this->isNonInformative()) { return TMatrix(0.0); } - TMatrix result(m_WishartScaleMatrix / m_WishartDegreesFreedom); return TMatrix(fromDenseMatrix(toDenseMatrix(result).inverse())); } @@ -977,6 +959,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { bool equalTolerance(const CMultivariateNormalConjugate& rhs, unsigned int toleranceType, double epsilon) const { + LOG_DEBUG(<< m_GaussianMean << " " << rhs.m_GaussianMean); LOG_DEBUG(<< m_GaussianPrecision << " " << rhs.m_GaussianPrecision); LOG_DEBUG(<< m_WishartDegreesFreedom << " " << rhs.m_WishartDegreesFreedom); @@ -1035,18 +1018,16 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! Compute the marginal likelihood for \p samples at the offset //! \p offset. maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, const TPoint& offset, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { + // As usual, one can find the marginal likelihood by noting that // it is proportional to the ratio of the normalization factors // of the conjugate distribution before and after update with the // samples. 
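The "ratio of the normalization factors" remark above is the standard conjugate-prior identity. As a reminder, in the usual exponential-family notation, with base measure h, sufficient statistic t and a conjugate prior whose normalizer is Z(chi, nu) (symbols here are the textbook ones, not names from this code):

    p(x_1, \ldots, x_n)
        = \frac{Z\left(\chi + \sum_{i=1}^{n} t(x_i),\; \nu + n\right)}{Z(\chi, \nu)}
          \prod_{i=1}^{n} h(x_i)

which is exactly the prior-to-posterior normalizer ratio that the function below assembles from the Wishart and Gaussian log-determinant and log-gamma terms.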
- double d = static_cast(N); - double numberSamples = 0.0; TCovariance covariancePost; double logCountVarianceScales = 0.0; @@ -1054,11 +1035,10 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { TPoint m(this->marginalLikelihoodMean()); for (std::size_t i = 0u; i < samples.size(); ++i) { TPoint x(samples[i]); - TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint seasonalScale = sqrt(TPoint( - maths_t::seasonalVarianceScale(N, weightStyles, weights[i]))); - TPoint countVarianceScale( - maths_t::countVarianceScale(N, weightStyles, weights[i])); + TPoint n(maths_t::countForUpdate(weights[i])); + TPoint seasonalScale = + sqrt(TPoint(maths_t::seasonalVarianceScale(weights[i]))); + TPoint countVarianceScale(maths_t::countVarianceScale(weights[i])); x = m + (x + offset - m) / seasonalScale; numberSamples += this->smallest(n.template toVector()); covariancePost.add(x, n / countVarianceScale); @@ -1120,6 +1100,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { LOG_TRACE(<< "logGammaPostMinusPrior = " << logGammaPostMinusPrior); LOG_TRACE(<< "logCountVarianceScales = " << logCountVarianceScales); + double d = static_cast(N); result = 0.5 * (wishartDegreesFreedomPrior * logDeterminantPrior - wishartDegreesFreedomPost * logDeterminantPost - d * (logGaussianPrecisionPost - logGaussianPrecisionPrior) + diff --git a/include/maths/CMultivariateOneOfNPrior.h b/include/maths/CMultivariateOneOfNPrior.h index 6bba5b6ba7..0ee44b1cc0 100644 --- a/include/maths/CMultivariateOneOfNPrior.h +++ b/include/maths/CMultivariateOneOfNPrior.h @@ -149,21 +149,16 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); //! Forward the offset to the model priors. - virtual void adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights); + virtual void adjustOffset(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights); //! Update the model weights using the marginal likelihoods for //! the data. The component prior parameters are then updated. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights); + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -222,24 +217,19 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { virtual TDouble10Vec marginalLikelihoodVariances() const; //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec marginalLikelihoodMode(const TDouble10VecWeightsAry& weights) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the distribution parameters. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. 
//! \param[in] samples A collection of samples of the process.
     //! \param[in] weights The weights of each sample in \p samples.
     //! \param[out] result Filled in with the joint likelihood of \p samples.
     //! \note The samples are assumed to be independent and identically
     //! distributed.
     virtual maths_t::EFloatingPointErrorStatus
-    jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                               const TDouble10Vec1Vec& samples,
-                               const TDouble10Vec4Vec1Vec& weights,
+    jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples,
+                               const TDouble10VecWeightsAry1Vec& weights,
                                double& result) const;

     //! Sample the marginal likelihood function.
diff --git a/include/maths/CMultivariatePrior.h b/include/maths/CMultivariatePrior.h
index bfa7f0274a..96c51fc04e 100644
--- a/include/maths/CMultivariatePrior.h
+++ b/include/maths/CMultivariatePrior.h
@@ -38,19 +38,19 @@ class MATHS_EXPORT CMultivariatePrior {
     using TDouble10Vec = core::CSmallVector<double, 10>;
     using TDouble10Vec1Vec = core::CSmallVector<TDouble10Vec, 1>;
     using TDouble10Vec2Vec = core::CSmallVector<TDouble10Vec, 2>;
-    using TDouble10Vec4Vec = core::CSmallVector<TDouble10Vec, 4>;
     using TDouble10Vec10Vec = core::CSmallVector<TDouble10Vec, 10>;
-    using TDouble10Vec4Vec1Vec = core::CSmallVector<TDouble10Vec4Vec, 1>;
     using TDouble10VecDouble10VecPr = std::pair<TDouble10Vec, TDouble10Vec>;
     using TSize10Vec = core::CSmallVector<std::size_t, 10>;
     using TSizeDoublePr = std::pair<std::size_t, double>;
     using TSizeDoublePr10Vec = core::CSmallVector<TSizeDoublePr, 10>;
-    using TWeightStyleVec = maths_t::TWeightStyleVec;
     using TTail10Vec = core::CSmallVector<maths_t::ETail, 10>;
+    using TDouble10VecWeightsAry = maths_t::TDouble10VecWeightsAry;
+    using TDouble10VecWeightsAry1Vec = maths_t::TDouble10VecWeightsAry1Vec;
     using TUnivariatePriorPtr = std::shared_ptr<CPrior>;
     using TUnivariatePriorPtrDoublePr = std::pair<TUnivariatePriorPtr, double>;
     using TPriorPtr = std::shared_ptr<CMultivariatePrior>;
     using TPriorPtrDoublePr = std::pair<TPriorPtr, double>;
+    using TWeights = maths_t::CUnitWeights;

 public:
     //! The value of the decay rate to fall back to using if the input
@@ -128,26 +128,18 @@ class MATHS_EXPORT CMultivariatePrior {
     //! For priors with non-negative support this adjusts the offset used
     //! to extend the support to handle negative samples.
     //!
-    //! \param[in] weightStyles Controls the interpretation of the weight(s)
-    //! that are associated with each sample. See maths_t::ESampleWeightStyle
-    //! for more details.
     //! \param[in] samples The samples from which to determine the offset.
     //! \param[in] weights The weights of each sample in \p samples.
-    virtual void adjustOffset(const TWeightStyleVec& weightStyles,
-                              const TDouble10Vec1Vec& samples,
-                              const TDouble10Vec4Vec1Vec& weights) = 0;
+    virtual void adjustOffset(const TDouble10Vec1Vec& samples,
+                              const TDouble10VecWeightsAry1Vec& weights) = 0;

     //! Update the prior with a collection of independent samples from the
     //! process.
     //!
-    //! \param[in] weightStyles Controls the interpretation of the weight(s)
-    //! that are associated with each sample. See maths_t::ESampleWeightStyle
-    //! for more details.
     //! \param[in] samples A collection of samples of the process.
     //! \param[in] weights The weights of each sample in \p samples.
-    virtual void addSamples(const TWeightStyleVec& weightStyles,
-                            const TDouble10Vec1Vec& samples,
-                            const TDouble10Vec4Vec1Vec& weights) = 0;
+    virtual void addSamples(const TDouble10Vec1Vec& samples,
+                            const TDouble10VecWeightsAry1Vec& weights) = 0;

     //! Update the prior for the specified elapsed time.
     virtual void propagateForwardsByTime(double time) = 0;
@@ -193,12 +185,11 @@ class MATHS_EXPORT CMultivariatePrior {
     virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec& value) const;

     //!
Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const = 0; + virtual TDouble10Vec + marginalLikelihoodMode(const TDouble10VecWeightsAry& weights) const = 0; //! Get the local maxima of the marginal likelihood function. - virtual TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec1Vec marginalLikelihoodModes(const TDouble10VecWeightsAry& weights) const; //! Get the covariance matrix for the marginal likelihood. virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const = 0; @@ -209,16 +200,12 @@ class MATHS_EXPORT CMultivariatePrior { //! Calculate the log marginal likelihood function, integrating over the //! prior density function. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const = 0; //! Sample the marginal likelihood function. @@ -249,9 +236,6 @@ class MATHS_EXPORT CMultivariatePrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[in] coordinates The coordinates for which to compute probabilities. @@ -266,9 +250,8 @@ class MATHS_EXPORT CMultivariatePrior { //! a value of zero is not well defined and a value of infinity is not well //! handled. (Very large values are handled though.) bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, const TSize10Vec& coordinates, TDouble10Vec2Vec& lowerBounds, TDouble10Vec2Vec& upperBounds, @@ -280,9 +263,6 @@ class MATHS_EXPORT CMultivariatePrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with a lower bound for the probability @@ -298,9 +278,8 @@ class MATHS_EXPORT CMultivariatePrior { //! a value of zero is not well defined and a value of infinity is not well //! handled. (Very large values are handled though.) 
bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, double& lowerBound, double& upperBound, TTail10Vec& tail) const; @@ -391,7 +370,7 @@ class MATHS_EXPORT CMultivariatePrior { void addSamples(double n); //! Check that the samples and weights are consistent. - bool check(const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) const; + bool check(const TDouble10Vec1Vec& samples, const TDouble10VecWeightsAry1Vec& weights) const; //! Check that the variables to marginalize out and condition on //! are consistent. diff --git a/include/maths/CNormalMeanPrecConjugate.h b/include/maths/CNormalMeanPrecConjugate.h index 9f8bea298f..a7ee6e2aee 100644 --- a/include/maths/CNormalMeanPrecConjugate.h +++ b/include/maths/CNormalMeanPrecConjugate.h @@ -121,9 +121,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Returns zero. virtual double offset() const; @@ -131,14 +130,9 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! Update the prior with a collection of independent samples from //! the normal variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -157,14 +151,11 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -176,29 +167,23 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). 
- virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the normal mean and precision. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -213,9 +198,6 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. For //! the count variance scale weight style the weight is interpreted as @@ -236,9 +218,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and //! a value of infinity is not well handled. (Very large values are //! handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -248,9 +229,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -259,9 +239,6 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights. See minusLogJointCdf for discussion. //! \param[out] lowerBound Filled in with the probability of the set @@ -275,9 +252,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! i.e. 
a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/COneOfNPrior.h b/include/maths/COneOfNPrior.h index e754bb5ec9..f04e54ebe0 100644 --- a/include/maths/COneOfNPrior.h +++ b/include/maths/COneOfNPrior.h @@ -133,9 +133,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! Forward the offset to the model priors. //! //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the maximum model offset. virtual double offset() const; @@ -143,14 +142,9 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! Update the model weights using the marginal likelihoods for //! the data. The component prior parameters are then updated. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -172,14 +166,11 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { virtual double nearestMarginalLikelihoodMean(double value) const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -191,29 +182,23 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range (0.0, 100.0]. - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! 
over the prior density function for the distribution parameters. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -231,9 +216,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { private: //! The common c.d.f. implementation. bool minusLogJointCdfImpl(bool complement, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -241,9 +225,6 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with a lower bound to acceptable @@ -256,9 +237,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! \warning The variance scales must be in the range \f$(0,\infty)\f$, //! i.e. a value of zero is not well defined and a value of infinity is //! not well handled. (Very large values are handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -268,9 +248,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -279,9 +258,6 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with the probability of the set @@ -295,9 +271,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! i.e. a value of zero is not well defined and a value of infinity is //! not well handled. 
(Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CPoissonMeanConjugate.h b/include/maths/CPoissonMeanConjugate.h index 7c9154707a..9667b72baa 100644 --- a/include/maths/CPoissonMeanConjugate.h +++ b/include/maths/CPoissonMeanConjugate.h @@ -104,15 +104,11 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! This samples the current marginal likelihood and uses these samples //! to reconstruct the prior with adjusted offset. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -120,14 +116,9 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! Update the prior with a collection of independent samples from the //! Poisson variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -145,14 +136,11 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -164,29 +152,23 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). 
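For the interval query declared just below, the optional weight styles and weights pair collapses to a single array argument. A sketch, assuming a prior instance; the 95.0 percentage and the doubled count variance scale are illustrative.

    #include <maths/CPoissonMeanConjugate.h>
    #include <maths/MathsTypes.h>

    using namespace ml;

    // The 95% central interval of the marginal likelihood, first with the
    // default unit weights, then with a count variance scale of 2.0 set in
    // the corresponding slot of the weights array.
    void intervals(const maths::CPoissonMeanConjugate& prior) {
        maths::CPrior::TDoubleDoublePr plain =
            prior.marginalLikelihoodConfidenceInterval(95.0);
        maths::CPrior::TDoubleDoublePr scaled = prior.marginalLikelihoodConfidenceInterval(
            95.0, maths_t::countVarianceScaleWeight(2.0));
    }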
- virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the Poisson mean. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -201,18 +183,14 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. //! \param[out] upperBound Equal to \p lowerBound. //! \note The samples are assumed to be independent. - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -222,9 +200,8 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -233,9 +210,6 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with the probability of the set @@ -246,9 +220,8 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! are in or neither. //! \note The samples are assumed to be independent. 
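Ahead of the probability calculation declared next, a sketch of the likelihood evaluation above: the ready-made SINGLE_UNIT weight keeps one-sample call sites terse. This assumes the E_FpNoErrors enumerator keeps its existing meaning in MathsTypes.h.

    #include <maths/CPoissonMeanConjugate.h>
    #include <maths/MathsTypes.h>

    using namespace ml;

    // Joint log marginal likelihood of one sample with unit weights;
    // SINGLE_UNIT is a length-one vector holding the unit weights array,
    // so it lines up with the single sample.
    bool logLikelihood(const maths::CPoissonMeanConjugate& prior, double& result) {
        maths::CPrior::TDouble1Vec samples{3.0};
        return prior.jointLogMarginalLikelihood(
                   samples, maths_t::CUnitWeights::SINGLE_UNIT, result) ==
               maths_t::E_FpNoErrors;
    }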
virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CPrior.h b/include/maths/CPrior.h index c8b2d6a00e..92510ea057 100644 --- a/include/maths/CPrior.h +++ b/include/maths/CPrior.h @@ -12,7 +12,6 @@ #include #include -#include #include #include @@ -42,11 +41,10 @@ class MATHS_EXPORT CPrior { using TDoubleVecVec = std::vector; using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; - using TWeightStyleVec = maths_t::TWeightStyleVec; using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; - using TWeights = CConstantWeights; + using TDoubleWeightsAry = maths_t::TDoubleWeightsAry; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; + using TWeights = maths_t::CUnitWeights; //! \brief Data for plotting a series struct MATHS_EXPORT SPlot { @@ -93,16 +91,14 @@ class MATHS_EXPORT CPrior { public: CLogMarginalLikelihood(const CPrior& prior, - const TWeightStyleVec& weightStyles = CConstantWeights::COUNT, - const TDouble4Vec1Vec& weights = CConstantWeights::SINGLE_UNIT); + const TDoubleWeightsAry1Vec& weights = TWeights::SINGLE_UNIT); double operator()(double x) const; bool operator()(double x, double& result) const; private: const CPrior* m_Prior; - const TWeightStyleVec* m_WeightStyles; - const TDouble4Vec1Vec* m_Weights; + const TDoubleWeightsAry1Vec* m_Weights; //! Avoids creating the vector argument to jointLogMarginalLikelihood //! more than once. mutable TDouble1Vec m_X; @@ -178,15 +174,11 @@ class MATHS_EXPORT CPrior { //! For priors with non-negative support this adjusts the offset used //! to extend the support to handle negative samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) = 0; + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) = 0; //! Get the current sample offset. virtual double offset() const = 0; @@ -194,14 +186,10 @@ class MATHS_EXPORT CPrior { //! Update the prior with a collection of independent samples from the //! variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) = 0; + virtual void addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) = 0; //! Update the prior for the specified elapsed time. virtual void propagateForwardsByTime(double time) = 0; @@ -218,13 +206,11 @@ class MATHS_EXPORT CPrior { //! Get the mode of the marginal likelihood function. 
virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const = 0; //! Get the local maxima of the marginal likelihood function. virtual TDouble1Vec - marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodModes(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -236,32 +222,25 @@ class MATHS_EXPORT CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + const TDoubleWeightsAry& weights = TWeights::UNIT) const = 0; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const = 0; //! Calculate the log marginal likelihood function integrating over the //! prior density function. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const = 0; //! Sample the marginal likelihood function. @@ -287,9 +266,6 @@ class MATHS_EXPORT CPrior { //! Calculate minus the log of the joint c.d.f. of the marginal likelihood //! for a collection of independent samples from the variable. //! - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with a lower bound for @@ -305,9 +281,8 @@ class MATHS_EXPORT CPrior { //! \warning The variance scales must be in the range \f$(0,\infty)\f$, //! i.e. a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) 
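A sketch of calling the c.d.f. accessor declared next through the CPrior interface, with unit weights; the averaging of the bounds is an illustrative choice, not part of the API.

    #include <maths/CPrior.h>
    #include <maths/MathsTypes.h>

    using namespace ml;

    // Minus the log of the joint c.d.f. at a single point. The lower and
    // upper bounds coincide whenever the prior can compute the c.d.f.
    // exactly, so their midpoint is a reasonable point estimate.
    bool minusLogCdf(const maths::CPrior& prior, double x, double& result) {
        maths::CPrior::TDouble1Vec sample{x};
        double lowerBound;
        double upperBound;
        if (!prior.minusLogJointCdf(sample, maths_t::CUnitWeights::SINGLE_UNIT,
                                    lowerBound, upperBound)) {
            return false;
        }
        result = (lowerBound + upperBound) / 2.0;
        return true;
    }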
- virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const = 0; @@ -317,9 +292,8 @@ class MATHS_EXPORT CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const = 0; @@ -329,9 +303,6 @@ class MATHS_EXPORT CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with a lower bound for the probability @@ -350,9 +321,8 @@ class MATHS_EXPORT CPrior { //! i.e. a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const = 0; @@ -440,8 +410,7 @@ class MATHS_EXPORT CPrior { bool expectation(const F& f, const std::size_t numberIntervals, T& result, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the number of samples received to date. double numberSamples() const; @@ -465,7 +434,7 @@ class MATHS_EXPORT CPrior { //! Get a set of sample for the prior to use in adjust offset. void adjustOffsetResamples(double minimumSample, TDouble1Vec& resamples, - TDouble4Vec1Vec& resamplesWeights) const; + TDoubleWeightsAry1Vec& resamplesWeights) const; protected: //! \brief Defines a set of operations to adjust the offset parameter @@ -476,28 +445,24 @@ class MATHS_EXPORT CPrior { virtual ~COffsetParameters() = default; //! Add a collection of samples. - void samples(const maths_t::TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + void samples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Capture a collection of re-samples from the prior. 
virtual void resample(double minimumSample); protected: CPrior& prior() const; - const maths_t::TWeightStyleVec& weightStyles() const; const TDouble1Vec& samples() const; - const TDouble4Vec1Vec& weights() const; + const TDoubleWeightsAry1Vec& weights() const; const TDouble1Vec& resamples() const; - const TDouble4Vec1Vec& resamplesWeights() const; + const TDoubleWeightsAry1Vec& resamplesWeights() const; private: CPrior* m_Prior; - const maths_t::TWeightStyleVec* m_WeightStyles; const TDouble1Vec* m_Samples; - const TDouble4Vec1Vec* m_Weights; + const TDoubleWeightsAry1Vec* m_Weights; TDouble1Vec m_Resamples; - TDouble4Vec1Vec m_ResamplesWeights; + TDoubleWeightsAry1Vec m_ResamplesWeights; }; //! \brief Computes the likelihood of a collection of samples and @@ -539,9 +504,8 @@ class MATHS_EXPORT CPrior { //! specified reward. //! //! \return The penalty to apply to the model in selection. - double adjustOffsetWithCost(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + double adjustOffsetWithCost(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, COffsetCost& cost, CApplyOffset& apply); diff --git a/include/maths/CPriorDetail.h b/include/maths/CPriorDetail.h index 3d066f947c..3a9c3c1006 100644 --- a/include/maths/CPriorDetail.h +++ b/include/maths/CPriorDetail.h @@ -15,8 +15,8 @@ template bool CPrior::expectation(const F& f, std::size_t numberIntervals, T& result, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weight) const { + const TDoubleWeightsAry& weight) const { + if (numberIntervals == 0) { LOG_ERROR(<< "Must specify non-zero number of intervals"); return false; @@ -26,13 +26,13 @@ bool CPrior::expectation(const F& f, double n{static_cast(numberIntervals)}; TDoubleDoublePr interval{this->marginalLikelihoodConfidenceInterval( - 100.0 - 1.0 / (100.0 * n), weightStyles, weight)}; + 100.0 - 1.0 / (100.0 * n), weight)}; double x{interval.first}; double dx{(interval.second - interval.first) / n}; double normalizationFactor{0.0}; - TDouble4Vec1Vec weights{weight}; - CPrior::CLogMarginalLikelihood logLikelihood(*this, weightStyles, weights); + TDoubleWeightsAry1Vec weights{weight}; + CPrior::CLogMarginalLikelihood logLikelihood(*this, weights); CCompositeFunctions::CExp likelihood(logLikelihood); for (std::size_t i = 0u; i < numberIntervals; ++i, x += dx) { T productIntegral; diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h index 68c59bc0c5..c249636d83 100644 --- a/include/maths/CTimeSeriesChangeDetector.h +++ b/include/maths/CTimeSeriesChangeDetector.h @@ -65,11 +65,9 @@ struct MATHS_EXPORT SChangeDescription { //! good explanation of the recent behaviour. class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector { public: - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; using TTimeDoublePr = std::pair; using TTimeDoublePr1Vec = core::CSmallVector; - using TWeightStyleVec = maths_t::TWeightStyleVec; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TDecompositionPtr = std::shared_ptr; using TPriorPtr = std::shared_ptr; using TOptionalChangeDescription = boost::optional; @@ -100,9 +98,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector { double decisionFunction(std::size_t& change) const; //! Add \p samples to the change detector. 
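Before the change detector's addSamples declared next: the expectation implementation above now builds its integrand from the prior and a weights array alone. A minimal sketch of the same wiring, assuming CLogMarginalLikelihood remains publicly constructible.

    #include <maths/CPrior.h>
    #include <maths/MathsTypes.h>

    #include <cmath>

    using namespace ml;

    // Evaluate the marginal likelihood density at x, the same way
    // CPrior::expectation wires up its integrand.
    double likelihoodAt(const maths::CPrior& prior, double x) {
        maths::CPrior::CLogMarginalLikelihood logLikelihood(
            prior, maths_t::CUnitWeights::SINGLE_UNIT);
        return std::exp(logLikelihood(x));
    }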
- void addSamples(const TWeightStyleVec& weightStyles, - const TTimeDoublePr1Vec& samples, - const TDouble4Vec1Vec& weights); + void addSamples(const TTimeDoublePr1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Check if we should stop testing. bool stopTesting() const; @@ -153,11 +149,9 @@ namespace time_series_change_detector_detail { class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable { public: using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; using TTimeDoublePr = std::pair; using TTimeDoublePr1Vec = core::CSmallVector; - using TWeightStyleVec = maths_t::TWeightStyleVec; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TDecompositionPtr = std::shared_ptr; using TPriorPtr = std::shared_ptr; using TOptionalChangeDescription = boost::optional; @@ -184,9 +178,8 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable { //! Update the change model with \p samples. virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, - TDouble4Vec1Vec weights) = 0; + TDoubleWeightsAry1Vec weights) = 0; //! Debug the memory used by this object. void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const; @@ -212,13 +205,10 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable { double expectedLogLikelihood() const; //! Update the log-likelihood with \p samples. - void updateLogLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + void updateLogLikelihood(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Update the expected log-likelihoods. - void updateExpectedLogLikelihood(const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights); + void updateExpectedLogLikelihood(const TDoubleWeightsAry1Vec& weights); //! Get the time series trend model. const CTimeSeriesDecompositionInterface& trendModel() const; @@ -267,9 +257,8 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode //! Get the log likelihood of \p samples. virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, - TDouble4Vec1Vec weights); + TDoubleWeightsAry1Vec weights); //! Get the static size of this object. virtual std::size_t staticSize() const; @@ -303,9 +292,8 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo //! Update with \p samples. virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, - TDouble4Vec1Vec weights); + TDoubleWeightsAry1Vec weights); //! Get the static size of this object. virtual std::size_t staticSize() const; @@ -352,9 +340,8 @@ class MATHS_EXPORT CUnivariateLinearScaleModel final : public CUnivariateChangeM //! Update with \p samples. virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, - TDouble4Vec1Vec weights); + TDoubleWeightsAry1Vec weights); //! Get the static size of this object. virtual std::size_t staticSize() const; @@ -402,9 +389,8 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod //! Update with \p samples. virtual void addSamples(const std::size_t count, - TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples, - TDouble4Vec1Vec weights); + TDoubleWeightsAry1Vec weights); //! 
Get the static size of this object. virtual std::size_t staticSize() const; diff --git a/include/maths/CTimeSeriesDecomposition.h b/include/maths/CTimeSeriesDecomposition.h index 252fbd1726..0b5233468e 100644 --- a/include/maths/CTimeSeriesDecomposition.h +++ b/include/maths/CTimeSeriesDecomposition.h @@ -98,9 +98,6 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt //! //! \param[in] time The time of the function point. //! \param[in] value The function value at \p time. - //! \param[in] weightStyles The styles of \p weights. Both the count - //! and the Winsorisation weight styles have an effect. See also - //! maths_t::ESampleWeightStyle for more details. //! \param[in] weights The weights of \p value. The smaller //! the count weight the less influence \p value has on the trend //! and it's local variance. @@ -108,8 +105,7 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt //! and false otherwise. virtual bool addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec& weights = TWeights::UNIT); + const maths_t::TDoubleWeightsAry& weights = TWeights::UNIT); //! Apply \p change at \p time. //! diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h index ac5a458b38..f904c6172a 100644 --- a/include/maths/CTimeSeriesDecompositionDetail.h +++ b/include/maths/CTimeSeriesDecompositionDetail.h @@ -54,8 +54,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { SAddValue(core_t::TTime time, core_t::TTime lastTime, double value, - const maths_t::TWeightStyleVec& weightStyles, - const maths_t::TDouble4Vec& weights, + const maths_t::TDoubleWeightsAry& weights, double trend, double seasonal, double calendar, @@ -64,10 +63,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! The value to add. double s_Value; - //! The styles of the weights. - const maths_t::TWeightStyleVec& s_WeightStyles; //! The weights of associated with the value. - const maths_t::TDouble4Vec& s_Weights; + const maths_t::TDoubleWeightsAry& s_Weights; //! The trend component prediction at the value's time. double s_Trend; //! The seasonal component prediction at the value's time. diff --git a/include/maths/CTimeSeriesDecompositionInterface.h b/include/maths/CTimeSeriesDecompositionInterface.h index 6257830f0a..109756f594 100644 --- a/include/maths/CTimeSeriesDecompositionInterface.h +++ b/include/maths/CTimeSeriesDecompositionInterface.h @@ -11,7 +11,6 @@ #include #include -#include #include #include @@ -36,8 +35,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface { public: using TDouble3Vec = core::CSmallVector; using TDouble3VecVec = std::vector; - using TDoubleAry = boost::array; - using TWeights = CConstantWeights; + using TWeights = maths_t::CUnitWeights; using TWriteForecastResult = std::function; //! The components of the decomposition. @@ -75,9 +73,6 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface { //! //! \param[in] time The time of the function point. //! \param[in] value The function value at \p time. - //! \param[in] weightStyles The styles of \p weights. Both the - //! count and the Winsorisation weight styles have an effect. - //! See maths_t::ESampleWeightStyle for more details. //! \param[in] weights The weights of \p value. The smaller //! the product count weight the less influence \p value has //! on the trend and it's local variance. 
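The decomposition interfaces above now take a single weights array in addPoint. A minimal sketch composing the array one slot at a time; the 0.5 count and 0.9 Winsorisation weights are illustrative values.

    #include <maths/CTimeSeriesDecomposition.h>
    #include <maths/MathsTypes.h>

    using namespace ml;

    // Both the count and the Winsorisation slots influence the trend
    // update, so set those two explicitly and leave the rest at unity.
    void addPoint(maths::CTimeSeriesDecomposition& trend,
                  core_t::TTime time, double value) {
        maths_t::TDoubleWeightsAry weights(maths_t::CUnitWeights::UNIT);
        maths_t::setCount(0.5, weights);
        maths_t::setWinsorisationWeight(0.9, weights);
        trend.addPoint(time, value, weights);
    }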
@@ -85,8 +80,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface { //! and false otherwise. virtual bool addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec& weights = TWeights::UNIT) = 0; + const maths_t::TDoubleWeightsAry& weights = TWeights::UNIT) = 0; //! Apply \p change at \p time. //! diff --git a/include/maths/CTimeSeriesDecompositionStub.h b/include/maths/CTimeSeriesDecompositionStub.h index 374d5ca1a3..0ed47c2e6e 100644 --- a/include/maths/CTimeSeriesDecompositionStub.h +++ b/include/maths/CTimeSeriesDecompositionStub.h @@ -40,8 +40,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio //! No-op returning false. virtual bool addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec& weights = TWeights::UNIT); + const maths_t::TDoubleWeightsAry& weights = TWeights::UNIT); //! No-op returning false. virtual bool applyChange(core_t::TTime time, double value, const SChangeDescription& change); diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h index 4f80bbbc30..16cc62e6f0 100644 --- a/include/maths/CTimeSeriesModel.h +++ b/include/maths/CTimeSeriesModel.h @@ -47,9 +47,9 @@ double tailWinsorisationWeight(const CMultivariatePrior& prior, //! \brief A CModel implementation for modeling a univariate time series. class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { public: - using TDouble4Vec = core::CSmallVector; using TTimeDoublePr = std::pair; using TTimeDoublePrCBuf = boost::circular_buffer; + using TDoubleWeightsAry = maths_t::TDoubleWeightsAry; using TDecompositionPtr = std::shared_ptr; using TDecayRateController2Ary = boost::array; @@ -105,19 +105,15 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { virtual void skipTime(core_t::TTime gap); //! Get the most likely value for the time series at \p time. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec mode(core_t::TTime time, const TDouble2VecWeightsAry& weights) const; //! Get the most likely value for each correlate time series //! at \p time, if there are any. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const; + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const TDouble2VecWeightsAry1Vec& weights) const; //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec1Vec residualModes(const TDouble2VecWeightsAry& weights) const; //! Remove any trend components from \p value. virtual void detrend(const TTime2Vec1Vec& time, @@ -133,8 +129,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { //! confidence interval for the time series at \p time. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + const TDouble2VecWeightsAry& weights) const; //! Forecast the time series and get its \p confidenceInterval //! percentage confidence interval between \p startTime and @@ -185,7 +180,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { //! \name Helpers //@{ //! 
Unpack the weights in \p weights. - static TDouble4Vec unpack(const TDouble2Vec4Vec& weights); + static TDoubleWeightsAry unpack(const TDouble2VecWeightsAry& weights); //! Reinitialize \p residualModel using the detrended values //! from \p slidingWindow. @@ -211,7 +206,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { using TSizeVec = std::vector; using TDouble1Vec = core::CSmallVector; using TDouble1VecVec = std::vector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAryVec = std::vector; using TVector = CVectorNx1; using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TDecayRateController2AryPtr = std::shared_ptr; @@ -237,9 +232,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { EUpdateResult applyChange(const SChangeDescription& change); //! Update the trend with \p samples. - EUpdateResult updateTrend(const maths_t::TWeightStyleVec& trendStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& trendWeights); + EUpdateResult updateTrend(const TTimeDouble2VecSizeTrVec& samples, + const TDouble2VecWeightsAryVec& trendWeights); //! Compute the prediction errors for \p sample. void appendPredictionErrors(double interval, double sample, TDouble1VecVec (&result)[2]); @@ -344,8 +338,7 @@ class MATHS_EXPORT CTimeSeriesCorrelations { using TTime1Vec = core::CSmallVector; using TDouble1Vec = core::CSmallVector; using TDouble2Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TSize1Vec = core::CSmallVector; using TSizeSize1VecUMap = boost::unordered_map; using TSize2Vec = core::CSmallVector; @@ -370,7 +363,7 @@ class MATHS_EXPORT CTimeSeriesCorrelations { //! The tags for each sample. TSize1Vec s_Tags; //! The sample weights. - TDouble4Vec1Vec s_Weights; + TDoubleWeightsAry1Vec s_Weights; //! The interval by which to age the correlation model. double s_Interval; //! The decay rate multiplier. @@ -394,7 +387,7 @@ class MATHS_EXPORT CTimeSeriesCorrelations { //! //! \note This should be called exactly once after every univariate //! time series model has added its samples. - void processSamples(const maths_t::TWeightStyleVec& weightStyles); + void processSamples(); //! Refresh the models to account for any changes to the correlation //! estimates. @@ -505,9 +498,9 @@ class MATHS_EXPORT CTimeSeriesCorrelations { class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { public: using TDouble10Vec = core::CSmallVector; - using TDouble10Vec4Vec = core::CSmallVector; using TTimeDouble2VecPr = std::pair; using TTimeDouble2VecPrCBuf = boost::circular_buffer; + using TDouble10VecWeightsAry = maths_t::TDouble10VecWeightsAry; using TDecompositionPtr = std::shared_ptr; using TDecompositionPtr10Vec = core::CSmallVector; using TDecayRateController2Ary = boost::array; @@ -562,18 +555,14 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { virtual void skipTime(core_t::TTime gap); //! Get the most likely value for the time series at \p time. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec mode(core_t::TTime time, const TDouble2VecWeightsAry& weights) const; //! Returns empty. 
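Before correlateModes declared next: a sketch of the unpack helper documented above, assuming the univariate (dimension one) case and unit weights for illustration.

    #include <maths/CTimeSeriesModel.h>
    #include <maths/MathsTypes.h>

    using namespace ml;

    // The model layer carries one TDouble2Vec per weight style; unpack
    // flattens that to the one-double-per-style array which the
    // univariate residual prior expects.
    void weightsRoundTrip(const maths::CUnivariateTimeSeriesModel& model,
                          core_t::TTime time) {
        maths_t::TDouble2VecWeightsAry weights(
            maths_t::CUnitWeights::unit<maths_t::TDouble2Vec>(1));
        maths_t::TDouble2Vec mostLikely = model.mode(time, weights);
        maths_t::TDoubleWeightsAry unpacked =
            maths::CUnivariateTimeSeriesModel::unpack(weights);
    }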
- virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const; + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const TDouble2VecWeightsAry1Vec& weights) const; //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec1Vec residualModes(const TDouble2VecWeightsAry& weights) const; //! Remove any trend components from \p value. virtual void detrend(const TTime2Vec1Vec& time, @@ -589,8 +578,7 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { //! confidence interval for the time series at \p time. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + const TDouble2VecWeightsAry& weights) const; //! Not currently supported. virtual bool forecast(core_t::TTime startTime, @@ -639,7 +627,7 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { //! \name Helpers //@{ //! Unpack the weights in \p weights. - static TDouble10Vec4Vec unpack(const TDouble2Vec4Vec& weights); + static TDouble10VecWeightsAry unpack(const TDouble2VecWeightsAry& weights); //! Reinitialize \p residualModel using the detrended values //! from \p slidingWindow. @@ -664,7 +652,7 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { private: using TDouble1Vec = core::CSmallVector; using TDouble1VecVec = std::vector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAryVec = std::vector; using TVector = CVectorNx1; using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TDecayRateController2AryPtr = std::shared_ptr; @@ -673,9 +661,8 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { private: //! Update the trend with \p samples. - EUpdateResult updateTrend(const maths_t::TWeightStyleVec& trendStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& trendWeights); + EUpdateResult updateTrend(const TTimeDouble2VecSizeTrVec& samples, + const TDouble2VecWeightsAryVec& trendWeights); //! Compute the prediction errors for \p sample. void appendPredictionErrors(double interval, diff --git a/include/maths/Constants.h b/include/maths/Constants.h index 7c73843308..65f2de2d4e 100644 --- a/include/maths/Constants.h +++ b/include/maths/Constants.h @@ -98,40 +98,6 @@ const double MINIMUM_CLUSTER_SPLIT_COUNT{24.0}; //! The minimum count of a category in the sketch to cluster. const double MINIMUM_CATEGORY_COUNT{0.5}; -//! \brief A collection of weight styles and weights. -class MATHS_EXPORT CConstantWeights { -public: - using TDouble2Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; - using TDouble2Vec4Vec1Vec = core::CSmallVector; - -public: - //! A single count weight style. - static const maths_t::TWeightStyleVec COUNT; - //! A single count variance weight style. - static const maths_t::TWeightStyleVec COUNT_VARIANCE; - //! A single seasonal variance weight style. - static const maths_t::TWeightStyleVec SEASONAL_VARIANCE; - //! A unit weight. - static const TDouble4Vec UNIT; - //! A single unit weight. - static const TDouble4Vec1Vec SINGLE_UNIT; - //! Get a unit weight for data with \p dimension. 
- template - static core::CSmallVector unit(std::size_t dimension) { - return TDouble2Vec4Vec{VECTOR(dimension, 1.0)}; - } - //! Get a single unit weight for data with \p dimension. - template - static core::CSmallVector, 1> - singleUnit(std::size_t dimension) { - return core::CSmallVector, 1>{ - core::CSmallVector{VECTOR(dimension, 1.0)}}; - } -}; - //! Get the maximum amount we'll penalize a model in addSamples. MATHS_EXPORT double maxModelPenalty(double numberSamples); } diff --git a/include/maths/MathsTypes.h b/include/maths/MathsTypes.h index 26bf7c84f7..5dafbe62ef 100644 --- a/include/maths/MathsTypes.h +++ b/include/maths/MathsTypes.h @@ -12,6 +12,9 @@ #include +#include + +#include #include #include @@ -24,11 +27,8 @@ class CSeasonalComponent; namespace maths_t { using TDoubleDoublePr = std::pair; -using TDouble4Vec = core::CSmallVector; +using TDouble2Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; -using TDouble10Vec4Vec = core::CSmallVector; -using TDouble10Vec4Vec1Vec = core::CSmallVector; using TSeasonalComponentVec = std::vector; using TCalendarComponentVec = std::vector; @@ -71,107 +71,307 @@ enum ESampleWeightStyle { //! IMPORTANT: this must be kept this up-to-date with ESampleWeightStyle. const std::size_t NUMBER_WEIGHT_STYLES = 4; -using TWeightStyleVec = core::CSmallVector; +template +using TWeightsAry = boost::array; +using TDoubleWeightsAry = TWeightsAry; +using TDoubleWeightsAry1Vec = core::CSmallVector; +using TDouble2VecWeightsAry = TWeightsAry; +using TDouble2VecWeightsAry1Vec = core::CSmallVector; +using TDouble10VecWeightsAry = TWeightsAry; +using TDouble10VecWeightsAry1Vec = core::CSmallVector; + +namespace maths_types_detail { + +//! \brief Constructs a unit weight. +template +struct SUnitWeightFactory { + static std::size_t dimension(const VECTOR& weight) { return weight.size(); } + static VECTOR weight(std::size_t dimension) { + return VECTOR(dimension, 1.0); + } +}; +//! \brief Constructs a unit weight. +template<> +struct SUnitWeightFactory { + static std::size_t dimension(double) { return 1; } + static double weight(std::size_t) { return 1.0; } +}; -//! Extract the effective sample count from a collection of weights. +//! \brief Add two weights. +template +struct SWeightArithmetic { + static void add(const VECTOR& lhs, VECTOR& rhs) { + for (std::size_t i = 0u; i < lhs.size(); ++i) { + rhs[i] += lhs[i]; + } + } + static void multiply(const VECTOR& lhs, VECTOR& rhs) { + for (std::size_t i = 0u; i < lhs.size(); ++i) { + rhs[i] *= lhs[i]; + } + } +}; +//! \brief Add two weights. +template<> +struct SWeightArithmetic { + static void add(double lhs, double& rhs) { rhs += lhs; } + static void multiply(double lhs, double& rhs) { rhs *= lhs; } +}; +} + +//! \brief A collection of weight styles and weights. +class MATHS_EXPORT CUnitWeights { +public: + //! A unit weight. + static const TDoubleWeightsAry UNIT; + //! A single unit weight. + static const TDoubleWeightsAry1Vec SINGLE_UNIT; + //! Get a conformable unit weight for \p weight. + template + static TWeightsAry unit(const VECTOR& weight) { + return unit(maths_types_detail::SUnitWeightFactory::dimension(weight)); + } + //! Get a unit weight for data with \p dimension. + template + static TWeightsAry unit(std::size_t dimension) { + TWeightsAry result; + result.fill(maths_types_detail::SUnitWeightFactory::weight(dimension)); + return result; + } + //! Get a single conformable unit weight for \p weight. 
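The CUnitWeights factory generalizes the old CConstantWeights helpers, including the single-weight variants declared next. A sketch for three-dimensional data; the 0.5 weight is illustrative.

    #include <maths/MathsTypes.h>

    using namespace ml;

    void makeUnits() {
        // Every slot of the array is a vector of three ones.
        maths_t::TDouble10VecWeightsAry unit3 =
            maths_t::CUnitWeights::unit<maths_t::TDouble10Vec>(3);
        // The conformable overload infers the dimension from a weight.
        maths_t::TDouble10Vec weight(3, 0.5);
        maths_t::TDouble10VecWeightsAry conformable =
            maths_t::CUnitWeights::unit(weight);
        // A length-one vector holding the unit array, for one-sample calls.
        maths_t::TDouble10VecWeightsAry1Vec single3 =
            maths_t::CUnitWeights::singleUnit<maths_t::TDouble10Vec>(3);
    }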
+ template + static core::CSmallVector, 1> singleUnit(const VECTOR& weight) { + return {unit(weight)}; + } + //! Get a single unit weight for data with \p dimension. + template + static core::CSmallVector, 1> singleUnit(std::size_t dimension) { + return {unit(dimension)}; + } +}; + +//! Get a weights array with count weight \p weight. +template +TWeightsAry countWeight(const VECTOR& weight) { + TWeightsAry result(CUnitWeights::unit(weight)); + result[E_SampleCountWeight] = weight; + return result; +} + +//! Get a weights array with count weight \p weight. MATHS_EXPORT -double count(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +TDoubleWeightsAry countWeight(double weight); -//! Extract the effective sample count from a collection of weights. +//! Get a weights array with count weight \p weight. +MATHS_EXPORT +TDouble10VecWeightsAry countWeight(double weight, std::size_t dimension); + +//! Set the count weight in \p weights to \p weight. +template +void setCount(const VECTOR& weight, TWeightsAry& weights) { + weights[E_SampleCountWeight] = weight; +} + +//! Set the count weight in \p weights to \p weight. MATHS_EXPORT -TDouble10Vec count(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +void setCount(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights); + +//! Add \p weight to the count weight of \p weights. +template +void addCount(const VECTOR& weight, TWeightsAry& weights) { + maths_types_detail::SWeightArithmetic::add(weight, weights[E_SampleCountWeight]); +} + +//! Extract the effective sample count from a collection of weights. +template +const VECTOR& count(const TWeightsAry& weights) { + return weights[E_SampleCountWeight]; +} //! Extract the effective sample count with which to update a model //! from a collection of weights. MATHS_EXPORT -double countForUpdate(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +double countForUpdate(const TDoubleWeightsAry& weights); //! Extract the effective sample count with which to update a model //! from a collection of weights. MATHS_EXPORT -TDouble10Vec countForUpdate(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +TDouble10Vec countForUpdate(const TDouble10VecWeightsAry& weights); + +//! Get a weights array with Winsorisation weight \p weight. +template +TWeightsAry winsorisationWeight(const VECTOR& weight) { + TWeightsAry result(CUnitWeights::unit(weight)); + result[E_SampleWinsorisationWeight] = weight; + return result; +} -//! Extract the winsorisation weight from a collection of weights. +//! Get a weights array with Winsorisation weight \p weight. MATHS_EXPORT -double winsorisationWeight(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +TDoubleWeightsAry winsorisationWeight(double weight); -//! Extract the winsorisation weight from a collection of weights. +//! Get a weights array with Winsorisation weight \p weight. MATHS_EXPORT -TDouble10Vec winsorisationWeight(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +TDouble10VecWeightsAry winsorisationWeight(double weight, std::size_t dimension); -//! Extract the variance scale from a collection of weights. +//! Set the Winsorisation weight in \p weights to \p weight. +template +void setWinsorisationWeight(const VECTOR& weight, TWeightsAry& weights) { + weights[E_SampleWinsorisationWeight] = weight; +} + +//! Set the Winsorisation weight in \p weights to \p weight. 
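Putting the count helpers above together, ahead of the Winsorisation setter declared next. That countForUpdate discounts the count by any non-unit Winsorisation weight is an assumption carried over from the old implementation's semantics.

    #include <maths/MathsTypes.h>

    using namespace ml;

    void countRoundTrip() {
        maths_t::TDoubleWeightsAry weights(maths_t::countWeight(2.0));
        maths_t::addCount(1.0, weights);    // the count slot is now 3.0
        double n = maths_t::count(weights); // reads the count slot: 3.0
        // With a unit Winsorisation weight this equals n.
        double nUpdate = maths_t::countForUpdate(weights);
    }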
MATHS_EXPORT -double seasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +void setWinsorisationWeight(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights); -//! Extract the variance scale from a collection of weights. +//! Extract the Winsorisation weight from a collection of weights. +template +const VECTOR& winsorisationWeight(const TWeightsAry& weights) { + return weights[E_SampleWinsorisationWeight]; +} + +//! Check if a non-unit Winsorisation weight applies. MATHS_EXPORT -TDouble10Vec seasonalVarianceScale(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +bool isWinsorised(const TDoubleWeightsAry& weights); -//! Extract the variance scale from a collection of weights. +//! Check if a non-unit Winsorisation weight applies. MATHS_EXPORT -double countVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +bool isWinsorised(const TDoubleWeightsAry1Vec& weights); + +//! Check if a non-unit Winsorisation weight applies. +template +bool isWinsorised(const TWeightsAry& weights) { + return std::any_of(weights[E_SampleWinsorisationWeight].begin(), + weights[E_SampleWinsorisationWeight].end(), + [](double weight) { return weight != 1.0; }); +} -//! Extract the variance scale from a collection of weights. +//! Check if a non-unit Winsorisation weight applies. +template +bool isWinsorised(const core::CSmallVector, 1>& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TWeightsAry& weight) { + return isWinsorised(weight); + }); +} + +//! Get a weights array with seasonal variance scale \p weight. +template +TWeightsAry seasonalVarianceScaleWeight(const VECTOR& weight) { + TWeightsAry result(CUnitWeights::unit(weight)); + result[E_SampleSeasonalVarianceScaleWeight] = weight; + return result; +} + +//! Get a weights vector with seasonal variance scale \p weight. MATHS_EXPORT -TDouble10Vec countVarianceScale(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +TDoubleWeightsAry seasonalVarianceScaleWeight(double weight); -//! Check if a non-unit seasonal variance scale applies. +//! Get a weights vector with seasonal variance scale \p weight. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +TDouble10VecWeightsAry seasonalVarianceScaleWeight(double weight, std::size_t dimension); -//! Check if a non-unit seasonal variance scale applies. +//! Set the seasonal variance scale weight in \p weights to \p weight. +template +void setSeasonalVarianceScale(const VECTOR& weight, TWeightsAry& weights) { + weights[E_SampleSeasonalVarianceScaleWeight] = weight; +} + +//! Set the seasonal variance scale weight in \p weights to \p weight. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights); +void setSeasonalVarianceScale(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights); + +//! Extract the variance scale from a collection of weights. +template +const VECTOR& seasonalVarianceScale(const TWeightsAry& weights) { + return weights[E_SampleSeasonalVarianceScaleWeight]; +} //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +bool hasSeasonalVarianceScale(const TDoubleWeightsAry& weights); //! Check if a non-unit seasonal variance scale applies. 
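A sketch of the Winsorisation accessors defined above, ahead of the seasonal variance scale predicate declared next; the 0.25 weight is illustrative.

    #include <maths/MathsTypes.h>

    using namespace ml;

    void winsorise() {
        // Only the Winsorisation slot of this array differs from one.
        maths_t::TDoubleWeightsAry weights(maths_t::winsorisationWeight(0.25));
        if (maths_t::isWinsorised(weights)) {
            double w = maths_t::winsorisationWeight(weights); // 0.25
        }
    }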
MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec1Vec& weights); +bool hasSeasonalVarianceScale(const TDoubleWeightsAry1Vec& weights); -//! Check if a non-unit count variance scale applies. -MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +//! Check if a non-unit seasonal variance scale applies. +template +bool hasSeasonalVarianceScale(const TWeightsAry& weights) { + return std::any_of(weights[E_SampleSeasonalVarianceScaleWeight].begin(), + weights[E_SampleSeasonalVarianceScaleWeight].end(), + [](double weight) { return weight != 1.0; }); +} //! Check if a non-unit seasonal variance scale applies. +template +bool hasSeasonalVarianceScale(const core::CSmallVector, 1>& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TWeightsAry& weight) { + return hasSeasonalVarianceScale(weight); + }); +} + +//! Get a weights array with count variance scale \p weight. +template +TWeightsAry countVarianceScaleWeight(const VECTOR& weight) { + TWeightsAry result(CUnitWeights::unit(weight)); + result[E_SampleCountVarianceScaleWeight] = weight; + return result; +} + +//! Get a weights vector with count variance scale \p weight. MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights); +TDoubleWeightsAry countVarianceScaleWeight(double weight); -//! Check if a non-unit seasonal variance scale applies. +//! Get a weights vector with count variance scale \p weight. MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +TDouble10VecWeightsAry countVarianceScaleWeight(double weight, std::size_t dimension); -//! Check if a non-unit seasonal variance scale applies. +//! Set the count variance scale weight in \p weights to \p weight. +template +void setCountVarianceScale(const VECTOR& weight, TWeightsAry& weights) { + weights[E_SampleCountVarianceScaleWeight] = weight; +} + +//! Set the count variance scale weight in \p weights to \p weight. MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec1Vec& weights); +void setCountVarianceScale(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights); + +//! Multiply the count variance scale of \p weights by \p weight. +template +void multiplyCountVarianceScale(const VECTOR& weight, TWeightsAry& weights) { + maths_types_detail::SWeightArithmetic::multiply( + weight, weights[E_SampleCountVarianceScaleWeight]); +} + +//! Extract the variance scale from a collection of weights. +template +const VECTOR& countVarianceScale(const TWeightsAry& weights) { + return weights[E_SampleCountVarianceScaleWeight]; +} -//! Set \p style to weight or append if it isn't in \p weightStyles. +//! Check if a non-unit count variance scale applies. MATHS_EXPORT -void setWeight(ESampleWeightStyle style, double weight, TWeightStyleVec& weightStyles, TDouble4Vec& weights); +bool hasCountVarianceScale(const TDoubleWeightsAry& weights); -//! Set \p style to weight or append if it isn't in \p weightStyles. +//! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -void setWeight(ESampleWeightStyle style, - double weight, - std::size_t dimension, - TWeightStyleVec& weightStyles, - TDouble10Vec4Vec& weights); +bool hasCountVarianceScale(const TDoubleWeightsAry1Vec& weights); + +//! Check if a non-unit seasonal variance scale applies. 
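Combining the two variance scale families, ahead of the count variance predicate declared next; the 1.5 and 2.0 scales are illustrative, and multiplyCountVarianceScale composes multiplicatively via SWeightArithmetic above.

    #include <maths/MathsTypes.h>

    using namespace ml;

    void scaleVariances() {
        maths_t::TDoubleWeightsAry weights(
            maths_t::seasonalVarianceScaleWeight(1.5));
        // Fold an additional count variance scale into the same array.
        maths_t::multiplyCountVarianceScale(2.0, weights);
        bool seasonal = maths_t::hasSeasonalVarianceScale(weights); // true
        bool scaled = maths_t::hasCountVarianceScale(weights);      // true
    }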
+template +bool hasCountVarianceScale(const TWeightsAry& weights) { + return std::any_of(weights[E_SampleCountVarianceScaleWeight].begin(), + weights[E_SampleCountVarianceScaleWeight].end(), + [](double weight) { return weight != 1.0; }); +} + +//! Check if a non-unit seasonal variance scale applies. +template +bool hasCountVarianceScale(const core::CSmallVector, 1>& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TWeightsAry& weight) { + return hasCountVarianceScale(weight); + }); +} //! Enumerates the possible probability of less likely sample calculations. //! diff --git a/include/model/CAnomalyDetectorModel.h b/include/model/CAnomalyDetectorModel.h index 9240938342..00dc1df1b9 100644 --- a/include/model/CAnomalyDetectorModel.h +++ b/include/model/CAnomalyDetectorModel.h @@ -128,12 +128,8 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable { using TSizeVec = std::vector; using TDoubleVec = std::vector; using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; using TDouble10Vec1Vec = core::CSmallVector; - using TDouble10Vec4Vec = core::CSmallVector; - using TDouble10Vec4Vec1Vec = core::CSmallVector; using TDouble1VecDoublePr = std::pair; using TDouble1VecDouble1VecPr = std::pair; using TSizeDoublePr = std::pair; diff --git a/include/model/CIndividualModel.h b/include/model/CIndividualModel.h index ceb52c6e83..88ad1047a4 100644 --- a/include/model/CIndividualModel.h +++ b/include/model/CIndividualModel.h @@ -253,7 +253,7 @@ class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel { maths::CModel* model(model_t::EFeature feature, std::size_t pid); //! Sample the correlate models. - void sampleCorrelateModels(const maths_t::TWeightStyleVec& weightStyles); + void sampleCorrelateModels(); //! Correct \p baseline with \p corrections for interim results. 
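The MathsTypes.h hunks above replace the old parallel (weightStyles, weights) vectors with a fixed-layout array indexed by weight kind, so every accessor becomes a constant-time indexed read instead of a scan of a styles vector. The following is a minimal self-contained sketch of that layout; the enum values, helper names and std::array storage are illustrative stand-ins for the library's TWeightsAry, not the actual API.

#include <array>
#include <iostream>

// Hypothetical fixed slots, one per weight kind; these mirror the
// E_Sample... enumerators above but are illustrative stand-ins.
enum EWeight { E_Count = 0, E_Winsorisation, E_SeasonalScale, E_CountScale, NUMBER_WEIGHTS };
using TWeightsAry = std::array<double, NUMBER_WEIGHTS>;

// Every slot defaults to the neutral weight 1.0.
TWeightsAry unitWeights() {
    TWeightsAry result;
    result.fill(1.0);
    return result;
}

// Building a weights array is one indexed write; no style vector to scan.
TWeightsAry countWeight(double weight) {
    TWeightsAry result{unitWeights()};
    result[E_Count] = weight;
    return result;
}

// Checks reduce to comparing one slot with the neutral value.
bool isWinsorised(const TWeightsAry& weights) {
    return weights[E_Winsorisation] != 1.0;
}

int main() {
    TWeightsAry weights{countWeight(0.5)};
    weights[E_Winsorisation] = 0.9;
    std::cout << "count = " << weights[E_Count] << ", winsorised = "
              << std::boolalpha << isWinsorised(weights) << '\n';
}

The design point is that a fixed slot per weight kind lets call sites drop the styles argument entirely, which is exactly what the signature changes throughout the rest of this series do.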
diff --git a/include/model/CIndividualModel.h b/include/model/CIndividualModel.h
index ceb52c6e83..88ad1047a4 100644
--- a/include/model/CIndividualModel.h
+++ b/include/model/CIndividualModel.h
@@ -253,7 +253,7 @@ class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel {
     maths::CModel* model(model_t::EFeature feature, std::size_t pid);

     //! Sample the correlate models.
-    void sampleCorrelateModels(const maths_t::TWeightStyleVec& weightStyles);
+    void sampleCorrelateModels();

     //! Correct \p baseline with \p corrections for interim results.
     void correctBaselineForInterim(model_t::EFeature feature,
diff --git a/lib/api/dump_state/Main.cc b/lib/api/dump_state/Main.cc
index 244b06682b..0deedd6242 100644
--- a/lib/api/dump_state/Main.cc
+++ b/lib/api/dump_state/Main.cc
@@ -150,12 +150,12 @@ bool persistAnomalyDetectorStateToFile(const std::string& configFileName,
                                        boost::bind(&reportPersistComplete, _1),
                                        nullptr, -1, "time", timeFormat);

-    using TInputParserCUPtr = const std::unique_ptr<ml::api::CInputParser>;
-    TInputParserCUPtr parser{[&inputFilename, &inputStrm]() -> ml::api::CInputParser* {
+    using TInputParserUPtr = std::unique_ptr<ml::api::CInputParser>;
+    const TInputParserUPtr parser{[&inputFilename, &inputStrm]() -> TInputParserUPtr {
         if (inputFilename.rfind(".csv") == inputFilename.length() - 4) {
-            return new ml::api::CCsvInputParser(inputStrm);
+            return std::make_unique<ml::api::CCsvInputParser>(inputStrm);
         }
-        return new ml::api::CLineifiedJsonInputParser(inputStrm);
+        return std::make_unique<ml::api::CLineifiedJsonInputParser>(inputStrm);
     }()};

     if (!parser->readStream(boost::bind(&ml::api::CAnomalyJob::handleRecord, &origJob, _1))) {
diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc
index 31e0e43d0b..9a967bc88f 100644
--- a/lib/api/unittest/CAnomalyJobTest.cc
+++ b/lib/api/unittest/CAnomalyJobTest.cc
@@ -1444,7 +1444,6 @@ void CAnomalyJobTest::testOutOfPhase() {
 }

 void CAnomalyJobTest::testBucketSelection() {
-    LOG_DEBUG(<< "*** testBucketSelection ***");
     core_t::TTime bucketSize = 100;
     model::CLimits limits;
     api::CFieldConfig fieldConfig;
@@ -1579,7 +1578,6 @@ void CAnomalyJobTest::testBucketSelection() {
 }

 void CAnomalyJobTest::testModelPlot() {
-    LOG_DEBUG(<< "*** testModelPlot ***");
     {
         // Test non-overlapping buckets
         core_t::TTime bucketSize = 10000;
@@ -1766,8 +1764,6 @@ void CAnomalyJobTest::testModelPlot() {
 }

 void CAnomalyJobTest::testInterimResultEdgeCases() {
-    LOG_DEBUG(<< "*** testInterimResultEdgeCases ***");
-
     const char* logFile = "test.log";

     core_t::TTime bucketSize = 3600;
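The Main.cc hunk above (repeated in the data-adder tests below) replaces an owning raw pointer with std::make_unique inside an immediately invoked lambda, so the parser is owned by a const std::unique_ptr from the moment it is created. A self-contained sketch of the idiom, with hypothetical parser types standing in for the ml::api classes:

#include <iostream>
#include <memory>
#include <string>

// Hypothetical stand-ins for ml::api::CInputParser and its subclasses.
struct CInputParser {
    virtual ~CInputParser() = default;
    virtual std::string name() const = 0;
};
struct CCsvInputParser : CInputParser {
    std::string name() const override { return "csv"; }
};
struct CJsonInputParser : CInputParser {
    std::string name() const override { return "json"; }
};

int main() {
    const std::string inputFilename{"data.csv"};

    using TInputParserUPtr = std::unique_ptr<CInputParser>;
    // The lambda returns the unique_ptr directly, so ownership never
    // passes through a raw pointer and the result can be const-qualified.
    const TInputParserUPtr parser{[&inputFilename]() -> TInputParserUPtr {
        if (inputFilename.rfind(".csv") == inputFilename.length() - 4) {
            return std::make_unique<CCsvInputParser>();
        }
        return std::make_unique<CJsonInputParser>();
    }()};

    std::cout << parser->name() << '\n';
}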
diff --git a/lib/api/unittest/CDetectionRulesJsonParserTest.cc b/lib/api/unittest/CDetectionRulesJsonParserTest.cc
index b1da763fe5..10f2654aab 100644
--- a/lib/api/unittest/CDetectionRulesJsonParserTest.cc
+++ b/lib/api/unittest/CDetectionRulesJsonParserTest.cc
@@ -87,8 +87,6 @@ CppUnit::Test* CDetectionRulesJsonParserTest::suite() {
 }

 void CDetectionRulesJsonParserTest::testParseRulesGivenEmptyString() {
-    LOG_DEBUG(<< "*** testParseRulesGivenEmptyString ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "";
@@ -99,8 +97,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenEmptyArray() {
-    LOG_DEBUG(<< "*** testParseRulesGivenEmptyArray ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[]";
@@ -111,8 +107,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenArrayContainsStrings() {
-    LOG_DEBUG(<< "*** testParseRulesGivenArrayContainsStrings ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[\"a\", \"b\"]";
@@ -123,8 +117,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction() {
-    LOG_DEBUG(<< "*** testParseRulesGivenMissingRuleAction ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -141,8 +133,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleAction() {
 }

 void CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray() {
-    LOG_DEBUG(<< "*** testParseRulesGivenRuleActionIsNotArray ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -160,8 +150,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenRuleActionIsNotArray() {
-    LOG_DEBUG(<< "*** testParseRulesGivenInvalidRuleAction ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -179,8 +167,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidRuleAction() {
-    LOG_DEBUG(<< "*** testParseRulesGivenMissingConditionsConnective ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -196,8 +182,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionsConnecti
-    LOG_DEBUG(<< "*** testParseRulesGivenInvalidConditionsConnective ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -214,8 +198,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionsConnecti
-    LOG_DEBUG(<< "*** testParseRulesGivenMissingRuleConditions ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -229,8 +211,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingRuleConditions() {
-    LOG_DEBUG(<< "*** testParseRulesGivenRuleConditionsIsNotArray ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -245,8 +225,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenRuleConditionsIsNotArray(
-    LOG_DEBUG(<< "*** testParseRulesGivenMissingConditionOperator ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -262,8 +240,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMissingConditionOperator(
-    LOG_DEBUG(<< "*** testParseRulesGivenInvalidConditionOperator ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -279,8 +255,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenInvalidConditionOperator(
-    LOG_DEBUG(<< "*** testParseRulesGivenNumericalActualRuleWithConnectiveOr ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -302,8 +276,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalActualRuleWithCo
-    LOG_DEBUG(<< "*** testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -326,8 +298,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenNumericalTypicalAndDiffAb
-    LOG_DEBUG(<< "*** testParseRulesGivenMultipleRules ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -361,8 +331,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules() {
 }

 void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule() {
-    LOG_DEBUG(<< "*** testParseRulesGivenCategoricalMatchRule ***");
-
     TStrPatternSetUMap filtersById;
     core::CPatternSet filter;
     filter.initFromJson("[\"b\", \"a\"]");
@@ -388,8 +356,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule() {
-    LOG_DEBUG(<< "*** testParseRulesGivenOldStyleCategoricalRule ***");
-
     // Tests that the rule type can be parsed as categorical_match
     // when the type is categorical
@@ -418,8 +384,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule() {
-    LOG_DEBUG(<< "*** testParseRulesGivenCategoricalComplementRule ***");
-
     TStrPatternSetUMap filtersById;
     core::CPatternSet filter;
     filter.initFromJson("[\"b\", \"a\"]");
@@ -445,8 +409,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule() {
-    LOG_DEBUG(<< "*** testParseRulesGivenTimeRule ***");
-
     CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
     CDetectionRulesJsonParser::TDetectionRuleVec rules;
     std::string rulesJson = "[";
@@ -467,8 +429,6 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions() {
-    LOG_DEBUG(<< "*** testParseRulesGivenDifferentActions ***");
-
     {
         CDetectionRulesJsonParser parser(EMPTY_VALUE_FILTER_MAP);
         CDetectionRulesJsonParser::TDetectionRuleVec rules;
diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc
index 2634e7cafd..62c3843548 100644
--- a/lib/api/unittest/CMultiFileDataAdderTest.cc
+++ b/lib/api/unittest/CMultiFileDataAdderTest.cc
@@ -191,12 +191,12 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFil
                                          boost::ref(numOrigDocs)),
                              nullptr, -1, "time", timeFormat);

-    using TInputParserCUPtr = const std::unique_ptr<ml::api::CInputParser>;
-    TInputParserCUPtr parser{[&inputFilename, &inputStrm]() -> ml::api::CInputParser* {
+    using TInputParserUPtr = std::unique_ptr<ml::api::CInputParser>;
+    const TInputParserUPtr parser{[&inputFilename, &inputStrm]() -> TInputParserUPtr {
         if (inputFilename.rfind(".csv") == inputFilename.length() - 4) {
-            return new ml::api::CCsvInputParser(inputStrm);
+            return std::make_unique<ml::api::CCsvInputParser>(inputStrm);
         }
-        return new ml::api::CLineifiedJsonInputParser(inputStrm);
+        return std::make_unique<ml::api::CLineifiedJsonInputParser>(inputStrm);
     }()};

     CPPUNIT_ASSERT(parser->readStream(
diff --git a/lib/api/unittest/CSingleStreamDataAdderTest.cc b/lib/api/unittest/CSingleStreamDataAdderTest.cc
index bdca53a6fb..2cd9e60f9b 100644
--- a/lib/api/unittest/CSingleStreamDataAdderTest.cc
+++ b/lib/api/unittest/CSingleStreamDataAdderTest.cc
@@ -147,12 +147,12 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& config
         firstProcessor = &typer;
     }

-    using TInputParserCUPtr = const std::unique_ptr<ml::api::CInputParser>;
-    TInputParserCUPtr parser{[&inputFilename, &inputStrm]() -> ml::api::CInputParser* {
+    using TInputParserUPtr = std::unique_ptr<ml::api::CInputParser>;
+    const TInputParserUPtr parser{[&inputFilename, &inputStrm]() -> TInputParserUPtr {
         if (inputFilename.rfind(".csv") == inputFilename.length() - 4) {
-            return new ml::api::CCsvInputParser(inputStrm);
+            return std::make_unique<ml::api::CCsvInputParser>(inputStrm);
         }
-        return new ml::api::CLineifiedJsonInputParser(inputStrm);
+        return std::make_unique<ml::api::CLineifiedJsonInputParser>(inputStrm);
     }()};

     CPPUNIT_ASSERT(parser->readStream(
diff --git a/lib/config/unittest/CAutoconfigurerParamsTest.cc b/lib/config/unittest/CAutoconfigurerParamsTest.cc
index 0725742721..926a65848a 100644
--- a/lib/config/unittest/CAutoconfigurerParamsTest.cc
+++ b/lib/config/unittest/CAutoconfigurerParamsTest.cc
@@ -13,11 +13,6 @@ using namespace ml;

 void CAutoconfigurerParamsTest::testDefaults() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+-------------------------------------------+");
-    LOG_DEBUG(<< "| CAutoconfigurerParamsTest::testDefaults |");
-    LOG_DEBUG(<< "+-------------------------------------------+");
-
     config::CAutoconfigurerParams params("time", "", false, false);
     std::string actual = params.print();
     std::string expected =
@@ -69,11 +64,6 @@ void CAutoconfigurerParamsTest::testDefaults() {
 }

 void CAutoconfigurerParamsTest::testInit() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+---------------------------------------+");
-    LOG_DEBUG(<< "| CAutoconfigurerParamsTest::testInit |");
-    LOG_DEBUG(<< "+---------------------------------------+");
-
     config::CAutoconfigurerParams params("time", "", false, false);
     params.init("testfiles/parameters.conf");
diff --git a/lib/config/unittest/CDataSemanticsTest.cc b/lib/config/unittest/CDataSemanticsTest.cc
index 15c2478ada..aae5ed2976 100644
--- a/lib/config/unittest/CDataSemanticsTest.cc
+++ b/lib/config/unittest/CDataSemanticsTest.cc
@@ -23,11 +23,6 @@ using TSizeVec = std::vector<std::size_t>;
 using TStrVec = std::vector<std::string>;

 void CDataSemanticsTest::testBinary() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+----------------------------------+");
-    LOG_DEBUG(<< "| CDataSemanticsTest::testBinary |");
-    LOG_DEBUG(<< "+----------------------------------+");
-
     // Try a numeric and non-numeric example of a binary variable.

     std::string categories[][2] = {{"false", "true"}, {"0", "1"}};
@@ -49,11 +44,6 @@ void CDataSemanticsTest::testNonNumericCategorical() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-    LOG_DEBUG(<< "| CDataSemanticsTest::testNonNumericCategorical |");
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-
     // Test we identify non-numerical non-binary data as categorical.

     test::CRandomNumbers rng;
@@ -77,11 +67,6 @@ void CDataSemanticsTest::testNumericCategorical() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+----------------------------------------------+");
-    LOG_DEBUG(<< "| CDataSemanticsTest::testNumericCategorical |");
-    LOG_DEBUG(<< "+----------------------------------------------+");
-
     // Test plausible http status code distribution is correctly
     // identified as categorical.
@@ -109,11 +94,6 @@ void CDataSemanticsTest::testInteger() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+-----------------------------------+");
-    LOG_DEBUG(<< "| CDataSemanticsTest::testInteger |");
-    LOG_DEBUG(<< "+-----------------------------------+");
-
     // Test a variety of uni- and multi-modal distributions.

     test::CRandomNumbers rng;
@@ -207,11 +187,6 @@ void CDataSemanticsTest::testReal() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+--------------------------------+");
-    LOG_DEBUG(<< "| CDataSemanticsTest::testReal |");
-    LOG_DEBUG(<< "+--------------------------------+");
-
     // Test a variety of uni- and multi-modal distributions.

     test::CRandomNumbers rng;
diff --git a/lib/config/unittest/CDataSummaryStatisticsTest.cc b/lib/config/unittest/CDataSummaryStatisticsTest.cc
index 48d7aa27a7..49927f46cf 100644
--- a/lib/config/unittest/CDataSummaryStatisticsTest.cc
+++ b/lib/config/unittest/CDataSummaryStatisticsTest.cc
@@ -32,11 +32,6 @@ using TStrVec = std::vector<std::string>;
 using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;

 void CDataSummaryStatisticsTest::testRate() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+----------------------------------------+");
-    LOG_DEBUG(<< "| CDataSummaryStatisticsTest::testRate |");
-    LOG_DEBUG(<< "+----------------------------------------+");
-
     // Test we correctly estimate a range of rates.

     double rate[] = {10.0, 100.0, 500.0};
@@ -62,11 +57,6 @@ void CDataSummaryStatisticsTest::testCategoricalDistinctCount() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CDataSummaryStatisticsTest::testCategoricalDistinctCount |");
-    LOG_DEBUG(<< "+------------------------------------------------------------+");
-
     // Test we correctly compute the distinct count with and
     // without sketching.
@@ -105,11 +95,6 @@ void CDataSummaryStatisticsTest::testCategoricalTopN() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+---------------------------------------------------+");
-    LOG_DEBUG(<< "| CDataSummaryStatisticsTest::testCategoricalTopN |");
-    LOG_DEBUG(<< "+---------------------------------------------------+");
-
     // Test we are able to accurately estimate the counts of some
     // heavy hitting categories.
@@ -172,11 +157,6 @@ void CDataSummaryStatisticsTest::testNumericBasicStatistics() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+----------------------------------------------------------+");
-    LOG_DEBUG(<< "| CDataSummaryStatisticsTest::testNumericBasicStatistics |");
-    LOG_DEBUG(<< "+----------------------------------------------------------+");
-
     // Test the minimum, median and maximum of a variety of data sets.

     {
@@ -250,11 +230,6 @@ void CDataSummaryStatisticsTest::testNumericDistribution() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-    LOG_DEBUG(<< "| CDataSummaryStatisticsTest::testNumericDistribution |");
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-
     test::CRandomNumbers rng;

     TDoubleVec samples;
diff --git a/lib/config/unittest/CDetectorEnumeratorTest.cc b/lib/config/unittest/CDetectorEnumeratorTest.cc
index d77063b8d2..5d0753876a 100644
--- a/lib/config/unittest/CDetectorEnumeratorTest.cc
+++ b/lib/config/unittest/CDetectorEnumeratorTest.cc
@@ -28,11 +28,6 @@ std::string print(const config::CDetectorEnumerator::TDetectorSpecificationVec&
 }

 void CDetectorEnumeratorTest::testAll() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+------------------------------------+");
-    LOG_DEBUG(<< "| CDetectorEnumeratorTest::testAll |");
-    LOG_DEBUG(<< "+------------------------------------+");
-
     std::string empty;
     config::CAutoconfigurerParams params(empty, empty, false, false);
     config::CDetectorEnumerator enumerator(params);
diff --git a/lib/config/unittest/CReportWriterTest.cc b/lib/config/unittest/CReportWriterTest.cc
index 78a48198b3..75821ed013 100644
--- a/lib/config/unittest/CReportWriterTest.cc
+++ b/lib/config/unittest/CReportWriterTest.cc
@@ -24,11 +24,6 @@ using TSizeVec = std::vector<std::size_t>;
 using TStrVec = std::vector<std::string>;

 void CReportWriterTest::testPretty() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+---------------------------------+");
-    LOG_DEBUG(<< "| CReportWriterTest::testPretty |");
-    LOG_DEBUG(<< "+---------------------------------+");
-
     test::CRandomNumbers rng;

     core_t::TTime startTime = 1459468810;
@@ -128,10 +123,6 @@ void CReportWriterTest::testPretty() {
 }

 void CReportWriterTest::testJSON() {
-    LOG_DEBUG(<< "");
-    LOG_DEBUG(<< "+-------------------------------+");
-    LOG_DEBUG(<< "| CReportWriterTest::testJSON |");
-    LOG_DEBUG(<< "+-------------------------------+");
 }

 CppUnit::Test* CReportWriterTest::suite() {
diff --git a/lib/core/unittest/CJsonLogLayoutTest.cc b/lib/core/unittest/CJsonLogLayoutTest.cc
index 7b3fb17ad7..ab578c1aa2 100644
--- a/lib/core/unittest/CJsonLogLayoutTest.cc
+++ b/lib/core/unittest/CJsonLogLayoutTest.cc
@@ -18,7 +18,6 @@ CppUnit::Test* CJsonLogLayoutTest::suite() {
 }

 void CJsonLogLayoutTest::testPathCropping() {
-    LOG_DEBUG(<< "CJsonLogLayoutTest::testPathCropping");
 #ifdef Windows
     CPPUNIT_ASSERT_EQUAL(std::string("source.h"),
                          log4cxx::helpers::CJsonLogLayout::cropPath(
diff --git a/lib/core/unittest/CMemoryUsageTest.cc b/lib/core/unittest/CMemoryUsageTest.cc
index 6904e06041..c9818835b7 100644
--- a/lib/core/unittest/CMemoryUsageTest.cc
+++ b/lib/core/unittest/CMemoryUsageTest.cc
@@ -1065,7 +1065,6 @@ void CMemoryUsageTest::testStringClear() {
 }

 void CMemoryUsageTest::testSharedPointer() {
-    LOG_DEBUG(<< "*** testSharedPointer ***");
     using TIntVecPtr = std::shared_ptr<TIntVec>;
     using TIntVecPtrVec = std::vector<TIntVecPtr>;
     using TStrPtr = std::shared_ptr<std::string>;
@@ -1146,7 +1145,6 @@ void CMemoryUsageTest::testSharedPointer() {
 }

 void CMemoryUsageTest::testRawPointer() {
-    LOG_DEBUG(<< "*** testRawPointer ***");
     std::string* strPtr = nullptr;
     CPPUNIT_ASSERT_EQUAL(std::size_t(0), core::CMemory::dynamicSize(strPtr));
@@ -1160,8 +1158,6 @@ void CMemoryUsageTest::testRawPointer() {
 }

 void CMemoryUsageTest::testSmallVector() {
-    LOG_DEBUG(<< "*** testSmallVector ***");
-
     using TSizeVec = std::vector<std::size_t>;
     using TDouble1Vec = core::CSmallVector<double, 1>;
    using TDouble6Vec = core::CSmallVector<double, 6>;
diff --git a/lib/core/unittest/CStatisticsTest.cc b/lib/core/unittest/CStatisticsTest.cc
index 9b87804002..3e1a2d7dbc 100644
--- a/lib/core/unittest/CStatisticsTest.cc
+++ b/lib/core/unittest/CStatisticsTest.cc
@@ -58,7 +58,6 @@ CppUnit::Test* CStatisticsTest::suite() {
 }

 void CStatisticsTest::testStatistics() {
-    LOG_TRACE(<< "Starting Statistics test");
     ml::core::CStatistics& stats = ml::core::CStatistics::instance();

     static const int N = 6;
@@ -96,12 +95,9 @@ void CStatisticsTest::testStatistics() {
         stats.stat(TEST_STAT).increment();
     }
     CPPUNIT_ASSERT_EQUAL(uint64_t(0x1000000), stats.stat(TEST_STAT).value());
-
-    LOG_TRACE(<< "Finished Statistics test");
 }

 void CStatisticsTest::testPersist() {
-    LOG_DEBUG(<< "Starting persist test");
     ml::core::CStatistics& stats = ml::core::CStatistics::instance();

     // Check that a save/restore with all zeros is Ok
diff --git a/lib/core/unittest/CVectorRangeTest.cc b/lib/core/unittest/CVectorRangeTest.cc
index d19f10cb3f..d695cfbce7 100644
--- a/lib/core/unittest/CVectorRangeTest.cc
+++ b/lib/core/unittest/CVectorRangeTest.cc
@@ -19,8 +19,6 @@ using TDoubleRng = core::CVectorRange<TDoubleVec>;
 using TDoubleCRng = core::CVectorRange<const TDoubleVec>;

 void CVectorRangeTest::testCreation() {
-    LOG_DEBUG(<< "*** CVectorRangeTest::testCreation ***");
-
     {
         TDoubleVec values1{1.0, 0.1, 0.7, 9.8};
         TDoubleVec values2{3.1, 1.4, 5.7, 1.2};
@@ -56,8 +54,6 @@ void CVectorRangeTest::testCreation() {
 }

 void CVectorRangeTest::testAccessors() {
-    LOG_DEBUG(<< "*** CVectorRangeTest::testAccessors ***");
-
     TDoubleVec values{1.0, 0.1, 0.7, 9.8, 8.0};

     TDoubleRng range14{values, 1, 4};
@@ -89,8 +85,6 @@ void CVectorRangeTest::testAccessors() {
 }

 void CVectorRangeTest::testIterators() {
-    LOG_DEBUG(<< "*** CVectorRangeTest::testIterators ***");
-
     TDoubleVec values{1.0, 0.1, 0.7, 9.8, 8.0};

     TDoubleRng range14{values, 1, 4};
@@ -115,8 +109,6 @@ void CVectorRangeTest::testIterators() {
 }

 void CVectorRangeTest::testSizing() {
-    LOG_DEBUG(<< "*** CVectorRangeTest::testSizing ***");
-
     TDoubleVec values{1.0, 0.1, 0.7, 9.8, 8.0};

     TDoubleRng range11{values, 1, 1};
@@ -140,8 +132,6 @@ void CVectorRangeTest::testSizing() {
 }

 void CVectorRangeTest::testModifiers() {
-    LOG_DEBUG(<< "*** CVectorRangeTest::testModifiers ***");
-
     TDoubleVec values1{1.0, 0.1, 0.7, 9.8, 8.0};
     TDoubleVec values2{2.0, 3.5, 8.1, 1.8};
@@ -222,8 +212,6 @@ void CVectorRangeTest::testModifiers() {
 }

 void CVectorRangeTest::testComparisons() {
-    LOG_DEBUG(<< "*** CVectorRangeTest::testComparisons ***");
-
     TDoubleVec values1{1.0, 0.1, 0.7, 9.8, 8.0};
     TDoubleVec values2{1.2, 0.1, 0.7, 9.8, 18.0};
diff --git a/lib/maths/CConstantPrior.cc b/lib/maths/CConstantPrior.cc
index 26f9ef5923..fecf6d7e6a 100644
--- a/lib/maths/CConstantPrior.cc
+++ b/lib/maths/CConstantPrior.cc
@@ -66,7 +66,6 @@ bool CConstantPrior::acceptRestoreTraverser(core::CStateRestoreTraverser& traver
             core::CStringUtils::stringToType(traverser.value(), constant),
             m_Constant.reset(constant))
     } while (traverser.next());
-
     return true;
 }
@@ -86,9 +85,8 @@ bool CConstantPrior::needsOffset() const {
     return false;
 }

-double CConstantPrior::adjustOffset(const TWeightStyleVec& /*weightStyle*/,
-                                    const TDouble1Vec& /*samples*/,
-                                    const TDouble4Vec1Vec& /*weights*/) {
+double CConstantPrior::adjustOffset(const TDouble1Vec& /*samples*/,
+                                    const TDoubleWeightsAry1Vec& /*weights*/) {
     return 0.0;
 }
@@ -96,9 +94,8 @@ double CConstantPrior::offset() const {
     return 0.0;
 }

-void CConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle*/,
-                                const TDouble1Vec& samples,
-                                const TDouble4Vec1Vec& /*weights*/) {
+void CConstantPrior::addSamples(const TDouble1Vec& samples,
+                                const TDoubleWeightsAry1Vec& /*weights*/) {
     if (m_Constant || samples.empty()) {
         return;
     }
@@ -109,44 +106,39 @@ void CConstantPrior::propagateForwardsByTime(double /*time*/) {
 }

 CConstantPrior::TDoubleDoublePr CConstantPrior::marginalLikelihoodSupport() const {
-    return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                          boost::numeric::bounds<double>::highest());
+    return {boost::numeric::bounds<double>::lowest(),
+            boost::numeric::bounds<double>::highest()};
 }

 double CConstantPrior::marginalLikelihoodMean() const {
     if (this->isNonInformative()) {
         return 0.0;
     }
-
     return *m_Constant;
 }

-double CConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
-                                              const TDouble4Vec& /*weights*/) const {
+double CConstantPrior::marginalLikelihoodMode(const TDoubleWeightsAry& /*weights*/) const {
     return this->marginalLikelihoodMean();
 }

 CConstantPrior::TDoubleDoublePr
 CConstantPrior::marginalLikelihoodConfidenceInterval(double /*percentage*/,
-                                                     const TWeightStyleVec& /*weightStyles*/,
-                                                     const TDouble4Vec& /*weights*/) const {
+                                                     const TDoubleWeightsAry& /*weights*/) const {
     if (this->isNonInformative()) {
         return this->marginalLikelihoodSupport();
     }
-
-    return std::make_pair(*m_Constant, *m_Constant);
+    return {*m_Constant, *m_Constant};
 }

-double CConstantPrior::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/,
-                                                  const TDouble4Vec& /*weights*/) const {
+double CConstantPrior::marginalLikelihoodVariance(const TDoubleWeightsAry& /*weights*/) const {
     return this->isNonInformative() ? boost::numeric::bounds<double>::highest() : 0.0;
 }

 maths_t::EFloatingPointErrorStatus
-CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                           const TDouble1Vec& samples,
-                                           const TDouble4Vec1Vec& weights,
+CConstantPrior::jointLogMarginalLikelihood(const TDouble1Vec& samples,
+                                           const TDoubleWeightsAry1Vec& weights,
                                            double& result) const {
+
     result = 0.0;

     if (samples.empty()) {
@@ -183,7 +175,7 @@ CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
             return maths_t::E_FpOverflowed;
         }
-        numberSamples += maths_t::countForUpdate(weightStyles, weights[i]);
+        numberSamples += maths_t::countForUpdate(weights[i]);
     }

     result = numberSamples * core::constants::LOG_MAX_DOUBLE;
@@ -193,19 +185,17 @@ CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
 void CConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples,
                                               TDouble1Vec& samples) const {
     samples.clear();
-
     if (this->isNonInformative()) {
         return;
     }
-
     samples.resize(numberSamples, *m_Constant);
 }

-bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles,
-                                      const TDouble1Vec& samples,
-                                      const TDouble4Vec1Vec& weights,
+bool CConstantPrior::minusLogJointCdf(const TDouble1Vec& samples,
+                                      const TDoubleWeightsAry1Vec& weights,
                                       double& lowerBound,
                                       double& upperBound) const {
+
     lowerBound = upperBound = 0.0;

     if (samples.empty()) {
@@ -216,7 +206,7 @@ bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles,
     double numberSamples = 0.0;
     try {
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            numberSamples += maths_t::count(weightStyles, weights[i]);
+            numberSamples += maths_t::count(weights[i]);
         }
     } catch (const std::exception& e) {
         LOG_ERROR(<< "Failed to compute c.d.f. " << e.what());
@@ -242,11 +232,11 @@ bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles,
     return true;
 }

-bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
-                                                const TDouble1Vec& samples,
-                                                const TDouble4Vec1Vec& weights,
+bool CConstantPrior::minusLogJointCdfComplement(const TDouble1Vec& samples,
+                                                const TDoubleWeightsAry1Vec& weights,
                                                 double& lowerBound,
                                                 double& upperBound) const {
+
     lowerBound = upperBound = 0.0;

     if (samples.empty()) {
@@ -257,7 +247,7 @@ bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightSty
     double numberSamples = 0.0;
     try {
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            numberSamples += maths_t::count(weightStyles, weights[i]);
+            numberSamples += maths_t::count(weights[i]);
         }
     } catch (const std::exception& e) {
         LOG_ERROR(<< "Failed to compute c.d.f. " << e.what());
@@ -284,12 +274,12 @@ bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightSty
 }

 bool CConstantPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation /*calculation*/,
-                                                    const TWeightStyleVec& /*weightStyles*/,
                                                     const TDouble1Vec& samples,
-                                                    const TDouble4Vec1Vec& /*weights*/,
+                                                    const TDoubleWeightsAry1Vec& /*weights*/,
                                                     double& lowerBound,
                                                     double& upperBound,
                                                     maths_t::ETail& tail) const {
+
     lowerBound = upperBound = 0.0;
     tail = maths_t::E_UndeterminedTail;
diff --git a/lib/maths/CGammaRateConjugate.cc b/lib/maths/CGammaRateConjugate.cc
index 5cedb059fb..2e49c1a825 100644
--- a/lib/maths/CGammaRateConjugate.cc
+++ b/lib/maths/CGammaRateConjugate.cc
@@ -46,10 +46,8 @@ namespace {
 namespace detail {

 using TDoubleDoublePr = std::pair<double, double>;
-using TWeightStyleVec = maths_t::TWeightStyleVec;
 using TDouble1Vec = core::CSmallVector<double, 1>;
-using TDouble4Vec = core::CSmallVector<double, 4>;
-using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
+using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;
 using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
 using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
@@ -312,9 +310,6 @@ struct SPlusWeight {
 //! (integrating over the prior for the gamma rate) and aggregate the
 //! results using \p aggregate.
 //!
-//! \param[in] weightStyles Controls the interpretation of the weight(s)
-//! that are associated with each sample. See maths_t::ESampleWeightStyle
-//! for more details.
 //! \param[in] samples The weighted samples.
 //! \param[in] func The function to evaluate.
 //! \param[in] aggregate The function to aggregate the results of \p func.
@@ -329,9 +324,8 @@ struct SPlusWeight {
 //! of the likelihood for \p samples.
 //! \param[out] result Filled in with the aggregation of results of \p func.
 template<typename FUNC, typename AGGREGATOR>
-bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
-                                         const TDouble1Vec& samples,
-                                         const TDouble4Vec1Vec& weights,
+bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples,
+                                         const TDoubleWeightsAry1Vec& weights,
                                          FUNC func,
                                          AGGREGATOR aggregate,
                                          bool isNonInformative,
@@ -371,7 +365,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
         // everywhere. (It is acceptable to approximate all finite samples
         // as at the median of this distribution.)
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            double n = maths_t::count(weightStyles, weights[i]);
+            double n = maths_t::count(weights[i]);
             double x = samples[i] + offset;
             result = aggregate(result, func(CTools::SImproperDistribution(), x), n);
         }
@@ -405,10 +399,9 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
                 // gamma distributed and u is a constant offset. This means
                 // that {x(i) + u} are gamma distributed.

-                double n = maths_t::count(weightStyles, weights[i]);
-                double varianceScale =
-                    maths_t::seasonalVarianceScale(weightStyles, weights[i]) *
-                    maths_t::countVarianceScale(weightStyles, weights[i]);
+                double n = maths_t::count(weights[i]);
+                double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
+                                       maths_t::countVarianceScale(weights[i]);
                 double x = samples[i] + offset;
                 LOG_TRACE(<< "x = " << x);
@@ -433,10 +426,9 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
                 // gamma distributed and u is a constant offset. This means
                 // that {x(i) + u} are gamma distributed.

-                double n = maths_t::count(weightStyles, weights[i]);
-                double varianceScale =
-                    maths_t::seasonalVarianceScale(weightStyles, weights[i]) *
-                    maths_t::countVarianceScale(weightStyles, weights[i]);
+                double n = maths_t::count(weights[i]);
+                double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
+                                       maths_t::countVarianceScale(weights[i]);
                 double x = samples[i] + offset;
                 double scaledLikelihoodShape = likelihoodShape / varianceScale;
                 double scaledPriorRate = varianceScale * priorRate;
@@ -469,29 +461,26 @@ struct SPlusWeight {
 template<typename F>
 class CEvaluateOnSamples : core::CNonCopyable {
 public:
-    CEvaluateOnSamples(const TWeightStyleVec& weightStyles,
-                       const TDouble1Vec& samples,
-                       const TDouble4Vec1Vec& weights,
+    CEvaluateOnSamples(const TDouble1Vec& samples,
+                       const TDoubleWeightsAry1Vec& weights,
                        bool isNonInformative,
                        double offset,
                        double likelihoodShape,
                        double priorShape,
                        double priorRate)
-        : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights),
-          m_IsNonInformative(isNonInformative), m_Offset(offset),
-          m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape),
-          m_PriorRate(priorRate) {}
+        : m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative),
+          m_Offset(offset), m_LikelihoodShape(likelihoodShape),
+          m_PriorShape(priorShape), m_PriorRate(priorRate) {}

     bool operator()(double x, double& result) const {
         return evaluateFunctionOnJointDistribution(
-            m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative,
+            m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative,
             m_Offset + x, m_LikelihoodShape, m_PriorShape, m_PriorRate, result);
     }

 private:
-    const TWeightStyleVec& m_WeightStyles;
     const TDouble1Vec& m_Samples;
-    const TDouble4Vec1Vec& m_Weights;
+    const TDoubleWeightsAry1Vec& m_Weights;
     bool m_IsNonInformative;
     double m_Offset;
     double m_LikelihoodShape;
@@ -507,25 +496,24 @@ class CEvaluateOnSamples : core::CNonCopyable {
 class CProbabilityOfLessLikelySamples : core::CNonCopyable {
 public:
     CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                    const TWeightStyleVec& weightStyles,
                                     const TDouble1Vec& samples,
-                                    const TDouble4Vec1Vec& weights,
+                                    const TDoubleWeightsAry1Vec& weights,
                                     bool isNonInformative,
                                     double offset,
                                     double likelihoodShape,
                                     double priorShape,
                                     double priorRate)
-        : m_Calculation(calculation), m_WeightStyles(weightStyles),
-          m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative),
-          m_Offset(offset), m_LikelihoodShape(likelihoodShape),
-          m_PriorShape(priorShape), m_PriorRate(priorRate), m_Tail(0) {}
+        : m_Calculation(calculation), m_Samples(samples), m_Weights(weights),
+          m_IsNonInformative(isNonInformative), m_Offset(offset),
+          m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape),
+          m_PriorRate(priorRate), m_Tail(0) {}

     bool operator()(double x, double& result) const {
+
         CJointProbabilityOfLessLikelySamples probability;
         maths_t::ETail tail = maths_t::E_UndeterminedTail;

         if (!evaluateFunctionOnJointDistribution(
-                m_WeightStyles, m_Samples, m_Weights,
+                m_Samples, m_Weights,
                 boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation),
                             _1, _2, boost::ref(tail)),
                 CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative,
@@ -544,9 +532,8 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable {

 private:
     maths_t::EProbabilityCalculation m_Calculation;
-    const TWeightStyleVec& m_WeightStyles;
     const TDouble1Vec& m_Samples;
-    const TDouble4Vec1Vec& m_Weights;
+    const TDoubleWeightsAry1Vec& m_Weights;
     bool m_IsNonInformative;
     double m_Offset;
     double m_LikelihoodShape;
@@ -574,22 +561,22 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable {
 //! a and b are the prior gamma shape and rate, respectively.
 class CLogMarginalLikelihood : core::CNonCopyable {
 public:
-    CLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                           const TDouble1Vec& samples,
-                           const TDouble4Vec1Vec& weights,
+    CLogMarginalLikelihood(const TDouble1Vec& samples,
+                           const TDoubleWeightsAry1Vec& weights,
                            double offset,
                            double likelihoodShape,
                            double priorShape,
                            double priorRate)
-        : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights),
-          m_Offset(offset), m_LikelihoodShape(likelihoodShape),
-          m_PriorShape(priorShape), m_PriorRate(priorRate), m_NumberSamples(0.0),
-          m_ImpliedShape(0.0), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) {
+        : m_Samples(samples), m_Weights(weights), m_Offset(offset),
+          m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape),
+          m_PriorRate(priorRate), m_NumberSamples(0.0), m_ImpliedShape(0.0),
+          m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) {
         this->precompute();
     }

     //! Evaluate the log marginal likelihood at the offset \p x.
     bool operator()(double x, double& result) const {
+
         if (m_ErrorStatus & maths_t::E_FpFailed) {
             return false;
         }
@@ -600,10 +587,9 @@ class CLogMarginalLikelihood : core::CNonCopyable {

         try {
             for (std::size_t i = 0u; i < m_Samples.size(); ++i) {
-                double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]);
-                double varianceScale =
-                    maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) *
-                    maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]);
+                double n = maths_t::countForUpdate(m_Weights[i]);
+                double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) *
+                                       maths_t::countVarianceScale(m_Weights[i]);

                 double sample = m_Samples[i] + x + m_Offset;
@@ -653,10 +639,9 @@ class CLogMarginalLikelihood : core::CNonCopyable {

         try {
             for (std::size_t i = 0u; i < m_Weights.size(); ++i) {
-                double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]);
-                double varianceScale =
-                    maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) *
-                    maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]);
+                double n = maths_t::countForUpdate(m_Weights[i]);
+                double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) *
+                                       maths_t::countVarianceScale(m_Weights[i]);
                 m_NumberSamples += n;
                 if (varianceScale != 1.0) {
                     logVarianceScaleSum -= m_LikelihoodShape / varianceScale *
@@ -690,9 +675,8 @@ class CLogMarginalLikelihood : core::CNonCopyable {
     }

 private:
-    const TWeightStyleVec& m_WeightStyles;
     const TDouble1Vec& m_Samples;
-    const TDouble4Vec1Vec& m_Weights;
+    const TDoubleWeightsAry1Vec& m_Weights;
     double m_Offset;
     double m_LikelihoodShape;
     double m_PriorShape;
@@ -787,21 +771,19 @@ bool CGammaRateConjugate::needsOffset() const {
     return true;
 }

-double CGammaRateConjugate::adjustOffset(const TWeightStyleVec& weightStyles,
-                                         const TDouble1Vec& samples,
-                                         const TDouble4Vec1Vec& weights) {
+double CGammaRateConjugate::adjustOffset(const TDouble1Vec& samples,
+                                         const TDoubleWeightsAry1Vec& weights) {
     COffsetCost cost(*this);
     CApplyOffset apply(*this);
-    return this->adjustOffsetWithCost(weightStyles, samples, weights, cost, apply);
+    return this->adjustOffsetWithCost(samples, weights, cost, apply);
 }

 double CGammaRateConjugate::offset() const {
     return m_Offset;
 }

-void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles,
-                                     const TDouble1Vec& samples,
-                                     const TDouble4Vec1Vec& weights) {
+void CGammaRateConjugate::addSamples(const TDouble1Vec& samples,
+                                     const TDoubleWeightsAry1Vec& weights) {
     if (samples.empty()) {
         return;
     }
@@ -813,8 +795,8 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles,
         return;
     }

-    this->adjustOffset(weightStyles, samples, weights);
-    this->CPrior::addSamples(weightStyles, samples, weights);
+    this->adjustOffset(samples, weights);
+    this->CPrior::addSamples(samples, weights);

     // We assume the data are described by X = Y - u where, Y is gamma
     // distributed and u is a constant offset.
@@ -888,10 +870,9 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightSty
     try {
         double shift = boost::math::digamma(m_LikelihoodShape);
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            double n = maths_t::countForUpdate(weightStyles, weights[i]);
-            double varianceScale =
-                maths_t::seasonalVarianceScale(weightStyles, weights[i]) *
-                maths_t::countVarianceScale(weightStyles, weights[i]);
+            double n = maths_t::countForUpdate(weights[i]);
+            double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
+                                   maths_t::countVarianceScale(weights[i]);
             double x = samples[i] + m_Offset;

             if (!CMathsFuncs::isFinite(x) || x <= 0.0) {
@@ -998,22 +979,17 @@ void CGammaRateConjugate::propagateForwardsByTime(double time) {
 }

 CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodSupport() const {
-    return std::make_pair(-m_Offset, boost::numeric::bounds<double>::highest());
+    return {-m_Offset, boost::numeric::bounds<double>::highest()};
 }

 double CGammaRateConjugate::marginalLikelihoodMean() const {
     return this->isInteger() ? this->mean() - 0.5 : this->mean();
 }

-double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles,
-                                                   const TDouble4Vec& weights) const {
-    double varianceScale = 1.0;
-    try {
-        varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) *
-                        maths_t::countVarianceScale(weightStyles, weights);
-    } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to get variance scale: " << e.what());
-    }
+double CGammaRateConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const {
+
+    double varianceScale = maths_t::seasonalVarianceScale(weights) *
+                           maths_t::countVarianceScale(weights);

     if (!this->isNonInformative()) {
         // We use the fact that the marginal likelihood is the distribution
@@ -1057,8 +1033,8 @@ double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weight
     return std::max(mean == 0.0 ? 0.0 : mean - variance / mean, 0.0) - m_Offset;
 }

-double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles,
-                                                       const TDouble4Vec& weights) const {
+double CGammaRateConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const {
+
     if (this->isNonInformative()) {
         return boost::numeric::bounds<double>::highest();
     }
@@ -1076,13 +1052,8 @@ double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& we
     // to show that Var(a' / B) = a'^2 * E[ 1.0 / B^2 - (b / (a - 1))^2]
     // whence...

-    double varianceScale = 1.0;
-    try {
-        varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) *
-                        maths_t::countVarianceScale(weightStyles, weights);
-    } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to get variance scale: " << e.what());
-    }
+    double varianceScale = maths_t::seasonalVarianceScale(weights) *
+                           maths_t::countVarianceScale(weights);
     double a = this->priorShape();
     if (a <= 2.0) {
         return varianceScale * CBasicStatistics::variance(m_SampleMoments);
@@ -1094,8 +1065,7 @@ double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& we

 CGammaRateConjugate::TDoubleDoublePr
 CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage,
-                                                          const TWeightStyleVec& weightStyles,
-                                                          const TDouble4Vec& weights) const {
+                                                          const TDoubleWeightsAry& weights) const {
     if (this->isNonInformative()) {
         return this->marginalLikelihoodSupport();
     }
@@ -1114,8 +1084,8 @@ CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage,
     // and beta equal to m_PriorShape.

     try {
-        double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) *
-                               maths_t::countVarianceScale(weightStyles, weights);
+        double varianceScale = maths_t::seasonalVarianceScale(weights) *
+                               maths_t::countVarianceScale(weights);
         double scaledLikelihoodShape = m_LikelihoodShape / varianceScale;
         double scaledPriorRate = varianceScale * this->priorRate();
         boost::math::beta_distribution<> beta(scaledLikelihoodShape, this->priorShape());
@@ -1128,18 +1098,17 @@ CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage,
             (this->isInteger() ? 0.5 : 0.0);
         }
         LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2);
-        return std::make_pair(x1, x2);
+        return {x1, x2};
     } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to compute confidence interval: " << e.what());
+        LOG_ERROR("Failed to compute confidence interval: " << e.what());
     }

     return this->marginalLikelihoodSupport();
 }

 maths_t::EFloatingPointErrorStatus
-CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                const TDouble1Vec& samples,
-                                                const TDouble4Vec1Vec& weights,
+CGammaRateConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples,
+                                                const TDoubleWeightsAry1Vec& weights,
                                                 double& result) const {
     result = 0.0;
@@ -1171,8 +1140,8 @@ CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightSty
     maths_t::EFloatingPointErrorStatus status = maths_t::E_FpFailed;
     try {
         detail::CLogMarginalLikelihood logMarginalLikelihood(
-            weightStyles, samples, weights, m_Offset, m_LikelihoodShape,
-            this->priorShape(), this->priorRate());
+            samples, weights, m_Offset, m_LikelihoodShape, this->priorShape(),
+            this->priorRate());

         if (this->isInteger()) {
             // If the data are discrete we compute the approximate expectation
             // w.r.t. to the hidden offset of the samples Z, which is uniform
@@ -1329,18 +1298,17 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
     }
 }

-bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles,
-                                           const TDouble1Vec& samples,
-                                           const TDouble4Vec1Vec& weights,
+bool CGammaRateConjugate::minusLogJointCdf(const TDouble1Vec& samples,
+                                           const TDoubleWeightsAry1Vec& weights,
                                            double& lowerBound,
                                            double& upperBound) const {
+
     using TMinusLogCdf = detail::CEvaluateOnSamples<CTools::SMinusLogCdf>;

     lowerBound = upperBound = 0.0;

-    TMinusLogCdf minusLogCdf(weightStyles, samples, weights,
-                             this->isNonInformative(), m_Offset, m_LikelihoodShape,
-                             this->priorShape(), this->priorRate());
+    TMinusLogCdf minusLogCdf(samples, weights, this->isNonInformative(), m_Offset,
+                             m_LikelihoodShape, this->priorShape(), this->priorRate());

     if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
@@ -1369,18 +1337,18 @@ bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles,
     return true;
 }

-bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
-                                                     const TDouble1Vec& samples,
-                                                     const TDouble4Vec1Vec& weights,
+bool CGammaRateConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples,
+                                                     const TDoubleWeightsAry1Vec& weights,
                                                      double& lowerBound,
                                                      double& upperBound) const {
+
     using TMinusLogCdfComplement = detail::CEvaluateOnSamples<CTools::SMinusLogCdfComplement>;

     lowerBound = upperBound = 0.0;

     TMinusLogCdfComplement minusLogCdfComplement(
-        weightStyles, samples, weights, this->isNonInformative(), m_Offset,
-        m_LikelihoodShape, this->priorShape(), this->priorRate());
+        samples, weights, this->isNonInformative(), m_Offset, m_LikelihoodShape,
+        this->priorShape(), this->priorRate());

     if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
@@ -1410,9 +1378,8 @@ bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weig
 }

 bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                                         const TWeightStyleVec& weightStyles,
                                                          const TDouble1Vec& samples,
-                                                         const TDouble4Vec1Vec& weights,
+                                                         const TDoubleWeightsAry1Vec& weights,
                                                          double& lowerBound,
                                                          double& upperBound,
                                                          maths_t::ETail& tail) const {
@@ -1420,8 +1387,8 @@ bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCa
     tail = maths_t::E_UndeterminedTail;

     detail::CProbabilityOfLessLikelySamples probability(
-        calculation, weightStyles, samples, weights, this->isNonInformative(),
-        m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate());
+        calculation, samples, weights, this->isNonInformative(), m_Offset,
+        m_LikelihoodShape, this->priorShape(), this->priorRate());

     if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
@@ -1460,6 +1427,7 @@ bool CGammaRateConjugate::isNonInformative() const {
 }

 void CGammaRateConjugate::print(const std::string& indent, std::string& result) const {
+
     result += core_t::LINE_ENDING + indent + "gamma ";
     if (this->isNonInformative()) {
         result += "non-informative";
@@ -1486,6 +1454,7 @@ void CGammaRateConjugate::print(const std::string& indent, std::string& result)
 }

 std::string CGammaRateConjugate::printJointDensityFunction() const {
+
     if (this->isNonInformative()) {
         // The non-informative likelihood is improper 0 everywhere.
        return EMPTY_STRING;
@@ -1562,6 +1531,7 @@ double CGammaRateConjugate::likelihoodShape() const {
 }

 double CGammaRateConjugate::likelihoodRate() const {
+
     if (this->isNonInformative()) {
         return 0.0;
     }
@@ -1579,9 +1549,10 @@ double CGammaRateConjugate::likelihoodRate() const {

 CGammaRateConjugate::TDoubleDoublePr
 CGammaRateConjugate::confidenceIntervalRate(double percentage) const {
+
     if (this->isNonInformative()) {
-        return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                              boost::numeric::bounds<double>::highest());
+        return {boost::numeric::bounds<double>::lowest(),
+                boost::numeric::bounds<double>::highest()};
     }

     percentage /= 100.0;
@@ -1591,16 +1562,16 @@ CGammaRateConjugate::confidenceIntervalRate(double percentage) const {
     try {
         // The prior distribution for the rate is gamma.
         boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate());
-        return std::make_pair(boost::math::quantile(gamma, lowerPercentile),
-                              boost::math::quantile(gamma, upperPercentile));
+        return {boost::math::quantile(gamma, lowerPercentile),
+                boost::math::quantile(gamma, upperPercentile)};
     } catch (const std::exception& e) {
         LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()
                   << ", prior shape = " << this->priorShape()
                   << ", prior rate = " << this->priorRate());
     }

-    return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                          boost::numeric::bounds<double>::highest());
+    return {boost::numeric::bounds<double>::lowest(),
+            boost::numeric::bounds<double>::highest()};
 }

 bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate& rhs,
@@ -1614,6 +1585,7 @@ bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate& rhs,
 }

 double CGammaRateConjugate::mean() const {
+
     if (this->isNonInformative()) {
         return CBasicStatistics::mean(m_SampleMoments);
     }
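Several CGammaRateConjugate methods above branch on this->isInteger() and average the evaluated function over a hidden offset that is uniform on [0, 1), treating an integer sample n as spread over [n, n + 1); the comment fragment that closes this section refers to the same interval. A self-contained sketch of that expectation, using simple midpoint quadrature rather than the library's integrator (the function and parameter names here are illustrative assumptions):

#include <functional>
#include <iostream>

// Approximate E[f(x + Z)] for Z ~ U[0, 1): discrete data at integer x are
// treated as continuous values spread over [x, x + 1).
double expectationOverUnitOffset(const std::function<double(double)>& f,
                                 double x, int intervals = 16) {
    double result = 0.0;
    for (int i = 0; i < intervals; ++i) {
        double z = (static_cast<double>(i) + 0.5) / intervals; // midpoint rule
        result += f(x + z);
    }
    return result / intervals;
}

int main() {
    // Example: smooth a log-density over the hidden offset of an integer sample.
    auto logDensity = [](double x) { return -0.5 * x * x; };
    std::cout << expectationOverUnitOffset(logDensity, 3.0) << '\n';
}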
diff --git a/lib/maths/CKMeansOnline1d.cc b/lib/maths/CKMeansOnline1d.cc
index 05ecb07a06..7313539589 100644
--- a/lib/maths/CKMeansOnline1d.cc
+++ b/lib/maths/CKMeansOnline1d.cc
@@ -30,11 +30,10 @@ namespace maths {
 namespace {

-using TDouble1Vec = core::CSmallVector<double, 1>;
-using TDouble4Vec = core::CSmallVector<double, 4>;
-using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
 using TDoubleDoublePr = std::pair<double, double>;
 using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
+using TDouble1Vec = core::CSmallVector<double, 1>;
+using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;

 namespace detail {
@@ -58,7 +57,7 @@ double logLikelihoodFromCluster(const TDouble1Vec& sample,
                                 const CNormalMeanPrecConjugate& normal) {
     double likelihood;
     maths_t::EFloatingPointErrorStatus status = normal.jointLogMarginalLikelihood(
-        CConstantWeights::COUNT, sample, CConstantWeights::SINGLE_UNIT, likelihood);
+        sample, maths_t::CUnitWeights::SINGLE_UNIT, likelihood);
     if (status & maths_t::E_FpFailed) {
         LOG_ERROR(<< "Unable to compute probability for: " << sample[0]);
         return core::constants::LOG_MIN_DOUBLE - 1.0;
     }
@@ -159,6 +158,7 @@ bool CKMeansOnline1d::clusterSpread(std::size_t index, double& result) const {
 }

 void CKMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, double count) const {
+
     result.clear();

     if (m_Clusters.empty()) {
@@ -201,6 +201,7 @@ void CKMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do
 }

 void CKMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, double count) {
+
     clusters.clear();

     if (m_Clusters.empty()) {
@@ -210,11 +211,10 @@ void CKMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub
     this->cluster(point, clusters, count);

     TDouble1Vec sample{point};
-    TDouble4Vec1Vec weight{TDouble4Vec(1)};

-    for (std::size_t i = 0u; i < clusters.size(); ++i) {
-        weight[0][0] = clusters[i].second;
-        m_Clusters[clusters[i].first].addSamples(CConstantWeights::COUNT, sample, weight);
+    for (const auto& cluster : clusters) {
+        m_Clusters[cluster.first].addSamples(
+            sample, {maths_t::countWeight(cluster.second)});
     }
 }
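CKMeansOnline1d::add() above splits one observation across clusters and feeds each share to addSamples as a count weight. A toy self-contained version of that update, with a running mean/count pair standing in for CNormalMeanPrecConjugate (names are illustrative, not the library's):

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

// Toy 1-d cluster: tracks a count-weighted running mean.
struct SCluster {
    double count = 0.0;
    double mean = 0.0;
    void addSample(double x, double countWeight) {
        count += countWeight;
        mean += countWeight * (x - mean) / count;
    }
};

int main() {
    std::vector<SCluster> clusters(2);
    // Fractional soft assignment of one point, as produced by cluster().
    std::vector<std::pair<std::size_t, double>> assignment{{0, 0.75}, {1, 0.25}};

    double point = 4.0;
    for (const auto& cluster : assignment) {
        // The assignment fraction plays the role of maths_t::countWeight.
        clusters[cluster.first].addSample(point, cluster.second);
    }
    std::cout << clusters[0].mean << ' ' << clusters[1].mean << '\n';
}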
for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); result = aggregate( result, func(CTools::SImproperDistribution(), samples[i] + offset), n); } @@ -182,15 +176,14 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, double s = std::exp(-r); for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); double location; double scale; locationAndScale(varianceScale, r, s, mean, precision, rate, shape, location, scale); - boost::math::lognormal_distribution<> lognormal(location, scale); + boost::math::lognormal lognormal(location, scale); result = aggregate(result, func(lognormal, samples[i] + offset), n); } } else { @@ -201,10 +194,9 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, double s = std::exp(-r); for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); double location; double scale; locationAndScale(varianceScale, r, s, mean, precision, rate, @@ -232,29 +224,27 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, template class CEvaluateOnSamples : core::CNonCopyable { public: - CEvaluateOnSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CEvaluateOnSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, bool isNonInformative, double offset, double mean, double precision, double shape, double rate) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + : m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), m_Offset(offset), m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate) {} bool operator()(double x, double& result) const { return evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, + m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, m_Offset + x, m_Shape, m_Rate, m_Mean, m_Precision, result); } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Offset; double m_Mean; @@ -316,7 +306,7 @@ class CVarianceKernel { bool operator()(const TValue& x, TValue& result) const { try { boost::math::gamma_distribution<> gamma(m_A, 1.0 / m_B); - boost::math::normal_distribution<> normal(m_M, std::sqrt(1.0 / x(0) / m_P)); + boost::math::normal normal(m_M, std::sqrt(1.0 / x(0) / m_P)); double fx = boost::math::pdf(normal, x(1)) * boost::math::pdf(gamma, x(0)); double m = std::exp(x(1) + 0.5 / x(0)); result(0) = (m * m * (std::exp(1.0 / x(0)) - 1.0) + pow2(m - m_Mean)) * fx; @@ -343,26 +333,25 @@ class CVarianceKernel { class CProbabilityOfLessLikelySamples : core::CNonCopyable { public: 
CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, bool isNonInformative, double offset, double mean, double precision, double shape, double rate) - : m_Calculation(calculation), m_WeightStyles(weightStyles), - m_Samples(samples), m_Weights(weights), + : m_Calculation(calculation), m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), m_Offset(offset), m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate), m_Tail(0) {} bool operator()(double x, double& result) const { + CJointProbabilityOfLessLikelySamples probability; maths_t::ETail tail = maths_t::E_UndeterminedTail; if (!evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, + m_Samples, m_Weights, boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative, @@ -383,9 +372,8 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { private: maths_t::EProbabilityCalculation m_Calculation; - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Offset; double m_Mean; @@ -422,23 +410,22 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { //! a and b are the prior Gamma shape and rate, respectively. class CLogMarginalLikelihood : core::CNonCopyable { public: - CLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double offset, double mean, double precision, double shape, double rate) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), - m_Offset(offset), m_Mean(mean), m_Precision(precision), - m_Shape(shape), m_Rate(rate), m_NumberSamples(0.0), m_Scales(), - m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { + : m_Samples(samples), m_Weights(weights), m_Offset(offset), m_Mean(mean), + m_Precision(precision), m_Shape(shape), m_Rate(rate), m_NumberSamples(0.0), + m_Scales(), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { this->precompute(); } //! Evaluate the log marginal likelihood at the offset \p x. 
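// [Illustrative aside, not part of the original patch: for integer data an observed value k is
// treated as lying anywhere in [k, k + 1), so this functor is evaluated at offsets x in [0, 1]
// and averaged by Gauss-Legendre quadrature, mirroring what jointLogMarginalLikelihood does
// later in this diff:
//
//     double logLikelihood;
//     CIntegration::logGaussLegendre<CIntegration::OrderThree>(
//         logMarginalLikelihood, 0.0, 1.0, logLikelihood);
// ]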
bool operator()(double x, double& result) const { + if (m_ErrorStatus & maths_t::E_FpFailed) { return false; } @@ -448,7 +435,7 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { for (std::size_t i = 0u; i < m_Samples.size(); ++i) { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double n = maths_t::countForUpdate(m_Weights[i]); double sample = m_Samples[i] + m_Offset + x; if (sample <= 0.0) { // Technically, the marginal likelihood is zero here @@ -508,15 +495,14 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { double logVarianceScaleSum = 0.0; - if (maths_t::hasSeasonalVarianceScale(m_WeightStyles, m_Weights) || - maths_t::hasCountVarianceScale(m_WeightStyles, m_Weights)) { + if (maths_t::hasSeasonalVarianceScale(m_Weights) || + maths_t::hasCountVarianceScale(m_Weights)) { m_Scales.reserve(m_Weights.size()); double r = m_Rate / m_Shape; double s = std::exp(-r); for (std::size_t i = 0u; i < m_Weights.size(); ++i) { - double varianceScale = - maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * - maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) * + maths_t::countVarianceScale(m_Weights[i]); // Get the scale and shift of the exponentiated Gaussian. if (varianceScale == 1.0) { @@ -533,7 +519,7 @@ class CLogMarginalLikelihood : core::CNonCopyable { double weightedNumberSamples = 0.0; for (std::size_t i = 0u; i < m_Weights.size(); ++i) { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double n = maths_t::countForUpdate(m_Weights[i]); m_NumberSamples += n; weightedNumberSamples += n / (m_Scales.empty() ? 1.0 : m_Scales[i].first); @@ -558,9 +544,8 @@ class CLogMarginalLikelihood : core::CNonCopyable { } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; double m_Offset; double m_Mean; double m_Precision; @@ -586,12 +571,10 @@ const double CLogMarginalLikelihood::LOG_2_PI = //! [n, n+1]. 
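// [Illustrative aside, not part of the original patch: a sketch of the variance-scale moment
// matching used in precompute() above and again in addSamples() below, in the code's own
// notation r = rate / shape and s = exp(-r). A seasonal/count variance scale v is absorbed
// into the exponentiated Gaussian by solving for an effective log-space variance t, a
// location shift and a precision scale:
//
//     double t = r + std::log(s + v * (1.0 - s)); // matched log-space variance
//     double shift = (r - t) / 2.0;               // keeps the predictive mean fixed
//     double scale = t / r;                       // rescales the sample precision
// ]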
class CLogSampleSquareDeviation : core::CNonCopyable { public: - CLogSampleSquareDeviation(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CLogSampleSquareDeviation(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double mean) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), - m_Mean(mean) {} + : m_Samples(samples), m_Weights(weights), m_Mean(mean) {} bool operator()(double x, double& result) const { result = 0.0; @@ -600,7 +583,7 @@ class CLogSampleSquareDeviation : core::CNonCopyable { if (residual <= 0.0) { continue; } - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double n = maths_t::countForUpdate(m_Weights[i]); residual = std::log(residual + x) - m_Mean; result += n * pow2(residual); } @@ -608,9 +591,8 @@ class CLogSampleSquareDeviation : core::CNonCopyable { } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; double m_Mean; }; @@ -702,25 +684,22 @@ bool CLogNormalMeanPrecConjugate::needsOffset() const { return true; } -double CLogNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +double CLogNormalMeanPrecConjugate::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { COffsetCost cost(*this); CApplyOffset apply(*this); - return this->adjustOffsetWithCost(weightStyles, samples, weights, cost, apply); + return this->adjustOffsetWithCost(samples, weights, cost, apply); } double CLogNormalMeanPrecConjugate::offset() const { return m_Offset; } -void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CLogNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -728,8 +707,8 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles return; } - this->adjustOffset(weightStyles, samples, weights); - this->CPrior::addSamples(weightStyles, samples, weights); + this->adjustOffset(samples, weights); + this->CPrior::addSamples(samples, weights); // We assume the data are described by X = exp(Y) - u where, Y is normally // distributed and u is a constant offset. @@ -794,78 +773,69 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles double r = m_GammaRate / m_GammaShape; double s = std::exp(-r); - try { - if (this->isInteger()) { - // Filled in with samples rescaled to have approximately unit - // variance scale. - TDouble1Vec scaledSamples; - scaledSamples.resize(samples.size(), 1.0); - - TMeanAccumulator logSamplesMean_; - for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); - double x = samples[i] + m_Offset; - numberSamples += n; - double t = varianceScale == 1.0 - ? r - : r + std::log(s + varianceScale * (1.0 - s)); - double shift = (r - t) / 2.0; - double scale = r == t ? 
1.0 : t / r; - scaledSamples[i] = scale; - double logxInvPlus1 = std::log(1.0 / x + 1.0); - double logxPlus1 = std::log(x + 1.0); - logSamplesMean_.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift, n / scale); - } - scaledNumberSamples = CBasicStatistics::count(logSamplesMean_); - logSamplesMean = CBasicStatistics::mean(logSamplesMean_); - - double mean = (m_GaussianPrecision * m_GaussianMean + - scaledNumberSamples * logSamplesMean) / - (m_GaussianPrecision + scaledNumberSamples); - for (std::size_t i = 0u; i < scaledSamples.size(); ++i) { - double scale = scaledSamples[i]; - scaledSamples[i] = - scale == 1.0 - ? samples[i] + m_Offset - : std::exp(mean + (std::log(samples[i] + m_Offset) - mean) / - std::sqrt(scale)); - } + if (this->isInteger()) { + // Filled in with samples rescaled to have approximately unit + // variance scale. + TDouble1Vec scaledSamples; + scaledSamples.resize(samples.size(), 1.0); + + TMeanAccumulator logSamplesMean_; + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); + double x = samples[i] + m_Offset; + numberSamples += n; + double t = varianceScale == 1.0 + ? r + : r + std::log(s + varianceScale * (1.0 - s)); + double shift = (r - t) / 2.0; + double scale = r == t ? 1.0 : t / r; + scaledSamples[i] = scale; + double logxInvPlus1 = std::log(1.0 / x + 1.0); + double logxPlus1 = std::log(x + 1.0); + logSamplesMean_.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift, n / scale); + } + scaledNumberSamples = CBasicStatistics::count(logSamplesMean_); + logSamplesMean = CBasicStatistics::mean(logSamplesMean_); + + double mean = (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) / + (m_GaussianPrecision + scaledNumberSamples); + for (std::size_t i = 0u; i < scaledSamples.size(); ++i) { + double scale = scaledSamples[i]; + scaledSamples[i] = + scale == 1.0 ? samples[i] + m_Offset : std::exp(mean + (std::log(samples[i] + m_Offset) - mean) / + std::sqrt(scale)); + } - detail::CLogSampleSquareDeviation deviationFunction( - weightStyles, scaledSamples, weights, logSamplesMean); - CIntegration::gaussLegendre<CIntegration::OrderThree>( - deviationFunction, 0.0, 1.0, logSamplesSquareDeviation); - } else { - TMeanVarAccumulator logSamplesMoments; - for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); - double x = samples[i] + m_Offset; - if (x <= 0.0) { - LOG_ERROR(<< "Discarding " << x << " it's not log-normal"); - continue; - } - numberSamples += n; - double t = varianceScale == 1.0 - ? r - : r + std::log(s + varianceScale * (1.0 - s)); - double scale = r == t ? 
1.0 : t / r; - double shift = (r - t) / 2.0; - logSamplesMoments.add(std::log(x) - shift, n / scale); + detail::CLogSampleSquareDeviation deviationFunction(scaledSamples, weights, + logSamplesMean); + CIntegration::gaussLegendre<CIntegration::OrderThree>( + deviationFunction, 0.0, 1.0, logSamplesSquareDeviation); + } else { + TMeanVarAccumulator logSamplesMoments; + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); + double x = samples[i] + m_Offset; + if (x <= 0.0) { + LOG_ERROR(<< "Discarding " << x << " it's not log-normal"); + continue; } - scaledNumberSamples = CBasicStatistics::count(logSamplesMoments); - logSamplesMean = CBasicStatistics::mean(logSamplesMoments); - logSamplesSquareDeviation = (scaledNumberSamples - 1.0) * - CBasicStatistics::variance(logSamplesMoments); + numberSamples += n; + double t = varianceScale == 1.0 + ? r + : r + std::log(s + varianceScale * (1.0 - s)); + double scale = r == t ? 1.0 : t / r; + double shift = (r - t) / 2.0; + logSamplesMoments.add(std::log(x) - shift, n / scale); } - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to update likelihood: " << e.what()); - return; + scaledNumberSamples = CBasicStatistics::count(logSamplesMoments); + logSamplesMean = CBasicStatistics::mean(logSamplesMoments); + logSamplesSquareDeviation = (scaledNumberSamples - 1.0) * + CBasicStatistics::variance(logSamplesMoments); } m_GammaShape += 0.5 * numberSamples; @@ -933,7 +903,6 @@ void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time) { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // Nothing to be done. return; } @@ -970,15 +939,15 @@ void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time) { CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::marginalLikelihoodSupport() const { - return std::make_pair(-m_Offset, boost::numeric::bounds<double>::highest()); + return {-m_Offset, boost::numeric::bounds<double>::highest()}; } double CLogNormalMeanPrecConjugate::marginalLikelihoodMean() const { return this->isInteger() ? this->mean() - 0.5 : this->mean(); } -double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return std::exp(m_GaussianMean) - m_Offset; } @@ -987,13 +956,8 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec // is log-normally distributed and for small precision it is log-t. // See evaluateFunctionOnJointDistribution for more discussion.
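// [Illustrative aside, not part of the original patch: the hunks below also drop the try/catch
// that used to wrap the variance-scale lookup. With the parallel (weightStyles, weights)
// representation the accessor had to locate the style in the vector and so could fail; with
// the fixed-size array it is presumably a plain indexed read, hence the unguarded form:
//
//     double varianceScale = maths_t::seasonalVarianceScale(weights) *
//                            maths_t::countVarianceScale(weights);
// ]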
- double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); try { double r = m_GammaRate / m_GammaShape; double s = std::exp(-r); @@ -1003,7 +967,7 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec m_GammaRate, m_GammaShape, location, scale); LOG_TRACE(<< "location = " << location << ", scale = " << scale); if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) { - boost::math::lognormal_distribution<> logNormal(location, scale); + boost::math::lognormal logNormal(location, scale); return boost::math::mode(logNormal) - m_Offset; } CLogTDistribution logt(2.0 * m_GammaShape, location, scale); @@ -1022,8 +986,8 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec return (normalPrecision == 0.0 ? 0.0 : std::exp(normalMean - 1.0 / normalPrecision)) - m_Offset; } -double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return boost::numeric::bounds<double>::highest(); } @@ -1048,13 +1012,8 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl // // Note that b / a > 0 so this is necessarily non-negative. - double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double vh = std::exp(2.0 * m_GaussianMean + m_GammaRate / m_GammaShape * (2.0 / m_GaussianPrecision + 1.0)) * (std::exp(m_GammaRate / m_GammaShape) - 1.0); @@ -1069,8 +1028,7 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl a[0] = boost::math::quantile(gamma, 0.03); b[0] = boost::math::quantile(gamma, 0.97); - boost::math::normal_distribution<> normal( - m_GaussianMean, 1.0 / a[0] / m_GaussianPrecision); + boost::math::normal normal(m_GaussianMean, 1.0 / a[0] / m_GaussianPrecision); a[1] = boost::math::quantile(normal, 0.03); b[1] = boost::math::quantile(normal, 0.97); @@ -1091,8 +1049,7 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { + const TDoubleWeightsAry& weights) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -1103,8 +1060,8 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent // We use the fact that the marginal likelihood is a log-t distribution.
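// [Illustrative aside, not part of the original patch: the interval below is the pair of
// symmetric tail quantiles of that distribution. Once percentage has been normalised to
// [0, 1], a 90% interval takes the 5% and 95% quantiles, e.g. in the large-shape
// log-normal branch:
//
//     boost::math::lognormal logNormal(location, scale);
//     double x1 = boost::math::quantile(logNormal, 0.05) - m_Offset;
//     double x2 = boost::math::quantile(logNormal, 0.95) - m_Offset;
// ]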
try { - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double r = m_GammaRate / m_GammaShape; double s = std::exp(-r); @@ -1115,7 +1072,7 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent LOG_TRACE(<< "location = " << location << ", scale = " << scale); if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) { - boost::math::lognormal_distribution<> logNormal(location, scale); + boost::math::lognormal logNormal(location, scale); double x1 = boost::math::quantile(logNormal, (1.0 - percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0); double x2 = percentage > 0.0 ? boost::math::quantile(logNormal, (1.0 + percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); - return std::make_pair(x1, x2); + return {x1, x2}; } CLogTDistribution logt(2.0 * m_GammaShape, location, scale); double x1 = quantile(logt, (1.0 - percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0); double x2 = percentage > 0.0 ? quantile(logt, (1.0 + percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); - return std::make_pair(x1, x2); + return {x1, x2}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); } @@ -1141,9 +1098,8 @@ } maths_t::EFloatingPointErrorStatus -CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -1151,7 +1107,6 @@ CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& w LOG_ERROR(<< "Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } @@ -1173,8 +1128,8 @@ } detail::CLogMarginalLikelihood logMarginalLikelihood( - weightStyles, samples, weights, m_Offset, m_GaussianMean, - m_GaussianPrecision, m_GammaShape, m_GammaRate); + samples, weights, m_Offset, m_GaussianMean, m_GaussianPrecision, + m_GammaShape, m_GammaRate); if (this->isInteger()) { CIntegration::logGaussLegendre<CIntegration::OrderThree>( logMarginalLikelihood, 0.0, 1.0, result); @@ -1203,7 +1158,6 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam if (numberSamples == 0 || this->numberSamples() == 0.0) { return; } - if (this->isNonInformative()) { // We can't sample the marginal likelihood directly. This should // only happen if we've had one sample so just return that sample.
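// [Illustrative aside, not part of the original patch: the sampling code below draws from the
// posterior predictive, a log-normal whose log-space variance is inflated by (p + 1) / p,
// p being the Gaussian precision multiplier, to account for uncertainty in the mean:
//
//     double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision *
//                              m_GammaRate / m_GammaShape);
//     boost::math::lognormal lognormal(m_GaussianMean, scale);
//     double mean = boost::math::mean(lognormal); // equals exp(m + scale^2 / 2)
// ]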
@@ -1251,7 +1205,7 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape); try { - boost::math::lognormal_distribution<> lognormal(m_GaussianMean, scale); + boost::math::lognormal lognormal(m_GaussianMean, scale); double mean = boost::math::mean(lognormal); @@ -1307,17 +1261,16 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam } } -bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + using TMinusLogCdf = detail::CEvaluateOnSamples<CTools::SMinusLogCdf>; lowerBound = upperBound = 0.0; - TMinusLogCdf minusLogCdf(weightStyles, samples, weights, - this->isNonInformative(), m_Offset, m_GaussianMean, + TMinusLogCdf minusLogCdf(samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { @@ -1347,18 +1300,18 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weight return true; } -bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + using TMinusLogCdfComplement = detail::CEvaluateOnSamples<CTools::SMinusLogCdfComplement>; lowerBound = upperBound = 0.0; TMinusLogCdfComplement minusLogCdfComplement( - weightStyles, samples, weights, this->isNonInformative(), m_Offset, - m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean, + m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1389,18 +1342,18 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleV bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples( maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { + lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; detail::CProbabilityOfLessLikelySamples probability( - calculation, weightStyles, samples, weights, this->isNonInformative(), - m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + calculation, samples, weights, this->isNonInformative(), m_Offset, - m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1438,6 +1391,7 @@ bool CLogNormalMeanPrecConjugate::isNonInformative() const { } void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& result) const { + result += core_t::LINE_ENDING + indent + "log-normal "; if (this->isNonInformative()) { result += "non-informative"; @@ -1447,7 +1401,7 @@ void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * 
m_GammaRate / m_GammaShape); try { - boost::math::lognormal_distribution<> lognormal(m_GaussianMean, scale); + boost::math::lognormal lognormal(m_GaussianMean, scale); double mean = boost::math::mean(lognormal); double deviation = boost::math::standard_deviation(lognormal); result += "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) + @@ -1458,6 +1412,7 @@ void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& } std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative prior is improper and effectively 0 everywhere. return std::string(); @@ -1471,7 +1426,7 @@ std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const { boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); double precision = m_GaussianPrecision * this->normalPrecision(); - boost::math::normal_distribution<> gaussian(m_GaussianMean, 1.0 / std::sqrt(precision)); + boost::math::normal gaussian(m_GaussianMean, 1.0 / std::sqrt(precision)); double xStart = boost::math::quantile(gamma, (1.0 - RANGE) / 2.0); double xEnd = boost::math::quantile(gamma, (1.0 + RANGE) / 2.0); @@ -1501,7 +1456,7 @@ std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const { y = yStart; for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) { double conditionalPrecision = m_GaussianPrecision * x; - boost::math::normal_distribution<> conditionalGaussian( + boost::math::normal conditionalGaussian( m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision)); pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) @@ -1552,6 +1507,7 @@ double CLogNormalMeanPrecConjugate::normalMean() const { } double CLogNormalMeanPrecConjugate::normalPrecision() const { + if (this->isNonInformative()) { return 0.0; } @@ -1569,9 +1525,10 @@ double CLogNormalMeanPrecConjugate::normalPrecision() const { CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) const { + if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds<double>::lowest(), - boost::numeric::bounds<double>::highest()); + return {boost::numeric::bounds<double>::lowest(), + boost::numeric::bounds<double>::highest()}; } // Compute the symmetric confidence interval around the median of the @@ -1596,7 +1553,7 @@ CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) con double lowerPercentile = 0.5 * (1.0 - percentage); double upperPercentile = 0.5 * (1.0 + percentage); - boost::math::students_t_distribution<> students(2.0 * m_GammaShape); + boost::math::students_t students(2.0 * m_GammaShape); double xLower = boost::math::quantile(students, lowerPercentile); double xUpper = boost::math::quantile(students, upperPercentile); @@ -1606,14 +1563,15 @@ CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) con xLower = m_GaussianMean + xLower / std::sqrt(precision); xUpper = m_GaussianMean + xUpper / std::sqrt(precision); - return std::make_pair(xLower, xUpper); + return {xLower, xUpper}; } CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage) const { + if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds<double>::lowest(), - boost::numeric::bounds<double>::highest()); + return {boost::numeric::bounds<double>::lowest(), + boost::numeric::bounds<double>::highest()}; } percentage /= 100.0; @@ -1623,8 +1581,8 @@ 
CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage // The marginal prior distribution for the precision is gamma. boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), - boost::math::quantile(gamma, upperPercentile)); + return {boost::math::quantile(gamma, lowerPercentile), + boost::math::quantile(gamma, upperPercentile)}; } bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjugate& rhs, @@ -1638,6 +1596,7 @@ bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjuga } double CLogNormalMeanPrecConjugate::mean() const { + if (this->isNonInformative()) { return std::exp(m_GaussianMean) - m_Offset; } diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc index c414d78b85..5fd2ab1d2d 100644 --- a/lib/maths/CModel.cc +++ b/lib/maths/CModel.cc @@ -121,8 +121,7 @@ double CModelParams::probabilityBucketEmpty() const { CModelAddSamplesParams::CModelAddSamplesParams() : m_Type(maths_t::E_MixedData), m_IsNonNegative(false), - m_PropagationInterval(1.0), m_WeightStyles(nullptr), - m_TrendWeights(nullptr), m_PriorWeights(nullptr) { + m_PropagationInterval(1.0), m_TrendWeights(nullptr), m_PriorWeights(nullptr) { } CModelAddSamplesParams& CModelAddSamplesParams::integer(bool integer) { @@ -153,36 +152,30 @@ double CModelAddSamplesParams::propagationInterval() const { } CModelAddSamplesParams& -CModelAddSamplesParams::weightStyles(const maths_t::TWeightStyleVec& styles) { - m_WeightStyles = &styles; - return *this; -} - -const maths_t::TWeightStyleVec& CModelAddSamplesParams::weightStyles() const { - return *m_WeightStyles; -} - -CModelAddSamplesParams& CModelAddSamplesParams::trendWeights(const TDouble2Vec4VecVec& weights) { +CModelAddSamplesParams::trendWeights(const TDouble2VecWeightsAryVec& weights) { m_TrendWeights = &weights; return *this; } -const CModelAddSamplesParams::TDouble2Vec4VecVec& CModelAddSamplesParams::trendWeights() const { +const CModelAddSamplesParams::TDouble2VecWeightsAryVec& +CModelAddSamplesParams::trendWeights() const { return *m_TrendWeights; } -CModelAddSamplesParams& CModelAddSamplesParams::priorWeights(const TDouble2Vec4VecVec& weights) { +CModelAddSamplesParams& +CModelAddSamplesParams::priorWeights(const TDouble2VecWeightsAryVec& weights) { m_PriorWeights = &weights; return *this; } -const CModelAddSamplesParams::TDouble2Vec4VecVec& CModelAddSamplesParams::priorWeights() const { +const CModelAddSamplesParams::TDouble2VecWeightsAryVec& +CModelAddSamplesParams::priorWeights() const { return *m_PriorWeights; } CModelProbabilityParams::CModelProbabilityParams() : m_Tag(0), m_SeasonalConfidenceInterval(DEFAULT_SEASONAL_CONFIDENCE_INTERVAL), - m_WeightStyles(nullptr), m_UpdateAnomalyModel(true) { + m_UpdateAnomalyModel(true) { } CModelProbabilityParams& CModelProbabilityParams::tag(std::size_t tag) { @@ -227,30 +220,23 @@ const CModelProbabilityParams::TBool2Vec1Vec& CModelProbabilityParams::bucketEmp } CModelProbabilityParams& -CModelProbabilityParams::weightStyles(const maths_t::TWeightStyleVec& styles) { - m_WeightStyles = &styles; - return *this; -} - -const maths_t::TWeightStyleVec& CModelProbabilityParams::weightStyles() const { - return *m_WeightStyles; -} - -CModelProbabilityParams& CModelProbabilityParams::addWeights(const TDouble2Vec4Vec& weights) { +CModelProbabilityParams::addWeights(const TDouble2VecWeightsAry& weights) { m_Weights.push_back(weights); return *this; } -CModelProbabilityParams& 
CModelProbabilityParams::weights(const TDouble2Vec4Vec1Vec& weights) { +CModelProbabilityParams& +CModelProbabilityParams::weights(const TDouble2VecWeightsAry1Vec& weights) { m_Weights = weights; return *this; } -const CModelProbabilityParams::TDouble2Vec4Vec1Vec& CModelProbabilityParams::weights() const { +const CModelProbabilityParams::TDouble2VecWeightsAry1Vec& +CModelProbabilityParams::weights() const { return m_Weights; } -CModelProbabilityParams::TDouble2Vec4Vec1Vec& CModelProbabilityParams::weights() { +CModelProbabilityParams::TDouble2VecWeightsAry1Vec& CModelProbabilityParams::weights() { return m_Weights; } @@ -384,21 +370,18 @@ CModelStub::TSize2Vec1Vec CModelStub::correlates() const { } CModelStub::TDouble2Vec CModelStub::mode(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec& /*weights*/) const { + const TDouble2VecWeightsAry& /*weights*/) const { return {}; } CModelStub::TDouble2Vec1Vec CModelStub::correlateModes(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec1Vec& /*weights*/) const { + const TDouble2VecWeightsAry1Vec& /*weights*/) const { return {}; } CModelStub::TDouble2Vec1Vec -CModelStub::residualModes(const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec& /*weights*/) const { +CModelStub::residualModes(const TDouble2VecWeightsAry& /*weights*/) const { return {}; } @@ -427,8 +410,7 @@ CModelStub::TDouble2Vec CModelStub::predict(core_t::TTime /*time*/, CModelStub::TDouble2Vec3Vec CModelStub::confidenceInterval(core_t::TTime /*time*/, double /*confidenceInterval*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec& /*weights*/) const { + const TDouble2VecWeightsAry& /*weights*/) const { return {}; } diff --git a/lib/maths/CMultimodalPrior.cc b/lib/maths/CMultimodalPrior.cc index 93df6067d5..3a5cc55c4f 100644 --- a/lib/maths/CMultimodalPrior.cc +++ b/lib/maths/CMultimodalPrior.cc @@ -231,9 +231,8 @@ bool CMultimodalPrior::needsOffset() const { return false; } -double CMultimodalPrior::adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +double CMultimodalPrior::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { double result = 0.0; if (this->needsOffset()) { @@ -244,9 +243,7 @@ double CMultimodalPrior::adjustOffset(const TWeightStyleVec& weightStyles, auto j = std::find_if(m_Modes.begin(), m_Modes.end(), CSetTools::CIndexInSet(cluster.first)); if (j != m_Modes.end()) { - result += j->s_Prior->adjustOffset( - weightStyles, TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, weights[i])); + result += j->s_Prior->adjustOffset({samples[i]}, {weights[i]}); } } } @@ -263,13 +260,11 @@ double CMultimodalPrior::offset() const { return offset; } -void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CMultimodalPrior::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -277,7 +272,7 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, return; } - this->adjustOffset(weightStyles_, samples, weights); + this->adjustOffset(samples, weights); // This uses a clustering methodology (defined by m_Clusterer) // to assign each sample to a 
cluster. Each cluster has its own @@ -301,31 +296,14 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, // Declared outside the loop to minimize the number of times it // is initialized. - TWeightStyleVec weightStyles(weightStyles_); TDouble1Vec sample(1); - TDouble4Vec1Vec weight(1); + TDoubleWeightsAry1Vec weight(1); TSizeDoublePr2Vec clusters; - std::size_t indices[maths_t::NUMBER_WEIGHT_STYLES]; - std::size_t missing = weightStyles.size() + 1; - std::fill_n(indices, maths_t::NUMBER_WEIGHT_STYLES, missing); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - indices[weightStyles[i]] = i; - } - std::size_t seasonal = indices[maths_t::E_SampleSeasonalVarianceScaleWeight]; - std::size_t count = indices[maths_t::E_SampleCountWeight]; - std::size_t winsorisation = indices[maths_t::E_SampleWinsorisationWeight]; - if (count == missing) { - count = weightStyles.size(); - weightStyles.push_back(maths_t::E_SampleCountWeight); - } - try { - bool hasSeasonalScale = !this->isNonInformative() && seasonal != missing; - double mean = (!this->isNonInformative() && - maths_t::hasSeasonalVarianceScale(weightStyles_, weights)) - ? this->marginalLikelihoodMean() - : 0.0; + bool hasSeasonalScale = !this->isNonInformative() && + maths_t::hasSeasonalVarianceScale(weights); + double mean = hasSeasonalScale ? this->marginalLikelihoodMean() : 0.0; for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; @@ -334,22 +312,22 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, continue; } if (hasSeasonalScale) { - x = mean + (x - mean) / std::sqrt(weights[i][seasonal]); + x = mean + (x - mean) / + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); } sample[0] = x; weight[0] = weights[i]; - weight[0].resize(weightStyles.size(), 1.0); - if (seasonal != missing) { - weight[0][seasonal] = 1.0; - } + maths_t::setSeasonalVarianceScale(1.0, weight[0]); clusters.clear(); - m_Clusterer->add(x, clusters, weight[0][count]); + m_Clusterer->add(x, clusters, maths_t::count(weight[0])); - double Z = std::accumulate( - m_Modes.begin(), m_Modes.end(), weight[0][count], - [](double sum, const TMode& mode) { return sum + mode.weight(); }); + auto addModeWeight = [](double sum, const TMode& mode) { + return sum + mode.weight(); + }; + double Z = std::accumulate(m_Modes.begin(), m_Modes.end(), + maths_t::count(weight[0]), addModeWeight); double n = 0.0; for (const auto& cluster : clusters) { @@ -360,14 +338,15 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, m_Modes.emplace_back(cluster.first, m_SeedPrior); k = m_Modes.end() - 1; } - weight[0][count] = cluster.second; - if (winsorisation != missing) { - double& ww = weight[0][winsorisation]; + maths_t::setCount(cluster.second, weight[0]); + if (maths_t::isWinsorised(weight)) { + double ww = maths_t::winsorisationWeight(weight[0]); double f = (k->weight() + cluster.second) / Z; - ww = std::max(1.0 - (1.0 - ww) / f, ww * f); + maths_t::setWinsorisationWeight( + std::max(1.0 - (1.0 - ww) / f, ww * f), weight[0]); } - k->s_Prior->addSamples(weightStyles, sample, weight); - n += maths_t::countForUpdate(weightStyles, weight[0]); + k->s_Prior->addSamples(sample, weight); + n += maths_t::countForUpdate(weight[0]); } this->addSamples(n); } @@ -381,7 +360,6 @@ void CMultimodalPrior::propagateForwardsByTime(double time) { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // Nothing to be done. 
return; @@ -412,6 +390,7 @@ double CMultimodalPrior::marginalLikelihoodMean() const { } double CMultimodalPrior::nearestMarginalLikelihoodMean(double value) const { + if (m_Modes.empty()) { return 0.0; } @@ -429,53 +408,47 @@ double CMultimodalPrior::nearestMarginalLikelihoodMean(double value) const { return result; } -double CMultimodalPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { - return CMultimodalPriorUtils::marginalLikelihoodMode(m_Modes, weightStyles, weights); +double CMultimodalPrior::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const { + return CMultimodalPriorUtils::marginalLikelihoodMode(m_Modes, weights); } CMultimodalPrior::TDouble1Vec -CMultimodalPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +CMultimodalPrior::marginalLikelihoodModes(const TDoubleWeightsAry& weights) const { TDouble1Vec result(m_Modes.size()); for (std::size_t i = 0u; i < m_Modes.size(); ++i) { - result[i] = m_Modes[i].s_Prior->marginalLikelihoodMode(weightStyles, weights); + result[i] = m_Modes[i].s_Prior->marginalLikelihoodMode(weights); } return result; } -double CMultimodalPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { - return CMultimodalPriorUtils::marginalLikelihoodVariance(m_Modes, weightStyles, weights); +double CMultimodalPrior::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + return CMultimodalPriorUtils::marginalLikelihoodVariance(m_Modes, weights); } TDoubleDoublePr CMultimodalPrior::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { + const TDoubleWeightsAry& weights) const { return CMultimodalPriorUtils::marginalLikelihoodConfidenceInterval( - *this, m_Modes, percentage, weightStyles, weights); + *this, m_Modes, percentage, weights); } maths_t::EFloatingPointErrorStatus -CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CMultimodalPrior::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { + result = 0.0; if (samples.empty()) { LOG_ERROR(<< "Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } - if (this->isNonInformative()) { // The non-informative likelihood is improper and effectively // zero everywhere. We use minus max double because @@ -489,14 +462,15 @@ CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles return maths_t::E_FpOverflowed; } - return m_Modes.size() == 1 ? m_Modes[0].s_Prior->jointLogMarginalLikelihood( - weightStyles, samples, weights, result) - : CMultimodalPriorUtils::jointLogMarginalLikelihood( - m_Modes, weightStyles, samples, weights, result); + return m_Modes.size() == 1 + ? 
m_Modes[0].s_Prior->jointLogMarginalLikelihood(samples, weights, result) + : CMultimodalPriorUtils::jointLogMarginalLikelihood( + m_Modes, samples, weights, result); } void CMultimodalPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { + samples.clear(); if (numberSamples == 0 || this->numberSamples() == 0.0) { @@ -506,34 +480,30 @@ void CMultimodalPrior::sampleMarginalLikelihood(std::size_t numberSamples, CMultimodalPriorUtils::sampleMarginalLikelihood(m_Modes, numberSamples, samples); } -bool CMultimodalPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CMultimodalPrior::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return CMultimodalPriorUtils::minusLogJointCdf(m_Modes, weightStyles, samples, - weights, lowerBound, upperBound); + return CMultimodalPriorUtils::minusLogJointCdf(m_Modes, samples, weights, + lowerBound, upperBound); } -bool CMultimodalPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CMultimodalPrior::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { return CMultimodalPriorUtils::minusLogJointCdfComplement( - m_Modes, weightStyles, samples, weights, lowerBound, upperBound); + m_Modes, samples, weights, lowerBound, upperBound); } bool CMultimodalPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { return CMultimodalPriorUtils::probabilityOfLessLikelySamples( - *this, m_Modes, calculation, weightStyles, samples, weights, lowerBound, - upperBound, tail); + *this, m_Modes, calculation, samples, weights, lowerBound, upperBound, tail); } bool CMultimodalPrior::isNonInformative() const { @@ -593,6 +563,7 @@ std::size_t CMultimodalPrior::numberModes() const { } bool CMultimodalPrior::checkInvariants(const std::string& tag) const { + bool result = true; if (m_Modes.size() != m_Clusterer->numberClusters()) { @@ -643,6 +614,7 @@ CMultimodalPrior::CModeSplitCallback::CModeSplitCallback(CMultimodalPrior& prior void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const { + LOG_TRACE(<< "Splitting mode with index " << sourceIndex); TModeVec& modes = m_Prior->m_Modes; @@ -673,19 +645,17 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex, } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); - double nl = pLeft * numberSamples; - double ns = std::min(nl, 4.0); + double wl = pLeft * numberSamples; + double ws = std::min(wl, 4.0); double n = static_cast<double>(samples.size()); - LOG_TRACE(<< "# left = " << nl); + LOG_TRACE(<< "# left = " << wl); - double seedWeight = ns / n; - TDouble4Vec1Vec weights(samples.size(), TDouble4Vec{seedWeight}); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights); + TDoubleWeightsAry1Vec weights(samples.size(), maths_t::countWeight(ws / n)); + modes.back().s_Prior->addSamples(samples, weights); - double weight = (nl - ns) / n; - if (weight > 0.0) { - weights.assign(weights.size(), TDouble4Vec{weight}); 
- modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights); + if (wl > ws) { + weights.assign(weights.size(), maths_t::countWeight((wl - ws) / n)); + modes.back().s_Prior->addSamples(samples, weights); LOG_TRACE(<< modes.back().s_Prior->print()); } } @@ -699,19 +669,17 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex, } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); - double nr = pRight * numberSamples; - double ns = std::min(nr, 4.0); + double wr = pRight * numberSamples; + double ws = std::min(wr, 4.0); double n = static_cast<double>(samples.size()); - LOG_TRACE(<< "# right = " << nr); + LOG_TRACE(<< "# right = " << wr); - double seedWeight = ns / n; - TDouble4Vec1Vec weights(samples.size(), TDouble4Vec{seedWeight}); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights); + TDoubleWeightsAry1Vec weights(samples.size(), maths_t::countWeight(ws / n)); + modes.back().s_Prior->addSamples(samples, weights); - double weight = (nr - ns) / n; - if (weight > 0.0) { - weights.assign(weights.size(), TDouble4Vec{weight}); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights); + if (wr > ws) { + weights.assign(weights.size(), maths_t::countWeight((wr - ws) / n)); + modes.back().s_Prior->addSamples(samples, weights); LOG_TRACE(<< modes.back().s_Prior->print()); } } @@ -733,6 +701,7 @@ CMultimodalPrior::CModeMergeCallback::CModeMergeCallback(CMultimodalPrior& prior void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex, std::size_t rightMergeIndex, std::size_t targetIndex) const { + LOG_TRACE(<< "Merging modes with indices " << leftMergeIndex << " " << rightMergeIndex); TModeVec& modes = m_Prior->m_Modes; @@ -742,7 +711,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex double wl = 0.0; double wr = 0.0; - double n = 0.0; + double w = 0.0; std::size_t nl = 0; std::size_t nr = 0; TDouble1Vec samples; @@ -751,7 +720,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex CSetTools::CIndexInSet(leftMergeIndex)); if (leftMode != modes.end()) { wl = leftMode->s_Prior->numberSamples(); - n += wl; + w += wl; TDouble1Vec leftSamples; leftMode->s_Prior->sampleMarginalLikelihood(MODE_MERGE_NUMBER_SAMPLES, leftSamples); nl = leftSamples.size(); @@ -764,7 +733,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex CSetTools::CIndexInSet(rightMergeIndex)); if (rightMode != modes.end()) { wr = rightMode->s_Prior->numberSamples(); - n += wr; + w += wr; TDouble1Vec rightSamples; rightMode->s_Prior->sampleMarginalLikelihood(MODE_MERGE_NUMBER_SAMPLES, rightSamples); nr = rightSamples.size(); @@ -773,7 +742,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex LOG_ERROR(<< "Couldn't find mode for " << rightMergeIndex); } - if (n > 0.0) { + if (w > 0.0) { double nl_ = static_cast<double>(nl); double nr_ = static_cast<double>(nr); double Z = (nl_ * wl + nr_ * wr) / (nl_ + nr_); @@ -782,24 +751,22 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); - LOG_TRACE(<< "n = " << n << ", wl = " << wl << ", wr = " << wr); + LOG_TRACE(<< "w = " << w << ", wl = " << wl << ", wr = " << wr); - double ns = std::min(n, 4.0); - double s = static_cast<double>(samples.size()); + double ws = std::min(w, 4.0); + double n = static_cast<double>(samples.size()); - double seedWeight = ns / s; - TDouble4Vec1Vec weights; + 
TDoubleWeightsAry1Vec weights; weights.reserve(samples.size()); - weights.resize(nl, TDouble1Vec{wl * seedWeight}); - weights.resize(nl + nr, TDouble1Vec{wr * seedWeight}); - newMode.s_Prior->addSamples(TWeights::COUNT, samples, weights); - - double weight = (n - ns) / s; - if (weight > 0.0) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - weights[i][0] *= weight / seedWeight; - } - newMode.s_Prior->addSamples(TWeights::COUNT, samples, weights); + weights.resize(nl, maths_t::countWeight(wl * ws / n)); + weights.resize(nl + nr, maths_t::countWeight(wr * ws / n)); + newMode.s_Prior->addSamples(samples, weights); + + if (w > ws) { + weights.clear(); + weights.resize(nl, maths_t::countWeight(wl * (w - ws) / n)); + weights.resize(nl + nr, maths_t::countWeight(wr * (w - ws) / n)); + newMode.s_Prior->addSamples(samples, weights); } // Remove the merged modes. diff --git a/lib/maths/CMultinomialConjugate.cc b/lib/maths/CMultinomialConjugate.cc index f12b73a300..c220409e17 100644 --- a/lib/maths/CMultinomialConjugate.cc +++ b/lib/maths/CMultinomialConjugate.cc @@ -351,9 +351,8 @@ bool CMultinomialConjugate::needsOffset() const { return false; } -double CMultinomialConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/, - const TDouble1Vec& /*samples*/, - const TDouble4Vec1Vec& /*weights*/) { +double CMultinomialConjugate::adjustOffset(const TDouble1Vec& /*samples*/, + const TDoubleWeightsAry1Vec& /*weights*/) { return 1.0; } @@ -361,13 +360,11 @@ double CMultinomialConjugate::offset() const { return 0.0; } -void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CMultinomialConjugate::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -375,7 +372,7 @@ void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, return; } - this->CPrior::addSamples(weightStyles, samples, weights); + this->CPrior::addSamples(samples, weights); // If x = {x(i)} denotes the sample vector, then x are multinomially // distributed with probabilities {p(i)}. Let n(i) denote the counts @@ -407,7 +404,7 @@ void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, LOG_ERROR(<< "Discarding " << x); continue; } - double n = maths_t::countForUpdate(weightStyles, weights[i]); + double n = maths_t::countForUpdate(weights[i]); if (!CMathsFuncs::isFinite(n)) { LOG_ERROR(<< "Bad count weight " << n); continue; @@ -487,16 +484,18 @@ void CMultinomialConjugate::propagateForwardsByTime(double time) { } CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihoodSupport() const { + // Strictly speaking for a particular likelihood this is the // set of discrete values or categories, but we are interested // in the support for the possible discrete values which can // be any real numbers. 
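// [Illustrative aside, not part of the original patch: under the Dirichlet prior maintained by
// this class the posterior mean probability of category i is its concentration over the total,
// which is what marginalLikelihoodMean() below averages across categories:
//
//     double p = m_Concentrations[i] / m_TotalConcentration; // E[p(i)]
// ]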
- return std::make_pair(boost::numeric::bounds<double>::lowest(), - boost::numeric::bounds<double>::highest()); + return {boost::numeric::bounds<double>::lowest(), + boost::numeric::bounds<double>::highest()}; } double CMultinomialConjugate::marginalLikelihoodMean() const { + if (this->isNonInformative()) { return 0.0; } @@ -515,19 +514,17 @@ double CMultinomialConjugate::marginalLikelihoodMean() const { return CBasicStatistics::mean(result); } -double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { +double CMultinomialConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& /*weights*/) const { + if (this->isNonInformative()) { return 0.0; } // This is just the category with the maximum concentration. - double modeConcentration = m_Concentrations[0]; std::size_t mode = 0u; for (std::size_t i = 1u; i < m_Concentrations.size(); ++i) { - if (m_Concentrations[i] > modeConcentration) { - modeConcentration = m_Concentrations[i]; + if (m_Concentrations[i] > m_Concentrations[mode]) { mode = i; } } @@ -535,8 +532,8 @@ double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*we return m_Categories[mode]; } -double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { +double CMultinomialConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& /*weights*/) const { + using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator; if (this->isNonInformative()) { @@ -559,8 +556,8 @@ double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec& CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { + const TDoubleWeightsAry& /*weights*/) const { + if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -597,13 +594,12 @@ CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage, LOG_TRACE(<< "quantiles = " << core::CContainerPrinter::print(quantiles)); LOG_TRACE(<< " " << core::CContainerPrinter::print(m_Categories)); - return std::make_pair(x1, x2); + return {x1, x2}; } maths_t::EFloatingPointErrorStatus -CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CMultinomialConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -611,14 +607,12 @@ CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS LOG_ERROR(<< "Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } - if (this->isNonInformative()) { // The non-informative likelihood is improper and effectively // zero everywhere. 
We use minus max double because @@ -657,7 +651,7 @@ CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS double numberSamples = 0.0; for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); + double n = maths_t::countForUpdate(weights[i]); numberSamples += n; categoryCounts[samples[i]] += n; } @@ -707,6 +701,7 @@ CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { + samples.clear(); if (numberSamples == 0 || this->isNonInformative()) { @@ -750,9 +745,8 @@ void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples, LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); } -bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CMultinomialConjugate::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { lowerBound = upperBound = 0.0; @@ -794,7 +788,7 @@ bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double sampleLowerBound; double sampleUpperBound; @@ -813,11 +807,11 @@ bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles return true; } -bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CMultinomialConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + // See minusLogJointCdf for the rationale behind this approximation. detail::CCdfComplement cdfComplement(m_Categories, m_Concentrations, m_TotalConcentration); @@ -826,7 +820,7 @@ bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec& we for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double sampleLowerBound; double sampleUpperBound; @@ -846,9 +840,8 @@ bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec& we } bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { @@ -889,7 +882,7 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability detail::CCdf cdf(m_Categories, m_Concentrations, m_TotalConcentration); for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double sampleLowerBound, sampleUpperBound; cdf(x, sampleLowerBound, sampleUpperBound); jointLowerBound.add(sampleLowerBound, n); @@ -1121,7 +1114,7 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability // Count the occurrences of each category in the sample set. 
for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); categoryCounts[x] += n; } @@ -1170,7 +1163,7 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability detail::CCdfComplement cdfComplement(m_Categories, m_Concentrations, m_TotalConcentration); for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double sampleLowerBound, sampleUpperBound; cdfComplement(x, sampleLowerBound, sampleUpperBound); jointLowerBound.add(sampleLowerBound, n); @@ -1503,7 +1496,7 @@ void CMultinomialConjugate::probabilitiesOfLessLikelyCategories(maths_t::EProbab CMultinomialConjugate::TDoubleDoublePrVec CMultinomialConjugate::confidenceIntervalProbabilities(double percentage) const { if (this->isNonInformative()) { - return TDoubleDoublePrVec(m_Concentrations.size(), std::make_pair(0.0, 1.0)); + return TDoubleDoublePrVec(m_Concentrations.size(), {0.0, 1.0}); } // The marginal distribution over each probability is beta. diff --git a/lib/maths/CMultivariateConstantPrior.cc b/lib/maths/CMultivariateConstantPrior.cc index 114f096d9d..73ad2ea920 100644 --- a/lib/maths/CMultivariateConstantPrior.cc +++ b/lib/maths/CMultivariateConstantPrior.cc @@ -97,14 +97,12 @@ void CMultivariateConstantPrior::setToNonInformative(double /*offset*/, double / m_Constant.reset(); } -void CMultivariateConstantPrior::adjustOffset(const TWeightStyleVec& /*weightStyle*/, - const TDouble10Vec1Vec& /*samples*/, - const TDouble10Vec4Vec1Vec& /*weights*/) { +void CMultivariateConstantPrior::adjustOffset(const TDouble10Vec1Vec& /*samples*/, + const TDouble10VecWeightsAry1Vec& /*weights*/) { } -void CMultivariateConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle*/, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& /*weights*/) { +void CMultivariateConstantPrior::addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& /*weights*/) { if (m_Constant || samples.empty()) { return; } @@ -186,8 +184,7 @@ CMultivariateConstantPrior::marginalLikelihoodMean() const { } CMultivariateConstantPrior::TDouble10Vec -CMultivariateConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble10Vec4Vec& /*weights*/) const { +CMultivariateConstantPrior::marginalLikelihoodMode(const TDouble10VecWeightsAry& /*weights*/) const { return this->marginalLikelihoodMean(); } @@ -210,9 +207,8 @@ CMultivariateConstantPrior::marginalLikelihoodVariances() const { } maths_t::EFloatingPointErrorStatus -CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, +CMultivariateConstantPrior::jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -254,8 +250,7 @@ CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& we return maths_t::E_FpOverflowed; } - numberSamples += this->smallest( - maths_t::countForUpdate(m_Dimension, weightStyles, weights[i])); + numberSamples += this->smallest(maths_t::countForUpdate(weights[i])); } result = numberSamples * core::constants::LOG_MAX_DOUBLE; diff --git a/lib/maths/CMultivariateMultimodalPrior.cc b/lib/maths/CMultivariateMultimodalPrior.cc index c8733c573d..0e2bd96e0d 100644 --- 
 a/lib/maths/CMultivariateMultimodalPrior.cc
+++ b/lib/maths/CMultivariateMultimodalPrior.cc
@@ -16,8 +16,8 @@ namespace maths {
 namespace multivariate_multimodal_prior_detail {
 
 using TDoubleVec = std::vector<double>;
-using TDouble10Vec = CMultivariatePrior::TDouble10Vec;
-using TDouble10Vec4Vec = CMultivariatePrior::TDouble10Vec4Vec;
+using TDouble10Vec1Vec = CMultivariatePrior::TDouble10Vec1Vec;
+using TDouble10VecWeightsAry1Vec = CMultivariatePrior::TDouble10VecWeightsAry1Vec;
 
 namespace {
 
@@ -38,9 +38,8 @@ std::string printIndices(const TModeVec& modes) {
 
 maths_t::EFloatingPointErrorStatus
 jointLogMarginalLikelihood(const TModeVec& modes,
-                           const maths_t::TWeightStyleVec& weightStyles,
                            const TDouble10Vec1Vec& sample,
-                           const TDouble10Vec4Vec1Vec& weights,
+                           const TDouble10VecWeightsAry1Vec& weights,
                            TSizeDoublePr3Vec& modeLogLikelihoods,
                            double& result) {
     try {
@@ -51,14 +50,14 @@ jointLogMarginalLikelihood(const TModeVec& modes,
 
         for (std::size_t i = 0u; i < modes.size(); ++i) {
             double modeLogLikelihood;
-            maths_t::EFloatingPointErrorStatus status = modes[i].s_Prior->jointLogMarginalLikelihood(
-                weightStyles, sample, weights, modeLogLikelihood);
+            maths_t::EFloatingPointErrorStatus status =
+                modes[i].s_Prior->jointLogMarginalLikelihood(sample, weights, modeLogLikelihood);
             if (status & maths_t::E_FpFailed) {
                 // Logging handled at a lower level.
                 return status;
             }
             if (!(status & maths_t::E_FpOverflowed)) {
-                modeLogLikelihoods.push_back({i, modeLogLikelihood});
+                modeLogLikelihoods.emplace_back(i, modeLogLikelihood);
                 maxLogLikelihood = std::max(maxLogLikelihood, modeLogLikelihood);
             }
         }
@@ -151,9 +150,10 @@ void sampleMarginalLikelihood(const TModeVec& modes,
 }
 
 void print(const TModeVec& modes, const std::string& separator, std::string& result) {
-    double Z = std::accumulate(
-        modes.begin(), modes.end(), 0.0,
-        [](double sum, const TMode& mode) { return sum + mode.weight(); });
+    auto addWeight = [](double sum, const TMode& mode) {
+        return sum + mode.weight();
+    };
+    double Z = std::accumulate(modes.begin(), modes.end(), 0.0, addWeight);
 
     std::string separator_ = separator + separator;
 
@@ -182,7 +182,7 @@ void modeMergeCallback(std::size_t dimension,
 
     double wl = 0.0;
     double wr = 0.0;
-    double n = 0.0;
+    double w = 0.0;
     std::size_t nl = 0;
     std::size_t nr = 0;
     TDouble10Vec1Vec samples;
@@ -191,7 +191,7 @@
                              CSetTools::CIndexInSet(leftMergeIndex));
     if (leftMode != modes.end()) {
         wl = leftMode->s_Prior->numberSamples();
-        n += wl;
+        w += wl;
         TDouble10Vec1Vec leftSamples;
         leftMode->s_Prior->sampleMarginalLikelihood(numberSamples, leftSamples);
         nl = leftSamples.size();
@@ -206,7 +206,7 @@
                              CSetTools::CIndexInSet(rightMergeIndex));
     if (rightMode != modes.end()) {
         wr = rightMode->s_Prior->numberSamples();
-        n += wr;
+        w += wr;
         TDouble10Vec1Vec rightSamples;
         rightMode->s_Prior->sampleMarginalLikelihood(numberSamples, rightSamples);
         nr = rightSamples.size();
@@ -217,7 +217,7 @@
                   << ", merged index = " << targetIndex);
     }
 
-    if (n > 0.0) {
+    if (w > 0.0) {
         double nl_ = static_cast<double>(nl);
         double nr_ = static_cast<double>(nr);
         double Z = (nl_ * wl + nr_ * wr) / (nl_ + nr_);
@@ -226,37 +226,30 @@
     }
 
     LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples));
-    LOG_TRACE(<< "n = " << n << ", wl = " << wl << ", wr = " << wr);
+    LOG_TRACE(<< "w = " << w << ", wl = " << wl << ", wr = " << wr);
 
-    double ns = std::min(n, 4.0);
-    double s = static_cast<double>(samples.size());
+    double ws = std::min(w, 4.0);
+    double n = static_cast<double>(samples.size());
 
-    TDouble10Vec leftSeedWeight(dimension, wl * ns / s);
-    TDouble10Vec rightSeedWeight(dimension, wl * ns / s);
-    TDouble10Vec4Vec1Vec weights;
+    TDouble10VecWeightsAry1Vec weights;
     weights.reserve(samples.size());
-    weights.resize(nl, TDouble10Vec1Vec(1, leftSeedWeight));
-    weights.resize(nl + nr, TDouble10Vec1Vec(1, rightSeedWeight));
-    newMode.s_Prior->addSamples(CConstantWeights::COUNT, samples, weights);
-
-    double weight = (n - ns) / s;
-    if (weight > 0.0) {
-        for (std::size_t i = 0u; i < dimension; ++i) {
-            leftSeedWeight[i] = wl * weight;
-            rightSeedWeight[i] = wr * weight;
-        }
+    weights.resize(nl, maths_t::countWeight(wl * ws / n, dimension));
+    weights.resize(nl + nr, maths_t::countWeight(wr * ws / n, dimension));
+    newMode.s_Prior->addSamples(samples, weights);
+
+    if (w > ws) {
         weights.clear();
-        weights.resize(nl, TDouble10Vec1Vec(1, leftSeedWeight));
-        weights.resize(nl + nr, TDouble10Vec1Vec(1, rightSeedWeight));
-        newMode.s_Prior->addSamples(CConstantWeights::COUNT, samples, weights);
+        weights.resize(nl, maths_t::countWeight(wl * (w - ws) / n, dimension));
+        weights.resize(nl + nr, maths_t::countWeight(wr * (w - ws) / n, dimension));
+        newMode.s_Prior->addSamples(samples, weights);
     }
 
     // Remove the merged modes.
     TSizeSet mergedIndices;
     mergedIndices.insert(leftMergeIndex);
     mergedIndices.insert(rightMergeIndex);
-    modes.erase(std::remove_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(mergedIndices)),
-                modes.end());
+    auto isMergeIndex = CSetTools::CIndexInSet(mergedIndices);
+    modes.erase(std::remove_if(modes.begin(), modes.end(), isMergeIndex), modes.end());
 
     // Add the new mode.
     LOG_TRACE(<< "Creating mode with index " << targetIndex);
diff --git a/lib/maths/CMultivariateOneOfNPrior.cc b/lib/maths/CMultivariateOneOfNPrior.cc
index e6d9be0665..e45cfcb5af 100644
--- a/lib/maths/CMultivariateOneOfNPrior.cc
+++ b/lib/maths/CMultivariateOneOfNPrior.cc
@@ -41,7 +41,7 @@ using TDouble10Vec = CMultivariateOneOfNPrior::TDouble10Vec;
 using TDouble10VecDouble10VecPr = CMultivariateOneOfNPrior::TDouble10VecDouble10VecPr;
 using TDouble10Vec1Vec = CMultivariateOneOfNPrior::TDouble10Vec1Vec;
 using TDouble10Vec10Vec = CMultivariateOneOfNPrior::TDouble10Vec10Vec;
-using TDouble10Vec4Vec1Vec = CMultivariateOneOfNPrior::TDouble10Vec4Vec1Vec;
+using TDouble10VecWeightsAry1Vec = CMultivariateOneOfNPrior::TDouble10VecWeightsAry1Vec;
 using TPriorPtr = CMultivariateOneOfNPrior::TPriorPtr;
 using TWeightPriorPtrPr = CMultivariateOneOfNPrior::TWeightPriorPtrPr;
 using TWeightPriorPtrPrVec = CMultivariateOneOfNPrior::TWeightPriorPtrPrVec;
@@ -287,17 +287,15 @@ void CMultivariateOneOfNPrior::setToNonInformative(double offset, double decayRa
     this->numberSamples(0.0);
 }
 
-void CMultivariateOneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles,
-                                            const TDouble10Vec1Vec& samples,
-                                            const TDouble10Vec4Vec1Vec& weights) {
+void CMultivariateOneOfNPrior::adjustOffset(const TDouble10Vec1Vec& samples,
+                                            const TDouble10VecWeightsAry1Vec& weights) {
     for (auto& model : m_Models) {
-        model.second->adjustOffset(weightStyles, samples, weights);
+        model.second->adjustOffset(samples, weights);
     }
 }
 
-void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles,
-                                          const TDouble10Vec1Vec& samples,
-                                          const TDouble10Vec4Vec1Vec& weights) {
+void CMultivariateOneOfNPrior::addSamples(const TDouble10Vec1Vec& samples,
+                                          const TDouble10VecWeightsAry1Vec& weights) {
     if (samples.empty()) {
         return;
     }
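The hunks above all apply the one mechanical change that runs through this series: the (weightStyles, weights) pair, in which a parallel vector of styles said how to interpret each slot of a per-sample weight vector, becomes a per-sample array with one fixed slot per weight style, built and read through free functions such as maths_t::countWeight and maths_t::count. The sketch below is a minimal, compilable illustration of that shape; the enum entries and helper names are simplified stand-ins, not the library's exact definitions.

    #include <array>
    #include <iostream>
    #include <vector>

    // Stand-in for maths_t::ESampleWeightStyle; the real enum has more entries.
    enum ESampleWeightStyle {
        E_SampleCountWeight = 0,
        E_SampleSeasonalVarianceScaleWeight,
        E_SampleCountVarianceScaleWeight,
        E_NumberWeightStyles
    };

    using TDoubleWeightsAry = std::array<double, E_NumberWeightStyles>;
    using TDoubleWeightsAry1Vec = std::vector<TDoubleWeightsAry>;

    // The unit weight: count one, every variance scale one.
    const TDoubleWeightsAry UNIT{{1.0, 1.0, 1.0}};

    // Builder in the spirit of maths_t::countWeight.
    TDoubleWeightsAry countWeight(double n) {
        TDoubleWeightsAry result = UNIT;
        result[E_SampleCountWeight] = n;
        return result;
    }

    // Accessor in the spirit of maths_t::count: a fixed index replaces the
    // old linear search through a parallel style vector, so reads are O(1)
    // and cannot fail on an unrecognised style.
    double count(const TDoubleWeightsAry& weight) {
        return weight[E_SampleCountWeight];
    }

    int main() {
        TDoubleWeightsAry1Vec weights{countWeight(2.0), UNIT};
        double n = 0.0;
        for (const auto& weight : weights) {
            n += count(weight);
        }
        std::cout << "effective number of samples = " << n << '\n'; // prints 3
        return 0;
    }

One consequence is visible in several deleted blocks: because the accessors index a fixed-size array rather than searching a style vector, they cannot throw on an unrecognised style, so the defensive try/catch around weight extraction disappears together with the weightStyles arguments.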
@@ -305,10 +303,10 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, return; } - this->adjustOffset(weightStyles, samples, weights); + this->adjustOffset(samples, weights); double penalty = CTools::fastLog(this->numberSamples()); - this->CMultivariatePrior::addSamples(weightStyles, samples, weights); + this->CMultivariatePrior::addSamples(samples, weights); penalty = (penalty - CTools::fastLog(this->numberSamples())) / 2.0; // See COneOfNPrior::addSamples for a discussion. @@ -330,8 +328,7 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, // Update the weights with the marginal likelihoods. double logLikelihood = 0.0; maths_t::EFloatingPointErrorStatus status = - use ? model.second->jointLogMarginalLikelihood(weightStyles, samples, - weights, logLikelihood) + use ? model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood) : maths_t::E_FpOverflowed; if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Failed to compute log-likelihood"); @@ -346,7 +343,7 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, } // Update the component prior distribution. - model.second->addSamples(weightStyles, samples, weights); + model.second->addSamples(samples, weights); used.push_back(use); uses.push_back(model.second->participatesInModelSelection()); @@ -354,13 +351,8 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, } TDouble10Vec n(m_Dimension, 0.0); - try { - for (const auto& weight : weights) { - add(maths_t::count(m_Dimension, weightStyles, weight), n); - } - } catch (std::exception& e) { - LOG_ERROR(<< "Failed to add samples: " << e.what()); - return; + for (const auto& weight : weights) { + add(maths_t::count(weight), n); } if (!isNonInformative && maxLogLikelihood.count() > 0) { @@ -446,9 +438,8 @@ CMultivariateOneOfNPrior::univariate(const TSize10Vec& marginalize, models[i].first *= std::exp(weights[i] - maxWeight[0]) / Z; } - return std::make_pair(TUnivariatePriorPtr(new COneOfNPrior( - models, this->dataType(), this->decayRate())), - maxWeight.count() > 0 ? maxWeight[0] : 0.0); + return {TUnivariatePriorPtr(new COneOfNPrior(models, this->dataType(), this->decayRate())), + maxWeight.count() > 0 ? maxWeight[0] : 0.0}; } CMultivariateOneOfNPrior::TPriorPtrDoublePr @@ -480,12 +471,13 @@ CMultivariateOneOfNPrior::bivariate(const TSize10Vec& marginalize, models[i].first *= std::exp(weights[i] - maxWeight[0]) / Z; } - return std::make_pair(TPriorPtr(new CMultivariateOneOfNPrior( - 2, models, this->dataType(), this->decayRate())), - maxWeight.count() > 0 ? maxWeight[0] : 0.0); + return {TPriorPtr(new CMultivariateOneOfNPrior(2, models, this->dataType(), + this->decayRate())), + maxWeight.count() > 0 ? maxWeight[0] : 0.0}; } TDouble10VecDouble10VecPr CMultivariateOneOfNPrior::marginalLikelihoodSupport() const { + // We define this is as the intersection of the component model // supports. @@ -504,6 +496,7 @@ TDouble10VecDouble10VecPr CMultivariateOneOfNPrior::marginalLikelihoodSupport() } TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMean() const { + // This is E_{P(i)}[ E[X | P(i)] ] and the conditional expectation // is just the individual model expectation. 
Note we exclude models // with low weight because typically the means are similar between @@ -523,6 +516,7 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMean() const { TDouble10Vec CMultivariateOneOfNPrior::nearestMarginalLikelihoodMean(const TDouble10Vec& value) const { + // See marginalLikelihoodMean for discussion. TDouble10Vec result(m_Dimension, 0.0); @@ -537,6 +531,7 @@ CMultivariateOneOfNPrior::nearestMarginalLikelihoodMean(const TDouble10Vec& valu } TDouble10Vec10Vec CMultivariateOneOfNPrior::marginalLikelihoodCovariance() const { + TDouble10Vec10Vec result(m_Dimension, TDouble10Vec(m_Dimension, 0.0)); if (this->isNonInformative()) { for (std::size_t i = 0u; i < m_Dimension; ++i) { @@ -562,6 +557,7 @@ TDouble10Vec10Vec CMultivariateOneOfNPrior::marginalLikelihoodCovariance() const } TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodVariances() const { + if (this->isNonInformative()) { return TDouble10Vec(m_Dimension, INF); } @@ -578,24 +574,23 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodVariances() const { } TDouble10Vec -CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const { +CMultivariateOneOfNPrior::marginalLikelihoodMode(const TDouble10VecWeightsAry& weights) const { + // We approximate this as the weighted average of the component // model modes. // Declared outside the loop to minimize the number of times // it is created. TDouble10Vec1Vec sample(1); - TDouble10Vec4Vec1Vec sampleWeights(1, weights); + TDouble10VecWeightsAry1Vec sampleWeights(1, weights); TDouble10Vec result(m_Dimension, 0.0); double w = 0.0; for (const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { - sample[0] = model.second->marginalLikelihoodMode(weightStyles, weights); + sample[0] = model.second->marginalLikelihoodMode(weights); double logLikelihood; - model.second->jointLogMarginalLikelihood(weightStyles, sample, - sampleWeights, logLikelihood); + model.second->jointLogMarginalLikelihood(sample, sampleWeights, logLikelihood); updateMean(sample[0], model.first * std::exp(logLikelihood), result, w); } } @@ -605,9 +600,8 @@ CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightSt } maths_t::EFloatingPointErrorStatus -CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, +CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -630,8 +624,8 @@ CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weig for (const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { double logLikelihood; - maths_t::EFloatingPointErrorStatus status = model.second->jointLogMarginalLikelihood( - weightStyles, samples, weights, logLikelihood); + maths_t::EFloatingPointErrorStatus status = + model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood); if (status & maths_t::E_FpFailed) { return status; } @@ -710,7 +704,7 @@ void CMultivariateOneOfNPrior::sampleMarginalLikelihood(std::size_t numberSample for (std::size_t i = 0u; i < m_Models.size(); ++i) { modelSamples.clear(); m_Models[i].second->sampleMarginalLikelihood(sampling[i], modelSamples); - for (auto sample : modelSamples) { + for (const auto& sample : modelSamples) { samples.push_back(CTools::truncate(sample, support.first, 
                                               support.second));
         }
     }
diff --git a/lib/maths/CMultivariatePrior.cc b/lib/maths/CMultivariatePrior.cc
index f6166e6459..35caf7cb88 100644
--- a/lib/maths/CMultivariatePrior.cc
+++ b/lib/maths/CMultivariatePrior.cc
@@ -83,20 +83,15 @@ void CMultivariatePrior::decayRate(double value) {
     setDecayRate(value, FALLBACK_DECAY_RATE, m_DecayRate);
 }
 
-void CMultivariatePrior::addSamples(const TWeightStyleVec& weightStyles,
-                                    const TDouble10Vec1Vec& /*samples*/,
-                                    const TDouble10Vec4Vec1Vec& weights) {
+void CMultivariatePrior::addSamples(const TDouble10Vec1Vec& /*samples*/,
+                                    const TDouble10VecWeightsAry1Vec& weights) {
     std::size_t d = this->dimension();
     TDouble10Vec n(d, 0.0);
-    try {
-        for (std::size_t i = 0u; i < weights.size(); ++i) {
-            TDouble10Vec wi = maths_t::countForUpdate(d, weightStyles, weights[i]);
-            for (std::size_t j = 0u; j < d; ++j) {
-                n[j] += wi[j];
-            }
+    for (const auto& weight : weights) {
+        TDouble10Vec n_ = maths_t::countForUpdate(weight);
+        for (std::size_t i = 0u; i < d; ++i) {
+            n[i] += n_[i];
         }
-    } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to extract sample counts: " << e.what());
     }
     this->addSamples(smallest(n));
 }
@@ -107,15 +102,13 @@ CMultivariatePrior::nearestMarginalLikelihoodMean(const TDouble10Vec& /*value*/)
 }
 
 CMultivariatePrior::TDouble10Vec1Vec
-CMultivariatePrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles,
-                                            const TDouble10Vec4Vec& weights) const {
-    return TDouble10Vec1Vec{this->marginalLikelihoodMode(weightStyles, weights)};
+CMultivariatePrior::marginalLikelihoodModes(const TDouble10VecWeightsAry& weights) const {
+    return TDouble10Vec1Vec{this->marginalLikelihoodMode(weights)};
 }
 
 bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                                        const TWeightStyleVec& weightStyles,
                                                         const TDouble10Vec1Vec& samples,
-                                                        const TDouble10Vec4Vec1Vec& weights,
+                                                        const TDouble10VecWeightsAry1Vec& weights,
                                                         const TSize10Vec& coordinates,
                                                         TDouble10Vec2Vec& lowerBounds,
                                                         TDouble10Vec2Vec& upperBounds,
@@ -140,18 +133,17 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal
     }
 
     using TDouble1Vec = core::CSmallVector<double, 1>;
-    using TDouble4Vec = core::CSmallVector<double, 4>;
-    using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
+    using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;
     using TJointProbabilityOfLessLikelySamplesVec =
         core::CSmallVector<CJointProbabilityOfLessLikelySamples, 10>;
 
     static const TSize10Vec NO_MARGINS;
     static const TSizeDoublePr10Vec NO_CONDITIONS;
 
-    TJointProbabilityOfLessLikelySamplesVec lowerBounds_[2] = {
+    TJointProbabilityOfLessLikelySamplesVec lowerBounds_[2]{
         TJointProbabilityOfLessLikelySamplesVec(coordinates.size()),
         TJointProbabilityOfLessLikelySamplesVec(coordinates.size())};
-    TJointProbabilityOfLessLikelySamplesVec upperBounds_[2] = {
+    TJointProbabilityOfLessLikelySamplesVec upperBounds_[2]{
         TJointProbabilityOfLessLikelySamplesVec(coordinates.size()),
         TJointProbabilityOfLessLikelySamplesVec(coordinates.size())};
 
@@ -159,7 +151,7 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal
     TSize10Vec marginalize(d - 1);
     TSizeDoublePr10Vec condition(d - 1);
     TDouble1Vec sc(1);
-    TDouble4Vec1Vec wc{TDouble4Vec(weightStyles.size())};
+    TDoubleWeightsAry1Vec wc(1);
 
     for (std::size_t i = 0; i < coordinates.size(); ++i) {
         std::size_t coordinate = coordinates[i];
@@ -187,8 +179,8 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal
 
         double lb[2], ub[2];
         maths_t::ETail tc[2];
-        if (!margin->probabilityOfLessLikelySamples(
-                calculation, weightStyles, sc,
wc, lb[0], ub[0], tc[0])) { + if (!margin->probabilityOfLessLikelySamples(calculation, sc, wc, + lb[0], ub[0], tc[0])) { LOG_ERROR(<< "Failed to compute probability for coordinate " << coordinate); return false; } @@ -197,8 +189,8 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal TUnivariatePriorPtr conditional( this->univariate(NO_MARGINS, condition).first); - if (!conditional->probabilityOfLessLikelySamples( - calculation, weightStyles, sc, wc, lb[1], ub[1], tc[1])) { + if (!conditional->probabilityOfLessLikelySamples(calculation, sc, wc, + lb[1], ub[1], tc[1])) { LOG_ERROR(<< "Failed to compute probability for coordinate " << coordinate); return false; } @@ -227,9 +219,8 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal } bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, double& lowerBound, double& upperBound, TTail10Vec& tail) const { @@ -247,13 +238,13 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal std::iota(coordinates.begin(), coordinates.end(), 0); TDouble10Vec1Vec sample(1); - TDouble10Vec4Vec1Vec weight(1); + TDouble10VecWeightsAry1Vec weight(1); TDouble10Vec2Vec lbs; TDouble10Vec2Vec ubs; for (std::size_t i = 0u; i < samples.size(); ++i) { sample[0] = samples[i]; weight[0] = weights[i]; - if (!this->probabilityOfLessLikelySamples(calculation, weightStyles, sample, weight, + if (!this->probabilityOfLessLikelySamples(calculation, sample, weight, coordinates, lbs, ubs, tail)) { return false; } @@ -281,6 +272,7 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, std::size_t y) const { + // We'll plot the marginal likelihood function over a range where // most of the mass is, i.e. the 99% confidence interval. 
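The multivariate classes get the same treatment with one extra level of structure: each weight slot holds one value per dimension, so nested initialisers such as TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))) collapse to helper calls like TWeights::singleUnit(2) in the printMarginalLikelihoodFunction hunk that follows. A minimal sketch of that layout, again using simplified stand-in types rather than the library's own:

    #include <array>
    #include <cassert>
    #include <vector>

    enum ESampleWeightStyle {
        E_SampleCountWeight = 0,
        E_SampleSeasonalVarianceScaleWeight,
        E_SampleCountVarianceScaleWeight,
        E_NumberWeightStyles
    };

    using TDouble10Vec = std::vector<double>; // one value per dimension
    using TDouble10VecWeightsAry = std::array<TDouble10Vec, E_NumberWeightStyles>;
    using TDouble10VecWeightsAry1Vec = std::vector<TDouble10VecWeightsAry>;

    // In the spirit of the library's unit-weight helpers: every style is
    // 1.0 in every dimension.
    TDouble10VecWeightsAry unitWeight(std::size_t dimension) {
        TDouble10VecWeightsAry result;
        result.fill(TDouble10Vec(dimension, 1.0));
        return result;
    }

    int main() {
        // One unit weight for one 2-d sample, replacing the nested
        // vector-of-vectors initialisation.
        TDouble10VecWeightsAry1Vec weight{unitWeight(2)};
        assert(weight.size() == 1);
        assert(weight[0][E_SampleCountWeight].size() == 2);
        return 0;
    }

The same helper family is what lets the mode-merge code earlier in this section express its seed weights as maths_t::countWeight(wl * ws / n, dimension) instead of hand-rolled vector plumbing.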
@@ -342,8 +334,8 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x,
     yabscissa << "];" << core_t::LINE_ENDING;
 
     likelihood << "likelihood = [";
-    TDouble10Vec1Vec sample(1, TDouble10Vec(2));
-    TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0)));
+    TDouble10Vec1Vec sample{TDouble10Vec(2)};
+    TDouble10VecWeightsAry1Vec weight(TWeights::singleUnit(2));
     x_ = xRange.first;
     for (std::size_t i = 0u; i < POINTS; ++i, x_ += dx) {
         y_ = yRange.first;
@@ -351,7 +343,7 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x,
             sample[0][0] = x_;
             sample[0][1] = y_;
             double l;
-            xyMargin->jointLogMarginalLikelihood(CConstantWeights::COUNT, sample, weight, l);
+            xyMargin->jointLogMarginalLikelihood(sample, weight, l);
             likelihood << std::exp(l) << " ";
         }
         likelihood << core_t::LINE_ENDING;
@@ -403,7 +395,7 @@ void CMultivariatePrior::addSamples(double n) {
 }
 
 bool CMultivariatePrior::check(const TDouble10Vec1Vec& samples,
-                               const TDouble10Vec4Vec1Vec& weights) const {
+                               const TDouble10VecWeightsAry1Vec& weights) const {
     if (samples.size() != weights.size()) {
         LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '"
                   << weights << "'");
@@ -426,16 +418,14 @@ bool CMultivariatePrior::check(const TDouble10Vec1Vec& samples,
 
 bool CMultivariatePrior::check(const TSize10Vec& marginalize,
                                const TSizeDoublePr10Vec& condition) const {
-    static const auto FIRST = [](const TSizeDoublePr& pair) {
-        return pair.first;
-    };
+    const auto first = [](const TSizeDoublePr& pair) { return pair.first; };
 
     std::size_t d = this->dimension();
 
     if ((marginalize.size() > 0 && marginalize.back() >= d) ||
         (condition.size() > 0 && condition.back().first >= d) ||
         CSetTools::setIntersectSize(
             marginalize.begin(), marginalize.end(),
-            boost::make_transform_iterator(condition.begin(), FIRST),
-            boost::make_transform_iterator(condition.end(), FIRST)) != 0) {
+            boost::make_transform_iterator(condition.begin(), first),
+            boost::make_transform_iterator(condition.end(), first)) != 0) {
         LOG_ERROR(<< "Invalid variables for computing univariate distribution: "
                   << "marginalize '" << marginalize << "'"
                   << ", condition '" << condition << "'");
diff --git a/lib/maths/CNaiveBayes.cc b/lib/maths/CNaiveBayes.cc
index 7d83fe6f18..64cb285d0b 100644
--- a/lib/maths/CNaiveBayes.cc
+++ b/lib/maths/CNaiveBayes.cc
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -41,7 +42,7 @@ CNaiveBayesFeatureDensityFromPrior::CNaiveBayesFeatureDensityFromPrior(const CPr
 }
 
 void CNaiveBayesFeatureDensityFromPrior::add(const TDouble1Vec& x) {
-    m_Prior->addSamples(CConstantWeights::COUNT, x, CConstantWeights::SINGLE_UNIT);
+    m_Prior->addSamples(x, maths_t::CUnitWeights::SINGLE_UNIT);
 }
 
 CNaiveBayesFeatureDensityFromPrior* CNaiveBayesFeatureDensityFromPrior::clone() const {
@@ -67,7 +68,7 @@ void CNaiveBayesFeatureDensityFromPrior::acceptPersistInserter(core::CStatePersi
 
 double CNaiveBayesFeatureDensityFromPrior::logValue(const TDouble1Vec& x) const {
     double result;
-    if (m_Prior->jointLogMarginalLikelihood(CConstantWeights::COUNT, x, CConstantWeights::SINGLE_UNIT,
+    if (m_Prior->jointLogMarginalLikelihood(x, maths_t::CUnitWeights::SINGLE_UNIT,
                                             result) != maths_t::E_FpNoErrors) {
         LOG_ERROR("Bad density value at " << x << " for " << m_Prior->print());
         return boost::numeric::bounds<double>::lowest();
     }
@@ -77,9 +78,9 @@ double CNaiveBayesFeatureDensityFromPrior::logValue(const TDouble1Vec& x) const
 
 double CNaiveBayesFeatureDensityFromPrior::logMaximumValue() const {
     double result;
-    if (m_Prior->jointLogMarginalLikelihood(
-            CConstantWeights::COUNT, {m_Prior->marginalLikelihoodMode()},
-            CConstantWeights::SINGLE_UNIT, result) != maths_t::E_FpNoErrors) {
+    if (m_Prior->jointLogMarginalLikelihood({m_Prior->marginalLikelihoodMode()},
+                                            maths_t::CUnitWeights::SINGLE_UNIT,
+                                            result) != maths_t::E_FpNoErrors) {
         LOG_ERROR("Bad density value for " << m_Prior->print());
         return boost::numeric::bounds<double>::lowest();
     }
diff --git a/lib/maths/CNormalMeanPrecConjugate.cc b/lib/maths/CNormalMeanPrecConjugate.cc
index 7f12a79650..1dd2332e45 100644
--- a/lib/maths/CNormalMeanPrecConjugate.cc
+++ b/lib/maths/CNormalMeanPrecConjugate.cc
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -48,10 +49,8 @@ const double MINIMUM_GAUSSIAN_SHAPE = 100.0;
 
 namespace detail {
 
-using TWeightStyleVec = maths_t::TWeightStyleVec;
 using TDouble1Vec = core::CSmallVector<double, 1>;
-using TDouble4Vec = core::CSmallVector<double, 4>;
-using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
+using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;
 using TDoubleDoublePr = std::pair<double, double>;
 using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
 
@@ -66,9 +65,6 @@ struct SPlusWeight {
 //! (integrating over the prior for the normal mean and precision) and
 //! aggregate the results using \p aggregate.
 //!
-//! \param weightStyles Controls the interpretation of the weights that
-//! are associated with each sample. See maths_t::ESampleWeightStyle for
-//! more details.
 //! \param samples The weighted samples.
 //! \param weights The weights of each sample in \p samples.
 //! \param func The function to evaluate.
@@ -83,9 +79,8 @@ struct SPlusWeight {
 //! \param precision The precision of the conditional mean prior.
 //! \param result Filled in with the aggregation of results of \p func.
 template<typename FUNC, typename AGGREGATOR>
-bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
-                                         const TDouble1Vec& samples,
-                                         const TDouble4Vec1Vec& weights,
+bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples,
+                                         const TDoubleWeightsAry1Vec& weights,
                                          FUNC func,
                                          AGGREGATOR aggregate,
                                          bool isNonInformative,
@@ -123,7 +118,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
         // of this distribution.)
         for (std::size_t i = 0u; i < samples.size(); ++i) {
             double x = samples[i];
-            double n = maths_t::count(weightStyles, weights[i]);
+            double n = maths_t::count(weights[i]);
             if (!CMathsFuncs::isFinite(n)) {
                 LOG_ERROR(<< "Bad count weight " << n);
                 return false;
@@ -138,11 +133,10 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
         // and the error function is significantly cheaper to compute.
 
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            double n = maths_t::count(weightStyles, weights[i]);
-            double seasonalScale = std::sqrt(
-                maths_t::seasonalVarianceScale(weightStyles, weights[i]));
-            double countVarianceScale =
-                maths_t::countVarianceScale(weightStyles, weights[i]);
+            double n = maths_t::count(weights[i]);
+            double seasonalScale =
+                std::sqrt(maths_t::seasonalVarianceScale(weights[i]));
+            double countVarianceScale = maths_t::countVarianceScale(weights[i]);
 
             double x = seasonalScale != 1.0 ?
                           predictionMean + (samples[i] - predictionMean) / seasonalScale
@@ -154,7 +148,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
             double deviation =
                 std::sqrt((scaledPrecision + 1.0) / scaledPrecision * scaledRate / shape);
 
-            boost::math::normal_distribution<> normal(mean, deviation);
+            boost::math::normal normal(mean, deviation);
             result = aggregate(result, func(normal, x + offset), n);
         }
     } else {
@@ -165,14 +159,13 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
         //
         // and using the student's t distribution with 2*a degrees of freedom.
 
-        boost::math::students_t_distribution<> students(2.0 * shape);
+        boost::math::students_t students(2.0 * shape);
 
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            double n = maths_t::count(weightStyles, weights[i]);
-            double seasonalScale = std::sqrt(
-                maths_t::seasonalVarianceScale(weightStyles, weights[i]));
-            double countVarianceScale =
-                maths_t::countVarianceScale(weightStyles, weights[i]);
+            double n = maths_t::count(weights[i]);
+            double seasonalScale =
+                std::sqrt(maths_t::seasonalVarianceScale(weights[i]));
+            double countVarianceScale = maths_t::countVarianceScale(weights[i]);
 
             double x = seasonalScale != 1.0
                            ? predictionMean + (samples[i] - predictionMean) / seasonalScale
@@ -207,29 +200,27 @@
 template<typename F>
 class CEvaluateOnSamples : core::CNonCopyable {
 public:
-    CEvaluateOnSamples(const TWeightStyleVec& weightStyles,
-                       const TDouble1Vec& samples,
-                       const TDouble4Vec1Vec& weights,
+    CEvaluateOnSamples(const TDouble1Vec& samples,
+                       const TDoubleWeightsAry1Vec& weights,
                        bool isNonInformative,
                        double mean,
                        double precision,
                        double shape,
                        double rate,
                        double predictionMean)
-        : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights),
+        : m_Samples(samples), m_Weights(weights),
           m_IsNonInformative(isNonInformative), m_Mean(mean),
           m_Precision(precision), m_Shape(shape), m_Rate(rate),
           m_PredictionMean(predictionMean) {}
 
     bool operator()(double x, double& result) const {
         return evaluateFunctionOnJointDistribution(
-            m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative,
-            x, m_Shape, m_Rate, m_Mean, m_Precision, m_PredictionMean, result);
+            m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, x,
+            m_Shape, m_Rate, m_Mean, m_Precision, m_PredictionMean, result);
     }
 
 private:
-    const TWeightStyleVec& m_WeightStyles;
     const TDouble1Vec& m_Samples;
-    const TDouble4Vec1Vec& m_Weights;
+    const TDoubleWeightsAry1Vec& m_Weights;
     bool m_IsNonInformative;
     double m_Mean;
     double m_Precision;
@@ -246,26 +237,26 @@ class CEvaluateOnSamples : core::CNonCopyable {
 class CProbabilityOfLessLikelySamples : core::CNonCopyable {
 public:
     CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                    const TWeightStyleVec& weightStyles,
                                     const TDouble1Vec& samples,
-                                    const TDouble4Vec1Vec& weights,
+                                    const TDoubleWeightsAry1Vec& weights,
                                     bool isNonInformative,
                                     double mean,
                                     double precision,
                                     double shape,
                                     double rate,
                                     double predictionMean)
-        : m_Calculation(calculation), m_WeightStyles(weightStyles),
-          m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative),
-          m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate),
+        : m_Calculation(calculation), m_Samples(samples), m_Weights(weights),
+          m_IsNonInformative(isNonInformative), m_Mean(mean),
+          m_Precision(precision), m_Shape(shape), m_Rate(rate),
          m_PredictionMean(predictionMean), m_Tail(0) {}
 
     bool
operator()(double x, double& result) const { + CJointProbabilityOfLessLikelySamples probability; maths_t::ETail tail = maths_t::E_UndeterminedTail; if (!evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, + m_Samples, m_Weights, boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative, @@ -284,9 +275,8 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { private: maths_t::EProbabilityCalculation m_Calculation; - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Mean; double m_Precision; @@ -316,9 +306,8 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { //! a and b are the prior Gamma shape and rate, respectively. class CLogMarginalLikelihood : core::CNonCopyable { public: - CLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double mean, double precision, double shape, @@ -328,11 +317,12 @@ class CLogMarginalLikelihood : core::CNonCopyable { m_NumberSamples(0.0), m_WeightedNumberSamples(0.0), m_SampleMean(0.0), m_SampleSquareDeviation(0.0), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { - this->precompute(weightStyles, samples, weights, predictionMean); + this->precompute(samples, weights, predictionMean); } //! Evaluate the log marginal likelihood at the offset \p x. bool operator()(double x, double& result) const { + if (m_ErrorStatus & maths_t::E_FpFailed) { return false; } @@ -359,9 +349,8 @@ class CLogMarginalLikelihood : core::CNonCopyable { private: //! Compute all the constants in the integrand. 
-    void precompute(const TWeightStyleVec& weightStyles,
-                    const TDouble1Vec& samples,
-                    const TDouble4Vec1Vec& weights,
+    void precompute(const TDouble1Vec& samples,
+                    const TDoubleWeightsAry1Vec& weights,
                     double predictionMean) {
         m_NumberSamples = 0.0;
         TMeanVarAccumulator sampleMoments;
 
@@ -369,11 +358,10 @@
         try {
             for (std::size_t i = 0u; i < samples.size(); ++i) {
-                double n = maths_t::countForUpdate(weightStyles, weights[i]);
-                double seasonalScale = std::sqrt(
-                    maths_t::seasonalVarianceScale(weightStyles, weights[i]));
-                double countVarianceScale =
-                    maths_t::countVarianceScale(weightStyles, weights[i]);
+                double n = maths_t::countForUpdate(weights[i]);
+                double seasonalScale =
+                    std::sqrt(maths_t::seasonalVarianceScale(weights[i]));
+                double countVarianceScale = maths_t::countVarianceScale(weights[i]);
                 double w = 1.0 / countVarianceScale;
                 m_NumberSamples += n;
                 if (seasonalScale != 1.0) {
@@ -542,9 +530,8 @@ void CNormalMeanPrecConjugate::setToNonInformative(double /*offset*/, double dec
     *this = nonInformativePrior(this->dataType(), decayRate);
 }
 
-double CNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/,
-                                              const TDouble1Vec& /*samples*/,
-                                              const TDouble4Vec1Vec& /*weights*/) {
+double CNormalMeanPrecConjugate::adjustOffset(const TDouble1Vec& /*samples*/,
+                                              const TDoubleWeightsAry1Vec& /*weights*/) {
     return 0.0;
 }
 
@@ -552,13 +539,11 @@ double CNormalMeanPrecConjugate::offset() const {
     return 0.0;
 }
 
-void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles,
-                                          const TDouble1Vec& samples,
-                                          const TDouble4Vec1Vec& weights) {
+void CNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples,
+                                          const TDoubleWeightsAry1Vec& weights) {
     if (samples.empty()) {
         return;
     }
-
     if (samples.size() != weights.size()) {
         LOG_ERROR(<< "Mismatch in samples '"
                   << core::CContainerPrinter::print(samples) << "' and weights '"
@@ -566,7 +551,7 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles,
         return;
     }
 
-    this->CPrior::addSamples(weightStyles, samples, weights);
+    this->CPrior::addSamples(samples, weights);
 
     // If {x(i)} denotes the sample vector, the likelihood function is:
     //   likelihood(x | p', m') ~
@@ -623,18 +608,12 @@
     double numberSamples = 0.0;
     TMeanVarAccumulator sampleMoments;
-    try {
-        for (std::size_t i = 0u; i < samples.size(); ++i) {
-            double n = maths_t::countForUpdate(weightStyles, weights[i]);
-            double varianceScale =
-                maths_t::seasonalVarianceScale(weightStyles, weights[i]) *
-                maths_t::countVarianceScale(weightStyles, weights[i]);
-            numberSamples += n;
-            sampleMoments.add(samples[i], n / varianceScale);
-        }
-    } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to update likelihood: " << e.what());
-        return;
+    for (std::size_t i = 0u; i < samples.size(); ++i) {
+        double n = maths_t::countForUpdate(weights[i]);
+        double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
+                               maths_t::countVarianceScale(weights[i]);
+        numberSamples += n;
+        sampleMoments.add(samples[i], n / varianceScale);
     }
     double scaledNumberSamples = CBasicStatistics::count(sampleMoments);
     double sampleMean = CBasicStatistics::mean(sampleMoments);
@@ -725,21 +704,20 @@ void CNormalMeanPrecConjugate::propagateForwardsByTime(double time) {
 
 CNormalMeanPrecConjugate::TDoubleDoublePr
 CNormalMeanPrecConjugate::marginalLikelihoodSupport() const {
-    return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                          boost::numeric::bounds<double>::highest());
+    return {boost::numeric::bounds<double>::lowest(),
+            boost::numeric::bounds<double>::highest()};
 }
 
 double CNormalMeanPrecConjugate::marginalLikelihoodMean() const {
     return this->isInteger() ? this->mean() - 0.5 : this->mean();
 }
 
-double CNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
-                                                        const TDouble4Vec& /*weights*/) const {
+double CNormalMeanPrecConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& /*weights*/) const {
     return this->marginalLikelihoodMean();
 }
 
-double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles,
-                                                            const TDouble4Vec& weights) const {
+double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const {
+
     if (this->isNonInformative() || m_GammaShape <= 1.0) {
         return boost::numeric::bounds<double>::highest();
     }
@@ -753,13 +731,8 @@ double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVe
     // and use the fact that X conditioned on M and P is a normal. The
     // first term evaluates to 1 / P and the second term 1 / p / t whence...
 
-    double varianceScale = 1.0;
-    try {
-        varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) *
-                        maths_t::countVarianceScale(weightStyles, weights);
-    } catch (const std::exception& e) {
-        LOG_ERROR(<< "Failed to get variance scale: " << e.what());
-    }
+    double varianceScale = maths_t::seasonalVarianceScale(weights) *
+                           maths_t::countVarianceScale(weights);
     double a = m_GammaShape;
     double b = m_GammaRate;
     double t = m_GaussianPrecision;
@@ -768,8 +741,7 @@ double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVe
 
 CNormalMeanPrecConjugate::TDoubleDoublePr
 CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage,
-                                                               const TWeightStyleVec& weightStyles,
-                                                               const TDouble4Vec& weights) const {
+                                                               const TDoubleWeightsAry& weights) const {
     if (this->isNonInformative()) {
         return this->marginalLikelihoodSupport();
     }
@@ -780,9 +752,8 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage
     // We use the fact that the marginal likelihood is a t-distribution.
 
     try {
-        double seasonalScale =
-            std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights));
-        double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights);
+        double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weights));
+        double countVarianceScale = maths_t::countVarianceScale(weights);
 
         double scaledPrecision = countVarianceScale * m_GaussianPrecision;
         double scaledRate = countVarianceScale * m_GammaRate;
@@ -791,7 +762,7 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage
         double m = this->marginalLikelihoodMean();
 
         if (m_GammaShape > MINIMUM_GAUSSIAN_SHAPE) {
-            boost::math::normal_distribution<> normal(m_GaussianMean, scale);
+            boost::math::normal normal(m_GaussianMean, scale);
             double x1 = boost::math::quantile(normal, (1.0 - percentage) / 2.0) -
                         (this->isInteger() ? 0.5 : 0.0);
             x1 = seasonalScale != 1.0 ? m + seasonalScale * (x1 - m) : x1;
@@ -801,9 +772,9 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage
                      : x1;
            x2 = seasonalScale != 1.0 ?
                     m + seasonalScale * (x2 - m) : x2;
             LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2 << ", scale = " << scale);
-            return std::make_pair(x1, x2);
+            return {x1, x2};
         }
 
-        boost::math::students_t_distribution<> students(2.0 * m_GammaShape);
+        boost::math::students_t students(2.0 * m_GammaShape);
         double x1 = m_GaussianMean +
                     scale * boost::math::quantile(students, (1.0 - percentage) / 2.0) -
                     (this->isInteger() ? 0.5 : 0.0);
@@ -816,7 +787,7 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage
                  : x1;
         x2 = seasonalScale != 1.0 ? m + seasonalScale * (x2 - m) : x2;
         LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2 << ", scale = " << scale);
-        return std::make_pair(x1, x2);
+        return {x1, x2};
     } catch (const std::exception& e) {
         LOG_ERROR(<< "Failed to compute confidence interval: " << e.what());
     }
@@ -825,9 +796,8 @@
 }
 
 maths_t::EFloatingPointErrorStatus
-CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                     const TDouble1Vec& samples,
-                                                     const TDouble4Vec1Vec& weights,
+CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples,
+                                                     const TDoubleWeightsAry1Vec& weights,
                                                      double& result) const {
     result = 0.0;
 
@@ -857,8 +827,8 @@ CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weig
     }
 
     detail::CLogMarginalLikelihood logMarginalLikelihood(
-        weightStyles, samples, weights, m_GaussianMean, m_GaussianPrecision,
-        m_GammaShape, m_GammaRate, this->marginalLikelihoodMean());
+        samples, weights, m_GaussianMean, m_GaussianPrecision, m_GammaShape,
+        m_GammaRate, this->marginalLikelihoodMean());
     if (this->isInteger()) {
         CIntegration::logGaussLegendre<CIntegration::OrderThree>(
             logMarginalLikelihood, 0.0, 1.0, result);
@@ -953,7 +923,7 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample
               << ", numberSamples = " << numberSamples);
 
     try {
-        boost::math::normal_distribution<> normal(m_GaussianMean, std::sqrt(variance));
+        boost::math::normal normal(m_GaussianMean, std::sqrt(variance));
 
         for (std::size_t i = 1u; i < numberSamples; ++i) {
             double q = static_cast<double>(i) / static_cast<double>(numberSamples);
@@ -986,7 +956,7 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample
     double degreesFreedom = 2.0 * m_GammaShape;
 
     try {
-        boost::math::students_t_distribution<> students(degreesFreedom);
+        boost::math::students_t students(degreesFreedom);
 
         double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision *
                                  m_GammaRate / m_GammaShape);
@@ -1045,16 +1015,16 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample
     }
 }
 
-bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles,
-                                                const TDouble1Vec& samples,
-                                                const TDouble4Vec1Vec& weights,
+bool CNormalMeanPrecConjugate::minusLogJointCdf(const TDouble1Vec& samples,
+                                                const TDoubleWeightsAry1Vec& weights,
                                                 double& lowerBound,
                                                 double& upperBound) const {
+
     using TMinusLogCdf = detail::CEvaluateOnSamples<CTools::SMinusLogCdf>;
 
     lowerBound = upperBound = 0.0;
 
-    TMinusLogCdf minusLogCdf(weightStyles, samples, weights, this->isNonInformative(),
+    TMinusLogCdf minusLogCdf(samples, weights, this->isNonInformative(),
                              m_GaussianMean, m_GaussianPrecision, m_GammaShape,
                              m_GammaRate, this->marginalLikelihoodMean());
 
@@ -1085,18 +1055,18 @@ bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightSty
     return true;
 }
 
-bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
-                                                          const TDouble1Vec& samples,
-                                                          const TDouble4Vec1Vec& weights,
+bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples,
+                                                          const TDoubleWeightsAry1Vec& weights,
                                                           double& lowerBound,
                                                           double& upperBound) const {
+
     using TMinusLogCdfComplement = detail::CEvaluateOnSamples<CTools::SMinusLogCdfComplement>;
 
     lowerBound = upperBound = 0.0;
 
     TMinusLogCdfComplement minusLogCdfComplement(
-        weightStyles, samples, weights, this->isNonInformative(), m_GaussianMean,
-        m_GaussianPrecision, m_GammaShape, m_GammaRate, this->marginalLikelihoodMean());
+        samples, weights, this->isNonInformative(), m_GaussianMean, m_GaussianPrecision,
+        m_GammaShape, m_GammaRate, this->marginalLikelihoodMean());
 
     if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
@@ -1127,19 +1097,18 @@ bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec&
 
 bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples(
     maths_t::EProbabilityCalculation calculation,
-    const TWeightStyleVec& weightStyles,
     const TDouble1Vec& samples,
-    const TDouble4Vec1Vec& weights,
+    const TDoubleWeightsAry1Vec& weights,
     double& lowerBound,
     double& upperBound,
     maths_t::ETail& tail) const {
+
     lowerBound = upperBound = 0.0;
     tail = maths_t::E_UndeterminedTail;
 
     detail::CProbabilityOfLessLikelySamples probability(
-        calculation, weightStyles, samples, weights, this->isNonInformative(),
-        m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate,
-        this->marginalLikelihoodMean());
+        calculation, samples, weights, this->isNonInformative(), m_GaussianMean,
+        m_GaussianPrecision, m_GammaShape, m_GammaRate, this->marginalLikelihoodMean());
 
     if (this->isInteger()) {
         // If the data are discrete we compute the approximate expectation
@@ -1182,13 +1151,14 @@ void CNormalMeanPrecConjugate::print(const std::string& indent, std::string& res
         result += "non-informative";
         return;
     }
-    result += "mean = " + core::CStringUtils::typeToStringPretty(this->marginalLikelihoodMean()) +
-              " sd = " +
-              core::CStringUtils::typeToStringPretty(
-                  std::sqrt(this->marginalLikelihoodVariance()));
+    double mean = this->marginalLikelihoodMean();
+    double sd = std::sqrt(this->marginalLikelihoodVariance());
+    result += "mean = " + core::CStringUtils::typeToStringPretty(mean);
+    result += " sd = " + core::CStringUtils::typeToStringPretty(sd);
 }
 
 std::string CNormalMeanPrecConjugate::printJointDensityFunction() const {
+
     if (this->isNonInformative()) {
         // The non-informative prior is improper and effectively 0 everywhere.
         return std::string();
@@ -1202,7 +1172,7 @@ std::string CNormalMeanPrecConjugate::printJointDensityFunction() const {
 
     boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
     double precision = m_GaussianPrecision * this->precision();
-    boost::math::normal_distribution<> gaussian(m_GaussianMean, 1.0 / std::sqrt(precision));
+    boost::math::normal gaussian(m_GaussianMean, 1.0 / std::sqrt(precision));
 
     double xStart = boost::math::quantile(gamma, (1.0 - RANGE) / 2.0);
     double xEnd = boost::math::quantile(gamma, (1.0 + RANGE) / 2.0);
@@ -1232,7 +1202,7 @@ std::string CNormalMeanPrecConjugate::printJointDensityFunction() const {
         y = yStart;
         for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) {
             double conditionalPrecision = m_GaussianPrecision * x;
-            boost::math::normal_distribution<> conditionalGaussian(
+            boost::math::normal conditionalGaussian(
                 m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision));
 
             pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y))
@@ -1291,8 +1261,8 @@ double CNormalMeanPrecConjugate::precision() const {
 CNormalMeanPrecConjugate::TDoubleDoublePr
 CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const {
     if (this->isNonInformative()) {
-        return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                              boost::numeric::bounds<double>::highest());
+        return {boost::numeric::bounds<double>::lowest(),
+                boost::numeric::bounds<double>::highest()};
     }
 
     // Compute the symmetric confidence interval around the median of the
@@ -1316,7 +1286,7 @@ CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const {
     double lowerPercentile = 0.5 * (1.0 - percentage);
     double upperPercentile = 0.5 * (1.0 + percentage);
 
-    boost::math::students_t_distribution<> students(2.0 * m_GammaShape);
+    boost::math::students_t students(2.0 * m_GammaShape);
 
     double xLower = boost::math::quantile(students, lowerPercentile);
     xLower = m_GaussianMean +
@@ -1325,14 +1295,14 @@ CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const {
     xUpper = m_GaussianMean +
              xUpper / std::sqrt(m_GaussianPrecision * m_GammaShape / m_GammaRate);
 
-    return std::make_pair(xLower, xUpper);
+    return {xLower, xUpper};
 }
 
 CNormalMeanPrecConjugate::TDoubleDoublePr
 CNormalMeanPrecConjugate::confidenceIntervalPrecision(double percentage) const {
     if (this->isNonInformative()) {
-        return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                              boost::numeric::bounds<double>::highest());
+        return {boost::numeric::bounds<double>::lowest(),
+                boost::numeric::bounds<double>::highest()};
     }
 
     percentage /= 100.0;
     double lowerPercentile = 0.5 * (1.0 - percentage);
     double upperPercentile = 0.5 * (1.0 + percentage);
 
     // The marginal prior distribution for the precision is gamma.
boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), - boost::math::quantile(gamma, upperPercentile)); + return {boost::math::quantile(gamma, lowerPercentile), + boost::math::quantile(gamma, upperPercentile)}; } bool CNormalMeanPrecConjugate::equalTolerance(const CNormalMeanPrecConjugate& rhs, diff --git a/lib/maths/COneOfNPrior.cc b/lib/maths/COneOfNPrior.cc index c14e987f8a..18bf4a3c3c 100644 --- a/lib/maths/COneOfNPrior.cc +++ b/lib/maths/COneOfNPrior.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -220,14 +221,13 @@ bool COneOfNPrior::needsOffset() const { return false; } -double COneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +double COneOfNPrior::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { TMeanAccumulator result; TDouble5Vec penalties; for (auto& model : m_Models) { - double penalty = model.second->adjustOffset(weightStyles, samples, weights); + double penalty = model.second->adjustOffset(samples, weights); penalties.push_back(penalty); result.add(penalty, model.first); } @@ -254,13 +254,11 @@ double COneOfNPrior::offset() const { return offset; } -void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void COneOfNPrior::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -268,10 +266,10 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, return; } - this->adjustOffset(weightStyles, samples, weights); + this->adjustOffset(samples, weights); double penalty = CTools::fastLog(this->numberSamples()); - this->CPrior::addSamples(weightStyles, samples, weights); + this->CPrior::addSamples(samples, weights); penalty = (penalty - CTools::fastLog(this->numberSamples())) / 2.0; // For this 1-of-n model we assume that all the data come from one @@ -338,8 +336,7 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, // Update the weights with the marginal likelihoods. double logLikelihood = 0.0; maths_t::EFloatingPointErrorStatus status = - use ? model.second->jointLogMarginalLikelihood(weightStyles, samples, - weights, logLikelihood) + use ? model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood) : maths_t::E_FpOverflowed; if (status & maths_t::E_FpFailed) { @@ -357,7 +354,7 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, } // Update the component prior distribution. 
- model.second->addSamples(weightStyles, samples, weights); + model.second->addSamples(samples, weights); used.push_back(use); uses.push_back(model.second->participatesInModelSelection()); @@ -376,7 +373,7 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, double n = 0.0; try { for (const auto& weight : weights) { - n += maths_t::count(weightStyles, weight); + n += maths_t::count(weight); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to add samples: " << e.what()); @@ -434,6 +431,7 @@ void COneOfNPrior::propagateForwardsByTime(double time) { } COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodSupport() const { + TDoubleDoublePr result(MINUS_INF, INF); // We define this is as the intersection of the component model supports. @@ -449,6 +447,7 @@ COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodSupport() const { } double COneOfNPrior::marginalLikelihoodMean() const { + if (this->isNonInformative()) { return this->medianModelMean(); } @@ -472,6 +471,7 @@ double COneOfNPrior::marginalLikelihoodMean() const { } double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const { + if (this->isNonInformative()) { return this->medianModelMean(); } @@ -490,24 +490,24 @@ double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const { return result / Z; } -double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double COneOfNPrior::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const { + // We approximate this as the weighted average of the component // model modes. // Declared outside the loop to minimize the number of times // they are created. TDouble1Vec sample(1); - TDouble4Vec1Vec weight(1, weights); + TDoubleWeightsAry1Vec weight(1, weights); TMeanAccumulator mode; for (const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { double wi = model.first; - double mi = model.second->marginalLikelihoodMode(weightStyles, weights); + double mi = model.second->marginalLikelihoodMode(weights); double logLikelihood; sample[0] = mi; - model.second->jointLogMarginalLikelihood(weightStyles, sample, weight, logLikelihood); + model.second->jointLogMarginalLikelihood(sample, weight, logLikelihood); mode.add(mi, wi * std::exp(logLikelihood)); } } @@ -517,8 +517,8 @@ double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, return CTools::truncate(result, support.first, support.second); } -double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double COneOfNPrior::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return INF; } @@ -534,7 +534,7 @@ double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightSty for (const auto& model : m_Models) { double wi = model.first; if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { - result += wi * model.second->marginalLikelihoodVariance(weightStyles, weights); + result += wi * model.second->marginalLikelihoodVariance(weights); Z += wi; } } @@ -543,8 +543,8 @@ double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightSty COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { + const TDoubleWeightsAry& weights) const { + // We approximate this as the weighted sum of the component model // intervals. 
To compute the weights we expand all component model // marginal likelihoods about a reasonable estimate for the true @@ -570,22 +570,22 @@ COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, for (const auto& model : m_Models) { double weight = model.first; if (weight >= MAXIMUM_RELATIVE_ERROR) { - TDoubleDoublePr interval = model.second->marginalLikelihoodConfidenceInterval( - percentage, weightStyles, weights); + TDoubleDoublePr interval = + model.second->marginalLikelihoodConfidenceInterval(percentage, weights); x1.add(interval.first, weight); x2.add(interval.second, weight); } } LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); - return std::make_pair(CBasicStatistics::mean(x1), CBasicStatistics::mean(x2)); + return {CBasicStatistics::mean(x1), CBasicStatistics::mean(x2)}; } maths_t::EFloatingPointErrorStatus -COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +COneOfNPrior::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { + result = 0.0; if (samples.empty()) { @@ -618,8 +618,8 @@ COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, for (const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { double logLikelihood; - maths_t::EFloatingPointErrorStatus status = model.second->jointLogMarginalLikelihood( - weightStyles, samples, weights, logLikelihood); + maths_t::EFloatingPointErrorStatus status = + model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood); if (status & maths_t::E_FpFailed) { return status; } @@ -669,6 +669,7 @@ COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { + samples.clear(); if (numberSamples == 0 || this->isNonInformative()) { @@ -712,11 +713,11 @@ void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, } bool COneOfNPrior::minusLogJointCdfImpl(bool complement, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + lowerBound = upperBound = 0.0; if (samples.empty()) { @@ -756,13 +757,11 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, double li = 0.0; double ui = 0.0; - if (complement && !model.minusLogJointCdfComplement(weightStyles, samples, - weights, li, ui)) { + if (complement && !model.minusLogJointCdfComplement(samples, weights, li, ui)) { LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); return false; - } else if (!complement && - !model.minusLogJointCdf(weightStyles, samples, weights, li, ui)) { + } else if (!complement && !model.minusLogJointCdf(samples, weights, li, ui)) { LOG_ERROR(<< "Failed computing c.d.f. 
for " << core::CContainerPrinter::print(samples)); return false; @@ -809,31 +808,29 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, return true; } -bool COneOfNPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool COneOfNPrior::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return this->minusLogJointCdfImpl(false, // complement - weightStyles, samples, weights, lowerBound, upperBound); + return this->minusLogJointCdfImpl(false /*complement*/, samples, weights, + lowerBound, upperBound); } -bool COneOfNPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool COneOfNPrior::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return this->minusLogJointCdfImpl(true, // complement - weightStyles, samples, weights, lowerBound, upperBound); + return this->minusLogJointCdfImpl(true /*complement*/, samples, weights, + lowerBound, upperBound); } bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { + lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; @@ -841,7 +838,6 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati LOG_ERROR(<< "Can't compute distribution for empty sample set"); return false; } - if (this->isNonInformative()) { lowerBound = upperBound = 1.0; return true; @@ -876,8 +872,7 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati double modelLowerBound, modelUpperBound; maths_t::ETail modelTail; - if (!model.probabilityOfLessLikelySamples(calculation, weightStyles, - samples, weights, modelLowerBound, + if (!model.probabilityOfLessLikelySamples(calculation, samples, weights, modelLowerBound, modelUpperBound, modelTail)) { // Logging handled at a lower level. 
             return false;
@@ -888,7 +883,7 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati
 
         lowerBound += weight * modelLowerBound;
         upperBound += weight * modelUpperBound;
-        tail_.add(TDoubleTailPr(weight * (modelLowerBound + modelUpperBound), modelTail));
+        tail_.add({weight * (modelLowerBound + modelUpperBound), modelTail});
     }
 
     if (!(lowerBound >= 0.0 && lowerBound <= 1.001) ||
@@ -923,6 +918,7 @@ bool COneOfNPrior::isNonInformative() const {
 }
 
 void COneOfNPrior::print(const std::string& indent, std::string& result) const {
+
     result += core_t::LINE_ENDING + indent + "one-of-n";
     if (this->isNonInformative()) {
         result += " non-informative";
@@ -985,6 +981,7 @@ COneOfNPrior::TDoubleVec COneOfNPrior::weights() const {
 }
 
 COneOfNPrior::TDoubleVec COneOfNPrior::logWeights() const {
+
     TDoubleVec result;
     result.reserve(m_Models.size());
 
@@ -1043,6 +1040,7 @@ bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams&
 }
 
 COneOfNPrior::TDoubleSizePr5Vec COneOfNPrior::normalizedLogWeights() const {
+
     TDoubleSizePr5Vec result;
     double Z = 0.0;
     for (std::size_t i = 0u; i < m_Models.size(); ++i) {
diff --git a/lib/maths/CPoissonMeanConjugate.cc b/lib/maths/CPoissonMeanConjugate.cc
index a56cefe743..bf897a6d85 100644
--- a/lib/maths/CPoissonMeanConjugate.cc
+++ b/lib/maths/CPoissonMeanConjugate.cc
@@ -55,9 +55,7 @@ struct SStaticCast {
 namespace detail {
 
 using TDouble1Vec = core::CSmallVector<double, 1>;
-using TDouble4Vec = core::CSmallVector<double, 4>;
-using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
-using TWeightStyleVec = maths_t::TWeightStyleVec;
+using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;
 
 //! Adds "weight" x "right operand" to the "left operand".
 struct SPlusWeight {
@@ -70,9 +68,6 @@ struct SPlusWeight {
 //! (integrating over the prior for the Poisson rate) and aggregate the
 //! results using \p aggregate.
 //!
-//! \param[in] weightStyles Controls the interpretation of the weight(s) that
-//! are associated with each sample. See maths_t::ESampleWeightStyle for more
-//! details.
 //! \param[in] samples The weighted samples.
 //! \param[in] func The function to evaluate.
 //! \param[in] aggregate The function to aggregate the results of \p func.
@@ -82,9 +77,8 @@ struct SPlusWeight {
 //! \param[in] rate The rate of the rate prior.
 //! \param[out] result Filled in with the aggregation of results of \p func.
 template<typename FUNC, typename AGGREGATOR>
-bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
-                                         const TDouble1Vec& samples,
-                                         const TDouble4Vec1Vec& weights,
+bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples,
+                                         const TDoubleWeightsAry1Vec& weights,
                                          FUNC func,
                                          AGGREGATOR aggregate,
                                          double offset,
@@ -121,7 +115,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
         // of this distribution.)
         for (std::size_t i = 0u; i < samples.size(); ++i) {
             double x = samples[i] + offset;
-            double n = maths_t::count(weightStyles, weights[i]);
+            double n = maths_t::count(weights[i]);
             result = aggregate(result, func(CTools::SImproperDistribution(), x), n);
         }
     } else {
@@ -139,18 +133,18 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles,
         // and the error function is significantly cheaper to compute.
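// For context: the branch that follows is the standard gamma-Poisson
// predictive. Integrating a Poisson likelihood over a gamma(shape, rate)
// prior yields a negative binomial with r = shape and success fraction
// rate / (rate + 1); for large means a moment-matched Gaussian is
// substituted because the error function is far cheaper to evaluate. A
// self-contained sketch; the 100.0 threshold stands in for the library's
// MINIMUM_GAUSSIAN_MEAN, whose value is assumed here:
#include <boost/math/distributions/negative_binomial.hpp>
#include <boost/math/distributions/normal.hpp>
#include <cmath>

double posteriorPredictiveCdf(double shape, double rate, double x,
                              double minimumGaussianMean = 100.0) {
    double mean = shape / rate;
    if (mean > minimumGaussianMean) {
        // Moment matched: the predictive variance is (rate + 1) / rate * mean.
        double deviation = std::sqrt((rate + 1.0) / rate * mean);
        boost::math::normal_distribution<> normal(mean, deviation);
        return boost::math::cdf(normal, x);
    }
    boost::math::negative_binomial_distribution<> negativeBinomial(
        shape, rate / (rate + 1.0));
    return boost::math::cdf(negativeBinomial, x);
}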
for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double x = samples[i] + offset; double mean = shape / rate; if (mean > MINIMUM_GAUSSIAN_MEAN) { double deviation = std::sqrt((rate + 1.0) / rate * mean); - boost::math::normal_distribution<> normal(mean, deviation); + boost::math::normal normal(mean, deviation); result = aggregate(result, func(normal, x), n); } else { double r = shape; double p = rate / (rate + 1.0); - boost::math::negative_binomial_distribution<> negativeBinomial(r, p); + boost::math::negative_binomial negativeBinomial(r, p); result = aggregate(result, func(negativeBinomial, x), n); } } @@ -237,9 +231,8 @@ bool CPoissonMeanConjugate::needsOffset() const { return true; } -double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& /*weights*/) { +double CPoissonMeanConjugate::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& /*weights*/) { if (samples.empty() || CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) { return 0.0; @@ -264,16 +257,15 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles return 0.0; } - TWeightStyleVec weightStyle(1, maths_t::E_SampleCountWeight); double offset = OFFSET_MARGIN - minimumSample; TDouble1Vec resamples; this->sampleMarginalLikelihood(ADJUST_OFFSET_SAMPLE_SIZE, resamples); double weight = this->numberSamples() / static_cast(resamples.size()); - TDouble4Vec1Vec weights(resamples.size(), TDouble4Vec(1, weight)); + TDoubleWeightsAry1Vec weights(resamples.size(), maths_t::countWeight(weight)); double before = 0.0; if (!resamples.empty()) { - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, weights, before); + this->jointLogMarginalLikelihood(resamples, weights, before); } // Reset the parameters. 
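// For context: adjustOffset above follows a resample-and-score pattern,
// i.e. sample the current marginal likelihood, record the log-likelihood of
// those samples, move the support by the new offset, re-seed the prior with
// the resamples and report the (non-positive) change in log-likelihood as a
// penalty. Sketched against a deliberately hypothetical prior interface;
// none of these member names belong to the library:
#include <algorithm>
#include <cstddef>
#include <vector>

struct IPrior { // hypothetical interface, for illustration only
    virtual ~IPrior() = default;
    virtual std::vector<double> sampleMarginal(std::size_t n) const = 0;
    virtual double logLikelihood(const std::vector<double>& samples) const = 0;
    virtual void resetWithOffset(double offset) = 0;
    virtual void addSamples(const std::vector<double>& samples) = 0;
};

double adjustOffsetPenalty(IPrior& prior, double offset, std::size_t n) {
    std::vector<double> resamples = prior.sampleMarginal(n);
    double before = resamples.empty() ? 0.0 : prior.logLikelihood(resamples);
    prior.resetWithOffset(offset);
    prior.addSamples(resamples);
    double after = resamples.empty() ? 0.0 : prior.logLikelihood(resamples);
    return std::min(after - before, 0.0); // a penalty, so never positive
}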
@@ -293,10 +285,10 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles LOG_TRACE(<< "resamples = " << core::CContainerPrinter::print(resamples) << ", weight = " << weight << ", offset = " << m_Offset); - this->addSamples(weightStyle, resamples, weights); + this->addSamples(resamples, weights); double after; - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, weights, after); + this->jointLogMarginalLikelihood(resamples, weights, after); return std::min(after - before, 0.0); } @@ -305,13 +297,11 @@ double CPoissonMeanConjugate::offset() const { return m_Offset; } -void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CPoissonMeanConjugate::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -319,8 +309,8 @@ void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, return; } - this->adjustOffset(weightStyles, samples, weights); - this->CPrior::addSamples(weightStyles, samples, weights); + this->adjustOffset(samples, weights); + this->CPrior::addSamples(samples, weights); // The update of the posterior with n independent samples of the // Poisson distribution comes from: @@ -342,20 +332,15 @@ void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, double numberSamples = 0.0; double sampleSum = 0.0; - try { - for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double x = samples[i] + m_Offset; - if (!CMathsFuncs::isFinite(x) || x < 0.0) { - LOG_ERROR(<< "Discarding " << x << " it's not Poisson"); - continue; - } - numberSamples += n; - sampleSum += n * x; + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weights[i]); + double x = samples[i] + m_Offset; + if (!CMathsFuncs::isFinite(x) || x < 0.0) { + LOG_ERROR(<< "Discarding " << x << " it's not Poisson"); + continue; } - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to update likelihood: " << e.what()); - return; + numberSamples += n; + sampleSum += n * x; } m_Shape += sampleSum; @@ -371,7 +356,6 @@ void CPoissonMeanConjugate::propagateForwardsByTime(double time) { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // There is nothing to be done. 
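// For context: the update a few lines up is the textbook conjugate step.
// With a gamma(a, b) prior on a Poisson mean and observations x_i carrying
// counts n_i, the posterior is gamma(a + sum_i n_i x_i, b + sum_i n_i). A
// minimal sketch without the offset handling and finite-value checks (the
// struct and member names are illustrative):
#include <cstddef>
#include <vector>

struct GammaPoissonPosterior {
    double shape; // a
    double rate;  // b
    void add(const std::vector<double>& samples, const std::vector<double>& counts) {
        for (std::size_t i = 0; i < samples.size(); ++i) {
            shape += counts[i] * samples[i]; // weighted event count
            rate += counts[i];               // effective number of observations
        }
    }
    double mean() const { return shape / rate; } // posterior mean of the Poisson rate
};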
return; @@ -403,10 +387,11 @@ void CPoissonMeanConjugate::propagateForwardsByTime(double time) { } CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::marginalLikelihoodSupport() const { - return std::make_pair(-m_Offset, boost::numeric::bounds::highest()); + return {-m_Offset, boost::numeric::bounds::highest()}; } double CPoissonMeanConjugate::marginalLikelihoodMean() const { + if (this->isNonInformative()) { return -m_Offset; } @@ -418,8 +403,8 @@ double CPoissonMeanConjugate::marginalLikelihoodMean() const { return this->priorMean() - m_Offset; } -double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { +double CPoissonMeanConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& /*weights*/) const { + if (this->isNonInformative()) { return -m_Offset; } @@ -437,7 +422,7 @@ double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*we try { double r = m_Shape; double p = m_Rate / (m_Rate + 1.0); - boost::math::negative_binomial_distribution<> negativeBinomial(r, p); + boost::math::negative_binomial negativeBinomial(r, p); return boost::math::mode(negativeBinomial) - m_Offset; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what() @@ -447,8 +432,8 @@ double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*we return -m_Offset; } -double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double CPoissonMeanConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } @@ -457,20 +442,14 @@ double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec& // = E_{R}[ R + (R - a/b)^2 ] // = "prior mean" + "prior variance" - double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); return varianceScale * (this->priorMean() + this->priorVariance()); } CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { + const TDoubleWeightsAry& /*weights*/) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -484,12 +463,12 @@ CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, try { double r = m_Shape; double p = m_Rate / (m_Rate + 1.0); - boost::math::negative_binomial_distribution<> negativeBinomial(r, p); + boost::math::negative_binomial negativeBinomial(r, p); double x1 = boost::math::quantile(negativeBinomial, (1.0 - percentage) / 2.0) - m_Offset; double x2 = percentage > 0.0 ? 
boost::math::quantile(negativeBinomial, (1.0 + percentage) / 2.0) - m_Offset : x1; - return std::make_pair(x1, x2); + return {x1, x2}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); } @@ -498,9 +477,8 @@ CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, } maths_t::EFloatingPointErrorStatus -CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CPoissonMeanConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -508,7 +486,6 @@ CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS LOG_ERROR(<< "Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -552,7 +529,7 @@ CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS double sampleLogFactorialSum = 0.0; for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); + double n = maths_t::countForUpdate(weights[i]); double x = samples[i] + m_Offset; if (x < 0.0) { // Technically, the marginal likelihood is zero here @@ -657,7 +634,7 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, LOG_TRACE(<< "mean = " << mean << ", variance = " << variance); try { - boost::math::normal_distribution<> normal(mean, std::sqrt(variance)); + boost::math::normal normal(mean, std::sqrt(variance)); for (std::size_t i = 1u; i < numberSamples; ++i) { double q = static_cast(i) / static_cast(numberSamples); @@ -747,16 +724,15 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, } } -bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CPoissonMeanConjugate::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { lowerBound = upperBound = 0.0; double value; if (!detail::evaluateFunctionOnJointDistribution( - weightStyles, samples, weights, CTools::SMinusLogCdf(), detail::SPlusWeight(), + samples, weights, CTools::SMinusLogCdf(), detail::SPlusWeight(), m_Offset, this->isNonInformative(), m_Shape, m_Rate, value)) { LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); @@ -767,18 +743,16 @@ bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles return true; } -bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { lowerBound = upperBound = 0.0; double value; if (!detail::evaluateFunctionOnJointDistribution( - weightStyles, samples, weights, CTools::SMinusLogCdfComplement(), - detail::SPlusWeight(), m_Offset, this->isNonInformative(), m_Shape, - m_Rate, value)) { + samples, weights, CTools::SMinusLogCdfComplement(), detail::SPlusWeight(), + m_Offset, this->isNonInformative(), m_Shape, m_Rate, value)) { LOG_ERROR(<< "Failed computing c.d.f. 
complement for " << core::CContainerPrinter::print(samples)); return false; @@ -789,9 +763,8 @@ bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec& we } bool CPoissonMeanConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { @@ -803,7 +776,7 @@ bool CPoissonMeanConjugate::probabilityOfLessLikelySamples(maths_t::EProbability CJointProbabilityOfLessLikelySamples probability; if (!detail::evaluateFunctionOnJointDistribution( - weightStyles, samples, weights, + samples, weights, boost::bind(CTools::CProbabilityOfLessLikelySample(calculation), _1, _2, boost::ref(tail_)), CJointProbabilityOfLessLikelySamples::SAddProbability(), m_Offset, @@ -837,6 +810,7 @@ void CPoissonMeanConjugate::print(const std::string& indent, std::string& result } std::string CPoissonMeanConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative prior is improper and effectively 0 everywhere. return std::string(); @@ -904,6 +878,7 @@ void CPoissonMeanConjugate::acceptPersistInserter(core::CStatePersistInserter& i } double CPoissonMeanConjugate::priorMean() const { + if (this->isNonInformative()) { return 0.0; } @@ -937,6 +912,7 @@ double CPoissonMeanConjugate::priorVariance() const { CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const { + if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -953,8 +929,8 @@ CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const { try { boost::math::gamma_distribution<> gamma(m_Shape, 1.0 / m_Rate); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile) - m_Offset, - boost::math::quantile(gamma, upperPercentile) - m_Offset); + return {boost::math::quantile(gamma, lowerPercentile) - m_Offset, + boost::math::quantile(gamma, upperPercentile) - m_Offset}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute mean confidence interval: " << e.what() << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); diff --git a/lib/maths/CPrior.cc b/lib/maths/CPrior.cc index b252693768..b6d23887e0 100644 --- a/lib/maths/CPrior.cc +++ b/lib/maths/CPrior.cc @@ -93,13 +93,11 @@ double CPrior::offsetMargin() const { return 0.0; } -void CPrior::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& /*samples*/, - const TDouble4Vec1Vec& weights) { +void CPrior::addSamples(const TDouble1Vec& /*samples*/, const TDoubleWeightsAry1Vec& weights) { double n = 0.0; try { for (const auto& weight : weights) { - n += maths_t::countForUpdate(weightStyles, weight); + n += maths_t::countForUpdate(weight); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to extract sample counts: " << e.what()); @@ -111,9 +109,8 @@ double CPrior::nearestMarginalLikelihoodMean(double /*value*/) const { return this->marginalLikelihoodMean(); } -CPrior::TDouble1Vec CPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { - return TDouble1Vec{this->marginalLikelihoodMode(weightStyles, weights)}; +CPrior::TDouble1Vec CPrior::marginalLikelihoodModes(const TDoubleWeightsAry& weights) const { + return TDouble1Vec{this->marginalLikelihoodMode(weights)}; } std::string CPrior::print() const { @@ -163,8 +160,8 @@ CPrior::SPlot 
CPrior::marginalLikelihoodPlot(unsigned int numberPoints, double w for (auto x : plot.s_Abscissa) { double likelihood; - maths_t::EFloatingPointErrorStatus status = this->jointLogMarginalLikelihood( - CConstantWeights::COUNT, {x}, CConstantWeights::SINGLE_UNIT, likelihood); + maths_t::EFloatingPointErrorStatus status = + this->jointLogMarginalLikelihood({x}, TWeights::SINGLE_UNIT, likelihood); if (status & maths_t::E_FpFailed) { // Ignore point. } else if (status & maths_t::E_FpOverflowed) { @@ -201,7 +198,8 @@ double CPrior::unmarginalizedParameters() const { void CPrior::adjustOffsetResamples(double minimumSample, TDouble1Vec& resamples, - TDouble4Vec1Vec& resamplesWeights) const { + TDoubleWeightsAry1Vec& resamplesWeights) const { + this->sampleMarginalLikelihood(ADJUST_OFFSET_SAMPLE_SIZE, resamples); std::size_t n = resamples.size(); resamples.erase(std::remove_if(resamples.begin(), resamples.end(), @@ -218,13 +216,12 @@ void CPrior::adjustOffsetResamples(double minimumSample, double resamplesWeight = 1.0; if (n > 0) { resamplesWeight = this->numberSamples() / static_cast(n); - resamplesWeights.resize(n, TDouble4Vec(1, resamplesWeight)); + resamplesWeights.resize(n, maths_t::countWeight(resamplesWeight)); } } -double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +double CPrior::adjustOffsetWithCost(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, COffsetCost& cost, CApplyOffset& apply) { if (samples.empty() || @@ -255,7 +252,7 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, double offset = margin - minimumSample; offset *= (offset < 0.0 ? (1.0 - EPS) : (1.0 + EPS)); - cost.samples(weightStyles, samples, weights); + cost.samples(samples, weights); cost.resample(minimumSample); apply.resample(minimumSample); @@ -265,12 +262,11 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, } TDouble1Vec resamples; - TDouble4Vec1Vec resamplesWeights; + TDoubleWeightsAry1Vec resamplesWeights; this->adjustOffsetResamples(minimumSample, resamples, resamplesWeights); double before; - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, - resamplesWeights, before); + this->jointLogMarginalLikelihood(resamples, resamplesWeights, before); double maximumSample = *std::max_element(samples.begin(), samples.end()); double range = resamples.empty() @@ -296,8 +292,7 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, apply(offset); double after; - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, - resamplesWeights, after); + this->jointLogMarginalLikelihood(resamples, resamplesWeights, after); return std::min(after - before, 0.0); } @@ -329,9 +324,8 @@ bool CPrior::CModelFilter::operator()(EPrior model) const { ////////// CPrior::CLogMarginalLikelihood Implementation ////////// CPrior::CLogMarginalLikelihood::CLogMarginalLikelihood(const CPrior& prior, - const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights) - : m_Prior(&prior), m_WeightStyles(&weightStyles), m_Weights(&weights), m_X(1) { + const TDoubleWeightsAry1Vec& weights) + : m_Prior(&prior), m_Weights(&weights), m_X(1) { } double CPrior::CLogMarginalLikelihood::operator()(double x) const { @@ -345,21 +339,17 @@ double CPrior::CLogMarginalLikelihood::operator()(double x) const { bool CPrior::CLogMarginalLikelihood::operator()(double x, double& result) const { m_X[0] = x; - return 
!(m_Prior->jointLogMarginalLikelihood(*m_WeightStyles, m_X, *m_Weights, result) & - maths_t::E_FpFailed); + return !(m_Prior->jointLogMarginalLikelihood(m_X, *m_Weights, result) & maths_t::E_FpFailed); } ////////// CPrior::COffsetParameters Implementation ////////// CPrior::COffsetParameters::COffsetParameters(CPrior& prior) - : m_Prior(&prior), m_WeightStyles(nullptr), m_Samples(nullptr), - m_Weights(nullptr), m_Resamples(0), m_ResamplesWeights(0) { + : m_Prior(&prior), m_Samples(nullptr), m_Weights(nullptr) { } -void CPrior::COffsetParameters::samples(const maths_t::TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { - m_WeightStyles = &weightStyles; +void CPrior::COffsetParameters::samples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { m_Samples = &samples; m_Weights = &weights; } @@ -372,15 +362,11 @@ CPrior& CPrior::COffsetParameters::prior() const { return *m_Prior; } -const maths_t::TWeightStyleVec& CPrior::COffsetParameters::weightStyles() const { - return *m_WeightStyles; -} - const CPrior::TDouble1Vec& CPrior::COffsetParameters::samples() const { return *m_Samples; } -const CPrior::TDouble4Vec1Vec& CPrior::COffsetParameters::weights() const { +const CPrior::TDoubleWeightsAry1Vec& CPrior::COffsetParameters::weights() const { return *m_Weights; } @@ -388,7 +374,7 @@ const CPrior::TDouble1Vec& CPrior::COffsetParameters::resamples() const { return m_Resamples; } -const CPrior::TDouble4Vec1Vec& CPrior::COffsetParameters::resamplesWeights() const { +const CPrior::TDoubleWeightsAry1Vec& CPrior::COffsetParameters::resamplesWeights() const { return m_ResamplesWeights; } @@ -404,8 +390,8 @@ double CPrior::COffsetCost::operator()(double offset) const { void CPrior::COffsetCost::resetPriors(double offset) const { this->prior().setToNonInformative(offset, this->prior().decayRate()); - this->prior().addSamples(TWeights::COUNT, this->resamples(), this->resamplesWeights()); - this->prior().addSamples(this->weightStyles(), this->samples(), this->weights()); + this->prior().addSamples(this->resamples(), this->resamplesWeights()); + this->prior().addSamples(this->samples(), this->weights()); } double CPrior::COffsetCost::computeCost(double offset) const { @@ -413,7 +399,7 @@ double CPrior::COffsetCost::computeCost(double offset) const { maths_t::EFloatingPointErrorStatus status; if (this->resamples().size() > 0) { status = this->prior().jointLogMarginalLikelihood( - TWeights::COUNT, this->resamples(), this->resamplesWeights(), resamplesLogLikelihood); + this->resamples(), this->resamplesWeights(), resamplesLogLikelihood); if (status != maths_t::E_FpNoErrors) { LOG_ERROR(<< "Failed evaluating log-likelihood at " << offset << " for samples " << core::CContainerPrinter::print(this->resamples()) << " and weights " @@ -423,7 +409,7 @@ double CPrior::COffsetCost::computeCost(double offset) const { } double samplesLogLikelihood; status = this->prior().jointLogMarginalLikelihood( - this->weightStyles(), this->samples(), this->weights(), samplesLogLikelihood); + this->samples(), this->weights(), samplesLogLikelihood); if (status != maths_t::E_FpNoErrors) { LOG_ERROR(<< "Failed evaluating log-likelihood at " << offset << " for " << core::CContainerPrinter::print(this->samples()) << " and weights " @@ -440,7 +426,7 @@ CPrior::CApplyOffset::CApplyOffset(CPrior& prior) : COffsetParameters(prior) { void CPrior::CApplyOffset::operator()(double offset) const { this->prior().setToNonInformative(offset, this->prior().decayRate()); - 
this->prior().addSamples(TWeights::COUNT, this->resamples(), this->resamplesWeights()); + this->prior().addSamples(this->resamples(), this->resamplesWeights()); } } } diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index e5415b823b..e59eec380b 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -36,8 +36,6 @@ using namespace time_series_change_detector_detail; namespace { using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription; const std::string MINIMUM_TIME_TO_DETECT{"a"}; const std::string MAXIMUM_TIME_TO_DETECT{"b"}; @@ -91,9 +89,7 @@ CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector( m_ChangeModels{ std::make_shared(trendModel, residualModel), std::make_shared(trendModel, residualModel), - std::make_shared(trendModel, - residualModel, - -core::constants::HOUR), + std::make_shared(trendModel, residualModel, -core::constants::HOUR), std::make_shared(trendModel, residualModel, +core::constants::HOUR)} { @@ -217,9 +213,8 @@ bool CUnivariateTimeSeriesChangeDetector::stopTesting() const { return false; } -void CUnivariateTimeSeriesChangeDetector::addSamples(const TWeightStyleVec& weightStyles, - const TTimeDoublePr1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CUnivariateTimeSeriesChangeDetector::addSamples(const TTimeDoublePr1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { for (const auto& sample : samples) { m_TimeRange.add(sample.first); } @@ -227,7 +222,7 @@ void CUnivariateTimeSeriesChangeDetector::addSamples(const TWeightStyleVec& weig ++m_SampleCount; for (auto& model : m_ChangeModels) { - model->addSamples(m_SampleCount, weightStyles, samples, weights); + model->addSamples(m_SampleCount, samples, weights); } } @@ -309,25 +304,21 @@ double CUnivariateChangeModel::expectedLogLikelihood() const { return m_ExpectedLogLikelihood; } -void CUnivariateChangeModel::updateLogLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CUnivariateChangeModel::updateLogLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { double logLikelihood{}; - if (m_ResidualModel->jointLogMarginalLikelihood( - weightStyles, samples, weights, logLikelihood) == maths_t::E_FpNoErrors) { + if (m_ResidualModel->jointLogMarginalLikelihood(samples, weights, logLikelihood) == + maths_t::E_FpNoErrors) { m_LogLikelihood += logLikelihood; } } -void CUnivariateChangeModel::updateExpectedLogLikelihood(const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights) { +void CUnivariateChangeModel::updateExpectedLogLikelihood(const TDoubleWeightsAry1Vec& weights) { for (const auto& weight : weights) { double expectedLogLikelihood{}; - TDouble4Vec1Vec weight_{weight}; if (m_ResidualModel->expectation( - maths::CPrior::CLogMarginalLikelihood{*m_ResidualModel, weightStyles, weight_}, - EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, expectedLogLikelihood, - weightStyles, weight)) { + maths::CPrior::CLogMarginalLikelihood{*m_ResidualModel, {weight}}, + EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS, expectedLogLikelihood, weight)) { m_ExpectedLogLikelihood += expectedLogLikelihood; } } @@ -378,33 +369,24 @@ TOptionalChangeDescription CUnivariateNoChangeModel::change() const { } void CUnivariateNoChangeModel::addSamples(const std::size_t count, - 
TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples_, - TDouble4Vec1Vec weights) { + TDoubleWeightsAry1Vec weights) { // See, for example, CUnivariateLevelShiftModel::addSamples // for an explanation of the delay updating the log-likelihood. if (count >= COUNT_TO_INITIALIZE) { - CPrior& residualModel{this->residualModel()}; - TDouble1Vec samples; samples.reserve(samples_.size()); for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; - double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; double sample{this->trendModel().detrend(time, value, 0.0)}; - double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, - weightStyles, weights[i]); } - for (auto& weight : weights) { - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, - weightStyles, weight); + maths_t::setWinsorisationWeight(1.0, weight); } - this->updateLogLikelihood(weightStyles, samples, weights); + this->updateLogLikelihood(samples, weights); } } @@ -461,9 +443,8 @@ TOptionalChangeDescription CUnivariateLevelShiftModel::change() const { } void CUnivariateLevelShiftModel::addSamples(const std::size_t count, - TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples_, - TDouble4Vec1Vec weights) { + TDoubleWeightsAry1Vec weights) { const CTimeSeriesDecompositionInterface& trendModel{this->trendModel()}; // We delay updating the log-likelihood because early on the @@ -481,24 +462,22 @@ void CUnivariateLevelShiftModel::addSamples(const std::size_t count, for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; - double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; + double seasonalScale{maths_t::seasonalVarianceScale(weights[i])}; double sample{trendModel.detrend(time, value, 0.0) - shift}; - double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; + double weight{tailWinsorisationWeight(residualModel, 1.0, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, - weightStyles, weights[i]); - m_SampleCount += maths_t::count(weightStyles, weights[i]); + maths_t::setWinsorisationWeight(weight, weights[i]); + m_SampleCount += maths_t::count(weights[i]); } - residualModel.addSamples(weightStyles, samples, weights); + residualModel.addSamples(samples, weights); residualModel.propagateForwardsByTime(1.0); for (auto& weight : weights) { - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, - weightStyles, weight); + maths_t::setWinsorisationWeight(1.0, weight); } - this->updateLogLikelihood(weightStyles, samples, weights); - this->updateExpectedLogLikelihood(weightStyles, weights); + this->updateLogLikelihood(samples, weights); + this->updateExpectedLogLikelihood(weights); } for (std::size_t i = 0u; i < samples_.size(); ++i) { @@ -565,9 +544,8 @@ CUnivariateLinearScaleModel::change() const { } void CUnivariateLinearScaleModel::addSamples(const std::size_t count, - TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples_, - TDouble4Vec1Vec weights) { + TDoubleWeightsAry1Vec weights) { const CTimeSeriesDecompositionInterface& trendModel{this->trendModel()}; // We delay updating the log-likelihood because early on the @@ -596,25 +574,23 @@ void CUnivariateLinearScaleModel::addSamples(const std::size_t count, 
for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; - double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; + double seasonalScale{maths_t::seasonalVarianceScale(weights[i])}; double prediction{CBasicStatistics::mean(trendModel.value(time, 0.0))}; double sample{value - scale * prediction}; - double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; + double weight{tailWinsorisationWeight(residualModel, 1.0, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, - weightStyles, weights[i]); - m_SampleCount += maths_t::count(weightStyles, weights[i]); + maths_t::setWinsorisationWeight(weight, weights[i]); + m_SampleCount += maths_t::count(weights[i]); } - residualModel.addSamples(weightStyles, samples, weights); + residualModel.addSamples(samples, weights); residualModel.propagateForwardsByTime(1.0); for (auto& weight : weights) { - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, - weightStyles, weight); + maths_t::setWinsorisationWeight(1.0, weight); } - this->updateLogLikelihood(weightStyles, samples, weights); - this->updateExpectedLogLikelihood(weightStyles, weights); + this->updateLogLikelihood(samples, weights); + this->updateExpectedLogLikelihood(weights); } } @@ -668,9 +644,8 @@ TOptionalChangeDescription CUnivariateTimeShiftModel::change() const { } void CUnivariateTimeShiftModel::addSamples(const std::size_t count, - TWeightStyleVec weightStyles, const TTimeDoublePr1Vec& samples_, - TDouble4Vec1Vec weights) { + TDoubleWeightsAry1Vec weights) { // See, for example, CUnivariateLevelShiftModel::addSamples // for an explanation of the delay updating the log-likelihood. 
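// For context: each change model above winsorises before it learns. A
// candidate sample gets a weight reflecting how deep it sits in the residual
// model's tails, the residual model is updated with that weight, and the
// weight is reset to one before the log-likelihood bookkeeping so that all
// candidate models are scored on equal terms. One plausible mapping from a
// two-sided tail probability to a down-weight is sketched below; the cutoff
// and exponent are assumptions, not the library's constants:
#include <algorithm>
#include <cmath>

double tailDownWeight(double cdf, double cdfComplement,
                      double cutoff = 1e-4, double exponent = 2.0) {
    double tail = 2.0 * std::min(cdf, cdfComplement); // two-sided p-value
    if (tail >= cutoff) {
        return 1.0; // ordinary values keep full weight
    }
    return std::pow(tail / cutoff, exponent); // decays smoothly into the tail
}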
@@ -682,23 +657,21 @@ void CUnivariateTimeShiftModel::addSamples(const std::size_t count, for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; - double seasonalScale{maths_t::seasonalVarianceScale(weightStyles, weights[i])}; + double seasonalScale{maths_t::seasonalVarianceScale(weights[i])}; double sample{this->trendModel().detrend(time + m_Shift, value, 0.0)}; - double weight{tailWinsorisationWeight(residualModel, 0.2, seasonalScale, sample)}; + double weight{tailWinsorisationWeight(residualModel, 1.0, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, weight, - weightStyles, weights[i]); + maths_t::setWinsorisationWeight(weight, weights[i]); } - residualModel.addSamples(weightStyles, samples, weights); + residualModel.addSamples(samples, weights); residualModel.propagateForwardsByTime(1.0); for (auto& weight : weights) { - maths_t::setWeight(maths_t::E_SampleWinsorisationWeight, 1.0, - weightStyles, weight); + maths_t::setWinsorisationWeight(1.0, weight); } - this->updateLogLikelihood(weightStyles, samples, weights); - this->updateExpectedLogLikelihood(weightStyles, weights); + this->updateLogLikelihood(samples, weights); + this->updateExpectedLogLikelihood(weights); } } diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index a8d609f9f9..288fafbea3 100644 --- a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -213,8 +213,7 @@ bool CTimeSeriesDecomposition::initialized() const { bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec& weightStyles, - const maths_t::TDouble4Vec& weights) { + const maths_t::TDoubleWeightsAry& weights) { CComponents::CScopeNotifyOnStateChange result{m_Components}; time += m_TimeShift; @@ -227,7 +226,6 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, SAddValue message{time, lastTime, value, - weightStyles, weights, CBasicStatistics::mean(this->value(time, 0.0, E_TrendForced)), CBasicStatistics::mean(this->value(time, 0.0, E_Seasonal)), diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index f77669a945..a16eab07c6 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -360,20 +361,18 @@ CTimeSeriesDecompositionDetail::SMessage::SMessage(core_t::TTime time, core_t::T //////// SAddValue //////// -CTimeSeriesDecompositionDetail::SAddValue::SAddValue( - core_t::TTime time, - core_t::TTime lastTime, - double value, - const maths_t::TWeightStyleVec& weightStyles, - const maths_t::TDouble4Vec& weights, - double trend, - double seasonal, - double calendar, - const TPredictor& predictor, - const CPeriodicityHypothesisTestsConfig& periodicityTestConfig) - : SMessage{time, lastTime}, s_Value{value}, s_WeightStyles{weightStyles}, - s_Weights{weights}, s_Trend{trend}, s_Seasonal{seasonal}, s_Calendar{calendar}, - s_Predictor{predictor}, s_PeriodicityTestConfig{periodicityTestConfig} { +CTimeSeriesDecompositionDetail::SAddValue::SAddValue(core_t::TTime time, + core_t::TTime lastTime, + double value, + const maths_t::TDoubleWeightsAry& weights, + double trend, + double seasonal, + double calendar, + const TPredictor& predictor, + const CPeriodicityHypothesisTestsConfig& periodicityTestConfig) + : SMessage{time, lastTime}, 
s_Value{value}, s_Weights{weights}, s_Trend{trend}, + s_Seasonal{seasonal}, s_Calendar{calendar}, s_Predictor{predictor}, + s_PeriodicityTestConfig{periodicityTestConfig} { } //////// SDetectedSeasonal //////// @@ -529,9 +528,8 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::swap(CPeriodicityTest& ot void CTimeSeriesDecompositionDetail::CPeriodicityTest::handle(const SAddValue& message) { core_t::TTime time{message.s_Time}; double value{message.s_Value}; - const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec& weights{message.s_Weights}; - double weight{maths_t::countForUpdate(weightStyles, weights)}; + const maths_t::TDoubleWeightsAry& weights{message.s_Weights}; + double weight{maths_t::countForUpdate(weights)}; this->test(message); @@ -678,6 +676,7 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, bool CTimeSeriesDecompositionDetail::CPeriodicityTest::shouldTest(const TExpandingWindowPtr& window, core_t::TTime time) const { + // We need to test more frequently than when we compress, because // this only happens after we've seen 336 buckets, this would thus // significantly delay when we first detect a daily periodic for @@ -696,6 +695,7 @@ bool CTimeSeriesDecompositionDetail::CPeriodicityTest::shouldTest(const TExpandi } CExpandingWindow* CTimeSeriesDecompositionDetail::CPeriodicityTest::newWindow(ETest test) const { + using TTimeCRng = CExpandingWindow::TTimeCRng; auto newWindow = [this](const TTimeVec& bucketLengths) { @@ -785,14 +785,13 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SAddValue& mess core_t::TTime time{message.s_Time}; double error{message.s_Value - message.s_Trend - message.s_Seasonal - message.s_Calendar}; - const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec& weights{message.s_Weights}; + const maths_t::TDoubleWeightsAry& weights{message.s_Weights}; this->test(message); switch (m_Machine.state()) { case CC_TEST: - m_Test->add(time, error, maths_t::countForUpdate(weightStyles, weights)); + m_Test->add(time, error, maths_t::countForUpdate(weights)); break; case CC_NOT_TESTING: break; @@ -1016,6 +1015,7 @@ bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser( void CTimeSeriesDecompositionDetail::CComponents::acceptPersistInserter( core::CStatePersistInserter& inserter) const { + inserter.insertValue(VERSION_6_3_TAG, ""); inserter.insertLevel( COMPONENTS_MACHINE_6_3_TAG, @@ -1063,8 +1063,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag double trend{message.s_Trend}; double seasonal{message.s_Seasonal}; double calendar{message.s_Calendar}; - const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec& weights{message.s_Weights}; + const maths_t::TDoubleWeightsAry& weights{message.s_Weights}; TSeasonalComponentPtrVec seasonalComponents; TCalendarComponentPtrVec calendarComponents; @@ -1080,7 +1079,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag m_Calendar->componentsAndErrors(time, calendarComponents, calendarErrors); } - double weight{maths_t::countForUpdate(weightStyles, weights)}; + double weight{maths_t::countForUpdate(weights)}; std::size_t m{seasonalComponents.size()}; std::size_t n{calendarComponents.size()}; @@ -1877,10 +1876,12 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime shifted.reserve(s_Components.size()); for (auto& component : 
                             s_Components) {
         const CSeasonalTime& time_ = component.time();
-        if (std::find_if(shifted.begin(), shifted.end(), [&time_](const TTimeTimePr& window) {
-                return !(time_.windowEnd() <= window.first ||
-                         time_.windowStart() >= window.second);
-            }) == shifted.end()) {
+        auto containsWindow = [&time_](const TTimeTimePr& window) {
+            return !(time_.windowEnd() <= window.first ||
+                     time_.windowStart() >= window.second);
+        };
+        if (std::find_if(shifted.begin(), shifted.end(),
+                         containsWindow) == shifted.end()) {
             component.shiftLevel(shift.second);
         }
     }
diff --git a/lib/maths/CTimeSeriesDecompositionStub.cc b/lib/maths/CTimeSeriesDecompositionStub.cc
index 0a1f74a660..e7249eccaa 100644
--- a/lib/maths/CTimeSeriesDecompositionStub.cc
+++ b/lib/maths/CTimeSeriesDecompositionStub.cc
@@ -34,8 +34,7 @@ bool CTimeSeriesDecompositionStub::initialized() const {
 
 bool CTimeSeriesDecompositionStub::addPoint(core_t::TTime /*time*/,
                                             double /*value*/,
-                                            const maths_t::TWeightStyleVec& /*weightStyles*/,
-                                            const maths_t::TDouble4Vec& /*weights*/) {
+                                            const maths_t::TDoubleWeightsAry& /*weights*/) {
     return false;
 }
 
diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc
index 0006a5d5ad..78c879e3dd 100644
--- a/lib/maths/CTimeSeriesModel.cc
+++ b/lib/maths/CTimeSeriesModel.cc
@@ -38,16 +38,12 @@ namespace {
 using TDoubleDoublePr = std::pair<double, double>;
 using TSizeDoublePr = std::pair<std::size_t, double>;
 using TTimeDoublePr = std::pair<core_t::TTime, double>;
+using TSizeVec = std::vector<std::size_t>;
 using TDouble1Vec = core::CSmallVector<double, 1>;
 using TDouble2Vec = core::CSmallVector<double, 2>;
-using TDouble4Vec = core::CSmallVector<double, 4>;
 using TDouble10Vec = core::CSmallVector<double, 10>;
-using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
 using TDouble10Vec1Vec = core::CSmallVector<TDouble10Vec, 1>;
 using TDouble10Vec2Vec = core::CSmallVector<TDouble10Vec, 2>;
-using TDouble10Vec4Vec = core::CSmallVector<TDouble10Vec, 4>;
-using TDouble10Vec4Vec1Vec = core::CSmallVector<TDouble10Vec4Vec, 1>;
-using TSizeVec = std::vector<std::size_t>;
 using TSize1Vec = core::CSmallVector<std::size_t, 1>;
 using TSize2Vec = core::CSmallVector<std::size_t, 2>;
 using TSize2Vec1Vec = core::CSmallVector<TSize2Vec, 1>;
@@ -108,32 +104,19 @@ TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus& rng,
                              const CModelAddSamplesParams& params,
                              core_t::TTime bucketLength,
                              const TSizeVec& indices) {
-    using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
-
-    double weight{1.0};
-    {
-        auto i = std::find(params.weightStyles().begin(), params.weightStyles().end(),
-                           maths_t::E_SampleWinsorisationWeight);
-        if (i != params.weightStyles().end()) {
-            std::ptrdiff_t index{i - params.weightStyles().begin()};
-            auto addWeight = [index](TMeanAccumulator mean, const TDouble2Vec4Vec& weight_) {
-                mean.add(weight_[index]);
-                return mean;
-            };
-            TMeanAccumulator mean{std::accumulate(params.trendWeights().begin(),
-                                                  params.trendWeights().end(),
-                                                  TMeanAccumulator{}, addWeight)};
-            weight = CBasicStatistics::mean(mean);
-        }
-    }
-
+    auto addWeight = [](TMeanAccumulator mean, const maths_t::TDouble2VecWeightsAry& weight) {
+        mean.add(maths_t::winsorisationWeight(weight)[0]);
+        return mean;
+    };
+    TMeanAccumulator weight{std::accumulate(params.trendWeights().begin(),
+                                            params.trendWeights().end(),
+                                            TMeanAccumulator{}, addWeight)};
     double p{SLIDING_WINDOW_SIZE * static_cast<double>(bucketLength) /
-             static_cast<double>(core::constants::DAY) * weight};
+             static_cast<double>(core::constants::DAY) * CBasicStatistics::mean(weight)};
     if (p >= 1.0 || CSampling::uniformSample(rng, 0.0, 1.0) < p) {
        std::size_t i{CSampling::uniformSample(rng, 0, indices.size())};
        return indices[i];
     }
-
     return TOptionalSize{};
 }
 
@@ -218,13 +201,13 @@ double tailWinsorisationWeight(const CPrior& prior, double derate, double scale,
double lowerBound; double upperBound; - if (!prior.minusLogJointCdf(CConstantWeights::SEASONAL_VARIANCE, {value}, - {{scale}}, lowerBound, upperBound)) { + if (!prior.minusLogJointCdf({value}, {maths_t::seasonalVarianceScaleWeight(scale)}, + lowerBound, upperBound)) { return 1.0; } if (upperBound < MINUS_LOG_TOLERANCE && - !prior.minusLogJointCdfComplement(CConstantWeights::SEASONAL_VARIANCE, {value}, - {{scale}}, lowerBound, upperBound)) { + !prior.minusLogJointCdfComplement( + {value}, {maths_t::seasonalVarianceScaleWeight(scale)}, lowerBound, upperBound)) { return 1.0; } @@ -419,15 +402,15 @@ class CTimeSeriesAnomalyModel { //! significantly anomalous. static const double LOG_SMALL_PROBABILITY; //! A unit weight. - static const TDouble10Vec4Vec1Vec UNIT; + static const maths_t::TDouble10VecWeightsAry1Vec UNIT; private: //! Update the appropriate anomaly model with \p anomaly. void sample(core_t::TTime time, const CAnomaly& anomaly, double weight) { std::size_t index(anomaly.positive() ? 0 : 1); TDouble10Vec1Vec features{anomaly.features(this->scale(time))}; - m_AnomalyFeatureModels[index].addSamples(CConstantWeights::COUNT, features, - {{TDouble10Vec(2, weight)}}); + m_AnomalyFeatureModels[index].addSamples(features, + {maths_t::countWeight(weight, 2)}); } //! Get the scaled time. @@ -534,8 +517,7 @@ void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams& params, if (probability < LARGEST_ANOMALOUS_PROBABILITY && !m_AnomalyFeatureModels[index].isNonInformative() && m_AnomalyFeatureModels[index].probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, CConstantWeights::COUNT, features, - UNIT, pl, pu, tail)) { + maths_t::E_OneSidedAbove, features, UNIT, pl, pu, tail)) { double logp{CTools::fastLog(probability)}; double alpha{0.5 * std::min((logp - LOG_LARGEST_ANOMALOUS_PROBABILITY) / (LOG_SMALL_PROBABILITY - LOG_LARGEST_ANOMALOUS_PROBABILITY), @@ -609,8 +591,8 @@ const double CTimeSeriesAnomalyModel::LARGEST_ANOMALOUS_PROBABILITY{0.1}; const double CTimeSeriesAnomalyModel::LOG_LARGEST_ANOMALOUS_PROBABILITY{ CTools::fastLog(LARGEST_ANOMALOUS_PROBABILITY)}; const double CTimeSeriesAnomalyModel::LOG_SMALL_PROBABILITY{CTools::fastLog(SMALL_PROBABILITY)}; -const TDouble10Vec4Vec1Vec CTimeSeriesAnomalyModel::UNIT{ - CConstantWeights::unit(2)}; +const maths_t::TDouble10VecWeightsAry1Vec CTimeSeriesAnomalyModel::UNIT{ + maths_t::CUnitWeights::unit(2)}; CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams& params, std::size_t id, @@ -692,9 +674,8 @@ TSize2Vec1Vec CUnivariateTimeSeriesModel::correlates() const { void CUnivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec& values) { for (const auto& value : values) { m_ResidualModel->adjustOffset( - CConstantWeights::COUNT, {m_TrendModel->detrend(value.first, value.second[0], 0.0)}, - CConstantWeights::SINGLE_UNIT); + maths_t::CUnitWeights::SINGLE_UNIT); } } @@ -728,8 +709,7 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, m_ResidualModel->dataType(type); m_TrendModel->dataType(type); - result = CModel::combine(result, this->updateTrend(params.weightStyles(), samples, - params.trendWeights())); + result = CModel::combine(result, this->updateTrend(samples, params.trendWeights())); for (auto& sample : samples) { sample.second[0] = m_TrendModel->detrend(sample.first, sample.second[0], 0.0); @@ -741,7 +721,7 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, }); TDouble1Vec samples_; - TDouble4Vec1Vec weights_; + 
maths_t::TDoubleWeightsAry1Vec weights_; samples_.reserve(samples.size()); weights_.reserve(samples.size()); TMeanAccumulator averageTime; @@ -752,7 +732,7 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, averageTime.add(static_cast(samples[i].first)); } - m_ResidualModel->addSamples(params.weightStyles(), samples_, weights_); + m_ResidualModel->addSamples(samples_, weights_); m_ResidualModel->propagateForwardsByTime(params.propagationInterval()); if (m_AnomalyModel != nullptr) { m_AnomalyModel->propagateForwardsByTime(params.propagationInterval()); @@ -808,16 +788,14 @@ void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap) { CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const { - return {m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights)) + + const TDouble2VecWeightsAry& weights) const { + return {m_ResidualModel->marginalLikelihoodMode(unpack(weights)) + CBasicStatistics::mean(m_TrendModel->value(time))}; } CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const { + const TDouble2VecWeightsAry1Vec& weights) const { TDouble2Vec1Vec result; TSize1Vec correlated; @@ -834,7 +812,7 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, baseline[1] = CBasicStatistics::mean( correlatedTimeSeriesModels[i]->m_TrendModel->value(time)); TDouble10Vec mode(correlationModels[i].first->marginalLikelihoodMode( - weightStyles, CMultivariateTimeSeriesModel::unpack(weights[i]))); + CMultivariateTimeSeriesModel::unpack(weights[i]))); result[i][variables[i][0]] = baseline[0] + mode[variables[i][0]]; result[i][variables[i][1]] = baseline[1] + mode[variables[i][1]]; } @@ -844,16 +822,13 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, } CUnivariateTimeSeriesModel::TDouble2Vec1Vec -CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const { - TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights))); - +CUnivariateTimeSeriesModel::residualModes(const TDouble2VecWeightsAry& weights) const { TDouble2Vec1Vec result; + TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(unpack(weights))); result.reserve(modes.size()); for (auto mode : modes) { result.push_back({mode}); } - return result; } @@ -891,6 +866,7 @@ CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& correlatedValue, TDouble2Vec hint) const { + double correlateCorrection{0.0}; if (!correlatedValue.empty()) { TSize1Vec correlated{correlatedValue[0].first}; @@ -938,8 +914,7 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time, CUnivariateTimeSeriesModel::TDouble2Vec3Vec CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { + const TDouble2VecWeightsAry& weights_) const { if (m_ResidualModel->isNonInformative()) { return TDouble2Vec3Vec(); } @@ -950,11 +925,11 @@ CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, ? 
CBasicStatistics::mean(m_TrendModel->value(time, confidenceInterval)) : 0.0}; - TDouble4Vec weights(unpack(weights_)); - double median{CBasicStatistics::mean(m_ResidualModel->marginalLikelihoodConfidenceInterval( - 0.0, weightStyles, weights))}; + TDoubleWeightsAry weights(unpack(weights_)); + double median{CBasicStatistics::mean( + m_ResidualModel->marginalLikelihoodConfidenceInterval(0.0, weights))}; TDoubleDoublePr interval{m_ResidualModel->marginalLikelihoodConfidenceInterval( - confidenceInterval, weightStyles, weights)}; + confidenceInterval, weights)}; double result[]{scale * (trend + interval.first), scale * (trend + median), scale * (trend + interval.second)}; @@ -1004,6 +979,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para TTail2Vec& tail, bool& conditional, TSize1Vec& mostAnomalousCorrelate) const { + probability = 1.0; tail.resize(1, maths_t::E_UndeterminedTail); conditional = false; @@ -1017,12 +993,12 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para core_t::TTime time{time_[0][0]}; TDouble1Vec sample{m_TrendModel->detrend( time, value[0][0], params.seasonalConfidenceInterval())}; - TDouble4Vec1Vec weights{unpack(params.weights()[0])}; + maths_t::TDoubleWeightsAry1Vec weights{unpack(params.weights()[0])}; double pl, pu; maths_t::ETail tail_; if (m_ResidualModel->probabilityOfLessLikelySamples( - params.calculation(0), params.weightStyles(), sample, weights, pl, pu, tail_)) { + params.calculation(0), sample, weights, pl, pu, tail_)) { LOG_TRACE(<< "P(" << sample << " | weight = " << weights << ", time = " << time << ") = " << (pl + pu) / 2.0); } else { @@ -1060,7 +1036,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para // Declared outside the loop to minimize the number of times they are created. TSize10Vec variable(1); TDouble10Vec1Vec sample{TDouble10Vec(2)}; - TDouble10Vec4Vec1Vec weights(1); + maths_t::TDouble10VecWeightsAry1Vec weights{maths_t::CUnitWeights::unit(2)}; TDouble2Vec probabilityBucketEmpty(2); TDouble10Vec2Vec pli, pui; TTail10Vec ti; @@ -1082,8 +1058,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para weights[0] = CMultivariateTimeSeriesModel::unpack(params.weights()[i]); if (correlationModels[i].first->probabilityOfLessLikelySamples( - params.calculation(0), params.weightStyles(), sample, - weights, variable, pli, pui, ti)) { + params.calculation(0), sample, weights, variable, pli, pui, ti)) { LOG_TRACE(<< "Marginal P(" << sample << " | weight = " << weights << ", coordinate = " << variable << ") = " << (pli[0][0] + pui[0][0]) / 2.0); @@ -1273,6 +1248,7 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam } void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + // Note that we don't persist this->params() or the correlations // because that state is reinitialized. 
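// For context: the confidence interval above composes three ingredients,
// namely the trend prediction, the residual model's marginal likelihood
// quantiles and a multiplicative variance scale. A trivial sketch of that
// composition (illustrative names):
#include <array>

std::array<double, 3> predictionInterval(double trend,
                                         double residualLower,
                                         double residualMedian,
                                         double residualUpper,
                                         double scale) {
    return {scale * (trend + residualLower),  // lower bound
            scale * (trend + residualMedian), // central estimate
            scale * (trend + residualUpper)}; // upper bound
}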
inserter.insertValue(VERSION_6_3_TAG, ""); @@ -1308,12 +1284,11 @@ maths_t::EDataType CUnivariateTimeSeriesModel::dataType() const { return m_ResidualModel->dataType(); } -CUnivariateTimeSeriesModel::TDouble4Vec -CUnivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec& weights) { - TDouble4Vec result; - result.reserve(weights.size()); - for (const auto& weight : weights) { - result.push_back(weight[0]); +CUnivariateTimeSeriesModel::TDoubleWeightsAry +CUnivariateTimeSeriesModel::unpack(const TDouble2VecWeightsAry& weights) { + TDoubleWeightsAry result{maths_t::CUnitWeights::UNIT}; + for (std::size_t i = 0u; i < weights.size(); ++i) { + result[i] = weights[i][0]; } return result; } @@ -1325,11 +1300,11 @@ void CUnivariateTimeSeriesModel::reinitializeResidualModel(double learnRate, residualModel.setToNonInformative(0.0, residualModel.decayRate()); if (!slidingWindow.empty()) { double slidingWindowLength{static_cast(slidingWindow.size())}; - TDouble4Vec1Vec weight{ - {std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0))}}; + maths_t::TDoubleWeightsAry1Vec weight{ + maths_t::countWeight(std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0)))}; for (const auto& value : slidingWindow) { TDouble1Vec sample{trend->detrend(value.first, value.second, 0.0)}; - residualModel.addSamples(CConstantWeights::COUNT, sample, weight); + residualModel.addSamples(sample, weight); } } } @@ -1359,10 +1334,10 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeri : TAnomalyModelPtr()), m_CandidateChangePoint(other.m_CandidateChangePoint), m_CurrentChangeInterval(other.m_CurrentChangeInterval), - m_ChangeDetector( - !isForForecast && other.m_ChangeDetector - ? std::make_shared(*other.m_ChangeDetector) - : TChangeDetectorPtr()), + m_ChangeDetector(!isForForecast && other.m_ChangeDetector + ? std::make_shared( + *other.m_ChangeDetector) + : TChangeDetectorPtr()), m_SlidingWindow(!isForForecast ? 
other.m_SlidingWindow : TTimeDoublePrCBuf{}), m_Correlations(nullptr) { if (!isForForecast && other.m_Controllers != nullptr) { @@ -1375,13 +1350,13 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par const TSizeVec& order, const TTimeDouble2VecSizeTrVec& values) { std::size_t median{order[order.size() / 2]}; - TDouble4Vec weights(unpack(params.priorWeights()[median])); + TDoubleWeightsAry weights{unpack(params.priorWeights()[median])}; core_t::TTime time{values[median].first}; if (m_ChangeDetector == nullptr) { core_t::TTime minimumTimeToDetect{this->params().minimumTimeToDetectChange()}; core_t::TTime maximumTimeToTest{this->params().maximumTimeToTestForChange()}; - double weight{maths_t::winsorisationWeight(params.weightStyles(), {weights})}; + double weight{maths_t::winsorisationWeight(weights)}; if (minimumTimeToDetect < maximumTimeToTest && pValueFromTailWinsorisationWeight(weight) <= 1e-5) { m_CurrentChangeInterval += this->params().bucketLength(); @@ -1397,9 +1372,7 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par } if (m_ChangeDetector != nullptr) { - m_ChangeDetector->addSamples(params.weightStyles(), - {std::make_pair(time, values[median].second[0])}, - {weights}); + m_ChangeDetector->addSamples({std::make_pair(time, values[median].second[0])}, {weights}); if (m_ChangeDetector->stopTesting()) { m_ChangeDetector.reset(); @@ -1441,9 +1414,8 @@ CUnivariateTimeSeriesModel::applyChange(const SChangeDescription& change) { } CUnivariateTimeSeriesModel::EUpdateResult -CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& weights) { +CUnivariateTimeSeriesModel::updateTrend(const TTimeDouble2VecSizeTrVec& samples, + const TDouble2VecWeightsAryVec& weights) { for (const auto& sample : samples) { if (sample.second.size() != 1) { LOG_ERROR(<< "Dimension mismatch: '" << sample.second.size() << " != 1'"); @@ -1467,8 +1439,8 @@ CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightSt for (auto i : timeorder) { core_t::TTime time{samples[i].first}; double value{samples[i].second[0]}; - TDouble4Vec weight(unpack(weights[i])); - if (m_TrendModel->addPoint(time, value, weightStyles, weight)) { + TDoubleWeightsAry weight{unpack(weights[i])}; + if (m_TrendModel->addPoint(time, value, weight)) { result = E_Reset; } } @@ -1555,11 +1527,9 @@ CTimeSeriesCorrelations* CTimeSeriesCorrelations::cloneForPersistence() const { return new CTimeSeriesCorrelations(*this, true); } -void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& weightStyles) { - using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItr = - TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator; +void CTimeSeriesCorrelations::processSamples() { using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = - std::vector; + std::vector; // The priors use a shared pseudo random number generator which // generates a fixed sequence of random numbers. 
Since the order @@ -1579,7 +1549,7 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei core::CFunctional::SDereference()); TDouble10Vec1Vec multivariateSamples; - TDouble10Vec4Vec1Vec multivariateWeights; + maths_t::TDouble10VecWeightsAry1Vec multivariateWeights; for (auto i : iterators) { std::size_t pid1{i->first.first}; std::size_t pid2{i->first.second}; @@ -1601,8 +1571,7 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei std::swap(indices[0], indices[1]); } multivariateSamples.assign(n1, TDouble10Vec(2)); - multivariateWeights.assign( - n1, TDouble10Vec4Vec(weightStyles.size(), TDouble10Vec(2))); + multivariateWeights.assign(n1, maths_t::CUnitWeights::unit(2)); TSize1Vec& tags2{samples2->s_Tags}; TTime1Vec& times2{samples2->s_Times}; @@ -1638,7 +1607,7 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei } multivariateSamples[j1][indices[0]] = samples1->s_Samples[j1]; multivariateSamples[j1][indices[1]] = samples2->s_Samples[j2]; - for (std::size_t w = 0u; w < weightStyles.size(); ++w) { + for (std::size_t w = 0u; w < maths_t::NUMBER_WEIGHT_STYLES; ++w) { multivariateWeights[j1][w][indices[0]] = samples1->s_Weights[j1][w]; multivariateWeights[j1][w][indices[1]] = samples2->s_Weights[j2][w]; } @@ -1651,7 +1620,7 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei samples2->s_Type == maths_t::E_IntegerData ? maths_t::E_IntegerData : maths_t::E_ContinuousData); - prior->addSamples(weightStyles, multivariateSamples, multivariateWeights); + prior->addSamples(multivariateSamples, multivariateWeights); prior->propagateForwardsByTime(std::min(samples1->s_Interval, samples2->s_Interval)); prior->decayRate(std::sqrt(samples1->s_Multiplier * samples2->s_Multiplier) * prior->decayRate()); @@ -1922,6 +1891,7 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id, TSize2Vec1Vec& variables, TMultivariatePriorCPtrSizePr1Vec& correlationModels, TModelCPtr1Vec& correlatedTimeSeriesModels) const { + variables.clear(); correlationModels.clear(); correlatedTimeSeriesModels.clear(); @@ -2089,8 +2059,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, std::size_t dimension{this->dimension()}; - EUpdateResult result{this->updateTrend(params.weightStyles(), samples, - params.trendWeights())}; + EUpdateResult result{this->updateTrend(samples, params.trendWeights())}; for (auto& sample : samples) { if (sample.second.size() != dimension) { @@ -2110,7 +2079,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, }); TDouble10Vec1Vec samples_; - TDouble10Vec4Vec1Vec weights_; + maths_t::TDouble10VecWeightsAry1Vec weights_; samples_.reserve(samples.size()); weights_.reserve(samples.size()); TMeanAccumulator averageTime; @@ -2121,7 +2090,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, averageTime.add(static_cast(samples[i].first)); } - m_ResidualModel->addSamples(params.weightStyles(), samples_, weights_); + m_ResidualModel->addSamples(samples_, weights_); m_ResidualModel->propagateForwardsByTime(params.propagationInterval()); if (m_AnomalyModel != nullptr) { m_AnomalyModel->propagateForwardsByTime(params.propagationInterval()); @@ -2180,11 +2149,10 @@ void CMultivariateTimeSeriesModel::skipTime(core_t::TTime gap) { CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& 
weights) const { - std::size_t dimension = this->dimension(); + const TDouble2VecWeightsAry& weights) const { + std::size_t dimension{this->dimension()}; TDouble2Vec result(dimension); - TDouble10Vec mode(m_ResidualModel->marginalLikelihoodMode(weightStyles, unpack(weights))); + TDouble10Vec mode(m_ResidualModel->marginalLikelihoodMode(unpack(weights))); for (std::size_t d = 0u; d < dimension; ++d) { result[d] = mode[d] + CBasicStatistics::mean(m_TrendModel[d]->value(time)); } @@ -2193,16 +2161,14 @@ CMultivariateTimeSeriesModel::mode(core_t::TTime time, CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec1Vec& /*weights*/) const { + const TDouble2VecWeightsAry1Vec& /*weights*/) const { return TDouble2Vec1Vec(); } CMultivariateTimeSeriesModel::TDouble2Vec1Vec -CMultivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const { +CMultivariateTimeSeriesModel::residualModes(const TDouble2VecWeightsAry& weights) const { TDouble10Vec1Vec modes( - m_ResidualModel->marginalLikelihoodModes(weightStyles, unpack(weights))); + m_ResidualModel->marginalLikelihoodModes(unpack(weights))); TDouble2Vec1Vec result; result.reserve(modes.size()); for (const auto& mode : modes) { @@ -2268,8 +2234,8 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, CMultivariateTimeSeriesModel::TDouble2Vec3Vec CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { + const TDouble2VecWeightsAry& weights_) const { + if (m_ResidualModel->isNonInformative()) { return TDouble2Vec3Vec(); } @@ -2284,25 +2250,23 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, TDouble2Vec3Vec result(3, TDouble2Vec(dimension)); - TDouble4Vec weights; + maths_t::TDoubleWeightsAry weights{maths_t::CUnitWeights::UNIT}; for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { double trend{m_TrendModel[d]->initialized() ? 
CBasicStatistics::mean(m_TrendModel[d]->value(time, confidenceInterval)) : 0.0}; - weights.clear(); - weights.reserve(weights_.size()); - for (const auto& weight : weights_) { - weights.push_back(weight[d]); + for (std::size_t i = 0u; i < maths_t::NUMBER_WEIGHT_STYLES; ++i) { + weights[i] = weights_[i][d]; } TUnivariatePriorPtr marginal{ m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; double median{CBasicStatistics::mean( - marginal->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; + marginal->marginalLikelihoodConfidenceInterval(0.0, weights))}; TDoubleDoublePr interval{marginal->marginalLikelihoodConfidenceInterval( - confidenceInterval, weightStyles, weights)}; + confidenceInterval, weights)}; result[0][d] = scale * (trend + interval.first); result[1][d] = scale * (trend + median); @@ -2336,6 +2300,7 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa TTail2Vec& tail, bool& conditional, TSize1Vec& mostAnomalousCorrelate) const { + TSize2Vec coordinates(params.coordinates()); if (coordinates.empty()) { coordinates.resize(this->dimension()); @@ -2354,7 +2319,7 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa sample[0][d] = m_TrendModel[d]->detrend( time, value[0][d], params.seasonalConfidenceInterval()); } - TDouble10Vec4Vec1Vec weights{unpack(params.weights()[0])}; + maths_t::TDouble10VecWeightsAry1Vec weights{unpack(params.weights()[0])}; bool bucketEmpty{params.bucketEmpty()[0][0]}; double probabilityBucketEmpty{this->params().probabilityBucketEmpty()}; @@ -2369,8 +2334,7 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa maths_t::EProbabilityCalculation calculation = params.calculation(i); coordinate[0] = coordinates[i]; if (!m_ResidualModel->probabilityOfLessLikelySamples( - calculation, params.weightStyles(), sample, weights, coordinate, - pls, pus, tail_)) { + calculation, sample, weights, coordinate, pls, pus, tail_)) { LOG_ERROR(<< "Failed to compute P(" << sample << " | weight = " << weights << ")"); return false; } @@ -2411,6 +2375,7 @@ CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const { + TDouble2Vec result(this->dimension()); std::size_t dimension{this->dimension()}; @@ -2531,6 +2496,7 @@ bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestorePar } void CMultivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + // Note that we don't persist this->params() because that state // is reinitialized. 
inserter.insertValue(VERSION_6_3_TAG, ""); @@ -2558,12 +2524,11 @@ maths_t::EDataType CMultivariateTimeSeriesModel::dataType() const { return m_ResidualModel->dataType(); } -CMultivariateTimeSeriesModel::TDouble10Vec4Vec -CMultivariateTimeSeriesModel::unpack(const TDouble2Vec4Vec& weights) { - TDouble10Vec4Vec result; - result.reserve(weights.size()); - for (const auto& weight : weights) { - result.emplace_back(weight); +CMultivariateTimeSeriesModel::TDouble10VecWeightsAry +CMultivariateTimeSeriesModel::unpack(const TDouble2VecWeightsAry& weights) { + TDouble10VecWeightsAry result{maths_t::CUnitWeights::unit(weights[0])}; + for (std::size_t i = 0u; i < weights.size(); ++i) { + result[i] = weights[i]; } return result; } @@ -2577,14 +2542,14 @@ void CMultivariateTimeSeriesModel::reinitializeResidualModel( if (!slidingWindow.empty()) { std::size_t dimension{residualModel.dimension()}; double slidingWindowLength{static_cast(slidingWindow.size())}; - TDouble10Vec4Vec1Vec weight{{TDouble10Vec( - dimension, std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0)))}}; + maths_t::TDouble10VecWeightsAry1Vec weight{maths_t::countWeight(TDouble10Vec( + dimension, std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0))))}; for (const auto& value : slidingWindow) { TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; for (std::size_t i = 0u; i < dimension; ++i) { sample[0][i] = trend[i]->detrend(value.first, value.second[i], 0.0); } - residualModel.addSamples(CConstantWeights::COUNT, sample, weight); + residualModel.addSamples(sample, weight); } } } @@ -2604,9 +2569,8 @@ const CMultivariatePrior& CMultivariateTimeSeriesModel::residualModel() const { } CMultivariateTimeSeriesModel::EUpdateResult -CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& weights) { +CMultivariateTimeSeriesModel::updateTrend(const TTimeDouble2VecSizeTrVec& samples, + const TDouble2VecWeightsAryVec& weights) { std::size_t dimension{this->dimension()}; for (const auto& sample : samples) { @@ -2630,15 +2594,15 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weight EUpdateResult result{E_Success}; { - TDouble4Vec weight(weightStyles.size()); + maths_t::TDoubleWeightsAry weight; for (auto i : timeorder) { core_t::TTime time{samples[i].first}; TDouble10Vec value(samples[i].second); for (std::size_t d = 0u; d < dimension; ++d) { - for (std::size_t j = 0u; j < weights[i].size(); ++j) { + for (std::size_t j = 0u; j < maths_t::NUMBER_WEIGHT_STYLES; ++j) { weight[j] = weights[i][j][d]; } - if (m_TrendModel[d]->addPoint(time, value[d], weightStyles, weight)) { + if (m_TrendModel[d]->addPoint(time, value[d], weight)) { result = E_Reset; } } diff --git a/lib/maths/CTools.cc b/lib/maths/CTools.cc index c637cbcdcd..bd0689f65e 100644 --- a/lib/maths/CTools.cc +++ b/lib/maths/CTools.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/lib/maths/CTrendComponent.cc b/lib/maths/CTrendComponent.cc index 9bf7cad3bb..a82c9884c1 100644 --- a/lib/maths/CTrendComponent.cc +++ b/lib/maths/CTrendComponent.cc @@ -209,8 +209,7 @@ void CTrendComponent::shiftLevel(core_t::TTime time, double value, double shift) m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, {{dt}, {value}}); } - m_MagnitudeOfLevelChangeModel.addSamples({maths_t::E_SampleCountWeight}, - {shift}, {{1.0}}); + m_MagnitudeOfLevelChangeModel.addSamples({shift}, maths_t::CUnitWeights::SINGLE_UNIT); 
m_TimeOfLastLevelChange = time; } diff --git a/lib/maths/CXMeansOnline1d.cc b/lib/maths/CXMeansOnline1d.cc index f90809262a..7523788338 100644 --- a/lib/maths/CXMeansOnline1d.cc +++ b/lib/maths/CXMeansOnline1d.cc @@ -46,8 +46,6 @@ namespace maths { namespace { using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; using TDoubleDoublePr = std::pair; using TSizeVec = std::vector; using TTuple = CNaturalBreaksClassifier::TTuple; @@ -91,7 +89,7 @@ maths_t::EFloatingPointErrorStatus logLikelihoodFromCluster(double point, double likelihood; maths_t::EFloatingPointErrorStatus status = normal.jointLogMarginalLikelihood( - CConstantWeights::COUNT, {point}, CConstantWeights::SINGLE_UNIT, likelihood); + {point}, maths_t::CUnitWeights::SINGLE_UNIT, likelihood); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute likelihood for: " << point); return status; @@ -189,6 +187,7 @@ void BICGain(maths_t::EDataType dataType, double& distance, double& nl, double& nr) { + // The basic idea is to compute the difference between the // Bayes Information Content (BIC) for one and two clusters // for the sketch defined by the categories passed to this @@ -407,6 +406,7 @@ void BICGain(maths_t::EDataType dataType, //! \param[in] interval The Winsorisation interval. //! \param[in,out] category The category to Winsorise. void winsorise(const TDoubleDoublePr& interval, TTuple& category) { + double a = interval.first; double b = interval.second; double m = CBasicStatistics::mean(category); @@ -477,6 +477,7 @@ bool splitSearch(double minimumCount, double smallest, const TTupleVec& categories, TSizeVec& result) { + using TSizeSizePr = std::pair; LOG_TRACE(<< "begin split search"); @@ -539,11 +540,11 @@ bool splitSearch(double minimumCount, if (!satisfiesCount) { // Recurse to the (one) node with sufficient count. if (nl > minimumCount && candidate[0] - node.first > 1) { - node = std::make_pair(node.first, candidate[0]); + node = {node.first, candidate[0]}; continue; } if (nr > minimumCount && node.second - candidate[0] > 1) { - node = std::make_pair(candidate[0], node.second); + node = {candidate[0], node.second}; continue; } } else if (satisfiesDistance) { @@ -794,6 +795,7 @@ bool CXMeansOnline1d::clusterSpread(std::size_t index, double& result) const { } void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, double count) const { + result.clear(); if (m_Clusters.empty()) { @@ -801,8 +803,8 @@ void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do return; } - TClusterVecCItr rightCluster = std::lower_bound( - m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); + auto rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), + point, detail::SClusterCentreLess()); if (rightCluster == m_Clusters.end()) { --rightCluster; @@ -830,7 +832,7 @@ void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do // also that we do not want to soft assign the point to a // cluster if its probability is close to zero. 
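The comment above explains why assignment is soft only between the two nearest clusters; here is a sketch of the underlying arithmetic, assuming nothing beyond the log-likelihoods of the point under each candidate cluster. The threshold value and function name are illustrative, not the library's.

#include <algorithm>
#include <cmath>
#include <utility>

// Illustrative cut-off below which we collapse to a hard assignment.
constexpr double HARD_ASSIGNMENT_THRESHOLD = 0.01;

// Normalise two log-likelihoods into responsibilities, hard-assigning
// the point when one cluster's probability is negligible.
std::pair<double, double> responsibilities(double logLikelihoodLeft,
                                           double logLikelihoodRight) {
    // Subtract the maximum before exponentiating for numerical safety.
    double renormalizer = std::max(logLikelihoodLeft, logLikelihoodRight);
    double left = std::exp(logLikelihoodLeft - renormalizer);
    double right = std::exp(logLikelihoodRight - renormalizer);
    double normalizer = left + right;
    left /= normalizer;
    right /= normalizer;
    if (left < HARD_ASSIGNMENT_THRESHOLD) {
        return {0.0, 1.0};
    }
    if (right < HARD_ASSIGNMENT_THRESHOLD) {
        return {1.0, 0.0};
    }
    return {left, right};
}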
- TClusterVecCItr leftCluster = rightCluster; + auto leftCluster = rightCluster; --leftCluster; double likelihoodLeft = leftCluster->logLikelihoodFromCluster(m_WeightCalc, point); double likelihoodRight = rightCluster->logLikelihoodFromCluster(m_WeightCalc, point); @@ -854,14 +856,15 @@ void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do } void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, double count) { + m_HistoryLength += 1.0; m_Smallest.add(point); m_Largest.add(point); clusters.clear(); - TClusterVecItr rightCluster = std::lower_bound( - m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); + auto rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), + point, detail::SClusterCentreLess()); if (rightCluster == m_Clusters.end()) { --rightCluster; @@ -871,7 +874,7 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub if (this->maybeSplit(rightCluster)) { this->cluster(point, clusters, count); } else if (rightCluster != m_Clusters.begin()) { - TClusterVecItr leftCluster = rightCluster; + auto leftCluster = rightCluster; --leftCluster; if (this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); @@ -884,7 +887,7 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub if (this->maybeSplit(rightCluster)) { this->cluster(point, clusters, count); } else { - TClusterVecItr leftCluster = rightCluster; + auto leftCluster = rightCluster; ++rightCluster; if (this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); @@ -893,7 +896,7 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub } else { // See the cluster member function for more details on // soft assignment. 
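The lookups rewritten above (std::lower_bound over m_Clusters with detail::SClusterCentreLess) rely on the clusters staying sorted by centre. A self-contained sketch of that pattern with stand-in types, since SClusterCentreLess itself is not shown in this patch:

#include <algorithm>
#include <iterator>
#include <vector>

struct SCluster {
    double s_Centre;
};

// Stand-in for detail::SClusterCentreLess: orders a cluster against a
// raw point by centre.
struct SCentreLess {
    bool operator()(const SCluster& lhs, double rhs) const {
        return lhs.s_Centre < rhs;
    }
};

// First cluster whose centre is >= point, falling back to the last
// cluster as the member functions above do. Assumes a non-empty
// vector sorted by centre.
std::vector<SCluster>::const_iterator
rightCluster(const std::vector<SCluster>& clusters, double point) {
    auto result = std::lower_bound(clusters.begin(), clusters.end(),
                                   point, SCentreLess{});
    return result == clusters.end() ? std::prev(result) : result;
}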
- TClusterVecItr leftCluster = rightCluster; + auto leftCluster = rightCluster; --leftCluster; double likelihoodLeft = leftCluster->logLikelihoodFromCluster(m_WeightCalc, point); double likelihoodRight = rightCluster->logLikelihoodFromCluster(m_WeightCalc, point); @@ -1025,6 +1028,7 @@ const CXMeansOnline1d::TClusterVec& CXMeansOnline1d::clusters() const { } std::string CXMeansOnline1d::printClusters() const { + if (m_Clusters.empty()) { return std::string(); } @@ -1052,10 +1056,7 @@ std::string CXMeansOnline1d::printClusters() const { weightSum += m_Clusters[i].weight(m_WeightCalc); } - static const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); - static const TDouble4Vec1Vec UNIT_WEIGHT(1, TDouble4Vec(1, 1.0)); - - TDouble1Vec x(1, range.first); + TDouble1Vec x{range.first}; double increment = (range.second - range.first) / (POINTS - 1.0); std::ostringstream coordinatesStr; @@ -1067,7 +1068,7 @@ std::string CXMeansOnline1d::printClusters() const { for (std::size_t j = 0u; j < m_Clusters.size(); ++j) { double logLikelihood; const CPrior& prior = m_Clusters[j].prior(); - if (!(prior.jointLogMarginalLikelihood(COUNT_WEIGHT, x, UNIT_WEIGHT, logLikelihood) & + if (!(prior.jointLogMarginalLikelihood(x, maths_t::CUnitWeights::SINGLE_UNIT, logLikelihood) & (maths_t::E_FpFailed | maths_t::E_FpOverflowed))) { likelihood += m_Clusters[j].weight(m_WeightCalc) / weightSum * std::exp(logLikelihood); @@ -1144,6 +1145,7 @@ double CXMeansOnline1d::minimumSplitCount() const { } bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) { + if (cluster == m_Clusters.end()) { return false; } @@ -1165,6 +1167,7 @@ bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) { } bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, TClusterVecItr cluster2) { + if (cluster1 == m_Clusters.end() || cluster2 == m_Clusters.end()) { return false; } @@ -1187,6 +1190,7 @@ bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, TClusterVecItr cluster } bool CXMeansOnline1d::prune() { + if (m_Clusters.size() <= 1) { return false; } @@ -1216,13 +1220,14 @@ bool CXMeansOnline1d::prune() { } TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const { + double f = (1.0 - m_WinsorisationConfidenceInterval) / 2.0; if (f * this->count() < 1.0) { // Don't bother if we don't expect a sample outside the // Winsorisation interval. - return std::make_pair(boost::numeric::bounds::lowest() / 2.0, - boost::numeric::bounds::highest() / 2.0); + return {boost::numeric::bounds::lowest() / 2.0, + boost::numeric::bounds::highest() / 2.0}; } // The Winsorisation interval are the positions corresponding @@ -1305,8 +1310,7 @@ void CXMeansOnline1d::CCluster::dataType(maths_t::EDataType dataType) { } void CXMeansOnline1d::CCluster::add(double point, double count) { - m_Prior.addSamples(CConstantWeights::COUNT, TDouble1Vec(1, point), - TDouble4Vec1Vec(1, TDouble4Vec(1, count))); + m_Prior.addSamples({point}, {maths_t::countWeight(count)}); m_Structure.add(point, count); } @@ -1373,6 +1377,7 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, double smallest, const TDoubleDoublePr& interval, CIndexGenerator& indexGenerator) { + // We do our clustering top down to minimize space and avoid // making splits before we are confident they exist. 
This is // important for anomaly detection because we do *not* want @@ -1437,6 +1442,7 @@ bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, CAvailableModeDistributions distributions, double smallest, const TDoubleDoublePr& interval) { + if (m_Structure.buffering() || m_Structure.size() == 0 || other.m_Structure.size() == 0) { return false; @@ -1469,6 +1475,7 @@ bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, CXMeansOnline1d::CCluster CXMeansOnline1d::CCluster::merge(CCluster& other, CIndexGenerator& indexGenerator) { + TTupleVec left, right; m_Structure.categories(1, 0, left); other.m_Structure.categories(1, 0, right); diff --git a/lib/maths/Constants.cc b/lib/maths/Constants.cc index 183ec48031..4779ca64ba 100644 --- a/lib/maths/Constants.cc +++ b/lib/maths/Constants.cc @@ -9,14 +9,6 @@ namespace ml { namespace maths { -const maths_t::TWeightStyleVec CConstantWeights::COUNT{maths_t::E_SampleCountWeight}; -const maths_t::TWeightStyleVec CConstantWeights::COUNT_VARIANCE{ - maths_t::E_SampleCountVarianceScaleWeight}; -const maths_t::TWeightStyleVec CConstantWeights::SEASONAL_VARIANCE{ - maths_t::E_SampleSeasonalVarianceScaleWeight}; -const CConstantWeights::TDouble4Vec CConstantWeights::UNIT{1.0}; -const CConstantWeights::TDouble4Vec1Vec CConstantWeights::SINGLE_UNIT{UNIT}; - double maxModelPenalty(double numberSamples) { return 10.0 + numberSamples; } diff --git a/lib/maths/MathsTypes.cc b/lib/maths/MathsTypes.cc index e55b893320..58cbd69cde 100644 --- a/lib/maths/MathsTypes.cc +++ b/lib/maths/MathsTypes.cc @@ -19,396 +19,120 @@ namespace ml { namespace maths_t { namespace { -namespace detail { - -//! Check that the weights styles and weights are consistent. -template -inline bool check(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights) { - if (weightStyles.size() == weights.size()) { - return true; - } - LOG_ERROR(<< "Mismatch in weight styles '" << core::CContainerPrinter::print(weightStyles) - << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); - return false; -} - -//! Multiply \p lhs by \p rhs. -inline void multiplyEquals(double rhs, double& lhs) { - lhs *= rhs; -} - -//! Elementwise multiply \p lhs by \p rhs. -inline void multiplyEquals(const TDouble10Vec& rhs, TDouble10Vec& lhs) { - for (std::size_t i = 0u; i < lhs.size(); ++i) { - lhs[i] *= rhs[i]; - } -} - -//! Check if less than zero. -inline bool isNegative(double value) { - return value < 0.0; -} - -//! Elementwise check if less than zero. -inline bool isNegative(const TDouble10Vec& values) { - for (auto value : values) { - if (value < 0.0) { - return true; - } - } - return false; +TDoubleWeightsAry unitWeight() { + TDoubleWeightsAry result; + result.assign(1.0); + return result; } - -//! Check if less than or equal to zero. -inline bool isNonPostive(double value) { - return value <= 0.0; } -//! Elementwise check if less than or equal to zero. -inline bool isNonPostive(const TDouble10Vec& values) { - for (auto value : values) { - if (value < 0.0) { - return true; - } - } - return false; -} +const TDoubleWeightsAry CUnitWeights::UNIT(unitWeight()); +const TDoubleWeightsAry1Vec CUnitWeights::SINGLE_UNIT{unitWeight()}; -//! Extract the effective sample count from a collection of weights. 
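Every removed template below follows one recipe: scan weightStyles for a given style and fold the matching weights entry into a running product, with validity checks on the result. With styles as fixed array slots the scan disappears. For example, assuming the new functions are declared in MathsTypes.h (that hunk is not shown), the effective update count of a winsorised sample reduces to one multiplication:

#include <maths/MathsTypes.h>

// Sketch: count weight 2.0 damped by winsorisation weight 0.25.
double effectiveUpdateCount() {
    ml::maths_t::TDoubleWeightsAry weights(ml::maths_t::CUnitWeights::UNIT);
    weights[ml::maths_t::E_SampleCountWeight] = 2.0;
    weights[ml::maths_t::E_SampleWinsorisationWeight] = 0.25;
    return ml::maths_t::countForUpdate(weights); // 2.0 * 0.25 == 0.5
}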
-template -void count(const TWeightStyleVec& weightStyles, const core::CSmallVector& weights, T& result) { - if (check(weightStyles, weights)) { - T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - multiplyEquals(weights[i], candidate); - break; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - break; - } - } - if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) { - LOG_ERROR("Ignoring bad count weight: " << result); - } else { - result = std::move(candidate); - } - } +TDoubleWeightsAry countWeight(double weight) { + TDoubleWeightsAry result(CUnitWeights::UNIT); + result[E_SampleCountWeight] = weight; + return result; } -//! Extract the effective sample count with which to update a model -//! from a collection of weights. -template -void countForUpdate(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights, - T& result) { - if (check(weightStyles, weights)) { - T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - multiplyEquals(weights[i], candidate); - break; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - multiplyEquals(weights[i], candidate); - break; - } - } - if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) { - LOG_ERROR("Ignoring bad count weight: " << result); - } else { - result = std::move(candidate); - } - } +TDouble10VecWeightsAry countWeight(double weight, std::size_t dimension) { + TDouble10VecWeightsAry result(CUnitWeights::unit(dimension)); + result[E_SampleCountWeight] = TDouble10Vec(dimension, weight); + return result; } -//! Extract the Winsorisation weight from a collection of weights. -template -void winsorisationWeight(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights, - T& result) { - if (check(weightStyles, weights)) { - T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - multiplyEquals(weights[i], candidate); - break; - } - } - if (!maths::CMathsFuncs::isFinite(result) || isNegative(result)) { - LOG_ERROR("Ignoring bad Winsorisation weight: " << result); - } else { - result = std::move(candidate); - } - } +void setCount(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights) { + weights[E_SampleCountWeight] = TDouble10Vec(dimension, weight); } -//! Extract the seasonal variance scale from a collection of weights. 
-template -void seasonalVarianceScale(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights, - T& result) { - if (check(weightStyles, weights)) { - T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - multiplyEquals(weights[i], candidate); - break; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - break; - } - } - if (!maths::CMathsFuncs::isFinite(result) || isNonPostive(result)) { - LOG_ERROR("Ignoring bad variance scale: " << result); - } else { - result = std::move(candidate); - } - } +double countForUpdate(const TDoubleWeightsAry& weights) { + return weights[E_SampleCountWeight] * weights[E_SampleWinsorisationWeight]; } -//! Extract the count variance scale from a collection of weights. -template -void countVarianceScale(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights, - T& result) { - if (check(weightStyles, weights)) { - T candidate(result); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - multiplyEquals(weights[i], candidate); - break; - case E_SampleWinsorisationWeight: - break; - } - } - if (!maths::CMathsFuncs::isFinite(result) || isNonPostive(result)) { - LOG_ERROR("Ignoring bad variance scale: " << result); - } else { - result = std::move(candidate); - } +TDouble10Vec countForUpdate(const TDouble10VecWeightsAry& weights) { + TDouble10Vec result(weights[E_SampleCountWeight]); + for (std::size_t i = 0u; i < weights[E_SampleWinsorisationWeight].size(); ++i) { + result[i] *= weights[E_SampleWinsorisationWeight][i]; } -} -} -} - -double count(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result{1.0}; - detail::count(weightStyles, weights, result); return result; } -TDouble10Vec count(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::count(weightStyles, weights, result); +TDoubleWeightsAry winsorisationWeight(double weight) { + TDoubleWeightsAry result(CUnitWeights::UNIT); + result[E_SampleWinsorisationWeight] = weight; return result; } -double countForUpdate(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result{1.0}; - detail::countForUpdate(weightStyles, weights, result); +TDouble10VecWeightsAry winsorisationWeight(double weight, std::size_t dimension) { + TDouble10VecWeightsAry result(CUnitWeights::unit(dimension)); + result[E_SampleWinsorisationWeight] = TDouble10Vec(dimension, weight); return result; } -TDouble10Vec countForUpdate(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::countForUpdate(weightStyles, weights, result); - return result; +void setWinsorisationWeight(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights) { + weights[E_SampleWinsorisationWeight] = TDouble10Vec(dimension, weight); } -double winsorisationWeight(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result{1.0}; - detail::winsorisationWeight(weightStyles, weights, result); - return result; +bool isWinsorised(const TDoubleWeightsAry& weights) { + return weights[E_SampleWinsorisationWeight] != 1.0; } -TDouble10Vec 
winsorisationWeight(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::winsorisationWeight(weightStyles, weights, result); - return result; +bool isWinsorised(const TDoubleWeightsAry1Vec& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TDoubleWeightsAry& weight) { + return isWinsorised(weight); + }); } -double seasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result{1.0}; - detail::seasonalVarianceScale(weightStyles, weights, result); +TDoubleWeightsAry seasonalVarianceScaleWeight(double weight) { + TDoubleWeightsAry result(CUnitWeights::UNIT); + result[E_SampleSeasonalVarianceScaleWeight] = weight; return result; } -TDouble10Vec seasonalVarianceScale(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::seasonalVarianceScale(weightStyles, weights, result); +TDouble10VecWeightsAry seasonalVarianceScaleWeight(double weight, std::size_t dimension) { + TDouble10VecWeightsAry result(CUnitWeights::unit(dimension)); + result[E_SampleSeasonalVarianceScaleWeight] = TDouble10Vec(dimension, weight); return result; } -double countVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result{1.0}; - detail::countVarianceScale(weightStyles, weights, result); - return result; +void setSeasonalVarianceScale(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights) { + weights[E_SampleSeasonalVarianceScaleWeight] = TDouble10Vec(dimension, weight); } -TDouble10Vec countVarianceScale(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::countVarianceScale(weightStyles, weights, result); - return result; +bool hasSeasonalVarianceScale(const TDoubleWeightsAry& weights) { + return weights[E_SampleSeasonalVarianceScaleWeight] != 1.0; } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - return seasonalVarianceScale(weightStyles, weights) != 1.0; +bool hasSeasonalVarianceScale(const TDoubleWeightsAry1Vec& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TDoubleWeightsAry& weight) { + return hasSeasonalVarianceScale(weight); + }); } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - if (hasSeasonalVarianceScale(weightStyles, weights[i])) { - return true; - } - } - return false; -} - -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - if (!detail::check(weightStyles, weights)) { - return false; - } - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - for (std::size_t j = 0u; j < weights[i].size(); ++j) { - if (weights[i][j] != 1.0) { - return true; - } - } - break; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - break; - } - } - return false; -} - -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec1Vec& weights) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - if (hasSeasonalVarianceScale(weightStyles, weights[i])) { - return true; - } - } - return false; -} - -bool 
hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - return countVarianceScale(weightStyles, weights) != 1.0; -} - -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - if (hasCountVarianceScale(weightStyles, weights[i])) { - return true; - } - } - return false; +TDoubleWeightsAry countVarianceScaleWeight(double weight) { + TDoubleWeightsAry result(CUnitWeights::UNIT); + result[E_SampleCountVarianceScaleWeight] = weight; + return result; } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - if (!detail::check(weightStyles, weights)) { - return false; - } - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - for (std::size_t j = 0u; j < weights[i].size(); ++j) { - if (weights[i][j] != 1.0) { - return true; - } - } - break; - case E_SampleWinsorisationWeight: - break; - } - } - return false; +TDouble10VecWeightsAry countVarianceScaleWeight(double weight, std::size_t dimension) { + TDouble10VecWeightsAry result(CUnitWeights::unit(dimension)); + result[E_SampleCountVarianceScaleWeight] = TDouble10Vec(dimension, weight); + return result; } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec1Vec& weights) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - if (hasCountVarianceScale(weightStyles, weights[i])) { - return true; - } - } - return false; +void setCountVarianceScale(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights) { + weights[E_SampleCountVarianceScaleWeight] = TDouble10Vec(dimension, weight); } -void setWeight(ESampleWeightStyle style, double weight, TWeightStyleVec& weightStyles, TDouble4Vec& weights) { - std::ptrdiff_t i{std::find(weightStyles.begin(), weightStyles.end(), style) - - weightStyles.begin()}; - if (static_cast(i) < weightStyles.size()) { - weights[i] = weight; - } else { - weightStyles.push_back(style); - weights.push_back(weight); - } +bool hasCountVarianceScale(const TDoubleWeightsAry& weights) { + return weights[E_SampleCountVarianceScaleWeight] != 1.0; } -void setWeight(ESampleWeightStyle style, - double weight, - std::size_t dimension, - TWeightStyleVec& weightStyles, - TDouble10Vec4Vec& weights) { - std::ptrdiff_t i{std::find(weightStyles.begin(), weightStyles.end(), style) - - weightStyles.begin()}; - if (static_cast(i) < weightStyles.size()) { - weights[i].assign(dimension, weight); - } else { - weightStyles.push_back(style); - weights.push_back(TDouble10Vec(dimension, weight)); - } +bool hasCountVarianceScale(const TDoubleWeightsAry1Vec& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TDoubleWeightsAry& weight) { + return hasCountVarianceScale(weight); + }); } } } diff --git a/lib/maths/unittest/CAgglomerativeClustererTest.cc b/lib/maths/unittest/CAgglomerativeClustererTest.cc index 22e4e24d14..54653bf639 100644 --- a/lib/maths/unittest/CAgglomerativeClustererTest.cc +++ b/lib/maths/unittest/CAgglomerativeClustererTest.cc @@ -182,10 +182,6 @@ std::string print(maths::CAgglomerativeClusterer::EObjective o) { } void CAgglomerativeClustererTest::testNode() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CAgglomerativeClustererTest::testNode |"); - LOG_DEBUG(<< 
"+-----------------------------------------+"); - double heights[] = {0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.5, 1.9, 4.0}; maths::CAgglomerativeClusterer::CNode nodes[] = { @@ -250,10 +246,6 @@ void CAgglomerativeClustererTest::testNode() { } void CAgglomerativeClustererTest::testSimplePermutations() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CAgglomerativeClustererTest::testSimplePermutations |"); - LOG_DEBUG(<< "+-------------------------------------------------------+"); - double x[] = {1.0, 3.2, 4.5, 7.8}; std::size_t n = boost::size(x); @@ -306,10 +298,6 @@ void CAgglomerativeClustererTest::testSimplePermutations() { } void CAgglomerativeClustererTest::testDegenerate() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CAgglomerativeClustererTest::testDegenerate |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - double x[] = {1.0, 3.2, 3.2, 3.2, 4.5, 7.8}; std::size_t n = boost::size(x); @@ -380,10 +368,6 @@ void CAgglomerativeClustererTest::testDegenerate() { } void CAgglomerativeClustererTest::testRandom() { - LOG_DEBUG(<< "+-------------------------------------------+"); - LOG_DEBUG(<< "| CAgglomerativeClustererTest::testRandom |"); - LOG_DEBUG(<< "+-------------------------------------------+"); - test::CRandomNumbers rng; std::size_t n = 20u; diff --git a/lib/maths/unittest/CAssignmentTest.cc b/lib/maths/unittest/CAssignmentTest.cc index bfbc59a980..858f38df9d 100644 --- a/lib/maths/unittest/CAssignmentTest.cc +++ b/lib/maths/unittest/CAssignmentTest.cc @@ -87,10 +87,6 @@ double match(const TDoubleVecVec& costs, TSizeSizePrVec& matching) { } void CAssignmentTest::testKuhnMunkres() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CAssignmentTest::testKuhnMunkres |"); - LOG_DEBUG(<< "+-----------------------------------+"); - { LOG_DEBUG(<< "test 1: bad input"); const double test11[][5] = { diff --git a/lib/maths/unittest/CBasicStatisticsTest.cc b/lib/maths/unittest/CBasicStatisticsTest.cc index 944f7106b7..63d8e115f6 100644 --- a/lib/maths/unittest/CBasicStatisticsTest.cc +++ b/lib/maths/unittest/CBasicStatisticsTest.cc @@ -81,10 +81,6 @@ CppUnit::Test* CBasicStatisticsTest::suite() { } void CBasicStatisticsTest::testMean() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| CBasicStatisticsTest::testMean |"); - LOG_DEBUG(<< "+---------------------------------+"); - double sample[] = {0.9, 10.0, 5.6, 1.23, -12.3, 445.2, 0.0, 1.2}; ml::maths::CBasicStatistics::TDoubleVec sampleVec( @@ -97,10 +93,6 @@ void CBasicStatisticsTest::testMean() { } void CBasicStatisticsTest::testCentralMoments() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CBasicStatisticsTest::testCentralMoments |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - using TDoubleVec = std::vector; LOG_DEBUG(<< "Test mean double"); @@ -710,10 +702,6 @@ void CBasicStatisticsTest::testCentralMoments() { } void CBasicStatisticsTest::testVectorCentralMoments() { - LOG_DEBUG(<< "+--------------------------------------------------+"); - LOG_DEBUG(<< "| CBasicStatisticsTest::testVectorCentralMoments |"); - LOG_DEBUG(<< "+--------------------------------------------------+"); - using TDouble2Vec = ml::core::CSmallVector; using TDoubleVec = std::vector; @@ -828,10 +816,6 @@ void CBasicStatisticsTest::testVectorCentralMoments() { } void CBasicStatisticsTest::testCovariances() { - LOG_DEBUG(<< 
"+-----------------------------------------+"); - LOG_DEBUG(<< "| CBasicStatisticsTest::testCovariances |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "N(3,I)"); { const double raw[][3] = { @@ -974,10 +958,6 @@ void CBasicStatisticsTest::testCovariances() { } void CBasicStatisticsTest::testCovariancesLedoitWolf() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CBasicStatisticsTest::testCovariancesLedoitWolf |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; using TVector2 = ml::maths::CVectorNx1; @@ -1053,10 +1033,6 @@ void CBasicStatisticsTest::testCovariancesLedoitWolf() { } void CBasicStatisticsTest::testMedian() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CBasicStatisticsTest::testMedian |"); - LOG_DEBUG(<< "+------------------------------------+"); - { ml::maths::CBasicStatistics::TDoubleVec sampleVec; @@ -1117,10 +1093,6 @@ void CBasicStatisticsTest::testMedian() { } void CBasicStatisticsTest::testOrderStatistics() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CBasicStatisticsTest::testOrderStatistics |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - // Test that the order statistics accumulators work for finding min and max // elements of a collection. @@ -1310,10 +1282,6 @@ void CBasicStatisticsTest::testOrderStatistics() { } void CBasicStatisticsTest::testMinMax() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CBasicStatisticsTest::testMinMax |"); - LOG_DEBUG(<< "+------------------------------------+"); - using TDoubleVec = std::vector; TDoubleVec positive{1.0, 2.7, 4.0, 0.3, 11.7}; diff --git a/lib/maths/unittest/CBjkstUniqueValuesTest.cc b/lib/maths/unittest/CBjkstUniqueValuesTest.cc index 90abbcfad5..3433b228dd 100644 --- a/lib/maths/unittest/CBjkstUniqueValuesTest.cc +++ b/lib/maths/unittest/CBjkstUniqueValuesTest.cc @@ -36,10 +36,6 @@ uint8_t trailingZeros(uint32_t x) { } void CBjkstUniqueValuesTest::testTrailingZeros() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CBjkstUniqueValuesTest::testTrailingZeros |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - uint32_t n = 1; for (uint8_t i = 0; i < 32; n <<= 1, ++i) { CPPUNIT_ASSERT_EQUAL(i, CBjkstUniqueValues::trailingZeros(n)); @@ -58,10 +54,6 @@ void CBjkstUniqueValuesTest::testTrailingZeros() { } void CBjkstUniqueValuesTest::testNumber() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CBjkstUniqueValuesTest::testNumber |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // Test the approximation errors. const std::size_t numberTests = 1000u; @@ -125,10 +117,6 @@ void CBjkstUniqueValuesTest::testNumber() { } void CBjkstUniqueValuesTest::testRemove() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CBjkstUniqueValuesTest::testRemove |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // Check that our error is controlled if we add and remove // categories. 
Note because compression is an irreversible // operation we expect higher relative error if the number @@ -191,10 +179,6 @@ void CBjkstUniqueValuesTest::testRemove() { } void CBjkstUniqueValuesTest::testSwap() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CBjkstUniqueValuesTest::testSwap |"); - LOG_DEBUG(<< "+------------------------------------+"); - test::CRandomNumbers rng; TSizeVec categories1; @@ -254,10 +238,6 @@ void CBjkstUniqueValuesTest::testSwap() { } void CBjkstUniqueValuesTest::testSmall() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CBjkstUniqueValuesTest::testSmall |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Test that there is zero error for small distinct // counts. This is managed by switching to use a sketch // only when exceeding the memory threshold. @@ -299,10 +279,6 @@ void CBjkstUniqueValuesTest::testSmall() { } void CBjkstUniqueValuesTest::testPersist() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CBjkstUniqueValuesTest::testPersist |"); - LOG_DEBUG(<< "+---------------------------------------+"); - test::CRandomNumbers rng; TSizeVec categories; diff --git a/lib/maths/unittest/CBootstrapClustererTest.cc b/lib/maths/unittest/CBootstrapClustererTest.cc index 0b0f5a80fb..edbc82436c 100644 --- a/lib/maths/unittest/CBootstrapClustererTest.cc +++ b/lib/maths/unittest/CBootstrapClustererTest.cc @@ -105,10 +105,6 @@ void connect(const TSizeVec& U, const TSizeVec& V, TGraph& graph) { } void CBootstrapClustererTest::testFacade() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CBootstrapClustererTest::testFacade |"); - LOG_DEBUG(<< "+---------------------------------------+"); - // Check that clustering by facade produces the sample result. std::size_t improveParamsKmeansIterations = 4; @@ -179,10 +175,6 @@ void CBootstrapClustererTest::testFacade() { } void CBootstrapClustererTest::testBuildClusterGraph() { - LOG_DEBUG(<< "+--------------------------------------------------+"); - LOG_DEBUG(<< "| CBootstrapClustererTest::testBuildClusterGraph |"); - LOG_DEBUG(<< "+--------------------------------------------------+"); - // Test we get the graph edges we expect for different overlap // thresholds. @@ -286,10 +278,6 @@ void CBootstrapClustererTest::testBuildClusterGraph() { } void CBootstrapClustererTest::testCutSearch() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CBootstrapClustererTest::testCutSearch |"); - LOG_DEBUG(<< "+------------------------------------------+"); - // Test we generally find the sparsest cut in a graph with two cliques. std::size_t trials = 50; @@ -341,10 +329,6 @@ void CBootstrapClustererTest::testCutSearch() { } void CBootstrapClustererTest::testSeparate() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CBootstrapClustererTest::testSeparate |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // Test we separate a graph with three cliques when we can. test::CRandomNumbers rng; @@ -427,10 +411,6 @@ void CBootstrapClustererTest::testSeparate() { } void CBootstrapClustererTest::testThickets() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CBootstrapClustererTest::testThickets |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // Test we find the correct thickets in a graph with two // components and three cliques. 
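The CBjkstUniqueValuesTest hunks above revolve around the trailing-zeros statistic: the number of low-order zero bits of a hashed value is geometrically distributed, which is what lets the sketch estimate distinct counts within a fixed memory budget. A portable sketch of the helper testTrailingZeros exercises; the production implementation may differ (for example by using intrinsics), and zero input is handled by an arbitrary convention the tests never hit:

#include <cstdint>

// Count trailing zero bits of x; returns 0 for x == 0 by convention.
std::uint8_t trailingZeros(std::uint32_t x) {
    std::uint8_t result = 0;
    for (; x != 0 && (x & 1) == 0; x >>= 1) {
        ++result;
    }
    return result;
}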
@@ -512,10 +492,6 @@ void CBootstrapClustererTest::testThickets() { } void CBootstrapClustererTest::testNonConvexClustering() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CBootstrapClustererTest::testNonConvexClustering |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // Check the improvement in clustering when the underlying // assumptions of x-means (specifically cluster convexness // and Gaussian noise) are violated. @@ -681,10 +657,6 @@ void CBootstrapClustererTest::testNonConvexClustering() { } void CBootstrapClustererTest::testClusteringStability() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CBootstrapClustererTest::testClusteringStability |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // Test that when we think there is sufficient certainty // to create clusters the assignment of points to clusters // is stable over multiple samplings of the data. diff --git a/lib/maths/unittest/CBoundingBoxTest.cc b/lib/maths/unittest/CBoundingBoxTest.cc index 9359bd984d..09691d2157 100644 --- a/lib/maths/unittest/CBoundingBoxTest.cc +++ b/lib/maths/unittest/CBoundingBoxTest.cc @@ -50,10 +50,6 @@ bool closerToX(const TBoundingBox4& bb, const TVector4& x, const TVector4& y) { } void CBoundingBoxTest::testAdd() { - LOG_DEBUG(<< "+-----------------------------+"); - LOG_DEBUG(<< "| CBoundingBoxTest::testAdd |"); - LOG_DEBUG(<< "+-----------------------------+"); - double points[][2] = {{-1.0, 5.0}, {2.0, 20.0}, {10.0, 4.0}, {-10.0, -3.0}, {200.0, 50.0}}; TBoundingBox2 bb(TVector2(&points[0][0], &points[0][0] + 2)); @@ -99,10 +95,6 @@ void CBoundingBoxTest::testAdd() { } void CBoundingBoxTest::testCloserTo() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CBoundingBoxTest::testCloserTo |"); - LOG_DEBUG(<< "+----------------------------------+"); - const std::size_t n = 1000; test::CRandomNumbers rng; @@ -120,7 +112,7 @@ void CBoundingBoxTest::testCloserTo() { TBoundingBox2 bb(x1); bb.add(x2); - for (std::size_t j = 0u; j < probes.size(); j += 4) { + for (std::size_t j = 0u; j + 4 <= probes.size(); j += 4) { TVector2 y1(&probes[j], &probes[j + 2]); TVector2 y2(&probes[j + 2], &probes[j + 4]); bool closer = closerToX(bb, y1, y2); @@ -145,7 +137,7 @@ void CBoundingBoxTest::testCloserTo() { TBoundingBox4 bb(x1); bb.add(x2); - for (std::size_t j = 0u; j < probes.size(); j += 4) { + for (std::size_t j = 0u; j + 8 <= probes.size(); j += 4) { TVector4 y1(&probes[j], &probes[j + 4]); TVector4 y2(&probes[j + 4], &probes[j + 8]); bool closer = closerToX(bb, y1, y2); diff --git a/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc b/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc index fabbe9cfe5..7d234a23fc 100644 --- a/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc +++ b/lib/maths/unittest/CCalendarComponentAdaptiveBucketingTest.cc @@ -40,10 +40,6 @@ void CCalendarComponentAdaptiveBucketingTest::tearDown() { } void CCalendarComponentAdaptiveBucketingTest::testInitialize() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testInitialize |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - maths::CCalendarFeature feature{maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, 86400}; maths::CCalendarComponentAdaptiveBucketing bucketing{feature}; @@ -72,10 +68,6 @@ 
void CCalendarComponentAdaptiveBucketingTest::testInitialize() { } void CCalendarComponentAdaptiveBucketingTest::testSwap() { - LOG_DEBUG(<< "+-----------------------------------------------------+"); - LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testSwap |"); - LOG_DEBUG(<< "+-----------------------------------------------------+"); - core_t::TTime now{core::CTimeUtils::now()}; maths::CCalendarFeature feature1{maths::CCalendarFeature::DAYS_SINCE_START_OF_MONTH, now}; @@ -118,10 +110,6 @@ void CCalendarComponentAdaptiveBucketingTest::testSwap() { } void CCalendarComponentAdaptiveBucketingTest::testRefine() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testRefine |"); - LOG_DEBUG(<< "+-------------------------------------------------------+"); - // Test that refine reduces the function approximation error. core_t::TTime times[] = {-1, 3600, 10800, 18000, 25200, 32400, 39600, @@ -206,10 +194,6 @@ void CCalendarComponentAdaptiveBucketingTest::testRefine() { } void CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime() { - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime |"); - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - // Check no error is introduced by the aging process to // the bucket values and that the rate at which the total // count is reduced uniformly. @@ -239,10 +223,6 @@ void CCalendarComponentAdaptiveBucketingTest::testPropagateForwardsByTime() { } void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() { - LOG_DEBUG(<< "+--------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength |"); - LOG_DEBUG(<< "+--------------------------------------------------------------------+"); - double function[] = {0.0, 0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; @@ -302,10 +282,6 @@ void CCalendarComponentAdaptiveBucketingTest::testMinimumBucketLength() { } void CCalendarComponentAdaptiveBucketingTest::testUnintialized() { - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testUnintialized |"); - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - // Check that all the functions work and return the expected // values on an uninitialized bucketing. @@ -349,10 +325,6 @@ void CCalendarComponentAdaptiveBucketingTest::testUnintialized() { } void CCalendarComponentAdaptiveBucketingTest::testKnots() { - LOG_DEBUG(<< "+------------------------------------------------------+"); - LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testKnots |"); - LOG_DEBUG(<< "+------------------------------------------------------+"); - // Check prediction errors in values and variances. 
test::CRandomNumbers rng; @@ -445,10 +417,6 @@ void CCalendarComponentAdaptiveBucketingTest::testKnots() { } void CCalendarComponentAdaptiveBucketingTest::testPersist() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CCalendarComponentAdaptiveBucketingTest::testPersist |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // Check that serialization is idempotent. double decayRate{0.1}; diff --git a/lib/maths/unittest/CCalendarFeatureTest.cc b/lib/maths/unittest/CCalendarFeatureTest.cc index a0908c44ef..7011b34665 100644 --- a/lib/maths/unittest/CCalendarFeatureTest.cc +++ b/lib/maths/unittest/CCalendarFeatureTest.cc @@ -41,10 +41,6 @@ class CScopeGMT { } void CCalendarFeatureTest::testInitialize() { - LOG_DEBUG(<< "+----------------------------------------+"); - LOG_DEBUG(<< "| CCalendarFeatureTest::testInitialize |"); - LOG_DEBUG(<< "+----------------------------------------+"); - // Check we get the expected features. test::CRandomNumbers rng; @@ -73,10 +69,6 @@ void CCalendarFeatureTest::testInitialize() { } void CCalendarFeatureTest::testComparison() { - LOG_DEBUG(<< "+----------------------------------------+"); - LOG_DEBUG(<< "| CCalendarFeatureTest::testComparison |"); - LOG_DEBUG(<< "+----------------------------------------+"); - // Check some comparison invariants. test::CRandomNumbers rng; @@ -111,10 +103,6 @@ void CCalendarFeatureTest::testComparison() { } void CCalendarFeatureTest::testOffset() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CCalendarFeatureTest::testOffset |"); - LOG_DEBUG(<< "+------------------------------------+"); - // Check some properties of offset. Specifically, // - offset(time + delta) = offset(time) + delta provided // times are in same month except when the delta crosses @@ -255,10 +243,6 @@ void CCalendarFeatureTest::testOffset() { } void CCalendarFeatureTest::testPersist() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CCalendarFeatureTest::testPersist |"); - LOG_DEBUG(<< "+-------------------------------------+"); - maths::CCalendarFeature::TCalendarFeature4Ary features = maths::CCalendarFeature::features(core::CTimeUtils::now()); diff --git a/lib/maths/unittest/CCategoricalToolsTest.cc b/lib/maths/unittest/CCategoricalToolsTest.cc index 99070e4db8..bc585fe50e 100644 --- a/lib/maths/unittest/CCategoricalToolsTest.cc +++ b/lib/maths/unittest/CCategoricalToolsTest.cc @@ -24,22 +24,12 @@ using TSizeVec = std::vector; using namespace ml; void CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample() { - LOG_DEBUG(<< "+-----------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CCategoricalToolsTest::testProbabilityOfLessLikelyMultinomialSample |"); - LOG_DEBUG(<< "+-----------------------------------------------------------------------+"); } void CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount() { - LOG_DEBUG(<< "+-------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CCategoricalToolsTest::testProbabilityOfLessLikelyCategoryCount |"); - LOG_DEBUG(<< "+-------------------------------------------------------------------+"); } void CCategoricalToolsTest::testExpectedDistinctCategories() { - LOG_DEBUG(<< "+---------------------------------------------------------+"); - LOG_DEBUG(<< "| CCategoricalToolsTest::testExpectedDistinctCategories |"); - LOG_DEBUG(<< 
"+---------------------------------------------------------+"); - using TDoubleVecVec = std::vector; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; @@ -380,10 +370,6 @@ void CCategoricalToolsTest::testExpectedDistinctCategories() { } void CCategoricalToolsTest::testLogBinomialProbability() { - LOG_DEBUG(<< "+-----------------------------------------------------+"); - LOG_DEBUG(<< "| CCategoricalToolsTest::testLogBinomialProbability |"); - LOG_DEBUG(<< "+-----------------------------------------------------+"); - // Test the calculation matches the boost::binomial_distribution. double n[] = {10, 100, 10000}; @@ -426,10 +412,6 @@ void CCategoricalToolsTest::testLogBinomialProbability() { } void CCategoricalToolsTest::testLogMultinomialProbability() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CCategoricalToolsTest::testLogMultinomialProbability |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // Test: // 1) The two category case matches the binomial. // 2) The marginal matches the binomial. diff --git a/lib/maths/unittest/CChecksumTest.cc b/lib/maths/unittest/CChecksumTest.cc index 9b7ecb4890..23e62a16d5 100644 --- a/lib/maths/unittest/CChecksumTest.cc +++ b/lib/maths/unittest/CChecksumTest.cc @@ -63,10 +63,6 @@ using TBarVec = std::vector; } void CChecksumTest::testMemberChecksum() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CChecksumTest::testMemberChecksum |"); - LOG_DEBUG(<< "+-------------------------------------+"); - uint64_t seed = 1679023009937ull; LOG_DEBUG(<< ""); @@ -83,10 +79,6 @@ void CChecksumTest::testMemberChecksum() { } void CChecksumTest::testContainers() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| CChecksumTest::testContainers |"); - LOG_DEBUG(<< "+---------------------------------+"); - uint64_t seed = 1679023009937ull; test::CRandomNumbers rng; @@ -195,10 +187,6 @@ void CChecksumTest::testContainers() { } void CChecksumTest::testNullable() { - LOG_DEBUG(<< "+-------------------------------+"); - LOG_DEBUG(<< "| CChecksumTest::testNullable |"); - LOG_DEBUG(<< "+-------------------------------+"); - uint64_t seed = 1679023009937ull; // Test optional and pointers. @@ -232,10 +220,6 @@ void CChecksumTest::testNullable() { } void CChecksumTest::testAccumulators() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CChecksumTest::testAccumulators |"); - LOG_DEBUG(<< "+-----------------------------------+"); - uint64_t seed = 1679023009937ull; // Test accumulators. @@ -253,10 +237,6 @@ void CChecksumTest::testAccumulators() { } void CChecksumTest::testPair() { - LOG_DEBUG(<< "+---------------------------+"); - LOG_DEBUG(<< "| CChecksumTest::testPair |"); - LOG_DEBUG(<< "+---------------------------+"); - uint64_t seed = 1679023009937ull; // Test pair. 
@@ -292,10 +272,6 @@ void CChecksumTest::testPair() { } void CChecksumTest::testArray() { - LOG_DEBUG(<< "+----------------------------+"); - LOG_DEBUG(<< "| CChecksumTest::testArray |"); - LOG_DEBUG(<< "+----------------------------+"); - uint64_t seed = 1679023009937ull; double a[] = {1.0, 23.8, 15.2, 14.7}; @@ -314,10 +290,6 @@ void CChecksumTest::testArray() { } void CChecksumTest::testCombinations() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CChecksumTest::testCombinations |"); - LOG_DEBUG(<< "+-----------------------------------+"); - uint64_t seed = 1679023009937ull; test::CRandomNumbers rng; diff --git a/lib/maths/unittest/CClustererTest.cc b/lib/maths/unittest/CClustererTest.cc index 15594374cd..18b404fb41 100644 --- a/lib/maths/unittest/CClustererTest.cc +++ b/lib/maths/unittest/CClustererTest.cc @@ -18,10 +18,6 @@ using namespace ml; void CClustererTest::testIndexGenerator() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CClustererTest::testIndexGenerator |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // We test the invariants that: // 1) It never produces duplicate index. // 2) The highest index in the set is less than the diff --git a/lib/maths/unittest/CCountMinSketchTest.cc b/lib/maths/unittest/CCountMinSketchTest.cc index f8c50575e2..aa814d8259 100644 --- a/lib/maths/unittest/CCountMinSketchTest.cc +++ b/lib/maths/unittest/CCountMinSketchTest.cc @@ -20,10 +20,6 @@ using namespace ml; using TDoubleVec = std::vector; void CCountMinSketchTest::testCounts() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CCountMinSketchTest::testCounts |"); - LOG_DEBUG(<< "+-----------------------------------+"); - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; test::CRandomNumbers rng; @@ -119,10 +115,6 @@ void CCountMinSketchTest::testCounts() { } void CCountMinSketchTest::testSwap() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| CCountMinSketchTest::testSwap |"); - LOG_DEBUG(<< "+---------------------------------+"); - test::CRandomNumbers rng; TDoubleVec counts1; @@ -182,10 +174,6 @@ void CCountMinSketchTest::testSwap() { } void CCountMinSketchTest::testPersist() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CCountMinSketchTest::testPersist |"); - LOG_DEBUG(<< "+------------------------------------+"); - test::CRandomNumbers rng; TDoubleVec counts; diff --git a/lib/maths/unittest/CDecayRateControllerTest.cc b/lib/maths/unittest/CDecayRateControllerTest.cc index 42ad920c8a..d6b2a5760e 100644 --- a/lib/maths/unittest/CDecayRateControllerTest.cc +++ b/lib/maths/unittest/CDecayRateControllerTest.cc @@ -22,10 +22,6 @@ using namespace ml; using namespace handy_typedefs; void CDecayRateControllerTest::testLowCov() { - LOG_DEBUG(<< "+----------------------------------------+"); - LOG_DEBUG(<< "| CDecayRateControllerTest::testLowCov |"); - LOG_DEBUG(<< "+----------------------------------------+"); - // Supply small but biased errors so we increase the decay // rate to its maximum then gradually reduce the error to // less than the coefficient of variation cutoff to control @@ -50,10 +46,6 @@ void CDecayRateControllerTest::testLowCov() { } void CDecayRateControllerTest::testOrderedErrors() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CDecayRateControllerTest::testOrderedErrors |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - // 
Test that if we add a number of ordered samples, such // that overall they don't have bias, the decay rate is // not increased. @@ -76,10 +68,6 @@ void CDecayRateControllerTest::testOrderedErrors() { } void CDecayRateControllerTest::testPersist() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CDecayRateControllerTest::testPersist |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - using TDoubleVec = std::vector; test::CRandomNumbers rng; diff --git a/lib/maths/unittest/CEntropySketchTest.cc b/lib/maths/unittest/CEntropySketchTest.cc index 715df64daa..d3931bac7d 100644 --- a/lib/maths/unittest/CEntropySketchTest.cc +++ b/lib/maths/unittest/CEntropySketchTest.cc @@ -22,10 +22,6 @@ using namespace ml; void CEntropySketchTest::testAll() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CBjkstUniqueValuesTest::testPersist |"); - LOG_DEBUG(<< "+---------------------------------------+"); - using TSizeVec = std::vector; using TSizeDoubleUMap = boost::unordered_map; using TSizeDoubleUMapCItr = TSizeDoubleUMap::const_iterator; diff --git a/lib/maths/unittest/CEqualWithToleranceTest.cc b/lib/maths/unittest/CEqualWithToleranceTest.cc index 109ef2246e..16016c1d96 100644 --- a/lib/maths/unittest/CEqualWithToleranceTest.cc +++ b/lib/maths/unittest/CEqualWithToleranceTest.cc @@ -14,10 +14,6 @@ using namespace ml; void CEqualWithToleranceTest::testScalar() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CEqualWithToleranceTest::testScalar |"); - LOG_DEBUG(<< "+---------------------------------------+"); - { maths::CEqualWithTolerance abs( maths::CToleranceTypes::E_AbsoluteTolerance, 0.31); @@ -84,10 +80,6 @@ void CEqualWithToleranceTest::testScalar() { } void CEqualWithToleranceTest::testVector() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CEqualWithToleranceTest::testVector |"); - LOG_DEBUG(<< "+---------------------------------------+"); - float a_[] = {1.1f, 1.2f}; float b_[] = {1.2f, 1.3f}; float c_[] = {201.1f, 202.2f}; @@ -152,10 +144,6 @@ void CEqualWithToleranceTest::testVector() { } void CEqualWithToleranceTest::testMatrix() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CEqualWithToleranceTest::testMatrix |"); - LOG_DEBUG(<< "+---------------------------------------+"); - float a_[] = {1.1f, 1.2f, 1.3f}; float b_[] = {1.2f, 1.3f, 1.4f}; float c_[] = {201.1f, 202.2f, 203.4f}; diff --git a/lib/maths/unittest/CForecastTest.cc b/lib/maths/unittest/CForecastTest.cc index a7e1dd4654..cb556697bd 100644 --- a/lib/maths/unittest/CForecastTest.cc +++ b/lib/maths/unittest/CForecastTest.cc @@ -39,8 +39,7 @@ using TDoubleVec = std::vector; using TTimeDoublePr = std::pair; using TTimeDoublePrVec = std::vector; using TDouble2Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TDouble2Vec4VecVec = std::vector; +using TDouble2VecWeightsAryVec = std::vector; using TTimeDouble2VecSizeTr = core::CTriple; using TTimeDouble2VecSizeTrVec = std::vector; using TErrorBarVec = std::vector; @@ -81,10 +80,6 @@ void mockSink(maths::SErrorBar errorBar, TErrorBarVec& prediction) { } void CForecastTest::testDailyNoLongTermTrend() { - LOG_DEBUG(<< "+-------------------------------------------+"); - LOG_DEBUG(<< "| CForecastTest::testDailyNoLongTermTrend |"); - LOG_DEBUG(<< "+-------------------------------------------+"); - core_t::TTime bucketLength{600}; TDoubleVec y{0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, 120.0, 
120.0, 110.0, 100.0, 90.0, 100.0, 130.0, 80.0, @@ -103,10 +98,6 @@ void CForecastTest::testDailyNoLongTermTrend() { } void CForecastTest::testDailyConstantLongTermTrend() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CForecastTest::testDailyConstantLongTermTrend |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - core_t::TTime bucketLength{3600}; TDoubleVec y{0.0, 2.0, 2.0, 4.0, 8.0, 10.0, 15.0, 20.0, 80.0, 100.0, 110.0, 120.0, 110.0, 100.0, 90.0, 80.0, @@ -122,10 +113,6 @@ void CForecastTest::testDailyConstantLongTermTrend() { } void CForecastTest::testDailyVaryingLongTermTrend() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CForecastTest::testDailyVaryingLongTermTrend |"); - LOG_DEBUG(<< "+------------------------------------------------+"); - core_t::TTime bucketLength{3600}; double day{86400.0}; TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, @@ -151,10 +138,6 @@ void CForecastTest::testDailyVaryingLongTermTrend() { } void CForecastTest::testComplexNoLongTermTrend() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CForecastTest::testComplexNoLongTermTrend |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - core_t::TTime bucketLength{3600}; TDoubleVec y{0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, 80.0, 100.0, 110.0, 120.0, 110.0, 100.0, 90.0, 80.0, @@ -171,10 +154,6 @@ void CForecastTest::testComplexNoLongTermTrend() { } void CForecastTest::testComplexConstantLongTermTrend() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CForecastTest::testComplexConstantLongTermTrend |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - core_t::TTime bucketLength{3600}; TDoubleVec y{0.0, 10.0, 20.0, 20.0, 30.0, 40.0, 50.0, 60.0, 80.0, 100.0, 110.0, 120.0, 110.0, 100.0, 90.0, 80.0, @@ -192,10 +171,6 @@ void CForecastTest::testComplexConstantLongTermTrend() { } void CForecastTest::testComplexVaryingLongTermTrend() { - LOG_DEBUG(<< "+--------------------------------------------------+"); - LOG_DEBUG(<< "| CForecastTest::testComplexVaryingLongTermTrend |"); - LOG_DEBUG(<< "+--------------------------------------------------+"); - core_t::TTime bucketLength{3600}; double day{86400.0}; TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, @@ -226,10 +201,6 @@ void CForecastTest::testComplexVaryingLongTermTrend() { } void CForecastTest::testNonNegative() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CForecastTest::testNonNegative |"); - LOG_DEBUG(<< "+----------------------------------+"); - core_t::TTime bucketLength{1800}; test::CRandomNumbers rng; @@ -252,7 +223,8 @@ void CForecastTest::testNonNegative() { //TDoubleVec uy; core_t::TTime time{0}; - TDouble2Vec4VecVec weights{{{1.0}}}; + std::vector weights{ + maths_t::CUnitWeights::unit(1)}; for (std::size_t d = 0u; d < 20; ++d) { TDoubleVec noise; rng.generateNormalSamples(2.0, 3.0, 48, noise); @@ -261,7 +233,6 @@ void CForecastTest::testNonNegative() { params.integer(false) .nonNegative(true) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); double y{std::max(*value, 0.0)}; @@ -316,10 +287,6 @@ void CForecastTest::testNonNegative() { } void CForecastTest::testFinancialIndex() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CForecastTest::testFinancialIndex |"); - 
LOG_DEBUG(<< "+-------------------------------------+"); - core_t::TTime bucketLength{1800}; TTimeDoublePrVec timeseries; @@ -353,14 +320,10 @@ void CForecastTest::testFinancialIndex() { std::size_t n{5 * timeseries.size() / 6}; - TDouble2Vec4VecVec weights{{{1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; for (std::size_t i = 0u; i < n; ++i) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples( params, {core::make_triple(timeseries[i].first, TDouble2Vec{timeseries[i].second}, TAG)}); @@ -461,18 +424,14 @@ void CForecastTest::test(TTrend trend, &controllers); core_t::TTime time{0}; - TDouble2Vec4VecVec weights{{{1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; for (std::size_t d = 0u; d < daysToLearn; ++d) { TDoubleVec noise; rng.generateNormalSamples(0.0, noiseVariance, 86400 / bucketLength, noise); for (std::size_t i = 0u; i < noise.size(); ++i, time += bucketLength) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); double yi{trend(time, noise[i])}; model.addSamples(params, {core::make_triple(time, TDouble2Vec{yi}, TAG)}); //actual.push_back(yi); diff --git a/lib/maths/unittest/CGammaRateConjugateTest.cc b/lib/maths/unittest/CGammaRateConjugateTest.cc index 97f0c567a0..60f9b80c7a 100644 --- a/lib/maths/unittest/CGammaRateConjugateTest.cc +++ b/lib/maths/unittest/CGammaRateConjugateTest.cc @@ -41,6 +41,7 @@ using TDoubleDoublePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CGammaRateConjugate = CPriorTestInterfaceMixin; +using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double); CGammaRateConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& offset = 0.0, @@ -50,14 +51,10 @@ CGammaRateConjugate makePrior(maths_t::EDataType dataType = maths_t::E_Continuou } void CGammaRateConjugateTest::testMultipleUpdate() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testMultipleUpdate |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - // Test that we get the same result updating once with a vector of 100 // samples of an R.V. versus updating individually 100 times. 
- const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; + const maths_t::EDataType dataTypes[]{maths_t::E_IntegerData, maths_t::E_ContinuousData}; const double shape = 2.0; const double scale = 3.0; @@ -72,7 +69,7 @@ void CGammaRateConjugateTest::testMultipleUpdate() { CGammaRateConjugate filter2(filter1); for (std::size_t j = 0; j < samples.size(); ++j) { - filter1.addSamples(TDouble1Vec(1, samples[j])); + filter1.addSamples(TDouble1Vec{samples[j]}); } filter2.addSamples(samples); @@ -91,13 +88,13 @@ void CGammaRateConjugateTest::testMultipleUpdate() { filter1.addSamples(samples); CGammaRateConjugate filter2(filter1); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples({scaledSamples[j]}, + {ml::maths_t::countVarianceScaleWeight(2.0)}); } - filter2.addSamples(weightStyle, scaledSamples, - TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(scaledSamples, maths_t::TDoubleWeightsAry1Vec( + scaledSamples.size(), + maths_t::countVarianceScaleWeight(2.0))); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_RelativeTolerance, 0.03); @@ -114,11 +111,9 @@ void CGammaRateConjugateTest::testMultipleUpdate() { std::size_t count = 10; for (std::size_t j = 0u; j < count; ++j) { - filter1.addSamples(TDouble1Vec(1, x)); + filter1.addSamples(TDouble1Vec{x}); } - filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples({x}, {maths_t::countWeight(static_cast(count))}); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_RelativeTolerance, 0.01); @@ -127,10 +122,6 @@ void CGammaRateConjugateTest::testMultipleUpdate() { } void CGammaRateConjugateTest::testPropagation() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testPropagation |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - // The quantities are preserved up to the solving tolerance given that // the updated count is still relatively large so the digamma function // is very nearly equal to the log function. @@ -163,10 +154,6 @@ void CGammaRateConjugateTest::testPropagation() { } void CGammaRateConjugateTest::testShapeEstimation() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testShapeEstimation |"); - LOG_DEBUG(<< "+------------------------------------------------+"); - // The idea here is to check that the likelihood shape estimate converges // to the correct value for a range of distribution parameters. 
We do not // use any explicit bounds on the convergence rates so simply check that @@ -230,10 +217,6 @@ void CGammaRateConjugateTest::testShapeEstimation() { } void CGammaRateConjugateTest::testRateEstimation() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testRateEstimation |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - // We are going to test that we correctly estimate a distribution // for the rate of the gamma process by checking that the true // rate of a gamma process lies in various confidence intervals @@ -288,10 +271,6 @@ void CGammaRateConjugateTest::testRateEstimation() { } void CGammaRateConjugateTest::testMarginalLikelihood() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testMarginalLikelihood |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - // Check that the c.d.f. <= 1 at extreme. maths_t::EDataType dataTypes[] = {maths_t::E_ContinuousData, maths_t::E_IntegerData}; for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { @@ -306,17 +285,14 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { rng.generateGammaSamples(shape, scale, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast(maths_t::countWeight), + static_cast(maths_t::winsorisationWeight)}; + double weights[]{0.1, 0.9, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 1000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({1000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -465,13 +441,12 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 4e-3); } { - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); TMeanAccumulator totalError; for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { TMeanAccumulator error; double vs = varianceScales[i]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); LOG_DEBUG(<< "*** vs = " << vs << " ***"); for (std::size_t j = 0u; j < boost::size(percentages); ++j) { boost::math::gamma_distribution<> scaledGamma(shape / vs, vs * scale); @@ -479,8 +454,8 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { scaledGamma, (50.0 - percentages[j] / 2.0) / 100.0); double q2 = boost::math::quantile( scaledGamma, (50.0 + percentages[j] / 2.0) / 100.0); - TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], weightStyle, weight); + TDoubleDoublePr interval = + filter.marginalLikelihoodConfidenceInterval(percentages[j], weight); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.4); @@ -498,10 +473,6 @@ void 
CGammaRateConjugateTest::testMarginalLikelihood() {
 }
 
 void CGammaRateConjugateTest::testMarginalLikelihoodMean() {
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-    LOG_DEBUG(<< "| CGammaRateConjugateTest::testMarginalLikelihoodMean |");
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-
     // Test that the expectation of the marginal likelihood matches
     // the expected mean of the marginal likelihood.
 
@@ -544,10 +515,6 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMean() {
 }
 
 void CGammaRateConjugateTest::testMarginalLikelihoodMode() {
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-    LOG_DEBUG(<< "| CGammaRateConjugateTest::testMarginalLikelihoodMode |");
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-
     // Test that the marginal likelihood mode is what we'd expect
     // with various variance scales.
 
@@ -569,22 +536,19 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMode() {
             filter.addSamples(samples);
 
             TMeanAccumulator relativeError;
-            maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight);
-            TDouble4Vec weight(1, 1.0);
+            maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT);
             for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) {
                 double vs = varianceScales[k];
-                weight[0] = vs;
+                maths_t::setCountVarianceScale(vs, weight);
                 boost::math::gamma_distribution<> scaledGamma(shapes[i] / vs, vs * scales[j]);
                 double expectedMode = boost::math::mode(scaledGamma);
-                LOG_DEBUG(<< "marginalLikelihoodMode = "
-                          << filter.marginalLikelihoodMode(weightStyle, weight)
+                LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weight)
                           << ", expectedMode = " << expectedMode);
-                CPPUNIT_ASSERT_DOUBLES_EQUAL(
-                    expectedMode, filter.marginalLikelihoodMode(weightStyle, weight),
-                    0.28 * expectedMode + 0.3);
-                double error = std::fabs(
-                    filter.marginalLikelihoodMode(weightStyle, weight) - expectedMode);
+                CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode,
+                                             filter.marginalLikelihoodMode(weight),
+                                             0.28 * expectedMode + 0.3);
+                double error = std::fabs(filter.marginalLikelihoodMode(weight) - expectedMode);
                 relativeError.add(error == 0.0 ? 0.0 : error / expectedMode);
            }
            LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError));
@@ -594,10 +558,6 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMode() {
 }
 
 void CGammaRateConjugateTest::testMarginalLikelihoodVariance() {
-    LOG_DEBUG(<< "+-----------------------------------------------------------+");
-    LOG_DEBUG(<< "| CGammaRateConjugateTest::testMarginalLikelihoodVariance |");
-    LOG_DEBUG(<< "+-----------------------------------------------------------+");
-
     // Test that the expectation of the residual from the mean for
     // the marginal likelihood matches the expected variance of the
     // marginal likelihood.
@@ -652,10 +612,6 @@ void CGammaRateConjugateTest::testMarginalLikelihoodVariance() {
 }
 
 void CGammaRateConjugateTest::testSampleMarginalLikelihood() {
-    LOG_DEBUG(<< "+---------------------------------------------------------+");
-    LOG_DEBUG(<< "| CGammaRateConjugateTest::testSampleMarginalLikelihood |");
-    LOG_DEBUG(<< "+---------------------------------------------------------+");
-
     // We're going to test two properties of the sampling:
     //   1) That the sample mean is equal to the marginal
     //      likelihood mean.
@@ -746,10 +702,6 @@ void CGammaRateConjugateTest::testSampleMarginalLikelihood() { } void CGammaRateConjugateTest::testCdf() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testCdf |"); - LOG_DEBUG(<< "+------------------------------------+"); - // Test error cases. // // Test some invariants: @@ -769,7 +721,6 @@ void CGammaRateConjugateTest::testCdf() { filter.addSamples(samples); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountWeight); double lowerBound; double upperBound; CPPUNIT_ASSERT(!filter.minusLogJointCdf(TDouble1Vec(), lowerBound, upperBound)); @@ -800,10 +751,6 @@ void CGammaRateConjugateTest::testCdf() { } void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+---------------------------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+---------------------------------------------------------------+"); - // We test that the probability of less likely samples calculation // agrees with the chance of seeing a sample with lower marginal // likelihood, up to the sampling error. @@ -867,11 +814,9 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { meanError.add(std::fabs(px - (lb + ub) / 2.0)); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble1Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -881,42 +826,52 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); 
CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -928,10 +883,6 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { } void CGammaRateConjugateTest::testAnomalyScore() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testAnomalyScore |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - // This test pushes 500 samples through the filter and adds in // anomalous signals in the bins at 30, 120, 300 and 420 with // magnitude 4, 5, 10 and 15 standard deviations, respectively, @@ -1055,10 +1006,6 @@ void CGammaRateConjugateTest::testAnomalyScore() { } void CGammaRateConjugateTest::testOffset() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testOffset |"); - LOG_DEBUG(<< "+---------------------------------------+"); - // The idea of this test is to check that the offset correctly cancels // out a translation applied to a gamma distributed data set. @@ -1123,10 +1070,6 @@ void CGammaRateConjugateTest::testOffset() { } void CGammaRateConjugateTest::testIntegerData() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testIntegerData |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - // If the data are discrete then we approximate the discrete distribution // by saying it is uniform on the intervals [n,n+1] for each integral n. 
// The idea of this test is to check that the inferred model agrees in the @@ -1224,10 +1167,6 @@ void CGammaRateConjugateTest::testIntegerData() { } void CGammaRateConjugateTest::testLowVariationData() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testLowVariationData |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - { CGammaRateConjugate filter(makePrior(maths_t::E_IntegerData)); for (std::size_t i = 0u; i < 100; ++i) { @@ -1256,10 +1195,6 @@ void CGammaRateConjugateTest::testLowVariationData() { } void CGammaRateConjugateTest::testPersist() { - LOG_DEBUG(<< "+----------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testPersist |"); - LOG_DEBUG(<< "+----------------------------------------+"); - test::CRandomNumbers rng; TDoubleVec samples; @@ -1267,9 +1202,7 @@ void CGammaRateConjugateTest::testPersist() { maths::CGammaRateConjugate origFilter(makePrior(maths_t::E_ContinuousData, 0.1)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); @@ -1308,10 +1241,6 @@ void CGammaRateConjugateTest::testPersist() { } void CGammaRateConjugateTest::testVarianceScale() { - LOG_DEBUG(<< "+----------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testVarianceScale |"); - LOG_DEBUG(<< "+----------------------------------------------+"); - // The strategy for this test is to check we correctly account // for variance scaling by scaling the variance of a collection // of samples and then checking that the percentiles for those @@ -1330,10 +1259,11 @@ void CGammaRateConjugateTest::testVarianceScale() { // Finally, we test update with scaled samples produces the // correct posterior. 
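The variance-scale convention this test relies on: scaling a gamma's variance by v while holding its mean fixed maps (shape, scale) to (shape / v, v * scale), which is exactly how the expected scaledGamma distributions are constructed in these tests. A quick standalone check of the moments, plain arithmetic only:

    #include <cassert>
    #include <cmath>

    int main() {
        const double shape = 3.0;
        const double scale = 3.0;
        const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0};
        for (double v : varianceScales) {
            double scaledShape = shape / v;
            double scaledScale = v * scale;
            // The mean k * theta is unchanged ...
            assert(std::fabs(scaledShape * scaledScale - shape * scale) < 1e-9);
            // ... while the variance k * theta^2 is multiplied by v.
            assert(std::fabs(scaledShape * scaledScale * scaledScale -
                             v * shape * scale * scale) < 1e-9);
        }
        return 0;
    }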
- maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight}; + TWeightFunc weightsFuncs[]{ + static_cast(maths_t::seasonalVarianceScaleWeight), + static_cast(maths_t::countVarianceScaleWeight)}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t s = 0u; s < boost::size(weightsFuncs); ++s) { const double shape = 3.0; const double scale = 3.0; @@ -1407,10 +1337,8 @@ void CGammaRateConjugateTest::testVarianceScale() { double lowerBound, upperBound; maths_t::ETail tail; CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), - lowerBound, upperBound, tail)); + maths_t::E_TwoSided, {scaledSamples[j]}, + {weightsFuncs[s](varianceScales[i])}, lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); @@ -1469,12 +1397,10 @@ void CGammaRateConjugateTest::testVarianceScale() { for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL( - maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood( - maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood( + {scaledSamples[j]}, + {weightsFuncs[s](varianceScales[i])}, logLikelihood)); differentialEntropy -= logLikelihood; } @@ -1495,7 +1421,7 @@ void CGammaRateConjugateTest::testVarianceScale() { const double maximumMeanMeanError[] = {0.01, 0.01}; const double maximumMeanVarianceError[] = {0.08, 0.05}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t s = 0u; s < boost::size(weightsFuncs); ++s) { for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { const double shapes[] = {1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; @@ -1503,9 +1429,8 @@ void CGammaRateConjugateTest::testVarianceScale() { 1000.0, 100000.0, 1000000.0}; const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0, 100.0}; - maths_t::TWeightStyleVec weightStyle(1, scales[s]); TDoubleVec samples; - TDouble4Vec1Vec weights; + maths_t::TDoubleWeightsAry1Vec weights; test::CRandomNumbers rng; @@ -1544,13 +1469,13 @@ void CGammaRateConjugateTest::testVarianceScale() { rng.generateGammaSamples(shape, 1.0 / rate, 200, samples); weights.clear(); - weights.resize(samples.size(), TDouble4Vec(1, 1.0)); - filter.addSamples(weightStyle, samples, weights); + weights.resize(samples.size(), maths_t::CUnitWeights::UNIT); + filter.addSamples(samples, weights); rng.generateGammaSamples(scaledShape, 1.0 / scaledRate, 200, samples); weights.clear(); - weights.resize(samples.size(), TDouble4Vec(1, scale)); - filter.addSamples(weightStyle, samples, weights); + weights.resize(samples.size(), weightsFuncs[s](scale)); + filter.addSamples(samples, weights); double estimatedMean = filter.likelihoodShape() / filter.likelihoodRate(); @@ -1600,10 +1525,6 @@ void CGammaRateConjugateTest::testVarianceScale() { } void CGammaRateConjugateTest::testNegativeSample() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CGammaRateConjugateTest::testNegativeSample |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - // Test that we recover roughly the 
same distribution after adjusting // the offset. The idea of this test is to run two priors side by side, // one with a large enough offset that it never needs to adjust the diff --git a/lib/maths/unittest/CGramSchmidtTest.cc b/lib/maths/unittest/CGramSchmidtTest.cc index 81bcbf711d..6ed272f3a2 100644 --- a/lib/maths/unittest/CGramSchmidtTest.cc +++ b/lib/maths/unittest/CGramSchmidtTest.cc @@ -90,10 +90,6 @@ const TDoubleVec& subtract(TDoubleVec& x, const TDoubleVec& y) { } void CGramSchmidtTest::testOrthogonality() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CGramSchmidtTest::testOrthogonality |"); - LOG_DEBUG(<< "+---------------------------------------+"); - test::CRandomNumbers rng; { @@ -147,10 +143,6 @@ void CGramSchmidtTest::testOrthogonality() { } void CGramSchmidtTest::testNormalisation() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CGramSchmidtTest::testNormalisation |"); - LOG_DEBUG(<< "+---------------------------------------+"); - test::CRandomNumbers rng; { @@ -202,10 +194,6 @@ void CGramSchmidtTest::testNormalisation() { } void CGramSchmidtTest::testSpan() { - LOG_DEBUG(<< "+------------------------------+"); - LOG_DEBUG(<< "| CGramSchmidtTest::testSpan |"); - LOG_DEBUG(<< "+------------------------------+"); - test::CRandomNumbers rng; { @@ -285,10 +273,6 @@ void CGramSchmidtTest::testSpan() { } void CGramSchmidtTest::testEdgeCases() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CGramSchmidtTest::testEdgeCases |"); - LOG_DEBUG(<< "+-----------------------------------+"); - { LOG_DEBUG(<< "*** Test zero vector ***"); diff --git a/lib/maths/unittest/CInformationCriteriaTest.cc b/lib/maths/unittest/CInformationCriteriaTest.cc index f1c9d8da00..2151a858e4 100644 --- a/lib/maths/unittest/CInformationCriteriaTest.cc +++ b/lib/maths/unittest/CInformationCriteriaTest.cc @@ -50,10 +50,6 @@ double logfGaussian(const POINT& mean, const MATRIX& covariance, const POINT& x) } void CInformationCriteriaTest::testSphericalGaussian() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CInformationCriteriaTest::testSphericalGaussian |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - // Check that the information criterion values are the expected // values for the generating distribution. @@ -206,10 +202,6 @@ void CInformationCriteriaTest::testSphericalGaussian() { } void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster() { - LOG_DEBUG(<< "+-----------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster |"); - LOG_DEBUG(<< "+-----------------------------------------------------------------------+"); - // The idea of this test is simply to check that we get the // same result working with clusters of points or their // spherical cluster representation. 
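The spherical cluster representation referred to here compresses a set of points to three summaries: the centroid, the point count and a single "spherical" variance, i.e. the per-coordinate population variances averaged over dimensions. A minimal sketch with invented types, not maths::CSphericalCluster:

    #include <array>
    #include <vector>

    using TPoint = std::array<double, 2>;

    struct SSphericalCluster {
        TPoint s_Centroid;
        double s_Count;
        double s_SphericalVariance;
    };

    // Summarise a non-empty set of points by their centroid, count and
    // the average of the per-coordinate population variances.
    SSphericalCluster summarise(const std::vector<TPoint>& points) {
        SSphericalCluster result{{0.0, 0.0}, static_cast<double>(points.size()), 0.0};
        for (const TPoint& p : points) {
            result.s_Centroid[0] += p[0] / result.s_Count;
            result.s_Centroid[1] += p[1] / result.s_Count;
        }
        for (const TPoint& p : points) {
            double d0 = p[0] - result.s_Centroid[0];
            double d1 = p[1] - result.s_Centroid[1];
            result.s_SphericalVariance += (d0 * d0 + d1 * d1) / (2.0 * result.s_Count);
        }
        return result;
    }

    int main() {
        // Centroid (1, 1), count 3, spherical variance 8 / 6.
        SSphericalCluster cluster = summarise({{0.0, 0.0}, {2.0, 0.0}, {1.0, 3.0}});
        return cluster.s_Count == 3.0 ? 0 : 1;
    }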
@@ -271,10 +263,6 @@ void CInformationCriteriaTest::testSphericalGaussianWithSphericalCluster() { } void CInformationCriteriaTest::testGaussian() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CInformationCriteriaTest::testGaussian |"); - LOG_DEBUG(<< "+------------------------------------------+"); - maths::CSampling::seed(); { @@ -421,10 +409,6 @@ void CInformationCriteriaTest::testGaussian() { } void CInformationCriteriaTest::testGaussianWithSphericalCluster() { - LOG_DEBUG(<< "+--------------------------------------------------------------+"); - LOG_DEBUG(<< "| CInformationCriteriaTest::testGaussianWithSphericalCluster |"); - LOG_DEBUG(<< "+--------------------------------------------------------------+"); - using TSphericalCluster2 = maths::CSphericalCluster::Type; using TSphericalCluster2Vec = std::vector; using TSphericalCluster2VecVec = std::vector; diff --git a/lib/maths/unittest/CIntegerToolsTest.cc b/lib/maths/unittest/CIntegerToolsTest.cc index 4e304df9f8..dc814f92a8 100644 --- a/lib/maths/unittest/CIntegerToolsTest.cc +++ b/lib/maths/unittest/CIntegerToolsTest.cc @@ -37,10 +37,6 @@ std::string printBits(uint64_t x) { } void CIntegerToolsTest::testNextPow2() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CIntegerToolsTest::testNextPow2 |"); - LOG_DEBUG(<< "+-----------------------------------+"); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), maths::CIntegerTools::nextPow2(0)); test::CRandomNumbers rng; @@ -62,10 +58,6 @@ void CIntegerToolsTest::testNextPow2() { } void CIntegerToolsTest::testReverseBits() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CIntegerToolsTest::testReverseBits |"); - LOG_DEBUG(<< "+--------------------------------------+"); - test::CRandomNumbers rng; TSizeVec values; @@ -88,10 +80,6 @@ void CIntegerToolsTest::testReverseBits() { } void CIntegerToolsTest::testGcd() { - LOG_DEBUG(<< "+------------------------------+"); - LOG_DEBUG(<< "| CIntegerToolsTest::testGcd |"); - LOG_DEBUG(<< "+------------------------------+"); - // Construct a set of integers out of prime factors so we know // what the g.c.d. should be. @@ -188,10 +176,6 @@ void CIntegerToolsTest::testGcd() { } void CIntegerToolsTest::testBinomial() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CIntegerToolsTest::testBinomial |"); - LOG_DEBUG(<< "+-----------------------------------+"); - unsigned int n[] = {1u, 2u, 5u, 7u, 10u}; for (std::size_t i = 0u; i < boost::size(n); ++i) { diff --git a/lib/maths/unittest/CIntegrationTest.cc b/lib/maths/unittest/CIntegrationTest.cc index 4981699008..ffa40b88aa 100644 --- a/lib/maths/unittest/CIntegrationTest.cc +++ b/lib/maths/unittest/CIntegrationTest.cc @@ -245,10 +245,6 @@ class CNormal { } void CIntegrationTest::testAllSingleVariate() { - LOG_DEBUG(<< "+-------------------------------------------+"); - LOG_DEBUG(<< "| CIntegerToolsTest::testAllSingleVariate |"); - LOG_DEBUG(<< "+-------------------------------------------+"); - // Test that "low" order polynomials are integrated exactly // (as they should be for a higher order quadrature). 
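The exactness property is easiest to see for the simplest rule: two-point Gauss-Legendre, with nodes at +/-1/sqrt(3) and unit weights, integrates every polynomial up to order 2n - 1 = 3 exactly on [-1, 1]. A standalone check, independent of the CIntegration API:

    #include <cassert>
    #include <cmath>

    int main() {
        // Two-point Gauss-Legendre on [-1, 1]: nodes +/- 1/sqrt(3), weights 1.
        // It is exact for polynomials up to order 2n - 1 = 3.
        auto f = [](double x) { return 2.0 * x * x * x + 3.0 * x * x - x + 5.0; };
        double node = 1.0 / std::sqrt(3.0);
        double quadrature = f(node) + f(-node);
        // By hand: the odd terms integrate to zero, 3x^2 contributes 2 and
        // the constant contributes 10, so the exact integral is 12.
        double exact = 12.0;
        assert(std::fabs(quadrature - exact) < 1e-12);
        return 0;
    }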
@@ -745,10 +741,6 @@ void CIntegrationTest::testAllSingleVariate() { } void CIntegrationTest::testAdaptive() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CIntegerToolsTest::testAdaptive |"); - LOG_DEBUG(<< "+-----------------------------------+"); - using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; @@ -839,10 +831,6 @@ void CIntegrationTest::testAdaptive() { } void CIntegrationTest::testSparseGrid() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CIntegerToolsTest::testSparseGrid |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Compare against known grid characteristics. These are available // at http://www.sparse-grids.de/#Nodes. @@ -1093,10 +1081,6 @@ void CIntegrationTest::testSparseGrid() { } void CIntegrationTest::testMultivariateSmooth() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CIntegerToolsTest::testMultivariateSmooth |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - // Test that "low" order polynomials are integrated exactly. // A sparse grid of order l will integrate a polynomial exactly // if its order is no more than 2l - 1. A polynomial order is diff --git a/lib/maths/unittest/CKMeansFastTest.cc b/lib/maths/unittest/CKMeansFastTest.cc index 64337321f8..56419303b7 100644 --- a/lib/maths/unittest/CKMeansFastTest.cc +++ b/lib/maths/unittest/CKMeansFastTest.cc @@ -180,10 +180,6 @@ double sumSquareResiduals(const TVector2VecVec& points) { } void CKMeansFastTest::testDataPropagation() { - LOG_DEBUG(<< "+----------------------------------------+"); - LOG_DEBUG(<< "| CKMeansFastTest::testDataPropagation |"); - LOG_DEBUG(<< "+----------------------------------------+"); - test::CRandomNumbers rng; for (std::size_t i = 1u; i <= 100; ++i) { @@ -214,10 +210,6 @@ void CKMeansFastTest::testDataPropagation() { } void CKMeansFastTest::testFilter() { - LOG_DEBUG(<< "+-------------------------------+"); - LOG_DEBUG(<< "| CKMeansFastTest::testFilter |"); - LOG_DEBUG(<< "+-------------------------------+"); - // Test that the closest centre to each point is never removed // by the centre filter and that we get good speed up in terms // of the number of centre point comparisons avoided. @@ -286,10 +278,6 @@ void CKMeansFastTest::testFilter() { } void CKMeansFastTest::testCentroids() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CKMeansFastTest::testCentroids |"); - LOG_DEBUG(<< "+----------------------------------+"); - // Check that the centroids computed are the centroids for // each cluster, i.e. the centroid of the points closest to // each cluster centre. @@ -367,10 +355,6 @@ void CKMeansFastTest::testCentroids() { } void CKMeansFastTest::testClosestPoints() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CKMeansFastTest::testClosestPoints |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // Check the obvious invariant that the closest point to each // centre is closer to that centre than any other. @@ -439,10 +423,6 @@ void CKMeansFastTest::testClosestPoints() { } void CKMeansFastTest::testRun() { - LOG_DEBUG(<< "+----------------------------+"); - LOG_DEBUG(<< "| CKMeansFastTest::testRun |"); - LOG_DEBUG(<< "+----------------------------+"); - // Test k-means correctly identifies two separated uniform // random clusters in the data. 
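The behaviour under test is standard Lloyd iteration: repeatedly assign each point to its nearest centre, then move each centre to the mean of its assigned points. With two well-separated clusters this converges to one centre per cluster, as the following one-dimensional sketch shows:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        // Two separated 1-D clusters around 0 and 10.
        std::vector<double> points{-0.5, 0.0, 0.5, 9.5, 10.0, 10.5};
        double centre[2]{0.2, 0.3}; // deliberately poor initialisation

        for (int iteration = 0; iteration < 20; ++iteration) {
            double sum[2]{0.0, 0.0};
            double count[2]{0.0, 0.0};
            // Assignment step: the nearest centre wins.
            for (double x : points) {
                int nearest = std::fabs(x - centre[0]) <= std::fabs(x - centre[1]) ? 0 : 1;
                sum[nearest] += x;
                count[nearest] += 1.0;
            }
            // Update step: move each centre to the mean of its points.
            for (int i = 0; i < 2; ++i) {
                if (count[i] > 0.0) {
                    centre[i] = sum[i] / count[i];
                }
            }
        }
        // Converges to centres at 0 and 10.
        std::printf("centres = %f, %f\n", centre[0], centre[1]);
        return 0;
    }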
@@ -488,10 +468,6 @@ void CKMeansFastTest::testRun() {
 }
 
 void CKMeansFastTest::testRunWithSphericalClusters() {
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-    LOG_DEBUG(<< "| CKMeansFastTest::testRunWithSphericalClusters |");
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-
     // The idea of this test is simply to check that we get the
     // same result working with clusters of points or their
     // spherical cluster representation.
@@ -567,10 +543,6 @@ void CKMeansFastTest::testRunWithSphericalClusters() {
 }
 
 void CKMeansFastTest::testPlusPlus() {
-    LOG_DEBUG(<< "+---------------------------------+");
-    LOG_DEBUG(<< "| CKMeansFastTest::testPlusPlus |");
-    LOG_DEBUG(<< "+---------------------------------+");
-
     // Test that the k-means++ sampling scheme always samples all the
     // clusters present in the data and generally results in lower
     // square residuals of the points from the cluster centres.
diff --git a/lib/maths/unittest/CKMeansOnlineTest.cc b/lib/maths/unittest/CKMeansOnlineTest.cc
index 6b72831127..6f1d19a431 100644
--- a/lib/maths/unittest/CKMeansOnlineTest.cc
+++ b/lib/maths/unittest/CKMeansOnlineTest.cc
@@ -65,10 +65,6 @@ std::string print(const POINT& point) {
 }
 
 void CKMeansOnlineTest::testVariance() {
-    LOG_DEBUG(<< "+-----------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testVariance |");
-    LOG_DEBUG(<< "+-----------------------------------+");
-
     // Check that the variance calculation gives the correct
     // spherical variance.
 
@@ -106,10 +102,6 @@ void CKMeansOnlineTest::testVariance() {
 }
 
 void CKMeansOnlineTest::testAdd() {
-    LOG_DEBUG(<< "+------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testAdd |");
-    LOG_DEBUG(<< "+------------------------------+");
-
     // Test that we correctly compute the mean and spherical
     // variance.
 
@@ -158,10 +150,6 @@ void CKMeansOnlineTest::testAdd() {
 }
 
 void CKMeansOnlineTest::testReduce() {
-    LOG_DEBUG(<< "+---------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testReduce |");
-    LOG_DEBUG(<< "+---------------------------------+");
-
     // Test some invariants:
     //   - Number of clusters should be no more than k after
     //     reduce.
@@ -222,10 +210,6 @@ void CKMeansOnlineTest::testReduce() {
 }
 
 void CKMeansOnlineTest::testClustering() {
-    LOG_DEBUG(<< "+-------------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testClustering |");
-    LOG_DEBUG(<< "+-------------------------------------+");
-
     // Test we are reliably able to find approximately as good
     // clusterings as k-means working on the full data set.
 
@@ -363,10 +347,6 @@ void CKMeansOnlineTest::testClustering() {
 }
 
 void CKMeansOnlineTest::testSplit() {
-    LOG_DEBUG(<< "+--------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testSplit |");
-    LOG_DEBUG(<< "+--------------------------------+");
-
     // Test that the clusters are divided among the clusterers
     // in the split as expected.
 
@@ -429,10 +409,6 @@ void CKMeansOnlineTest::testSplit() {
 }
 
 void CKMeansOnlineTest::testMerge() {
-    LOG_DEBUG(<< "+--------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testMerge |");
-    LOG_DEBUG(<< "+--------------------------------+");
-
     // Test some invariants:
     //   - Number of clusters should be no more than k after merge.
     //   - The count of the points should be unchanged.
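Behind these merge invariants: two clusters summarised as (count, centroid, variance) can be merged exactly from the summaries alone. Counts add, the centroid is the count-weighted mean, and the variance follows the parallel-axis rule. A one-dimensional sketch with an invented SCluster type, not the CKMeansOnline implementation:

    #include <cstdio>

    struct SCluster {
        double s_Count, s_Centroid, s_Variance;
    };

    // Exact merge of two (count, mean, variance) summaries: the count is
    // conserved and the variance picks up the squared offsets of the two
    // centroids from the merged centroid (parallel-axis rule).
    SCluster merge(const SCluster& a, const SCluster& b) {
        double n = a.s_Count + b.s_Count;
        double c = (a.s_Count * a.s_Centroid + b.s_Count * b.s_Centroid) / n;
        double da = a.s_Centroid - c;
        double db = b.s_Centroid - c;
        double v = (a.s_Count * (a.s_Variance + da * da) +
                    b.s_Count * (b.s_Variance + db * db)) / n;
        return {n, c, v};
    }

    int main() {
        // count = 6, centroid = 2, variance = (4 * 1.5 + 2 * 4.25) / 6.
        SCluster merged = merge({4.0, 1.0, 0.5}, {2.0, 4.0, 0.25});
        std::printf("count = %g, centroid = %g, variance = %g\n",
                    merged.s_Count, merged.s_Centroid, merged.s_Variance);
        return 0;
    }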
@@ -495,10 +471,6 @@ void CKMeansOnlineTest::testMerge() {
 }
 
 void CKMeansOnlineTest::testPropagateForwardsByTime() {
-    LOG_DEBUG(<< "+--------------------------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testPropagateForwardsByTime |");
-    LOG_DEBUG(<< "+--------------------------------------------------+");
-
     // Test pruning of dead clusters.
 
     test::CRandomNumbers rng;
@@ -539,10 +511,6 @@ void CKMeansOnlineTest::testPropagateForwardsByTime() {
 }
 
 void CKMeansOnlineTest::testSample() {
-    LOG_DEBUG(<< "+---------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testSample |");
-    LOG_DEBUG(<< "+---------------------------------+");
-
     // We test that for a small number of samples we get back
     // exactly the points we have added and for a large number
     // of samples we sample the modes of the mixture correctly.
@@ -638,10 +606,6 @@ void CKMeansOnlineTest::testSample() {
 }
 
 void CKMeansOnlineTest::testPersist() {
-    LOG_DEBUG(<< "+----------------------------------+");
-    LOG_DEBUG(<< "| CKMeansOnlineTest::testPersist |");
-    LOG_DEBUG(<< "+----------------------------------+");
-
     test::CRandomNumbers rng;
 
     TDoubleVec coordinates;
diff --git a/lib/maths/unittest/CKMostCorrelatedTest.cc b/lib/maths/unittest/CKMostCorrelatedTest.cc
index 3f4800d49a..2e9cc02c9c 100644
--- a/lib/maths/unittest/CKMostCorrelatedTest.cc
+++ b/lib/maths/unittest/CKMostCorrelatedTest.cc
@@ -168,10 +168,6 @@ void estimateCorrelation(const std::size_t trials,
 }
 
 void CKMostCorrelatedTest::testCorrelation() {
-    LOG_DEBUG(<< "+-----------------------------------------+")
-    LOG_DEBUG(<< "| CKMostCorrelatedTest::testCorrelation |")
-    LOG_DEBUG(<< "+-----------------------------------------+")
-
     // Check that the proposed estimator is unbiased.
 
     maths::CSampling::seed();
@@ -230,10 +226,6 @@ void CKMostCorrelatedTest::testCorrelation() {
 }
 
 void CKMostCorrelatedTest::testNextProjection() {
-    LOG_DEBUG(<< "+--------------------------------------------+")
-    LOG_DEBUG(<< "| CKMostCorrelatedTest::testNextProjection |")
-    LOG_DEBUG(<< "+--------------------------------------------+")
-
     // Test that aging happens correctly and that the projections
     // have low mutual information.
 
@@ -346,10 +338,6 @@ void CKMostCorrelatedTest::testNextProjection() {
 }
 
 void CKMostCorrelatedTest::testMostCorrelated() {
-    LOG_DEBUG(<< "+--------------------------------------------+")
-    LOG_DEBUG(<< "| CKMostCorrelatedTest::testMostCorrelated |")
-    LOG_DEBUG(<< "+--------------------------------------------+")
-
     // Check the variables with the highest estimated correlation emerge.
 
     using TMaxCorrelationAccumulator =
@@ -405,10 +393,6 @@ void CKMostCorrelatedTest::testMostCorrelated() {
 }
 
 void CKMostCorrelatedTest::testRemoveVariables() {
-    LOG_DEBUG(<< "+---------------------------------------------+")
-    LOG_DEBUG(<< "| CKMostCorrelatedTest::testRemoveVariables |")
-    LOG_DEBUG(<< "+---------------------------------------------+")
-
     // Test we correctly remove correlated pairs which include a variable
     // to prune.
     //
@@ -460,10 +444,6 @@ void CKMostCorrelatedTest::testRemoveVariables() {
 }
 
 void CKMostCorrelatedTest::testAccuracy() {
-    LOG_DEBUG(<< "+--------------------------------------+")
-    LOG_DEBUG(<< "| CKMostCorrelatedTest::testAccuracy |")
-    LOG_DEBUG(<< "+--------------------------------------+")
-
     // Check that we consistently find the most correlated pairs of variables.
// // For ten variables [0, ..., 9] create correlated pairs { (0, 1), (2, 3), @@ -519,10 +499,6 @@ void CKMostCorrelatedTest::testAccuracy() { } void CKMostCorrelatedTest::testStability() { - LOG_DEBUG(<< "+---------------------------------------+") - LOG_DEBUG(<< "| CKMostCorrelatedTest::testStability |") - LOG_DEBUG(<< "+---------------------------------------+") - // For twenty variables [0, ..., 19] create correlated pairs { (0, 1), // (2, 3), (4, 5), (6, 7), (8, 9), (10, 11), (12, 13), (14, 15), (16, 17), // (18, 19) } with correlations of { (0, 1), (2, 3), (4, 5), (6, 7), @@ -577,10 +553,6 @@ void CKMostCorrelatedTest::testStability() { } void CKMostCorrelatedTest::testChangingCorrelation() { - LOG_DEBUG(<< "+-------------------------------------------------+") - LOG_DEBUG(<< "| CKMostCorrelatedTest::testChangingCorrelation |") - LOG_DEBUG(<< "+-------------------------------------------------+") - // Test that we correctly identify a newly emerging correlation. // // For ten variables [0, ..., 9] create correlated pairs { (0, 1), (2, 3), @@ -632,10 +604,6 @@ void CKMostCorrelatedTest::testChangingCorrelation() { } void CKMostCorrelatedTest::testMissingData() { - LOG_DEBUG(<< "+-----------------------------------------+") - LOG_DEBUG(<< "| CKMostCorrelatedTest::testMissingData |") - LOG_DEBUG(<< "+-----------------------------------------+") - // Test the case that some of the metric values are missing. // // For ten variables [0, ..., 9] create correlated pairs { (0, 1), (2, 3), @@ -697,10 +665,6 @@ void CKMostCorrelatedTest::testMissingData() { } void CKMostCorrelatedTest::testScale() { - LOG_DEBUG(<< "+-----------------------------------+") - LOG_DEBUG(<< "| CKMostCorrelatedTest::testScale |") - LOG_DEBUG(<< "+-----------------------------------+") - // Test runtime is approximately linear in the number of variables // if we look for O(number of variables) correlations. @@ -806,10 +770,6 @@ void CKMostCorrelatedTest::testScale() { } void CKMostCorrelatedTest::testPersistence() { - LOG_DEBUG(<< "+-----------------------------------------+") - LOG_DEBUG(<< "| CKMostCorrelatedTest::testPersistence |") - LOG_DEBUG(<< "+-----------------------------------------+") - // Check that persistence is idempotent. 
maths::CSampling::seed(); diff --git a/lib/maths/unittest/CKdTreeTest.cc b/lib/maths/unittest/CKdTreeTest.cc index 33e3a6a4d0..68315d5511 100644 --- a/lib/maths/unittest/CKdTreeTest.cc +++ b/lib/maths/unittest/CKdTreeTest.cc @@ -36,10 +36,6 @@ std::string print(const T& t) { } void CKdTreeTest::testBuild() { - LOG_DEBUG(<< "+--------------------------+"); - LOG_DEBUG(<< "| CKdTreeTest::testBuild |"); - LOG_DEBUG(<< "+--------------------------+"); - const std::size_t numberTests = 200; test::CRandomNumbers rng; @@ -74,10 +70,6 @@ void CKdTreeTest::testBuild() { } void CKdTreeTest::testNearestNeighbour() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CKdTreeTest::testNearestNeighbour |"); - LOG_DEBUG(<< "+-------------------------------------+"); - const std::size_t numberTests = 200; test::CRandomNumbers rng; diff --git a/lib/maths/unittest/CLassoLogisticRegressionTest.cc b/lib/maths/unittest/CLassoLogisticRegressionTest.cc index 10f8df0874..b6f950ad18 100644 --- a/lib/maths/unittest/CLassoLogisticRegressionTest.cc +++ b/lib/maths/unittest/CLassoLogisticRegressionTest.cc @@ -78,10 +78,6 @@ double logLikelihood(const TDoubleVecVec& x, } void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() { - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - LOG_DEBUG(<< "| CLassoLogisticRegressionTest::testCyclicCoordinateDescent |"); - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - static const double EPS = 5e-3; test::CRandomNumbers rng; @@ -218,9 +214,6 @@ void CLassoLogisticRegressionTest::testCyclicCoordinateDescent() { } void CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse() { - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CLassoLogisticRegressionTest::testCyclicCoordinateDescentLargeSparse |"); - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); // TODO } diff --git a/lib/maths/unittest/CLinearAlgebraTest.cc b/lib/maths/unittest/CLinearAlgebraTest.cc index 769e7141ad..e184fd2139 100644 --- a/lib/maths/unittest/CLinearAlgebraTest.cc +++ b/lib/maths/unittest/CLinearAlgebraTest.cc @@ -23,10 +23,6 @@ using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; void CLinearAlgebraTest::testSymmetricMatrixNxN() { - LOG_DEBUG(<< "+----------------------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testSymmetricMatrixNxN |"); - LOG_DEBUG(<< "+----------------------------------------------+"); - // Construction. { maths::CSymmetricMatrixNxN matrix; @@ -153,10 +149,6 @@ void CLinearAlgebraTest::testSymmetricMatrixNxN() { } void CLinearAlgebraTest::testVectorNx1() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testVectorNx1 |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Construction. { maths::CVectorNx1 vector; @@ -256,10 +248,6 @@ void CLinearAlgebraTest::testVectorNx1() { } void CLinearAlgebraTest::testSymmetricMatrix() { - LOG_DEBUG(<< "+-------------------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testSymmetricMatrix |"); - LOG_DEBUG(<< "+-------------------------------------------+"); - // Construction. 
{ maths::CSymmetricMatrix matrix(3); @@ -414,10 +402,6 @@ void CLinearAlgebraTest::testSymmetricMatrix() { } void CLinearAlgebraTest::testVector() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testVector |"); - LOG_DEBUG(<< "+----------------------------------+"); - // Construction. { maths::CVector vector(3); @@ -530,10 +514,6 @@ void CLinearAlgebraTest::testVector() { } void CLinearAlgebraTest::testNorms() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testNorms |"); - LOG_DEBUG(<< "+---------------------------------+"); - double v[][5] = {{1.0, 2.1, 3.2, 1.7, 0.1}, {0.0, -2.1, 1.2, 1.9, 4.1}, {-1.0, 7.1, 5.2, 1.7, -0.1}, @@ -559,10 +539,6 @@ void CLinearAlgebraTest::testNorms() { } void CLinearAlgebraTest::testUtils() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testUtils |"); - LOG_DEBUG(<< "+---------------------------------+"); - // Test component min, max, sqrt and fabs. { LOG_DEBUG(<< "Vector min, max, fabs, sqrt"); @@ -712,10 +688,6 @@ void CLinearAlgebraTest::testUtils() { } void CLinearAlgebraTest::testGaussianLogLikelihood() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testGaussianLogLikelihood |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // Test the log likelihood (expected from octave). { const double covariance_[][4] = {{10.70779, 0.14869, 1.44263, 2.26889}, @@ -847,10 +819,6 @@ void CLinearAlgebraTest::testGaussianLogLikelihood() { } void CLinearAlgebraTest::testSampleGaussian() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testSampleGaussian |"); - LOG_DEBUG(<< "+------------------------------------------+"); - // Test singular matrix. { double m[] = {1.0, 2.0, 3.0, 4.0}; @@ -959,10 +927,6 @@ void CLinearAlgebraTest::testSampleGaussian() { } void CLinearAlgebraTest::testLogDeterminant() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testLogDeterminant |"); - LOG_DEBUG(<< "+------------------------------------------+"); - // Test the determinant (expected from octave). { const double matrices[][3][3] = { @@ -1028,10 +992,6 @@ std::string print(const MATRIX& m) { } void CLinearAlgebraTest::testProjected() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testProjected |"); - LOG_DEBUG(<< "+-------------------------------------+"); - using TSizeVec = std::vector; const double m[][5] = {{1.2, 2.4, 1.9, 3.8, 8.3}, @@ -1081,10 +1041,6 @@ void CLinearAlgebraTest::testProjected() { } void CLinearAlgebraTest::testPersist() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CLinearAlgebraTest::testPersist |"); - LOG_DEBUG(<< "+-----------------------------------+"); - // Check conversion to and from delimited is idempotent and parsing // bad input produces an error. 
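The linear algebra file above only loses banners; starting with CLogNormalMeanPrecConjugateTest.cc below, the hunks also migrate the prior interfaces from the old weight-style API to self-describing weight arrays. As a minimal sketch of the two calling conventions (the filter object stands in for any prior under test, x for a sample value; the maths_t names are those visible in the hunks):

// Old convention: the meaning of each weight lived in a parallel
// "style" vector that had to be kept in step with the weight values.
filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
                  TDouble1Vec(1, x),
                  TDouble4Vec1Vec(1, TDouble4Vec(1, 10.0)));

// New convention: a weight is a fixed-size array with one slot per
// weight kind, built by a named factory, so the style vector is gone
// and brace-initialized one-element vectors suffice.
filter.addSamples({x}, {maths_t::countWeight(10.0)});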
diff --git a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc index 79f26ab4be..cab594862a 100644 --- a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc +++ b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc @@ -42,6 +42,7 @@ using TDoubleDoublePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CLogNormalMeanPrecConjugate = CPriorTestInterfaceMixin; +using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double); CLogNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& offset = 0.0, @@ -51,10 +52,6 @@ CLogNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_C } void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testMultipleUpdate |"); - LOG_DEBUG(<< "+-------------------------------------------------------+"); - // Test that we get the same result updating once with a vector of 100 // samples of an R.V. versus updating individually 100 times. @@ -75,7 +72,7 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { CLogNormalMeanPrecConjugate filter2(filter1); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter1.addSamples(TDouble1Vec(1, samples[j])); + filter1.addSamples(TDouble1Vec{samples[j]}); } filter2.addSamples(samples); @@ -103,13 +100,12 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { filter1.addSamples(samples); CLogNormalMeanPrecConjugate filter2(filter1); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); + maths_t::TDoubleWeightsAry1Vec weights; + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0)); for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples({scaledSamples[j]}, {weights[j]}); } - filter2.addSamples(weightStyle, scaledSamples, - TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(scaledSamples, weights); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -126,13 +122,10 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { double x = 3.0; std::size_t count = 10; - for (std::size_t j = 0u; j < count; ++j) { - filter1.addSamples(TDouble1Vec(1, x)); + filter1.addSamples(TDouble1Vec{x}); } - filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples({x}, {maths_t::countWeight(static_cast(count))}); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -143,10 +136,6 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { } void CLogNormalMeanPrecConjugateTest::testPropagation() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testPropagation |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // Test that propagation doesn't affect the expected values // of likelihood mean and precision. 
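The TWeightFunc alias added at the top of this file lets the tests drive one code path with several weight kinds. Because the maths_t factories are overload sets, taking their address needs an explicit cast to pin the (double) overload; a sketch of the idiom, with the loop body elided and the sample weight value 0.1 chosen arbitrarily:

using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double);

// Pin the scalar overload of each factory so it can sit in an array.
TWeightFunc weightsFuncs[]{static_cast<TWeightFunc>(maths_t::countWeight),
                           static_cast<TWeightFunc>(maths_t::winsorisationWeight)};

for (auto makeWeight : weightsFuncs) {
    maths_t::TDoubleWeightsAry1Vec weights{makeWeight(0.1)};
    // ... exercise minusLogJointCdf, addSamples, etc. with weights.
}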
@@ -180,10 +169,6 @@ void CLogNormalMeanPrecConjugateTest::testPropagation() { } void CLogNormalMeanPrecConjugateTest::testMeanEstimation() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testMeanEstimation |"); - LOG_DEBUG(<< "+-------------------------------------------------------+"); - // We are going to test that we correctly estimate the distribution // for the mean of the exponentiated Gaussian of a log-normal process // by checking that the true mean lies in various confidence intervals @@ -243,10 +228,6 @@ void CLogNormalMeanPrecConjugateTest::testMeanEstimation() { } void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() { - LOG_DEBUG(<< "+------------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testPrecisionEstimation |"); - LOG_DEBUG(<< "+------------------------------------------------------------+"); - // We are going to test that we correctly estimate a distribution for // the precision of the exponentiated Gaussian of a log-normal process by // checking that the true precision lies in various confidence intervals @@ -308,10 +289,6 @@ void CLogNormalMeanPrecConjugateTest::testPrecisionEstimation() { } void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testMarginalLikelihood |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - // Check that the c.d.f. <= 1 at extreme. maths_t::EDataType dataTypes[] = {maths_t::E_ContinuousData, maths_t::E_IntegerData}; for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { @@ -326,17 +303,14 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { rng.generateLogNormalSamples(location, squareScale, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast(maths_t::countWeight), + static_cast(maths_t::winsorisationWeight)}; + double weights[]{0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 10000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({10000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -506,9 +480,7 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { double q2 = boost::math::quantile( scaledLogNormal, (50.0 + percentages[j] / 2.0) / 100.0); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble4Vec(1, vs)); + percentages[j], maths_t::countVarianceScaleWeight(vs)); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, @@ -528,10 +500,6 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { } void 
CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { - LOG_DEBUG(<< "+---------------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean |"); - LOG_DEBUG(<< "+---------------------------------------------------------------+"); - // Test that the expectation of the marginal likelihood matches // the expected mean of the marginal likelihood. @@ -581,10 +549,6 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { } void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+---------------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode |"); - LOG_DEBUG(<< "+---------------------------------------------------------------+"); - // Test that the marginal likelihood mode is what we'd expect // with various variance scales. @@ -609,12 +573,11 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { rng.generateLogNormalSamples(locations[i], squareScales[j], 1000, samples); filter.addSamples(samples); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); TMeanAccumulator error; for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); double shift = std::log(1.0 + vs * (std::exp(squareScales[j]) - 1.0)) - squareScales[j]; double shiftedLocation = locations[i] - 0.5 * shift; @@ -622,18 +585,15 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { boost::math::lognormal_distribution<> scaledLogNormal( shiftedLocation, std::sqrt(shiftedSquareScale)); double expectedMode = boost::math::mode(scaledLogNormal); - LOG_DEBUG(<< "dm = " - << boost::math::mean(scaledLogNormal) - boost::math::mean(logNormal) - << ", vs = " - << boost::math::variance(scaledLogNormal) / - boost::math::variance(logNormal) - << ", marginalLikelihoodMode = " - << filter.marginalLikelihoodMode(weightStyle, weight) - << ", expectedMode = " << expectedMode); + LOG_DEBUG( + << "dm = " << boost::math::mean(scaledLogNormal) - boost::math::mean(logNormal) + << ", vs = " + << boost::math::variance(scaledLogNormal) / boost::math::variance(logNormal) + << ", marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weight) + << ", expectedMode = " << expectedMode); CPPUNIT_ASSERT_DOUBLES_EQUAL( - expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0); - error.add(std::fabs(filter.marginalLikelihoodMode(weightStyle, weight) - - expectedMode)); + expectedMode, filter.marginalLikelihoodMode(weight), 1.0); + error.add(std::fabs(filter.marginalLikelihoodMode(weight) - expectedMode)); } LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.26); @@ -642,10 +602,6 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { } void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { - LOG_DEBUG(<< "+-------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance |"); - LOG_DEBUG(<< "+-------------------------------------------------------------------+"); - // Test that the expectation of the residual from the mean for // the marginal likelihood matches the expected variance of
the // marginal likelihood. @@ -694,10 +650,6 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { } void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { - LOG_DEBUG(<< "+-----------------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood |"); - LOG_DEBUG(<< "+-----------------------------------------------------------------+"); - // We're going to test two properties of the sampling: // 1) That the sample mean is equal to the marginal // likelihood mean. @@ -778,10 +730,6 @@ void CLogNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { } void CLogNormalMeanPrecConjugateTest::testCdf() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testCdf |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - // Test error cases. // // Test some invariants: @@ -833,10 +781,6 @@ void CLogNormalMeanPrecConjugateTest::testCdf() { } void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+-----------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+-----------------------------------------------------------------------+"); - // We test that the probability of less likely samples calculation // agrees with the chance of seeing a sample with lower marginal // likelihood, up to the sampling error. @@ -901,11 +845,9 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { meanError.add(std::fabs(px - (lb + ub) / 2.0)); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -915,42 +857,52 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, 
maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -962,10 +914,6 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { } void CLogNormalMeanPrecConjugateTest::testAnomalyScore() { - LOG_DEBUG(<< "+-----------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testAnomalyScore |"); - LOG_DEBUG(<< "+-----------------------------------------------------+"); - // This test pushes 500 samples through the filter and adds in // anomalous signals in the bins at 30, 120, 300 and 420 with // magnitude 4, 5, 10 and 15 standard deviations, respectively, @@ -1091,10 +1039,6 @@ void CLogNormalMeanPrecConjugateTest::testAnomalyScore() { } void CLogNormalMeanPrecConjugateTest::testOffset() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testOffset |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - // The idea of this test is to check that the offset correctly cancels // out a translation applied to a log-normally distributed data set. @@ -1160,10 +1104,6 @@ void CLogNormalMeanPrecConjugateTest::testOffset() { } void CLogNormalMeanPrecConjugateTest::testIntegerData() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testIntegerData |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // If the data are discrete then we approximate the discrete distribution // by saying it is uniform on the intervals [n,n+1] for each integral n. 
// The idea of this test is to check that the inferred model agrees in the @@ -1285,10 +1225,6 @@ void CLogNormalMeanPrecConjugateTest::testIntegerData() { } void CLogNormalMeanPrecConjugateTest::testLowVariationData() { - LOG_DEBUG(<< "+---------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testLowVariationData |"); - LOG_DEBUG(<< "+---------------------------------------------------------+"); - { CLogNormalMeanPrecConjugate filter(makePrior(maths_t::E_IntegerData)); for (std::size_t i = 0u; i < 100; ++i) { @@ -1317,10 +1253,6 @@ void CLogNormalMeanPrecConjugateTest::testLowVariationData() { } void CLogNormalMeanPrecConjugateTest::testPersist() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testPersist |"); - LOG_DEBUG(<< "+------------------------------------------------+"); - const double location = std::log(10.0); const double squareScale = 3.0; @@ -1331,9 +1263,7 @@ void CLogNormalMeanPrecConjugateTest::testPersist() { maths::CLogNormalMeanPrecConjugate origFilter(makePrior()); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1372,10 +1302,6 @@ void CLogNormalMeanPrecConjugateTest::testPersist() { } void CLogNormalMeanPrecConjugateTest::testVarianceScale() { - LOG_DEBUG(<< "+------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testVarianceScale |"); - LOG_DEBUG(<< "+------------------------------------------------------+"); - // The strategy for this test is to check we correctly account // for variance scaling by scaling the variance of a collection // of samples and then checking that the percentiles for those @@ -1394,10 +1320,11 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { // Finally, we test update with scaled samples produces the // correct posterior. 
- maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight}; + TWeightFunc weightsFuncs[]{ + static_cast(maths_t::seasonalVarianceScaleWeight), + static_cast(maths_t::countVarianceScaleWeight)}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t s = 0u; s < boost::size(weightsFuncs); ++s) { const double location = 2.0; const double squareScale = 1.5; { @@ -1488,10 +1415,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { double lowerBound, upperBound; maths_t::ETail tail; CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[k]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), - lowerBound, upperBound, tail)); + maths_t::E_TwoSided, {scaledSamples[k]}, + {weightsFuncs[s](varianceScales[j])}, lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); @@ -1569,12 +1494,10 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL( - maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood( - maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood( + {scaledSamples[j]}, + {weightsFuncs[s](varianceScales[i])}, logLikelihood)); differentialEntropy -= logLikelihood; } @@ -1595,7 +1518,7 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { const double maximumMeanMeanError[] = {0.02, 0.01}; const double maximumMeanVarianceError[] = {0.18, 0.1}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t s = 0u; s < boost::size(weightsFuncs); ++s) { for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { const double means[] = {0.1, 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; @@ -1603,9 +1526,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { 1000.0, 100000.0, 1000000.0}; const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0, 100.0}; - maths_t::TWeightStyleVec weightStyle(1, scales[s]); TDoubleVec samples; - TDouble4Vec1Vec weights; + maths_t::TDoubleWeightsAry1Vec weights; test::CRandomNumbers rng; @@ -1670,13 +1592,13 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { rng.generateLogNormalSamples(location, squareScale, 200, samples); weights.clear(); - weights.resize(samples.size(), TDouble4Vec(1, 1.0)); - filter.addSamples(weightStyle, samples, weights); + weights.resize(samples.size(), maths_t::CUnitWeights::UNIT); + filter.addSamples(samples, weights); rng.generateLogNormalSamples( scaledLocation, scaledSquareScale, 200, samples); weights.clear(); - weights.resize(samples.size(), TDouble4Vec(1, scale)); - filter.addSamples(weightStyle, samples, weights); + weights.resize(samples.size(), weightsFuncs[s](scale)); + filter.addSamples(samples, weights); boost::math::lognormal_distribution<> logNormal( filter.normalMean(), @@ -1726,10 +1648,6 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { } void CLogNormalMeanPrecConjugateTest::testNegativeSample() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CLogNormalMeanPrecConjugateTest::testNegativeSample |"); - 
LOG_DEBUG(<< "+-------------------------------------------------------+"); - // Test that we recover roughly the same distribution after adjusting // the offset. The idea of this test is to run two priors side by side, // one with a large enough offset that it never needs to adjust the diff --git a/lib/maths/unittest/CLogTDistributionTest.cc b/lib/maths/unittest/CLogTDistributionTest.cc index 335a8890ef..cbb391b758 100644 --- a/lib/maths/unittest/CLogTDistributionTest.cc +++ b/lib/maths/unittest/CLogTDistributionTest.cc @@ -25,10 +25,6 @@ using TDoubleVecItr = TDoubleVec::iterator; using TDoubleVecCItr = TDoubleVec::const_iterator; void CLogTDistributionTest::testMode() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CLogTDistributionTest::testMode |"); - LOG_DEBUG(<< "+-----------------------------------+"); - // The mode of the distribution should be at the maximum // of the distribution, i.e. p.d.f. derivative should be // zero and curvature should be positive. @@ -69,10 +65,6 @@ void CLogTDistributionTest::testMode() { } void CLogTDistributionTest::testPdf() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CLogTDistributionTest::testPdf |"); - LOG_DEBUG(<< "+----------------------------------+"); - // Check that the p.d.f. is the derivative of the c.d.f. const double tolerance = 1e-6; @@ -105,10 +97,6 @@ void CLogTDistributionTest::testPdf() { } void CLogTDistributionTest::testCdf() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CLogTDistributionTest::testCdf |"); - LOG_DEBUG(<< "+----------------------------------+"); - // The idea here is that the distribution should describe data // generated by exp(X / s + m)) where X is student's t. @@ -154,10 +142,6 @@ void CLogTDistributionTest::testCdf() { } void CLogTDistributionTest::testQuantile() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CLogTDistributionTest::testQuantile |"); - LOG_DEBUG(<< "+---------------------------------------+"); - // Check that the quantile is the inverse of the c.d.f. 
const double degreesFreedom[] = {2.0, 10.0, 40.0}; diff --git a/lib/maths/unittest/CMathsMemoryTest.cc b/lib/maths/unittest/CMathsMemoryTest.cc index 5d8f89e0c6..e1528e7c92 100644 --- a/lib/maths/unittest/CMathsMemoryTest.cc +++ b/lib/maths/unittest/CMathsMemoryTest.cc @@ -43,41 +43,36 @@ void CMathsMemoryTest::testPriors() { CConstantPrior constantPrior(d); CPPUNIT_ASSERT_EQUAL(std::size_t(0), constantPrior.memoryUsage()); - CGammaRateConjugate::TWeightStyleVec weightStyles; CGammaRateConjugate::TDoubleVec samples; - CGammaRateConjugate::TDoubleVecVec weights; - - weightStyles.push_back(maths_t::E_SampleCountWeight); samples.push_back(0.996); - CGammaRateConjugate::TDoubleVec weight; - weight.push_back(0.2); - weights.push_back(weight); + maths_t::TDoubleWeightsAry weight(maths_t::countWeight(0.2)); + maths_t::TDoubleWeightsAry1Vec weights{weight}; CGammaRateConjugate gammaRateConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7); CPPUNIT_ASSERT_EQUAL(std::size_t(0), gammaRateConjugate.memoryUsage()); - gammaRateConjugate.addSamples(weightStyles, samples, weights); + gammaRateConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), gammaRateConjugate.memoryUsage()); CLogNormalMeanPrecConjugate logNormalConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7, 0.2); CPPUNIT_ASSERT_EQUAL(std::size_t(0), logNormalConjugate.memoryUsage()); - logNormalConjugate.addSamples(weightStyles, samples, weights); + logNormalConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), logNormalConjugate.memoryUsage()); CPoissonMeanConjugate poissonConjugate(0.0, 0.8, 0.7, 0.3); CPPUNIT_ASSERT_EQUAL(std::size_t(0), poissonConjugate.memoryUsage()); - poissonConjugate.addSamples(weightStyles, samples, weights); + poissonConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), poissonConjugate.memoryUsage()); CNormalMeanPrecConjugate normalConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7, 0.2); CPPUNIT_ASSERT_EQUAL(std::size_t(0), normalConjugate.memoryUsage()); - normalConjugate.addSamples(weightStyles, samples, weights); + normalConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), normalConjugate.memoryUsage()); CMultinomialConjugate multinomialConjugate; CPPUNIT_ASSERT_EQUAL(std::size_t(0), multinomialConjugate.memoryUsage()); - multinomialConjugate.addSamples(weightStyles, samples, weights); + multinomialConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), multinomialConjugate.memoryUsage()); CXMeansOnline1d clusterer(maths_t::E_ContinuousData, @@ -111,7 +106,7 @@ void CMathsMemoryTest::testPriors() { std::size_t initialMultimodalPriorSize = multimodalPrior.memoryUsage(); - multimodalPrior.addSamples(weightStyles, samples, weights); + multimodalPrior.addSamples(samples, weights); CPPUNIT_ASSERT(initialMultimodalPriorSize < multimodalPrior.memoryUsage()); core::CMemoryUsage mem; diff --git a/lib/maths/unittest/CMixtureDistributionTest.cc b/lib/maths/unittest/CMixtureDistributionTest.cc index fbe6421246..dda827f8fd 100644 --- a/lib/maths/unittest/CMixtureDistributionTest.cc +++ b/lib/maths/unittest/CMixtureDistributionTest.cc @@ -25,10 +25,6 @@ using TLogNormalVec = std::vector>; using TGammaVec = std::vector>; void CMixtureDistributionTest::testSupport() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CLogTDistributionTest::testSupport |"); - LOG_DEBUG(<< "+--------------------------------------+"); - { boost::math::normal_distribution<> n1(0.0, 1.0); 
boost::math::normal_distribution<> n2(5.0, 1.0); @@ -58,10 +54,6 @@ void CMixtureDistributionTest::testSupport() { } void CMixtureDistributionTest::testMode() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CLogTDistributionTest::testMode |"); - LOG_DEBUG(<< "+-----------------------------------+"); - // The mode of the distribution should be at the maximum // of the distribution, i.e. p.d.f. derivative should be // zero and curvature should be positive. @@ -165,10 +157,6 @@ void CMixtureDistributionTest::testMode() { } void CMixtureDistributionTest::testPdf() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CMixtureDistributionTest::testPdf |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Check that the p.d.f. is the derivative of the c.d.f. const double tolerance = 1e-6; @@ -218,10 +206,6 @@ void CMixtureDistributionTest::testPdf() { } void CMixtureDistributionTest::testCdf() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CMixtureDistributionTest::testCdf |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // The idea here is that the distribution should describe data // generated by a mixture of distributions. @@ -284,10 +268,6 @@ void CMixtureDistributionTest::testCdf() { } void CMixtureDistributionTest::testQuantile() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CMixtureDistributionTest::testQuantile |"); - LOG_DEBUG(<< "+------------------------------------------+"); - // Check that the quantile is the inverse of the c.d.f. const double weights[][3] = { diff --git a/lib/maths/unittest/CModelTest.cc b/lib/maths/unittest/CModelTest.cc index 701ecc0895..61406dd52d 100644 --- a/lib/maths/unittest/CModelTest.cc +++ b/lib/maths/unittest/CModelTest.cc @@ -15,12 +15,11 @@ using namespace ml; void CModelTest::testAll() { - LOG_DEBUG(<< "+-----------------------+"); - LOG_DEBUG(<< "| CModelTest::testAll |"); - LOG_DEBUG(<< "+-----------------------+"); - // Test that the various parameter classes work as expected. 
+ using TDouble2Vec = maths_t::TDouble2Vec; + using TDouble2VecWeightsAryVec = std::vector; + { core_t::TTime bucketLength{600}; double learnRate{0.5}; @@ -40,43 +39,37 @@ void CModelTest::testAll() { CPPUNIT_ASSERT_EQUAL(core::constants::DAY, params.maximumTimeToTestForChange()); } { - maths::CModelAddSamplesParams::TDouble2Vec weight1(2, 0.4); - maths::CModelAddSamplesParams::TDouble2Vec weight2(2, 0.7); - maths::CModelAddSamplesParams::TDouble2Vec4Vec weights1(1, weight1); - maths::CModelAddSamplesParams::TDouble2Vec4Vec weights2(1, weight2); - maths::CModelAddSamplesParams::TDouble2Vec4VecVec trendWeights(1, weights1); - maths::CModelAddSamplesParams::TDouble2Vec4VecVec priorWeights(1, weights2); + maths_t::TDouble2VecWeightsAry weight1(maths_t::CUnitWeights::unit(2)); + maths_t::TDouble2VecWeightsAry weight2(maths_t::CUnitWeights::unit(2)); + maths_t::setSeasonalVarianceScale(TDouble2Vec(2, 0.4), weight1); + maths_t::setSeasonalVarianceScale(TDouble2Vec(2, 0.7), weight2); + TDouble2VecWeightsAryVec trendWeights{weight1}; + TDouble2VecWeightsAryVec priorWeights{weight2}; maths::CModelAddSamplesParams params; - params.integer(true) - .propagationInterval(1.5) - .weightStyles(maths::CConstantWeights::SEASONAL_VARIANCE) - .trendWeights(trendWeights) - .priorWeights(priorWeights); + params.integer(true).propagationInterval(1.5).trendWeights(trendWeights).priorWeights(priorWeights); CPPUNIT_ASSERT_EQUAL(maths_t::E_IntegerData, params.type()); CPPUNIT_ASSERT_EQUAL(1.5, params.propagationInterval()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(maths::CConstantWeights::SEASONAL_VARIANCE), - core::CContainerPrinter::print(params.weightStyles())); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(trendWeights), core::CContainerPrinter::print(params.trendWeights())); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(priorWeights), core::CContainerPrinter::print(params.priorWeights())); } { - maths::CModelProbabilityParams::TDouble2Vec weight1(2, 0.4); - maths::CModelProbabilityParams::TDouble2Vec weight2(2, 0.7); - maths::CModelProbabilityParams::TDouble2Vec4Vec weights1(1, weight1); - maths::CModelProbabilityParams::TDouble2Vec4Vec weights2(1, weight2); + maths_t::TDouble2VecWeightsAry weight1(maths_t::CUnitWeights::unit(2)); + maths_t::TDouble2VecWeightsAry weight2(maths_t::CUnitWeights::unit(2)); + maths_t::setCountVarianceScale(TDouble2Vec(2, 0.4), weight1); + maths_t::setCountVarianceScale(TDouble2Vec(2, 0.7), weight2); + TDouble2VecWeightsAryVec weights{weight1, weight2}; maths::CModelProbabilityParams params; CPPUNIT_ASSERT(!params.mostAnomalousCorrelate()); CPPUNIT_ASSERT(params.coordinates().empty()); params.addCalculation(maths_t::E_OneSidedAbove) .addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(50.0) - .addBucketEmpty(maths::CModelProbabilityParams::TBool2Vec{true, true}) - .addBucketEmpty(maths::CModelProbabilityParams::TBool2Vec{false, true}) - .weightStyles(maths::CConstantWeights::COUNT_VARIANCE) - .addWeights(weights1) - .addWeights(weights2) + .addBucketEmpty({true, true}) + .addBucketEmpty({false, true}) + .addWeights(weight1) + .addWeights(weight2) .mostAnomalousCorrelate(1) .addCoordinate(1) .addCoordinate(0); @@ -86,9 +79,7 @@ void CModelTest::testAll() { CPPUNIT_ASSERT_EQUAL(50.0, params.seasonalConfidenceInterval()); CPPUNIT_ASSERT_EQUAL(std::string("[[true, true], [false, true]]"), core::CContainerPrinter::print(params.bucketEmpty())); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(maths::CConstantWeights::COUNT_VARIANCE), - 
core::CContainerPrinter::print(params.weightStyles())); - CPPUNIT_ASSERT_EQUAL(std::string("[[[0.4, 0.4]], [[0.7, 0.7]]]"), + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(weights), core::CContainerPrinter::print(params.weights())); CPPUNIT_ASSERT_EQUAL(std::size_t(1), *params.mostAnomalousCorrelate()); CPPUNIT_ASSERT_EQUAL(std::string("[1, 0]"), diff --git a/lib/maths/unittest/CMultimodalPriorTest.cc b/lib/maths/unittest/CMultimodalPriorTest.cc index dbba9f30c4..8fb8d7d651 100644 --- a/lib/maths/unittest/CMultimodalPriorTest.cc +++ b/lib/maths/unittest/CMultimodalPriorTest.cc @@ -49,6 +49,7 @@ using CLogNormalMeanPrecConjugate = CPriorTestInterfaceMixin; using CMultimodalPrior = CPriorTestInterfaceMixin; using COneOfNPrior = CPriorTestInterfaceMixin; +using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double); //! Make the default mode prior. COneOfNPrior makeModePrior(const double& decayRate = 0.0) { @@ -142,10 +143,6 @@ void probabilityOfLessLikelySample(const maths::CMixtureDistribution& mixture } void CMultimodalPriorTest::testMultipleUpdate() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testMultipleUpdate |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - // Test that we get the same result updating once with a vector of 100 // samples of an R.V. versus updating individually 100 times. @@ -183,10 +180,6 @@ void CMultimodalPriorTest::testMultipleUpdate() { } void CMultimodalPriorTest::testPropagation() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testPropagation |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // Test that propagation doesn't affect the marginal likelihood // mean and the marginal likelihood confidence intervals increase // (due to influence of the prior uncertainty) after propagation. @@ -243,10 +236,6 @@ void CMultimodalPriorTest::testPropagation() { } void CMultimodalPriorTest::testSingleMode() { - LOG_DEBUG(<< "+----------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testSingleMode |"); - LOG_DEBUG(<< "+----------------------------------------+"); - // We test the log likelihood of the data for the estimated // distributions versus the generating distributions. Note // that the generating distribution doesn't necessarily have @@ -399,10 +388,6 @@ void CMultimodalPriorTest::testSingleMode() { } void CMultimodalPriorTest::testMultipleModes() { - LOG_DEBUG(<< "+-------------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testMultipleModes |"); - LOG_DEBUG(<< "+-------------------------------------------+"); - // We check that for data generated from multiple modes // we get something close to the generating distribution. // In particular, we test the log likelihood of the data @@ -691,10 +676,6 @@ void CMultimodalPriorTest::testMultipleModes() { } void CMultimodalPriorTest::testMarginalLikelihood() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testMarginalLikelihood |"); - LOG_DEBUG(<< "+------------------------------------------------+"); - using TNormalVec = std::vector>; // Check that the c.d.f. <= 1 at extreme. 
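The CModelTest hunks above show the multivariate flavour of the same migration: model parameters now take per-coordinate weight arrays instead of a weight-style vector. A condensed sketch of the new shape; the explicit unit<TDouble2Vec> template argument is an assumption recovered from the surrounding code, and the parameter values are illustrative only:

using TDouble2Vec = maths_t::TDouble2Vec;
using TDouble2VecWeightsAryVec = std::vector<maths_t::TDouble2VecWeightsAry>;

// Start from a 2-dimensional unit weight and scale one weight kind.
maths_t::TDouble2VecWeightsAry weight(maths_t::CUnitWeights::unit<TDouble2Vec>(2));
maths_t::setSeasonalVarianceScale(TDouble2Vec(2, 0.4), weight);

// Named vectors, as in the test above, keep the weights alive for the
// duration of the params object.
TDouble2VecWeightsAryVec trendWeights{weight};
TDouble2VecWeightsAryVec priorWeights{weight};

maths::CModelAddSamplesParams params;
params.integer(true).propagationInterval(1.5).trendWeights(trendWeights).priorWeights(priorWeights);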
@@ -714,17 +695,14 @@ void CMultimodalPriorTest::testMarginalLikelihood() { rng.generateLogNormalSamples(location, squareScale, 100, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast(maths_t::countWeight), + static_cast(maths_t::winsorisationWeight)}; + double weights[]{0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 20000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({20000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -876,10 +854,6 @@ void CMultimodalPriorTest::testMarginalLikelihood() { } void CMultimodalPriorTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testMarginalLikelihoodMode |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // Test that the marginal likelihood mode is at a local // minimum of the likelihood function. And we don't find // a higher likelihood location with high probability. @@ -910,30 +884,24 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode() { CMultimodalPrior filter(makePrior()); filter.addSamples(samples); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); - TDouble4Vec1Vec weights(1, weight); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); std::size_t totalCount = 0u; for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { double vs = varianceScales[i]; - weight[0] = vs; - weights[0][0] = vs; + maths_t::setCountVarianceScale(vs, weight); LOG_DEBUG(<< "*** vs = " << vs << " ***"); - double mode = filter.marginalLikelihoodMode(weightStyle, weight); + double mode = filter.marginalLikelihoodMode(weight); LOG_DEBUG(<< "marginalLikelihoodMode = " << mode); // Should be near 8. 
- CPPUNIT_ASSERT_DOUBLES_EQUAL( - 8.0, filter.marginalLikelihoodMode(weightStyle, weight), 2.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL(8.0, filter.marginalLikelihoodMode(weight), 2.0); double eps = 0.01; double modeMinusEps = mode - eps; double modePlusEps = mode + eps; double fMode, fModeMinusEps, fModePlusEps; - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode), weights, fMode); - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modeMinusEps), - weights, fModeMinusEps); - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modePlusEps), - weights, fModePlusEps); + filter.jointLogMarginalLikelihood({mode}, {weight}, fMode); + filter.jointLogMarginalLikelihood({modeMinusEps}, {weight}, fModeMinusEps); + filter.jointLogMarginalLikelihood({modePlusEps}, {weight}, fModePlusEps); fMode = std::exp(fMode); fModeMinusEps = std::exp(fModeMinusEps); fModePlusEps = std::exp(fModePlusEps); @@ -950,8 +918,7 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode() { TDoubleVec fTrials; for (std::size_t j = 0u; j < trials.size(); ++j) { double fTrial; - filter.jointLogMarginalLikelihood( - weightStyle, TDouble1Vec(1, trials[j]), weights, fTrial); + filter.jointLogMarginalLikelihood({trials[j]}, {weight}, fTrial); fTrial = std::exp(fTrial); if (fTrial > fMode) { LOG_DEBUG(<< "f(" << trials[j] << ") = " << fTrial << " > " << fMode); @@ -969,10 +936,6 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode() { } void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() { - LOG_DEBUG(<< "+------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval |"); - LOG_DEBUG(<< "+------------------------------------------------------------------+"); - // Test that marginal likelihood confidence intervals are // what we'd expect for various variance scales. @@ -1055,7 +1018,8 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() { CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.05); } - LOG_DEBUG(<< "Problem Case (Issue 439)") { + LOG_DEBUG(<< "Problem Case (Issue 439)"); + { std::ifstream file; file.open("testfiles/poorly_conditioned_multimodal.txt"); std::ostringstream state; @@ -1071,9 +1035,9 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() { maths::CPriorStateSerialiser restorer; CPPUNIT_ASSERT(restorer(params, prior, traverser)); TDoubleDoublePr median = prior->marginalLikelihoodConfidenceInterval( - 0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); + 0, maths_t::CUnitWeights::UNIT); TDoubleDoublePr i90 = prior->marginalLikelihoodConfidenceInterval( - 90, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); + 90, maths_t::CUnitWeights::UNIT); LOG_DEBUG(<< "median = " << maths::CBasicStatistics::mean(median)); LOG_DEBUG(<< "confidence interval = " << core::CContainerPrinter::print(i90)); @@ -1086,10 +1050,6 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() { } void CMultimodalPriorTest::testSampleMarginalLikelihood() { - LOG_DEBUG(<< "+------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testSampleMarginalLikelihood |"); - LOG_DEBUG(<< "+------------------------------------------------------+"); - // We're going to test two properties of the sampling: // 1) That the sample mean is equal to the marginal likelihood // mean. 
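Where a test sweeps several scales over otherwise identical calls, the migrated code copies the unit weight once and mutates the relevant slot in place, as testMarginalLikelihoodMode above does. A trimmed sketch of that loop shape (filter again stands in for the prior under test, and the scale values are illustrative):

// One weights array, reused across the sweep; only the count variance
// scale slot changes per iteration.
maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT);
for (double vs : {0.2, 1.0, 5.0}) {
    maths_t::setCountVarianceScale(vs, weight);
    double mode = filter.marginalLikelihoodMode(weight);
    // ... assert on mode for this variance scale.
}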
@@ -1214,10 +1174,6 @@ void CMultimodalPriorTest::testSampleMarginalLikelihood() { } void CMultimodalPriorTest::testCdf() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testCdf |"); - LOG_DEBUG(<< "+---------------------------------+"); - // Test error cases. // // Test some invariants: @@ -1277,10 +1233,6 @@ void CMultimodalPriorTest::testCdf() { } void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+------------------------------------------------------------+"); - using TNormalVec = std::vector>; using TLogNormalVec = std::vector>; using TGammaVec = std::vector>; @@ -1349,20 +1301,17 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() { double lb, ub; maths_t::ETail tail; - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, 49.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, {49.0}, + maths_t::CUnitWeights::SINGLE_UNIT, + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, 54.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, {54.0}, + maths_t::CUnitWeights::SINGLE_UNIT, + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, 59.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, {59.0}, + maths_t::CUnitWeights::SINGLE_UNIT, + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } { @@ -1493,10 +1442,6 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() { } void CMultimodalPriorTest::testLargeValues() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testLargeValues |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // Check that the confidence interval calculation stays // well conditioned for very large values. 
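The probability hunks above leave the weight implicit by passing ready-made unit constants: UNIT is a single neutral weights array and SINGLE_UNIT a one-element vector wrapping it, which removes the old TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)) noise. In sketch form, with filter a placeholder prior and 54.0 an arbitrary sample:

double lb, ub;
maths_t::ETail tail;

// One sample, neutral weight: SINGLE_UNIT supplies the {UNIT} vector.
filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, {54.0},
                                      maths_t::CUnitWeights::SINGLE_UNIT,
                                      lb, ub, tail);

// APIs that take a single weights array use UNIT directly.
TDoubleDoublePr median =
    filter.marginalLikelihoodConfidenceInterval(0.0, maths_t::CUnitWeights::UNIT);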
@@ -1642,12 +1587,10 @@ void CMultimodalPriorTest::testLargeValues() { clusterer, modePrior, 0.001); for (auto value : values) { - - multimodalPrior.addSamples(maths::CConstantWeights::COUNT, TDouble1Vec(1, value), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0 / 3.0))); + multimodalPrior.addSamples({value}, {maths_t::countWeight(1.0 / 3.0)}); if (!multimodalPrior.isNonInformative()) { TDoubleDoublePr interval = multimodalPrior.marginalLikelihoodConfidenceInterval( - 95.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); + 95.0, maths_t::CUnitWeights::UNIT); if (interval.second - interval.first >= 3e11) { LOG_DEBUG(<< "interval = " << interval.second - interval.first); LOG_DEBUG(<< multimodalPrior.print()); @@ -1658,10 +1601,6 @@ void CMultimodalPriorTest::testLargeValues() { } void CMultimodalPriorTest::testSeasonalVarianceScale() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CMultimodalPriorTest::testSeasonalVarianceScale |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - // We test: // 1) The marginal likelihood is normalized. // 2) E[(X - m)^2] w.r.t. the log-likelihood is scaled. @@ -1688,9 +1627,7 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { rng.generateNormalSamples(mean3, variance3, 100, samples3); double varianceScales[] = {0.2, 0.5, 1.0, 2.0, 5.0}; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); - TDouble4Vec1Vec weights(1, weight); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); double m; double v; @@ -1716,37 +1653,36 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { double vs = varianceScales[i]; - weight[0] = vs; - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); LOG_DEBUG(<< "*** variance scale = " << vs << " ***"); double Z; - filter.expectation(C1dUnitKernel(), 50, Z, weightStyle, weight); + filter.expectation(C1dUnitKernel(), 50, Z, weight); LOG_DEBUG(<< "Z = " << Z); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, Z, 1e-3); - LOG_DEBUG(<< "sv = " << filter.marginalLikelihoodVariance(weightStyle, weight)); + LOG_DEBUG(<< "sv = " << filter.marginalLikelihoodVariance(weight)); double expectationVariance; filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), - 50, expectationVariance, weightStyle, weight); + 50, expectationVariance, weight); LOG_DEBUG(<< "expectationVariance = " << expectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL(vs * unscaledExpectationVariance, expectationVariance, 1e-3 * vs * unscaledExpectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL( - filter.marginalLikelihoodVariance(weightStyle, weight), expectationVariance, - 1e-3 * filter.marginalLikelihoodVariance(weightStyle, weight)); + filter.marginalLikelihoodVariance(weight), expectationVariance, + 1e-3 * filter.marginalLikelihoodVariance(weight)); TDouble1Vec sample(1, 0.0); for (std::size_t j = 0u; j < boost::size(points); ++j) { TDouble1Vec x(1, points[j]); double fx; - filter.jointLogMarginalLikelihood(weightStyle, x, weights, fx); + filter.jointLogMarginalLikelihood(x, {weight}, fx); TDouble1Vec xMinusEps(1, points[j] - 1e-3); TDouble1Vec xPlusEps(1, points[j] + 1e-3); double lb, ub; - filter.minusLogJointCdf(weightStyle, xPlusEps, weights, lb, ub); + filter.minusLogJointCdf(xPlusEps, {weight}, lb, ub); double FxPlusEps = std::exp(-(lb + ub) / 2.0); - filter.minusLogJointCdf(weightStyle, xMinusEps, weights, lb, ub); +
filter.minusLogJointCdf(xMinusEps, {weight}, lb, ub);
             double FxMinusEps = std::exp(-(lb + ub) / 2.0);
 
             LOG_DEBUG(<< "x = " << points[j] << ", log(f(x)) = " << fx << ", log(dF/dx)) = "
                       << std::log((FxPlusEps - FxMinusEps) / 2e-3));
@@ -1754,22 +1690,21 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() {
                                          0.05 * std::fabs(fx));
 
             sample[0] = m + (points[j] - m) / std::sqrt(vs);
-            weights[0][0] = 1.0;
+            maths_t::setSeasonalVarianceScale(1.0, weight);
             double expectedLowerBound;
             double expectedUpperBound;
             maths_t::ETail expectedTail;
-            filter.probabilityOfLessLikelySamples(
-                maths_t::E_TwoSided, weightStyle, sample, weights,
-                expectedLowerBound, expectedUpperBound, expectedTail);
+            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample,
+                                                  {weight}, expectedLowerBound,
+                                                  expectedUpperBound, expectedTail);
 
             sample[0] = points[j];
-            weights[0][0] = vs;
+            maths_t::setSeasonalVarianceScale(vs, weight);
             double lowerBound;
             double upperBound;
             maths_t::ETail tail;
-            filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided,
-                                                  weightStyle, sample, weights,
-                                                  lowerBound, upperBound, tail);
+            filter.probabilityOfLessLikelySamples(
+                maths_t::E_TwoSided, sample, {weight}, lowerBound, upperBound, tail);
 
             LOG_DEBUG(<< "expectedLowerBound = " << expectedLowerBound);
             LOG_DEBUG(<< "lowerBound = " << lowerBound);
@@ -1804,9 +1739,9 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() {
             rng.random_shuffle(samples.begin(), samples.end());
 
             CMultimodalPrior filter(makePrior());
-            weights[0][0] = vs;
+            maths_t::setSeasonalVarianceScale(vs, weight);
             for (std::size_t j = 0u; j < samples.size(); ++j) {
-                filter.addSamples(weightStyle, TDouble1Vec(1, samples[j]), weights);
+                filter.addSamples({samples[j]}, {weight});
             }
 
             double sm = filter.marginalLikelihoodMean();
@@ -1820,10 +1755,6 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() {
 }
 
 void CMultimodalPriorTest::testPersist() {
-    LOG_DEBUG(<< "+-------------------------------------+");
-    LOG_DEBUG(<< "| CMultimodalPriorTest::testPersist |");
-    LOG_DEBUG(<< "+-------------------------------------+");
-
     test::CRandomNumbers rng;
 
     TDoubleVec samples1;
@@ -1853,9 +1784,7 @@ void CMultimodalPriorTest::testPersist() {
     maths::CMultimodalPrior origFilter(maths_t::E_ContinuousData, clusterer, modePrior);
     for (std::size_t i = 0u; i < samples.size(); ++i) {
-        origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
-                              TDouble1Vec(1, samples[i]),
-                              TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
+        origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT);
     }
     double decayRate = origFilter.decayRate();
     uint64_t checksum = origFilter.checksum();
diff --git a/lib/maths/unittest/CMultinomialConjugateTest.cc b/lib/maths/unittest/CMultinomialConjugateTest.cc
index 0f0668ffd0..dd0c67830c 100644
--- a/lib/maths/unittest/CMultinomialConjugateTest.cc
+++ b/lib/maths/unittest/CMultinomialConjugateTest.cc
@@ -41,10 +41,6 @@ using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
 using CMultinomialConjugate = CPriorTestInterfaceMixin<maths::CMultinomialConjugate>;
 
 void CMultinomialConjugateTest::testMultipleUpdate() {
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultinomialConjugateTest::testMultipleUpdate |");
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-
     // Test that we get the same result updating once with a vector of 100
     // samples of an R.V. versus updating individually 100 times.
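The invariant behind the multiple-update tests is that the posterior is indifferent to how samples are delivered. A minimal sketch with the refactored weights API used throughout this patch (TDoubleWeightsAry1Vec is assumed by analogy with the TDouble10VecWeightsAry1Vec used in the multivariate hunks below):

    #include <maths/CMultinomialConjugate.h>
    #include <maths/MathsTypes.h>

    using namespace ml;

    // Updating once with n samples and n unit weights must give the same
    // posterior as updating n times with one sample and one unit weight.
    void updateBothWays(const maths::CPrior::TDouble1Vec& samples) {
        maths::CMultinomialConjugate batch(
            maths::CMultinomialConjugate::nonInformativePrior(6u));
        maths::CMultinomialConjugate oneByOne(batch);

        batch.addSamples(samples, maths_t::TDoubleWeightsAry1Vec(
                                      samples.size(), maths_t::CUnitWeights::UNIT));
        for (auto sample : samples) {
            oneByOne.addSamples({sample}, maths_t::CUnitWeights::SINGLE_UNIT);
        }
        // The tests assert batch.checksum() == oneByOne.checksum().
    }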
@@ -73,10 +69,6 @@ void CMultinomialConjugateTest::testMultipleUpdate() {
 }
 
 void CMultinomialConjugateTest::testPropagation() {
-    LOG_DEBUG(<< "+----------------------------------------------+");
-    LOG_DEBUG(<< "| CMultinomialConjugateTest::testPropagation |");
-    LOG_DEBUG(<< "+----------------------------------------------+");
-
     // Test that propagation doesn't affect the expected values
     // of probabilities.
 
@@ -115,10 +107,6 @@ void CMultinomialConjugateTest::testPropagation() {
 }
 
 void CMultinomialConjugateTest::testProbabilityEstimation() {
-    LOG_DEBUG(<< "+--------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultinomialConjugateTest::testProbabilityEstimation |");
-    LOG_DEBUG(<< "+--------------------------------------------------------+");
-
     // We are going to test that we correctly estimate the distribution
     // for the probabilities of a multinomial process by checking that
     // the true probabilities lie in various confidence intervals the
@@ -200,10 +188,6 @@ void CMultinomialConjugateTest::testProbabilityEstimation() {
 }
 
 void CMultinomialConjugateTest::testMarginalLikelihood() {
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultinomialConjugateTest::testMarginalLikelihood |");
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-
     {
         // For a single sample the log likelihood of the i'th category is
         // equal to log(p(i)) where p(i) is the i'th category expected
@@ -388,10 +372,6 @@ void CMultinomialConjugateTest::testMarginalLikelihood() {
 }
 
 void CMultinomialConjugateTest::testSampleMarginalLikelihood() {
-    LOG_DEBUG(<< "+-----------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultinomialConjugateTest::testSampleMarginalLikelihood |");
-    LOG_DEBUG(<< "+-----------------------------------------------------------+");
-
     // Test that we sample categories in proportion to their marginal
     // probabilities. We test two cases:
     // 1) The probabilities exactly divide the requested number of samples n.
@@ -474,10 +454,6 @@ void CMultinomialConjugateTest::testSampleMarginalLikelihood() {
 }
 
 void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() {
-    LOG_DEBUG(<< "+-----------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultinomialConjugateTest::testProbabilityOfLessLikelySamples |");
-    LOG_DEBUG(<< "+-----------------------------------------------------------------+");
-
     using TDoubleSizePr = std::pair<double, std::size_t>;
     using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
 
@@ -500,24 +476,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() {
 
         CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); // Large update limit.
- filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 10000.0))); // P = 0.10 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 12000.0))); // P = 0.12 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[2]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 29000.0))); // P = 0.29 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[3]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 4000.0))); // P = 0.04 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 6000.0))); // P = 0.06 + filter.addSamples({categories[0]}, {maths_t::countWeight(10000.0)}); // P = 0.10 + filter.addSamples({categories[1]}, {maths_t::countWeight(12000.0)}); // P = 0.12 + filter.addSamples({categories[2]}, {maths_t::countWeight(29000.0)}); // P = 0.29 + filter.addSamples({categories[3]}, {maths_t::countWeight(39000.0)}); // P = 0.39 + filter.addSamples({categories[4]}, {maths_t::countWeight(4000.0)}); // P = 0.04 + filter.addSamples({categories[5]}, {maths_t::countWeight(6000.0)}); // P = 0.06 // We expect the following probabilities for each category: // P(1.1) = 0.20 @@ -548,24 +512,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); // Large update limit. 
- filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 11000.0))); // P = 0.11 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 11000.0))); // P = 0.11 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[2]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 29000.0))); // P = 0.29 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[3]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 + filter.addSamples({categories[0]}, {maths_t::countWeight(11000.0)}); // P = 0.11 + filter.addSamples({categories[1]}, {maths_t::countWeight(11000.0)}); // P = 0.11 + filter.addSamples({categories[2]}, {maths_t::countWeight(29000.0)}); // P = 0.29 + filter.addSamples({categories[3]}, {maths_t::countWeight(39000.0)}); // P = 0.39 + filter.addSamples({categories[4]}, {maths_t::countWeight(5000.0)}); // P = 0.05 + filter.addSamples({categories[5]}, {maths_t::countWeight(5000.0)}); // P = 0.05 // We expect the following probabilities for each category: // P(1.1) = P(1.2) = 0.32 @@ -593,24 +545,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); // Large update limit. 
- filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 15000.0))); // P = 0.15 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 15000.0))); // P = 0.15 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[2]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 30000.0))); // P = 0.30 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[3]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 30000.0))); // P = 0.30 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 + filter.addSamples({categories[0]}, {maths_t::countWeight(15000.0)}); // P = 0.15 + filter.addSamples({categories[1]}, {maths_t::countWeight(15000.0)}); // P = 0.15 + filter.addSamples({categories[2]}, {maths_t::countWeight(30000.0)}); // P = 0.30 + filter.addSamples({categories[3]}, {maths_t::countWeight(30000.0)}); // P = 0.30 + filter.addSamples({categories[4]}, {maths_t::countWeight(5000.0)}); // P = 0.05 + filter.addSamples({categories[5]}, {maths_t::countWeight(5000.0)}); // P = 0.05 // We expect the following probabilities for each category: // P(1.1) = P(1.2) = 0.40 @@ -702,24 +642,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); // Large update limit. 
- filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 10000.0))); // P = 0.10 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 12000.0))); // P = 0.12 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[2]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 29000.0))); // P = 0.29 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[3]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 4000.0))); // P = 0.04 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 6000.0))); // P = 0.06 + filter.addSamples({categories[0]}, {maths_t::countWeight(10000.0)}); // P = 0.10 + filter.addSamples({categories[1]}, {maths_t::countWeight(12000.0)}); // P = 0.12 + filter.addSamples({categories[2]}, {maths_t::countWeight(29000.0)}); // P = 0.29 + filter.addSamples({categories[3]}, {maths_t::countWeight(39000.0)}); // P = 0.39 + filter.addSamples({categories[4]}, {maths_t::countWeight(4000.0)}); // P = 0.04 + filter.addSamples({categories[5]}, {maths_t::countWeight(6000.0)}); // P = 0.06 double expectedProbabilities[] = {0.2, 0.32, 0.61, 1.0, 0.04, 0.1}; @@ -817,10 +745,8 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter( CMultinomialConjugate::nonInformativePrior(categories.size())); for (std::size_t i = 0u; i < categories.size(); ++i) { - filter.addSamples( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, rawProbabilities[i] * 100.0))); + filter.addSamples({categories[i]}, + {maths_t::countWeight(rawProbabilities[i] * 100.0)}); } TDoubleVec lowerBounds, upperBounds; @@ -857,10 +783,6 @@ void CMultinomialConjugateTest::testAnomalyScore() { } void CMultinomialConjugateTest::testRemoveCategories() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CMultinomialConjugateTest::testRemoveCategories |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - double rawCategories[] = {1.0, 3.0, 15.0, 17.0, 19.0, 20.0}; double rawConcentrations[] = {1.0, 2.0, 1.5, 12.0, 10.0, 2.0}; @@ -937,10 +859,6 @@ void CMultinomialConjugateTest::testRemoveCategories() { } void CMultinomialConjugateTest::testPersist() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CMultinomialConjugateTest::testPersist |"); - LOG_DEBUG(<< "+------------------------------------------+"); - const double rawCategories[] = {-1.0, 5.0, 2.1, 78.0, 15.3}; const double rawProbabilities[] = {0.1, 0.2, 0.35, 0.3, 0.05}; const TDoubleVec categories(boost::begin(rawCategories), boost::end(rawCategories)); @@ -953,9 +871,7 @@ void CMultinomialConjugateTest::testPersist() { rng.generateMultinomialSamples(categories, probabilities, 100, samples); maths::CMultinomialConjugate origFilter(CMultinomialConjugate::nonInformativePrior(5)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - 
TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)));
+        origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT);
     }
     double decayRate = origFilter.decayRate();
     uint64_t checksum = origFilter.checksum();
diff --git a/lib/maths/unittest/CMultivariateConstantPriorTest.cc b/lib/maths/unittest/CMultivariateConstantPriorTest.cc
index 4e74efa280..42e4b5a3af 100644
--- a/lib/maths/unittest/CMultivariateConstantPriorTest.cc
+++ b/lib/maths/unittest/CMultivariateConstantPriorTest.cc
@@ -24,58 +24,31 @@ using namespace ml;
 using namespace handy_typedefs;
 
-namespace {
-
-const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight);
-
-TDouble10Vec4Vec unitWeight(std::size_t dimension) {
-    return TDouble10Vec4Vec(1, TDouble10Vec(dimension, 1.0));
-}
-
-TDouble10Vec4Vec1Vec singleUnitWeight(std::size_t dimension) {
-    return TDouble10Vec4Vec1Vec(1, unitWeight(dimension));
-}
-}
-
 void CMultivariateConstantPriorTest::testAddSamples() {
-    LOG_DEBUG(<< "+--------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testAddSamples |");
-    LOG_DEBUG(<< "+--------------------------------------------------+");
-
     // Test error cases.
 
     maths::CMultivariateConstantPrior filter(2);
 
     double wrongDimension[] = {1.3, 2.1, 7.9};
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(wrongDimension),
-                                                       boost::end(wrongDimension))),
-                      singleUnitWeight(3));
+    filter.addSamples({TDouble10Vec(boost::begin(wrongDimension), boost::end(wrongDimension))},
+                      maths_t::CUnitWeights::singleUnit(3));
     CPPUNIT_ASSERT(filter.isNonInformative());
 
     double nans[] = {1.3, std::numeric_limits<double>::quiet_NaN()};
-    filter.addSamples(
-        COUNT_WEIGHT,
-        TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(nans), boost::end(nans))),
-        singleUnitWeight(3));
+    filter.addSamples({TDouble10Vec(boost::begin(nans), boost::end(nans))},
+                      maths_t::CUnitWeights::singleUnit(2));
    CPPUNIT_ASSERT(filter.isNonInformative());
 
     double constant[] = {1.4, 1.0};
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant),
-                                                       boost::end(constant))),
-                      singleUnitWeight(2));
+    filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))},
+                      maths_t::CUnitWeights::singleUnit(2));
     CPPUNIT_ASSERT(!filter.isNonInformative());
 }
 
 void CMultivariateConstantPriorTest::testMarginalLikelihood() {
-    LOG_DEBUG(<< "+----------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testMarginalLikelihood |");
-    LOG_DEBUG(<< "+----------------------------------------------------------+");
-
     // Check that the marginal likelihood is 0 for non informative, otherwise
     // either 0 or infinity depending on whether the value is equal to the
     // constant or not.
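In other words, the constant prior is a point mass: once informed, all density sits at the constant. A compact sketch of the contract being asserted in the next hunk (types and calls as in the surrounding diff):

    // After addSamples({c}), jointLogMarginalLikelihood reports:
    //  - E_FpFailed for an empty sample set,
    //  - log(max double) (E_FpNoErrors) when evaluated at c,
    //  - lowest double (E_FpOverflowed) at any point != c.
    double logLikelihood;
    maths::CMultivariateConstantPrior filter(2);
    filter.addSamples({TDouble10Vec(2, 1.4)}, maths_t::CUnitWeights::singleUnit(2));
    filter.jointLogMarginalLikelihood({TDouble10Vec(2, 1.4)},
                                      maths_t::CUnitWeights::singleUnit(2), logLikelihood);
    // logLikelihood == std::log(boost::numeric::bounds<double>::highest())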
@@ -87,50 +60,42 @@ void CMultivariateConstantPriorTest::testMarginalLikelihood() {
 
     double likelihood;
-    CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed, filter.jointLogMarginalLikelihood(
-                                                  COUNT_WEIGHT, TDouble10Vec1Vec(),
-                                                  singleUnitWeight(2), likelihood));
     CPPUNIT_ASSERT_EQUAL(
         maths_t::E_FpFailed,
         filter.jointLogMarginalLikelihood(
-            COUNT_WEIGHT,
+            {}, maths_t::CUnitWeights::singleUnit(2), likelihood));
+    CPPUNIT_ASSERT_EQUAL(
+        maths_t::E_FpFailed,
+        filter.jointLogMarginalLikelihood(
             TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-            singleUnitWeight(2), likelihood));
+            maths_t::CUnitWeights::singleUnit(2), likelihood));
 
     CPPUNIT_ASSERT_EQUAL(
         maths_t::E_FpOverflowed,
         filter.jointLogMarginalLikelihood(
-            COUNT_WEIGHT,
-            TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-            singleUnitWeight(2), likelihood));
+            {TDouble10Vec(boost::begin(constant), boost::end(constant))},
+            maths_t::CUnitWeights::singleUnit(2), likelihood));
     CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds<double>::lowest(), likelihood);
 
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant),
+    filter.addSamples(TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant),
                                                        boost::end(constant))),
-                      singleUnitWeight(2));
+                      maths_t::CUnitWeights::singleUnit(2));
 
     CPPUNIT_ASSERT_EQUAL(
         maths_t::E_FpNoErrors,
         filter.jointLogMarginalLikelihood(
-            COUNT_WEIGHT,
-            TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))),
-            singleUnitWeight(2), likelihood));
+            {TDouble10Vec(boost::begin(constant), boost::end(constant))},
+            maths_t::CUnitWeights::singleUnit(2), likelihood));
     CPPUNIT_ASSERT_EQUAL(std::log(boost::numeric::bounds<double>::highest()), likelihood);
 
     CPPUNIT_ASSERT_EQUAL(
         maths_t::E_FpOverflowed,
         filter.jointLogMarginalLikelihood(
-            COUNT_WEIGHT,
-            TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(different), boost::end(different))),
-            singleUnitWeight(2), likelihood));
+            {TDouble10Vec(boost::begin(different), boost::end(different))},
+            maths_t::CUnitWeights::singleUnit(2), likelihood));
     CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds<double>::lowest(), likelihood);
 }
 
 void CMultivariateConstantPriorTest::testMarginalLikelihoodMean() {
-    LOG_DEBUG(<< "+--------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testMarginalLikelihoodMean |");
-    LOG_DEBUG(<< "+--------------------------------------------------------------+");
-
     // Check that the marginal likelihood mean is 0 for non informative,
     // otherwise equal to the constant.
@@ -140,20 +105,14 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodMean() {
                          core::CContainerPrinter::print(filter.marginalLikelihoodMean()));
 
     double constant[] = {1.2, 6.0, 14.1};
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant),
-                                                       boost::end(constant))),
-                      singleUnitWeight(3));
+    filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))},
+                      maths_t::CUnitWeights::singleUnit(3));
 
     CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 6, 14.1]"),
                          core::CContainerPrinter::print(filter.marginalLikelihoodMean()));
 }
 
 void CMultivariateConstantPriorTest::testMarginalLikelihoodMode() {
-    LOG_DEBUG(<< "+--------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testMarginalLikelihoodMode |");
-    LOG_DEBUG(<< "+--------------------------------------------------------------+");
-
     // Check that the marginal likelihood mode is 0 for non informative,
     // otherwise equal to the constant.
 
@@ -161,24 +120,18 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodMode() {
 
     CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()),
                          core::CContainerPrinter::print(filter.marginalLikelihoodMode(
-                             COUNT_WEIGHT, unitWeight(4))));
+                             maths_t::CUnitWeights::unit(4))));
 
     double constant[] = {1.1, 6.5, 12.3, 14.1};
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant),
-                                                       boost::end(constant))),
-                      singleUnitWeight(4));
+    filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))},
+                      maths_t::CUnitWeights::singleUnit(4));
 
     CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()),
                          core::CContainerPrinter::print(filter.marginalLikelihoodMode(
-                             COUNT_WEIGHT, unitWeight(4))));
+                             maths_t::CUnitWeights::unit(4))));
 }
 
 void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() {
-    LOG_DEBUG(<< "+--------------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance |");
-    LOG_DEBUG(<< "+--------------------------------------------------------------------+");
-
     // Check that the marginal likelihood mode is infinite diagonal for
     // non informative, otherwise the zero matrix.
 
@@ -198,10 +151,8 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() {
     }
 
     double constant[] = {1.1, 6.5, 12.3, 14.1};
-    filter.addSamples(COUNT_WEIGHT,
-                      TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant),
-                                                       boost::end(constant))),
-                      singleUnitWeight(4));
+    filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))},
+                      maths_t::CUnitWeights::singleUnit(4));
 
     covariance = filter.marginalLikelihoodCovariance();
 
     CPPUNIT_ASSERT_EQUAL(std::size_t(4), covariance.size());
@@ -214,14 +165,6 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() {
 }
 
 void CMultivariateConstantPriorTest::testSampleMarginalLikelihood() {
-    LOG_DEBUG(<< "+------------------------------------------------------------"
-                 "----+");
-    LOG_DEBUG(<< "| "
-                 "CMultivariateConstantPriorTest::testSampleMarginalLikelihood "
-                 " |");
-    LOG_DEBUG(<< "+------------------------------------------------------------"
-                 "----+");
-
     // Check we get zero samples for non-informative and sample of the
     // constant otherwise.
@@ -233,10 +176,8 @@ void CMultivariateConstantPriorTest::testSampleMarginalLikelihood() { double constant[] = {1.2, 4.1}; - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), - boost::end(constant))), - singleUnitWeight(2)); + filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))}, + maths_t::CUnitWeights::singleUnit(2)); filter.sampleMarginalLikelihood(4, samples); CPPUNIT_ASSERT_EQUAL(std::size_t(4), samples.size()); @@ -247,10 +188,6 @@ void CMultivariateConstantPriorTest::testSampleMarginalLikelihood() { } void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+----------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+----------------------------------------------------------------------+"); - // Check we get one for non-informative and the constant and zero // otherwise. @@ -265,25 +202,25 @@ void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() { for (std::size_t i = 0u; i < boost::size(samples); ++i) { double lb, ub; maths::CMultivariateConstantPrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, - samples[i], singleUnitWeight(2), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, samples[i], + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(1.0, lb); CPPUNIT_ASSERT_EQUAL(1.0, ub); LOG_DEBUG(<< "tail = " << core::CContainerPrinter::print(tail)); CPPUNIT_ASSERT_EQUAL(std::string("[0, 0]"), core::CContainerPrinter::print(tail)); } - filter.addSamples(COUNT_WEIGHT, samples[0], singleUnitWeight(2)); + filter.addSamples(samples[0], maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT(!filter.isNonInformative()); std::string expectedTails[] = {"[0, 0]", "[1, 2]", "[1, 2]"}; for (std::size_t i = 0u; i < boost::size(samples); ++i) { double lb, ub; maths::CMultivariateConstantPrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, - samples[i], singleUnitWeight(2), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, samples[i], + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(i == 0 ? 1.0 : 0.0, lb); CPPUNIT_ASSERT_EQUAL(i == 0 ? 1.0 : 0.0, ub); LOG_DEBUG(<< "tail = " << core::CContainerPrinter::print(tail)); @@ -292,10 +229,6 @@ void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() { } void CMultivariateConstantPriorTest::testPersist() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testPersist |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - // Check persistence is idempotent. 
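All the persistence tests in this series share one round-trip shape. A sketch of that pattern (the XML inserter/traverser class names are ml-cpp's usual persistence helpers and are an assumption here, since the hunks elide them):

    #include <core/CRapidXmlParser.h>
    #include <core/CRapidXmlStatePersistInserter.h>
    #include <core/CRapidXmlStateRestoreTraverser.h>
    #include <maths/CMultivariateConstantPrior.h>

    // Persist to XML, restore into a fresh object, then persist again: the
    // second document must be identical and checksums must agree.
    std::string toXml(const ml::maths::CMultivariateConstantPrior& prior) {
        ml::core::CRapidXmlStatePersistInserter inserter("root");
        prior.acceptPersistInserter(inserter);
        std::string xml;
        inserter.toXml(xml);
        return xml;
    }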
LOG_DEBUG(<< "*** Non-informative ***"); diff --git a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc index f890344675..a81721c64a 100644 --- a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc +++ b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc @@ -36,12 +36,6 @@ using TMean2Accumulator = maths::CBasicStatistics::SSampleMean::TAccum using TCovariances2 = maths::CBasicStatistics::SSampleCovariances; namespace { - -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); -const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight); -const TDouble10Vec UNIT_WEIGHT_2(2, 1.0); -const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2)); - template class CMultivariateMultimodalPriorForTest : public maths::CMultivariateMultimodalPrior { @@ -70,12 +64,11 @@ makePrior(maths_t::EDataType dataType, double decayRate = 0.0) { } void gaussianSamples(test::CRandomNumbers& rng, - std::size_t modes, - const std::size_t* n, + const TSizeVec& n, const double (*means)[2], const double (*covariances)[3], TDouble10Vec1Vec& samples) { - for (std::size_t i = 0u; i < modes; ++i) { + for (std::size_t i = 0u; i < n.size(); ++i) { TVector2 mean(means[i], means[i] + 2); TMatrix2 covariance(covariances[i], covariances[i] + 3); TDoubleVecVec samples_; @@ -157,21 +150,17 @@ std::string print(maths_t::EDataType dataType) { } void CMultivariateMultimodalPriorTest::testMultipleUpdate() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testMultipleUpdate |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // Test that we get the same result updating once with a vector of 100 // samples of an R.V. versus updating individually 100 times. 
- const std::size_t n[] = {100}; + const TSizeVec n{100}; const double means[][2] = {{10.0, 20.0}}; const double covariances[][3] = {{3.0, 1.0, 2.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; @@ -184,13 +173,13 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() { maths::CSampling::seed(); for (std::size_t j = 0; j < samples.size(); ++j) { - filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter1.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); } maths::CSampling::seed(); - filter2.addSamples( - COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter2.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -204,17 +193,15 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() { maths::CMultivariateMultimodalPrior<2> filter1(makePrior<2>(dataTypes[i])); maths::CMultivariateMultimodalPrior<2> filter2(filter1); - TDouble10Vec4Vec1Vec weights; - weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5))); - weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0))); + maths_t::TDouble10VecWeightsAry1Vec weights; + weights.resize(samples.size() / 2, maths_t::countVarianceScaleWeight(1.5, 2)); + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0, 2)); maths::CSampling::seed(); for (std::size_t j = 0u; j < samples.size(); ++j) { - TDouble10Vec1Vec sample(1, samples[j]); - TDouble10Vec4Vec1Vec weight(1, weights[j]); - filter1.addSamples(VARIANCE_WEIGHT, sample, weight); + filter1.addSamples({samples[j]}, {weights[j]}); } maths::CSampling::seed(); - filter2.addSamples(VARIANCE_WEIGHT, samples, weights); + filter2.addSamples(samples, weights); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -223,10 +210,6 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() { } void CMultivariateMultimodalPriorTest::testPropagation() { - LOG_DEBUG(<< "+-----------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testPropagation |"); - LOG_DEBUG(<< "+-----------------------------------------------------+"); - // Test that propagation doesn't affect the marginal likelihood // mean and the marginal likelihood variance increases (due to // influence of the prior uncertainty) after propagation. 
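The propagation invariant tested next can be stated in a few lines (a sketch only; `filter` is a fitted prior as in the test, and marginalLikelihoodVariances() is assumed to be the multivariate analogue of the univariate variance accessor):

    // Ageing the prior forwards in time must (approximately) preserve the
    // predicted mean, while the predicted variances may only grow as the
    // prior uncertainty re-asserts itself.
    TDouble10Vec meanBefore = filter.marginalLikelihoodMean();
    TDouble10Vec varBefore = filter.marginalLikelihoodVariances();
    filter.propagateForwardsByTime(40.0);
    TDouble10Vec meanAfter = filter.marginalLikelihoodMean();
    TDouble10Vec varAfter = filter.marginalLikelihoodVariances();
    for (std::size_t i = 0u; i < meanBefore.size(); ++i) {
        // assert |meanAfter[i] - meanBefore[i]| <= eps * |meanBefore[i]|
        // assert varAfter[i] >= varBefore[i]
    }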
@@ -235,14 +218,14 @@ void CMultivariateMultimodalPriorTest::testPropagation() { const double eps = 1e-3; - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); @@ -250,8 +233,9 @@ void CMultivariateMultimodalPriorTest::testPropagation() { maths::CMultivariateMultimodalPrior<2> filter( makePrior<2>(maths_t::E_ContinuousData, decayRate)); - filter.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); double numberSamples = filter.numberSamples(); TDouble10Vec mean = filter.marginalLikelihoodMean(); @@ -292,36 +276,29 @@ void CMultivariateMultimodalPriorTest::testPropagation() { } void CMultivariateMultimodalPriorTest::testSingleMode() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testSingleMode |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // Test that we stably get one cluster. maths::CSampling::seed(); - const std::size_t n[] = {500}; + const TSizeVec n{500}; const double means[][2] = {{20.0, 20.0}}; const double covariances[][3] = {{40.0, 10.0, 20.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), filter.numberModes()); } } void CMultivariateMultimodalPriorTest::testMultipleModes() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testMultipleModes |"); - LOG_DEBUG(<< "+-------------------------------------------------------+"); - // We check that for data generated from multiple modes // we get something close to the generating distribution. 
// In particular, we test the log likelihood of the data
@@ -337,12 +314,12 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() {
 
     LOG_DEBUG(<< "Mixture Normals");
     {
-        const std::size_t n[] = {400, 600};
+        const TSizeVec n{400, 600};
         const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}};
         const double covariances[][3] = {{4.0, 1.0, 4.0}, {10.0, -4.0, 6.0}};
 
         TDouble10Vec1Vec samples;
-        gaussianSamples(rng, boost::size(n), n, means, covariances, samples);
+        gaussianSamples(rng, n, means, covariances, samples);
 
         double w[] = {n[0] / static_cast<double>(n[0] + n[1]),
                      n[1] / static_cast<double>(n[0] + n[1])};
@@ -363,11 +340,13 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() {
             maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData);
 
         filter1.addSamples(
-            COUNT_WEIGHT, samples,
-            TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
+            samples,
+            maths_t::TDouble10VecWeightsAry1Vec(
+                samples.size(), maths_t::CUnitWeights::unit(2)));
         filter2.addSamples(
-            COUNT_WEIGHT, samples,
-            TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
+            samples,
+            maths_t::TDouble10VecWeightsAry1Vec(
+                samples.size(), maths_t::CUnitWeights::unit(2)));
 
         CPPUNIT_ASSERT_EQUAL(std::size_t(2), filter1.numberModes());
 
@@ -379,14 +358,16 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() {
             TDouble10Vec1Vec sample(1, samples[j]);
 
             double l1;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter1.jointLogMarginalLikelihood(
-                                     COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l1));
+            CPPUNIT_ASSERT_EQUAL(
+                maths_t::E_FpNoErrors,
+                filter1.jointLogMarginalLikelihood(
+                    sample, maths_t::CUnitWeights::singleUnit(2), l1));
             loss1G.add(ll - l1);
 
             double l2;
-            CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors,
-                                 filter2.jointLogMarginalLikelihood(
-                                     COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l2));
+            CPPUNIT_ASSERT_EQUAL(
+                maths_t::E_FpNoErrors,
+                filter2.jointLogMarginalLikelihood(
+                    sample, maths_t::CUnitWeights::singleUnit(2), l2));
             loss12.add(l2 - l1);
         }
 
@@ -406,10 +387,6 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() {
 }
 
 void CMultivariateMultimodalPriorTest::testSplitAndMerge() {
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testSplitAndMerge |");
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-
     // Test clustering which changes over time.
 
     maths::CSampling::seed();
@@ -463,8 +440,8 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() {
             LOG_DEBUG(<< "# samples = " << samples.size());
 
             for (std::size_t j = 0u; j < samples.size(); ++j) {
-                filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]),
-                                  TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2)));
+                filter.addSamples({samples[j]},
+                                  maths_t::CUnitWeights::singleUnit(2));
 
                 //pointsToDate.push_back(samples[j]);
                 //if (pointsToDate.size() == subplotCounts[subplot])
@@ -534,10 +511,6 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() {
 }
 
 void CMultivariateMultimodalPriorTest::testMarginalLikelihood() {
-    LOG_DEBUG(<< "+------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testMarginalLikelihood |");
-    LOG_DEBUG(<< "+------------------------------------------------------------+");
-
     // Test that:
     // 1) The likelihood is normalized.
     // 2) E[X] w.r.t. the likelihood is equal to the predictive distribution mean.
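With the mode weights used in the hunk above, $w_i = n_i / (n_0 + n_1)$, both invariants follow directly from the component densities:

\[ \int f(x)\,dx = \sum_i w_i \int f_i(x)\,dx = \sum_i w_i = 1, \qquad \mathrm{E}[X] = \sum_i w_i \mu_i, \]

so the numerical integration in the test should recover 1 and the weighted sum of component means up to quadrature error.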
@@ -569,9 +542,9 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() { rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), - TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); LOG_DEBUG(<< "# modes = " << filter.numberModes()); if (filter.numberModes() != 3) { continue; @@ -660,14 +633,6 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() { } void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - // Test that the marginal likelihood mean is close to the sample // mean for a multimodal distribution. @@ -675,14 +640,14 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { const double eps = 0.05; - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); @@ -691,7 +656,8 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { TMeanAccumulator meanError; for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec{samples[i]}, SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); expectedMean.add(TVector2(samples[i])); if (i % 10 == 0) { @@ -713,14 +679,6 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { } void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - // Test that the sample mode is close to the generating distribution mode. 
using TMaxAccumulator = @@ -751,19 +709,20 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() { rng.random_shuffle(samples.begin(), samples.end()); CMultivariateMultimodalPriorForTest<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), SINGLE_UNIT_WEIGHT_2[0])); - TDouble10Vec mode = - filter.marginalLikelihoodMode(COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]); + filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); + TDouble10Vec mode = filter.marginalLikelihoodMode( + maths_t::CUnitWeights::unit(2)); TVector2 expectedMode; TMaxAccumulator maxLikelihood; for (std::size_t i = 0u; i < filter.modes().size(); ++i) { TDouble10Vec mi = (filter.modes())[i].s_Prior->marginalLikelihoodMode( - COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]); + maths_t::CUnitWeights::unit(2)); double likelihood; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, mi), - SINGLE_UNIT_WEIGHT_2, likelihood); + filter.jointLogMarginalLikelihood( + {mi}, maths_t::CUnitWeights::singleUnit(2), likelihood); if (maxLikelihood.add(likelihood)) { expectedMode = TVector2(mi); } @@ -783,10 +742,6 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() { } void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() { - LOG_DEBUG(<< "+------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood |"); - LOG_DEBUG(<< "+------------------------------------------------------------------+"); - // We're going to test the following properties of the sampling: // 1) That the sampled mean and covariance are close to the marginal // likelihood mean and covariance. @@ -824,8 +779,9 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() { LOG_DEBUG(<< "# samples = " << samples.size()); maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); TDouble10Vec1Vec sampled; filter.sampleMarginalLikelihood(300, sampled); @@ -881,10 +837,6 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() { } void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - // Test that the probability is approximately equal to the chance of drawing // a less likely sample from generating distribution. 
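The benchmark that the probability assertions compare against is a plain Monte Carlo estimate: draw from the generating distribution and count how often the density is no greater than at the point of interest. A self-contained sketch (plain C++, helper name illustrative):

    #include <algorithm>
    #include <vector>

    // Empirical P(f(X) <= f(x)): the fraction of log-densities of draws from
    // the generating distribution that do not exceed logFx.
    double empiricalProbabilityOfLessLikely(const std::vector<double>& logDensities,
                                            double logFx) {
        auto count = std::count_if(logDensities.begin(), logDensities.end(),
                                   [logFx](double logF) { return logF <= logFx; });
        return static_cast<double>(count) / static_cast<double>(logDensities.size());
    }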
@@ -930,7 +882,8 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
         CMultivariateMultimodalPriorForTest<2> filter(makePrior<2>(maths_t::E_ContinuousData));
 
         for (std::size_t k = 0u; k < samples.size(); ++k) {
-            filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), SINGLE_UNIT_WEIGHT_2);
+            filter.addSamples({samples[k]},
+                              maths_t::CUnitWeights::singleUnit(2));
         }
         LOG_DEBUG(<< "# modes = " << filter.numberModes());
 
@@ -952,9 +905,8 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
                 double lb, ub;
                 maths::CMultivariatePrior::TTail10Vec tail;
                 filter.probabilityOfLessLikelySamples(
-                    maths_t::E_TwoSided, COUNT_WEIGHT,
-                    TDouble10Vec1Vec(1, x.toVector()),
-                    SINGLE_UNIT_WEIGHT_2, lb, ub, tail);
+                    maths_t::E_TwoSided, {x.toVector()},
+                    maths_t::CUnitWeights::singleUnit(2), lb, ub, tail);
                 double pa = (lb + ub) / 2.0;
 
                 LOG_DEBUG(<< "  p(" << x << "), actual = " << pa << ", expected = " << px);
@@ -977,26 +929,14 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() {
 }
 
 void CMultivariateMultimodalPriorTest::testIntegerData() {
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testIntegerData |");
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-
     // TODO
 }
 
 void CMultivariateMultimodalPriorTest::testLowVariationData() {
-    LOG_DEBUG(<< "+----------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testLowVariationData |");
-    LOG_DEBUG(<< "+----------------------------------------------------------+");
-
     // TODO
 }
 
 void CMultivariateMultimodalPriorTest::testLatLongData() {
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testLatLongData |");
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-
     using TTimeDoubleVecPr = std::pair<core_t::TTime, TDoubleVec>;
     using TTimeDoubleVecPrVec = std::vector<TTimeDoubleVecPr>;
 
@@ -1022,8 +962,8 @@ void CMultivariateMultimodalPriorTest::testLatLongData() {
                 *modePrior);
 
         for (std::size_t i = 0u; i < timeseries.size(); ++i) {
-            filter->addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, timeseries[i].second),
-                               SINGLE_UNIT_WEIGHT_2);
+            filter->addSamples({timeseries[i].second},
+                               maths_t::CUnitWeights::singleUnit(2));
             filter->propagateForwardsByTime(1.0);
         }
         LOG_DEBUG(<< filter->print());
@@ -1050,22 +990,18 @@ void CMultivariateMultimodalPriorTest::testLatLongData() {
 }
 
 void CMultivariateMultimodalPriorTest::testPersist() {
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testPersist |");
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-
     // Check that persist/restore is idempotent.
    maths::CSampling::seed();
 
-    std::size_t n[] = {100, 100};
+    const TSizeVec n{100, 100};
     const double means[][2] = {{10.0, 20.0}, {100.0, 30.0}};
     const double covariances[][3] = {{3.0, 1.0, 2.0}, {60.0, 20.0, 70.0}};
 
     test::CRandomNumbers rng;
 
     TDouble10Vec1Vec samples;
-    gaussianSamples(rng, boost::size(n), n, means, covariances, samples);
+    gaussianSamples(rng, n, means, covariances, samples);
     rng.random_shuffle(samples.begin(), samples.end());
 
     maths_t::EDataType dataType = maths_t::E_ContinuousData;
@@ -1074,7 +1010,8 @@ void CMultivariateMultimodalPriorTest::testPersist() {
     maths::CMultivariateMultimodalPrior<2> origFilter(makePrior<2>(dataType));
 
     for (std::size_t i = 0u; i < samples.size(); ++i) {
-        origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2);
+        origFilter.addSamples({samples[i]},
+                              maths_t::CUnitWeights::singleUnit(2));
     }
     uint64_t checksum = origFilter.checksum();
diff --git a/lib/maths/unittest/CMultivariateNormalConjugateTest.cc b/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
index 354bb03f75..f952549c2d 100644
--- a/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
+++ b/lib/maths/unittest/CMultivariateNormalConjugateTest.cc
@@ -29,13 +29,6 @@ using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
 using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 
 namespace {
-
-const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight);
-const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight);
-const TDouble10Vec4Vec UNIT_WEIGHT_2(1, TDouble10Vec(2, 1.0));
-const TDouble10Vec4Vec1Vec
-    SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0)));
-
 void empiricalProbabilityOfLessLikelySamples(const TDoubleVec& mean,
                                              const TDoubleVecVec& covariance,
                                              TDoubleVec& result) {
@@ -83,10 +76,6 @@ void gaussianSamples(test::CRandomNumbers& rng,
 }
 
 void CMultivariateNormalConjugateTest::testMultipleUpdate() {
-    LOG_DEBUG(<< "+--------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testMultipleUpdate |");
-    LOG_DEBUG(<< "+--------------------------------------------------------+");
-
     maths::CSampling::seed();
 
     const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData};
@@ -111,11 +100,12 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() {
         maths::CMultivariateNormalConjugate<2> filter2(filter1);
 
         for (std::size_t j = 0u; j < samples.size(); ++j) {
-            filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]),
-                               SINGLE_UNIT_WEIGHT_2);
+            filter1.addSamples({samples[j]},
+                               maths_t::CUnitWeights::singleUnit(2));
         }
-        TDouble10Vec4Vec1Vec weights(samples.size(), UNIT_WEIGHT_2);
-        filter2.addSamples(COUNT_WEIGHT, samples, weights);
+        maths_t::TDouble10VecWeightsAry1Vec weights(
+            samples.size(), maths_t::CUnitWeights::unit(2));
+        filter2.addSamples(samples, weights);
 
         CPPUNIT_ASSERT(filter1.equalTolerance(
             filter2, maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5));
@@ -129,16 +119,13 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() {
             maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i]));
         maths::CMultivariateNormalConjugate<2> filter2(filter1);
 
-        TDouble10Vec4Vec1Vec weights;
-        weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5)));
-        weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0)));
-
+        maths_t::TDouble10VecWeightsAry1Vec weights;
+        weights.resize(samples.size() / 2,
maths_t::countVarianceScaleWeight(1.5, 2));
+        weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0, 2));
         for (std::size_t j = 0u; j < samples.size(); ++j) {
-            TDouble10Vec1Vec sample(1, samples[j]);
-            TDouble10Vec4Vec1Vec weight(1, weights[j]);
-            filter1.addSamples(VARIANCE_WEIGHT, sample, weight);
+            filter1.addSamples({samples[j]}, {weights[j]});
         }
-        filter2.addSamples(VARIANCE_WEIGHT, samples, weights);
+        filter2.addSamples(samples, weights);
 
         CPPUNIT_ASSERT(filter1.equalTolerance(
             filter2, maths::CToleranceTypes::E_RelativeTolerance, 1e-5));
@@ -157,13 +144,11 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() {
         double x = 3.0;
         std::size_t count = 10;
         for (std::size_t j = 0u; j < count; ++j) {
-            filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, x)),
-                               SINGLE_UNIT_WEIGHT_2);
+            filter1.addSamples({TDouble10Vec(2, x)},
+                               maths_t::CUnitWeights::singleUnit(2));
         }
-        TDouble10Vec1Vec sample(1, TDouble10Vec(2, x));
-        TDouble10Vec4Vec1Vec weight(
-            1, TDouble10Vec4Vec(1, TDouble10Vec(2, static_cast<double>(count))));
-        filter2.addSamples(COUNT_WEIGHT, sample, weight);
+        filter2.addSamples({TDouble10Vec(2, x)},
+                           {maths_t::countWeight(static_cast<double>(count), 2)});
 
         CPPUNIT_ASSERT(filter1.equalTolerance(
             filter2, maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5));
     }
 }
 
 void CMultivariateNormalConjugateTest::testPropagation() {
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testPropagation |");
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-
     // Test that propagation doesn't affect the marginal likelihood
     // mean and expected precision.
 
@@ -198,8 +179,9 @@ void CMultivariateNormalConjugateTest::testPropagation() {
         maths::CMultivariateNormalConjugate<2> filter(
             maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i], 0.1));
 
-        TDouble10Vec4Vec1Vec weights(samples.size(), UNIT_WEIGHT_2);
-        filter.addSamples(COUNT_WEIGHT, samples, weights);
+        maths_t::TDouble10VecWeightsAry1Vec weights(
+            samples.size(), maths_t::CUnitWeights::unit(2));
+        filter.addSamples(samples, weights);
 
         TVector2 initialMean = filter.mean();
         TMatrix2 initialPrecision = filter.precision();
@@ -221,10 +203,6 @@ void CMultivariateNormalConjugateTest::testPropagation() {
 }
 
 void CMultivariateNormalConjugateTest::testMeanVectorEstimation() {
-    LOG_DEBUG(<< "+--------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testMeanVectorEstimation |");
-    LOG_DEBUG(<< "+--------------------------------------------------------------+");
-
     // We are going to test that we correctly estimate a distribution
     // for the mean of a multivariate normal by checking that the true
     // mean lies in various confidence intervals the correct percentage
     // of the times.
@@ -266,8 +244,8 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() {
             maths::CMultivariateNormalConjugate<2>::nonInformativePrior(
                 maths_t::E_ContinuousData, decayRates[i]));
         for (std::size_t j = 0u; j < samples.size(); ++j) {
-            filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]),
-                              SINGLE_UNIT_WEIGHT_2);
+            filter.addSamples({samples[j]},
+                              maths_t::CUnitWeights::singleUnit(2));
             filter.propagateForwardsByTime(1.0);
         }
 
@@ -318,10 +296,6 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() {
 }
 
 void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() {
-    LOG_DEBUG(<<
"+-------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation |"); - LOG_DEBUG(<< "+-------------------------------------------------------------------+"); - // We are going to test that we correctly estimate a distribution // for the precision of a multivariate normal by checking that the // true precision lies in various confidence intervals the correct @@ -371,8 +345,8 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() { maths::CMultivariateNormalConjugate<2>::nonInformativePrior( maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); filter.propagateForwardsByTime(1.0); } @@ -380,7 +354,7 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() { std::size_t n = 500; TMatrix2Vec precisionSamples; filter.randomSamplePrecisionMatrixPrior(n, precisionSamples); - TDouble10Vec4Vec elementSamples(3); + TDouble10Vec10Vec elementSamples(3); for (std::size_t j = 0; j < precisionSamples.size(); ++j) { elementSamples[0].push_back(precisionSamples[j](0, 0)); elementSamples[1].push_back(precisionSamples[j](1, 0)); @@ -430,10 +404,6 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() { } void CMultivariateNormalConjugateTest::testMarginalLikelihood() { - LOG_DEBUG(<< "+------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testMarginalLikelihood |"); - LOG_DEBUG(<< "+------------------------------------------------------------+"); - // Test that: // 1) The likelihood is normalized. // 2) E[X] w.r.t. the likelihood is equal to the predictive distribution mean. @@ -474,7 +444,8 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihood() { TMeanAccumulator meanCovarianceError; for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); if (!filter.isNonInformative()) { TDouble10Vec m = filter.marginalLikelihoodMean(); @@ -552,14 +523,6 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihood() { } void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CMultivariateNormalConjugateTest::testMarginalLikelihoodMode " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - // Test that the marginal likelihood mode is at a stationary maximum // of the likelihood function. 
@@ -574,11 +537,13 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { maths::CMultivariateNormalConjugate<2> filter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } LOG_DEBUG(<< "prior = " << filter.print()); - TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2); + TDouble10Vec mode = + filter.marginalLikelihoodMode(maths_t::CUnitWeights::unit(2)); TDoubleVec epsilons; rng.generateUniformSamples(-0.01, 0.01, 10, epsilons); @@ -597,12 +562,12 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { norm = std::sqrt(norm); double llm, ll, llp; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, modeMinusEps, - SINGLE_UNIT_WEIGHT_2, llm); - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, mode), - SINGLE_UNIT_WEIGHT_2, ll); - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, modePlusEps, - SINGLE_UNIT_WEIGHT_2, llp); + filter.jointLogMarginalLikelihood( + modeMinusEps, maths_t::CUnitWeights::singleUnit(2), llm); + filter.jointLogMarginalLikelihood( + {mode}, maths_t::CUnitWeights::singleUnit(2), ll); + filter.jointLogMarginalLikelihood( + modePlusEps, maths_t::CUnitWeights::singleUnit(2), llp); double gradient = std::fabs(std::exp(llp) - std::exp(llm)) / norm; LOG_DEBUG(<< "gradient = " << gradient); CPPUNIT_ASSERT(gradient < 1e-6); @@ -611,10 +576,6 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { } void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { - LOG_DEBUG(<< "+------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testSampleMarginalLikelihood |"); - LOG_DEBUG(<< "+------------------------------------------------------------------+"); - // We're going to test three properties of the sampling: // 1) That the sample mean is equal to the marginal likelihood mean. // 2) The sample variance is close to the marginal likelihood variance. 
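Concretely, the first two sampling properties amount to comparing moments of the drawn sample with the model's own predictions (a sketch; `filter` is the fitted 2-d prior from the test):

    // Draw from the marginal likelihood and compare the sample mean with
    // marginalLikelihoodMean(); the sample spread is checked the same way
    // against the marginal likelihood variance.
    TDouble10Vec1Vec sampled;
    filter.sampleMarginalLikelihood(300, sampled);
    double mean[2] = {0.0, 0.0};
    for (const auto& s : sampled) {
        mean[0] += s[0] / static_cast<double>(sampled.size());
        mean[1] += s[1] / static_cast<double>(sampled.size());
    }
    // Compare mean[0], mean[1] with filter.marginalLikelihoodMean() componentwise.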
@@ -652,7 +613,8 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { core::CContainerPrinter::print(resamples[0])); } - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } TDoubleVec p; @@ -689,9 +651,8 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { TDoubleVec sampleProbabilities; for (std::size_t j = 0u; j < resamples.size(); ++j) { double ll; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, - TDouble10Vec1Vec(1, resamples[j]), - SINGLE_UNIT_WEIGHT_2, ll); + filter.jointLogMarginalLikelihood( + {resamples[j]}, maths_t::CUnitWeights::singleUnit(2), ll); sampleProbabilities.push_back( static_cast(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) / static_cast(p.size())); @@ -706,7 +667,8 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { pAbsError.add(error); pRelError.add(error / expectedProbability); } - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } LOG_DEBUG(<< "pAbsError = " << maths::CBasicStatistics::mean(pAbsError)); @@ -716,10 +678,6 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { } void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - // Test that the probability is approximately equal to the chance of drawing // a less likely sample from generating distribution. @@ -755,8 +713,8 @@ void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() { maths::CMultivariateNormalConjugate<2>::nonInformativePrior( maths_t::E_ContinuousData)); for (std::size_t k = 0u; k < samples.size(); ++k) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[k]}, + maths_t::CUnitWeights::singleUnit(2)); } TDoubleVec p; @@ -778,9 +736,8 @@ void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() { double lb, ub; maths::CMultivariatePrior::TTail10Vec tail; filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, COUNT_WEIGHT, - TDouble10Vec1Vec(1, x.toVector()), - SINGLE_UNIT_WEIGHT_2, lb, ub, tail); + maths_t::E_TwoSided, {x.toVector()}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); double pa = (lb + ub) / 2.0; LOG_DEBUG(<< " p(" << x << "), actual = " << pa << ", expected = " << px); @@ -803,10 +760,6 @@ void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() { } void CMultivariateNormalConjugateTest::testIntegerData() { - LOG_DEBUG(<< "+-----------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testIntegerData |"); - LOG_DEBUG(<< "+-----------------------------------------------------+"); - // If the data are discrete then we approximate the discrete distribution // by saying it is uniform on the intervals [n,n+1] for each integral n. 
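The std::lower_bound step in the sampling test above converts a log-likelihood into an empirical probability: the rank of ll within a sorted reference sample of log-likelihoods, divided by the sample size, estimates the chance of drawing something less likely. A stripped-down std-only sketch of that rank trick:

    #include <algorithm>
    #include <iostream>
    #include <random>
    #include <vector>

    int main() {
        std::mt19937 rng(42);
        std::normal_distribution<double> normal(0.0, 1.0);

        // Reference "log-likelihoods" of draws from the model; for a standard
        // normal, -x*x is monotone in the density, so it suffices for ranking.
        std::vector<double> p;
        for (std::size_t i = 0; i < 1000; ++i) {
            double x = normal(rng);
            p.push_back(-x * x);
        }
        std::sort(p.begin(), p.end());

        // Fraction of reference draws that are less likely than x = 1.5.
        double ll = -1.5 * 1.5;
        double probability =
            static_cast<double>(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) /
            static_cast<double>(p.size());
        std::cout << "P(less likely) ~ " << probability << '\n'; // ~0.13, i.e. P(|X| > 1.5).
    }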
// The idea of this test is to check that the inferred model agrees in the @@ -848,9 +801,11 @@ void CMultivariateNormalConjugateTest::testIntegerData() { for (std::size_t k = 0u; k < n; ++k) { TVector2 x(samples[k]); TDouble10Vec1Vec sample(1, x.toVector()); - filter1.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); + filter1.addSamples( + sample, maths_t::CUnitWeights::singleUnit(2)); sample[0] = (x + TVector2(uniform[k])).toVector(); - filter2.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); + filter2.addSamples( + sample, maths_t::CUnitWeights::singleUnit(2)); } CPPUNIT_ASSERT(filter1.equalTolerance( @@ -864,14 +819,14 @@ void CMultivariateNormalConjugateTest::testIntegerData() { TDouble10Vec1Vec sample(1, x.toVector()); double ll1; - filter1.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, - SINGLE_UNIT_WEIGHT_2, ll1); + filter1.jointLogMarginalLikelihood( + sample, maths_t::CUnitWeights::singleUnit(2), ll1); meanLogLikelihood1.add(-ll1); sample[0] = (x + TVector2(uniform[k])).toVector(); double ll2; - filter2.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, - SINGLE_UNIT_WEIGHT_2, ll2); + filter2.jointLogMarginalLikelihood( + sample, maths_t::CUnitWeights::singleUnit(2), ll2); meanLogLikelihood2.add(-ll2); } @@ -887,16 +842,12 @@ void CMultivariateNormalConjugateTest::testIntegerData() { } void CMultivariateNormalConjugateTest::testLowVariationData() { - LOG_DEBUG(<< "+----------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testLowVariationData |"); - LOG_DEBUG(<< "+----------------------------------------------------------+"); - { maths::CMultivariateNormalConjugate<2> filter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData)); for (std::size_t i = 0u; i < 100; ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({TDouble10Vec(2, 430.0)}, + maths_t::CUnitWeights::singleUnit(2)); } TDouble10Vec10Vec covariances = filter.marginalLikelihoodCovariance(); @@ -908,8 +859,8 @@ void CMultivariateNormalConjugateTest::testLowVariationData() { maths::CMultivariateNormalConjugate<2> filter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < 100; ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({TDouble10Vec(2, 430.0)}, + maths_t::CUnitWeights::singleUnit(2)); } TDouble10Vec10Vec covariances = filter.marginalLikelihoodCovariance(); @@ -921,10 +872,6 @@ void CMultivariateNormalConjugateTest::testLowVariationData() { } void CMultivariateNormalConjugateTest::testPersist() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testPersist |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // Check that persist/restore is idempotent. 
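The filter2 branch of the integer-data test above realizes the uniform-on-[n, n+1] approximation by dithering: each integer sample gets an independent U[0, 1) offset before being handed to the continuous model. A std-only sketch of just the dithering step (Poisson counts stand in for whatever integer data the caller has):

    #include <iostream>
    #include <random>
    #include <vector>

    int main() {
        std::mt19937 rng(42);
        std::poisson_distribution<int> counts(7.0);          // Some integer-valued data.
        std::uniform_real_distribution<double> jitter(0.0, 1.0);

        std::vector<double> dithered;
        for (std::size_t i = 0; i < 10; ++i) {
            int n = counts(rng);
            // Treat n as uniform on [n, n+1): add U[0,1) before modelling
            // the value with a continuous prior.
            dithered.push_back(static_cast<double>(n) + jitter(rng));
        }
        for (double x : dithered) { std::cout << x << '\n'; }
    }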
     const double mean[] = {10.0, 20.0};
@@ -940,7 +887,8 @@ void CMultivariateNormalConjugateTest::testPersist() {
         maths::CMultivariateNormalConjugate<2> origFilter(
             maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataType));
         for (std::size_t i = 0u; i < samples.size(); ++i) {
-            origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2);
+            origFilter.addSamples({samples[i]},
+                                  maths_t::CUnitWeights::singleUnit(2));
         }
         double decayRate = origFilter.decayRate();
         uint64_t checksum = origFilter.checksum();
@@ -979,10 +927,6 @@ void CMultivariateNormalConjugateTest::testPersist() {
 }
 
 void CMultivariateNormalConjugateTest::calibrationExperiment() {
-    LOG_DEBUG(<< "+-----------------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::calibrationExperiment  |");
-    LOG_DEBUG(<< "+-----------------------------------------------------------+");
-
     using TVector10 = maths::CVectorNx1<double, 10>;
     using TMatrix10 = maths::CSymmetricMatrixNxN<double, 10>;
 
@@ -1027,7 +971,8 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() {
             TDouble10Vec1Vec sample(1, TDouble10Vec(2));
             sample[0][0] = samples[i][indices[j][0]];
             sample[0][1] = samples[i][indices[j][1]];
-            filters[j].addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2);
+            filters[j].addSamples(
+                sample, maths_t::CUnitWeights::singleUnit(2));
         }
     }
 
@@ -1043,9 +988,9 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() {
             sample[0][1] = samples[i][indices[j][1]];
             double lb, ub;
             maths::CMultivariatePrior::TTail10Vec tail;
-            filters[j].probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT,
-                                                      sample, SINGLE_UNIT_WEIGHT_2,
-                                                      lb, ub, tail);
+            filters[j].probabilityOfLessLikelySamples(
+                maths_t::E_TwoSided, sample,
+                maths_t::CUnitWeights::singleUnit(2), lb, ub, tail);
             p[j].push_back((lb + ub) / 2.0);
             mpi = std::min(mpi, (lb + ub) / 2.0);
             epi.add((lb + ub) / 2.0, 0.5);
@@ -1083,10 +1028,6 @@
 }
 
 void CMultivariateNormalConjugateTest::dataGenerator() {
-    LOG_DEBUG(<< "+---------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::dataGenerator   |");
-    LOG_DEBUG(<< "+---------------------------------------------------+");
-
     const double means[][2] = {{10.0, 20.0}, {30.0, 25.0}, {50.0, 5.0}, {100.0, 50.0}};
     const double covariances[][3] = {
         {3.0, 2.0, 2.0}, {6.0, -4.0, 5.0}, {4.0, 1.0, 3.0}, {20.0, -12.0, 12.0}};
diff --git a/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc b/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc
index acbea2ca37..7eabeba6a5 100644
--- a/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc
+++ b/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc
@@ -34,15 +34,11 @@ namespace {
 using TDoubleVec = std::vector<double>;
 using TDoubleVecVec = std::vector<TDoubleVec>;
+using TSizeVec = std::vector<std::size_t>;
 using TPriorPtr = maths::CMultivariateOneOfNPrior::TPriorPtr;
 using TPriorPtrVec = maths::CMultivariateOneOfNPrior::TPriorPtrVec;
 using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;
 
-const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight);
-const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight);
-const TDouble10Vec4Vec UNIT_WEIGHT_2(1, TDouble10Vec(2, 1.0));
-const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, UNIT_WEIGHT_2);
-
 class CMinusLogLikelihood : public maths::CGradientDescent::CFunction {
 public:
     CMinusLogLikelihood(const maths::CMultivariateOneOfNPrior& prior)
@@ -50,9 +46,9 @@ class CMinusLogLikelihood : public maths::CGradientDescent::CFunction {
     bool operator()(const maths::CGradientDescent::TVector& x, double& result) const {
         if (m_Prior->jointLogMarginalLikelihood(
-                COUNT_WEIGHT,
-                TDouble10Vec1Vec(1, TDouble10Vec(x.toVector())),
-                SINGLE_UNIT_WEIGHT_2, result) == maths_t::E_FpNoErrors) {
+                {x.toVector()},
+                maths_t::CUnitWeights::singleUnit(2),
+                result) == maths_t::E_FpNoErrors) {
             result = -result;
             return true;
         }
@@ -78,20 +74,18 @@
 template<std::size_t N>
 maths::CMultivariateOneOfNPrior makeOneOfN(maths_t::EDataType dataType, double decayRate = 0.0) {
     TPriorPtrVec priors;
-    priors.push_back(TPriorPtr(
-        maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate)
-            .clone()));
-    priors.push_back(TPriorPtr(makeMultimodal<N>(dataType, decayRate).clone()));
+    priors.emplace_back(maths::CMultivariateNormalConjugate<N>::nonInformativePrior(dataType, decayRate)
+                            .clone());
+    priors.emplace_back(makeMultimodal<N>(dataType, decayRate).clone());
     return maths::CMultivariateOneOfNPrior(N, priors, dataType, decayRate);
 }
 
 void gaussianSamples(test::CRandomNumbers& rng,
-                     std::size_t modes,
-                     const std::size_t* n,
+                     const TSizeVec& n,
                      const double (*means)[2],
                      const double (*covariances)[3],
                      TDouble10Vec1Vec& samples) {
-    for (std::size_t i = 0u; i < modes; ++i) {
+    for (std::size_t i = 0u; i < n.size(); ++i) {
         TVector2 mean(means[i], means[i] + 2);
         TMatrix2 covariance(covariances[i], covariances[i] + 3);
         TDoubleVecVec samples_;
@@ -126,10 +120,6 @@ std::string print(maths_t::EDataType dataType) {
 }
 
 void CMultivariateOneOfNPriorTest::testMultipleUpdate() {
-    LOG_DEBUG(<< "+----------------------------------------------------+");
-    LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testMultipleUpdate |");
-    LOG_DEBUG(<< "+----------------------------------------------------+");
-
     // Test that we get the same result updating once with a vector of 100
     // samples of an R.V. versus updating individually 100 times.
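The refactored gaussianSamples above drives the per-mode sample counts from a TSizeVec, so the number of modes is implied by n.size() and the separate modes argument disappears. A self-contained analogue with independent coordinates (the real helper draws from full 2x2 covariance matrices; diagonal covariance here is a simplification of this sketch):

    #include <array>
    #include <iostream>
    #include <random>
    #include <vector>

    using TSizeVec = std::vector<std::size_t>;
    using TSample = std::array<double, 2>;

    void gaussianSamples(std::mt19937& rng, const TSizeVec& n,
                         const double (*means)[2], const double (*sds)[2],
                         std::vector<TSample>& samples) {
        for (std::size_t i = 0; i < n.size(); ++i) { // One block per mode.
            std::normal_distribution<double> x(means[i][0], sds[i][0]);
            std::normal_distribution<double> y(means[i][1], sds[i][1]);
            for (std::size_t j = 0; j < n[i]; ++j) {
                samples.push_back({x(rng), y(rng)});
            }
        }
    }

    int main() {
        std::mt19937 rng(42);
        const TSizeVec n{400, 600};
        const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}};
        const double sds[][2] = {{2.8, 2.8}, {4.5, 3.2}};
        std::vector<TSample> samples;
        gaussianSamples(rng, n, means, sds, samples);
        std::cout << "# samples = " << samples.size() << '\n'; // 1000
    }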
@@ -170,19 +160,18 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() { maths::CMultivariateOneOfNPrior filter2(filter1); for (std::size_t j = 0u; j < seedSamples.size(); ++j) { - TDouble10Vec1Vec sample(1, seedSamples[j]); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - filter1.addSamples(COUNT_WEIGHT, sample, weight); - filter2.addSamples(COUNT_WEIGHT, sample, weight); + filter1.addSamples({seedSamples[j]}, + maths_t::CUnitWeights::singleUnit(2)); + filter2.addSamples({seedSamples[j]}, + maths_t::CUnitWeights::singleUnit(2)); } for (std::size_t j = 0u; j < samples.size(); ++j) { - TDouble10Vec1Vec sample(1, samples[j]); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - filter1.addSamples(COUNT_WEIGHT, sample, weight); + filter1.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), - TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - filter2.addSamples(COUNT_WEIGHT, samples, weights); + maths_t::TDouble10VecWeightsAry1Vec weights( + samples.size(), maths_t::CUnitWeights::unit(2)); + filter2.addSamples(samples, weights); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -198,19 +187,16 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() { for (std::size_t j = 0u; j < seedSamples.size(); ++j) { TDouble10Vec1Vec sample(1, seedSamples[j]); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - filter1.addSamples(COUNT_WEIGHT, sample, weight); - filter2.addSamples(COUNT_WEIGHT, sample, weight); + filter1.addSamples(sample, maths_t::CUnitWeights::singleUnit(2)); + filter2.addSamples(sample, maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights; - weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5))); - weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0))); + maths_t::TDouble10VecWeightsAry1Vec weights; + weights.resize(samples.size() / 2, maths_t::countVarianceScaleWeight(1.5, 2)); + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0, 2)); for (std::size_t j = 0u; j < samples.size(); ++j) { - TDouble10Vec1Vec sample(1, samples[j]); - TDouble10Vec4Vec1Vec weight(1, weights[j]); - filter1.addSamples(VARIANCE_WEIGHT, sample, weight); + filter1.addSamples({samples[j]}, {weights[j]}); } - filter2.addSamples(VARIANCE_WEIGHT, samples, weights); + filter2.addSamples(samples, weights); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -219,10 +205,6 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() { } void CMultivariateOneOfNPriorTest::testPropagation() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testPropagation |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // Test that propagation doesn't affect the marginal likelihood // mean and the marginal likelihood variance increases (due to // influence of the prior uncertainty) after propagation. 
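The checksum comparison above works because the posterior update is associative: one addSamples call with m samples must leave the filter in exactly the state produced by m single-sample calls. The same invariant for a plain mean/variance accumulator, std-only:

    #include <cassert>
    #include <iostream>
    #include <vector>

    struct Accumulator {
        double n = 0.0, mean = 0.0, m2 = 0.0;
        void add(double x) { // Welford's update.
            n += 1.0;
            double d = x - mean;
            mean += d / n;
            m2 += d * (x - mean);
        }
        void add(const std::vector<double>& xs) {
            for (double x : xs) { add(x); }
        }
    };

    int main() {
        std::vector<double> samples{1.0, 4.0, 2.0, 8.0, 5.0};
        Accumulator one, batch;
        for (double x : samples) { one.add(x); }
        batch.add(samples);
        // Same state either way, so any checksum over the state agrees too.
        assert(one.n == batch.n && one.mean == batch.mean && one.m2 == batch.m2);
        std::cout << "mean = " << one.mean << '\n';
    }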
@@ -231,14 +213,14 @@ void CMultivariateOneOfNPriorTest::testPropagation() { const double eps = 2e-3; - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); LOG_DEBUG(<< "# samples = " << samples.size()); @@ -246,8 +228,8 @@ void CMultivariateOneOfNPriorTest::testPropagation() { maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData, decayRate)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), - TDouble10Vec4Vec1Vec(1, UNIT_WEIGHT_2)); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } double numberSamples = filter.numberSamples(); @@ -295,10 +277,6 @@ void CMultivariateOneOfNPriorTest::testPropagation() { } void CMultivariateOneOfNPriorTest::testWeightUpdate() { - LOG_DEBUG(<< "+--------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testWeightUpdate |"); - LOG_DEBUG(<< "+--------------------------------------------------+"); - // Test that the weights stay normalized over update. maths::CSampling::seed(); @@ -306,12 +284,12 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { test::CRandomNumbers rng; { - const std::size_t n[] = {100}; + const TSizeVec n{100}; const double mean[][2] = {{10.0, 20.0}}; const double covariance[][3] = {{3.0, 1.0, 2.0}}; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, mean, covariance, samples); + gaussianSamples(rng, n, mean, covariance, samples); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-10); @@ -321,8 +299,8 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { maths::CMultivariateOneOfNPrior filter( makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, sum(filter.weights()), 1e-6); filter.propagateForwardsByTime(1.0); CPPUNIT_ASSERT(equal(sum(filter.weights()), 1.0)); @@ -333,12 +311,12 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { { // Test that non-zero decay rate behaves as expected. 
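The invariant asserted above is that whatever propagation does to the individual model weights, sum(filter.weights()) stays 1 after every update. A std-only sketch of a decay-then-renormalize step on raw weights (the production code works with log weights; this flatten-towards-uniform rule is an assumption of the sketch, not the library's exact decay):

    #include <cassert>
    #include <cmath>
    #include <iostream>
    #include <vector>

    // Pull the weights back towards uniform, then renormalize.
    void propagateForwardsByTime(std::vector<double>& weights, double decayRate, double time) {
        double alpha = std::exp(-decayRate * time);
        double z = 0.0;
        for (double& w : weights) {
            w = std::pow(w, alpha); // Flattens towards uniform as alpha decreases.
            z += w;
        }
        for (double& w : weights) { w /= z; }
    }

    int main() {
        std::vector<double> weights{0.7, 0.2, 0.1};
        for (int t = 0; t < 5; ++t) {
            propagateForwardsByTime(weights, 0.02, 1.0);
            double sum = 0.0;
            for (double w : weights) { sum += w; }
            assert(std::fabs(sum - 1.0) < 1e-10); // Stays normalized.
        }
        std::cout << "w0 = " << weights[0] << '\n'; // Drifts towards 1/3.
    }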
- const std::size_t n[] = {4000, 6000}; + const TSizeVec n{4000, 6000}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); const double decayRates[] = {0.0008, 0.004, 0.02}; @@ -350,8 +328,8 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); filter.propagateForwardsByTime(1.0); } @@ -368,20 +346,16 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { } void CMultivariateOneOfNPriorTest::testModelUpdate() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testModelUpdate |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - maths::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; @@ -393,12 +367,11 @@ void CMultivariateOneOfNPriorTest::testModelUpdate() { makeMultimodal<2>(dataTypes[i]); maths::CMultivariateOneOfNPrior oneOfN(makeOneOfN<2>(dataTypes[i])); - normal.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); - multimodal.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); - oneOfN.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); + maths_t::TDouble10VecWeightsAry1Vec weights( + samples.size(), maths_t::CUnitWeights::unit(2)); + normal.addSamples(samples, weights); + multimodal.addSamples(samples, weights); + oneOfN.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(normal.checksum(), oneOfN.models()[0]->checksum()); CPPUNIT_ASSERT_EQUAL(multimodal.checksum(), oneOfN.models()[1]->checksum()); @@ -406,18 +379,10 @@ void CMultivariateOneOfNPriorTest::testModelUpdate() { } void CMultivariateOneOfNPriorTest::testModelSelection() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testModelSelection |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // TODO When copula models are available. } void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testMarginalLikelihood |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // Test that: // 1) The likelihood is normalized. // 2) E[X] w.r.t. the likelihood is equal to the predictive distribution mean. 
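Property 1) can be checked numerically by integrating the density over a wide interval and confirming the mass is 1. A trapezoidal sketch for the standard normal (the test integrates the filter's marginal likelihood instead):

    #include <cmath>
    #include <iostream>

    int main() {
        const double pi = 3.141592653589793;
        auto f = [pi](double x) { return std::exp(-0.5 * x * x) / std::sqrt(2.0 * pi); };

        // Trapezoidal rule on [-8, 8]; the mass beyond is negligible.
        double a = -8.0, b = 8.0, Z = 0.0;
        std::size_t n = 2000;
        double h = (b - a) / static_cast<double>(n);
        for (std::size_t i = 0; i < n; ++i) {
            double x = a + static_cast<double>(i) * h;
            Z += 0.5 * h * (f(x) + f(x + h));
        }
        std::cout << "Z = " << Z << '\n'; // ~1 to high accuracy.
    }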
@@ -461,8 +426,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { TMeanAccumulator meanCovarianceError; for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); if (!filter.isNonInformative()) { TDouble10Vec m = filter.marginalLikelihoodMean(); @@ -570,8 +535,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } TDouble10Vec m = filter.marginalLikelihoodMean(); @@ -666,10 +631,6 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { } void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() { - LOG_DEBUG(<< "+------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean |"); - LOG_DEBUG(<< "+------------------------------------------------------------+"); - // Test that the marginal likelihood mean is close to the sample // mean for a variety of models. @@ -711,7 +672,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() { TMean2Accumulator expectedMean; for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); expectedMean.add(TVector2(samples[j])); if (!filter.isNonInformative()) { @@ -735,10 +697,6 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() { } void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testMultipleUpdate |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // Test that the marginal likelihood mode is near the maximum // of the marginal likelihood. 
@@ -761,7 +719,7 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { for (std::size_t i = 0u; i < boost::size(means); ++i) { for (std::size_t j = 0u; j < boost::size(covariances); ++j) { - std::size_t n[] = {100}; + const TSizeVec n{100}; const double mean[][2] = {{means[i][0], means[i][1]}}; const double covariance[][3] = { {covariances[i][0], covariances[i][1], covariances[i][2]}}; @@ -770,12 +728,12 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { << ", variance = " << covariance[0][0] << " ***"); TDouble10Vec1Vec samples; - gaussianSamples(rng, 1, n, mean, covariance, samples); + gaussianSamples(rng, n, mean, covariance, samples); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t k = 0u; k < samples.size(); ++k) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[k]}, + maths_t::CUnitWeights::singleUnit(2)); } CMinusLogLikelihood likelihood(filter); @@ -788,7 +746,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { maths::CVector(mean[0], mean[0] + 2), likelihood, gradientOfLikelihood, expectedMode, likelihoods); - TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2); + TDouble10Vec mode = filter.marginalLikelihoodMode( + maths_t::CUnitWeights::unit(2)); LOG_DEBUG(<< "marginalLikelihoodMode = " << core::CContainerPrinter::print(mode) << ", expectedMode = " << expectedMode); @@ -804,7 +763,7 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { { LOG_DEBUG(<< "****** Multimodal ******"); - const std::size_t n[] = {100, 100}; + const TSizeVec n{100, 100}; const double means[][2] = { {10.0, 10.0}, {16.0, 18.0}, @@ -815,11 +774,12 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { }; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } CMinusLogLikelihood likelihood(filter); @@ -832,7 +792,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { maths::CVector(means[0], means[0] + 2), likelihood, gradientOfLikelihood, expectedMode, likelihoods); - TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2); + TDouble10Vec mode = filter.marginalLikelihoodMode( + maths_t::CUnitWeights::unit(2)); LOG_DEBUG(<< "marginalLikelihoodMode = " << core::CContainerPrinter::print(mode) << ", expectedMode = " << expectedMode); @@ -844,17 +805,13 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { } void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood() { - LOG_DEBUG(<< "+--------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood |"); - LOG_DEBUG(<< "+--------------------------------------------------------------+"); - // Test we sample the constitute priors in proportion to their weights. 
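Cross-checking marginalLikelihoodMode, as the hunks above do, only needs an independent way to maximize the density. A 1-D std-only analogue using a coarse grid search over a two-component Gaussian mixture (the test itself runs projected gradient descent in 2-D):

    #include <cmath>
    #include <iostream>

    int main() {
        const double pi = 3.141592653589793;
        auto gaussian = [pi](double x, double m, double s) {
            return std::exp(-0.5 * (x - m) * (x - m) / (s * s)) / (s * std::sqrt(2.0 * pi));
        };
        // Unequal mixture: the mode should sit near the heavier component.
        auto f = [&](double x) {
            return 0.7 * gaussian(x, 10.0, 2.0) + 0.3 * gaussian(x, 20.0, 3.0);
        };

        double mode = 0.0, fmax = 0.0;
        for (double x = 0.0; x <= 30.0; x += 0.01) { // Coarse grid search.
            if (f(x) > fmax) { fmax = f(x); mode = x; }
        }
        std::cout << "mode ~ " << mode << '\n'; // Close to 10.
    }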
maths::CSampling::seed(); test::CRandomNumbers rng; - const std::size_t n[] = {50, 50}; + const TSizeVec n{50, 50}; const double means[][2] = { {10.0, 10.0}, {25.0, 25.0}, @@ -865,13 +822,14 @@ void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood() { }; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); if (!filter.isNonInformative()) { TDoubleVec weights = filter.weights(); @@ -907,10 +865,6 @@ void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood() { } void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+--------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+--------------------------------------------------------------------+"); - // We simply test that the calculation is close to the weighted // sum of component model calculations. @@ -918,7 +872,7 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { test::CRandomNumbers rng; - const std::size_t n[] = {100, 100}; + const TSizeVec n{100, 100}; const double means[][2] = { {10.0, 10.0}, {16.0, 18.0}, @@ -929,7 +883,7 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { }; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); @@ -938,12 +892,12 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { for (std::size_t i = 0u; i < samples.size(); ++i) { TDouble10Vec1Vec sample(1, samples[i]); - filter.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); + filter.addSamples(sample, maths_t::CUnitWeights::singleUnit(2)); double lowerBound, upperBound; maths::CMultivariatePrior::TTail10Vec tail; CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, + maths_t::E_TwoSided, sample, maths_t::CUnitWeights::singleUnit(2), lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); @@ -957,7 +911,8 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { double modelLowerBound, modelUpperBound; double weight = weights[j]; CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples( - maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, + maths_t::E_TwoSided, sample, + maths_t::CUnitWeights::singleUnit(2), modelLowerBound, modelUpperBound, tail)); CPPUNIT_ASSERT_EQUAL(modelLowerBound, modelUpperBound); double modelProbability = (modelLowerBound + modelUpperBound) / 2.0; @@ -977,26 +932,23 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { } void CMultivariateOneOfNPriorTest::testPersist() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CMultivariateOneOfNPriorTest::testPersist |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - // 
Check that persist/restore is idempotent. - const std::size_t n[] = {100}; + const TSizeVec n{100}; const double mean[][2] = {{10.0, 20.0}}; const double covariance[][3] = {{3.0, 1.0, 2.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, mean, covariance, samples); + gaussianSamples(rng, n, mean, covariance, samples); maths_t::EDataType dataType = maths_t::E_ContinuousData; maths::CMultivariateOneOfNPrior origFilter(makeOneOfN<2>(dataType)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + origFilter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } std::size_t dimension = origFilter.dimension(); double decayRate = origFilter.decayRate(); diff --git a/lib/maths/unittest/CNaiveBayesTest.cc b/lib/maths/unittest/CNaiveBayesTest.cc index df14117fb0..b0de6d00d0 100644 --- a/lib/maths/unittest/CNaiveBayesTest.cc +++ b/lib/maths/unittest/CNaiveBayesTest.cc @@ -19,10 +19,9 @@ #include #include -#include -#include #include +#include using namespace ml; @@ -35,10 +34,6 @@ using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumula using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; void CNaiveBayesTest::testClassification() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CNaiveBayesTest::testClassification |"); - LOG_DEBUG(<< "+---------------------------------------+"); - // We'll test classification using Gaussian naive Bayes. We // test: // - We get the probabilities we expect using if the underlying @@ -175,10 +170,6 @@ void CNaiveBayesTest::testClassification() { } void CNaiveBayesTest::testPropagationByTime() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CNaiveBayesTest::testPropagationByTime |"); - LOG_DEBUG(<< "+------------------------------------------+"); - // Make feature distributions drift over time and verify that // the classifier adapts. @@ -240,14 +231,10 @@ void CNaiveBayesTest::testPropagationByTime() { } void CNaiveBayesTest::testMemoryUsage() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CNaiveBayesTest::testMemoryUsage |"); - LOG_DEBUG(<< "+------------------------------------+"); - // Check invariants. - using TMemoryUsagePtr = boost::scoped_ptr; - using TNaiveBayesPtr = boost::shared_ptr; + using TMemoryUsagePtr = std::unique_ptr; + using TNaiveBayesPtr = std::shared_ptr; test::CRandomNumbers rng; @@ -284,10 +271,6 @@ void CNaiveBayesTest::testMemoryUsage() { } void CNaiveBayesTest::testPersist() { - LOG_DEBUG(<< "+--------------------------------+"); - LOG_DEBUG(<< "| CNaiveBayesTest::testPersist |"); - LOG_DEBUG(<< "+--------------------------------+"); - test::CRandomNumbers rng; TDoubleVec trainingData[4]; diff --git a/lib/maths/unittest/CNaturalBreaksClassifierTest.cc b/lib/maths/unittest/CNaturalBreaksClassifierTest.cc index ec693a7534..904e257d5e 100644 --- a/lib/maths/unittest/CNaturalBreaksClassifierTest.cc +++ b/lib/maths/unittest/CNaturalBreaksClassifierTest.cc @@ -149,10 +149,6 @@ bool naturalBreaksBranchAndBound(const TTupleVec& categories, } void CNaturalBreaksClassifierTest::testCategories() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CNaturalBreaksClassifierTest::testCategories |"); - LOG_DEBUG(<< "+------------------------------------------------+"); - // Check that we correctly find the optimum solution. 
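The CNaiveBayesTest hunks above swap the boost smart pointers for their standard-library successors; ownership semantics carry over directly. A minimal illustration (SModelState is a placeholder type for this sketch, not the pointee used by the test):

    #include <iostream>
    #include <memory>

    struct SModelState { double value = 42.0; };

    int main() {
        // boost::scoped_ptr -> std::unique_ptr: sole ownership, not copyable.
        std::unique_ptr<SModelState> scoped = std::make_unique<SModelState>();

        // boost::shared_ptr -> std::shared_ptr: reference-counted ownership.
        std::shared_ptr<SModelState> shared = std::make_shared<SModelState>();
        std::shared_ptr<SModelState> alias = shared;

        std::cout << scoped->value << ' ' << alias.use_count() << '\n'; // 42 2
    }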
{ test::CRandomNumbers rng; @@ -490,10 +486,6 @@ void CNaturalBreaksClassifierTest::testCategories() { } void CNaturalBreaksClassifierTest::testPropagateForwardsByTime() { - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - LOG_DEBUG(<< "| CNaturalBreaksClassifierTest::testPropagateForwardsByTime |"); - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - // Check pruning of dead categories. test::CRandomNumbers rng; @@ -533,10 +525,6 @@ void CNaturalBreaksClassifierTest::testPropagateForwardsByTime() { } void CNaturalBreaksClassifierTest::testSample() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CNaturalBreaksClassifierTest::testSample |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - // We test that for a small number of samples we get back exactly // the points we have added and for a large number of samples we // sample the modes of the mixture correctly. @@ -619,10 +607,6 @@ void CNaturalBreaksClassifierTest::testSample() { } void CNaturalBreaksClassifierTest::testPersist() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CNaturalBreaksClassifierTest::testSample |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - test::CRandomNumbers rng; TDoubleVec samples1; diff --git a/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc b/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc index a2187e07f8..994941b9e5 100644 --- a/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc +++ b/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc @@ -42,6 +42,7 @@ using TDoubleDoublePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin; +using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double); CNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& decayRate = 0.0) { @@ -50,10 +51,6 @@ CNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_Cont } void CNormalMeanPrecConjugateTest::testMultipleUpdate() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testMultipleUpdate |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // Test that we get the same result updating once with a vector of 100 // samples of an R.V. versus updating individually 100 times. 
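The TWeightFunc typedef introduced above is a pointer to a factory that builds a full weights array from one scalar, which lets the tests iterate over weight kinds with a plain array of function pointers. A sketch of the pattern with a hypothetical three-slot weights array (the slot layout here is an assumption of this sketch, not the library's):

    #include <array>
    #include <iostream>

    using TWeightsAry = std::array<double, 3>; // {count, winsorisation, variance scale} -- assumed.
    using TWeightFunc = TWeightsAry (*)(double);

    TWeightsAry countWeight(double w) { return {w, 1.0, 1.0}; }
    TWeightsAry winsorisationWeight(double w) { return {1.0, w, 1.0}; }

    int main() {
        TWeightFunc weightFuncs[]{&countWeight, &winsorisationWeight};
        double weights[]{0.1, 1.0, 10.0};
        for (TWeightFunc make : weightFuncs) {
            for (double w : weights) {
                TWeightsAry weight = make(w);
                std::cout << weight[0] << ' ' << weight[1] << ' ' << weight[2] << '\n';
            }
        }
    }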
@@ -91,13 +88,12 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate() { CNormalMeanPrecConjugate filter1(makePrior(dataTypes[i])); CNormalMeanPrecConjugate filter2(filter1); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); + maths_t::TDoubleWeightsAry1Vec weights; + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0)); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples({samples[j]}, {weights[j]}); } - filter2.addSamples(weightStyle, samples, - TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(samples, weights); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -117,9 +113,7 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate() { for (std::size_t j = 0u; j < count; ++j) { filter1.addSamples(TDouble1Vec(1, x)); } - filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples({x}, {maths_t::countWeight(static_cast(count))}); TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5); CPPUNIT_ASSERT(filter1.equalTolerance(filter2, equal)); @@ -127,10 +121,6 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate() { } void CNormalMeanPrecConjugateTest::testPropagation() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testPropagation |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // Test that propagation doesn't affect the expected values // of likelihood mean and precision. @@ -164,10 +154,6 @@ void CNormalMeanPrecConjugateTest::testPropagation() { } void CNormalMeanPrecConjugateTest::testMeanEstimation() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testMeanEstimation |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // We are going to test that we correctly estimate a distribution // for the mean of the Gaussian process by checking that the true // mean of a Gaussian process lies in various confidence intervals @@ -227,10 +213,6 @@ void CNormalMeanPrecConjugateTest::testMeanEstimation() { } void CNormalMeanPrecConjugateTest::testPrecisionEstimation() { - LOG_DEBUG(<< "+---------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testPrecisionEstimation |"); - LOG_DEBUG(<< "+---------------------------------------------------------+"); - // We are going to test that we correctly estimate a distribution // for the precision of the Gaussian process by checking that the // true precision of a Gaussian process lies in various confidence @@ -292,10 +274,6 @@ void CNormalMeanPrecConjugateTest::testPrecisionEstimation() { } void CNormalMeanPrecConjugateTest::testMarginalLikelihood() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testMarginalLikelihood |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // Check that the c.d.f. <= 1 at extreme. 
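The single-call equivalence above rests on count weights: adding x once with count weight n must leave the posterior exactly as n separate unit-weight additions of x. For a weighted mean the identity is easy to see, std-only:

    #include <cassert>
    #include <cmath>
    #include <iostream>

    struct WeightedMean {
        double count = 0.0, mean = 0.0;
        void add(double x, double weight = 1.0) {
            count += weight;
            mean += weight * (x - mean) / count;
        }
    };

    int main() {
        double x = 3.7;
        std::size_t n = 5;
        WeightedMean a, b;
        for (std::size_t i = 0; i < n; ++i) { a.add(x); } // n unit updates.
        b.add(x, static_cast<double>(n));                 // One count-weighted update.
        assert(std::fabs(a.mean - b.mean) < 1e-12 && a.count == b.count);
        std::cout << a.mean << " == " << b.mean << '\n';
    }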
maths_t::EDataType dataTypes[] = {maths_t::E_ContinuousData, maths_t::E_IntegerData}; for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { @@ -310,17 +288,14 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() { rng.generateNormalSamples(mean, variance, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast(maths_t::countWeight), + static_cast(maths_t::winsorisationWeight)}; + double weights[]{0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 1000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({1000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -481,9 +456,7 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() { double q2 = boost::math::quantile( scaledNormal, (50.0 + percentages[j] / 2.0) / 100.0); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble4Vec(1, vs)); + percentages[j], maths_t::countVarianceScaleWeight(vs)); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.3); @@ -502,10 +475,6 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() { } void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { - LOG_DEBUG(<< "+------------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean |"); - LOG_DEBUG(<< "+------------------------------------------------------------+"); - // Test that the expectation of the marginal likelihood matches // the expected mean of the marginal likelihood. @@ -555,10 +524,6 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMean() { } void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - // Test that the marginal likelihood mode is what we'd expect // with variances variance scales. 
@@ -581,35 +546,25 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { rng.generateNormalSamples(means[i], variances[j], 1000, samples); filter.addSamples(samples); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[i]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); boost::math::normal_distribution<> scaledNormal( means[i], std::sqrt(vs * variances[j])); double expectedMode = boost::math::mode(scaledNormal); - LOG_DEBUG(<< "marginalLikelihoodMode = " - << filter.marginalLikelihoodMode(weightStyle, weight) + LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weight) << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), - 0.12 * std::sqrt(variances[j])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, + filter.marginalLikelihoodMode(weight), + 0.12 * std::sqrt(variances[j])); } } } } void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - // Test that the expectation of the residual from the mean for // the marginal likelihood matches the expected variance of the // marginal likelihood. @@ -660,10 +615,6 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { } void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { - LOG_DEBUG(<< "+--------------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood |"); - LOG_DEBUG(<< "+--------------------------------------------------------------+"); - // We're going to test two properties of the sampling: // 1) That the sample mean is equal to the marginal // likelihood mean. @@ -744,10 +695,6 @@ void CNormalMeanPrecConjugateTest::testSampleMarginalLikelihood() { } void CNormalMeanPrecConjugateTest::testCdf() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testCdf |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // Test error cases. // // Test some invariants: @@ -789,10 +736,6 @@ void CNormalMeanPrecConjugateTest::testCdf() { } void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+--------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+--------------------------------------------------------------------+"); - // We test that the probability of less likely samples calculation // agrees with the chance of seeing a sample with lower marginal // likelihood, up to the sampling error. 
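setCountVarianceScale above mutates one slot of a reusable weights array in place, which is what lets the loop re-scale a single weight object per iteration instead of rebuilding nested vectors. The shape of that API, sketched with an assumed slot order:

    #include <array>
    #include <iostream>

    using TDoubleWeightsAry = std::array<double, 3>;
    const TDoubleWeightsAry UNIT{1.0, 1.0, 1.0};

    enum { E_Count = 0, E_SeasonalScale = 1, E_CountScale = 2 }; // Assumed slot order.

    void setSeasonalVarianceScale(double scale, TDoubleWeightsAry& weights) {
        weights[E_SeasonalScale] = scale;
    }
    void setCountVarianceScale(double scale, TDoubleWeightsAry& weights) {
        weights[E_CountScale] = scale;
    }

    int main() {
        TDoubleWeightsAry weight(UNIT);
        for (double vs : {0.2, 0.5, 1.0, 2.0, 5.0}) {
            setCountVarianceScale(vs, weight); // Reuse the same array each pass.
            std::cout << weight[E_CountScale] << '\n';
        }
    }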
@@ -856,11 +799,9 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { meanError.add(std::fabs(px - (lb + ub) / 2.0)); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -870,42 +811,52 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -917,10 +868,6 @@ void 
CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { } void CNormalMeanPrecConjugateTest::testAnomalyScore() { - LOG_DEBUG(<< "+--------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testAnomalyScore |"); - LOG_DEBUG(<< "+--------------------------------------------------+"); - // This test pushes 500 samples through the filter and adds in // anomalous signals in the bins at 30, 120, 300 and 420 with // magnitude 4, 5, 10 and 15 standard deviations, respectively, @@ -1046,10 +993,6 @@ void CNormalMeanPrecConjugateTest::testAnomalyScore() { } void CNormalMeanPrecConjugateTest::testIntegerData() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testIntegerData |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // If the data are discrete then we approximate the discrete distribution // by saying it is uniform on the intervals [n,n+1] for each integral n. // The idea of this test is to check that the inferred model agrees in the @@ -1152,10 +1095,6 @@ void CNormalMeanPrecConjugateTest::testIntegerData() { } void CNormalMeanPrecConjugateTest::testLowVariationData() { - LOG_DEBUG(<< "+------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testLowVariationData |"); - LOG_DEBUG(<< "+------------------------------------------------------+"); - { CNormalMeanPrecConjugate filter(makePrior(maths_t::E_IntegerData)); for (std::size_t i = 0u; i < 100; ++i) { @@ -1184,10 +1123,6 @@ void CNormalMeanPrecConjugateTest::testLowVariationData() { } void CNormalMeanPrecConjugateTest::testPersist() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testPersist |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - // Check that persist/restore is idempotent. const double mean = 10.0; @@ -1200,9 +1135,7 @@ void CNormalMeanPrecConjugateTest::testPersist() { maths::CNormalMeanPrecConjugate origFilter(makePrior()); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1241,10 +1174,6 @@ void CNormalMeanPrecConjugateTest::testPersist() { } void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testSeasonalVarianceScale |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - // We are test: // 1) The marginal likelihood is normalized. // 2) E[(X - m)^2] w.r.t. the log-likelihood is scaled. 
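The tail bookkeeping in the preceding hunks (E_LeftTail versus E_RightTail) follows from the two-sided probability: for a unimodal density a sample below the mode lands in the left tail, one above it in the right tail, and for a Gaussian the two-sided p-value is 2 min(F(x), 1 - F(x)). Std-only:

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    double cdf(double x, double mean, double sd) { // Gaussian c.d.f. via erf.
        return 0.5 * (1.0 + std::erf((x - mean) / (sd * std::sqrt(2.0))));
    }

    int main() {
        double mean = 10.0, sd = 2.0;
        for (double x : {0.9 * mean, 1.1 * mean}) {
            double F = cdf(x, mean, sd);
            double p = 2.0 * std::min(F, 1.0 - F);
            const char* tail = x < mean ? "left tail" : "right tail";
            std::cout << "x = " << x << ", p = " << p << ", " << tail << '\n';
        }
    }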
@@ -1266,9 +1195,7 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { rng.generateNormalSamples(means[i], variances[j], 100, samples); double varianceScales[] = {0.2, 0.5, 1.0, 2.0, 5.0}; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); - TDouble4Vec1Vec weights(1, weight); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); double m; double v; @@ -1291,37 +1218,32 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; - weight[0] = vs; - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); LOG_DEBUG(<< "*** variance scale = " << vs << " ***"); double Z; - filter.expectation(C1dUnitKernel(), 50, Z, weightStyle, weight); + filter.expectation(C1dUnitKernel(), 50, Z, weight); LOG_DEBUG(<< "Z = " << Z); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, Z, 1e-3); - LOG_DEBUG(<< "sv = " - << filter.marginalLikelihoodVariance(weightStyle, weight)); + LOG_DEBUG(<< "sv = " << filter.marginalLikelihoodVariance(weight)); double expectationVariance; filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), - 100, expectationVariance, weightStyle, weight); + 100, expectationVariance, weight); LOG_DEBUG(<< "expectationVariance = " << expectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL( vs * unscaledExpectationVariance, expectationVariance, 0.01 * vs * unscaledExpectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL( - filter.marginalLikelihoodVariance(weightStyle, weight), expectationVariance, - 0.01 * filter.marginalLikelihoodVariance(weightStyle, weight)); + filter.marginalLikelihoodVariance(weight), expectationVariance, + 0.01 * filter.marginalLikelihoodVariance(weight)); - double mode = filter.marginalLikelihoodMode(weightStyle, weight); + double mode = filter.marginalLikelihoodMode(weight); double fm; double fmMinusEps, fmPlusEps; - filter.jointLogMarginalLikelihood( - weightStyle, TDouble1Vec(1, mode - 1e-3), weights, fmMinusEps); - filter.jointLogMarginalLikelihood( - weightStyle, TDouble1Vec(1, mode), weights, fm); - filter.jointLogMarginalLikelihood( - weightStyle, TDouble1Vec(1, mode + 1e-3), weights, fmPlusEps); + filter.jointLogMarginalLikelihood({mode - 1e-3}, {weight}, fmMinusEps); + filter.jointLogMarginalLikelihood({mode}, {weight}, fm); + filter.jointLogMarginalLikelihood({mode + 1e-3}, {weight}, fmPlusEps); LOG_DEBUG(<< "log(f(mode)) = " << fm << ", log(f(mode - eps)) = " << fmMinusEps << ", log(f(mode + eps)) = " << fmPlusEps); CPPUNIT_ASSERT(fm > fmMinusEps); @@ -1332,13 +1254,13 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { for (std::size_t l = 0u; l < boost::size(points); ++l) { TDouble1Vec x(1, points[l]); double fx; - filter.jointLogMarginalLikelihood(weightStyle, x, weights, fx); + filter.jointLogMarginalLikelihood(x, {weight}, fx); TDouble1Vec xMinusEps(1, points[l] - 1e-3); TDouble1Vec xPlusEps(1, points[l] + 1e-3); double lb, ub; - filter.minusLogJointCdf(weightStyle, xPlusEps, weights, lb, ub); + filter.minusLogJointCdf(xPlusEps, {weight}, lb, ub); double FxPlusEps = std::exp(-(lb + ub) / 2.0); - filter.minusLogJointCdf(weightStyle, xMinusEps, weights, lb, ub); + filter.minusLogJointCdf(xMinusEps, {weight}, lb, ub); double FxMinusEps = std::exp(-(lb + ub) / 2.0); LOG_DEBUG(<< "x = " << points[l] << ", log(f(x)) = " << fx << ", F(x - eps) = " << FxMinusEps @@ -1349,22 +1271,22 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { 
0.05 * std::fabs(fx)); sample[0] = m + (points[l] - m) / std::sqrt(vs); - weights[0][0] = 1.0; + maths_t::setSeasonalVarianceScale(1.0, weight); double expectedLowerBound; double expectedUpperBound; maths_t::ETail expectedTail; filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, sample, weights, + maths_t::E_TwoSided, sample, {weight}, expectedLowerBound, expectedUpperBound, expectedTail); sample[0] = points[l]; - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); double lowerBound; double upperBound; maths_t::ETail tail; - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, sample, weights, - lowerBound, upperBound, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + sample, {weight}, lowerBound, + upperBound, tail); LOG_DEBUG(<< "expectedLowerBound = " << expectedLowerBound); LOG_DEBUG(<< "lowerBound = " << lowerBound); @@ -1396,9 +1318,9 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { rng.random_shuffle(samples.begin(), samples.end()); CNormalMeanPrecConjugate filter(makePrior()); - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); for (std::size_t l = 0u; l < samples.size(); ++l) { - filter.addSamples(weightStyle, TDouble1Vec(1, samples[l]), weights); + filter.addSamples({samples[l]}, {weight}); } double sm = filter.marginalLikelihoodMean(); @@ -1414,10 +1336,6 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { } void CNormalMeanPrecConjugateTest::testCountVarianceScale() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testCountVarianceScale |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // The strategy for this test is to check we correctly account // for variance scaling by scaling the variance of a collection // of samples and then checking that the percentiles for those @@ -1507,10 +1425,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { double lowerBound, upperBound; maths_t::ETail tail; CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, scaledSamples[k]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), + maths_t::E_TwoSided, {scaledSamples[k]}, + {maths_t::countVarianceScaleWeight(varianceScales[j])}, lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; @@ -1574,9 +1490,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { CPPUNIT_ASSERT_EQUAL( maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood)); + {scaledSamples[j]}, + {maths_t::countVarianceScaleWeight(varianceScales[i])}, logLikelihood)); differentialEntropy -= logLikelihood; } @@ -1600,7 +1515,6 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { 85.0, 90.0, 95.0, 99.0}; unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); double variances[] = {1.0, 5.0}; double precision = 1 / variances[0]; @@ -1610,8 +1524,9 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { for (std::size_t i = 0u; i < boost::size(variances); ++i) { TDoubleVec samples; 
rng.generateNormalSamples(0.0, variances[i], 1000, samples);
-            TDouble4Vec1Vec weights(samples.size(), TDouble4Vec(1, variances[i]));
-            filter.addSamples(weightStyle, samples, weights);
+            filter.addSamples(samples, maths_t::TDoubleWeightsAry1Vec(
+                                           samples.size(), maths_t::countVarianceScaleWeight(
+                                                               variances[i])));
         }
 
         for (std::size_t i = 0; i < boost::size(testIntervals); ++i) {
diff --git a/lib/maths/unittest/COneOfNPriorTest.cc b/lib/maths/unittest/COneOfNPriorTest.cc
index 2530f23a85..7f2bb5fe64 100644
--- a/lib/maths/unittest/COneOfNPriorTest.cc
+++ b/lib/maths/unittest/COneOfNPriorTest.cc
@@ -57,6 +57,7 @@ using CMultimodalPrior = CPriorTestInterfaceMixin<maths::CMultimodalPrior>;
 using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin<maths::CNormalMeanPrecConjugate>;
 using COneOfNPrior = CPriorTestInterfaceMixin<maths::COneOfNPrior>;
 using CPoissonMeanConjugate = CPriorTestInterfaceMixin<maths::CPoissonMeanConjugate>;
+using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double);
 
 COneOfNPrior::TPriorPtrVec clone(const TPriorPtrVec& models,
                                  const TOptionalDouble& decayRate = TOptionalDouble()) {
@@ -86,10 +87,6 @@ using maths_t::E_IntegerData;
 }
 
 void COneOfNPriorTest::testFilter() {
-    LOG_DEBUG(<< "+--------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testFilter |");
-    LOG_DEBUG(<< "+--------------------------------+");
-
     TPriorPtrVec models;
     models.push_back(TPriorPtr(
         maths::CGammaRateConjugate::nonInformativePrior(E_ContinuousData).clone()));
@@ -130,10 +127,6 @@ void COneOfNPriorTest::testFilter() {
 }
 
 void COneOfNPriorTest::testMultipleUpdate() {
-    LOG_DEBUG(<< "+----------------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testMultipleUpdate |");
-    LOG_DEBUG(<< "+----------------------------------------+");
-
     // Test that we get the same result updating once with a vector of 100
     // samples of an R.V. versus updating individually 100 times.
 
@@ -211,18 +204,12 @@ void COneOfNPriorTest::testMultipleUpdate() {
     for (std::size_t j = 0u; j < count; ++j) {
         filter1.addSamples(TDouble1Vec(1, x));
     }
-    filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight),
-                       TDouble1Vec(1, x),
-                       TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast<double>(count))));
+    filter2.addSamples({x}, {maths_t::countWeight(static_cast<double>(count))});
 
     CPPUNIT_ASSERT_EQUAL(filter1.checksum(), filter2.checksum());
 }
 
 void COneOfNPriorTest::testWeights() {
-    LOG_DEBUG(<< "+---------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testWeights |");
-    LOG_DEBUG(<< "+---------------------------------+");
-
     test::CRandomNumbers rng;
 
     {
@@ -299,10 +286,6 @@ void COneOfNPriorTest::testWeights() {
 }
 
 void COneOfNPriorTest::testModels() {
-    LOG_DEBUG(<< "+--------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testModels |");
-    LOG_DEBUG(<< "+--------------------------------+");
-
     // Test the models posterior mean values.
 
     // Since the component model's posterior distributions are tested
@@ -391,10 +374,6 @@ void COneOfNPriorTest::testModels() {
 }
 
 void COneOfNPriorTest::testModelSelection() {
-    LOG_DEBUG(<< "+----------------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testModelSelection |");
-    LOG_DEBUG(<< "+----------------------------------------+");
-
     test::CRandomNumbers rng;
 
     {
@@ -545,10 +524,6 @@ void COneOfNPriorTest::testModelSelection() {
 }
 
 void COneOfNPriorTest::testMarginalLikelihood() {
-    LOG_DEBUG(<< "+--------------------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testMarginalLikelihood |");
-    LOG_DEBUG(<< "+--------------------------------------------+");
-
     // Check that the c.d.f. <= 1 at extreme.
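
The testMultipleUpdate hunk above captures the essence of the new weights API: one update carrying a count weight of N should leave a prior in exactly the state produced by N unit-weight updates. A minimal sketch of that equivalence, assuming only the fixtures and calls already visible in this patch:

    // filter1 and filter2 start from identical non-informative priors.
    double x = 2.3;
    std::size_t count = 100;
    // N separate updates, each with an implicit unit weight.
    for (std::size_t j = 0u; j < count; ++j) {
        filter1.addSamples(TDouble1Vec(1, x));
    }
    // One update whose count weight aggregates the N observations.
    filter2.addSamples({x}, {maths_t::countWeight(static_cast<double>(count))});
    // Identical state implies identical checksums.
    CPPUNIT_ASSERT_EQUAL(filter1.checksum(), filter2.checksum());
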
maths_t::EDataType dataTypes[] = {E_ContinuousData, E_IntegerData};
@@ -572,17 +547,14 @@ void COneOfNPriorTest::testMarginalLikelihood() {
             rng.generateLogNormalSamples(location, squareScale, 10, samples);
             filter.addSamples(samples);
 
-            maths_t::ESampleWeightStyle weightStyles[] = {
-                maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight,
-                maths_t::E_SampleCountWeight};
-            double weights[] = {0.1, 1.0, 10.0};
+            TWeightFunc weightsFuncs[]{static_cast<TWeightFunc>(maths_t::countWeight),
+                                       static_cast<TWeightFunc>(maths_t::winsorisationWeight)};
+            double weights[]{0.1, 1.0, 10.0};
 
-            for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
+            for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) {
                 for (std::size_t j = 0u; j < boost::size(weights); ++j) {
                     double lb, ub;
-                    filter.minusLogJointCdf(
-                        maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 10000.0),
-                        TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub);
+                    filter.minusLogJointCdf({10000.0}, {weightsFuncs[i](weights[j])}, lb, ub);
                     LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0);
                     CPPUNIT_ASSERT(lb >= 0.0);
                     CPPUNIT_ASSERT(ub >= 0.0);
@@ -651,10 +623,6 @@ void COneOfNPriorTest::testMarginalLikelihood() {
 }
 
 void COneOfNPriorTest::testMarginalLikelihoodMean() {
-    LOG_DEBUG(<< "+------------------------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testMarginalLikelihoodMean |");
-    LOG_DEBUG(<< "+------------------------------------------------+");
-
     // Test that the expectation of the marginal likelihood matches
     // the expected mean of the marginal likelihood.
 
@@ -762,10 +730,6 @@ void COneOfNPriorTest::testMarginalLikelihoodMean() {
 }
 
 void COneOfNPriorTest::testMarginalLikelihoodMode() {
-    LOG_DEBUG(<< "+------------------------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testMarginalLikelihoodMode |");
-    LOG_DEBUG(<< "+------------------------------------------------+");
-
     // Test that the marginal likelihood mode is near the maximum
     // of the marginal likelihood.
 
@@ -866,10 +830,6 @@ void COneOfNPriorTest::testMarginalLikelihoodMode() {
 }
 
 void COneOfNPriorTest::testMarginalLikelihoodVariance() {
-    LOG_DEBUG(<< "+----------------------------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testMarginalLikelihoodVariance |");
-    LOG_DEBUG(<< "+----------------------------------------------------+");
-
     // Test that the expectation of the residual from the mean for
     // the marginal likelihood matches the expected variance of the
     // marginal likelihood.
 
@@ -990,10 +950,6 @@ void COneOfNPriorTest::testMarginalLikelihoodVariance() {
 }
 
 void COneOfNPriorTest::testSampleMarginalLikelihood() {
-    LOG_DEBUG(<< "+--------------------------------------------------+");
-    LOG_DEBUG(<< "| COneOfNPriorTest::testSampleMarginalLikelihood |");
-    LOG_DEBUG(<< "+--------------------------------------------------+");
-
     // Test we sample the constituent priors in proportion to their weights.
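
A note on the static_cast in the new weightsFuncs array: maths_t::countWeight and maths_t::winsorisationWeight are presumably overloaded (the cast would be redundant otherwise), so taking their address is ambiguous until a cast selects the overload matching TWeightFunc. A self-contained sketch of the idiom, with illustrative names rather than the library's:

    #include <cstddef>

    struct SWeights { double values[4]; };

    SWeights makeWeight(double v) { return SWeights{{v, 1.0, 1.0, 1.0}}; }
    SWeights makeWeight(double v, std::size_t n) {
        return SWeights{{v * static_cast<double>(n), 1.0, 1.0, 1.0}};
    }

    using TWeightFunc = SWeights (*)(double);

    // &makeWeight alone names an overload set; the cast picks the
    // single-argument overload that matches TWeightFunc.
    TWeightFunc funcs[]{static_cast<TWeightFunc>(makeWeight)};
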
const double mean = 5.0; @@ -1067,10 +1023,6 @@ void COneOfNPriorTest::testSampleMarginalLikelihood() { } void COneOfNPriorTest::testCdf() { - LOG_DEBUG(<< "+-----------------------------+"); - LOG_DEBUG(<< "| COneOfNPriorTest::testCdf |"); - LOG_DEBUG(<< "+-----------------------------+"); - // Test error cases and the invariant "cdf" + "cdf complement" = 1 const double mean = 20.0; @@ -1116,10 +1068,6 @@ void COneOfNPriorTest::testCdf() { } void COneOfNPriorTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| COneOfNPriorTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // We simply test that the calculation yields the weighted sum // of component model calculations (which is its definition). @@ -1160,9 +1108,8 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() { for (std::size_t j = 0u; j < weights.size(); ++j) { double weight = weights[j]; CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples( - maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, sample[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail)); + maths_t::E_TwoSided, {sample[0]}, + maths_t::CUnitWeights::SINGLE_UNIT, lb, ub, tail)); CPPUNIT_ASSERT_EQUAL(lb, ub); double modelProbability = (lb + ub) / 2.0; expectedProbability += weight * modelProbability; @@ -1174,51 +1121,61 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() { CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 1e-3 * std::max(expectedProbability, probability)); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; ((i + 1) % 11 == 0) && k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if 
(mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -1226,10 +1183,6 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() { } void COneOfNPriorTest::testPersist() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| COneOfNPriorTest::testPersist |"); - LOG_DEBUG(<< "+---------------------------------+"); - // Check that persist/restore is idempotent. TPriorPtrVec models; @@ -1250,9 +1203,7 @@ void COneOfNPriorTest::testPersist() { maths::COneOfNPrior origFilter(clone(models), E_IntegerData); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); diff --git a/lib/maths/unittest/COrderingsTest.cc b/lib/maths/unittest/COrderingsTest.cc index 560c9fa8a2..ed429f3fec 100644 --- a/lib/maths/unittest/COrderingsTest.cc +++ b/lib/maths/unittest/COrderingsTest.cc @@ -59,10 +59,6 @@ void swap(CDictionary& lhs, CDictionary& rhs) { } void COrderingsTest::testOptionalOrdering() { - LOG_DEBUG(<< "+----------------------------------------+"); - LOG_DEBUG(<< "| COrderingsTest::testOptionalOrdering |"); - LOG_DEBUG(<< "+----------------------------------------+"); - TOptionalDouble null; TOptionalDouble one(1.0); TOptionalDouble two(2.0); @@ -98,10 +94,6 @@ void COrderingsTest::testOptionalOrdering() { } void COrderingsTest::testPtrOrdering() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| COrderingsTest::testPtrOrdering |"); - LOG_DEBUG(<< "+-----------------------------------+"); - const double* null = nullptr; double one_(1.0); double two_(2.0); @@ -138,10 +130,6 @@ void COrderingsTest::testPtrOrdering() { } void COrderingsTest::testLess() { - LOG_DEBUG(<< "+----------------------------+"); - LOG_DEBUG(<< "| COrderingsTest::testLess |"); - LOG_DEBUG(<< "+----------------------------+"); - maths::COrderings::SLess less; { @@ -219,10 +207,6 @@ void COrderingsTest::testLess() { } void 
COrderingsTest::testFirstLess() {
-    LOG_DEBUG(<< "+---------------------------------+");
-    LOG_DEBUG(<< "| COrderingsTest::testFirstLess |");
-    LOG_DEBUG(<< "+---------------------------------+");
-
     maths::COrderings::SFirstLess less;
 
     CPPUNIT_ASSERT(less(std::make_pair(1.0, 1.0), std::make_pair(2.0, 1.0)));
@@ -258,10 +242,6 @@ void COrderingsTest::testFirstLess() {
 }
 
 void COrderingsTest::testFirstGreater() {
-    LOG_DEBUG(<< "+------------------------------------+");
-    LOG_DEBUG(<< "| COrderingsTest::testFirstGreater |");
-    LOG_DEBUG(<< "+------------------------------------+");
-
     maths::COrderings::SFirstGreater greater;
 
     CPPUNIT_ASSERT(!greater(std::make_pair(1.0, 2.0), std::make_pair(2.0, 1.0)));
@@ -297,10 +277,6 @@ void COrderingsTest::testFirstGreater() {
 }
 
 void COrderingsTest::testSecondLess() {
-    LOG_DEBUG(<< "+----------------------------------+");
-    LOG_DEBUG(<< "| COrderingsTest::testSecondLess |");
-    LOG_DEBUG(<< "+----------------------------------+");
-
     maths::COrderings::SSecondLess less;
 
     CPPUNIT_ASSERT(!less(std::make_pair(1.0, 2.0), std::make_pair(2.0, 1.0)));
@@ -340,10 +316,6 @@ void COrderingsTest::testSecondLess() {
 }
 
 void COrderingsTest::testSecondGreater() {
-    LOG_DEBUG(<< "+-------------------------------------+");
-    LOG_DEBUG(<< "| COrderingsTest::testSecondGreater |");
-    LOG_DEBUG(<< "+-------------------------------------+");
-
     maths::COrderings::SSecondGreater greater;
 
     CPPUNIT_ASSERT(greater(std::make_pair(1.0, 2.0), std::make_pair(2.0, 1.0)));
@@ -383,10 +355,6 @@ void COrderingsTest::testSecondGreater() {
 }
 
 void COrderingsTest::testDereference() {
-    LOG_DEBUG(<< "+-----------------------------------+");
-    LOG_DEBUG(<< "| COrderingsTest::testDereference |");
-    LOG_DEBUG(<< "+-----------------------------------+");
-
     using TDoubleVec = std::vector<double>;
     using TDoubleVecCItr = std::vector<double>::const_iterator;
     using TDoubleVecCItrVec = std::vector<TDoubleVecCItr>;
 
@@ -408,10 +376,6 @@ void COrderingsTest::testDereference() {
 }
 
 void COrderingsTest::testLexicographicalCompare() {
-    LOG_DEBUG(<< "+----------------------------------------------+");
-    LOG_DEBUG(<< "| COrderingsTest::testLexicographicalCompare |");
-    LOG_DEBUG(<< "+----------------------------------------------+");
-
     using TDoubleVec = std::vector<double>;
     using TDoubleDoublePr = std::pair<double, double>;
 
@@ -558,10 +522,6 @@ void COrderingsTest::testLexicographicalCompare() {
 }
 
 void COrderingsTest::testSimultaneousSort() {
-    LOG_DEBUG(<< "+----------------------------------------+");
-    LOG_DEBUG(<< "| COrderingsTest::testSimultaneousSort |");
-    LOG_DEBUG(<< "+----------------------------------------+");
-
     using TDoubleVec = std::vector<double>;
     using TDouble1Vec = core::CSmallVector<double, 1>;
     using TDoubleDoublePr = std::pair<double, double>;
 
@@ -601,8 +561,7 @@ void COrderingsTest::testSimultaneousSort() {
 
         std::string expectedKeys("[0.2, 0.7, 1, 1.1, 5, 7]");
         std::string expectedValues1("[~, ;, q, e, y, w]");
-        std::string expectedValues2("[(1.3, 1.9), (1.2, 10.1), (2.1, 1.1), "
-                                    "(3.2, 12.9), (1.3, 6.2), (2, 1)]");
+        std::string expectedValues2("[(1.3, 1.9), (1.2, 10.1), (2.1, 1.1), (3.2, 12.9), (1.3, 6.2), (2, 1)]");
 
         maths::COrderings::simultaneousSort(keys, values1, values2);
         LOG_DEBUG(<< "keys = " << core::CContainerPrinter::print(keys));
@@ -632,9 +591,7 @@ void COrderingsTest::testSimultaneousSort() {
         LOG_DEBUG(<< "values3 = " << core::CContainerPrinter::print(values3));
         std::string expectedKeys("[0.1, 0.7, 0.9, 1.4, 4, 5.1, 7.1, 80]");
         std::string expectedValues1("[23, ;;, ~1, b4, pq, zz, a1, sss]");
-        std::string expectedValues2("[(4.1, 1.1), (2.2, 1.1), (5.3, 3.9), "
"(7.2, 22.9), (10.3, 13.2), (0.3, 16.2), " - "(1, 1), (21.2, 11.1)]"); + std::string expectedValues2("[(4.1, 1.1), (2.2, 1.1), (5.3, 3.9), (7.2, 22.9), (10.3, 13.2), (0.3, 16.2), (1, 1), (21.2, 11.1)]"); maths::COrderings::simultaneousSort(keys, values1, values2, values3); LOG_DEBUG(<< "keys = " << core::CContainerPrinter::print(keys)); diff --git a/lib/maths/unittest/COrdinalTest.cc b/lib/maths/unittest/COrdinalTest.cc index 3952e5276d..0efbc463ce 100644 --- a/lib/maths/unittest/COrdinalTest.cc +++ b/lib/maths/unittest/COrdinalTest.cc @@ -35,11 +35,6 @@ std::string precisePrint(T x) { } void COrdinalTest::testEqual() { - LOG_DEBUG(<< ""); - LOG_DEBUG(<< "+---------------------------+"); - LOG_DEBUG(<< "| COrdinalTest::testEqual |"); - LOG_DEBUG(<< "+---------------------------+"); - test::CRandomNumbers rng; for (std::size_t i = 0u; i < 1000; ++i) { @@ -113,11 +108,6 @@ void COrdinalTest::testEqual() { } void COrdinalTest::testLess() { - LOG_DEBUG(<< ""); - LOG_DEBUG(<< "+--------------------------+"); - LOG_DEBUG(<< "| COrdinalTest::testLess |"); - LOG_DEBUG(<< "+--------------------------+"); - test::CRandomNumbers rng; // Test some random orderings on integer types which don't overflow. @@ -205,11 +195,6 @@ void COrdinalTest::testLess() { } void COrdinalTest::testIsNan() { - LOG_DEBUG(<< ""); - LOG_DEBUG(<< "+---------------------------+"); - LOG_DEBUG(<< "| COrdinalTest::testIsNan |"); - LOG_DEBUG(<< "+---------------------------+"); - maths::COrdinal nan; CPPUNIT_ASSERT(nan.isNan()); @@ -250,11 +235,6 @@ void COrdinalTest::testIsNan() { } void COrdinalTest::testAsDouble() { - LOG_DEBUG(<< ""); - LOG_DEBUG(<< "+------------------------------+"); - LOG_DEBUG(<< "| COrdinalTest::testAsDouble |"); - LOG_DEBUG(<< "+------------------------------+"); - // Check that double conversion is as expected. test::CRandomNumbers rng; @@ -287,11 +267,6 @@ void COrdinalTest::testAsDouble() { } void COrdinalTest::testHash() { - LOG_DEBUG(<< ""); - LOG_DEBUG(<< "+--------------------------+"); - LOG_DEBUG(<< "| COrdinalTest::testHash |"); - LOG_DEBUG(<< "+--------------------------+"); - // Test that hashing works over the full range of the distinct types. 
using TSizeUSet = boost::unordered_set; diff --git a/lib/maths/unittest/CPRNGTest.cc b/lib/maths/unittest/CPRNGTest.cc index 4ebe063231..a9e0c4d3dc 100644 --- a/lib/maths/unittest/CPRNGTest.cc +++ b/lib/maths/unittest/CPRNGTest.cc @@ -20,10 +20,6 @@ using namespace ml; void CPRNGTest::testSplitMix64() { - LOG_DEBUG(<< "+-----------------------------+"); - LOG_DEBUG(<< "| CPRNGTest::testSplitMix64 |"); - LOG_DEBUG(<< "+-----------------------------+"); - maths::CPRNG::CSplitMix64 rng1; boost::uniform_01<> u01; @@ -129,10 +125,6 @@ void CPRNGTest::testSplitMix64() { } void CPRNGTest::testXorOShiro128Plus() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CPRNGTest::testXorOShiro128Plus |"); - LOG_DEBUG(<< "+-----------------------------------+"); - maths::CPRNG::CXorOShiro128Plus rng1; boost::uniform_01<> u01; @@ -255,10 +247,6 @@ void CPRNGTest::testXorOShiro128Plus() { } void CPRNGTest::testXorShift1024Mult() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CPRNGTest::testXorShift1024Mult |"); - LOG_DEBUG(<< "+-----------------------------------+"); - maths::CPRNG::CXorShift1024Mult rng1; boost::uniform_01<> u01; diff --git a/lib/maths/unittest/CPackedBitVectorTest.cc b/lib/maths/unittest/CPackedBitVectorTest.cc index a398de0fbb..a644de7968 100644 --- a/lib/maths/unittest/CPackedBitVectorTest.cc +++ b/lib/maths/unittest/CPackedBitVectorTest.cc @@ -43,10 +43,6 @@ std::string toBitString(const TBoolVec& v) { } void CPackedBitVectorTest::testCreation() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CPackedBitVectorTest::testCreation |"); - LOG_DEBUG(<< "+--------------------------------------+"); - maths::CPackedBitVector test1(3, true); LOG_DEBUG(<< "test1 = " << test1); CPPUNIT_ASSERT_EQUAL(std::size_t(3), test1.dimension()); @@ -117,10 +113,6 @@ void CPackedBitVectorTest::testCreation() { } void CPackedBitVectorTest::testExtend() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CPackedBitVectorTest::testExtend |"); - LOG_DEBUG(<< "+------------------------------------+"); - maths::CPackedBitVector test1; test1.extend(true); LOG_DEBUG(<< "test1 = " << test1); @@ -185,10 +177,6 @@ void CPackedBitVectorTest::testExtend() { } void CPackedBitVectorTest::testContract() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CPackedBitVectorTest::testContract |"); - LOG_DEBUG(<< "+--------------------------------------+"); - maths::CPackedBitVector test1; test1.extend(true); test1.extend(true); @@ -228,10 +216,6 @@ void CPackedBitVectorTest::testContract() { } void CPackedBitVectorTest::testOperators() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CPackedBitVectorTest::testOperators |"); - LOG_DEBUG(<< "+---------------------------------------+"); - test::CRandomNumbers rng; TPackedBitVectorVec test; @@ -251,10 +235,6 @@ void CPackedBitVectorTest::testOperators() { } void CPackedBitVectorTest::testInner() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CPackedBitVectorTest::testInner |"); - LOG_DEBUG(<< "+-----------------------------------+"); - using TVector = maths::CVector; using TVectorVec = std::vector; @@ -317,10 +297,6 @@ void CPackedBitVectorTest::testInner() { } void CPackedBitVectorTest::testBitwiseOr() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CPackedBitVectorTest::testBitwiseOr |"); - LOG_DEBUG(<< "+---------------------------------------+"); 
- using TBitSetVec = std::vector>; test::CRandomNumbers rng; @@ -368,10 +344,6 @@ void CPackedBitVectorTest::testBitwiseOr() { } void CPackedBitVectorTest::testPersist() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CPackedBitVectorTest::testPersist |"); - LOG_DEBUG(<< "+-------------------------------------+"); - bool bits[] = {true, true, false, false, true, false, false, false, true, true}; diff --git a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc index 5090e6ba1d..ef64bfefcc 100644 --- a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc +++ b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc @@ -42,10 +42,6 @@ const core_t::TTime WEEK{core::constants::WEEK}; } void CPeriodicityHypothesisTestsTest::testNonPeriodic() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CPeriodicityHypothesisTestsTest::testNonPeriodic |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - // Test a variety of synthetic non-periodic signals. TTimeVec windows{WEEK, 2 * WEEK, 16 * DAY, 4 * WEEK}; @@ -105,10 +101,6 @@ void CPeriodicityHypothesisTestsTest::testNonPeriodic() { } void CPeriodicityHypothesisTestsTest::testDiurnal() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CPeriodicityHypothesisTestsTest::testDiurnal |"); - LOG_DEBUG(<< "+------------------------------------------------+"); - // Test the recall for a variety of synthetic periodic signals // and for a number of real data examples. @@ -316,8 +308,7 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() { if (time > lastTest + window) { maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; CPPUNIT_ASSERT(result.print() == "{ 'weekend daily' 'weekday daily' }" || - result.print() == "{ 'weekend daily' 'weekday daily' " - "'weekend weekly' 'weekday weekly' }"); + result.print() == "{ 'weekend daily' 'weekday daily' 'weekend weekly' 'weekday weekly' }"); hypotheses = maths::CPeriodicityHypothesisTests(); hypotheses.initialize(HOUR, window, DAY); lastTest += window; @@ -328,10 +319,6 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() { } void CPeriodicityHypothesisTestsTest::testNonDiurnal() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CPeriodicityHypothesisTestsTest::testNonDiurnal |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - // Test the recall for periods in the range [DAY / 5, 5 * DAY]. 
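
The synthetic signals these recall tests feed the hypothesis tests are, in essence, sinusoids of a chosen period plus noise. A sketch of such a generator (all names illustrative; the caller supplies the noise):

    #include <cmath>
    #include <vector>

    // Sinusoid with the given period, sampled every bucketLength seconds.
    // For the non-diurnal tests the period ranges over [DAY / 5, 5 * DAY].
    std::vector<double> periodicSignal(std::size_t n, double period,
                                       double bucketLength, double amplitude) {
        const double pi = 3.14159265358979323846;
        std::vector<double> signal(n);
        for (std::size_t i = 0u; i < n; ++i) {
            double t = static_cast<double>(i) * bucketLength;
            signal[i] = amplitude * std::sin(2.0 * pi * t / period);
        }
        return signal;
    }
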
TTimeVec windows{WEEK, 2 * WEEK, 16 * DAY, 4 * WEEK}; @@ -410,10 +397,6 @@ void CPeriodicityHypothesisTestsTest::testNonDiurnal() { } void CPeriodicityHypothesisTestsTest::testWithSparseData() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CPeriodicityHypothesisTestsTest::testTestWithSparseData |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - test::CRandomNumbers rng; LOG_DEBUG(<< "Daily Periodic") { @@ -545,10 +528,6 @@ void CPeriodicityHypothesisTestsTest::testWithSparseData() { } void CPeriodicityHypothesisTestsTest::testTestForPeriods() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CPeriodicityHypothesisTestsTest::testTestForPeriods |"); - LOG_DEBUG(<< "+-------------------------------------------------------+"); - // Test the ability to correctly find and test for periodic // signals without being told the periods to test a-priori. diff --git a/lib/maths/unittest/CPoissonMeanConjugateTest.cc b/lib/maths/unittest/CPoissonMeanConjugateTest.cc index e16f8f2a34..bcdadf39a3 100644 --- a/lib/maths/unittest/CPoissonMeanConjugateTest.cc +++ b/lib/maths/unittest/CPoissonMeanConjugateTest.cc @@ -45,10 +45,6 @@ using CPoissonMeanConjugate = CPriorTestInterfaceMixin(count)))); + filter2.addSamples({x}, {maths_t::countWeight(10.0)}); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -121,10 +115,6 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() { } void CPoissonMeanConjugateTest::testPropagation() { - LOG_DEBUG(<< "+----------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testPropagation |"); - LOG_DEBUG(<< "+----------------------------------------------+"); - // Test that propagation doesn't affect the expected values // of likelihood mean. @@ -153,10 +143,6 @@ void CPoissonMeanConjugateTest::testPropagation() { } void CPoissonMeanConjugateTest::testMeanEstimation() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testMeanEstimation |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // We are going to test that we correctly estimate a distribution // for the mean of the Poisson process by checking that the true // mean of a Poisson process lies in various confidence intervals @@ -214,10 +200,6 @@ void CPoissonMeanConjugateTest::testMeanEstimation() { } void CPoissonMeanConjugateTest::testMarginalLikelihood() { - LOG_DEBUG(<< "+-----------------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testMarginalLikelihood |"); - LOG_DEBUG(<< "+-----------------------------------------------------+"); - { // Check that the marginal likelihood and c.d.f. agree for some // test data and that the c.d.f. <= 1. @@ -354,10 +336,6 @@ void CPoissonMeanConjugateTest::testMarginalLikelihood() { } void CPoissonMeanConjugateTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+---------------------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testMarginalLikelihoodMode |"); - LOG_DEBUG(<< "+---------------------------------------------------------+"); - // Test that the marginal likelihood mode is what we'd expect // with variances variance scales. 
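
The expected value the assertions below compare against is simply the textbook mode of the underlying distribution, read off via boost::math exactly as the test does. For example, with an illustrative rate of 5.0:

    #include <boost/math/distributions/poisson.hpp>

    // A well-trained prior's marginal likelihood mode should land within
    // about one unit of the distribution's mode.
    boost::math::poisson_distribution<> poisson(5.0);
    double expectedMode = boost::math::mode(poisson);
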
@@ -380,27 +358,21 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodMode() { filter.addSamples(TDouble1Vec(1, static_cast(samples[j]))); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); for (std::size_t j = 0u; j < boost::size(varianceScales); ++j) { double vs = varianceScales[j]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); double expectedMode = boost::math::mode(poisson); - LOG_DEBUG(<< "marginalLikelihoodMode = " - << filter.marginalLikelihoodMode(weightStyle, weight) + LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weight) << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, + filter.marginalLikelihoodMode(weight), 1.0); } } } void CPoissonMeanConjugateTest::testMarginalLikelihoodVariance() { - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testMarginalLikelihoodVariance |"); - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - const double rates[] = {0.1, 5.0, 100.0}; test::CRandomNumbers rng; @@ -443,10 +415,6 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodVariance() { } void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testSampleMarginalLikelihood |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - // We're going to test two properties of the sampling: // 1) That the sample mean is equal to the marginal // likelihood mean. @@ -527,10 +495,6 @@ void CPoissonMeanConjugateTest::testSampleMarginalLikelihood() { } void CPoissonMeanConjugateTest::testCdf() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testCdf |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // Test error cases. // // Test some invariants: @@ -581,10 +545,6 @@ void CPoissonMeanConjugateTest::testCdf() { } void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+-----------------------------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+-----------------------------------------------------------------+"); - // We test that the probability of less likely samples calculation // agrees with the chance of seeing a sample with lower marginal // likelihood, up to the sampling error. 
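
Spelled out, the quantity being validated is P(f(X) <= f(x)) under the model's density f. A self-contained Monte Carlo sketch for a standard normal, where ordering by density reduces to ordering by |x|:

    #include <cmath>
    #include <random>

    // Estimate P(|X| >= |x|) by sampling; the filter's two-sided
    // calculation should agree with this up to sampling error.
    double probabilityOfLessLikely(double x, std::size_t trials) {
        std::mt19937 rng(42);
        std::normal_distribution<double> normal(0.0, 1.0);
        std::size_t lessLikely = 0u;
        for (std::size_t i = 0u; i < trials; ++i) {
            if (std::fabs(normal(rng)) >= std::fabs(x)) {
                ++lessLikely;
            }
        }
        return static_cast<double>(lessLikely) / static_cast<double>(trials);
    }
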
@@ -646,11 +606,9 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() { meanError.add(std::fabs(px - (lb + ub) / 2.0)); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -660,40 +618,52 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() { if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -704,10 +674,6 @@ void 
CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() { } void CPoissonMeanConjugateTest::testAnomalyScore() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testAnomalyScore |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - // This test pushes 500 samples through the filter and adds in // anomalous signals in the bins at 30, 120, 300 and 420 with // magnitude 4, 5, 10 and 15 standard deviations, respectively, @@ -825,10 +791,6 @@ void CPoissonMeanConjugateTest::testAnomalyScore() { } void CPoissonMeanConjugateTest::testOffset() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testOffset |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // The idea of this test is to check that the offset correctly cancels // out a translation applied to a log-normally distributed data set. @@ -889,10 +851,6 @@ void CPoissonMeanConjugateTest::testOffset() { } void CPoissonMeanConjugateTest::testPersist() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testPersist |"); - LOG_DEBUG(<< "+------------------------------------------+"); - const double rate = 5.0; test::CRandomNumbers rng; @@ -902,9 +860,7 @@ void CPoissonMeanConjugateTest::testPersist() { maths::CPoissonMeanConjugate origFilter(CPoissonMeanConjugate::nonInformativePrior()); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -944,10 +900,6 @@ void CPoissonMeanConjugateTest::testPersist() { } void CPoissonMeanConjugateTest::testNegativeSample() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CPoissonMeanConjugateTest::testNegativeSample |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // Test that we recover roughly the same distribution after adjusting // the offset. 
The idea of this test is to run two priors side by side, // one with a large enough offset that it never needs to adjust the diff --git a/lib/maths/unittest/CPriorTest.cc b/lib/maths/unittest/CPriorTest.cc index b5014760a8..360ca62af3 100644 --- a/lib/maths/unittest/CPriorTest.cc +++ b/lib/maths/unittest/CPriorTest.cc @@ -57,36 +57,28 @@ class CMinusLogLikelihood { public: CMinusLogLikelihood(const maths::CPrior& prior) - : m_Prior(&prior), m_WeightStyle(1, maths_t::E_SampleCountWeight), - m_X(1, 0.0), m_Weight(1, TDoubleVec(1, 1.0)) {} + : m_Prior(&prior), m_X(1, 0.0) {} bool operator()(const double& x, double& result) const { m_X[0] = x; - maths_t::EFloatingPointErrorStatus status = - m_Prior->jointLogMarginalLikelihood(m_WeightStyle, m_X, m_Weight, result); + maths_t::EFloatingPointErrorStatus status = m_Prior->jointLogMarginalLikelihood( + m_X, maths_t::CUnitWeights::SINGLE_UNIT, result); result = -result; return !(status & maths_t::E_FpFailed); } private: const maths::CPrior* m_Prior; - maths_t::TWeightStyleVec m_WeightStyle; mutable TDoubleVec m_X; - TDoubleVecVec m_Weight; }; } void CPriorTest::testExpectation() { - LOG_DEBUG(<< "+-------------------------------+"); - LOG_DEBUG(<< "| CPriorTest::testExpectation |"); - LOG_DEBUG(<< "+-------------------------------+"); - using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; - using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin; test::CRandomNumbers rng; - CNormalMeanPrecConjugate prior( + maths::CNormalMeanPrecConjugate prior( maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)); TDoubleVec samples; @@ -94,7 +86,8 @@ void CPriorTest::testExpectation() { TMeanVarAccumulator moments; moments.add(samples); - prior.addSamples(samples); + prior.addSamples(samples, maths_t::TDoubleWeightsAry1Vec( + samples.size(), maths_t::CUnitWeights::UNIT)); double trueMean = maths::CBasicStatistics::mean(moments); LOG_DEBUG(<< "true mean = " << trueMean); diff --git a/lib/maths/unittest/CProbabilityAggregatorsTest.cc b/lib/maths/unittest/CProbabilityAggregatorsTest.cc index 934c81c2dd..6802122a35 100644 --- a/lib/maths/unittest/CProbabilityAggregatorsTest.cc +++ b/lib/maths/unittest/CProbabilityAggregatorsTest.cc @@ -137,10 +137,6 @@ class CExpectedLogProbabilityOfMFromNExtremeSamples { } void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+------------------------------------------------------------------------+"); - // Test case that overflows boost incomplete gamma function. 
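
For readers new to the aggregator this test exercises: the library's calculation is more elaborate, but Fisher's method conveys the idea and shows where the incomplete gamma (equivalently chi-squared) tail can overflow. A sketch under that simplification, not the library's implementation:

    #include <boost/math/distributions/chi_squared.hpp>
    #include <cmath>
    #include <vector>

    // Fisher's method: under the null, -2 * sum(log p_i) is chi-squared
    // with 2N degrees of freedom. Very small p_i push the statistic far
    // into the upper tail, the regime this test case targets.
    double jointProbability(const std::vector<double>& probabilities) {
        double statistic = 0.0;
        for (double p : probabilities) {
            statistic -= 2.0 * std::log(p);
        }
        boost::math::chi_squared chi2(2.0 * static_cast<double>(probabilities.size()));
        return boost::math::cdf(boost::math::complement(chi2, statistic));
    }
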
{ @@ -267,10 +263,6 @@ void CProbabilityAggregatorsTest::testJointProbabilityOfLessLikelySamples() { } void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() { - LOG_DEBUG(<< "+---------------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples |"); - LOG_DEBUG(<< "+---------------------------------------------------------------------------+"); - { std::ifstream ifs("testfiles/probabilities"); @@ -378,10 +370,6 @@ void CProbabilityAggregatorsTest::testLogJointProbabilityOfLessLikelySamples() { } void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() { - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - LOG_DEBUG(<< "| CProbabilityAggregatorsTest::testProbabilityExtremeSample |"); - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - // The idea of this test is to check that the extreme sample // probability is correctly predicted. @@ -448,10 +436,6 @@ void CProbabilityAggregatorsTest::testProbabilityOfExtremeSample() { } void CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples() { - LOG_DEBUG(<< "+----------------------------------------------------------------------+"); - LOG_DEBUG(<< "| CProbabilityAggregatorsTest::testProbabilityOfMFromNExtremeSamples |"); - LOG_DEBUG(<< "+----------------------------------------------------------------------+"); - // We perform four tests: // 1) A test that the numerical integral is close to the // closed form integral. diff --git a/lib/maths/unittest/CProbabilityCalibratorTest.cc b/lib/maths/unittest/CProbabilityCalibratorTest.cc index 6412dba110..7f9a05d25a 100644 --- a/lib/maths/unittest/CProbabilityCalibratorTest.cc +++ b/lib/maths/unittest/CProbabilityCalibratorTest.cc @@ -24,10 +24,6 @@ using namespace ml; void CProbabilityCalibratorTest::testCalibration() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CProbabilityCalibratorTest::testCalibration |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - using TDoubleVec = std::vector; using CLogNormalMeanPrecConjugate = CPriorTestInterfaceMixin; diff --git a/lib/maths/unittest/CQDigestTest.cc b/lib/maths/unittest/CQDigestTest.cc index a5fda450a5..cdf04ef8f6 100644 --- a/lib/maths/unittest/CQDigestTest.cc +++ b/lib/maths/unittest/CQDigestTest.cc @@ -31,10 +31,6 @@ using TUInt32UInt64Pr = std::pair; using TUInt32UInt64PrVec = std::vector; void CQDigestTest::testAdd() { - LOG_DEBUG(<< "+-------------------------+"); - LOG_DEBUG(<< "| CQDigestTest::testAdd |"); - LOG_DEBUG(<< "+-------------------------+"); - // We test the space and error bounds on the quantile calculations // for various inputs. @@ -146,10 +142,6 @@ void CQDigestTest::testMerge() { } void CQDigestTest::testCdf() { - LOG_DEBUG(<< "+-------------------------+"); - LOG_DEBUG(<< "| CQDigestTest::testCdf |"); - LOG_DEBUG(<< "+-------------------------+"); - // We check the relationship that c.d.f. is the approximate inverse // of quantile. We also test the quality of the approximation versus // the true c.d.f. of the data. @@ -213,10 +205,6 @@ void CQDigestTest::testCdf() { } void CQDigestTest::testSummary() { - LOG_DEBUG(<< "+-----------------------------+"); - LOG_DEBUG(<< "| CQDigestTest::testSummary |"); - LOG_DEBUG(<< "+-----------------------------+"); - // Check that quantiles of the summary agree with the digest. 
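
The ground truth for these digest comparisons is the exact sample quantile, which a test can compute directly by selection. A sketch:

    #include <algorithm>
    #include <vector>

    // Exact q'th quantile of a sample by partial selection; the digest's
    // estimate is judged against this value.
    double exactQuantile(std::vector<double> samples, double q) {
        std::size_t k = static_cast<std::size_t>(q * static_cast<double>(samples.size() - 1));
        std::nth_element(samples.begin(), samples.begin() + k, samples.end());
        return samples[k];
    }
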
{ CQDigest qDigest(20u); @@ -280,10 +268,6 @@ void CQDigestTest::testSummary() { } void CQDigestTest::testPropagateForwardByTime() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CQDigestTest::testPropagateForwardByTime |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - using TMeanAccumlator = CBasicStatistics::SSampleMean::TAccumulator; { @@ -424,10 +408,6 @@ void CQDigestTest::testPropagateForwardByTime() { } void CQDigestTest::testScale() { - LOG_DEBUG(<< "+---------------------------+"); - LOG_DEBUG(<< "| CQDigestTest::testScale |"); - LOG_DEBUG(<< "+---------------------------+"); - { CQDigest qDigest(10u, 1.0); @@ -542,10 +522,6 @@ void CQDigestTest::testScale() { } void CQDigestTest::testPersist() { - LOG_DEBUG(<< "+-----------------------------+"); - LOG_DEBUG(<< "| CQDigestTest::testPersist |"); - LOG_DEBUG(<< "+-----------------------------+"); - // Large n uniform random. CRandomNumbers generator; diff --git a/lib/maths/unittest/CQuantileSketchTest.cc b/lib/maths/unittest/CQuantileSketchTest.cc index 69b96e2d00..f206267f1f 100644 --- a/lib/maths/unittest/CQuantileSketchTest.cc +++ b/lib/maths/unittest/CQuantileSketchTest.cc @@ -72,10 +72,6 @@ void testSketch(maths::CQuantileSketch::EInterpolation interpolation, } void CQuantileSketchTest::testAdd() { - LOG_DEBUG(<< "+--------------------------------+"); - LOG_DEBUG(<< "| CQuantileSketchTest::testAdd |"); - LOG_DEBUG(<< "+--------------------------------+"); - maths::CQuantileSketch sketch(maths::CQuantileSketch::E_Linear, 5); // Test adding a point. @@ -98,10 +94,6 @@ void CQuantileSketchTest::testAdd() { } void CQuantileSketchTest::testReduce() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CQuantileSketchTest::testReduce |"); - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "*** Linear ***"); { maths::CQuantileSketch sketch(maths::CQuantileSketch::E_Linear, 6); @@ -258,10 +250,6 @@ void CQuantileSketchTest::testReduce() { } void CQuantileSketchTest::testMerge() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CQuantileSketchTest::testMerge |"); - LOG_DEBUG(<< "+----------------------------------+"); - { // Simple merge no reduction. @@ -323,10 +311,6 @@ void CQuantileSketchTest::testMerge() { } void CQuantileSketchTest::testMedian() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CQuantileSketchTest::testMedian |"); - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "*** Exact ***"); { maths::CQuantileSketch sketch(maths::CQuantileSketch::E_PiecewiseConstant, 10); @@ -386,10 +370,6 @@ void CQuantileSketchTest::testMedian() { } void CQuantileSketchTest::testPropagateForwardByTime() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CQuantileSketchTest::testPropagateForwardByTime |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - // Check that the count is reduced and the invariants still hold. test::CRandomNumbers rng; @@ -406,10 +386,6 @@ void CQuantileSketchTest::testPropagateForwardByTime() { } void CQuantileSketchTest::testQuantileAccuracy() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CQuantileSketchTest::testQuantileAccuracy |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - // Test on a variety of random data sets versus the corresponding // quantile in the raw data. 
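
A companion invariant used alongside the accuracy checks: the c.d.f. and quantile should be near-inverses, so evaluating the empirical c.d.f. at a reported quantile recovers roughly the requested fraction. A sketch against a sorted sample:

    #include <algorithm>
    #include <vector>

    // Empirical c.d.f. of a sorted sample; empiricalCdf(sorted, quantile(q))
    // should be close to q.
    double empiricalCdf(const std::vector<double>& sorted, double x) {
        auto i = std::upper_bound(sorted.begin(), sorted.end(), x);
        return static_cast<double>(i - sorted.begin()) /
               static_cast<double>(sorted.size());
    }
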
@@ -506,10 +482,6 @@ void CQuantileSketchTest::testQuantileAccuracy() { } void CQuantileSketchTest::testCdf() { - LOG_DEBUG(<< "+--------------------------------+"); - LOG_DEBUG(<< "| CQuantileSketchTest::testCdf |"); - LOG_DEBUG(<< "+--------------------------------+"); - // Test that quantile and c.d.f. are idempotent. test::CRandomNumbers rng; @@ -584,10 +556,6 @@ void CQuantileSketchTest::testCdf() { } void CQuantileSketchTest::testPersist() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CQuantileSketchTest::testPersist |"); - LOG_DEBUG(<< "+------------------------------------+"); - test::CRandomNumbers generator; TDoubleVec samples; generator.generateUniformSamples(0.0, 5000.0, 500u, samples); diff --git a/lib/maths/unittest/CRadialBasisFunctionTest.cc b/lib/maths/unittest/CRadialBasisFunctionTest.cc index fc302848c1..822a40488c 100644 --- a/lib/maths/unittest/CRadialBasisFunctionTest.cc +++ b/lib/maths/unittest/CRadialBasisFunctionTest.cc @@ -56,10 +56,6 @@ class CSquareDerivativeAdaptor { } void CRadialBasisFunctionTest::testDerivative() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CRadialBasisFunctionTest::testDerivative |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - const double a = 0.0; const double b = 10.0; const double centres[] = {0.0, 5.0, 10.0}; @@ -111,10 +107,6 @@ void CRadialBasisFunctionTest::testDerivative() { } void CRadialBasisFunctionTest::testMean() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CRadialBasisFunctionTest::testMean |"); - LOG_DEBUG(<< "+--------------------------------------+"); - const double a = 0.0; const double b = 10.0; const double centres[] = {0.0, 5.0, 10.0}; @@ -174,10 +166,6 @@ void CRadialBasisFunctionTest::testMean() { } void CRadialBasisFunctionTest::testMeanSquareDerivative() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CRadialBasisFunctionTest::testMean |"); - LOG_DEBUG(<< "+--------------------------------------+"); - const double a = 0.0; const double b = 10.0; const double centres[] = {0.0, 5.0, 10.0}; @@ -238,10 +226,6 @@ void CRadialBasisFunctionTest::testMeanSquareDerivative() { } void CRadialBasisFunctionTest::testProduct() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CRadialBasisFunctionTest::testProduct |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - const double a = 0.0; const double b = 10.0; const double centres[] = {0.0, 5.0, 10.0}; diff --git a/lib/maths/unittest/CRandomProjectionClustererTest.cc b/lib/maths/unittest/CRandomProjectionClustererTest.cc index c76e63e679..71f0c632d2 100644 --- a/lib/maths/unittest/CRandomProjectionClustererTest.cc +++ b/lib/maths/unittest/CRandomProjectionClustererTest.cc @@ -83,10 +83,6 @@ class CRandomProjectionClustererForTest } void CRandomProjectionClustererTest::testGenerateProjections() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CRandomProjectionClustererTest::testGenerateProjections |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - using TVectorArrayVec = CRandomProjectionClustererForTest<5>::TVectorArrayVec; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; @@ -165,10 +161,6 @@ void CRandomProjectionClustererTest::testGenerateProjections() { } void 
CRandomProjectionClustererTest::testClusterProjections() { - LOG_DEBUG(<< "+----------------------------------------------------------+"); - LOG_DEBUG(<< "| CRandomProjectionClustererTest::testClusterProjections |"); - LOG_DEBUG(<< "+----------------------------------------------------------+"); - // Test that we get the cluster weights, means and covariance // matrices, and the sampled points we expect. Note that we // create a trivial to cluster data set since we don't want @@ -267,10 +259,6 @@ void CRandomProjectionClustererTest::testClusterProjections() { } void CRandomProjectionClustererTest::testNeighbourhoods() { - LOG_DEBUG(<< "+------------------------------------------------------+"); - LOG_DEBUG(<< "| CRandomProjectionClustererTest::testNeighbourhoods |"); - LOG_DEBUG(<< "+------------------------------------------------------+"); - // Test that the neighbourhoods for each point agree reasonably // accurately with the points nearest neighbours. The agreement // isn't perfect because we don't store the full points so are @@ -361,10 +349,6 @@ void CRandomProjectionClustererTest::testNeighbourhoods() { } void CRandomProjectionClustererTest::testSimilarities() { - LOG_DEBUG(<< "+----------------------------------------------------+"); - LOG_DEBUG(<< "| CRandomProjectionClustererTest::testSimilarities |"); - LOG_DEBUG(<< "+----------------------------------------------------+"); - test::CRandomNumbers rng; std::size_t dimension = 30u; @@ -448,10 +432,6 @@ void CRandomProjectionClustererTest::testSimilarities() { } void CRandomProjectionClustererTest::testClusterNeighbourhoods() { - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - LOG_DEBUG(<< "| CRandomProjectionClustererTest::testClusterNeighbourhoods |"); - LOG_DEBUG(<< "+-------------------------------------------------------------+"); - // Test we recover the true clusters. test::CRandomNumbers rng; @@ -531,9 +511,6 @@ void CRandomProjectionClustererTest::testClusterNeighbourhoods() { } void CRandomProjectionClustererTest::testAccuracy() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CRandomProjectionClustererTest::testAccuracy |"); - LOG_DEBUG(<< "+------------------------------------------------+"); } CppUnit::Test* CRandomProjectionClustererTest::suite() { diff --git a/lib/maths/unittest/CRegressionTest.cc b/lib/maths/unittest/CRegressionTest.cc index dafeec6549..6005c1d970 100644 --- a/lib/maths/unittest/CRegressionTest.cc +++ b/lib/maths/unittest/CRegressionTest.cc @@ -56,10 +56,6 @@ using TDoubleArray4 = boost::array; } void CRegressionTest::testInvariants() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testInvariants |"); - LOG_DEBUG(<< "+----------------------------------+"); - // Test at (local) minimum of quadratic residuals. test::CRandomNumbers rng; @@ -119,10 +115,6 @@ void CRegressionTest::testInvariants() { } void CRegressionTest::testFit() { - LOG_DEBUG(<< "+----------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testFit |"); - LOG_DEBUG(<< "+----------------------------+"); - test::CRandomNumbers rng; std::size_t n = 50; @@ -200,10 +192,6 @@ void CRegressionTest::testFit() { } void CRegressionTest::testShiftAbscissa() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testShiftAbscissa |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // Test shifting the abscissa is equivalent to updating // with shifted X-values. 
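
What testShiftAbscissa must preserve is visible in the closed-form simple linear regression: shifting every x by dx leaves the gradient unchanged and moves only the intercept, so predictions at corresponding abscissae agree. A sketch of the algebra (ml's online accumulator is different code, same identity):

    #include <cstddef>
    #include <vector>

    struct SFit { double intercept; double gradient; };

    // Ordinary least squares; refitting on x + dx yields the same gradient
    // and an intercept reduced by gradient * dx.
    SFit leastSquares(const std::vector<double>& x, const std::vector<double>& y) {
        double n = static_cast<double>(x.size());
        double sx = 0.0, sy = 0.0, sxx = 0.0, sxy = 0.0;
        for (std::size_t i = 0u; i < x.size(); ++i) {
            sx += x[i]; sy += y[i]; sxx += x[i] * x[i]; sxy += x[i] * y[i];
        }
        double gradient = (n * sxy - sx * sy) / (n * sxx - sx * sx);
        return SFit{(sy - gradient * sx) / n, gradient};
    }
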
@@ -280,10 +268,6 @@ void CRegressionTest::testShiftAbscissa() { } void CRegressionTest::testShiftOrdinate() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testShiftOrdinate |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // Test that translating the regression by a some delta // produces the desired translation and no change to any // of the derivatives. @@ -312,10 +296,6 @@ void CRegressionTest::testShiftOrdinate() { } void CRegressionTest::testShiftGradient() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testShiftGradient |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // Test that translating the regression by a some delta // produces the desired translation and no change to any // of the derivatives. @@ -384,10 +364,6 @@ void CRegressionTest::testLinearScale() { } void CRegressionTest::testAge() { - LOG_DEBUG(<< "+----------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testAge |"); - LOG_DEBUG(<< "+----------------------------+"); - // Test that the regression is mean reverting. double intercept = 5.0; @@ -506,10 +482,6 @@ void CRegressionTest::testAge() { } void CRegressionTest::testPrediction() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testPrediction |"); - LOG_DEBUG(<< "+-----------------------------------+"); - // Check we get successive better predictions of a power // series function, i.e. x -> sin(x), using higher order // approximations. @@ -589,10 +561,6 @@ void CRegressionTest::testPrediction() { } void CRegressionTest::testCombination() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testCombination |"); - LOG_DEBUG(<< "+------------------------------------+"); - // Test that we can combine regressions on two subsets of // the points to get the same result as the regression on // the full collection of points. @@ -648,10 +616,6 @@ void CRegressionTest::testCombination() { } void CRegressionTest::testSingular() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testSingular |"); - LOG_DEBUG(<< "+---------------------------------+"); - // Test that we get the highest order polynomial regression // available for the points added at any time. In particular, // one needs at least n + 1 points to be able to determine @@ -770,10 +734,6 @@ void CRegressionTest::testSingular() { } void CRegressionTest::testScale() { - LOG_DEBUG(<< "+------------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testScale |"); - LOG_DEBUG(<< "+------------------------------+"); - // Test that scale reduces the count in the regression statistic maths::CRegression::CLeastSquaresOnline<1, double> regression; @@ -824,10 +784,6 @@ class CRegressionPrediction { }; void CRegressionTest::testMean() { - LOG_DEBUG(<< "+-----------------------------+"); - LOG_DEBUG(<< "| CRegressionTest::testMean |"); - LOG_DEBUG(<< "+-----------------------------+"); - // Test that the mean agrees with the numeric integration // of the regression. 
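
testMean's assertion is the calculus identity mean = integral / length. A sketch with a fixed quadratic standing in for the fitted curve:

    #include <cstddef>

    // Midpoint-rule average of f(x) = 1 + 2x + x^2/2 over [a, b]; compare
    // with the closed form (F(b) - F(a)) / (b - a), F(x) = x + x^2 + x^3/6.
    double numericMean(double a, double b, std::size_t steps) {
        double sum = 0.0;
        double dx = (b - a) / static_cast<double>(steps);
        for (std::size_t i = 0u; i < steps; ++i) {
            double x = a + (static_cast<double>(i) + 0.5) * dx;
            sum += 1.0 + 2.0 * x + 0.5 * x * x;
        }
        return sum / static_cast<double>(steps);
    }
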
@@ -871,10 +827,6 @@ void CRegressionTest::testMean() {
}

void CRegressionTest::testCovariances() {
-    LOG_DEBUG(<< "+------------------------------------+");
-    LOG_DEBUG(<< "| CRegressionTest::testCovariances |");
-    LOG_DEBUG(<< "+------------------------------------+");
-
    // Test that the covariance matrix of the regression parameters
    // agrees with the observed sample covariances of independent
    // fits to a matched model.
@@ -951,10 +903,6 @@ void CRegressionTest::testCovariances() {
}

void CRegressionTest::testParameters() {
-    LOG_DEBUG(<< "+-----------------------------------+");
-    LOG_DEBUG(<< "| CRegressionTest::testParameters |");
-    LOG_DEBUG(<< "+-----------------------------------+");
-
    maths::CRegression::CLeastSquaresOnline<3, double> regression;

    for (std::size_t i = 0u; i < 20; ++i) {
@@ -978,10 +926,6 @@ void CRegressionTest::testParameters() {
}

void CRegressionTest::testPersist() {
-    LOG_DEBUG(<< "+--------------------------------+");
-    LOG_DEBUG(<< "| CRegressionTest::testPersist |");
-    LOG_DEBUG(<< "+--------------------------------+");
-
    // Test that persistence is idempotent.

    maths::CRegression::CLeastSquaresOnline<2, double> origRegression;
@@ -1022,10 +966,6 @@ void CRegressionTest::testPersist() {
}

void CRegressionTest::testParameterProcess() {
-    LOG_DEBUG(<< "+-----------------------------------------+");
-    LOG_DEBUG(<< "| CRegressionTest::testParameterProcess |");
-    LOG_DEBUG(<< "+-----------------------------------------+");
-
    // Approximately test that the variance predicted by the regression
    // parameter process is an unbiased estimator. This is done by
    // simulating an approximation of the process d^2X(t)/dt^2 = W(t),
diff --git a/lib/maths/unittest/CSamplingTest.cc b/lib/maths/unittest/CSamplingTest.cc
index d5fb6f745b..4a30353cd0 100644
--- a/lib/maths/unittest/CSamplingTest.cc
+++ b/lib/maths/unittest/CSamplingTest.cc
@@ -114,10 +110,6 @@ double frobenius(const TDoubleVecVec& m) {
}

void CSamplingTest::testMultinomialSample() {
-    LOG_DEBUG(<< "+----------------------------------------+");
-    LOG_DEBUG(<< "| CSamplingTest::testMultinomialSample |");
-    LOG_DEBUG(<< "+----------------------------------------+");
-
    using TSizeVecDoubleMap = std::map<TSizeVec, double>;
    using TSizeVecDoubleMapCItr = TSizeVecDoubleMap::const_iterator;

@@ -158,10 +154,6 @@ void CSamplingTest::testMultinomialSample() {
}

void CSamplingTest::testMultivariateNormalSample() {
-    LOG_DEBUG(<< "+-----------------------------------------------+");
-    LOG_DEBUG(<< "| CSamplingTest::testMultivariateNormalSample |");
-    LOG_DEBUG(<< "+-----------------------------------------------+");
-
    using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;

    maths::CSampling::seed();
diff --git a/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc b/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc
index 518fdeebad..5e801bf849 100644
--- a/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc
+++ b/lib/maths/unittest/CSeasonalComponentAdaptiveBucketingTest.cc
@@ -36,10 +36,6 @@ using TMaxAccumulator = maths::CBasicStatistics::SMax<double>::TAccumulator;
}

void CSeasonalComponentAdaptiveBucketingTest::testInitialize() {
-    LOG_DEBUG(<< "+-----------------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testInitialize |");
-    LOG_DEBUG(<< "+-----------------------------------------------------------+");
-
    maths::CDiurnalTime time(0, 1, 101, 100);
    maths::CSeasonalComponentAdaptiveBucketing bucketing(time);

@@ -66,10 +62,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testInitialize() {
}

void CSeasonalComponentAdaptiveBucketingTest::testSwap() {
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testSwap |");
-    LOG_DEBUG(<< "+-----------------------------------------------------+");
-
    maths::CDiurnalTime time1(0, 0, 100, 100);
    maths::CSeasonalComponentAdaptiveBucketing bucketing1(time1, 0.05);

@@ -106,10 +98,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testSwap() {
}

void CSeasonalComponentAdaptiveBucketingTest::testRefine() {
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testRefine |");
-    LOG_DEBUG(<< "+-------------------------------------------------------+");
-
    test::CRandomNumbers rng;

    {
@@ -302,10 +290,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testRefine() {
}

void CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime() {
-    LOG_DEBUG(<< "+------------------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime |");
-    LOG_DEBUG(<< "+------------------------------------------------------------------------+");
-
    // Check that the aging process introduces no error into the
    // bucket values and that the total count is reduced at a
    // uniform rate.
@@ -338,10 +322,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testPropagateForwardsByTime() {
}

void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() {
-    LOG_DEBUG(<< "+--------------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength |");
-    LOG_DEBUG(<< "+--------------------------------------------------------------------+");
-
    const double bucketLength = 3600.0;
    const double function[] = {0.0, 0.0, 10.0, 12.0, 11.0, 16.0, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0};
@@ -406,10 +386,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testMinimumBucketLength() {
}

void CSeasonalComponentAdaptiveBucketingTest::testUnintialized() {
-    LOG_DEBUG(<< "+-------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testUnintialized |");
-    LOG_DEBUG(<< "+-------------------------------------------------------------+");
-
    // Check that all the functions work and return the expected
    // values on an uninitialized bucketing.

@@ -454,10 +430,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testUnintialized() {
}

void CSeasonalComponentAdaptiveBucketingTest::testKnots() {
-    LOG_DEBUG(<< "+------------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testKnots |");
-    LOG_DEBUG(<< "+------------------------------------------------------+");
-
    // Check prediction errors in values and variances.

    test::CRandomNumbers rng;
@@ -554,10 +526,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testKnots() {
}

void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() {
-    LOG_DEBUG(<< "+-------------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots |");
-    LOG_DEBUG(<< "+-------------------------------------------------------------------+");
-
    // Check prediction errors in values.
test::CRandomNumbers rng; @@ -617,10 +585,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testLongTermTrendKnots() { } void CSeasonalComponentAdaptiveBucketingTest::testShiftValue() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testShiftValue |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - // Test that applying a shift translates the predicted values // but doesn't alter the slope or predicted variances. @@ -666,10 +630,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testShiftValue() { } void CSeasonalComponentAdaptiveBucketingTest::testSlope() { - LOG_DEBUG(<< "+------------------------------------------------------+"); - LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testSlope |"); - LOG_DEBUG(<< "+------------------------------------------------------+"); - // Test that the slope increases by the shift. maths::CDiurnalTime time(0, 0, 86400, 86400); @@ -704,10 +664,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testSlope() { } void CSeasonalComponentAdaptiveBucketingTest::testPersist() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testPersist |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // Check that serialization is idempotent. double decayRate = 0.1; @@ -762,10 +718,6 @@ void CSeasonalComponentAdaptiveBucketingTest::testPersist() { } void CSeasonalComponentAdaptiveBucketingTest::testUpgrade() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CSeasonalComponentAdaptiveBucketingTest::testUpgrade |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - // Check we can validly upgrade existing state. double decayRate = 0.1; diff --git a/lib/maths/unittest/CSeasonalComponentTest.cc b/lib/maths/unittest/CSeasonalComponentTest.cc index 63f79730b2..2205e1a57a 100644 --- a/lib/maths/unittest/CSeasonalComponentTest.cc +++ b/lib/maths/unittest/CSeasonalComponentTest.cc @@ -116,10 +116,6 @@ double mean(const TDoubleDoublePr& x) { } void CSeasonalComponentTest::testNoPeriodicity() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CSeasonalComponentTest::testNoPeriodicity |"); - LOG_DEBUG(<< "+---------------------------------------------+"); - const core_t::TTime startTime = 1354492800; TTimeDoublePrVec function; @@ -212,10 +208,6 @@ void CSeasonalComponentTest::testNoPeriodicity() { } void CSeasonalComponentTest::testConstantPeriodic() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CSeasonalComponentTest::testConstantPeriodic |"); - LOG_DEBUG(<< "+------------------------------------------------+"); - const core_t::TTime startTime = 1354492800; test::CRandomNumbers rng; @@ -432,10 +424,6 @@ void CSeasonalComponentTest::testConstantPeriodic() { } void CSeasonalComponentTest::testTimeVaryingPeriodic() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CSeasonalComponentTest::testTimeVaryingPeriodic |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - // Test a signal with periodicity which changes slowly // over time. 
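A signal of the kind testTimeVaryingPeriodic exercises is easy to generate: a daily sine whose amplitude drifts slowly, so the pattern at the end of the window differs from the pattern at the start. A minimal sketch of one plausible generator (the constants are illustrative only, not the test's):

#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
    const std::int64_t halfHour = 1800;
    const std::int64_t day = 86400;
    const std::int64_t week = 7 * day;
    const double pi = 3.14159265358979323846;

    for (std::int64_t time = 0; time < 4 * week; time += halfHour) {
        // Amplitude grows slowly over the window, changing the
        // periodic pattern from one repeat to the next.
        double amplitude = 10.0 * (1.0 + 0.5 * static_cast<double>(time) /
                                             static_cast<double>(4 * week));
        double value = amplitude *
                       std::sin(2.0 * pi * static_cast<double>(time % day) /
                                static_cast<double>(day));
        if (time % week == 0) {
            std::cout << "t = " << time << ", value = " << value << "\n";
        }
    }
}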
@@ -557,10 +545,6 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() {
}

void CSeasonalComponentTest::testVeryLowVariation() {
-    LOG_DEBUG(<< "+------------------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentTest::testVeryLowVariation |");
-    LOG_DEBUG(<< "+------------------------------------------------+");
-
    // Test that we fit low variation data very accurately.

    const core_t::TTime startTime = 1354492800;
@@ -655,10 +639,6 @@ void CSeasonalComponentTest::testVeryLowVariation() {
}

void CSeasonalComponentTest::testVariance() {
-    LOG_DEBUG(<< "+----------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentTest::testVariance |");
-    LOG_DEBUG(<< "+----------------------------------------+");
-
    using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;

    // Check that we estimate a periodic variance.
@@ -704,10 +684,6 @@ void CSeasonalComponentTest::testVariance() {
}

void CSeasonalComponentTest::testPersist() {
-    LOG_DEBUG(<< "+---------------------------------------+");
-    LOG_DEBUG(<< "| CSeasonalComponentTest::testPersist |");
-    LOG_DEBUG(<< "+---------------------------------------+");
-
    // Check that persistence is idempotent.

    const core_t::TTime startTime = 1354492800;
diff --git a/lib/maths/unittest/CSetToolsTest.cc b/lib/maths/unittest/CSetToolsTest.cc
index 9c4120ba0f..5d58e20369 100644
--- a/lib/maths/unittest/CSetToolsTest.cc
+++ b/lib/maths/unittest/CSetToolsTest.cc
@@ -25,10 +25,6 @@ using TDoubleVec = std::vector<double>;
using TSizeVec = std::vector<std::size_t>;

void CSetToolsTest::testInplaceSetDifference() {
-    LOG_DEBUG(<< "+-------------------------------------------+");
-    LOG_DEBUG(<< "| CSetToolsTest::testInplaceSetDifference |");
-    LOG_DEBUG(<< "+-------------------------------------------+");
-
    // Test some edge cases.
    {
        LOG_DEBUG(<< "Edge cases");
@@ -108,10 +104,6 @@ void CSetToolsTest::testInplaceSetDifference() {
}

void CSetToolsTest::testSetSizes() {
-    LOG_DEBUG(<< "+-------------------------------+");
-    LOG_DEBUG(<< "| CSetToolsTest::testSetSizes |");
-    LOG_DEBUG(<< "+-------------------------------+");
-
    {
        LOG_DEBUG(<< "Edge cases");

@@ -209,10 +201,6 @@ void CSetToolsTest::testSetSizes() {
}

void CSetToolsTest::testJaccard() {
-    LOG_DEBUG(<< "+------------------------------+");
-    LOG_DEBUG(<< "| CSetToolsTest::testJaccard |");
-    LOG_DEBUG(<< "+------------------------------+");
-
    {
        LOG_DEBUG(<< "Edge cases");

@@ -263,10 +251,6 @@ void CSetToolsTest::testJaccard() {
}

void CSetToolsTest::testOverlap() {
-    LOG_DEBUG(<< "+------------------------------+");
-    LOG_DEBUG(<< "| CSetToolsTest::testOverlap |");
-    LOG_DEBUG(<< "+------------------------------+");
-
    {
        LOG_DEBUG(<< "Edge cases");
diff --git a/lib/maths/unittest/CSignalTest.cc b/lib/maths/unittest/CSignalTest.cc
index d0c16fef3c..b49562d695 100644
--- a/lib/maths/unittest/CSignalTest.cc
+++ b/lib/maths/unittest/CSignalTest.cc
@@ -47,10 +47,6 @@ void bruteForceDft(maths::CSignal::TComplexVec& f, double sign) {
}

void CSignalTest::testFFTVersusOctave() {
-    LOG_DEBUG(<< "+------------------------------------+");
-    LOG_DEBUG(<< "| CSignalTest::testFFTVersusOctave |");
-    LOG_DEBUG(<< "+------------------------------------+");
-
    // Test versus values calculated using octave fft.
double x[][20] = { @@ -147,10 +143,6 @@ void CSignalTest::testFFTVersusOctave() { } void CSignalTest::testIFFTVersusOctave() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CSignalTest::testIFFTVersusOctave |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Test versus values calculated using octave ifft. double x[][20] = { @@ -224,10 +216,6 @@ void CSignalTest::testIFFTVersusOctave() { } void CSignalTest::testFFTRandomized() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CSignalTest::testFFTRandomized |"); - LOG_DEBUG(<< "+----------------------------------+"); - // Test on randomized input versus brute force. test::CRandomNumbers rng; @@ -264,10 +252,6 @@ void CSignalTest::testFFTRandomized() { } void CSignalTest::testIFFTRandomized() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CSignalTest::testIFFTRandomized |"); - LOG_DEBUG(<< "+-----------------------------------+"); - // Test on randomized input versus brute force. test::CRandomNumbers rng; @@ -304,10 +288,6 @@ void CSignalTest::testIFFTRandomized() { } void CSignalTest::testFFTIFFTIdempotency() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CSignalTest::testFFTIFFTIdempotency |"); - LOG_DEBUG(<< "+---------------------------------------+"); - // Test on randomized input that x = F(F^-1(x)). test::CRandomNumbers rng; @@ -344,10 +324,6 @@ void CSignalTest::testFFTIFFTIdempotency() { } void CSignalTest::testAutocorrelations() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CSignalTest::testAutocorrelations |"); - LOG_DEBUG(<< "+-------------------------------------+"); - test::CRandomNumbers rng; TSizeVec sizes; diff --git a/lib/maths/unittest/CSolversTest.cc b/lib/maths/unittest/CSolversTest.cc index 110cf8be48..9b2de6a1ad 100644 --- a/lib/maths/unittest/CSolversTest.cc +++ b/lib/maths/unittest/CSolversTest.cc @@ -74,10 +74,6 @@ class CLog { } void CSolversTest::testBracket() { - LOG_DEBUG(<< "+-----------------------------+"); - LOG_DEBUG(<< "| CSolversTest::testBracket |"); - LOG_DEBUG(<< "+-----------------------------+"); - { CCompositeFunctions::CMinusConstant f(CLog(), 0.0); std::size_t maxIterations = 10u; @@ -133,10 +129,6 @@ void CSolversTest::testBracket() { } void CSolversTest::testBisection() { - LOG_DEBUG(<< "+-------------------------------+"); - LOG_DEBUG(<< "| CSolversTest::testBisection |"); - LOG_DEBUG(<< "+-------------------------------+"); - double a, b; double bestGuess; std::size_t iterations; @@ -267,10 +259,6 @@ void CSolversTest::testBisection() { } void CSolversTest::testBrent() { - LOG_DEBUG(<< "+---------------------------+"); - LOG_DEBUG(<< "| CSolversTest::testBrent |"); - LOG_DEBUG(<< "+---------------------------+"); - double a, b; double bestGuess; std::size_t iterations; diff --git a/lib/maths/unittest/CSplineTest.cc b/lib/maths/unittest/CSplineTest.cc index ceed2e0e49..9043428866 100644 --- a/lib/maths/unittest/CSplineTest.cc +++ b/lib/maths/unittest/CSplineTest.cc @@ -49,10 +49,6 @@ std::string print(maths::CSplineTypes::EType type) { } void CSplineTest::testNatural() { - LOG_DEBUG(<< "+----------------------------+"); - LOG_DEBUG(<< "| CSplineTest::testNatural |"); - LOG_DEBUG(<< "+----------------------------+"); - // Test cubic spline with the natural boundary condition, // i.e. the case that the curvature vanishes at the interval // end points. 
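For reference, the "natural" condition means the second derivative is pinned to zero at the first and last knots. A self-contained sketch with three knots, where the single interior second derivative M1 has a closed form; this is textbook spline algebra, not the library's implementation:

#include <iostream>

int main() {
    // Three knots and values; h0, h1 are the knot spacings.
    double x0 = 0.0, x1 = 1.0, x2 = 3.0;
    double y0 = 0.0, y1 = 2.0, y2 = 1.0;
    double h0 = x1 - x0, h1 = x2 - x1;

    // Natural condition: second derivatives M0 = M2 = 0 at the ends.
    // The interior one solves the standard (here 1x1) tridiagonal system.
    double M0 = 0.0;
    double M1 = 6.0 * ((y2 - y1) / h1 - (y1 - y0) / h0) / (2.0 * (h0 + h1));

    // The spline on [x0, x1] expressed via the second derivatives.
    auto s = [&](double x) {
        double u = x1 - x, v = x - x0;
        return M0 * u * u * u / (6.0 * h0) + M1 * v * v * v / (6.0 * h0) +
               (y0 - M0 * h0 * h0 / 6.0) * u / h0 +
               (y1 - M1 * h0 * h0 / 6.0) * v / h0;
    };

    // Curvature at the left end by finite differences: it vanishes up
    // to O(eps) discretisation error because the spline is natural.
    double eps = 1e-3;
    double curvature = (s(x0 + 2.0 * eps) - 2.0 * s(x0 + eps) + s(x0)) / (eps * eps);
    std::cout << "S''(x0) ~ " << curvature << "\n"; // ~0
}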
@@ -122,10 +118,6 @@ void CSplineTest::testNatural() {
}

void CSplineTest::testParabolicRunout() {
-    LOG_DEBUG(<< "+------------------------------------+");
-    LOG_DEBUG(<< "| CSplineTest::testParabolicRunout |");
-    LOG_DEBUG(<< "+------------------------------------+");
-
    {
        double x_[] = {0.0, 20.0, 21.0, 30.0, 56.0, 100.0, 102.0};
        double y_[] = {1.0, 5.0, 4.0, 13.0, 20.0, 12.0, 17.0};
@@ -195,10 +187,6 @@ void CSplineTest::testParabolicRunout() {
}

void CSplineTest::testPeriodic() {
-    LOG_DEBUG(<< "+-----------------------------+");
-    LOG_DEBUG(<< "| CSplineTest::testPeriodic |");
-    LOG_DEBUG(<< "+-----------------------------+");
-
    {
        double x_[] = {0.0, 0.1, 0.3, 0.33, 0.5, 0.75, 0.8, 1.0};
        TDoubleVec x(boost::begin(x_), boost::end(x_));
@@ -252,10 +240,6 @@ void CSplineTest::testPeriodic() {
}

void CSplineTest::testMean() {
-    LOG_DEBUG(<< "+-------------------------+");
-    LOG_DEBUG(<< "| CSplineTest::testMean |");
-    LOG_DEBUG(<< "+-------------------------+");
-
    // Test that the mean of the cubic spline agrees with its
    // (numerical) integral and the expected mean of the cosine
    // over a whole number of periods.
@@ -357,10 +341,6 @@ void CSplineTest::testMean() {
}

void CSplineTest::testIllposed() {
-    LOG_DEBUG(<< "+-----------------------------+");
-    LOG_DEBUG(<< "| CSplineTest::testIllposed |");
-    LOG_DEBUG(<< "+-----------------------------+");
-
    // Test a case where some of the knot points are colocated.

    double x_[] = {0.0,  0.0,  10.0, 10.0, 15.0, 15.5,
@@ -397,10 +377,6 @@ void CSplineTest::testIllposed() {
}

void CSplineTest::testSlope() {
-    LOG_DEBUG(<< "+--------------------------+");
-    LOG_DEBUG(<< "| CSplineTest::testSlope |");
-    LOG_DEBUG(<< "+--------------------------+");
-
    // Test that the slope and absolute slope agree with the
    // numerical derivatives of the value.

@@ -510,10 +486,6 @@ void CSplineTest::testSlope() {
}

void CSplineTest::testSplineReference() {
-    LOG_DEBUG(<< "+------------------------------------+");
-    LOG_DEBUG(<< "| CSplineTest::testSplineReference |");
-    LOG_DEBUG(<< "+------------------------------------+");
-
    using TFloatVec = std::vector<maths::CFloatStorage>;
    using TFloatVecRef = boost::reference_wrapper<TFloatVec>;
    using TDoubleVecRef = boost::reference_wrapper<TDoubleVec>;
diff --git a/lib/maths/unittest/CStatisticalTestsTest.cc b/lib/maths/unittest/CStatisticalTestsTest.cc
index 78a101a872..d5752f3406 100644
--- a/lib/maths/unittest/CStatisticalTestsTest.cc
+++ b/lib/maths/unittest/CStatisticalTestsTest.cc
@@ -32,10 +32,6 @@ using namespace ml;
using TDoubleVec = std::vector<double>;

void CStatisticalTestsTest::testCramerVonMises() {
-    LOG_DEBUG(<< "+---------------------------------------------+");
-    LOG_DEBUG(<< "| CStatisticalTestsTest::testCramerVonMises |");
-    LOG_DEBUG(<< "+---------------------------------------------+");
-
    // These test that the test statistic p value percentiles
    // are correct if the random variable and the distribution
    // function are perfectly matched.
@@ -121,10 +117,6 @@ void CStatisticalTestsTest::testCramerVonMises() {
}

void CStatisticalTestsTest::testPersist() {
-    LOG_DEBUG(<< "+--------------------------------------+");
-    LOG_DEBUG(<< "| CStatisticalTestsTest::testPersist |");
-    LOG_DEBUG(<< "+--------------------------------------+");
-
    // Check that serialization is idempotent.
{
diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc
index 3b167a441d..d9e926af1f 100644
--- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc
+++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc
@@ -82,10 +82,6 @@ TPriorPtr makeResidualModel() {
}

void CTimeSeriesChangeDetectorTest::testNoChange() {
-    LOG_DEBUG(<< "+-----------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testNoChange |");
-    LOG_DEBUG(<< "+-----------------------------------------------+");
-
    test::CRandomNumbers rng;

    TDoubleVec variances{1.0, 10.0, 20.0, 30.0, 100.0, 1000.0};
@@ -118,8 +114,7 @@ void CTimeSeriesChangeDetectorTest::testNoChange() {
        auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) {
            trendModel->addPoint(time, x);
            double detrended{trendModel->detrend(time, x, 0.0)};
-            residualModel->addSamples(maths::CConstantWeights::COUNT,
-                                      {detrended}, {{1.0}});
+            residualModel->addSamples({detrended}, maths_t::CUnitWeights::SINGLE_UNIT);
            residualModel->propagateForwardsByTime(1.0);
        };

@@ -134,8 +129,7 @@ void CTimeSeriesChangeDetectorTest::testNoChange() {
                                                  24 * core::constants::HOUR, 14.0};
        for (std::size_t i = 950u; i < samples.size(); ++i) {
            addSampleToModel(time, samples[i]);
-            detector.addSamples(maths::CConstantWeights::COUNT,
-                                {{time, samples[i]}}, {{1.0}});
+            detector.addSamples({{time, samples[i]}}, maths_t::CUnitWeights::SINGLE_UNIT);
            if (detector.stopTesting()) {
                break;
            }
@@ -148,42 +142,26 @@ void CTimeSeriesChangeDetectorTest::testNoChange() {
}

void CTimeSeriesChangeDetectorTest::testLevelShift() {
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testLevelShift |");
-    LOG_DEBUG(<< "+-------------------------------------------------+");
-
    TGeneratorVec trends{constant, ramp, smoothDaily, weekends, spikeyDaily};
-
    this->testChange(
        trends, maths::SChangeDescription::E_LevelShift,
        [](TGenerator trend, core_t::TTime time) { return trend(time) + 0.5; }, 5.0, 15.0);
}

void CTimeSeriesChangeDetectorTest::testLinearScale() {
-    LOG_DEBUG(<< "+--------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testLinearScale |");
-    LOG_DEBUG(<< "+--------------------------------------------------+");
-
    TGeneratorVec trends{smoothDaily, spikeyDaily};
-
    this->testChange(
        trends, maths::SChangeDescription::E_LinearScale,
        [](TGenerator trend, core_t::TTime time) { return 3.0 * trend(time); }, 3.0, 15.0);
}

void CTimeSeriesChangeDetectorTest::testTimeShift() {
-    LOG_DEBUG(<< "+------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testTimeShift |");
-    LOG_DEBUG(<< "+------------------------------------------------+");
-
    TGeneratorVec trends{smoothDaily, spikeyDaily};
-
    this->testChange(trends, maths::SChangeDescription::E_TimeShift,
                     [](TGenerator trend, core_t::TTime time) {
                         return trend(time - core::constants::HOUR);
                     },
                     -static_cast<double>(core::constants::HOUR), 24.0);
-
    this->testChange(trends, maths::SChangeDescription::E_TimeShift,
                     [](TGenerator trend, core_t::TTime time) {
                         return trend(time + core::constants::HOUR);
@@ -192,10 +170,6 @@ void CTimeSeriesChangeDetectorTest::testTimeShift() {
}

void CTimeSeriesChangeDetectorTest::testPersist() {
-    LOG_DEBUG(<< "+----------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesChangeDetectorTest::testPersist |");
-    LOG_DEBUG(<< 
"+----------------------------------------------+"); - test::CRandomNumbers rng; TDoubleVec samples; @@ -207,8 +181,7 @@ void CTimeSeriesChangeDetectorTest::testPersist() { auto addSampleToModel = [&trendModel, &residualModel](core_t::TTime time, double x) { trendModel->addPoint(time, x); double detrended{trendModel->detrend(time, x, 0.0)}; - residualModel->addSamples(maths::CConstantWeights::COUNT, {detrended}, - maths::CConstantWeights::SINGLE_UNIT); + residualModel->addSamples({detrended}, maths_t::CUnitWeights::SINGLE_UNIT); residualModel->propagateForwardsByTime(1.0); }; @@ -302,10 +275,9 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends, auto addSampleToModel = [&trendModel, &residualModel]( core_t::TTime time, double x, double weight) { - trendModel->addPoint(time, x, maths::CConstantWeights::COUNT, {weight}); + trendModel->addPoint(time, x, maths_t::countWeight(weight)); double detrended{trendModel->detrend(time, x, 0.0)}; - residualModel->addSamples(maths::CConstantWeights::COUNT, - {detrended}, {{weight}}); + residualModel->addSamples({detrended}, {maths_t::countWeight(weight)}); residualModel->propagateForwardsByTime(1.0); }; @@ -325,7 +297,7 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends, double x{10.0 * applyChange(trends[t % trends.size()], time) + samples[i]}; addSampleToModel(time, x, 0.5); - detector.addSamples(maths::CConstantWeights::COUNT, {{time, x}}, {{1.0}}); + detector.addSamples({{time, x}}, maths_t::CUnitWeights::SINGLE_UNIT); auto change = detector.change(); if (change) { diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc index a538d965b8..db35533dd7 100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc @@ -57,10 +57,6 @@ const core_t::TTime YEAR = core::constants::YEAR; } void CTimeSeriesDecompositionTest::testSuperpositionOfSines() { - LOG_DEBUG(<< "+----------------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testSuperpositionOfSines |"); - LOG_DEBUG(<< "+----------------------------------------------------------+"); - TTimeVec times; TDoubleVec trend; for (core_t::TTime time = 0; time < 100 * WEEK + 1; time += HALF_HOUR) { @@ -160,10 +156,6 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() { } void CTimeSeriesDecompositionTest::testDistortedPeriodic() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testDistortedPeriodic |"); - LOG_DEBUG(<< "+-------------------------------------------------------+"); - const core_t::TTime bucketLength = HOUR; const core_t::TTime startTime = 0; const TDoubleVec timeseries{ @@ -346,10 +338,6 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() { } void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { - LOG_DEBUG(<< "+------------------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testMinimizeLongComponents |"); - LOG_DEBUG(<< "+------------------------------------------------------------+"); - double weights[] = {1.0, 0.1, 1.0, 1.0, 0.1, 1.0, 1.0}; TTimeVec times; @@ -467,10 +455,6 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { } void CTimeSeriesDecompositionTest::testWeekend() { - LOG_DEBUG(<< "+---------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testWeekend 
|"); - LOG_DEBUG(<< "+---------------------------------------------+"); - double weights[] = {0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0}; TTimeVec times; @@ -571,10 +555,6 @@ void CTimeSeriesDecompositionTest::testWeekend() { } void CTimeSeriesDecompositionTest::testSinglePeriodicity() { - LOG_DEBUG(<< "+-------------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testSinglePeriodicity |"); - LOG_DEBUG(<< "+-------------------------------------------------------+"); - TTimeVec times; TDoubleVec trend; for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) { @@ -686,10 +666,6 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() { } void CTimeSeriesDecompositionTest::testSeasonalOnset() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testSeasonalOnset |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - const double daily[] = {0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 40.0, 40.0, 40.0, 30.0, 30.0, 35.0, 35.0, 40.0, 50.0, 60.0, 80.0, 80.0, 10.0, 5.0, 0.0, 0.0, 0.0}; @@ -804,10 +780,6 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() { } void CTimeSeriesDecompositionTest::testVarianceScale() { - LOG_DEBUG(<< "+---------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testVarianceScale |"); - LOG_DEBUG(<< "+---------------------------------------------------+"); - // Test that variance scales are correctly computed. test::CRandomNumbers rng; @@ -953,10 +925,6 @@ void CTimeSeriesDecompositionTest::testVarianceScale() { } void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testSpikeyDataProblemCase |"); - LOG_DEBUG(<< "+-----------------------------------------------------------+"); - TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; @@ -1032,9 +1000,8 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { if (decomposition.addPoint(time, value)) { model.setToNonInformative(0.0, 0.01); } - model.addSamples(maths_t::TWeightStyleVec{maths_t::E_SampleCountWeight}, - TDoubleVec{decomposition.detrend(time, value, 70.0)}, - TDoubleVecVec{TDoubleVec(1, 1.0)}); + model.addSamples({decomposition.detrend(time, value, 70.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); @@ -1063,11 +1030,9 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { double lb, ub; maths_t::ETail tail; model.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec{maths_t::E_SampleSeasonalVarianceScaleWeight}, - TDoubleVec{decomposition.detrend(time, value, 70.0)}, - TDoubleVecVec{TDoubleVec{ - std::max(decomposition.scale(time, variance, 70.0).second, 0.25)}}, + maths_t::E_TwoSided, {decomposition.detrend(time, value, 70.0)}, + {maths_t::seasonalVarianceScaleWeight( + std::max(decomposition.scale(time, variance, 70.0).second, 0.25))}, lb, ub, tail); double pScaled = (lb + ub) / 2.0; pMinScaled = std::min(pMinScaled, pScaled); @@ -1079,10 +1044,8 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { //probs.push_back(-std::log(pScaled)); model.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleSeasonalVarianceScaleWeight), - TDoubleVec(1, 
decomposition.detrend(time, value, 70.0)),
-                TDoubleVecVec(1, TDoubleVec(1, 1.0)), lb, ub, tail);
+                maths_t::E_TwoSided, {decomposition.detrend(time, value, 70.0)},
+                maths_t::CUnitWeights::SINGLE_UNIT, lb, ub, tail);
            double pUnscaled = (lb + ub) / 2.0;
            pMinUnscaled = std::min(pMinUnscaled, pUnscaled);
        }
@@ -1103,10 +1066,6 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() {
}

void CTimeSeriesDecompositionTest::testDiurnalProblemCase() {
-    LOG_DEBUG(<< "+--------------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testDiurnalProblemCase |");
-    LOG_DEBUG(<< "+--------------------------------------------------------+");
-
    TTimeDoublePrVec timeseries;
    core_t::TTime startTime;
    core_t::TTime endTime;
@@ -1217,10 +1176,6 @@ void CTimeSeriesDecompositionTest::testDiurnalProblemCase() {
}

void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() {
-    LOG_DEBUG(<< "+---------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase |");
-    LOG_DEBUG(<< "+---------------------------------------------------------------+");
-
    TTimeDoublePrVec timeseries;
    core_t::TTime startTime;
    core_t::TTime endTime;
@@ -1325,10 +1280,6 @@ void CTimeSeriesDecompositionTest::testComplexDiurnalProblemCase() {
}

void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() {
-    LOG_DEBUG(<< "+-------------------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues |");
-    LOG_DEBUG(<< "+-------------------------------------------------------------------------+");
-
    test::CRandomNumbers rng;

    LOG_DEBUG(<< "Daily Periodic") {
@@ -1437,10 +1388,6 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() {
}

void CTimeSeriesDecompositionTest::testLongTermTrend() {
-    LOG_DEBUG(<< "+---------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testLongTermTrend |");
-    LOG_DEBUG(<< "+---------------------------------------------------+");
-
    const core_t::TTime length = 120 * DAY;

    TTimeVec times;
@@ -1455,7 +1402,8 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() {
    //TDoubleVec f;
    //TDoubleVec values;

-    LOG_DEBUG(<< "Linear Ramp") {
+    LOG_DEBUG(<< "Linear Ramp");
+    {
        for (core_t::TTime time = 0; time < length; time += HALF_HOUR) {
            times.push_back(time);
            trend.push_back(5.0 + static_cast<double>(time) / static_cast<double>(DAY));
@@ -1532,7 +1480,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() {
        trend.clear();

        {
-            std::size_t i = 0u;
+            std::size_t i = 1;
            for (core_t::TTime time = 0; time < length;
                 time += HALF_HOUR, (time > drops[i] ? ++i : i)) {
                times.push_back(time);
@@ -1602,10 +1550,6 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() {
}

void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() {
-    LOG_DEBUG(<< "+-----------------------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity |");
-    LOG_DEBUG(<< "+-----------------------------------------------------------------+");
-
    // Test long term mean reverting component plus daily periodic component.
    TTimeVec times;
@@ -1693,10 +1637,6 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() {
}

void CTimeSeriesDecompositionTest::testNonDiurnal() {
-    LOG_DEBUG(<< "+------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testNonDiurnal |");
-    LOG_DEBUG(<< "+------------------------------------------------+");
-
    test::CRandomNumbers rng;

    LOG_DEBUG(<< "Hourly") {
@@ -1876,10 +1816,6 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() {
}

void CTimeSeriesDecompositionTest::testYearly() {
-    LOG_DEBUG(<< "+--------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testYearly |");
-    LOG_DEBUG(<< "+--------------------------------------------+");
-
    using TDouble1Vec = core::CSmallVector<double, 1>;

    test::CRandomNumbers rng;
@@ -1944,10 +1880,6 @@ void CTimeSeriesDecompositionTest::testYearly() {
}

void CTimeSeriesDecompositionTest::testCalendar() {
-    LOG_DEBUG(<< "+----------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testCalendar |");
-    LOG_DEBUG(<< "+----------------------------------------------+");
-
    // Test that we significantly reduce the error on the last Friday of each
    // month after estimating the appropriate component.

@@ -2027,10 +1959,6 @@ void CTimeSeriesDecompositionTest::testCalendar() {
}

void CTimeSeriesDecompositionTest::testConditionOfTrend() {
-    LOG_DEBUG(<< "+------------------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testConditionOfTrend |");
-    LOG_DEBUG(<< "+------------------------------------------------------+");
-
    auto trend = [](core_t::TTime time) {
        return std::pow(static_cast<double>(time) / static_cast<double>(WEEK), 2.0);
    };
@@ -2051,10 +1979,6 @@ void CTimeSeriesDecompositionTest::testConditionOfTrend() {
}

void CTimeSeriesDecompositionTest::testSwap() {
-    LOG_DEBUG(<< "+------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testSwap |");
-    LOG_DEBUG(<< "+------------------------------------------+");
-
    const double decayRate = 0.01;
    const core_t::TTime bucketLength = HALF_HOUR;

@@ -2094,10 +2018,6 @@ void CTimeSeriesDecompositionTest::testSwap() {
}

void CTimeSeriesDecompositionTest::testPersist() {
-    LOG_DEBUG(<< "+---------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testPersist |");
-    LOG_DEBUG(<< "+---------------------------------------------+");
-
    // Check that serialization is idempotent.
    const double decayRate = 0.01;
    const core_t::TTime bucketLength = HALF_HOUR;
@@ -2151,10 +2071,6 @@ void CTimeSeriesDecompositionTest::testPersist() {
}

void CTimeSeriesDecompositionTest::testUpgrade() {
-    LOG_DEBUG(<< "+---------------------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesDecompositionTest::testUpgrade |");
-    LOG_DEBUG(<< "+---------------------------------------------+");
-
    // Check we can validly upgrade existing state.
    using TStrVec = std::vector<std::string>;
diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc
index 1eef7210c6..e3a2837edf 100644
--- a/lib/maths/unittest/CTimeSeriesModelTest.cc
+++ b/lib/maths/unittest/CTimeSeriesModelTest.cc
@@ -40,10 +40,12 @@ using namespace handy_typedefs;
using TBool2Vec = core::CSmallVector<bool, 2>;
using TDoubleVec = std::vector<double>;
using TDoubleVecVec = std::vector<TDoubleVec>;
+using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;
using TDouble2Vec = core::CSmallVector<double, 2>;
using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>;
-using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
-using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>;
+using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry;
+using TDouble2VecWeightsAryVec = std::vector<TDouble2VecWeightsAry>;
+using TDouble10VecWeightsAry1Vec = maths_t::TDouble10VecWeightsAry1Vec;
using TSize1Vec = core::CSmallVector<std::size_t, 1>;
using TTime2Vec = core::CSmallVector<core_t::TTime, 2>;
using TTime2Vec1Vec = core::CSmallVector<TTime2Vec, 1>;
@@ -55,6 +57,7 @@ using TMeanAccumulator2Vec = core::CSmallVector<TMeanAccumulator, 2>;
using TDecompositionPtr = std::shared_ptr<maths::CTimeSeriesDecompositionInterface>;
using TDecompositionPtr10Vec = core::CSmallVector<TDecompositionPtr, 10>;
using TDecayRateController2Ary = maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary;
+using TSetWeightsFunc = void (*)(double, std::size_t, TDouble2VecWeightsAry&);

const double MINIMUM_SEASONAL_SCALE{0.25};
const double MINIMUM_SIGNIFICANT_CORRELATION{0.4};
@@ -99,28 +102,22 @@ maths::CModelParams modelParams(core_t::TTime bucketLength) {
                              core::constants::DAY};
}

-maths::CModelAddSamplesParams addSampleParams(double interval,
-                                              const maths_t::TWeightStyleVec& weightStyles,
-                                              const TDouble2Vec4VecVec& weights) {
+maths::CModelAddSamplesParams
+addSampleParams(double interval, const TDouble2VecWeightsAryVec& weights) {
    maths::CModelAddSamplesParams params;
-    params.integer(false)
-        .propagationInterval(interval)
-        .weightStyles(weightStyles)
-        .trendWeights(weights)
-        .priorWeights(weights);
+    params.integer(false).propagationInterval(interval).trendWeights(weights).priorWeights(weights);
    return params;
}

-maths::CModelAddSamplesParams addSampleParams(const TDouble2Vec4VecVec& weights) {
-    return addSampleParams(1.0, maths::CConstantWeights::COUNT, weights);
+maths::CModelAddSamplesParams addSampleParams(const TDouble2VecWeightsAryVec& weights) {
+    return addSampleParams(1.0, weights);
}

-maths::CModelProbabilityParams computeProbabilityParams(const TDouble2Vec4Vec& weight) {
+maths::CModelProbabilityParams computeProbabilityParams(const TDouble2VecWeightsAry& weight) {
    maths::CModelProbabilityParams params;
    params.addCalculation(maths_t::E_TwoSided)
        .seasonalConfidenceInterval(50.0)
        .addBucketEmpty({false})
-        .weightStyles(maths::CConstantWeights::COUNT)
        .addWeights(weight);
    return params;
}
@@ -180,8 +177,8 @@ void reinitializePrior(double learnRate,
        for (std::size_t i = 0u; i < value.second.size(); ++i) {
            detrended_[0][i] = trends[i]->detrend(value.first, value.second[i], 0.0);
        }
-        prior.addSamples(maths::CConstantWeights::COUNT, detrended_,
-                         {{TDouble10Vec(value.second.size(), learnRate)}});
+        prior.addSamples(detrended_,
+                         {maths_t::countWeight(learnRate, value.second.size())});
    }
    if (controllers) {
        for (auto& trend : trends) {
@@ -195,10 +192,6 @@ void reinitializePrior(double learnRate,
}

void CTimeSeriesModelTest::testClone() {
-    LOG_DEBUG(<< "+-----------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesModelTest::testClone |");
-    LOG_DEBUG(<< "+-----------------------------------+");
-
    // Test all the state is cloned.
    core_t::TTime bucketLength{600};
@@ -215,7 +208,7 @@ void CTimeSeriesModelTest::testClone() {
        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

-        TDouble2Vec4VecVec weights{{{1.0}}};
+        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)};
        core_t::TTime time{0};
        for (auto sample : samples) {
            model.addSamples(addSampleParams(weights),
@@ -241,7 +234,7 @@ void CTimeSeriesModelTest::testClone() {
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);

-        TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
+        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)};
        core_t::TTime time{0};
        for (const auto& sample : samples) {
            model.addSamples(addSampleParams(weights),
@@ -261,10 +254,6 @@ void CTimeSeriesModelTest::testClone() {
}

void CTimeSeriesModelTest::testMode() {
-    LOG_DEBUG(<< "+----------------------------------+");
-    LOG_DEBUG(<< "| CTimeSeriesModelTest::testMode |");
-    LOG_DEBUG(<< "+----------------------------------+");
-
    // Test that the modes we get match those obtained by updating the trend(s)
    // and prior directly.

@@ -285,14 +274,12 @@ void CTimeSeriesModelTest::testMode() {
        for (auto sample : samples) {
            trend.addPoint(time, sample);
            TDouble1Vec sample_{trend.detrend(time, sample, 0.0)};
-            prior.addSamples(maths::CConstantWeights::COUNT, sample_,
-                             maths::CConstantWeights::SINGLE_UNIT);
+            prior.addSamples(sample_, maths_t::CUnitWeights::SINGLE_UNIT);
            prior.propagateForwardsByTime(1.0);
            time += bucketLength;
        }

-        TDouble2Vec4Vec weight{{1.0}};
-        TDouble2Vec4VecVec weights{weight};
+        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)};
        time = 0;
        for (auto sample : samples) {
            model.addSamples(addSampleParams(weights),
@@ -301,7 +288,7 @@ void CTimeSeriesModelTest::testMode() {
        }
        double expectedMode{maths::CBasicStatistics::mean(trend.value(time)) +
                            prior.marginalLikelihoodMode()};
-        TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight));
+        TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(1)));

        LOG_DEBUG(<< "expected mode = " << expectedMode);
        LOG_DEBUG(<< "mode = " << mode[0]);
@@ -314,7 +301,6 @@ void CTimeSeriesModelTest::testMode() {
        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

-        double learnRate{modelParams(bucketLength).learnRate()};
        maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::CNormalMeanPrecConjugate prior{univariateNormal()};
        maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, prior};
@@ -326,8 +312,7 @@ void CTimeSeriesModelTest::testMode() {
            time += bucketLength;
        }

-        TDouble2Vec4Vec weight{{1.0}};
-        TDouble2Vec4VecVec weights{weight};
+        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)};
        time = 0;
        for (auto sample : samples) {
            model.addSamples(addSampleParams(weights),
@@ -335,21 +320,19 @@ void CTimeSeriesModelTest::testMode() {
            if (trend.addPoint(time, sample)) {
                prior.setToNonInformative(0.0, DECAY_RATE);
                for (const auto& value : model.slidingWindow()) {
-                    prior.addSamples(maths::CConstantWeights::COUNT,
-                                     {trend.detrend(value.first, value.second, 0.0)},
-                                     {{learnRate}});
+                    prior.addSamples({trend.detrend(value.first, value.second, 0.0)},
+                                     maths_t::CUnitWeights::SINGLE_UNIT);
                }
            }
            TDouble1Vec sample_{trend.detrend(time, sample, 0.0)};
-            prior.addSamples(maths::CConstantWeights::COUNT, sample_,
-                             maths::CConstantWeights::SINGLE_UNIT);
+            prior.addSamples(sample_, maths_t::CUnitWeights::SINGLE_UNIT);
            prior.propagateForwardsByTime(1.0);
            time
+= bucketLength; } double expectedMode{maths::CBasicStatistics::mean(trend.value(time)) + prior.marginalLikelihoodMode()}; - TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight)); + TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(1))); LOG_DEBUG(<< "expected mode = " << expectedMode); LOG_DEBUG(<< "mode = " << mode[0]); @@ -379,12 +362,12 @@ void CTimeSeriesModelTest::testMode() { trends[i]->addPoint(time, sample[i]); detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended, - maths::CConstantWeights::singleUnit(3)); + prior.addSamples(detrended, + maths_t::CUnitWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); } - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; time = 0; for (const auto& sample : samples) { model.addSamples(addSampleParams(weights), @@ -392,12 +375,11 @@ void CTimeSeriesModelTest::testMode() { time += bucketLength; } TDouble2Vec expectedMode(prior.marginalLikelihoodMode( - maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3))); + maths_t::CUnitWeights::unit(3))); for (std::size_t i = 0u; i < trends.size(); ++i) { expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->value(time)); } - TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3))); + TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(3))); LOG_DEBUG(<< "expected mode = " << expectedMode); LOG_DEBUG(<< "mode = " << mode); @@ -435,7 +417,7 @@ void CTimeSeriesModelTest::testMode() { time += bucketLength; } - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; time = 0; for (const auto& sample : samples) { model.addSamples(addSampleParams(weights), @@ -450,19 +432,18 @@ void CTimeSeriesModelTest::testMode() { if (reinitialize) { reinitializePrior(learnRate, model, trends, prior); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended, - maths::CConstantWeights::singleUnit(3)); + prior.addSamples(detrended, + maths_t::CUnitWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); time += bucketLength; } TDouble2Vec expectedMode(prior.marginalLikelihoodMode( - maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3))); + maths_t::CUnitWeights::unit(3))); for (std::size_t i = 0u; i < trends.size(); ++i) { expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->value(time)); } - TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3))); + TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(3))); LOG_DEBUG(<< "expected mode = " << expectedMode); LOG_DEBUG(<< "mode = " << mode); @@ -474,10 +455,6 @@ void CTimeSeriesModelTest::testMode() { } void CTimeSeriesModelTest::testAddBucketValue() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testAddBucketValue |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - // Test that the prior support is correctly updated to account // for negative bucket values. 
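The shape of the API change running through these hunks: instead of a parallel vector of weight styles plus nested weight values, each sample now carries one fixed-slot weights array built with helpers such as maths_t::countWeight, with unit defaults from maths_t::CUnitWeights. A toy sketch of that design, using hypothetical stand-in types rather than the real maths_t ones:

#include <array>
#include <iostream>
#include <vector>

// Stand-in for a per-sample weights array: one slot per weight kind,
// defaulting to the unit weight. (Illustrative only; the real type is
// maths_t::TDoubleWeightsAry.)
enum { E_Count = 0, E_Winsorisation = 1, E_CountVarianceScale = 2, E_NumberWeights = 3 };
using TWeightsAry = std::array<double, E_NumberWeights>;

TWeightsAry unitWeight() {
    TWeightsAry w;
    w.fill(1.0);
    return w;
}

// Mirrors the countWeight helper's idea: unit weights except the count slot.
TWeightsAry countWeight(double count) {
    TWeightsAry w = unitWeight();
    w[E_Count] = count;
    return w;
}

int main() {
    // One weights array per sample; no separate "styles" vector to keep in sync.
    std::vector<double> samples{3.5, 3.9, 2.1};
    std::vector<TWeightsAry> weights{countWeight(1.0), countWeight(1.5), countWeight(0.9)};

    for (std::size_t i = 0; i < samples.size(); ++i) {
        std::cout << "sample " << samples[i] << " count weight "
                  << weights[i][E_Count] << "\n";
    }
}

The old scheme required every call site to pass matching style and value vectors; fixed slots make that pairing impossible to get wrong.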
@@ -490,29 +467,27 @@ void CTimeSeriesModelTest::testAddBucketValue() { core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG), core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG), - core::make_triple(core_t::TTime{12}, TDouble2Vec{1.2}, TAG), - }; - TDouble2Vec4VecVec weights{{{1.0}}, {{1.5}}, {{0.9}}, {{1.9}}}; + core::make_triple(core_t::TTime{12}, TDouble2Vec{1.2}, TAG)}; + TDoubleVec weights{1.0, 1.5, 0.9, 1.9}; + TDouble2VecWeightsAryVec modelWeights{ + maths_t::countWeight(TDouble2Vec{weights[0]}), + maths_t::countWeight(TDouble2Vec{weights[1]}), + maths_t::countWeight(TDouble2Vec{weights[2]}), + maths_t::countWeight(TDouble2Vec{weights[3]})}; for (std::size_t i = 0u; i < samples.size(); ++i) { - prior.addSamples(maths::CConstantWeights::COUNT, {samples[i].second[0]}, - {{weights[i][0][0]}}); + prior.addSamples({samples[i].second[0]}, {maths_t::countWeight(weights[i])}); } prior.propagateForwardsByTime(1.0); - prior.adjustOffset(maths::CConstantWeights::COUNT, {-1.0}, - maths::CConstantWeights::SINGLE_UNIT); + prior.adjustOffset({-1.0}, maths_t::CUnitWeights::SINGLE_UNIT); - model.addSamples(addSampleParams(weights), samples); + model.addSamples(addSampleParams(modelWeights), samples); model.addBucketValue({core::make_triple(core_t::TTime{20}, TDouble2Vec{-1.0}, TAG)}); CPPUNIT_ASSERT_EQUAL(prior.checksum(), model.residualModel().checksum()); } void CTimeSeriesModelTest::testAddSamples() { - LOG_DEBUG(<< "+----------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testAddSamples |"); - LOG_DEBUG(<< "+----------------------------------------+"); - // Test: 1) Test multiple samples // 2) Test propagation interval // 3) Test decay rate control @@ -531,20 +506,24 @@ void CTimeSeriesModelTest::testAddSamples() { core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG), core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG)}; - TDouble2Vec4VecVec weights{{{1.0}}, {{1.5}}, {{0.9}}}; + TDoubleVec weights{1.0, 1.5, 0.9}; + TDouble2VecWeightsAryVec modelWeights{ + maths_t::countWeight(TDouble2Vec{weights[0]}), + maths_t::countWeight(TDouble2Vec{weights[1]}), + maths_t::countWeight(TDouble2Vec{weights[2]})}; - model.addSamples(addSampleParams(weights), samples); + model.addSamples(addSampleParams(modelWeights), samples); trend.addPoint(samples[1].first, samples[1].second[0], - maths::CConstantWeights::COUNT, weights[1][0]); + maths_t::countWeight(weights[1])); trend.addPoint(samples[2].first, samples[2].second[0], - maths::CConstantWeights::COUNT, weights[2][0]); + maths_t::countWeight(weights[2])); trend.addPoint(samples[0].first, samples[0].second[0], - maths::CConstantWeights::COUNT, weights[0][0]); - TDouble1Vec samples_{samples[2].second[0], samples[0].second[0], - samples[1].second[0]}; - TDouble4Vec1Vec weights_{weights[2][0], weights[0][0], weights[1][0]}; - prior.addSamples(maths::CConstantWeights::COUNT, samples_, weights_); + maths_t::countWeight(weights[0])); + prior.addSamples( + {samples[2].second[0], samples[0].second[0], samples[1].second[0]}, + {maths_t::countWeight(weights[2]), maths_t::countWeight(weights[0]), + maths_t::countWeight(weights[1])}); prior.propagateForwardsByTime(1.0); uint64_t checksum1{trend.checksum()}; @@ -571,25 +550,29 @@ void CTimeSeriesModelTest::testAddSamples() { core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5, 3.4, 3.3}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9, 
3.8, 3.7}, TAG),
            core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1, 2.0, 1.9}, TAG)};
-        TDouble2Vec4VecVec weights{{{1.0, 1.1, 1.2}}, {{1.5, 1.6, 1.7}}, {{0.9, 1.0, 1.1}}};
+        double weights[][3]{{1.0, 1.1, 1.2}, {1.5, 1.6, 1.7}, {0.9, 1.0, 1.1}};
+        TDouble2VecWeightsAryVec modelWeights{
+            maths_t::countWeight(TDouble2Vec(weights[0], weights[0] + 3)),
+            maths_t::countWeight(TDouble2Vec(weights[1], weights[1] + 3)),
+            maths_t::countWeight(TDouble2Vec(weights[2], weights[2] + 3))};

-        model.addSamples(addSampleParams(weights), samples);
+        model.addSamples(addSampleParams(modelWeights), samples);

        for (std::size_t i = 0u; i < trends.size(); ++i) {
            trends[i]->addPoint(samples[1].first, samples[1].second[i],
-                                maths::CConstantWeights::COUNT,
-                                TDouble4Vec{weights[1][0][i]});
+                                maths_t::countWeight(weights[1][i]));
            trends[i]->addPoint(samples[2].first, samples[2].second[i],
-                                maths::CConstantWeights::COUNT,
-                                TDouble4Vec{weights[2][0][i]});
+                                maths_t::countWeight(weights[2][i]));
            trends[i]->addPoint(samples[0].first, samples[0].second[i],
-                                maths::CConstantWeights::COUNT,
-                                TDouble4Vec{weights[0][0][i]});
+                                maths_t::countWeight(weights[0][i]));
        }
        TDouble10Vec1Vec samples_{samples[2].second, samples[0].second, samples[1].second};
-        TDouble10Vec4Vec1Vec weights_{{weights[2][0]}, {weights[0][0]}, {weights[1][0]}};
-        prior.addSamples(maths::CConstantWeights::COUNT, samples_, weights_);
+        TDouble10VecWeightsAry1Vec weights_{
+            maths_t::countWeight(TDouble10Vec(weights[2], weights[2] + 3)),
+            maths_t::countWeight(TDouble10Vec(weights[0], weights[0] + 3)),
+            maths_t::countWeight(TDouble10Vec(weights[1], weights[1] + 3))};
+        prior.addSamples(samples_, weights_);
        prior.propagateForwardsByTime(1.0);

        for (std::size_t i = 0u; i < trends.size(); ++i) {
@@ -604,10 +587,6 @@ void CTimeSeriesModelTest::testAddSamples() {
        CPPUNIT_ASSERT_EQUAL(checksum1, checksum2);
    }

-    maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleWinsorisationWeight,
-                                          maths_t::E_SampleCountWeight,
-                                          maths_t::E_SampleCountVarianceScaleWeight};
-
    LOG_DEBUG(<< "Propagation interval univariate");
    {
        maths::CTimeSeriesDecompositionStub trend;
@@ -616,15 +595,21 @@ void CTimeSeriesModelTest::testAddSamples() {

        double interval[]{1.0, 1.1, 0.4};
        TDouble2Vec samples[]{{10.0}, {13.9}, {27.1}};
-        TDouble2Vec4VecVec weights{{{0.9}, {1.5}, {1.1}}};
+        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)};
+        maths_t::setCount(TDouble2Vec{1.5}, weights[0]);
+        maths_t::setWinsorisationWeight(TDouble2Vec{0.9}, weights[0]);
+        maths_t::setCountVarianceScale(TDouble2Vec{1.1}, weights[0]);

        core_t::TTime time{0};
        for (std::size_t i = 0u; i < 3; ++i) {
            TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)};
-            model.addSamples(addSampleParams(interval[i], weightStyles, weights), sample);
+            model.addSamples(addSampleParams(interval[i], weights), sample);

-            TDouble4Vec weight{weights[0][0][0], weights[0][1][0], weights[0][2][0]};
-            prior.addSamples(weightStyles, samples[i], {weight});
+            TDoubleWeightsAry1Vec weight{maths_t::CUnitWeights::UNIT};
+            for (std::size_t j = 0u; j < weights[0].size(); ++j) {
+                weight[0][j] = weights[0][j][0];
+            }
+            prior.addSamples(TDouble1Vec(samples[i]), weight);
            prior.propagateForwardsByTime(interval[i]);

            uint64_t checksum1{prior.checksum()};
@@ -648,20 +633,21 @@ void CTimeSeriesModelTest::testAddSamples() {

        double interval[]{1.0, 1.1, 0.4};
        TDouble2Vec samples[]{{13.5, 13.4, 13.3}, {13.9, 13.8, 13.7}, {20.1, 20.0, 10.9}};
-        TDouble2Vec4VecVec weights{
-            {{0.1, 0.1, 0.2}, {1.0, 1.1, 1.2}, {2.0, 2.1,
2.2}}, - {{0.5, 0.6, 0.7}, {2.0, 2.1, 2.2}, {1.0, 1.1, 1.2}}, - {{0.9, 1.0, 1.0}, {0.9, 1.0, 1.0}, {1.9, 2.0, 2.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; + maths_t::setCount(TDouble2Vec{1.0, 1.1, 1.2}, weights[0]); + maths_t::setWinsorisationWeight(TDouble2Vec{0.1, 0.1, 0.2}, weights[0]); + maths_t::setCountVarianceScale(TDouble2Vec{2.0, 2.1, 2.2}, weights[0]); core_t::TTime time{0}; for (std::size_t i = 0u; i < 3; ++i) { TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)}; - model.addSamples(addSampleParams(interval[i], weightStyles, weights), sample); + model.addSamples(addSampleParams(interval[i], weights), sample); - TDouble10Vec4Vec weight{TDouble10Vec(weights[0][0]), - TDouble10Vec(weights[0][1]), - TDouble10Vec(weights[0][2])}; - prior.addSamples(weightStyles, {TDouble10Vec(samples[i])}, {weight}); + TDouble10VecWeightsAry1Vec weight{maths_t::CUnitWeights::unit(3)}; + for (std::size_t j = 0u; j < weights[0].size(); ++j) { + weight[0][j] = weights[0][j]; + } + prior.addSamples({TDouble10Vec(samples[i])}, weight); prior.propagateForwardsByTime(interval[i]); uint64_t checksum1{prior.checksum()}; @@ -675,7 +661,6 @@ void CTimeSeriesModelTest::testAddSamples() { LOG_DEBUG(<< "Decay rate control univariate"); { - double learnRate{modelParams(bucketLength).learnRate()}; maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; auto controllers = decayRateControllers(1); @@ -685,8 +670,7 @@ void CTimeSeriesModelTest::testAddSamples() { TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 2000, samples); - TDouble4Vec1Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto noise : samples) { @@ -703,16 +687,15 @@ void CTimeSeriesModelTest::testAddSamples() { trend.decayRate(trend.decayRate() / controllers[0].multiplier()); prior.setToNonInformative(0.0, prior.decayRate()); for (const auto& value : model.slidingWindow()) { - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(value.first, value.second, 0.0)}, - {{learnRate}}); + prior.addSamples({trend.detrend(value.first, value.second, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } prior.decayRate(prior.decayRate() / controllers[1].multiplier()); controllers[0].reset(); controllers[1].reset(); } double detrended{trend.detrend(time, sample, 0.0)}; - prior.addSamples(maths::CConstantWeights::COUNT, {detrended}, weight); + prior.addSamples({detrended}, maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); if (trend.initialized()) { @@ -759,8 +742,7 @@ void CTimeSeriesModelTest::testAddSamples() { rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); } - TDouble10Vec4Vec1Vec weight{{{1.0, 1.0, 1.0}}}; - TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (auto& sample : samples) { @@ -790,7 +772,8 @@ void CTimeSeriesModelTest::testAddSamples() { if (reinitialize) { reinitializePrior(learnRate, model, trends, prior, &controllers); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended, weight); + prior.addSamples(detrended, + maths_t::CUnitWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); if (hasTrend) { @@ -827,10 +810,6 @@ void CTimeSeriesModelTest::testAddSamples() { } void CTimeSeriesModelTest::testPredict() { - LOG_DEBUG(<< 
"+-------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testPredict |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Test prediction with a trend and with multimodal data. core_t::TTime bucketLength{600}; @@ -839,7 +818,6 @@ void CTimeSeriesModelTest::testPredict() { LOG_DEBUG(<< "Univariate seasonal"); { - double learnRate{modelParams(bucketLength).learnRate()}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; auto controllers = decayRateControllers(1); @@ -848,7 +826,7 @@ void CTimeSeriesModelTest::testPredict() { TDoubleVec samples; rng.generateNormalSamples(0.0, 4.0, 1008, samples); - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * @@ -860,14 +838,12 @@ void CTimeSeriesModelTest::testPredict() { if (trend.addPoint(time, sample)) { prior.setToNonInformative(0.0, DECAY_RATE); for (const auto& value : model.slidingWindow()) { - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(value.first, value.second, 0.0)}, - {{learnRate}}); + prior.addSamples({trend.detrend(value.first, value.second, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } } - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(time, sample, 0.0)}, - maths::CConstantWeights::SINGLE_UNIT); + prior.addSamples({trend.detrend(time, sample, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); time += bucketLength; @@ -907,7 +883,7 @@ void CTimeSeriesModelTest::testPredict() { samples.insert(samples.end(), samples_.begin(), samples_.end()); rng.random_shuffle(samples.begin(), samples.end()); - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { model.addSamples(addSampleParams(weights), @@ -947,7 +923,7 @@ void CTimeSeriesModelTest::testPredict() { rng.generateMultivariateNormalSamples( mean, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples); - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (auto& sample : samples) { for (auto& coordinate : sample) { @@ -963,8 +939,8 @@ void CTimeSeriesModelTest::testPredict() { if (reinitialize) { reinitializePrior(learnRate, model, trends, prior); } - prior.addSamples(maths::CConstantWeights::COUNT, {detrended}, - maths::CConstantWeights::singleUnit(3)); + prior.addSamples({detrended}, + maths_t::CUnitWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); model.addSamples(addSampleParams(weights), @@ -1027,7 +1003,7 @@ void CTimeSeriesModelTest::testPredict() { rng.random_shuffle(samples.begin(), samples.end()); } - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (const auto& sample : samples) { model.addSamples(addSampleParams(weights), @@ -1054,10 +1030,6 @@ void CTimeSeriesModelTest::testPredict() { } void CTimeSeriesModelTest::testProbability() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testProbability |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // 
Test: 1) The calculation matches the expected values
// given the trend and decomposition for different calculations,
// seasonal confidence intervals, weights and so on.
@@ -1086,16 +1058,18 @@ void CTimeSeriesModelTest::testProbability() {
rng.generateNormalSamples(10.0, 4.0, 1000, samples);
core_t::TTime time{0};
- const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(1)};
- for (auto sample : samples) {
- double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
- static_cast<double>(time) / 86400.0)};
- models[0].addSamples(addSampleParams(weight),
- {core::make_triple(time, TDouble2Vec{sample}, TAG)});
- models[1].addSamples(
- addSampleParams(weight),
- {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
- time += bucketLength;
+ {
+ const TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(1)};
+ for (auto sample : samples) {
+ double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
+ static_cast<double>(time) / 86400.0)};
+ models[0].addSamples(addSampleParams(weight),
+ {core::make_triple(time, TDouble2Vec{sample}, TAG)});
+ models[1].addSamples(
+ addSampleParams(weight),
+ {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
+ time += bucketLength;
+ }
}
TTime2Vec1Vec time_{{time}};
@@ -1105,11 +1079,10 @@
maths_t::E_OneSidedAbove};
double confidences[]{0.0, 20.0, 50.0};
bool empties[]{true, false};
- maths_t::TWeightStyleVec weightStyles[]{
- {maths_t::E_SampleCountVarianceScaleWeight},
- {maths_t::E_SampleCountVarianceScaleWeight,
- maths_t::E_SampleSeasonalVarianceScaleWeight}};
- TDouble2Vec4Vec weights[]{{{0.9}}, {{1.1}, {1.8}}};
+ TDouble2VecWeightsAryVec weights(2, maths_t::CUnitWeights::unit(1));
+ maths_t::setCountVarianceScale(TDouble2Vec{0.9}, weights[0]);
+ maths_t::setCountVarianceScale(TDouble2Vec{1.1}, weights[1]);
+ maths_t::setSeasonalVarianceScale(TDouble2Vec{1.8}, weights[1]);
for (auto calculation : calculations) {
LOG_DEBUG(<< "calculation = " << calculation);
@@ -1117,24 +1090,24 @@
LOG_DEBUG(<< " confidence = " << confidence);
for (auto empty : empties) {
LOG_DEBUG(<< " empty = " << empty);
- for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) {
+ for (const auto& weight : weights) {
LOG_DEBUG(<< " weights = "
- << core::CContainerPrinter::print(weights[i]));
+ << core::CContainerPrinter::print(weight));
double expectedProbability[2];
maths_t::ETail expectedTail[2];
{
- TDouble4Vec weights_;
- for (const auto& weight_ : weights[i]) {
- weights_.push_back(weight_[0]);
+ maths_t::TDoubleWeightsAry weight_(maths_t::CUnitWeights::UNIT);
+ for (std::size_t i = 0u; i < weight.size(); ++i) {
+ weight_[i] = weight[i][0];
}
double lb[2], ub[2];
models[0].residualModel().probabilityOfLessLikelySamples(
- calculation, weightStyles[i], sample,
- {weights_}, lb[0], ub[0], expectedTail[0]);
+ calculation, sample, {weight_}, lb[0], ub[0],
+ expectedTail[0]);
models[1].residualModel().probabilityOfLessLikelySamples(
- calculation, weightStyles[i],
+ calculation,
{models[1].trendModel().detrend(time, sample[0], confidence)},
- {weights_}, lb[1], ub[1], expectedTail[1]);
+ {weight_}, lb[1], ub[1], expectedTail[1]);
expectedProbability[0] = (lb[0] + ub[0]) / 2.0;
expectedProbability[1] = (lb[1] + ub[1]) / 2.0;
}
@@ -1146,8 +1119,7 @@
params.addCalculation(calculation)
.seasonalConfidenceInterval(confidence)
.addBucketEmpty({empty})
-
.weightStyles(weightStyles[i]) - .addWeights(weights[i]); + .addWeights(weight); bool conditional; TSize1Vec mostAnomalousCorrelate; models[0].probability(params, time_, {sample}, @@ -1185,22 +1157,21 @@ void CTimeSeriesModelTest::testProbability() { {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples); core_t::TTime time{0}; - const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(3)}; - for (const auto& sample : samples) { - TDouble2Vec sample_(sample); - models[0].addSamples(addSampleParams(weight), - {core::make_triple(time, sample_, TAG)}); - - double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * - static_cast(time) / 86400.0)}; - for (auto& component : sample_) { - component += trend; + { + TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(3)}; + for (auto& sample : samples) { + TDouble2Vec sample_(sample); + models[0].addSamples(addSampleParams(weight), + {core::make_triple(time, sample_, TAG)}); + double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * + static_cast(time) / 86400.0)}; + for (auto& component : sample_) { + component += trend; + } + models[1].addSamples(addSampleParams(weight), + {core::make_triple(time, sample_, TAG)}); + time += bucketLength; } - - models[1].addSamples(addSampleParams(weight), - {core::make_triple(time, sample_, TAG)}); - - time += bucketLength; } TTime2Vec1Vec time_{{time}}; @@ -1210,11 +1181,10 @@ void CTimeSeriesModelTest::testProbability() { maths_t::E_OneSidedAbove}; double confidences[]{0.0, 20.0, 50.0}; bool empties[]{true, false}; - maths_t::TWeightStyleVec weightStyles[]{ - {maths_t::E_SampleCountVarianceScaleWeight}, - {maths_t::E_SampleCountVarianceScaleWeight, - maths_t::E_SampleSeasonalVarianceScaleWeight}}; - TDouble2Vec4Vec weights[]{{{0.9, 0.9, 0.8}}, {{1.1, 1.0, 1.2}, {1.8, 1.7, 1.6}}}; + TDouble2VecWeightsAryVec weights(2, maths_t::CUnitWeights::unit(3)); + maths_t::setCountVarianceScale(TDouble2Vec{0.9, 0.9, 0.8}, weights[0]); + maths_t::setCountVarianceScale(TDouble2Vec{1.1, 1.0, 1.2}, weights[1]); + maths_t::setSeasonalVarianceScale(TDouble2Vec{1.8, 1.7, 1.6}, weights[1]); for (auto calculation : calculations) { LOG_DEBUG(<< "calculation = " << calculation); @@ -1222,28 +1192,29 @@ void CTimeSeriesModelTest::testProbability() { LOG_DEBUG(<< " confidence = " << confidence); for (auto empty : empties) { LOG_DEBUG(<< " empty = " << empty); - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (const auto& weight : weights) { LOG_DEBUG(<< " weights = " - << core::CContainerPrinter::print(weights[i])); + << core::CContainerPrinter::print(weight)); double expectedProbability[2]; TTail10Vec expectedTail[2]; { - TDouble10Vec4Vec weights_; - for (const auto& weight_ : weights[i]) { - weights_.push_back(weight_); + maths_t::TDouble10VecWeightsAry weight_( + maths_t::CUnitWeights::unit(3)); + for (std::size_t i = 0u; i < weight.size(); ++i) { + weight_[i] = weight[i]; } double lb[2], ub[2]; models[0].residualModel().probabilityOfLessLikelySamples( - calculation, weightStyles[i], {TDouble10Vec(sample)}, - {weights_}, lb[0], ub[0], expectedTail[0]); + calculation, {TDouble10Vec(sample)}, {weight_}, + lb[0], ub[0], expectedTail[0]); TDouble10Vec detrended; for (std::size_t j = 0u; j < sample.size(); ++j) { detrended.push_back(models[1].trendModel()[j]->detrend( time, sample[j], confidence)); } models[1].residualModel().probabilityOfLessLikelySamples( - calculation, weightStyles[i], {detrended}, - {weights_}, lb[1], ub[1], expectedTail[1]); + calculation, 
{detrended}, {weight_}, lb[1], + ub[1], expectedTail[1]); expectedProbability[0] = (lb[0] + ub[0]) / 2.0; expectedProbability[1] = (lb[1] + ub[1]) / 2.0; } @@ -1255,8 +1226,7 @@ void CTimeSeriesModelTest::testProbability() { params.addCalculation(calculation) .seasonalConfidenceInterval(confidence) .addBucketEmpty({empty}) - .weightStyles(weightStyles[i]) - .addWeights(weights[i]); + .addWeights(weight); bool conditional; TSize1Vec mostAnomalousCorrelate; models[0].probability(params, time_, {sample}, @@ -1293,8 +1263,7 @@ void CTimeSeriesModelTest::testProbability() { maths::CBasicStatistics::COrderStatisticsHeap smallest(10); - TDouble2Vec4Vec weight(maths::CConstantWeights::unit(1)); - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; std::size_t bucket{0}; core_t::TTime time{0}; for (auto sample : samples) { @@ -1303,15 +1272,13 @@ void CTimeSeriesModelTest::testProbability() { } model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)}); - TTail2Vec tail; double probability; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(computeProbabilityParams(weight), {{time}}, {{sample}}, + model.probability(computeProbabilityParams(weights[0]), {{time}}, {{sample}}, probability, tail, conditional, mostAnomalousCorrelate); smallest.add({probability, bucket - 1}); - time += bucketLength; } @@ -1328,10 +1295,6 @@ void CTimeSeriesModelTest::testProbability() { } void CTimeSeriesModelTest::testWeights() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testWeights |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Check that the seasonal weight matches the value we expect given // 1) the trend and residual model // 2) the variation in the input data @@ -1351,7 +1314,7 @@ void CTimeSeriesModelTest::testWeights() { TDoubleVec samples; rng.generateNormalSamples(0.0, 4.0, 1008, samples); - TDouble2Vec4VecVec weights{{{1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * @@ -1408,7 +1371,7 @@ void CTimeSeriesModelTest::testWeights() { {10.0, 15.0, 11.0}, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1008, samples); - TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (auto& sample : samples) { double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * @@ -1458,10 +1421,6 @@ void CTimeSeriesModelTest::testWeights() { } void CTimeSeriesModelTest::testMemoryUsage() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testMemoryUsage |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // Test we account for the appropriate memory. 
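// [Illustrative sketch, not part of the patch] The memory test below
// checks the model's reported footprint against an independently
// computed total. A minimal standalone mock of that pattern; MockModel
// and its members are this sketch's own names, assuming nothing about
// the real models beyond "memory usage is the sum of the parts".
#include <cassert>
#include <cstddef>
#include <vector>

struct MockModel {
    std::vector<double> trend;
    std::vector<double> residuals;
    std::size_t memoryUsage() const {
        // Static size plus the dynamic size of each component.
        return sizeof(*this) + trend.capacity() * sizeof(double) +
               residuals.capacity() * sizeof(double);
    }
};

int main() {
    MockModel model;
    model.trend.resize(1000);
    model.residuals.resize(500);
    // capacity() >= size(), so the reported total must cover at least this.
    assert(model.memoryUsage() >= sizeof(MockModel) + 1500 * sizeof(double));
    return 0;
}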
core_t::TTime bucketLength{600};
@@ -1477,7 +1436,7 @@ void CTimeSeriesModelTest::testMemoryUsage() {
TDoubleVec samples;
rng.generateNormalSamples(1.0, 4.0, 1000, samples);
- TDouble2Vec4VecVec weights{{{1.0}}};
+ TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)};
core_t::TTime time{0};
for (auto sample : samples) {
sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
@@ -1511,7 +1470,7 @@ void CTimeSeriesModelTest::testMemoryUsage() {
std::unique_ptr<maths::CModel> model{new maths::CMultivariateTimeSeriesModel{
modelParams(bucketLength), trend, prior, &controllers}};
- TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
+ TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)};
core_t::TTime time{0};
for (auto& sample : samples) {
for (auto& coordinate : sample) {
@@ -1538,10 +1497,6 @@
}

void CTimeSeriesModelTest::testPersist() {
- LOG_DEBUG(<< "+-------------------------------------+");
- LOG_DEBUG(<< "| CTimeSeriesModelTest::testPersist |");
- LOG_DEBUG(<< "+-------------------------------------+");
-
// Test the restored model checksum matches the persisted model.

core_t::TTime bucketLength{600};
@@ -1558,7 +1513,7 @@ void CTimeSeriesModelTest::testPersist() {
TDoubleVec samples;
rng.generateNormalSamples(1.0, 4.0, 1000, samples);
- TDouble2Vec4VecVec weights{{{1.0}}};
+ TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)};
core_t::TTime time{0};
for (auto sample : samples) {
origModel.addSamples(addSampleParams(weights),
@@ -1604,7 +1559,7 @@ void CTimeSeriesModelTest::testPersist() {
maths::CMultivariateTimeSeriesModel origModel{modelParams(bucketLength), trend, prior, &controllers};
- TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)};
+ TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)};
core_t::TTime time{0};
for (const auto& sample : samples) {
origModel.addSamples(addSampleParams(weights),
@@ -1641,10 +1596,6 @@
}

void CTimeSeriesModelTest::testUpgrade() {
- LOG_DEBUG(<< "+-------------------------------------+");
- LOG_DEBUG(<< "| CTimeSeriesModelTest::testUpgrade |");
- LOG_DEBUG(<< "+-------------------------------------+");
-
// Test upgrade is minimally disruptive. We test the upgraded model
// predicted confidence intervals versus the values we obtain from
// the previous model.
Note the confidence interval depends on both @@ -1698,8 +1649,7 @@ void CTimeSeriesModelTest::testUpgrade() { core::CStringUtils::tokenise(",", expectedIntervals[i], expectedInterval, empty); std::string interval_{core::CContainerPrinter::print(restoredModel.confidenceInterval( - time, 90.0, maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(1)))}; + time, 90.0, maths_t::CUnitWeights::unit(1)))}; core::CStringUtils::replace("[", "", interval_); core::CStringUtils::replace("]", "", interval_); core::CStringUtils::replace(" ", "", interval_); @@ -1748,8 +1698,7 @@ void CTimeSeriesModelTest::testUpgrade() { core::CStringUtils::tokenise(",", expectedIntervals[i], expectedInterval, empty); std::string interval_{core::CContainerPrinter::print(restoredModel.confidenceInterval( - time, 90.0, maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3)))}; + time, 90.0, maths_t::CUnitWeights::unit(3)))}; core::CStringUtils::replace("[", "", interval_); core::CStringUtils::replace("]", "", interval_); core::CStringUtils::replace(" ", "", interval_); @@ -1767,10 +1716,6 @@ void CTimeSeriesModelTest::testUpgrade() { } void CTimeSeriesModelTest::testAddSamplesWithCorrelations() { - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testAddSamplesWithCorrelations |"); - LOG_DEBUG(<< "+--------------------------------------------------------+"); - LOG_DEBUG(<< "Correlations no trend"); core_t::TTime bucketLength{600}; @@ -1792,7 +1737,7 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations() { models[1].modelCorrelations(correlations); CTimeSeriesCorrelateModelAllocator allocator; - TDouble2Vec4VecVec weights{{{1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { correlations.refresh(allocator); @@ -1800,7 +1745,7 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations() { {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)}); models[1].addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)}); - correlations.processSamples(maths::CConstantWeights::COUNT); + correlations.processSamples(); time += bucketLength; } } @@ -1810,16 +1755,9 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations() { } void CTimeSeriesModelTest::testProbabilityWithCorrelations() { - LOG_DEBUG(<< "+---------------------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testProbabilityWithCorrelations |"); - LOG_DEBUG(<< "+---------------------------------------------------------+"); } void CTimeSeriesModelTest::testAnomalyModel() { - LOG_DEBUG(<< "+------------------------------------------+"); - LOG_DEBUG(<< "| CTimeSeriesModelTest::testAnomalyModel |"); - LOG_DEBUG(<< "+------------------------------------------+"); - // We test we can find the "odd anomaly out". 
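// [Illustrative sketch, not part of the patch] The test keeps the ten
// most anomalous buckets and asserts the injected anomalies are among
// them. The same bookkeeping with a std::priority_queue standing in for
// maths::CBasicStatistics::COrderStatisticsHeap; CMostAnomalous is a
// hypothetical name.
#include <cmath>
#include <cstddef>
#include <queue>
#include <utility>

class CMostAnomalous {
public:
    explicit CMostAnomalous(std::size_t n) : m_N{n} {}

    void add(double probability, std::size_t bucket) {
        // Retain the n smallest log-probabilities; when the set
        // overflows, pop the largest, i.e. the least anomalous.
        m_Heap.emplace(std::log(probability), bucket);
        if (m_Heap.size() > m_N) {
            m_Heap.pop();
        }
    }

    std::size_t count() const { return m_Heap.size(); }

private:
    std::size_t m_N;
    std::priority_queue<std::pair<double, std::size_t>> m_Heap;
};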
using TSizeVec = std::vector; @@ -1848,8 +1786,7 @@ void CTimeSeriesModelTest::testAnomalyModel() { //TDoubleVec scores; maths::CBasicStatistics::COrderStatisticsHeap mostAnomalous(10); - TDouble2Vec4Vec weight(maths::CConstantWeights::unit(1)); - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; std::size_t bucket{0}; core_t::TTime time{0}; for (auto& sample : samples) { @@ -1861,16 +1798,14 @@ void CTimeSeriesModelTest::testAnomalyModel() { } model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec{sample}, TAG)}); - TTail2Vec tail; double probability; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(computeProbabilityParams(weight), {{time}}, {{sample}}, + model.probability(computeProbabilityParams(weights[0]), {{time}}, {{sample}}, probability, tail, conditional, mostAnomalousCorrelate); mostAnomalous.add({std::log(probability), bucket}); //scores.push_back(maths::CTools::deviation(probability)); - time += bucketLength; } @@ -1921,8 +1856,7 @@ void CTimeSeriesModelTest::testAnomalyModel() { //TDoubleVec scores; maths::CBasicStatistics::COrderStatisticsHeap mostAnomalous(10); - TDouble2Vec4Vec weight(maths::CConstantWeights::unit(3)); - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; std::size_t bucket{0}; for (auto& sample : samples) { @@ -1937,16 +1871,14 @@ void CTimeSeriesModelTest::testAnomalyModel() { ++bucket; model.addSamples(addSampleParams(weights), {core::make_triple(time, TDouble2Vec(sample), TAG)}); - TTail2Vec tail; double probability; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(computeProbabilityParams(weight), {{time}}, {(sample)}, + model.probability(computeProbabilityParams(weights[0]), {{time}}, {(sample)}, probability, tail, conditional, mostAnomalousCorrelate); mostAnomalous.add({std::log(probability), bucket}); //scores.push_back(maths::CTools::deviation(probability)); - time += bucketLength; } @@ -1984,10 +1916,6 @@ void CTimeSeriesModelTest::testAnomalyModel() { } void CTimeSeriesModelTest::testStepChangeDiscontinuities() { - LOG_DEBUG("+-------------------------------------------------------+"); - LOG_DEBUG("| CTimeSeriesModelTest::testStepChangeDiscontinuities |"); - LOG_DEBUG("+-------------------------------------------------------+"); - // Check detection and modelling of step changes in data with // 1) Piecewise constant, // 2) Saw tooth. 
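// [Illustrative sketch, not part of the patch] Hypothetical generators
// for the two regimes listed above, to make the test inputs concrete.
#include <cstddef>
#include <vector>

// Piecewise constant: a level with one upward step half way through.
std::vector<double> piecewiseConstant(std::size_t n) {
    std::vector<double> result(n, 10.0);
    for (std::size_t i = n / 2; i < n; ++i) {
        result[i] = 25.0;
    }
    return result;
}

// Saw tooth: a linear ramp which repeatedly resets to zero.
std::vector<double> sawTooth(std::size_t n, std::size_t period, double scale) {
    std::vector<double> result(n);
    for (std::size_t i = 0; i < n; ++i) {
        result[i] = scale * static_cast<double>(i % period) /
                    static_cast<double>(period);
    }
    return result;
}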
@@ -1995,11 +1923,11 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { using TDouble3Vec = core::CSmallVector; using TDouble3VecVec = std::vector; - TDouble2Vec4VecVec weight{{{1.0}}}; + TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(1)}; auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { - weight[0][0] = model.winsorisationWeight(0.0, time, {value}); - model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), + maths_t::setWinsorisationWeight(model.winsorisationWeight(0.0, time, {value}), weight[0]); + model.addSamples(addSampleParams(1.0, weight), {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; @@ -2178,20 +2106,16 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { } void CTimeSeriesModelTest::testLinearScaling() { - LOG_DEBUG("+-------------------------------------------+"); - LOG_DEBUG("| CTimeSeriesModelTest::testLinearScaling |"); - LOG_DEBUG("+-------------------------------------------+"); - // We test that the predictions are good and the bounds do not // blow up after we: // 1) linearly scale down a periodic pattern, // 2) linearly scale up the same periodic pattern. - TDouble2Vec4VecVec weight{{{1.0}}}; + TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(1)}; auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { - weight[0][0] = model.winsorisationWeight(0.0, time, {value}); - model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), + maths_t::setWinsorisationWeight(model.winsorisationWeight(0.0, time, {value}), weight[0]); + model.addSamples(addSampleParams(1.0, weight), {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; @@ -2243,8 +2167,7 @@ void CTimeSeriesModelTest::testLinearScaling() { sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample); updateModel(time, sample, model); //updateTestDebug(time, sample, model); - auto x = model.confidenceInterval(time, 90.0, - {maths_t::E_SampleCountWeight}, {{1.0}}); + auto x = model.confidenceInterval(time, 90.0, maths_t::CUnitWeights::unit(1)); CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 1.2 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); time += bucketLength; @@ -2264,8 +2187,7 @@ void CTimeSeriesModelTest::testLinearScaling() { sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample; updateModel(time, sample, model); //updateTestDebug(time, sample, model); - auto x = model.confidenceInterval(time, 90.0, - {maths_t::E_SampleCountWeight}, {{1.0}}); + auto x = model.confidenceInterval(time, 90.0, maths_t::CUnitWeights::unit(1)); CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.1 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); time += bucketLength; @@ -2278,15 +2200,11 @@ void CTimeSeriesModelTest::testLinearScaling() { } void CTimeSeriesModelTest::testDaylightSaving() { - LOG_DEBUG("+--------------------------------------------+"); - LOG_DEBUG("| CTimeSeriesModelTest::testDaylightSaving |"); - LOG_DEBUG("+--------------------------------------------+"); - - TDouble2Vec4VecVec weight{{{1.0}}}; + TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(1)}; auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { - weight[0][0] = model.winsorisationWeight(0.0, time, {value}); - model.addSamples(addSampleParams(1.0, {maths_t::E_SampleWinsorisationWeight}, weight), + 
maths_t::setWinsorisationWeight(model.winsorisationWeight(0.0, time, {value}), weight[0]); + model.addSamples(addSampleParams(1.0, weight), {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; @@ -2340,8 +2258,7 @@ void CTimeSeriesModelTest::testDaylightSaving() { updateModel(time, sample, model); //updateTestDebug(time, sample, model); CPPUNIT_ASSERT_EQUAL(hour, model.trendModel().timeShift()); - auto x = model.confidenceInterval(time, 90.0, - {maths_t::E_SampleCountWeight}, {{1.0}}); + auto x = model.confidenceInterval(time, 90.0, maths_t::CUnitWeights::unit(1)); CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.6 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.6 * std::sqrt(noiseVariance)); time += bucketLength; @@ -2362,8 +2279,7 @@ void CTimeSeriesModelTest::testDaylightSaving() { updateModel(time, sample, model); //updateTestDebug(time, sample, model); CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), model.trendModel().timeShift()); - auto x = model.confidenceInterval(time, 90.0, - {maths_t::E_SampleCountWeight}, {{1.0}}); + auto x = model.confidenceInterval(time, 90.0, maths_t::CUnitWeights::unit(1)); CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 4.1 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.8 * std::sqrt(noiseVariance)); time += bucketLength; diff --git a/lib/maths/unittest/CToolsTest.cc b/lib/maths/unittest/CToolsTest.cc index b6f2dba0ea..6c3617b26a 100644 --- a/lib/maths/unittest/CToolsTest.cc +++ b/lib/maths/unittest/CToolsTest.cc @@ -339,10 +339,6 @@ class CLogPdf { } void CToolsTest::testProbabilityOfLessLikelySample() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CToolsTest::testProbabilityOfLessLikelySample |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // The probability of a lower likelihood sample x from a single // mode distribution is: // F(a) + 1 - F(b) @@ -849,10 +845,6 @@ void CToolsTest::testProbabilityOfLessLikelySample() { } void CToolsTest::testIntervalExpectation() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CToolsTest::testIntervalExpectation |"); - LOG_DEBUG(<< "+---------------------------------------+"); - // We check the expectations agree with numerical integration // and also some corner cases. 
Specifically, that we handle
// +/- infinity correctly and also the case that a and b
@@ -940,10 +932,6 @@
}

void CToolsTest::testMixtureProbabilityOfLessLikelySample() {
- LOG_DEBUG(<< "+--------------------------------------------------------+");
- LOG_DEBUG(<< "| CToolsTest::testMixtureProbabilityOfLessLikelySample |");
- LOG_DEBUG(<< "+--------------------------------------------------------+");
-
using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator;

test::CRandomNumbers rng;
@@ -1048,11 +1036,7 @@
}

void CToolsTest::testAnomalyScore() {
- LOG_DEBUG("+--------------------------------+");
- LOG_DEBUG("| CToolsTest::testAnomalyScore |");
- LOG_DEBUG("+--------------------------------+");
-
- // Test p = inverseDeviation(deviation(p))
+ // Test p = inverseAnomalyScore(anomalyScore(p))

double p = 0.04;
for (std::size_t i = 0u; i < 305; ++i, p *= 0.1) {
@@ -1063,10 +1047,6 @@
}

void CToolsTest::testSpread() {
- LOG_DEBUG(<< "+--------------------------+");
- LOG_DEBUG(<< "| CToolsTest::testSpread |");
- LOG_DEBUG(<< "+--------------------------+");
-
double period = 86400.0;
{
double raw[] = {15.0, 120.0, 4500.0, 9000.0, 25700.0,
@@ -1122,10 +1102,6 @@
}

void CToolsTest::testFastLog() {
- LOG_DEBUG(<< "+---------------------------+");
- LOG_DEBUG(<< "| CToolsTest::testFastLog |");
- LOG_DEBUG(<< "+---------------------------+");
-
test::CRandomNumbers rng;

// Small
@@ -1168,10 +1144,6 @@
}

void CToolsTest::testMiscellaneous() {
- LOG_DEBUG(<< "+---------------------------------+");
- LOG_DEBUG(<< "| CToolsTest::testMiscellaneous |");
- LOG_DEBUG(<< "+---------------------------------+");
-
double x_[] = {0.0, 3.2, 2.1, -1.8, 4.5};
maths::CVectorNx1<double, 5> x(x_, x_ + 5);
diff --git a/lib/maths/unittest/CTrendComponentTest.cc b/lib/maths/unittest/CTrendComponentTest.cc
index 81511c3f8b..2a591e2a36 100644
--- a/lib/maths/unittest/CTrendComponentTest.cc
+++ b/lib/maths/unittest/CTrendComponentTest.cc
@@ -170,10 +170,6 @@ TDoubleVec switching(test::CRandomNumbers& rng,
}

void CTrendComponentTest::testValueAndVariance() {
- LOG_DEBUG(<< "+---------------------------------------------+");
- LOG_DEBUG(<< "| CTrendComponentTest::testValueAndVariance |");
- LOG_DEBUG(<< "+---------------------------------------------+");
-
// Check that the prediction bias is small in the long run
// and that the predicted variance approximately matches the
// variance observed in prediction errors.
@@ -215,10 +211,6 @@
}

void CTrendComponentTest::testDecayRate() {
- LOG_DEBUG(<< "+--------------------------------------+");
- LOG_DEBUG(<< "| CTrendComponentTest::testDecayRate |");
- LOG_DEBUG(<< "+--------------------------------------+");
-
// Test that the trend short range predictions approximately
// match a regression model with the same decay rate.
@@ -274,10 +266,6 @@
}

void CTrendComponentTest::testForecast() {
- LOG_DEBUG(<< "+-------------------------------------+");
- LOG_DEBUG(<< "| CTrendComponentTest::testForecast |");
- LOG_DEBUG(<< "+-------------------------------------+");
-
// Check the forecast errors for a variety of signals.
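// [Illustrative sketch, not part of the patch] The forecast checks
// reduce to comparing predictions with actuals over the horizon. The
// two statistics such a test needs, bias and mean absolute error;
// forecastErrors is a hypothetical helper.
#include <cassert>
#include <cmath>
#include <cstddef>
#include <utility>
#include <vector>

std::pair<double, double> forecastErrors(const std::vector<double>& actual,
                                         const std::vector<double>& forecast) {
    assert(actual.size() == forecast.size() && !actual.empty());
    double bias{0.0};
    double mae{0.0};
    for (std::size_t i = 0; i < actual.size(); ++i) {
        double error{forecast[i] - actual[i]};
        bias += error;           // Signed: cancels if errors are centred.
        mae += std::fabs(error); // Unsigned: measures typical magnitude.
    }
    double n{static_cast<double>(actual.size())};
    return {bias / n, mae / n};
}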
test::CRandomNumbers rng; @@ -382,10 +370,6 @@ void CTrendComponentTest::testForecast() { } void CTrendComponentTest::testPersist() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CTrendComponentTest::testPersist |"); - LOG_DEBUG(<< "+------------------------------------+"); - // Check that serialization is idempotent. test::CRandomNumbers rng; diff --git a/lib/maths/unittest/CTrendTestsTest.cc b/lib/maths/unittest/CTrendTestsTest.cc index 54d8b0f530..ffc7e1b775 100644 --- a/lib/maths/unittest/CTrendTestsTest.cc +++ b/lib/maths/unittest/CTrendTestsTest.cc @@ -42,10 +42,6 @@ const core_t::TTime WEEK = core::constants::WEEK; } void CTrendTestsTest::testRandomizedPeriodicity() { - LOG_DEBUG(<< "+----------------------------------------------+"); - LOG_DEBUG(<< "| CTrendTestsTest::testRandomizedPeriodicity |"); - LOG_DEBUG(<< "+----------------------------------------------+"); - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using TMaxAccumulator = @@ -134,10 +130,6 @@ void CTrendTestsTest::testRandomizedPeriodicity() { } void CTrendTestsTest::testCalendarCyclic() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CTrendTestsTest::testCalendarCyclic |"); - LOG_DEBUG(<< "+---------------------------------------+"); - using TOptionalFeature = maths::CCalendarCyclicTest::TOptionalFeature; core::CTimezone::instance().timezoneName("GMT"); @@ -289,10 +281,6 @@ void CTrendTestsTest::testCalendarCyclic() { } void CTrendTestsTest::testPersist() { - LOG_DEBUG(<< "+--------------------------------+"); - LOG_DEBUG(<< "| CTrendTestsTest::testPersist |"); - LOG_DEBUG(<< "+--------------------------------+"); - // Check that persistence is idempotent. 
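// [Illustrative sketch, not part of the patch] Every persistence test
// in this patch follows one round-trip pattern: persist, restore into a
// fresh object, persist again and require identical output. A minimal
// mock with a hypothetical CCounter in place of the real state classes.
#include <cassert>
#include <string>

struct CCounter {
    int s_Value = 0;
    std::string persist() const { return std::to_string(s_Value); }
    void restore(const std::string& state) { s_Value = std::stoi(state); }
};

int main() {
    CCounter original;
    original.s_Value = 42;
    std::string once{original.persist()};

    CCounter restored;
    restored.restore(once);

    // Idempotent: the second round trip reproduces the first exactly.
    assert(restored.persist() == once);
    return 0;
}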
LOG_DEBUG(<< "Test CRandomizedPeriodicityTest"); diff --git a/lib/maths/unittest/CXMeansOnline1dTest.cc b/lib/maths/unittest/CXMeansOnline1dTest.cc index 7f3ced0e37..8c7b39911f 100644 --- a/lib/maths/unittest/CXMeansOnline1dTest.cc +++ b/lib/maths/unittest/CXMeansOnline1dTest.cc @@ -53,10 +53,6 @@ void debug(const TClusterVec& clusters) { } void CXMeansOnline1dTest::testCluster() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testCluster |"); - LOG_DEBUG(<< "+------------------------------------+"); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight, 0.1); @@ -181,10 +177,6 @@ void CXMeansOnline1dTest::testCluster() { } void CXMeansOnline1dTest::testMixtureOfGaussians() { - LOG_DEBUG(<< "+-----------------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testMixtureOfGaussians |"); - LOG_DEBUG(<< "+-----------------------------------------------+"); - test::CRandomNumbers rng; // Test 1: @@ -406,10 +398,6 @@ void CXMeansOnline1dTest::testMixtureOfGaussians() { } void CXMeansOnline1dTest::testMixtureOfUniforms() { - LOG_DEBUG(<< "+----------------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testMixtureOfUniforms |"); - LOG_DEBUG(<< "+----------------------------------------------+"); - test::CRandomNumbers rng; // * Cluster 1 = U([12, 15]), 100 points @@ -480,10 +468,6 @@ void CXMeansOnline1dTest::testMixtureOfUniforms() { } void CXMeansOnline1dTest::testMixtureOfLogNormals() { - LOG_DEBUG(<< "+------------------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testMixtureOfLogNormals |"); - LOG_DEBUG(<< "+------------------------------------------------+"); - test::CRandomNumbers rng; // * Cluster 1 = LogNormal(3, 0.01), 100 points @@ -576,10 +560,6 @@ void CXMeansOnline1dTest::testMixtureOfLogNormals() { } void CXMeansOnline1dTest::testOutliers() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testOutliers |"); - LOG_DEBUG(<< "+-------------------------------------+"); - test::CRandomNumbers rng; TDoubleVec mode1; @@ -664,10 +644,6 @@ void CXMeansOnline1dTest::testOutliers() { } void CXMeansOnline1dTest::testManyClusters() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testManyClusters |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - using TTimeDoublePr = std::pair; using TTimeDoublePrVec = std::vector; @@ -704,10 +680,6 @@ void CXMeansOnline1dTest::testManyClusters() { } void CXMeansOnline1dTest::testLowVariation() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testLowVariation |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight); @@ -723,10 +695,6 @@ void CXMeansOnline1dTest::testLowVariation() { } void CXMeansOnline1dTest::testAdaption() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testAdaption |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Test a case where the cluster pattern changes over time. // Specifically, the data set starts with one cluster then // a new cluster appears and subsequently disappears. 
@@ -735,10 +703,6 @@ void CXMeansOnline1dTest::testAdaption() { } void CXMeansOnline1dTest::testLargeHistory() { - LOG_DEBUG(<< "+-----------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testLargeHistory |"); - LOG_DEBUG(<< "+-----------------------------------------+"); - // If we get a lot of history, because we detect that the system // is stable and reduce the decay rate then we should also reduce // the fraction of points required to create a cluster. @@ -779,10 +743,6 @@ void CXMeansOnline1dTest::testLargeHistory() { } void CXMeansOnline1dTest::testPersist() { - LOG_DEBUG(<< "+------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testPersist |"); - LOG_DEBUG(<< "+------------------------------------+"); - test::CRandomNumbers rng; TDoubleVec mode1; @@ -840,10 +800,6 @@ void CXMeansOnline1dTest::testPersist() { } void CXMeansOnline1dTest::testPruneEmptyCluster() { - LOG_DEBUG(<< "+----------------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnline1dTest::testPruneEmptyCluster |"); - LOG_DEBUG(<< "+----------------------------------------------+"); - maths::CXMeansOnline1d clusterer(maths_t::E_ContinuousData, maths::CAvailableModeDistributions::ALL, maths_t::E_ClustersFractionWeight); diff --git a/lib/maths/unittest/CXMeansOnlineTest.cc b/lib/maths/unittest/CXMeansOnlineTest.cc index a71c498699..9f88fd241d 100644 --- a/lib/maths/unittest/CXMeansOnlineTest.cc +++ b/lib/maths/unittest/CXMeansOnlineTest.cc @@ -73,10 +73,6 @@ bool restore(const maths::SDistributionRestoreParams& params, } void CXMeansOnlineTest::testCluster() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnlineTest::testCluster |"); - LOG_DEBUG(<< "+----------------------------------+"); - // Test the core functionality of cluster. TXMeans2 clusterer(maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, 0.1); @@ -219,10 +215,6 @@ void CXMeansOnlineTest::testCluster() { } void CXMeansOnlineTest::testClusteringVanilla() { - LOG_DEBUG(<< "+--------------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnlineTest::testClusteringVanilla |"); - LOG_DEBUG(<< "+--------------------------------------------+"); - // This tests that the chance of splitting data with a single // cluster is low and that we accurately find a small number // of significant clusters. @@ -330,10 +322,6 @@ void CXMeansOnlineTest::testClusteringVanilla() { } void CXMeansOnlineTest::testClusteringWithOutliers() { - LOG_DEBUG(<< "+-------------------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnlineTest::testClusteringWithOutliers |"); - LOG_DEBUG(<< "+-------------------------------------------------+"); - // Test that we are still able to find significant clusters // in the presence of a small number of significant outliers. 
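// [Illustrative sketch, not part of the patch] The kind of input this
// test needs: two well separated Gaussian clusters plus roughly one
// percent uniform outliers. A hypothetical generator; the real test
// uses test::CRandomNumbers.
#include <cstdint>
#include <random>
#include <vector>

std::vector<double> clustersWithOutliers(std::uint32_t seed = 42) {
    std::mt19937 rng{seed};
    std::normal_distribution<double> cluster1{10.0, 1.0};
    std::normal_distribution<double> cluster2{50.0, 2.0};
    std::uniform_real_distribution<double> outlier{-100.0, 200.0};

    std::vector<double> samples;
    for (int i = 0; i < 500; ++i) { samples.push_back(cluster1(rng)); }
    for (int i = 0; i < 500; ++i) { samples.push_back(cluster2(rng)); }
    for (int i = 0; i < 10; ++i)  { samples.push_back(outlier(rng)); }
    return samples;
}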
@@ -432,10 +420,6 @@ void CXMeansOnlineTest::testClusteringWithOutliers() { } void CXMeansOnlineTest::testManyClusters() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnlineTest::testManyClusters |"); - LOG_DEBUG(<< "+---------------------------------------+"); - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; maths::CSampling::seed(); @@ -518,10 +502,6 @@ void CXMeansOnlineTest::testManyClusters() { } void CXMeansOnlineTest::testAdaption() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnlineTest::testAdaption |"); - LOG_DEBUG(<< "+-----------------------------------+"); - // Test a case where the cluster pattern changes over time. // Specifically, the data set starts with one cluster then // a new cluster appears and subsequently disappears. @@ -626,10 +606,6 @@ void CXMeansOnlineTest::testAdaption() { } void CXMeansOnlineTest::testLargeHistory() { - LOG_DEBUG(<< "+---------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnlineTest::testLargeHistory |"); - LOG_DEBUG(<< "+---------------------------------------+"); - // If we get a lot of history, because we detect that the system // is stable and reduce the decay rate then we should also reduce // the fraction of points required to create a cluster. @@ -670,10 +646,6 @@ void CXMeansOnlineTest::testLargeHistory() { } void CXMeansOnlineTest::testLatLongData() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnlineTest::testLatLongData |"); - LOG_DEBUG(<< "+--------------------------------------+"); - using TTimeDoubleVecPr = std::pair; using TTimeDoubleVecPrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; @@ -740,10 +712,6 @@ void CXMeansOnlineTest::testLatLongData() { } void CXMeansOnlineTest::testPersist() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CXMeansOnlineTest::testPersist |"); - LOG_DEBUG(<< "+----------------------------------+"); - // Check that persistence is idempotent. test::CRandomNumbers rng; diff --git a/lib/maths/unittest/CXMeansTest.cc b/lib/maths/unittest/CXMeansTest.cc index 56c418eb6d..f5c61e9935 100644 --- a/lib/maths/unittest/CXMeansTest.cc +++ b/lib/maths/unittest/CXMeansTest.cc @@ -121,10 +121,6 @@ void computePurities(const TSizeVecVec& clusters, TDoubleVec& purities) { } void CXMeansTest::testCluster() { - LOG_DEBUG(<< "+----------------------------+"); - LOG_DEBUG(<< "| CXMeansTest::testCluster |"); - LOG_DEBUG(<< "+----------------------------+"); - // Test basic accessors and checksum functionality of cluster. maths::CSampling::seed(); @@ -186,10 +182,6 @@ void CXMeansTest::testCluster() { } void CXMeansTest::testImproveStructure() { - LOG_DEBUG(<< "+-------------------------------------+"); - LOG_DEBUG(<< "| CXMeansTest::testImproveStructure |"); - LOG_DEBUG(<< "+-------------------------------------+"); - // Test improve structure finds an obvious split of the data. maths::CSampling::seed(); @@ -257,10 +249,6 @@ void CXMeansTest::testImproveStructure() { } void CXMeansTest::testImproveParams() { - LOG_DEBUG(<< "+----------------------------------+"); - LOG_DEBUG(<< "| CXMeansTest::testImproveParams |"); - LOG_DEBUG(<< "+----------------------------------+"); - // Test that improve params is equivalent to a round of k-means // on current state cluster centres. 
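// [Illustrative sketch, not part of the patch] "A round of k-means"
// here means one Lloyd iteration: assign each point to its nearest
// centre, then move every centre to the mean of its assignment. A
// self-contained 1-d version; kmeansRound is a hypothetical name.
#include <cmath>
#include <cstddef>
#include <vector>

void kmeansRound(const std::vector<double>& points, std::vector<double>& centres) {
    std::vector<double> sums(centres.size(), 0.0);
    std::vector<std::size_t> counts(centres.size(), 0);
    for (double point : points) {
        // Assignment step: the nearest centre wins the point.
        std::size_t nearest{0};
        for (std::size_t i = 1; i < centres.size(); ++i) {
            if (std::fabs(point - centres[i]) < std::fabs(point - centres[nearest])) {
                nearest = i;
            }
        }
        sums[nearest] += point;
        ++counts[nearest];
    }
    // Update step: each centre moves to the mean of its points.
    for (std::size_t i = 0; i < centres.size(); ++i) {
        if (counts[i] > 0) {
            centres[i] = sums[i] / static_cast<double>(counts[i]);
        }
    }
}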
@@ -317,10 +305,6 @@ void CXMeansTest::testImproveParams() { } void CXMeansTest::testOneCluster() { - LOG_DEBUG(<< "+-------------------------------+"); - LOG_DEBUG(<< "| CXMeansTest::testOneCluster |"); - LOG_DEBUG(<< "+-------------------------------+"); - // Test it typically chooses just one cluster and that when we // do choose to split it is because a spherical Gaussian is a // bad approximation. @@ -374,10 +358,6 @@ void CXMeansTest::testOneCluster() { } void CXMeansTest::testFiveClusters() { - LOG_DEBUG(<< "+---------------------------------+"); - LOG_DEBUG(<< "| CXMeansTest::testFiveClusters |"); - LOG_DEBUG(<< "+---------------------------------+"); - // Test x-means clustering quality on data with five clusters. maths::CSampling::seed(); @@ -502,10 +482,6 @@ void CXMeansTest::testFiveClusters() { } void CXMeansTest::testTwentyClusters() { - LOG_DEBUG(<< "+-----------------------------------+"); - LOG_DEBUG(<< "| CXMeansTest::testTwentyClusters |"); - LOG_DEBUG(<< "+-----------------------------------+"); - // Test x-means clustering quality on data with twenty clusters. maths::CSampling::seed(); @@ -606,10 +582,6 @@ void CXMeansTest::testTwentyClusters() { } void CXMeansTest::testPoorlyConditioned() { - LOG_DEBUG(<< "+--------------------------------------+"); - LOG_DEBUG(<< "| CXMeansTest::testPoorlyConditioned |"); - LOG_DEBUG(<< "+--------------------------------------+"); - // Test we can handle poorly conditioned covariance matrices. maths::CSampling::seed(); diff --git a/lib/maths/unittest/TestUtils.cc b/lib/maths/unittest/TestUtils.cc index 907469cc50..987936987f 100644 --- a/lib/maths/unittest/TestUtils.cc +++ b/lib/maths/unittest/TestUtils.cc @@ -37,8 +37,8 @@ class CCdf : public std::unary_function { double lowerBound, upperBound; m_X[0] = x; - if (!m_Prior->minusLogJointCdf(CConstantWeights::COUNT_VARIANCE, m_X, - CConstantWeights::SINGLE_UNIT, lowerBound, upperBound)) { + if (!m_Prior->minusLogJointCdf(m_X, maths_t::CUnitWeights::SINGLE_UNIT, + lowerBound, upperBound)) { // We have no choice but to throw because this is // invoked inside a boost root finding function. 
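// [Illustrative sketch, not part of the patch] The CCdf functor above
// exists so a root finder can invert the CDF, i.e. solve F(x) = q for a
// quantile. The same idea with plain bisection on the logistic CDF,
// whose closed-form inverse lets the answer be checked; none of this is
// the boost machinery the tests actually use.
#include <cassert>
#include <cmath>
#include <functional>

double logisticCdf(double x) {
    return 1.0 / (1.0 + std::exp(-x));
}

// Solves cdf(x) = q on [a, b] by bisection, assuming cdf is increasing.
double quantile(const std::function<double(double)>& cdf, double q, double a, double b) {
    for (int i = 0; i < 200; ++i) {
        double m{(a + b) / 2.0};
        (cdf(m) < q ? a : b) = m;
    }
    return (a + b) / 2.0;
}

int main() {
    double x{quantile(logisticCdf, 0.9, -50.0, 50.0)};
    assert(std::fabs(x - std::log(0.9 / 0.1)) < 1e-6); // F^{-1}(q) = log(q/(1-q)).
    return 0;
}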
@@ -92,71 +92,51 @@ CPriorTestInterface::CPriorTestInterface(CPrior& prior) : m_Prior(&prior) { } void CPriorTestInterface::addSamples(const TDouble1Vec& samples) { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - m_Prior->addSamples(TWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); + m_Prior->addSamples(samples, weights); } maths_t::EFloatingPointErrorStatus CPriorTestInterface::jointLogMarginalLikelihood(const TDouble1Vec& samples, double& result) const { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->jointLogMarginalLikelihood(TWeights::COUNT, samples, weights, result); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); + return m_Prior->jointLogMarginalLikelihood(samples, weights, result); } bool CPriorTestInterface::minusLogJointCdf(const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->minusLogJointCdf(TWeights::COUNT, samples, weights, lowerBound, upperBound); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); + return m_Prior->minusLogJointCdf(samples, weights, lowerBound, upperBound); } bool CPriorTestInterface::minusLogJointCdfComplement(const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->minusLogJointCdfComplement(TWeights::COUNT, samples, - weights, lowerBound, upperBound); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); + return m_Prior->minusLogJointCdfComplement(samples, weights, lowerBound, upperBound); } bool CPriorTestInterface::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); maths_t::ETail tail; - return m_Prior->probabilityOfLessLikelySamples( - calculation, TWeights::COUNT, samples, weights, lowerBound, upperBound, tail); + return m_Prior->probabilityOfLessLikelySamples(calculation, samples, weights, + lowerBound, upperBound, tail); } bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, const TDouble1Vec& samples, double& result) const { - TDoubleDoublePr1Vec weightedSamples; - weightedSamples.reserve(samples.size()); - for (std::size_t i = 0u; i < samples.size(); ++i) { - weightedSamples.push_back(std::make_pair(samples[i], 1.0)); - } - return this->anomalyScore(calculation, maths_t::E_SampleCountWeight, - weightedSamples, result); -} -bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, - maths_t::ESampleWeightStyle weightStyle, - const TDoubleDoublePr1Vec& samples, - double& result) const { result = 0.0; - TWeightStyleVec weightStyles(1, weightStyle); - TDouble1Vec samples_(samples.size()); - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - for (std::size_t i = 0u; i < samples.size(); ++i) { - samples_[i] = samples[i].first; - weights[i][0] = samples[i].second; - } - double lowerBound, upperBound; maths_t::ETail tail; - if (!m_Prior->probabilityOfLessLikelySamples(calculation, weightStyles, samples_, weights, + if (!m_Prior->probabilityOfLessLikelySamples(calculation, samples, + maths_t::CUnitWeights::SINGLE_UNIT, lowerBound, upperBound, tail)) { LOG_ERROR(<< "Failed computing probability of 
less likely samples"); return false; @@ -170,6 +150,7 @@ bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculat bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, double eps, double& result) const { + result = 0.0; percentage /= 100.0; @@ -206,6 +187,7 @@ bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, } bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { + using TMarginalLikelihood = CCompositeFunctions::CExp; using TFunctionTimesMarginalLikelihood = @@ -249,6 +231,7 @@ bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { } bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double& result) const { + using TMarginalLikelihood = CCompositeFunctions::CExp; using TResidualTimesMarginalLikelihood = diff --git a/lib/maths/unittest/TestUtils.h b/lib/maths/unittest/TestUtils.h index c95f3d6910..e4d96de16a 100644 --- a/lib/maths/unittest/TestUtils.h +++ b/lib/maths/unittest/TestUtils.h @@ -21,13 +21,9 @@ namespace ml { namespace handy_typedefs { using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; using TDouble10Vec1Vec = core::CSmallVector; -using TDouble10Vec4Vec = core::CSmallVector; using TDouble10Vec10Vec = core::CSmallVector; -using TDouble10Vec4Vec1Vec = core::CSmallVector; using TVector2 = maths::CVectorNx1; using TVector2Vec = std::vector; using TVector2VecVec = std::vector; @@ -47,8 +43,7 @@ class CPriorTestInterface { public: using TDoubleDoublePr = std::pair; using TDoubleDoublePr1Vec = core::CSmallVector; - using TWeightStyleVec = maths_t::TWeightStyleVec; - using TWeights = maths::CConstantWeights; + using TWeights = maths_t::CUnitWeights; public: explicit CPriorTestInterface(maths::CPrior& prior); @@ -209,14 +204,13 @@ class CUnitKernel { bool operator()(const maths::CVectorNx1& x, double& result) const { m_X[0].assign(x.begin(), x.end()); - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, - SINGLE_UNIT, result); + m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, result); result = std::exp(result); return true; } private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::CMultivariatePrior* m_Prior; @@ -224,9 +218,8 @@ class CUnitKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec CUnitKernel::SINGLE_UNIT( - 1, - handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +ml::maths_t::TDouble10VecWeightsAry1Vec CUnitKernel::SINGLE_UNIT{ + ml::maths_t::CUnitWeights::unit(N)}; //! \brief The kernel for computing the mean of a multivariate prior. 
template @@ -239,15 +232,14 @@ class CMeanKernel { maths::CVectorNx1& result) const { m_X[0].assign(x.begin(), x.end()); double likelihood; - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, - SINGLE_UNIT, likelihood); + m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, likelihood); likelihood = std::exp(likelihood); result = x * likelihood; return true; } private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::CMultivariatePrior* m_Prior; @@ -255,9 +247,8 @@ class CMeanKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec CMeanKernel::SINGLE_UNIT( - 1, - handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +ml::maths_t::TDouble10VecWeightsAry1Vec CMeanKernel::SINGLE_UNIT{ + ml::maths_t::CUnitWeights::unit(N)}; //! \brief The kernel for computing the variance of a multivariate prior. template @@ -271,15 +262,14 @@ class CCovarianceKernel { maths::CSymmetricMatrixNxN& result) const { m_X[0].assign(x.begin(), x.end()); double likelihood; - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, - SINGLE_UNIT, likelihood); + m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, likelihood); likelihood = std::exp(likelihood); result = (x - m_Mean).outer() * likelihood; return true; } private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::CMultivariatePrior* m_Prior; @@ -288,9 +278,8 @@ class CCovarianceKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec CCovarianceKernel::SINGLE_UNIT( - 1, - handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +ml::maths_t::TDouble10VecWeightsAry1Vec CCovarianceKernel::SINGLE_UNIT{ + ml::maths_t::CUnitWeights::unit(N)}; //! A constant function. double constant(core_t::TTime time); diff --git a/lib/model/CEventRateModel.cc b/lib/model/CEventRateModel.cc index 8e42026142..3815ea4233 100644 --- a/lib/model/CEventRateModel.cc +++ b/lib/model/CEventRateModel.cc @@ -50,18 +50,11 @@ namespace { using TDouble2Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TBool2Vec = core::CSmallVector; using TTime2Vec = core::CSmallVector; // We use short field names to reduce the state size const std::string INDIVIDUAL_STATE_TAG("a"); const std::string PROBABILITY_PRIOR_TAG("b"); - -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; -const maths_t::TWeightStyleVec - PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight); } CEventRateModel::CEventRateModel(const SModelParams& params, @@ -247,7 +240,7 @@ void CEventRateModel::sample(core_t::TTime startTime, // Declared outside the loop to minimize the number of times they are created. 
maths::CModel::TTimeDouble2VecSizeTrVec values; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights(1); + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights; for (auto& featureData : m_CurrentBucketStats.s_FeatureData) { model_t::EFeature feature = featureData.first; @@ -260,10 +253,8 @@ void CEventRateModel::sample(core_t::TTime startTime, for (const auto& data_ : data) { if (data_.second.s_Count > 0) { LOG_TRACE(<< "person = " << this->personName(data_.first)); - m_ProbabilityPrior.addSamples( - maths::CConstantWeights::COUNT, - TDouble1Vec{static_cast(data_.first)}, - maths::CConstantWeights::SINGLE_UNIT); + m_ProbabilityPrior.addSamples({static_cast(data_.first)}, + maths_t::CUnitWeights::SINGLE_UNIT); } } if (!data.empty()) { @@ -304,6 +295,7 @@ void CEventRateModel::sample(core_t::TTime startTime, double interval = (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) * emptyBucketWeight; + double ceff = emptyBucketWeight * this->learnRate(feature); LOG_TRACE(<< "Bucket = " << this->printCurrentBucket() << ", feature = " << model_t::print(feature) << ", count = " @@ -314,17 +306,17 @@ void CEventRateModel::sample(core_t::TTime startTime, model->params().probabilityBucketEmpty( this->probabilityBucketEmpty(feature, pid)); - TDouble2Vec value(1, count); + TDouble2Vec value{count}; values.assign(1, core::make_triple(sampleTime, value, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)); - weights[0].resize(2, TDouble2Vec(1)); - weights[0][0].assign(dimension, emptyBucketWeight * this->learnRate(feature)); - weights[0][1] = model->winsorisationWeight(derate, sampleTime, value); + weights.resize(1, maths_t::CUnitWeights::unit(dimension)); + maths_t::setCount(TDouble2Vec(dimension, ceff), weights[0]); + maths_t::setWinsorisationWeight( + model->winsorisationWeight(derate, sampleTime, value), weights[0]); maths::CModelAddSamplesParams params; params.integer(true) .nonNegative(true) .propagationInterval(interval) - .weightStyles(SAMPLE_WEIGHT_STYLES) .trendWeights(weights) .priorWeights(weights); @@ -334,7 +326,7 @@ void CEventRateModel::sample(core_t::TTime startTime, } } - this->sampleCorrelateModels(SAMPLE_WEIGHT_STYLES); + this->sampleCorrelateModels(); m_Probabilities = TCategoryProbabilityCache(m_ProbabilityPrior); } } @@ -590,8 +582,8 @@ void CEventRateModel::fill(model_t::EFeature feature, core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength())}; TOptionalUInt64 count{this->currentBucketCount(pid, bucketTime)}; double value{model_t::offsetCountToZero(feature, static_cast(data->s_Count))}; - TDouble2Vec4Vec weight{ - model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)}; + maths_t::TDouble2VecWeightsAry weight(maths_t::seasonalVarianceScaleWeight( + model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time))); params.s_Feature = feature; params.s_Model = model; @@ -599,7 +591,7 @@ void CEventRateModel::fill(model_t::EFeature feature, params.s_Time.assign(1, TTime2Vec{time}); params.s_Value.assign(1, TDouble2Vec{value}); if (interim && model_t::requiresInterimResultAdjustment(feature)) { - double mode{params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weight)[0]}; + double mode{params.s_Model->mode(time, weight)[0]}; TDouble2Vec correction{this->interimValueCorrector().corrections( time, this->currentBucketTotalCount(), mode, value)}; params.s_Value[0] += correction; @@ -608,9 +600,8 @@ void CEventRateModel::fill(model_t::EFeature feature, } params.s_Count = 1.0; params.s_ComputeProbabilityParams 
-        .addCalculation(model_t::probabilityCalculation(feature))
-        .weightStyles(PROBABILITY_WEIGHT_STYLES)
-        .addBucketEmpty(TBool2Vec(1, !count || *count == 0))
+        .addCalculation(model_t::probabilityCalculation(feature)) // new line
+        .addBucketEmpty({!count || *count == 0})
         .addWeights(weight);
 }
 
@@ -637,9 +628,7 @@ void CEventRateModel::fill(model_t::EFeature feature,
     params.s_Variables.resize(correlates.size());
     params.s_CorrelatedLabels.resize(correlates.size());
     params.s_Correlated.resize(correlates.size());
-    params.s_ComputeProbabilityParams
-        .addCalculation(model_t::probabilityCalculation(feature))
-        .weightStyles(PROBABILITY_WEIGHT_STYLES);
+    params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature));
 
     // These are indexed as follows:
     //   influenceValues["influencer name"]["correlate"]["influence value"]
@@ -661,18 +650,17 @@ void CEventRateModel::fill(model_t::EFeature feature,
             params.s_Variables[i] = variables;
             const maths::CModel* models[]{
                 model, this->model(feature, correlates[i][variables[1]])};
-            TDouble2Vec4Vec weight(1, TDouble2Vec(2));
-            weight[0][variables[0]] = models[0]->seasonalWeight(
-                maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0];
-            weight[0][variables[1]] = models[1]->seasonalWeight(
-                maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0];
+            maths_t::TDouble2Vec scale(2);
+            scale[variables[0]] = models[0]->seasonalWeight(
+                maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0];
+            scale[variables[1]] = models[1]->seasonalWeight(
+                maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0];
             TOptionalUInt64 count[2];
             count[0] = this->currentBucketCount(correlates[i][0], bucketTime);
             count[1] = this->currentBucketCount(correlates[i][1], bucketTime);
             params.s_ComputeProbabilityParams
-                .addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0,
-                                          !count[1] || *count[1] == 0})
-                .addWeights(weight);
+                .addBucketEmpty({!count[0] || *count[0] == 0, !count[1] || *count[1] == 0}) // new line
+                .addWeights(maths_t::seasonalVarianceScaleWeight(scale));
 
             const TFeatureData* data[2];
             data[0] = this->featureData(feature, correlates[i][0], bucketTime);
@@ -710,7 +698,7 @@ void CEventRateModel::fill(model_t::EFeature feature,
     }
     if (interim && model_t::requiresInterimResultAdjustment(feature)) {
         TDouble2Vec1Vec modes = params.s_Model->correlateModes(
-            time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights());
+            time, params.s_ComputeProbabilityParams.weights());
         for (std::size_t i = 0u; i < modes.size(); ++i) {
             TDouble2Vec& value_ = params.s_Values[i];
             if (!value_.empty()) {
diff --git a/lib/model/CEventRatePopulationModel.cc b/lib/model/CEventRatePopulationModel.cc
index ce2a0e17a2..f98d3e50c5 100644
--- a/lib/model/CEventRatePopulationModel.cc
+++ b/lib/model/CEventRatePopulationModel.cc
@@ -42,9 +42,6 @@ namespace {
 using TDouble2Vec = core::CSmallVector<double, 2>;
 using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>;
-using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
-using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>;
-using TBool2Vec = core::CSmallVector<bool, 2>;
 using TTime2Vec = core::CSmallVector<core_t::TTime, 2>;
 using TSizeSizePrFeatureDataPrVec = CEventRatePopulationModel::TSizeSizePrFeatureDataPrVec;
 using TFeatureSizeSizePrFeatureDataPrVecPr =
     std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>;
 using TSizeFuzzyDeduplicateUMap =
     boost::unordered_map<std::size_t, CModelTools::CFuzzyDeduplicate>;
 
 //! \brief The values and weights for an attribute.
 struct SValuesAndWeights {
     maths::CModel::TTimeDouble2VecSizeTrVec s_Values;
-    maths::CModelAddSamplesParams::TDouble2Vec4VecVec s_Weights;
+    maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec s_Weights;
 };
 using TSizeValuesAndWeightsUMap = boost::unordered_map<std::size_t, SValuesAndWeights>;
 
@@ -68,11 +65,6 @@
 const std::string FEATURE_MODELS_TAG("d");
 const std::string FEATURE_CORRELATE_MODELS_TAG("e");
 const std::string MEMORY_ESTIMATOR_TAG("f");
 const std::string EMPTY_STRING("");
-
-const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{
-    maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight};
-const maths_t::TWeightStyleVec
-    PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight);
 }
 
 CEventRatePopulationModel::CEventRatePopulationModel(
@@ -374,6 +366,7 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime,
     for (auto& featureData_ : featureData) {
         model_t::EFeature feature = featureData_.first;
+        std::size_t dimension = model_t::dimension(feature);
         TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature];
         data.swap(featureData_.second);
         LOG_TRACE(<< model_t::print(feature) << ": "
@@ -453,13 +446,18 @@
                               : attribute.s_Values.size();
                 if (duplicate < attribute.s_Values.size()) {
-                    attribute.s_Weights[duplicate][0][0] +=
-                        this->sampleRateWeight(pid, cid) * this->learnRate(feature);
+                    double weight{this->sampleRateWeight(pid, cid) * this->learnRate(feature)};
+                    maths_t::addCount(TDouble2Vec{weight}, attribute.s_Weights[duplicate]);
                 } else {
                     attribute.s_Values.emplace_back(sampleTime, TDouble2Vec{value}, pid);
-                    attribute.s_Weights.emplace_back(TDouble2Vec4Vec{
-                        {this->sampleRateWeight(pid, cid) * this->learnRate(feature)},
-                        model->winsorisationWeight(1.0, sampleTime, {value})});
+                    attribute.s_Weights.push_back(
+                        maths_t::CUnitWeights::unit<TDouble2Vec>(1));
+                    auto& weight = attribute.s_Weights.back();
+                    maths_t::setCount(TDouble2Vec{this->sampleRateWeight(pid, cid) *
+                                                  this->learnRate(feature)},
+                                      weight);
+                    maths_t::setWinsorisationWeight(
+                        model->winsorisationWeight(1.0, sampleTime, {value}), weight);
                 }
             }
 
@@ -469,7 +467,6 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime,
             params.integer(true)
                 .nonNegative(true)
                 .propagationInterval(this->propagationTime(cid, sampleTime))
-                .weightStyles(SAMPLE_WEIGHT_STYLES)
                 .trendWeights(attribute.second.s_Weights)
                 .priorWeights(attribute.second.s_Weights);
 
             maths::CModel* model{this->model(feature, cid)};
@@ -481,7 +478,7 @@
     }
 
     for (const auto& feature : m_FeatureCorrelatesModels) {
-        feature.s_Models->processSamples(SAMPLE_WEIGHT_STYLES);
+        feature.s_Models->processSamples();
     }
 
     m_AttributeProbabilities = TCategoryProbabilityCache(m_AttributeProbabilityPrior);
@@ -594,13 +591,11 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid,
             const TSizeSizePrFeatureDataPrVec& data = this->featureData(feature, startTime);
             TSizeSizePr range = personRange(data, pid);
             for (std::size_t j = range.first; j < range.second; ++j) {
-                TDouble1Vec category(
-                    1, static_cast<double>(CDataGatherer::extractAttributeId(data[j])));
-                TDouble4Vec1Vec weights(
-                    1, TDouble4Vec(1, static_cast<double>(
-                                          CDataGatherer::extractData(data[j]).s_Count)));
-                personAttributeProbabilityPrior.addSamples(
-                    maths::CConstantWeights::COUNT, category, weights);
+                TDouble1Vec category{
+                    static_cast<double>(CDataGatherer::extractAttributeId(data[j]))};
+                maths_t::TDoubleWeightsAry1Vec weights{maths_t::countWeight(
+                    static_cast<double>(CDataGatherer::extractData(data[j]).s_Count))};
+                personAttributeProbabilityPrior.addSamples(category, weights);
             }
             continue;
         }
@@ -1011,8 +1006,8 @@ void CEventRatePopulationModel::fill(model_t::EFeature feature,
     auto data = find(this->featureData(feature, bucketTime), pid, cid);
     const maths::CModel* model{this->model(feature, cid)};
     core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength())};
-    TDouble2Vec4Vec weight{
-        model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)};
+    maths_t::TDouble2VecWeightsAry weight(maths_t::seasonalVarianceScaleWeight(
+        model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)));
     double value{model_t::offsetCountToZero(
         feature, static_cast<double>(CDataGatherer::extractData(*data).s_Count))};
 
@@ -1022,7 +1017,7 @@
     params.s_Time.assign(1, TTime2Vec{time});
     params.s_Value.assign(1, TDouble2Vec{value});
     if (interim && model_t::requiresInterimResultAdjustment(feature)) {
-        double mode{params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weight)[0]};
+        double mode{params.s_Model->mode(time, weight)[0]};
         TDouble2Vec correction{this->interimValueCorrector().corrections(
             time, this->currentBucketTotalCount(), mode, value)};
         params.s_Value[0] += correction;
@@ -1030,10 +1025,10 @@
             CCorrectionKey(feature, pid, cid), correction);
     }
     params.s_Count = 1.0;
-    params.s_ComputeProbabilityParams.tag(pid)
+    params.s_ComputeProbabilityParams
+        .tag(pid) // new line
         .addCalculation(model_t::probabilityCalculation(feature))
-        .weightStyles(PROBABILITY_WEIGHT_STYLES)
-        .addBucketEmpty(TBool2Vec(1, false))
+        .addBucketEmpty({false})
         .addWeights(weight);
 }
 
diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc
index b46e5a03c7..d21b0b8284 100644
--- a/lib/model/CIndividualModel.cc
+++ b/lib/model/CIndividualModel.cc
@@ -554,9 +554,9 @@ maths::CModel* CIndividualModel::model(model_t::EFeature feature, std::size_t pi
                : nullptr;
 }
 
-void CIndividualModel::sampleCorrelateModels(const maths_t::TWeightStyleVec& weightStyles) {
+void CIndividualModel::sampleCorrelateModels() {
     for (const auto& feature : m_FeatureCorrelatesModels) {
-        feature.s_Models->processSamples(weightStyles);
+        feature.s_Models->processSamples();
     }
 }
 
diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc
index abb9a7690d..e1e986c541 100644
--- a/lib/model/CMetricModel.cc
+++ b/lib/model/CMetricModel.cc
@@ -50,20 +50,10 @@ namespace {
 using TTime1Vec = core::CSmallVector<core_t::TTime, 1>;
 using TDouble1Vec = core::CSmallVector<double, 1>;
 using TDouble2Vec = core::CSmallVector<double, 2>;
-using TDouble4Vec = core::CSmallVector<double, 4>;
 using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>;
-using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
-using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
-using TBool2Vec = core::CSmallVector<bool, 2>;
 
 // We use short field names to reduce the state size
 const std::string INDIVIDUAL_STATE_TAG("a");
-
-const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{
-    maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight,
-    maths_t::E_SampleCountVarianceScaleWeight};
-const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{
-    maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight};
 }
 
 CMetricModel::CMetricModel(const SModelParams& params,
@@ -224,8 +214,8 @@ void CMetricModel::sample(core_t::TTime startTime,
 
         // Declared outside the loop to minimize the number of times they are created.
         maths::CModel::TTimeDouble2VecSizeTrVec values;
-        maths::CModelAddSamplesParams::TDouble2Vec4VecVec trendWeights;
-        maths::CModelAddSamplesParams::TDouble2Vec4VecVec priorWeights;
+        maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec trendWeights;
+        maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec priorWeights;
 
         for (auto& featureData : m_CurrentBucketStats.s_FeatureData) {
             model_t::EFeature feature = featureData.first;
@@ -265,15 +255,16 @@ void CMetricModel::sample(core_t::TTime startTime,
                     continue;
                 }
 
+                std::size_t n = samples.size();
                 double derate = this->derate(pid, sampleTime);
                 double interval =
                     (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) *
                     emptyBucketWeight;
-                double count = this->params().s_MaximumUpdatesPerBucket > 0.0 &&
-                                       samples.size() > 0
+                double count = this->params().s_MaximumUpdatesPerBucket > 0.0 && n > 0
                                    ? this->params().s_MaximumUpdatesPerBucket /
-                                         static_cast<double>(samples.size())
+                                         static_cast<double>(n)
                                    : 1.0;
+                double ceff = emptyBucketWeight * count * this->learnRate(feature);
 
                 LOG_TRACE(<< "Bucket = " << gatherer.printCurrentBucket(time)
                           << ", feature = " << model_t::print(feature)
@@ -286,31 +277,32 @@
                 model->params().probabilityBucketEmpty(
                     this->probabilityBucketEmpty(feature, pid));
 
-                values.resize(samples.size());
-                trendWeights.resize(samples.size(), TDouble2Vec4Vec(3));
-                priorWeights.resize(samples.size(), TDouble2Vec4Vec(3));
-                for (std::size_t i = 0u; i < samples.size(); ++i) {
+                values.resize(n);
+                trendWeights.resize(n, maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
+                priorWeights.resize(n, maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
+                for (std::size_t i = 0u; i < n; ++i) {
                     core_t::TTime ti = samples[i].time();
                     TDouble2Vec vi(samples[i].value(dimension));
                     double vs = samples[i].varianceScale();
                     values[i] = core::make_triple(
                         model_t::sampleTime(feature, time, bucketLength, ti), vi,
                         model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID);
-                    trendWeights[i][0].assign(dimension, emptyBucketWeight * count *
-                                                             this->learnRate(feature) / vs);
-                    trendWeights[i][1] = model->winsorisationWeight(derate, ti, vi);
-                    trendWeights[i][2].assign(dimension, vs);
-                    priorWeights[i][0].assign(dimension, emptyBucketWeight * count *
-                                                             this->learnRate(feature));
-                    priorWeights[i][1] = trendWeights[i][1];
-                    priorWeights[i][2].assign(dimension, vs);
+                    maths_t::setCount(TDouble2Vec(dimension, ceff / vs), trendWeights[i]);
+                    maths_t::setWinsorisationWeight(
+                        model->winsorisationWeight(derate, ti, vi), trendWeights[i]);
+                    maths_t::setCountVarianceScale(TDouble2Vec(dimension, vs),
+                                                   trendWeights[i]);
+                    maths_t::setCount(TDouble2Vec(dimension, ceff), priorWeights[i]);
+                    maths_t::setWinsorisationWeight(
+                        maths_t::winsorisationWeight(trendWeights[i]), priorWeights[i]);
+                    maths_t::setCountVarianceScale(TDouble2Vec(dimension, vs),
+                                                   priorWeights[i]);
                 }
 
                 maths::CModelAddSamplesParams params;
                 params.integer(data_.second.s_IsInteger)
                     .nonNegative(data_.second.s_IsNonNegative)
                     .propagationInterval(interval)
-                    .weightStyles(SAMPLE_WEIGHT_STYLES)
                     .trendWeights(trendWeights)
                     .priorWeights(priorWeights);
 
@@ -320,7 +312,7 @@
             }
         }
 
-        this->sampleCorrelateModels(SAMPLE_WEIGHT_STYLES);
+        this->sampleCorrelateModels();
    }
 }
 
@@ -539,15 +531,17 @@ void CMetricModel::fill(model_t::EFeature feature,
                         core_t::TTime bucketTime,
                         bool interim,
                         CProbabilityAndInfluenceCalculator::SParams& params) const {
+    std::size_t dimension{model_t::dimension(feature)};
     const TFeatureData* data{this->featureData(feature, pid, bucketTime)};
     const TOptionalSample& bucket{data->s_BucketValue};
     const maths::CModel* model{this->model(feature, pid)};
     core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength(),
                                            bucket->time())};
-    TDouble2Vec4Vec weights(2);
-    weights[0] = model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time);
-    weights[1].assign(dimension, bucket->varianceScale());
+    maths_t::TDouble2VecWeightsAry weights(maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
+    maths_t::setSeasonalVarianceScale(
+        model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), weights);
+    maths_t::setCountVarianceScale(TDouble2Vec(dimension, bucket->varianceScale()), weights);
     TOptionalUInt64 count{this->currentBucketCount(pid, bucketTime)};
 
     params.s_Feature = feature;
@@ -556,7 +550,7 @@ void CMetricModel::fill(model_t::EFeature feature,
     params.s_Time.assign(1, TTime2Vec{time});
     params.s_Value.assign(1, bucket->value());
     if (interim && model_t::requiresInterimResultAdjustment(feature)) {
-        TDouble2Vec mode(params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weights));
+        TDouble2Vec mode(params.s_Model->mode(time, weights));
         TDouble2Vec correction(this->interimValueCorrector().corrections(
             time, this->currentBucketTotalCount(), mode, bucket->value(dimension)));
         params.s_Value[0] += correction;
@@ -565,9 +559,8 @@
     }
     params.s_Count = bucket->count();
     params.s_ComputeProbabilityParams
-        .addCalculation(model_t::probabilityCalculation(feature))
-        .weightStyles(PROBABILITY_WEIGHT_STYLES)
-        .addBucketEmpty(TBool2Vec(1, !count || *count == 0))
+        .addCalculation(model_t::probabilityCalculation(feature)) // new line
+        .addBucketEmpty({!count || *count == 0})
         .addWeights(weights);
 }
 
@@ -577,6 +570,7 @@ void CMetricModel::fill(model_t::EFeature feature,
                         bool interim,
                         CProbabilityAndInfluenceCalculator::SCorrelateParams& params,
                         TStrCRefDouble1VecDouble1VecPrPrVecVecVec& influenceValues) const {
+    using TStrCRefDouble1VecDoublePrPr = std::pair<TStrCRef, TDouble1VecDoublePr>;
 
     const CDataGatherer& gatherer{this->dataGatherer()};
 
@@ -594,9 +588,7 @@ void CMetricModel::fill(model_t::EFeature feature,
     params.s_Variables.resize(correlates.size());
     params.s_CorrelatedLabels.resize(correlates.size());
     params.s_Correlated.resize(correlates.size());
-    params.s_ComputeProbabilityParams
-        .addCalculation(model_t::probabilityCalculation(feature))
-        .weightStyles(PROBABILITY_WEIGHT_STYLES);
+    params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature));
 
     // These are indexed as follows:
     //   influenceValues["influencer name"]["correlate"]["influence value"]
@@ -617,11 +609,13 @@ void CMetricModel::fill(model_t::EFeature feature,
             params.s_Variables[i] = variables;
             const maths::CModel* models[]{
                 model, this->model(feature, correlates[i][variables[1]])};
-            TDouble2Vec4Vec weight(2, TDouble2Vec(2, 1.0));
-            weight[0][variables[0]] = models[0]->seasonalWeight(
-                maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0];
-            weight[0][variables[1]] = models[1]->seasonalWeight(
-                maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0];
+            maths_t::TDouble2VecWeightsAry weight(maths_t::CUnitWeights::unit<TDouble2Vec>(2));
+            TDouble2Vec scale(2);
+            scale[variables[0]] = models[0]->seasonalWeight(
+                maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0];
+            scale[variables[1]] = models[1]->seasonalWeight(
+                maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0];
+            maths_t::setSeasonalVarianceScale(scale, weight);
 
             const TFeatureData* data[2];
             data[0] = this->featureData(feature, correlates[i][0], bucketTime);
@@ -642,8 +636,9 @@
                     params.s_Values[i][2 * j + 0] = bucket0->value()[j];
                     params.s_Values[i][2 * j + 1] = bucket1->value()[j];
                 }
-                weight[1][variables[0]] = bucket0->varianceScale();
-                weight[1][variables[1]] = bucket1->varianceScale();
+                scale[variables[0]] = bucket0->varianceScale();
+                scale[variables[1]] = bucket1->varianceScale();
+                maths_t::setCountVarianceScale(scale, weight);
                 for (std::size_t j = 0u; j < data[0]->s_InfluenceValues.size(); ++j) {
                     for (const auto& influenceValue : data[0]->s_InfluenceValues[j]) {
                         TStrCRef influence = influenceValue.first;
@@ -673,14 +668,13 @@ void CMetricModel::fill(model_t::EFeature feature,
             count[0] = this->currentBucketCount(correlates[i][0], bucketTime);
             count[1] = this->currentBucketCount(correlates[i][1], bucketTime);
             params.s_ComputeProbabilityParams
-                .addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0,
-                                          !count[1] || *count[1] == 0})
+                .addBucketEmpty({!count[0] || *count[0] == 0, !count[1] || *count[1] == 0}) // new line
                 .addWeights(weight);
         }
     }
     if (interim && model_t::requiresInterimResultAdjustment(feature)) {
         core_t::TTime time{bucketTime + bucketLength / 2};
         TDouble2Vec1Vec modes(params.s_Model->correlateModes(
-            time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights()));
+            time, params.s_ComputeProbabilityParams.weights()));
         for (std::size_t i = 0u; i < modes.size(); ++i) {
             if (!params.s_Values.empty()) {
                 TDouble2Vec value_{params.s_Values[i][0], params.s_Values[i][1]};
@@ -691,7 +685,7 @@ void CMetricModel::fill(model_t::EFeature feature,
                 }
                 this->currentBucketInterimCorrections().emplace(
                     core::make_triple(feature, pid, params.s_Correlated[i]),
-                    TDouble1Vec(1, correction[params.s_Variables[i][0]]));
+                    TDouble1Vec{correction[params.s_Variables[i][0]]});
             }
         }
     }
diff --git a/lib/model/CMetricPopulationModel.cc b/lib/model/CMetricPopulationModel.cc
index e34c4d87a3..ebe85f1cc0 100644
--- a/lib/model/CMetricPopulationModel.cc
+++ b/lib/model/CMetricPopulationModel.cc
@@ -46,9 +43,6 @@ namespace {
 using TDouble2Vec = core::CSmallVector<double, 2>;
 using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>;
-using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
-using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>;
-using TBool2Vec = core::CSmallVector<bool, 2>;
 using TTime2Vec = core::CSmallVector<core_t::TTime, 2>;
 using TOptionalSample = boost::optional<CSample>;
 using TSizeSizePrFeatureDataPrVec = CMetricPopulationModel::TSizeSizePrFeatureDataPrVec;
@@ -64,8 +61,8 @@ struct SValuesAndWeights {
     bool s_IsInteger, s_IsNonNegative;
     maths::CModel::TTimeDouble2VecSizeTrVec s_BucketValues;
     maths::CModel::TTimeDouble2VecSizeTrVec s_Values;
-    maths::CModelAddSamplesParams::TDouble2Vec4VecVec s_TrendWeights;
-    maths::CModelAddSamplesParams::TDouble2Vec4VecVec s_PriorWeights;
+    maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec s_TrendWeights;
+    maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec s_PriorWeights;
 };
 using TSizeValuesAndWeightsUMap = boost::unordered_map<std::size_t, SValuesAndWeights>;
 
@@ -74,13 +71,6 @@
 const std::string POPULATION_STATE_TAG("a");
 const std::string FEATURE_MODELS_TAG("b");
 const std::string FEATURE_CORRELATE_MODELS_TAG("c");
 const std::string MEMORY_ESTIMATOR_TAG("d");
-
-const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{
-    maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight,
-    maths_t::E_SampleCountVarianceScaleWeight};
-const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{
-    maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight};
-
 } // unnamed::
 
 CMetricPopulationModel::CMetricPopulationModel(
@@ -438,26 +428,26 @@ void CMetricPopulationModel::sample(core_t::TTime startTime,
                               : attribute.s_Values.size();
                 if (duplicate < attribute.s_Values.size()) {
-                    std::for_each(attribute.s_TrendWeights[duplicate][0].begin(),
-                                  attribute.s_TrendWeights[duplicate][0].end(),
-                                  [countWeight, vs](double& weight) {
-                                      weight += countWeight / vs;
-                                  });
-                    std::for_each(attribute.s_PriorWeights[duplicate][0].begin(),
-                                  attribute.s_PriorWeights[duplicate][0].end(),
-                                  [countWeight](double& weight) {
-                                      weight += countWeight;
-                                  });
+                    maths_t::addCount(TDouble2Vec(dimension, countWeight / vs),
+                                      attribute.s_TrendWeights[duplicate]);
+                    maths_t::addCount(TDouble2Vec(dimension, countWeight),
+                                      attribute.s_PriorWeights[duplicate]);
                 } else {
                     attribute.s_Values.emplace_back(sample.time(), value, pid);
                     attribute.s_TrendWeights.push_back(
-                        {TDouble2Vec(dimension, countWeight / vs),
-                         model->winsorisationWeight(1.0, sample.time(), value),
-                         TDouble2Vec(dimension, vs)});
+                        maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
                     attribute.s_PriorWeights.push_back(
-                        {TDouble2Vec(dimension, countWeight),
-                         model->winsorisationWeight(1.0, sample.time(), value),
-                         TDouble2Vec(dimension, vs)});
+                        maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
+                    auto& trendWeight = attribute.s_TrendWeights.back();
+                    auto& priorWeight = attribute.s_PriorWeights.back();
+                    maths_t::setCount(TDouble2Vec(dimension, countWeight / vs), trendWeight);
+                    maths_t::setWinsorisationWeight(
+                        model->winsorisationWeight(1.0, sample.time(), value), trendWeight);
+                    maths_t::setCountVarianceScale(TDouble2Vec(dimension, vs), trendWeight);
+                    maths_t::setCount(TDouble2Vec(dimension, countWeight), priorWeight);
+                    maths_t::setWinsorisationWeight(
+                        maths_t::winsorisationWeight(trendWeight), priorWeight);
+                    maths_t::setCountVarianceScale(TDouble2Vec(dimension, vs), priorWeight);
                 }
             }
         }
@@ -473,7 +463,6 @@ void CMetricPopulationModel::sample(core_t::TTime startTime,
             params.integer(attribute.second.s_IsInteger)
                 .nonNegative(attribute.second.s_IsNonNegative)
                 .propagationInterval(this->propagationTime(cid, latest))
-                .weightStyles(SAMPLE_WEIGHT_STYLES)
                 .trendWeights(attribute.second.s_TrendWeights)
                 .priorWeights(attribute.second.s_PriorWeights);
 
@@ -486,7 +475,7 @@
     }
 
     for (const auto& feature : m_FeatureCorrelatesModels) {
-        feature.s_Models->processSamples(SAMPLE_WEIGHT_STYLES);
+        feature.s_Models->processSamples();
     }
 
     m_Probabilities.clear();
@@ -927,15 +916,17 @@ void CMetricPopulationModel::fill(model_t::EFeature feature,
                                   core_t::TTime bucketTime,
                                   bool interim,
                                   CProbabilityAndInfluenceCalculator::SParams& params) const {
+    std::size_t dimension{model_t::dimension(feature)};
     auto data = find(this->featureData(feature, bucketTime), pid, cid);
     const maths::CModel* model{this->model(feature, cid)};
     const TOptionalSample& bucket{CDataGatherer::extractData(*data).s_BucketValue};
     core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength(),
                                            bucket->time())};
-    TDouble2Vec4Vec weights{
-        model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time),
-        TDouble2Vec(dimension, bucket->varianceScale())};
+    maths_t::TDouble2VecWeightsAry weights(maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
+    maths_t::setSeasonalVarianceScale(
+        model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), weights);
+    maths_t::setCountVarianceScale(TDouble2Vec(dimension, bucket->varianceScale()), weights);
 
     params.s_Feature = feature;
     params.s_Model = model;
@@ -943,7 +934,7 @@ void CMetricPopulationModel::fill(model_t::EFeature feature,
     params.s_Time.assign(1, TTime2Vec{time});
     params.s_Value.assign(1, bucket->value());
     if (interim && model_t::requiresInterimResultAdjustment(feature)) {
-        TDouble2Vec mode(params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weights));
+        TDouble2Vec mode(params.s_Model->mode(time, weights));
         TDouble2Vec correction(this->interimValueCorrector().corrections(
             time, this->currentBucketTotalCount(), mode, bucket->value(dimension)));
         params.s_Value[0] += correction;
@@ -951,10 +942,10 @@
             CCorrectionKey(feature, pid, cid), correction);
     }
     params.s_Count = 1.0;
-    params.s_ComputeProbabilityParams.tag(pid)
+    params.s_ComputeProbabilityParams
+        .tag(pid) // new line
         .addCalculation(model_t::probabilityCalculation(feature))
-        .weightStyles(PROBABILITY_WEIGHT_STYLES)
-        .addBucketEmpty(TBool2Vec(1, false))
+        .addBucketEmpty({false})
         .addWeights(weights);
 }
 
diff --git a/lib/model/CModelDetailsView.cc b/lib/model/CModelDetailsView.cc
index a1bb7238d5..1a866aaeb1 100644
--- a/lib/model/CModelDetailsView.cc
+++ b/lib/model/CModelDetailsView.cc
@@ -20,8 +20,6 @@
 namespace ml {
 namespace model {
 namespace {
-const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight,
-                                             maths_t::E_SampleCountVarianceScaleWeight};
 const std::string EMPTY_STRING("");
 }
 
@@ -76,7 +74,6 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time,
     using TDouble1VecDouble1VecPr = std::pair<TDouble1Vec, TDouble1Vec>;
     using TDouble2Vec = core::CSmallVector<double, 2>;
     using TDouble2Vec3Vec = core::CSmallVector<TDouble2Vec, 3>;
-    using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
 
     if (this->isByFieldIdActive(byFieldId)) {
         const maths::CModel* model = this->model(feature, byFieldId);
@@ -86,16 +83,18 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time,
 
         std::size_t dimension = model_t::dimension(feature);
 
-        TDouble2Vec4Vec weights(WEIGHT_STYLES.size());
-        weights[0] = model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time);
-        weights[1].assign(dimension, this->countVarianceScale(feature, byFieldId, time));
+        maths_t::TDouble2VecWeightsAry weights(
+            maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
+        maths_t::setSeasonalVarianceScale(
+            model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), weights);
+        maths_t::setCountVarianceScale(
+            TDouble2Vec(dimension, this->countVarianceScale(feature, byFieldId, time)), weights);
 
         TDouble1VecDouble1VecPr support(model_t::support(feature));
         TDouble2Vec supportLower(support.first);
         TDouble2Vec supportUpper(support.second);
-        TDouble2Vec3Vec interval(model->confidenceInterval(time, boundsPercentile,
-                                                           WEIGHT_STYLES, weights));
+        TDouble2Vec3Vec interval(model->confidenceInterval(time, boundsPercentile, weights));
 
         if (interval.size() == 3) {
             TDouble2Vec lower = maths::CTools::truncate(interval[0], supportLower, supportUpper);
diff --git a/lib/model/CModelTools.cc b/lib/model/CModelTools.cc
index e4525826e7..0d16ac1a50 100644
--- a/lib/model/CModelTools.cc
+++ b/lib/model/CModelTools.cc
@@ -333,8 +333,7 @@ void CModelTools::CProbabilityCache::addModes(model_t::EFeature feature,
     TDouble1Vec& modes{m_Caches[{feature, id}].s_Modes};
     if (modes.empty()) {
         TDouble2Vec1Vec modes_(
-            model.residualModes(maths::CConstantWeights::COUNT_VARIANCE,
-                                maths::CConstantWeights::unit<TDouble2Vec>(1)));
+            model.residualModes(maths_t::CUnitWeights::unit<TDouble2Vec>(1)));
         for (const auto& mode : modes_) {
             modes.push_back(mode[0]);
         }
diff --git a/lib/model/CProbabilityAndInfluenceCalculator.cc b/lib/model/CProbabilityAndInfluenceCalculator.cc
index c039b8fe25..0c2bca3dc7 100644
--- a/lib/model/CProbabilityAndInfluenceCalculator.cc
+++ b/lib/model/CProbabilityAndInfluenceCalculator.cc
@@ -27,9 +27,7 @@
 using TSize1Vec = CProbabilityAndInfluenceCalculator::TSize1Vec;
 using TSize2Vec = CProbabilityAndInfluenceCalculator::TSize2Vec;
 using TDouble1Vec = CProbabilityAndInfluenceCalculator::TDouble1Vec;
 using TDouble2Vec = CProbabilityAndInfluenceCalculator::TDouble2Vec;
-using TDouble4Vec = CProbabilityAndInfluenceCalculator::TDouble4Vec;
 using TDouble2Vec1Vec = CProbabilityAndInfluenceCalculator::TDouble2Vec1Vec;
-using TDouble4Vec1Vec = CProbabilityAndInfluenceCalculator::TDouble4Vec1Vec;
 using TDouble1VecDoublePr = CProbabilityAndInfluenceCalculator::TDouble1VecDoublePr;
 using TBool2Vec = CProbabilityAndInfluenceCalculator::TBool2Vec;
 using TTime2Vec = CProbabilityAndInfluenceCalculator::TTime2Vec;
@@ -48,8 +46,6 @@
 using TStoredStringPtrStoredStringPtrPrDoublePrVec =
     CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePrVec;
 using TTail2Vec = core::CSmallVector<maths_t::ETail, 2>;
 using TProbabilityCalculation2Vec = core::CSmallVector<maths_t::EProbabilityCalculation, 2>;
-using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>;
-using TDouble2Vec4Vec1Vec = core::CSmallVector<TDouble2Vec4Vec, 1>;
 using TSizeDoublePr = std::pair<std::size_t, double>;
 using TSizeDoublePr1Vec = core::CSmallVector<TSizeDoublePr, 1>;
 
@@ -158,10 +154,10 @@ class CValueDifference {
                     double ni,
                     maths::CModelProbabilityParams& params,
                     TDouble2Vec& difference) const {
-        params.addBucketEmpty(TBool2Vec{n == ni});
         for (std::size_t i = 0u; i < v.size(); ++i) {
             difference[i] = v[i] - vi[i];
         }
+        params.addBucketEmpty({n == ni});
     }
 
     //! Correlates.
@@ -190,10 +186,10 @@ class CValueIntersection {
                     double ni,
                     maths::CModelProbabilityParams& params,
                     TDouble2Vec& intersection) const {
-        params.addBucketEmpty(TBool2Vec{ni == 0});
         for (std::size_t i = 0u; i < vi.size(); ++i) {
             intersection[i] = vi[i];
         }
+        params.addBucketEmpty({ni == 0});
     }
 
     //! Correlates.
@@ -222,18 +218,14 @@ class CMeanDifference {
                     double ni,
                     maths::CModelProbabilityParams& params,
                     TDouble2Vec& difference) const {
-        params.addBucketEmpty(TBool2Vec{n == ni});
         for (std::size_t d = 0u; d < v.size(); ++d) {
-            for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) {
-                if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) {
-                    params.weights()[0][i][d] *= n / (n - ni);
-                    break;
-                }
-            }
             difference[d] = maths::CBasicStatistics::mean(
                 maths::CBasicStatistics::accumulator(n, v[d]) -
                 maths::CBasicStatistics::accumulator(ni, vi[d]));
         }
+        maths_t::multiplyCountVarianceScale(TDouble2Vec(v.size(), n / (n - ni)),
+                                            params.weights()[0]);
+        params.addBucketEmpty({n == ni});
     }
 
     //! Correlates.
@@ -246,16 +238,13 @@ class CMeanDifference {
         TBool2Vec bucketEmpty(2);
         for (std::size_t d = 0u; d < 2; ++d) {
             bucketEmpty[d] = ((n[d] - ni[d]) == 0);
-            for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) {
-                if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) {
-                    params.weights()[0][i][d] *= n[d] / (n[d] - ni[d]);
-                    break;
-                }
-            }
             difference[d] = maths::CBasicStatistics::mean(
                 maths::CBasicStatistics::accumulator(n[d], v[d]) -
                 maths::CBasicStatistics::accumulator(ni[d], vi[d]));
         }
+        maths_t::multiplyCountVarianceScale(
+            TDouble2Vec{n[0] / (n[0] - ni[0]), n[1] / (n[1] - ni[1])},
+            params.weights()[0]);
         params.addBucketEmpty(bucketEmpty);
     }
 };
@@ -271,18 +260,14 @@ class CVarianceDifference {
                     maths::CModelProbabilityParams& params,
                     TDouble2Vec& difference) const {
         std::size_t dimension = v.size() / 2;
-        params.addBucketEmpty(TBool2Vec{n == ni});
         for (std::size_t d = 0u; d < dimension; ++d) {
-            for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) {
-                if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) {
-                    params.weights()[0][i][d] *= n / (n - ni);
-                    break;
-                }
-            }
             difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance(
                 maths::CBasicStatistics::accumulator(n, v[dimension + d], v[d]) -
                 maths::CBasicStatistics::accumulator(ni, vi[dimension + d], vi[d]));
         }
+        maths_t::multiplyCountVarianceScale(TDouble2Vec(dimension, n / (n - ni)),
+                                            params.weights()[0]);
+        params.addBucketEmpty({n == ni});
     }
 
     //! Correlates.
@@ -295,17 +280,14 @@ class CVarianceDifference {
         TBool2Vec bucketEmpty(2);
         for (std::size_t d = 0u; d < 2; ++d) {
             bucketEmpty[d] = ((n[d] - ni[d]) == 0);
-            for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) {
-                if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) {
-                    params.weights()[0][i][d] *= n[d] / (n[d] - ni[d]);
-                    break;
-                }
-            }
             difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance(
                 maths::CBasicStatistics::accumulator(n[d], v[2 + d], v[d]) -
                 maths::CBasicStatistics::accumulator(ni[d], vi[2 + d], vi[d]));
         }
         params.addBucketEmpty(bucketEmpty);
+        maths_t::multiplyCountVarianceScale(
+            TDouble2Vec{n[0] / (n[0] - ni[0]), n[1] / (n[1] - ni[1])},
+            params.weights()[0]);
     }
 };
 
@@ -384,7 +366,7 @@ void doComputeInfluences(model_t::EFeature feature,
     TSize1Vec mostAnomalousCorrelate;
 
     double logp = maths::CTools::fastLog(probability);
-    TDouble2Vec4Vec1Vec weights(params.weights());
+    maths_t::TDouble2VecWeightsAry1Vec weights(params.weights());
 
     for (auto i = influencerValues.begin(); i != influencerValues.end(); ++i) {
         params.weights(weights).updateAnomalyModel(false);
@@ -477,7 +459,7 @@ void doComputeCorrelateInfluences(model_t::EFeature feature,
     TSize1Vec mostAnomalousCorrelate;
 
     double logp = std::log(probability);
-    TDouble2Vec4Vec1Vec weights(params.weights());
+    maths_t::TDouble2VecWeightsAry1Vec weights(params.weights());
 
     for (const auto& influence_ : influencerValues) {
         params.weights(weights).updateAnomalyModel(false);
@@ -638,7 +620,7 @@ bool CProbabilityAndInfluenceCalculator::addProbability(model_t::EFeature featur
         return false;
     }
 
-    // Maybe check the cache.
+    // Check the cache.
     if (!model_t::isConstant(feature) && m_ProbabilityCache) {
         TDouble2Vec1Vec values(model_t::stripExtraStatistics(feature, values_));
         model.detrend(time, params.seasonalConfidenceInterval(), values);
@@ -896,7 +878,6 @@ void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TParams& pa
         params_
             .seasonalConfidenceInterval(
                 params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
-            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[0]);
 
         TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues;
@@ -925,7 +906,6 @@ void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TCorrelateP
         params_.addCalculation(maths_t::E_OneSidedAbove)
             .seasonalConfidenceInterval(
                 params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
-            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[correlate])
             .mostAnomalousCorrelate(correlate);
         LOG_TRACE(<< "influencerValues = "
@@ -975,7 +955,6 @@ void CLogProbabilityInfluenceCalculator::computeInfluences(TParams& params) cons
         params_
             .seasonalConfidenceInterval(
                 params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
-            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[0]);
 
         TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues;
@@ -1006,7 +985,6 @@ void CLogProbabilityInfluenceCalculator::computeInfluences(TCorrelateParams& par
         params_
             .seasonalConfidenceInterval(
                 params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
-            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[correlate])
             .mostAnomalousCorrelate(correlate);
         LOG_TRACE(<< "influencerValues = "
@@ -1034,7 +1012,6 @@ void CMeanInfluenceCalculator::computeInfluences(TParams& params) const {
         params_
             .seasonalConfidenceInterval(
                 params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
-            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[0]);
 
         TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues;
@@ -1065,7 +1042,6 @@ void CMeanInfluenceCalculator::computeInfluences(TCorrelateParams& params) const
         params_
             .seasonalConfidenceInterval(
                 params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
-            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[correlate])
             .mostAnomalousCorrelate(correlate);
         LOG_TRACE(<< "influencerValues = "
@@ -1093,7 +1069,6 @@ void CVarianceInfluenceCalculator::computeInfluences(TParams& params) const {
         params_
             .seasonalConfidenceInterval(
                 params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
-            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[0]);
 
         TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues;
@@ -1125,7 +1100,6 @@ void CVarianceInfluenceCalculator::computeInfluences(TCorrelateParams& params) c
         params_
             .seasonalConfidenceInterval(
                 params.s_ComputeProbabilityParams.seasonalConfidenceInterval())
-            .weightStyles(params.s_ComputeProbabilityParams.weightStyles())
             .addWeights(params.s_ComputeProbabilityParams.weights()[correlate])
             .mostAnomalousCorrelate(correlate);
         LOG_TRACE(<< "influencerValues = "
diff --git a/lib/model/CRuleCondition.cc b/lib/model/CRuleCondition.cc
index 8bcece4d58..83904c02c5 100644
--- a/lib/model/CRuleCondition.cc
+++ b/lib/model/CRuleCondition.cc
@@ -138,8 +138,7 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model,
     switch (m_Type) {
     case E_CategoricalMatch:
     case E_CategoricalComplement: {
-        LOG_ERROR(<< "Should never check numerical condition for categorical "
-                     "rule condition");
+        LOG_ERROR(<< "Should never check numerical condition for categorical rule condition");
         return false;
     }
     case E_NumericalActual: {
@@ -164,8 +163,7 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model,
             return false;
         }
         if (value.size() != typical.size()) {
-            LOG_ERROR(<< "Cannot apply rule condition: cannot calculate difference "
-                         "between "
+            LOG_ERROR(<< "Cannot apply rule condition: cannot calculate difference between "
                       << "actual and typical values due to different dimensions.");
             return false;
         }
diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc
index d97bf7593e..042fb6644f 100644
--- a/lib/model/ModelTypes.cc
+++ b/lib/model/ModelTypes.cc
@@ -1699,20 +1699,15 @@ std::string print(EFeature feature) {
     case E_PopulationHighUniqueCountByBucketPersonAndAttribute:
         return "'high unique count per bucket by person and attribute'";
     case E_PopulationLowCountsByBucketPersonAndAttribute:
-        return "'low values of non-zero count per bucket by person and "
-               "attribute'";
+        return "'low values of non-zero count per bucket by person and attribute'";
     case E_PopulationHighCountsByBucketPersonAndAttribute:
-        return "'high values of non-zero count per bucket by person and "
-               "attribute'";
+        return "'high values of non-zero count per bucket by person and attribute'";
     case E_PopulationInfoContentByBucketPersonAndAttribute:
-        return "'information content of value per bucket by person and "
-               "attribute'";
+        return "'information content of value per bucket by person and attribute'";
     case E_PopulationLowInfoContentByBucketPersonAndAttribute:
-        return "'low information content of value per bucket by person and "
-               "attribute'";
+        return "'low information content of value per bucket by person and attribute'";
     case E_PopulationHighInfoContentByBucketPersonAndAttribute:
-        return "'high information content of value per bucket by person and "
-               "attribute'";
+        return "'high information content of value per bucket by person and attribute'";
     case E_PopulationTimeOfDayByBucketPersonAndAttribute:
         return "'time-of-day per bucket by person and attribute'";
     case E_PopulationTimeOfWeekByBucketPersonAndAttribute:
@@ -1764,23 +1759,17 @@ std::string print(EFeature feature) {
     case E_PeersLowUniqueCountByBucketPersonAndAttribute:
         return "'low unique count per bucket by peers of person and attribute'";
     case E_PeersHighUniqueCountByBucketPersonAndAttribute:
-        return "'high unique count per bucket by peers of person and "
-               "attribute'";
+        return "'high unique count per bucket by peers of person and attribute'";
     case E_PeersLowCountsByBucketPersonAndAttribute:
-        return "'low values of non-zero count per bucket by peers of person "
-               "and attribute'";
+        return "'low values of non-zero count per bucket by peers of person and attribute'";
     case E_PeersHighCountsByBucketPersonAndAttribute:
-        return "'high values of non-zero count per bucket by peers of person "
-               "and attribute'";
+        return "'high values of non-zero count per bucket by peers of person and attribute'";
     case E_PeersInfoContentByBucketPersonAndAttribute:
-        return "'information content of value per bucket by peers of person "
-               "and attribute'";
+        return "'information content of value per bucket by peers of person and attribute'";
return "'information content of value per bucket by peers of person and attribute'"; case E_PeersLowInfoContentByBucketPersonAndAttribute: - return "'low information content of value per bucket by peers of " - "person and attribute'"; + return "'low information content of value per bucket by peers of person and attribute'"; case E_PeersHighInfoContentByBucketPersonAndAttribute: - return "'high information content of value per bucket by peers of " - "person and attribute'"; + return "'high information content of value per bucket by peers of person and attribute'"; case E_PeersTimeOfDayByBucketPersonAndAttribute: return "'time-of-day per bucket by peers of person and attribute'"; case E_PeersTimeOfWeekByBucketPersonAndAttribute: diff --git a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc index 2fda9a9ca0..ae3180067a 100644 --- a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc +++ b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc @@ -137,16 +137,16 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio maths::CMultinomialConjugate::nonInformativePrior(4u)); for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(i, static_cast(i)); - TDouble4Vec1Vec weights(i, maths::CConstantWeights::UNIT); - attributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(i, maths_t::CUnitWeights::UNIT); + attributePrior.addSamples(samples, weights); } maths::CMultinomialConjugate personAttributePrior( maths::CMultinomialConjugate::nonInformativePrior(4u)); for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(2 * i, static_cast(i)); - TDouble4Vec1Vec weights(2 * i, maths::CConstantWeights::UNIT); - personAttributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(2 * i, maths_t::CUnitWeights::UNIT); + personAttributePrior.addSamples(samples, weights); } SAnnotatedProbability result; @@ -217,16 +217,16 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio maths::CMultinomialConjugate::nonInformativePrior(4u)); for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(i, static_cast(i)); - TDouble4Vec1Vec weights(i, maths::CConstantWeights::UNIT); - attributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(i, maths_t::CUnitWeights::UNIT); + attributePrior.addSamples(samples, weights); } maths::CMultinomialConjugate personAttributePrior( maths::CMultinomialConjugate::nonInformativePrior(4u)); for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(2 * i, static_cast(i)); - TDouble4Vec1Vec weights(2 * i, maths::CConstantWeights::UNIT); - personAttributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(2 * i, maths_t::CUnitWeights::UNIT); + personAttributePrior.addSamples(samples, weights); } SAnnotatedProbability result; diff --git a/lib/model/unittest/CCountingModelTest.cc b/lib/model/unittest/CCountingModelTest.cc index 03dddadd26..b6481c6dc4 100644 --- a/lib/model/unittest/CCountingModelTest.cc +++ b/lib/model/unittest/CCountingModelTest.cc @@ -71,8 +71,6 @@ const std::string EMPTY_STRING; } void CCountingModelTest::testSkipSampling() { - LOG_DEBUG(<< "*** testSkipSampling ***"); - core_t::TTime startTime(100); core_t::TTime bucketLength(100); std::size_t maxAgeBuckets(1); @@ -138,8 +136,6 @@ void 
 }
 
 void CCountingModelTest::testCheckScheduledEvents() {
-    LOG_DEBUG(<< "*** testCheckScheduledEvents ***");
-
     core_t::TTime startTime(100);
     core_t::TTime bucketLength(100);
 
diff --git a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc
index b1269b4d34..4acaec6d72 100644
--- a/lib/model/unittest/CDetectionRuleTest.cc
+++ b/lib/model/unittest/CDetectionRuleTest.cc
@@ -78,8 +78,6 @@ CppUnit::Test* CDetectionRuleTest::suite() {
 }
 
 void CDetectionRuleTest::testApplyGivenCategoricalCondition() {
-    LOG_DEBUG(<< "*** testApplyGivenCategoricalCondition ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -331,8 +329,6 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() {
 }
 
 void CDetectionRuleTest::testApplyGivenNumericalActualCondition() {
-    LOG_DEBUG(<< "*** testApplyGivenNumericalActionCondition ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -438,8 +434,6 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() {
 }
 
 void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() {
-    LOG_DEBUG(<< "*** testApplyGivenNumericalTypicalCondition ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -513,8 +507,6 @@ void CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() {
 }
 
 void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() {
-    LOG_DEBUG(<< "*** testApplyGivenNumericalDiffAbsCondition ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -612,8 +604,6 @@ void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() {
 }
 
 void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField() {
-    LOG_DEBUG(<< "*** testApplyGivenSingleSeriesModelAndConditionWithField ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -661,8 +651,6 @@ void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField()
 }
 
 void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() {
-    LOG_DEBUG(<< "*** testApplyGivenNoActualValueAvailable ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -702,8 +690,6 @@ void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() {
 }
 
 void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() {
-    LOG_DEBUG(<< "*** testApplyGivenDifferentSeriesAndIndividualModel ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -748,8 +734,6 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() {
 }
 
 void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() {
-    LOG_DEBUG(<< "*** testApplyGivenDifferentSeriesAndPopulationModel ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -813,8 +797,6 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() {
 }
 
 void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() {
-    LOG_DEBUG(<< "*** testApplyGivenMultipleConditionsWithOr ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -932,8 +914,6 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() {
 }
 
 void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() {
-    LOG_DEBUG(<< "*** testApplyGivenMultipleConditionsWithAnd ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -1055,8 +1035,6 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() {
 }
 
 void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel() {
-    LOG_DEBUG(<< "*** testApplyGivenTargetFieldIsPartitionAndIndividualModel ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -1148,8 +1126,6 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel(
 }
 
 void CDetectionRuleTest::testApplyGivenTimeCondition() {
-    LOG_DEBUG(<< "*** testApplyGivenTimeCondition ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
@@ -1193,8 +1169,6 @@ void CDetectionRuleTest::testApplyGivenTimeCondition() {
 }
 
 void CDetectionRuleTest::testRuleActions() {
-    LOG_DEBUG(<< "*** testRuleActions ***");
-
     core_t::TTime bucketLength = 100;
     core_t::TTime startTime = 100;
     CSearchKey key;
diff --git a/lib/model/unittest/CDetectorEqualizerTest.cc b/lib/model/unittest/CDetectorEqualizerTest.cc
index 34691016e9..c8e46b9694 100644
--- a/lib/model/unittest/CDetectorEqualizerTest.cc
+++ b/lib/model/unittest/CDetectorEqualizerTest.cc
@@ -30,8 +30,6 @@ const double THRESHOLD = std::log(0.05);
 }
 
 void CDetectorEqualizerTest::testCorrect() {
-    LOG_DEBUG(<< "*** CDetectorEqualizerTest::testCorrect ***");
-
     // Test that the distribution of scores are more similar after correcting.
 
     double scales[] = {1.0, 2.1, 3.2};
@@ -84,8 +82,6 @@ void CDetectorEqualizerTest::testCorrect() {
 }
 
 void CDetectorEqualizerTest::testAge() {
-    LOG_DEBUG(<< "*** CDetectorEqualizerTest::testAge ***");
-
     // Test that propagation doesn't introduce a bias into the corrections.
 
     double scales[] = {1.0, 2.1, 3.2};
@@ -130,8 +126,6 @@ void CDetectorEqualizerTest::testAge() {
 }
 
 void CDetectorEqualizerTest::testPersist() {
-    LOG_DEBUG(<< "*** CDetectorEqualizerTest::testPersist ***");
-
     double scales[] = {1.0, 2.1, 3.2};
 
     model::CDetectorEqualizer origEqualizer;
diff --git a/lib/model/unittest/CDynamicStringIdRegistryTest.cc b/lib/model/unittest/CDynamicStringIdRegistryTest.cc
index e0dbd1079b..519d292c6a 100644
--- a/lib/model/unittest/CDynamicStringIdRegistryTest.cc
+++ b/lib/model/unittest/CDynamicStringIdRegistryTest.cc
@@ -33,8 +33,6 @@ CppUnit::Test* CDynamicStringIdRegistryTest::suite() {
 }
 
 void CDynamicStringIdRegistryTest::testAddName() {
-    LOG_DEBUG(<< "*** testAddName ***");
-
     CResourceMonitor resourceMonitor;
     CDynamicStringIdRegistry registry("person", stat_t::E_NumberNewPeople,
                                       stat_t::E_NumberNewPeopleNotAllowed,
@@ -88,8 +86,6 @@ void CDynamicStringIdRegistryTest::testAddName() {
 }
 
 void CDynamicStringIdRegistryTest::testPersist() {
-    LOG_DEBUG(<< "*** testPersist ***");
-
     CResourceMonitor resourceMonitor;
     CDynamicStringIdRegistry registry("person", stat_t::E_NumberNewPeople,
                                       stat_t::E_NumberNewPeopleNotAllowed,
diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc b/lib/model/unittest/CEventRateDataGathererTest.cc
index a4d6d2345c..82682e2276 100644
--- a/lib/model/unittest/CEventRateDataGathererTest.cc
+++ b/lib/model/unittest/CEventRateDataGathererTest.cc
@@ -292,8 +292,6 @@ void importCsvData(CDataGatherer& gatherer,
 } // namespace
 
 void CEventRateDataGathererTest::testLatencyPersist() {
-    LOG_DEBUG(<< "*** testLatencyPersist ***");
-
     core_t::TTime bucketLength = 3600;
     core_t::TTime latency = 5 * bucketLength;
     core_t::TTime startTime = 1420192800;
@@ -720,8 +718,6 @@ void CEventRateDataGathererTest::multipleSeriesTests() {
 }
 
 void CEventRateDataGathererTest::testRemovePeople() {
-    LOG_DEBUG(<< "*** testRemovePeople ***");
testRemovePeople ***"); - // Test various combinations of removed people. const core_t::TTime startTime = 0; @@ -1297,8 +1293,6 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { } void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() { - LOG_DEBUG(<< "*** testArrivalBeforeLatencyWindowIsIgnored ***"); - const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; std::size_t latencyBuckets(2); @@ -1340,8 +1334,6 @@ void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() { } void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() { - LOG_DEBUG(<< "*** testResetBucketGivenSingleSeries ***"); - const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; std::size_t latencyBuckets(2); @@ -1398,8 +1390,6 @@ void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() { } void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() { - LOG_DEBUG(<< "*** testResetBucketGivenMultipleSeries ***"); - const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; std::size_t latencyBuckets(2); @@ -1460,8 +1450,6 @@ void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() { } void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() { - LOG_DEBUG(<< "*** testResetBucketGivenBucketNotAvailable ***"); - const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; std::size_t latencyBuckets(1); @@ -1865,7 +1853,6 @@ void CEventRateDataGathererTest::testDistinctStrings() { } void CEventRateDataGathererTest::testDiurnalFeatures() { - LOG_DEBUG(<< "*** testDiurnalFeatures ***"); const std::string person("p"); const std::string attribute("a"); const std::string emptyString(""); diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index 05c417fe54..c62d9dc941 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -299,15 +299,11 @@ void testModelWithValueField(model_t::EFeature feature, } } -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); -const TDoubleVecVec UNIT_WEIGHT(1, TDoubleVec(1, 1.0)); const TSizeDoublePr1Vec NO_CORRELATES; } // unnamed:: void CEventRateModelTest::testOnlineCountSample() { - LOG_DEBUG(<< "*** testOnlineCountSample ***"); - const core_t::TTime startTime = 1346968800; const core_t::TTime bucketLength = 3600; SModelParams params(bucketLength); @@ -323,8 +319,8 @@ void CEventRateModelTest::testOnlineCountSample() { TMathsModelPtr timeseriesModel{factory.defaultFeatureModel( model_t::E_IndividualCountByBucketAndPerson, bucketLength, 0.4, true)}; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{ - maths::CConstantWeights::unit(1)}; + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights{ + maths_t::CUnitWeights::unit(1)}; // Generate some events. 
     TTimeVec eventTimes;
@@ -353,7 +349,6 @@ void CEventRateModelTest::testOnlineCountSample() {
             params_.integer(true)
                 .nonNegative(true)
                 .propagationInterval(1.0)
-                .weightStyles(maths::CConstantWeights::COUNT)
                 .trendWeights(weights)
                 .priorWeights(weights);
             double sample{static_cast<double>(expectedEventCounts[j])};
@@ -409,8 +404,6 @@
 }
 
 void CEventRateModelTest::testOnlineNonZeroCountSample() {
-    LOG_DEBUG(<< "*** testOnlineNonZeroCountSample ***");
-
     const core_t::TTime startTime = 1346968800;
     const core_t::TTime bucketLength = 3600;
     SModelParams params(bucketLength);
@@ -426,8 +419,8 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() {
     TMathsModelPtr timeseriesModel{factory.defaultFeatureModel(
         model_t::E_IndividualNonZeroCountByBucketAndPerson, bucketLength, 0.4, true)};
-    maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{
-        maths::CConstantWeights::unit<TDouble2Vec>(1)};
+    maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights{
+        maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
 
     // Generate some events.
     TTimeVec eventTimes;
@@ -457,7 +450,6 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() {
             params_.integer(true)
                 .nonNegative(true)
                 .propagationInterval(1.0)
-                .weightStyles(maths::CConstantWeights::COUNT)
                 .trendWeights(weights)
                 .priorWeights(weights);
             double sample{static_cast<double>(model_t::offsetCountToZero(
@@ -484,8 +476,6 @@
 }
 
 void CEventRateModelTest::testOnlineRare() {
-    LOG_DEBUG(<< "*** testOnlineRare ***");
-
     const core_t::TTime startTime = 1346968800;
     const core_t::TTime bucketLength = 3600;
     SModelParams params(bucketLength);
@@ -568,8 +558,6 @@
 }
 
 void CEventRateModelTest::testOnlineProbabilityCalculation() {
-    LOG_DEBUG(<< "*** testOnlineProbabilityCalculation ***");
-
     using TDoubleSizePr = std::pair<double, std::size_t>;
     using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsHeap<TDoubleSizePr>;
 
@@ -628,8 +616,6 @@
 }
 
 void CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount() {
-    LOG_DEBUG(<< "*** testOnlineProbabilityCalculationForLowNonZeroCount ***");
-
     core_t::TTime startTime(0);
     core_t::TTime bucketLength(100);
     std::size_t lowNonZeroCountBucket = 6u;
@@ -680,8 +666,6 @@
 }
 
 void CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount() {
-    LOG_DEBUG(<< "*** testOnlineProbabilityCalculationForHighNonZeroCount ***");
-
     core_t::TTime startTime(0);
     core_t::TTime bucketLength(100);
     std::size_t lowNonZeroCountBucket = 6u;
@@ -732,8 +716,6 @@
 }
 
 void CEventRateModelTest::testOnlineCorrelatedNoTrend() {
-    LOG_DEBUG(<< "*** testOnlineCorrelatedNoTrend ***");
-
     // Check we find the correct correlated variables, and identify
     // correlate and marginal anomalies.
@@ -939,8 +921,6 @@ void CEventRateModelTest::testOnlineCorrelatedNoTrend() { } void CEventRateModelTest::testOnlineCorrelatedTrend() { - LOG_DEBUG(<< "*** testOnlineCorrelatedTrend ***"); - // FIXME return; @@ -1065,8 +1045,6 @@ void CEventRateModelTest::testOnlineCorrelatedTrend() { } void CEventRateModelTest::testPrune() { - LOG_DEBUG(<< "*** testPrune ***"); - using TUInt64VecVec = std::vector; using TEventDataVec = std::vector; using TSizeSizeMap = std::map; @@ -1378,8 +1356,6 @@ void CEventRateModelTest::testModelsWithValueFields() { } void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { - LOG_DEBUG(<< "*** testCountProbabilityCalculationWithInfluence ***"); - const core_t::TTime startTime = 1346968800; const core_t::TTime bucketLength = 3600; @@ -1723,8 +1699,6 @@ void CEventRateModelTest::testCountProbabilityCalculationWithInfluence() { } void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() { - LOG_DEBUG(<< "*** testCountProbabilityCalculationWithInfluence ***"); - const core_t::TTime startTime = 1346968800; const core_t::TTime bucketLength = 3600; @@ -2053,8 +2027,6 @@ void CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence() } void CEventRateModelTest::testOnlineRareWithInfluence() { - LOG_DEBUG(<< "*** testOnlineRareWithInfluence ***"); - const core_t::TTime startTime = 1346968800; const core_t::TTime bucketLength = 3600; SModelParams params(bucketLength); @@ -2165,8 +2137,6 @@ void CEventRateModelTest::testOnlineRareWithInfluence() { } void CEventRateModelTest::testSkipSampling() { - LOG_DEBUG(<< "*** testSkipSampling ***"); - core_t::TTime startTime(100); std::size_t bucketLength(100); std::size_t maxAgeBuckets(5); @@ -2253,8 +2223,6 @@ void CEventRateModelTest::testSkipSampling() { } void CEventRateModelTest::testExplicitNulls() { - LOG_DEBUG(<< "*** testExplicitNulls ***"); - core_t::TTime startTime(100); std::size_t bucketLength(100); std::string summaryCountField("count"); @@ -2356,8 +2324,6 @@ void CEventRateModelTest::testExplicitNulls() { } void CEventRateModelTest::testInterimCorrections() { - LOG_DEBUG(<< "*** testInterimCorrections ***"); - core_t::TTime startTime(3600); core_t::TTime bucketLength(3600); core_t::TTime endTime(2 * 24 * bucketLength); @@ -2478,8 +2444,6 @@ void CEventRateModelTest::testInterimCorrections() { } void CEventRateModelTest::testInterimCorrectionsWithCorrelations() { - LOG_DEBUG(<< "*** testInterimCorrectionsWithCorrelations ***"); - core_t::TTime startTime(3600); core_t::TTime bucketLength(3600); @@ -2564,8 +2528,6 @@ void CEventRateModelTest::testInterimCorrectionsWithCorrelations() { } void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() { - LOG_DEBUG(<< "*** testSummaryCountZeroRecordsAreIgnored ***"); - core_t::TTime startTime(100); core_t::TTime bucketLength(100); SModelParams params(bucketLength); @@ -2629,8 +2591,6 @@ void CEventRateModelTest::testSummaryCountZeroRecordsAreIgnored() { } void CEventRateModelTest::testComputeProbabilityGivenDetectionRule() { - LOG_DEBUG(<< "*** testComputeProbabilityGivenDetectionRule ***"); - CRuleCondition condition; condition.type(CRuleCondition::E_NumericalActual); condition.condition().s_Op = CRuleCondition::E_LT; @@ -2678,8 +2638,6 @@ void CEventRateModelTest::testComputeProbabilityGivenDetectionRule() { } void CEventRateModelTest::testDecayRateControl() { - LOG_DEBUG(<< "*** testDecayRateControl ***"); - core_t::TTime startTime = 0; core_t::TTime bucketLength = 1800; @@ -2688,7 +2646,7 @@ void 
CEventRateModelTest::testDecayRateControl() { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - params.s_MinimumModeFraction = model::CAnomalyDetectorModelConfig::DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION; + params.s_MinimumModeFraction = CAnomalyDetectorModelConfig::DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION; test::CRandomNumbers rng; @@ -2882,8 +2840,6 @@ void CEventRateModelTest::testDecayRateControl() { } void CEventRateModelTest::testIgnoreSamplingGivenDetectionRules() { - LOG_DEBUG(<< "*** testIgnoreSamplingGivenDetectionRules ***"); - // Create 2 models, one of which has a skip sampling rule. // Feed the same data into both models then add extra data // into the first model we know will be filtered out. @@ -3019,10 +2975,12 @@ CppUnit::Test* CEventRateModelTest::suite() { "CEventRateModelTest::testOnlineProbabilityCalculation", &CEventRateModelTest::testOnlineProbabilityCalculation)); suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount", + "CEventRateModelTest::" + "testOnlineProbabilityCalculationForLowNonZeroCount", &CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount)); suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount", + "CEventRateModelTest::" + "testOnlineProbabilityCalculationForHighNonZeroCount", &CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount)); suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>( "CEventRateModelTest::testOnlineCorrelatedNoTrend", @@ -3041,7 +2999,8 @@ CppUnit::Test* CEventRateModelTest::suite() { "CEventRateModelTest::testCountProbabilityCalculationWithInfluence", &CEventRateModelTest::testCountProbabilityCalculationWithInfluence)); suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>( - "CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence", + "CEventRateModelTest::" + "testDistinctCountProbabilityCalculationWithInfluence", &CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence)); suiteOfTests->addTest(new CppUnit::TestCaller<CEventRateModelTest>( "CEventRateModelTest::testOnlineRareWithInfluence", diff --git a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc index d847a3d635..93aa099815 100644 --- a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc +++ b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc @@ -212,8 +212,6 @@ const std::string EMPTY_STRING; } void CEventRatePopulationDataGathererTest::testAttributeCounts() { - LOG_DEBUG(<< "*** CEventRatePopulationDataGathererTest::testAttributeCounts ***"); - // We check that we correctly sample the unique people per // attribute and (attribute, person) pair counts. @@ -332,8 +330,6 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() { } void CEventRatePopulationDataGathererTest::testAttributeIndicator() { - LOG_DEBUG(<< "*** CEventRatePopulationDataGathererTest::testAttributeIndicator ***"); - // We check that we correctly sample the (attribute, person) // indicator. @@ -394,8 +390,6 @@ void CEventRatePopulationDataGathererTest::testAttributeIndicator() { } void CEventRatePopulationDataGathererTest::testUniqueValueCounts() { - LOG_DEBUG(<< "*** CEventRatePopulationDataGathererTest::testUniqueAttributeCounts ***"); - // We check that we correctly sample the unique counts // of values per person.
@@ -465,8 +459,6 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() { } void CEventRatePopulationDataGathererTest::testCompressedLength() { - LOG_DEBUG(<< "*** CEventRatePopulationDataGathererTest::testCompressedLength ***"); - // We check that we correctly sample the compressed length of unique // values per person. @@ -555,8 +547,6 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() { } void CEventRatePopulationDataGathererTest::testRemovePeople() { - LOG_DEBUG(<< "*** CEventRatePopulationDataGathererTest::testRemovePeople ***"); - using TStrSizeMap = std::map<std::string, std::size_t>; using TSizeUInt64Pr = std::pair<std::size_t, uint64_t>; using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>; @@ -687,8 +677,6 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() { } void CEventRatePopulationDataGathererTest::testRemoveAttributes() { - LOG_DEBUG(<< "*** CEventRatePopulationDataGathererTest::testRemoveAttributes ***"); - const core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -794,8 +782,6 @@ bool isSpace(const char x) { } void CEventRatePopulationDataGathererTest::testPersistence() { - LOG_DEBUG(<< "*** CEventRatePopulationDataGathererTest::testPersistence ***"); - const core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; diff --git a/lib/model/unittest/CEventRatePopulationModelTest.cc b/lib/model/unittest/CEventRatePopulationModelTest.cc index 5e91fa624a..330ad5adb5 100644 --- a/lib/model/unittest/CEventRatePopulationModelTest.cc +++ b/lib/model/unittest/CEventRatePopulationModelTest.cc @@ -211,8 +211,6 @@ const TSizeDoublePr1Vec NO_CORRELATES; } void CEventRatePopulationModelTest::testBasicAccessors() { - LOG_DEBUG(<< "*** testBasicAccessors ***"); - // Check that the correct data is retrieved by the // basic model accessors. @@ -318,15 +316,13 @@ void CEventRatePopulationModelTest::testBasicAccessors() { } void CEventRatePopulationModelTest::testFeatures() { - LOG_DEBUG(<< "*** testFeatures ***"); - // We check that the correct data is read from the gatherer // into the model on sample.
using TDouble2Vec = core::CSmallVector<double, 2>; using TDouble2VecVec = std::vector<TDouble2Vec>; - using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>; - using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>; + using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; + using TDouble2VecWeightsAryVec = std::vector<TDouble2VecWeightsAry>; using TSizeSet = std::set<std::size_t>; using TSizeSizeSetMap = std::map<std::size_t, TSizeSet>; using TStrStrPr = std::pair<std::string, std::string>; @@ -337,12 +333,10 @@ void CEventRatePopulationModelTest::testFeatures() { using TSizeSizePrUInt64Map = std::map<TSizeSizePr, uint64_t>; using TMathsModelPtr = std::shared_ptr<maths::CModel>; using TSizeMathsModelPtrMap = std::map<std::size_t, TMathsModelPtr>; - using TDouble2VecVecDouble2Vec4VecVecPr = std::pair<TDouble2VecVec, TDouble2Vec4VecVec>; - using TSizeDouble2VecVecDouble2Vec4VecVecPrMap = - std::map<std::size_t, TDouble2VecVecDouble2Vec4VecVecPr>; - - static const maths_t::TWeightStyleVec WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; + using TDouble2VecVecDouble2VecWeightsAryVecPr = + std::pair<TDouble2VecVec, TDouble2VecWeightsAryVec>; + using TSizeDouble2VecVecDouble2VecWeightsAryVecPrMap = + std::map<std::size_t, TDouble2VecVecDouble2VecWeightsAryVecPr>; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -397,7 +391,7 @@ void CEventRatePopulationModelTest::testFeatures() { expectedNonZeroCounts[{pid, cid}] = count.second; } - TSizeDouble2VecVecDouble2Vec4VecVecPrMap populationSamples; + TSizeDouble2VecVecDouble2VecWeightsAryVecPrMap populationSamples; for (const auto& count_ : expectedNonZeroCounts) { std::size_t pid = count_.first.first; std::size_t cid = count_.first.second; @@ -411,14 +405,17 @@ void CEventRatePopulationModelTest::testFeatures() { } TDoubleVec sample(1, count); - TDouble2Vec4Vec weight{{model->sampleRateWeight(pid, cid)}, - model_->winsorisationWeight(1.0, time, sample)}; + TDouble2VecWeightsAry weight( + maths_t::CUnitWeights::unit(1)); + maths_t::setCount(TDouble2Vec{model->sampleRateWeight(pid, cid)}, weight); + maths_t::setWinsorisationWeight( + model_->winsorisationWeight(1.0, time, sample), weight); populationSamples[cid].first.push_back({sample[0]}); populationSamples[cid].second.push_back(weight); } for (auto& samples_ : populationSamples) { std::size_t cid = samples_.first; - TDouble2Vec4VecVec& weights = samples_.second.second; + TDouble2VecWeightsAryVec& weights = samples_.second.second; maths::COrderings::simultaneousSort(samples_.second.first, weights); maths::CModel::TTimeDouble2VecSizeTrVec samples; for (const auto& sample : samples_.second.first) { @@ -428,7 +425,6 @@ void CEventRatePopulationModelTest::testFeatures() { params_.integer(true) .nonNegative(true) .propagationInterval(1.0) - .weightStyles(WEIGHT_STYLES) .trendWeights(weights) .priorWeights(weights); expectedPopulationModels[cid]->addSamples(params_, samples); @@ -477,8 +473,6 @@ void CEventRatePopulationModelTest::testFeatures() { } void CEventRatePopulationModelTest::testComputeProbability() { - LOG_DEBUG(<< "*** testComputeProbability ***"); - // Check that we get the probabilities we expect. using TAnomalyVec = std::vector<SAnomaly>; @@ -564,8 +558,6 @@ void CEventRatePopulationModelTest::testComputeProbability() { } void CEventRatePopulationModelTest::testPrune() { - LOG_DEBUG(<< "*** testPrune ***"); - // This test has four people and five attributes. We expect // person 2 and attributes 1, 2 and 5 to be deleted.
@@ -740,8 +732,6 @@ void CEventRatePopulationModelTest::testPrune() { } void CEventRatePopulationModelTest::testKey() { - LOG_DEBUG(<< "*** testKey ***"); - function_t::EFunction countFunctions[] = {function_t::E_PopulationCount, function_t::E_PopulationDistinctCount, function_t::E_PopulationRare, @@ -780,8 +770,6 @@ void CEventRatePopulationModelTest::testKey() { } void CEventRatePopulationModelTest::testFrequency() { - LOG_DEBUG(<< "*** CEventRatePopulationModelTest::testFrequency ***"); - using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; // Test we correctly compute frequencies for people and attributes. @@ -871,8 +859,6 @@ void CEventRatePopulationModelTest::testFrequency() { } void CEventRatePopulationModelTest::testSampleRateWeight() { - LOG_DEBUG(<< "*** CEventRatePopulationModelTest::testSampleRateWeight ***"); - // Test that we correctly compensate for heavy hitters. // There are 10 attributes. @@ -992,8 +978,6 @@ void CEventRatePopulationModelTest::testSampleRateWeight() { } void CEventRatePopulationModelTest::testPeriodicity() { - LOG_DEBUG(<< "*** testPeriodicity ***"); - // Create a daily periodic population and check that the // periodicity is learned and compensated (approximately). @@ -1114,8 +1098,6 @@ void CEventRatePopulationModelTest::testPeriodicity() { } void CEventRatePopulationModelTest::testSkipSampling() { - LOG_DEBUG(<< "*** testSkipSampling ***"); - core_t::TTime startTime(100); std::size_t bucketLength(100); std::size_t maxAgeBuckets(5); @@ -1201,8 +1183,6 @@ void CEventRatePopulationModelTest::testSkipSampling() { } void CEventRatePopulationModelTest::testInterimCorrections() { - LOG_DEBUG(<< "*** testInterimCorrections ***"); - core_t::TTime startTime(3600); std::size_t bucketLength(3600); SModelParams params(bucketLength); @@ -1285,8 +1265,6 @@ void CEventRatePopulationModelTest::testInterimCorrections() { } void CEventRatePopulationModelTest::testPersistence() { - LOG_DEBUG(<< "*** testPersistence ***"); - core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -1354,8 +1332,6 @@ void CEventRatePopulationModelTest::testPersistence() { } void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() { - LOG_DEBUG(<< "*** testIgnoreSamplingGivenDetectionRules ***"); - // Create 2 models, one of which has a skip sampling rule. // Feed the same data into both models then add extra data // into the first model we know will be filtered out. 
@@ -1436,9 +1412,8 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() { // Checksums will be different because a model is created for attribute a3 CPPUNIT_ASSERT(modelWithSkip->checksum() != modelNoSkip->checksum()); - CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = - modelWithSkip->details(); - CAnomalyDetectorModel::CModelDetailsViewPtr modelNoSkipView = modelNoSkip->details(); + auto modelWithSkipView = modelWithSkip->details(); + auto modelNoSkipView = modelNoSkip->details(); // but the underlying models for attributes a1 and a2 are the same uint64_t withSkipChecksum = diff --git a/lib/model/unittest/CHierarchicalResultsTest.cc b/lib/model/unittest/CHierarchicalResultsTest.cc index bd24650304..2981588303 100644 --- a/lib/model/unittest/CHierarchicalResultsTest.cc +++ b/lib/model/unittest/CHierarchicalResultsTest.cc @@ -428,8 +428,6 @@ void addResult(int detector, } // unnamed:: void CHierarchicalResultsTest::testBreadthFirstVisit() { - LOG_DEBUG(<< "*** testBreadthFirstVisit ***"); - model::CHierarchicalResults results; // Three partitioning fields PART1, PART2, PART3: @@ -493,8 +491,6 @@ void CHierarchicalResultsTest::testBreadthFirstVisit() { } void CHierarchicalResultsTest::testDepthFirstVisit() { - LOG_DEBUG(<< "*** testDepthFirstVisit ***"); - model::CHierarchicalResults results; // Three partitioning fields PART1, PART2, PART3: @@ -599,8 +595,6 @@ const std::string p35("p35"); } // unnamed:: void CHierarchicalResultsTest::testBuildHierarchy() { - LOG_DEBUG(<< "*** testBuildHierarchy ***"); - static const std::string FUNC("mean"); static const ml::model::function_t::EFunction function( ml::model::function_t::E_IndividualMetricMean); @@ -734,8 +728,6 @@ void CHierarchicalResultsTest::testBuildHierarchy() { } void CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue() { - LOG_DEBUG(<< "*** testBuildHierarchyGivenPartitionsWithSinglePersonFieldValue ***"); - static const std::string FUNC("mean"); static const ml::model::function_t::EFunction function( ml::model::function_t::E_IndividualMetricMean); @@ -792,8 +784,6 @@ void CHierarchicalResultsTest::testBuildHierarchyGivenPartitionsWithSinglePerson } void CHierarchicalResultsTest::testBasicVisitor() { - LOG_DEBUG(<< "*** testBasicVisitor ***"); - static const std::string FUNC("max"); static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMax); @@ -1008,8 +998,6 @@ void CHierarchicalResultsTest::testBasicVisitor() { } void CHierarchicalResultsTest::testAggregator() { - LOG_DEBUG(<< "*** testAggregator ***"); - using TAnnotatedProbabilityVec = std::vector; model::CAnomalyDetectorModelConfig modelConfig = @@ -1182,8 +1170,6 @@ void CHierarchicalResultsTest::testAggregator() { } void CHierarchicalResultsTest::testInfluence() { - LOG_DEBUG(<< "*** testInfluence ***"); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); model::CHierarchicalResultsAggregator aggregator(modelConfig); @@ -1345,8 +1331,6 @@ void CHierarchicalResultsTest::testInfluence() { } void CHierarchicalResultsTest::testScores() { - LOG_DEBUG(<< "*** testScores ***"); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); model::CLimits limits; @@ -1476,8 +1460,6 @@ void CHierarchicalResultsTest::testScores() { } void CHierarchicalResultsTest::testWriter() { - LOG_DEBUG(<< "*** testWriter ***"); - model::CAnomalyDetectorModelConfig modelConfig = 
model::CAnomalyDetectorModelConfig::defaultConfig(); model::CLimits limits; @@ -1552,8 +1534,6 @@ void CHierarchicalResultsTest::testWriter() { } void CHierarchicalResultsTest::testNormalizer() { - LOG_DEBUG(<< "*** testNormalizer ***"); - using TNormalizerPtr = std::shared_ptr; using TStrNormalizerPtrMap = std::map; using TStrNormalizerPtrMapItr = TStrNormalizerPtrMap::iterator; @@ -1765,8 +1745,6 @@ void CHierarchicalResultsTest::testNormalizer() { } void CHierarchicalResultsTest::testDetectorEqualizing() { - LOG_DEBUG(<< "*** testDetectorEqualizing ***"); - model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); test::CRandomNumbers rng; diff --git a/lib/model/unittest/CMemoryUsageEstimatorTest.cc b/lib/model/unittest/CMemoryUsageEstimatorTest.cc index 0ff6c33934..b0fa8c16b0 100644 --- a/lib/model/unittest/CMemoryUsageEstimatorTest.cc +++ b/lib/model/unittest/CMemoryUsageEstimatorTest.cc @@ -45,8 +45,6 @@ CMemoryUsageEstimator::TOptionalSize estimate(CMemoryUsageEstimator& estimator, } void CMemoryUsageEstimatorTest::testEstimateLinear() { - LOG_DEBUG(<< "Running estimator test estimate linear"); - CMemoryUsageEstimator estimator; // Pscale = 54 @@ -99,8 +97,6 @@ void CMemoryUsageEstimatorTest::testEstimateLinear() { } void CMemoryUsageEstimatorTest::testEstimateNonlinear() { - LOG_DEBUG(<< "Running estimator test estimate non-linear"); - { // intercept = 356 // Pscale = 54 @@ -156,8 +152,6 @@ void CMemoryUsageEstimatorTest::testEstimateNonlinear() { } void CMemoryUsageEstimatorTest::testPersist() { - LOG_DEBUG(<< "Running estimator test persist"); - CMemoryUsageEstimator origEstimator; { std::string origXml; diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 1272ec28cf..94e87ea28b 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -163,8 +163,6 @@ const std::string EMPTY_STRING; } void CMetricDataGathererTest::singleSeriesTests() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::singleSeriesTests ***"); - // Test that the various statistics come back as we suspect. const core_t::TTime startTime = 0; @@ -428,8 +426,6 @@ void CMetricDataGathererTest::singleSeriesTests() { } void CMetricDataGathererTest::multipleSeriesTests() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::multipleSeriesTests ***"); - // Test that the various statistics come back as we suspect // for multiple people. @@ -660,8 +656,6 @@ void CMetricDataGathererTest::multipleSeriesTests() { } void CMetricDataGathererTest::testSampleCount() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::testSampleCount ***"); - // Test that we set sensible sample counts for each person. // Person 1 has constant update rate of 4 values per bucket. @@ -732,8 +726,6 @@ void CMetricDataGathererTest::testSampleCount() { } void CMetricDataGathererTest::testRemovePeople() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::testRemovePeople ***"); - // Test various combinations of removed people. const core_t::TTime startTime = 0; @@ -886,8 +878,6 @@ void CMetricDataGathererTest::testRemovePeople() { } void CMetricDataGathererTest::testSum() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::testSum ***"); - // Test sum and non-zero sum work as expected. 
const core_t::TTime bucketLength = 600; @@ -982,8 +972,6 @@ void CMetricDataGathererTest::testSum() { } void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::singleSeriesOutOfOrderTests ***"); - // Test that the various statistics come back as we suspect. const core_t::TTime startTime = 0; @@ -1178,8 +1166,6 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { } void CMetricDataGathererTest::testResetBucketGivenSingleSeries() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::testResetBucketGivenSingleSeries ***"); - const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; SModelParams params(bucketLength); @@ -1293,8 +1279,6 @@ void CMetricDataGathererTest::testResetBucketGivenSingleSeries() { } void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::testResetBucketGivenMultipleSeries ***"); - const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; SModelParams params(bucketLength); @@ -1535,8 +1519,6 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { } void CMetricDataGathererTest::testInfluenceStatistics() { - LOG_DEBUG(<< "*** CMetricDataGathererTest::testInfluenceStatistics ***"); - using TTimeDoubleStrStrTuple = boost::tuple<core_t::TTime, double, std::string, std::string>; using TDoubleDoublePr = std::pair<double, double>; diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index ccd0fa55b7..bf16e42f2d 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -68,9 +68,8 @@ using TOptionalStr = boost::optional<std::string>; using TTimeDoublePr = std::pair<core_t::TTime, double>; using TOptionalTimeDoublePr = boost::optional<TTimeDoublePr>; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean<double>::TAccumulator; -using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack<double, 1u>; -using TMaxAccumulator = - maths::CBasicStatistics::COrderStatisticsStack<double, 1u, std::greater<double>>; +using TMinAccumulator = maths::CBasicStatistics::SMin<double>::TAccumulator; +using TMaxAccumulator = maths::CBasicStatistics::SMax<double>::TAccumulator; using TMathsModelPtr = std::shared_ptr<maths::CModel>; using TPriorPtr = std::shared_ptr<maths::CPrior>; using TMultivariatePriorPtr = std::shared_ptr<maths::CMultivariatePrior>; @@ -272,14 +271,10 @@ void processBucket(core_t::TTime time, partitioningFields, 1, probability2); } -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); -const TDouble4Vec1Vec UNIT_WEIGHT(1, TDouble4Vec(1, 1.0)); const TSizeDoublePr1Vec NO_CORRELATES; } void CMetricModelTest::testSample() { - LOG_DEBUG(<< "*** testSample ***"); - core_t::TTime startTime(45); core_t::TTime bucketLength(5); SModelParams params(bucketLength); @@ -387,13 +382,12 @@ void CMetricModelTest::testSample() { << core::CContainerPrinter::print(expectedMinSamples) << ", max samples = " << core::CContainerPrinter::print(expectedMaxSamples)); - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights( - numberSamples, maths::CConstantWeights::unit(1)); + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights( + numberSamples, maths_t::CUnitWeights::unit(1)); maths::CModelAddSamplesParams params_; params_.integer(false) .nonNegative(true) .propagationInterval(1.0) - .weightStyles(COUNT_WEIGHT) .trendWeights(weights) .priorWeights(weights); @@ -532,9 +526,6 @@ void CMetricModelTest::testSample() { } void CMetricModelTest::testMultivariateSample() { - LOG_DEBUG(<< "*** testMultivariateSample ***"); - - using TDoubleVecVecVec = std::vector<TDoubleVecVec>; using TVector2 = maths::CVectorNx1<double, 2>; using TMean2Accumulator =
maths::CBasicStatistics::SSampleMean<TVector2>::TAccumulator; using TTimeDouble2AryPr = std::pair<core_t::TTime, boost::array<double, 2>>; @@ -553,8 +544,8 @@ void CMetricModelTest::testMultivariateSample() { {202, 1.3, 1.1}, {204, 1.5, 1.8}}; TTimeDouble2AryPrVec data; for (std::size_t i = 0u; i < boost::size(data_); ++i) { - boost::array<double, 2> values = {{data_[i][1], data_[i][2]}}; - data.push_back(TTimeDouble2AryPr(static_cast<core_t::TTime>(data_[i][0]), values)); + boost::array<double, 2> value = {{data_[i][1], data_[i][2]}}; + data.emplace_back(static_cast<core_t::TTime>(data_[i][0]), value); } unsigned int sampleCounts[] = {2u, 1u}; @@ -581,7 +572,7 @@ void CMetricModelTest::testMultivariateSample() { TMean2Accumulator expectedLatLongSample; std::size_t numberSamples = 0; TDoubleVecVec expectedLatLongSamples; - TMultivariatePriorPtr expectedMeanPrior = + TMultivariatePriorPtr expectedPrior = factory.defaultMultivariatePrior(model_t::E_IndividualMeanLatLongByPerson); std::size_t j = 0; @@ -617,53 +608,54 @@ void CMetricModelTest::testMultivariateSample() { expectedLatLongSamples.end()); LOG_DEBUG(<< "Adding mean samples = " << core::CContainerPrinter::print(expectedLatLongSamples)); - expectedMeanPrior->dataType(maths_t::E_ContinuousData); - expectedMeanPrior->addSamples( - COUNT_WEIGHT, expectedLatLongSamples, - TDoubleVecVecVec(expectedLatLongSamples.size(), - TDoubleVecVec(1, TDoubleVec(2, 1.0)))); + expectedPrior->dataType(maths_t::E_ContinuousData); + expectedPrior->addSamples( + expectedLatLongSamples, + maths_t::TDouble10VecWeightsAry1Vec( + expectedLatLongSamples.size(), + maths_t::CUnitWeights::unit(2))); numberSamples = 0u; expectedLatLongSamples.clear(); } model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Final); - TOptionalUInt64 currentCount = model.currentBucketCount(0, time); + TOptionalUInt64 count = model.currentBucketCount(0, time); TDouble1Vec bucketLatLong = model.currentBucketValue( model_t::E_IndividualMeanLatLongByPerson, 0, 0, time); TDouble1Vec baselineLatLong = model.baselineBucketMean(model_t::E_IndividualMeanLatLongByPerson, 0, 0, type, NO_CORRELATES, time); + TDouble1Vec featureLatLong = multivariateFeatureData( + model, model_t::E_IndividualMeanLatLongByPerson, 0, time); + const auto& prior = + dynamic_cast<const maths::CMultivariateTimeSeriesModel*>( + model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0)) + ->residualModel(); - LOG_DEBUG(<< "bucket count = " - << core::CContainerPrinter::print(currentCount)); - LOG_DEBUG(<< "current bucket mean = " - << core::CContainerPrinter::print(bucketLatLong) << ", expected baseline bucket mean = " - << maths::CBasicStatistics::mean(expectedBaselineLatLong) << ", baseline bucket mean = " + LOG_DEBUG(<< "bucket count = " << core::CContainerPrinter::print(count)); + LOG_DEBUG(<< "current = " << core::CContainerPrinter::print(bucketLatLong) + << "expected baseline = " + << maths::CBasicStatistics::mean(expectedBaselineLatLong) << "actual baseline = " + << core::CContainerPrinter::print(baselineLatLong)); - CPPUNIT_ASSERT(currentCount); - CPPUNIT_ASSERT_EQUAL(expectedCount, *currentCount); + CPPUNIT_ASSERT(count); + CPPUNIT_ASSERT_EQUAL(expectedCount, *count); TDouble1Vec latLong; if (maths::CBasicStatistics::count(expectedLatLong) > 0.0) { latLong.push_back(maths::CBasicStatistics::mean(expectedLatLong)(0)); latLong.push_back(maths::CBasicStatistics::mean(expectedLatLong)(1)); } - CPPUNIT_ASSERT(latLong == bucketLatLong); + CPPUNIT_ASSERT_EQUAL(latLong, bucketLatLong); if (!baselineLatLong.empty()) { baselineLatLongError.add(maths::fabs( TVector2(baselineLatLong) -
maths::CBasicStatistics::mean(expectedBaselineLatLong))); } - CPPUNIT_ASSERT(latLong == multivariateFeatureData(model, model_t::E_IndividualMeanLatLongByPerson, - 0, time)); - CPPUNIT_ASSERT_EQUAL( - expectedMeanPrior->checksum(), - dynamic_cast( - model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0)) - ->residualModel() - .checksum()); + + CPPUNIT_ASSERT_EQUAL(latLong, featureLatLong); + CPPUNIT_ASSERT_EQUAL(expectedPrior->checksum(), prior.checksum()); // Test persistence. (We check for idempotency.) std::string origXml; @@ -715,8 +707,6 @@ void CMetricModelTest::testMultivariateSample() { } void CMetricModelTest::testProbabilityCalculationForMetric() { - LOG_DEBUG(<< "*** testProbabilityCalculationForMetric ***"); - core_t::TTime startTime(0); core_t::TTime bucketLength(10); SModelParams params(bucketLength); @@ -779,8 +769,6 @@ void CMetricModelTest::testProbabilityCalculationForMetric() { } void CMetricModelTest::testProbabilityCalculationForMedian() { - LOG_DEBUG(<< "*** testProbabilityCalculationForMedian ***"); - core_t::TTime startTime(0); core_t::TTime bucketLength(10); SModelParams params(bucketLength); @@ -855,8 +843,6 @@ void CMetricModelTest::testProbabilityCalculationForMedian() { } void CMetricModelTest::testProbabilityCalculationForLowMean() { - LOG_DEBUG(<< "*** testProbabilityCalculationForLowMean ***"); - core_t::TTime startTime(0); core_t::TTime bucketLength(10); SModelParams params(bucketLength); @@ -919,8 +905,6 @@ void CMetricModelTest::testProbabilityCalculationForLowMean() { } void CMetricModelTest::testProbabilityCalculationForHighMean() { - LOG_DEBUG(<< "*** testProbabilityCalculationForHighMean ***"); - core_t::TTime startTime(0); core_t::TTime bucketLength(10); SModelParams params(bucketLength); @@ -981,8 +965,6 @@ void CMetricModelTest::testProbabilityCalculationForHighMean() { } void CMetricModelTest::testProbabilityCalculationForLowSum() { - LOG_DEBUG(<< "*** testProbabilityCalculationForLowSum ***"); - core_t::TTime startTime(0); core_t::TTime bucketLength(10); SModelParams params(bucketLength); @@ -1042,8 +1024,6 @@ void CMetricModelTest::testProbabilityCalculationForLowSum() { } void CMetricModelTest::testProbabilityCalculationForHighSum() { - LOG_DEBUG(<< "*** testProbabilityCalculationForLowSum ***"); - core_t::TTime startTime(0); core_t::TTime bucketLength(10); SModelParams params(bucketLength); @@ -1103,14 +1083,10 @@ void CMetricModelTest::testProbabilityCalculationForHighSum() { } void CMetricModelTest::testProbabilityCalculationForLatLong() { - LOG_DEBUG(<< "*** testProbabilityCalculationForLatLong ***"); - // TODO } void CMetricModelTest::testInfluence() { - LOG_DEBUG(<< "*** testInfluence ***"); - using TStrDoubleDoubleTr = core::CTriple; using TStrDoubleDoubleTrVec = std::vector; using TStrDoubleDoubleTrVecVec = std::vector; @@ -1354,14 +1330,10 @@ void CMetricModelTest::testInfluence() { } void CMetricModelTest::testLatLongInfluence() { - LOG_DEBUG(<< "*** testLatLongInfluence ***"); - // TODO } void CMetricModelTest::testPrune() { - LOG_DEBUG(<< "*** testPrune ***"); - maths::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; using TSizeVec = std::vector; @@ -1569,8 +1541,6 @@ void CMetricModelTest::testKey() { } void CMetricModelTest::testSkipSampling() { - LOG_DEBUG(<< "*** testSkipSampling ***"); - core_t::TTime startTime(100); core_t::TTime bucketLength(100); SModelParams params(bucketLength); @@ -1659,8 +1629,6 @@ void CMetricModelTest::testSkipSampling() { } void CMetricModelTest::testExplicitNulls() { - 
LOG_DEBUG(<< "*** testExplicitNulls ***"); - core_t::TTime startTime(100); core_t::TTime bucketLength(100); SModelParams params(bucketLength); @@ -1756,8 +1724,6 @@ void CMetricModelTest::testExplicitNulls() { } void CMetricModelTest::testVarp() { - LOG_DEBUG(<< "*** testVarp ***"); - core_t::TTime startTime(500000); core_t::TTime bucketLength(1000); SModelParams params(bucketLength); @@ -1884,8 +1850,6 @@ void CMetricModelTest::testVarp() { } void CMetricModelTest::testInterimCorrections() { - LOG_DEBUG(<< "*** testInterimCorrections ***"); - core_t::TTime startTime(3600); core_t::TTime bucketLength(3600); SModelParams params(bucketLength); @@ -1976,8 +1940,6 @@ void CMetricModelTest::testInterimCorrections() { } void CMetricModelTest::testInterimCorrectionsWithCorrelations() { - LOG_DEBUG(<< "*** testInterimCorrectionsWithCorrelations ***"); - core_t::TTime startTime(3600); core_t::TTime bucketLength(3600); SModelParams params(bucketLength); @@ -2072,8 +2034,6 @@ void CMetricModelTest::testInterimCorrectionsWithCorrelations() { } void CMetricModelTest::testCorrelatePersist() { - LOG_DEBUG(<< "*** testCorrelatePersist ***"); - using TVector2 = maths::CVectorNx1; using TMatrix2 = maths::CSymmetricMatrixNxN; @@ -2150,8 +2110,6 @@ void CMetricModelTest::testCorrelatePersist() { } void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() { - LOG_DEBUG(<< "*** testSummaryCountZeroRecordsAreIgnored ***"); - core_t::TTime startTime(100); core_t::TTime bucketLength(100); SModelParams params(bucketLength); @@ -2220,8 +2178,6 @@ void CMetricModelTest::testSummaryCountZeroRecordsAreIgnored() { } void CMetricModelTest::testDecayRateControl() { - LOG_DEBUG(<< "*** testDecayRateControl ***"); - core_t::TTime startTime = 0; core_t::TTime bucketLength = 1800; @@ -2417,8 +2373,6 @@ void CMetricModelTest::testDecayRateControl() { } void CMetricModelTest::testProbabilityCalculationForLowMedian() { - LOG_DEBUG(<< "*** testProbabilityCalculationForLowMedian ***"); - core_t::TTime startTime(0); core_t::TTime bucketLength(10); SModelParams params(bucketLength); @@ -2481,8 +2435,6 @@ void CMetricModelTest::testProbabilityCalculationForLowMedian() { } void CMetricModelTest::testProbabilityCalculationForHighMedian() { - LOG_DEBUG(<< "*** testProbabilityCalculationForHighMedian ***"); - core_t::TTime startTime(0); core_t::TTime bucketLength(10); SModelParams params(bucketLength); @@ -2543,8 +2495,6 @@ void CMetricModelTest::testProbabilityCalculationForHighMedian() { } void CMetricModelTest::testIgnoreSamplingGivenDetectionRules() { - LOG_DEBUG(<< "*** testIgnoreSamplingGivenDetectionRules ***"); - // Create 2 models, one of which has a skip sampling rule. // Feed the same data into both models then add extra data // into the first model we know will be filtered out. diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.cc b/lib/model/unittest/CMetricPopulationDataGathererTest.cc index 647b6d608b..282e268ad2 100644 --- a/lib/model/unittest/CMetricPopulationDataGathererTest.cc +++ b/lib/model/unittest/CMetricPopulationDataGathererTest.cc @@ -135,8 +135,6 @@ const std::string EMPTY_STRING; } // unnamed:: void CMetricPopulationDataGathererTest::testMean() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testMean ***"); - // Test that we correctly sample the bucket means. 
using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; @@ -202,8 +200,6 @@ void CMetricPopulationDataGathererTest::testMean() { } void CMetricPopulationDataGathererTest::testMin() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testMin ***"); - // Test that we correctly sample the bucket minimums. using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; @@ -268,8 +264,6 @@ void CMetricPopulationDataGathererTest::testMin() { } void CMetricPopulationDataGathererTest::testMax() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testMax ***"); - // Test that we correctly sample the bucket maximums. using TMaxAccumulator = @@ -335,8 +329,6 @@ void CMetricPopulationDataGathererTest::testMax() { } void CMetricPopulationDataGathererTest::testSum() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testSum ***"); - // Test that we correctly sample the bucket sums. const core_t::TTime startTime = 1373932800; @@ -391,8 +383,6 @@ void CMetricPopulationDataGathererTest::testSum() { } void CMetricPopulationDataGathererTest::testSampleCount() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testSampleCount ***"); - // Test that we set sensible sample counts for each attribute. const core_t::TTime startTime = 1373932800; @@ -456,8 +446,6 @@ void CMetricPopulationDataGathererTest::testSampleCount() { } void CMetricPopulationDataGathererTest::testFeatureData() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testFeatureData ***"); - // Test we correctly sample the mean, minimum and maximum statistics. using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; @@ -642,8 +630,6 @@ void CMetricPopulationDataGathererTest::testFeatureData() { } void CMetricPopulationDataGathererTest::testRemovePeople() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testRemovePeople ***"); - // Check that all the state is correctly updated when some // people are removed. @@ -786,8 +772,6 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { } void CMetricPopulationDataGathererTest::testRemoveAttributes() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testRemoveAttributes ***"); - // Check that all the state is correctly updated when some // attributes are removed. 
@@ -910,8 +894,6 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() { } void CMetricPopulationDataGathererTest::testInfluenceStatistics() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testInfluenceStatistics ***"); - using TDoubleDoublePr = std::pair<double, double>; using TStrDoubleDoublePrPr = std::pair<std::string, TDoubleDoublePr>; using TStrDoubleDoublePrPrVec = std::vector<TStrDoubleDoublePrPr>; @@ -1027,8 +1009,6 @@ void CMetricPopulationDataGathererTest::testInfluenceStatistics() { } void CMetricPopulationDataGathererTest::testPersistence() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testPersistence ***"); - const core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; SModelParams params(bucketLength); @@ -1099,8 +1079,6 @@ void CMetricPopulationDataGathererTest::testPersistence() { } void CMetricPopulationDataGathererTest::testReleaseMemory() { - LOG_DEBUG(<< "*** CMetricPopulationDataGathererTest::testReleaseMemory ***"); - const core_t::TTime startTime = 1373932800; const core_t::TTime bucketLength = 3600; diff --git a/lib/model/unittest/CMetricPopulationModelTest.cc b/lib/model/unittest/CMetricPopulationModelTest.cc index eef6c32321..7b36516817 100644 --- a/lib/model/unittest/CMetricPopulationModelTest.cc +++ b/lib/model/unittest/CMetricPopulationModelTest.cc @@ -288,8 +288,6 @@ void processBucket(core_t::TTime time, } void CMetricPopulationModelTest::testBasicAccessors() { - LOG_DEBUG(<< "*** CMetricPopulationModelTest::testBasicAccessors ***"); - // Check that the correct data is retrieved by the // basic model accessors. @@ -436,30 +434,25 @@ void CMetricPopulationModelTest::testBasicAccessors() { } void CMetricPopulationModelTest::testMinMaxAndMean() { - LOG_DEBUG(<< "*** testMinMaxAndMean ***"); - // We check that the correct data is read from the gatherer // into the model on sample.
using TTimeDouble2VecSizeTr = core::CTriple; using TTimeDouble2VecSizeTrVec = std::vector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; + using TDouble2VecWeightsAryVec = std::vector; using TSizeSizePrDoubleVecMap = std::map; using TSizeSizePrMeanAccumulatorUMap = std::map; using TSizeSizePrMinAccumulatorMap = std::map; using TSizeSizePrMaxAccumulatorMap = std::map; using TMathsModelPtr = std::shared_ptr; using TSizeMathsModelPtrMap = std::map; - using TTimeDouble2VecSizeTrVecDouble2Vec4VecVecPr = - std::pair; - using TSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMap = - std::map; - using TSizeSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMapMap = - std::map; - - static const maths_t::TWeightStyleVec WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; + using TTimeDouble2VecSizeTrVecDouble2VecWeightsAryVecPr = + std::pair; + using TSizeTimeDouble2VecSizeTrVecDouble2VecWeightsAryVecPrMap = + std::map; + using TSizeSizeTimeDouble2VecSizeTrVecDouble2VecWeightAryVecPrMapMap = + std::map; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -503,15 +496,14 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { if (message.s_Time >= startTime + bucketLength) { model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); - TSizeSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMapMap populationWeightedSamples; + TSizeSizeTimeDouble2VecSizeTrVecDouble2VecWeightAryVecPrMapMap populationWeightedSamples; for (std::size_t feature = 0u; feature < features.size(); ++feature) { for (const auto& samples_ : expectedSamples[feature]) { std::size_t pid = samples_.first.first; std::size_t cid = samples_.first.second; - double weight = model->sampleRateWeight(pid, cid); TTimeDouble2VecSizeTrVec& samples = populationWeightedSamples[feature][cid].first; - TDouble2Vec4VecVec& weights = + TDouble2VecWeightsAryVec& weights = populationWeightedSamples[feature][cid].second; TMathsModelPtr& model_ = expectedPopulationModels[feature][cid]; if (!model_) { @@ -525,8 +517,12 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { expectedSampleTimes[{pid, cid}][j] + 0.5); TDouble2Vec sample{samples_.second[j]}; samples.emplace_back(time_, sample, pid); - weights.push_back( - {{weight}, model_->winsorisationWeight(1.0, time_, sample)}); + weights.push_back(maths_t::CUnitWeights::unit(1)); + auto& weight = weights.back(); + maths_t::setCount( + TDouble2Vec{model->sampleRateWeight(pid, cid)}, weight); + maths_t::setWinsorisationWeight( + model_->winsorisationWeight(1.0, time_, sample), weight); } } } @@ -534,13 +530,12 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { for (auto& attribute : feature.second) { std::size_t cid = attribute.first; TTimeDouble2VecSizeTrVec& samples = attribute.second.first; - TDouble2Vec4VecVec& weights = attribute.second.second; + TDouble2VecWeightsAryVec& weights = attribute.second.second; maths::COrderings::simultaneousSort(samples, weights); maths::CModelAddSamplesParams params_; params_.integer(false) .nonNegative(nonNegative) .propagationInterval(1.0) - .weightStyles(WEIGHT_STYLES) .trendWeights(weights) .priorWeights(weights); expectedPopulationModels[feature.first][cid]->addSamples(params_, samples); @@ -597,8 +592,6 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { } void CMetricPopulationModelTest::testVarp() { - LOG_DEBUG(<< "*** testVarp ***"); - core_t::TTime startTime(3600); core_t::TTime 
bucketLength(3600); SModelParams params(bucketLength); @@ -699,8 +692,6 @@ void CMetricPopulationModelTest::testVarp() { } void CMetricPopulationModelTest::testComputeProbability() { - LOG_DEBUG(<< "*** testComputeProbability ***"); - maths::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; // Test that we correctly pick out synthetic the anomalies, @@ -798,8 +789,6 @@ void CMetricPopulationModelTest::testComputeProbability() { } void CMetricPopulationModelTest::testPrune() { - LOG_DEBUG(<< "*** testPrune ***"); - // This test has four people and five attributes. We expect // person 2 and attributes 1, 2 and 5 to be deleted. @@ -1002,8 +991,6 @@ void CMetricPopulationModelTest::testPrune() { } void CMetricPopulationModelTest::testKey() { - LOG_DEBUG(<< "*** testKey ***"); - function_t::EFunction countFunctions[] = { function_t::E_PopulationMetric, function_t::E_PopulationMetricMean, function_t::E_PopulationMetricMin, function_t::E_PopulationMetricMax, @@ -1038,8 +1025,6 @@ void CMetricPopulationModelTest::testKey() { } void CMetricPopulationModelTest::testFrequency() { - LOG_DEBUG(<< "*** CMetricPopulationModelTest::testFrequency ***"); - // Test we correctly compute frequencies for people and attributes. const core_t::TTime bucketLength = 600; @@ -1126,8 +1111,6 @@ void CMetricPopulationModelTest::testFrequency() { } void CMetricPopulationModelTest::testSampleRateWeight() { - LOG_DEBUG(<< "*** CMetricPopulationModelTest::testSampleRateWeight ***"); - // Test that we correctly compensate for heavy hitters. // There are 10 attributes. @@ -1247,8 +1230,6 @@ void CMetricPopulationModelTest::testSampleRateWeight() { } void CMetricPopulationModelTest::testPeriodicity() { - LOG_DEBUG(<< "*** testPeriodicity ***"); - // Create a daily periodic population and check that the // periodicity is learned and compensated (approximately). @@ -1432,8 +1413,6 @@ void CMetricPopulationModelTest::testPersistence() { } void CMetricPopulationModelTest::testIgnoreSamplingGivenDetectionRules() { - LOG_DEBUG(<< "*** testIgnoreSamplingGivenDetectionRules ***"); - // Create 2 models, one of which has a skip sampling rule. // Feed the same data into both models then add extra data // into the first model we know will be filtered out. 
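The hunks above and below all apply the same mechanical migration: the positional maths_t::TWeightStyleVec plus TDouble2Vec4VecVec weight pairing is replaced by the self-describing maths_t::TDouble2VecWeightsAry, whose entries are set through named helpers, so the separate .weightStyles(...) call disappears. A minimal sketch of the new calling convention, pieced together only from calls that appear in these diffs (model, model_, pid, cid, time and sample stand in for the surrounding test variables):

    // Sketch only. Every weight style starts out as 1.0; one array per sample.
    maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights{
        maths_t::CUnitWeights::unit(1)};
    // Styles are now set by name rather than by position in a 4-vector.
    maths_t::setCount(TDouble2Vec{model->sampleRateWeight(pid, cid)}, weights[0]);
    maths_t::setWinsorisationWeight(
        model_->winsorisationWeight(1.0, time, sample), weights[0]);
    maths::CModelAddSamplesParams params_;
    params_.integer(false)
        .propagationInterval(1.0)
        .trendWeights(weights) // note: no .weightStyles(...) call any more
        .priorWeights(weights);

Because the array always carries every style, callers that previously had to keep the style vector and the weight vector in sync can no longer get them out of step.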
diff --git a/lib/model/unittest/CModelDetailsViewTest.cc b/lib/model/unittest/CModelDetailsViewTest.cc index e101e03d62..1714acc287 100644 --- a/lib/model/unittest/CModelDetailsViewTest.cc +++ b/lib/model/unittest/CModelDetailsViewTest.cc @@ -31,8 +31,6 @@ const std::string EMPTY_STRING; } // unnamed void CModelDetailsViewTest::testModelPlot() { - LOG_DEBUG(<< "*** CModelDetailsViewTest::testModelPlot ***"); - using TDoubleVec = std::vector; using TStrVec = std::vector; using TMockModelPtr = std::unique_ptr; diff --git a/lib/model/unittest/CModelToolsTest.cc b/lib/model/unittest/CModelToolsTest.cc index fac6206087..342858d60e 100644 --- a/lib/model/unittest/CModelToolsTest.cc +++ b/lib/model/unittest/CModelToolsTest.cc @@ -62,8 +62,6 @@ maths::CMultimodalPrior multimodal() { } void CModelToolsTest::testFuzzyDeduplicate() { - LOG_DEBUG(<< "*** CModelToolsTest::testFuzzyDeduplicate ***"); - test::CRandomNumbers rng; TDoubleVec values; @@ -187,15 +185,12 @@ void CModelToolsTest::testFuzzyDeduplicate() { } void CModelToolsTest::testProbabilityCache() { - LOG_DEBUG(<< "*** CModelToolsTest::testProbabilityCache ***"); - using TBool2Vec = core::CSmallVector; using TSize1Vec = core::CSmallVector; using TTime2Vec = core::CSmallVector; using TTime2Vec1Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAryVec = std::vector; using TTail2Vec = core::CSmallVector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; @@ -207,8 +202,7 @@ void CModelToolsTest::testProbabilityCache() { test::CRandomNumbers rng; core_t::TTime time_{0}; - TDouble2Vec4Vec weight{TDouble2Vec{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; { TDoubleVec samples_[3]; @@ -222,11 +216,7 @@ void CModelToolsTest::testProbabilityCache() { rng.random_shuffle(samples.begin(), samples.end()); for (auto sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples( params, {core::make_triple(time_, TDouble2Vec(1, sample), TAG)}); } @@ -262,8 +252,7 @@ void CModelToolsTest::testProbabilityCache() { params.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(0.0) .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .addWeights(weights[0]); double expectedProbability; TTail2Vec expectedTail; bool conditional; @@ -305,8 +294,7 @@ void CModelToolsTest::testProbabilityCache() { params.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(0.0) .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .addWeights(weights[0]); double expectedProbability; TTail2Vec expectedTail; bool conditional; diff --git a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc index 9dc40c3a12..b8ece3b15f 100644 --- a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc +++ b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc @@ -39,11 +39,9 @@ using TTimeVec = std::vector; using TBool2Vec = core::CSmallVector; using TDouble1Vec = core::CSmallVector; using TDouble2Vec = 
core::CSmallVector<double, 2>; -using TDouble4Vec = core::CSmallVector<double, 4>; +using TDouble10Vec = core::CSmallVector<double, 10>; using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>; -using TDouble2Vec4Vec = core::CSmallVector<TDouble2Vec, 4>; -using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>; -using TDouble2Vec4VecVec = std::vector<TDouble2Vec4Vec>; +using TDouble2VecWeightsAryVec = std::vector<maths_t::TDouble2VecWeightsAry>; using TSize1Vec = core::CSmallVector<std::size_t, 1>; using TSize10Vec = core::CSmallVector<std::size_t, 10>; using TTail2Vec = core::CSmallVector<maths_t::ETail, 2>; @@ -69,17 +67,16 @@ using TStoredStringPtrStoredStringPtrPrDoublePrVec = using TInfluenceCalculatorCPtr = std::shared_ptr<const model::CInfluenceCalculator>; TDouble1VecDoublePr make_pair(double first, double second) { - return TDouble1VecDoublePr{TDouble1Vec{first}, second}; + return TDouble1VecDoublePr{{first}, second}; } TDouble1VecDoublePr make_pair(double first1, double first2, double second) { - return TDouble1VecDoublePr{TDouble1Vec{first1, first2}, second}; + return TDouble1VecDoublePr{{first1, first2}, second}; } -//TDouble1VecDouble1VecPr make_pair(double first1, double first2, double second1, double second2) -//{ -// return TDouble1VecDouble1VecPr{TDouble1Vec{first1, first2}, TDouble1Vec{second1, second2}}; -//} +TDouble1VecDouble1VecPr make_pair(double first1, double first2, double second1, double second2) { + return TDouble1VecDouble1VecPr{{first1, first2}, {second1, second2}}; +} maths::CModelParams params(core_t::TTime bucketLength) { double learnRate{static_cast<double>(bucketLength) / 1800.0}; @@ -109,17 +106,13 @@ TTimeDouble2VecSizeTr sample(core_t::TTime time, const TDoubleVec& sample) { template<typename SAMPLES> core_t::TTime addSamples(core_t::TTime bucketLength, const SAMPLES& samples, maths::CModel& model) { - TDouble2Vec4VecVec weights{ - maths::CConstantWeights::unit(dimension(samples[0]))}; + TDouble2VecWeightsAryVec weights{ + maths_t::CUnitWeights::unit(dimension(samples[0]))}; maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); core_t::TTime time{0}; for (const auto& sample_ : samples) { - model.addSamples(params, TTimeDouble2VecSizeTrVec{sample(time, sample_)}); + model.addSamples(params, {sample(time, sample_)}); time += bucketLength; } return time; } @@ -131,16 +124,15 @@ void computeProbability(core_t::TTime time, const maths::CModel& model, double& probability, TTail2Vec& tail) { - TDouble2Vec4Vec weight{model.seasonalWeight(0.0, time)}; + maths_t::TDouble2VecWeightsAry weight( + maths_t::CUnitWeights::unit(sample.size())); + maths_t::setSeasonalVarianceScale(model.seasonalWeight(0.0, time), weight); maths::CModelProbabilityParams params; - params.addCalculation(calculation) - .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::SEASONAL_VARIANCE) - .addWeights(weight); + params.addCalculation(calculation).addBucketEmpty(TBool2Vec{false}).addWeights(weight); bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(params, TTime2Vec1Vec{TTime2Vec{time}}, TDouble2Vec1Vec{sample}, - probability, tail, conditional, mostAnomalousCorrelate); + model.probability(params, {{time}}, {sample}, probability, tail, conditional, + mostAnomalousCorrelate); } const std::string I("I"); @@ -161,18 +153,16 @@ void computeInfluences(CALCULATOR& calculator, const std::string& influencerName, const TStrCRefDouble1VecDoublePrPrVec& influencerValues, TStoredStringPtrStoredStringPtrPrDoublePrVec& result) { - maths_t::TWeightStyleVec weightStyles; -
weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); - weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); model::CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - TDouble2Vec4Vec weights{model.seasonalWeight(0.0, time), TDouble2Vec{1.0}}; + maths_t::TDouble2VecWeightsAry weight(maths_t::CUnitWeights::unit(1)); + maths_t::setSeasonalVarianceScale(model.seasonalWeight(0.0, time), weight); model::CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); params.s_Feature = feature; params.s_Model = &model; params.s_Time = TTime2Vec1Vec{TTimeVec{time}}; params.s_Value = TDouble2Vec1Vec{TDoubleVec{value}}; params.s_Count = count; - params.s_ComputeProbabilityParams.weightStyles(weightStyles).addWeights(weights); + params.s_ComputeProbabilityParams.addWeights(weight); params.s_Probability = probability; params.s_Tail = tail; params.s_InfluencerName = model::CStringStore::influencers().get(influencerName); @@ -194,9 +184,6 @@ void computeInfluences(CALCULATOR& calculator, const std::string& influencerName, const TStrCRefDouble1VecDouble1VecPrPrVec& influencerValues, TStoredStringPtrStoredStringPtrPrDoublePrVec& result) { - maths_t::TWeightStyleVec weightStyles; - weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); - weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); model::CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); TTime2Vec times_(&times[0], &times[2]); TDouble2Vec values_(&values[0], &values[2]); @@ params.s_Times.push_back(times_); params.s_Values.push_back(values_); params.s_Counts.push_back(counts_); - params.s_ComputeProbabilityParams.weightStyles(weightStyles); - //for (auto &weight : weights) - //{ - // weight.resize(weightStyles.size(), TDouble2Vec(2, 1.0)); - // params.s_ComputeProbabilityParams.addWeights(weight); - //} + params.s_ComputeProbabilityParams.addWeights( + maths_t::CUnitWeights::unit(2)); params.s_Probability = probability; params.s_Tail = tail; params.s_MostAnomalousCorrelate.push_back(0); @@ -230,9 +213,6 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, const TDoubleVecVec& values, const TStrCRefDouble1VecDoublePrPrVecVec& influencerValues, TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) { - maths_t::TWeightStyleVec weightStyles; - weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); - weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); model::CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); model::CProbabilityAndInfluenceCalculator calculator(0.3); @@ -248,7 +228,10 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, std::size_t dimension{values[i].size() - 1}; TTime2Vec1Vec time{TTime2Vec{time_}}; TDouble2Vec1Vec value{TDouble2Vec(&values[i][0], &values[i][dimension])}; - TDouble2Vec4Vec weight(2, TDouble2Vec(dimension, values[i][dimension])); + maths_t::TDouble2VecWeightsAry weight( + maths_t::CUnitWeights::unit(dimension)); + maths_t::setSeasonalVarianceScale(TDouble2Vec(dimension, values[i][dimension]), weight); + maths_t::setCountVarianceScale(TDouble2Vec(dimension, values[i][dimension]), weight); double count{0.0}; for (const auto& influence : influencerValues[i]) { count += influence.second.second; @@ -258,7 +241,6 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, params_.addCalculation(model_t::probabilityCalculation(feature)) .seasonalConfidenceInterval(0.0)
.addBucketEmpty(TBool2Vec{false}) - .weightStyles(weightStyles) .addWeights(weight); double p = 0.0; @@ -298,8 +280,6 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, } void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator() { - LOG_DEBUG(<< "*** testInfluenceUnavailableCalculator ***"); - test::CRandomNumbers rng; core_t::TTime bucketLength{1800}; @@ -325,58 +305,52 @@ void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator( TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, model_t::E_IndividualLowCountsByBucketAndPerson, model, 0 /*time*/, 15.0 /*value*/, 1.0 /*count*/, - 0.001 /*probability*/, TTail2Vec{maths_t::E_RightTail}, - I, influencerValues, influences); + 0.001 /*probability*/, {maths_t::E_RightTail}, I, + influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); } - /*{ + { LOG_DEBUG(<< "Test correlated"); model::CInfluenceUnavailableCalculator calculator; maths::CTimeSeriesDecomposition trend{0.0, 600}; maths::CMultivariateNormalConjugate<2> prior{ - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, 0.0)}; - maths::CMultivariateTimeSeriesModel model{params(600), 0, trend, prior}; + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; TDoubleVec samples_; rng.generateNormalSamples(10.0, 1.0, 50, samples_); - TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - samples.push_back(TDouble10Vec(2, samples_[i])); + for (std::size_t i = 0u; i < samples_.size(); ++i) { + prior.addSamples({TDouble10Vec(2, samples_[i])}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - prior->addSamples(COUNT_WEIGHT, samples, weights); core_t::TTime times[] = {0, 0}; - double values[] = {15.0, 15.0}; - double counts[] = {1.0, 1.0}; + double values[]{15.0, 15.0}; + double counts[]{1.0, 1.0}; TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; influencerValues.emplace_back(TStrCRef(i1), make_pair(11.0, 11.0, 1.0, 1.0)); influencerValues.emplace_back(TStrCRef(i2), make_pair(11.0, 11.0, 1.0, 1.0)); influencerValues.emplace_back(TStrCRef(i3), make_pair(15.0, 15.0, 1.0, 1.0)); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualLowCountsByBucketAndPerson, model, - times, values, TDouble10Vec4Vec1Vec{TDouble10Vec4Vec{TDouble10Vec{1.0}}}, counts, - 0.1probability, maths_t::E_RightTail, 0, I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualLowCountsByBucketAndPerson, + model, times, values, counts, 0.1 /*probability*/, + TTail2Vec(2, maths_t::E_RightTail), I, + influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); - }*/ + } } void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluenceCalculator() { - LOG_DEBUG(<< "*** testLogProbabilityComplementInfluenceCalculator ***"); - test::CRandomNumbers rng; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); - model::CLogProbabilityComplementInfluenceCalculator calculator; core_t::TTime bucketLength{600}; @@ -494,16 +468,19 @@ void 
CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen } } } - /*{ + { LOG_DEBUG(<< "Test correlated"); - double counts[] = {1.0, 1.0}; + double counts[]{1.0, 1.0}; { LOG_DEBUG(<< "One influencer value"); - maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = - maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + maths::CTimeSeriesDecomposition trend{0.0, 600}; + maths::CMultivariateNormalConjugate<2> prior{ + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; TDoubleVec mean(2, 10.0); TDoubleVecVec covariances(2, TDoubleVec(2)); @@ -511,44 +488,38 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen covariances[0][1] = covariances[1][0] = 4.0; TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); - TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - samples.push_back(samples_[i]); + for (std::size_t i = 0u; i < samples_.size(); ++i) { + prior.addSamples({TDouble10Vec(samples_[i])}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - prior->addSamples(COUNT_WEIGHT, samples, weights); - core_t::TTime times[] = {0, 0}; - double values[] = {15.0, 15.0}; - double vs[] = {1.0, 1.0}; + core_t::TTime times[]{0, 0}; + double values[]{15.0, 15.0}; double lb, ub; TTail10Vec tail; - TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(&vs[0], &vs[2]))); - prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - sample, - weight, - lb, ub, tail); + prior.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, {TDouble10Vec(&values[0], &values[2])}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(15.0, 15.0, 1.0, 1.0))); + influencerValues.push_back({TStrCRef(i1), make_pair(15.0, 15.0, 1.0, 1.0)}); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, TDecompositionCPtr1Vec(), *prior, - times, values, weight, counts, - 0.5*(lb+ub), tail, 0, 0.0confidence, - I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualCountByBucketAndPerson, + model, times, values, counts, 0.5 * (lb + ub), + tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), + core::CContainerPrinter::print(influences)); } - { + /*{ LOG_DEBUG(<< "No trend"); - maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = - maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + maths::CTimeSeriesDecomposition trend{0.0, 600}; + maths::CMultivariateNormalConjugate<2> prior{ + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; TDoubleVec mean(2, 10.0); TDoubleVecVec covariances(2, TDoubleVec(2)); @@ -556,44 +527,31 @@ void 
CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen covariances[0][1] = covariances[1][0] = 4.0; TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); - TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - samples.push_back(samples_[i]); + for (std::size_t i = 0u; i < samples_.size(); ++i) { + prior.addSamples({TDouble10Vec(samples_[i])}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - prior->addSamples(COUNT_WEIGHT, samples, weights); - - core_t::TTime times[] = {0, 0}; - double values[] = {20.0, 10.0}; - double vs[] = {1.0, 1.0}; - TSize10Vec coordinates(std::size_t(1), 0); - TDouble10Vec2Vec lbs, ubs; + + core_t::TTime times[]{0, 0}; + double values[]{20.0, 10.0}; + double lb, ub; TTail10Vec tail; - TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(&vs[0], &vs[2]))); - prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - sample, - weight, - coordinates, - lbs, ubs, tail); - double lb = std::sqrt(lbs[0][0] * lbs[1][0]); - double ub = std::sqrt(ubs[0][0] * ubs[1][0]); + prior.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, {TDouble10Vec(&values[0], &values[2])}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair( 1.0, 1.0, 1.0, 1.0))); - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i2), make_pair( 1.0, 1.0, 1.0, 1.0))); - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i3), make_pair(18.0, 8.0, 1.0, 1.0))); + influencerValues.push_back({TStrCRef(i1), make_pair(1.0, 1.0, 1.0, 1.0)}); + influencerValues.push_back({TStrCRef(i2), make_pair(1.0, 1.0, 1.0, 1.0)}); + influencerValues.push_back({TStrCRef(i3), make_pair(18.0, 8.0, 1.0, 1.0)}); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, TDecompositionCPtr1Vec(), *prior, - times, values, weight, counts, - 0.5*(lb+ub), tail, coordinates[0], 0.0confidence, - I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualCountByBucketAndPerson, + model, times, values, counts, 0.5 * (lb + ub), + tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i3), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i3), 1)]"), + core::CContainerPrinter::print(influences)); } { LOG_DEBUG(<< "Trend"); @@ -691,13 +649,11 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], influences[j].second, 0.05); } } - } - }*/ + }*/ + } } void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { - LOG_DEBUG(<< "*** testMeanInfluenceCalculator ***"); - test::CRandomNumbers rng; model::CMeanInfluenceCalculator calculator; @@ -835,16 +791,20 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { } } } - /*{ + { LOG_DEBUG(<< "Test correlated"); - core_t::TTime times[] = {0, 0}; + core_t::TTime times[]{0, 0}; { LOG_DEBUG(<< "One influencer value"); - maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = - 
maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + maths::CTimeSeriesDecomposition trend{0.0, 600}; + maths::CMultivariateNormalConjugate<2> prior{ + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; + { TDoubleVec mean(2, 10.0); TDoubleVecVec covariances(2, TDoubleVec(2)); @@ -852,41 +812,33 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { covariances[0][1] = covariances[1][0] = 4.0; TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); - TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - samples.push_back(samples_[i]); + for (std::size_t i = 0u; i < samples_.size(); ++i) { + prior.addSamples({TDouble10Vec(samples_[i])}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - prior->addSamples(COUNT_WEIGHT, samples, weights); } - double values[] = {5.0, 5.0}; - double counts[] = {1.0, 1.0}; + double values[]{5.0, 5.0}; + double counts[]{1.0, 1.0}; double lb, ub; TTail10Vec tail; - TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); - TDouble10Vec4Vec1Vec weights(1, TDouble10Vec4Vec(2, TDouble10Vec(2, 1.0))); - prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyles, - sample, - weights, - lb, ub, tail); + prior.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, {TDouble10Vec(&values[0], &values[2])}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(5.0, 5.0, 1.0, 1.0))); + influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr( + TStrCRef(i1), make_pair(5.0, 5.0, 1.0, 1.0))); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, TDecompositionCPtr1Vec(), *prior, - times, values, weights, counts, - 0.5*(lb+ub), tail, 0, - I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualMeanByPerson, + model, times, values, counts, 0.5 * (lb + ub), + tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); } - { + /*{ LOG_DEBUG(<< "No trend"); maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = @@ -1041,19 +993,16 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { CPPUNIT_ASSERT_EQUAL(i3, *influences[1].first.second); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.6, influences[1].second, 0.08); } - } - }*/ + }*/ + } } void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculator() { - LOG_DEBUG(<< "*** testLogProbabilityInfluenceCalculator ***"); - test::CRandomNumbers rng; model::CLogProbabilityInfluenceCalculator calculator; core_t::TTime bucketLength{600}; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); { LOG_DEBUG(<< "Test univariate"); @@ -1380,8 +1329,6 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat } void CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator() { - LOG_DEBUG(<< "*** testIndicatorInfluenceCalculator ***"); - { LOG_DEBUG(<< "Test univariate"); @@ -1399,46 
+1346,48 @@ void CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator() TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, model_t::E_IndividualIndicatorOfBucketPerson, - model, 0 /*time*/, 1.0 /*value*/, 1.0 /*count*/, - 0.1 /*probability*/, TTail2Vec{maths_t::E_RightTail}, - I, influencerValues, influences); + model, 0 /*time*/, 1.0 /*value*/, 1.0 /*count*/, 0.1 /*probability*/, + {maths_t::E_RightTail}, I, influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1), ((I, i2), 1), ((I, i3), 1)]"), core::CContainerPrinter::print(influences)); } - /*{ + { LOG_DEBUG(<< "Test correlated"); model::CIndicatorInfluenceCalculator calculator; - maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = - maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + maths::CTimeSeriesDecomposition trend{0.0, 600}; + maths::CMultivariateNormalConjugate<2> prior{ + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; - core_t::TTime times[] = {0, 0}; - double values[] = {1.0, 1.0}; - double counts[] = {1.0, 1.0}; + core_t::TTime times[]{0, 0}; + double values[]{1.0, 1.0}; + double counts[]{1.0, 1.0}; TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(1.0, 1.0, 1.0, 1.0))); - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i2), make_pair(1.0, 1.0, 1.0, 1.0))); - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i3), make_pair(1.0, 1.0, 1.0, 1.0))); + influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr( + TStrCRef(i1), make_pair(1.0, 1.0, 1.0, 1.0))); + influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr( + TStrCRef(i2), make_pair(1.0, 1.0, 1.0, 1.0))); + influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr( + TStrCRef(i3), make_pair(1.0, 1.0, 1.0, 1.0))); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualIndicatorOfBucketPerson, TDecompositionCPtr1Vec(), *prior, - times, values, TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))), counts, - 0.1probability, maths_t::E_RightTail, 0, - I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualIndicatorOfBucketPerson, + model, times, values, counts, 0.1 /*probability*/, + TTail2Vec(2, maths_t::E_RightTail), I, + influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1), ((I, i2), 1), ((I, i3), 1)]"), core::CContainerPrinter::print(influences)); - }*/ + } } void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculator() { - LOG_DEBUG(<< "*** testProbabilityAndInfluenceCalculator ***"); - test::CRandomNumbers rng; core_t::TTime bucketLength{600}; @@ -1466,9 +1415,6 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat model_t::E_IndividualMeanLatLongByPerson}; const maths::CModel* models[]{&univariateModel, &multivariateModel}; - maths_t::TWeightStyleVec weightStyles; - weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); - weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); model::CPartitioningFields 
partitioningFields(EMPTY_STRING, EMPTY_STRING); { @@ -1502,13 +1448,14 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat for (std::size_t j = 0u; j < features.size(); ++j) { TDouble2Vec1Vec value{TDouble2Vec(&values[i + 5 * j][0], &values[i + 5 * j][1 + j])}; - TDouble2Vec4Vec weights{TDouble2Vec(1 + j, values[i + 5 * j][1 + j]), - TDouble2Vec(1 + j, 1.0)}; + maths_t::TDouble2VecWeightsAry weights( + maths_t::CUnitWeights::unit(1 + j)); + maths_t::setSeasonalVarianceScale( + TDouble2Vec(1 + j, values[i + 5 * j][1 + j]), weights); maths::CModelProbabilityParams params_; params_.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(0.0) .addBucketEmpty(TBool2Vec{false}) - .weightStyles(weightStyles) .addWeights(weights); double p; TTail2Vec tail; @@ -1554,25 +1501,19 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat LOG_DEBUG(<< "influencing joint probability"); TDoubleVecVec values[]{ - TDoubleVecVec{{12.0, 1.0}, {15.0, 1.0}, {7.0, 1.5}, {9.0, 1.0}, {17.0, 2.0}}, - TDoubleVecVec{{12.0, 17.0, 1.0}, - {15.0, 20.0, 1.0}, - {7.0, 12.0, 1.5}, - {9.0, 14.0, 1.0}, - {17.0, 22.0, 2.0}}}; + {{12.0, 1.0}, {15.0, 1.0}, {7.0, 1.5}, {9.0, 1.0}, {17.0, 2.0}}, + {{12.0, 17.0, 1.0}, {15.0, 20.0, 1.0}, {7.0, 12.0, 1.5}, {9.0, 14.0, 1.0}, {17.0, 22.0, 2.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 1.5)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 2.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 12.0, 1.5)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 22.0, 2.0)}}}}; + {{{TStrCRef(i2), make_pair(12.0, 1.0)}}, + {{TStrCRef(i1), make_pair(15.0, 1.0)}}, + {{TStrCRef(i2), make_pair(7.0, 1.5)}}, + {{TStrCRef(i2), make_pair(9.0, 1.0)}}, + {{TStrCRef(i1), make_pair(17.0, 2.0)}}}, + {{{TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}, + {{TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}}, + {{TStrCRef(i2), make_pair(7.0, 12.0, 1.5)}}, + {{TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}}, + {{TStrCRef(i1), make_pair(17.0, 22.0, 2.0)}}}}; for (std::size_t i = 0u; i < features.size(); ++i) { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], @@ -1587,25 +1528,19 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat LOG_DEBUG(<< "influencing extreme probability"); TDoubleVecVec values[]{ - TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.5, 1.5}, {10.8, 1.5}, {19.0, 1.0}}, - TDoubleVecVec{{11.0, 16.0, 1.0}, - {10.5, 15.5, 1.0}, - {8.5, 13.5, 1.5}, - {10.8, 15.8, 1.5}, - {19.0, 24.0, 1.0}}}; + {{11.0, 1.0}, {10.5, 1.0}, {8.5, 1.5}, {10.8, 1.5}, {19.0, 1.0}}, + {{11.0, 16.0, 1.0}, {10.5, 15.5, 1.0}, {8.5, 13.5, 1.5}, {10.8, 15.8, 1.5}, {19.0, 24.0, 1.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), 
make_pair(11.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 1.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 13.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 15.8, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 24.0, 1.0)}}}}; + {{{TStrCRef(i1), make_pair(11.0, 1.0)}}, + {{TStrCRef(i1), make_pair(10.5, 1.0)}}, + {{TStrCRef(i1), make_pair(8.5, 1.0)}}, + {{TStrCRef(i1), make_pair(10.8, 1.0)}}, + {{TStrCRef(i2), make_pair(19.0, 1.0)}}}, + {{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}}, + {{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}}, + {{TStrCRef(i1), make_pair(8.5, 13.5, 1.0)}}, + {{TStrCRef(i1), make_pair(10.8, 15.8, 1.0)}}, + {{TStrCRef(i2), make_pair(19.0, 24.0, 1.0)}}}}; for (std::size_t i = 0u; i < features.size(); ++i) { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; @@ -1621,36 +1556,24 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat LOG_DEBUG(<< "marginal influence"); TDoubleVecVec values[]{ - TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.0, 1.0}, {10.8, 1.0}, {14.0, 1.0}}, - TDoubleVecVec{{11.0, 16.0, 1.0}, - {10.5, 15.5, 1.0}, - {8.0, 13.0, 1.0}, - {10.8, 15.8, 1.0}, - {14.0, 19.0, 1.0}}}; + {{11.0, 1.0}, {10.5, 1.0}, {8.0, 1.0}, {10.8, 1.0}, {14.0, 1.0}}, + {{11.0, 16.0, 1.0}, {10.5, 15.5, 1.0}, {8.0, 13.0, 1.0}, {10.8, 15.8, 1.0}, {14.0, 19.0, 1.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 1.0)}, - {TStrCRef(i2), make_pair(10.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}, - {TStrCRef(i2), make_pair(10.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 1.0)}, - {TStrCRef(i2), make_pair(7.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}, - {TStrCRef(i2), make_pair(10.6, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(16.0, 1.0)}, - {TStrCRef(i2), make_pair(12.0, 1.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 17.0, 1.0)}, - {TStrCRef(i2), make_pair(10.0, 15.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}, - {TStrCRef(i2), make_pair(10.5, 15.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 14.0, 1.0)}, - {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}, - {TStrCRef(i2), make_pair(10.6, 15.6, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{ - {TStrCRef(i1), make_pair(16.0, 21.0, 1.0)}, - {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}}}; + {{{TStrCRef(i1), make_pair(12.0, 1.0)}, {TStrCRef(i2), make_pair(10.0, 1.0)}}, + {{TStrCRef(i1), make_pair(10.5, 1.0)}, {TStrCRef(i2), make_pair(10.5, 1.0)}}, + {{TStrCRef(i1), make_pair(9.0, 1.0)}, {TStrCRef(i2), make_pair(7.0, 1.0)}}, + {{TStrCRef(i1), make_pair(11.0, 1.0)}, {TStrCRef(i2), make_pair(10.6, 1.0)}}, + {{TStrCRef(i1), make_pair(16.0, 1.0)}, {TStrCRef(i2), 
make_pair(12.0, 1.0)}}}, + {{{TStrCRef(i1), make_pair(12.0, 17.0, 1.0)}, + {TStrCRef(i2), make_pair(10.0, 15.0, 1.0)}}, + {{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}, + {TStrCRef(i2), make_pair(10.5, 15.5, 1.0)}}, + {{TStrCRef(i1), make_pair(9.0, 14.0, 1.0)}, + {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}}, + {{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}, + {TStrCRef(i2), make_pair(10.6, 15.6, 1.0)}}, + {{TStrCRef(i1), make_pair(16.0, 21.0, 1.0)}, + {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}}}; { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; diff --git a/lib/model/unittest/CToolsTest.cc b/lib/model/unittest/CToolsTest.cc index 4ef5fd24c5..2e184ade05 100644 --- a/lib/model/unittest/CToolsTest.cc +++ b/lib/model/unittest/CToolsTest.cc @@ -21,8 +21,6 @@ void CToolsTest::testDataGatherers() { } void CToolsTest::testProbabilityAggregator() { - LOG_DEBUG(<< "****** CToolsTest::testProbabilityAggregator ******"); - // Test a variety of min aggregations. { diff --git a/lib/test/CTestTimer.cc b/lib/test/CTestTimer.cc index fec0f7e8cd..72fbff1a7c 100644 --- a/lib/test/CTestTimer.cc +++ b/lib/test/CTestTimer.cc @@ -12,8 +12,14 @@ namespace ml { namespace test { -void CTestTimer::startTest(CppUnit::Test* /* test */) { +void CTestTimer::startTest(CppUnit::Test* test) { m_StopWatch.reset(true); + if (test != nullptr) { + std::string testName{"| " + test->getName() + " |"}; + LOG_DEBUG(<< "+" << std::string(testName.length() - 2, '-') << "+"); + LOG_DEBUG(<< testName); + LOG_DEBUG(<< "+" << std::string(testName.length() - 2, '-') << "+"); + } } void CTestTimer::endTest(CppUnit::Test* test) { diff --git a/lib/ver/CBuildInfo.cc.dev_template b/lib/ver/CBuildInfo.cc.dev_template index 7f0ec0341a..9f9d3840b4 100644 --- a/lib/ver/CBuildInfo.cc.dev_template +++ b/lib/ver/CBuildInfo.cc.dev_template @@ -8,12 +8,8 @@ #include #include - -namespace ml -{ -namespace ver -{ - +namespace ml { +namespace ver { // Initialise static strings // The tokens in the template file are substituted by the Makefile @@ -22,33 +18,24 @@ const std::string CBuildInfo::VERSION_NUMBER("based on @version.number@"); const std::string CBuildInfo::BUILD_NUMBER("DEVELOPMENT BUILD by @user.name@"); const std::string CBuildInfo::COPYRIGHT("Copyright (c) @build.year@ Elasticsearch BV"); - -const std::string &CBuildInfo::versionNumber() -{ +const std::string& CBuildInfo::versionNumber() { return VERSION_NUMBER; } -const std::string &CBuildInfo::buildNumber() -{ +const std::string& CBuildInfo::buildNumber() { return BUILD_NUMBER; } -const std::string &CBuildInfo::copyright() -{ +const std::string& CBuildInfo::copyright() { return COPYRIGHT; } -std::string CBuildInfo::fullInfo() -{ +std::string CBuildInfo::fullInfo() { static const size_t BITS_PER_BYTE(8); - return core::CProgName::progName() + - " (" + core::CStringUtils::typeToString(sizeof(void *) * BITS_PER_BYTE) + " bit):" - " Version " + VERSION_NUMBER + " (Build " + BUILD_NUMBER + ") " + - COPYRIGHT; + return core::CProgName::progName() + " (" + + core::CStringUtils::typeToString(sizeof(void*) * BITS_PER_BYTE) + + " bit): Version " + VERSION_NUMBER + " (Build " + BUILD_NUMBER + ") " + COPYRIGHT; } - - } } - diff --git a/lib/ver/CBuildInfo.cc.tagged_template b/lib/ver/CBuildInfo.cc.tagged_template index 4e898c4b41..8ff4375181 100644 --- a/lib/ver/CBuildInfo.cc.tagged_template +++ b/lib/ver/CBuildInfo.cc.tagged_template @@ -8,12 +8,8 @@ #include #include - -namespace ml -{ -namespace ver -{ - +namespace ml { +namespace ver { // Initialise static strings // The tokens in the template 
file are substituted by the Makefile @@ -22,33 +18,24 @@ const std::string CBuildInfo::VERSION_NUMBER("@version.number@"); const std::string CBuildInfo::BUILD_NUMBER("@build.number@"); const std::string CBuildInfo::COPYRIGHT("Copyright (c) @build.year@ Elasticsearch BV"); - -const std::string &CBuildInfo::versionNumber() -{ +const std::string& CBuildInfo::versionNumber() { return VERSION_NUMBER; } -const std::string &CBuildInfo::buildNumber() -{ +const std::string& CBuildInfo::buildNumber() { return BUILD_NUMBER; } -const std::string &CBuildInfo::copyright() -{ +const std::string& CBuildInfo::copyright() { return COPYRIGHT; } -std::string CBuildInfo::fullInfo() -{ +std::string CBuildInfo::fullInfo() { static const size_t BITS_PER_BYTE(8); - return core::CProgName::progName() + - " (" + core::CStringUtils::typeToString(sizeof(void *) * BITS_PER_BYTE) + " bit):" - " Version " + VERSION_NUMBER + " (Build " + BUILD_NUMBER + ") " + - COPYRIGHT; + return core::CProgName::progName() + " (" + + core::CStringUtils::typeToString(sizeof(void*) * BITS_PER_BYTE) + + " bit): Version " + VERSION_NUMBER + " (Build " + BUILD_NUMBER + ") " + COPYRIGHT; } - - } } - From 705a6a43c9e9815f114fc98e6aa9c29e1f989e90 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Apr 2018 10:11:35 +0100 Subject: [PATCH 22/29] C++11 style changes for clustering code --- include/maths/CXMeansOnline.h | 123 ++++++++++++++++----------------- lib/maths/CXMeansOnline1d.cc | 124 +++++++++++++++------------------- 2 files changed, 110 insertions(+), 137 deletions(-) diff --git a/include/maths/CXMeansOnline.h b/include/maths/CXMeansOnline.h index fb45693bbd..c102a38e89 100644 --- a/include/maths/CXMeansOnline.h +++ b/include/maths/CXMeansOnline.h @@ -276,8 +276,8 @@ class CXMeansOnline : public CClusterer> { TCovariances covariances[2]; TSphericalClusterVec clusters; this->sphericalClusters(clusters); - for (std::size_t i = 0u; i < 2; ++i) { - for (std::size_t j = 0u; j < split[i].size(); ++j) { + for (std::size_t i = 0; i < 2; ++i) { + for (std::size_t j = 0; j < split[i].size(); ++j) { covariances[i].add(clusters[split[i][j]]); } } @@ -441,10 +441,10 @@ class CXMeansOnline : public CClusterer> { LOG_TRACE(<< "Checking full split"); TSizeVec assignment(remainder.size()); - for (std::size_t i = 0u; i < remainder.size(); ++i) { + for (std::size_t i = 0; i < remainder.size(); ++i) { assignment[i] = nearest(remainder[i], covariances); } - for (std::size_t i = 0u; i < assignment.size(); ++i) { + for (std::size_t i = 0; i < assignment.size(); ++i) { std::size_t j = assignment[i]; TCovariances ci; ci.add(remainder[i]); @@ -468,8 +468,8 @@ class CXMeansOnline : public CClusterer> { boost::counting_iterator(clusters.size())); COrderings::simultaneousSort( clusters, indexes, typename CSphericalCluster::SLess()); - for (std::size_t i = 0u; i < candidate.size(); ++i) { - for (std::size_t j = 0u; j < candidate[i].size(); ++j) { + for (std::size_t i = 0; i < candidate.size(); ++i) { + for (std::size_t j = 0; j < candidate[i].size(); ++j) { std::size_t k = std::lower_bound( clusters.begin(), clusters.end(), @@ -498,8 +498,8 @@ class CXMeansOnline : public CClusterer> { m_Structure.clusters(result); switch (m_DataType) { case maths_t::E_IntegerData: { - for (std::size_t i = 0u; i < result.size(); ++i) { - result[i].annotation().s_Variance += 1.0 / 12.0; + for (auto& cluster : result) { + cluster.annotation().s_Variance += 1.0 / 12.0; } break; } @@ -671,9 +671,9 @@ class CXMeansOnline : public CClusterer> { //! 
Persist state by passing information to the supplied inserter. virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const { - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + for (const auto& cluster : m_Clusters) { inserter.insertLevel(CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter, - &m_Clusters[i], _1)); + &cluster, _1)); } inserter.insertValue(DECAY_RATE_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); inserter.insertValue(HISTORY_LENGTH_TAG, m_HistoryLength, @@ -706,16 +706,16 @@ class CXMeansOnline : public CClusterer> { //! Set the type of data being clustered. virtual void dataType(maths_t::EDataType dataType) { m_DataType = dataType; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - m_Clusters[i].dataType(dataType); + for (auto& cluster : m_Clusters) { + cluster.dataType(dataType); } } //! Set the rate at which information is aged out. virtual void decayRate(double decayRate) { m_DecayRate = decayRate; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - m_Clusters[i].decayRate(decayRate); + for (auto& cluster : m_Clusters) { + cluster.decayRate(decayRate); } } @@ -775,42 +775,40 @@ class CXMeansOnline : public CClusterer> { result.reserve(m_Clusters.size()); double renormalizer = boost::numeric::bounds::lowest(); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - double likelihood = m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, point); - result.push_back(std::make_pair(m_Clusters[i].index(), likelihood)); + for (const auto& cluster : m_Clusters) { + double likelihood = cluster.logLikelihoodFromCluster(m_WeightCalc, point); + result.emplace_back(cluster.index(), likelihood); renormalizer = std::max(renormalizer, likelihood); } - double normalizer = 0.0; - for (std::size_t i = 0u; i < result.size(); ++i) { - result[i].second = std::exp(result[i].second - renormalizer); - normalizer += result[i].second; + double Z = 0.0; + for (auto& p : result) { + p.second = std::exp(p.second - renormalizer); + Z += p.second; } double pmax = 0.0; - for (std::size_t i = 0u; i < result.size(); ++i) { - result[i].second /= normalizer; - pmax = std::max(pmax, result[i].second); + for (auto& p : result) { + p.second /= Z; + pmax = std::max(pmax, p.second); } result.erase(std::remove_if(result.begin(), result.end(), CProbabilityLessThan(HARD_ASSIGNMENT_THRESHOLD * pmax)), result.end()); - normalizer = 0.0; - for (std::size_t i = 0u; i < result.size(); ++i) { - normalizer += result[i].second; + Z = 0.0; + for (const auto& p : result) { + Z += p.second; } - for (std::size_t i = 0u; i < result.size(); ++i) { - result[i].second *= count / normalizer; + for (auto& p : result) { + p.second *= count / Z; } } //! Update the clustering with \p point and return its cluster(s) //! together with their weighting factor. 
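// The cluster() implementation above turns per-cluster log-likelihoods into
// soft assignment weights by shifting with the maximum before exponentiating,
// which avoids underflow when every likelihood is tiny. A minimal
// self-contained sketch of just that renormalization step (a hypothetical
// helper, not part of this patch; it assumes a non-empty input and omits the
// HARD_ASSIGNMENT_THRESHOLD pruning the real code applies afterwards):

#include <algorithm>
#include <cmath>
#include <vector>

std::vector<double> softAssignmentWeights(const std::vector<double>& logLikelihoods) {
    // Shift so the largest term exponentiates to exactly 1.0.
    double renormalizer = *std::max_element(logLikelihoods.begin(), logLikelihoods.end());
    std::vector<double> weights;
    weights.reserve(logLikelihoods.size());
    double Z = 0.0;
    for (double logL : logLikelihoods) {
        weights.push_back(std::exp(logL - renormalizer));
        Z += weights.back();
    }
    for (double& weight : weights) {
        weight /= Z; // Normalize so the weights sum to one.
    }
    return weights;
}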
virtual void add(const TPointPrecise& x, TSizeDoublePr2Vec& clusters, double count = 1.0) { - m_HistoryLength += 1.0; - if (m_Clusters.size() == 1) { LOG_TRACE(<< "Adding " << x << " to " << m_Clusters[0].centre()); m_Clusters[0].add(x, count); - clusters.push_back(std::make_pair(m_Clusters[0].index(), count)); + clusters.emplace_back(m_Clusters[0].index(), count); if (this->maybeSplit(m_Clusters.begin())) { this->cluster(x, clusters, count); } @@ -820,9 +818,8 @@ class CXMeansOnline : public CClusterer> { CBasicStatistics::COrderStatisticsStack>; TMaxAccumulator closest; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - closest.add(std::make_pair( - m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, x), i)); + for (std::size_t i = 0; i < m_Clusters.size(); ++i) { + closest.add({m_Clusters[i].logLikelihoodFromCluster(m_WeightCalc, x), i}); } closest.sort(); LOG_TRACE(<< "closest = " << closest.print()); @@ -844,7 +841,7 @@ class CXMeansOnline : public CClusterer> { if (p1 < HARD_ASSIGNMENT_THRESHOLD * p0) { LOG_TRACE(<< "Adding " << x << " to " << cluster0->centre()); cluster0->add(x, count); - clusters.push_back(std::make_pair(cluster0->index(), count)); + clusters.emplace_back(cluster0->index(), count); if (this->maybeSplit(cluster0) || this->maybeMerge(cluster0)) { this->cluster(x, clusters, count); } @@ -858,8 +855,8 @@ class CXMeansOnline : public CClusterer> { cluster0->add(x, count0); cluster1->add(x, count1); - clusters.push_back(std::make_pair(cluster0->index(), count0)); - clusters.push_back(std::make_pair(cluster1->index(), count1)); + clusters.emplace_back(cluster0->index(), count0); + clusters.emplace_back(cluster1->index(), count1); if (this->maybeSplit(cluster0) || this->maybeSplit(cluster1) || this->maybeMerge(cluster0) || this->maybeMerge(cluster1)) { this->cluster(x, clusters, count); @@ -878,8 +875,8 @@ class CXMeansOnline : public CClusterer> { m_Clusters.push_back(CCluster(*this)); } TSizeDoublePr2Vec dummy; - for (std::size_t i = 0u; i < x.size(); ++i) { - this->add(x[i].first, dummy, x[i].second); + for (const auto& x_ : x) { + this->add(x_.first, dummy, x_.second); } } @@ -896,9 +893,9 @@ class CXMeansOnline : public CClusterer> { LOG_ERROR(<< "Can't propagate backwards in time"); return; } - m_HistoryLength *= std::exp(-m_DecayRate * time); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - m_Clusters[i].propagateForwardsByTime(time); + m_HistoryLength = (m_HistoryLength + time) * std::exp(-m_DecayRate * time); + for (auto& cluster : m_Clusters) { + cluster.propagateForwardsByTime(time); } } @@ -925,8 +922,7 @@ class CXMeansOnline : public CClusterer> { virtual double probability(std::size_t index) const { double weight = 0.0; double Z = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - const CCluster& cluster = m_Clusters[i]; + for (const auto& cluster : m_Clusters) { if (cluster.index() == index) { weight = cluster.weight(maths_t::E_ClustersFractionWeight); } @@ -965,11 +961,10 @@ class CXMeansOnline : public CClusterer> { //! The total count of points. double count() const { - double result = 0.0; - for (std::size_t i = 0; i < m_Clusters.size(); ++i) { - result += m_Clusters[i].count(); - } - return result; + return std::accumulate(m_Clusters.begin(), m_Clusters.end(), 0.0, + [](double count, const CCluster& cluster) { + return count + cluster.count(); + }); } //! Print a representation of the clusters that can be plotted in octave. @@ -1023,9 +1018,9 @@ class CXMeansOnline : public CClusterer> { //! 
Get the cluster with the index \p index. const CCluster* cluster(std::size_t index) const { - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - if (m_Clusters[i].index() == index) { - return &m_Clusters[i]; + for (const auto& cluster : m_Clusters) { + if (cluster.index() == index) { + return &cluster; } } return nullptr; @@ -1035,13 +1030,9 @@ class CXMeansOnline : public CClusterer> { double minimumSplitCount() const { double result = m_MinimumClusterCount; if (m_MinimumClusterFraction > 0.0) { - double count = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - count += m_Clusters[i].count(); - } - double scale = std::max( - m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0); - count *= m_MinimumClusterFraction / scale; + double count = this->count(); + double scale = m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)); + count *= m_MinimumClusterFraction / std::max(scale, 1.0); result = std::max(result, count); } LOG_TRACE(<< "minimumSplitCount = " << result); @@ -1109,9 +1100,9 @@ class CXMeansOnline : public CClusterer> { // Get the clusters to prune. for (;;) { TMinAccumulator prune; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { + for (std::size_t i = 0; i < m_Clusters.size(); ++i) { if (m_Clusters[i].count() < minimumCount) { - prune.add(std::make_pair(m_Clusters[i].count(), i)); + prune.add({m_Clusters[i].count(), i}); } } if (prune.count() == 0) { @@ -1150,13 +1141,13 @@ class CXMeansOnline : public CClusterer> { CCluster* result = nullptr; TMinAccumulator min; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - if (cluster.index() == m_Clusters[i].index()) { + for (auto& candidate : m_Clusters) { + if (cluster.index() == candidate.index()) { continue; } - if (min.add(CCluster::BICGain(cluster, m_Clusters[i]))) { - result = &m_Clusters[i]; + if (min.add(CCluster::BICGain(cluster, candidate))) { + result = &candidate; } } if (!result) { diff --git a/lib/maths/CXMeansOnline1d.cc b/lib/maths/CXMeansOnline1d.cc index 7523788338..30b98c4724 100644 --- a/lib/maths/CXMeansOnline1d.cc +++ b/lib/maths/CXMeansOnline1d.cc @@ -723,9 +723,9 @@ std::string CXMeansOnline1d::persistenceTag() const { } void CXMeansOnline1d::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - inserter.insertLevel(CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter, - &m_Clusters[i], _1)); + for (const auto& cluster : m_Clusters) { + inserter.insertLevel( + CLUSTER_TAG, boost::bind(&CCluster::acceptPersistInserter, &cluster, _1)); } inserter.insertValue(AVAILABLE_DISTRIBUTIONS_TAG, m_AvailableDistributions.toString()); inserter.insertValue(DECAY_RATE_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); @@ -758,15 +758,15 @@ std::size_t CXMeansOnline1d::numberClusters() const { void CXMeansOnline1d::dataType(maths_t::EDataType dataType) { m_DataType = dataType; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - m_Clusters[i].dataType(dataType); + for (auto& cluster : m_Clusters) { + cluster.dataType(dataType); } } void CXMeansOnline1d::decayRate(double decayRate) { m_DecayRate = decayRate; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - m_Clusters[i].decayRate(decayRate); + for (auto& cluster : m_Clusters) { + cluster.decayRate(decayRate); } } @@ -857,7 +857,6 @@ void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, double count) { - m_HistoryLength += 1.0; 
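// (Sketch note, not part of the patch.) The sample-count increment removed
// just above is replaced by aging in propagateForwardsByTime(): the history
// length now accrues elapsed time and is then discounted, rather than counting
// samples. The new rule, written as a standalone function with a hypothetical
// name, is
//
//     double ageHistoryLength(double historyLength, double time, double decayRate) {
//         return (historyLength + time) * std::exp(-decayRate * time);
//     }
//
// which matches the updated bodies of both propagateForwardsByTime overloads
// in this patch.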
m_Smallest.add(point); m_Largest.add(point); @@ -905,9 +904,9 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub double renormalizer = std::max(likelihoodLeft, likelihoodRight); double pLeft = std::exp(likelihoodLeft - renormalizer); double pRight = std::exp(likelihoodRight - renormalizer); - double normalizer = pLeft + pRight; - pLeft /= normalizer; - pRight /= normalizer; + double pLeftPlusRight = pLeft + pRight; + pLeft /= pLeftPlusRight; + pRight /= pLeftPlusRight; if (pLeft < HARD_ASSIGNMENT_THRESHOLD * pRight) { LOG_TRACE(<< "Adding " << point << " to " << rightCluster->centre()); @@ -949,11 +948,11 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub void CXMeansOnline1d::add(const TDoubleDoublePrVec& points) { if (m_Clusters.empty()) { - m_Clusters.push_back(CCluster(*this)); + m_Clusters.emplace_back(*this); } TSizeDoublePr2Vec dummy; - for (std::size_t i = 0u; i < points.size(); ++i) { - this->add(points[i].first, dummy, points[i].second); + for (const auto& point : points) { + this->add(point.first, dummy, point.second); } } @@ -962,9 +961,9 @@ void CXMeansOnline1d::propagateForwardsByTime(double time) { LOG_ERROR(<< "Can't propagate backwards in time"); return; } - m_HistoryLength *= std::exp(-m_DecayRate * time); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - m_Clusters[i].propagateForwardsByTime(time); + m_HistoryLength = (m_HistoryLength + time) * std::exp(-m_DecayRate * time); + for (auto& cluster : m_Clusters) { + cluster.propagateForwardsByTime(time); } } @@ -980,15 +979,14 @@ bool CXMeansOnline1d::sample(std::size_t index, std::size_t numberSamples, TDoub double CXMeansOnline1d::probability(std::size_t index) const { double weight = 0.0; - double weightSum = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - const CCluster& cluster = m_Clusters[i]; + double Z = 0.0; + for (const auto& cluster : m_Clusters) { if (cluster.index() == index) { weight = cluster.weight(maths_t::E_ClustersFractionWeight); } - weightSum += cluster.weight(maths_t::E_ClustersFractionWeight); + Z += cluster.weight(maths_t::E_ClustersFractionWeight); } - return weightSum == 0.0 ? 0.0 : weight / weightSum; + return Z == 0.0 ? 
0.0 : weight / Z; } void CXMeansOnline1d::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { @@ -1016,11 +1014,10 @@ uint64_t CXMeansOnline1d::checksum(uint64_t seed) const { } double CXMeansOnline1d::count() const { - double result = 0.0; - for (std::size_t i = 0; i < m_Clusters.size(); ++i) { - result += m_Clusters[i].count(); - } - return result; + return std::accumulate(m_Clusters.begin(), m_Clusters.end(), 0.0, + [](double count, const CCluster& cluster) { + return count + cluster.count(); + }); } const CXMeansOnline1d::TClusterVec& CXMeansOnline1d::clusters() const { @@ -1044,16 +1041,16 @@ std::string CXMeansOnline1d::printClusters() const { TDoubleDoublePr range(boost::numeric::bounds::highest(), boost::numeric::bounds::lowest()); - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - const CPrior& prior = m_Clusters[i].prior(); + for (const auto& cluster : m_Clusters) { + const CPrior& prior = cluster.prior(); TDoubleDoublePr clusterRange = prior.marginalLikelihoodConfidenceInterval(RANGE); range.first = std::min(range.first, clusterRange.first); range.second = std::max(range.second, clusterRange.second); } - double weightSum = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - weightSum += m_Clusters[i].weight(m_WeightCalc); + double Z = 0.0; + for (const auto& cluster : m_Clusters) { + Z += cluster.weight(m_WeightCalc); } TDouble1Vec x{range.first}; @@ -1063,15 +1060,14 @@ std::string CXMeansOnline1d::printClusters() const { std::ostringstream likelihoodStr; coordinatesStr << "x = ["; likelihoodStr << "likelihood = ["; - for (unsigned int i = 0u; i < POINTS; ++i, x[0] += increment) { + for (unsigned int i = 0; i < POINTS; ++i, x[0] += increment) { double likelihood = 0.0; - for (std::size_t j = 0u; j < m_Clusters.size(); ++j) { + for (const auto& cluster : m_Clusters) { double logLikelihood; - const CPrior& prior = m_Clusters[j].prior(); + const CPrior& prior = cluster.prior(); if (!(prior.jointLogMarginalLikelihood(x, maths_t::CUnitWeights::SINGLE_UNIT, logLikelihood) & (maths_t::E_FpFailed | maths_t::E_FpOverflowed))) { - likelihood += m_Clusters[j].weight(m_WeightCalc) / weightSum * - std::exp(logLikelihood); + likelihood += cluster.weight(m_WeightCalc) / Z * std::exp(logLikelihood); } } coordinatesStr << x[0] << " "; @@ -1120,9 +1116,9 @@ bool CXMeansOnline1d::acceptRestoreTraverser(const SDistributionRestoreParams& p } const CXMeansOnline1d::CCluster* CXMeansOnline1d::cluster(std::size_t index) const { - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - if (m_Clusters[i].index() == index) { - return &m_Clusters[i]; + for (const auto& cluster : m_Clusters) { + if (cluster.index() == index) { + return &cluster; } } return nullptr; @@ -1131,13 +1127,9 @@ const CXMeansOnline1d::CCluster* CXMeansOnline1d::cluster(std::size_t index) con double CXMeansOnline1d::minimumSplitCount() const { double result = m_MinimumClusterCount; if (m_MinimumClusterFraction > 0.0) { - double count = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - count += m_Clusters[i].count(); - } - double scale = - std::max(m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)), 1.0); - count *= m_MinimumClusterFraction / scale; + double count = this->count(); + double scale = m_HistoryLength * (1.0 - std::exp(-m_InitialDecayRate)); + count *= m_MinimumClusterFraction / std::max(scale, 1.0); result = std::max(result, count); } LOG_TRACE(<< "minimumSplitCount = " << result); @@ -1145,11 +1137,9 @@ double CXMeansOnline1d::minimumSplitCount() const { } bool 
CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) { - if (cluster == m_Clusters.end()) { return false; } - TDoubleDoublePr interval = this->winsorisationInterval(); if (TOptionalClusterClusterPr split = cluster->split(m_AvailableDistributions, this->minimumSplitCount(), @@ -1162,16 +1152,13 @@ bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) { (this->splitFunc())(index, split->first.index(), split->second.index()); return true; } - return false; } bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, TClusterVecItr cluster2) { - if (cluster1 == m_Clusters.end() || cluster2 == m_Clusters.end()) { return false; } - TDoubleDoublePr interval = this->winsorisationInterval(); if (cluster1->shouldMerge(*cluster2, m_AvailableDistributions, m_Smallest[0], interval)) { LOG_TRACE(<< "Merging cluster " << cluster1->index() << " at " @@ -1185,7 +1172,6 @@ bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, TClusterVecItr cluster (this->mergeFunc())(index1, index2, merged.index()); return true; } - return false; } @@ -1235,11 +1221,7 @@ TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const { // Winsorisation confidence interval, i.e. we truncate the // data to the 1 - f central confidence interval. - double totalCount = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - totalCount += m_Clusters[i].count(); - } - + double totalCount = this->count(); double leftCount = f * totalCount; double rightCount = (1.0 - f) * totalCount; LOG_TRACE(<< "totalCount = " << totalCount << " interval = [" << leftCount @@ -1249,15 +1231,15 @@ TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const { TDoubleDoublePr result; double partialCount = 0.0; - for (std::size_t i = 0u; i < m_Clusters.size(); ++i) { - double count = m_Clusters[i].count(); + for (const auto& cluster : m_Clusters) { + double count = cluster.count(); if (partialCount < leftCount && partialCount + count >= leftCount) { double p = 100.0 * (leftCount - partialCount) / count; - result.first = m_Clusters[i].percentile(p); + result.first = cluster.percentile(p); } if (partialCount < rightCount && partialCount + count >= rightCount) { double p = 100.0 * (rightCount - partialCount) / count; - result.second = m_Clusters[i].percentile(p); + result.second = cluster.percentile(p); break; } partialCount += count; @@ -1395,7 +1377,7 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, LOG_TRACE(<< "split"); if (m_Structure.buffering()) { - return TOptionalClusterClusterPr(); + return {}; } maths_t::EDataType dataType = m_Prior.dataType(); @@ -1403,19 +1385,19 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, std::size_t n = m_Structure.size(); if (n < 2) { - return TOptionalClusterClusterPr(); + return {}; } TSizeVec split; { TTupleVec categories; m_Structure.categories(n, 0, categories); - for (std::size_t i = 0u; i < categories.size(); ++i) { - detail::winsorise(interval, categories[i]); + for (auto& category : categories) { + detail::winsorise(interval, category); } if (!detail::splitSearch(minimumCount, MINIMUM_SPLIT_DISTANCE, dataType, distributions, smallest, categories, split)) { - return TOptionalClusterClusterPr(); + return {}; } } @@ -1434,8 +1416,8 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, CNormalMeanPrecConjugate leftNormal(dataType, categories[0], decayRate); CNormalMeanPrecConjugate rightNormal(dataType, categories[1], decayRate); - return TClusterClusterPr(CCluster(index1, leftNormal, classifiers[0]), - 
CCluster(index2, rightNormal, classifiers[1])); + return TClusterClusterPr{CCluster(index1, leftNormal, classifiers[0]), + CCluster(index2, rightNormal, classifiers[1])}; } bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, @@ -1458,8 +1440,8 @@ bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, return false; } - for (std::size_t i = 0u; i < categories.size(); ++i) { - detail::winsorise(interval, categories[i]); + for (auto& category : categories) { + detail::winsorise(interval, category); } double distance; From 2aa457b8402b679244d4f57f9b530004a0fb07af Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 26 Apr 2018 16:18:14 +0100 Subject: [PATCH 23/29] Fix unit test --- lib/maths/unittest/CForecastTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/maths/unittest/CForecastTest.cc b/lib/maths/unittest/CForecastTest.cc index c3de95931e..2b1bbf5775 100644 --- a/lib/maths/unittest/CForecastTest.cc +++ b/lib/maths/unittest/CForecastTest.cc @@ -150,7 +150,7 @@ void CForecastTest::testComplexNoLongTermTrend() { return scale[d] * (20.0 + y[h] + noise); }; - this->test(trend, bucketLength, 60, 24.0, 34.0, 0.13); + this->test(trend, bucketLength, 60, 24.0, 35.0, 0.13); } void CForecastTest::testComplexConstantLongTermTrend() { From bb83c5e88d1775a0efdf38a7a86f099526e9d7e7 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 4 May 2018 10:20:27 +0100 Subject: [PATCH 24/29] Improve anomaly sign calculation --- lib/maths/CTimeSeriesModel.cc | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 78c879e3dd..06ac53107c 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -326,13 +326,9 @@ class CTimeSeriesAnomalyModel { std::size_t tag() const { return m_Tag; } //! Add a result to the anomaly. - void update(const TDouble2Vec& errors) { - double norm{0.0}; - for (const auto& error : errors) { - norm += std::pow(error, 2.0); - m_Sign += error; - } - m_MeanErrorNorm.add(std::sqrt(norm)); + void update(double norm, double sign) { + m_MeanErrorNorm.add(norm); + m_Sign += sign; } //! Get the weight to apply to this anomaly on update. @@ -460,20 +456,18 @@ void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams& param [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); if (probability < LARGEST_ANOMALOUS_PROBABILITY) { - m_MeanError.add(std::sqrt( - std::accumulate(errors.begin(), errors.end(), 0.0, - [](double n, double x) { return n + x * x; }))); - + double norm{std::sqrt( + std::accumulate(errors.begin(), errors.end(), 0.0, + [](double n, double x) { return n + x * x; }))}; + m_MeanError.add(norm); double scale{CBasicStatistics::mean(m_MeanError)}; - for (auto& error : errors) { - error = scale == 0.0 ? 1.0 : error / scale; - } - + norm = (scale == 0.0 ? 
1.0 : norm / scale); + double sign{std::accumulate(errors.begin(), errors.end(), 0.0) / scale}; if (anomaly == m_Anomalies.end()) { m_Anomalies.emplace_back(tag, this->scale(time)); anomaly = m_Anomalies.end() - 1; } - anomaly->update(errors); + anomaly->update(norm, sign); } else if (anomaly != m_Anomalies.end()) { this->sample(time, *anomaly, 1.0 - anomaly->weight(this->scale(time))); m_Anomalies.erase(anomaly); From d0ae51c556f5658cf142d799b09e52394d7f13fa Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 4 May 2018 10:20:27 +0100 Subject: [PATCH 25/29] Improve anomaly sign calculation --- lib/maths/CTimeSeriesModel.cc | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 78c879e3dd..da0c8087dc 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -326,13 +326,9 @@ class CTimeSeriesAnomalyModel { std::size_t tag() const { return m_Tag; } //! Add a result to the anomaly. - void update(const TDouble2Vec& errors) { - double norm{0.0}; - for (const auto& error : errors) { - norm += std::pow(error, 2.0); - m_Sign += error; - } - m_MeanErrorNorm.add(std::sqrt(norm)); + void update(double norm, double sign) { + m_MeanErrorNorm.add(norm); + m_Sign += sign; } //! Get the weight to apply to this anomaly on update. @@ -460,20 +456,18 @@ void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams& param [tag](const CAnomaly& anomaly_) { return anomaly_.tag() == tag; }); if (probability < LARGEST_ANOMALOUS_PROBABILITY) { - m_MeanError.add(std::sqrt( - std::accumulate(errors.begin(), errors.end(), 0.0, - [](double n, double x) { return n + x * x; }))); - + double norm{std::sqrt( + std::accumulate(errors.begin(), errors.end(), 0.0, + [](double n, double x) { return n + x * x; }))}; + m_MeanError.add(norm); double scale{CBasicStatistics::mean(m_MeanError)}; - for (auto& error : errors) { - error = scale == 0.0 ? 1.0 : error / scale; - } - + norm = (scale == 0.0 ? 1.0 : norm / scale); + double sign{std::accumulate(errors.begin(), errors.end(), 0.0)}; if (anomaly == m_Anomalies.end()) { m_Anomalies.emplace_back(tag, this->scale(time)); anomaly = m_Anomalies.end() - 1; } - anomaly->update(errors); + anomaly->update(norm, sign); } else if (anomaly != m_Anomalies.end()) { this->sample(time, *anomaly, 1.0 - anomaly->weight(this->scale(time))); m_Anomalies.erase(anomaly); From 0229047f7546ae24bfacf00ddfd72c0a95c49c97 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 9 May 2018 13:59:01 +0100 Subject: [PATCH 26/29] Standardise on CTools pow2 --- lib/maths/CCooccurrences.cc | 7 +-- lib/maths/CKMostCorrelated.cc | 6 +- lib/maths/CLogNormalMeanPrecConjugate.cc | 16 ++--- lib/maths/CLogTDistribution.cc | 21 +++---- lib/maths/CTimeSeriesDecompositionDetail.cc | 11 ++-- lib/maths/CTools.cc | 10 +-- lib/maths/CXMeansOnline1d.cc | 68 ++++++++++----------- 7 files changed, 60 insertions(+), 79 deletions(-) diff --git a/lib/maths/CCooccurrences.cc b/lib/maths/CCooccurrences.cc index 044f89f755..9c7655be04 100644 --- a/lib/maths/CCooccurrences.cc +++ b/lib/maths/CCooccurrences.cc @@ -53,11 +53,6 @@ struct SCooccurrence { using TMostSignificant = CBasicStatistics::COrderStatisticsHeap; -//! Compute \p x * \p x. -double pow2(double x) { - return x * x; -} - //! Generate a random projection in the positive orthant. //! //! \param[in] dimension The dimension. 
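// The two "Improve anomaly sign calculation" patches above stop rescaling each
// error component in place and instead reduce the error vector to a scalar
// norm and sign once per result. A self-contained sketch of that calculation
// (a hypothetical free function, not part of the patch; it follows the second
// of the two patches, in which the sign is left unscaled, and the running mean
// of the norm is passed in):

#include <cmath>
#include <numeric>
#include <utility>
#include <vector>

std::pair<double, double> anomalyNormAndSign(const std::vector<double>& errors,
                                             double meanErrorNorm) {
    // Euclidean length of the error vector.
    double norm = std::sqrt(std::accumulate(
        errors.begin(), errors.end(), 0.0,
        [](double sum, double error) { return sum + error * error; }));
    // Normalize by the mean norm seen so far, guarding the first update.
    norm = meanErrorNorm == 0.0 ? 1.0 : norm / meanErrorNorm;
    // The sign is the plain sum of the components: positive for spikes,
    // negative for dips.
    double sign = std::accumulate(errors.begin(), errors.end(), 0.0);
    return {norm, sign};
}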
@@ -168,7 +163,7 @@ void seed(const TPackedBitVectorVec& indicators, TDoubleVec theta(n, 0.0); for (std::size_t i = 0u; i < n; ++i) { for (std::size_t j = 0u; j < projected.size(); ++j) { - theta[i] += pow2(projected[j][i]); + theta[i] += CTools::pow2(projected[j][i]); } theta[i] = std::acos(std::sqrt(theta[i])); } diff --git a/lib/maths/CKMostCorrelated.cc b/lib/maths/CKMostCorrelated.cc index b858157251..b9b5f20fee 100644 --- a/lib/maths/CKMostCorrelated.cc +++ b/lib/maths/CKMostCorrelated.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -86,12 +87,9 @@ class CCloserThan : public std::unary_function { : m_Threshold(threshold), m_X(x) {} bool operator()(const TPointSizePr& y) const { - return pow2(bg::distance(m_X, y.first)) < m_Threshold; + return CTools::pow2(bg::distance(m_X, y.first)) < m_Threshold; } -private: - static double pow2(double x) { return x * x; } - private: double m_Threshold; TPoint m_X; diff --git a/lib/maths/CLogNormalMeanPrecConjugate.cc b/lib/maths/CLogNormalMeanPrecConjugate.cc index c5f7ac647f..a4d8271788 100644 --- a/lib/maths/CLogNormalMeanPrecConjugate.cc +++ b/lib/maths/CLogNormalMeanPrecConjugate.cc @@ -51,11 +51,6 @@ using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; -//! Compute x * x. -inline double pow2(double x) { - return x * x; -} - const double MINIMUM_LOGNORMAL_SHAPE = 100.0; namespace detail { @@ -309,7 +304,7 @@ class CVarianceKernel { boost::math::normal normal(m_M, std::sqrt(1.0 / x(0) / m_P)); double fx = boost::math::pdf(normal, x(1)) * boost::math::pdf(gamma, x(0)); double m = std::exp(x(1) + 0.5 / x(0)); - result(0) = (m * m * (std::exp(1.0 / x(0)) - 1.0) + pow2(m - m_Mean)) * fx; + result(0) = (m * m * (std::exp(1.0 / x(0)) - 1.0) + CTools::pow2(m - m_Mean)) * fx; result(1) = fx; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to calculate mean kernel: " << e.what() @@ -473,7 +468,7 @@ class CLogMarginalLikelihood : core::CNonCopyable { double impliedShape = m_Shape + 0.5 * m_NumberSamples; double impliedRate = m_Rate + 0.5 * (logSamplesSquareDeviation + m_Precision * weightedNumberSamples * - pow2(logSamplesMean - m_Mean) / + CTools::pow2(logSamplesMean - m_Mean) / (m_Precision + weightedNumberSamples)); result = m_Constant - impliedShape * std::log(impliedRate) - logSamplesSum; @@ -585,7 +580,7 @@ class CLogSampleSquareDeviation : core::CNonCopyable { } double n = maths_t::countForUpdate(m_Weights[i]); residual = std::log(residual + x) - m_Mean; - result += n * pow2(residual); + result += n * CTools::pow2(residual); } return true; } @@ -841,7 +836,7 @@ void CLogNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples, m_GammaShape += 0.5 * numberSamples; m_GammaRate += 0.5 * (logSamplesSquareDeviation + m_GaussianPrecision * scaledNumberSamples * - pow2(logSamplesMean - m_GaussianMean) / + CTools::pow2(logSamplesMean - m_GaussianMean) / (m_GaussianPrecision + scaledNumberSamples)); m_GaussianMean = (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) / @@ -873,7 +868,8 @@ void CLogNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples, // // From which we derive the results below. 
- double minimumRate = (2.0 * m_GammaShape - 1.0) * pow2(MINIMUM_COEFFICIENT_OF_VARIATION); + double minimumRate = (2.0 * m_GammaShape - 1.0) * + CTools::pow2(MINIMUM_COEFFICIENT_OF_VARIATION); if (m_GammaRate < minimumRate) { double extraVariation = (minimumRate - m_GammaRate) / diff --git a/lib/maths/CLogTDistribution.cc b/lib/maths/CLogTDistribution.cc index cad91f9a60..05e977fdb3 100644 --- a/lib/maths/CLogTDistribution.cc +++ b/lib/maths/CLogTDistribution.cc @@ -17,13 +17,6 @@ namespace ml { namespace maths { -namespace { - -inline double square(double x) { - return x * x; -} -} - CLogTDistribution::CLogTDistribution(double degreesFreedom, double location, double scale) : m_DegreesFreedom(degreesFreedom), m_Location(location), m_Scale(scale) { } @@ -74,16 +67,17 @@ double mode(const CLogTDistribution& distribution) { // x = exp(m - (n+1) / 2 + ((n+1)^2 / 4 - n * s^2) ^ (1/2)) double degreesFreedom = distribution.degreesFreedom(); - double squareScale = square(distribution.scale()); + double squareScale = CTools::pow2(distribution.scale()); - if (square(degreesFreedom + 1.0) < 4.0 * degreesFreedom * squareScale) { + if (CTools::pow2(degreesFreedom + 1.0) < 4.0 * degreesFreedom * squareScale) { return 0.0; } double location = distribution.location(); return std::exp(location - (degreesFreedom + 1.0) / 2.0 + - std::sqrt(square(degreesFreedom + 1.0) / 4.0 - degreesFreedom * squareScale)); + std::sqrt(CTools::pow2(degreesFreedom + 1.0) / 4.0 - + degreesFreedom * squareScale)); } CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistribution& distribution) { @@ -96,16 +90,17 @@ CLogTDistribution::TOptionalDouble localMinimum(const CLogTDistribution& distrib // See the documentation in the mode function for more details. double degreesFreedom = distribution.degreesFreedom(); - double squareScale = square(distribution.scale()); + double squareScale = CTools::pow2(distribution.scale()); - if (square(degreesFreedom + 1.0) < 4.0 * degreesFreedom * squareScale) { + if (CTools::pow2(degreesFreedom + 1.0) < 4.0 * degreesFreedom * squareScale) { return CLogTDistribution::TOptionalDouble(); } double location = distribution.location(); return std::exp(location - (degreesFreedom + 1.0) / 2.0 - - std::sqrt(square(degreesFreedom + 1.0) / 4.0 - degreesFreedom * squareScale)); + std::sqrt(CTools::pow2(degreesFreedom + 1.0) / 4.0 - + degreesFreedom * squareScale)); } double pdf(const CLogTDistribution& distribution, double x) { diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index a8505b9774..961779bffa 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -73,11 +74,6 @@ const core_t::TTime DAY = core::constants::DAY; const core_t::TTime WEEK = core::constants::WEEK; const core_t::TTime MONTH = 4 * WEEK; -//! Get the square of \p x. -double pow2(double x) { - return x * x; -} - //! Compute the mean of \p mean of \p components. 
 template<typename MEAN_FUNCTION>
 double meanOf(MEAN_FUNCTION mean, const TSeasonalComponentVec& components) {
@@ -1748,8 +1744,9 @@ std::string CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::toDel
 void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::add(double error,
                                                                         double prediction,
                                                                         double weight) {
-    double errorWithComponent{winsorise(pow2(error), m_MeanErrorWithComponent)};
-    double errorWithoutComponent{winsorise(pow2(error - prediction), m_MeanErrorWithoutComponent)};
+    double errorWithComponent{winsorise(CTools::pow2(error), m_MeanErrorWithComponent)};
+    double errorWithoutComponent{winsorise(CTools::pow2(error - prediction),
+                                           m_MeanErrorWithoutComponent)};
     m_MeanErrorWithComponent.add(errorWithComponent, weight);
     m_MeanErrorWithoutComponent.add(errorWithoutComponent, weight);
 }
diff --git a/lib/maths/CTools.cc b/lib/maths/CTools.cc
index bd0689f65e..17aa2cffdb 100644
--- a/lib/maths/CTools.cc
+++ b/lib/maths/CTools.cc
@@ -594,10 +594,10 @@ operator()(const lognormal& logNormal, double x, maths_t::ETail& tail) const {
     // + 2 * s^2 * (log(x) - m))^(1/2)) if x > mode
 
     double logx = std::log(x);
-    double squareScale = pow2(logNormal.scale());
-    double discriminant = std::sqrt(
-        pow2(squareScale) + (logx - logNormal.location() + 2.0 * squareScale) *
-                                (logx - logNormal.location()));
+    double squareScale = CTools::pow2(logNormal.scale());
+    double discriminant = std::sqrt(CTools::pow2(squareScale) +
+                                    (logx - logNormal.location() + 2.0 * squareScale) *
+                                        (logx - logNormal.location()));
     double m = boost::math::mode(logNormal);
     this->tail(x, m, tail);
     double y = m * std::exp(x > m ? -discriminant : discriminant);
@@ -1831,7 +1831,7 @@ double CTools::differentialEntropy(const lognormal& logNormal) {
     double location = logNormal.location();
     double scale = logNormal.scale();
     return 0.5 * std::log(boost::math::double_constants::two_pi *
-                          boost::math::double_constants::e * pow2(scale)) +
+                          boost::math::double_constants::e * CTools::pow2(scale)) +
            location;
 }
 
diff --git a/lib/maths/CXMeansOnline1d.cc b/lib/maths/CXMeansOnline1d.cc
index 30b98c4724..0938a9c6de 100644
--- a/lib/maths/CXMeansOnline1d.cc
+++ b/lib/maths/CXMeansOnline1d.cc
@@ -69,11 +69,6 @@ struct SClusterCentreLess {
     }
 };
 
-//! Get \p x time \p x.
-double pow2(double x) {
-    return x * x;
-}
-
 //! Get the minimum of \p x, \p y and \p z.
double min(double x, double y, double z) { return std::min(std::min(x, y), z); @@ -272,14 +267,15 @@ void BICGain(maths_t::EDataType dataType, } // Log-normal (method of moments) - double s = std::log(1.0 + v / pow2(m + logNormalOffset)); + double s = std::log(1.0 + v / CTools::pow2(m + logNormalOffset)); double l = std::log(m + logNormalOffset) - s / 2.0; // Gamma (method of moments) - double a = pow2(m + gammaOffset) / v; + double a = CTools::pow2(m + gammaOffset) / v; double b = (m + gammaOffset) / v; double smin = std::max(logNormalOffset, gammaOffset); - double vmin = std::min(MIN_RELATIVE_VARIANCE * std::max(v, pow2(smin)), MIN_ABSOLUTE_VARIANCE); + double vmin = std::min(MIN_RELATIVE_VARIANCE * std::max(v, CTools::pow2(smin)), + MIN_ABSOLUTE_VARIANCE); // Mixture of normals double wl = CBasicStatistics::count(mvl) / n; @@ -291,23 +287,27 @@ void BICGain(maths_t::EDataType dataType, try { // Mixture of log-normals (method of moments) - double sl = std::log(1.0 + vl / pow2(ml + logNormalOffset)); + double sl = std::log(1.0 + vl / CTools::pow2(ml + logNormalOffset)); double ll = std::log(ml + logNormalOffset) - sl / 2.0; - double sr = std::log(1.0 + vr / pow2(mr + logNormalOffset)); + double sr = std::log(1.0 + vr / CTools::pow2(mr + logNormalOffset)); double lr = std::log(mr + logNormalOffset) - sr / 2.0; // Mixture of gammas (method of moments) - double al = pow2(ml + gammaOffset) / vl; + double al = CTools::pow2(ml + gammaOffset) / vl; double bl = (ml + gammaOffset) / vl; - double ar = pow2(mr + gammaOffset) / vr; + double ar = CTools::pow2(mr + gammaOffset) / vr; double br = (mr + gammaOffset) / vr; double log2piv = std::log(boost::math::double_constants::two_pi * v); double log2pis = std::log(boost::math::double_constants::two_pi * s); double loggn = boost::math::lgamma(a) - a * std::log(b); - double log2pivl = std::log(boost::math::double_constants::two_pi * vl / pow2(wl)); - double log2pivr = std::log(boost::math::double_constants::two_pi * vr / pow2(wr)); - double log2pisl = std::log(boost::math::double_constants::two_pi * sl / pow2(wl)); - double log2pisr = std::log(boost::math::double_constants::two_pi * sr / pow2(wr)); + double log2pivl = + std::log(boost::math::double_constants::two_pi * vl / CTools::pow2(wl)); + double log2pivr = + std::log(boost::math::double_constants::two_pi * vr / CTools::pow2(wr)); + double log2pisl = + std::log(boost::math::double_constants::two_pi * sl / CTools::pow2(wl)); + double log2pisr = + std::log(boost::math::double_constants::two_pi * sr / CTools::pow2(wr)); double loggnl = boost::math::lgamma(al) - al * std::log(bl) - std::log(wl); double loggnr = boost::math::lgamma(ar) - ar * std::log(br) - std::log(wr); @@ -318,20 +318,20 @@ void BICGain(maths_t::EDataType dataType, if (vi == 0.0) { double li = std::log(mi + logNormalOffset); - ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); - ll1l += ni * (pow2(li - l) / s + 2.0 * li + log2pis); + ll1n += ni * ((vi + CTools::pow2(mi - m)) / v + log2piv); + ll1l += ni * (CTools::pow2(li - l) / s + 2.0 * li + log2pis); ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); - ll2nl += ni * ((vi + pow2(mi - ml)) / vl + log2pivl); - ll2ll += ni * (pow2(li - ll) / sl + 2.0 * li + log2pisl); + ll2nl += ni * ((vi + CTools::pow2(mi - ml)) / vl + log2pivl); + ll2ll += ni * (CTools::pow2(li - ll) / sl + 2.0 * li + log2pisl); ll2gl += ni * 2.0 * (bl * (mi + gammaOffset) - (al - 1.0) * li + loggnl); } else { - double si = std::log(1.0 + vi / pow2(mi + logNormalOffset)); + double si = std::log(1.0 + 
vi / CTools::pow2(mi + logNormalOffset)); double li = std::log(mi + logNormalOffset) - si / 2.0; - ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); - ll1l += ni * ((si + pow2(li - l)) / s + 2.0 * li + log2pis); + ll1n += ni * ((vi + CTools::pow2(mi - m)) / v + log2piv); + ll1l += ni * ((si + CTools::pow2(li - l)) / s + 2.0 * li + log2pis); ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); - ll2nl += ni * ((vi + pow2(mi - ml)) / vl + log2pivl); - ll2ll += ni * ((si + pow2(li - ll)) / sl + 2.0 * li + log2pisl); + ll2nl += ni * ((vi + CTools::pow2(mi - ml)) / vl + log2pivl); + ll2ll += ni * ((si + CTools::pow2(li - ll)) / sl + 2.0 * li + log2pisl); ll2gl += ni * 2.0 * (bl * (mi + gammaOffset) - (al - 1.0) * li + loggnl); } } @@ -343,20 +343,20 @@ void BICGain(maths_t::EDataType dataType, if (vi == 0.0) { double li = std::log(mi + logNormalOffset); - ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); - ll1l += ni * (pow2(li - l) / s + 2.0 * li + log2pis); + ll1n += ni * ((vi + CTools::pow2(mi - m)) / v + log2piv); + ll1l += ni * (CTools::pow2(li - l) / s + 2.0 * li + log2pis); ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); - ll2nr += ni * ((vi + pow2(mi - mr)) / vr + log2pivr); - ll2lr += ni * (pow2(li - lr) / sr + 2.0 * li + log2pisr); + ll2nr += ni * ((vi + CTools::pow2(mi - mr)) / vr + log2pivr); + ll2lr += ni * (CTools::pow2(li - lr) / sr + 2.0 * li + log2pisr); ll2gr += ni * 2.0 * (br * (mi + gammaOffset) - (ar - 1.0) * li + loggnr); } else { - double si = std::log(1.0 + vi / pow2(mi + logNormalOffset)); + double si = std::log(1.0 + vi / CTools::pow2(mi + logNormalOffset)); double li = std::log(mi + logNormalOffset) - si / 2.0; - ll1n += ni * ((vi + pow2(mi - m)) / v + log2piv); - ll1l += ni * ((si + pow2(li - l)) / s + 2.0 * li + log2pis); + ll1n += ni * ((vi + CTools::pow2(mi - m)) / v + log2piv); + ll1l += ni * ((si + CTools::pow2(li - l)) / s + 2.0 * li + log2pis); ll1g += ni * 2.0 * (b * (mi + gammaOffset) - (a - 1.0) * li + loggn); - ll2nr += ni * ((vi + pow2(mi - mr)) / vr + log2pivr); - ll2lr += ni * ((si + pow2(li - lr)) / sr + 2.0 * li + log2pisr); + ll2nr += ni * ((vi + CTools::pow2(mi - mr)) / vr + log2pivr); + ll2lr += ni * ((si + CTools::pow2(li - lr)) / sr + 2.0 * li + log2pisr); ll2gr += ni * 2.0 * (br * (mi + gammaOffset) - (ar - 1.0) * li + loggnr); } } From f3e03722073b8d154a2c9b655d4ab2bbf241116e Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 9 May 2018 14:44:21 +0100 Subject: [PATCH 27/29] Fix tests and formatting --- include/maths/CPriorDetail.h | 6 +- include/maths/CTimeSeriesDecomposition.h | 2 +- .../maths/CTimeSeriesDecompositionDetail.h | 10 +- include/maths/CTimeSeriesModel.h | 14 +- include/maths/Constants.h | 8 +- lib/maths/CRestoreParams.cc | 2 +- lib/maths/CTimeSeriesChangeDetector.cc | 10 +- lib/maths/CTimeSeriesDecomposition.cc | 2 + lib/maths/CTimeSeriesDecompositionDetail.cc | 9 +- lib/maths/CTimeSeriesModel.cc | 165 +++++++++--------- .../unittest/CTimeSeriesChangeDetectorTest.cc | 4 +- lib/maths/unittest/CTimeSeriesModelTest.cc | 51 +++--- lib/maths/unittest/CTrendComponentTest.cc | 2 +- 13 files changed, 158 insertions(+), 127 deletions(-) diff --git a/include/maths/CPriorDetail.h b/include/maths/CPriorDetail.h index 3a9c3c1006..03087a6e05 100644 --- a/include/maths/CPriorDetail.h +++ b/include/maths/CPriorDetail.h @@ -30,7 +30,7 @@ bool CPrior::expectation(const F& f, double x{interval.first}; double dx{(interval.second - interval.first) / n}; - double normalizationFactor{0.0}; + double Z{0.0}; 
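    // Z accumulates the marginal likelihood mass over the same integration
    // grid as the numerator, so the loop that follows effectively computes
    // E[f] = (sum_i int_i f * likelihood) / (sum_i int_i likelihood); this is
    // why the accumulated result is divided by Z once the loop completes.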
TDoubleWeightsAry1Vec weights{weight}; CPrior::CLogMarginalLikelihood logLikelihood(*this, weights); CCompositeFunctions::CExp likelihood(logLikelihood); @@ -44,9 +44,9 @@ bool CPrior::expectation(const F& f, return false; } result += productIntegral; - normalizationFactor += likelihoodIntegral; + Z += likelihoodIntegral; } - result /= normalizationFactor; + result /= Z; return true; } diff --git a/include/maths/CTimeSeriesDecomposition.h b/include/maths/CTimeSeriesDecomposition.h index 0b5233468e..38c72c8bb0 100644 --- a/include/maths/CTimeSeriesDecomposition.h +++ b/include/maths/CTimeSeriesDecomposition.h @@ -60,7 +60,7 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt //! use estimate a seasonal component. explicit CTimeSeriesDecomposition(double decayRate = 0.0, core_t::TTime bucketLength = 0, - std::size_t seasonalComponentSize = DECOMPOSITION_COMPONENT_SIZE); + std::size_t seasonalComponentSize = COMPONENT_SIZE); //! Construct from part of a state document. CTimeSeriesDecomposition(const STimeSeriesDecompositionRestoreParams& params, diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h index d0bb4670be..7b99bd6711 100644 --- a/include/maths/CTimeSeriesDecompositionDetail.h +++ b/include/maths/CTimeSeriesDecompositionDetail.h @@ -176,6 +176,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! \brief Scans through increasingly low frequencies looking for custom //! diurnal and any other large amplitude seasonal components. class MATHS_EXPORT CPeriodicityTest : public CHandler { + public: + //! Test types (categorised as short and long period tests). + enum ETest { E_Short, E_Long }; + public: CPeriodicityTest(double decayRate, core_t::TTime bucketLength); CPeriodicityTest(const CPeriodicityTest& other, bool isForForecast = false); @@ -198,6 +202,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! Test to see whether any seasonal components are present. void test(const SAddValue& message); + //! Clear the test identified by \p test. + void clear(ETest test, core_t::TTime time); + //! Age the test to account for the interval \p end - \p start //! elapsed time. void propagateForwards(core_t::TTime start, core_t::TTime end); @@ -216,9 +223,6 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { using TExpandingWindowPtr = std::shared_ptr; using TExpandingWindowPtrAry = boost::array; - //! Test types (categorised as short and long period tests). - enum ETest { E_Short, E_Long }; - private: //! The bucket lengths to use to test for short period components. static const TTimeVec SHORT_BUCKET_LENGTHS; diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h index 16cc62e6f0..8574c1df5e 100644 --- a/include/maths/CTimeSeriesModel.h +++ b/include/maths/CTimeSeriesModel.h @@ -30,19 +30,21 @@ struct SChangeDescription; struct SDistributionRestoreParams; struct SModelRestoreParams; +namespace winsorisation { //! Computes a Winsorisation weight for \p value based on its //! one tail p-value. MATHS_EXPORT -double tailWinsorisationWeight(const CPrior& prior, double derate, double scale, double value); +double tailWeight(const CPrior& prior, double derate, double scale, double value); //! Computes a Winsorisation weight for \p value based on its //! marginal for \p dimension one tail p-value. 
MATHS_EXPORT -double tailWinsorisationWeight(const CMultivariatePrior& prior, - std::size_t dimension, - double derate, - double scale, - const core::CSmallVector& value); +double tailWeight(const CMultivariatePrior& prior, + std::size_t dimension, + double derate, + double scale, + const core::CSmallVector& value); +} //! \brief A CModel implementation for modeling a univariate time series. class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { diff --git a/include/maths/Constants.h b/include/maths/Constants.h index 0f11c0fdf9..18707acc67 100644 --- a/include/maths/Constants.h +++ b/include/maths/Constants.h @@ -86,6 +86,10 @@ const double COMPONENT_STATISTICALLY_SIGNIFICANT{0.001}; const double LOG_COMPONENT_STATISTICALLY_SIGNIFICANCE{ std::log(COMPONENT_STATISTICALLY_SIGNIFICANT)}; +//! The default number of regression models used in periodic and +//! calendar cyclic components of the trend decomposition. +const std::size_t COMPONENT_SIZE{36u}; + //! The minimum variance scale for which the likelihood function //! can be accurately adjusted. For smaller scales errors are //! introduced for some priors. @@ -96,10 +100,6 @@ const double MINIMUM_ACCURATE_VARIANCE_SCALE{0.5}; //! introduced for some priors. const double MAXIMUM_ACCURATE_VARIANCE_SCALE{2.0}; -//! The default number of regression models used in periodic and -//! calendar cyclic components of the trend decomposition. -const std::size_t DECOMPOSITION_COMPONENT_SIZE{36u}; - //! The confidence interval to use for the seasonal trend and //! variation. We detrend to the nearest point in the confidence //! interval and use the upper confidence interval variance when diff --git a/lib/maths/CRestoreParams.cc b/lib/maths/CRestoreParams.cc index d842095b9a..340f4902d4 100644 --- a/lib/maths/CRestoreParams.cc +++ b/lib/maths/CRestoreParams.cc @@ -34,7 +34,7 @@ STimeSeriesDecompositionRestoreParams::STimeSeriesDecompositionRestoreParams( core_t::TTime minimumBucketLength, const SDistributionRestoreParams& changeModelParams) : s_DecayRate{decayRate}, s_MinimumBucketLength{minimumBucketLength}, - s_ComponentSize{DECOMPOSITION_COMPONENT_SIZE}, s_ChangeModelParams{changeModelParams} { + s_ComponentSize{COMPONENT_SIZE}, s_ChangeModelParams{changeModelParams} { } SModelRestoreParams::SModelRestoreParams(const CModelParams& params, diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index e59eec380b..8dad8364c8 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -56,6 +56,7 @@ const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9}; const std::size_t COUNT_TO_INITIALIZE{5u}; const double MINIMUM_SCALE{0.1}; const double MAXIMUM_SCALE{10.0}; +const double WINSORISATION_DERATE{1.0}; } SChangeDescription::SChangeDescription(EDescription description, double value, const TPriorPtr& residualModel) @@ -464,7 +465,8 @@ void CUnivariateLevelShiftModel::addSamples(const std::size_t count, double value{samples_[i].second}; double seasonalScale{maths_t::seasonalVarianceScale(weights[i])}; double sample{trendModel.detrend(time, value, 0.0) - shift}; - double weight{tailWinsorisationWeight(residualModel, 1.0, seasonalScale, sample)}; + double weight{winsorisation::tailWeight( + residualModel, WINSORISATION_DERATE, seasonalScale, sample)}; samples.push_back(sample); maths_t::setWinsorisationWeight(weight, weights[i]); m_SampleCount += maths_t::count(weights[i]); @@ -577,7 +579,8 @@ void CUnivariateLinearScaleModel::addSamples(const std::size_t count, double 
seasonalScale{maths_t::seasonalVarianceScale(weights[i])}; double prediction{CBasicStatistics::mean(trendModel.value(time, 0.0))}; double sample{value - scale * prediction}; - double weight{tailWinsorisationWeight(residualModel, 1.0, seasonalScale, sample)}; + double weight{winsorisation::tailWeight( + residualModel, WINSORISATION_DERATE, seasonalScale, sample)}; samples.push_back(sample); maths_t::setWinsorisationWeight(weight, weights[i]); m_SampleCount += maths_t::count(weights[i]); @@ -659,7 +662,8 @@ void CUnivariateTimeShiftModel::addSamples(const std::size_t count, double value{samples_[i].second}; double seasonalScale{maths_t::seasonalVarianceScale(weights[i])}; double sample{this->trendModel().detrend(time + m_Shift, value, 0.0)}; - double weight{tailWinsorisationWeight(residualModel, 1.0, seasonalScale, sample)}; + double weight{winsorisation::tailWeight( + residualModel, WINSORISATION_DERATE, seasonalScale, sample)}; samples.push_back(sample); maths_t::setWinsorisationWeight(weight, weights[i]); } diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index 288fafbea3..dbd352d3a4 100644 --- a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -252,9 +252,11 @@ bool CTimeSeriesDecomposition::applyChange(core_t::TTime time, switch (change.s_Description) { case SChangeDescription::E_LevelShift: m_Components.shiftLevel(time, value, change.s_Value[0]); + m_PeriodicityTest.clear(CPeriodicityTest::E_Short, time); break; case SChangeDescription::E_LinearScale: m_Components.linearScale(time, change.s_Value[0]); + m_PeriodicityTest.clear(CPeriodicityTest::E_Short, time); break; case SChangeDescription::E_TimeShift: m_TimeShift += static_cast(change.s_Value[0]); diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index 961779bffa..e06d69bba5 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -578,6 +578,13 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::test(const SAddValue& mes } } +void CTimeSeriesDecompositionDetail::CPeriodicityTest::clear(ETest test, core_t::TTime time) { + if (m_Windows[test] != nullptr) { + m_Windows[test].reset(this->newWindow(test)); + m_Windows[test]->initialize(time); + } +} + void CTimeSeriesDecompositionDetail::CPeriodicityTest::propagateForwards(core_t::TTime start, core_t::TTime end) { stepwisePropagateForwards(DAY, start, end, m_Windows[E_Short]); @@ -1127,7 +1134,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag : CTools::logisticFunction(v1 / vt, 0.1, 1.0, -1.0))}; m_UsingTrendForPrediction = (p >= 0.25); if (m_UsingTrendForPrediction) { - LOG_DEBUG("Detected trend at " << time); + LOG_DEBUG(<< "Detected trend at " << time); } *m_Watcher = m_UsingTrendForPrediction; } diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index da0c8087dc..1455b599f3 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -68,36 +68,7 @@ const double MINIMUM_CORRELATE_PRIOR_SAMPLE_COUNT{24.0}; const std::size_t SLIDING_WINDOW_SIZE{12u}; const TSize10Vec NOTHING_TO_MARGINALIZE; const TSizeDoublePr10Vec NOTHING_TO_CONDITION; -const double WINSORISED_FRACTION{1e-2}; -const double MINIMUM_WINSORISATION_WEIGHT_FRACTION{1e-10}; -const double MINIMUM_TAIL_WINSORISATION_WEIGHT{1e-2}; -const double MINIMUM_CHANGE_WINSORISATION_WEIGHT{1e-1}; -const double LOG_WINSORISED_FRACTION{std::log(WINSORISED_FRACTION)}; 
-const double LOG_MINIMUM_WEIGHT_FRACTION{std::log(MINIMUM_WINSORISATION_WEIGHT_FRACTION)}; -const double LOG_MINIMUM_TAIL_WINSORISATION_WEIGHT{std::log(MINIMUM_TAIL_WINSORISATION_WEIGHT)}; -const double MINUS_LOG_TOLERANCE{ - -std::log(1.0 - 100.0 * std::numeric_limits::epsilon())}; - -//! Derate the minimum Winsorisation weight. -double deratedMinimumWinsorisationWeight(double derate) { - derate = CTools::truncate(derate, 0.0, 1.0); - return MINIMUM_TAIL_WINSORISATION_WEIGHT + - (0.5 - MINIMUM_TAIL_WINSORISATION_WEIGHT) * derate; -} - -//! Get the one tail p-value from a specified Winsorisation weight. -double pValueFromTailWinsorisationWeight(double weight) { - if (weight >= 1.0) { - return 1.0; - } - - double logw{std::log(std::max(weight, MINIMUM_TAIL_WINSORISATION_WEIGHT))}; - return std::exp( - 0.5 * (LOG_WINSORISED_FRACTION - - std::sqrt(CTools::pow2(LOG_WINSORISED_FRACTION) + - 4.0 * logw / LOG_MINIMUM_TAIL_WINSORISATION_WEIGHT * LOG_MINIMUM_WEIGHT_FRACTION * - (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)))); -} +const double CHANGE_P_VALUE{1e-5}; //! Optionally randomly sample from \p indices. TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus& rng, @@ -120,18 +91,6 @@ TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus& rng, return TOptionalSize{}; } -//! Computes a Winsorisation weight based on the chance that the -//! time series is currently undergoing a change. -double changeWinsorisationWeight(const TChangeDetectorPtr& detector) { - if (detector != nullptr) { - std::size_t dummy; - return std::max(CTools::logisticFunction(detector->decisionFunction(dummy), - 0.1, 1.0, -1.0), - MINIMUM_CHANGE_WINSORISATION_WEIGHT); - } - return 1.0; -} - //! Convert \p value to comma separated string. std::string toDelimited(const TTimeDoublePr& value) { return core::CStringUtils::typeToString(value.first) + ',' + @@ -196,55 +155,97 @@ const std::string ERROR_MULTIVARIATE("Forecast not supported for multivariate fe } } -double tailWinsorisationWeight(const CPrior& prior, double derate, double scale, double value) { - double deratedMinimumWeight{deratedMinimumWinsorisationWeight(derate)}; +namespace winsorisation { +namespace { +const double MAXIMUM_P_VALUE{1e-3}; +const double MINIMUM_P_VALUE{1e-10}; +const double MINIMUM_WEIGHT{1e-2}; +const double LOG_MAXIMUM_P_VALUE{std::log(MAXIMUM_P_VALUE)}; +const double LOG_MINIMUM_P_VALUE{std::log(MINIMUM_P_VALUE)}; +const double LOG_MINIMUM_WEIGHT{std::log(MINIMUM_WEIGHT)}; +const double MINUS_LOG_TOLERANCE{ + -std::log(1.0 - 100.0 * std::numeric_limits::epsilon())}; + +//! Derate the minimum Winsorisation weight. +double deratedMinimumWeight(double derate) { + derate = CTools::truncate(derate, 0.0, 1.0); + return MINIMUM_WEIGHT + (0.5 - MINIMUM_WEIGHT) * derate; +} + +//! Get the one tail p-value from a specified Winsorisation weight. +double pValueFromWeight(double weight) { + if (weight >= 1.0) { + return 1.0; + } + + double logw{std::log(std::max(weight, MINIMUM_WEIGHT))}; + return std::exp(0.5 * (LOG_MAXIMUM_P_VALUE - + std::sqrt(CTools::pow2(LOG_MAXIMUM_P_VALUE) + + 4.0 * logw / LOG_MINIMUM_WEIGHT * LOG_MINIMUM_P_VALUE * + (LOG_MINIMUM_P_VALUE - LOG_MAXIMUM_P_VALUE)))); +} + +//! Computes a Winsorisation weight based on the chance that the +//! time series is currently undergoing a change. 
+double changeWeight(const TChangeDetectorPtr& detector) { + if (detector != nullptr) { + std::size_t dummy; + return std::max(CTools::logisticFunction(detector->decisionFunction(dummy), + 0.1, 1.0, -1.0), + MINIMUM_WEIGHT); + } + return 1.0; +} +} + +double tailWeight(const CPrior& prior, double derate, double scale, double value) { + double minimumWeight{deratedMinimumWeight(derate)}; + double f{}; double lowerBound; double upperBound; if (!prior.minusLogJointCdf({value}, {maths_t::seasonalVarianceScaleWeight(scale)}, lowerBound, upperBound)) { return 1.0; - } - if (upperBound < MINUS_LOG_TOLERANCE && - !prior.minusLogJointCdfComplement( - {value}, {maths_t::seasonalVarianceScaleWeight(scale)}, lowerBound, upperBound)) { + } else if (upperBound >= MINUS_LOG_TOLERANCE) { + f = std::exp(-(lowerBound + upperBound) / 2.0); + f = std::min(f, 1.0 - f); + } else if (!prior.minusLogJointCdfComplement( + {value}, {maths_t::seasonalVarianceScaleWeight(scale)}, lowerBound, upperBound)) { return 1.0; + } else { + f = std::exp(-(lowerBound + upperBound) / 2.0); } - double f{std::exp(-(lowerBound + upperBound) / 2.0)}; - f = std::min(f, 1.0 - f); - if (f >= WINSORISED_FRACTION) { + if (f >= MAXIMUM_P_VALUE) { return 1.0; } - if (f <= MINIMUM_WINSORISATION_WEIGHT_FRACTION) { - return deratedMinimumWeight; + if (f <= MINIMUM_P_VALUE) { + return minimumWeight; } - // We interpolate between 1.0 and the minimum weight on the - // interval [WINSORISED_FRACTION, MINIMUM_WEIGHT_FRACTION] - // by fitting (f / WF)^(-c log(f)) where WF is the Winsorised - // fraction and c is determined by solving: - // MW = (MWF / WF)^(-c log(MWF)) + // We logarithmically interpolate between 1.0 and the minimum weight + // on the interval [MAXIMUM_P_VALUE, MINIMUM_P_VALUE]. - double deratedExponent{-std::log(deratedMinimumWeight) / LOG_MINIMUM_WEIGHT_FRACTION / - (LOG_MINIMUM_WEIGHT_FRACTION - LOG_WINSORISED_FRACTION)}; + double maximumExponent{-std::log(minimumWeight) / LOG_MINIMUM_P_VALUE / + (LOG_MINIMUM_P_VALUE - LOG_MAXIMUM_P_VALUE)}; double logf{std::log(f)}; - double result{std::exp(-deratedExponent * logf * (logf - LOG_WINSORISED_FRACTION))}; + double result{std::exp(-maximumExponent * logf * (logf - LOG_MAXIMUM_P_VALUE))}; if (CMathsFuncs::isNan(result)) { return 1.0; } - LOG_TRACE("sample = " << value << " min(F, 1-F) = " << f << ", weight = " << result); + LOG_TRACE(<< "sample = " << value << " min(F, 1-F) = " << f << ", weight = " << result); return result; } -double tailWinsorisationWeight(const CMultivariatePrior& prior, - std::size_t dimension, - double derate, - double scale, - const core::CSmallVector& value) { +double tailWeight(const CMultivariatePrior& prior, + std::size_t dimension, + double derate, + double scale, + const core::CSmallVector& value) { std::size_t dimensions = prior.dimension(); TSizeDoublePr10Vec condition(dimensions - 1); for (std::size_t i = 0u, j = 0u; i < dimensions; ++i) { @@ -254,7 +255,8 @@ double tailWinsorisationWeight(const CMultivariatePrior& prior, } std::shared_ptr conditional( prior.univariate(NOTHING_TO_MARGINALIZE, condition).first); - return tailWinsorisationWeight(*conditional, derate, scale, value[dimension]); + return tailWeight(*conditional, derate, scale, value[dimension]); +} } //! \brief A model of anomalous sections of a time series. 
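As an aside before the anomaly-model hunks: the interpolation introduced above is easy to sanity check. tailWeight maps a one tail p-value to a weight log-quadratically, from 1.0 at MAXIMUM_P_VALUE down to the (derated) minimum weight at MINIMUM_P_VALUE, and pValueFromWeight inverts that map by solving the quadratic in log(p). A standalone sketch with derate = 0 and the constants copied from this diff (illustrative only, not library code):

#include <cmath>
#include <iostream>

int main() {
    const double logMaxP = std::log(1e-3);  // LOG_MAXIMUM_P_VALUE
    const double logMinP = std::log(1e-10); // LOG_MINIMUM_P_VALUE
    const double logMinW = std::log(1e-2);  // LOG_MINIMUM_WEIGHT

    // Forward map (cf. tailWeight): weight = exp(-c log(p) (log(p) - logMaxP)),
    // with c chosen so that the weight at MINIMUM_P_VALUE is MINIMUM_WEIGHT.
    double c = -logMinW / logMinP / (logMinP - logMaxP);
    double p = 1e-6; // an example one tail p-value
    double logp = std::log(p);
    double weight = std::exp(-c * logp * (logp - logMaxP));

    // Inverse map (cf. pValueFromWeight): solve the quadratic in log(p),
    // taking the root below logMaxP.
    double logw = std::log(weight);
    double recovered = std::exp(
        0.5 * (logMaxP - std::sqrt(logMaxP * logMaxP +
                                   4.0 * logw / logMinW * logMinP *
                                       (logMinP - logMaxP))));

    std::cout << weight << ' ' << recovered << '\n'; // recovered ~= 1e-6
    return 0;
}

The round trip recovering p (here weight ~= 0.31 for p = 1e-6) is what lets testAndApplyChange later convert a stored Winsorisation weight back into a p-value and compare it against CHANGE_P_VALUE.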
@@ -457,8 +459,8 @@ void CTimeSeriesAnomalyModel::updateAnomaly(const CModelProbabilityParams& param if (probability < LARGEST_ANOMALOUS_PROBABILITY) { double norm{std::sqrt( - std::accumulate(errors.begin(), errors.end(), 0.0, - [](double n, double x) { return n + x * x; }))}; + std::accumulate(errors.begin(), errors.end(), 0.0, + [](double n, double x) { return n + x * x; }))}; m_MeanError.add(norm); double scale{CBasicStatistics::mean(m_MeanError)}; norm = (scale == 0.0 ? 1.0 : norm / scale); @@ -781,8 +783,7 @@ void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap) { } CUnivariateTimeSeriesModel::TDouble2Vec -CUnivariateTimeSeriesModel::mode(core_t::TTime time, - const TDouble2VecWeightsAry& weights) const { +CUnivariateTimeSeriesModel::mode(core_t::TTime time, const TDouble2VecWeightsAry& weights) const { return {m_ResidualModel->marginalLikelihoodMode(unpack(weights)) + CBasicStatistics::mean(m_TrendModel->value(time))}; } @@ -1030,7 +1031,8 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para // Declared outside the loop to minimize the number of times they are created. TSize10Vec variable(1); TDouble10Vec1Vec sample{TDouble10Vec(2)}; - maths_t::TDouble10VecWeightsAry1Vec weights{maths_t::CUnitWeights::unit(2)}; + maths_t::TDouble10VecWeightsAry1Vec weights{ + maths_t::CUnitWeights::unit(2)}; TDouble2Vec probabilityBucketEmpty(2); TDouble10Vec2Vec pli, pui; TTail10Vec ti; @@ -1121,8 +1123,8 @@ CUnivariateTimeSeriesModel::winsorisationWeight(double derate, const TDouble2Vec& value) const { double scale{this->seasonalWeight(0.0, time)[0]}; double sample{m_TrendModel->detrend(time, value[0], 0.0)}; - return {tailWinsorisationWeight(*m_ResidualModel, derate, scale, sample) * - changeWinsorisationWeight(m_ChangeDetector)}; + return {winsorisation::tailWeight(*m_ResidualModel, derate, scale, sample) * + winsorisation::changeWeight(m_ChangeDetector)}; } CUnivariateTimeSeriesModel::TDouble2Vec @@ -1294,8 +1296,8 @@ void CUnivariateTimeSeriesModel::reinitializeResidualModel(double learnRate, residualModel.setToNonInformative(0.0, residualModel.decayRate()); if (!slidingWindow.empty()) { double slidingWindowLength{static_cast(slidingWindow.size())}; - maths_t::TDoubleWeightsAry1Vec weight{ - maths_t::countWeight(std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0)))}; + maths_t::TDoubleWeightsAry1Vec weight{maths_t::countWeight( + std::max(learnRate, std::min(5.0 / slidingWindowLength, 1.0)))}; for (const auto& value : slidingWindow) { TDouble1Vec sample{trend->detrend(value.first, value.second, 0.0)}; residualModel.addSamples(sample, weight); @@ -1352,7 +1354,7 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par core_t::TTime maximumTimeToTest{this->params().maximumTimeToTestForChange()}; double weight{maths_t::winsorisationWeight(weights)}; if (minimumTimeToDetect < maximumTimeToTest && - pValueFromTailWinsorisationWeight(weight) <= 1e-5) { + winsorisation::pValueFromWeight(weight) <= CHANGE_P_VALUE) { m_CurrentChangeInterval += this->params().bucketLength(); if (this->params().testForChange(m_CurrentChangeInterval)) { m_ChangeDetector = std::make_shared( @@ -1366,7 +1368,7 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par } if (m_ChangeDetector != nullptr) { - m_ChangeDetector->addSamples({std::make_pair(time, values[median].second[0])}, {weights}); + m_ChangeDetector->addSamples({{time, values[median].second[0]}}, {weights}); if (m_ChangeDetector->stopTesting()) { 
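            // Testing is finished: the detector has either committed to a
            // change or (presumably) exhausted maximumTimeToTestForChange
            // without confirming one, so its state is discarded. A fresh
            // detector is only created if heavily Winsorised samples again
            // push the p-value below CHANGE_P_VALUE.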
m_ChangeDetector.reset(); @@ -2161,8 +2163,7 @@ CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::residualModes(const TDouble2VecWeightsAry& weights) const { - TDouble10Vec1Vec modes( - m_ResidualModel->marginalLikelihoodModes(unpack(weights))); + TDouble10Vec1Vec modes(m_ResidualModel->marginalLikelihoodModes(unpack(weights))); TDouble2Vec1Vec result; result.reserve(modes.size()); for (const auto& mode : modes) { @@ -2380,7 +2381,7 @@ CMultivariateTimeSeriesModel::winsorisationWeight(double derate, } for (std::size_t d = 0u; d < dimension; ++d) { - result[d] = tailWinsorisationWeight(*m_ResidualModel, d, derate, scale[d], sample); + result[d] = winsorisation::tailWeight(*m_ResidualModel, d, derate, scale[d], sample); } return result; diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc index d9e926af1f..c87863fd81 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc @@ -145,14 +145,14 @@ void CTimeSeriesChangeDetectorTest::testLevelShift() { TGeneratorVec trends{constant, ramp, smoothDaily, weekends, spikeyDaily}; this->testChange( trends, maths::SChangeDescription::E_LevelShift, - [](TGenerator trend, core_t::TTime time) { return trend(time) + 0.5; }, 5.0, 15.0); + [](TGenerator trend, core_t::TTime time) { return trend(time) + 0.5; }, 5.0, 16.0); } void CTimeSeriesChangeDetectorTest::testLinearScale() { TGeneratorVec trends{smoothDaily, spikeyDaily}; this->testChange( trends, maths::SChangeDescription::E_LinearScale, - [](TGenerator trend, core_t::TTime time) { return 3.0 * trend(time); }, 3.0, 15.0); + [](TGenerator trend, core_t::TTime time) { return 3.0 * trend(time); }, 3.0, 16.0); } void CTimeSeriesChangeDetectorTest::testTimeShift() { diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index 21a383dedc..100218086b 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -1059,7 +1059,8 @@ void CTimeSeriesModelTest::testProbability() { core_t::TTime time{0}; { - const TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(1)}; + const TDouble2VecWeightsAryVec weight{ + maths_t::CUnitWeights::unit(1)}; for (auto sample : samples) { double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / 86400.0)}; @@ -1276,8 +1277,9 @@ void CTimeSeriesModelTest::testProbability() { double probability; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(computeProbabilityParams(weights[0]), {{time}}, {{sample}}, - probability, tail, conditional, mostAnomalousCorrelate); + model.probability(computeProbabilityParams(weights[0]), {{time}}, + {{sample}}, probability, tail, conditional, + mostAnomalousCorrelate); smallest.add({probability, bucket - 1}); time += bucketLength; } @@ -1802,8 +1804,9 @@ void CTimeSeriesModelTest::testAnomalyModel() { double probability; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(computeProbabilityParams(weights[0]), {{time}}, {{sample}}, - probability, tail, conditional, mostAnomalousCorrelate); + model.probability(computeProbabilityParams(weights[0]), {{time}}, + {{sample}}, probability, tail, conditional, + mostAnomalousCorrelate); mostAnomalous.add({std::log(probability), bucket}); //scores.push_back(maths::CTools::deviation(probability)); time += 
bucketLength; @@ -1875,8 +1878,9 @@ void CTimeSeriesModelTest::testAnomalyModel() { double probability; bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(computeProbabilityParams(weights[0]), {{time}}, {(sample)}, - probability, tail, conditional, mostAnomalousCorrelate); + model.probability(computeProbabilityParams(weights[0]), {{time}}, + {(sample)}, probability, tail, conditional, + mostAnomalousCorrelate); mostAnomalous.add({std::log(probability), bucket}); //scores.push_back(maths::CTools::deviation(probability)); time += bucketLength; @@ -1926,7 +1930,8 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(1)}; auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { - maths_t::setWinsorisationWeight(model.winsorisationWeight(0.0, time, {value}), weight[0]); + maths_t::setWinsorisationWeight( + model.winsorisationWeight(0.0, time, {value}), weight[0]); model.addSamples(addSampleParams(1.0, weight), {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; @@ -2114,7 +2119,8 @@ void CTimeSeriesModelTest::testLinearScaling() { TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(1)}; auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { - maths_t::setWinsorisationWeight(model.winsorisationWeight(0.0, time, {value}), weight[0]); + maths_t::setWinsorisationWeight( + model.winsorisationWeight(0.0, time, {value}), weight[0]); model.addSamples(addSampleParams(1.0, weight), {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; @@ -2167,7 +2173,8 @@ void CTimeSeriesModelTest::testLinearScaling() { sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample); updateModel(time, sample, model); //updateTestDebug(time, sample, model); - auto x = model.confidenceInterval(time, 90.0, maths_t::CUnitWeights::unit(1)); + auto x = model.confidenceInterval( + time, 90.0, maths_t::CUnitWeights::unit(1)); CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 1.2 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); time += bucketLength; @@ -2187,9 +2194,10 @@ void CTimeSeriesModelTest::testLinearScaling() { sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample; updateModel(time, sample, model); //updateTestDebug(time, sample, model); - auto x = model.confidenceInterval(time, 90.0, maths_t::CUnitWeights::unit(1)); - CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.1 * std::sqrt(noiseVariance)); - CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); + auto x = model.confidenceInterval( + time, 90.0, maths_t::CUnitWeights::unit(1)); + CPPUNIT_ASSERT(std::fabs(sample - x[1][0]) < 3.1 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(std::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); time += bucketLength; } @@ -2203,7 +2211,8 @@ void CTimeSeriesModelTest::testDaylightSaving() { TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(1)}; auto updateModel = [&](core_t::TTime time, double value, maths::CUnivariateTimeSeriesModel& model) { - maths_t::setWinsorisationWeight(model.winsorisationWeight(0.0, time, {value}), weight[0]); + maths_t::setWinsorisationWeight( + model.winsorisationWeight(0.0, time, {value}), weight[0]); model.addSamples(addSampleParams(1.0, weight), {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; @@ -2258,9 +2267,10 @@ void CTimeSeriesModelTest::testDaylightSaving() { updateModel(time, sample, model); //updateTestDebug(time, 
sample, model); CPPUNIT_ASSERT_EQUAL(hour, model.trendModel().timeShift()); - auto x = model.confidenceInterval(time, 90.0, maths_t::CUnitWeights::unit(1)); - CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 3.6 * std::sqrt(noiseVariance)); - CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.6 * std::sqrt(noiseVariance)); + auto x = model.confidenceInterval( + time, 90.0, maths_t::CUnitWeights::unit(1)); + CPPUNIT_ASSERT(std::fabs(sample - x[1][0]) < 3.6 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(std::fabs(x[2][0] - x[0][0]) < 3.7 * std::sqrt(noiseVariance)); time += bucketLength; } @@ -2279,9 +2289,10 @@ void CTimeSeriesModelTest::testDaylightSaving() { updateModel(time, sample, model); //updateTestDebug(time, sample, model); CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), model.trendModel().timeShift()); - auto x = model.confidenceInterval(time, 90.0, maths_t::CUnitWeights::unit(1)); - CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 4.1 * std::sqrt(noiseVariance)); - CPPUNIT_ASSERT(::fabs(x[2][0] - x[0][0]) < 3.8 * std::sqrt(noiseVariance)); + auto x = model.confidenceInterval( + time, 90.0, maths_t::CUnitWeights::unit(1)); + CPPUNIT_ASSERT(std::fabs(sample - x[1][0]) < 4.1 * std::sqrt(noiseVariance)); + CPPUNIT_ASSERT(std::fabs(x[2][0] - x[0][0]) < 3.9 * std::sqrt(noiseVariance)); time += bucketLength; } diff --git a/lib/maths/unittest/CTrendComponentTest.cc b/lib/maths/unittest/CTrendComponentTest.cc index 4f4b2e778f..85130c1b86 100644 --- a/lib/maths/unittest/CTrendComponentTest.cc +++ b/lib/maths/unittest/CTrendComponentTest.cc @@ -13,9 +13,9 @@ #include #include -#include #include #include +#include #include #include From b54c4086971a40409c7025e3249f38163078ae69 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 11 May 2018 10:40:47 +0100 Subject: [PATCH 28/29] Add change log entry --- docs/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 252a13e975..ca132857ba 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -29,6 +29,7 @@ Improve and use periodic boundary condition for seasonal component modeling ({pull}84[#84]) Improve robustness w.r.t. outliers of detection and initialisation of seasonal components ({pull}90[#90]) +Explicit change point detection and modelling ({pull}92[#92]) === Bug Fixes From 1ae79abe54418ecdbbc4e9ba85c0bdd5ba46a36d Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 11 May 2018 11:18:37 +0100 Subject: [PATCH 29/29] Formatting fix --- lib/maths/CTimeSeriesModel.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 1455b599f3..0e2e561452 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -210,8 +210,9 @@ double tailWeight(const CPrior& prior, double derate, double scale, double value } else if (upperBound >= MINUS_LOG_TOLERANCE) { f = std::exp(-(lowerBound + upperBound) / 2.0); f = std::min(f, 1.0 - f); - } else if (!prior.minusLogJointCdfComplement( - {value}, {maths_t::seasonalVarianceScaleWeight(scale)}, lowerBound, upperBound)) { + } else if (!prior.minusLogJointCdfComplement( + {value}, {maths_t::seasonalVarianceScaleWeight(scale)}, + lowerBound, upperBound)) { return 1.0; } else { f = std::exp(-(lowerBound + upperBound) / 2.0);
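Looping back to PATCH 25 to close out: updateAnomaly now reduces a bucket's prediction errors to two summary statistics, a scale-normalised error norm and a signed error sum, before handing them to CAnomaly::update(norm, sign). A self-contained sketch of that reduction, with the running mean error norm replaced by a fixed constant for illustration (the library uses CBasicStatistics::mean(m_MeanError)):

#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    // Per-coordinate prediction errors for one anomalous bucket.
    std::vector<double> errors{2.0, -1.0, 3.0};

    // Norm: the Euclidean length of the error vector...
    double norm = std::sqrt(std::accumulate(
        errors.begin(), errors.end(), 0.0,
        [](double n, double x) { return n + x * x; }));

    // ...normalised by the running mean error norm so anomalies are
    // comparable across differently scaled series.
    double meanErrorNorm = 2.5; // placeholder for the running mean
    norm = meanErrorNorm == 0.0 ? 1.0 : norm / meanErrorNorm;

    // Sign: the plain sum of the errors; its sign records whether the
    // anomalous values sat above or below the model's predictions.
    double sign = std::accumulate(errors.begin(), errors.end(), 0.0);

    std::cout << "norm = " << norm << ", sign = " << sign << '\n';
    return 0;
}

Passing the pre-reduced (norm, sign) pair rather than the raw error vector is what lets PATCH 25 drop the per-element loop from CAnomaly::update.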